dictionary - segmentation fault in c program with a trie -
i'm getting seg fault spell check program. tried gnu , valgrind can't figure out going on. (node defined in .h file) program spell check cs50x worked on smaller dictionary seg faults while running well.
here node:
typedef struct node { bool is_word; struct node* letters[27]; } node;
and here speller.c course - written course staff:
#include <ctype.h> #include <stdio.h> #include <sys/resource.h> #include <sys/time.h> #include "dictionary.h" #undef calculate #undef getrusage // default dictionary #define dictionary "dictionaries/large" // prototype double calculate(const struct rusage* b, const struct rusage* a); int main(int argc, char* argv[]) { // check correct number of args if (argc != 2 && argc != 3) { printf("usage: speller [dictionary] text\n"); return 1; } // structs timing data struct rusage before, after; // benchmarks double time_load = 0.0, time_check = 0.0, time_size = 0.0, time_unload = 0.0; // determine dictionary use char* dictionary = (argc == 3) ? argv[1] : dictionary; // load dictionary getrusage(rusage_self, &before); bool loaded = load(dictionary); getrusage(rusage_self, &after); // abort if dictionary not loaded if (!loaded) { printf("could not load %s.\n", dictionary); return 1; } // calculate time load dictionary time_load = calculate(&before, &after); // try open text char* text = (argc == 3) ? argv[2] : argv[1]; file* fp = fopen(text, "r"); if (fp == null) { printf("could not open %s.\n", text); unload(); return 1; } // prepare report misspellings printf("\nmisspelled words\n\n"); // prepare spell-check int index = 0, misspellings = 0, words = 0; char word[length+1]; // spell-check each word in text (int c = fgetc(fp); c != eof; c = fgetc(fp)) { // allow alphabetical characters , apostrophes if (isalpha(c) || (c == '\'' && index > 0)) { // append character word word[index] = c; index++; // ignore alphabetical strings long words if (index > length) { // consume remainder of alphabetical string while ((c = fgetc(fp)) != eof && isalpha(c)); // prepare new word index = 0; } } // ignore words numbers (like ms word can) else if (isdigit(c)) { // consume remainder of alphanumeric string while ((c = fgetc(fp)) != eof && isalnum(c)); // prepare new word index = 0; } // must have found whole word else if (index > 0) { // terminate current word word[index] = '\0'; // update counter words++; // check word's spelling getrusage(rusage_self, &before); bool misspelled = !check(word); getrusage(rusage_self, &after); // update benchmark time_check += calculate(&before, &after); // print word if misspelled if (misspelled) { printf("%s\n", word); misspellings++; } // prepare next word index = 0; } } // check whether there error if (ferror(fp)) { fclose(fp); printf("error reading %s.\n", text); unload(); return 1; } // close text fclose(fp); // determine dictionary's size getrusage(rusage_self, &before); unsigned int n = size(); getrusage(rusage_self, &after); // calculate time determine dictionary's size time_size = calculate(&before, &after); // unload dictionary getrusage(rusage_self, &before); bool unloaded = unload(); getrusage(rusage_self, &after); // abort if dictionary not unloaded if (!unloaded) { printf("could not unload %s.\n", dictionary); return 1; } // calculate time unload dictionary time_unload = calculate(&before, &after); // report benchmarks printf("\nwords misspelled: %d\n", misspellings); printf("words in dictionary: %d\n", n); printf("words in text: %d\n", words); printf("time in load: %.2f\n", time_load); printf("time in check: %.2f\n", time_check); printf("time in size: %.2f\n", time_size); printf("time in unload: %.2f\n", time_unload); printf("time in total: %.2f\n\n", time_load + time_check + time_size + time_unload); // that's folks return 0; } /** * returns number of seconds between b , a. */ double calculate(const struct rusage* b, const struct rusage* a) { if (b == null || == null) { return 0.0; } else { return ((((a->ru_utime.tv_sec * 1000000 + a->ru_utime.tv_usec) - (b->ru_utime.tv_sec * 1000000 + b->ru_utime.tv_usec)) + ((a->ru_stime.tv_sec * 1000000 + a->ru_stime.tv_usec) - (b->ru_stime.tv_sec * 1000000 + b->ru_stime.tv_usec))) / 1000000.0); } } // prototype of build trie function , unload int build_trie(int letter, node* root, file* dict); bool unloader(node* head); // declare global variables node* root; int total_words = 0; /** * returns true if word in dictionary else false. */ bool check(const char* word) { // iniitialize pointer trav point same root node* trav = root; // iterate on word see if letters "open" paths in trie int = 0; while (word[i] != '\0') { // check if letter if (isalpha(word[i])) { if (trav->letters[tolower(word[i]) -97] == null) { return false; } else { trav = trav->letters[tolower(word[i]) -97]; i++; } } else if (word[i] == '\'') { if (trav->letters[26] == null) { return false; } else { trav = trav->letters[26]; i++; } } else { return false; } } // @ end check if word if (word[i] == '\0') return (trav->is_word); else return false; } /** * loads dictionary memory. returns true if successful else false. */ bool load(const char* dictionary) { // open file read dictionary file* dict = fopen(dictionary, "r"); if (dict == null) { printf("could not open %s.\n", dictionary); return 1; } // create root root = (node*)malloc(sizeof(node)); // initialize pointers in root null (int = 0; < 27; i++) { root->letters[i] = null; } // initialize trav node* trav = root; // initialize temp variable store new word int letter; // while loop read through dictionary while ((letter = fgetc(dict)) != eof) { build_trie(letter, trav, dict); } if (letter == eof) return true; else return false; } // build trie function // used within load function recursively int build_trie(int letter, node* trav, file* dict) { // set place value of letter - letters array place "open" int place = letter; // set base case recursive function -> end of string "\n" if (letter == '\n') { trav->is_word = true; total_words++; (int = 0; < 27; i++) { trav->letters[i] = null; } return 0; } // recursive part else { // determine in letters arrays go if (isalpha(letter)) { place = letter - 97; } else if (letter == 44) { place = 26; } // check see if new node exists // if not - create new node , recurse new letter , pointer if (trav->letters[place] == null) { trav->letters[place] = (node*)malloc(sizeof(node)); // initialize new nodes pointers null (int = 0; < 27; i++) { trav->letters[place]->letters[i] = null; } letter = fgetc(dict); return(build_trie(letter, trav->letters[place], dict)); } // if exist - new letter - , recurse pointer node else { letter = fgetc(dict); return build_trie(letter, trav->letters[place], dict); } } return 1; } /** * returns number of words in dictionary if loaded else 0 if not yet loaded. */ unsigned int size(void) { return total_words; } /** * unloads dictionary memory. returns true if successful else false. */ bool unload(void) { node* head = root; return unloader(head); } /** * recursive function free trie takes in "head" of trie */ // use recursion free nodes bool unloader(node* head) { // check base case - null pointers -> free int total = 0; (int = 0; < 27; i++) { if (head->letters[i] == null) { total++; } } // base case 27 pointers == null if (total == 27) { free(head); head = null; return true; } else { (int = 0; < 27; i++) { if (head->letters[i] != null) { unloader(head->letters[i]); } } } return false; }
when looking @ code, there incoherence between build_trie()
during dictionary load , check()
function search read word in dictionary.
in build_trie()
function, letters[26]
used ascii code 44 (magic number ?):
if (isalpha(letter)) { place = letter - 97; } else if (letter == 44) // magic number = ',' { place = 26; }
in check()
function, letters[26]
used '
(apostrophe).
else if (word[i] == '\'') // apostrophe character = ascii 39 { if (trav->letters[26] == null) { return false; } else { trav = trav->letters[26]; i++; } }
in build_trie()
function, when place
index calculated, there following unexpected behavior:
if (isalpha(letter)) // both uppercase/lowercase available { place = letter - 97; // magic number 97 = ascii 'a' } else if (letter == 44) //'\'') { place = 26; } if (trav->letters[place] == null)
using magic number 97 instead of 'a', occur negative
place
index ifletter
uppercase. , before accessingletters[place]
, index not checked.if
letter
not isalpha() nor 44 , nor\n
,place
index has been assignletter
value , 8bits value.
before using potential negative place
index, can add exit code.
if ((place < 0) || (place > 26)) { // exit when unexpected letter loaded printf("unexepected \"%c\"(%d) in dictionary.\n",(char)letter,letter); return (-1); } if (trav->letters[place] == null)
in unloader()
function, return condition not depending on recursive return conditions , true
leave-node (with is_word = true
). alternate function be:
bool unloader(node* head) { // return false when node hasn't allocated if (head == null) return (false); bool bres = true; // default answer true for(int i=0;i<27;i++) { // call recursive unloader() when allocated if (head->letters[i]!=null) { // check if allocated node freed bres = (bres && unloader(head->letters[i])); } } // free leave-node free(head); head = null; return (bres); }
Comments
Post a Comment