/**
 * Tries the "transposition" edit: looks for a match in which the first two
 * characters of 'word' appear in swapped order.
 *
 * Parameters:
 *     'node' is the trie node reached so far.
 *     'word' is the unmatched remainder of the word. Both word[0] and word[1]
 *         are read.
 *     'suggestion' receives the swapped pair, followed by whatever the
 *         recursive search writes after it.
 *     'maxEdits' is the number of edits still allowed after this one; it is
 *         passed on to inTrie() unchanged.
 *
 * Returns the result of the recursive inTrie() call when the trie holds the
 * swapped pair below 'node', and FALSE otherwise.
 */
static int inTrieTranspose(TrieNode* node, char* word, char* suggestion, int maxEdits)
{
    TrieNode* afterSecond;
    TrieNode* afterBoth;

    /* The second character must be one the trie can index; presumably this
     * also rejects the string terminator -- confirm against IS_ASCII. */
    if(!IS_ASCII((int)word[1])) {
        return FALSE;
    }

    /* Follow the second character first... */
    afterSecond = node->children[CH_INDEX(word[1])];
    if(afterSecond == NULL) {
        return FALSE;
    }

    /* ...and then the first one. */
    afterBoth = afterSecond->children[CH_INDEX(word[0])];
    if(afterBoth == NULL) {
        return FALSE;
    }

    /* Record the pair in swapped order and carry on after both characters. */
    suggestion[0] = word[1];
    suggestion[1] = word[0];
    return inTrie(afterBoth, word + 2, suggestion + 2, maxEdits);
}
/**
 * Folds 'word' to lower case (ASCII 'A'-'Z' only) and returns the result of
 * looking the folded copy up with inTrie().
 *
 * A word longer than LENGTH characters does not fit the local buffer; it is
 * reported as not found instead of being written past the end of the buffer.
 * (Presumably no dictionary word exceeds LENGTH -- confirm.)
 *
 * NOTE(review): this calls inTrie() with a single argument, and another
 * function named check() with a different signature is visible in the same
 * source; confirm which translation unit this definition belongs to.
 */
bool check(const char *word)
{
    /* Room for LENGTH characters plus the terminating '\0'. */
    char lower[LENGTH + 1];
    int i;

    /* Scan up to the terminator directly rather than calling strlen() on
     * every iteration. */
    for (i = 0; word[i] != '\0'; ++i)
    {
        if (i == LENGTH)
        {
            /* More than LENGTH characters: cannot be stored safely. */
            return false;
        }

        if (word[i] >= 'A' && word[i] <= 'Z')
        {
            lower[i] = (char) (word[i] - 'A' + 'a');
        }
        else
        {
            lower[i] = word[i];
        }
    }
    lower[i] = '\0';

    return inTrie(lower);
}
/**
 * Tries the "replacement" and "deletion" edits at the current position.
 *
 * Every character from FIRST_ASCII_CH to LAST_ASCII_CH that has a child below
 * 'node' is tried in turn as the next character of the suggestion:
 *   - replacement: the candidate stands in for word[0], so the search resumes
 *     at word + 1;
 *   - deletion: the candidate is taken to be missing from 'word', so the
 *     search resumes at 'word' itself.
 * The scan stops at the first candidate for which either search succeeds.
 *
 * Parameters:
 *     'node' is the trie node reached so far.
 *     'word' is the unmatched remainder of the word.
 *     'suggestion' receives the candidate character, followed by whatever the
 *         recursive search writes after it.
 *     'maxEdits' is the number of edits still allowed after this one; it is
 *         passed on to inTrie() unchanged.
 *
 * Returns the non-zero result of the successful inTrie() call, or FALSE if no
 * candidate works.
 *
 * NOTE(review): the counter is a plain char; if LAST_ASCII_CH equals CHAR_MAX
 * the increment would wrap and the loop bound would never fail -- confirm the
 * macro's value.
 */
static int inTrieDeleteModify(TrieNode* node, char* word, char* suggestion, int maxEdits)
{
    char ch;

    for(ch = FIRST_ASCII_CH; ch <= LAST_ASCII_CH; ch++) {
        TrieNode* child = node->children[CH_INDEX(ch)];
        int found;

        if(child == NULL) {
            continue;
        }

        *suggestion = ch;

        /* Replacement: consume word[0] in exchange for the candidate. */
        found = inTrie(child, word + 1, suggestion + 1, maxEdits);
        if(!found) {
            /* Deletion: emit the candidate without consuming any of 'word'. */
            found = inTrie(child, word, suggestion + 1, maxEdits);
        }

        if(found) {
            return found;
        }
    }

    return FALSE;
}
/**
 * Checks the spelling of a list of words against a list of dictionary words.
 * Any misspellings are reported to a callback function, along with a suggested
 * correction. The suggestion is the first dictionary word that inTrie() finds
 * within 'maxDifference' edits of the original word. If no such word is found,
 * the callback receives NULL instead of a suggestion.
 *
 * The callback function returns either TRUE or FALSE, indicating whether the
 * suggested correction should be applied. If TRUE, the memory allocated to the
 * original word is realloc'd to make space for the corrected word, which is
 * then copied into it.
 *
 * Out-of-memory behaviour: if the temporary suggestion buffer cannot be
 * allocated, checking stops at the current word and the remaining words are
 * left unexamined. If an individual word cannot be resized, that word is left
 * as it was, even though the callback accepted the correction.
 *
 * Parameters:
 *     text          - an array of words to spell check (each word must be
 *                     dynamically allocated);
 *     textLength    - the number of words to spell check;
 *     dict          - an array of words to use as the dictionary;
 *     dictLength    - the number of dictionary words;
 *     maxDifference - the maximum difference between misspelt words and their
 *                     suggested corrections;
 *     action        - a pointer to a function that will be called for each
 *                     misspelt word.
 */
void check(char* text[], int textLength, char* dict[], int dictLength,
           int maxDifference, ActionFunc action)
{
    /* Construct a trie to represent the set of all the dictionary words. */
    TrieNode* trie = buildTrie(dict, dictLength);

    /* Extra room for edits that lengthen the word. A negative limit permits
     * no edits, so it must not be allowed to shrink the buffer. */
    size_t slack = (maxDifference > 0) ? (size_t)maxDifference : 0;

    char* suggestion = NULL;
    size_t capacity = 0;
    int t;

    for(t = 0; t < textLength; t++) {
        /* Word, plus edits that add characters, plus the terminator. This is
         * never zero, so the buffer exists even when the word is empty. */
        size_t needed = strlen(text[t]) + slack + 1;

        if(capacity < needed) {
            /* Grow through a temporary so that the old buffer is still
             * reachable (and freed below) if realloc() fails. */
            char* grown = realloc(suggestion, needed);
            if(grown == NULL) {
                break;
            }
            suggestion = grown;
            capacity = needed;
        }

        /* Check whether the word, or an edited version thereof, is in the
         * trie. */
        if(!inTrie(trie, text[t], suggestion, maxDifference)) {
            /* Misspelling, with no suggestion available. */
            (*action)(text[t], NULL);
        }
        else if(strcmp(text[t], suggestion) != 0) {
            /* Misspelling, with suggested correction. */
            if((*action)(text[t], suggestion)) {
                char* resized = realloc(text[t], strlen(suggestion) + 1);
                if(resized != NULL) {
                    text[t] = resized;
                    strcpy(text[t], suggestion);
                }
                /* On failure text[t] is still the valid, unmodified word. */
            }
        }
    }

    free(suggestion);
    freeTrie(trie);
}
/**
 * Searches a trie for a word, or for a slightly edited version of it.
 *
 * Parameters:
 *     'root' points to the trie node to search from.
 *     'word' is the (remainder of the) word to find.
 *     'suggestion' receives the word actually found. This may be exactly
 *         equal to 'word', or a slightly edited version of it.
 *     'maxEdits' is the maximum difference between 'word' and 'suggestion', in
 *         edits, where an edit is a deletion, replacement, or insertion of a
 *         character, or the transposition of two adjacent characters.
 *
 * Returns non-zero if the word, or an edited version of it, was found, and
 * FALSE otherwise. If FALSE is returned, the contents of 'suggestion' are
 * undefined.
 *
 * NOTE(review): once 'word' is exhausted no edit is attempted, so a word that
 * is only missing trailing characters (e.g. "hell" against "hello") appears
 * not to receive a suggestion -- confirm whether that is intended.
 */
static int inTrie(TrieNode* root, char* word, char* suggestion, int maxEdits)
{
    TrieNode* child;
    int found;

    if(!IS_ASCII(*word)) {
        /* Only the end of the word, at a node that completes a dictionary
         * word, counts as a (possibly edited) match. */
        if(*word == '\0' && root->isWord) {
            *suggestion = '\0';
            return TRUE;
        }
        return FALSE;
    }

    /* Prefer an exact match on the current character. */
    child = root->children[CH_INDEX(*word)];
    if(child != NULL) {
        *suggestion = *word;
        found = inTrie(child, word + 1, suggestion + 1, maxEdits);
        if(found) {
            return found;
        }
    }

    /* No exact continuation worked; fall back to edits if any remain. */
    if(maxEdits <= 0) {
        return FALSE;
    }

    /* Short-circuit evaluation tries an insert, then a transposition, then a
     * deletion or replacement, stopping at the first edit that works. Each
     * attempt is given one edit fewer than this call had. */
    return inTrieInsert(root, word, suggestion, maxEdits - 1)
        || inTrieTranspose(root, word, suggestion, maxEdits - 1)
        || inTrieDeleteModify(root, word, suggestion, maxEdits - 1);
}
/**
 * Tries the "insertion" edit: treats word[0] as a surplus character and looks
 * for the rest of the word from the same trie node.
 *
 * Parameters:
 *     'node' is the trie node reached so far.
 *     'word' is the unmatched remainder of the word; its first character is
 *         skipped. (Assumes word[0] is not the terminator -- the visible
 *         caller only gets here after IS_ASCII(*word) succeeded; confirm.)
 *     'suggestion' is where the search continues writing; nothing is written
 *         for the skipped character.
 *     'maxEdits' is the number of edits still allowed after this one; it is
 *         passed on to inTrie() unchanged.
 *
 * Returns whatever the recursive inTrie() call returns.
 */
static int inTrieInsert(TrieNode* node, char* word, char* suggestion, int maxEdits)
{
    char* rest = word + 1; /* step over the surplus character */

    return inTrie(node, rest, suggestion, maxEdits);
}