// Tests whether the unichars of word, from position index to its end, can be
// followed through this DAWG starting at node.
//
// When wildcard is a valid id and the unichar at index equals it, every
// unichar id that unichar_ids_of() reports for node is substituted at that
// position in turn and matched recursively. All substitutions are tried (the
// loop does not stop at the first hit), the wildcard is written back into
// word afterwards, and the result is true if any substitution matched.
//
// Otherwise the unichar at index is looked up with edge_char_of(). A found
// edge at the last position is a match (reported via word->print() when
// debug_level_ > 1); at an earlier position matching continues from the node
// the edge leads to, unless that node is 0.
bool Dawg::match_words(WERD_CHOICE *word, inT32 index,
                       NODE_REF node, UNICHAR_ID wildcard) const {
  if (wildcard != INVALID_UNICHAR_ID && word->unichar_id(index) == wildcard) {
    NodeChildVector children;
    unichar_ids_of(node, &children);

    bool found = false;
    for (int child = 0; child < children.size(); ++child) {
      word->set_unichar_id(children[child].unichar_id, index);
      // Call first, then combine, so every candidate is explored.
      found = match_words(word, index, node, wildcard) || found;
    }

    // Put the wildcard back so the caller sees the word unchanged.
    word->set_unichar_id(wildcard, index);
    return found;
  }

  const bool at_last_position = (index == word->length() - 1);
  const EDGE_REF link =
      edge_char_of(node, word->unichar_id(index), at_last_position);
  if (link == NO_EDGE) {
    return false;
  }

  const NODE_REF target = next_node(link);
  if (at_last_position) {
    if (debug_level_ > 1) word->print("match_words() found: ");
    return true;
  }
  if (target == 0) {
    return false;
  }
  return match_words(word, index + 1, target, wildcard);
}
/*
 * Decides whether calls into $fname are to be reported.
 *
 * On the first call the filter list is built from the environment:
 * $TRACY_INLIBS (treated as a whitelist) is tried first, then $TRACY_EXLIBS
 * (treated as a blacklist); if neither yields a list, everything is reported.
 *
 * Returns NULL when calls should not be reported.  Otherwise returns the
 * name to report: the result of match_words() for a whitelisted match, or
 * the part of $fname following its last '/' ($fname itself if it has none).
 */
static char const *report_dso(char const *fname)
{
	static int report_all = 0;
	static struct word_st const *flist = NULL;
	static int is_whitelist;
	char const *hit;
	char const *slash;

	/* One-time setup: nothing has been decided yet. */
	if (!report_all && !flist) {
		char const *env;

		env = getenv("TRACY_INLIBS");
		if (env != NULL)
			flist = mkwords(env);

		if (flist != NULL) {
			is_whitelist = 1;
		} else {
			env = getenv("TRACY_EXLIBS");
			if (env != NULL)
				flist = mkwords(env);

			if (flist != NULL)
				is_whitelist = 0;
			else
				report_all = 1;
		}
	}

	/* Consult the filter list, if there is one. */
	if (!report_all) {
		hit = match_words(flist, fname);
		if (hit != NULL)
			return is_whitelist ? hit : NULL;
		if (is_whitelist)
			return NULL;
		/* Blacklist without a match: fall through and report. */
	}

	/* Strip any directory part of $fname. */
	slash = strrchr(fname, '/');
	if (slash == NULL)
		return fname;
	return slash + 1;
} /* report_dso */
int Dawg::check_for_words(const char *filename, const UNICHARSET &unicharset, bool enable_wildcard) const { if (filename == nullptr) return 0; FILE *word_file; char string [CHARS_PER_LINE]; int misses = 0; UNICHAR_ID wildcard = unicharset.unichar_to_id(kWildcard); word_file = fopen(filename, "r"); if (word_file == nullptr) { tprintf("Error: Could not open file %s\n", filename); ASSERT_HOST(word_file); } while (fgets (string, CHARS_PER_LINE, word_file) != nullptr) { chomp_string(string); // remove newline WERD_CHOICE word(string, unicharset); if (word.length() > 0 && !word.contains_unichar_id(INVALID_UNICHAR_ID)) { if (!match_words(&word, 0, 0, enable_wildcard ? wildcard : INVALID_UNICHAR_ID)) { tprintf("Missing word: %s\n", string); ++misses; } } else { tprintf("Failed to create a valid word from %s\n", string); } } fclose (word_file); // Make sure the user sees this with fprintf instead of tprintf. if (debug_level_) tprintf("Number of lost words=%d\n", misses); return misses; }
// Scores the documents that contain the words shared by pre_words and the
// pattern, then reorders *documents so that measures are in descending order.
//
// pre_words, pat_words and pat_count are forwarded to match_words() to find
// the common words. *documents is filled in by get_documents() and then
// permuted in place together with the per-document measures, which are also
// printed to stdout on one line.
//
// Returns the document count reported by get_documents(), or -1 if a scratch
// buffer could not be allocated (in that case *documents is left as
// get_documents() produced it).
int search(const word_t *pre_words, const char **pat_words, int pat_count, int **documents) {
    // find common words
    word_t **words = NULL;
    int num_of_words = match_words(pre_words, pat_words, pat_count, &words);

    // find priorities of these words
    double *priorities = NULL;
    get_priorities((const word_t **)words, num_of_words, &priorities);

    // get used documents
    int num_of_doc = get_documents((const word_t **)words, num_of_words, documents);

    // Zero-initialised so a document the scoring loop never reaches has a
    // defined measure (0) when it is sorted and printed below.
    int *measures = NULL;
    if (num_of_doc > 0) {
        measures = (int *)calloc((size_t)num_of_doc, sizeof(int));
        if (measures == NULL) {
            free(priorities);
            free(words);
            return -1;
        }
    }

    // One position per word, all starting at 0.
    int *step_info_position = NULL;
    if (num_of_words > 0) {
        step_info_position = (int *)calloc((size_t)num_of_words, sizeof(int));
        if (step_info_position == NULL) {
            free(measures);
            free(priorities);
            free(words);
            return -1;
        }
    }

    int *step_words = NULL;
    int cur_doc_num = -1;
    int cur_doc = -1;
    int num_step_words = 0;

    // The bound is checked before each step so (*documents) is never indexed
    // when there are no documents, and no step is requested once every
    // document has been scored.
    while (cur_doc_num + 1 < num_of_doc &&
           (num_step_words = get_step_words((const word_t **)words, num_of_words, cur_doc,
                                            step_info_position, &step_words)) != -1) {
        int measure = get_measure((const word_t **)words, step_info_position, priorities,
                                  step_words, num_step_words);

        ++cur_doc_num;
        cur_doc = (*documents)[cur_doc_num];

        // Store the measure in the first slot holding this document id.
        for (int i = 0; i < num_of_doc; i++) {
            if (cur_doc == (*documents)[i]) {
                measures[i] = measure;
                break;
            }
        }
    }

    // Selection sort: each pass moves the smallest remaining measure to the
    // end of the unsorted prefix, leaving measures in descending order.
    // documents is permuted identically.
    for (int i = 0; i < num_of_doc - 1; i++) {
        int last = num_of_doc - i - 1;
        int smallest = 0;
        for (int j = 1; j < num_of_doc - i; j++) {
            if (measures[j] < measures[smallest]) {
                smallest = j;
            }
        }

        int temp = measures[last];
        measures[last] = measures[smallest];
        measures[smallest] = temp;

        temp = (*documents)[last];
        (*documents)[last] = (*documents)[smallest];
        (*documents)[smallest] = temp;
    }

    free(step_info_position);
    free(priorities);
    free(step_words);

    for (int i = 0; i < num_of_doc; i++)
        printf("%d ", measures[i]);
    printf("\n");

    free(measures);
    free(words);

    return num_of_doc;
}