Пример #1
0
// Recursively checks whether the unichars of word, from position index
// onward, can be followed through the DAWG starting at node.
//
// If wildcard is a valid id and the unichar at index equals it, every child
// unichar id reported by unichar_ids_of() for node is substituted in turn and
// the match is retried at the same position; the wildcard is put back into
// the word before returning. Otherwise the unichar at index must have an edge
// out of node.
//
// Returns true if at least one complete match was found, false otherwise.
bool Dawg::match_words(WERD_CHOICE *word, inT32 index,
                       NODE_REF node, UNICHAR_ID wildcard) const {
  const bool at_wildcard =
      wildcard != INVALID_UNICHAR_ID && word->unichar_id(index) == wildcard;

  if (at_wildcard) {
    NodeChildVector children;
    this->unichar_ids_of(node, &children);
    bool matched = false;
    // Every candidate is tried (no early exit), so each successful expansion
    // gets a chance to be reported by the debug print below.
    for (int k = 0; k < children.size(); ++k) {
      word->set_unichar_id(children[k].unichar_id, index);
      if (match_words(word, index, node, wildcard)) {
        matched = true;
      }
    }
    // Restore the wildcard so the caller sees the word unchanged.
    word->set_unichar_id(wildcard, index);
    return matched;
  }

  // Literal unichar: look for the corresponding edge out of this node.
  const bool is_last = (index == word->length() - 1);
  const EDGE_REF link = edge_char_of(node, word->unichar_id(index), is_last);
  if (link == NO_EDGE) {
    return false;
  }

  const NODE_REF target = next_node(link);
  if (is_last) {
    if (debug_level_ > 1) word->print("match_words() found: ");
    return true;
  }
  if (target == 0) {
    // More unichars remain but the edge leads to node 0: no match.
    return false;
  }
  return match_words(word, index + 1, target, wildcard);
}
Пример #2
0
/*
 * Decides whether calls into $fname are to be reported and, if so,
 * returns the name to report it under; returns NULL otherwise.
 *
 * On first use the filter is built from the environment: a word list
 * from $TRACY_INLIBS acts as a whitelist, otherwise one from
 * $TRACY_EXLIBS acts as a blacklist.  If neither yields a list,
 * everything is reported.  The outcome is cached in function-local
 * statics.
 */
static char const *report_dso(char const *fname)
{
	static int report_all = 0;
	static struct word_st const *flist = NULL;
	static int is_whitelist;
	char const *hit;
	char const *slash;

	/* First call (or nothing decided yet): consult the environment. */
	if (!report_all && !flist)
	{
		char const *list;

		list = getenv("TRACY_INLIBS");
		if (list && (flist = mkwords(list)))
		{
			is_whitelist = 1;
		}
		else
		{
			list = getenv("TRACY_EXLIBS");
			if (list && (flist = mkwords(list)))
				is_whitelist = 0;
			else
				report_all = 1;
		}
	}

	/* Apply the filter list if there is one. */
	if (!report_all)
	{
		hit = match_words(flist, fname);

		/*
		 * Whitelist: report exactly what matched (NULL if nothing did).
		 * NOTE(review): match_words() presumably returns the basename
		 * on a hit -- confirm against its definition.
		 */
		if (is_whitelist)
			return hit;

		/* Blacklist: a match means "do not report". */
		if (hit != NULL)
			return NULL;
	}

	/* Reported: strip any directory part from $fname. */
	slash = strrchr(fname, '/');
	if (slash == NULL)
		return fname;
	return slash + 1;
} /* report_dso */
Пример #3
0
int Dawg::check_for_words(const char *filename,
                          const UNICHARSET &unicharset,
                          bool enable_wildcard) const {
  if (filename == nullptr) return 0;

  FILE       *word_file;
  char       string [CHARS_PER_LINE];
  int misses = 0;
  UNICHAR_ID wildcard = unicharset.unichar_to_id(kWildcard);

  word_file = fopen(filename, "r");
  if (word_file == nullptr) {
    tprintf("Error: Could not open file %s\n", filename);
    ASSERT_HOST(word_file);
  }

  while (fgets (string, CHARS_PER_LINE, word_file) != nullptr) {
    chomp_string(string);  // remove newline
    WERD_CHOICE word(string, unicharset);
    if (word.length() > 0 &&
        !word.contains_unichar_id(INVALID_UNICHAR_ID)) {
      if (!match_words(&word, 0, 0,
                       enable_wildcard ? wildcard : INVALID_UNICHAR_ID)) {
        tprintf("Missing word: %s\n", string);
        ++misses;
      }
    } else {
      tprintf("Failed to create a valid word from %s\n", string);
    }
  }
  fclose (word_file);
  // Make sure the user sees this with fprintf instead of tprintf.
  if (debug_level_) tprintf("Number of lost words=%d\n", misses);
  return misses;
}
Пример #4
0
/*
 * Finds the documents related to the pattern words and orders them by a
 * per-document measure.
 *
 * pre_words, pat_words, pat_count: forwarded to match_words() to obtain the
 *     set of common words.
 * documents: output; filled in by get_documents() and then reordered in place
 *     so that documents with a larger measure come first.
 *
 * The measures are printed to stdout on one line, followed by a newline.
 *
 * Returns the document count reported by get_documents(). If that count is
 * not positive, or a scratch buffer cannot be allocated, *documents is left
 * in the order get_documents() produced and no measures are printed.
 */
int search(const word_t *pre_words, const char **pat_words, int pat_count, int **documents) {
    // find common words
    word_t **words;
    int num_of_words = match_words(pre_words, pat_words, pat_count, &words);
    // find priorities of these words
    double *priorities = NULL;
    get_priorities((const word_t **)words, num_of_words, &priorities);
    // get used documents
    int num_of_doc = get_documents((const word_t **)words, num_of_words, documents);

    int *measures = NULL;
    int *step_words = NULL;
    int *step_info_position = NULL;
    int ranked = 0;

    if (num_of_doc > 0) {
        // Zero-initialised so that a document never reached by the scoring
        // loop has a defined measure (0) instead of indeterminate memory.
        measures = (int *)calloc((size_t)num_of_doc, sizeof(int));
        // Request at least one element so a NULL result always means failure.
        step_info_position = (int *)calloc(num_of_words > 0 ? (size_t)num_of_words : 1,
                                           sizeof(int));
        if (measures == NULL || step_info_position == NULL)
            fprintf(stderr, "search: out of memory, documents left unranked\n");
        else
            ranked = 1;
    }

    if (ranked) {
        int cur_doc_num = -1;
        int cur_doc = -1;
        int num_step_words = 0;

        // Score one document per step until get_step_words() reports -1 or
        // every document has been handled.
        while ((num_step_words = get_step_words((const word_t **)words, num_of_words,
                                                cur_doc, step_info_position, &step_words)) != -1) {
            int measure = get_measure((const word_t **)words, step_info_position,
                                      priorities, step_words, num_step_words);
            ++cur_doc_num;
            cur_doc = (*documents)[cur_doc_num];
            // Store the measure at the first slot holding this document id.
            for (int i = 0; i < num_of_doc; i++)
                if (cur_doc == (*documents)[i]) {
                    measures[i] = measure;
                    break;
                }
            if (cur_doc_num == num_of_doc - 1)
                break;
        }

        // Selection sort, descending by measure: each pass moves the smallest
        // remaining measure (and its document) to the end of the unsorted
        // prefix. Both arrays are swapped together to stay aligned.
        for (int i = 0; i < num_of_doc - 1; i++) {
            int last = num_of_doc - i - 1;
            int min_idx = 0;
            for (int j = 1; j <= last; j++) {
                if (measures[j] < measures[min_idx]) {
                    min_idx = j;
                }
            }
            int temp = measures[last];
            measures[last] = measures[min_idx];
            measures[min_idx] = temp;
            temp = (*documents)[last];
            (*documents)[last] = (*documents)[min_idx];
            (*documents)[min_idx] = temp;
        }
    }

    free(step_info_position);
    free(priorities);
    free(step_words);
    if (ranked) {
        for (int i = 0; i < num_of_doc; i++)
            printf("%d ", measures[i]);
    }
    printf("\n");
    free(measures);
    free(words);
    return num_of_doc;
}