// Runs classify_word_pass1() on the current word. Outputs Tesseract's
// raw choice as a result of the classification. For words labeled with a
// single unichar also outputs all alternatives from blob_choices of the
// best choice.
void Tesseract::ambigs_classify_and_output(const char *label,
                                           PAGE_RES_IT* pr_it,
                                           FILE *output_file) {
  // Classify word.
  fflush(stdout);
  WordData word_data(*pr_it);
  SetupWordPassN(1, &word_data);
  classify_word_and_language(1, pr_it, &word_data);
  WERD_RES* werd_res = word_data.word;
  WERD_CHOICE *best_choice = werd_res->best_choice;
  ASSERT_HOST(best_choice != NULL);

  // Compute the number of unichars in the label.
  GenericVector<UNICHAR_ID> encoding;
  if (!unicharset.encode_string(label, true, &encoding, NULL, NULL)) {
    tprintf("Not outputting illegal unichar %s\n", label);
    return;
  }

  // Dump all paths through the ratings matrix (which is normally small).
  int dim = werd_res->ratings->dimension();
  const BLOB_CHOICE** blob_choices = new const BLOB_CHOICE*[dim];
  PrintMatrixPaths(0, dim, *werd_res->ratings, 0, blob_choices,
                   unicharset, label, output_file);
  delete [] blob_choices;
}
Beispiel #2
0
void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row,
                                    BLOCK* block) {
  WERD_RES_IT word_it(&words);
  WERD_RES *word;
  // Since we are not using PAGE_RES to iterate over words, we need to update
  // prev_word_best_choice_ before calling classify_word_pass2().
  prev_word_best_choice_ = NULL;
  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
    word = word_it.data();
    if ((!word->part_of_combo) && (word->box_word == NULL)) {
      classify_word_and_language(&Tesseract::classify_word_pass2,
                                 block, row, word);
    }
    prev_word_best_choice_ = word->best_choice;
  }
}