// Runs classify_word_pass1() on the current word. Outputs Tesseract's // raw choice as a result of the classification. For words labeled with a // single unichar also outputs all alternatives from blob_choices of the // best choice. void Tesseract::ambigs_classify_and_output(const char *label, PAGE_RES_IT* pr_it, FILE *output_file) { // Classify word. fflush(stdout); WordData word_data(*pr_it); SetupWordPassN(1, &word_data); classify_word_and_language(1, pr_it, &word_data); WERD_RES* werd_res = word_data.word; WERD_CHOICE *best_choice = werd_res->best_choice; ASSERT_HOST(best_choice != NULL); // Compute the number of unichars in the label. GenericVector<UNICHAR_ID> encoding; if (!unicharset.encode_string(label, true, &encoding, NULL, NULL)) { tprintf("Not outputting illegal unichar %s\n", label); return; } // Dump all paths through the ratings matrix (which is normally small). int dim = werd_res->ratings->dimension(); const BLOB_CHOICE** blob_choices = new const BLOB_CHOICE*[dim]; PrintMatrixPaths(0, dim, *werd_res->ratings, 0, blob_choices, unicharset, label, output_file); delete [] blob_choices; }
// Walks the given list of word results in order and runs
// classify_word_and_language() with classify_word_pass2 on every word that
// is not part of a combination and has no box_word yet.
//
// prev_word_best_choice_ is maintained by hand here: it is cleared before
// the first word and set to each word's best_choice after that word has been
// visited, because the words are not being iterated through a PAGE_RES.
void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row,
                                    BLOCK* block) {
  WERD_RES_IT res_it(&words);

  // No previous word exists at the start of the list.
  prev_word_best_choice_ = NULL;

  res_it.mark_cycle_pt();
  while (!res_it.cycled_list()) {
    WERD_RES *current = res_it.data();

    const bool needs_classification =
        !current->part_of_combo && current->box_word == NULL;
    if (needs_classification) {
      classify_word_and_language(&Tesseract::classify_word_pass2,
                                 block, row, current);
    }

    // Recorded for every word, whether or not it was classified above.
    prev_word_best_choice_ = current->best_choice;
    res_it.forward();
  }
}