/** * BLOB_CHOICE::BLOB_CHOICE * * Constructor to build a BLOB_CHOICE from another BLOB_CHOICE. */ BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) { unichar_id_ = other.unichar_id(); rating_ = other.rating(); certainty_ = other.certainty(); config_ = other.config(); script_id_ = other.script_id(); }
// Creates a fake blob choice from the combination of the given fragments. // unichar is the class to be made from the combination, // expanded_fragment_lengths[choice_index] is the number of fragments to use. // old_choices[choice_index] has the classifier output for each fragment. // choice index initially indexes the last fragment and should be decremented // expanded_fragment_lengths[choice_index] times to get the earlier fragments. // Guarantees to return something non-null, or abort! BLOB_CHOICE* Wordrec::rebuild_fragments( const char* unichar, const char* expanded_fragment_lengths, int choice_index, BLOB_CHOICE_LIST_VECTOR *old_choices) { float rating = 0.0f; float certainty = 0.0f; inT16 min_xheight = -MAX_INT16; inT16 max_xheight = MAX_INT16; for (int fragment_pieces = expanded_fragment_lengths[choice_index] - 1; fragment_pieces >= 0; --fragment_pieces, --choice_index) { // Get a pointer to the classifier results from the old_choices. BLOB_CHOICE_LIST *current_choices = old_choices->get(choice_index); // Populate fragment with updated values and look for the // fragment with the same values in current_choices. // Update rating and certainty of the character being composed. CHAR_FRAGMENT fragment; fragment.set_all(unichar, fragment_pieces, expanded_fragment_lengths[choice_index], false); BLOB_CHOICE_IT choice_it(current_choices); for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); choice_it.forward()) { BLOB_CHOICE* choice = choice_it.data(); const CHAR_FRAGMENT *current_fragment = getDict().getUnicharset().get_fragment(choice->unichar_id()); if (current_fragment && fragment.equals(current_fragment)) { rating += choice->rating(); if (choice->certainty() < certainty) { certainty = choice->certainty(); } IntersectRange(choice->min_xheight(), choice->max_xheight(), &min_xheight, &max_xheight); break; } } if (choice_it.cycled_list()) { print_ratings_list("Failure", current_choices, unicharset); tprintf("Failed to find fragment %s at index=%d\n", fragment.to_string().string(), choice_index); } ASSERT_HOST(!choice_it.cycled_list()); // Be sure we found the fragment. } return new BLOB_CHOICE(getDict().getUnicharset().unichar_to_id(unichar), rating, certainty, -1, -1, 0, min_xheight, max_xheight, false); }
/** * BLOB_CHOICE::BLOB_CHOICE * * Constructor to build a BLOB_CHOICE from another BLOB_CHOICE. */ BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) { unichar_id_ = other.unichar_id(); rating_ = other.rating(); certainty_ = other.certainty(); config_ = other.config(); config2_ = other.config2(); script_id_ = other.script_id(); language_model_state_ = NULL; }
/** * BLOB_CHOICE::BLOB_CHOICE * * Constructor to build a BLOB_CHOICE from another BLOB_CHOICE. */ BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) { unichar_id_ = other.unichar_id(); rating_ = other.rating(); certainty_ = other.certainty(); fontinfo_id_ = other.fontinfo_id(); fontinfo_id2_ = other.fontinfo_id2(); script_id_ = other.script_id(); language_model_state_ = NULL; min_xheight_ = other.min_xheight_; max_xheight_ = other.max_xheight_; adapted_ = other.adapted_; }
/** * BLOB_CHOICE::BLOB_CHOICE * * Constructor to build a BLOB_CHOICE from another BLOB_CHOICE. */ BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) { unichar_id_ = other.unichar_id(); rating_ = other.rating(); certainty_ = other.certainty(); fontinfo_id_ = other.fontinfo_id(); fontinfo_id2_ = other.fontinfo_id2(); script_id_ = other.script_id(); matrix_cell_ = other.matrix_cell_; min_xheight_ = other.min_xheight_; max_xheight_ = other.max_xheight_; yshift_ = other.yshift(); classifier_ = other.classifier_; }
/** * append_choices * * Checks to see whether or not the next choice is worth appending to * the word being generated. If so then keeps going deeper into the word. * * This function assumes that Dict::go_deeper_fxn_ is set. */ void Dict::append_choices( const char *debug, const BLOB_CHOICE_LIST_VECTOR &char_choices, const BLOB_CHOICE &blob_choice, int char_choice_index, const CHAR_FRAGMENT_INFO *prev_char_frag_info, WERD_CHOICE *word, float certainties[], float *limit, WERD_CHOICE *best_choice, int *attempts_left, void *more_args) { int word_ending = (char_choice_index == char_choices.length() - 1) ? true : false; // Deal with fragments. CHAR_FRAGMENT_INFO char_frag_info; if (!fragment_state_okay(blob_choice.unichar_id(), blob_choice.rating(), blob_choice.certainty(), prev_char_frag_info, debug, word_ending, &char_frag_info)) { return; // blob_choice must be an invalid fragment } // Search the next letter if this character is a fragment. if (char_frag_info.unichar_id == INVALID_UNICHAR_ID) { permute_choices(debug, char_choices, char_choice_index + 1, &char_frag_info, word, certainties, limit, best_choice, attempts_left, more_args); return; } // Add the next unichar. float old_rating = word->rating(); float old_certainty = word->certainty(); uint8_t old_permuter = word->permuter(); certainties[word->length()] = char_frag_info.certainty; word->append_unichar_id_space_allocated( char_frag_info.unichar_id, char_frag_info.num_fragments, char_frag_info.rating, char_frag_info.certainty); // Explore the next unichar. (this->*go_deeper_fxn_)(debug, char_choices, char_choice_index, &char_frag_info, word_ending, word, certainties, limit, best_choice, attempts_left, more_args); // Remove the unichar we added to explore other choices in it's place. word->remove_last_unichar_id(); word->set_rating(old_rating); word->set_certainty(old_certainty); word->set_permuter(old_permuter); }
LIST call_matcher( //call a matcher TBLOB *ptblob, //previous TBLOB *tessblob, //blob to match TBLOB *ntblob, //next void *, //unused parameter TEXTROW * //always null anyway ) { PBLOB *pblob; //converted blob PBLOB *blob; //converted blob PBLOB *nblob; //converted blob LIST result; //tess output BLOB_CHOICE *choice; //current choice BLOB_CHOICE_LIST ratings; //matcher result BLOB_CHOICE_IT it; //iterator char choice_lengths[2] = {0, 0}; blob = make_ed_blob (tessblob);//convert blob if (blob == NULL) { // Since it is actually possible to get a NULL blob here, due to invalid // segmentations, fake a really bad classification. choice_lengths[0] = strlen(unicharset.id_to_unichar(1)); return append_choice(NULL, unicharset.id_to_unichar(1), choice_lengths, static_cast<float>(MAX_NUM_INT_FEATURES), static_cast<float>(kReallyBadCertainty), 0); } pblob = ptblob != NULL ? make_ed_blob (ptblob) : NULL; nblob = ntblob != NULL ? make_ed_blob (ntblob) : NULL; (*tess_matcher) (pblob, blob, nblob, tess_word, tess_denorm, ratings); //match it delete blob; //don't need that now if (pblob != NULL) delete pblob; if (nblob != NULL) delete nblob; it.set_to_list (&ratings); //get list result = NULL; for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { choice = it.data (); choice_lengths[0] = strlen(choice->unichar ()); result = append_choice (result, choice->unichar (), choice_lengths, choice->rating (), choice->certainty (), choice->config ()); } return result; //converted list }
void Dict::ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size, UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice, MATRIX *ratings) { int num_blobs_to_replace = 0; int begin_blob_index = 0; int i; // Rating and certainty for the new BLOB_CHOICE are derived from the // replaced choices. float new_rating = 0.0f; float new_certainty = 0.0f; BLOB_CHOICE* old_choice = nullptr; for (i = 0; i < wrong_ngram_begin_index + wrong_ngram_size; ++i) { if (i >= wrong_ngram_begin_index) { int num_blobs = werd_choice->state(i); int col = begin_blob_index + num_blobs_to_replace; int row = col + num_blobs - 1; BLOB_CHOICE_LIST* choices = ratings->get(col, row); ASSERT_HOST(choices != nullptr); old_choice = FindMatchingChoice(werd_choice->unichar_id(i), choices); ASSERT_HOST(old_choice != nullptr); new_rating += old_choice->rating(); new_certainty += old_choice->certainty(); num_blobs_to_replace += num_blobs; } else { begin_blob_index += werd_choice->state(i); } } new_certainty /= wrong_ngram_size; // If there is no entry in the ratings matrix, add it. MATRIX_COORD coord(begin_blob_index, begin_blob_index + num_blobs_to_replace - 1); if (!coord.Valid(*ratings)) { ratings->IncreaseBandSize(coord.row - coord.col + 1); } if (ratings->get(coord.col, coord.row) == nullptr) ratings->put(coord.col, coord.row, new BLOB_CHOICE_LIST); BLOB_CHOICE_LIST* new_choices = ratings->get(coord.col, coord.row); BLOB_CHOICE* choice = FindMatchingChoice(correct_ngram_id, new_choices); if (choice != nullptr) { // Already there. Upgrade if new rating better. if (new_rating < choice->rating()) choice->set_rating(new_rating); if (new_certainty < choice->certainty()) choice->set_certainty(new_certainty); // DO NOT SORT!! It will mess up the iterator in LanguageModel::UpdateState. } else { // Need a new choice with the correct_ngram_id. choice = new BLOB_CHOICE(*old_choice); choice->set_unichar_id(correct_ngram_id); choice->set_rating(new_rating); choice->set_certainty(new_certainty); choice->set_classifier(BCC_AMBIG); choice->set_matrix_cell(coord.col, coord.row); BLOB_CHOICE_IT it (new_choices); it.add_to_end(choice); } // Remove current unichar from werd_choice. On the last iteration // set the correct replacement unichar instead of removing a unichar. for (int replaced_count = 0; replaced_count < wrong_ngram_size; ++replaced_count) { if (replaced_count + 1 == wrong_ngram_size) { werd_choice->set_blob_choice(wrong_ngram_begin_index, num_blobs_to_replace, choice); } else { werd_choice->remove_unichar_id(wrong_ngram_begin_index + 1); } } if (stopper_debug_level >= 1) { werd_choice->print("ReplaceAmbig() "); tprintf("Modified blob_choices: "); print_ratings_list("\n", new_choices, getUnicharset()); } }