// Seeds pain points for word_res by scanning its ratings matrix.
//
// For every column col, rows col + 1 up to (but excluding)
// MIN(dimension, col + bandwidth + 1) are examined. A cell is skipped when
// it is a valid matrix coordinate that already holds something other than
// NOT_CLASSIFIED. Otherwise an LM_PPTYPE_SHAPE pain point is generated for
// it, provided ratings->Classified() reports true (using the dictionary's
// wildcard id) for the neighbouring cell (col, row - 1) or, when col + 1 is
// still inside the matrix, for the neighbouring cell (col + 1, row).
void LMPainPoints::GenerateInitial(WERD_RES *word_res) {
  MATRIX *ratings = word_res->ratings;
  // NOTE(review): associate_stats is never read in this function; it is kept
  // only because its construction is part of the existing behaviour.
  AssociateStats associate_stats;
  for (int col = 0; col < ratings->dimension(); ++col) {
    // The row limit is computed once per column, before any pain point for
    // this column is generated.
    const int row_limit =
        MIN(ratings->dimension(), col + ratings->bandwidth() + 1);
    for (int row = col + 1; row < row_limit; ++row) {
      MATRIX_COORD cell(col, row);
      // Validity is tested first so that get() is only called on cells that
      // actually lie inside the matrix.
      const bool already_rated =
          cell.Valid(*ratings) && ratings->get(col, row) != NOT_CLASSIFIED;
      if (already_rated) {
        continue;
      }

      // Add an initial pain point only if one of the two neighbours is
      // classified. The second neighbour is consulted only when the first
      // one is not classified and col + 1 is still a valid column.
      bool neighbour_classified = false;
      if (ratings->Classified(col, row - 1, dict_->WildcardID())) {
        neighbour_classified = true;
      } else if (col + 1 < ratings->dimension() &&
                 ratings->Classified(col + 1, row, dict_->WildcardID())) {
        neighbour_classified = true;
      }

      if (neighbour_classified) {
        GeneratePainPoint(col, row, LM_PPTYPE_SHAPE, 0.0, true,
                          max_char_wh_ratio_, word_res);
      }
    }
  }
}
// Helper recursively prints all paths through the ratings matrix, starting // at column col. static void PrintMatrixPaths(int col, int dim, const MATRIX& ratings, int length, const BLOB_CHOICE** blob_choices, const UNICHARSET& unicharset, const char *label, FILE *output_file) { for (int row = col; row < dim && row - col < ratings.bandwidth(); ++row) { if (ratings.get(col, row) != NOT_CLASSIFIED) { BLOB_CHOICE_IT bc_it(ratings.get(col, row)); for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) { blob_choices[length] = bc_it.data(); if (row + 1 < dim) { PrintMatrixPaths(row + 1, dim, ratings, length + 1, blob_choices, unicharset, label, output_file); } else { PrintPath(length + 1, blob_choices, unicharset, label, output_file); } } } } }
void Wordrec::UpdateSegSearchNodes( float rating_cert_scale, int starting_col, GenericVector<SegSearchPending>* pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle) { MATRIX *ratings = word_res->ratings; ASSERT_HOST(ratings->dimension() == pending->size()); ASSERT_HOST(ratings->dimension() == best_choice_bundle->beam.size()); for (int col = starting_col; col < ratings->dimension(); ++col) { if (!(*pending)[col].WorkToDo()) continue; int first_row = col; int last_row = MIN(ratings->dimension() - 1, col + ratings->bandwidth() - 1); if ((*pending)[col].SingleRow() >= 0) { first_row = last_row = (*pending)[col].SingleRow(); } if (segsearch_debug_level > 0) { tprintf("\n\nUpdateSegSearchNodes: col=%d, rows=[%d,%d], alljust=%d\n", col, first_row, last_row, (*pending)[col].IsRowJustClassified(INT32_MAX)); } // Iterate over the pending list for this column. for (int row = first_row; row <= last_row; ++row) { // Update language model state of this child+parent pair. BLOB_CHOICE_LIST *current_node = ratings->get(col, row); LanguageModelState *parent_node = col == 0 ? NULL : best_choice_bundle->beam[col - 1]; if (current_node != NULL && language_model_->UpdateState((*pending)[col].IsRowJustClassified(row), col, row, current_node, parent_node, pain_points, word_res, best_choice_bundle, blamer_bundle) && row + 1 < ratings->dimension()) { // Since the language model state of this entry changed, process all // the child column. (*pending)[row + 1].RevisitWholeColumn(); if (segsearch_debug_level > 0) { tprintf("Added child col=%d to pending\n", row + 1); } } // end if UpdateState. } // end for row. } // end for col. 
if (best_choice_bundle->best_vse != NULL) { ASSERT_HOST(word_res->StatesAllValid()); if (best_choice_bundle->best_vse->updated) { pain_points->GenerateFromPath(rating_cert_scale, best_choice_bundle->best_vse, word_res); if (!best_choice_bundle->fixpt.empty()) { pain_points->GenerateFromAmbigs(best_choice_bundle->fixpt, best_choice_bundle->best_vse, word_res); } } } // The segsearch is completed. Reset all updated flags on all VSEs and reset // all pendings. for (int col = 0; col < pending->size(); ++col) { (*pending)[col].Clear(); ViterbiStateEntry_IT vse_it(&best_choice_bundle->beam[col]->viterbi_state_entries); for (vse_it.mark_cycle_pt(); !vse_it.cycled_list(); vse_it.forward()) { vse_it.data()->updated = false; } } }