void Wordrec::ProcessSegSearchPainPoint( float pain_point_priority, const MATRIX_COORD &pain_point, const char* pain_point_type, GenericVector<SegSearchPending>* pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle) { if (segsearch_debug_level > 0) { tprintf("Classifying pain point %s priority=%.4f, col=%d, row=%d\n", pain_point_type, pain_point_priority, pain_point.col, pain_point.row); } ASSERT_HOST(pain_points != NULL); MATRIX *ratings = word_res->ratings; // Classify blob [pain_point.col pain_point.row] if (!pain_point.Valid(*ratings)) { ratings->IncreaseBandSize(pain_point.row + 1 - pain_point.col); } ASSERT_HOST(pain_point.Valid(*ratings)); BLOB_CHOICE_LIST *classified = classify_piece(word_res->seam_array, pain_point.col, pain_point.row, pain_point_type, word_res->chopped_word, blamer_bundle); BLOB_CHOICE_LIST *lst = ratings->get(pain_point.col, pain_point.row); if (lst == NULL) { ratings->put(pain_point.col, pain_point.row, classified); } else { // We can not delete old BLOB_CHOICEs, since they might contain // ViterbiStateEntries that are parents of other "active" entries. // Thus if the matrix cell already contains classifications we add // the new ones to the beginning of the list. BLOB_CHOICE_IT it(lst); it.add_list_before(classified); delete classified; // safe to delete, since empty after add_list_before() classified = NULL; } if (segsearch_debug_level > 0) { print_ratings_list("Updated ratings matrix with a new entry:", ratings->get(pain_point.col, pain_point.row), getDict().getUnicharset()); ratings->print(getDict().getUnicharset()); } // Insert initial "pain points" to join the newly classified blob // with its left and right neighbors. 
if (classified != NULL && !classified->empty()) { if (pain_point.col > 0) { pain_points->GeneratePainPoint( pain_point.col - 1, pain_point.row, LM_PPTYPE_SHAPE, 0.0, true, segsearch_max_char_wh_ratio, word_res); } if (pain_point.row + 1 < ratings->dimension()) { pain_points->GeneratePainPoint( pain_point.col, pain_point.row + 1, LM_PPTYPE_SHAPE, 0.0, true, segsearch_max_char_wh_ratio, word_res); } } (*pending)[pain_point.col].SetBlobClassified(pain_point.row); }
/**
 * @name chop_word_main
 *
 * Classify the blobs in this word and permute the results.  Find the
 * worst blob in the word and chop it up.  Continue this process until
 * a good answer has been found or all the blobs have been chopped up
 * enough.  Return the word level ratings.
 *
 * @param word  word to process; its denorm, chopped_word, seam_array,
 *              best_choice and raw_choice members are used
 * @return the choice-list vector handed back by rebuild_current_state().
 *         The intermediate char_choices vector is released here before
 *         returning.
 */
BLOB_CHOICE_LIST_VECTOR *Wordrec::chop_word_main(WERD_RES *word) {
  STATE state;
  MATRIX *ratings = NULL;
  DANGERR fixpt;                 /*dangerous ambig */

  set_denorm(&word->denorm);

  BLOB_CHOICE_LIST_VECTOR *char_choices = new BLOB_CHOICE_LIST_VECTOR();
  BLOB_CHOICE_LIST_VECTOR *best_char_choices = new BLOB_CHOICE_LIST_VECTOR();

  // Classify every blob of the chopped word. A NULL classifier result is
  // reported but still appended, so the vector keeps one entry per blob.
  for (TBLOB *blob = word->chopped_word->blobs; blob != NULL;
       blob = blob->next) {
    BLOB_CHOICE_LIST *match_result = classify_blob(blob, "chop_word:", Green);
    if (match_result == NULL)
      cprintf("Null classifier output!\n");
    *char_choices += match_result;
  }
  set_n_ones(&state, char_choices->length() - 1);

  bool replaced = false;
  const bool best_choice_updated =
      getDict().permute_characters(*char_choices, word->best_choice,
                                   word->raw_choice);
  // AcceptableChoice() is only consulted when the best choice was updated.
  const bool acceptable =
      best_choice_updated &&
      getDict().AcceptableChoice(char_choices, word->best_choice, &fixpt,
                                 CHOPPER_CALLER, &replaced);
  if (replaced)
    update_blob_classifications(word->chopped_word, *char_choices);
  CopyCharChoices(*char_choices, best_char_choices);

  if (!acceptable) {  // do more work to find a better choice
    bool best_choice_acceptable = false;
    if (chop_enable)
      improve_by_chopping(word, char_choices, &state, best_char_choices,
                          &fixpt, &best_choice_acceptable);
    if (chop_debug)
      print_seams ("Final seam list:", word->seam_array);

    // The force_word_assoc is almost redundant to enable_assoc.  However,
    // it is not conditioned on the dict behavior.  For CJK, we need to force
    // the associator to be invoked.  When we figure out the exact behavior
    // of dict on CJK, we can remove the flag if it turns out to be redundant.
    if ((wordrec_enable_assoc && !best_choice_acceptable) ||
        force_word_assoc) {
      ratings = word_associator(word, &state, best_char_choices,
                                &fixpt, &state);
    }
  }

  // NOTE(review): the previous best_char_choices pointer is overwritten with
  // the return value; presumably rebuild_current_state() takes care of the
  // vector passed in - confirm against its implementation.
  best_char_choices = rebuild_current_state(word, &state, best_char_choices,
                                            ratings);

  if (ratings != NULL) {
    if (wordrec_debug_level > 0) {
      tprintf("Final Ratings Matrix:\n");
      ratings->print(getDict().getUnicharset());
    }
    ratings->delete_matrix_pointers();
    delete ratings;
  }
  getDict().FilterWordChoices();

  char_choices->delete_data_pointers();
  delete char_choices;

  return best_char_choices;
}