/**
 * @name chop_word_main
 *
 * Classify the blobs in this word and permute the results.  If the permuted
 * choice is not acceptable, try to improve it by chopping blobs and, when
 * enabled or forced, by running the word associator.  Finally rebuild the
 * per-blob choices for the resulting segmentation state.
 *
 * @param word  Word to recognize.  The blobs of word->chopped_word are
 *              classified; word->best_choice and word->raw_choice are handed
 *              to the permuter, and word->seam_array is printed when
 *              chop_debug is set.
 * @return The choice-list vector returned by rebuild_current_state().  The
 *         intermediate char_choices vector is deleted here.
 *         NOTE(review): the caller presumably owns the returned vector --
 *         confirm against rebuild_current_state() and the call sites.
 */
BLOB_CHOICE_LIST_VECTOR *Wordrec::chop_word_main(WERD_RES *word) {
  STATE state;
  MATRIX *ratings = NULL;
  DANGERR fixpt;                 /*dangerous ambig */

  set_denorm(&word->denorm);

  BLOB_CHOICE_LIST_VECTOR *char_choices = new BLOB_CHOICE_LIST_VECTOR();
  BLOB_CHOICE_LIST_VECTOR *best_char_choices = new BLOB_CHOICE_LIST_VECTOR();

  // Classify every blob of the chopped word, one choice list per blob.
  for (TBLOB *blob = word->chopped_word->blobs; blob != NULL;
       blob = blob->next) {
    BLOB_CHOICE_LIST *match_result = classify_blob(blob, "chop_word:", Green);
    if (match_result == NULL)
      cprintf("Null classifier output!\n");
    // A NULL result is still appended, so the vector keeps one entry per blob.
    *char_choices += match_result;
  }
  set_n_ones(&state, char_choices->length() - 1);

  bool acceptable = false;
  bool replaced = false;
  bool best_choice_updated =
    getDict().permute_characters(*char_choices,
                                 word->best_choice, word->raw_choice);
  if (best_choice_updated &&
      getDict().AcceptableChoice(char_choices, word->best_choice, &fixpt,
                                 CHOPPER_CALLER, &replaced)) {
    acceptable = true;
  }
  if (replaced)
    update_blob_classifications(word->chopped_word, *char_choices);
  CopyCharChoices(*char_choices, best_char_choices);

  if (!acceptable) {  // do more work to find a better choice
    bool best_choice_acceptable = false;
    if (chop_enable)
      improve_by_chopping(word, char_choices, &state, best_char_choices,
                          &fixpt, &best_choice_acceptable);
    if (chop_debug)
      print_seams ("Final seam list:", word->seam_array);

    // The force_word_assoc is almost redundant to enable_assoc.  However,
    // it is not conditioned on the dict behavior.  For CJK, we need to force
    // the associator to be invoked.  When we figure out the exact behavior
    // of dict on CJK, we can remove the flag if it turns out to be redundant.
    if ((wordrec_enable_assoc && !best_choice_acceptable) || force_word_assoc) {
      ratings = word_associator(word, &state, best_char_choices,
                                &fixpt, &state);
    }
  }

  best_char_choices = rebuild_current_state(word, &state, best_char_choices,
                                            ratings);

  // The ratings matrix only exists if the associator ran.
  if (ratings != NULL) {
    if (wordrec_debug_level > 0) {
      tprintf("Final Ratings Matrix:\n");
      ratings->print(getDict().getUnicharset());
    }
    ratings->delete_matrix_pointers();
    delete ratings;
  }
  getDict().FilterWordChoices();
  char_choices->delete_data_pointers();
  delete char_choices;

  return best_char_choices;
}
/**********************************************************************
 * chop_word_main
 *
 * Classify each blob of the word, permute the per-blob choices into a
 * word choice, and, if that choice is not acceptable (or, in tester /
 * trainer mode, differs from word->correct), try to improve it by
 * chopping blobs and by running the word associator.
 *
 * word         word whose blobs are classified (and possibly chopped)
 * fx           passed through to the classifier, chopper, associator
 *              and rebuild_current_state
 * best_choice  reset to the null choice, then filled by the permuter
 * raw_choice   reset to the null choice, then filled by the permuter
 * tester       when set, best_choice is compared with word->correct
 * trainer      when set, best_choice is compared with word->correct
 *
 * Returns the list of per-blob choices; it is rebuilt from the final
 * state when chopping was attempted or tester / trainer is set.
 **********************************************************************/
CHOICES_LIST chop_word_main(register TWERD *word,
                            int fx,
                            A_CHOICE *best_choice,
                            A_CHOICE *raw_choice,
                            BOOL8 tester,
                            BOOL8 trainer) {
  static STATE best_state;
  static STATE chop_states[64];  // in between states

  float rating_limit = 1000.0;
  STATE state;
  SEAMS seam_list = NULL;
  MATRIX ratings = NULL;
  DANGERR fixpt;                 // dangerous ambig
  INT32 state_count = 0;         // no of states

  set_null_choice(best_choice);
  set_null_choice(raw_choice);

  CHOICES_LIST char_choices = new_choice_list ();
  bool chopped = false;

  // Classify every blob, giving the classifier its left and right neighbours.
  TBLOB *prev_blob = NULL;
  int blob_index = 0;
  for (TBLOB *cur_blob = word->blobs; cur_blob != NULL;
       cur_blob = cur_blob->next) {
    CHOICES blob_choices =
      (CHOICES) classify_blob (prev_blob, cur_blob, cur_blob->next, NULL,
                               fx, "chop_word:", Green,
                               &chop_states[0], &best_state,
                               matcher_pass, blob_index);
    char_choices = array_push (char_choices, blob_choices);
    prev_blob = cur_blob;
    ++blob_index;
  }
  INT32 bit_count = blob_index - 1;  // no of bits

  permute_characters(char_choices, rating_limit, best_choice, raw_choice);

  set_n_ones (&state, array_count (char_choices) - 1);
  if (matcher_fp != NULL) {
    if (matcher_pass == 0) {
      bits_in_states = bit_count;
      chop_states[state_count] = state;
    }
    state_count++;
  }

  // More work is needed if the choice is unacceptable, or if we are
  // testing / training and the choice does not match the known answer.
  bool needs_work =
    !AcceptableChoice (char_choices, best_choice, raw_choice, &fixpt);
  if (!needs_work && (tester || trainer)) {
    needs_work = strcmp (word->correct, class_string (best_choice)) != 0;
  }

  if (needs_work) {
    chopped = true;
    if (first_pass) {
      words_chopped1++;
    } else {
      words_chopped2++;
    }

    seam_list = start_seam_list (word->blobs);

    if (chop_enable) {
      improve_by_chopping(word, &char_choices, fx, &state,
                          best_choice, raw_choice, &seam_list, &fixpt,
                          chop_states, &state_count, &best_state,
                          matcher_pass);
    }

    if (chop_debug) {
      print_seams ("Final seam list:", seam_list);
    }

    // Same test as above, re-evaluated after chopping; the acceptability
    // check is only made when the associator is enabled.
    bool run_associator =
      enable_assoc &&
      !AcceptableChoice (char_choices, best_choice, raw_choice, NULL);
    if (!run_associator && (tester || trainer)) {
      run_associator =
        strcmp (word->correct, class_string (best_choice)) != 0;
    }
    if (run_associator) {
      ratings = word_associator (word->blobs, seam_list, &state, fx,
                                 best_choice, raw_choice, word->correct,
                                 /*0, */ &fixpt, &best_state, matcher_pass);
    }
    bits_in_states = bit_count + state_count - 1;
  }

  if (ratings != NULL) {
    free_matrix(ratings);
  }
  if (chopped || tester || trainer) {
    char_choices = rebuild_current_state (word->blobs, seam_list, &state,
                                          char_choices, fx);
  }
  if (seam_list != NULL) {
    free_seam_list(seam_list);
  }
  if (matcher_fp != NULL) {
    best_state = state;
  }
  FilterWordChoices();
  return char_choices;
}
void Wordrec::SegSearch(WERD_RES* word_res, BestChoiceBundle* best_choice_bundle, BlamerBundle* blamer_bundle) { LMPainPoints pain_points(segsearch_max_pain_points, segsearch_max_char_wh_ratio, assume_fixed_pitch_char_segment, &getDict(), segsearch_debug_level); // Compute scaling factor that will help us recover blob outline length // from classifier rating and certainty for the blob. float rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale; GenericVector<SegSearchPending> pending; InitialSegSearch(word_res, &pain_points, &pending, best_choice_bundle, blamer_bundle); if (!SegSearchDone(0)) { // find a better choice if (chop_enable && word_res->chopped_word != NULL) { improve_by_chopping(rating_cert_scale, word_res, best_choice_bundle, blamer_bundle, &pain_points, &pending); } if (chop_debug) SEAM::PrintSeams("Final seam list:", word_res->seam_array); if (blamer_bundle != NULL && !blamer_bundle->ChoiceIsCorrect(word_res->best_choice)) { blamer_bundle->SetChopperBlame(word_res, wordrec_debug_blamer); } } // Keep trying to find a better path by fixing the "pain points". MATRIX_COORD pain_point; float pain_point_priority; int num_futile_classifications = 0; STRING blamer_debug; while (wordrec_enable_assoc && (!SegSearchDone(num_futile_classifications) || (blamer_bundle != NULL && blamer_bundle->GuidedSegsearchStillGoing()))) { // Get the next valid "pain point". 
bool found_nothing = true; LMPainPointsType pp_type; while ((pp_type = pain_points.Deque(&pain_point, &pain_point_priority)) != LM_PPTYPE_NUM) { if (!pain_point.Valid(*word_res->ratings)) { word_res->ratings->IncreaseBandSize( pain_point.row - pain_point.col + 1); } if (pain_point.Valid(*word_res->ratings) && !word_res->ratings->Classified(pain_point.col, pain_point.row, getDict().WildcardID())) { found_nothing = false; break; } } if (found_nothing) { if (segsearch_debug_level > 0) tprintf("Pain points queue is empty\n"); break; } ProcessSegSearchPainPoint(pain_point_priority, pain_point, LMPainPoints::PainPointDescription(pp_type), &pending, word_res, &pain_points, blamer_bundle); UpdateSegSearchNodes(rating_cert_scale, pain_point.col, &pending, word_res, &pain_points, best_choice_bundle, blamer_bundle); if (!best_choice_bundle->updated) ++num_futile_classifications; if (segsearch_debug_level > 0) { tprintf("num_futile_classifications %d\n", num_futile_classifications); } best_choice_bundle->updated = false; // reset updated // See if it's time to terminate SegSearch or time for starting a guided // search for the true path to find the blame for the incorrect best_choice. if (SegSearchDone(num_futile_classifications) && blamer_bundle != NULL && blamer_bundle->GuidedSegsearchNeeded(word_res->best_choice)) { InitBlamerForSegSearch(word_res, &pain_points, blamer_bundle, &blamer_debug); } } // end while loop exploring alternative paths if (blamer_bundle != NULL) { blamer_bundle->FinishSegSearch(word_res->best_choice, wordrec_debug_blamer, &blamer_debug); } if (segsearch_debug_level > 0) { tprintf("Done with SegSearch (AcceptableChoiceFound: %d)\n", language_model_->AcceptableChoiceFound()); } }