/**********************************************************************
 * chop_word_main
 *
 * Classify the blobs in this word and permute the results.  Find the
 * worst blob in the word and chop it up.  Continue this process until
 * a good answer has been found or all the blobs have been chopped up
 * enough.  Return the word level ratings.
 *
 * Parameters:
 *   word         word whose blob list (word->blobs) is classified.
 *   fx           forwarded unchanged to the classifier, the chopper, the
 *                associator and rebuild_current_state().
 *   best_choice  reset with set_null_choice() on entry, then handed to
 *                the permuter / chopper / associator.
 *   raw_choice   handled the same way as best_choice.
 *   tester,
 *   trainer      when either is non-zero the best choice is additionally
 *                compared with word->correct; a mismatch forces the
 *                chopping path and the associator, and the current state
 *                is rebuilt even if no chopping took place.
 *
 * Returns the per-blob choice list (as rebuilt by rebuild_current_state()
 * whenever chopping happened or tester/trainer is set).
 *
 * Side effects visible here: updates the globals bits_in_states and
 * words_chopped1 / words_chopped2, writes the function-static best_state
 * and chop_states, and calls FilterWordChoices() before returning.
 **********************************************************************/
CHOICES_LIST chop_word_main(register TWERD *word,
                            int fx,
                            A_CHOICE *best_choice,
                            A_CHOICE *raw_choice,
                            BOOL8 tester,
                            BOOL8 trainer) {
  static STATE best_state;
  static STATE chop_states[64];  /* in-between states */

  TBLOB *prev_blob = NULL;
  TBLOB *cur_blob;
  CHOICES_LIST char_choices;
  CHOICES blob_choices;
  SEAMS seam_list = NULL;
  MATRIX ratings = NULL;
  STATE state;
  DANGERR fixpt;                 /* dangerous ambig */
  float rating_limit = 1000.0;
  INT32 state_count = 0;         /* no of states */
  INT32 bit_count;               /* no of bits */
  int blob_total = 0;
  int did_chopping = 0;
  int truth_mode = tester || trainer;
  int needs_chopping;
  int needs_assoc;

  set_null_choice(best_choice);
  set_null_choice(raw_choice);

  char_choices = new_choice_list ();

  /* Classify every blob in turn, passing its left and right neighbours. */
  cur_blob = word->blobs;
  while (cur_blob != NULL) {
    blob_choices = (CHOICES) classify_blob (prev_blob, cur_blob,
                                            cur_blob->next, NULL,
                                            fx, "chop_word:", Green,
                                            &chop_states[0], &best_state,
                                            matcher_pass, blob_total);
    char_choices = array_push (char_choices, blob_choices);
    prev_blob = cur_blob;
    cur_blob = cur_blob->next;
    blob_total++;
  }
  bit_count = blob_total - 1;

  permute_characters(char_choices, rating_limit, best_choice, raw_choice);

  set_n_ones (&state, array_count (char_choices) - 1);
  if (matcher_fp != NULL) {
    if (matcher_pass == 0) {
      bits_in_states = bit_count;
      chop_states[state_count] = state;
    }
    state_count++;
  }

  /*
   * Chop when the permuted result is not acceptable, or, in tester/trainer
   * mode, when it differs from the known-correct string.  AcceptableChoice
   * is always evaluated first; the string comparison only runs when the
   * choice was acceptable, exactly as a short-circuit || would do.
   */
  needs_chopping = !AcceptableChoice (char_choices, best_choice,
                                      raw_choice, &fixpt);
  if (!needs_chopping && truth_mode) {
    needs_chopping = strcmp (word->correct,
                             class_string (best_choice)) != 0;
  }

  if (needs_chopping) {
    did_chopping = 1;
    if (first_pass) {
      words_chopped1++;
    } else {
      words_chopped2++;
    }

    seam_list = start_seam_list (word->blobs);

    if (chop_enable) {
      improve_by_chopping(word, &char_choices, fx, &state,
                          best_choice, raw_choice,
                          &seam_list, &fixpt,
                          chop_states, &state_count,
                          &best_state, matcher_pass);
    }
    if (chop_debug) {
      print_seams ("Final seam list:", seam_list);
    }

    /* Same two-stage test as above, but gated on enable_assoc. */
    needs_assoc = enable_assoc &&
                  !AcceptableChoice (char_choices, best_choice,
                                     raw_choice, NULL);
    if (!needs_assoc && truth_mode) {
      needs_assoc = strcmp (word->correct,
                            class_string (best_choice)) != 0;
    }
    if (needs_assoc) {
      ratings = word_associator (word->blobs, seam_list, &state, fx,
                                 best_choice, raw_choice, word->correct,
                                 /*0, */
                                 &fixpt, &best_state, matcher_pass);
    }
    bits_in_states = bit_count + state_count - 1;
  }

  if (ratings != NULL) {
    free_matrix(ratings);
  }
  if (did_chopping || truth_mode) {
    char_choices = rebuild_current_state (word->blobs, seam_list, &state,
                                          char_choices, fx);
  }
  if (seam_list != NULL) {
    free_seam_list(seam_list);
  }
  if (matcher_fp != NULL) {
    best_state = state;
  }
  FilterWordChoices();
  return char_choices;
}
/**
 * @name chop_word_main
 *
 * Classify the blobs in this word and permute the results.  Find the
 * worst blob in the word and chop it up.  Continue this process until
 * a good answer has been found or all the blobs have been chopped up
 * enough.  Return the word level ratings.
 *
 * @param word  Word result to process.  Its chopped_word->blobs list is
 *              classified; best_choice and raw_choice are passed to the
 *              dictionary permuter, and seam_array is printed when
 *              chop_debug is set.
 * @return The choice vector handed back by rebuild_current_state().  The
 *         initial per-blob vector built here is deleted before returning.
 */
BLOB_CHOICE_LIST_VECTOR *Wordrec::chop_word_main(WERD_RES *word) {
  STATE state;
  MATRIX *ratings = NULL;
  DANGERR fixpt;  /* dangerous ambig */

  set_denorm(&word->denorm);

  BLOB_CHOICE_LIST_VECTOR *char_choices = new BLOB_CHOICE_LIST_VECTOR();
  BLOB_CHOICE_LIST_VECTOR *best_char_choices = new BLOB_CHOICE_LIST_VECTOR();

  // Classify each blob of the chopped word and collect the results.
  for (TBLOB *blob = word->chopped_word->blobs; blob != NULL;
       blob = blob->next) {
    BLOB_CHOICE_LIST *match_result = classify_blob(blob, "chop_word:", Green);
    if (match_result == NULL)
      cprintf("Null classifier output!\n");
    // A NULL result is still appended, as before; only a message is printed.
    *char_choices += match_result;
  }

  set_n_ones(&state, char_choices->length() - 1);

  bool replaced = false;
  bool best_choice_updated =
      getDict().permute_characters(*char_choices, word->best_choice,
                                   word->raw_choice);
  // AcceptableChoice is only consulted when the permuter reported an update.
  bool acceptable =
      best_choice_updated &&
      getDict().AcceptableChoice(char_choices, word->best_choice, &fixpt,
                                 CHOPPER_CALLER, &replaced);
  if (replaced)
    update_blob_classifications(word->chopped_word, *char_choices);
  CopyCharChoices(*char_choices, best_char_choices);

  if (!acceptable) {  // do more work to find a better choice
    bool best_choice_acceptable = false;
    if (chop_enable)
      improve_by_chopping(word, char_choices, &state, best_char_choices,
                          &fixpt, &best_choice_acceptable);
    if (chop_debug)
      print_seams ("Final seam list:", word->seam_array);

    // The force_word_assoc is almost redundant to enable_assoc.  However,
    // it is not conditioned on the dict behavior.  For CJK, we need to force
    // the associator to be invoked.  When we figure out the exact behavior
    // of dict on CJK, we can remove the flag if it turns out to be redundant.
    if ((wordrec_enable_assoc && !best_choice_acceptable) ||
        force_word_assoc) {
      ratings = word_associator(word, &state, best_char_choices,
                                &fixpt, &state);
    }
  }

  best_char_choices = rebuild_current_state(word, &state, best_char_choices,
                                            ratings);

  if (ratings != NULL) {
    if (wordrec_debug_level > 0) {
      tprintf("Final Ratings Matrix:\n");
      ratings->print(getDict().getUnicharset());
    }
    ratings->delete_matrix_pointers();
    delete ratings;
  }

  getDict().FilterWordChoices();

  // The initial classification vector is no longer needed.
  char_choices->delete_data_pointers();
  delete char_choices;

  return best_char_choices;
}