/**
 * @name chop_word_main
 *
 * Populate the ratings matrix of the word, run the segmentation search
 * over it and leave the outcome in the WERD_RES.
 *
 * Steps, in order:
 *  - allocate word->ratings when the word does not carry one yet;
 *  - when cell (0, 0) is empty, classify every single blob and store the
 *    result on the diagonal; otherwise tag each existing choice with the
 *    (col, row) cell it lives in;
 *  - run SegSearch, and call FakeWordFromRatings() if the search left
 *    word->best_choice unset;
 *  - rebuild the best state, reset the dictionary hyphen variables for an
 *    end-of-line word that does not end in a hyphen, fill the lattice when
 *    both a blamer bundle and a fill_lattice_ callback are present, and
 *    finally filter the word choices.
 *
 * @param word  The word to process; it is modified in place.
 */
void Wordrec::chop_word_main(WERD_RES *word) {
  const int blob_count = word->chopped_word->NumBlobs();
  if (word->ratings == NULL)
    word->ratings = new MATRIX(blob_count, wordrec_max_join_chunks);

  if (word->ratings->get(0, 0) != NULL) {
    // The blobs were classified beforehand: tell every stored choice which
    // matrix cell it belongs to. Only cells inside the band are visited.
    const int dim = word->ratings->dimension();
    const int band = word->ratings->bandwidth();
    for (int col = 0; col < dim; ++col) {
      for (int row = col; row < dim && row < col + band; ++row) {
        BLOB_CHOICE_LIST* cell = word->ratings->get(col, row);
        if (cell == NULL)
          continue;
        BLOB_CHOICE_IT it(cell);
        for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
          it.data()->set_matrix_cell(col, row);
      }
    }
  } else {
    // Nothing classified yet: classify each blob on its own and put the
    // resulting choices on the leading diagonal.
    for (int blob = 0; blob < blob_count; ++blob) {
      BLOB_CHOICE_LIST* classified =
          classify_piece(word->seam_array, blob, blob, "Initial:",
                         word->chopped_word, word->blamer_bundle);
      word->ratings->put(blob, blob, classified);
    }
  }

  // Segmentation search. word->ratings is re-read here (not cached) so the
  // bundle is sized from whatever matrix the word currently holds.
  BestChoiceBundle best_choice_bundle(word->ratings->dimension());
  SegSearch(word, &best_choice_bundle, word->blamer_bundle);

  // No valid path from the search: fall back to the leading diagonal.
  if (word->best_choice == NULL)
    word->FakeWordFromRatings();
  word->RebuildBestState();

  // A word at the end of a line that does not end in a hyphen lets the
  // next word be looked up in the dictionary on its own.
  if (word->word->flag(W_EOL)) {
    if (!getDict().has_hyphen_end(*word->best_choice))
      getDict().reset_hyphen_vars(true);
  }

  if (word->blamer_bundle != NULL && this->fill_lattice_ != NULL) {
    CallFillLattice(*word->ratings, word->best_choices, *word->uch_set,
                    word->blamer_bundle);
  }

  if (wordrec_debug_level > 0) {
    tprintf("Final Ratings Matrix:\n");
    word->ratings->print(getDict().getUnicharset());
  }

  word->FilterWordChoices(getDict().stopper_debug_level);
}
/**
 * Assemble a CHUNKS_RECORD for the chopped blobs of the word, derive a
 * weight for every chunk and run the configured search over the record.
 *
 * @param word               Word whose chopped blobs and seam array are used.
 * @param state              Forwarded to SegSearch / best_first_search.
 * @param best_char_choices  Forwarded to SegSearch / best_first_search.
 * @param fixpt              Forwarded to best_first_search only.
 * @param best_state         Forwarded to best_first_search only.
 * @return The ratings matrix created by record_piece_ratings() for the
 *         blobs. The two width arrays are freed before returning; the
 *         matrix is not.
 */
MATRIX *Wordrec::word_associator(WERD_RES *word,
                                 STATE *state,
                                 BLOB_CHOICE_LIST_VECTOR *best_char_choices,
                                 DANGERR *fixpt,
                                 STATE *best_state) {
  // One more chunk than there are seams.
  const int num_chunks = array_count(word->seam_array) + 1;
  TBLOB* blobs = word->chopped_word->blobs;

  CHUNKS_RECORD chunks_record;
  chunks_record.chunks = blobs;
  chunks_record.splits = word->seam_array;
  chunks_record.ratings = record_piece_ratings(blobs);
  chunks_record.char_widths = blobs_widths(blobs);
  chunks_record.chunk_widths = blobs_widths(blobs);

  /* Save chunk weights */
  BLOB_WEIGHTS blob_weights;
  BLOB_CHOICE_IT blob_choice_it;
  for (int chunk = 0; chunk < num_chunks; ++chunk) {
    blob_choice_it.set_to_list(get_piece_rating(chunks_record.ratings, blobs,
                                                word->seam_array,
                                                chunk, chunk));
    // A certainty of zero would divide by zero, so such a chunk gets
    // weight 0. Otherwise the weight is -(10 * rating / certainty) of the
    // choice the iterator is positioned at after set_to_list (presumably
    // the first entry of the list).
    blob_weights[chunk] =
        (blob_choice_it.data()->certainty() == 0)
            ? 0
            : -static_cast<inT16>(10 * blob_choice_it.data()->rating() /
                                  blob_choice_it.data()->certainty());
  }
  chunks_record.weights = blob_weights;

  if (chop_debug)
    chunks_record.ratings->print(getDict().getUnicharset());

  // Pick the search implementation according to enable_new_segsearch.
  if (!enable_new_segsearch) {
    best_first_search(&chunks_record, best_char_choices, word,
                      state, fixpt, best_state);
  } else {
    SegSearch(&chunks_record, word->best_choice,
              best_char_choices, word->raw_choice, state);
  }

  free_widths(chunks_record.chunk_widths);
  free_widths(chunks_record.char_widths);
  return chunks_record.ratings;
}
/**
 * Run the segmentation search on a word, passing NULL in place of a
 * blamer bundle.
 *
 * @param word_res  Word to search; its ratings matrix must already exist,
 *                  because its dimension is read to size the bundle.
 */
void Wordrec::DoSegSearch(WERD_RES* word_res) {
  // The temporary bundle is sized from the current ratings matrix and is
  // discarded when this function returns.
  const int matrix_dimension = word_res->ratings->dimension();
  BestChoiceBundle bundle(matrix_dimension);
  SegSearch(word_res, &bundle, NULL);
}