/**********************************************************************
 * recog_word_recursive
 *
 * Recognize one word and sanity-check the resulting best_choice.
 *
 * If the chopped word has more than MAX_UNDIVIDED_LENGTH blobs it is
 * handed to split_and_recog_word() and nothing else is done here.
 * Otherwise cc_recog() is run on the word and best_choice is reconciled
 * with the blob count of rebuild_word:
 *   - a best_choice longer than the blob count is reported and then
 *     marked bad via make_bad();
 *   - a best_choice shorter than the blob count is padded with the
 *     unichar id of " " until the lengths agree.
 *
 * word: the word result to recognize; updated in place.
 **********************************************************************/
void Tesseract::recog_word_recursive(WERD_RES *word) {
  int word_length = word->chopped_word->NumBlobs();  // no of blobs
  if (word_length > MAX_UNDIVIDED_LENGTH) {
    return split_and_recog_word(word);
  }
  cc_recog(word);
  word_length = word->rebuild_word->NumBlobs();  // No of blobs in output.

  // Do sanity checks and minor fixes on best_choice.
  if (word->best_choice->length() > word_length) {
    // Should never happen. Emit the diagnostic BEFORE make_bad(), so the
    // message shows the string and length that are actually being
    // discarded. NOTE(review): make_bad() is expected to reset the choice
    // (string/length); confirm against WERD_CHOICE.
    tprintf("recog_word: Discarded long string \"%s\""
            " (%d characters vs %d blobs)\n",
            word->best_choice->unichar_string().string(),
            word->best_choice->length(), word_length);
    tprintf("Word is at:");
    word->word->bounding_box().print();
    word->best_choice->make_bad();
  }
  // Deliberately a separate test rather than an else-branch: the length
  // is re-read after make_bad(), so a choice shortened by it is padded too.
  if (word->best_choice->length() < word_length) {
    UNICHAR_ID space_id = unicharset.unichar_to_id(" ");
    while (word->best_choice->length() < word_length) {
      word->best_choice->append_unichar_id(space_id, 1, 0.0,
                                           word->best_choice->certainty());
    }
  }
}
/**********************************************************************
 * recog_word_recursive
 *
 * Recognize a single word.
 *
 * The matcher, tester, trainer, denorm and word are first installed in
 * the tess_* globals. A word with more than MAX_UNDIVIDED_LENGTH blobs is
 * delegated to split_and_recog_word(). Otherwise the word is converted
 * with make_tess_word(), run through cc_recog(), and converted back with
 * make_ed_word() (falling back to word->poly_copy() when that yields
 * NULL). The raw and best results are padded with blanks up to the number
 * of blobs in outword, and the entries appended to blob_choices are
 * forced to match that blob count.
 *
 * Returns a newly allocated WERD_CHOICE for the best result; raw_choice
 * receives a newly allocated WERD_CHOICE for the raw result and outword
 * receives the converted output word.
 **********************************************************************/
WERD_CHOICE *recog_word_recursive(         //recog one word
    WERD *word,                            //word to do
    DENORM *denorm,                        //de-normaliser
    POLY_MATCHER matcher,                  //matcher function
    POLY_TESTER tester,                    //tester function
    POLY_TESTER trainer,                   //trainer function
    BOOL8 testing,                         //true if answer driven
    WERD_CHOICE *&raw_choice,              //raw result
    BLOB_CHOICE_LIST_CLIST *blob_choices,  //list of blob lists
    WERD *&outword                         //bln word output
    ) {
  // Publish the per-word context through the globals.
  tess_matcher = matcher;
  tess_tester = testing ? tester : NULL;
  tess_trainer = testing ? trainer : NULL;
  tess_denorm = denorm;
  tess_word = word;

  // Words with too many blobs are split and recognized piecewise.
  if (word->blob_list()->length() > MAX_UNDIVIDED_LENGTH) {
    return split_and_recog_word(word, denorm, matcher, tester, trainer,
                                testing, raw_choice, blob_choices, outword);
  }

  last_word_on_line = word->flag(W_EOL) ? TRUE : FALSE;

  // Remember how many choice lists were already present, so that only the
  // ones added for this word are counted below.
  inT32 choices_before = blob_choices->length();

  A_CHOICE best_result;  // best word from tess
  A_CHOICE raw_result;   // raw result from tess
  TWERD *tess_form = make_tess_word(word, NULL);
  ARRAY ratings = cc_recog(tess_form, &best_result, &raw_result,
                           testing && tester != NULL,
                           testing && trainer != NULL);

  // Convert the word back, then release the tess-format copy.
  outword = make_ed_word(tess_form, word);
  if (outword == NULL) {
    outword = word->poly_copy(denorm->row()->x_height());
  }
  delete_word(tess_form);

  inT32 blob_count = outword->blob_list()->length();

  // Convert all ratings into blob_choices.
  convert_choice_lists(ratings, blob_choices);

  // Raw result: copy the strings and pad with blanks up to the blob count.
  STRING raw_text;
  STRING raw_lengths;
  raw_text = raw_result.string;
  raw_lengths = raw_result.lengths;
  while (raw_lengths.length() < blob_count) {
    raw_text += " ";
    raw_lengths += 1;
  }
  raw_choice = new WERD_CHOICE(raw_text.string(), raw_lengths.string(),
                               raw_result.rating, raw_result.certainty,
                               raw_result.permuter);

  // Best result: copy the strings, then validate them against the blobs.
  STRING best_text;
  STRING best_lengths;
  best_text = best_result.string;
  best_lengths = best_result.lengths;
  if (best_lengths.length() > blob_count) {
    // Should never happen: more characters than blobs.
    tprintf("recog_word: Discarded long string \"%s\""
            " (%d characters vs %d blobs)\n",
            best_text.string(), best_lengths.length(), blob_count);
    best_text = NULL;
    best_lengths = NULL;
    tprintf("Word is at (%g,%g)\n",
            denorm->origin(),
            denorm->y(word->bounding_box().bottom(), 0.0));
  }

  if (blob_choices->length() - choices_before != blob_count) {
    // Choice lists and blobs disagree: force rejection and repair the list.
    best_text = NULL;
    best_lengths = NULL;
    tprintf("recog_word: Choices list len:%d; blob lists len:%d\n",
            blob_choices->length(), blob_count);

    BLOB_CHOICE_LIST_C_IT list_it;
    list_it.set_to_list(blob_choices);
    while (blob_choices->length() - choices_before < blob_count) {
      // Too few: append an empty dummy list.
      list_it.add_to_end(new BLOB_CHOICE_LIST);
      tprintf("recog_word: Added dummy choice list\n");
    }
    while (blob_choices->length() - choices_before > blob_count) {
      // Too many (should never happen): drop the last list.
      list_it.move_to_last();
      delete list_it.extract();
      tprintf("recog_word: Deleted choice list\n");
    }
  }

  // Pad the best result with blanks up to the blob count.
  while (best_lengths.length() < blob_count) {
    best_text += " ";
    best_lengths += 1;
  }

  assert(raw_choice != NULL);

  // Release the C strings held by the tess results.
  if (best_result.string) {
    strfree(best_result.string);
    strfree(best_result.lengths);
  }
  if (raw_result.string) {
    strfree(raw_result.string);
    strfree(raw_result.lengths);
  }

  return new WERD_CHOICE(best_text.string(), best_lengths.string(),
                         best_result.rating, best_result.certainty,
                         best_result.permuter);
}