예제 #1
0
/**********************************************************************
 * recog_word_recursive
 *
 * Recognize a single word. A word whose chopped form has more than
 * MAX_UNDIVIDED_LENGTH blobs is handed to split_and_recog_word();
 * anything else is passed to cc_recog(). Afterwards best_choice is
 * reconciled with the blob count of rebuild_word: a choice that is too
 * long is marked bad and reported, a choice that is too short is padded
 * with space unichars until the lengths agree.
 **********************************************************************/
void Tesseract::recog_word_recursive(WERD_RES *word) {
  const int chopped_blobs = word->chopped_word->NumBlobs();
  if (chopped_blobs > MAX_UNDIVIDED_LENGTH) {
    // Too many blobs to recognize in one piece.
    split_and_recog_word(word);
    return;
  }

  cc_recog(word);
  // Number of blobs in the recognizer's output word.
  const int rebuilt_blobs = word->rebuild_word->NumBlobs();

  // Sanity check: best_choice must not hold more entries than blobs.
  if (rebuilt_blobs < word->best_choice->length()) {
    word->best_choice->make_bad();  // should never happen
    tprintf("recog_word: Discarded long string \"%s\""
            " (%d characters vs %d blobs)\n",
            word->best_choice->unichar_string().string(),
            word->best_choice->length(), rebuilt_blobs);
    tprintf("Word is at:");
    word->word->bounding_box().print();
  }

  // Only a best_choice shorter than the blob count needs padding.
  if (word->best_choice->length() >= rebuilt_blobs) {
    return;
  }

  // Append spaces, each carrying the choice's current certainty, until
  // there is one entry per blob.
  const UNICHAR_ID space_id = unicharset.unichar_to_id(" ");
  while (word->best_choice->length() < rebuilt_blobs) {
    word->best_choice->append_unichar_id(space_id, 1, 0.0,
                                         word->best_choice->certainty());
  }
}
예제 #2
0
파일: tfacepp.cpp 프로젝트: chanchai/botker
/**********************************************************************
 * recog_word_recursive
 *
 * Recognize one word and return its best choice as a newly allocated
 * WERD_CHOICE.
 *
 * Words with more than MAX_UNDIVIDED_LENGTH blobs are delegated to
 * split_and_recog_word(). Otherwise the word is converted with
 * make_tess_word(), classified with cc_recog(), and converted back with
 * make_ed_word(). Both the raw and the best result strings are padded
 * with blanks so that they have one entry per blob of the output word.
 *
 * Outputs besides the return value:
 *   raw_choice   - set to a newly allocated WERD_CHOICE for the raw result.
 *   blob_choices - receives the converted ratings; adjusted with dummy
 *                  lists or deletions if the count added does not match
 *                  the number of output blobs.
 *   outword      - set to the word produced by make_ed_word(), or to
 *                  word->poly_copy() when that returns NULL.
 *
 * NOTE(review): the returned WERD_CHOICE, raw_choice and outword are
 * allocated here and not released in this function; presumably the
 * caller takes ownership - confirm against the callers.
 **********************************************************************/
WERD_CHOICE *recog_word_recursive(                           //recog one word
                                  WERD *word,                //word to do
                                  DENORM *denorm,            //de-normaliser
                                  POLY_MATCHER matcher,      //matcher function
                                  POLY_TESTER tester,        //tester function
                                  POLY_TESTER trainer,       //trainer function
                                  BOOL8 testing,             //true if answer driven
                                  WERD_CHOICE *&raw_choice,  //raw result (output)
                                  BLOB_CHOICE_LIST_CLIST *blob_choices,  //list of blob lists
                                  WERD *&outword             //bln word output
                                 ) {
  inT32 initial_blob_choice_len; //length of blob_choices on entry
  inT32 word_length;             //no of blobs
  STRING word_string;            //converted from tess
  STRING word_string_lengths;    //one length entry per character of word_string
  ARRAY tess_ratings;            //tess results
  A_CHOICE tess_choice;          //best word
  A_CHOICE tess_raw;             //raw result
  TWERD *tessword;               //tess format
  BLOB_CHOICE_LIST *choice_list; //fake list
                                 //iterator over blob_choices
  BLOB_CHOICE_LIST_C_IT choice_it;

  // Publish the callbacks and context through the tess_* variables, which
  // are declared outside this function. The tester and trainer are only
  // installed when testing is true.
  tess_matcher = matcher;        //install matcher
  tess_tester = testing ? tester : NULL;
  tess_trainer = testing ? trainer : NULL;
  tess_denorm = denorm;
  tess_word = word;
  //      blob_matchers[1]=call_matcher;
  // Words with too many blobs are not recognized in one piece.
  if (word->blob_list ()->length () > MAX_UNDIVIDED_LENGTH) {
    return split_and_recog_word (word, denorm, matcher, tester, trainer,
      testing, raw_choice, blob_choices,
      outword);
  }
  else {
    // Record whether this word carries the end-of-line flag.
    if (word->flag (W_EOL))
      last_word_on_line = TRUE;
    else
      last_word_on_line = FALSE;
    // Remember the incoming list length so that only the entries added
    // for this word are counted and adjusted below.
    initial_blob_choice_len = blob_choices->length ();
    tessword = make_tess_word (word, NULL);
    // The last two arguments are booleans: true only when testing is on
    // and the corresponding callback is non-NULL.
    tess_ratings = cc_recog (tessword, &tess_choice, &tess_raw,
      testing
      && tester != NULL /* ? call_tester : NULL */ ,
      testing
      && trainer !=
      NULL /* ? call_train_tester : NULL */ );
                                 //convert word back from tess format
    outword = make_ed_word (tessword, word);
    if (outword == NULL) {
      // Conversion failed: fall back to a poly copy of the input word.
      outword = word->poly_copy (denorm->row ()->x_height ());
    }
    delete_word(tessword);  //get rid of it
                                 //no of blobs in the output word
    word_length = outword->blob_list ()->length ();
                                 //convert all ratings
                                 //NOTE(review): presumably appends one
                                 //list per blob to blob_choices - confirm
    convert_choice_lists(tess_ratings, blob_choices);
                                 //copy raw string
    word_string = tess_raw.string;
    word_string_lengths = tess_raw.lengths;
    // Pad the raw result until there is one length entry per blob.
    // NOTE(review): "+= 1" presumably appends a length entry of value 1
    // for the single blank - confirm against STRING::operator+=.
    while (word_string_lengths.length () < word_length) {
      word_string += " ";        //pad with blanks
      word_string_lengths += 1;
    }
    raw_choice = new WERD_CHOICE (word_string.string (),
                                  word_string_lengths.string (),
                                  tess_raw.rating, tess_raw.certainty,
                                  tess_raw.permuter);
    // From here on the two STRINGs are reused for the best choice.
    word_string = tess_choice.string;
    word_string_lengths = tess_choice.lengths;
    if (word_string_lengths.length () > word_length) {
      // More characters than blobs: report it and discard the string.
      tprintf ("recog_word: Discarded long string \"%s\""
               " (%d characters vs %d blobs)\n",
        word_string.string (), word_string_lengths.length(), word_length);
      word_string = NULL;        //should never happen
      word_string_lengths = NULL;
      tprintf("Word is at (%g,%g)\n",
              denorm->origin(),
              denorm->y(word->bounding_box().bottom(), 0.0));
    }
    // The number of choice lists added for this word must equal the
    // number of blobs; otherwise reject the word and repair the list.
    if (blob_choices->length () - initial_blob_choice_len != word_length) {
      word_string = NULL;        //force rejection
      word_string_lengths = NULL;
      // Note: the message prints the total list length, not the number
      // of lists added for this word.
      tprintf ("recog_word: Choices list len:%d; blob lists len:%d\n",
        blob_choices->length (), word_length);
                                 //list of lists
      choice_it.set_to_list (blob_choices);
      // Too few lists: append empty ones until the counts agree.
      while (blob_choices->length () - initial_blob_choice_len <
      word_length) {
                                 //get fake one
        choice_list = new BLOB_CHOICE_LIST;
                                 //add to list
        choice_it.add_to_end (choice_list);
        tprintf ("recog_word: Added dummy choice list\n");
      }
      // Too many lists: delete from the end until the counts agree.
      while (blob_choices->length () - initial_blob_choice_len >
      word_length) {
        choice_it.move_to_last ();
                                 //should never happen
        delete choice_it.extract ();
        tprintf ("recog_word: Deleted choice list\n");
      }
    }
    // Pad the best choice (emptied above if it was rejected) until there
    // is one length entry per blob.
    while (word_string_lengths.length () < word_length) {
      word_string += " ";        //pad with blanks
      word_string_lengths += 1;
    }

    assert (raw_choice != NULL);
    // Release the strings of both results; their contents were already
    // copied into STRINGs above. The lengths buffer is only freed when
    // the matching string is non-NULL.
    if (tess_choice.string) {
      strfree(tess_choice.string);
      strfree(tess_choice.lengths);
    }
    if (tess_raw.string) {
      strfree(tess_raw.string);
      strfree(tess_raw.lengths);
    }
    // Build the result from the padded best-choice text and the rating,
    // certainty and permuter of tess_choice.
    return new WERD_CHOICE (word_string.string (),
                            word_string_lengths.string (),
                            tess_choice.rating, tess_choice.certainty,
                            tess_choice.permuter);
  }
}