Ejemplo n.º 1
0
/**
 * @name fix_sp_fp_word()
 * Examine the word under the iterator and, when it qualifies, pass it to
 * fix_noisy_space_list() so that it may be split by deleting noise blobs.
 *
 * The word is left untouched when it is flagged W_REP_CHAR, is a combination,
 * is part of a combo, lacks the W_DONT_CHOP flag, or when worst_noise_blob()
 * returns a negative blob index for it.
 *
 * On return the iterator points to the same place if the word is unchanged,
 * or to the last of the replacement words.
 *
 * @param word_res_it  iterator positioned on the word to examine
 * @param row          forwarded to fix_noisy_space_list()
 * @param block        forwarded to fix_noisy_space_list()
 */
void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row,
                               BLOCK* block) {
  WERD_RES *current = word_res_it.data();

  // Eligibility checks, evaluated in the same order as a short-circuit chain.
  if (current->word->flag(W_REP_CHAR)) return;
  if (current->combination) return;
  if (current->part_of_combo) return;
  if (!current->word->flag(W_DONT_CHOP)) return;

  // Only the returned index matters here; the score output is discarded.
  float unused_score;
  const inT16 noise_blob = worst_noise_blob(current, &unused_score);
  if (noise_blob < 0) return;

  if (debug_fix_space_level > 1) {
    tprintf("FP fixspace working on \"%s\"\n",
            current->best_choice->unichar_string().string());
  }

  current->word->rej_cblob_list()->sort(c_blob_comparator);

  // Pull the word out of the caller's list into a private one and let
  // fix_noisy_space_list() work on that private list.
  WERD_RES_LIST pieces;
  WERD_RES_IT pieces_it(&pieces);
  pieces_it.add_after_stay_put(word_res_it.extract());
  fix_noisy_space_list(pieces, row, block);

  // Record the piece count before the private list is spliced back in.
  inT16 remaining = pieces.length();
  word_res_it.add_list_before(&pieces);

  // Step forward until the iterator rests on the last replacement word
  // (or the end of the caller's list is reached first).
  while (!word_res_it.at_last() && remaining > 1) {
    word_res_it.forward();
    --remaining;
  }
}
Ejemplo n.º 2
0
/*************************************************************************
 * fix_sp_fp_word()
 * Examine the word under the iterator and, when it qualifies, pass it to
 * fix_noisy_space_list() so that it may be split by deleting noise blobs.
 *
 * Nothing is done when fixsp_check_for_fp_noise_space is off, when the word
 * is flagged W_REP_CHAR, is a combination, is part of a combo, lacks the
 * W_DONT_CHOP flag, or when worst_noise_blob() returns a negative index.
 *
 * On return the iterator points to the same place if the word is unchanged,
 * or to the last of the replacement words.
 *************************************************************************/
void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row) {
  WERD_RES *current = word_res_it.data ();

  // Eligibility checks, evaluated in the same order as a short-circuit chain.
  if (!fixsp_check_for_fp_noise_space)
    return;
  if (current->word->flag (W_REP_CHAR))
    return;
  if (current->combination)
    return;
  if (current->part_of_combo)
    return;
  if (!current->word->flag (W_DONT_CHOP))
    return;

  // Only the returned index matters here; the score output is discarded.
  float unused_score;
  const inT16 noise_blob = worst_noise_blob (current, &unused_score);
  if (noise_blob < 0)
    return;

  #ifndef SECURE_NAMES
  if (debug_fix_space_level > 1) {
    tprintf ("FP fixspace working on \"%s\"\n",
      current->best_choice->string ().string ());
  }
  #endif

  gblob_sort_list ((PBLOB_LIST *) current->word->rej_cblob_list (), FALSE);

  // Pull the word out of the caller's list into a private one and let
  // fix_noisy_space_list() work on that private list.
  WERD_RES_LIST pieces;
  WERD_RES_IT pieces_it (&pieces);
  pieces_it.add_after_stay_put (word_res_it.extract ());
  fix_noisy_space_list (pieces, row);

  // Record the piece count before the private list is spliced back in.
  inT16 remaining = pieces.length ();
  word_res_it.add_list_before (&pieces);

  // Step forward until the iterator rests on the last replacement word
  // (or the end of the caller's list is reached first).
  while (!word_res_it.at_last () && remaining > 1) {
    word_res_it.forward ();
    --remaining;
  }
}