コード例 #1
0
ファイル: fixspace.cpp プロジェクト: ErfanHasmin/scope-ocr
/**
 * @name fix_sp_fp_word()
 * Try to split the word under the iterator by removing noise blobs.
 *
 * Nothing is done when the word is flagged W_REP_CHAR, is a combination or
 * part of one, lacks the W_DONT_CHOP flag, or when worst_noise_blob() reports
 * no candidate blob (negative index).
 *
 * Otherwise the word is extracted into a temporary list, that list is handed
 * to fix_noisy_space_list(), and whatever the list then contains is spliced
 * back in front of the iterator position.
 *
 * On return the iterator points to the same place if the word is unchanged,
 * or to the last of the replacement words.
 *
 * @param word_res_it iterator positioned on the word to examine
 * @param row forwarded unchanged to fix_noisy_space_list()
 * @param block forwarded unchanged to fix_noisy_space_list()
 */
void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row,
                               BLOCK* block) {
  WERD_RES *candidate = word_res_it.data();

  // Same tests, same evaluation order as a chain of "skip if" conditions,
  // expressed positively.
  const bool eligible = !candidate->word->flag(W_REP_CHAR) &&
                        !candidate->combination &&
                        !candidate->part_of_combo &&
                        candidate->word->flag(W_DONT_CHOP);
  if (!eligible) {
    return;
  }

  // Only the index matters here; the score is required by the callee's
  // signature but is not used.
  float ignored_score;
  const inT16 noise_index = worst_noise_blob(candidate, &ignored_score);
  if (noise_index < 0) {
    return;
  }

  if (debug_fix_space_level > 1) {
    tprintf("FP fixspace working on \"%s\"\n",
            candidate->best_choice->unichar_string().string());
  }

  candidate->word->rej_cblob_list()->sort(c_blob_comparator);

  // Move the word into a list of its own so it can be processed in isolation.
  WERD_RES_LIST replacements;
  WERD_RES_IT replacements_it(&replacements);
  replacements_it.add_after_stay_put(word_res_it.extract());
  fix_noisy_space_list(replacements, row, block);

  // The length must be read before the list is spliced back into the caller's
  // list.
  inT16 remaining = replacements.length();
  word_res_it.add_list_before(&replacements);

  // Step forward at most (length - 1) times, stopping early at the list end.
  while (!word_res_it.at_last() && remaining > 1) {
    word_res_it.forward();
    remaining--;
  }
}
コード例 #2
0
/*************************************************************************
 * fix_sp_fp_word()
 * Test the current word to see if it can be split by deleting noise blobs.
 * If so, do the business.
 *
 * The word is left alone when fixsp_check_for_fp_noise_space is off, when the
 * word is flagged W_REP_CHAR, is a combination or part of one, lacks the
 * W_DONT_CHOP flag, or when worst_noise_blob() returns a negative index.
 *
 * Otherwise the word is extracted into a temporary list, the list is passed
 * to fix_noisy_space_list(), and the list contents are spliced back before
 * the iterator position.
 *
 * Return with the iterator pointing to the same place if the word is
 * unchanged, or the last of the replacement words.
 *************************************************************************/
void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row) {
  // Fetch the current element first, before any of the checks below.
  WERD_RES *candidate = word_res_it.data();

  // Guard clauses, evaluated in the same order as a single || chain would be.
  if (!fixsp_check_for_fp_noise_space)
    return;
  if (candidate->word->flag(W_REP_CHAR))
    return;
  if (candidate->combination)
    return;
  if (candidate->part_of_combo)
    return;
  if (!candidate->word->flag(W_DONT_CHOP))
    return;

  // The score output is required by the callee's signature but unused here.
  float ignored_score;
  const inT16 noise_index = worst_noise_blob(candidate, &ignored_score);
  if (noise_index < 0)
    return;

  #ifndef SECURE_NAMES
  if (debug_fix_space_level > 1) {
    tprintf("FP fixspace working on \"%s\"\n",
            candidate->best_choice->string().string());
  }
  #endif

  // The rejected-blob list is handed to gblob_sort_list() through a
  // PBLOB_LIST pointer cast.
  gblob_sort_list((PBLOB_LIST *) candidate->word->rej_cblob_list(), FALSE);

  // Isolate the word in its own list for processing.
  WERD_RES_LIST replacements;
  WERD_RES_IT replacements_it(&replacements);
  replacements_it.add_after_stay_put(word_res_it.extract());
  fix_noisy_space_list(replacements, row);

  // Read the length before splicing the list back into the caller's list.
  inT16 remaining = replacements.length();
  word_res_it.add_list_before(&replacements);

  // Advance at most (length - 1) times, stopping early at the list end.
  while (!word_res_it.at_last() && remaining > 1) {
    word_res_it.forward();
    remaining--;
  }
}
コード例 #3
0
ファイル: fixspace.cpp プロジェクト: ErfanHasmin/scope-ocr
/**
 * break_noisiest_blob_word()
 * Scan every word in the list with worst_noise_blob() and select the word
 * whose reported score is lowest (the first such word wins ties). That word
 * is then broken in two at the reported blob index, and the blob at that
 * index is deleted.
 *
 * The blobs in front of the noise blob are moved to a newly allocated word,
 * together with the leading run of rejected blobs whose left edge lies to
 * the left of the noise blob. The new word is inserted into the list just
 * before the original, which keeps the remaining blobs and has its results
 * cleared.
 *
 * If no word reports a noise blob, the list is cleared to signal termination.
 *
 * @param words list of word results to search; modified in place
 */
void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) {
  WERD_RES_IT scan_it(&words);
  WERD_RES_IT target_it;           // position of the selected word
  float lowest_score = 9999;
  int noise_index = -1;            // blob index within the selected word
  int candidate_index;
  float candidate_score;

  /* Pass 1: find the word whose worst blob has the lowest score */

  for (scan_it.mark_cycle_pt(); !scan_it.cycled_list(); scan_it.forward()) {
    candidate_index = worst_noise_blob(scan_it.data(), &candidate_score);
    // The score is only consulted when a valid index was returned.
    if (candidate_index >= 0 && candidate_score < lowest_score) {
      lowest_score = candidate_score;
      noise_index = candidate_index;
      target_it = scan_it;
    }
  }

  if (noise_index < 0) {
    words.clear();                 // signal termination
    return;
  }

  /* Pass 2: split the selected word */

  WERD_RES *noisy_res = target_it.data();
  WERD *noisy_word = noisy_res->word;

  /* Transfer the blobs preceding the noise blob into a separate list */

  C_BLOB_LIST leading_blobs;
  C_BLOB_IT leading_it;
  leading_it.set_to_list(&leading_blobs);
  C_BLOB_IT source_it;
  source_it.set_to_list(noisy_word->cblob_list());

  inT16 moved = 0;
  while (moved < noise_index) {
    leading_it.add_after_then_move(source_it.extract());
    moved++;
    source_it.forward();
  }

  // Record where the noise blob started before discarding it; this is the
  // cut-off used for the rejected blobs below.
  const inT16 noise_left = source_it.data()->bounding_box().left();
  delete source_it.extract();      // throw out noise blob

  WERD *leading_word = new WERD(&leading_blobs, noisy_word);
  leading_word->set_flag(W_EOL, FALSE);
  noisy_word->set_flag(W_BOL, FALSE);
  noisy_word->set_blanks(1);       // After break

  /* Transfer rejected blobs that start left of the noise blob */

  C_BLOB_IT leading_rej_it;
  leading_rej_it.set_to_list(leading_word->rej_cblob_list());
  C_BLOB_IT source_rej_it;
  source_rej_it.set_to_list(noisy_word->rej_cblob_list());

  // Stops at the first rejected blob that is not left of the noise blob, or
  // when the source list runs out.
  while (!source_rej_it.empty() &&
         source_rej_it.data()->bounding_box().left() < noise_left) {
    leading_rej_it.add_after_then_move(source_rej_it.extract());
    source_rej_it.forward();
  }

  /* Insert the new word ahead of the original in the caller's list */

  WERD_RES *leading_res = new WERD_RES(leading_word);
  // NOTE(review): presumably marks leading_res as responsible for
  // leading_word -- confirm against the WERD_RES definition.
  leading_res->combination = TRUE;
  target_it.add_before_then_move(leading_res);

  noisy_res->ClearResults();
}