Ejemplo n.º 1
0
void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm,
                                     ROW *row,
                                     BLOCK* block) {
  inT16 best_score;
  WERD_RES_LIST current_perm;
  inT16 current_score;
  BOOL8 improved = FALSE;

  best_score = eval_word_spacing(best_perm);  // default score
  dump_words(best_perm, best_score, 1, improved);

  if (best_score != PERFECT_WERDS)
    initialise_search(best_perm, current_perm);

  while ((best_score != PERFECT_WERDS) && !current_perm.empty()) {
    match_current_words(current_perm, row, block);
    current_score = eval_word_spacing(current_perm);
    dump_words(current_perm, current_score, 2, improved);
    if (current_score > best_score) {
      best_perm.clear();
      best_perm.deep_copy(&current_perm, &WERD_RES::deep_copy);
      best_score = current_score;
      improved = TRUE;
    }
    if (current_score < PERFECT_WERDS)
      transform_to_next_perm(current_perm);
  }
  dump_words(best_perm, best_score, 3, improved);
}
Ejemplo n.º 2
0
void fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row) {
  inT16 best_score;
  WERD_RES_IT best_perm_it(&best_perm);
  WERD_RES_LIST current_perm;
  WERD_RES_IT current_perm_it(&current_perm);
  WERD_RES *old_word_res;
  WERD_RES *new_word_res;
  inT16 current_score;
  BOOL8 improved = FALSE;

                                 //default score
  best_score = fp_eval_word_spacing (best_perm);

  dump_words (best_perm, best_score, 1, improved);

  new_word_res = new WERD_RES;
  old_word_res = best_perm_it.data ();
                                 //Kludge to force deep copy
  old_word_res->combination = TRUE;
  *new_word_res = *old_word_res; //deep copy
                                 //Undo kludge
  old_word_res->combination = FALSE;
                                 //Undo kludge
  new_word_res->combination = FALSE;
  current_perm_it.add_to_end (new_word_res);

  break_noisiest_blob_word(current_perm);

  while ((best_score != PERFECT_WERDS) && !current_perm.empty ()) {
    match_current_words(current_perm, row);
    current_score = fp_eval_word_spacing (current_perm);
    dump_words (current_perm, current_score, 2, improved);
    if (current_score > best_score) {
      best_perm.clear();
      best_perm.deep_copy(&current_perm, &WERD_RES::deep_copy);
      best_score = current_score;
      improved = TRUE;
    }
    if (current_score < PERFECT_WERDS)
      break_noisiest_blob_word(current_perm);
  }
  dump_words (best_perm, best_score, 3, improved);
}
Ejemplo n.º 3
0
/**
 * break_noisiest_blob_word()
 * Find the word with the blob which looks like the worst noise.
 * Break the word into two, deleting the noise blob.
 */
void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) {
  WERD_RES_IT word_it(&words);
  WERD_RES_IT worst_word_it;
  float worst_noise_score = 9999;
  int worst_blob_index = -1;     // Noisiest blob of noisiest wd
  int blob_index;                // of wds noisiest blob
  float noise_score;             // of wds noisiest blob
  WERD_RES *word_res;
  C_BLOB_IT blob_it;
  C_BLOB_IT rej_cblob_it;
  C_BLOB_LIST new_blob_list;
  C_BLOB_IT new_blob_it;
  C_BLOB_IT new_rej_cblob_it;
  WERD *new_word;
  inT16 start_of_noise_blob;
  inT16 i;

  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
    blob_index = worst_noise_blob(word_it.data(), &noise_score);
    if (blob_index > -1 && worst_noise_score > noise_score) {
      worst_noise_score = noise_score;
      worst_blob_index = blob_index;
      worst_word_it = word_it;
    }
  }
  if (worst_blob_index < 0) {
    words.clear();          // signal termination
    return;
  }

  /* Now split the worst_word_it */

  word_res = worst_word_it.data();

  /* Move blobs before noise blob to a new bloblist */

  new_blob_it.set_to_list(&new_blob_list);
  blob_it.set_to_list(word_res->word->cblob_list());
  for (i = 0; i < worst_blob_index; i++, blob_it.forward()) {
    new_blob_it.add_after_then_move(blob_it.extract());
  }
  start_of_noise_blob = blob_it.data()->bounding_box().left();
  delete blob_it.extract();     // throw out noise blob

  new_word = new WERD(&new_blob_list, word_res->word);
  new_word->set_flag(W_EOL, FALSE);
  word_res->word->set_flag(W_BOL, FALSE);
  word_res->word->set_blanks(1);  // After break

  new_rej_cblob_it.set_to_list(new_word->rej_cblob_list());
  rej_cblob_it.set_to_list(word_res->word->rej_cblob_list());
  for (;
       (!rej_cblob_it.empty() &&
        (rej_cblob_it.data()->bounding_box().left() < start_of_noise_blob));
       rej_cblob_it.forward()) {
    new_rej_cblob_it.add_after_then_move(rej_cblob_it.extract());
  }

  WERD_RES* new_word_res = new WERD_RES(new_word);
  new_word_res->combination = TRUE;
  worst_word_it.add_before_then_move(new_word_res);

  word_res->ClearResults();
}
Ejemplo n.º 4
0
/**
 * @name transform_to_next_perm()
 * Examines the current word list to find the smallest word gap size. Then walks
 * the word list closing any gaps of this size by either inserted new
 * combination words, or extending existing ones.
 *
 * The routine COULD be limited to stop it building words longer than N blobs.
 *
 * If there are no more gaps then it DELETES the entire list and returns the
 * empty list to cause termination.
 */
void transform_to_next_perm(WERD_RES_LIST &words) {
  WERD_RES_IT word_it(&words);
  WERD_RES_IT prev_word_it(&words);
  WERD_RES *word;
  WERD_RES *prev_word;
  WERD_RES *combo;
  WERD *copy_word;
  inT16 prev_right = -MAX_INT16;
  TBOX box;
  inT16 gap;
  inT16 min_gap = MAX_INT16;

  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
    word = word_it.data();
    if (!word->part_of_combo) {
      box = word->word->bounding_box();
      if (prev_right > -MAX_INT16) {
        gap = box.left() - prev_right;
        if (gap < min_gap)
          min_gap = gap;
      }
      prev_right = box.right();
    }
  }
  if (min_gap < MAX_INT16) {
    prev_right = -MAX_INT16;        // back to start
    word_it.set_to_list(&words);
    // Note: we can't use cycle_pt due to inserted combos at start of list.
    for (; (prev_right == -MAX_INT16) || !word_it.at_first();
         word_it.forward()) {
      word = word_it.data();
      if (!word->part_of_combo) {
        box = word->word->bounding_box();
        if (prev_right > -MAX_INT16) {
          gap = box.left() - prev_right;
          if (gap <= min_gap) {
            prev_word = prev_word_it.data();
            if (prev_word->combination) {
              combo = prev_word;
            } else {
              /* Make a new combination and insert before
               * the first word being joined. */
              copy_word = new WERD;
              *copy_word = *(prev_word->word);
              // deep copy
              combo = new WERD_RES(copy_word);
              combo->combination = TRUE;
              combo->x_height = prev_word->x_height;
              prev_word->part_of_combo = TRUE;
              prev_word_it.add_before_then_move(combo);
            }
            combo->word->set_flag(W_EOL, word->word->flag(W_EOL));
            if (word->combination) {
              combo->word->join_on(word->word);
              // Move blobs to combo
              // old combo no longer needed
              delete word_it.extract();
            } else {
              // Copy current wd to combo
              combo->copy_on(word);
              word->part_of_combo = TRUE;
            }
            combo->done = FALSE;
            combo->ClearResults();
          } else {
            prev_word_it = word_it;  // catch up
          }
        }
        prev_right = box.right();
      }
    }
  } else {
    words.clear();  // signal termination
  }
}