Example #1
0
void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm,
                                     ROW *row,
                                     BLOCK* block) {
  inT16 best_score;
  WERD_RES_LIST current_perm;
  inT16 current_score;
  BOOL8 improved = FALSE;

  best_score = eval_word_spacing(best_perm);  // default score
  dump_words(best_perm, best_score, 1, improved);

  if (best_score != PERFECT_WERDS)
    initialise_search(best_perm, current_perm);

  while ((best_score != PERFECT_WERDS) && !current_perm.empty()) {
    match_current_words(current_perm, row, block);
    current_score = eval_word_spacing(current_perm);
    dump_words(current_perm, current_score, 2, improved);
    if (current_score > best_score) {
      best_perm.clear();
      best_perm.deep_copy(&current_perm, &WERD_RES::deep_copy);
      best_score = current_score;
      improved = TRUE;
    }
    if (current_score < PERFECT_WERDS)
      transform_to_next_perm(current_perm);
  }
  dump_words(best_perm, best_score, 3, improved);
}
inT16 fp_eval_word_spacing(WERD_RES_LIST &word_res_list) {
  WERD_RES_IT word_it(&word_res_list);
  WERD_RES *word;
  PBLOB_IT blob_it;
  inT16 word_length;
  inT16 score = 0;
  inT16 i;
  inT16 offset;
  const char *chs;
  float small_limit = bln_x_height * fixsp_small_outlines_size;

  if (!fixsp_fp_eval)
    return (eval_word_spacing (word_res_list));

  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
    word = word_it.data ();
    word_length = word->reject_map.length ();
    chs = word->best_choice->string ().string ();
    if ((word->done ||
      word->tess_accepted) ||
      (word->best_choice->permuter () == SYSTEM_DAWG_PERM) ||
      (word->best_choice->permuter () == FREQ_DAWG_PERM) ||
      (word->best_choice->permuter () == USER_DAWG_PERM) ||
    (safe_dict_word (chs) > 0)) {
      blob_it.set_to_list (word->outword->blob_list ());
      for (i = 0, offset = 0; i < word_length;
           offset += word->best_choice->lengths()[i++], blob_it.forward ()) {
        if ((chs[offset] == ' ') ||
          (blob_noise_score (blob_it.data ()) < small_limit))
          score -= 1;            //penalise possibly erroneous non-space

        else if (word->reject_map[i].accepted ())
          score++;
      }
    }
  }
  if (score < 0)
    score = 0;
  return score;
}