BOOL8 word_adaptable(  //should we adapt?
                     WERD_RES *word,
                     uinT16 mode) {
  BOOL8 status = FALSE;
  BITS16 flags(mode);

  enum MODES
  {
    ADAPTABLE_WERD,
    ACCEPTABLE_WERD,
    CHECK_DAWGS,
    CHECK_SPACES,
    CHECK_ONE_ELL_CONFLICT,
    CHECK_AMBIG_WERD
  };

  /*
  0: NO adaption
  */
  if (mode == 0) {
    return FALSE;
  }

  if (flags.bit (ADAPTABLE_WERD))
    status |= word->tess_would_adapt;

  if (flags.bit (ACCEPTABLE_WERD))
    status |= word->tess_accepted;

  if (!status)                   // If not set then
    return FALSE;                // ignore other checks

  if (flags.bit (CHECK_DAWGS) &&
    (word->best_choice->permuter () != SYSTEM_DAWG_PERM) &&
    (word->best_choice->permuter () != FREQ_DAWG_PERM) &&
    (word->best_choice->permuter () != USER_DAWG_PERM) &&
    (word->best_choice->permuter () != NUMBER_PERM))
    return FALSE;

  if (flags.bit (CHECK_ONE_ELL_CONFLICT) && one_ell_conflict (word, FALSE))
    return FALSE;

  if (flags.bit (CHECK_SPACES) &&
    (strchr (word->best_choice->string ().string (), ' ') != NULL))
    return FALSE;

//  if (flags.bit (CHECK_AMBIG_WERD) && test_ambig_word (word))
  if (flags.bit (CHECK_AMBIG_WERD) &&
      !NoDangerousAmbig(word->best_choice->string().string(),
                        word->best_choice->lengths().string(),
                        NULL))
    return FALSE;

  return status;

}
示例#2
0
void Tesseract::set_done(WERD_RES *word, inT16 pass) {
  word->done = word->tess_accepted &&
      (strchr(word->best_choice->unichar_string().string(), ' ') == NULL);
  bool word_is_ambig = word->best_choice->dangerous_ambig_found();
  bool word_from_dict = word->best_choice->permuter() == SYSTEM_DAWG_PERM ||
      word->best_choice->permuter() == FREQ_DAWG_PERM ||
      word->best_choice->permuter() == USER_DAWG_PERM;
  if (word->done && (pass == 1) && (!word_from_dict || word_is_ambig) &&
      one_ell_conflict(word, FALSE)) {
    if (tessedit_rejection_debug) tprintf("one_ell_conflict detected\n");
    word->done = FALSE;
  }
  if (word->done && ((!word_from_dict &&
      word->best_choice->permuter() != NUMBER_PERM) || word_is_ambig)) {
    if (tessedit_rejection_debug) tprintf("non-dict or ambig word detected\n");
      word->done = FALSE;
  }
  if (tessedit_rejection_debug) {
    tprintf("set_done(): done=%d\n", word->done);
    word->best_choice->print("");
  }
}
示例#3
0
BOOL8 Tesseract::word_adaptable(  //should we adapt?
                                WERD_RES *word,
                                uinT16 mode) {
  if (tessedit_adaption_debug) {
    tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n",
          word->best_choice == NULL ? "" :
          word->best_choice->unichar_string().string(),
          word->best_choice->rating(), word->best_choice->certainty());
  }

  BOOL8 status = FALSE;
  BITS16 flags(mode);

  enum MODES
  {
    ADAPTABLE_WERD,
    ACCEPTABLE_WERD,
    CHECK_DAWGS,
    CHECK_SPACES,
    CHECK_ONE_ELL_CONFLICT,
    CHECK_AMBIG_WERD
  };

  /*
  0: NO adaption
  */
  if (mode == 0) {
    if (tessedit_adaption_debug) tprintf("adaption disabled\n");
    return FALSE;
  }

  if (flags.bit (ADAPTABLE_WERD)) {
    status |= word->tess_would_adapt;  // result of Classify::AdaptableWord()
    if (tessedit_adaption_debug && !status) {
      tprintf("tess_would_adapt bit is false\n");
    }
  }

  if (flags.bit (ACCEPTABLE_WERD)) {
    status |= word->tess_accepted;
    if (tessedit_adaption_debug && !status) {
      tprintf("tess_accepted bit is false\n");
    }
  }

  if (!status) {                  // If not set then
    return FALSE;                // ignore other checks
  }

  if (flags.bit (CHECK_DAWGS) &&
    (word->best_choice->permuter () != SYSTEM_DAWG_PERM) &&
    (word->best_choice->permuter () != FREQ_DAWG_PERM) &&
    (word->best_choice->permuter () != USER_DAWG_PERM) &&
    (word->best_choice->permuter () != NUMBER_PERM)) {
    if (tessedit_adaption_debug) tprintf("word not in dawgs\n");
    return FALSE;
  }

  if (flags.bit (CHECK_ONE_ELL_CONFLICT) && one_ell_conflict (word, FALSE)) {
    if (tessedit_adaption_debug) tprintf("word has ell conflict\n");
    return FALSE;
  }

  if (flags.bit (CHECK_SPACES) &&
    (strchr(word->best_choice->unichar_string().string(), ' ') != NULL)) {
    if (tessedit_adaption_debug) tprintf("word contains spaces\n");
    return FALSE;
  }

  if (flags.bit (CHECK_AMBIG_WERD) &&
      word->best_choice->dangerous_ambig_found()) {
    if (tessedit_adaption_debug) tprintf("word is ambiguous\n");
    return FALSE;
  }

  if (tessedit_adaption_debug) {
    tprintf("returning status %d\n", status);
  }
  return status;
}
示例#4
0
BOOL8 Tesseract::word_adaptable(  //should we adapt?
                                WERD_RES *word,
                                uinT16 mode) {
  if (tessedit_adaption_debug) {
    tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n",
          word->best_choice == NULL ? "" :
          word->best_choice->unichar_string().string(),
          word->best_choice->rating(), word->best_choice->certainty());
  }

  BOOL8 status = FALSE;
  BITS16 flags(mode);

  enum MODES
  {
    ADAPTABLE_WERD,
    ACCEPTABLE_WERD,
    CHECK_DAWGS,
    CHECK_SPACES,
    CHECK_ONE_ELL_CONFLICT,
    CHECK_AMBIG_WERD
  };

  /*
  0: NO adaption
  */
  if (mode == 0) {
    if (tessedit_adaption_debug) tprintf("adaption disabled\n");
    return FALSE;
  }

  if (flags.bit (ADAPTABLE_WERD)) {
    status |= word->tess_would_adapt;  // result of Classify::AdaptableWord()
    if (tessedit_adaption_debug && !status) {
      tprintf("tess_would_adapt bit is false\n");
    }
  }

  if (flags.bit (ACCEPTABLE_WERD)) {
    status |= word->tess_accepted;
    if (tessedit_adaption_debug && !status) {
      tprintf("tess_accepted bit is false\n");
    }
  }

  if (!status) {                  // If not set then
    return FALSE;                // ignore other checks
  }

  if (flags.bit (CHECK_DAWGS) &&
    (word->best_choice->permuter () != SYSTEM_DAWG_PERM) &&
    (word->best_choice->permuter () != FREQ_DAWG_PERM) &&
    (word->best_choice->permuter () != USER_DAWG_PERM) &&
    (word->best_choice->permuter () != NUMBER_PERM)) {
    if (tessedit_adaption_debug) tprintf("word not in dawgs\n");
    return FALSE;
  }

  if (flags.bit (CHECK_ONE_ELL_CONFLICT) && one_ell_conflict (word, FALSE)) {
    if (tessedit_adaption_debug) tprintf("word has ell conflict\n");
    return FALSE;
  }

  if (flags.bit (CHECK_SPACES) &&
    (strchr(word->best_choice->unichar_string().string(), ' ') != NULL)) {
    if (tessedit_adaption_debug) tprintf("word contains spaces\n");
    return FALSE;
  }

//  if (flags.bit (CHECK_AMBIG_WERD) && test_ambig_word (word))
  if (flags.bit (CHECK_AMBIG_WERD) &&
      !getDict().NoDangerousAmbig(word->best_choice, NULL, false, NULL, NULL)) {
    if (tessedit_adaption_debug) tprintf("word is ambiguous\n");
    return FALSE;
  }

  // Do not adapt to words that are composed from fragments if
  // tessedit_adapt_to_char_fragments is false.
  if (!tessedit_adapt_to_char_fragments) {
    const char *fragment_lengths = word->best_choice->fragment_lengths();
    if (fragment_lengths != NULL && *fragment_lengths != '\0') {
      for (int i = 0; i < word->best_choice->length(); ++i) {
        if (fragment_lengths[i] > 1) {
          if (tessedit_adaption_debug) tprintf("won't adapt to fragments\n");
          return false;  // found a character composed from fragments
        }
      }
    }
  }

  if (tessedit_adaption_debug) {
    tprintf("returning status %d\n", status);
  }
  return status;
}
示例#5
0
/*************************************************************************
 * make_reject_map()
 *
 * Sets the done flag to indicate whether the resylt is acceptable.
 *
 * Sets a reject map for the word.
 *************************************************************************/
void Tesseract::make_reject_map(WERD_RES *word, ROW *row, inT16 pass) {
  int i;
  int offset;

  flip_0O(word);
  check_debug_pt(word, -1);     // For trap only
  set_done(word, pass);  // Set acceptance
  word->reject_map.initialise(word->best_choice->unichar_lengths().length());
  reject_blanks(word);
  /*
  0: Rays original heuristic - the baseline
  */
  if (tessedit_reject_mode == 0) {
    if (!word->done)
      reject_poor_matches(word);
  } else if (tessedit_reject_mode == 5) {
    /*
    5: Reject I/1/l from words where there is no strong contextual confirmation;
      the whole of any unacceptable words (incl PERM rej of dubious 1/I/ls);
      and the whole of any words which are very small
    */
    if (kBlnXHeight / word->denorm.y_scale() <= min_sane_x_ht_pixels) {
      word->reject_map.rej_word_small_xht();
    } else {
      one_ell_conflict(word, TRUE);
      /*
        Originally the code here just used the done flag. Now I have duplicated
        and unpacked the conditions for setting the done flag so that each
        mechanism can be turned on or off independently. This works WITHOUT
        affecting the done flag setting.
      */
      if (rej_use_tess_accepted && !word->tess_accepted)
        word->reject_map.rej_word_not_tess_accepted ();

      if (rej_use_tess_blanks &&
        (strchr (word->best_choice->unichar_string().string (), ' ') != NULL))
        word->reject_map.rej_word_contains_blanks ();

      WERD_CHOICE* best_choice = word->best_choice;
      if (rej_use_good_perm) {
        if ((best_choice->permuter() == SYSTEM_DAWG_PERM ||
             best_choice->permuter() == FREQ_DAWG_PERM ||
             best_choice->permuter() == USER_DAWG_PERM) &&
            (!rej_use_sensible_wd ||
             acceptable_word_string(*word->uch_set,
                                    best_choice->unichar_string().string(),
                                    best_choice->unichar_lengths().string()) !=
                                        AC_UNACCEPTABLE)) {
          // PASSED TEST
        } else if (best_choice->permuter() == NUMBER_PERM) {
          if (rej_alphas_in_number_perm) {
            for (i = 0, offset = 0;
                 best_choice->unichar_string()[offset] != '\0';
                 offset += best_choice->unichar_lengths()[i++]) {
              if (word->reject_map[i].accepted() &&
                  word->uch_set->get_isalpha(
                      best_choice->unichar_string().string() + offset,
                      best_choice->unichar_lengths()[i]))
                word->reject_map[i].setrej_bad_permuter();
              // rej alpha
            }
          }
        } else {
          word->reject_map.rej_word_bad_permuter();
        }
      }
      /* Ambig word rejection was here once !!*/
    }
  } else {
    tprintf("BAD tessedit_reject_mode\n");
    err_exit();
  }

  if (tessedit_image_border > -1)
    reject_edge_blobs(word);

  check_debug_pt (word, 10);
  if (tessedit_rejection_debug) {
    tprintf("Permuter Type = %d\n", word->best_choice->permuter ());
    tprintf("Certainty: %f     Rating: %f\n",
      word->best_choice->certainty (), word->best_choice->rating ());
    tprintf("Dict word: %d\n", dict_word(*(word->best_choice)));
  }

  flip_hyphens(word);
  check_debug_pt(word, 20);
}