Example #1
0
bool Dict::AcceptableChoice(const WERD_CHOICE& best_choice,
                            XHeightConsistencyEnum xheight_consistency) {
  float CertaintyThreshold = stopper_nondict_certainty_base;
  int WordSize;

  if (stopper_no_acceptable_choices) return false;

  if (best_choice.length() == 0) return false;

  bool no_dang_ambigs = !best_choice.dangerous_ambig_found();
  bool is_valid_word = valid_word_permuter(best_choice.permuter(), false);
  bool is_case_ok = case_ok(best_choice, getUnicharset());

  if (stopper_debug_level >= 1) {
    const char *xht = "UNKNOWN";
    switch (xheight_consistency) {
      case XH_GOOD:  xht = "NORMAL"; break;
      case XH_SUBNORMAL:  xht = "SUBNORMAL"; break;
      case XH_INCONSISTENT:  xht = "INCONSISTENT"; break;
      default: xht = "UNKNOWN";
    }
    tprintf("\nStopper:  %s (word=%c, case=%c, xht_ok=%s=[%g,%g])\n",
            best_choice.unichar_string().string(),
            (is_valid_word ? 'y' : 'n'),
            (is_case_ok ? 'y' : 'n'),
            xht,
            best_choice.min_x_height(),
            best_choice.max_x_height());
  }
  // Do not accept invalid words in PASS1.
  if (reject_offset_ <= 0.0f && !is_valid_word) return false;
  if (is_valid_word && is_case_ok) {
    WordSize = LengthOfShortestAlphaRun(best_choice);
    WordSize -= stopper_smallword_size;
    if (WordSize < 0)
      WordSize = 0;
    CertaintyThreshold += WordSize * stopper_certainty_per_char;
  }

  if (stopper_debug_level >= 1)
    tprintf("Stopper:  Rating = %4.1f, Certainty = %4.1f, Threshold = %4.1f\n",
            best_choice.rating(), best_choice.certainty(), CertaintyThreshold);

  if (no_dang_ambigs &&
      best_choice.certainty() > CertaintyThreshold &&
      xheight_consistency < XH_INCONSISTENT &&
      UniformCertainties(best_choice)) {
    return true;
  } else {
    if (stopper_debug_level >= 1) {
      tprintf("AcceptableChoice() returned false"
              " (no_dang_ambig:%d cert:%.4g thresh:%g uniform:%d)\n",
              no_dang_ambigs, best_choice.certainty(),
              CertaintyThreshold,
              UniformCertainties(best_choice));
    }
    return false;
  }
}
Example #2
0
int Dict::UniformCertainties(const WERD_CHOICE& word) {
  float Certainty;
  float WorstCertainty = MAX_FLOAT32;
  float CertaintyThreshold;
  FLOAT64 TotalCertainty;
  FLOAT64 TotalCertaintySquared;
  FLOAT64 Variance;
  FLOAT32 Mean, StdDev;
  int word_length = word.length();

  if (word_length < 3)
    return true;

  TotalCertainty = TotalCertaintySquared = 0.0;
  for (int i = 0; i < word_length; ++i) {
    Certainty = word.certainty(i);
    TotalCertainty += Certainty;
    TotalCertaintySquared += Certainty * Certainty;
    if (Certainty < WorstCertainty)
      WorstCertainty = Certainty;
  }

  // Subtract off worst certainty from statistics.
  word_length--;
  TotalCertainty -= WorstCertainty;
  TotalCertaintySquared -= WorstCertainty * WorstCertainty;

  Mean = TotalCertainty / word_length;
  Variance = ((word_length * TotalCertaintySquared -
    TotalCertainty * TotalCertainty) /
    (word_length * (word_length - 1)));
  if (Variance < 0.0)
    Variance = 0.0;
  StdDev = sqrt(Variance);

  CertaintyThreshold = Mean - stopper_allowable_character_badness * StdDev;
  if (CertaintyThreshold > stopper_nondict_certainty_base)
    CertaintyThreshold = stopper_nondict_certainty_base;

  if (word.certainty() < CertaintyThreshold) {
    if (stopper_debug_level >= 1)
      tprintf("Stopper: Non-uniform certainty = %4.1f"
              " (m=%4.1f, s=%4.1f, t=%4.1f)\n",
              word.certainty(), Mean, StdDev, CertaintyThreshold);
    return false;
  } else {
    return true;
  }
}
Example #3
0
bool Dict::AcceptableResult(const WERD_CHOICE &BestChoice) {
  float CertaintyThreshold = stopper_nondict_certainty_base - reject_offset_;
  int WordSize;

  if (stopper_debug_level >= 1) {
    tprintf("\nRejecter: %s (word=%c, case=%c, unambig=%c)\n",
            BestChoice.debug_string(getUnicharset()).string(),
            (valid_word(BestChoice) ? 'y' : 'n'),
            (case_ok(BestChoice, getUnicharset()) ? 'y' : 'n'),
            ((list_rest (best_choices_) != NIL_LIST) ? 'n' : 'y'));
  }

  if (BestChoice.length() == 0 || CurrentWordAmbig())
    return false;
  if (BestChoice.fragment_mark()) {
    if (stopper_debug_level >= 1) {
      cprintf("AcceptableResult(): a choice with fragments beats BestChoice\n");
    }
    return false;
  }
  if (valid_word(BestChoice) && case_ok(BestChoice, getUnicharset())) {
    WordSize = LengthOfShortestAlphaRun(BestChoice);
    WordSize -= stopper_smallword_size;
    if (WordSize < 0)
      WordSize = 0;
    CertaintyThreshold += WordSize * stopper_certainty_per_char;
  }

  if (stopper_debug_level >= 1)
    cprintf ("Rejecter: Certainty = %4.1f, Threshold = %4.1f   ",
      BestChoice.certainty(), CertaintyThreshold);

  if (BestChoice.certainty() > CertaintyThreshold &&
      !stopper_no_acceptable_choices) {
    if (stopper_debug_level >= 1)
      cprintf("ACCEPTED\n");
    return true;
  }
  else {
    if (stopper_debug_level >= 1)
      cprintf("REJECTED\n");
    return false;
  }
}
Example #4
0
// Returns an array of all word confidences, terminated by -1.
int* TessBaseAPI::AllTextConfidences(PAGE_RES* page_res) {
  if (!page_res) return NULL;
  int n_word = 0;
  PAGE_RES_IT res_it(page_res);
  for (res_it.restart_page(); res_it.word () != NULL; res_it.forward())
    n_word++;

  int* conf = new int[n_word+1];
  n_word = 0;
  for (res_it.restart_page(); res_it.word () != NULL; res_it.forward()) {
    WERD_RES *word = res_it.word();
    WERD_CHOICE* choice = word->best_choice;
    int w_conf = static_cast<int>(100 + 5 * choice->certainty());
                 // This is the eq for converting Tesseract confidence to 1..100
    if (w_conf < 0) w_conf = 0;
    if (w_conf > 100) w_conf = 100;
    conf[n_word++] = w_conf;
  }
  conf[n_word] = -1;
  return conf;
}