Exemplo n.º 1
0
/**
 * BLOB_CHOICE::BLOB_CHOICE
 *
 * Constructor to build a BLOB_CHOICE from another BLOB_CHOICE.
 */
BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) {
  unichar_id_ = other.unichar_id();
  rating_ = other.rating();
  certainty_ = other.certainty();
  config_ = other.config();
  script_id_ = other.script_id();
}
Exemplo n.º 2
0
/**
 * BLOB_CHOICE::BLOB_CHOICE
 *
 * Constructor to build a BLOB_CHOICE from another BLOB_CHOICE.
 */
BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) {
  unichar_id_ = other.unichar_id();
  rating_ = other.rating();
  certainty_ = other.certainty();
  config_ = other.config();
  config2_ = other.config2();
  script_id_ = other.script_id();
  language_model_state_ = NULL;
}
Exemplo n.º 3
0
/**
 * BLOB_CHOICE::BLOB_CHOICE
 *
 * Constructor to build a BLOB_CHOICE from another BLOB_CHOICE.
 */
BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) {
  unichar_id_ = other.unichar_id();
  rating_ = other.rating();
  certainty_ = other.certainty();
  fontinfo_id_ = other.fontinfo_id();
  fontinfo_id2_ = other.fontinfo_id2();
  script_id_ = other.script_id();
  language_model_state_ = NULL;
  min_xheight_ = other.min_xheight_;
  max_xheight_ = other.max_xheight_;
  adapted_ = other.adapted_;
}
Exemplo n.º 4
0
// Helper to find the BLOB_CHOICE in the bc_list that matches the given
// unichar_id, or NULL if there is no match.
BLOB_CHOICE* FindMatchingChoice(UNICHAR_ID char_id,
                                BLOB_CHOICE_LIST* bc_list) {
  // Find the corresponding best BLOB_CHOICE.
  BLOB_CHOICE_IT choice_it(bc_list);
  for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
       choice_it.forward()) {
    BLOB_CHOICE* choice = choice_it.data();
    if (choice->unichar_id() == char_id) {
      return choice;
    }
  }
  return NULL;
}
Exemplo n.º 5
0
/**
 * BLOB_CHOICE::BLOB_CHOICE
 *
 * Constructor to build a BLOB_CHOICE from another BLOB_CHOICE.
 */
BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) {
  unichar_id_ = other.unichar_id();
  rating_ = other.rating();
  certainty_ = other.certainty();
  fontinfo_id_ = other.fontinfo_id();
  fontinfo_id2_ = other.fontinfo_id2();
  script_id_ = other.script_id();
  matrix_cell_ = other.matrix_cell_;
  min_xheight_ = other.min_xheight_;
  max_xheight_ = other.max_xheight_;
  yshift_ = other.yshift();
  classifier_ = other.classifier_;
}
Exemplo n.º 6
0
/**
 * append_choices
 *
 * Checks to see whether or not the next choice is worth appending to
 * the word being generated. If so then keeps going deeper into the word.
 *
 * This function assumes that Dict::go_deeper_fxn_ is set.
 */
void Dict::append_choices(
    const char *debug,
    const BLOB_CHOICE_LIST_VECTOR &char_choices,
    const BLOB_CHOICE &blob_choice,
    int char_choice_index,
    const CHAR_FRAGMENT_INFO *prev_char_frag_info,
    WERD_CHOICE *word,
    float certainties[],
    float *limit,
    WERD_CHOICE *best_choice,
    int *attempts_left,
    void *more_args) {
  int word_ending =
    (char_choice_index == char_choices.length() - 1) ? true : false;

  // Deal with fragments.
  CHAR_FRAGMENT_INFO char_frag_info;
  if (!fragment_state_okay(blob_choice.unichar_id(), blob_choice.rating(),
                           blob_choice.certainty(), prev_char_frag_info, debug,
                           word_ending, &char_frag_info)) {
    return;  // blob_choice must be an invalid fragment
  }
  // Search the next letter if this character is a fragment.
  if (char_frag_info.unichar_id == INVALID_UNICHAR_ID) {
    permute_choices(debug, char_choices, char_choice_index + 1,
                    &char_frag_info, word, certainties, limit,
                    best_choice, attempts_left, more_args);
    return;
  }

  // Add the next unichar.
  float old_rating = word->rating();
  float old_certainty = word->certainty();
  uint8_t old_permuter = word->permuter();
  certainties[word->length()] = char_frag_info.certainty;
  word->append_unichar_id_space_allocated(
      char_frag_info.unichar_id, char_frag_info.num_fragments,
      char_frag_info.rating, char_frag_info.certainty);

  // Explore the next unichar.
  (this->*go_deeper_fxn_)(debug, char_choices, char_choice_index,
                          &char_frag_info, word_ending, word, certainties,
                          limit, best_choice, attempts_left, more_args);

  // Remove the unichar we added to explore other choices in it's place.
  word->remove_last_unichar_id();
  word->set_rating(old_rating);
  word->set_certainty(old_certainty);
  word->set_permuter(old_permuter);
}
Exemplo n.º 7
0
// Creates a fake blob choice from the combination of the given fragments.
// unichar is the class to be made from the combination,
// expanded_fragment_lengths[choice_index] is the number of fragments to use.
// old_choices[choice_index] has the classifier output for each fragment.
// choice index initially indexes the last fragment and should be decremented
// expanded_fragment_lengths[choice_index] times to get the earlier fragments.
// Guarantees to return something non-null, or abort!
BLOB_CHOICE* Wordrec::rebuild_fragments(
    const char* unichar,
    const char* expanded_fragment_lengths,
    int choice_index,
    BLOB_CHOICE_LIST_VECTOR *old_choices) {
  float rating = 0.0f;
  float certainty = 0.0f;
  inT16 min_xheight = -MAX_INT16;
  inT16 max_xheight = MAX_INT16;
  for (int fragment_pieces = expanded_fragment_lengths[choice_index] - 1;
       fragment_pieces >= 0; --fragment_pieces, --choice_index) {
    // Get a pointer to the classifier results from the old_choices.
    BLOB_CHOICE_LIST *current_choices = old_choices->get(choice_index);
    // Populate fragment with updated values and look for the
    // fragment with the same values in current_choices.
    // Update rating and certainty of the character being composed.
    CHAR_FRAGMENT fragment;
    fragment.set_all(unichar, fragment_pieces,
                     expanded_fragment_lengths[choice_index], false);
    BLOB_CHOICE_IT choice_it(current_choices);
    for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
        choice_it.forward()) {
      BLOB_CHOICE* choice = choice_it.data();
      const CHAR_FRAGMENT *current_fragment =
          getDict().getUnicharset().get_fragment(choice->unichar_id());
      if (current_fragment && fragment.equals(current_fragment)) {
        rating += choice->rating();
        if (choice->certainty() < certainty) {
          certainty = choice->certainty();
        }
        IntersectRange(choice->min_xheight(), choice->max_xheight(),
                       &min_xheight, &max_xheight);
        break;
      }
    }
    if (choice_it.cycled_list()) {
      print_ratings_list("Failure", current_choices, unicharset);
      tprintf("Failed to find fragment %s at index=%d\n",
              fragment.to_string().string(), choice_index);
    }
    ASSERT_HOST(!choice_it.cycled_list());  // Be sure we found the fragment.
  }
  return new BLOB_CHOICE(getDict().getUnicharset().unichar_to_id(unichar),
                         rating, certainty, -1, -1, 0,
                         min_xheight, max_xheight, false);
}
Exemplo n.º 8
0
/// Recursive helper to find a match to the target_text (from text_index
/// position) in the choices (from choices_pos position).
/// @param choices is an array of GenericVectors, of length choices_length,
/// with each element representing a starting position in the word, and the
/// #GenericVector holding classification results for a sequence of consecutive
/// blobs, with index 0 being a single blob, index 1 being 2 blobs etc.
/// @param choices_pos
/// @param choices_length
/// @param target_text
/// @param text_index
/// @param rating
/// @param segmentation
/// @param best_rating
/// @param best_segmentation
void Tesseract::SearchForText(const GenericVector<BLOB_CHOICE_LIST*>* choices,
                              int choices_pos, int choices_length,
                              const GenericVector<UNICHAR_ID>& target_text,
                              int text_index,
                              float rating, GenericVector<int>* segmentation,
                              float* best_rating,
                              GenericVector<int>* best_segmentation) {
  const UnicharAmbigsVector& table = getDict().getUnicharAmbigs().dang_ambigs();
  for (int length = 1; length <= choices[choices_pos].size(); ++length) {
    // Rating of matching choice or worst choice if no match.
    float choice_rating = 0.0f;
    // Find the corresponding best BLOB_CHOICE.
    BLOB_CHOICE_IT choice_it(choices[choices_pos][length - 1]);
    for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
         choice_it.forward()) {
      BLOB_CHOICE* choice = choice_it.data();
      choice_rating = choice->rating();
      UNICHAR_ID class_id = choice->unichar_id();
      if (class_id == target_text[text_index]) {
        break;
      }
      // Search ambigs table.
      if (class_id < table.size() && table[class_id] != NULL) {
        AmbigSpec_IT spec_it(table[class_id]);
        for (spec_it.mark_cycle_pt(); !spec_it.cycled_list();
             spec_it.forward()) {
          const AmbigSpec *ambig_spec = spec_it.data();
          // We'll only do 1-1.
          if (ambig_spec->wrong_ngram[1] == INVALID_UNICHAR_ID &&
              ambig_spec->correct_ngram_id == target_text[text_index])
            break;
        }
        if (!spec_it.cycled_list())
          break;  // Found an ambig.
      }
    }
    if (choice_it.cycled_list())
      continue;  // No match.
    segmentation->push_back(length);
    if (choices_pos + length == choices_length &&
        text_index + 1 == target_text.size()) {
      // This is a complete match. If the rating is good record a new best.
      if (applybox_debug > 2) {
        tprintf("Complete match, rating = %g, best=%g, seglength=%d, best=%d\n",
                rating + choice_rating, *best_rating, segmentation->size(),
                best_segmentation->size());
      }
      if (best_segmentation->empty() || rating + choice_rating < *best_rating) {
        *best_segmentation = *segmentation;
        *best_rating = rating + choice_rating;
      }
    } else if (choices_pos + length < choices_length &&
               text_index + 1 < target_text.size()) {
      if (applybox_debug > 3) {
        tprintf("Match found for %d=%s:%s, at %d+%d, recursing...\n",
                target_text[text_index],
                unicharset.id_to_unichar(target_text[text_index]),
                choice_it.data()->unichar_id() == target_text[text_index]
                     ? "Match" : "Ambig",
                choices_pos, length);
      }
      SearchForText(choices, choices_pos + length, choices_length, target_text,
                    text_index + 1, rating + choice_rating, segmentation,
                    best_rating, best_segmentation);
      if (applybox_debug > 3) {
        tprintf("End recursion for %d=%s\n", target_text[text_index],
                unicharset.id_to_unichar(target_text[text_index]));
      }
    }
    segmentation->truncate(segmentation->size() - 1);
  }
}