예제 #1
0
/**********************************************************************
 * fill_filtered_fragment_list
 *
 * Filter the fragment list so that the filtered_choices only contain
 * fragments that are in the correct position. choices is the list
 * that we are going to filter. fragment_pos is the position in the
 * fragment that we are looking for and num_frag_parts is the the
 * total number of pieces. The result will be appended to
 * filtered_choices.
 **********************************************************************/
void Wordrec::fill_filtered_fragment_list(BLOB_CHOICE_LIST *choices,
                                          int fragment_pos,
                                          int num_frag_parts,
                                          BLOB_CHOICE_LIST *filtered_choices) {
  BLOB_CHOICE_IT filtered_choices_it(filtered_choices);
  BLOB_CHOICE_IT choices_it(choices);

  for (choices_it.mark_cycle_pt(); !choices_it.cycled_list();
       choices_it.forward()) {
    UNICHAR_ID choice_unichar_id = choices_it.data()->unichar_id();
    const CHAR_FRAGMENT *frag = unicharset.get_fragment(choice_unichar_id);

    if (frag != NULL && frag->get_pos() == fragment_pos &&
        frag->get_total() == num_frag_parts) {
      // Recover the unichar_id of the unichar that this fragment is
      // a part of
      BLOB_CHOICE *b = new BLOB_CHOICE(*choices_it.data());
      int original_unichar = unicharset.unichar_to_id(frag->get_unichar());
      b->set_unichar_id(original_unichar);
      filtered_choices_it.add_to_end(b);
    }
  }

  filtered_choices->sort(SortByUnicharID<BLOB_CHOICE>);
}
예제 #2
0
void Dict::ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size,
                        UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice,
                        MATRIX *ratings) {
  int num_blobs_to_replace = 0;
  int begin_blob_index = 0;
  int i;
  // Rating and certainty for the new BLOB_CHOICE are derived from the
  // replaced choices.
  float new_rating = 0.0f;
  float new_certainty = 0.0f;
  BLOB_CHOICE* old_choice = nullptr;
  for (i = 0; i < wrong_ngram_begin_index + wrong_ngram_size; ++i) {
    if (i >= wrong_ngram_begin_index) {
      int num_blobs = werd_choice->state(i);
      int col = begin_blob_index + num_blobs_to_replace;
      int row = col + num_blobs - 1;
      BLOB_CHOICE_LIST* choices = ratings->get(col, row);
      ASSERT_HOST(choices != nullptr);
      old_choice = FindMatchingChoice(werd_choice->unichar_id(i), choices);
      ASSERT_HOST(old_choice != nullptr);
      new_rating += old_choice->rating();
      new_certainty += old_choice->certainty();
      num_blobs_to_replace += num_blobs;
    } else {
      begin_blob_index += werd_choice->state(i);
    }
  }
  new_certainty /= wrong_ngram_size;
  // If there is no entry in the ratings matrix, add it.
  MATRIX_COORD coord(begin_blob_index,
                     begin_blob_index + num_blobs_to_replace - 1);
  if (!coord.Valid(*ratings)) {
    ratings->IncreaseBandSize(coord.row - coord.col + 1);
  }
  if (ratings->get(coord.col, coord.row) == nullptr)
    ratings->put(coord.col, coord.row, new BLOB_CHOICE_LIST);
  BLOB_CHOICE_LIST* new_choices = ratings->get(coord.col, coord.row);
  BLOB_CHOICE* choice = FindMatchingChoice(correct_ngram_id, new_choices);
  if (choice != nullptr) {
    // Already there. Upgrade if new rating better.
    if (new_rating < choice->rating())
      choice->set_rating(new_rating);
    if (new_certainty < choice->certainty())
      choice->set_certainty(new_certainty);
    // DO NOT SORT!! It will mess up the iterator in LanguageModel::UpdateState.
  } else {
    // Need a new choice with the correct_ngram_id.
    choice = new BLOB_CHOICE(*old_choice);
    choice->set_unichar_id(correct_ngram_id);
    choice->set_rating(new_rating);
    choice->set_certainty(new_certainty);
    choice->set_classifier(BCC_AMBIG);
    choice->set_matrix_cell(coord.col, coord.row);
    BLOB_CHOICE_IT it (new_choices);
    it.add_to_end(choice);
  }
  // Remove current unichar from werd_choice. On the last iteration
  // set the correct replacement unichar instead of removing a unichar.
  for (int replaced_count = 0; replaced_count < wrong_ngram_size;
       ++replaced_count) {
    if (replaced_count + 1 == wrong_ngram_size) {
      werd_choice->set_blob_choice(wrong_ngram_begin_index,
                                   num_blobs_to_replace, choice);
    } else {
      werd_choice->remove_unichar_id(wrong_ngram_begin_index + 1);
    }
  }
  if (stopper_debug_level >= 1) {
      werd_choice->print("ReplaceAmbig() ");
      tprintf("Modified blob_choices: ");
      print_ratings_list("\n", new_choices, getUnicharset());
  }
}