Пример #1
0
/**
 * @name chop_word_main
 *
 * Classify the blobs in this word and permute the results.  Find the
 * worst blob in the word and chop it up.  Continue this process until
 * a good answer has been found or all the blobs have been chopped up
 * enough.  The results are returned in the WERD_RES.
 */
void Wordrec::chop_word_main(WERD_RES *word) {
  int num_blobs = word->chopped_word->NumBlobs();
  if (word->ratings == NULL) {
    word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks);
  }
  if (word->ratings->get(0, 0) == NULL) {
    // Run initial classification.
    for (int b = 0; b < num_blobs; ++b) {
      BLOB_CHOICE_LIST* choices = classify_piece(word->seam_array, b, b,
                                                 "Initial:", word->chopped_word,
                                                 word->blamer_bundle);
      word->ratings->put(b, b, choices);
    }
  } else {
    // Blobs have been pre-classified. Set matrix cell for all blob choices
    for (int col = 0; col < word->ratings->dimension(); ++col) {
      for (int row = col; row < word->ratings->dimension() &&
           row < col + word->ratings->bandwidth(); ++row) {
        BLOB_CHOICE_LIST* choices = word->ratings->get(col, row);
        if (choices != NULL) {
          BLOB_CHOICE_IT bc_it(choices);
          for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
            bc_it.data()->set_matrix_cell(col, row);
          }
        }
      }
    }
  }

  // Run Segmentation Search.
  BestChoiceBundle best_choice_bundle(word->ratings->dimension());
  SegSearch(word, &best_choice_bundle, word->blamer_bundle);

  if (word->best_choice == NULL) {
    // SegSearch found no valid paths, so just use the leading diagonal.
    word->FakeWordFromRatings();
  }
  word->RebuildBestState();
  // If we finished without a hyphen at the end of the word, let the next word
  // be found in the dictionary.
  if (word->word->flag(W_EOL) &&
      !getDict().has_hyphen_end(*word->best_choice)) {
    getDict().reset_hyphen_vars(true);
  }

  if (word->blamer_bundle != NULL && this->fill_lattice_ != NULL) {
    CallFillLattice(*word->ratings, word->best_choices,
                    *word->uch_set, word->blamer_bundle);
  }
  if (wordrec_debug_level > 0) {
    tprintf("Final Ratings Matrix:\n");
    word->ratings->print(getDict().getUnicharset());
  }
  word->FilterWordChoices(getDict().stopper_debug_level);
}
Пример #2
0
MATRIX *Wordrec::word_associator(WERD_RES *word,
                                 STATE *state,
                                 BLOB_CHOICE_LIST_VECTOR *best_char_choices,
                                 DANGERR *fixpt,
                                 STATE *best_state) {
  CHUNKS_RECORD chunks_record;
  BLOB_WEIGHTS blob_weights;
  int x;
  int num_chunks;
  BLOB_CHOICE_IT blob_choice_it;

  num_chunks = array_count(word->seam_array) + 1;

  TBLOB* blobs = word->chopped_word->blobs;
  chunks_record.chunks = blobs;
  chunks_record.splits = word->seam_array;
  chunks_record.ratings = record_piece_ratings (blobs);
  chunks_record.char_widths = blobs_widths (blobs);
  chunks_record.chunk_widths = blobs_widths (blobs);
  /* Save chunk weights */
  for (x = 0; x < num_chunks; x++) {
    BLOB_CHOICE_LIST* choices = get_piece_rating(chunks_record.ratings,
                                                 blobs, word->seam_array, x, x);
    blob_choice_it.set_to_list(choices);
    //This is done by Jetsoft. Divide by zero is possible.
    if (blob_choice_it.data()->certainty() == 0) {
      blob_weights[x]=0;
    } else {
      blob_weights[x] =
        -(inT16) (10 * blob_choice_it.data()->rating() /
                  blob_choice_it.data()->certainty());
    }
  }
  chunks_record.weights = blob_weights;

  if (chop_debug)
    chunks_record.ratings->print(getDict().getUnicharset());

  if (enable_new_segsearch) {
    SegSearch(&chunks_record, word->best_choice,
              best_char_choices, word->raw_choice, state);
  } else {
    best_first_search(&chunks_record, best_char_choices, word,
                      state, fixpt, best_state);
  }

  free_widths (chunks_record.chunk_widths);
  free_widths (chunks_record.char_widths);
  return chunks_record.ratings;
}
Пример #3
0
void Wordrec::DoSegSearch(WERD_RES* word_res) {
  BestChoiceBundle best_choice_bundle(word_res->ratings->dimension());
  // Run Segmentation Search.
  SegSearch(word_res, &best_choice_bundle, NULL);
}