Пример #1
0
/**********************************************************************
 * chop_word_main
 *
 * Classify every blob of the word and permute the per-blob choices into
 * a word-level choice.  When that choice is not acceptable (or, with
 * tester / trainer set, when its string differs from word->correct),
 * try to improve it by chopping blobs and then by running the word
 * associator.  best_choice and raw_choice are reset on entry and handed
 * to the permuter and to the improvement stages.  Returns the list of
 * per-blob choices.
 **********************************************************************/
CHOICES_LIST chop_word_main(register TWERD *word,
                            int fx,
                            A_CHOICE *best_choice,
                            A_CHOICE *raw_choice,
                            BOOL8 tester,
                            BOOL8 trainer) {
  /* These persist across calls; both are also handed to the classifier. */
  static STATE best_state;
  static STATE chop_states[64];  /* in between states */

  TBLOB *prev_blob = NULL;
  TBLOB *cur_blob;
  CHOICES_LIST char_choices;
  CHOICES blob_choices;
  STATE state;
  SEAMS seam_list = NULL;
  MATRIX ratings = NULL;
  DANGERR fixpt;                 /* dangerous ambig */
  INT32 state_count = 0;         /* no of states */
  INT32 bit_count;               /* no of bits */
  float rating_limit = 1000.0;
  int blob_index = 0;
  int did_chopping = 0;
  int needs_work;
  int run_associator;

  set_null_choice(best_choice);
  set_null_choice(raw_choice);

  char_choices = new_choice_list ();

  /* Initial pass: classify each blob with its neighbours as context. */
  for (cur_blob = word->blobs; cur_blob != NULL; cur_blob = cur_blob->next) {
    blob_choices =
      (CHOICES) classify_blob (prev_blob, cur_blob, cur_blob->next, NULL, fx,
      "chop_word:", Green, &chop_states[0],
      &best_state, matcher_pass, blob_index);
    char_choices = array_push (char_choices, blob_choices);
    prev_blob = cur_blob;
    blob_index++;
  }
  bit_count = blob_index - 1;
  permute_characters(char_choices, rating_limit, best_choice, raw_choice);

  set_n_ones (&state, array_count (char_choices) - 1);
  if (matcher_fp != NULL) {
    if (matcher_pass == 0) {
      bits_in_states = bit_count;
      chop_states[state_count] = state;
    }
    state_count++;
  }

  /* More work is needed when the choice is unacceptable, or when running
     as tester / trainer and the choice does not match the known answer.
     The string comparison is only reached if the choice was acceptable. */
  needs_work =
    !AcceptableChoice (char_choices, best_choice, raw_choice, &fixpt);
  if (!needs_work && (tester || trainer)) {
    needs_work = strcmp (word->correct, class_string (best_choice)) != 0;
  }

  if (needs_work) {
    did_chopping = 1;
    if (first_pass) {
      words_chopped1++;
    } else {
      words_chopped2++;
    }

    seam_list = start_seam_list (word->blobs);

    if (chop_enable) {
      improve_by_chopping(word, &char_choices, fx, &state,
                          best_choice, raw_choice,
                          &seam_list, &fixpt,
                          chop_states, &state_count,
                          &best_state, matcher_pass);
    }

    if (chop_debug) {
      print_seams ("Final seam list:", seam_list);
    }

    /* Same decision as above, except that the acceptability test is only
       consulted (and only evaluated) when the associator is enabled. */
    run_associator = enable_assoc &&
      !AcceptableChoice (char_choices, best_choice, raw_choice, NULL);
    if (!run_associator && (tester || trainer)) {
      run_associator =
        strcmp (word->correct, class_string (best_choice)) != 0;
    }
    if (run_associator) {
      ratings = word_associator (word->blobs, seam_list, &state, fx,
        best_choice, raw_choice, word->correct,
        /*0, */ &fixpt,
        &best_state, matcher_pass);
    }
    bits_in_states = bit_count + state_count - 1;
  }

  if (ratings != NULL) {
    free_matrix(ratings);
  }
  if (did_chopping || tester || trainer) {
    char_choices = rebuild_current_state (word->blobs, seam_list, &state,
      char_choices, fx);
  }
  if (seam_list != NULL) {
    free_seam_list(seam_list);
  }
  if (matcher_fp != NULL) {
    best_state = state;
  }
  FilterWordChoices();
  return char_choices;
}
Пример #2
0
/**
 * @name chop_word_main
 *
 * Classify the blobs in this word and permute the results.  If the
 * permuted best choice is not acceptable, try to improve it by chopping
 * blobs and, depending on the associator flags, by running the word
 * associator.  The per-blob choices are finally rebuilt for the chosen
 * state.
 *
 * @param word  Word to recognise.  Its denorm, chopped_word, best_choice,
 *              raw_choice and seam_array members are used here.
 * @return Heap-allocated vector of per-blob choice lists as produced by
 *         rebuild_current_state().  It is not freed by this function.
 */
BLOB_CHOICE_LIST_VECTOR *Wordrec::chop_word_main(WERD_RES *word) {
  STATE state;
  MATRIX *ratings = NULL;
  DANGERR fixpt;                 /*dangerous ambig */

  set_denorm(&word->denorm);

  BLOB_CHOICE_LIST_VECTOR *char_choices = new BLOB_CHOICE_LIST_VECTOR();
  BLOB_CHOICE_LIST_VECTOR *best_char_choices = new BLOB_CHOICE_LIST_VECTOR();

  // Initial pass: one classification result per blob of the chopped word.
  for (TBLOB *blob = word->chopped_word->blobs; blob != NULL;
       blob = blob->next) {
    BLOB_CHOICE_LIST *match_result = classify_blob(blob, "chop_word:", Green);
    // NOTE(review): a NULL result is reported but still appended below;
    // confirm that downstream code tolerates NULL entries.
    if (match_result == NULL)
      cprintf("Null classifier output!\n");
    *char_choices += match_result;
  }
  set_n_ones(&state, char_choices->length() - 1);

  bool acceptable = false;
  bool replaced = false;
  bool best_choice_updated =
    getDict().permute_characters(*char_choices, word->best_choice,
                                 word->raw_choice);
  // AcceptableChoice is only consulted when the permuter reported an update;
  // otherwise `replaced` keeps its initial value.
  if (best_choice_updated &&
      getDict().AcceptableChoice(char_choices, word->best_choice, &fixpt,
                                 CHOPPER_CALLER, &replaced)) {
    acceptable = true;
  }
  if (replaced)
    update_blob_classifications(word->chopped_word, *char_choices);
  CopyCharChoices(*char_choices, best_char_choices);

  if (!acceptable) {  // do more work to find a better choice
    bool best_choice_acceptable = false;
    if (chop_enable)
      improve_by_chopping(word,
                          char_choices,
                          &state,
                          best_char_choices,
                          &fixpt,
                          &best_choice_acceptable);
    if (chop_debug)
      print_seams ("Final seam list:", word->seam_array);

    // The force_word_assoc is almost redundant to enable_assoc.  However,
    // it is not conditioned on the dict behavior.  For CJK, we need to force
    // the associator to be invoked.  When we figure out the exact behavior
    // of dict on CJK, we can remove the flag if it turns out to be redundant.
    if ((wordrec_enable_assoc && !best_choice_acceptable) || force_word_assoc) {
      // The same STATE object is passed for both state arguments.
      ratings = word_associator(word, &state, best_char_choices,
                                &fixpt, &state);
    }
  }

  best_char_choices = rebuild_current_state(word, &state, best_char_choices,
                                            ratings);
  if (ratings != NULL) {
    if (wordrec_debug_level > 0) {
      tprintf("Final Ratings Matrix:\n");
      ratings->print(getDict().getUnicharset());
    }
    ratings->delete_matrix_pointers();
    delete ratings;
  }
  getDict().FilterWordChoices();

  // The working copy is no longer needed; only best_char_choices is returned.
  char_choices->delete_data_pointers();
  delete char_choices;

  return best_char_choices;
}