Exemple #1
0
/**
 * @name improve_by_chopping
 *
 * Repeatedly asks improve_one_blob() to split one blob of the chopped
 * word and re-runs the permuter after every successful split.  The loop
 * ends when no further split is produced, when an updated best choice
 * is accepted by Dict::AcceptableChoice(), or when the number of
 * character choice lists reaches MAX_NUM_CHUNKS.
 *
 * @param word  Word being chopped; its chopped_word, seam_array,
 *              best_choice and raw_choice members are used here.
 * @param char_choices  Choice lists handed to the chopper, the permuter
 *                      and the acceptability check.
 * @param best_state  Updated after every split: reset with set_n_ones()
 *                    when the split lowered the best choice's rating,
 *                    otherwise extended with insert_new_chunk().
 * @param best_char_choices  Receives a copy of char_choices whenever
 *                           the permuter reports an updated best choice.
 * @param fixpt  Passed to improve_one_blob() and AcceptableChoice();
 *               cleared whenever the most recent split did not lower
 *               the best choice's rating.
 * @param best_choice_acceptable  Written with the result of
 *               AcceptableChoice() each time the permuter reports an
 *               updated best choice; otherwise left untouched.
 */
void Wordrec::improve_by_chopping(WERD_RES *word,
                                  BLOB_CHOICE_LIST_VECTOR *char_choices,
                                  STATE *best_state,
                                  BLOB_CHOICE_LIST_VECTOR *best_char_choices,
                                  DANGERR *fixpt,
                                  bool *best_choice_acceptable) {
  inT32 split_blob;
  bool fixpt_is_current = true;
  bool permuter_updated_best = false;

  for (;;) {  // improvement loop
    // A fixpt left over from a split that did not help is discarded
    // before the next attempt.
    if (!fixpt_is_current) {
      fixpt->clear();
    }

    const float rating_before = word->best_choice->rating();
    const bool split_fragments =
        fragments_guide_chopper && word->best_choice->fragment_mark();
    if (!improve_one_blob(word->chopped_word, char_choices, &split_blob,
                          &word->seam_array, fixpt, split_fragments)) {
      break;  // No further split was produced.
    }

    getDict().LogNewSplit(split_blob);
    permuter_updated_best =
        getDict().permute_characters(*char_choices, word->best_choice,
                                     word->raw_choice);

    // The split "helped" only if the best rating is now strictly lower.
    fixpt_is_current = rating_before > word->best_choice->rating();
    if (fixpt_is_current) {
      set_n_ones(best_state, char_choices->length() - 1);
    } else {
      insert_new_chunk(best_state, split_blob, char_choices->length() - 2);
    }

    if (chop_debug) {
      print_state("best state = ",
                  best_state, count_blobs(word->chopped_word->blobs) - 1);
    }

    // Decide whether this was the last iteration.  The acceptability
    // check runs only when the permuter updated the best choice; the
    // chunk limit is consulted only if that check did not already stop us.
    bool choices_replaced = false;
    bool stop = false;
    if (permuter_updated_best) {
      *best_choice_acceptable =
          getDict().AcceptableChoice(char_choices, word->best_choice,
                                     fixpt, CHOPPER_CALLER, &choices_replaced);
      stop = *best_choice_acceptable;
    }
    if (!stop && char_choices->length() >= MAX_NUM_CHUNKS) {
      stop = true;
    }

    if (choices_replaced) {
      update_blob_classifications(word->chopped_word, *char_choices);
    }
    if (permuter_updated_best) {
      CopyCharChoices(*char_choices, best_char_choices);
    }
    if (stop) {
      break;
    }
  }

  if (!fixpt_is_current) {
    fixpt->clear();
  }
}
/**********************************************************************
 * improve_by_chopping
 *
 * Start with the current word of blobs and its classification.  Find
 * the worst blobs and try to divide them up to improve the ratings.
 * As long as ratings are produced by the new blob splitting.  When
 * all the splitting has been accomplished all the ratings memory is
 * reclaimed.
 **********************************************************************/
void improve_by_chopping(register TWERD *word,
                         CHOICES_LIST *char_choices,
                         int fx,
                         STATE *best_state,
                         A_CHOICE *best_choice,
                         A_CHOICE *raw_choice,
                         SEAMS *seam_list,
                         DANGERR *fixpt,
                         STATE *chop_states,
                         INT32 *state_count,
                         STATE *correct_state,
                         INT32 pass) {
  INT32 blob_number;
  INT32 index;                   //to states
  CHOICES_LIST choices = *char_choices;
  float old_best;
  int fixpt_valid = 1;
  static INT32 old_count;        //from pass1

  do {
                                 /* Improvement loop */
    if (!fixpt_valid)
      fixpt->index = -1;
    old_best = class_probability (best_choice);
    choices = improve_one_blob (word, *char_choices, fx,
      &blob_number, seam_list, fixpt,
      chop_states + *state_count, correct_state,
      pass);
    if (choices != NULL) {
      LogNewSplit(blob_number);
      permute_characters (choices,
        class_probability (best_choice),
        best_choice, raw_choice);
      *char_choices = choices;

      if (old_best > class_probability (best_choice)) {
        set_n_ones (best_state, array_count (*char_choices) - 1);
        fixpt_valid = 1;
      }
      else {
        insert_new_chunk (best_state, blob_number,
          array_count (*char_choices) - 2);
        fixpt_valid = 0;
      }
      if (*state_count > 0) {
        if (pass == 0) {
          for (index = 0; index < *state_count; index++)
            insert_new_chunk (&chop_states[index], blob_number,
              array_count (*char_choices) - 2);
          set_n_ones (&chop_states[index],
            array_count (*char_choices) - 1);
        }
        (*state_count)++;
      }

      if (chop_debug)
        print_state ("best state = ",
          best_state, count_blobs (word->blobs) - 1);
      if (first_pass)
        chops_performed1++;
      else
        chops_performed2++;

    }
  }
  while (choices &&
    !AcceptableChoice (*char_choices, best_choice, raw_choice, fixpt) &&
    !blob_skip && array_count (*char_choices) < MAX_NUM_CHUNKS);
  if (pass == 0)
    old_count = *state_count;
  else {
    if (old_count != *state_count)
      fprintf (matcher_fp,
        "Mis-matched state counts, " INT32FORMAT " pass1, "
        INT32FORMAT " pass2\n", old_count, *state_count);
  }
  if (!fixpt_valid)
    fixpt->index = -1;
}