/**********************************************************************
 * improve_one_blob
 *
 * Ask select_blob_to_split() for a blob to split, try to chop it with
 * attempt_blob_chop(), and on success record the new seam and
 * reclassify the two resulting pieces.
 *
 * word           word whose blob chain (word->blobs) is being chopped
 * char_choices   per-blob classifier results; updated and returned
 * fx             forwarded to classify_blob()
 * blob_number    out: index of the blob that was selected (-1 if none)
 * seam_list      in/out: receives the newly inserted seam on success
 * fixpt          not referenced by this function
 * this_state, correct_state, pass
 *                forwarded to classify_blob()
 *
 * Returns the updated choice list, or NULL when no blob was split.
 *********************************************************************/
CHOICES_LIST improve_one_blob(TWERD *word,
                              CHOICES_LIST char_choices,
                              int fx,
                              INT32 *blob_number,
                              SEAMS *seam_list,
                              DANGERR *fixpt,
                              STATE *this_state,
                              STATE *correct_state,
                              INT32 pass) {
  TBLOB *pblob;
  TBLOB *blob;
  INT16 x = 0;
  float rating_ceiling = MAX_FLOAT32;
  CHOICES answer;
  SEAM *seam;

  do {
    *blob_number = select_blob_to_split (char_choices, rating_ceiling);
    if (*blob_number == -1)
      return (NULL);             /* Nothing left to split */

    seam = attempt_blob_chop (word, *blob_number, *seam_list);
    if (seam != NULL)
      break;
    /* Must split null blobs */
    answer = (CHOICES) array_value (char_choices, *blob_number);
    if (answer == NIL)
      return (NULL);
    /* Lower the ceiling to this blob's rating so that the next call to
       select_blob_to_split() is given a tighter limit (try different blob) */
    rating_ceiling = best_probability (answer);
  }
  while (!blob_skip);

  /* The loop also terminates when blob_skip is set, in which case the
     last chop attempt failed and seam is still NULL.  There is no seam
     to insert, so report failure instead of falling through. */
  if (seam == NULL)
    return (NULL);

  /* Split OK: walk to the chopped blob, remembering its predecessor */
  for (blob = word->blobs, pblob = NULL; x < *blob_number; x++) {
    pblob = blob;
    blob = blob->next;
  }

  *seam_list =
    insert_seam (*seam_list, *blob_number, seam, blob, word->blobs);

  /* The old classification of the unsplit blob is discarded */
  free_choices ((CHOICES) array_value (char_choices, *blob_number));

  /* First piece: a new entry is inserted at blob_number */
  answer =
    classify_blob (pblob, blob, blob->next, NULL, fx, "improve 1:", Red,
    this_state, correct_state, pass, *blob_number);
  char_choices = array_insert (char_choices, *blob_number, answer);

  /* Second piece: stored at blob_number + 1 */
  answer =
    classify_blob (blob, blob->next, blob->next->next, NULL, fx, "improve 2:",
    Yellow, this_state, correct_state, pass, *blob_number + 1);
  array_value (char_choices, *blob_number + 1) = (char *) answer;

  return (char_choices);
}
Beispiel #2
0
/**
 * @name improve_one_blob
 *
 * Keeps asking select_blob_to_split() for a candidate blob and tries to
 * chop it with attempt_blob_chop(). Once a chop succeeds, the seam is
 * inserted into *seam_list and both resulting pieces are reclassified.
 *
 * @param word  word whose blob chain (word->blobs) is chopped
 * @param char_choices  per-blob classification results, updated in place
 * @param blob_number  receives the index of the selected blob (-1 if none)
 * @param seam_list  seam list that receives the new seam
 * @param fixpt  not referenced by this function
 * @param split_next_to_fragment  forwarded to select_blob_to_split()
 * @return true when a blob was split and reclassified, false otherwise
 */
bool Wordrec::improve_one_blob(TWERD *word,
                               BLOB_CHOICE_LIST_VECTOR *char_choices,
                               inT32 *blob_number,
                               SEAMS *seam_list,
                               DANGERR *fixpt,
                               bool split_next_to_fragment) {
  float ceiling = MAX_FLOAT32;
  SEAM *new_seam;
  BLOB_CHOICE_IT choice_it;

  for (;;) {
    *blob_number = select_blob_to_split(*char_choices, ceiling,
                                        split_next_to_fragment);
    if (chop_debug) {
      cprintf("blob_number = %d\n", *blob_number);
    }
    if (*blob_number == -1) {
      return false;  // no candidate left
    }

    // TODO(rays) it may eventually help to allow italic_blob to be true
    // (presumably the third argument, which is passed as false here).
    new_seam = attempt_blob_chop(word, *blob_number, false, *seam_list);
    if (new_seam != NULL) {
      break;
    }

    // The chop failed: give up if this blob has no choices, otherwise use
    // the rating of its first choice as the ceiling for the next selection.
    BLOB_CHOICE_LIST *failed_choices = char_choices->get(*blob_number);
    if (failed_choices == NULL) {
      return false;
    }
    choice_it.set_to_list(failed_choices);
    ceiling = choice_it.data()->rating();
  }

  // Split OK: walk the chain to the blob that was chopped.
  TBLOB *split_blob = word->blobs;
  for (inT16 i = 0; i < *blob_number; i++) {
    split_blob = split_blob->next;
  }

  *seam_list = insert_seam(*seam_list, *blob_number, new_seam, split_blob,
                           word->blobs);

  // The classification of the unsplit blob is no longer wanted.
  delete char_choices->get(*blob_number);

  // First piece is inserted at blob_number ...
  BLOB_CHOICE_LIST *first_piece = classify_blob(split_blob, "improve 1:", Red);
  char_choices->insert(first_piece, *blob_number);

  // ... and the second piece is stored at blob_number + 1.
  BLOB_CHOICE_LIST *second_piece =
      classify_blob(split_blob->next, "improve 2:", Yellow);
  char_choices->set(second_piece, *blob_number + 1);

  return true;
}
Beispiel #3
0
/**
 * Joins the pieces from start to end with join_pieces(), classifies the
 * blob found at position `start` in the chain, then calls break_pieces()
 * with the same arguments before returning the classifier output.
 * When graphics are enabled and wordrec_display_segmentations > 2, the
 * segmentation is displayed and the function waits on segm_window.
 */
BLOB_CHOICE_LIST *Wordrec::classify_piece(TBLOB *pieces,
                                          const DENORM& denorm,
                                          SEAMS seams,
                                          inT16 start,
                                          inT16 end,
                                          BlamerBundle *blamer_bundle) {
  join_pieces(pieces, seams, start, end);

  // Advance `start` links down the chain to reach the blob to classify.
  TBLOB *joined = pieces;
  for (inT16 remaining = start; remaining > 0; --remaining) {
    joined = joined->next;
  }

  BLOB_CHOICE_LIST *result =
      classify_blob(joined, denorm, "pieces:", White, blamer_bundle);

  break_pieces(joined, seams, start, end);

#ifndef GRAPHICS_DISABLED
  if (wordrec_display_segmentations > 2) {
    STATE all_ones;
    set_n_ones(&all_ones, array_count(seams));
    SEARCH_STATE groups = bin_to_chunks(&all_ones, array_count(seams));
    display_segmentation(pieces, groups);
    window_wait(segm_window);
    memfree(groups);
  }
#endif

  return result;
}
/**
 * Joins the pieces from start to end with join_pieces(), locates the blob
 * at position `start` together with its predecessor and the blob that
 * follows the joined range, classifies it, and then calls break_pieces()
 * with the same arguments. Returns the classifier output.
 * When graphics are enabled and wordrec_display_segmentations > 2, the
 * segmentation is displayed and the function waits on segm_window.
 */
BLOB_CHOICE_LIST *Wordrec::classify_piece(TBLOB *pieces,
                                          SEAMS seams,
                                          inT16 start,
                                          inT16 end) {
  STATE all_ones;
  set_n_ones(&all_ones, array_count(seams));

  join_pieces(pieces, seams, start, end);

  // Walk to the blob at `start`, keeping track of the one before it.
  TBLOB *before = NULL;
  TBLOB *joined = pieces;
  inT16 pos = 0;
  while (pos < start) {
    before = joined;
    joined = joined->next;
    pos++;
  }

  // Continue from the blob after `joined` until the counter reaches `end`.
  TBLOB *after = joined->next;
  while (pos < end) {
    after = after->next;
    pos++;
  }

  BLOB_CHOICE_LIST *result =
      classify_blob(before, joined, after, NULL, "pieces:", White);

  break_pieces(joined, seams, start, end);

#ifndef GRAPHICS_DISABLED
  if (wordrec_display_segmentations > 2) {
    SEARCH_STATE groups = bin_to_chunks(&all_ones, array_count(seams));
    display_segmentation(pieces, groups);
    window_wait(segm_window);
    memfree(groups);
  }
#endif

  return result;
}
Beispiel #5
0
/**
 * Classifies word->blobs[start]. When end > start the pieces are first
 * joined with join_pieces() and split again with break_pieces() afterwards.
 * Every BLOB_CHOICE in the result is tagged with the matrix cell
 * (start, end) before the list is returned.
 */
BLOB_CHOICE_LIST *Wordrec::classify_piece(const GenericVector<SEAM*>& seams,
                                          inT16 start,
                                          inT16 end,
                                          const char* description,
                                          TWERD *word,
                                          BlamerBundle *blamer_bundle) {
  const bool spans_several = end > start;
  if (spans_several) {
    join_pieces(seams, start, end, word);
  }

  BLOB_CHOICE_LIST *result =
      classify_blob(word->blobs[start], description, White, blamer_bundle);

  // Record which matrix cell each choice belongs to.
  BLOB_CHOICE_IT choice_it(result);
  choice_it.mark_cycle_pt();
  while (!choice_it.cycled_list()) {
    choice_it.data()->set_matrix_cell(start, end);
    choice_it.forward();
  }

  if (spans_several) {
    break_pieces(seams, start, end, word);
  }

  return result;
}
Beispiel #6
0
// Joins the chopped blobs x..y (when x != y), copy-constructs the blob at
// index x and pushes the copy onto the front of word->rebuild_word->blobs.
// Returns the choices for that blob, taken from the first source that has
// them: old_choices[choice_index] (the slot is then set to NULL), the
// ratings matrix cell (x, y) (cleared unless it holds NOT_CLASSIFIED), or
// a fresh classify_blob() call. The join is undone before returning.
BLOB_CHOICE_LIST *Wordrec::join_blobs_and_classify(
    WERD_RES* word, int x, int y, int choice_index, MATRIX *ratings,
    BLOB_CHOICE_LIST_VECTOR *old_choices) {
  const bool needs_join = (x != y);
  if (needs_join) {
    join_pieces(word->chopped_word->blobs, word->seam_array, x, y);
  }

  // Step x links down the chopped-word chain.
  TBLOB *target = word->chopped_word->blobs;
  for (int remaining = x; remaining > 0; --remaining) {
    target = target->next;
  }

  // Prepend a copy of the blob to the rebuilt word.
  TBLOB *clone = new TBLOB(*target);
  clone->next = word->rebuild_word->blobs;
  word->rebuild_word->blobs = clone;

  BLOB_CHOICE_LIST *result = NULL;

  // Source 1: an existing classification in old_choices.
  if (choice_index >= 0 && old_choices != NULL) {
    result = old_choices->get(choice_index);
    old_choices->set(NULL, choice_index);
  }

  // Source 2: the ratings matrix, consulted only when source 1 had nothing.
  if (result == NULL && ratings != NULL) {
    result = ratings->get(x, y);
    if (result != NOT_CLASSIFIED) {
      ratings->put(x, y, NULL);
    }
  }

  // Source 3: run the classifier.
  if (result == NULL) {
    result = classify_blob(target, word->denorm, "rebuild", Orange,
                           word->blamer_bundle);
  }

  // Restore the chopped word to its state before the join.
  if (needs_join) {
    break_pieces(target, word->seam_array, x, y);
  }
  return result;
}
/**********************************************************************
 * classify_piece
 *
 * Join the pieces from start to end into one larger piece, classify it
 * and return the result.  break_pieces() is then called with the same
 * arguments so the collection of small pieces is left as it was.
 **********************************************************************/
CHOICES classify_piece(TBLOB *pieces,
                       SEAMS seams,
                       INT16 start,
                       INT16 end,
                       INT32 fx,
                       STATE *this_state,
                       STATE *best_state,
                       INT32 pass,
                       INT32 blob_index) {
  STATE all_ones;
  set_n_ones(&all_ones, array_count(seams));

  join_pieces(pieces, seams, start, end);

  /* Walk to the blob at `start`, keeping track of the one before it. */
  TBLOB *before = NULL;
  TBLOB *joined = pieces;
  INT16 pos = 0;
  while (pos < start) {
    before = joined;
    joined = joined->next;
    pos++;
  }

  /* Continue from the blob after `joined` until the counter reaches `end`. */
  TBLOB *after = joined->next;
  while (pos < end) {
    after = after->next;
    pos++;
  }

  CHOICES result = classify_blob(before, joined, after, NULL, fx, "pieces:",
                                 White, this_state, best_state, pass,
                                 blob_index);

  break_pieces(joined, seams, start, end);

#ifndef GRAPHICS_DISABLED
  if (display_segmentations > 2) {
    SEARCH_STATE groups = bin_to_chunks(&all_ones, array_count(seams));
    display_segmentation(pieces, groups);
    window_wait(segm_window);
    memfree(groups);
  }
#endif

  return result;
}
/**********************************************************************
 * chop_word_main
 *
 * Classify the blobs in this word and permute the results.  If the
 * result is not acceptable (or the tester/trainer answer differs from
 * word->correct), chop the word with improve_by_chopping() and, if
 * still needed, run word_associator().  Returns the per-blob choice
 * list (char_choices); best_choice and raw_choice are reset with
 * set_null_choice() and then passed to the permuter, chopper and
 * associator.
 *
 * word         word to recognise (word->blobs, word->correct are read)
 * fx           forwarded to classify_blob(), improve_by_chopping(),
 *              word_associator() and rebuild_current_state()
 * best_choice  in/out: word-level best choice
 * raw_choice   in/out: word-level raw choice
 * tester       when set, a mismatch with word->correct forces more work
 * trainer      same effect as tester in this function
 *
 * NOTE: best_state and chop_states are function-local statics, so the
 * values written to them persist across calls.
 **********************************************************************/
CHOICES_LIST chop_word_main(register TWERD *word,
                            int fx,
                            A_CHOICE *best_choice,
                            A_CHOICE *raw_choice,
                            BOOL8 tester,
                            BOOL8 trainer) {
  TBLOB *pblob;
  TBLOB *blob;
  CHOICES_LIST char_choices;
  int index;
  int did_chopping;
  float rating_limit = 1000.0;
  STATE state;
  SEAMS seam_list = NULL;
  CHOICES match_result;
  MATRIX ratings = NULL;
  DANGERR fixpt;                 /* dangerous ambiguity */
  INT32 state_count;             // number of states
  INT32 bit_count;               // number of bits
  static STATE best_state;
  static STATE chop_states[64];  // in-between states

  state_count = 0;
  set_null_choice(best_choice);
  set_null_choice(raw_choice);

  char_choices = new_choice_list ();

  did_chopping = 0;
  /* Initial pass: classify every blob, giving classify_blob() the
     previous and next blob as well */
  for (blob = word->blobs, pblob = NULL, index = 0; blob != NULL;
  blob = blob->next, index++) {
    match_result =
      (CHOICES) classify_blob (pblob, blob, blob->next, NULL, fx,
      "chop_word:", Green, &chop_states[0],
      &best_state, matcher_pass, index);
    char_choices = array_push (char_choices, match_result);
    pblob = blob;
  }
  /* index now equals the number of blobs visited by the loop above */
  bit_count = index - 1;
  permute_characters(char_choices, rating_limit, best_choice, raw_choice);

  set_n_ones (&state, array_count (char_choices) - 1);
  /* Bookkeeping that only happens when matcher_fp is set */
  if (matcher_fp != NULL) {
    if (matcher_pass == 0) {
      bits_in_states = bit_count;
      chop_states[state_count] = state;
    }
    state_count++;
  }

  /* && binds tighter than ||, so this reads as:
     !AcceptableChoice(...) ||
     ((tester || trainer) && strcmp(...) != 0) */
  if (!AcceptableChoice (char_choices, best_choice, raw_choice, &fixpt)
    || (tester || trainer)
  && strcmp (word->correct, class_string (best_choice))) {
    did_chopping = 1;
    /* Count the chopped word under whichever pass first_pass indicates */
    if (first_pass)
      words_chopped1++;
    else
      words_chopped2++;

    seam_list = start_seam_list (word->blobs);

    if (chop_enable)
      improve_by_chopping(word,
                          &char_choices,
                          fx,
                          &state,
                          best_choice,
                          raw_choice,
                          &seam_list,
                          &fixpt,
                          chop_states,
                          &state_count,
                          &best_state,
                          matcher_pass);

    if (chop_debug)
      print_seams ("Final seam list:", seam_list);

    /* Same precedence as above, so this reads as:
       (enable_assoc && !AcceptableChoice(...)) ||
       ((tester || trainer) && strcmp(...) != 0)
       i.e. the tester/trainer branch does not depend on enable_assoc */
    if (enable_assoc &&
      !AcceptableChoice (char_choices, best_choice, raw_choice, NULL)
      || (tester || trainer)
    && strcmp (word->correct, class_string (best_choice))) {
      ratings = word_associator (word->blobs, seam_list, &state, fx,
        best_choice, raw_choice, word->correct,
        /*0, */ &fixpt,
        &best_state, matcher_pass);
    }
    bits_in_states = bit_count + state_count - 1;

  }
  if (ratings != NULL)
    free_matrix(ratings);
  /* NOTE(review): when tester or trainer is set but no chopping happened,
     seam_list is still NULL here - confirm rebuild_current_state()
     accepts that */
  if (did_chopping || tester || trainer)
    char_choices = rebuild_current_state (word->blobs, seam_list, &state,
      char_choices, fx);
  if (seam_list != NULL)
    free_seam_list(seam_list);
  /* Remember the final state in the static best_state */
  if (matcher_fp != NULL) {
    best_state = state;
  }
  FilterWordChoices();
  return char_choices;
}
Beispiel #9
0
/**
 * @name chop_word_main
 *
 * Classifies every blob of word->chopped_word and permutes the results.
 * If the permuted choice is not acceptable, the word is chopped with
 * improve_by_chopping() (when chop_enable is set) and, depending on
 * wordrec_enable_assoc / force_word_assoc, passed to word_associator().
 * The current state is then rebuilt with rebuild_current_state().
 *
 * @param word  word to recognise; its best_choice, raw_choice,
 *              chopped_word, seam_array and denorm members are used
 * @return the vector returned by rebuild_current_state(); the working
 *         char_choices vector is deleted before returning
 */
BLOB_CHOICE_LIST_VECTOR *Wordrec::chop_word_main(WERD_RES *word) {
  STATE state;
  MATRIX *ratings = NULL;
  DANGERR fixpt;  // dangerous ambiguities

  set_denorm(&word->denorm);

  BLOB_CHOICE_LIST_VECTOR *char_choices = new BLOB_CHOICE_LIST_VECTOR();
  BLOB_CHOICE_LIST_VECTOR *best_char_choices = new BLOB_CHOICE_LIST_VECTOR();

  // Initial pass: classify every blob as it stands.
  TBLOB *current = word->chopped_word->blobs;
  while (current != NULL) {
    BLOB_CHOICE_LIST *blob_result =
        classify_blob(current, "chop_word:", Green);
    if (blob_result == NULL) {
      cprintf("Null classifier output!\n");
    }
    *char_choices += blob_result;
    current = current->next;
  }

  set_n_ones(&state, char_choices->length() - 1);

  // The choice is acceptable only if the permuter updated the best choice
  // and the dictionary accepts it; AcceptableChoice is not called otherwise.
  bool replaced = false;
  const bool best_choice_updated =
      getDict().permute_characters(*char_choices, word->best_choice,
                                   word->raw_choice);
  const bool acceptable =
      best_choice_updated &&
      getDict().AcceptableChoice(char_choices, word->best_choice, &fixpt,
                                 CHOPPER_CALLER, &replaced);

  if (replaced) {
    update_blob_classifications(word->chopped_word, *char_choices);
  }
  CopyCharChoices(*char_choices, best_char_choices);

  if (!acceptable) {  // do more work to find a better choice
    bool best_choice_acceptable = false;
    if (chop_enable) {
      improve_by_chopping(word,
                          char_choices,
                          &state,
                          best_char_choices,
                          &fixpt,
                          &best_choice_acceptable);
    }
    if (chop_debug) {
      print_seams("Final seam list:", word->seam_array);
    }

    // The force_word_assoc is almost redundant to enable_assoc.  However,
    // it is not conditioned on the dict behavior.  For CJK, we need to force
    // the associator to be invoked.  When we figure out the exact behavior
    // of dict on CJK, we can remove the flag if it turns out to be redundant.
    const bool run_associator =
        (wordrec_enable_assoc && !best_choice_acceptable) || force_word_assoc;
    if (run_associator) {
      ratings = word_associator(word, &state, best_char_choices,
                                &fixpt, &state);
    }
  }

  best_char_choices = rebuild_current_state(word, &state, best_char_choices,
                                            ratings);

  // Release the ratings matrix, printing it first when debugging.
  if (ratings != NULL) {
    if (wordrec_debug_level > 0) {
      tprintf("Final Ratings Matrix:\n");
      ratings->print(getDict().getUnicharset());
    }
    ratings->delete_matrix_pointers();
    delete ratings;
  }

  getDict().FilterWordChoices();

  // The working vector and the lists it still holds are freed here.
  char_choices->delete_data_pointers();
  delete char_choices;

  return best_char_choices;
}