コード例 #1
0
ファイル: chopper.cpp プロジェクト: 0359xiaodong/tess-two
/**
 * @name chop_one_blob
 *
 * Start with the current one-blob word and its classification.  Find
 * the worst blobs and try to divide it up to improve the ratings.
 * Used for testing chopper.
 */
bool Wordrec::chop_one_blob(TWERD *word,
                               BLOB_CHOICE_LIST_VECTOR *char_choices,
                               inT32 *blob_number,
                               SEAMS *seam_list,
                               int *right_chop_index) {
  TBLOB *blob;
  inT16 x = 0;
  float rating_ceiling = MAX_FLOAT32;
  BLOB_CHOICE_LIST *answer;
  BLOB_CHOICE_IT answer_it;
  SEAM *seam;
  UNICHAR_ID unichar_id = 0;
  int left_chop_index = 0;

  do {
    *blob_number = select_blob_to_split(*char_choices, rating_ceiling, false);
    if (chop_debug)
      cprintf("blob_number = %d\n", *blob_number);
    if (*blob_number == -1)
      return false;
    seam = attempt_blob_chop(word, *blob_number, true, *seam_list);
    if (seam != NULL)
      break;
    /* Must split null blobs */
    answer = char_choices->get(*blob_number);
    if (answer == NULL)
      return false;
    answer_it.set_to_list(answer);
    rating_ceiling = answer_it.data()->rating();  // try a different blob
  } while (true);
  /* Split OK */
  for (blob = word->blobs; x < *blob_number; x++) {
    blob = blob->next;
  }
  if (chop_debug) {
    tprintf("Chop made blob1:");
    blob->bounding_box().print();
    tprintf("and blob2:");
    blob->next->bounding_box().print();
  }
  *seam_list = insert_seam(*seam_list, *blob_number, seam, blob, word->blobs);

  answer = char_choices->get(*blob_number);
  answer_it.set_to_list(answer);
  unichar_id = answer_it.data()->unichar_id();
  float rating = answer_it.data()->rating() / exp(1.0);
  left_chop_index = atoi(unicharset.id_to_unichar(unichar_id));

  delete char_choices->get(*blob_number);
  // combine confidence w/ serial #
  answer = fake_classify_blob(0, rating, -rating);
  modify_blob_choice(answer, left_chop_index);
  char_choices->insert(answer, *blob_number);

  answer = fake_classify_blob(0, rating - 0.125f, -rating);
  modify_blob_choice(answer, ++*right_chop_index);
  char_choices->set(answer, *blob_number + 1);
  return true;
}
コード例 #2
0
/**********************************************************************
 * improve_one_blob
 *
 * Start with the current word of blobs and its classification.  Find
 * the worst blobs and try to divide it up to improve the ratings.
 *********************************************************************/
CHOICES_LIST improve_one_blob(TWERD *word,
                              CHOICES_LIST char_choices,
                              int fx,
                              INT32 *blob_number,
                              SEAMS *seam_list,
                              DANGERR *fixpt,
                              STATE *this_state,
                              STATE *correct_state,
                              INT32 pass) {
  TBLOB *pblob;
  TBLOB *blob;
  INT16 x = 0;
  float rating_ceiling = MAX_FLOAT32;
  CHOICES answer;
  SEAM *seam;

  do {
    *blob_number = select_blob_to_split (char_choices, rating_ceiling);
    if (*blob_number == -1)
      return (NULL);

    seam = attempt_blob_chop (word, *blob_number, *seam_list);
    if (seam != NULL)
      break;
    /* Must split null blobs */
    answer = (CHOICES) array_value (char_choices, *blob_number);
    if (answer == NIL)
      return (NULL);             /* Try different blob */
    rating_ceiling = best_probability (answer);
  }
  while (!blob_skip);
  /* Split OK */
  for (blob = word->blobs, pblob = NULL; x < *blob_number; x++) {
    pblob = blob;
    blob = blob->next;
  }

  *seam_list =
    insert_seam (*seam_list, *blob_number, seam, blob, word->blobs);

  free_choices ((CHOICES) array_value (char_choices, *blob_number));

  answer =
    classify_blob (pblob, blob, blob->next, NULL, fx, "improve 1:", Red,
    this_state, correct_state, pass, *blob_number);
  char_choices = array_insert (char_choices, *blob_number, answer);

  answer =
    classify_blob (blob, blob->next, blob->next->next, NULL, fx, "improve 2:",
    Yellow, this_state, correct_state, pass, *blob_number + 1);
  array_value (char_choices, *blob_number + 1) = (char *) answer;

  return (char_choices);
}
コード例 #3
0
ファイル: chopper.cpp プロジェクト: 0359xiaodong/tess-two
/**
 * @name improve_one_blob
 *
 * Start with the current word of blobs and its classification.  Find
 * the worst blobs and try to divide it up to improve the ratings.
 */
bool Wordrec::improve_one_blob(TWERD *word,
                               BLOB_CHOICE_LIST_VECTOR *char_choices,
                               inT32 *blob_number,
                               SEAMS *seam_list,
                               DANGERR *fixpt,
                               bool split_next_to_fragment) {
  TBLOB *blob;
  inT16 x = 0;
  float rating_ceiling = MAX_FLOAT32;
  BLOB_CHOICE_LIST *answer;
  BLOB_CHOICE_IT answer_it;
  SEAM *seam;

  do {
    *blob_number = select_blob_to_split(*char_choices, rating_ceiling,
                                        split_next_to_fragment);
    if (chop_debug)
      cprintf("blob_number = %d\n", *blob_number);
    if (*blob_number == -1)
      return false;

    // TODO(rays) it may eventually help to allow italic_blob to be true,
    seam = attempt_blob_chop (word, *blob_number, false, *seam_list);
    if (seam != NULL)
      break;
    /* Must split null blobs */
    answer = char_choices->get(*blob_number);
    if (answer == NULL)
      return false;
    answer_it.set_to_list(answer);
    rating_ceiling = answer_it.data()->rating();  // try a different blob
  } while (true);
  /* Split OK */
  for (blob = word->blobs; x < *blob_number; x++) {
    blob = blob->next;
  }

  *seam_list =
    insert_seam (*seam_list, *blob_number, seam, blob, word->blobs);

  delete char_choices->get(*blob_number);

  answer = classify_blob(blob, "improve 1:", Red);
  char_choices->insert(answer, *blob_number);

  answer = classify_blob(blob->next, "improve 2:", Yellow);
  char_choices->set(answer, *blob_number + 1);

  return true;
}
コード例 #4
0
ファイル: chopper.cpp プロジェクト: ManishKSharma/tess-two
/**
 * @name improve_one_blob
 *
 * Finds the best place to chop, based on the worst blob, fixpt, or next to
 * a fragment, according to the input. Returns the SEAM corresponding to the
 * chop point, if any is found, and the index in the ratings_matrix of the
 * chopped blob. Note that blob_choices is just a copy of the pointers in the
 * leading diagonal of the ratings MATRIX.
 * Although the blob is chopped, the returned SEAM is yet to be inserted into
 * word->seam_array and the resulting blobs are unclassified, so this function
 * can be used by ApplyBox as well as during recognition.
 */
SEAM* Wordrec::improve_one_blob(const GenericVector<BLOB_CHOICE*>& blob_choices,
                                DANGERR *fixpt,
                                bool split_next_to_fragment,
                                bool italic_blob,
                                WERD_RES* word,
                                int* blob_number) {
  float rating_ceiling = MAX_FLOAT32;
  SEAM *seam = NULL;
  do {
    *blob_number = select_blob_to_split_from_fixpt(fixpt);
    if (chop_debug) tprintf("blob_number from fixpt = %d\n", *blob_number);
    bool split_point_from_dict = (*blob_number != -1);
    if (split_point_from_dict) {
      fixpt->clear();
    } else {
      *blob_number = select_blob_to_split(blob_choices, rating_ceiling,
                                          split_next_to_fragment);
    }
    if (chop_debug) tprintf("blob_number = %d\n", *blob_number);
    if (*blob_number == -1)
      return NULL;

    // TODO(rays) it may eventually help to allow italic_blob to be true,
    seam = chop_numbered_blob(word->chopped_word, *blob_number, italic_blob,
                              word->seam_array);
    if (seam != NULL)
      return seam;  // Success!
    if (blob_choices[*blob_number] == NULL)
      return NULL;
    if (!split_point_from_dict) {
      // We chopped the worst rated blob, try something else next time.
      rating_ceiling = blob_choices[*blob_number]->rating();
    }
  } while (true);
  return seam;
}