Esempio n. 1
0
/**
 * @name chop_one_blob
 *
 * Pick a blob of the word via select_blob_to_split(), chop it with
 * attempt_blob_chop(), and replace its entry in char_choices with two
 * fake classifications (one per resulting piece).
 * Used for testing chopper.
 *
 * @param word              word whose blob list is chopped
 * @param char_choices      per-blob choice lists; one entry is replaced
 *                          and one is inserted on success
 * @param blob_number       out: index of the blob that was selected
 *                          (-1 when no blob could be selected)
 * @param seam_list         in/out: receives the new seam via insert_seam()
 * @param right_chop_index  in/out: pre-incremented and used to tag the
 *                          right-hand piece
 * @return true when a seam was made and char_choices was updated,
 *         false when no blob could be selected or the selected blob has
 *         no choice list.
 */
bool Wordrec::chop_one_blob(TWERD *word,
                               BLOB_CHOICE_LIST_VECTOR *char_choices,
                               inT32 *blob_number,
                               SEAMS *seam_list,
                               int *right_chop_index) {
  BLOB_CHOICE_IT choice_it;
  SEAM *new_seam = NULL;
  float max_rating = MAX_FLOAT32;

  // Keep asking for a blob to split until one of them can be chopped.
  for (;;) {
    *blob_number = select_blob_to_split(*char_choices, max_rating, false);
    if (chop_debug)
      cprintf("blob_number = %d\n", *blob_number);
    if (*blob_number == -1)
      return false;

    new_seam = attempt_blob_chop(word, *blob_number, true, *seam_list);
    if (new_seam != NULL)
      break;

    // The chop failed: use this blob's rating as the new ceiling so that
    // the next selection is made with a different limit.
    BLOB_CHOICE_LIST *failed_choices = char_choices->get(*blob_number);
    if (failed_choices == NULL)
      return false;
    choice_it.set_to_list(failed_choices);
    max_rating = choice_it.data()->rating();
  }

  // Split OK: walk the linked list to the blob that was chopped.
  TBLOB *left_piece = word->blobs;
  for (inT16 steps = 0; steps < *blob_number; ++steps)
    left_piece = left_piece->next;

  if (chop_debug) {
    tprintf("Chop made blob1:");
    left_piece->bounding_box().print();
    tprintf("and blob2:");
    left_piece->next->bounding_box().print();
  }
  *seam_list = insert_seam(*seam_list, *blob_number, new_seam, left_piece,
                           word->blobs);

  // Read everything needed from the old choice list before it is deleted.
  BLOB_CHOICE_LIST *old_choices = char_choices->get(*blob_number);
  choice_it.set_to_list(old_choices);
  UNICHAR_ID old_id = choice_it.data()->unichar_id();
  float rating = choice_it.data()->rating() / exp(1.0);
  int left_chop_index = atoi(unicharset.id_to_unichar(old_id));

  delete char_choices->get(*blob_number);

  // Left piece: combine confidence w/ serial # and insert it in front of
  // the (now deleted) old entry.
  BLOB_CHOICE_LIST *left_choices = fake_classify_blob(0, rating, -rating);
  modify_blob_choice(left_choices, left_chop_index);
  char_choices->insert(left_choices, *blob_number);

  // Right piece: overwrite the slot that still holds the stale pointer.
  BLOB_CHOICE_LIST *right_choices =
      fake_classify_blob(0, rating - 0.125f, -rating);
  *right_chop_index += 1;
  modify_blob_choice(right_choices, *right_chop_index);
  char_choices->set(right_choices, *blob_number + 1);
  return true;
}
Esempio n. 2
0
/**********************************************************************
 * improve_one_blob
 *
 * Start with the current word of blobs and its classification.  Select
 * a blob with select_blob_to_split, try to chop it, and on success
 * reclassify the two resulting pieces.
 *
 * On success the updated char_choices list is returned: the entry for
 * the chopped blob is freed and replaced by the classifications of the
 * two pieces (one inserted, one stored at *blob_number + 1).
 *
 * NULL is returned when no blob can be selected, when the selected blob
 * has no choices, or when the chop failed and blob_skip stopped the
 * search before a seam was found.
 *********************************************************************/
CHOICES_LIST improve_one_blob(TWERD *word,
                              CHOICES_LIST char_choices,
                              int fx,
                              INT32 *blob_number,
                              SEAMS *seam_list,
                              DANGERR *fixpt,
                              STATE *this_state,
                              STATE *correct_state,
                              INT32 pass) {
  TBLOB *pblob;
  TBLOB *blob;
  INT32 x = 0;                   /* Same width as *blob_number */
  float rating_ceiling = MAX_FLOAT32;
  CHOICES answer;
  SEAM *seam;

  do {
    *blob_number = select_blob_to_split (char_choices, rating_ceiling);
    if (*blob_number == -1)
      return (NULL);

    seam = attempt_blob_chop (word, *blob_number, *seam_list);
    if (seam != NULL)
      break;
    /* Must split null blobs */
    answer = (CHOICES) array_value (char_choices, *blob_number);
    if (answer == NIL)
      return (NULL);
    /* Try different blob */
    rating_ceiling = best_probability (answer);
  }
  while (!blob_skip);
  /* The loop also ends when blob_skip is set, even though the chop
     failed.  Nothing was split in that case, so report failure rather
     than inserting a NULL seam and classifying pieces that do not
     exist. */
  if (seam == NULL)
    return (NULL);
  /* Split OK */
  for (blob = word->blobs, pblob = NULL; x < *blob_number; x++) {
    pblob = blob;
    blob = blob->next;
  }

  *seam_list =
    insert_seam (*seam_list, *blob_number, seam, blob, word->blobs);

  free_choices ((CHOICES) array_value (char_choices, *blob_number));

  /* Classify the left piece and insert it ahead of the freed entry */
  answer =
    classify_blob (pblob, blob, blob->next, NULL, fx, "improve 1:", Red,
    this_state, correct_state, pass, *blob_number);
  char_choices = array_insert (char_choices, *blob_number, answer);

  /* Classify the right piece and overwrite the stale (freed) entry */
  answer =
    classify_blob (blob, blob->next, blob->next->next, NULL, fx, "improve 2:",
    Yellow, this_state, correct_state, pass, *blob_number + 1);
  array_value (char_choices, *blob_number + 1) = (char *) answer;

  return (char_choices);
}
Esempio n. 3
0
/**
 * @name improve_one_blob
 *
 * Select a blob of the word with select_blob_to_split(), chop it with
 * attempt_blob_chop(), and replace its entry in char_choices with the
 * classifications of the two resulting pieces.
 *
 * @param word                    word whose blob list is chopped
 * @param char_choices            per-blob choice lists, updated on success
 * @param blob_number             out: index of the selected blob
 *                                (-1 when none could be selected)
 * @param seam_list               in/out: receives the new seam
 * @param fixpt                   not referenced by this function body
 * @param split_next_to_fragment  forwarded to select_blob_to_split()
 * @return true when a blob was chopped and reclassified, false when no
 *         blob could be selected or the selected blob has no choice list.
 */
bool Wordrec::improve_one_blob(TWERD *word,
                               BLOB_CHOICE_LIST_VECTOR *char_choices,
                               inT32 *blob_number,
                               SEAMS *seam_list,
                               DANGERR *fixpt,
                               bool split_next_to_fragment) {
  BLOB_CHOICE_IT rating_it;
  SEAM *chop_seam = NULL;
  float ceiling = MAX_FLOAT32;

  // Retry with a new rating ceiling until some blob can be chopped.
  while (true) {
    *blob_number = select_blob_to_split(*char_choices, ceiling,
                                        split_next_to_fragment);
    if (chop_debug)
      cprintf("blob_number = %d\n", *blob_number);
    if (*blob_number == -1)
      return false;

    // TODO(rays) it may eventually help to allow italic_blob to be true,
    chop_seam = attempt_blob_chop(word, *blob_number, false, *seam_list);
    if (chop_seam != NULL)
      break;

    // Chop failed: take this blob's rating as the ceiling for the next
    // selection so a different blob is tried.
    BLOB_CHOICE_LIST *unsplit_choices = char_choices->get(*blob_number);
    if (unsplit_choices == NULL)
      return false;
    rating_it.set_to_list(unsplit_choices);
    ceiling = rating_it.data()->rating();
  }

  // Split OK: advance through the linked list to the chopped blob.
  TBLOB *first_half = word->blobs;
  for (inT16 hops = 0; hops < *blob_number; ++hops)
    first_half = first_half->next;

  *seam_list = insert_seam(*seam_list, *blob_number, chop_seam, first_half,
                           word->blobs);

  // The old classification no longer matches any blob.
  delete char_choices->get(*blob_number);

  // Insert the left piece ahead of the deleted entry...
  BLOB_CHOICE_LIST *left_choices =
      classify_blob(first_half, "improve 1:", Red);
  char_choices->insert(left_choices, *blob_number);

  // ...and overwrite the stale slot with the right piece.
  BLOB_CHOICE_LIST *right_choices =
      classify_blob(first_half->next, "improve 2:", Yellow);
  char_choices->set(right_choices, *blob_number + 1);

  return true;
}
Esempio n. 4
0
/**
 * @name chop_overlapping_blob
 *
 * Scan the blobs of word_res->chopped_word in order and chop the first
 * one that is either reported divisible by divisible_blob() or whose
 * denormalized box overlaps more than one of the given boxes without
 * almost matching any of them.
 *
 * @param boxes        reference boxes to compare each blob against
 * @param italic_blob  forwarded to divisible_blob() and attempt_blob_chop()
 * @param word_res     supplies the chopped word, the denorm and the seams
 * @param blob_number  out: index of the chopped blob, or -1 if none
 * @return the seam produced by attempt_blob_chop(), or NULL when no blob
 *         was chopped.
 */
SEAM *Wordrec::chop_overlapping_blob(const GenericVector<TBOX>& boxes,
                                     bool italic_blob, WERD_RES *word_res,
                                     int *blob_number) {
  TWERD *chopped = word_res->chopped_word;

  *blob_number = 0;
  while (*blob_number < chopped->NumBlobs()) {
    TBLOB *candidate = chopped->blobs[*blob_number];

    // Corners of the blob's box in the word's current coordinates.
    const TBOX norm_box = candidate->bounding_box();
    TPOINT topleft, botright;
    topleft.x = norm_box.left();
    topleft.y = norm_box.top();
    botright.x = norm_box.right();
    botright.y = norm_box.bottom();

    // Run both corners through the denorm transform so the box can be
    // compared with the caller's boxes.
    TPOINT original_topleft, original_botright;
    word_res->denorm.DenormTransform(NULL, topleft, &original_topleft);
    word_res->denorm.DenormTransform(NULL, botright, &original_botright);

    TBOX original_box(original_topleft.x, original_botright.y,
                      original_botright.x, original_topleft.y);

    // Count significant overlaps and note whether any box nearly matches.
    int overlap_count = 0;
    bool has_near_match = false;
    for (int box_idx = 0; box_idx < boxes.size(); ++box_idx) {
      if (original_box.overlap_fraction(boxes[box_idx]) > 0.125)
        ++overlap_count;
      if (original_box.almost_equal(boxes[box_idx], 3))
        has_near_match = true;
    }

    TPOINT location;
    const bool divisible = divisible_blob(candidate, italic_blob, &location);
    if (divisible || (overlap_count > 1 && !has_near_match)) {
      SEAM *result = attempt_blob_chop(chopped, candidate, *blob_number,
                                       italic_blob, word_res->seam_array);
      if (result != NULL)
        return result;
    }

    ++*blob_number;
  }

  // Nothing could be chopped.
  *blob_number = -1;
  return NULL;
}
Esempio n. 5
0
/**
 * @name chop_numbered_blob
 *
 * Attempt to chop the blob at index blob_number of the word.
 *
 * @param word         word containing the blob
 * @param blob_number  index into word->blobs of the blob to chop
 * @param italic_blob  forwarded to attempt_blob_chop()
 * @param seams        forwarded to attempt_blob_chop()
 * @return whatever attempt_blob_chop() returns for that blob.
 */
SEAM *Wordrec::chop_numbered_blob(TWERD *word, inT32 blob_number,
                                  bool italic_blob,
                                  const GenericVector<SEAM*>& seams) {
  TBLOB *target = word->blobs[blob_number];
  return attempt_blob_chop(word, target, blob_number, italic_blob, seams);
}