/** * @name chop_one_blob * * Start with the current one-blob word and its classification. Find * the worst blobs and try to divide it up to improve the ratings. * Used for testing chopper. */ bool Wordrec::chop_one_blob(TWERD *word, BLOB_CHOICE_LIST_VECTOR *char_choices, inT32 *blob_number, SEAMS *seam_list, int *right_chop_index) { TBLOB *blob; inT16 x = 0; float rating_ceiling = MAX_FLOAT32; BLOB_CHOICE_LIST *answer; BLOB_CHOICE_IT answer_it; SEAM *seam; UNICHAR_ID unichar_id = 0; int left_chop_index = 0; do { *blob_number = select_blob_to_split(*char_choices, rating_ceiling, false); if (chop_debug) cprintf("blob_number = %d\n", *blob_number); if (*blob_number == -1) return false; seam = attempt_blob_chop(word, *blob_number, true, *seam_list); if (seam != NULL) break; /* Must split null blobs */ answer = char_choices->get(*blob_number); if (answer == NULL) return false; answer_it.set_to_list(answer); rating_ceiling = answer_it.data()->rating(); // try a different blob } while (true); /* Split OK */ for (blob = word->blobs; x < *blob_number; x++) { blob = blob->next; } if (chop_debug) { tprintf("Chop made blob1:"); blob->bounding_box().print(); tprintf("and blob2:"); blob->next->bounding_box().print(); } *seam_list = insert_seam(*seam_list, *blob_number, seam, blob, word->blobs); answer = char_choices->get(*blob_number); answer_it.set_to_list(answer); unichar_id = answer_it.data()->unichar_id(); float rating = answer_it.data()->rating() / exp(1.0); left_chop_index = atoi(unicharset.id_to_unichar(unichar_id)); delete char_choices->get(*blob_number); // combine confidence w/ serial # answer = fake_classify_blob(0, rating, -rating); modify_blob_choice(answer, left_chop_index); char_choices->insert(answer, *blob_number); answer = fake_classify_blob(0, rating - 0.125f, -rating); modify_blob_choice(answer, ++*right_chop_index); char_choices->set(answer, *blob_number + 1); return true; }
/**********************************************************************
 * improve_one_blob
 *
 * Start with the current word of blobs and its classification.  Ask
 * select_blob_to_split() for a blob, try to chop it, and on success
 * reclassify the two resulting pieces, storing their choices at
 * *blob_number and *blob_number + 1 of the returned choices list.
 *
 * Returns NULL when no blob is selected (*blob_number == -1) or when a
 * blob that could not be chopped has no choices (NIL).  The fixpt
 * argument is not referenced by this function.
 *
 * NOTE(review): when blob_skip is set, the retry loop ends after the
 * first failed chop and execution continues with seam == NULL - confirm
 * that insert_seam() tolerates this.
 **********************************************************************/
CHOICES_LIST improve_one_blob(TWERD *word,
                              CHOICES_LIST char_choices,
                              int fx,
                              INT32 *blob_number,
                              SEAMS *seam_list,
                              DANGERR *fixpt,
                              STATE *this_state,
                              STATE *correct_state,
                              INT32 pass) {
  float rating_ceiling = MAX_FLOAT32;
  SEAM *seam;

  for (;;) {
    *blob_number = select_blob_to_split(char_choices, rating_ceiling);
    if (*blob_number == -1)
      return NULL;

    seam = attempt_blob_chop(word, *blob_number, *seam_list);
    if (seam != NULL)
      break;

    /* Must split null blobs */
    CHOICES unsplit_choices =
        (CHOICES) array_value(char_choices, *blob_number);
    if (unsplit_choices == NIL)
      return NULL;

    /* Try different blob */
    rating_ceiling = best_probability(unsplit_choices);

    /* Loop only continues while blob_skip is clear */
    if (blob_skip)
      break;
  }

  /* Split OK */
  /* Locate the chopped blob and remember its predecessor (NULL if first) */
  TBLOB *prev_blob = NULL;
  TBLOB *blob = word->blobs;
  for (INT16 steps = 0; steps < *blob_number; steps++) {
    prev_blob = blob;
    blob = blob->next;
  }

  *seam_list = insert_seam(*seam_list, *blob_number, seam, blob, word->blobs);

  /* Drop the old choices for the blob that has just been divided */
  free_choices((CHOICES) array_value(char_choices, *blob_number));

  /* First piece: a new slot is inserted for it at *blob_number */
  CHOICES first_piece = classify_blob(prev_blob, blob, blob->next, NULL,
                                      fx, "improve 1:", Red,
                                      this_state, correct_state, pass,
                                      *blob_number);
  char_choices = array_insert(char_choices, *blob_number, first_piece);

  /* Second piece: overwrites the slot that follows */
  CHOICES second_piece = classify_blob(blob, blob->next, blob->next->next,
                                       NULL, fx, "improve 2:", Yellow,
                                       this_state, correct_state, pass,
                                       *blob_number + 1);
  array_value(char_choices, *blob_number + 1) = (char *) second_piece;

  return char_choices;
}
/** * @name improve_one_blob * * Start with the current word of blobs and its classification. Find * the worst blobs and try to divide it up to improve the ratings. */ bool Wordrec::improve_one_blob(TWERD *word, BLOB_CHOICE_LIST_VECTOR *char_choices, inT32 *blob_number, SEAMS *seam_list, DANGERR *fixpt, bool split_next_to_fragment) { TBLOB *blob; inT16 x = 0; float rating_ceiling = MAX_FLOAT32; BLOB_CHOICE_LIST *answer; BLOB_CHOICE_IT answer_it; SEAM *seam; do { *blob_number = select_blob_to_split(*char_choices, rating_ceiling, split_next_to_fragment); if (chop_debug) cprintf("blob_number = %d\n", *blob_number); if (*blob_number == -1) return false; // TODO(rays) it may eventually help to allow italic_blob to be true, seam = attempt_blob_chop (word, *blob_number, false, *seam_list); if (seam != NULL) break; /* Must split null blobs */ answer = char_choices->get(*blob_number); if (answer == NULL) return false; answer_it.set_to_list(answer); rating_ceiling = answer_it.data()->rating(); // try a different blob } while (true); /* Split OK */ for (blob = word->blobs; x < *blob_number; x++) { blob = blob->next; } *seam_list = insert_seam (*seam_list, *blob_number, seam, blob, word->blobs); delete char_choices->get(*blob_number); answer = classify_blob(blob, "improve 1:", Red); char_choices->insert(answer, *blob_number); answer = classify_blob(blob->next, "improve 2:", Yellow); char_choices->set(answer, *blob_number + 1); return true; }
/**
 * Scans the blobs of word_res->chopped_word in order and chops the first
 * one that qualifies.  A blob qualifies when divisible_blob() accepts it,
 * or when its denormalized bounding box overlaps more than one of the
 * given boxes (overlap fraction > 0.125) without being almost equal
 * (tolerance 3) to any of them.
 *
 * @param boxes        Reference boxes compared against each blob's
 *                     denormalized bounding box.
 * @param italic_blob  Forwarded to divisible_blob() and
 *                     attempt_blob_chop().
 * @param word_res     Supplies the chopped word, the denorm transform and
 *                     the seam array.
 * @param blob_number  Out: index of the blob that was chopped, or -1 if
 *                     no chop was made.
 * @return the seam produced by the chop, or NULL if no blob was chopped.
 */
SEAM *Wordrec::chop_overlapping_blob(const GenericVector<TBOX>& boxes,
                                     bool italic_blob,
                                     WERD_RES *word_res,
                                     int *blob_number) {
  TWERD *word = word_res->chopped_word;

  for (*blob_number = 0; *blob_number < word->NumBlobs(); ++*blob_number) {
    TBLOB *blob = word->blobs[*blob_number];

    // Corners of the blob's bounding box in the blob's own coordinates.
    TBOX blob_box = blob->bounding_box();
    TPOINT topleft, botright;
    topleft.x = blob_box.left();
    topleft.y = blob_box.top();
    botright.x = blob_box.right();
    botright.y = blob_box.bottom();

    // Run both corners through the denorm transform so the box can be
    // compared with the caller's boxes.
    TPOINT original_topleft, original_botright;
    word_res->denorm.DenormTransform(NULL, topleft, &original_topleft);
    word_res->denorm.DenormTransform(NULL, botright, &original_botright);
    TBOX original_box(original_topleft.x, original_botright.y,
                      original_botright.x, original_topleft.y);

    // Count overlapping reference boxes and note any near-exact match.
    int num_overlap = 0;
    bool almost_equal_box = false;
    for (int i = 0; i < boxes.size(); ++i) {
      if (original_box.overlap_fraction(boxes[i]) > 0.125)
        ++num_overlap;
      if (original_box.almost_equal(boxes[i], 3))
        almost_equal_box = true;
    }

    // divisible_blob() is always consulted first; the overlap test is
    // only the fallback.
    TPOINT location;
    bool should_chop = divisible_blob(blob, italic_blob, &location);
    if (!should_chop)
      should_chop = !almost_equal_box && num_overlap > 1;
    if (!should_chop)
      continue;

    SEAM *seam = attempt_blob_chop(word, blob, *blob_number,
                                   italic_blob, word_res->seam_array);
    if (seam != NULL)
      return seam;
  }

  // Nothing could be chopped.
  *blob_number = -1;
  return NULL;
}
/**
 * Attempts to chop the blob at index blob_number of the given word by
 * forwarding to attempt_blob_chop().
 *
 * @param word         Word containing the blob.
 * @param blob_number  Index into word->blobs of the blob to chop.
 * @param italic_blob  Forwarded unchanged to attempt_blob_chop().
 * @param seams        Forwarded unchanged to attempt_blob_chop().
 * @return whatever attempt_blob_chop() returns for that blob.
 */
SEAM *Wordrec::chop_numbered_blob(TWERD *word,
                                  inT32 blob_number,
                                  bool italic_blob,
                                  const GenericVector<SEAM*>& seams) {
  TBLOB *target_blob = word->blobs[blob_number];
  return attempt_blob_chop(word, target_blob, blob_number, italic_blob,
                           seams);
}