/********************************************************************** * split_word * * Split a given WERD_RES in place into two smaller words for recognition. * split_pt is the index of the first blob to go in the second word. * The underlying word is left alone, only the TWERD (and subsequent data) * are split up. orig_blamer_bundle is set to the original blamer bundle, * and will now be owned by the caller. New blamer bundles are forged for the * two pieces. **********************************************************************/ void Tesseract::split_word(WERD_RES *word, int split_pt, WERD_RES **right_piece, BlamerBundle **orig_blamer_bundle) const { ASSERT_HOST(split_pt >0 && split_pt < word->chopped_word->NumBlobs()); // Save a copy of the blamer bundle so we can try to reconstruct it below. BlamerBundle *orig_bb = word->blamer_bundle ? new BlamerBundle(*word->blamer_bundle) : NULL; WERD_RES *word2 = new WERD_RES(*word); // blow away the copied chopped_word, as we want to work with // the blobs from the input chopped_word so seam_arrays can be merged. TWERD *chopped = word->chopped_word; TWERD *chopped2 = new TWERD; chopped2->blobs.reserve(chopped->NumBlobs() - split_pt); for (int i = split_pt; i < chopped->NumBlobs(); ++i) { chopped2->blobs.push_back(chopped->blobs[i]); } chopped->blobs.truncate(split_pt); word->chopped_word = NULL; delete word2->chopped_word; word2->chopped_word = NULL; const UNICHARSET &unicharset = *word->uch_set; word->ClearResults(); word2->ClearResults(); word->chopped_word = chopped; word2->chopped_word = chopped2; word->SetupBasicsFromChoppedWord(unicharset); word2->SetupBasicsFromChoppedWord(unicharset); // Try to adjust the blamer bundle. if (orig_bb != NULL) { // TODO(rays) Looks like a leak to me. // orig_bb should take, rather than copy. word->blamer_bundle = new BlamerBundle(); word2->blamer_bundle = new BlamerBundle(); orig_bb->SplitBundle(chopped->blobs.back()->bounding_box().right(), word2->chopped_word->blobs[0]->bounding_box().left(), wordrec_debug_blamer, word->blamer_bundle, word2->blamer_bundle); } *right_piece = word2; *orig_blamer_bundle = orig_bb; }
SEAM *Wordrec::chop_overlapping_blob(const GenericVector<TBOX>& boxes, bool italic_blob, WERD_RES *word_res, int *blob_number) { TWERD *word = word_res->chopped_word; for (*blob_number = 0; *blob_number < word->NumBlobs(); ++*blob_number) { TBLOB *blob = word->blobs[*blob_number]; TPOINT topleft, botright; topleft.x = blob->bounding_box().left(); topleft.y = blob->bounding_box().top(); botright.x = blob->bounding_box().right(); botright.y = blob->bounding_box().bottom(); TPOINT original_topleft, original_botright; word_res->denorm.DenormTransform(NULL, topleft, &original_topleft); word_res->denorm.DenormTransform(NULL, botright, &original_botright); TBOX original_box = TBOX(original_topleft.x, original_botright.y, original_botright.x, original_topleft.y); bool almost_equal_box = false; int num_overlap = 0; for (int i = 0; i < boxes.size(); i++) { if (original_box.overlap_fraction(boxes[i]) > 0.125) num_overlap++; if (original_box.almost_equal(boxes[i], 3)) almost_equal_box = true; } TPOINT location; if (divisible_blob(blob, italic_blob, &location) || (!almost_equal_box && num_overlap > 1)) { SEAM *seam = attempt_blob_chop(word, blob, *blob_number, italic_blob, word_res->seam_array); if (seam != NULL) return seam; } } *blob_number = -1; return NULL; }
/** * word_display() Word Processor * * Display a word according to its display modes */ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) { WERD* word = word_res->word; TBOX word_bb; // word bounding box int word_height; // ht of word BB BOOL8 displayed_something = FALSE; float shift; // from bot left C_BLOB_IT c_it; // cblob iterator if (color_mode != CM_RAINBOW && word_res->box_word != NULL) { BoxWord* box_word = word_res->box_word; int length = box_word->length(); if (word_res->fontinfo == NULL) return false; const FontInfo& font_info = *word_res->fontinfo; for (int i = 0; i < length; ++i) { ScrollView::Color color = ScrollView::GREEN; switch (color_mode) { case CM_SUBSCRIPT: if (box_word->BlobPosition(i) == SP_SUBSCRIPT) color = ScrollView::RED; break; case CM_SUPERSCRIPT: if (box_word->BlobPosition(i) == SP_SUPERSCRIPT) color = ScrollView::RED; break; case CM_ITALIC: if (font_info.is_italic()) color = ScrollView::RED; break; case CM_BOLD: if (font_info.is_bold()) color = ScrollView::RED; break; case CM_FIXEDPITCH: if (font_info.is_fixed_pitch()) color = ScrollView::RED; break; case CM_SERIF: if (font_info.is_serif()) color = ScrollView::RED; break; case CM_SMALLCAPS: if (word_res->small_caps) color = ScrollView::RED; break; case CM_DROPCAPS: if (box_word->BlobPosition(i) == SP_DROPCAP) color = ScrollView::RED; break; // TODO(rays) underline is currently completely unsupported. case CM_UNDERLINE: default: break; } image_win->Pen(color); TBOX box = box_word->BlobBox(i); image_win->Rectangle(box.left(), box.bottom(), box.right(), box.top()); } return true; } /* Note the double coercions of(COLOUR)((inT32)editor_image_word_bb_color) etc. are to keep the compiler happy. */ // display bounding box if (word->display_flag(DF_BOX)) { word->bounding_box().plot(image_win, (ScrollView::Color)((inT32) editor_image_word_bb_color), (ScrollView::Color)((inT32) editor_image_word_bb_color)); ScrollView::Color c = (ScrollView::Color) ((inT32) editor_image_blob_bb_color); image_win->Pen(c); c_it.set_to_list(word->cblob_list()); for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) c_it.data()->bounding_box().plot(image_win); displayed_something = TRUE; } // display edge steps if (word->display_flag(DF_EDGE_STEP)) { // edgesteps available word->plot(image_win); // rainbow colors displayed_something = TRUE; } // display poly approx if (word->display_flag(DF_POLYGONAL)) { // need to convert TWERD* tword = TWERD::PolygonalCopy(word); tword->plot(image_win); delete tword; displayed_something = TRUE; } // Display correct text and blamer information. STRING text; STRING blame; if (word->display_flag(DF_TEXT) && word->text() != NULL) { text = word->text(); } if (word->display_flag(DF_BLAMER) && !(word_res->blamer_bundle != NULL && word_res->blamer_bundle->incorrect_result_reason == IRR_CORRECT)) { text = ""; const BlamerBundle *blamer_bundle = word_res->blamer_bundle; if (blamer_bundle == NULL) { text += "NULL"; } else { for (int i = 0; i < blamer_bundle->truth_text.length(); ++i) { text += blamer_bundle->truth_text[i]; } } text += " -> "; STRING best_choice_str; if (word_res->best_choice == NULL) { best_choice_str = "NULL"; } else { word_res->best_choice->string_and_lengths(&best_choice_str, NULL); } text += best_choice_str; IncorrectResultReason reason = (blamer_bundle == NULL) ? IRR_PAGE_LAYOUT : blamer_bundle->incorrect_result_reason; ASSERT_HOST(reason < IRR_NUM_REASONS) blame += " ["; blame += BlamerBundle::IncorrectReasonName(reason); blame += "]"; } if (text.length() > 0) { word_bb = word->bounding_box(); image_win->Pen(ScrollView::RED); word_height = word_bb.height(); int text_height = 0.50 * word_height; if (text_height > 20) text_height = 20; image_win->TextAttributes("Arial", text_height, false, false, false); shift = (word_height < word_bb.width()) ? 0.25 * word_height : 0.0f; image_win->Text(word_bb.left() + shift, word_bb.bottom() + 0.25 * word_height, text.string()); if (blame.length() > 0) { image_win->Text(word_bb.left() + shift, word_bb.bottom() + 0.25 * word_height - text_height, blame.string()); } displayed_something = TRUE; } if (!displayed_something) // display BBox anyway word->bounding_box().plot(image_win, (ScrollView::Color)((inT32) editor_image_word_bb_color), (ScrollView::Color)((inT32) editor_image_word_bb_color)); return TRUE; }