// Build a ragged TabVector by copying another's direction, shifting it // to match the given blob, and making its initial extent the height // of the blob, but its extended bounds from the bounds of the original. TabVector::TabVector(const TabVector& src, TabAlignment alignment, const ICOORD& vertical_skew, BLOBNBOX* blob) : extended_ymin_(src.extended_ymin_), extended_ymax_(src.extended_ymax_), sort_key_(0), percent_score_(0), mean_width_(0), needs_refit_(true), needs_evaluation_(true), intersects_other_lines_(false), alignment_(alignment), top_constraints_(NULL), bottom_constraints_(NULL) { BLOBNBOX_C_IT it(&boxes_); it.add_to_end(blob); TBOX box = blob->bounding_box(); if (IsLeftTab()) { startpt_ = box.botleft(); endpt_ = box.topleft(); } else { startpt_ = box.botright(); endpt_ = box.topright(); } sort_key_ = SortKey(vertical_skew, (startpt_.x() + endpt_.x()) / 2, (startpt_.y() + endpt_.y()) / 2); if (textord_debug_tabfind > 3) Print("Constructed a new tab vector:"); }
/** * Sets up auto page segmentation, determines the orientation, and corrects it. * Somewhat arbitrary chunk of functionality, factored out of AutoPageSeg to * facilitate testing. * photo_mask_pix is a pointer to a NULL pointer that will be filled on return * with the leptonica photo mask, which must be pixDestroyed by the caller. * to_blocks is an empty list that will be filled with (usually a single) * block that is used during layout analysis. This ugly API is required * because of the possibility of a unlv zone file. * TODO(rays) clean this up. * See AutoPageSeg for other arguments. * The returned ColumnFinder must be deleted after use. */ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation( bool single_column, bool osd, bool only_osd, BLOCK_LIST* blocks, Tesseract* osd_tess, OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix, Pix** music_mask_pix) { int vertical_x = 0; int vertical_y = 1; TabVector_LIST v_lines; TabVector_LIST h_lines; ICOORD bleft(0, 0); ASSERT_HOST(pix_binary_ != NULL); if (tessedit_dump_pageseg_images) { pixWrite("tessinput.png", pix_binary_, IFF_PNG); } // Leptonica is used to find the rule/separator lines in the input. LineFinder::FindAndRemoveLines(source_resolution_, textord_tabfind_show_vlines, pix_binary_, &vertical_x, &vertical_y, music_mask_pix, &v_lines, &h_lines); if (tessedit_dump_pageseg_images) pixWrite("tessnolines.png", pix_binary_, IFF_PNG); // Leptonica is used to find a mask of the photo regions in the input. *photo_mask_pix = ImageFind::FindImages(pix_binary_); if (tessedit_dump_pageseg_images) pixWrite("tessnoimages.png", pix_binary_, IFF_PNG); if (single_column) v_lines.clear(); // The rest of the algorithm uses the usual connected components. textord_.find_components(pix_binary_, blocks, to_blocks); TO_BLOCK_IT to_block_it(to_blocks); // There must be exactly one input block. // TODO(rays) handle new textline finding with a UNLV zone file. ASSERT_HOST(to_blocks->singleton()); TO_BLOCK* to_block = to_block_it.data(); TBOX blkbox = to_block->block->bounding_box(); ColumnFinder* finder = NULL; if (to_block->line_size >= 2) { finder = new ColumnFinder(static_cast<int>(to_block->line_size), blkbox.botleft(), blkbox.topright(), source_resolution_, &v_lines, &h_lines, vertical_x, vertical_y); finder->SetupAndFilterNoise(*photo_mask_pix, to_block); if (equ_detect_) { equ_detect_->LabelSpecialText(to_block); } BLOBNBOX_CLIST osd_blobs; // osd_orientation is the number of 90 degree rotations to make the // characters upright. (See osdetect.h for precise definition.) // We want the text lines horizontal, (vertical text indicates vertical // textlines) which may conflict (eg vertically written CJK). int osd_orientation = 0; bool vertical_text = finder->IsVerticallyAlignedText(to_block, &osd_blobs); if (osd && osd_tess != NULL && osr != NULL) { os_detect_blobs(&osd_blobs, osr, osd_tess); if (only_osd) { delete finder; return NULL; } osd_orientation = osr->best_result.orientation_id; double osd_score = osr->orientations[osd_orientation]; double osd_margin = min_orientation_margin * 2; for (int i = 0; i < 4; ++i) { if (i != osd_orientation && osd_score - osr->orientations[i] < osd_margin) { osd_margin = osd_score - osr->orientations[i]; } } if (osd_margin < min_orientation_margin) { // The margin is weak. int best_script_id = osr->best_result.script_id; bool cjk = (best_script_id == osd_tess->unicharset.han_sid()) || (best_script_id == osd_tess->unicharset.hiragana_sid()) || (best_script_id == osd_tess->unicharset.katakana_sid()); if (!cjk && !vertical_text && osd_orientation == 2) { // upside down latin text is improbable with such a weak margin. tprintf("OSD: Weak margin (%.2f), horiz textlines, not CJK: " "Don't rotate.\n", osd_margin); osd_orientation = 0; } else { tprintf("OSD: Weak margin (%.2f) for %d blob text block, " "but using orientation anyway: %d\n", osd_blobs.length(), osd_margin, osd_orientation); } } } osd_blobs.shallow_clear(); finder->CorrectOrientation(to_block, vertical_text, osd_orientation); } return finder; }
/** * Sets up auto page segmentation, determines the orientation, and corrects it. * Somewhat arbitrary chunk of functionality, factored out of AutoPageSeg to * facilitate testing. * photo_mask_pix is a pointer to a NULL pointer that will be filled on return * with the leptonica photo mask, which must be pixDestroyed by the caller. * to_blocks is an empty list that will be filled with (usually a single) * block that is used during layout analysis. This ugly API is required * because of the possibility of a unlv zone file. * TODO(rays) clean this up. * See AutoPageSeg for other arguments. * The returned ColumnFinder must be deleted after use. */ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation( PageSegMode pageseg_mode, BLOCK_LIST* blocks, Tesseract* osd_tess, OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix, Pix** music_mask_pix) { int vertical_x = 0; int vertical_y = 1; TabVector_LIST v_lines; TabVector_LIST h_lines; ICOORD bleft(0, 0); ASSERT_HOST(pix_binary_ != NULL); if (tessedit_dump_pageseg_images) { pixa_debug_.AddPix(pix_binary_, "PageSegInput"); } // Leptonica is used to find the rule/separator lines in the input. LineFinder::FindAndRemoveLines(source_resolution_, textord_tabfind_show_vlines, pix_binary_, &vertical_x, &vertical_y, music_mask_pix, &v_lines, &h_lines); if (tessedit_dump_pageseg_images) { pixa_debug_.AddPix(pix_binary_, "NoLines"); } // Leptonica is used to find a mask of the photo regions in the input. *photo_mask_pix = ImageFind::FindImages(pix_binary_, &pixa_debug_); if (tessedit_dump_pageseg_images) { pixa_debug_.AddPix(pix_binary_, "NoImages"); } if (!PSM_COL_FIND_ENABLED(pageseg_mode)) v_lines.clear(); // The rest of the algorithm uses the usual connected components. textord_.find_components(pix_binary_, blocks, to_blocks); TO_BLOCK_IT to_block_it(to_blocks); // There must be exactly one input block. // TODO(rays) handle new textline finding with a UNLV zone file. ASSERT_HOST(to_blocks->singleton()); TO_BLOCK* to_block = to_block_it.data(); TBOX blkbox = to_block->block->bounding_box(); ColumnFinder* finder = NULL; int estimated_resolution = source_resolution_; if (source_resolution_ == kMinCredibleResolution) { // Try to estimate resolution from typical body text size. int res = IntCastRounded(to_block->line_size * kResolutionEstimationFactor); if (res > estimated_resolution && res < kMaxCredibleResolution) { estimated_resolution = res; tprintf("Estimating resolution as %d\n", estimated_resolution); } } if (to_block->line_size >= 2) { finder = new ColumnFinder(static_cast<int>(to_block->line_size), blkbox.botleft(), blkbox.topright(), estimated_resolution, textord_use_cjk_fp_model, textord_tabfind_aligned_gap_fraction, &v_lines, &h_lines, vertical_x, vertical_y); finder->SetupAndFilterNoise(pageseg_mode, *photo_mask_pix, to_block); if (equ_detect_) { equ_detect_->LabelSpecialText(to_block); } BLOBNBOX_CLIST osd_blobs; // osd_orientation is the number of 90 degree rotations to make the // characters upright. (See osdetect.h for precise definition.) // We want the text lines horizontal, (vertical text indicates vertical // textlines) which may conflict (eg vertically written CJK). int osd_orientation = 0; bool vertical_text = textord_tabfind_force_vertical_text || pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT; if (!vertical_text && textord_tabfind_vertical_text && PSM_ORIENTATION_ENABLED(pageseg_mode)) { vertical_text = finder->IsVerticallyAlignedText(textord_tabfind_vertical_text_ratio, to_block, &osd_blobs); } if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != NULL && osr != NULL) { GenericVector<int> osd_scripts; if (osd_tess != this) { // We are running osd as part of layout analysis, so constrain the // scripts to those allowed by *this. AddAllScriptsConverted(unicharset, osd_tess->unicharset, &osd_scripts); for (int s = 0; s < sub_langs_.size(); ++s) { AddAllScriptsConverted(sub_langs_[s]->unicharset, osd_tess->unicharset, &osd_scripts); } } os_detect_blobs(&osd_scripts, &osd_blobs, osr, osd_tess); if (pageseg_mode == PSM_OSD_ONLY) { delete finder; return NULL; } osd_orientation = osr->best_result.orientation_id; double osd_score = osr->orientations[osd_orientation]; double osd_margin = min_orientation_margin * 2; for (int i = 0; i < 4; ++i) { if (i != osd_orientation && osd_score - osr->orientations[i] < osd_margin) { osd_margin = osd_score - osr->orientations[i]; } } int best_script_id = osr->best_result.script_id; const char* best_script_str = osd_tess->unicharset.get_script_from_script_id(best_script_id); bool cjk = best_script_id == osd_tess->unicharset.han_sid() || best_script_id == osd_tess->unicharset.hiragana_sid() || best_script_id == osd_tess->unicharset.katakana_sid() || strcmp("Japanese", best_script_str) == 0 || strcmp("Korean", best_script_str) == 0 || strcmp("Hangul", best_script_str) == 0; if (cjk) { finder->set_cjk_script(true); } if (osd_margin < min_orientation_margin) { // The margin is weak. if (!cjk && !vertical_text && osd_orientation == 2) { // upside down latin text is improbable with such a weak margin. tprintf("OSD: Weak margin (%.2f), horiz textlines, not CJK: " "Don't rotate.\n", osd_margin); osd_orientation = 0; } else { tprintf( "OSD: Weak margin (%.2f) for %d blob text block, " "but using orientation anyway: %d\n", osd_margin, osd_blobs.length(), osd_orientation); } } } osd_blobs.shallow_clear(); finder->CorrectOrientation(to_block, vertical_text, osd_orientation); } return finder; }
// Auto page segmentation. Divide the page image into blocks of uniform // text linespacing and images. // Width, height and resolution are derived from the input image. // If the pix is non-NULL, then it is assumed to be the input, and it is // copied to the image, otherwise the image is used directly. // The output goes in the blocks list with corresponding TO_BLOCKs in the // to_blocks list. // If single_column is true, then no attempt is made to divide the image // into columns, but multiple blocks are still made if the text is of // non-uniform linespacing. int Tesseract::AutoPageSeg(int width, int height, int resolution, bool single_column, IMAGE* image, BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) { int vertical_x = 0; int vertical_y = 1; TabVector_LIST v_lines; TabVector_LIST h_lines; ICOORD bleft(0, 0); Boxa* boxa = NULL; Pixa* pixa = NULL; // The blocks made by the ColumnFinder. Moved to blocks before return. BLOCK_LIST found_blocks; #ifdef HAVE_LIBLEPT if (pix_binary_ != NULL) { if (textord_debug_images) { Pix* grey_pix = pixCreate(width, height, 8); // Printable images are light grey on white, but for screen display // they are black on dark grey so the other colors show up well. if (textord_debug_printable) { pixSetAll(grey_pix); pixSetMasked(grey_pix, pix_binary_, 192); } else { pixSetAllArbitrary(grey_pix, 64); pixSetMasked(grey_pix, pix_binary_, 0); } AlignedBlob::IncrementDebugPix(); pixWrite(AlignedBlob::textord_debug_pix().string(), grey_pix, IFF_PNG); pixDestroy(&grey_pix); } if (tessedit_dump_pageseg_images) pixWrite("tessinput.png", pix_binary_, IFF_PNG); // Leptonica is used to find the lines and image regions in the input. LineFinder::FindVerticalLines(resolution, pix_binary_, &vertical_x, &vertical_y, &v_lines); LineFinder::FindHorizontalLines(resolution, pix_binary_, &h_lines); if (tessedit_dump_pageseg_images) pixWrite("tessnolines.png", pix_binary_, IFF_PNG); ImageFinder::FindImages(pix_binary_, &boxa, &pixa); if (tessedit_dump_pageseg_images) pixWrite("tessnoimages.png", pix_binary_, IFF_PNG); // Copy the Pix to the IMAGE. image->FromPix(pix_binary_); if (single_column) v_lines.clear(); } #endif TO_BLOCK_LIST land_blocks, port_blocks; TBOX page_box; // The rest of the algorithm uses the usual connected components. find_components(blocks, &land_blocks, &port_blocks, &page_box); TO_BLOCK_IT to_block_it(&port_blocks); ASSERT_HOST(!to_block_it.empty()); for (to_block_it.mark_cycle_pt(); !to_block_it.cycled_list(); to_block_it.forward()) { TO_BLOCK* to_block = to_block_it.data(); TBOX blkbox = to_block->block->bounding_box(); if (to_block->line_size >= 2) { // Note: if there are multiple blocks, then v_lines, boxa, and pixa // are empty on the next iteration, but in this case, we assume // that there aren't any interesting line separators or images, since // it means that we have a pre-defined unlv zone file. ColumnFinder finder(static_cast<int>(to_block->line_size), blkbox.botleft(), blkbox.topright(), &v_lines, &h_lines, vertical_x, vertical_y); if (finder.FindBlocks(height, resolution, single_column, to_block, boxa, pixa, &found_blocks, to_blocks) < 0) return -1; finder.ComputeDeskewVectors(&deskew_, &reskew_); boxa = NULL; pixa = NULL; } } #ifdef HAVE_LIBLEPT boxaDestroy(&boxa); pixaDestroy(&pixa); #endif blocks->clear(); BLOCK_IT block_it(blocks); // Move the found blocks to the input/output blocks. block_it.add_list_after(&found_blocks); if (textord_debug_images) { // The debug image is no longer needed so delete it. unlink(AlignedBlob::textord_debug_pix().string()); } return 0; }