// Rotate the grid by rotation, keeping cell contents. // rotation must be a multiple of 90 degrees. // NOTE: due to partial cells, cell coverage in the rotated grid will be // inexact. This is why there is no Rotate for the generic BBGrid. // TODO(rays) investigate fixing this inaccuracy by moving the origin after // rotation. void IntGrid::Rotate(const FCOORD& rotation) { ASSERT_HOST(rotation.x() == 0.0f || rotation.y() == 0.0f); ICOORD old_bleft(bleft()); ICOORD old_tright(tright()); int old_width = gridwidth(); int old_height = gridheight(); TBOX box(bleft(), tright()); box.rotate(rotation); int* old_grid = grid_; grid_ = NULL; Init(gridsize(), box.botleft(), box.topright()); // Iterate over the old grid, copying data to the rotated position in the new. int oldi = 0; FCOORD x_step(rotation); x_step *= gridsize(); for (int oldy = 0; oldy < old_height; ++oldy) { FCOORD line_pos(old_bleft.x(), old_bleft.y() + gridsize() * oldy); line_pos.rotate(rotation); for (int oldx = 0; oldx < old_width; ++oldx, line_pos += x_step, ++oldi) { int grid_x, grid_y; GridCoords(static_cast<int>(line_pos.x() + 0.5), static_cast<int>(line_pos.y() + 0.5), &grid_x, &grid_y); grid_[grid_y * gridwidth() + grid_x] = old_grid[oldi]; } } delete [] old_grid; }
// Returns a full-resolution binary pix in which each cell over the given // threshold is filled as a black square. pixDestroy after use. // Edge cells, which have a zero 4-neighbour, are not marked. Pix* IntGrid::ThresholdToPix(int threshold) const { Pix* pix = pixCreate(tright().x() - bleft().x(), tright().y() - bleft().y(), 1); int cellsize = gridsize(); for (int y = 0; y < gridheight(); ++y) { for (int x = 0; x < gridwidth(); ++x) { if (GridCellValue(x, y) > threshold && GridCellValue(x - 1, y) > 0 && GridCellValue(x + 1, y) > 0 && GridCellValue(x, y - 1) > 0 && GridCellValue(x, y + 1) > 0) { pixRasterop(pix, x * cellsize, tright().y() - ((y + 1) * cellsize), cellsize, cellsize, PIX_SET, NULL, 0, 0); } } } return pix; }
// Finds horizontal line objects in the given pix. // Uses the given resolution to determine size thresholds instead of any // that may be present in the pix. // The output vectors are owned by the list and Frozen (cannot refit) by // having no boxes, as there is no need to refit or merge separator lines. void LineFinder::FindHorizontalLines(int resolution, Pix* pix, TabVector_LIST* vectors) { #ifdef HAVE_LIBLEPT Pix* line_pix; Boxa* boxes = GetHLineBoxes(resolution, pix, &line_pix); C_BLOB_LIST line_cblobs; int width = pixGetWidth(pix); int height = pixGetHeight(pix); ConvertBoxaToBlobs(height, width, &boxes, &line_cblobs); // Make the BLOBNBOXes from the C_BLOBs. BLOBNBOX_LIST line_bblobs; C_BLOB_IT blob_it(&line_cblobs); BLOBNBOX_IT bbox_it(&line_bblobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { C_BLOB* cblob = blob_it.data(); BLOBNBOX* bblob = new BLOBNBOX(cblob); bbox_it.add_to_end(bblob); } ICOORD bleft(0, 0); ICOORD tright(height, width); int vertical_x, vertical_y; FindLineVectors(bleft, tright, &line_bblobs, &vertical_x, &vertical_y, vectors); if (!vectors->empty()) { // Some lines were found, so erase the unused blobs from the line image // and then subtract the line image from the source. bbox_it.move_to_first(); for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { BLOBNBOX* blob = bbox_it.data(); if (blob->left_tab_type() == TT_UNCONFIRMED) { const TBOX& box = blob->bounding_box(); // Coords are in tess format so filp x and y and then covert // to leptonica by height -y. 
Box* pixbox = boxCreate(box.bottom(), height - box.right(), box.height(), box.width()); pixClearInRect(line_pix, pixbox); boxDestroy(&pixbox); } } pixDilateBrick(line_pix, line_pix, 3, 1); pixSubtract(pix, pix, line_pix); if (textord_tabfind_show_vlines) pixWrite("hlinesclean.png", line_pix, IFF_PNG); ICOORD vertical; vertical.set_with_shrink(vertical_x, vertical_y); TabVector::MergeSimilarTabVectors(vertical, vectors, NULL); // Iterate the vectors to flip them. TabVector_IT h_it(vectors); for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) { h_it.data()->XYFlip(); } } pixDestroy(&line_pix); #endif }
// Tests each blob in the list to see if it is certain non-text using 2 // conditions: // 1. blob overlaps a cell with high value in noise_density_ (previously set // by ComputeNoiseDensity). // OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This // condition is disabled with max_blob_overlaps == -1. // If it does, the blob is declared non-text, and is used to mark up the // nontext_mask. Such blobs are fully deleted, and non-noise blobs have their // neighbours reset, as they may now point to deleted data. // WARNING: The blobs list blobs may be in the *this grid, but they are // not removed. If any deleted blobs might be in *this, then this must be // Clear()ed immediately after MarkAndDeleteNonTextBlobs is called. // If the win is not NULL, deleted blobs are drawn on it in red, and kept // blobs are drawn on it in ok_color. void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs, int max_blob_overlaps, ScrollView* win, ScrollView::Color ok_color, Pix* nontext_mask) { int imageheight = tright().y() - bleft().x(); BLOBNBOX_IT blob_it(blobs); BLOBNBOX_LIST dead_blobs; BLOBNBOX_IT dead_it(&dead_blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); TBOX box = blob->bounding_box(); if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) && (max_blob_overlaps < 0 || !BlobOverlapsTooMuch(blob, max_blob_overlaps))) { blob->ClearNeighbours(); #ifndef GRAPHICS_DISABLED if (win != NULL) blob->plot(win, ok_color, ok_color); #endif // GRAPHICS_DISABLED } else { if (noise_density_->AnyZeroInRect(box)) { // There is a danger that the bounding box may overlap real text, so // we need to render the outline. 
Pix* blob_pix = blob->cblob()->render_outline(); pixRasterop(nontext_mask, box.left(), imageheight - box.top(), box.width(), box.height(), PIX_SRC | PIX_DST, blob_pix, 0, 0); pixDestroy(&blob_pix); } else { if (box.area() < gridsize() * gridsize()) { // It is a really bad idea to make lots of small components in the // photo mask, so try to join it to a bigger area by expanding the // box in a way that does not touch any zero noise density cell. box = AttemptBoxExpansion(box, *noise_density_, gridsize()); } // All overlapped cells are non-zero, so just mark the rectangle. pixRasterop(nontext_mask, box.left(), imageheight - box.top(), box.width(), box.height(), PIX_SET, NULL, 0, 0); } #ifndef GRAPHICS_DISABLED if (win != NULL) blob->plot(win, ScrollView::RED, ScrollView::RED); #endif // GRAPHICS_DISABLED // It is safe to delete the cblob now, as it isn't used by the grid // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the // dead_blobs list. // TODO(rays) delete the delete when the BLOBNBOX destructor deletes // the cblob. delete blob->cblob(); dead_it.add_to_end(blob_it.extract()); } } }
// Finds vertical line objects in the given pix. // Uses the given resolution to determine size thresholds instead of any // that may be present in the pix. // The output vertical_x and vertical_y contain a sum of the output vectors, // thereby giving the mean vertical direction. // The output vectors are owned by the list and Frozen (cannot refit) by // having no boxes, as there is no need to refit or merge separator lines. void LineFinder::FindVerticalLines(int resolution, Pix* pix, int* vertical_x, int* vertical_y, TabVector_LIST* vectors) { #ifdef HAVE_LIBLEPT Pix* line_pix; Boxa* boxes = GetVLineBoxes(resolution, pix, &line_pix); C_BLOB_LIST line_cblobs; int width = pixGetWidth(pix); int height = pixGetHeight(pix); ConvertBoxaToBlobs(width, height, &boxes, &line_cblobs); // Make the BLOBNBOXes from the C_BLOBs. BLOBNBOX_LIST line_bblobs; C_BLOB_IT blob_it(&line_cblobs); BLOBNBOX_IT bbox_it(&line_bblobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { C_BLOB* cblob = blob_it.data(); BLOBNBOX* bblob = new BLOBNBOX(cblob); bbox_it.add_to_end(bblob); } ICOORD bleft(0, 0); ICOORD tright(width, height); FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors); if (!vectors->empty()) { // Some lines were found, so erase the unused blobs from the line image // and then subtract the line image from the source. 
bbox_it.move_to_first(); for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { BLOBNBOX* blob = bbox_it.data(); if (blob->left_tab_type() == TT_UNCONFIRMED) { const TBOX& box = blob->bounding_box(); Box* pixbox = boxCreate(box.left(), height - box.top(), box.width(), box.height()); pixClearInRect(line_pix, pixbox); boxDestroy(&pixbox); } } pixDilateBrick(line_pix, line_pix, 1, 3); pixSubtract(pix, pix, line_pix); if (textord_tabfind_show_vlines) pixWrite("vlinesclean.png", line_pix, IFF_PNG); ICOORD vertical; vertical.set_with_shrink(*vertical_x, *vertical_y); TabVector::MergeSimilarTabVectors(vertical, vectors, NULL); } pixDestroy(&line_pix); #endif }
// Returns a new IntGrid containing values equal to the sum of all the // neighbouring cells. The returned grid must be deleted after use. // For ease of implementation, edge cells are double counted, to make them // have the same range as the non-edge cells. IntGrid* IntGrid::NeighbourhoodSum() const { IntGrid* sumgrid = new IntGrid(gridsize(), bleft(), tright()); for (int y = 0; y < gridheight(); ++y) { for (int x = 0; x < gridwidth(); ++x) { int cell_count = 0; for (int yoffset = -1; yoffset <= 1; ++yoffset) { for (int xoffset = -1; xoffset <= 1; ++xoffset) { int grid_x = x + xoffset; int grid_y = y + yoffset; ClipGridCoords(&grid_x, &grid_y); cell_count += GridCellValue(grid_x, grid_y); } } if (GridCellValue(x, y) > 1) sumgrid->SetGridCell(x, y, cell_count); } } return sumgrid; }
// Creates and returns a Pix with the same resolution as the original // in which 1 (black) pixels represent likely non text (photo, line drawing) // areas of the page, deleting from the blob_block the blobs that were // determined to be non-text. // The photo_map is used to bias the decision towards non-text, rather than // supplying definite decision. // The blob_block is the usual result of connected component analysis, // holding the detected blobs. // The returned Pix should be PixDestroyed after use. Pix* CCNonTextDetect::ComputeNonTextMask(bool debug, Pix* photo_map, TO_BLOCK* blob_block) { // Insert the smallest blobs into the grid. InsertBlobList(&blob_block->small_blobs); InsertBlobList(&blob_block->noise_blobs); // Add the medium blobs that don't have a good strokewidth neighbour. // Those that do go into good_grid as an antidote to spreading beyond the // real reaches of a noise region. BlobGrid good_grid(gridsize(), bleft(), tright()); BLOBNBOX_IT blob_it(&blob_block->blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); double perimeter_area_ratio = blob->cblob()->perimeter() / 4.0; perimeter_area_ratio *= perimeter_area_ratio / blob->enclosed_area(); if (blob->GoodTextBlob() == 0 || perimeter_area_ratio < kMinGoodTextPARatio) InsertBBox(true, true, blob); else good_grid.InsertBBox(true, true, blob); } noise_density_ = ComputeNoiseDensity(debug, photo_map, &good_grid); good_grid.Clear(); // Not needed any more. Pix* pix = noise_density_->ThresholdToPix(max_noise_count_); if (debug) { pixWrite("junknoisemask.png", pix, IFF_PNG); } ScrollView* win = NULL; #ifndef GRAPHICS_DISABLED if (debug) { win = MakeWindow(0, 400, "Photo Mask Blobs"); } #endif // GRAPHICS_DISABLED // Large and medium blobs are not text if they overlap with "a lot" of small // blobs. 
MarkAndDeleteNonTextBlobs(&blob_block->large_blobs, kMaxLargeOverlapsWithSmall, win, ScrollView::DARK_GREEN, pix); MarkAndDeleteNonTextBlobs(&blob_block->blobs, kMaxMediumOverlapsWithSmall, win, ScrollView::WHITE, pix); // Clear the grid of small blobs and insert the medium blobs. Clear(); InsertBlobList(&blob_block->blobs); MarkAndDeleteNonTextBlobs(&blob_block->large_blobs, kMaxLargeOverlapsWithMedium, win, ScrollView::DARK_GREEN, pix); // Clear again before we start deleting the blobs in the grid. Clear(); MarkAndDeleteNonTextBlobs(&blob_block->noise_blobs, -1, win, ScrollView::CORAL, pix); MarkAndDeleteNonTextBlobs(&blob_block->small_blobs, -1, win, ScrollView::GOLDENROD, pix); MarkAndDeleteNonTextBlobs(&blob_block->blobs, -1, win, ScrollView::WHITE, pix); if (debug) { #ifndef GRAPHICS_DISABLED win->Update(); #endif // GRAPHICS_DISABLED pixWrite("junkccphotomask.png", pix, IFF_PNG); #ifndef GRAPHICS_DISABLED delete win->AwaitEvent(SVET_DESTROY); delete win; #endif // GRAPHICS_DISABLED } return pix; }
/**
 * Prepares auto page segmentation, detects the page orientation, and
 * corrects it. A somewhat arbitrary chunk of functionality, factored out of
 * AutoPageSeg to facilitate testing.
 * photo_mask_pix is a pointer to a NULL pointer that will be filled on return
 * with the leptonica photo mask, which must be pixDestroyed by the caller.
 * to_blocks is an empty list that will be filled with (usually a single)
 * block that is used during layout analysis. This ugly API is required
 * because of the possibility of a unlv zone file.
 * TODO(rays) clean this up.
 * See AutoPageSeg for other arguments.
 * Returns NULL if the block's line_size is below 2, or if pageseg_mode is
 * PSM_OSD_ONLY and OSD was run. Otherwise the returned ColumnFinder must be
 * deleted after use.
 */
ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
    PageSegMode pageseg_mode, BLOCK_LIST* blocks, Tesseract* osd_tess,
    OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix,
    Pix** music_mask_pix) {
  int vertical_x = 0;
  int vertical_y = 1;
  TabVector_LIST v_lines;
  TabVector_LIST h_lines;
  ICOORD bleft(0, 0);  // Not referenced below.

  ASSERT_HOST(pix_binary_ != NULL);
  if (tessedit_dump_pageseg_images) {
    pixa_debug_.AddPix(pix_binary_, "PageSegInput");
  }
  // Leptonica is used to find the rule/separator lines in the input.
  LineFinder::FindAndRemoveLines(source_resolution_,
                                 textord_tabfind_show_vlines, pix_binary_,
                                 &vertical_x, &vertical_y, music_mask_pix,
                                 &v_lines, &h_lines);
  if (tessedit_dump_pageseg_images) {
    pixa_debug_.AddPix(pix_binary_, "NoLines");
  }
  // Leptonica is used to find a mask of the photo regions in the input.
  *photo_mask_pix = ImageFind::FindImages(pix_binary_, &pixa_debug_);
  if (tessedit_dump_pageseg_images) {
    pixa_debug_.AddPix(pix_binary_, "NoImages");
  }
  if (!PSM_COL_FIND_ENABLED(pageseg_mode)) {
    v_lines.clear();
  }

  // The rest of the algorithm uses the usual connected components.
  textord_.find_components(pix_binary_, blocks, to_blocks);

  TO_BLOCK_IT block_it(to_blocks);
  // There must be exactly one input block.
  // TODO(rays) handle new textline finding with a UNLV zone file.
  ASSERT_HOST(to_blocks->singleton());
  TO_BLOCK* to_block = block_it.data();
  TBOX block_box = to_block->block->bounding_box();

  int estimated_resolution = source_resolution_;
  if (source_resolution_ == kMinCredibleResolution) {
    // Try to estimate resolution from typical body text size.
    int guess = IntCastRounded(to_block->line_size *
                               kResolutionEstimationFactor);
    if (guess > estimated_resolution && guess < kMaxCredibleResolution) {
      estimated_resolution = guess;
      tprintf("Estimating resolution as %d\n", estimated_resolution);
    }
  }

  if (to_block->line_size < 2) {
    // Too small to analyse: there is no finder to return.
    return NULL;
  }

  ColumnFinder* finder = new ColumnFinder(
      static_cast<int>(to_block->line_size), block_box.botleft(),
      block_box.topright(), estimated_resolution, textord_use_cjk_fp_model,
      textord_tabfind_aligned_gap_fraction, &v_lines, &h_lines, vertical_x,
      vertical_y);
  finder->SetupAndFilterNoise(pageseg_mode, *photo_mask_pix, to_block);

  if (equ_detect_) {
    equ_detect_->LabelSpecialText(to_block);
  }

  BLOBNBOX_CLIST osd_blobs;
  // osd_orientation is the number of 90 degree rotations to make the
  // characters upright. (See osdetect.h for precise definition.)
  // We want the text lines horizontal, (vertical text indicates vertical
  // textlines) which may conflict (eg vertically written CJK).
  int osd_orientation = 0;
  bool vertical_text = textord_tabfind_force_vertical_text ||
                       pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
  if (!vertical_text && textord_tabfind_vertical_text &&
      PSM_ORIENTATION_ENABLED(pageseg_mode)) {
    vertical_text = finder->IsVerticallyAlignedText(
        textord_tabfind_vertical_text_ratio, to_block, &osd_blobs);
  }

  if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != NULL && osr != NULL) {
    GenericVector<int> osd_scripts;
    if (osd_tess != this) {
      // We are running osd as part of layout analysis, so constrain the
      // scripts to those allowed by *this.
      AddAllScriptsConverted(unicharset, osd_tess->unicharset, &osd_scripts);
      for (int lang = 0; lang < sub_langs_.size(); ++lang) {
        AddAllScriptsConverted(sub_langs_[lang]->unicharset,
                               osd_tess->unicharset, &osd_scripts);
      }
    }
    os_detect_blobs(&osd_scripts, &osd_blobs, osr, osd_tess);
    if (pageseg_mode == PSM_OSD_ONLY) {
      delete finder;
      return NULL;
    }

    osd_orientation = osr->best_result.orientation_id;
    double osd_score = osr->orientations[osd_orientation];
    // The margin is the smallest lead of the best orientation over any other
    // one, capped at twice the minimum acceptable margin.
    double osd_margin = min_orientation_margin * 2;
    for (int i = 0; i < 4; ++i) {
      if (i == osd_orientation)
        continue;
      if (osd_score - osr->orientations[i] < osd_margin) {
        osd_margin = osd_score - osr->orientations[i];
      }
    }

    int best_script_id = osr->best_result.script_id;
    const char* best_script_str =
        osd_tess->unicharset.get_script_from_script_id(best_script_id);
    bool cjk = best_script_id == osd_tess->unicharset.han_sid() ||
               best_script_id == osd_tess->unicharset.hiragana_sid() ||
               best_script_id == osd_tess->unicharset.katakana_sid() ||
               strcmp("Japanese", best_script_str) == 0 ||
               strcmp("Korean", best_script_str) == 0 ||
               strcmp("Hangul", best_script_str) == 0;
    if (cjk) {
      finder->set_cjk_script(true);
    }

    if (osd_margin < min_orientation_margin) {
      // The margin is weak.
      bool improbable_flip = !cjk && !vertical_text && osd_orientation == 2;
      if (improbable_flip) {
        // upside down latin text is improbable with such a weak margin.
        tprintf("OSD: Weak margin (%.2f), horiz textlines, not CJK: "
                "Don't rotate.\n", osd_margin);
        osd_orientation = 0;
      } else {
        tprintf(
            "OSD: Weak margin (%.2f) for %d blob text block, "
            "but using orientation anyway: %d\n",
            osd_margin, osd_blobs.length(), osd_orientation);
      }
    }
  }
  osd_blobs.shallow_clear();
  finder->CorrectOrientation(to_block, vertical_text, osd_orientation);
  return finder;
}
/**
 * Sets up auto page segmentation, determines the orientation, and corrects it.
 * Somewhat arbitrary chunk of functionality, factored out of AutoPageSeg to
 * facilitate testing.
 * single_column discards the vertical separator lines that were found.
 * osd enables orientation and script detection (it also needs non-NULL
 * osd_tess and osr); with only_osd the function returns NULL straight after
 * running the detection.
 * photo_mask_pix is a pointer to a NULL pointer that will be filled on return
 * with the leptonica photo mask, which must be pixDestroyed by the caller.
 * to_blocks is an empty list that will be filled with (usually a single)
 * block that is used during layout analysis. This ugly API is required
 * because of the possibility of a unlv zone file.
 * TODO(rays) clean this up.
 * See AutoPageSeg for other arguments.
 * Returns NULL if the block's line_size is below 2 or in the only_osd case.
 * Otherwise the returned ColumnFinder must be deleted after use.
 */
ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
    bool single_column, bool osd, bool only_osd,
    BLOCK_LIST* blocks, Tesseract* osd_tess, OSResults* osr,
    TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix, Pix** music_mask_pix) {
  int vertical_x = 0;
  int vertical_y = 1;
  TabVector_LIST v_lines;
  TabVector_LIST h_lines;

  ASSERT_HOST(pix_binary_ != NULL);
  if (tessedit_dump_pageseg_images) {
    pixWrite("tessinput.png", pix_binary_, IFF_PNG);
  }
  // Leptonica is used to find the rule/separator lines in the input.
  LineFinder::FindAndRemoveLines(source_resolution_,
                                 textord_tabfind_show_vlines, pix_binary_,
                                 &vertical_x, &vertical_y, music_mask_pix,
                                 &v_lines, &h_lines);
  if (tessedit_dump_pageseg_images)
    pixWrite("tessnolines.png", pix_binary_, IFF_PNG);
  // Leptonica is used to find a mask of the photo regions in the input.
  *photo_mask_pix = ImageFind::FindImages(pix_binary_);
  if (tessedit_dump_pageseg_images)
    pixWrite("tessnoimages.png", pix_binary_, IFF_PNG);
  if (single_column)
    v_lines.clear();

  // The rest of the algorithm uses the usual connected components.
  textord_.find_components(pix_binary_, blocks, to_blocks);

  TO_BLOCK_IT to_block_it(to_blocks);
  // There must be exactly one input block.
  // TODO(rays) handle new textline finding with a UNLV zone file.
  ASSERT_HOST(to_blocks->singleton());
  TO_BLOCK* to_block = to_block_it.data();
  TBOX blkbox = to_block->block->bounding_box();
  ColumnFinder* finder = NULL;

  if (to_block->line_size >= 2) {
    finder = new ColumnFinder(static_cast<int>(to_block->line_size),
                              blkbox.botleft(), blkbox.topright(),
                              source_resolution_,
                              &v_lines, &h_lines, vertical_x, vertical_y);

    finder->SetupAndFilterNoise(*photo_mask_pix, to_block);

    if (equ_detect_) {
      equ_detect_->LabelSpecialText(to_block);
    }

    BLOBNBOX_CLIST osd_blobs;
    // osd_orientation is the number of 90 degree rotations to make the
    // characters upright. (See osdetect.h for precise definition.)
    // We want the text lines horizontal, (vertical text indicates vertical
    // textlines) which may conflict (eg vertically written CJK).
    int osd_orientation = 0;
    bool vertical_text = finder->IsVerticallyAlignedText(to_block, &osd_blobs);
    if (osd && osd_tess != NULL && osr != NULL) {
      os_detect_blobs(&osd_blobs, osr, osd_tess);
      if (only_osd) {
        delete finder;
        return NULL;
      }
      osd_orientation = osr->best_result.orientation_id;
      double osd_score = osr->orientations[osd_orientation];
      // The margin is the smallest lead of the best orientation over any
      // other one, capped at twice the minimum acceptable margin.
      double osd_margin = min_orientation_margin * 2;
      for (int i = 0; i < 4; ++i) {
        if (i != osd_orientation &&
            osd_score - osr->orientations[i] < osd_margin) {
          osd_margin = osd_score - osr->orientations[i];
        }
      }
      if (osd_margin < min_orientation_margin) {
        // The margin is weak.
        int best_script_id = osr->best_result.script_id;
        bool cjk = (best_script_id == osd_tess->unicharset.han_sid()) ||
            (best_script_id == osd_tess->unicharset.hiragana_sid()) ||
            (best_script_id == osd_tess->unicharset.katakana_sid());

        if (!cjk && !vertical_text && osd_orientation == 2) {
          // upside down latin text is improbable with such a weak margin.
          tprintf("OSD: Weak margin (%.2f), horiz textlines, not CJK: "
                  "Don't rotate.\n", osd_margin);
          osd_orientation = 0;
        } else {
          // The arguments must follow the order of the conversions in the
          // format: margin (%.2f), blob count (%d), orientation (%d).
          tprintf("OSD: Weak margin (%.2f) for %d blob text block, "
                  "but using orientation anyway: %d\n",
                  osd_margin, osd_blobs.length(), osd_orientation);
        }
      }
    }
    osd_blobs.shallow_clear();
    finder->CorrectOrientation(to_block, vertical_text, osd_orientation);
  }

  return finder;
}
// Auto page segmentation. Divide the page image into blocks of uniform // text linespacing and images. // Width, height and resolution are derived from the input image. // If the pix is non-NULL, then it is assumed to be the input, and it is // copied to the image, otherwise the image is used directly. // The output goes in the blocks list with corresponding TO_BLOCKs in the // to_blocks list. // If single_column is true, then no attempt is made to divide the image // into columns, but multiple blocks are still made if the text is of // non-uniform linespacing. int Tesseract::AutoPageSeg(int width, int height, int resolution, bool single_column, IMAGE* image, BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) { int vertical_x = 0; int vertical_y = 1; TabVector_LIST v_lines; TabVector_LIST h_lines; ICOORD bleft(0, 0); Boxa* boxa = NULL; Pixa* pixa = NULL; // The blocks made by the ColumnFinder. Moved to blocks before return. BLOCK_LIST found_blocks; #ifdef HAVE_LIBLEPT if (pix_binary_ != NULL) { if (textord_debug_images) { Pix* grey_pix = pixCreate(width, height, 8); // Printable images are light grey on white, but for screen display // they are black on dark grey so the other colors show up well. if (textord_debug_printable) { pixSetAll(grey_pix); pixSetMasked(grey_pix, pix_binary_, 192); } else { pixSetAllArbitrary(grey_pix, 64); pixSetMasked(grey_pix, pix_binary_, 0); } AlignedBlob::IncrementDebugPix(); pixWrite(AlignedBlob::textord_debug_pix().string(), grey_pix, IFF_PNG); pixDestroy(&grey_pix); } if (tessedit_dump_pageseg_images) pixWrite("tessinput.png", pix_binary_, IFF_PNG); // Leptonica is used to find the lines and image regions in the input. 
LineFinder::FindVerticalLines(resolution, pix_binary_, &vertical_x, &vertical_y, &v_lines); LineFinder::FindHorizontalLines(resolution, pix_binary_, &h_lines); if (tessedit_dump_pageseg_images) pixWrite("tessnolines.png", pix_binary_, IFF_PNG); ImageFinder::FindImages(pix_binary_, &boxa, &pixa); if (tessedit_dump_pageseg_images) pixWrite("tessnoimages.png", pix_binary_, IFF_PNG); // Copy the Pix to the IMAGE. image->FromPix(pix_binary_); if (single_column) v_lines.clear(); } #endif TO_BLOCK_LIST land_blocks, port_blocks; TBOX page_box; // The rest of the algorithm uses the usual connected components. find_components(blocks, &land_blocks, &port_blocks, &page_box); TO_BLOCK_IT to_block_it(&port_blocks); ASSERT_HOST(!to_block_it.empty()); for (to_block_it.mark_cycle_pt(); !to_block_it.cycled_list(); to_block_it.forward()) { TO_BLOCK* to_block = to_block_it.data(); TBOX blkbox = to_block->block->bounding_box(); if (to_block->line_size >= 2) { // Note: if there are multiple blocks, then v_lines, boxa, and pixa // are empty on the next iteration, but in this case, we assume // that there aren't any interesting line separators or images, since // it means that we have a pre-defined unlv zone file. ColumnFinder finder(static_cast<int>(to_block->line_size), blkbox.botleft(), blkbox.topright(), &v_lines, &h_lines, vertical_x, vertical_y); if (finder.FindBlocks(height, resolution, single_column, to_block, boxa, pixa, &found_blocks, to_blocks) < 0) return -1; finder.ComputeDeskewVectors(&deskew_, &reskew_); boxa = NULL; pixa = NULL; } } #ifdef HAVE_LIBLEPT boxaDestroy(&boxa); pixaDestroy(&pixa); #endif blocks->clear(); BLOCK_IT block_it(blocks); // Move the found blocks to the input/output blocks. block_it.add_list_after(&found_blocks); if (textord_debug_images) { // The debug image is no longer needed so delete it. unlink(AlignedBlob::textord_debug_pix().string()); } return 0; }