// Finds horizontal line objects in the given pix. // Uses the given resolution to determine size thresholds instead of any // that may be present in the pix. // The output vectors are owned by the list and Frozen (cannot refit) by // having no boxes, as there is no need to refit or merge separator lines. void LineFinder::FindHorizontalLines(int resolution, Pix* pix, TabVector_LIST* vectors) { #ifdef HAVE_LIBLEPT Pix* line_pix; Boxa* boxes = GetHLineBoxes(resolution, pix, &line_pix); C_BLOB_LIST line_cblobs; int width = pixGetWidth(pix); int height = pixGetHeight(pix); ConvertBoxaToBlobs(height, width, &boxes, &line_cblobs); // Make the BLOBNBOXes from the C_BLOBs. BLOBNBOX_LIST line_bblobs; C_BLOB_IT blob_it(&line_cblobs); BLOBNBOX_IT bbox_it(&line_bblobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { C_BLOB* cblob = blob_it.data(); BLOBNBOX* bblob = new BLOBNBOX(cblob); bbox_it.add_to_end(bblob); } ICOORD bleft(0, 0); ICOORD tright(height, width); int vertical_x, vertical_y; FindLineVectors(bleft, tright, &line_bblobs, &vertical_x, &vertical_y, vectors); if (!vectors->empty()) { // Some lines were found, so erase the unused blobs from the line image // and then subtract the line image from the source. bbox_it.move_to_first(); for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { BLOBNBOX* blob = bbox_it.data(); if (blob->left_tab_type() == TT_UNCONFIRMED) { const TBOX& box = blob->bounding_box(); // Coords are in tess format so filp x and y and then covert // to leptonica by height -y. Box* pixbox = boxCreate(box.bottom(), height - box.right(), box.height(), box.width()); pixClearInRect(line_pix, pixbox); boxDestroy(&pixbox); } } pixDilateBrick(line_pix, line_pix, 3, 1); pixSubtract(pix, pix, line_pix); if (textord_tabfind_show_vlines) pixWrite("hlinesclean.png", line_pix, IFF_PNG); ICOORD vertical; vertical.set_with_shrink(vertical_x, vertical_y); TabVector::MergeSimilarTabVectors(vertical, vectors, NULL); // Iterate the vectors to flip them. TabVector_IT h_it(vectors); for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) { h_it.data()->XYFlip(); } } pixDestroy(&line_pix); #endif }
// Finds vertical line objects in the given pix. // Uses the given resolution to determine size thresholds instead of any // that may be present in the pix. // The output vertical_x and vertical_y contain a sum of the output vectors, // thereby giving the mean vertical direction. // The output vectors are owned by the list and Frozen (cannot refit) by // having no boxes, as there is no need to refit or merge separator lines. void LineFinder::FindVerticalLines(int resolution, Pix* pix, int* vertical_x, int* vertical_y, TabVector_LIST* vectors) { #ifdef HAVE_LIBLEPT Pix* line_pix; Boxa* boxes = GetVLineBoxes(resolution, pix, &line_pix); C_BLOB_LIST line_cblobs; int width = pixGetWidth(pix); int height = pixGetHeight(pix); ConvertBoxaToBlobs(width, height, &boxes, &line_cblobs); // Make the BLOBNBOXes from the C_BLOBs. BLOBNBOX_LIST line_bblobs; C_BLOB_IT blob_it(&line_cblobs); BLOBNBOX_IT bbox_it(&line_bblobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { C_BLOB* cblob = blob_it.data(); BLOBNBOX* bblob = new BLOBNBOX(cblob); bbox_it.add_to_end(bblob); } ICOORD bleft(0, 0); ICOORD tright(width, height); FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors); if (!vectors->empty()) { // Some lines were found, so erase the unused blobs from the line image // and then subtract the line image from the source. bbox_it.move_to_first(); for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { BLOBNBOX* blob = bbox_it.data(); if (blob->left_tab_type() == TT_UNCONFIRMED) { const TBOX& box = blob->bounding_box(); Box* pixbox = boxCreate(box.left(), height - box.top(), box.width(), box.height()); pixClearInRect(line_pix, pixbox); boxDestroy(&pixbox); } } pixDilateBrick(line_pix, line_pix, 1, 3); pixSubtract(pix, pix, line_pix); if (textord_tabfind_show_vlines) pixWrite("vlinesclean.png", line_pix, IFF_PNG); ICOORD vertical; vertical.set_with_shrink(*vertical_x, *vertical_y); TabVector::MergeSimilarTabVectors(vertical, vectors, NULL); } pixDestroy(&line_pix); #endif }
// Helper erases false-positive line segments from the input/output line_pix. // 1. Since thick lines shouldn't really break up, we can eliminate some false // positives by marking segments that are at least kMinThickLineWidth // thickness, yet have a length less than min_thick_length. // 2. Lines that don't have at least 2 intersections with other lines and have // a lot of neighbouring non-lines are probably not lines (perhaps arabic // or Hindi words, or underlines.) // Bad line components are erased from line_pix. // Returns the number of remaining connected components. static int FilterFalsePositives(int resolution, Pix* nonline_pix, Pix* intersection_pix, Pix* line_pix) { int min_thick_length = static_cast<int>(resolution * kThickLengthMultiple); Pixa* pixa = NULL; Boxa* boxa = pixConnComp(line_pix, &pixa, 8); // Iterate over the boxes to remove false positives. int nboxes = boxaGetCount(boxa); int remaining_boxes = nboxes; for (int i = 0; i < nboxes; ++i) { Box* box = boxaGetBox(boxa, i, L_CLONE); l_int32 x, y, box_width, box_height; boxGetGeometry(box, &x, &y, &box_width, &box_height); Pix* comp_pix = pixaGetPix(pixa, i, L_CLONE); int max_width = MaxStrokeWidth(comp_pix); pixDestroy(&comp_pix); bool bad_line = false; // If the length is too short to stand-alone as a line, and the box width // is thick enough, and the stroke width is thick enough it is bad. if (box_width >= kMinThickLineWidth && box_height >= kMinThickLineWidth && box_width < min_thick_length && box_height < min_thick_length && max_width > kMinThickLineWidth) { // Too thick for the length. bad_line = true; } if (!bad_line && (intersection_pix == NULL || NumTouchingIntersections(box, intersection_pix) < 2)) { // Test non-line density near the line. int nonline_count = CountPixelsAdjacentToLine(max_width, box, nonline_pix); if (nonline_count > box_height * box_width * kMaxNonLineDensity) bad_line = true; } if (bad_line) { // Not a good line. pixClearInRect(line_pix, box); --remaining_boxes; } boxDestroy(&box); } pixaDestroy(&pixa); boxaDestroy(&boxa); return remaining_boxes; }
// Top-level method to perform splitting based on current settings. // Returns true if a split was actually performed. // split_for_pageseg should be true if the splitting is being done prior to // page segmentation. This mode uses the flag // pageseg_devanagari_split_strategy to determine the splitting strategy. bool ShiroRekhaSplitter::Split(bool split_for_pageseg) { SplitStrategy split_strategy = split_for_pageseg ? pageseg_split_strategy_ : ocr_split_strategy_; if (split_strategy == NO_SPLIT) { return false; // Nothing to do. } ASSERT_HOST(split_strategy == MINIMAL_SPLIT || split_strategy == MAXIMAL_SPLIT); ASSERT_HOST(orig_pix_); if (devanagari_split_debuglevel > 0) { tprintf("Splitting shiro-rekha ...\n"); tprintf("Split strategy = %s\n", split_strategy == MINIMAL_SPLIT ? "Minimal" : "Maximal"); tprintf("Initial pageseg available = %s\n", segmentation_block_list_ ? "yes" : "no"); } // Create a copy of original image to store the splitting output. pixDestroy(&splitted_image_); splitted_image_ = pixCopy(NULL, orig_pix_); // Initialize debug image if required. if (devanagari_split_debugimage) { pixDestroy(&debug_image_); debug_image_ = pixConvertTo32(orig_pix_); } // Determine all connected components in the input image. A close operation // may be required prior to this, depending on the current settings. Pix* pix_for_ccs = pixClone(orig_pix_); if (perform_close_ && global_xheight_ != kUnspecifiedXheight && !segmentation_block_list_) { if (devanagari_split_debuglevel > 0) { tprintf("Performing a global close operation..\n"); } // A global measure is available for xheight, but no local information // exists. pixDestroy(&pix_for_ccs); pix_for_ccs = pixCopy(NULL, orig_pix_); PerformClose(pix_for_ccs, global_xheight_); } Pixa* ccs; Boxa* tmp_boxa = pixConnComp(pix_for_ccs, &ccs, 8); boxaDestroy(&tmp_boxa); pixDestroy(&pix_for_ccs); // Iterate over all connected components. Get their bounding boxes and clip // out the image regions corresponding to these boxes from the original image. // Conditionally run splitting on each of them. Boxa* regions_to_clear = boxaCreate(0); for (int i = 0; i < pixaGetCount(ccs); ++i) { Box* box = ccs->boxa->box[i]; Pix* word_pix = pixClipRectangle(orig_pix_, box, NULL); ASSERT_HOST(word_pix); int xheight = GetXheightForCC(box); if (xheight == kUnspecifiedXheight && segmentation_block_list_ && devanagari_split_debugimage) { pixRenderBoxArb(debug_image_, box, 1, 255, 0, 0); } // If some xheight measure is available, attempt to pre-eliminate small // blobs from the shiro-rekha process. This is primarily to save the CCs // corresponding to punctuation marks/small dots etc which are part of // larger graphemes. if (xheight == kUnspecifiedXheight || (box->w > xheight / 3 && box->h > xheight / 2)) { SplitWordShiroRekha(split_strategy, word_pix, xheight, box->x, box->y, regions_to_clear); } else if (devanagari_split_debuglevel > 0) { tprintf("CC dropped from splitting: %d,%d (%d, %d)\n", box->x, box->y, box->w, box->h); } pixDestroy(&word_pix); } // Actually clear the boxes now. for (int i = 0; i < boxaGetCount(regions_to_clear); ++i) { Box* box = boxaGetBox(regions_to_clear, i, L_CLONE); pixClearInRect(splitted_image_, box); boxDestroy(&box); } boxaDestroy(®ions_to_clear); pixaDestroy(&ccs); if (devanagari_split_debugimage) { DumpDebugImage(split_for_pageseg ? "pageseg_split_debug.png" : "ocr_split_debug.png"); } return true; }
// Returns a list of regions (boxes) which should be cleared in the original // image so as to perform shiro-rekha splitting. Pix is assumed to carry one // (or less) word only. Xheight measure could be the global estimate, the row // estimate, or unspecified. If unspecified, over splitting may occur, since a // conservative estimate of stroke width along with an associated multiplier // is used in its place. It is advisable to have a specified xheight when // splitting for classification/training. // A vertical projection histogram of all the on-pixels in the input pix is // computed. The maxima of this histogram is regarded as an approximate location // of the shiro-rekha. By descending on the maxima's peak on both sides, // stroke width of shiro-rekha is estimated. // A horizontal projection histogram is computed for a sub-image of the input // image, which extends from just below the shiro-rekha down to a certain // leeway. The leeway depends on the input xheight, if provided, else a // conservative multiplier on approximate stroke width is used (which may lead // to over-splitting). void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, Pix* pix, int xheight, int word_left, int word_top, Boxa* regions_to_clear) { if (split_strategy == NO_SPLIT) { return; } int width = pixGetWidth(pix); int height = pixGetHeight(pix); // Statistically determine the yextents of the shiro-rekha. int shirorekha_top, shirorekha_bottom, shirorekha_ylevel; GetShiroRekhaYExtents(pix, &shirorekha_top, &shirorekha_bottom, &shirorekha_ylevel); // Since the shiro rekha is also a stroke, its width is equal to the stroke // width. int stroke_width = shirorekha_bottom - shirorekha_top + 1; // Some safeguards to protect CCs we do not want to be split. // These are particularly useful when the word wasn't eliminated earlier // because xheight information was unavailable. if (shirorekha_ylevel > height / 2) { // Shirorekha shouldn't be in the bottom half of the word. if (devanagari_split_debuglevel > 0) { tprintf("Skipping splitting CC at (%d, %d): shirorekha in lower half..\n", word_left, word_top); } return; } if (stroke_width > height / 3) { // Even the boldest of fonts shouldn't do this. if (devanagari_split_debuglevel > 0) { tprintf("Skipping splitting CC at (%d, %d): stroke width too huge..\n", word_left, word_top); } return; } // Clear the ascender and descender regions of the word. // Obtain a vertical projection histogram for the resulting image. Box* box_to_clear = boxCreate(0, shirorekha_top - stroke_width / 3, width, 5 * stroke_width / 3); Pix* word_in_xheight = pixCopy(NULL, pix); pixClearInRect(word_in_xheight, box_to_clear); // Also clear any pixels which are below shirorekha_bottom + some leeway. // The leeway is set to xheight if the information is available, else it is a // multiplier applied to the stroke width. int leeway_to_keep = stroke_width * 3; if (xheight != kUnspecifiedXheight) { // This is because the xheight-region typically includes the shiro-rekha // inside it, i.e., the top of the xheight range corresponds to the top of // shiro-rekha. leeway_to_keep = xheight - stroke_width; } box_to_clear->y = shirorekha_bottom + leeway_to_keep; box_to_clear->h = height - box_to_clear->y; pixClearInRect(word_in_xheight, box_to_clear); boxDestroy(&box_to_clear); PixelHistogram vert_hist; vert_hist.ConstructVerticalCountHist(word_in_xheight); pixDestroy(&word_in_xheight); // If the number of black pixel in any column of the image is less than a // fraction of the stroke width, treat it as noise / a stray mark. Perform // these changes inside the vert_hist data itself, as that is used later on as // a bit vector for the final split decision at every column. for (int i = 0; i < width; ++i) { if (vert_hist.hist()[i] <= stroke_width / 4) vert_hist.hist()[i] = 0; else vert_hist.hist()[i] = 1; } // In order to split the line at any point, we make sure that the width of the // gap is atleast half the stroke width. int i = 0; int cur_component_width = 0; while (i < width) { if (!vert_hist.hist()[i]) { int j = 0; while (i + j < width && !vert_hist.hist()[i+j]) ++j; if (j >= stroke_width / 2 && cur_component_width >= stroke_width / 2) { // Perform a shiro-rekha split. The intervening region lies from i to // i+j-1. // A minimal single-pixel split makes the estimation of intra- and // inter-word spacing easier during page layout analysis, // whereas a maximal split may be needed for OCR, depending on // how the engine was trained. bool minimal_split = (split_strategy == MINIMAL_SPLIT); int split_width = minimal_split ? 1 : j; int split_left = minimal_split ? i + (j / 2) - (split_width / 2) : i; if (!minimal_split || (i != 0 && i + j != width)) { Box* box_to_clear = boxCreate(word_left + split_left, word_top + shirorekha_top - stroke_width / 3, split_width, 5 * stroke_width / 3); if (box_to_clear) { boxaAddBox(regions_to_clear, box_to_clear, L_CLONE); // Mark this in the debug image if needed. if (devanagari_split_debugimage) { pixRenderBoxArb(debug_image_, box_to_clear, 1, 128, 255, 128); } boxDestroy(&box_to_clear); cur_component_width = 0; } } } i += j; } else { ++i; ++cur_component_width; } } }
main(int argc, char **argv) { l_int32 i, j, w, h; l_int32 minsum[5] = { 2, 40, 50, 50, 70}; l_int32 skipdist[5] = { 5, 5, 10, 10, 30}; l_int32 delta[5] = { 2, 10, 10, 25, 40}; l_int32 maxbg[5] = {10, 15, 10, 20, 40}; BOX *box1, *box2, *box3, *box4; BOXA *boxa; PIX *pixs, *pixc, *pixt, *pixd, *pix32; PIXA *pixas, *pixad; L_REGPARAMS *rp; if (regTestSetup(argc, argv, &rp)) return 1; /* Generate and save 1 bpp masks */ pixas = pixaCreate(0); pixs = pixCreate(300, 250, 1); pixSetAll(pixs); box1 = boxCreate(50, 0, 140, 25); box2 = boxCreate(120, 100, 100, 25); box3 = boxCreate(75, 170, 80, 20); box4 = boxCreate(150, 80, 25, 70); pixClearInRect(pixs, box1); pixaAddPix(pixas, pixs, L_COPY); pixt = pixRotateOrth(pixs, 1); pixaAddPix(pixas, pixt, L_INSERT); pixClearInRect(pixs, box2); pixaAddPix(pixas, pixs, L_COPY); pixt = pixRotateOrth(pixs, 1); pixaAddPix(pixas, pixt, L_INSERT); pixClearInRect(pixs, box3); pixaAddPix(pixas, pixs, L_COPY); pixt = pixRotateOrth(pixs, 1); pixaAddPix(pixas, pixt, L_INSERT); pixClearInRect(pixs, box4); pixaAddPix(pixas, pixs, L_COPY); pixt = pixRotateOrth(pixs, 1); pixaAddPix(pixas, pixt, L_INSERT); boxDestroy(&box1); boxDestroy(&box2); boxDestroy(&box3); boxDestroy(&box4); pixDestroy(&pixs); /* Do 5 splittings on each of the 8 masks */ pixad = pixaCreate(0); for (j = 0; j < 8; j++) { pixt = pixaGetPix(pixas, j, L_CLONE); pixGetDimensions(pixt, &w, &h, NULL); pix32 = pixCreate(w, h, 32); pixSetAll(pix32); pixPaintThroughMask(pix32, pixt, 0, 0, 0xc0c0c000); pixSaveTiled(pix32, pixad, 1, 1, 30, 32); for (i = 0; i < 5; i++) { pixc = pixCopy(NULL, pix32); boxa = pixSplitComponentIntoBoxa(pixt, NULL, minsum[i], skipdist[i], delta[i], maxbg[i], 0, 1); /* boxaWriteStream(stderr, boxa); */ pixd = pixBlendBoxaRandom(pixc, boxa, 0.4); pixRenderBoxaArb(pixd, boxa, 2, 255, 0, 0); pixSaveTiled(pixd, pixad, 1, 0, 30, 32); pixDestroy(&pixd); pixDestroy(&pixc); boxaDestroy(&boxa); } pixDestroy(&pixt); pixDestroy(&pix32); } /* Display results */ pixd = pixaDisplay(pixad, 0, 0); regTestWritePixAndCheck(rp, pixd, IFF_PNG); /* 0 */ pixDisplayWithTitle(pixd, 100, 100, NULL, rp->display); pixDestroy(&pixd); pixaDestroy(&pixad); /* Put the 8 masks all together, and split 5 ways */ pixad = pixaCreate(0); pixs = pixaDisplayOnLattice(pixas, 325, 325); pixGetDimensions(pixs, &w, &h, NULL); pix32 = pixCreate(w, h, 32); pixSetAll(pix32); pixPaintThroughMask(pix32, pixs, 0, 0, 0xc0c0c000); pixSaveTiled(pix32, pixad, 1, 1, 30, 32); for (i = 0; i < 5; i++) { pixc = pixCopy(NULL, pix32); boxa = pixSplitIntoBoxa(pixs, minsum[i], skipdist[i], delta[i], maxbg[i], 0, 1); /* boxaWriteStream(stderr, boxa); */ pixd = pixBlendBoxaRandom(pixc, boxa, 0.4); pixRenderBoxaArb(pixd, boxa, 2, 255, 0, 0); pixSaveTiled(pixd, pixad, 1, 0, 30, 32); pixDestroy(&pixd); pixDestroy(&pixc); boxaDestroy(&boxa); } pixDestroy(&pix32); pixDestroy(&pixs); /* Display results */ pixd = pixaDisplay(pixad, 0, 0); regTestWritePixAndCheck(rp, pixd, IFF_PNG); /* 1 */ pixDisplayWithTitle(pixd, 600, 100, NULL, rp->display); pixDestroy(&pixd); pixaDestroy(&pixad); pixaDestroy(&pixas); regTestCleanup(rp); return 0; }