// Draws an outline of the blob's bounding box onto pix, using a color that
// encodes the blob's special text type:
//   BSTT_MATH    -> red
//   BSTT_DIGIT   -> cyan
//   BSTT_ITALIC  -> green
//   BSTT_UNCLEAR -> blue
//   BSTT_NONE and any other value -> yellow
// pix must be a non-null 32 bpp image and blob must be non-null; both
// conditions are enforced with ASSERT_HOST.
void EquationDetectBase::RenderSpecialText(Pix* pix, BLOBNBOX* blob) {
  ASSERT_HOST(pix != nullptr && pixGetDepth(pix) == 32 && blob != nullptr);
  const TBOX& tbox = blob->bounding_box();
  int height = pixGetHeight(pix);
  const int box_width = 5;

  // Coordinate translation: tesseract use left bottom as the original, while
  // leptonica uses left top as the original.
  Box* box = boxCreate(tbox.left(), height - tbox.top(),
                       tbox.width(), tbox.height());
  switch (blob->special_text_type()) {
    case BSTT_MATH:  // Red box.
      pixRenderBoxArb(pix, box, box_width, 255, 0, 0);
      break;
    case BSTT_DIGIT:  // cyan box.
      pixRenderBoxArb(pix, box, box_width, 0, 255, 255);
      break;
    case BSTT_ITALIC:  // Green box.
      pixRenderBoxArb(pix, box, box_width, 0, 255, 0);
      break;
    case BSTT_UNCLEAR:  // blue box.
      // Previously rendered as (0, 255, 0), which made unclear blobs
      // indistinguishable from italic ones.
      pixRenderBoxArb(pix, box, box_width, 0, 0, 255);
      break;
    case BSTT_NONE:
    default:
      // yellow box.
      pixRenderBoxArb(pix, box, box_width, 255, 255, 0);
      break;
  }
  boxDestroy(&box);
}
main(int argc, char **argv) { char *filein, *fileout; l_int32 d; BOX *box1, *box2, *box3, *box4; BOXA *boxa; PIX *pixs, *pixt1, *pixt2, *pixt3; PTA *pta; static char mainName[] = "graphicstest"; if (argc != 3) exit(ERROR_INT(" Syntax: graphicstest filein fileout", mainName, 1)); filein = argv[1]; fileout = argv[2]; if ((pixs = pixRead(filein)) == NULL) exit(ERROR_INT(" Syntax: pixs not made", mainName, 1)); d = pixGetDepth(pixs); if (d <= 8) pixt1 = pixConvertTo32(pixs); else pixt1 = pixClone(pixs); /* Paint on RGB */ pixRenderLineArb(pixt1, 450, 20, 850, 320, 5, 200, 50, 125); pixRenderLineArb(pixt1, 30, 40, 440, 40, 5, 100, 200, 25); pixRenderLineBlend(pixt1, 30, 60, 440, 70, 5, 115, 200, 120, 0.3); pixRenderLineBlend(pixt1, 30, 600, 440, 670, 9, 215, 115, 30, 0.5); pixRenderLineBlend(pixt1, 130, 700, 540, 770, 9, 255, 255, 250, 0.4); pixRenderLineBlend(pixt1, 130, 800, 540, 870, 9, 0, 0, 0, 0.4); box1 = boxCreate(70, 80, 300, 245); box2 = boxCreate(470, 180, 150, 205); box3 = boxCreate(520, 220, 160, 220); box4 = boxCreate(570, 260, 160, 220); boxa = boxaCreate(3); boxaAddBox(boxa, box2, L_INSERT); boxaAddBox(boxa, box3, L_INSERT); boxaAddBox(boxa, box4, L_INSERT); pixRenderBoxArb(pixt1, box1, 3, 200, 200, 25); pixRenderBoxaBlend(pixt1, boxa, 17, 200, 200, 25, 0.4, 1); pta = ptaCreate(5); ptaAddPt(pta, 250, 300); ptaAddPt(pta, 350, 450); ptaAddPt(pta, 400, 600); ptaAddPt(pta, 212, 512); ptaAddPt(pta, 180, 375); pixRenderPolylineBlend(pixt1, pta, 17, 25, 200, 200, 0.5, 1, 1); pixWrite(fileout, pixt1, IFF_JFIF_JPEG); pixDisplay(pixt1, 200, 200); pixDestroy(&pixs); pixDestroy(&pixt1); boxDestroy(&box1); boxaDestroy(&boxa); ptaDestroy(&pta); pixDestroy(&pixs); return 0; }
// Refreshes the words in the segmentation block list by using blobs in the // input block list. // The segmentation block list must be set. void ShiroRekhaSplitter::RefreshSegmentationWithNewBlobs( C_BLOB_LIST* new_blobs) { // The segmentation block list must have been specified. ASSERT_HOST(segmentation_block_list_); if (devanagari_split_debuglevel > 0) { tprintf("Before refreshing blobs:\n"); PrintSegmentationStats(segmentation_block_list_); tprintf("New Blobs found: %d\n", new_blobs->length()); } C_BLOB_LIST not_found_blobs; RefreshWordBlobsFromNewBlobs(segmentation_block_list_, new_blobs, ((devanagari_split_debugimage && debug_image_) ? ¬_found_blobs : NULL)); if (devanagari_split_debuglevel > 0) { tprintf("After refreshing blobs:\n"); PrintSegmentationStats(segmentation_block_list_); } if (devanagari_split_debugimage && debug_image_) { // Plot out the original blobs for which no match was found in the new // all_blobs list. C_BLOB_IT not_found_it(¬_found_blobs); for (not_found_it.mark_cycle_pt(); !not_found_it.cycled_list(); not_found_it.forward()) { C_BLOB* not_found = not_found_it.data(); TBOX not_found_box = not_found->bounding_box(); Box* box_to_plot = GetBoxForTBOX(not_found_box); pixRenderBoxArb(debug_image_, box_to_plot, 1, 255, 0, 255); boxDestroy(&box_to_plot); } // Plot out the blobs unused from all blobs. C_BLOB_IT all_blobs_it(new_blobs); for (all_blobs_it.mark_cycle_pt(); !all_blobs_it.cycled_list(); all_blobs_it.forward()) { C_BLOB* a_blob = all_blobs_it.data(); Box* box_to_plot = GetBoxForTBOX(a_blob->bounding_box()); pixRenderBoxArb(debug_image_, box_to_plot, 3, 0, 127, 0); boxDestroy(&box_to_plot); } } }
/*
 * DisplayBoxa()
 *
 * Builds a two-panel visualization of a boxa and returns it tiled in rows:
 *   panel 1: a 1 bpp image, sized to the boxa extent, with the pixels
 *            inside every box set
 *   panel 2: a 32 bpp image with all pixels set, each box outlined with
 *            line width 2 in (0,255,0) and the overall extent box outlined
 *            with line width 3 in (255,0,0)
 * The caller owns the returned pix.
 */
static PIX *
DisplayBoxa(BOXA  *boxa)
{
l_int32  extw, exth;
BOX     *extent;
PIX     *maskpix, *outlinepix, *tiled;
PIXA    *panels;

    boxaGetExtent(boxa, &extw, &exth, &extent);
    panels = pixaCreate(2);

        /* Panel 1: filled mask of all boxes */
    maskpix = pixCreate(extw, exth, 1);
    pixMaskBoxa(maskpix, maskpix, boxa, L_SET_PIXELS);
    pixaAddPix(panels, maskpix, L_INSERT);

        /* Panel 2: box outlines plus the extent outline */
    outlinepix = pixCreate(extw, exth, 32);
    pixSetAll(outlinepix);
    pixRenderBoxaArb(outlinepix, boxa, 2, 0, 255, 0);
    pixRenderBoxArb(outlinepix, extent, 3, 255, 0, 0);
    pixaAddPix(panels, outlinepix, L_INSERT);

    tiled = pixaDisplayTiledInRows(panels, 32, 1000, 1.0, 0, 30, 2);

    boxDestroy(&extent);
    pixaDestroy(&panels);
    return tiled;
}
int main(int argc, char **argv) { char filename[BUF_SIZE]; char *dirin, *rootname, *fname; l_int32 i, j, w, h, firstpage, npages, nfiles, ncomp; l_int32 index, ival, rval, gval, bval; BOX *box; BOXA *boxa; BOXAA *baa; JBDATA *data; JBCLASSER *classer; NUMA *nai; NUMAA *naa; SARRAY *safiles; PIX *pixs, *pixt1, *pixt2, *pixd; PIXCMAP *cmap; static char mainName[] = "wordsinorder"; if (argc != 3 && argc != 5) return ERROR_INT( " Syntax: wordsinorder dirin rootname [firstpage, npages]", mainName, 1); dirin = argv[1]; rootname = argv[2]; if (argc == 3) { firstpage = 0; npages = 0; } else { firstpage = atoi(argv[3]); npages = atoi(argv[4]); } /* Compute the word bounding boxes at 2x reduction, along with * the textlines that they are in. */ safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages); nfiles = sarrayGetCount(safiles); baa = boxaaCreate(nfiles); naa = numaaCreate(nfiles); for (i = 0; i < nfiles; i++) { fname = sarrayGetString(safiles, i, 0); if ((pixs = pixRead(fname)) == NULL) { L_WARNING("image file %d not read\n", mainName, i); continue; } pixGetWordBoxesInTextlines(pixs, 2, MIN_WORD_WIDTH, MIN_WORD_HEIGHT, MAX_WORD_WIDTH, MAX_WORD_HEIGHT, &boxa, &nai); boxaaAddBoxa(baa, boxa, L_INSERT); numaaAddNuma(naa, nai, L_INSERT); #if RENDER_PAGES /* Show the results on a 2x reduced image, where each * word is outlined and the color of the box depends on the * computed textline. 
*/ pixt1 = pixReduceRankBinary2(pixs, 2, NULL); pixGetDimensions(pixt1, &w, &h, NULL); pixd = pixCreate(w, h, 8); cmap = pixcmapCreateRandom(8, 1, 1); /* first color is black */ pixSetColormap(pixd, cmap); pixt2 = pixUnpackBinary(pixt1, 8, 1); pixRasterop(pixd, 0, 0, w, h, PIX_SRC | PIX_DST, pixt2, 0, 0); ncomp = boxaGetCount(boxa); for (j = 0; j < ncomp; j++) { box = boxaGetBox(boxa, j, L_CLONE); numaGetIValue(nai, j, &ival); index = 1 + (ival % 254); /* omit black and white */ pixcmapGetColor(cmap, index, &rval, &gval, &bval); pixRenderBoxArb(pixd, box, 2, rval, gval, bval); boxDestroy(&box); } snprintf(filename, BUF_SIZE, "%s.%05d", rootname, i); fprintf(stderr, "filename: %s\n", filename); pixWrite(filename, pixd, IFF_PNG); pixDestroy(&pixt1); pixDestroy(&pixt2); pixDestroy(&pixs); pixDestroy(&pixd); #endif /* RENDER_PAGES */ } boxaaDestroy(&baa); numaaDestroy(&naa); sarrayDestroy(&safiles); return 0; }
/*
 * Regression test for painting and colorizing operations.
 *
 * Runs a fixed sequence of coloring, highlighting, line/box rendering,
 * blending and colormap reconstruction steps.  The result of each step is
 * passed to regTestWritePixAndCheck(); the trailing numeric comment on
 * each of those calls is the 0-based ordinal of that check (0 .. 28).
 * Every checked image is also accumulated in pixa, which is converted to
 * a pdf at the end when rp->display is set.
 *
 * Returns 1 if regTestSetup() fails; otherwise returns the value of
 * regTestCleanup().
 */
int main(int argc, char **argv) {
    l_int32 index;
    l_uint32 val32;
    BOX *box, *box1, *box2, *box3, *box4, *box5;
    BOXA *boxa;
    L_KERNEL *kel;
    PIX *pixs, *pixg, *pixb, *pixd, *pixt, *pix1, *pix2, *pix3, *pix4;
    PIXA *pixa;
    PIXCMAP *cmap;
    L_REGPARAMS *rp;

    if (regTestSetup(argc, argv, &rp)) return 1;

    /* Accumulates every checked image for the optional pdf at the end */
    pixa = pixaCreate(0);

    /* Color non-white pixels on RGB */
    pixs = pixRead("lucasta-frag.jpg");
    pixt = pixConvert8To32(pixs);
    box = boxCreate(120, 30, 200, 200);
    pixColorGray(pixt, box, L_PAINT_DARK, 220, 0, 0, 255);
    regTestWritePixAndCheck(rp, pixt, IFF_JFIF_JPEG); /* 0 */
    /* Added with L_COPY because pixt is modified again right below */
    pixaAddPix(pixa, pixt, L_COPY);
    pixColorGray(pixt, NULL, L_PAINT_DARK, 220, 255, 100, 100);
    regTestWritePixAndCheck(rp, pixt, IFF_JFIF_JPEG); /* 1 */
    pixaAddPix(pixa, pixt, L_INSERT);
    boxDestroy(&box);

    /* Color non-white pixels on colormap */
    pixt = pixThresholdTo4bpp(pixs, 6, 1);
    box = boxCreate(120, 30, 200, 200);
    pixColorGray(pixt, box, L_PAINT_DARK, 220, 0, 0, 255);
    regTestWritePixAndCheck(rp, pixt, IFF_PNG); /* 2 */
    pixaAddPix(pixa, pixt, L_COPY);
    pixColorGray(pixt, NULL, L_PAINT_DARK, 220, 255, 100, 100);
    regTestWritePixAndCheck(rp, pixt, IFF_PNG); /* 3 */
    pixaAddPix(pixa, pixt, L_INSERT);
    boxDestroy(&box);

    /* Color non-black pixels on RGB */
    pixt = pixConvert8To32(pixs);
    box = boxCreate(120, 30, 200, 200);
    pixColorGray(pixt, box, L_PAINT_LIGHT, 20, 0, 0, 255);
    regTestWritePixAndCheck(rp, pixt, IFF_PNG); /* 4 */
    pixaAddPix(pixa, pixt, L_COPY);
    pixColorGray(pixt, NULL, L_PAINT_LIGHT, 80, 255, 100, 100);
    regTestWritePixAndCheck(rp, pixt, IFF_PNG); /* 5 */
    pixaAddPix(pixa, pixt, L_INSERT);
    boxDestroy(&box);

    /* Color non-black pixels on colormap */
    pixt = pixThresholdTo4bpp(pixs, 6, 1);
    box = boxCreate(120, 30, 200, 200);
    pixColorGray(pixt, box, L_PAINT_LIGHT, 20, 0, 0, 255);
    regTestWritePixAndCheck(rp, pixt, IFF_PNG); /* 6 */
    pixaAddPix(pixa, pixt, L_COPY);
    pixColorGray(pixt, NULL, L_PAINT_LIGHT, 20, 255, 100, 100);
    regTestWritePixAndCheck(rp, pixt, IFF_PNG); /* 7 */
    pixaAddPix(pixa, pixt, L_INSERT);
    boxDestroy(&box);

    /* Add highlight color to RGB */
    /* Three regions are handled the same way: clip the region from pixs,
     * threshold at 180 and invert to get a mask, then paint a color
     * through that mask onto pixt at the region's location. */
    pixt = pixConvert8To32(pixs);
    box = boxCreate(507, 5, 385, 45);
    pixg = pixClipRectangle(pixs, box, NULL);
    pixb = pixThresholdToBinary(pixg, 180);
    pixInvert(pixb, pixb);
    pixDisplayWrite(pixb, 1);
    composeRGBPixel(50, 0, 250, &val32);
    pixPaintThroughMask(pixt, pixb, box->x, box->y, val32);
    boxDestroy(&box);
    pixDestroy(&pixg);
    pixDestroy(&pixb);
    box = boxCreate(236, 107, 262, 40);
    pixg = pixClipRectangle(pixs, box, NULL);
    pixb = pixThresholdToBinary(pixg, 180);
    pixInvert(pixb, pixb);
    composeRGBPixel(250, 0, 50, &val32);
    pixPaintThroughMask(pixt, pixb, box->x, box->y, val32);
    boxDestroy(&box);
    pixDestroy(&pixg);
    pixDestroy(&pixb);
    box = boxCreate(222, 208, 247, 43);
    pixg = pixClipRectangle(pixs, box, NULL);
    pixb = pixThresholdToBinary(pixg, 180);
    pixInvert(pixb, pixb);
    composeRGBPixel(60, 250, 60, &val32);
    pixPaintThroughMask(pixt, pixb, box->x, box->y, val32);
    regTestWritePixAndCheck(rp, pixt, IFF_PNG); /* 8 */
    pixaAddPix(pixa, pixt, L_INSERT);
    boxDestroy(&box);
    pixDestroy(&pixg);
    pixDestroy(&pixb);

    /* Add highlight color to colormap */
    /* Same three regions and colors as above; here the colormap index
     * looked up for (255, 255, 255) is the one passed to
     * pixSetSelectCmap() for each region. */
    pixt = pixThresholdTo4bpp(pixs, 5, 1);
    cmap = pixGetColormap(pixt);
    pixcmapGetIndex(cmap, 255, 255, 255, &index);
    box = boxCreate(507, 5, 385, 45);
    pixSetSelectCmap(pixt, box, index, 50, 0, 250);
    boxDestroy(&box);
    box = boxCreate(236, 107, 262, 40);
    pixSetSelectCmap(pixt, box, index, 250, 0, 50);
    boxDestroy(&box);
    box = boxCreate(222, 208, 247, 43);
    pixSetSelectCmap(pixt, box, index, 60, 250, 60);
    regTestWritePixAndCheck(rp, pixt, IFF_PNG); /* 9 */
    pixaAddPix(pixa, pixt, L_INSERT);
    boxDestroy(&box);

    /* Paint lines on RGB */
    pixt = pixConvert8To32(pixs);
    pixRenderLineArb(pixt, 450, 20, 850, 320, 5, 200, 50, 125);
    pixRenderLineArb(pixt, 30, 40, 440, 40, 5, 100, 200, 25);
    box = boxCreate(70, 80, 300, 245);
    pixRenderBoxArb(pixt, box, 3, 200, 200, 25);
    regTestWritePixAndCheck(rp, pixt, IFF_JFIF_JPEG); /* 10 */
    pixaAddPix(pixa, pixt, L_INSERT);
    boxDestroy(&box);

    /* Paint lines on colormap */
    pixt = pixThresholdTo4bpp(pixs, 5, 1);
    pixRenderLineArb(pixt, 450, 20, 850, 320, 5, 200, 50, 125);
    pixRenderLineArb(pixt, 30, 40, 440, 40, 5, 100, 200, 25);
    box = boxCreate(70, 80, 300, 245);
    pixRenderBoxArb(pixt, box, 3, 200, 200, 25);
    regTestWritePixAndCheck(rp, pixt, IFF_PNG); /* 11 */
    pixaAddPix(pixa, pixt, L_INSERT);
    boxDestroy(&box);

    /* Blend lines on RGB */
    pixt = pixConvert8To32(pixs);
    pixRenderLineBlend(pixt, 450, 20, 850, 320, 5, 200, 50, 125, 0.35);
    pixRenderLineBlend(pixt, 30, 40, 440, 40, 5, 100, 200, 25, 0.35);
    box = boxCreate(70, 80, 300, 245);
    pixRenderBoxBlend(pixt, box, 3, 200, 200, 25, 0.6);
    regTestWritePixAndCheck(rp, pixt, IFF_JFIF_JPEG); /* 12 */
    pixaAddPix(pixa, pixt, L_INSERT);
    boxDestroy(&box);

    /* Colorize gray on cmapped image. */
    /* The same pix2 is colorized cumulatively, one more box per check;
     * a copy is saved after each of the first four steps. */
    pix1 = pixRead("lucasta.150.jpg");
    pix2 = pixThresholdTo4bpp(pix1, 7, 1);
    box1 = boxCreate(73, 206, 140, 27);
    pixColorGrayCmap(pix2, box1, L_PAINT_LIGHT, 130, 207, 43);
    regTestWritePixAndCheck(rp, pix2, IFF_PNG); /* 13 */
    pixaAddPix(pixa, pix2, L_COPY);
    if (rp->display) pixPrintStreamInfo(stderr, pix2, "One box added");

    box2 = boxCreate(255, 404, 197, 25);
    pixColorGrayCmap(pix2, box2, L_PAINT_LIGHT, 230, 67, 119);
    regTestWritePixAndCheck(rp, pix2, IFF_PNG); /* 14 */
    pixaAddPix(pixa, pix2, L_COPY);
    if (rp->display) pixPrintStreamInfo(stderr, pix2, "Two boxes added");

    box3 = boxCreate(122, 756, 224, 22);
    pixColorGrayCmap(pix2, box3, L_PAINT_DARK, 230, 67, 119);
    regTestWritePixAndCheck(rp, pix2, IFF_PNG); /* 15 */
    pixaAddPix(pixa, pix2, L_COPY);
    if (rp->display) pixPrintStreamInfo(stderr, pix2, "Three boxes added");

    box4 = boxCreate(11, 780, 147, 22);
    pixColorGrayCmap(pix2, box4, L_PAINT_LIGHT, 70, 137, 229);
    regTestWritePixAndCheck(rp, pix2, IFF_PNG); /* 16 */
    pixaAddPix(pixa, pix2, L_COPY);
    if (rp->display) pixPrintStreamInfo(stderr, pix2, "Four boxes added");

    box5 = boxCreate(163, 605, 78, 22);
    pixColorGrayCmap(pix2, box5, L_PAINT_LIGHT, 70, 137, 229);
    regTestWritePixAndCheck(rp, pix2, IFF_PNG); /* 17 */
    pixaAddPix(pixa, pix2, L_INSERT);
    if (rp->display) pixPrintStreamInfo(stderr, pix2, "Five boxes added");
    pixDestroy(&pix1);
    boxDestroy(&box1);
    boxDestroy(&box2);
    boxDestroy(&box3);
    boxDestroy(&box4);
    boxDestroy(&box5);
    pixDestroy(&pixs);

    /* Make a gray image and identify the fg pixels (val > 230) */
    pixs = pixRead("feyn-fract.tif");
    pix1 = pixConvertTo8(pixs, 0);
    kel = makeGaussianKernel(2, 2, 1.5, 1.0);
    pix2 = pixConvolve(pix1, kel, 8, 1);
    pix3 = pixThresholdToBinary(pix2, 230);
    boxa = pixConnComp(pix3, NULL, 8);
    pixDestroy(&pixs);
    pixDestroy(&pix1);
    pixDestroy(&pix3);
    kernelDestroy(&kel);

    /* Color the individual components in the gray image */
    pix4 = pixColorGrayRegions(pix2, boxa, L_PAINT_DARK, 230, 255, 0, 0);
    regTestWritePixAndCheck(rp, pix4, IFF_PNG); /* 18 */
    pixaAddPix(pixa, pix4, L_INSERT);
    pixDisplayWithTitle(pix4, 0, 0, NULL, rp->display);

    /* Threshold to 10 levels of gray */
    pix3 = pixThresholdOn8bpp(pix2, 10, 1);
    regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 19 */
    /* Added with L_COPY because pix3 is used and modified again below */
    pixaAddPix(pixa, pix3, L_COPY);

    /* Color the individual components in the cmapped image */
    pix4 = pixColorGrayRegions(pix3, boxa, L_PAINT_DARK, 230, 255, 0, 0);
    regTestWritePixAndCheck(rp, pix4, IFF_PNG); /* 20 */
    pixaAddPix(pixa, pix4, L_INSERT);
    pixDisplayWithTitle(pix4, 0, 100, NULL, rp->display);
    boxaDestroy(&boxa);

    /* Color the entire gray image (not component-wise) */
    pixColorGray(pix2, NULL, L_PAINT_DARK, 230, 255, 0, 0);
    regTestWritePixAndCheck(rp, pix2, IFF_PNG); /* 21 */
    pixaAddPix(pixa, pix2, L_INSERT);

    /* Color the entire cmapped image (not component-wise) */
    pixColorGray(pix3, NULL, L_PAINT_DARK, 230, 255, 0, 0);
    regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 22 */
    pixaAddPix(pixa, pix3, L_INSERT);

    /* Reconstruct cmapped images */
    pixd = ReconstructByValue(rp, "weasel2.4c.png");
    regTestWritePixAndCheck(rp, pixd, IFF_PNG); /* 23 */
    pixaAddPix(pixa, pixd, L_INSERT);
    pixd = ReconstructByValue(rp, "weasel4.11c.png");
    regTestWritePixAndCheck(rp, pixd, IFF_PNG); /* 24 */
    pixaAddPix(pixa, pixd, L_INSERT);
    pixd = ReconstructByValue(rp, "weasel8.240c.png");
    regTestWritePixAndCheck(rp, pixd, IFF_PNG); /* 25 */
    pixaAddPix(pixa, pixd, L_INSERT);

    /* Fake reconstruct cmapped images, with one color into a band */
    pixd = FakeReconstructByBand(rp, "weasel2.4c.png");
    regTestWritePixAndCheck(rp, pixd, IFF_PNG); /* 26 */
    pixaAddPix(pixa, pixd, L_INSERT);
    pixd = FakeReconstructByBand(rp, "weasel4.11c.png");
    regTestWritePixAndCheck(rp, pixd, IFF_PNG); /* 27 */
    pixaAddPix(pixa, pixd, L_INSERT);
    pixd = FakeReconstructByBand(rp, "weasel8.240c.png");
    regTestWritePixAndCheck(rp, pixd, IFF_PNG); /* 28 */
    pixaAddPix(pixa, pixd, L_INSERT);

    /* If in testing mode, make a pdf */
    /* The pdf is generated only when rp->display is set */
    if (rp->display) {
        pixaConvertToPdf(pixa, 100, 1.0, L_FLATE_ENCODE, 0, "Colorize and paint", "/tmp/lept/regout/paint.pdf");
        L_INFO("Output pdf: /tmp/lept/regout/paint.pdf\n", rp->testname);
    }

    pixaDestroy(&pixa);
    return regTestCleanup(rp);
}
// Top-level method to perform splitting based on current settings. // Returns true if a split was actually performed. // split_for_pageseg should be true if the splitting is being done prior to // page segmentation. This mode uses the flag // pageseg_devanagari_split_strategy to determine the splitting strategy. bool ShiroRekhaSplitter::Split(bool split_for_pageseg) { SplitStrategy split_strategy = split_for_pageseg ? pageseg_split_strategy_ : ocr_split_strategy_; if (split_strategy == NO_SPLIT) { return false; // Nothing to do. } ASSERT_HOST(split_strategy == MINIMAL_SPLIT || split_strategy == MAXIMAL_SPLIT); ASSERT_HOST(orig_pix_); if (devanagari_split_debuglevel > 0) { tprintf("Splitting shiro-rekha ...\n"); tprintf("Split strategy = %s\n", split_strategy == MINIMAL_SPLIT ? "Minimal" : "Maximal"); tprintf("Initial pageseg available = %s\n", segmentation_block_list_ ? "yes" : "no"); } // Create a copy of original image to store the splitting output. pixDestroy(&splitted_image_); splitted_image_ = pixCopy(NULL, orig_pix_); // Initialize debug image if required. if (devanagari_split_debugimage) { pixDestroy(&debug_image_); debug_image_ = pixConvertTo32(orig_pix_); } // Determine all connected components in the input image. A close operation // may be required prior to this, depending on the current settings. Pix* pix_for_ccs = pixClone(orig_pix_); if (perform_close_ && global_xheight_ != kUnspecifiedXheight && !segmentation_block_list_) { if (devanagari_split_debuglevel > 0) { tprintf("Performing a global close operation..\n"); } // A global measure is available for xheight, but no local information // exists. pixDestroy(&pix_for_ccs); pix_for_ccs = pixCopy(NULL, orig_pix_); PerformClose(pix_for_ccs, global_xheight_); } Pixa* ccs; Boxa* tmp_boxa = pixConnComp(pix_for_ccs, &ccs, 8); boxaDestroy(&tmp_boxa); pixDestroy(&pix_for_ccs); // Iterate over all connected components. 
Get their bounding boxes and clip // out the image regions corresponding to these boxes from the original image. // Conditionally run splitting on each of them. Boxa* regions_to_clear = boxaCreate(0); for (int i = 0; i < pixaGetCount(ccs); ++i) { Box* box = ccs->boxa->box[i]; Pix* word_pix = pixClipRectangle(orig_pix_, box, NULL); ASSERT_HOST(word_pix); int xheight = GetXheightForCC(box); if (xheight == kUnspecifiedXheight && segmentation_block_list_ && devanagari_split_debugimage) { pixRenderBoxArb(debug_image_, box, 1, 255, 0, 0); } // If some xheight measure is available, attempt to pre-eliminate small // blobs from the shiro-rekha process. This is primarily to save the CCs // corresponding to punctuation marks/small dots etc which are part of // larger graphemes. if (xheight == kUnspecifiedXheight || (box->w > xheight / 3 && box->h > xheight / 2)) { SplitWordShiroRekha(split_strategy, word_pix, xheight, box->x, box->y, regions_to_clear); } else if (devanagari_split_debuglevel > 0) { tprintf("CC dropped from splitting: %d,%d (%d, %d)\n", box->x, box->y, box->w, box->h); } pixDestroy(&word_pix); } // Actually clear the boxes now. for (int i = 0; i < boxaGetCount(regions_to_clear); ++i) { Box* box = boxaGetBox(regions_to_clear, i, L_CLONE); pixClearInRect(splitted_image_, box); boxDestroy(&box); } boxaDestroy(®ions_to_clear); pixaDestroy(&ccs); if (devanagari_split_debugimage) { DumpDebugImage(split_for_pageseg ? "pageseg_split_debug.png" : "ocr_split_debug.png"); } return true; }
// Returns a list of regions (boxes) which should be cleared in the original // image so as to perform shiro-rekha splitting. Pix is assumed to carry one // (or less) word only. Xheight measure could be the global estimate, the row // estimate, or unspecified. If unspecified, over splitting may occur, since a // conservative estimate of stroke width along with an associated multiplier // is used in its place. It is advisable to have a specified xheight when // splitting for classification/training. // A vertical projection histogram of all the on-pixels in the input pix is // computed. The maxima of this histogram is regarded as an approximate location // of the shiro-rekha. By descending on the maxima's peak on both sides, // stroke width of shiro-rekha is estimated. // A horizontal projection histogram is computed for a sub-image of the input // image, which extends from just below the shiro-rekha down to a certain // leeway. The leeway depends on the input xheight, if provided, else a // conservative multiplier on approximate stroke width is used (which may lead // to over-splitting). void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, Pix* pix, int xheight, int word_left, int word_top, Boxa* regions_to_clear) { if (split_strategy == NO_SPLIT) { return; } int width = pixGetWidth(pix); int height = pixGetHeight(pix); // Statistically determine the yextents of the shiro-rekha. int shirorekha_top, shirorekha_bottom, shirorekha_ylevel; GetShiroRekhaYExtents(pix, &shirorekha_top, &shirorekha_bottom, &shirorekha_ylevel); // Since the shiro rekha is also a stroke, its width is equal to the stroke // width. int stroke_width = shirorekha_bottom - shirorekha_top + 1; // Some safeguards to protect CCs we do not want to be split. // These are particularly useful when the word wasn't eliminated earlier // because xheight information was unavailable. if (shirorekha_ylevel > height / 2) { // Shirorekha shouldn't be in the bottom half of the word. 
if (devanagari_split_debuglevel > 0) { tprintf("Skipping splitting CC at (%d, %d): shirorekha in lower half..\n", word_left, word_top); } return; } if (stroke_width > height / 3) { // Even the boldest of fonts shouldn't do this. if (devanagari_split_debuglevel > 0) { tprintf("Skipping splitting CC at (%d, %d): stroke width too huge..\n", word_left, word_top); } return; } // Clear the ascender and descender regions of the word. // Obtain a vertical projection histogram for the resulting image. Box* box_to_clear = boxCreate(0, shirorekha_top - stroke_width / 3, width, 5 * stroke_width / 3); Pix* word_in_xheight = pixCopy(NULL, pix); pixClearInRect(word_in_xheight, box_to_clear); // Also clear any pixels which are below shirorekha_bottom + some leeway. // The leeway is set to xheight if the information is available, else it is a // multiplier applied to the stroke width. int leeway_to_keep = stroke_width * 3; if (xheight != kUnspecifiedXheight) { // This is because the xheight-region typically includes the shiro-rekha // inside it, i.e., the top of the xheight range corresponds to the top of // shiro-rekha. leeway_to_keep = xheight - stroke_width; } box_to_clear->y = shirorekha_bottom + leeway_to_keep; box_to_clear->h = height - box_to_clear->y; pixClearInRect(word_in_xheight, box_to_clear); boxDestroy(&box_to_clear); PixelHistogram vert_hist; vert_hist.ConstructVerticalCountHist(word_in_xheight); pixDestroy(&word_in_xheight); // If the number of black pixel in any column of the image is less than a // fraction of the stroke width, treat it as noise / a stray mark. Perform // these changes inside the vert_hist data itself, as that is used later on as // a bit vector for the final split decision at every column. for (int i = 0; i < width; ++i) { if (vert_hist.hist()[i] <= stroke_width / 4) vert_hist.hist()[i] = 0; else vert_hist.hist()[i] = 1; } // In order to split the line at any point, we make sure that the width of the // gap is atleast half the stroke width. 
int i = 0; int cur_component_width = 0; while (i < width) { if (!vert_hist.hist()[i]) { int j = 0; while (i + j < width && !vert_hist.hist()[i+j]) ++j; if (j >= stroke_width / 2 && cur_component_width >= stroke_width / 2) { // Perform a shiro-rekha split. The intervening region lies from i to // i+j-1. // A minimal single-pixel split makes the estimation of intra- and // inter-word spacing easier during page layout analysis, // whereas a maximal split may be needed for OCR, depending on // how the engine was trained. bool minimal_split = (split_strategy == MINIMAL_SPLIT); int split_width = minimal_split ? 1 : j; int split_left = minimal_split ? i + (j / 2) - (split_width / 2) : i; if (!minimal_split || (i != 0 && i + j != width)) { Box* box_to_clear = boxCreate(word_left + split_left, word_top + shirorekha_top - stroke_width / 3, split_width, 5 * stroke_width / 3); if (box_to_clear) { boxaAddBox(regions_to_clear, box_to_clear, L_CLONE); // Mark this in the debug image if needed. if (devanagari_split_debugimage) { pixRenderBoxArb(debug_image_, box_to_clear, 1, 128, 255, 128); } boxDestroy(&box_to_clear); cur_component_width = 0; } } } i += j; } else { ++i; ++cur_component_width; } } }
/*!
 *  pixFindPageForeground()
 *
 *      Input:  pixs (full resolution (any type or depth)
 *              threshold (for binarization; typically about 128)
 *              mindist (min distance of text from border to allow
 *                       cleaning near border; at 2x reduction, this
 *                       should be larger than 50; typically about 70)
 *              erasedist (when conditions are satisfied, erase anything
 *                         within this distance of the edge;
 *                         typically 30 at 2x reduction)
 *              pagenum (use for debugging when called repeatedly; labels
 *                       debug images that are assembled into pdfdir)
 *              showmorph (set to a nonzero value to show steps in
 *                         generating masks; this is typically used
 *                         for debugging region extraction; use 0 for
 *                         no debug output from the morphological steps)
 *              display (set to 1 to display mask and selected region
 *                       for debugging a single page)
 *              pdfdir (subdirectory of /tmp where images showing the
 *                      result are placed when called repeatedly; use
 *                      null if no output requested)
 *      Return: box (region including foreground, with some pixel noise
 *                   removed), or null if not found
 *
 *  Notes:
 *      (1) This doesn't simply crop to the fg.  It attempts to remove
 *          pixel noise and junk at the edge of the image before cropping.
 *          The input @threshold is used if pixs is not 1 bpp.
 *      (2) There are several debugging options, determined by the
 *          last 4 arguments.
 *      (3) If you want pdf output of results when called repeatedly,
 *          the pagenum arg labels the images written, which go into
 *          /tmp/<pdfdir>/<pagenum>.png.  In that case,
 *          you would clean out the /tmp directory before calling this
 *          function on each page:
 *              lept_rmdir(pdfdir);
 *              lept_mkdir(pdfdir);
 *      (4) The two morphological sequences are given debug indices
 *          -1 and -2 when @showmorph is nonzero, and 0 (no debug output)
 *          otherwise.
 */
BOX *
pixFindPageForeground(PIX         *pixs,
                      l_int32      threshold,
                      l_int32      mindist,
                      l_int32      erasedist,
                      l_int32      pagenum,
                      l_int32      showmorph,
                      l_int32      display,
                      const char  *pdfdir)
{
char     buf[64];
l_int32  flag, nbox, intersects;
l_int32  w, h, bx, by, bw, bh, left, right, top, bottom;
PIX     *pixb, *pixb2, *pixseed, *pixsf, *pixm, *pix1, *pixg2;
BOX     *box, *boxfg, *boxin, *boxd;
BOXA    *ba1, *ba2;

    PROCNAME("pixFindPageForeground");

    if (!pixs)
        return (BOX *)ERROR_PTR("pixs not defined", procName, NULL);

        /* Binarize, downscale by 0.5, remove the noise to generate a seed,
         * and do a seedfill back from the seed into those 8-connected
         * components of the binarized image for which there was at least
         * one seed pixel.  Also clear out any components that are within
         * 10 pixels of the edge at 2x reduction.  */
    flag = (showmorph) ? -1 : 0;  /* if showmorph is nonzero, write
                                   * intermediate images to
                                   * /tmp/seq_output_1.pdf */
    pixb = pixConvertTo1(pixs, threshold);
    pixb2 = pixScale(pixb, 0.5, 0.5);
    pixseed = pixMorphSequence(pixb2, "o1.2 + c9.9 + o3.5", flag);
    pixsf = pixSeedfillBinary(NULL, pixseed, pixb2, 8);
    pixSetOrClearBorder(pixsf, 10, 10, 10, 10, PIX_SET);
    pixm = pixRemoveBorderConnComps(pixsf, 8);
    if (display) pixDisplay(pixm, 100, 100);

        /* Now, where is the main block of text?  We want to remove noise near
         * the edge of the image, but to do that, we have to be convinced that
         * (1) there is noise and (2) it is far enough from the text block
         * and close enough to the edge.  For each edge, if the block
         * is more than mindist from that edge, then clean 'erasedist'
         * pixels from the edge.
         *
         * The debug index for this sequence is -2 when showmorph is set,
         * and 0 otherwise.  It was previously 'flag - 1', which evaluates
         * to -1 when showmorph == 0 and so requested debug pdf output
         * even with debugging turned off. */
    pix1 = pixMorphSequence(pixm, "c50.50", (showmorph) ? flag - 1 : 0);
    ba1 = pixConnComp(pix1, NULL, 8);
    ba2 = boxaSort(ba1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL);
    pixGetDimensions(pix1, &w, &h, NULL);
    nbox = boxaGetCount(ba2);
    if (nbox > 1) {
            /* ba2 is sorted by decreasing area; box 0 is the largest */
        box = boxaGetBox(ba2, 0, L_CLONE);
        boxGetGeometry(box, &bx, &by, &bw, &bh);
        left = (bx > mindist) ? erasedist : 0;
        right = (w - bx - bw > mindist) ? erasedist : 0;
        top = (by > mindist) ? erasedist : 0;
        bottom = (h - by - bh > mindist) ? erasedist : 0;
        pixSetOrClearBorder(pixm, left, right, top, bottom, PIX_CLR);
        boxDestroy(&box);
    }
    pixDestroy(&pix1);
    boxaDestroy(&ba1);
    boxaDestroy(&ba2);

        /* Locate the foreground region; don't bother cropping */
    pixClipToForeground(pixm, NULL, &boxfg);

        /* Sanity check the fg region.  Make sure it's not confined
         * to a thin boundary on the left and right sides of the image,
         * in which case it is likely to be noise. */
    if (boxfg) {
        boxin = boxCreate(0.1 * w, 0, 0.8 * w, h);
        boxIntersects(boxfg, boxin, &intersects);
        if (!intersects) {
            L_INFO("found only noise on page %d\n", procName, pagenum);
            boxDestroy(&boxfg);
        }
        boxDestroy(&boxin);
    }

    boxd = NULL;
    if (!boxfg) {
        L_INFO("no fg region found for page %d\n", procName, pagenum);
    } else {
        boxAdjustSides(boxfg, boxfg, -2, 2, -2, 2);  /* tiny expansion */
            /* boxfg was found at 2x reduction; scale by 2 for the result */
        boxd = boxTransform(boxfg, 0, 0, 2.0, 2.0);

            /* Write image showing box for this page.  This is to be
             * bundled up into a pdf of all the pages, which can be
             * generated by convertFilesToPdf()  */
        if (pdfdir) {
            pixg2 = pixConvert1To4Cmap(pixb);
            pixRenderBoxArb(pixg2, boxd, 3, 255, 0, 0);
            snprintf(buf, sizeof(buf), "/tmp/%s/%05d.png", pdfdir, pagenum);
            if (display) pixDisplay(pixg2, 700, 100);
            pixWrite(buf, pixg2, IFF_PNG);
            pixDestroy(&pixg2);
        }
    }

    pixDestroy(&pixb);
    pixDestroy(&pixb2);
    pixDestroy(&pixseed);
    pixDestroy(&pixsf);
    pixDestroy(&pixm);
    boxDestroy(&boxfg);
    return boxd;
}
/*! * \brief boxaDisplayTiled() * * \param[in] boxas * \param[in] pixa [optional] background for each box * \param[in] first index of first box * \param[in] last index of last box; use -1 to go to end * \param[in] maxwidth of output image * \param[in] linewidth width of box outlines, before scaling * \param[in] scalefactor applied to every box; use 1.0 for no scaling * \param[in] background 0 for white, 1 for black; this is the color * of the spacing between the images * \param[in] spacing between images, and on outside * \param[in] border width of black border added to each image; * use 0 for no border * \return pixd of tiled images of boxes, or NULL on error * * <pre> * Notes: * (1) Displays each box separately in a tiled 32 bpp image. * (2) If pixa is defined, it must have the same count as the boxa, * and it will be a background over with each box is rendered. * If pixa is not defined, the boxes will be rendered over * blank images of identical size. * (3) See pixaDisplayTiledInRows() for other parameters. 
* </pre> */ PIX * boxaDisplayTiled(BOXA *boxas, PIXA *pixa, l_int32 first, l_int32 last, l_int32 maxwidth, l_int32 linewidth, l_float32 scalefactor, l_int32 background, l_int32 spacing, l_int32 border) { char buf[32]; l_int32 i, n, npix, w, h, fontsize; L_BMF *bmf; BOX *box; BOXA *boxa; PIX *pix1, *pix2, *pixd; PIXA *pixat; PROCNAME("boxaDisplayTiled"); if (!boxas) return (PIX *)ERROR_PTR("boxas not defined", procName, NULL); boxa = boxaSaveValid(boxas, L_COPY); n = boxaGetCount(boxa); if (pixa) { npix = pixaGetCount(pixa); if (n != npix) { boxaDestroy(&boxa); return (PIX *)ERROR_PTR("boxa and pixa counts differ", procName, NULL); } } first = L_MAX(0, first); if (last < 0) last = n - 1; if (first >= n) { boxaDestroy(&boxa); return (PIX *)ERROR_PTR("invalid first", procName, NULL); } if (last >= n) { L_WARNING("last = %d is beyond max index = %d; adjusting\n", procName, last, n - 1); last = n - 1; } if (first > last) { boxaDestroy(&boxa); return (PIX *)ERROR_PTR("first > last", procName, NULL); } /* Because the bitmap font will be reduced when tiled, choose the * font size inversely with the scale factor. 
*/ if (scalefactor > 0.8) fontsize = 6; else if (scalefactor > 0.6) fontsize = 10; else if (scalefactor > 0.4) fontsize = 14; else if (scalefactor > 0.3) fontsize = 18; else fontsize = 20; bmf = bmfCreate(NULL, fontsize); pixat = pixaCreate(n); boxaGetExtent(boxa, &w, &h, NULL); for (i = first; i <= last; i++) { box = boxaGetBox(boxa, i, L_CLONE); if (!pixa) { pix1 = pixCreate(w, h, 32); pixSetAll(pix1); } else { pix1 = pixaGetPix(pixa, i, L_COPY); } pixSetBorderVal(pix1, 0, 0, 0, 2, 0x0000ff00); snprintf(buf, sizeof(buf), "%d", i); pix2 = pixAddSingleTextblock(pix1, bmf, buf, 0x00ff0000, L_ADD_BELOW, NULL); pixDestroy(&pix1); pixRenderBoxArb(pix2, box, linewidth, 255, 0, 0); pixaAddPix(pixat, pix2, L_INSERT); boxDestroy(&box); } bmfDestroy(&bmf); boxaDestroy(&boxa); pixd = pixaDisplayTiledInRows(pixat, 32, maxwidth, scalefactor, background, spacing, border); pixaDestroy(&pixat); return pixd; }