// Helper to remove an enclosing circle from an image. // If there isn't one, then the image will most likely get badly mangled. // The returned pix must be pixDestroyed after use. NULL may be returned // if the image doesn't meet the trivial conditions that it uses to determine // success. static Pix* RemoveEnclosingCircle(Pix* pixs) { Pix* pixsi = pixInvert(NULL, pixs); Pix* pixc = pixCreateTemplate(pixs); pixSetOrClearBorder(pixc, 1, 1, 1, 1, PIX_SET); pixSeedfillBinary(pixc, pixc, pixsi, 4); pixInvert(pixc, pixc); pixDestroy(&pixsi); Pix* pixt = pixAnd(NULL, pixs, pixc); l_int32 max_count; pixCountConnComp(pixt, 8, &max_count); // The count has to go up before we start looking for the minimum. l_int32 min_count = MAX_INT32; Pix* pixout = NULL; for (int i = 1; i < kMaxCircleErosions; i++) { pixDestroy(&pixt); pixErodeBrick(pixc, pixc, 3, 3); pixt = pixAnd(NULL, pixs, pixc); l_int32 count; pixCountConnComp(pixt, 8, &count); if (i == 1 || count > max_count) { max_count = count; min_count = count; } else if (i > 1 && count < min_count) { min_count = count; pixDestroy(&pixout); pixout = pixCopy(NULL, pixt); // Save the best. } else if (count >= min_count) { break; // We have passed by the best. } } pixDestroy(&pixt); pixDestroy(&pixc); return pixout; }
/*! * pixaClipToPix() * * Input: pixas * pixs * Return: pixad, or null on error * * Notes: * (1) This is intended for use in situations where pixas * was originally generated from the input pixs. * (2) Returns a pixad where each pix in pixas is ANDed * with its associated region of the input pixs. This * region is specified by the the box that is associated * with the pix. * (3) In a typical application of this function, pixas has * a set of region masks, so this generates a pixa of * the parts of pixs that correspond to each region * mask component, along with the bounding box for * the region. */ PIXA * pixaClipToPix(PIXA *pixas, PIX *pixs) { l_int32 i, n; BOX *box; PIX *pix, *pixc; PIXA *pixad; PROCNAME("pixaClipToPix"); if (!pixas) return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); if (!pixs) return (PIXA *)ERROR_PTR("pixs not defined", procName, NULL); n = pixaGetCount(pixas); if ((pixad = pixaCreate(n)) == NULL) return (PIXA *)ERROR_PTR("pixad not made", procName, NULL); for (i = 0; i < n; i++) { pix = pixaGetPix(pixas, i, L_CLONE); box = pixaGetBox(pixas, i, L_COPY); pixc = pixClipRectangle(pixs, box, NULL); pixAnd(pixc, pixc, pix); pixaAddPix(pixad, pixc, L_INSERT); pixaAddBox(pixad, box, L_INSERT); pixDestroy(&pix); } return pixad; }
l_int32 main(int argc, char **argv) { l_int32 irval, igval, ibval; l_float32 rval, gval, bval, fract, fgfract; L_BMF *bmf; BOX *box; BOXA *boxa; FPIX *fpix; PIX *pixs, *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7; PIX *pix8, *pix9, *pix10, *pix11, *pix12, *pix13, *pix14, *pix15; PIXA *pixa; L_REGPARAMS *rp; if (regTestSetup(argc, argv, &rp)) return 1; pixa = pixaCreate(0); pixs = pixRead("breviar38.150.jpg"); /* pixs = pixRead("breviar32.150.jpg"); */ pixaAddPix(pixa, pixs, L_CLONE); regTestWritePixAndCheck(rp, pixs, IFF_JFIF_JPEG); /* 0 */ pixDisplayWithTitle(pixs, 0, 0, "Input image", rp->display); /* Extract the blue component, which is small in all the text * regions, including in the highlight color region */ pix1 = pixGetRGBComponent(pixs, COLOR_BLUE); pixaAddPix(pixa, pix1, L_CLONE); regTestWritePixAndCheck(rp, pix1, IFF_JFIF_JPEG); /* 1 */ pixDisplayWithTitle(pix1, 200, 0, "Blue component", rp->display); /* Do a background normalization, with the background set to * approximately 200 */ pix2 = pixBackgroundNormSimple(pix1, NULL, NULL); pixaAddPix(pixa, pix2, L_COPY); regTestWritePixAndCheck(rp, pix2, IFF_JFIF_JPEG); /* 2 */ pixDisplayWithTitle(pix2, 400, 0, "BG normalized to 200", rp->display); /* Do a linear transform on the gray pixels, with 50 going to * black and 160 going to white. 50 is sufficiently low to * make both the red and black print quite dark. Quantize * to a few equally spaced gray levels. This is the image * to which highlight color will be applied. */ pixGammaTRC(pix2, pix2, 1.0, 50, 160); pix3 = pixThresholdOn8bpp(pix2, 7, 1); pixaAddPix(pixa, pix3, L_CLONE); regTestWritePixAndCheck(rp, pix3, IFF_JFIF_JPEG); /* 3 */ pixDisplayWithTitle(pix3, 600, 0, "Basic quantized with white bg", rp->display); /* Identify the regions of red text. First, make a mask * consisting of all pixels such that (R-B)/B is larger * than 2.0. This will have all the red, plus a lot of * the dark pixels. */ fpix = pixComponentFunction(pixs, 1.0, 0.0, -1.0, 0.0, 0.0, 1.0); pix4 = fpixThresholdToPix(fpix, 2.0); pixInvert(pix4, pix4); /* red plus some dark text */ pixaAddPix(pixa, pix4, L_CLONE); regTestWritePixAndCheck(rp, pix4, IFF_PNG); /* 4 */ pixDisplayWithTitle(pix4, 800, 0, "Red plus dark pixels", rp->display); /* Make a mask consisting of all the red and background pixels */ pix5 = pixGetRGBComponent(pixs, COLOR_RED); pix6 = pixThresholdToBinary(pix5, 128); pixInvert(pix6, pix6); /* red plus background (white) */ /* Intersect the two masks to get a mask consisting of pixels * that are almost certainly red. This is the seed. */ pix7 = pixAnd(NULL, pix4, pix6); /* red only (seed) */ pixaAddPix(pixa, pix7, L_COPY); regTestWritePixAndCheck(rp, pix7, IFF_PNG); /* 5 */ pixDisplayWithTitle(pix7, 0, 600, "Seed for red color", rp->display); /* Make the clipping mask by thresholding the image with * the background cleaned to white. */ pix8 = pixThresholdToBinary(pix2, 230); /* mask */ pixaAddPix(pixa, pix8, L_CLONE); regTestWritePixAndCheck(rp, pix8, IFF_PNG); /* 6 */ pixDisplayWithTitle(pix8, 200, 600, "Clipping mask for red components", rp->display); /* Fill into the mask from the seed */ pixSeedfillBinary(pix7, pix7, pix8, 8); /* filled: red plus touching */ regTestWritePixAndCheck(rp, pix7, IFF_PNG); /* 7 */ pixDisplayWithTitle(pix7, 400, 600, "Red component mask filled", rp->display); /* Remove long horizontal and vertical lines from the filled result */ pix9 = pixMorphSequence(pix7, "o40.1", 0); pixSubtract(pix7, pix7, pix9); /* remove long horizontal lines */ pixDestroy(&pix9); pix9 = pixMorphSequence(pix7, "o1.40", 0); pixSubtract(pix7, pix7, pix9); /* remove long vertical lines */ /* Close the regions to be colored */ pix10 = pixMorphSequence(pix7, "c5.1", 0); pixaAddPix(pixa, pix10, L_CLONE); regTestWritePixAndCheck(rp, pix10, IFF_PNG); /* 8 */ pixDisplayWithTitle(pix10, 600, 600, "Components defining regions allowing coloring", rp->display); /* Sanity check on amount to be colored. Only accept images * with less than 10% of all the pixels with highlight color */ pixForegroundFraction(pix10, &fgfract); if (fgfract >= 0.10) { L_INFO("too much highlighting: fract = %6.3f; removing it\n", rp->testname, fgfract); pixClearAll(pix10); pixSetPixel(pix10, 0, 0, 1); } /* Get the bounding boxes of the regions to be colored */ boxa = pixConnCompBB(pix10, 8); /* Get a color to paint that is representative of the * actual highlight color in the image. Scale each * color component up from the average by an amount necessary * to saturate the red. Then divide the green and * blue components by 2.0. */ pixGetAverageMaskedRGB(pixs, pix7, 0, 0, 1, L_MEAN_ABSVAL, &rval, &gval, &bval); fract = 255.0 / rval; irval = lept_roundftoi(fract * rval); igval = lept_roundftoi(fract * gval / 2.0); ibval = lept_roundftoi(fract * bval / 2.0); fprintf(stderr, "(r,g,b) = (%d,%d,%d)\n", irval, igval, ibval); /* Color the quantized gray version in the selected regions */ pix11 = pixColorGrayRegions(pix3, boxa, L_PAINT_DARK, 220, irval, igval, ibval); pixaAddPix(pixa, pix11, L_CLONE); regTestWritePixAndCheck(rp, pix11, IFF_PNG); /* 9 */ pixDisplayWithTitle(pix11, 800, 600, "Final colored result", rp->display); pixaAddPix(pixa, pixs, L_CLONE); /* Test colorization on gray and cmapped gray */ pix12 = pixColorGrayRegions(pix2, boxa, L_PAINT_DARK, 220, 0, 255, 0); pixaAddPix(pixa, pix12, L_CLONE); regTestWritePixAndCheck(rp, pix12, IFF_PNG); /* 10 */ pixDisplayWithTitle(pix12, 900, 600, "Colorizing boxa gray", rp->display); box = boxCreate(200, 200, 250, 350); pix13 = pixCopy(NULL, pix2); pixColorGray(pix13, box, L_PAINT_DARK, 220, 0, 0, 255); pixaAddPix(pixa, pix13, L_CLONE); regTestWritePixAndCheck(rp, pix13, IFF_PNG); /* 11 */ pixDisplayWithTitle(pix13, 1000, 600, "Colorizing box gray", rp->display); pix14 = pixThresholdTo4bpp(pix2, 6, 1); pix15 = pixColorGrayRegions(pix14, boxa, L_PAINT_DARK, 220, 0, 0, 255); pixaAddPix(pixa, pix15, L_CLONE); regTestWritePixAndCheck(rp, pix15, IFF_PNG); /* 12 */ pixDisplayWithTitle(pix15, 1100, 600, "Colorizing boxa cmap", rp->display); pixColorGrayCmap(pix14, box, L_PAINT_DARK, 0, 255, 255); pixaAddPix(pixa, pix14, L_CLONE); regTestWritePixAndCheck(rp, pix14, IFF_PNG); /* 13 */ pixDisplayWithTitle(pix14, 1200, 600, "Colorizing box cmap", rp->display); boxDestroy(&box); /* Generate a pdf of the intermediate results */ lept_mkdir("lept"); L_INFO("Writing to /tmp/lept/colorize.pdf\n", rp->testname); pixaConvertToPdf(pixa, 90, 1.0, 0, 0, "Colorizing highlighted text", "/tmp/lept/colorize.pdf"); pixaDestroy(&pixa); fpixDestroy(&fpix); boxDestroy(&box); boxaDestroy(&boxa); pixDestroy(&pixs); pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); pixDestroy(&pix4); pixDestroy(&pix5); pixDestroy(&pix6); pixDestroy(&pix7); pixDestroy(&pix8); pixDestroy(&pix9); pixDestroy(&pix10); pixDestroy(&pix11); pixDestroy(&pix12); pixDestroy(&pix13); pixDestroy(&pix14); pixDestroy(&pix15); /* Test the color detector */ pixa = pixaCreate(7); bmf = bmfCreate("./fonts", 4); pix1 = TestForRedColor(rp, "brev06.75.jpg", 1, bmf); /* 14 */ pixaAddPix(pixa, pix1, L_INSERT); pix1 = TestForRedColor(rp, "brev10.75.jpg", 0, bmf); /* 15 */ pixaAddPix(pixa, pix1, L_INSERT); pix1 = TestForRedColor(rp, "brev14.75.jpg", 1, bmf); /* 16 */ pixaAddPix(pixa, pix1, L_INSERT); pix1 = TestForRedColor(rp, "brev20.75.jpg", 1, bmf); /* 17 */ pixaAddPix(pixa, pix1, L_INSERT); pix1 = TestForRedColor(rp, "brev36.75.jpg", 0, bmf); /* 18 */ pixaAddPix(pixa, pix1, L_INSERT); pix1 = TestForRedColor(rp, "brev53.75.jpg", 1, bmf); /* 19 */ pixaAddPix(pixa, pix1, L_INSERT); pix1 = TestForRedColor(rp, "brev56.75.jpg", 1, bmf); /* 20 */ pixaAddPix(pixa, pix1, L_INSERT); /* Generate a pdf of the color detector results */ L_INFO("Writing to /tmp/lept/colordetect.pdf\n", rp->testname); pixaConvertToPdf(pixa, 45, 1.0, 0, 0, "Color detection", "/tmp/lept/colordetect.pdf"); pixaDestroy(&pixa); bmfDestroy(&bmf); return regTestCleanup(rp); }
* that might be used * * -------------------------------------------------------------------- */ #if 0 pixd = pixCreateTemplate(pixs); pixd = pixDilate(NULL, pixs, sel); pixd = pixErode(NULL, pixs, sel); pixd = pixOpen(NULL, pixs, sel); pixd = pixClose(NULL, pixs, sel); pixDilate(pixd, pixs, sel); pixErode(pixd, pixs, sel); pixOpen(pixd, pixs, sel); pixClose(pixd, pixs, sel); pixAnd(pixd, pixd, pixs); pixOr(pixd, pixd, pixs); pixXor(pixd, pixd, pixs); pixSubtract(pixd, pixd, pixs); pixInvert(pixd, pixs); pixd = pixAnd(NULL, pixd, pixs); pixd = pixOr(NULL, pixd, pixs); pixd = pixXor(NULL, pixd, pixs); pixd = pixSubtract(NULL, pixd, pixs); pixd = pixInvert(NULL, pixs); pixInvert(pixs, pixs); #endif /* 0 */
l_int32 DoPageSegmentation(PIX *pixs, /* should be at least 300 ppi */ l_int32 which) /* 1, 2, 3, 4 */ { char buf[256]; l_int32 zero; BOXA *boxatm, *boxahm; PIX *pixr; /* image reduced to 150 ppi */ PIX *pixhs; /* image of halftone seed, 150 ppi */ PIX *pixm; /* image of mask of components, 150 ppi */ PIX *pixhm1; /* image of halftone mask, 150 ppi */ PIX *pixhm2; /* image of halftone mask, 300 ppi */ PIX *pixht; /* image of halftone components, 150 ppi */ PIX *pixnht; /* image without halftone components, 150 ppi */ PIX *pixi; /* inverted image, 150 ppi */ PIX *pixvws; /* image of vertical whitespace, 150 ppi */ PIX *pixm1; /* image of closed textlines, 150 ppi */ PIX *pixm2; /* image of refined text line mask, 150 ppi */ PIX *pixm3; /* image of refined text line mask, 300 ppi */ PIX *pixb1; /* image of text block mask, 150 ppi */ PIX *pixb2; /* image of text block mask, 300 ppi */ PIX *pixnon; /* image of non-text or halftone, 150 ppi */ PIX *pix1, *pix2, *pix3, *pix4; PIXA *pixa; PIXCMAP *cmap; PTAA *ptaa; l_int32 ht_flag = 0; l_int32 ws_flag = 0; l_int32 text_flag = 0; l_int32 block_flag = 0; PROCNAME("DoPageSegmentation"); if (which == 1) ht_flag = 1; else if (which == 2) ws_flag = 1; else if (which == 3) text_flag = 1; else if (which == 4) block_flag = 1; else return ERROR_INT("invalid parameter: not in [1...4]", procName, 1); pixa = pixaCreate(0); lept_mkdir("lept/livre"); /* Reduce to 150 ppi */ pix1 = pixScaleToGray2(pixs); if (ws_flag || ht_flag || block_flag) pixaAddPix(pixa, pix1, L_COPY); if (which == 1) pixWrite("/tmp/lept/livre/orig.gray.150.png", pix1, IFF_PNG); pixDestroy(&pix1); pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); /* Get seed for halftone parts */ pix1 = pixReduceRankBinaryCascade(pixr, 4, 4, 3, 0); pix2 = pixOpenBrick(NULL, pix1, 5, 5); pixhs = pixExpandBinaryPower2(pix2, 8); if (ht_flag) pixaAddPix(pixa, pixhs, L_COPY); if (which == 1) pixWrite("/tmp/lept/livre/htseed.150.png", pixhs, IFF_PNG); pixDestroy(&pix1); pixDestroy(&pix2); /* Get mask for connected regions */ pixm = pixCloseSafeBrick(NULL, pixr, 4, 4); if (ht_flag) pixaAddPix(pixa, pixm, L_COPY); if (which == 1) pixWrite("/tmp/lept/livre/ccmask.150.png", pixm, IFF_PNG); /* Fill seed into mask to get halftone mask */ pixhm1 = pixSeedfillBinary(NULL, pixhs, pixm, 4); if (ht_flag) pixaAddPix(pixa, pixhm1, L_COPY); if (which == 1) pixWrite("/tmp/lept/livre/htmask.150.png", pixhm1, IFF_PNG); pixhm2 = pixExpandBinaryPower2(pixhm1, 2); /* Extract halftone stuff */ pixht = pixAnd(NULL, pixhm1, pixr); if (which == 1) pixWrite("/tmp/lept/livre/ht.150.png", pixht, IFF_PNG); /* Extract non-halftone stuff */ pixnht = pixXor(NULL, pixht, pixr); if (text_flag) pixaAddPix(pixa, pixnht, L_COPY); if (which == 1) pixWrite("/tmp/lept/livre/text.150.png", pixnht, IFF_PNG); pixZero(pixht, &zero); if (zero) fprintf(stderr, "No halftone parts found\n"); else fprintf(stderr, "Halftone parts found\n"); /* Get bit-inverted image */ pixi = pixInvert(NULL, pixnht); if (ws_flag) pixaAddPix(pixa, pixi, L_COPY); if (which == 1) pixWrite("/tmp/lept/livre/invert.150.png", pixi, IFF_PNG); /* The whitespace mask will break textlines where there * is a large amount of white space below or above. * We can prevent this by identifying regions of the * inverted image that have large horizontal (bigger than * the separation between columns) and significant * vertical extent (bigger than the separation between * textlines), and subtracting this from the whitespace mask. */ pix1 = pixMorphCompSequence(pixi, "o80.60", 0); pix2 = pixSubtract(NULL, pixi, pix1); if (ws_flag) pixaAddPix(pixa, pix2, L_COPY); pixDestroy(&pix1); /* Identify vertical whitespace by opening inverted image */ pix3 = pixOpenBrick(NULL, pix2, 5, 1); /* removes thin vertical lines */ pixvws = pixOpenBrick(NULL, pix3, 1, 200); /* gets long vertical lines */ if (text_flag || ws_flag) pixaAddPix(pixa, pixvws, L_COPY); if (which == 1) pixWrite("/tmp/lept/livre/vertws.150.png", pixvws, IFF_PNG); pixDestroy(&pix2); pixDestroy(&pix3); /* Get proto (early processed) text line mask. */ /* First close the characters and words in the textlines */ pixm1 = pixCloseSafeBrick(NULL, pixnht, 30, 1); if (text_flag) pixaAddPix(pixa, pixm1, L_COPY); if (which == 1) pixWrite("/tmp/lept/livre/textmask1.150.png", pixm1, IFF_PNG); /* Next open back up the vertical whitespace corridors */ pixm2 = pixSubtract(NULL, pixm1, pixvws); if (which == 1) pixWrite("/tmp/lept/livre/textmask2.150.png", pixm2, IFF_PNG); /* Do a small opening to remove noise */ pixOpenBrick(pixm2, pixm2, 3, 3); if (text_flag) pixaAddPix(pixa, pixm2, L_COPY); if (which == 1) pixWrite("/tmp/lept/livre/textmask3.150.png", pixm2, IFF_PNG); pixm3 = pixExpandBinaryPower2(pixm2, 2); /* Join pixels vertically to make text block mask */ pixb1 = pixMorphSequence(pixm2, "c1.10 + o4.1", 0); if (block_flag) pixaAddPix(pixa, pixb1, L_COPY); if (which == 1) pixWrite("/tmp/lept/livre/textblock1.150.png", pixb1, IFF_PNG); /* Solidify the textblock mask and remove noise: * (1) For each c.c., close the blocks and dilate slightly * to form a solid mask. * (2) Small horizontal closing between components * (3) Open the white space between columns, again * (4) Remove small components */ pix1 = pixMorphSequenceByComponent(pixb1, "c30.30 + d3.3", 8, 0, 0, NULL); pixCloseSafeBrick(pix1, pix1, 10, 1); if (block_flag) pixaAddPix(pixa, pix1, L_COPY); pix2 = pixSubtract(NULL, pix1, pixvws); pix3 = pixSelectBySize(pix2, 25, 5, 8, L_SELECT_IF_BOTH, L_SELECT_IF_GTE, NULL); if (block_flag) pixaAddPix(pixa, pix3, L_COPY); if (which == 1) pixWrite("/tmp/lept/livre/textblock2.150.png", pix3, IFF_PNG); pixb2 = pixExpandBinaryPower2(pix3, 2); pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); /* Identify the outlines of each textblock */ ptaa = pixGetOuterBordersPtaa(pixb2); pix1 = pixRenderRandomCmapPtaa(pixb2, ptaa, 1, 8, 1); cmap = pixGetColormap(pix1); pixcmapResetColor(cmap, 0, 130, 130, 130); /* set interior to gray */ if (which == 1) pixWrite("/tmp/lept/livre/textblock3.300.png", pix1, IFF_PNG); pixDisplayWithTitle(pix1, 480, 360, "textblock mask with outlines", DFLAG); ptaaDestroy(&ptaa); pixDestroy(&pix1); /* Fill line mask (as seed) into the original */ pix1 = pixSeedfillBinary(NULL, pixm3, pixs, 8); pixOr(pixm3, pixm3, pix1); pixDestroy(&pix1); if (which == 1) pixWrite("/tmp/lept/livre/textmask.300.png", pixm3, IFF_PNG); pixDisplayWithTitle(pixm3, 480, 360, "textline mask 4", DFLAG); /* Fill halftone mask (as seed) into the original */ pix1 = pixSeedfillBinary(NULL, pixhm2, pixs, 8); pixOr(pixhm2, pixhm2, pix1); pixDestroy(&pix1); if (which == 1) pixWrite("/tmp/lept/livre/htmask.300.png", pixhm2, IFF_PNG); pixDisplayWithTitle(pixhm2, 520, 390, "halftonemask 2", DFLAG); /* Find objects that are neither text nor halftones */ pix1 = pixSubtract(NULL, pixs, pixm3); /* remove text pixels */ pixnon = pixSubtract(NULL, pix1, pixhm2); /* remove halftone pixels */ pixDestroy(&pix1); if (which == 1) pixWrite("/tmp/lept/livre/other.300.png", pixnon, IFF_PNG); pixDisplayWithTitle(pixnon, 540, 420, "other stuff", DFLAG); /* Write out b.b. for text line mask and halftone mask components */ boxatm = pixConnComp(pixm3, NULL, 4); boxahm = pixConnComp(pixhm2, NULL, 8); if (which == 1) { boxaWrite("/tmp/lept/livre/textmask.boxa", boxatm); boxaWrite("/tmp/lept/livre/htmask.boxa", boxahm); } pix1 = pixaDisplayTiledAndScaled(pixa, 8, 250, 4, 0, 25, 2); pixDisplay(pix1, 0, 375 * (which - 1)); snprintf(buf, sizeof(buf), "/tmp/lept/livre/segout.%d.png", which); pixWrite(buf, pix1, IFF_PNG); pixDestroy(&pix1); pixaDestroy(&pixa); /* clean up to test with valgrind */ pixDestroy(&pixr); pixDestroy(&pixhs); pixDestroy(&pixm); pixDestroy(&pixhm1); pixDestroy(&pixhm2); pixDestroy(&pixht); pixDestroy(&pixi); pixDestroy(&pixnht); pixDestroy(&pixvws); pixDestroy(&pixm1); pixDestroy(&pixm2); pixDestroy(&pixm3); pixDestroy(&pixb1); pixDestroy(&pixb2); pixDestroy(&pixnon); boxaDestroy(&boxatm); boxaDestroy(&boxahm); return 0; }
/*! * Note: this method is generally inferior to pixHasColorRegions(); it * is retained as a reference only * * \brief pixFindColorRegionsLight() * * \param[in] pixs 32 bpp rgb * \param[in] pixm [optional] 1 bpp mask image * \param[in] factor subsample factor; integer >= 1 * \param[in] darkthresh threshold to eliminate dark pixels (e.g., text) * from consideration; typ. 70; -1 for default. * \param[in] lightthresh threshold for minimum gray value at 95% rank * near white; typ. 220; -1 for default * \param[in] mindiff minimum difference from 95% rank value, used * to count darker pixels; typ. 50; -1 for default * \param[in] colordiff minimum difference in (max - min) component to * qualify as a color pixel; typ. 40; -1 for default * \param[out] pcolorfract fraction of 'color' pixels found * \param[out] pcolormask1 [optional] mask over background color, if any * \param[out] pcolormask2 [optional] filtered mask over background color * \param[out] pixadb [optional] debug intermediate results * \return 0 if OK, 1 on error * * <pre> * Notes: * (1) This function tries to determine if there is a significant * color or darker region on a scanned page image where part * of the image is very close to "white". It will also allow * extraction of small regions of lightly colored pixels. * If the background is darker (and reddish), use instead * pixHasColorRegions2(). * (2) If %pixm exists, only pixels under fg are considered. Typically, * the inverse of %pixm would have fg pixels over a photograph. * (3) There are four thresholds. * * %darkthresh: ignore pixels darker than this (typ. fg text). * We make a 1 bpp mask of these pixels, and then dilate it to * remove all vestiges of fg from their vicinity. * * %lightthresh: let val95 be the pixel value for which 95% * of the non-masked pixels have a lower value (darker) of * their min component. Then if val95 is darker than * %lightthresh, the image is not considered to have a * light bg, and this returns 0.0 for %colorfract. * * %mindiff: we are interested in the fraction of pixels that * have two conditions. The first is that their min component * is at least %mindiff darker than val95. * * %colordiff: the second condition is that the max-min diff * of the pixel components exceeds %colordiff. * (4) This returns in %pcolorfract the fraction of pixels that have * both a min component that is at least %mindiff below that at the * 95% rank value (where 100% rank is the lightest value), and * a max-min diff that is at least %colordiff. Without the * %colordiff constraint, gray pixels of intermediate value * could get flagged by this function. * (5) No masks are returned unless light color pixels are found. * If colorfract > 0.0 and %pcolormask1 is defined, this returns * a 1 bpp mask with fg pixels over the color background. * This mask may have some holes in it. * (6) If colorfract > 0.0 and %pcolormask2 is defined, this returns * a filtered version of colormask1. The two changes are * (a) small holes have been filled * (b) components near the border have been removed. * The latter insures that dark pixels near the edge of the * image are not included. * (7) To generate a boxa of rectangular regions from the overlap * of components in the filtered mask: * boxa1 = pixConnCompBB(colormask2, 8); * boxa2 = boxaCombineOverlaps(boxa1); * This is done here in debug mode. * </pre> */ static l_int32 pixFindColorRegionsLight(PIX *pixs, PIX *pixm, l_int32 factor, l_int32 darkthresh, l_int32 lightthresh, l_int32 mindiff, l_int32 colordiff, l_float32 *pcolorfract, PIX **pcolormask1, PIX **pcolormask2, PIXA *pixadb) { l_int32 lightbg, w, h, count; l_float32 ratio, val95, rank; BOXA *boxa1, *boxa2; NUMA *nah; PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pixm1, *pixm2, *pixm3; PROCNAME("pixFindColorRegionsLight"); if (pcolormask1) *pcolormask1 = NULL; if (pcolormask2) *pcolormask2 = NULL; if (!pcolorfract) return ERROR_INT("&colorfract not defined", procName, 1); *pcolorfract = 0.0; if (!pixs || pixGetDepth(pixs) != 32) return ERROR_INT("pixs not defined or not 32 bpp", procName, 1); if (factor < 1) factor = 1; if (darkthresh < 0) darkthresh = 70; /* defaults */ if (lightthresh < 0) lightthresh = 220; if (mindiff < 0) mindiff = 50; if (colordiff < 0) colordiff = 40; /* Check if pixm covers most of the image. If so, just return. */ pixGetDimensions(pixs, &w, &h, NULL); if (pixm) { pixCountPixels(pixm, &count, NULL); ratio = (l_float32)count / ((l_float32)(w) * h); if (ratio > 0.7) { if (pixadb) L_INFO("pixm has big fg: %f5.2\n", procName, ratio); return 0; } } /* Make a mask pixm1 over the dark pixels in the image: * convert to gray using the average of the components; * threshold using %darkthresh; do a small dilation; * combine with pixm. */ pix1 = pixConvertRGBToGray(pixs, 0.33, 0.34, 0.33); if (pixadb) pixaAddPix(pixadb, pixs, L_COPY); if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); pixm1 = pixThresholdToBinary(pix1, darkthresh); pixDilateBrick(pixm1, pixm1, 7, 7); if (pixadb) pixaAddPix(pixadb, pixm1, L_COPY); if (pixm) { pixOr(pixm1, pixm1, pixm); if (pixadb) pixaAddPix(pixadb, pixm1, L_COPY); } pixDestroy(&pix1); /* Convert to gray using the minimum component value and * find the gray value at rank 0.95, that represents the light * pixels in the image. If it is too dark, quit. */ pix1 = pixConvertRGBToGrayMinMax(pixs, L_SELECT_MIN); pix2 = pixInvert(NULL, pixm1); /* pixels that are not dark */ pixGetRankValueMasked(pix1, pix2, 0, 0, factor, 0.95, &val95, &nah); pixDestroy(&pix2); if (pixadb) { L_INFO("val at 0.95 rank = %5.1f\n", procName, val95); gplotSimple1(nah, GPLOT_PNG, "/tmp/lept/histo1", "gray histo"); pix3 = pixRead("/tmp/lept/histo1.png"); pix4 = pixExpandReplicate(pix3, 2); pixaAddPix(pixadb, pix4, L_INSERT); pixDestroy(&pix3); } lightbg = (l_int32)val95 >= lightthresh; numaDestroy(&nah); if (!lightbg) { pixDestroy(&pix1); pixDestroy(&pixm1); return 0; } /* Make mask pixm2 over pixels that are darker than val95 - mindiff. */ pixm2 = pixThresholdToBinary(pix1, val95 - mindiff); if (pixadb) pixaAddPix(pixadb, pixm2, L_COPY); pixDestroy(&pix1); /* Make a mask pixm3 over pixels that have some color saturation, * with a (max - min) component difference >= %colordiff, * and combine using AND with pixm2. */ pix2 = pixConvertRGBToGrayMinMax(pixs, L_CHOOSE_MAXDIFF); pixm3 = pixThresholdToBinary(pix2, colordiff); pixDestroy(&pix2); pixInvert(pixm3, pixm3); /* need pixels above threshold */ if (pixadb) pixaAddPix(pixadb, pixm3, L_COPY); pixAnd(pixm2, pixm2, pixm3); if (pixadb) pixaAddPix(pixadb, pixm2, L_COPY); pixDestroy(&pixm3); /* Subtract the dark pixels represented by pixm1. * pixm2 now holds all the color pixels of interest */ pixSubtract(pixm2, pixm2, pixm1); pixDestroy(&pixm1); if (pixadb) pixaAddPix(pixadb, pixm2, L_COPY); /* But we're not quite finished. Remove pixels from any component * that is touching the image border. False color pixels can * sometimes be found there if the image is much darker near * the border, due to oxidation or reduced illumination. */ pixm3 = pixRemoveBorderConnComps(pixm2, 8); pixDestroy(&pixm2); if (pixadb) pixaAddPix(pixadb, pixm3, L_COPY); /* Get the fraction of light color pixels */ pixCountPixels(pixm3, &count, NULL); *pcolorfract = (l_float32)count / (w * h); if (pixadb) { if (count == 0) L_INFO("no light color pixels found\n", procName); else L_INFO("fraction of light color pixels = %5.3f\n", procName, *pcolorfract); } /* Debug: extract the color pixels from pixs */ if (pixadb && count > 0) { /* Use pixm3 to extract the color pixels */ pix3 = pixCreateTemplate(pixs); pixSetAll(pix3); pixCombineMasked(pix3, pixs, pixm3); pixaAddPix(pixadb, pix3, L_INSERT); /* Use additional filtering to extract the color pixels */ pix3 = pixCloseSafeBrick(NULL, pixm3, 15, 15); pixaAddPix(pixadb, pix3, L_INSERT); pix5 = pixCreateTemplate(pixs); pixSetAll(pix5); pixCombineMasked(pix5, pixs, pix3); pixaAddPix(pixadb, pix5, L_INSERT); /* Get the combined bounding boxes of the mask components * in pix3, and extract those pixels from pixs. */ boxa1 = pixConnCompBB(pix3, 8); boxa2 = boxaCombineOverlaps(boxa1, NULL); pix4 = pixCreateTemplate(pix3); pixMaskBoxa(pix4, pix4, boxa2, L_SET_PIXELS); pixaAddPix(pixadb, pix4, L_INSERT); pix5 = pixCreateTemplate(pixs); pixSetAll(pix5); pixCombineMasked(pix5, pixs, pix4); pixaAddPix(pixadb, pix5, L_INSERT); boxaDestroy(&boxa1); boxaDestroy(&boxa2); pixaAddPix(pixadb, pixs, L_COPY); } /* Optional colormask returns */ if (pcolormask2 && count > 0) *pcolormask2 = pixCloseSafeBrick(NULL, pixm3, 15, 15); if (pcolormask1 && count > 0) *pcolormask1 = pixm3; else pixDestroy(&pixm3); return 0; }
/* ----------------------------------------------------- */ void ProcessDigits(l_int32 index) { char rootname[8] = "digit5"; char buf[64]; l_int32 i, nc, ns, same; NUMA *na1; PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pix6; PIXA *pixa1, *pixa2, *pixa3; /* Read the unfiltered, unscaled pixa of twenty-five 5s */ snprintf(buf, sizeof(buf), "digits/%s.orig-25.pa", rootname); pixa1 = pixaRead(buf); /* Number and show the input images */ snprintf(buf, sizeof(buf), "/tmp/lept/digit/%s.orig-num", rootname); PixaDisplayNumbered(pixa1, buf); /* Remove some of them */ na1 = numaCreateFromString(removeset); pixaRemoveSelected(pixa1, na1); numaDestroy(&na1); snprintf(buf, sizeof(buf), "/tmp/lept/digit/%s.filt.pa", rootname); pixaWrite(buf, pixa1); /* Number and show the filtered images */ snprintf(buf, sizeof(buf), "/tmp/lept/digit/%s.filt-num", rootname); PixaDisplayNumbered(pixa1, buf); /* Extract the largest c.c., clip to the foreground, * and scale the result to a fixed size. */ nc = pixaGetCount(pixa1); pixa2 = pixaCreate(nc); for (i = 0; i < nc; i++) { pix1 = pixaGetPix(pixa1, i, L_CLONE); /* A threshold of 140 gives reasonable results */ pix2 = pixThresholdToBinary(pix1, 140); /* Join nearly touching pieces */ pix3 = pixCloseSafeBrick(NULL, pix2, 5, 5); /* Take the largest (by area) connected component */ pix4 = pixFilterComponentBySize(pix3, 0, L_SELECT_BY_AREA, 8, NULL); /* Extract the original 1 bpp pixels that have been * covered by the closing operation */ pixAnd(pix4, pix4, pix2); /* Grab the result as an image with no surrounding whitespace */ pixClipToForeground(pix4, &pix5, NULL); /* Rescale the result to the canonical size */ pix6 = pixScaleToSize(pix5, 20, 30); pixaAddPix(pixa2, pix6, L_INSERT); pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); pixDestroy(&pix4); pixDestroy(&pix5); } /* Add the index (a "5") in the text field of each pix; save pixa2 */ snprintf(buf, sizeof(buf), "%d", index); for (i = 0; i < nc; i++) { pix1 = pixaGetPix(pixa2, i, L_CLONE); pixSetText(pix1, buf); pixDestroy(&pix1); } snprintf(buf, sizeof(buf), "/tmp/lept/digit/%s.comp.pa", rootname); pixaWrite(buf, pixa2); /* Number and show the resulting binary templates */ snprintf(buf, sizeof(buf), "/tmp/lept/digit/%s.comp-num", rootname); PixaDisplayNumbered(pixa2, buf); /* Save the binary templates as a packed tiling (tiff g4). * This is the most efficient way to represent the templates. */ pix1 = pixaDisplayOnLattice(pixa2, 20, 30, NULL, NULL); pixDisplay(pix1, 1000, 500); snprintf(buf, sizeof(buf), "/tmp/lept/digit/%s.comp.tif", rootname); pixWrite(buf, pix1, IFF_TIFF_G4); /* The number of templates is in the pix text string; check it. */ pix2 = pixRead(buf); if (sscanf(pixGetText(pix2), "n = %d", &ns) != 1) fprintf(stderr, "Failed to read the number of templates!\n"); if (ns != nc) fprintf(stderr, "(stored = %d) != (actual number = %d)\n", ns, nc); /* Reconstruct the pixa of templates from the tiled compressed * image, and verify that the resulting pixa is the same. */ pixa3 = pixaMakeFromTiledPix(pix1, 20, 30, 0, 0, NULL); pixaEqual(pixa2, pixa3, 0, NULL, &same); if (!same) fprintf(stderr, "Pixa are not the same!\n"); pixDestroy(&pix1); pixDestroy(&pix2); pixaDestroy(&pixa1); pixaDestroy(&pixa2); pixaDestroy(&pixa3); }
/*! * pixMirrorDetectDwa() * * Input: pixs (1 bpp, deskewed, English text) * &conf (<return> confidence that text is not LR mirror reversed) * mincount (min number of left + right; use 0 for default) * debug (1 for debug output; 0 otherwise) * Return: 0 if OK, 1 on error * * Notes: * (1) We assume the text is horizontally oriented, with * ascenders going up. * (2) See notes in pixMirrorDetect(). */ l_int32 pixMirrorDetectDwa(PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 debug) { char flipsel1[] = "flipsel1"; char flipsel2[] = "flipsel2"; l_int32 count1, count2, nmax; l_float32 nleft, nright; PIX *pixt0, *pixt1, *pixt2, *pixt3; PROCNAME("pixMirrorDetectDwa"); if (!pconf) return ERROR_INT("&conf not defined", procName, 1); *pconf = 0.0; if (!pixs) return ERROR_INT("pixs not defined", procName, 1); if (mincount == 0) mincount = DEFAULT_MIN_MIRROR_FLIP_COUNT; /* Fill x-height characters but not space between them, sort of. */ pixt3 = pixMorphSequenceDwa(pixs, "d1.30", 0); pixXor(pixt3, pixt3, pixs); pixt0 = pixMorphSequenceDwa(pixs, "c15.1", 0); pixXor(pixt0, pixt0, pixs); pixAnd(pixt0, pixt0, pixt3); pixOr(pixt3, pixt0, pixs); pixDestroy(&pixt0); pixt0 = pixAddBorderGeneral(pixt3, ADDED_BORDER, ADDED_BORDER, ADDED_BORDER, ADDED_BORDER, 0); pixDestroy(&pixt3); /* Filter the right-facing characters. */ pixt1 = pixFlipFHMTGen(NULL, pixt0, flipsel1); pixt3 = pixReduceRankBinaryCascade(pixt1, 1, 1, 0, 0); pixCountPixels(pixt3, &count1, NULL); pixDestroy(&pixt1); pixDestroy(&pixt3); /* Filter the left-facing characters. */ pixt2 = pixFlipFHMTGen(NULL, pixt0, flipsel2); pixt3 = pixReduceRankBinaryCascade(pixt2, 1, 1, 0, 0); pixCountPixels(pixt3, &count2, NULL); pixDestroy(&pixt2); pixDestroy(&pixt3); pixDestroy(&pixt0); nright = (l_float32)count1; nleft = (l_float32)count2; nmax = L_MAX(count1, count2); if (nmax > mincount) *pconf = 2. * ((nright - nleft) / sqrt(nright + nleft)); if (debug) { fprintf(stderr, "nright = %f, nleft = %f\n", nright, nleft); if (*pconf > DEFAULT_MIN_MIRROR_FLIP_CONF) fprintf(stderr, "Text is not mirror reversed\n"); if (*pconf < -DEFAULT_MIN_MIRROR_FLIP_CONF) fprintf(stderr, "Text is mirror reversed\n"); } return 0; }
// Finds image regions within the source pix (page image) and returns // the image regions as a Boxa, Pixa pair, analgous to pixConnComp. // The returned boxa, pixa may be NULL, meaning no images found. // If not NULL, they must be destroyed by the caller. void ImageFinder::FindImages(Pix* pix, Boxa** boxa, Pixa** pixa) { *boxa = NULL; *pixa = NULL; #ifdef HAVE_LIBLEPT if (pixGetWidth(pix) < kMinImageFindSize || pixGetHeight(pix) < kMinImageFindSize) return; // Not worth looking at small images. // Reduce by factor 2. Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0); pixDisplayWrite(pixr, textord_tabfind_show_images); // Get the halftone mask directly from Leptonica. Pix *pixht2 = pixGenHalftoneMask(pixr, NULL, NULL, textord_tabfind_show_images); pixDestroy(&pixr); if (pixht2 == NULL) return; // Expand back up again. Pix *pixht = pixExpandReplicate(pixht2, 2); pixDisplayWrite(pixht, textord_tabfind_show_images); pixDestroy(&pixht2); // Fill to capture pixels near the mask edges that were missed Pix *pixt = pixSeedfillBinary(NULL, pixht, pix, 8); pixOr(pixht, pixht, pixt); pixDestroy(&pixt); // Eliminate lines and bars that may be joined to images. Pix* pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3); pixDilateBrick(pixfinemask, pixfinemask, 5, 5); pixDisplayWrite(pixfinemask, textord_tabfind_show_images); Pix* pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1); Pix* pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0); pixDestroy(&pixreduced); pixDilateBrick(pixreduced2, pixreduced2, 5, 5); Pix* pixcoarsemask = pixExpandReplicate(pixreduced2, 8); pixDestroy(&pixreduced2); pixDisplayWrite(pixcoarsemask, textord_tabfind_show_images); // Combine the coarse and fine image masks. pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask); pixDestroy(&pixfinemask); // Dilate a bit to make sure we get everything. pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3); Pix* pixmask = pixExpandReplicate(pixcoarsemask, 16); pixDestroy(&pixcoarsemask); pixDisplayWrite(pixmask, textord_tabfind_show_images); // And the image mask with the line and bar remover. pixAnd(pixht, pixht, pixmask); pixDestroy(&pixmask); pixDisplayWrite(pixht, textord_tabfind_show_images); // Find the individual image regions in the mask image. *boxa = pixConnComp(pixht, pixa, 8); pixDestroy(&pixht); // Rectangularize the individual images. If a sharp edge in vertical and/or // horizontal occupancy can be found, it indicates a probably rectangular // image with unwanted bits merged on, so clip to the approximate rectangle. int npixes = pixaGetCount(*pixa); for (int i = 0; i < npixes; ++i) { int x_start, x_end, y_start, y_end; Pix* img_pix = pixaGetPix(*pixa, i, L_CLONE); pixDisplayWrite(img_pix, textord_tabfind_show_images); if (pixNearlyRectangular(img_pix, kMinRectangularFraction, kMaxRectangularFraction, kMaxRectangularGradient, &x_start, &y_start, &x_end, &y_end)) { // Add 1 to the size as a kludgy flag to indicate to the later stages // of processing that it is a clipped rectangular image . Pix* simple_pix = pixCreate(pixGetWidth(img_pix) + 1, pixGetHeight(img_pix), 1); pixDestroy(&img_pix); pixRasterop(simple_pix, x_start, y_start, x_end - x_start, y_end - y_start, PIX_SET, NULL, 0, 0); // pixaReplacePix takes ownership of the simple_pix. pixaReplacePix(*pixa, i, simple_pix, NULL); img_pix = pixaGetPix(*pixa, i, L_CLONE); } // Subtract the pix from the correct location in the master image. l_int32 x, y, width, height; pixDisplayWrite(img_pix, textord_tabfind_show_images); boxaGetBoxGeometry(*boxa, i, &x, &y, &width, &height); pixRasterop(pix, x, y, width, height, PIX_NOT(PIX_SRC) & PIX_DST, img_pix, 0, 0); pixDestroy(&img_pix); } #endif }
/*! * pixMirrorDetect() * * Input: pixs (1 bpp, deskewed, English text) * &conf (<return> confidence that text is not LR mirror reversed) * mincount (min number of left + right; use 0 for default) * debug (1 for debug output; 0 otherwise) * Return: 0 if OK, 1 on error * * Notes: * (1) For this test, it is necessary that the text is horizontally * oriented, with ascenders going up. * (2) conf is the normalized difference between the number of * right and left facing characters with ascenders. * Left-facing are {d}; right-facing are {b, h, k}. * At least that was the expectation. In practice, we can * really just say that it is the normalized difference in * hits using two specific hit-miss filters, textsel1 and textsel2, * after the image has been suitably pre-filtered so that * these filters are effective. See (4) for what's really happening. * (3) A large positive conf value indicates normal text, whereas * a large negative conf value means the page is mirror reversed. * (4) The implementation is a bit tricky. The general idea is * to fill the x-height part of characters, but not the space * between them, before doing the HMT. This is done by * finding pixels added using two different operations -- a * horizontal close and a vertical dilation -- and adding * the intersection of these sets to the original. It turns * out that the original intuition about the signal was largely * in error: much of the signal for right-facing characters * comes from the lower part of common x-height characters, like * the e and c, that remain open after these operations. * So it's important that the operations to close the x-height * parts of the characters are purposely weakened sufficiently * to allow these characters to remain open. The wonders * of morphology! */ l_int32 pixMirrorDetect(PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 debug) { l_int32 count1, count2, nmax; l_float32 nleft, nright; PIX *pixt0, *pixt1, *pixt2, *pixt3; SEL *sel1, *sel2; PROCNAME("pixMirrorDetect"); if (!pconf) return ERROR_INT("&conf not defined", procName, 1); *pconf = 0.0; if (!pixs) return ERROR_INT("pixs not defined", procName, 1); if (mincount == 0) mincount = DEFAULT_MIN_MIRROR_FLIP_COUNT; sel1 = selCreateFromString(textsel1, 5, 6, NULL); sel2 = selCreateFromString(textsel2, 5, 6, NULL); /* Fill x-height characters but not space between them, sort of. */ pixt3 = pixMorphCompSequence(pixs, "d1.30", 0); pixXor(pixt3, pixt3, pixs); pixt0 = pixMorphCompSequence(pixs, "c15.1", 0); pixXor(pixt0, pixt0, pixs); pixAnd(pixt0, pixt0, pixt3); pixOr(pixt0, pixt0, pixs); pixDestroy(&pixt3); /* pixDisplayWrite(pixt0, 1); */ /* Filter the right-facing characters. */ pixt1 = pixHMT(NULL, pixt0, sel1); pixt3 = pixReduceRankBinaryCascade(pixt1, 1, 1, 0, 0); pixCountPixels(pixt3, &count1, NULL); pixDebugFlipDetect("junkpixright", pixs, pixt1, debug); pixDestroy(&pixt1); pixDestroy(&pixt3); /* Filter the left-facing characters. */ pixt2 = pixHMT(NULL, pixt0, sel2); pixt3 = pixReduceRankBinaryCascade(pixt2, 1, 1, 0, 0); pixCountPixels(pixt3, &count2, NULL); pixDebugFlipDetect("junkpixleft", pixs, pixt2, debug); pixDestroy(&pixt2); pixDestroy(&pixt3); nright = (l_float32)count1; nleft = (l_float32)count2; nmax = L_MAX(count1, count2); pixDestroy(&pixt0); selDestroy(&sel1); selDestroy(&sel2); if (nmax > mincount) *pconf = 2. * ((nright - nleft) / sqrt(nright + nleft)); if (debug) { fprintf(stderr, "nright = %f, nleft = %f\n", nright, nleft); if (*pconf > DEFAULT_MIN_MIRROR_FLIP_CONF) fprintf(stderr, "Text is not mirror reversed\n"); if (*pconf < -DEFAULT_MIN_MIRROR_FLIP_CONF) fprintf(stderr, "Text is mirror reversed\n"); } return 0; }
/*! * pixUpDownDetectGeneralDwa() * * Input: pixs (1 bpp, deskewed, English text) * &conf (<return> confidence that text is rightside-up) * mincount (min number of up + down; use 0 for default) * npixels (number of pixels removed from each side of word box) * debug (1 for debug output; 0 otherwise) * Return: 0 if OK, 1 on error * * Notes: * (1) See the notes in pixUpDownDetectGeneral() for usage. */ l_int32 pixUpDownDetectGeneralDwa(PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 npixels, l_int32 debug) { char flipsel1[] = "flipsel1"; char flipsel2[] = "flipsel2"; char flipsel3[] = "flipsel3"; char flipsel4[] = "flipsel4"; l_int32 countup, countdown, nmax; l_float32 nup, ndown; PIX *pixt, *pixt0, *pixt1, *pixt2, *pixt3, *pixm; PROCNAME("pixUpDownDetectGeneralDwa"); if (!pconf) return ERROR_INT("&conf not defined", procName, 1); *pconf = 0.0; if (!pixs) return ERROR_INT("pixs not defined", procName, 1); if (mincount == 0) mincount = DEFAULT_MIN_UP_DOWN_COUNT; if (npixels < 0) npixels = 0; /* One of many reasonable pre-filtering sequences: (1, 8) and (30, 1). * This closes holes in x-height characters and joins them at * the x-height. There is more noise in the descender detection * from this, but it works fairly well. */ pixt = pixMorphSequenceDwa(pixs, "c1.8 + c30.1", 0); /* Be sure to add the border before the flip DWA operations! */ pixt0 = pixAddBorderGeneral(pixt, ADDED_BORDER, ADDED_BORDER, ADDED_BORDER, ADDED_BORDER, 0); pixDestroy(&pixt); /* Optionally, make a mask of the word bounding boxes, shortening * each of them by a fixed amount at each end. */ pixm = NULL; if (npixels > 0) { l_int32 i, nbox, x, y, w, h; BOX *box; BOXA *boxa; pixt1 = pixMorphSequenceDwa(pixt0, "o10.1", 0); boxa = pixConnComp(pixt1, NULL, 8); pixm = pixCreateTemplate(pixt1); pixDestroy(&pixt1); nbox = boxaGetCount(boxa); for (i = 0; i < nbox; i++) { box = boxaGetBox(boxa, i, L_CLONE); boxGetGeometry(box, &x, &y, &w, &h); if (w > 2 * npixels) pixRasterop(pixm, x + npixels, y - 6, w - 2 * npixels, h + 13, PIX_SET, NULL, 0, 0); boxDestroy(&box); } boxaDestroy(&boxa); } /* Find the ascenders and optionally filter with pixm. * For an explanation of the procedure used for counting the result * of the HMT, see comments in pixUpDownDetectGeneral(). */ pixt1 = pixFlipFHMTGen(NULL, pixt0, flipsel1); pixt2 = pixFlipFHMTGen(NULL, pixt0, flipsel2); pixOr(pixt1, pixt1, pixt2); if (pixm) pixAnd(pixt1, pixt1, pixm); pixt3 = pixReduceRankBinaryCascade(pixt1, 1, 1, 0, 0); pixCountPixels(pixt3, &countup, NULL); pixDestroy(&pixt1); pixDestroy(&pixt2); pixDestroy(&pixt3); /* Find the ascenders and optionally filter with pixm. */ pixt1 = pixFlipFHMTGen(NULL, pixt0, flipsel3); pixt2 = pixFlipFHMTGen(NULL, pixt0, flipsel4); pixOr(pixt1, pixt1, pixt2); if (pixm) pixAnd(pixt1, pixt1, pixm); pixt3 = pixReduceRankBinaryCascade(pixt1, 1, 1, 0, 0); pixCountPixels(pixt3, &countdown, NULL); pixDestroy(&pixt1); pixDestroy(&pixt2); pixDestroy(&pixt3); /* Evaluate statistically, generating a confidence that is * related to the probability with a gaussian distribution. */ nup = (l_float32)(countup); ndown = (l_float32)(countdown); nmax = L_MAX(countup, countdown); if (nmax > mincount) *pconf = 2. * ((nup - ndown) / sqrt(nup + ndown)); if (debug) { if (pixm) pixWrite("junkpixm2", pixm, IFF_PNG); fprintf(stderr, "nup = %7.3f, ndown = %7.3f, conf = %7.3f\n", nup, ndown, *pconf); if (*pconf > DEFAULT_MIN_UP_DOWN_CONF) fprintf(stderr, "Text is rightside-up\n"); if (*pconf < -DEFAULT_MIN_UP_DOWN_CONF) fprintf(stderr, "Text is upside-down\n"); } pixDestroy(&pixt0); pixDestroy(&pixm); return 0; }
int main(int argc, char **argv) { PIX *pixs, *pix1, *pix2, *pix3, *pix4; L_REGPARAMS *rp; if (regTestSetup(argc, argv, &rp)) return 1; pixs = pixRead("test1.png"); /* pixInvert */ pix1 = pixInvert(NULL, pixs); pix2 = pixCreateTemplate(pixs); /* into pixd of same size */ pixInvert(pix2, pixs); regTestWritePixAndCheck(rp, pix1, IFF_PNG); /* 0 */ regTestComparePix(rp, pix1, pix2); /* 1 */ pix3 = pixRead("marge.jpg"); /* into pixd of different size */ pixInvert(pix3, pixs); regTestComparePix(rp, pix1, pix3); /* 2 */ pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); pix1 = pixOpenBrick(NULL, pixs, 1, 9); pix2 = pixDilateBrick(NULL, pixs, 1, 9); /* pixOr */ pix3 = pixCreateTemplate(pixs); pixOr(pix3, pixs, pix1); /* existing */ pix4 = pixOr(NULL, pixs, pix1); /* new */ regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 3 */ regTestComparePix(rp, pix3, pix4); /* 4 */ pixCopy(pix4, pix1); pixOr(pix4, pix4, pixs); /* in-place */ regTestComparePix(rp, pix3, pix4); /* 5 */ pixDestroy(&pix3); pixDestroy(&pix4); pix3 = pixCreateTemplate(pixs); pixOr(pix3, pixs, pix2); /* existing */ pix4 = pixOr(NULL, pixs, pix2); /* new */ regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 6 */ regTestComparePix(rp, pix3, pix4); /* 7 */ pixCopy(pix4, pix2); pixOr(pix4, pix4, pixs); /* in-place */ regTestComparePix(rp, pix3, pix4); /* 8 */ pixDestroy(&pix3); pixDestroy(&pix4); /* pixAnd */ pix3 = pixCreateTemplate(pixs); pixAnd(pix3, pixs, pix1); /* existing */ pix4 = pixAnd(NULL, pixs, pix1); /* new */ regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 9 */ regTestComparePix(rp, pix3, pix4); /* 10 */ pixCopy(pix4, pix1); pixAnd(pix4, pix4, pixs); /* in-place */ regTestComparePix(rp, pix3, pix4); /* 11 */ pixDestroy(&pix3); pixDestroy(&pix4); pix3 = pixCreateTemplate(pixs); pixAnd(pix3, pixs, pix2); /* existing */ pix4 = pixAnd(NULL, pixs, pix2); /* new */ regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 12 */ regTestComparePix(rp, pix3, pix4); /* 13 */ pixCopy(pix4, pix2); pixAnd(pix4, pix4, pixs); /* in-place */ regTestComparePix(rp, pix3, pix4); /* 14 */ pixDestroy(&pix3); pixDestroy(&pix4); /* pixXor */ pix3 = pixCreateTemplate(pixs); pixXor(pix3, pixs, pix1); /* existing */ pix4 = pixXor(NULL, pixs, pix1); /* new */ regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 15 */ regTestComparePix(rp, pix3, pix4); /* 16 */ pixCopy(pix4, pix1); pixXor(pix4, pix4, pixs); /* in-place */ regTestComparePix(rp, pix3, pix4); /* 17 */ pixDestroy(&pix3); pixDestroy(&pix4); pix3 = pixCreateTemplate(pixs); pixXor(pix3, pixs, pix2); /* existing */ pix4 = pixXor(NULL, pixs, pix2); /* new */ regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 18 */ regTestComparePix(rp, pix3, pix4); /* 19 */ pixCopy(pix4, pix2); pixXor(pix4, pix4, pixs); /* in-place */ regTestComparePix(rp, pix3, pix4); /* 20 */ pixDestroy(&pix3); pixDestroy(&pix4); /* pixSubtract */ pix3 = pixCreateTemplate(pixs); pixSubtract(pix3, pixs, pix1); /* existing */ pix4 = pixSubtract(NULL, pixs, pix1); /* new */ regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 21 */ regTestComparePix(rp, pix3, pix4); /* 22 */ pixCopy(pix4, pix1); pixSubtract(pix4, pixs, pix4); /* in-place */ regTestComparePix(rp, pix3, pix4); /* 23 */ pixDestroy(&pix3); pixDestroy(&pix4); pix3 = pixCreateTemplate(pixs); pixSubtract(pix3, pixs, pix2); /* existing */ pix4 = pixSubtract(NULL, pixs, pix2); /* new */ regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 24 */ regTestComparePix(rp, pix3, pix4); /* 25 */ pixCopy(pix4, pix2); pixSubtract(pix4, pixs, pix4); /* in-place */ regTestComparePix(rp, pix3, pix4); /* 26 */ pixDestroy(&pix3); pixDestroy(&pix4); pix4 = pixRead("marge.jpg"); pixSubtract(pix4, pixs, pixs); /* subtract from itself; should be empty */ pix3 = pixCreateTemplate(pixs); regTestComparePix(rp, pix3, pix4); /* 27*/ pixDestroy(&pix3); pixDestroy(&pix4); pixSubtract(pixs, pixs, pixs); /* subtract from itself; should be empty */ pix3 = pixCreateTemplate(pixs); regTestComparePix(rp, pix3, pixs); /* 28*/ pixDestroy(&pix3); pixDestroy(&pixs); pixDestroy(&pix1); pixDestroy(&pix2); return regTestCleanup(rp); }
int main(int argc, char **argv) { if (argc < 3) return usage(argv[0]); char highlight = 0; char ignore_scrollbars = 1; /* Default output filename; can be overridden by command line. */ const char *output_filename = "highlight.png"; int argi = 1; for (; argi < argc; ++argi) { if (strcmp("--highlight", argv[argi]) == 0) { highlight = 1; } else if (strcmp("--no-ignore-scrollbars", argv[argi]) == 0) { ignore_scrollbars = 0; } else if (strcmp("--output", argv[argi]) == 0) { if (argi + 1 >= argc) { fprintf(stderr, "missing argument to --output\n"); return 1; } output_filename = argv[++argi]; } else { break; } } if (argc - argi < 2) return usage(argv[0]); PIX *a = pixRead(argv[argi]); PIX *b = pixRead(argv[argi + 1]); if (!a) { fprintf(stderr, "Failed to open %s\n", argv[argi]); return 1; } if (!b) { fprintf(stderr, "Failed to open %s\n", argv[argi + 1]); return 1; } if (pixGetWidth(a) != pixGetWidth(b) || pixGetHeight(a) != pixGetHeight(b)) { fprintf(stderr, "Inputs are difference sizes\n"); return 1; } PIX *delta = pixAbsDifference(a, b); pixInvert(delta, delta); if (!highlight) pixDestroy(&a); pixDestroy(&b); PIX *deltagray = pixConvertRGBToGray(delta, 0, 0, 0); pixDestroy(&delta); PIX *deltabinary = pixThresholdToBinary(deltagray, 254); PIX *deltabinaryclipped; const int clipwidth = pixGetWidth(deltabinary) - 15; const int clipheight = pixGetHeight(deltabinary) - 15; if (ignore_scrollbars && clipwidth > 0 && clipheight > 0) { BOX *clip = boxCreate(0, 0, clipwidth, clipheight); deltabinaryclipped = pixClipRectangle(deltabinary, clip, NULL); boxDestroy(&clip); pixDestroy(&deltabinary); } else { deltabinaryclipped = deltabinary; deltabinary = NULL; } PIX *hopened = pixOpenBrick(NULL, deltabinaryclipped, 3, 1); PIX *vopened = pixOpenBrick(NULL, deltabinaryclipped, 1, 3); pixDestroy(&deltabinaryclipped); PIX *opened = pixOr(NULL, hopened, vopened); pixDestroy(&hopened); pixDestroy(&vopened); l_int32 count; pixCountPixels(opened, &count, NULL); fprintf(stderr, "%d\n", count); if (count && highlight) { PIX *d1 = pixDilateBrick(NULL, opened, 7, 7); PIX *d2 = pixDilateBrick(NULL, opened, 3, 3); pixInvert(d2, d2); pixAnd(d1, d1, d2); pixPaintThroughMask(a, d1, 0, 0, 0xff << 24); pixWrite(output_filename, a, IFF_PNG); } return count > 0; }
/*! * \brief pixUpDownDetectGeneral() * * \param[in] pixs 1 bpp, deskewed, English text, 150 - 300 ppi * \param[out] pconf confidence that text is rightside-up * \param[in] mincount min number of up + down; use 0 for default * \param[in] npixels number of pixels removed from each side of word box * \param[in] debug 1 for debug output; 0 otherwise * \return 0 if OK, 1 on error * * <pre> * Notes: * (1) See pixOrientDetect() for other details. * (2) %conf is the normalized difference between the number of * detected up and down ascenders, assuming that the text * is either rightside-up or upside-down and not rotated * at a 90 degree angle. * (3) The typical mode of operation is %npixels == 0. * If %npixels > 0, this removes HMT matches at the * beginning and ending of "words." This is useful for * pages that may have mostly digits, because if npixels == 0, * leading "1" and "3" digits can register as having * ascenders or descenders, and "7" digits can match descenders. * Consequently, a page image of only digits may register * as being upside-down. * (4) We want to count the number of instances found using the HMT. * An expensive way to do this would be to count the * number of connected components. A cheap way is to do a rank * reduction cascade that reduces each component to a single * pixel, and results (after two or three 2x reductions) * in one pixel for each of the original components. * After the reduction, you have a much smaller pix over * which to count pixels. We do only 2 reductions, because * this function is designed to work for input pix between * 150 and 300 ppi, and an 8x reduction on a 150 ppi image * is going too far -- components will get merged. * </pre> */ l_int32 pixUpDownDetectGeneral(PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 npixels, l_int32 debug) { l_int32 countup, countdown, nmax; l_float32 nup, ndown; PIX *pix0, *pix1, *pix2, *pix3, *pixm; SEL *sel1, *sel2, *sel3, *sel4; PROCNAME("pixUpDownDetectGeneral"); if (!pconf) return ERROR_INT("&conf not defined", procName, 1); *pconf = 0.0; if (!pixs || pixGetDepth(pixs) != 1) return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); if (mincount == 0) mincount = DEFAULT_MIN_UP_DOWN_COUNT; if (npixels < 0) npixels = 0; lept_mkdir("lept/orient"); sel1 = selCreateFromString(textsel1, 5, 6, NULL); sel2 = selCreateFromString(textsel2, 5, 6, NULL); sel3 = selCreateFromString(textsel3, 5, 6, NULL); sel4 = selCreateFromString(textsel4, 5, 6, NULL); /* One of many reasonable pre-filtering sequences: (1, 8) and (30, 1). * This closes holes in x-height characters and joins them at * the x-height. There is more noise in the descender detection * from this, but it works fairly well. */ pix0 = pixMorphCompSequence(pixs, "c1.8 + c30.1", 0); /* Optionally, make a mask of the word bounding boxes, shortening * each of them by a fixed amount at each end. */ pixm = NULL; if (npixels > 0) { l_int32 i, nbox, x, y, w, h; BOX *box; BOXA *boxa; pix1 = pixMorphSequence(pix0, "o10.1", 0); boxa = pixConnComp(pix1, NULL, 8); pixm = pixCreateTemplate(pix1); pixDestroy(&pix1); nbox = boxaGetCount(boxa); for (i = 0; i < nbox; i++) { box = boxaGetBox(boxa, i, L_CLONE); boxGetGeometry(box, &x, &y, &w, &h); if (w > 2 * npixels) pixRasterop(pixm, x + npixels, y - 6, w - 2 * npixels, h + 13, PIX_SET, NULL, 0, 0); boxDestroy(&box); } boxaDestroy(&boxa); } /* Find the ascenders and optionally filter with pixm. * For an explanation of the procedure used for counting the result * of the HMT, see comments at the beginning of this function. */ pix1 = pixHMT(NULL, pix0, sel1); pix2 = pixHMT(NULL, pix0, sel2); pixOr(pix1, pix1, pix2); if (pixm) pixAnd(pix1, pix1, pixm); pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0); pixCountPixels(pix3, &countup, NULL); pixDebugFlipDetect("/tmp/lept/orient/up.png", pixs, pix1, debug); pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); /* Find the ascenders and optionally filter with pixm. */ pix1 = pixHMT(NULL, pix0, sel3); pix2 = pixHMT(NULL, pix0, sel4); pixOr(pix1, pix1, pix2); if (pixm) pixAnd(pix1, pix1, pixm); pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0); pixCountPixels(pix3, &countdown, NULL); pixDebugFlipDetect("/tmp/lept/orient/down.png", pixs, pix1, debug); pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); /* Evaluate statistically, generating a confidence that is * related to the probability with a gaussian distribution. */ nup = (l_float32)(countup); ndown = (l_float32)(countdown); nmax = L_MAX(countup, countdown); if (nmax > mincount) *pconf = 2. * ((nup - ndown) / sqrt(nup + ndown)); if (debug) { if (pixm) pixWriteDebug("/tmp/lept/orient/pixm1.png", pixm, IFF_PNG); fprintf(stderr, "nup = %7.3f, ndown = %7.3f, conf = %7.3f\n", nup, ndown, *pconf); if (*pconf > DEFAULT_MIN_UP_DOWN_CONF) fprintf(stderr, "Text is rightside-up\n"); if (*pconf < -DEFAULT_MIN_UP_DOWN_CONF) fprintf(stderr, "Text is upside-down\n"); } pixDestroy(&pix0); pixDestroy(&pixm); selDestroy(&sel1); selDestroy(&sel2); selDestroy(&sel3); selDestroy(&sel4); return 0; }