/*! * pixGenHalftoneMask() * * Input: pixs (1 bpp, assumed to be 150 to 200 ppi) * &pixtext (<optional return> text part of pixs) * &htfound (<optional return> 1 if the mask is not empty) * debug (flag: 1 for debug output) * Return: pixd (halftone mask), or null on error */ PIX * pixGenHalftoneMask(PIX *pixs, PIX **ppixtext, l_int32 *phtfound, l_int32 debug) { l_int32 empty; PIX *pixt1, *pixt2, *pixhs, *pixhm, *pixd; PROCNAME("pixGenHalftoneMask"); if (ppixtext) *ppixtext = NULL; if (!pixs) return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); if (pixGetDepth(pixs) != 1) return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); /* Compute seed for halftone parts at 8x reduction */ pixt1 = pixReduceRankBinaryCascade(pixs, 4, 4, 3, 0); pixt2 = pixOpenBrick(NULL, pixt1, 5, 5); pixhs = pixExpandReplicate(pixt2, 8); /* back to 2x reduction */ pixDestroy(&pixt1); pixDestroy(&pixt2); pixDisplayWriteFormat(pixhs, debug, IFF_PNG); /* Compute mask for connected regions */ pixhm = pixCloseSafeBrick(NULL, pixs, 4, 4); pixDisplayWriteFormat(pixhm, debug, IFF_PNG); /* Fill seed into mask to get halftone mask */ pixd = pixSeedfillBinary(NULL, pixhs, pixhm, 4); #if 0 /* Moderate opening to remove thin lines, etc. */ pixOpenBrick(pixd, pixd, 10, 10); pixDisplayWrite(pixd, debug); #endif /* Check if mask is empty */ pixZero(pixd, &empty); if (phtfound) { *phtfound = 0; if (!empty) *phtfound = 1; } /* Optionally, get all pixels that are not under the halftone mask */ if (ppixtext) { if (empty) *ppixtext = pixCopy(NULL, pixs); else *ppixtext = pixSubtract(NULL, pixs, pixd); pixDisplayWriteFormat(*ppixtext, debug, IFF_PNG); } pixDestroy(&pixhs); pixDestroy(&pixhm); return pixd; }
void ColorBlend(PIX *pixs, PIX *pixb, l_float32 fract) { l_int32 i, j, wb, hb, ws, hs, delx, dely, x, y; pixGetDimensions(pixs, &ws, &hs, NULL); pixGetDimensions(pixb, &wb, &hb, NULL); delx = wb + 30; dely = hb + 25; x = 200; y = 300; for (i = 0; i < 20; i++) { y = 20 + i * dely; if (y >= hs - hb) continue; for (j = 0; j < 20; j++) { x = 30 + j * delx; if (x >= ws - wb) continue; pixBlendColor(pixs, pixs, pixb, x, y, fract, 1, 255); } } pixDisplayWriteFormat(pixs, 1, IFF_PNG); }
main(int argc, char **argv) { l_int32 i; PIX *pixs, *pixt1, *pixt2, *pixt3, *pixd; PIXA *pixa; static char mainName[] = "livre_seedgen"; pixs = pixRead("pageseg2.tif"); startTimer(); for (i = 0; i < 100; i++) { pixt1 = pixReduceRankBinaryCascade(pixs, 1, 4, 4, 3); pixDestroy(&pixt1); } fprintf(stderr, "Time: %8.4f sec\n", stopTimer() / 100.); /* 4 2x rank reductions (levels 1, 4, 4, 3), followed by 5x5 opening */ pixDisplayWrite(NULL, -1); pixDisplayWriteFormat(pixs, 4, IFF_PNG); pixt1 = pixReduceRankBinaryCascade(pixs, 1, 4, 0, 0); pixDisplayWriteFormat(pixt1, 1, IFF_PNG); pixt2 = pixReduceRankBinaryCascade(pixt1, 4, 3, 0, 0); pixDisplayWriteFormat(pixt2, 1, IFF_PNG); pixOpenBrick(pixt2, pixt2, 5, 5); pixt3 = pixExpandBinaryReplicate(pixt2, 2); pixDisplayWriteFormat(pixt3, 1, IFF_PNG); /* Generate the output image */ pixa = pixaReadFiles("/tmp", "junk_write_display"); pixd = pixaDisplayTiledAndScaled(pixa, 8, 250, 4, 0, 25, 2); pixWrite("/tmp/seedgen.png", pixd, IFF_PNG); pixDestroy(&pixs); pixDestroy(&pixt1); pixDestroy(&pixt2); pixDestroy(&pixt3); pixDestroy(&pixd); pixaDestroy(&pixa); return 0; }
main(int argc, char **argv) { PIX *pixs, *pixsg, *pixg, *pixd; PIXA *pixa; static char mainName[] = "livre_tophat"; if (argc != 1) return ERROR_INT(" Syntax: livre_tophat", mainName, 1); /* Read the image in at 150 ppi. */ if ((pixs = pixRead("brothers.150.jpg")) == NULL) return ERROR_INT("pix not made", mainName, 1); pixDisplayWrite(NULL, -1); pixDisplayWriteFormat(pixs, 2, IFF_JFIF_JPEG); pixsg = pixConvertRGBToLuminance(pixs); /* Black tophat (closing - original-image) and invert */ pixg = pixTophat(pixsg, 15, 15, L_TOPHAT_BLACK); pixInvert(pixg, pixg); pixDisplayWriteFormat(pixg, 2, IFF_JFIF_JPEG); /* Set black point at 200, white point at 245. */ pixd = pixGammaTRC(NULL, pixg, 1.0, 200, 245); pixDisplayWriteFormat(pixd, 2, IFF_JFIF_JPEG); pixDestroy(&pixg); pixDestroy(&pixd); /* Generate the output image */ pixa = pixaReadFiles("/tmp", "junk_write_display"); pixd = pixaDisplayTiledAndScaled(pixa, 8, 350, 3, 0, 25, 2); pixWrite("/tmp/tophat.jpg", pixd, IFF_JFIF_JPEG); pixDisplay(pixd, 0, 0); pixDestroy(&pixd); pixDestroy(&pixs); pixDestroy(&pixsg); return 0; }
/*! * pixGenTextblockMask() * * Input: pixs (1 bpp, textline mask, assumed to be 150 to 200 ppi) * pixvws (vertical white space mask) * debug (flag: 1 for debug output) * Return: pixd (textblock mask), or null on error * * Notes: * (1) Both the input masks (textline and vertical white space) and * the returned textblock mask are at the same resolution. * (2) The result is somewhat noisy, in that small "blocks" of * text may be included. These can be removed by post-processing, * using, e.g., * pixSelectBySize(pix, 60, 60, 4, L_SELECT_IF_EITHER, * L_SELECT_IF_GTE, NULL); */ PIX * pixGenTextblockMask(PIX *pixs, PIX *pixvws, l_int32 debug) { PIX *pixt1, *pixt2, *pixt3, *pixd; PROCNAME("pixGenTextblockMask"); if (!pixs) return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); if (!pixvws) return (PIX *)ERROR_PTR("pixvws not defined", procName, NULL); if (pixGetDepth(pixs) != 1) return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); /* Join pixels vertically to make a textblock mask */ pixt1 = pixMorphSequence(pixs, "c1.10 + o4.1", 0); pixDisplayWriteFormat(pixt1, debug, IFF_PNG); /* Solidify the textblock mask and remove noise: * (1) For each cc, close the blocks and dilate slightly * to form a solid mask. * (2) Small horizontal closing between components. * (3) Open the white space between columns, again. * (4) Remove small components. */ pixt2 = pixMorphSequenceByComponent(pixt1, "c30.30 + d3.3", 8, 0, 0, NULL); pixCloseSafeBrick(pixt2, pixt2, 10, 1); pixDisplayWriteFormat(pixt2, debug, IFF_PNG); pixt3 = pixSubtract(NULL, pixt2, pixvws); pixDisplayWriteFormat(pixt3, debug, IFF_PNG); pixd = pixSelectBySize(pixt3, 25, 5, 8, L_SELECT_IF_BOTH, L_SELECT_IF_GTE, NULL); pixDisplayWriteFormat(pixd, debug, IFF_PNG); pixDestroy(&pixt1); pixDestroy(&pixt2); pixDestroy(&pixt3); return pixd; }
/*! * pixDisplayWrite() * * Input: pix (1, 2, 4, 8, 16, 32 bpp) * reduction (-1 to reset/erase; 0 to disable; * otherwise this is a reduction factor) * Return: 0 if OK; 1 on error * * Notes: * (1) This defaults to jpeg output for pix that are 32 bpp or * 8 bpp without a colormap. If you want to write all images * losslessly, use format == IFF_PNG in pixDisplayWriteFormat(). * (2) See pixDisplayWriteFormat() for usage details. */ l_int32 pixDisplayWrite(PIX *pixs, l_int32 reduction) { return pixDisplayWriteFormat(pixs, reduction, IFF_JFIF_JPEG); }
/*! * \brief pixDisplayWrite() * * \param[in] pix 1, 2, 4, 8, 16, 32 bpp * \param[in] reduction -1 to reset/erase; 0 to disable; * otherwise this is a reduction factor * \return 0 if OK; 1 on error * * <pre> * Notes: * (1) This is a simple interface for writing a set of files that can * be either looked at individually or saved in a pdf for viewing. * (2) This defaults to jpeg output for pix that are 32 bpp or * 8 bpp without a colormap. If you want to write all images * losslessly, use format == IFF_PNG in pixDisplayWriteFormat(). * (3) See pixDisplayWriteFormat() for usage details. * </pre> */ l_int32 pixDisplayWrite(PIX *pixs, l_int32 reduction) { return pixDisplayWriteFormat(pixs, reduction, IFF_DEFAULT); }
main(int argc, char **argv) { l_int32 d; PIX *pixs, *pixc, *pixr, *pixg, *pixb, *pixsg, *pixsm, *pixd; PIXA *pixa; static char mainName[] = "livre_adapt"; if (argc != 1) exit(ERROR_INT(" Syntax: livre_adapt", mainName, 1)); /* Read the image in at 150 ppi. */ pixDisplayWrite(NULL, -1); if ((pixs = pixRead("brothers.150.jpg")) == NULL) exit(ERROR_INT("pix not made", mainName, 1)); pixDisplayWriteFormat(pixs, 2, IFF_JFIF_JPEG); /* Normalize for uneven illumination on RGB image */ pixBackgroundNormRGBArraysMorph(pixs, NULL, 4, 5, 200, &pixr, &pixg, &pixb); pixd = pixApplyInvBackgroundRGBMap(pixs, pixr, pixg, pixb, 4, 4); pixDisplayWriteFormat(pixd, 2, IFF_JFIF_JPEG); pixDestroy(&pixr); pixDestroy(&pixg); pixDestroy(&pixb); pixDestroy(&pixd); /* Convert the RGB image to grayscale. */ pixsg = pixConvertRGBToLuminance(pixs); pixDisplayWriteFormat(pixsg, 2, IFF_JFIF_JPEG); /* Remove the text in the fg. */ pixc = pixCloseGray(pixsg, 25, 25); pixDisplayWriteFormat(pixc, 2, IFF_JFIF_JPEG); /* Smooth the bg with a convolution. */ pixsm = pixBlockconv(pixc, 15, 15); pixDisplayWriteFormat(pixsm, 2, IFF_JFIF_JPEG); pixDestroy(&pixc); /* Normalize for uneven illumination on gray image. */ pixBackgroundNormGrayArrayMorph(pixsg, NULL, 4, 5, 200, &pixg); pixc = pixApplyInvBackgroundGrayMap(pixsg, pixg, 4, 4); pixDisplayWriteFormat(pixc, 2, IFF_JFIF_JPEG); pixDestroy(&pixg); /* Increase the dynamic range. */ pixd = pixGammaTRC(NULL, pixc, 1.0, 30, 180); pixDisplayWriteFormat(pixd, 2, IFF_JFIF_JPEG); pixDestroy(&pixc); /* Threshold to 1 bpp. */ pixb = pixThresholdToBinary(pixd, 120); pixDisplayWriteFormat(pixb, 2, IFF_PNG); pixDestroy(&pixd); pixDestroy(&pixb); /* Generate the output image */ pixa = pixaReadFiles("/tmp", "junk_write_display"); pixd = pixaDisplayTiledAndScaled(pixa, 8, 350, 4, 0, 25, 2); pixWrite("/tmp/adapt.jpg", pixd, IFF_JFIF_JPEG); pixDisplayWithTitle(pixd, 100, 100, NULL, 1); pixDestroy(&pixd); pixDestroy(&pixs); pixDestroy(&pixsg); return 0; }
l_int32 DoPageSegmentation(PIX *pixs, /* should be at least 300 ppi */ l_int32 which) /* 1, 2, 3, 4 */ { char buf[256]; l_int32 zero; BOXA *boxatm, *boxahm; PIX *pixr; /* image reduced to 150 ppi */ PIX *pixhs; /* image of halftone seed, 150 ppi */ PIX *pixm; /* image of mask of components, 150 ppi */ PIX *pixhm1; /* image of halftone mask, 150 ppi */ PIX *pixhm2; /* image of halftone mask, 300 ppi */ PIX *pixht; /* image of halftone components, 150 ppi */ PIX *pixnht; /* image without halftone components, 150 ppi */ PIX *pixi; /* inverted image, 150 ppi */ PIX *pixvws; /* image of vertical whitespace, 150 ppi */ PIX *pixtm1; /* image of closed textlines, 150 ppi */ PIX *pixtm2; /* image of refined text line mask, 150 ppi */ PIX *pixtm3; /* image of refined text line mask, 300 ppi */ PIX *pixtb1; /* image of text block mask, 150 ppi */ PIX *pixtb2; /* image of text block mask, 300 ppi */ PIX *pixnon; /* image of non-text or halftone, 150 ppi */ PIX *pixt1, *pixt2, *pixt3; PIXA *pixa; PIXCMAP *cmap; PTAA *ptaa; l_int32 ht_flag = 0; l_int32 ws_flag = 0; l_int32 text_flag = 0; l_int32 block_flag = 0; PROCNAME("DoPageSegmentation"); if (which == 1) ht_flag = 1; else if (which == 2) ws_flag = 1; else if (which == 3) text_flag = 1; else if (which == 4) block_flag = 1; else return ERROR_INT("invalid parameter: not in [1...4]", procName, 1); pixDisplayWrite(NULL, -1); /* Reduce to 150 ppi */ pixt1 = pixScaleToGray2(pixs); pixDisplayWriteFormat(pixt1, L_MAX(ws_flag, L_MAX(ht_flag, block_flag)), IFF_PNG); if (which == 1) pixWrite("/tmp/orig.gray.150.png", pixt1, IFF_PNG); pixDestroy(&pixt1); pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); /* Get seed for halftone parts */ pixt1 = pixReduceRankBinaryCascade(pixr, 4, 4, 3, 0); pixt2 = pixOpenBrick(NULL, pixt1, 5, 5); pixhs = pixExpandBinaryPower2(pixt2, 8); pixDisplayWriteFormat(pixhs, ht_flag, IFF_PNG); if (which == 1) pixWrite("/tmp/htseed.150.png", pixhs, IFF_PNG); pixDestroy(&pixt1); pixDestroy(&pixt2); /* Get mask for connected regions */ pixm = pixCloseSafeBrick(NULL, pixr, 4, 4); pixDisplayWriteFormat(pixm, ht_flag, IFF_PNG); if (which == 1) pixWrite("/tmp/ccmask.150.png", pixm, IFF_PNG); /* Fill seed into mask to get halftone mask */ pixhm1 = pixSeedfillBinary(NULL, pixhs, pixm, 4); pixDisplayWriteFormat(pixhm1, ht_flag, IFF_PNG); if (which == 1) pixWrite("/tmp/htmask.150.png", pixhm1, IFF_PNG); pixhm2 = pixExpandBinaryPower2(pixhm1, 2); /* Extract halftone stuff */ pixht = pixAnd(NULL, pixhm1, pixr); if (which == 1) pixWrite("/tmp/ht.150.png", pixht, IFF_PNG); /* Extract non-halftone stuff */ pixnht = pixXor(NULL, pixht, pixr); pixDisplayWriteFormat(pixnht, text_flag, IFF_PNG); if (which == 1) pixWrite("/tmp/text.150.png", pixnht, IFF_PNG); pixZero(pixht, &zero); if (zero) fprintf(stderr, "No halftone parts found\n"); else fprintf(stderr, "Halftone parts found\n"); /* Get bit-inverted image */ pixi = pixInvert(NULL, pixnht); if (which == 1) pixWrite("/tmp/invert.150.png", pixi, IFF_PNG); pixDisplayWriteFormat(pixi, ws_flag, IFF_PNG); /* The whitespace mask will break textlines where there * is a large amount of white space below or above. * We can prevent this by identifying regions of the * inverted image that have large horizontal (bigger than * the separation between columns) and significant * vertical extent (bigger than the separation between * textlines), and subtracting this from the whitespace mask. */ pixt1 = pixMorphCompSequence(pixi, "o80.60", 0); pixt2 = pixSubtract(NULL, pixi, pixt1); pixDisplayWriteFormat(pixt2, ws_flag, IFF_PNG); pixDestroy(&pixt1); /* Identify vertical whitespace by opening inverted image */ pixt3 = pixOpenBrick(NULL, pixt2, 5, 1); /* removes thin vertical lines */ pixvws = pixOpenBrick(NULL, pixt3, 1, 200); /* gets long vertical lines */ pixDisplayWriteFormat(pixvws, L_MAX(text_flag, ws_flag), IFF_PNG); if (which == 1) pixWrite("/tmp/vertws.150.png", pixvws, IFF_PNG); pixDestroy(&pixt2); pixDestroy(&pixt3); /* Get proto (early processed) text line mask. */ /* First close the characters and words in the textlines */ pixtm1 = pixCloseSafeBrick(NULL, pixnht, 30, 1); pixDisplayWriteFormat(pixtm1, text_flag, IFF_PNG); if (which == 1) pixWrite("/tmp/textmask1.150.png", pixtm1, IFF_PNG); /* Next open back up the vertical whitespace corridors */ pixtm2 = pixSubtract(NULL, pixtm1, pixvws); if (which == 1) pixWrite("/tmp/textmask2.150.png", pixtm2, IFF_PNG); /* Do a small opening to remove noise */ pixOpenBrick(pixtm2, pixtm2, 3, 3); pixDisplayWriteFormat(pixtm2, text_flag, IFF_PNG); if (which == 1) pixWrite("/tmp/textmask3.150.png", pixtm2, IFF_PNG); pixtm3 = pixExpandBinaryPower2(pixtm2, 2); /* Join pixels vertically to make text block mask */ pixtb1 = pixMorphSequence(pixtm2, "c1.10 + o4.1", 0); pixDisplayWriteFormat(pixtb1, block_flag, IFF_PNG); if (which == 1) pixWrite("/tmp/textblock1.150.png", pixtb1, IFF_PNG); /* Solidify the textblock mask and remove noise: * (1) For each c.c., close the blocks and dilate slightly * to form a solid mask. * (2) Small horizontal closing between components * (3) Open the white space between columns, again * (4) Remove small components */ pixt1 = pixMorphSequenceByComponent(pixtb1, "c30.30 + d3.3", 8, 0, 0, NULL); pixCloseSafeBrick(pixt1, pixt1, 10, 1); pixDisplayWriteFormat(pixt1, block_flag, IFF_PNG); pixt2 = pixSubtract(NULL, pixt1, pixvws); pixt3 = pixSelectBySize(pixt2, 25, 5, 8, L_SELECT_IF_BOTH, L_SELECT_IF_GTE, NULL); pixDisplayWriteFormat(pixt3, block_flag, IFF_PNG); if (which == 1) pixWrite("/tmp/textblock2.150.png", pixt3, IFF_PNG); pixtb2 = pixExpandBinaryPower2(pixt3, 2); pixDestroy(&pixt1); pixDestroy(&pixt2); pixDestroy(&pixt3); /* Identify the outlines of each textblock */ ptaa = pixGetOuterBordersPtaa(pixtb2); pixt1 = pixRenderRandomCmapPtaa(pixtb2, ptaa, 1, 8, 1); cmap = pixGetColormap(pixt1); pixcmapResetColor(cmap, 0, 130, 130, 130); /* set interior to gray */ if (which == 1) pixWrite("/tmp/textblock3.300.png", pixt1, IFF_PNG); pixDisplayWithTitle(pixt1, 480, 360, "textblock mask with outlines", DFLAG); ptaaDestroy(&ptaa); pixDestroy(&pixt1); /* Fill line mask (as seed) into the original */ pixt1 = pixSeedfillBinary(NULL, pixtm3, pixs, 8); pixOr(pixtm3, pixtm3, pixt1); pixDestroy(&pixt1); if (which == 1) pixWrite("/tmp/textmask.300.png", pixtm3, IFF_PNG); pixDisplayWithTitle(pixtm3, 480, 360, "textline mask 4", DFLAG); /* Fill halftone mask (as seed) into the original */ pixt1 = pixSeedfillBinary(NULL, pixhm2, pixs, 8); pixOr(pixhm2, pixhm2, pixt1); pixDestroy(&pixt1); if (which == 1) pixWrite("/tmp/htmask.300.png", pixhm2, IFF_PNG); pixDisplayWithTitle(pixhm2, 520, 390, "halftonemask 2", DFLAG); /* Find objects that are neither text nor halftones */ pixt1 = pixSubtract(NULL, pixs, pixtm3); /* remove text pixels */ pixnon = pixSubtract(NULL, pixt1, pixhm2); /* remove halftone pixels */ if (which == 1) pixWrite("/tmp/other.300.png", pixnon, IFF_PNG); pixDisplayWithTitle(pixnon, 540, 420, "other stuff", DFLAG); pixDestroy(&pixt1); /* Write out b.b. for text line mask and halftone mask components */ boxatm = pixConnComp(pixtm3, NULL, 4); boxahm = pixConnComp(pixhm2, NULL, 8); if (which == 1) boxaWrite("/tmp/textmask.boxa", boxatm); if (which == 1) boxaWrite("/tmp/htmask.boxa", boxahm); pixa = pixaReadFiles("/tmp/display", "file"); pixt1 = pixaDisplayTiledAndScaled(pixa, 8, 250, 4, 0, 25, 2); snprintf(buf, sizeof(buf), "/tmp/segout.%d.png", which); pixWrite(buf, pixt1, IFF_PNG); pixDestroy(&pixt1); pixaDestroy(&pixa); /* clean up to test with valgrind */ pixDestroy(&pixr); pixDestroy(&pixhs); pixDestroy(&pixm); pixDestroy(&pixhm1); pixDestroy(&pixhm2); pixDestroy(&pixht); pixDestroy(&pixnht); pixDestroy(&pixi); pixDestroy(&pixvws); pixDestroy(&pixtm1); pixDestroy(&pixtm2); pixDestroy(&pixtm3); pixDestroy(&pixtb1); pixDestroy(&pixtb2); pixDestroy(&pixnon); boxaDestroy(&boxatm); boxaDestroy(&boxahm); return 0; }
/*! * pixGetRegionsBinary() * * Input: pixs (1 bpp, assumed to be 300 to 400 ppi) * &pixhm (<optional return> halftone mask) * &pixtm (<optional return> textline mask) * &pixtb (<optional return> textblock mask) * debug (flag: set to 1 for debug output) * Return: 0 if OK, 1 on error * * Notes: * (1) It is best to deskew the image before segmenting. * (2) The debug flag enables a number of outputs. These * are included to show how to generate and save/display * these results. */ l_int32 pixGetRegionsBinary(PIX *pixs, PIX **ppixhm, PIX **ppixtm, PIX **ppixtb, l_int32 debug) { char *tempname; l_int32 htfound, tlfound; PIX *pixr, *pixt1, *pixt2; PIX *pixtext; /* text pixels only */ PIX *pixhm2; /* halftone mask; 2x reduction */ PIX *pixhm; /* halftone mask; */ PIX *pixtm2; /* textline mask; 2x reduction */ PIX *pixtm; /* textline mask */ PIX *pixvws; /* vertical white space mask */ PIX *pixtb2; /* textblock mask; 2x reduction */ PIX *pixtbf2; /* textblock mask; 2x reduction; small comps filtered */ PIX *pixtb; /* textblock mask */ PROCNAME("pixGetRegionsBinary"); if (ppixhm) *ppixhm = NULL; if (ppixtm) *ppixtm = NULL; if (ppixtb) *ppixtb = NULL; if (!pixs) return ERROR_INT("pixs not defined", procName, 1); if (pixGetDepth(pixs) != 1) return ERROR_INT("pixs not 1 bpp", procName, 1); /* 2x reduce, to 150 -200 ppi */ pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); pixDisplayWrite(pixr, debug); /* Get the halftone mask */ pixhm2 = pixGenHalftoneMask(pixr, &pixtext, &htfound, debug); /* Get the textline mask from the text pixels */ pixtm2 = pixGenTextlineMask(pixtext, &pixvws, &tlfound, debug); /* Get the textblock mask from the textline mask */ pixtb2 = pixGenTextblockMask(pixtm2, pixvws, debug); pixDestroy(&pixr); pixDestroy(&pixtext); pixDestroy(&pixvws); /* Remove small components from the mask, where a small * component is defined as one with both width and height < 60 */ pixtbf2 = pixSelectBySize(pixtb2, 60, 60, 4, L_SELECT_IF_EITHER, L_SELECT_IF_GTE, NULL); pixDestroy(&pixtb2); pixDisplayWriteFormat(pixtbf2, debug, IFF_PNG); /* Expand all masks to full resolution, and do filling or * small dilations for better coverage. */ pixhm = pixExpandReplicate(pixhm2, 2); pixt1 = pixSeedfillBinary(NULL, pixhm, pixs, 8); pixOr(pixhm, pixhm, pixt1); pixDestroy(&pixt1); pixDisplayWriteFormat(pixhm, debug, IFF_PNG); pixt1 = pixExpandReplicate(pixtm2, 2); pixtm = pixDilateBrick(NULL, pixt1, 3, 3); pixDestroy(&pixt1); pixDisplayWriteFormat(pixtm, debug, IFF_PNG); pixt1 = pixExpandReplicate(pixtbf2, 2); pixtb = pixDilateBrick(NULL, pixt1, 3, 3); pixDestroy(&pixt1); pixDisplayWriteFormat(pixtb, debug, IFF_PNG); pixDestroy(&pixhm2); pixDestroy(&pixtm2); pixDestroy(&pixtbf2); /* Debug: identify objects that are neither text nor halftone image */ if (debug) { pixt1 = pixSubtract(NULL, pixs, pixtm); /* remove text pixels */ pixt2 = pixSubtract(NULL, pixt1, pixhm); /* remove halftone pixels */ pixDisplayWriteFormat(pixt2, 1, IFF_PNG); pixDestroy(&pixt1); pixDestroy(&pixt2); } /* Debug: display textline components with random colors */ if (debug) { l_int32 w, h; BOXA *boxa; PIXA *pixa; boxa = pixConnComp(pixtm, &pixa, 8); pixGetDimensions(pixtm, &w, &h, NULL); pixt1 = pixaDisplayRandomCmap(pixa, w, h); pixcmapResetColor(pixGetColormap(pixt1), 0, 255, 255, 255); pixDisplay(pixt1, 100, 100); pixDisplayWriteFormat(pixt1, 1, IFF_PNG); pixaDestroy(&pixa); boxaDestroy(&boxa); pixDestroy(&pixt1); } /* Debug: identify the outlines of each textblock */ if (debug) { PIXCMAP *cmap; PTAA *ptaa; ptaa = pixGetOuterBordersPtaa(pixtb); tempname = genTempFilename("/tmp", "tb_outlines.ptaa", 0, 0); ptaaWrite(tempname, ptaa, 1); FREE(tempname); pixt1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1); cmap = pixGetColormap(pixt1); pixcmapResetColor(cmap, 0, 130, 130, 130); pixDisplay(pixt1, 500, 100); pixDisplayWriteFormat(pixt1, 1, IFF_PNG); pixDestroy(&pixt1); ptaaDestroy(&ptaa); } /* Debug: get b.b. for all mask components */ if (debug) { BOXA *bahm, *batm, *batb; bahm = pixConnComp(pixhm, NULL, 4); batm = pixConnComp(pixtm, NULL, 4); batb = pixConnComp(pixtb, NULL, 4); tempname = genTempFilename("/tmp", "htmask.boxa", 0, 0); boxaWrite(tempname, bahm); FREE(tempname); tempname = genTempFilename("/tmp", "textmask.boxa", 0, 0); boxaWrite(tempname, batm); FREE(tempname); tempname = genTempFilename("/tmp", "textblock.boxa", 0, 0); boxaWrite(tempname, batb); FREE(tempname); boxaDestroy(&bahm); boxaDestroy(&batm); boxaDestroy(&batb); } if (ppixhm) *ppixhm = pixhm; else pixDestroy(&pixhm); if (ppixtm) *ppixtm = pixtm; else pixDestroy(&pixtm); if (ppixtb) *ppixtb = pixtb; else pixDestroy(&pixtb); return 0; }
/*! * pixGenTextlineMask() * * Input: pixs (1 bpp, assumed to be 150 to 200 ppi) * &pixvws (<return> vertical whitespace mask) * &tlfound (<optional return> 1 if the mask is not empty) * debug (flag: 1 for debug output) * Return: pixd (textline mask), or null on error * * Notes: * (1) The input pixs should be deskewed. * (2) pixs should have no halftone pixels. * (3) Both the input image and the returned textline mask * are at the same resolution. */ PIX * pixGenTextlineMask(PIX *pixs, PIX **ppixvws, l_int32 *ptlfound, l_int32 debug) { l_int32 empty; PIX *pixt1, *pixt2, *pixvws, *pixd; PROCNAME("pixGenTextlineMask"); if (!pixs) return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); if (!ppixvws) return (PIX *)ERROR_PTR("&pixvws not defined", procName, NULL); if (pixGetDepth(pixs) != 1) return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); /* First we need a vertical whitespace mask. Invert the image. */ pixt1 = pixInvert(NULL, pixs); /* The whitespace mask will break textlines where there * is a large amount of white space below or above. * This can be prevented by identifying regions of the * inverted image that have large horizontal extent (bigger than * the separation between columns) and significant * vertical extent (bigger than the separation between * textlines), and subtracting this from the bg. */ pixt2 = pixMorphCompSequence(pixt1, "o80.60", 0); pixSubtract(pixt1, pixt1, pixt2); pixDisplayWriteFormat(pixt1, debug, IFF_PNG); pixDestroy(&pixt2); /* Identify vertical whitespace by opening the remaining bg. * o5.1 removes thin vertical bg lines and o1.200 extracts * long vertical bg lines. */ pixvws = pixMorphCompSequence(pixt1, "o5.1 + o1.200", 0); *ppixvws = pixvws; pixDisplayWriteFormat(pixvws, debug, IFF_PNG); pixDestroy(&pixt1); /* Three steps to getting text line mask: * (1) close the characters and words in the textlines * (2) open the vertical whitespace corridors back up * (3) small opening to remove noise */ pixt1 = pixCloseSafeBrick(NULL, pixs, 30, 1); pixDisplayWrite(pixt1, debug); pixd = pixSubtract(NULL, pixt1, pixvws); pixOpenBrick(pixd, pixd, 3, 3); pixDisplayWriteFormat(pixd, debug, IFF_PNG); pixDestroy(&pixt1); /* Check if text line mask is empty */ if (ptlfound) { *ptlfound = 0; pixZero(pixd, &empty); if (!empty) *ptlfound = 1; } return pixd; }