/** * Auto page segmentation. Divide the page image into blocks of uniform * text linespacing and images. * * Resolution (in ppi) is derived from the input image. * * The output goes in the blocks list with corresponding TO_BLOCKs in the * to_blocks list. * * If single_column is true, then no attempt is made to divide the image * into columns, but multiple blocks are still made if the text is of * non-uniform linespacing. * * If osd (orientation and script detection) is true then that is performed * as well. If only_osd is true, then only orientation and script detection is * performed. If osd is desired, (osd or only_osd) then osr_tess must be * another Tesseract that was initialized especially for osd, and the results * will be output into osr (orientation and script result). */ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks, Tesseract* osd_tess, OSResults* osr) { if (textord_debug_images) { WriteDebugBackgroundImage(textord_debug_printable, pix_binary_); } Pix* photomask_pix = NULL; Pix* musicmask_pix = NULL; // The blocks made by the ColumnFinder. Moved to blocks before return. BLOCK_LIST found_blocks; TO_BLOCK_LIST temp_blocks; bool single_column = !PSM_COL_FIND_ENABLED(pageseg_mode); bool osd_enabled = PSM_OSD_ENABLED(pageseg_mode); bool osd_only = pageseg_mode == PSM_OSD_ONLY; ColumnFinder* finder = SetupPageSegAndDetectOrientation( single_column, osd_enabled, osd_only, blocks, osd_tess, osr, &temp_blocks, &photomask_pix, &musicmask_pix); int result = 0; if (finder != NULL) { TO_BLOCK_IT to_block_it(&temp_blocks); TO_BLOCK* to_block = to_block_it.data(); if (musicmask_pix != NULL) { // TODO(rays) pass the musicmask_pix into FindBlocks and mark music // blocks separately. For now combine with photomask_pix. 
pixOr(photomask_pix, photomask_pix, musicmask_pix); } if (equ_detect_) { finder->SetEquationDetect(equ_detect_); } result = finder->FindBlocks(pageseg_mode, scaled_color_, scaled_factor_, to_block, photomask_pix, pix_thresholds_, pix_grey_, &found_blocks, to_blocks); if (result >= 0) finder->GetDeskewVectors(&deskew_, &reskew_); delete finder; } pixDestroy(&photomask_pix); pixDestroy(&musicmask_pix); if (result < 0) return result; blocks->clear(); BLOCK_IT block_it(blocks); // Move the found blocks to the input/output blocks. block_it.add_list_after(&found_blocks); if (textord_debug_images) { // The debug image is no longer needed so delete it. unlink(AlignedBlob::textord_debug_pix().string()); } return result; }
/** * Auto page segmentation. Divide the page image into blocks of uniform * text linespacing and images. * * Resolution (in ppi) is derived from the input image. * * The output goes in the blocks list with corresponding TO_BLOCKs in the * to_blocks list. * * If !PSM_COL_FIND_ENABLED(pageseg_mode), then no attempt is made to divide * the image into columns, but multiple blocks are still made if the text is * of non-uniform linespacing. * * If diacritic_blobs is non-null, then diacritics/noise blobs, that would * confuse layout anaylsis by causing textline overlap, are placed there, * with the expectation that they will be reassigned to words later and * noise/diacriticness determined via classification. * * If osd (orientation and script detection) is true then that is performed * as well. If only_osd is true, then only orientation and script detection is * performed. If osd is desired, (osd or only_osd) then osr_tess must be * another Tesseract that was initialized especially for osd, and the results * will be output into osr (orientation and script result). */ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks, BLOBNBOX_LIST* diacritic_blobs, Tesseract* osd_tess, OSResults* osr) { Pix* photomask_pix = NULL; Pix* musicmask_pix = NULL; // The blocks made by the ColumnFinder. Moved to blocks before return. BLOCK_LIST found_blocks; TO_BLOCK_LIST temp_blocks; ColumnFinder* finder = SetupPageSegAndDetectOrientation( pageseg_mode, blocks, osd_tess, osr, &temp_blocks, &photomask_pix, &musicmask_pix); int result = 0; if (finder != NULL) { TO_BLOCK_IT to_block_it(&temp_blocks); TO_BLOCK* to_block = to_block_it.data(); if (musicmask_pix != NULL) { // TODO(rays) pass the musicmask_pix into FindBlocks and mark music // blocks separately. For now combine with photomask_pix. 
pixOr(photomask_pix, photomask_pix, musicmask_pix); } if (equ_detect_) { finder->SetEquationDetect(equ_detect_); } result = finder->FindBlocks(pageseg_mode, scaled_color_, scaled_factor_, to_block, photomask_pix, pix_thresholds_, pix_grey_, &pixa_debug_, &found_blocks, diacritic_blobs, to_blocks); if (result >= 0) finder->GetDeskewVectors(&deskew_, &reskew_); delete finder; } pixDestroy(&photomask_pix); pixDestroy(&musicmask_pix); if (result < 0) return result; blocks->clear(); BLOCK_IT block_it(blocks); // Move the found blocks to the input/output blocks. block_it.add_list_after(&found_blocks); return result; }
/*
 * Regression test for repainting a masked region of an image.
 *
 * Reads "amoris.2.150.jpg", builds a replacement mask with
 * MakeReplacementMask(), and then exercises three approaches:
 *   (1) the individual steps: find a nearby tile, mirror-tile it into a
 *       patch, and combine the patch into the image through the mask;
 *   (2) painting the masked region with a single color sampled near the
 *       mask boundary;
 *   (3) the higher-level pixPaintSelfThroughMask().
 *
 * Each regTestWritePixAndCheck() call is tagged with its sequence index;
 * the order of those calls must not change.
 *
 * Returns 1 if regTestSetup() reports failure, otherwise the value of
 * regTestCleanup().
 */
l_int32 main(int argc, char **argv) {
    l_int32 bx, by, bw, bh;
    l_uint32 pixval;
    BOX *box1, *box2;
    BOXA *boxa;
    PIX *pixs, *pixm, *pixd;
    PIX *pix0, *pix1, *pix2, *pix3, *pix4, *pix5, *pix6;
    L_REGPARAMS *rp;

    if (regTestSetup(argc, argv, &rp)) return 1;

    /* Find a mask for repainting pixels */
    /* NOTE(review): the result of pixRead() is not checked here. */
    pixs = pixRead("amoris.2.150.jpg");
    pix1 = MakeReplacementMask(pixs);
    boxa = pixConnCompBB(pix1, 8);
    /* box1 is a copy of entry 0 of boxa, so boxa can be destroyed now */
    box1 = boxaGetBox(boxa, 0, L_COPY);
    boxaDestroy(&boxa);

    /*--------------------------------------------------------*
     *                Show the individual steps               *
     *--------------------------------------------------------*/
    /* Locate a good tile to use */
    pixFindRepCloseTile(pixs, box1, L_VERT, 20, 30, 7, &box2, 1);
    /* pix0 is a copy of the mask on which the chosen tiles are outlined */
    pix0 = pixCopy(NULL, pix1);
    pixRenderBox(pix0, box2, 2, L_SET_PIXELS);

    /* Make a patch using this tile */
    boxGetGeometry(box1, &bx, &by, &bw, &bh);
    pix2 = pixClipRectangle(pixs, box2, NULL);
    regTestWritePixAndCheck(rp, pix2, IFF_PNG); /* 0 */
    pixDisplayWithTitle(pix2, 400, 100, NULL, rp->display);
    pix3 = pixMirroredTiling(pix2, bw, bh);
    regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 1 */
    pixDisplayWithTitle(pix3, 1000, 0, NULL, rp->display);

    /* Paint the patch through the mask */
    pixd = pixCopy(NULL, pixs);
    pixm = pixClipRectangle(pix1, box1, NULL);
    pixCombineMaskedGeneral(pixd, pix3, pixm, bx, by);
    regTestWritePixAndCheck(rp, pixd, IFF_PNG); /* 2 */
    pixDisplayWithTitle(pixd, 0, 0, NULL, rp->display);
    boxDestroy(&box2);
    pixDestroy(&pixm);
    pixDestroy(&pixd);
    pixDestroy(&pix2);

    /* Blend two patches and then overlay. Use the previous
     * tile found vertically and a new one found horizontally.
     * pix3 (the vertical-tile patch) is deliberately kept alive
     * from the previous step for the blend below. */
    pixFindRepCloseTile(pixs, box1, L_HORIZ, 20, 30, 7, &box2, 1);
    pixRenderBox(pix0, box2, 2, L_SET_PIXELS);
    regTestWritePixAndCheck(rp, pix0, IFF_TIFF_G4); /* 3 */
    pixDisplayWithTitle(pix0, 100, 100, NULL, rp->display);
    pix2 = pixClipRectangle(pixs, box2, NULL);
    pix4 = pixMirroredTiling(pix2, bw, bh);
    regTestWritePixAndCheck(rp, pix4, IFF_PNG); /* 4 */
    pixDisplayWithTitle(pix4, 1100, 0, NULL, rp->display);
    pix5 = pixBlend(pix3, pix4, 0, 0, 0.5);
    regTestWritePixAndCheck(rp, pix5, IFF_PNG); /* 5 */
    pixDisplayWithTitle(pix5, 1200, 0, NULL, rp->display);
    pix6 = pixClipRectangle(pix1, box1, NULL);
    pixd = pixCopy(NULL, pixs);
    pixCombineMaskedGeneral(pixd, pix5, pix6, bx, by);
    regTestWritePixAndCheck(rp, pixd, IFF_PNG); /* 6 */
    pixDisplayWithTitle(pixd, 700, 200, NULL, rp->display);
    boxDestroy(&box2);
    pixDestroy(&pixd);
    pixDestroy(&pix0);
    pixDestroy(&pix2);
    pixDestroy(&pix3);
    pixDestroy(&pix4);
    pixDestroy(&pix5);
    pixDestroy(&pix6);

    /*--------------------------------------------------------*
     *          Show painting from a color near region        *
     *--------------------------------------------------------*/
    pix2 = pixCopy(NULL, pixs);
    pixGetColorNearMaskBoundary(pix2, pix1, box1, 20, &pixval, 0);
    pix3 = pixClipRectangle(pix1, box1, NULL);
    boxGetGeometry(box1, &bx, &by, NULL, NULL);
    pixSetMaskedGeneral(pix2, pix3, pixval, bx, by);
    regTestWritePixAndCheck(rp, pix2, IFF_PNG); /* 7 */
    pixDisplayWithTitle(pix2, 0, 0, NULL, rp->display);
    boxDestroy(&box1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);

    /*--------------------------------------------------------*
     *             Use the higher-level function              *
     *--------------------------------------------------------*/
    /* Use various tile selections and tile blending with one component.
     * Note that the third call paints directly into pixs. */
    pix2 = pixCopy(NULL, pixs);
    pix3 = pixCopy(NULL, pixs);
    pix4 = pixCopy(NULL, pixs);
    pixPaintSelfThroughMask(pix2, pix1, 0, 0, L_HORIZ, 30, 50, 5, 10);
    pixPaintSelfThroughMask(pix3, pix1, 0, 0, L_VERT, 30, 50, 5, 0);
    pixPaintSelfThroughMask(pixs, pix1, 0, 0, L_BOTH_DIRECTIONS, 30, 50, 5, 20);
    regTestWritePixAndCheck(rp, pix2, IFF_PNG); /* 8 */
    regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 9 */
    regTestWritePixAndCheck(rp, pixs, IFF_PNG); /* 10 */
    pixDisplayWithTitle(pix2, 300, 0, NULL, rp->display);
    pixDisplayWithTitle(pix3, 500, 0, NULL, rp->display);
    pixDisplayWithTitle(pixs, 700, 0, NULL, rp->display);

    /* Test with two components: the mask OR'd with its left-right flip.
     * pix4 was copied from pixs before pixs was painted above. */
    pix5 = pixFlipLR(NULL, pix1);
    pixOr(pix5, pix5, pix1);
    pixPaintSelfThroughMask(pix4, pix5, 0, 0, L_BOTH_DIRECTIONS, 50, 100, 5, 9);
    regTestWritePixAndCheck(rp, pix4, IFF_PNG); /* 11 */
    pixDisplayWithTitle(pix4, 900, 0, NULL, rp->display);
    pixDestroy(&pixs);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);
    pixDestroy(&pix4);
    pixDestroy(&pix5);
    return regTestCleanup(rp);
}
/*
 * DoPageSegmentation()
 *
 * Runs a step-by-step page segmentation (halftone mask, vertical
 * whitespace, textline mask, textblock mask) on pixs and saves a tiled
 * summary image of selected intermediate results.
 *
 *   pixs:   input image; the parameter comment asks for at least 300 ppi
 *   which:  1, 2, 3 or 4. Selects which group of intermediates is added
 *           to the tiled summary:
 *             1 = halftone steps, 2 = whitespace steps,
 *             3 = textline steps, 4 = textblock steps.
 *           When which == 1, every intermediate is additionally written
 *           as an individual file under /tmp/lept/livre/.
 *
 * The summary is always written to /tmp/lept/livre/segout.<which>.png.
 *
 * Returns 0 on completion; for a value of 'which' outside [1...4] it
 * returns through ERROR_INT (called with error value 1) before anything
 * is allocated.
 */
l_int32 DoPageSegmentation(PIX *pixs, /* should be at least 300 ppi */
                           l_int32 which) /* 1, 2, 3, 4 */
{
    char buf[256];
    l_int32 zero;
    BOXA *boxatm, *boxahm;
    PIX *pixr; /* image reduced to 150 ppi */
    PIX *pixhs; /* image of halftone seed, 150 ppi */
    PIX *pixm; /* image of mask of components, 150 ppi */
    PIX *pixhm1; /* image of halftone mask, 150 ppi */
    PIX *pixhm2; /* image of halftone mask, 300 ppi */
    PIX *pixht; /* image of halftone components, 150 ppi */
    PIX *pixnht; /* image without halftone components, 150 ppi */
    PIX *pixi; /* inverted image, 150 ppi */
    PIX *pixvws; /* image of vertical whitespace, 150 ppi */
    PIX *pixm1; /* image of closed textlines, 150 ppi */
    PIX *pixm2; /* image of refined text line mask, 150 ppi */
    PIX *pixm3; /* image of refined text line mask, 300 ppi */
    PIX *pixb1; /* image of text block mask, 150 ppi */
    PIX *pixb2; /* image of text block mask, 300 ppi */
    PIX *pixnon; /* image of non-text or halftone, 150 ppi */
    /* NOTE(review): pix4 is declared but never used in this function. */
    PIX *pix1, *pix2, *pix3, *pix4;
    PIXA *pixa;
    PIXCMAP *cmap;
    PTAA *ptaa;
    /* Exactly one of these four flags is set, according to 'which' */
    l_int32 ht_flag = 0;
    l_int32 ws_flag = 0;
    l_int32 text_flag = 0;
    l_int32 block_flag = 0;

    PROCNAME("DoPageSegmentation");

    if (which == 1)
        ht_flag = 1;
    else if (which == 2)
        ws_flag = 1;
    else if (which == 3)
        text_flag = 1;
    else if (which == 4)
        block_flag = 1;
    else
        return ERROR_INT("invalid parameter: not in [1...4]", procName, 1);

    /* pixa accumulates copies of the images shown in the tiled summary */
    pixa = pixaCreate(0);
    lept_mkdir("lept/livre");

    /* Reduce to 150 ppi */
    pix1 = pixScaleToGray2(pixs);
    if (ws_flag || ht_flag || block_flag) pixaAddPix(pixa, pix1, L_COPY);
    if (which == 1)
        pixWrite("/tmp/lept/livre/orig.gray.150.png", pix1, IFF_PNG);
    pixDestroy(&pix1);
    pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);

    /* Get seed for halftone parts */
    pix1 = pixReduceRankBinaryCascade(pixr, 4, 4, 3, 0);
    pix2 = pixOpenBrick(NULL, pix1, 5, 5);
    pixhs = pixExpandBinaryPower2(pix2, 8);
    if (ht_flag) pixaAddPix(pixa, pixhs, L_COPY);
    if (which == 1)
        pixWrite("/tmp/lept/livre/htseed.150.png", pixhs, IFF_PNG);
    pixDestroy(&pix1);
    pixDestroy(&pix2);

    /* Get mask for connected regions */
    pixm = pixCloseSafeBrick(NULL, pixr, 4, 4);
    if (ht_flag) pixaAddPix(pixa, pixm, L_COPY);
    if (which == 1)
        pixWrite("/tmp/lept/livre/ccmask.150.png", pixm, IFF_PNG);

    /* Fill seed into mask to get halftone mask */
    pixhm1 = pixSeedfillBinary(NULL, pixhs, pixm, 4);
    if (ht_flag) pixaAddPix(pixa, pixhm1, L_COPY);
    if (which == 1)
        pixWrite("/tmp/lept/livre/htmask.150.png", pixhm1, IFF_PNG);
    pixhm2 = pixExpandBinaryPower2(pixhm1, 2);

    /* Extract halftone stuff */
    pixht = pixAnd(NULL, pixhm1, pixr);
    if (which == 1) pixWrite("/tmp/lept/livre/ht.150.png", pixht, IFF_PNG);

    /* Extract non-halftone stuff */
    pixnht = pixXor(NULL, pixht, pixr);
    if (text_flag) pixaAddPix(pixa, pixnht, L_COPY);
    if (which == 1)
        pixWrite("/tmp/lept/livre/text.150.png", pixnht, IFF_PNG);
    /* Report on stderr whether the halftone image has any pixels set */
    pixZero(pixht, &zero);
    if (zero)
        fprintf(stderr, "No halftone parts found\n");
    else
        fprintf(stderr, "Halftone parts found\n");

    /* Get bit-inverted image */
    pixi = pixInvert(NULL, pixnht);
    if (ws_flag) pixaAddPix(pixa, pixi, L_COPY);
    if (which == 1)
        pixWrite("/tmp/lept/livre/invert.150.png", pixi, IFF_PNG);

    /* The whitespace mask will break textlines where there
     * is a large amount of white space below or above.
     * We can prevent this by identifying regions of the
     * inverted image that have large horizontal (bigger than
     * the separation between columns) and significant
     * vertical extent (bigger than the separation between
     * textlines), and subtracting this from the whitespace mask. */
    pix1 = pixMorphCompSequence(pixi, "o80.60", 0);
    pix2 = pixSubtract(NULL, pixi, pix1);
    if (ws_flag) pixaAddPix(pixa, pix2, L_COPY);
    pixDestroy(&pix1);

    /* Identify vertical whitespace by opening inverted image */
    pix3 = pixOpenBrick(NULL, pix2, 5, 1); /* removes thin vertical lines */
    pixvws = pixOpenBrick(NULL, pix3, 1, 200); /* gets long vertical lines */
    if (text_flag || ws_flag) pixaAddPix(pixa, pixvws, L_COPY);
    if (which == 1)
        pixWrite("/tmp/lept/livre/vertws.150.png", pixvws, IFF_PNG);
    pixDestroy(&pix2);
    pixDestroy(&pix3);

    /* Get proto (early processed) text line mask. */
    /* First close the characters and words in the textlines */
    pixm1 = pixCloseSafeBrick(NULL, pixnht, 30, 1);
    if (text_flag) pixaAddPix(pixa, pixm1, L_COPY);
    if (which == 1)
        pixWrite("/tmp/lept/livre/textmask1.150.png", pixm1, IFF_PNG);

    /* Next open back up the vertical whitespace corridors */
    pixm2 = pixSubtract(NULL, pixm1, pixvws);
    if (which == 1)
        pixWrite("/tmp/lept/livre/textmask2.150.png", pixm2, IFF_PNG);

    /* Do a small opening to remove noise (source and dest are both pixm2) */
    pixOpenBrick(pixm2, pixm2, 3, 3);
    if (text_flag) pixaAddPix(pixa, pixm2, L_COPY);
    if (which == 1)
        pixWrite("/tmp/lept/livre/textmask3.150.png", pixm2, IFF_PNG);
    pixm3 = pixExpandBinaryPower2(pixm2, 2);

    /* Join pixels vertically to make text block mask */
    pixb1 = pixMorphSequence(pixm2, "c1.10 + o4.1", 0);
    if (block_flag) pixaAddPix(pixa, pixb1, L_COPY);
    if (which == 1)
        pixWrite("/tmp/lept/livre/textblock1.150.png", pixb1, IFF_PNG);

    /* Solidify the textblock mask and remove noise:
     *  (1) For each c.c., close the blocks and dilate slightly
     *      to form a solid mask.
     *  (2) Small horizontal closing between components
     *  (3) Open the white space between columns, again
     *  (4) Remove small components */
    pix1 = pixMorphSequenceByComponent(pixb1, "c30.30 + d3.3", 8, 0, 0, NULL);
    pixCloseSafeBrick(pix1, pix1, 10, 1);
    if (block_flag) pixaAddPix(pixa, pix1, L_COPY);
    pix2 = pixSubtract(NULL, pix1, pixvws);
    pix3 = pixSelectBySize(pix2, 25, 5, 8, L_SELECT_IF_BOTH,
                           L_SELECT_IF_GTE, NULL);
    if (block_flag) pixaAddPix(pixa, pix3, L_COPY);
    if (which == 1)
        pixWrite("/tmp/lept/livre/textblock2.150.png", pix3, IFF_PNG);
    pixb2 = pixExpandBinaryPower2(pix3, 2);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);

    /* Identify the outlines of each textblock */
    ptaa = pixGetOuterBordersPtaa(pixb2);
    pix1 = pixRenderRandomCmapPtaa(pixb2, ptaa, 1, 8, 1);
    cmap = pixGetColormap(pix1);
    pixcmapResetColor(cmap, 0, 130, 130, 130); /* set interior to gray */
    if (which == 1)
        pixWrite("/tmp/lept/livre/textblock3.300.png", pix1, IFF_PNG);
    pixDisplayWithTitle(pix1, 480, 360, "textblock mask with outlines", DFLAG);
    ptaaDestroy(&ptaa);
    pixDestroy(&pix1);

    /* Fill line mask (as seed) into the original */
    pix1 = pixSeedfillBinary(NULL, pixm3, pixs, 8);
    pixOr(pixm3, pixm3, pix1);
    pixDestroy(&pix1);
    if (which == 1)
        pixWrite("/tmp/lept/livre/textmask.300.png", pixm3, IFF_PNG);
    pixDisplayWithTitle(pixm3, 480, 360, "textline mask 4", DFLAG);

    /* Fill halftone mask (as seed) into the original */
    pix1 = pixSeedfillBinary(NULL, pixhm2, pixs, 8);
    pixOr(pixhm2, pixhm2, pix1);
    pixDestroy(&pix1);
    if (which == 1)
        pixWrite("/tmp/lept/livre/htmask.300.png", pixhm2, IFF_PNG);
    pixDisplayWithTitle(pixhm2, 520, 390, "halftonemask 2", DFLAG);

    /* Find objects that are neither text nor halftones */
    pix1 = pixSubtract(NULL, pixs, pixm3); /* remove text pixels */
    pixnon = pixSubtract(NULL, pix1, pixhm2); /* remove halftone pixels */
    pixDestroy(&pix1);
    if (which == 1)
        pixWrite("/tmp/lept/livre/other.300.png", pixnon, IFF_PNG);
    pixDisplayWithTitle(pixnon, 540, 420, "other stuff", DFLAG);

    /* Write out b.b. for text line mask and halftone mask components */
    boxatm = pixConnComp(pixm3, NULL, 4);
    boxahm = pixConnComp(pixhm2, NULL, 8);
    if (which == 1) {
        boxaWrite("/tmp/lept/livre/textmask.boxa", boxatm);
        boxaWrite("/tmp/lept/livre/htmask.boxa", boxahm);
    }

    /* Tile the collected intermediates; the display position is offset
     * vertically by 'which' so the four summaries do not overlap. */
    pix1 = pixaDisplayTiledAndScaled(pixa, 8, 250, 4, 0, 25, 2);
    pixDisplay(pix1, 0, 375 * (which - 1));
    snprintf(buf, sizeof(buf), "/tmp/lept/livre/segout.%d.png", which);
    pixWrite(buf, pix1, IFF_PNG);
    pixDestroy(&pix1);
    pixaDestroy(&pixa);

    /* clean up to test with valgrind */
    pixDestroy(&pixr);
    pixDestroy(&pixhs);
    pixDestroy(&pixm);
    pixDestroy(&pixhm1);
    pixDestroy(&pixhm2);
    pixDestroy(&pixht);
    pixDestroy(&pixi);
    pixDestroy(&pixnht);
    pixDestroy(&pixvws);
    pixDestroy(&pixm1);
    pixDestroy(&pixm2);
    pixDestroy(&pixm3);
    pixDestroy(&pixb1);
    pixDestroy(&pixb2);
    pixDestroy(&pixnon);
    boxaDestroy(&boxatm);
    boxaDestroy(&boxahm);
    return 0;
}
/*!
 *  pixGetRegionsBinary()
 *
 *      Input:  pixs (1 bpp, assumed to be 300 to 400 ppi)
 *              &ppixhm (<optional return> halftone mask)
 *              &ppixtm (<optional return> textline mask)
 *              &ppixtb (<optional return> textblock mask)
 *              debug (flag: set to 1 for debug output)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) It is best to deskew the image before segmenting.
 *      (2) The debug flag enables a number of outputs.  These
 *          are included to show how to generate and save/display
 *          these results.
 *      (3) Each non-null output pointer is first set to NULL.  On
 *          return it receives the full-resolution mask, which the
 *          caller then owns; masks whose output pointer is NULL are
 *          destroyed here.
 *      (4) The error return is only taken for a missing or non-1 bpp
 *          pixs.  NOTE(review): none of the intermediate images is
 *          checked for NULL, so a failure in a later step is not
 *          reported through the return value -- confirm whether
 *          callers rely on this.
 *      (5) With debug set, files are written using names obtained
 *          from genTempFilename("/tmp", ...).
 */
l_int32 pixGetRegionsBinary(PIX *pixs, PIX **ppixhm, PIX **ppixtm,
                            PIX **ppixtb, l_int32 debug) {
    char *tempname;
    l_int32 htfound, tlfound;
    PIX *pixr, *pixt1, *pixt2;
    PIX *pixtext; /* text pixels only */
    PIX *pixhm2; /* halftone mask; 2x reduction */
    PIX *pixhm; /* halftone mask; */
    PIX *pixtm2; /* textline mask; 2x reduction */
    PIX *pixtm; /* textline mask */
    PIX *pixvws; /* vertical white space mask */
    PIX *pixtb2; /* textblock mask; 2x reduction */
    PIX *pixtbf2; /* textblock mask; 2x reduction; small comps filtered */
    PIX *pixtb; /* textblock mask */

    PROCNAME("pixGetRegionsBinary");

    /* Clear the optional outputs before any error return */
    if (ppixhm) *ppixhm = NULL;
    if (ppixtm) *ppixtm = NULL;
    if (ppixtb) *ppixtb = NULL;
    if (!pixs) return ERROR_INT("pixs not defined", procName, 1);
    if (pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not 1 bpp", procName, 1);

    /* 2x reduce, to 150 -200 ppi */
    pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
    pixDisplayWrite(pixr, debug);

    /* Get the halftone mask; htfound is filled in but not used here */
    pixhm2 = pixGenHalftoneMask(pixr, &pixtext, &htfound, debug);

    /* Get the textline mask from the text pixels; tlfound is filled in
     * but not used here */
    pixtm2 = pixGenTextlineMask(pixtext, &pixvws, &tlfound, debug);

    /* Get the textblock mask from the textline mask */
    pixtb2 = pixGenTextblockMask(pixtm2, pixvws, debug);
    pixDestroy(&pixr);
    pixDestroy(&pixtext);
    pixDestroy(&pixvws);

    /* Remove small components from the mask, where a small
     * component is defined as one with both width and height < 60 */
    pixtbf2 = pixSelectBySize(pixtb2, 60, 60, 4, L_SELECT_IF_EITHER,
                              L_SELECT_IF_GTE, NULL);
    pixDestroy(&pixtb2);
    pixDisplayWriteFormat(pixtbf2, debug, IFF_PNG);

    /* Expand all masks to full resolution, and do filling or
     * small dilations for better coverage. */
    pixhm = pixExpandReplicate(pixhm2, 2);
    pixt1 = pixSeedfillBinary(NULL, pixhm, pixs, 8);
    pixOr(pixhm, pixhm, pixt1);
    pixDestroy(&pixt1);
    pixDisplayWriteFormat(pixhm, debug, IFF_PNG);
    pixt1 = pixExpandReplicate(pixtm2, 2);
    pixtm = pixDilateBrick(NULL, pixt1, 3, 3);
    pixDestroy(&pixt1);
    pixDisplayWriteFormat(pixtm, debug, IFF_PNG);
    pixt1 = pixExpandReplicate(pixtbf2, 2);
    pixtb = pixDilateBrick(NULL, pixt1, 3, 3);
    pixDestroy(&pixt1);
    pixDisplayWriteFormat(pixtb, debug, IFF_PNG);
    pixDestroy(&pixhm2);
    pixDestroy(&pixtm2);
    pixDestroy(&pixtbf2);

    /* Debug: identify objects that are neither text nor halftone image */
    if (debug) {
        pixt1 = pixSubtract(NULL, pixs, pixtm); /* remove text pixels */
        pixt2 = pixSubtract(NULL, pixt1, pixhm); /* remove halftone pixels */
        pixDisplayWriteFormat(pixt2, 1, IFF_PNG);
        pixDestroy(&pixt1);
        pixDestroy(&pixt2);
    }

    /* Debug: display textline components with random colors */
    if (debug) {
        l_int32 w, h;
        BOXA *boxa;
        PIXA *pixa;
        boxa = pixConnComp(pixtm, &pixa, 8);
        pixGetDimensions(pixtm, &w, &h, NULL);
        pixt1 = pixaDisplayRandomCmap(pixa, w, h);
        /* colormap index 0 is reset to white */
        pixcmapResetColor(pixGetColormap(pixt1), 0, 255, 255, 255);
        pixDisplay(pixt1, 100, 100);
        pixDisplayWriteFormat(pixt1, 1, IFF_PNG);
        pixaDestroy(&pixa);
        boxaDestroy(&boxa);
        pixDestroy(&pixt1);
    }

    /* Debug: identify the outlines of each textblock */
    if (debug) {
        PIXCMAP *cmap;
        PTAA *ptaa;
        ptaa = pixGetOuterBordersPtaa(pixtb);
        tempname = genTempFilename("/tmp", "tb_outlines.ptaa", 0, 0);
        ptaaWrite(tempname, ptaa, 1);
        FREE(tempname);
        pixt1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1);
        cmap = pixGetColormap(pixt1);
        /* colormap index 0 is reset to gray */
        pixcmapResetColor(cmap, 0, 130, 130, 130);
        pixDisplay(pixt1, 500, 100);
        pixDisplayWriteFormat(pixt1, 1, IFF_PNG);
        pixDestroy(&pixt1);
        ptaaDestroy(&ptaa);
    }

    /* Debug: get b.b. for all mask components */
    if (debug) {
        BOXA *bahm, *batm, *batb;
        bahm = pixConnComp(pixhm, NULL, 4);
        batm = pixConnComp(pixtm, NULL, 4);
        batb = pixConnComp(pixtb, NULL, 4);
        tempname = genTempFilename("/tmp", "htmask.boxa", 0, 0);
        boxaWrite(tempname, bahm);
        FREE(tempname);
        tempname = genTempFilename("/tmp", "textmask.boxa", 0, 0);
        boxaWrite(tempname, batm);
        FREE(tempname);
        tempname = genTempFilename("/tmp", "textblock.boxa", 0, 0);
        boxaWrite(tempname, batb);
        FREE(tempname);
        boxaDestroy(&bahm);
        boxaDestroy(&batm);
        boxaDestroy(&batb);
    }

    /* Hand each requested mask to the caller; destroy the others */
    if (ppixhm)
        *ppixhm = pixhm;
    else
        pixDestroy(&pixhm);
    if (ppixtm)
        *ppixtm = pixtm;
    else
        pixDestroy(&pixtm);
    if (ppixtb)
        *ppixtb = pixtb;
    else
        pixDestroy(&pixtb);
    return 0;
}
int main(int argc, char **argv) { if (argc < 3) return usage(argv[0]); char highlight = 0; char ignore_scrollbars = 1; /* Default output filename; can be overridden by command line. */ const char *output_filename = "highlight.png"; int argi = 1; for (; argi < argc; ++argi) { if (strcmp("--highlight", argv[argi]) == 0) { highlight = 1; } else if (strcmp("--no-ignore-scrollbars", argv[argi]) == 0) { ignore_scrollbars = 0; } else if (strcmp("--output", argv[argi]) == 0) { if (argi + 1 >= argc) { fprintf(stderr, "missing argument to --output\n"); return 1; } output_filename = argv[++argi]; } else { break; } } if (argc - argi < 2) return usage(argv[0]); PIX *a = pixRead(argv[argi]); PIX *b = pixRead(argv[argi + 1]); if (!a) { fprintf(stderr, "Failed to open %s\n", argv[argi]); return 1; } if (!b) { fprintf(stderr, "Failed to open %s\n", argv[argi + 1]); return 1; } if (pixGetWidth(a) != pixGetWidth(b) || pixGetHeight(a) != pixGetHeight(b)) { fprintf(stderr, "Inputs are difference sizes\n"); return 1; } PIX *delta = pixAbsDifference(a, b); pixInvert(delta, delta); if (!highlight) pixDestroy(&a); pixDestroy(&b); PIX *deltagray = pixConvertRGBToGray(delta, 0, 0, 0); pixDestroy(&delta); PIX *deltabinary = pixThresholdToBinary(deltagray, 254); PIX *deltabinaryclipped; const int clipwidth = pixGetWidth(deltabinary) - 15; const int clipheight = pixGetHeight(deltabinary) - 15; if (ignore_scrollbars && clipwidth > 0 && clipheight > 0) { BOX *clip = boxCreate(0, 0, clipwidth, clipheight); deltabinaryclipped = pixClipRectangle(deltabinary, clip, NULL); boxDestroy(&clip); pixDestroy(&deltabinary); } else { deltabinaryclipped = deltabinary; deltabinary = NULL; } PIX *hopened = pixOpenBrick(NULL, deltabinaryclipped, 3, 1); PIX *vopened = pixOpenBrick(NULL, deltabinaryclipped, 1, 3); pixDestroy(&deltabinaryclipped); PIX *opened = pixOr(NULL, hopened, vopened); pixDestroy(&hopened); pixDestroy(&vopened); l_int32 count; pixCountPixels(opened, &count, NULL); fprintf(stderr, "%d\n", 
count); if (count && highlight) { PIX *d1 = pixDilateBrick(NULL, opened, 7, 7); PIX *d2 = pixDilateBrick(NULL, opened, 3, 3); pixInvert(d2, d2); pixAnd(d1, d1, d2); pixPaintThroughMask(a, d1, 0, 0, 0xff << 24); pixWrite(output_filename, a, IFF_PNG); } return count > 0; }
// Finds the sample for each font, class pair that has least maximum // distance to all the other samples of the same font, class. // OrganizeByFontAndClass must have been already called. void TrainingSampleSet::ComputeCanonicalSamples(const IntFeatureMap &map, bool debug) { ASSERT_HOST(font_class_array_ != NULL); IntFeatureDist f_table; if (debug) tprintf("feature table size %d\n", map.sparse_size()); f_table.Init(&map); int worst_s1 = 0; int worst_s2 = 0; double global_worst_dist = 0.0; // Compute distances independently for each font and char index. int font_size = font_id_map_.CompactSize(); for (int font_index = 0; font_index < font_size; ++font_index) { int font_id = font_id_map_.CompactToSparse(font_index); for (int c = 0; c < unicharset_size_; ++c) { int samples_found = 0; FontClassInfo &fcinfo = (*font_class_array_)(font_index, c); if (fcinfo.samples.size() == 0 || (kTestChar >= 0 && c != kTestChar)) { fcinfo.canonical_sample = -1; fcinfo.canonical_dist = 0.0f; if (debug) tprintf("Skipping class %d\n", c); continue; } // The canonical sample will be the one with the min_max_dist, which // is the sample with the lowest maximum distance to all other samples. double min_max_dist = 2.0; // We keep track of the farthest apart pair (max_s1, max_s2) which // are max_max_dist apart, so we can see how bad the variability is. double max_max_dist = 0.0; int max_s1 = 0; int max_s2 = 0; fcinfo.canonical_sample = fcinfo.samples[0]; fcinfo.canonical_dist = 0.0f; for (int i = 0; i < fcinfo.samples.size(); ++i) { int s1 = fcinfo.samples[i]; const GenericVector <int> &features1 = samples_[s1]->indexed_features(); f_table.Set(features1, features1.size(), true); double max_dist = 0.0; // Run the full squared-order search for similar samples. It is still // reasonably fast because f_table.FeatureDistance is fast, but we // may have to reconsider if we start playing with too many samples // of a single char/font. 
for (int j = 0; j < fcinfo.samples.size(); ++j) { int s2 = fcinfo.samples[j]; if (samples_[s2]->class_id() != c || samples_[s2]->font_id() != font_id || s2 == s1) continue; GenericVector <int> features2 = samples_[s2]->indexed_features(); double dist = f_table.FeatureDistance(features2); if (dist > max_dist) { max_dist = dist; if (dist > max_max_dist) { max_s1 = s1; max_s2 = s2; } } } // Using Set(..., false) is far faster than re initializing, due to // the sparseness of the feature space. f_table.Set(features1, features1.size(), false); samples_[s1]->set_max_dist(max_dist); ++samples_found; if (max_dist < min_max_dist) { fcinfo.canonical_sample = s1; fcinfo.canonical_dist = max_dist; } UpdateRange(max_dist, &min_max_dist, &max_max_dist); } if (max_max_dist > global_worst_dist) { // Keep a record of the worst pair over all characters/fonts too. global_worst_dist = max_max_dist; worst_s1 = max_s1; worst_s2 = max_s2; } if (debug) { tprintf("Found %d samples of class %d=%s, font %d, " "dist range [%g, %g], worst pair= %s, %s\n", samples_found, c, unicharset_.debug_str(c).string(), font_index, min_max_dist, max_max_dist, SampleToString(*samples_[max_s1]).string(), SampleToString(*samples_[max_s2]).string()); } } } if (debug) { tprintf("Global worst dist = %g, between sample %d and %d\n", global_worst_dist, worst_s1, worst_s2); Pix *pix1 = DebugSample(unicharset_, samples_[worst_s1]); Pix *pix2 = DebugSample(unicharset_, samples_[worst_s2]); pixOr(pix1, pix1, pix2); pixWrite("worstpair.png", pix1, IFF_PNG); pixDestroy(&pix1); pixDestroy(&pix2); } }
/*
 * Test program for selecting connected components by size, perimeter
 * ratios and area fraction.
 *
 * Part 1 draws four boxes and checks, via count_pieces()/count_pieces2(),
 * that each selection call keeps the expected number of components.
 * Part 2 reads "feyn.tif", splits its components into 12 area-fraction
 * bands using the file-level edges[] array, and checks the band and
 * running-total counts against band[] and total[].
 * Part 3 removes from "feyn.tif" every component that fails a combined
 * size and perimeter/area criterion.
 *
 * Results are reported on stderr; a tiled summary of the part 2 and 3
 * images is displayed and written to /tmp/comp.jpg. Always returns 0.
 *
 * Note: count_pieces() is relied upon to destroy the pix passed to it
 * (see the "destroyed by count_pieces" remark below), which is why pixd
 * is reassigned without an explicit pixDestroy().
 * NOTE(review): count_pieces2() is presumably responsible for the boxa
 * passed to it in the same way, since boxat is never destroyed here --
 * confirm against its definition.
 */
int main(int argc, char **argv) {
    l_int32 w, h, n, i, sum, sumi, empty;
    BOX *box1, *box2, *box3, *box4;
    BOXA *boxa, *boxat;
    NUMA *na1, *na2, *na3, *na4, *na5;
    NUMA *na2i, *na3i, *na4i, *nat, *naw, *nah;
    PIX *pixs, *pixc, *pixt, *pixt2, *pixd, *pixcount;
    PIXA *pixas, *pixad, *pixac;

    pixDisplayWrite(NULL, -1);

    /* Draw 4 filled boxes of different sizes */
    pixs = pixCreate(200, 200, 1);
    box1 = boxCreate(10, 10, 20, 30);
    box2 = boxCreate(50, 10, 40, 20);
    box3 = boxCreate(110, 10, 35, 5);
    box4 = boxCreate(160, 10, 5, 15);
    boxa = boxaCreate(4);
    /* The boxes are inserted (not copied) into boxa; box1..box4 are
     * still used below while boxa is alive. */
    boxaAddBox(boxa, box1, L_INSERT);
    boxaAddBox(boxa, box2, L_INSERT);
    boxaAddBox(boxa, box3, L_INSERT);
    boxaAddBox(boxa, box4, L_INSERT);
    pixRenderBox(pixs, box1, 1, L_SET_PIXELS);
    pixRenderBox(pixs, box2, 1, L_SET_PIXELS);
    pixRenderBox(pixs, box3, 1, L_SET_PIXELS);
    pixRenderBox(pixs, box4, 1, L_SET_PIXELS);
    /* pixt: the four boxes filled in */
    pixt = pixFillClosedBorders(pixs, 4);
    pixDisplayWrite(pixt, 1);
    /* pixt2: the four boxes rendered as hash patterns */
    pixt2 = pixCreateTemplate(pixs);
    pixRenderHashBox(pixt2, box1, 6, 4, L_POS_SLOPE_LINE, 1, L_SET_PIXELS);
    pixRenderHashBox(pixt2, box2, 7, 2, L_POS_SLOPE_LINE, 1, L_SET_PIXELS);
    pixRenderHashBox(pixt2, box3, 4, 2, L_VERTICAL_LINE, 1, L_SET_PIXELS);
    pixRenderHashBox(pixt2, box4, 3, 1, L_HORIZONTAL_LINE, 1, L_SET_PIXELS);
    pixDisplayWrite(pixt2, 1);

    /* Exercise the parameters.  The second argument of each
     * count_pieces() call is the expected number of components. */
    pixd = pixSelectBySize(pixt, 0, 22, 8, L_SELECT_HEIGHT,
                           L_SELECT_IF_GT, NULL);
    count_pieces(pixd, 1);
    pixd = pixSelectBySize(pixt, 0, 30, 8, L_SELECT_HEIGHT,
                           L_SELECT_IF_LT, NULL);
    count_pieces(pixd, 3);
    pixd = pixSelectBySize(pixt, 0, 5, 8, L_SELECT_HEIGHT,
                           L_SELECT_IF_GT, NULL);
    count_pieces(pixd, 3);
    pixd = pixSelectBySize(pixt, 0, 6, 8, L_SELECT_HEIGHT,
                           L_SELECT_IF_LT, NULL);
    count_pieces(pixd, 1);
    pixd = pixSelectBySize(pixt, 20, 0, 8, L_SELECT_WIDTH,
                           L_SELECT_IF_GT, NULL);
    count_pieces(pixd, 2);
    pixd = pixSelectBySize(pixt, 31, 0, 8, L_SELECT_WIDTH,
                           L_SELECT_IF_LT, NULL);
    count_pieces(pixd, 2);
    pixd = pixSelectBySize(pixt, 21, 10, 8, L_SELECT_IF_EITHER,
                           L_SELECT_IF_LT, NULL);
    count_pieces(pixd, 3);
    pixd = pixSelectBySize(pixt, 20, 30, 8, L_SELECT_IF_EITHER,
                           L_SELECT_IF_GT, NULL);
    count_pieces(pixd, 2);
    pixd = pixSelectBySize(pixt, 22, 32, 8, L_SELECT_IF_BOTH,
                           L_SELECT_IF_LT, NULL);
    count_pieces(pixd, 2);
    pixd = pixSelectBySize(pixt, 6, 32, 8, L_SELECT_IF_BOTH,
                           L_SELECT_IF_LT, NULL);
    count_pieces(pixd, 1);
    pixd = pixSelectBySize(pixt, 5, 25, 8, L_SELECT_IF_BOTH,
                           L_SELECT_IF_GT, NULL);
    count_pieces(pixd, 1);
    pixd = pixSelectBySize(pixt, 25, 5, 8, L_SELECT_IF_BOTH,
                           L_SELECT_IF_GT, NULL);
    count_pieces(pixd, 1);

    pixd = pixSelectByPerimToAreaRatio(pixt, 0.3, 8, L_SELECT_IF_GT, NULL);
    count_pieces(pixd, 2);
    pixd = pixSelectByPerimToAreaRatio(pixt, 0.15, 8, L_SELECT_IF_GT, NULL);
    count_pieces(pixd, 3);
    pixd = pixSelectByPerimToAreaRatio(pixt, 0.4, 8, L_SELECT_IF_LTE, NULL);
    count_pieces(pixd, 2);
    pixd = pixSelectByPerimToAreaRatio(pixt, 0.45, 8, L_SELECT_IF_LT, NULL);
    count_pieces(pixd, 3);

    pixd = pixSelectByPerimSizeRatio(pixt2, 2.3, 8, L_SELECT_IF_GT, NULL);
    count_pieces(pixd, 2);
    pixd = pixSelectByPerimSizeRatio(pixt2, 1.2, 8, L_SELECT_IF_GT, NULL);
    count_pieces(pixd, 3);
    pixd = pixSelectByPerimSizeRatio(pixt2, 1.7, 8, L_SELECT_IF_LTE, NULL);
    count_pieces(pixd, 1);
    pixd = pixSelectByPerimSizeRatio(pixt2, 2.9, 8, L_SELECT_IF_LT, NULL);
    count_pieces(pixd, 3);

    pixd = pixSelectByAreaFraction(pixt2, 0.3, 8, L_SELECT_IF_LT, NULL);
    count_pieces(pixd, 0);
    pixd = pixSelectByAreaFraction(pixt2, 0.9, 8, L_SELECT_IF_LT, NULL);
    count_pieces(pixd, 4);
    pixd = pixSelectByAreaFraction(pixt2, 0.5, 8, L_SELECT_IF_GTE, NULL);
    count_pieces(pixd, 3);
    pixd = pixSelectByAreaFraction(pixt2, 0.7, 8, L_SELECT_IF_GT, NULL);
    count_pieces(pixd, 2);

    boxat = boxaSelectBySize(boxa, 21, 10, L_SELECT_IF_EITHER,
                             L_SELECT_IF_LT, NULL);
    count_pieces2(boxat, 3);
    boxat = boxaSelectBySize(boxa, 22, 32, L_SELECT_IF_BOTH,
                             L_SELECT_IF_LT, NULL);
    count_pieces2(boxat, 2);

    boxaDestroy(&boxa);
    pixDestroy(&pixt);
    pixDestroy(&pixt2);
    pixDestroy(&pixs);

    /* Here's the most general method for selecting components.
     * We do it for area fraction, but any combination of
     * size, area/perimeter ratio and area fraction can be used. */
    pixs = pixRead("feyn.tif");
    /* pixs = pixRead("rabi.png"); */
    pixc = pixCopy(NULL, pixs); /* subtract bands from this */
    pixt = pixCreateTemplate(pixs); /* add bands to this */
    pixGetDimensions(pixs, &w, &h, NULL);
    boxa = pixConnComp(pixs, &pixas, 8);
    n = boxaGetCount(boxa);
    fprintf(stderr, "total: %d\n", n);
    na1 = pixaFindAreaFraction(pixas);
    /* nat accumulates the union of the band indicators seen so far */
    nat = numaCreate(0);
    numaSetCount(nat, n); /* initialize to all 0 */
    sum = sumi = 0;
    pixac = pixaCreate(0);
    /* 12 bands; band i is bounded by edges[i] and edges[i + 1], so
     * edges[] is indexed up to 12.  The last band includes its upper
     * edge (LTE / GT instead of LT / GTE). */
    for (i = 0; i < 12; i++) {
        /* Compute within the intervals using an intersection. */
        na2 = numaMakeThresholdIndicator(na1, edges[i], L_SELECT_IF_GTE);
        if (i != 11)
            na3 = numaMakeThresholdIndicator(na1, edges[i + 1],
                                             L_SELECT_IF_LT);
        else
            na3 = numaMakeThresholdIndicator(na1, edges[i + 1],
                                             L_SELECT_IF_LTE);
        na4 = numaLogicalOp(NULL, na2, na3, L_INTERSECTION);
        sum += count_ones(na4, 0, 0, NULL);

        /* Compute outside the intervals using a union, and invert */
        na2i = numaMakeThresholdIndicator(na1, edges[i], L_SELECT_IF_LT);
        if (i != 11)
            na3i = numaMakeThresholdIndicator(na1, edges[i + 1],
                                              L_SELECT_IF_GTE);
        else
            na3i = numaMakeThresholdIndicator(na1, edges[i + 1],
                                              L_SELECT_IF_GT);
        na4i = numaLogicalOp(NULL, na3i, na2i, L_UNION);
        numaInvert(na4i, na4i);
        sumi += count_ones(na4i, 0, 0, NULL);

        /* Compare the two methods (sum and sumi are running totals) */
        if (sum == sumi)
            fprintf(stderr, "\nCorrect: sum = sumi = %d\n", sum);
        else
            fprintf(stderr, "\nWRONG: sum = %d, sumi = %d\n", sum, sumi);

        /* Reconstruct the image, band by band. */
        numaLogicalOp(nat, nat, na4, L_UNION);
        pixad = pixaSelectWithIndicator(pixas, na4, NULL);
        pixd = pixaDisplay(pixad, w, h);
        pixOr(pixt, pixt, pixd); /* add them in */
        pixcount = pixCopy(NULL, pixt); /* destroyed by count_pieces */
        count_ones(na4, band[i], i, "band");
        count_pieces(pixd, band[i]);
        count_ones(nat, total[i], i, "total");
        count_pieces(pixcount, total[i]);
        pixaDestroy(&pixad);

        /* Remove band successively from full image */
        pixRemoveWithIndicator(pixc, pixas, na4);
        pixSaveTiled(pixc, pixac, 0.25, 1 - i % 2, 25, 8);

        numaDestroy(&na2);
        numaDestroy(&na3);
        numaDestroy(&na4);
        numaDestroy(&na2i);
        numaDestroy(&na3i);
        numaDestroy(&na4i);
    }

    /* Did we remove all components from pixc? */
    pixZero(pixc, &empty);
    if (!empty)
        fprintf(stderr, "\nWRONG: not all pixels removed from pixc\n");

    pixDestroy(&pixs);
    pixDestroy(&pixc);
    pixDestroy(&pixt);
    boxaDestroy(&boxa);
    pixaDestroy(&pixas);
    numaDestroy(&na1);
    numaDestroy(&nat);

    /* One last extraction.  Get all components that have either
     * a height of at least 50 or a width of between 30 and 35,
     * and also have a relatively large perimeter/area ratio. */
    pixs = pixRead("feyn.tif");
    boxa = pixConnComp(pixs, &pixas, 8);
    n = boxaGetCount(boxa);
    pixaFindDimensions(pixas, &naw, &nah);
    na1 = pixaFindPerimToAreaRatio(pixas);
    na2 = numaMakeThresholdIndicator(nah, 50, L_SELECT_IF_GTE);
    na3 = numaMakeThresholdIndicator(naw, 30, L_SELECT_IF_GTE);
    na4 = numaMakeThresholdIndicator(naw, 35, L_SELECT_IF_LTE);
    na5 = numaMakeThresholdIndicator(na1, 0.4, L_SELECT_IF_GTE);
    /* na2 = (height >= 50 OR (30 <= width <= 35)) AND ratio >= 0.4 */
    numaLogicalOp(na3, na3, na4, L_INTERSECTION);
    numaLogicalOp(na2, na2, na3, L_UNION);
    numaLogicalOp(na2, na2, na5, L_INTERSECTION);
    numaInvert(na2, na2); /* get components to be removed */
    pixRemoveWithIndicator(pixs, pixas, na2);
    pixSaveTiled(pixs, pixac, 0.25, 1, 25, 8);
    pixDestroy(&pixs);
    boxaDestroy(&boxa);
    pixaDestroy(&pixas);
    numaDestroy(&naw);
    numaDestroy(&nah);
    numaDestroy(&na1);
    numaDestroy(&na2);
    numaDestroy(&na3);
    numaDestroy(&na4);
    numaDestroy(&na5);

    pixDisplayMultiple("/tmp/display/file*");
    pixd = pixaDisplay(pixac, 0, 0);
    pixDisplay(pixd, 100, 100);
    pixWrite("/tmp/comp.jpg", pixd, IFF_JFIF_JPEG);
    pixDestroy(&pixd);
    pixaDestroy(&pixac);
    return 0;
}
int main(int argc, char **argv) { char *filein, *fileout; l_int32 i; l_uint32 val; l_float32 size; PIX *pixs, *pixd, *pixm, *pixmi, *pixt1, *pixt2, *pixt3; static char mainName[] = "seedfilltest"; if (argc != 3) return ERROR_INT(" Syntax: seedfilltest filein fileout", mainName, 1); filein = argv[1]; fileout = argv[2]; pixd = NULL; if ((pixm = pixRead(filein)) == NULL) return ERROR_INT("pixm not made", mainName, 1); pixmi = pixInvert(NULL, pixm); size = pixGetWidth(pixm) * pixGetHeight(pixm); pixs = pixCreateTemplate(pixm); for (i = 0; i < 100; i++) { pixGetPixel(pixm, XS + 5 * i, YS + 5 * i, &val); if (val == 0) break; } if (i == 100) return ERROR_INT("no seed pixel found", mainName, 1); pixSetPixel(pixs, XS + 5 * i, YS + 5 * i, 1); #if 0 /* hole filling; use "hole-filler.png" */ pixt1 = pixHDome(pixmi, 100, 4); pixt2 = pixThresholdToBinary(pixt1, 10); /* pixInvert(pixt1, pixt1); */ pixDisplay(pixt1, 100, 500); pixDisplay(pixt2, 600, 500); pixt3 = pixHolesByFilling(pixt2, 4); pixDilateBrick(pixt3, pixt3, 7, 7); pixd = pixConvertTo8(pixt3, FALSE); pixDisplay(pixd, 0, 100); pixSeedfillGray(pixd, pixmi, CONNECTIVITY); pixInvert(pixd, pixd); pixDisplay(pixmi, 500, 100); pixDisplay(pixd, 1000, 100); pixWrite("/tmp/junkpixm.png", pixmi, IFF_PNG); pixWrite("/tmp/junkpixd.png", pixd, IFF_PNG); #endif #if 0 /* hole filling; use "hole-filler.png" */ pixt1 = pixThresholdToBinary(pixm, 110); pixInvert(pixt1, pixt1); pixDisplay(pixt1, 100, 500); pixt2 = pixHolesByFilling(pixt1, 4); pixd = pixConvertTo8(pixt2, FALSE); pixDisplay(pixd, 0, 100); pixSeedfillGray(pixd, pixmi, CONNECTIVITY); pixInvert(pixd, pixd); pixDisplay(pixmi, 500, 100); pixDisplay(pixd, 1000, 100); pixWrite("/tmp/junkpixm.png", pixmi, IFF_PNG); pixWrite("/tmp/junkpixd.png", pixd, IFF_PNG); #endif #if 0 /* hole filling; use "hole-filler.png" */ pixd = pixInvert(NULL, pixm); pixAddConstantGray(pixd, -50); pixDisplay(pixd, 0, 100); /* pixt1 = pixThresholdToBinary(pixd, 20); pixDisplayWithTitle(pixt1, 600, 600, 
"pixt1", DFLAG); */ pixSeedfillGray(pixd, pixmi, CONNECTIVITY); /* pixInvert(pixd, pixd); */ pixDisplay(pixmi, 500, 100); pixDisplay(pixd, 1000, 100); pixWrite("/tmp/junkpixm.png", pixmi, IFF_PNG); pixWrite("/tmp/junkpixd.png", pixd, IFF_PNG); #endif #if 0 /* test in-place seedfill for speed */ pixd = pixClone(pixs); startTimer(); pixSeedfillBinary(pixs, pixs, pixmi, CONNECTIVITY); fprintf(stderr, "Filling rate: %7.4f Mpix/sec\n", (size/1000000.) / stopTimer()); pixWrite(fileout, pixd, IFF_PNG); pixOr(pixd, pixd, pixm); pixWrite("/tmp/junkout1.png", pixd, IFF_PNG); #endif #if 0 /* test seedfill to dest for speed */ pixd = pixCreateTemplate(pixm); startTimer(); for (i = 0; i < NTIMES; i++) { pixSeedfillBinary(pixd, pixs, pixmi, CONNECTIVITY); } fprintf(stderr, "Filling rate: %7.4f Mpix/sec\n", (size/1000000.) * NTIMES / stopTimer()); pixWrite(fileout, pixd, IFF_PNG); pixOr(pixd, pixd, pixm); pixWrite("/tmp/junkout1.png", pixd, IFF_PNG); #endif /* use same connectivity to compare with the result of the * slow parallel operation */ #if 1 pixDestroy(&pixd); pixd = pixSeedfillMorph(pixs, pixmi, 100, CONNECTIVITY); pixOr(pixd, pixd, pixm); pixWrite("/tmp/junkout2.png", pixd, IFF_PNG); #endif pixDestroy(&pixs); pixDestroy(&pixm); pixDestroy(&pixmi); pixDestroy(&pixd); return 0; }
/*!
 *  pixMirrorDetectDwa()
 *
 *      Input:  pixs (1 bpp, deskewed, English text)
 *              &conf (<return> confidence that text is not LR mirror reversed)
 *              mincount (threshold on the hit counts; use 0 for default)
 *              debug (1 for debug output; 0 otherwise)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) We assume the text is horizontally oriented, with
 *          ascenders going up.
 *      (2) See notes in pixMirrorDetect().
 *      (3) *pconf is set to 0.0 on entry and is only changed when the
 *          larger of the two hit counts (right-facing, left-facing)
 *          exceeds mincount.
 *      (4) pixs must be 1 bpp; any other depth is rejected up front
 *          rather than letting the morphological operations fail and
 *          the hit counts be read without having been set.
 */
l_int32
pixMirrorDetectDwa(PIX        *pixs,
                   l_float32  *pconf,
                   l_int32     mincount,
                   l_int32     debug)
{
char       flipsel1[] = "flipsel1";
char       flipsel2[] = "flipsel2";
l_int32    count1, count2, nmax;
l_float32  nleft, nright;
PIX       *pixt0, *pixt1, *pixt2, *pixt3;

    PROCNAME("pixMirrorDetectDwa");

    if (!pconf)
        return ERROR_INT("&conf not defined", procName, 1);
    *pconf = 0.0;
    if (!pixs || pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
    if (mincount == 0)
        mincount = DEFAULT_MIN_MIRROR_FLIP_COUNT;

        /* Defined values even if a counting step below fails */
    count1 = count2 = 0;

        /* Fill x-height characters but not space between them, sort of. */
    pixt3 = pixMorphSequenceDwa(pixs, "d1.30", 0);
    pixXor(pixt3, pixt3, pixs);
    pixt0 = pixMorphSequenceDwa(pixs, "c15.1", 0);
    pixXor(pixt0, pixt0, pixs);
    pixAnd(pixt0, pixt0, pixt3);
    pixOr(pixt3, pixt0, pixs);
    pixDestroy(&pixt0);

        /* The flip DWA operations are applied to a bordered image */
    pixt0 = pixAddBorderGeneral(pixt3, ADDED_BORDER, ADDED_BORDER,
                                ADDED_BORDER, ADDED_BORDER, 0);
    pixDestroy(&pixt3);

        /* Filter the right-facing characters. */
    pixt1 = pixFlipFHMTGen(NULL, pixt0, flipsel1);
    pixt3 = pixReduceRankBinaryCascade(pixt1, 1, 1, 0, 0);
    pixCountPixels(pixt3, &count1, NULL);
    pixDestroy(&pixt1);
    pixDestroy(&pixt3);

        /* Filter the left-facing characters. */
    pixt2 = pixFlipFHMTGen(NULL, pixt0, flipsel2);
    pixt3 = pixReduceRankBinaryCascade(pixt2, 1, 1, 0, 0);
    pixCountPixels(pixt3, &count2, NULL);
    pixDestroy(&pixt2);
    pixDestroy(&pixt3);

    pixDestroy(&pixt0);
    nright = (l_float32)count1;
    nleft = (l_float32)count2;
    nmax = L_MAX(count1, count2);

        /* nmax > 0 keeps the denominator nonzero when the caller
         * passes a negative mincount and nothing was detected. */
    if (nmax > mincount && nmax > 0)
        *pconf = 2. * ((nright - nleft) / sqrt(nright + nleft));

    if (debug) {
        fprintf(stderr, "nright = %f, nleft = %f\n", nright, nleft);
        if (*pconf > DEFAULT_MIN_MIRROR_FLIP_CONF)
            fprintf(stderr, "Text is not mirror reversed\n");
        if (*pconf < -DEFAULT_MIN_MIRROR_FLIP_CONF)
            fprintf(stderr, "Text is mirror reversed\n");
    }

    return 0;
}
// Finds image regions within the source pix (page image) and returns // the image regions as a Boxa, Pixa pair, analgous to pixConnComp. // The returned boxa, pixa may be NULL, meaning no images found. // If not NULL, they must be destroyed by the caller. void ImageFinder::FindImages(Pix* pix, Boxa** boxa, Pixa** pixa) { *boxa = NULL; *pixa = NULL; #ifdef HAVE_LIBLEPT if (pixGetWidth(pix) < kMinImageFindSize || pixGetHeight(pix) < kMinImageFindSize) return; // Not worth looking at small images. // Reduce by factor 2. Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0); pixDisplayWrite(pixr, textord_tabfind_show_images); // Get the halftone mask directly from Leptonica. Pix *pixht2 = pixGenHalftoneMask(pixr, NULL, NULL, textord_tabfind_show_images); pixDestroy(&pixr); if (pixht2 == NULL) return; // Expand back up again. Pix *pixht = pixExpandReplicate(pixht2, 2); pixDisplayWrite(pixht, textord_tabfind_show_images); pixDestroy(&pixht2); // Fill to capture pixels near the mask edges that were missed Pix *pixt = pixSeedfillBinary(NULL, pixht, pix, 8); pixOr(pixht, pixht, pixt); pixDestroy(&pixt); // Eliminate lines and bars that may be joined to images. Pix* pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3); pixDilateBrick(pixfinemask, pixfinemask, 5, 5); pixDisplayWrite(pixfinemask, textord_tabfind_show_images); Pix* pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1); Pix* pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0); pixDestroy(&pixreduced); pixDilateBrick(pixreduced2, pixreduced2, 5, 5); Pix* pixcoarsemask = pixExpandReplicate(pixreduced2, 8); pixDestroy(&pixreduced2); pixDisplayWrite(pixcoarsemask, textord_tabfind_show_images); // Combine the coarse and fine image masks. pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask); pixDestroy(&pixfinemask); // Dilate a bit to make sure we get everything. 
pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3); Pix* pixmask = pixExpandReplicate(pixcoarsemask, 16); pixDestroy(&pixcoarsemask); pixDisplayWrite(pixmask, textord_tabfind_show_images); // And the image mask with the line and bar remover. pixAnd(pixht, pixht, pixmask); pixDestroy(&pixmask); pixDisplayWrite(pixht, textord_tabfind_show_images); // Find the individual image regions in the mask image. *boxa = pixConnComp(pixht, pixa, 8); pixDestroy(&pixht); // Rectangularize the individual images. If a sharp edge in vertical and/or // horizontal occupancy can be found, it indicates a probably rectangular // image with unwanted bits merged on, so clip to the approximate rectangle. int npixes = pixaGetCount(*pixa); for (int i = 0; i < npixes; ++i) { int x_start, x_end, y_start, y_end; Pix* img_pix = pixaGetPix(*pixa, i, L_CLONE); pixDisplayWrite(img_pix, textord_tabfind_show_images); if (pixNearlyRectangular(img_pix, kMinRectangularFraction, kMaxRectangularFraction, kMaxRectangularGradient, &x_start, &y_start, &x_end, &y_end)) { // Add 1 to the size as a kludgy flag to indicate to the later stages // of processing that it is a clipped rectangular image . Pix* simple_pix = pixCreate(pixGetWidth(img_pix) + 1, pixGetHeight(img_pix), 1); pixDestroy(&img_pix); pixRasterop(simple_pix, x_start, y_start, x_end - x_start, y_end - y_start, PIX_SET, NULL, 0, 0); // pixaReplacePix takes ownership of the simple_pix. pixaReplacePix(*pixa, i, simple_pix, NULL); img_pix = pixaGetPix(*pixa, i, L_CLONE); } // Subtract the pix from the correct location in the master image. l_int32 x, y, width, height; pixDisplayWrite(img_pix, textord_tabfind_show_images); boxaGetBoxGeometry(*boxa, i, &x, &y, &width, &height); pixRasterop(pix, x, y, width, height, PIX_NOT(PIX_SRC) & PIX_DST, img_pix, 0, 0); pixDestroy(&img_pix); } #endif }
/*!
 *  pixMirrorDetect()
 *
 *      Input:  pixs (1 bpp, deskewed, English text)
 *              &conf (<return> confidence that text is not LR mirror reversed)
 *              mincount (threshold on the hit counts; use 0 for default)
 *              debug (1 for debug output; 0 otherwise)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) For this test, it is necessary that the text is horizontally
 *          oriented, with ascenders going up.
 *      (2) conf is the normalized difference between the number of
 *          right and left facing characters with ascenders.
 *          Left-facing are {d}; right-facing are {b, h, k}.
 *          At least that was the expectation.  In practice, we can
 *          really just say that it is the normalized difference in
 *          hits using two specific hit-miss filters, textsel1 and textsel2,
 *          after the image has been suitably pre-filtered so that
 *          these filters are effective.  See (4) for what's really happening.
 *      (3) A large positive conf value indicates normal text, whereas
 *          a large negative conf value means the page is mirror reversed.
 *      (4) The implementation is a bit tricky.  The general idea is
 *          to fill the x-height part of characters, but not the space
 *          between them, before doing the HMT.  This is done by
 *          finding pixels added using two different operations -- a
 *          horizontal close and a vertical dilation -- and adding
 *          the intersection of these sets to the original.  It turns
 *          out that the original intuition about the signal was largely
 *          in error: much of the signal for right-facing characters
 *          comes from the lower part of common x-height characters, like
 *          the e and c, that remain open after these operations.
 *          So it's important that the operations to close the x-height
 *          parts of the characters are purposely weakened sufficiently
 *          to allow these characters to remain open.  The wonders
 *          of morphology!
 *      (5) *pconf is set to 0.0 on entry and is only changed when the
 *          larger of the two hit counts exceeds mincount.
 *      (6) pixs must be 1 bpp; any other depth is rejected up front
 *          rather than letting the morphological operations fail and
 *          the hit counts be read without having been set.
 */
l_int32
pixMirrorDetect(PIX        *pixs,
                l_float32  *pconf,
                l_int32     mincount,
                l_int32     debug)
{
l_int32    count1, count2, nmax;
l_float32  nleft, nright;
PIX       *pixt0, *pixt1, *pixt2, *pixt3;
SEL       *sel1, *sel2;

    PROCNAME("pixMirrorDetect");

    if (!pconf)
        return ERROR_INT("&conf not defined", procName, 1);
    *pconf = 0.0;
    if (!pixs || pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
    if (mincount == 0)
        mincount = DEFAULT_MIN_MIRROR_FLIP_COUNT;

        /* Defined values even if a counting step below fails */
    count1 = count2 = 0;

    sel1 = selCreateFromString(textsel1, 5, 6, NULL);
    sel2 = selCreateFromString(textsel2, 5, 6, NULL);

        /* Fill x-height characters but not space between them, sort of. */
    pixt3 = pixMorphCompSequence(pixs, "d1.30", 0);
    pixXor(pixt3, pixt3, pixs);
    pixt0 = pixMorphCompSequence(pixs, "c15.1", 0);
    pixXor(pixt0, pixt0, pixs);
    pixAnd(pixt0, pixt0, pixt3);
    pixOr(pixt0, pixt0, pixs);
    pixDestroy(&pixt3);
/*    pixDisplayWrite(pixt0, 1); */

        /* Filter the right-facing characters. */
    pixt1 = pixHMT(NULL, pixt0, sel1);
    pixt3 = pixReduceRankBinaryCascade(pixt1, 1, 1, 0, 0);
    pixCountPixels(pixt3, &count1, NULL);
    pixDebugFlipDetect("junkpixright", pixs, pixt1, debug);
    pixDestroy(&pixt1);
    pixDestroy(&pixt3);

        /* Filter the left-facing characters. */
    pixt2 = pixHMT(NULL, pixt0, sel2);
    pixt3 = pixReduceRankBinaryCascade(pixt2, 1, 1, 0, 0);
    pixCountPixels(pixt3, &count2, NULL);
    pixDebugFlipDetect("junkpixleft", pixs, pixt2, debug);
    pixDestroy(&pixt2);
    pixDestroy(&pixt3);

    nright = (l_float32)count1;
    nleft = (l_float32)count2;
    nmax = L_MAX(count1, count2);
    pixDestroy(&pixt0);
    selDestroy(&sel1);
    selDestroy(&sel2);

        /* nmax > 0 keeps the denominator nonzero when the caller
         * passes a negative mincount and nothing was detected. */
    if (nmax > mincount && nmax > 0)
        *pconf = 2. * ((nright - nleft) / sqrt(nright + nleft));

    if (debug) {
        fprintf(stderr, "nright = %f, nleft = %f\n", nright, nleft);
        if (*pconf > DEFAULT_MIN_MIRROR_FLIP_CONF)
            fprintf(stderr, "Text is not mirror reversed\n");
        if (*pconf < -DEFAULT_MIN_MIRROR_FLIP_CONF)
            fprintf(stderr, "Text is mirror reversed\n");
    }

    return 0;
}
/*!
 *  pixUpDownDetectGeneralDwa()
 *
 *      Input:  pixs (1 bpp, deskewed, English text)
 *              &conf (<return> confidence that text is rightside-up)
 *              mincount (threshold on the hit counts; use 0 for default)
 *              npixels (number of pixels removed from each side of word box)
 *              debug (1 for debug output; 0 otherwise)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) See the notes in pixUpDownDetectGeneral() for usage.
 *      (2) *pconf is set to 0.0 on entry and is only changed when the
 *          larger of the two hit counts (up, down) exceeds mincount.
 *      (3) A negative npixels is treated as 0 (no word-box mask).
 *      (4) pixs must be 1 bpp; any other depth is rejected up front
 *          rather than letting the morphological operations fail and
 *          the hit counts be read without having been set.
 */
l_int32
pixUpDownDetectGeneralDwa(PIX        *pixs,
                          l_float32  *pconf,
                          l_int32     mincount,
                          l_int32     npixels,
                          l_int32     debug)
{
char       flipsel1[] = "flipsel1";
char       flipsel2[] = "flipsel2";
char       flipsel3[] = "flipsel3";
char       flipsel4[] = "flipsel4";
l_int32    countup, countdown, nmax;
l_float32  nup, ndown;
PIX       *pixt, *pixt0, *pixt1, *pixt2, *pixt3, *pixm;

    PROCNAME("pixUpDownDetectGeneralDwa");

    if (!pconf)
        return ERROR_INT("&conf not defined", procName, 1);
    *pconf = 0.0;
    if (!pixs || pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
    if (mincount == 0)
        mincount = DEFAULT_MIN_UP_DOWN_COUNT;
    if (npixels < 0)
        npixels = 0;

        /* Defined values even if a counting step below fails */
    countup = countdown = 0;

        /* One of many reasonable pre-filtering sequences: (1, 8) and (30, 1).
         * This closes holes in x-height characters and joins them at
         * the x-height.  There is more noise in the descender detection
         * from this, but it works fairly well. */
    pixt = pixMorphSequenceDwa(pixs, "c1.8 + c30.1", 0);

        /* Be sure to add the border before the flip DWA operations! */
    pixt0 = pixAddBorderGeneral(pixt, ADDED_BORDER, ADDED_BORDER,
                                ADDED_BORDER, ADDED_BORDER, 0);
    pixDestroy(&pixt);

        /* Optionally, make a mask of the word bounding boxes, shortening
         * each of them by a fixed amount at each end. */
    pixm = NULL;
    if (npixels > 0) {
        l_int32  i, nbox, x, y, w, h;
        BOX   *box;
        BOXA  *boxa;

        pixt1 = pixMorphSequenceDwa(pixt0, "o10.1", 0);
        boxa = pixConnComp(pixt1, NULL, 8);
        pixm = pixCreateTemplate(pixt1);
        pixDestroy(&pixt1);
        nbox = boxaGetCount(boxa);
        for (i = 0; i < nbox; i++) {
            box = boxaGetBox(boxa, i, L_CLONE);
            boxGetGeometry(box, &x, &y, &w, &h);
                /* Boxes too narrow to be shortened contribute nothing */
            if (w > 2 * npixels)
                pixRasterop(pixm, x + npixels, y - 6, w - 2 * npixels, h + 13,
                            PIX_SET, NULL, 0, 0);
            boxDestroy(&box);
        }
        boxaDestroy(&boxa);
    }

        /* Find the ascenders and optionally filter with pixm.
         * For an explanation of the procedure used for counting the result
         * of the HMT, see comments in pixUpDownDetectGeneral().  */
    pixt1 = pixFlipFHMTGen(NULL, pixt0, flipsel1);
    pixt2 = pixFlipFHMTGen(NULL, pixt0, flipsel2);
    pixOr(pixt1, pixt1, pixt2);
    if (pixm)
        pixAnd(pixt1, pixt1, pixm);
    pixt3 = pixReduceRankBinaryCascade(pixt1, 1, 1, 0, 0);
    pixCountPixels(pixt3, &countup, NULL);
    pixDestroy(&pixt1);
    pixDestroy(&pixt2);
    pixDestroy(&pixt3);

        /* Find the descenders (the "down" count) and optionally
         * filter with pixm. */
    pixt1 = pixFlipFHMTGen(NULL, pixt0, flipsel3);
    pixt2 = pixFlipFHMTGen(NULL, pixt0, flipsel4);
    pixOr(pixt1, pixt1, pixt2);
    if (pixm)
        pixAnd(pixt1, pixt1, pixm);
    pixt3 = pixReduceRankBinaryCascade(pixt1, 1, 1, 0, 0);
    pixCountPixels(pixt3, &countdown, NULL);
    pixDestroy(&pixt1);
    pixDestroy(&pixt2);
    pixDestroy(&pixt3);

        /* Evaluate statistically, generating a confidence that is
         * related to the probability with a gaussian distribution.
         * nmax > 0 keeps the denominator nonzero when the caller
         * passes a negative mincount and nothing was detected. */
    nup = (l_float32)(countup);
    ndown = (l_float32)(countdown);
    nmax = L_MAX(countup, countdown);
    if (nmax > mincount && nmax > 0)
        *pconf = 2. * ((nup - ndown) / sqrt(nup + ndown));

    if (debug) {
        if (pixm) pixWrite("junkpixm2", pixm, IFF_PNG);
        fprintf(stderr, "nup = %7.3f, ndown = %7.3f, conf = %7.3f\n",
                nup, ndown, *pconf);
        if (*pconf > DEFAULT_MIN_UP_DOWN_CONF)
            fprintf(stderr, "Text is rightside-up\n");
        if (*pconf < -DEFAULT_MIN_UP_DOWN_CONF)
            fprintf(stderr, "Text is upside-down\n");
    }

    pixDestroy(&pixt0);
    pixDestroy(&pixm);
    return 0;
}
int main(int argc, char **argv) { PIX *pixs, *pix1, *pix2, *pix3, *pix4; L_REGPARAMS *rp; if (regTestSetup(argc, argv, &rp)) return 1; pixs = pixRead("test1.png"); /* pixInvert */ pix1 = pixInvert(NULL, pixs); pix2 = pixCreateTemplate(pixs); /* into pixd of same size */ pixInvert(pix2, pixs); regTestWritePixAndCheck(rp, pix1, IFF_PNG); /* 0 */ regTestComparePix(rp, pix1, pix2); /* 1 */ pix3 = pixRead("marge.jpg"); /* into pixd of different size */ pixInvert(pix3, pixs); regTestComparePix(rp, pix1, pix3); /* 2 */ pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); pix1 = pixOpenBrick(NULL, pixs, 1, 9); pix2 = pixDilateBrick(NULL, pixs, 1, 9); /* pixOr */ pix3 = pixCreateTemplate(pixs); pixOr(pix3, pixs, pix1); /* existing */ pix4 = pixOr(NULL, pixs, pix1); /* new */ regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 3 */ regTestComparePix(rp, pix3, pix4); /* 4 */ pixCopy(pix4, pix1); pixOr(pix4, pix4, pixs); /* in-place */ regTestComparePix(rp, pix3, pix4); /* 5 */ pixDestroy(&pix3); pixDestroy(&pix4); pix3 = pixCreateTemplate(pixs); pixOr(pix3, pixs, pix2); /* existing */ pix4 = pixOr(NULL, pixs, pix2); /* new */ regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 6 */ regTestComparePix(rp, pix3, pix4); /* 7 */ pixCopy(pix4, pix2); pixOr(pix4, pix4, pixs); /* in-place */ regTestComparePix(rp, pix3, pix4); /* 8 */ pixDestroy(&pix3); pixDestroy(&pix4); /* pixAnd */ pix3 = pixCreateTemplate(pixs); pixAnd(pix3, pixs, pix1); /* existing */ pix4 = pixAnd(NULL, pixs, pix1); /* new */ regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 9 */ regTestComparePix(rp, pix3, pix4); /* 10 */ pixCopy(pix4, pix1); pixAnd(pix4, pix4, pixs); /* in-place */ regTestComparePix(rp, pix3, pix4); /* 11 */ pixDestroy(&pix3); pixDestroy(&pix4); pix3 = pixCreateTemplate(pixs); pixAnd(pix3, pixs, pix2); /* existing */ pix4 = pixAnd(NULL, pixs, pix2); /* new */ regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 12 */ regTestComparePix(rp, pix3, pix4); /* 13 */ pixCopy(pix4, pix2); pixAnd(pix4, pix4, pixs); 
/* in-place */ regTestComparePix(rp, pix3, pix4); /* 14 */ pixDestroy(&pix3); pixDestroy(&pix4); /* pixXor */ pix3 = pixCreateTemplate(pixs); pixXor(pix3, pixs, pix1); /* existing */ pix4 = pixXor(NULL, pixs, pix1); /* new */ regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 15 */ regTestComparePix(rp, pix3, pix4); /* 16 */ pixCopy(pix4, pix1); pixXor(pix4, pix4, pixs); /* in-place */ regTestComparePix(rp, pix3, pix4); /* 17 */ pixDestroy(&pix3); pixDestroy(&pix4); pix3 = pixCreateTemplate(pixs); pixXor(pix3, pixs, pix2); /* existing */ pix4 = pixXor(NULL, pixs, pix2); /* new */ regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 18 */ regTestComparePix(rp, pix3, pix4); /* 19 */ pixCopy(pix4, pix2); pixXor(pix4, pix4, pixs); /* in-place */ regTestComparePix(rp, pix3, pix4); /* 20 */ pixDestroy(&pix3); pixDestroy(&pix4); /* pixSubtract */ pix3 = pixCreateTemplate(pixs); pixSubtract(pix3, pixs, pix1); /* existing */ pix4 = pixSubtract(NULL, pixs, pix1); /* new */ regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 21 */ regTestComparePix(rp, pix3, pix4); /* 22 */ pixCopy(pix4, pix1); pixSubtract(pix4, pixs, pix4); /* in-place */ regTestComparePix(rp, pix3, pix4); /* 23 */ pixDestroy(&pix3); pixDestroy(&pix4); pix3 = pixCreateTemplate(pixs); pixSubtract(pix3, pixs, pix2); /* existing */ pix4 = pixSubtract(NULL, pixs, pix2); /* new */ regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 24 */ regTestComparePix(rp, pix3, pix4); /* 25 */ pixCopy(pix4, pix2); pixSubtract(pix4, pixs, pix4); /* in-place */ regTestComparePix(rp, pix3, pix4); /* 26 */ pixDestroy(&pix3); pixDestroy(&pix4); pix4 = pixRead("marge.jpg"); pixSubtract(pix4, pixs, pixs); /* subtract from itself; should be empty */ pix3 = pixCreateTemplate(pixs); regTestComparePix(rp, pix3, pix4); /* 27*/ pixDestroy(&pix3); pixDestroy(&pix4); pixSubtract(pixs, pixs, pixs); /* subtract from itself; should be empty */ pix3 = pixCreateTemplate(pixs); regTestComparePix(rp, pix3, pixs); /* 28*/ pixDestroy(&pix3); pixDestroy(&pixs); 
pixDestroy(&pix1); pixDestroy(&pix2); return regTestCleanup(rp); }
* that might be used * * -------------------------------------------------------------------- */ #if 0 pixd = pixCreateTemplate(pixs); pixd = pixDilate(NULL, pixs, sel); pixd = pixErode(NULL, pixs, sel); pixd = pixOpen(NULL, pixs, sel); pixd = pixClose(NULL, pixs, sel); pixDilate(pixd, pixs, sel); pixErode(pixd, pixs, sel); pixOpen(pixd, pixs, sel); pixClose(pixd, pixs, sel); pixAnd(pixd, pixd, pixs); pixOr(pixd, pixd, pixs); pixXor(pixd, pixd, pixs); pixSubtract(pixd, pixd, pixs); pixInvert(pixd, pixs); pixd = pixAnd(NULL, pixd, pixs); pixd = pixOr(NULL, pixd, pixs); pixd = pixXor(NULL, pixd, pixs); pixd = pixSubtract(NULL, pixd, pixs); pixd = pixInvert(NULL, pixs); pixInvert(pixs, pixs); #endif /* 0 */
/*! * Note: this method is generally inferior to pixHasColorRegions(); it * is retained as a reference only * * \brief pixFindColorRegionsLight() * * \param[in] pixs 32 bpp rgb * \param[in] pixm [optional] 1 bpp mask image * \param[in] factor subsample factor; integer >= 1 * \param[in] darkthresh threshold to eliminate dark pixels (e.g., text) * from consideration; typ. 70; -1 for default. * \param[in] lightthresh threshold for minimum gray value at 95% rank * near white; typ. 220; -1 for default * \param[in] mindiff minimum difference from 95% rank value, used * to count darker pixels; typ. 50; -1 for default * \param[in] colordiff minimum difference in (max - min) component to * qualify as a color pixel; typ. 40; -1 for default * \param[out] pcolorfract fraction of 'color' pixels found * \param[out] pcolormask1 [optional] mask over background color, if any * \param[out] pcolormask2 [optional] filtered mask over background color * \param[out] pixadb [optional] debug intermediate results * \return 0 if OK, 1 on error * * <pre> * Notes: * (1) This function tries to determine if there is a significant * color or darker region on a scanned page image where part * of the image is very close to "white". It will also allow * extraction of small regions of lightly colored pixels. * If the background is darker (and reddish), use instead * pixHasColorRegions2(). * (2) If %pixm exists, only pixels under fg are considered. Typically, * the inverse of %pixm would have fg pixels over a photograph. * (3) There are four thresholds. * * %darkthresh: ignore pixels darker than this (typ. fg text). * We make a 1 bpp mask of these pixels, and then dilate it to * remove all vestiges of fg from their vicinity. * * %lightthresh: let val95 be the pixel value for which 95% * of the non-masked pixels have a lower value (darker) of * their min component. Then if val95 is darker than * %lightthresh, the image is not considered to have a * light bg, and this returns 0.0 for %colorfract. 
* * %mindiff: we are interested in the fraction of pixels that * have two conditions. The first is that their min component * is at least %mindiff darker than val95. * * %colordiff: the second condition is that the max-min diff * of the pixel components exceeds %colordiff. * (4) This returns in %pcolorfract the fraction of pixels that have * both a min component that is at least %mindiff below that at the * 95% rank value (where 100% rank is the lightest value), and * a max-min diff that is at least %colordiff. Without the * %colordiff constraint, gray pixels of intermediate value * could get flagged by this function. * (5) No masks are returned unless light color pixels are found. * If colorfract > 0.0 and %pcolormask1 is defined, this returns * a 1 bpp mask with fg pixels over the color background. * This mask may have some holes in it. * (6) If colorfract > 0.0 and %pcolormask2 is defined, this returns * a filtered version of colormask1. The two changes are * (a) small holes have been filled * (b) components near the border have been removed. * The latter insures that dark pixels near the edge of the * image are not included. * (7) To generate a boxa of rectangular regions from the overlap * of components in the filtered mask: * boxa1 = pixConnCompBB(colormask2, 8); * boxa2 = boxaCombineOverlaps(boxa1); * This is done here in debug mode. 
* </pre> */ static l_int32 pixFindColorRegionsLight(PIX *pixs, PIX *pixm, l_int32 factor, l_int32 darkthresh, l_int32 lightthresh, l_int32 mindiff, l_int32 colordiff, l_float32 *pcolorfract, PIX **pcolormask1, PIX **pcolormask2, PIXA *pixadb) { l_int32 lightbg, w, h, count; l_float32 ratio, val95, rank; BOXA *boxa1, *boxa2; NUMA *nah; PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pixm1, *pixm2, *pixm3; PROCNAME("pixFindColorRegionsLight"); if (pcolormask1) *pcolormask1 = NULL; if (pcolormask2) *pcolormask2 = NULL; if (!pcolorfract) return ERROR_INT("&colorfract not defined", procName, 1); *pcolorfract = 0.0; if (!pixs || pixGetDepth(pixs) != 32) return ERROR_INT("pixs not defined or not 32 bpp", procName, 1); if (factor < 1) factor = 1; if (darkthresh < 0) darkthresh = 70; /* defaults */ if (lightthresh < 0) lightthresh = 220; if (mindiff < 0) mindiff = 50; if (colordiff < 0) colordiff = 40; /* Check if pixm covers most of the image. If so, just return. */ pixGetDimensions(pixs, &w, &h, NULL); if (pixm) { pixCountPixels(pixm, &count, NULL); ratio = (l_float32)count / ((l_float32)(w) * h); if (ratio > 0.7) { if (pixadb) L_INFO("pixm has big fg: %f5.2\n", procName, ratio); return 0; } } /* Make a mask pixm1 over the dark pixels in the image: * convert to gray using the average of the components; * threshold using %darkthresh; do a small dilation; * combine with pixm. */ pix1 = pixConvertRGBToGray(pixs, 0.33, 0.34, 0.33); if (pixadb) pixaAddPix(pixadb, pixs, L_COPY); if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); pixm1 = pixThresholdToBinary(pix1, darkthresh); pixDilateBrick(pixm1, pixm1, 7, 7); if (pixadb) pixaAddPix(pixadb, pixm1, L_COPY); if (pixm) { pixOr(pixm1, pixm1, pixm); if (pixadb) pixaAddPix(pixadb, pixm1, L_COPY); } pixDestroy(&pix1); /* Convert to gray using the minimum component value and * find the gray value at rank 0.95, that represents the light * pixels in the image. If it is too dark, quit. 
*/ pix1 = pixConvertRGBToGrayMinMax(pixs, L_SELECT_MIN); pix2 = pixInvert(NULL, pixm1); /* pixels that are not dark */ pixGetRankValueMasked(pix1, pix2, 0, 0, factor, 0.95, &val95, &nah); pixDestroy(&pix2); if (pixadb) { L_INFO("val at 0.95 rank = %5.1f\n", procName, val95); gplotSimple1(nah, GPLOT_PNG, "/tmp/lept/histo1", "gray histo"); pix3 = pixRead("/tmp/lept/histo1.png"); pix4 = pixExpandReplicate(pix3, 2); pixaAddPix(pixadb, pix4, L_INSERT); pixDestroy(&pix3); } lightbg = (l_int32)val95 >= lightthresh; numaDestroy(&nah); if (!lightbg) { pixDestroy(&pix1); pixDestroy(&pixm1); return 0; } /* Make mask pixm2 over pixels that are darker than val95 - mindiff. */ pixm2 = pixThresholdToBinary(pix1, val95 - mindiff); if (pixadb) pixaAddPix(pixadb, pixm2, L_COPY); pixDestroy(&pix1); /* Make a mask pixm3 over pixels that have some color saturation, * with a (max - min) component difference >= %colordiff, * and combine using AND with pixm2. */ pix2 = pixConvertRGBToGrayMinMax(pixs, L_CHOOSE_MAXDIFF); pixm3 = pixThresholdToBinary(pix2, colordiff); pixDestroy(&pix2); pixInvert(pixm3, pixm3); /* need pixels above threshold */ if (pixadb) pixaAddPix(pixadb, pixm3, L_COPY); pixAnd(pixm2, pixm2, pixm3); if (pixadb) pixaAddPix(pixadb, pixm2, L_COPY); pixDestroy(&pixm3); /* Subtract the dark pixels represented by pixm1. * pixm2 now holds all the color pixels of interest */ pixSubtract(pixm2, pixm2, pixm1); pixDestroy(&pixm1); if (pixadb) pixaAddPix(pixadb, pixm2, L_COPY); /* But we're not quite finished. Remove pixels from any component * that is touching the image border. False color pixels can * sometimes be found there if the image is much darker near * the border, due to oxidation or reduced illumination. 
*/ pixm3 = pixRemoveBorderConnComps(pixm2, 8); pixDestroy(&pixm2); if (pixadb) pixaAddPix(pixadb, pixm3, L_COPY); /* Get the fraction of light color pixels */ pixCountPixels(pixm3, &count, NULL); *pcolorfract = (l_float32)count / (w * h); if (pixadb) { if (count == 0) L_INFO("no light color pixels found\n", procName); else L_INFO("fraction of light color pixels = %5.3f\n", procName, *pcolorfract); } /* Debug: extract the color pixels from pixs */ if (pixadb && count > 0) { /* Use pixm3 to extract the color pixels */ pix3 = pixCreateTemplate(pixs); pixSetAll(pix3); pixCombineMasked(pix3, pixs, pixm3); pixaAddPix(pixadb, pix3, L_INSERT); /* Use additional filtering to extract the color pixels */ pix3 = pixCloseSafeBrick(NULL, pixm3, 15, 15); pixaAddPix(pixadb, pix3, L_INSERT); pix5 = pixCreateTemplate(pixs); pixSetAll(pix5); pixCombineMasked(pix5, pixs, pix3); pixaAddPix(pixadb, pix5, L_INSERT); /* Get the combined bounding boxes of the mask components * in pix3, and extract those pixels from pixs. */ boxa1 = pixConnCompBB(pix3, 8); boxa2 = boxaCombineOverlaps(boxa1, NULL); pix4 = pixCreateTemplate(pix3); pixMaskBoxa(pix4, pix4, boxa2, L_SET_PIXELS); pixaAddPix(pixadb, pix4, L_INSERT); pix5 = pixCreateTemplate(pixs); pixSetAll(pix5); pixCombineMasked(pix5, pixs, pix4); pixaAddPix(pixadb, pix5, L_INSERT); boxaDestroy(&boxa1); boxaDestroy(&boxa2); pixaAddPix(pixadb, pixs, L_COPY); } /* Optional colormask returns */ if (pcolormask2 && count > 0) *pcolormask2 = pixCloseSafeBrick(NULL, pixm3, 15, 15); if (pcolormask1 && count > 0) *pcolormask1 = pixm3; else pixDestroy(&pixm3); return 0; }
/*!
 *  pixThinGeneral()
 *
 *      Input:  pixs (1 bpp)
 *              type (L_THIN_FG, L_THIN_BG)
 *              sela (of Sels for parallel composite HMTs)
 *              maxiters (max number of iters allowed; use 0 to iterate
 *                        until completion)
 *      Return: pixd, or null on error
 *
 *  Notes:
 *      (1) See notes in pixThin().  That function chooses among
 *          the best of the Sels for thinning.
 *      (2) This is a general function that takes a Sela of HMTs
 *          that are used in parallel for thinning from each
 *          of four directions.  One iteration consists of four
 *          such parallel thins.
 *      (3) With maxiters == 0 the iteration limit is set to 10000.
 *          Iteration stops early as soon as a full iteration leaves
 *          the image unchanged.
 *      (4) For L_THIN_BG the image is inverted before thinning and
 *          inverted back before it is returned.
 */
PIX *
pixThinGeneral(PIX     *pixs,
               l_int32  type,
               SELA    *sela,
               l_int32  maxiters)
{
l_int32  i, j, r, nsels, same;
PIXA    *pixahmt;
PIX    **pixhmt;  /* array owned by pixahmt; do not destroy! */
PIX     *pixd, *pixt;
SEL     *sel, *selr;

    PROCNAME("pixThinGeneral");

    if (!pixs)
        return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
    if (pixGetDepth(pixs) != 1)
        return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL);
    if (type != L_THIN_FG && type != L_THIN_BG)
        return (PIX *)ERROR_PTR("invalid fg/bg type", procName, NULL);
    if (!sela)
        return (PIX *)ERROR_PTR("sela not defined", procName, NULL);
    if (maxiters == 0) maxiters = 10000;

        /* Set up array of temp pix to hold hmts */
    nsels = selaGetCount(sela);
    pixahmt = pixaCreate(nsels);
    for (i = 0; i < nsels; i++) {
        pixt = pixCreateTemplate(pixs);
        pixaAddPix(pixahmt, pixt, L_INSERT);
    }
    pixhmt = pixaGetPixArray(pixahmt);
    if (!pixhmt) {
            /* Don't leak the pixa and the temp pix it holds */
        pixaDestroy(&pixahmt);
        return (PIX *)ERROR_PTR("pixhmt array not made", procName, NULL);
    }

#if  DEBUG_SELS
    pixt = selaDisplayInPix(sela, 35, 3, 15, 4);
    pixDisplayWithTitle(pixt, 100, 100, "allsels", 1);
    pixDestroy(&pixt);
#endif  /* DEBUG_SELS */

        /* Set up initial image for fg thinning */
    if (type == L_THIN_FG)
        pixd = pixCopy(NULL, pixs);
    else  /* bg thinning */
        pixd = pixInvert(NULL, pixs);

        /* Thin the fg, with up to maxiters iterations */
    for (i = 0; i < maxiters; i++) {
        pixt = pixCopy(NULL, pixd);  /* test for completion */
        for (r = 0; r < 4; r++) {  /* over 90 degree rotations of Sels */
            for (j = 0; j < nsels; j++) {  /* over individual sels in sela */
                sel = selaGetSel(sela, j);  /* not a copy */
                selr = selRotateOrth(sel, r);
                pixHMT(pixhmt[j], pixd, selr);
                selDestroy(&selr);
                if (j > 0)
                    pixOr(pixhmt[0], pixhmt[0], pixhmt[j]);  /* accum result */
            }
            pixSubtract(pixd, pixd, pixhmt[0]);  /* remove result */
        }
        pixEqual(pixd, pixt, &same);
        pixDestroy(&pixt);
        if (same) {
            L_INFO("%d iterations to completion\n", procName, i);
            break;
        }
    }

    if (type == L_THIN_BG)
        pixInvert(pixd, pixd);

    pixaDestroy(&pixahmt);
    return pixd;
}
/*!
 * \brief   pixUpDownDetectGeneral()
 *
 * \param[in]    pixs      1 bpp, deskewed, English text, 150 - 300 ppi
 * \param[out]   pconf     confidence that text is rightside-up
 * \param[in]    mincount  min number of up + down; use 0 for default
 * \param[in]    npixels   number of pixels removed from each side of word box
 * \param[in]    debug     1 for debug output; 0 otherwise
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) See pixOrientDetect() for other details.
 *      (2) %conf is the normalized difference between the number of
 *          detected up and down ascenders, assuming that the text
 *          is either rightside-up or upside-down and not rotated
 *          at a 90 degree angle.
 *      (3) The typical mode of operation is %npixels == 0.
 *          If %npixels > 0, this removes HMT matches at the
 *          beginning and ending of "words."  This is useful for
 *          pages that may have mostly digits, because if npixels == 0,
 *          leading "1" and "3" digits can register as having
 *          ascenders or descenders, and "7" digits can match descenders.
 *          Consequently, a page image of only digits may register
 *          as being upside-down.
 *      (4) We want to count the number of instances found using the HMT.
 *          An expensive way to do this would be to count the
 *          number of connected components.  A cheap way is to do a rank
 *          reduction cascade that reduces each component to a single
 *          pixel, and results (after two or three 2x reductions)
 *          in one pixel for each of the original components.
 *          After the reduction, you have a much smaller pix over
 *          which to count pixels.  We do only 2 reductions, because
 *          this function is designed to work for input pix between
 *          150 and 300 ppi, and an 8x reduction on a 150 ppi image
 *          is going too far -- components will get merged.
 *      (5) If %debug is nonzero, intermediate images are written under
 *          /tmp/lept/orient and the counts are printed to stderr.
 *          The directory is only created in that case.
 *      (6) %conf is left at 0.0 unless the larger of the two counts
 *          exceeds %mincount and at least one feature was found.
 * </pre>
 */
l_int32
pixUpDownDetectGeneral(PIX        *pixs,
                       l_float32  *pconf,
                       l_int32     mincount,
                       l_int32     npixels,
                       l_int32     debug)
{
l_int32    countup, countdown, nmax;
l_float32  nup, ndown;
PIX       *pix0, *pix1, *pix2, *pix3, *pixm;
SEL       *sel1, *sel2, *sel3, *sel4;

    PROCNAME("pixUpDownDetectGeneral");

    if (!pconf)
        return ERROR_INT("&conf not defined", procName, 1);
    *pconf = 0.0;
    if (!pixs || pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
    if (mincount == 0)
        mincount = DEFAULT_MIN_UP_DOWN_COUNT;
    if (npixels < 0)
        npixels = 0;

        /* The output directory is only used for debug images, so
         * don't touch the filesystem in normal operation. */
    if (debug)
        lept_mkdir("lept/orient");

    sel1 = selCreateFromString(textsel1, 5, 6, NULL);
    sel2 = selCreateFromString(textsel2, 5, 6, NULL);
    sel3 = selCreateFromString(textsel3, 5, 6, NULL);
    sel4 = selCreateFromString(textsel4, 5, 6, NULL);

        /* One of many reasonable pre-filtering sequences: (1, 8) and (30, 1).
         * This closes holes in x-height characters and joins them at
         * the x-height.  There is more noise in the descender detection
         * from this, but it works fairly well. */
    pix0 = pixMorphCompSequence(pixs, "c1.8 + c30.1", 0);

        /* Optionally, make a mask of the word bounding boxes, shortening
         * each of them by a fixed amount at each end. */
    pixm = NULL;
    if (npixels > 0) {
        l_int32  i, nbox, x, y, w, h;
        BOX   *box;
        BOXA  *boxa;
        pix1 = pixMorphSequence(pix0, "o10.1", 0);
        boxa = pixConnComp(pix1, NULL, 8);
        pixm = pixCreateTemplate(pix1);
        pixDestroy(&pix1);
        nbox = boxaGetCount(boxa);
        for (i = 0; i < nbox; i++) {
            box = boxaGetBox(boxa, i, L_CLONE);
            boxGetGeometry(box, &x, &y, &w, &h);
                /* Only boxes wider than the total trim contribute;
                 * the mask is extended vertically beyond the box. */
            if (w > 2 * npixels)
                pixRasterop(pixm, x + npixels, y - 6, w - 2 * npixels, h + 13,
                            PIX_SET, NULL, 0, 0);
            boxDestroy(&box);
        }
        boxaDestroy(&boxa);
    }

        /* Find the ascenders and optionally filter with pixm.
         * For an explanation of the procedure used for counting the result
         * of the HMT, see comments at the beginning of this function. */
    pix1 = pixHMT(NULL, pix0, sel1);
    pix2 = pixHMT(NULL, pix0, sel2);
    pixOr(pix1, pix1, pix2);
    if (pixm)
        pixAnd(pix1, pix1, pixm);
    pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
    pixCountPixels(pix3, &countup, NULL);
    pixDebugFlipDetect("/tmp/lept/orient/up.png", pixs, pix1, debug);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);

        /* Find the descenders and optionally filter with pixm. */
    pix1 = pixHMT(NULL, pix0, sel3);
    pix2 = pixHMT(NULL, pix0, sel4);
    pixOr(pix1, pix1, pix2);
    if (pixm)
        pixAnd(pix1, pix1, pixm);
    pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
    pixCountPixels(pix3, &countdown, NULL);
    pixDebugFlipDetect("/tmp/lept/orient/down.png", pixs, pix1, debug);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);

        /* Evaluate statistically, generating a confidence that is
         * related to the probability with a gaussian distribution.
         * Requiring nmax > 0 keeps the denominator nonzero even when
         * the caller passes a negative mincount. */
    nup = (l_float32)(countup);
    ndown = (l_float32)(countdown);
    nmax = L_MAX(countup, countdown);
    if (nmax > mincount && nmax > 0)
        *pconf = 2. * ((nup - ndown) / sqrt(nup + ndown));

    if (debug) {
        if (pixm) pixWriteDebug("/tmp/lept/orient/pixm1.png", pixm, IFF_PNG);
        fprintf(stderr, "nup = %7.3f, ndown = %7.3f, conf = %7.3f\n",
                nup, ndown, *pconf);
        if (*pconf > DEFAULT_MIN_UP_DOWN_CONF)
            fprintf(stderr, "Text is rightside-up\n");
        if (*pconf < -DEFAULT_MIN_UP_DOWN_CONF)
            fprintf(stderr, "Text is upside-down\n");
    }

    pixDestroy(&pix0);
    pixDestroy(&pixm);
    selDestroy(&sel1);
    selDestroy(&sel2);
    selDestroy(&sel3);
    selDestroy(&sel4);
    return 0;
}