Exemple #1
0
/*!
 *  pixGenTextblockMask()
 *
 *      Input:  pixs (1 bpp, textline mask, assumed to be 150 to 200 ppi)
 *              pixvws (vertical white space mask)
 *              debug (flag: 1 for debug output)
 *      Return: pixd (textblock mask), or null on error
 *
 *  Notes:
 *      (1) Both the input masks (textline and vertical white space) and
 *          the returned textblock mask are at the same resolution.
 *      (2) The result is somewhat noisy, in that small "blocks" of
 *          text may be included.  These can be removed by post-processing,
 *          using, e.g.,
 *             pixSelectBySize(pix, 60, 60, 4, L_SELECT_IF_EITHER,
 *                             L_SELECT_IF_GTE, NULL);
 */
PIX *
pixGenTextblockMask(PIX     *pixs,
                    PIX     *pixvws,
                    l_int32  debug)
{
PIX  *pixt1, *pixt2, *pixt3, *pixd;

    PROCNAME("pixGenTextblockMask");

    if (!pixs)
        return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
    if (!pixvws)
        return (PIX *)ERROR_PTR("pixvws not defined", procName, NULL);
    if (pixGetDepth(pixs) != 1)
        return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL);

        /* Join pixels vertically to make a textblock mask */
    pixt1 = pixMorphSequence(pixs, "c1.10 + o4.1", 0);
    pixDisplayWriteFormat(pixt1, debug, IFF_PNG);

        /* Solidify the textblock mask and remove noise:
         *   (1) For each cc, close the blocks and dilate slightly
	 *       to form a solid mask.
         *   (2) Small horizontal closing between components.
         *   (3) Open the white space between columns, again.
         *   (4) Remove small components. */
    pixt2 = pixMorphSequenceByComponent(pixt1, "c30.30 + d3.3", 8, 0, 0, NULL);
    pixCloseSafeBrick(pixt2, pixt2, 10, 1);
    pixDisplayWriteFormat(pixt2, debug, IFF_PNG);
    pixt3 = pixSubtract(NULL, pixt2, pixvws);
    pixDisplayWriteFormat(pixt3, debug, IFF_PNG);
    pixd = pixSelectBySize(pixt3, 25, 5, 8, L_SELECT_IF_BOTH,
                            L_SELECT_IF_GTE, NULL);
    pixDisplayWriteFormat(pixd, debug, IFF_PNG);

    pixDestroy(&pixt1);
    pixDestroy(&pixt2);
    pixDestroy(&pixt3);
    return pixd;
}
Exemple #2
0
l_int32
DoPageSegmentation(PIX     *pixs,   /* should be at least 300 ppi */
                   l_int32  which)  /* 1, 2, 3, 4 */
{
char         buf[256];
l_int32      zero;
BOXA        *boxatm, *boxahm;
PIX         *pixr;   /* image reduced to 150 ppi */
PIX         *pixhs;  /* image of halftone seed, 150 ppi */
PIX         *pixm;   /* image of mask of components, 150 ppi */
PIX         *pixhm1; /* image of halftone mask, 150 ppi */
PIX         *pixhm2; /* image of halftone mask, 300 ppi */
PIX         *pixht;  /* image of halftone components, 150 ppi */
PIX         *pixnht; /* image without halftone components, 150 ppi */
PIX         *pixi;   /* inverted image, 150 ppi */
PIX         *pixvws; /* image of vertical whitespace, 150 ppi */
PIX         *pixm1;  /* image of closed textlines, 150 ppi */
PIX         *pixm2;  /* image of refined text line mask, 150 ppi */
PIX         *pixm3;  /* image of refined text line mask, 300 ppi */
PIX         *pixb1;  /* image of text block mask, 150 ppi */
PIX         *pixb2;  /* image of text block mask, 300 ppi */
PIX         *pixnon; /* image of non-text or halftone, 150 ppi */
PIX         *pix1, *pix2, *pix3, *pix4;
PIXA        *pixa;
PIXCMAP     *cmap;
PTAA        *ptaa;
l_int32      ht_flag = 0;
l_int32      ws_flag = 0;
l_int32      text_flag = 0;
l_int32      block_flag = 0;

    PROCNAME("DoPageSegmentation");

    if (which == 1)
        ht_flag = 1;
    else if (which == 2)
        ws_flag = 1;
    else if (which == 3)
        text_flag = 1;
    else if (which == 4)
        block_flag = 1;
    else
        return ERROR_INT("invalid parameter: not in [1...4]", procName, 1);

    pixa = pixaCreate(0);
    lept_mkdir("lept/livre");

        /* Reduce to 150 ppi */
    pix1 = pixScaleToGray2(pixs);
    if (ws_flag || ht_flag || block_flag) pixaAddPix(pixa, pix1, L_COPY);
    if (which == 1)
        pixWrite("/tmp/lept/livre/orig.gray.150.png", pix1, IFF_PNG);
    pixDestroy(&pix1);
    pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);

        /* Get seed for halftone parts */
    pix1 = pixReduceRankBinaryCascade(pixr, 4, 4, 3, 0);
    pix2 = pixOpenBrick(NULL, pix1, 5, 5);
    pixhs = pixExpandBinaryPower2(pix2, 8);
    if (ht_flag) pixaAddPix(pixa, pixhs, L_COPY);
    if (which == 1)
        pixWrite("/tmp/lept/livre/htseed.150.png", pixhs, IFF_PNG);
    pixDestroy(&pix1);
    pixDestroy(&pix2);

        /* Get mask for connected regions */
    pixm = pixCloseSafeBrick(NULL, pixr, 4, 4);
    if (ht_flag) pixaAddPix(pixa, pixm, L_COPY);
    if (which == 1)
        pixWrite("/tmp/lept/livre/ccmask.150.png", pixm, IFF_PNG);

        /* Fill seed into mask to get halftone mask */
    pixhm1 = pixSeedfillBinary(NULL, pixhs, pixm, 4);
    if (ht_flag) pixaAddPix(pixa, pixhm1, L_COPY);
    if (which == 1) pixWrite("/tmp/lept/livre/htmask.150.png", pixhm1, IFF_PNG);
    pixhm2 = pixExpandBinaryPower2(pixhm1, 2);

        /* Extract halftone stuff */
    pixht = pixAnd(NULL, pixhm1, pixr);
    if (which == 1) pixWrite("/tmp/lept/livre/ht.150.png", pixht, IFF_PNG);

        /* Extract non-halftone stuff */
    pixnht = pixXor(NULL, pixht, pixr);
    if (text_flag) pixaAddPix(pixa, pixnht, L_COPY);
    if (which == 1) pixWrite("/tmp/lept/livre/text.150.png", pixnht, IFF_PNG);
    pixZero(pixht, &zero);
    if (zero)
        fprintf(stderr, "No halftone parts found\n");
    else
        fprintf(stderr, "Halftone parts found\n");

        /* Get bit-inverted image */
    pixi = pixInvert(NULL, pixnht);
    if (ws_flag) pixaAddPix(pixa, pixi, L_COPY);
    if (which == 1) pixWrite("/tmp/lept/livre/invert.150.png", pixi, IFF_PNG);

        /* The whitespace mask will break textlines where there
         * is a large amount of white space below or above.
         * We can prevent this by identifying regions of the
         * inverted image that have large horizontal (bigger than
         * the separation between columns) and significant
         * vertical extent (bigger than the separation between
         * textlines), and subtracting this from the whitespace mask. */
    pix1 = pixMorphCompSequence(pixi, "o80.60", 0);
    pix2 = pixSubtract(NULL, pixi, pix1);
    if (ws_flag) pixaAddPix(pixa, pix2, L_COPY);
    pixDestroy(&pix1);

        /* Identify vertical whitespace by opening inverted image */
    pix3 = pixOpenBrick(NULL, pix2, 5, 1);  /* removes thin vertical lines */
    pixvws = pixOpenBrick(NULL, pix3, 1, 200);  /* gets long vertical lines */
    if (text_flag || ws_flag) pixaAddPix(pixa, pixvws, L_COPY);
    if (which == 1) pixWrite("/tmp/lept/livre/vertws.150.png", pixvws, IFF_PNG);
    pixDestroy(&pix2);
    pixDestroy(&pix3);

        /* Get proto (early processed) text line mask. */
        /* First close the characters and words in the textlines */
    pixm1 = pixCloseSafeBrick(NULL, pixnht, 30, 1);
    if (text_flag) pixaAddPix(pixa, pixm1, L_COPY);
    if (which == 1)
        pixWrite("/tmp/lept/livre/textmask1.150.png", pixm1, IFF_PNG);

        /* Next open back up the vertical whitespace corridors */
    pixm2 = pixSubtract(NULL, pixm1, pixvws);
    if (which == 1)
        pixWrite("/tmp/lept/livre/textmask2.150.png", pixm2, IFF_PNG);

        /* Do a small opening to remove noise */
    pixOpenBrick(pixm2, pixm2, 3, 3);
    if (text_flag) pixaAddPix(pixa, pixm2, L_COPY);
    if (which == 1)
         pixWrite("/tmp/lept/livre/textmask3.150.png", pixm2, IFF_PNG);
    pixm3 = pixExpandBinaryPower2(pixm2, 2);

        /* Join pixels vertically to make text block mask */
    pixb1 = pixMorphSequence(pixm2, "c1.10 + o4.1", 0);
    if (block_flag) pixaAddPix(pixa, pixb1, L_COPY);
    if (which == 1)
        pixWrite("/tmp/lept/livre/textblock1.150.png", pixb1, IFF_PNG);

        /* Solidify the textblock mask and remove noise:
         *  (1) For each c.c., close the blocks and dilate slightly
         *      to form a solid mask.
         *  (2) Small horizontal closing between components
         *  (3) Open the white space between columns, again
         *  (4) Remove small components */
    pix1 = pixMorphSequenceByComponent(pixb1, "c30.30 + d3.3", 8, 0, 0, NULL);
    pixCloseSafeBrick(pix1, pix1, 10, 1);
    if (block_flag) pixaAddPix(pixa, pix1, L_COPY);
    pix2 = pixSubtract(NULL, pix1, pixvws);
    pix3 = pixSelectBySize(pix2, 25, 5, 8, L_SELECT_IF_BOTH,
                            L_SELECT_IF_GTE, NULL);
    if (block_flag) pixaAddPix(pixa, pix3, L_COPY);
    if (which == 1)
        pixWrite("/tmp/lept/livre/textblock2.150.png", pix3, IFF_PNG);
    pixb2 = pixExpandBinaryPower2(pix3, 2);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);

        /* Identify the outlines of each textblock */
    ptaa = pixGetOuterBordersPtaa(pixb2);
    pix1 = pixRenderRandomCmapPtaa(pixb2, ptaa, 1, 8, 1);
    cmap = pixGetColormap(pix1);
    pixcmapResetColor(cmap, 0, 130, 130, 130);  /* set interior to gray */
    if (which == 1)
        pixWrite("/tmp/lept/livre/textblock3.300.png", pix1, IFF_PNG);
    pixDisplayWithTitle(pix1, 480, 360, "textblock mask with outlines", DFLAG);
    ptaaDestroy(&ptaa);
    pixDestroy(&pix1);

        /* Fill line mask (as seed) into the original */
    pix1 = pixSeedfillBinary(NULL, pixm3, pixs, 8);
    pixOr(pixm3, pixm3, pix1);
    pixDestroy(&pix1);
    if (which == 1)
        pixWrite("/tmp/lept/livre/textmask.300.png", pixm3, IFF_PNG);
    pixDisplayWithTitle(pixm3, 480, 360, "textline mask 4", DFLAG);

        /* Fill halftone mask (as seed) into the original */
    pix1 = pixSeedfillBinary(NULL, pixhm2, pixs, 8);
    pixOr(pixhm2, pixhm2, pix1);
    pixDestroy(&pix1);
    if (which == 1)
        pixWrite("/tmp/lept/livre/htmask.300.png", pixhm2, IFF_PNG);
    pixDisplayWithTitle(pixhm2, 520, 390, "halftonemask 2", DFLAG);

        /* Find objects that are neither text nor halftones */
    pix1 = pixSubtract(NULL, pixs, pixm3);  /* remove text pixels */
    pixnon = pixSubtract(NULL, pix1, pixhm2);  /* remove halftone pixels */
    pixDestroy(&pix1);
    if (which == 1)
        pixWrite("/tmp/lept/livre/other.300.png", pixnon, IFF_PNG);
    pixDisplayWithTitle(pixnon, 540, 420, "other stuff", DFLAG);

        /* Write out b.b. for text line mask and halftone mask components */
    boxatm = pixConnComp(pixm3, NULL, 4);
    boxahm = pixConnComp(pixhm2, NULL, 8);
    if (which == 1) {
        boxaWrite("/tmp/lept/livre/textmask.boxa", boxatm);
        boxaWrite("/tmp/lept/livre/htmask.boxa", boxahm);
    }

    pix1 = pixaDisplayTiledAndScaled(pixa, 8, 250, 4, 0, 25, 2);
    pixDisplay(pix1, 0, 375 * (which - 1));
    snprintf(buf, sizeof(buf), "/tmp/lept/livre/segout.%d.png", which);
    pixWrite(buf, pix1, IFF_PNG);
    pixDestroy(&pix1);
    pixaDestroy(&pixa);

        /* clean up to test with valgrind */
    pixDestroy(&pixr);
    pixDestroy(&pixhs);
    pixDestroy(&pixm);
    pixDestroy(&pixhm1);
    pixDestroy(&pixhm2);
    pixDestroy(&pixht);
    pixDestroy(&pixi);
    pixDestroy(&pixnht);
    pixDestroy(&pixvws);
    pixDestroy(&pixm1);
    pixDestroy(&pixm2);
    pixDestroy(&pixm3);
    pixDestroy(&pixb1);
    pixDestroy(&pixb2);
    pixDestroy(&pixnon);
    boxaDestroy(&boxatm);
    boxaDestroy(&boxahm);
    return 0;
}