示例#1
0
static void
GetImageMask(PIX         *pixs,
             l_int32      res,
             BOXA       **pboxa,
             const char  *debugfile)
{
PIX   *pix1, *pix2, *pix3, *pix4;
PIXA  *pixa;

    pixSetResolution(pixs, 200, 200);
    pix1 = pixConvertTo1(pixs, 100);
    pix2 = pixGenHalftoneMask(pix1, NULL, NULL, 0);
    pix3 = pixMorphSequence(pix2, "c20.1 + c1.20", 0);
    *pboxa = pixConnComp(pix3, NULL, 8);
    if (debugfile) {
        pixa = pixaCreate(0);
        pixaAddPix(pixa, pixs, L_COPY);
        pixaAddPix(pixa, pix1, L_INSERT);
        pixaAddPix(pixa, pix2, L_INSERT);
        pixaAddPix(pixa, pix3, L_INSERT);
        pix4 = pixaDisplayTiledInRows(pixa, 32, 1800, 0.25, 0, 25, 2);
        pixWrite(debugfile, pix4, IFF_JFIF_JPEG);
        pixDisplay(pix4, 100, 100);
        pixDestroy(&pix4);
        pixaDestroy(&pixa);
    } else {
        pixDestroy(&pix1);
        pixDestroy(&pix2);
        pixDestroy(&pix3);
    }

    return;
}
示例#2
0
/*!
 *  pixGetRegionsBinary()
 *
 *      Input:  pixs (1 bpp, assumed to be 300 to 400 ppi)
 *              &pixhm (<optional return> halftone mask)
 *              &pixtm (<optional return> textline mask)
 *              &pixtb (<optional return> textblock mask)
 *              debug (flag: set to 1 for debug output)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) It is best to deskew the image before segmenting.
 *      (2) The debug flag enables a number of outputs.  These
 *          are included to show how to generate and save/display
 *          these results.
 */
l_int32
pixGetRegionsBinary(PIX     *pixs,
                    PIX    **ppixhm,
                    PIX    **ppixtm,
                    PIX    **ppixtb,
                    l_int32  debug)
{
char    *tempname;
l_int32  htfound, tlfound;
PIX     *pixr, *pixt1, *pixt2;
PIX     *pixtext;  /* text pixels only */
PIX     *pixhm2;   /* halftone mask; 2x reduction */
PIX     *pixhm;    /* halftone mask;  */
PIX     *pixtm2;   /* textline mask; 2x reduction */
PIX     *pixtm;    /* textline mask */
PIX     *pixvws;   /* vertical white space mask */
PIX     *pixtb2;   /* textblock mask; 2x reduction */
PIX     *pixtbf2;  /* textblock mask; 2x reduction; small comps filtered */
PIX     *pixtb;    /* textblock mask */

    PROCNAME("pixGetRegionsBinary");

    if (ppixhm) *ppixhm = NULL;
    if (ppixtm) *ppixtm = NULL;
    if (ppixtb) *ppixtb = NULL;
    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 1);
    if (pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not 1 bpp", procName, 1);

        /* 2x reduce, to 150 -200 ppi */
    pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
    pixDisplayWrite(pixr, debug);

        /* Get the halftone mask */
    pixhm2 = pixGenHalftoneMask(pixr, &pixtext, &htfound, debug);

        /* Get the textline mask from the text pixels */
    pixtm2 = pixGenTextlineMask(pixtext, &pixvws, &tlfound, debug);

        /* Get the textblock mask from the textline mask */
    pixtb2 = pixGenTextblockMask(pixtm2, pixvws, debug);
    pixDestroy(&pixr);
    pixDestroy(&pixtext);
    pixDestroy(&pixvws);

        /* Remove small components from the mask, where a small
         * component is defined as one with both width and height < 60 */
    pixtbf2 = pixSelectBySize(pixtb2, 60, 60, 4, L_SELECT_IF_EITHER,
                              L_SELECT_IF_GTE, NULL);
    pixDestroy(&pixtb2);
    pixDisplayWriteFormat(pixtbf2, debug, IFF_PNG);

        /* Expand all masks to full resolution, and do filling or
         * small dilations for better coverage. */
    pixhm = pixExpandReplicate(pixhm2, 2);
    pixt1 = pixSeedfillBinary(NULL, pixhm, pixs, 8);
    pixOr(pixhm, pixhm, pixt1);
    pixDestroy(&pixt1);
    pixDisplayWriteFormat(pixhm, debug, IFF_PNG);

    pixt1 = pixExpandReplicate(pixtm2, 2);
    pixtm = pixDilateBrick(NULL, pixt1, 3, 3);
    pixDestroy(&pixt1);
    pixDisplayWriteFormat(pixtm, debug, IFF_PNG);

    pixt1 = pixExpandReplicate(pixtbf2, 2);
    pixtb = pixDilateBrick(NULL, pixt1, 3, 3);
    pixDestroy(&pixt1);
    pixDisplayWriteFormat(pixtb, debug, IFF_PNG);

    pixDestroy(&pixhm2);
    pixDestroy(&pixtm2);
    pixDestroy(&pixtbf2);

        /* Debug: identify objects that are neither text nor halftone image */
    if (debug) {
        pixt1 = pixSubtract(NULL, pixs, pixtm);  /* remove text pixels */
        pixt2 = pixSubtract(NULL, pixt1, pixhm);  /* remove halftone pixels */
        pixDisplayWriteFormat(pixt2, 1, IFF_PNG);
        pixDestroy(&pixt1);
        pixDestroy(&pixt2);
    }

        /* Debug: display textline components with random colors */
    if (debug) {
        l_int32  w, h;
        BOXA    *boxa;
        PIXA    *pixa;
        boxa = pixConnComp(pixtm, &pixa, 8);
        pixGetDimensions(pixtm, &w, &h, NULL);
        pixt1 = pixaDisplayRandomCmap(pixa, w, h);
        pixcmapResetColor(pixGetColormap(pixt1), 0, 255, 255, 255);
        pixDisplay(pixt1, 100, 100);
        pixDisplayWriteFormat(pixt1, 1, IFF_PNG);
        pixaDestroy(&pixa);
        boxaDestroy(&boxa);
        pixDestroy(&pixt1);
    }

        /* Debug: identify the outlines of each textblock */
    if (debug) {
        PIXCMAP  *cmap;
        PTAA     *ptaa;
        ptaa = pixGetOuterBordersPtaa(pixtb);
        tempname = genTempFilename("/tmp", "tb_outlines.ptaa", 0, 0);
        ptaaWrite(tempname, ptaa, 1);
        FREE(tempname);
        pixt1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1);
        cmap = pixGetColormap(pixt1);
        pixcmapResetColor(cmap, 0, 130, 130, 130);
        pixDisplay(pixt1, 500, 100);
        pixDisplayWriteFormat(pixt1, 1, IFF_PNG);
        pixDestroy(&pixt1);
        ptaaDestroy(&ptaa);
    }

        /* Debug: get b.b. for all mask components */
    if (debug) {
        BOXA  *bahm, *batm, *batb;
        bahm = pixConnComp(pixhm, NULL, 4);
        batm = pixConnComp(pixtm, NULL, 4);
        batb = pixConnComp(pixtb, NULL, 4);
        tempname = genTempFilename("/tmp", "htmask.boxa", 0, 0);
        boxaWrite(tempname, bahm);
        FREE(tempname);
        tempname = genTempFilename("/tmp", "textmask.boxa", 0, 0);
        boxaWrite(tempname, batm);
        FREE(tempname);
        tempname = genTempFilename("/tmp", "textblock.boxa", 0, 0);
        boxaWrite(tempname, batb);
        FREE(tempname);
	boxaDestroy(&bahm);
	boxaDestroy(&batm);
	boxaDestroy(&batb);
    }

    if (ppixhm)
        *ppixhm = pixhm;
    else
        pixDestroy(&pixhm);
    if (ppixtm)
        *ppixtm = pixtm;
    else
        pixDestroy(&pixtm);
    if (ppixtb)
        *ppixtb = pixtb;
    else
        pixDestroy(&pixtb);

    return 0;
}
示例#3
0
main(int    argc,
     char **argv)
{
l_int32      h;
l_float32    scalefactor;
BOX         *box;
BOXA        *boxa1, *boxa2;
BOXAA       *baa;
PIX         *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7, *pix8, *pix9;
L_REGPARAMS  *rp;

    if (regTestSetup(argc, argv, &rp))
        return 1;

    lept_rmdir("segtest");
    lept_mkdir("segtest");
    baa = boxaaCreate(5);

        /* Image region input.  */
    pix1 = pixRead("wet-day.jpg");
    pix2 = pixScaleToSize(pix1, WIDTH, 0);
    pixWrite("/tmp/segtest/0.jpg", pix2, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/0.jpg");   /* 0 */
    box = boxCreate(105, 161, 620, 872);   /* image region */
    boxa1 = boxaCreate(1);
    boxaAddBox(boxa1, box, L_INSERT);
    boxaaAddBoxa(baa, boxa1, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);

        /* Compute image region at w = 2 * WIDTH */
    pix1 = pixRead("candelabrum-11.jpg");
    pix2 = pixScaleToSize(pix1, WIDTH, 0);
    pix3 = pixConvertTo1(pix2, 100);
    pix4 = pixExpandBinaryPower2(pix3, 2);  /* w = 2 * WIDTH */
    pix5 = pixGenHalftoneMask(pix4, NULL, NULL, 1);
    pix6 = pixMorphSequence(pix5, "c20.1 + c1.20", 0);
    pix7 = pixMaskConnComp(pix6, 8, &boxa1);
    pix8 = pixReduceBinary2(pix7, NULL);  /* back to w = WIDTH */
    pix9 = pixBackgroundNormSimple(pix2, pix8, NULL);
    pixWrite("/tmp/segtest/1.jpg", pix9, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/1.jpg");   /* 1 */
    boxa2 = boxaTransform(boxa1, 0, 0, 0.5, 0.5);  /* back to w = WIDTH */
    boxaaAddBoxa(baa, boxa2, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);
    pixDestroy(&pix4);
    pixDestroy(&pix5);
    pixDestroy(&pix6);
    pixDestroy(&pix7);
    pixDestroy(&pix8);
    pixDestroy(&pix9);
    boxaDestroy(&boxa1);

        /* Use mask to find image region */
    pix1 = pixRead("lion-page.00016.jpg");
    pix2 = pixScaleToSize(pix1, WIDTH, 0);
    pixWrite("/tmp/segtest/2.jpg", pix2, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/2.jpg");   /* 2 */
    pix3 = pixRead("lion-mask.00016.tif");
    pix4 = pixScaleToSize(pix3, WIDTH, 0);
    boxa1 = pixConnComp(pix4, NULL, 8);
    boxaaAddBoxa(baa, boxa1, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);
    pixDestroy(&pix4);

        /* Compute image region at full res */
    pix1 = pixRead("rabi.png");
    scalefactor = (l_float32)WIDTH / (l_float32)pixGetWidth(pix1);
    pix2 = pixScaleToGray(pix1, scalefactor);
    pixWrite("/tmp/segtest/3.jpg", pix2, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/3.jpg");   /* 3 */
    pix3 = pixGenHalftoneMask(pix1, NULL, NULL, 0);
    pix4 = pixMorphSequence(pix3, "c20.1 + c1.20", 0);
    boxa1 = pixConnComp(pix4, NULL, 8);
    boxa2 = boxaTransform(boxa1, 0, 0, scalefactor, scalefactor);
    boxaaAddBoxa(baa, boxa2, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);
    pixDestroy(&pix4);
    boxaDestroy(&boxa1);

        /* Page with no image regions */
    pix1 = pixRead("lucasta-47.jpg");
    pix2 = pixScaleToSize(pix1, WIDTH, 0);
    boxa1 = boxaCreate(1);
    pixWrite("/tmp/segtest/4.jpg", pix2, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/4.jpg");   /* 4 */
    boxaaAddBoxa(baa, boxa1, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);

        /* Page that is all image */
    pix1 = pixRead("map1.jpg");
    pix2 = pixScaleToSize(pix1, WIDTH, 0);
    pixWrite("/tmp/segtest/5.jpg", pix2, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/5.jpg");   /* 5 */
    h = pixGetHeight(pix2);
    box = boxCreate(0, 0, WIDTH, h);
    boxa1 = boxaCreate(1);
    boxaAddBox(boxa1, box, L_INSERT);
    boxaaAddBoxa(baa, boxa1, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);

        /* Save the boxaa file */
    boxaaWrite("/tmp/segtest/seg.baa", baa);
    regTestCheckFile(rp, "/tmp/segtest/seg.baa");   /* 6 */

        /* Do the conversion */
    l_pdfSetDateAndVersion(FALSE);
    convertSegmentedFilesToPdf("/tmp/segtest", ".jpg", 100, L_G4_ENCODE,
                               140, baa, 75, 0.6, "Segmentation Test",
                               "/tmp/pdfseg.7.pdf");
    regTestCheckFile(rp, "/tmp/pdfseg.7.pdf");   /* 7 */

    boxaaDestroy(&baa);
    return regTestCleanup(rp);
}
示例#4
0
main(int    argc,
     char **argv)
{
char         buffer[512];
char        *tempfile1, *tempfile2;
l_uint8     *data;
l_int32      i, j, w, h, seq, ret, same;
size_t       nbytes;
const char  *title;
BOX         *box;
BOXA        *boxa1, *boxa2;
L_BYTEA     *ba;
L_PDF_DATA  *lpd;
PIX         *pix1, *pix2, *pix3, *pix4, *pix5, *pix6;
PIX         *pixs, *pixt, *pixg, *pixgc, *pixc;
static char  mainName[] = "pdfiotest";

    if (argc != 1)
        exit(ERROR_INT("syntax: pdfiotest", mainName, 1));
    l_pdfSetDateAndVersion(0);

#if 1
    /* ---------------  Single image tests  ------------------- */
    fprintf(stderr, "\n*** Writing single images as pdf files\n");

    convertToPdf("weasel2.4c.png", L_FLATE_ENCODE, 0, "/tmp/pdffile01.pdf",
                 0, 0, 72, NULL, 0, "weasel2.4c.png");
    convertToPdf("test24.jpg", L_JPEG_ENCODE, 0, "/tmp/pdffile02.pdf",
                 0, 0, 72, NULL, 0, "test24.jpg");
    convertToPdf("feyn.tif", L_G4_ENCODE, 0, "/tmp/pdffile03.pdf",
                 0, 0, 300, NULL, 0, "feyn.tif");

    pixs = pixRead("feyn.tif");
    pixConvertToPdf(pixs, L_G4_ENCODE, 0, "/tmp/pdffile04.pdf", 0, 0, 300,
                    NULL, 0, "feyn.tif");
    pixDestroy(&pixs);

    pixs = pixRead("test24.jpg");
    pixConvertToPdf(pixs, L_JPEG_ENCODE, 5, "/tmp/pdffile05.pdf", 0, 0, 72,
                    NULL, 0, "test24.jpg");
    pixDestroy(&pixs);

    pixs = pixRead("feyn.tif");
    pixt = pixScaleToGray2(pixs);
    pixWrite("junkfeyn8.png", pixt, IFF_PNG);
    convertToPdf("junkfeyn8.png", L_JPEG_ENCODE, 0, "/tmp/pdffile06.pdf",
                 0, 0, 150, NULL, 0, "junkfeyn8.png");
    pixDestroy(&pixs);
    pixDestroy(&pixt);
    
    convertToPdf("weasel4.16g.png", L_FLATE_ENCODE, 0, "/tmp/pdffile07.pdf",
                 0, 0, 30, NULL, 0, "weasel4.16g.png");

    pixs = pixRead("test24.jpg");
    pixg = pixConvertTo8(pixs, 0);
    box = boxCreate(100, 100, 100, 100);
    pixc = pixClipRectangle(pixs, box, NULL);
    pixgc = pixClipRectangle(pixg, box, NULL);
    pixWrite("junkpix32.jpg", pixc, IFF_JFIF_JPEG);
    pixWrite("junkpix8.jpg", pixgc, IFF_JFIF_JPEG);
    convertToPdf("junkpix32.jpg", L_FLATE_ENCODE, 0, "/tmp/pdffile08.pdf",
                 0, 0, 72, NULL, 0, "junkpix32.jpg");
    convertToPdf("junkpix8.jpg", L_FLATE_ENCODE, 0, "/tmp/pdffile09.pdf",
                 0, 0, 72, NULL, 0, "junkpix8.jpg");
    pixDestroy(&pixs);
    pixDestroy(&pixg);
    pixDestroy(&pixc);
    pixDestroy(&pixgc);
    boxDestroy(&box);
#endif


#if 1
    /* ---------------  Multiple image tests  ------------------- */
    fprintf(stderr, "\n*** Writing multiple images as single page pdf files\n");

    pix1 = pixRead("feyn-fract.tif");
    pix2 = pixRead("weasel8.240c.png");

/*    l_pdfSetDateAndVersion(0); */
        /* First, write the 1 bpp image through the mask onto the weasels */
    for (i = 0; i < 5; i++) {
        for (j = 0; j < 10; j++) {
            seq = (i == 0 && j == 0) ? L_FIRST_IMAGE : L_NEXT_IMAGE;
            title = (i == 0 && j == 0) ? "feyn-fract.tif" : NULL;
            pixConvertToPdf(pix2, L_FLATE_ENCODE, 0, NULL, 100 * j,
                            100 * i, 70, &lpd, seq, title);
        }
    }
    pixConvertToPdf(pix1, L_G4_ENCODE, 0, "/tmp/pdffile10.pdf", 0, 0, 80, &lpd,
                    L_LAST_IMAGE, NULL);

        /* Now, write the 1 bpp image over the weasels */
    l_pdfSetG4ImageMask(0);
    for (i = 0; i < 5; i++) {
        for (j = 0; j < 10; j++) {
            seq = (i == 0 && j == 0) ? L_FIRST_IMAGE : L_NEXT_IMAGE;
            title = (i == 0 && j == 0) ? "feyn-fract.tif" : NULL;
            pixConvertToPdf(pix2, L_FLATE_ENCODE, 0, NULL, 100 * j,
                            100 * i, 70, &lpd, seq, title);
        }
    }
    pixConvertToPdf(pix1, L_G4_ENCODE, 0, "/tmp/pdffile11.pdf", 0, 0, 80, &lpd,
                    L_LAST_IMAGE, NULL);
    l_pdfSetG4ImageMask(1);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
#endif

#if 1
    /* -------- pdf convert segmented with no image regions -------- */
    fprintf(stderr, "\n*** Writing segmented images without image regions\n");

    pix1 = pixRead("rabi.png");
    pix2 = pixScaleToGray2(pix1);
    pixWrite("/tmp/rabi8.jpg", pix2, IFF_JFIF_JPEG);
    pix3 = pixThresholdTo4bpp(pix2, 16, 1);
    pixWrite("/tmp/rabi4.png", pix3, IFF_PNG);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);

        /* 1 bpp input */
    convertToPdfSegmented("rabi.png", 300, L_G4_ENCODE, 128, NULL, 0, 0,
                          "/tmp/pdffile12.pdf");
    convertToPdfSegmented("rabi.png", 300, L_JPEG_ENCODE, 128, NULL, 0, 0,
                          "/tmp/pdffile13.pdf");
    convertToPdfSegmented("rabi.png", 300, L_FLATE_ENCODE, 128, NULL, 0, 0,
                          "/tmp/pdffile14.pdf");

        /* 8 bpp input, no cmap */
    convertToPdfSegmented("/tmp/rabi8.jpg", 150, L_G4_ENCODE, 128,
                          NULL, 0, 0, "/tmp/pdffile15.pdf");
    convertToPdfSegmented("/tmp/rabi8.jpg", 150, L_JPEG_ENCODE, 128,
                          NULL, 0, 0, "/tmp/pdffile16.pdf");
    convertToPdfSegmented("/tmp/rabi8.jpg", 150, L_FLATE_ENCODE, 128,
                          NULL, 0, 0, "/tmp/pdffile17.pdf");

        /* 4 bpp input, cmap */
    convertToPdfSegmented("/tmp/rabi4.png", 150, L_G4_ENCODE, 128,
                          NULL, 0, 0, "/tmp/pdffile18.pdf");
    convertToPdfSegmented("/tmp/rabi4.png", 150, L_JPEG_ENCODE, 128,
                          NULL, 0, 0, "/tmp/pdffile19.pdf");
    convertToPdfSegmented("/tmp/rabi4.png", 150, L_FLATE_ENCODE, 128,
                          NULL, 0, 0, "/tmp/pdffile20.pdf");

#endif

#if 1
    /* ---------- pdf convert segmented with image regions ---------- */
    fprintf(stderr, "\n*** Writing segmented images with image regions\n");

        /* Get the image region(s) for rabi.png.  There are two
         * small bogus regions at the top, but we'll keep them for
         * the demonstration. */
    pix1 = pixRead("rabi.png");
    pixSetResolution(pix1, 300, 300);
    pixGetDimensions(pix1, &w, &h, NULL);
    pix2 = pixGenHalftoneMask(pix1, NULL, NULL, 0);
    pix3 = pixMorphSequence(pix2, "c20.1 + c1.20", 0);
    boxa1 = pixConnComp(pix3, NULL, 8);
    boxa2 = boxaTransform(boxa1, 0, 0, 0.5, 0.5);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);

        /* 1 bpp input */
    convertToPdfSegmented("rabi.png", 300, L_G4_ENCODE, 128, boxa1,
                          0, 0.25, "/tmp/pdffile21.pdf");
    convertToPdfSegmented("rabi.png", 300, L_JPEG_ENCODE, 128, boxa1,
                          0, 0.25, "/tmp/pdffile22.pdf");
    convertToPdfSegmented("rabi.png", 300, L_FLATE_ENCODE, 128, boxa1,
                          0, 0.25, "/tmp/pdffile23.pdf");

        /* 8 bpp input, no cmap */
    convertToPdfSegmented("/tmp/rabi8.jpg", 150, L_G4_ENCODE, 128, boxa2,
                          0, 0.5, "/tmp/pdffile24.pdf");
    convertToPdfSegmented("/tmp/rabi8.jpg", 150, L_JPEG_ENCODE, 128, boxa2,
                          0, 0.5, "/tmp/pdffile25.pdf");
    convertToPdfSegmented("/tmp/rabi8.jpg", 150, L_FLATE_ENCODE, 128, boxa2,
                          0, 0.5, "/tmp/pdffile26.pdf");

        /* 4 bpp input, cmap */
    convertToPdfSegmented("/tmp/rabi4.png", 150, L_G4_ENCODE, 128, boxa2,
                          0, 0.5, "/tmp/pdffile27.pdf");
    convertToPdfSegmented("/tmp/rabi4.png", 150, L_JPEG_ENCODE, 128, boxa2,
                          0, 0.5, "/tmp/pdffile28.pdf");
    convertToPdfSegmented("/tmp/rabi4.png", 150, L_FLATE_ENCODE, 128, boxa2,
                          0, 0.5, "/tmp/pdffile29.pdf");

        /* 4 bpp input, cmap, data output */
    data = NULL;
    convertToPdfDataSegmented("/tmp/rabi4.png", 150, L_G4_ENCODE, 128, boxa2,
                              0, 0.5, &data, &nbytes);
    l_binaryWrite("/tmp/pdffile30.pdf", "w", data, nbytes);
    lept_free(data);
    convertToPdfDataSegmented("/tmp/rabi4.png", 150, L_JPEG_ENCODE, 128, boxa2,
                              0, 0.5, &data, &nbytes);
    l_binaryWrite("/tmp/pdffile31.pdf", "w", data, nbytes);
    lept_free(data);
    convertToPdfDataSegmented("/tmp/rabi4.png", 150, L_FLATE_ENCODE, 128, boxa2,
                              0, 0.5, &data, &nbytes);
    l_binaryWrite("/tmp/pdffile32.pdf", "w", data, nbytes);
    lept_free(data);

    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
#endif

        
#if 1
    /* -------- pdf convert segmented from color image -------- */
    fprintf(stderr, "\n*** Writing color segmented images\n");

    pix1 = pixRead("candelabrum-11.jpg");
    pix2 = pixScale(pix1, 3.0, 3.0);
    pixWrite("/tmp/candelabrum3.jpg", pix2, IFF_JFIF_JPEG);
    GetImageMask(pix2, 200, &boxa1, "/tmp/seg1.jpg");
    convertToPdfSegmented("/tmp/candelabrum3.jpg", 200, L_G4_ENCODE,
                          100, boxa1, 0, 0.25, "/tmp/pdffile33.pdf");
    convertToPdfSegmented("/tmp/candelabrum3.jpg", 200, L_JPEG_ENCODE,
                          100, boxa1, 0, 0.25, "/tmp/pdffile34.pdf");
    convertToPdfSegmented("/tmp/candelabrum3.jpg", 200, L_FLATE_ENCODE,
                          100, boxa1, 0, 0.25, "/tmp/pdffile35.pdf");

    pixDestroy(&pix1);
    pixDestroy(&pix2);
    boxaDestroy(&boxa1);

    pix1 = pixRead("lion-page.00016.jpg");
    pix2 = pixScale(pix1, 3.0, 3.0);
    pixWrite("/tmp/lion16.jpg", pix2, IFF_JFIF_JPEG);
    pix3 = pixRead("lion-mask.00016.tif");
    boxa1 = pixConnComp(pix3, NULL, 8);
    boxa2 = boxaTransform(boxa1, 0, 0, 3.0, 3.0);
    convertToPdfSegmented("/tmp/lion16.jpg", 200, L_G4_ENCODE,
                          190, boxa2, 0, 0.5, "/tmp/pdffile36.pdf");
    convertToPdfSegmented("/tmp/lion16.jpg", 200, L_JPEG_ENCODE,
                          190, boxa2, 0, 0.5, "/tmp/pdffile37.pdf");
    convertToPdfSegmented("/tmp/lion16.jpg", 200, L_FLATE_ENCODE,
                          190, boxa2, 0, 0.5, "/tmp/pdffile38.pdf");

        /* Quantize the non-image part and flate encode.
         * This is useful because it results in a smaller file than
         * when you flate-encode the un-quantized non-image regions. */
    pix4 = pixScale(pix3, 3.0, 3.0);  /* higher res mask, for combining */
    pix5 = QuantizeNonImageRegion(pix2, pix4, 12);
    pixWrite("/tmp/lion16-quant.png", pix5, IFF_PNG);
    convertToPdfSegmented("/tmp/lion16-quant.png", 200, L_FLATE_ENCODE,
                          190, boxa2, 0, 0.5, "/tmp/pdffile39.pdf");

    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);
    pixDestroy(&pix4);
    pixDestroy(&pix5);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
#endif

#if 1
    /* ------------------ Test multipage pdf generation ----------------- */
    fprintf(stderr, "\n*** Writing multipage pdfs from single page pdfs\n");

        /* Generate a multi-page pdf from all these files */
    startTimer();
    concatenatePdf("/tmp", "pdffile", "/tmp/cat_lept.pdf");
    fprintf(stderr, "Time: %7.3f\n", stopTimer());

        /* Put two good pdf files in a directory */
    lept_mkdir("good");
    lept_cp("testfile1.pdf", "/tmp/good");
    lept_cp("testfile2.pdf", "/tmp/good");
    concatenatePdf("/tmp/good", "file", "/tmp/good.pdf");

        /* Make a version with the pdf id removed, so that it is not
         * recognized as a pdf */
    ba = l_byteaInitFromFile("testfile2.pdf");
    data = l_byteaGetData(ba, &nbytes);
    l_binaryWrite("testfile0.notpdf.pdf", "w", data + 10, nbytes - 10);

        /* Make a version with a corrupted trailer */
    data[2297] = '2';  /* munge trailer object 6: change 458 --> 428 */
    l_binaryWrite("testfile2.bad.pdf", "w", data, nbytes);

        /* Put these two bad files, along with a good file, in a directory */
    lept_mkdir("bad");
    lept_mv("testfile0.notpdf.pdf", "/tmp/bad");
    lept_cp("testfile1.pdf", "/tmp/bad");
    lept_mv("testfile2.bad.pdf", "/tmp/bad");
    l_byteaDestroy(&ba);

        /* Run concat on the bad files.   In the /tmp/bad/ directory,
         * the "not pdf" file should be ignored, and the corrupted pdf
         * file should be properly parsed, so the resulting
         * concatenated files should be identical.  */
    fprintf(stderr, "\nWe attempt to build from the bad directory\n");
    concatenatePdf("/tmp/bad", "file", "/tmp/bad.pdf");
    filesAreIdentical("/tmp/good.pdf", "/tmp/bad.pdf", &same);
    if (same)
        fprintf(stderr, "Fixed: files are the same\n"
                        "Attempt succeeded\n\n");
    else
        fprintf(stderr, "Busted: files are different\n");

        /* pdftk fails because the first file is not a pdf */
    fprintf(stderr, "pdftk attempts to build from the bad directory\n");
    tempfile1 = genPathname("/tmp/bad", "*.pdf");
    tempfile2 = genPathname("/tmp", "pdftk.bad.pdf");
    snprintf(buffer, sizeof(buffer), "pdftk %s output %s",
             tempfile1, tempfile2);
    ret = system(buffer);
    lept_free(tempfile1);
    lept_free(tempfile2);
    fprintf(stderr, "Attempt failed\n\n");

#endif

#if 1
    fprintf(stderr, "\n*** pdftk writes multipage pdfs from images\n");
    tempfile1 = genPathname("/tmp", "pdffile*.pdf");
    tempfile2 = genPathname("/tmp", "cat_pdftk.pdf");
    snprintf(buffer, sizeof(buffer), "pdftk %s output %s",
             tempfile1, tempfile2);
    ret = system(buffer);
    lept_free(tempfile1);
    lept_free(tempfile2);
#endif

#if 1
    /* -- Test simple interface for generating multi-page pdf from images -- */
    fprintf(stderr, "\n*** Writing multipage pdfs from images\n");

        /* Put four image files in a directory.  They will be encoded thus:
         *     file1.png:  flate (8 bpp, only 10 colors)
         *     file2.jpg:  dct (8 bpp, 256 colors because of the jpeg encoding)
         *     file3.tif:  g4 (1 bpp)
         *     file4.jpg:  dct (32 bpp)    */
    lept_mkdir("image");
    pix1 = pixRead("feyn.tif");
    pix2 = pixRead("rabi.png");
    pix3 = pixScaleToGray3(pix1);
    pix4 = pixScaleToGray3(pix2);
    pix5 = pixScale(pix1, 0.33, 0.33);
    pix6 = pixRead("test24.jpg");
    pixWrite("/tmp/image/file1.png", pix3, IFF_PNG);  /* 10 colors */
    pixWrite("/tmp/image/file2.jpg", pix4, IFF_JFIF_JPEG);  /* 256 colors */
    pixWrite("/tmp/image/file3.tif", pix5, IFF_TIFF_G4);
    pixWrite("/tmp/image/file4.jpg", pix6, IFF_JFIF_JPEG);

    startTimer();
    convertFilesToPdf("/tmp/image", "file", 100, 0.8, 75, "4 file test",
                      "/tmp/fourimages.pdf");
    fprintf(stderr, "Time: %7.3f\n", stopTimer());
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);
    pixDestroy(&pix4);
    pixDestroy(&pix5);
    pixDestroy(&pix6);
#endif

    return 0;
}
示例#5
0
// Finds image regions within the source pix (page image) and returns
// the image regions as a Boxa, Pixa pair, analgous to pixConnComp.
// The returned boxa, pixa may be NULL, meaning no images found.
// If not NULL, they must be destroyed by the caller.
void ImageFinder::FindImages(Pix* pix, Boxa** boxa, Pixa** pixa) {
  *boxa = NULL;
  *pixa = NULL;

#ifdef HAVE_LIBLEPT
  if (pixGetWidth(pix) < kMinImageFindSize ||
      pixGetHeight(pix) < kMinImageFindSize)
    return;  // Not worth looking at small images.
  // Reduce by factor 2.
  Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
  pixDisplayWrite(pixr, textord_tabfind_show_images);

  // Get the halftone mask directly from Leptonica.
  Pix *pixht2 = pixGenHalftoneMask(pixr, NULL, NULL,
                                   textord_tabfind_show_images);
  pixDestroy(&pixr);
  if (pixht2 == NULL)
    return;

  // Expand back up again.
  Pix *pixht = pixExpandReplicate(pixht2, 2);
  pixDisplayWrite(pixht, textord_tabfind_show_images);
  pixDestroy(&pixht2);

  // Fill to capture pixels near the mask edges that were missed
  Pix *pixt = pixSeedfillBinary(NULL, pixht, pix, 8);
  pixOr(pixht, pixht, pixt);
  pixDestroy(&pixt);

  // Eliminate lines and bars that may be joined to images.
  Pix* pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3);
  pixDilateBrick(pixfinemask, pixfinemask, 5, 5);
  pixDisplayWrite(pixfinemask, textord_tabfind_show_images);
  Pix* pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1);
  Pix* pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0);
  pixDestroy(&pixreduced);
  pixDilateBrick(pixreduced2, pixreduced2, 5, 5);
  Pix* pixcoarsemask = pixExpandReplicate(pixreduced2, 8);
  pixDestroy(&pixreduced2);
  pixDisplayWrite(pixcoarsemask, textord_tabfind_show_images);
  // Combine the coarse and fine image masks.
  pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask);
  pixDestroy(&pixfinemask);
  // Dilate a bit to make sure we get everything.
  pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3);
  Pix* pixmask = pixExpandReplicate(pixcoarsemask, 16);
  pixDestroy(&pixcoarsemask);
  pixDisplayWrite(pixmask, textord_tabfind_show_images);
  // And the image mask with the line and bar remover.
  pixAnd(pixht, pixht, pixmask);
  pixDestroy(&pixmask);
  pixDisplayWrite(pixht, textord_tabfind_show_images);
  // Find the individual image regions in the mask image.
  *boxa = pixConnComp(pixht, pixa, 8);
  pixDestroy(&pixht);
  // Rectangularize the individual images. If a sharp edge in vertical and/or
  // horizontal occupancy can be found, it indicates a probably rectangular
  // image with unwanted bits merged on, so clip to the approximate rectangle.
  int npixes = pixaGetCount(*pixa);
  for (int i = 0; i < npixes; ++i) {
    int x_start, x_end, y_start, y_end;
    Pix* img_pix = pixaGetPix(*pixa, i, L_CLONE);
    pixDisplayWrite(img_pix, textord_tabfind_show_images);
    if (pixNearlyRectangular(img_pix, kMinRectangularFraction,
                             kMaxRectangularFraction,
                             kMaxRectangularGradient,
                             &x_start, &y_start, &x_end, &y_end)) {
      // Add 1 to the size as a kludgy flag to indicate to the later stages
      // of processing that it is a clipped rectangular image .
      Pix* simple_pix = pixCreate(pixGetWidth(img_pix) + 1,
                                  pixGetHeight(img_pix), 1);
      pixDestroy(&img_pix);
      pixRasterop(simple_pix, x_start, y_start, x_end - x_start,
                  y_end - y_start, PIX_SET, NULL, 0, 0);
      // pixaReplacePix takes ownership of the simple_pix.
      pixaReplacePix(*pixa, i, simple_pix, NULL);
      img_pix = pixaGetPix(*pixa, i, L_CLONE);
    }
    // Subtract the pix from the correct location in the master image.
    l_int32 x, y, width, height;
    pixDisplayWrite(img_pix, textord_tabfind_show_images);
    boxaGetBoxGeometry(*boxa, i, &x, &y, &width, &height);
    pixRasterop(pix, x, y, width, height, PIX_NOT(PIX_SRC) & PIX_DST,
                img_pix, 0, 0);
    pixDestroy(&img_pix);
  }
#endif
}