static void GetImageMask(PIX *pixs, l_int32 res, BOXA **pboxa, const char *debugfile) { PIX *pix1, *pix2, *pix3, *pix4; PIXA *pixa; pixSetResolution(pixs, 200, 200); pix1 = pixConvertTo1(pixs, 100); pix2 = pixGenHalftoneMask(pix1, NULL, NULL, 0); pix3 = pixMorphSequence(pix2, "c20.1 + c1.20", 0); *pboxa = pixConnComp(pix3, NULL, 8); if (debugfile) { pixa = pixaCreate(0); pixaAddPix(pixa, pixs, L_COPY); pixaAddPix(pixa, pix1, L_INSERT); pixaAddPix(pixa, pix2, L_INSERT); pixaAddPix(pixa, pix3, L_INSERT); pix4 = pixaDisplayTiledInRows(pixa, 32, 1800, 0.25, 0, 25, 2); pixWrite(debugfile, pix4, IFF_JFIF_JPEG); pixDisplay(pix4, 100, 100); pixDestroy(&pix4); pixaDestroy(&pixa); } else { pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); } return; }
/*! * pixGetRegionsBinary() * * Input: pixs (1 bpp, assumed to be 300 to 400 ppi) * &pixhm (<optional return> halftone mask) * &pixtm (<optional return> textline mask) * &pixtb (<optional return> textblock mask) * debug (flag: set to 1 for debug output) * Return: 0 if OK, 1 on error * * Notes: * (1) It is best to deskew the image before segmenting. * (2) The debug flag enables a number of outputs. These * are included to show how to generate and save/display * these results. */ l_int32 pixGetRegionsBinary(PIX *pixs, PIX **ppixhm, PIX **ppixtm, PIX **ppixtb, l_int32 debug) { char *tempname; l_int32 htfound, tlfound; PIX *pixr, *pixt1, *pixt2; PIX *pixtext; /* text pixels only */ PIX *pixhm2; /* halftone mask; 2x reduction */ PIX *pixhm; /* halftone mask; */ PIX *pixtm2; /* textline mask; 2x reduction */ PIX *pixtm; /* textline mask */ PIX *pixvws; /* vertical white space mask */ PIX *pixtb2; /* textblock mask; 2x reduction */ PIX *pixtbf2; /* textblock mask; 2x reduction; small comps filtered */ PIX *pixtb; /* textblock mask */ PROCNAME("pixGetRegionsBinary"); if (ppixhm) *ppixhm = NULL; if (ppixtm) *ppixtm = NULL; if (ppixtb) *ppixtb = NULL; if (!pixs) return ERROR_INT("pixs not defined", procName, 1); if (pixGetDepth(pixs) != 1) return ERROR_INT("pixs not 1 bpp", procName, 1); /* 2x reduce, to 150 -200 ppi */ pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); pixDisplayWrite(pixr, debug); /* Get the halftone mask */ pixhm2 = pixGenHalftoneMask(pixr, &pixtext, &htfound, debug); /* Get the textline mask from the text pixels */ pixtm2 = pixGenTextlineMask(pixtext, &pixvws, &tlfound, debug); /* Get the textblock mask from the textline mask */ pixtb2 = pixGenTextblockMask(pixtm2, pixvws, debug); pixDestroy(&pixr); pixDestroy(&pixtext); pixDestroy(&pixvws); /* Remove small components from the mask, where a small * component is defined as one with both width and height < 60 */ pixtbf2 = pixSelectBySize(pixtb2, 60, 60, 4, L_SELECT_IF_EITHER, L_SELECT_IF_GTE, NULL); pixDestroy(&pixtb2); pixDisplayWriteFormat(pixtbf2, debug, IFF_PNG); /* Expand all masks to full resolution, and do filling or * small dilations for better coverage. */ pixhm = pixExpandReplicate(pixhm2, 2); pixt1 = pixSeedfillBinary(NULL, pixhm, pixs, 8); pixOr(pixhm, pixhm, pixt1); pixDestroy(&pixt1); pixDisplayWriteFormat(pixhm, debug, IFF_PNG); pixt1 = pixExpandReplicate(pixtm2, 2); pixtm = pixDilateBrick(NULL, pixt1, 3, 3); pixDestroy(&pixt1); pixDisplayWriteFormat(pixtm, debug, IFF_PNG); pixt1 = pixExpandReplicate(pixtbf2, 2); pixtb = pixDilateBrick(NULL, pixt1, 3, 3); pixDestroy(&pixt1); pixDisplayWriteFormat(pixtb, debug, IFF_PNG); pixDestroy(&pixhm2); pixDestroy(&pixtm2); pixDestroy(&pixtbf2); /* Debug: identify objects that are neither text nor halftone image */ if (debug) { pixt1 = pixSubtract(NULL, pixs, pixtm); /* remove text pixels */ pixt2 = pixSubtract(NULL, pixt1, pixhm); /* remove halftone pixels */ pixDisplayWriteFormat(pixt2, 1, IFF_PNG); pixDestroy(&pixt1); pixDestroy(&pixt2); } /* Debug: display textline components with random colors */ if (debug) { l_int32 w, h; BOXA *boxa; PIXA *pixa; boxa = pixConnComp(pixtm, &pixa, 8); pixGetDimensions(pixtm, &w, &h, NULL); pixt1 = pixaDisplayRandomCmap(pixa, w, h); pixcmapResetColor(pixGetColormap(pixt1), 0, 255, 255, 255); pixDisplay(pixt1, 100, 100); pixDisplayWriteFormat(pixt1, 1, IFF_PNG); pixaDestroy(&pixa); boxaDestroy(&boxa); pixDestroy(&pixt1); } /* Debug: identify the outlines of each textblock */ if (debug) { PIXCMAP *cmap; PTAA *ptaa; ptaa = pixGetOuterBordersPtaa(pixtb); tempname = genTempFilename("/tmp", "tb_outlines.ptaa", 0, 0); ptaaWrite(tempname, ptaa, 1); FREE(tempname); pixt1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1); cmap = pixGetColormap(pixt1); pixcmapResetColor(cmap, 0, 130, 130, 130); pixDisplay(pixt1, 500, 100); pixDisplayWriteFormat(pixt1, 1, IFF_PNG); pixDestroy(&pixt1); ptaaDestroy(&ptaa); } /* Debug: get b.b. for all mask components */ if (debug) { BOXA *bahm, *batm, *batb; bahm = pixConnComp(pixhm, NULL, 4); batm = pixConnComp(pixtm, NULL, 4); batb = pixConnComp(pixtb, NULL, 4); tempname = genTempFilename("/tmp", "htmask.boxa", 0, 0); boxaWrite(tempname, bahm); FREE(tempname); tempname = genTempFilename("/tmp", "textmask.boxa", 0, 0); boxaWrite(tempname, batm); FREE(tempname); tempname = genTempFilename("/tmp", "textblock.boxa", 0, 0); boxaWrite(tempname, batb); FREE(tempname); boxaDestroy(&bahm); boxaDestroy(&batm); boxaDestroy(&batb); } if (ppixhm) *ppixhm = pixhm; else pixDestroy(&pixhm); if (ppixtm) *ppixtm = pixtm; else pixDestroy(&pixtm); if (ppixtb) *ppixtb = pixtb; else pixDestroy(&pixtb); return 0; }
main(int argc, char **argv) { l_int32 h; l_float32 scalefactor; BOX *box; BOXA *boxa1, *boxa2; BOXAA *baa; PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7, *pix8, *pix9; L_REGPARAMS *rp; if (regTestSetup(argc, argv, &rp)) return 1; lept_rmdir("segtest"); lept_mkdir("segtest"); baa = boxaaCreate(5); /* Image region input. */ pix1 = pixRead("wet-day.jpg"); pix2 = pixScaleToSize(pix1, WIDTH, 0); pixWrite("/tmp/segtest/0.jpg", pix2, IFF_JFIF_JPEG); regTestCheckFile(rp, "/tmp/segtest/0.jpg"); /* 0 */ box = boxCreate(105, 161, 620, 872); /* image region */ boxa1 = boxaCreate(1); boxaAddBox(boxa1, box, L_INSERT); boxaaAddBoxa(baa, boxa1, L_INSERT); pixDestroy(&pix1); pixDestroy(&pix2); /* Compute image region at w = 2 * WIDTH */ pix1 = pixRead("candelabrum-11.jpg"); pix2 = pixScaleToSize(pix1, WIDTH, 0); pix3 = pixConvertTo1(pix2, 100); pix4 = pixExpandBinaryPower2(pix3, 2); /* w = 2 * WIDTH */ pix5 = pixGenHalftoneMask(pix4, NULL, NULL, 1); pix6 = pixMorphSequence(pix5, "c20.1 + c1.20", 0); pix7 = pixMaskConnComp(pix6, 8, &boxa1); pix8 = pixReduceBinary2(pix7, NULL); /* back to w = WIDTH */ pix9 = pixBackgroundNormSimple(pix2, pix8, NULL); pixWrite("/tmp/segtest/1.jpg", pix9, IFF_JFIF_JPEG); regTestCheckFile(rp, "/tmp/segtest/1.jpg"); /* 1 */ boxa2 = boxaTransform(boxa1, 0, 0, 0.5, 0.5); /* back to w = WIDTH */ boxaaAddBoxa(baa, boxa2, L_INSERT); pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); pixDestroy(&pix4); pixDestroy(&pix5); pixDestroy(&pix6); pixDestroy(&pix7); pixDestroy(&pix8); pixDestroy(&pix9); boxaDestroy(&boxa1); /* Use mask to find image region */ pix1 = pixRead("lion-page.00016.jpg"); pix2 = pixScaleToSize(pix1, WIDTH, 0); pixWrite("/tmp/segtest/2.jpg", pix2, IFF_JFIF_JPEG); regTestCheckFile(rp, "/tmp/segtest/2.jpg"); /* 2 */ pix3 = pixRead("lion-mask.00016.tif"); pix4 = pixScaleToSize(pix3, WIDTH, 0); boxa1 = pixConnComp(pix4, NULL, 8); boxaaAddBoxa(baa, boxa1, L_INSERT); pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); pixDestroy(&pix4); /* Compute image region at full res */ pix1 = pixRead("rabi.png"); scalefactor = (l_float32)WIDTH / (l_float32)pixGetWidth(pix1); pix2 = pixScaleToGray(pix1, scalefactor); pixWrite("/tmp/segtest/3.jpg", pix2, IFF_JFIF_JPEG); regTestCheckFile(rp, "/tmp/segtest/3.jpg"); /* 3 */ pix3 = pixGenHalftoneMask(pix1, NULL, NULL, 0); pix4 = pixMorphSequence(pix3, "c20.1 + c1.20", 0); boxa1 = pixConnComp(pix4, NULL, 8); boxa2 = boxaTransform(boxa1, 0, 0, scalefactor, scalefactor); boxaaAddBoxa(baa, boxa2, L_INSERT); pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); pixDestroy(&pix4); boxaDestroy(&boxa1); /* Page with no image regions */ pix1 = pixRead("lucasta-47.jpg"); pix2 = pixScaleToSize(pix1, WIDTH, 0); boxa1 = boxaCreate(1); pixWrite("/tmp/segtest/4.jpg", pix2, IFF_JFIF_JPEG); regTestCheckFile(rp, "/tmp/segtest/4.jpg"); /* 4 */ boxaaAddBoxa(baa, boxa1, L_INSERT); pixDestroy(&pix1); pixDestroy(&pix2); /* Page that is all image */ pix1 = pixRead("map1.jpg"); pix2 = pixScaleToSize(pix1, WIDTH, 0); pixWrite("/tmp/segtest/5.jpg", pix2, IFF_JFIF_JPEG); regTestCheckFile(rp, "/tmp/segtest/5.jpg"); /* 5 */ h = pixGetHeight(pix2); box = boxCreate(0, 0, WIDTH, h); boxa1 = boxaCreate(1); boxaAddBox(boxa1, box, L_INSERT); boxaaAddBoxa(baa, boxa1, L_INSERT); pixDestroy(&pix1); pixDestroy(&pix2); /* Save the boxaa file */ boxaaWrite("/tmp/segtest/seg.baa", baa); regTestCheckFile(rp, "/tmp/segtest/seg.baa"); /* 6 */ /* Do the conversion */ l_pdfSetDateAndVersion(FALSE); convertSegmentedFilesToPdf("/tmp/segtest", ".jpg", 100, L_G4_ENCODE, 140, baa, 75, 0.6, "Segmentation Test", "/tmp/pdfseg.7.pdf"); regTestCheckFile(rp, "/tmp/pdfseg.7.pdf"); /* 7 */ boxaaDestroy(&baa); return regTestCleanup(rp); }
main(int argc, char **argv) { char buffer[512]; char *tempfile1, *tempfile2; l_uint8 *data; l_int32 i, j, w, h, seq, ret, same; size_t nbytes; const char *title; BOX *box; BOXA *boxa1, *boxa2; L_BYTEA *ba; L_PDF_DATA *lpd; PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pix6; PIX *pixs, *pixt, *pixg, *pixgc, *pixc; static char mainName[] = "pdfiotest"; if (argc != 1) exit(ERROR_INT("syntax: pdfiotest", mainName, 1)); l_pdfSetDateAndVersion(0); #if 1 /* --------------- Single image tests ------------------- */ fprintf(stderr, "\n*** Writing single images as pdf files\n"); convertToPdf("weasel2.4c.png", L_FLATE_ENCODE, 0, "/tmp/pdffile01.pdf", 0, 0, 72, NULL, 0, "weasel2.4c.png"); convertToPdf("test24.jpg", L_JPEG_ENCODE, 0, "/tmp/pdffile02.pdf", 0, 0, 72, NULL, 0, "test24.jpg"); convertToPdf("feyn.tif", L_G4_ENCODE, 0, "/tmp/pdffile03.pdf", 0, 0, 300, NULL, 0, "feyn.tif"); pixs = pixRead("feyn.tif"); pixConvertToPdf(pixs, L_G4_ENCODE, 0, "/tmp/pdffile04.pdf", 0, 0, 300, NULL, 0, "feyn.tif"); pixDestroy(&pixs); pixs = pixRead("test24.jpg"); pixConvertToPdf(pixs, L_JPEG_ENCODE, 5, "/tmp/pdffile05.pdf", 0, 0, 72, NULL, 0, "test24.jpg"); pixDestroy(&pixs); pixs = pixRead("feyn.tif"); pixt = pixScaleToGray2(pixs); pixWrite("junkfeyn8.png", pixt, IFF_PNG); convertToPdf("junkfeyn8.png", L_JPEG_ENCODE, 0, "/tmp/pdffile06.pdf", 0, 0, 150, NULL, 0, "junkfeyn8.png"); pixDestroy(&pixs); pixDestroy(&pixt); convertToPdf("weasel4.16g.png", L_FLATE_ENCODE, 0, "/tmp/pdffile07.pdf", 0, 0, 30, NULL, 0, "weasel4.16g.png"); pixs = pixRead("test24.jpg"); pixg = pixConvertTo8(pixs, 0); box = boxCreate(100, 100, 100, 100); pixc = pixClipRectangle(pixs, box, NULL); pixgc = pixClipRectangle(pixg, box, NULL); pixWrite("junkpix32.jpg", pixc, IFF_JFIF_JPEG); pixWrite("junkpix8.jpg", pixgc, IFF_JFIF_JPEG); convertToPdf("junkpix32.jpg", L_FLATE_ENCODE, 0, "/tmp/pdffile08.pdf", 0, 0, 72, NULL, 0, "junkpix32.jpg"); convertToPdf("junkpix8.jpg", L_FLATE_ENCODE, 0, "/tmp/pdffile09.pdf", 0, 0, 72, NULL, 0, "junkpix8.jpg"); pixDestroy(&pixs); pixDestroy(&pixg); pixDestroy(&pixc); pixDestroy(&pixgc); boxDestroy(&box); #endif #if 1 /* --------------- Multiple image tests ------------------- */ fprintf(stderr, "\n*** Writing multiple images as single page pdf files\n"); pix1 = pixRead("feyn-fract.tif"); pix2 = pixRead("weasel8.240c.png"); /* l_pdfSetDateAndVersion(0); */ /* First, write the 1 bpp image through the mask onto the weasels */ for (i = 0; i < 5; i++) { for (j = 0; j < 10; j++) { seq = (i == 0 && j == 0) ? L_FIRST_IMAGE : L_NEXT_IMAGE; title = (i == 0 && j == 0) ? "feyn-fract.tif" : NULL; pixConvertToPdf(pix2, L_FLATE_ENCODE, 0, NULL, 100 * j, 100 * i, 70, &lpd, seq, title); } } pixConvertToPdf(pix1, L_G4_ENCODE, 0, "/tmp/pdffile10.pdf", 0, 0, 80, &lpd, L_LAST_IMAGE, NULL); /* Now, write the 1 bpp image over the weasels */ l_pdfSetG4ImageMask(0); for (i = 0; i < 5; i++) { for (j = 0; j < 10; j++) { seq = (i == 0 && j == 0) ? L_FIRST_IMAGE : L_NEXT_IMAGE; title = (i == 0 && j == 0) ? "feyn-fract.tif" : NULL; pixConvertToPdf(pix2, L_FLATE_ENCODE, 0, NULL, 100 * j, 100 * i, 70, &lpd, seq, title); } } pixConvertToPdf(pix1, L_G4_ENCODE, 0, "/tmp/pdffile11.pdf", 0, 0, 80, &lpd, L_LAST_IMAGE, NULL); l_pdfSetG4ImageMask(1); pixDestroy(&pix1); pixDestroy(&pix2); #endif #if 1 /* -------- pdf convert segmented with no image regions -------- */ fprintf(stderr, "\n*** Writing segmented images without image regions\n"); pix1 = pixRead("rabi.png"); pix2 = pixScaleToGray2(pix1); pixWrite("/tmp/rabi8.jpg", pix2, IFF_JFIF_JPEG); pix3 = pixThresholdTo4bpp(pix2, 16, 1); pixWrite("/tmp/rabi4.png", pix3, IFF_PNG); pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); /* 1 bpp input */ convertToPdfSegmented("rabi.png", 300, L_G4_ENCODE, 128, NULL, 0, 0, "/tmp/pdffile12.pdf"); convertToPdfSegmented("rabi.png", 300, L_JPEG_ENCODE, 128, NULL, 0, 0, "/tmp/pdffile13.pdf"); convertToPdfSegmented("rabi.png", 300, L_FLATE_ENCODE, 128, NULL, 0, 0, "/tmp/pdffile14.pdf"); /* 8 bpp input, no cmap */ convertToPdfSegmented("/tmp/rabi8.jpg", 150, L_G4_ENCODE, 128, NULL, 0, 0, "/tmp/pdffile15.pdf"); convertToPdfSegmented("/tmp/rabi8.jpg", 150, L_JPEG_ENCODE, 128, NULL, 0, 0, "/tmp/pdffile16.pdf"); convertToPdfSegmented("/tmp/rabi8.jpg", 150, L_FLATE_ENCODE, 128, NULL, 0, 0, "/tmp/pdffile17.pdf"); /* 4 bpp input, cmap */ convertToPdfSegmented("/tmp/rabi4.png", 150, L_G4_ENCODE, 128, NULL, 0, 0, "/tmp/pdffile18.pdf"); convertToPdfSegmented("/tmp/rabi4.png", 150, L_JPEG_ENCODE, 128, NULL, 0, 0, "/tmp/pdffile19.pdf"); convertToPdfSegmented("/tmp/rabi4.png", 150, L_FLATE_ENCODE, 128, NULL, 0, 0, "/tmp/pdffile20.pdf"); #endif #if 1 /* ---------- pdf convert segmented with image regions ---------- */ fprintf(stderr, "\n*** Writing segmented images with image regions\n"); /* Get the image region(s) for rabi.png. There are two * small bogus regions at the top, but we'll keep them for * the demonstration. */ pix1 = pixRead("rabi.png"); pixSetResolution(pix1, 300, 300); pixGetDimensions(pix1, &w, &h, NULL); pix2 = pixGenHalftoneMask(pix1, NULL, NULL, 0); pix3 = pixMorphSequence(pix2, "c20.1 + c1.20", 0); boxa1 = pixConnComp(pix3, NULL, 8); boxa2 = boxaTransform(boxa1, 0, 0, 0.5, 0.5); pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); /* 1 bpp input */ convertToPdfSegmented("rabi.png", 300, L_G4_ENCODE, 128, boxa1, 0, 0.25, "/tmp/pdffile21.pdf"); convertToPdfSegmented("rabi.png", 300, L_JPEG_ENCODE, 128, boxa1, 0, 0.25, "/tmp/pdffile22.pdf"); convertToPdfSegmented("rabi.png", 300, L_FLATE_ENCODE, 128, boxa1, 0, 0.25, "/tmp/pdffile23.pdf"); /* 8 bpp input, no cmap */ convertToPdfSegmented("/tmp/rabi8.jpg", 150, L_G4_ENCODE, 128, boxa2, 0, 0.5, "/tmp/pdffile24.pdf"); convertToPdfSegmented("/tmp/rabi8.jpg", 150, L_JPEG_ENCODE, 128, boxa2, 0, 0.5, "/tmp/pdffile25.pdf"); convertToPdfSegmented("/tmp/rabi8.jpg", 150, L_FLATE_ENCODE, 128, boxa2, 0, 0.5, "/tmp/pdffile26.pdf"); /* 4 bpp input, cmap */ convertToPdfSegmented("/tmp/rabi4.png", 150, L_G4_ENCODE, 128, boxa2, 0, 0.5, "/tmp/pdffile27.pdf"); convertToPdfSegmented("/tmp/rabi4.png", 150, L_JPEG_ENCODE, 128, boxa2, 0, 0.5, "/tmp/pdffile28.pdf"); convertToPdfSegmented("/tmp/rabi4.png", 150, L_FLATE_ENCODE, 128, boxa2, 0, 0.5, "/tmp/pdffile29.pdf"); /* 4 bpp input, cmap, data output */ data = NULL; convertToPdfDataSegmented("/tmp/rabi4.png", 150, L_G4_ENCODE, 128, boxa2, 0, 0.5, &data, &nbytes); l_binaryWrite("/tmp/pdffile30.pdf", "w", data, nbytes); lept_free(data); convertToPdfDataSegmented("/tmp/rabi4.png", 150, L_JPEG_ENCODE, 128, boxa2, 0, 0.5, &data, &nbytes); l_binaryWrite("/tmp/pdffile31.pdf", "w", data, nbytes); lept_free(data); convertToPdfDataSegmented("/tmp/rabi4.png", 150, L_FLATE_ENCODE, 128, boxa2, 0, 0.5, &data, &nbytes); l_binaryWrite("/tmp/pdffile32.pdf", "w", data, nbytes); lept_free(data); boxaDestroy(&boxa1); boxaDestroy(&boxa2); #endif #if 1 /* -------- pdf convert segmented from color image -------- */ fprintf(stderr, "\n*** Writing color segmented images\n"); pix1 = pixRead("candelabrum-11.jpg"); pix2 = pixScale(pix1, 3.0, 3.0); pixWrite("/tmp/candelabrum3.jpg", pix2, IFF_JFIF_JPEG); GetImageMask(pix2, 200, &boxa1, "/tmp/seg1.jpg"); convertToPdfSegmented("/tmp/candelabrum3.jpg", 200, L_G4_ENCODE, 100, boxa1, 0, 0.25, "/tmp/pdffile33.pdf"); convertToPdfSegmented("/tmp/candelabrum3.jpg", 200, L_JPEG_ENCODE, 100, boxa1, 0, 0.25, "/tmp/pdffile34.pdf"); convertToPdfSegmented("/tmp/candelabrum3.jpg", 200, L_FLATE_ENCODE, 100, boxa1, 0, 0.25, "/tmp/pdffile35.pdf"); pixDestroy(&pix1); pixDestroy(&pix2); boxaDestroy(&boxa1); pix1 = pixRead("lion-page.00016.jpg"); pix2 = pixScale(pix1, 3.0, 3.0); pixWrite("/tmp/lion16.jpg", pix2, IFF_JFIF_JPEG); pix3 = pixRead("lion-mask.00016.tif"); boxa1 = pixConnComp(pix3, NULL, 8); boxa2 = boxaTransform(boxa1, 0, 0, 3.0, 3.0); convertToPdfSegmented("/tmp/lion16.jpg", 200, L_G4_ENCODE, 190, boxa2, 0, 0.5, "/tmp/pdffile36.pdf"); convertToPdfSegmented("/tmp/lion16.jpg", 200, L_JPEG_ENCODE, 190, boxa2, 0, 0.5, "/tmp/pdffile37.pdf"); convertToPdfSegmented("/tmp/lion16.jpg", 200, L_FLATE_ENCODE, 190, boxa2, 0, 0.5, "/tmp/pdffile38.pdf"); /* Quantize the non-image part and flate encode. * This is useful because it results in a smaller file than * when you flate-encode the un-quantized non-image regions. */ pix4 = pixScale(pix3, 3.0, 3.0); /* higher res mask, for combining */ pix5 = QuantizeNonImageRegion(pix2, pix4, 12); pixWrite("/tmp/lion16-quant.png", pix5, IFF_PNG); convertToPdfSegmented("/tmp/lion16-quant.png", 200, L_FLATE_ENCODE, 190, boxa2, 0, 0.5, "/tmp/pdffile39.pdf"); pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); pixDestroy(&pix4); pixDestroy(&pix5); boxaDestroy(&boxa1); boxaDestroy(&boxa2); #endif #if 1 /* ------------------ Test multipage pdf generation ----------------- */ fprintf(stderr, "\n*** Writing multipage pdfs from single page pdfs\n"); /* Generate a multi-page pdf from all these files */ startTimer(); concatenatePdf("/tmp", "pdffile", "/tmp/cat_lept.pdf"); fprintf(stderr, "Time: %7.3f\n", stopTimer()); /* Put two good pdf files in a directory */ lept_mkdir("good"); lept_cp("testfile1.pdf", "/tmp/good"); lept_cp("testfile2.pdf", "/tmp/good"); concatenatePdf("/tmp/good", "file", "/tmp/good.pdf"); /* Make a version with the pdf id removed, so that it is not * recognized as a pdf */ ba = l_byteaInitFromFile("testfile2.pdf"); data = l_byteaGetData(ba, &nbytes); l_binaryWrite("testfile0.notpdf.pdf", "w", data + 10, nbytes - 10); /* Make a version with a corrupted trailer */ data[2297] = '2'; /* munge trailer object 6: change 458 --> 428 */ l_binaryWrite("testfile2.bad.pdf", "w", data, nbytes); /* Put these two bad files, along with a good file, in a directory */ lept_mkdir("bad"); lept_mv("testfile0.notpdf.pdf", "/tmp/bad"); lept_cp("testfile1.pdf", "/tmp/bad"); lept_mv("testfile2.bad.pdf", "/tmp/bad"); l_byteaDestroy(&ba); /* Run concat on the bad files. In the /tmp/bad/ directory, * the "not pdf" file should be ignored, and the corrupted pdf * file should be properly parsed, so the resulting * concatenated files should be identical. */ fprintf(stderr, "\nWe attempt to build from the bad directory\n"); concatenatePdf("/tmp/bad", "file", "/tmp/bad.pdf"); filesAreIdentical("/tmp/good.pdf", "/tmp/bad.pdf", &same); if (same) fprintf(stderr, "Fixed: files are the same\n" "Attempt succeeded\n\n"); else fprintf(stderr, "Busted: files are different\n"); /* pdftk fails because the first file is not a pdf */ fprintf(stderr, "pdftk attempts to build from the bad directory\n"); tempfile1 = genPathname("/tmp/bad", "*.pdf"); tempfile2 = genPathname("/tmp", "pdftk.bad.pdf"); snprintf(buffer, sizeof(buffer), "pdftk %s output %s", tempfile1, tempfile2); ret = system(buffer); lept_free(tempfile1); lept_free(tempfile2); fprintf(stderr, "Attempt failed\n\n"); #endif #if 1 fprintf(stderr, "\n*** pdftk writes multipage pdfs from images\n"); tempfile1 = genPathname("/tmp", "pdffile*.pdf"); tempfile2 = genPathname("/tmp", "cat_pdftk.pdf"); snprintf(buffer, sizeof(buffer), "pdftk %s output %s", tempfile1, tempfile2); ret = system(buffer); lept_free(tempfile1); lept_free(tempfile2); #endif #if 1 /* -- Test simple interface for generating multi-page pdf from images -- */ fprintf(stderr, "\n*** Writing multipage pdfs from images\n"); /* Put four image files in a directory. They will be encoded thus: * file1.png: flate (8 bpp, only 10 colors) * file2.jpg: dct (8 bpp, 256 colors because of the jpeg encoding) * file3.tif: g4 (1 bpp) * file4.jpg: dct (32 bpp) */ lept_mkdir("image"); pix1 = pixRead("feyn.tif"); pix2 = pixRead("rabi.png"); pix3 = pixScaleToGray3(pix1); pix4 = pixScaleToGray3(pix2); pix5 = pixScale(pix1, 0.33, 0.33); pix6 = pixRead("test24.jpg"); pixWrite("/tmp/image/file1.png", pix3, IFF_PNG); /* 10 colors */ pixWrite("/tmp/image/file2.jpg", pix4, IFF_JFIF_JPEG); /* 256 colors */ pixWrite("/tmp/image/file3.tif", pix5, IFF_TIFF_G4); pixWrite("/tmp/image/file4.jpg", pix6, IFF_JFIF_JPEG); startTimer(); convertFilesToPdf("/tmp/image", "file", 100, 0.8, 75, "4 file test", "/tmp/fourimages.pdf"); fprintf(stderr, "Time: %7.3f\n", stopTimer()); pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); pixDestroy(&pix4); pixDestroy(&pix5); pixDestroy(&pix6); #endif return 0; }
// Finds image regions within the source pix (page image) and returns // the image regions as a Boxa, Pixa pair, analgous to pixConnComp. // The returned boxa, pixa may be NULL, meaning no images found. // If not NULL, they must be destroyed by the caller. void ImageFinder::FindImages(Pix* pix, Boxa** boxa, Pixa** pixa) { *boxa = NULL; *pixa = NULL; #ifdef HAVE_LIBLEPT if (pixGetWidth(pix) < kMinImageFindSize || pixGetHeight(pix) < kMinImageFindSize) return; // Not worth looking at small images. // Reduce by factor 2. Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0); pixDisplayWrite(pixr, textord_tabfind_show_images); // Get the halftone mask directly from Leptonica. Pix *pixht2 = pixGenHalftoneMask(pixr, NULL, NULL, textord_tabfind_show_images); pixDestroy(&pixr); if (pixht2 == NULL) return; // Expand back up again. Pix *pixht = pixExpandReplicate(pixht2, 2); pixDisplayWrite(pixht, textord_tabfind_show_images); pixDestroy(&pixht2); // Fill to capture pixels near the mask edges that were missed Pix *pixt = pixSeedfillBinary(NULL, pixht, pix, 8); pixOr(pixht, pixht, pixt); pixDestroy(&pixt); // Eliminate lines and bars that may be joined to images. Pix* pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3); pixDilateBrick(pixfinemask, pixfinemask, 5, 5); pixDisplayWrite(pixfinemask, textord_tabfind_show_images); Pix* pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1); Pix* pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0); pixDestroy(&pixreduced); pixDilateBrick(pixreduced2, pixreduced2, 5, 5); Pix* pixcoarsemask = pixExpandReplicate(pixreduced2, 8); pixDestroy(&pixreduced2); pixDisplayWrite(pixcoarsemask, textord_tabfind_show_images); // Combine the coarse and fine image masks. pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask); pixDestroy(&pixfinemask); // Dilate a bit to make sure we get everything. pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3); Pix* pixmask = pixExpandReplicate(pixcoarsemask, 16); pixDestroy(&pixcoarsemask); pixDisplayWrite(pixmask, textord_tabfind_show_images); // And the image mask with the line and bar remover. pixAnd(pixht, pixht, pixmask); pixDestroy(&pixmask); pixDisplayWrite(pixht, textord_tabfind_show_images); // Find the individual image regions in the mask image. *boxa = pixConnComp(pixht, pixa, 8); pixDestroy(&pixht); // Rectangularize the individual images. If a sharp edge in vertical and/or // horizontal occupancy can be found, it indicates a probably rectangular // image with unwanted bits merged on, so clip to the approximate rectangle. int npixes = pixaGetCount(*pixa); for (int i = 0; i < npixes; ++i) { int x_start, x_end, y_start, y_end; Pix* img_pix = pixaGetPix(*pixa, i, L_CLONE); pixDisplayWrite(img_pix, textord_tabfind_show_images); if (pixNearlyRectangular(img_pix, kMinRectangularFraction, kMaxRectangularFraction, kMaxRectangularGradient, &x_start, &y_start, &x_end, &y_end)) { // Add 1 to the size as a kludgy flag to indicate to the later stages // of processing that it is a clipped rectangular image . Pix* simple_pix = pixCreate(pixGetWidth(img_pix) + 1, pixGetHeight(img_pix), 1); pixDestroy(&img_pix); pixRasterop(simple_pix, x_start, y_start, x_end - x_start, y_end - y_start, PIX_SET, NULL, 0, 0); // pixaReplacePix takes ownership of the simple_pix. pixaReplacePix(*pixa, i, simple_pix, NULL); img_pix = pixaGetPix(*pixa, i, L_CLONE); } // Subtract the pix from the correct location in the master image. l_int32 x, y, width, height; pixDisplayWrite(img_pix, textord_tabfind_show_images); boxaGetBoxGeometry(*boxa, i, &x, &y, &width, &height); pixRasterop(pix, x, y, width, height, PIX_NOT(PIX_SRC) & PIX_DST, img_pix, 0, 0); pixDestroy(&img_pix); } #endif }