void
MakeWordBoxes2(PIX          *pixs,
               l_int32       reduction,
               L_REGPARAMS  *rp)
{
l_int32  default_minwidth = 10;
l_int32  default_minheight = 10;
l_int32  default_maxwidth = 400;
l_int32  default_maxheight = 70;
l_int32  minwidth, minheight, maxwidth, maxheight;
BOXA    *boxa1, *boxa2;
NUMA    *na;
PIX     *pixd1, *pixd2;
PIXA    *pixa;

    minwidth = default_minwidth / reduction;
    minheight = default_minheight / reduction;
    maxwidth = default_maxwidth / reduction;
    maxheight = default_maxheight / reduction;

        /* Get the word boxes */
    pixGetWordsInTextlines(pixs, reduction, minwidth, minheight,
                           maxwidth, maxheight, &boxa1, &pixa, &na);
    pixaDestroy(&pixa);
    numaDestroy(&na);
    if (reduction == 1)
        boxa2 = boxaCopy(boxa1, L_CLONE);
    else
        boxa2 = boxaTransform(boxa1, 0, 0, 2.0, 2.0);
    pixd1 = pixConvertTo8(pixs, 1);
    pixRenderBoxaArb(pixd1, boxa2, 2, 255, 0, 0);
    regTestWritePixAndCheck(rp, pixd1, IFF_PNG);
    pixDisplayWithTitle(pixd1, 800, 100, NULL, rp->display);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);

        /* Do it again with this interface.  The result should be the same. */
    pixGetWordBoxesInTextlines(pixs, reduction, minwidth, minheight,
                               maxwidth, maxheight, &boxa1, NULL);
    if (reduction == 1)
        boxa2 = boxaCopy(boxa1, L_CLONE);
    else
        boxa2 = boxaTransform(boxa1, 0, 0, 2.0, 2.0);
    pixd2 = pixConvertTo8(pixs, 1);
    pixRenderBoxaArb(pixd2, boxa2, 2, 255, 0, 0);
    if (regTestComparePix(rp, pixd1, pixd2)) {
        L_ERROR("pix not the same", "MakeWordBoxes2");
        pixDisplayWithTitle(pixd2, 800, 100, NULL, rp->display);
    }
    pixDestroy(&pixd1);
    pixDestroy(&pixd2);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
    return;
}
Exemple #2
0
int main(int    argc,
         char **argv)
{
char        *filein, *fileout, *fontdir;
l_int32      w, h, width, sep;
l_float32    scalefact;
L_BMF       *bmf;
BOXA        *boxa1, *boxa2;
PIX         *pixd;
static char  mainName[] = "displayboxa";

    if (argc != 4 && argc != 5) {
        fprintf(stderr, "Syntax error in displayboxa:\n"
           "   displayboxa filein width fileout [fontdir]\n");
         return 1;
    }

    filein = argv[1];
    fileout = argv[3];
    fontdir = (argc == 4) ? NULL : argv[4];
    width = atoi(argv[2]);
    if (width < 30) {
        L_ERROR("width too small; setting to 100\n", mainName);
        width = 100;
    }

    if ((boxa1 = boxaRead(filein)) == NULL)
        return ERROR_INT("boxa not made", mainName, 1);
    boxaGetExtent(boxa1, &w, &h, NULL);
    scalefact = (l_float32)width / (l_float32)w;
    boxa2 = boxaTransform(boxa1, 0, 0, scalefact, scalefact);
    sep = L_MIN(width / 5, 20);
    pixd = boxaDisplayTiled(boxa2, NULL, 1500, 2, 1.0, 0, sep, 2, fontdir);
    pixWrite(fileout, pixd, IFF_PNG);
    pixDisplay(pixd, 100, 100);

    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
    pixDestroy(&pixd);
    return 0;
}
Exemple #3
0
/*!
 * \brief   pixFindBaselines()
 *
 * \param[in]    pixs     1 bpp, 300 ppi
 * \param[out]   ppta     [optional] pairs of pts corresponding to
 *                        approx. ends of each text line
 * \param[in]    pixadb   for debug output; use NULL to skip
 * \return  na of baseline y values, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) Input binary image must have text lines already aligned
 *          horizontally.  This can be done by either rotating the
 *          image with pixDeskew(), or, if a projective transform
 *          is required, by doing pixDeskewLocal() first.
 *      (2) Input null for &pta if you don't want this returned.
 *          The pta will come in pairs of points (left and right end
 *          of each baseline).
 *      (3) Caution: this will not work properly on text with multiple
 *          columns, where the lines are not aligned between columns.
 *          If there are multiple columns, they should be extracted
 *          separately before finding the baselines.
 *      (4) This function constructs different types of output
 *          for baselines; namely, a set of raster line values and
 *          a set of end points of each baseline.
 *      (5) This function was designed to handle short and long text lines
 *          without using dangerous thresholds on the peak heights.  It does
 *          this by combining the differential signal with a morphological
 *          analysis of the locations of the text lines.  One can also
 *          combine this data to normalize the peak heights, by weighting
 *          the differential signal in the region of each baseline
 *          by the inverse of the width of the text line found there.
 * </pre>
 */
NUMA *
pixFindBaselines(PIX   *pixs,
                 PTA  **ppta,
                 PIXA  *pixadb)
{
l_int32    h, i, j, nbox, val1, val2, ndiff, bx, by, bw, bh;
l_int32    imaxloc, peakthresh, zerothresh, inpeak;
l_int32    mintosearch, max, maxloc, nloc, locval;
l_int32   *array;
l_float32  maxval;
BOXA      *boxa1, *boxa2, *boxa3;
GPLOT     *gplot;
NUMA      *nasum, *nadiff, *naloc, *naval;
PIX       *pix1, *pix2;
PTA       *pta;

    PROCNAME("pixFindBaselines");

    if (ppta) *ppta = NULL;
    if (!pixs || pixGetDepth(pixs) != 1)
        return (NUMA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL);

        /* Close up the text characters, removing noise */
    pix1 = pixMorphSequence(pixs, "c25.1 + e15.1", 0);

        /* Estimate the resolution */
    if (pixadb) pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT);

        /* Save the difference of adjacent row sums.
         * The high positive-going peaks are the baselines */
    if ((nasum = pixCountPixelsByRow(pix1, NULL)) == NULL) {
        pixDestroy(&pix1);
        return (NUMA *)ERROR_PTR("nasum not made", procName, NULL);
    }
    h = pixGetHeight(pixs);
    nadiff = numaCreate(h);
    numaGetIValue(nasum, 0, &val2);
    for (i = 0; i < h - 1; i++) {
        val1 = val2;
        numaGetIValue(nasum, i + 1, &val2);
        numaAddNumber(nadiff, val1 - val2);
    }
    numaDestroy(&nasum);

    if (pixadb) {  /* show the difference signal */
        lept_mkdir("lept/baseline");
        gplotSimple1(nadiff, GPLOT_PNG, "/tmp/lept/baseline/diff", "Diff Sig");
        pix2 = pixRead("/tmp/lept/baseline/diff.png");
        pixaAddPix(pixadb, pix2, L_INSERT);
    }

        /* Use the zeroes of the profile to locate each baseline. */
    array = numaGetIArray(nadiff);
    ndiff = numaGetCount(nadiff);
    numaGetMax(nadiff, &maxval, &imaxloc);
    numaDestroy(&nadiff);

        /* Use this to begin locating a new peak: */
    peakthresh = (l_int32)maxval / PEAK_THRESHOLD_RATIO;
        /* Use this to begin a region between peaks: */
    zerothresh = (l_int32)maxval / ZERO_THRESHOLD_RATIO;

    naloc = numaCreate(0);
    naval = numaCreate(0);
    inpeak = FALSE;
    for (i = 0; i < ndiff; i++) {
        if (inpeak == FALSE) {
            if (array[i] > peakthresh) {  /* transition to in-peak */
                inpeak = TRUE;
                mintosearch = i + MIN_DIST_IN_PEAK; /* accept no zeros
                                               * between i and mintosearch */
                max = array[i];
                maxloc = i;
            }
        } else {  /* inpeak == TRUE; look for max */
            if (array[i] > max) {
                max = array[i];
                maxloc = i;
                mintosearch = i + MIN_DIST_IN_PEAK;
            } else if (i > mintosearch && array[i] <= zerothresh) {  /* leave */
                inpeak = FALSE;
                numaAddNumber(naval, max);
                numaAddNumber(naloc, maxloc);
            }
        }
    }
    LEPT_FREE(array);

        /* If array[ndiff-1] is max, eg. no descenders, baseline at bottom */
    if (inpeak) {
        numaAddNumber(naval, max);
        numaAddNumber(naloc, maxloc);
    }

    if (pixadb) {  /* show the raster locations for the peaks */
        gplot = gplotCreate("/tmp/lept/baseline/loc", GPLOT_PNG, "Peak locs",
                            "rasterline", "height");
        gplotAddPlot(gplot, naloc, naval, GPLOT_POINTS, "locs");
        gplotMakeOutput(gplot);
        gplotDestroy(&gplot);
        pix2 = pixRead("/tmp/lept/baseline/loc.png");
        pixaAddPix(pixadb, pix2, L_INSERT);
    }
    numaDestroy(&naval);

        /* Generate an approximate profile of text line width.
         * First, filter the boxes of text, where there may be
         * more than one box for a given textline. */
    pix2 = pixMorphSequence(pix1, "r11 + c20.1 + o30.1 +c1.3", 0);
    if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
    boxa1 = pixConnComp(pix2, NULL, 4);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    if (boxaGetCount(boxa1) == 0) {
        numaDestroy(&naloc);
        boxaDestroy(&boxa1);
        L_INFO("no compnents after filtering\n", procName);
        return NULL;
    }
    boxa2 = boxaTransform(boxa1, 0, 0, 4., 4.);
    boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);

        /* Optionally, find the baseline segments */
    pta = NULL;
    if (ppta) {
        pta = ptaCreate(0);
        *ppta = pta;
    }
    if (pta) {
      nloc = numaGetCount(naloc);
      nbox = boxaGetCount(boxa3);
      for (i = 0; i < nbox; i++) {
          boxaGetBoxGeometry(boxa3, i, &bx, &by, &bw, &bh);
          for (j = 0; j < nloc; j++) {
              numaGetIValue(naloc, j, &locval);
              if (L_ABS(locval - (by + bh)) > 25)
                  continue;
              ptaAddPt(pta, bx, locval);
              ptaAddPt(pta, bx + bw, locval);
              break;
          }
      }
    }
    boxaDestroy(&boxa3);

    if (pixadb && pta) {  /* display baselines */
        l_int32  npts, x1, y1, x2, y2;
        pix1 = pixConvertTo32(pixs);
        npts = ptaGetCount(pta);
        for (i = 0; i < npts; i += 2) {
            ptaGetIPt(pta, i, &x1, &y1);
            ptaGetIPt(pta, i + 1, &x2, &y2);
            pixRenderLineArb(pix1, x1, y1, x2, y2, 2, 255, 0, 0);
        }
        pixWrite("/tmp/lept/baseline/baselines.png", pix1, IFF_PNG);
        pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT);
        pixDestroy(&pix1);
    }

    return naloc;
}
Exemple #4
0
int main(int    argc,
         char **argv)
{
l_uint8     *data1, *data2;
l_int32      i, same, w, h, width, success, nba;
size_t       size1, size2;
l_float32    diffarea, diffxor, scalefact;
BOX         *box;
BOXA        *boxa1, *boxa2, *boxa3;
BOXAA       *baa1, *baa2, *baa3;
PIX         *pix1, *pixdb;
PIXA        *pixa1, *pixa2;
L_REGPARAMS  *rp;

    if (regTestSetup(argc, argv, &rp))
        return 1;

    lept_mkdir("lept/boxa");

        /* Make a boxa and display its contents */
    boxa1 = boxaCreate(6);
    box = boxCreate(60, 60, 40, 20);
    boxaAddBox(boxa1, box, L_INSERT);
    box = boxCreate(120, 50, 20, 50);
    boxaAddBox(boxa1, box, L_INSERT);
    box = boxCreate(50, 140, 46, 60);
    boxaAddBox(boxa1, box, L_INSERT);
    box = boxCreate(166, 130, 64, 28);
    boxaAddBox(boxa1, box, L_INSERT);
    box = boxCreate(64, 224, 44, 34);
    boxaAddBox(boxa1, box, L_INSERT);
    box = boxCreate(117, 206, 26, 74);
    boxaAddBox(boxa1, box, L_INSERT);
    pix1 = DisplayBoxa(boxa1);
    regTestWritePixAndCheck(rp, pix1, IFF_PNG);  /* 0 */
    pixDisplayWithTitle(pix1, 0, 0, NULL, rp->display);
    pixDestroy(&pix1);

    boxaCompareRegions(boxa1, boxa1, 100, &same, &diffarea, &diffxor, NULL);
    regTestCompareValues(rp, 1, same, 0.0);  /* 1 */
    regTestCompareValues(rp, 0.0, diffarea, 0.0);  /* 2 */
    regTestCompareValues(rp, 0.0, diffxor, 0.0);  /* 3 */

    boxa2 = boxaTransform(boxa1, -13, -13, 1.0, 1.0);
    boxaCompareRegions(boxa1, boxa2, 10, &same, &diffarea, &diffxor, NULL);
    regTestCompareValues(rp, 1, same, 0.0);  /* 4 */
    regTestCompareValues(rp, 0.0, diffarea, 0.0);  /* 5 */
    regTestCompareValues(rp, 0.0, diffxor, 0.0);  /* 6 */
    boxaDestroy(&boxa2);

    boxa2 = boxaReconcileEvenOddHeight(boxa1, L_ADJUST_TOP_AND_BOT, 6,
                                       L_ADJUST_CHOOSE_MIN, 1.0, 0);
    pix1 = DisplayBoxa(boxa2);
    regTestWritePixAndCheck(rp, pix1, IFF_PNG);  /* 7 */
    pixDisplayWithTitle(pix1, 200, 0, NULL, rp->display);
    pixDestroy(&pix1);

    boxaCompareRegions(boxa1, boxa2, 10, &same, &diffarea, &diffxor, &pixdb);
    regTestCompareValues(rp, 1, same, 0.0);  /* 8 */
    regTestCompareValues(rp, 0.053, diffarea, 0.002);  /* 9 */
    regTestCompareValues(rp, 0.240, diffxor, 0.002);  /* 10 */
    regTestWritePixAndCheck(rp, pixdb, IFF_PNG);  /* 11 */
    pixDisplayWithTitle(pixdb, 400, 0, NULL, rp->display);
    pixDestroy(&pixdb);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);

        /* Input is a fairly clean boxa */
    boxa1 = boxaRead("boxa1.ba");
    boxa2 = boxaReconcileEvenOddHeight(boxa1, L_ADJUST_TOP, 80,
                                       L_ADJUST_CHOOSE_MIN, 1.05, 1);
    width = 100;
    boxaGetExtent(boxa2, &w, &h, NULL);
    scalefact = (l_float32)width / (l_float32)w;
    boxa3 = boxaTransform(boxa2, 0, 0, scalefact, scalefact);
    pix1 = boxaDisplayTiled(boxa3, NULL, 1500, 2, 1.0, 0, 3, 2);
    regTestWritePixAndCheck(rp, pix1, IFF_PNG);  /* 12 */
    pixDisplayWithTitle(pix1, 600, 0, NULL, rp->display);
    pixDestroy(&pix1);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
    boxaDestroy(&boxa3);

        /* Input is an unsmoothed and noisy boxa */
    boxa1 = boxaRead("boxa2.ba");
    boxa2 = boxaReconcileEvenOddHeight(boxa1, L_ADJUST_TOP, 80,
                                       L_ADJUST_CHOOSE_MIN, 1.05, 1);
    width = 100;
    boxaGetExtent(boxa2, &w, &h, NULL);
    scalefact = (l_float32)width / (l_float32)w;
    boxa3 = boxaTransform(boxa2, 0, 0, scalefact, scalefact);
    pix1 = boxaDisplayTiled(boxa3, NULL, 1500, 2, 1.0, 0, 3, 2);
    regTestWritePixAndCheck(rp, pix1, IFF_PNG);  /* 13 */
    pixDisplayWithTitle(pix1, 800, 0, NULL, rp->display);
    pixDestroy(&pix1);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
    boxaDestroy(&boxa3);

        /* Input is a boxa smoothed with a median window filter */
    boxa1 = boxaRead("boxa3.ba");
    boxa2 = boxaReconcileEvenOddHeight(boxa1, L_ADJUST_TOP, 80,
                                       L_ADJUST_CHOOSE_MIN, 1.05, 1);
    width = 100;
    boxaGetExtent(boxa2, &w, &h, NULL);
    scalefact = (l_float32)width / (l_float32)w;
    boxa3 = boxaTransform(boxa2, 0, 0, scalefact, scalefact);
    pix1 = boxaDisplayTiled(boxa3, NULL, 1500, 2, 1.0, 0, 3, 2);
    regTestWritePixAndCheck(rp, pix1, IFF_PNG);  /* 14 */
    pixDisplayWithTitle(pix1, 1000, 0, NULL, rp->display);
    pixDestroy(&pix1);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
    boxaDestroy(&boxa3);

        /* Test serialized boxa I/O to and from memory */
    data1 = l_binaryRead("boxa2.ba", &size1);
    boxa1 = boxaReadMem(data1, size1);
    boxaWriteMem(&data2, &size2, boxa1);
    boxa2 = boxaReadMem(data2, size2);
    boxaWrite("/tmp/lept/boxa/boxa1.ba", boxa1);
    boxaWrite("/tmp/lept/boxa/boxa2.ba", boxa2);
    filesAreIdentical("/tmp/lept/boxa/boxa1.ba", "/tmp/lept/boxa/boxa2.ba",
                      &same); 
    regTestCompareValues(rp, 1, same, 0.0);  /* 15 */
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
    lept_free(data1);
    lept_free(data2);

        /* ----------- Test pixaDisplayBoxaa() ------------ */
    pixa1 = pixaReadBoth("showboxes.pac");
    baa1 = boxaaRead("showboxes1.baa");
    baa2 = boxaaTranspose(baa1);
    baa3 = boxaaTranspose(baa2);
    nba = boxaaGetCount(baa1);
    success = TRUE;
    for (i = 0; i < nba; i++) {
        boxa1 = boxaaGetBoxa(baa1, i, L_CLONE);
        boxa2 = boxaaGetBoxa(baa3, i, L_CLONE);
        boxaEqual(boxa1, boxa2, 0, NULL, &same);
        boxaDestroy(&boxa1);
        boxaDestroy(&boxa2);
        if (!same) success = FALSE;
    }
        /* Check that the transpose is reversible */
    regTestCompareValues(rp, 1, success, 0.0);  /* 16 */
    pixa2 = pixaDisplayBoxaa(pixa1, baa2, L_DRAW_RGB, 2);
    pix1 = pixaDisplayTiledInRows(pixa2, 32, 1400, 1.0, 0, 10, 0);
    regTestWritePixAndCheck(rp, pix1, IFF_PNG);  /* 17 */
    pixDisplayWithTitle(pix1, 0, 600, NULL, rp->display);
    fprintf(stderr, "Writing to: /tmp/lept/boxa/show.pdf\n");
    l_pdfSetDateAndVersion(FALSE);
    pixaConvertToPdf(pixa2, 75, 1.0, 0, 0, NULL, "/tmp/lept/boxa/show.pdf");
    regTestCheckFile(rp, "/tmp/lept/boxa/show.pdf");  /* 18 */
    pixDestroy(&pix1);
    pixaDestroy(&pixa1);
    pixaDestroy(&pixa2);
    boxaaDestroy(&baa1);
    boxaaDestroy(&baa2);
    boxaaDestroy(&baa3);

    return regTestCleanup(rp);
}
Exemple #5
0
int main(int    argc,
         char **argv)
{
char         buffer[512];
char        *tempfile1, *tempfile2;
l_uint8     *data;
l_int32      i, j, w, h, seq, ret, same;
size_t       nbytes;
const char  *title;
BOX         *box;
BOXA        *boxa1, *boxa2;
L_BYTEA     *ba;
L_PDF_DATA  *lpd;
PIX         *pix1, *pix2, *pix3, *pix4, *pix5, *pix6;
PIX         *pixs, *pixt, *pixg, *pixgc, *pixc;
static char  mainName[] = "pdfiotest";

    if (argc != 1)
        return ERROR_INT("syntax: pdfiotest", mainName, 1);
    l_pdfSetDateAndVersion(0);

    lept_mkdir("lept/pdf");

#if 1
    /* ---------------  Single image tests  ------------------- */
    fprintf(stderr, "\n*** Writing single images as pdf files\n");

    convertToPdf("weasel2.4c.png", L_FLATE_ENCODE, 0, "/tmp/lept/pdf/file01.pdf",
                 0, 0, 72, "weasel2.4c.png", NULL, 0);
    convertToPdf("test24.jpg", L_JPEG_ENCODE, 0, "/tmp/lept/pdf/file02.pdf",
                 0, 0, 72, "test24.jpg", NULL, 0);
    convertToPdf("feyn.tif", L_G4_ENCODE, 0, "/tmp/lept/pdf/file03.pdf",
                 0, 0, 300, "feyn.tif", NULL, 0);

    pixs = pixRead("feyn.tif");
    pixConvertToPdf(pixs, L_G4_ENCODE, 0, "/tmp/lept/pdf/file04.pdf", 0, 0, 300,
                    "feyn.tif", NULL, 0);
    pixDestroy(&pixs);

    pixs = pixRead("test24.jpg");
    pixConvertToPdf(pixs, L_JPEG_ENCODE, 5, "/tmp/lept/pdf/file05.pdf",
                    0, 0, 72, "test24.jpg", NULL, 0);
    pixDestroy(&pixs);

    pixs = pixRead("feyn.tif");
    pixt = pixScaleToGray2(pixs);
    pixWrite("/tmp/lept/pdf/feyn8.png", pixt, IFF_PNG);
    convertToPdf("/tmp/lept/pdf/feyn8.png", L_JPEG_ENCODE, 0,
                 "/tmp/lept/pdf/file06.pdf", 0, 0, 150, "feyn8.png", NULL, 0);
    pixDestroy(&pixs);
    pixDestroy(&pixt);

    convertToPdf("weasel4.16g.png", L_FLATE_ENCODE, 0,
                 "/tmp/lept/pdf/file07.pdf", 0, 0, 30,
                 "weasel4.16g.png", NULL, 0);

    pixs = pixRead("test24.jpg");
    pixg = pixConvertTo8(pixs, 0);
    box = boxCreate(100, 100, 100, 100);
    pixc = pixClipRectangle(pixs, box, NULL);
    pixgc = pixClipRectangle(pixg, box, NULL);
    pixWrite("/tmp/lept/pdf/pix32.jpg", pixc, IFF_JFIF_JPEG);
    pixWrite("/tmp/lept/pdf/pix8.jpg", pixgc, IFF_JFIF_JPEG);
    convertToPdf("/tmp/lept/pdf/pix32.jpg", L_FLATE_ENCODE, 0,
                 "/tmp/lept/pdf/file08.pdf", 0, 0, 72, "pix32.jpg", NULL, 0);
    convertToPdf("/tmp/lept/pdf/pix8.jpg", L_FLATE_ENCODE, 0,
                 "/tmp/lept/pdf/file09.pdf", 0, 0, 72, "pix8.jpg", NULL, 0);
    pixDestroy(&pixs);
    pixDestroy(&pixg);
    pixDestroy(&pixc);
    pixDestroy(&pixgc);
    boxDestroy(&box);
#endif


#if 1
    /* ---------------  Multiple image tests  ------------------- */
    fprintf(stderr, "\n*** Writing multiple images as single page pdf files\n");

    pix1 = pixRead("feyn-fract.tif");
    pix2 = pixRead("weasel8.240c.png");

/*    l_pdfSetDateAndVersion(0); */
        /* First, write the 1 bpp image through the mask onto the weasels */
    for (i = 0; i < 5; i++) {
        for (j = 0; j < 10; j++) {
            seq = (i == 0 && j == 0) ? L_FIRST_IMAGE : L_NEXT_IMAGE;
            title = (i == 0 && j == 0) ? "feyn-fract.tif" : NULL;
            pixConvertToPdf(pix2, L_FLATE_ENCODE, 0, NULL, 100 * j,
                            100 * i, 70, title, &lpd, seq);
        }
    }
    pixConvertToPdf(pix1, L_G4_ENCODE, 0, "/tmp/lept/pdf/file10.pdf", 0, 0, 80,
                    NULL, &lpd, L_LAST_IMAGE);

        /* Now, write the 1 bpp image over the weasels */
    l_pdfSetG4ImageMask(0);
    for (i = 0; i < 5; i++) {
        for (j = 0; j < 10; j++) {
            seq = (i == 0 && j == 0) ? L_FIRST_IMAGE : L_NEXT_IMAGE;
            title = (i == 0 && j == 0) ? "feyn-fract.tif" : NULL;
            pixConvertToPdf(pix2, L_FLATE_ENCODE, 0, NULL, 100 * j,
                            100 * i, 70, title, &lpd, seq);
        }
    }
    pixConvertToPdf(pix1, L_G4_ENCODE, 0, "/tmp/lept/pdf/file11.pdf", 0, 0, 80,
                    NULL, &lpd, L_LAST_IMAGE);
    l_pdfSetG4ImageMask(1);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
#endif

#if 1
    /* -------- pdf convert segmented with no image regions -------- */
    fprintf(stderr, "\n*** Writing segmented images without image regions\n");

    pix1 = pixRead("rabi.png");
    pix2 = pixScaleToGray2(pix1);
    pixWrite("/tmp/lept/pdf/rabi8.jpg", pix2, IFF_JFIF_JPEG);
    pix3 = pixThresholdTo4bpp(pix2, 16, 1);
    pixWrite("/tmp/lept/pdf/rabi4.png", pix3, IFF_PNG);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);

        /* 1 bpp input */
    convertToPdfSegmented("rabi.png", 300, L_G4_ENCODE, 128, NULL, 0, 0,
                          NULL, "/tmp/lept/pdf/file12.pdf");
    convertToPdfSegmented("rabi.png", 300, L_JPEG_ENCODE, 128, NULL, 0, 0,
                          NULL, "/tmp/lept/pdf/file13.pdf");
    convertToPdfSegmented("rabi.png", 300, L_FLATE_ENCODE, 128, NULL, 0, 0,
                          NULL, "/tmp/lept/pdf/file14.pdf");

        /* 8 bpp input, no cmap */
    convertToPdfSegmented("/tmp/lept/pdf/rabi8.jpg", 150, L_G4_ENCODE, 128,
                          NULL, 0, 0, NULL, "/tmp/lept/pdf/file15.pdf");
    convertToPdfSegmented("/tmp/lept/pdf/rabi8.jpg", 150, L_JPEG_ENCODE, 128,
                          NULL, 0, 0, NULL, "/tmp/lept/pdf/file16.pdf");
    convertToPdfSegmented("/tmp/lept/pdf/rabi8.jpg", 150, L_FLATE_ENCODE, 128,
                          NULL, 0, 0, NULL, "/tmp/lept/pdf/file17.pdf");

        /* 4 bpp input, cmap */
    convertToPdfSegmented("/tmp/lept/pdf/rabi4.png", 150, L_G4_ENCODE, 128,
                          NULL, 0, 0, NULL, "/tmp/lept/pdf/file18.pdf");
    convertToPdfSegmented("/tmp/lept/pdf/rabi4.png", 150, L_JPEG_ENCODE, 128,
                          NULL, 0, 0, NULL, "/tmp/lept/pdf/file19.pdf");
    convertToPdfSegmented("/tmp/lept/pdf/rabi4.png", 150, L_FLATE_ENCODE, 128,
                          NULL, 0, 0, NULL, "/tmp/lept/pdf/file20.pdf");

#endif

#if 1
    /* ---------- pdf convert segmented with image regions ---------- */
    fprintf(stderr, "\n*** Writing segmented images with image regions\n");

        /* Get the image region(s) for rabi.png.  There are two
         * small bogus regions at the top, but we'll keep them for
         * the demonstration. */
    pix1 = pixRead("rabi.png");
    pixSetResolution(pix1, 300, 300);
    pixGetDimensions(pix1, &w, &h, NULL);
    pix2 = pixGenerateHalftoneMask(pix1, NULL, NULL, NULL);
    pix3 = pixMorphSequence(pix2, "c20.1 + c1.20", 0);
    boxa1 = pixConnComp(pix3, NULL, 8);
    boxa2 = boxaTransform(boxa1, 0, 0, 0.5, 0.5);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);

        /* 1 bpp input */
    convertToPdfSegmented("rabi.png", 300, L_G4_ENCODE, 128, boxa1,
                          0, 0.25, NULL, "/tmp/lept/pdf/file21.pdf");
    convertToPdfSegmented("rabi.png", 300, L_JPEG_ENCODE, 128, boxa1,
                          0, 0.25, NULL, "/tmp/lept/pdf/file22.pdf");
    convertToPdfSegmented("rabi.png", 300, L_FLATE_ENCODE, 128, boxa1,
                          0, 0.25, NULL, "/tmp/lept/pdf/file23.pdf");

        /* 8 bpp input, no cmap */
    convertToPdfSegmented("/tmp/lept/pdf/rabi8.jpg", 150, L_G4_ENCODE, 128,
                          boxa2, 0, 0.5, NULL, "/tmp/lept/pdf/file24.pdf");
    convertToPdfSegmented("/tmp/lept/pdf/rabi8.jpg", 150, L_JPEG_ENCODE, 128,
                          boxa2, 0, 0.5, NULL, "/tmp/lept/pdf/file25.pdf");
    convertToPdfSegmented("/tmp/lept/pdf/rabi8.jpg", 150, L_FLATE_ENCODE, 128,
                          boxa2, 0, 0.5, NULL, "/tmp/lept/pdf/file26.pdf");

        /* 4 bpp input, cmap */
    convertToPdfSegmented("/tmp/lept/pdf/rabi4.png", 150, L_G4_ENCODE, 128,
                          boxa2, 0, 0.5, NULL, "/tmp/lept/pdf/file27.pdf");
    convertToPdfSegmented("/tmp/lept/pdf/rabi4.png", 150, L_JPEG_ENCODE, 128,
                          boxa2, 0, 0.5, NULL, "/tmp/lept/pdf/file28.pdf");
    convertToPdfSegmented("/tmp/lept/pdf/rabi4.png", 150, L_FLATE_ENCODE, 128,
                          boxa2, 0, 0.5, NULL, "/tmp/lept/pdf/file29.pdf");

        /* 4 bpp input, cmap, data output */
    data = NULL;
    convertToPdfDataSegmented("/tmp/lept/pdf/rabi4.png", 150, L_G4_ENCODE,
                              128, boxa2, 0, 0.5, NULL, &data, &nbytes);
    l_binaryWrite("/tmp/lept/pdf/file30.pdf", "w", data, nbytes);
    lept_free(data);
    convertToPdfDataSegmented("/tmp/lept/pdf/rabi4.png", 150, L_JPEG_ENCODE,
                              128, boxa2, 0, 0.5, NULL, &data, &nbytes);
    l_binaryWrite("/tmp/lept/pdf/file31.pdf", "w", data, nbytes);
    lept_free(data);
    convertToPdfDataSegmented("/tmp/lept/pdf/rabi4.png", 150, L_FLATE_ENCODE,
                              128, boxa2, 0, 0.5, NULL, &data, &nbytes);
    l_binaryWrite("/tmp/lept/pdf/file32.pdf", "w", data, nbytes);
    lept_free(data);

    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
#endif


#if 1
    /* -------- pdf convert segmented from color image -------- */
    fprintf(stderr, "\n*** Writing color segmented images\n");

    pix1 = pixRead("candelabrum.011.jpg");
    pix2 = pixScale(pix1, 3.0, 3.0);
    pixWrite("/tmp/lept/pdf/candelabrum3.jpg", pix2, IFF_JFIF_JPEG);
    GetImageMask(pix2, 200, &boxa1, "/tmp/lept/pdf/seg1.jpg");
    convertToPdfSegmented("/tmp/lept/pdf/candelabrum3.jpg", 200, L_G4_ENCODE,
                          100, boxa1, 0, 0.25, NULL,
                          "/tmp/lept/pdf/file33.pdf");
    convertToPdfSegmented("/tmp/lept/pdf/candelabrum3.jpg", 200, L_JPEG_ENCODE,
                          100, boxa1, 0, 0.25, NULL,
                          "/tmp/lept/pdf/file34.pdf");
    convertToPdfSegmented("/tmp/lept/pdf/candelabrum3.jpg", 200, L_FLATE_ENCODE,
                          100, boxa1, 0, 0.25, NULL,
                          "/tmp/lept/pdf/file35.pdf");

    pixDestroy(&pix1);
    pixDestroy(&pix2);
    boxaDestroy(&boxa1);

    pix1 = pixRead("lion-page.00016.jpg");
    pix2 = pixScale(pix1, 3.0, 3.0);
    pixWrite("/tmp/lept/pdf/lion16.jpg", pix2, IFF_JFIF_JPEG);
    pix3 = pixRead("lion-mask.00016.tif");
    boxa1 = pixConnComp(pix3, NULL, 8);
    boxa2 = boxaTransform(boxa1, 0, 0, 3.0, 3.0);
    convertToPdfSegmented("/tmp/lept/pdf/lion16.jpg", 200, L_G4_ENCODE,
                          190, boxa2, 0, 0.5, NULL, "/tmp/lept/pdf/file36.pdf");
    convertToPdfSegmented("/tmp/lept/pdf/lion16.jpg", 200, L_JPEG_ENCODE,
                          190, boxa2, 0, 0.5, NULL, "/tmp/lept/pdf/file37.pdf");
    convertToPdfSegmented("/tmp/lept/pdf/lion16.jpg", 200, L_FLATE_ENCODE,
                          190, boxa2, 0, 0.5, NULL, "/tmp/lept/pdf/file38.pdf");

        /* Quantize the non-image part and flate encode.
         * This is useful because it results in a smaller file than
         * when you flate-encode the un-quantized non-image regions. */
    pix4 = pixScale(pix3, 3.0, 3.0);  /* higher res mask, for combining */
    pix5 = QuantizeNonImageRegion(pix2, pix4, 12);
    pixWrite("/tmp/lept/pdf/lion16-quant.png", pix5, IFF_PNG);
    convertToPdfSegmented("/tmp/lept/pdf/lion16-quant.png", 200, L_FLATE_ENCODE,
                          190, boxa2, 0, 0.5, NULL, "/tmp/lept/pdf/file39.pdf");

    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);
    pixDestroy(&pix4);
    pixDestroy(&pix5);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
#endif

#if 1
    /* ------------------ Test multipage pdf generation ----------------- */
    fprintf(stderr, "\n*** Writing multipage pdfs from single page pdfs\n");

        /* Generate a multi-page pdf from all these files */
    startTimer();
    concatenatePdf("/tmp/lept/pdf", "file", "/tmp/lept/pdf/cat_lept.pdf");
    fprintf(stderr,
            "All files have been concatenated: /tmp/lept/pdf/cat_lept.pdf\n"
                    "Concatenation time: %7.3f\n", stopTimer());
#endif

#if 1
    /* ----------- Test corruption recovery by concatenation ------------ */
        /* Put two good pdf files in a directory */
    lept_rmdir("lept/good");
    lept_mkdir("lept/good");
    lept_cp("testfile1.pdf", "lept/good", NULL, NULL);
    lept_cp("testfile2.pdf", "lept/good", NULL, NULL);
    concatenatePdf("/tmp/lept/good", "file", "/tmp/lept/pdf/good.pdf");

        /* Make a bad version with the pdf id removed, so that it is not
         * recognized as a pdf */
    lept_rmdir("lept/bad");
    lept_mkdir("lept/bad");
    ba = l_byteaInitFromFile("testfile2.pdf");
    data = l_byteaGetData(ba, &nbytes);
    l_binaryWrite("/tmp/lept/bad/testfile0.notpdf.pdf", "w",
                  data + 10, nbytes - 10);

        /* Make a version with a corrupted trailer */
    if (data)
        data[2297] = '2';  /* munge trailer object 6: change 458 --> 428 */
    l_binaryWrite("/tmp/lept/bad/testfile2.bad.pdf", "w", data, nbytes);
    l_byteaDestroy(&ba);

        /* Copy testfile1.pdf to the /tmp/lept/bad directory.  Then
         * run concat on the bad files.  The "not pdf" file should be
         * ignored, and the corrupted pdf file should be properly parsed,
         * so the resulting concatenated pdf files should be identical.  */
    fprintf(stderr, "\nWe attempt to build from the bad directory\n");
    lept_cp("testfile1.pdf", "lept/bad", NULL, NULL);
    concatenatePdf("/tmp/lept/bad", "file", "/tmp/lept/pdf/bad.pdf");
    filesAreIdentical("/tmp/lept/pdf/good.pdf", "/tmp/lept/pdf/bad.pdf", &same);
    if (same)
        fprintf(stderr, "Fixed: files are the same\n"
                        "Attempt succeeded\n");
    else
        fprintf(stderr, "Busted: files are different\n");
#endif

#if 0
    fprintf(stderr, "\n*** pdftk writes multipage pdfs from images\n");
    tempfile1 = genPathname("/tmp/lept/pdf", "file*.pdf");
    tempfile2 = genPathname("/tmp/lept/pdf", "cat_pdftk.pdf");
    snprintf(buffer, sizeof(buffer), "pdftk %s output %s",
             tempfile1, tempfile2);
    ret = system(buffer);  /* pdftk */
    lept_free(tempfile1);
    lept_free(tempfile2);
#endif

#if 1
    /* -- Test simple interface for generating multi-page pdf from images -- */
    fprintf(stderr, "\n*** Writing multipage pdfs from images\n");

        /* Put four image files in a directory.  They will be encoded thus:
         *     file1.png:  flate (8 bpp, only 10 colors)
         *     file2.jpg:  dct (8 bpp, 256 colors because of the jpeg encoding)
         *     file3.tif:  g4 (1 bpp)
         *     file4.jpg:  dct (32 bpp)    */
    lept_mkdir("lept/image");
    pix1 = pixRead("feyn.tif");
    pix2 = pixRead("rabi.png");
    pix3 = pixScaleToGray3(pix1);
    pix4 = pixScaleToGray3(pix2);
    pix5 = pixScale(pix1, 0.33, 0.33);
    pix6 = pixRead("test24.jpg");
    pixWrite("/tmp/lept/image/file1.png", pix3, IFF_PNG);  /* 10 colors */
    pixWrite("/tmp/lept/image/file2.jpg", pix4, IFF_JFIF_JPEG); /* 256 colors */
    pixWrite("/tmp/lept/image/file3.tif", pix5, IFF_TIFF_G4);
    pixWrite("/tmp/lept/image/file4.jpg", pix6, IFF_JFIF_JPEG);

    startTimer();
    convertFilesToPdf("/tmp/lept/image", "file", 100, 0.8, 0, 75, "4 file test",
                      "/tmp/lept/pdf/fourimages.pdf");
    fprintf(stderr, "4-page pdf generated: /tmp/lept/pdf/fourimages.pdf\n"
                    "Time: %7.3f\n", stopTimer());
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);
    pixDestroy(&pix4);
    pixDestroy(&pix5);
    pixDestroy(&pix6);
#endif

    return 0;
}
Exemple #6
0
/*!
 *  pixSplitIntoCharacters()
 *
 *      Input:  pixs (1 bpp, contains only deskewed text)
 *              minw (minimum component width for initial filtering; typ. 4)
 *              minh (minimum component height for initial filtering; typ. 4)
 *              &boxa (<optional return> character bounding boxes)
 *              &pixa (<optional return> character images)
 *              &pixdebug (<optional return> showing splittings)
 *
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) This is a simple function that attempts to find split points
 *          based on vertical pixel profiles.
 *      (2) It should be given an image that has an arbitrary number
 *          of text characters.
 *      (3) The returned pixa includes the boxes from which the
 *          (possibly split) components are extracted.
 */
l_int32
pixSplitIntoCharacters(PIX     *pixs,
                       l_int32  minw,
                       l_int32  minh,
                       BOXA   **pboxa,
                       PIXA   **ppixa,
                       PIX    **ppixdebug)
{
l_int32  ncomp, i, xoff, yoff;
BOXA   *boxa1, *boxa2, *boxat1, *boxat2, *boxad;
BOXAA  *baa;
PIX    *pix, *pix1, *pix2, *pixdb;
PIXA   *pixa1, *pixadb;

    PROCNAME("pixSplitIntoCharacters");

    if (pboxa) *pboxa = NULL;
    if (ppixa) *ppixa = NULL;
    if (ppixdebug) *ppixdebug = NULL;
    if (!pixs || pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);

        /* Remove the small stuff */
    pix1 = pixSelectBySize(pixs, minw, minh, 8, L_SELECT_IF_BOTH,
                           L_SELECT_IF_GT, NULL);

        /* Small vertical close for consolidation */
    pix2 = pixMorphSequence(pix1, "c1.10", 0);
    pixDestroy(&pix1);

        /* Get the 8-connected components */
    boxa1 = pixConnComp(pix2, &pixa1, 8);
    pixDestroy(&pix2);
    boxaDestroy(&boxa1);

        /* Split the components if obvious */
    ncomp = pixaGetCount(pixa1);
    boxa2 = boxaCreate(ncomp);
    pixadb = (ppixdebug) ? pixaCreate(ncomp) : NULL;
    for (i = 0; i < ncomp; i++) {
        pix = pixaGetPix(pixa1, i, L_CLONE);
        if (ppixdebug) {
            boxat1 = pixSplitComponentWithProfile(pix, 10, 7, &pixdb);
            if (pixdb)
                pixaAddPix(pixadb, pixdb, L_INSERT);
        } else {
            boxat1 = pixSplitComponentWithProfile(pix, 10, 7, NULL);
        }
        pixaGetBoxGeometry(pixa1, i, &xoff, &yoff, NULL, NULL);
        boxat2 = boxaTransform(boxat1, xoff, yoff, 1.0, 1.0);
        boxaJoin(boxa2, boxat2, 0, -1);
        pixDestroy(&pix);
        boxaDestroy(&boxat1);
        boxaDestroy(&boxat2);
    }
    pixaDestroy(&pixa1);

        /* Generate the debug image */
    if (ppixdebug) {
        if (pixaGetCount(pixadb) > 0) {
            *ppixdebug = pixaDisplayTiledInRows(pixadb, 32, 1500,
                                                1.0, 0, 20, 1);
        }
        pixaDestroy(&pixadb);
    }

        /* Do a 2D sort on the bounding boxes, and flatten the result to 1D */
    baa = boxaSort2d(boxa2, NULL, 0, 0, 5);
    boxad = boxaaFlattenToBoxa(baa, NULL, L_CLONE);
    boxaaDestroy(&baa);
    boxaDestroy(&boxa2);

        /* Optionally extract the pieces from the input image */
    if (ppixa)
        *ppixa = pixClipRectangles(pixs, boxad);
    if (pboxa)
        *pboxa = boxad;
    else
        boxaDestroy(&boxad);
    return 0;
}
Exemple #7
0
main(int    argc,
     char **argv)
{
l_int32      h;
l_float32    scalefactor;
BOX         *box;
BOXA        *boxa1, *boxa2;
BOXAA       *baa;
PIX         *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7, *pix8, *pix9;
L_REGPARAMS  *rp;

    if (regTestSetup(argc, argv, &rp))
        return 1;

    lept_rmdir("segtest");
    lept_mkdir("segtest");
    baa = boxaaCreate(5);

        /* Image region input.  */
    pix1 = pixRead("wet-day.jpg");
    pix2 = pixScaleToSize(pix1, WIDTH, 0);
    pixWrite("/tmp/segtest/0.jpg", pix2, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/0.jpg");   /* 0 */
    box = boxCreate(105, 161, 620, 872);   /* image region */
    boxa1 = boxaCreate(1);
    boxaAddBox(boxa1, box, L_INSERT);
    boxaaAddBoxa(baa, boxa1, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);

        /* Compute image region at w = 2 * WIDTH */
    pix1 = pixRead("candelabrum-11.jpg");
    pix2 = pixScaleToSize(pix1, WIDTH, 0);
    pix3 = pixConvertTo1(pix2, 100);
    pix4 = pixExpandBinaryPower2(pix3, 2);  /* w = 2 * WIDTH */
    pix5 = pixGenHalftoneMask(pix4, NULL, NULL, 1);
    pix6 = pixMorphSequence(pix5, "c20.1 + c1.20", 0);
    pix7 = pixMaskConnComp(pix6, 8, &boxa1);
    pix8 = pixReduceBinary2(pix7, NULL);  /* back to w = WIDTH */
    pix9 = pixBackgroundNormSimple(pix2, pix8, NULL);
    pixWrite("/tmp/segtest/1.jpg", pix9, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/1.jpg");   /* 1 */
    boxa2 = boxaTransform(boxa1, 0, 0, 0.5, 0.5);  /* back to w = WIDTH */
    boxaaAddBoxa(baa, boxa2, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);
    pixDestroy(&pix4);
    pixDestroy(&pix5);
    pixDestroy(&pix6);
    pixDestroy(&pix7);
    pixDestroy(&pix8);
    pixDestroy(&pix9);
    boxaDestroy(&boxa1);

        /* Use mask to find image region */
    pix1 = pixRead("lion-page.00016.jpg");
    pix2 = pixScaleToSize(pix1, WIDTH, 0);
    pixWrite("/tmp/segtest/2.jpg", pix2, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/2.jpg");   /* 2 */
    pix3 = pixRead("lion-mask.00016.tif");
    pix4 = pixScaleToSize(pix3, WIDTH, 0);
    boxa1 = pixConnComp(pix4, NULL, 8);
    boxaaAddBoxa(baa, boxa1, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);
    pixDestroy(&pix4);

        /* Compute image region at full res */
    pix1 = pixRead("rabi.png");
    scalefactor = (l_float32)WIDTH / (l_float32)pixGetWidth(pix1);
    pix2 = pixScaleToGray(pix1, scalefactor);
    pixWrite("/tmp/segtest/3.jpg", pix2, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/3.jpg");   /* 3 */
    pix3 = pixGenHalftoneMask(pix1, NULL, NULL, 0);
    pix4 = pixMorphSequence(pix3, "c20.1 + c1.20", 0);
    boxa1 = pixConnComp(pix4, NULL, 8);
    boxa2 = boxaTransform(boxa1, 0, 0, scalefactor, scalefactor);
    boxaaAddBoxa(baa, boxa2, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);
    pixDestroy(&pix4);
    boxaDestroy(&boxa1);

        /* Page with no image regions */
    pix1 = pixRead("lucasta-47.jpg");
    pix2 = pixScaleToSize(pix1, WIDTH, 0);
    boxa1 = boxaCreate(1);
    pixWrite("/tmp/segtest/4.jpg", pix2, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/4.jpg");   /* 4 */
    boxaaAddBoxa(baa, boxa1, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);

        /* Page that is all image */
    pix1 = pixRead("map1.jpg");
    pix2 = pixScaleToSize(pix1, WIDTH, 0);
    pixWrite("/tmp/segtest/5.jpg", pix2, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/5.jpg");   /* 5 */
    h = pixGetHeight(pix2);
    box = boxCreate(0, 0, WIDTH, h);
    boxa1 = boxaCreate(1);
    boxaAddBox(boxa1, box, L_INSERT);
    boxaaAddBoxa(baa, boxa1, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);

        /* Save the boxaa file */
    boxaaWrite("/tmp/segtest/seg.baa", baa);
    regTestCheckFile(rp, "/tmp/segtest/seg.baa");   /* 6 */

        /* Do the conversion */
    l_pdfSetDateAndVersion(FALSE);
    convertSegmentedFilesToPdf("/tmp/segtest", ".jpg", 100, L_G4_ENCODE,
                               140, baa, 75, 0.6, "Segmentation Test",
                               "/tmp/pdfseg.7.pdf");
    regTestCheckFile(rp, "/tmp/pdfseg.7.pdf");   /* 7 */

    boxaaDestroy(&baa);
    return regTestCleanup(rp);
}
Exemple #8
0
void static
TestBoxa(L_REGPARAMS  *rp,
         l_int32       index)
{
l_uint8   *data;
l_int32    w, h, medw, medh, isame;
size_t     size;
l_float32  scalefact, devw, devh, ratiowh, fvarp, fvarm;
BOXA      *boxa1, *boxa2, *boxa3;
PIX       *pix1;

    boxa1 = boxaRead(boxafiles[index]);

        /* Read and display initial boxa */
    boxaGetExtent(boxa1, &w, &h, NULL);
    scalefact = 100.0 / (l_float32)w;
    boxa2 = boxaTransform(boxa1, 0, 0, scalefact, scalefact);
    boxaWriteMem(&data, &size, boxa2);
    regTestWriteDataAndCheck(rp, data, size, "ba");  /* 0, 13, 26 */
    lept_free(data);
    pix1 = boxaDisplayTiled(boxa2, NULL, 0, -1, 2200, 2, 1.0, 0, 3, 2);
    regTestWritePixAndCheck(rp, pix1, IFF_PNG);   /* 1, 14, 27 */
    pixDisplayWithTitle(pix1, 0, 0, NULL, rp->display);
    pixDestroy(&pix1);

        /* Find the median sizes */
    boxaMedianDimensions(boxa2, &medw, &medh, NULL, NULL, NULL, NULL,
                         NULL, NULL);
    if (rp->display)
        fprintf(stderr, "median width = %d, median height = %d\n", medw, medh);

        /* Check for deviations from median by pairs: method 1 */
    boxaSizeConsistency1(boxa2, L_CHECK_HEIGHT, 0.0, 0.0,
                         &fvarp, &fvarm, &isame);
    regTestCompareValues(rp, varp[index], fvarp, 0.003);  /* 2, 15, 28 */
    regTestCompareValues(rp, varm[index], fvarm, 0.003);  /* 3, 16, 29 */
    regTestCompareValues(rp, same[index], isame, 0);  /* 4, 17, 30 */
    if (rp->display)
        fprintf(stderr, "fvarp = %7.4f, fvarm = %7.4f, same = %d\n",
                fvarp, fvarm, isame);

        /* Check for deviations from median by pairs: method 2 */
    boxaSizeConsistency2(boxa2, &devw, &devh, 0);
    regTestCompareValues(rp, devwidth[index], devw, 0.001);  /* 5, 18, 31 */
    regTestCompareValues(rp, devheight[index], devh, 0.001);  /* 6, 19, 32 */
    if (rp->display)
        fprintf(stderr, "dev width = %7.4f, dev height = %7.4f\n", devw, devh);

        /* Reconcile widths */
    boxa3 = boxaReconcileSizeByMedian(boxa2, L_CHECK_WIDTH, 0.05, 0.04, 1.03,
                                      NULL, NULL, &ratiowh);
    boxaWriteMem(&data, &size, boxa3);
    regTestWriteDataAndCheck(rp, data, size, "ba");  /* 7, 20, 33 */
    lept_free(data);
    pix1 = boxaDisplayTiled(boxa3, NULL, 0, -1, 2200, 2, 1.0, 0, 3, 2);
    regTestWritePixAndCheck(rp, pix1, IFF_PNG);   /* 8, 21, 34 */
    pixDisplayWithTitle(pix1, 500, 0, NULL, rp->display);
    if (rp->display)
        fprintf(stderr, "ratio median width/height = %6.3f\n", ratiowh);
    boxaDestroy(&boxa3);
    pixDestroy(&pix1);

        /* Reconcile heights */
    boxa3 = boxaReconcileSizeByMedian(boxa2, L_CHECK_HEIGHT, 0.05, 0.04, 1.03,
                                      NULL, NULL, NULL);
    boxaWriteMem(&data, &size, boxa3);
    regTestWriteDataAndCheck(rp, data, size, "ba");  /* 9, 22, 35 */
    lept_free(data);
    pix1 = boxaDisplayTiled(boxa3, NULL, 0, -1, 2200, 2, 1.0, 0, 3, 2);
    regTestWritePixAndCheck(rp, pix1, IFF_PNG);   /* 10, 23, 36 */
    pixDisplayWithTitle(pix1, 1000, 0, NULL, rp->display);
    boxaDestroy(&boxa3);
    pixDestroy(&pix1);

        /* Reconcile both widths and heights */
    boxa3 = boxaReconcileSizeByMedian(boxa2, L_CHECK_BOTH, 0.05, 0.04, 1.03,
                                      NULL, NULL, NULL);
    boxaWriteMem(&data, &size, boxa3);
    regTestWriteDataAndCheck(rp, data, size, "ba");  /* 11, 24, 37 */
    lept_free(data);
    pix1 = boxaDisplayTiled(boxa3, NULL, 0, -1, 2200, 2, 1.0, 0, 3, 2);
    regTestWritePixAndCheck(rp, pix1, IFF_PNG);   /* 12, 25, 38 */
    pixDisplayWithTitle(pix1, 1500, 0, NULL, rp->display);
    boxaDestroy(&boxa3);
    pixDestroy(&pix1);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
    boxaDestroy(&boxa3);
}
Exemple #9
0
int main(int    argc,
         char **argv)
{
    l_uint8     *data1, *data2;
    l_int32      i, same, w, h, width, success, nba;
    size_t       size1, size2;
    l_float32    diffarea, diffxor, scalefact;
    BOX         *box;
    BOXA        *boxa1, *boxa2, *boxa3;
    BOXAA       *baa1, *baa2, *baa3;
    PIX         *pix1, *pixdb;
    PIXA        *pixa1, *pixa2;
    static char  mainName[] = "boxa1_reg";

    if (argc != 1)
        return ERROR_INT(" Syntax: boxa1_reg", mainName, 1);

    lept_mkdir("lept/boxa");

    /* Make a boxa and display its contents */
    boxa1 = boxaCreate(6);
    box = boxCreate(60, 60, 40, 20);
    boxaAddBox(boxa1, box, L_INSERT);
    box = boxCreate(120, 50, 20, 50);
    boxaAddBox(boxa1, box, L_INSERT);
    box = boxCreate(50, 140, 46, 60);
    boxaAddBox(boxa1, box, L_INSERT);
    box = boxCreate(166, 130, 64, 28);
    boxaAddBox(boxa1, box, L_INSERT);
    box = boxCreate(64, 224, 44, 34);
    boxaAddBox(boxa1, box, L_INSERT);
    box = boxCreate(117, 206, 26, 74);
    boxaAddBox(boxa1, box, L_INSERT);
    pix1 = DisplayBoxa(boxa1);
    pixDisplay(pix1, 100, 100);
    pixDestroy(&pix1);

    boxaCompareRegions(boxa1, boxa1, 100, &same, &diffarea, &diffxor, NULL);
    fprintf(stderr, "same = %d, diffarea = %5.3f, diffxor = %5.3f\n",
            same, diffarea, diffxor);

    boxa2 = boxaTransform(boxa1, -13, -13, 1.0, 1.0);
    boxaCompareRegions(boxa1, boxa2, 10, &same, &diffarea, &diffxor, NULL);
    fprintf(stderr, "same = %d, diffarea = %5.3f, diffxor = %5.3f\n",
            same, diffarea, diffxor);
    boxaDestroy(&boxa2);

    boxa2 = boxaReconcileEvenOddHeight(boxa1, L_ADJUST_TOP_AND_BOT, 6,
                                       L_ADJUST_CHOOSE_MIN, 1.0, 0);
    pix1 = DisplayBoxa(boxa2);
    pixDisplay(pix1, 100, 500);
    pixDestroy(&pix1);

    boxaCompareRegions(boxa1, boxa2, 10, &same, &diffarea, &diffxor, &pixdb);
    fprintf(stderr, "same = %d, diffarea = %5.3f, diffxor = %5.3f\n",
            same, diffarea, diffxor);
    pixDisplay(pixdb, 700, 100);

    pixDestroy(&pixdb);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);

    /* Input is a fairly clean boxa */
    boxa1 = boxaRead("boxa1.ba");
    boxa2 = boxaReconcileEvenOddHeight(boxa1, L_ADJUST_TOP, 80,
                                       L_ADJUST_CHOOSE_MIN, 1.05, 1);
    width = 100;
    boxaGetExtent(boxa2, &w, &h, NULL);
    scalefact = (l_float32)width / (l_float32)w;
    boxa3 = boxaTransform(boxa2, 0, 0, scalefact, scalefact);
    pix1 = boxaDisplayTiled(boxa3, NULL, 1500, 2, 1.0, 0, 3, 2);
    pixDisplay(pix1, 0, 100);
    pixWrite("/tmp/lept/boxa/pix1.png", pix1, IFF_PNG);
    pixDestroy(&pix1);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
    boxaDestroy(&boxa3);

    /* Input is an unsmoothed and noisy boxa */
    boxa1 = boxaRead("boxa2.ba");
    boxa2 = boxaReconcileEvenOddHeight(boxa1, L_ADJUST_TOP, 80,
                                       L_ADJUST_CHOOSE_MIN, 1.05, 1);
    width = 100;
    boxaGetExtent(boxa2, &w, &h, NULL);
    scalefact = (l_float32)width / (l_float32)w;
    boxa3 = boxaTransform(boxa2, 0, 0, scalefact, scalefact);
    pix1 = boxaDisplayTiled(boxa3, NULL, 1500, 2, 1.0, 0, 3, 2);
    pixDisplay(pix1, 500, 100);
    pixWrite("/tmp/lept/boxa/pix2.png", pix1, IFF_PNG);
    pixDestroy(&pix1);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
    boxaDestroy(&boxa3);

    /* Input is a boxa smoothed with a median window filter */
    boxa1 = boxaRead("boxa3.ba");
    boxa2 = boxaReconcileEvenOddHeight(boxa1, L_ADJUST_TOP, 80,
                                       L_ADJUST_CHOOSE_MIN, 1.05, 1);
    width = 100;
    boxaGetExtent(boxa2, &w, &h, NULL);
    scalefact = (l_float32)width / (l_float32)w;
    boxa3 = boxaTransform(boxa2, 0, 0, scalefact, scalefact);
    pix1 = boxaDisplayTiled(boxa3, NULL, 1500, 2, 1.0, 0, 3, 2);
    pixDisplay(pix1, 1000, 100);
    pixWrite("/tmp/lept/boxa/pix3.png", pix1, IFF_PNG);
    pixDestroy(&pix1);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
    boxaDestroy(&boxa3);

    /* Test serialized boxa I/O to and from memory */
    data1 = l_binaryRead("boxa2.ba", &size1);
    boxa1 = boxaReadMem(data1, size1);
    boxaWriteMem(&data2, &size2, boxa1);
    boxa2 = boxaReadMem(data2, size2);
    boxaWrite("/tmp/lept/boxa/boxa1.ba", boxa1);
    boxaWrite("/tmp/lept/boxa/boxa2.ba", boxa2);
    filesAreIdentical("/tmp/lept/boxa/boxa1.ba", "/tmp/lept/boxa/boxa2.ba",
                      &same);
    if (same)
        fprintf(stderr, "Good: boxes files are identical\n");
    else
        fprintf(stderr, "Bad: boxes files differ\n");
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
    lept_free(data1);
    lept_free(data2);

    /* Test pixaDisplayBoxaa() */
    pixa1 = pixaReadBoth("showboxes.pac");
    baa1 = boxaaRead("showboxes1.baa");
    baa2 = boxaaTranspose(baa1);
    baa3 = boxaaTranspose(baa2);
    nba = boxaaGetCount(baa1);
    success = TRUE;
    for (i = 0; i < nba; i++) {
        boxa1 = boxaaGetBoxa(baa1, i, L_CLONE);
        boxa2 = boxaaGetBoxa(baa3, i, L_CLONE);
        boxaEqual(boxa1, boxa2, 0, NULL, &same);
        boxaDestroy(&boxa1);
        boxaDestroy(&boxa2);
        if (!same) success = FALSE;
    }
    if (success)
        fprintf(stderr, "Good: transpose is reversible\n");
    else
        fprintf(stderr, "Bad: transpose failed\n");
    pixa2 = pixaDisplayBoxaa(pixa1, baa2, L_DRAW_RGB, 2);
    pix1 = pixaDisplayTiledInRows(pixa2, 32, 1400, 1.0, 0, 10, 0);
    pixDisplay(pix1, 0, 600);
    fprintf(stderr, "Writing to: /tmp/lept/boxa/show.pdf\n");
    pixaConvertToPdf(pixa2, 75, 1.0, 0, 0, NULL, "/tmp/lept/boxa/show.pdf");
    pixDestroy(&pix1);
    pixaDestroy(&pixa1);
    pixaDestroy(&pixa2);
    boxaaDestroy(&baa1);
    boxaaDestroy(&baa2);
    boxaaDestroy(&baa3);

    return 0;
}