void
MakeWordBoxes2(PIX          *pixs,
               l_int32       reduction,
               L_REGPARAMS  *rp)
{
l_int32  default_minwidth = 10;
l_int32  default_minheight = 10;
l_int32  default_maxwidth = 400;
l_int32  default_maxheight = 70;
l_int32  minwidth, minheight, maxwidth, maxheight;
BOXA    *boxa1, *boxa2;
NUMA    *na;
PIX     *pixd1, *pixd2;
PIXA    *pixa;

    minwidth = default_minwidth / reduction;
    minheight = default_minheight / reduction;
    maxwidth = default_maxwidth / reduction;
    maxheight = default_maxheight / reduction;

        /* Get the word boxes */
    pixGetWordsInTextlines(pixs, reduction, minwidth, minheight,
                           maxwidth, maxheight, &boxa1, &pixa, &na);
    pixaDestroy(&pixa);
    numaDestroy(&na);
    if (reduction == 1)
        boxa2 = boxaCopy(boxa1, L_CLONE);
    else
        boxa2 = boxaTransform(boxa1, 0, 0, 2.0, 2.0);
    pixd1 = pixConvertTo8(pixs, 1);
    pixRenderBoxaArb(pixd1, boxa2, 2, 255, 0, 0);
    regTestWritePixAndCheck(rp, pixd1, IFF_PNG);
    pixDisplayWithTitle(pixd1, 800, 100, NULL, rp->display);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);

        /* Do it again with this interface.  The result should be the same. */
    pixGetWordBoxesInTextlines(pixs, reduction, minwidth, minheight,
                               maxwidth, maxheight, &boxa1, NULL);
    if (reduction == 1)
        boxa2 = boxaCopy(boxa1, L_CLONE);
    else
        boxa2 = boxaTransform(boxa1, 0, 0, 2.0, 2.0);
    pixd2 = pixConvertTo8(pixs, 1);
    pixRenderBoxaArb(pixd2, boxa2, 2, 255, 0, 0);
    if (regTestComparePix(rp, pixd1, pixd2)) {
        L_ERROR("pix not the same", "MakeWordBoxes2");
        pixDisplayWithTitle(pixd2, 800, 100, NULL, rp->display);
    }
    pixDestroy(&pixd1);
    pixDestroy(&pixd2);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
    return;
}
Exemple #2
0
int main(int    argc,
         char **argv)
{
char         filename[BUF_SIZE];
char        *dirin, *rootname, *fname;
l_int32      i, j, w, h, firstpage, npages, nfiles, ncomp;
l_int32      index, ival, rval, gval, bval;
BOX         *box;
BOXA        *boxa;
BOXAA       *baa;
JBDATA      *data;
JBCLASSER   *classer;
NUMA        *nai;
NUMAA       *naa;
SARRAY      *safiles;
PIX         *pixs, *pixt1, *pixt2, *pixd;
PIXCMAP     *cmap;
static char  mainName[] = "wordsinorder";

    if (argc != 3 && argc != 5)
        return ERROR_INT(
            " Syntax: wordsinorder dirin rootname [firstpage, npages]",
            mainName, 1);

    dirin = argv[1];
    rootname = argv[2];

    if (argc == 3) {
        firstpage = 0;
        npages = 0;
    }
    else {
        firstpage = atoi(argv[3]);
        npages = atoi(argv[4]);
    }

        /* Compute the word bounding boxes at 2x reduction, along with
         * the textlines that they are in. */
    safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages);
    nfiles = sarrayGetCount(safiles);
    baa = boxaaCreate(nfiles);
    naa = numaaCreate(nfiles);
    for (i = 0; i < nfiles; i++) {
        fname = sarrayGetString(safiles, i, 0);
        if ((pixs = pixRead(fname)) == NULL) {
            L_WARNING("image file %d not read\n", mainName, i);
            continue;
        }
        pixGetWordBoxesInTextlines(pixs, 2, MIN_WORD_WIDTH, MIN_WORD_HEIGHT,
                                   MAX_WORD_WIDTH, MAX_WORD_HEIGHT,
                                   &boxa, &nai);
        boxaaAddBoxa(baa, boxa, L_INSERT);
        numaaAddNuma(naa, nai, L_INSERT);

#if  RENDER_PAGES
            /* Show the results on a 2x reduced image, where each
             * word is outlined and the color of the box depends on the
             * computed textline. */
        pixt1 = pixReduceRankBinary2(pixs, 2, NULL);
        pixGetDimensions(pixt1, &w, &h, NULL);
        pixd = pixCreate(w, h, 8);
        cmap = pixcmapCreateRandom(8, 1, 1);  /* first color is black */
        pixSetColormap(pixd, cmap);

        pixt2 = pixUnpackBinary(pixt1, 8, 1);
        pixRasterop(pixd, 0, 0, w, h, PIX_SRC | PIX_DST, pixt2, 0, 0);
        ncomp = boxaGetCount(boxa);
        for (j = 0; j < ncomp; j++) {
            box = boxaGetBox(boxa, j, L_CLONE);
            numaGetIValue(nai, j, &ival);
            index = 1 + (ival % 254);  /* omit black and white */
            pixcmapGetColor(cmap, index, &rval, &gval, &bval);
            pixRenderBoxArb(pixd, box, 2, rval, gval, bval);
            boxDestroy(&box);
        }

        snprintf(filename, BUF_SIZE, "%s.%05d", rootname, i);
        fprintf(stderr, "filename: %s\n", filename);
        pixWrite(filename, pixd, IFF_PNG);
        pixDestroy(&pixt1);
        pixDestroy(&pixt2);
        pixDestroy(&pixs);
        pixDestroy(&pixd);
#endif  /* RENDER_PAGES */
    }

    boxaaDestroy(&baa);
    numaaDestroy(&naa);
    sarrayDestroy(&safiles);
    return 0;
}
Exemple #3
0
l_int32 main(int    argc,
             char **argv)
{
char        buf[64];
BOXA       *boxa1, *boxa2, *boxa3, *boxa4;
L_DEWARP   *dew;
L_DEWARPA  *dewa;
PIX        *pixs, *pixn, *pixg, *pixb, *pix2, *pix3, *pix4, *pix5, *pix6;

    lept_mkdir("lept");

    snprintf(buf, sizeof(buf), "cat.%03d.jpg", pageno);
    pixs = pixRead(buf);
    dewa = dewarpaCreate(40, 30, 1, 15, 10);
    dewarpaUseBothArrays(dewa, 1);

        /* Normalize for varying background and binarize */
    pixn = pixBackgroundNormSimple(pixs, NULL, NULL);
    pixg = pixConvertRGBToGray(pixn, 0.5, 0.3, 0.2);
    pixb = pixThresholdToBinary(pixg, 130);
    pixDisplay(pixb, 0, 100);

        /* Build the model */
    dew = dewarpCreate(pixb, pageno);
    dewarpaInsertDewarp(dewa, dew);
    if (build_output) {
        snprintf(buf, sizeof(buf), "/tmp/lept/dewarp_build_%d.pdf", pageno);
        dewarpBuildPageModel(dew, buf);
    } else {
        dewarpBuildPageModel(dew, NULL);
    }

        /* Apply the model */
    dewarpPopulateFullRes(dew, pixg, 0, 0);
    if (apply_output) {
        snprintf(buf, sizeof(buf), "/tmp/lept/dewarp_apply_%d.pdf", pageno);
        dewarpaApplyDisparity(dewa, pageno, pixb, 200, 0, 0, &pix2, buf);
    } else {
        dewarpaApplyDisparity(dewa, pageno, pixb, 200, 0, 0, &pix2, NULL);
    }
    pixDisplay(pix2, 200, 100);

        /* Reverse direction: get the word boxes for the dewarped pix ... */
    pixGetWordBoxesInTextlines(pix2, 5, 5, 500, 100, &boxa1, NULL);
    pix3 = pixConvertTo32(pix2);
    pixRenderBoxaArb(pix3, boxa1, 2, 255, 0, 0);
    pixDisplay(pix3, 400, 100);

        /* ... and map to the word boxes for the input image */
    if (map_output) {
        snprintf(buf, sizeof(buf), "/tmp/lept/dewarp_map1_%d.pdf", pageno);
        dewarpaApplyDisparityBoxa(dewa, pageno, pix2, boxa1, 0, 0, 0, &boxa2,
                                  buf);
    } else {
        dewarpaApplyDisparityBoxa(dewa, pageno, pix2, boxa1, 0, 0, 0, &boxa2,
                                  NULL);
    }
    pix4 = pixConvertTo32(pixb);
    pixRenderBoxaArb(pix4, boxa2, 2, 0, 255, 0);
    pixDisplay(pix4, 600, 100);

        /* Forward direction: get the word boxes for the input pix ... */
    pixGetWordBoxesInTextlines(pixb, 5, 5, 500, 100, &boxa3, NULL);
    pix5 = pixConvertTo32(pixb);
    pixRenderBoxaArb(pix5, boxa3, 2, 255, 0, 0);
    pixDisplay(pix5, 800, 100);

        /* ... and map to the word boxes for the dewarped image */
    if (map_output) {
        snprintf(buf, sizeof(buf), "/tmp/lept/dewarp_map2_%d.pdf", pageno);
        dewarpaApplyDisparityBoxa(dewa, pageno, pixb, boxa3, 1, 0, 0, &boxa4,
                                  buf);
    } else {
        dewarpaApplyDisparityBoxa(dewa, pageno, pixb, boxa3, 1, 0, 0, &boxa4,
                                  NULL);
    }
    pix6 = pixConvertTo32(pix2);
    pixRenderBoxaArb(pix6, boxa4, 2, 0, 255, 0);
    pixDisplay(pix6, 1000, 100);

    dewarpaDestroy(&dewa);
    pixDestroy(&pixs);
    pixDestroy(&pixn);
    pixDestroy(&pixg);
    pixDestroy(&pixb);
    pixDestroy(&pix2);
    pixDestroy(&pix3);
    pixDestroy(&pix4);
    pixDestroy(&pix5);
    pixDestroy(&pix6);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
    boxaDestroy(&boxa3);
    boxaDestroy(&boxa4);
    return 0;
}
main(int    argc,
     char **argv)
{
l_int32  i, n, w, h, success, display;
FILE    *fp;
BOXA    *boxa;
NUMA    *naindex, *naw, *nah, *naw_med, *nah_med;
PIX     *pixs, *pixt, *pixd;

    if (regTestSetup(argc, argv, &fp, &display, &success, NULL))
        return 1;

        /* Generate arrays of word widths and heights */
    pixs = pixRead("feyn.tif");
    pixGetWordBoxesInTextlines(pixs, 1, 6, 6, 500, 50, &boxa, &naindex);
    n = boxaGetCount(boxa);
    naw = numaCreate(0);
    nah = numaCreate(0);
    for (i = 0; i < n; i++) {
        boxaGetBoxGeometry(boxa, i, NULL, NULL, &w, &h);
        numaAddNumber(naw, w);
        numaAddNumber(nah, h);
    }
    boxaDestroy(&boxa);
    numaDestroy(&naindex);

        /* Make the rank bin arrays of median values, with 10 bins */
    numaGetRankBinValues(naw, 10, NULL, &naw_med);
    numaGetRankBinValues(nah, 10, NULL, &nah_med);
    gplotSimple1(naw_med, GPLOT_PNG, "/tmp/w_10bin", 
                 "width vs rank bins (10)");
    gplotSimple1(nah_med, GPLOT_PNG, "/tmp/h_10bin", 
                 "height vs rank bins (10)");
    numaDestroy(&naw_med);
    numaDestroy(&nah_med);

        /* Make the rank bin arrays of median values, with 30 bins */
    numaGetRankBinValues(naw, 30, NULL, &naw_med);
    numaGetRankBinValues(nah, 30, NULL, &nah_med);
    gplotSimple1(naw_med, GPLOT_PNG, "/tmp/w_30bin", 
                 "width vs rank bins (30)");
    gplotSimple1(nah_med, GPLOT_PNG, "/tmp/h_30bin", 
                 "height vs rank bins (30)");
    numaDestroy(&naw_med);
    numaDestroy(&nah_med);

        /* Give gnuplot time to write out the files */
#ifndef  _WIN32
    sleep(2);
#else
    Sleep(2000);
#endif  /* _WIN32 */

        /* Save as golden files, or check against them */
    regTestCheckFile(fp, argv, "/tmp/w_10bin.png", 0, &success);
    regTestCheckFile(fp, argv, "/tmp/h_10bin.png", 1, &success);
    regTestCheckFile(fp, argv, "/tmp/w_30bin.png", 2, &success);
    regTestCheckFile(fp, argv, "/tmp/h_30bin.png", 3, &success);

        /* Display results for debugging */
    pixt = pixRead("/tmp/w_10bin.png");
    pixDisplayWithTitle(pixt, 0, 0, NULL, display);
    pixDestroy(&pixt);
    pixt = pixRead("/tmp/h_10bin.png");
    pixDisplayWithTitle(pixt, 650, 0, NULL, display);
    pixDestroy(&pixt);
    pixt = pixRead("/tmp/w_30bin.png");
    pixDisplayWithTitle(pixt, 0, 550, NULL, display);
    pixDestroy(&pixt);
    pixt = pixRead("/tmp/h_30bin.png");
    pixDisplayWithTitle(pixt, 650, 550, NULL, display);
    pixDestroy(&pixt);

    pixDestroy(&pixs);
    numaDestroy(&naw);
    numaDestroy(&nah);
    regTestCleanup(argc, argv, fp, success, NULL);
    return 0;
}
Exemple #5
0
int main(int    argc,
         char **argv)
{
l_int32       i, n, w, h;
BOXA         *boxa;
NUMA         *naindex, *naw, *nah, *naw_med, *nah_med;
PIX          *pixs, *pixt;
L_REGPARAMS  *rp;

    if (regTestSetup(argc, argv, &rp))
        return 1;

        /* Generate arrays of word widths and heights */
    pixs = pixRead("feyn.tif");
    pixGetWordBoxesInTextlines(pixs, 1, 6, 6, 500, 50, &boxa, &naindex);
    n = boxaGetCount(boxa);
    naw = numaCreate(0);
    nah = numaCreate(0);
    for (i = 0; i < n; i++) {
        boxaGetBoxGeometry(boxa, i, NULL, NULL, &w, &h);
        numaAddNumber(naw, w);
        numaAddNumber(nah, h);
    }
    boxaDestroy(&boxa);
    numaDestroy(&naindex);

        /* Make the rank bin arrays of median values, with 10 bins */
    lept_rmfile("/tmp/lept/regout/w_10bin.png");  /* remove existing ones */
    lept_rmfile("/tmp/lept/regout/h_10bin.png");
    lept_rmfile("/tmp/lept/regout/w_30bin.png");
    lept_rmfile("/tmp/lept/regout/h_30bin.png");
    numaGetRankBinValues(naw, 10, NULL, &naw_med);
    numaGetRankBinValues(nah, 10, NULL, &nah_med);
    gplotSimple1(naw_med, GPLOT_PNG, "/tmp/lept/regout/w_10bin",
                 "width vs rank bins (10)");
    gplotSimple1(nah_med, GPLOT_PNG, "/tmp/lept/regout/h_10bin",
                 "height vs rank bins (10)");
    numaDestroy(&naw_med);
    numaDestroy(&nah_med);

        /* Make the rank bin arrays of median values, with 30 bins */
    numaGetRankBinValues(naw, 30, NULL, &naw_med);
    numaGetRankBinValues(nah, 30, NULL, &nah_med);
    gplotSimple1(naw_med, GPLOT_PNG, "/tmp/lept/regout/w_30bin",
                 "width vs rank bins (30)");
    gplotSimple1(nah_med, GPLOT_PNG, "/tmp/lept/regout/h_30bin",
                 "height vs rank bins (30)");
    numaDestroy(&naw_med);
    numaDestroy(&nah_med);

        /* Save as golden files, or check against them */
    regTestCheckFile(rp, "/tmp/lept/regout/w_10bin.png");  /* 0 */
    regTestCheckFile(rp, "/tmp/lept/regout/h_10bin.png");  /* 1 */
    regTestCheckFile(rp, "/tmp/lept/regout/w_30bin.png");  /* 2 */
    regTestCheckFile(rp, "/tmp/lept/regout/h_30bin.png");  /* 3 */

        /* Display results for debugging */
    pixt = pixRead("/tmp/lept/regout/w_10bin.png");
    pixDisplayWithTitle(pixt, 0, 0, NULL, rp->display);
    pixDestroy(&pixt);
    pixt = pixRead("/tmp/lept/regout/h_10bin.png");
    pixDisplayWithTitle(pixt, 650, 0, NULL, rp->display);
    pixDestroy(&pixt);
    pixt = pixRead("/tmp/lept/regout/w_30bin.png");
    pixDisplayWithTitle(pixt, 0, 550, NULL, rp->display);
    pixDestroy(&pixt);
    pixt = pixRead("/tmp/lept/regout/h_30bin.png");
    pixDisplayWithTitle(pixt, 650, 550, NULL, rp->display);
    pixDestroy(&pixt);

    pixDestroy(&pixs);
    numaDestroy(&naw);
    numaDestroy(&nah);
    return regTestCleanup(rp);
}