Esempio n. 1
0
int main(int    argc,
         char **argv)
{
l_int32       i, n, w, h;
BOXA         *boxa;
NUMA         *naindex, *naw, *nah, *naw_med, *nah_med;
PIX          *pixs, *pixt;
L_REGPARAMS  *rp;

    if (regTestSetup(argc, argv, &rp))
        return 1;

        /* Generate arrays of word widths and heights */
    pixs = pixRead("feyn.tif");
    pixGetWordBoxesInTextlines(pixs, 1, 6, 6, 500, 50, &boxa, &naindex);
    n = boxaGetCount(boxa);
    naw = numaCreate(0);
    nah = numaCreate(0);
    for (i = 0; i < n; i++) {
        boxaGetBoxGeometry(boxa, i, NULL, NULL, &w, &h);
        numaAddNumber(naw, w);
        numaAddNumber(nah, h);
    }
    boxaDestroy(&boxa);
    numaDestroy(&naindex);

        /* Make the rank bin arrays of median values, with 10 bins */
    lept_rmfile("/tmp/lept/regout/w_10bin.png");  /* remove existing ones */
    lept_rmfile("/tmp/lept/regout/h_10bin.png");
    lept_rmfile("/tmp/lept/regout/w_30bin.png");
    lept_rmfile("/tmp/lept/regout/h_30bin.png");
    numaGetRankBinValues(naw, 10, NULL, &naw_med);
    numaGetRankBinValues(nah, 10, NULL, &nah_med);
    gplotSimple1(naw_med, GPLOT_PNG, "/tmp/lept/regout/w_10bin",
                 "width vs rank bins (10)");
    gplotSimple1(nah_med, GPLOT_PNG, "/tmp/lept/regout/h_10bin",
                 "height vs rank bins (10)");
    numaDestroy(&naw_med);
    numaDestroy(&nah_med);

        /* Make the rank bin arrays of median values, with 30 bins */
    numaGetRankBinValues(naw, 30, NULL, &naw_med);
    numaGetRankBinValues(nah, 30, NULL, &nah_med);
    gplotSimple1(naw_med, GPLOT_PNG, "/tmp/lept/regout/w_30bin",
                 "width vs rank bins (30)");
    gplotSimple1(nah_med, GPLOT_PNG, "/tmp/lept/regout/h_30bin",
                 "height vs rank bins (30)");
    numaDestroy(&naw_med);
    numaDestroy(&nah_med);

        /* Save as golden files, or check against them */
    regTestCheckFile(rp, "/tmp/lept/regout/w_10bin.png");  /* 0 */
    regTestCheckFile(rp, "/tmp/lept/regout/h_10bin.png");  /* 1 */
    regTestCheckFile(rp, "/tmp/lept/regout/w_30bin.png");  /* 2 */
    regTestCheckFile(rp, "/tmp/lept/regout/h_30bin.png");  /* 3 */

        /* Display results for debugging */
    pixt = pixRead("/tmp/lept/regout/w_10bin.png");
    pixDisplayWithTitle(pixt, 0, 0, NULL, rp->display);
    pixDestroy(&pixt);
    pixt = pixRead("/tmp/lept/regout/h_10bin.png");
    pixDisplayWithTitle(pixt, 650, 0, NULL, rp->display);
    pixDestroy(&pixt);
    pixt = pixRead("/tmp/lept/regout/w_30bin.png");
    pixDisplayWithTitle(pixt, 0, 550, NULL, rp->display);
    pixDestroy(&pixt);
    pixt = pixRead("/tmp/lept/regout/h_30bin.png");
    pixDisplayWithTitle(pixt, 650, 550, NULL, rp->display);
    pixDestroy(&pixt);

    pixDestroy(&pixs);
    numaDestroy(&naw);
    numaDestroy(&nah);
    return regTestCleanup(rp);
}
main(int    argc,
     char **argv)
{
l_int32  i, n, w, h, success, display;
FILE    *fp;
BOXA    *boxa;
NUMA    *naindex, *naw, *nah, *naw_med, *nah_med;
PIX     *pixs, *pixt, *pixd;

    if (regTestSetup(argc, argv, &fp, &display, &success, NULL))
        return 1;

        /* Generate arrays of word widths and heights */
    pixs = pixRead("feyn.tif");
    pixGetWordBoxesInTextlines(pixs, 1, 6, 6, 500, 50, &boxa, &naindex);
    n = boxaGetCount(boxa);
    naw = numaCreate(0);
    nah = numaCreate(0);
    for (i = 0; i < n; i++) {
        boxaGetBoxGeometry(boxa, i, NULL, NULL, &w, &h);
        numaAddNumber(naw, w);
        numaAddNumber(nah, h);
    }
    boxaDestroy(&boxa);
    numaDestroy(&naindex);

        /* Make the rank bin arrays of median values, with 10 bins */
    numaGetRankBinValues(naw, 10, NULL, &naw_med);
    numaGetRankBinValues(nah, 10, NULL, &nah_med);
    gplotSimple1(naw_med, GPLOT_PNG, "/tmp/w_10bin", 
                 "width vs rank bins (10)");
    gplotSimple1(nah_med, GPLOT_PNG, "/tmp/h_10bin", 
                 "height vs rank bins (10)");
    numaDestroy(&naw_med);
    numaDestroy(&nah_med);

        /* Make the rank bin arrays of median values, with 30 bins */
    numaGetRankBinValues(naw, 30, NULL, &naw_med);
    numaGetRankBinValues(nah, 30, NULL, &nah_med);
    gplotSimple1(naw_med, GPLOT_PNG, "/tmp/w_30bin", 
                 "width vs rank bins (30)");
    gplotSimple1(nah_med, GPLOT_PNG, "/tmp/h_30bin", 
                 "height vs rank bins (30)");
    numaDestroy(&naw_med);
    numaDestroy(&nah_med);

        /* Give gnuplot time to write out the files */
#ifndef  _WIN32
    sleep(2);
#else
    Sleep(2000);
#endif  /* _WIN32 */

        /* Save as golden files, or check against them */
    regTestCheckFile(fp, argv, "/tmp/w_10bin.png", 0, &success);
    regTestCheckFile(fp, argv, "/tmp/h_10bin.png", 1, &success);
    regTestCheckFile(fp, argv, "/tmp/w_30bin.png", 2, &success);
    regTestCheckFile(fp, argv, "/tmp/h_30bin.png", 3, &success);

        /* Display results for debugging */
    pixt = pixRead("/tmp/w_10bin.png");
    pixDisplayWithTitle(pixt, 0, 0, NULL, display);
    pixDestroy(&pixt);
    pixt = pixRead("/tmp/h_10bin.png");
    pixDisplayWithTitle(pixt, 650, 0, NULL, display);
    pixDestroy(&pixt);
    pixt = pixRead("/tmp/w_30bin.png");
    pixDisplayWithTitle(pixt, 0, 550, NULL, display);
    pixDestroy(&pixt);
    pixt = pixRead("/tmp/h_30bin.png");
    pixDisplayWithTitle(pixt, 650, 550, NULL, display);
    pixDestroy(&pixt);

    pixDestroy(&pixs);
    numaDestroy(&naw);
    numaDestroy(&nah);
    regTestCleanup(argc, argv, fp, success, NULL);
    return 0;
}
Esempio n. 3
0
l_int32 main(int    argc,
             char **argv)
{
l_int32       i, w, h, n, val, ne, no, nbins, minw, maxw, minh, maxh;
l_int32       mine, mino, maxe, maxo;
l_int32       w_diff, h_diff, median_w_diff, median_h_diff;
l_int32       noutw, nouth;
l_float32     medwe, medhe, medwo, medho;
BOXA         *boxa1, *boxa2, *boxae, *boxao;
NUMA         *na1, *nawe, *nahe, *nawo, *naho;
NUMA         *nadiffw, *nadiffh;  /* diff from median w and h */
NUMA         *naiw, *naih;  /* indicator arrays for small outlier dimensions */
NUMA         *narbwe, *narbhe, *narbwo, *narbho;  /* rank-binned w and h */
PIX          *pix1;
PIXA         *pixa1;
L_REGPARAMS  *rp;

    if (regTestSetup(argc, argv, &rp))
        return 1;

    lept_mkdir("lept/boxa");
    boxa1 = boxaRead("boxa4.ba");

        /* Fill invalid boxes */
    n = boxaGetCount(boxa1);
    na1 = boxaFindInvalidBoxes(boxa1);
    if (na1)
        boxa2 = boxaFillSequence(boxa1, L_USE_SAME_PARITY_BOXES, 0);
    else
        boxa2 = boxaCopy(boxa1, L_CLONE);
    boxaDestroy(&boxa1);

        /* Get the widths and heights for even and odd parity */
    boxaSplitEvenOdd(boxa2, 0, &boxae, &boxao);
    boxaGetSizes(boxae, &nawe, &nahe);
    boxaGetSizes(boxao, &nawo, &naho);
    boxaDestroy(&boxa2);

        /* Find the medians */
    numaGetMedian(nawe, &medwe);
    numaGetMedian(nahe, &medhe);
    numaGetMedian(nawo, &medwo);
    numaGetMedian(naho, &medho);

        /* Find the median even/odd differences for width and height */
    median_w_diff = L_ABS(medwe - medwo);
    median_h_diff = L_ABS(medhe - medho);
    regTestCompareValues(rp, 210, median_w_diff, 0.0);  /* 0 */
    regTestCompareValues(rp, 15, median_h_diff, 0.0);  /* 1 */
    if (rp->display) {
        fprintf(stderr, "diff of e/o median widths = %d\n", median_w_diff);
        fprintf(stderr, "diff of e/o median heights = %d\n", median_h_diff);
    }

        /* Find the differences of box width and height from the median */
    nadiffw = numaMakeConstant(0, n);
    nadiffh = numaMakeConstant(0, n);
    ne = numaGetCount(nawe);
    no = numaGetCount(nawo);
    for (i = 0; i < ne; i++) {
        numaGetIValue(nawe, i, &val);
        numaSetValue(nadiffw, 2 * i, L_ABS(val - medwe));
        numaGetIValue(nahe, i, &val);
        numaSetValue(nadiffh, 2 * i, L_ABS(val - medhe));
    }
    for (i = 0; i < no; i++) {
        numaGetIValue(nawo, i, &val);
        numaSetValue(nadiffw, 2 * i + 1, L_ABS(val - medwo));
        numaGetIValue(naho, i, &val);
        numaSetValue(nadiffh, 2 * i + 1, L_ABS(val - medho));
    }

        /* Don't count invalid boxes; set the diffs to 0 for them */
    if (na1) {
        for (i = 0; i < n; i++) {
            numaGetIValue(na1, i, &val);
            if (val == 1) {
                numaSetValue(nadiffw, i, 0);
                numaSetValue(nadiffh, i, 0);
            }
        }
    }

        /* Make an indicator array for boxes that differ from the
         * median by more than a threshold value for outliers */
    naiw = numaMakeThresholdIndicator(nadiffw, 90, L_SELECT_IF_GT);
    naih = numaMakeThresholdIndicator(nadiffh, 90, L_SELECT_IF_GT);
    numaGetCountRelativeToZero(naiw, L_GREATER_THAN_ZERO, &noutw);
    numaGetCountRelativeToZero(naih, L_GREATER_THAN_ZERO, &nouth);
    regTestCompareValues(rp, 24, noutw, 0.0);  /* 2 */
    regTestCompareValues(rp, 0, nouth, 0.0);  /* 3 */
    if (rp->display)
        fprintf(stderr, "num width outliers = %d, num height outliers = %d\n",
                noutw, nouth);
    numaDestroy(&nadiffw);
    numaDestroy(&nadiffh);
    numaDestroy(&naiw);
    numaDestroy(&naih);

        /* Find the rank bins for width and height */
    nbins = L_MAX(5, ne / 50);  // up to 50 pages/bin
    numaGetRankBinValues(nawe, nbins, NULL, &narbwe);
    numaGetRankBinValues(nawo, nbins, NULL, &narbwo);
    numaGetRankBinValues(nahe, nbins, NULL, &narbhe);
    numaGetRankBinValues(naho, nbins, NULL, &narbho);
    numaDestroy(&nawe);
    numaDestroy(&nawo);
    numaDestroy(&nahe);
    numaDestroy(&naho);

        /* Find min and max binned widths and heights; get the max diffs */
    numaGetIValue(narbwe, 0, &mine);
    numaGetIValue(narbwe, nbins - 1, &maxe);
    numaGetIValue(narbwo, 0, &mino);
    numaGetIValue(narbwo, nbins - 1, &maxo);
    minw = L_MIN(mine, mino);
    maxw = L_MAX(maxe, maxo);
    w_diff = maxw - minw;
    numaGetIValue(narbhe, 0, &mine);
    numaGetIValue(narbhe, nbins - 1, &maxe);
    numaGetIValue(narbho, 0, &mino);
    numaGetIValue(narbho, nbins - 1, &maxo);
    minh = L_MIN(mine, mino);
    maxh = L_MAX(maxe, maxo);
    h_diff = maxh - minh;
    numaDestroy(&narbwe);
    numaDestroy(&narbhe);
    numaDestroy(&narbwo);
    numaDestroy(&narbho);
    regTestCompareValues(rp, 409, w_diff, 0.0);  /* 4 */
    regTestCompareValues(rp, 49, h_diff, 0.0);  /* 5 */
    if (rp->display)
        fprintf(stderr, "Binned rank results: w_diff = %d, h_diff = %d\n",
                w_diff, h_diff);

        /* Plot the results */
    if (noutw > 0 || nouth > 0) {
        pixa1 = pixaCreate(2);
        boxaPlotSizes(boxae, "even", NULL, NULL, &pix1);
        pixaAddPix(pixa1, pix1, L_INSERT);
        boxaPlotSizes(boxao, "odd", NULL, NULL, &pix1);
        pixaAddPix(pixa1, pix1, L_INSERT);
        pix1 = pixaDisplayTiledInRows(pixa1, 32, 1500, 1.0, 0, 30, 2);
        regTestWritePixAndCheck(rp, pix1, IFF_PNG);  /* 6 */
        pixDisplayWithTitle(pix1, 100, 100, NULL, rp->display);
        pixDestroy(&pix1);
        pixaDestroy(&pixa1);
    }

    boxaDestroy(&boxae);
    boxaDestroy(&boxao);
    return regTestCleanup(rp);
}