示例#1
0
/*!
 * \brief   pixQuadtreeMean()
 *
 * \param[in]    pixs     8 bpp, no colormap
 * \param[in]    nlevels  in quadtree; max allowed depends on image size
 * \param[in]    pix_ma   input mean accumulator; can be null
 * \param[out]   pfpixa   mean values in quadtree
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) The returned fpixa has %nlevels of fpix, each containing
 *          the mean values at its level.  Level 0 has a
 *          single value; level 1 has 4 values; level 2 has 16; etc.
 *      (2) If %pix_ma is null, the accumulator is generated here from
 *          %pixs with pixBlockconvAccum(); otherwise a clone of the
 *          supplied accumulator is used.  In both cases the local
 *          handle is destroyed before returning.
 *      (3) *pfpixa is only set once the fpixa has been built, so it
 *          is NULL whenever 1 is returned.
 * </pre>
 */
l_int32
pixQuadtreeMean(PIX     *pixs,
                l_int32  nlevels,
                PIX     *pix_ma,
                FPIXA  **pfpixa)
{
l_int32    i, j, w, h, size, n;
l_float32  val;
BOX       *box;
BOXA      *boxa;
BOXAA     *baa;
FPIX      *fpix;
FPIXA     *fpixa;
PIX       *pix_mac;

    PROCNAME("pixQuadtreeMean");

    if (!pfpixa)
        return ERROR_INT("&fpixa not defined", procName, 1);
    *pfpixa = NULL;
    if (!pixs || pixGetDepth(pixs) != 8)
        return ERROR_INT("pixs not defined or not 8 bpp", procName, 1);
    pixGetDimensions(pixs, &w, &h, NULL);
    if (nlevels > quadtreeMaxLevels(w, h))
        return ERROR_INT("nlevels too large for image", procName, 1);

        /* Use the caller's accumulator if given; otherwise make one */
    if (!pix_ma)
        pix_mac = pixBlockconvAccum(pixs);
    else
        pix_mac = pixClone(pix_ma);
    if (!pix_mac)
        return ERROR_INT("pix_mac not made", procName, 1);

    if ((baa = boxaaQuadtreeRegions(w, h, nlevels)) == NULL) {
        pixDestroy(&pix_mac);
        return ERROR_INT("baa not made", procName, 1);
    }

        /* Build into a local and publish only on success, so that the
         * caller never sees a half-made fpixa. */
    if ((fpixa = fpixaCreate(nlevels)) == NULL) {
        pixDestroy(&pix_mac);
        boxaaDestroy(&baa);
        return ERROR_INT("fpixa not made", procName, 1);
    }

    for (i = 0; i < nlevels; i++) {
        boxa = boxaaGetBoxa(baa, i, L_CLONE);
        size = 1 << i;
        n = boxaGetCount(boxa);  /* n == size * size */
        fpix = fpixCreate(size, size);
        for (j = 0; j < n; j++) {
            box = boxaGetBox(boxa, j, L_CLONE);
                /* Give val a defined value in case the mean
                 * computation fails without writing to it. */
            val = 0.0;
            pixMeanInRectangle(pixs, box, pix_mac, &val);
                /* Box j is stored at column (j % size), row (j / size) */
            fpixSetPixel(fpix, j % size, j / size, val);
            boxDestroy(&box);
        }
        fpixaAddFPix(fpixa, fpix, L_INSERT);
        boxaDestroy(&boxa);
    }

    *pfpixa = fpixa;
    pixDestroy(&pix_mac);
    boxaaDestroy(&baa);
    return 0;
}
/*!
 * \brief   pixGetWordBoxesInTextlines()
 *
 * \param[in]    pixs 1 bpp, typ. 300 ppi
 * \param[in]    reduction 1 for input res; 2 for 2x reduction of input res
 * \param[in]    minwidth, minheight of saved components; smaller are discarded
 * \param[in]    maxwidth, maxheight of saved components; larger are discarded
 * \param[out]   pboxad word boxes sorted in textline line order
 * \param[out]   pnai [optional] index of textline for each word
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) The input should be at a resolution of about 300 ppi.
 *          The word masks can be computed at either 150 ppi or 300 ppi.
 *          For the former, set reduction = 2.
 *      (2) This is a special version of pixGetWordsInTextlines(), that
 *          just finds the word boxes in line order, with a numa
 *          giving the textline index for each word.
 *          See pixGetWordsInTextlines() for more details.
 *      (3) Only %pboxad is required; %pnai may be NULL.
 *      (4) The results of the 2d sort and of the flattening are not
 *          checked here, so *pboxad can still be NULL when 0 is
 *          returned.  Callers should test it before use.
 * </pre>
 */
l_int32
pixGetWordBoxesInTextlines(PIX     *pixs,
                           l_int32  reduction,
                           l_int32  minwidth,
                           l_int32  minheight,
                           l_int32  maxwidth,
                           l_int32  maxheight,
                           BOXA   **pboxad,
                           NUMA   **pnai)
{
l_int32  maxdil;
BOXA    *boxa1 = NULL;
BOXAA   *baa;
NUMA    *nai = NULL;
PIX     *pix1;

    PROCNAME("pixGetWordBoxesInTextlines");

    if (pnai) *pnai = NULL;
    if (!pboxad)
        return ERROR_INT("&boxad not defined", procName, 1);
    *pboxad = NULL;
    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 1);
    if (reduction != 1 && reduction != 2)
        return ERROR_INT("reduction not in {1,2}", procName, 1);

        /* The max dilation is halved when working at 2x reduction */
    if (reduction == 1) {
        pix1 = pixClone(pixs);
        maxdil = 18;
    } else {  /* reduction == 2 */
        pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
        maxdil = 9;
    }
    if (!pix1)
        return ERROR_INT("pix1 not made", procName, 1);

        /* Get the bounding boxes of the words from the word mask. */
    if (pixWordBoxesByDilation(pix1, maxdil, minwidth, minheight,
                               maxwidth, maxheight, &boxa1, NULL)) {
        boxaDestroy(&boxa1);
        pixDestroy(&pix1);
        return ERROR_INT("word boxes not generated", procName, 1);
    }

        /* 2D sort the bounding boxes of these words. */
    baa = boxaSort2d(boxa1, NULL, 3, -5, 5);

        /* Flatten the boxaa, saving the boxa index for each box */
    *pboxad = boxaaFlattenToBoxa(baa, &nai, L_CLONE);

        /* Hand the index array to the caller only if it was requested */
    if (pnai)
        *pnai = nai;
    else
        numaDestroy(&nai);
    pixDestroy(&pix1);
    boxaDestroy(&boxa1);
    boxaaDestroy(&baa);
    return 0;
}
示例#3
0
/*!
 * \brief   pixGetWordsInTextlines()
 *
 * \param[in]    pixs 1 bpp, typ. 75 - 150 ppi
 * \param[in]    minwidth, minheight of saved components; smaller are discarded
 * \param[in]    maxwidth, maxheight of saved components; larger are discarded
 * \param[out]   pboxad word boxes sorted in textline line order
 * \param[out]   ppixad word images sorted in textline line order
 * \param[out]   pnai index of textline for each word
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) The input should be at a resolution of between 75 and 150 ppi.
 *      (2) The four size constraints on saved components are passed
 *          unchanged to pixWordBoxesByDilation().
 *      (3) The result are word images (and their b.b.), extracted in
 *          textline order, and with a numa giving the textline index
 *          for each word.
 *      (4) The pixa and boxa interfaces should make this type of
 *          application simple to put together.  The steps are:
 *           ~ generate first estimate of word masks
 *           ~ get b.b. of these, and remove the small and big ones
 *           ~ extract pixa of the word images, using the b.b.
 *           ~ sort actual word images in textline order (2d)
 *           ~ flatten them to a pixa (1d), saving the textline index
 *             for each pix
 *      (5) In an actual application, it may be desirable to pre-filter
 *          the input image to remove large components, to extract
 *          single columns of text, and to deskew them.  For example,
 *          to remove both large components and small noisy components
 *          that can interfere with the statistics used to estimate
 *          parameters for segmenting by words, but still retain text lines,
 *          the following image preprocessing can be done:
 *                Pix *pixt = pixMorphSequence(pixs, "c40.1", 0);
 *                Pix *pixf = pixSelectBySize(pixt, 0, 60, 8,
 *                                     L_SELECT_HEIGHT, L_SELECT_IF_LT, NULL);
 *                pixAnd(pixf, pixf, pixs);  // the filtered image
 *          The closing turns text lines into long blobs, but does not
 *          significantly increase their height.  But if there are many
 *          small connected components in a dense texture, this is likely
 *          to generate tall components that will be eliminated in pixf.
 *      (6) All three output pointers are required.  The results of the
 *          sort and flatten steps are not checked here, so the outputs
 *          can still be NULL when 0 is returned.
 * </pre>
 */
l_int32
pixGetWordsInTextlines(PIX     *pixs,
                       l_int32  minwidth,
                       l_int32  minheight,
                       l_int32  maxwidth,
                       l_int32  maxheight,
                       BOXA   **pboxad,
                       PIXA   **ppixad,
                       NUMA   **pnai)
{
BOXA    *boxa1 = NULL, *boxad;
BOXAA   *baa;
NUMA    *nai = NULL;
NUMAA   *naa = NULL;
PIXA    *pixa1, *pixad;
PIXAA   *paa;

    PROCNAME("pixGetWordsInTextlines");

    if (!pboxad || !ppixad || !pnai)
        return ERROR_INT("&boxad, &pixad, &nai not all defined", procName, 1);
    *pboxad = NULL;
    *ppixad = NULL;
    *pnai = NULL;
    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 1);

        /* Get the bounding boxes of the words from the word mask. */
    if (pixWordBoxesByDilation(pixs, minwidth, minheight, maxwidth, maxheight,
                               &boxa1, NULL, NULL)) {
        boxaDestroy(&boxa1);
        return ERROR_INT("word boxes not generated", procName, 1);
    }

        /* Generate a pixa of the word images */
    pixa1 = pixaCreateFromBoxa(pixs, boxa1, NULL);  /* mask over each word */

        /* Sort the bounding boxes of these words by line.  We use the
         * index mapping to allow identical sorting of the pixa. */
    baa = boxaSort2d(boxa1, &naa, -1, -1, 4);
    paa = pixaSort2dByIndex(pixa1, naa, L_CLONE);

        /* Flatten the word paa */
    pixad = pixaaFlattenToPixa(paa, &nai, L_CLONE);
    boxad = pixaGetBoxa(pixad, L_COPY);

        /* Ownership of these three results passes to the caller */
    *pnai = nai;
    *pboxad = boxad;
    *ppixad = pixad;

    pixaDestroy(&pixa1);
    boxaDestroy(&boxa1);
    boxaaDestroy(&baa);
    pixaaDestroy(&paa);
    numaaDestroy(&naa);
    return 0;
}
示例#4
0
/*!
 * \brief   pixGetWordBoxesInTextlines()
 *
 * \param[in]    pixs 1 bpp, typ. 75 - 150 ppi
 * \param[in]    minwidth, minheight of saved components; smaller are discarded
 * \param[in]    maxwidth, maxheight of saved components; larger are discarded
 * \param[out]   pboxad word boxes sorted in textline line order
 * \param[out]   pnai [optional] index of textline for each word
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) The input should be at a resolution of between 75 and 150 ppi.
 *      (2) This is a special version of pixGetWordsInTextlines(), that
 *          just finds the word boxes in line order, with a numa
 *          giving the textline index for each word.
 *          See pixGetWordsInTextlines() for more details.
 *      (3) Only %pboxad is required; %pnai may be NULL.
 *      (4) The results of the 2d sort and of the flattening are not
 *          checked here, so *pboxad can still be NULL when 0 is
 *          returned.  Callers should test it before use.
 * </pre>
 */
l_int32
pixGetWordBoxesInTextlines(PIX     *pixs,
                           l_int32  minwidth,
                           l_int32  minheight,
                           l_int32  maxwidth,
                           l_int32  maxheight,
                           BOXA   **pboxad,
                           NUMA   **pnai)
{
BOXA    *boxa1 = NULL;
BOXAA   *baa;
NUMA    *nai = NULL;

    PROCNAME("pixGetWordBoxesInTextlines");

    if (pnai) *pnai = NULL;
    if (!pboxad)
        return ERROR_INT("&boxad not defined", procName, 1);
    *pboxad = NULL;
    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 1);

        /* Get the bounding boxes of the words from the word mask. */
    if (pixWordBoxesByDilation(pixs, minwidth, minheight, maxwidth, maxheight,
                               &boxa1, NULL, NULL)) {
        boxaDestroy(&boxa1);
        return ERROR_INT("word boxes not generated", procName, 1);
    }

        /* 2D sort the bounding boxes of these words. */
    baa = boxaSort2d(boxa1, NULL, 3, -5, 5);

        /* Flatten the boxaa, saving the boxa index for each box */
    *pboxad = boxaaFlattenToBoxa(baa, &nai, L_CLONE);

        /* Hand the index array to the caller only if it was requested */
    if (pnai)
        *pnai = nai;
    else
        numaDestroy(&nai);
    boxaDestroy(&boxa1);
    boxaaDestroy(&baa);
    return 0;
}
示例#5
0
/*
 * wordsinorder
 *
 *     Syntax: wordsinorder dirin rootname [firstpage, npages]
 *
 * For each image file found in dirin, computes the word bounding boxes
 * at 2x reduction together with the textline index of each word.
 * When RENDER_PAGES is nonzero, a colormapped 2x-reduced rendering of
 * each page, with every word outlined in a color chosen by its textline
 * index, is written to "<rootname>.<5-digit page index>".
 *
 * Returns 0 on success, 1 on a usage error or if the file list
 * cannot be made.
 */
int main(int    argc,
         char **argv)
{
char         filename[BUF_SIZE];
char        *dirin, *rootname, *fname;
l_int32      i, j, w, h, firstpage, npages, nfiles, ncomp;
l_int32      index, ival, rval, gval, bval;
BOX         *box;
BOXA        *boxa;
BOXAA       *baa;
NUMA        *nai;
NUMAA       *naa;
SARRAY      *safiles;
PIX         *pixs, *pixt1, *pixt2, *pixd;
PIXCMAP     *cmap;
static char  mainName[] = "wordsinorder";

    if (argc != 3 && argc != 5)
        return ERROR_INT(
            " Syntax: wordsinorder dirin rootname [firstpage, npages]",
            mainName, 1);

    dirin = argv[1];
    rootname = argv[2];

    if (argc == 3) {
        firstpage = 0;
        npages = 0;
    }
    else {
        firstpage = atoi(argv[3]);
        npages = atoi(argv[4]);
    }

        /* Compute the word bounding boxes at 2x reduction, along with
         * the textlines that they are in. */
    safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages);
    if (!safiles)
        return ERROR_INT("safiles not made", mainName, 1);
    nfiles = sarrayGetCount(safiles);
    baa = boxaaCreate(nfiles);
    naa = numaaCreate(nfiles);
    for (i = 0; i < nfiles; i++) {
        fname = sarrayGetString(safiles, i, 0);
        if ((pixs = pixRead(fname)) == NULL) {
            L_WARNING("image file %d not read\n", mainName, i);
            continue;
        }

            /* Start from NULL so that a failed call cannot leave
             * indeterminate pointers to be inserted below. */
        boxa = NULL;
        nai = NULL;
        pixGetWordBoxesInTextlines(pixs, 2, MIN_WORD_WIDTH, MIN_WORD_HEIGHT,
                                   MAX_WORD_WIDTH, MAX_WORD_HEIGHT,
                                   &boxa, &nai);
            /* baa and naa take ownership; boxa and nai remain usable
             * below as long as baa and naa are alive. */
        boxaaAddBoxa(baa, boxa, L_INSERT);
        numaaAddNuma(naa, nai, L_INSERT);

#if  RENDER_PAGES
            /* Show the results on a 2x reduced image, where each
             * word is outlined and the color of the box depends on the
             * computed textline. */
        pixt1 = pixReduceRankBinary2(pixs, 2, NULL);
        pixGetDimensions(pixt1, &w, &h, NULL);
        pixd = pixCreate(w, h, 8);
        cmap = pixcmapCreateRandom(8, 1, 1);  /* first color is black */
        pixSetColormap(pixd, cmap);

        pixt2 = pixUnpackBinary(pixt1, 8, 1);
        pixRasterop(pixd, 0, 0, w, h, PIX_SRC | PIX_DST, pixt2, 0, 0);
        ncomp = boxaGetCount(boxa);
        for (j = 0; j < ncomp; j++) {
            box = boxaGetBox(boxa, j, L_CLONE);
            numaGetIValue(nai, j, &ival);
            index = 1 + (ival % 254);  /* omit black and white */
            pixcmapGetColor(cmap, index, &rval, &gval, &bval);
            pixRenderBoxArb(pixd, box, 2, rval, gval, bval);
            boxDestroy(&box);
        }

        snprintf(filename, BUF_SIZE, "%s.%05d", rootname, i);
        fprintf(stderr, "filename: %s\n", filename);
        pixWrite(filename, pixd, IFF_PNG);
        pixDestroy(&pixt1);
        pixDestroy(&pixt2);
        pixDestroy(&pixd);
#endif  /* RENDER_PAGES */

            /* Release the page whether or not it was rendered; doing
             * this only inside the RENDER_PAGES section leaked every
             * page when rendering was compiled out. */
        pixDestroy(&pixs);
    }

    boxaaDestroy(&baa);
    numaaDestroy(&naa);
    sarrayDestroy(&safiles);
    return 0;
}
/*!
 * \brief   pixGetWordsInTextlines()
 *
 * \param[in]    pixs 1 bpp, typ. 300 ppi
 * \param[in]    reduction 1 for input res; 2 for 2x reduction of input res
 * \param[in]    minwidth, minheight of saved components; smaller are discarded
 * \param[in]    maxwidth, maxheight of saved components; larger are discarded
 * \param[out]   pboxad word boxes sorted in textline line order
 * \param[out]   ppixad word images sorted in textline line order
 * \param[out]   pnai index of textline for each word
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) The input should be at a resolution of about 300 ppi.
 *          The word masks and word images can be computed at either
 *          150 ppi or 300 ppi.  For the former, set reduction = 2.
 *      (2) The four size constraints on saved components are all
 *          scaled by %reduction.
 *      (3) The result are word images (and their b.b.), extracted in
 *          textline order, at either full res or 2x reduction,
 *          and with a numa giving the textline index for each word.
 *      (4) The pixa and boxa interfaces should make this type of
 *          application simple to put together.  The steps are:
 *           ~ optionally reduce by 2x
 *           ~ generate first estimate of word masks
 *           ~ get b.b. of these, and remove the small and big ones
 *           ~ extract pixa of the word images, using the b.b.
 *           ~ sort actual word images in textline order (2d)
 *           ~ flatten them to a pixa (1d), saving the textline index
 *             for each pix
 *      (5) In an actual application, it may be desirable to pre-filter
 *          the input image to remove large components, to extract
 *          single columns of text, and to deskew them.  For example,
 *          to remove both large components and small noisy components
 *          that can interfere with the statistics used to estimate
 *          parameters for segmenting by words, but still retain text lines,
 *          the following image preprocessing can be done:
 *                Pix *pixt = pixMorphSequence(pixs, "c40.1", 0);
 *                Pix *pixf = pixSelectBySize(pixt, 0, 60, 8,
 *                                     L_SELECT_HEIGHT, L_SELECT_IF_LT, NULL);
 *                pixAnd(pixf, pixf, pixs);  // the filtered image
 *          The closing turns text lines into long blobs, but does not
 *          significantly increase their height.  But if there are many
 *          small connected components in a dense texture, this is likely
 *          to generate tall components that will be eliminated in pixf.
 *      (6) All three output pointers are required.  The results of the
 *          sort and flatten steps are not checked here, so the outputs
 *          can still be NULL when 0 is returned.
 * </pre>
 */
l_int32
pixGetWordsInTextlines(PIX     *pixs,
                       l_int32  reduction,
                       l_int32  minwidth,
                       l_int32  minheight,
                       l_int32  maxwidth,
                       l_int32  maxheight,
                       BOXA   **pboxad,
                       PIXA   **ppixad,
                       NUMA   **pnai)
{
l_int32  maxdil;
BOXA    *boxa1 = NULL, *boxad;
BOXAA   *baa;
NUMA    *nai = NULL;
NUMAA   *naa = NULL;
PIXA    *pixa1, *pixad;
PIX     *pix1;
PIXAA   *paa;

    PROCNAME("pixGetWordsInTextlines");

    if (!pboxad || !ppixad || !pnai)
        return ERROR_INT("&boxad, &pixad, &nai not all defined", procName, 1);
    *pboxad = NULL;
    *ppixad = NULL;
    *pnai = NULL;
    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 1);
    if (reduction != 1 && reduction != 2)
        return ERROR_INT("reduction not in {1,2}", procName, 1);

        /* The max dilation is halved when working at 2x reduction */
    if (reduction == 1) {
        pix1 = pixClone(pixs);
        maxdil = 18;
    } else {  /* reduction == 2 */
        pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
        maxdil = 9;
    }
    if (!pix1)
        return ERROR_INT("pix1 not made", procName, 1);

        /* Get the bounding boxes of the words from the word mask. */
    if (pixWordBoxesByDilation(pix1, maxdil, minwidth, minheight,
                               maxwidth, maxheight, &boxa1, NULL)) {
        boxaDestroy(&boxa1);
        pixDestroy(&pix1);
        return ERROR_INT("word boxes not generated", procName, 1);
    }

        /* Generate a pixa of the word images */
    pixa1 = pixaCreateFromBoxa(pix1, boxa1, NULL);  /* mask over each word */

        /* Sort the bounding boxes of these words by line.  We use the
         * index mapping to allow identical sorting of the pixa. */
    baa = boxaSort2d(boxa1, &naa, -1, -1, 4);
    paa = pixaSort2dByIndex(pixa1, naa, L_CLONE);

        /* Flatten the word paa */
    pixad = pixaaFlattenToPixa(paa, &nai, L_CLONE);
    boxad = pixaGetBoxa(pixad, L_COPY);

        /* Ownership of these three results passes to the caller */
    *pnai = nai;
    *pboxad = boxad;
    *ppixad = pixad;

    pixDestroy(&pix1);
    pixaDestroy(&pixa1);
    boxaDestroy(&boxa1);
    boxaaDestroy(&baa);
    pixaaDestroy(&paa);
    numaaDestroy(&naa);
    return 0;
}
/*
 * convertsegfilestopdf
 *
 * Converts the image files in a directory to a single pdf, using
 * convertSegmentedFilesToPdf().  The "image" regions of each page can
 * be supplied either as numbered mask files in maskdir or as a
 * serialized boxaa file; if maskdir is given, the boxaa file is ignored.
 *
 * Exactly 11 arguments are required; see the usage message below.
 * Returns 1 on a usage error, otherwise the return value of
 * convertSegmentedFilesToPdf().
 */
int
main(int    argc,
     char **argv)
{
char        *pagedir, *pagesubstr, *maskdir, *masksubstr;
char        *title, *fileout, *boxaafile, *boxaapath;
l_int32      ret, res, type, thresh;
l_float32    scalefactor;
BOXAA       *baa;
static char  mainName[] = "convertsegfilestopdf";

    if (argc != 12) {
        fprintf(stderr,
            " Syntax: convertsegfilestopdf pagedir pagesubstr maskdir \\ \n"
            "                       masksubstr res type thresh boxaafile \\ \n"
            "                       scalefactor title fileout\n"
            "     where\n"
            "         pagedir:  input directory for image files\n"
            "         pagesubstr:  Use 'allfiles' to convert all files\n"
            "                  in the directory\n"
            "         maskdir:  input directory for mask files;\n"
            "                   use 'skip' to skip \n"
            "         masksubstr:  Use 'allfiles' to convert all files\n"
            "                  in the directory; 'skip' to skip\n"
            "         res:  Input resolution of each image;\n"
            "               assumed to all be the same\n"
            "         type: compression used for non-image regions:\n"
            "               0: default (G4 encoding)\n"
            "               1: JPEG encoding\n"
            "               2: G4 encoding\n"
            "               3: PNG encoding\n"
            "         thresh:  threshold for binarization; use 0 for default\n"
            "         boxaafile: Optional file of 'image' regions within\n"
            "                    each page.  This contains a boxa for each\n"
            "                    page, consisting of a set of regions.\n"
            "                    Use 'skip' to skip.\n"
            "         scalefactor:  Use to scale down the image regions\n"
            "         title:  Use 'none' to omit\n"
            "         fileout:  Output pdf file\n");
        return 1;
    }

    pagedir = argv[1];
    pagesubstr = argv[2];
    maskdir = argv[3];
    masksubstr = argv[4];
    res = atoi(argv[5]);
    type = atoi(argv[6]);
    thresh = atoi(argv[7]);
    boxaafile = argv[8];
    scalefactor = atof(argv[9]);
    title = argv[10];
    fileout = argv[11];

        /* Map the keyword arguments onto NULL, and replace
         * out-of-range numeric arguments by their defaults. */
    if (!strcmp(pagesubstr, "allfiles"))
        pagesubstr = NULL;
    if (!strcmp(maskdir, "skip"))
        maskdir = NULL;
    if (!strcmp(masksubstr, "allfiles"))
        masksubstr = NULL;
    if (scalefactor <= 0.0 || scalefactor > 1.0) {
        L_WARNING("invalid scalefactor: setting to 1.0", mainName);
        scalefactor = 1.0;
    }
    if (type != 1 && type != 2 && type != 3)
        type = L_G4_ENCODE;
    if (thresh <= 0)
        thresh = 150;
    if (!strcmp(title, "none"))
        title = NULL;

    if (maskdir)  /* use this; ignore any input boxaafile */
        baa = convertNumberedMasksToBoxaa(maskdir, masksubstr, 0, 0);
    else if (strcmp(boxaafile, "skip")) {  /* use the boxaafile */
        boxaapath = genPathname(boxaafile, NULL);
        baa = boxaaRead(boxaapath);
        FREE(boxaapath);
    }
    else  /* no maskdir and no input boxaafile */
        baa = NULL;

    ret = convertSegmentedFilesToPdf(pagedir, pagesubstr, res, type, thresh,
                                     baa, 75, scalefactor, title, fileout);
    boxaaDestroy(&baa);
    return ret;
}
示例#8
0
/*!
 *  pixSplitIntoCharacters()
 *
 *      Input:  pixs (1 bpp, contains only deskewed text)
 *              minw (minimum component width for initial filtering; typ. 4)
 *              minh (minimum component height for initial filtering; typ. 4)
 *              &boxa (<optional return> character bounding boxes)
 *              &pixa (<optional return> character images)
 *              &pixdebug (<optional return> showing splittings)
 *
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) This is a simple function that attempts to find split points
 *          based on vertical pixel profiles.
 *      (2) It should be given an image that has an arbitrary number
 *          of text characters.
 *      (3) The returned pixa includes the boxes from which the
 *          (possibly split) components are extracted.
 *      (4) The sequence is: filter by size, apply a "c1.10" morphological
 *          close, take the 8-connected components, split each one with
 *          pixSplitComponentWithProfile(), shift the resulting boxes by
 *          the component's offset, and finally 2D sort and flatten the
 *          full set of boxes.
 */
l_int32
pixSplitIntoCharacters(PIX     *pixs,
                       l_int32  minw,
                       l_int32  minh,
                       BOXA   **pboxa,
                       PIXA   **ppixa,
                       PIX    **ppixdebug)
{
l_int32  idx, ncc, dx, dy;
BOXA    *boxacc, *boxasplit, *boxalocal, *boxaglobal, *boxasorted;
BOXAA   *baalines;
PIX     *pixcomp, *pixfilt, *pixclosed, *pixdb;
PIX    **ppixdb;
PIXA    *pixacc, *pixadb;

    PROCNAME("pixSplitIntoCharacters");

        /* Clear every requested output before any validation */
    if (pboxa) *pboxa = NULL;
    if (ppixa) *ppixa = NULL;
    if (ppixdebug) *ppixdebug = NULL;
    if (pixs == NULL || pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);

        /* Filter out the small components, then consolidate what is
         * left with a small vertical close. */
    pixfilt = pixSelectBySize(pixs, minw, minh, 8, L_SELECT_IF_BOTH,
                              L_SELECT_IF_GT, NULL);
    pixclosed = pixMorphSequence(pixfilt, "c1.10", 0);
    pixDestroy(&pixfilt);

        /* Only the pixa of 8-connected components is needed */
    boxacc = pixConnComp(pixclosed, &pixacc, 8);
    pixDestroy(&pixclosed);
    boxaDestroy(&boxacc);

        /* Set up the accumulators; the debug pixa exists only when
         * a debug image was requested. */
    ncc = pixaGetCount(pixacc);
    boxasplit = boxaCreate(ncc);
    if (ppixdebug != NULL) {
        pixadb = pixaCreate(ncc);
        ppixdb = &pixdb;
    } else {
        pixadb = NULL;
        ppixdb = NULL;
    }

        /* Split each component if obvious, and move the boxes from
         * component coordinates into image coordinates. */
    for (idx = 0; idx < ncc; idx++) {
        pixcomp = pixaGetPix(pixacc, idx, L_CLONE);
        boxalocal = pixSplitComponentWithProfile(pixcomp, 10, 7, ppixdb);
        if (ppixdb != NULL && pixdb != NULL)
            pixaAddPix(pixadb, pixdb, L_INSERT);
        pixaGetBoxGeometry(pixacc, idx, &dx, &dy, NULL, NULL);
        boxaglobal = boxaTransform(boxalocal, dx, dy, 1.0, 1.0);
        boxaJoin(boxasplit, boxaglobal, 0, -1);
        boxaDestroy(&boxaglobal);
        boxaDestroy(&boxalocal);
        pixDestroy(&pixcomp);
    }
    pixaDestroy(&pixacc);

        /* Tile the per-component debug images into one image */
    if (ppixdebug != NULL) {
        if (pixaGetCount(pixadb) > 0)
            *ppixdebug = pixaDisplayTiledInRows(pixadb, 32, 1500,
                                                1.0, 0, 20, 1);
        pixaDestroy(&pixadb);
    }

        /* 2D sort of the boxes, flattened back to a 1D boxa */
    baalines = boxaSort2d(boxasplit, NULL, 0, 0, 5);
    boxasorted = boxaaFlattenToBoxa(baalines, NULL, L_CLONE);
    boxaDestroy(&boxasplit);
    boxaaDestroy(&baalines);

        /* Optionally clip the pieces out of the input image, then
         * either return the sorted boxes or dispose of them. */
    if (ppixa != NULL)
        *ppixa = pixClipRectangles(pixs, boxasorted);
    if (pboxa == NULL) {
        boxaDestroy(&boxasorted);
        return 0;
    }
    *pboxa = boxasorted;
    return 0;
}
示例#9
0
/*
 * k2pdfopt_reflow_bmp()
 *
 * Reflows the single source bitmap held in kctx->src and stores the
 * result back into the context:
 *   - kctx->dst receives the output bitmap (white background with the
 *     rows of the master bitmap copied onto it);
 *   - kctx->page_width / kctx->page_height are set to the dst size and
 *     kctx->precache is cleared;
 *   - every rectmap recorded in the master info is appended to
 *     kctx->rectmaps;
 *   - kctx->rboxa / kctx->rnai and kctx->nboxa / kctx->nnai receive the
 *     2D-sorted, flattened boxes built from those rectmaps, together
 *     with the index array produced by the flattening.
 *
 * The source bitmap kctx->src is freed before returning.
 *
 * NOTE(review): none of the allocation or library calls below is
 * checked for failure.
 */
void k2pdfopt_reflow_bmp(KOPTContext *kctx) {
    K2PDFOPT_SETTINGS _k2settings, *k2settings;
    MASTERINFO _masterinfo, *masterinfo;
    WILLUSBITMAP _srcgrey, *srcgrey;
    WILLUSBITMAP *src, *dst;
    BMPREGION region;
    int i, bw, marbot, marleft;

    /* The settings, master info and grey bitmap are stack objects;
     * the pointers are just convenient aliases for them. */
    src = &kctx->src;
    srcgrey = &_srcgrey;
    bmp_init(srcgrey);

    k2settings = &_k2settings;
    masterinfo = &_masterinfo;
    /* Initialize settings */
    k2pdfopt_settings_init_from_koptcontext(k2settings, kctx);
    k2pdfopt_settings_quick_sanity_check(k2settings);
    /* Init for new source doc */
    k2pdfopt_settings_new_source_document_init(k2settings);
    /* Init master output structure */
    masterinfo_init(masterinfo, k2settings);
    wrapbmp_init(&masterinfo->wrapbmp, k2settings->dst_color);
    /* Init new source bitmap */
    bmpregion_init(&region);
    masterinfo_new_source_page_init(masterinfo, k2settings, src, srcgrey, NULL,
            &region, k2settings->src_rot, NULL, NULL, 1, -1, NULL );
    /* Set output size */
    k2pdfopt_settings_set_margins_and_devsize(k2settings,&region,masterinfo,-1.,0);
    /* Process single source page */
    bmpregion_source_page_add(&region, k2settings, masterinfo, 1, 0);
    wrapbmp_flush(masterinfo, k2settings, 0);

    /* Gamma-correct the master bitmap in place, unless dst_gamma is
     * within .001 of 1.0 */
    if (fabs(k2settings->dst_gamma - 1.0) > .001)
        bmp_gamma_correct(&masterinfo->bmp, &masterinfo->bmp,
                k2settings->dst_gamma);

    /* copy master bitmap to context dst bitmap */
    dst = &kctx->dst;
    /* Margins converted to pixels at dst_dpi, rounded to nearest.
     * NOTE(review): presumably box[1] is the bottom margin and box[0]
     * the left margin, as the variable names suggest -- confirm. */
    marbot = (int) (k2settings->dst_dpi * k2settings->dstmargins.box[1] + .5);
    marleft = (int) (k2settings->dst_dpi * k2settings->dstmargins.box[0] + .5);
    dst->bpp = masterinfo->bmp.bpp;
    dst->width = masterinfo->bmp.width;
    /* Output height: if the reflowed rows exceed the current page
     * height, use rows + marbot; otherwise keep the page height. */
    dst->height = masterinfo->rows > kctx->page_height ? masterinfo->rows + marbot : kctx->page_height;
    bmp_alloc(dst);
    /* White background; only the first masterinfo->rows rows are then
     * overwritten from the master bitmap. */
    bmp_fill(dst, 255, 255, 255);
    bw = bmp_bytewidth(&masterinfo->bmp);
    for (i = 0; i < masterinfo->rows; i++)
        memcpy(bmp_rowptr_from_top(dst, i),
                bmp_rowptr_from_top(&masterinfo->bmp, i), bw);

    kctx->page_width = kctx->dst.width;
    kctx->page_height = kctx->dst.height;
    kctx->precache = 0;

    /* Build two box arrays from the rectmaps, one box of each per
     * rectmap.
     * NOTE(review): presumably coords[0] is the position in the source
     * page, coords[1] the position in the reflowed output and coords[2]
     * the width/height -- confirm against the WRECTMAP definition. */
    int j;
    BOXA *rboxa = boxaCreate(masterinfo->rectmaps.n);
    BOXA *nboxa = boxaCreate(masterinfo->rectmaps.n);
    for (j = 0; j < masterinfo->rectmaps.n; j++) {
        WRECTMAP * rectmap = &masterinfo->rectmaps.wrectmap[j];
        /* Shift coords[1].x by the left margin.  This modifies the
         * rectmap in place, so the copy added to kctx->rectmaps below
         * carries the shifted value. */
        rectmap->coords[1].x += marleft;
        /* Box from coords[1] (position) and coords[2] (size), unscaled */
        BOX* rlbox = boxCreate(rectmap->coords[1].x,
                              rectmap->coords[1].y,
                              rectmap->coords[2].x,
                              rectmap->coords[2].y);
        /* Box from coords[0] and coords[2], each scaled by
         * src_dpi / srcdpi{w,h} / zoom; the position is additionally
         * offset by the origin of kctx->bbox. */
        BOX* nlbox = boxCreate(rectmap->coords[0].x*k2settings->src_dpi/rectmap->srcdpiw/kctx->zoom + kctx->bbox.x0,
                              rectmap->coords[0].y*k2settings->src_dpi/rectmap->srcdpih/kctx->zoom + kctx->bbox.y0,
                              rectmap->coords[2].x*k2settings->src_dpi/rectmap->srcdpiw/kctx->zoom,
                              rectmap->coords[2].y*k2settings->src_dpi/rectmap->srcdpih/kctx->zoom);
        boxaAddBox(rboxa, rlbox, L_INSERT);
        boxaAddBox(nboxa, nlbox, L_INSERT);
        wrectmaps_add_wrectmap(&kctx->rectmaps, rectmap);

        /*printf("rectmap:coords:\t%.1f %.1f\t%.1f %.1f\t%.1f %.1f\t%.1f %.1f\n",
                rectmap->coords[0].x, rectmap->coords[0].y,
                rectmap->coords[1].x, rectmap->coords[1].y,
                rectmap->coords[2].x, rectmap->coords[2].y,
                rectmap->srcdpiw,     rectmap->srcdpih);*/
    }
    /* 2D sort the bounding boxes of these words. */
    BOXAA *rbaa = boxaSort2d(rboxa, NULL, 3, -5, 5);
    BOXAA *nbaa = boxaSort2d(nboxa, NULL, 3, -5, 5);

    /* Flatten the boxaa, saving the boxa index for each box */
    kctx->rboxa = boxaaFlattenToBoxa(rbaa, &kctx->rnai, L_CLONE);
    kctx->nboxa = boxaaFlattenToBoxa(nbaa, &kctx->nnai, L_CLONE);

    /* The unsorted arrays and the intermediate boxaa are no longer
     * needed; the flattened results stay in kctx. */
    boxaDestroy(&rboxa);
    boxaaDestroy(&rbaa);
    boxaDestroy(&nboxa);
    boxaaDestroy(&nbaa);

    /* Release the working data; note that src is kctx->src, so the
     * context's source bitmap is freed here as well. */
    bmp_free(src);
    bmp_free(srcgrey);
    bmpregion_free(&region);
    masterinfo_free(masterinfo, k2settings);
}
示例#10
0
int main(int    argc,
         char **argv)
{
    l_uint8     *data1, *data2;
    l_int32      i, same, w, h, width, success, nba;
    size_t       size1, size2;
    l_float32    diffarea, diffxor, scalefact;
    BOX         *box;
    BOXA        *boxa1, *boxa2, *boxa3;
    BOXAA       *baa1, *baa2, *baa3;
    PIX         *pix1, *pixdb;
    PIXA        *pixa1, *pixa2;
    static char  mainName[] = "boxa1_reg";

    if (argc != 1)
        return ERROR_INT(" Syntax: boxa1_reg", mainName, 1);

    lept_mkdir("lept/boxa");

    /* Make a boxa and display its contents */
    boxa1 = boxaCreate(6);
    box = boxCreate(60, 60, 40, 20);
    boxaAddBox(boxa1, box, L_INSERT);
    box = boxCreate(120, 50, 20, 50);
    boxaAddBox(boxa1, box, L_INSERT);
    box = boxCreate(50, 140, 46, 60);
    boxaAddBox(boxa1, box, L_INSERT);
    box = boxCreate(166, 130, 64, 28);
    boxaAddBox(boxa1, box, L_INSERT);
    box = boxCreate(64, 224, 44, 34);
    boxaAddBox(boxa1, box, L_INSERT);
    box = boxCreate(117, 206, 26, 74);
    boxaAddBox(boxa1, box, L_INSERT);
    pix1 = DisplayBoxa(boxa1);
    pixDisplay(pix1, 100, 100);
    pixDestroy(&pix1);

    boxaCompareRegions(boxa1, boxa1, 100, &same, &diffarea, &diffxor, NULL);
    fprintf(stderr, "same = %d, diffarea = %5.3f, diffxor = %5.3f\n",
            same, diffarea, diffxor);

    boxa2 = boxaTransform(boxa1, -13, -13, 1.0, 1.0);
    boxaCompareRegions(boxa1, boxa2, 10, &same, &diffarea, &diffxor, NULL);
    fprintf(stderr, "same = %d, diffarea = %5.3f, diffxor = %5.3f\n",
            same, diffarea, diffxor);
    boxaDestroy(&boxa2);

    boxa2 = boxaReconcileEvenOddHeight(boxa1, L_ADJUST_TOP_AND_BOT, 6,
                                       L_ADJUST_CHOOSE_MIN, 1.0, 0);
    pix1 = DisplayBoxa(boxa2);
    pixDisplay(pix1, 100, 500);
    pixDestroy(&pix1);

    boxaCompareRegions(boxa1, boxa2, 10, &same, &diffarea, &diffxor, &pixdb);
    fprintf(stderr, "same = %d, diffarea = %5.3f, diffxor = %5.3f\n",
            same, diffarea, diffxor);
    pixDisplay(pixdb, 700, 100);

    pixDestroy(&pixdb);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);

    /* Input is a fairly clean boxa */
    boxa1 = boxaRead("boxa1.ba");
    boxa2 = boxaReconcileEvenOddHeight(boxa1, L_ADJUST_TOP, 80,
                                       L_ADJUST_CHOOSE_MIN, 1.05, 1);
    width = 100;
    boxaGetExtent(boxa2, &w, &h, NULL);
    scalefact = (l_float32)width / (l_float32)w;
    boxa3 = boxaTransform(boxa2, 0, 0, scalefact, scalefact);
    pix1 = boxaDisplayTiled(boxa3, NULL, 1500, 2, 1.0, 0, 3, 2);
    pixDisplay(pix1, 0, 100);
    pixWrite("/tmp/lept/boxa/pix1.png", pix1, IFF_PNG);
    pixDestroy(&pix1);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
    boxaDestroy(&boxa3);

    /* Input is an unsmoothed and noisy boxa */
    boxa1 = boxaRead("boxa2.ba");
    boxa2 = boxaReconcileEvenOddHeight(boxa1, L_ADJUST_TOP, 80,
                                       L_ADJUST_CHOOSE_MIN, 1.05, 1);
    width = 100;
    boxaGetExtent(boxa2, &w, &h, NULL);
    scalefact = (l_float32)width / (l_float32)w;
    boxa3 = boxaTransform(boxa2, 0, 0, scalefact, scalefact);
    pix1 = boxaDisplayTiled(boxa3, NULL, 1500, 2, 1.0, 0, 3, 2);
    pixDisplay(pix1, 500, 100);
    pixWrite("/tmp/lept/boxa/pix2.png", pix1, IFF_PNG);
    pixDestroy(&pix1);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
    boxaDestroy(&boxa3);

    /* Input is a boxa smoothed with a median window filter */
    boxa1 = boxaRead("boxa3.ba");
    boxa2 = boxaReconcileEvenOddHeight(boxa1, L_ADJUST_TOP, 80,
                                       L_ADJUST_CHOOSE_MIN, 1.05, 1);
    width = 100;
    boxaGetExtent(boxa2, &w, &h, NULL);
    scalefact = (l_float32)width / (l_float32)w;
    boxa3 = boxaTransform(boxa2, 0, 0, scalefact, scalefact);
    pix1 = boxaDisplayTiled(boxa3, NULL, 1500, 2, 1.0, 0, 3, 2);
    pixDisplay(pix1, 1000, 100);
    pixWrite("/tmp/lept/boxa/pix3.png", pix1, IFF_PNG);
    pixDestroy(&pix1);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
    boxaDestroy(&boxa3);

    /* Test serialized boxa I/O to and from memory */
    data1 = l_binaryRead("boxa2.ba", &size1);
    boxa1 = boxaReadMem(data1, size1);
    boxaWriteMem(&data2, &size2, boxa1);
    boxa2 = boxaReadMem(data2, size2);
    boxaWrite("/tmp/lept/boxa/boxa1.ba", boxa1);
    boxaWrite("/tmp/lept/boxa/boxa2.ba", boxa2);
    filesAreIdentical("/tmp/lept/boxa/boxa1.ba", "/tmp/lept/boxa/boxa2.ba",
                      &same);
    if (same)
        fprintf(stderr, "Good: boxes files are identical\n");
    else
        fprintf(stderr, "Bad: boxes files differ\n");
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
    lept_free(data1);
    lept_free(data2);

    /* Test pixaDisplayBoxaa() */
    pixa1 = pixaReadBoth("showboxes.pac");
    baa1 = boxaaRead("showboxes1.baa");
    baa2 = boxaaTranspose(baa1);
    baa3 = boxaaTranspose(baa2);
    nba = boxaaGetCount(baa1);
    success = TRUE;
    for (i = 0; i < nba; i++) {
        boxa1 = boxaaGetBoxa(baa1, i, L_CLONE);
        boxa2 = boxaaGetBoxa(baa3, i, L_CLONE);
        boxaEqual(boxa1, boxa2, 0, NULL, &same);
        boxaDestroy(&boxa1);
        boxaDestroy(&boxa2);
        if (!same) success = FALSE;
    }
    if (success)
        fprintf(stderr, "Good: transpose is reversible\n");
    else
        fprintf(stderr, "Bad: transpose failed\n");
    pixa2 = pixaDisplayBoxaa(pixa1, baa2, L_DRAW_RGB, 2);
    pix1 = pixaDisplayTiledInRows(pixa2, 32, 1400, 1.0, 0, 10, 0);
    pixDisplay(pix1, 0, 600);
    fprintf(stderr, "Writing to: /tmp/lept/boxa/show.pdf\n");
    pixaConvertToPdf(pixa2, 75, 1.0, 0, 0, NULL, "/tmp/lept/boxa/show.pdf");
    pixDestroy(&pix1);
    pixaDestroy(&pixa1);
    pixaDestroy(&pixa2);
    boxaaDestroy(&baa1);
    boxaaDestroy(&baa2);
    boxaaDestroy(&baa3);

    return 0;
}
示例#11
0
/*
 * Regression test for generating a pdf from segmented page images.
 *
 * Six scaled page images are written to /tmp/segtest.  For each page a
 * boxa holding its "image" regions (possibly empty) is added to a boxaa,
 * which is then handed, together with the directory, to
 * convertSegmentedFilesToPdf().  The number in the trailing comment on
 * each regTestCheckFile() line is the index of that check.
 *
 * Returns 1 if regTestSetup() fails; otherwise the value of
 * regTestCleanup().
 */
int main(int    argc,
         char **argv)
{
l_int32      h;
l_float32    scalefactor;
BOX         *box;
BOXA        *boxa1, *boxa2;
BOXAA       *baa;
PIX         *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7, *pix8, *pix9;
L_REGPARAMS  *rp;

    if (regTestSetup(argc, argv, &rp))
        return 1;

        /* Start from an empty output directory */
    lept_rmdir("segtest");
    lept_mkdir("segtest");
    baa = boxaaCreate(5);

        /* Image region input.  */
    pix1 = pixRead("wet-day.jpg");
    pix2 = pixScaleToSize(pix1, WIDTH, 0);
    pixWrite("/tmp/segtest/0.jpg", pix2, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/0.jpg");   /* 0 */
    box = boxCreate(105, 161, 620, 872);   /* image region */
    boxa1 = boxaCreate(1);
    boxaAddBox(boxa1, box, L_INSERT);
    boxaaAddBoxa(baa, boxa1, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);

        /* Compute image region at w = 2 * WIDTH */
    pix1 = pixRead("candelabrum-11.jpg");
    pix2 = pixScaleToSize(pix1, WIDTH, 0);
    pix3 = pixConvertTo1(pix2, 100);
    pix4 = pixExpandBinaryPower2(pix3, 2);  /* w = 2 * WIDTH */
    pix5 = pixGenHalftoneMask(pix4, NULL, NULL, 1);
    pix6 = pixMorphSequence(pix5, "c20.1 + c1.20", 0);
    pix7 = pixMaskConnComp(pix6, 8, &boxa1);
    pix8 = pixReduceBinary2(pix7, NULL);  /* back to w = WIDTH */
    pix9 = pixBackgroundNormSimple(pix2, pix8, NULL);
    pixWrite("/tmp/segtest/1.jpg", pix9, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/1.jpg");   /* 1 */
    boxa2 = boxaTransform(boxa1, 0, 0, 0.5, 0.5);  /* back to w = WIDTH */
    boxaaAddBoxa(baa, boxa2, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);
    pixDestroy(&pix4);
    pixDestroy(&pix5);
    pixDestroy(&pix6);
    pixDestroy(&pix7);
    pixDestroy(&pix8);
    pixDestroy(&pix9);
    boxaDestroy(&boxa1);  /* only the scaled boxa2 was inserted in baa */

        /* Use mask to find image region */
    pix1 = pixRead("lion-page.00016.jpg");
    pix2 = pixScaleToSize(pix1, WIDTH, 0);
    pixWrite("/tmp/segtest/2.jpg", pix2, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/2.jpg");   /* 2 */
    pix3 = pixRead("lion-mask.00016.tif");
    pix4 = pixScaleToSize(pix3, WIDTH, 0);
    boxa1 = pixConnComp(pix4, NULL, 8);
    boxaaAddBoxa(baa, boxa1, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);
    pixDestroy(&pix4);

        /* Compute image region at full res */
    pix1 = pixRead("rabi.png");
    scalefactor = (l_float32)WIDTH / (l_float32)pixGetWidth(pix1);
    pix2 = pixScaleToGray(pix1, scalefactor);
    pixWrite("/tmp/segtest/3.jpg", pix2, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/3.jpg");   /* 3 */
    pix3 = pixGenHalftoneMask(pix1, NULL, NULL, 0);
    pix4 = pixMorphSequence(pix3, "c20.1 + c1.20", 0);
    boxa1 = pixConnComp(pix4, NULL, 8);
    boxa2 = boxaTransform(boxa1, 0, 0, scalefactor, scalefactor);
    boxaaAddBoxa(baa, boxa2, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);
    pixDestroy(&pix4);
    boxaDestroy(&boxa1);  /* only the scaled boxa2 was inserted in baa */

        /* Page with no image regions: an empty boxa is added */
    pix1 = pixRead("lucasta-47.jpg");
    pix2 = pixScaleToSize(pix1, WIDTH, 0);
    boxa1 = boxaCreate(1);
    pixWrite("/tmp/segtest/4.jpg", pix2, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/4.jpg");   /* 4 */
    boxaaAddBoxa(baa, boxa1, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);

        /* Page that is all image: one box covering the scaled page */
    pix1 = pixRead("map1.jpg");
    pix2 = pixScaleToSize(pix1, WIDTH, 0);
    pixWrite("/tmp/segtest/5.jpg", pix2, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/5.jpg");   /* 5 */
    h = pixGetHeight(pix2);
    box = boxCreate(0, 0, WIDTH, h);
    boxa1 = boxaCreate(1);
    boxaAddBox(boxa1, box, L_INSERT);
    boxaaAddBoxa(baa, boxa1, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);

        /* Save the boxaa file */
    boxaaWrite("/tmp/segtest/seg.baa", baa);
    regTestCheckFile(rp, "/tmp/segtest/seg.baa");   /* 6 */

        /* Do the conversion */
    l_pdfSetDateAndVersion(FALSE);
    convertSegmentedFilesToPdf("/tmp/segtest", ".jpg", 100, L_G4_ENCODE,
                               140, baa, 75, 0.6, "Segmentation Test",
                               "/tmp/pdfseg.7.pdf");
    regTestCheckFile(rp, "/tmp/pdfseg.7.pdf");   /* 7 */

    boxaaDestroy(&baa);
    return regTestCleanup(rp);
}
示例#12
0
/*
 * quadtreetest: exercises quadtree region generation, quadtree
 * mean/variance statistics, and parent/child access in the resulting
 * fpixa.  Takes no arguments.  Region descriptions and the outcome of
 * the access tests go to stderr; images go to /tmp/quadtree[1-5].png.
 */
int main(int    argc,
         char **argv)
{
l_int32      i, j, w, h, error;
l_float32    val1, val2;
l_float32    val00, val10, val01, val11, valc00, valc10, valc01, valc11;
PIX         *pixs, *pixg, *pixt1, *pixt2, *pixt3, *pixt4, *pixt5;
FPIXA       *fpixam, *fpixav, *fpixarv;
BOXAA       *baa;
static char  mainName[] = "quadtreetest";

    if (argc != 1)
        return ERROR_INT(" Syntax:  quadtreetest", mainName, 1);

        /* Test generation of quadtree regions. */
    baa = boxaaQuadtreeRegions(1000, 500, 3);
    boxaaWriteStream(stderr, baa);
    boxaaDestroy(&baa);
    baa = boxaaQuadtreeRegions(1001, 501, 3);
    boxaaWriteStream(stderr, baa);
    boxaaDestroy(&baa);

        /* Test quadtree stats generation */
#if 1
    pixs = pixRead("rabi.png");
    pixg = pixScaleToGray4(pixs);
#else
    pixs = pixRead("test24.jpg");
    pixg = pixConvertTo8(pixs, 0);
#endif
    pixQuadtreeMean(pixg, 8, NULL, &fpixam);
    pixt1 = fpixaDisplayQuadtree(fpixam, 4);
    pixDisplay(pixt1, 100, 0);
    pixWrite("/tmp/quadtree1.png", pixt1, IFF_PNG);
    pixQuadtreeVariance(pixg, 8, NULL, NULL, &fpixav, &fpixarv);
    pixt2 = fpixaDisplayQuadtree(fpixav, 4);
    pixDisplay(pixt2, 100, 200);
    pixWrite("/tmp/quadtree2.png", pixt2, IFF_PNG);
    pixt3 = fpixaDisplayQuadtree(fpixarv, 4);
    pixDisplay(pixt3, 100, 400);
    pixWrite("/tmp/quadtree3.png", pixt3, IFF_PNG);

        /* Compare with fixed-size tiling at a resolution corresponding
         * to the deepest level of the quadtree above */
    pixt4 = pixGetAverageTiled(pixg, 5, 6, L_MEAN_ABSVAL);
    pixt5 = pixExpandReplicate(pixt4, 4);
    pixWrite("/tmp/quadtree4.png", pixt5, IFF_PNG);
    pixDisplay(pixt5, 800, 0);
    pixDestroy(&pixt4);
    pixDestroy(&pixt5);
    pixt4 = pixGetAverageTiled(pixg, 5, 6, L_STANDARD_DEVIATION);
    pixt5 = pixExpandReplicate(pixt4, 4);
    pixWrite("/tmp/quadtree5.png", pixt5, IFF_PNG);
    pixDisplay(pixt5, 800, 400);

        /* Test quadtree parent/child access: the parent of a level 4
         * cell must equal the level 3 value at half its coordinates */
    error = FALSE;
    fpixaGetFPixDimensions(fpixam, 4, &w, &h);
    for (i = 0; i < w; i += 2) {
        for (j = 0; j < h; j += 2) {
            quadtreeGetParent(fpixam, 4, j, i, &val1);
            fpixaGetPixel(fpixam, 3, j / 2, i / 2, &val2);
            if (val1 != val2) error = TRUE;
        }
    }
    if (error)
        fprintf(stderr, "\n======================\nError: parent access\n");
    else
        fprintf(stderr, "\n======================\nSuccess: parent access\n");

        /* The four children of a level 4 cell must equal the level 5
         * values at doubled coordinates, plus 0 or 1 in each direction */
    error = FALSE;
    for (i = 0; i < w; i++) {
        for (j = 0; j < h; j++) {
            quadtreeGetChildren(fpixam, 4, j, i,
                                &val00, &val10, &val01, &val11);
            fpixaGetPixel(fpixam, 5, 2 * j, 2 * i, &valc00);
            fpixaGetPixel(fpixam, 5, 2 * j + 1, 2 * i, &valc10);
            fpixaGetPixel(fpixam, 5, 2 * j, 2 * i + 1, &valc01);
            fpixaGetPixel(fpixam, 5, 2 * j + 1, 2 * i + 1, &valc11);
            if ((val00 != valc00) || (val10 != valc10) ||
                (val01 != valc01) || (val11 != valc11))
                error = TRUE;
        }
    }
    if (error)
        fprintf(stderr, "Error: child access\n======================\n");
    else
        fprintf(stderr, "Success: child access\n======================\n");

    pixDestroy(&pixs);
    pixDestroy(&pixg);
    pixDestroy(&pixt1);
    pixDestroy(&pixt2);
    pixDestroy(&pixt3);
    pixDestroy(&pixt4);
    pixDestroy(&pixt5);
    fpixaDestroy(&fpixam);
    fpixaDestroy(&fpixav);
    fpixaDestroy(&fpixarv);
    return 0;
}
/*
 * convertsegfilestopdf: command-line front end to
 * convertSegmentedFilesToPdf().
 *
 * Expects exactly nine arguments (see the usage message).  Out-of-range
 * values are replaced rather than rejected: scalefactor outside (0, 1]
 * becomes 1.0, a type other than 1, 2 or 3 becomes L_G4_ENCODE, and
 * thresh <= 0 becomes 150.  If the boxaa file cannot be read, the files
 * are converted unsegmented with convertFilesToPdf().
 *
 * Returns 1 on a usage error; otherwise the value returned by the
 * conversion function.
 */
int main(int    argc,
         char **argv)
{
char        *dirin, *substr, *title, *fileout, *boxaafile, *boxaapath;
l_int32      ret, res, type, thresh;
l_float32    scalefactor;
BOXAA       *baa;
static char  mainName[] = "convertsegfilestopdf";

    if (argc != 10) {
        fprintf(stderr,
            " Syntax: convertsegfilestopdf dirin substr res type thresh \\ \n"
            "                       boxaafile scalefactor title fileout\n"
            "     where\n"
            "         dirin:  input directory for image files\n"
            "         substr:  Use 'allfiles' to convert all files\n"
            "                  in the directory\n"
            "         res:  Input resolution of each image;\n"
            "               assumed to all be the same\n"
            "         type: compression used for non-image regions:\n"
            "               0: default (G4 encoding)\n"
            "               1: JPEG encoding\n"
            "               2: G4 encoding\n"
            "               3: PNG encoding\n"
            "         thresh:  threshold for binarization; use 0 for default\n"
            "         boxaafile: File of 'image' regions within each page\n"
            "                    This contains a boxa for each page,\n"
            "                    consisting of a set of regions\n"
            "         scalefactor:  Use to scale down the image regions\n"
            "         title:  Use 'none' to omit\n"
            "         fileout:  Output pdf file\n");
        return 1;
    }

    dirin = argv[1];
    substr = argv[2];
    res = atoi(argv[3]);
    type = atoi(argv[4]);
    thresh = atoi(argv[5]);
    boxaafile = argv[6];
    scalefactor = atof(argv[7]);
    title = argv[8];
    fileout = argv[9];

        /* Map the keyword arguments and out-of-range values to defaults */
    if (!strcmp(substr, "allfiles"))
        substr = NULL;
    if (scalefactor <= 0.0 || scalefactor > 1.0) {
        L_WARNING("invalid scalefactor: setting to 1.0", mainName);
        scalefactor = 1.0;
    }
    if (type != 1 && type != 2 && type != 3)
        type = L_G4_ENCODE;
    if (thresh <= 0)
        thresh = 150;
    if (!strcmp(title, "none"))
        title = NULL;

        /* Without a readable boxaa there is no segmentation info,
         * so fall back to a plain conversion at scale 1.0 */
    boxaapath = genPathname(boxaafile, NULL);
    if ((baa = boxaaRead(boxaapath)) == NULL) {
        L_WARNING(
            "boxaa file not found; converting unsegmented and unscaled",
            mainName);
        ret = convertFilesToPdf(dirin, substr, res, 1.0, 75, title,
                                fileout);
        FREE(boxaapath);
        return ret;
    }

    ret = convertSegmentedFilesToPdf(dirin, substr, res, type, thresh, baa,
                                     75, scalefactor, title, fileout);
    FREE(boxaapath);
    boxaaDestroy(&baa);
    return ret;
}
示例#14
0
// Creates new set of lines from the computed columns.
//
// Works in three passes over `lines`:
//   1. every line rejected by ValidLine() is removed and replaced, at the
//      same position, by the pieces returned from SplitLine() (if any);
//   2. the connected-component boxes of every line are computed;
//   3. every line flagged by SmallLine() that MergeLine() manages to merge
//      into another line is removed.
// On success `lines` is added to columns_ with L_INSERT and true is
// returned. On any failure false is returned; all temporaries created here
// (the per-line boxaa, the split pixa, and clones that could not be
// inserted) are destroyed first so that error paths do not leak.
bool CubeLineSegmenter::AddLines(Pixa *lines) {
  // create an array that will hold the bounding boxes
  // of the concomps belonging to each line
  Boxaa *lines_con_comps = boxaaCreate(lines->n);
  if (lines_con_comps == NULL) {
    return false;
  }

  for (int line = 0; line < lines->n; line++) {
    // if the line is not valid
    if (ValidLine(lines->pix[line], lines->boxa->box[line]) == false) {
      // split it
      Pixa *split_lines = SplitLine(lines->pix[line],
          lines->boxa->box[line]);

      // remove the old line
      if (pixaRemovePix(lines, line) != 0) {
        pixaDestroy(&split_lines);
        boxaaDestroy(&lines_con_comps);
        return false;
      }

      // step back so that the next index visited is the slot just vacated
      line--;

      if (split_lines == NULL) {
        continue;
      }

      // add the split lines instead and move the pointer
      for (int s_line = 0; s_line < split_lines->n; s_line++) {
        Pix *sp_line = pixaGetPix(split_lines, s_line, L_CLONE);
        Box *sp_box = boxaGetBox(split_lines->boxa, s_line, L_CLONE);

        // insert the new line; the clones are still ours if either the
        // lookup or the insertion fails, so release them before bailing out
        if (sp_line == NULL || sp_box == NULL ||
            pixaInsertPix(lines, ++line, sp_line, sp_box) != 0) {
          pixDestroy(&sp_line);
          boxDestroy(&sp_box);
          pixaDestroy(&split_lines);
          boxaaDestroy(&lines_con_comps);
          return false;
        }
      }

      // remove the split lines
      pixaDestroy(&split_lines);
    }
  }

  // compute the concomps bboxes of each line
  for (int line = 0; line < lines->n; line++) {
    Boxa *line_con_comps = ComputeLineConComps(lines->pix[line],
        lines->boxa->box[line], NULL);

    if (line_con_comps == NULL) {
      boxaaDestroy(&lines_con_comps);
      return false;
    }

    // insert it into the boxaa array
    if (boxaaAddBoxa(lines_con_comps, line_con_comps, L_INSERT) != 0) {
      boxaDestroy(&line_con_comps);
      boxaaDestroy(&lines_con_comps);
      return false;
    }
  }

  // post process the lines:
  // merge the contents of "small" lines into legitimate lines
  for (int line = 0; line < lines->n; line++) {
    // a small line detected
    if (SmallLine(lines->boxa->box[line]) == true) {
      // merge its components to one of the valid lines
      if (MergeLine(lines->pix[line], lines->boxa->box[line],
          lines, lines_con_comps) == true) {
        // remove the small line
        if (pixaRemovePix(lines, line) != 0) {
          boxaaDestroy(&lines_con_comps);
          return false;
        }

        if (boxaaRemoveBoxa(lines_con_comps, line) != 0) {
          boxaaDestroy(&lines_con_comps);
          return false;
        }

        // re-examine the entry that moved into this slot
        line--;
      }
    }
  }

  boxaaDestroy(&lines_con_comps);

  // add the pix masks
  if (pixaaAddPixa(columns_, lines, L_INSERT) != 0) {
    return false;
  }

  return true;
}
示例#15
0
/*!
 *  pixQuadtreeVariance()
 *
 *      Input:  pixs (8 bpp, no colormap)
 *              nlevels (in quadtree)
 *             *pix_ma (input mean accumulator; can be null)
 *             *dpix_msa (input mean square accumulator; can be null)
 *             *pfpixa_v (<optional return> variance values in quadtree)
 *             *pfpixa_rv (<optional return> root variance values in quadtree)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) The returned fpixav and fpixarv have @nlevels of fpix,
 *          each containing at the respective levels the variance
 *          and root variance values.
 *      (2) At least one of &fpixav and &fpixarv must be defined; each
 *          one that is defined is set to null before any other check.
 *      (3) If an accumulator is not supplied, it is computed here from
 *          pixs.  The local handles to both accumulators are released
 *          on every return path after they have been made.
 *      (4) The fpix at level i is (2^i x 2^i); the value for box j in
 *          that level is stored at (j % size, j / size).
 */
l_int32
pixQuadtreeVariance(PIX *pixs,
                    l_int32 nlevels,
                    PIX *pix_ma,
                    DPIX *dpix_msa,
                    FPIXA **pfpixa_v,
                    FPIXA **pfpixa_rv) {
    l_int32 i, j, w, h, size, n;
    l_float32 var, rvar;
    BOX *box;
    BOXA *boxa;
    BOXAA *baa;
    FPIX *fpixv = NULL;   /* only made when pfpixa_v is defined */
    FPIX *fpixrv = NULL;  /* only made when pfpixa_rv is defined */
    PIX *pix_mac;  /* copy of mean accumulator */
    DPIX *dpix_msac;  /* msa clone */

    PROCNAME("pixQuadtreeVariance");

    if (!pfpixa_v && !pfpixa_rv)
        return ERROR_INT("neither &fpixav nor &fpixarv defined", procName, 1);
    if (pfpixa_v) *pfpixa_v = NULL;
    if (pfpixa_rv) *pfpixa_rv = NULL;
    if (!pixs || pixGetDepth(pixs) != 8)
        return ERROR_INT("pixs not defined or not 8 bpp", procName, 1);
    pixGetDimensions(pixs, &w, &h, NULL);
    if (nlevels > quadtreeMaxLevels(w, h))
        return ERROR_INT("nlevels too large for image", procName, 1);

        /* Use the supplied accumulators, or make them from pixs */
    if (!pix_ma)
        pix_mac = pixBlockconvAccum(pixs);
    else
        pix_mac = pixClone(pix_ma);
    if (!pix_mac)
        return ERROR_INT("pix_mac not made", procName, 1);
    if (!dpix_msa)
        dpix_msac = pixMeanSquareAccum(pixs);
    else
        dpix_msac = dpixClone(dpix_msa);
    if (!dpix_msac) {
        pixDestroy(&pix_mac);  /* was leaked on this path */
        return ERROR_INT("dpix_msac not made", procName, 1);
    }

    if ((baa = boxaaQuadtreeRegions(w, h, nlevels)) == NULL) {
        pixDestroy(&pix_mac);
        dpixDestroy(&dpix_msac);
        return ERROR_INT("baa not made", procName, 1);
    }

    if (pfpixa_v) *pfpixa_v = fpixaCreate(nlevels);
    if (pfpixa_rv) *pfpixa_rv = fpixaCreate(nlevels);
    for (i = 0; i < nlevels; i++) {
        boxa = boxaaGetBoxa(baa, i, L_CLONE);
        size = 1 << i;
        n = boxaGetCount(boxa);  /* n == size * size */
        if (pfpixa_v) fpixv = fpixCreate(size, size);
        if (pfpixa_rv) fpixrv = fpixCreate(size, size);
        for (j = 0; j < n; j++) {
            box = boxaGetBox(boxa, j, L_CLONE);
            pixVarianceInRectangle(pixs, box, pix_mac, dpix_msac, &var, &rvar);
            if (pfpixa_v) fpixSetPixel(fpixv, j % size, j / size, var);
            if (pfpixa_rv) fpixSetPixel(fpixrv, j % size, j / size, rvar);
            boxDestroy(&box);
        }
        if (pfpixa_v) fpixaAddFPix(*pfpixa_v, fpixv, L_INSERT);
        if (pfpixa_rv) fpixaAddFPix(*pfpixa_rv, fpixrv, L_INSERT);
        boxaDestroy(&boxa);
    }

    pixDestroy(&pix_mac);
    dpixDestroy(&dpix_msac);
    boxaaDestroy(&baa);
    return 0;
}
示例#16
0
int main(int    argc,
         char **argv)
{
l_uint8     *data1, *data2;
l_int32      i, same, w, h, width, success, nba;
size_t       size1, size2;
l_float32    diffarea, diffxor, scalefact;
BOX         *box;
BOXA        *boxa1, *boxa2, *boxa3;
BOXAA       *baa1, *baa2, *baa3;
PIX         *pix1, *pixdb;
PIXA        *pixa1, *pixa2;
L_REGPARAMS  *rp;

    if (regTestSetup(argc, argv, &rp))
        return 1;

    lept_mkdir("lept/boxa");

        /* Make a boxa and display its contents */
    boxa1 = boxaCreate(6);
    box = boxCreate(60, 60, 40, 20);
    boxaAddBox(boxa1, box, L_INSERT);
    box = boxCreate(120, 50, 20, 50);
    boxaAddBox(boxa1, box, L_INSERT);
    box = boxCreate(50, 140, 46, 60);
    boxaAddBox(boxa1, box, L_INSERT);
    box = boxCreate(166, 130, 64, 28);
    boxaAddBox(boxa1, box, L_INSERT);
    box = boxCreate(64, 224, 44, 34);
    boxaAddBox(boxa1, box, L_INSERT);
    box = boxCreate(117, 206, 26, 74);
    boxaAddBox(boxa1, box, L_INSERT);
    pix1 = DisplayBoxa(boxa1);
    regTestWritePixAndCheck(rp, pix1, IFF_PNG);  /* 0 */
    pixDisplayWithTitle(pix1, 0, 0, NULL, rp->display);
    pixDestroy(&pix1);

    boxaCompareRegions(boxa1, boxa1, 100, &same, &diffarea, &diffxor, NULL);
    regTestCompareValues(rp, 1, same, 0.0);  /* 1 */
    regTestCompareValues(rp, 0.0, diffarea, 0.0);  /* 2 */
    regTestCompareValues(rp, 0.0, diffxor, 0.0);  /* 3 */

    boxa2 = boxaTransform(boxa1, -13, -13, 1.0, 1.0);
    boxaCompareRegions(boxa1, boxa2, 10, &same, &diffarea, &diffxor, NULL);
    regTestCompareValues(rp, 1, same, 0.0);  /* 4 */
    regTestCompareValues(rp, 0.0, diffarea, 0.0);  /* 5 */
    regTestCompareValues(rp, 0.0, diffxor, 0.0);  /* 6 */
    boxaDestroy(&boxa2);

    boxa2 = boxaReconcileEvenOddHeight(boxa1, L_ADJUST_TOP_AND_BOT, 6,
                                       L_ADJUST_CHOOSE_MIN, 1.0, 0);
    pix1 = DisplayBoxa(boxa2);
    regTestWritePixAndCheck(rp, pix1, IFF_PNG);  /* 7 */
    pixDisplayWithTitle(pix1, 200, 0, NULL, rp->display);
    pixDestroy(&pix1);

    boxaCompareRegions(boxa1, boxa2, 10, &same, &diffarea, &diffxor, &pixdb);
    regTestCompareValues(rp, 1, same, 0.0);  /* 8 */
    regTestCompareValues(rp, 0.053, diffarea, 0.002);  /* 9 */
    regTestCompareValues(rp, 0.240, diffxor, 0.002);  /* 10 */
    regTestWritePixAndCheck(rp, pixdb, IFF_PNG);  /* 11 */
    pixDisplayWithTitle(pixdb, 400, 0, NULL, rp->display);
    pixDestroy(&pixdb);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);

        /* Input is a fairly clean boxa */
    boxa1 = boxaRead("boxa1.ba");
    boxa2 = boxaReconcileEvenOddHeight(boxa1, L_ADJUST_TOP, 80,
                                       L_ADJUST_CHOOSE_MIN, 1.05, 1);
    width = 100;
    boxaGetExtent(boxa2, &w, &h, NULL);
    scalefact = (l_float32)width / (l_float32)w;
    boxa3 = boxaTransform(boxa2, 0, 0, scalefact, scalefact);
    pix1 = boxaDisplayTiled(boxa3, NULL, 1500, 2, 1.0, 0, 3, 2);
    regTestWritePixAndCheck(rp, pix1, IFF_PNG);  /* 12 */
    pixDisplayWithTitle(pix1, 600, 0, NULL, rp->display);
    pixDestroy(&pix1);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
    boxaDestroy(&boxa3);

        /* Input is an unsmoothed and noisy boxa */
    boxa1 = boxaRead("boxa2.ba");
    boxa2 = boxaReconcileEvenOddHeight(boxa1, L_ADJUST_TOP, 80,
                                       L_ADJUST_CHOOSE_MIN, 1.05, 1);
    width = 100;
    boxaGetExtent(boxa2, &w, &h, NULL);
    scalefact = (l_float32)width / (l_float32)w;
    boxa3 = boxaTransform(boxa2, 0, 0, scalefact, scalefact);
    pix1 = boxaDisplayTiled(boxa3, NULL, 1500, 2, 1.0, 0, 3, 2);
    regTestWritePixAndCheck(rp, pix1, IFF_PNG);  /* 13 */
    pixDisplayWithTitle(pix1, 800, 0, NULL, rp->display);
    pixDestroy(&pix1);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
    boxaDestroy(&boxa3);

        /* Input is a boxa smoothed with a median window filter */
    boxa1 = boxaRead("boxa3.ba");
    boxa2 = boxaReconcileEvenOddHeight(boxa1, L_ADJUST_TOP, 80,
                                       L_ADJUST_CHOOSE_MIN, 1.05, 1);
    width = 100;
    boxaGetExtent(boxa2, &w, &h, NULL);
    scalefact = (l_float32)width / (l_float32)w;
    boxa3 = boxaTransform(boxa2, 0, 0, scalefact, scalefact);
    pix1 = boxaDisplayTiled(boxa3, NULL, 1500, 2, 1.0, 0, 3, 2);
    regTestWritePixAndCheck(rp, pix1, IFF_PNG);  /* 14 */
    pixDisplayWithTitle(pix1, 1000, 0, NULL, rp->display);
    pixDestroy(&pix1);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
    boxaDestroy(&boxa3);

        /* Test serialized boxa I/O to and from memory */
    data1 = l_binaryRead("boxa2.ba", &size1);
    boxa1 = boxaReadMem(data1, size1);
    boxaWriteMem(&data2, &size2, boxa1);
    boxa2 = boxaReadMem(data2, size2);
    boxaWrite("/tmp/lept/boxa/boxa1.ba", boxa1);
    boxaWrite("/tmp/lept/boxa/boxa2.ba", boxa2);
    filesAreIdentical("/tmp/lept/boxa/boxa1.ba", "/tmp/lept/boxa/boxa2.ba",
                      &same); 
    regTestCompareValues(rp, 1, same, 0.0);  /* 15 */
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);
    lept_free(data1);
    lept_free(data2);

        /* ----------- Test pixaDisplayBoxaa() ------------ */
    pixa1 = pixaReadBoth("showboxes.pac");
    baa1 = boxaaRead("showboxes1.baa");
    baa2 = boxaaTranspose(baa1);
    baa3 = boxaaTranspose(baa2);
    nba = boxaaGetCount(baa1);
    success = TRUE;
    for (i = 0; i < nba; i++) {
        boxa1 = boxaaGetBoxa(baa1, i, L_CLONE);
        boxa2 = boxaaGetBoxa(baa3, i, L_CLONE);
        boxaEqual(boxa1, boxa2, 0, NULL, &same);
        boxaDestroy(&boxa1);
        boxaDestroy(&boxa2);
        if (!same) success = FALSE;
    }
        /* Check that the transpose is reversible */
    regTestCompareValues(rp, 1, success, 0.0);  /* 16 */
    pixa2 = pixaDisplayBoxaa(pixa1, baa2, L_DRAW_RGB, 2);
    pix1 = pixaDisplayTiledInRows(pixa2, 32, 1400, 1.0, 0, 10, 0);
    regTestWritePixAndCheck(rp, pix1, IFF_PNG);  /* 17 */
    pixDisplayWithTitle(pix1, 0, 600, NULL, rp->display);
    fprintf(stderr, "Writing to: /tmp/lept/boxa/show.pdf\n");
    l_pdfSetDateAndVersion(FALSE);
    pixaConvertToPdf(pixa2, 75, 1.0, 0, 0, NULL, "/tmp/lept/boxa/show.pdf");
    regTestCheckFile(rp, "/tmp/lept/boxa/show.pdf");  /* 18 */
    pixDestroy(&pix1);
    pixaDestroy(&pixa1);
    pixaDestroy(&pixa2);
    boxaaDestroy(&baa1);
    boxaaDestroy(&baa2);
    boxaaDestroy(&baa3);

    return regTestCleanup(rp);
}
示例#17
0
/*
 * Example of identifying digit samples with a recognizer built from
 * training data.
 *
 * A recognizer is made from the pixa in recog/sets/train06.pa.  The
 * sample image recog/sets/samples06.png carries a serialized boxa in its
 * text field; that boxa is used to cut the image into 9 samples.  For
 * each sample the 2d-sorted connected component boxes are rendered, and
 * the numbers found by the recognizer are written to stderr.  The two
 * sets of renderings are written to /tmp/pix3.png and /tmp/pix4.png.
 *
 * Returns 0 normally; 1 if the sample image cannot be read or has no
 * text field.
 */
l_int32 main(int    argc,
             char **argv)
{
char      *boxatxt;
l_int32    i;
BOXA      *boxa1, *boxa2, *boxa3;
BOXAA     *baa, *baa1;
NUMAA     *naa1;
PIX       *pixdb, *pix1, *pix2, *pix3, *pix4;
PIXA      *pixa1, *pixa2, *pixa3, *pixat;
L_RECOG   *recog;
L_RECOGA  *recoga;
SARRAY    *sa1;

    /* ----- Example identifying samples using training data ----- */
#if 1
        /* Read the training data */
    pixat = pixaRead("recog/sets/train06.pa");
    recog = recogCreateFromPixa(pixat, 0, 0, L_USE_ALL, 128, 1);
    recoga = recogaCreateFromRecog(recog);
    pixaDestroy(&pixat);

        /* Read the data from all samples.  The text must be checked
         * before strlen(): it is null if the image could not be read
         * or carries no text. */
    pix1 = pixRead("recog/sets/samples06.png");
    boxatxt = pixGetText(pix1);
    if (!boxatxt) {
        fprintf(stderr, "samples image not read or has no boxa text\n");
        pixDestroy(&pix1);
        recogaDestroy(&recoga);
        return 1;
    }
    boxa1 = boxaReadMem((l_uint8 *)boxatxt, strlen(boxatxt));
    pixa1 = pixaCreateFromBoxa(pix1, boxa1, NULL);
        /* NOTE(review): boxatxt is presumably owned by pix1, so it must
         * not be used past this point; boxa1 is destroyed separately
         * at the end. */
    pixDestroy(&pix1);

        /* Identify components in the sample data */
    pixa2 = pixaCreate(0);
    pixa3 = pixaCreate(0);
    for (i = 0; i < 9; i++) {
/*        if (i != 4) continue; */  /* dots form separate boxa */
/*        if (i != 8) continue; */  /* broken 2 in '24' */
        pix1 = pixaGetPix(pixa1, i, L_CLONE);

            /* Show the 2d box data in the sample */
        boxa2 = pixConnComp(pix1, NULL, 8);
        baa = boxaSort2d(boxa2, NULL, 6, 6, 5);
        pix2 = boxaaDisplay(baa, 3, 1, 0xff000000, 0x00ff0000, 0, 0);
        pixaAddPix(pixa3, pix2, L_INSERT);
        boxaaDestroy(&baa);
        boxaDestroy(&boxa2);

            /* Get the numbers in the sample */
        recogaIdentifyMultiple(recoga, pix1, 0, 5, 3, &boxa3, NULL, &pixdb, 0);
        sa1 = recogaExtractNumbers(recoga, boxa3, 0.7, -1, &baa1, &naa1);
        sarrayWriteStream(stderr, sa1);
        boxaaWriteStream(stderr, baa1);
        numaaWriteStream(stderr, naa1);
        pixaAddPix(pixa2, pixdb, L_INSERT);
/*        pixaWrite("/tmp/pixa.pa", pixa2); */
        pixDestroy(&pix1);
        boxaDestroy(&boxa3);
        boxaaDestroy(&baa1);
        numaaDestroy(&naa1);
        sarrayDestroy(&sa1);
    }

        /* Write out the debug renderings and the 2d box renderings */
    pix3 = pixaDisplayLinearly(pixa2, L_VERT, 1.0, 0, 20, 1, NULL);
    pixWrite("/tmp/pix3.png", pix3, IFF_PNG);
    pix4 = pixaDisplayTiledInRows(pixa3, 32, 1500, 1.0, 0, 20, 2);
    pixDisplay(pix4, 500, 0);
    pixWrite("/tmp/pix4.png", pix4, IFF_PNG);
    pixaDestroy(&pixa2);
    pixaDestroy(&pixa3);
    pixDestroy(&pix3);
    pixDestroy(&pix4);
    pixaDestroy(&pixa1);
    boxaDestroy(&boxa1);
    recogaDestroy(&recoga);
#endif

    return 0;
}
示例#18
0
/*!
 *  boxaSort2d()
 *
 *      Input:  boxas
 *              &naad (<optional return> numaa with sorted indices
 *                     whose values are the indices of the input array)
 *              delta1 (min overlap that permits aggregation of a box
 *                      onto a boxa of horizontally-aligned boxes; pass 1)
 *              delta2 (min overlap that permits aggregation of a box
 *                      onto a boxa of horizontally-aligned boxes; pass 2)
 *              minh1 (components less than this height either join an
 *                     existing boxa or are set aside for pass 2)
 *      Return: boxaa (2d sorted version of boxa), or null on error
 *
 *  Notes:
 *      (1) The final result is a sort where the 'fast scan' direction is
 *          left to right, and the 'slow scan' direction is from top
 *          to bottom.  Each boxa in the boxaa represents a sorted set
 *          of boxes from left to right.
 *      (2) Two passes are used to aggregate the boxas, which can correspond
 *          to characters or words in a line of text.  In pass 1, only
 *          taller components, which correspond to xheight or larger,
 *          are permitted to start a new boxa, whereas in pass 2,
 *          the remaining vertically-challenged components are allowed
 *          to join an existing boxa or start a new one.
 *      (3) If delta1 < 0, the first pass allows aggregation when
 *          boxes in the same boxa do not overlap vertically.
 *          The distance by which they can miss and still be aggregated
 *          is the absolute value |delta1|.   Similar for delta2 on
 *          the second pass.
 *      (4) On the first pass, any component of height less than minh1
 *          cannot start a new boxa; it's put aside for later insertion.
 *      (5) On the second pass, any small component that doesn't align
 *          with an existing boxa can start a new one.
 *      (6) This can be used to identify lines of text from
 *          character or word bounding boxes.
 *      (7) Every value stored in the returned numaa is an index into
 *          the input boxas, for boxes placed in either pass.  The
 *          small boxes deferred to pass 2 are mapped back through the
 *          left-to-right sort index before being recorded.
 */
BOXAA *
boxaSort2d(BOXA    *boxas,
           NUMAA  **pnaad,
           l_int32  delta1,
           l_int32  delta2,
           l_int32  minh1)
{
l_int32  i, index, h, nt, ne, n, m, ival;
BOX     *box;
BOXA    *boxa, *boxae, *boxan, *boxat1, *boxat2, *boxav, *boxavs;
BOXAA   *baa, *baad;
NUMA    *naindex, *nae, *nan, *nah, *nav, *nat1, *nat2, *nad;
NUMAA   *naa, *naad;

    PROCNAME("boxaSort2d");

    if (pnaad) *pnaad = NULL;
    if (!boxas)
        return (BOXAA *)ERROR_PTR("boxas not defined", procName, NULL);

        /* Sort from left to right.  naindex maps each position in the
         * sorted boxa back to its position in the input boxas. */
    if ((boxa = boxaSort(boxas, L_SORT_BY_X, L_SORT_INCREASING, &naindex))
                    == NULL)
        return (BOXAA *)ERROR_PTR("boxa not made", procName, NULL);

        /* First pass: assign taller boxes to boxa by row */
    nt = boxaGetCount(boxa);
    baa = boxaaCreate(0);
    naa = numaaCreate(0);
    boxae = boxaCreate(0);  /* save small height boxes here */
    nae = numaCreate(0);  /* input indices of the small height boxes */
    for (i = 0; i < nt; i++) {
        box = boxaGetBox(boxa, i, L_CLONE);
        boxGetGeometry(box, NULL, NULL, NULL, &h);
            /* Index of this box in the input boxas, not in the
             * x-sorted boxa that is being traversed here. */
        numaGetIValue(naindex, i, &ival);
        if (h < minh1) {  /* save for 2nd pass */
            boxaAddBox(boxae, box, L_INSERT);
                /* Store the input index; naindex is destroyed before
                 * the second pass, so the mapping must be done now. */
            numaAddNumber(nae, ival);
        }
        else {
            n = boxaaGetCount(baa);
            boxaaAlignBox(baa, box, delta1, &index);
            if (index < n) {  /* append to an existing boxa */
                boxaaAddBox(baa, index, box, L_INSERT);
            }
            else {  /* doesn't align, need new boxa */
                boxan = boxaCreate(0);
                boxaAddBox(boxan, box, L_INSERT);
                boxaaAddBoxa(baa, boxan, L_INSERT);
                nan = numaCreate(0);
                numaaAddNuma(naa, nan, L_INSERT);
            }
            numaaAddNumber(naa, index, ival);
        }
    }
    boxaDestroy(&boxa);
    numaDestroy(&naindex);

        /* Second pass: feed in small height boxes;
         * TODO: do this correctly, using local y position! */
    ne = boxaGetCount(boxae);
    for (i = 0; i < ne; i++) {
        box = boxaGetBox(boxae, i, L_CLONE);
        n = boxaaGetCount(baa);
        boxaaAlignBox(baa, box, delta2, &index);
        if (index < n) {  /* append to an existing boxa */
            boxaaAddBox(baa, index, box, L_INSERT);
        }
        else {  /* doesn't align, need new boxa */
            boxan = boxaCreate(0);
            boxaAddBox(boxan, box, L_INSERT);
            boxaaAddBoxa(baa, boxan, L_INSERT);
            nan = numaCreate(0);
            numaaAddNuma(naa, nan, L_INSERT);
        }
        numaGetIValue(nae, i, &ival);  /* location in original boxas */
        numaaAddNumber(naa, index, ival);
    }

        /* Sort each boxa in the boxaa from left to right, and apply
         * the same permutation to the matching numa of input indices. */
    m = boxaaGetCount(baa);
    for (i = 0; i < m; i++) {
        boxat1 = boxaaGetBoxa(baa, i, L_CLONE);
        boxat2 = boxaSort(boxat1, L_SORT_BY_X, L_SORT_INCREASING, &nah);
        boxaaReplaceBoxa(baa, i, boxat2);
        nat1 = numaaGetNuma(naa, i, L_CLONE);
        nat2 = numaSortByIndex(nat1, nah);
        numaaReplaceNuma(naa, i, nat2);
        boxaDestroy(&boxat1);
        numaDestroy(&nat1);
        numaDestroy(&nah);
    }

        /* Sort boxa vertically within boxaa, using the first box
         * in each boxa. */
    m = boxaaGetCount(baa);
    boxav = boxaCreate(m);  /* holds first box in each boxa in baa */
    naad = numaaCreate(m);
    if (pnaad)
        *pnaad = naad;
    baad = boxaaCreate(m);
    for (i = 0; i < m; i++) {
        boxat1 = boxaaGetBoxa(baa, i, L_CLONE);
        box = boxaGetBox(boxat1, 0, L_CLONE);
        boxaAddBox(boxav, box, L_INSERT);
        boxaDestroy(&boxat1);
    }
        /* Only the sort permutation (nav) is used below; boxavs is
         * kept just so it can be destroyed. */
    boxavs = boxaSort(boxav, L_SORT_BY_Y, L_SORT_INCREASING, &nav);
    for (i = 0; i < m; i++) {
        numaGetIValue(nav, i, &index);
        boxa = boxaaGetBoxa(baa, index, L_CLONE);
        boxaaAddBoxa(baad, boxa, L_INSERT);
        nad = numaaGetNuma(naa, index, L_CLONE);
        numaaAddNuma(naad, nad, L_INSERT);
    }

/*    fprintf(stderr, "box count = %d, numaa count = %d\n", nt,
            numaaGetNumberCount(naad)); */

    boxaaDestroy(&baa);
    boxaDestroy(&boxav);
    boxaDestroy(&boxavs);
    boxaDestroy(&boxae);
    numaDestroy(&nav);
    numaDestroy(&nae);
    numaaDestroy(&naa);
        /* naad is handed to the caller when requested; otherwise it
         * is released here. */
    if (!pnaad)
        numaaDestroy(&naad);

    return baad;
}