/*!
 * \brief   pixGetWordBoxesInTextlines()
 *
 * \param[in]    pixs 1 bpp, typ. 300 ppi
 * \param[in]    reduction 1 for input res; 2 for 2x reduction of input res
 * \param[in]    minwidth, minheight of saved components; smaller are discarded
 * \param[in]    maxwidth, maxheight of saved components; larger are discarded
 * \param[out]   pboxad word boxes sorted in textline line order
 * \param[out]   pnai [optional] index of textline for each word
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) The input should be at a resolution of about 300 ppi.
 *          The word masks can be computed at either 150 ppi or 300 ppi.
 *          For the former, set reduction = 2.
 *      (2) This is a special version of pixGetWordsInTextlines(), that
 *          just finds the word boxes in line order, with a numa
 *          giving the textline index for each word.
 *          See pixGetWordsInTextlines() for more details.
 * </pre>
 */
l_int32
pixGetWordBoxesInTextlines(PIX     *pixs,
                           l_int32  reduction,
                           l_int32  minwidth,
                           l_int32  minheight,
                           l_int32  maxwidth,
                           l_int32  maxheight,
                           BOXA   **pboxad,
                           NUMA   **pnai)
{
l_int32  maxdil;
BOXA    *boxa1;
BOXAA   *baa;
NUMA    *nai;
PIX     *pix1;

    PROCNAME("pixGetWordBoxesInTextlines");

    if (pnai) *pnai = NULL;
    if (!pboxad)
        return ERROR_INT("&boxad and &nai not both defined", procName, 1);
    *pboxad = NULL;
    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 1);
    if (reduction != 1 && reduction != 2)
        return ERROR_INT("reduction not in {1,2}", procName, 1);

    if (reduction == 1) {
        pix1 = pixClone(pixs);
        maxdil = 18;
    } else {  /* reduction == 2 */
        pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
        maxdil = 9;
    }

        /* Get the bounding boxes of the words from the word mask. */
    pixWordBoxesByDilation(pix1, maxdil, minwidth, minheight,
                           maxwidth, maxheight, &boxa1, NULL);

        /* 2D sort the bounding boxes of these words. */
    baa = boxaSort2d(boxa1, NULL, 3, -5, 5);

        /* Flatten the boxaa, saving the boxa index for each box */
    *pboxad = boxaaFlattenToBoxa(baa, &nai, L_CLONE);

    if (pnai)
        *pnai = nai;
    else
        numaDestroy(&nai);
    pixDestroy(&pix1);
    boxaDestroy(&boxa1);
    boxaaDestroy(&baa);
    return 0;
}
Ejemplo n.º 2
0
/*!
 * \brief   pixGetWordsInTextlines()
 *
 * \param[in]    pixs 1 bpp, typ. 75 - 150 ppi
 * \param[in]    minwidth, minheight of saved components; smaller are discarded
 * \param[in]    maxwidth, maxheight of saved components; larger are discarded
 * \param[out]   pboxad word boxes sorted in textline line order
 * \param[out]   ppixad word images sorted in textline line order
 * \param[out]   pnai index of textline for each word
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) The input should be at a resolution of between 75 and 150 ppi.
 *      (2) The four size constraints on saved components are all
 *          scaled by %reduction.
 *      (3) The result are word images (and their b.b.), extracted in
 *          textline order, at either full res or 2x reduction,
 *          and with a numa giving the textline index for each word.
 *      (4) The pixa and boxa interfaces should make this type of
 *          application simple to put together.  The steps are:
 *           ~ generate first estimate of word masks
 *           ~ get b.b. of these, and remove the small and big ones
 *           ~ extract pixa of the word images, using the b.b.
 *           ~ sort actual word images in textline order (2d)
 *           ~ flatten them to a pixa (1d), saving the textline index
 *             for each pix
 *      (5) In an actual application, it may be desirable to pre-filter
 *          the input image to remove large components, to extract
 *          single columns of text, and to deskew them.  For example,
 *          to remove both large components and small noisy components
 *          that can interfere with the statistics used to estimate
 *          parameters for segmenting by words, but still retain text lines,
 *          the following image preprocessing can be done:
 *                Pix *pixt = pixMorphSequence(pixs, "c40.1", 0);
 *                Pix *pixf = pixSelectBySize(pixt, 0, 60, 8,
 *                                     L_SELECT_HEIGHT, L_SELECT_IF_LT, NULL);
 *                pixAnd(pixf, pixf, pixs);  // the filtered image
 *          The closing turns text lines into long blobs, but does not
 *          significantly increase their height.  But if there are many
 *          small connected components in a dense texture, this is likely
 *          to generate tall components that will be eliminated in pixf.
 * </pre>
 */
l_int32
pixGetWordsInTextlines(PIX     *pixs,
                       l_int32  minwidth,
                       l_int32  minheight,
                       l_int32  maxwidth,
                       l_int32  maxheight,
                       BOXA   **pboxad,
                       PIXA   **ppixad,
                       NUMA   **pnai)
{
BOXA    *boxa1, *boxad;
BOXAA   *baa;
NUMA    *nai;
NUMAA   *naa;
PIXA    *pixa1, *pixad;
PIXAA   *paa;

    PROCNAME("pixGetWordsInTextlines");

    if (!pboxad || !ppixad || !pnai)
        return ERROR_INT("&boxad, &pixad, &nai not all defined", procName, 1);
    *pboxad = NULL;
    *ppixad = NULL;
    *pnai = NULL;
    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 1);

        /* Get the bounding boxes of the words from the word mask. */
    pixWordBoxesByDilation(pixs, minwidth, minheight, maxwidth, maxheight,
                           &boxa1, NULL, NULL);

        /* Generate a pixa of the word images */
    pixa1 = pixaCreateFromBoxa(pixs, boxa1, NULL);  /* mask over each word */

        /* Sort the bounding boxes of these words by line.  We use the
         * index mapping to allow identical sorting of the pixa. */
    baa = boxaSort2d(boxa1, &naa, -1, -1, 4);
    paa = pixaSort2dByIndex(pixa1, naa, L_CLONE);

        /* Flatten the word paa */
    pixad = pixaaFlattenToPixa(paa, &nai, L_CLONE);
    boxad = pixaGetBoxa(pixad, L_COPY);

    *pnai = nai;
    *pboxad = boxad;
    *ppixad = pixad;

    pixaDestroy(&pixa1);
    boxaDestroy(&boxa1);
    boxaaDestroy(&baa);
    pixaaDestroy(&paa);
    numaaDestroy(&naa);
    return 0;
}
Ejemplo n.º 3
0
/*!
 * \brief   pixGetWordBoxesInTextlines()
 *
 * \param[in]    pixs 1 bpp, typ. 300 ppi
 * \param[in]    minwidth, minheight of saved components; smaller are discarded
 * \param[in]    maxwidth, maxheight of saved components; larger are discarded
 * \param[out]   pboxad word boxes sorted in textline line order
 * \param[out]   pnai [optional] index of textline for each word
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) The input should be at a resolution of between 75 and 150 ppi.
 *      (2) This is a special version of pixGetWordsInTextlines(), that
 *          just finds the word boxes in line order, with a numa
 *          giving the textline index for each word.
 *          See pixGetWordsInTextlines() for more details.
 * </pre>
 */
l_int32
pixGetWordBoxesInTextlines(PIX     *pixs,
                           l_int32  minwidth,
                           l_int32  minheight,
                           l_int32  maxwidth,
                           l_int32  maxheight,
                           BOXA   **pboxad,
                           NUMA   **pnai)
{
BOXA    *boxa1;
BOXAA   *baa;
NUMA    *nai;

    PROCNAME("pixGetWordBoxesInTextlines");

    if (pnai) *pnai = NULL;
    if (!pboxad)
        return ERROR_INT("&boxad and &nai not both defined", procName, 1);
    *pboxad = NULL;
    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 1);

        /* Get the bounding boxes of the words from the word mask. */
    pixWordBoxesByDilation(pixs, minwidth, minheight, maxwidth, maxheight,
                           &boxa1, NULL, NULL);

        /* 2D sort the bounding boxes of these words. */
    baa = boxaSort2d(boxa1, NULL, 3, -5, 5);

        /* Flatten the boxaa, saving the boxa index for each box */
    *pboxad = boxaaFlattenToBoxa(baa, &nai, L_CLONE);

    if (pnai)
        *pnai = nai;
    else
        numaDestroy(&nai);
    boxaDestroy(&boxa1);
    boxaaDestroy(&baa);
    return 0;
}
Ejemplo n.º 4
0
l_int32 main(int    argc,
             char **argv)
{
char      *boxatxt;
l_int32    i;
BOXA      *boxa1, *boxa2, *boxa3;
BOXAA     *baa, *baa1;
NUMAA     *naa1;
PIX       *pixdb, *pix1, *pix2, *pix3, *pix4;
PIXA      *pixa1, *pixa2, *pixa3, *pixat;
L_RECOG   *recog;
L_RECOGA  *recoga;
SARRAY    *sa1;

    /* ----- Example identifying samples using training data ----- */
#if 1
        /* Read the training data */
    pixat = pixaRead("recog/sets/train06.pa");
    recog = recogCreateFromPixa(pixat, 0, 0, L_USE_ALL, 128, 1);
    recoga = recogaCreateFromRecog(recog);
    pixaDestroy(&pixat);

        /* Read the data from all samples */
    pix1 = pixRead("recog/sets/samples06.png");
    boxatxt = pixGetText(pix1);
    boxa1 = boxaReadMem((l_uint8 *)boxatxt, strlen(boxatxt));
    pixa1 = pixaCreateFromBoxa(pix1, boxa1, NULL);
    pixDestroy(&pix1);  /* destroys boxa1 */

        /* Identify components in the sample data */
    pixa2 = pixaCreate(0);
    pixa3 = pixaCreate(0);
    for (i = 0; i < 9; i++) {
/*        if (i != 4) continue; */  /* dots form separate boxa */
/*        if (i != 8) continue; */  /* broken 2 in '24' */
        pix1 = pixaGetPix(pixa1, i, L_CLONE);

            /* Show the 2d box data in the sample */
        boxa2 = pixConnComp(pix1, NULL, 8);
        baa = boxaSort2d(boxa2, NULL, 6, 6, 5);
        pix2 = boxaaDisplay(baa, 3, 1, 0xff000000, 0x00ff0000, 0, 0);
        pixaAddPix(pixa3, pix2, L_INSERT);
        boxaaDestroy(&baa);
        boxaDestroy(&boxa2);

            /* Get the numbers in the sample */
        recogaIdentifyMultiple(recoga, pix1, 0, 5, 3, &boxa3, NULL, &pixdb, 0);
        sa1 = recogaExtractNumbers(recoga, boxa3, 0.7, -1, &baa1, &naa1);
        sarrayWriteStream(stderr, sa1);
        boxaaWriteStream(stderr, baa1);
        numaaWriteStream(stderr, naa1);
        pixaAddPix(pixa2, pixdb, L_INSERT);
/*        pixaWrite("/tmp/pixa.pa", pixa2); */
        pixDestroy(&pix1);
        boxaDestroy(&boxa3);
        boxaaDestroy(&baa1);
        numaaDestroy(&naa1);
        sarrayDestroy(&sa1);
    }

    pix3 = pixaDisplayLinearly(pixa2, L_VERT, 1.0, 0, 20, 1, NULL);
    pixWrite("/tmp/pix3.png", pix3, IFF_PNG);
    pix4 = pixaDisplayTiledInRows(pixa3, 32, 1500, 1.0, 0, 20, 2);
    pixDisplay(pix4, 500, 0);
    pixWrite("/tmp/pix4.png", pix4, IFF_PNG);
    pixaDestroy(&pixa2);
    pixaDestroy(&pixa3);
    pixDestroy(&pix1);
    pixDestroy(&pix3);
    pixDestroy(&pix4);
    pixaDestroy(&pixa1);
    boxaDestroy(&boxa1);
    recogaDestroy(&recoga);
#endif

    return 0;
}
/*!
 * \brief   pixGetWordsInTextlines()
 *
 * \param[in]    pixs 1 bpp, typ. 300 ppi
 * \param[in]    reduction 1 for input res; 2 for 2x reduction of input res
 * \param[in]    minwidth, minheight of saved components; smaller are discarded
 * \param[in]    maxwidth, maxheight of saved components; larger are discarded
 * \param[out]   pboxad word boxes sorted in textline line order
 * \param[out]   ppixad word images sorted in textline line order
 * \param[out]   pnai index of textline for each word
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) The input should be at a resolution of about 300 ppi.
 *          The word masks and word images can be computed at either
 *          150 ppi or 300 ppi.  For the former, set reduction = 2.
 *      (2) The four size constraints on saved components are all
 *          scaled by %reduction.
 *      (3) The result are word images (and their b.b.), extracted in
 *          textline order, at either full res or 2x reduction,
 *          and with a numa giving the textline index for each word.
 *      (4) The pixa and boxa interfaces should make this type of
 *          application simple to put together.  The steps are:
 *           ~ optionally reduce by 2x
 *           ~ generate first estimate of word masks
 *           ~ get b.b. of these, and remove the small and big ones
 *           ~ extract pixa of the word images, using the b.b.
 *           ~ sort actual word images in textline order (2d)
 *           ~ flatten them to a pixa (1d), saving the textline index
 *             for each pix
 *      (5) In an actual application, it may be desirable to pre-filter
 *          the input image to remove large components, to extract
 *          single columns of text, and to deskew them.  For example,
 *          to remove both large components and small noisy components
 *          that can interfere with the statistics used to estimate
 *          parameters for segmenting by words, but still retain text lines,
 *          the following image preprocessing can be done:
 *                Pix *pixt = pixMorphSequence(pixs, "c40.1", 0);
 *                Pix *pixf = pixSelectBySize(pixt, 0, 60, 8,
 *                                     L_SELECT_HEIGHT, L_SELECT_IF_LT, NULL);
 *                pixAnd(pixf, pixf, pixs);  // the filtered image
 *          The closing turns text lines into long blobs, but does not
 *          significantly increase their height.  But if there are many
 *          small connected components in a dense texture, this is likely
 *          to generate tall components that will be eliminated in pixf.
 * </pre>
 */
l_int32
pixGetWordsInTextlines(PIX     *pixs,
                       l_int32  reduction,
                       l_int32  minwidth,
                       l_int32  minheight,
                       l_int32  maxwidth,
                       l_int32  maxheight,
                       BOXA   **pboxad,
                       PIXA   **ppixad,
                       NUMA   **pnai)
{
l_int32  maxdil;
BOXA    *boxa1, *boxad;
BOXAA   *baa;
NUMA    *nai;
NUMAA   *naa;
PIXA    *pixa1, *pixad;
PIX     *pix1;
PIXAA   *paa;

    PROCNAME("pixGetWordsInTextlines");

    if (!pboxad || !ppixad || !pnai)
        return ERROR_INT("&boxad, &pixad, &nai not all defined", procName, 1);
    *pboxad = NULL;
    *ppixad = NULL;
    *pnai = NULL;
    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 1);
    if (reduction != 1 && reduction != 2)
        return ERROR_INT("reduction not in {1,2}", procName, 1);

    if (reduction == 1) {
        pix1 = pixClone(pixs);
        maxdil = 18;
    } else {  /* reduction == 2 */
        pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
        maxdil = 9;
    }

        /* Get the bounding boxes of the words from the word mask. */
    pixWordBoxesByDilation(pix1, maxdil, minwidth, minheight,
                           maxwidth, maxheight, &boxa1, NULL);

        /* Generate a pixa of the word images */
    pixa1 = pixaCreateFromBoxa(pix1, boxa1, NULL);  /* mask over each word */

        /* Sort the bounding boxes of these words by line.  We use the
         * index mapping to allow identical sorting of the pixa. */
    baa = boxaSort2d(boxa1, &naa, -1, -1, 4);
    paa = pixaSort2dByIndex(pixa1, naa, L_CLONE);

        /* Flatten the word paa */
    pixad = pixaaFlattenToPixa(paa, &nai, L_CLONE);
    boxad = pixaGetBoxa(pixad, L_COPY);

    *pnai = nai;
    *pboxad = boxad;
    *ppixad = pixad;

    pixDestroy(&pix1);
    pixaDestroy(&pixa1);
    boxaDestroy(&boxa1);
    boxaaDestroy(&baa);
    pixaaDestroy(&paa);
    numaaDestroy(&naa);
    return 0;
}
Ejemplo n.º 6
0
void k2pdfopt_reflow_bmp(KOPTContext *kctx) {
    K2PDFOPT_SETTINGS _k2settings, *k2settings;
    MASTERINFO _masterinfo, *masterinfo;
    WILLUSBITMAP _srcgrey, *srcgrey;
    WILLUSBITMAP *src, *dst;
    BMPREGION region;
    int i, bw, marbot, marleft;

    src = &kctx->src;
    srcgrey = &_srcgrey;
    bmp_init(srcgrey);

    k2settings = &_k2settings;
    masterinfo = &_masterinfo;
    /* Initialize settings */
    k2pdfopt_settings_init_from_koptcontext(k2settings, kctx);
    k2pdfopt_settings_quick_sanity_check(k2settings);
    /* Init for new source doc */
    k2pdfopt_settings_new_source_document_init(k2settings);
    /* Init master output structure */
    masterinfo_init(masterinfo, k2settings);
    wrapbmp_init(&masterinfo->wrapbmp, k2settings->dst_color);
    /* Init new source bitmap */
    bmpregion_init(&region);
    masterinfo_new_source_page_init(masterinfo, k2settings, src, srcgrey, NULL,
            &region, k2settings->src_rot, NULL, NULL, 1, -1, NULL );
    /* Set output size */
    k2pdfopt_settings_set_margins_and_devsize(k2settings,&region,masterinfo,-1.,0);
    /* Process single source page */
    bmpregion_source_page_add(&region, k2settings, masterinfo, 1, 0);
    wrapbmp_flush(masterinfo, k2settings, 0);

    if (fabs(k2settings->dst_gamma - 1.0) > .001)
        bmp_gamma_correct(&masterinfo->bmp, &masterinfo->bmp,
                k2settings->dst_gamma);

    /* copy master bitmap to context dst bitmap */
    dst = &kctx->dst;
    marbot = (int) (k2settings->dst_dpi * k2settings->dstmargins.box[1] + .5);
    marleft = (int) (k2settings->dst_dpi * k2settings->dstmargins.box[0] + .5);
    dst->bpp = masterinfo->bmp.bpp;
    dst->width = masterinfo->bmp.width;
    dst->height = masterinfo->rows > kctx->page_height ? masterinfo->rows + marbot : kctx->page_height;
    bmp_alloc(dst);
    bmp_fill(dst, 255, 255, 255);
    bw = bmp_bytewidth(&masterinfo->bmp);
    for (i = 0; i < masterinfo->rows; i++)
        memcpy(bmp_rowptr_from_top(dst, i),
                bmp_rowptr_from_top(&masterinfo->bmp, i), bw);

    kctx->page_width = kctx->dst.width;
    kctx->page_height = kctx->dst.height;
    kctx->precache = 0;

    int j;
    BOXA *rboxa = boxaCreate(masterinfo->rectmaps.n);
    BOXA *nboxa = boxaCreate(masterinfo->rectmaps.n);
    for (j = 0; j < masterinfo->rectmaps.n; j++) {
        WRECTMAP * rectmap = &masterinfo->rectmaps.wrectmap[j];
        rectmap->coords[1].x += marleft;
        BOX* rlbox = boxCreate(rectmap->coords[1].x,
                              rectmap->coords[1].y,
                              rectmap->coords[2].x,
                              rectmap->coords[2].y);
        BOX* nlbox = boxCreate(rectmap->coords[0].x*k2settings->src_dpi/rectmap->srcdpiw/kctx->zoom + kctx->bbox.x0,
                              rectmap->coords[0].y*k2settings->src_dpi/rectmap->srcdpih/kctx->zoom + kctx->bbox.y0,
                              rectmap->coords[2].x*k2settings->src_dpi/rectmap->srcdpiw/kctx->zoom,
                              rectmap->coords[2].y*k2settings->src_dpi/rectmap->srcdpih/kctx->zoom);
        boxaAddBox(rboxa, rlbox, L_INSERT);
        boxaAddBox(nboxa, nlbox, L_INSERT);
        wrectmaps_add_wrectmap(&kctx->rectmaps, rectmap);

        /*printf("rectmap:coords:\t%.1f %.1f\t%.1f %.1f\t%.1f %.1f\t%.1f %.1f\n",
                rectmap->coords[0].x, rectmap->coords[0].y,
                rectmap->coords[1].x, rectmap->coords[1].y,
                rectmap->coords[2].x, rectmap->coords[2].y,
                rectmap->srcdpiw,     rectmap->srcdpih);*/
    }
    /* 2D sort the bounding boxes of these words. */
    BOXAA *rbaa = boxaSort2d(rboxa, NULL, 3, -5, 5);
    BOXAA *nbaa = boxaSort2d(nboxa, NULL, 3, -5, 5);

    /* Flatten the boxaa, saving the boxa index for each box */
    kctx->rboxa = boxaaFlattenToBoxa(rbaa, &kctx->rnai, L_CLONE);
    kctx->nboxa = boxaaFlattenToBoxa(nbaa, &kctx->nnai, L_CLONE);

    boxaDestroy(&rboxa);
    boxaaDestroy(&rbaa);
    boxaDestroy(&nboxa);
    boxaaDestroy(&nbaa);

    bmp_free(src);
    bmp_free(srcgrey);
    bmpregion_free(&region);
    masterinfo_free(masterinfo, k2settings);
}
Ejemplo n.º 7
0
/*!
 *  pixSplitIntoCharacters()
 *
 *      Input:  pixs (1 bpp, contains only deskewed text)
 *              minw (minimum component width for initial filtering; typ. 4)
 *              minh (minimum component height for initial filtering; typ. 4)
 *              &boxa (<optional return> character bounding boxes)
 *              &pixa (<optional return> character images)
 *              &pixdebug (<optional return> showing splittings)
 *
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) This is a simple function that attempts to find split points
 *          based on vertical pixel profiles.
 *      (2) It should be given an image that has an arbitrary number
 *          of text characters.
 *      (3) The returned pixa includes the boxes from which the
 *          (possibly split) components are extracted.
 */
l_int32
pixSplitIntoCharacters(PIX     *pixs,
                       l_int32  minw,
                       l_int32  minh,
                       BOXA   **pboxa,
                       PIXA   **ppixa,
                       PIX    **ppixdebug)
{
l_int32  ncomp, i, xoff, yoff;
BOXA   *boxa1, *boxa2, *boxat1, *boxat2, *boxad;
BOXAA  *baa;
PIX    *pix, *pix1, *pix2, *pixdb;
PIXA   *pixa1, *pixadb;

    PROCNAME("pixSplitIntoCharacters");

    if (pboxa) *pboxa = NULL;
    if (ppixa) *ppixa = NULL;
    if (ppixdebug) *ppixdebug = NULL;
    if (!pixs || pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);

        /* Remove the small stuff */
    pix1 = pixSelectBySize(pixs, minw, minh, 8, L_SELECT_IF_BOTH,
                           L_SELECT_IF_GT, NULL);

        /* Small vertical close for consolidation */
    pix2 = pixMorphSequence(pix1, "c1.10", 0);
    pixDestroy(&pix1);

        /* Get the 8-connected components */
    boxa1 = pixConnComp(pix2, &pixa1, 8);
    pixDestroy(&pix2);
    boxaDestroy(&boxa1);

        /* Split the components if obvious */
    ncomp = pixaGetCount(pixa1);
    boxa2 = boxaCreate(ncomp);
    pixadb = (ppixdebug) ? pixaCreate(ncomp) : NULL;
    for (i = 0; i < ncomp; i++) {
        pix = pixaGetPix(pixa1, i, L_CLONE);
        if (ppixdebug) {
            boxat1 = pixSplitComponentWithProfile(pix, 10, 7, &pixdb);
            if (pixdb)
                pixaAddPix(pixadb, pixdb, L_INSERT);
        } else {
            boxat1 = pixSplitComponentWithProfile(pix, 10, 7, NULL);
        }
        pixaGetBoxGeometry(pixa1, i, &xoff, &yoff, NULL, NULL);
        boxat2 = boxaTransform(boxat1, xoff, yoff, 1.0, 1.0);
        boxaJoin(boxa2, boxat2, 0, -1);
        pixDestroy(&pix);
        boxaDestroy(&boxat1);
        boxaDestroy(&boxat2);
    }
    pixaDestroy(&pixa1);

        /* Generate the debug image */
    if (ppixdebug) {
        if (pixaGetCount(pixadb) > 0) {
            *ppixdebug = pixaDisplayTiledInRows(pixadb, 32, 1500,
                                                1.0, 0, 20, 1);
        }
        pixaDestroy(&pixadb);
    }

        /* Do a 2D sort on the bounding boxes, and flatten the result to 1D */
    baa = boxaSort2d(boxa2, NULL, 0, 0, 5);
    boxad = boxaaFlattenToBoxa(baa, NULL, L_CLONE);
    boxaaDestroy(&baa);
    boxaDestroy(&boxa2);

        /* Optionally extract the pieces from the input image */
    if (ppixa)
        *ppixa = pixClipRectangles(pixs, boxad);
    if (pboxa)
        *pboxa = boxad;
    else
        boxaDestroy(&boxad);
    return 0;
}