/*!
 * \brief   pixGetWordBoxesInTextlines()
 *
 * \param[in]    pixs 1 bpp, typ. 300 ppi
 * \param[in]    reduction 1 for input res; 2 for 2x reduction of input res
 * \param[in]    minwidth, minheight of saved components; smaller are discarded
 * \param[in]    maxwidth, maxheight of saved components; larger are discarded
 * \param[out]   pboxad word boxes sorted in textline line order
 * \param[out]   pnai [optional] index of textline for each word
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) The input should be at a resolution of about 300 ppi.
 *          The word masks can be computed at either 150 ppi or 300 ppi.
 *          For the former, set reduction = 2.
 *      (2) This is a special version of pixGetWordsInTextlines(), that
 *          just finds the word boxes in line order, with a numa
 *          giving the textline index for each word.
 *          See pixGetWordsInTextlines() for more details.
 * </pre>
 */
l_int32
pixGetWordBoxesInTextlines(PIX     *pixs,
                           l_int32  reduction,
                           l_int32  minwidth,
                           l_int32  minheight,
                           l_int32  maxwidth,
                           l_int32  maxheight,
                           BOXA   **pboxad,
                           NUMA   **pnai)
{
l_int32  maxdil;
BOXA    *boxa1;
BOXAA   *baa;
NUMA    *nai;
PIX     *pix1;

    PROCNAME("pixGetWordBoxesInTextlines");

    if (pnai) *pnai = NULL;
    if (!pboxad)
        return ERROR_INT("&boxad and &nai not both defined", procName, 1);
    *pboxad = NULL;
    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 1);
    if (reduction != 1 && reduction != 2)
        return ERROR_INT("reduction not in {1,2}", procName, 1);

    if (reduction == 1) {
        pix1 = pixClone(pixs);
        maxdil = 18;
    } else {  /* reduction == 2 */
        pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
        maxdil = 9;
    }

        /* Get the bounding boxes of the words from the word mask. */
    pixWordBoxesByDilation(pix1, maxdil, minwidth, minheight,
                           maxwidth, maxheight, &boxa1, NULL);

        /* 2D sort the bounding boxes of these words. */
    baa = boxaSort2d(boxa1, NULL, 3, -5, 5);

        /* Flatten the boxaa, saving the boxa index for each box */
    *pboxad = boxaaFlattenToBoxa(baa, &nai, L_CLONE);

    if (pnai)
        *pnai = nai;
    else
        numaDestroy(&nai);
    pixDestroy(&pix1);
    boxaDestroy(&boxa1);
    boxaaDestroy(&baa);
    return 0;
}
Beispiel #2
0
/*!
 * \brief   pixGetWordBoxesInTextlines()
 *
 * \param[in]    pixs 1 bpp, typ. 300 ppi
 * \param[in]    minwidth, minheight of saved components; smaller are discarded
 * \param[in]    maxwidth, maxheight of saved components; larger are discarded
 * \param[out]   pboxad word boxes sorted in textline line order
 * \param[out]   pnai [optional] index of textline for each word
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) The input should be at a resolution of between 75 and 150 ppi.
 *      (2) This is a special version of pixGetWordsInTextlines(), that
 *          just finds the word boxes in line order, with a numa
 *          giving the textline index for each word.
 *          See pixGetWordsInTextlines() for more details.
 * </pre>
 */
l_int32
pixGetWordBoxesInTextlines(PIX     *pixs,
                           l_int32  minwidth,
                           l_int32  minheight,
                           l_int32  maxwidth,
                           l_int32  maxheight,
                           BOXA   **pboxad,
                           NUMA   **pnai)
{
BOXA    *boxa1;
BOXAA   *baa;
NUMA    *nai;

    PROCNAME("pixGetWordBoxesInTextlines");

    if (pnai) *pnai = NULL;
    if (!pboxad)
        return ERROR_INT("&boxad and &nai not both defined", procName, 1);
    *pboxad = NULL;
    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 1);

        /* Get the bounding boxes of the words from the word mask. */
    pixWordBoxesByDilation(pixs, minwidth, minheight, maxwidth, maxheight,
                           &boxa1, NULL, NULL);

        /* 2D sort the bounding boxes of these words. */
    baa = boxaSort2d(boxa1, NULL, 3, -5, 5);

        /* Flatten the boxaa, saving the boxa index for each box */
    *pboxad = boxaaFlattenToBoxa(baa, &nai, L_CLONE);

    if (pnai)
        *pnai = nai;
    else
        numaDestroy(&nai);
    boxaDestroy(&boxa1);
    boxaaDestroy(&baa);
    return 0;
}
Beispiel #3
0
void k2pdfopt_reflow_bmp(KOPTContext *kctx) {
    K2PDFOPT_SETTINGS _k2settings, *k2settings;
    MASTERINFO _masterinfo, *masterinfo;
    WILLUSBITMAP _srcgrey, *srcgrey;
    WILLUSBITMAP *src, *dst;
    BMPREGION region;
    int i, bw, marbot, marleft;

    src = &kctx->src;
    srcgrey = &_srcgrey;
    bmp_init(srcgrey);

    k2settings = &_k2settings;
    masterinfo = &_masterinfo;
    /* Initialize settings */
    k2pdfopt_settings_init_from_koptcontext(k2settings, kctx);
    k2pdfopt_settings_quick_sanity_check(k2settings);
    /* Init for new source doc */
    k2pdfopt_settings_new_source_document_init(k2settings);
    /* Init master output structure */
    masterinfo_init(masterinfo, k2settings);
    wrapbmp_init(&masterinfo->wrapbmp, k2settings->dst_color);
    /* Init new source bitmap */
    bmpregion_init(&region);
    masterinfo_new_source_page_init(masterinfo, k2settings, src, srcgrey, NULL,
            &region, k2settings->src_rot, NULL, NULL, 1, -1, NULL );
    /* Set output size */
    k2pdfopt_settings_set_margins_and_devsize(k2settings,&region,masterinfo,-1.,0);
    /* Process single source page */
    bmpregion_source_page_add(&region, k2settings, masterinfo, 1, 0);
    wrapbmp_flush(masterinfo, k2settings, 0);

    if (fabs(k2settings->dst_gamma - 1.0) > .001)
        bmp_gamma_correct(&masterinfo->bmp, &masterinfo->bmp,
                k2settings->dst_gamma);

    /* copy master bitmap to context dst bitmap */
    dst = &kctx->dst;
    marbot = (int) (k2settings->dst_dpi * k2settings->dstmargins.box[1] + .5);
    marleft = (int) (k2settings->dst_dpi * k2settings->dstmargins.box[0] + .5);
    dst->bpp = masterinfo->bmp.bpp;
    dst->width = masterinfo->bmp.width;
    dst->height = masterinfo->rows > kctx->page_height ? masterinfo->rows + marbot : kctx->page_height;
    bmp_alloc(dst);
    bmp_fill(dst, 255, 255, 255);
    bw = bmp_bytewidth(&masterinfo->bmp);
    for (i = 0; i < masterinfo->rows; i++)
        memcpy(bmp_rowptr_from_top(dst, i),
                bmp_rowptr_from_top(&masterinfo->bmp, i), bw);

    kctx->page_width = kctx->dst.width;
    kctx->page_height = kctx->dst.height;
    kctx->precache = 0;

    int j;
    BOXA *rboxa = boxaCreate(masterinfo->rectmaps.n);
    BOXA *nboxa = boxaCreate(masterinfo->rectmaps.n);
    for (j = 0; j < masterinfo->rectmaps.n; j++) {
        WRECTMAP * rectmap = &masterinfo->rectmaps.wrectmap[j];
        rectmap->coords[1].x += marleft;
        BOX* rlbox = boxCreate(rectmap->coords[1].x,
                              rectmap->coords[1].y,
                              rectmap->coords[2].x,
                              rectmap->coords[2].y);
        BOX* nlbox = boxCreate(rectmap->coords[0].x*k2settings->src_dpi/rectmap->srcdpiw/kctx->zoom + kctx->bbox.x0,
                              rectmap->coords[0].y*k2settings->src_dpi/rectmap->srcdpih/kctx->zoom + kctx->bbox.y0,
                              rectmap->coords[2].x*k2settings->src_dpi/rectmap->srcdpiw/kctx->zoom,
                              rectmap->coords[2].y*k2settings->src_dpi/rectmap->srcdpih/kctx->zoom);
        boxaAddBox(rboxa, rlbox, L_INSERT);
        boxaAddBox(nboxa, nlbox, L_INSERT);
        wrectmaps_add_wrectmap(&kctx->rectmaps, rectmap);

        /*printf("rectmap:coords:\t%.1f %.1f\t%.1f %.1f\t%.1f %.1f\t%.1f %.1f\n",
                rectmap->coords[0].x, rectmap->coords[0].y,
                rectmap->coords[1].x, rectmap->coords[1].y,
                rectmap->coords[2].x, rectmap->coords[2].y,
                rectmap->srcdpiw,     rectmap->srcdpih);*/
    }
    /* 2D sort the bounding boxes of these words. */
    BOXAA *rbaa = boxaSort2d(rboxa, NULL, 3, -5, 5);
    BOXAA *nbaa = boxaSort2d(nboxa, NULL, 3, -5, 5);

    /* Flatten the boxaa, saving the boxa index for each box */
    kctx->rboxa = boxaaFlattenToBoxa(rbaa, &kctx->rnai, L_CLONE);
    kctx->nboxa = boxaaFlattenToBoxa(nbaa, &kctx->nnai, L_CLONE);

    boxaDestroy(&rboxa);
    boxaaDestroy(&rbaa);
    boxaDestroy(&nboxa);
    boxaaDestroy(&nbaa);

    bmp_free(src);
    bmp_free(srcgrey);
    bmpregion_free(&region);
    masterinfo_free(masterinfo, k2settings);
}
Beispiel #4
0
/*!
 *  pixSplitIntoCharacters()
 *
 *      Input:  pixs (1 bpp, contains only deskewed text)
 *              minw (minimum component width for initial filtering; typ. 4)
 *              minh (minimum component height for initial filtering; typ. 4)
 *              &boxa (<optional return> character bounding boxes)
 *              &pixa (<optional return> character images)
 *              &pixdebug (<optional return> showing splittings)
 *
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) This is a simple function that attempts to find split points
 *          based on vertical pixel profiles.
 *      (2) It should be given an image that has an arbitrary number
 *          of text characters.
 *      (3) The returned pixa includes the boxes from which the
 *          (possibly split) components are extracted.
 */
l_int32
pixSplitIntoCharacters(PIX     *pixs,
                       l_int32  minw,
                       l_int32  minh,
                       BOXA   **pboxa,
                       PIXA   **ppixa,
                       PIX    **ppixdebug)
{
l_int32  ncomp, i, xoff, yoff;
BOXA   *boxa1, *boxa2, *boxat1, *boxat2, *boxad;
BOXAA  *baa;
PIX    *pix, *pix1, *pix2, *pixdb;
PIXA   *pixa1, *pixadb;

    PROCNAME("pixSplitIntoCharacters");

    if (pboxa) *pboxa = NULL;
    if (ppixa) *ppixa = NULL;
    if (ppixdebug) *ppixdebug = NULL;
    if (!pixs || pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);

        /* Remove the small stuff */
    pix1 = pixSelectBySize(pixs, minw, minh, 8, L_SELECT_IF_BOTH,
                           L_SELECT_IF_GT, NULL);

        /* Small vertical close for consolidation */
    pix2 = pixMorphSequence(pix1, "c1.10", 0);
    pixDestroy(&pix1);

        /* Get the 8-connected components */
    boxa1 = pixConnComp(pix2, &pixa1, 8);
    pixDestroy(&pix2);
    boxaDestroy(&boxa1);

        /* Split the components if obvious */
    ncomp = pixaGetCount(pixa1);
    boxa2 = boxaCreate(ncomp);
    pixadb = (ppixdebug) ? pixaCreate(ncomp) : NULL;
    for (i = 0; i < ncomp; i++) {
        pix = pixaGetPix(pixa1, i, L_CLONE);
        if (ppixdebug) {
            boxat1 = pixSplitComponentWithProfile(pix, 10, 7, &pixdb);
            if (pixdb)
                pixaAddPix(pixadb, pixdb, L_INSERT);
        } else {
            boxat1 = pixSplitComponentWithProfile(pix, 10, 7, NULL);
        }
        pixaGetBoxGeometry(pixa1, i, &xoff, &yoff, NULL, NULL);
        boxat2 = boxaTransform(boxat1, xoff, yoff, 1.0, 1.0);
        boxaJoin(boxa2, boxat2, 0, -1);
        pixDestroy(&pix);
        boxaDestroy(&boxat1);
        boxaDestroy(&boxat2);
    }
    pixaDestroy(&pixa1);

        /* Generate the debug image */
    if (ppixdebug) {
        if (pixaGetCount(pixadb) > 0) {
            *ppixdebug = pixaDisplayTiledInRows(pixadb, 32, 1500,
                                                1.0, 0, 20, 1);
        }
        pixaDestroy(&pixadb);
    }

        /* Do a 2D sort on the bounding boxes, and flatten the result to 1D */
    baa = boxaSort2d(boxa2, NULL, 0, 0, 5);
    boxad = boxaaFlattenToBoxa(baa, NULL, L_CLONE);
    boxaaDestroy(&baa);
    boxaDestroy(&boxa2);

        /* Optionally extract the pieces from the input image */
    if (ppixa)
        *ppixa = pixClipRectangles(pixs, boxad);
    if (pboxa)
        *pboxa = boxad;
    else
        boxaDestroy(&boxad);
    return 0;
}