C++ (Cpp) pixReduceRankBinaryCascade Beispiele

Programmiersprache: C++ (Cpp)

Methode / Funktion: pixReduceRankBinaryCascade

Beispiele auf hotexamples.com: 9

C++ (Cpp) pixReduceRankBinaryCascade - 9 Beispiele gefunden. Dies sind die am besten bewerteten C++ (Cpp) Beispiele für die pixReduceRankBinaryCascade, die aus Open Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Beispiel #1

Datei anzeigen

Datei: pageseg.c Projekt: AngusHardie/TesseractOCR-For-Mac

/*!
 *  pixGetRegionsBinary()
 *
 *      Input:  pixs (1 bpp, assumed to be 300 to 400 ppi)
 *              &pixhm (<optional return> halftone mask)
 *              &pixtm (<optional return> textline mask)
 *              &pixtb (<optional return> textblock mask)
 *              debug (flag: set to 1 for debug output)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) It is best to deskew the image before segmenting.
 *      (2) The debug flag enables a number of outputs.  These
 *          are included to show how to generate and save/display
 *          these results.
 */
l_int32
pixGetRegionsBinary(PIX     *pixs,
                    PIX    **ppixhm,
                    PIX    **ppixtm,
                    PIX    **ppixtb,
                    l_int32  debug)
{
char    *tempname;
l_int32  htfound, tlfound;
PIX     *pixr, *pixt1, *pixt2;
PIX     *pixtext;  /* text pixels only */
PIX     *pixhm2;   /* halftone mask; 2x reduction */
PIX     *pixhm;    /* halftone mask;  */
PIX     *pixtm2;   /* textline mask; 2x reduction */
PIX     *pixtm;    /* textline mask */
PIX     *pixvws;   /* vertical white space mask */
PIX     *pixtb2;   /* textblock mask; 2x reduction */
PIX     *pixtbf2;  /* textblock mask; 2x reduction; small comps filtered */
PIX     *pixtb;    /* textblock mask */

    PROCNAME("pixGetRegionsBinary");

    if (ppixhm) *ppixhm = NULL;
    if (ppixtm) *ppixtm = NULL;
    if (ppixtb) *ppixtb = NULL;
    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 1);
    if (pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not 1 bpp", procName, 1);

        /* 2x reduce, to 150 -200 ppi */
    pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
    pixDisplayWrite(pixr, debug);

        /* Get the halftone mask */
    pixhm2 = pixGenHalftoneMask(pixr, &pixtext, &htfound, debug);

        /* Get the textline mask from the text pixels */
    pixtm2 = pixGenTextlineMask(pixtext, &pixvws, &tlfound, debug);

        /* Get the textblock mask from the textline mask */
    pixtb2 = pixGenTextblockMask(pixtm2, pixvws, debug);
    pixDestroy(&pixr);
    pixDestroy(&pixtext);
    pixDestroy(&pixvws);

        /* Remove small components from the mask, where a small
         * component is defined as one with both width and height < 60 */
    pixtbf2 = pixSelectBySize(pixtb2, 60, 60, 4, L_SELECT_IF_EITHER,
                              L_SELECT_IF_GTE, NULL);
    pixDestroy(&pixtb2);
    pixDisplayWriteFormat(pixtbf2, debug, IFF_PNG);

        /* Expand all masks to full resolution, and do filling or
         * small dilations for better coverage. */
    pixhm = pixExpandReplicate(pixhm2, 2);
    pixt1 = pixSeedfillBinary(NULL, pixhm, pixs, 8);
    pixOr(pixhm, pixhm, pixt1);
    pixDestroy(&pixt1);
    pixDisplayWriteFormat(pixhm, debug, IFF_PNG);

    pixt1 = pixExpandReplicate(pixtm2, 2);
    pixtm = pixDilateBrick(NULL, pixt1, 3, 3);
    pixDestroy(&pixt1);
    pixDisplayWriteFormat(pixtm, debug, IFF_PNG);

    pixt1 = pixExpandReplicate(pixtbf2, 2);
    pixtb = pixDilateBrick(NULL, pixt1, 3, 3);
    pixDestroy(&pixt1);
    pixDisplayWriteFormat(pixtb, debug, IFF_PNG);

    pixDestroy(&pixhm2);
    pixDestroy(&pixtm2);
    pixDestroy(&pixtbf2);

        /* Debug: identify objects that are neither text nor halftone image */
    if (debug) {
        pixt1 = pixSubtract(NULL, pixs, pixtm);  /* remove text pixels */
        pixt2 = pixSubtract(NULL, pixt1, pixhm);  /* remove halftone pixels */
        pixDisplayWriteFormat(pixt2, 1, IFF_PNG);
        pixDestroy(&pixt1);
        pixDestroy(&pixt2);
    }

        /* Debug: display textline components with random colors */
    if (debug) {
        l_int32  w, h;
        BOXA    *boxa;
        PIXA    *pixa;
        boxa = pixConnComp(pixtm, &pixa, 8);
        pixGetDimensions(pixtm, &w, &h, NULL);
        pixt1 = pixaDisplayRandomCmap(pixa, w, h);
        pixcmapResetColor(pixGetColormap(pixt1), 0, 255, 255, 255);
        pixDisplay(pixt1, 100, 100);
        pixDisplayWriteFormat(pixt1, 1, IFF_PNG);
        pixaDestroy(&pixa);
        boxaDestroy(&boxa);
        pixDestroy(&pixt1);
    }

        /* Debug: identify the outlines of each textblock */
    if (debug) {
        PIXCMAP  *cmap;
        PTAA     *ptaa;
        ptaa = pixGetOuterBordersPtaa(pixtb);
        tempname = genTempFilename("/tmp", "tb_outlines.ptaa", 0);
	ptaaWrite(tempname, ptaa, 1);
        FREE(tempname);
        pixt1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1);
        cmap = pixGetColormap(pixt1);
        pixcmapResetColor(cmap, 0, 130, 130, 130);
        pixDisplay(pixt1, 500, 100);
        pixDisplayWriteFormat(pixt1, 1, IFF_PNG);
        pixDestroy(&pixt1);
        ptaaDestroy(&ptaa);
    }

        /* Debug: get b.b. for all mask components */
    if (debug) {
        BOXA  *bahm, *batm, *batb;
        bahm = pixConnComp(pixhm, NULL, 4);
        batm = pixConnComp(pixtm, NULL, 4);
        batb = pixConnComp(pixtb, NULL, 4);
        tempname = genTempFilename("/tmp", "htmask.boxa", 0);
        boxaWrite(tempname, bahm);
        FREE(tempname);
        tempname = genTempFilename("/tmp", "textmask.boxa", 0);
        boxaWrite(tempname, batm);
        FREE(tempname);
        tempname = genTempFilename("/tmp", "textblock.boxa", 0);
        boxaWrite(tempname, batb);
        FREE(tempname);
	boxaDestroy(&bahm);
	boxaDestroy(&batm);
	boxaDestroy(&batb);
    }

    if (ppixhm)
        *ppixhm = pixhm;
    else
        pixDestroy(&pixhm);
    if (ppixtm)
        *ppixtm = pixtm;
    else
        pixDestroy(&pixtm);
    if (ppixtb)
        *ppixtb = pixtb;
    else
        pixDestroy(&pixtb);

    return 0;
}

Beispiel #2

Datei anzeigen

Datei: morphseq.c Projekt: ErfanHasmin/scope-ocr

/*!
 *  pixMorphCompSequence()
 *
 *      Input:  pixs
 *              sequence (string specifying sequence)
 *              dispsep (controls debug display of each result in the sequence:
 *                       0: no output
 *                       > 0: gives horizontal separation in pixels between
 *                            successive displays
 *                       < 0: pdf output; abs(dispsep) is used for naming)
 *      Return: pixd, or null on error
 *
 *  Notes:
 *      (1) This does rasterop morphology on binary images, using composite
 *          operations for extra speed on large Sels.
 *      (2) Safe closing is used atomically.  However, if you implement a
 *          closing as a sequence with a dilation followed by an
 *          erosion, it will not be safe, and to ensure that you have
 *          no boundary effects you must add a border in advance and
 *          remove it at the end.
 *      (3) For other usage details, see the notes for pixMorphSequence().
 *      (4) The sequence string is formatted as follows:
 *            - An arbitrary number of operations,  each separated
 *              by a '+' character.  White space is ignored.
 *            - Each operation begins with a case-independent character
 *              specifying the operation:
 *                 d or D  (dilation)
 *                 e or E  (erosion)
 *                 o or O  (opening)
 *                 c or C  (closing)
 *                 r or R  (rank binary reduction)
 *                 x or X  (replicative binary expansion)
 *                 b or B  (add a border of 0 pixels of this size)
 *            - The args to the morphological operations are bricks of hits,
 *              and are formatted as a.b, where a and b are horizontal and
 *              vertical dimensions, rsp.
 *            - The args to the reduction are a sequence of up to 4 integers,
 *              each from 1 to 4.
 *            - The arg to the expansion is a power of two, in the set
 *              {2, 4, 8, 16}.
 */
PIX *
pixMorphCompSequence(PIX         *pixs,
                     const char  *sequence,
                     l_int32      dispsep)
{
char    *rawop, *op, *fname;
char     buf[256];
l_int32  nops, i, j, nred, fact, w, h, x, y, border, pdfout;
l_int32  level[4];
PIX     *pixt1, *pixt2;
PIXA    *pixa;
SARRAY  *sa;

    PROCNAME("pixMorphCompSequence");

    if (!pixs)
        return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
    if (!sequence)
        return (PIX *)ERROR_PTR("sequence not defined", procName, NULL);

        /* Split sequence into individual operations */
    sa = sarrayCreate(0);
    sarraySplitString(sa, sequence, "+");
    nops = sarrayGetCount(sa);
    pdfout = (dispsep < 0) ? 1 : 0;

    if (!morphSequenceVerify(sa)) {
        sarrayDestroy(&sa);
        return (PIX *)ERROR_PTR("sequence not valid", procName, NULL);
    }

        /* Parse and operate */
    pixa = NULL;
    if (pdfout) {
        pixa = pixaCreate(0);
        pixaAddPix(pixa, pixs, L_CLONE);
        snprintf(buf, sizeof(buf), "/tmp/seq_output_%d.pdf", L_ABS(dispsep));
        fname = genPathname(buf, NULL);
    }
    border = 0;
    pixt1 = pixCopy(NULL, pixs);
    pixt2 = NULL;
    x = y = 0;
    for (i = 0; i < nops; i++) {
        rawop = sarrayGetString(sa, i, 0);
        op = stringRemoveChars(rawop, " \n\t");
        switch (op[0])
        {
        case 'd':
        case 'D':
            sscanf(&op[1], "%d.%d", &w, &h);
            pixt2 = pixDilateCompBrick(NULL, pixt1, w, h);
            pixSwapAndDestroy(&pixt1, &pixt2);
            break;
        case 'e':
        case 'E':
            sscanf(&op[1], "%d.%d", &w, &h);
            pixt2 = pixErodeCompBrick(NULL, pixt1, w, h);
            pixSwapAndDestroy(&pixt1, &pixt2);
            break;
        case 'o':
        case 'O':
            sscanf(&op[1], "%d.%d", &w, &h);
            pixOpenCompBrick(pixt1, pixt1, w, h);
            break;
        case 'c':
        case 'C':
            sscanf(&op[1], "%d.%d", &w, &h);
            pixCloseSafeCompBrick(pixt1, pixt1, w, h);
            break;
        case 'r':
        case 'R':
            nred = strlen(op) - 1;
            for (j = 0; j < nred; j++)
                level[j] = op[j + 1] - '0';
            for (j = nred; j < 4; j++)
                level[j] = 0;
            pixt2 = pixReduceRankBinaryCascade(pixt1, level[0], level[1],
                                               level[2], level[3]);
            pixSwapAndDestroy(&pixt1, &pixt2);
            break;
        case 'x':
        case 'X':
            sscanf(&op[1], "%d", &fact);
            pixt2 = pixExpandReplicate(pixt1, fact);
            pixSwapAndDestroy(&pixt1, &pixt2);
            break;
        case 'b':
        case 'B':
            sscanf(&op[1], "%d", &border);
            pixt2 = pixAddBorder(pixt1, border, 0);
            pixSwapAndDestroy(&pixt1, &pixt2);
            break;
        default:
            /* All invalid ops are caught in the first pass */
            break;
        }
        FREE(op);

            /* Debug output */
        if (dispsep > 0) {
            pixDisplay(pixt1, x, y);
            x += dispsep;
        }
        if (pdfout)
            pixaAddPix(pixa, pixt1, L_COPY);
    }
    if (border > 0) {
        pixt2 = pixRemoveBorder(pixt1, border);
        pixSwapAndDestroy(&pixt1, &pixt2);
    }

    if (pdfout) {
        pixaConvertToPdf(pixa, 0, 1.0, L_FLATE_ENCODE, 0, fname, fname);
        FREE(fname);
        pixaDestroy(&pixa);
    }

    sarrayDestroy(&sa);
    return pixt1;
}

Beispiel #3

Datei anzeigen

Datei: flipdetect.c Projekt: creatale/node-dv

/*!
 * \brief   pixUpDownDetectGeneralDwa()
 *
 * \param[in]    pixs 1 bpp, deskewed, English text
 * \param[out]   pconf confidence that text is rightside-up
 * \param[in]    mincount min number of up + down; use 0 for default
 * \param[in]    npixels number of pixels removed from each side of word box
 * \param[in]    debug 1 for debug output; 0 otherwise
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) See the notes in pixUpDownDetectGeneral() for usage.
 * </pre>
 */
l_int32
pixUpDownDetectGeneralDwa(PIX        *pixs,
                          l_float32  *pconf,
                          l_int32     mincount,
                          l_int32     npixels,
                          l_int32     debug)
{
char       flipsel1[] = "flipsel1";
char       flipsel2[] = "flipsel2";
char       flipsel3[] = "flipsel3";
char       flipsel4[] = "flipsel4";
l_int32    countup, countdown, nmax;
l_float32  nup, ndown;
PIX       *pixt, *pix0, *pix1, *pix2, *pix3, *pixm;

    PROCNAME("pixUpDownDetectGeneralDwa");

    if (!pconf)
        return ERROR_INT("&conf not defined", procName, 1);
    *pconf = 0.0;
    if (!pixs || pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
    if (mincount == 0)
        mincount = DEFAULT_MIN_UP_DOWN_COUNT;
    if (npixels < 0)
        npixels = 0;

    lept_mkdir("lept/orient");

        /* One of many reasonable pre-filtering sequences: (1, 8) and (30, 1).
         * This closes holes in x-height characters and joins them at
         * the x-height.  There is more noise in the descender detection
         * from this, but it works fairly well. */
    pixt = pixMorphSequenceDwa(pixs, "c1.8 + c30.1", 0);

        /* Be sure to add the border before the flip DWA operations! */
    pix0 = pixAddBorderGeneral(pixt, ADDED_BORDER, ADDED_BORDER,
                                ADDED_BORDER, ADDED_BORDER, 0);
    pixDestroy(&pixt);

        /* Optionally, make a mask of the word bounding boxes, shortening
         * each of them by a fixed amount at each end. */
    pixm = NULL;
    if (npixels > 0) {
        l_int32  i, nbox, x, y, w, h;
        BOX   *box;
        BOXA  *boxa;
        pix1 = pixMorphSequenceDwa(pix0, "o10.1", 0);
        boxa = pixConnComp(pix1, NULL, 8);
        pixm = pixCreateTemplate(pix1);
        pixDestroy(&pix1);
        nbox = boxaGetCount(boxa);
        for (i = 0; i < nbox; i++) {
            box = boxaGetBox(boxa, i, L_CLONE);
            boxGetGeometry(box, &x, &y, &w, &h);
            if (w > 2 * npixels)
                pixRasterop(pixm, x + npixels, y - 6, w - 2 * npixels, h + 13,
                            PIX_SET, NULL, 0, 0);
            boxDestroy(&box);
        }
        boxaDestroy(&boxa);
    }

        /* Find the ascenders and optionally filter with pixm.
         * For an explanation of the procedure used for counting the result
         * of the HMT, see comments in pixUpDownDetectGeneral().  */
    pix1 = pixFlipFHMTGen(NULL, pix0, flipsel1);
    pix2 = pixFlipFHMTGen(NULL, pix0, flipsel2);
    pixOr(pix1, pix1, pix2);
    if (pixm)
        pixAnd(pix1, pix1, pixm);
    pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
    pixCountPixels(pix3, &countup, NULL);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);

        /* Find the ascenders and optionally filter with pixm. */
    pix1 = pixFlipFHMTGen(NULL, pix0, flipsel3);
    pix2 = pixFlipFHMTGen(NULL, pix0, flipsel4);
    pixOr(pix1, pix1, pix2);
    if (pixm)
        pixAnd(pix1, pix1, pixm);
    pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
    pixCountPixels(pix3, &countdown, NULL);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);

        /* Evaluate statistically, generating a confidence that is
         * related to the probability with a gaussian distribution. */
    nup = (l_float32)(countup);
    ndown = (l_float32)(countdown);
    nmax = L_MAX(countup, countdown);
    if (nmax > mincount)
        *pconf = 2. * ((nup - ndown) / sqrt(nup + ndown));

    if (debug) {
        if (pixm) pixWriteDebug("/tmp/lept/orient/pixm2.png", pixm, IFF_PNG);
        fprintf(stderr, "nup = %7.3f, ndown = %7.3f, conf = %7.3f\n",
                nup, ndown, *pconf);
        if (*pconf > DEFAULT_MIN_UP_DOWN_CONF)
            fprintf(stderr, "Text is rightside-up\n");
        if (*pconf < -DEFAULT_MIN_UP_DOWN_CONF)
            fprintf(stderr, "Text is upside-down\n");
    }

    pixDestroy(&pix0);
    pixDestroy(&pixm);
    return 0;
}

Beispiel #4

Datei anzeigen

Datei: flipdetect.c Projekt: creatale/node-dv

/*!
 * \brief   pixMirrorDetect()
 *
 * \param[in]    pixs 1 bpp, deskewed, English text
 * \param[out]   pconf confidence that text is not LR mirror reversed
 * \param[in]    mincount min number of left + right; use 0 for default
 * \param[in]    debug 1 for debug output; 0 otherwise
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) For this test, it is necessary that the text is horizontally
 *          oriented, with ascenders going up.
 *      (2) conf is the normalized difference between the number of
 *          right and left facing characters with ascenders.
 *          Left-facing are {d}; right-facing are {b, h, k}.
 *          At least that was the expectation.  In practice, we can
 *          really just say that it is the normalized difference in
 *          hits using two specific hit-miss filters, textsel1 and textsel2,
 *          after the image has been suitably pre-filtered so that
 *          these filters are effective.  See (4) for what's really happening.
 *      (3) A large positive conf value indicates normal text, whereas
 *          a large negative conf value means the page is mirror reversed.
 *      (4) The implementation is a bit tricky.  The general idea is
 *          to fill the x-height part of characters, but not the space
 *          between them, before doing the HMT.  This is done by
 *          finding pixels added using two different operations -- a
 *          horizontal close and a vertical dilation -- and adding
 *          the intersection of these sets to the original.  It turns
 *          out that the original intuition about the signal was largely
 *          in error: much of the signal for right-facing characters
 *          comes from the lower part of common x-height characters, like
 *          the e and c, that remain open after these operations.
 *          So it's important that the operations to close the x-height
 *          parts of the characters are purposely weakened sufficiently
 *          to allow these characters to remain open.  The wonders
 *          of morphology!
 * </pre>
 */
l_int32
pixMirrorDetect(PIX        *pixs,
                l_float32  *pconf,
                l_int32     mincount,
                l_int32     debug)
{
l_int32    count1, count2, nmax;
l_float32  nleft, nright;
PIX       *pix0, *pix1, *pix2, *pix3;
SEL       *sel1, *sel2;

    PROCNAME("pixMirrorDetect");

    if (!pconf)
        return ERROR_INT("&conf not defined", procName, 1);
    *pconf = 0.0;
    if (!pixs || pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
    if (mincount == 0)
        mincount = DEFAULT_MIN_MIRROR_FLIP_COUNT;

    sel1 = selCreateFromString(textsel1, 5, 6, NULL);
    sel2 = selCreateFromString(textsel2, 5, 6, NULL);

        /* Fill x-height characters but not space between them, sort of. */
    pix3 = pixMorphCompSequence(pixs, "d1.30", 0);
    pixXor(pix3, pix3, pixs);
    pix0 = pixMorphCompSequence(pixs, "c15.1", 0);
    pixXor(pix0, pix0, pixs);
    pixAnd(pix0, pix0, pix3);
    pixOr(pix0, pix0, pixs);
    pixDestroy(&pix3);

        /* Filter the right-facing characters. */
    pix1 = pixHMT(NULL, pix0, sel1);
    pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
    pixCountPixels(pix3, &count1, NULL);
    pixDebugFlipDetect("/tmp/lept/orient/right.png", pixs, pix1, debug);
    pixDestroy(&pix1);
    pixDestroy(&pix3);

        /* Filter the left-facing characters. */
    pix2 = pixHMT(NULL, pix0, sel2);
    pix3 = pixReduceRankBinaryCascade(pix2, 1, 1, 0, 0);
    pixCountPixels(pix3, &count2, NULL);
    pixDebugFlipDetect("/tmp/lept/orient/left.png", pixs, pix2, debug);
    pixDestroy(&pix2);
    pixDestroy(&pix3);

    nright = (l_float32)count1;
    nleft = (l_float32)count2;
    nmax = L_MAX(count1, count2);
    pixDestroy(&pix0);
    selDestroy(&sel1);
    selDestroy(&sel2);

    if (nmax > mincount)
        *pconf = 2. * ((nright - nleft) / sqrt(nright + nleft));

    if (debug) {
        fprintf(stderr, "nright = %f, nleft = %f\n", nright, nleft);
        if (*pconf > DEFAULT_MIN_MIRROR_FLIP_CONF)
            fprintf(stderr, "Text is not mirror reversed\n");
        if (*pconf < -DEFAULT_MIN_MIRROR_FLIP_CONF)
            fprintf(stderr, "Text is mirror reversed\n");
    }

    return 0;
}

Beispiel #5

Datei anzeigen

Datei: flipdetect.c Projekt: creatale/node-dv

/*!
 * \brief   pixMirrorDetectDwa()
 *
 * \param[in]    pixs 1 bpp, deskewed, English text
 * \param[out]   pconf confidence that text is not LR mirror reversed
 * \param[in]    mincount min number of left + right; use 0 for default
 * \param[in]    debug 1 for debug output; 0 otherwise
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) We assume the text is horizontally oriented, with
 *          ascenders going up.
 *      (2) See notes in pixMirrorDetect().
 * </pre>
 */
l_int32
pixMirrorDetectDwa(PIX        *pixs,
                   l_float32  *pconf,
                   l_int32     mincount,
                   l_int32     debug)
{
char       flipsel1[] = "flipsel1";
char       flipsel2[] = "flipsel2";
l_int32    count1, count2, nmax;
l_float32  nleft, nright;
PIX       *pix0, *pix1, *pix2, *pix3;

    PROCNAME("pixMirrorDetectDwa");

    if (!pconf)
        return ERROR_INT("&conf not defined", procName, 1);
    *pconf = 0.0;
    if (!pixs || pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
    if (mincount == 0)
        mincount = DEFAULT_MIN_MIRROR_FLIP_COUNT;

        /* Fill x-height characters but not space between them, sort of. */
    pix3 = pixMorphSequenceDwa(pixs, "d1.30", 0);
    pixXor(pix3, pix3, pixs);
    pix0 = pixMorphSequenceDwa(pixs, "c15.1", 0);
    pixXor(pix0, pix0, pixs);
    pixAnd(pix0, pix0, pix3);
    pixOr(pix3, pix0, pixs);
    pixDestroy(&pix0);
    pix0 = pixAddBorderGeneral(pix3, ADDED_BORDER, ADDED_BORDER,
                                ADDED_BORDER, ADDED_BORDER, 0);
    pixDestroy(&pix3);

        /* Filter the right-facing characters. */
    pix1 = pixFlipFHMTGen(NULL, pix0, flipsel1);
    pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
    pixCountPixels(pix3, &count1, NULL);
    pixDestroy(&pix1);
    pixDestroy(&pix3);

        /* Filter the left-facing characters. */
    pix2 = pixFlipFHMTGen(NULL, pix0, flipsel2);
    pix3 = pixReduceRankBinaryCascade(pix2, 1, 1, 0, 0);
    pixCountPixels(pix3, &count2, NULL);
    pixDestroy(&pix2);
    pixDestroy(&pix3);

    pixDestroy(&pix0);
    nright = (l_float32)count1;
    nleft = (l_float32)count2;
    nmax = L_MAX(count1, count2);

    if (nmax > mincount)
        *pconf = 2. * ((nright - nleft) / sqrt(nright + nleft));

    if (debug) {
        fprintf(stderr, "nright = %f, nleft = %f\n", nright, nleft);
        if (*pconf > DEFAULT_MIN_MIRROR_FLIP_CONF)
            fprintf(stderr, "Text is not mirror reversed\n");
        if (*pconf < -DEFAULT_MIN_MIRROR_FLIP_CONF)
            fprintf(stderr, "Text is mirror reversed\n");
    }

    return 0;
}

Beispiel #6

Datei anzeigen

Datei: classapp.c Projekt: mehulsbhatt/MyOCRTEST

/*!
 *  pixGetWordsInTextlines()
 *
 *      Input:  pixs (1 bpp, typ. 300 ppi)
 *              reduction (1 for input res; 2 for 2x reduction of input res)
 *              minwidth, minheight (of saved components; smaller are discarded)
 *              maxwidth, maxheight (of saved components; larger are discarded)
 *              &boxad (<return> word boxes sorted in textline line order)
 *              &pixad (<return> word images sorted in textline line order)
 *              &naindex (<return> index of textline for each word)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) The input should be at a resolution of about 300 ppi.
 *          The word masks and word images can be computed at either
 *          150 ppi or 300 ppi.  For the former, set reduction = 2.
 *      (2) The four size constraints on saved components are all
 *          scaled by @reduction.
 *      (3) The result are word images (and their b.b.), extracted in
 *          textline order, at either full res or 2x reduction,
 *          and with a numa giving the textline index for each word.
 *      (4) The pixa and boxa interfaces should make this type of
 *          application simple to put together.  The steps are:
 *           - optionally reduce by 2x
 *           - generate first estimate of word masks
 *           - get b.b. of these, and remove the small and big ones
 *           - extract pixa of the word images, using the b.b.
 *           - sort actual word images in textline order (2d)
 *           - flatten them to a pixa (1d), saving the textline index
 *             for each pix
 *      (5) In an actual application, it may be desirable to pre-filter
 *          the input image to remove large components, to extract
 *          single columns of text, and to deskew them.  For example,
 *          to remove both large components and small noisy components
 *          that can interfere with the statistics used to estimate
 *          parameters for segmenting by words, but still retain text lines,
 *          the following image preprocessing can be done:
 *                Pix *pixt = pixMorphSequence(pixs, "c40.1", 0);
 *                Pix *pixf = pixSelectBySize(pixt, 0, 60, 8,
 *                                     L_SELECT_HEIGHT, L_SELECT_IF_LT, NULL);
 *                pixAnd(pixf, pixf, pixs);  // the filtered image
 *          The closing turns text lines into long blobs, but does not
 *          significantly increase their height.  But if there are many
 *          small connected components in a dense texture, this is likely
 *          to generate tall components that will be eliminated in pixf.
 */
l_int32
pixGetWordsInTextlines(PIX *pixs,
                       l_int32 reduction,
                       l_int32 minwidth,
                       l_int32 minheight,
                       l_int32 maxwidth,
                       l_int32 maxheight,
                       BOXA **pboxad,
                       PIXA **ppixad,
                       NUMA **pnai) {
    l_int32 maxdil;
    BOXA *boxa1, *boxad;
    BOXAA *baa;
    NUMA *nai;
    NUMAA *naa;
    PIXA *pixa1, *pixad;
    PIX *pix1;
    PIXAA *paa;

    PROCNAME("pixGetWordsInTextlines");

    if (!pboxad || !ppixad || !pnai)
        return ERROR_INT("&boxad, &pixad, &nai not all defined", procName, 1);
    *pboxad = NULL;
    *ppixad = NULL;
    *pnai = NULL;
    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 1);
    if (reduction != 1 && reduction != 2)
        return ERROR_INT("reduction not in {1,2}", procName, 1);

    if (reduction == 1) {
        pix1 = pixClone(pixs);
        maxdil = 18;
    } else {  /* reduction == 2 */
        pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
        maxdil = 9;
    }

    /* Get the bounding boxes of the words from the word mask. */
    pixWordBoxesByDilation(pix1, maxdil, minwidth, minheight,
                           maxwidth, maxheight, &boxa1, NULL);

    /* Generate a pixa of the word images */
    pixa1 = pixaCreateFromBoxa(pix1, boxa1, NULL);  /* mask over each word */

    /* Sort the bounding boxes of these words by line.  We use the
     * index mapping to allow identical sorting of the pixa. */
    baa = boxaSort2d(boxa1, &naa, -1, -1, 4);
    paa = pixaSort2dByIndex(pixa1, naa, L_CLONE);

    /* Flatten the word paa */
    pixad = pixaaFlattenToPixa(paa, &nai, L_CLONE);
    boxad = pixaGetBoxa(pixad, L_COPY);

    *pnai = nai;
    *pboxad = boxad;
    *ppixad = pixad;

    pixDestroy(&pix1);
    pixaDestroy(&pixa1);
    boxaDestroy(&boxa1);
    boxaaDestroy(&baa);
    pixaaDestroy(&paa);
    numaaDestroy(&naa);
    return 0;
}

Beispiel #7

Datei anzeigen

Datei: livre_pageseg.c Projekt: DanBloomberg/leptonica

l_int32
DoPageSegmentation(PIX     *pixs,   /* should be at least 300 ppi */
                   l_int32  which)  /* 1, 2, 3, 4 */
{
char         buf[256];
l_int32      zero;
BOXA        *boxatm, *boxahm;
PIX         *pixr;   /* image reduced to 150 ppi */
PIX         *pixhs;  /* image of halftone seed, 150 ppi */
PIX         *pixm;   /* image of mask of components, 150 ppi */
PIX         *pixhm1; /* image of halftone mask, 150 ppi */
PIX         *pixhm2; /* image of halftone mask, 300 ppi */
PIX         *pixht;  /* image of halftone components, 150 ppi */
PIX         *pixnht; /* image without halftone components, 150 ppi */
PIX         *pixi;   /* inverted image, 150 ppi */
PIX         *pixvws; /* image of vertical whitespace, 150 ppi */
PIX         *pixtm1; /* image of closed textlines, 150 ppi */
PIX         *pixtm2; /* image of refined text line mask, 150 ppi */
PIX         *pixtm3; /* image of refined text line mask, 300 ppi */
PIX         *pixtb1; /* image of text block mask, 150 ppi */
PIX         *pixtb2; /* image of text block mask, 300 ppi */
PIX         *pixnon; /* image of non-text or halftone, 150 ppi */
PIX         *pixt1, *pixt2, *pixt3;
PIXA        *pixa;
PIXCMAP     *cmap;
PTAA        *ptaa;
l_int32      ht_flag = 0;
l_int32      ws_flag = 0;
l_int32      text_flag = 0;
l_int32      block_flag = 0;

    PROCNAME("DoPageSegmentation");

    if (which == 1)
        ht_flag = 1;
    else if (which == 2)
        ws_flag = 1;
    else if (which == 3)
        text_flag = 1;
    else if (which == 4)
        block_flag = 1;
    else
        return ERROR_INT("invalid parameter: not in [1...4]", procName, 1);
    pixDisplayWrite(NULL, -1);

        /* Reduce to 150 ppi */
    pixt1 = pixScaleToGray2(pixs);
    pixDisplayWriteFormat(pixt1, L_MAX(ws_flag, L_MAX(ht_flag, block_flag)),
                          IFF_PNG);
    if (which == 1) pixWrite("/tmp/lept/orig.gray.150.png", pixt1, IFF_PNG);
    pixDestroy(&pixt1);
    pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);

        /* Get seed for halftone parts */
    pixt1 = pixReduceRankBinaryCascade(pixr, 4, 4, 3, 0);
    pixt2 = pixOpenBrick(NULL, pixt1, 5, 5);
    pixhs = pixExpandBinaryPower2(pixt2, 8);
    pixDisplayWriteFormat(pixhs, ht_flag, IFF_PNG);
    if (which == 1) pixWrite("/tmp/lept/htseed.150.png", pixhs, IFF_PNG);
    pixDestroy(&pixt1);
    pixDestroy(&pixt2);

        /* Get mask for connected regions */
    pixm = pixCloseSafeBrick(NULL, pixr, 4, 4);
    pixDisplayWriteFormat(pixm, ht_flag, IFF_PNG);
    if (which == 1) pixWrite("/tmp/lept/ccmask.150.png", pixm, IFF_PNG);

        /* Fill seed into mask to get halftone mask */
    pixhm1 = pixSeedfillBinary(NULL, pixhs, pixm, 4);
    pixDisplayWriteFormat(pixhm1, ht_flag, IFF_PNG);
    if (which == 1) pixWrite("/tmp/lept/htmask.150.png", pixhm1, IFF_PNG);
    pixhm2 = pixExpandBinaryPower2(pixhm1, 2);

        /* Extract halftone stuff */
    pixht = pixAnd(NULL, pixhm1, pixr);
    if (which == 1) pixWrite("/tmp/lept/ht.150.png", pixht, IFF_PNG);

        /* Extract non-halftone stuff */
    pixnht = pixXor(NULL, pixht, pixr);
    pixDisplayWriteFormat(pixnht, text_flag, IFF_PNG);
    if (which == 1) pixWrite("/tmp/lept/text.150.png", pixnht, IFF_PNG);
    pixZero(pixht, &zero);
    if (zero)
        fprintf(stderr, "No halftone parts found\n");
    else
        fprintf(stderr, "Halftone parts found\n");

        /* Get bit-inverted image */
    pixi = pixInvert(NULL, pixnht);
    if (which == 1) pixWrite("/tmp/lept/invert.150.png", pixi, IFF_PNG);
    pixDisplayWriteFormat(pixi, ws_flag, IFF_PNG);

        /* The whitespace mask will break textlines where there
         * is a large amount of white space below or above.
         * We can prevent this by identifying regions of the
         * inverted image that have large horizontal (bigger than
         * the separation between columns) and significant
         * vertical extent (bigger than the separation between
         * textlines), and subtracting this from the whitespace mask. */
    pixt1 = pixMorphCompSequence(pixi, "o80.60", 0);
    pixt2 = pixSubtract(NULL, pixi, pixt1);
    pixDisplayWriteFormat(pixt2, ws_flag, IFF_PNG);
    pixDestroy(&pixt1);

        /* Identify vertical whitespace by opening inverted image */
    pixt3 = pixOpenBrick(NULL, pixt2, 5, 1);  /* removes thin vertical lines */
    pixvws = pixOpenBrick(NULL, pixt3, 1, 200);  /* gets long vertical lines */
    pixDisplayWriteFormat(pixvws, L_MAX(text_flag, ws_flag), IFF_PNG);
    if (which == 1) pixWrite("/tmp/lept/vertws.150.png", pixvws, IFF_PNG);
    pixDestroy(&pixt2);
    pixDestroy(&pixt3);

        /* Get proto (early processed) text line mask. */
        /* First close the characters and words in the textlines */
    pixtm1 = pixCloseSafeBrick(NULL, pixnht, 30, 1);
    pixDisplayWriteFormat(pixtm1, text_flag, IFF_PNG);
    if (which == 1) pixWrite("/tmp/lept/textmask1.150.png", pixtm1, IFF_PNG);

        /* Next open back up the vertical whitespace corridors */
    pixtm2 = pixSubtract(NULL, pixtm1, pixvws);
    if (which == 1) pixWrite("/tmp/lept/textmask2.150.png", pixtm2, IFF_PNG);

        /* Do a small opening to remove noise */
    pixOpenBrick(pixtm2, pixtm2, 3, 3);
    pixDisplayWriteFormat(pixtm2, text_flag, IFF_PNG);
    if (which == 1) pixWrite("/tmp/lept/textmask3.150.png", pixtm2, IFF_PNG);
    pixtm3 = pixExpandBinaryPower2(pixtm2, 2);

        /* Join pixels vertically to make text block mask */
    pixtb1 = pixMorphSequence(pixtm2, "c1.10 + o4.1", 0);
    pixDisplayWriteFormat(pixtb1, block_flag, IFF_PNG);
    if (which == 1) pixWrite("/tmp/lept/textblock1.150.png", pixtb1, IFF_PNG);

        /* Solidify the textblock mask and remove noise:
         *  (1) For each c.c., close the blocks and dilate slightly
         *      to form a solid mask.
         *  (2) Small horizontal closing between components
         *  (3) Open the white space between columns, again
         *  (4) Remove small components */
    pixt1 = pixMorphSequenceByComponent(pixtb1, "c30.30 + d3.3", 8, 0, 0, NULL);
    pixCloseSafeBrick(pixt1, pixt1, 10, 1);
    pixDisplayWriteFormat(pixt1, block_flag, IFF_PNG);
    pixt2 = pixSubtract(NULL, pixt1, pixvws);
    pixt3 = pixSelectBySize(pixt2, 25, 5, 8, L_SELECT_IF_BOTH,
                            L_SELECT_IF_GTE, NULL);
    pixDisplayWriteFormat(pixt3, block_flag, IFF_PNG);
    if (which == 1) pixWrite("/tmp/lept/textblock2.150.png", pixt3, IFF_PNG);
    pixtb2 = pixExpandBinaryPower2(pixt3, 2);
    pixDestroy(&pixt1);
    pixDestroy(&pixt2);
    pixDestroy(&pixt3);

        /* Identify the outlines of each textblock */
    ptaa = pixGetOuterBordersPtaa(pixtb2);
    pixt1 = pixRenderRandomCmapPtaa(pixtb2, ptaa, 1, 8, 1);
    cmap = pixGetColormap(pixt1);
    pixcmapResetColor(cmap, 0, 130, 130, 130);  /* set interior to gray */
    if (which == 1) pixWrite("/tmp/lept/textblock3.300.png", pixt1, IFF_PNG);
    pixDisplayWithTitle(pixt1, 480, 360, "textblock mask with outlines", DFLAG);
    ptaaDestroy(&ptaa);
    pixDestroy(&pixt1);

        /* Fill line mask (as seed) into the original */
    pixt1 = pixSeedfillBinary(NULL, pixtm3, pixs, 8);
    pixOr(pixtm3, pixtm3, pixt1);
    pixDestroy(&pixt1);
    if (which == 1) pixWrite("/tmp/lept/textmask.300.png", pixtm3, IFF_PNG);
    pixDisplayWithTitle(pixtm3, 480, 360, "textline mask 4", DFLAG);

        /* Fill halftone mask (as seed) into the original */
    pixt1 = pixSeedfillBinary(NULL, pixhm2, pixs, 8);
    pixOr(pixhm2, pixhm2, pixt1);
    pixDestroy(&pixt1);
    if (which == 1) pixWrite("/tmp/lept/htmask.300.png", pixhm2, IFF_PNG);
    pixDisplayWithTitle(pixhm2, 520, 390, "halftonemask 2", DFLAG);

        /* Find objects that are neither text nor halftones */
    pixt1 = pixSubtract(NULL, pixs, pixtm3);  /* remove text pixels */
    pixnon = pixSubtract(NULL, pixt1, pixhm2);  /* remove halftone pixels */
    if (which == 1) pixWrite("/tmp/lept/other.300.png", pixnon, IFF_PNG);
    pixDisplayWithTitle(pixnon, 540, 420, "other stuff", DFLAG);
    pixDestroy(&pixt1);

        /* Write out b.b. for text line mask and halftone mask components */
    boxatm = pixConnComp(pixtm3, NULL, 4);
    boxahm = pixConnComp(pixhm2, NULL, 8);
    if (which == 1) boxaWrite("/tmp/lept/textmask.boxa", boxatm);
    if (which == 1) boxaWrite("/tmp/lept/htmask.boxa", boxahm);

    pixa = pixaReadFiles("/tmp/lept/display", "file");
    pixt1 = pixaDisplayTiledAndScaled(pixa, 8, 250, 4, 0, 25, 2);
    snprintf(buf, sizeof(buf), "/tmp/lept/segout.%d.png", which);
    pixWrite(buf, pixt1, IFF_PNG);
    pixDestroy(&pixt1);
    pixaDestroy(&pixa);

        /* clean up to test with valgrind */
    pixDestroy(&pixr);
    pixDestroy(&pixhs);
    pixDestroy(&pixm);
    pixDestroy(&pixhm1);
    pixDestroy(&pixhm2);
    pixDestroy(&pixht);
    pixDestroy(&pixnht);
    pixDestroy(&pixi);
    pixDestroy(&pixvws);
    pixDestroy(&pixtm1);
    pixDestroy(&pixtm2);
    pixDestroy(&pixtm3);
    pixDestroy(&pixtb1);
    pixDestroy(&pixtb2);
    pixDestroy(&pixnon);
    boxaDestroy(&boxatm);
    boxaDestroy(&boxahm);
    return 0;
}

Beispiel #8

Datei anzeigen

Datei: skew.c Projekt: ConfusedReality/pkg_images_leptonica

/*!
 * \brief   pixFindSkewSweepAndSearchScorePivot()
 *
 * \param[in]    pixs  1 bpp
 * \param[out]   pangle   angle required to deskew; in degrees
 * \param[out]   pconf    confidence given by ratio of max/min score
 * \param[out]   pendscore [optional] max score; use NULL to ignore
 * \param[in]    redsweep  sweep reduction factor = 1, 2, 4 or 8
 * \param[in]    redsearch  binary search reduction factor = 1, 2, 4 or 8;
 *                          and must not exceed redsweep
 * \param[in]    sweepcenter  angle about which sweep is performed; in degrees
 * \param[in]    sweeprange   half the full range, taken about sweepcenter;
 *                            in degrees
 * \param[in]    sweepdelta   angle increment of sweep; in degrees
 * \param[in]    minbsdelta   min binary search increment angle; in degrees
 * \param[in]    pivot  L_SHEAR_ABOUT_CORNER, L_SHEAR_ABOUT_CENTER
 * \return  0 if OK, 1 on error or if angle measurment not valid
 *
 * <pre>
 * Notes:
 *      (1) See notes in pixFindSkewSweepAndSearchScore().
 *      (2) This allows choice of shear pivoting from either the UL corner
 *          or the center.  For small angles, the ability to discriminate
 *          angles is better with shearing from the UL corner.  However,
 *          for large angles (say, greater than 20 degrees), it is better
 *          to shear about the center because a shear from the UL corner
 *          loses too much of the image.
 * </pre>
 */
l_int32
pixFindSkewSweepAndSearchScorePivot(PIX        *pixs,
                                    l_float32  *pangle,
                                    l_float32  *pconf,
                                    l_float32  *pendscore,
                                    l_int32     redsweep,
                                    l_int32     redsearch,
                                    l_float32   sweepcenter,
                                    l_float32   sweeprange,
                                    l_float32   sweepdelta,
                                    l_float32   minbsdelta,
                                    l_int32     pivot)
{
l_int32    ret, bzero, i, nangles, n, ratio, maxindex, minloc;
l_int32    width, height;
l_float32  deg2rad, theta, delta;
l_float32  sum, maxscore, maxangle;
l_float32  centerangle, leftcenterangle, rightcenterangle;
l_float32  lefttemp, righttemp;
l_float32  bsearchscore[5];
l_float32  minscore, minthresh;
l_float32  rangeleft;
NUMA      *natheta, *nascore;
PIX       *pixsw, *pixsch, *pixt1, *pixt2;

    PROCNAME("pixFindSkewSweepAndSearchScorePivot");

    if (pendscore) *pendscore = 0.0;
    if (pangle) *pangle = 0.0;
    if (pconf) *pconf = 0.0;
    if (!pangle || !pconf)
        return ERROR_INT("&angle and/or &conf not defined", procName, 1);
    if (!pixs || pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
    if (redsweep != 1 && redsweep != 2 && redsweep != 4 && redsweep != 8)
        return ERROR_INT("redsweep must be in {1,2,4,8}", procName, 1);
    if (redsearch != 1 && redsearch != 2 && redsearch != 4 && redsearch != 8)
        return ERROR_INT("redsearch must be in {1,2,4,8}", procName, 1);
    if (redsearch > redsweep)
        return ERROR_INT("redsearch must not exceed redsweep", procName, 1);
    if (pivot != L_SHEAR_ABOUT_CORNER && pivot != L_SHEAR_ABOUT_CENTER)
        return ERROR_INT("invalid pivot", procName, 1);

    deg2rad = 3.1415926535 / 180.;
    ret = 0;

        /* Generate reduced image for binary search, if requested */
    if (redsearch == 1)
        pixsch = pixClone(pixs);
    else if (redsearch == 2)
        pixsch = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
    else if (redsearch == 4)
        pixsch = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0);
    else  /* redsearch == 8 */
        pixsch = pixReduceRankBinaryCascade(pixs, 1, 1, 2, 0);

    pixZero(pixsch, &bzero);
    if (bzero) {
        pixDestroy(&pixsch);
        return 1;
    }

        /* Generate reduced image for sweep, if requested */
    ratio = redsweep / redsearch;
    if (ratio == 1) {
        pixsw = pixClone(pixsch);
    } else {  /* ratio > 1 */
        if (ratio == 2)
            pixsw = pixReduceRankBinaryCascade(pixsch, 1, 0, 0, 0);
        else if (ratio == 4)
            pixsw = pixReduceRankBinaryCascade(pixsch, 1, 2, 0, 0);
        else  /* ratio == 8 */
            pixsw = pixReduceRankBinaryCascade(pixsch, 1, 2, 2, 0);
    }

    pixt1 = pixCreateTemplate(pixsw);
    if (ratio == 1)
        pixt2 = pixClone(pixt1);
    else
        pixt2 = pixCreateTemplate(pixsch);

    nangles = (l_int32)((2. * sweeprange) / sweepdelta + 1);
    natheta = numaCreate(nangles);
    nascore = numaCreate(nangles);

    if (!pixsch || !pixsw) {
        ret = ERROR_INT("pixsch and pixsw not both made", procName, 1);
        goto cleanup;
    }
    if (!pixt1 || !pixt2) {
        ret = ERROR_INT("pixt1 and pixt2 not both made", procName, 1);
        goto cleanup;
    }
    if (!natheta || !nascore) {
        ret = ERROR_INT("natheta and nascore not both made", procName, 1);
        goto cleanup;
    }

        /* Do sweep */
    rangeleft = sweepcenter - sweeprange;
    for (i = 0; i < nangles; i++) {
        theta = rangeleft + i * sweepdelta;   /* degrees */

            /* Shear pix and put the result in pixt1 */
        if (pivot == L_SHEAR_ABOUT_CORNER)
            pixVShearCorner(pixt1, pixsw, deg2rad * theta, L_BRING_IN_WHITE);
        else
            pixVShearCenter(pixt1, pixsw, deg2rad * theta, L_BRING_IN_WHITE);

            /* Get score */
        pixFindDifferentialSquareSum(pixt1, &sum);

#if  DEBUG_PRINT_SCORES
        L_INFO("sum(%7.2f) = %7.0f\n", procName, theta, sum);
#endif  /* DEBUG_PRINT_SCORES */

            /* Save the result in the output arrays */
        numaAddNumber(nascore, sum);
        numaAddNumber(natheta, theta);
    }

        /* Find the largest of the set (maxscore at maxangle) */
    numaGetMax(nascore, &maxscore, &maxindex);
    numaGetFValue(natheta, maxindex, &maxangle);

#if  DEBUG_PRINT_SWEEP
    L_INFO(" From sweep: angle = %7.3f, score = %7.3f\n", procName,
           maxangle, maxscore);
#endif  /* DEBUG_PRINT_SWEEP */

#if  DEBUG_PLOT_SCORES
        /* Plot the sweep result -- the scores versus rotation angle --
         * using gnuplot with GPLOT_LINES (lines connecting data points). */
    {GPLOT  *gplot;
        gplot = gplotCreate("sweep_output", GPLOT_PNG,
                    "Sweep. Variance of difference of ON pixels vs. angle",
                    "angle (deg)", "score");
        gplotAddPlot(gplot, natheta, nascore, GPLOT_LINES, "plot1");
        gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot2");
        gplotMakeOutput(gplot);
        gplotDestroy(&gplot);
    }
#endif  /* DEBUG_PLOT_SCORES */

        /* Check if the max is at the end of the sweep. */
    n = numaGetCount(natheta);
    if (maxindex == 0 || maxindex == n - 1) {
        L_WARNING("max found at sweep edge\n", procName);
        goto cleanup;
    }

        /* Empty the numas for re-use */
    numaEmpty(nascore);
    numaEmpty(natheta);

        /* Do binary search to find skew angle.
         * First, set up initial three points. */
    centerangle = maxangle;
    if (pivot == L_SHEAR_ABOUT_CORNER) {
        pixVShearCorner(pixt2, pixsch, deg2rad * centerangle, L_BRING_IN_WHITE);
        pixFindDifferentialSquareSum(pixt2, &bsearchscore[2]);
        pixVShearCorner(pixt2, pixsch, deg2rad * (centerangle - sweepdelta),
                        L_BRING_IN_WHITE);
        pixFindDifferentialSquareSum(pixt2, &bsearchscore[0]);
        pixVShearCorner(pixt2, pixsch, deg2rad * (centerangle + sweepdelta),
                        L_BRING_IN_WHITE);
        pixFindDifferentialSquareSum(pixt2, &bsearchscore[4]);
    } else {
        pixVShearCenter(pixt2, pixsch, deg2rad * centerangle, L_BRING_IN_WHITE);
        pixFindDifferentialSquareSum(pixt2, &bsearchscore[2]);
        pixVShearCenter(pixt2, pixsch, deg2rad * (centerangle - sweepdelta),
                        L_BRING_IN_WHITE);
        pixFindDifferentialSquareSum(pixt2, &bsearchscore[0]);
        pixVShearCenter(pixt2, pixsch, deg2rad * (centerangle + sweepdelta),
                        L_BRING_IN_WHITE);
        pixFindDifferentialSquareSum(pixt2, &bsearchscore[4]);
    }

    numaAddNumber(nascore, bsearchscore[2]);
    numaAddNumber(natheta, centerangle);
    numaAddNumber(nascore, bsearchscore[0]);
    numaAddNumber(natheta, centerangle - sweepdelta);
    numaAddNumber(nascore, bsearchscore[4]);
    numaAddNumber(natheta, centerangle + sweepdelta);

        /* Start the search */
    delta = 0.5 * sweepdelta;
    while (delta >= minbsdelta)
    {
            /* Get the left intermediate score */
        leftcenterangle = centerangle - delta;
        if (pivot == L_SHEAR_ABOUT_CORNER)
            pixVShearCorner(pixt2, pixsch, deg2rad * leftcenterangle,
                            L_BRING_IN_WHITE);
        else
            pixVShearCenter(pixt2, pixsch, deg2rad * leftcenterangle,
                            L_BRING_IN_WHITE);
        pixFindDifferentialSquareSum(pixt2, &bsearchscore[1]);
        numaAddNumber(nascore, bsearchscore[1]);
        numaAddNumber(natheta, leftcenterangle);

            /* Get the right intermediate score */
        rightcenterangle = centerangle + delta;
        if (pivot == L_SHEAR_ABOUT_CORNER)
            pixVShearCorner(pixt2, pixsch, deg2rad * rightcenterangle,
                            L_BRING_IN_WHITE);
        else
            pixVShearCenter(pixt2, pixsch, deg2rad * rightcenterangle,
                            L_BRING_IN_WHITE);
        pixFindDifferentialSquareSum(pixt2, &bsearchscore[3]);
        numaAddNumber(nascore, bsearchscore[3]);
        numaAddNumber(natheta, rightcenterangle);

            /* Find the maximum of the five scores and its location.
             * Note that the maximum must be in the center
             * three values, not in the end two. */
        maxscore = bsearchscore[1];
        maxindex = 1;
        for (i = 2; i < 4; i++) {
            if (bsearchscore[i] > maxscore) {
                maxscore = bsearchscore[i];
                maxindex = i;
            }
        }

            /* Set up score array to interpolate for the next iteration */
        lefttemp = bsearchscore[maxindex - 1];
        righttemp = bsearchscore[maxindex + 1];
        bsearchscore[2] = maxscore;
        bsearchscore[0] = lefttemp;
        bsearchscore[4] = righttemp;

            /* Get new center angle and delta for next iteration */
        centerangle = centerangle + delta * (maxindex - 2);
        delta = 0.5 * delta;
    }
    *pangle = centerangle;

#if  DEBUG_PRINT_SCORES
    L_INFO(" Binary search score = %7.3f\n", procName, bsearchscore[2]);
#endif  /* DEBUG_PRINT_SCORES */

    if (pendscore)  /* save if requested */
        *pendscore = bsearchscore[2];

        /* Return the ratio of Max score over Min score
         * as a confidence value.  Don't trust if the Min score
         * is too small, which can happen if the image is all black
         * with only a few white pixels interspersed.  In that case,
         * we get a contribution from the top and bottom edges when
         * vertically sheared, but this contribution becomes zero when
         * the shear angle is zero.  For zero shear angle, the only
         * contribution will be from the white pixels.  We expect that
         * the signal goes as the product of the (height * width^2),
         * so we compute a (hopefully) normalized minimum threshold as
         * a function of these dimensions.  */
    numaGetMin(nascore, &minscore, &minloc);
    width = pixGetWidth(pixsch);
    height = pixGetHeight(pixsch);
    minthresh = MINSCORE_THRESHOLD_CONSTANT * width * width * height;

#if  DEBUG_THRESHOLD
    L_INFO(" minthresh = %10.2f, minscore = %10.2f\n", procName,
           minthresh, minscore);
    L_INFO(" maxscore = %10.2f\n", procName, maxscore);
#endif  /* DEBUG_THRESHOLD */

    if (minscore > minthresh)
        *pconf = maxscore / minscore;
    else
        *pconf = 0.0;

        /* Don't trust it if too close to the edge of the sweep
         * range or if maxscore is small */
    if ((centerangle > rangeleft + 2 * sweeprange - sweepdelta) ||
        (centerangle < rangeleft + sweepdelta) ||
        (maxscore < MIN_VALID_MAXSCORE))
        *pconf = 0.0;

#if  DEBUG_PRINT_BINARY
    fprintf(stderr, "Binary search: angle = %7.3f, score ratio = %6.2f\n",
            *pangle, *pconf);
    fprintf(stderr, "               max score = %8.0f\n", maxscore);
#endif  /* DEBUG_PRINT_BINARY */

#if  DEBUG_PLOT_SCORES
        /* Plot the result -- the scores versus rotation angle --
         * using gnuplot with GPLOT_POINTS.  Because the data
         * points are not ordered by theta (increasing or decreasing),
         * using GPLOT_LINES would be confusing! */
    {GPLOT  *gplot;
        gplot = gplotCreate("search_output", GPLOT_PNG,
                "Binary search.  Variance of difference of ON pixels vs. angle",
                "angle (deg)", "score");
        gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot1");
        gplotMakeOutput(gplot);
        gplotDestroy(&gplot);
    }
#endif  /* DEBUG_PLOT_SCORES */

cleanup:
    pixDestroy(&pixsw);
    pixDestroy(&pixsch);
    pixDestroy(&pixt1);
    pixDestroy(&pixt2);
    numaDestroy(&nascore);
    numaDestroy(&natheta);
    return ret;
}

Beispiel #9

Datei anzeigen

Datei: skew.c Projekt: ConfusedReality/pkg_images_leptonica

/*!
 * \brief   pixFindSkewSweep()
 *
 * \param[in]    pixs  1 bpp
 * \param[out]   pangle   angle required to deskew, in degrees
 * \param[in]    reduction  factor = 1, 2, 4 or 8
 * \param[in]    sweeprange   half the full range; assumed about 0; in degrees
 * \param[in]    sweepdelta   angle increment of sweep; in degrees
 * \return  0 if OK, 1 on error or if angle measurment not valid
 *
 * <pre>
 * Notes:
 *      (1) This examines the 'score' for skew angles with equal intervals.
 *      (2) Caller must check the return value for validity of the result.
 * </pre>
 */
l_int32
pixFindSkewSweep(PIX        *pixs,
                 l_float32  *pangle,
                 l_int32     reduction,
                 l_float32   sweeprange,
                 l_float32   sweepdelta)
{
l_int32    ret, bzero, i, nangles;
l_float32  deg2rad, theta;
l_float32  sum, maxscore, maxangle;
NUMA      *natheta, *nascore;
PIX       *pix, *pixt;

    PROCNAME("pixFindSkewSweep");

    if (!pangle)
        return ERROR_INT("&angle not defined", procName, 1);
    *pangle = 0.0;
    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 1);
    if (pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not 1 bpp", procName, 1);
    if (reduction != 1 && reduction != 2 && reduction != 4 && reduction != 8)
        return ERROR_INT("reduction must be in {1,2,4,8}", procName, 1);

    deg2rad = 3.1415926535 / 180.;
    ret = 0;

        /* Generate reduced image, if requested */
    if (reduction == 1)
        pix = pixClone(pixs);
    else if (reduction == 2)
        pix = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
    else if (reduction == 4)
        pix = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0);
    else /* reduction == 8 */
        pix = pixReduceRankBinaryCascade(pixs, 1, 1, 2, 0);

    pixZero(pix, &bzero);
    if (bzero) {
        pixDestroy(&pix);
        return 1;
    }

    nangles = (l_int32)((2. * sweeprange) / sweepdelta + 1);
    natheta = numaCreate(nangles);
    nascore = numaCreate(nangles);
    pixt = pixCreateTemplate(pix);

    if (!pix || !pixt) {
        ret = ERROR_INT("pix and pixt not both made", procName, 1);
        goto cleanup;
    }
    if (!natheta || !nascore) {
        ret = ERROR_INT("natheta and nascore not both made", procName, 1);
        goto cleanup;
    }

    for (i = 0; i < nangles; i++) {
        theta = -sweeprange + i * sweepdelta;   /* degrees */

            /* Shear pix about the UL corner and put the result in pixt */
        pixVShearCorner(pixt, pix, deg2rad * theta, L_BRING_IN_WHITE);

            /* Get score */
        pixFindDifferentialSquareSum(pixt, &sum);

#if  DEBUG_PRINT_SCORES
        L_INFO("sum(%7.2f) = %7.0f\n", procName, theta, sum);
#endif  /* DEBUG_PRINT_SCORES */

            /* Save the result in the output arrays */
        numaAddNumber(nascore, sum);
        numaAddNumber(natheta, theta);
    }

        /* Find the location of the maximum (i.e., the skew angle)
         * by fitting the largest data point and its two neighbors
         * to a quadratic, using lagrangian interpolation.  */
    numaFitMax(nascore, &maxscore, natheta, &maxangle);
    *pangle = maxangle;

#if  DEBUG_PRINT_SWEEP
    L_INFO(" From sweep: angle = %7.3f, score = %7.3f\n", procName,
           maxangle, maxscore);
#endif  /* DEBUG_PRINT_SWEEP */

#if  DEBUG_PLOT_SCORES
        /* Plot the result -- the scores versus rotation angle --
         * using gnuplot with GPLOT_LINES (lines connecting data points).
         * The GPLOT data structure is first created, with the
         * appropriate data incorporated from the two input NUMAs,
         * and then the function gplotMakeOutput() uses gnuplot to
         * generate the output plot.  This can be either a .png file
         * or a .ps file, depending on whether you use GPLOT_PNG
         * or GPLOT_PS.  */
    {GPLOT  *gplot;
        gplot = gplotCreate("sweep_output", GPLOT_PNG,
                    "Sweep. Variance of difference of ON pixels vs. angle",
                    "angle (deg)", "score");
        gplotAddPlot(gplot, natheta, nascore, GPLOT_LINES, "plot1");
        gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot2");
        gplotMakeOutput(gplot);
        gplotDestroy(&gplot);
    }
#endif  /* DEBUG_PLOT_SCORES */

cleanup:
    pixDestroy(&pix);
    pixDestroy(&pixt);
    numaDestroy(&nascore);
    numaDestroy(&natheta);
    return ret;
}