void PixelHistogram::ConstructHorizontalCountHist(Pix* pix) {
  Clear();
  Numa* counts = pixCountPixelsByRow(pix, NULL);
  length_ = numaGetCount(counts);
  hist_ = new int[length_];
  for (int i = 0; i < length_; ++i) {
    l_int32 val = 0;
    numaGetIValue(counts, i, &val);
    hist_[i] = val;
  }
  numaDestroy(&counts);
}
예제 #2
0
/*!
 * \brief   pixFindDifferentialSquareSum()
 *
 * \param[in]    pixs
 * \param[out]   psum  result
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) At the top and bottom, we skip:
 *           ~ at least one scanline
 *           ~ not more than 10% of the image height
 *           ~ not more than 5% of the image width
 * </pre>
 */
l_int32
pixFindDifferentialSquareSum(PIX        *pixs,
                             l_float32  *psum)
{
l_int32    i, n;
l_int32    w, h, skiph, skip, nskip;
l_float32  val1, val2, diff, sum;
NUMA      *na;

    PROCNAME("pixFindDifferentialSquareSum");

    if (!psum)
        return ERROR_INT("&sum not defined", procName, 1);
    *psum = 0.0;
    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 1);

        /* Generate a number array consisting of the sum
         * of pixels in each row of pixs */
    if ((na = pixCountPixelsByRow(pixs, NULL)) == NULL)
        return ERROR_INT("na not made", procName, 1);

        /* Compute the number of rows at top and bottom to omit.
         * We omit these to avoid getting a spurious signal from
         * the top and bottom of a (nearly) all black image. */
    w = pixGetWidth(pixs);
    h = pixGetHeight(pixs);
    skiph = (l_int32)(0.05 * w);  /* skip for max shear of 0.025 radians */
    skip = L_MIN(h / 10, skiph);  /* don't remove more than 10% of image */
    nskip = L_MAX(skip / 2, 1);  /* at top & bot; skip at least one line */

        /* Sum the squares of differential row sums, on the
         * allowed rows.  Note that nskip must be >= 1. */
    n = numaGetCount(na);
    sum = 0.0;
    for (i = nskip; i < n - nskip; i++) {
        numaGetFValue(na, i - 1, &val1);
        numaGetFValue(na, i, &val2);
        diff = val2 - val1;
        sum += diff * diff;
    }
    numaDestroy(&na);
    *psum = sum;
    return 0;
}
/*!
 *  pixGetTextBaseline()
 *
 *      Input:  pixs (1 bpp, one textline character set)
 *              tab8 (<optional> pixel sum table)
 *              &y   (<return> baseline value)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) Method: find the largest difference in pixel sums from one
 *          raster line to the next one below it.  The baseline is the
 *          upper raster line for the pair of raster lines that
 *          maximizes this function.
 */
static l_int32
pixGetTextBaseline(PIX      *pixs,
                   l_int32  *tab8,
                   l_int32  *py)
{
l_int32   i, h, val1, val2, diff, diffmax, ymax;
l_int32  *tab;
NUMA     *na;

    PROCNAME("pixGetTextBaseline");

    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 1);
    if (!py)
        return ERROR_INT("&y not defined", procName, 1);
    *py = 0;
    if (!tab8)
        tab = makePixelSumTab8();
    else
        tab = tab8;

    na = pixCountPixelsByRow(pixs, tab);
    h = numaGetCount(na);
    diffmax = 0;
    ymax = 0;
    for (i = 1; i < h; i++) {
        numaGetIValue(na, i - 1, &val1);
        numaGetIValue(na, i, &val2);
        diff = L_MAX(0, val1 - val2);
        if (diff > diffmax) {
            diffmax = diff;
            ymax = i - 1;  /* upper raster line */
        }
    }
    *py = ymax;

    if (!tab8)
        FREE(tab);
    numaDestroy(&na);
    return 0;
}
예제 #4
0
/*!
 * \brief   pixFindBaselines()
 *
 * \param[in]    pixs     1 bpp, 300 ppi
 * \param[out]   ppta     [optional] pairs of pts corresponding to
 *                        approx. ends of each text line
 * \param[in]    pixadb   for debug output; use NULL to skip
 * \return  na of baseline y values, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) Input binary image must have text lines already aligned
 *          horizontally.  This can be done by either rotating the
 *          image with pixDeskew(), or, if a projective transform
 *          is required, by doing pixDeskewLocal() first.
 *      (2) Input null for &pta if you don't want this returned.
 *          The pta will come in pairs of points (left and right end
 *          of each baseline).
 *      (3) Caution: this will not work properly on text with multiple
 *          columns, where the lines are not aligned between columns.
 *          If there are multiple columns, they should be extracted
 *          separately before finding the baselines.
 *      (4) This function constructs different types of output
 *          for baselines; namely, a set of raster line values and
 *          a set of end points of each baseline.
 *      (5) This function was designed to handle short and long text lines
 *          without using dangerous thresholds on the peak heights.  It does
 *          this by combining the differential signal with a morphological
 *          analysis of the locations of the text lines.  One can also
 *          combine this data to normalize the peak heights, by weighting
 *          the differential signal in the region of each baseline
 *          by the inverse of the width of the text line found there.
 * </pre>
 */
NUMA *
pixFindBaselines(PIX   *pixs,
                 PTA  **ppta,
                 PIXA  *pixadb)
{
l_int32    h, i, j, nbox, val1, val2, ndiff, bx, by, bw, bh;
l_int32    imaxloc, peakthresh, zerothresh, inpeak;
l_int32    mintosearch, max, maxloc, nloc, locval;
l_int32   *array;
l_float32  maxval;
BOXA      *boxa1, *boxa2, *boxa3;
GPLOT     *gplot;
NUMA      *nasum, *nadiff, *naloc, *naval;
PIX       *pix1, *pix2;
PTA       *pta;

    PROCNAME("pixFindBaselines");

    if (ppta) *ppta = NULL;
    if (!pixs || pixGetDepth(pixs) != 1)
        return (NUMA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL);

        /* Close up the text characters, removing noise */
    pix1 = pixMorphSequence(pixs, "c25.1 + e15.1", 0);

        /* Estimate the resolution */
    if (pixadb) pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT);

        /* Save the difference of adjacent row sums.
         * The high positive-going peaks are the baselines */
    if ((nasum = pixCountPixelsByRow(pix1, NULL)) == NULL) {
        pixDestroy(&pix1);
        return (NUMA *)ERROR_PTR("nasum not made", procName, NULL);
    }
    h = pixGetHeight(pixs);
    nadiff = numaCreate(h);
    numaGetIValue(nasum, 0, &val2);
    for (i = 0; i < h - 1; i++) {
        val1 = val2;
        numaGetIValue(nasum, i + 1, &val2);
        numaAddNumber(nadiff, val1 - val2);
    }
    numaDestroy(&nasum);

    if (pixadb) {  /* show the difference signal */
        lept_mkdir("lept/baseline");
        gplotSimple1(nadiff, GPLOT_PNG, "/tmp/lept/baseline/diff", "Diff Sig");
        pix2 = pixRead("/tmp/lept/baseline/diff.png");
        pixaAddPix(pixadb, pix2, L_INSERT);
    }

        /* Use the zeroes of the profile to locate each baseline. */
    array = numaGetIArray(nadiff);
    ndiff = numaGetCount(nadiff);
    numaGetMax(nadiff, &maxval, &imaxloc);
    numaDestroy(&nadiff);

        /* Use this to begin locating a new peak: */
    peakthresh = (l_int32)maxval / PEAK_THRESHOLD_RATIO;
        /* Use this to begin a region between peaks: */
    zerothresh = (l_int32)maxval / ZERO_THRESHOLD_RATIO;

    naloc = numaCreate(0);
    naval = numaCreate(0);
    inpeak = FALSE;
    for (i = 0; i < ndiff; i++) {
        if (inpeak == FALSE) {
            if (array[i] > peakthresh) {  /* transition to in-peak */
                inpeak = TRUE;
                mintosearch = i + MIN_DIST_IN_PEAK; /* accept no zeros
                                               * between i and mintosearch */
                max = array[i];
                maxloc = i;
            }
        } else {  /* inpeak == TRUE; look for max */
            if (array[i] > max) {
                max = array[i];
                maxloc = i;
                mintosearch = i + MIN_DIST_IN_PEAK;
            } else if (i > mintosearch && array[i] <= zerothresh) {  /* leave */
                inpeak = FALSE;
                numaAddNumber(naval, max);
                numaAddNumber(naloc, maxloc);
            }
        }
    }
    LEPT_FREE(array);

        /* If array[ndiff-1] is max, eg. no descenders, baseline at bottom */
    if (inpeak) {
        numaAddNumber(naval, max);
        numaAddNumber(naloc, maxloc);
    }

    if (pixadb) {  /* show the raster locations for the peaks */
        gplot = gplotCreate("/tmp/lept/baseline/loc", GPLOT_PNG, "Peak locs",
                            "rasterline", "height");
        gplotAddPlot(gplot, naloc, naval, GPLOT_POINTS, "locs");
        gplotMakeOutput(gplot);
        gplotDestroy(&gplot);
        pix2 = pixRead("/tmp/lept/baseline/loc.png");
        pixaAddPix(pixadb, pix2, L_INSERT);
    }
    numaDestroy(&naval);

        /* Generate an approximate profile of text line width.
         * First, filter the boxes of text, where there may be
         * more than one box for a given textline. */
    pix2 = pixMorphSequence(pix1, "r11 + c20.1 + o30.1 +c1.3", 0);
    if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
    boxa1 = pixConnComp(pix2, NULL, 4);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    if (boxaGetCount(boxa1) == 0) {
        numaDestroy(&naloc);
        boxaDestroy(&boxa1);
        L_INFO("no compnents after filtering\n", procName);
        return NULL;
    }
    boxa2 = boxaTransform(boxa1, 0, 0, 4., 4.);
    boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);

        /* Optionally, find the baseline segments */
    pta = NULL;
    if (ppta) {
        pta = ptaCreate(0);
        *ppta = pta;
    }
    if (pta) {
      nloc = numaGetCount(naloc);
      nbox = boxaGetCount(boxa3);
      for (i = 0; i < nbox; i++) {
          boxaGetBoxGeometry(boxa3, i, &bx, &by, &bw, &bh);
          for (j = 0; j < nloc; j++) {
              numaGetIValue(naloc, j, &locval);
              if (L_ABS(locval - (by + bh)) > 25)
                  continue;
              ptaAddPt(pta, bx, locval);
              ptaAddPt(pta, bx + bw, locval);
              break;
          }
      }
    }
    boxaDestroy(&boxa3);

    if (pixadb && pta) {  /* display baselines */
        l_int32  npts, x1, y1, x2, y2;
        pix1 = pixConvertTo32(pixs);
        npts = ptaGetCount(pta);
        for (i = 0; i < npts; i += 2) {
            ptaGetIPt(pta, i, &x1, &y1);
            ptaGetIPt(pta, i + 1, &x2, &y2);
            pixRenderLineArb(pix1, x1, y1, x2, y2, 2, 255, 0, 0);
        }
        pixWrite("/tmp/lept/baseline/baselines.png", pix1, IFF_PNG);
        pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT);
        pixDestroy(&pix1);
    }

    return naloc;
}
/*!
 *  pixaGenerateFont()
 *
 *      Input:  pix (of 95 characters in 3 rows)
 *              fontsize (4, 6, 8, ... , 20, in pts at 300 ppi)
 *              &bl1 (<return> baseline of row 1)
 *              &bl2 (<return> baseline of row 2)
 *              &bl3 (<return> baseline of row 3)
 *      Return: pixa of font bitmaps for 95 characters, or null on error
 *
 *  Notes:
 *      (1) This does all the work.  See pixaGenerateFontFromFile()
 *          for an overview.
 *      (2) The pix is for one of the 9 fonts.  @fontsize is only
 *          used here for debugging.
 */
PIXA *
pixaGenerateFont(PIX      *pixs,
                 l_int32   fontsize,
                 l_int32  *pbl0,
                 l_int32  *pbl1,
                 l_int32  *pbl2)
{
l_int32   i, j, nrows, nrowchars, nchars, h, yval;
l_int32   width, height;
l_int32   baseline[3];
l_int32  *tab = NULL;
BOX      *box, *box1, *box2;
BOXA     *boxar, *boxac, *boxacs;
PIX      *pix1, *pix2, *pixr, *pixrc, *pixc;
PIXA     *pixa;
l_int32   n, w, inrow, top;
l_int32  *ia;
NUMA     *na;

    PROCNAME("pixaGenerateFont");

    if (!pbl0 || !pbl1 || !pbl2)
        return (PIXA *)ERROR_PTR("&bl not all defined", procName, NULL);
    *pbl0 = *pbl1 = *pbl2 = 0;
    if (!pixs)
        return (PIXA *)ERROR_PTR("pixs not defined", procName, NULL);

        /* Locate the 3 rows of characters */
    w = pixGetWidth(pixs);
    na = pixCountPixelsByRow(pixs, NULL);
    boxar = boxaCreate(0);
    n = numaGetCount(na);
    ia = numaGetIArray(na);
    inrow = 0;
    for (i = 0; i < n; i++) {
        if (!inrow && ia[i] > 0) {
            inrow = 1;
            top = i;
        } else if (inrow && ia[i] == 0) {
            inrow = 0;
            box = boxCreate(0, top, w, i - top);
            boxaAddBox(boxar, box, L_INSERT);
        }
    }
    FREE(ia);
    numaDestroy(&na);
    nrows = boxaGetCount(boxar);
#if  DEBUG_FONT_GEN
    L_INFO("For fontsize %s, have %d rows\n", procName, fontsize, nrows);
#endif  /* DEBUG_FONT_GEN */
    if (nrows != 3) {
        L_INFO("nrows = %d; skipping fontsize %d\n", procName, nrows, fontsize);
        return (PIXA *)ERROR_PTR("3 rows not generated", procName, NULL);
    }

        /* Grab the character images and baseline data */
#if DEBUG_BASELINE
    lept_rmdir("baseline");
    lept_mkdir("baseline");
#endif  /* DEBUG_BASELINE */
    tab = makePixelSumTab8();
    pixa = pixaCreate(95);
    for (i = 0; i < nrows; i++) {
        box = boxaGetBox(boxar, i, L_CLONE);
        pixr = pixClipRectangle(pixs, box, NULL);  /* row of chars */
        pixGetTextBaseline(pixr, tab, &yval);
        baseline[i] = yval;

#if DEBUG_BASELINE
        L_INFO("Baseline info: row %d, yval = %d, h = %d\n", procName,
               i, yval, pixGetHeight(pixr));
        pix1 = pixCopy(NULL, pixr);
        pixRenderLine(pix1, 0, yval, pixGetWidth(pix1), yval, 1,
                      L_FLIP_PIXELS);
        if (i == 0 )
            pixWrite("/tmp/baseline/row0.png", pix1, IFF_PNG);
        else if (i == 1)
            pixWrite("/tmp/baseline/row1.png", pix1, IFF_PNG);
        else
            pixWrite("/tmp/baseline/row2.png", pix1, IFF_PNG);
        pixDestroy(&pix1);
#endif  /* DEBUG_BASELINE */

        boxDestroy(&box);
        pixrc = pixCloseSafeBrick(NULL, pixr, 1, 35);
        boxac = pixConnComp(pixrc, NULL, 8);
        boxacs = boxaSort(boxac, L_SORT_BY_X, L_SORT_INCREASING, NULL);
        if (i == 0) {  /* consolidate the two components of '"' */
            box1 = boxaGetBox(boxacs, 1, L_CLONE);
            box2 = boxaGetBox(boxacs, 2, L_CLONE);
            box1->w = box2->x + box2->w - box1->x;  /* increase width */
            boxDestroy(&box1);
            boxDestroy(&box2);
            boxaRemoveBox(boxacs, 2);
        }
        h = pixGetHeight(pixr);
        nrowchars = boxaGetCount(boxacs);
        for (j = 0; j < nrowchars; j++) {
            box = boxaGetBox(boxacs, j, L_COPY);
            if (box->w <= 2 && box->h == 1) {  /* skip 1x1, 2x1 components */
                boxDestroy(&box);
                continue;
            }
            box->y = 0;
            box->h = h - 1;
            pixc = pixClipRectangle(pixr, box, NULL);
            boxDestroy(&box);
            if (i == 0 && j == 0)  /* add a pix for the space; change later */
                pixaAddPix(pixa, pixc, L_COPY);
            if (i == 2 && j == 0)  /* add a pix for the '\'; change later */
                pixaAddPix(pixa, pixc, L_COPY);
            pixaAddPix(pixa, pixc, L_INSERT);
        }
        pixDestroy(&pixr);
        pixDestroy(&pixrc);
        boxaDestroy(&boxac);
        boxaDestroy(&boxacs);
    }
    FREE(tab);

    nchars = pixaGetCount(pixa);
    if (nchars != 95)
        return (PIXA *)ERROR_PTR("95 chars not generated", procName, NULL);

    *pbl0 = baseline[0];
    *pbl1 = baseline[1];
    *pbl2 = baseline[2];

        /* Fix the space character up; it should have no ON pixels,
         * and be about twice as wide as the '!' character.    */
    pix1 = pixaGetPix(pixa, 0, L_CLONE);
    width = 2 * pixGetWidth(pix1);
    height = pixGetHeight(pix1);
    pixDestroy(&pix1);
    pix1 = pixCreate(width, height, 1);
    pixaReplacePix(pixa, 0, pix1, NULL);

        /* Fix up the '\' character; use a LR flip of the '/' char */
    pix1 = pixaGetPix(pixa, 15, L_CLONE);
    pix2 = pixFlipLR(NULL, pix1);
    pixDestroy(&pix1);
    pixaReplacePix(pixa, 60, pix2, NULL);

#if DEBUG_CHARS
    pix1 = pixaDisplayTiled(pixa, 1500, 0, 10);
    pixDisplay(pix1, 100 * i, 200);
    pixDestroy(&pix1);
#endif  /* DEBUG_CHARS */

    boxaDestroy(&boxar);
    return pixa;
}
예제 #6
0
파일: skew.c 프로젝트: vkbrad/AndroidOCR
/*!
 *  pixFindNormalizedSquareSum()
 *
 *      Input:  pixs
 *              &hratio (<optional return> ratio of normalized horiz square sum
 *                       to result if the pixel distribution were uniform)
 *              &vratio (<optional return> ratio of normalized vert square sum
 *                       to result if the pixel distribution were uniform)
 *              &fract  (<optional return> ratio of fg pixels to total pixels)
 *      Return: 0 if OK, 1 on error or if there are no fg pixels
 *
 *  Notes:
 *      (1) Let the image have h scanlines and N fg pixels.
 *          If the pixels were uniformly distributed on scanlines,
 *          the sum of squares of fg pixels on each scanline would be
 *          h * (N / h)^2.  However, if the pixels are not uniformly
 *          distributed (e.g., for text), the sum of squares of fg
 *          pixels will be larger.  We return in hratio and vratio the
 *          ratio of these two values.
 *      (2) If there are no fg pixels, hratio and vratio are returned as 0.0.
 */
l_int32
pixFindNormalizedSquareSum(PIX        *pixs,
                           l_float32  *phratio,
                           l_float32  *pvratio,
                           l_float32  *pfract)
{
l_int32    i, w, h, empty;
l_float32  sum, sumsq, uniform, val;
NUMA      *na;
PIX       *pixt;

    PROCNAME("pixFindNormalizedSquareSum");

    if (!pixs || pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
    pixGetDimensions(pixs, &w, &h, NULL);

    if (!phratio && !pvratio)
        return ERROR_INT("nothing to do", procName, 1);
    if (phratio) *phratio = 0.0;
    if (pvratio) *pvratio = 0.0;

    empty = 0;
    if (phratio) {
        na = pixCountPixelsByRow(pixs, NULL);
        numaGetSum(na, &sum);  /* fg pixels */
        if (pfract) *pfract = sum / (l_float32)(w * h);
        if (sum != 0.0) {
            uniform = sum * sum / h;   /*  h*(sum / h)^2  */
            sumsq = 0.0;
            for (i = 0; i < h; i++) {
                numaGetFValue(na, i, &val);
                sumsq += val * val;
            }
            *phratio = sumsq / uniform;
        } else {
            empty = 1;
        }
        numaDestroy(&na);
    }

    if (pvratio) {
        if (empty == 1) return 1;
        pixt = pixRotateOrth(pixs, 1);
        na = pixCountPixelsByRow(pixt, NULL);
        numaGetSum(na, &sum);
        if (pfract) *pfract = sum / (l_float32)(w * h);
        if (sum != 0.0) {
            uniform = sum * sum / w;
            sumsq = 0.0;
            for (i = 0; i < w; i++) {
                numaGetFValue(na, i, &val);
                sumsq += val * val;
            }
            *pvratio = sumsq / uniform;
        } else {
            empty = 1;
        }
        pixDestroy(&pixt);
        numaDestroy(&na);
    }

    return empty;
}