// Rebuilds the histogram from the per-row pixel counts of |pix|.
//
// Clear() is called first (it is defined elsewhere; presumably it releases
// any previously built histogram -- confirm against its definition).
// length_ is then set to the number of entries in the Numa returned by
// pixCountPixelsByRow(), hist_ is allocated with that many ints, and each
// entry is copied across as an integer.  The temporary Numa is destroyed
// before returning.
void PixelHistogram::ConstructHorizontalCountHist(Pix* pix) {
  Clear();

  Numa* row_sums = pixCountPixelsByRow(pix, NULL);
  length_ = numaGetCount(row_sums);
  hist_ = new int[length_];

  int row = 0;
  while (row < length_) {
    // Start from 0 so a failed lookup still stores a defined value.
    l_int32 count = 0;
    numaGetIValue(row_sums, row, &count);
    hist_[row] = count;
    ++row;
  }

  numaDestroy(&row_sums);
}
/*!
 * \brief   pixFindDifferentialSquareSum()
 *
 * \param[in]    pixs
 * \param[out]   psum    sum of squared differences of adjacent row counts
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) The per-row pixel counts of pixs are computed, and the result
 *          is the sum, over the retained rows, of the square of
 *          (count of row i) - (count of row i - 1).
 *      (2) The same number of rows is left out at the top and at the
 *          bottom.  That margin is half of the smaller of (height / 10)
 *          and (5% of the width), but never less than one scanline.
 *          Because the margin is at least 1, row i - 1 always exists.
 *      (3) *psum is set to 0.0 before pixs is validated, so it holds
 *          0.0 on every error return where psum itself is valid.
 * </pre>
 */
l_int32
pixFindDifferentialSquareSum(PIX        *pixs,
                             l_float32  *psum)
{
l_int32    row, nrows, last;
l_int32    width, height, widthcap, cap, margin;
l_float32  above, here, delta, total;
NUMA      *narows;

    PROCNAME("pixFindDifferentialSquareSum");

    if (!psum)
        return ERROR_INT("&sum not defined", procName, 1);
    *psum = 0.0;
    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 1);

        /* One entry per raster line: the pixel count of that row */
    narows = pixCountPixelsByRow(pixs, NULL);
    if (narows == NULL)
        return ERROR_INT("na not made", procName, 1);

        /* Decide how many rows to drop at each end.  Dropping them
         * avoids a spurious signal from the top and bottom of a
         * (nearly) all black image. */
    width = pixGetWidth(pixs);
    height = pixGetHeight(pixs);
    widthcap = (l_int32)(0.05 * width);  /* for max shear of 0.025 radians */
    cap = L_MIN(height / 10, widthcap);  /* at most 10% of the height */
    margin = L_MAX(cap / 2, 1);          /* per end; at least one line */

        /* Accumulate the squared row-to-row differences on the
         * rows that remain. */
    nrows = numaGetCount(narows);
    last = nrows - margin;
    total = 0.0;
    for (row = margin; row < last; row++) {
        numaGetFValue(narows, row - 1, &above);
        numaGetFValue(narows, row, &here);
        delta = here - above;
        total += delta * delta;
    }

    numaDestroy(&narows);
    *psum = total;
    return 0;
}
/*!
 *  pixGetTextBaseline()
 *
 *      Input:  pixs (1 bpp, one textline character set)
 *              tab8 (<optional> pixel sum table; if NULL, one is made
 *                    here and freed before returning)
 *              &y   (<return> baseline value; the parameter is named py)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) Method: find the largest difference in pixel sums from one
 *          raster line to the next one below it.  The baseline is the
 *          upper raster line for the pair of raster lines that
 *          maximizes this function.
 *      (2) Only decreases in the pixel sum (going down the image) are
 *          considered; increases are clamped to 0.  If no decrease is
 *          found, the baseline is reported as 0.
 *      (3) *py is set to 0 before any work is done, so it holds 0 on
 *          an error return that happens after the argument checks.
 */
static l_int32
pixGetTextBaseline(PIX      *pixs,
                   l_int32  *tab8,
                   l_int32  *py)
{
l_int32   i, h, val1, val2, diff, diffmax, ymax;
l_int32  *tab;
NUMA     *na;

    PROCNAME("pixGetTextBaseline");

    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 1);
    if (!py)
        return ERROR_INT("&y not defined", procName, 1);
    *py = 0;
    if (!tab8)
        tab = makePixelSumTab8();
    else
        tab = tab8;

        /* Report a failure to build the row sums as an error instead
         * of silently returning success with a baseline of 0. */
    if ((na = pixCountPixelsByRow(pixs, tab)) == NULL) {
        if (!tab8) FREE(tab);  /* only free the table we made */
        return ERROR_INT("na not made", procName, 1);
    }

    h = numaGetCount(na);
    diffmax = 0;
    ymax = 0;
    val1 = val2 = 0;
    for (i = 1; i < h; i++) {
        numaGetIValue(na, i - 1, &val1);
        numaGetIValue(na, i, &val2);
        diff = L_MAX(0, val1 - val2);  /* drop in sum from row i-1 to i */
        if (diff > diffmax) {
            diffmax = diff;
            ymax = i - 1;  /* upper raster line */
        }
    }
    *py = ymax;

    if (!tab8)
        FREE(tab);
    numaDestroy(&na);
    return 0;
}
/*!
 * \brief   pixFindBaselines()
 *
 * \param[in]    pixs     1 bpp, 300 ppi
 * \param[out]   ppta     [optional] pairs of pts corresponding to
 *                        approx. ends of each text line
 * \param[in]    pixadb   for debug output; use NULL to skip
 * \return  na of baseline y values, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) Input binary image must have text lines already aligned
 *          horizontally.  This can be done by either rotating the
 *          image with pixDeskew(), or, if a projective transform
 *          is required, by doing pixDeskewLocal() first.
 *      (2) Input null for &pta if you don't want this returned.
 *          The pta will come in pairs of points (left and right end
 *          of each baseline).
 *      (3) Caution: this will not work properly on text with multiple
 *          columns, where the lines are not aligned between columns.
 *          If there are multiple columns, they should be extracted
 *          separately before finding the baselines.
 *      (4) This function constructs different types of output
 *          for baselines; namely, a set of raster line values and
 *          a set of end points of each baseline.
 *      (5) This function was designed to handle short and long text lines
 *          without using dangerous thresholds on the peak heights.  It does
 *          this by combining the differential signal with a morphological
 *          analysis of the locations of the text lines.  One can also
 *          combine this data to normalize the peak heights, by weighting
 *          the differential signal in the region of each baseline
 *          by the inverse of the width of the text line found there.
 *      (6) NULL is also returned, with an info message rather than an
 *          error, when no components remain after the morphological
 *          filtering.  In that case *ppta is left as NULL.
 *      (7) When pixadb is given, intermediate plots and images are
 *          written under /tmp/lept/baseline/ and added to pixadb.
 * </pre>
 */
NUMA *
pixFindBaselines(PIX   *pixs,
                 PTA  **ppta,
                 PIXA  *pixadb)
{
l_int32    h, i, j, nbox, val1, val2, ndiff, bx, by, bw, bh;
l_int32    imaxloc, peakthresh, zerothresh, inpeak;
l_int32    mintosearch, max, maxloc, nloc, locval;
l_int32   *array;
l_float32  maxval;
BOXA      *boxa1, *boxa2, *boxa3;
GPLOT     *gplot;
NUMA      *nasum, *nadiff, *naloc, *naval;
PIX       *pix1, *pix2;
PTA       *pta;

    PROCNAME("pixFindBaselines");

    if (ppta) *ppta = NULL;
    if (!pixs || pixGetDepth(pixs) != 1)
        return (NUMA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL);

        /* Close up the text characters, removing noise */
    if ((pix1 = pixMorphSequence(pixs, "c25.1 + e15.1", 0)) == NULL)
        return (NUMA *)ERROR_PTR("pix1 not made", procName, NULL);

        /* Save a 0.25x scaled copy of the cleaned image for debugging */
    if (pixadb) pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT);

        /* Save the difference of adjacent row sums.
         * The high positive-going peaks are the baselines */
    if ((nasum = pixCountPixelsByRow(pix1, NULL)) == NULL) {
        pixDestroy(&pix1);
        return (NUMA *)ERROR_PTR("nasum not made", procName, NULL);
    }
    h = pixGetHeight(pixs);
    nadiff = numaCreate(h);
    val2 = 0;
    numaGetIValue(nasum, 0, &val2);
    for (i = 0; i < h - 1; i++) {
        val1 = val2;
        numaGetIValue(nasum, i + 1, &val2);
        numaAddNumber(nadiff, val1 - val2);  /* row i minus row i + 1 */
    }
    numaDestroy(&nasum);

    if (pixadb) {  /* show the difference signal */
        lept_mkdir("lept/baseline");
        gplotSimple1(nadiff, GPLOT_PNG, "/tmp/lept/baseline/diff", "Diff Sig");
        pix2 = pixRead("/tmp/lept/baseline/diff.png");
        pixaAddPix(pixadb, pix2, L_INSERT);
    }

        /* Use the zeroes of the profile to locate each baseline. */
    maxval = 0.0;
    imaxloc = 0;
    array = numaGetIArray(nadiff);
    ndiff = numaGetCount(nadiff);
    numaGetMax(nadiff, &maxval, &imaxloc);
    numaDestroy(&nadiff);

        /* Use this to begin locating a new peak: */
    peakthresh = (l_int32)maxval / PEAK_THRESHOLD_RATIO;
        /* Use this to begin a region between peaks: */
    zerothresh = (l_int32)maxval / ZERO_THRESHOLD_RATIO;

        /* Two-state scan of the difference signal.  Outside a peak, a
         * value above peakthresh starts one.  Inside a peak, the running
         * max is tracked; the peak is closed and recorded on the first
         * value at or below zerothresh that lies beyond mintosearch. */
    naloc = numaCreate(0);
    naval = numaCreate(0);
    inpeak = FALSE;
    max = 0;          /* these three are only read while inpeak is TRUE, */
    maxloc = 0;       /* where they have always been assigned; they are  */
    mintosearch = 0;  /* initialized to keep the state well-defined      */
    for (i = 0; i < ndiff; i++) {
        if (inpeak == FALSE) {
            if (array[i] > peakthresh) {  /* transition to in-peak */
                inpeak = TRUE;
                mintosearch = i + MIN_DIST_IN_PEAK; /* accept no zeros
                                               * between i and mintosearch */
                max = array[i];
                maxloc = i;
            }
        } else {  /* inpeak == TRUE; look for max */
            if (array[i] > max) {
                max = array[i];
                maxloc = i;
                mintosearch = i + MIN_DIST_IN_PEAK;
            } else if (i > mintosearch && array[i] <= zerothresh) {  /* leave */
                inpeak = FALSE;
                numaAddNumber(naval, max);
                numaAddNumber(naloc, maxloc);
            }
        }
    }
    LEPT_FREE(array);

        /* If array[ndiff-1] is max, eg. no descenders, baseline at bottom */
    if (inpeak) {
        numaAddNumber(naval, max);
        numaAddNumber(naloc, maxloc);
    }

    if (pixadb) {  /* show the raster locations for the peaks */
        gplot = gplotCreate("/tmp/lept/baseline/loc", GPLOT_PNG, "Peak locs",
                            "rasterline", "height");
        gplotAddPlot(gplot, naloc, naval, GPLOT_POINTS, "locs");
        gplotMakeOutput(gplot);
        gplotDestroy(&gplot);
        pix2 = pixRead("/tmp/lept/baseline/loc.png");
        pixaAddPix(pixadb, pix2, L_INSERT);
    }
    numaDestroy(&naval);

        /* Generate an approximate profile of text line width.
         * First, filter the boxes of text, where there may be
         * more than one box for a given textline. */
    pix2 = pixMorphSequence(pix1, "r11 + c20.1 + o30.1 +c1.3", 0);
    if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
    boxa1 = pixConnComp(pix2, NULL, 4);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    if (boxaGetCount(boxa1) == 0) {
        numaDestroy(&naloc);
        boxaDestroy(&boxa1);
        L_INFO("no components after filtering\n", procName);
        return NULL;
    }
        /* Scale the boxes up by 4x (presumably undoing the reduction
         * from the "r11" step above), then order them top to bottom. */
    boxa2 = boxaTransform(boxa1, 0, 0, 4., 4.);
    boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);

        /* Optionally, find the baseline segments */
    pta = NULL;
    if (ppta) {
        pta = ptaCreate(0);
        *ppta = pta;
    }
    if (pta) {
        nloc = numaGetCount(naloc);
        nbox = boxaGetCount(boxa3);
        for (i = 0; i < nbox; i++) {
            boxaGetBoxGeometry(boxa3, i, &bx, &by, &bw, &bh);
                /* Pair the box with the first baseline lying within
                 * 25 rows of the bottom of the box */
            for (j = 0; j < nloc; j++) {
                numaGetIValue(naloc, j, &locval);
                if (L_ABS(locval - (by + bh)) > 25)
                    continue;
                ptaAddPt(pta, bx, locval);
                ptaAddPt(pta, bx + bw, locval);
                break;
            }
        }
    }
    boxaDestroy(&boxa3);

    if (pixadb && pta) {  /* display baselines */
        l_int32  npts, x1, y1, x2, y2;
        pix1 = pixConvertTo32(pixs);
        npts = ptaGetCount(pta);
        for (i = 0; i < npts; i += 2) {
            ptaGetIPt(pta, i, &x1, &y1);
            ptaGetIPt(pta, i + 1, &x2, &y2);
            pixRenderLineArb(pix1, x1, y1, x2, y2, 2, 255, 0, 0);
        }
        pixWrite("/tmp/lept/baseline/baselines.png", pix1, IFF_PNG);
        pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT);
        pixDestroy(&pix1);
    }

    return naloc;
}
/*!
 *  pixaGenerateFont()
 *
 *      Input:  pixs (of 95 characters in 3 rows)
 *              fontsize (4, 6, 8, ... , 20, in pts at 300 ppi)
 *              &bl0 (<return> baseline of row 1; parameter pbl0)
 *              &bl1 (<return> baseline of row 2; parameter pbl1)
 *              &bl2 (<return> baseline of row 3; parameter pbl2)
 *      Return: pixa of font bitmaps for 95 characters, or null on error
 *
 *  Notes:
 *      (1) This does all the work.  See pixaGenerateFontFromFile()
 *          for an overview.
 *      (2) The pix is for one of the 9 fonts.  @fontsize is only
 *          used here for debugging.
 *      (3) The three baseline outputs are set to 0 on entry and are
 *          only filled in once all 95 characters have been extracted.
 *      (4) A row is a maximal run of raster lines that contain at least
 *          one ON pixel, closed by an empty raster line.  Exactly three
 *          such rows must be found.
 *      (5) All intermediate data is released on the error returns.
 */
PIXA *
pixaGenerateFont(PIX      *pixs,
                 l_int32   fontsize,
                 l_int32  *pbl0,
                 l_int32  *pbl1,
                 l_int32  *pbl2)
{
l_int32   i, j, nrows, nrowchars, nchars, h, yval;
l_int32   width, height;
l_int32   baseline[3];
l_int32  *tab = NULL;
BOX      *box, *box1, *box2;
BOXA     *boxar, *boxac, *boxacs;
PIX      *pix1, *pix2, *pixr, *pixrc, *pixc;
PIXA     *pixa;
l_int32   n, w, inrow, top;
l_int32  *ia;
NUMA     *na;

    PROCNAME("pixaGenerateFont");

    if (!pbl0 || !pbl1 || !pbl2)
        return (PIXA *)ERROR_PTR("&bl not all defined", procName, NULL);
    *pbl0 = *pbl1 = *pbl2 = 0;
    if (!pixs)
        return (PIXA *)ERROR_PTR("pixs not defined", procName, NULL);

        /* Locate the 3 rows of characters */
    w = pixGetWidth(pixs);
    na = pixCountPixelsByRow(pixs, NULL);
    boxar = boxaCreate(0);
    n = numaGetCount(na);
    ia = numaGetIArray(na);
    inrow = 0;
    top = 0;  /* only read after being set on entry to a row */
    for (i = 0; i < n; i++) {
        if (!inrow && ia[i] > 0) {
            inrow = 1;
            top = i;
        } else if (inrow && ia[i] == 0) {
            inrow = 0;
            box = boxCreate(0, top, w, i - top);
            boxaAddBox(boxar, box, L_INSERT);
        }
    }
    FREE(ia);
    numaDestroy(&na);
    nrows = boxaGetCount(boxar);
#if DEBUG_FONT_GEN
    L_INFO("For fontsize %d, have %d rows\n", procName, fontsize, nrows);
#endif  /* DEBUG_FONT_GEN */
    if (nrows != 3) {
        L_INFO("nrows = %d; skipping fontsize %d\n",
               procName, nrows, fontsize);
        boxaDestroy(&boxar);  /* don't leak the row boxes */
        return (PIXA *)ERROR_PTR("3 rows not generated", procName, NULL);
    }

        /* Grab the character images and baseline data */
#if DEBUG_BASELINE
    lept_rmdir("baseline");
    lept_mkdir("baseline");
#endif  /* DEBUG_BASELINE */
    tab = makePixelSumTab8();
    pixa = pixaCreate(95);
    for (i = 0; i < nrows; i++) {
        box = boxaGetBox(boxar, i, L_CLONE);
        pixr = pixClipRectangle(pixs, box, NULL);  /* row of chars */
        yval = 0;  /* defined value even if the baseline call fails */
        pixGetTextBaseline(pixr, tab, &yval);
        baseline[i] = yval;

#if DEBUG_BASELINE
        L_INFO("Baseline info: row %d, yval = %d, h = %d\n", procName,
               i, yval, pixGetHeight(pixr));
        pix1 = pixCopy(NULL, pixr);
        pixRenderLine(pix1, 0, yval, pixGetWidth(pix1), yval, 1,
                      L_FLIP_PIXELS);
        if (i == 0 )
            pixWrite("/tmp/baseline/row0.png", pix1, IFF_PNG);
        else if (i == 1)
            pixWrite("/tmp/baseline/row1.png", pix1, IFF_PNG);
        else
            pixWrite("/tmp/baseline/row2.png", pix1, IFF_PNG);
        pixDestroy(&pix1);
#endif  /* DEBUG_BASELINE */

        boxDestroy(&box);
        pixrc = pixCloseSafeBrick(NULL, pixr, 1, 35);
        boxac = pixConnComp(pixrc, NULL, 8);
        boxacs = boxaSort(boxac, L_SORT_BY_X, L_SORT_INCREASING, NULL);
        if (i == 0) {  /* consolidate the two components of '"' */
            box1 = boxaGetBox(boxacs, 1, L_CLONE);
            box2 = boxaGetBox(boxacs, 2, L_CLONE);
                /* Either box is NULL if the row has fewer than 3
                 * components; the 95-char check below then fails. */
            if (box1 && box2)
                box1->w = box2->x + box2->w - box1->x;  /* increase width */
            boxDestroy(&box1);
            boxDestroy(&box2);
            boxaRemoveBox(boxacs, 2);
        }
        h = pixGetHeight(pixr);
        nrowchars = boxaGetCount(boxacs);
        for (j = 0; j < nrowchars; j++) {
            box = boxaGetBox(boxacs, j, L_COPY);
            if (box->w <= 2 && box->h == 1) {  /* skip 1x1, 2x1 components */
                boxDestroy(&box);
                continue;
            }
            box->y = 0;
            box->h = h - 1;
            pixc = pixClipRectangle(pixr, box, NULL);
            boxDestroy(&box);
            if (i == 0 && j == 0)  /* add a pix for the space; change later */
                pixaAddPix(pixa, pixc, L_COPY);
            if (i == 2 && j == 0)  /* add a pix for the '\'; change later */
                pixaAddPix(pixa, pixc, L_COPY);
            pixaAddPix(pixa, pixc, L_INSERT);
        }
        pixDestroy(&pixr);
        pixDestroy(&pixrc);
        boxaDestroy(&boxac);
        boxaDestroy(&boxacs);
    }
    FREE(tab);
    boxaDestroy(&boxar);  /* row boxes are not needed past this point */

    nchars = pixaGetCount(pixa);
    if (nchars != 95) {
        pixaDestroy(&pixa);  /* don't leak the partial character set */
        return (PIXA *)ERROR_PTR("95 chars not generated", procName, NULL);
    }

    *pbl0 = baseline[0];
    *pbl1 = baseline[1];
    *pbl2 = baseline[2];

        /* Fix the space character up; it should have no ON pixels,
         * and be about twice as wide as the '!' character.    */
    pix1 = pixaGetPix(pixa, 0, L_CLONE);
    width = 2 * pixGetWidth(pix1);
    height = pixGetHeight(pix1);
    pixDestroy(&pix1);
    pix1 = pixCreate(width, height, 1);
    pixaReplacePix(pixa, 0, pix1, NULL);

        /* Fix up the '\' character; use a LR flip of the '/' char */
    pix1 = pixaGetPix(pixa, 15, L_CLONE);
    pix2 = pixFlipLR(NULL, pix1);
    pixDestroy(&pix1);
    pixaReplacePix(pixa, 60, pix2, NULL);

#if DEBUG_CHARS
    pix1 = pixaDisplayTiled(pixa, 1500, 0, 10);
    pixDisplay(pix1, 100 * i, 200);
    pixDestroy(&pix1);
#endif  /* DEBUG_CHARS */

    return pixa;
}
/*!
 *  pixFindNormalizedSquareSum()
 *
 *      Input:  pixs (1 bpp)
 *              &hratio (<optional return> ratio of normalized horiz square sum
 *                       to result if the pixel distribution were uniform)
 *              &vratio (<optional return> ratio of normalized vert square sum
 *                       to result if the pixel distribution were uniform)
 *              &fract  (<optional return> ratio of fg pixels to total pixels)
 *      Return: 0 if OK, 1 on error or if there are no fg pixels
 *
 *  Notes:
 *      (1) Let the image have h scanlines and N fg pixels.
 *          If the pixels were uniformly distributed on scanlines,
 *          the sum of squares of fg pixels on each scanline would be
 *          h * (N / h)^2.  However, if the pixels are not uniformly
 *          distributed (e.g., for text), the sum of squares of fg
 *          pixels will be larger.  We return in hratio and vratio the
 *          ratio of these two values.
 *      (2) If there are no fg pixels, hratio and vratio are returned as 0.0.
 *      (3) At least one of &hratio and &vratio must be requested;
 *          asking for &fract alone is an error.
 *      (4) The vertical ratio is computed from the row sums of pixs
 *          after an orthogonal rotation, so that its rows are the
 *          columns of pixs.
 *      (5) All requested outputs are set to 0.0 once the arguments
 *          have been validated, so they are defined on later error
 *          returns.
 */
l_int32
pixFindNormalizedSquareSum(PIX        *pixs,
                           l_float32  *phratio,
                           l_float32  *pvratio,
                           l_float32  *pfract)
{
l_int32    i, w, h, empty;
l_float32  sum, sumsq, uniform, val, npix;
NUMA      *na;
PIX       *pixt;

    PROCNAME("pixFindNormalizedSquareSum");

    if (!pixs || pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
    pixGetDimensions(pixs, &w, &h, NULL);

    if (!phratio && !pvratio)
        return ERROR_INT("nothing to do", procName, 1);
    if (phratio) *phratio = 0.0;
    if (pvratio) *pvratio = 0.0;
    if (pfract) *pfract = 0.0;

        /* Total pixel count, multiplied in floating point so that
         * w * h cannot overflow l_int32 for very large images. */
    npix = (l_float32)w * (l_float32)h;

    empty = 0;
    if (phratio) {
        if ((na = pixCountPixelsByRow(pixs, NULL)) == NULL)
            return ERROR_INT("na not made", procName, 1);
        sum = 0.0;
        numaGetSum(na, &sum);  /* fg pixels */
        if (pfract) *pfract = sum / npix;
        if (sum != 0.0) {
            uniform = sum * sum / h;   /*  h*(sum / h)^2  */
            sumsq = 0.0;
            for (i = 0; i < h; i++) {
                numaGetFValue(na, i, &val);
                sumsq += val * val;
            }
            *phratio = sumsq / uniform;
        } else {
            empty = 1;
        }
        numaDestroy(&na);
    }

    if (pvratio) {
        if (empty == 1) return 1;  /* no fg pixels; vratio stays 0.0 */
        if ((pixt = pixRotateOrth(pixs, 1)) == NULL)
            return ERROR_INT("pixt not made", procName, 1);
        if ((na = pixCountPixelsByRow(pixt, NULL)) == NULL) {
            pixDestroy(&pixt);
            return ERROR_INT("na not made", procName, 1);
        }
        sum = 0.0;
        numaGetSum(na, &sum);
        if (pfract) *pfract = sum / npix;
        if (sum != 0.0) {
            uniform = sum * sum / w;
            sumsq = 0.0;
            for (i = 0; i < w; i++) {  /* rows of pixt are columns of pixs */
                numaGetFValue(na, i, &val);
                sumsq += val * val;
            }
            *pvratio = sumsq / uniform;
        } else {
            empty = 1;
        }
        pixDestroy(&pixt);
        numaDestroy(&na);
    }

    return empty;
}