void GenCleans(const char *fname, l_int32 *pindex, l_int32 thresh, L_BMF *bmf) { l_int32 index, blackval, whiteval; char buf[256]; PIX *pix1, *pix2, *pix3, *pix4, *pix5; blackval = 70; whiteval = 180; index = *pindex; pix1 = pixRead(fname); snprintf(buf, sizeof(buf), "/tmp/lept/adapt_%03d.jpg", index++); pixWrite(buf, pix1, IFF_JFIF_JPEG); pix2 = pixBackgroundNorm(pix1, NULL, NULL, 10, 15, thresh, 25, 200, 2, 1); snprintf(buf, sizeof(buf), "Norm color: fg thresh = %d", thresh); fprintf(stderr, "%s\n", buf); pix3 = pixAddSingleTextline(pix2, bmf, buf, 0x00ff0000, L_ADD_BELOW); snprintf(buf, sizeof(buf), "/tmp/lept/adapt_%03d.jpg", index++); pixWrite(buf, pix3, IFF_JFIF_JPEG); pixDestroy(&pix3); pix3 = pixGammaTRC(NULL, pix2, 1.0, blackval, whiteval); snprintf(buf, sizeof(buf), "Clean color: fg thresh = %d", thresh); pix4 = pixAddSingleTextblock(pix3, bmf, buf, 0x00ff0000, L_ADD_BELOW, NULL); snprintf(buf, sizeof(buf), "/tmp/lept/adapt_%03d.jpg", index++); pixWrite(buf, pix4, IFF_JFIF_JPEG); pixDestroy(&pix2); pixDestroy(&pix3); pixDestroy(&pix4); pix2 = pixConvertRGBToGray(pix1, 0.33, 0.34, 0.33); pix3 = pixBackgroundNorm(pix2, NULL, NULL, 10, 15, thresh, 25, 200, 2, 1); pix4 = pixGammaTRC(NULL, pix3, 1.0, blackval, whiteval); snprintf(buf, sizeof(buf), "Clean gray: fg thresh = %d", thresh); pix5 = pixAddSingleTextblock(pix4, bmf, buf, 0x00ff0000, L_ADD_BELOW, NULL); snprintf(buf, sizeof(buf), "/tmp/lept/adapt_%03d.jpg", index++); pixWrite(buf, pix5, IFF_JFIF_JPEG); pixDestroy(&pix2); pixDestroy(&pix3); pixDestroy(&pix4); pixDestroy(&pix5); pixDestroy(&pix1); *pindex = index; return; }
/*
 * Clean the dark background of the current image.
 * Based on leptonica's adaptmap_dark.c.
 *
 * Runs pixBackgroundNorm() on pixs (not declared in this function;
 * presumably a MainWindow member -- confirm), then applies pixGammaTRC()
 * with gamma 1.0 and the given black and white points.  The result is
 * passed to setPixToScene() and also returned to the caller.  An
 * override wait cursor is set for the duration of the call.
 *
 *   blackval, whiteval  black and white points for pixGammaTRC()
 *   thresh              foreground threshold for pixBackgroundNorm()
 */
PIX* MainWindow::cleanDarkBackground(int blackval, int whiteval, int thresh) {
    QApplication::setOverrideCursor(Qt::WaitCursor);

    PIX *normalized = pixBackgroundNorm(pixs, NULL, NULL, 10, 15,
                                        thresh, 25, 200, 2, 1);
    PIX *cleaned = pixGammaTRC(NULL, normalized, 1.0, blackval, whiteval);
    setPixToScene(cleaned);

    /* The intermediate image is not needed once the result exists. */
    pixDestroy(&normalized);

    QApplication::restoreOverrideCursor();
    return cleaned;
}
/*!
 *  pixOtsuThreshOnBackgroundNorm()
 *
 *      Input:  pixs (8 bpp grayscale; not colormapped)
 *              pixim (<optional> 1 bpp 'image' mask; can be null)
 *              sx, sy (tile size in pixels; each must be >= 4)
 *              thresh (threshold for determining foreground)
 *              mincount (min threshold on counts in a tile)
 *              bgval (target bg val; typ. > 128)
 *              smoothx (half-width of block convolution kernel width)
 *              smoothy (half-width of block convolution kernel height)
 *              scorefract (fraction of the max Otsu score; typ. 0.1)
 *              pthresh (<optional return> threshold value that was
 *                       used on the normalized image; set to 0 on entry)
 *      Return: pixd (1 bpp thresholded image), or null on error
 *
 *  Notes:
 *      (1) Background normalization is done first, followed by Otsu
 *          thresholding.  Otsu binarization attempts to split the
 *          image into two roughly equal sets of pixels, and it does
 *          a very poor job when there are large amounts of dark
 *          background.  Normalizing the background first, to bring
 *          it near 255, removes this problem.  A modified Otsu is
 *          then used to estimate the best global threshold on the
 *          normalized image.
 *      (2) If mincount exceeds the tile area sx * sy, a warning is
 *          issued and mincount is replaced by (sx * sy) / 3.
 *      (3) See pixBackgroundNorm() for meaning and typical values
 *          of input parameters.  For a start, you can try:
 *            sx, sy = 10, 15
 *            thresh = 100
 *            mincount = 50
 *            bgval = 255
 *            smoothx, smoothy = 2
 */
PIX *
pixOtsuThreshOnBackgroundNorm(PIX       *pixs,
                              PIX       *pixim,
                              l_int32    sx,
                              l_int32    sy,
                              l_int32    thresh,
                              l_int32    mincount,
                              l_int32    bgval,
                              l_int32    smoothx,
                              l_int32    smoothy,
                              l_float32  scorefract,
                              l_int32   *pthresh)
{
l_int32   width, height;
l_uint32  pixval;
PIX      *pixnorm, *pixthresh, *pixbin;

    PROCNAME("pixOtsuThreshOnBackgroundNorm");

    if (pthresh)
        *pthresh = 0;

        /* Validate the inputs */
    if (!pixs)
        return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL);
    if (pixGetDepth(pixs) != 8)
        return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL);
    if (pixGetColormap(pixs))
        return (PIX *)ERROR_PTR("pixs is colormapped", procName, NULL);
    if (sx < 4 || sy < 4)
        return (PIX *)ERROR_PTR("sx and sy must be >= 4", procName, NULL);
    if (mincount > sx * sy) {
        L_WARNING("mincount too large for tile size\n", procName);
        mincount = (sx * sy) / 3;
    }

    pixnorm = pixBackgroundNorm(pixs, pixim, NULL, sx, sy, thresh,
                                mincount, bgval, smoothx, smoothy);
    if (pixnorm == NULL)
        return (PIX *)ERROR_PTR("pixn not made", procName, NULL);

        /* A single tile covering the whole image yields one global
         * threshold, returned as the only pixel of pixthresh. */
    pixGetDimensions(pixnorm, &width, &height, NULL);
    pixOtsuAdaptiveThreshold(pixnorm, width, height, 0, 0, scorefract,
                             &pixthresh, &pixbin);
    pixDestroy(&pixnorm);

    if (pixthresh && pthresh) {
        pixGetPixel(pixthresh, 0, 0, &pixval);
        *pthresh = pixval;
    }
    pixDestroy(&pixthresh);

    if (pixbin == NULL)
        return (PIX *)ERROR_PTR("pixd not made", procName, NULL);
    return pixbin;
}
int main(int argc, char **argv) { char *infile; l_int32 w, d, threshval, ival, newval; l_uint32 val; PIX *pixs, *pixg, *pixg2; PIX *pix1, *pix2; PIXA *pixa; static char mainName[] = "binarize_set"; if (argc != 2) return ERROR_INT(" Syntax: binarize_set infile", mainName, 1); infile = argv[1]; pixa = pixaCreate(5); pixs = pixRead(infile); pixGetDimensions(pixs, &w, NULL, &d); pixSaveTiled(pixs, pixa, 1.0, 1, 50, 32); pixDisplay(pixs, 100, 0); #if ALL /* 1. Standard background normalization with a global threshold. */ pixg = pixConvertTo8(pixs, 0); pix1 = pixBackgroundNorm(pixg, NULL, NULL, 10, 15, 100, 50, 255, 2, 2); pix2 = pixThresholdToBinary(pix1, 160); pixWrite("/tmp/binar1.png", pix2, IFF_PNG); pixDisplay(pix2, 100, 0); pixSaveTiled(pix2, pixa, 1.0, 1, 50, 32); pixDestroy(&pixg); pixDestroy(&pix1); pixDestroy(&pix2); #endif #if ALL /* 2. Background normalization followed by Otsu thresholding. Otsu * binarization attempts to split the image into two roughly equal * sets of pixels, and it does a very poor job when there are large * amounts of dark background. By doing a background normalization * first (to get the background near 255), we remove this problem. * Then we use a modified Otsu to estimate the best global * threshold on the normalized image. */ pixg = pixConvertTo8(pixs, 0); pix1 = pixOtsuThreshOnBackgroundNorm(pixg, NULL, 10, 15, 100, 50, 255, 2, 2, 0.10, &threshval); fprintf(stderr, "thresh val = %d\n", threshval); pixSaveTiled(pix1, pixa, 1.0, 1, 50, 32); pixWrite("/tmp/binar2.png", pix1, IFF_PNG); pixDisplay(pix1, 100, 200); pixDestroy(&pixg); pixDestroy(&pix1); #endif #if ALL /* 3. Background normalization with Otsu threshold estimation and * masking for threshold selection. 
*/ pixg = pixConvertTo8(pixs, 0); pix1 = pixMaskedThreshOnBackgroundNorm(pixg, NULL, 10, 15, 100, 50, 2, 2, 0.10, &threshval); fprintf(stderr, "thresh val = %d\n", threshval); pixSaveTiled(pix1, pixa, 1.0, 1, 50, 32); pixWrite("/tmp/binar3.png", pix1, IFF_PNG); pixDisplay(pix1, 100, 400); pixDestroy(&pixg); pixDestroy(&pix1); #endif #if ALL /* 4. Background normalization followed by Sauvola binarization */ if (d == 32) pixg = pixConvertRGBToGray(pixs, 0.2, 0.7, 0.1); else pixg = pixConvertTo8(pixs, 0); pixg2 = pixContrastNorm(NULL, pixg, 20, 20, 130, 2, 2); pixSauvolaBinarizeTiled(pixg2, 25, 0.40, 1, 1, NULL, &pix1); pixSaveTiled(pix1, pixa, 1.0, 1, 50, 32); pixWrite("/tmp/binar4.png", pix1, IFF_PNG); pixDisplay(pix1, 100, 600); pixDestroy(&pixg); pixDestroy(&pixg2); pixDestroy(&pix1); #endif #if ALL /* 5. Contrast normalization followed by background normalization, and * thresholding. */ if (d == 32) pixg = pixConvertRGBToGray(pixs, 0.2, 0.7, 0.1); else pixg = pixConvertTo8(pixs, 0); pixOtsuAdaptiveThreshold(pixg, 5000, 5000, 0, 0, 0.1, &pix1, NULL); pixGetPixel(pix1, 0, 0, &val); ival = (l_int32) val; newval = ival + (l_int32)(0.6 * (110 - ival)); fprintf(stderr, "th1 = %d, th2 = %d\n", ival, newval); pixDestroy(&pix1); pixContrastNorm(pixg, pixg, 50, 50, 130, 2, 2); pixg2 = pixBackgroundNorm(pixg, NULL, NULL, 20, 20, 70, 40, 200, 2, 2); ival = L_MIN(ival, 110); pix1 = pixThresholdToBinary(pixg2, ival); pixSaveTiled(pix1, pixa, 1.0, 1, 50, 32); pixWrite("/tmp/binar5.png", pix1, IFF_PNG); pixDisplay(pix1, 100, 800); pixDestroy(&pixg); pixDestroy(&pixg2); pixDestroy(&pix1); #endif pix1 = pixaDisplayTiledInRows(pixa, 32, w + 100, 1.0, 0, 30, 2); pixWrite("/tmp/binar6.png", pix1, IFF_PNG); pixDisplay(pix1, 1000, 0); pixDestroy(&pix1); pixaDestroy(&pixa); pixDestroy(&pixs); return 0; }
/*!
 *  pixMaskedThreshOnBackgroundNorm()
 *
 *      Input:  pixs (8 bpp grayscale; not colormapped)
 *              pixim (<optional> 1 bpp 'image' mask; can be null)
 *              sx, sy (tile size in pixels; each must be >= 4)
 *              thresh (threshold for determining foreground)
 *              mincount (min threshold on counts in a tile)
 *              smoothx (half-width of block convolution kernel width)
 *              smoothy (half-width of block convolution kernel height)
 *              scorefract (fraction of the max Otsu score; typ. ~ 0.1)
 *              pthresh (<optional return> Otsu threshold estimated on
 *                       the input image; can be null)
 *      Return: pixd (1 bpp thresholded image), or null on error
 *
 *  Notes:
 *      (1) This begins with a standard background normalization.
 *          Additionally, there is a flexible background norm, that
 *          will adapt to a rapidly varying background, and this
 *          puts white pixels in the background near regions with
 *          significant foreground.  The white pixels are turned into
 *          a 1 bpp selection mask by binarization followed by dilation.
 *          Otsu thresholding is performed on the input image to get an
 *          estimate of the threshold in the non-mask regions.
 *          The background normalized image is thresholded with two
 *          different values, and the result is combined using
 *          the selection mask.
 *      (2) Note that the numbers 255 (for bgval target) and 190 (for
 *          thresholding on pixn) are tied together, and explicitly
 *          defined in this function.
 *      (3) The Otsu estimate is used for the thresholding whether or
 *          not the caller asks for it through pthresh.  If no estimate
 *          can be obtained, it is taken to be 0.
 *      (4) If mincount exceeds the tile area sx * sy, a warning is
 *          issued and mincount is replaced by (sx * sy) / 3.
 *      (5) See pixBackgroundNorm() for meaning and typical values
 *          of input parameters.  For a start, you can try:
 *            sx, sy = 10, 15
 *            thresh = 100
 *            mincount = 50
 *            smoothx, smoothy = 2
 */
PIX *
pixMaskedThreshOnBackgroundNorm(PIX       *pixs,
                                PIX       *pixim,
                                l_int32    sx,
                                l_int32    sy,
                                l_int32    thresh,
                                l_int32    mincount,
                                l_int32    smoothx,
                                l_int32    smoothy,
                                l_float32  scorefract,
                                l_int32   *pthresh)
{
l_int32   w, h;
l_uint32  val = 0;  /* Otsu estimate; stays 0 if none can be obtained */
PIX      *pixn, *pixm, *pixd, *pixt1, *pixt2, *pixt3 = NULL, *pixt4;

    PROCNAME("pixMaskedThreshOnBackgroundNorm");

    if (pthresh) *pthresh = 0;
    if (!pixs || pixGetDepth(pixs) != 8)
        return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL);
    if (pixGetColormap(pixs))
        return (PIX *)ERROR_PTR("pixs is colormapped", procName, NULL);
    if (sx < 4 || sy < 4)
        return (PIX *)ERROR_PTR("sx and sy must be >= 4", procName, NULL);
    if (mincount > sx * sy) {
        L_WARNING("mincount too large for tile size\n", procName);
        mincount = (sx * sy) / 3;
    }

        /* Standard background normalization */
    pixn = pixBackgroundNorm(pixs, pixim, NULL, sx, sy, thresh,
                             mincount, 255, smoothx, smoothy);
    if (!pixn)
        return (PIX *)ERROR_PTR("pixn not made", procName, NULL);

        /* Special background normalization for adaptation to quickly
         * varying background.  Threshold on the very light parts,
         * which tend to be near significant edges, and dilate to
         * form a mask over regions that are typically text.  The
         * dilation size is chosen to cover the text completely,
         * except for very thick fonts. */
    pixt1 = pixBackgroundNormFlex(pixs, 7, 7, 1, 1, 20);
    pixt2 = pixThresholdToBinary(pixt1, 240);
    pixInvert(pixt2, pixt2);
    pixm = pixMorphSequence(pixt2, "d21.21", 0);
    pixDestroy(&pixt1);
    pixDestroy(&pixt2);

        /* Use Otsu to get a global threshold estimate for the image,
         * which is stored as a single pixel in pixt3.  The estimate is
         * always read, because it is needed below even when the caller
         * passes a null pthresh. */
    pixGetDimensions(pixs, &w, &h, NULL);
    pixOtsuAdaptiveThreshold(pixs, w, h, 0, 0, scorefract, &pixt3, NULL);
    if (pixt3)
        pixGetPixel(pixt3, 0, 0, &val);
    if (pthresh)
        *pthresh = val;
    pixDestroy(&pixt3);

        /* Threshold the background normalized images differentially,
         * using a high value correlated with the background normalization
         * for the part of the image under the mask (i.e., near the
         * darker, thicker foreground), and a value that depends on the Otsu
         * threshold for the rest of the image.  This gives a solid
         * (high) thresholding for the foreground parts of the image,
         * while allowing the background and light foreground to be
         * reasonably well cleaned using a threshold adapted to the
         * input image. */
    pixd = pixThresholdToBinary(pixn, val + 30);  /* for bg and light fg */
    pixt4 = pixThresholdToBinary(pixn, 190);  /* for heavier fg */
    pixCombineMasked(pixd, pixt4, pixm);
    pixDestroy(&pixt4);
    pixDestroy(&pixm);
    pixDestroy(&pixn);

    if (!pixd)
        return (PIX *)ERROR_PTR("pixd not made", procName, NULL);
    else
        return pixd;
}
int main(int argc, char* argv[]) { PIX *pixs, *pixb, *pixt; int minb; if (argc < 2) { USAGE: fprintf(stderr, "Usage: %s </path/to/text-image>\n" "\t\t[binarize-threshold] [minw:minh:maxw:maxh]\n", strrchr(argv[0], '/') + 1); return EINVAL; } if (2 < argc) { errno = 0; minb = strtol(argv[2], NULL, 10); if (errno < 0) { fprintf(stderr, "strtol: %s\n", strerror(errno)); goto USAGE; } } else minb = 180; if (!(pixs = pixRead(argv[1]))) ; if (1 && (pixt = pixBackgroundNormMorph(pixs, NULL, 4, 5, 248))) { pixDestroy(&pixs); pixs = pixt; } else if (0 && (pixt = pixBackgroundNorm(pixs, NULL, NULL, 10, 15, 60, 40, 248, 2, 1))) { pixDestroy(&pixs); pixs = pixt; } if (1 && (pixt = pixFindSkewAndDeskew(pixs, 1, NULL, NULL))) { pixDestroy(&pixs); pixs = pixt; } if (0 && pixDisplay(pixs, 0, 0)) ; if (1) { PTA *ptas, *ptad; if (!(pixb = pixConvertTo1(pixs, minb))) ; // pixt = pixDeskewLocal(pixs, 10, 0, 0, 0.0, 0.0, 0.0)) if (!pixGetLocalSkewTransform(pixb, 10, 0, 0, 0.0, 0.0, 0.0, &ptas, &ptad)) { if ((pixt = pixProjectiveSampledPta(pixs, ptad, ptas, L_BRING_IN_WHITE))) { pixDestroy(&pixs); pixs = pixt; } ptaDestroy(&ptas); ptaDestroy(&ptad); } pixDestroy(&pixb); } if (0 && (pixt = pixGammaTRC(NULL, pixs, 1.0, 30, 180))) { pixDestroy(&pixs); pixs = pixt; } if (!(pixb = pixConvertTo1(pixs, minb))) ; if (0) { pixDestroy(&pixs); pixs = pixCopy(pixs, pixb); } // XXX: if (1) { BOX* box; int i, n, j, m; PIX *pixi, *pixl; BOXA *boxi, *boxl; int x, y, w, h, wid; int X = INT_MAX, Y = INT_MAX, W = 0, H; // XXX: do smaller(or no) pixOpenBrick if (pixGetRegionsBinary(pixb, &pixi, &pixl, NULL, 0)) ; boxl = pixConnComp(pixl, NULL, 4); n = boxaGetCount(boxl); for (i = 0; i < n; ++i) { BOXA* boxa; box = boxaGetBox(boxl, i, L_CLONE); boxGetGeometry(box, &x, &y, &w, &h); if (w < 30 || h < 30 || w < h || h < (w / 40)) { boxDestroy(&box); continue; boxaRemoveBox(boxl, i); } if (x < X) X = x; if (y < Y) Y = y; if (W < w) W = w; pixt = pixClipRectangle(pixb, box, NULL); boxDestroy(&box); // XXX: for 
English if (0) pixt = pixDilateBrick(pixt, pixt, h >> 1, h >> 1); else pixt = pixDilateBrick(pixt, pixt, 16 < h ? h >> 4 : 1, h << 1); if (0 && pixDisplay(pixt, 0, 0)) ; boxa = pixConnComp(pixt, NULL, 8); pixDestroy(&pixt); wid = (h * 3) >> 2; //boxaShift(boxa, x, y); m = boxaGetCount(boxa); for (j = 0; j < m; ++j) { int x0, y0, w0; box = boxaGetBox(boxa, j, L_CLONE); boxGetGeometry(box, &x0, &y0, &w0, NULL); // merge adjacent 2 or 3 small boxes if (1 && w0 < wid && (j + 1) < m) { BOX* boxn; int xn, wn; boxn = boxaGetBox(boxa, j + 1, L_CLONE); boxGetGeometry(boxn, &xn, NULL, &wn, NULL); if ((w0 = xn + wn - x0) < h) { boxaSparseClearBox(boxa, ++j); if (w0 < wid && (j + 1) < m) { boxDestroy(&boxn); boxn = boxaGetBox(boxa, j + 1, L_CLONE); boxGetGeometry(boxn, &xn, NULL, &wn, NULL); if ((wn = xn + wn - x0) < h) { boxaSparseClearBox(boxa, ++j); w0 = wn; } } boxSetGeometry(box, -1, -1, w0, -1); } boxDestroy(&boxn); } boxSetGeometry(box, x + x0, y + y0, -1, -1); boxDestroy(&box); } boxaSparseCompact(boxa); if (1 && (pixt = pixDrawBoxa(pixs, boxa, 1, 0xff000000))) { pixDestroy(&pixs); pixs = pixt; } boxaDestroy(&boxa); } H = y + h;