/*! * \brief pixFindStrokeLength() * * \param[in] pixs 1 bpp * \param[in] tab8 [optional] table for counting fg pixels; can be NULL * \param[out] *plength estimated length of the strokes * \return 0 if OK, 1 on error * * <pre> * Notes: * (1) Returns half the number of fg boundary pixels. * </pre> */ l_int32 pixFindStrokeLength(PIX *pixs, l_int32 *tab8, l_int32 *plength) { l_int32 n; l_int32 *tab; PIX *pix1; PROCNAME("pixFindStrokeLength"); if (!plength) return ERROR_INT("&length not defined", procName, 1); *plength = 0; if (!pixs) return ERROR_INT("pixs not defined", procName, 1); pix1 = pixExtractBoundary(pixs, 1); tab = (tab8) ? tab8 : makePixelSumTab8(); pixCountPixels(pix1, &n, tab); *plength = n / 2; if (!tab8) LEPT_FREE(tab); pixDestroy(&pix1); return 0; }
/*! * \brief pixaFindStrokeWidth() * * \param[in] pixa of 1 bpp images * \param[in] thresh fractional count threshold relative to distance 1 * \param[in] tab8 [optional] table for counting fg pixels; can be NULL * \param[in] debug 1 for debug output; 0 to skip * \return na array of stroke widths for each pix in %pixa; NULL on error * * <pre> * Notes: * (1) See pixFindStrokeWidth() for details. * </pre> */ NUMA * pixaFindStrokeWidth(PIXA *pixa, l_float32 thresh, l_int32 *tab8, l_int32 debug) { l_int32 i, n, same, maxd; l_int32 *tab; l_float32 width; NUMA *na; PIX *pix; PROCNAME("pixaFindStrokeWidth"); if (!pixa) return (NUMA *)ERROR_PTR("pixa not defined", procName, NULL); pixaVerifyDepth(pixa, &same, &maxd); if (maxd > 1) return (NUMA *)ERROR_PTR("pix not all 1 bpp", procName, NULL); tab = (tab8) ? tab8 : makePixelSumTab8(); n = pixaGetCount(pixa); na = numaCreate(n); for (i = 0; i < n; i++) { pix = pixaGetPix(pixa, i, L_CLONE); pixFindStrokeWidth(pix, thresh, tab8, &width, NULL); numaAddNumber(na, width); pixDestroy(&pix); } if (!tab8) LEPT_FREE(tab); return na; }
/*! * recogCreate() * * Input: scalew (scale all widths to this; use 0 for no scaling) * scaleh (scale all heights to this; use 0 for no scaling) * templ_type (L_USE_AVERAGE or L_USE_ALL) * threshold (for binarization; typically ~128) * maxyshift (from nominal centroid alignment; typically 0 or 1) * Return: recog, or null on error * * Notes: * (1) For a set trained on one font, such as numbers in a book, * it is sensible to set scalew = scaleh = 0. * (2) For a mixed training set, scaling to a fixed height, * such as 32 pixels, but leaving the width unscaled, is effective. * (3) The storage for most of the arrays is allocated when training * is finished. */ L_RECOG * recogCreate(l_int32 scalew, l_int32 scaleh, l_int32 templ_type, l_int32 threshold, l_int32 maxyshift) { L_RECOG *recog; PIXA *pixa; PIXAA *paa; PROCNAME("recogCreate"); if (scalew < 0 || scaleh < 0) return (L_RECOG *)ERROR_PTR("invalid scalew or scaleh", procName, NULL); if (templ_type != L_USE_AVERAGE && templ_type != L_USE_ALL) return (L_RECOG *)ERROR_PTR("invalid templ_type flag", procName, NULL); if (threshold < 1 || threshold > 255) return (L_RECOG *)ERROR_PTR("invalid threshold", procName, NULL); if ((recog = (L_RECOG *)CALLOC(1, sizeof(L_RECOG))) == NULL) return (L_RECOG *)ERROR_PTR("rec not made", procName, NULL); recog->templ_type = templ_type; recog->threshold = threshold; recog->scalew = scalew; recog->scaleh = scaleh; recog->maxyshift = maxyshift; recog->asperity_fr = DEFAULT_ASPERITY_FRACT; recogSetPadParams(recog, NULL, NULL, NULL, -1, -1, -1); recog->bmf = bmfCreate(NULL, 6); recog->bmf_size = 6; recog->maxarraysize = MAX_EXAMPLES_IN_CLASS; recog->index = -1; /* Generate the LUTs */ recog->centtab = makePixelCentroidTab8(); recog->sumtab = makePixelSumTab8(); recog->sa_text = sarrayCreate(0); recog->dna_tochar = l_dnaCreate(0); /* Input default values for min component size for splitting. * These are overwritten when pixTrainingFinished() is called. */ recog->min_splitw = 6; recog->min_splith = 6; recog->max_splith = 60; /* Generate the storage for the unscaled training bitmaps */ paa = pixaaCreate(recog->maxarraysize); pixa = pixaCreate(1); pixaaInitFull(paa, pixa); pixaDestroy(&pixa); recog->pixaa_u = paa; /* Generate the storage for debugging */ recog->pixadb_boot = pixaCreate(2); recog->pixadb_split = pixaCreate(2); return recog; }
/*! * pixConnCompAreaTransform() * * Input: pixs (1 bpp) * connect (connectivity: 4 or 8) * Return: pixd (16 bpp), or null on error * * Notes: * (1) The pixel values in pixd label the area of the fg component * to which the pixel belongs. Pixels in the bg are labelled 0. * (2) The pixel values cannot exceed 2^16 - 1, even if the area * of the c.c. is larger. * (3) For purposes of visualization, the output can be converted * to 8 bpp, using pixConvert16To8() or pixMaxDynamicRange(). */ PIX * pixConnCompAreaTransform(PIX *pixs, l_int32 connect) { l_int32 i, n, npix, w, h, xb, yb, wb, hb; l_int32 *tab8; BOXA *boxa; PIX *pix1, *pix2, *pixd; PIXA *pixa; PROCNAME("pixConnCompTransform"); if (!pixs || pixGetDepth(pixs) != 1) return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); if (connect != 4 && connect != 8) return (PIX *)ERROR_PTR("connectivity must be 4 or 8", procName, NULL); boxa = pixConnComp(pixs, &pixa, connect); n = pixaGetCount(pixa); boxaDestroy(&boxa); pixGetDimensions(pixs, &w, &h, NULL); pixd = pixCreate(w, h, 16); if (n == 0) { /* no fg */ pixaDestroy(&pixa); return pixd; } /* Label each component and blit it in */ tab8 = makePixelSumTab8(); for (i = 0; i < n; i++) { pixaGetBoxGeometry(pixa, i, &xb, &yb, &wb, &hb); pix1 = pixaGetPix(pixa, i, L_CLONE); pixCountPixels(pix1, &npix, tab8); npix = L_MIN(npix, 0xffff); pix2 = pixConvert1To16(NULL, pix1, 0, npix); pixRasterop(pixd, xb, yb, wb, hb, PIX_PAINT, pix2, 0, 0); pixDestroy(&pix1); pixDestroy(&pix2); } pixaDestroy(&pixa); FREE(tab8); return pixd; }
/*! * pixGetTextBaseline() * * Input: pixs (1 bpp, one textline character set) * tab8 (<optional> pixel sum table) * &y (<return> baseline value) * Return: 0 if OK, 1 on error * * Notes: * (1) Method: find the largest difference in pixel sums from one * raster line to the next one below it. The baseline is the * upper raster line for the pair of raster lines that * maximizes this function. */ static l_int32 pixGetTextBaseline(PIX *pixs, l_int32 *tab8, l_int32 *py) { l_int32 i, h, val1, val2, diff, diffmax, ymax; l_int32 *tab; NUMA *na; PROCNAME("pixGetTextBaseline"); if (!pixs) return ERROR_INT("pixs not defined", procName, 1); if (!py) return ERROR_INT("&y not defined", procName, 1); *py = 0; if (!tab8) tab = makePixelSumTab8(); else tab = tab8; na = pixCountPixelsByRow(pixs, tab); h = numaGetCount(na); diffmax = 0; ymax = 0; for (i = 1; i < h; i++) { numaGetIValue(na, i - 1, &val1); numaGetIValue(na, i, &val2); diff = L_MAX(0, val1 - val2); if (diff > diffmax) { diffmax = diff; ymax = i - 1; /* upper raster line */ } } *py = ymax; if (!tab8) FREE(tab); numaDestroy(&na); return 0; }
/*! * \brief recogCreate() * * \param[in] scalew scale all widths to this; use 0 otherwise * \param[in] scaleh scale all heights to this; use 0 otherwise * \param[in] linew width of normalized strokes; use 0 to skip * \param[in] threshold for binarization; typically ~128; 0 for default * \param[in] maxyshift from nominal centroid alignment; default is 1 * \return recog, or NULL on error * * <pre> * Notes: * (1) If %scalew == 0 and %scaleh == 0, no scaling is done. * If one of these is 0 and the other is > 0, scaling is isotropic * to the requested size. We typically do not set both > 0. * (2) Use linew > 0 to convert the templates to images with fixed * width strokes. linew == 0 skips the conversion. * (3) The only valid values for %maxyshift are 0, 1 and 2. * It is recommended to use %maxyshift == 1 (default value). * Using %maxyshift == 0 is much faster than %maxyshift == 1, but * it is much less likely to find the template with the best * correlation. Use of anything but 1 results in a warning. * (4) Scaling is used for finding outliers and for training a * book-adapted recognizer (BAR) from a bootstrap recognizer (BSR). * Scaling the height to a fixed value and scaling the width * accordingly (e.g., %scaleh = 40, %scalew = 0) is recommended. * (5) The storage for most of the arrays is allocated when training * is finished. * </pre> */ L_RECOG * recogCreate(l_int32 scalew, l_int32 scaleh, l_int32 linew, l_int32 threshold, l_int32 maxyshift) { L_RECOG *recog; PROCNAME("recogCreate"); if (scalew < 0 || scaleh < 0) return (L_RECOG *)ERROR_PTR("invalid scalew or scaleh", procName, NULL); if (linew > 10) return (L_RECOG *)ERROR_PTR("invalid linew > 10", procName, NULL); if (threshold == 0) threshold = DEFAULT_THRESHOLD; if (threshold < 0 || threshold > 255) { L_WARNING("invalid threshold; using default\n", procName); threshold = DEFAULT_THRESHOLD; } if (maxyshift < 0 || maxyshift > 2) { L_WARNING("invalid maxyshift; using default value\n", procName); maxyshift = DEFAULT_MAXYSHIFT; } else if (maxyshift == 0) { L_WARNING("Using maxyshift = 0; faster, worse correlation results\n", procName); } else if (maxyshift == 2) { L_WARNING("Using maxyshift = 2; slower\n", procName); } if ((recog = (L_RECOG *)LEPT_CALLOC(1, sizeof(L_RECOG))) == NULL) return (L_RECOG *)ERROR_PTR("rec not made", procName, NULL); recog->templ_use = L_USE_ALL_TEMPLATES; /* default */ recog->threshold = threshold; recog->scalew = scalew; recog->scaleh = scaleh; recog->linew = linew; recog->maxyshift = maxyshift; recogSetParams(recog, 1, -1, -1.0, -1.0); recog->bmf = bmfCreate(NULL, 6); recog->bmf_size = 6; recog->maxarraysize = MAX_EXAMPLES_IN_CLASS; /* Generate the LUTs */ recog->centtab = makePixelCentroidTab8(); recog->sumtab = makePixelSumTab8(); recog->sa_text = sarrayCreate(0); recog->dna_tochar = l_dnaCreate(0); /* Input default values for min component size for splitting. * These are overwritten when pixTrainingFinished() is called. */ recog->min_splitw = 6; recog->max_splith = 60; /* Allocate the paa for the unscaled training bitmaps */ recog->pixaa_u = pixaaCreate(recog->maxarraysize); /* Generate the storage for debugging */ recog->pixadb_boot = pixaCreate(2); recog->pixadb_split = pixaCreate(2); return recog; }
/*! * \brief pixFindStrokeWidth() * * \param[in] pixs 1 bpp * \param[in] thresh fractional count threshold relative to distance 1 * \param[in] tab8 [optional] table for counting fg pixels; can be NULL * \param[out] *pwidth estimated width of the strokes * \param[out] *pnahisto [optional] histo of pixel distances from bg * \return 0 if OK, 1 on error * * <pre> * Notes: * (1) This uses two methods to estimate the stroke width: * (a) half the fg boundary length * (b) a value derived from the histogram of the fg distance transform * (2) Distance is measured in 8-connected * (3) %thresh is the minimum fraction N(dist=d)/N(dist=1) of pixels * required to determine if the pixels at distance d are above * the noise. It is typically about 0.15. * </pre> */ l_int32 pixFindStrokeWidth(PIX *pixs, l_float32 thresh, l_int32 *tab8, l_float32 *pwidth, NUMA **pnahisto) { l_int32 i, n, count, length, first, last; l_int32 *tab; l_float32 width1, width2, ratio, extra; l_float32 *fa; NUMA *na1, *na2; PIX *pix1; PROCNAME("pixFindStrokeWidth"); if (!pwidth) return ERROR_INT("&width not defined", procName, 1); *pwidth = 0; if (!pixs) return ERROR_INT("pixs not defined", procName, 1); tab = (tab8) ? tab8 : makePixelSumTab8(); /* ------- Method 1: via boundary length ------- */ /* The computed stroke length is a bit larger than that actual * length, because of the addition of the 'caps' at the * stroke ends. Therefore the computed width is a bit * smaller than the average width. */ pixFindStrokeLength(pixs, tab8, &length); pixCountPixels(pixs, &count, tab8); width1 = (l_float32)count / (l_float32)length; /* ------- Method 2: via distance transform ------- */ /* First get the histogram of distances */ pix1 = pixDistanceFunction(pixs, 8, 8, L_BOUNDARY_BG); na1 = pixGetGrayHistogram(pix1, 1); pixDestroy(&pix1); numaGetNonzeroRange(na1, 0.1, &first, &last); na2 = numaClipToInterval(na1, 0, last); numaWriteStream(stderr, na2); /* Find the bucket with the largest distance whose contents * exceed the threshold. */ fa = numaGetFArray(na2, L_NOCOPY); n = numaGetCount(na2); for (i = n - 1; i > 0; i--) { ratio = fa[i] / fa[1]; if (ratio > thresh) break; } /* Let the last skipped bucket contribute to the stop bucket. * This is the 'extra' term below. The result may be a slight * over-correction, so the computed width may be a bit larger * than the average width. */ extra = (i < n - 1) ? fa[i + 1] / fa[1] : 0; width2 = 2.0 * (i - 1.0 + ratio + extra); fprintf(stderr, "width1 = %5.2f, width2 = %5.2f\n", width1, width2); /* Average the two results */ *pwidth = (width1 + width2) / 2.0; if (!tab8) LEPT_FREE(tab); numaDestroy(&na1); if (pnahisto) *pnahisto = na2; else numaDestroy(&na2); return 0; }
l_int32 main(int argc, char **argv) { char buf[512]; l_int32 delx, dely, etransx, etransy, w, h, area1, area2; l_int32 *stab, *ctab; l_float32 cx1, cy1, cx2, cy2, score; PIX *pix0, *pix1, *pix2; L_REGPARAMS *rp; if (regTestSetup(argc, argv, &rp)) return 1; /* ------------ Test of pixBestCorrelation() --------------- */ pix0 = pixRead("harmoniam100-11.png"); pix1 = pixConvertTo1(pix0, 160); pixGetDimensions(pix1, &w, &h, NULL); /* Now make a smaller image, translated by (-32, -12) * Except for the resizing, this is equivalent to * pix2 = pixTranslate(NULL, pix1, -32, -12, L_BRING_IN_WHITE); */ pix2 = pixCreate(w - 10, h, 1); pixRasterop(pix2, 0, 0, w, h, PIX_SRC, pix1, 32, 12); /* Get the number of FG pixels and the centroid locations */ stab = makePixelSumTab8(); ctab = makePixelCentroidTab8(); pixCountPixels(pix1, &area1, stab); pixCountPixels(pix2, &area2, stab); pixCentroid(pix1, ctab, stab, &cx1, &cy1); pixCentroid(pix2, ctab, stab, &cx2, &cy2); etransx = lept_roundftoi(cx1 - cx2); etransy = lept_roundftoi(cy1 - cy2); fprintf(stderr, "delta cx = %d, delta cy = %d\n", etransx, etransy); /* Get the best correlation, searching around the translation * where the centroids coincide */ pixBestCorrelation(pix1, pix2, area1, area2, etransx, etransy, 4, stab, &delx, &dely, &score, 5); fprintf(stderr, "delx = %d, dely = %d, score = %7.4f\n", delx, dely, score); regTestCompareValues(rp, 32, delx, 0); /* 0 */ regTestCompareValues(rp, 12, dely, 0); /* 1 */ regTestCheckFile(rp, "/tmp/junkcorrel_5.png"); /* 2 */ lept_rm(NULL, "junkcorrel_5.png"); FREE(stab); FREE(ctab); pixDestroy(&pix0); pixDestroy(&pix1); pixDestroy(&pix2); /* ------------ Test of pixCompareWithTranslation() ------------ */ /* Now use the pyramid to get the result. Do a translation * to remove pixels at the bottom from pix2, so that the * centroids are initially far apart. */ pix1 = pixRead("harmoniam-11.tif"); pix2 = pixTranslate(NULL, pix1, -45, 25, L_BRING_IN_WHITE); l_pdfSetDateAndVersion(0); pixCompareWithTranslation(pix1, pix2, 160, &delx, &dely, &score, 1); pixDestroy(&pix1); pixDestroy(&pix2); fprintf(stderr, "delx = %d, dely = %d\n", delx, dely); regTestCompareValues(rp, 45, delx, 0); /* 3 */ regTestCompareValues(rp, -25, dely, 0); /* 4 */ regTestCheckFile(rp, "/tmp/junkcmp.pdf"); /* 5 */ regTestCheckFile(rp, "/tmp/junkcorrel.pdf"); /* 6 */ return regTestCleanup(rp); }
/*! * pixaGenerateFont() * * Input: pix (of 95 characters in 3 rows) * fontsize (4, 6, 8, ... , 20, in pts at 300 ppi) * &bl1 (<return> baseline of row 1) * &bl2 (<return> baseline of row 2) * &bl3 (<return> baseline of row 3) * Return: pixa of font bitmaps for 95 characters, or null on error * * Notes: * (1) This does all the work. See pixaGenerateFontFromFile() * for an overview. * (2) The pix is for one of the 9 fonts. @fontsize is only * used here for debugging. */ PIXA * pixaGenerateFont(PIX *pixs, l_int32 fontsize, l_int32 *pbl0, l_int32 *pbl1, l_int32 *pbl2) { l_int32 i, j, nrows, nrowchars, nchars, h, yval; l_int32 width, height; l_int32 baseline[3]; l_int32 *tab = NULL; BOX *box, *box1, *box2; BOXA *boxar, *boxac, *boxacs; PIX *pix1, *pix2, *pixr, *pixrc, *pixc; PIXA *pixa; l_int32 n, w, inrow, top; l_int32 *ia; NUMA *na; PROCNAME("pixaGenerateFont"); if (!pbl0 || !pbl1 || !pbl2) return (PIXA *)ERROR_PTR("&bl not all defined", procName, NULL); *pbl0 = *pbl1 = *pbl2 = 0; if (!pixs) return (PIXA *)ERROR_PTR("pixs not defined", procName, NULL); /* Locate the 3 rows of characters */ w = pixGetWidth(pixs); na = pixCountPixelsByRow(pixs, NULL); boxar = boxaCreate(0); n = numaGetCount(na); ia = numaGetIArray(na); inrow = 0; for (i = 0; i < n; i++) { if (!inrow && ia[i] > 0) { inrow = 1; top = i; } else if (inrow && ia[i] == 0) { inrow = 0; box = boxCreate(0, top, w, i - top); boxaAddBox(boxar, box, L_INSERT); } } FREE(ia); numaDestroy(&na); nrows = boxaGetCount(boxar); #if DEBUG_FONT_GEN L_INFO("For fontsize %s, have %d rows\n", procName, fontsize, nrows); #endif /* DEBUG_FONT_GEN */ if (nrows != 3) { L_INFO("nrows = %d; skipping fontsize %d\n", procName, nrows, fontsize); return (PIXA *)ERROR_PTR("3 rows not generated", procName, NULL); } /* Grab the character images and baseline data */ #if DEBUG_BASELINE lept_rmdir("baseline"); lept_mkdir("baseline"); #endif /* DEBUG_BASELINE */ tab = makePixelSumTab8(); pixa = pixaCreate(95); for (i = 0; i < nrows; i++) { box = boxaGetBox(boxar, i, L_CLONE); pixr = pixClipRectangle(pixs, box, NULL); /* row of chars */ pixGetTextBaseline(pixr, tab, &yval); baseline[i] = yval; #if DEBUG_BASELINE L_INFO("Baseline info: row %d, yval = %d, h = %d\n", procName, i, yval, pixGetHeight(pixr)); pix1 = pixCopy(NULL, pixr); pixRenderLine(pix1, 0, yval, pixGetWidth(pix1), yval, 1, L_FLIP_PIXELS); if (i == 0 ) pixWrite("/tmp/baseline/row0.png", pix1, IFF_PNG); else if (i == 1) pixWrite("/tmp/baseline/row1.png", pix1, IFF_PNG); else pixWrite("/tmp/baseline/row2.png", pix1, IFF_PNG); pixDestroy(&pix1); #endif /* DEBUG_BASELINE */ boxDestroy(&box); pixrc = pixCloseSafeBrick(NULL, pixr, 1, 35); boxac = pixConnComp(pixrc, NULL, 8); boxacs = boxaSort(boxac, L_SORT_BY_X, L_SORT_INCREASING, NULL); if (i == 0) { /* consolidate the two components of '"' */ box1 = boxaGetBox(boxacs, 1, L_CLONE); box2 = boxaGetBox(boxacs, 2, L_CLONE); box1->w = box2->x + box2->w - box1->x; /* increase width */ boxDestroy(&box1); boxDestroy(&box2); boxaRemoveBox(boxacs, 2); } h = pixGetHeight(pixr); nrowchars = boxaGetCount(boxacs); for (j = 0; j < nrowchars; j++) { box = boxaGetBox(boxacs, j, L_COPY); if (box->w <= 2 && box->h == 1) { /* skip 1x1, 2x1 components */ boxDestroy(&box); continue; } box->y = 0; box->h = h - 1; pixc = pixClipRectangle(pixr, box, NULL); boxDestroy(&box); if (i == 0 && j == 0) /* add a pix for the space; change later */ pixaAddPix(pixa, pixc, L_COPY); if (i == 2 && j == 0) /* add a pix for the '\'; change later */ pixaAddPix(pixa, pixc, L_COPY); pixaAddPix(pixa, pixc, L_INSERT); } pixDestroy(&pixr); pixDestroy(&pixrc); boxaDestroy(&boxac); boxaDestroy(&boxacs); } FREE(tab); nchars = pixaGetCount(pixa); if (nchars != 95) return (PIXA *)ERROR_PTR("95 chars not generated", procName, NULL); *pbl0 = baseline[0]; *pbl1 = baseline[1]; *pbl2 = baseline[2]; /* Fix the space character up; it should have no ON pixels, * and be about twice as wide as the '!' character. */ pix1 = pixaGetPix(pixa, 0, L_CLONE); width = 2 * pixGetWidth(pix1); height = pixGetHeight(pix1); pixDestroy(&pix1); pix1 = pixCreate(width, height, 1); pixaReplacePix(pixa, 0, pix1, NULL); /* Fix up the '\' character; use a LR flip of the '/' char */ pix1 = pixaGetPix(pixa, 15, L_CLONE); pix2 = pixFlipLR(NULL, pix1); pixDestroy(&pix1); pixaReplacePix(pixa, 60, pix2, NULL); #if DEBUG_CHARS pix1 = pixaDisplayTiled(pixa, 1500, 0, 10); pixDisplay(pix1, 100 * i, 200); pixDestroy(&pix1); #endif /* DEBUG_CHARS */ boxaDestroy(&boxar); return pixa; }
l_int32 main(int argc, char **argv) { l_int32 delx, dely, etransx, etransy, w, h, area1, area2; l_int32 *stab, *ctab; l_float32 cx1, cy1, cx2, cy2, score, fract; PIX *pix0, *pix1, *pix2, *pix3, *pix4, *pix5; L_REGPARAMS *rp; if (regTestSetup(argc, argv, &rp)) return 1; /* ------------ Test of pixBestCorrelation() --------------- */ pix0 = pixRead("harmoniam100-11.png"); pix1 = pixConvertTo1(pix0, 160); pixGetDimensions(pix1, &w, &h, NULL); /* Now make a smaller image, translated by (-32, -12) * Except for the resizing, this is equivalent to * pix2 = pixTranslate(NULL, pix1, -32, -12, L_BRING_IN_WHITE); */ pix2 = pixCreate(w - 10, h, 1); pixRasterop(pix2, 0, 0, w, h, PIX_SRC, pix1, 32, 12); /* Get the number of FG pixels and the centroid locations */ stab = makePixelSumTab8(); ctab = makePixelCentroidTab8(); pixCountPixels(pix1, &area1, stab); pixCountPixels(pix2, &area2, stab); pixCentroid(pix1, ctab, stab, &cx1, &cy1); pixCentroid(pix2, ctab, stab, &cx2, &cy2); etransx = lept_roundftoi(cx1 - cx2); etransy = lept_roundftoi(cy1 - cy2); fprintf(stderr, "delta cx = %d, delta cy = %d\n", etransx, etransy); /* Get the best correlation, searching around the translation * where the centroids coincide */ pixBestCorrelation(pix1, pix2, area1, area2, etransx, etransy, 4, stab, &delx, &dely, &score, 5); fprintf(stderr, "delx = %d, dely = %d, score = %7.4f\n", delx, dely, score); regTestCompareValues(rp, 32, delx, 0); /* 0 */ regTestCompareValues(rp, 12, dely, 0); /* 1 */ lept_mv("/tmp/lept/correl_5.png", "regout", NULL, NULL); regTestCheckFile(rp, "/tmp/regout/correl_5.png"); /* 2 */ lept_free(stab); lept_free(ctab); pixDestroy(&pix0); pixDestroy(&pix1); pixDestroy(&pix2); /* ------------ Test of pixCompareWithTranslation() ------------ */ /* Now use the pyramid to get the result. Do a translation * to remove pixels at the bottom from pix2, so that the * centroids are initially far apart. */ pix1 = pixRead("harmoniam-11.tif"); pix2 = pixTranslate(NULL, pix1, -45, 25, L_BRING_IN_WHITE); l_pdfSetDateAndVersion(0); pixCompareWithTranslation(pix1, pix2, 160, &delx, &dely, &score, 1); pixDestroy(&pix1); pixDestroy(&pix2); fprintf(stderr, "delx = %d, dely = %d\n", delx, dely); regTestCompareValues(rp, 45, delx, 0); /* 3 */ regTestCompareValues(rp, -25, dely, 0); /* 4 */ lept_mv("/tmp/lept/correl.pdf", "regout", NULL, NULL); lept_mv("/tmp/lept/compare.pdf", "regout", NULL, NULL); regTestCheckFile(rp, "/tmp/regout/compare.pdf"); /* 5 */ regTestCheckFile(rp, "/tmp/regout/correl.pdf"); /* 6 */ /* ------------ Test of pixGetPerceptualDiff() --------------- */ pix0 = pixRead("greencover.jpg"); pix1 = pixRead("redcover.jpg"); /* pre-scaled to the same size */ /* Apply directly to the color images */ pixGetPerceptualDiff(pix0, pix1, 1, 3, 20, &fract, &pix2, &pix3); fprintf(stderr, "Fraction of color pixels = %f\n", fract); regTestCompareValues(rp, 0.061252, fract, 0.01); /* 7 */ regTestWritePixAndCheck(rp, pix2, IFF_JFIF_JPEG); /* 8 */ regTestWritePixAndCheck(rp, pix3, IFF_TIFF_G4); /* 9 */ pixDestroy(&pix2); pixDestroy(&pix3); /* Apply to grayscale images */ pix2 = pixConvertTo8(pix0, 0); pix3 = pixConvertTo8(pix1, 0); pixGetPerceptualDiff(pix2, pix3, 1, 3, 20, &fract, &pix4, &pix5); fprintf(stderr, "Fraction of grayscale pixels = %f\n", fract); regTestCompareValues(rp, 0.046928, fract, 0.0002); /* 10 */ regTestWritePixAndCheck(rp, pix4, IFF_JFIF_JPEG); /* 11 */ regTestWritePixAndCheck(rp, pix5, IFF_TIFF_G4); /* 12 */ pixDestroy(&pix0); pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); pixDestroy(&pix4); pixDestroy(&pix5); return regTestCleanup(rp); }
/*! * pixaGenerateFont() * * Input: dir (directory holding image of character set) * size (4, 6, 8, ... , 20, in pts at 300 ppi) * &bl1 (<return> baseline of row 1) * &bl2 (<return> baseline of row 2) * &bl3 (<return> baseline of row 3) * Return: pixa of font bitmaps for 95 characters, or null on error * * These font generation functions use 9 sets, each with bitmaps * of 94 ascii characters, all in Palatino-Roman font. * Each input bitmap has 3 rows of characters. The range of * ascii values in each row is as follows: * row 0: 32-57 (32 is a space) * row 1: 58-91 (92, '\', is not represented in this font) * row 2: 93-126 * We LR flip the '/' char to generate a bitmap for the missing * '\' character, so that we have representations of all 95 * printable chars. * * Computation of the bitmaps and baselines for a single * font takes from 40 to 200 msec on a 2 GHz processor, * depending on the size. Use pixaGetFont() to read the * generated character set directly from files that were * produced in prog/genfonts.c using this function. */ PIXA * pixaGenerateFont(const char *dir, l_int32 size, l_int32 *pbl0, l_int32 *pbl1, l_int32 *pbl2) { char *pathname; l_int32 fileno; l_int32 i, j, nrows, nrowchars, nchars, h, yval; l_int32 width, height; l_int32 baseline[3]; l_int32 *tab; BOX *box, *box1, *box2; BOXA *boxar, *boxac, *boxacs; PIX *pixs, *pixt1, *pixt2, *pixt3; PIX *pixr, *pixrc, *pixc; PIXA *pixa; PROCNAME("pixaGenerateFont"); if (!pbl0 || !pbl1 || !pbl2) return (PIXA *)ERROR_PTR("&bl not all defined", procName, NULL); *pbl0 = *pbl1 = *pbl2 = 0; fileno = (size / 2) - 2; if (fileno < 0 || fileno > NFONTS) return (PIXA *)ERROR_PTR("font size invalid", procName, NULL); tab = makePixelSumTab8(); pathname = genPathname(dir, inputfonts[fileno]); if ((pixs = pixRead(pathname)) == NULL) return (PIXA *)ERROR_PTR("pixs not all defined", procName, NULL); FREE(pathname); pixa = pixaCreate(95); pixt1 = pixMorphSequence(pixs, "c1.35 + c101.1", 0); boxar = pixConnComp(pixt1, NULL, 8); /* one box for each row */ pixDestroy(&pixt1); nrows = boxaGetCount(boxar); #if DEBUG_FONT_GEN fprintf(stderr, "For font %s, number of rows is %d\n", inputfonts[fileno], nrows); #endif /* DEBUG_FONT_GEN */ if (nrows != 3) { L_INFO_INT2("nrows = %d; skipping font %d", procName, nrows, fileno); return (PIXA *)ERROR_PTR("3 rows not generated", procName, NULL); } for (i = 0; i < nrows; i++) { box = boxaGetBox(boxar, i, L_CLONE); pixr = pixClipRectangle(pixs, box, NULL); /* row of chars */ pixGetTextBaseline(pixr, tab, &yval); baseline[i] = yval; #if DEBUG_BASELINE { PIX *pixbl; fprintf(stderr, "row %d, yval = %d, h = %d\n", i, yval, pixGetHeight(pixr)); pixbl = pixCopy(NULL, pixr); pixRenderLine(pixbl, 0, yval, pixGetWidth(pixbl), yval, 1, L_FLIP_PIXELS); if (i == 0 ) pixWrite("junktl0", pixbl, IFF_PNG); else if (i == 1) pixWrite("junktl1", pixbl, IFF_PNG); else pixWrite("junktl2", pixbl, IFF_PNG); pixDestroy(&pixbl); } #endif /* DEBUG_BASELINE */ boxDestroy(&box); pixrc = pixCloseSafeBrick(NULL, pixr, 1, 35); boxac = pixConnComp(pixrc, NULL, 8); boxacs = boxaSort(boxac, L_SORT_BY_X, L_SORT_INCREASING, NULL); if (i == 0) { /* consolidate the two components of '"' */ box1 = boxaGetBox(boxacs, 1, L_CLONE); box2 = boxaGetBox(boxacs, 2, L_CLONE); box1->w = box2->x + box2->w - box1->x; /* increase width */ boxDestroy(&box1); boxDestroy(&box2); boxaRemoveBox(boxacs, 2); } h = pixGetHeight(pixr); nrowchars = boxaGetCount(boxacs); for (j = 0; j < nrowchars; j++) { box = boxaGetBox(boxacs, j, L_COPY); if (box->w <= 2 && box->h == 1) { /* skip 1x1, 2x1 components */ boxDestroy(&box); continue; } box->y = 0; box->h = h - 1; pixc = pixClipRectangle(pixr, box, NULL); boxDestroy(&box); if (i == 0 && j == 0) /* add a pix for the space; change later */ pixaAddPix(pixa, pixc, L_COPY); if (i == 2 && j == 0) /* add a pix for the '\'; change later */ pixaAddPix(pixa, pixc, L_COPY); pixaAddPix(pixa, pixc, L_INSERT); } pixDestroy(&pixr); pixDestroy(&pixrc); boxaDestroy(&boxac); boxaDestroy(&boxacs); } nchars = pixaGetCount(pixa); if (nchars != 95) return (PIXA *)ERROR_PTR("95 chars not generated", procName, NULL); *pbl0 = baseline[0]; *pbl1 = baseline[1]; *pbl2 = baseline[2]; /* Fix the space character up; it should have no ON pixels, * and be about twice as wide as the '!' character. */ pixt2 = pixaGetPix(pixa, 0, L_CLONE); width = 2 * pixGetWidth(pixt2); height = pixGetHeight(pixt2); pixDestroy(&pixt2); pixt2 = pixCreate(width, height, 1); pixaReplacePix(pixa, 0, pixt2, NULL); /* Fix up the '\' character; use a LR flip of the '/' char */ pixt2 = pixaGetPix(pixa, 15, L_CLONE); pixt3 = pixFlipLR(NULL, pixt2); pixDestroy(&pixt2); pixaReplacePix(pixa, 60, pixt3, NULL); #if DEBUG_CHARS { PIX *pixd; pixd = pixaDisplayTiled(pixa, 1500, 0, 10); pixDisplay(pixd, 100 * i, 200); pixDestroy(&pixd); } #endif /* DEBUG_CHARS */ pixDestroy(&pixs); boxaDestroy(&boxar); FREE(tab); return pixa; }