/*!
 *  recogCreate()
 *
 *      Input:  scalew (common width for all templates; 0 disables
 *                      width scaling)
 *              scaleh (common height for all templates; 0 disables
 *                      height scaling)
 *              templ_type (L_USE_AVERAGE or L_USE_ALL)
 *              threshold (binarization threshold, in [1 ... 255];
 *                         typically ~128)
 *              maxyshift (allowed shift from nominal centroid alignment;
 *                         typically 0 or 1; stored without validation)
 *      Return: recog, or null on error
 *
 *  Notes:
 *      (1) When the training set comes from a single font (e.g., the
 *          numbers in one book), scalew = scaleh = 0 is a sensible choice.
 *      (2) When the training set is mixed, it is effective to scale to
 *          a fixed height (e.g., 32 pixels) and leave the width unscaled.
 *      (3) Most of the arrays are not allocated here; their storage is
 *          allocated when training is finished.
 *      (4) Negative scalew or scaleh, an unknown templ_type, or a
 *          threshold outside [1 ... 255] are errors.
 */
L_RECOG *
recogCreate(l_int32  scalew,
            l_int32  scaleh,
            l_int32  templ_type,
            l_int32  threshold,
            l_int32  maxyshift)
{
L_RECOG  *rec;
PIXA     *pixa_seed;
PIXAA    *paa_unscaled;

    PROCNAME("recogCreate");

        /* Validate the arguments before allocating anything */
    if (scalew < 0 || scaleh < 0) {
        return (L_RECOG *)ERROR_PTR("invalid scalew or scaleh",
                                    procName, NULL);
    }
    if (!(templ_type == L_USE_AVERAGE || templ_type == L_USE_ALL)) {
        return (L_RECOG *)ERROR_PTR("invalid templ_type flag",
                                    procName, NULL);
    }
    if (!(threshold >= 1 && threshold <= 255)) {
        return (L_RECOG *)ERROR_PTR("invalid threshold", procName, NULL);
    }

    rec = (L_RECOG *)CALLOC(1, sizeof(L_RECOG));
    if (rec == NULL) {
        return (L_RECOG *)ERROR_PTR("rec not made", procName, NULL);
    }

        /* Record the caller's choices, then the default padding params */
    rec->scalew = scalew;
    rec->scaleh = scaleh;
    rec->templ_type = templ_type;
    rec->threshold = threshold;
    rec->maxyshift = maxyshift;
    rec->asperity_fr = DEFAULT_ASPERITY_FRACT;
    recogSetPadParams(rec, NULL, NULL, NULL, -1, -1, -1);

        /* Scalar defaults.  The min/max component sizes for splitting
         * are overwritten when pixTrainingFinished() is called. */
    rec->bmf_size = 6;
    rec->maxarraysize = MAX_EXAMPLES_IN_CLASS;
    rec->index = -1;
    rec->min_splitw = 6;
    rec->min_splith = 6;
    rec->max_splith = 60;

        /* Font, the LUTs, and the text/char lookup arrays */
    rec->bmf = bmfCreate(NULL, 6);
    rec->centtab = makePixelCentroidTab8();
    rec->sumtab = makePixelSumTab8();
    rec->sa_text = sarrayCreate(0);
    rec->dna_tochar = l_dnaCreate(0);

        /* Storage for the unscaled training bitmaps: the pixaa is
         * initialized with pixaaInitFull() from a temporary pixa,
         * which is released immediately afterwards. */
    paa_unscaled = pixaaCreate(rec->maxarraysize);
    pixa_seed = pixaCreate(1);
    pixaaInitFull(paa_unscaled, pixa_seed);
    pixaDestroy(&pixa_seed);
    rec->pixaa_u = paa_unscaled;

        /* Storage for debugging output */
    rec->pixadb_boot = pixaCreate(2);
    rec->pixadb_split = pixaCreate(2);

    return rec;
}
/*!
 * \brief   recogCreate()
 *
 * \param[in]    scalew     common width for all templates; 0 otherwise
 * \param[in]    scaleh     common height for all templates; 0 otherwise
 * \param[in]    linew      width of normalized strokes; 0 to skip;
 *                          values above 10 are rejected
 * \param[in]    threshold  binarization threshold; typically ~128;
 *                          0 selects the default
 * \param[in]    maxyshift  allowed shift from nominal centroid alignment;
 *                          the default is 1
 * \return  recog, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) No scaling is done when both %scalew and %scaleh are 0.
 *          When exactly one of them is > 0, scaling is isotropic to
 *          the requested size.  Setting both > 0 is not typical.
 *      (2) With linew > 0 the templates are converted to images having
 *          strokes of fixed width; linew == 0 skips that conversion.
 *      (3) %maxyshift must be 0, 1 or 2; any other value is replaced by
 *          the default, with a warning.  The recommended (default) value
 *          is 1.  %maxyshift == 0 runs much faster than 1 but is much
 *          less likely to find the template with the best correlation.
 *          Every value other than 1 produces a warning.
 *      (4) A %threshold outside [0 ... 255] is replaced by the default,
 *          with a warning.
 *      (5) Scaling is used when finding outliers and when training a
 *          book-adapted recognizer (BAR) from a bootstrap recognizer
 *          (BSR).  The recommended setup scales the height to a fixed
 *          value and lets the width follow (e.g., %scaleh = 40,
 *          %scalew = 0).
 *      (6) Most of the arrays are not allocated here; their storage is
 *          allocated when training is finished.
 * </pre>
 */
L_RECOG *
recogCreate(l_int32  scalew,
            l_int32  scaleh,
            l_int32  linew,
            l_int32  threshold,
            l_int32  maxyshift)
{
L_RECOG  *rec;

    PROCNAME("recogCreate");

        /* Hard errors */
    if (scalew < 0 || scaleh < 0) {
        return (L_RECOG *)ERROR_PTR("invalid scalew or scaleh",
                                    procName, NULL);
    }
    if (linew > 10) {
        return (L_RECOG *)ERROR_PTR("invalid linew > 10", procName, NULL);
    }

        /* Soft errors: substitute defaults */
    if (threshold == 0) {
        threshold = DEFAULT_THRESHOLD;
    }
    if (!(threshold >= 0 && threshold <= 255)) {
        L_WARNING("invalid threshold; using default\n", procName);
        threshold = DEFAULT_THRESHOLD;
    }

    switch (maxyshift) {
    case 1:   /* recommended value; nothing to report */
        break;
    case 0:
        L_WARNING("Using maxyshift = 0; faster, worse correlation results\n",
                  procName);
        break;
    case 2:
        L_WARNING("Using maxyshift = 2; slower\n", procName);
        break;
    default:
        L_WARNING("invalid maxyshift; using default value\n", procName);
        maxyshift = DEFAULT_MAXYSHIFT;
        break;
    }

    rec = (L_RECOG *)LEPT_CALLOC(1, sizeof(L_RECOG));
    if (rec == NULL) {
        return (L_RECOG *)ERROR_PTR("rec not made", procName, NULL);
    }

        /* Record the caller's choices, then the default parameters */
    rec->scalew = scalew;
    rec->scaleh = scaleh;
    rec->linew = linew;
    rec->threshold = threshold;
    rec->maxyshift = maxyshift;
    rec->templ_use = L_USE_ALL_TEMPLATES;  /* default */
    recogSetParams(rec, 1, -1, -1.0, -1.0);

        /* Scalar defaults.  The component sizes for splitting are
         * overwritten when pixTrainingFinished() is called. */
    rec->bmf_size = 6;
    rec->maxarraysize = MAX_EXAMPLES_IN_CLASS;
    rec->min_splitw = 6;
    rec->max_splith = 60;

        /* Font, the LUTs, and the text/char lookup arrays */
    rec->bmf = bmfCreate(NULL, 6);
    rec->centtab = makePixelCentroidTab8();
    rec->sumtab = makePixelSumTab8();
    rec->sa_text = sarrayCreate(0);
    rec->dna_tochar = l_dnaCreate(0);

        /* The paa that holds the unscaled training bitmaps */
    rec->pixaa_u = pixaaCreate(rec->maxarraysize);

        /* Storage for debugging output */
    rec->pixadb_boot = pixaCreate(2);
    rec->pixadb_split = pixaCreate(2);

    return rec;
}
l_int32 main(int argc, char **argv) { char buf[512]; l_int32 delx, dely, etransx, etransy, w, h, area1, area2; l_int32 *stab, *ctab; l_float32 cx1, cy1, cx2, cy2, score; PIX *pix0, *pix1, *pix2; L_REGPARAMS *rp; if (regTestSetup(argc, argv, &rp)) return 1; /* ------------ Test of pixBestCorrelation() --------------- */ pix0 = pixRead("harmoniam100-11.png"); pix1 = pixConvertTo1(pix0, 160); pixGetDimensions(pix1, &w, &h, NULL); /* Now make a smaller image, translated by (-32, -12) * Except for the resizing, this is equivalent to * pix2 = pixTranslate(NULL, pix1, -32, -12, L_BRING_IN_WHITE); */ pix2 = pixCreate(w - 10, h, 1); pixRasterop(pix2, 0, 0, w, h, PIX_SRC, pix1, 32, 12); /* Get the number of FG pixels and the centroid locations */ stab = makePixelSumTab8(); ctab = makePixelCentroidTab8(); pixCountPixels(pix1, &area1, stab); pixCountPixels(pix2, &area2, stab); pixCentroid(pix1, ctab, stab, &cx1, &cy1); pixCentroid(pix2, ctab, stab, &cx2, &cy2); etransx = lept_roundftoi(cx1 - cx2); etransy = lept_roundftoi(cy1 - cy2); fprintf(stderr, "delta cx = %d, delta cy = %d\n", etransx, etransy); /* Get the best correlation, searching around the translation * where the centroids coincide */ pixBestCorrelation(pix1, pix2, area1, area2, etransx, etransy, 4, stab, &delx, &dely, &score, 5); fprintf(stderr, "delx = %d, dely = %d, score = %7.4f\n", delx, dely, score); regTestCompareValues(rp, 32, delx, 0); /* 0 */ regTestCompareValues(rp, 12, dely, 0); /* 1 */ regTestCheckFile(rp, "/tmp/junkcorrel_5.png"); /* 2 */ lept_rm(NULL, "junkcorrel_5.png"); FREE(stab); FREE(ctab); pixDestroy(&pix0); pixDestroy(&pix1); pixDestroy(&pix2); /* ------------ Test of pixCompareWithTranslation() ------------ */ /* Now use the pyramid to get the result. Do a translation * to remove pixels at the bottom from pix2, so that the * centroids are initially far apart. 
*/ pix1 = pixRead("harmoniam-11.tif"); pix2 = pixTranslate(NULL, pix1, -45, 25, L_BRING_IN_WHITE); l_pdfSetDateAndVersion(0); pixCompareWithTranslation(pix1, pix2, 160, &delx, &dely, &score, 1); pixDestroy(&pix1); pixDestroy(&pix2); fprintf(stderr, "delx = %d, dely = %d\n", delx, dely); regTestCompareValues(rp, 45, delx, 0); /* 3 */ regTestCompareValues(rp, -25, dely, 0); /* 4 */ regTestCheckFile(rp, "/tmp/junkcmp.pdf"); /* 5 */ regTestCheckFile(rp, "/tmp/junkcorrel.pdf"); /* 6 */ return regTestCleanup(rp); }
l_int32 main(int argc, char **argv) { l_int32 delx, dely, etransx, etransy, w, h, area1, area2; l_int32 *stab, *ctab; l_float32 cx1, cy1, cx2, cy2, score, fract; PIX *pix0, *pix1, *pix2, *pix3, *pix4, *pix5; L_REGPARAMS *rp; if (regTestSetup(argc, argv, &rp)) return 1; /* ------------ Test of pixBestCorrelation() --------------- */ pix0 = pixRead("harmoniam100-11.png"); pix1 = pixConvertTo1(pix0, 160); pixGetDimensions(pix1, &w, &h, NULL); /* Now make a smaller image, translated by (-32, -12) * Except for the resizing, this is equivalent to * pix2 = pixTranslate(NULL, pix1, -32, -12, L_BRING_IN_WHITE); */ pix2 = pixCreate(w - 10, h, 1); pixRasterop(pix2, 0, 0, w, h, PIX_SRC, pix1, 32, 12); /* Get the number of FG pixels and the centroid locations */ stab = makePixelSumTab8(); ctab = makePixelCentroidTab8(); pixCountPixels(pix1, &area1, stab); pixCountPixels(pix2, &area2, stab); pixCentroid(pix1, ctab, stab, &cx1, &cy1); pixCentroid(pix2, ctab, stab, &cx2, &cy2); etransx = lept_roundftoi(cx1 - cx2); etransy = lept_roundftoi(cy1 - cy2); fprintf(stderr, "delta cx = %d, delta cy = %d\n", etransx, etransy); /* Get the best correlation, searching around the translation * where the centroids coincide */ pixBestCorrelation(pix1, pix2, area1, area2, etransx, etransy, 4, stab, &delx, &dely, &score, 5); fprintf(stderr, "delx = %d, dely = %d, score = %7.4f\n", delx, dely, score); regTestCompareValues(rp, 32, delx, 0); /* 0 */ regTestCompareValues(rp, 12, dely, 0); /* 1 */ lept_mv("/tmp/lept/correl_5.png", "regout", NULL, NULL); regTestCheckFile(rp, "/tmp/regout/correl_5.png"); /* 2 */ lept_free(stab); lept_free(ctab); pixDestroy(&pix0); pixDestroy(&pix1); pixDestroy(&pix2); /* ------------ Test of pixCompareWithTranslation() ------------ */ /* Now use the pyramid to get the result. Do a translation * to remove pixels at the bottom from pix2, so that the * centroids are initially far apart. 
*/ pix1 = pixRead("harmoniam-11.tif"); pix2 = pixTranslate(NULL, pix1, -45, 25, L_BRING_IN_WHITE); l_pdfSetDateAndVersion(0); pixCompareWithTranslation(pix1, pix2, 160, &delx, &dely, &score, 1); pixDestroy(&pix1); pixDestroy(&pix2); fprintf(stderr, "delx = %d, dely = %d\n", delx, dely); regTestCompareValues(rp, 45, delx, 0); /* 3 */ regTestCompareValues(rp, -25, dely, 0); /* 4 */ lept_mv("/tmp/lept/correl.pdf", "regout", NULL, NULL); lept_mv("/tmp/lept/compare.pdf", "regout", NULL, NULL); regTestCheckFile(rp, "/tmp/regout/compare.pdf"); /* 5 */ regTestCheckFile(rp, "/tmp/regout/correl.pdf"); /* 6 */ /* ------------ Test of pixGetPerceptualDiff() --------------- */ pix0 = pixRead("greencover.jpg"); pix1 = pixRead("redcover.jpg"); /* pre-scaled to the same size */ /* Apply directly to the color images */ pixGetPerceptualDiff(pix0, pix1, 1, 3, 20, &fract, &pix2, &pix3); fprintf(stderr, "Fraction of color pixels = %f\n", fract); regTestCompareValues(rp, 0.061252, fract, 0.01); /* 7 */ regTestWritePixAndCheck(rp, pix2, IFF_JFIF_JPEG); /* 8 */ regTestWritePixAndCheck(rp, pix3, IFF_TIFF_G4); /* 9 */ pixDestroy(&pix2); pixDestroy(&pix3); /* Apply to grayscale images */ pix2 = pixConvertTo8(pix0, 0); pix3 = pixConvertTo8(pix1, 0); pixGetPerceptualDiff(pix2, pix3, 1, 3, 20, &fract, &pix4, &pix5); fprintf(stderr, "Fraction of grayscale pixels = %f\n", fract); regTestCompareValues(rp, 0.046928, fract, 0.0002); /* 10 */ regTestWritePixAndCheck(rp, pix4, IFF_JFIF_JPEG); /* 11 */ regTestWritePixAndCheck(rp, pix5, IFF_TIFF_G4); /* 12 */ pixDestroy(&pix0); pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); pixDestroy(&pix4); pixDestroy(&pix5); return regTestCleanup(rp); }