/*!
 *  recogCreate()
 *
 *      Input:  scalew  (scale all widths to this; use 0 for no scaling)
 *              scaleh  (scale all heights to this; use 0 for no scaling)
 *              templ_type (L_USE_AVERAGE or L_USE_ALL)
 *              threshold (for binarization; typically ~128)
 *              maxyshift (from nominal centroid alignment; typically 0 or 1)
 *      Return: recog, or null on error
 *
 *  Notes:
 *      (1) For a set trained on one font, such as numbers in a book,
 *          it is sensible to set scalew = scaleh = 0.
 *      (2) For a mixed training set, scaling to a fixed height,
 *          such as 32 pixels, but leaving the width unscaled, is effective.
 *      (3) The storage for most of the arrays is allocated when training
 *          is finished.
 */
L_RECOG *
recogCreate(l_int32      scalew,
            l_int32      scaleh,
            l_int32      templ_type,
            l_int32      threshold,
            l_int32      maxyshift)
{
L_RECOG  *recog;
PIXA     *pixa;
PIXAA    *paa;

    PROCNAME("recogCreate");

    if (scalew < 0 || scaleh < 0)
        return (L_RECOG *)ERROR_PTR("invalid scalew or scaleh", procName, NULL);
    if (templ_type != L_USE_AVERAGE && templ_type != L_USE_ALL)
        return (L_RECOG *)ERROR_PTR("invalid templ_type flag", procName, NULL);
    if (threshold < 1 || threshold > 255)
        return (L_RECOG *)ERROR_PTR("invalid threshold", procName, NULL);

    if ((recog = (L_RECOG *)CALLOC(1, sizeof(L_RECOG))) == NULL)
        return (L_RECOG *)ERROR_PTR("rec not made", procName, NULL);
    recog->templ_type = templ_type;
    recog->threshold = threshold;
    recog->scalew = scalew;
    recog->scaleh = scaleh;
    recog->maxyshift = maxyshift;
    recog->asperity_fr = DEFAULT_ASPERITY_FRACT;
    recogSetPadParams(recog, NULL, NULL, NULL, -1, -1, -1);
    recog->bmf = bmfCreate(NULL, 6);
    recog->bmf_size = 6;
    recog->maxarraysize = MAX_EXAMPLES_IN_CLASS;
    recog->index = -1;

        /* Generate the LUTs */
    recog->centtab = makePixelCentroidTab8();
    recog->sumtab = makePixelSumTab8();
    recog->sa_text = sarrayCreate(0);
    recog->dna_tochar = l_dnaCreate(0);

        /* Input default values for min component size for splitting.
         * These are overwritten when pixTrainingFinished() is called. */
    recog->min_splitw = 6;
    recog->min_splith = 6;
    recog->max_splith = 60;

        /* Generate the storage for the unscaled training bitmaps */
    paa = pixaaCreate(recog->maxarraysize);
    pixa = pixaCreate(1);
    pixaaInitFull(paa, pixa);
    pixaDestroy(&pixa);
    recog->pixaa_u = paa;

        /* Generate the storage for debugging */
    recog->pixadb_boot = pixaCreate(2);
    recog->pixadb_split = pixaCreate(2);
    return recog;
}
Exemple #2
0
/*!
 * \brief   recogCreate()
 *
 * \param[in]    scalew  scale all widths to this; use 0 otherwise
 * \param[in]    scaleh  scale all heights to this; use 0 otherwise
 * \param[in]    linew   width of normalized strokes; use 0 to skip
 * \param[in]    threshold for binarization; typically ~128; 0 for default
 * \param[in]    maxyshift from nominal centroid alignment; default is 1
 * \return  recog, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) If %scalew == 0 and %scaleh == 0, no scaling is done.
 *          If one of these is 0 and the other is > 0, scaling is isotropic
 *          to the requested size.  We typically do not set both > 0.
 *      (2) Use linew > 0 to convert the templates to images with fixed
 *          width strokes.  linew == 0 skips the conversion.
 *      (3) The only valid values for %maxyshift are 0, 1 and 2.
 *          It is recommended to use %maxyshift == 1 (default value).
 *          Using %maxyshift == 0 is much faster than %maxyshift == 1, but
 *          it is much less likely to find the template with the best
 *          correlation.  Use of anything but 1 results in a warning.
 *      (4) Scaling is used for finding outliers and for training a
 *          book-adapted recognizer (BAR) from a bootstrap recognizer (BSR).
 *          Scaling the height to a fixed value and scaling the width
 *          accordingly (e.g., %scaleh = 40, %scalew = 0) is recommended.
 *      (5) The storage for most of the arrays is allocated when training
 *          is finished.
 * </pre>
 */
L_RECOG *
recogCreate(l_int32      scalew,
            l_int32      scaleh,
            l_int32      linew,
            l_int32      threshold,
            l_int32      maxyshift)
{
L_RECOG  *recog;

    PROCNAME("recogCreate");

    if (scalew < 0 || scaleh < 0)
        return (L_RECOG *)ERROR_PTR("invalid scalew or scaleh", procName, NULL);
    if (linew > 10)
        return (L_RECOG *)ERROR_PTR("invalid linew > 10", procName, NULL);
    if (threshold == 0) threshold = DEFAULT_THRESHOLD;
    if (threshold < 0 || threshold > 255) {
        L_WARNING("invalid threshold; using default\n", procName);
        threshold = DEFAULT_THRESHOLD;
    }
    if (maxyshift < 0 || maxyshift > 2) {
         L_WARNING("invalid maxyshift; using default value\n", procName);
         maxyshift = DEFAULT_MAXYSHIFT;
    } else if (maxyshift == 0) {
         L_WARNING("Using maxyshift = 0; faster, worse correlation results\n",
                   procName);
    } else if (maxyshift == 2) {
         L_WARNING("Using maxyshift = 2; slower\n", procName);
    }

    if ((recog = (L_RECOG *)LEPT_CALLOC(1, sizeof(L_RECOG))) == NULL)
        return (L_RECOG *)ERROR_PTR("rec not made", procName, NULL);
    recog->templ_use = L_USE_ALL_TEMPLATES;  /* default */
    recog->threshold = threshold;
    recog->scalew = scalew;
    recog->scaleh = scaleh;
    recog->linew = linew;
    recog->maxyshift = maxyshift;
    recogSetParams(recog, 1, -1, -1.0, -1.0);
    recog->bmf = bmfCreate(NULL, 6);
    recog->bmf_size = 6;
    recog->maxarraysize = MAX_EXAMPLES_IN_CLASS;

        /* Generate the LUTs */
    recog->centtab = makePixelCentroidTab8();
    recog->sumtab = makePixelSumTab8();
    recog->sa_text = sarrayCreate(0);
    recog->dna_tochar = l_dnaCreate(0);

        /* Input default values for min component size for splitting.
         * These are overwritten when pixTrainingFinished() is called. */
    recog->min_splitw = 6;
    recog->max_splith = 60;

        /* Allocate the paa for the unscaled training bitmaps */
    recog->pixaa_u = pixaaCreate(recog->maxarraysize);

        /* Generate the storage for debugging */
    recog->pixadb_boot = pixaCreate(2);
    recog->pixadb_split = pixaCreate(2);
    return recog;
}
Exemple #3
0
l_int32 main(int    argc,
             char **argv)
{
char          buf[512];
l_int32       delx, dely, etransx, etransy, w, h, area1, area2;
l_int32      *stab, *ctab;
l_float32     cx1, cy1, cx2, cy2, score;
PIX          *pix0, *pix1, *pix2;
L_REGPARAMS  *rp;

    if (regTestSetup(argc, argv, &rp))
        return 1;


    /* ------------ Test of pixBestCorrelation() --------------- */
    pix0 = pixRead("harmoniam100-11.png");
    pix1 = pixConvertTo1(pix0, 160);
    pixGetDimensions(pix1, &w, &h, NULL);

        /* Now make a smaller image, translated by (-32, -12)
         * Except for the resizing, this is equivalent to
         *     pix2 = pixTranslate(NULL, pix1, -32, -12, L_BRING_IN_WHITE);  */
    pix2 = pixCreate(w - 10, h, 1);
    pixRasterop(pix2, 0, 0, w, h, PIX_SRC, pix1, 32, 12);

        /* Get the number of FG pixels and the centroid locations */
    stab = makePixelSumTab8();
    ctab = makePixelCentroidTab8();
    pixCountPixels(pix1, &area1, stab);
    pixCountPixels(pix2, &area2, stab);
    pixCentroid(pix1, ctab, stab, &cx1, &cy1);
    pixCentroid(pix2, ctab, stab, &cx2, &cy2);
    etransx = lept_roundftoi(cx1 - cx2);
    etransy = lept_roundftoi(cy1 - cy2);
    fprintf(stderr, "delta cx = %d, delta cy = %d\n",
            etransx, etransy);

        /* Get the best correlation, searching around the translation
         * where the centroids coincide */
    pixBestCorrelation(pix1, pix2, area1, area2, etransx, etransy,
                       4, stab, &delx, &dely, &score, 5);
    fprintf(stderr, "delx = %d, dely = %d, score = %7.4f\n",
            delx, dely, score);
    regTestCompareValues(rp, 32, delx, 0);   /* 0 */
    regTestCompareValues(rp, 12, dely, 0);   /* 1 */
    regTestCheckFile(rp, "/tmp/junkcorrel_5.png");   /* 2 */
    lept_rm(NULL, "junkcorrel_5.png");
    FREE(stab);
    FREE(ctab);
    pixDestroy(&pix0);
    pixDestroy(&pix1);
    pixDestroy(&pix2);


    /* ------------ Test of pixCompareWithTranslation() ------------ */
        /* Now use the pyramid to get the result.  Do a translation
         * to remove pixels at the bottom from pix2, so that the
         * centroids are initially far apart. */
    pix1 = pixRead("harmoniam-11.tif");
    pix2 = pixTranslate(NULL, pix1, -45, 25, L_BRING_IN_WHITE);
    l_pdfSetDateAndVersion(0);
    pixCompareWithTranslation(pix1, pix2, 160, &delx, &dely, &score, 1);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    fprintf(stderr, "delx = %d, dely = %d\n", delx, dely);
    regTestCompareValues(rp, 45, delx, 0);   /* 3 */
    regTestCompareValues(rp, -25, dely, 0);   /* 4 */
    regTestCheckFile(rp, "/tmp/junkcmp.pdf");   /* 5 */
    regTestCheckFile(rp, "/tmp/junkcorrel.pdf");  /* 6 */

    return regTestCleanup(rp);
}
l_int32 main(int    argc,
             char **argv)
{
l_int32       delx, dely, etransx, etransy, w, h, area1, area2;
l_int32      *stab, *ctab;
l_float32     cx1, cy1, cx2, cy2, score, fract;
PIX          *pix0, *pix1, *pix2, *pix3, *pix4, *pix5;
L_REGPARAMS  *rp;

    if (regTestSetup(argc, argv, &rp))
        return 1;


    /* ------------ Test of pixBestCorrelation() --------------- */
    pix0 = pixRead("harmoniam100-11.png");
    pix1 = pixConvertTo1(pix0, 160);
    pixGetDimensions(pix1, &w, &h, NULL);

        /* Now make a smaller image, translated by (-32, -12)
         * Except for the resizing, this is equivalent to
         *     pix2 = pixTranslate(NULL, pix1, -32, -12, L_BRING_IN_WHITE);  */
    pix2 = pixCreate(w - 10, h, 1);
    pixRasterop(pix2, 0, 0, w, h, PIX_SRC, pix1, 32, 12);

        /* Get the number of FG pixels and the centroid locations */
    stab = makePixelSumTab8();
    ctab = makePixelCentroidTab8();
    pixCountPixels(pix1, &area1, stab);
    pixCountPixels(pix2, &area2, stab);
    pixCentroid(pix1, ctab, stab, &cx1, &cy1);
    pixCentroid(pix2, ctab, stab, &cx2, &cy2);
    etransx = lept_roundftoi(cx1 - cx2);
    etransy = lept_roundftoi(cy1 - cy2);
    fprintf(stderr, "delta cx = %d, delta cy = %d\n",
            etransx, etransy);

        /* Get the best correlation, searching around the translation
         * where the centroids coincide */
    pixBestCorrelation(pix1, pix2, area1, area2, etransx, etransy,
                       4, stab, &delx, &dely, &score, 5);
    fprintf(stderr, "delx = %d, dely = %d, score = %7.4f\n",
            delx, dely, score);
    regTestCompareValues(rp, 32, delx, 0);   /* 0 */
    regTestCompareValues(rp, 12, dely, 0);   /* 1 */
    lept_mv("/tmp/lept/correl_5.png", "regout", NULL, NULL);
    regTestCheckFile(rp, "/tmp/regout/correl_5.png");   /* 2 */
    lept_free(stab);
    lept_free(ctab);
    pixDestroy(&pix0);
    pixDestroy(&pix1);
    pixDestroy(&pix2);


    /* ------------ Test of pixCompareWithTranslation() ------------ */
        /* Now use the pyramid to get the result.  Do a translation
         * to remove pixels at the bottom from pix2, so that the
         * centroids are initially far apart. */
    pix1 = pixRead("harmoniam-11.tif");
    pix2 = pixTranslate(NULL, pix1, -45, 25, L_BRING_IN_WHITE);
    l_pdfSetDateAndVersion(0);
    pixCompareWithTranslation(pix1, pix2, 160, &delx, &dely, &score, 1);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    fprintf(stderr, "delx = %d, dely = %d\n", delx, dely);
    regTestCompareValues(rp, 45, delx, 0);   /* 3 */
    regTestCompareValues(rp, -25, dely, 0);   /* 4 */
    lept_mv("/tmp/lept/correl.pdf", "regout", NULL, NULL);
    lept_mv("/tmp/lept/compare.pdf", "regout", NULL, NULL);
    regTestCheckFile(rp, "/tmp/regout/compare.pdf");   /* 5 */
    regTestCheckFile(rp, "/tmp/regout/correl.pdf");  /* 6 */

    /* ------------ Test of pixGetPerceptualDiff() --------------- */
    pix0 = pixRead("greencover.jpg");
    pix1 = pixRead("redcover.jpg");  /* pre-scaled to the same size */
        /* Apply directly to the color images */
    pixGetPerceptualDiff(pix0, pix1, 1, 3, 20, &fract, &pix2, &pix3);
    fprintf(stderr, "Fraction of color pixels = %f\n", fract);
    regTestCompareValues(rp, 0.061252, fract, 0.01);  /* 7 */
    regTestWritePixAndCheck(rp, pix2, IFF_JFIF_JPEG);  /* 8 */
    regTestWritePixAndCheck(rp, pix3, IFF_TIFF_G4);  /* 9 */
    pixDestroy(&pix2);
    pixDestroy(&pix3);
        /* Apply to grayscale images */
    pix2 = pixConvertTo8(pix0, 0);
    pix3 = pixConvertTo8(pix1, 0);
    pixGetPerceptualDiff(pix2, pix3, 1, 3, 20, &fract, &pix4, &pix5);
    fprintf(stderr, "Fraction of grayscale pixels = %f\n", fract);
    regTestCompareValues(rp, 0.046928, fract, 0.0002);  /* 10 */
    regTestWritePixAndCheck(rp, pix4, IFF_JFIF_JPEG);  /* 11 */
    regTestWritePixAndCheck(rp, pix5, IFF_TIFF_G4);  /* 12 */
    pixDestroy(&pix0);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);
    pixDestroy(&pix4);
    pixDestroy(&pix5);

    return regTestCleanup(rp);
}