/*!
 * \brief   recogCreateFromPixa()
 *
 * \param[in]    pixa         of labeled, 1 bpp images
 * \param[in]    scalew       scale all widths to this; use 0 otherwise
 * \param[in]    scaleh       scale all heights to this; use 0 otherwise
 * \param[in]    linew        width of normalized strokes; use 0 to skip
 * \param[in]    threshold    for binarization; typically ~150
 * \param[in]    maxyshift    from nominal centroid alignment; default is 1
 * \return  recog, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) Convenience entry point for training from labeled data;
 *          the pixa can be read from file.
 *      (2) The pixa should hold the unscaled bitmaps used for training.
 *      (3) See recogCreate() for the use of %scalew, %scaleh and %linew.
 *      (4) The recommended (default) value of %maxyshift is 1.
 *      (5) All examples in one class (i.e., sharing a character label)
 *          should be similar.  They can be made similar by invoking
 *          recogRemoveOutliers[1,2]() on %pixa before calling this.
 *      (6) The work is done in two steps: recogCreateFromPixaNoFinish()
 *          builds the recog, then recogTrainingFinished() is applied to
 *          it.  A NULL handle after either step is reported as an error.
 * </pre>
 */
L_RECOG *
recogCreateFromPixa(PIXA    *pixa,
                    l_int32  scalew,
                    l_int32  scaleh,
                    l_int32  linew,
                    l_int32  threshold,
                    l_int32  maxyshift)
{
L_RECOG  *trained;

    PROCNAME("recogCreateFromPixa");

    if (pixa == NULL)
        return (L_RECOG *)ERROR_PTR("pixa not defined", procName, NULL);

    trained = recogCreateFromPixaNoFinish(pixa, scalew, scaleh, linew,
                                          threshold, maxyshift);
    if (trained != NULL) {
            /* The handle is passed by address; if it comes back NULL
             * the templates are considered bad. */
        recogTrainingFinished(&trained, 1, -1, -1.0);
        if (trained != NULL)
            return trained;
        return (L_RECOG *)ERROR_PTR("bad templates", procName, NULL);
    }

    return (L_RECOG *)ERROR_PTR("recog not made", procName, NULL);
}
/*!
 *  recogAddAllSamples()
 *
 *      Input:  recog
 *              paa (pixaa from previously trained recog)
 *              debug
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) This is used with the serialization routine recogRead(),
 *          where each pixa in the pixaa holds the characters of one
 *          class.  Two different pixa may represent characters with
 *          the same label.  Before this function is called, the number
 *          of character classes (the setsize field in recog) is
 *          verified to equal the number of pixa in the paa.  The
 *          character label for each set is taken from the sa_text field.
 *      (2) Every pix of class i in paa is added, as a clone, to class i
 *          of recog->pixaa_u.  After all samples have been added,
 *          recogTrainingFinished() is called on the recog.
 */
static l_int32
recogAddAllSamples(L_RECOG  *recog,
                   PIXAA    *paa,
                   l_int32   debug)
{
char    *label;
l_int32  iclass, isamp, nclasses, nsamples;
PIX     *sample;
PIXA    *classpixa;

    PROCNAME("recogAddAllSamples");

    if (recog == NULL)
        return ERROR_INT("recog not defined", procName, 1);
    if (paa == NULL)
        return ERROR_INT("paa not defined", procName, 1);

    nclasses = pixaaGetCount(paa, NULL);
    iclass = 0;
    while (iclass < nclasses) {
        classpixa = pixaaGetPixa(paa, iclass, L_CLONE);
        nsamples = pixaGetCount(classpixa);
            /* The label is only used for the debug output below */
        label = sarrayGetString(recog->sa_text, iclass, L_NOCOPY);

        for (isamp = 0; isamp < nsamples; isamp++) {
            sample = pixaGetPix(classpixa, isamp, L_CLONE);
            if (debug)
                fprintf(stderr, "pix[%d,%d]: text = %s\n",
                        iclass, isamp, label);
                /* The clone is handed over to pixaa_u (L_INSERT) */
            pixaaAddPix(recog->pixaa_u, iclass, sample, NULL, L_INSERT);
        }

        pixaDestroy(&classpixa);
        iclass++;
    }

    recogTrainingFinished(recog, debug);
    return 0;
}
/*!
 *  recogCreateFromPixa()
 *
 *      Input:  pixa (of labelled, 1 bpp images)
 *              scalew (scale all widths to this; use 0 for no scaling)
 *              scaleh (scale all heights to this; use 0 for no scaling)
 *              templ_type (L_USE_AVERAGE or L_USE_ALL)
 *              threshold (for binarization; typically ~128)
 *              maxyshift (from nominal centroid alignment; typically 0 or 1)
 *              fontdir (<optional> directory for bitmap fonts for debugging)
 *      Return: recog, or null on error
 *
 *  Notes:
 *      (1) Convenience function for training from labelled data.
 *          The pixa can be read from file.
 *      (2) The pixa should contain the unscaled bitmaps used for training.
 *      (3) The characters should work as a single "font": each image
 *          example is put into the class defined by its character
 *          label, so all examples in the same class should be similar.
 *      (4) The pixa is rejected if any pix is missing, if not all pix
 *          are 1 bpp, or if no pix carries a text string.  When only
 *          some pix lack text, an error is logged and those pix are
 *          skipped during training.
 */
L_RECOG *
recogCreateFromPixa(PIXA        *pixa,
                    l_int32      scalew,
                    l_int32      scaleh,
                    l_int32      templ_type,
                    l_int32      threshold,
                    l_int32      maxyshift,
                    const char  *fontdir)
{
char     *label;
l_int32   allpresent, npix, idx, nlabelled;
L_RECOG  *recog;
PIX      *sample;

    PROCNAME("recogCreateFromPixa");

        /* Validate the input set before building anything */
    if (pixa == NULL)
        return (L_RECOG *)ERROR_PTR("pixa not defined", procName, NULL);
    if (pixaVerifyDepth(pixa, NULL) != 1)
        return (L_RECOG *)ERROR_PTR("not all pix are 1 bpp", procName, NULL);
    pixaIsFull(pixa, &allpresent, NULL);
    if (!allpresent)
        return (L_RECOG *)ERROR_PTR("not all pix are present", procName, NULL);

    npix = pixaGetCount(pixa);
    pixaCountText(pixa, &nlabelled);
    if (nlabelled == 0)
        return (L_RECOG *)ERROR_PTR("no pix have text strings", procName, NULL);
    if (nlabelled < npix) {
        L_ERROR("%d text strings < %d pix\n", procName, nlabelled, npix);
    }

    recog = recogCreate(scalew, scaleh, templ_type, threshold,
                        maxyshift, fontdir);
    if (recog == NULL)
        return (L_RECOG *)ERROR_PTR("recog not made", procName, NULL);

        /* Train on every pix that carries a non-empty label */
    for (idx = 0; idx < npix; idx++) {
        sample = pixaGetPix(pixa, idx, L_CLONE);
        label = pixGetText(sample);
        if (label != NULL && label[0] != '\0') {
            recogTrainLabelled(recog, sample, NULL, label, 0, 0);
        } else {
            L_ERROR("pix[%d] has no text\n", procName, idx);
        }
        pixDestroy(&sample);
    }

    recogTrainingFinished(recog, 0);
    return recog;
}
/*!
 * \brief   recogAddAllSamples()
 *
 * \param[in,out]   precog    addr of recog; set to NULL if the recog
 *                            is destroyed
 * \param[in]       paa       pixaa from previously trained recog
 * \param[in]       debug
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) If %paa is not defined, or if the recog does not survive
 *          recogTrainingFinished(), the input recog is destroyed and
 *          *precog is set to NULL, so the caller is never left holding
 *          a stale pointer.  If %precog or *precog is NULL on entry
 *          there is nothing to destroy and 1 is returned.
 *      (2) This is used with the serialization routine recogRead(),
 *          where each pixa in the pixaa represents a set of characters
 *          in a different class.  Before calling this function, we have
 *          verified that the number of character classes, given by the
 *          setsize field in %recog, equals the number of pixa in the paa.
 *          The character labels for each set are in the sa_text field.
 *      (3) For each class, an empty pixa is first appended to
 *          recog->pixaa_u; the pix of that class are then added to it
 *          as clones.
 * </pre>
 */
static l_int32
recogAddAllSamples(L_RECOG  **precog,
                   PIXAA     *paa,
                   l_int32    debug)
{
char     *text;
l_int32   i, j, nc, ns;
PIX      *pix;
PIXA     *pixa, *pixa1;
L_RECOG  *recog;

    PROCNAME("recogAddAllSamples");

    if (!precog)
        return ERROR_INT("&recog not defined", procName, 1);
    if ((recog = *precog) == NULL)
        return ERROR_INT("recog not defined", procName, 1);
    if (!paa) {
        recogDestroy(&recog);
        *precog = NULL;  /* don't leave the caller with a dangling ptr */
        return ERROR_INT("paa not defined", procName, 1);
    }

    nc = pixaaGetCount(paa, NULL);
    for (i = 0; i < nc; i++) {
        pixa = pixaaGetPixa(paa, i, L_CLONE);
        ns = pixaGetCount(pixa);
        text = sarrayGetString(recog->sa_text, i, L_NOCOPY);
            /* Make the destination pixa for class i before filling it */
        pixa1 = pixaCreate(ns);
        pixaaAddPixa(recog->pixaa_u, pixa1, L_INSERT);
        for (j = 0; j < ns; j++) {
            pix = pixaGetPix(pixa, j, L_CLONE);
            if (debug)
                fprintf(stderr, "pix[%d,%d]: text = %s\n", i, j, text);
            pixaaAddPix(recog->pixaa_u, i, pix, NULL, L_INSERT);
        }
        pixaDestroy(&pixa);
    }

    recogTrainingFinished(&recog, 0, -1, -1.0);  /* For second parameter,
                                             see comment in recogRead() */
        /* Propagate the outcome to the caller's handle: it becomes NULL
         * if the recog was destroyed during finishing. */
    *precog = recog;
    if (!recog)
        return ERROR_INT("bad templates; recog destroyed", procName, 1);
    return 0;
}
PIXA *MakeBootnum2(void) { char *fname; l_int32 i, n, w, h; BOX *box; PIX *pix; PIXA *pixa; L_RECOG *recog; SARRAY *sa; /* Phase 1: generate recog from the digit data */ recog = recogCreate(20, 32, L_USE_ALL, 120, 1); sa = getSortedPathnamesInDirectory("recog/bootnums", "png", 0, 0); n = sarrayGetCount(sa); for (i = 0; i < n; i++) { /* Read each pix: grayscale, multi-character, labelled */ fname = sarrayGetString(sa, i, L_NOCOPY); if ((pix = pixRead(fname)) == NULL) { fprintf(stderr, "Can't read %s\n", fname); continue; } /* Convert to a set of 1 bpp, single character, labelled */ pixGetDimensions(pix, &w, &h, NULL); box = boxCreate(0, 0, w, h); recogTrainLabelled(recog, pix, box, NULL, 1, 0); pixDestroy(&pix); boxDestroy(&box); } recogTrainingFinished(recog, 1); sarrayDestroy(&sa); /* Phase 2: generate pixa consisting of 1 bpp, single character pix */ recogWritePixa("/tmp/lept/recog/digits/bootnum2.pa", recog); pixa = pixaRead("/tmp/lept/recog/digits/bootnum2.pa"); recogDestroy(&recog); return pixa; }