/*!
 *  recogaWritePixaa()
 *
 *      Input:  filename
 *              recoga
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) For each recognizer, this generates a pixa of all the
 *          unscaled images.  They are combined into a pixaa for
 *          the set of recognizers.  Each pix has its character
 *          string in the pix text field.
 *      (2) As a side-effect, the character class label is written
 *          into each pix in recog.
 *      (3) An error is returned, and nothing is written, if any
 *          recognizer cannot be retrieved or flattened.  An error
 *          is also returned if the pixaa cannot be written.
 */
l_int32
recogaWritePixaa(const char  *filename,
                 L_RECOGA    *recoga)
{
l_int32   i, ret;
PIXA     *pixa;
PIXAA    *paa;
L_RECOG  *recog;

    PROCNAME("recogaWritePixaa");

    if (!filename)
        return ERROR_INT("filename not defined", procName, 1);
    if (!recoga)
        return ERROR_INT("recoga not defined", procName, 1);

    if ((paa = pixaaCreate(recoga->n)) == NULL)
        return ERROR_INT("paa not made", procName, 1);

    for (i = 0; i < recoga->n; i++) {
            /* recog is dereferenced below, so it must be valid */
        if ((recog = recogaGetRecog(recoga, i)) == NULL) {
            pixaaDestroy(&paa);
            return ERROR_INT("recog not found", procName, 1);
        }
        recogAddCharstrLabels(recog);
        pixa = pixaaFlattenToPixa(recog->pixaa_u, NULL, L_CLONE);
        if (!pixa) {
                /* Skipping a recognizer would silently shift the
                 * pixa indices relative to the recog indices. */
            pixaaDestroy(&paa);
            return ERROR_INT("pixa not made", procName, 1);
        }
        pixaaAddPixa(paa, pixa, L_INSERT);  /* paa takes ownership */
    }

        /* Report a failed write instead of claiming success */
    ret = pixaaWrite(filename, paa);
    pixaaDestroy(&paa);
    if (ret)
        return ERROR_INT("paa not written", procName, 1);
    return 0;
}
/*!
 * \brief   recogExtractPixa()
 *
 * \param[in]    recog
 * \return  pixa if OK, NULL on error
 *
 * <pre>
 * Notes:
 *      (1) The character class labels are first written into the pix
 *          of the recognizer with recogAddCharstrLabels().
 *      (2) The returned pixa is the flattening of pixaa_u (the
 *          unscaled images), made with L_CLONE, so each pix carries
 *          its label in the pix text field.
 * </pre>
 */
PIXA *
recogExtractPixa(L_RECOG  *recog)
{
PIXA  *pixa_flat;

    PROCNAME("recogExtractPixa");

    if (recog == NULL)
        return (PIXA *)ERROR_PTR("recog not defined", procName, NULL);

        /* Label first, so the flattened clones see the text */
    recogAddCharstrLabels(recog);

    pixa_flat = pixaaFlattenToPixa(recog->pixaa_u, NULL, L_CLONE);
    return pixa_flat;
}
/*! * \brief pixGetWordsInTextlines() * * \param[in] pixs 1 bpp, typ. 75 - 150 ppi * \param[in] minwidth, minheight of saved components; smaller are discarded * \param[in] maxwidth, maxheight of saved components; larger are discarded * \param[out] pboxad word boxes sorted in textline line order * \param[out] ppixad word images sorted in textline line order * \param[out] pnai index of textline for each word * \return 0 if OK, 1 on error * * <pre> * Notes: * (1) The input should be at a resolution of between 75 and 150 ppi. * (2) The four size constraints on saved components are all * scaled by %reduction. * (3) The result are word images (and their b.b.), extracted in * textline order, at either full res or 2x reduction, * and with a numa giving the textline index for each word. * (4) The pixa and boxa interfaces should make this type of * application simple to put together. The steps are: * ~ generate first estimate of word masks * ~ get b.b. of these, and remove the small and big ones * ~ extract pixa of the word images, using the b.b. * ~ sort actual word images in textline order (2d) * ~ flatten them to a pixa (1d), saving the textline index * for each pix * (5) In an actual application, it may be desirable to pre-filter * the input image to remove large components, to extract * single columns of text, and to deskew them. For example, * to remove both large components and small noisy components * that can interfere with the statistics used to estimate * parameters for segmenting by words, but still retain text lines, * the following image preprocessing can be done: * Pix *pixt = pixMorphSequence(pixs, "c40.1", 0); * Pix *pixf = pixSelectBySize(pixt, 0, 60, 8, * L_SELECT_HEIGHT, L_SELECT_IF_LT, NULL); * pixAnd(pixf, pixf, pixs); // the filtered image * The closing turns text lines into long blobs, but does not * significantly increase their height. 
But if there are many * small connected components in a dense texture, this is likely * to generate tall components that will be eliminated in pixf. * </pre> */ l_int32 pixGetWordsInTextlines(PIX *pixs, l_int32 minwidth, l_int32 minheight, l_int32 maxwidth, l_int32 maxheight, BOXA **pboxad, PIXA **ppixad, NUMA **pnai) { BOXA *boxa1, *boxad; BOXAA *baa; NUMA *nai; NUMAA *naa; PIXA *pixa1, *pixad; PIXAA *paa; PROCNAME("pixGetWordsInTextlines"); if (!pboxad || !ppixad || !pnai) return ERROR_INT("&boxad, &pixad, &nai not all defined", procName, 1); *pboxad = NULL; *ppixad = NULL; *pnai = NULL; if (!pixs) return ERROR_INT("pixs not defined", procName, 1); /* Get the bounding boxes of the words from the word mask. */ pixWordBoxesByDilation(pixs, minwidth, minheight, maxwidth, maxheight, &boxa1, NULL, NULL); /* Generate a pixa of the word images */ pixa1 = pixaCreateFromBoxa(pixs, boxa1, NULL); /* mask over each word */ /* Sort the bounding boxes of these words by line. We use the * index mapping to allow identical sorting of the pixa. */ baa = boxaSort2d(boxa1, &naa, -1, -1, 4); paa = pixaSort2dByIndex(pixa1, naa, L_CLONE); /* Flatten the word paa */ pixad = pixaaFlattenToPixa(paa, &nai, L_CLONE); boxad = pixaGetBoxa(pixad, L_COPY); *pnai = nai; *pboxad = boxad; *ppixad = pixad; pixaDestroy(&pixa1); boxaDestroy(&boxa1); boxaaDestroy(&baa); pixaaDestroy(&paa); numaaDestroy(&naa); return 0; }
/*!
 *  recogWritePixa()
 *
 *      Input:  filename
 *              recog
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) This generates a pixa of all the unscaled images in the
 *          recognizer, where each one has its character string in
 *          the pix text field, by flattening pixaa_u to a pixa.
 *      (2) As a side-effect, the character class label is written
 *          into each pix in recog.
 *      (3) An error is returned if the flattened pixa cannot be made
 *          or cannot be written to %filename.
 */
l_int32
recogWritePixa(const char  *filename,
               L_RECOG     *recog)
{
l_int32  ret;
PIXA    *pixa;

    PROCNAME("recogWritePixa");

    if (!filename)
        return ERROR_INT("filename not defined", procName, 1);
    if (!recog)
        return ERROR_INT("recog not defined", procName, 1);

    recogAddCharstrLabels(recog);
    pixa = pixaaFlattenToPixa(recog->pixaa_u, NULL, L_CLONE);
    if (!pixa)
        return ERROR_INT("pixa not made", procName, 1);

        /* Report a failed write instead of claiming success */
    ret = pixaWrite(filename, pixa);
    pixaDestroy(&pixa);
    if (ret)
        return ERROR_INT("pixa not written", procName, 1);
    return 0;
}
/*!
 *  recogCreateFromRecog()
 *
 *      Input:  recs (source recog with arbitrary input parameters)
 *              scalew (scale all widths to this; use 0 for no scaling)
 *              scaleh (scale all heights to this; use 0 for no scaling)
 *              templ_type (L_USE_AVERAGE or L_USE_ALL)
 *              threshold (for binarization; typically ~128)
 *              maxyshift (from nominal centroid alignment; typically 0 or 1)
 *      Return: recd, or null on error
 *
 *  Notes:
 *      (1) This is a convenience function that builds a new recog from
 *          the unscaled training data (pixaa_u) of an existing recog.
 *      (2) The unscaled templates are flattened into a temporary pixa
 *          of clones, handed to recogCreateFromPixa() along with the
 *          other parameters, and the temporary pixa is then destroyed.
 */
L_RECOG *
recogCreateFromRecog(L_RECOG  *recs,
                     l_int32   scalew,
                     l_int32   scaleh,
                     l_int32   templ_type,
                     l_int32   threshold,
                     l_int32   maxyshift)
{
PIXA     *pixa_u;
L_RECOG  *recog_new;

    PROCNAME("recogCreateFromRecog");

    if (recs == NULL)
        return (L_RECOG *)ERROR_PTR("recs not defined", procName, NULL);

        /* Temporary flat view of the source's unscaled templates */
    pixa_u = pixaaFlattenToPixa(recs->pixaa_u, NULL, L_CLONE);

    recog_new = recogCreateFromPixa(pixa_u, scalew, scaleh, templ_type,
                                    threshold, maxyshift);

    pixaDestroy(&pixa_u);
    return recog_new;
}
/*! * \brief pixGetWordsInTextlines() * * \param[in] pixs 1 bpp, typ. 300 ppi * \param[in] reduction 1 for input res; 2 for 2x reduction of input res * \param[in] minwidth, minheight of saved components; smaller are discarded * \param[in] maxwidth, maxheight of saved components; larger are discarded * \param[out] pboxad word boxes sorted in textline line order * \param[out] ppixad word images sorted in textline line order * \param[out] pnai index of textline for each word * \return 0 if OK, 1 on error * * <pre> * Notes: * (1) The input should be at a resolution of about 300 ppi. * The word masks and word images can be computed at either * 150 ppi or 300 ppi. For the former, set reduction = 2. * (2) The four size constraints on saved components are all * scaled by %reduction. * (3) The result are word images (and their b.b.), extracted in * textline order, at either full res or 2x reduction, * and with a numa giving the textline index for each word. * (4) The pixa and boxa interfaces should make this type of * application simple to put together. The steps are: * ~ optionally reduce by 2x * ~ generate first estimate of word masks * ~ get b.b. of these, and remove the small and big ones * ~ extract pixa of the word images, using the b.b. * ~ sort actual word images in textline order (2d) * ~ flatten them to a pixa (1d), saving the textline index * for each pix * (5) In an actual application, it may be desirable to pre-filter * the input image to remove large components, to extract * single columns of text, and to deskew them. 
For example, * to remove both large components and small noisy components * that can interfere with the statistics used to estimate * parameters for segmenting by words, but still retain text lines, * the following image preprocessing can be done: * Pix *pixt = pixMorphSequence(pixs, "c40.1", 0); * Pix *pixf = pixSelectBySize(pixt, 0, 60, 8, * L_SELECT_HEIGHT, L_SELECT_IF_LT, NULL); * pixAnd(pixf, pixf, pixs); // the filtered image * The closing turns text lines into long blobs, but does not * significantly increase their height. But if there are many * small connected components in a dense texture, this is likely * to generate tall components that will be eliminated in pixf. * </pre> */ l_int32 pixGetWordsInTextlines(PIX *pixs, l_int32 reduction, l_int32 minwidth, l_int32 minheight, l_int32 maxwidth, l_int32 maxheight, BOXA **pboxad, PIXA **ppixad, NUMA **pnai) { l_int32 maxdil; BOXA *boxa1, *boxad; BOXAA *baa; NUMA *nai; NUMAA *naa; PIXA *pixa1, *pixad; PIX *pix1; PIXAA *paa; PROCNAME("pixGetWordsInTextlines"); if (!pboxad || !ppixad || !pnai) return ERROR_INT("&boxad, &pixad, &nai not all defined", procName, 1); *pboxad = NULL; *ppixad = NULL; *pnai = NULL; if (!pixs) return ERROR_INT("pixs not defined", procName, 1); if (reduction != 1 && reduction != 2) return ERROR_INT("reduction not in {1,2}", procName, 1); if (reduction == 1) { pix1 = pixClone(pixs); maxdil = 18; } else { /* reduction == 2 */ pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); maxdil = 9; } /* Get the bounding boxes of the words from the word mask. */ pixWordBoxesByDilation(pix1, maxdil, minwidth, minheight, maxwidth, maxheight, &boxa1, NULL); /* Generate a pixa of the word images */ pixa1 = pixaCreateFromBoxa(pix1, boxa1, NULL); /* mask over each word */ /* Sort the bounding boxes of these words by line. We use the * index mapping to allow identical sorting of the pixa. 
*/ baa = boxaSort2d(boxa1, &naa, -1, -1, 4); paa = pixaSort2dByIndex(pixa1, naa, L_CLONE); /* Flatten the word paa */ pixad = pixaaFlattenToPixa(paa, &nai, L_CLONE); boxad = pixaGetBoxa(pixad, L_COPY); *pnai = nai; *pboxad = boxad; *ppixad = pixad; pixDestroy(&pix1); pixaDestroy(&pixa1); boxaDestroy(&boxa1); boxaaDestroy(&baa); pixaaDestroy(&paa); numaaDestroy(&naa); return 0; }