/*! * jbCorrelation() * * Input: dirin (directory of input images) * thresh (typically ~0.8) * weight (typically ~0.6) * components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS) * rootname (for output files) * firstpage (0-based) * npages (use 0 for all pages in dirin) * renderflag (1 to render from templates; 0 to skip) * Return: 0 if OK, 1 on error * * Notes: * (1) The images must be 1 bpp. If they are not, you can convert * them using convertFilesTo1bpp(). * (2) See prog/jbcorrelation for generating more output (e.g., * for debugging) */ l_int32 jbCorrelation(const char *dirin, l_float32 thresh, l_float32 weight, l_int32 components, const char *rootname, l_int32 firstpage, l_int32 npages, l_int32 renderflag) { char filename[L_BUF_SIZE]; l_int32 nfiles, i, numpages; JBDATA *data; JBCLASSER *classer; PIX *pix; PIXA *pixa; SARRAY *safiles; PROCNAME("jbCorrelation"); if (!dirin) return ERROR_INT("dirin not defined", procName, 1); if (!rootname) return ERROR_INT("rootname not defined", procName, 1); if (components != JB_CONN_COMPS && components != JB_CHARACTERS && components != JB_WORDS) return ERROR_INT("components invalid", procName, 1); safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages); nfiles = sarrayGetCount(safiles); /* Classify components */ classer = jbCorrelationInit(components, 0, 0, thresh, weight); jbAddPages(classer, safiles); /* Save data */ data = jbDataSave(classer); jbDataWrite(rootname, data); /* Optionally, render pages using class templates */ if (renderflag) { pixa = jbDataRender(data, FALSE); numpages = pixaGetCount(pixa); if (numpages != nfiles) fprintf(stderr, "numpages = %d, nfiles = %d, not equal!\n", numpages, nfiles); for (i = 0; i < numpages; i++) { pix = pixaGetPix(pixa, i, L_CLONE); snprintf(filename, L_BUF_SIZE, "%s.%05d", rootname, i); fprintf(stderr, "filename: %s\n", filename); pixWrite(filename, pix, IFF_PNG); pixDestroy(&pix); } pixaDestroy(&pixa); } sarrayDestroy(&safiles); jbClasserDestroy(&classer); jbDataDestroy(&data); return 0; }
JBDATA * classify_components (int num_input_files, char **input_files, double thresh, double weight) { /* JBCLASSER* classer = jbCorrelationInit(JB_CONN_COMPS, 9999, 9999, thresh, weight); */ JBCLASSER *classer = jbCorrelationInitWithoutComponents (JB_CONN_COMPS, 9999, 9999, thresh, weight); if (classer == NULL) { error_quit ("Unable to create leptonica JBCLASSER."); } int i; for (i = 0; i < num_input_files; i++) { PIX *page = pixRead (input_files[i]); if (page == NULL) { printf ("Problem with page %s\n", input_files[i]); error_quit ("Unable to read Page"); } if (pixGetDepth (page) != 1) { printf ("Input file %s is not 1bpp\n", input_files[i]); error_quit ("Only 1bpp (black and white) images currently supported."); } if (jbAddPage (classer, page) == 1) { printf ("Problem with page %s\n", input_files[i]); error_quit ("Unable to add page to JBCLASSIFIER."); } pixDestroy (&page); } /* This is the part we have to add stuff to save each glyph separately */ JBDATA *data = jbDataSave (classer); if (data == NULL) { error_quit ("Unable to create the leptonica JBDATA."); } jbClasserDestroy (&classer); return data; }
main(int argc, char **argv) { char filename[BUF_SIZE]; char *dirin, *rootname, *fname; l_int32 reduction, i, firstpage, npages, nfiles; l_float32 thresh, weight; JBDATA *data; JBCLASSER *classer; NUMA *natl; SARRAY *safiles; PIX *pix; PIXA *pixa, *pixadb; static char mainName[] = "jbwords"; if (argc != 6 && argc != 8) exit(ERROR_INT( " Syntax: jbwords dirin reduction thresh weight rootname [firstpage, npages]", mainName, 1)); dirin = argv[1]; reduction = atoi(argv[2]); thresh = atof(argv[3]); weight = atof(argv[4]); rootname = argv[5]; if (argc == 6) { firstpage = 0; npages = 0; } else { firstpage = atoi(argv[6]); npages = atoi(argv[7]); } classer = jbWordsInTextlines(dirin, reduction, MAX_WORD_WIDTH, MAX_WORD_HEIGHT, thresh, weight, &natl, firstpage, npages); /* Save and write out the result */ data = jbDataSave(classer); jbDataWrite(rootname, data); #if RENDER_PAGES /* Render the pages from the classifier data, and write to file. * Use debugflag == FALSE to omit outlines of each component. */ pixa = jbDataRender(data, FALSE); npages = pixaGetCount(pixa); for (i = 0; i < npages; i++) { pix = pixaGetPix(pixa, i, L_CLONE); snprintf(filename, BUF_SIZE, "%s.%05d", rootname, i); fprintf(stderr, "filename: %s\n", filename); pixWrite(filename, pix, IFF_PNG); pixDestroy(&pix); } pixaDestroy(&pixa); #endif /* RENDER_PAGES */ #if RENDER_DEBUG /* Use debugflag == TRUE to see outlines of each component. */ pixadb = jbDataRender(data, TRUE); /* Write the debug pages out */ npages = pixaGetCount(pixadb); for (i = 0; i < npages; i++) { pix = pixaGetPix(pixadb, i, L_CLONE); snprintf(filename, BUF_SIZE, "%s.db.%05d", rootname, i); fprintf(stderr, "filename: %s\n", filename); pixWrite(filename, pix, IFF_PNG); pixDestroy(&pix); } pixaDestroy(&pixadb); #endif /* RENDER_DEBUG */ jbClasserDestroy(&classer); jbDataDestroy(&data); numaDestroy(&natl); return 0; }
int main(int argc, char **argv) { char filename[BUF_SIZE]; char *dirin, *rootname, *fname; l_int32 i, firstpage, npages, nfiles; l_float32 thresh, weight; JBDATA *data; JBCLASSER *classer; SARRAY *safiles; PIX *pix, *pixt; PIXA *pixa, *pixadb; static char mainName[] = "jbcorrelation"; if (argc != 5 && argc != 7) return ERROR_INT(" Syntax: jbcorrelation dirin thresh weight " "rootname [firstpage, npages]", mainName, 1); dirin = argv[1]; thresh = atof(argv[2]); weight = atof(argv[3]); rootname = argv[4]; if (argc == 5) { firstpage = 0; npages = 0; } else { firstpage = atoi(argv[5]); npages = atoi(argv[6]); } #if 0 /*--------------------------------------------------------------*/ jbCorrelation(dirin, thresh, weight, COMPONENTS, rootname, firstpage, npages, 1); /*--------------------------------------------------------------*/ #else /*--------------------------------------------------------------*/ safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages); nfiles = sarrayGetCount(safiles); sarrayWriteStream(stderr, safiles); /* Classify components on requested pages */ startTimer(); classer = jbCorrelationInit(COMPONENTS, 0, 0, thresh, weight); jbAddPages(classer, safiles); fprintf(stderr, "Time to generate classes: %6.3f sec\n", stopTimer()); /* Save and write out the result */ data = jbDataSave(classer); jbDataWrite(rootname, data); fprintf(stderr, "Number of classes: %d\n", classer->nclass); /* Render the pages from the classifier data. * Use debugflag == FALSE to omit outlines of each component. */ pixa = jbDataRender(data, FALSE); /* Write the pages out */ npages = pixaGetCount(pixa); if (npages != nfiles) fprintf(stderr, "npages = %d, nfiles = %d, not equal!\n", npages, nfiles); for (i = 0; i < npages; i++) { pix = pixaGetPix(pixa, i, L_CLONE); snprintf(filename, BUF_SIZE, "%s.%05d", rootname, i); fprintf(stderr, "filename: %s\n", filename); pixWrite(filename, pix, IFF_PNG); pixDestroy(&pix); } #if DISPLAY_DIFFERENCE fname = sarrayGetString(safiles, 0, 0); pixt = pixRead(fname); pix = pixaGetPix(pixa, 0, L_CLONE); pixXor(pixt, pixt, pix); pixWrite("junk_output_diff", pixt, IFF_PNG); pixDestroy(&pix); pixDestroy(&pixt); #endif /* DISPLAY_DIFFERENCE */ #if DEBUG_TEST_DATA_IO { JBDATA *newdata; PIX *newpix; PIXA *newpixa; l_int32 same, iofail; /* Read the data back in and render the pages */ newdata = jbDataRead(rootname); newpixa = jbDataRender(newdata, FALSE); iofail = FALSE; for (i = 0; i < npages; i++) { pix = pixaGetPix(pixa, i, L_CLONE); newpix = pixaGetPix(newpixa, i, L_CLONE); pixEqual(pix, newpix, &same); if (!same) { iofail = TRUE; fprintf(stderr, "pix on page %d are unequal!\n", i); } pixDestroy(&pix); pixDestroy(&newpix); } if (iofail) fprintf(stderr, "read/write for jbdata fails\n"); else fprintf(stderr, "read/write for jbdata succeeds\n"); jbDataDestroy(&newdata); pixaDestroy(&newpixa); } #endif /* DEBUG_TEST_DATA_IO */ #if RENDER_DEBUG /* Use debugflag == TRUE to see outlines of each component. */ pixadb = jbDataRender(data, TRUE); /* Write the debug pages out */ npages = pixaGetCount(pixadb); for (i = 0; i < npages; i++) { pix = pixaGetPix(pixadb, i, L_CLONE); snprintf(filename, BUF_SIZE, "%s.db.%05d", rootname, i); fprintf(stderr, "filename: %s\n", filename); pixWrite(filename, pix, IFF_PNG); pixDestroy(&pix); } pixaDestroy(&pixadb); #endif /* RENDER_DEBUG */ #if DISPLAY_ALL_INSTANCES /* display all instances, organized by template */ pix = pixaaDisplayByPixa(classer->pixaa, X_SPACING, Y_SPACING, MAX_OUTPUT_WIDTH); pixWrite("output_instances", pix, IFF_PNG); pixDestroy(&pix); #endif /* DISPLAY_ALL_INSTANCES */ pixaDestroy(&pixa); sarrayDestroy(&safiles); jbClasserDestroy(&classer); jbDataDestroy(&data); /*--------------------------------------------------------------*/ #endif return 0; }