Пример #1
0
/*!
 *  jbCorrelation()
 *
 *       Input:  dirin (directory of input images)
 *               thresh (typically ~0.8)
 *               weight (typically ~0.6)
 *               components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS)
 *               rootname (for output files)
 *               firstpage (0-based)
 *               npages (use 0 for all pages in dirin)
 *               renderflag (1 to render from templates; 0 to skip)
 *       Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) The images must be 1 bpp.  If they are not, you can convert
 *          them using convertFilesTo1bpp().
 *      (2) See prog/jbcorrelation for generating more output (e.g.,
 *          for debugging)
 *      (3) Each stage (listing the files, creating the classifier,
 *          adding the pages, saving and writing the data, rendering)
 *          is checked.  On failure the error is reported, everything
 *          allocated so far is released, and 1 is returned.
 *      (4) When renderflag is nonzero, rendered page i is written as
 *          PNG to a file named "<rootname>.<i>", with i printed as
 *          5 zero-padded digits.  A page that cannot be fetched or
 *          written does not stop the remaining pages from being
 *          written, but it makes the function return 1.
 */
l_int32
jbCorrelation(const char *dirin,
              l_float32 thresh,
              l_float32 weight,
              l_int32 components,
              const char *rootname,
              l_int32 firstpage,
              l_int32 npages,
              l_int32 renderflag) {
    char filename[L_BUF_SIZE];
    l_int32 nfiles, i, numpages;
    l_int32 ret = 1;  /* pessimistic until every stage has succeeded */
    JBDATA *data = NULL;
    JBCLASSER *classer = NULL;
    PIX *pix;
    PIXA *pixa = NULL;
    SARRAY *safiles = NULL;

    PROCNAME("jbCorrelation");

    if (!dirin)
        return ERROR_INT("dirin not defined", procName, 1);
    if (!rootname)
        return ERROR_INT("rootname not defined", procName, 1);
    if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
        components != JB_WORDS)
        return ERROR_INT("components invalid", procName, 1);

    safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages);
    if (!safiles)
        return ERROR_INT("safiles not made", procName, 1);
    nfiles = sarrayGetCount(safiles);

    /* Classify components */
    classer = jbCorrelationInit(components, 0, 0, thresh, weight);
    if (!classer) {
        ret = ERROR_INT("classer not made", procName, 1);
        goto cleanup;
    }
    if (jbAddPages(classer, safiles)) {
        ret = ERROR_INT("pages not added", procName, 1);
        goto cleanup;
    }

    /* Save data */
    data = jbDataSave(classer);
    if (!data) {
        ret = ERROR_INT("data not made", procName, 1);
        goto cleanup;
    }
    if (jbDataWrite(rootname, data)) {
        ret = ERROR_INT("data not written", procName, 1);
        goto cleanup;
    }

    ret = 0;

    /* Optionally, render pages using class templates */
    if (renderflag) {
        pixa = jbDataRender(data, FALSE);
        if (!pixa) {
            ret = ERROR_INT("pixa not made", procName, 1);
            goto cleanup;
        }
        numpages = pixaGetCount(pixa);
        if (numpages != nfiles)
            fprintf(stderr, "numpages = %d, nfiles = %d, not equal!\n",
                    numpages, nfiles);
        for (i = 0; i < numpages; i++) {
            pix = pixaGetPix(pixa, i, L_CLONE);
            if (!pix) {
                /* Keep going so the other pages are still written */
                ret = ERROR_INT("pix not found", procName, 1);
                continue;
            }
            snprintf(filename, L_BUF_SIZE, "%s.%05d", rootname, i);
            fprintf(stderr, "filename: %s\n", filename);
            if (pixWrite(filename, pix, IFF_PNG))
                ret = ERROR_INT("pix not written", procName, 1);
            pixDestroy(&pix);
        }
    }

cleanup:
    /* The Leptonica destroy functions are no-ops on a NULL handle,
     * so a single exit path serves every failure point above. */
    pixaDestroy(&pixa);
    sarrayDestroy(&safiles);
    jbClasserDestroy(&classer);
    jbDataDestroy(&data);
    return ret;
}
Пример #2
0
/*
 *  jbwords: command-line driver for word-based classification.
 *
 *  Syntax: jbwords dirin reduction thresh weight rootname [firstpage, npages]
 *
 *  Calls jbWordsInTextlines() on the images in dirin, then saves the
 *  classifier data and writes it out under rootname.  If RENDER_PAGES
 *  and/or RENDER_DEBUG are enabled at compile time, the pages are also
 *  rendered from the saved data and written as PNG files named
 *  "<rootname>.NNNNN" and "<rootname>.db.NNNNN" respectively.
 *
 *  Returns 0 on success and 1 on a usage error or if the classifier
 *  or its saved data could not be generated.
 */
int main(int    argc,
         char **argv)
{
char         filename[BUF_SIZE];
char        *dirin, *rootname;
l_int32      reduction, i, firstpage, npages;
l_float32    thresh, weight;
JBDATA      *data;
JBCLASSER   *classer;
NUMA        *natl = NULL;  /* NULL so the cleanup is safe if never set */
PIX         *pix;
PIXA        *pixa, *pixadb;
static char  mainName[] = "jbwords";

    if (argc != 6 && argc != 8)
        return ERROR_INT(
 " Syntax: jbwords dirin reduction thresh weight rootname [firstpage, npages]",
             mainName, 1);

    dirin = argv[1];
    reduction = atoi(argv[2]);
    thresh = atof(argv[3]);
    weight = atof(argv[4]);
    rootname = argv[5];

        /* Without the optional arguments, start at the first page;
         * npages is passed through as 0 in that case. */
    if (argc == 6) {
        firstpage = 0;
        npages = 0;
    }
    else {
        firstpage = atoi(argv[6]);
        npages = atoi(argv[7]);
    }

    classer = jbWordsInTextlines(dirin, reduction, MAX_WORD_WIDTH,
                                 MAX_WORD_HEIGHT, thresh, weight,
                                 &natl, firstpage, npages);
    if (!classer) {
        numaDestroy(&natl);
        return ERROR_INT("classer not made", mainName, 1);
    }

        /* Save and write out the result */
    data = jbDataSave(classer);
    if (!data) {
        jbClasserDestroy(&classer);
        numaDestroy(&natl);
        return ERROR_INT("data not made", mainName, 1);
    }
    jbDataWrite(rootname, data);

#if  RENDER_PAGES
        /* Render the pages from the classifier data, and write to file.
         * Use debugflag == FALSE to omit outlines of each component. */
    pixa = jbDataRender(data, FALSE);
    npages = pixaGetCount(pixa);
    for (i = 0; i < npages; i++) {
        pix = pixaGetPix(pixa, i, L_CLONE);
        snprintf(filename, BUF_SIZE, "%s.%05d", rootname, i);
        fprintf(stderr, "filename: %s\n", filename);
        pixWrite(filename, pix, IFF_PNG);
        pixDestroy(&pix);
    }
    pixaDestroy(&pixa);
#endif  /* RENDER_PAGES */

#if  RENDER_DEBUG
        /* Use debugflag == TRUE to see outlines of each component. */
    pixadb = jbDataRender(data, TRUE);
        /* Write the debug pages out */
    npages = pixaGetCount(pixadb);
    for (i = 0; i < npages; i++) {
        pix = pixaGetPix(pixadb, i, L_CLONE);
        snprintf(filename, BUF_SIZE, "%s.db.%05d", rootname, i);
        fprintf(stderr, "filename: %s\n", filename);
        pixWrite(filename, pix, IFF_PNG);
        pixDestroy(&pix);
    }
    pixaDestroy(&pixadb);
#endif  /* RENDER_DEBUG */

    jbClasserDestroy(&classer);
    jbDataDestroy(&data);
    numaDestroy(&natl);

    return 0;
}
Пример #3
0
/*
 *  jbcorrelation: command-line driver for correlation-based
 *  classification.
 *
 *  Syntax: jbcorrelation dirin thresh weight rootname [firstpage, npages]
 *
 *  The compiled (#else) branch lists the image files in dirin,
 *  classifies their components, saves and writes the classifier data
 *  under rootname, renders the pages from that data and writes them
 *  as PNG files named "<rootname>.NNNNN".  Additional diagnostic
 *  output is selected at compile time with DISPLAY_DIFFERENCE,
 *  DEBUG_TEST_DATA_IO, RENDER_DEBUG and DISPLAY_ALL_INSTANCES.
 *  The disabled (#if 0) branch shows the equivalent single call to
 *  jbCorrelation().
 *
 *  Returns 0 on success and 1 on a usage error or if the file list,
 *  classifier, saved data or rendered pages could not be generated.
 */
int main(int    argc,
         char **argv)
{
char         filename[BUF_SIZE];
char        *dirin, *rootname, *fname;
l_int32      i, firstpage, npages, nfiles;
l_float32    thresh, weight;
JBDATA      *data;
JBCLASSER   *classer;
SARRAY      *safiles;
PIX         *pix, *pixt;
PIXA        *pixa, *pixadb;
static char  mainName[] = "jbcorrelation";

    if (argc != 5 && argc != 7)
        return ERROR_INT(" Syntax: jbcorrelation dirin thresh weight "
                         "rootname [firstpage, npages]", mainName, 1);

    dirin = argv[1];
    thresh = atof(argv[2]);
    weight = atof(argv[3]);
    rootname = argv[4];

        /* Without the optional arguments, start at the first page;
         * npages is passed through as 0 in that case. */
    if (argc == 5) {
        firstpage = 0;
        npages = 0;
    }
    else {
        firstpage = atoi(argv[5]);
        npages = atoi(argv[6]);
    }

#if 0

    /*--------------------------------------------------------------*/

    jbCorrelation(dirin, thresh, weight, COMPONENTS, rootname,
                  firstpage, npages, 1);

    /*--------------------------------------------------------------*/

#else

    /*--------------------------------------------------------------*/

    safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages);
    if (!safiles)
        return ERROR_INT("safiles not made", mainName, 1);
    nfiles = sarrayGetCount(safiles);

    sarrayWriteStream(stderr, safiles);

        /* Classify components on requested pages */
    startTimer();
    classer = jbCorrelationInit(COMPONENTS, 0, 0, thresh, weight);
    if (!classer) {
        sarrayDestroy(&safiles);
        return ERROR_INT("classer not made", mainName, 1);
    }
    jbAddPages(classer, safiles);
    fprintf(stderr, "Time to generate classes: %6.3f sec\n", stopTimer());

        /* Save and write out the result */
    data = jbDataSave(classer);
    if (!data) {
        sarrayDestroy(&safiles);
        jbClasserDestroy(&classer);
        return ERROR_INT("data not made", mainName, 1);
    }
    jbDataWrite(rootname, data);
        /* classer is known to be non-NULL here, so the field access
         * below is safe */
    fprintf(stderr, "Number of classes: %d\n", classer->nclass);

        /* Render the pages from the classifier data.
         * Use debugflag == FALSE to omit outlines of each component. */
    pixa = jbDataRender(data, FALSE);
    if (!pixa) {
        sarrayDestroy(&safiles);
        jbClasserDestroy(&classer);
        jbDataDestroy(&data);
        return ERROR_INT("pixa not made", mainName, 1);
    }

        /* Write the pages out */
    npages = pixaGetCount(pixa);
    if (npages != nfiles)
        fprintf(stderr, "npages = %d, nfiles = %d, not equal!\n",
                npages, nfiles);
    for (i = 0; i < npages; i++) {
        pix = pixaGetPix(pixa, i, L_CLONE);
        snprintf(filename, BUF_SIZE, "%s.%05d", rootname, i);
        fprintf(stderr, "filename: %s\n", filename);
        pixWrite(filename, pix, IFF_PNG);
        pixDestroy(&pix);
    }

#if  DISPLAY_DIFFERENCE
        /* XOR the first input image with its rendered version and
         * write the result, to show where they differ */
    fname = sarrayGetString(safiles, 0, 0);
    pixt = pixRead(fname);
    pix = pixaGetPix(pixa, 0, L_CLONE);
    pixXor(pixt, pixt, pix);
    pixWrite("junk_output_diff", pixt, IFF_PNG);
    pixDestroy(&pix);
    pixDestroy(&pixt);
#endif  /* DISPLAY_DIFFERENCE */

#if  DEBUG_TEST_DATA_IO
{ JBDATA  *newdata;
  PIX     *newpix;
  PIXA    *newpixa;
  l_int32  same, iofail;
        /* Read the data back in and render the pages */
    newdata = jbDataRead(rootname);
    newpixa = jbDataRender(newdata, FALSE);
    iofail = FALSE;
        /* Compare each page rendered from the in-memory data with the
         * page rendered from the data that was read back from file */
    for (i = 0; i < npages; i++) {
        pix = pixaGetPix(pixa, i, L_CLONE);
        newpix = pixaGetPix(newpixa, i, L_CLONE);
        pixEqual(pix, newpix, &same);
        if (!same) {
            iofail = TRUE;
            fprintf(stderr, "pix on page %d are unequal!\n", i);
        }
        pixDestroy(&pix);
        pixDestroy(&newpix);

    }
    if (iofail)
        fprintf(stderr, "read/write for jbdata fails\n");
    else
        fprintf(stderr, "read/write for jbdata succeeds\n");
    jbDataDestroy(&newdata);
    pixaDestroy(&newpixa);
}
#endif  /* DEBUG_TEST_DATA_IO */

#if  RENDER_DEBUG
        /* Use debugflag == TRUE to see outlines of each component. */
    pixadb = jbDataRender(data, TRUE);
        /* Write the debug pages out */
    npages = pixaGetCount(pixadb);
    for (i = 0; i < npages; i++) {
        pix = pixaGetPix(pixadb, i, L_CLONE);
        snprintf(filename, BUF_SIZE, "%s.db.%05d", rootname, i);
        fprintf(stderr, "filename: %s\n", filename);
        pixWrite(filename, pix, IFF_PNG);
        pixDestroy(&pix);
    }
    pixaDestroy(&pixadb);
#endif  /* RENDER_DEBUG */

#if  DISPLAY_ALL_INSTANCES
        /* display all instances, organized by template */
    pix = pixaaDisplayByPixa(classer->pixaa,
                             X_SPACING, Y_SPACING, MAX_OUTPUT_WIDTH);
    pixWrite("output_instances", pix, IFF_PNG);
    pixDestroy(&pix);
#endif  /* DISPLAY_ALL_INSTANCES */

    pixaDestroy(&pixa);
    sarrayDestroy(&safiles);
    jbClasserDestroy(&classer);
    jbDataDestroy(&data);

    /*--------------------------------------------------------------*/

#endif

    return 0;
}