/*
 *  convertFilesToPS()
 *
 *      Input:  dirin (input directory)
 *              substr (<optional> substring filter on filenames; can be NULL)
 *              res (typ. 300 or 600 ppi)
 *              fileout (output ps file)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) This generates a PS file for all image files in a specified
 *          directory that contain the substr pattern to be matched.
 *      (2) Each image is written to a separate page in the output PS file.
 *      (3) All images are written compressed:
 *              * if tiffg4  -->  use ccittg4
 *              * if jpeg    -->  use dct
 *              * all others -->  use flate
 *          If the image is jpeg or tiffg4, we use the existing compressed
 *          strings for the encoding; otherwise, we read the image into
 *          a pix and flate-encode the pieces.
 *      (4) The resolution is often confusing.  It is interpreted
 *          as the resolution of the output display device:  "If the
 *          input image were digitized at 300 ppi, what would it
 *          look like when displayed at res ppi."  So, for example,
 *          if res = 100 ppi, then the display pixels are 3x larger
 *          than the 300 ppi pixels, and the image will be rendered
 *          3x larger.
 *      (5) The size of the PostScript file is independent of the resolution,
 *          because the entire file is encoded.  The res parameter just
 *          tells the PS decomposer how to render the page.  Therefore,
 *          for minimum file size without loss of visual information,
 *          if the output res is less than 300, you should downscale
 *          the image to the output resolution before wrapping in PS.
 *      (6) The "canvas" on which the image is rendered, at the given
 *          output resolution, is a standard page size (8.5 x 11 in).
 *      (7) A non-positive res is replaced by 300 ppi; values outside
 *          [10 ... 4000] only trigger a warning.
 *      (8) An error is returned if the pathname array cannot be made,
 *          or if the conversion step reports a nonzero status.
 */
l_int32
convertFilesToPS(const char  *dirin,
                 const char  *substr,
                 l_int32      res,
                 const char  *fileout)
{
l_int32  ret;
SARRAY  *sa;

    PROCNAME("convertFilesToPS");

    if (!dirin)
        return ERROR_INT("dirin not defined", procName, 1);
    if (!fileout)
        return ERROR_INT("fileout not defined", procName, 1);
    if (res <= 0) {
        L_INFO("setting res to 300 ppi", procName);
        res = 300;
    }
    if (res < 10 || res > 4000)
        L_WARNING("res is typically in the range 300-600 ppi", procName);

        /* Get all filtered and sorted full pathnames. */
    if ((sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0)) == NULL)
        return ERROR_INT("sa not made", procName, 1);

        /* Generate the PS file.  The status is propagated rather than
         * dropped; this assumes sarrayConvertFilesToPS() follows the
         * usual "0 if OK, nonzero on error" convention. */
    ret = sarrayConvertFilesToPS(sa, res, fileout);
    sarrayDestroy(&sa);
    return (ret) ? 1 : 0;
}
/*
 *  convertFilesFittedToPS()
 *
 *      Input:  dirin (input directory)
 *              substr (<optional> substring filter on filenames; can be NULL)
 *              xpts, ypts (desired size in printer points; use 0 for default)
 *              fileout (output ps file)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) This generates a PS file for all files in a specified directory
 *          that contain the substr pattern to be matched.
 *      (2) Each image is written to a separate page in the output PS file.
 *      (3) All images are written compressed:
 *              * if tiffg4  -->  use ccittg4
 *              * if jpeg    -->  use dct
 *              * all others -->  use flate
 *          If the image is jpeg or tiffg4, we use the existing compressed
 *          strings for the encoding; otherwise, we read the image into
 *          a pix and flate-encode the pieces.
 *      (4) The resolution is internally determined such that the images
 *          are rendered, in at least one direction, at 100% of the given
 *          size in printer points.  Use 0.0 for xpts or ypts to get
 *          the default value, which is 612.0 or 792.0, rsp.
 *      (5) The size of the PostScript file is independent of the resolution,
 *          because the entire file is encoded.  The @xpts and @ypts
 *          parameter tells the PS decomposer how to render the page.
 *      (6) An error is returned if the pathname array cannot be made,
 *          or if the conversion step reports a nonzero status.
 */
l_int32
convertFilesFittedToPS(const char  *dirin,
                       const char  *substr,
                       l_float32    xpts,
                       l_float32    ypts,
                       const char  *fileout)
{
l_int32  ret;
SARRAY  *sa;

    PROCNAME("convertFilesFittedToPS");

    if (!dirin)
        return ERROR_INT("dirin not defined", procName, 1);
    if (!fileout)
        return ERROR_INT("fileout not defined", procName, 1);

        /* Non-positive sizes select the defaults */
    if (xpts <= 0.0) {
        L_INFO("setting xpts to 612.0 ppi", procName);
        xpts = 612.0;
    }
    if (ypts <= 0.0) {
        L_INFO("setting ypts to 792.0 ppi", procName);
        ypts = 792.0;
    }
    if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0)
        L_WARNING("xpts,ypts are typically in the range 500-800", procName);

        /* Get all filtered and sorted full pathnames. */
    if ((sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0)) == NULL)
        return ERROR_INT("sa not made", procName, 1);

        /* Generate the PS file.  The status is propagated rather than
         * dropped; this assumes sarrayConvertFilesFittedToPS() follows
         * the usual "0 if OK, nonzero on error" convention. */
    ret = sarrayConvertFilesFittedToPS(sa, xpts, ypts, fileout);
    sarrayDestroy(&sa);
    return (ret) ? 1 : 0;
}
/*!
 *  jbCorrelation()
 *
 *       Input:  dirin (directory of input images)
 *               thresh (typically ~0.8)
 *               weight (typically ~0.6)
 *               components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS)
 *               rootname (for output files)
 *               firstpage (0-based)
 *               npages (use 0 for all pages in dirin)
 *               renderflag (1 to render from templates; 0 to skip)
 *       Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) The images must be 1 bpp.  If they are not, you can convert
 *          them using convertFilesTo1bpp().
 *      (2) See prog/jbcorrelation for generating more output (e.g.,
 *          for debugging)
 *      (3) When rendering, page i is written as a png to the file
 *          "<rootname>.<i as 5 digits>".
 *      (4) Failure to build the pathname array, the classer or the
 *          saved data is reported as an error.
 */
l_int32
jbCorrelation(const char  *dirin,
              l_float32    thresh,
              l_float32    weight,
              l_int32      components,
              const char  *rootname,
              l_int32      firstpage,
              l_int32      npages,
              l_int32      renderflag)
{
char        filename[L_BUF_SIZE];
l_int32     nfiles, i, numpages;
JBDATA     *data;
JBCLASSER  *classer;
PIX        *pix;
PIXA       *pixa;
SARRAY     *safiles;

    PROCNAME("jbCorrelation");

    if (!dirin)
        return ERROR_INT("dirin not defined", procName, 1);
    if (!rootname)
        return ERROR_INT("rootname not defined", procName, 1);
    if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
        components != JB_WORDS)
        return ERROR_INT("components invalid", procName, 1);

    safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages);
    if (!safiles)
        return ERROR_INT("safiles not made", procName, 1);
    nfiles = sarrayGetCount(safiles);

        /* Classify components */
    classer = jbCorrelationInit(components, 0, 0, thresh, weight);
    if (!classer) {
        sarrayDestroy(&safiles);
        return ERROR_INT("classer not made", procName, 1);
    }
    jbAddPages(classer, safiles);

        /* Save data */
    data = jbDataSave(classer);
    if (!data) {
        sarrayDestroy(&safiles);
        jbClasserDestroy(&classer);
        return ERROR_INT("data not made", procName, 1);
    }
    jbDataWrite(rootname, data);

        /* Optionally, render pages using class templates */
    if (renderflag) {
        pixa = jbDataRender(data, FALSE);
        numpages = pixaGetCount(pixa);
        if (numpages != nfiles)
            fprintf(stderr, "numpages = %d, nfiles = %d, not equal!\n",
                    numpages, nfiles);
        for (i = 0; i < numpages; i++) {
            pix = pixaGetPix(pixa, i, L_CLONE);
            snprintf(filename, L_BUF_SIZE, "%s.%05d", rootname, i);
            fprintf(stderr, "filename: %s\n", filename);
            pixWrite(filename, pix, IFF_PNG);
            pixDestroy(&pix);
        }
        pixaDestroy(&pixa);
    }

    sarrayDestroy(&safiles);
    jbClasserDestroy(&classer);
    jbDataDestroy(&data);
    return 0;
}
int main(int argc, char **argv) { char *dirin, *dirout, *infile, *outfile, *tail; l_int32 i, nfiles, border, x, y, w, h, xb, yb, wb, hb; BOX *box1, *box2; BOXA *boxa1, *boxa2; PIX *pixs, *pixt1, *pixd; SARRAY *safiles; static char mainName[] = "croptext"; if (argc != 4) return ERROR_INT("Syntax: croptext dirin border dirout", mainName, 1); dirin = argv[1]; border = atoi(argv[2]); dirout = argv[3]; setLeptDebugOK(1); safiles = getSortedPathnamesInDirectory(dirin, NULL, 0, 0); nfiles = sarrayGetCount(safiles); for (i = 0; i < nfiles; i++) { infile = sarrayGetString(safiles, i, L_NOCOPY); splitPathAtDirectory(infile, NULL, &tail); outfile = genPathname(dirout, tail); pixs = pixRead(infile); pixt1 = pixMorphSequence(pixs, "r11 + c10.40 + o5.5 + x4", 0); boxa1 = pixConnComp(pixt1, NULL, 8); if (boxaGetCount(boxa1) == 0) { fprintf(stderr, "Warning: no components on page %s\n", tail); continue; } boxa2 = boxaSort(boxa1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL); box1 = boxaGetBox(boxa2, 0, L_CLONE); boxGetGeometry(box1, &x, &y, &w, &h); xb = L_MAX(0, x - border); yb = L_MAX(0, y - border); wb = w + 2 * border; hb = h + 2 * border; box2 = boxCreate(xb, yb, wb, hb); pixd = pixClipRectangle(pixs, box2, NULL); pixWrite(outfile, pixd, IFF_TIFF_G4); pixDestroy(&pixs); pixDestroy(&pixt1); pixDestroy(&pixd); boxaDestroy(&boxa1); boxaDestroy(&boxa2); } return 0; }
/*!
 *  pixaReadFiles()
 *
 *      Input:  dirname
 *              substr (<optional> substring filter on filenames; can be null)
 *      Return: pixa, or null on error
 *
 *  Notes:
 *      (1) @dirname is the full path for the directory.
 *      (2) @substr is the part of the file name (excluding
 *          the directory) that is to be matched.  All matching
 *          filenames are read into the Pixa.  If substr is NULL,
 *          all filenames are read into the Pixa.
 *      (3) The sorted pathnames are gathered first and then handed
 *          to pixaReadFilesSA(); the temporary pathname array is
 *          destroyed before returning.
 */
PIXA *
pixaReadFiles(const char  *dirname,
              const char  *substr)
{
PIXA    *pixad;
SARRAY  *sapaths;

    PROCNAME("pixaReadFiles");

    if (dirname == NULL)
        return (PIXA *)ERROR_PTR("dirname not defined", procName, NULL);

    sapaths = getSortedPathnamesInDirectory(dirname, substr, 0, 0);
    if (sapaths == NULL)
        return (PIXA *)ERROR_PTR("sa not made", procName, NULL);

    pixad = pixaReadFilesSA(sapaths);
    sarrayDestroy(&sapaths);
    return pixad;
}
PIXA *MakeBootnum2(void) { char *fname; l_int32 i, n, w, h; BOX *box; PIX *pix; PIXA *pixa; L_RECOG *recog; SARRAY *sa; /* Phase 1: generate recog from the digit data */ recog = recogCreate(20, 32, L_USE_ALL, 120, 1); sa = getSortedPathnamesInDirectory("recog/bootnums", "png", 0, 0); n = sarrayGetCount(sa); for (i = 0; i < n; i++) { /* Read each pix: grayscale, multi-character, labelled */ fname = sarrayGetString(sa, i, L_NOCOPY); if ((pix = pixRead(fname)) == NULL) { fprintf(stderr, "Can't read %s\n", fname); continue; } /* Convert to a set of 1 bpp, single character, labelled */ pixGetDimensions(pix, &w, &h, NULL); box = boxCreate(0, 0, w, h); recogTrainLabelled(recog, pix, box, NULL, 1, 0); pixDestroy(&pix); boxDestroy(&box); } recogTrainingFinished(recog, 1); sarrayDestroy(&sa); /* Phase 2: generate pixa consisting of 1 bpp, single character pix */ recogWritePixa("/tmp/lept/recog/digits/bootnum2.pa", recog); pixa = pixaRead("/tmp/lept/recog/digits/bootnum2.pa"); recogDestroy(&recog); return pixa; }
/*
 *  regTestFindGoldenFile()
 *
 *      Input:  testname (root of the test name)
 *              index (of the golden file)
 *              namebuf (buffer that receives the search pattern)
 *              bufsize (size of namebuf, in bytes)
 *      Return: copy of the unique matching pathname in /tmp/golden
 *              (caller must free), or null if there is not exactly
 *              one match
 *
 *  The pattern "<testname>_golden.<index as %02d>." is left in namebuf
 *  so that the caller can report it.
 */
static char *
regTestFindGoldenFile(const char  *testname,
                      l_int32      index,
                      char        *namebuf,
                      size_t       bufsize)
{
char    *path;
SARRAY  *sa;

    snprintf(namebuf, bufsize, "%s_golden.%02d.", testname, index);
    sa = getSortedPathnamesInDirectory("/tmp/golden", namebuf, 0, 0);
    path = NULL;
    if (sarrayGetCount(sa) == 1)
        path = sarrayGetString(sa, 0, L_COPY);
    sarrayDestroy(&sa);
    return path;
}


/*!
 *  regTestCompareFiles()
 *
 *      Input:  rp (regtest parameters)
 *              index1 (of one output file from reg test)
 *              index2 (of another output file from reg test)
 *      Return: 0 if OK, 1 on error (a failure in comparison is not an error)
 *
 *  Notes:
 *      (1) This only does something in "compare" mode.
 *      (2) The golden files are looked up in /tmp/golden with the
 *          substring pattern
 *             <root of main name>_golden.<index>.
 *          where <index> is printed with at least two digits, so that
 *          a matching file is, e.g.,
 *             /tmp/golden/maze_golden.00.png
 *      (3) rp->index is incremented on every call that passes the
 *          argument checks, including calls outside "compare" mode.
 *      (4) It is an error if there is not exactly one golden file
 *          for either index.
 */
l_int32
regTestCompareFiles(L_REGPARAMS  *rp,
                    l_int32       index1,
                    l_int32       index2)
{
char    *name1, *name2;
char     namebuf[256];
l_int32  same;

    PROCNAME("regTestCompareFiles");

    if (!rp)
        return ERROR_INT("rp not defined", procName, 1);
    if (index1 < 0 || index2 < 0) {
        rp->success = FALSE;
        return ERROR_INT("index1 and/or index2 is negative", procName, 1);
    }
    if (index1 == index2) {
        rp->success = FALSE;
        return ERROR_INT("index1 must differ from index2", procName, 1);
    }

    rp->index++;
    if (rp->mode != L_REG_COMPARE) return 0;

        /* Find the two golden files */
    name1 = regTestFindGoldenFile(rp->testname, index1,
                                  namebuf, sizeof(namebuf));
    if (!name1) {
        rp->success = FALSE;
        L_ERROR("golden file %s not found\n", procName, namebuf);
        return 1;
    }
    name2 = regTestFindGoldenFile(rp->testname, index2,
                                  namebuf, sizeof(namebuf));
    if (!name2) {
        rp->success = FALSE;
        FREE(name1);
        L_ERROR("golden file %s not found\n", procName, namebuf);
        return 1;
    }

        /* Test and record on failure.  Start from "not the same" so
         * that a failed comparison call is never read as a match. */
    same = 0;
    filesAreIdentical(name1, name2, &same);
    if (!same) {
        fprintf(rp->fp,
                "Failure in %s_reg, index %d: comparing %s with %s\n",
                rp->testname, rp->index, name1, name2);
        fprintf(stderr,
                "Failure in %s_reg, index %d: comparing %s with %s\n",
                rp->testname, rp->index, name1, name2);
        rp->success = FALSE;
    }

    FREE(name1);
    FREE(name2);
    return 0;
}
int main(int argc, char **argv) { char filename[BUF_SIZE]; char *dirin, *rootname, *fname; l_int32 i, j, w, h, firstpage, npages, nfiles, ncomp; l_int32 index, ival, rval, gval, bval; BOX *box; BOXA *boxa; BOXAA *baa; JBDATA *data; JBCLASSER *classer; NUMA *nai; NUMAA *naa; SARRAY *safiles; PIX *pixs, *pixt1, *pixt2, *pixd; PIXCMAP *cmap; static char mainName[] = "wordsinorder"; if (argc != 3 && argc != 5) return ERROR_INT( " Syntax: wordsinorder dirin rootname [firstpage, npages]", mainName, 1); dirin = argv[1]; rootname = argv[2]; if (argc == 3) { firstpage = 0; npages = 0; } else { firstpage = atoi(argv[3]); npages = atoi(argv[4]); } /* Compute the word bounding boxes at 2x reduction, along with * the textlines that they are in. */ safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages); nfiles = sarrayGetCount(safiles); baa = boxaaCreate(nfiles); naa = numaaCreate(nfiles); for (i = 0; i < nfiles; i++) { fname = sarrayGetString(safiles, i, 0); if ((pixs = pixRead(fname)) == NULL) { L_WARNING("image file %d not read\n", mainName, i); continue; } pixGetWordBoxesInTextlines(pixs, 2, MIN_WORD_WIDTH, MIN_WORD_HEIGHT, MAX_WORD_WIDTH, MAX_WORD_HEIGHT, &boxa, &nai); boxaaAddBoxa(baa, boxa, L_INSERT); numaaAddNuma(naa, nai, L_INSERT); #if RENDER_PAGES /* Show the results on a 2x reduced image, where each * word is outlined and the color of the box depends on the * computed textline. 
*/ pixt1 = pixReduceRankBinary2(pixs, 2, NULL); pixGetDimensions(pixt1, &w, &h, NULL); pixd = pixCreate(w, h, 8); cmap = pixcmapCreateRandom(8, 1, 1); /* first color is black */ pixSetColormap(pixd, cmap); pixt2 = pixUnpackBinary(pixt1, 8, 1); pixRasterop(pixd, 0, 0, w, h, PIX_SRC | PIX_DST, pixt2, 0, 0); ncomp = boxaGetCount(boxa); for (j = 0; j < ncomp; j++) { box = boxaGetBox(boxa, j, L_CLONE); numaGetIValue(nai, j, &ival); index = 1 + (ival % 254); /* omit black and white */ pixcmapGetColor(cmap, index, &rval, &gval, &bval); pixRenderBoxArb(pixd, box, 2, rval, gval, bval); boxDestroy(&box); } snprintf(filename, BUF_SIZE, "%s.%05d", rootname, i); fprintf(stderr, "filename: %s\n", filename); pixWrite(filename, pixd, IFF_PNG); pixDestroy(&pixt1); pixDestroy(&pixt2); pixDestroy(&pixs); pixDestroy(&pixd); #endif /* RENDER_PAGES */ } boxaaDestroy(&baa); numaaDestroy(&naa); sarrayDestroy(&safiles); return 0; }
/*!
 *  convertFilesTo1bpp()
 *
 *      Input:  dirin
 *              substr (<optional> substring filter on filenames; can be NULL)
 *              upscaling (1, 2 or 4; only for input color or grayscale)
 *              thresh  (global threshold for binarization; use 0 for default)
 *              firstpage
 *              npages (use 0 to do all from @firstpage to the end)
 *              dirout
 *              outformat (IFF_PNG, IFF_TIFF_G4)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) Images are sorted lexicographically, and the names in the
 *          output directory are retained except for the extension.
 *      (2) The default threshold is 180.  Negative @firstpage and
 *          @npages are treated as 0, and any @outformat other than
 *          IFF_TIFF_G4 is treated as IFF_PNG.
 *      (3) Files that cannot be read are skipped with a warning.
 */
l_int32
convertFilesTo1bpp(const char  *dirin,
                   const char  *substr,
                   l_int32      upscaling,
                   l_int32      thresh,
                   l_int32      firstpage,
                   l_int32      npages,
                   const char  *dirout,
                   l_int32      outformat)
{
l_int32  index, count;
char     pathout[512];
char    *pathin, *tail, *basename;
PIX     *pixin, *pixgray, *pixnocmap, *pixbin;
SARRAY  *sapaths;

    PROCNAME("convertFilesTo1bpp");

    if (!dirin)
        return ERROR_INT("dirin", procName, 1);
    if (!dirout)
        return ERROR_INT("dirout", procName, 1);
    if (!(upscaling == 1 || upscaling == 2 || upscaling == 4))
        return ERROR_INT("invalid upscaling factor", procName, 1);

        /* Normalize the optional arguments */
    if (thresh <= 0) thresh = 180;
    if (firstpage < 0) firstpage = 0;
    if (npages < 0) npages = 0;
    if (outformat != IFF_TIFF_G4) outformat = IFF_PNG;

    sapaths = getSortedPathnamesInDirectory(dirin, substr, firstpage, npages);
    if (sapaths == NULL)
        return ERROR_INT("safiles not made", procName, 1);
    count = sarrayGetCount(sapaths);
    if (count == 0) {
        sarrayDestroy(&sapaths);
        return ERROR_INT("no matching files in the directory", procName, 1);
    }

    for (index = 0; index < count; index++) {
        pathin = sarrayGetString(sapaths, index, L_NOCOPY);
        pixin = pixRead(pathin);
        if (pixin == NULL) {
            L_WARNING("Couldn't read file %s\n", procName, pathin);
            continue;
        }

            /* Reduce to grayscale (or leave as is), without colormap */
        pixgray = (pixGetDepth(pixin) == 32) ? pixConvertRGBToLuminance(pixin)
                                             : pixClone(pixin);
        pixnocmap = pixRemoveColormap(pixgray, REMOVE_CMAP_TO_GRAYSCALE);

            /* Binarize, unless the image is already 1 bpp */
        if (pixGetDepth(pixnocmap) == 1) {
            pixbin = pixClone(pixnocmap);
        } else {
            switch (upscaling)
            {
            case 1:
                pixbin = pixThresholdToBinary(pixnocmap, thresh);
                break;
            case 2:
                pixbin = pixScaleGray2xLIThresh(pixnocmap, thresh);
                break;
            default:  /* upscaling == 4 */
                pixbin = pixScaleGray4xLIThresh(pixnocmap, thresh);
                break;
            }
        }
        pixDestroy(&pixin);
        pixDestroy(&pixgray);
        pixDestroy(&pixnocmap);

            /* Same name in dirout, with the extension for outformat;
             * outformat is either IFF_TIFF_G4 or IFF_PNG here. */
        splitPathAtDirectory(pathin, NULL, &tail);
        splitPathAtExtension(tail, &basename, NULL);
        if (outformat == IFF_TIFF_G4)
            snprintf(pathout, sizeof(pathout), "%s/%s.tif", dirout, basename);
        else
            snprintf(pathout, sizeof(pathout), "%s/%s.png", dirout, basename);
        pixWrite(pathout, pixbin, outformat);

        pixDestroy(&pixbin);
        FREE(tail);
        FREE(basename);
    }

    sarrayDestroy(&sapaths);
    return 0;
}
int main(int argc, char **argv) { char *fname, *filename; const char *str; char buffer[512]; l_int32 i, npages; size_t length; FILE *fp; NUMA *naflags, *nasizes; PIX *pix, *pix1, *pix2, *pixd; PIXA *pixa; PIXCMAP *cmap; SARRAY *savals, *satypes, *sa; static char mainName[] = "mtifftest"; if (argc != 1) return ERROR_INT(" Syntax: mtifftest", mainName, 1); lept_mkdir("tiff"); #if 1 /* ------------------ Test multipage I/O -------------------*/ /* This puts every image file in the directory with a string * match to "weasel" into a multipage tiff file. * Images with 1 bpp are coded as g4; the others as zip. * It then reads back into a pix and displays. */ writeMultipageTiff(".", "weasel8.", "/tmp/tiff/weasel8.tif"); pixa = pixaReadMultipageTiff("/tmp/tiff/weasel8.tif"); pixd = pixaDisplayTiledInRows(pixa, 1, 1200, 0.5, 0, 15, 4); pixDisplay(pixd, 100, 0); pixDestroy(&pixd); pixd = pixaDisplayTiledInRows(pixa, 8, 1200, 0.8, 0, 15, 4); pixDisplay(pixd, 100, 200); pixDestroy(&pixd); pixd = pixaDisplayTiledInRows(pixa, 32, 1200, 1.2, 0, 15, 4); pixDisplay(pixd, 100, 400); pixDestroy(&pixd); pixaDestroy(&pixa); #endif #if 1 /* ------------ Test single-to-multipage I/O -------------------*/ /* Read the files and generate a multipage tiff file of G4 images. * Then convert that to a G4 compressed and ascii85 encoded PS file. 
*/ sa = getSortedPathnamesInDirectory(".", "weasel4.", 0, 4); sarrayWriteStream(stderr, sa); sarraySort(sa, sa, L_SORT_INCREASING); sarrayWriteStream(stderr, sa); npages = sarrayGetCount(sa); for (i = 0; i < npages; i++) { fname = sarrayGetString(sa, i, 0); filename = genPathname(".", fname); pix1 = pixRead(filename); if (!pix1) continue; pix2 = pixConvertTo1(pix1, 128); if (i == 0) pixWriteTiff("/tmp/tiff/weasel4", pix2, IFF_TIFF_G4, "w+"); else pixWriteTiff("/tmp/tiff/weasel4", pix2, IFF_TIFF_G4, "a"); pixDestroy(&pix1); pixDestroy(&pix2); lept_free(filename); } /* Write it out as a PS file */ convertTiffMultipageToPS("/tmp/tiff/weasel4", "/tmp/tiff/weasel4.ps", NULL, 0.95); sarrayDestroy(&sa); #endif #if 1 /* ------------------ Test multipage I/O -------------------*/ /* Read count of pages in tiff multipage file */ writeMultipageTiff(".", "weasel2", weasel_orig); fp = lept_fopen(weasel_orig, "rb"); if (fileFormatIsTiff(fp)) { tiffGetCount(fp, &npages); fprintf(stderr, " Tiff: %d page\n", npages); } else return ERROR_INT(" file not tiff", mainName, 1); lept_fclose(fp); /* Split into separate page files */ for (i = 0; i < npages + 1; i++) { /* read one beyond to catch error */ if (i == npages) L_INFO("Errors in next 2 lines are intentional!\n", mainName); pix = pixReadTiff(weasel_orig, i); if (!pix) continue; sprintf(buffer, "/tmp/tiff/%03d.tif", i); pixWrite(buffer, pix, IFF_TIFF_ZIP); pixDestroy(&pix); } /* Read separate page files and write reversed file */ for (i = npages - 1; i >= 0; i--) { sprintf(buffer, "/tmp/tiff/%03d.tif", i); pix = pixRead(buffer); if (!pix) continue; if (i == npages - 1) pixWriteTiff(weasel_rev, pix, IFF_TIFF_ZIP, "w+"); else pixWriteTiff(weasel_rev, pix, IFF_TIFF_ZIP, "a"); pixDestroy(&pix); } /* Read reversed file and reverse again */ pixa = pixaCreate(npages); for (i = 0; i < npages; i++) { pix = pixReadTiff(weasel_rev, i); pixaAddPix(pixa, pix, L_INSERT); } for (i = npages - 1; i >= 0; i--) { pix = pixaGetPix(pixa, i, L_CLONE); if 
(i == npages - 1) pixWriteTiff(weasel_rev_rev, pix, IFF_TIFF_ZIP, "w+"); else pixWriteTiff(weasel_rev_rev, pix, IFF_TIFF_ZIP, "a"); pixDestroy(&pix); } pixaDestroy(&pixa); #endif #if 0 /* ----- test adding custom public tags to a tiff header ----- */ pix = pixRead("feyn.tif"); naflags = numaCreate(10); savals = sarrayCreate(10); satypes = sarrayCreate(10); nasizes = numaCreate(10); /* numaAddNumber(naflags, TIFFTAG_XMLPACKET); */ /* XMP: 700 */ numaAddNumber(naflags, 700); str = "<xmp>This is a Fake XMP packet</xmp>\n<text>Guess what ...?</text>"; length = strlen(str); sarrayAddString(savals, (char *)str, L_COPY); sarrayAddString(satypes, (char *)"char*", L_COPY); numaAddNumber(nasizes, length); /* get it all */ numaAddNumber(naflags, 269); /* DOCUMENTNAME */ sarrayAddString(savals, (char *)"One silly title", L_COPY); sarrayAddString(satypes, (char *)"const char*", L_COPY); numaAddNumber(naflags, 270); /* IMAGEDESCRIPTION */ sarrayAddString(savals, (char *)"One page of text", L_COPY); sarrayAddString(satypes, (char *)"const char*", L_COPY); /* the max sample is used by rendering programs * to scale the dynamic range */ numaAddNumber(naflags, 281); /* MAXSAMPLEVALUE */ sarrayAddString(savals, (char *)"4", L_COPY); sarrayAddString(satypes, (char *)"l_uint16", L_COPY); /* note that date is required to be a 20 byte string */ numaAddNumber(naflags, 306); /* DATETIME */ sarrayAddString(savals, (char *)"2004:10:11 09:35:15", L_COPY); sarrayAddString(satypes, (char *)"const char*", L_COPY); /* note that page number requires 2 l_uint16 input */ numaAddNumber(naflags, 297); /* PAGENUMBER */ sarrayAddString(savals, (char *)"1-412", L_COPY); sarrayAddString(satypes, (char *)"l_uint16-l_uint16", L_COPY); pixWriteTiffCustom("/tmp/tiff/tags.tif", pix, IFF_TIFF_G4, "w", naflags, savals, satypes, nasizes); fprintTiffInfo(stderr, (char *)"/tmp/tiff/tags.tif"); fprintf(stderr, "num flags = %d\n", numaGetCount(naflags)); fprintf(stderr, "num sizes = %d\n", numaGetCount(nasizes)); 
fprintf(stderr, "num vals = %d\n", sarrayGetCount(savals)); fprintf(stderr, "num types = %d\n", sarrayGetCount(satypes)); numaDestroy(&naflags); numaDestroy(&nasizes); sarrayDestroy(&savals); sarrayDestroy(&satypes); pixDestroy(&pix); #endif return 0; }
/*! * \brief jbWordsInTextlines() * * \param[in] dirin directory of input pages * \param[in] reduction 1 for full res; 2 for half-res * \param[in] maxwidth of word mask components, to be kept * \param[in] maxheight of word mask components, to be kept * \param[in] thresh on correlation; 0.80 is reasonable * \param[in] weight for handling thick text; 0.6 is reasonable * \param[out] pnatl numa with textline index for each component * \param[in] firstpage 0-based * \param[in] npages use 0 for all pages in dirin * \return classer for the set of pages * * <pre> * Notes: * (1) This is a high-level function. See prog/jbwords for example * of usage. * (2) Typically, words can be found reasonably well at a resolution * of about 150 ppi. For highest accuracy, you should use 300 ppi. * Assuming that the input images are 300 ppi, use reduction = 1 * for finding words at full res, and reduction = 2 for finding * them at 150 ppi. * </pre> */ JBCLASSER * jbWordsInTextlines(const char *dirin, l_int32 reduction, l_int32 maxwidth, l_int32 maxheight, l_float32 thresh, l_float32 weight, NUMA **pnatl, l_int32 firstpage, l_int32 npages) { char *fname; l_int32 nfiles, i, w, h; BOXA *boxa; JBCLASSER *classer; NUMA *nai, *natl; PIX *pix; PIXA *pixa; SARRAY *safiles; PROCNAME("jbWordsInTextlines"); if (!pnatl) return (JBCLASSER *)ERROR_PTR("&natl not defined", procName, NULL); *pnatl = NULL; if (!dirin) return (JBCLASSER *)ERROR_PTR("dirin not defined", procName, NULL); if (reduction != 1 && reduction != 2) return (JBCLASSER *)ERROR_PTR("reduction not in {1,2}", procName, NULL); safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages); nfiles = sarrayGetCount(safiles); /* Classify components */ classer = jbCorrelationInit(JB_WORDS, maxwidth, maxheight, thresh, weight); classer->safiles = sarrayCopy(safiles); natl = numaCreate(0); *pnatl = natl; for (i = 0; i < nfiles; i++) { fname = sarrayGetString(safiles, i, L_NOCOPY); if ((pix = pixRead(fname)) == NULL) { L_WARNING("image 
file %d not read\n", procName, i); continue; } pixGetDimensions(pix, &w, &h, NULL); if (reduction == 1) { classer->w = w; classer->h = h; } else { /* reduction == 2 */ classer->w = w / 2; classer->h = h / 2; } pixGetWordsInTextlines(pix, reduction, JB_WORDS_MIN_WIDTH, JB_WORDS_MIN_HEIGHT, maxwidth, maxheight, &boxa, &pixa, &nai); jbAddPageComponents(classer, pix, boxa, pixa); numaJoin(natl, nai, 0, -1); pixDestroy(&pix); numaDestroy(&nai); boxaDestroy(&boxa); pixaDestroy(&pixa); } sarrayDestroy(&safiles); return classer; }
l_int32 main(int argc, char **argv) { char buf[256], dirname[256]; char *dirin, *pattern, *subdirout, *fname, *tail, *basename; l_int32 thresh, i, n; l_float32 scalefactor; PIX *pix1, *pix2, *pix3, *pix4; SARRAY *sa; static char mainName[] = "binarizefiles.c"; if (argc != 6) { fprintf(stderr, "Syntax: binarizefiles dirin pattern thresh scalefact dirout\n" " dirin: input directory for image files\n" " pattern: use 'allfiles' to convert all files\n" " in the directory\n" " thresh: 0 for adaptive; > 0 for global thresh (e.g., 128)\n" " scalefactor: in (0.0 ... 4.0]; use 1.0 to prevent scaling\n" " subdirout: subdirectory of /tmp for output files\n"); return 1; } dirin = argv[1]; pattern = argv[2]; thresh = atoi(argv[3]); scalefactor = atof(argv[4]); subdirout = argv[5]; if (!strcmp(pattern, "allfiles")) pattern = NULL; if (scalefactor <= 0.0 || scalefactor > 4.0) { L_WARNING("invalid scalefactor: setting to 1.0\n", mainName); scalefactor = 1.0; } /* Get the input filenames */ sa = getSortedPathnamesInDirectory(dirin, pattern, 0, 0); sarrayWriteStream(stderr, sa); n = sarrayGetCount(sa); /* Write the output files */ makeTempDirname(dirname, 256, subdirout); fprintf(stderr, "dirname: %s\n", dirname); lept_mkdir(subdirout); for (i = 0; i < n; i++) { fname = sarrayGetString(sa, i, L_NOCOPY); if ((pix1 = pixRead(fname)) == NULL) { L_ERROR("file %s not read as image", mainName, fname); continue; } splitPathAtDirectory(fname, NULL, &tail); splitPathAtExtension(tail, &basename, NULL); snprintf(buf, sizeof(buf), "%s/%s.tif", dirname, basename); FREE(tail); FREE(basename); fprintf(stderr, "fileout: %s\n", buf); if (scalefactor != 1.0) pix2 = pixScale(pix1, scalefactor, scalefactor); else pix2 = pixClone(pix1); if (thresh == 0) { pix4 = pixConvertTo8(pix2, 0); pix3 = pixAdaptThresholdToBinary(pix4, NULL, 1.0); pixDestroy(&pix4); } else { pix3 = pixConvertTo1(pix2, thresh); } pixWrite(buf, pix3, IFF_TIFF_G4); pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); } 
sarrayDestroy(&sa); return 0; }
l_int32 main(int argc, char **argv) { char buf[256], rootname[256]; char *dir, *pattern, *psdir, *imagedir; char *fname, *tail, *filename; l_int32 i, n, ret; SARRAY *sa, *saps; static char mainName[] = "concatpdf"; if (argc != 2 && argc != 3) return ERROR_INT("Syntax: concatpdf dir [pattern]", mainName, 1); dir = argv[1]; pattern = (argc == 3) ? argv[2] : NULL; /* Get the names of the pdf files */ sa = getSortedPathnamesInDirectory(dir, pattern, 0, 0); sarrayWriteStream(stderr, sa); n = sarrayGetCount(sa); #if 1 /* Convert to ps */ psdir = genPathname("/tmp/ps", NULL); lept_rmdir("ps"); lept_mkdir("ps"); saps = sarrayCreate(n); for (i = 0; i < n; i++) { fname = sarrayGetString(sa, i, L_NOCOPY); splitPathAtDirectory(fname, NULL, &tail); splitPathAtExtension(tail, &filename, NULL); snprintf(buf, sizeof(buf), "acroread -toPostScript -annotsOff %s %s", fname, psdir); fprintf(stderr, "%s\n", buf); ret = system(buf); /* acroread -toPostScript -annotsOff */ snprintf(buf, sizeof(buf), "%s/%s.ps", psdir, filename); sarrayAddString(saps, buf, L_COPY); lept_free(tail); lept_free(filename); } sarrayDestroy(&sa); #endif #if 1 /* Rasterize */ imagedir = genPathname("/tmp/image", NULL); lept_rmdir("image"); lept_mkdir("image"); sarrayWriteStream(stderr, saps); n = sarrayGetCount(saps); for (i = 0; i < n; i++) { fname = sarrayGetString(saps, i, L_NOCOPY); snprintf(rootname, sizeof(rootname), "%s/r%d", imagedir, i); snprintf(buf, sizeof(buf), "ps2png-gray %s %s", fname, rootname); fprintf(stderr, "%s\n", buf); ret = system(buf); /* ps2png-gray */ } #endif #if 1 /* Generate the pdf */ convertFilesToPdf(imagedir, "png", RESOLUTION, 1.0, L_FLATE_ENCODE, 0, "", "/tmp/output.pdf"); #endif return 0; }
int main(int argc, char **argv) { char filename[BUF_SIZE]; char *dirin, *rootname, *fname; l_int32 i, firstpage, npages, nfiles; l_float32 thresh, weight; JBDATA *data; JBCLASSER *classer; SARRAY *safiles; PIX *pix, *pixt; PIXA *pixa, *pixadb; static char mainName[] = "jbcorrelation"; if (argc != 5 && argc != 7) return ERROR_INT(" Syntax: jbcorrelation dirin thresh weight " "rootname [firstpage, npages]", mainName, 1); dirin = argv[1]; thresh = atof(argv[2]); weight = atof(argv[3]); rootname = argv[4]; if (argc == 5) { firstpage = 0; npages = 0; } else { firstpage = atoi(argv[5]); npages = atoi(argv[6]); } #if 0 /*--------------------------------------------------------------*/ jbCorrelation(dirin, thresh, weight, COMPONENTS, rootname, firstpage, npages, 1); /*--------------------------------------------------------------*/ #else /*--------------------------------------------------------------*/ safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages); nfiles = sarrayGetCount(safiles); sarrayWriteStream(stderr, safiles); /* Classify components on requested pages */ startTimer(); classer = jbCorrelationInit(COMPONENTS, 0, 0, thresh, weight); jbAddPages(classer, safiles); fprintf(stderr, "Time to generate classes: %6.3f sec\n", stopTimer()); /* Save and write out the result */ data = jbDataSave(classer); jbDataWrite(rootname, data); fprintf(stderr, "Number of classes: %d\n", classer->nclass); /* Render the pages from the classifier data. * Use debugflag == FALSE to omit outlines of each component. 
*/ pixa = jbDataRender(data, FALSE); /* Write the pages out */ npages = pixaGetCount(pixa); if (npages != nfiles) fprintf(stderr, "npages = %d, nfiles = %d, not equal!\n", npages, nfiles); for (i = 0; i < npages; i++) { pix = pixaGetPix(pixa, i, L_CLONE); snprintf(filename, BUF_SIZE, "%s.%05d", rootname, i); fprintf(stderr, "filename: %s\n", filename); pixWrite(filename, pix, IFF_PNG); pixDestroy(&pix); } #if DISPLAY_DIFFERENCE fname = sarrayGetString(safiles, 0, 0); pixt = pixRead(fname); pix = pixaGetPix(pixa, 0, L_CLONE); pixXor(pixt, pixt, pix); pixWrite("junk_output_diff", pixt, IFF_PNG); pixDestroy(&pix); pixDestroy(&pixt); #endif /* DISPLAY_DIFFERENCE */ #if DEBUG_TEST_DATA_IO { JBDATA *newdata; PIX *newpix; PIXA *newpixa; l_int32 same, iofail; /* Read the data back in and render the pages */ newdata = jbDataRead(rootname); newpixa = jbDataRender(newdata, FALSE); iofail = FALSE; for (i = 0; i < npages; i++) { pix = pixaGetPix(pixa, i, L_CLONE); newpix = pixaGetPix(newpixa, i, L_CLONE); pixEqual(pix, newpix, &same); if (!same) { iofail = TRUE; fprintf(stderr, "pix on page %d are unequal!\n", i); } pixDestroy(&pix); pixDestroy(&newpix); } if (iofail) fprintf(stderr, "read/write for jbdata fails\n"); else fprintf(stderr, "read/write for jbdata succeeds\n"); jbDataDestroy(&newdata); pixaDestroy(&newpixa); } #endif /* DEBUG_TEST_DATA_IO */ #if RENDER_DEBUG /* Use debugflag == TRUE to see outlines of each component. 
*/ pixadb = jbDataRender(data, TRUE); /* Write the debug pages out */ npages = pixaGetCount(pixadb); for (i = 0; i < npages; i++) { pix = pixaGetPix(pixadb, i, L_CLONE); snprintf(filename, BUF_SIZE, "%s.db.%05d", rootname, i); fprintf(stderr, "filename: %s\n", filename); pixWrite(filename, pix, IFF_PNG); pixDestroy(&pix); } pixaDestroy(&pixadb); #endif /* RENDER_DEBUG */ #if DISPLAY_ALL_INSTANCES /* display all instances, organized by template */ pix = pixaaDisplayByPixa(classer->pixaa, X_SPACING, Y_SPACING, MAX_OUTPUT_WIDTH); pixWrite("output_instances", pix, IFF_PNG); pixDestroy(&pix); #endif /* DISPLAY_ALL_INSTANCES */ pixaDestroy(&pixa); sarrayDestroy(&safiles); jbClasserDestroy(&classer); jbDataDestroy(&data); /*--------------------------------------------------------------*/ #endif return 0; }
int main(int argc, char **argv) { const char *name; l_int32 i, n; BOX *box; PIX *pix0, *pix1, *pixd; PIXA *pixa; SARRAY *sa1, *sa2, *sa3, *sa4; L_REGPARAMS *rp; if (regTestSetup(argc, argv, &rp)) return 1; /* ---------------- Find all the jpg and tif images --------------- */ sa1 = getSortedPathnamesInDirectory(".", ".jpg", 0, 0); sa2 = getSortedPathnamesInDirectory(".", ".tif", 0, 0); sa3 = sarraySelectByRange(sa1, 0, 9); sa4 = sarraySelectByRange(sa2, 0, 9); sarrayConcatenate(sa3, sa4); n = sarrayGetCount(sa3); sarrayDestroy(&sa1); sarrayDestroy(&sa2); sarrayDestroy(&sa4); /* ---------------- Use replace to fill up a pixa -------------------*/ pixa = pixaCreate(1); pixaExtendArrayToSize(pixa, n); if ((pix0 = pixRead("marge.jpg")) == NULL) rp->success = FALSE; pix1 = pixScaleToSize(pix0, 144, 108); /* scale 0.25 */ pixDestroy(&pix0); pixaInitFull(pixa, pix1, NULL); /* fill it up */ pixd = pixaDisplayTiledInRows(pixa, 32, 1000, 1.0, 0, 25, 2); pixDisplayWithTitle(pixd, 100, 100, NULL, rp->display); pixWrite("/tmp/regout/pix1.jpg", pixd, IFF_JFIF_JPEG); pixDestroy(&pix1); pixDestroy(&pixd); /* ---------------- And again with jpgs and tifs -------------------*/ for (i = 0; i < n; i++) { name = sarrayGetString(sa3, i, L_NOCOPY); if ((pix0 = pixRead(name)) == NULL) rp->success = FALSE; pix1 = pixScaleToSize(pix0, 144, 108); pixaReplacePix(pixa, i, pix1, NULL); pixDestroy(&pix0); } pixd = pixaDisplayTiledInRows(pixa, 32, 1000, 1.0, 0, 25, 2); pixDisplayWithTitle(pixd, 400, 100, NULL, rp->display); pixWrite("/tmp/regout/pix2.jpg", pixd, IFF_JFIF_JPEG); pixDestroy(&pixd); /* ---------------- And again, reversing the order ------------------*/ box = boxCreate(0, 0, 0, 0); pixaInitFull(pixa, NULL, box); boxDestroy(&box); for (i = 0; i < n; i++) { name = sarrayGetString(sa3, i, L_NOCOPY); if ((pix0 = pixRead(name)) == NULL) rp->success = FALSE; pix1 = pixScaleToSize(pix0, 144, 108); pixaReplacePix(pixa, n - 1 - i, pix1, NULL); pixDestroy(&pix0); } pixd = 
pixaDisplayTiledInRows(pixa, 32, 1000, 1.0, 0, 25, 2); pixDisplayWithTitle(pixd, 700, 100, NULL, rp->display); pixWrite("/tmp/regout/pix3.jpg", pixd, IFF_JFIF_JPEG); pixDestroy(&pixd); sarrayDestroy(&sa3); pixaDestroy(&pixa); return regTestCleanup(rp); }
/*!
 *  regTestCompareFiles()
 *
 *      Input:  fp (stream for output; use NULL to generate golden files)
 *              argv ([0] == name of reg test)
 *              index1 (of one output file from reg test)
 *              index2 (of another output file from reg test)
 *              &success (<return> 0 if different; input value on success)
 *      Return: 0 if OK, 1 on error (a failure in comparison is not an error)
 *
 *  Notes:
 *      (1) If @fp != NULL, this function compares two golden files to
 *          determine if they are the same.  If @fp == NULL, this is a
 *          "generate" operation; don't do the comparison.
 *      (2) This function can be called repeatedly in a single reg test.
 *      (3) The value for @success is initialized to TRUE in the reg test
 *          setup before this function is called for the first time.
 *          A failure in any file comparison is registered as a failure
 *          of the regression test.
 *      (4) The canonical format of the golden filenames is:
 *             /tmp/<root of main name>_golden.<index>.<ext of localname>
 *          e.g.,
 *             /tmp/maze_golden.0.png
 *      (5) Each golden file is located by searching /tmp for the pattern
 *          "<root>_golden.<index>."; exactly one match is required.
 *          A pattern that does not fit in the internal buffer is an
 *          error, because a truncated pattern could match the wrong file.
 *      (6) If the comparison does not report a result, the two files
 *          are treated as different and the failure is recorded.
 */
l_int32
regTestCompareFiles(FILE     *fp,
                    char    **argv,
                    l_int32   index1,
                    l_int32   index2,
                    l_int32  *psuccess)
{
char    *root;
char    *names[2];
char     namebuf[64];
int      len;
l_int32  i, error, same;
l_int32  indices[2];
SARRAY  *sa;

    PROCNAME("regTestCompareFiles");

    if (!psuccess)
        return ERROR_INT("&success not defined", procName, 1);
    if (index1 < 0 || index2 < 0)
        return ERROR_INT("index1 and/or index2 is negative", procName, 1);
    if (index1 == index2)
        return ERROR_INT("index1 must differ from index2", procName, 1);
    if (!fp)  /* no-op */
        return 0;
        /* Checked only after the no-op return, so that "generate" calls
         * behave exactly as before whatever they pass for argv. */
    if (!argv)
        return ERROR_INT("argv not defined", procName, 1);

        /* Generate partial golden file names and find the actual
         * paths to them. */
    error = FALSE;
    names[0] = names[1] = NULL;
    indices[0] = index1;
    indices[1] = index2;
    if ((root = getRootNameFromArgv0(argv[0])) == NULL)
        return ERROR_INT("invalid root", procName, 1);
    for (i = 0; i < 2; i++) {
        len = snprintf(namebuf, sizeof(namebuf), "%s_golden.%d.",
                       root, indices[i]);
        if (len < 0 || (size_t)len >= sizeof(namebuf)) {
                /* The pattern was truncated (or formatting failed); it may
                 * have lost the index, so do not search with it. */
            error = TRUE;
            continue;
        }
        sa = getSortedPathnamesInDirectory("/tmp", namebuf, 0, 0);
        if (sarrayGetCount(sa) != 1)
            error = TRUE;
        else if ((names[i] = sarrayGetString(sa, 0, L_COPY)) == NULL)
            error = TRUE;
        sarrayDestroy(&sa);
    }
    FREE(root);
    if (error == TRUE) {
        if (names[0]) FREE(names[0]);
        if (names[1]) FREE(names[1]);
        L_ERROR("golden files not found", procName);
        return 1;
    }

        /* Test and record on failure.  Start from "different" so that a
         * comparison that returns without storing a result is recorded
         * as a failure instead of reading an indeterminate value. */
    same = FALSE;
    filesAreIdentical(names[0], names[1], &same);
    if (!same) {
        fprintf(fp, "Failure in %s: comparing %s with %s\n", argv[0],
                names[0], names[1]);
        fprintf(stderr, "Failure in %s: comparing %s with %s\n", argv[0],
                names[0], names[1]);
        *psuccess = 0;
    }

    FREE(names[0]);
    FREE(names[1]);
    return 0;
}