/* * convertSegmentedPagesToPS() * * Input: pagedir (input page image directory) * pagestr (<optional> substring filter on page filenames; * can be NULL) * page_numpre (number of characters in page name before number) * maskdir (input mask image directory) * maskstr (<optional> substring filter on mask filenames; * can be NULL) * mask_numpre (number of characters in mask name before number) * numpost (number of characters in names after number) * maxnum (only consider page numbers up to this value) * textscale (scale of text output relative to pixs) * imagescale (scale of image output relative to pixs) * threshold (for binarization; typ. about 190; 0 for default) * fileout (output ps file) * Return: 0 if OK, 1 on error * * Notes: * (1) This generates a PS file for all page image and mask files in two * specified directories and that contain the page numbers as * specified below. The two directories can be the same, in which * case the page and mask files are differentiated by the two * substrings for string matches. * (2) The page images are taken in lexicographic order. * Mask images whose numbers match the page images are used to * segment the page images. Page images without a matching * mask image are scaled, thresholded and rendered entirely as text. * (3) Each PS page is generated as a compressed representation of * the page image, where the part of the image under the mask * is suitably scaled and compressed as DCT (i.e., jpeg), and * the remaining part of the page is suitably scaled, thresholded, * compressed as G4 (i.e., tiff g4), and rendered by painting * black through the resulting text mask. * (4) The scaling is typically 2x down for the DCT component * (@imagescale = 0.5) and 2x up for the G4 component * (@textscale = 2.0). * (5) The resolution is automatically set to fit to a * letter-size (8.5 x 11 inch) page. * (6) Both the DCT and the G4 encoding are PostScript level 2. * (7) It is assumed that the page number is contained within * the basename (the filename without directory or extension). * @page_numpre is the number of characters in the page basename * preceding the actual page number; @mask_numpre is likewise for * the mask basename; @numpost is the number of characters * following the page number. For example, for mask name * mask_006.tif, mask_numpre = 5 ("mask_). * (8) To render a page as is -- that is, with no thresholding * of any pixels -- use a mask in the mask directory that is * full size with all pixels set to 1. If the page is 1 bpp, * it is not necessary to have a mask. */ l_int32 convertSegmentedPagesToPS(const char *pagedir, const char *pagestr, l_int32 page_numpre, const char *maskdir, const char *maskstr, l_int32 mask_numpre, l_int32 numpost, l_int32 maxnum, l_float32 textscale, l_float32 imagescale, l_int32 threshold, const char *fileout) { l_int32 pageno, i, npages; PIX *pixs, *pixm; SARRAY *sapage, *samask; PROCNAME("convertSegmentedPagesToPS"); if (!pagedir) return ERROR_INT("pagedir not defined", procName, 1); if (!maskdir) return ERROR_INT("maskdir not defined", procName, 1); if (!fileout) return ERROR_INT("fileout not defined", procName, 1); if (threshold <= 0) { L_INFO("setting threshold to 190\n", procName); threshold = 190; } /* Get numbered full pathnames; max size of sarray is maxnum */ sapage = getNumberedPathnamesInDirectory(pagedir, pagestr, page_numpre, numpost, maxnum); samask = getNumberedPathnamesInDirectory(maskdir, maskstr, mask_numpre, numpost, maxnum); sarrayPadToSameSize(sapage, samask, (char *)""); if ((npages = sarrayGetCount(sapage)) == 0) { sarrayDestroy(&sapage); sarrayDestroy(&samask); return ERROR_INT("no matching pages found", procName, 1); } /* Generate the PS file */ pageno = 1; for (i = 0; i < npages; i++) { if ((pixs = pixReadIndexed(sapage, i)) == NULL) continue; pixm = pixReadIndexed(samask, i); pixWriteSegmentedPageToPS(pixs, pixm, textscale, imagescale, threshold, pageno, fileout); pixDestroy(&pixs); pixDestroy(&pixm); pageno++; } sarrayDestroy(&sapage); sarrayDestroy(&samask); return 0; }
/*! * gplotDestroy() * * Input: &gplot (<to be nulled>) * Return: void */ void gplotDestroy(GPLOT **pgplot) { GPLOT *gplot; PROCNAME("gplotDestroy"); if (pgplot == NULL) { L_WARNING("ptr address is null!\n", procName); return; } if ((gplot = *pgplot) == NULL) return; FREE(gplot->rootname); FREE(gplot->cmdname); sarrayDestroy(&gplot->cmddata); sarrayDestroy(&gplot->datanames); sarrayDestroy(&gplot->plotdata); sarrayDestroy(&gplot->plottitles); numaDestroy(&gplot->plotstyles); FREE(gplot->outname); if (gplot->title) FREE(gplot->title); if (gplot->xlabel) FREE(gplot->xlabel); if (gplot->ylabel) FREE(gplot->ylabel); FREE(gplot); *pgplot = NULL; return; }
/* * parseForProtos() * * Input: filein (output of cpp) * prestring (<optional> string that prefaces each decl; * use NULL to omit) * Return: parsestr (string of function prototypes), or NULL on error * * Notes: * (1) We parse the output of cpp: * cpp -ansi <filein> * Three plans were attempted, with success on the third. * (2) Plan 1. A cursory examination of the cpp output indicated that * every function was preceeded by a cpp comment statement. * So we just need to look at statements beginning after comments. * Unfortunately, this is NOT the case. Some functions start * without cpp comment lines, typically when there are no * comments in the source that immediately precede the function. * (3) Plan 2. Consider the keywords in the language that start * parts of the cpp file. Some, like 'typedef', 'enum', * 'union' and 'struct', are followed after a while by '{', * and eventually end with '}, plus an optional token and a * final ';' Others, like 'extern' and 'static', are never * the beginnings of global function definitions. Function * prototypes have one or more sets of '(' followed eventually * by a ')', and end with ';'. But function definitions have * tokens, followed by '(', more tokens, ')' and then * immediately a '{'. We would generate a prototype from this * by adding a ';' to all tokens up to the ')'. So we use * these special tokens to decide what we are parsing. And * whenever a function definition is found and the prototype * extracted, we skip through the rest of the function * past the corresponding '}'. This token ends a line, and * is often on a line of its own. But as it turns out, * the only keyword we need to consider is 'static'. * (4) Plan 3. Consider the parentheses and braces for various * declarations. A struct, enum, or union has a pair of * braces followed by a semicolon. They cannot have parentheses * before the left brace, but a struct can have lots of parentheses * within the brace set. A function prototype has no braces. * A function declaration can have sets of left and right * parentheses, but these are followed by a left brace. * So plan 3 looks at the way parentheses and braces are * organized. Once the beginning of a function definition * is found, the prototype is extracted and we search for * the ending right brace. * (5) To find the ending right brace, it is necessary to do some * careful parsing. For example, in this file, we have * left and right braces as characters, and these must not * be counted. Somewhat more tricky, the file fhmtauto.c * generates code, and includes a right brace in a string. * So we must not include braces that are in strings. But how * do we know if something is inside a string? Keep state, * starting with not-inside, and every time you hit a double quote * that is not escaped, toggle the condition. Any brace * found in the state of being within a string is ignored. * (6) When a prototype is extracted, it is put in a canonical * form (i.e., cleaned up). Finally, we check that it is * not static and save it. (If static, it is ignored). * (7) The @prestring for unix is NULL; it is included here so that * you can use Microsoft's declaration for importing or * exporting to a dll. See environ.h for examples of use. * Here, we set: @prestring = "LEPT_DLL ". Note in particular * the space character that will separate 'LEPT_DLL' from * the standard unix prototype that follows. */ char * parseForProtos(const char *filein, const char *prestring) { char *strdata, *str, *newstr, *parsestr, *secondword; l_int32 nbytes, start, next, stop, charindex, found; SARRAY *sa, *saout, *satest; PROCNAME("parseForProtos"); if (!filein) return (char *)ERROR_PTR("filein not defined", procName, NULL); /* Read in the cpp output into memory, one string for each * line in the file, omitting blank lines. */ strdata = (char *)arrayRead(filein, &nbytes); sa = sarrayCreateLinesFromString(strdata, 0); saout = sarrayCreate(0); next = 0; while (1) { /* repeat after each non-static prototype is extracted */ searchForProtoSignature(sa, next, &start, &stop, &charindex, &found); if (!found) break; /* fprintf(stderr, " start = %d, stop = %d, charindex = %d\n", start, stop, charindex); */ str = captureProtoSignature(sa, start, stop, charindex); /* Make sure it is not static. Note that 'extern' has * been prepended to the prototype, so the 'static' * keyword, if it exists, would be the second word. */ satest = sarrayCreateWordsFromString(str); secondword = sarrayGetString(satest, 1, 0); if (strcmp(secondword, "static")) { /* not static */ if (prestring) { /* prepend it to the prototype */ newstr = stringJoin(prestring, str); sarrayAddString(saout, newstr, L_INSERT); FREE(str); } else sarrayAddString(saout, str, L_INSERT); } else FREE(str); sarrayDestroy(&satest); skipToEndOfFunction(sa, stop, charindex, &next); if (next == -1) break; } /* Flatten into a string with newlines between prototypes */ parsestr = sarrayToString(saout, 1); FREE(strdata); sarrayDestroy(&sa); sarrayDestroy(&saout); return parsestr; }
/* * cleanProtoSignature() * * Input: instr (input prototype string) * Return: cleanstr (clean prototype string), or NULL on error * * Notes: * (1) Adds 'extern' at beginning and regularizes spaces * between tokens. */ static char * cleanProtoSignature(char *instr) { char *str, *cleanstr; char buf[L_BUF_SIZE]; char externstring[] = "extern"; l_int32 i, j, nwords, nchars, index, len; SARRAY *sa, *saout; PROCNAME("cleanProtoSignature"); if (!instr) return (char *)ERROR_PTR("instr not defined", procName, NULL); sa = sarrayCreateWordsFromString(instr); nwords = sarrayGetCount(sa); saout = sarrayCreate(0); sarrayAddString(saout, externstring, 1); for (i = 0; i < nwords; i++) { str = sarrayGetString(sa, i, 0); nchars = strlen(str); index = 0; for (j = 0; j < nchars; j++) { if (index > L_BUF_SIZE - 6) return (char *)ERROR_PTR("token too large", procName, NULL); if (str[j] == '(') { buf[index++] = ' '; buf[index++] = '('; buf[index++] = ' '; } else if (str[j] == ')') { buf[index++] = ' '; buf[index++] = ')'; } else buf[index++] = str[j]; } buf[index] = '\0'; sarrayAddString(saout, buf, 1); } /* Flatten to a prototype string with spaces added after * each word, and remove the last space */ cleanstr = sarrayToString(saout, 2); len = strlen(cleanstr); cleanstr[len - 1] = '\0'; sarrayDestroy(&sa); sarrayDestroy(&saout); return cleanstr; }
/* * convertFilesToPS() * * Input: dirin (input directory) * substr (<optional> substring filter on filenames; can be NULL) * res (typ. 300 or 600 ppi) * fileout (output ps file) * Return: 0 if OK, 1 on error * * Notes: * (1) This generates a PS file for all image files in a specified * directory that contain the substr pattern to be matched. * (2) Each image is written to a separate page in the output PS file. * (3) All images are written compressed: * * if tiffg4 --> use ccittg4 * * if jpeg --> use dct * * all others --> use flate * If the image is jpeg or tiffg4, we use the existing compressed * strings for the encoding; otherwise, we read the image into * a pix and flate-encode the pieces. * (4) The resolution is often confusing. It is interpreted * as the resolution of the output display device: "If the * input image were digitized at 300 ppi, what would it * look like when displayed at res ppi." So, for example, * if res = 100 ppi, then the display pixels are 3x larger * than the 300 ppi pixels, and the image will be rendered * 3x larger. * (5) The size of the PostScript file is independent of the resolution, * because the entire file is encoded. The res parameter just * tells the PS decomposer how to render the page. Therefore, * for minimum file size without loss of visual information, * if the output res is less than 300, you should downscale * the image to the output resolution before wrapping in PS. * (6) The "canvas" on which the image is rendered, at the given * output resolution, is a standard page size (8.5 x 11 in). */ l_int32 convertFilesToPS(const char *dirin, const char *substr, l_int32 res, const char *fileout) { SARRAY *sa; PROCNAME("convertFilesToPS"); if (!dirin) return ERROR_INT("dirin not defined", procName, 1); if (!fileout) return ERROR_INT("fileout not defined", procName, 1); if (res <= 0) { L_INFO("setting res to 300 ppi", procName); res = 300; } if (res < 10 || res > 4000) L_WARNING("res is typically in the range 300-600 ppi", procName); /* Get all filtered and sorted full pathnames. */ sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0); /* Generate the PS file. */ sarrayConvertFilesToPS(sa, res, fileout); sarrayDestroy(&sa); return 0; }
/*! * jbCorrelation() * * Input: dirin (directory of input images) * thresh (typically ~0.8) * weight (typically ~0.6) * components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS) * rootname (for output files) * firstpage (0-based) * npages (use 0 for all pages in dirin) * renderflag (1 to render from templates; 0 to skip) * Return: 0 if OK, 1 on error * * Notes: * (1) The images must be 1 bpp. If they are not, you can convert * them using convertFilesTo1bpp(). * (2) See prog/jbcorrelation for generating more output (e.g., * for debugging) */ l_int32 jbCorrelation(const char *dirin, l_float32 thresh, l_float32 weight, l_int32 components, const char *rootname, l_int32 firstpage, l_int32 npages, l_int32 renderflag) { char filename[L_BUF_SIZE]; l_int32 nfiles, i, numpages; JBDATA *data; JBCLASSER *classer; PIX *pix; PIXA *pixa; SARRAY *safiles; PROCNAME("jbCorrelation"); if (!dirin) return ERROR_INT("dirin not defined", procName, 1); if (!rootname) return ERROR_INT("rootname not defined", procName, 1); if (components != JB_CONN_COMPS && components != JB_CHARACTERS && components != JB_WORDS) return ERROR_INT("components invalid", procName, 1); safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages); nfiles = sarrayGetCount(safiles); /* Classify components */ classer = jbCorrelationInit(components, 0, 0, thresh, weight); jbAddPages(classer, safiles); /* Save data */ data = jbDataSave(classer); jbDataWrite(rootname, data); /* Optionally, render pages using class templates */ if (renderflag) { pixa = jbDataRender(data, FALSE); numpages = pixaGetCount(pixa); if (numpages != nfiles) fprintf(stderr, "numpages = %d, nfiles = %d, not equal!\n", numpages, nfiles); for (i = 0; i < numpages; i++) { pix = pixaGetPix(pixa, i, L_CLONE); snprintf(filename, L_BUF_SIZE, "%s.%05d", rootname, i); fprintf(stderr, "filename: %s\n", filename); pixWrite(filename, pix, IFF_PNG); pixDestroy(&pix); } pixaDestroy(&pixa); } sarrayDestroy(&safiles); jbClasserDestroy(&classer); jbDataDestroy(&data); return 0; }
/* * captureProtoSignature() * * Input: sa (output from cpp, by line) * start (starting index to search; never a comment line) * stop (index of line on which pattern is completed) * charindex (char index of completing ')' character) * Return: cleanstr (prototype string), or NULL on error * * Notes: * (1) Return all characters, ending with a ';' after the ')' */ static char * captureProtoSignature(SARRAY *sa, l_int32 start, l_int32 stop, l_int32 charindex) { char *str, *newstr, *protostr, *cleanstr; SARRAY *sap; l_int32 i; PROCNAME("captureProtoSignature"); if (!sa) return (char *)ERROR_PTR("sa not defined", procName, NULL); sap = sarrayCreate(0); for (i = start; i < stop; i++) { str = sarrayGetString(sa, i, L_COPY); sarrayAddString(sap, str, L_INSERT); } str = sarrayGetString(sa, stop, L_COPY); str[charindex + 1] = '\0'; newstr = stringJoin(str, ";"); sarrayAddString(sap, newstr, L_INSERT); LEPT_FREE(str); protostr = sarrayToString(sap, 2); sarrayDestroy(&sap); cleanstr = cleanProtoSignature(protostr); LEPT_FREE(protostr); return cleanstr; }
/* * convertFilesFittedToPS() * * Input: dirin (input directory) * substr (<optional> substring filter on filenames; can be NULL) * xpts, ypts (desired size in printer points; use 0 for default) * fileout (output ps file) * Return: 0 if OK, 1 on error * * Notes: * (1) This generates a PS file for all files in a specified directory * that contain the substr pattern to be matched. * (2) Each image is written to a separate page in the output PS file. * (3) All images are written compressed: * * if tiffg4 --> use ccittg4 * * if jpeg --> use dct * * all others --> use flate * If the image is jpeg or tiffg4, we use the existing compressed * strings for the encoding; otherwise, we read the image into * a pix and flate-encode the pieces. * (4) The resolution is internally determined such that the images * are rendered, in at least one direction, at 100% of the given * size in printer points. Use 0.0 for xpts or ypts to get * the default value, which is 612.0 or 792.0, rsp. * (5) The size of the PostScript file is independent of the resolution, * because the entire file is encoded. The @xpts and @ypts * parameter tells the PS decomposer how to render the page. */ l_int32 convertFilesFittedToPS(const char *dirin, const char *substr, l_float32 xpts, l_float32 ypts, const char *fileout) { SARRAY *sa; PROCNAME("convertFilesFittedToPS"); if (!dirin) return ERROR_INT("dirin not defined", procName, 1); if (!fileout) return ERROR_INT("fileout not defined", procName, 1); if (xpts <= 0.0) { L_INFO("setting xpts to 612.0 ppi", procName); xpts = 612.0; } if (ypts <= 0.0) { L_INFO("setting ypts to 792.0 ppi", procName); ypts = 792.0; } if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0) L_WARNING("xpts,ypts are typically in the range 500-800", procName); /* Get all filtered and sorted full pathnames. */ sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0); /* Generate the PS file. */ sarrayConvertFilesFittedToPS(sa, xpts, ypts, fileout); sarrayDestroy(&sa); return 0; }
/*! * getSortedPathnamesInDirectory() * * Input: directory name * substr (<optional> substring filter on filenames; can be NULL) * firstpage (0-based) * npages (use 0 for all to the end) * Return: sarray of sorted pathnames, or NULL on error * * Notes: * (1) If 'substr' is not NULL, only filenames that contain * the substring can be returned. If 'substr' is NULL, * none of the filenames are filtered out. * (2) The files in the directory, after optional filtering by * the substring, are lexically sorted in increasing order. * The full pathnames are returned for the requested sequence. * If no files are found after filtering, returns an empty sarray. */ SARRAY * getSortedPathnamesInDirectory(const char *dirname, const char *substr, l_int32 firstpage, l_int32 npages) { char *fname, *fullname; l_int32 i, nfiles, lastpage; SARRAY *sa, *safiles, *saout; PROCNAME("getSortedPathnamesInDirectory"); if (!dirname) return (SARRAY *)ERROR_PTR("dirname not defined", procName, NULL); if ((sa = getFilenamesInDirectory(dirname)) == NULL) return (SARRAY *)ERROR_PTR("sa not made", procName, NULL); safiles = sarraySelectBySubstring(sa, substr); sarrayDestroy(&sa); nfiles = sarrayGetCount(safiles); if (nfiles == 0) { L_WARNING("no files found", procName); return safiles; } sarraySort(safiles, safiles, L_SORT_INCREASING); firstpage = L_MIN(L_MAX(firstpage, 0), nfiles - 1); if (npages == 0) npages = nfiles - firstpage; lastpage = L_MIN(firstpage + npages - 1, nfiles - 1); saout = sarrayCreate(lastpage - firstpage + 1); for (i = firstpage; i <= lastpage; i++) { fname = sarrayGetString(safiles, i, L_NOCOPY); fullname = genPathname(dirname, fname); sarrayAddString(saout, fullname, L_INSERT); } sarrayDestroy(&safiles); return saout; }
/*! * recogDestroy() * * Input: &recog (<will be set to null before returning>) * Return: void * * Notes: * (1) If a recog has a parent, the parent owns it. A recogDestroy() * will fail if there is a parent. */ void recogDestroy(L_RECOG **precog) { L_RECOG *recog; PROCNAME("recogDestroy"); if (!precog) { L_WARNING("ptr address is null\n", procName); return; } if ((recog = *precog) == NULL) return; if (recogGetParent(recog) != NULL) { L_ERROR("recog has parent; can't be destroyed\n", procName); return; } FREE(recog->bootdir); FREE(recog->bootpattern); FREE(recog->bootpath); FREE(recog->centtab); FREE(recog->sumtab); FREE(recog->fname); sarrayDestroy(&recog->sa_text); l_dnaDestroy(&recog->dna_tochar); pixaaDestroy(&recog->pixaa_u); pixaDestroy(&recog->pixa_u); ptaaDestroy(&recog->ptaa_u); ptaDestroy(&recog->pta_u); numaDestroy(&recog->nasum_u); numaaDestroy(&recog->naasum_u); pixaaDestroy(&recog->pixaa); pixaDestroy(&recog->pixa); ptaaDestroy(&recog->ptaa); ptaDestroy(&recog->pta); numaDestroy(&recog->nasum); numaaDestroy(&recog->naasum); pixaDestroy(&recog->pixa_tr); pixaDestroy(&recog->pixadb_ave); pixaDestroy(&recog->pixa_id); pixDestroy(&recog->pixdb_ave); pixDestroy(&recog->pixdb_range); pixaDestroy(&recog->pixadb_boot); pixaDestroy(&recog->pixadb_split); FREE(recog->fontdir); bmfDestroy(&recog->bmf); rchDestroy(&recog->rch); rchaDestroy(&recog->rcha); recogDestroyDid(recog); FREE(recog); *precog = NULL; return; }
/*! * \brief strcodeDestroy() * * \param[out] pstrcode &strcode is set to null after destroying the sarrays * \return void */ static void strcodeDestroy(L_STRCODE **pstrcode) { L_STRCODE *strcode; PROCNAME("strcodeDestroy"); if (pstrcode == NULL) { L_WARNING("ptr address is null!\n", procName); return; } if ((strcode = *pstrcode) == NULL) return; sarrayDestroy(&strcode->function); sarrayDestroy(&strcode->data); sarrayDestroy(&strcode->descr); LEPT_FREE(strcode); *pstrcode = NULL; return; }
/*! * \brief l_getIndexFromFile() * * \param[in] filename * \param[out] pindex found index * \return 0 if found, 1 on error. */ static l_int32 l_getIndexFromFile(const char *filename, l_int32 *pindex) { char buf[256]; char *word; FILE *fp; l_int32 notfound, format; SARRAY *sa; PROCNAME("l_getIndexFromFile"); if (!pindex) return ERROR_INT("&index not defined", procName, 1); *pindex = 0; if (!filename) return ERROR_INT("filename not defined", procName, 1); /* Open the stream, read lines until you find one with more * than a newline, and grab the first word. */ if ((fp = fopenReadStream(filename)) == NULL) return ERROR_INT("stream not opened", procName, 1); do { if ((fgets(buf, sizeof(buf), fp)) == NULL) { fclose(fp); return ERROR_INT("fgets read fail", procName, 1); } } while (buf[0] == '\n'); fclose(fp); sa = sarrayCreateWordsFromString(buf); word = sarrayGetString(sa, 0, L_NOCOPY); /* Find the index associated with the word. If it is not * found, test to see if the file is a compressed pix. */ notfound = l_getIndexFromStructname(word, pindex); sarrayDestroy(&sa); if (notfound) { /* maybe a Pix */ if (findFileFormat(filename, &format) == 0) { l_getIndexFromStructname("Pix", pindex); } else { return ERROR_INT("no file type identified", procName, 1); } } return 0; }
/*! * \brief strcodeCreateFromFile() * * \param[in] filein containing filenames of serialized data * \param[in] fileno integer that labels the two output files * \param[in] outdir [optional] if null, files are made in /tmp/lept/auto * \return 0 if OK, 1 on error * * <pre> * Notes: * (1) The %filein has one filename on each line. * Comment lines begin with "#". * (2) The output is 2 files: * autogen.\<fileno\>.c * autogen.\<fileno\>.h * </pre> */ l_int32 strcodeCreateFromFile(const char *filein, l_int32 fileno, const char *outdir) { char *fname; const char *type; l_uint8 *data; size_t nbytes; l_int32 i, n, index; SARRAY *sa; L_STRCODE *strcode; PROCNAME("strcodeCreateFromFile"); if (!filein) return ERROR_INT("filein not defined", procName, 1); if ((data = l_binaryRead(filein, &nbytes)) == NULL) return ERROR_INT("data not read from file", procName, 1); sa = sarrayCreateLinesFromString((char *)data, 0); LEPT_FREE(data); if (!sa) return ERROR_INT("sa not made", procName, 1); if ((n = sarrayGetCount(sa)) == 0) { sarrayDestroy(&sa); return ERROR_INT("no filenames in the file", procName, 1); } strcode = strcodeCreate(fileno); for (i = 0; i < n; i++) { fname = sarrayGetString(sa, i, L_NOCOPY); if (fname[0] == '#') continue; if (l_getIndexFromFile(fname, &index)) { L_ERROR("File %s has no recognizable type\n", procName, fname); } else { type = l_assoc[index].type; L_INFO("File %s is type %s\n", procName, fname, type); strcodeGenerate(strcode, fname, type); } } strcodeFinalize(&strcode, outdir); return 0; }
/*! * \brief recogDestroy() * * \param[in,out] precog will be set to null before returning * \return void */ void recogDestroy(L_RECOG **precog) { L_RECOG *recog; PROCNAME("recogDestroy"); if (!precog) { L_WARNING("ptr address is null\n", procName); return; } if ((recog = *precog) == NULL) return; LEPT_FREE(recog->centtab); LEPT_FREE(recog->sumtab); sarrayDestroy(&recog->sa_text); l_dnaDestroy(&recog->dna_tochar); pixaaDestroy(&recog->pixaa_u); pixaDestroy(&recog->pixa_u); ptaaDestroy(&recog->ptaa_u); ptaDestroy(&recog->pta_u); numaDestroy(&recog->nasum_u); numaaDestroy(&recog->naasum_u); pixaaDestroy(&recog->pixaa); pixaDestroy(&recog->pixa); ptaaDestroy(&recog->ptaa); ptaDestroy(&recog->pta); numaDestroy(&recog->nasum); numaaDestroy(&recog->naasum); pixaDestroy(&recog->pixa_tr); pixaDestroy(&recog->pixadb_ave); pixaDestroy(&recog->pixa_id); pixDestroy(&recog->pixdb_ave); pixDestroy(&recog->pixdb_range); pixaDestroy(&recog->pixadb_boot); pixaDestroy(&recog->pixadb_split); bmfDestroy(&recog->bmf); rchDestroy(&recog->rch); rchaDestroy(&recog->rcha); recogDestroyDid(recog); LEPT_FREE(recog); *precog = NULL; return; }
/*! * pixaReadFiles() * * Input: dirname * substr (<optional> substring filter on filenames; can be null) * Return: pixa, or null on error * * Notes: * (1) @dirname is the full path for the directory. * (2) @substr is the part of the file name (excluding * the directory) that is to be matched. All matching * filenames are read into the Pixa. If substr is NULL, * all filenames are read into the Pixa. */ PIXA * pixaReadFiles(const char *dirname, const char *substr) { PIXA *pixa; SARRAY *sa; PROCNAME("pixaReadFiles"); if (!dirname) return (PIXA *)ERROR_PTR("dirname not defined", procName, NULL); if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL) return (PIXA *)ERROR_PTR("sa not made", procName, NULL); pixa = pixaReadFilesSA(sa); sarrayDestroy(&sa); return pixa; }
/*! * \brief sarrayUnionByAset() * * \param[in] sa1, sa2 * \return sad with the union of the string set, or NULL on error * * <pre> * Notes: * (1) Duplicates are removed from the concatenation of the two arrays. * (2) The key for each string is a 64-bit hash. * (2) Algorithm: Concatenate the two sarrays. Then build a set, * using hashed strings as keys. As the set is built, first do * a find; if not found, add the key to the set and add the string * to the output sarray. This is O(nlogn). * </pre> */ SARRAY * sarrayUnionByAset(SARRAY *sa1, SARRAY *sa2) { SARRAY *sa3, *sad; PROCNAME("sarrayUnionByAset"); if (!sa1) return (SARRAY *)ERROR_PTR("sa1 not defined", procName, NULL); if (!sa2) return (SARRAY *)ERROR_PTR("sa2 not defined", procName, NULL); /* Join */ sa3 = sarrayCopy(sa1); sarrayJoin(sa3, sa2); /* Eliminate duplicates */ sad = sarrayRemoveDupsByAset(sa3); sarrayDestroy(&sa3); return sad; }
SARRAY * getFilenamesInDirectory(const char *dirname) { SARRAY *safiles; WIN32_FIND_DATAA ffd; size_t length_of_path; CHAR szDir[MAX_PATH]; /* MAX_PATH is defined in stdlib.h */ HANDLE hFind = INVALID_HANDLE_VALUE; PROCNAME("getFilenamesInDirectory"); if (!dirname) return (SARRAY *)ERROR_PTR("dirname not defined", procName, NULL); length_of_path = strlen(dirname); if (length_of_path > (MAX_PATH - 2)) return (SARRAY *)ERROR_PTR("dirname is to long", procName, NULL); strncpy(szDir, dirname, MAX_PATH); szDir[MAX_PATH - 1] = '\0'; strncat(szDir, TEXT("\\*"), MAX_PATH - strlen(szDir)); if ((safiles = sarrayCreate(0)) == NULL) return (SARRAY *)ERROR_PTR("safiles not made", procName, NULL); hFind = FindFirstFileA(szDir, &ffd); if (INVALID_HANDLE_VALUE == hFind) { sarrayDestroy(&safiles); return (SARRAY *)ERROR_PTR("hFind not opened", procName, NULL); } while (FindNextFileA(hFind, &ffd) != 0) { if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) /* skip dirs */ continue; sarrayAddString(safiles, ffd.cFileName, L_COPY); } FindClose(hFind); return safiles; }
PIXA *MakeBootnum2(void) { char *fname; l_int32 i, n, w, h; BOX *box; PIX *pix; PIXA *pixa; L_RECOG *recog; SARRAY *sa; /* Phase 1: generate recog from the digit data */ recog = recogCreate(20, 32, L_USE_ALL, 120, 1); sa = getSortedPathnamesInDirectory("recog/bootnums", "png", 0, 0); n = sarrayGetCount(sa); for (i = 0; i < n; i++) { /* Read each pix: grayscale, multi-character, labelled */ fname = sarrayGetString(sa, i, L_NOCOPY); if ((pix = pixRead(fname)) == NULL) { fprintf(stderr, "Can't read %s\n", fname); continue; } /* Convert to a set of 1 bpp, single character, labelled */ pixGetDimensions(pix, &w, &h, NULL); box = boxCreate(0, 0, w, h); recogTrainLabelled(recog, pix, box, NULL, 1, 0); pixDestroy(&pix); boxDestroy(&box); } recogTrainingFinished(recog, 1); sarrayDestroy(&sa); /* Phase 2: generate pixa consisting of 1 bpp, single character pix */ recogWritePixa("/tmp/lept/recog/digits/bootnum2.pa", recog); pixa = pixaRead("/tmp/lept/recog/digits/bootnum2.pa"); recogDestroy(&recog); return pixa; }
/*! * numaCreateFromString() * * Input: string (of comma-separated numbers) * Return: na, or null on error * * Notes: * (1) The numbers can be ints or floats; they will be interpreted * and stored as floats. To use them as integers (e.g., for * indexing into arrays), use numaGetIValue(...). */ NUMA * numaCreateFromString(const char *str) { char *substr; l_int32 i, n, nerrors; l_float32 val; NUMA *na; SARRAY *sa; PROCNAME("numaCreateFromString"); if (!str || (strlen(str) == 0)) return (NUMA *)ERROR_PTR("str not defined or empty", procName, NULL); sa = sarrayCreate(0); sarraySplitString(sa, str, ","); n = sarrayGetCount(sa); na = numaCreate(n); nerrors = 0; for (i = 0; i < n; i++) { substr = sarrayGetString(sa, i, L_NOCOPY); if (sscanf(substr, "%f", &val) != 1) { L_ERROR("substr %d not float\n", procName, i); nerrors++; } else { numaAddNumber(na, val); } } sarrayDestroy(&sa); if (nerrors > 0) { numaDestroy(&na); return (NUMA *)ERROR_PTR("non-floats in string", procName, NULL); } return na; }
/*! * \brief l_genDataString() * * \param[in] filein input file of serialized data * \param[in] ifunc index into set of functions in output file * \return encoded ascii data string, or NULL on error reading from file */ static char * l_genDataString(const char *filein, l_int32 ifunc) { char buf[80]; char *cdata1, *cdata2, *cdata3; l_uint8 *data1, *data2; l_int32 csize1, csize2; size_t size1, size2; SARRAY *sa; PROCNAME("l_genDataString"); if (!filein) return (char *)ERROR_PTR("filein not defined", procName, NULL); /* Read it in, gzip it, encode, and reformat. We gzip because some * serialized data has a significant amount of ascii content. */ if ((data1 = l_binaryRead(filein, &size1)) == NULL) return (char *)ERROR_PTR("bindata not returned", procName, NULL); data2 = zlibCompress(data1, size1, &size2); cdata1 = encodeBase64(data2, size2, &csize1); cdata2 = reformatPacked64(cdata1, csize1, 4, 72, 1, &csize2); LEPT_FREE(data1); LEPT_FREE(data2); LEPT_FREE(cdata1); /* Prepend the string declaration signature and put it together */ sa = sarrayCreate(3); snprintf(buf, sizeof(buf), "static const char *l_strdata_%d =\n", ifunc); sarrayAddString(sa, buf, L_COPY); sarrayAddString(sa, cdata2, L_INSERT); sarrayAddString(sa, (char *)";\n", L_COPY); cdata3 = sarrayToString(sa, 0); sarrayDestroy(&sa); return cdata3; }
l_int32 main(int argc, char **argv) { char *boxatxt; l_int32 i; BOXA *boxa1, *boxa2, *boxa3; BOXAA *baa, *baa1; NUMAA *naa1; PIX *pixdb, *pix1, *pix2, *pix3, *pix4; PIXA *pixa1, *pixa2, *pixa3, *pixat; L_RECOG *recog; L_RECOGA *recoga; SARRAY *sa1; /* ----- Example identifying samples using training data ----- */ #if 1 /* Read the training data */ pixat = pixaRead("recog/sets/train06.pa"); recog = recogCreateFromPixa(pixat, 0, 0, L_USE_ALL, 128, 1); recoga = recogaCreateFromRecog(recog); pixaDestroy(&pixat); /* Read the data from all samples */ pix1 = pixRead("recog/sets/samples06.png"); boxatxt = pixGetText(pix1); boxa1 = boxaReadMem((l_uint8 *)boxatxt, strlen(boxatxt)); pixa1 = pixaCreateFromBoxa(pix1, boxa1, NULL); pixDestroy(&pix1); /* destroys boxa1 */ /* Identify components in the sample data */ pixa2 = pixaCreate(0); pixa3 = pixaCreate(0); for (i = 0; i < 9; i++) { /* if (i != 4) continue; */ /* dots form separate boxa */ /* if (i != 8) continue; */ /* broken 2 in '24' */ pix1 = pixaGetPix(pixa1, i, L_CLONE); /* Show the 2d box data in the sample */ boxa2 = pixConnComp(pix1, NULL, 8); baa = boxaSort2d(boxa2, NULL, 6, 6, 5); pix2 = boxaaDisplay(baa, 3, 1, 0xff000000, 0x00ff0000, 0, 0); pixaAddPix(pixa3, pix2, L_INSERT); boxaaDestroy(&baa); boxaDestroy(&boxa2); /* Get the numbers in the sample */ recogaIdentifyMultiple(recoga, pix1, 0, 5, 3, &boxa3, NULL, &pixdb, 0); sa1 = recogaExtractNumbers(recoga, boxa3, 0.7, -1, &baa1, &naa1); sarrayWriteStream(stderr, sa1); boxaaWriteStream(stderr, baa1); numaaWriteStream(stderr, naa1); pixaAddPix(pixa2, pixdb, L_INSERT); /* pixaWrite("/tmp/pixa.pa", pixa2); */ pixDestroy(&pix1); boxaDestroy(&boxa3); boxaaDestroy(&baa1); numaaDestroy(&naa1); sarrayDestroy(&sa1); } pix3 = pixaDisplayLinearly(pixa2, L_VERT, 1.0, 0, 20, 1, NULL); pixWrite("/tmp/pix3.png", pix3, IFF_PNG); pix4 = pixaDisplayTiledInRows(pixa3, 32, 1500, 1.0, 0, 20, 2); pixDisplay(pix4, 500, 0); pixWrite("/tmp/pix4.png", pix4, IFF_PNG); pixaDestroy(&pixa2); pixaDestroy(&pixa3); pixDestroy(&pix1); pixDestroy(&pix3); pixDestroy(&pix4); pixaDestroy(&pixa1); boxaDestroy(&boxa1); recogaDestroy(&recoga); #endif return 0; }
/*! * kernelCreateFromFile() * * Input: filename * Return: kernel, or null on error * * Notes: * (1) The file contains, in the following order: * - Any number of comment lines starting with '#' are ignored * - The height and width of the kernel * - The y and x values of the kernel origin * - The kernel data, formatted as lines of numbers (integers * or floats) for the kernel values in row-major order, * and with no other punctuation. * (Note: this differs from kernelCreateFromString(), * where each line must begin and end with a double-quote * to tell the compiler it's part of a string.) * - The kernel specification ends when a blank line, * a comment line, or the end of file is reached. * (2) All lines must be left-justified. * (3) See kernelCreateFromString() for a description of the string * format for the kernel data. As an example, here are the lines * of a valid kernel description file In the file, all lines * are left-justified: * # small 3x3 kernel * 3 3 * 1 1 * 25.5 51 24.3 * 70.2 146.3 73.4 * 20 50.9 18.4 */ L_KERNEL * kernelCreateFromFile(const char *filename) { char *filestr, *line; l_int32 nlines, i, j, first, index, w, h, cx, cy, n; l_float32 val; size_t size; NUMA *na, *nat; SARRAY *sa; L_KERNEL *kel; PROCNAME("kernelCreateFromFile"); if (!filename) return (L_KERNEL *)ERROR_PTR("filename not defined", procName, NULL); filestr = (char *)l_binaryRead(filename, &size); sa = sarrayCreateLinesFromString(filestr, 1); FREE(filestr); nlines = sarrayGetCount(sa); /* Find the first data line. */ for (i = 0; i < nlines; i++) { line = sarrayGetString(sa, i, L_NOCOPY); if (line[0] != '#') { first = i; break; } } /* Find the kernel dimensions and origin location. */ line = sarrayGetString(sa, first, L_NOCOPY); if (sscanf(line, "%d %d", &h, &w) != 2) return (L_KERNEL *)ERROR_PTR("error reading h,w", procName, NULL); line = sarrayGetString(sa, first + 1, L_NOCOPY); if (sscanf(line, "%d %d", &cy, &cx) != 2) return (L_KERNEL *)ERROR_PTR("error reading cy,cx", procName, NULL); /* Extract the data. This ends when we reach eof, or when we * encounter a line of data that is either a null string or * contains just a newline. */ na = numaCreate(0); for (i = first + 2; i < nlines; i++) { line = sarrayGetString(sa, i, L_NOCOPY); if (line[0] == '\0' || line[0] == '\n' || line[0] == '#') break; nat = parseStringForNumbers(line, " \t\n"); numaJoin(na, nat, 0, -1); numaDestroy(&nat); } sarrayDestroy(&sa); n = numaGetCount(na); if (n != w * h) { numaDestroy(&na); fprintf(stderr, "w = %d, h = %d, num ints = %d\n", w, h, n); return (L_KERNEL *)ERROR_PTR("invalid integer data", procName, NULL); } kel = kernelCreate(h, w); kernelSetOrigin(kel, cy, cx); index = 0; for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { numaGetFValue(na, index, &val); kernelSetElement(kel, i, j, val); index++; } } numaDestroy(&na); return kel; }
int main(int argc, char **argv) { char *filein, *str, *prestring, *outprotos, *protostr; const char *spacestr = " "; char buf[L_BUF_SIZE]; l_uint8 *allheaders; l_int32 i, maxindex, in_line, nflags, protos_added, firstfile, len, ret; size_t nbytes; L_BYTEA *ba, *ba2; SARRAY *sa, *safirst; static char mainName[] = "xtractprotos"; if (argc == 1) { fprintf(stderr, "xtractprotos [-prestring=<string>] [-protos=<where>] " "[list of C files]\n" "where the prestring is prepended to each prototype, and \n" "protos can be either 'inline' or the name of an output " "prototype file\n"); return 1; } /* ---------------------------------------------------------------- */ /* Parse input flags and find prestring and outprotos, if requested */ /* ---------------------------------------------------------------- */ prestring = outprotos = NULL; in_line = FALSE; nflags = 0; maxindex = L_MIN(3, argc); for (i = 1; i < maxindex; i++) { if (argv[i][0] == '-') { if (!strncmp(argv[i], "-prestring", 10)) { nflags++; ret = sscanf(argv[i] + 1, "prestring=%s", buf); if (ret != 1) { fprintf(stderr, "parse failure for prestring\n"); return 1; } if ((len = strlen(buf)) > L_BUF_SIZE - 3) { L_WARNING("prestring too large; omitting!\n", mainName); } else { buf[len] = ' '; buf[len + 1] = '\0'; prestring = stringNew(buf); } } else if (!strncmp(argv[i], "-protos", 7)) { nflags++; ret = sscanf(argv[i] + 1, "protos=%s", buf); if (ret != 1) { fprintf(stderr, "parse failure for protos\n"); return 1; } outprotos = stringNew(buf); if (!strncmp(outprotos, "inline", 7)) in_line = TRUE; } } } if (argc - nflags < 2) { fprintf(stderr, "no files specified!\n"); return 1; } /* ---------------------------------------------------------------- */ /* Generate the prototype string */ /* ---------------------------------------------------------------- */ ba = l_byteaCreate(500); /* First the extern C head */ sa = sarrayCreate(0); sarrayAddString(sa, (char *)"/*", 1); snprintf(buf, L_BUF_SIZE, " * These prototypes were autogen'd by xtractprotos, v. %s", version); sarrayAddString(sa, buf, 1); sarrayAddString(sa, (char *)" */", 1); sarrayAddString(sa, (char *)"#ifdef __cplusplus", 1); sarrayAddString(sa, (char *)"extern \"C\" {", 1); sarrayAddString(sa, (char *)"#endif /* __cplusplus */\n", 1); str = sarrayToString(sa, 1); l_byteaAppendString(ba, str); lept_free(str); sarrayDestroy(&sa); /* Then the prototypes */ firstfile = 1 + nflags; protos_added = FALSE; for (i = firstfile; i < argc; i++) { filein = argv[i]; len = strlen(filein); if (filein[len - 1] == 'h') /* skip .h files */ continue; snprintf(buf, L_BUF_SIZE, "cpp -ansi -DNO_PROTOS %s %s", filein, tempfile); ret = system(buf); if (ret) { fprintf(stderr, "cpp failure for %s; continuing\n", filein); continue; } if ((str = parseForProtos(tempfile, prestring)) == NULL) { fprintf(stderr, "parse failure for %s; continuing\n", filein); continue; } if (strlen(str) > 1) { /* strlen(str) == 1 is a file without protos */ l_byteaAppendString(ba, str); protos_added = TRUE; } lept_free(str); } /* Lastly the extern C tail */ sa = sarrayCreate(0); sarrayAddString(sa, (char *)"\n#ifdef __cplusplus", 1); sarrayAddString(sa, (char *)"}", 1); sarrayAddString(sa, (char *)"#endif /* __cplusplus */", 1); str = sarrayToString(sa, 1); l_byteaAppendString(ba, str); lept_free(str); sarrayDestroy(&sa); protostr = (char *)l_byteaCopyData(ba, &nbytes); l_byteaDestroy(&ba); /* ---------------------------------------------------------------- */ /* Generate the output */ /* ---------------------------------------------------------------- */ if (!outprotos) { /* just write to stdout */ fprintf(stderr, "%s\n", protostr); lept_free(protostr); return 0; } /* If no protos were found, do nothing further */ if (!protos_added) { fprintf(stderr, "No protos found\n"); lept_free(protostr); return 1; } /* Make the output files */ ba = l_byteaInitFromFile("allheaders_top.txt"); if (!in_line) { snprintf(buf, sizeof(buf), "#include \"%s\"\n", outprotos); l_byteaAppendString(ba, buf); l_binaryWrite(outprotos, "w", protostr, nbytes); } else { l_byteaAppendString(ba, protostr); } ba2 = l_byteaInitFromFile("allheaders_bot.txt"); l_byteaJoin(ba, &ba2); l_byteaWrite("allheaders.h", ba, 0, 0); l_byteaDestroy(&ba); lept_free(protostr); return 0; }
int main(int argc, char **argv) { l_int32 ignore; size_t nbytesin, nbytesout; char *infile, *instring, *outstring; SARRAY *sa1, *sa2, *sa3, *sa4, *sa5; char buf[256]; static char mainName[] = "string_reg"; if (argc != 2) return ERROR_INT(" Syntax: string_reg infile", mainName, 1); infile = argv[1]; instring = (char *)l_binaryRead(infile, &nbytesin); if (!instring) return ERROR_INT("file not read", mainName, 1); sa1 = sarrayCreateWordsFromString(instring); sa2 = sarrayCreateLinesFromString(instring, 0); sa3 = sarrayCreateLinesFromString(instring, 1); outstring = sarrayToString(sa1, 0); nbytesout = strlen(outstring); l_binaryWrite("/tmp/junk1.txt", "w", outstring, nbytesout); lept_free(outstring); outstring = sarrayToString(sa1, 1); nbytesout = strlen(outstring); l_binaryWrite("/tmp/junk2.txt", "w", outstring, nbytesout); lept_free(outstring); outstring = sarrayToString(sa2, 0); nbytesout = strlen(outstring); l_binaryWrite("/tmp/junk3.txt", "w", outstring, nbytesout); lept_free(outstring); outstring = sarrayToString(sa2, 1); nbytesout = strlen(outstring); l_binaryWrite("/tmp/junk4.txt", "w", outstring, nbytesout); lept_free(outstring); outstring = sarrayToString(sa3, 0); nbytesout = strlen(outstring); l_binaryWrite("/tmp/junk5.txt", "w", outstring, nbytesout); lept_free(outstring); outstring = sarrayToString(sa3, 1); nbytesout = strlen(outstring); l_binaryWrite("/tmp/junk6.txt", "w", outstring, nbytesout); lept_free(outstring); sprintf(buf, "diff -s /tmp/junk6.txt %s", infile); ignore = system(buf); /* write/read/write; compare /tmp/junkout5 with /tmp/junkout6 */ sarrayWrite("/tmp/junk7.txt", sa2); sarrayWrite("/tmp/junk8.txt", sa3); sa4 = sarrayRead("/tmp/junk8.txt"); sarrayWrite("/tmp/junk9.txt", sa4); sa5 = sarrayRead("/tmp/junk9.txt"); ignore = system("diff -s /tmp/junk8.txt /tmp/junk9.txt"); sarrayDestroy(&sa1); sarrayDestroy(&sa2); sarrayDestroy(&sa3); sarrayDestroy(&sa4); sarrayDestroy(&sa5); lept_free(instring); return 0; }
/*! * regTestCompareFiles() * * Input: rp (regtest parameters) * index1 (of one output file from reg test) * index2 (of another output file from reg test) * Return: 0 if OK, 1 on error (a failure in comparison is not an error) * * Notes: * (1) This only does something in "compare" mode. * (2) The canonical format of the golden filenames is: * /tmp/golden/<root of main name>_golden.<index>.<ext of localname> * e.g., * /tmp/golden/maze_golden.0.png */ l_int32 regTestCompareFiles(L_REGPARAMS *rp, l_int32 index1, l_int32 index2) { char *name1, *name2; char namebuf[256]; l_int32 same; SARRAY *sa; PROCNAME("regTestCompareFiles"); if (!rp) return ERROR_INT("rp not defined", procName, 1); if (index1 < 0 || index2 < 0) { rp->success = FALSE; return ERROR_INT("index1 and/or index2 is negative", procName, 1); } if (index1 == index2) { rp->success = FALSE; return ERROR_INT("index1 must differ from index2", procName, 1); } rp->index++; if (rp->mode != L_REG_COMPARE) return 0; /* Generate the golden file names */ snprintf(namebuf, sizeof(namebuf), "%s_golden.%02d.", rp->testname, index1); sa = getSortedPathnamesInDirectory("/tmp/golden", namebuf, 0, 0); if (sarrayGetCount(sa) != 1) { sarrayDestroy(&sa); rp->success = FALSE; L_ERROR("golden file %s not found\n", procName, namebuf); return 1; } name1 = sarrayGetString(sa, 0, L_COPY); sarrayDestroy(&sa); snprintf(namebuf, sizeof(namebuf), "%s_golden.%02d.", rp->testname, index2); sa = getSortedPathnamesInDirectory("/tmp/golden", namebuf, 0, 0); if (sarrayGetCount(sa) != 1) { sarrayDestroy(&sa); rp->success = FALSE; FREE(name1); L_ERROR("golden file %s not found\n", procName, namebuf); return 1; } name2 = sarrayGetString(sa, 0, L_COPY); sarrayDestroy(&sa); /* Test and record on failure */ filesAreIdentical(name1, name2, &same); if (!same) { fprintf(rp->fp, "Failure in %s_reg, index %d: comparing %s with %s\n", rp->testname, rp->index, name1, name2); fprintf(stderr, "Failure in %s_reg, index %d: comparing %s with %s\n", rp->testname, rp->index, name1, name2); rp->success = FALSE; } FREE(name1); FREE(name2); return 0; }
int main(int argc, char **argv) { char filename[BUF_SIZE]; char *dirin, *rootname, *fname; l_int32 i, j, w, h, firstpage, npages, nfiles, ncomp; l_int32 index, ival, rval, gval, bval; BOX *box; BOXA *boxa; BOXAA *baa; JBDATA *data; JBCLASSER *classer; NUMA *nai; NUMAA *naa; SARRAY *safiles; PIX *pixs, *pixt1, *pixt2, *pixd; PIXCMAP *cmap; static char mainName[] = "wordsinorder"; if (argc != 3 && argc != 5) return ERROR_INT( " Syntax: wordsinorder dirin rootname [firstpage, npages]", mainName, 1); dirin = argv[1]; rootname = argv[2]; if (argc == 3) { firstpage = 0; npages = 0; } else { firstpage = atoi(argv[3]); npages = atoi(argv[4]); } /* Compute the word bounding boxes at 2x reduction, along with * the textlines that they are in. */ safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages); nfiles = sarrayGetCount(safiles); baa = boxaaCreate(nfiles); naa = numaaCreate(nfiles); for (i = 0; i < nfiles; i++) { fname = sarrayGetString(safiles, i, 0); if ((pixs = pixRead(fname)) == NULL) { L_WARNING("image file %d not read\n", mainName, i); continue; } pixGetWordBoxesInTextlines(pixs, 2, MIN_WORD_WIDTH, MIN_WORD_HEIGHT, MAX_WORD_WIDTH, MAX_WORD_HEIGHT, &boxa, &nai); boxaaAddBoxa(baa, boxa, L_INSERT); numaaAddNuma(naa, nai, L_INSERT); #if RENDER_PAGES /* Show the results on a 2x reduced image, where each * word is outlined and the color of the box depends on the * computed textline. */ pixt1 = pixReduceRankBinary2(pixs, 2, NULL); pixGetDimensions(pixt1, &w, &h, NULL); pixd = pixCreate(w, h, 8); cmap = pixcmapCreateRandom(8, 1, 1); /* first color is black */ pixSetColormap(pixd, cmap); pixt2 = pixUnpackBinary(pixt1, 8, 1); pixRasterop(pixd, 0, 0, w, h, PIX_SRC | PIX_DST, pixt2, 0, 0); ncomp = boxaGetCount(boxa); for (j = 0; j < ncomp; j++) { box = boxaGetBox(boxa, j, L_CLONE); numaGetIValue(nai, j, &ival); index = 1 + (ival % 254); /* omit black and white */ pixcmapGetColor(cmap, index, &rval, &gval, &bval); pixRenderBoxArb(pixd, box, 2, rval, gval, bval); boxDestroy(&box); } snprintf(filename, BUF_SIZE, "%s.%05d", rootname, i); fprintf(stderr, "filename: %s\n", filename); pixWrite(filename, pixd, IFF_PNG); pixDestroy(&pixt1); pixDestroy(&pixt2); pixDestroy(&pixs); pixDestroy(&pixd); #endif /* RENDER_PAGES */ } boxaaDestroy(&baa); numaaDestroy(&naa); sarrayDestroy(&safiles); return 0; }
/*! * convertFilesTo1bpp() * * Input: dirin * substr (<optional> substring filter on filenames; can be NULL) * upscaling (1, 2 or 4; only for input color or grayscale) * thresh (global threshold for binarization; use 0 for default) * firstpage * npages (use 0 to do all from @firstpage to the end) * dirout * outformat (IFF_PNG, IFF_TIFF_G4) * Return: 0 if OK, 1 on error * * Notes: * (1) Images are sorted lexicographically, and the names in the * output directory are retained except for the extension. */ l_int32 convertFilesTo1bpp(const char *dirin, const char *substr, l_int32 upscaling, l_int32 thresh, l_int32 firstpage, l_int32 npages, const char *dirout, l_int32 outformat) { l_int32 i, nfiles; char buf[512]; char *fname, *tail, *basename; PIX *pixs, *pixg1, *pixg2, *pixb; SARRAY *safiles; PROCNAME("convertFilesTo1bpp"); if (!dirin) return ERROR_INT("dirin", procName, 1); if (!dirout) return ERROR_INT("dirout", procName, 1); if (upscaling != 1 && upscaling != 2 && upscaling != 4) return ERROR_INT("invalid upscaling factor", procName, 1); if (thresh <= 0) thresh = 180; if (firstpage < 0) firstpage = 0; if (npages < 0) npages = 0; if (outformat != IFF_TIFF_G4) outformat = IFF_PNG; safiles = getSortedPathnamesInDirectory(dirin, substr, firstpage, npages); if (!safiles) return ERROR_INT("safiles not made", procName, 1); if ((nfiles = sarrayGetCount(safiles)) == 0) { sarrayDestroy(&safiles); return ERROR_INT("no matching files in the directory", procName, 1); } for (i = 0; i < nfiles; i++) { fname = sarrayGetString(safiles, i, L_NOCOPY); if ((pixs = pixRead(fname)) == NULL) { L_WARNING("Couldn't read file %s\n", procName, fname); continue; } if (pixGetDepth(pixs) == 32) pixg1 = pixConvertRGBToLuminance(pixs); else pixg1 = pixClone(pixs); pixg2 = pixRemoveColormap(pixg1, REMOVE_CMAP_TO_GRAYSCALE); if (pixGetDepth(pixg2) == 1) { pixb = pixClone(pixg2); } else { if (upscaling == 1) pixb = pixThresholdToBinary(pixg2, thresh); else if (upscaling == 2) pixb = pixScaleGray2xLIThresh(pixg2, thresh); else /* upscaling == 4 */ pixb = pixScaleGray4xLIThresh(pixg2, thresh); } pixDestroy(&pixs); pixDestroy(&pixg1); pixDestroy(&pixg2); splitPathAtDirectory(fname, NULL, &tail); splitPathAtExtension(tail, &basename, NULL); if (outformat == IFF_TIFF_G4) { snprintf(buf, sizeof(buf), "%s/%s.tif", dirout, basename); pixWrite(buf, pixb, IFF_TIFF_G4); } else { snprintf(buf, sizeof(buf), "%s/%s.png", dirout, basename); pixWrite(buf, pixb, IFF_PNG); } pixDestroy(&pixb); FREE(tail); FREE(basename); } sarrayDestroy(&safiles); return 0; }
/*! * \brief recogReadStream() * * \param[in] fp file stream * \return recog, or NULL on error */ L_RECOG * recogReadStream(FILE *fp) { l_int32 version, setsize, threshold, scalew, scaleh, linew; l_int32 maxyshift, nc; L_DNA *dna_tochar; PIXAA *paa; L_RECOG *recog; SARRAY *sa_text; PROCNAME("recogReadStream"); if (!fp) return (L_RECOG *)ERROR_PTR("stream not defined", procName, NULL); if (fscanf(fp, "\nRecog Version %d\n", &version) != 1) return (L_RECOG *)ERROR_PTR("not a recog file", procName, NULL); if (version != RECOG_VERSION_NUMBER) return (L_RECOG *)ERROR_PTR("invalid recog version", procName, NULL); if (fscanf(fp, "Size of character set = %d\n", &setsize) != 1) return (L_RECOG *)ERROR_PTR("setsize not read", procName, NULL); if (fscanf(fp, "Binarization threshold = %d\n", &threshold) != 1) return (L_RECOG *)ERROR_PTR("binary thresh not read", procName, NULL); if (fscanf(fp, "Maxyshift = %d\n", &maxyshift) != 1) return (L_RECOG *)ERROR_PTR("maxyshift not read", procName, NULL); if (fscanf(fp, "Scale to width = %d\n", &scalew) != 1) return (L_RECOG *)ERROR_PTR("width not read", procName, NULL); if (fscanf(fp, "Scale to height = %d\n", &scaleh) != 1) return (L_RECOG *)ERROR_PTR("height not read", procName, NULL); if (fscanf(fp, "Normalized line width = %d\n", &linew) != 1) return (L_RECOG *)ERROR_PTR("line width not read", procName, NULL); if ((recog = recogCreate(scalew, scaleh, linew, threshold, maxyshift)) == NULL) return (L_RECOG *)ERROR_PTR("recog not made", procName, NULL); if (fscanf(fp, "\nLabels for character set:\n") != 0) { recogDestroy(&recog); return (L_RECOG *)ERROR_PTR("label intro not read", procName, NULL); } l_dnaDestroy(&recog->dna_tochar); if ((dna_tochar = l_dnaReadStream(fp)) == NULL) { recogDestroy(&recog); return (L_RECOG *)ERROR_PTR("dna_tochar not read", procName, NULL); } recog->dna_tochar = dna_tochar; sarrayDestroy(&recog->sa_text); if ((sa_text = sarrayReadStream(fp)) == NULL) { recogDestroy(&recog); return (L_RECOG *)ERROR_PTR("sa_text not read", procName, NULL); } recog->sa_text = sa_text; if (fscanf(fp, "\nPixaa of all samples in the training set:\n") != 0) { recogDestroy(&recog); return (L_RECOG *)ERROR_PTR("pixaa intro not read", procName, NULL); } if ((paa = pixaaReadStream(fp)) == NULL) { recogDestroy(&recog); return (L_RECOG *)ERROR_PTR("pixaa not read", procName, NULL); } recog->setsize = setsize; nc = pixaaGetCount(paa, NULL); if (nc != setsize) { recogDestroy(&recog); pixaaDestroy(&paa); L_ERROR("(setsize = %d) != (paa count = %d)\n", procName, setsize, nc); return NULL; } recogAddAllSamples(&recog, paa, 0); /* this finishes */ pixaaDestroy(&paa); if (!recog) return (L_RECOG *)ERROR_PTR("bad templates", procName, NULL); return recog; }
/*! * gplotRead() * * Input: filename * Return: gplot, or NULL on error */ GPLOT * gplotRead(const char *filename) { char buf[L_BUF_SIZE]; char *rootname, *title, *xlabel, *ylabel, *ignores; l_int32 outformat, ret, version, ignore; FILE *fp; GPLOT *gplot; PROCNAME("gplotRead"); if (!filename) return (GPLOT *)ERROR_PTR("filename not defined", procName, NULL); if ((fp = fopenReadStream(filename)) == NULL) return (GPLOT *)ERROR_PTR("stream not opened", procName, NULL); ret = fscanf(fp, "Gplot Version %d\n", &version); if (ret != 1) { fclose(fp); return (GPLOT *)ERROR_PTR("not a gplot file", procName, NULL); } if (version != GPLOT_VERSION_NUMBER) { fclose(fp); return (GPLOT *)ERROR_PTR("invalid gplot version", procName, NULL); } ignore = fscanf(fp, "Rootname: %s\n", buf); rootname = stringNew(buf); ignore = fscanf(fp, "Output format: %d\n", &outformat); ignores = fgets(buf, L_BUF_SIZE, fp); /* Title: ... */ title = stringNew(buf + 7); title[strlen(title) - 1] = '\0'; ignores = fgets(buf, L_BUF_SIZE, fp); /* X axis label: ... */ xlabel = stringNew(buf + 14); xlabel[strlen(xlabel) - 1] = '\0'; ignores = fgets(buf, L_BUF_SIZE, fp); /* Y axis label: ... */ ylabel = stringNew(buf + 14); ylabel[strlen(ylabel) - 1] = '\0'; if (!(gplot = gplotCreate(rootname, outformat, title, xlabel, ylabel))) { fclose(fp); return (GPLOT *)ERROR_PTR("gplot not made", procName, NULL); } FREE(rootname); FREE(title); FREE(xlabel); FREE(ylabel); sarrayDestroy(&gplot->cmddata); sarrayDestroy(&gplot->datanames); sarrayDestroy(&gplot->plotdata); sarrayDestroy(&gplot->plottitles); numaDestroy(&gplot->plotstyles); ignore = fscanf(fp, "Commandfile name: %s\n", buf); stringReplace(&gplot->cmdname, buf); ignore = fscanf(fp, "\nCommandfile data:"); gplot->cmddata = sarrayReadStream(fp); ignore = fscanf(fp, "\nDatafile names:"); gplot->datanames = sarrayReadStream(fp); ignore = fscanf(fp, "\nPlot data:"); gplot->plotdata = sarrayReadStream(fp); ignore = fscanf(fp, "\nPlot titles:"); gplot->plottitles = sarrayReadStream(fp); ignore = fscanf(fp, "\nPlot styles:"); gplot->plotstyles = numaReadStream(fp); ignore = fscanf(fp, "Number of plots: %d\n", &gplot->nplots); ignore = fscanf(fp, "Output file name: %s\n", buf); stringReplace(&gplot->outname, buf); ignore = fscanf(fp, "Axis scaling: %d\n", &gplot->scaling); fclose(fp); return gplot; }
l_int32 main(int argc, char **argv) { L_ASET *set; L_DNA *da1, *da2, *da3, *da4, *da5, *da6, *da7, *da8, *dav, *dac; L_DNAHASH *dahash; NUMA *nav, *nac; PTA *pta1, *pta2, *pta3; SARRAY *sa1, *sa2, *sa3, *sa4; lept_mkdir("lept/hash"); #if 1 /* Test string hashing with aset */ fprintf(stderr, "Set results with string hashing:\n"); sa1 = BuildShortStrings(3, 0); sa2 = BuildShortStrings(3, 1); fprintf(stderr, " size with unique strings: %d\n", sarrayGetCount(sa1)); fprintf(stderr, " size with dups: %d\n", sarrayGetCount(sa2)); startTimer(); set = l_asetCreateFromSarray(sa2); fprintf(stderr, " time to make set: %5.3f sec\n", stopTimer()); fprintf(stderr, " size of set without dups: %d\n", l_asetSize(set)); l_asetDestroy(&set); startTimer(); sa3 = sarrayRemoveDupsByAset(sa2); fprintf(stderr, " time to remove dups: %5.3f sec\n", stopTimer()); fprintf(stderr, " size without dups = %d\n", sarrayGetCount(sa3)); startTimer(); sa4 = sarrayIntersectionByAset(sa1, sa2); fprintf(stderr, " time to intersect: %5.3f sec\n", stopTimer()); fprintf(stderr, " intersection size = %d\n", sarrayGetCount(sa4)); sarrayDestroy(&sa3); sarrayDestroy(&sa4); /* Test sarray set operations with dna hash. * We use the same hash function as is used with aset. */ fprintf(stderr, "\nDna hash results for sarray:\n"); fprintf(stderr, " size with unique strings: %d\n", sarrayGetCount(sa1)); fprintf(stderr, " size with dups: %d\n", sarrayGetCount(sa2)); startTimer(); dahash = l_dnaHashCreateFromSarray(sa2); fprintf(stderr, " time to make hashmap: %5.3f sec\n", stopTimer()); fprintf(stderr, " entries in hashmap with dups: %d\n", l_dnaHashGetTotalCount(dahash)); l_dnaHashDestroy(&dahash); startTimer(); sarrayRemoveDupsByHash(sa2, &sa3, NULL); fprintf(stderr, " time to remove dups: %5.3f sec\n", stopTimer()); fprintf(stderr, " size without dups = %d\n", sarrayGetCount(sa3)); startTimer(); sa4 = sarrayIntersectionByHash(sa1, sa2); fprintf(stderr, " time to intersect: %5.3f sec\n", stopTimer()); fprintf(stderr, " intersection size = %d\n", sarrayGetCount(sa4)); sarrayDestroy(&sa3); sarrayDestroy(&sa4); sarrayDestroy(&sa1); sarrayDestroy(&sa2); #endif #if 1 /* Test point hashing with aset. * Enter all points within a 1500 x 1500 image in pta1, and include * 450,000 duplicates in pta2. With this pt hashing function, * there are no hash collisions among any of the 400 million pixel * locations in a 20000 x 20000 image. */ pta1 = BuildPointSet(1500, 1500, 0); pta2 = BuildPointSet(1500, 1500, 1); fprintf(stderr, "\nSet results for pta:\n"); fprintf(stderr, " pta1 size with unique points: %d\n", ptaGetCount(pta1)); fprintf(stderr, " pta2 size with dups: %d\n", ptaGetCount(pta2)); startTimer(); pta3 = ptaRemoveDupsByAset(pta2); fprintf(stderr, " Time to remove dups: %5.3f sec\n", stopTimer()); fprintf(stderr, " size without dups = %d\n", ptaGetCount(pta3)); ptaDestroy(&pta3); startTimer(); pta3 = ptaIntersectionByAset(pta1, pta2); fprintf(stderr, " Time to intersect: %5.3f sec\n", stopTimer()); fprintf(stderr, " intersection size = %d\n", ptaGetCount(pta3)); ptaDestroy(&pta1); ptaDestroy(&pta2); ptaDestroy(&pta3); #endif #if 1 /* Test pta set operations with dna hash, using the same pt hashing * function. Although there are no collisions in 20K x 20K images, * the dna hash implementation works properly even if there are some. */ pta1 = BuildPointSet(1500, 1500, 0); pta2 = BuildPointSet(1500, 1500, 1); fprintf(stderr, "\nDna hash results for pta:\n"); fprintf(stderr, " pta1 size with unique points: %d\n", ptaGetCount(pta1)); fprintf(stderr, " pta2 size with dups: %d\n", ptaGetCount(pta2)); startTimer(); ptaRemoveDupsByHash(pta2, &pta3, NULL); fprintf(stderr, " Time to remove dups: %5.3f sec\n", stopTimer()); fprintf(stderr, " size without dups = %d\n", ptaGetCount(pta3)); ptaDestroy(&pta3); startTimer(); pta3 = ptaIntersectionByHash(pta1, pta2); fprintf(stderr, " Time to intersect: %5.3f sec\n", stopTimer()); fprintf(stderr, " intersection size = %d\n", ptaGetCount(pta3)); ptaDestroy(&pta1); ptaDestroy(&pta2); ptaDestroy(&pta3); #endif /* Test dna set and histo operations using dna hash */ #if 1 fprintf(stderr, "\nDna hash results for dna:\n"); da1 = l_dnaMakeSequence(0.0, 0.125, 8000); da2 = l_dnaMakeSequence(300.0, 0.125, 8000); da3 = l_dnaMakeSequence(600.0, 0.125, 8000); da4 = l_dnaMakeSequence(900.0, 0.125, 8000); da5 = l_dnaMakeSequence(1200.0, 0.125, 8000); l_dnaJoin(da1, da2, 0, -1); l_dnaJoin(da1, da3, 0, -1); l_dnaJoin(da1, da4, 0, -1); l_dnaJoin(da1, da5, 0, -1); l_dnaRemoveDupsByHash(da1, &da6, &dahash); l_dnaHashDestroy(&dahash); fprintf(stderr, " dna size with dups = %d\n", l_dnaGetCount(da1)); fprintf(stderr, " dna size of unique numbers = %d\n", l_dnaGetCount(da6)); l_dnaMakeHistoByHash(da1, &dahash, &dav, &dac); nav = l_dnaConvertToNuma(dav); nac = l_dnaConvertToNuma(dac); fprintf(stderr, " dna number of histo points = %d\n", l_dnaGetCount(dac)); gplotSimpleXY1(nav, nac, GPLOT_IMPULSES, GPLOT_PNG, "/tmp/lept/hash/histo", "Histo"); da7 = l_dnaIntersectionByHash(da2, da3); fprintf(stderr, " dna number of points: da2 = %d, da3 = %d\n", l_dnaGetCount(da2), l_dnaGetCount(da3)); fprintf(stderr, " dna number of da2/da3 intersection points = %d\n", l_dnaGetCount(da7)); l_fileDisplay("/tmp/lept/hash/histo.png", 700, 100, 1.0); l_dnaDestroy(&da1); l_dnaDestroy(&da2); l_dnaDestroy(&da3); l_dnaDestroy(&da4); l_dnaDestroy(&da5); l_dnaDestroy(&da6); l_dnaDestroy(&da7); l_dnaDestroy(&dac); l_dnaDestroy(&dav); l_dnaHashDestroy(&dahash); numaDestroy(&nav); numaDestroy(&nac); #endif #if 1 da1 = l_dnaMakeSequence(0, 3, 10000); da2 = l_dnaMakeSequence(0, 5, 10000); da3 = l_dnaMakeSequence(0, 7, 10000); l_dnaJoin(da1, da2, 0, -1); l_dnaJoin(da1, da3, 0, -1); fprintf(stderr, "\nDna results using set:\n"); fprintf(stderr, " da1 count: %d\n", l_dnaGetCount(da1)); set = l_asetCreateFromDna(da1); fprintf(stderr, " da1 set size: %d\n\n", l_asetSize(set)); l_asetDestroy(&set); da4 = l_dnaUnionByAset(da2, da3); fprintf(stderr, " da4 count: %d\n", l_dnaGetCount(da4)); set = l_asetCreateFromDna(da4); fprintf(stderr, " da4 set size: %d\n\n", l_asetSize(set)); l_asetDestroy(&set); da5 = l_dnaIntersectionByAset(da1, da2); fprintf(stderr, " da5 count: %d\n", l_dnaGetCount(da5)); set = l_asetCreateFromDna(da5); fprintf(stderr, " da5 set size: %d\n\n", l_asetSize(set)); l_asetDestroy(&set); da6 = l_dnaMakeSequence(100000, 11, 5000); l_dnaJoin(da6, da1, 0, -1); fprintf(stderr, " da6 count: %d\n", l_dnaGetCount(da6)); set = l_asetCreateFromDna(da6); fprintf(stderr, " da6 set size: %d\n\n", l_asetSize(set)); l_asetDestroy(&set); da7 = l_dnaIntersectionByAset(da6, da3); fprintf(stderr, " da7 count: %d\n", l_dnaGetCount(da7)); set = l_asetCreateFromDna(da7); fprintf(stderr, " da7 set size: %d\n\n", l_asetSize(set)); l_asetDestroy(&set); da8 = l_dnaRemoveDupsByAset(da1); fprintf(stderr, " da8 count: %d\n\n", l_dnaGetCount(da8)); l_dnaDestroy(&da1); l_dnaDestroy(&da2); l_dnaDestroy(&da3); l_dnaDestroy(&da4); l_dnaDestroy(&da5); l_dnaDestroy(&da6); l_dnaDestroy(&da7); l_dnaDestroy(&da8); #endif return 0; }