示例#1
0
/*
 *  convertSegmentedPagesToPS()
 *
 *      Input:  pagedir (input page image directory)
 *              pagestr (<optional> substring filter on page filenames;
 *                       can be NULL)
 *              page_numpre (number of characters in page name before number)
 *              maskdir (input mask image directory)
 *              maskstr (<optional> substring filter on mask filenames;
 *                       can be NULL)
 *              mask_numpre (number of characters in mask name before number)
 *              numpost (number of characters in names after number)
 *              maxnum (only consider page numbers up to this value)
 *              textscale (scale of text output relative to pixs)
 *              imagescale (scale of image output relative to pixs)
 *              threshold (for binarization; typ. about 190; 0 for default)
 *              fileout (output ps file)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) This generates a PS file for all page image and mask files in two
 *          specified directories and that contain the page numbers as
 *          specified below.  The two directories can be the same, in which
 *          case the page and mask files are differentiated by the two
 *          substrings for string matches.
 *      (2) The page images are taken in lexicographic order.
 *          Mask images whose numbers match the page images are used to
 *          segment the page images.  Page images without a matching
 *          mask image are scaled, thresholded and rendered entirely as text.
 *      (3) Each PS page is generated as a compressed representation of
 *          the page image, where the part of the image under the mask
 *          is suitably scaled and compressed as DCT (i.e., jpeg), and
 *          the remaining part of the page is suitably scaled, thresholded,
 *          compressed as G4 (i.e., tiff g4), and rendered by painting
 *          black through the resulting text mask.
 *      (4) The scaling is typically 2x down for the DCT component
 *          (@imagescale = 0.5) and 2x up for the G4 component
 *          (@textscale = 2.0).
 *      (5) The resolution is automatically set to fit to a
 *          letter-size (8.5 x 11 inch) page.
 *      (6) Both the DCT and the G4 encoding are PostScript level 2.
 *      (7) It is assumed that the page number is contained within
 *          the basename (the filename without directory or extension).
 *          @page_numpre is the number of characters in the page basename
 *          preceding the actual page number; @mask_numpre is likewise for
 *          the mask basename; @numpost is the number of characters
 *          following the page number.  For example, for mask name
 *          mask_006.tif, mask_numpre = 5 ("mask_).
 *      (8) To render a page as is -- that is, with no thresholding
 *          of any pixels -- use a mask in the mask directory that is
 *          full size with all pixels set to 1.  If the page is 1 bpp,
 *          it is not necessary to have a mask.
 */
l_int32
convertSegmentedPagesToPS(const char  *pagedir,
                          const char  *pagestr,
                          l_int32      page_numpre,
                          const char  *maskdir,
                          const char  *maskstr,
                          l_int32      mask_numpre,
                          l_int32      numpost,
                          l_int32      maxnum,
                          l_float32    textscale,
                          l_float32    imagescale,
                          l_int32      threshold,
                          const char  *fileout)
{
l_int32  pageno, i, npages;
PIX     *pixs, *pixm;
SARRAY  *sapage, *samask;

    PROCNAME("convertSegmentedPagesToPS");

    if (!pagedir)
        return ERROR_INT("pagedir not defined", procName, 1);
    if (!maskdir)
        return ERROR_INT("maskdir not defined", procName, 1);
    if (!fileout)
        return ERROR_INT("fileout not defined", procName, 1);
    if (threshold <= 0) {
        L_INFO("setting threshold to 190\n", procName);
        threshold = 190;
    }

        /* Get numbered full pathnames; max size of sarray is maxnum */
    sapage = getNumberedPathnamesInDirectory(pagedir, pagestr,
                                             page_numpre, numpost, maxnum);
    samask = getNumberedPathnamesInDirectory(maskdir, maskstr,
                                             mask_numpre, numpost, maxnum);
    sarrayPadToSameSize(sapage, samask, (char *)"");
    if ((npages = sarrayGetCount(sapage)) == 0) {
        sarrayDestroy(&sapage);
        sarrayDestroy(&samask);
        return ERROR_INT("no matching pages found", procName, 1);
    }

        /* Generate the PS file */
    pageno = 1;
    for (i = 0; i < npages; i++) {
        if ((pixs = pixReadIndexed(sapage, i)) == NULL)
            continue;
        pixm = pixReadIndexed(samask, i);
        pixWriteSegmentedPageToPS(pixs, pixm, textscale, imagescale,
                                  threshold, pageno, fileout);
        pixDestroy(&pixs);
        pixDestroy(&pixm);
        pageno++;
    }

    sarrayDestroy(&sapage);
    sarrayDestroy(&samask);
    return 0;
}
示例#2
0
/*!
 *   gplotDestroy()
 *
 *        Input: &gplot (<to be nulled>)
 *        Return: void
 */
void
gplotDestroy(GPLOT  **pgplot)
{
GPLOT  *gplot;

    PROCNAME("gplotDestroy");

    if (pgplot == NULL) {
        L_WARNING("ptr address is null!\n", procName);
        return;
    }

    if ((gplot = *pgplot) == NULL)
        return;

    FREE(gplot->rootname);
    FREE(gplot->cmdname);
    sarrayDestroy(&gplot->cmddata);
    sarrayDestroy(&gplot->datanames);
    sarrayDestroy(&gplot->plotdata);
    sarrayDestroy(&gplot->plottitles);
    numaDestroy(&gplot->plotstyles);
    FREE(gplot->outname);
    if (gplot->title)
        FREE(gplot->title);
    if (gplot->xlabel)
        FREE(gplot->xlabel);
    if (gplot->ylabel)
        FREE(gplot->ylabel);

    FREE(gplot);
    *pgplot = NULL;
    return;
}
示例#3
0
/*
 *  parseForProtos()
 *
 *      Input:  filein (output of cpp)
 *              prestring (<optional> string that prefaces each decl;
 *                        use NULL to omit)
 *      Return: parsestr (string of function prototypes), or NULL on error
 *
 *  Notes:
 *      (1) We parse the output of cpp:
 *              cpp -ansi <filein> 
 *          Three plans were attempted, with success on the third. 
 *      (2) Plan 1.  A cursory examination of the cpp output indicated that
 *          every function was preceeded by a cpp comment statement.
 *          So we just need to look at statements beginning after comments.
 *          Unfortunately, this is NOT the case.  Some functions start
 *          without cpp comment lines, typically when there are no
 *          comments in the source that immediately precede the function.
 *      (3) Plan 2.  Consider the keywords in the language that start
 *          parts of the cpp file.  Some, like 'typedef', 'enum',
 *          'union' and 'struct', are followed after a while by '{',
 *          and eventually end with '}, plus an optional token and a
 *          final ';'  Others, like 'extern' and 'static', are never
 *          the beginnings of global function definitions.   Function
 *          prototypes have one or more sets of '(' followed eventually
 *          by a ')', and end with ';'.  But function definitions have
 *          tokens, followed by '(', more tokens, ')' and then
 *          immediately a '{'.  We would generate a prototype from this
 *          by adding a ';' to all tokens up to the ')'.  So we use
 *          these special tokens to decide what we are parsing.  And
 *          whenever a function definition is found and the prototype
 *          extracted, we skip through the rest of the function
 *          past the corresponding '}'.  This token ends a line, and
 *          is often on a line of its own.  But as it turns out,
 *          the only keyword we need to consider is 'static'.
 *      (4) Plan 3.  Consider the parentheses and braces for various
 *          declarations.  A struct, enum, or union has a pair of
 *          braces followed by a semicolon.  They cannot have parentheses
 *          before the left brace, but a struct can have lots of parentheses
 *          within the brace set.  A function prototype has no braces.
 *          A function declaration can have sets of left and right
 *          parentheses, but these are followed by a left brace.
 *          So plan 3 looks at the way parentheses and braces are
 *          organized.  Once the beginning of a function definition
 *          is found, the prototype is extracted and we search for
 *          the ending right brace.
 *      (5) To find the ending right brace, it is necessary to do some
 *          careful parsing.  For example, in this file, we have
 *          left and right braces as characters, and these must not
 *          be counted.  Somewhat more tricky, the file fhmtauto.c
 *          generates code, and includes a right brace in a string.
 *          So we must not include braces that are in strings.  But how
 *          do we know if something is inside a string?  Keep state,
 *          starting with not-inside, and every time you hit a double quote
 *          that is not escaped, toggle the condition.  Any brace
 *          found in the state of being within a string is ignored.
 *      (6) When a prototype is extracted, it is put in a canonical
 *          form (i.e., cleaned up).  Finally, we check that it is
 *          not static and save it.  (If static, it is ignored).
 *      (7) The @prestring for unix is NULL; it is included here so that
 *          you can use Microsoft's declaration for importing or
 *          exporting to a dll.  See environ.h for examples of use.
 *          Here, we set: @prestring = "LEPT_DLL ".  Note in particular
 *          the space character that will separate 'LEPT_DLL' from
 *          the standard unix prototype that follows.
 */
char *
parseForProtos(const char *filein,
               const char *prestring)
{
char    *strdata, *str, *newstr, *parsestr, *secondword;
l_int32  nbytes, start, next, stop, charindex, found;
SARRAY  *sa, *saout, *satest;

    PROCNAME("parseForProtos");

    if (!filein)
        return (char *)ERROR_PTR("filein not defined", procName, NULL);

        /* Read in the cpp output into memory, one string for each
         * line in the file, omitting blank lines.  */
    strdata = (char *)arrayRead(filein, &nbytes);
    sa = sarrayCreateLinesFromString(strdata, 0);

    saout = sarrayCreate(0);
    next = 0;
    while (1) {  /* repeat after each non-static prototype is extracted */
        searchForProtoSignature(sa, next, &start, &stop, &charindex, &found);
        if (!found)
            break;
/*        fprintf(stderr, "  start = %d, stop = %d, charindex = %d\n",
                start, stop, charindex); */
        str = captureProtoSignature(sa, start, stop, charindex);

            /* Make sure it is not static.  Note that 'extern' has
             * been prepended to the prototype, so the 'static'
             * keyword, if it exists, would be the second word. */
        satest = sarrayCreateWordsFromString(str);
        secondword = sarrayGetString(satest, 1, 0);
        if (strcmp(secondword, "static")) {  /* not static */
            if (prestring) {  /* prepend it to the prototype */
                newstr = stringJoin(prestring, str);
                sarrayAddString(saout, newstr, L_INSERT);
                FREE(str);
            }
            else
                sarrayAddString(saout, str, L_INSERT);
        }
        else
            FREE(str);
        sarrayDestroy(&satest);

        skipToEndOfFunction(sa, stop, charindex, &next);
        if (next == -1) break;
    }

        /* Flatten into a string with newlines between prototypes */
    parsestr = sarrayToString(saout, 1);
    FREE(strdata);
    sarrayDestroy(&sa);
    sarrayDestroy(&saout);

    return parsestr;
}
示例#4
0
/*
 *  cleanProtoSignature()
 *
 *      Input:  instr (input prototype string)
 *      Return: cleanstr (clean prototype string), or NULL on error
 *
 *  Notes:
 *      (1) Adds 'extern' at beginning and regularizes spaces
 *          between tokens.
 */
static char *
cleanProtoSignature(char *instr)
{
char    *str, *cleanstr;
char     buf[L_BUF_SIZE];
char     externstring[] = "extern";
l_int32  i, j, nwords, nchars, index, len;
SARRAY  *sa, *saout;

    PROCNAME("cleanProtoSignature");

    if (!instr)
        return (char *)ERROR_PTR("instr not defined", procName, NULL);

    sa = sarrayCreateWordsFromString(instr);
    nwords = sarrayGetCount(sa);
    saout = sarrayCreate(0);
    sarrayAddString(saout, externstring, 1);
    for (i = 0; i < nwords; i++) {
        str = sarrayGetString(sa, i, 0);
        nchars = strlen(str);
        index = 0;
        for (j = 0; j < nchars; j++) {
            if (index > L_BUF_SIZE - 6)
                return (char *)ERROR_PTR("token too large", procName, NULL);
            if (str[j] == '(') {
                buf[index++] = ' ';
                buf[index++] = '(';
                buf[index++] = ' ';
            }
            else if (str[j] == ')') {
                buf[index++] = ' ';
                buf[index++] = ')';
            }
            else 
                buf[index++] = str[j];
        }
        buf[index] = '\0';
        sarrayAddString(saout, buf, 1);
    }

        /* Flatten to a prototype string with spaces added after
         * each word, and remove the last space */
    cleanstr = sarrayToString(saout, 2);
    len = strlen(cleanstr);
    cleanstr[len - 1] = '\0';

    sarrayDestroy(&sa);
    sarrayDestroy(&saout);
    return cleanstr;
}
示例#5
0
/*
 *  convertFilesToPS()
 *
 *      Input:  dirin (input directory)
 *              substr (<optional> substring filter on filenames; can be NULL)
 *              res (typ. 300 or 600 ppi)
 *              fileout (output ps file)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) This generates a PS file for all image files in a specified
 *          directory that contain the substr pattern to be matched.
 *      (2) Each image is written to a separate page in the output PS file.
 *      (3) All images are written compressed:
 *              * if tiffg4  -->  use ccittg4
 *              * if jpeg    -->  use dct
 *              * all others -->  use flate
 *          If the image is jpeg or tiffg4, we use the existing compressed
 *          strings for the encoding; otherwise, we read the image into
 *          a pix and flate-encode the pieces.
 *      (4) The resolution is often confusing.  It is interpreted
 *          as the resolution of the output display device:  "If the
 *          input image were digitized at 300 ppi, what would it
 *          look like when displayed at res ppi."  So, for example,
 *          if res = 100 ppi, then the display pixels are 3x larger
 *          than the 300 ppi pixels, and the image will be rendered
 *          3x larger.
 *      (5) The size of the PostScript file is independent of the resolution,
 *          because the entire file is encoded.  The res parameter just
 *          tells the PS decomposer how to render the page.  Therefore,
 *          for minimum file size without loss of visual information,
 *          if the output res is less than 300, you should downscale
 *          the image to the output resolution before wrapping in PS.
 *      (6) The "canvas" on which the image is rendered, at the given
 *          output resolution, is a standard page size (8.5 x 11 in).
 */
l_int32
convertFilesToPS(const char  *dirin,
                 const char  *substr,
                 l_int32      res,
                 const char  *fileout)
{
SARRAY  *sa;

    PROCNAME("convertFilesToPS");

    if (!dirin)
        return ERROR_INT("dirin not defined", procName, 1);
    if (!fileout)
        return ERROR_INT("fileout not defined", procName, 1);
    if (res <= 0) {
        L_INFO("setting res to 300 ppi", procName);
        res = 300;
    }
    if (res < 10 || res > 4000)
        L_WARNING("res is typically in the range 300-600 ppi", procName);

        /* Get all filtered and sorted full pathnames. */
    sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0);

        /* Generate the PS file. */
    sarrayConvertFilesToPS(sa, res, fileout);
    sarrayDestroy(&sa);
    return 0;
}
示例#6
0
/*!
 *  jbCorrelation()
 *
 *       Input:  dirin (directory of input images)
 *               thresh (typically ~0.8)
 *               weight (typically ~0.6)
 *               components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS)
 *               rootname (for output files)
 *               firstpage (0-based)
 *               npages (use 0 for all pages in dirin)
 *               renderflag (1 to render from templates; 0 to skip)
 *       Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) The images must be 1 bpp.  If they are not, you can convert
 *          them using convertFilesTo1bpp().
 *      (2) See prog/jbcorrelation for generating more output (e.g.,
 *          for debugging)
 */
l_int32
jbCorrelation(const char *dirin,
              l_float32 thresh,
              l_float32 weight,
              l_int32 components,
              const char *rootname,
              l_int32 firstpage,
              l_int32 npages,
              l_int32 renderflag) {
    char filename[L_BUF_SIZE];
    l_int32 nfiles, i, numpages;
    JBDATA *data;
    JBCLASSER *classer;
    PIX *pix;
    PIXA *pixa;
    SARRAY *safiles;

    PROCNAME("jbCorrelation");

    if (!dirin)
        return ERROR_INT("dirin not defined", procName, 1);
    if (!rootname)
        return ERROR_INT("rootname not defined", procName, 1);
    if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
        components != JB_WORDS)
        return ERROR_INT("components invalid", procName, 1);

    safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages);
    nfiles = sarrayGetCount(safiles);

    /* Classify components */
    classer = jbCorrelationInit(components, 0, 0, thresh, weight);
    jbAddPages(classer, safiles);

    /* Save data */
    data = jbDataSave(classer);
    jbDataWrite(rootname, data);

    /* Optionally, render pages using class templates */
    if (renderflag) {
        pixa = jbDataRender(data, FALSE);
        numpages = pixaGetCount(pixa);
        if (numpages != nfiles)
            fprintf(stderr, "numpages = %d, nfiles = %d, not equal!\n",
                    numpages, nfiles);
        for (i = 0; i < numpages; i++) {
            pix = pixaGetPix(pixa, i, L_CLONE);
            snprintf(filename, L_BUF_SIZE, "%s.%05d", rootname, i);
            fprintf(stderr, "filename: %s\n", filename);
            pixWrite(filename, pix, IFF_PNG);
            pixDestroy(&pix);
        }
        pixaDestroy(&pixa);
    }

    sarrayDestroy(&safiles);
    jbClasserDestroy(&classer);
    jbDataDestroy(&data);
    return 0;
}
示例#7
0
/*
 *  captureProtoSignature()
 *
 *      Input:  sa (output from cpp, by line)
 *              start (starting index to search; never a comment line)
 *              stop (index of line on which pattern is completed)
 *              charindex (char index of completing ')' character)
 *      Return: cleanstr (prototype string), or NULL on error
 *
 *  Notes:
 *      (1) Return all characters, ending with a ';' after the ')'
 */
static char *
captureProtoSignature(SARRAY  *sa,
                      l_int32  start,
                      l_int32  stop,
                      l_int32  charindex)
{
char    *str, *newstr, *protostr, *cleanstr;
SARRAY  *sap;
l_int32  i;

    PROCNAME("captureProtoSignature");

    if (!sa)
        return (char *)ERROR_PTR("sa not defined", procName, NULL);

    sap = sarrayCreate(0);
    for (i = start; i < stop; i++) {
        str = sarrayGetString(sa, i, L_COPY);
        sarrayAddString(sap, str, L_INSERT);
    }
    str = sarrayGetString(sa, stop, L_COPY);
    str[charindex + 1] = '\0';
    newstr = stringJoin(str, ";");
    sarrayAddString(sap, newstr, L_INSERT);
    LEPT_FREE(str);
    protostr = sarrayToString(sap, 2);
    sarrayDestroy(&sap);
    cleanstr = cleanProtoSignature(protostr);
    LEPT_FREE(protostr);

    return cleanstr;
}
示例#8
0
/*
 *  convertFilesFittedToPS()
 *
 *      Input:  dirin (input directory)
 *              substr (<optional> substring filter on filenames; can be NULL)
 *              xpts, ypts (desired size in printer points; use 0 for default)
 *              fileout (output ps file)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) This generates a PS file for all files in a specified directory
 *          that contain the substr pattern to be matched.
 *      (2) Each image is written to a separate page in the output PS file.
 *      (3) All images are written compressed:
 *              * if tiffg4  -->  use ccittg4
 *              * if jpeg    -->  use dct
 *              * all others -->  use flate
 *          If the image is jpeg or tiffg4, we use the existing compressed
 *          strings for the encoding; otherwise, we read the image into
 *          a pix and flate-encode the pieces.
 *      (4) The resolution is internally determined such that the images
 *          are rendered, in at least one direction, at 100% of the given
 *          size in printer points.  Use 0.0 for xpts or ypts to get
 *          the default value, which is 612.0 or 792.0, rsp.
 *      (5) The size of the PostScript file is independent of the resolution,
 *          because the entire file is encoded.  The @xpts and @ypts
 *          parameter tells the PS decomposer how to render the page.
 */
l_int32
convertFilesFittedToPS(const char  *dirin,
                       const char  *substr,
                       l_float32    xpts,
                       l_float32    ypts,
                       const char  *fileout)
{
SARRAY  *sa;

    PROCNAME("convertFilesFittedToPS");

    if (!dirin)
        return ERROR_INT("dirin not defined", procName, 1);
    if (!fileout)
        return ERROR_INT("fileout not defined", procName, 1);
    if (xpts <= 0.0) {
        L_INFO("setting xpts to 612.0 ppi", procName);
        xpts = 612.0;
    }
    if (ypts <= 0.0) {
        L_INFO("setting ypts to 792.0 ppi", procName);
        ypts = 792.0;
    }
    if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0)
        L_WARNING("xpts,ypts are typically in the range 500-800", procName);

        /* Get all filtered and sorted full pathnames. */
    sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0);

        /* Generate the PS file. */
    sarrayConvertFilesFittedToPS(sa, xpts, ypts, fileout);
    sarrayDestroy(&sa);
    return 0;
}
/*!
 *  getSortedPathnamesInDirectory()
 *
 *      Input:  directory name
 *              substr (<optional> substring filter on filenames; can be NULL)
 *              firstpage (0-based)
 *              npages (use 0 for all to the end)
 *      Return: sarray of sorted pathnames, or NULL on error
 *
 *  Notes:
 *      (1) If 'substr' is not NULL, only filenames that contain
 *          the substring can be returned.  If 'substr' is NULL,
 *          none of the filenames are filtered out.
 *      (2) The files in the directory, after optional filtering by
 *          the substring, are lexically sorted in increasing order.
 *          The full pathnames are returned for the requested sequence.
 *          If no files are found after filtering, returns an empty sarray.
 */
SARRAY *
getSortedPathnamesInDirectory(const char  *dirname,
                              const char  *substr,
                              l_int32      firstpage,
                              l_int32      npages)
{
char    *fname, *fullname;
l_int32  i, nfiles, lastpage;
SARRAY  *sa, *safiles, *saout;

    PROCNAME("getSortedPathnamesInDirectory");

    if (!dirname)
        return (SARRAY *)ERROR_PTR("dirname not defined", procName, NULL);

    if ((sa = getFilenamesInDirectory(dirname)) == NULL)
        return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);
    safiles = sarraySelectBySubstring(sa, substr);
    sarrayDestroy(&sa);
    nfiles = sarrayGetCount(safiles);
    if (nfiles == 0) {
        L_WARNING("no files found", procName);
        return safiles;
    }

    sarraySort(safiles, safiles, L_SORT_INCREASING);

    firstpage = L_MIN(L_MAX(firstpage, 0), nfiles - 1);
    if (npages == 0)
        npages = nfiles - firstpage;
    lastpage = L_MIN(firstpage + npages - 1, nfiles - 1);

    saout = sarrayCreate(lastpage - firstpage + 1);
    for (i = firstpage; i <= lastpage; i++) {
        fname = sarrayGetString(safiles, i, L_NOCOPY);
        fullname = genPathname(dirname, fname);
        sarrayAddString(saout, fullname, L_INSERT);
    }

    sarrayDestroy(&safiles);
    return saout;
}
示例#10
0
/*!
 *  recogDestroy()
 *
 *      Input:  &recog (<will be set to null before returning>)
 *      Return: void
 *
 *  Notes:
 *      (1) If a recog has a parent, the parent owns it.  A recogDestroy()
 *          will fail if there is a parent.
 */
void
recogDestroy(L_RECOG  **precog)
{
L_RECOG  *recog;

    PROCNAME("recogDestroy");

    if (!precog) {
        L_WARNING("ptr address is null\n", procName);
        return;
    }

    if ((recog = *precog) == NULL) return;
    if (recogGetParent(recog) != NULL) {
        L_ERROR("recog has parent; can't be destroyed\n", procName);
        return;
    }

    FREE(recog->bootdir);
    FREE(recog->bootpattern);
    FREE(recog->bootpath);
    FREE(recog->centtab);
    FREE(recog->sumtab);
    FREE(recog->fname);
    sarrayDestroy(&recog->sa_text);
    l_dnaDestroy(&recog->dna_tochar);
    pixaaDestroy(&recog->pixaa_u);
    pixaDestroy(&recog->pixa_u);
    ptaaDestroy(&recog->ptaa_u);
    ptaDestroy(&recog->pta_u);
    numaDestroy(&recog->nasum_u);
    numaaDestroy(&recog->naasum_u);
    pixaaDestroy(&recog->pixaa);
    pixaDestroy(&recog->pixa);
    ptaaDestroy(&recog->ptaa);
    ptaDestroy(&recog->pta);
    numaDestroy(&recog->nasum);
    numaaDestroy(&recog->naasum);
    pixaDestroy(&recog->pixa_tr);
    pixaDestroy(&recog->pixadb_ave);
    pixaDestroy(&recog->pixa_id);
    pixDestroy(&recog->pixdb_ave);
    pixDestroy(&recog->pixdb_range);
    pixaDestroy(&recog->pixadb_boot);
    pixaDestroy(&recog->pixadb_split);
    FREE(recog->fontdir);
    bmfDestroy(&recog->bmf);
    rchDestroy(&recog->rch);
    rchaDestroy(&recog->rcha);
    recogDestroyDid(recog);
    FREE(recog);
    *precog = NULL;
    return;
}
/*!
 * \brief   strcodeDestroy()
 *
 * \param[out]  pstrcode &strcode is set to null after destroying the sarrays
 * \return  void
 */
static void
strcodeDestroy(L_STRCODE  **pstrcode)
{
L_STRCODE  *strcode;

    PROCNAME("strcodeDestroy");

    if (pstrcode == NULL) {
        L_WARNING("ptr address is null!\n", procName);
        return;
    }

    if ((strcode = *pstrcode) == NULL)
        return;

    sarrayDestroy(&strcode->function);
    sarrayDestroy(&strcode->data);
    sarrayDestroy(&strcode->descr);
    LEPT_FREE(strcode);
    *pstrcode = NULL;
    return;
}
/*!
 * \brief   l_getIndexFromFile()
 *
 * \param[in]    filename
 * \param[out]   pindex found index
 * \return  0 if found, 1 on error.
 */
static l_int32
l_getIndexFromFile(const char  *filename,
                   l_int32     *pindex)
{
char     buf[256];
char    *word;
FILE    *fp;
l_int32  notfound, format;
SARRAY  *sa;

    PROCNAME("l_getIndexFromFile");

    if (!pindex)
        return ERROR_INT("&index not defined", procName, 1);
    *pindex = 0;
    if (!filename)
        return ERROR_INT("filename not defined", procName, 1);

        /* Open the stream, read lines until you find one with more
         * than a newline, and grab the first word. */
    if ((fp = fopenReadStream(filename)) == NULL)
        return ERROR_INT("stream not opened", procName, 1);
    do {
        if ((fgets(buf, sizeof(buf), fp)) == NULL) {
            fclose(fp);
            return ERROR_INT("fgets read fail", procName, 1);
        }
    } while (buf[0] == '\n');
    fclose(fp);
    sa = sarrayCreateWordsFromString(buf);
    word = sarrayGetString(sa, 0, L_NOCOPY);

        /* Find the index associated with the word.  If it is not
         * found, test to see if the file is a compressed pix. */
    notfound = l_getIndexFromStructname(word, pindex);
    sarrayDestroy(&sa);
    if (notfound) {  /* maybe a Pix */
        if (findFileFormat(filename, &format) == 0) {
            l_getIndexFromStructname("Pix", pindex);
        } else {
            return ERROR_INT("no file type identified", procName, 1);
        }
    }

    return 0;
}
/*!
 * \brief   strcodeCreateFromFile()
 *
 * \param[in]    filein containing filenames of serialized data
 * \param[in]    fileno integer that labels the two output files
 * \param[in]    outdir [optional] if null, files are made in /tmp/lept/auto
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) The %filein has one filename on each line.
 *          Comment lines begin with "#".
 *      (2) The output is 2 files:
 *             autogen.\<fileno\>.c
 *             autogen.\<fileno\>.h
 * </pre>
 */
l_int32
strcodeCreateFromFile(const char  *filein,
                      l_int32      fileno,
                      const char  *outdir)
{
char        *fname;
const char  *type;
l_uint8     *data;
size_t       nbytes;
l_int32      i, n, index;
SARRAY      *sa;
L_STRCODE   *strcode;

    PROCNAME("strcodeCreateFromFile");

    if (!filein)
        return ERROR_INT("filein not defined", procName, 1);

    if ((data = l_binaryRead(filein, &nbytes)) == NULL)
        return ERROR_INT("data not read from file", procName, 1);
    sa = sarrayCreateLinesFromString((char *)data, 0);
    LEPT_FREE(data);
    if (!sa)
        return ERROR_INT("sa not made", procName, 1);
    if ((n = sarrayGetCount(sa)) == 0) {
        sarrayDestroy(&sa);
        return ERROR_INT("no filenames in the file", procName, 1);
    }

    strcode = strcodeCreate(fileno);

    for (i = 0; i < n; i++) {
        fname = sarrayGetString(sa, i, L_NOCOPY);
        if (fname[0] == '#') continue;
        if (l_getIndexFromFile(fname, &index)) {
            L_ERROR("File %s has no recognizable type\n", procName, fname);
        } else {
            type = l_assoc[index].type;
            L_INFO("File %s is type %s\n", procName, fname, type);
            strcodeGenerate(strcode, fname, type);
        }
    }
    strcodeFinalize(&strcode, outdir);
    return 0;
}
示例#14
0
/*!
 * \brief   recogDestroy()
 *
 * \param[in,out]   precog will be set to null before returning
 * \return  void
 */
void
recogDestroy(L_RECOG  **precog)
{
L_RECOG  *recog;

    PROCNAME("recogDestroy");

    if (!precog) {
        L_WARNING("ptr address is null\n", procName);
        return;
    }

    if ((recog = *precog) == NULL) return;

    LEPT_FREE(recog->centtab);
    LEPT_FREE(recog->sumtab);
    sarrayDestroy(&recog->sa_text);
    l_dnaDestroy(&recog->dna_tochar);
    pixaaDestroy(&recog->pixaa_u);
    pixaDestroy(&recog->pixa_u);
    ptaaDestroy(&recog->ptaa_u);
    ptaDestroy(&recog->pta_u);
    numaDestroy(&recog->nasum_u);
    numaaDestroy(&recog->naasum_u);
    pixaaDestroy(&recog->pixaa);
    pixaDestroy(&recog->pixa);
    ptaaDestroy(&recog->ptaa);
    ptaDestroy(&recog->pta);
    numaDestroy(&recog->nasum);
    numaaDestroy(&recog->naasum);
    pixaDestroy(&recog->pixa_tr);
    pixaDestroy(&recog->pixadb_ave);
    pixaDestroy(&recog->pixa_id);
    pixDestroy(&recog->pixdb_ave);
    pixDestroy(&recog->pixdb_range);
    pixaDestroy(&recog->pixadb_boot);
    pixaDestroy(&recog->pixadb_split);
    bmfDestroy(&recog->bmf);
    rchDestroy(&recog->rch);
    rchaDestroy(&recog->rcha);
    recogDestroyDid(recog);
    LEPT_FREE(recog);
    *precog = NULL;
    return;
}
示例#15
0
/*!
 *  pixaReadFiles()
 *
 *      Input:  dirname
 *              substr (<optional> substring filter on filenames; can be null)
 *      Return: pixa, or null on error
 *
 *  Notes:
 *      (1) @dirname is the full path for the directory.
 *      (2) @substr is the part of the file name (excluding
 *          the directory) that is to be matched.  All matching
 *          filenames are read into the Pixa.  If substr is NULL,
 *          all filenames are read into the Pixa.
 */
PIXA *
pixaReadFiles(const char  *dirname,
              const char  *substr)
{
PIXA    *pixa;
SARRAY  *sa;

    PROCNAME("pixaReadFiles");

    if (!dirname)
        return (PIXA *)ERROR_PTR("dirname not defined", procName, NULL);

    if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL)
        return (PIXA *)ERROR_PTR("sa not made", procName, NULL);

    pixa = pixaReadFilesSA(sa);
    sarrayDestroy(&sa);
    return pixa;
}
/*!
 * \brief   sarrayUnionByAset()
 *
 * \param[in]    sa1, sa2
 * \return  sad with the union of the string set, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) Duplicates are removed from the concatenation of the two arrays.
 *      (2) The key for each string is a 64-bit hash.
 *      (2) Algorithm: Concatenate the two sarrays.  Then build a set,
 *          using hashed strings as keys.  As the set is built, first do
 *          a find; if not found, add the key to the set and add the string
 *          to the output sarray.  This is O(nlogn).
 * </pre>
 */
SARRAY *
sarrayUnionByAset(SARRAY  *sa1,
                  SARRAY  *sa2)
{
SARRAY  *sa3, *sad;

    PROCNAME("sarrayUnionByAset");

    if (!sa1)
        return (SARRAY *)ERROR_PTR("sa1 not defined", procName, NULL);
    if (!sa2)
        return (SARRAY *)ERROR_PTR("sa2 not defined", procName, NULL);

        /* Join */
    sa3 = sarrayCopy(sa1);
    sarrayJoin(sa3, sa2);

        /* Eliminate duplicates */
    sad = sarrayRemoveDupsByAset(sa3);
    sarrayDestroy(&sa3);
    return sad;
}
SARRAY *
getFilenamesInDirectory(const char  *dirname)
{
SARRAY           *safiles;
WIN32_FIND_DATAA  ffd;
size_t            length_of_path;
CHAR              szDir[MAX_PATH];  /* MAX_PATH is defined in stdlib.h */
HANDLE            hFind = INVALID_HANDLE_VALUE;

    PROCNAME("getFilenamesInDirectory");

    if (!dirname)
        return (SARRAY *)ERROR_PTR("dirname not defined", procName, NULL);

    length_of_path = strlen(dirname);
    if (length_of_path > (MAX_PATH - 2))
        return (SARRAY *)ERROR_PTR("dirname is to long", procName, NULL);

    strncpy(szDir, dirname, MAX_PATH);
    szDir[MAX_PATH - 1] = '\0';
    strncat(szDir, TEXT("\\*"), MAX_PATH - strlen(szDir));

    if ((safiles = sarrayCreate(0)) == NULL)
        return (SARRAY *)ERROR_PTR("safiles not made", procName, NULL);
    hFind = FindFirstFileA(szDir, &ffd);
    if (INVALID_HANDLE_VALUE == hFind) {
        sarrayDestroy(&safiles);
        return (SARRAY *)ERROR_PTR("hFind not opened", procName, NULL);
    }

    while (FindNextFileA(hFind, &ffd) != 0) {
        if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)  /* skip dirs */
            continue;
        sarrayAddString(safiles, ffd.cFileName, L_COPY);
    }

    FindClose(hFind);
    return safiles;
}
示例#18
0
PIXA *MakeBootnum2(void)
{
char     *fname;
l_int32   i, n, w, h;
BOX      *box;
PIX      *pix;
PIXA     *pixa;
L_RECOG  *recog;
SARRAY   *sa;

        /* Phase 1: generate recog from the digit data */
    recog = recogCreate(20, 32, L_USE_ALL, 120, 1);
    sa = getSortedPathnamesInDirectory("recog/bootnums", "png", 0, 0);
    n = sarrayGetCount(sa);
    for (i = 0; i < n; i++) {
            /* Read each pix: grayscale, multi-character, labelled */
        fname = sarrayGetString(sa, i, L_NOCOPY);
        if ((pix = pixRead(fname)) == NULL) {
            fprintf(stderr, "Can't read %s\n", fname);
            continue;
        }

            /* Convert to a set of 1 bpp, single character, labelled */
        pixGetDimensions(pix, &w, &h, NULL);
        box = boxCreate(0, 0, w, h);
        recogTrainLabelled(recog, pix, box, NULL, 1, 0);
        pixDestroy(&pix);
        boxDestroy(&box);
    }
    recogTrainingFinished(recog, 1);
    sarrayDestroy(&sa);

        /* Phase 2: generate pixa consisting of 1 bpp, single character pix */
    recogWritePixa("/tmp/lept/recog/digits/bootnum2.pa", recog);
    pixa = pixaRead("/tmp/lept/recog/digits/bootnum2.pa");
    recogDestroy(&recog);
    return pixa;
}
示例#19
0
/*!
 *  numaCreateFromString()
 *
 *      Input:  string (of comma-separated numbers)
 *      Return: na, or null on error
 *
 *  Notes:
 *      (1) The numbers can be ints or floats; they will be interpreted
 *          and stored as floats.  To use them as integers (e.g., for
 *          indexing into arrays), use numaGetIValue(...).
 */
NUMA *
numaCreateFromString(const char  *str)
{
char      *substr;
l_int32    i, n, nerrors;
l_float32  val;
NUMA      *na;
SARRAY    *sa;

    PROCNAME("numaCreateFromString");

    if (!str || (strlen(str) == 0))
        return (NUMA *)ERROR_PTR("str not defined or empty", procName, NULL);

    sa = sarrayCreate(0);
    sarraySplitString(sa, str, ",");
    n = sarrayGetCount(sa);
    na = numaCreate(n);
    nerrors = 0;
    for (i = 0; i < n; i++) {
        substr = sarrayGetString(sa, i, L_NOCOPY);
        if (sscanf(substr, "%f", &val) != 1) {
            L_ERROR("substr %d not float\n", procName, i);
            nerrors++;
        } else {
            numaAddNumber(na, val);
        }
    }

    sarrayDestroy(&sa);
    if (nerrors > 0) {
        numaDestroy(&na);
        return (NUMA *)ERROR_PTR("non-floats in string", procName, NULL);
    }

    return na;
}
/*!
 * \brief   l_genDataString()
 *
 * \param[in]    filein input file of serialized data
 * \param[in]    ifunc index into set of functions in output file
 * \return  encoded ascii data string, or NULL on error reading from file
 */
static char *
l_genDataString(const char  *filein,
                l_int32      ifunc)
{
char      buf[80];
char     *cdata1, *cdata2, *cdata3;
l_uint8  *data1, *data2;
l_int32   csize1, csize2;
size_t    size1, size2;
SARRAY   *sa;

    PROCNAME("l_genDataString");

    if (!filein)
        return (char *)ERROR_PTR("filein not defined", procName, NULL);

        /* Read it in, gzip it, encode, and reformat.  We gzip because some
         * serialized data has a significant amount of ascii content. */
    if ((data1 = l_binaryRead(filein, &size1)) == NULL)
        return (char *)ERROR_PTR("bindata not returned", procName, NULL);
    data2 = zlibCompress(data1, size1, &size2);
    cdata1 = encodeBase64(data2, size2, &csize1);
    cdata2 = reformatPacked64(cdata1, csize1, 4, 72, 1, &csize2);
    LEPT_FREE(data1);
    LEPT_FREE(data2);
    LEPT_FREE(cdata1);

        /* Prepend the string declaration signature and put it together */
    sa = sarrayCreate(3);
    snprintf(buf, sizeof(buf), "static const char *l_strdata_%d =\n", ifunc);
    sarrayAddString(sa, buf, L_COPY);
    sarrayAddString(sa, cdata2, L_INSERT);
    sarrayAddString(sa, (char *)";\n", L_COPY);
    cdata3 = sarrayToString(sa, 0);
    sarrayDestroy(&sa);
    return cdata3;
}
示例#21
0
l_int32 main(int    argc,
             char **argv)
{
char      *boxatxt;
l_int32    i;
BOXA      *boxa1, *boxa2, *boxa3;
BOXAA     *baa, *baa1;
NUMAA     *naa1;
PIX       *pixdb, *pix1, *pix2, *pix3, *pix4;
PIXA      *pixa1, *pixa2, *pixa3, *pixat;
L_RECOG   *recog;
L_RECOGA  *recoga;
SARRAY    *sa1;

    /* ----- Example identifying samples using training data ----- */
#if 1
        /* Read the training data */
    pixat = pixaRead("recog/sets/train06.pa");
    recog = recogCreateFromPixa(pixat, 0, 0, L_USE_ALL, 128, 1);
    recoga = recogaCreateFromRecog(recog);
    pixaDestroy(&pixat);

        /* Read the data from all samples */
    pix1 = pixRead("recog/sets/samples06.png");
    boxatxt = pixGetText(pix1);
    boxa1 = boxaReadMem((l_uint8 *)boxatxt, strlen(boxatxt));
    pixa1 = pixaCreateFromBoxa(pix1, boxa1, NULL);
    pixDestroy(&pix1);  /* destroys boxa1 */

        /* Identify components in the sample data */
    pixa2 = pixaCreate(0);
    pixa3 = pixaCreate(0);
    for (i = 0; i < 9; i++) {
/*        if (i != 4) continue; */  /* dots form separate boxa */
/*        if (i != 8) continue; */  /* broken 2 in '24' */
        pix1 = pixaGetPix(pixa1, i, L_CLONE);

            /* Show the 2d box data in the sample */
        boxa2 = pixConnComp(pix1, NULL, 8);
        baa = boxaSort2d(boxa2, NULL, 6, 6, 5);
        pix2 = boxaaDisplay(baa, 3, 1, 0xff000000, 0x00ff0000, 0, 0);
        pixaAddPix(pixa3, pix2, L_INSERT);
        boxaaDestroy(&baa);
        boxaDestroy(&boxa2);

            /* Get the numbers in the sample */
        recogaIdentifyMultiple(recoga, pix1, 0, 5, 3, &boxa3, NULL, &pixdb, 0);
        sa1 = recogaExtractNumbers(recoga, boxa3, 0.7, -1, &baa1, &naa1);
        sarrayWriteStream(stderr, sa1);
        boxaaWriteStream(stderr, baa1);
        numaaWriteStream(stderr, naa1);
        pixaAddPix(pixa2, pixdb, L_INSERT);
/*        pixaWrite("/tmp/pixa.pa", pixa2); */
        pixDestroy(&pix1);
        boxaDestroy(&boxa3);
        boxaaDestroy(&baa1);
        numaaDestroy(&naa1);
        sarrayDestroy(&sa1);
    }

    pix3 = pixaDisplayLinearly(pixa2, L_VERT, 1.0, 0, 20, 1, NULL);
    pixWrite("/tmp/pix3.png", pix3, IFF_PNG);
    pix4 = pixaDisplayTiledInRows(pixa3, 32, 1500, 1.0, 0, 20, 2);
    pixDisplay(pix4, 500, 0);
    pixWrite("/tmp/pix4.png", pix4, IFF_PNG);
    pixaDestroy(&pixa2);
    pixaDestroy(&pixa3);
    pixDestroy(&pix1);
    pixDestroy(&pix3);
    pixDestroy(&pix4);
    pixaDestroy(&pixa1);
    boxaDestroy(&boxa1);
    recogaDestroy(&recoga);
#endif

    return 0;
}
示例#22
0
/*!
 *  kernelCreateFromFile()
 *
 *      Input:  filename
 *      Return: kernel, or null on error
 *
 *  Notes:
 *      (1) The file contains, in the following order:
 *           - Any number of comment lines starting with '#' are ignored
 *           - The height and width of the kernel
 *           - The y and x values of the kernel origin
 *           - The kernel data, formatted as lines of numbers (integers
 *             or floats) for the kernel values in row-major order,
 *             and with no other punctuation.
 *             (Note: this differs from kernelCreateFromString(),
 *             where each line must begin and end with a double-quote
 *             to tell the compiler it's part of a string.)
 *           - The kernel specification ends when a blank line,
 *             a comment line, or the end of file is reached.
 *      (2) All lines must be left-justified.
 *      (3) See kernelCreateFromString() for a description of the string
 *          format for the kernel data.  As an example, here are the lines
 *          of a valid kernel description file  In the file, all lines
 *          are left-justified:
 *                    # small 3x3 kernel
 *                    3 3
 *                    1 1
 *                    25.5   51    24.3
 *                    70.2  146.3  73.4
 *                    20     50.9  18.4
 */
L_KERNEL *
kernelCreateFromFile(const char  *filename)
{
char      *filestr, *line;
l_int32    nlines, i, j, first, index, w, h, cx, cy, n;
l_float32  val;
size_t     size;
NUMA      *na, *nat;
SARRAY    *sa;
L_KERNEL  *kel;

    PROCNAME("kernelCreateFromFile");

    if (!filename)
        return (L_KERNEL *)ERROR_PTR("filename not defined", procName, NULL);

    filestr = (char *)l_binaryRead(filename, &size);
    sa = sarrayCreateLinesFromString(filestr, 1);
    FREE(filestr);
    nlines = sarrayGetCount(sa);

        /* Find the first data line. */
    for (i = 0; i < nlines; i++) {
        line = sarrayGetString(sa, i, L_NOCOPY);
        if (line[0] != '#') {
            first = i;
            break;
        }
    }

        /* Find the kernel dimensions and origin location. */
    line = sarrayGetString(sa, first, L_NOCOPY);
    if (sscanf(line, "%d %d", &h, &w) != 2)
        return (L_KERNEL *)ERROR_PTR("error reading h,w", procName, NULL);
    line = sarrayGetString(sa, first + 1, L_NOCOPY);
    if (sscanf(line, "%d %d", &cy, &cx) != 2)
        return (L_KERNEL *)ERROR_PTR("error reading cy,cx", procName, NULL);

        /* Extract the data.  This ends when we reach eof, or when we
         * encounter a line of data that is either a null string or
         * contains just a newline. */
    na = numaCreate(0);
    for (i = first + 2; i < nlines; i++) {
        line = sarrayGetString(sa, i, L_NOCOPY);
        if (line[0] == '\0' || line[0] == '\n' || line[0] == '#')
            break;
        nat = parseStringForNumbers(line, " \t\n");
        numaJoin(na, nat, 0, -1);
        numaDestroy(&nat);
    }
    sarrayDestroy(&sa);

    n = numaGetCount(na);
    if (n != w * h) {
        numaDestroy(&na);
        fprintf(stderr, "w = %d, h = %d, num ints = %d\n", w, h, n);
        return (L_KERNEL *)ERROR_PTR("invalid integer data", procName, NULL);
    }

    kel = kernelCreate(h, w);
    kernelSetOrigin(kel, cy, cx);
    index = 0;
    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j++) {
            numaGetFValue(na, index, &val);
            kernelSetElement(kel, i, j, val);
            index++;
        }
    }

    numaDestroy(&na);
    return kel;
}
示例#23
0
int main(int    argc,
         char **argv)
{
char        *filein, *str, *prestring, *outprotos, *protostr;
const char  *spacestr = " ";
char         buf[L_BUF_SIZE];
l_uint8     *allheaders;
l_int32      i, maxindex, in_line, nflags, protos_added, firstfile, len, ret;
size_t       nbytes;
L_BYTEA     *ba, *ba2;
SARRAY      *sa, *safirst;
static char  mainName[] = "xtractprotos";

    if (argc == 1) {
        fprintf(stderr,
                "xtractprotos [-prestring=<string>] [-protos=<where>] "
                "[list of C files]\n"
                "where the prestring is prepended to each prototype, and \n"
                "protos can be either 'inline' or the name of an output "
                "prototype file\n");
        return 1;
    }

    /* ---------------------------------------------------------------- */
    /* Parse input flags and find prestring and outprotos, if requested */
    /* ---------------------------------------------------------------- */
    prestring = outprotos = NULL;
    in_line = FALSE;
    nflags = 0;
    maxindex = L_MIN(3, argc);
    for (i = 1; i < maxindex; i++) {
        if (argv[i][0] == '-') {
            if (!strncmp(argv[i], "-prestring", 10)) {
                nflags++;
                ret = sscanf(argv[i] + 1, "prestring=%s", buf);
                if (ret != 1) {
                    fprintf(stderr, "parse failure for prestring\n");
                    return 1;
                }
                if ((len = strlen(buf)) > L_BUF_SIZE - 3) {
                    L_WARNING("prestring too large; omitting!\n", mainName);
                } else {
                    buf[len] = ' ';
                    buf[len + 1] = '\0';
                    prestring = stringNew(buf);
                }
            } else if (!strncmp(argv[i], "-protos", 7)) {
                nflags++;
                ret = sscanf(argv[i] + 1, "protos=%s", buf);
                if (ret != 1) {
                    fprintf(stderr, "parse failure for protos\n");
                    return 1;
                }
                outprotos = stringNew(buf);
                if (!strncmp(outprotos, "inline", 7))
                    in_line = TRUE;
            }
        }
    }

    if (argc - nflags < 2) {
        fprintf(stderr, "no files specified!\n");
        return 1;
    }


    /* ---------------------------------------------------------------- */
    /*                   Generate the prototype string                  */
    /* ---------------------------------------------------------------- */
    ba = l_byteaCreate(500);

        /* First the extern C head */
    sa = sarrayCreate(0);
    sarrayAddString(sa, (char *)"/*", 1);
    snprintf(buf, L_BUF_SIZE,
             " *  These prototypes were autogen'd by xtractprotos, v. %s",
             version);
    sarrayAddString(sa, buf, 1);
    sarrayAddString(sa, (char *)" */", 1);
    sarrayAddString(sa, (char *)"#ifdef __cplusplus", 1);
    sarrayAddString(sa, (char *)"extern \"C\" {", 1);
    sarrayAddString(sa, (char *)"#endif  /* __cplusplus */\n", 1);
    str = sarrayToString(sa, 1);
    l_byteaAppendString(ba, str);
    lept_free(str);
    sarrayDestroy(&sa);

        /* Then the prototypes */
    firstfile = 1 + nflags;
    protos_added = FALSE;
    for (i = firstfile; i < argc; i++) {
        filein = argv[i];
	len = strlen(filein);
	if (filein[len - 1] == 'h')  /* skip .h files */
	    continue;
	snprintf(buf, L_BUF_SIZE, "cpp -ansi -DNO_PROTOS %s %s",
	         filein, tempfile);
	ret = system(buf);
	if (ret) {
            fprintf(stderr, "cpp failure for %s; continuing\n", filein);
	    continue;
	}

	if ((str = parseForProtos(tempfile, prestring)) == NULL) {
            fprintf(stderr, "parse failure for %s; continuing\n", filein);
	    continue;
	}
	if (strlen(str) > 1) {  /* strlen(str) == 1 is a file without protos */
            l_byteaAppendString(ba, str);
            protos_added = TRUE;
        }
        lept_free(str);
    }

        /* Lastly the extern C tail */
    sa = sarrayCreate(0);
    sarrayAddString(sa, (char *)"\n#ifdef __cplusplus", 1);
    sarrayAddString(sa, (char *)"}", 1);
    sarrayAddString(sa, (char *)"#endif  /* __cplusplus */", 1);
    str = sarrayToString(sa, 1);
    l_byteaAppendString(ba, str);
    lept_free(str);
    sarrayDestroy(&sa);

    protostr = (char *)l_byteaCopyData(ba, &nbytes);
    l_byteaDestroy(&ba);


    /* ---------------------------------------------------------------- */
    /*                       Generate the output                        */
    /* ---------------------------------------------------------------- */
    if (!outprotos) {  /* just write to stdout */
        fprintf(stderr, "%s\n", protostr);
        lept_free(protostr);
        return 0;
    }

        /* If no protos were found, do nothing further */
    if (!protos_added) {
        fprintf(stderr, "No protos found\n");
        lept_free(protostr);
        return 1;
    }

        /* Make the output files */
    ba = l_byteaInitFromFile("allheaders_top.txt");
    if (!in_line) {
        snprintf(buf, sizeof(buf), "#include \"%s\"\n", outprotos);
        l_byteaAppendString(ba, buf);
        l_binaryWrite(outprotos, "w", protostr, nbytes);
    } else {
        l_byteaAppendString(ba, protostr);
    }
    ba2 = l_byteaInitFromFile("allheaders_bot.txt");
    l_byteaJoin(ba, &ba2);
    l_byteaWrite("allheaders.h", ba, 0, 0);
    l_byteaDestroy(&ba);
    lept_free(protostr);
    return 0;
}
示例#24
0
int main(int    argc,
         char **argv)
{
l_int32      ignore;
size_t       nbytesin, nbytesout;
char        *infile, *instring, *outstring;
SARRAY      *sa1, *sa2, *sa3, *sa4, *sa5;
char         buf[256];
static char  mainName[] = "string_reg";

    if (argc != 2)
        return ERROR_INT(" Syntax:  string_reg infile", mainName, 1);

    infile = argv[1];
    instring = (char *)l_binaryRead(infile, &nbytesin);

    if (!instring)
        return ERROR_INT("file not read", mainName, 1);

    sa1 = sarrayCreateWordsFromString(instring);
    sa2 = sarrayCreateLinesFromString(instring, 0);
    sa3 = sarrayCreateLinesFromString(instring, 1);

    outstring = sarrayToString(sa1, 0);
    nbytesout = strlen(outstring);
    l_binaryWrite("/tmp/junk1.txt", "w", outstring, nbytesout);
    lept_free(outstring);

    outstring = sarrayToString(sa1, 1);
    nbytesout = strlen(outstring);
    l_binaryWrite("/tmp/junk2.txt", "w", outstring, nbytesout);
    lept_free(outstring);

    outstring = sarrayToString(sa2, 0);
    nbytesout = strlen(outstring);
    l_binaryWrite("/tmp/junk3.txt", "w", outstring, nbytesout);
    lept_free(outstring);

    outstring = sarrayToString(sa2, 1);
    nbytesout = strlen(outstring);
    l_binaryWrite("/tmp/junk4.txt", "w", outstring, nbytesout);
    lept_free(outstring);

    outstring = sarrayToString(sa3, 0);
    nbytesout = strlen(outstring);
    l_binaryWrite("/tmp/junk5.txt", "w", outstring, nbytesout);
    lept_free(outstring);

    outstring = sarrayToString(sa3, 1);
    nbytesout = strlen(outstring);
    l_binaryWrite("/tmp/junk6.txt", "w", outstring, nbytesout);
    lept_free(outstring);
    sprintf(buf, "diff -s /tmp/junk6.txt %s", infile);
    ignore = system(buf);

        /* write/read/write; compare /tmp/junkout5 with /tmp/junkout6 */
    sarrayWrite("/tmp/junk7.txt", sa2);
    sarrayWrite("/tmp/junk8.txt", sa3);
    sa4 = sarrayRead("/tmp/junk8.txt");
    sarrayWrite("/tmp/junk9.txt", sa4);
    sa5 = sarrayRead("/tmp/junk9.txt");
    ignore = system("diff -s /tmp/junk8.txt /tmp/junk9.txt");

    sarrayDestroy(&sa1);
    sarrayDestroy(&sa2);
    sarrayDestroy(&sa3);
    sarrayDestroy(&sa4);
    sarrayDestroy(&sa5);
    lept_free(instring);
    return 0;
}
示例#25
0
/*!
 *  regTestCompareFiles()
 *
 *      Input:  rp (regtest parameters)
 *              index1 (of one output file from reg test)
 *              index2 (of another output file from reg test)
 *      Return: 0 if OK, 1 on error (a failure in comparison is not an error)
 *
 *  Notes:
 *      (1) This only does something in "compare" mode.
 *      (2) The canonical format of the golden filenames is:
 *            /tmp/golden/<root of main name>_golden.<index>.<ext of localname>
 *          e.g.,
 *            /tmp/golden/maze_golden.0.png
 */
l_int32
regTestCompareFiles(L_REGPARAMS *rp,
                    l_int32 index1,
                    l_int32 index2) {
    char *name1, *name2;
    char namebuf[256];
    l_int32 same;
    SARRAY *sa;

    PROCNAME("regTestCompareFiles");

    if (!rp)
        return ERROR_INT("rp not defined", procName, 1);
    if (index1 < 0 || index2 < 0) {
        rp->success = FALSE;
        return ERROR_INT("index1 and/or index2 is negative", procName, 1);
    }
    if (index1 == index2) {
        rp->success = FALSE;
        return ERROR_INT("index1 must differ from index2", procName, 1);
    }

    rp->index++;
    if (rp->mode != L_REG_COMPARE) return 0;

    /* Generate the golden file names */
    snprintf(namebuf, sizeof(namebuf), "%s_golden.%02d.", rp->testname, index1);
    sa = getSortedPathnamesInDirectory("/tmp/golden", namebuf, 0, 0);
    if (sarrayGetCount(sa) != 1) {
        sarrayDestroy(&sa);
        rp->success = FALSE;
        L_ERROR("golden file %s not found\n", procName, namebuf);
        return 1;
    }
    name1 = sarrayGetString(sa, 0, L_COPY);
    sarrayDestroy(&sa);

    snprintf(namebuf, sizeof(namebuf), "%s_golden.%02d.", rp->testname, index2);
    sa = getSortedPathnamesInDirectory("/tmp/golden", namebuf, 0, 0);
    if (sarrayGetCount(sa) != 1) {
        sarrayDestroy(&sa);
        rp->success = FALSE;
        FREE(name1);
        L_ERROR("golden file %s not found\n", procName, namebuf);
        return 1;
    }
    name2 = sarrayGetString(sa, 0, L_COPY);
    sarrayDestroy(&sa);

    /* Test and record on failure */
    filesAreIdentical(name1, name2, &same);
    if (!same) {
        fprintf(rp->fp,
                "Failure in %s_reg, index %d: comparing %s with %s\n",
                rp->testname, rp->index, name1, name2);
        fprintf(stderr,
                "Failure in %s_reg, index %d: comparing %s with %s\n",
                rp->testname, rp->index, name1, name2);
        rp->success = FALSE;
    }

    FREE(name1);
    FREE(name2);
    return 0;
}
示例#26
0
int main(int    argc,
         char **argv)
{
char         filename[BUF_SIZE];
char        *dirin, *rootname, *fname;
l_int32      i, j, w, h, firstpage, npages, nfiles, ncomp;
l_int32      index, ival, rval, gval, bval;
BOX         *box;
BOXA        *boxa;
BOXAA       *baa;
JBDATA      *data;
JBCLASSER   *classer;
NUMA        *nai;
NUMAA       *naa;
SARRAY      *safiles;
PIX         *pixs, *pixt1, *pixt2, *pixd;
PIXCMAP     *cmap;
static char  mainName[] = "wordsinorder";

    if (argc != 3 && argc != 5)
        return ERROR_INT(
            " Syntax: wordsinorder dirin rootname [firstpage, npages]",
            mainName, 1);

    dirin = argv[1];
    rootname = argv[2];

    if (argc == 3) {
        firstpage = 0;
        npages = 0;
    }
    else {
        firstpage = atoi(argv[3]);
        npages = atoi(argv[4]);
    }

        /* Compute the word bounding boxes at 2x reduction, along with
         * the textlines that they are in. */
    safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages);
    nfiles = sarrayGetCount(safiles);
    baa = boxaaCreate(nfiles);
    naa = numaaCreate(nfiles);
    for (i = 0; i < nfiles; i++) {
        fname = sarrayGetString(safiles, i, 0);
        if ((pixs = pixRead(fname)) == NULL) {
            L_WARNING("image file %d not read\n", mainName, i);
            continue;
        }
        pixGetWordBoxesInTextlines(pixs, 2, MIN_WORD_WIDTH, MIN_WORD_HEIGHT,
                                   MAX_WORD_WIDTH, MAX_WORD_HEIGHT,
                                   &boxa, &nai);
        boxaaAddBoxa(baa, boxa, L_INSERT);
        numaaAddNuma(naa, nai, L_INSERT);

#if  RENDER_PAGES
            /* Show the results on a 2x reduced image, where each
             * word is outlined and the color of the box depends on the
             * computed textline. */
        pixt1 = pixReduceRankBinary2(pixs, 2, NULL);
        pixGetDimensions(pixt1, &w, &h, NULL);
        pixd = pixCreate(w, h, 8);
        cmap = pixcmapCreateRandom(8, 1, 1);  /* first color is black */
        pixSetColormap(pixd, cmap);

        pixt2 = pixUnpackBinary(pixt1, 8, 1);
        pixRasterop(pixd, 0, 0, w, h, PIX_SRC | PIX_DST, pixt2, 0, 0);
        ncomp = boxaGetCount(boxa);
        for (j = 0; j < ncomp; j++) {
            box = boxaGetBox(boxa, j, L_CLONE);
            numaGetIValue(nai, j, &ival);
            index = 1 + (ival % 254);  /* omit black and white */
            pixcmapGetColor(cmap, index, &rval, &gval, &bval);
            pixRenderBoxArb(pixd, box, 2, rval, gval, bval);
            boxDestroy(&box);
        }

        snprintf(filename, BUF_SIZE, "%s.%05d", rootname, i);
        fprintf(stderr, "filename: %s\n", filename);
        pixWrite(filename, pixd, IFF_PNG);
        pixDestroy(&pixt1);
        pixDestroy(&pixt2);
        pixDestroy(&pixs);
        pixDestroy(&pixd);
#endif  /* RENDER_PAGES */
    }

    boxaaDestroy(&baa);
    numaaDestroy(&naa);
    sarrayDestroy(&safiles);
    return 0;
}
示例#27
0
/*!
 *  convertFilesTo1bpp()
 *
 *      Input:  dirin
 *              substr (<optional> substring filter on filenames; can be NULL)
 *              upscaling (1, 2 or 4; only for input color or grayscale)
 *              thresh  (global threshold for binarization; use 0 for default)
 *              firstpage
 *              npages (use 0 to do all from @firstpage to the end)
 *              dirout
 *              outformat (IFF_PNG, IFF_TIFF_G4)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) Images are sorted lexicographically, and the names in the
 *          output directory are retained except for the extension.
 */
l_int32
convertFilesTo1bpp(const char *dirin,
                   const char *substr,
                   l_int32 upscaling,
                   l_int32 thresh,
                   l_int32 firstpage,
                   l_int32 npages,
                   const char *dirout,
                   l_int32 outformat) {
    l_int32 i, nfiles;
    char buf[512];
    char *fname, *tail, *basename;
    PIX *pixs, *pixg1, *pixg2, *pixb;
    SARRAY *safiles;

    PROCNAME("convertFilesTo1bpp");

    if (!dirin)
        return ERROR_INT("dirin", procName, 1);
    if (!dirout)
        return ERROR_INT("dirout", procName, 1);
    if (upscaling != 1 && upscaling != 2 && upscaling != 4)
        return ERROR_INT("invalid upscaling factor", procName, 1);
    if (thresh <= 0) thresh = 180;
    if (firstpage < 0) firstpage = 0;
    if (npages < 0) npages = 0;
    if (outformat != IFF_TIFF_G4)
        outformat = IFF_PNG;

    safiles = getSortedPathnamesInDirectory(dirin, substr, firstpage, npages);
    if (!safiles)
        return ERROR_INT("safiles not made", procName, 1);
    if ((nfiles = sarrayGetCount(safiles)) == 0) {
        sarrayDestroy(&safiles);
        return ERROR_INT("no matching files in the directory", procName, 1);
    }

    for (i = 0; i < nfiles; i++) {
        fname = sarrayGetString(safiles, i, L_NOCOPY);
        if ((pixs = pixRead(fname)) == NULL) {
            L_WARNING("Couldn't read file %s\n", procName, fname);
            continue;
        }
        if (pixGetDepth(pixs) == 32)
            pixg1 = pixConvertRGBToLuminance(pixs);
        else
            pixg1 = pixClone(pixs);
        pixg2 = pixRemoveColormap(pixg1, REMOVE_CMAP_TO_GRAYSCALE);
        if (pixGetDepth(pixg2) == 1) {
            pixb = pixClone(pixg2);
        } else {
            if (upscaling == 1)
                pixb = pixThresholdToBinary(pixg2, thresh);
            else if (upscaling == 2)
                pixb = pixScaleGray2xLIThresh(pixg2, thresh);
            else  /* upscaling == 4 */
                pixb = pixScaleGray4xLIThresh(pixg2, thresh);
        }
        pixDestroy(&pixs);
        pixDestroy(&pixg1);
        pixDestroy(&pixg2);

        splitPathAtDirectory(fname, NULL, &tail);
        splitPathAtExtension(tail, &basename, NULL);
        if (outformat == IFF_TIFF_G4) {
            snprintf(buf, sizeof(buf), "%s/%s.tif", dirout, basename);
            pixWrite(buf, pixb, IFF_TIFF_G4);
        } else {
            snprintf(buf, sizeof(buf), "%s/%s.png", dirout, basename);
            pixWrite(buf, pixb, IFF_PNG);
        }
        pixDestroy(&pixb);
        FREE(tail);
        FREE(basename);
    }

    sarrayDestroy(&safiles);
    return 0;
}
示例#28
0
/*!
 * \brief   recogReadStream()
 *
 * \param[in]    fp file stream
 * \return  recog, or NULL on error
 */
L_RECOG *
recogReadStream(FILE  *fp)
{
l_int32   version, setsize, threshold, scalew, scaleh, linew;
l_int32   maxyshift, nc;
L_DNA    *dna_tochar;
PIXAA    *paa;
L_RECOG  *recog;
SARRAY   *sa_text;

    PROCNAME("recogReadStream");

    if (!fp)
        return (L_RECOG *)ERROR_PTR("stream not defined", procName, NULL);

    if (fscanf(fp, "\nRecog Version %d\n", &version) != 1)
        return (L_RECOG *)ERROR_PTR("not a recog file", procName, NULL);
    if (version != RECOG_VERSION_NUMBER)
        return (L_RECOG *)ERROR_PTR("invalid recog version", procName, NULL);
    if (fscanf(fp, "Size of character set = %d\n", &setsize) != 1)
        return (L_RECOG *)ERROR_PTR("setsize not read", procName, NULL);
    if (fscanf(fp, "Binarization threshold = %d\n", &threshold) != 1)
        return (L_RECOG *)ERROR_PTR("binary thresh not read", procName, NULL);
    if (fscanf(fp, "Maxyshift = %d\n", &maxyshift) != 1)
        return (L_RECOG *)ERROR_PTR("maxyshift not read", procName, NULL);
    if (fscanf(fp, "Scale to width = %d\n", &scalew) != 1)
        return (L_RECOG *)ERROR_PTR("width not read", procName, NULL);
    if (fscanf(fp, "Scale to height = %d\n", &scaleh) != 1)
        return (L_RECOG *)ERROR_PTR("height not read", procName, NULL);
    if (fscanf(fp, "Normalized line width = %d\n", &linew) != 1)
        return (L_RECOG *)ERROR_PTR("line width not read", procName, NULL);
    if ((recog = recogCreate(scalew, scaleh, linew, threshold,
                             maxyshift)) == NULL)
        return (L_RECOG *)ERROR_PTR("recog not made", procName, NULL);

    if (fscanf(fp, "\nLabels for character set:\n") != 0) {
        recogDestroy(&recog);
        return (L_RECOG *)ERROR_PTR("label intro not read", procName, NULL);
    }
    l_dnaDestroy(&recog->dna_tochar);
    if ((dna_tochar = l_dnaReadStream(fp)) == NULL) {
        recogDestroy(&recog);
        return (L_RECOG *)ERROR_PTR("dna_tochar not read", procName, NULL);
    }
    recog->dna_tochar = dna_tochar;
    sarrayDestroy(&recog->sa_text);
    if ((sa_text = sarrayReadStream(fp)) == NULL) {
        recogDestroy(&recog);
        return (L_RECOG *)ERROR_PTR("sa_text not read", procName, NULL);
    }
    recog->sa_text = sa_text;

    if (fscanf(fp, "\nPixaa of all samples in the training set:\n") != 0) {
        recogDestroy(&recog);
        return (L_RECOG *)ERROR_PTR("pixaa intro not read", procName, NULL);
    }
    if ((paa = pixaaReadStream(fp)) == NULL) {
        recogDestroy(&recog);
        return (L_RECOG *)ERROR_PTR("pixaa not read", procName, NULL);
    }
    recog->setsize = setsize;
    nc = pixaaGetCount(paa, NULL);
    if (nc != setsize) {
        recogDestroy(&recog);
        pixaaDestroy(&paa);
        L_ERROR("(setsize = %d) != (paa count = %d)\n", procName,
                     setsize, nc);
        return NULL;
    }

    recogAddAllSamples(&recog, paa, 0);  /* this finishes */
    pixaaDestroy(&paa);
    if (!recog)
        return (L_RECOG *)ERROR_PTR("bad templates", procName, NULL);
    return recog;
}
示例#29
0
/*!
 *  gplotRead()
 *
 *      Input:  filename
 *      Return: gplot, or NULL on error
 */
GPLOT *
gplotRead(const char  *filename)
{
char     buf[L_BUF_SIZE];
char    *rootname, *title, *xlabel, *ylabel, *ignores;
l_int32  outformat, ret, version, ignore;
FILE    *fp;
GPLOT   *gplot;

    PROCNAME("gplotRead");

    if (!filename)
        return (GPLOT *)ERROR_PTR("filename not defined", procName, NULL);

    if ((fp = fopenReadStream(filename)) == NULL)
        return (GPLOT *)ERROR_PTR("stream not opened", procName, NULL);

    ret = fscanf(fp, "Gplot Version %d\n", &version);
    if (ret != 1) {
        fclose(fp);
        return (GPLOT *)ERROR_PTR("not a gplot file", procName, NULL);
    }
    if (version != GPLOT_VERSION_NUMBER) {
        fclose(fp);
        return (GPLOT *)ERROR_PTR("invalid gplot version", procName, NULL);
    }

    ignore = fscanf(fp, "Rootname: %s\n", buf);
    rootname = stringNew(buf);
    ignore = fscanf(fp, "Output format: %d\n", &outformat);
    ignores = fgets(buf, L_BUF_SIZE, fp);   /* Title: ... */
    title = stringNew(buf + 7);
    title[strlen(title) - 1] = '\0';
    ignores = fgets(buf, L_BUF_SIZE, fp);   /* X axis label: ... */
    xlabel = stringNew(buf + 14);
    xlabel[strlen(xlabel) - 1] = '\0';
    ignores = fgets(buf, L_BUF_SIZE, fp);   /* Y axis label: ... */
    ylabel = stringNew(buf + 14);
    ylabel[strlen(ylabel) - 1] = '\0';

    if (!(gplot = gplotCreate(rootname, outformat, title, xlabel, ylabel))) {
        fclose(fp);
        return (GPLOT *)ERROR_PTR("gplot not made", procName, NULL);
    }
    FREE(rootname);
    FREE(title);
    FREE(xlabel);
    FREE(ylabel);
    sarrayDestroy(&gplot->cmddata);
    sarrayDestroy(&gplot->datanames);
    sarrayDestroy(&gplot->plotdata);
    sarrayDestroy(&gplot->plottitles);
    numaDestroy(&gplot->plotstyles);

    ignore = fscanf(fp, "Commandfile name: %s\n", buf);
    stringReplace(&gplot->cmdname, buf);
    ignore = fscanf(fp, "\nCommandfile data:");
    gplot->cmddata = sarrayReadStream(fp);
    ignore = fscanf(fp, "\nDatafile names:");
    gplot->datanames = sarrayReadStream(fp);
    ignore = fscanf(fp, "\nPlot data:");
    gplot->plotdata = sarrayReadStream(fp);
    ignore = fscanf(fp, "\nPlot titles:");
    gplot->plottitles = sarrayReadStream(fp);
    ignore = fscanf(fp, "\nPlot styles:");
    gplot->plotstyles = numaReadStream(fp);

    ignore = fscanf(fp, "Number of plots: %d\n", &gplot->nplots);
    ignore = fscanf(fp, "Output file name: %s\n", buf);
    stringReplace(&gplot->outname, buf);
    ignore = fscanf(fp, "Axis scaling: %d\n", &gplot->scaling);

    fclose(fp);
    return gplot;
}
示例#30
0
l_int32 main(int    argc,
             char **argv)
{
    L_ASET     *set;
    L_DNA      *da1, *da2, *da3, *da4, *da5, *da6, *da7, *da8, *dav, *dac;
    L_DNAHASH  *dahash;
    NUMA       *nav, *nac;
    PTA        *pta1, *pta2, *pta3;
    SARRAY     *sa1, *sa2, *sa3, *sa4;

    lept_mkdir("lept/hash");

#if 1
    /* Test string hashing with aset */
    fprintf(stderr, "Set results with string hashing:\n");
    sa1 = BuildShortStrings(3, 0);
    sa2 = BuildShortStrings(3, 1);
    fprintf(stderr, "  size with unique strings: %d\n", sarrayGetCount(sa1));
    fprintf(stderr, "  size with dups: %d\n", sarrayGetCount(sa2));
    startTimer();
    set = l_asetCreateFromSarray(sa2);
    fprintf(stderr, "  time to make set: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  size of set without dups: %d\n", l_asetSize(set));
    l_asetDestroy(&set);
    startTimer();
    sa3 = sarrayRemoveDupsByAset(sa2);
    fprintf(stderr, "  time to remove dups: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  size without dups = %d\n", sarrayGetCount(sa3));
    startTimer();
    sa4 = sarrayIntersectionByAset(sa1, sa2);
    fprintf(stderr, "  time to intersect: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  intersection size = %d\n", sarrayGetCount(sa4));
    sarrayDestroy(&sa3);
    sarrayDestroy(&sa4);

    /* Test sarray set operations with dna hash.
     * We use the same hash function as is used with aset. */
    fprintf(stderr, "\nDna hash results for sarray:\n");
    fprintf(stderr, "  size with unique strings: %d\n", sarrayGetCount(sa1));
    fprintf(stderr, "  size with dups: %d\n", sarrayGetCount(sa2));
    startTimer();
    dahash = l_dnaHashCreateFromSarray(sa2);
    fprintf(stderr, "  time to make hashmap: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  entries in hashmap with dups: %d\n",
            l_dnaHashGetTotalCount(dahash));
    l_dnaHashDestroy(&dahash);
    startTimer();
    sarrayRemoveDupsByHash(sa2, &sa3, NULL);
    fprintf(stderr, "  time to remove dups: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  size without dups = %d\n", sarrayGetCount(sa3));
    startTimer();
    sa4 = sarrayIntersectionByHash(sa1, sa2);
    fprintf(stderr, "  time to intersect: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  intersection size = %d\n", sarrayGetCount(sa4));
    sarrayDestroy(&sa3);
    sarrayDestroy(&sa4);
    sarrayDestroy(&sa1);
    sarrayDestroy(&sa2);
#endif

#if 1
    /* Test point hashing with aset.
     * Enter all points within a 1500 x 1500 image in pta1, and include
     * 450,000 duplicates in pta2.  With this pt hashing function,
     * there are no hash collisions among any of the 400 million pixel
     * locations in a 20000 x 20000 image. */
    pta1 = BuildPointSet(1500, 1500, 0);
    pta2 = BuildPointSet(1500, 1500, 1);
    fprintf(stderr, "\nSet results for pta:\n");
    fprintf(stderr, "  pta1 size with unique points: %d\n", ptaGetCount(pta1));
    fprintf(stderr, "  pta2 size with dups: %d\n", ptaGetCount(pta2));
    startTimer();
    pta3 = ptaRemoveDupsByAset(pta2);
    fprintf(stderr, "  Time to remove dups: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  size without dups = %d\n", ptaGetCount(pta3));
    ptaDestroy(&pta3);

    startTimer();
    pta3 = ptaIntersectionByAset(pta1, pta2);
    fprintf(stderr, "  Time to intersect: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  intersection size = %d\n", ptaGetCount(pta3));
    ptaDestroy(&pta1);
    ptaDestroy(&pta2);
    ptaDestroy(&pta3);
#endif

#if 1
    /* Test pta set operations with dna hash, using the same pt hashing
     * function.  Although there are no collisions in 20K x 20K images,
     * the dna hash implementation works properly even if there are some. */
    pta1 = BuildPointSet(1500, 1500, 0);
    pta2 = BuildPointSet(1500, 1500, 1);
    fprintf(stderr, "\nDna hash results for pta:\n");
    fprintf(stderr, "  pta1 size with unique points: %d\n", ptaGetCount(pta1));
    fprintf(stderr, "  pta2 size with dups: %d\n", ptaGetCount(pta2));
    startTimer();
    ptaRemoveDupsByHash(pta2, &pta3, NULL);
    fprintf(stderr, "  Time to remove dups: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  size without dups = %d\n", ptaGetCount(pta3));
    ptaDestroy(&pta3);

    startTimer();
    pta3 = ptaIntersectionByHash(pta1, pta2);
    fprintf(stderr, "  Time to intersect: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  intersection size = %d\n", ptaGetCount(pta3));
    ptaDestroy(&pta1);
    ptaDestroy(&pta2);
    ptaDestroy(&pta3);
#endif

    /* Test dna set and histo operations using dna hash */
#if 1
    fprintf(stderr, "\nDna hash results for dna:\n");
    da1 = l_dnaMakeSequence(0.0, 0.125, 8000);
    da2 = l_dnaMakeSequence(300.0, 0.125, 8000);
    da3 = l_dnaMakeSequence(600.0, 0.125, 8000);
    da4 = l_dnaMakeSequence(900.0, 0.125, 8000);
    da5 = l_dnaMakeSequence(1200.0, 0.125, 8000);
    l_dnaJoin(da1, da2, 0, -1);
    l_dnaJoin(da1, da3, 0, -1);
    l_dnaJoin(da1, da4, 0, -1);
    l_dnaJoin(da1, da5, 0, -1);
    l_dnaRemoveDupsByHash(da1, &da6, &dahash);
    l_dnaHashDestroy(&dahash);
    fprintf(stderr, "  dna size with dups = %d\n", l_dnaGetCount(da1));
    fprintf(stderr, "  dna size of unique numbers = %d\n", l_dnaGetCount(da6));
    l_dnaMakeHistoByHash(da1, &dahash, &dav, &dac);
    nav = l_dnaConvertToNuma(dav);
    nac = l_dnaConvertToNuma(dac);
    fprintf(stderr, "  dna number of histo points = %d\n", l_dnaGetCount(dac));
    gplotSimpleXY1(nav, nac, GPLOT_IMPULSES, GPLOT_PNG,
                   "/tmp/lept/hash/histo", "Histo");
    da7 = l_dnaIntersectionByHash(da2, da3);
    fprintf(stderr, "  dna number of points: da2 = %d, da3 = %d\n",
            l_dnaGetCount(da2), l_dnaGetCount(da3));
    fprintf(stderr, "  dna number of da2/da3 intersection points = %d\n",
            l_dnaGetCount(da7));
    l_fileDisplay("/tmp/lept/hash/histo.png", 700, 100, 1.0);
    l_dnaDestroy(&da1);
    l_dnaDestroy(&da2);
    l_dnaDestroy(&da3);
    l_dnaDestroy(&da4);
    l_dnaDestroy(&da5);
    l_dnaDestroy(&da6);
    l_dnaDestroy(&da7);
    l_dnaDestroy(&dac);
    l_dnaDestroy(&dav);
    l_dnaHashDestroy(&dahash);
    numaDestroy(&nav);
    numaDestroy(&nac);
#endif

#if 1
    da1 = l_dnaMakeSequence(0, 3, 10000);
    da2 = l_dnaMakeSequence(0, 5, 10000);
    da3 = l_dnaMakeSequence(0, 7, 10000);
    l_dnaJoin(da1, da2, 0, -1);
    l_dnaJoin(da1, da3, 0, -1);

    fprintf(stderr, "\nDna results using set:\n");
    fprintf(stderr, "  da1 count: %d\n", l_dnaGetCount(da1));
    set = l_asetCreateFromDna(da1);
    fprintf(stderr, "  da1 set size: %d\n\n", l_asetSize(set));
    l_asetDestroy(&set);

    da4 = l_dnaUnionByAset(da2, da3);
    fprintf(stderr, "  da4 count: %d\n", l_dnaGetCount(da4));
    set = l_asetCreateFromDna(da4);
    fprintf(stderr, "  da4 set size: %d\n\n", l_asetSize(set));
    l_asetDestroy(&set);

    da5 = l_dnaIntersectionByAset(da1, da2);
    fprintf(stderr, "  da5 count: %d\n", l_dnaGetCount(da5));
    set = l_asetCreateFromDna(da5);
    fprintf(stderr, "  da5 set size: %d\n\n", l_asetSize(set));
    l_asetDestroy(&set);

    da6 = l_dnaMakeSequence(100000, 11, 5000);
    l_dnaJoin(da6, da1, 0, -1);
    fprintf(stderr, "  da6 count: %d\n", l_dnaGetCount(da6));
    set = l_asetCreateFromDna(da6);
    fprintf(stderr, "  da6 set size: %d\n\n", l_asetSize(set));
    l_asetDestroy(&set);

    da7 = l_dnaIntersectionByAset(da6, da3);
    fprintf(stderr, "  da7 count: %d\n", l_dnaGetCount(da7));
    set = l_asetCreateFromDna(da7);
    fprintf(stderr, "  da7 set size: %d\n\n", l_asetSize(set));
    l_asetDestroy(&set);

    da8 = l_dnaRemoveDupsByAset(da1);
    fprintf(stderr, "  da8 count: %d\n\n", l_dnaGetCount(da8));

    l_dnaDestroy(&da1);
    l_dnaDestroy(&da2);
    l_dnaDestroy(&da3);
    l_dnaDestroy(&da4);
    l_dnaDestroy(&da5);
    l_dnaDestroy(&da6);
    l_dnaDestroy(&da7);
    l_dnaDestroy(&da8);
#endif

    return 0;
}