/*!
 * \brief   sarrayIntersectionByHash()
 *
 * \param[in]    sa1, sa2
 * \return  sad intersection of the strings, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) This is faster than sarrayIntersectionByAset(), because the
 *          bucket lookup is O(n).
 * </pre>
 */
SARRAY *
sarrayIntersectionByHash(SARRAY  *sa1,
                         SARRAY  *sa2)
{
char       *str;
l_int32     n1, n2, nsmall, i, index1, index2;
l_uint32    nsize2;
l_uint64    key;
L_DNAHASH  *dahash1, *dahash2;
SARRAY     *sa_small, *sa_big, *sad;

    PROCNAME("sarrayIntersectionByHash");

    if (!sa1)
        return (SARRAY *)ERROR_PTR("sa1 not defined", procName, NULL);
    if (!sa2)
        return (SARRAY *)ERROR_PTR("sa2 not defined", procName, NULL);

        /* Put the elements of the biggest sarray into a dnahash */
    n1 = sarrayGetCount(sa1);
    n2 = sarrayGetCount(sa2);
    sa_small = (n1 < n2) ? sa1 : sa2;   /* do not destroy sa_small */
    sa_big = (n1 < n2) ? sa2 : sa1;   /* do not destroy sa_big */
    dahash1 = l_dnaHashCreateFromSarray(sa_big);

        /* Build up the intersection of strings.  Add to %sad
         * if the string is in sa_big (using dahash1) but hasn't
         * yet been seen in the traversal of sa_small (using dahash2). */
    sad = sarrayCreate(0);
    nsmall = sarrayGetCount(sa_small);
    findNextLargerPrime(nsmall / 20, &nsize2);  /* buckets in hash table */
    dahash2 = l_dnaHashCreate(nsize2, 0);
    for (i = 0; i < nsmall; i++) {
        str = sarrayGetString(sa_small, i, L_NOCOPY);
        sarrayFindStringByHash(sa_big, dahash1, str, &index1);
        if (index1 >= 0) {
            sarrayFindStringByHash(sa_small, dahash2, str, &index2);
            if (index2 == -1) {
                sarrayAddString(sad, str, L_COPY);
                l_hashStringToUint64(str, &key);
                l_dnaHashAdd(dahash2, key, (l_float64)i);
            }
        }
    }

    l_dnaHashDestroy(&dahash1);
    l_dnaHashDestroy(&dahash2);
    return sad;
}
/*!
 *  sarrayAppendRange()
 *
 *      Input:  sa1  (to be added to)
 *              sa2  (append specified range of strings in sa2 to sa1)
 *              start (index of first string of sa2 to append)
 *              end (index of last string of sa2 to append)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) Copies of the strings in sarray2 are added to sarray1.
 *      (2) The [start ... end] range is truncated if necessary.
 */
l_int32
sarrayAppendRange(SARRAY  *sa1,
                  SARRAY  *sa2,
		  l_int32  start,
		  l_int32  end)
{
char    *str;
l_int32  n, i;

    PROCNAME("sarrayAppendRange");

    if (!sa1)
        return ERROR_INT("sa1 not defined", procName, 1);
    if (!sa2)
        return ERROR_INT("sa2 not defined", procName, 1);
    if (start < 0)
        start = 0;
    n = sarrayGetCount(sa2);
    if (end >= n)
        end = n - 1;
    if (start > end)
        return ERROR_INT("start > end", procName, 1);

    for (i = start; i <= end; i++) {
        str = sarrayGetString(sa2, i, L_NOCOPY);
        sarrayAddString(sa1, str, L_COPY);
    }

    return 0;
}
예제 #3
0
/*!
 *  jbCorrelation()
 *
 *       Input:  dirin (directory of input images)
 *               thresh (typically ~0.8)
 *               weight (typically ~0.6)
 *               components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS)
 *               rootname (for output files)
 *               firstpage (0-based)
 *               npages (use 0 for all pages in dirin)
 *               renderflag (1 to render from templates; 0 to skip)
 *       Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) The images must be 1 bpp.  If they are not, you can convert
 *          them using convertFilesTo1bpp().
 *      (2) See prog/jbcorrelation for generating more output (e.g.,
 *          for debugging)
 */
l_int32
jbCorrelation(const char *dirin,
              l_float32 thresh,
              l_float32 weight,
              l_int32 components,
              const char *rootname,
              l_int32 firstpage,
              l_int32 npages,
              l_int32 renderflag) {
    char filename[L_BUF_SIZE];
    l_int32 nfiles, i, numpages;
    JBDATA *data;
    JBCLASSER *classer;
    PIX *pix;
    PIXA *pixa;
    SARRAY *safiles;

    PROCNAME("jbCorrelation");

    if (!dirin)
        return ERROR_INT("dirin not defined", procName, 1);
    if (!rootname)
        return ERROR_INT("rootname not defined", procName, 1);
    if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
        components != JB_WORDS)
        return ERROR_INT("components invalid", procName, 1);

    safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages);
    nfiles = sarrayGetCount(safiles);

    /* Classify components */
    classer = jbCorrelationInit(components, 0, 0, thresh, weight);
    jbAddPages(classer, safiles);

    /* Save data */
    data = jbDataSave(classer);
    jbDataWrite(rootname, data);

    /* Optionally, render pages using class templates */
    if (renderflag) {
        pixa = jbDataRender(data, FALSE);
        numpages = pixaGetCount(pixa);
        if (numpages != nfiles)
            fprintf(stderr, "numpages = %d, nfiles = %d, not equal!\n",
                    numpages, nfiles);
        for (i = 0; i < numpages; i++) {
            pix = pixaGetPix(pixa, i, L_CLONE);
            snprintf(filename, L_BUF_SIZE, "%s.%05d", rootname, i);
            fprintf(stderr, "filename: %s\n", filename);
            pixWrite(filename, pix, IFF_PNG);
            pixDestroy(&pix);
        }
        pixaDestroy(&pixa);
    }

    sarrayDestroy(&safiles);
    jbClasserDestroy(&classer);
    jbDataDestroy(&data);
    return 0;
}
/*!
 * \brief   sarraySortByIndex()
 *
 * \param[in]    sain
 * \param[in]    naindex na that maps from the new sarray to the input sarray
 * \return  saout sorted, or NULL on error
 */
SARRAY *
sarraySortByIndex(SARRAY  *sain,
                  NUMA    *naindex)
{
char    *str;
l_int32  i, n, index;
SARRAY  *saout;

    PROCNAME("sarraySortByIndex");

    if (!sain)
        return (SARRAY *)ERROR_PTR("sain not defined", procName, NULL);
    if (!naindex)
        return (SARRAY *)ERROR_PTR("naindex not defined", procName, NULL);

    n = sarrayGetCount(sain);
    saout = sarrayCreate(n);
    for (i = 0; i < n; i++) {
        numaGetIValue(naindex, i, &index);
        str = sarrayGetString(sain, index, L_COPY);
        sarrayAddString(saout, str, L_INSERT);
    }

    return saout;
}
예제 #5
0
파일: psio1.c 프로젝트: AbdelghaniDr/mirror
/*
 *  sarrayConvertFilesToPS()
 *
 *      Input:  sarray (of full path names)
 *              res (typ. 300 or 600 ppi)
 *              fileout (output ps file)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) See convertFilesToPS()
 */
l_int32
sarrayConvertFilesToPS(SARRAY      *sa,
                       l_int32      res,
                       const char  *fileout)
{
char    *fname;
l_int32  i, nfiles, index, firstfile, ret, format;

    PROCNAME("sarrayConvertFilesToPS");

    if (!sa)
        return ERROR_INT("sa not defined", procName, 1);
    if (!fileout)
        return ERROR_INT("fileout not defined", procName, 1);
    if (res <= 0) {
        L_INFO("setting res to 300 ppi", procName);
        res = 300;
    }
    if (res < 10 || res > 4000)
        L_WARNING("res is typically in the range 300-600 ppi", procName);

    nfiles = sarrayGetCount(sa);
    firstfile = TRUE;
    for (i = 0, index = 0; i < nfiles; i++) {
        fname = sarrayGetString(sa, i, L_NOCOPY);
        ret = pixReadHeader(fname, &format, NULL, NULL, NULL, NULL, NULL);
        if (ret) continue;
        if (format == IFF_UNKNOWN)
            continue;

        writeImageCompressedToPSFile(fname, fileout, res, &firstfile, &index);
    }

    return 0;
}
예제 #6
0
/*!
 *  pixReadIndexed()
 *
 *      Input:  sarray (of full pathnames)
 *              index (into pathname array)
 *      Return: pix if OK; null if not found
 *
 *  Notes:
 *      (1) This function is useful for selecting image files from a
 *          directory, where the integer @index is embedded into
 *          the file name.
 *      (2) This is typically done by generating the sarray using
 *          getNumberedPathnamesInDirectory(), so that the @index
 *          pathname would have the number @index in it.  The size
 *          of the sarray should be the largest number (plus 1) appearing
 *          in the file names, respecting the constraints in the
 *          call to getNumberedPathnamesInDirectory().
 *      (3) Consequently, for some indices into the sarray, there may
 *          be no pathnames in the directory containing that number.
 *          By convention, we place empty C strings ("") in those
 *          locations in the sarray, and it is not an error if such
 *          a string is encountered and no pix is returned.
 *          Therefore, the caller must verify that a pix is returned.
 *      (4) See convertSegmentedPagesToPS() in src/psio1.c for an
 *          example of usage.
 */
PIX *
pixReadIndexed(SARRAY  *sa,
               l_int32  index)
{
char    *fname;
l_int32  n;
PIX     *pix;

    PROCNAME("pixReadIndexed");

    if (!sa)
        return (PIX *)ERROR_PTR("sa not defined", procName, NULL);
    n = sarrayGetCount(sa);
    if (index < 0 || index >= n)
        return (PIX *)ERROR_PTR("index out of bounds", procName, NULL);

    fname = sarrayGetString(sa, index, L_NOCOPY);
    if (fname[0] == '\0')
        return NULL;

    if ((pix = pixRead(fname)) == NULL) {
        L_ERROR("pix not read from file %s\n", procName, fname);
        return NULL;
    }

    return pix;
}
예제 #7
0
/*!
 *  pixaReadFilesSA()
 *
 *      Input:  sarray (full pathnames for all files)
 *      Return: pixa, or null on error
 */
PIXA *
pixaReadFilesSA(SARRAY  *sa)
{
char    *str;
l_int32  i, n;
PIX     *pix;
PIXA    *pixa;

    PROCNAME("pixaReadFilesSA");

    if (!sa)
        return (PIXA *)ERROR_PTR("sa not defined", procName, NULL);

    n = sarrayGetCount(sa);
    pixa = pixaCreate(n);
    for (i = 0; i < n; i++) {
        str = sarrayGetString(sa, i, L_NOCOPY);
        if ((pix = pixRead(str)) == NULL) {
            L_WARNING("pix not read from file %s\n", procName, str);
            continue;
        }
        pixaAddPix(pixa, pix, L_INSERT);
    }

    return pixa;
}
예제 #8
0
/*
 *  getNextNonCommentLine()
 *
 *      Input:  sa (output from cpp, by line)
 *              start (starting index to search)
 *              &next (<return> index of first uncommented line after
 *                     the start line)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) Skips over all consecutive comment lines, beginning at 'start'
 *      (2) If all lines to the end are '#' comments, return next = -1
 */
static l_int32
getNextNonCommentLine(SARRAY  *sa,
                      l_int32  start,
                      l_int32 *pnext)
{
char    *str;
l_int32  i, n;

    PROCNAME("getNextNonCommentLine");

    if (!sa)
        return ERROR_INT("sa not defined", procName, 1);
    if (!pnext)
        return ERROR_INT("&pnext not defined", procName, 1);

        /* Init for situation where this line and all following are comments */
    *pnext = -1;

    n = sarrayGetCount(sa);
    for (i = start; i < n; i++) {
        if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
            return ERROR_INT("str not returned; shouldn't happen", procName, 1);
        if (str[0] != '#') {
            *pnext = i;
            return 0;
        }
    }

    return 0;
}
/*!
 *  sarrayAddString()
 *
 *      Input:  sarray
 *              string  (string to be added)
 *              copyflag (L_INSERT, L_COPY)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) Legacy usage decrees that we always use 0 to insert a string
 *          directly and 1 to insert a copy of the string.  The
 *          enums for L_INSERT and L_COPY agree with this convention,
 *          and will not change in the future.
 *      (2) See usage comments at the top of this file.
 */
l_int32
sarrayAddString(SARRAY  *sa,
                char    *string,
                l_int32  copyflag)
{
l_int32  n;

    PROCNAME("sarrayAddString");

    if (!sa)
        return ERROR_INT("sa not defined", procName, 1);
    if (!string)
        return ERROR_INT("string not defined", procName, 1);
    if (copyflag != L_INSERT && copyflag != L_COPY)
        return ERROR_INT("invalid copyflag", procName, 1);
    
    n = sarrayGetCount(sa);
    if (n >= sa->nalloc)
        sarrayExtendArray(sa);

    if (copyflag == L_INSERT)
        sa->array[n] = string;
    else  /* L_COPY */
        sa->array[n] = stringNew(string);
    sa->n++;

    return 0;
}
예제 #10
0
/*!
 * \brief   l_dnaHashCreateFromSarray()
 *
 * \param[in]    sa
 * \return  dahash, or NULL on error
 */
L_DNAHASH *
l_dnaHashCreateFromSarray(SARRAY  *sa)
{
char       *str;
l_int32     i, n;
l_uint32    nsize;
l_uint64    key;
L_DNAHASH  *dahash;

        /* Build up dnaHash of indices, hashed by a 64-bit key that
         * should randomize the lower bits used in bucket selection.
         * Having about 20 pts in each bucket is roughly optimal. */
    n = sarrayGetCount(sa);
    findNextLargerPrime(n / 20, &nsize);  /* buckets in hash table */
/*    fprintf(stderr, "Prime used: %d\n", nsize); */

        /* Add each string, using the hash as key and the index into %sa
         * as the value.  Storing the index enables operations that check
         * for duplicates.  */
    dahash = l_dnaHashCreate(nsize, 8);
    for (i = 0; i < n; i++) {
        str = sarrayGetString(sa, i, L_NOCOPY);
        l_hashStringToUint64(str, &key);
        l_dnaHashAdd(dahash, key, (l_float64)i);
    }

    return dahash;
}
예제 #11
0
/*
 *  getNextNonBlankLine()
 *
 *      Input:  sa (output from cpp, by line)
 *              start (starting index to search)
 *              &next (<return> index of first nonblank line after
 *                     the start line)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) Skips over all consecutive blank lines, beginning at 'start'
 *      (2) A blank line has only whitespace characters (' ', '\t', '\n', '\r')
 *      (3) If all lines to the end are blank, return next = -1
 */
static l_int32
getNextNonBlankLine(SARRAY  *sa,
                    l_int32  start,
                    l_int32 *pnext)
{
char    *str;
l_int32  i, j, n, len;

    PROCNAME("getNextNonBlankLine");

    if (!sa)
        return ERROR_INT("sa not defined", procName, 1);
    if (!pnext)
        return ERROR_INT("&pnext not defined", procName, 1);

        /* Init for situation where this line and all following are blank */
    *pnext = -1;

    n = sarrayGetCount(sa);
    for (i = start; i < n; i++) {
        if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
            return ERROR_INT("str not returned; shouldn't happen", procName, 1);
        len = strlen(str);
        for (j = 0; j < len; j++) {
            if (str[j] != ' ' && str[j] != '\t'
                && str[j] != '\n' && str[j] != '\r') {  /* non-blank */
                *pnext = i;
                return 0;
            }
        }
    }

    return 0;
}
예제 #12
0
/*!
 * \brief   l_asetCreateFromSarray()
 *
 * \param[in]    sa
 * \return  set using a string hash into a uint32 as the key
 */
L_ASET *
l_asetCreateFromSarray(SARRAY  *sa)
{
char     *str;
l_int32   i, n;
l_uint64  hash;
L_ASET   *set;
RB_TYPE   key;

    PROCNAME("l_asetCreateFromSarray");

    if (!sa)
        return (L_ASET *)ERROR_PTR("sa not defined", procName, NULL);

    set = l_asetCreate(L_UINT_TYPE);
    n = sarrayGetCount(sa);
    for (i = 0; i < n; i++) {
        str = sarrayGetString(sa, i, L_NOCOPY);
        l_hashStringToUint64(str, &hash);
        key.utype = hash;
        l_asetInsert(set, key);
    }

    return set;
}
예제 #13
0
/*
 *  getNextNonDoubleSlashLine()
 *
 *      Input:  sa (output from cpp, by line)
 *              start (starting index to search)
 *              &next (<return> index of first uncommented line after
 *                     the start line)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) Skips over all consecutive '//' lines, beginning at 'start'
 *      (2) If all lines to the end start with '//', return next = -1
 */
static l_int32
getNextNonDoubleSlashLine(SARRAY  *sa,
                          l_int32  start,
                          l_int32 *pnext)
{
char    *str;
l_int32  i, n, len;

    PROCNAME("getNextNonDoubleSlashLine");

    if (!sa)
        return ERROR_INT("sa not defined", procName, 1);
    if (!pnext)
        return ERROR_INT("&pnext not defined", procName, 1);

        /* Init for situation where this line and all following
         * start with '//' */
    *pnext = -1;

    n = sarrayGetCount(sa);
    for (i = start; i < n; i++) {
        if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
            return ERROR_INT("str not returned; shouldn't happen", procName, 1);
        len = strlen(str);
        if (len < 2 || str[0] != '/' || str[1] != '/') {
            *pnext = i;
            return 0;
        }
    }

    return 0;
}
예제 #14
0
/*!
 * \brief   sarrayRemoveDupsByAset()
 *
 * \param[in]    sas
 * \return  sad with duplicates removed, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) This is O(nlogn), considerably slower than
 *          sarrayRemoveDupsByHash() for large string arrays.
 *      (2) The key for each string is a 64-bit hash.
 *      (3) Build a set, using hashed strings as keys.  As the set is
 *          built, first do a find; if not found, add the key to the
 *          set and add the string to the output sarray.
 * </pre>
 */
SARRAY *
sarrayRemoveDupsByAset(SARRAY  *sas)
{
char     *str;
l_int32   i, n;
l_uint64  hash;
L_ASET   *set;
RB_TYPE   key;
SARRAY   *sad;

    PROCNAME("sarrayRemoveDupsByAset");

    if (!sas)
        return (SARRAY *)ERROR_PTR("sas not defined", procName, NULL);

    set = l_asetCreate(L_UINT_TYPE);
    sad = sarrayCreate(0);
    n = sarrayGetCount(sas);
    for (i = 0; i < n; i++) {
        str = sarrayGetString(sas, i, L_NOCOPY);
        l_hashStringToUint64(str, &hash);
        key.utype = hash;
        if (!l_asetFind(set, key)) {
            sarrayAddString(sad, str, L_COPY);
            l_asetInsert(set, key);
        }
    }

    l_asetDestroy(&set);
    return sad;
}
예제 #15
0
파일: psio1.c 프로젝트: Android-BD/tess-two
/*
 *  convertSegmentedPagesToPS()
 *
 *      Input:  pagedir (input page image directory)
 *              pagestr (<optional> substring filter on page filenames;
 *                       can be NULL)
 *              page_numpre (number of characters in page name before number)
 *              maskdir (input mask image directory)
 *              maskstr (<optional> substring filter on mask filenames;
 *                       can be NULL)
 *              mask_numpre (number of characters in mask name before number)
 *              numpost (number of characters in names after number)
 *              maxnum (only consider page numbers up to this value)
 *              textscale (scale of text output relative to pixs)
 *              imagescale (scale of image output relative to pixs)
 *              threshold (for binarization; typ. about 190; 0 for default)
 *              fileout (output ps file)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) This generates a PS file for all page image and mask files in two
 *          specified directories and that contain the page numbers as
 *          specified below.  The two directories can be the same, in which
 *          case the page and mask files are differentiated by the two
 *          substrings for string matches.
 *      (2) The page images are taken in lexicographic order.
 *          Mask images whose numbers match the page images are used to
 *          segment the page images.  Page images without a matching
 *          mask image are scaled, thresholded and rendered entirely as text.
 *      (3) Each PS page is generated as a compressed representation of
 *          the page image, where the part of the image under the mask
 *          is suitably scaled and compressed as DCT (i.e., jpeg), and
 *          the remaining part of the page is suitably scaled, thresholded,
 *          compressed as G4 (i.e., tiff g4), and rendered by painting
 *          black through the resulting text mask.
 *      (4) The scaling is typically 2x down for the DCT component
 *          (@imagescale = 0.5) and 2x up for the G4 component
 *          (@textscale = 2.0).
 *      (5) The resolution is automatically set to fit to a
 *          letter-size (8.5 x 11 inch) page.
 *      (6) Both the DCT and the G4 encoding are PostScript level 2.
 *      (7) It is assumed that the page number is contained within
 *          the basename (the filename without directory or extension).
 *          @page_numpre is the number of characters in the page basename
 *          preceding the actual page number; @mask_numpre is likewise for
 *          the mask basename; @numpost is the number of characters
 *          following the page number.  For example, for mask name
 *          mask_006.tif, mask_numpre = 5 ("mask_).
 *      (8) To render a page as is -- that is, with no thresholding
 *          of any pixels -- use a mask in the mask directory that is
 *          full size with all pixels set to 1.  If the page is 1 bpp,
 *          it is not necessary to have a mask.
 */
l_int32
convertSegmentedPagesToPS(const char  *pagedir,
                          const char  *pagestr,
                          l_int32      page_numpre,
                          const char  *maskdir,
                          const char  *maskstr,
                          l_int32      mask_numpre,
                          l_int32      numpost,
                          l_int32      maxnum,
                          l_float32    textscale,
                          l_float32    imagescale,
                          l_int32      threshold,
                          const char  *fileout)
{
l_int32  pageno, i, npages;
PIX     *pixs, *pixm;
SARRAY  *sapage, *samask;

    PROCNAME("convertSegmentedPagesToPS");

    if (!pagedir)
        return ERROR_INT("pagedir not defined", procName, 1);
    if (!maskdir)
        return ERROR_INT("maskdir not defined", procName, 1);
    if (!fileout)
        return ERROR_INT("fileout not defined", procName, 1);
    if (threshold <= 0) {
        L_INFO("setting threshold to 190\n", procName);
        threshold = 190;
    }

        /* Get numbered full pathnames; max size of sarray is maxnum */
    sapage = getNumberedPathnamesInDirectory(pagedir, pagestr,
                                             page_numpre, numpost, maxnum);
    samask = getNumberedPathnamesInDirectory(maskdir, maskstr,
                                             mask_numpre, numpost, maxnum);
    sarrayPadToSameSize(sapage, samask, (char *)"");
    if ((npages = sarrayGetCount(sapage)) == 0) {
        sarrayDestroy(&sapage);
        sarrayDestroy(&samask);
        return ERROR_INT("no matching pages found", procName, 1);
    }

        /* Generate the PS file */
    pageno = 1;
    for (i = 0; i < npages; i++) {
        if ((pixs = pixReadIndexed(sapage, i)) == NULL)
            continue;
        pixm = pixReadIndexed(samask, i);
        pixWriteSegmentedPageToPS(pixs, pixm, textscale, imagescale,
                                  threshold, pageno, fileout);
        pixDestroy(&pixs);
        pixDestroy(&pixm);
        pageno++;
    }

    sarrayDestroy(&sapage);
    sarrayDestroy(&samask);
    return 0;
}
예제 #16
0
/*!
 *  sarraySelectBySubstring()
 *
 *      Input:  sain (input sarray)
 *              substr (<optional> substring for matching; can be NULL)
 *      Return: saout (output sarray, filtered with substring) or null on error
 *
 *  Notes:
 *      (1) This selects all strings in sain that have substr as a substring.
 *          Note that we can't use strncmp() because we're looking for
 *          a match to the substring anywhere within each filename.
 *      (2) If substr == NULL, returns a copy of the sarray.
 */
SARRAY *
sarraySelectBySubstring(SARRAY      *sain,
                        const char  *substr)
{
char    *str;
l_int32  n, i, offset, found;
SARRAY  *saout;

    PROCNAME("sarraySelectBySubstring");

    if (!sain)
        return (SARRAY *)ERROR_PTR("sain not defined", procName, NULL);

    n = sarrayGetCount(sain);
    if (!substr || n == 0)
        return sarrayCopy(sain);

    saout = sarrayCreate(n);
    for (i = 0; i < n; i++) {
        str = sarrayGetString(sain, i, L_NOCOPY);
        arrayFindSequence((l_uint8 *)str, strlen(str), (l_uint8 *)substr,
                          strlen(substr), &offset, &found);
        if (found)
            sarrayAddString(saout, str, L_COPY);
    }

    return saout;
}
예제 #17
0
/*
 *  skipToMatchingBrace()
 *
 *      Input:  sa (output from cpp, by line)
 *              start (index of starting line with left bracket to search)
 *              lbindex (starting char index for left bracket)
 *              &stop (index of line with the matching right bracket)
 *              &rbindex (char index of matching right bracket)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) If the matching right brace is not found, returns
 *          stop = -1.  This shouldn't happen.
 */
static l_int32
skipToMatchingBrace(SARRAY   *sa,
                    l_int32   start,
                    l_int32   lbindex,
                    l_int32  *pstop,
                    l_int32  *prbindex)
{
char    *str;
l_int32  i, j, jstart, n, sumbrace, found, instring, nchars;

    PROCNAME("skipToMatchingBrace");

    if (!sa)
        return ERROR_INT("sa not defined", procName, 1);
    if (!pstop)
        return ERROR_INT("&stop not defined", procName, 1);
    if (!prbindex)
        return ERROR_INT("&rbindex not defined", procName, 1);

    instring = 0;  /* init to FALSE; toggle on double quotes */
    *pstop = -1;
    n = sarrayGetCount(sa);
    sumbrace = 1;
    found = FALSE;
    for (i = start; i < n; i++) {
        str = sarrayGetString(sa, i, L_NOCOPY);
        jstart = 0;
        if (i == start)
            jstart = lbindex + 1;
        nchars = strlen(str);
        for (j = jstart; j < nchars; j++) {
                /* Toggle the instring state every time you encounter
                 * a double quote that is NOT escaped. */
            if (j == jstart && str[j] == '\"')
                instring = 1 - instring;
            if (j > jstart && str[j] == '\"' && str[j-1] != '\\')
                instring = 1 - instring;
                /* Record the braces if they are neither a literal character
                 * nor within a string. */
            if (str[j] == '{' && str[j+1] != '\'' && !instring) {
                sumbrace++;
            } else if (str[j] == '}' && str[j+1] != '\'' && !instring) {
                sumbrace--;
                if (sumbrace == 0) {
                    found = TRUE;
                    *prbindex = j;
                    break;
                }
            }
        }
        if (found) {
            *pstop = i;
            return 0;
        }
    }

    return ERROR_INT("matching right brace not found", procName, 1);
}
예제 #18
0
/*!
 * \brief   sarrayIntersectionByAset()
 *
 * \param[in]    sa1, sa2
 * \return  sad with the intersection of the string set, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) Algorithm: put the smaller sarray into a set, using the string
 *          hashes as the key values.  Then run through the larger sarray,
 *          building an output sarray and a second set from the strings
 *          in the larger array: if a string is in the first set but
 *          not in the second, add the string to the output sarray and hash
 *          it into the second set.  The second set is required to make
 *          sure only one instance of each string is put into the output sarray.
 *          This is O(mlogn), {m,n} = sizes of {smaller,larger} input arrays.
 * </pre>
 */
SARRAY *
sarrayIntersectionByAset(SARRAY  *sa1,
                         SARRAY  *sa2)
{
char     *str;
l_int32   n1, n2, i, n;
l_uint64  hash;
L_ASET   *set1, *set2;
RB_TYPE   key;
SARRAY   *sa_small, *sa_big, *sad;

    PROCNAME("sarrayIntersectionByAset");

    if (!sa1)
        return (SARRAY *)ERROR_PTR("sa1 not defined", procName, NULL);
    if (!sa2)
        return (SARRAY *)ERROR_PTR("sa2 not defined", procName, NULL);

        /* Put the elements of the biggest array into a set */
    n1 = sarrayGetCount(sa1);
    n2 = sarrayGetCount(sa2);
    sa_small = (n1 < n2) ? sa1 : sa2;   /* do not destroy sa_small */
    sa_big = (n1 < n2) ? sa2 : sa1;   /* do not destroy sa_big */
    set1 = l_asetCreateFromSarray(sa_big);

        /* Build up the intersection of strings */
    sad = sarrayCreate(0);
    n = sarrayGetCount(sa_small);
    set2 = l_asetCreate(L_UINT_TYPE);
    for (i = 0; i < n; i++) {
        str = sarrayGetString(sa_small, i, L_NOCOPY);
        l_hashStringToUint64(str, &hash);
        key.utype = hash;
        if (l_asetFind(set1, key) && !l_asetFind(set2, key)) {
            sarrayAddString(sad, str, L_COPY);
            l_asetInsert(set2, key);
        }
    }

    l_asetDestroy(&set1);
    l_asetDestroy(&set2);
    return sad;
}
예제 #19
0
/*
 *  getOffsetForCharacter()
 *
 *      Input:  sa (output from cpp, by line)
 *              start (starting index in sa to search; never a comment line)
 *              tchar (we are searching for the first instance of this)
 *              &soffset (<return> offset in strings from start index)
 *              &boffset (<return> offset in bytes within string in which
 *                        the character is first found)
 *              &toffset (<return> offset in total bytes from beginning of
 *                        string indexed by 'start' to the location where
 *                        the character is first found)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) We are searching for the first instance of 'tchar', starting
 *          at the beginning of the string indexed by start.
 *      (2) If the character is not found, soffset is returned as -1,
 *          and the other offsets are set to very large numbers.  The
 *          caller must check the value of soffset.
 *      (3) This is only used in contexts where it is not necessary to
 *          consider if the character is inside a string.
 */
static l_int32
getOffsetForCharacter(SARRAY   *sa,
                      l_int32   start,
                      char      tchar,
                      l_int32  *psoffset,
                      l_int32  *pboffset,
                      l_int32  *ptoffset)
{
char    *str;
l_int32  i, j, n, nchars, totchars, found;

    PROCNAME("getOffsetForCharacter");

    if (!sa)
        return ERROR_INT("sa not defined", procName, 1);
    if (!psoffset)
        return ERROR_INT("&soffset not defined", procName, 1);
    if (!pboffset)
        return ERROR_INT("&boffset not defined", procName, 1);
    if (!ptoffset)
        return ERROR_INT("&toffset not defined", procName, 1);

    *psoffset = -1;  /* init to not found */
    *pboffset = 100000000;
    *ptoffset = 100000000;

    n = sarrayGetCount(sa);
    found = FALSE;
    totchars = 0;
    for (i = start; i < n; i++) {
        if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
            return ERROR_INT("str not returned; shouldn't happen", procName, 1);
        nchars = strlen(str);
        for (j = 0; j < nchars; j++) {
            if (str[j] == tchar) {
                found = TRUE;
                break;
            }
        }
        if (found)
            break;
        totchars += nchars;
    }

    if (found) {
        *psoffset = i - start;
        *pboffset = j;
        *ptoffset = totchars + j;
    }

    return 0;
}
예제 #20
0
파일: croptext.c 프로젝트: chewi/leptonica
int main(int    argc,
         char **argv)
{
char        *dirin, *dirout, *infile, *outfile, *tail;
l_int32      i, nfiles, border, x, y, w, h, xb, yb, wb, hb;
BOX         *box1, *box2;
BOXA        *boxa1, *boxa2;
PIX         *pixs, *pixt1, *pixd;
SARRAY      *safiles;
static char  mainName[] = "croptext";

    if (argc != 4)
        return ERROR_INT("Syntax: croptext dirin border dirout", mainName, 1);
    dirin = argv[1];
    border = atoi(argv[2]);
    dirout = argv[3];

    setLeptDebugOK(1);
    safiles = getSortedPathnamesInDirectory(dirin, NULL, 0, 0);
    nfiles = sarrayGetCount(safiles);

    for (i = 0; i < nfiles; i++) {
        infile = sarrayGetString(safiles, i, L_NOCOPY);
        splitPathAtDirectory(infile, NULL, &tail);
        outfile = genPathname(dirout, tail);
        pixs = pixRead(infile);
        pixt1 = pixMorphSequence(pixs, "r11 + c10.40 + o5.5 + x4", 0);
        boxa1 = pixConnComp(pixt1, NULL, 8);
        if (boxaGetCount(boxa1) == 0) {
            fprintf(stderr, "Warning: no components on page %s\n", tail);
            continue;
        }
        boxa2 = boxaSort(boxa1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL);
        box1 = boxaGetBox(boxa2, 0, L_CLONE);
        boxGetGeometry(box1, &x, &y, &w, &h);
        xb = L_MAX(0, x - border);
        yb = L_MAX(0, y - border);
        wb = w + 2 * border;
        hb = h + 2 * border;
        box2 = boxCreate(xb, yb, wb, hb);
        pixd = pixClipRectangle(pixs, box2, NULL);
        pixWrite(outfile, pixd, IFF_TIFF_G4);

        pixDestroy(&pixs);
        pixDestroy(&pixt1);
        pixDestroy(&pixd);
        boxaDestroy(&boxa1);
        boxaDestroy(&boxa2);
    }

    return 0;
}
예제 #21
0
/*
 *  cleanProtoSignature()
 *
 *      Input:  instr (input prototype string)
 *      Return: cleanstr (clean prototype string), or NULL on error
 *
 *  Notes:
 *      (1) Adds 'extern' at beginning and regularizes spaces
 *          between tokens.
 */
static char *
cleanProtoSignature(char *instr)
{
char    *str, *cleanstr;
char     buf[L_BUF_SIZE];
char     externstring[] = "extern";
l_int32  i, j, nwords, nchars, index, len;
SARRAY  *sa, *saout;

    PROCNAME("cleanProtoSignature");

    if (!instr)
        return (char *)ERROR_PTR("instr not defined", procName, NULL);

    sa = sarrayCreateWordsFromString(instr);
    nwords = sarrayGetCount(sa);
    saout = sarrayCreate(0);
    sarrayAddString(saout, externstring, 1);
    for (i = 0; i < nwords; i++) {
        str = sarrayGetString(sa, i, 0);
        nchars = strlen(str);
        index = 0;
        for (j = 0; j < nchars; j++) {
            if (index > L_BUF_SIZE - 6)
                return (char *)ERROR_PTR("token too large", procName, NULL);
            if (str[j] == '(') {
                buf[index++] = ' ';
                buf[index++] = '(';
                buf[index++] = ' ';
            }
            else if (str[j] == ')') {
                buf[index++] = ' ';
                buf[index++] = ')';
            }
            else 
                buf[index++] = str[j];
        }
        buf[index] = '\0';
        sarrayAddString(saout, buf, 1);
    }

        /* Flatten to a prototype string with spaces added after
         * each word, and remove the last space */
    cleanstr = sarrayToString(saout, 2);
    len = strlen(cleanstr);
    cleanstr[len - 1] = '\0';

    sarrayDestroy(&sa);
    sarrayDestroy(&saout);
    return cleanstr;
}
예제 #22
0
파일: psio1.c 프로젝트: AbdelghaniDr/mirror
/*
 *  sarrayConvertFilesFittedToPS()
 *
 *      Input:  sarray (of full path names)
 *              xpts, ypts (desired size in printer points; use 0 for default)
 *              fileout (output ps file)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) See convertFilesFittedToPS()
 */
l_int32
sarrayConvertFilesFittedToPS(SARRAY      *sa,
                             l_float32    xpts,
                             l_float32    ypts,
                             const char  *fileout)
{
char    *fname;
l_int32  ret, i, w, h, nfiles, index, firstfile, format, res;

    PROCNAME("sarrayConvertFilesFittedToPS");

    if (!sa)
        return ERROR_INT("sa not defined", procName, 1);
    if (!fileout)
        return ERROR_INT("fileout not defined", procName, 1);
    if (xpts <= 0.0) {
        L_INFO("setting xpts to 612.0", procName);
        xpts = 612.0;
    }
    if (ypts <= 0.0) {
        L_INFO("setting ypts to 792.0", procName);
        ypts = 792.0;
    }
    if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0)
        L_WARNING("xpts,ypts are typically in the range 500-800", procName);

    nfiles = sarrayGetCount(sa);
    firstfile = TRUE;
    for (i = 0, index = 0; i < nfiles; i++) {
        fname = sarrayGetString(sa, i, L_NOCOPY);
        ret = pixReadHeader(fname, &format, &w, &h, NULL, NULL, NULL);
        if (ret) continue;
        if (format == IFF_UNKNOWN)
            continue;

            /* Be sure the entire image is wrapped */
        if (xpts * h < ypts * w)
            res = (l_int32)((l_float32)w * 72.0 / xpts);
        else
            res = (l_int32)((l_float32)h * 72.0 / ypts);

        writeImageCompressedToPSFile(fname, fileout, res, &firstfile, &index);
    }

    return 0;
}
/*!
 * \brief   strcodeCreateFromFile()
 *
 * \param[in]    filein containing filenames of serialized data
 * \param[in]    fileno integer that labels the two output files
 * \param[in]    outdir [optional] if null, files are made in /tmp/lept/auto
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) The %filein has one filename on each line.
 *          Comment lines begin with "#".
 *      (2) The output is 2 files:
 *             autogen.\<fileno\>.c
 *             autogen.\<fileno\>.h
 * </pre>
 */
l_int32
strcodeCreateFromFile(const char  *filein,
                      l_int32      fileno,
                      const char  *outdir)
{
char        *fname;
const char  *type;
l_uint8     *data;
size_t       nbytes;
l_int32      i, n, index;
SARRAY      *sa;
L_STRCODE   *strcode;

    PROCNAME("strcodeCreateFromFile");

    if (!filein)
        return ERROR_INT("filein not defined", procName, 1);

    if ((data = l_binaryRead(filein, &nbytes)) == NULL)
        return ERROR_INT("data not read from file", procName, 1);
    sa = sarrayCreateLinesFromString((char *)data, 0);
    LEPT_FREE(data);
    if (!sa)
        return ERROR_INT("sa not made", procName, 1);
    if ((n = sarrayGetCount(sa)) == 0) {
        sarrayDestroy(&sa);
        return ERROR_INT("no filenames in the file", procName, 1);
    }

    strcode = strcodeCreate(fileno);

    for (i = 0; i < n; i++) {
        fname = sarrayGetString(sa, i, L_NOCOPY);
        if (fname[0] == '#') continue;
        if (l_getIndexFromFile(fname, &index)) {
            L_ERROR("File %s has no recognizable type\n", procName, fname);
        } else {
            type = l_assoc[index].type;
            L_INFO("File %s is type %s\n", procName, fname, type);
            strcodeGenerate(strcode, fname, type);
        }
    }
    strcodeFinalize(&strcode, outdir);
    return 0;
}
예제 #24
0
/*!
 * \brief   sarrayRemoveDupsByHash()
 *
 * \param[in]    sas
 * \param[out]   psad unique set of strings; duplicates removed
 * \param[out]   pdahash [optional] dnahash used for lookup
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) Generates a sarray with unique values.
 *      (2) The dnahash is built up with sad to assure uniqueness.
 *          It can be used to find if a string is in the set:
 *              sarrayFindValByHash(sad, dahash, str, \&index)
 *      (3) The hash of the string location is simple and fast.  It scales
 *          up with the number of buckets to insure a fairly random
 *          bucket selection input strings.
 *      (4) This is faster than sarrayRemoveDupsByAset(), because the
 *          bucket lookup is O(n), although there is a double-loop
 *          lookup within the dna in each bucket.
 * </pre>
 */
l_int32
sarrayRemoveDupsByHash(SARRAY      *sas,
                       SARRAY     **psad,
                       L_DNAHASH  **pdahash)
{
char       *str;
l_int32     i, n, index, items;
l_uint32    nsize;
l_uint64    key;
SARRAY     *sad;
L_DNAHASH  *dahash;

    PROCNAME("sarrayRemoveDupsByHash");

    if (pdahash) *pdahash = NULL;
    if (!psad)
        return ERROR_INT("&sad not defined", procName, 1);
    *psad = NULL;
    if (!sas)
        return ERROR_INT("sas not defined", procName, 1);

    n = sarrayGetCount(sas);
    findNextLargerPrime(n / 20, &nsize);  /* buckets in hash table */
    dahash = l_dnaHashCreate(nsize, 8);
    sad = sarrayCreate(n);
    *psad = sad;
    for (i = 0, items = 0; i < n; i++) {
        str = sarrayGetString(sas, i, L_NOCOPY);
        sarrayFindStringByHash(sad, dahash, str, &index);
        if (index < 0) {  /* not found */
            l_hashStringToUint64(str, &key);
            l_dnaHashAdd(dahash, key, (l_float64)items);
            sarrayAddString(sad, str, L_COPY);
            items++;
        }
    }

    if (pdahash)
        *pdahash = dahash;
    else
        l_dnaHashDestroy(&dahash);
    return 0;
}
예제 #25
0
/*!
 *  getSortedPathnamesInDirectory()
 *
 *      Input:  directory name
 *              substr (<optional> substring filter on filenames; can be NULL)
 *              firstpage (0-based)
 *              npages (use 0 for all to the end)
 *      Return: sarray of sorted pathnames, or NULL on error
 *
 *  Notes:
 *      (1) If 'substr' is not NULL, only filenames that contain
 *          the substring can be returned.  If 'substr' is NULL,
 *          none of the filenames are filtered out.
 *      (2) The files in the directory, after optional filtering by
 *          the substring, are lexically sorted in increasing order.
 *          The full pathnames are returned for the requested sequence.
 *          If no files are found after filtering, returns an empty sarray.
 */
SARRAY *
getSortedPathnamesInDirectory(const char  *dirname,
                              const char  *substr,
                              l_int32      firstpage,
                              l_int32      npages)
{
char    *fname, *fullname;
l_int32  i, nfiles, lastpage;
SARRAY  *sa, *safiles, *saout;

    PROCNAME("getSortedPathnamesInDirectory");

    if (!dirname)
        return (SARRAY *)ERROR_PTR("dirname not defined", procName, NULL);

    if ((sa = getFilenamesInDirectory(dirname)) == NULL)
        return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);
    safiles = sarraySelectBySubstring(sa, substr);
    sarrayDestroy(&sa);
    nfiles = sarrayGetCount(safiles);
    if (nfiles == 0) {
        L_WARNING("no files found", procName);
        return safiles;
    }

    sarraySort(safiles, safiles, L_SORT_INCREASING);

    firstpage = L_MIN(L_MAX(firstpage, 0), nfiles - 1);
    if (npages == 0)
        npages = nfiles - firstpage;
    lastpage = L_MIN(firstpage + npages - 1, nfiles - 1);

    saout = sarrayCreate(lastpage - firstpage + 1);
    for (i = firstpage; i <= lastpage; i++) {
        fname = sarrayGetString(safiles, i, L_NOCOPY);
        fullname = genPathname(dirname, fname);
        sarrayAddString(saout, fullname, L_INSERT);
    }

    sarrayDestroy(&safiles);
    return saout;
}
예제 #26
0
/*!
 *  sarraySort()
 *
 *      Input:  saout (output sarray; can be NULL or equal to sain)
 *              sain (input sarray)
 *              sortorder (L_SORT_INCREASING or L_SORT_DECREASING)
 *      Return: saout (output sarray, sorted by ascii value), or null on error
 *
 *  Notes:
 *      (1) Set saout = sain for in-place; otherwise, set naout = NULL.
 *      (2) Shell sort, modified from K&R, 2nd edition, p.62.
 *          Slow but simple O(n logn) sort.
 */
SARRAY *
sarraySort(SARRAY  *saout,
           SARRAY  *sain,
           l_int32  sortorder)
{
char   **array;
char    *tmp;
l_int32  n, i, j, gap;

    PROCNAME("sarraySort");

    if (!sain)
        return (SARRAY *)ERROR_PTR("sain not defined", procName, NULL);

        /* Make saout if necessary; otherwise do in-place */
    if (!saout)
        saout = sarrayCopy(sain);
    else if (sain != saout)
        return (SARRAY *)ERROR_PTR("invalid: not in-place", procName, NULL);
    array = saout->array;  /* operate directly on the array */
    n = sarrayGetCount(saout);

        /* Shell sort */
    for (gap = n/2; gap > 0; gap = gap / 2) {
        for (i = gap; i < n; i++) {
            for (j = i - gap; j >= 0; j -= gap) {
                if ((sortorder == L_SORT_INCREASING &&
                     stringCompareLexical(array[j], array[j + gap])) ||
                    (sortorder == L_SORT_DECREASING &&
                     stringCompareLexical(array[j + gap], array[j])))
                {
                    tmp = array[j];
                    array[j] = array[j + gap];
                    array[j + gap] = tmp;
                }
            }
        }
    }

    return saout;
}
예제 #27
0
/*
 *  skipToSemicolon()
 *
 *      Input:  sa (output from cpp, by line)
 *              start (index of starting line to search)
 *              charindex (starting char index for search)
 *              &next (index of line containing the next ';')
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) If the semicolon isn't found, returns next = -1.
 *          This shouldn't happen.
 *      (2) This is only used in contexts where the semicolon is
 *          not within a string.
 */
static l_int32
skipToSemicolon(SARRAY   *sa,
                l_int32   start,
                l_int32   charindex,
                l_int32  *pnext)
{
char    *str;
l_int32  i, j, n, jstart, nchars, found;

    PROCNAME("skipToSemicolon");

    if (!sa)
        return ERROR_INT("sa not defined", procName, 1);
    if (!pnext)
        return ERROR_INT("&next not defined", procName, 1);

    *pnext = -1;
    n = sarrayGetCount(sa);
    found = FALSE;
    for (i = start; i < n; i++) {
        str = sarrayGetString(sa, i, L_NOCOPY);
        jstart = 0;
        if (i == start)
            jstart = charindex + 1;
        nchars = strlen(str);
        for (j = jstart; j < nchars; j++) {
            if (str[j] == ';') {
                found = TRUE;;
                break;
            }
        }
        if (found) {
            *pnext = i;
            return 0;
        }
    }

    return ERROR_INT("semicolon not found", procName, 1);
}
예제 #28
0
/*!
 *  sarrayConcatenate()
 *
 *      Input:  sa1  (to be added to)
 *              sa2  (append to sa1)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) Copies of the strings in sarray2 are added to sarray1.
 */
l_int32
sarrayConcatenate(SARRAY  *sa1,
                  SARRAY  *sa2)
{
char    *str;
l_int32  n, i;

    PROCNAME("sarrayConcatenate");

    if (!sa1)
        return ERROR_INT("sa1 not defined", procName, 1);
    if (!sa2)
        return ERROR_INT("sa2 not defined", procName, 1);

    n = sarrayGetCount(sa2);
    for (i = 0; i < n; i++) {
        str = sarrayGetString(sa2, i, L_NOCOPY);
        sarrayAddString(sa1, str, L_COPY);
    }

    return 0;
}
예제 #29
0
PIXA *MakeBootnum2(void)
{
char     *fname;
l_int32   i, n, w, h;
BOX      *box;
PIX      *pix;
PIXA     *pixa;
L_RECOG  *recog;
SARRAY   *sa;

        /* Phase 1: generate recog from the digit data */
    recog = recogCreate(20, 32, L_USE_ALL, 120, 1);
    sa = getSortedPathnamesInDirectory("recog/bootnums", "png", 0, 0);
    n = sarrayGetCount(sa);
    for (i = 0; i < n; i++) {
            /* Read each pix: grayscale, multi-character, labelled */
        fname = sarrayGetString(sa, i, L_NOCOPY);
        if ((pix = pixRead(fname)) == NULL) {
            fprintf(stderr, "Can't read %s\n", fname);
            continue;
        }

            /* Convert to a set of 1 bpp, single character, labelled */
        pixGetDimensions(pix, &w, &h, NULL);
        box = boxCreate(0, 0, w, h);
        recogTrainLabelled(recog, pix, box, NULL, 1, 0);
        pixDestroy(&pix);
        boxDestroy(&box);
    }
    recogTrainingFinished(recog, 1);
    sarrayDestroy(&sa);

        /* Phase 2: generate pixa consisting of 1 bpp, single character pix */
    recogWritePixa("/tmp/lept/recog/digits/bootnum2.pa", recog);
    pixa = pixaRead("/tmp/lept/recog/digits/bootnum2.pa");
    recogDestroy(&recog);
    return pixa;
}
예제 #30
0
/*!
 *  gplotGenDataFiles()
 *
 *      Input:  gplot
 *      Return: 0 if OK, 1 on error
 */
l_int32
gplotGenDataFiles(GPLOT *gplot) {
    char *plotdata, *dataname;
    l_int32 i, nplots;
    FILE *fp;

    PROCNAME("gplotGenDataFiles");

    if (!gplot)
        return ERROR_INT("gplot not defined", procName, 1);

    nplots = sarrayGetCount(gplot->datanames);
    for (i = 0; i < nplots; i++) {
        plotdata = sarrayGetString(gplot->plotdata, i, L_NOCOPY);
        dataname = sarrayGetString(gplot->datanames, i, L_NOCOPY);
        if ((fp = fopenWriteStream(dataname, "w")) == NULL)
            return ERROR_INT("datafile stream not opened", procName, 1);
        fwrite(plotdata, 1, strlen(plotdata), fp);
        fclose(fp);
    }

    return 0;
}