예제 #1
0
/*
 *  captureProtoSignature()
 *
 *      Input:  sa (output from cpp, by line)
 *              start (starting index to search; never a comment line)
 *              stop (index of line on which pattern is completed)
 *              charindex (char index of completing ')' character)
 *      Return: cleanstr (prototype string), or NULL on error
 *
 *  Notes:
 *      (1) Return all characters, ending with a ';' after the ')'
 */
static char *
captureProtoSignature(SARRAY  *sa,
                      l_int32  start,
                      l_int32  stop,
                      l_int32  charindex)
{
char    *str, *newstr, *protostr, *cleanstr;
SARRAY  *sap;
l_int32  i;

    PROCNAME("captureProtoSignature");

    if (!sa)
        return (char *)ERROR_PTR("sa not defined", procName, NULL);

    sap = sarrayCreate(0);
    for (i = start; i < stop; i++) {
        str = sarrayGetString(sa, i, L_COPY);
        sarrayAddString(sap, str, L_INSERT);
    }
    str = sarrayGetString(sa, stop, L_COPY);
    str[charindex + 1] = '\0';
    newstr = stringJoin(str, ";");
    sarrayAddString(sap, newstr, L_INSERT);
    LEPT_FREE(str);
    protostr = sarrayToString(sap, 2);
    sarrayDestroy(&sap);
    cleanstr = cleanProtoSignature(protostr);
    LEPT_FREE(protostr);

    return cleanstr;
}
/*
 *  sarraySplitString()
 *
 *      Input:  sa (to append to; typically empty initially)
 *              str (string to split; not changed)
 *              separators (characters that split input string)
 *      Return: 0 if OK, 1 on error.
 *
 *  Notes:
 *      (1) This uses strtokSafe().  See the notes there in utils.c.
 */
l_int32
sarraySplitString(SARRAY      *sa,
                  const char  *str,
                  const char  *separators)
{
char  *cstr, *substr, *saveptr;

    PROCNAME("sarraySplitString");

    if (!sa)
        return ERROR_INT("sa not defined", procName, 1);
    if (!str)
        return ERROR_INT("str not defined", procName, 1);
    if (!separators)
        return ERROR_INT("separators not defined", procName, 1);

    cstr = stringNew(str);  /* preserves const-ness of input str */
    substr = strtokSafe(cstr, separators, &saveptr);
    if (substr)
        sarrayAddString(sa, substr, L_INSERT);
    while ((substr = strtokSafe(NULL, separators, &saveptr)))
        sarrayAddString(sa, substr, L_INSERT);
    FREE(cstr);

    return 0;
}
예제 #3
0
/*
 *  parseForProtos()
 *
 *      Input:  filein (output of cpp)
 *              prestring (<optional> string that prefaces each decl;
 *                        use NULL to omit)
 *      Return: parsestr (string of function prototypes), or NULL on error
 *
 *  Notes:
 *      (1) We parse the output of cpp:
 *              cpp -ansi <filein> 
 *          Three plans were attempted, with success on the third. 
 *      (2) Plan 1.  A cursory examination of the cpp output indicated that
 *          every function was preceeded by a cpp comment statement.
 *          So we just need to look at statements beginning after comments.
 *          Unfortunately, this is NOT the case.  Some functions start
 *          without cpp comment lines, typically when there are no
 *          comments in the source that immediately precede the function.
 *      (3) Plan 2.  Consider the keywords in the language that start
 *          parts of the cpp file.  Some, like 'typedef', 'enum',
 *          'union' and 'struct', are followed after a while by '{',
 *          and eventually end with '}, plus an optional token and a
 *          final ';'  Others, like 'extern' and 'static', are never
 *          the beginnings of global function definitions.   Function
 *          prototypes have one or more sets of '(' followed eventually
 *          by a ')', and end with ';'.  But function definitions have
 *          tokens, followed by '(', more tokens, ')' and then
 *          immediately a '{'.  We would generate a prototype from this
 *          by adding a ';' to all tokens up to the ')'.  So we use
 *          these special tokens to decide what we are parsing.  And
 *          whenever a function definition is found and the prototype
 *          extracted, we skip through the rest of the function
 *          past the corresponding '}'.  This token ends a line, and
 *          is often on a line of its own.  But as it turns out,
 *          the only keyword we need to consider is 'static'.
 *      (4) Plan 3.  Consider the parentheses and braces for various
 *          declarations.  A struct, enum, or union has a pair of
 *          braces followed by a semicolon.  They cannot have parentheses
 *          before the left brace, but a struct can have lots of parentheses
 *          within the brace set.  A function prototype has no braces.
 *          A function declaration can have sets of left and right
 *          parentheses, but these are followed by a left brace.
 *          So plan 3 looks at the way parentheses and braces are
 *          organized.  Once the beginning of a function definition
 *          is found, the prototype is extracted and we search for
 *          the ending right brace.
 *      (5) To find the ending right brace, it is necessary to do some
 *          careful parsing.  For example, in this file, we have
 *          left and right braces as characters, and these must not
 *          be counted.  Somewhat more tricky, the file fhmtauto.c
 *          generates code, and includes a right brace in a string.
 *          So we must not include braces that are in strings.  But how
 *          do we know if something is inside a string?  Keep state,
 *          starting with not-inside, and every time you hit a double quote
 *          that is not escaped, toggle the condition.  Any brace
 *          found in the state of being within a string is ignored.
 *      (6) When a prototype is extracted, it is put in a canonical
 *          form (i.e., cleaned up).  Finally, we check that it is
 *          not static and save it.  (If static, it is ignored).
 *      (7) The @prestring for unix is NULL; it is included here so that
 *          you can use Microsoft's declaration for importing or
 *          exporting to a dll.  See environ.h for examples of use.
 *          Here, we set: @prestring = "LEPT_DLL ".  Note in particular
 *          the space character that will separate 'LEPT_DLL' from
 *          the standard unix prototype that follows.
 */
char *
parseForProtos(const char *filein,
               const char *prestring)
{
char    *strdata, *str, *newstr, *parsestr, *secondword;
l_int32  nbytes, start, next, stop, charindex, found;
SARRAY  *sa, *saout, *satest;

    PROCNAME("parseForProtos");

    if (!filein)
        return (char *)ERROR_PTR("filein not defined", procName, NULL);

        /* Read in the cpp output into memory, one string for each
         * line in the file, omitting blank lines.  */
    strdata = (char *)arrayRead(filein, &nbytes);
    sa = sarrayCreateLinesFromString(strdata, 0);

    saout = sarrayCreate(0);
    next = 0;
    while (1) {  /* repeat after each non-static prototype is extracted */
        searchForProtoSignature(sa, next, &start, &stop, &charindex, &found);
        if (!found)
            break;
/*        fprintf(stderr, "  start = %d, stop = %d, charindex = %d\n",
                start, stop, charindex); */
        str = captureProtoSignature(sa, start, stop, charindex);

            /* Make sure it is not static.  Note that 'extern' has
             * been prepended to the prototype, so the 'static'
             * keyword, if it exists, would be the second word. */
        satest = sarrayCreateWordsFromString(str);
        secondword = sarrayGetString(satest, 1, 0);
        if (strcmp(secondword, "static")) {  /* not static */
            if (prestring) {  /* prepend it to the prototype */
                newstr = stringJoin(prestring, str);
                sarrayAddString(saout, newstr, L_INSERT);
                FREE(str);
            }
            else
                sarrayAddString(saout, str, L_INSERT);
        }
        else
            FREE(str);
        sarrayDestroy(&satest);

        skipToEndOfFunction(sa, stop, charindex, &next);
        if (next == -1) break;
    }

        /* Flatten into a string with newlines between prototypes */
    parsestr = sarrayToString(saout, 1);
    FREE(strdata);
    sarrayDestroy(&sa);
    sarrayDestroy(&saout);

    return parsestr;
}
/*!
 *  sarrayCreateLinesFromString()
 *
 *      Input:  string
 *              blankflag  (0 to exclude blank lines; 1 to include)
 *      Return: sarray, or null on error
 *
 *  Notes:
 *      (1) This finds the number of line substrings, creates an sarray of
 *          this size, and puts copies of each substring into the sarray.
 */
SARRAY *
sarrayCreateLinesFromString(char    *string,
                            l_int32  blankflag)
{
l_int32  i, nsub, size, startptr;
char    *cstring, *substring;
SARRAY  *sa;

    PROCNAME("sarrayCreateLinesFromString");

    if (!string)
        return (SARRAY *)ERROR_PTR("textstr not defined", procName, NULL);

        /* find the number of lines */
    size = strlen(string);
    nsub = 0;
    for (i = 0; i < size; i++) {
        if (string[i] == '\n')
            nsub++;
    }

    if ((sa = sarrayCreate(nsub)) == NULL)
        return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);

    if (blankflag) {  /* keep blank lines as null strings */
            /* Make a copy for munging */
        if ((cstring = stringNew(string)) == NULL)
            return (SARRAY *)ERROR_PTR("cstring not made", procName, NULL);
            /* We'll insert nulls like strtok */
        startptr = 0;
        for (i = 0; i < size; i++) {
            if (cstring[i] == '\n') {
                cstring[i] = '\0';
                if ((substring = stringNew(cstring + startptr)) == NULL)
                    return (SARRAY *)ERROR_PTR("substring not made",
                                                procName, NULL);
                sarrayAddString(sa, substring, L_INSERT);
/*                fprintf(stderr, "substring = %s\n", substring); */
                startptr = i + 1;
            }
        }
        if (startptr < size) {  /* no newline at end of last line */
            if ((substring = stringNew(cstring + startptr)) == NULL)
                return (SARRAY *)ERROR_PTR("substring not made",
                                            procName, NULL);
            sarrayAddString(sa, substring, L_INSERT);
/*            fprintf(stderr, "substring = %s\n", substring); */
        }
        FREE(cstring);
    }
    else {  /* remove blank lines; use strtok */
        sarraySplitString(sa, string, "\n");
    }

    return sa;
}
예제 #5
0
/*
 *  cleanProtoSignature()
 *
 *      Input:  instr (input prototype string)
 *      Return: cleanstr (clean prototype string), or NULL on error
 *
 *  Notes:
 *      (1) Adds 'extern' at beginning and regularizes spaces
 *          between tokens.
 */
static char *
cleanProtoSignature(char *instr)
{
char    *str, *cleanstr;
char     buf[L_BUF_SIZE];
char     externstring[] = "extern";
l_int32  i, j, nwords, nchars, index, len;
SARRAY  *sa, *saout;

    PROCNAME("cleanProtoSignature");

    if (!instr)
        return (char *)ERROR_PTR("instr not defined", procName, NULL);

    sa = sarrayCreateWordsFromString(instr);
    nwords = sarrayGetCount(sa);
    saout = sarrayCreate(0);
    sarrayAddString(saout, externstring, 1);
    for (i = 0; i < nwords; i++) {
        str = sarrayGetString(sa, i, 0);
        nchars = strlen(str);
        index = 0;
        for (j = 0; j < nchars; j++) {
            if (index > L_BUF_SIZE - 6)
                return (char *)ERROR_PTR("token too large", procName, NULL);
            if (str[j] == '(') {
                buf[index++] = ' ';
                buf[index++] = '(';
                buf[index++] = ' ';
            }
            else if (str[j] == ')') {
                buf[index++] = ' ';
                buf[index++] = ')';
            }
            else 
                buf[index++] = str[j];
        }
        buf[index] = '\0';
        sarrayAddString(saout, buf, 1);
    }

        /* Flatten to a prototype string with spaces added after
         * each word, and remove the last space */
    cleanstr = sarrayToString(saout, 2);
    len = strlen(cleanstr);
    cleanstr[len - 1] = '\0';

    sarrayDestroy(&sa);
    sarrayDestroy(&saout);
    return cleanstr;
}
/*!
 * \brief   sarraySortByIndex()
 *
 * \param[in]    sain
 * \param[in]    naindex na that maps from the new sarray to the input sarray
 * \return  saout sorted, or NULL on error
 */
SARRAY *
sarraySortByIndex(SARRAY  *sain,
                  NUMA    *naindex)
{
char    *str;
l_int32  i, n, index;
SARRAY  *saout;

    PROCNAME("sarraySortByIndex");

    if (!sain)
        return (SARRAY *)ERROR_PTR("sain not defined", procName, NULL);
    if (!naindex)
        return (SARRAY *)ERROR_PTR("naindex not defined", procName, NULL);

    n = sarrayGetCount(sain);
    saout = sarrayCreate(n);
    for (i = 0; i < n; i++) {
        numaGetIValue(naindex, i, &index);
        str = sarrayGetString(sain, index, L_COPY);
        sarrayAddString(saout, str, L_INSERT);
    }

    return saout;
}
/*!
 *  sarrayAppendRange()
 *
 *      Input:  sa1  (to be added to)
 *              sa2  (append specified range of strings in sa2 to sa1)
 *              start (index of first string of sa2 to append)
 *              end (index of last string of sa2 to append)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) Copies of the strings in sarray2 are added to sarray1.
 *      (2) The [start ... end] range is truncated if necessary.
 */
l_int32
sarrayAppendRange(SARRAY  *sa1,
                  SARRAY  *sa2,
		  l_int32  start,
		  l_int32  end)
{
char    *str;
l_int32  n, i;

    PROCNAME("sarrayAppendRange");

    if (!sa1)
        return ERROR_INT("sa1 not defined", procName, 1);
    if (!sa2)
        return ERROR_INT("sa2 not defined", procName, 1);
    if (start < 0)
        start = 0;
    n = sarrayGetCount(sa2);
    if (end >= n)
        end = n - 1;
    if (start > end)
        return ERROR_INT("start > end", procName, 1);

    for (i = start; i <= end; i++) {
        str = sarrayGetString(sa2, i, L_NOCOPY);
        sarrayAddString(sa1, str, L_COPY);
    }

    return 0;
}
/*!
 * \brief   sarrayRemoveDupsByAset()
 *
 * \param[in]    sas
 * \return  sad with duplicates removed, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) This is O(nlogn), considerably slower than
 *          sarrayRemoveDupsByHash() for large string arrays.
 *      (2) The key for each string is a 64-bit hash.
 *      (3) Build a set, using hashed strings as keys.  As the set is
 *          built, first do a find; if not found, add the key to the
 *          set and add the string to the output sarray.
 * </pre>
 */
SARRAY *
sarrayRemoveDupsByAset(SARRAY  *sas)
{
char     *str;
l_int32   i, n;
l_uint64  hash;
L_ASET   *set;
RB_TYPE   key;
SARRAY   *sad;

    PROCNAME("sarrayRemoveDupsByAset");

    if (!sas)
        return (SARRAY *)ERROR_PTR("sas not defined", procName, NULL);

    set = l_asetCreate(L_UINT_TYPE);
    sad = sarrayCreate(0);
    n = sarrayGetCount(sas);
    for (i = 0; i < n; i++) {
        str = sarrayGetString(sas, i, L_NOCOPY);
        l_hashStringToUint64(str, &hash);
        key.utype = hash;
        if (!l_asetFind(set, key)) {
            sarrayAddString(sad, str, L_COPY);
            l_asetInsert(set, key);
        }
    }

    l_asetDestroy(&set);
    return sad;
}
예제 #9
0
/*!
 * \brief   recogGetClassIndex()
 *
 * \param[in]    recog with LUT's pre-computed
 * \param[in]    val integer value; can be up to 3 bytes for UTF-8
 * \param[in]    text text from which %val was derived; used if not found
 * \param[out]   pindex index into dna_tochar
 * \return  0 if found; 1 if not found and added; 2 on error.
 *
 * <pre>
 * Notes:
 *      (1) This is used during training.  There is one entry in
 *          recog->dna_tochar (integer value, e.g., ascii) and
 *          one in recog->sa_text (e.g, ascii letter in a string)
 *          for each character class.
 *      (2) This searches the dna character array for %val.  If it is
 *          not found, the template represents a character class not
 *          already seen: it increments setsize (the number of character
 *          classes) by 1, and augments both the index (dna_tochar)
 *          and text (sa_text) arrays.
 *      (3) Returns the index in &index, except on error.
 *      (4) Caller must check the function return value.
 * </pre>
 */
l_int32
recogGetClassIndex(L_RECOG  *recog,
                   l_int32   val,
                   char     *text,
                   l_int32  *pindex)
{
l_int32  i, n, ival;

    PROCNAME("recogGetClassIndex");

    if (!pindex)
        return ERROR_INT("&index not defined", procName, 2);
    *pindex = -1;
    if (!recog)
        return ERROR_INT("recog not defined", procName, 2);
    if (!text)
        return ERROR_INT("text not defined", procName, 2);

        /* Search existing characters */
    n = l_dnaGetCount(recog->dna_tochar);
    for (i = 0; i < n; i++) {
        l_dnaGetIValue(recog->dna_tochar, i, &ival);
        if (val == ival) {  /* found */
            *pindex = i;
            return 0;
        }
    }

       /* If not found... */
    l_dnaAddNumber(recog->dna_tochar, val);
    sarrayAddString(recog->sa_text, text, L_COPY);
    recog->setsize++;
    *pindex = n;
    return 1;
}
예제 #10
0
/*!
 *  sarraySelectBySubstring()
 *
 *      Input:  sain (input sarray)
 *              substr (<optional> substring for matching; can be NULL)
 *      Return: saout (output sarray, filtered with substring) or null on error
 *
 *  Notes:
 *      (1) This selects all strings in sain that have substr as a substring.
 *          Note that we can't use strncmp() because we're looking for
 *          a match to the substring anywhere within each filename.
 *      (2) If substr == NULL, returns a copy of the sarray.
 */
SARRAY *
sarraySelectBySubstring(SARRAY      *sain,
                        const char  *substr)
{
char    *str;
l_int32  n, i, offset, found;
SARRAY  *saout;

    PROCNAME("sarraySelectBySubstring");

    if (!sain)
        return (SARRAY *)ERROR_PTR("sain not defined", procName, NULL);

    n = sarrayGetCount(sain);
    if (!substr || n == 0)
        return sarrayCopy(sain);

    saout = sarrayCreate(n);
    for (i = 0; i < n; i++) {
        str = sarrayGetString(sain, i, L_NOCOPY);
        arrayFindSequence((l_uint8 *)str, strlen(str), (l_uint8 *)substr,
                          strlen(substr), &offset, &found);
        if (found)
            sarrayAddString(saout, str, L_COPY);
    }

    return saout;
}
예제 #11
0
파일: hashtest.c 프로젝트: stweil/leptonica
/* Build all possible strings, up to a max of 5 roman alphabet characters */
static SARRAY *
BuildShortStrings(l_int32  nchars,  /* 3, 4 or 5 */
                  l_int32  add_dups)
{
    char      buf[64];
    l_int32   i, j, k, l, m;
    l_uint64  hash;
    SARRAY   *sa;

    sa = sarrayCreate(1000);
    for (i = 0; i < 26; i++) {
        sprintf(buf, "%c", i + 0x61);
        sarrayAddString(sa, buf, L_COPY);
        for (j = 0; j < 26; j++) {
            sprintf(buf, "%c%c", i + 0x61, j + 0x61);
            sarrayAddString(sa, buf, L_COPY);
            for (k = 0; k < 26; k++) {
                sprintf(buf, "%c%c%c", i + 0x61, j + 0x61, k + 0x61);
                sarrayAddString(sa, buf, L_COPY);
                if (add_dups && k < 4)  /* add redundant strings */
                    sarrayAddString(sa, buf, L_COPY);
                if (nchars > 3) {
                    for (l = 0; l < 26; l++) {
                        sprintf(buf, "%c%c%c%c", i + 0x61, j + 0x61,
                                k + 0x61, l + 0x61);
                        sarrayAddString(sa, buf, L_COPY);
                        if (add_dups && l < 4)  /* add redundant strings */
                            sarrayAddString(sa, buf, L_COPY);
                        if (nchars > 4) {
                            for (m = 0; m < 26; m++) {
                                sprintf(buf, "%c%c%c%c%c", i + 0x61, j + 0x61,
                                        k + 0x61, l + 0x61, m + 0x61);
                                sarrayAddString(sa, buf, L_COPY);
                                if (!add_dups && i == 17 && j == 12 &&
                                        k == 4 && l == 21) {
                                    l_hashStringToUint64(buf, &hash);
                                    fprintf(stderr, "  %llx\n", hash);
                                }
                                if (add_dups && m < 4)  /* add redundant */
                                    sarrayAddString(sa, buf, L_COPY);
                            }
                        }
                    }
                }
            }
        }
    }

    return sa;
}
/*!
 * \brief   strcodeGenerate()
 *
 * \param[in]    strcode for accumulating data
 * \param[in]    filein input file with serialized data
 * \param[in]    type of data; use the typedef string
 * \return  0 if OK, 1 on error.
 *
 * <pre>
 * Notes:
 *      (1) The generated function name is
 *            l_autodecode_\<fileno\>()
 *          where \<fileno\> is the index label for the pair of output files.
 *      (2) To deserialize this data, the function is called with the
 *          argument 'ifunc', which increments each time strcodeGenerate()
 *          is called.
 * </pre>
 */
l_int32
strcodeGenerate(L_STRCODE   *strcode,
                const char  *filein,
                const char  *type)
{
char    *strdata, *strfunc, *strdescr;
l_int32  itype;

    PROCNAME("strcodeGenerate");

    if (!strcode)
        return ERROR_INT("strcode not defined", procName, 1);
    if (!filein)
        return ERROR_INT("filein not defined", procName, 1);
    if (!type)
        return ERROR_INT("type not defined", procName, 1);

        /* Get the index corresponding to type and validate */
    if (l_getIndexFromType(type, &itype) == 1)
        return ERROR_INT("data type unknown", procName, 1);

        /* Generate the encoded data string */
    if ((strdata = l_genDataString(filein, strcode->ifunc)) == NULL)
        return ERROR_INT("strdata not made", procName, 1);
    sarrayAddString(strcode->data, strdata, L_INSERT);

        /* Generate the case data for the decoding function */
    strfunc = l_genCaseString(strcode->ifunc, itype);
    sarrayAddString(strcode->function, strfunc, L_INSERT);

        /* Generate row of table for function type selection */
    strdescr = l_genDescrString(filein, strcode->ifunc, itype);
    sarrayAddString(strcode->descr, strdescr, L_INSERT);

    strcode->n++;
    strcode->ifunc++;
    return 0;
}
/*!
 * \brief   l_genDataString()
 *
 * \param[in]    filein input file of serialized data
 * \param[in]    ifunc index into set of functions in output file
 * \return  encoded ascii data string, or NULL on error reading from file
 */
static char *
l_genDataString(const char  *filein,
                l_int32      ifunc)
{
char      buf[80];
char     *cdata1, *cdata2, *cdata3;
l_uint8  *data1, *data2;
l_int32   csize1, csize2;
size_t    size1, size2;
SARRAY   *sa;

    PROCNAME("l_genDataString");

    if (!filein)
        return (char *)ERROR_PTR("filein not defined", procName, NULL);

        /* Read it in, gzip it, encode, and reformat.  We gzip because some
         * serialized data has a significant amount of ascii content. */
    if ((data1 = l_binaryRead(filein, &size1)) == NULL)
        return (char *)ERROR_PTR("bindata not returned", procName, NULL);
    data2 = zlibCompress(data1, size1, &size2);
    cdata1 = encodeBase64(data2, size2, &csize1);
    cdata2 = reformatPacked64(cdata1, csize1, 4, 72, 1, &csize2);
    LEPT_FREE(data1);
    LEPT_FREE(data2);
    LEPT_FREE(cdata1);

        /* Prepend the string declaration signature and put it together */
    sa = sarrayCreate(3);
    snprintf(buf, sizeof(buf), "static const char *l_strdata_%d =\n", ifunc);
    sarrayAddString(sa, buf, L_COPY);
    sarrayAddString(sa, cdata2, L_INSERT);
    sarrayAddString(sa, (char *)";\n", L_COPY);
    cdata3 = sarrayToString(sa, 0);
    sarrayDestroy(&sa);
    return cdata3;
}
예제 #14
0
/*!
 * \brief   sarrayIntersectionByHash()
 *
 * \param[in]    sa1, sa2
 * \return  sad intersection of the strings, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) This is faster than sarrayIntersectionByAset(), because the
 *          bucket lookup is O(n).
 * </pre>
 */
SARRAY *
sarrayIntersectionByHash(SARRAY  *sa1,
                         SARRAY  *sa2)
{
char       *str;
l_int32     n1, n2, nsmall, i, index1, index2;
l_uint32    nsize2;
l_uint64    key;
L_DNAHASH  *dahash1, *dahash2;
SARRAY     *sa_small, *sa_big, *sad;

    PROCNAME("sarrayIntersectionByHash");

    if (!sa1)
        return (SARRAY *)ERROR_PTR("sa1 not defined", procName, NULL);
    if (!sa2)
        return (SARRAY *)ERROR_PTR("sa2 not defined", procName, NULL);

        /* Put the elements of the biggest sarray into a dnahash */
    n1 = sarrayGetCount(sa1);
    n2 = sarrayGetCount(sa2);
    sa_small = (n1 < n2) ? sa1 : sa2;   /* do not destroy sa_small */
    sa_big = (n1 < n2) ? sa2 : sa1;   /* do not destroy sa_big */
    dahash1 = l_dnaHashCreateFromSarray(sa_big);

        /* Build up the intersection of strings.  Add to %sad
         * if the string is in sa_big (using dahash1) but hasn't
         * yet been seen in the traversal of sa_small (using dahash2). */
    sad = sarrayCreate(0);
    nsmall = sarrayGetCount(sa_small);
    findNextLargerPrime(nsmall / 20, &nsize2);  /* buckets in hash table */
    dahash2 = l_dnaHashCreate(nsize2, 0);
    for (i = 0; i < nsmall; i++) {
        str = sarrayGetString(sa_small, i, L_NOCOPY);
        sarrayFindStringByHash(sa_big, dahash1, str, &index1);
        if (index1 >= 0) {
            sarrayFindStringByHash(sa_small, dahash2, str, &index2);
            if (index2 == -1) {
                sarrayAddString(sad, str, L_COPY);
                l_hashStringToUint64(str, &key);
                l_dnaHashAdd(dahash2, key, (l_float64)i);
            }
        }
    }

    l_dnaHashDestroy(&dahash1);
    l_dnaHashDestroy(&dahash2);
    return sad;
}
예제 #15
0
/*!
 *  sarrayReadStream()
 *
 *      Input:  stream
 *      Return: sarray, or null on error
 *
 *  Notes:
 *      (1) We store the size of each string along with the string.
 *      (2) This allows a string to have embedded newlines.  By reading
 *          the entire string, as determined by its size, we are
 *          not affected by any number of embedded newlines.
 */
SARRAY *
sarrayReadStream(FILE  *fp)
{
char    *stringbuf;
l_int32  i, n, size, index, bufsize, ret, version;
SARRAY  *sa;

    PROCNAME("sarrayReadStream");

    if (!fp)
        return (SARRAY *)ERROR_PTR("stream not defined", procName, NULL);

    ret = fscanf(fp, "\nSarray Version %d\n", &version);
    if (ret != 1)
        return (SARRAY *)ERROR_PTR("not an sarray file", procName, NULL);
    if (version != SARRAY_VERSION_NUMBER)
        return (SARRAY *)ERROR_PTR("invalid sarray version", procName, NULL);
    fscanf(fp, "Number of strings = %d\n", &n);

    if ((sa = sarrayCreate(n)) == NULL)
        return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);
    bufsize = L_BUF_SIZE + 1;
    if ((stringbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL)
        return (SARRAY *)ERROR_PTR("stringbuf not made", procName, NULL);

    for (i = 0; i < n; i++) {
	    /* Get the size of the stored string */
        fscanf(fp, "%d[%d]:", &index, &size);
	    /* Expand the string buffer if necessary */
	if (size > bufsize - 5) {
            FREE(stringbuf);
	    bufsize = (l_int32)(1.5 * size);
            stringbuf = (char *)CALLOC(bufsize, sizeof(char));
	}
	    /* Read the stored string, plus leading spaces and trailing \n */
	fread(stringbuf, 1, size + 3, fp);
	    /* Remove the \n that was added by sarrayWriteStream() */
	stringbuf[size + 2] = '\0';
	    /* Copy it in, skipping the 2 leading spaces */
        sarrayAddString(sa, stringbuf + 2, L_COPY);
    }
    fscanf(fp, "\n");

    FREE(stringbuf);
    return sa;
}
예제 #16
0
/*----------------------------------------------------------------------*
 *                      Miscellaneous operations                        *
 *----------------------------------------------------------------------*/
SARRAY *
sarrayGenerateIntegers(l_int32  n)
{
char     buf[32];
l_int32  i;
SARRAY  *sa;

    PROCNAME("sarrayGenerateIntegers");

    if ((sa = sarrayCreate(n)) == NULL)
        return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);
    for (i = 0; i < n; i++) {
        snprintf(buf, sizeof(buf), "%d", i);
        sarrayAddString(sa, buf, L_COPY);
    }
    return sa;
}
예제 #17
0
파일: numabasic.c 프로젝트: AAAyag/tess-two
/*!
 *  numaConvertToSarray()
 *
 *      Input:  na
 *              size1 (size of conversion field)
 *              size2 (for float conversion: size of field to the right
 *                     of the decimal point)
 *              addzeros (for integer conversion: to add lead zeros)
 *              type (L_INTEGER_VALUE, L_FLOAT_VALUE)
 *      Return: a sarray of the float values converted to strings
 *              representing either integer or float values; or null on error.
 *
 *  Notes:
 *      (1) For integer conversion, size2 is ignored.
 *          For float conversion, addzeroes is ignored.
 */
SARRAY *
numaConvertToSarray(NUMA    *na,
                    l_int32  size1,
                    l_int32  size2,
                    l_int32  addzeros,
                    l_int32  type)
{
char       fmt[32], strbuf[64];
l_int32    i, n, ival;
l_float32  fval;
SARRAY    *sa;

    PROCNAME("numaConvertToSarray");

    if (!na)
        return (SARRAY *)ERROR_PTR("na not defined", procName, NULL);
    if (type != L_INTEGER_VALUE && type != L_FLOAT_VALUE)
        return (SARRAY *)ERROR_PTR("invalid type", procName, NULL);

    if (type == L_INTEGER_VALUE) {
        if (addzeros)
            snprintf(fmt, sizeof(fmt), "%%0%dd", size1);
        else
            snprintf(fmt, sizeof(fmt), "%%%dd", size1);
    } else {  /* L_FLOAT_VALUE */
        snprintf(fmt, sizeof(fmt), "%%%d.%df", size1, size2);
    }

    n = numaGetCount(na);
    if ((sa = sarrayCreate(n)) == NULL)
        return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);

    for (i = 0; i < n; i++) {
        if (type == L_INTEGER_VALUE) {
            numaGetIValue(na, i, &ival);
            snprintf(strbuf, sizeof(strbuf), fmt, ival);
        } else {  /* L_FLOAT_VALUE */
            numaGetFValue(na, i, &fval);
            snprintf(strbuf, sizeof(strbuf), fmt, fval);
        }
        sarrayAddString(sa, strbuf, L_COPY);
    }

    return sa;
}
예제 #18
0
/*!
 * \brief   sarrayRemoveDupsByHash()
 *
 * \param[in]    sas
 * \param[out]   psad unique set of strings; duplicates removed
 * \param[out]   pdahash [optional] dnahash used for lookup
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) Generates a sarray with unique values.
 *      (2) The dnahash is built up with sad to assure uniqueness.
 *          It can be used to find if a string is in the set:
 *              sarrayFindValByHash(sad, dahash, str, \&index)
 *      (3) The hash of the string location is simple and fast.  It scales
 *          up with the number of buckets to insure a fairly random
 *          bucket selection input strings.
 *      (4) This is faster than sarrayRemoveDupsByAset(), because the
 *          bucket lookup is O(n), although there is a double-loop
 *          lookup within the dna in each bucket.
 * </pre>
 */
l_int32
sarrayRemoveDupsByHash(SARRAY      *sas,
                       SARRAY     **psad,
                       L_DNAHASH  **pdahash)
{
char       *str;
l_int32     i, n, index, items;
l_uint32    nsize;
l_uint64    key;
SARRAY     *sad;
L_DNAHASH  *dahash;

    PROCNAME("sarrayRemoveDupsByHash");

    if (pdahash) *pdahash = NULL;
    if (!psad)
        return ERROR_INT("&sad not defined", procName, 1);
    *psad = NULL;
    if (!sas)
        return ERROR_INT("sas not defined", procName, 1);

    n = sarrayGetCount(sas);
    findNextLargerPrime(n / 20, &nsize);  /* buckets in hash table */
    dahash = l_dnaHashCreate(nsize, 8);
    sad = sarrayCreate(n);
    *psad = sad;
    for (i = 0, items = 0; i < n; i++) {
        str = sarrayGetString(sas, i, L_NOCOPY);
        sarrayFindStringByHash(sad, dahash, str, &index);
        if (index < 0) {  /* not found */
            l_hashStringToUint64(str, &key);
            l_dnaHashAdd(dahash, key, (l_float64)items);
            sarrayAddString(sad, str, L_COPY);
            items++;
        }
    }

    if (pdahash)
        *pdahash = dahash;
    else
        l_dnaHashDestroy(&dahash);
    return 0;
}
예제 #19
0
/*!
 *  sarrayCopy()
 *
 *      Input:  sarray
 *      Return: copy of sarray, or null on error
 */
SARRAY *
sarrayCopy(SARRAY  *sa)
{
l_int32  i;
SARRAY  *csa;

    PROCNAME("sarrayCopy");

    if (!sa)
        return (SARRAY *)ERROR_PTR("sa not defined", procName, NULL);

    if ((csa = sarrayCreate(sa->nalloc)) == NULL)
        return (SARRAY *)ERROR_PTR("csa not made", procName, NULL);

    for (i = 0; i < sa->n; i++)
        sarrayAddString(csa, sa->array[i], L_COPY);

    return csa;
}
예제 #20
0
/*!
 * \brief   sarrayIntersectionByAset()
 *
 * \param[in]    sa1, sa2
 * \return  sad with the intersection of the string set, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) Algorithm: put the smaller sarray into a set, using the string
 *          hashes as the key values.  Then run through the larger sarray,
 *          building an output sarray and a second set from the strings
 *          in the larger array: if a string is in the first set but
 *          not in the second, add the string to the output sarray and hash
 *          it into the second set.  The second set is required to make
 *          sure only one instance of each string is put into the output sarray.
 *          This is O(mlogn), {m,n} = sizes of {smaller,larger} input arrays.
 * </pre>
 */
SARRAY *
sarrayIntersectionByAset(SARRAY  *sa1,
                         SARRAY  *sa2)
{
char     *str;
l_int32   n1, n2, i, n;
l_uint64  hash;
L_ASET   *set1, *set2;
RB_TYPE   key;
SARRAY   *sa_small, *sa_big, *sad;

    PROCNAME("sarrayIntersectionByAset");

    if (!sa1)
        return (SARRAY *)ERROR_PTR("sa1 not defined", procName, NULL);
    if (!sa2)
        return (SARRAY *)ERROR_PTR("sa2 not defined", procName, NULL);

        /* Put the elements of the biggest array into a set */
    n1 = sarrayGetCount(sa1);
    n2 = sarrayGetCount(sa2);
    sa_small = (n1 < n2) ? sa1 : sa2;   /* do not destroy sa_small */
    sa_big = (n1 < n2) ? sa2 : sa1;   /* do not destroy sa_big */
    set1 = l_asetCreateFromSarray(sa_big);

        /* Build up the intersection of strings */
    sad = sarrayCreate(0);
    n = sarrayGetCount(sa_small);
    set2 = l_asetCreate(L_UINT_TYPE);
    for (i = 0; i < n; i++) {
        str = sarrayGetString(sa_small, i, L_NOCOPY);
        l_hashStringToUint64(str, &hash);
        key.utype = hash;
        if (l_asetFind(set1, key) && !l_asetFind(set2, key)) {
            sarrayAddString(sad, str, L_COPY);
            l_asetInsert(set2, key);
        }
    }

    l_asetDestroy(&set1);
    l_asetDestroy(&set2);
    return sad;
}
예제 #21
0
/*!
 *  getSortedPathnamesInDirectory()
 *
 *      Input:  directory name
 *              substr (<optional> substring filter on filenames; can be NULL)
 *              firstpage (0-based)
 *              npages (use 0 for all to the end)
 *      Return: sarray of sorted pathnames, or NULL on error
 *
 *  Notes:
 *      (1) If 'substr' is not NULL, only filenames that contain
 *          the substring can be returned.  If 'substr' is NULL,
 *          none of the filenames are filtered out.
 *      (2) The files in the directory, after optional filtering by
 *          the substring, are lexically sorted in increasing order.
 *          The full pathnames are returned for the requested sequence.
 *          If no files are found after filtering, returns an empty sarray.
 */
SARRAY *
getSortedPathnamesInDirectory(const char  *dirname,
                              const char  *substr,
                              l_int32      firstpage,
                              l_int32      npages)
{
char    *fname, *fullname;
l_int32  i, nfiles, lastpage;
SARRAY  *sa, *safiles, *saout;

    PROCNAME("getSortedPathnamesInDirectory");

    if (!dirname)
        return (SARRAY *)ERROR_PTR("dirname not defined", procName, NULL);

    if ((sa = getFilenamesInDirectory(dirname)) == NULL)
        return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);
    safiles = sarraySelectBySubstring(sa, substr);
    sarrayDestroy(&sa);
    nfiles = sarrayGetCount(safiles);
    if (nfiles == 0) {
        L_WARNING("no files found", procName);
        return safiles;
    }

    sarraySort(safiles, safiles, L_SORT_INCREASING);

    firstpage = L_MIN(L_MAX(firstpage, 0), nfiles - 1);
    if (npages == 0)
        npages = nfiles - firstpage;
    lastpage = L_MIN(firstpage + npages - 1, nfiles - 1);

    saout = sarrayCreate(lastpage - firstpage + 1);
    for (i = firstpage; i <= lastpage; i++) {
        fname = sarrayGetString(safiles, i, L_NOCOPY);
        fullname = genPathname(dirname, fname);
        sarrayAddString(saout, fullname, L_INSERT);
    }

    sarrayDestroy(&safiles);
    return saout;
}
예제 #22
0
SARRAY *
getFilenamesInDirectory(const char  *dirname)
{
char           *name;
l_int32         len;
SARRAY         *safiles;
DIR            *pdir;
struct dirent  *pdirentry;

    PROCNAME("getFilenamesInDirectory");

    if (!dirname)
        return (SARRAY *)ERROR_PTR("dirname not defined", procName, NULL);

    if ((safiles = sarrayCreate(0)) == NULL)
        return (SARRAY *)ERROR_PTR("safiles not made", procName, NULL);
    if ((pdir = opendir(dirname)) == NULL)
        return (SARRAY *)ERROR_PTR("pdir not opened", procName, NULL);
    while ((pdirentry = readdir(pdir)))  {

        /* It's nice to ignore directories.  For this it is necessary to
         * define _BSD_SOURCE in the CC command, because the DT_DIR
         * flag is non-standard.  */ 
#if !defined(__MINGW32__) && !defined(_CYGWIN_ENVIRON) && !defined(__SOLARIS__)
        if (pdirentry->d_type == DT_DIR)
            continue;
#endif

            /* Filter out "." and ".." if they're passed through */
        name = pdirentry->d_name;
        len = strlen(name);
        if (len == 1 && name[len - 1] == '.') continue;
        if (len == 2 && name[len - 1] == '.' && name[len - 2] == '.') continue;
        sarrayAddString(safiles, name, L_COPY);
    }
    closedir(pdir);

    return safiles;
}
예제 #23
0
SARRAY *
getFilenamesInDirectory(const char  *dirname)
{
SARRAY           *safiles;
WIN32_FIND_DATAA  ffd;
size_t            length_of_path;
CHAR              szDir[MAX_PATH];  /* MAX_PATH is defined in stdlib.h */
HANDLE            hFind = INVALID_HANDLE_VALUE;

    PROCNAME("getFilenamesInDirectory");

    if (!dirname)
        return (SARRAY *)ERROR_PTR("dirname not defined", procName, NULL);

    length_of_path = strlen(dirname);
    if (length_of_path > (MAX_PATH - 2))
        return (SARRAY *)ERROR_PTR("dirname is to long", procName, NULL);

    strncpy(szDir, dirname, MAX_PATH);
    szDir[MAX_PATH - 1] = '\0';
    strncat(szDir, TEXT("\\*"), MAX_PATH - strlen(szDir));

    if ((safiles = sarrayCreate(0)) == NULL)
        return (SARRAY *)ERROR_PTR("safiles not made", procName, NULL);
    hFind = FindFirstFileA(szDir, &ffd);
    if (INVALID_HANDLE_VALUE == hFind) {
        sarrayDestroy(&safiles);
        return (SARRAY *)ERROR_PTR("hFind not opened", procName, NULL);
    }

    while (FindNextFileA(hFind, &ffd) != 0) {
        if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)  /* skip dirs */
            continue;
        sarrayAddString(safiles, ffd.cFileName, L_COPY);
    }

    FindClose(hFind);
    return safiles;
}
예제 #24
0
/*!
 *  sarrayConcatenate()
 *
 *      Input:  sa1  (to be added to)
 *              sa2  (append to sa1)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) Copies of the strings in sarray2 are added to sarray1.
 */
l_int32
sarrayConcatenate(SARRAY  *sa1,
                  SARRAY  *sa2)
{
char    *str;
l_int32  n, i;

    PROCNAME("sarrayConcatenate");

    if (!sa1)
        return ERROR_INT("sa1 not defined", procName, 1);
    if (!sa2)
        return ERROR_INT("sa2 not defined", procName, 1);

    n = sarrayGetCount(sa2);
    for (i = 0; i < n; i++) {
        str = sarrayGetString(sa2, i, L_NOCOPY);
        sarrayAddString(sa1, str, L_COPY);
    }

    return 0;
}
예제 #25
0
int main(int    argc,
         char **argv)
{
char        *filein, *str, *prestring, *outprotos, *protostr;
const char  *spacestr = " ";
char         buf[L_BUF_SIZE];
l_uint8     *allheaders;
l_int32      i, maxindex, in_line, nflags, protos_added, firstfile, len, ret;
size_t       nbytes;
L_BYTEA     *ba, *ba2;
SARRAY      *sa, *safirst;
static char  mainName[] = "xtractprotos";

    if (argc == 1) {
        fprintf(stderr,
                "xtractprotos [-prestring=<string>] [-protos=<where>] "
                "[list of C files]\n"
                "where the prestring is prepended to each prototype, and \n"
                "protos can be either 'inline' or the name of an output "
                "prototype file\n");
        return 1;
    }

    /* ---------------------------------------------------------------- */
    /* Parse input flags and find prestring and outprotos, if requested */
    /* ---------------------------------------------------------------- */
    prestring = outprotos = NULL;
    in_line = FALSE;
    nflags = 0;
    maxindex = L_MIN(3, argc);
    for (i = 1; i < maxindex; i++) {
        if (argv[i][0] == '-') {
            if (!strncmp(argv[i], "-prestring", 10)) {
                nflags++;
                ret = sscanf(argv[i] + 1, "prestring=%s", buf);
                if (ret != 1) {
                    fprintf(stderr, "parse failure for prestring\n");
                    return 1;
                }
                if ((len = strlen(buf)) > L_BUF_SIZE - 3) {
                    L_WARNING("prestring too large; omitting!\n", mainName);
                } else {
                    buf[len] = ' ';
                    buf[len + 1] = '\0';
                    prestring = stringNew(buf);
                }
            } else if (!strncmp(argv[i], "-protos", 7)) {
                nflags++;
                ret = sscanf(argv[i] + 1, "protos=%s", buf);
                if (ret != 1) {
                    fprintf(stderr, "parse failure for protos\n");
                    return 1;
                }
                outprotos = stringNew(buf);
                if (!strncmp(outprotos, "inline", 7))
                    in_line = TRUE;
            }
        }
    }

    if (argc - nflags < 2) {
        fprintf(stderr, "no files specified!\n");
        return 1;
    }


    /* ---------------------------------------------------------------- */
    /*                   Generate the prototype string                  */
    /* ---------------------------------------------------------------- */
    ba = l_byteaCreate(500);

        /* First the extern C head */
    sa = sarrayCreate(0);
    sarrayAddString(sa, (char *)"/*", 1);
    snprintf(buf, L_BUF_SIZE,
             " *  These prototypes were autogen'd by xtractprotos, v. %s",
             version);
    sarrayAddString(sa, buf, 1);
    sarrayAddString(sa, (char *)" */", 1);
    sarrayAddString(sa, (char *)"#ifdef __cplusplus", 1);
    sarrayAddString(sa, (char *)"extern \"C\" {", 1);
    sarrayAddString(sa, (char *)"#endif  /* __cplusplus */\n", 1);
    str = sarrayToString(sa, 1);
    l_byteaAppendString(ba, str);
    lept_free(str);
    sarrayDestroy(&sa);

        /* Then the prototypes */
    firstfile = 1 + nflags;
    protos_added = FALSE;
    for (i = firstfile; i < argc; i++) {
        filein = argv[i];
	len = strlen(filein);
	if (filein[len - 1] == 'h')  /* skip .h files */
	    continue;
	snprintf(buf, L_BUF_SIZE, "cpp -ansi -DNO_PROTOS %s %s",
	         filein, tempfile);
	ret = system(buf);
	if (ret) {
            fprintf(stderr, "cpp failure for %s; continuing\n", filein);
	    continue;
	}

	if ((str = parseForProtos(tempfile, prestring)) == NULL) {
            fprintf(stderr, "parse failure for %s; continuing\n", filein);
	    continue;
	}
	if (strlen(str) > 1) {  /* strlen(str) == 1 is a file without protos */
            l_byteaAppendString(ba, str);
            protos_added = TRUE;
        }
        lept_free(str);
    }

        /* Lastly the extern C tail */
    sa = sarrayCreate(0);
    sarrayAddString(sa, (char *)"\n#ifdef __cplusplus", 1);
    sarrayAddString(sa, (char *)"}", 1);
    sarrayAddString(sa, (char *)"#endif  /* __cplusplus */", 1);
    str = sarrayToString(sa, 1);
    l_byteaAppendString(ba, str);
    lept_free(str);
    sarrayDestroy(&sa);

    protostr = (char *)l_byteaCopyData(ba, &nbytes);
    l_byteaDestroy(&ba);


    /* ---------------------------------------------------------------- */
    /*                       Generate the output                        */
    /* ---------------------------------------------------------------- */
    if (!outprotos) {  /* just write to stdout */
        fprintf(stderr, "%s\n", protostr);
        lept_free(protostr);
        return 0;
    }

        /* If no protos were found, do nothing further */
    if (!protos_added) {
        fprintf(stderr, "No protos found\n");
        lept_free(protostr);
        return 1;
    }

        /* Make the output files */
    ba = l_byteaInitFromFile("allheaders_top.txt");
    if (!in_line) {
        snprintf(buf, sizeof(buf), "#include \"%s\"\n", outprotos);
        l_byteaAppendString(ba, buf);
        l_binaryWrite(outprotos, "w", protostr, nbytes);
    } else {
        l_byteaAppendString(ba, protostr);
    }
    ba2 = l_byteaInitFromFile("allheaders_bot.txt");
    l_byteaJoin(ba, &ba2);
    l_byteaWrite("allheaders.h", ba, 0, 0);
    l_byteaDestroy(&ba);
    lept_free(protostr);
    return 0;
}
예제 #26
0
/*!
 *  gplotGenCommandFile()
 *
 *      Input:  gplot
 *      Return: 0 if OK, 1 on error
 */
l_int32
gplotGenCommandFile(GPLOT  *gplot)
{
char     buf[L_BUF_SIZE];
char    *cmdstr, *plottitle, *dataname;
l_int32  i, plotstyle, nplots;
FILE    *fp;

    PROCNAME("gplotGenCommandFile");

    if (!gplot)
        return ERROR_INT("gplot not defined", procName, 1);

        /* Remove any previous command data */
    sarrayClear(gplot->cmddata);

        /* Generate command data instructions */
    if (gplot->title) {   /* set title */
        snprintf(buf, L_BUF_SIZE, "set title '%s'", gplot->title);
        sarrayAddString(gplot->cmddata, buf, L_COPY);
    }
    if (gplot->xlabel) {   /* set xlabel */
        snprintf(buf, L_BUF_SIZE, "set xlabel '%s'", gplot->xlabel);
        sarrayAddString(gplot->cmddata, buf, L_COPY);
    }
    if (gplot->ylabel) {   /* set ylabel */
        snprintf(buf, L_BUF_SIZE, "set ylabel '%s'", gplot->ylabel);
        sarrayAddString(gplot->cmddata, buf, L_COPY);
    }

    if (gplot->outformat == GPLOT_PNG)    /* set terminal type and output */
        snprintf(buf, L_BUF_SIZE, "set terminal png; set output '%s'",
                 gplot->outname);
    else if (gplot->outformat == GPLOT_PS)
        snprintf(buf, L_BUF_SIZE, "set terminal postscript; set output '%s'",
                 gplot->outname);
    else if (gplot->outformat == GPLOT_EPS)
        snprintf(buf, L_BUF_SIZE,
                "set terminal postscript eps; set output '%s'",
                gplot->outname);
    else if (gplot->outformat == GPLOT_LATEX)
        snprintf(buf, L_BUF_SIZE, "set terminal latex; set output '%s'",
                 gplot->outname);
    else  /* gplot->outformat == GPLOT_X11 */
#ifndef _WIN32
        snprintf(buf, L_BUF_SIZE, "set terminal x11");
#else
        snprintf(buf, L_BUF_SIZE, "set terminal windows");
#endif  /* _WIN32 */
    sarrayAddString(gplot->cmddata, buf, L_COPY);

    if (gplot->scaling == GPLOT_LOG_SCALE_X ||
        gplot->scaling == GPLOT_LOG_SCALE_X_Y) {
        snprintf(buf, L_BUF_SIZE, "set logscale x");
        sarrayAddString(gplot->cmddata, buf, L_COPY);
    }
    if (gplot->scaling == GPLOT_LOG_SCALE_Y ||
        gplot->scaling == GPLOT_LOG_SCALE_X_Y) {
        snprintf(buf, L_BUF_SIZE, "set logscale y");
        sarrayAddString(gplot->cmddata, buf, L_COPY);
    }

    nplots = sarrayGetCount(gplot->datanames);
    for (i = 0; i < nplots; i++) {
        plottitle = sarrayGetString(gplot->plottitles, i, L_NOCOPY);
        dataname = sarrayGetString(gplot->datanames, i, L_NOCOPY);
        numaGetIValue(gplot->plotstyles, i, &plotstyle);
        if (nplots == 1) {
            snprintf(buf, L_BUF_SIZE, "plot '%s' title '%s' %s",
                     dataname, plottitle, gplotstylenames[plotstyle]);
        } else {
            if (i == 0)
                snprintf(buf, L_BUF_SIZE, "plot '%s' title '%s' %s, \\",
                     dataname, plottitle, gplotstylenames[plotstyle]);
            else if (i < nplots - 1)
                snprintf(buf, L_BUF_SIZE, " '%s' title '%s' %s, \\",
                     dataname, plottitle, gplotstylenames[plotstyle]);
            else
                snprintf(buf, L_BUF_SIZE, " '%s' title '%s' %s",
                     dataname, plottitle, gplotstylenames[plotstyle]);
        }
        sarrayAddString(gplot->cmddata, buf, L_COPY);
    }

        /* Write command data to file */
    cmdstr = sarrayToString(gplot->cmddata, 1);
    if ((fp = fopenWriteStream(gplot->cmdname, "w")) == NULL)
        return ERROR_INT("cmd stream not opened", procName, 1);
    fwrite(cmdstr, 1, strlen(cmdstr), fp);
    fclose(fp);
    FREE(cmdstr);
    return 0;
}
예제 #27
0
/*!
 *  gplotAddPlot()
 *
 *      Input:  gplot
 *              nax (<optional> numa: set to null for Y_VS_I;
 *                   required for Y_VS_X)
 *              nay (numa: required for both Y_VS_I and Y_VS_X)
 *              plotstyle (GPLOT_LINES, GPLOT_POINTS, GPLOT_IMPULSES,
 *                         GPLOT_LINESPOINTS, GPLOT_DOTS)
 *              plottitle  (<optional> title for individual plot)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) There are 2 options for (x,y) values:
 *            o  To plot an array vs a linear function of the
 *               index, set nax = NULL.
 *            o  To plot one array vs another, use both nax and nay.
 *      (2) If nax is NULL, the x value corresponding to the i-th
 *          value of nay is found from the startx and delx fields
 *          in nay:
 *               x = startx + i * delx
 *          These are set with numaSetParameters().  Their default
 *          values are startx = 0.0, delx = 1.0.
 *      (3) If nax is defined, it must be the same size as nay.
 *      (4) The 'plottitle' string can have spaces, double
 *          quotes and backquotes, but not single quotes.
 */
l_int32
gplotAddPlot(GPLOT       *gplot,
             NUMA        *nax,
             NUMA        *nay,
             l_int32      plotstyle,
             const char  *plottitle)
{
char       buf[L_BUF_SIZE];
char       emptystring[] = "";
char      *datastr, *title;
l_int32    n, i;
l_float32  valx, valy, startx, delx;
SARRAY    *sa;

    PROCNAME("gplotAddPlot");

    if (!gplot)
        return ERROR_INT("gplot not defined", procName, 1);
    if (!nay)
        return ERROR_INT("nay not defined", procName, 1);
    if (plotstyle != GPLOT_LINES && plotstyle != GPLOT_POINTS &&
        plotstyle != GPLOT_IMPULSES && plotstyle != GPLOT_LINESPOINTS &&
        plotstyle != GPLOT_DOTS)
        return ERROR_INT("invalid plotstyle", procName, 1);

    n = numaGetCount(nay);
    numaGetParameters(nay, &startx, &delx);
    if (nax) {
        if (n != numaGetCount(nax))
            return ERROR_INT("nax and nay sizes differ", procName, 1);
    }

        /* Save plotstyle and plottitle */
    numaAddNumber(gplot->plotstyles, plotstyle);
    if (plottitle) {
        title = stringNew(plottitle);
        sarrayAddString(gplot->plottitles, title, L_INSERT);
    } else {
        sarrayAddString(gplot->plottitles, emptystring, L_COPY);
    }

        /* Generate and save data filename */
    gplot->nplots++;
    snprintf(buf, L_BUF_SIZE, "%s.data.%d", gplot->rootname, gplot->nplots);
    sarrayAddString(gplot->datanames, buf, L_COPY);

        /* Generate data and save as a string */
    sa = sarrayCreate(n);
    for (i = 0; i < n; i++) {
        if (nax)
            numaGetFValue(nax, i, &valx);
        else
            valx = startx + i * delx;
        numaGetFValue(nay, i, &valy);
        snprintf(buf, L_BUF_SIZE, "%f %f\n", valx, valy);
        sarrayAddString(sa, buf, L_COPY);
    }
    datastr = sarrayToString(sa, 0);
    sarrayAddString(gplot->plotdata, datastr, L_INSERT);
    sarrayDestroy(&sa);

    return 0;
}
예제 #28
0
main(int    argc,
     char **argv)
{
char        *errorstr;
l_int32      same, error;
PIX         *pixs1, *pixs2, *pixs4, *pixs8, *pixs16, *pixs32,  *pixd;
PIX         *pixc2, *pixc4, *pixc8;
PIX         *pixt1, *pixt2, *pixt3, *pixt4, *pixt5, *pixt6;
PIXCMAP     *cmap;
SARRAY      *sa;
static char  mainName[] = "convert_reg";

    if (argc != 1)
        exit(ERROR_INT(" Syntax:  convert_rt", mainName, 1));

    if ((pixs1 = pixRead("test1.png")) == NULL)
	exit(ERROR_INT("pixs1 not made", mainName, 1));
    if ((pixs2 = pixRead("dreyfus2.png")) == NULL)
	exit(ERROR_INT("pixs2 not made", mainName, 1));
    if ((pixc2 = pixRead("weasel2.4c.png")) == NULL)
	exit(ERROR_INT("pixc2 not made", mainName, 1));
    if ((pixs4 = pixRead("weasel4.16g.png")) == NULL)
	exit(ERROR_INT("pixs4 not made", mainName, 1));
    if ((pixc4 = pixRead("weasel4.11c.png")) == NULL)
	exit(ERROR_INT("pixc4 not made", mainName, 1));
    if ((pixs8 = pixRead("karen8.jpg")) == NULL)
	exit(ERROR_INT("pixs8 not made", mainName, 1));
    if ((pixc8 = pixRead("weasel8.240c.png")) == NULL)
	exit(ERROR_INT("pixc8 not made", mainName, 1));
    if ((pixs16 = pixRead("test16.tif")) == NULL)
	exit(ERROR_INT("pixs16 not made", mainName, 1));
    if ((pixs32 = pixRead("marge.jpg")) == NULL)
	exit(ERROR_INT("pixs32 not made", mainName, 1));
    error = FALSE;
    sa = sarrayCreate(0);

        /* Conversion: 1 bpp --> 8 bpp --> 1 bpp */
    pixt1 = pixConvertTo8(pixs1, FALSE);
    pixt2 = pixThreshold8(pixt1, 1, 0, 0);
    pixEqual(pixs1, pixt2, &same);
    if (!same) {
        pixDisplayWithTitle(pixs1, 100, 100, "1 bpp, no cmap", DFLAG);
        pixDisplayWithTitle(pixt2, 500, 100, "1 bpp, no cmap", DFLAG);
        error = TRUE;
        sarrayAddString(sa, (char *)"conversion 1 bpp <==> 8 bpp", L_COPY);
    } else
        fprintf(stderr, "OK: conversion 1 bpp <==> 8 bpp\n");
    pixDestroy(&pixt1);
    pixDestroy(&pixt2);

        /* Conversion: 2 bpp --> 8 bpp --> 2 bpp */
        /* Conversion: 2 bpp cmap --> 8 bpp cmap --> 2 bpp cmap */
    pixt1 = pixRemoveColormap(pixs2, REMOVE_CMAP_TO_GRAYSCALE);
    pixt2 = pixThreshold8(pixt1, 2, 4, 0);
    pixt3 = pixConvertTo8(pixt2, FALSE);
    pixt4 = pixThreshold8(pixt3, 2, 4, 0);
    pixEqual(pixt2, pixt4, &same);
    if (!same) {
        pixDisplayWithTitle(pixt2, 100, 100, "2 bpp, no cmap", DFLAG);
        pixDisplayWithTitle(pixt4, 500, 100, "2 bpp, no cmap", DFLAG);
        error = TRUE;
        sarrayAddString(sa, (char *)"conversion 2 bpp <==> 8 bpp", L_COPY);
    } else
        fprintf(stderr, "OK: conversion 2 bpp <==> 8 bpp\n");
    pixt5 = pixConvertTo8(pixs2, TRUE);
    pixt6 = pixThreshold8(pixt5, 2, 4, 1);
    pixEqual(pixs2, pixt6, &same);
    if (!same) {
        pixDisplayWithTitle(pixs2, 100, 100, "2 bpp, cmap", DFLAG);
        pixDisplayWithTitle(pixt6, 500, 100, "2 bpp, cmap", DFLAG);
        error = TRUE;
        sarrayAddString(sa, (char *)"conversion 2 bpp <==> 8 bpp; cmap",
                        L_COPY);
    } else
        fprintf(stderr, "OK: conversion 2 bpp <==> 8 bpp; cmap\n");
    pixDestroy(&pixt1);
    pixDestroy(&pixt2);
    pixDestroy(&pixt3);
    pixDestroy(&pixt4);
    pixDestroy(&pixt5);
    pixDestroy(&pixt6);

        /* Conversion: 4 bpp --> 8 bpp --> 4 bpp */
        /* Conversion: 4 bpp cmap --> 8 bpp cmap --> 4 bpp cmap */
    pixt1 = pixRemoveColormap(pixs4, REMOVE_CMAP_TO_GRAYSCALE);
    pixt2 = pixThreshold8(pixt1, 4, 16, 0);
    pixt3 = pixConvertTo8(pixt2, FALSE);
    pixt4 = pixThreshold8(pixt3, 4, 16, 0);
    pixEqual(pixt2, pixt4, &same);
    if (!same) {
        pixDisplayWithTitle(pixt2, 100, 100, "4 bpp, no cmap", DFLAG);
        pixDisplayWithTitle(pixt4, 500, 100, "4 bpp, no cmap", DFLAG);
        error = TRUE;
        sarrayAddString(sa, (char *)"conversion 4 bpp <==> 8 bpp", L_COPY);
    } else
        fprintf(stderr, "OK: conversion 4 bpp <==> 8 bpp\n");
    pixt5 = pixConvertTo8(pixs4, TRUE);
    pixt6 = pixThreshold8(pixt5, 4, 16, 1);
    pixEqual(pixs4, pixt6, &same);
    if (!same) {
        pixDisplayWithTitle(pixs4, 100, 100, "4 bpp, cmap", DFLAG);
        pixDisplayWithTitle(pixt6, 500, 100, "4 bpp, cmap", DFLAG);
        error = TRUE;
        sarrayAddString(sa, (char *)"conversion 4 bpp <==> 8 bpp, cmap",
                        L_COPY);
    } else
        fprintf(stderr, "OK: conversion 4 bpp <==> 8 bpp; cmap\n");
    pixDestroy(&pixt1);
    pixDestroy(&pixt2);
    pixDestroy(&pixt3);
    pixDestroy(&pixt4);
    pixDestroy(&pixt5);
    pixDestroy(&pixt6);

        /* Conversion: 2 bpp cmap --> 2 bpp --> 2 bpp cmap --> 2 bpp */
    pixt1 = pixRemoveColormap(pixs2, REMOVE_CMAP_TO_GRAYSCALE);
    pixt2 = pixConvertGrayToColormap(pixt1);
    pixt3 = pixRemoveColormap(pixt2, REMOVE_CMAP_TO_GRAYSCALE);
    pixt4 = pixThresholdTo2bpp(pixt3, 4, 1);
    pixEqual(pixt1, pixt4, &same);
    if (!same) {
        pixDisplayWithTitle(pixs2, 100, 100, "2 bpp, cmap", DFLAG);
        pixDisplayWithTitle(pixt4, 500, 100, "2 bpp, cmap", DFLAG);
        error = TRUE;
        sarrayAddString(sa, (char *)"conversion 2 bpp <==> 2 bpp", L_COPY);
    } else
        fprintf(stderr, "OK: conversion 2 bpp <==> 2 bpp\n");
    pixDestroy(&pixt1);
    pixDestroy(&pixt2);
    pixDestroy(&pixt3);
    pixDestroy(&pixt4);

        /* Conversion: 4 bpp cmap --> 4 bpp --> 4 bpp cmap --> 4 bpp */
    pixt1 = pixRemoveColormap(pixs4, REMOVE_CMAP_TO_GRAYSCALE);
    pixt2 = pixConvertGrayToColormap(pixt1);
    pixt3 = pixRemoveColormap(pixt2, REMOVE_CMAP_TO_GRAYSCALE);
    pixt4 = pixThresholdTo4bpp(pixt3, 16, 1);
    pixEqual(pixt1, pixt4, &same);
    if (!same) {
        pixDisplayWithTitle(pixs4, 100, 100, "4 bpp, cmap", DFLAG);
        pixDisplayWithTitle(pixt4, 500, 100, "4 bpp, cmap", DFLAG);
        error = TRUE;
        sarrayAddString(sa, (char *)"conversion 4 bpp <==> 4 bpp", L_COPY);
    } else
        fprintf(stderr, "OK: conversion 4 bpp <==> 4 bpp\n");
    pixDestroy(&pixt1);
    pixDestroy(&pixt2);
    pixDestroy(&pixt3);
    pixDestroy(&pixt4);


        /* Conversion: 8 bpp --> 8 bpp cmap --> 8 bpp */
    pixt1 = pixConvertTo8(pixs8, TRUE);
    pixt2 = pixConvertTo8(pixt1, FALSE);
    pixEqual(pixs8, pixt2, &same);
    if (!same) {
        pixDisplayWithTitle(pixt1, 100, 100, "8 bpp, cmap", DFLAG);
        pixDisplayWithTitle(pixt2, 500, 100, "8 bpp, no cmap", DFLAG);
        error = TRUE;
        sarrayAddString(sa, (char *)"conversion 8 bpp <==> 8 bpp", L_COPY);
    } else
        fprintf(stderr, "OK: conversion 8 bpp <==> 8 bpp\n");
    pixDestroy(&pixt1);
    pixDestroy(&pixt2);

        /* Conversion: 2 bpp cmap --> 32 bpp --> 2 bpp cmap */
    pixt1 = pixConvertTo8(pixc2, TRUE);
    pixt2 = pixConvertTo32(pixt1);
    pixt3 = pixConvertTo32(pixc2);
    pixEqual(pixt2, pixt3, &same);
    if (!same) {
        pixDisplayWithTitle(pixt2, 100, 100, "32 bpp", DFLAG);
        pixDisplayWithTitle(pixt3, 500, 100, "32 bpp", DFLAG);
        error = TRUE;
        sarrayAddString(sa, (char *)"conversion 2 bpp ==> 32 bpp", L_COPY);
    } else
        fprintf(stderr, "OK: conversion 2 bpp <==> 32 bpp\n");
    cmap = pixGetColormap(pixc2);
    pixt4 = pixOctcubeQuantFromCmap(pixt3, cmap, 2, 4, L_EUCLIDEAN_DISTANCE);
    pixEqual(pixc2, pixt4, &same);
    if (!same) {
        pixDisplayWithTitle(pixc2, 100, 100, "4 bpp, cmap", DFLAG);
        pixDisplayWithTitle(pixt4, 500, 100, "4 bpp, cmap", DFLAG);
        error = TRUE;
        sarrayAddString(sa, (char *)"conversion 2 bpp <==> 32 bpp", L_COPY);
    } else
        fprintf(stderr, "OK: conversion 2 bpp <==> 32 bpp\n");
    pixDestroy(&pixt1);
    pixDestroy(&pixt2);
    pixDestroy(&pixt3);
    pixDestroy(&pixt4);

        /* Conversion: 4 bpp cmap --> 32 bpp --> 4 bpp cmap */
    pixt1 = pixConvertTo8(pixc4, TRUE);
    pixt2 = pixConvertTo32(pixt1);
    pixt3 = pixConvertTo32(pixc4);
    pixEqual(pixt2, pixt3, &same);
    if (!same) {
        pixDisplayWithTitle(pixt2, 100, 100, "32 bpp", DFLAG);
        pixDisplayWithTitle(pixt3, 500, 100, "32 bpp", DFLAG);
        error = TRUE;
        sarrayAddString(sa, (char *)"conversion 4 bpp ==> 32 bpp", L_COPY);
    } else
        fprintf(stderr, "OK: conversion 4 bpp <==> 32 bpp\n");
    cmap = pixGetColormap(pixc4);
    pixt4 = pixOctcubeQuantFromCmap(pixt3, cmap, 2, 4, L_EUCLIDEAN_DISTANCE);
    pixEqual(pixc4, pixt4, &same);
    if (!same) {
        pixDisplayWithTitle(pixc4, 100, 100, "4 bpp, cmap", DFLAG);
        pixDisplayWithTitle(pixt4, 500, 100, "4 bpp, cmap", DFLAG);
        error = TRUE;
        sarrayAddString(sa, (char *)"conversion 4 bpp <==> 32 bpp", L_COPY);
    } else
        fprintf(stderr, "OK: conversion 4 bpp <==> 32 bpp\n");
    pixDestroy(&pixt1);
    pixDestroy(&pixt2);
    pixDestroy(&pixt3);
    pixDestroy(&pixt4);

        /* Conversion: 8 bpp --> 32 bpp --> 8 bpp */
    pixt1 = pixConvertTo32(pixs8);
    pixt2 = pixConvertTo8(pixt1, FALSE);
    pixEqual(pixs8, pixt2, &same);
    if (!same) {
        pixDisplayWithTitle(pixs8, 100, 100, "8 bpp", DFLAG);
        pixDisplayWithTitle(pixt2, 500, 100, "8 bpp", DFLAG);
        error = TRUE;
        sarrayAddString(sa, (char *)"conversion 8 bpp <==> 32 bpp", L_COPY);
    } else
        fprintf(stderr, "OK: conversion 8 bpp <==> 32 bpp\n");
    pixDestroy(&pixt1);
    pixDestroy(&pixt2);

        /* Conversion: 8 bpp --> 16 bpp --> 8 bpp */
    pixt1 = pixConvert8To16(pixs8, 8);
    pixt2 = pixConvertTo8(pixt1, FALSE);
    pixEqual(pixs8, pixt2, &same);
    if (!same) {
        pixDisplayWithTitle(pixs8, 100, 100, "8 bpp", DFLAG);
        pixDisplayWithTitle(pixt2, 500, 100, "8 bpp", DFLAG);
        error = TRUE;
        sarrayAddString(sa, (char *)"conversion 8 bpp <==> 16 bpp", L_COPY);
    } else
        fprintf(stderr, "OK: conversion 8 bpp <==> 16 bpp\n");
    pixDestroy(&pixt1);
    pixDestroy(&pixt2);

        /* Conversion: 16 bpp --> 8 bpp --> 16 bpp */
    pixt1 = pixConvert16To8(pixs16, 1);
    pixt2 = pixConvertTo16(pixt1);
    pixWrite("/tmp/junkpix.png", pixt2, IFF_PNG);
    pixEqual(pixs16, pixt2, &same);
    if (!same) {
        pixDisplayWithTitle(pixs16, 100, 100, "16 bpp", DFLAG);
        pixDisplayWithTitle(pixt2, 500, 100, "16 bpp", DFLAG);
        error = TRUE;
        sarrayAddString(sa, (char *)"conversion 16 bpp <==> 8 bpp", L_COPY);
    } else
        fprintf(stderr, "OK: conversion 16 bpp <==> 8 bpp\n");
    pixDestroy(&pixt1);
    pixDestroy(&pixt2);

        /* Conversion: 8 bpp cmap --> 32 bpp --> 8 bpp cmap */
        /* Required to go to level 6 of octcube to get identical result */
    pixt1 = pixConvertTo32(pixc8);
    cmap = pixGetColormap(pixc8);
    pixt2 = pixOctcubeQuantFromCmap(pixt1, cmap, 2, 6, L_EUCLIDEAN_DISTANCE);
    pixEqual(pixc8, pixt2, &same);
    if (!same) {
        pixDisplayWithTitle(pixc8, 100, 100, "8 bpp cmap", DFLAG);
        pixDisplayWithTitle(pixt2, 500, 100, "8 bpp cmap", DFLAG);
        error = TRUE;
        sarrayAddString(sa, (char *)"conversion 8 bpp cmap <==> 32 bpp cmap",
                        L_COPY);
    } else
        fprintf(stderr, "OK: conversion 8 bpp <==> 32 bpp\n");
    pixDestroy(&pixt1);
    pixDestroy(&pixt2);

        /* Summarize results */
    if (error == FALSE)
        fprintf(stderr, "No errors found\n");
    else {
        errorstr = sarrayToString(sa, 1);
        fprintf(stderr, "Errors in the following:\n %s", errorstr);
        lept_free(errorstr);
    }

    sarrayDestroy(&sa);
    pixDestroy(&pixs1);
    pixDestroy(&pixs2);
    pixDestroy(&pixs4);
    pixDestroy(&pixc2);
    pixDestroy(&pixc4);
    pixDestroy(&pixs8);
    pixDestroy(&pixc8);
    pixDestroy(&pixs16);
    pixDestroy(&pixs32);
    return 0;
}
예제 #29
0
/*
 *  parseForProtos()
 *
 *      Input:  filein (output of cpp)
 *              prestring (<optional> string that prefaces each decl;
 *                        use NULL to omit)
 *      Return: parsestr (string of function prototypes), or NULL on error
 *
 *  Notes:
 *      (1) We parse the output of cpp:
 *              cpp -ansi <filein>
 *          Three plans were attempted, with success on the third.
 *      (2) Plan 1.  A cursory examination of the cpp output indicated that
 *          every function was preceded by a cpp comment statement.
 *          So we just need to look at statements beginning after comments.
 *          Unfortunately, this is NOT the case.  Some functions start
 *          without cpp comment lines, typically when there are no
 *          comments in the source that immediately precede the function.
 *      (3) Plan 2.  Consider the keywords in the language that start
 *          parts of the cpp file.  Some, like 'typedef', 'enum',
 *          'union' and 'struct', are followed after a while by '{',
 *          and eventually end with '}, plus an optional token and a
 *          final ';'  Others, like 'extern' and 'static', are never
 *          the beginnings of global function definitions.   Function
 *          prototypes have one or more sets of '(' followed eventually
 *          by a ')', and end with ';'.  But function definitions have
 *          tokens, followed by '(', more tokens, ')' and then
 *          immediately a '{'.  We would generate a prototype from this
 *          by adding a ';' to all tokens up to the ')'.  So we use
 *          these special tokens to decide what we are parsing.  And
 *          whenever a function definition is found and the prototype
 *          extracted, we skip through the rest of the function
 *          past the corresponding '}'.  This token ends a line, and
 *          is often on a line of its own.  But as it turns out,
 *          the only keyword we need to consider is 'static'.
 *      (4) Plan 3.  Consider the parentheses and braces for various
 *          declarations.  A struct, enum, or union has a pair of
 *          braces followed by a semicolon.  They cannot have parentheses
 *          before the left brace, but a struct can have lots of parentheses
 *          within the brace set.  A function prototype has no braces.
 *          A function declaration can have sets of left and right
 *          parentheses, but these are followed by a left brace.
 *          So plan 3 looks at the way parentheses and braces are
 *          organized.  Once the beginning of a function definition
 *          is found, the prototype is extracted and we search for
 *          the ending right brace.
 *      (5) To find the ending right brace, it is necessary to do some
 *          careful parsing.  For example, in this file, we have
 *          left and right braces as characters, and these must not
 *          be counted.  Somewhat more tricky, the file fhmtauto.c
 *          generates code, and includes a right brace in a string.
 *          So we must not include braces that are in strings.  But how
 *          do we know if something is inside a string?  Keep state,
 *          starting with not-inside, and every time you hit a double quote
 *          that is not escaped, toggle the condition.  Any brace
 *          found in the state of being within a string is ignored.
 *      (6) When a prototype is extracted, it is put in a canonical
 *          form (i.e., cleaned up).  Finally, we check that it is
 *          not static and save it.  (If static, it is ignored).
 *      (7) The @prestring for unix is NULL; it is included here so that
 *          you can use Microsoft's declaration for importing or
 *          exporting to a dll.  See environ.h for examples of use.
 *          Here, we set: @prestring = "LEPT_DLL ".  Note in particular
 *          the space character that will separate 'LEPT_DLL' from
 *          the standard unix prototype that follows.
 */
char *
parseForProtos(const char *filein,
               const char *prestring)
{
char    *strdata, *str, *newstr, *parsestr, *secondword;
l_int32  start, next, stop, charindex, found;
size_t   nbytes;
SARRAY  *sa, *saout, *satest;

    PROCNAME("parseForProtos");

    if (!filein)
        return (char *)ERROR_PTR("filein not defined", procName, NULL);

        /* Read in the cpp output into memory, one string for each
         * line in the file, omitting blank lines.  */
    strdata = (char *)l_binaryRead(filein, &nbytes);
    sa = sarrayCreateLinesFromString(strdata, 0);

    saout = sarrayCreate(0);
    next = 0;
    while (1) {  /* repeat after each non-static prototype is extracted */
        searchForProtoSignature(sa, next, &start, &stop, &charindex, &found);
        if (!found)
            break;
/*        fprintf(stderr, "  start = %d, stop = %d, charindex = %d\n",
                start, stop, charindex); */
        str = captureProtoSignature(sa, start, stop, charindex);

            /* Make sure that the signature found by cpp is neither
             * static nor extern.  We get 'extern' declarations from
             * header files, and with some versions of cpp running on
             * #include <sys/stat.h> we get something of the form:
             *    extern ... (( ... )) ... ( ... ) { ...
             * For this, the 1st '(' is the lp, the 2nd ')' is the rp,
             * and there is a lot of garbage between the rp and the lb.
             * It is easiest to simply reject any signature that starts
             * with 'extern'.  Note also that an 'extern' token has been
             * prepended to each prototype, so the 'static' or
             * 'extern' keywords we are looking for, if they exist,
             * would be the second word. */
        satest = sarrayCreateWordsFromString(str);
        secondword = sarrayGetString(satest, 1, L_NOCOPY);
        if (strcmp(secondword, "static") &&  /* not static */
            strcmp(secondword, "extern")) {  /* not extern */
            if (prestring) {  /* prepend it to the prototype */
                newstr = stringJoin(prestring, str);
                sarrayAddString(saout, newstr, L_INSERT);
                LEPT_FREE(str);
            } else {
                sarrayAddString(saout, str, L_INSERT);
            }
        } else {
            LEPT_FREE(str);
        }
        sarrayDestroy(&satest);

        skipToEndOfFunction(sa, stop, charindex, &next);
        if (next == -1) break;
    }

        /* Flatten into a string with newlines between prototypes */
    parsestr = sarrayToString(saout, 1);
    LEPT_FREE(strdata);
    sarrayDestroy(&sa);
    sarrayDestroy(&saout);

    return parsestr;
}
/*!
 * \brief   strcodeFinalize()
 *
 * \param[in,out]  pstrcode destroys after .c and .h files have been generated
 * \param[in]      outdir [optional] if NULL, files are made in /tmp/lept/auto
 * \return  void
 */
l_int32
strcodeFinalize(L_STRCODE  **pstrcode,
                const char  *outdir)
{
char        buf[256];
char       *filestr, *casestr, *descr, *datastr, *realoutdir;
l_int32     actstart, end, newstart, fileno, nbytes;
size_t      size;
L_STRCODE  *strcode;
SARRAY     *sa1, *sa2, *sa3;

    PROCNAME("strcodeFinalize");

    lept_mkdir("lept/auto");

    if (!pstrcode || *pstrcode == NULL)
        return ERROR_INT("No input data", procName, 1);
    strcode = *pstrcode;
    if (!outdir) {
        L_INFO("no outdir specified; writing to /tmp/lept/auto\n", procName);
        realoutdir = stringNew("/tmp/lept/auto");
    } else {
        realoutdir = stringNew(outdir);
    }

    /* ------------------------------------------------------- */
    /*              Make the output autogen.*.c file           */
    /* ------------------------------------------------------- */

       /* Make array of textlines from TEMPLATE1 */
    if ((filestr = (char *)l_binaryRead(TEMPLATE1, &size)) == NULL)
        return ERROR_INT("filestr not made", procName, 1);
    if ((sa1 = sarrayCreateLinesFromString(filestr, 1)) == NULL)
        return ERROR_INT("sa1 not made", procName, 1);
    LEPT_FREE(filestr);

    if ((sa3 = sarrayCreate(0)) == NULL)
        return ERROR_INT("sa3 not made", procName, 1);

        /* Copyright notice */
    sarrayParseRange(sa1, 0, &actstart, &end, &newstart, "--", 0);
    sarrayAppendRange(sa3, sa1, actstart, end);

        /* File name comment */
    fileno = strcode->fileno;
    snprintf(buf, sizeof(buf), " *   autogen.%d.c", fileno);
    sarrayAddString(sa3, buf, L_COPY);

        /* More text */
    sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0);
    sarrayAppendRange(sa3, sa1, actstart, end);

        /* Description of function types by index */
    descr = sarrayToString(strcode->descr, 1);
    descr[strlen(descr) - 1] = '\0';
    sarrayAddString(sa3, descr, L_INSERT);

        /* Includes */
    sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0);
    sarrayAppendRange(sa3, sa1, actstart, end);
    snprintf(buf, sizeof(buf), "#include \"autogen.%d.h\"", fileno);
    sarrayAddString(sa3, buf, L_COPY);

        /* Header for auto-generated deserializers */
    sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0);
    sarrayAppendRange(sa3, sa1, actstart, end);

        /* Function name (as comment) */
    snprintf(buf, sizeof(buf), " *  l_autodecode_%d()", fileno);
    sarrayAddString(sa3, buf, L_COPY);

        /* Input and return values */
    sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0);
    sarrayAppendRange(sa3, sa1, actstart, end);

        /* Function name */
    snprintf(buf, sizeof(buf), "l_autodecode_%d(l_int32 index)", fileno);
    sarrayAddString(sa3, buf, L_COPY);

        /* Stack vars */
    sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0);
    sarrayAppendRange(sa3, sa1, actstart, end);

        /* Declaration of nfunc on stack */
    snprintf(buf, sizeof(buf), "l_int32   nfunc = %d;\n", strcode->n);
    sarrayAddString(sa3, buf, L_COPY);

        /* Declaration of PROCNAME */
    snprintf(buf, sizeof(buf), "    PROCNAME(\"l_autodecode_%d\");", fileno);
    sarrayAddString(sa3, buf, L_COPY);

        /* Test input variables */
    sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0);
    sarrayAppendRange(sa3, sa1, actstart, end);

        /* Insert case string */
    casestr = sarrayToString(strcode->function, 0);
    casestr[strlen(casestr) - 1] = '\0';
    sarrayAddString(sa3, casestr, L_INSERT);

        /* End of function */
    sarrayParseRange(sa1, newstart, &actstart, &end, &newstart, "--", 0);
    sarrayAppendRange(sa3, sa1, actstart, end);

        /* Flatten to string and output to autogen*.c file */
    if ((filestr = sarrayToString(sa3, 1)) == NULL)
        return ERROR_INT("filestr from sa3 not made", procName, 1);
    nbytes = strlen(filestr);
    snprintf(buf, sizeof(buf), "%s/autogen.%d.c", realoutdir, fileno);
    l_binaryWrite(buf, "w", filestr, nbytes);
    LEPT_FREE(filestr);
    sarrayDestroy(&sa1);
    sarrayDestroy(&sa3);

    /* ------------------------------------------------------- */
    /*              Make the output autogen.*.h file           */
    /* ------------------------------------------------------- */

       /* Make array of textlines from TEMPLATE2 */
    if ((filestr = (char *)l_binaryRead(TEMPLATE2, &size)) == NULL)
        return ERROR_INT("filestr not made", procName, 1);
    if ((sa2 = sarrayCreateLinesFromString(filestr, 1)) == NULL)
        return ERROR_INT("sa2 not made", procName, 1);
    LEPT_FREE(filestr);

    if ((sa3 = sarrayCreate(0)) == NULL)
        return ERROR_INT("sa3 not made", procName, 1);

        /* Copyright notice */
    sarrayParseRange(sa2, 0, &actstart, &end, &newstart, "--", 0);
    sarrayAppendRange(sa3, sa2, actstart, end);

        /* File name comment */
    snprintf(buf, sizeof(buf), " *   autogen.%d.h", fileno);
    sarrayAddString(sa3, buf, L_COPY);

        /* More text */
    sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0);
    sarrayAppendRange(sa3, sa2, actstart, end);

        /* Beginning header protection */
    snprintf(buf, sizeof(buf), "#ifndef  LEPTONICA_AUTOGEN_%d_H\n"
                               "#define  LEPTONICA_AUTOGEN_%d_H",
             fileno, fileno);
    sarrayAddString(sa3, buf, L_COPY);

        /* Prototype header text */
    sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0);
    sarrayAppendRange(sa3, sa2, actstart, end);

        /* Prototype declaration */
    snprintf(buf, sizeof(buf), "void *l_autodecode_%d(l_int32 index);", fileno);
    sarrayAddString(sa3, buf, L_COPY);

        /* Prototype trailer text */
    sarrayParseRange(sa2, newstart, &actstart, &end, &newstart, "--", 0);
    sarrayAppendRange(sa3, sa2, actstart, end);

        /* Insert serialized data strings */
    datastr = sarrayToString(strcode->data, 1);
    datastr[strlen(datastr) - 1] = '\0';
    sarrayAddString(sa3, datastr, L_INSERT);

        /* End header protection */
    snprintf(buf, sizeof(buf), "#endif  /* LEPTONICA_AUTOGEN_%d_H */", fileno);
    sarrayAddString(sa3, buf, L_COPY);

        /* Flatten to string and output to autogen*.h file */
    if ((filestr = sarrayToString(sa3, 1)) == NULL)
        return ERROR_INT("filestr from sa3 not made", procName, 1);
    nbytes = strlen(filestr);
    snprintf(buf, sizeof(buf), "%s/autogen.%d.h", realoutdir, fileno);
    l_binaryWrite(buf, "w", filestr, nbytes);
    LEPT_FREE(filestr);
    LEPT_FREE(realoutdir);
    sarrayDestroy(&sa2);
    sarrayDestroy(&sa3);

        /* Cleanup */
    strcodeDestroy(pstrcode);
    return 0;
}