Beispiel #1
0
/*
 *  parseForProtos()
 *
 *      Input:  filein (output of cpp)
 *              prestring (<optional> string that prefaces each decl;
 *                        use NULL to omit)
 *      Return: parsestr (string of function prototypes), or NULL on error
 *
 *  Notes:
 *      (1) We parse the output of cpp:
 *              cpp -ansi <filein> 
 *          Three plans were attempted, with success on the third. 
 *      (2) Plan 1.  A cursory examination of the cpp output indicated that
 *          every function was preceeded by a cpp comment statement.
 *          So we just need to look at statements beginning after comments.
 *          Unfortunately, this is NOT the case.  Some functions start
 *          without cpp comment lines, typically when there are no
 *          comments in the source that immediately precede the function.
 *      (3) Plan 2.  Consider the keywords in the language that start
 *          parts of the cpp file.  Some, like 'typedef', 'enum',
 *          'union' and 'struct', are followed after a while by '{',
 *          and eventually end with '}, plus an optional token and a
 *          final ';'  Others, like 'extern' and 'static', are never
 *          the beginnings of global function definitions.   Function
 *          prototypes have one or more sets of '(' followed eventually
 *          by a ')', and end with ';'.  But function definitions have
 *          tokens, followed by '(', more tokens, ')' and then
 *          immediately a '{'.  We would generate a prototype from this
 *          by adding a ';' to all tokens up to the ')'.  So we use
 *          these special tokens to decide what we are parsing.  And
 *          whenever a function definition is found and the prototype
 *          extracted, we skip through the rest of the function
 *          past the corresponding '}'.  This token ends a line, and
 *          is often on a line of its own.  But as it turns out,
 *          the only keyword we need to consider is 'static'.
 *      (4) Plan 3.  Consider the parentheses and braces for various
 *          declarations.  A struct, enum, or union has a pair of
 *          braces followed by a semicolon.  They cannot have parentheses
 *          before the left brace, but a struct can have lots of parentheses
 *          within the brace set.  A function prototype has no braces.
 *          A function declaration can have sets of left and right
 *          parentheses, but these are followed by a left brace.
 *          So plan 3 looks at the way parentheses and braces are
 *          organized.  Once the beginning of a function definition
 *          is found, the prototype is extracted and we search for
 *          the ending right brace.
 *      (5) To find the ending right brace, it is necessary to do some
 *          careful parsing.  For example, in this file, we have
 *          left and right braces as characters, and these must not
 *          be counted.  Somewhat more tricky, the file fhmtauto.c
 *          generates code, and includes a right brace in a string.
 *          So we must not include braces that are in strings.  But how
 *          do we know if something is inside a string?  Keep state,
 *          starting with not-inside, and every time you hit a double quote
 *          that is not escaped, toggle the condition.  Any brace
 *          found in the state of being within a string is ignored.
 *      (6) When a prototype is extracted, it is put in a canonical
 *          form (i.e., cleaned up).  Finally, we check that it is
 *          not static and save it.  (If static, it is ignored).
 *      (7) The @prestring for unix is NULL; it is included here so that
 *          you can use Microsoft's declaration for importing or
 *          exporting to a dll.  See environ.h for examples of use.
 *          Here, we set: @prestring = "LEPT_DLL ".  Note in particular
 *          the space character that will separate 'LEPT_DLL' from
 *          the standard unix prototype that follows.
 */
char *
parseForProtos(const char *filein,
               const char *prestring)
{
char    *strdata, *str, *newstr, *parsestr, *secondword;
l_int32  nbytes, start, next, stop, charindex, found;
SARRAY  *sa, *saout, *satest;

    PROCNAME("parseForProtos");

    if (!filein)
        return (char *)ERROR_PTR("filein not defined", procName, NULL);

        /* Read in the cpp output into memory, one string for each
         * line in the file, omitting blank lines.  */
    strdata = (char *)arrayRead(filein, &nbytes);
    sa = sarrayCreateLinesFromString(strdata, 0);

    saout = sarrayCreate(0);
    next = 0;
    while (1) {  /* repeat after each non-static prototype is extracted */
        searchForProtoSignature(sa, next, &start, &stop, &charindex, &found);
        if (!found)
            break;
/*        fprintf(stderr, "  start = %d, stop = %d, charindex = %d\n",
                start, stop, charindex); */
        str = captureProtoSignature(sa, start, stop, charindex);

            /* Make sure it is not static.  Note that 'extern' has
             * been prepended to the prototype, so the 'static'
             * keyword, if it exists, would be the second word. */
        satest = sarrayCreateWordsFromString(str);
        secondword = sarrayGetString(satest, 1, 0);
        if (strcmp(secondword, "static")) {  /* not static */
            if (prestring) {  /* prepend it to the prototype */
                newstr = stringJoin(prestring, str);
                sarrayAddString(saout, newstr, L_INSERT);
                FREE(str);
            }
            else
                sarrayAddString(saout, str, L_INSERT);
        }
        else
            FREE(str);
        sarrayDestroy(&satest);

        skipToEndOfFunction(sa, stop, charindex, &next);
        if (next == -1) break;
    }

        /* Flatten into a string with newlines between prototypes */
    parsestr = sarrayToString(saout, 1);
    FREE(strdata);
    sarrayDestroy(&sa);
    sarrayDestroy(&saout);

    return parsestr;
}
Beispiel #2
0
/*
 *  cleanProtoSignature()
 *
 *      Input:  instr (input prototype string)
 *      Return: cleanstr (clean prototype string), or NULL on error
 *
 *  Notes:
 *      (1) Adds 'extern' at beginning and regularizes spaces
 *          between tokens.
 */
static char *
cleanProtoSignature(char *instr)
{
char    *str, *cleanstr;
char     buf[L_BUF_SIZE];
char     externstring[] = "extern";
l_int32  i, j, nwords, nchars, index, len;
SARRAY  *sa, *saout;

    PROCNAME("cleanProtoSignature");

    if (!instr)
        return (char *)ERROR_PTR("instr not defined", procName, NULL);

    sa = sarrayCreateWordsFromString(instr);
    nwords = sarrayGetCount(sa);
    saout = sarrayCreate(0);
    sarrayAddString(saout, externstring, 1);
    for (i = 0; i < nwords; i++) {
        str = sarrayGetString(sa, i, 0);
        nchars = strlen(str);
        index = 0;
        for (j = 0; j < nchars; j++) {
            if (index > L_BUF_SIZE - 6)
                return (char *)ERROR_PTR("token too large", procName, NULL);
            if (str[j] == '(') {
                buf[index++] = ' ';
                buf[index++] = '(';
                buf[index++] = ' ';
            }
            else if (str[j] == ')') {
                buf[index++] = ' ';
                buf[index++] = ')';
            }
            else 
                buf[index++] = str[j];
        }
        buf[index] = '\0';
        sarrayAddString(saout, buf, 1);
    }

        /* Flatten to a prototype string with spaces added after
         * each word, and remove the last space */
    cleanstr = sarrayToString(saout, 2);
    len = strlen(cleanstr);
    cleanstr[len - 1] = '\0';

    sarrayDestroy(&sa);
    sarrayDestroy(&saout);
    return cleanstr;
}
/*!
 * \brief   l_getIndexFromFile()
 *
 * \param[in]    filename
 * \param[out]   pindex found index
 * \return  0 if found, 1 on error.
 */
static l_int32
l_getIndexFromFile(const char  *filename,
                   l_int32     *pindex)
{
char     buf[256];
char    *word;
FILE    *fp;
l_int32  notfound, format;
SARRAY  *sa;

    PROCNAME("l_getIndexFromFile");

    if (!pindex)
        return ERROR_INT("&index not defined", procName, 1);
    *pindex = 0;
    if (!filename)
        return ERROR_INT("filename not defined", procName, 1);

        /* Open the stream, read lines until you find one with more
         * than a newline, and grab the first word. */
    if ((fp = fopenReadStream(filename)) == NULL)
        return ERROR_INT("stream not opened", procName, 1);
    do {
        if ((fgets(buf, sizeof(buf), fp)) == NULL) {
            fclose(fp);
            return ERROR_INT("fgets read fail", procName, 1);
        }
    } while (buf[0] == '\n');
    fclose(fp);
    sa = sarrayCreateWordsFromString(buf);
    word = sarrayGetString(sa, 0, L_NOCOPY);

        /* Find the index associated with the word.  If it is not
         * found, test to see if the file is a compressed pix. */
    notfound = l_getIndexFromStructname(word, pindex);
    sarrayDestroy(&sa);
    if (notfound) {  /* maybe a Pix */
        if (findFileFormat(filename, &format) == 0) {
            l_getIndexFromStructname("Pix", pindex);
        } else {
            return ERROR_INT("no file type identified", procName, 1);
        }
    }

    return 0;
}
Beispiel #4
0
int main(int    argc,
         char **argv)
{
l_int32      ignore;
size_t       nbytesin, nbytesout;
char        *infile, *instring, *outstring;
SARRAY      *sa1, *sa2, *sa3, *sa4, *sa5;
char         buf[256];
static char  mainName[] = "string_reg";

    if (argc != 2)
        return ERROR_INT(" Syntax:  string_reg infile", mainName, 1);

    infile = argv[1];
    instring = (char *)l_binaryRead(infile, &nbytesin);

    if (!instring)
        return ERROR_INT("file not read", mainName, 1);

    sa1 = sarrayCreateWordsFromString(instring);
    sa2 = sarrayCreateLinesFromString(instring, 0);
    sa3 = sarrayCreateLinesFromString(instring, 1);

    outstring = sarrayToString(sa1, 0);
    nbytesout = strlen(outstring);
    l_binaryWrite("/tmp/junk1.txt", "w", outstring, nbytesout);
    lept_free(outstring);

    outstring = sarrayToString(sa1, 1);
    nbytesout = strlen(outstring);
    l_binaryWrite("/tmp/junk2.txt", "w", outstring, nbytesout);
    lept_free(outstring);

    outstring = sarrayToString(sa2, 0);
    nbytesout = strlen(outstring);
    l_binaryWrite("/tmp/junk3.txt", "w", outstring, nbytesout);
    lept_free(outstring);

    outstring = sarrayToString(sa2, 1);
    nbytesout = strlen(outstring);
    l_binaryWrite("/tmp/junk4.txt", "w", outstring, nbytesout);
    lept_free(outstring);

    outstring = sarrayToString(sa3, 0);
    nbytesout = strlen(outstring);
    l_binaryWrite("/tmp/junk5.txt", "w", outstring, nbytesout);
    lept_free(outstring);

    outstring = sarrayToString(sa3, 1);
    nbytesout = strlen(outstring);
    l_binaryWrite("/tmp/junk6.txt", "w", outstring, nbytesout);
    lept_free(outstring);
    sprintf(buf, "diff -s /tmp/junk6.txt %s", infile);
    ignore = system(buf);

        /* write/read/write; compare /tmp/junkout5 with /tmp/junkout6 */
    sarrayWrite("/tmp/junk7.txt", sa2);
    sarrayWrite("/tmp/junk8.txt", sa3);
    sa4 = sarrayRead("/tmp/junk8.txt");
    sarrayWrite("/tmp/junk9.txt", sa4);
    sa5 = sarrayRead("/tmp/junk9.txt");
    ignore = system("diff -s /tmp/junk8.txt /tmp/junk9.txt");

    sarrayDestroy(&sa1);
    sarrayDestroy(&sa2);
    sarrayDestroy(&sa3);
    sarrayDestroy(&sa4);
    sarrayDestroy(&sa5);
    lept_free(instring);
    return 0;
}
Beispiel #5
0
/*
 *  parseForProtos()
 *
 *      Input:  filein (output of cpp)
 *              prestring (<optional> string that prefaces each decl;
 *                        use NULL to omit)
 *      Return: parsestr (string of function prototypes), or NULL on error
 *
 *  Notes:
 *      (1) We parse the output of cpp:
 *              cpp -ansi <filein>
 *          Three plans were attempted, with success on the third.
 *      (2) Plan 1.  A cursory examination of the cpp output indicated that
 *          every function was preceded by a cpp comment statement.
 *          So we just need to look at statements beginning after comments.
 *          Unfortunately, this is NOT the case.  Some functions start
 *          without cpp comment lines, typically when there are no
 *          comments in the source that immediately precede the function.
 *      (3) Plan 2.  Consider the keywords in the language that start
 *          parts of the cpp file.  Some, like 'typedef', 'enum',
 *          'union' and 'struct', are followed after a while by '{',
 *          and eventually end with '}, plus an optional token and a
 *          final ';'  Others, like 'extern' and 'static', are never
 *          the beginnings of global function definitions.   Function
 *          prototypes have one or more sets of '(' followed eventually
 *          by a ')', and end with ';'.  But function definitions have
 *          tokens, followed by '(', more tokens, ')' and then
 *          immediately a '{'.  We would generate a prototype from this
 *          by adding a ';' to all tokens up to the ')'.  So we use
 *          these special tokens to decide what we are parsing.  And
 *          whenever a function definition is found and the prototype
 *          extracted, we skip through the rest of the function
 *          past the corresponding '}'.  This token ends a line, and
 *          is often on a line of its own.  But as it turns out,
 *          the only keyword we need to consider is 'static'.
 *      (4) Plan 3.  Consider the parentheses and braces for various
 *          declarations.  A struct, enum, or union has a pair of
 *          braces followed by a semicolon.  They cannot have parentheses
 *          before the left brace, but a struct can have lots of parentheses
 *          within the brace set.  A function prototype has no braces.
 *          A function declaration can have sets of left and right
 *          parentheses, but these are followed by a left brace.
 *          So plan 3 looks at the way parentheses and braces are
 *          organized.  Once the beginning of a function definition
 *          is found, the prototype is extracted and we search for
 *          the ending right brace.
 *      (5) To find the ending right brace, it is necessary to do some
 *          careful parsing.  For example, in this file, we have
 *          left and right braces as characters, and these must not
 *          be counted.  Somewhat more tricky, the file fhmtauto.c
 *          generates code, and includes a right brace in a string.
 *          So we must not include braces that are in strings.  But how
 *          do we know if something is inside a string?  Keep state,
 *          starting with not-inside, and every time you hit a double quote
 *          that is not escaped, toggle the condition.  Any brace
 *          found in the state of being within a string is ignored.
 *      (6) When a prototype is extracted, it is put in a canonical
 *          form (i.e., cleaned up).  Finally, we check that it is
 *          not static and save it.  (If static, it is ignored).
 *      (7) The @prestring for unix is NULL; it is included here so that
 *          you can use Microsoft's declaration for importing or
 *          exporting to a dll.  See environ.h for examples of use.
 *          Here, we set: @prestring = "LEPT_DLL ".  Note in particular
 *          the space character that will separate 'LEPT_DLL' from
 *          the standard unix prototype that follows.
 */
char *
parseForProtos(const char *filein,
               const char *prestring)
{
char    *strdata, *str, *newstr, *parsestr, *secondword;
l_int32  start, next, stop, charindex, found;
size_t   nbytes;
SARRAY  *sa, *saout, *satest;

    PROCNAME("parseForProtos");

    if (!filein)
        return (char *)ERROR_PTR("filein not defined", procName, NULL);

        /* Read in the cpp output into memory, one string for each
         * line in the file, omitting blank lines.  */
    strdata = (char *)l_binaryRead(filein, &nbytes);
    sa = sarrayCreateLinesFromString(strdata, 0);

    saout = sarrayCreate(0);
    next = 0;
    while (1) {  /* repeat after each non-static prototype is extracted */
        searchForProtoSignature(sa, next, &start, &stop, &charindex, &found);
        if (!found)
            break;
/*        fprintf(stderr, "  start = %d, stop = %d, charindex = %d\n",
                start, stop, charindex); */
        str = captureProtoSignature(sa, start, stop, charindex);

            /* Make sure that the signature found by cpp is neither
             * static nor extern.  We get 'extern' declarations from
             * header files, and with some versions of cpp running on
             * #include <sys/stat.h> we get something of the form:
             *    extern ... (( ... )) ... ( ... ) { ...
             * For this, the 1st '(' is the lp, the 2nd ')' is the rp,
             * and there is a lot of garbage between the rp and the lb.
             * It is easiest to simply reject any signature that starts
             * with 'extern'.  Note also that an 'extern' token has been
             * prepended to each prototype, so the 'static' or
             * 'extern' keywords we are looking for, if they exist,
             * would be the second word. */
        satest = sarrayCreateWordsFromString(str);
        secondword = sarrayGetString(satest, 1, L_NOCOPY);
        if (strcmp(secondword, "static") &&  /* not static */
            strcmp(secondword, "extern")) {  /* not extern */
            if (prestring) {  /* prepend it to the prototype */
                newstr = stringJoin(prestring, str);
                sarrayAddString(saout, newstr, L_INSERT);
                LEPT_FREE(str);
            } else {
                sarrayAddString(saout, str, L_INSERT);
            }
        } else {
            LEPT_FREE(str);
        }
        sarrayDestroy(&satest);

        skipToEndOfFunction(sa, stop, charindex, &next);
        if (next == -1) break;
    }

        /* Flatten into a string with newlines between prototypes */
    parsestr = sarrayToString(saout, 1);
    LEPT_FREE(strdata);
    sarrayDestroy(&sa);
    sarrayDestroy(&saout);

    return parsestr;
}
Beispiel #6
0
/*!
 * \brief   sudokuReadFile()
 *
 * \param[in]    filename of formatted sudoku file
 * \return  array of 81 numbers, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) The file format has:
 *          * any number of comment lines beginning with '#'
 *          * a set of 9 lines, each having 9 digits (0-9) separated
 *            by a space
 * </pre>
 */
l_int32 *
sudokuReadFile(const char  *filename)
{
char     *str, *strj;
l_uint8  *data;
l_int32   i, j, nlines, val, index, error;
l_int32  *array;
size_t    size;
SARRAY   *saline, *sa1, *sa2;

    PROCNAME("sudokuReadFile");

    if (!filename)
        return (l_int32 *)ERROR_PTR("filename not defined", procName, NULL);
    data = l_binaryRead(filename, &size);
    sa1 = sarrayCreateLinesFromString((char *)data, 0);
    sa2 = sarrayCreate(9);

        /* Filter out the comment lines; verify that there are 9 data lines */
    nlines = sarrayGetCount(sa1);
    for (i = 0; i < nlines; i++) {
        str = sarrayGetString(sa1, i, L_NOCOPY);
        if (str[0] != '#')
            sarrayAddString(sa2, str, L_COPY);
    }
    LEPT_FREE(data);
    sarrayDestroy(&sa1);
    nlines = sarrayGetCount(sa2);
    if (nlines != 9) {
        sarrayDestroy(&sa2);
        L_ERROR("file has %d lines\n", procName, nlines);
        return (l_int32 *)ERROR_PTR("invalid file", procName, NULL);
    }

        /* Read the data into the array, verifying that each data
         * line has 9 numbers. */
    error = FALSE;
    array = (l_int32 *)LEPT_CALLOC(81, sizeof(l_int32));
    for (i = 0, index = 0; i < 9; i++) {
        str = sarrayGetString(sa2, i, L_NOCOPY);
        saline = sarrayCreateWordsFromString(str);
        if (sarrayGetCount(saline) != 9) {
            error = TRUE;
            sarrayDestroy(&saline);
            break;
        }
        for (j = 0; j < 9; j++) {
            strj = sarrayGetString(saline, j, L_NOCOPY);
            if (sscanf(strj, "%d", &val) != 1)
                error = TRUE;
            else
                array[index++] = val;
        }
        sarrayDestroy(&saline);
        if (error) break;
    }
    sarrayDestroy(&sa2);

    if (error) {
        LEPT_FREE(array);
        return (l_int32 *)ERROR_PTR("invalid data", procName, NULL);
    }

    return array;
}