Example #1
File: tokenize.c  Project: ghorn/conftron
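/** Discard the rest of the current line (everything up to and including '\n' or EOF). **/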
void tokenize_flush_line(tokenize_t *t)
{
    int c;
    do {
        c = tokenize_next_char(t);
    } while (c!=EOF && c!='\n');
}
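Usage sketch (not from either project): tokenize_flush_line is the natural building block for discarding the remainder of a line, e.g. during error recovery or for a line-comment convention. Only tokenize_t, tokenize_next_char, tokenize_ungetc, and tokenize_flush_line come from the examples on this page; the helper name and the '#' comment convention below are assumptions made purely for illustration.

/* Hypothetical helper (sketch): skip '#'-prefixed comment lines before
 * handing control back to the tokenizer.  tokenize_ungetc() is assumed to
 * behave as in Examples #2 and #3 below. */
static int skip_line_comments(tokenize_t *t)
{
    int c = tokenize_next_char(t);
    while (c == '#') {               /* '#' comments: an assumed convention */
        tokenize_flush_line(t);      /* drop everything up to the newline */
        c = tokenize_next_char(t);
    }
    if (c != EOF)
        tokenize_ungetc(t, c);       /* push back the first non-comment char */
    return c;
}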
Example #2
File: tokenize.c  Project: ghorn/conftron
/** chunkify tokens. **/
int tokenize_next_internal(tokenize_t *t)
{
    int c;
    int pos = 0; // output char pos

skip_white:
    c = tokenize_next_char(t);

    if (c == EOF)
        return EOF;

    if (isspace(c))
        goto skip_white;

    // a token is starting. mark its position.
    t->token_line = t->current_line;
    t->token_column = t->current_column;

    // is a character literal?
    if (c=='\'') {
        t->token[pos++] = c;
        c = tokenize_next_char(t);
        if (c=='\\')
            c = unescape(tokenize_next_char(t));
        if (c == EOF)
            return -4;
        t->token[pos++] = c;
        c = tokenize_next_char(t);
        if (c!='\'')
            return -5;
        t->token[pos++] = c;
        goto end_tok;
    }

    // is a string literal?
    if (c=='\"') {
        int escape = 0;

        // add the initial quote
        t->token[pos++] = c;

        // keep reading until close quote
        while (1) {
            if (pos >= MAX_TOKEN_LEN)
                return -2;

            c = tokenize_next_char(t);

            if (c == EOF)
                goto end_tok;

            if (escape) {
                escape = 0;
                c = unescape(c);

                continue;
            }

            if (c=='\"') {
                t->token[pos++] = c;
                goto end_tok;
            }
            if (c=='\\') {
                escape = 1;
                continue;
            }

            t->token[pos++] = c;
        }
        goto end_tok;
    }

    // is an operator?
    if (strchr(op_chars, c)!=NULL) {
        while (strchr(op_chars, c)!=NULL) {
            if (pos >= MAX_TOKEN_LEN)
                return -2;
            t->token[pos++] = c;
            c = tokenize_next_char(t);
        }
        tokenize_ungetc(t, c);
        goto end_tok;
    }

    // otherwise, all tokens are alpha-numeric blobs
in_tok:
    if (pos >= MAX_TOKEN_LEN)
        return -2;

    t->token[pos++] = c;

    if (strchr(single_char_toks,c)!=NULL)
        goto end_tok;

    c = tokenize_next_char(t);
    if (strchr(single_char_toks,c)!=NULL ||
        strchr(op_chars,c)!=NULL) {
        tokenize_ungetc(t, c);
        goto end_tok;
    }

    if (!isspace(c) && c != EOF)
        goto in_tok;

end_tok:
    t->token[pos] = 0;

    return pos;
}
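Two things are worth noting before reusing this version: errors come back as negative codes (-2 when the token would overflow MAX_TOKEN_LEN, -4/-5 for a malformed character literal), and in the string-literal loop the escape branch unescapes c but then continues without storing it, so escaped characters never reach the token. A minimal driver sketch follows; dump_tokens() and its error handling are assumptions for illustration, only the tokenize_* functions and the token/token_line/token_column fields come from the code above.

#include <stdio.h>

/* Hypothetical driver (sketch): print each token with its position and
 * recover from tokenizer errors by flushing the offending line. */
static void dump_tokens(tokenize_t *t)
{
    int res;
    while ((res = tokenize_next_internal(t)) != EOF) {
        if (res < 0) {               /* -2: too long, -4/-5: bad char literal */
            fprintf(stderr, "tokenize error %d at %d:%d\n",
                    res, t->token_line, t->token_column);
            tokenize_flush_line(t);  /* error recovery via Example #1 */
            continue;
        }
        printf("%d:%d \"%s\"\n", t->token_line, t->token_column, t->token);
    }
}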
Example #3
File: tokenize.c  Project: GArlington/lcm
/** chunkify tokens. **/
int tokenize_next_internal(tokenize_t *t)
{
    int c;
    int pos = 0; // output char pos

    t->token_type = LCM_TOK_INVALID;

    // Repeatedly read characters until EOF or a non-whitespace character is
    // reached.
    do {
        c = tokenize_next_char(t);

        if (c == EOF) {
            t->token_type = LCM_TOK_EOF;
            return EOF;
        }
    } while (isspace(c));

    // a token is starting. mark its position.
    t->token_line = t->current_line;
    t->token_column = t->current_column;

    // is a character literal?
    if (c=='\'') {
        t->token[pos++] = c;
        c = tokenize_next_char(t);
        if (c=='\\')
            c = unescape(tokenize_next_char(t));
        if (c == EOF)
            return -4;
        t->token[pos++] = c;
        c = tokenize_next_char(t);
        if (c!='\'')
            return -5;
        t->token[pos++] = c;
        t->token_type = LCM_TOK_OTHER;
        goto end_tok;
    }

    // is a string literal?
    if (c=='\"') {
        int escape = 0;

        // add the initial quote
        t->token[pos++] = c;

        // keep reading until close quote
        while (1) {
            if (!ensure_token_capacity(t, pos)) {
                return TOK_ERR_MEMORY_INSUFFICIENT;
            }

            c = tokenize_next_char(t);
            if (c == EOF)
                goto end_tok;

            if (escape) {
                escape = 0;
                c = unescape(c);

                continue;
            }

            if (c=='\"') {
                t->token[pos++] = c;
                goto end_tok;
            }
            if (c=='\\') {
                escape = 1;
                continue;
            }

            t->token[pos++] = c;
        }
        t->token_type = LCM_TOK_OTHER;
        goto end_tok;
    }

    // is an operator?
    if (strchr(op_chars, c)!=NULL) {
        while (strchr(op_chars, c)!=NULL) {
            if (!ensure_token_capacity(t, pos)) {
                return TOK_ERR_MEMORY_INSUFFICIENT;
            }
            t->token[pos++] = c;
            c = tokenize_next_char(t);
        }
        t->token_type = LCM_TOK_OTHER;
        tokenize_ungetc(t, c);
        goto end_tok;
    }

    // Is a comment?
    if (c == '/') {
        if (!ensure_token_capacity(t, pos)) {
            return TOK_ERR_MEMORY_INSUFFICIENT;
        }
        t->token[pos++] = c;

        c = tokenize_next_char(t);
        if (c == EOF) {
            t->token_type = LCM_TOK_OTHER;
            goto end_tok;
        }

        // Extended comment '/* ... */'
        if (c == '*') {
            return tokenize_extended_comment(t);
        }

        // Single-line comment
        if (c == '/') {
            t->token_type = LCM_TOK_COMMENT;
            c = tokenize_next_char(t);

            // Strip out leading '/' characters
            while (c == '/') {
                c = tokenize_next_char(t);
            }

            // Strip out leading whitespace.
            while (c != EOF && c == ' ') {
                c = tokenize_next_char(t);
            }

            pos = 0;

            // Place the rest of the line into a comment token.
            while (c != EOF && c != '\n') {
                if (!ensure_token_capacity(t, pos)) {
                    return TOK_ERR_MEMORY_INSUFFICIENT;
                }
                t->token[pos++] = c;
                c = tokenize_next_char(t);
            }
            tokenize_ungetc(t, c);
            goto end_tok;
        }

        // If the '/' is not followed by a '*' or a '/', then treat it like an
        // operator
        t->token_type = LCM_TOK_OTHER;
        tokenize_ungetc(t, c);
        goto end_tok;
    }

    // otherwise, all tokens are alpha-numeric blobs
    do {
        if (!ensure_token_capacity(t, pos)) {
            return TOK_ERR_MEMORY_INSUFFICIENT;
        }

        t->token[pos++] = c;

        t->token_type = LCM_TOK_OTHER;

        if (strchr(single_char_toks,c)!=NULL)
            goto end_tok;

        c = tokenize_next_char(t);
        if (strchr(single_char_toks,c)!=NULL ||
                strchr(op_chars,c)!=NULL) {
            tokenize_ungetc(t, c);
            goto end_tok;
        }

    } while (!isspace(c) && c != EOF);

end_tok:
    t->token[pos] = 0;

    return pos;
}
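This later revision grows the token buffer on demand (ensure_token_capacity), classifies each result via token_type, and splits comment handling between this function and tokenize_extended_comment below. That classification lets a caller filter comments without inspecting the token text; a small wrapper sketch, assuming only the names visible above (next_non_comment itself is a hypothetical name):

/* Hypothetical wrapper (sketch): return the next non-comment token,
 * passing EOF and error codes through unchanged. */
static int next_non_comment(tokenize_t *t)
{
    int res;
    do {
        res = tokenize_next_internal(t);
    } while (res >= 0 && t->token_type == LCM_TOK_COMMENT);
    return res;                      /* EOF, an error code, or the token length */
}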
Example #4
File: tokenize.c  Project: GArlington/lcm
int tokenize_extended_comment(tokenize_t* t)
{
    int pos = 0;

    // So far, the tokenizer has processed "/*"
    int comment_finished = 0;

    while (!comment_finished) {
        int pos_line_start = pos;

        // Go through leading whitespace.
        int c;
        while (1) {
            c = tokenize_next_char(t);
            if (c != EOF && (c == ' ' || c == '\t')) {
                if (!add_char_to_token(t, pos, c)) {
                    return TOK_ERR_MEMORY_INSUFFICIENT;
                }
                pos++;
            } else {
                break;
            }
        }

        // Go through asterisks
        int got_asterisk = 0;
        while (c == '*') {
            if (!add_char_to_token(t, pos, c)) {
                return TOK_ERR_MEMORY_INSUFFICIENT;
            }
            pos++;
            got_asterisk = 1;
            c = tokenize_next_char(t);
        }

        // Strip out leading comment characters in the line.
        if (got_asterisk) {
            pos = pos_line_start;
            if (c == '/') {
                // End of comment?
                comment_finished = 1;
                break;
            } else if (c == ' ') {
                // If a space immediately followed the leading asterisks, then
                // skip it.
                c = tokenize_next_char(t);
            }
        }

        // The rest of the line is comment content.
        while (!comment_finished && c != EOF && c != '\n') {
            int last_c = c;

            if (!add_char_to_token(t, pos, c)) {
                return TOK_ERR_MEMORY_INSUFFICIENT;
            }
            pos++;
            c = tokenize_next_char(t);

            if (last_c == '*' && c == '/') {
                comment_finished = 1;
                pos--;
            }
        }

        if (!comment_finished) {
            if (c == EOF) {
                printf("%s : EOF reached while parsing comment\n", t->path);
                return EOF;
            }

            assert(c == '\n');
            if (pos_line_start != pos) {
                if (!add_char_to_token(t, pos, c)) {
                    return TOK_ERR_MEMORY_INSUFFICIENT;
                }
                pos++;
            }
        }
    }

    t->token[pos] = 0;
    t->token_type = LCM_TOK_COMMENT;

    return pos;
}
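For each comment line, the code above drops leading whitespace only when a run of '*' follows it, drops that run, and drops one space immediately after it; everything else on the line is kept as comment content. The standalone function below re-implements that per-line stripping in isolation, purely as an illustration of what ends up in the comment token; it is not part of either project.

#include <stdio.h>

/* Standalone sketch: mimic the per-line decoration stripping performed by
 * tokenize_extended_comment on a single comment line. */
static const char *strip_comment_decoration(const char *line)
{
    const char *p = line;
    while (*p == ' ' || *p == '\t')
        p++;                         /* look past leading whitespace */
    if (*p != '*')
        return line;                 /* no asterisk: keep the line as written */
    while (*p == '*')
        p++;                         /* drop the run of asterisks */
    if (*p == ' ')
        p++;                         /* and a single space right after them */
    return p;
}

int main(void)
{
    printf("[%s]\n", strip_comment_decoration("   * A second line."));
    /* prints: [A second line.] */
    return 0;
}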