コード例 #1
0
ファイル: tokenize.c プロジェクト: ghorn/conftron
/**
 * Chunkify tokens: read the next token from the stream into t->token.
 *
 * Token classes, in order of precedence:
 *   - character literals:  'x'  (backslash escapes handled via unescape())
 *   - string literals:     "..." (backslash escapes handled via unescape())
 *   - operator runs:       maximal run of characters from op_chars
 *   - everything else:     a blob terminated by whitespace, EOF, an
 *                          op_chars character, or a single_char_toks char
 *
 * On success, t->token holds the NUL-terminated token text and
 * t->token_line / t->token_column mark where it began.
 *
 * Returns the token length (>= 0), EOF at end of input, or a negative
 * error code: -2 token too long, -4 EOF inside a character literal,
 * -5 unterminated character literal.
 */
int tokenize_next_internal(tokenize_t *t)
{
    int c;
    int pos = 0; // output char pos

skip_white:
    c = tokenize_next_char(t);

    if (c == EOF)
        return EOF;

    if (isspace(c))
        goto skip_white;

    // a token is starting. mark its position.
    t->token_line = t->current_line;
    t->token_column = t->current_column;

    // is a character literal?  (always 3 output chars: ' x ' — no
    // bounds check needed as long as MAX_TOKEN_LEN >= 3)
    if (c=='\'') {
        t->token[pos++] = c;
        c = tokenize_next_char(t);
        if (c=='\\')
            c = unescape(tokenize_next_char(t));
        if (c == EOF)
            return -4;
        t->token[pos++] = c;
        c = tokenize_next_char(t);
        if (c!='\'')
            return -5;
        t->token[pos++] = c;
        goto end_tok;
    }

    // is a string literal?
    if (c=='\"') {
        int escape = 0;

        // add the initial quote
        t->token[pos++] = c;

        // keep reading until close quote
        while (1) {
            if (pos >= MAX_TOKEN_LEN)
                return -2;

            c = tokenize_next_char(t);

            // NOTE: an unterminated string (EOF before the closing quote)
            // is returned as-is rather than reported as an error.
            if (c == EOF)
                goto end_tok;

            if (escape) {
                escape = 0;
                // BUG FIX: the unescaped character must be appended to
                // the token; previously it was computed and then dropped,
                // so e.g. "a\n" tokenized as "a".
                t->token[pos++] = unescape(c);
                continue;
            }

            if (c=='\"') {
                t->token[pos++] = c;
                goto end_tok;
            }
            if (c=='\\') {
                escape = 1;
                continue;
            }

            t->token[pos++] = c;
        }
        // not reached: the loop above exits only via goto/return
    }

    // is an operator? collect the maximal run of operator characters,
    // then push back the first non-operator char.
    if (strchr(op_chars, c)!=NULL) {
        while (strchr(op_chars, c)!=NULL) {
            if (pos >= MAX_TOKEN_LEN)
                return -2;
            t->token[pos++] = c;
            c = tokenize_next_char(t);
        }
        tokenize_ungetc(t, c);
        goto end_tok;
    }

    // otherwise, all tokens are alpha-numeric blobs
in_tok:
    if (pos >= MAX_TOKEN_LEN)
        return -2;

    t->token[pos++] = c;

    // single-char tokens terminate the blob immediately (and are
    // consumed as part of it when they start it)
    if (strchr(single_char_toks,c)!=NULL)
        goto end_tok;

    c = tokenize_next_char(t);
    // a single-char token or operator char ends the blob; push it back
    // so the next call re-reads it as its own token.
    if (strchr(single_char_toks,c)!=NULL ||
        strchr(op_chars,c)!=NULL) {
        tokenize_ungetc(t, c);
        goto end_tok;
    }

    if (!isspace(c) && c != EOF)
        goto in_tok;

end_tok:
    t->token[pos] = 0;

    return pos;
}
コード例 #2
0
ファイル: tokenize.c プロジェクト: GArlington/lcm
/**
 * Chunkify tokens: read the next token from the stream into t->token and
 * classify it in t->token_type.
 *
 * Token classes, in order of precedence:
 *   - character literals:  'x'  (backslash escapes via unescape())
 *   - string literals:     "..." (backslash escapes via unescape())
 *   - operator runs:       maximal run of characters from op_chars
 *   - comments:            "//" line comments (leading slashes and spaces
 *                          stripped) and "/ *"-style extended comments,
 *                          delegated to tokenize_extended_comment()
 *   - everything else:     a blob terminated by whitespace, EOF, an
 *                          op_chars character, or a single_char_toks char
 *
 * Returns the token length (>= 0), EOF at end of input, or a negative
 * error code: -4 EOF inside a character literal, -5 unterminated
 * character literal, TOK_ERR_MEMORY_INSUFFICIENT on allocation failure.
 */
int tokenize_next_internal(tokenize_t *t)
{
    int c;
    int pos = 0; // output char pos

    t->token_type = LCM_TOK_INVALID;

    // Repeatedly read characters until EOF or a non-whitespace character is
    // reached.
    do {
        c = tokenize_next_char(t);

        if (c == EOF) {
            t->token_type = LCM_TOK_EOF;
            return EOF;
        }
    } while (isspace(c));

    // a token is starting. mark its position.
    t->token_line = t->current_line;
    t->token_column = t->current_column;

    // is a character literal?  (always 3 output chars; assumes the token
    // buffer can hold at least 4 bytes)
    if (c=='\'') {
        t->token[pos++] = c;
        c = tokenize_next_char(t);
        if (c=='\\')
            c = unescape(tokenize_next_char(t));
        if (c == EOF)
            return -4;
        t->token[pos++] = c;
        c = tokenize_next_char(t);
        if (c!='\'')
            return -5;
        t->token[pos++] = c;
        t->token_type = LCM_TOK_OTHER;
        goto end_tok;
    }

    // is a string literal?
    if (c=='\"') {
        int escape = 0;

        // add the initial quote
        t->token[pos++] = c;

        // BUG FIX: this assignment previously sat after the while(1) loop,
        // which exits only via goto/return, so string tokens were returned
        // with token_type still LCM_TOK_INVALID. Classify the token here.
        t->token_type = LCM_TOK_OTHER;

        // keep reading until close quote
        while (1) {
            if (!ensure_token_capacity(t, pos)) {
                return TOK_ERR_MEMORY_INSUFFICIENT;
            }

            c = tokenize_next_char(t);
            // NOTE: an unterminated string (EOF before the closing quote)
            // is returned as-is rather than reported as an error.
            if (c == EOF)
                goto end_tok;

            if (escape) {
                escape = 0;
                // BUG FIX: the unescaped character must be appended to the
                // token; previously it was computed and then dropped, so
                // e.g. "a\n" tokenized as "a".
                t->token[pos++] = unescape(c);
                continue;
            }

            if (c=='\"') {
                t->token[pos++] = c;
                goto end_tok;
            }
            if (c=='\\') {
                escape = 1;
                continue;
            }

            t->token[pos++] = c;
        }
        // not reached: the loop above exits only via goto/return
    }

    // is an operator? collect the maximal run of operator characters,
    // then push back the first non-operator char.
    if (strchr(op_chars, c)!=NULL) {
        while (strchr(op_chars, c)!=NULL) {
            if (!ensure_token_capacity(t, pos)) {
                return TOK_ERR_MEMORY_INSUFFICIENT;
            }
            t->token[pos++] = c;
            c = tokenize_next_char(t);
        }
        t->token_type = LCM_TOK_OTHER;
        tokenize_ungetc(t, c);
        goto end_tok;
    }

    // Is a comment?
    if (c == '/') {
        if (!ensure_token_capacity(t, pos)) {
            return TOK_ERR_MEMORY_INSUFFICIENT;
        }
        t->token[pos++] = c;

        c = tokenize_next_char(t);
        if (c == EOF) {
            // lone '/' at end of input: treat it as an operator token
            t->token_type = LCM_TOK_OTHER;
            goto end_tok;
        }

        // Extended comment '/* ... */'
        if (c == '*') {
            return tokenize_extended_comment(t);
        }

        // Single-line comment
        if (c == '/') {
            t->token_type = LCM_TOK_COMMENT;
            c = tokenize_next_char(t);

            // Strip out leading '/' characters
            while (c == '/') {
                c = tokenize_next_char(t);
            }

            // Strip out leading whitespace. (EOF can never equal ' ',
            // so no separate EOF check is needed.)
            while (c == ' ') {
                c = tokenize_next_char(t);
            }

            // Discard the "//" prefix; the token holds only the comment text.
            pos = 0;

            // Place the rest of the line into a comment token.
            while (c != EOF && c != '\n') {
                if (!ensure_token_capacity(t, pos)) {
                    return TOK_ERR_MEMORY_INSUFFICIENT;
                }
                t->token[pos++] = c;
                c = tokenize_next_char(t);
            }
            // Push back the newline (or EOF) so line accounting stays correct.
            tokenize_ungetc(t, c);
            goto end_tok;
        }

        // If the '/' is not followed by a '*' or a '/', then treat it like an
        // operator
        t->token_type = LCM_TOK_OTHER;
        tokenize_ungetc(t, c);
        goto end_tok;
    }

    // otherwise, all tokens are alpha-numeric blobs
    do {
        if (!ensure_token_capacity(t, pos)) {
            return TOK_ERR_MEMORY_INSUFFICIENT;
        }

        t->token[pos++] = c;

        t->token_type = LCM_TOK_OTHER;

        // single-char tokens terminate the blob immediately
        if (strchr(single_char_toks,c)!=NULL)
            goto end_tok;

        c = tokenize_next_char(t);
        // a single-char token or operator char ends the blob; push it back
        // so the next call re-reads it as its own token.
        if (strchr(single_char_toks,c)!=NULL ||
                strchr(op_chars,c)!=NULL) {
            tokenize_ungetc(t, c);
            goto end_tok;
        }

    } while (!isspace(c) && c != EOF);

end_tok:
    t->token[pos] = 0;

    return pos;
}
}