Beispiel #1
0
static enum rules_token
lex(struct scanner *s, union lvalue *val)
{
skip_more_whitespace_and_comments:
    /* Skip spaces. */
    while (is_space(peek(s)))
        if (next(s) == '\n')
            return TOK_END_OF_LINE;

    /* Skip comments. */
    if (chr(s, '#')) {
        skip_to_eol(s);
        goto skip_more_whitespace_and_comments;
    }

    /* See if we're done. */
    if (eof(s)) return TOK_END_OF_FILE;

    /* New token. */
    s->token_line = s->line;
    s->token_column = s->column;
    s->buf_pos = 0;

    /* LHS Keysym. */
    if (chr(s, '<')) {
        while (peek(s) != '>' && !eol(s))
            buf_append(s, next(s));
        if (!chr(s, '>')) {
            scanner_err(s, "unterminated keysym literal");
            return TOK_ERROR;
        }
        if (!buf_append(s, '\0')) {
            scanner_err(s, "keysym literal is too long");
            return TOK_ERROR;
        }
        val->string.str = s->buf;
        val->string.len = s->buf_pos;
        return TOK_LHS_KEYSYM;
    }

    /* Colon. */
    if (chr(s, ':'))
        return TOK_COLON;
    if (chr(s, '!'))
        return TOK_BANG;
    if (chr(s, '~'))
        return TOK_TILDE;

    /* String literal. */
    if (chr(s, '\"')) {
        while (!eof(s) && !eol(s) && peek(s) != '\"') {
            if (chr(s, '\\')) {
                uint8_t o;
                if (chr(s, '\\')) {
                    buf_append(s, '\\');
                }
                else if (chr(s, '"')) {
                    buf_append(s, '"');
                }
                else if (chr(s, 'x') || chr(s, 'X')) {
                    if (hex(s, &o))
                        buf_append(s, (char) o);
                    else
                        scanner_warn(s, "illegal hexadecimal escape sequence in string literal");
                }
                else if (oct(s, &o)) {
                    buf_append(s, (char) o);
                }
                else {
                    scanner_warn(s, "unknown escape sequence (%c) in string literal", peek(s));
                    /* Ignore. */
                }
            } else {
                buf_append(s, next(s));
            }
        }
        if (!chr(s, '\"')) {
            scanner_err(s, "unterminated string literal");
            return TOK_ERROR;
        }
        if (!buf_append(s, '\0')) {
            scanner_err(s, "string literal is too long");
            return TOK_ERROR;
        }
        if (!is_valid_utf8(s->buf, s->buf_pos - 1)) {
            scanner_err(s, "string literal is not a valid UTF-8 string");
            return TOK_ERROR;
        }
        val->string.str = s->buf;
        val->string.len = s->buf_pos;
        return TOK_STRING;
    }

    /* Identifier or include. */
    if (is_alpha(peek(s)) || peek(s) == '_') {
        s->buf_pos = 0;
        while (is_alnum(peek(s)) || peek(s) == '_')
            buf_append(s, next(s));
        if (!buf_append(s, '\0')) {
            scanner_err(s, "identifier is too long");
            return TOK_ERROR;
        }

        if (streq(s->buf, "include"))
            return TOK_INCLUDE;

        val->string.str = s->buf;
        val->string.len = s->buf_pos;
        return TOK_IDENT;
    }

    /* Discard rest of line. */
    skip_to_eol(s);

    scanner_err(s, "unrecognized token");
    return TOK_ERROR;
}
Beispiel #2
0
int
_xkbcommon_lex(YYSTYPE *yylval, struct scanner *s)
{
    int tok;

skip_more_whitespace_and_comments:
    /* Skip spaces. */
    while (is_space(peek(s))) next(s);

    /* Skip comments. */
    if (lit(s, "//") || chr(s, '#')) {
        while (!eof(s) && !eol(s)) next(s);
        goto skip_more_whitespace_and_comments;
    }

    /* See if we're done. */
    if (eof(s)) return END_OF_FILE;

    /* New token. */
    s->token_line = s->line;
    s->token_column = s->column;
    s->buf_pos = 0;

    /* String literal. */
    if (chr(s, '\"')) {
        while (!eof(s) && !eol(s) && peek(s) != '\"') {
            if (chr(s, '\\')) {
                uint8_t o;
                if      (chr(s, '\\')) buf_append(s, '\\');
                else if (chr(s, 'n'))  buf_append(s, '\n');
                else if (chr(s, 't'))  buf_append(s, '\t');
                else if (chr(s, 'r'))  buf_append(s, '\r');
                else if (chr(s, 'b'))  buf_append(s, '\b');
                else if (chr(s, 'f'))  buf_append(s, '\f');
                else if (chr(s, 'v'))  buf_append(s, '\v');
                else if (chr(s, 'e'))  buf_append(s, '\033');
                else if (oct(s, &o))   buf_append(s, (char) o);
                else {
                    scanner_warn(s, "unknown escape sequence in string literal");
                    /* Ignore. */
                }
            } else {
                buf_append(s, next(s));
            }
        }
        if (!buf_append(s, '\0') || !chr(s, '\"'))
            return scanner_error(s, "unterminated string literal");
        yylval->str = strdup(s->buf);
        if (!yylval->str)
            return scanner_error(s, "scanner out of memory");
        return STRING;
    }

    /* Key name literal. */
    if (chr(s, '<')) {
        while (is_graph(peek(s)) && peek(s) != '>')
            buf_append(s, next(s));
        if (!buf_append(s, '\0') || !chr(s, '>'))
            return scanner_error(s, "unterminated key name literal");
        /* Empty key name literals are allowed. */
        yylval->sval = xkb_atom_intern(s->ctx, s->buf, s->buf_pos - 1);
        return KEYNAME;
    }

    /* Operators and punctuation. */
    if (chr(s, ';')) return SEMI;
    if (chr(s, '{')) return OBRACE;
    if (chr(s, '}')) return CBRACE;
    if (chr(s, '=')) return EQUALS;
    if (chr(s, '[')) return OBRACKET;
    if (chr(s, ']')) return CBRACKET;
    if (chr(s, '(')) return OPAREN;
    if (chr(s, ')')) return CPAREN;
    if (chr(s, '.')) return DOT;
    if (chr(s, ',')) return COMMA;
    if (chr(s, '+')) return PLUS;
    if (chr(s, '-')) return MINUS;
    if (chr(s, '*')) return TIMES;
    if (chr(s, '/')) return DIVIDE;
    if (chr(s, '!')) return EXCLAM;
    if (chr(s, '~')) return INVERT;

    /* Identifier. */
    if (is_alpha(peek(s)) || peek(s) == '_') {
        s->buf_pos = 0;
        while (is_alnum(peek(s)) || peek(s) == '_')
            buf_append(s, next(s));
        if (!buf_append(s, '\0'))
            return scanner_error(s, "identifier too long");

        /* Keyword. */
        tok = keyword_to_token(s->buf);
        if (tok != -1) return tok;

        yylval->str = strdup(s->buf);
        if (!yylval->str)
            return scanner_error(s, "scanner out of memory");
        return IDENT;
    }

    /* Number literal (hexadecimal / decimal / float). */
    if (number(s, &yylval->num, &tok)) {
        if (tok == ERROR_TOK)
            return scanner_error(s, "malformed number literal");
        return tok;
    }

    return scanner_error(s, "unrecognized token");
}