static enum rules_token lex(struct scanner *s, union lvalue *val) { skip_more_whitespace_and_comments: /* Skip spaces. */ while (is_space(peek(s))) if (next(s) == '\n') return TOK_END_OF_LINE; /* Skip comments. */ if (chr(s, '#')) { skip_to_eol(s); goto skip_more_whitespace_and_comments; } /* See if we're done. */ if (eof(s)) return TOK_END_OF_FILE; /* New token. */ s->token_line = s->line; s->token_column = s->column; s->buf_pos = 0; /* LHS Keysym. */ if (chr(s, '<')) { while (peek(s) != '>' && !eol(s)) buf_append(s, next(s)); if (!chr(s, '>')) { scanner_err(s, "unterminated keysym literal"); return TOK_ERROR; } if (!buf_append(s, '\0')) { scanner_err(s, "keysym literal is too long"); return TOK_ERROR; } val->string.str = s->buf; val->string.len = s->buf_pos; return TOK_LHS_KEYSYM; } /* Colon. */ if (chr(s, ':')) return TOK_COLON; if (chr(s, '!')) return TOK_BANG; if (chr(s, '~')) return TOK_TILDE; /* String literal. */ if (chr(s, '\"')) { while (!eof(s) && !eol(s) && peek(s) != '\"') { if (chr(s, '\\')) { uint8_t o; if (chr(s, '\\')) { buf_append(s, '\\'); } else if (chr(s, '"')) { buf_append(s, '"'); } else if (chr(s, 'x') || chr(s, 'X')) { if (hex(s, &o)) buf_append(s, (char) o); else scanner_warn(s, "illegal hexadecimal escape sequence in string literal"); } else if (oct(s, &o)) { buf_append(s, (char) o); } else { scanner_warn(s, "unknown escape sequence (%c) in string literal", peek(s)); /* Ignore. */ } } else { buf_append(s, next(s)); } } if (!chr(s, '\"')) { scanner_err(s, "unterminated string literal"); return TOK_ERROR; } if (!buf_append(s, '\0')) { scanner_err(s, "string literal is too long"); return TOK_ERROR; } if (!is_valid_utf8(s->buf, s->buf_pos - 1)) { scanner_err(s, "string literal is not a valid UTF-8 string"); return TOK_ERROR; } val->string.str = s->buf; val->string.len = s->buf_pos; return TOK_STRING; } /* Identifier or include. 
*/ if (is_alpha(peek(s)) || peek(s) == '_') { s->buf_pos = 0; while (is_alnum(peek(s)) || peek(s) == '_') buf_append(s, next(s)); if (!buf_append(s, '\0')) { scanner_err(s, "identifier is too long"); return TOK_ERROR; } if (streq(s->buf, "include")) return TOK_INCLUDE; val->string.str = s->buf; val->string.len = s->buf_pos; return TOK_IDENT; } /* Discard rest of line. */ skip_to_eol(s); scanner_err(s, "unrecognized token"); return TOK_ERROR; }
/*
 * Produce the next token of an XKB source for the parser.
 *
 * Blanks and comments (introduced by "//" or '#', running to the end of
 * the line) are skipped first.  The token's semantic value, if it has one,
 * is stored in *yylval:
 *   - STRING and IDENT: yylval->str is a strdup() copy of the token text
 *     (presumably released by the parser -- not visible here);
 *   - KEYNAME: yylval->sval is the atom interned from the text between
 *     '<' and '>';
 *   - number literals: yylval->num, filled in by number().
 *
 * Returns the token code, END_OF_FILE at end of input, or whatever
 * scanner_error() returns when the input is malformed.
 */
int
_xkbcommon_lex(YYSTYPE *yylval, struct scanner *s)
{
    /* Backslash escapes that stand for exactly one character. */
    static const struct {
        char esc;
        char value;
    } simple_escapes[] = {
        { '\\', '\\' },
        { 'n',  '\n' },
        { 't',  '\t' },
        { 'r',  '\r' },
        { 'b',  '\b' },
        { 'f',  '\f' },
        { 'v',  '\v' },
        { 'e',  '\033' },
    };
    /* Single-character operators and punctuation, in matching order. */
    static const struct {
        char ch;
        int token;
    } punctuation[] = {
        { ';', SEMI },
        { '{', OBRACE },
        { '}', CBRACE },
        { '=', EQUALS },
        { '[', OBRACKET },
        { ']', CBRACKET },
        { '(', OPAREN },
        { ')', CPAREN },
        { '.', DOT },
        { ',', COMMA },
        { '+', PLUS },
        { '-', MINUS },
        { '*', TIMES },
        { '/', DIVIDE },
        { '!', EXCLAM },
        { '~', INVERT },
    };
    const unsigned num_escapes =
        sizeof(simple_escapes) / sizeof(simple_escapes[0]);
    const unsigned num_punctuation =
        sizeof(punctuation) / sizeof(punctuation[0]);
    int token;
    unsigned i;

    /* Alternate between eating blanks and eating comments until neither
     * is left in front of the next token. */
    for (;;) {
        while (is_space(peek(s)))
            next(s);

        if (!lit(s, "//") && !chr(s, '#'))
            break;

        while (!eof(s) && !eol(s))
            next(s);
    }

    /* Nothing but blanks and comments remained. */
    if (eof(s))
        return END_OF_FILE;

    /* A token starts here: record its position, rewind the text buffer. */
    s->token_line = s->line;
    s->token_column = s->column;
    s->buf_pos = 0;

    /* String literal. */
    if (chr(s, '\"')) {
        for (;;) {
            uint8_t octal;

            if (eof(s) || eol(s) || peek(s) == '\"')
                break;

            /* Ordinary character: copy it verbatim. */
            if (!chr(s, '\\')) {
                buf_append(s, next(s));
                continue;
            }

            /* Escape sequence: try the one-character escapes first. */
            for (i = 0; i < num_escapes; i++) {
                if (chr(s, simple_escapes[i].esc)) {
                    buf_append(s, simple_escapes[i].value);
                    break;
                }
            }
            if (i < num_escapes)
                continue;

            /* Then an octal escape; anything else only drops the '\'. */
            if (oct(s, &octal))
                buf_append(s, (char) octal);
            else
                scanner_warn(s, "unknown escape sequence in string literal");
        }

        /* The NUL is appended before the closing quote is consumed. */
        if (!buf_append(s, '\0'))
            return scanner_error(s, "unterminated string literal");
        if (!chr(s, '\"'))
            return scanner_error(s, "unterminated string literal");

        yylval->str = strdup(s->buf);
        if (yylval->str == NULL)
            return scanner_error(s, "scanner out of memory");
        return STRING;
    }

    /* Key name literal: <NAME>. */
    if (chr(s, '<')) {
        while (is_graph(peek(s)) && peek(s) != '>')
            buf_append(s, next(s));

        if (!buf_append(s, '\0') || !chr(s, '>'))
            return scanner_error(s, "unterminated key name literal");

        /* An empty name is accepted; the NUL is not part of the atom. */
        yylval->sval = xkb_atom_intern(s->ctx, s->buf, s->buf_pos - 1);
        return KEYNAME;
    }

    /* Operators and punctuation. */
    for (i = 0; i < num_punctuation; i++) {
        if (chr(s, punctuation[i].ch))
            return punctuation[i].token;
    }

    /* Identifier or keyword. */
    if (is_alpha(peek(s)) || peek(s) == '_') {
        s->buf_pos = 0;
        while (is_alnum(peek(s)) || peek(s) == '_')
            buf_append(s, next(s));

        if (!buf_append(s, '\0'))
            return scanner_error(s, "identifier too long");

        /* Keywords have their own token codes and carry no value. */
        token = keyword_to_token(s->buf);
        if (token != -1)
            return token;

        yylval->str = strdup(s->buf);
        if (yylval->str == NULL)
            return scanner_error(s, "scanner out of memory");
        return IDENT;
    }

    /* Number literal (hexadecimal / decimal / float). */
    if (!number(s, &yylval->num, &token))
        return scanner_error(s, "unrecognized token");

    if (token == ERROR_TOK)
        return scanner_error(s, "malformed number literal");
    return token;
}