/// Advances the cursor over consecutive ASCII whitespace characters.
///
/// Stops at the first character for which is_ascii_whitespace() is false, or
/// at the end of the input, whichever comes first. Does nothing when the
/// cursor is already at such a position.
void lexer::skip_whitespace()
{
    // eof_char() is tested first so peek_char() is never called at end of input.
    while (!eof_char() && is_ascii_whitespace(peek_char())) {
        advance_char();
    }
}
/// Hands the current character to add_to_buffer() and then moves on to the
/// next one.
///
/// update_quote_status() runs first, i.e. while the character about to be
/// stored is still the current one.
static void add_current_char()
{
    update_quote_status();
    add_to_buffer(current_char());
    advance_char();
}
/// Skips forward through a run of SPACE characters outside of quotes.
///
/// Keeps advancing for as long as `in_quotes` is false and both the current
/// and the next character are SPACE. The condition is re-evaluated before
/// every step, so the loop ends as soon as any of the three parts fails.
static void eat_whitespace()
{
    while (!in_quotes && current_char() == SPACE && next_char() == SPACE) {
        advance_char();
    }
}
/// Constructs a lexer that reads from `in`.
///
/// After binding `input` to the stream, calls advance_char() and then
/// advance(), in that order.
/// NOTE(review): presumably this preloads the first character and the first
/// token — confirm against the definitions of advance_char() and advance().
///
/// @param in  stream the lexer reads from; stored in `input`.
Lexer(std::istream &in)
    : input(in)
{
    advance_char();
    advance();
}
/// Lexes and returns the next token of the input.
///
/// Whitespace and comments are consumed silently and never surface as tokens.
/// Recognised, in this order: raw string literals, identifiers, decimal
/// integer literals, double-quoted string literals with backslash escapes, and
/// the single-character tokens `{ } = ; , :`. Any other character yields a
/// one-character `token_type::unknown` token.
///
/// Lexical problems (unterminated comment or string, malformed raw string,
/// invalid escape, integer literal that does not fit in `int`) are pushed to
/// `error_sink`; where a token is in progress it is still returned with the
/// text collected so far.
///
/// @return the next token, or an empty `token_sp` once the input is exhausted.
token_sp lexer::read_next_token()
{
    for (;;) {
        // skip_whitespace() consumes every leading whitespace character, so a
        // token never starts on whitespace below.
        skip_whitespace();
        if (eof_char())
            return token_sp();

        char const* lex_start = pos;

        // ---- comments: consumed, then lexing restarts at the loop top ------
        if (is_single_line_comment_start()) {
            advance_char(2);
            while (!eof_char() && !is_single_line_comment_end())
                advance_char();
            // Step over the terminator, but never past the end of the input.
            if (!eof_char())
                advance_char();
            continue;
        }

        if (is_multi_line_comment_start()) {
            advance_char(2);
            for (;;) {
                if (eof_char()) {
                    error_sink->push(error_tag(text_range::make_empty(pos), "unterminated comment"));
                    break;
                }
                if (is_multi_line_comment_end()) {
                    advance_char(2);
                    break;
                }
                advance_char();
            }
            continue;
        }

        // ---- raw string literal --------------------------------------------
        // Must be tested before identifiers: the literal starts with a
        // character that would otherwise begin an identifier.
        if (is_raw_string_literal_start()) {
            advance_char(2);
            std::string prefix;
            std::string value;

            // Collect the delimiter prefix up to the opening '('.
            for (;;) {
                const bool bad_prefix = eof_char()
                    || peek_char() == ' ' || peek_char() == ')'
                    || peek_char() == '\t' || peek_char() == '\v'
                    || peek_char() == '\r' || peek_char() == '\n';
                if (bad_prefix) {
                    text_range r(lex_start, pos);
                    error_sink->push(error_tag(r, "expected '(' in raw string literal"));
                    return make_unique<string_literal_token>(r, std::move(value));
                }
                if (peek_char() == '(') {
                    advance_char();
                    break;
                }
                prefix += peek_char();
                advance_char();
            }

            // Collect the body up to the matching terminator.
            for (;;) {
                if (eof_char()) {
                    text_range r(lex_start, pos);
                    error_sink->push(error_tag(r, "unterminated string"));
                    return make_unique<string_literal_token>(r, std::move(value));
                }
                // A terminator has to begin with ')', so
                // is_raw_string_literal_end() is expected to reject most
                // positions on the first character, keeping the scan linear.
                if (is_raw_string_literal_end(prefix)) {
                    // ')' + prefix + closing quote
                    advance_char(2 + prefix.size());
                    return make_unique<string_literal_token>(text_range(lex_start, pos), std::move(value));
                }
                value += peek_char();
                advance_char();
            }
        }

        // ---- identifier ----------------------------------------------------
        if (is_identifier_start(peek_char())) {
            std::string s(1, peek_char());
            advance_char();
            while (!eof_char() && is_identifier_trail(peek_char())) {
                s += peek_char();
                advance_char();
            }
            return make_identifier_token(text_range(lex_start, pos), std::move(s));
        }

        // ---- integer literal -----------------------------------------------
        if (is_number(peek_char())) {
            // Largest value of `int`, computed from the unsigned all-ones
            // pattern so that no extra header is required here.
            const int int_max = static_cast<int>(~0u >> 1);

            int value = 0;
            bool overflowed = false;
            do {
                const int digit = char_to_number(peek_char());
                // value * 10 + digit would exceed int_max: signed overflow is
                // undefined behaviour, so saturate instead of computing it.
                if (overflowed || value > (int_max - digit) / 10) {
                    overflowed = true;
                    value = int_max;
                } else {
                    value = value * 10 + digit;
                }
                advance_char();
            } while (!eof_char() && is_number(peek_char()));

            text_range r(lex_start, pos);
            if (overflowed)
                error_sink->push(error_tag(r, "integer literal is too large"));
            return make_unique<integer_literal_token>(r, value);
        }

        // ---- quoted string literal -----------------------------------------
        if (peek_char() == '\"') {
            advance_char();
            std::string value;
            for (;;) {
                if (eof_char() || peek_char() == '\n') {
                    text_range r(lex_start, pos);
                    error_sink->push(error_tag(r, "unterminated string"));
                    return make_unique<string_literal_token>(r, std::move(value));
                }
                if (peek_char() == '\"') {
                    advance_char();
                    return make_unique<string_literal_token>(text_range(lex_start, pos), std::move(value));
                }
                if (peek_char() != '\\') {
                    value += peek_char();
                    advance_char();
                    continue;
                }

                // Escape sequence: a backslash followed by one character.
                const char* escape_start = pos;
                advance_char();
                if (eof_char())
                    continue; // the next iteration reports the unterminated string

                switch (peek_char()) {
                case 'a':  value += '\a'; break;
                case 'b':  value += '\b'; break;
                case 'f':  value += '\f'; break;
                case 'n':  value += '\n'; break;
                case 'r':  value += '\r'; break;
                case 't':  value += '\t'; break;
                case 'v':  value += '\v'; break;
                case '\\': value += '\\'; break;
                case '\'': value += '\''; break;
                case '\"': value += '\"'; break;
                default:
                    // Unknown escape: report it and keep both characters verbatim.
                    error_sink->push(error_tag(text_range(escape_start, pos + 1), "invalid escape character"));
                    value += '\\';
                    value += peek_char();
                    break;
                }
                advance_char();
            }
        }

        // ---- single-character tokens ---------------------------------------
        token_type kind = token_type::unknown;
        switch (peek_char()) {
        case '{': kind = token_type::lbrace;    break;
        case '}': kind = token_type::rbrace;    break;
        case '=': kind = token_type::equals;    break;
        case ';': kind = token_type::semicolon; break;
        case ',': kind = token_type::comma;     break;
        case ':': kind = token_type::colon;     break;
        default:  break; // anything else is reported as an unknown token
        }
        advance_char();
        return make_unique<simple_token>(text_range(lex_start, pos), kind);
    }
}