예제 #1
0
파일: lexer.cpp 프로젝트: gfv/initd
// Consumes consecutive ASCII whitespace characters, stopping at the first
// character that is not whitespace or at end of input, whichever comes first.
void lexer::skip_whitespace()
{
    // eof_char() is tested first so peek_char() is never called at end of input.
    while (!eof_char() && is_ascii_whitespace(peek_char()))
        advance_char();
}
예제 #2
0
/*
 * Copies the current input character into the output buffer and then moves
 * the input position forward.
 *
 * The three calls are order-dependent: update_quote_status() runs before the
 * character is appended, and advance_char() runs last so that current_char()
 * still refers to the character being appended.
 * NOTE(review): update_quote_status() presumably inspects the current
 * character to track whether we are inside quotes -- confirm against its
 * definition.
 */
static void add_current_char() {
  update_quote_status();
  add_to_buffer(current_char());
  advance_char();
}
예제 #3
0
/*
 * Skips over redundant spaces outside of quotes.
 *
 * While we are not inside quotes and both the current and the next character
 * are SPACE, the input position is advanced. The loop therefore stops as soon
 * as the next character is not a SPACE (or we are inside quotes), leaving the
 * current character untouched for the caller to handle.
 */
static void eat_whitespace() {
  while (!in_quotes && current_char() == SPACE && next_char() == SPACE) {
    advance_char();
  }
}
예제 #4
0
 // Constructs a lexer that reads from the given input stream.
 //
 // `in` is bound to the `input` member, after which advance_char() and then
 // advance() are called once each, in that order.
 // NOTE(review): presumably advance_char() loads the first character and
 // advance() produces the first token, so the lexer is primed before first
 // use -- confirm against their definitions. If `input` is a reference
 // member, the stream must outlive this lexer -- verify.
 Lexer(std::istream &in) : input(in) {
   advance_char();
   advance();
 }
예제 #5
0
파일: lexer.cpp 프로젝트: gfv/initd
// Reads and returns the next token from the input.
//
// Whitespace and comments (single-line and multi-line) are skipped; they never
// produce a token. The recognised tokens are:
//   - raw string literals (prefix "(" ... ")" prefix),
//   - identifiers,
//   - decimal integer literals,
//   - double-quoted string literals with backslash escapes,
//   - the punctuation characters { } = ; , :
//   - any other single character, returned as token_type::unknown.
//
// Returns an empty token_sp when end of input is reached.
//
// Lexical errors (unterminated comment or string, malformed raw string,
// invalid escape, integer literal that does not fit in int) are pushed to
// error_sink. Except for an unterminated multi-line comment, which simply ends
// the input, a best-effort token is still returned so the caller can continue.
token_sp lexer::read_next_token()
{
    for (;;)
    {
        skip_whitespace();

        if (eof_char())
            return token_sp();

        // skip_whitespace() followed by the end-of-input check above guarantees
        // that the current character exists and is not whitespace, so no
        // whitespace token can start here.
        char const* lex_start = pos;

        if (is_single_line_comment_start())
        {
            advance_char(2);
            while (!eof_char() && !is_single_line_comment_end())
                advance_char();

            // Consume the comment terminator, but never step past the end of
            // the input when the comment is the last thing in the file.
            if (!eof_char())
                advance_char();

            continue;
        }

        if (is_multi_line_comment_start())
        {
            advance_char(2);
            for (;;)
            {
                if (eof_char())
                {
                    error_sink->push(error_tag(text_range::make_empty(pos), "unterminated comment"));
                    break;
                }
                else if (is_multi_line_comment_end())
                {
                    advance_char(2);
                    break;
                }
                else
                    advance_char();
            }

            continue;
        }

        if (is_raw_string_literal_start())
        {
            advance_char(2);

            std::string prefix;
            std::string value;

            // Collect the delimiter prefix: everything up to the opening '('.
            for (;;)
            {
                if (eof_char()
                 || peek_char() == ' '
                 || peek_char() == ')'
                 || peek_char() == '\t'
                 || peek_char() == '\v'
                 || peek_char() == '\r'
                 || peek_char() == '\n')
                {
                    text_range r(lex_start, pos);
                    error_sink->push(error_tag(r, "expected '(' in raw string literal"));
                    return make_unique<string_literal_token>(r, std::move(value));
                }
                else if (peek_char() == '(')
                {
                    advance_char();
                    break;
                }
                else
                {
                    prefix += peek_char();
                    advance_char();
                }
            }

            // Collect the contents up to the closing delimiter.
            for (;;)
            {
                if (eof_char())
                {
                    text_range r(lex_start, pos);
                    error_sink->push(error_tag(r, "unterminated string"));
                    return make_unique<string_literal_token>(r, std::move(value));
                }
                else if (is_raw_string_literal_end(prefix))
                {
                    // As the raw-string-literal end must begin with ')' and
                    // std::equal is short-circuited, a raw string literal is
                    // lexed in linear time.

                    // The closing delimiter is ')' + prefix + '"'.
                    advance_char(2 + prefix.size());

                    return make_unique<string_literal_token>(text_range(lex_start, pos), std::move(value));
                }
                else
                {
                    value += peek_char();
                    advance_char();
                }
            }
        }

        if (is_identifier_start(peek_char()))
        {
            std::string s(1, peek_char());
            advance_char();
            while (!eof_char() && is_identifier_trail(peek_char()))
            {
                s += peek_char();
                advance_char();
            }

            return make_identifier_token(text_range(lex_start, pos), std::move(s));
        }

        if (is_number(peek_char()))
        {
            // Largest value of int, derived from the width of unsigned so that
            // no additional header is required.
            constexpr int max_value = static_cast<int>(static_cast<unsigned>(-1) >> 1);

            int value = 0;
            bool overflow = false;

            // The first iteration always runs: the current character is a digit.
            while (!eof_char() && is_number(peek_char()))
            {
                int const digit = char_to_number(peek_char());

                // value * 10 + digit would exceed max_value (signed overflow is
                // undefined behaviour), so stop accumulating but keep consuming
                // the remaining digits as part of this literal.
                if (!overflow)
                {
                    if (value > (max_value - digit) / 10)
                        overflow = true;
                    else
                        value = value * 10 + digit;
                }

                advance_char();
            }

            if (overflow)
            {
                error_sink->push(error_tag(text_range(lex_start, pos), "integer literal is too large"));
                value = max_value;
            }

            return make_unique<integer_literal_token>(text_range(lex_start, pos), value);
        }

        if (peek_char() == '\"')
        {
            advance_char();

            std::string value;

            for (;;)
            {
                if (eof_char() || peek_char() == '\n')
                {
                    text_range r(lex_start, pos);
                    error_sink->push(error_tag(r, "unterminated string"));
                    return make_unique<string_literal_token>(r, std::move(value));
                }
                else if (peek_char() == '\"')
                {
                    advance_char();
                    return make_unique<string_literal_token>(text_range(lex_start, pos), std::move(value));
                }
                else if (peek_char() == '\\')
                {
                    const char* escape_start = pos;
                    advance_char();

                    // A backslash at the very end of input falls through to the
                    // "unterminated string" case on the next iteration.
                    if (!eof_char())
                    {
                        switch (peek_char())
                        {
                        case 'a':
                            value += '\a';
                            break;
                        case 'b':
                            value += '\b';
                            break;
                        case 'f':
                            value += '\f';
                            break;
                        case 'n':
                            value += '\n';
                            break;
                        case 'r':
                            value += '\r';
                            break;
                        case 't':
                            value += '\t';
                            break;
                        case 'v':
                            value += '\v';
                            break;
                        case '\\':
                            value += '\\';
                            break;
                        case '\'':
                            value += '\'';
                            break;
                        case '\"':
                            value += '\"';
                            break;
                        default:
                            // Unknown escape: report it and keep both characters verbatim.
                            error_sink->push(error_tag(text_range(escape_start, pos + 1), "invalid escape character"));
                            value += '\\';
                            value += peek_char();
                            break;
                        }
                        advance_char();
                    }
                }
                else
                {
                    value += peek_char();
                    advance_char();
                }
            }
        }

        // Single-character tokens; anything unrecognised becomes token_type::unknown.
        token_type type = token_type::unknown;
        switch (peek_char())
        {
        case '{':
            type = token_type::lbrace;
            break;
        case '}':
            type = token_type::rbrace;
            break;
        case '=':
            type = token_type::equals;
            break;
        case ';':
            type = token_type::semicolon;
            break;
        case ',':
            type = token_type::comma;
            break;
        case ':':
            type = token_type::colon;
            break;
        default:
            break;
        }

        advance_char();
        return make_unique<simple_token>(text_range(lex_start, pos), type);
    }
}