// Tries to match the text of `keyword` (as returned by get_token_text) at the
// current position of `context`.
//
// On a match, consumes that many characters as a `keyword` token and returns
// true. Otherwise returns false without consuming anything.
bool try_to_consume_keyword(TokenizeContext& context, int keyword)
{
    const char* text = get_token_text(keyword);
    const int length = static_cast<int>(strlen(text));

    // Every character of the keyword must match the upcoming input.
    int pos = 0;
    while (pos < length) {
        if (context.next(pos) != text[pos])
            return false;
        ++pos;
    }

    // Reject when the word continues (we only matched a prefix of a longer
    // identifier), and also when it is immediately followed by '('. The
    // second rule keeps the word from being treated as a keyword in that
    // position; the original author noted this might be a bad idea.
    if (is_acceptable_inside_identifier(context.next(length))
            || context.next(length) == '(')
        return false;

    // Full match: emit the keyword token.
    context.consume(keyword, length);
    return true;
}
// Consumes the longest run of characters accepted by
// is_acceptable_inside_identifier, starting at the current position, as a
// single tok_Identifier token.
void consume_identifier(TokenizeContext &context)
{
    int length = 0;

    for (;;) {
        if (!is_acceptable_inside_identifier(context.next(length)))
            break;
        ++length;
    }

    context.consume(tok_Identifier, length);
}
// Consumes a ':'-prefixed symbol as a tok_ColonString token: the leading ':'
// plus every following character accepted by is_acceptable_inside_identifier.
void consume_symbol(TokenizeContext &context)
{
    // Start at 1 so the leading ':' is counted without being inspected.
    int length = 1;

    for (; is_acceptable_inside_identifier(context.next(length)); ++length) {
        // Nothing to do; the loop header extends the token.
    }

    context.consume(tok_ColonString, length);
}
// Consumes a TK_NAME token: the first character is taken unconditionally
// (the original comment describes it as a leading ':'), followed by every
// character accepted by is_acceptable_inside_identifier.
//
// NOTE(review): no caller is visible in this part of the file, and the token
// id does not follow the tok_* naming used elsewhere -- confirm this function
// is still in use.
void consume_name(TokenizeContext &context)
{
    // Start at 1 so the first character is counted without being inspected.
    int length = 1;

    for (; is_acceptable_inside_identifier(context.next(length)); ++length) {
        // Nothing to do; the loop header extends the token.
    }

    context.consume(TK_NAME, length);
}
// Recognizes the token that starts at the current position of `context` and
// consumes it, either directly through context.consume or by delegating to
// one of the specialised consume_* helpers.
//
// Checks are made in this order: identifier/keyword, whitespace, hex number
// ("0x" prefix), number, then punctuation and operators. Anything that is not
// recognized is consumed as a one-character tok_Unrecognized token.
void top_level_consume_token(TokenizeContext &context)
{
    if (is_identifier_first_letter(context.next())) {
        // Keyword candidates, split in two groups so only part of the list is
        // tried. The order inside each group is the order in which the
        // keywords are attempted.
        static const int first_half_keywords[] = {
            tok_And, tok_Break, tok_Case, tok_Continue, tok_Def, tok_Discard,
            tok_Else, tok_Elif, tok_False, tok_For, tok_If, tok_In,
            tok_Import, tok_Include, tok_Let
        };
        static const int second_half_keywords[] = {
            tok_Namespace, tok_Not, tok_Nil, tok_Or, tok_Return, tok_State,
            tok_Struct, tok_Switch, tok_True, tok_Require, tok_RequireLocal,
            tok_Package, tok_Section, tok_While
        };

        const int* candidates;
        int candidate_count;

        if (context.next() <= 'm') {
            // a through m
            candidates = first_half_keywords;
            candidate_count =
                (int) (sizeof(first_half_keywords) / sizeof(first_half_keywords[0]));
        } else {
            // n through z
            candidates = second_half_keywords;
            candidate_count =
                (int) (sizeof(second_half_keywords) / sizeof(second_half_keywords[0]));
        }

        for (int i = 0; i < candidate_count; ++i) {
            if (try_to_consume_keyword(context, candidates[i]))
                return;
        }

        // No keyword matched, so this is a plain identifier.
        consume_identifier(context);
        return;
    }

    if (is_whitespace(context.next())) {
        consume_whitespace(context);
        return;
    }

    // Hex literals are checked before ordinary numbers.
    if (context.next() == '0' && context.next(1) == 'x') {
        consume_hex_number(context);
        return;
    }

    if (match_number(context)) {
        consume_number(context);
        return;
    }

    // Punctuation and operators.
    switch (context.next()) {

        // --- Tokens that are always a single character ---
        case '(': context.consume(tok_LParen, 1);    return;
        case ')': context.consume(tok_RParen, 1);    return;
        case '}': context.consume(tok_RBrace, 1);    return;
        case '[': context.consume(tok_LSquare, 1);   return;
        case ']': context.consume(tok_RSquare, 1);   return;
        case ',': context.consume(tok_Comma, 1);     return;
        case '@': context.consume(tok_At, 1);        return;
        case '?': context.consume(tok_Question, 1);  return;
        case '%': context.consume(tok_Percent, 1);   return;
        case ';': context.consume(tok_Semicolon, 1); return;
        case '\n': context.consume(tok_Newline, 1);  return;

        // --- Literals handled by dedicated helpers ---
        case '"':
        case '\'':
            consume_string_literal(context);
            return;

        case '#':
            consume_color_literal(context);
            return;

        // --- Characters that may start a longer token ---
        case '{':
            // "{-" opens a multiline comment.
            if (context.next(1) == '-')
                consume_multiline_comment(context);
            else
                context.consume(tok_LBrace, 1);
            return;

        case '=':
            if (context.next(1) == '=')
                context.consume(tok_DoubleEquals, 2);
            else if (context.next(1) == '>')
                context.consume(tok_FatArrow, 2);
            else
                context.consume(tok_Equals, 1);
            return;

        case '.':
            if (context.next(1) == '.') {
                if (context.next(2) == '.') {
                    context.consume(tok_Ellipsis, 3);
                    return;
                }
                context.consume(tok_TwoDots, 2);
                return;
            }
            if (context.next(1) == '@') {
                context.consume(tok_DotAt, 2);
                return;
            }
            context.consume(tok_Dot, 1);
            return;

        case '*':
            if (context.next(1) == '=')
                context.consume(tok_StarEquals, 2);
            else if (context.next(1) == '*')
                context.consume(tok_DoubleStar, 2);
            else
                context.consume(tok_Star, 1);
            return;

        case '/':
            if (context.next(1) == '=')
                context.consume(tok_SlashEquals, 2);
            else if (context.next(1) == '/')
                context.consume(tok_DoubleSlash, 2);
            else
                context.consume(tok_Slash, 1);
            return;

        case '!':
            if (context.next(1) == '=') {
                context.consume(tok_NotEquals, 2);
                return;
            }
            // A lone '!' is not a token; it ends up as tok_Unrecognized below.
            break;

        case ':':
            if (context.next(1) == '=') {
                context.consume(tok_ColonEquals, 2);
                return;
            }
            if (context.next(1) == ':') {
                context.consume(tok_DoubleColon, 2);
                return;
            }
            // ':' directly followed by an identifier character is a symbol.
            if (is_acceptable_inside_identifier(context.next(1))) {
                consume_symbol(context);
                return;
            }
            context.consume(tok_Colon, 1);
            return;

        case '+':
            if (context.next(1) == '=') {
                context.consume(tok_PlusEquals, 2);
                return;
            }
            context.consume(tok_Plus, 1);
            return;

        case '-':
            if (context.next(1) == '>')
                context.consume(tok_RightArrow, 2);
            else if (context.next(1) == '-')
                consume_comment(context);   // "--" starts a comment
            else if (context.next(1) == '=')
                context.consume(tok_MinusEquals, 2);
            else
                context.consume(tok_Minus, 1);
            return;

        case '<':
            // "<<<" opens a triple-quoted string literal.
            if (context.next(1) == '<' && context.next(2) == '<')
                consume_triple_quoted_string_literal(context);
            else if (context.next(1) == '=')
                context.consume(tok_LThanEq, 2);
            else if (context.next(1) == '-')
                context.consume(tok_LeftArrow, 2);
            else
                context.consume(tok_LThan, 1);
            return;

        case '>':
            if (context.next(1) == '=') {
                context.consume(tok_GThanEq, 2);
                return;
            }
            context.consume(tok_GThan, 1);
            return;

        case '|':
            if (context.next(1) == '|') {
                context.consume(tok_DoubleVerticalBar, 2);
                return;
            }
            context.consume(tok_VerticalBar, 1);
            return;

        case '&':
            if (context.next(1) == '&') {
                context.consume(tok_DoubleAmpersand, 2);
                return;
            }
            context.consume(tok_Ampersand, 1);
            return;
    }

    // Nothing matched: consume one character as UNRECOGNIZED.
    context.consume(tok_Unrecognized, 1);
}