// Consumes a {- ... -} block comment as a single tok_Comment token.
// Openers and closers are counted so that nested blocks stay inside the same
// token. If the input ends before the nesting count returns to zero, everything
// up to the end of the input is consumed.
void consume_multiline_comment(TokenizeContext& context)
{
    int length = 0;
    int nesting = 0;

    while (context.withinRange(length)) {
        const char first = context.next(length);
        const char second = context.next(length + 1);

        if (first == '{' && second == '-') {
            // Opener. Step over both characters at once so that "{-}" is not
            // also read as a closer.
            nesting++;
            length += 2;
        } else if (first == '-' && second == '}') {
            // Closer. Stop once the outermost block has been closed.
            nesting--;
            length += 2;
            if (nesting == 0)
                break;
        } else {
            length++;
        }
    }

    context.consume(tok_Comment, length);
}
// Tries to match the text of `keyword` (as given by get_token_text) at the
// front of the input. On a match the keyword is consumed and true is returned;
// otherwise nothing is consumed and false is returned.
bool try_to_consume_keyword(TokenizeContext& context, int keyword)
{
    const char* text = get_token_text(keyword);
    const int length = static_cast<int>(strlen(text));

    // The upcoming characters have to spell out the keyword exactly.
    int pos = 0;
    while (pos < length) {
        if (context.next(pos) != text[pos])
            return false;
        pos++;
    }

    // Reject the match when the word continues past the keyword, or when it is
    // directly followed by '(' (the latter rule might be a bad idea).
    const char following = context.next(length);
    if (is_acceptable_inside_identifier(following))
        return false;
    if (following == '(')
        return false;

    context.consume(keyword, length);
    return true;
}
// Consumes a quoted string literal as a single tok_String token.
// The character at the front of the input (either ' or ") is taken as the
// opening quote, and the literal runs until the next unescaped occurrence of
// that same character. A backslash escapes the character that follows it.
//
// If the input ends before a closing quote is found, the token covers the rest
// of the input and nothing more; the closing quote is only counted when it is
// actually present.
void consume_string_literal(TokenizeContext &context)
{
    // Opening quote.
    const char quote_type = context.next();
    int lookahead = 1;

    bool escapedNext = false;
    while (context.withinRange(lookahead)) {
        const char c = context.next(lookahead);

        if (c == quote_type && !escapedNext)
            break;

        // A backslash escapes the next character, unless the backslash itself
        // was escaped.
        escapedNext = (c == '\\' && !escapedNext);
        lookahead++;
    }

    // Closing quote. Skipped for an unterminated literal, so that the token
    // length never extends past the end of the input.
    if (context.withinRange(lookahead))
        lookahead++;

    context.consume(tok_String, lookahead);
}
// Returns true if the input starts with a digit, or with a '.' that is
// immediately followed by a digit. Nothing is consumed.
bool match_number(TokenizeContext &context)
{
    // Look past one optional leading dot.
    const int digit_pos = (context.next(0) == '.') ? 1 : 0;

    if (!is_number(context.next(digit_pos)))
        return false;
    return true;
}
// Consumes the run of whitespace characters at the front of the input as a
// single TK_WHITESPACE token.
void consume_whitespace(TokenizeContext &context)
{
    int length = 0;
    for (; is_whitespace(context.next(length)); length++) {
        // Just counting.
    }
    context.consume(TK_WHITESPACE, length);
}
// Consumes the run of whitespace characters at the front of the input as a
// single tok_Whitespace token.
void consume_whitespace(TokenizeContext &context)
{
    int length = 0;
    for (; is_whitespace(context.next(length)); length++) {
        // Just counting.
    }
    context.consume(tok_Whitespace, length);
}
// Consumes the run of identifier characters at the front of the input as a
// single tok_Identifier token.
void consume_identifier(TokenizeContext &context)
{
    int length = 0;
    for (; is_acceptable_inside_identifier(context.next(length)); length++) {
        // Just counting.
    }
    context.consume(tok_Identifier, length);
}
// Consumes a <<< ... >>> string literal as a single tok_String token.
// The first three characters are taken to be the opening <<< without being
// re-checked; the caller is expected to have matched them.
//
// If the input ends before a closing >>> is found, the token covers the rest
// of the input and nothing more; the closing delimiter is only counted when it
// is actually present.
void consume_triple_quoted_string_literal(TokenizeContext &context)
{
    // Opening <<<
    int lookahead = 3;

    // Scan forward to the closing >>> or to the end of the input.
    while (context.withinRange(lookahead)
            && !(context.next(lookahead) == '>'
                && context.next(lookahead + 1) == '>'
                && context.next(lookahead + 2) == '>'))
        lookahead++;

    // Closing >>>. Skipped for an unterminated literal, so that the token
    // length never extends past the end of the input.
    if (context.withinRange(lookahead))
        lookahead += 3;

    context.consume(tok_String, lookahead);
}
// Consumes a line comment as a single tok_Comment token. The token runs up to,
// but does not include, the next newline (or to the end of the input).
void consume_comment(TokenizeContext& context)
{
    int length = 0;
    for (;;) {
        if (!context.withinRange(length))
            break;
        if (is_newline(context.next(length)))
            break;
        length++;
    }
    context.consume(tok_Comment, length);
}
// Consumes a numeric literal: an optional minus sign, digits, and at most one
// decimal point. The result is a tok_Float token if a decimal point was
// consumed, otherwise a tok_Integer token.
void consume_number(TokenizeContext &context)
{
    // Optional leading minus sign.
    int length = (context.next(0) == '-') ? 1 : 0;
    bool seen_dot = false;

    for (;;) {
        const char c = context.next(length);

        if (is_number(c)) {
            length++;
            continue;
        }

        // Anything other than a digit or a dot ends the number.
        if (c != '.')
            break;

        // A second dot is never part of this number.
        if (seen_dot)
            break;

        const char after_dot = context.next(length + 1);

        // Two dots in a row should be tokenized as TWO_DOTS, so leave them.
        if (after_dot == '.')
            break;

        // A dot followed by an identifier might be an object call, so leave
        // the dot alone here too.
        if (is_identifier_first_letter(after_dot))
            break;

        seen_dot = true;
        length++;
    }

    const int token_type = seen_dot ? tok_Float : tok_Integer;
    context.consume(token_type, length);
}
// Consumes a hexadecimal literal as a single tok_HexInteger token. The first
// two characters are taken to be the "0x" prefix without being re-checked.
void consume_hex_number(TokenizeContext &context)
{
    // Start just past the 0x prefix.
    int length = 2;
    for (; is_hexadecimal_digit(context.next(length)); length++) {
        // Just counting.
    }
    context.consume(tok_HexInteger, length);
}
// Consumes a ':' followed by a run of identifier characters as a single
// TK_NAME token. The first character is taken to be the ':' without being
// re-checked.
void consume_name(TokenizeContext &context)
{
    // Start just past the leading colon.
    int length = 1;
    for (; is_acceptable_inside_identifier(context.next(length)); length++) {
        // Just counting.
    }
    context.consume(TK_NAME, length);
}
// Consumes a hexadecimal literal as a single TK_HEX_INTEGER token. The first
// two characters are taken to be the "0x" prefix without being re-checked.
void consume_hex_number(TokenizeContext &context)
{
    // Start just past the 0x prefix.
    int length = 2;
    for (; is_hexadecimal_digit(context.next(length)); length++) {
        // Just counting.
    }
    context.consume(TK_HEX_INTEGER, length);
}
// Consumes a ':' followed by a run of identifier characters as a single
// tok_ColonString token. The first character is taken to be the ':' without
// being re-checked.
void consume_symbol(TokenizeContext &context)
{
    // Start just past the leading colon.
    int length = 1;
    for (; is_acceptable_inside_identifier(context.next(length)); length++) {
        // Just counting.
    }
    context.consume(tok_ColonString, length);
}
// Consumes a '#' followed by a run of hexadecimal digits. The result is a
// tok_Color token when there are 3, 4, 6 or 8 digits (not counting the '#'),
// and a tok_Unrecognized token of the same length otherwise.
void consume_color_literal(TokenizeContext &context)
{
    // Start just past the leading '#'.
    int length = 1;
    while (is_hexadecimal_digit(context.next(length)))
        length++;

    switch (length - 1) {
        case 3:
        case 4:
        case 6:
        case 8:
            context.consume(tok_Color, length);
            break;
        default:
            context.consume(tok_Unrecognized, length);
            break;
    }
}
// Consumes exactly one token from the front of the input. The token type is
// picked from the next few characters: keywords and identifiers first, then
// whitespace, hex numbers, numbers, and finally punctuation. Anything that
// doesn't match is consumed as a one-character TK_UNRECOGNIZED token.
void top_level_consume_token(TokenizeContext &context)
{
    const char first = context.next();

    if (is_identifier_first_letter(first)) {
        // Keywords are tried in this order and the first match wins.
        // 'do once' has to be checked before 'do'.
        static const int keywords[] = {
            TK_DEF, TK_TYPE, TK_BEGIN, TK_END, TK_IF, TK_ELSE, TK_ELIF,
            TK_FOR, TK_STATE, TK_IN, TK_TRUE, TK_FALSE,
            TK_DO_ONCE, TK_DO,
            TK_NAMESPACE, TK_INCLUDE, TK_IMPORT, TK_AND, TK_OR, TK_DISCARD,
            TK_NULL, TK_RETURN, TK_BREAK, TK_CONTINUE, TK_SWITCH, TK_CASE,
            TK_WHILE
        };
        const int keyword_count =
            static_cast<int>(sizeof(keywords) / sizeof(keywords[0]));

        for (int i = 0; i < keyword_count; i++) {
            if (try_to_consume_keyword(context, keywords[i]))
                return;
        }

        // Not a keyword, so it's a plain identifier.
        consume_identifier(context);
        return;
    }

    if (is_whitespace(first)) {
        consume_whitespace(context);
        return;
    }

    const char second = context.next(1);

    if (first == '0' && second == 'x') {
        consume_hex_number(context);
        return;
    }

    if (match_number(context)) {
        consume_number(context);
        return;
    }

    // Punctuation and operators.
    switch (first) {
        case '(':
            context.consume(TK_LPAREN, 1);
            return;
        case ')':
            context.consume(TK_RPAREN, 1);
            return;
        case '{':
            if (second == '-')
                consume_multiline_comment(context);
            else
                context.consume(TK_LBRACE, 1);
            return;
        case '}':
            context.consume(TK_RBRACE, 1);
            return;
        case '[':
            context.consume(TK_LBRACKET, 1);
            return;
        case ']':
            context.consume(TK_RBRACKET, 1);
            return;
        case ',':
            context.consume(TK_COMMA, 1);
            return;
        case '@':
            if (second == '.')
                context.consume(TK_AT_DOT, 2);
            else
                context.consume(TK_AT_SIGN, 1);
            return;
        case '=':
            if (second == '=')
                context.consume(TK_DOUBLE_EQUALS, 2);
            else
                context.consume(TK_EQUALS, 1);
            return;
        case '"':
        case '\'':
            consume_string_literal(context);
            return;
        case '\n':
            context.consume(TK_NEWLINE, 1);
            return;
        case '.':
            if (second != '.')
                context.consume(TK_DOT, 1);
            else if (context.next(2) == '.')
                context.consume(TK_ELLIPSIS, 3);
            else
                context.consume(TK_TWO_DOTS, 2);
            return;
        case '?':
            context.consume(TK_QUESTION, 1);
            return;
        case '*':
            if (second == '=')
                context.consume(TK_STAR_EQUALS, 2);
            else
                context.consume(TK_STAR, 1);
            return;
        case '/':
            if (second == '=')
                context.consume(TK_SLASH_EQUALS, 2);
            else if (second == '/')
                context.consume(TK_DOUBLE_SLASH, 2);
            else
                context.consume(TK_SLASH, 1);
            return;
        case '!':
            // A '!' on its own is not a token; it ends up as unrecognized.
            if (second != '=')
                break;
            context.consume(TK_NOT_EQUALS, 2);
            return;
        case ':':
            if (second == '=')
                context.consume(TK_COLON_EQUALS, 2);
            else if (second == ':')
                context.consume(TK_DOUBLE_COLON, 2);
            else if (is_identifier_first_letter(second))
                consume_name(context);
            else
                context.consume(TK_COLON, 1);
            return;
        case '+':
            if (second == '=')
                context.consume(TK_PLUS_EQUALS, 2);
            else
                context.consume(TK_PLUS, 1);
            return;
        case '-':
            if (second == '>')
                context.consume(TK_RIGHT_ARROW, 2);
            else if (second == '-')
                consume_comment(context);
            else if (second == '=')
                context.consume(TK_MINUS_EQUALS, 2);
            else
                context.consume(TK_MINUS, 1);
            return;
        case '<':
            if (second == '<' && context.next(2) == '<')
                consume_triple_quoted_string_literal(context);
            else if (second == '=')
                context.consume(TK_LTHANEQ, 2);
            else if (second == '-')
                context.consume(TK_LEFT_ARROW, 2);
            else
                context.consume(TK_LTHAN, 1);
            return;
        case '>':
            if (second == '=')
                context.consume(TK_GTHANEQ, 2);
            else
                context.consume(TK_GTHAN, 1);
            return;
        case '%':
            context.consume(TK_PERCENT, 1);
            return;
        case '|':
            // A '|' on its own is not a token; it ends up as unrecognized.
            if (second != '|')
                break;
            context.consume(TK_DOUBLE_VERTICAL_BAR, 2);
            return;
        case '&':
            if (second == '&')
                context.consume(TK_DOUBLE_AMPERSAND, 2);
            else
                context.consume(TK_AMPERSAND, 1);
            return;
        case ';':
            context.consume(TK_SEMICOLON, 1);
            return;
        case '#':
            consume_color_literal(context);
            return;
    }

    // Nothing matched: consume the next character as UNRECOGNIZED.
    context.consume(TK_UNRECOGNIZED, 1);
}
// Consumes exactly one token from the front of the input. The token type is
// picked from the next few characters: keywords and identifiers first, then
// whitespace, hex numbers, numbers, and finally punctuation. Anything that
// doesn't match is consumed as a one-character tok_Unrecognized token.
void top_level_consume_token(TokenizeContext &context)
{
    const char first = context.next();

    if (is_identifier_first_letter(first)) {
        // The keyword list is split in two by first letter so that only one
        // half has to be tried. Within a half, keywords are tried in the
        // listed order and the first match wins.
        static const int keywords_a_to_m[] = {
            tok_And, tok_Break, tok_Case, tok_Continue, tok_Def, tok_Discard,
            tok_Else, tok_Elif, tok_False, tok_For, tok_If, tok_In,
            tok_Import, tok_Include, tok_Let
        };
        static const int keywords_n_to_z[] = {
            tok_Namespace, tok_Not, tok_Nil, tok_Or, tok_Return, tok_State,
            tok_Struct, tok_Switch, tok_True, tok_Require, tok_RequireLocal,
            tok_Package, tok_Section, tok_While
        };

        const int* candidates;
        int candidate_count;
        if (first <= 'm') {
            candidates = keywords_a_to_m;
            candidate_count = static_cast<int>(
                sizeof(keywords_a_to_m) / sizeof(keywords_a_to_m[0]));
        } else {
            candidates = keywords_n_to_z;
            candidate_count = static_cast<int>(
                sizeof(keywords_n_to_z) / sizeof(keywords_n_to_z[0]));
        }

        for (int i = 0; i < candidate_count; i++) {
            if (try_to_consume_keyword(context, candidates[i]))
                return;
        }

        // Not a keyword, so it's a plain identifier.
        consume_identifier(context);
        return;
    }

    if (is_whitespace(first)) {
        consume_whitespace(context);
        return;
    }

    const char second = context.next(1);

    if (first == '0' && second == 'x') {
        consume_hex_number(context);
        return;
    }

    if (match_number(context)) {
        consume_number(context);
        return;
    }

    // Punctuation and operators.
    switch (first) {
        case '(':
            context.consume(tok_LParen, 1);
            return;
        case ')':
            context.consume(tok_RParen, 1);
            return;
        case '{':
            if (second == '-')
                consume_multiline_comment(context);
            else
                context.consume(tok_LBrace, 1);
            return;
        case '}':
            context.consume(tok_RBrace, 1);
            return;
        case '[':
            context.consume(tok_LSquare, 1);
            return;
        case ']':
            context.consume(tok_RSquare, 1);
            return;
        case ',':
            context.consume(tok_Comma, 1);
            return;
        case '@':
            context.consume(tok_At, 1);
            return;
        case '=':
            if (second == '=')
                context.consume(tok_DoubleEquals, 2);
            else if (second == '>')
                context.consume(tok_FatArrow, 2);
            else
                context.consume(tok_Equals, 1);
            return;
        case '"':
        case '\'':
            consume_string_literal(context);
            return;
        case '\n':
            context.consume(tok_Newline, 1);
            return;
        case '.':
            if (second == '.') {
                if (context.next(2) == '.')
                    context.consume(tok_Ellipsis, 3);
                else
                    context.consume(tok_TwoDots, 2);
            } else if (second == '@') {
                context.consume(tok_DotAt, 2);
            } else {
                context.consume(tok_Dot, 1);
            }
            return;
        case '?':
            context.consume(tok_Question, 1);
            return;
        case '*':
            if (second == '=')
                context.consume(tok_StarEquals, 2);
            else if (second == '*')
                context.consume(tok_DoubleStar, 2);
            else
                context.consume(tok_Star, 1);
            return;
        case '/':
            if (second == '=')
                context.consume(tok_SlashEquals, 2);
            else if (second == '/')
                context.consume(tok_DoubleSlash, 2);
            else
                context.consume(tok_Slash, 1);
            return;
        case '!':
            // A '!' on its own is not a token; it ends up as unrecognized.
            if (second != '=')
                break;
            context.consume(tok_NotEquals, 2);
            return;
        case ':':
            if (second == '=')
                context.consume(tok_ColonEquals, 2);
            else if (second == ':')
                context.consume(tok_DoubleColon, 2);
            else if (is_acceptable_inside_identifier(second))
                consume_symbol(context);
            else
                context.consume(tok_Colon, 1);
            return;
        case '+':
            if (second == '=')
                context.consume(tok_PlusEquals, 2);
            else
                context.consume(tok_Plus, 1);
            return;
        case '-':
            if (second == '>')
                context.consume(tok_RightArrow, 2);
            else if (second == '-')
                consume_comment(context);
            else if (second == '=')
                context.consume(tok_MinusEquals, 2);
            else
                context.consume(tok_Minus, 1);
            return;
        case '<':
            if (second == '<' && context.next(2) == '<')
                consume_triple_quoted_string_literal(context);
            else if (second == '=')
                context.consume(tok_LThanEq, 2);
            else if (second == '-')
                context.consume(tok_LeftArrow, 2);
            else
                context.consume(tok_LThan, 1);
            return;
        case '>':
            if (second == '=')
                context.consume(tok_GThanEq, 2);
            else
                context.consume(tok_GThan, 1);
            return;
        case '%':
            context.consume(tok_Percent, 1);
            return;
        case '|':
            if (second == '|')
                context.consume(tok_DoubleVerticalBar, 2);
            else
                context.consume(tok_VerticalBar, 1);
            return;
        case '&':
            if (second == '&')
                context.consume(tok_DoubleAmpersand, 2);
            else
                context.consume(tok_Ampersand, 1);
            return;
        case ';':
            context.consume(tok_Semicolon, 1);
            return;
        case '#':
            consume_color_literal(context);
            return;
    }

    // Nothing matched: consume the next character as UNRECOGNIZED.
    context.consume(tok_Unrecognized, 1);
}