Esempio n. 1
0
// Consumes a quoted string literal as a single tok_String token.
//
// The character at the current position is taken as the opening quote (it can
// be ' or "), and the literal runs until the next unescaped occurrence of that
// same character. A backslash escapes exactly one following character, so \"
// does not close the literal, and in \\" the quote does close it.
//
// If the input ends before a closing quote is found, the token covers the
// text up to the end of the input and no closing quote is counted.
void consume_string_literal(TokenizeContext &context)
{
    // The opening quote is the first character of the token.
    const char quote_type = context.next();
    int lookahead = 1;

    bool escapedNext = false;
    while (context.withinRange(lookahead)) {

        const char c = context.next(lookahead);

        if (c == quote_type && !escapedNext)
            break;

        // Only an unescaped backslash starts an escape; anything else ends one.
        escapedNext = (c == '\\' && !escapedNext);

        lookahead++;
    }

    // Count the closing quote only when the loop stopped on one. If the loop
    // ran out of input instead, adding one here would make the token longer
    // than the remaining input.
    // NOTE(review): assumes withinRange(i) is false once offset i is past the
    // end of the input -- confirm against TokenizeContext.
    if (context.withinRange(lookahead))
        lookahead++;

    context.consume(tok_String, lookahead);
}
Esempio n. 2
0
// Tries to match the text of `keyword` at the current position.
//
// Returns true and consumes the keyword as a token of type `keyword` when the
// text matches and is followed neither by a character that could continue an
// identifier nor by '('. Returns false, consuming nothing, otherwise.
bool try_to_consume_keyword(TokenizeContext& context, int keyword)
{
    const char* const text = get_token_text(keyword);
    const int length = static_cast<int>(strlen(text));

    // Walk the input and the keyword text in step; stop at the first mismatch.
    int matched = 0;
    while (matched < length) {
        if (context.next(matched) != text[matched])
            return false;
        matched++;
    }

    // The word must end here: a following identifier character means this is
    // only a prefix of a longer identifier. A following '(' also rejects the
    // match (the original author notes this might be a bad idea).
    if (is_acceptable_inside_identifier(context.next(length))
            || context.next(length) == '(')
        return false;

    context.consume(keyword, length);
    return true;
}
Esempio n. 3
0
// Consumes the run of whitespace characters at the current position as a
// single tok_Whitespace token.
void consume_whitespace(TokenizeContext &context)
{
    int length = 0;
    for (;;) {
        if (!is_whitespace(context.next(length)))
            break;
        length++;
    }

    context.consume(tok_Whitespace, length);
}
Esempio n. 4
0
// Consumes the run of identifier characters at the current position as a
// single tok_Identifier token.
void consume_identifier(TokenizeContext &context)
{
    int length = 0;
    for (; is_acceptable_inside_identifier(context.next(length)); length++) {
        // Nothing to do; the loop only measures the identifier.
    }

    context.consume(tok_Identifier, length);
}
Esempio n. 5
0
// Consumes the run of whitespace characters at the current position as a
// single TK_WHITESPACE token.
void consume_whitespace(TokenizeContext &context)
{
    int length = 0;
    for (; is_whitespace(context.next(length)); length++) {
        // Nothing to do; the loop only measures the whitespace run.
    }

    context.consume(TK_WHITESPACE, length);
}
Esempio n. 6
0
// Consumes a line comment as a single tok_Comment token. The token runs from
// the current position up to, but not including, the next newline character,
// or to the end of the input if no newline follows.
void consume_comment(TokenizeContext& context)
{
    int length = 0;
    for (;;) {
        if (!context.withinRange(length))
            break;
        if (is_newline(context.next(length)))
            break;
        length++;
    }

    context.consume(tok_Comment, length);
}
Esempio n. 7
0
// Consumes a name token (TK_NAME): a leading ':' followed by a run of
// identifier characters. The first character is counted without being checked.
void consume_name(TokenizeContext &context)
{
    // Start past the leading ':'.
    int length = 1;

    for (;;) {
        if (!is_acceptable_inside_identifier(context.next(length)))
            break;
        length++;
    }

    context.consume(TK_NAME, length);
}
Esempio n. 8
0
// Consumes a hexadecimal integer (TK_HEX_INTEGER): the two-character "0x"
// prefix followed by a run of hexadecimal digits. The prefix is counted
// without being checked.
void consume_hex_number(TokenizeContext &context)
{
    // Start past the "0x" prefix.
    int length = 2;

    for (; is_hexadecimal_digit(context.next(length)); length++) {
        // Nothing to do; the loop only measures the digit run.
    }

    context.consume(TK_HEX_INTEGER, length);
}
Esempio n. 9
0
// Consumes a symbol token (tok_ColonString): a leading ':' followed by a run
// of identifier characters. The first character is counted without being
// checked.
void consume_symbol(TokenizeContext &context)
{
    // Start past the leading ':'.
    int length = 1;

    for (; is_acceptable_inside_identifier(context.next(length)); length++) {
        // Nothing to do; the loop only measures the symbol.
    }

    context.consume(tok_ColonString, length);
}
Esempio n. 10
0
// Consumes a hexadecimal integer (tok_HexInteger): the two-character "0x"
// prefix followed by a run of hexadecimal digits. The prefix is counted
// without being checked.
void consume_hex_number(TokenizeContext &context)
{
    // Start past the "0x" prefix.
    int length = 2;

    for (;;) {
        if (!is_hexadecimal_digit(context.next(length)))
            break;
        length++;
    }

    context.consume(tok_HexInteger, length);
}
Esempio n. 11
0
// Reports whether a number starts at the current position: either a digit,
// or a '.' immediately followed by a digit. Consumes nothing.
bool match_number(TokenizeContext &context)
{
    int digit_offset = 0;

    // Skip one optional leading dot so that ".5" is recognized.
    if (context.next(digit_offset) == '.')
        digit_offset = 1;

    return is_number(context.next(digit_offset)) ? true : false;
}
Esempio n. 12
0
// Consumes a <<< ... >>> string literal as a single tok_String token.
//
// The three characters at the current position are counted as the opening
// <<< without being re-checked. The literal then runs up to and including the
// first >>> that follows.
//
// If the input ends before a closing >>> is found, the token stops at the
// point where the scan ran out of input and no terminator is counted.
void consume_triple_quoted_string_literal(TokenizeContext &context)
{
    // Start past the opening <<<.
    int lookahead = 3;

    while (context.withinRange(lookahead) &&
            !(context.next(lookahead) == '>'
                && context.next(lookahead + 1) == '>'
                && context.next(lookahead + 2) == '>'))
        lookahead++;

    // The loop stops either on a >>> (still within range) or because the
    // input ran out. Count the closing >>> only in the first case; adding
    // three unconditionally would make an unterminated literal longer than
    // the remaining input.
    // NOTE(review): assumes withinRange(i) is false once offset i is past the
    // end of the input -- confirm against TokenizeContext.
    if (context.withinRange(lookahead))
        lookahead += 3;

    context.consume(tok_String, lookahead);
}
Esempio n. 13
0
// Consumes a '#' followed by a run of hexadecimal digits. The result is a
// tok_Color token when the digit count is 3, 4, 6 or 8, and a
// tok_Unrecognized token of the same length otherwise.
void consume_color_literal(TokenizeContext &context)
{
    // Start past the leading '#'.
    int length = 1;

    for (; is_hexadecimal_digit(context.next(length)); length++) {
        // Nothing to do; the loop only measures the digit run.
    }

    // The digit count excludes the '#'.
    switch (length - 1) {
    case 3:
    case 4:
    case 6:
    case 8:
        context.consume(tok_Color, length);
        break;
    default:
        context.consume(tok_Unrecognized, length);
        break;
    }
}
Esempio n. 14
0
// Consumes a {- ... -} block comment as a single tok_Comment token. Comments
// nest: each {- must be balanced by its own -}. The token ends after the -}
// that brings the nesting level back to zero, or at the end of the input if
// that never happens.
void consume_multiline_comment(TokenizeContext& context)
{
    int length = 0;

    // Number of {- openers seen that have not yet been closed.
    int nesting = 0;

    while (context.withinRange(length)) {
        if (context.next(length) == '{' && context.next(length + 1) == '-') {
            // Opener. Step over both characters so that the '-' in "{-}" is
            // not also read as the start of a closer.
            nesting++;
            length += 2;
        } else if (context.next(length) == '-' && context.next(length + 1) == '}') {
            // Closer. Stop once the outermost comment has been closed.
            nesting--;
            length += 2;

            if (nesting == 0)
                break;
        } else {
            length++;
        }
    }

    context.consume(tok_Comment, length);
}
Esempio n. 15
0
// Consumes a numeric literal: an optional leading '-', digits, and at most
// one decimal point. Produces tok_Float when a decimal point was consumed and
// tok_Integer otherwise.
void consume_number(TokenizeContext &context)
{
    int length = 0;

    // Optional minus sign.
    if (context.next(length) == '-')
        length = 1;

    bool is_float = false;

    for (;;) {
        if (is_number(context.next(length))) {
            length++;
            continue;
        }

        // Anything other than a digit or a dot ends the number.
        if (context.next(length) != '.')
            break;

        // A dot is left unconsumed when:
        //  - one was already consumed (a number has only one decimal point),
        //  - it is followed by another dot (it should be tokenized as
        //    TWO_DOTS instead),
        //  - it is followed by the start of an identifier (it might be an
        //    object call).
        if (is_float
                || context.next(length + 1) == '.'
                || is_identifier_first_letter(context.next(length + 1)))
            break;

        length++;
        is_float = true;
    }

    if (is_float) {
        context.consume(tok_Float, length);
    } else {
        context.consume(tok_Integer, length);
    }
}
Esempio n. 16
0
// Recognizes the token that starts at the current position and consumes it.
//
// The checks run in a fixed order and the first one that matches wins:
//   1. keywords, then plain identifiers (when the next character can start
//      an identifier)
//   2. whitespace
//   3. hexadecimal numbers ("0x" prefix)
//   4. decimal numbers
//   5. punctuation and operators, via the switch below
// Anything that matches none of these is consumed as a one-character
// TK_UNRECOGNIZED token.
void top_level_consume_token(TokenizeContext &context)
{
    if (is_identifier_first_letter(context.next())) {

        // Try each keyword in turn; the first successful match consumes the
        // token and returns. The order matters where one keyword is a prefix
        // of another (see 'do once' below).
        if (try_to_consume_keyword(context, TK_DEF)) return;
        if (try_to_consume_keyword(context, TK_TYPE)) return;
        if (try_to_consume_keyword(context, TK_BEGIN)) return;
        if (try_to_consume_keyword(context, TK_END)) return;
        if (try_to_consume_keyword(context, TK_IF)) return;
        if (try_to_consume_keyword(context, TK_ELSE)) return;
        if (try_to_consume_keyword(context, TK_ELIF)) return;
        if (try_to_consume_keyword(context, TK_FOR)) return;
        if (try_to_consume_keyword(context, TK_STATE)) return;
        if (try_to_consume_keyword(context, TK_IN)) return;
        if (try_to_consume_keyword(context, TK_TRUE)) return;
        if (try_to_consume_keyword(context, TK_FALSE)) return;
        // check 'do once' before 'do'
        if (try_to_consume_keyword(context, TK_DO_ONCE)) return;
        if (try_to_consume_keyword(context, TK_DO)) return;
        if (try_to_consume_keyword(context, TK_NAMESPACE)) return;
        if (try_to_consume_keyword(context, TK_INCLUDE)) return;
        if (try_to_consume_keyword(context, TK_IMPORT)) return;
        if (try_to_consume_keyword(context, TK_AND)) return;
        if (try_to_consume_keyword(context, TK_OR)) return;
        if (try_to_consume_keyword(context, TK_DISCARD)) return;
        if (try_to_consume_keyword(context, TK_NULL)) return;
        if (try_to_consume_keyword(context, TK_RETURN)) return;
        if (try_to_consume_keyword(context, TK_BREAK)) return;
        if (try_to_consume_keyword(context, TK_CONTINUE)) return;
        if (try_to_consume_keyword(context, TK_SWITCH)) return;
        if (try_to_consume_keyword(context, TK_CASE)) return;
        if (try_to_consume_keyword(context, TK_WHILE)) return;

        // No keyword matched, so this is an ordinary identifier.
        consume_identifier(context);
        return;
    }

    if (is_whitespace(context.next())) {
        consume_whitespace(context);
        return;
    }

    // Hex must be checked before the general number case, which would
    // otherwise match the leading '0'.
    if (context.next() == '0'
            && context.next(1) == 'x') {
        consume_hex_number(context);
        return;
    }

    if (match_number(context)) {
        consume_number(context);
        return;
    }

    // Check for specific characters
    switch(context.next()) {
    case '(':
        context.consume(TK_LPAREN, 1);
        return;
    case ')':
        context.consume(TK_RPAREN, 1);
        return;
    case '{':
        // "{-" opens a multiline comment rather than a brace.
        if (context.next(1) == '-') {
            consume_multiline_comment(context);
            return;
        }
        context.consume(TK_LBRACE, 1);
        return;
    case '}':
        context.consume(TK_RBRACE, 1);
        return;
    case '[':
        context.consume(TK_LBRACKET, 1);
        return;
    case ']':
        context.consume(TK_RBRACKET, 1);
        return;
    case ',':
        context.consume(TK_COMMA, 1);
        return;
    case '@':
        if (context.next(1) == '.') {
            context.consume(TK_AT_DOT, 2);
        } else {
            context.consume(TK_AT_SIGN, 1);
        }
        return;
    case '=':
        if (context.next(1) == '=') {
            context.consume(TK_DOUBLE_EQUALS, 2);
            return;
        }

        context.consume(TK_EQUALS, 1);
        return;
    case '"':
    case '\'':
        consume_string_literal(context);
        return;
    case '\n':
        context.consume(TK_NEWLINE, 1);
        return;
    case '.':
        // Longest match first: "...", then "..", then ".".
        if (context.next(1) == '.') {
            if (context.next(2) == '.') {
                context.consume(TK_ELLIPSIS, 3);
            } else {
                context.consume(TK_TWO_DOTS, 2);
            }
        } else {
            context.consume(TK_DOT, 1);
        }
        return;
    case '?':
        context.consume(TK_QUESTION, 1);
        return;
    case '*':
        if (context.next(1) == '=') {
            context.consume(TK_STAR_EQUALS, 2);
            return;
        }

        context.consume(TK_STAR, 1);
        return;
    case '/':
        if (context.next(1) == '=') {
            context.consume(TK_SLASH_EQUALS, 2);
            return;
        }
        if (context.next(1) == '/') {
            context.consume(TK_DOUBLE_SLASH, 2);
            return;
        }
        context.consume(TK_SLASH, 1);
        return;
    case '!':
        if (context.next(1) == '=') {
            context.consume(TK_NOT_EQUALS, 2);
            return;
        }
        // A lone '!' has no token of its own here; it falls out of the
        // switch and is consumed as TK_UNRECOGNIZED.
        break;

    case ':':
        if (context.next(1) == '=') {
            context.consume(TK_COLON_EQUALS, 2);
            return;
        }
        else if (context.next(1) == ':') {
            context.consume(TK_DOUBLE_COLON, 2);
            return;
        } else if (is_identifier_first_letter(context.next(1))) {
            // ':' directly followed by the start of an identifier is a name.
            return consume_name(context);
        }

        context.consume(TK_COLON, 1);
        return;
    case '+':
        if (context.next(1) == '=') {
            context.consume(TK_PLUS_EQUALS, 2);
        } else {
            context.consume(TK_PLUS, 1);
        }
        return;
    case '-':
        if (context.next(1) == '>') {
            context.consume(TK_RIGHT_ARROW, 2);
            return;
        }

        // "--" starts a comment, which consume_comment reads up to the next
        // newline.
        if (context.next(1) == '-')
            return consume_comment(context);

        if (context.next(1) == '=') {
            context.consume(TK_MINUS_EQUALS, 2);
            return;
        }

        context.consume(TK_MINUS, 1);
        return;

    case '<':
        // "<<<" opens a triple-quoted string literal.
        if (context.next(1) == '<' && context.next(2) == '<') {
            consume_triple_quoted_string_literal(context);
            return;
        }

        if (context.next(1) == '=') {
            context.consume(TK_LTHANEQ, 2);
            return;
        }
        if (context.next(1) == '-') {
            context.consume(TK_LEFT_ARROW, 2);
            return;
        }
        context.consume(TK_LTHAN, 1);
        return;

    case '>':
        if (context.next(1) == '=') {
            context.consume(TK_GTHANEQ, 2);
            return;
        }
        context.consume(TK_GTHAN, 1);
        return;

    case '%':
        context.consume(TK_PERCENT, 1);
        return;

    case '|':
        if (context.next(1) == '|') {
            context.consume(TK_DOUBLE_VERTICAL_BAR, 2);
            return;
        }
        // A lone '|' has no token of its own here; it falls out of the
        // switch and is consumed as TK_UNRECOGNIZED.
        break;

    case '&':
        if (context.next(1) == '&') {
            context.consume(TK_DOUBLE_AMPERSAND, 2);
            return;
        }

        context.consume(TK_AMPERSAND, 1);
        return;

    case ';':
        context.consume(TK_SEMICOLON, 1);
        return;

    case '#':
        consume_color_literal(context);
        return;
    }

    // Fall through, consume the next letter as UNRECOGNIZED
    context.consume(TK_UNRECOGNIZED, 1);
}
Esempio n. 17
0
// Recognizes the token that starts at the current position and consumes it.
//
// The checks run in a fixed order and the first one that matches wins:
//   1. keywords, then plain identifiers (when the next character can start
//      an identifier)
//   2. whitespace
//   3. hexadecimal numbers ("0x" prefix)
//   4. decimal numbers
//   5. punctuation and operators, via the switch below
// Anything that matches none of these is consumed as a one-character
// tok_Unrecognized token.
void top_level_consume_token(TokenizeContext &context)
{
    if (is_identifier_first_letter(context.next())) {

        // The keyword attempts are split into two groups by the first
        // character, so only one group is tried for any given input
        // (presumably to reduce the number of failed attempts).
        if (context.next() <= 'm') {
            // a through m
            if (try_to_consume_keyword(context, tok_And)) return;
            if (try_to_consume_keyword(context, tok_Break)) return;
            if (try_to_consume_keyword(context, tok_Case)) return;
            if (try_to_consume_keyword(context, tok_Continue)) return;
            if (try_to_consume_keyword(context, tok_Def)) return;
            if (try_to_consume_keyword(context, tok_Discard)) return;
            if (try_to_consume_keyword(context, tok_Else)) return;
            if (try_to_consume_keyword(context, tok_Elif)) return;
            if (try_to_consume_keyword(context, tok_False)) return;
            if (try_to_consume_keyword(context, tok_For)) return;
            if (try_to_consume_keyword(context, tok_If)) return;
            if (try_to_consume_keyword(context, tok_In)) return;
            if (try_to_consume_keyword(context, tok_Import)) return;
            if (try_to_consume_keyword(context, tok_Include)) return;
            if (try_to_consume_keyword(context, tok_Let)) return;
        } else {
            // n through z
            if (try_to_consume_keyword(context, tok_Namespace)) return;
            if (try_to_consume_keyword(context, tok_Not)) return;
            if (try_to_consume_keyword(context, tok_Nil)) return;
            if (try_to_consume_keyword(context, tok_Or)) return;
            if (try_to_consume_keyword(context, tok_Return)) return;
            if (try_to_consume_keyword(context, tok_State)) return;
            if (try_to_consume_keyword(context, tok_Struct)) return;
            if (try_to_consume_keyword(context, tok_Switch)) return;
            if (try_to_consume_keyword(context, tok_True)) return;
            if (try_to_consume_keyword(context, tok_Require)) return;
            if (try_to_consume_keyword(context, tok_RequireLocal)) return;
            if (try_to_consume_keyword(context, tok_Package)) return;
            if (try_to_consume_keyword(context, tok_Section)) return;
            if (try_to_consume_keyword(context, tok_While)) return;
        }

        // No keyword matched, so this is an ordinary identifier.
        consume_identifier(context);
        return;
    }

    if (is_whitespace(context.next())) {
        consume_whitespace(context);
        return;
    }

    // Hex must be checked before the general number case, which would
    // otherwise match the leading '0'.
    if (context.next() == '0'
        && context.next(1) == 'x') {
        consume_hex_number(context);
        return;
    }

    if (match_number(context)) {
        consume_number(context);
        return;
    }

    // Check for specific characters
    switch(context.next()) {
        case '(':
            context.consume(tok_LParen, 1);
            return;
        case ')':
            context.consume(tok_RParen, 1);
            return;
        case '{':
            // "{-" opens a multiline comment rather than a brace.
            if (context.next(1) == '-') {
                consume_multiline_comment(context);
                return;
            }
            context.consume(tok_LBrace, 1);
            return;
        case '}':
            context.consume(tok_RBrace, 1);
            return;
        case '[':
            context.consume(tok_LSquare, 1);
            return;
        case ']':
            context.consume(tok_RSquare, 1);
            return;
        case ',':
            context.consume(tok_Comma, 1);
            return;
        case '@':
            context.consume(tok_At, 1);
            return;
        case '=':
            if (context.next(1) == '=') {
                context.consume(tok_DoubleEquals, 2);
                return;
            }  else if (context.next(1) == '>') {
                context.consume(tok_FatArrow, 2);
                return;
            }

            context.consume(tok_Equals, 1);
            return;
        case '"':
        case '\'':
            consume_string_literal(context);
            return;
        case '\n':
            context.consume(tok_Newline, 1);
            return;
        case '.':
            // Longest match first: "...", then "..", then ".@", then ".".
            if (context.next(1) == '.') {
                if (context.next(2) == '.') {
                    context.consume(tok_Ellipsis, 3); 
                } else {
                    context.consume(tok_TwoDots, 2);
                }
            } else if (context.next(1) == '@') {
                context.consume(tok_DotAt, 2);
            } else {
                context.consume(tok_Dot, 1);
            }
            return;
        case '?':
            context.consume(tok_Question, 1);
            return;
        case '*':
            if (context.next(1) == '=') {
                context.consume(tok_StarEquals, 2);
                return;
            }
            if (context.next(1) == '*') {
                context.consume(tok_DoubleStar, 2);
                return;
            }

            context.consume(tok_Star, 1);
            return;
        case '/':
            if (context.next(1) == '=') {
                context.consume(tok_SlashEquals, 2);
                return;
            }
            if (context.next(1) == '/') {
                context.consume(tok_DoubleSlash, 2);
                return;
            }
            context.consume(tok_Slash, 1);
            return;
        case '!':
            if (context.next(1) == '=') {
                context.consume(tok_NotEquals, 2);
                return;
            }
            // A lone '!' has no token of its own here; it falls out of the
            // switch and is consumed as tok_Unrecognized.
            break;

        case ':':
            if (context.next(1) == '=') {
                context.consume(tok_ColonEquals, 2);
                return;
            }
            else if (context.next(1) == ':') {
                context.consume(tok_DoubleColon, 2);
                return;
            } else if (is_acceptable_inside_identifier(context.next(1))) {
                // ':' directly followed by an identifier character is a
                // symbol.
                return consume_symbol(context);
            }

            context.consume(tok_Colon, 1);
            return;
        case '+':
            if (context.next(1) == '=') {
                context.consume(tok_PlusEquals, 2);
            } else {
                context.consume(tok_Plus, 1);
            }
            return;
        case '-':
            if (context.next(1) == '>') {
                context.consume(tok_RightArrow, 2);
                return;
            }

            // "--" starts a comment, which consume_comment reads up to the
            // next newline.
            if (context.next(1) == '-')
                return consume_comment(context);

            if (context.next(1) == '=') {
                context.consume(tok_MinusEquals, 2);
                return;
            }

            context.consume(tok_Minus, 1);
            return;

        case '<':
            // "<<<" opens a triple-quoted string literal.
            if (context.next(1) == '<' && context.next(2) == '<') {
                consume_triple_quoted_string_literal(context);
                return;
            }

            if (context.next(1) == '=') {
                context.consume(tok_LThanEq, 2);
                return;
            }
            if (context.next(1) == '-') {
                context.consume(tok_LeftArrow, 2);
                return;
            }
            context.consume(tok_LThan, 1);
            return;

        case '>':
            if (context.next(1) == '=')
                context.consume(tok_GThanEq, 2);
            else
                context.consume(tok_GThan, 1);
            return;

        case '%':
            context.consume(tok_Percent, 1);
            return;

        case '|':
            if (context.next(1) == '|')
                context.consume(tok_DoubleVerticalBar, 2);
            else
                context.consume(tok_VerticalBar, 1);
            return;

        case '&':
            if (context.next(1) == '&')
                context.consume(tok_DoubleAmpersand, 2);
            else
                context.consume(tok_Ampersand, 1);
            return;

        case ';':
            context.consume(tok_Semicolon, 1);
            return;

        case '#':
            consume_color_literal(context);
            return;
    }

    // Fall through, consume the next letter as UNRECOGNIZED
    context.consume(tok_Unrecognized, 1);
}