Ejemplo n.º 1
0
std::string lex_number(const char *&c, const std::string &filename, const Location &begin)
{
    // The accepted grammar follows the railroad diagram at http://www.json.org/number.gif
    // with one deliberate difference: a leading '-' is never part of the literal.  Negation
    // is left to the parser as a unary operator applied to a numeric literal, so that x-1 is
    // tokenized as <identifier> <binop> <number> and not as <identifier> <number>.
    //
    // On entry c points at the first character of the number.  On return c points at the
    // last character that belongs to the number, and the returned string holds the literal.
    // A StaticError (located at begin) is thrown when the text cannot be a number.

    enum State {
        BEGIN,
        AFTER_ZERO,
        AFTER_ONE_TO_NINE,
        AFTER_DOT,
        AFTER_DIGIT,
        AFTER_E,
        AFTER_EXP_SIGN,
        AFTER_EXP_DIGIT
    };

    const auto is_digit = [](char ch) { return ch >= '0' && ch <= '9'; };
    const auto is_exponent_marker = [](char ch) { return ch == 'e' || ch == 'E'; };

    // Reports an unexpected character; the offending character is appended to the message.
    const auto reject = [&](const char *message, char ch) {
        std::stringstream ss;
        ss << message << ch;
        throw StaticError(filename, begin, ss.str());
    };

    std::string literal;
    State state = BEGIN;
    bool accepted = false;

    while (!accepted) {
        const char ch = *c;

        switch (state) {
            case BEGIN:
                if (ch == '0') {
                    state = AFTER_ZERO;
                } else if (is_digit(ch)) {
                    state = AFTER_ONE_TO_NINE;
                } else {
                    throw StaticError(filename, begin, "Couldn't lex number");
                }
                break;

            case AFTER_ZERO:
                // A leading zero may only be followed by a fraction or an exponent.
                if (ch == '.') {
                    state = AFTER_DOT;
                } else if (is_exponent_marker(ch)) {
                    state = AFTER_E;
                } else {
                    accepted = true;
                }
                break;

            case AFTER_ONE_TO_NINE:
                if (ch == '.') {
                    state = AFTER_DOT;
                } else if (is_exponent_marker(ch)) {
                    state = AFTER_E;
                } else if (!is_digit(ch)) {
                    accepted = true;
                }
                // Further digits keep us in AFTER_ONE_TO_NINE.
                break;

            case AFTER_DOT:
                // At least one digit is required after the decimal point.
                if (is_digit(ch)) {
                    state = AFTER_DIGIT;
                } else {
                    reject("Couldn't lex number, junk after decimal point: ", ch);
                }
                break;

            case AFTER_DIGIT:
                if (is_exponent_marker(ch)) {
                    state = AFTER_E;
                } else if (!is_digit(ch)) {
                    accepted = true;
                }
                // Further digits keep us in AFTER_DIGIT.
                break;

            case AFTER_E:
                if (ch == '+' || ch == '-') {
                    state = AFTER_EXP_SIGN;
                } else if (is_digit(ch)) {
                    state = AFTER_EXP_DIGIT;
                } else {
                    reject("Couldn't lex number, junk after 'E': ", ch);
                }
                break;

            case AFTER_EXP_SIGN:
                // At least one digit is required after the exponent sign.
                if (is_digit(ch)) {
                    state = AFTER_EXP_DIGIT;
                } else {
                    reject("Couldn't lex number, junk after exponent sign: ", ch);
                }
                break;

            case AFTER_EXP_DIGIT:
                if (!is_digit(ch)) {
                    accepted = true;
                }
                // Further digits keep us in AFTER_EXP_DIGIT.
                break;
        }

        if (!accepted) {
            // The character belongs to the number: keep it and move on.
            literal += ch;
            ++c;
        }
    }

    // c is on the first character after the number; step back onto the number's last one.
    --c;
    return literal;
}
Ejemplo n.º 2
0
/** Split a NUL-terminated buffer into a list of tokens.
 *
 * The input is scanned one character at a time.  The current line number and the start of
 * the current line are tracked so that every token carries a begin and an end Location
 * (columns are 1-based).  Whitespace and comments (double-slash, hash, and C-style block
 * comments) produce no tokens.  The returned list always ends with an END_OF_FILE token.
 *
 * Every branch of the main switch leaves c on the LAST character of the token it recognised;
 * the ++c of the outer for loop then moves on to the next token.
 *
 * \param filename Name used in token locations and in error reports.
 * \param input The NUL-terminated text to lex.
 * \returns The tokens in source order, followed by an END_OF_FILE token.
 * \throws StaticError on an unterminated string or block comment, a bad escape sequence,
 *         a malformed text block, a character that cannot start any token, or whatever
 *         lex_number reports for a malformed number.
 */
std::list<Token> jsonnet_lex(const std::string &filename, const char *input)
{
    unsigned long line_number = 1;
    const char *line_start = input;

    std::list<Token> r;

    const char *c = input;

    for ( ; *c!='\0' ; ++c) {
        // Position of the first character of the candidate token (1-based column).
        Location begin(line_number, c - line_start + 1);
        Token::Kind kind;
        std::string data;

        switch (*c) {

            // Skip non-\n whitespace
            case ' ': case '\t': case '\r':
            continue;

            // Skip \n and maintain line numbers
            case '\n':
            line_number++;
            line_start = c+1;
            continue;

            // Single-character punctuation tokens; they carry no data.
            case '{':
            kind = Token::BRACE_L;
            break;

            case '}':
            kind = Token::BRACE_R;
            break;

            case '[':
            kind = Token::BRACKET_L;
            break;

            case ']':
            kind = Token::BRACKET_R;
            break;

            case ':':
            kind = Token::COLON;
            break;

            case ',':
            kind = Token::COMMA;
            break;

            case '$':
            kind = Token::DOLLAR;
            break;

            case '.':
            kind = Token::DOT;
            break;

            case '(':
            kind = Token::PAREN_L;
            break;

            case ')':
            kind = Token::PAREN_R;
            break;

            case ';':
            kind = Token::SEMICOLON;
            break;

            // Special cases for unary operators.
            // These are emitted on their own instead of going through the greedy symbol
            // loop in the default case.  '!' may also be the start of the operator "!=".
            case '!':
            kind = Token::OPERATOR;
            if (*(c+1) == '=') {
                c++;
                data = "!=";
            } else {
                data = "!";
            }
            break;

            case '~':
            kind = Token::OPERATOR;
            data = "~";
            break;

            case '+':
            kind = Token::OPERATOR;
            data = "+";

            break;
            case '-':
            kind = Token::OPERATOR;
            data = "-";
            break;

            // Numeric literals.
            // lex_number advances c through its reference parameter.
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
            kind = Token::NUMBER;
            data = lex_number(c, filename, begin);
            break;

            // String literals.
            // Escape sequences are decoded here; data receives the decoded contents.
            case '"': {
                c++;  // Skip the opening quote.
                for (; ; ++c) {
                    if (*c == '\0') {
                        throw StaticError(filename, begin, "Unterminated string");
                    }
                    if (*c == '"') {
                        // Closing quote: c stays on it for the outer loop's ++c.
                        break;
                    }
                    switch (*c) {
                        case '\\':
                        switch (*(++c)) {
                            case '"':
                            data += *c;
                            break;

                            case '\\':
                            data += *c;
                            break;

                            case '/':
                            data += *c;
                            break;

                            case 'b':
                            data += '\b';
                            break;

                            case 'f':
                            data += '\f';
                            break;

                            case 'n':
                            data += '\n';
                            break;

                            case 'r':
                            data += '\r';
                            break;

                            case 't':
                            data += '\t';
                            break;

                            case 'u': {
                                ++c;  // Consume the 'u'.
                                unsigned long codepoint = 0;
                                // Expect 4 hex digits.
                                for (unsigned i=0 ; i<4 ; ++i) {
                                    auto x = (unsigned char)(c[i]);
                                    unsigned digit;
                                    if (x == '\0') {
                                        auto msg = "Unterminated string";
                                        throw StaticError(filename, begin, msg);
                                    } else if (x == '"') {
                                        auto msg = "Truncated unicode escape sequence in "
                                                   "string literal.";
                                        throw StaticError(filename, begin, msg);
                                    } else if (x >= '0' && x <= '9') {
                                        digit = x - '0';
                                    } else if (x >= 'a' && x <= 'f') {
                                        digit = x - 'a' + 10;
                                    } else if (x >= 'A' && x <= 'F') {
                                        digit = x - 'A' + 10;
                                    } else {
                                        std::stringstream ss;
                                        ss << "Malformed unicode escape character, "
                                           << "should be hex: '" << x << "'";
                                        throw StaticError(filename, begin, ss.str());
                                    }
                                    // Accumulate the value, most significant digit first.
                                    codepoint *= 16;
                                    codepoint += digit;
                                }

                                // Append the code point to data as UTF-8.
                                encode_utf8(codepoint, data);

                                // Leave us on the last char, ready for the ++c at
                                // the outer for loop.
                                c += 3;
                            }
                            break;

                            case '\0': {
                                auto msg = "Truncated escape sequence in string literal.";
                                throw StaticError(filename, begin, msg);
                            }

                            default: {
                                std::stringstream ss;
                                ss << "Unknown escape sequence in string literal: '" << *c << "'";
                                throw StaticError(filename, begin, ss.str());
                            }
                        }
                        break;

                        // Treat as a regular letter, but maintain line/column counters.
                        case '\n':
                        line_number++;
                        line_start = c+1;
                        data += *c;
                        break;

                        default:
                        // Just a regular letter.
                        data += *c;
                    }
                }
                kind = Token::STRING;
            }
            break;

            // Keywords
            // Everything else: identifiers and keywords first, then symbol sequences
            // (comments, text blocks and operators); anything left over is an error.
            default:
            if (is_identifier_first(*c)) {
                std::string id;
                for (; *c != '\0' ; ++c) {
                    if (!is_identifier(*c)) {
                        break;
                    }
                    id += *c;
                }
                // Step back onto the last identifier character.
                --c;
                // Keyword tokens carry no data; only plain identifiers keep their text.
                if (id == "assert") {
                    kind = Token::ASSERT;
                } else if (id == "else") {
                    kind = Token::ELSE;
                } else if (id == "error") {
                    kind = Token::ERROR;
                } else if (id == "false") {
                    kind = Token::FALSE;
                } else if (id == "for") {
                    kind = Token::FOR;
                } else if (id == "function") {
                    kind = Token::FUNCTION;
                } else if (id == "if") {
                    kind = Token::IF;
                } else if (id == "import") {
                    kind = Token::IMPORT;
                } else if (id == "importstr") {
                    kind = Token::IMPORTSTR;
                } else if (id == "in") {
                    kind = Token::IN;
                } else if (id == "local") {
                    kind = Token::LOCAL;
                } else if (id == "null") {
                    kind = Token::NULL_LIT;
                } else if (id == "self") {
                    kind = Token::SELF;
                } else if (id == "super") {
                    kind = Token::SUPER;
                } else if (id == "tailstrict") {
                    kind = Token::TAILSTRICT;
                } else if (id == "then") {
                    kind = Token::THEN;
                } else if (id == "true") {
                    kind = Token::TRUE;
                } else {
                    // Not a keyword, must be an identifier.
                    kind = Token::IDENTIFIER;
                    data = id;
                }
            } else if (is_symbol(*c)) {

                // Single line C++ style comment
                if (*c == '/' && *(c+1) == '/') {
                    while (*c != '\0' && *c != '\n') {
                        ++c;
                    }
                    // Leaving it on the \n allows processing of \n on next iteration,
                    // i.e. managing of the line & column counter.
                    c--;
                    continue;
                }

                // Single line # comment
                if (*c == '#') {
                    while (*c != '\0' && *c != '\n') {
                        ++c;
                    }
                    // Leaving it on the \n allows processing of \n on next iteration,
                    // i.e. managing of the line & column counter.
                    c--;
                    continue;
                }

                // Multi-line comment.
                if (*c == '/' && *(c+1) == '*') {
                    c += 2;  // Avoid matching /*/: skip the /* before starting the search for */.
                    while (*c != '\0' && !(*c == '*' && *(c+1) == '/')) {
                        if (*c == '\n') {
                            // Just keep track of the line / column counters.
                            line_number++;
                            line_start = c+1;
                        }
                        ++c;
                    }
                    if (*c == '\0') {
                        auto msg = "Multi-line comment has no terminating */.";
                        throw StaticError(filename, begin, msg);
                    }
                    // Leave the counter on the closing /.
                    c++;
                    continue;
                }
                // Text block
                // Opened by "|||" immediately followed by a newline and closed by a line
                // holding "|||".  The result is emitted as a STRING token.
                if (*c == '|' && *(c+1) == '|' && *(c+2) == '|' && *(c+3) == '\n') {
                    std::stringstream block;
                    c += 4; // Skip the "|||\n"
                    line_number++;
                    line_start = c;
                    const char *first_line = c;
                    // NOTE(review): whitespace_check is defined elsewhere.  From its use here
                    // it appears to return how many leading whitespace characters the line
                    // at c shares with first_line (0 when there are none) -- confirm.
                    int ws_chars = whitespace_check(first_line, c);
                    if (ws_chars == 0) {
                        auto msg = "Text block's first line must start with whitespace.";
                        throw StaticError(filename, begin, msg);
                    }
                    while (true) {
                        assert(ws_chars > 0);
                        // Read up to the \n
                        // (skipping the first ws_chars characters of the line).
                        for (c = &c[ws_chars]; *c != '\n' ; ++c) {
                            if (*c == '\0')
                                throw StaticError(filename, begin, "Unexpected EOF");
                            block << *c;
                        }
                        // Add the \n
                        block << '\n';
                        ++c;
                        line_number++;
                        line_start = c;
                        // Examine next line
                        ws_chars = whitespace_check(first_line, c);
                        if (ws_chars == 0) {
                            // End of text block
                            // Skip over any whitespace
                            while (*c == ' ' || *c == '\t') ++c;
                            // Expect |||
                            if (!(*c == '|' && *(c+1) == '|' && *(c+2) == '|')) {
                                auto msg = "Text block not terminated with |||";
                                throw StaticError(filename, begin, msg);
                            }
                            c += 2;  // Leave on the last |
                            data = block.str();
                            kind = Token::STRING;
                            break;
                        }
                    }

                    break;  // Out of the switch.
                }

                // Any other run of symbol characters forms a single operator token.
                for (; *c != '\0' ; ++c) {
                    if (!is_symbol(*c)) {
                        break;
                    }
                    data += *c;
                }
                // Step back onto the last symbol character.
                --c;
                kind = Token::OPERATOR;
            } else {
                std::stringstream ss;
                ss << "Could not lex the character ";
                auto uc = (unsigned char)(*c);
                // Report low character values by number instead of printing them.
                // NOTE(review): *c is a plain char, so where char is signed, bytes >= 0x80
                // compare below 32 and take this branch too.
                if (*c < 32)
                    ss << "code " << unsigned(uc);
                else
                    ss << "'" << *c << "'";
                throw StaticError(filename, begin, ss.str());
            }
            break;
        }

        // c is on the last character of the token, which gives the end location.
        Location end(line_number, c - line_start + 1);
        r.push_back(Token(kind, data, LocationRange(filename, begin, end)));
    }

    // Always finish with an END_OF_FILE token whose range is empty (begin == end).
    Location end(line_number, c - line_start + 1);
    r.push_back(Token(Token::END_OF_FILE, "", LocationRange(filename, end, end)));
    return r;
}
Ejemplo n.º 3
0
/** Statically analyse the given ast.
 *
 * Recursively walks the tree rooted at ast_.  While doing so it checks that every variable
 * reference is in scope, that function parameters are unique, and that self / super are only
 * used inside an object.  For every node visited, the free variables of that node are
 * appended to the node's freeVariables member.
 *
 * An AST node of an unrecognised type is treated as an internal error: a message is written
 * to std::cerr and the process is aborted.
 *
 * \param ast_ The AST.
 * \param in_object Whether or not ast_ is within the lexical scope of an object AST.
 * \param vars The variables defined within lexical scope of ast_.
 * \returns The free variables in ast_.
 * \throws StaticError on an unknown variable, a duplicate function parameter, or the use of
 *         self / super outside of an object.
 */
static IdSet static_analysis(AST *ast_, bool in_object, const IdSet &vars)
{
    IdSet r;

    if (auto *ast = dynamic_cast<const Apply*>(ast_)) {
        append(r, static_analysis(ast->target, in_object, vars));
        for (AST *arg : ast->arguments)
            append(r, static_analysis(arg, in_object, vars));

    } else if (auto *ast = dynamic_cast<const Array*>(ast_)) {
        for (AST *el : ast->elements)
            append(r, static_analysis(el, in_object, vars));

    } else if (auto *ast = dynamic_cast<const Binary*>(ast_)) {
        append(r, static_analysis(ast->left, in_object, vars));
        append(r, static_analysis(ast->right, in_object, vars));

    } else if (dynamic_cast<const BuiltinFunction*>(ast_)) {
        // Nothing to do.

    } else if (auto *ast = dynamic_cast<const Conditional*>(ast_)) {
        append(r, static_analysis(ast->cond, in_object, vars));
        append(r, static_analysis(ast->branchTrue, in_object, vars));
        append(r, static_analysis(ast->branchFalse, in_object, vars));

    } else if (auto *ast = dynamic_cast<const Error*>(ast_)) {
        // Accumulate into r rather than returning straight away: an early return would skip
        // the loop at the bottom, leaving this node's freeVariables unrecorded.
        append(r, static_analysis(ast->expr, in_object, vars));

    } else if (auto *ast = dynamic_cast<const Function*>(ast_)) {
        // Parameters are in scope in the body, and are not free in the function itself.
        auto new_vars = vars;
        IdSet params;
        for (auto *p : ast->parameters) {
            if (params.find(p) != params.end()) {
                throw StaticError(ast_->location, "Duplicate function parameter: " + p->name);
            }
            params.insert(p);
            new_vars.insert(p);
        }
        auto fv = static_analysis(ast->body, in_object, new_vars);
        for (auto *p : ast->parameters)
            fv.erase(p);
        append(r, fv);

    } else if (dynamic_cast<const Import*>(ast_)) {
        // Nothing to do.

    } else if (dynamic_cast<const Importstr*>(ast_)) {
        // Nothing to do.

    } else if (auto *ast = dynamic_cast<const Index*>(ast_)) {
        append(r, static_analysis(ast->target, in_object, vars));
        append(r, static_analysis(ast->index, in_object, vars));

    } else if (auto *ast = dynamic_cast<const Local*>(ast_)) {
        // All bound names are in scope in every bind's expression as well as in the body,
        // so binds may refer to each other.
        IdSet ast_vars;
        for (const auto &bind: ast->binds) {
            ast_vars.insert(bind.first);
        }
        auto new_vars = vars;
        append(new_vars, ast_vars);
        IdSet fvs;
        for (const auto &bind: ast->binds)
            append(fvs, static_analysis(bind.second, in_object, new_vars));

        append(fvs, static_analysis(ast->body, in_object, new_vars));

        // The bound names are not free outside of the local.
        for (const auto &bind: ast->binds)
            fvs.erase(bind.first);

        append(r, fvs);

    } else if (dynamic_cast<const LiteralBoolean*>(ast_)) {
        // Nothing to do.

    } else if (dynamic_cast<const LiteralNumber*>(ast_)) {
        // Nothing to do.

    } else if (dynamic_cast<const LiteralString*>(ast_)) {
        // Nothing to do.

    } else if (dynamic_cast<const LiteralNull*>(ast_)) {
        // Nothing to do.

    } else if (auto *ast = dynamic_cast<Object*>(ast_)) {
        // Field names are analysed in the enclosing context; field bodies are always
        // inside an object.
        for (const auto &field : ast->fields) {
            append(r, static_analysis(field.name, in_object, vars));
            append(r, static_analysis(field.body, true, vars));
        }

    } else if (auto *ast = dynamic_cast<ObjectComposition*>(ast_)) {
        // The comprehension variable is in scope in the field and value expressions only,
        // so it is removed from r before the array expression is analysed.
        auto new_vars = vars;
        new_vars.insert(ast->id);
        append(r, static_analysis(ast->field, false, new_vars));
        append(r, static_analysis(ast->value, true, new_vars));
        r.erase(ast->id);
        append(r, static_analysis(ast->array, in_object, vars));

    } else if (dynamic_cast<const Self*>(ast_)) {
        if (!in_object)
            throw StaticError(ast_->location, "Can't use self outside of an object.");

    } else if (dynamic_cast<const Super*>(ast_)) {
        if (!in_object)
            throw StaticError(ast_->location, "Can't use super outside of an object.");

    } else if (auto *ast = dynamic_cast<const Unary*>(ast_)) {
        append(r, static_analysis(ast->expr, in_object, vars));

    } else if (auto *ast = dynamic_cast<const Var*>(ast_)) {
        if (vars.find(ast->id) == vars.end()) {
            throw StaticError(ast->location, "Unknown variable: "+ast->id->name);
        }
        r.insert(ast->id);

    } else {
        std::cerr << "INTERNAL ERROR: Unknown AST: " << ast_ << std::endl;
        std::abort();

    }

    // Record the result on the node itself.
    for (auto *id : r)
        ast_->freeVariables.push_back(id);

    return r;
}
Ejemplo n.º 4
0
/** Resolve the backslash escape sequences in a string.
 *
 * Recognised escapes are a backslash followed by one of: double quote, single quote,
 * backslash, forward slash, b, f, n, r, t, or u plus exactly four hexadecimal digits.  The
 * four-digit form appends the numeric value as a single code point (two consecutive escapes
 * are not combined into one).  Every other character is copied through unchanged.
 * Processing stops at the first NUL character.
 *
 * \param loc The location reported in any StaticError that is thrown.
 * \param s The string whose escape sequences should be resolved.
 * \returns A copy of s with every escape sequence replaced by the character it denotes.
 * \throws StaticError if an escape sequence is truncated, unknown, or contains a character
 *         that is not a hexadecimal digit where one is required.
 */
String jsonnet_string_unescape(const LocationRange &loc, const String &s)
{
    String r;
    const char32_t *s_ptr = s.c_str();
    for (const char32_t *c = s_ptr; *c != U'\0' ; ++c) {
        switch (*c) {
            case '\\':
            switch (*(++c)) {
                case '"':
                case '\'':
                r += *c;
                break;

                case '\\':
                r += *c;
                break;

                case '/':
                r += *c;
                break;

                case 'b':
                r += '\b';
                break;

                case 'f':
                r += '\f';
                break;

                case 'n':
                r += '\n';
                break;

                case 'r':
                r += '\r';
                break;

                case 't':
                r += '\t';
                break;

                case 'u': {
                    ++c;  // Consume the 'u'.
                    char32_t codepoint = 0;
                    // Expect 4 hex digits.
                    for (unsigned i=0 ; i<4 ; ++i) {
                        // Compare the full code point.  Narrowing it to a single byte would
                        // make unrelated characters look like hex digits (U+0130 -> '0') or
                        // like the terminator (U+0100 -> NUL).
                        const char32_t x = c[i];
                        unsigned digit;
                        if (x == U'\0') {
                            auto msg = "Truncated unicode escape sequence in string literal.";
                            throw StaticError(loc, msg);
                        } else if (x >= U'0' && x <= U'9') {
                            digit = x - U'0';
                        } else if (x >= U'a' && x <= U'f') {
                            digit = x - U'a' + 10;
                        } else if (x >= U'A' && x <= U'F') {
                            digit = x - U'A' + 10;
                        } else {
                            // Render the offending character as UTF-8 for the message.
                            std::stringstream ss;
                            std::string utf8;
                            encode_utf8(x, utf8);
                            ss << "Malformed unicode escape character, "
                               << "should be hex: '" << utf8 << "'";
                            throw StaticError(loc, ss.str());
                        }
                        // Accumulate the value, most significant digit first.
                        codepoint *= 16;
                        codepoint += digit;
                    }

                    r += codepoint;

                    // Leave us on the last char, ready for the ++c at
                    // the outer for loop.
                    c += 3;
                }
                break;

                case '\0': {
                    // The string ended straight after the backslash.
                    auto msg = "Truncated escape sequence in string literal.";
                    throw StaticError(loc, msg);
                }

                default: {
                    std::stringstream ss;
                    std::string utf8;
                    encode_utf8(*c, utf8);
                    ss << "Unknown escape sequence in string literal: '" << utf8 << "'";
                    throw StaticError(loc, ss.str());
                }
            }
            break;

            default:
            // Just a regular letter.
            r += *c;
        }
    }
    return r;
}
Ejemplo n.º 5
0
    void desugar(AST *&ast_, unsigned obj_level)
    {
        if (auto *ast = dynamic_cast<Apply*>(ast_)) {
            desugar(ast->target, obj_level);
            for (Apply::Arg &arg : ast->args)
                desugar(arg.expr, obj_level);

        } else if (auto *ast = dynamic_cast<ApplyBrace*>(ast_)) {
            desugar(ast->left, obj_level);
            desugar(ast->right, obj_level);
            ast_ = alloc->make<Binary>(ast->location, ast->openFodder,
                                       ast->left, EF, BOP_PLUS, ast->right);

        } else if (auto *ast = dynamic_cast<Array*>(ast_)) {
            for (auto &el : ast->elements)
                desugar(el.expr, obj_level);

        } else if (auto *ast = dynamic_cast<ArrayComprehension*>(ast_)) {
            for (ComprehensionSpec &spec : ast->specs)
                desugar(spec.expr, obj_level);
            desugar(ast->body, obj_level + 1);

            int n = ast->specs.size();
            AST *zero = make<LiteralNumber>(E, EF, "0.0");
            AST *one = make<LiteralNumber>(E, EF, "1.0");
            auto *_r = id(U"$r");
            auto *_l = id(U"$l");
            std::vector<const Identifier*> _i(n);
            for (int i = 0; i < n ; ++i) {
                StringStream ss;
                ss << U"$i_" << i;
                _i[i] = id(ss.str());
            }
            std::vector<const Identifier*> _aux(n);
            for (int i = 0; i < n ; ++i) {
                StringStream ss;
                ss << U"$aux_" << i;
                _aux[i] = id(ss.str());
            }

            // Build it from the inside out.  We keep wrapping 'in' with more ASTs.
            assert(ast->specs[0].kind == ComprehensionSpec::FOR);

            int last_for = n - 1;
            while (ast->specs[last_for].kind != ComprehensionSpec::FOR)
                last_for--;
            // $aux_{last_for}($i_{last_for} + 1, $r + [body])
            AST *in = make<Apply>(
                ast->body->location,
                EF,
                var(_aux[last_for]),
                EF,
                Apply::Args {
                    { make<Binary>(E, EF, var(_i[last_for]), EF, BOP_PLUS, one), EF},
                    { make<Binary>(E, EF, var(_r), EF, BOP_PLUS, singleton(ast->body)), EF}
                },
                false,  // trailingComma
                EF,
                EF,
                true  // tailstrict
            );
            for (int i = n - 1; i >= 0 ; --i) {
                const ComprehensionSpec &spec = ast->specs[i];
                AST *out;
                if (i > 0) {
                    int prev_for = i - 1;
                    while (ast->specs[prev_for].kind != ComprehensionSpec::FOR)
                        prev_for--;

                    // aux_{prev_for}($i_{prev_for} + 1, $r)
                    out = make<Apply>(  // False branch.
                        E,
                        EF,
                        var(_aux[prev_for]),
                        EF,
                        Apply::Args {
                            { make<Binary>(E, EF, var(_i[prev_for]), EF, BOP_PLUS, one), EF, },
                            { var(_r), EF, }
                        },
                        false, // trailingComma
                        EF,
                        EF,
                        true  // tailstrict
                    );
                } else {
                    out = var(_r);
                }
                switch (spec.kind) {
                    case ComprehensionSpec::IF: {
                        /*
                            if [[[...cond...]]] then
                                [[[...in...]]]
                            else
                                [[[...out...]]]
                        */
                        in = make<Conditional>(
                            ast->location,
                            EF,
                            spec.expr,
                            EF,
                            in,  // True branch.
                            EF,
                            out);  // False branch.
                    } break;
                    case ComprehensionSpec::FOR: {
                        /*
                            local $l = [[[...array...]]]
                                  aux_{i}(i_{i}, r) =
                                if i_{i} >= std.length($l) then
                                    [[[...out...]]]
                                else
                                    local [[[...var...]]] = $l[i_{i}];
                                    [[[...in...]]];`
                            if std.type($l) != "array" then
                                error "In comprehension, can only iterate over array.."
                            else
                                aux_{i}(0, r) tailstrict;
                        */
                        in = make<Local>(
                            ast->location,
                            EF,
                            Local::Binds {
                                bind(_l, spec.expr),  // Need to check expr is an array
                                bind(_aux[i], make<Function>(
                                    ast->location,
                                    EF,
                                    EF,
                                    std::vector<Param>{Param(EF, _i[i], EF), Param(EF, _r, EF)},
                                    false,  // trailingComma
                                    EF,
                                    make<Conditional>(
                                        ast->location,
                                        EF,
                                        make<Binary>(
                                            E, EF, var(_i[i]), EF, BOP_GREATER_EQ, length(var(_l))),
                                        EF,
                                        out,
                                        EF,
                                        make<Local>(
                                            ast->location,
                                            EF,
                                            singleBind(
                                                spec.var,
                                                make<Index>(E, EF, var(_l), EF, false, var(_i[i]),
                                                            EF, nullptr, EF, nullptr, EF)
                                            ),
                                            in)
                                    )
                                ))},
                            make<Conditional>(
                                ast->location,
                                EF,
                                equals(ast->location, type(var(_l)), str(U"array")),
                                EF,
                                make<Apply>(
                                    E,
                                    EF,
                                    var(_aux[i]),
                                    EF,
                                    Apply::Args {
                                        {zero, EF},
                                        {
                                            i == 0
                                            ? make<Array>(E, EF, Array::Elements{}, false, EF)
                                            : static_cast<AST*>(var(_r)),
                                            EF,
                                        }
                                    },
                                    false,  // trailingComma
                                    EF,
                                    EF,
                                    true),  // tailstrict
                                EF,
                                error(ast->location,
                                      U"In comprehension, can only iterate over array.")));
                    } break;
                }
            }

            ast_ = in;

        } else if (auto *ast = dynamic_cast<Assert*>(ast_)) {
            desugar(ast->cond, obj_level);
            if (ast->message == nullptr) {
                ast->message = str(U"Assertion failed.");
            }
            desugar(ast->message, obj_level);
            desugar(ast->rest, obj_level);

            // if cond then rest else error msg
            AST *branch_false = alloc->make<Error>(ast->location, EF, ast->message);
            ast_ = alloc->make<Conditional>(ast->location, ast->openFodder,
                                            ast->cond, EF, ast->rest, EF, branch_false);

        } else if (auto *ast = dynamic_cast<Binary*>(ast_)) {
            desugar(ast->left, obj_level);
            desugar(ast->right, obj_level);

            bool invert = false;

            switch (ast->op) {
                case BOP_PERCENT: {
                    AST *f_mod = alloc->make<Index>(E, EF, std(), EF, false, str(U"mod"), EF,
                                                    nullptr, EF, nullptr, EF);
                    Apply::Args args = {{ast->left, EF}, {ast->right, EF}};
                    ast_ = alloc->make<Apply>(ast->location, ast->openFodder, f_mod, EF, args,
                                              false, EF, EF, false);
                } break;

                case BOP_MANIFEST_UNEQUAL:
                invert = true;
                case BOP_MANIFEST_EQUAL: {
                    ast_ = equals(ast->location, ast->left, ast->right);
                    if (invert)
                        ast_ = alloc->make<Unary>(ast->location, ast->openFodder, UOP_NOT, ast_);
                }
                break;

                default:;
                // Otherwise don't change it.
            }

        } else if (dynamic_cast<const BuiltinFunction*>(ast_)) {
            // Nothing to do.

        } else if (auto *ast = dynamic_cast<Conditional*>(ast_)) {
            desugar(ast->cond, obj_level);
            desugar(ast->branchTrue, obj_level);
            if (ast->branchFalse == nullptr)
                ast->branchFalse = alloc->make<LiteralNull>(LocationRange(), EF);
            desugar(ast->branchFalse, obj_level);

        } else if (auto *ast = dynamic_cast<Dollar*>(ast_)) {
            if (obj_level == 0) {
                throw StaticError(ast->location, "No top-level object found.");
            }
            ast_ = alloc->make<Var>(ast->location, EF, alloc->makeIdentifier(U"$"));

        } else if (auto *ast = dynamic_cast<Error*>(ast_)) {
            desugar(ast->expr, obj_level);

        } else if (auto *ast = dynamic_cast<Function*>(ast_)) {
            desugar(ast->body, obj_level);

        } else if (dynamic_cast<const Import*>(ast_)) {
            // Nothing to do.

        } else if (dynamic_cast<const Importstr*>(ast_)) {
            // Nothing to do.

        } else if (auto *ast = dynamic_cast<Index*>(ast_)) {
            desugar(ast->target, obj_level);
            if (ast->isSlice) {
                if (ast->index == nullptr)
                    ast->index = make<LiteralNull>(ast->location, EF);
                desugar(ast->index, obj_level);

                if (ast->end == nullptr)
                    ast->end = make<LiteralNull>(ast->location, EF);
                desugar(ast->end, obj_level);

                if (ast->step == nullptr)
                    ast->step = make<LiteralNull>(ast->location, EF);
                desugar(ast->step, obj_level);

                ast_ = make<Apply>(
                    ast->location,
                    EF,
                    make<Index>(
                        E, EF, std(), EF, false, str(U"slice"), EF, nullptr, EF, nullptr, EF),
                    EF,
                    std::vector<Apply::Arg>{
                        {ast->target, EF},
                        {ast->index, EF},
                        {ast->end, EF},
                        {ast->step, EF},
                    },
                    false,  // trailing comma
                    EF,
                    EF,
                    false  // tailstrict
                );
            } else {
                if (ast->id != nullptr) {
                    assert(ast->index == nullptr);
                    ast->index = str(ast->id->name);
                    ast->id = nullptr;
                }
                desugar(ast->index, obj_level);
            }

        } else if (auto *ast = dynamic_cast<Local*>(ast_)) {
            for (auto &bind: ast->binds)
                desugar(bind.body, obj_level);
            desugar(ast->body, obj_level);

            for (auto &bind: ast->binds) {
                if (bind.functionSugar) {
                    bind.body = alloc->make<Function>(
                        ast->location, ast->openFodder, bind.parenLeftFodder, bind.params, false,
                        bind.parenRightFodder, bind.body);
                    bind.functionSugar = false;
                    bind.params.clear();
                }
            }

        } else if (dynamic_cast<const LiteralBoolean*>(ast_)) {
            // Nothing to do.

        } else if (dynamic_cast<const LiteralNumber*>(ast_)) {
            // Nothing to do.

        } else if (auto *ast = dynamic_cast<LiteralString*>(ast_)) {
            if (ast->tokenKind != LiteralString::BLOCK) {
                ast->value = jsonnet_string_unescape(ast->location, ast->value);
            }
            ast->tokenKind = LiteralString::DOUBLE;
            ast->blockIndent.clear();

        } else if (dynamic_cast<const LiteralNull*>(ast_)) {
            // Nothing to do.

        } else if (auto *ast = dynamic_cast<DesugaredObject*>(ast_)) {
            for (auto &field : ast->fields) {
                desugar(field.name, obj_level);
                desugar(field.body, obj_level + 1);
            }
            for (AST *assert : ast->asserts) {
                desugar(assert, obj_level + 1);
            }

        } else if (auto *ast = dynamic_cast<Object*>(ast_)) {
            // Hidden variable to allow outer/top binding.
            if (obj_level == 0) {
                const Identifier *hidden_var = alloc->makeIdentifier(U"$");
                auto *body = alloc->make<Self>(E, EF);
                ast->fields.push_back(ObjectField::Local(EF, EF, hidden_var, EF, body, EF));
            }

            desugarFields(ast, ast->fields, obj_level);

            DesugaredObject::Fields new_fields;
            ASTs new_asserts;
            for (const ObjectField &field : ast->fields) {
                if (field.kind == ObjectField::ASSERT) {
                    new_asserts.push_back(field.expr2);
                } else if (field.kind == ObjectField::FIELD_EXPR) {
                    new_fields.emplace_back(field.hide, field.expr1, field.expr2);
                } else {
                    std::cerr << "INTERNAL ERROR: field should have been desugared: "
                              << field.kind << std::endl;
                }
            }
            ast_ = alloc->make<DesugaredObject>(ast->location, new_asserts, new_fields);

        } else if (auto *ast = dynamic_cast<ObjectComprehension*>(ast_)) {
            // Hidden variable to allow outer/top binding.
            if (obj_level == 0) {
                const Identifier *hidden_var = alloc->makeIdentifier(U"$");
                auto *body = alloc->make<Self>(E, EF);
                ast->fields.push_back(ObjectField::Local(EF, EF, hidden_var, EF, body, EF));
            }

            desugarFields(ast, ast->fields, obj_level);

            for (ComprehensionSpec &spec : ast->specs)
                desugar(spec.expr, obj_level);

            AST *field = ast->fields.front().expr1;
            AST *value = ast->fields.front().expr2;

            /*  {
                    [arr[0]]: local x = arr[1], y = arr[2], z = arr[3]; val_expr
                    for arr in [ [key_expr, x, y, z] for ...  ]
                }
            */
            auto *_arr = id(U"$arr");
            AST *zero = make<LiteralNumber>(E, EF, "0.0");
            int counter = 1;
            Local::Binds binds;
            Array::Elements arr_e {Array::Element(field, EF)};
            for (ComprehensionSpec &spec : ast->specs) {
                if (spec.kind == ComprehensionSpec::FOR) {
                    std::stringstream num;
                    num << counter++;
                    binds.push_back(bind(
                        spec.var,
                        make<Index>(E, EF, var(_arr), EF, false,
                                    make<LiteralNumber>(E, EF, num.str()), EF, nullptr, EF, nullptr,
                                    EF)));
                    arr_e.emplace_back(var(spec.var), EF);
                }
            }
            AST *arr = make<ArrayComprehension>(
                ast->location,
                EF,
                make<Array>(ast->location, EF, arr_e, false, EF),
                EF,
                false,
                ast->specs,
                EF);
            desugar(arr, obj_level);
            ast_ = make<ObjectComprehensionSimple>(
                ast->location,
                make<Index>(E, EF, var(_arr), EF, false, zero, EF, nullptr, EF, nullptr, EF),
                make<Local>(
                    ast->location,
                    EF,
                    binds,
                    value),
                _arr,
                arr);

        } else if (auto *ast = dynamic_cast<ObjectComprehensionSimple*>(ast_)) {
            desugar(ast->field, obj_level);
            desugar(ast->value, obj_level + 1);
            desugar(ast->array, obj_level);

        } else if (auto *ast = dynamic_cast<Parens*>(ast_)) {
            // Strip parens.
            desugar(ast->expr, obj_level);
            ast_ = ast->expr;

        } else if (dynamic_cast<const Self*>(ast_)) {
            // Nothing to do.

        } else if (auto * ast = dynamic_cast<SuperIndex*>(ast_)) {
            if (ast->id != nullptr) {
                assert(ast->index == nullptr);
                ast->index = str(ast->id->name);
                ast->id = nullptr;
            }
            desugar(ast->index, obj_level);

        } else if (auto *ast = dynamic_cast<Unary*>(ast_)) {
            desugar(ast->expr, obj_level);

        } else if (dynamic_cast<const Var*>(ast_)) {
            // Nothing to do.

        } else {
            std::cerr << "INTERNAL ERROR: Unknown AST: " << ast_ << std::endl;
            std::abort();

        }
    }
Ejemplo n.º 6
0
/** Split Jsonnet source text into a sequence of tokens.
 *
 * Scans the NUL-terminated buffer `input` from beginning to end.  Whitespace and comments do not
 * become tokens of their own: they are accumulated as fodder and attached to the next token that
 * is emitted (or to the final END_OF_FILE token).  Whitespace at the very end of the input is
 * discarded.
 *
 * \param filename Name recorded in each token's LocationRange and in any StaticError thrown.
 * \param input NUL-terminated source text to tokenize.
 * \returns The tokens in source order; the last element is always a Token::END_OF_FILE token.
 * \throws StaticError on an unterminated string or multi-line comment, a malformed text block,
 *         or a character that cannot start any token.  Errors raised by lex_number propagate too.
 */
Tokens jsonnet_lex(const std::string &filename, const char *input)
{
    unsigned long line_number = 1;
    const char *line_start = input;  // First character of the line that `c` is currently on.

    Tokens r;

    const char *c = input;  // Scan cursor.

    Fodder fodder;  // Whitespace / comments seen since the last emitted token.
    bool fresh_line = true;  // Are we tokenizing from the beginning of a new line?

    while (*c!='\0') {
        Token::Kind kind;
        std::string data;
        std::string string_block_indent;
        std::string string_block_term_indent;

        unsigned new_lines, indent;
        lex_ws(c, new_lines, indent, line_start, line_number);

        // If it's the end of the file, discard final whitespace.
        if (*c == '\0')
            break;

        if (new_lines > 0) {
            // Otherwise store whitespace in fodder.
            unsigned blanks = new_lines - 1;
            fodder.emplace_back(FodderElement::LINE_END, blanks, indent, EMPTY);
            fresh_line = true;
        }

        // Columns are 1-based, hence the + 1.
        Location begin(line_number, c - line_start + 1);

        switch (*c) {

            // The following operators should never be combined with subsequent symbols.
            case '{':
            kind = Token::BRACE_L;
            c++;
            break;

            case '}':
            kind = Token::BRACE_R;
            c++;
            break;

            case '[':
            kind = Token::BRACKET_L;
            c++;
            break;

            case ']':
            kind = Token::BRACKET_R;
            c++;
            break;

            case ',':
            kind = Token::COMMA;
            c++;
            break;

            case '.':
            kind = Token::DOT;
            c++;
            break;

            case '(':
            kind = Token::PAREN_L;
            c++;
            break;

            case ')':
            kind = Token::PAREN_R;
            c++;
            break;

            case ';':
            kind = Token::SEMICOLON;
            c++;
            break;

            // Numeric literals.
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
            kind = Token::NUMBER;
            data = lex_number(c, filename, begin);
            break;

            // String literals, delimited by either " or '.  The token data is the raw text
            // between the quotes; escape sequences are copied through untouched.
            case '"':
            case '\'': {
                const char quote = *c;
                c++;  // Skip the opening quote.
                for (; ; ++c) {
                    if (*c == '\0') {
                        throw StaticError(filename, begin, "Unterminated string");
                    }
                    if (*c == quote) {
                        break;
                    }
                    if (*c == '\\' && *(c+1) != '\0') {
                        // Keep the backslash and step onto the escaped character so that an
                        // escaped quote does not terminate the string.
                        data += *c;
                        ++c;
                    }
                    if (*c == '\n') {
                        // Maintain line/column counters.
                        line_number++;
                        line_start = c+1;
                    }
                    data += *c;
                }
                c++;  // Advance beyond the closing quote.
                kind = (quote == '"') ? Token::STRING_DOUBLE : Token::STRING_SINGLE;
            }
            break;

            // Keywords
            default:
            if (is_identifier_first(*c)) {
                std::string id;
                for (; is_identifier(*c); ++c)
                    id += *c;
                if (id == "assert") {
                    kind = Token::ASSERT;
                } else if (id == "else") {
                    kind = Token::ELSE;
                } else if (id == "error") {
                    kind = Token::ERROR;
                } else if (id == "false") {
                    kind = Token::FALSE;
                } else if (id == "for") {
                    kind = Token::FOR;
                } else if (id == "function") {
                    kind = Token::FUNCTION;
                } else if (id == "if") {
                    kind = Token::IF;
                } else if (id == "import") {
                    kind = Token::IMPORT;
                } else if (id == "importstr") {
                    kind = Token::IMPORTSTR;
                } else if (id == "in") {
                    kind = Token::IN;
                } else if (id == "local") {
                    kind = Token::LOCAL;
                } else if (id == "null") {
                    kind = Token::NULL_LIT;
                } else if (id == "self") {
                    kind = Token::SELF;
                } else if (id == "super") {
                    kind = Token::SUPER;
                } else if (id == "tailstrict") {
                    kind = Token::TAILSTRICT;
                } else if (id == "then") {
                    kind = Token::THEN;
                } else if (id == "true") {
                    kind = Token::TRUE;
                } else {
                    // Not a keyword, must be an identifier.
                    kind = Token::IDENTIFIER;
                }
                data = id;

            } else if (is_symbol(*c) || *c == '#') {

                // Single line C++ and Python style comments.
                if (*c == '#' || (*c == '/' && *(c+1) == '/')) {
                    std::vector<std::string> comment(1);
                    unsigned comment_blanks;
                    unsigned comment_indent;
                    lex_until_newline(c, comment[0], comment_blanks, comment_indent,
                                      line_start, line_number);
                    // A comment alone on its line is a paragraph; one that trails code ends
                    // the line.
                    auto fodder_kind =
                        fresh_line ? FodderElement::PARAGRAPH : FodderElement::LINE_END;
                    fodder.emplace_back(fodder_kind, comment_blanks, comment_indent, comment);
                    fresh_line = true;
                    continue;  // We've not got a token, just fodder, so keep scanning.
                }

                // Multi-line C style comment.
                if (*c == '/' && *(c+1) == '*') {

                    // Column offset of the opening '/' within its line.
                    unsigned margin = c - line_start;

                    const char *initial_c = c;
                    c += 2;  // Avoid matching /*/: skip the /* before starting the search for */.

                    while (!(*c == '*' && *(c+1) == '/')) {
                        if (*c == '\0') {
                            auto msg = "Multi-line comment has no terminating */.";
                            throw StaticError(filename, begin, msg);
                        }
                        if (*c == '\n') {
                            // Just keep track of the line / column counters.
                            line_number++;
                            line_start = c+1;
                        }
                        ++c;
                    }
                    c += 2;  // Move the pointer to the char after the closing '/'.

                    std::string comment(initial_c, c - initial_c);  // Includes the "/*" and "*/".

                    // Lex whitespace after comment
                    unsigned new_lines_after, indent_after;
                    lex_ws(c, new_lines_after, indent_after, line_start, line_number);
                    std::vector<std::string> lines;
                    if (comment.find('\n') == std::string::npos) {
                        // Comment looks like /* foo */
                        lines.push_back(comment);
                        fodder.emplace_back(FodderElement::INTERSTITIAL, 0, 0, lines);
                        if (new_lines_after > 0) {
                            fodder.emplace_back(FodderElement::LINE_END, new_lines_after - 1,
                                                indent_after, EMPTY);
                            fresh_line = true;
                        }
                    } else {
                        lines = line_split(comment, margin);
                        assert(lines[0][0] == '/');
                        // Little hack to support PARAGRAPHs with * down the LHS:
                        // Add a space to lines that start with a '*'
                        // NOTE(review): the loop below also inspects lines[0], which the assert
                        // above requires to start with '/'.  As written all_star therefore
                        // appears to always end up false and the hack never fires -- confirm
                        // whether the first line was meant to be excluded from the check.
                        bool all_star = true;
                        for (auto &l : lines) {
                            if (l[0] != '*')
                                all_star = false;
                        }
                        if (all_star) {
                            for (auto &l : lines) {
                                if (l[0] == '*') l = " " + l;
                            }
                        }
                        if (new_lines_after == 0) {
                            // Ensure a line end after the paragraph.
                            new_lines_after = 1;
                            indent_after = 0;
                        }
                        if (!fresh_line)
                            // Ensure a line end before the comment.
                            fodder.emplace_back(FodderElement::LINE_END, 0, 0, EMPTY);
                        fodder.emplace_back(FodderElement::PARAGRAPH, new_lines_after - 1,
                                            indent_after, lines);
                        fresh_line = true;
                    }
                    continue;  // We've not got a token, just fodder, so keep scanning.
                }

                // Text block
                if (*c == '|' && *(c+1) == '|' && *(c+2) == '|' && *(c+3) == '\n') {
                    std::stringstream block;
                    c += 4; // Skip the "|||\n"
                    line_number++;
                    // Skip any blank lines at the beginning of the block.
                    while (*c == '\n') {
                        line_number++;
                        ++c;
                        block << '\n';
                    }
                    line_start = c;
                    // The first content line defines the indentation that every later line of
                    // the block is compared against.
                    const char *first_line = c;
                    int ws_chars = whitespace_check(first_line, c);
                    string_block_indent = std::string(first_line, ws_chars);
                    if (ws_chars == 0) {
                        auto msg = "Text block's first line must start with whitespace.";
                        throw StaticError(filename, begin, msg);
                    }
                    while (true) {
                        assert(ws_chars > 0);
                        // Read up to the \n
                        for (c = &c[ws_chars]; *c != '\n' ; ++c) {
                            if (*c == '\0')
                                throw StaticError(filename, begin, "Unexpected EOF");
                            block << *c;
                        }
                        // Add the \n
                        block << '\n';
                        ++c;
                        line_number++;
                        line_start = c;
                        // Skip any blank lines
                        while (*c == '\n') {
                            line_number++;
                            ++c;
                            block << '\n';
                        }
                        // Examine next line
                        ws_chars = whitespace_check(first_line, c);
                        if (ws_chars == 0) {
                            // End of text block
                            // Skip over any whitespace
                            while (*c == ' ' || *c == '\t') {
                                string_block_term_indent += *c;
                                ++c;
                            }
                            // Expect |||
                            if (!(*c == '|' && *(c+1) == '|' && *(c+2) == '|')) {
                                auto msg = "Text block not terminated with |||";
                                throw StaticError(filename, begin, msg);
                            }
                            c += 3;  // Leave after the last |
                            data = block.str();
                            kind = Token::STRING_BLOCK;
                            break;  // Out of the while loop.
                        }
                    }

                    break;  // Out of the switch.
                }

                // Anything else made of symbol characters is an operator: take the longest run
                // of symbols that does not run into a comment or text block opener.
                const char *operator_begin = c;
                for (; is_symbol(*c) ; ++c) {
                    // Not allowed // in operators
                    if (*c == '/' && *(c+1) == '/') break;
                    // Not allowed /* in operators
                    if (*c == '/' && *(c+1) == '*') break;
                    // Not allowed ||| in operators
                    if (*c == '|' && *(c+1) == '|' && *(c+2) == '|') break;
                }
                // Not allowed to end with a + - ~ ! unless a single char.
                // So, wind it back if we need to (but not too far).
                while (c > operator_begin + 1
                       && (*(c-1) == '+' || *(c-1) == '-' || *(c-1) == '~' || *(c-1) == '!')) {
                    c--;
                }
                data += std::string(operator_begin, c);
                if (data == "$") {
                    kind = Token::DOLLAR;
                    data = "";
                } else {
                    kind = Token::OPERATOR;
                }
            } else {
                std::stringstream ss;
                ss << "Could not lex the character ";
                // Classify via the unsigned value: whether plain char is signed is
                // implementation-defined, so comparing *c directly would report bytes >= 0x80
                // differently from one platform to another.  Control characters and non-ASCII
                // bytes are reported by numeric code, everything else is quoted verbatim.
                const auto uc = static_cast<unsigned char>(*c);
                if (uc < 32 || uc > 127)
                    ss << "code " << static_cast<unsigned>(uc);
                else
                    ss << "'" << *c << "'";
                throw StaticError(filename, begin, ss.str());
            }
        }

        // `c` is one past the token, so with 1-based columns this is the token's last character.
        Location end(line_number, c - line_start);
        r.emplace_back(kind, fodder, data, string_block_indent, string_block_term_indent,
                       LocationRange(filename, begin, end));
        fodder.clear();
        fresh_line = false;
    }

    // Any fodder still pending (e.g. trailing comments) rides on the END_OF_FILE token.
    Location end(line_number, c - line_start + 1);
    r.emplace_back(Token::END_OF_FILE, fodder, "", "", "", LocationRange(filename, end, end));
    return r;
}