std::string lex_number(const char *&c, const std::string &filename, const Location &begin) { // This function should be understood with reference to the linked image: // http://www.json.org/number.gif // Note, we deviate from the json.org documentation as follows: // There is no reason to lex negative numbers as atomic tokens, it is better to parse them // as a unary operator combined with a numeric literal. This avoids x-1 being tokenized as // <identifier> <number> instead of the intended <identifier> <binop> <number>. enum State { BEGIN, AFTER_ZERO, AFTER_ONE_TO_NINE, AFTER_DOT, AFTER_DIGIT, AFTER_E, AFTER_EXP_SIGN, AFTER_EXP_DIGIT } state; std::string r; state = BEGIN; while (true) { switch (state) { case BEGIN: switch (*c) { case '0': state = AFTER_ZERO; break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': state = AFTER_ONE_TO_NINE; break; default: throw StaticError(filename, begin, "Couldn't lex number"); } break; case AFTER_ZERO: switch (*c) { case '.': state = AFTER_DOT; break; case 'e': case 'E': state = AFTER_E; break; default: goto end; } break; case AFTER_ONE_TO_NINE: switch (*c) { case '.': state = AFTER_DOT; break; case 'e': case 'E': state = AFTER_E; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': state = AFTER_ONE_TO_NINE; break; default: goto end; } break; case AFTER_DOT: switch (*c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': state = AFTER_DIGIT; break; default: { std::stringstream ss; ss << "Couldn't lex number, junk after decimal point: " << *c; throw StaticError(filename, begin, ss.str()); } } break; case AFTER_DIGIT: switch (*c) { case 'e': case 'E': state = AFTER_E; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': state = AFTER_DIGIT; break; default: goto end; } break; case AFTER_E: switch (*c) { case '+': case '-': state = 
AFTER_EXP_SIGN; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': state = AFTER_EXP_DIGIT; break; default: { std::stringstream ss; ss << "Couldn't lex number, junk after 'E': " << *c; throw StaticError(filename, begin, ss.str()); } } break; case AFTER_EXP_SIGN: switch (*c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': state = AFTER_EXP_DIGIT; break; default: { std::stringstream ss; ss << "Couldn't lex number, junk after exponent sign: " << *c; throw StaticError(filename, begin, ss.str()); } } break; case AFTER_EXP_DIGIT: switch (*c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': state = AFTER_EXP_DIGIT; break; default: goto end; } break; } r += *c; c++; } end: c--; return r; }
std::list<Token> jsonnet_lex(const std::string &filename, const char *input) { unsigned long line_number = 1; const char *line_start = input; std::list<Token> r; const char *c = input; for ( ; *c!='\0' ; ++c) { Location begin(line_number, c - line_start + 1); Token::Kind kind; std::string data; switch (*c) { // Skip non-\n whitespace case ' ': case '\t': case '\r': continue; // Skip \n and maintain line numbers case '\n': line_number++; line_start = c+1; continue; case '{': kind = Token::BRACE_L; break; case '}': kind = Token::BRACE_R; break; case '[': kind = Token::BRACKET_L; break; case ']': kind = Token::BRACKET_R; break; case ':': kind = Token::COLON; break; case ',': kind = Token::COMMA; break; case '$': kind = Token::DOLLAR; break; case '.': kind = Token::DOT; break; case '(': kind = Token::PAREN_L; break; case ')': kind = Token::PAREN_R; break; case ';': kind = Token::SEMICOLON; break; // Special cases for unary operators. case '!': kind = Token::OPERATOR; if (*(c+1) == '=') { c++; data = "!="; } else { data = "!"; } break; case '~': kind = Token::OPERATOR; data = "~"; break; case '+': kind = Token::OPERATOR; data = "+"; break; case '-': kind = Token::OPERATOR; data = "-"; break; // Numeric literals. case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': kind = Token::NUMBER; data = lex_number(c, filename, begin); break; // String literals. case '"': { c++; for (; ; ++c) { if (*c == '\0') { throw StaticError(filename, begin, "Unterminated string"); } if (*c == '"') { break; } switch (*c) { case '\\': switch (*(++c)) { case '"': data += *c; break; case '\\': data += *c; break; case '/': data += *c; break; case 'b': data += '\b'; break; case 'f': data += '\f'; break; case 'n': data += '\n'; break; case 'r': data += '\r'; break; case 't': data += '\t'; break; case 'u': { ++c; // Consume the 'u'. unsigned long codepoint = 0; // Expect 4 hex digits. 
for (unsigned i=0 ; i<4 ; ++i) { auto x = (unsigned char)(c[i]); unsigned digit; if (x == '\0') { auto msg = "Unterminated string"; throw StaticError(filename, begin, msg); } else if (x == '"') { auto msg = "Truncated unicode escape sequence in " "string literal."; throw StaticError(filename, begin, msg); } else if (x >= '0' && x <= '9') { digit = x - '0'; } else if (x >= 'a' && x <= 'f') { digit = x - 'a' + 10; } else if (x >= 'A' && x <= 'F') { digit = x - 'A' + 10; } else { std::stringstream ss; ss << "Malformed unicode escape character, " << "should be hex: '" << x << "'"; throw StaticError(filename, begin, ss.str()); } codepoint *= 16; codepoint += digit; } encode_utf8(codepoint, data); // Leave us on the last char, ready for the ++c at // the outer for loop. c += 3; } break; case '\0': { auto msg = "Truncated escape sequence in string literal."; throw StaticError(filename, begin, msg); } default: { std::stringstream ss; ss << "Unknown escape sequence in string literal: '" << *c << "'"; throw StaticError(filename, begin, ss.str()); } } break; // Treat as a regular letter, but maintain line/column counters. case '\n': line_number++; line_start = c+1; data += *c; break; default: // Just a regular letter. 
data += *c; } } kind = Token::STRING; } break; // Keywords default: if (is_identifier_first(*c)) { std::string id; for (; *c != '\0' ; ++c) { if (!is_identifier(*c)) { break; } id += *c; } --c; if (id == "assert") { kind = Token::ASSERT; } else if (id == "else") { kind = Token::ELSE; } else if (id == "error") { kind = Token::ERROR; } else if (id == "false") { kind = Token::FALSE; } else if (id == "for") { kind = Token::FOR; } else if (id == "function") { kind = Token::FUNCTION; } else if (id == "if") { kind = Token::IF; } else if (id == "import") { kind = Token::IMPORT; } else if (id == "importstr") { kind = Token::IMPORTSTR; } else if (id == "in") { kind = Token::IN; } else if (id == "local") { kind = Token::LOCAL; } else if (id == "null") { kind = Token::NULL_LIT; } else if (id == "self") { kind = Token::SELF; } else if (id == "super") { kind = Token::SUPER; } else if (id == "tailstrict") { kind = Token::TAILSTRICT; } else if (id == "then") { kind = Token::THEN; } else if (id == "true") { kind = Token::TRUE; } else { // Not a keyword, must be an identifier. kind = Token::IDENTIFIER; data = id; } } else if (is_symbol(*c)) { // Single line C++ style comment if (*c == '/' && *(c+1) == '/') { while (*c != '\0' && *c != '\n') { ++c; } // Leaving it on the \n allows processing of \n on next iteration, // i.e. managing of the line & column counter. c--; continue; } // Single line # comment if (*c == '#') { while (*c != '\0' && *c != '\n') { ++c; } // Leaving it on the \n allows processing of \n on next iteration, // i.e. managing of the line & column counter. c--; continue; } // Multi-line comment. if (*c == '/' && *(c+1) == '*') { c += 2; // Avoid matching /*/: skip the /* before starting the search for */. while (*c != '\0' && !(*c == '*' && *(c+1) == '/')) { if (*c == '\n') { // Just keep track of the line / column counters. 
line_number++; line_start = c+1; } ++c; } if (*c == '\0') { auto msg = "Multi-line comment has no terminating */."; throw StaticError(filename, begin, msg); } // Leave the counter on the closing /. c++; continue; } // Text block if (*c == '|' && *(c+1) == '|' && *(c+2) == '|' && *(c+3) == '\n') { std::stringstream block; c += 4; // Skip the "|||\n" line_number++; line_start = c; const char *first_line = c; int ws_chars = whitespace_check(first_line, c); if (ws_chars == 0) { auto msg = "Text block's first line must start with whitespace."; throw StaticError(filename, begin, msg); } while (true) { assert(ws_chars > 0); // Read up to the \n for (c = &c[ws_chars]; *c != '\n' ; ++c) { if (*c == '\0') throw StaticError(filename, begin, "Unexpected EOF"); block << *c; } // Add the \n block << '\n'; ++c; line_number++; line_start = c; // Examine next line ws_chars = whitespace_check(first_line, c); if (ws_chars == 0) { // End of text block // Skip over any whitespace while (*c == ' ' || *c == '\t') ++c; // Expect ||| if (!(*c == '|' && *(c+1) == '|' && *(c+2) == '|')) { auto msg = "Text block not terminated with |||"; throw StaticError(filename, begin, msg); } c += 2; // Leave on the last | data = block.str(); kind = Token::STRING; break; } } break; // Out of the switch. } for (; *c != '\0' ; ++c) { if (!is_symbol(*c)) { break; } data += *c; } --c; kind = Token::OPERATOR; } else { std::stringstream ss; ss << "Could not lex the character "; auto uc = (unsigned char)(*c); if (*c < 32) ss << "code " << unsigned(uc); else ss << "'" << *c << "'"; throw StaticError(filename, begin, ss.str()); } break; } Location end(line_number, c - line_start + 1); r.push_back(Token(kind, data, LocationRange(filename, begin, end))); } Location end(line_number, c - line_start + 1); r.push_back(Token(Token::END_OF_FILE, "", LocationRange(filename, end, end))); return r; }
/** Statically analyse the given ast.
 *
 * Checks that every variable is defined, that function parameters are unique and that
 * self / super are only used inside an object.  As a side effect, the free variables of every
 * visited node are appended to that node's freeVariables.
 *
 * \param ast_ The AST.
 * \param in_object Whether or not ast_ is within the lexical scope of an object AST.
 * \param vars The variables defined within lexical scope of ast_.
 * \returns The free variables in ast_.
 * \throws StaticError on an unknown variable, a duplicate function parameter, or self / super
 *         outside of an object.
 */
static IdSet static_analysis(AST *ast_, bool in_object, const IdSet &vars)
{
    IdSet r;

    if (auto *ast = dynamic_cast<const Apply*>(ast_)) {
        append(r, static_analysis(ast->target, in_object, vars));
        for (AST *arg : ast->arguments)
            append(r, static_analysis(arg, in_object, vars));

    } else if (auto *ast = dynamic_cast<const Array*>(ast_)) {
        for (AST *el : ast->elements)
            append(r, static_analysis(el, in_object, vars));

    } else if (auto *ast = dynamic_cast<const Binary*>(ast_)) {
        append(r, static_analysis(ast->left, in_object, vars));
        append(r, static_analysis(ast->right, in_object, vars));

    } else if (dynamic_cast<const BuiltinFunction*>(ast_)) {
        // Nothing to do.

    } else if (auto *ast = dynamic_cast<const Conditional*>(ast_)) {
        append(r, static_analysis(ast->cond, in_object, vars));
        append(r, static_analysis(ast->branchTrue, in_object, vars));
        append(r, static_analysis(ast->branchFalse, in_object, vars));

    } else if (auto *ast = dynamic_cast<const Error*>(ast_)) {
        // Accumulate rather than return early, so that the Error node itself gets its
        // freeVariables recorded at the bottom of this function like every other node.
        append(r, static_analysis(ast->expr, in_object, vars));

    } else if (auto *ast = dynamic_cast<const Function*>(ast_)) {
        auto new_vars = vars;
        IdSet params;
        for (auto *p : ast->parameters) {
            if (params.find(p) != params.end()) {
                throw StaticError(ast_->location, "Duplicate function parameter: " + p->name);
            }
            params.insert(p);
            new_vars.insert(p);
        }
        // Parameters are bound by the function, so they are not free in it.
        auto fv = static_analysis(ast->body, in_object, new_vars);
        for (auto *p : ast->parameters)
            fv.erase(p);
        append(r, fv);

    } else if (dynamic_cast<const Import*>(ast_)) {
        // Nothing to do.

    } else if (dynamic_cast<const Importstr*>(ast_)) {
        // Nothing to do.

    } else if (auto *ast = dynamic_cast<const Index*>(ast_)) {
        append(r, static_analysis(ast->target, in_object, vars));
        append(r, static_analysis(ast->index, in_object, vars));

    } else if (auto *ast = dynamic_cast<const Local*>(ast_)) {
        IdSet ast_vars;
        for (const auto &bind: ast->binds) {
            ast_vars.insert(bind.first);
        }
        // All binds are in scope in every bind body as well as in the local's body.
        auto new_vars = vars;
        append(new_vars, ast_vars);
        IdSet fvs;
        for (const auto &bind: ast->binds)
            append(fvs, static_analysis(bind.second, in_object, new_vars));

        append(fvs, static_analysis(ast->body, in_object, new_vars));

        for (const auto &bind: ast->binds)
            fvs.erase(bind.first);

        append(r, fvs);

    } else if (dynamic_cast<const LiteralBoolean*>(ast_)) {
        // Nothing to do.

    } else if (dynamic_cast<const LiteralNumber*>(ast_)) {
        // Nothing to do.

    } else if (dynamic_cast<const LiteralString*>(ast_)) {
        // Nothing to do.

    } else if (dynamic_cast<const LiteralNull*>(ast_)) {
        // Nothing to do.

    } else if (auto *ast = dynamic_cast<Object*>(ast_)) {
        for (const auto &field : ast->fields) {
            // Field names are analysed in the enclosing scope, bodies inside the object.
            append(r, static_analysis(field.name, in_object, vars));
            append(r, static_analysis(field.body, true, vars));
        }

    } else if (auto *ast = dynamic_cast<ObjectComposition*>(ast_)) {
        auto new_vars = vars;
        new_vars.insert(ast->id);
        append(r, static_analysis(ast->field, false, new_vars));
        append(r, static_analysis(ast->value, true, new_vars));
        // The comprehension variable is bound here; the array is analysed without it.
        r.erase(ast->id);
        append(r, static_analysis(ast->array, in_object, vars));

    } else if (dynamic_cast<const Self*>(ast_)) {
        if (!in_object)
            throw StaticError(ast_->location, "Can't use self outside of an object.");

    } else if (dynamic_cast<const Super*>(ast_)) {
        if (!in_object)
            throw StaticError(ast_->location, "Can't use super outside of an object.");

    } else if (auto *ast = dynamic_cast<const Unary*>(ast_)) {
        append(r, static_analysis(ast->expr, in_object, vars));

    } else if (auto *ast = dynamic_cast<const Var*>(ast_)) {
        if (vars.find(ast->id) == vars.end()) {
            throw StaticError(ast->location, "Unknown variable: "+ast->id->name);
        }
        r.insert(ast->id);

    } else {
        std::cerr << "INTERNAL ERROR: Unknown AST: " << ast_ << std::endl;
        std::abort();

    }

    // Record the result on the node itself as well as returning it.
    for (auto *id : r)
        ast_->freeVariables.push_back(id);

    return r;
}
/** Decode the backslash escape sequences of a string literal.
 *
 * Handles \" \' \\ \/ \b \f \n \r \t and \uXXXX (exactly four hex digits, appended as a single
 * code unit).  Every other character is copied through unchanged.
 *
 * \param loc Location reported in any StaticError raised.
 * \param s The literal's contents, without the surrounding quotes.
 * \returns The unescaped string.
 * \throws StaticError on a truncated, malformed or unknown escape sequence.
 */
String jsonnet_string_unescape(const LocationRange &loc, const String &s)
{
    String r;
    for (const char32_t *c = s.c_str(); *c != U'\0' ; ++c) {
        switch (*c) {
            case '\\':
            switch (*(++c)) {
                case '"':
                case '\'':
                r += *c;
                break;

                case '\\':
                r += *c;
                break;

                case '/':
                r += *c;
                break;

                case 'b':
                r += '\b';
                break;

                case 'f':
                r += '\f';
                break;

                case 'n':
                r += '\n';
                break;

                case 'r':
                r += '\r';
                break;

                case 't':
                r += '\t';
                break;

                case 'u': {
                    ++c;  // Consume the 'u'.
                    unsigned long codepoint = 0;
                    // Expect 4 hex digits.
                    for (unsigned i=0 ; i<4 ; ++i) {
                        // Compare the full 32-bit value.  Narrowing it to unsigned char would
                        // let e.g. U+0130 pass as the digit '0' and make U+0100 look like the
                        // terminating NUL.
                        const char32_t x = c[i];
                        unsigned digit;
                        if (x == U'\0') {
                            auto msg = "Truncated unicode escape sequence in string literal.";
                            throw StaticError(loc, msg);
                        } else if (x >= U'0' && x <= U'9') {
                            digit = x - U'0';
                        } else if (x >= U'a' && x <= U'f') {
                            digit = x - U'a' + 10;
                        } else if (x >= U'A' && x <= U'F') {
                            digit = x - U'A' + 10;
                        } else {
                            std::stringstream ss;
                            std::string utf8;
                            encode_utf8(x, utf8);
                            ss << "Malformed unicode escape character, "
                               << "should be hex: '" << utf8 << "'";
                            throw StaticError(loc, ss.str());
                        }
                        codepoint *= 16;
                        codepoint += digit;
                    }

                    r += static_cast<char32_t>(codepoint);

                    // Leave us on the last char, ready for the ++c at
                    // the outer for loop.
                    c += 3;
                }
                break;

                case '\0': {
                    auto msg = "Truncated escape sequence in string literal.";
                    throw StaticError(loc, msg);
                }

                default: {
                    std::stringstream ss;
                    std::string utf8;
                    encode_utf8(*c, utf8);
                    ss << "Unknown escape sequence in string literal: '" << utf8 << "'";
                    throw StaticError(loc, ss.str());
                }
            }
            break;

            default:
            // Just a regular letter.
            r += *c;
        }
    }
    return r;
}
/** Rewrite syntax sugar in the given AST into core constructs, in place.
 *
 * Recurses through the tree.  Where a node has to be replaced by a different kind of node
 * (e.g. a comprehension, an assert, parentheses), ast_ is reassigned to the replacement.
 *
 * \param ast_ The AST to desugar; may be reassigned to point at a replacement node.
 * \param obj_level How many objects lexically enclose ast_.  Zero means no enclosing object,
 *                  in which case a use of $ is rejected.
 * \throws StaticError if $ is used with no enclosing object, or if a string literal contains
 *         an invalid escape sequence.
 */
void desugar(AST *&ast_, unsigned obj_level)
{
    if (auto *ast = dynamic_cast<Apply*>(ast_)) {
        desugar(ast->target, obj_level);
        for (Apply::Arg &arg : ast->args)
            desugar(arg.expr, obj_level);

    } else if (auto *ast = dynamic_cast<ApplyBrace*>(ast_)) {
        // e { ... }  becomes  e + { ... }
        desugar(ast->left, obj_level);
        desugar(ast->right, obj_level);
        ast_ = alloc->make<Binary>(ast->location, ast->openFodder,
                                   ast->left, EF, BOP_PLUS, ast->right);

    } else if (auto *ast = dynamic_cast<Array*>(ast_)) {
        for (auto &el : ast->elements)
            desugar(el.expr, obj_level);

    } else if (auto *ast = dynamic_cast<ArrayComprehension*>(ast_)) {
        for (ComprehensionSpec &spec : ast->specs)
            desugar(spec.expr, obj_level);
        desugar(ast->body, obj_level + 1);

        int n = ast->specs.size();
        AST *zero = make<LiteralNumber>(E, EF, "0.0");
        AST *one = make<LiteralNumber>(E, EF, "1.0");
        auto *_r = id(U"$r");
        auto *_l = id(U"$l");
        // One index variable and one helper function name per spec.
        std::vector<const Identifier*> _i(n);
        for (int i = 0; i < n ; ++i) {
            StringStream ss;
            ss << U"$i_" << i;
            _i[i] = id(ss.str());
        }
        std::vector<const Identifier*> _aux(n);
        for (int i = 0; i < n ; ++i) {
            StringStream ss;
            ss << U"$aux_" << i;
            _aux[i] = id(ss.str());
        }

        // Build it from the inside out.  We keep wrapping 'in' with more ASTs.
        assert(ast->specs[0].kind == ComprehensionSpec::FOR);

        int last_for = n - 1;
        while (ast->specs[last_for].kind != ComprehensionSpec::FOR)
            last_for--;
        // $aux_{last_for}($i_{last_for} + 1, $r + [body])
        AST *in = make<Apply>(
            ast->body->location,
            EF,
            var(_aux[last_for]),
            EF,
            Apply::Args {
                { make<Binary>(E, EF, var(_i[last_for]), EF, BOP_PLUS, one), EF},
                { make<Binary>(E, EF, var(_r), EF, BOP_PLUS, singleton(ast->body)), EF}
            },
            false,  // trailingComma
            EF,
            EF,
            true  // tailstrict
        );
        for (int i = n - 1; i >= 0 ; --i) {
            const ComprehensionSpec &spec = ast->specs[i];
            AST *out;
            if (i > 0) {
                int prev_for = i - 1;
                while (ast->specs[prev_for].kind != ComprehensionSpec::FOR)
                    prev_for--;

                // aux_{prev_for}($i_{prev_for} + 1, $r)
                out = make<Apply>(  // False branch.
                    E,
                    EF,
                    var(_aux[prev_for]),
                    EF,
                    Apply::Args {
                        { make<Binary>(E, EF, var(_i[prev_for]), EF, BOP_PLUS, one), EF, },
                        { var(_r), EF, }
                    },
                    false, // trailingComma
                    EF,
                    EF,
                    true  // tailstrict
                );
            } else {
                out = var(_r);
            }
            switch (spec.kind) {
                case ComprehensionSpec::IF: {
                    /*
                        if [[[...cond...]]] then
                            [[[...in...]]]
                        else
                            [[[...out...]]]
                    */
                    in = make<Conditional>(
                        ast->location,
                        EF,
                        spec.expr,
                        EF,
                        in,  // True branch.
                        EF,
                        out);  // False branch.
                } break;
                case ComprehensionSpec::FOR: {
                    /*
                        local $l = [[[...array...]]]
                              aux_{i}(i_{i}, r) =
                            if i_{i} >= std.length($l) then
                                [[[...out...]]]
                            else
                                local [[[...var...]]] = $l[i_{i}];
                                [[[...in...]]];`
                        if std.type($l) != "array" then
                            error "In comprehension, can only iterate over array.."
                        else
                            aux_{i}(0, r) tailstrict;
                    */
                    in = make<Local>(
                        ast->location,
                        EF,
                        Local::Binds {
                            bind(_l, spec.expr),  // Need to check expr is an array
                            bind(_aux[i], make<Function>(
                                ast->location,
                                EF,
                                EF,
                                std::vector<Param>{Param(EF, _i[i], EF), Param(EF, _r, EF)},
                                false,  // trailingComma
                                EF,
                                make<Conditional>(
                                    ast->location,
                                    EF,
                                    make<Binary>(
                                        E, EF, var(_i[i]), EF, BOP_GREATER_EQ, length(var(_l))),
                                    EF,
                                    out,
                                    EF,
                                    make<Local>(
                                        ast->location,
                                        EF,
                                        singleBind(
                                            spec.var,
                                            make<Index>(E, EF, var(_l), EF, false, var(_i[i]),
                                                        EF, nullptr, EF, nullptr, EF)
                                        ),
                                        in)
                                )
                            ))},
                        make<Conditional>(
                            ast->location,
                            EF,
                            equals(ast->location, type(var(_l)), str(U"array")),
                            EF,
                            make<Apply>(
                                E,
                                EF,
                                var(_aux[i]),
                                EF,
                                Apply::Args {
                                    {zero, EF},
                                    {
                                        // The outermost loop starts from an empty result.
                                        i == 0
                                        ? make<Array>(E, EF, Array::Elements{}, false, EF)
                                        : static_cast<AST*>(var(_r)),
                                        EF,
                                    }
                                },
                                false,  // trailingComma
                                EF,
                                EF,
                                true),  // tailstrict
                            EF,
                            error(ast->location, U"In comprehension, can only iterate over array.")));
                } break;
            }
        }

        ast_ = in;

    } else if (auto *ast = dynamic_cast<Assert*>(ast_)) {
        desugar(ast->cond, obj_level);
        if (ast->message == nullptr) {
            ast->message = str(U"Assertion failed.");
        }
        desugar(ast->message, obj_level);
        desugar(ast->rest, obj_level);

        // if cond then rest else error msg
        AST *branch_false = alloc->make<Error>(ast->location, EF, ast->message);
        ast_ = alloc->make<Conditional>(ast->location, ast->openFodder,
                                        ast->cond, EF, ast->rest, EF, branch_false);

    } else if (auto *ast = dynamic_cast<Binary*>(ast_)) {
        desugar(ast->left, obj_level);
        desugar(ast->right, obj_level);

        bool invert = false;

        switch (ast->op) {
            case BOP_PERCENT: {
                // a % b  becomes  std.mod(a, b)
                AST *f_mod = alloc->make<Index>(E, EF, std(), EF, false, str(U"mod"), EF,
                                                nullptr, EF, nullptr, EF);
                Apply::Args args = {{ast->left, EF}, {ast->right, EF}};
                ast_ = alloc->make<Apply>(ast->location, ast->openFodder, f_mod, EF, args,
                                          false, EF, EF, false);
            } break;

            case BOP_MANIFEST_UNEQUAL:
            invert = true;
            // Deliberate fall through: != is built as the negation of ==.
            case BOP_MANIFEST_EQUAL: {
                ast_ = equals(ast->location, ast->left, ast->right);
                if (invert)
                    ast_ = alloc->make<Unary>(ast->location, ast->openFodder, UOP_NOT, ast_);
            } break;

            default:;
            // Otherwise don't change it.
        }

    } else if (dynamic_cast<const BuiltinFunction*>(ast_)) {
        // Nothing to do.

    } else if (auto *ast = dynamic_cast<Conditional*>(ast_)) {
        desugar(ast->cond, obj_level);
        desugar(ast->branchTrue, obj_level);
        // A missing else branch evaluates to null.
        if (ast->branchFalse == nullptr)
            ast->branchFalse = alloc->make<LiteralNull>(LocationRange(), EF);
        desugar(ast->branchFalse, obj_level);

    } else if (auto *ast = dynamic_cast<Dollar*>(ast_)) {
        if (obj_level == 0) {
            throw StaticError(ast->location, "No top-level object found.");
        }
        // $ becomes a reference to the hidden "$" local added to the outermost object.
        ast_ = alloc->make<Var>(ast->location, EF, alloc->makeIdentifier(U"$"));

    } else if (auto *ast = dynamic_cast<Error*>(ast_)) {
        desugar(ast->expr, obj_level);

    } else if (auto *ast = dynamic_cast<Function*>(ast_)) {
        desugar(ast->body, obj_level);

    } else if (dynamic_cast<const Import*>(ast_)) {
        // Nothing to do.

    } else if (dynamic_cast<const Importstr*>(ast_)) {
        // Nothing to do.

    } else if (auto *ast = dynamic_cast<Index*>(ast_)) {
        desugar(ast->target, obj_level);
        if (ast->isSlice) {
            // Omitted slice components are passed to std.slice as null.
            if (ast->index == nullptr)
                ast->index = make<LiteralNull>(ast->location, EF);
            desugar(ast->index, obj_level);

            if (ast->end == nullptr)
                ast->end = make<LiteralNull>(ast->location, EF);
            desugar(ast->end, obj_level);

            if (ast->step == nullptr)
                ast->step = make<LiteralNull>(ast->location, EF);
            desugar(ast->step, obj_level);

            // target[index:end:step]  becomes  std.slice(target, index, end, step)
            ast_ = make<Apply>(
                ast->location,
                EF,
                make<Index>(
                    E, EF, std(), EF, false, str(U"slice"), EF, nullptr, EF, nullptr, EF),
                EF,
                std::vector<Apply::Arg>{
                    {ast->target, EF},
                    {ast->index, EF},
                    {ast->end, EF},
                    {ast->step, EF},
                },
                false,  // trailing comma
                EF,
                EF,
                false  // tailstrict
            );
        } else {
            // target.id  becomes  target["id"]
            if (ast->id != nullptr) {
                assert(ast->index == nullptr);
                ast->index = str(ast->id->name);
                ast->id = nullptr;
            }
            desugar(ast->index, obj_level);
        }

    } else if (auto *ast = dynamic_cast<Local*>(ast_)) {
        for (auto &bind: ast->binds)
            desugar(bind.body, obj_level);
        desugar(ast->body, obj_level);

        // local f(x) = e  becomes  local f = function(x) e
        for (auto &bind: ast->binds) {
            if (bind.functionSugar) {
                bind.body = alloc->make<Function>(
                    ast->location,
                    ast->openFodder,
                    bind.parenLeftFodder,
                    bind.params,
                    false,
                    bind.parenRightFodder,
                    bind.body);
                bind.functionSugar = false;
                bind.params.clear();
            }
        }

    } else if (dynamic_cast<const LiteralBoolean*>(ast_)) {
        // Nothing to do.

    } else if (dynamic_cast<const LiteralNumber*>(ast_)) {
        // Nothing to do.

    } else if (auto *ast = dynamic_cast<LiteralString*>(ast_)) {
        // NOTE(review): unescaping is not idempotent, and nothing recorded on the node says
        // it has already been done, so a string literal must only be desugared once.
        if (ast->tokenKind != LiteralString::BLOCK) {
            ast->value = jsonnet_string_unescape(ast->location, ast->value);
        }
        ast->tokenKind = LiteralString::DOUBLE;
        ast->blockIndent.clear();

    } else if (dynamic_cast<const LiteralNull*>(ast_)) {
        // Nothing to do.

    } else if (auto *ast = dynamic_cast<DesugaredObject*>(ast_)) {
        for (auto &field : ast->fields) {
            desugar(field.name, obj_level);
            desugar(field.body, obj_level + 1);
        }
        // Iterate by reference: desugar() may replace the node, and the replacement has to be
        // stored back into the object rather than into a loop-local copy of the pointer.
        for (AST *&assertion : ast->asserts) {
            desugar(assertion, obj_level + 1);
        }

    } else if (auto *ast = dynamic_cast<Object*>(ast_)) {
        // Hidden variable to allow outer/top binding.
        if (obj_level == 0) {
            const Identifier *hidden_var = alloc->makeIdentifier(U"$");
            auto *body = alloc->make<Self>(E, EF);
            ast->fields.push_back(ObjectField::Local(EF, EF, hidden_var, EF, body, EF));
        }

        desugarFields(ast, ast->fields, obj_level);

        DesugaredObject::Fields new_fields;
        ASTs new_asserts;
        for (const ObjectField &field : ast->fields) {
            if (field.kind == ObjectField::ASSERT) {
                new_asserts.push_back(field.expr2);
            } else if (field.kind == ObjectField::FIELD_EXPR) {
                new_fields.emplace_back(field.hide, field.expr1, field.expr2);
            } else {
                // NOTE(review): unlike the other internal errors in this function, this one
                // only logs; the offending field is then dropped from the object.
                std::cerr << "INTERNAL ERROR: field should have been desugared: "
                          << field.kind << std::endl;
            }
        }
        ast_ = alloc->make<DesugaredObject>(ast->location, new_asserts, new_fields);

    } else if (auto *ast = dynamic_cast<ObjectComprehension*>(ast_)) {
        // Hidden variable to allow outer/top binding.
        if (obj_level == 0) {
            const Identifier *hidden_var = alloc->makeIdentifier(U"$");
            auto *body = alloc->make<Self>(E, EF);
            ast->fields.push_back(ObjectField::Local(EF, EF, hidden_var, EF, body, EF));
        }

        desugarFields(ast, ast->fields, obj_level);

        // The spec expressions are deliberately NOT desugared here.  They are handed to the
        // ArrayComprehension built below, and desugar(arr, obj_level) desugars them at this
        // same obj_level.  Doing it here as well would process them twice, which is not
        // harmless: string literals would be unescaped a second time.

        AST *field = ast->fields.front().expr1;
        AST *value = ast->fields.front().expr2;

        /*  {
                [arr[0]]: local x = arr[1], y = arr[2], z = arr[3]; val_expr
                for arr in [ [key_expr, x, y, z] for ...  ]
            }
        */
        auto *_arr = id(U"$arr");
        AST *zero = make<LiteralNumber>(E, EF, "0.0");
        int counter = 1;
        Local::Binds binds;
        Array::Elements arr_e {Array::Element(field, EF)};
        for (ComprehensionSpec &spec : ast->specs) {
            if (spec.kind == ComprehensionSpec::FOR) {
                std::stringstream num;
                num << counter++;
                binds.push_back(bind(
                    spec.var,
                    make<Index>(E, EF, var(_arr), EF, false,
                                make<LiteralNumber>(E, EF, num.str()), EF, nullptr, EF,
                                nullptr, EF)));
                arr_e.emplace_back(var(spec.var), EF);
            }
        }
        AST *arr = make<ArrayComprehension>(
            ast->location,
            EF,
            make<Array>(ast->location, EF, arr_e, false, EF),
            EF,
            false,
            ast->specs,
            EF);
        desugar(arr, obj_level);
        ast_ = make<ObjectComprehensionSimple>(
            ast->location,
            make<Index>(E, EF, var(_arr), EF, false, zero, EF, nullptr, EF, nullptr, EF),
            make<Local>(
                ast->location,
                EF,
                binds,
                value),
            _arr,
            arr);

    } else if (auto *ast = dynamic_cast<ObjectComprehensionSimple*>(ast_)) {
        desugar(ast->field, obj_level);
        desugar(ast->value, obj_level + 1);
        desugar(ast->array, obj_level);

    } else if (auto *ast = dynamic_cast<Parens*>(ast_)) {
        // Strip parens.
        desugar(ast->expr, obj_level);
        ast_ = ast->expr;

    } else if (dynamic_cast<const Self*>(ast_)) {
        // Nothing to do.

    } else if (auto * ast = dynamic_cast<SuperIndex*>(ast_)) {
        // super.id  becomes  super["id"]
        if (ast->id != nullptr) {
            assert(ast->index == nullptr);
            ast->index = str(ast->id->name);
            ast->id = nullptr;
        }
        desugar(ast->index, obj_level);

    } else if (auto *ast = dynamic_cast<Unary*>(ast_)) {
        desugar(ast->expr, obj_level);

    } else if (dynamic_cast<const Var*>(ast_)) {
        // Nothing to do.

    } else {
        std::cerr << "INTERNAL ERROR: Unknown AST: " << ast_ << std::endl;
        std::abort();

    }
}
/** Split Jsonnet source text into tokens, keeping whitespace and comments as fodder.
 *
 * Comments and line breaks do not become tokens.  They are collected as fodder elements and
 * attached to the next token emitted (or to the final END_OF_FILE token).  String tokens
 * carry the raw text between the quotes with escape sequences left undecoded; text blocks
 * additionally record their indentation and the indentation of the terminating |||.
 *
 * \param filename Used to label token locations and errors.
 * \param input NUL-terminated source text.
 * \returns The tokens in source order, followed by one END_OF_FILE token.
 * \throws StaticError on unterminated strings / comments / text blocks, a ||| that does not
 *         start a valid text block, malformed numbers, or a character that cannot start any
 *         token.
 */
Tokens jsonnet_lex(const std::string &filename, const char *input)
{
    unsigned long line_number = 1;
    const char *line_start = input;

    Tokens r;

    const char *c = input;

    Fodder fodder;
    bool fresh_line = true;  // Are we tokenizing from the beginning of a new line?

    // Invariant: every branch that produces a token leaves c one PAST the token's last
    // character.
    while (*c!='\0') {
        Token::Kind kind;
        std::string data;
        std::string string_block_indent;
        std::string string_block_term_indent;

        unsigned new_lines, indent;
        lex_ws(c, new_lines, indent, line_start, line_number);

        // If it's the end of the file, discard final whitespace.
        if (*c == '\0')
            break;

        if (new_lines > 0) {
            // Otherwise store whitespace in fodder.
            unsigned blanks = new_lines - 1;
            fodder.emplace_back(FodderElement::LINE_END, blanks, indent, EMPTY);
            fresh_line = true;
        }

        Location begin(line_number, c - line_start + 1);

        switch (*c) {

            // The following operators should never be combined with subsequent symbols.
            case '{':
            kind = Token::BRACE_L;
            c++;
            break;

            case '}':
            kind = Token::BRACE_R;
            c++;
            break;

            case '[':
            kind = Token::BRACKET_L;
            c++;
            break;

            case ']':
            kind = Token::BRACKET_R;
            c++;
            break;

            case ',':
            kind = Token::COMMA;
            c++;
            break;

            case '.':
            kind = Token::DOT;
            c++;
            break;

            case '(':
            kind = Token::PAREN_L;
            c++;
            break;

            case ')':
            kind = Token::PAREN_R;
            c++;
            break;

            case ';':
            kind = Token::SEMICOLON;
            c++;
            break;

            // Numeric literals.
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
            kind = Token::NUMBER;
            data = lex_number(c, filename, begin);
            break;

            // String literals.
            case '"': {
                c++;
                for (; ; ++c) {
                    if (*c == '\0') {
                        throw StaticError(filename, begin, "Unterminated string");
                    }
                    if (*c == '"') {
                        break;
                    }
                    // Keep the backslash and the escaped character verbatim, so that an
                    // escaped quote does not end the string.
                    if (*c == '\\' && *(c+1) != '\0') {
                        data += *c;
                        ++c;
                    }
                    if (*c == '\n') {
                        // Maintain line/column counters.
                        line_number++;
                        line_start = c+1;
                    }
                    data += *c;
                }
                c++;  // Advance beyond the ".
                kind = Token::STRING_DOUBLE;
            }
            break;

            // String literals.
            case '\'': {
                c++;
                for (; ; ++c) {
                    if (*c == '\0') {
                        throw StaticError(filename, begin, "Unterminated string");
                    }
                    if (*c == '\'') {
                        break;
                    }
                    if (*c == '\\' && *(c+1) != '\0') {
                        data += *c;
                        ++c;
                    }
                    if (*c == '\n') {
                        // Maintain line/column counters.
                        line_number++;
                        line_start = c+1;
                    }
                    data += *c;
                }
                c++;  // Advance beyond the '.
                kind = Token::STRING_SINGLE;
            }
            break;

            // Keywords
            default:
            if (is_identifier_first(*c)) {
                std::string id;
                for (; is_identifier(*c); ++c)
                    id += *c;
                if (id == "assert") {
                    kind = Token::ASSERT;
                } else if (id == "else") {
                    kind = Token::ELSE;
                } else if (id == "error") {
                    kind = Token::ERROR;
                } else if (id == "false") {
                    kind = Token::FALSE;
                } else if (id == "for") {
                    kind = Token::FOR;
                } else if (id == "function") {
                    kind = Token::FUNCTION;
                } else if (id == "if") {
                    kind = Token::IF;
                } else if (id == "import") {
                    kind = Token::IMPORT;
                } else if (id == "importstr") {
                    kind = Token::IMPORTSTR;
                } else if (id == "in") {
                    kind = Token::IN;
                } else if (id == "local") {
                    kind = Token::LOCAL;
                } else if (id == "null") {
                    kind = Token::NULL_LIT;
                } else if (id == "self") {
                    kind = Token::SELF;
                } else if (id == "super") {
                    kind = Token::SUPER;
                } else if (id == "tailstrict") {
                    kind = Token::TAILSTRICT;
                } else if (id == "then") {
                    kind = Token::THEN;
                } else if (id == "true") {
                    kind = Token::TRUE;
                } else {
                    // Not a keyword, must be an identifier.
                    kind = Token::IDENTIFIER;
                }
                data = id;

            } else if (is_symbol(*c) || *c == '#') {

                // Single line C++ and Python style comments.
                if (*c == '#' || (*c == '/' && *(c+1) == '/')) {
                    std::vector<std::string> comment(1);
                    unsigned comment_blanks;
                    unsigned comment_indent;
                    lex_until_newline(c, comment[0], comment_blanks, comment_indent,
                                      line_start, line_number);
                    // A comment on a line of its own is a paragraph; one that trails a token
                    // is recorded as that line's end.
                    auto fodder_kind =
                        fresh_line ? FodderElement::PARAGRAPH : FodderElement::LINE_END;
                    fodder.emplace_back(fodder_kind, comment_blanks, comment_indent, comment);
                    fresh_line = true;
                    continue;  // We've not got a token, just fodder, so keep scanning.
                }

                // Multi-line C style comment.
                if (*c == '/' && *(c+1) == '*') {

                    unsigned margin = c - line_start;

                    const char *initial_c = c;
                    c += 2;  // Avoid matching /*/: skip the /* before starting the search for */.

                    while (!(*c == '*' && *(c+1) == '/')) {
                        if (*c == '\0') {
                            auto msg = "Multi-line comment has no terminating */.";
                            throw StaticError(filename, begin, msg);
                        }
                        if (*c == '\n') {
                            // Just keep track of the line / column counters.
                            line_number++;
                            line_start = c+1;
                        }
                        ++c;
                    }
                    c += 2;  // Move the pointer to the char after the closing '/'.

                    std::string comment(initial_c, c - initial_c);  // Includes the "/*" and "*/".

                    // Lex whitespace after comment
                    unsigned new_lines_after, indent_after;
                    lex_ws(c, new_lines_after, indent_after, line_start, line_number);
                    std::vector<std::string> lines;
                    if (comment.find('\n') == std::string::npos) {
                        // Comment looks like /* foo */
                        lines.push_back(comment);
                        fodder.emplace_back(FodderElement::INTERSTITIAL, 0, 0, lines);
                        if (new_lines_after > 0) {
                            fodder.emplace_back(FodderElement::LINE_END, new_lines_after - 1,
                                                indent_after, EMPTY);
                            fresh_line = true;
                        }
                    } else {
                        lines = line_split(comment, margin);
                        assert(lines[0][0] == '/');
                        // Little hack to support PARAGRAPHs with * down the LHS:
                        // Add a space to lines that start with a '*'
                        // NOTE(review): lines[0] starts with '/' (asserted above), so
                        // all_star can never stay true and the padding below never runs.
                        // Left as found; confirm whether the first line was meant to be
                        // excluded from the check.
                        bool all_star = true;
                        for (auto &l : lines) {
                            if (l[0] != '*')
                                all_star = false;
                        }
                        if (all_star) {
                            for (auto &l : lines) {
                                if (l[0] == '*')
                                    l = " " + l;
                            }
                        }
                        if (new_lines_after == 0) {
                            // Ensure a line end after the paragraph.
                            new_lines_after = 1;
                            indent_after = 0;
                        }
                        if (!fresh_line)
                            // Ensure a line end before the comment.
                            fodder.emplace_back(FodderElement::LINE_END, 0, 0, EMPTY);
                        fodder.emplace_back(FodderElement::PARAGRAPH, new_lines_after - 1,
                                            indent_after, lines);
                        fresh_line = true;
                    }
                    continue;  // We've not got a token, just fodder, so keep scanning.
                }

                // Text block
                if (*c == '|' && *(c+1) == '|' && *(c+2) == '|') {
                    if (*(c+3) != '\n') {
                        // The operator scan below refuses to consume "|||", so without this
                        // check it would emit an empty operator token without advancing c,
                        // and the lexer would loop forever on such input.
                        auto msg = "Text block syntax requires new line after |||.";
                        throw StaticError(filename, begin, msg);
                    }
                    std::stringstream block;
                    c += 4;  // Skip the "|||\n"
                    line_number++;
                    // Skip any blank lines at the beginning of the block.
                    while (*c == '\n') {
                        line_number++;
                        ++c;
                        block << '\n';
                    }
                    line_start = c;
                    const char *first_line = c;
                    int ws_chars = whitespace_check(first_line, c);
                    string_block_indent = std::string(first_line, ws_chars);
                    if (ws_chars == 0) {
                        auto msg = "Text block's first line must start with whitespace.";
                        throw StaticError(filename, begin, msg);
                    }
                    while (true) {
                        assert(ws_chars > 0);
                        // Read up to the \n
                        for (c = &c[ws_chars]; *c != '\n' ; ++c) {
                            if (*c == '\0')
                                throw StaticError(filename, begin, "Unexpected EOF");
                            block << *c;
                        }
                        // Add the \n
                        block << '\n';
                        ++c;
                        line_number++;
                        // Skip any blank lines
                        while (*c == '\n') {
                            line_number++;
                            ++c;
                            block << '\n';
                        }
                        // Only now is c at the start of the line examined next; setting
                        // line_start before the blank lines are skipped would make the
                        // columns computed from it include those blank lines.
                        line_start = c;
                        // Examine next line
                        ws_chars = whitespace_check(first_line, c);
                        if (ws_chars == 0) {
                            // End of text block
                            // Skip over any whitespace
                            while (*c == ' ' || *c == '\t') {
                                string_block_term_indent += *c;
                                ++c;
                            }
                            // Expect |||
                            if (!(*c == '|' && *(c+1) == '|' && *(c+2) == '|')) {
                                auto msg = "Text block not terminated with |||";
                                throw StaticError(filename, begin, msg);
                            }
                            c += 3;  // Leave after the last |
                            data = block.str();
                            kind = Token::STRING_BLOCK;
                            break;  // Out of the while loop.
                        }
                    }

                    break;  // Out of the switch.
                }

                const char *operator_begin = c;
                for (; is_symbol(*c) ; ++c) {
                    // Not allowed // in operators
                    if (*c == '/' && *(c+1) == '/') break;
                    // Not allowed /* in operators
                    if (*c == '/' && *(c+1) == '*') break;
                    // Not allowed ||| in operators
                    if (*c == '|' && *(c+1) == '|' && *(c+2) == '|') break;
                }
                // Not allowed to end with a + - ~ ! unless a single char.
                // So, wind it back if we need to (but not too far).
                while (c > operator_begin + 1
                       && (*(c-1) == '+' || *(c-1) == '-' || *(c-1) == '~' || *(c-1) == '!')) {
                    c--;
                }
                data += std::string(operator_begin, c);
                if (data == "$") {
                    kind = Token::DOLLAR;
                    data = "";
                } else {
                    kind = Token::OPERATOR;
                }
            } else {
                std::stringstream ss;
                ss << "Could not lex the character ";
                auto uc = (unsigned char)(*c);
                // Decide on the unsigned value: whether plain char is signed is
                // implementation-defined, so testing *c directly would report bytes >= 0x80
                // as a code on some platforms and as a raw (non-UTF-8) byte on others.
                if (uc < 32 || uc > 127)
                    ss << "code " << unsigned(uc);
                else
                    ss << "'" << *c << "'";
                throw StaticError(filename, begin, ss.str());
            }
        }

        // c is one past the token, so this is the column of the token's last character.
        Location end(line_number, c - line_start);
        r.emplace_back(kind, fodder, data, string_block_indent, string_block_term_indent,
                       LocationRange(filename, begin, end));
        fodder.clear();
        fresh_line = false;
    }

    Location end(line_number, c - line_start + 1);
    r.emplace_back(Token::END_OF_FILE, fodder, "", "", "", LocationRange(filename, end, end));
    return r;
}