LocationRange cellRangeToLocationRange(const CellRange& cellRange) { return LocationRange( cellCoordToLocation(cellRange.topLeft), cellCoordToLocation(cellRange.bottomRight) ); }
WithDeps(const std::string& functionName, SourceSelection selection, const std::string& filename) : extractor( std::bind( clangutil::runTranslationUnitHandlerOnFile, TranslationUnitFunctionExtractorFactory().createFunctionExtractor( functionName, LocationRange(selection.from, selection.to), textModifier), filename), std::bind(recordReplacements, std::ref(textModifier), filename)) { }
TEST_F(DefaultStatementLocatorTest, should_return_the_most_nested_statement) { parseSourceAndCreateLocatorForSelection( "void f() {\ntry {\nif (true) { int a; } else { int b; }\n} catch(...) { } }", { rowCol(2, 12), rowCol(2, 18) }); expectGetRangeForStmtAndReturn(0, { rowCol(1, 0), rowCol(3, 16) }); // try auto& sourceManager = function->getDecl()->getASTContext().getSourceManager(); auto ifStmt = *clang::dyn_cast<clang::CXXTryStmt>(nthStmt(0))->getTryBlock()->body_begin(); EXPECT_CALL(*this, getStmtRange(Ref(sourceManager), Ref(*ifStmt))) .WillRepeatedly(Return(LocationRange(rowCol(2, 0), rowCol(2, 37)))); auto intDecl = *clang::dyn_cast<clang::IfStmt>(ifStmt)->getThen()->child_begin(); EXPECT_CALL(*this, getStmtRange(Ref(sourceManager), Ref(*intDecl))) .WillRepeatedly(Return(LocationRange(rowCol(2, 12), rowCol(2, 18)))); auto stmts = locator->findStatementsInFunction(*function->getDecl()); ASSERT_EQ(1u, std::distance(begin(stmts), end(stmts))); ASSERT_TRUE(*stmts == intDecl); }
TEST_F(DefaultStatementLocatorTest, should_return_the_nested_statement_from_a_try_block) { parseSourceAndCreateLocatorForSelection( "void f() {\ntry { int a; } catch(...) { } }", { rowCol(1, 6), rowCol(1, 13) }); expectGetRangeForStmtAndReturn(0, { rowCol(1, 0), rowCol(1, 15) }); auto& sourceManager = function->getDecl()->getASTContext().getSourceManager(); auto intDecl = *clang::dyn_cast<clang::CXXTryStmt>(nthStmt(0))->getTryBlock()->body_begin(); EXPECT_CALL(*this, getStmtRange(Ref(sourceManager), Ref(*intDecl))) .WillRepeatedly(Return(LocationRange(rowCol(1, 6), rowCol(1, 13)))); auto stmts = locator->findStatementsInFunction(*function->getDecl()); ASSERT_EQ(1u, std::distance(begin(stmts), end(stmts))); ASSERT_TRUE(*stmts == intDecl); }
std::list<Token> jsonnet_lex(const std::string &filename, const char *input) { unsigned long line_number = 1; const char *line_start = input; std::list<Token> r; const char *c = input; for ( ; *c!='\0' ; ++c) { Location begin(line_number, c - line_start + 1); Token::Kind kind; std::string data; switch (*c) { // Skip non-\n whitespace case ' ': case '\t': case '\r': continue; // Skip \n and maintain line numbers case '\n': line_number++; line_start = c+1; continue; case '{': kind = Token::BRACE_L; break; case '}': kind = Token::BRACE_R; break; case '[': kind = Token::BRACKET_L; break; case ']': kind = Token::BRACKET_R; break; case ':': kind = Token::COLON; break; case ',': kind = Token::COMMA; break; case '$': kind = Token::DOLLAR; break; case '.': kind = Token::DOT; break; case '(': kind = Token::PAREN_L; break; case ')': kind = Token::PAREN_R; break; case ';': kind = Token::SEMICOLON; break; // Special cases for unary operators. case '!': kind = Token::OPERATOR; if (*(c+1) == '=') { c++; data = "!="; } else { data = "!"; } break; case '~': kind = Token::OPERATOR; data = "~"; break; case '+': kind = Token::OPERATOR; data = "+"; break; case '-': kind = Token::OPERATOR; data = "-"; break; // Numeric literals. case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': kind = Token::NUMBER; data = lex_number(c, filename, begin); break; // String literals. case '"': { c++; for (; ; ++c) { if (*c == '\0') { throw StaticError(filename, begin, "Unterminated string"); } if (*c == '"') { break; } switch (*c) { case '\\': switch (*(++c)) { case '"': data += *c; break; case '\\': data += *c; break; case '/': data += *c; break; case 'b': data += '\b'; break; case 'f': data += '\f'; break; case 'n': data += '\n'; break; case 'r': data += '\r'; break; case 't': data += '\t'; break; case 'u': { ++c; // Consume the 'u'. unsigned long codepoint = 0; // Expect 4 hex digits. for (unsigned i=0 ; i<4 ; ++i) { auto x = (unsigned char)(c[i]); unsigned digit; if (x == '\0') { auto msg = "Unterminated string"; throw StaticError(filename, begin, msg); } else if (x == '"') { auto msg = "Truncated unicode escape sequence in " "string literal."; throw StaticError(filename, begin, msg); } else if (x >= '0' && x <= '9') { digit = x - '0'; } else if (x >= 'a' && x <= 'f') { digit = x - 'a' + 10; } else if (x >= 'A' && x <= 'F') { digit = x - 'A' + 10; } else { std::stringstream ss; ss << "Malformed unicode escape character, " << "should be hex: '" << x << "'"; throw StaticError(filename, begin, ss.str()); } codepoint *= 16; codepoint += digit; } encode_utf8(codepoint, data); // Leave us on the last char, ready for the ++c at // the outer for loop. c += 3; } break; case '\0': { auto msg = "Truncated escape sequence in string literal."; throw StaticError(filename, begin, msg); } default: { std::stringstream ss; ss << "Unknown escape sequence in string literal: '" << *c << "'"; throw StaticError(filename, begin, ss.str()); } } break; // Treat as a regular letter, but maintain line/column counters. case '\n': line_number++; line_start = c+1; data += *c; break; default: // Just a regular letter. data += *c; } } kind = Token::STRING; } break; // Keywords default: if (is_identifier_first(*c)) { std::string id; for (; *c != '\0' ; ++c) { if (!is_identifier(*c)) { break; } id += *c; } --c; if (id == "assert") { kind = Token::ASSERT; } else if (id == "else") { kind = Token::ELSE; } else if (id == "error") { kind = Token::ERROR; } else if (id == "false") { kind = Token::FALSE; } else if (id == "for") { kind = Token::FOR; } else if (id == "function") { kind = Token::FUNCTION; } else if (id == "if") { kind = Token::IF; } else if (id == "import") { kind = Token::IMPORT; } else if (id == "importstr") { kind = Token::IMPORTSTR; } else if (id == "in") { kind = Token::IN; } else if (id == "local") { kind = Token::LOCAL; } else if (id == "null") { kind = Token::NULL_LIT; } else if (id == "self") { kind = Token::SELF; } else if (id == "super") { kind = Token::SUPER; } else if (id == "tailstrict") { kind = Token::TAILSTRICT; } else if (id == "then") { kind = Token::THEN; } else if (id == "true") { kind = Token::TRUE; } else { // Not a keyword, must be an identifier. kind = Token::IDENTIFIER; data = id; } } else if (is_symbol(*c)) { // Single line C++ style comment if (*c == '/' && *(c+1) == '/') { while (*c != '\0' && *c != '\n') { ++c; } // Leaving it on the \n allows processing of \n on next iteration, // i.e. managing of the line & column counter. c--; continue; } // Single line # comment if (*c == '#') { while (*c != '\0' && *c != '\n') { ++c; } // Leaving it on the \n allows processing of \n on next iteration, // i.e. managing of the line & column counter. c--; continue; } // Multi-line comment. if (*c == '/' && *(c+1) == '*') { c += 2; // Avoid matching /*/: skip the /* before starting the search for */. while (*c != '\0' && !(*c == '*' && *(c+1) == '/')) { if (*c == '\n') { // Just keep track of the line / column counters. line_number++; line_start = c+1; } ++c; } if (*c == '\0') { auto msg = "Multi-line comment has no terminating */."; throw StaticError(filename, begin, msg); } // Leave the counter on the closing /. c++; continue; } // Text block if (*c == '|' && *(c+1) == '|' && *(c+2) == '|' && *(c+3) == '\n') { std::stringstream block; c += 4; // Skip the "|||\n" line_number++; line_start = c; const char *first_line = c; int ws_chars = whitespace_check(first_line, c); if (ws_chars == 0) { auto msg = "Text block's first line must start with whitespace."; throw StaticError(filename, begin, msg); } while (true) { assert(ws_chars > 0); // Read up to the \n for (c = &c[ws_chars]; *c != '\n' ; ++c) { if (*c == '\0') throw StaticError(filename, begin, "Unexpected EOF"); block << *c; } // Add the \n block << '\n'; ++c; line_number++; line_start = c; // Examine next line ws_chars = whitespace_check(first_line, c); if (ws_chars == 0) { // End of text block // Skip over any whitespace while (*c == ' ' || *c == '\t') ++c; // Expect ||| if (!(*c == '|' && *(c+1) == '|' && *(c+2) == '|')) { auto msg = "Text block not terminated with |||"; throw StaticError(filename, begin, msg); } c += 2; // Leave on the last | data = block.str(); kind = Token::STRING; break; } } break; // Out of the switch. } for (; *c != '\0' ; ++c) { if (!is_symbol(*c)) { break; } data += *c; } --c; kind = Token::OPERATOR; } else { std::stringstream ss; ss << "Could not lex the character "; auto uc = (unsigned char)(*c); if (*c < 32) ss << "code " << unsigned(uc); else ss << "'" << *c << "'"; throw StaticError(filename, begin, ss.str()); } break; } Location end(line_number, c - line_start + 1); r.push_back(Token(kind, data, LocationRange(filename, begin, end))); } Location end(line_number, c - line_start + 1); r.push_back(Token(Token::END_OF_FILE, "", LocationRange(filename, end, end))); return r; }
void assertFailsForSelection(SourceLocation from, SourceLocation to) { ASSERT_THROW(getFunctionFromAstInSelection(func->getASTContext(), LocationRange(from, to)), ExtractMethodError) << "[" << from.row << ", " << from.col << "; " << to.row << ", " << to.col << ")"; }
void assertFunctionContainsSelection(const std::string& name, SourceLocation from, SourceLocation to) { ASSERT_EQ(name, getFunctionFromAstInSelection(func->getASTContext(), LocationRange(from, to)).getNameAsString()) << "[" << from.row << ", " << from.col << "; " << to.row << ", " << to.col << ")"; }
TEST_F(getStmtLocationRangeTest, should_handle_multiline_statements) { auto range = getRangeFromSource("void dummy_function__() {\n int\n x;\n}"); ASSERT_EQ(LocationRange(rowCol(1, 2), rowCol(2, 3)), range); }
void expectStmtRangeIs(LocationRange range, const std::string& phrase) { auto phraseOffset = parsedSource.find(phrase); ASSERT_EQ(LocationRange(rowCol(0, phraseOffset), rowCol(0, phraseOffset + phrase.length())), range) << (*func->stmts())->getStmtClassName(); }
void desugar(AST *&ast_, unsigned obj_level) { if (auto *ast = dynamic_cast<Apply*>(ast_)) { desugar(ast->target, obj_level); for (Apply::Arg &arg : ast->args) desugar(arg.expr, obj_level); } else if (auto *ast = dynamic_cast<ApplyBrace*>(ast_)) { desugar(ast->left, obj_level); desugar(ast->right, obj_level); ast_ = alloc->make<Binary>(ast->location, ast->openFodder, ast->left, EF, BOP_PLUS, ast->right); } else if (auto *ast = dynamic_cast<Array*>(ast_)) { for (auto &el : ast->elements) desugar(el.expr, obj_level); } else if (auto *ast = dynamic_cast<ArrayComprehension*>(ast_)) { for (ComprehensionSpec &spec : ast->specs) desugar(spec.expr, obj_level); desugar(ast->body, obj_level + 1); int n = ast->specs.size(); AST *zero = make<LiteralNumber>(E, EF, "0.0"); AST *one = make<LiteralNumber>(E, EF, "1.0"); auto *_r = id(U"$r"); auto *_l = id(U"$l"); std::vector<const Identifier*> _i(n); for (int i = 0; i < n ; ++i) { StringStream ss; ss << U"$i_" << i; _i[i] = id(ss.str()); } std::vector<const Identifier*> _aux(n); for (int i = 0; i < n ; ++i) { StringStream ss; ss << U"$aux_" << i; _aux[i] = id(ss.str()); } // Build it from the inside out. We keep wrapping 'in' with more ASTs. assert(ast->specs[0].kind == ComprehensionSpec::FOR); int last_for = n - 1; while (ast->specs[last_for].kind != ComprehensionSpec::FOR) last_for--; // $aux_{last_for}($i_{last_for} + 1, $r + [body]) AST *in = make<Apply>( ast->body->location, EF, var(_aux[last_for]), EF, Apply::Args { { make<Binary>(E, EF, var(_i[last_for]), EF, BOP_PLUS, one), EF}, { make<Binary>(E, EF, var(_r), EF, BOP_PLUS, singleton(ast->body)), EF} }, false, // trailingComma EF, EF, true // tailstrict ); for (int i = n - 1; i >= 0 ; --i) { const ComprehensionSpec &spec = ast->specs[i]; AST *out; if (i > 0) { int prev_for = i - 1; while (ast->specs[prev_for].kind != ComprehensionSpec::FOR) prev_for--; // aux_{prev_for}($i_{prev_for} + 1, $r) out = make<Apply>( // False branch. E, EF, var(_aux[prev_for]), EF, Apply::Args { { make<Binary>(E, EF, var(_i[prev_for]), EF, BOP_PLUS, one), EF, }, { var(_r), EF, } }, false, // trailingComma EF, EF, true // tailstrict ); } else { out = var(_r); } switch (spec.kind) { case ComprehensionSpec::IF: { /* if [[[...cond...]]] then [[[...in...]]] else [[[...out...]]] */ in = make<Conditional>( ast->location, EF, spec.expr, EF, in, // True branch. EF, out); // False branch. } break; case ComprehensionSpec::FOR: { /* local $l = [[[...array...]]] aux_{i}(i_{i}, r) = if i_{i} >= std.length($l) then [[[...out...]]] else local [[[...var...]]] = $l[i_{i}]; [[[...in...]]];` if std.type($l) != "array" then error "In comprehension, can only iterate over array.." else aux_{i}(0, r) tailstrict; */ in = make<Local>( ast->location, EF, Local::Binds { bind(_l, spec.expr), // Need to check expr is an array bind(_aux[i], make<Function>( ast->location, EF, EF, std::vector<Param>{Param(EF, _i[i], EF), Param(EF, _r, EF)}, false, // trailingComma EF, make<Conditional>( ast->location, EF, make<Binary>( E, EF, var(_i[i]), EF, BOP_GREATER_EQ, length(var(_l))), EF, out, EF, make<Local>( ast->location, EF, singleBind( spec.var, make<Index>(E, EF, var(_l), EF, false, var(_i[i]), EF, nullptr, EF, nullptr, EF) ), in) ) ))}, make<Conditional>( ast->location, EF, equals(ast->location, type(var(_l)), str(U"array")), EF, make<Apply>( E, EF, var(_aux[i]), EF, Apply::Args { {zero, EF}, { i == 0 ? make<Array>(E, EF, Array::Elements{}, false, EF) : static_cast<AST*>(var(_r)), EF, } }, false, // trailingComma EF, EF, true), // tailstrict EF, error(ast->location, U"In comprehension, can only iterate over array."))); } break; } } ast_ = in; } else if (auto *ast = dynamic_cast<Assert*>(ast_)) { desugar(ast->cond, obj_level); if (ast->message == nullptr) { ast->message = str(U"Assertion failed."); } desugar(ast->message, obj_level); desugar(ast->rest, obj_level); // if cond then rest else error msg AST *branch_false = alloc->make<Error>(ast->location, EF, ast->message); ast_ = alloc->make<Conditional>(ast->location, ast->openFodder, ast->cond, EF, ast->rest, EF, branch_false); } else if (auto *ast = dynamic_cast<Binary*>(ast_)) { desugar(ast->left, obj_level); desugar(ast->right, obj_level); bool invert = false; switch (ast->op) { case BOP_PERCENT: { AST *f_mod = alloc->make<Index>(E, EF, std(), EF, false, str(U"mod"), EF, nullptr, EF, nullptr, EF); Apply::Args args = {{ast->left, EF}, {ast->right, EF}}; ast_ = alloc->make<Apply>(ast->location, ast->openFodder, f_mod, EF, args, false, EF, EF, false); } break; case BOP_MANIFEST_UNEQUAL: invert = true; case BOP_MANIFEST_EQUAL: { ast_ = equals(ast->location, ast->left, ast->right); if (invert) ast_ = alloc->make<Unary>(ast->location, ast->openFodder, UOP_NOT, ast_); } break; default:; // Otherwise don't change it. } } else if (dynamic_cast<const BuiltinFunction*>(ast_)) { // Nothing to do. } else if (auto *ast = dynamic_cast<Conditional*>(ast_)) { desugar(ast->cond, obj_level); desugar(ast->branchTrue, obj_level); if (ast->branchFalse == nullptr) ast->branchFalse = alloc->make<LiteralNull>(LocationRange(), EF); desugar(ast->branchFalse, obj_level); } else if (auto *ast = dynamic_cast<Dollar*>(ast_)) { if (obj_level == 0) { throw StaticError(ast->location, "No top-level object found."); } ast_ = alloc->make<Var>(ast->location, EF, alloc->makeIdentifier(U"$")); } else if (auto *ast = dynamic_cast<Error*>(ast_)) { desugar(ast->expr, obj_level); } else if (auto *ast = dynamic_cast<Function*>(ast_)) { desugar(ast->body, obj_level); } else if (dynamic_cast<const Import*>(ast_)) { // Nothing to do. } else if (dynamic_cast<const Importstr*>(ast_)) { // Nothing to do. } else if (auto *ast = dynamic_cast<Index*>(ast_)) { desugar(ast->target, obj_level); if (ast->isSlice) { if (ast->index == nullptr) ast->index = make<LiteralNull>(ast->location, EF); desugar(ast->index, obj_level); if (ast->end == nullptr) ast->end = make<LiteralNull>(ast->location, EF); desugar(ast->end, obj_level); if (ast->step == nullptr) ast->step = make<LiteralNull>(ast->location, EF); desugar(ast->step, obj_level); ast_ = make<Apply>( ast->location, EF, make<Index>( E, EF, std(), EF, false, str(U"slice"), EF, nullptr, EF, nullptr, EF), EF, std::vector<Apply::Arg>{ {ast->target, EF}, {ast->index, EF}, {ast->end, EF}, {ast->step, EF}, }, false, // trailing comma EF, EF, false // tailstrict ); } else { if (ast->id != nullptr) { assert(ast->index == nullptr); ast->index = str(ast->id->name); ast->id = nullptr; } desugar(ast->index, obj_level); } } else if (auto *ast = dynamic_cast<Local*>(ast_)) { for (auto &bind: ast->binds) desugar(bind.body, obj_level); desugar(ast->body, obj_level); for (auto &bind: ast->binds) { if (bind.functionSugar) { bind.body = alloc->make<Function>( ast->location, ast->openFodder, bind.parenLeftFodder, bind.params, false, bind.parenRightFodder, bind.body); bind.functionSugar = false; bind.params.clear(); } } } else if (dynamic_cast<const LiteralBoolean*>(ast_)) { // Nothing to do. } else if (dynamic_cast<const LiteralNumber*>(ast_)) { // Nothing to do. } else if (auto *ast = dynamic_cast<LiteralString*>(ast_)) { if (ast->tokenKind != LiteralString::BLOCK) { ast->value = jsonnet_string_unescape(ast->location, ast->value); } ast->tokenKind = LiteralString::DOUBLE; ast->blockIndent.clear(); } else if (dynamic_cast<const LiteralNull*>(ast_)) { // Nothing to do. } else if (auto *ast = dynamic_cast<DesugaredObject*>(ast_)) { for (auto &field : ast->fields) { desugar(field.name, obj_level); desugar(field.body, obj_level + 1); } for (AST *assert : ast->asserts) { desugar(assert, obj_level + 1); } } else if (auto *ast = dynamic_cast<Object*>(ast_)) { // Hidden variable to allow outer/top binding. if (obj_level == 0) { const Identifier *hidden_var = alloc->makeIdentifier(U"$"); auto *body = alloc->make<Self>(E, EF); ast->fields.push_back(ObjectField::Local(EF, EF, hidden_var, EF, body, EF)); } desugarFields(ast, ast->fields, obj_level); DesugaredObject::Fields new_fields; ASTs new_asserts; for (const ObjectField &field : ast->fields) { if (field.kind == ObjectField::ASSERT) { new_asserts.push_back(field.expr2); } else if (field.kind == ObjectField::FIELD_EXPR) { new_fields.emplace_back(field.hide, field.expr1, field.expr2); } else { std::cerr << "INTERNAL ERROR: field should have been desugared: " << field.kind << std::endl; } } ast_ = alloc->make<DesugaredObject>(ast->location, new_asserts, new_fields); } else if (auto *ast = dynamic_cast<ObjectComprehension*>(ast_)) { // Hidden variable to allow outer/top binding. if (obj_level == 0) { const Identifier *hidden_var = alloc->makeIdentifier(U"$"); auto *body = alloc->make<Self>(E, EF); ast->fields.push_back(ObjectField::Local(EF, EF, hidden_var, EF, body, EF)); } desugarFields(ast, ast->fields, obj_level); for (ComprehensionSpec &spec : ast->specs) desugar(spec.expr, obj_level); AST *field = ast->fields.front().expr1; AST *value = ast->fields.front().expr2; /* { [arr[0]]: local x = arr[1], y = arr[2], z = arr[3]; val_expr for arr in [ [key_expr, x, y, z] for ... ] } */ auto *_arr = id(U"$arr"); AST *zero = make<LiteralNumber>(E, EF, "0.0"); int counter = 1; Local::Binds binds; Array::Elements arr_e {Array::Element(field, EF)}; for (ComprehensionSpec &spec : ast->specs) { if (spec.kind == ComprehensionSpec::FOR) { std::stringstream num; num << counter++; binds.push_back(bind( spec.var, make<Index>(E, EF, var(_arr), EF, false, make<LiteralNumber>(E, EF, num.str()), EF, nullptr, EF, nullptr, EF))); arr_e.emplace_back(var(spec.var), EF); } } AST *arr = make<ArrayComprehension>( ast->location, EF, make<Array>(ast->location, EF, arr_e, false, EF), EF, false, ast->specs, EF); desugar(arr, obj_level); ast_ = make<ObjectComprehensionSimple>( ast->location, make<Index>(E, EF, var(_arr), EF, false, zero, EF, nullptr, EF, nullptr, EF), make<Local>( ast->location, EF, binds, value), _arr, arr); } else if (auto *ast = dynamic_cast<ObjectComprehensionSimple*>(ast_)) { desugar(ast->field, obj_level); desugar(ast->value, obj_level + 1); desugar(ast->array, obj_level); } else if (auto *ast = dynamic_cast<Parens*>(ast_)) { // Strip parens. desugar(ast->expr, obj_level); ast_ = ast->expr; } else if (dynamic_cast<const Self*>(ast_)) { // Nothing to do. } else if (auto * ast = dynamic_cast<SuperIndex*>(ast_)) { if (ast->id != nullptr) { assert(ast->index == nullptr); ast->index = str(ast->id->name); ast->id = nullptr; } desugar(ast->index, obj_level); } else if (auto *ast = dynamic_cast<Unary*>(ast_)) { desugar(ast->expr, obj_level); } else if (dynamic_cast<const Var*>(ast_)) { // Nothing to do. } else { std::cerr << "INTERNAL ERROR: Unknown AST: " << ast_ << std::endl; std::abort(); } }
Tokens jsonnet_lex(const std::string &filename, const char *input) { unsigned long line_number = 1; const char *line_start = input; Tokens r; const char *c = input; Fodder fodder; bool fresh_line = true; // Are we tokenizing from the beginning of a new line? while (*c!='\0') { Token::Kind kind; std::string data; std::string string_block_indent; std::string string_block_term_indent; unsigned new_lines, indent; lex_ws(c, new_lines, indent, line_start, line_number); // If it's the end of the file, discard final whitespace. if (*c == '\0') break; if (new_lines > 0) { // Otherwise store whitespace in fodder. unsigned blanks = new_lines - 1; fodder.emplace_back(FodderElement::LINE_END, blanks, indent, EMPTY); fresh_line = true; } Location begin(line_number, c - line_start + 1); switch (*c) { // The following operators should never be combined with subsequent symbols. case '{': kind = Token::BRACE_L; c++; break; case '}': kind = Token::BRACE_R; c++; break; case '[': kind = Token::BRACKET_L; c++; break; case ']': kind = Token::BRACKET_R; c++; break; case ',': kind = Token::COMMA; c++; break; case '.': kind = Token::DOT; c++; break; case '(': kind = Token::PAREN_L; c++; break; case ')': kind = Token::PAREN_R; c++; break; case ';': kind = Token::SEMICOLON; c++; break; // Numeric literals. case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': kind = Token::NUMBER; data = lex_number(c, filename, begin); break; // String literals. case '"': { c++; for (; ; ++c) { if (*c == '\0') { throw StaticError(filename, begin, "Unterminated string"); } if (*c == '"') { break; } if (*c == '\\' && *(c+1) != '\0') { data += *c; ++c; } if (*c == '\n') { // Maintain line/column counters. line_number++; line_start = c+1; } data += *c; } c++; // Advance beyond the ". kind = Token::STRING_DOUBLE; } break; // String literals. case '\'': { c++; for (; ; ++c) { if (*c == '\0') { throw StaticError(filename, begin, "Unterminated string"); } if (*c == '\'') { break; } if (*c == '\\' && *(c+1) != '\0') { data += *c; ++c; } if (*c == '\n') { // Maintain line/column counters. line_number++; line_start = c+1; } data += *c; } c++; // Advance beyond the '. kind = Token::STRING_SINGLE; } break; // Keywords default: if (is_identifier_first(*c)) { std::string id; for (; is_identifier(*c); ++c) id += *c; if (id == "assert") { kind = Token::ASSERT; } else if (id == "else") { kind = Token::ELSE; } else if (id == "error") { kind = Token::ERROR; } else if (id == "false") { kind = Token::FALSE; } else if (id == "for") { kind = Token::FOR; } else if (id == "function") { kind = Token::FUNCTION; } else if (id == "if") { kind = Token::IF; } else if (id == "import") { kind = Token::IMPORT; } else if (id == "importstr") { kind = Token::IMPORTSTR; } else if (id == "in") { kind = Token::IN; } else if (id == "local") { kind = Token::LOCAL; } else if (id == "null") { kind = Token::NULL_LIT; } else if (id == "self") { kind = Token::SELF; } else if (id == "super") { kind = Token::SUPER; } else if (id == "tailstrict") { kind = Token::TAILSTRICT; } else if (id == "then") { kind = Token::THEN; } else if (id == "true") { kind = Token::TRUE; } else { // Not a keyword, must be an identifier. kind = Token::IDENTIFIER; } data = id; } else if (is_symbol(*c) || *c == '#') { // Single line C++ and Python style comments. if (*c == '#' || (*c == '/' && *(c+1) == '/')) { std::vector<std::string> comment(1); unsigned blanks; unsigned indent; lex_until_newline(c, comment[0], blanks, indent, line_start, line_number); auto kind = fresh_line ? FodderElement::PARAGRAPH : FodderElement::LINE_END; fodder.emplace_back(kind, blanks, indent, comment); fresh_line = true; continue; // We've not got a token, just fodder, so keep scanning. } // Multi-line C style comment. if (*c == '/' && *(c+1) == '*') { unsigned margin = c - line_start; const char *initial_c = c; c += 2; // Avoid matching /*/: skip the /* before starting the search for */. while (!(*c == '*' && *(c+1) == '/')) { if (*c == '\0') { auto msg = "Multi-line comment has no terminating */."; throw StaticError(filename, begin, msg); } if (*c == '\n') { // Just keep track of the line / column counters. line_number++; line_start = c+1; } ++c; } c += 2; // Move the pointer to the char after the closing '/'. std::string comment(initial_c, c - initial_c); // Includes the "/*" and "*/". // Lex whitespace after comment unsigned new_lines_after, indent_after; lex_ws(c, new_lines_after, indent_after, line_start, line_number); std::vector<std::string> lines; if (comment.find('\n') >= comment.length()) { // Comment looks like /* foo */ lines.push_back(comment); fodder.emplace_back(FodderElement::INTERSTITIAL, 0, 0, lines); if (new_lines_after > 0) { fodder.emplace_back(FodderElement::LINE_END, new_lines_after - 1, indent_after, EMPTY); fresh_line = true; } } else { lines = line_split(comment, margin); assert(lines[0][0] == '/'); // Little hack to support PARAGRAPHs with * down the LHS: // Add a space to lines that start with a '*' bool all_star = true; for (auto &l : lines) { if (l[0] != '*') all_star = false; } if (all_star) { for (auto &l : lines) { if (l[0] == '*') l = " " + l; } } if (new_lines_after == 0) { // Ensure a line end after the paragraph. new_lines_after = 1; indent_after = 0; } if (!fresh_line) // Ensure a line end before the comment. fodder.emplace_back(FodderElement::LINE_END, 0, 0, EMPTY); fodder.emplace_back(FodderElement::PARAGRAPH, new_lines_after - 1, indent_after, lines); fresh_line = true; } continue; // We've not got a token, just fodder, so keep scanning. } // Text block if (*c == '|' && *(c+1) == '|' && *(c+2) == '|' && *(c+3) == '\n') { std::stringstream block; c += 4; // Skip the "|||\n" line_number++; // Skip any blank lines at the beginning of the block. while (*c == '\n') { line_number++; ++c; block << '\n'; } line_start = c; const char *first_line = c; int ws_chars = whitespace_check(first_line, c); string_block_indent = std::string(first_line, ws_chars); if (ws_chars == 0) { auto msg = "Text block's first line must start with whitespace."; throw StaticError(filename, begin, msg); } while (true) { assert(ws_chars > 0); // Read up to the \n for (c = &c[ws_chars]; *c != '\n' ; ++c) { if (*c == '\0') throw StaticError(filename, begin, "Unexpected EOF"); block << *c; } // Add the \n block << '\n'; ++c; line_number++; line_start = c; // Skip any blank lines while (*c == '\n') { line_number++; ++c; block << '\n'; } // Examine next line ws_chars = whitespace_check(first_line, c); if (ws_chars == 0) { // End of text block // Skip over any whitespace while (*c == ' ' || *c == '\t') { string_block_term_indent += *c; ++c; } // Expect ||| if (!(*c == '|' && *(c+1) == '|' && *(c+2) == '|')) { auto msg = "Text block not terminated with |||"; throw StaticError(filename, begin, msg); } c += 3; // Leave after the last | data = block.str(); kind = Token::STRING_BLOCK; break; // Out of the while loop. } } break; // Out of the switch. } const char *operator_begin = c; for (; is_symbol(*c) ; ++c) { // Not allowed // in operators if (*c == '/' && *(c+1) == '/') break; // Not allowed /* in operators if (*c == '/' && *(c+1) == '*') break; // Not allowed ||| in operators if (*c == '|' && *(c+1) == '|' && *(c+2) == '|') break; } // Not allowed to end with a + - ~ ! unless a single char. // So, wind it back if we need to (but not too far). while (c > operator_begin + 1 && (*(c-1) == '+' || *(c-1) == '-' || *(c-1) == '~' || *(c-1) == '!')) { c--; } data += std::string(operator_begin, c); if (data == "$") { kind = Token::DOLLAR; data = ""; } else { kind = Token::OPERATOR; } } else { std::stringstream ss; ss << "Could not lex the character "; auto uc = (unsigned char)(*c); if (*c < 32) ss << "code " << unsigned(uc); else ss << "'" << *c << "'"; throw StaticError(filename, begin, ss.str()); } } Location end(line_number, c - line_start); r.emplace_back(kind, fodder, data, string_block_indent, string_block_term_indent, LocationRange(filename, begin, end)); fodder.clear(); fresh_line = false; } Location end(line_number, c - line_start + 1); r.emplace_back(Token::END_OF_FILE, fodder, "", "", "", LocationRange(filename, end, end)); return r; }