/// \brief Tokenizes a string literal, taking into account string interpolation. static void getStringPartTokens(const Token &Tok, const LangOptions &LangOpts, const SourceManager &SM, int BufID, std::vector<Token> &Toks) { assert(Tok.is(tok::string_literal)); bool IsMultiline = Tok.IsMultilineString(); unsigned QuoteLen = IsMultiline ? 3 : 1; SmallVector<Lexer::StringSegment, 4> Segments; Lexer::getStringLiteralSegments(Tok, Segments, /*Diags=*/nullptr); for (unsigned i = 0, e = Segments.size(); i != e; ++i) { Lexer::StringSegment &Seg = Segments[i]; bool isFirst = i == 0; bool isLast = i == e-1; if (Seg.Kind == Lexer::StringSegment::Literal) { SourceLoc Loc = Seg.Loc; unsigned Len = Seg.Length; if (isFirst) { // Include the quote. Loc = Loc.getAdvancedLoc(-QuoteLen); Len += QuoteLen; } if (isLast) { // Include the quote. Len += QuoteLen; } StringRef Text = SM.extractText({ Loc, Len }); Token NewTok; NewTok.setToken(tok::string_literal, Text, IsMultiline); Toks.push_back(NewTok); } else { assert(Seg.Kind == Lexer::StringSegment::Expr && "new enumerator was introduced ?"); unsigned Offset = SM.getLocOffsetInBuffer(Seg.Loc, BufID); unsigned EndOffset = Offset + Seg.Length; if (isFirst) { // Add a token for the quote character. StringRef Text = SM.extractText({ Seg.Loc.getAdvancedLoc(-2), 1 }); Token NewTok; NewTok.setToken(tok::string_literal, Text); Toks.push_back(NewTok); } std::vector<Token> NewTokens = swift::tokenize(LangOpts, SM, BufID, Offset, EndOffset, /*KeepComments=*/true); Toks.insert(Toks.end(), NewTokens.begin(), NewTokens.end()); if (isLast) { // Add a token for the quote character. StringRef Text = SM.extractText({ Seg.Loc.getAdvancedLoc(Seg.Length), 1 }); Token NewTok; NewTok.setToken(tok::string_literal, Text); Toks.push_back(NewTok); } } } }
/// Builds a raw comment record for the text covered by \p Range.
///
/// The raw text is extracted through \p SourceMgr, the comment kind is derived
/// from that text, and the start line/column and end line of the range are
/// recorded.
SingleRawComment::SingleRawComment(CharSourceRange Range,
                                   const SourceManager &SourceMgr)
    : Range(Range), RawText(SourceMgr.extractText(Range)),
      Kind(static_cast<unsigned>(getCommentKind(RawText))) {
  // Line and column of the first character of the comment.
  const auto StartPos = SourceMgr.getLineAndColumn(Range.getStart());
  StartLine = StartPos.first;
  StartColumn = StartPos.second;

  // Only the line number is tracked for the end of the range.
  EndLine = SourceMgr.getLineNumber(Range.getEnd());
}
/// \brief Extract a character immediately before \p Loc. If \p Loc is the /// start of the buffer, return '\f'. static char extractCharBefore(SourceManager &SM, SourceLoc Loc) { // We have to be careful not to go off the front of the buffer. auto bufferID = SM.findBufferContainingLoc(Loc); auto bufferRange = SM.getRangeForBuffer(bufferID); if (bufferRange.getStart() == Loc) return '\f'; auto chars = SM.extractText({Loc.getAdvancedLoc(-1), 1}, bufferID); assert(!chars.empty() && "Couldn't extractText with valid range"); return chars[0]; }
/// Finds the end of the line that contains \p loc within buffer \p bufferID.
///
/// Scans forward from \p loc for the first '\r', '\n' or NUL character and
/// returns its location. If none is found, the end of the buffer is returned.
static SourceLoc findEndOfLine(SourceManager &SM, SourceLoc loc,
                               unsigned bufferID) {
  const SourceLoc bufferEnd = SM.getRangeForBuffer(bufferID).getEnd();
  const StringRef remaining =
      SM.extractText(CharSourceRange{SM, loc, bufferEnd});

  // The explicit length keeps the embedded NUL as part of the search set.
  const StringRef lineTerminators("\r\n\0", 3);
  const size_t terminatorOffset = remaining.find_first_of(lineTerminators);

  return terminatorOffset == StringRef::npos
             ? bufferEnd
             : loc.getAdvancedLoc(terminatorOffset);
}
/// Determines whether the source text in \p MemBuf forms complete input and
/// computes indentation information for a continuation line.
///
/// The buffer is parsed up to end-of-file; the input is reported complete
/// unless the parser flags it as incomplete. Afterwards the raw text is
/// scanned character by character to track open brackets ('{', '(', '[').
/// For the innermost bracket still open at the end of the scan, the leading
/// whitespace of its line becomes \c IndentPrefix and the bracket depth on
/// that line becomes \c IndentLevel.
///
/// \param MemBuf The source text to inspect; ownership passes to a local
///               SourceManager.
/// \returns A SourceCompleteResult with \c IsComplete set, and with
///          \c IndentPrefix / \c IndentLevel set when a bracket remains open.
SourceCompleteResult
ide::isSourceInputComplete(std::unique_ptr<llvm::MemoryBuffer> MemBuf) {
  SourceManager SM;
  auto BufferID = SM.addNewSourceBuffer(std::move(MemBuf));
  ParserUnit Parse(SM, BufferID);
  Parser &P = Parse.getParser();

  // Keep parsing top-level items until the parser reaches end-of-file.
  bool Done;
  do {
    P.parseTopLevel();
    Done = P.Tok.is(tok::eof);
  } while (!Done);

  SourceCompleteResult SCR;
  SCR.IsComplete = !P.isInputIncomplete();

  // Use the same code that was in the REPL code to track the indent level
  // for now. In the future we should get this from the Parser if possible.
  CharSourceRange entireRange = SM.getRangeForBuffer(BufferID);
  StringRef Buffer = SM.extractText(entireRange);
  const char *SourceStart = Buffer.data();
  const char *SourceEnd = Buffer.data() + Buffer.size();
  const char *LineStart = SourceStart;
  // First non-whitespace character on the current line, once one was seen.
  const char *LineSourceStart = nullptr;
  uint32_t LineIndent = 0;

  // One entry per currently open bracket: the text preceding the code on the
  // bracket's line, and the bracket depth on that line.
  struct IndentInfo {
    StringRef Prefix;
    uint32_t Indent;
    IndentInfo(const char *s, size_t n, uint32_t i)
        : Prefix(s, n), Indent(i) {}
  };
  SmallVector<IndentInfo, 4> IndentInfos;

  for (const char *p = SourceStart; p < SourceEnd; ++p) {
    switch (*p) {
    case '\r':
    case '\n':
      LineIndent = 0;
      LineSourceStart = nullptr;
      LineStart = p + 1;
      break;

    case '"':
      p = skipStringInCode(p, SourceEnd);
      break;

    case '{':
    case '(':
    case '[':
      ++LineIndent;
      if (LineSourceStart == nullptr)
        IndentInfos.push_back(IndentInfo(LineStart, p - LineStart,
                                         LineIndent));
      else
        IndentInfos.push_back(IndentInfo(LineStart,
                                         LineSourceStart - LineStart,
                                         LineIndent));
      break;

    case '}':
    case ')':
    case ']':
      if (LineIndent > 0)
        --LineIndent;
      if (!IndentInfos.empty())
        IndentInfos.pop_back();
      break;

    default:
      // The <cctype> classifiers have undefined behavior for negative values
      // other than EOF, which a plain char holding a non-ASCII byte can
      // produce; go through unsigned char first.
      if (LineSourceStart == nullptr &&
          !isspace(static_cast<unsigned char>(*p)))
        LineSourceStart = p;
      break;
    }

    // skipStringInCode may have moved p up to the end of the buffer; do not
    // dereference it in that case. An embedded NUL also ends the scan.
    if (p >= SourceEnd || *p == '\0')
      break;
  }

  if (!IndentInfos.empty()) {
    SCR.IndentPrefix = IndentInfos.back().Prefix.str();
    // Trim off anything that follows a non-space character
    const size_t pos = SCR.IndentPrefix.find_first_not_of(" \t");
    if (pos != std::string::npos)
      SCR.IndentPrefix.erase(pos);
    SCR.IndentLevel = IndentInfos.back().Indent;
  }
  return SCR;
}
/// \brief Extract a character at \p Loc. If \p Loc is the end of the buffer, /// return '\f'. static char extractCharAfter(SourceManager &SM, SourceLoc Loc) { auto chars = SM.extractText({Loc, 1}); return chars.empty() ? '\f' : chars[0]; }
/// Extract the text for the given expression. static StringRef extractExprText(const Expr *E, SourceManager &SM) { const auto CSR = Lexer::getCharSourceRangeFromSourceRange(SM, E->getSourceRange()); return SM.extractText(CSR); }