/// Turn a sequence of our tokens back into a string that we can hand /// to the MC asm parser. static bool buildMSAsmString(Preprocessor &PP, SourceLocation AsmLoc, ArrayRef<Token> AsmToks, SmallVectorImpl<unsigned> &TokOffsets, SmallString<512> &Asm) { assert(!AsmToks.empty() && "Didn't expect an empty AsmToks!"); // Is this the start of a new assembly statement? bool isNewStatement = true; for (unsigned i = 0, e = AsmToks.size(); i < e; ++i) { const Token &Tok = AsmToks[i]; // Start each new statement with a newline and a tab. if (!isNewStatement && (Tok.is(tok::kw_asm) || Tok.isAtStartOfLine())) { Asm += "\n\t"; isNewStatement = true; } // Preserve the existence of leading whitespace except at the // start of a statement. if (!isNewStatement && Tok.hasLeadingSpace()) Asm += ' '; // Remember the offset of this token. TokOffsets.push_back(Asm.size()); // Don't actually write '__asm' into the assembly stream. if (Tok.is(tok::kw_asm)) { // Complain about __asm at the end of the stream. if (i + 1 == e) { PP.Diag(AsmLoc, diag::err_asm_empty); return true; } continue; } // Append the spelling of the token. SmallString<32> SpellingBuffer; bool SpellingInvalid = false; Asm += PP.getSpelling(Tok, SpellingBuffer, &SpellingInvalid); assert(!SpellingInvalid && "spelling was invalid after correct parse?"); // We are no longer at the start of a statement. isNewStatement = false; } // Ensure that the buffer is null-terminated. Asm.push_back('\0'); Asm.pop_back(); assert(TokOffsets.size() == AsmToks.size()); return false; }
/// GetFirstChar - Get the first character of the token \arg Tok, /// avoiding calls to getSpelling where possible. static char GetFirstChar(Preprocessor &PP, const Token &Tok) { if (IdentifierInfo *II = Tok.getIdentifierInfo()) { // Avoid spelling identifiers, the most common form of token. return II->getNameStart()[0]; } else if (!Tok.needsCleaning()) { if (Tok.isLiteral() && Tok.getLiteralData()) { return *Tok.getLiteralData(); } else { SourceManager &SM = PP.getSourceManager(); return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())); } } else if (Tok.getLength() < 256) { char Buffer[256]; const char *TokPtr = Buffer; PP.getSpelling(Tok, TokPtr); return TokPtr[0]; } else { return PP.getSpelling(Tok)[0]; } }
/// DoRewriteTest - Exercise the TokenRewriter on the main file: wrap every
/// comment token in <i> ... </i> and write the spelling of every token in the
/// rewritten stream to \p OS.
void clang::DoRewriteTest(Preprocessor &PP, raw_ostream* OS) {
  SourceManager &SM = PP.getSourceManager();
  TokenRewriter Rewriter(SM.getMainFileID(), SM, PP.getLangOptions());

  // Pass 1: surround each comment token with italic tags.
  for (TokenRewriter::token_iterator Cur = Rewriter.token_begin(),
                                     End = Rewriter.token_end();
       Cur != End; ++Cur) {
    if (Cur->is(tok::comment)) {
      Rewriter.AddTokenBefore(Cur, "<i>");
      Rewriter.AddTokenAfter(Cur, "</i>");
    }
  }

  // Pass 2: dump the resulting token stream.
  TokenRewriter::token_iterator Out = Rewriter.token_begin();
  const TokenRewriter::token_iterator OutEnd = Rewriter.token_end();
  while (Out != OutEnd) {
    *OS << PP.getSpelling(*Out);
    ++Out;
  }
}
/// FindExpectedDiags - Lex the main source file to find all of the // expected errors and warnings. static void FindExpectedDiags(Preprocessor &PP, DiagList &ExpectedErrors, DiagList &ExpectedWarnings, DiagList &ExpectedNotes) { // Create a raw lexer to pull all the comments out of the main file. We don't // want to look in #include'd headers for expected-error strings. FileID FID = PP.getSourceManager().getMainFileID(); // Create a lexer to lex all the tokens of the main file in raw mode. Lexer RawLex(FID, PP.getSourceManager(), PP.getLangOptions()); // Return comments as tokens, this is how we find expected diagnostics. RawLex.SetCommentRetentionState(true); Token Tok; Tok.setKind(tok::comment); while (Tok.isNot(tok::eof)) { RawLex.Lex(Tok); if (!Tok.is(tok::comment)) continue; std::string Comment = PP.getSpelling(Tok); if (Comment.empty()) continue; // Find all expected errors. FindDiagnostics(&Comment[0], Comment.size(), ExpectedErrors, PP, Tok.getLocation(), "expected-error"); // Find all expected warnings. FindDiagnostics(&Comment[0], Comment.size(), ExpectedWarnings, PP, Tok.getLocation(), "expected-warning"); // Find all expected notes. FindDiagnostics(&Comment[0], Comment.size(), ExpectedNotes, PP, Tok.getLocation(), "expected-note"); }; }
/// EvaluateHasIncludeCommon - Process a '__has_include("path")' /// or '__has_include_next("path")' expression. /// Returns true if successful. static bool EvaluateHasIncludeCommon(Token &Tok, IdentifierInfo *II, Preprocessor &PP, const DirectoryLookup *LookupFrom) { SourceLocation LParenLoc; // Get '('. PP.LexNonComment(Tok); // Ensure we have a '('. if (Tok.isNot(tok::l_paren)) { PP.Diag(Tok.getLocation(), diag::err_pp_missing_lparen) << II->getName(); return false; } // Save '(' location for possible missing ')' message. LParenLoc = Tok.getLocation(); // Get the file name. PP.getCurrentLexer()->LexIncludeFilename(Tok); // Reserve a buffer to get the spelling. llvm::SmallString<128> FilenameBuffer; StringRef Filename; SourceLocation EndLoc; switch (Tok.getKind()) { case tok::eod: // If the token kind is EOD, the error has already been diagnosed. return false; case tok::angle_string_literal: case tok::string_literal: { bool Invalid = false; Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid); if (Invalid) return false; break; } case tok::less: // This could be a <foo/bar.h> file coming from a macro expansion. In this // case, glue the tokens together into FilenameBuffer and interpret those. FilenameBuffer.push_back('<'); if (PP.ConcatenateIncludeName(FilenameBuffer, EndLoc)) return false; // Found <eod> but no ">"? Diagnostic already emitted. Filename = FilenameBuffer.str(); break; default: PP.Diag(Tok.getLocation(), diag::err_pp_expects_filename); return false; } bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename); // If GetIncludeFilenameSpelling set the start ptr to null, there was an // error. if (Filename.empty()) return false; // Search include directories. const DirectoryLookup *CurDir; const FileEntry *File = PP.LookupFile(Filename, isAngled, LookupFrom, CurDir, NULL, NULL, NULL); // Get the result value. Result = true means the file exists. bool Result = File != 0; // Get ')'. 
PP.LexNonComment(Tok); // Ensure we have a trailing ). if (Tok.isNot(tok::r_paren)) { PP.Diag(Tok.getLocation(), diag::err_pp_missing_rparen) << II->getName(); PP.Diag(LParenLoc, diag::note_matching) << "("; return false; } return Result; }
void BlankPragmaHandler::HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, Token &FirstToken) { SourceLocation loc = FirstToken.getLocation(); loc = comp.getSourceManager().getFileLoc(loc); FileID fileID = comp.getSourceManager().getFileID(loc); int line = comp.getSourceManager().getLineNumber(fileID, comp.getSourceManager().getFileOffset(loc)); Token &Tok = FirstToken; PP.LexNonComment(Tok); #define msg "Handler stub directive syntax error" std::string tokStr = Tok.getIdentifierInfo()->getName(); PragmaHandlerStub *curPragma = new PragmaHandlerStub; curPragma->line = line; curPragma->isAcross = false; //note: tell Pritula dvm src are incorrect PP.LexNonComment(Tok); while (Tok.isAnyIdentifier()) { std::string clauseName = Tok.getIdentifierInfo()->getName(); PP.LexNonComment(Tok); PP.LexNonComment(Tok); if (clauseName == "dvm_array") { while (Tok.isAnyIdentifier()) { tokStr = Tok.getIdentifierInfo()->getName(); curPragma->dvmArrays.insert(tokStr); PP.LexNonComment(Tok); if (Tok.is(tok::comma)) { PP.LexNonComment(Tok); } } } else if (clauseName == "regular_array") { while (Tok.isAnyIdentifier()) { tokStr = Tok.getIdentifierInfo()->getName(); curPragma->regArrays.insert(tokStr); PP.LexNonComment(Tok); if (Tok.is(tok::comma)) { PP.LexNonComment(Tok); } } } else if (clauseName == "scalar") { while (Tok.isAnyIdentifier()) { tokStr = Tok.getIdentifierInfo()->getName(); curPragma->scalars.insert(tokStr); PP.LexNonComment(Tok); if (Tok.is(tok::comma)) { PP.LexNonComment(Tok); } } } else if (clauseName == "loop_var") { while (Tok.isAnyIdentifier()) { LoopVarDesc loopVar; tokStr = Tok.getIdentifierInfo()->getName(); loopVar.name = tokStr; PP.LexNonComment(Tok); PP.LexNonComment(Tok); tokStr = PP.getSpelling(Tok); loopVar.stepSign = atoi(tokStr.c_str()); PP.LexNonComment(Tok); PP.LexNonComment(Tok); if (Tok.isNot(tok::r_paren)) { tokStr = PP.getSpelling(Tok); loopVar.constStep = tokStr; PP.LexNonComment(Tok); } curPragma->loopVars.push_back(loopVar); 
PP.LexNonComment(Tok); if (Tok.is(tok::comma)) { PP.LexNonComment(Tok); //note: tell Pritula dvm src are incorrect } } } else if (clauseName == "reduction") { while (Tok.isAnyIdentifier()) { ClauseReduction red; tokStr = Tok.getIdentifierInfo()->getName(); red.redType = ClauseReduction::guessRedType(tokStr); PP.LexNonComment(Tok); PP.LexNonComment(Tok); red.arrayName = Tok.getIdentifierInfo()->getName(); PP.LexNonComment(Tok); if (red.isLoc()) { PP.LexNonComment(Tok); red.locName = Tok.getIdentifierInfo()->getName(); PP.LexNonComment(Tok); PP.LexNonComment(Tok); tokStr = PP.getSpelling(Tok); red.locSize.strExpr = tokStr; PP.LexNonComment(Tok); } curPragma->reductions.push_back(red); PP.LexNonComment(Tok); if (Tok.is(tok::comma)) { PP.LexNonComment(Tok); } } } else if (clauseName == "private") { while (Tok.isAnyIdentifier()) { tokStr = Tok.getIdentifierInfo()->getName(); curPragma->privates.insert(tokStr); PP.LexNonComment(Tok); if (Tok.is(tok::comma)) { PP.LexNonComment(Tok); } } } else if (clauseName == "weird_rma") { while (Tok.isAnyIdentifier()) { tokStr = Tok.getIdentifierInfo()->getName(); curPragma->weirdRmas.insert(tokStr); PP.LexNonComment(Tok); if (Tok.is(tok::comma)) { PP.LexNonComment(Tok); } } } else if (clauseName == "across") { curPragma->isAcross = true; } else if (clauseName == "remote_access") { int depth = 1; while (Tok.isNot(tok::r_paren) || depth > 1) { if (Tok.is(tok::l_paren)) depth++; if (Tok.is(tok::r_paren)) depth--; PP.LexNonComment(Tok); } } else { } PP.LexNonComment(Tok); if (Tok.is(tok::comma)) PP.LexNonComment(Tok); } pragmas[line] = curPragma; #undef msg }
/// EvaluateValue - Evaluate the token PeekTok (and any others needed) and /// return the computed value in Result. Return true if there was an error /// parsing. This function also returns information about the form of the /// expression in DT. See above for information on what DT means. /// /// If ValueLive is false, then this value is being evaluated in a context where /// the result is not used. As such, avoid diagnostics that relate to /// evaluation. static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, bool ValueLive, Preprocessor &PP) { DT.State = DefinedTracker::Unknown; if (PeekTok.is(tok::code_completion)) { if (PP.getCodeCompletionHandler()) PP.getCodeCompletionHandler()->CodeCompletePreprocessorExpression(); PP.setCodeCompletionReached(); PP.LexNonComment(PeekTok); } // If this token's spelling is a pp-identifier, check to see if it is // 'defined' or if it is a macro. Note that we check here because many // keywords are pp-identifiers, so we can't check the kind. if (IdentifierInfo *II = PeekTok.getIdentifierInfo()) { // Handle "defined X" and "defined(X)". if (II->isStr("defined")) return(EvaluateDefined(Result, PeekTok, DT, ValueLive, PP)); // If this identifier isn't 'defined' or one of the special // preprocessor keywords and it wasn't macro expanded, it turns // into a simple 0, unless it is the C++ keyword "true", in which case it // turns into "1". if (ValueLive && II->getTokenID() != tok::kw_true && II->getTokenID() != tok::kw_false) PP.Diag(PeekTok, diag::warn_pp_undef_identifier) << II; Result.Val = II->getTokenID() == tok::kw_true; Result.Val.setIsUnsigned(false); // "0" is signed intmax_t 0. Result.setRange(PeekTok.getLocation()); PP.LexNonComment(PeekTok); return false; } switch (PeekTok.getKind()) { default: // Non-value token. PP.Diag(PeekTok, diag::err_pp_expr_bad_token_start_expr); return true; case tok::eod: case tok::r_paren: // If there is no expression, report and exit. 
PP.Diag(PeekTok, diag::err_pp_expected_value_in_expr); return true; case tok::numeric_constant: { SmallString<64> IntegerBuffer; bool NumberInvalid = false; StringRef Spelling = PP.getSpelling(PeekTok, IntegerBuffer, &NumberInvalid); if (NumberInvalid) return true; // a diagnostic was already reported NumericLiteralParser Literal(Spelling, PeekTok.getLocation(), PP); if (Literal.hadError) return true; // a diagnostic was already reported. if (Literal.isFloatingLiteral() || Literal.isImaginary) { PP.Diag(PeekTok, diag::err_pp_illegal_floating_literal); return true; } assert(Literal.isIntegerLiteral() && "Unknown ppnumber"); // Complain about, and drop, any ud-suffix. if (Literal.hasUDSuffix()) PP.Diag(PeekTok, diag::err_pp_invalid_udl) << /*integer*/1; // 'long long' is a C99 or C++11 feature. if (!PP.getLangOpts().C99 && Literal.isLongLong) { if (PP.getLangOpts().CPlusPlus) PP.Diag(PeekTok, PP.getLangOpts().CPlusPlus0x ? diag::warn_cxx98_compat_longlong : diag::ext_cxx11_longlong); else PP.Diag(PeekTok, diag::ext_c99_longlong); } // Parse the integer literal into Result. if (Literal.GetIntegerValue(Result.Val)) { // Overflow parsing integer literal. if (ValueLive) PP.Diag(PeekTok, diag::warn_integer_too_large); Result.Val.setIsUnsigned(true); } else { // Set the signedness of the result to match whether there was a U suffix // or not. Result.Val.setIsUnsigned(Literal.isUnsigned); // Detect overflow based on whether the value is signed. If signed // and if the value is too large, emit a warning "integer constant is so // large that it is unsigned" e.g. on 12345678901234567890 where intmax_t // is 64-bits. if (!Literal.isUnsigned && Result.Val.isNegative()) { // Don't warn for a hex literal: 0x8000..0 shouldn't warn. if (ValueLive && Literal.getRadix() != 16) PP.Diag(PeekTok, diag::warn_integer_too_large_for_signed); Result.Val.setIsUnsigned(true); } } // Consume the token. 
Result.setRange(PeekTok.getLocation()); PP.LexNonComment(PeekTok); return false; } case tok::char_constant: // 'x' case tok::wide_char_constant: { // L'x' case tok::utf16_char_constant: // u'x' case tok::utf32_char_constant: // U'x' // Complain about, and drop, any ud-suffix. if (PeekTok.hasUDSuffix()) PP.Diag(PeekTok, diag::err_pp_invalid_udl) << /*character*/0; SmallString<32> CharBuffer; bool CharInvalid = false; StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer, &CharInvalid); if (CharInvalid) return true; CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(), PeekTok.getLocation(), PP, PeekTok.getKind()); if (Literal.hadError()) return true; // A diagnostic was already emitted. // Character literals are always int or wchar_t, expand to intmax_t. const TargetInfo &TI = PP.getTargetInfo(); unsigned NumBits; if (Literal.isMultiChar()) NumBits = TI.getIntWidth(); else if (Literal.isWide()) NumBits = TI.getWCharWidth(); else if (Literal.isUTF16()) NumBits = TI.getChar16Width(); else if (Literal.isUTF32()) NumBits = TI.getChar32Width(); else NumBits = TI.getCharWidth(); // Set the width. llvm::APSInt Val(NumBits); // Set the value. Val = Literal.getValue(); // Set the signedness. UTF-16 and UTF-32 are always unsigned if (!Literal.isUTF16() && !Literal.isUTF32()) Val.setIsUnsigned(!PP.getLangOpts().CharIsSigned); if (Result.Val.getBitWidth() > Val.getBitWidth()) { Result.Val = Val.extend(Result.Val.getBitWidth()); } else { assert(Result.Val.getBitWidth() == Val.getBitWidth() && "intmax_t smaller than char/wchar_t?"); Result.Val = Val; } // Consume the token. Result.setRange(PeekTok.getLocation()); PP.LexNonComment(PeekTok); return false; } case tok::l_paren: { SourceLocation Start = PeekTok.getLocation(); PP.LexNonComment(PeekTok); // Eat the (. // Parse the value and if there are any binary operators involved, parse // them. 
if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true; // If this is a silly value like (X), which doesn't need parens, check for // !(defined X). if (PeekTok.is(tok::r_paren)) { // Just use DT unmodified as our result. } else { // Otherwise, we have something like (x+y), and we consumed '(x'. if (EvaluateDirectiveSubExpr(Result, 1, PeekTok, ValueLive, PP)) return true; if (PeekTok.isNot(tok::r_paren)) { PP.Diag(PeekTok.getLocation(), diag::err_pp_expected_rparen) << Result.getRange(); PP.Diag(Start, diag::note_matching) << "("; return true; } DT.State = DefinedTracker::Unknown; } Result.setRange(Start, PeekTok.getLocation()); PP.LexNonComment(PeekTok); // Eat the ). return false; } case tok::plus: { SourceLocation Start = PeekTok.getLocation(); // Unary plus doesn't modify the value. PP.LexNonComment(PeekTok); if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true; Result.setBegin(Start); return false; } case tok::minus: { SourceLocation Loc = PeekTok.getLocation(); PP.LexNonComment(PeekTok); if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true; Result.setBegin(Loc); // C99 6.5.3.3p3: The sign of the result matches the sign of the operand. Result.Val = -Result.Val; // -MININT is the only thing that overflows. Unsigned never overflows. bool Overflow = !Result.isUnsigned() && Result.Val.isMinSignedValue(); // If this operator is live and overflowed, report the issue. if (Overflow && ValueLive) PP.Diag(Loc, diag::warn_pp_expr_overflow) << Result.getRange(); DT.State = DefinedTracker::Unknown; return false; } case tok::tilde: { SourceLocation Start = PeekTok.getLocation(); PP.LexNonComment(PeekTok); if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true; Result.setBegin(Start); // C99 6.5.3.3p4: The sign of the result matches the sign of the operand. 
Result.Val = ~Result.Val; DT.State = DefinedTracker::Unknown; return false; } case tok::exclaim: { SourceLocation Start = PeekTok.getLocation(); PP.LexNonComment(PeekTok); if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true; Result.setBegin(Start); Result.Val = !Result.Val; // C99 6.5.3.3p5: The sign of the result is 'int', aka it is signed. Result.Val.setIsUnsigned(false); if (DT.State == DefinedTracker::DefinedMacro) DT.State = DefinedTracker::NotDefinedMacro; else if (DT.State == DefinedTracker::NotDefinedMacro) DT.State = DefinedTracker::DefinedMacro; return false; } // FIXME: Handle #assert } }
/// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of /// tokens into the literal string token that should be produced by the C # /// preprocessor operator. If Charify is true, then it should be turned into /// a character literal for the Microsoft charize (#@) extension. /// Token MacroArgs::StringifyArgument(const Token *ArgToks, Preprocessor &PP, bool Charify, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd) { Token Tok; Tok.startToken(); Tok.setKind(Charify ? tok::char_constant : tok::string_literal); const Token *ArgTokStart = ArgToks; // Stringify all the tokens. SmallString<128> Result; Result += "\""; bool isFirst = true; for (; ArgToks->isNot(tok::eof); ++ArgToks) { const Token &Tok = *ArgToks; if (!isFirst && (Tok.hasLeadingSpace() || Tok.isAtStartOfLine())) Result += ' '; isFirst = false; // If this is a string or character constant, escape the token as specified // by 6.10.3.2p2. if (tok::isStringLiteral(Tok.getKind()) || // "foo", u8R"x(foo)x"_bar, etc. Tok.is(tok::char_constant) || // 'x' Tok.is(tok::wide_char_constant) || // L'x'. Tok.is(tok::utf8_char_constant) || // u8'x'. Tok.is(tok::utf16_char_constant) || // u'x'. Tok.is(tok::utf32_char_constant)) { // U'x'. bool Invalid = false; std::string TokStr = PP.getSpelling(Tok, &Invalid); if (!Invalid) { std::string Str = Lexer::Stringify(TokStr); Result.append(Str.begin(), Str.end()); } } else if (Tok.is(tok::code_completion)) { PP.CodeCompleteNaturalLanguage(); } else { // Otherwise, just append the token. Do some gymnastics to get the token // in place and avoid copies where possible. unsigned CurStrLen = Result.size(); Result.resize(CurStrLen+Tok.getLength()); const char *BufPtr = Result.data() + CurStrLen; bool Invalid = false; unsigned ActualTokLen = PP.getSpelling(Tok, BufPtr, &Invalid); if (!Invalid) { // If getSpelling returned a pointer to an already uniqued version of // the string instead of filling in BufPtr, memcpy it onto our string. 
if (ActualTokLen && BufPtr != &Result[CurStrLen]) memcpy(&Result[CurStrLen], BufPtr, ActualTokLen); // If the token was dirty, the spelling may be shorter than the token. if (ActualTokLen != Tok.getLength()) Result.resize(CurStrLen+ActualTokLen); } } } // If the last character of the string is a \, and if it isn't escaped, this // is an invalid string literal, diagnose it as specified in C99. if (Result.back() == '\\') { // Count the number of consecutive \ characters. If even, then they are // just escaped backslashes, otherwise it's an error. unsigned FirstNonSlash = Result.size()-2; // Guaranteed to find the starting " if nothing else. while (Result[FirstNonSlash] == '\\') --FirstNonSlash; if ((Result.size()-1-FirstNonSlash) & 1) { // Diagnose errors for things like: #define F(X) #X / F(\) PP.Diag(ArgToks[-1], diag::pp_invalid_string_literal); Result.pop_back(); // remove one of the \'s. } } Result += '"'; // If this is the charify operation and the result is not a legal character // constant, diagnose it. if (Charify) { // First step, turn double quotes into single quotes: Result[0] = '\''; Result[Result.size()-1] = '\''; // Check for bogus character. bool isBad = false; if (Result.size() == 3) isBad = Result[1] == '\''; // ''' is not legal. '\' already fixed above. else isBad = (Result.size() != 4 || Result[1] != '\\'); // Not '\x' if (isBad) { PP.Diag(ArgTokStart[0], diag::err_invalid_character_to_charify); Result = "' '"; // Use something arbitrary, but legal. } } PP.CreateString(Result, Tok, ExpansionLocStart, ExpansionLocEnd); return Tok; }
/// HighlightMacros - This uses the macro table state from the end of the /// file, to re-expand macros and insert (into the HTML) information about the /// macro expansions. This won't be perfectly perfect, but it will be /// reasonably close. void html::HighlightMacros(Rewriter &R, FileID FID, Preprocessor& PP) { // Re-lex the raw token stream into a token buffer. const SourceManager &SM = PP.getSourceManager(); std::vector<Token> TokenStream; Lexer L(FID, SM, PP.getLangOptions()); // Lex all the tokens in raw mode, to avoid entering #includes or expanding // macros. while (1) { Token Tok; L.LexFromRawLexer(Tok); // If this is a # at the start of a line, discard it from the token stream. // We don't want the re-preprocess step to see #defines, #includes or other // preprocessor directives. if (Tok.is(tok::hash) && Tok.isAtStartOfLine()) continue; // If this is a ## token, change its kind to unknown so that repreprocessing // it will not produce an error. if (Tok.is(tok::hashhash)) Tok.setKind(tok::unknown); // If this raw token is an identifier, the raw lexer won't have looked up // the corresponding identifier info for it. Do this now so that it will be // macro expanded when we re-preprocess it. if (Tok.is(tok::identifier)) { // Change the kind of this identifier to the appropriate token kind, e.g. // turning "for" into a keyword. Tok.setKind(PP.LookUpIdentifierInfo(Tok)->getTokenID()); } TokenStream.push_back(Tok); if (Tok.is(tok::eof)) break; } // Temporarily change the diagnostics object so that we ignore any generated // diagnostics from this pass. IgnoringDiagClient TmpDC; Diagnostic TmpDiags(&TmpDC); Diagnostic *OldDiags = &PP.getDiagnostics(); PP.setDiagnostics(TmpDiags); // Inform the preprocessor that we don't want comments. PP.SetCommentRetentionState(false, false); // Enter the tokens we just lexed. This will cause them to be macro expanded // but won't enter sub-files (because we removed #'s). 
PP.EnterTokenStream(&TokenStream[0], TokenStream.size(), false, false); TokenConcatenation ConcatInfo(PP); // Lex all the tokens. Token Tok; PP.Lex(Tok); while (Tok.isNot(tok::eof)) { // Ignore non-macro tokens. if (!Tok.getLocation().isMacroID()) { PP.Lex(Tok); continue; } // Okay, we have the first token of a macro expansion: highlight the // instantiation by inserting a start tag before the macro instantiation and // end tag after it. std::pair<SourceLocation, SourceLocation> LLoc = SM.getInstantiationRange(Tok.getLocation()); // Ignore tokens whose instantiation location was not the main file. if (SM.getFileID(LLoc.first) != FID) { PP.Lex(Tok); continue; } assert(SM.getFileID(LLoc.second) == FID && "Start and end of expansion must be in the same ultimate file!"); std::string Expansion = PP.getSpelling(Tok); unsigned LineLen = Expansion.size(); Token PrevTok = Tok; // Okay, eat this token, getting the next one. PP.Lex(Tok); // Skip all the rest of the tokens that are part of this macro // instantiation. It would be really nice to pop up a window with all the // spelling of the tokens or something. while (!Tok.is(tok::eof) && SM.getInstantiationLoc(Tok.getLocation()) == LLoc.first) { // Insert a newline if the macro expansion is getting large. if (LineLen > 60) { Expansion += "<br>"; LineLen = 0; } LineLen -= Expansion.size(); // If the tokens were already space separated, or if they must be to avoid // them being implicitly pasted, add a space between them. if (Tok.hasLeadingSpace() || ConcatInfo.AvoidConcat(PrevTok, Tok)) Expansion += ' '; // Escape any special characters in the token text. Expansion += EscapeText(PP.getSpelling(Tok)); LineLen += Expansion.size(); PrevTok = Tok; PP.Lex(Tok); } // Insert the expansion as the end tag, so that multi-line macros all get // highlighted. 
Expansion = "<span class='expansion'>" + Expansion + "</span></span>"; HighlightRange(R, LLoc.first, LLoc.second, "<span class='macro'>", Expansion.c_str()); } // Restore diagnostics object back to its own thing. PP.setDiagnostics(*OldDiags); }
/// RewriteMacrosInInput - Implement -rewrite-macros mode. void clang::RewriteMacrosInInput(Preprocessor &PP, raw_ostream *OS) { SourceManager &SM = PP.getSourceManager(); Rewriter Rewrite; Rewrite.setSourceMgr(SM, PP.getLangOpts()); RewriteBuffer &RB = Rewrite.getEditBuffer(SM.getMainFileID()); std::vector<Token> RawTokens; LexRawTokensFromMainFile(PP, RawTokens); unsigned CurRawTok = 0; Token RawTok = GetNextRawTok(RawTokens, CurRawTok, false); // Get the first preprocessing token. PP.EnterMainSourceFile(); Token PPTok; PP.Lex(PPTok); // Preprocess the input file in parallel with raw lexing the main file. Ignore // all tokens that are preprocessed from a file other than the main file (e.g. // a header). If we see tokens that are in the preprocessed file but not the // lexed file, we have a macro expansion. If we see tokens in the lexed file // that aren't in the preprocessed view, we have macros that expand to no // tokens, or macro arguments etc. while (RawTok.isNot(tok::eof) || PPTok.isNot(tok::eof)) { SourceLocation PPLoc = SM.getExpansionLoc(PPTok.getLocation()); // If PPTok is from a different source file, ignore it. if (!SM.isFromMainFile(PPLoc)) { PP.Lex(PPTok); continue; } // If the raw file hits a preprocessor directive, they will be extra tokens // in the raw file that don't exist in the preprocsesed file. However, we // choose to preserve them in the output file and otherwise handle them // specially. if (RawTok.is(tok::hash) && RawTok.isAtStartOfLine()) { // If this is a #warning directive or #pragma mark (GNU extensions), // comment the line out. if (RawTokens[CurRawTok].is(tok::identifier)) { const IdentifierInfo *II = RawTokens[CurRawTok].getIdentifierInfo(); if (II->getName() == "warning") { // Comment out #warning. 
RB.InsertTextAfter(SM.getFileOffset(RawTok.getLocation()), "//"); } else if (II->getName() == "pragma" && RawTokens[CurRawTok+1].is(tok::identifier) && (RawTokens[CurRawTok+1].getIdentifierInfo()->getName() == "mark")) { // Comment out #pragma mark. RB.InsertTextAfter(SM.getFileOffset(RawTok.getLocation()), "//"); } } // Otherwise, if this is a #include or some other directive, just leave it // in the file by skipping over the line. RawTok = GetNextRawTok(RawTokens, CurRawTok, false); while (!RawTok.isAtStartOfLine() && RawTok.isNot(tok::eof)) RawTok = GetNextRawTok(RawTokens, CurRawTok, false); continue; } // Okay, both tokens are from the same file. Get their offsets from the // start of the file. unsigned PPOffs = SM.getFileOffset(PPLoc); unsigned RawOffs = SM.getFileOffset(RawTok.getLocation()); // If the offsets are the same and the token kind is the same, ignore them. if (PPOffs == RawOffs && isSameToken(RawTok, PPTok)) { RawTok = GetNextRawTok(RawTokens, CurRawTok, false); PP.Lex(PPTok); continue; } // If the PP token is farther along than the raw token, something was // deleted. Comment out the raw token. if (RawOffs <= PPOffs) { // Comment out a whole run of tokens instead of bracketing each one with // comments. Add a leading space if RawTok didn't have one. bool HasSpace = RawTok.hasLeadingSpace(); RB.InsertTextAfter(RawOffs, &" /*"[HasSpace]); unsigned EndPos; do { EndPos = RawOffs+RawTok.getLength(); RawTok = GetNextRawTok(RawTokens, CurRawTok, true); RawOffs = SM.getFileOffset(RawTok.getLocation()); if (RawTok.is(tok::comment)) { // Skip past the comment. RawTok = GetNextRawTok(RawTokens, CurRawTok, false); break; } } while (RawOffs <= PPOffs && !RawTok.isAtStartOfLine() && (PPOffs != RawOffs || !isSameToken(RawTok, PPTok))); RB.InsertTextBefore(EndPos, "*/"); continue; } // Otherwise, there was a replacement an expansion. Insert the new token // in the output buffer. Insert the whole run of new tokens at once to get // them in the right order. 
unsigned InsertPos = PPOffs; std::string Expansion; while (PPOffs < RawOffs) { Expansion += ' ' + PP.getSpelling(PPTok); PP.Lex(PPTok); PPLoc = SM.getExpansionLoc(PPTok.getLocation()); PPOffs = SM.getFileOffset(PPLoc); } Expansion += ' '; RB.InsertTextBefore(InsertPos, Expansion); } // Get the buffer corresponding to MainFileID. If we haven't changed it, then // we are done. if (const RewriteBuffer *RewriteBuf = Rewrite.getRewriteBufferFor(SM.getMainFileID())) { //printf("Changed:\n"); *OS << std::string(RewriteBuf->begin(), RewriteBuf->end()); } else { fprintf(stderr, "No changes\n"); } OS->flush(); }