/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the /// identifier information for the token and install it into the token, /// updating the token kind accordingly. IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!"); // Look up this token, see if it is a macro, or if it is a language keyword. IdentifierInfo *II; if (!Identifier.needsCleaning() && !Identifier.hasUCN()) { // No cleaning needed, just use the characters from the lexed buffer. II = getIdentifierInfo(Identifier.getRawIdentifier()); } else { // Cleaning needed, alloca a buffer, clean into it, then use the buffer. SmallString<64> IdentifierBuffer; StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); if (Identifier.hasUCN()) { SmallString<64> UCNIdentifierBuffer; expandUCNs(UCNIdentifierBuffer, CleanedStr); II = getIdentifierInfo(UCNIdentifierBuffer); } else { II = getIdentifierInfo(CleanedStr); } } // Update the token info (identifier info and appropriate token kind). Identifier.setIdentifierInfo(II); Identifier.setKind(II->getTokenID()); return II; }
/// ClassifyToken - For a tok::identifier token, attach an IdentifierInfo and
/// set the token kind from it.  Tokens of any other kind are left untouched.
///
/// The name is tried as a keyword first, then as a builtin, and finally as a
/// plain identifier; the first lookup that yields an entry wins.
void Parser::ClassifyToken(Token &T) {
  if (T.isNot(tok::identifier))
    return;

  // Compute the name used for the lookups below.
  // NOTE(review): CleanLiteral is invoked on `Tok`, not on the parameter `T`
  // -- confirm that this is intended.
  llvm::SmallVector<llvm::StringRef, 2> Pieces;
  TheLexer.getSpelling(T, Pieces);
  std::string Name = Tok.CleanLiteral(Pieces);

  // The "common case" assumption is that an identifier which is also a
  // keyword is most likely being used as a keyword, so it is marked as one to
  // make parsing easier.  The marking is weak: it can still change into an
  // identifier or builtin depending on context.
  IdentifierInfo *Info = Identifiers.lookupKeyword(Name);
  if (!Info)
    Info = Identifiers.lookupBuiltin(Name);
  if (!Info)
    Info = getIdentifierInfo(Name);

  T.setIdentifierInfo(Info);
  T.setKind(Info->getTokenID());
}
/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the /// identifier information for the token and install it into the token, /// updating the token kind accordingly. IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!"); // Look up this token, see if it is a macro, or if it is a language keyword. IdentifierInfo *II; if (!Identifier.needsCleaning() && !Identifier.hasUCN()) { // No cleaning needed, just use the characters from the lexed buffer. II = getIdentifierInfo(Identifier.getRawIdentifier()); } else { // Cleaning needed, alloca a buffer, clean into it, then use the buffer. SmallString<64> IdentifierBuffer; StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); if (Identifier.hasUCN()) { SmallString<64> UCNIdentifierBuffer; expandUCNs(UCNIdentifierBuffer, CleanedStr); II = getIdentifierInfo(UCNIdentifierBuffer); } else { II = getIdentifierInfo(CleanedStr); } } // Update the token info (identifier info and appropriate token kind). Identifier.setIdentifierInfo(II); if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() && getSourceManager().isInSystemHeader(Identifier.getLocation())) Identifier.setKind(clang::tok::identifier); else Identifier.setKind(II->getTokenID()); return II; }
/// Lex - Lex and return a token from this macro stream. /// void TokenLexer::Lex(Token &Tok) { // Lexing off the end of the macro, pop this macro off the expansion stack. if (isAtEnd()) { // If this is a macro (not a token stream), mark the macro enabled now // that it is no longer being expanded. if (Macro) Macro->EnableMacro(); // Pop this context off the preprocessors lexer stack and get the next // token. This will delete "this" so remember the PP instance var. Preprocessor &PPCache = PP; if (PP.HandleEndOfTokenLexer(Tok)) return; // HandleEndOfTokenLexer may not return a token. If it doesn't, lex // whatever is next. return PPCache.Lex(Tok); } SourceManager &SM = PP.getSourceManager(); // If this is the first token of the expanded result, we inherit spacing // properties later. bool isFirstToken = CurToken == 0; // Get the next token to return. Tok = Tokens[CurToken++]; bool TokenIsFromPaste = false; // If this token is followed by a token paste (##) operator, paste the tokens! // Note that ## is a normal token when not expanding a macro. if (!isAtEnd() && Tokens[CurToken].is(tok::hashhash) && Macro) { // When handling the microsoft /##/ extension, the final token is // returned by PasteTokens, not the pasted token. if (PasteTokens(Tok)) return; TokenIsFromPaste = true; } // The token's current location indicate where the token was lexed from. We // need this information to compute the spelling of the token, but any // diagnostics for the expanded token should appear as if they came from // ExpansionLoc. Pull this information together into a new SourceLocation // that captures all of this. if (ExpandLocStart.isValid() && // Don't do this for token streams. // Check that the token's location was not already set properly. 
SM.isBeforeInSLocAddrSpace(Tok.getLocation(), MacroStartSLocOffset)) { SourceLocation instLoc; if (Tok.is(tok::comment)) { instLoc = SM.createExpansionLoc(Tok.getLocation(), ExpandLocStart, ExpandLocEnd, Tok.getLength()); } else { instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation()); } Tok.setLocation(instLoc); } // If this is the first token, set the lexical properties of the token to // match the lexical properties of the macro identifier. if (isFirstToken) { Tok.setFlagValue(Token::StartOfLine , AtStartOfLine); Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace); } // Handle recursive expansion! if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != 0) { // Change the kind of this identifier to the appropriate token kind, e.g. // turning "for" into a keyword. IdentifierInfo *II = Tok.getIdentifierInfo(); Tok.setKind(II->getTokenID()); // If this identifier was poisoned and from a paste, emit an error. This // won't be handled by Preprocessor::HandleIdentifier because this is coming // from a macro expansion. if (II->isPoisoned() && TokenIsFromPaste) { PP.HandlePoisonedIdentifier(Tok); } if (!DisableMacroExpansion && II->isHandleIdentifierCase()) PP.HandleIdentifier(Tok); } // Otherwise, return a normal token. }
/// Lex - Lex and return a token from this macro stream. /// bool TokenLexer::Lex(Token &Tok) { // Lexing off the end of the macro, pop this macro off the expansion stack. if (isAtEnd()) { // If this is a macro (not a token stream), mark the macro enabled now // that it is no longer being expanded. if (Macro) Macro->EnableMacro(); Tok.startToken(); Tok.setFlagValue(Token::StartOfLine , AtStartOfLine); Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace || NextTokGetsSpace); if (CurToken == 0) Tok.setFlag(Token::LeadingEmptyMacro); return PP.HandleEndOfTokenLexer(Tok); } SourceManager &SM = PP.getSourceManager(); // If this is the first token of the expanded result, we inherit spacing // properties later. bool isFirstToken = CurToken == 0; // Get the next token to return. Tok = Tokens[CurToken++]; bool TokenIsFromPaste = false; // If this token is followed by a token paste (##) operator, paste the tokens! // Note that ## is a normal token when not expanding a macro. if (!isAtEnd() && Macro && (Tokens[CurToken].is(tok::hashhash) || // Special processing of L#x macros in -fms-compatibility mode. // Microsoft compiler is able to form a wide string literal from // 'L#macro_arg' construct in a function-like macro. (PP.getLangOpts().MSVCCompat && isWideStringLiteralFromMacro(Tok, Tokens[CurToken])))) { // When handling the microsoft /##/ extension, the final token is // returned by PasteTokens, not the pasted token. if (PasteTokens(Tok)) return true; TokenIsFromPaste = true; } // The token's current location indicate where the token was lexed from. We // need this information to compute the spelling of the token, but any // diagnostics for the expanded token should appear as if they came from // ExpansionLoc. Pull this information together into a new SourceLocation // that captures all of this. if (ExpandLocStart.isValid() && // Don't do this for token streams. // Check that the token's location was not already set properly. 
SM.isBeforeInSLocAddrSpace(Tok.getLocation(), MacroStartSLocOffset)) { SourceLocation instLoc; if (Tok.is(tok::comment)) { instLoc = SM.createExpansionLoc(Tok.getLocation(), ExpandLocStart, ExpandLocEnd, Tok.getLength()); } else { instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation()); } Tok.setLocation(instLoc); } // If this is the first token, set the lexical properties of the token to // match the lexical properties of the macro identifier. if (isFirstToken) { Tok.setFlagValue(Token::StartOfLine , AtStartOfLine); Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace); } else { // If this is not the first token, we may still need to pass through // leading whitespace if we've expanded a macro. if (AtStartOfLine) Tok.setFlag(Token::StartOfLine); if (HasLeadingSpace) Tok.setFlag(Token::LeadingSpace); } AtStartOfLine = false; HasLeadingSpace = false; // Handle recursive expansion! if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr) { // Change the kind of this identifier to the appropriate token kind, e.g. // turning "for" into a keyword. IdentifierInfo *II = Tok.getIdentifierInfo(); Tok.setKind(II->getTokenID()); // If this identifier was poisoned and from a paste, emit an error. This // won't be handled by Preprocessor::HandleIdentifier because this is coming // from a macro expansion. if (II->isPoisoned() && TokenIsFromPaste) { PP.HandlePoisonedIdentifier(Tok); } if (!DisableMacroExpansion && II->isHandleIdentifierCase()) return PP.HandleIdentifier(Tok); } // Otherwise, return a normal token. return true; }
/// SyntaxHighlight - Relex the specified FileID and annotate the HTML with /// information about keywords, macro expansions etc. This uses the macro /// table state from the end of the file, so it won't be perfectly perfect, /// but it will be reasonably close. void html::SyntaxHighlight(Rewriter &R, FileID FID, Preprocessor &PP) { RewriteBuffer &RB = R.getEditBuffer(FID); const SourceManager &SM = PP.getSourceManager(); Lexer L(FID, SM, PP.getLangOptions()); const char *BufferStart = L.getBufferStart(); // Inform the preprocessor that we want to retain comments as tokens, so we // can highlight them. L.SetCommentRetentionState(true); // Lex all the tokens in raw mode, to avoid entering #includes or expanding // macros. Token Tok; L.LexFromRawLexer(Tok); while (Tok.isNot(tok::eof)) { // Since we are lexing unexpanded tokens, all tokens are from the main // FileID. unsigned TokOffs = SM.getFileOffset(Tok.getLocation()); unsigned TokLen = Tok.getLength(); switch (Tok.getKind()) { default: break; case tok::identifier: { // Fill in Result.IdentifierInfo, looking up the identifier in the // identifier table. IdentifierInfo *II = PP.LookUpIdentifierInfo(Tok, BufferStart+TokOffs); // If this is a pp-identifier, for a keyword, highlight it as such. if (II->getTokenID() != tok::identifier) HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart, "<span class='keyword'>", "</span>"); break; } case tok::comment: HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart, "<span class='comment'>", "</span>"); break; case tok::wide_string_literal: // Chop off the L prefix ++TokOffs; --TokLen; // FALL THROUGH. case tok::string_literal: HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart, "<span class='string_literal'>", "</span>"); break; case tok::hash: { // If this is a preprocessor directive, all tokens to end of line are too. if (!Tok.isAtStartOfLine()) break; // Eat all of the tokens until we get to the next one at the start of // line. 
unsigned TokEnd = TokOffs+TokLen; L.LexFromRawLexer(Tok); while (!Tok.isAtStartOfLine() && Tok.isNot(tok::eof)) { TokEnd = SM.getFileOffset(Tok.getLocation())+Tok.getLength(); L.LexFromRawLexer(Tok); } // Find end of line. This is a hack. HighlightRange(RB, TokOffs, TokEnd, BufferStart, "<span class='directive'>", "</span>"); // Don't skip the next token. continue; } } L.LexFromRawLexer(Tok); } }