Ejemplo n.º 1
0
/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
/// identifier information for the token and install it into the token,
/// updating the token kind accordingly.
IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
  assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");

  // Look up this token, see if it is a macro, or if it is a language keyword.
  IdentifierInfo *II;
  if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
    // No cleaning needed, just use the characters from the lexed buffer.
    II = getIdentifierInfo(Identifier.getRawIdentifier());
  } else {
    // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
    SmallString<64> IdentifierBuffer;
    StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);

    if (Identifier.hasUCN()) {
      SmallString<64> UCNIdentifierBuffer;
      expandUCNs(UCNIdentifierBuffer, CleanedStr);
      II = getIdentifierInfo(UCNIdentifierBuffer);
    } else {
      II = getIdentifierInfo(CleanedStr);
    }
  }

  // Update the token info (identifier info and appropriate token kind).
  Identifier.setIdentifierInfo(II);
  Identifier.setKind(II->getTokenID());

  return II;
}
Ejemplo n.º 2
0
void Parser::ClassifyToken(Token &T) {
  if (T.isNot(tok::identifier))
    return;

  // Set the identifier info for this token.
  llvm::SmallVector<llvm::StringRef, 2> Spelling;
  TheLexer.getSpelling(T, Spelling);
  std::string NameStr = Tok.CleanLiteral(Spelling);

  // We assume that the "common case" is that if an identifier is also a
  // keyword, it will most likely be used as a keyword. I.e., most programs are
  // sane, and won't use keywords for variable names. We mark it as a keyword
  // for ease in parsing. But it's weak and can change into an identifier or
  // builtin depending upon the context.
  if (IdentifierInfo *KW = Identifiers.lookupKeyword(NameStr)) {
    T.setIdentifierInfo(KW);
    T.setKind(KW->getTokenID());
  } else if (IdentifierInfo *BI = Identifiers.lookupBuiltin(NameStr)) {
    T.setIdentifierInfo(BI);
    T.setKind(BI->getTokenID());
  } else {
    IdentifierInfo *II = getIdentifierInfo(NameStr);
    T.setIdentifierInfo(II);
    T.setKind(II->getTokenID());
  }
}
Ejemplo n.º 3
0
/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
/// identifier information for the token and install it into the token,
/// updating the token kind accordingly.
IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
  assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");

  // Look up this token, see if it is a macro, or if it is a language keyword.
  IdentifierInfo *II;
  if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
    // No cleaning needed, just use the characters from the lexed buffer.
    II = getIdentifierInfo(Identifier.getRawIdentifier());
  } else {
    // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
    SmallString<64> IdentifierBuffer;
    StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);

    if (Identifier.hasUCN()) {
      SmallString<64> UCNIdentifierBuffer;
      expandUCNs(UCNIdentifierBuffer, CleanedStr);
      II = getIdentifierInfo(UCNIdentifierBuffer);
    } else {
      II = getIdentifierInfo(CleanedStr);
    }
  }

  // Update the token info (identifier info and appropriate token kind).
  Identifier.setIdentifierInfo(II);
  if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() &&
      getSourceManager().isInSystemHeader(Identifier.getLocation()))
    Identifier.setKind(clang::tok::identifier);
  else
    Identifier.setKind(II->getTokenID());

  return II;
}
Ejemplo n.º 4
0
/// Lex - Lex and return a token from this macro stream.
///
void TokenLexer::Lex(Token &Tok) {
  // Lexing off the end of the macro, pop this macro off the expansion stack.
  if (isAtEnd()) {
    // If this is a macro (not a token stream), mark the macro enabled now
    // that it is no longer being expanded.
    if (Macro) Macro->EnableMacro();

    // Pop this context off the preprocessors lexer stack and get the next
    // token.  This will delete "this" so remember the PP instance var.
    Preprocessor &PPCache = PP;
    if (PP.HandleEndOfTokenLexer(Tok))
      return;

    // HandleEndOfTokenLexer may not return a token.  If it doesn't, lex
    // whatever is next.
    return PPCache.Lex(Tok);
  }

  SourceManager &SM = PP.getSourceManager();

  // If this is the first token of the expanded result, we inherit spacing
  // properties later.
  bool isFirstToken = CurToken == 0;

  // Get the next token to return.
  Tok = Tokens[CurToken++];

  bool TokenIsFromPaste = false;

  // If this token is followed by a token paste (##) operator, paste the tokens!
  // Note that ## is a normal token when not expanding a macro.
  if (!isAtEnd() && Tokens[CurToken].is(tok::hashhash) && Macro) {
    // When handling the microsoft /##/ extension, the final token is
    // returned by PasteTokens, not the pasted token.
    if (PasteTokens(Tok))
      return;

    TokenIsFromPaste = true;
  }

  // The token's current location indicate where the token was lexed from.  We
  // need this information to compute the spelling of the token, but any
  // diagnostics for the expanded token should appear as if they came from
  // ExpansionLoc.  Pull this information together into a new SourceLocation
  // that captures all of this.
  if (ExpandLocStart.isValid() &&   // Don't do this for token streams.
      // Check that the token's location was not already set properly.
      SM.isBeforeInSLocAddrSpace(Tok.getLocation(), MacroStartSLocOffset)) {
    SourceLocation instLoc;
    if (Tok.is(tok::comment)) {
      instLoc = SM.createExpansionLoc(Tok.getLocation(),
                                      ExpandLocStart,
                                      ExpandLocEnd,
                                      Tok.getLength());
    } else {
      instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation());
    }

    Tok.setLocation(instLoc);
  }

  // If this is the first token, set the lexical properties of the token to
  // match the lexical properties of the macro identifier.
  if (isFirstToken) {
    Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
    Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
  }

  // Handle recursive expansion!
  if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != 0) {
    // Change the kind of this identifier to the appropriate token kind, e.g.
    // turning "for" into a keyword.
    IdentifierInfo *II = Tok.getIdentifierInfo();
    Tok.setKind(II->getTokenID());

    // If this identifier was poisoned and from a paste, emit an error.  This
    // won't be handled by Preprocessor::HandleIdentifier because this is coming
    // from a macro expansion.
    if (II->isPoisoned() && TokenIsFromPaste) {
      PP.HandlePoisonedIdentifier(Tok);
    }

    if (!DisableMacroExpansion && II->isHandleIdentifierCase())
      PP.HandleIdentifier(Tok);
  }

  // Otherwise, return a normal token.
}
Ejemplo n.º 5
0
/// Lex - Lex and return a token from this macro stream.
///
bool TokenLexer::Lex(Token &Tok) {
  // Lexing off the end of the macro, pop this macro off the expansion stack.
  if (isAtEnd()) {
    // If this is a macro (not a token stream), mark the macro enabled now
    // that it is no longer being expanded.
    if (Macro) Macro->EnableMacro();

    Tok.startToken();
    Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
    Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace || NextTokGetsSpace);
    if (CurToken == 0)
      Tok.setFlag(Token::LeadingEmptyMacro);
    return PP.HandleEndOfTokenLexer(Tok);
  }

  SourceManager &SM = PP.getSourceManager();

  // If this is the first token of the expanded result, we inherit spacing
  // properties later.
  bool isFirstToken = CurToken == 0;

  // Get the next token to return.
  Tok = Tokens[CurToken++];

  bool TokenIsFromPaste = false;

  // If this token is followed by a token paste (##) operator, paste the tokens!
  // Note that ## is a normal token when not expanding a macro.
  if (!isAtEnd() && Macro &&
      (Tokens[CurToken].is(tok::hashhash) ||
       // Special processing of L#x macros in -fms-compatibility mode.
       // Microsoft compiler is able to form a wide string literal from
       // 'L#macro_arg' construct in a function-like macro.
       (PP.getLangOpts().MSVCCompat &&
        isWideStringLiteralFromMacro(Tok, Tokens[CurToken])))) {
    // When handling the microsoft /##/ extension, the final token is
    // returned by PasteTokens, not the pasted token.
    if (PasteTokens(Tok))
      return true;

    TokenIsFromPaste = true;
  }

  // The token's current location indicate where the token was lexed from.  We
  // need this information to compute the spelling of the token, but any
  // diagnostics for the expanded token should appear as if they came from
  // ExpansionLoc.  Pull this information together into a new SourceLocation
  // that captures all of this.
  if (ExpandLocStart.isValid() &&   // Don't do this for token streams.
      // Check that the token's location was not already set properly.
      SM.isBeforeInSLocAddrSpace(Tok.getLocation(), MacroStartSLocOffset)) {
    SourceLocation instLoc;
    if (Tok.is(tok::comment)) {
      instLoc = SM.createExpansionLoc(Tok.getLocation(),
                                      ExpandLocStart,
                                      ExpandLocEnd,
                                      Tok.getLength());
    } else {
      instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation());
    }

    Tok.setLocation(instLoc);
  }

  // If this is the first token, set the lexical properties of the token to
  // match the lexical properties of the macro identifier.
  if (isFirstToken) {
    Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
    Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
  } else {
    // If this is not the first token, we may still need to pass through
    // leading whitespace if we've expanded a macro.
    if (AtStartOfLine) Tok.setFlag(Token::StartOfLine);
    if (HasLeadingSpace) Tok.setFlag(Token::LeadingSpace);
  }
  AtStartOfLine = false;
  HasLeadingSpace = false;

  // Handle recursive expansion!
  if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr) {
    // Change the kind of this identifier to the appropriate token kind, e.g.
    // turning "for" into a keyword.
    IdentifierInfo *II = Tok.getIdentifierInfo();
    Tok.setKind(II->getTokenID());

    // If this identifier was poisoned and from a paste, emit an error.  This
    // won't be handled by Preprocessor::HandleIdentifier because this is coming
    // from a macro expansion.
    if (II->isPoisoned() && TokenIsFromPaste) {
      PP.HandlePoisonedIdentifier(Tok);
    }

    if (!DisableMacroExpansion && II->isHandleIdentifierCase())
      return PP.HandleIdentifier(Tok);
  }

  // Otherwise, return a normal token.
  return true;
}
Ejemplo n.º 6
0
/// SyntaxHighlight - Relex the specified FileID and annotate the HTML with
/// information about keywords, macro expansions etc.  This uses the macro
/// table state from the end of the file, so it won't be perfectly perfect,
/// but it will be reasonably close.
void html::SyntaxHighlight(Rewriter &R, FileID FID, Preprocessor &PP) {
  RewriteBuffer &RB = R.getEditBuffer(FID);

  const SourceManager &SM = PP.getSourceManager();
  Lexer L(FID, SM, PP.getLangOptions());
  const char *BufferStart = L.getBufferStart();
  
  // Inform the preprocessor that we want to retain comments as tokens, so we 
  // can highlight them.
  L.SetCommentRetentionState(true);
 
  // Lex all the tokens in raw mode, to avoid entering #includes or expanding
  // macros.
  Token Tok;
  L.LexFromRawLexer(Tok);
  
  while (Tok.isNot(tok::eof)) {
    // Since we are lexing unexpanded tokens, all tokens are from the main
    // FileID.
    unsigned TokOffs = SM.getFileOffset(Tok.getLocation());
    unsigned TokLen = Tok.getLength();
    switch (Tok.getKind()) {
    default: break;
    case tok::identifier: {
      // Fill in Result.IdentifierInfo, looking up the identifier in the
      // identifier table.
      IdentifierInfo *II = PP.LookUpIdentifierInfo(Tok, BufferStart+TokOffs);
        
      // If this is a pp-identifier, for a keyword, highlight it as such.
      if (II->getTokenID() != tok::identifier)
        HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
                       "<span class='keyword'>", "</span>");
      break;
    }
    case tok::comment:
      HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
                     "<span class='comment'>", "</span>");
      break;
    case tok::wide_string_literal:
      // Chop off the L prefix
      ++TokOffs;
      --TokLen;
      // FALL THROUGH.
    case tok::string_literal:
      HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
                     "<span class='string_literal'>", "</span>");
      break;
    case tok::hash: {
      // If this is a preprocessor directive, all tokens to end of line are too.
      if (!Tok.isAtStartOfLine())
        break;
        
      // Eat all of the tokens until we get to the next one at the start of
      // line.
      unsigned TokEnd = TokOffs+TokLen;
      L.LexFromRawLexer(Tok);
      while (!Tok.isAtStartOfLine() && Tok.isNot(tok::eof)) {
        TokEnd = SM.getFileOffset(Tok.getLocation())+Tok.getLength();
        L.LexFromRawLexer(Tok);
      }
      
      // Find end of line.  This is a hack.
      HighlightRange(RB, TokOffs, TokEnd, BufferStart,
                     "<span class='directive'>", "</span>");
      
      // Don't skip the next token.
      continue;
    }
    }
    
    L.LexFromRawLexer(Tok);
  }
}