Example #1
0
/// Re-assemble the lexed MS inline-asm tokens into a single string that can
/// be handed to the MC asm parser.
///
/// Every statement after the first is introduced by "\n\t"; a statement
/// boundary is an '__asm' keyword or a token that starts a source line.
/// Inside a statement, a token's leading whitespace is collapsed to a single
/// space.  The '__asm' keyword itself is never written to the output.
///
/// \param PP         Used to obtain token spellings and to report errors.
/// \param AsmLoc     Location used for the "empty asm" diagnostic.
/// \param AsmToks    Tokens of the asm block; must not be empty.
/// \param TokOffsets Receives one entry per token: the size of \p Asm at the
///                   moment that token was reached.
/// \param Asm        Receives the assembled text.
/// \returns true if an error was diagnosed (an '__asm' keyword was the last
///          token), false otherwise.
static bool buildMSAsmString(Preprocessor &PP, SourceLocation AsmLoc,
                             ArrayRef<Token> AsmToks,
                             SmallVectorImpl<unsigned> &TokOffsets,
                             SmallString<512> &Asm) {
  assert(!AsmToks.empty() && "Didn't expect an empty AsmToks!");

  const unsigned NumToks = AsmToks.size();

  // True until the first real (non-'__asm') token of a statement is written.
  bool AtStatementStart = true;

  for (unsigned Idx = 0; Idx != NumToks; ++Idx) {
    const Token &Cur = AsmToks[Idx];
    const bool IsAsmKeyword = Cur.is(tok::kw_asm);

    if (!AtStatementStart) {
      if (IsAsmKeyword || Cur.isAtStartOfLine()) {
        // A new statement begins here: separate it from the previous one.
        Asm += "\n\t";
        AtStatementStart = true;
      } else if (Cur.hasLeadingSpace()) {
        // Mid-statement whitespace is kept, reduced to one blank.
        Asm += ' ';
      }
    }

    // Record where this token lands in the output (also done for '__asm',
    // so that TokOffsets stays parallel to AsmToks).
    TokOffsets.push_back(Asm.size());

    if (IsAsmKeyword) {
      // An '__asm' with nothing after it is an error.
      if (Idx + 1 == NumToks) {
        PP.Diag(AsmLoc, diag::err_asm_empty);
        return true;
      }

      // Otherwise the keyword is simply dropped from the stream.
      continue;
    }

    // Emit the token's spelling.
    SmallString<32> SpellingBuffer;
    bool SpellingInvalid = false;
    Asm += PP.getSpelling(Cur, SpellingBuffer, &SpellingInvalid);
    assert(!SpellingInvalid && "spelling was invalid after correct parse?");

    // Something has been written, so we are now inside a statement.
    AtStatementStart = false;
  }

  // Write a NUL just past the end of the text without counting it in the
  // string's size.
  Asm.push_back('\0');
  Asm.pop_back();

  assert(TokOffsets.size() == AsmToks.size());
  return false;
}
/// GetFirstChar - Return the first character of the spelling of \arg Tok,
/// using the cheapest source available and falling back to getSpelling only
/// when the token needs cleaning.
static char GetFirstChar(Preprocessor &PP, const Token &Tok) {
  // Tokens with identifier info (the most common case) can be answered
  // straight from the identifier's name.
  if (IdentifierInfo *Ident = Tok.getIdentifierInfo())
    return *Ident->getNameStart();

  // A token that needs no cleaning can be read directly from its literal
  // data or, failing that, from the source buffer.
  if (!Tok.needsCleaning()) {
    if (Tok.isLiteral() && Tok.getLiteralData())
      return *Tok.getLiteralData();

    SourceManager &SrcMgr = PP.getSourceManager();
    return *SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation()));
  }

  // Tokens too long for the stack buffer below go through the
  // string-returning getSpelling overload.
  if (Tok.getLength() >= 256)
    return PP.getSpelling(Tok)[0];

  // Short tokens are spelled into a local buffer; getSpelling may redirect
  // the pointer elsewhere, so read through the pointer rather than the array.
  char Scratch[256];
  const char *Spelling = Scratch;
  PP.getSpelling(Tok, Spelling);
  return Spelling[0];
}
/// DoRewriteTest - Exercise TokenRewriter on the main file: wrap every comment
/// token in <i>...</i> and write the spelling of every resulting token to
/// \p OS.
void clang::DoRewriteTest(Preprocessor &PP, raw_ostream* OS) {
  typedef TokenRewriter::token_iterator TokIter;

  SourceManager &SM = PP.getSourceManager();
  const LangOptions &LangOpts = PP.getLangOptions();
  TokenRewriter Rewriter(SM.getMainFileID(), SM, LangOpts);

  // Pass 1: surround each comment token with an opening and a closing tag.
  for (TokIter Cur = Rewriter.token_begin(), End = Rewriter.token_end();
       Cur != End; ++Cur) {
    if (Cur->is(tok::comment)) {
      Rewriter.AddTokenBefore(Cur, "<i>");
      Rewriter.AddTokenAfter(Cur, "</i>");
    }
  }

  // Pass 2: dump the (now augmented) token list.
  for (TokIter Cur = Rewriter.token_begin(), End = Rewriter.token_end();
       Cur != End; ++Cur) {
    *OS << PP.getSpelling(*Cur);
  }
}
Example #4
0
/// FindExpectedDiags - Raw-lex the main source file and scan each comment in
/// it for "expected-error", "expected-warning" and "expected-note" markers,
/// passing the matching list to FindDiagnostics for each marker kind.
static void FindExpectedDiags(Preprocessor &PP,
                              DiagList &ExpectedErrors,
                              DiagList &ExpectedWarnings,
                              DiagList &ExpectedNotes) {
  // Only the main file is scanned; #include'd headers are deliberately not
  // searched for expected-diagnostic strings.
  SourceManager &SM = PP.getSourceManager();
  FileID MainFID = SM.getMainFileID();

  // A raw-mode lexer over the main file that hands comments back as tokens.
  Lexer RawLex(MainFID, SM, PP.getLangOptions());
  RawLex.SetCommentRetentionState(true);

  // Seed the token with a non-eof kind so the loop body runs at least once.
  Token Tok;
  Tok.setKind(tok::comment);

  while (Tok.isNot(tok::eof)) {
    RawLex.Lex(Tok);
    if (Tok.isNot(tok::comment))
      continue;

    std::string Comment = PP.getSpelling(Tok);
    if (Comment.empty())
      continue;

    char *Text = &Comment[0];
    const std::string::size_type Length = Comment.size();
    const SourceLocation Where = Tok.getLocation();

    // One pass over the comment per diagnostic level.
    FindDiagnostics(Text, Length, ExpectedErrors, PP, Where,
                    "expected-error");
    FindDiagnostics(Text, Length, ExpectedWarnings, PP, Where,
                    "expected-warning");
    FindDiagnostics(Text, Length, ExpectedNotes, PP, Where,
                    "expected-note");
  }
}
Example #5
0
/// EvaluateHasIncludeCommon - Shared implementation of
/// '__has_include("path")' and '__has_include_next("path")'.
///
/// \param Tok        Token used for lexing; on return it holds the last token
///                   that was lexed.
/// \param II         Identifier of the builtin, used in diagnostics.
/// \param PP         The preprocessor doing the lexing and file lookup.
/// \param LookupFrom Directory to start the search from, forwarded to
///                   Preprocessor::LookupFile.
/// \returns true only if the expression is well formed and the file was
///          found; false if the file was not found or a problem was hit.
static bool EvaluateHasIncludeCommon(Token &Tok,
                                     IdentifierInfo *II, Preprocessor &PP,
                                     const DirectoryLookup *LookupFrom) {
  // The builtin name must be followed by '('.
  PP.LexNonComment(Tok);
  if (!Tok.is(tok::l_paren)) {
    PP.Diag(Tok.getLocation(), diag::err_pp_missing_lparen) << II->getName();
    return false;
  }

  // Remembered so that a missing ')' can point back at its partner.
  const SourceLocation OpenParenLoc = Tok.getLocation();

  // Lex the file name in include-filename mode.
  PP.getCurrentLexer()->LexIncludeFilename(Tok);

  // Backing storage for the spelling, and the view onto it.
  llvm::SmallString<128> NameStorage;
  StringRef Filename;
  SourceLocation EndLoc;

  if (Tok.is(tok::eod)) {
    // Hitting end-of-directive here means the error was already diagnosed.
    return false;
  }

  if (Tok.is(tok::angle_string_literal) || Tok.is(tok::string_literal)) {
    bool Invalid = false;
    Filename = PP.getSpelling(Tok, NameStorage, &Invalid);
    if (Invalid)
      return false;
  } else if (Tok.is(tok::less)) {
    // Possibly a <foo/bar.h> name produced by macro expansion: glue the
    // following tokens together into the buffer and interpret the result.
    NameStorage.push_back('<');
    if (PP.ConcatenateIncludeName(NameStorage, EndLoc))
      return false;   // Found <eod> but no ">"?  Diagnostic already emitted.
    Filename = NameStorage.str();
  } else {
    PP.Diag(Tok.getLocation(), diag::err_pp_expects_filename);
    return false;
  }

  // Strip the delimiters and learn whether the name was angled.  An empty
  // name afterwards signals that GetIncludeFilenameSpelling hit an error.
  bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename);
  if (Filename.empty())
    return false;

  // Look the file up along the include path.
  const DirectoryLookup *CurDir;
  const FileEntry *File =
      PP.LookupFile(Filename, isAngled, LookupFrom, CurDir, NULL, NULL, NULL);

  // The expression must be closed with ')'.
  PP.LexNonComment(Tok);
  if (Tok.isNot(tok::r_paren)) {
    PP.Diag(Tok.getLocation(), diag::err_pp_missing_rparen) << II->getName();
    PP.Diag(OpenParenLoc, diag::note_matching) << "(";
    return false;
  }

  // The result is simply whether the lookup produced a file.
  return File != 0;
}
void BlankPragmaHandler::HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, Token &FirstToken) {
    SourceLocation loc = FirstToken.getLocation();
    loc = comp.getSourceManager().getFileLoc(loc);
    FileID fileID = comp.getSourceManager().getFileID(loc);
    int line = comp.getSourceManager().getLineNumber(fileID, comp.getSourceManager().getFileOffset(loc));
    Token &Tok = FirstToken;
    PP.LexNonComment(Tok);
#define msg "Handler stub directive syntax error"
    std::string tokStr = Tok.getIdentifierInfo()->getName();
    PragmaHandlerStub *curPragma = new PragmaHandlerStub;
    curPragma->line = line;
    curPragma->isAcross = false; //note: tell Pritula dvm src are incorrect
    PP.LexNonComment(Tok);
    while (Tok.isAnyIdentifier()) {
        std::string clauseName = Tok.getIdentifierInfo()->getName();
        PP.LexNonComment(Tok);
        PP.LexNonComment(Tok);
        if (clauseName == "dvm_array") {
            while (Tok.isAnyIdentifier()) {
                tokStr = Tok.getIdentifierInfo()->getName();
                curPragma->dvmArrays.insert(tokStr);
                PP.LexNonComment(Tok);
                if (Tok.is(tok::comma)) {
                    PP.LexNonComment(Tok);
                }
            }
        } else if (clauseName == "regular_array") {
            while (Tok.isAnyIdentifier()) {
                tokStr = Tok.getIdentifierInfo()->getName();
                curPragma->regArrays.insert(tokStr);
                PP.LexNonComment(Tok);
                if (Tok.is(tok::comma)) {
                    PP.LexNonComment(Tok);
                }
            }
        } else if (clauseName == "scalar") {
            while (Tok.isAnyIdentifier()) {
                tokStr = Tok.getIdentifierInfo()->getName();
                curPragma->scalars.insert(tokStr);
                PP.LexNonComment(Tok);
                if (Tok.is(tok::comma)) {
                    PP.LexNonComment(Tok);
                }
            }
        } else if (clauseName == "loop_var") {
            while (Tok.isAnyIdentifier()) {
                LoopVarDesc loopVar;
                tokStr = Tok.getIdentifierInfo()->getName();
                loopVar.name = tokStr;
                PP.LexNonComment(Tok);
                PP.LexNonComment(Tok);
                tokStr = PP.getSpelling(Tok);
                loopVar.stepSign = atoi(tokStr.c_str());
                PP.LexNonComment(Tok);
                PP.LexNonComment(Tok);
                if (Tok.isNot(tok::r_paren)) {
                    tokStr = PP.getSpelling(Tok);
                    loopVar.constStep = tokStr;
                    PP.LexNonComment(Tok);
                }
                curPragma->loopVars.push_back(loopVar);
                PP.LexNonComment(Tok);
                if (Tok.is(tok::comma)) {
                    PP.LexNonComment(Tok); //note: tell Pritula dvm src are incorrect
                }
            }
        } else if (clauseName == "reduction") {
            while (Tok.isAnyIdentifier()) {
                ClauseReduction red;
                tokStr = Tok.getIdentifierInfo()->getName();
                red.redType = ClauseReduction::guessRedType(tokStr);
                PP.LexNonComment(Tok);
                PP.LexNonComment(Tok);
                red.arrayName = Tok.getIdentifierInfo()->getName();
                PP.LexNonComment(Tok);
                if (red.isLoc()) {
                    PP.LexNonComment(Tok);
                    red.locName = Tok.getIdentifierInfo()->getName();
                    PP.LexNonComment(Tok);
                    PP.LexNonComment(Tok);
                    tokStr = PP.getSpelling(Tok);
                    red.locSize.strExpr = tokStr;
                    PP.LexNonComment(Tok);
                }
                curPragma->reductions.push_back(red);
                PP.LexNonComment(Tok);
                if (Tok.is(tok::comma)) {
                    PP.LexNonComment(Tok);
                }
            }
        } else if (clauseName == "private") {
            while (Tok.isAnyIdentifier()) {
                tokStr = Tok.getIdentifierInfo()->getName();
                curPragma->privates.insert(tokStr);
                PP.LexNonComment(Tok);
                if (Tok.is(tok::comma)) {
                    PP.LexNonComment(Tok);
                }
            }
        } else if (clauseName == "weird_rma") {
            while (Tok.isAnyIdentifier()) {
                tokStr = Tok.getIdentifierInfo()->getName();
                curPragma->weirdRmas.insert(tokStr);
                PP.LexNonComment(Tok);
                if (Tok.is(tok::comma)) {
                    PP.LexNonComment(Tok);
                }
            }
        } else if (clauseName == "across") {
            curPragma->isAcross = true;
        } else if (clauseName == "remote_access") {
            int depth = 1;
            while (Tok.isNot(tok::r_paren) || depth > 1) {
                if (Tok.is(tok::l_paren))
                    depth++;
                if (Tok.is(tok::r_paren))
                    depth--;
                PP.LexNonComment(Tok);
            }
        } else {
        }
        PP.LexNonComment(Tok);
        if (Tok.is(tok::comma))
            PP.LexNonComment(Tok);
    }
    pragmas[line] = curPragma;
#undef msg
}
Example #7
0
/// EvaluateValue - Evaluate the token PeekTok (and any others needed) and
/// return the computed value in Result.  Return true if there was an error
/// parsing.  This function also returns information about the form of the
/// expression in DT.  See above for information on what DT means.
///
/// If ValueLive is false, then this value is being evaluated in a context where
/// the result is not used.  As such, avoid diagnostics that relate to
/// evaluation.
///
/// On success PeekTok has been advanced past the value that was evaluated and
/// Result's source range covers it.  The function recurses for parenthesized
/// expressions and for the unary operators '+', '-', '~' and '!'.
static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
                          bool ValueLive, Preprocessor &PP) {
  DT.State = DefinedTracker::Unknown;

  // A code-completion token is reported to the completion handler (if one is
  // installed) and then skipped; evaluation continues with the next token.
  if (PeekTok.is(tok::code_completion)) {
    if (PP.getCodeCompletionHandler())
      PP.getCodeCompletionHandler()->CodeCompletePreprocessorExpression();
    PP.setCodeCompletionReached();
    PP.LexNonComment(PeekTok);
  }
      
  // If this token's spelling is a pp-identifier, check to see if it is
  // 'defined' or if it is a macro.  Note that we check here because many
  // keywords are pp-identifiers, so we can't check the kind.
  if (IdentifierInfo *II = PeekTok.getIdentifierInfo()) {
    // Handle "defined X" and "defined(X)".
    if (II->isStr("defined"))
      return(EvaluateDefined(Result, PeekTok, DT, ValueLive, PP));
    
    // If this identifier isn't 'defined' or one of the special
    // preprocessor keywords and it wasn't macro expanded, it turns
    // into a simple 0, unless it is the C++ keyword "true", in which case it
    // turns into "1".
    // 'true' and 'false' are exempt from the undefined-identifier warning.
    if (ValueLive &&
        II->getTokenID() != tok::kw_true &&
        II->getTokenID() != tok::kw_false)
      PP.Diag(PeekTok, diag::warn_pp_undef_identifier) << II;
    Result.Val = II->getTokenID() == tok::kw_true;
    Result.Val.setIsUnsigned(false);  // "0" is signed intmax_t 0.
    Result.setRange(PeekTok.getLocation());
    PP.LexNonComment(PeekTok);
    return false;
  }

  // Every case below returns, so control never falls out of the switch.
  switch (PeekTok.getKind()) {
  default:  // Non-value token.
    PP.Diag(PeekTok, diag::err_pp_expr_bad_token_start_expr);
    return true;
  case tok::eod:
  case tok::r_paren:
    // If there is no expression, report and exit.
    PP.Diag(PeekTok, diag::err_pp_expected_value_in_expr);
    return true;
  case tok::numeric_constant: {
    SmallString<64> IntegerBuffer;
    bool NumberInvalid = false;
    StringRef Spelling = PP.getSpelling(PeekTok, IntegerBuffer, 
                                              &NumberInvalid);
    if (NumberInvalid)
      return true; // a diagnostic was already reported

    NumericLiteralParser Literal(Spelling, PeekTok.getLocation(), PP);
    if (Literal.hadError)
      return true; // a diagnostic was already reported.

    // Only integer literals are accepted here.
    if (Literal.isFloatingLiteral() || Literal.isImaginary) {
      PP.Diag(PeekTok, diag::err_pp_illegal_floating_literal);
      return true;
    }
    assert(Literal.isIntegerLiteral() && "Unknown ppnumber");

    // Complain about, and drop, any ud-suffix.
    // Evaluation continues after the diagnostic is issued.
    if (Literal.hasUDSuffix())
      PP.Diag(PeekTok, diag::err_pp_invalid_udl) << /*integer*/1;

    // 'long long' is a C99 or C++11 feature.
    if (!PP.getLangOpts().C99 && Literal.isLongLong) {
      if (PP.getLangOpts().CPlusPlus)
        PP.Diag(PeekTok,
             PP.getLangOpts().CPlusPlus0x ?
             diag::warn_cxx98_compat_longlong : diag::ext_cxx11_longlong);
      else
        PP.Diag(PeekTok, diag::ext_c99_longlong);
    }

    // Parse the integer literal into Result.
    if (Literal.GetIntegerValue(Result.Val)) {
      // Overflow parsing integer literal.
      if (ValueLive) PP.Diag(PeekTok, diag::warn_integer_too_large);
      Result.Val.setIsUnsigned(true);
    } else {
      // Set the signedness of the result to match whether there was a U suffix
      // or not.
      Result.Val.setIsUnsigned(Literal.isUnsigned);

      // Detect overflow based on whether the value is signed.  If signed
      // and if the value is too large, emit a warning "integer constant is so
      // large that it is unsigned" e.g. on 12345678901234567890 where intmax_t
      // is 64-bits.
      if (!Literal.isUnsigned && Result.Val.isNegative()) {
        // Don't warn for a hex literal: 0x8000..0 shouldn't warn.
        if (ValueLive && Literal.getRadix() != 16)
          PP.Diag(PeekTok, diag::warn_integer_too_large_for_signed);
        Result.Val.setIsUnsigned(true);
      }
    }

    // Consume the token.
    Result.setRange(PeekTok.getLocation());
    PP.LexNonComment(PeekTok);
    return false;
  }
  // NOTE(review): the opening brace of this case's compound statement sits
  // after the second label, so the two labels that follow it are inside the
  // braces.  All four labels still lead to the same code, but the layout is
  // easy to misread.
  case tok::char_constant:          // 'x'
  case tok::wide_char_constant: {   // L'x'
  case tok::utf16_char_constant:    // u'x'
  case tok::utf32_char_constant:    // U'x'
    // Complain about, and drop, any ud-suffix.
    if (PeekTok.hasUDSuffix())
      PP.Diag(PeekTok, diag::err_pp_invalid_udl) << /*character*/0;

    SmallString<32> CharBuffer;
    bool CharInvalid = false;
    StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer, &CharInvalid);
    if (CharInvalid)
      return true;

    CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(),
                              PeekTok.getLocation(), PP, PeekTok.getKind());
    if (Literal.hadError())
      return true;  // A diagnostic was already emitted.

    // Pick the bit width from the kind of character literal (multi-char,
    // wide, UTF-16, UTF-32 or plain); the value is widened to Result's width
    // further down.
    const TargetInfo &TI = PP.getTargetInfo();
    unsigned NumBits;
    if (Literal.isMultiChar())
      NumBits = TI.getIntWidth();
    else if (Literal.isWide())
      NumBits = TI.getWCharWidth();
    else if (Literal.isUTF16())
      NumBits = TI.getChar16Width();
    else if (Literal.isUTF32())
      NumBits = TI.getChar32Width();
    else
      NumBits = TI.getCharWidth();

    // Set the width.
    llvm::APSInt Val(NumBits);
    // Set the value.
    Val = Literal.getValue();
    // Set the signedness. UTF-16 and UTF-32 are always unsigned
    if (!Literal.isUTF16() && !Literal.isUTF32())
      Val.setIsUnsigned(!PP.getLangOpts().CharIsSigned);

    // Widen the literal to the width already carried by Result.Val; the
    // literal is never expected to be wider than that.
    if (Result.Val.getBitWidth() > Val.getBitWidth()) {
      Result.Val = Val.extend(Result.Val.getBitWidth());
    } else {
      assert(Result.Val.getBitWidth() == Val.getBitWidth() &&
             "intmax_t smaller than char/wchar_t?");
      Result.Val = Val;
    }

    // Consume the token.
    Result.setRange(PeekTok.getLocation());
    PP.LexNonComment(PeekTok);
    return false;
  }
  case tok::l_paren: {
    SourceLocation Start = PeekTok.getLocation();
    PP.LexNonComment(PeekTok);  // Eat the (.
    // Parse the value and if there are any binary operators involved, parse
    // them.
    if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;

    // If this is a silly value like (X), which doesn't need parens, check for
    // !(defined X).
    if (PeekTok.is(tok::r_paren)) {
      // Just use DT unmodified as our result.
    } else {
      // Otherwise, we have something like (x+y), and we consumed '(x'.
      if (EvaluateDirectiveSubExpr(Result, 1, PeekTok, ValueLive, PP))
        return true;

      if (PeekTok.isNot(tok::r_paren)) {
        PP.Diag(PeekTok.getLocation(), diag::err_pp_expected_rparen)
          << Result.getRange();
        PP.Diag(Start, diag::note_matching) << "(";
        return true;
      }
      // A compound expression resets the 'defined' tracking state.
      DT.State = DefinedTracker::Unknown;
    }
    // The reported range spans from '(' to ')'.
    Result.setRange(Start, PeekTok.getLocation());
    PP.LexNonComment(PeekTok);  // Eat the ).
    return false;
  }
  case tok::plus: {
    SourceLocation Start = PeekTok.getLocation();
    // Unary plus doesn't modify the value.
    // DT is left as the operand's evaluation set it.
    PP.LexNonComment(PeekTok);
    if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
    Result.setBegin(Start);
    return false;
  }
  case tok::minus: {
    SourceLocation Loc = PeekTok.getLocation();
    PP.LexNonComment(PeekTok);
    if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
    Result.setBegin(Loc);

    // C99 6.5.3.3p3: The sign of the result matches the sign of the operand.
    Result.Val = -Result.Val;

    // -MININT is the only thing that overflows.  Unsigned never overflows.
    bool Overflow = !Result.isUnsigned() && Result.Val.isMinSignedValue();

    // If this operator is live and overflowed, report the issue.
    if (Overflow && ValueLive)
      PP.Diag(Loc, diag::warn_pp_expr_overflow) << Result.getRange();

    DT.State = DefinedTracker::Unknown;
    return false;
  }

  case tok::tilde: {
    SourceLocation Start = PeekTok.getLocation();
    PP.LexNonComment(PeekTok);
    if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
    Result.setBegin(Start);

    // C99 6.5.3.3p4: The sign of the result matches the sign of the operand.
    Result.Val = ~Result.Val;
    DT.State = DefinedTracker::Unknown;
    return false;
  }

  case tok::exclaim: {
    SourceLocation Start = PeekTok.getLocation();
    PP.LexNonComment(PeekTok);
    if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
    Result.setBegin(Start);
    Result.Val = !Result.Val;
    // C99 6.5.3.3p5: The sign of the result is 'int', aka it is signed.
    Result.Val.setIsUnsigned(false);

    // Logical negation flips a tracked "defined"/"not defined" state; any
    // other state is left unchanged.
    if (DT.State == DefinedTracker::DefinedMacro)
      DT.State = DefinedTracker::NotDefinedMacro;
    else if (DT.State == DefinedTracker::NotDefinedMacro)
      DT.State = DefinedTracker::DefinedMacro;
    return false;
  }

  // FIXME: Handle #assert
  }
}
Example #8
0
/// StringifyArgument - Build the token produced by the '#' preprocessor
/// operator (C99 6.10.3.2p2) from the eof-terminated token sequence
/// \p ArgToks.  When \p Charify is true the result is instead a character
/// literal, as required by the Microsoft charize (#@) extension.
///
/// \param ArgToks           First token of the argument; the sequence ends at
///                          a tok::eof token.
/// \param PP                Preprocessor used for spellings, diagnostics and
///                          creation of the resulting string.
/// \param Charify           Produce a character constant instead of a string.
/// \param ExpansionLocStart Forwarded to Preprocessor::CreateString.
/// \param ExpansionLocEnd   Forwarded to Preprocessor::CreateString.
/// \returns the newly created string-literal or character-constant token.
///
Token MacroArgs::StringifyArgument(const Token *ArgToks,
                                   Preprocessor &PP, bool Charify,
                                   SourceLocation ExpansionLocStart,
                                   SourceLocation ExpansionLocEnd) {
  Token ResultTok;
  ResultTok.startToken();
  ResultTok.setKind(Charify ? tok::char_constant : tok::string_literal);

  // The text is always assembled with double quotes first; the quotes are
  // swapped for single quotes at the end when charifying.
  SmallString<128> Text;
  Text += "\"";

  const Token *Cur = ArgToks;
  bool SawToken = false;
  for (; Cur->isNot(tok::eof); ++Cur) {
    const Token &Arg = *Cur;

    // Whitespace (or a line break) between tokens becomes a single space.
    if (SawToken && (Arg.hasLeadingSpace() || Arg.isAtStartOfLine()))
      Text += ' ';
    SawToken = true;

    // String and character constants must be escaped as specified by
    // 6.10.3.2p2.
    const bool NeedsEscaping =
        tok::isStringLiteral(Arg.getKind()) || // "foo", u8R"x(foo)x"_bar, etc.
        Arg.is(tok::char_constant) ||          // 'x'
        Arg.is(tok::wide_char_constant) ||     // L'x'.
        Arg.is(tok::utf8_char_constant) ||     // u8'x'.
        Arg.is(tok::utf16_char_constant) ||    // u'x'.
        Arg.is(tok::utf32_char_constant);      // U'x'.

    if (NeedsEscaping) {
      bool Invalid = false;
      std::string Spelling = PP.getSpelling(Arg, &Invalid);
      if (!Invalid) {
        std::string Escaped = Lexer::Stringify(Spelling);
        Text.append(Escaped.begin(), Escaped.end());
      }
      continue;
    }

    if (Arg.is(tok::code_completion)) {
      PP.CodeCompleteNaturalLanguage();
      continue;
    }

    // Any other token is appended verbatim.  Grow the buffer first and let
    // getSpelling write straight into it to avoid a copy where possible.
    unsigned OldLen = Text.size();
    Text.resize(OldLen+Arg.getLength());
    const char *Dest = Text.data() + OldLen;
    bool Invalid = false;
    unsigned SpelledLen = PP.getSpelling(Arg, Dest, &Invalid);
    if (Invalid)
      continue;

    // getSpelling may have pointed Dest at an existing copy of the spelling
    // instead of filling our buffer; in that case copy the bytes over.
    if (SpelledLen && Dest != &Text[OldLen])
      memcpy(&Text[OldLen], Dest, SpelledLen);

    // A token that needed cleaning can spell shorter than its raw length.
    if (SpelledLen != Arg.getLength())
      Text.resize(OldLen+SpelledLen);
  }

  // A trailing unescaped backslash would make the literal invalid; C99 asks
  // for it to be diagnosed.
  if (Text.back() == '\\') {
    // Walk back over the run of backslashes.  The opening quote guarantees
    // the scan stops.
    unsigned FirstNonSlash = Text.size()-2;
    while (Text[FirstNonSlash] == '\\')
      --FirstNonSlash;

    // An even run is just escaped backslashes; an odd run leaves one
    // dangling.
    const unsigned SlashRun = Text.size()-1-FirstNonSlash;
    if (SlashRun & 1) {
      // Diagnose errors for things like: #define F(X) #X   /   F(\)
      PP.Diag(Cur[-1], diag::pp_invalid_string_literal);
      Text.pop_back();  // remove one of the \'s.
    }
  }
  Text += '"';

  if (Charify) {
    // Turn the surrounding double quotes into single quotes.
    Text[0] = '\'';
    Text[Text.size()-1] = '\'';

    // Legal shapes are 'c' (where c is not a quote; '\' was already fixed
    // above) and the two-character escape '\c'.
    const bool IsBad = (Text.size() == 3)
                           ? (Text[1] == '\'')
                           : (Text.size() != 4 || Text[1] != '\\');

    if (IsBad) {
      PP.Diag(ArgToks[0], diag::err_invalid_character_to_charify);
      Text = "' '";  // Use something arbitrary, but legal.
    }
  }

  PP.CreateString(Text, ResultTok,
                  ExpansionLocStart, ExpansionLocEnd);
  return ResultTok;
}
Example #9
0
/// HighlightMacros - Re-preprocess the file \p FID using the macro table as
/// it stands at the end of the file, and annotate the HTML in \p R with the
/// text each macro use expands to.  Because the end-of-file macro state is
/// used, the result is an approximation rather than an exact replay.
void html::HighlightMacros(Rewriter &R, FileID FID, Preprocessor& PP) {
  const SourceManager &SM = PP.getSourceManager();

  // Step 1: raw-lex the file into a buffer.  Raw mode keeps us from entering
  // #includes or expanding macros at this stage.
  std::vector<Token> TokenStream;
  Lexer L(FID, SM, PP.getLangOptions());

  for (;;) {
    Token Raw;
    L.LexFromRawLexer(Raw);

    // Drop a '#' that starts a line so that the second pass never sees
    // #define, #include or any other directive.
    if (Raw.is(tok::hash) && Raw.isAtStartOfLine())
      continue;

    // '##' would be an error when re-preprocessed; neutralize it.
    if (Raw.is(tok::hashhash))
      Raw.setKind(tok::unknown);

    // The raw lexer does not look identifiers up.  Do it now so that the
    // token gets its real kind (e.g. "for" becomes a keyword) and can be
    // macro expanded in the second pass.
    if (Raw.is(tok::identifier))
      Raw.setKind(PP.LookUpIdentifierInfo(Raw)->getTokenID());

    TokenStream.push_back(Raw);

    if (Raw.is(tok::eof))
      break;
  }

  // Step 2: swap in a diagnostics object that ignores everything, so that
  // this pass reports nothing.
  IgnoringDiagClient TmpDC;
  Diagnostic TmpDiags(&TmpDC);

  Diagnostic *OldDiags = &PP.getDiagnostics();
  PP.setDiagnostics(TmpDiags);

  // Comments are not wanted in the re-preprocessed stream.
  PP.SetCommentRetentionState(false, false);

  // Feed the buffered tokens back in.  They get macro expanded, but no
  // sub-file is entered because the '#'s were removed.
  PP.EnterTokenStream(&TokenStream[0], TokenStream.size(), false, false);

  TokenConcatenation ConcatInfo(PP);

  // Step 3: walk the preprocessed tokens looking for macro expansions.
  Token Tok;
  PP.Lex(Tok);
  while (Tok.isNot(tok::eof)) {
    // Tokens that did not come from a macro are of no interest.
    if (!Tok.getLocation().isMacroID()) {
      PP.Lex(Tok);
      continue;
    }

    // This is the first token of a macro expansion.  Find the source range
    // of the macro use so that tags can be placed around it.
    std::pair<SourceLocation, SourceLocation> LLoc =
      SM.getInstantiationRange(Tok.getLocation());

    // Skip expansions whose instantiation point is outside the file being
    // highlighted.
    if (SM.getFileID(LLoc.first) != FID) {
      PP.Lex(Tok);
      continue;
    }

    assert(SM.getFileID(LLoc.second) == FID &&
           "Start and end of expansion must be in the same ultimate file!");

    // Start the expansion text with the first token's spelling.  LineLen
    // counts the characters added since the last inserted line break.
    std::string Expansion = PP.getSpelling(Tok);
    unsigned LineLen = Expansion.size();

    Token PrevTok = Tok;
    PP.Lex(Tok);

    // Gather every further token that belongs to the same instantiation.
    while (Tok.isNot(tok::eof) &&
           SM.getInstantiationLoc(Tok.getLocation()) == LLoc.first) {
      // Break the line once the current one has grown past 60 characters.
      if (LineLen > 60) {
        Expansion += "<br>";
        LineLen = 0;
      }

      const std::string::size_type SizeBefore = Expansion.size();

      // Separate the tokens if they were separated in the source, or if
      // gluing them together would accidentally paste them.
      if (Tok.hasLeadingSpace() ||
          ConcatInfo.AvoidConcat(PrevTok, Tok))
        Expansion += ' ';

      // Append the token text with HTML special characters escaped.
      Expansion += EscapeText(PP.getSpelling(Tok));

      // Account for whatever was just appended.
      LineLen += Expansion.size() - SizeBefore;

      PrevTok = Tok;
      PP.Lex(Tok);
    }

    // The expansion text is emitted as part of the end tag so that macro
    // uses spanning several lines are highlighted as a whole.
    Expansion = "<span class='expansion'>" + Expansion + "</span></span>";

    HighlightRange(R, LLoc.first, LLoc.second,
                   "<span class='macro'>", Expansion.c_str());
  }

  // Put the original diagnostics object back.
  PP.setDiagnostics(*OldDiags);
}
Example #10
0
/// RewriteMacrosInInput - Implement -rewrite-macros mode.
///
/// The main file is raw-lexed and preprocessed side by side.  Raw tokens that
/// have no counterpart in the preprocessed stream are commented out, and
/// preprocessed tokens that have no counterpart in the raw stream (macro
/// expansions) are inserted as text.  The rewritten buffer is written to
/// \p OS; if no rewrite buffer exists for the main file, "No changes" is
/// printed to stderr instead.
void clang::RewriteMacrosInInput(Preprocessor &PP, raw_ostream *OS) {
  SourceManager &SM = PP.getSourceManager();

  Rewriter Rewrite;
  Rewrite.setSourceMgr(SM, PP.getLangOpts());
  RewriteBuffer &RB = Rewrite.getEditBuffer(SM.getMainFileID());

  // Raw view of the main file.
  std::vector<Token> RawTokens;
  LexRawTokensFromMainFile(PP, RawTokens);
  unsigned CurRawTok = 0;
  Token RawTok = GetNextRawTok(RawTokens, CurRawTok, false);

  // Preprocessed view of the main file.
  PP.EnterMainSourceFile();
  Token PPTok;
  PP.Lex(PPTok);

  // Walk both views in parallel until each has reached eof.  Tokens present
  // only in the preprocessed view come from macro expansion; tokens present
  // only in the raw view are macros that expanded to nothing, macro
  // arguments, and the like.
  while (!(RawTok.is(tok::eof) && PPTok.is(tok::eof))) {
    SourceLocation PPLoc = SM.getExpansionLoc(PPTok.getLocation());

    // Preprocessed tokens from other files (e.g. headers) are skipped.
    if (!SM.isFromMainFile(PPLoc)) {
      PP.Lex(PPTok);
      continue;
    }

    // A directive in the raw view has no counterpart in the preprocessed
    // view.  Directives are kept in the output, with two exceptions that are
    // commented out: #warning and #pragma mark.
    if (RawTok.is(tok::hash) && RawTok.isAtStartOfLine()) {
      bool CommentOut = false;
      const Token &Directive = RawTokens[CurRawTok];
      if (Directive.is(tok::identifier)) {
        const IdentifierInfo *II = Directive.getIdentifierInfo();
        if (II->getName() == "warning") {
          CommentOut = true;
        } else if (II->getName() == "pragma") {
          const Token &Arg = RawTokens[CurRawTok+1];
          CommentOut = Arg.is(tok::identifier) &&
                       Arg.getIdentifierInfo()->getName() == "mark";
        }
      }
      if (CommentOut)
        RB.InsertTextAfter(SM.getFileOffset(RawTok.getLocation()), "//");

      // Either way, step over the remainder of the directive's line.
      do {
        RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
      } while (!RawTok.isAtStartOfLine() && RawTok.isNot(tok::eof));
      continue;
    }

    // Both tokens are in the main file: compare their file offsets.
    unsigned PPOffs = SM.getFileOffset(PPLoc);
    unsigned RawOffs = SM.getFileOffset(RawTok.getLocation());

    // Same place and same token: nothing to rewrite, advance both views.
    if (PPOffs == RawOffs && isSameToken(RawTok, PPTok)) {
      RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
      PP.Lex(PPTok);
      continue;
    }

    // The raw token is not ahead of the preprocessed one, so it was dropped
    // by preprocessing: comment it out.
    if (RawOffs <= PPOffs) {
      // Open one comment for the whole run of dropped tokens, supplying a
      // leading space when the first token does not already have one.
      RB.InsertTextAfter(RawOffs, RawTok.hasLeadingSpace() ? "/*" : " /*");
      unsigned EndPos;

      do {
        EndPos = RawOffs+RawTok.getLength();

        RawTok = GetNextRawTok(RawTokens, CurRawTok, true);
        RawOffs = SM.getFileOffset(RawTok.getLocation());

        if (RawTok.is(tok::comment)) {
          // The run ends at an existing comment; move past it.
          RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
          break;
        }

      } while (RawOffs <= PPOffs && !RawTok.isAtStartOfLine() &&
               !(PPOffs == RawOffs && isSameToken(RawTok, PPTok)));

      RB.InsertTextBefore(EndPos, "*/");
      continue;
    }

    // Otherwise the preprocessed view has tokens the raw view lacks: an
    // expansion.  Collect the whole run and insert it in one go so that the
    // tokens come out in the right order.
    unsigned InsertPos = PPOffs;
    std::string Expansion;
    while (PPOffs < RawOffs) {
      Expansion += ' ';
      Expansion += PP.getSpelling(PPTok);
      PP.Lex(PPTok);
      PPLoc = SM.getExpansionLoc(PPTok.getLocation());
      PPOffs = SM.getFileOffset(PPLoc);
    }
    Expansion += ' ';
    RB.InsertTextBefore(InsertPos, Expansion);
  }

  // Emit the rewritten main file, or say so if there is no rewrite buffer
  // for it.
  const RewriteBuffer *RewriteBuf =
      Rewrite.getRewriteBufferFor(SM.getMainFileID());
  if (RewriteBuf) {
    *OS << std::string(RewriteBuf->begin(), RewriteBuf->end());
  } else {
    fprintf(stderr, "No changes\n");
  }
  OS->flush();
}