/// Determine whether the given text matches a property name. static bool textMatchesPropertyName(StringRef text, const InheritedNameSet *allPropertyNames) { if (!allPropertyNames) return false; SmallString<16> localScratch; auto name = camel_case::toLowercaseWord(text, localScratch); // A property with exactly this name. if (allPropertyNames->contains(name)) return true; // From here on, we'll be working with scratch space. if (name.data() != localScratch.data()) localScratch = name; if (localScratch.back() == 'y') { // If the last letter is a 'y', try 'ies'. localScratch.pop_back(); localScratch += "ies"; if (allPropertyNames->contains(localScratch)) return true; } else { // Otherwise, add an 's' and try again. localScratch += 's'; if (allPropertyNames->contains(localScratch)) return true; // Alternatively, try to add 'es'. localScratch.pop_back(); localScratch += "es"; if (allPropertyNames->contains(localScratch)) return true; } return false; }
/// Apply the edits collected in Editor to a Rewriter, register the rewritten
/// contents of every touched file with the Remapper, and finally flush the
/// Remapper to MigrateDir (as a single file or to disk, per IsOutputFile).
void ObjCMigrateASTConsumer::HandleTranslationUnit(ASTContext &Ctx) {
  Rewriter rewriter(Ctx.getSourceManager(), Ctx.getLangOpts());
  RewritesReceiver Rec(rewriter);
  Editor->applyRewrites(Rec);

  Rewriter::buffer_iterator BufIt = rewriter.buffer_begin();
  Rewriter::buffer_iterator BufEnd = rewriter.buffer_end();
  for (; BufIt != BufEnd; ++BufIt) {
    // Each entry maps a FileID to the rewritten buffer for that file.
    const FileEntry *Entry =
        Ctx.getSourceManager().getFileEntryForID(BufIt->first);
    assert(Entry);

    // Render the rewritten buffer into memory.
    SmallString<512> Rewritten;
    llvm::raw_svector_ostream RewrittenOS(Rewritten);
    BufIt->second.write(RewrittenOS);
    RewrittenOS.flush();

    llvm::MemoryBuffer *Copy =
        llvm::MemoryBuffer::getMemBufferCopy(Rewritten.str(),
                                             Entry->getName());

    // Remap the file under its fixed-up path.
    SmallString<64> FixedPath(Entry->getName());
    FileMgr.FixupRelativePath(FixedPath);
    Remapper.remap(FixedPath.str(), Copy);
  }

  if (!IsOutputFile) {
    Remapper.flushToDisk(MigrateDir, Ctx.getDiagnostics());
    return;
  }
  Remapper.flushToFile(MigrateDir, Ctx.getDiagnostics());
}
/// Write one "header included" line for \p Filename to \p OutputFile.
///
/// \param OutputFile Stream that receives the line; it is flushed afterwards.
/// \param Filename Name of the included file. Passed through
/// Lexer::Stringify unless \p MSStyle is set.
/// \param ShowDepth If true, the line is prefixed with one marker per include
/// level beyond the first ('.' normally, ' ' in MS style).
/// \param CurrentIncludeDepth Include depth of \p Filename; the main source
/// file is at depth 1.
/// \param MSStyle If true, the line starts with "Note: including file:" and
/// uses spaces as depth markers.
static void PrintHeaderInfo(raw_ostream *OutputFile, const char* Filename,
                            bool ShowDepth, unsigned CurrentIncludeDepth,
                            bool MSStyle) {
  // Write to a temporary string to avoid unnecessary flushing on errs().
  SmallString<512> Pathname(Filename);
  if (!MSStyle)
    Lexer::Stringify(Pathname);

  SmallString<256> Msg;
  if (MSStyle)
    Msg += "Note: including file:";

  if (ShowDepth) {
    // The main source file is at depth 1, so skip one dot. The comparison is
    // '<' rather than '!=' so that a depth of 0 emits no markers instead of
    // wrapping the unsigned counter around.
    for (unsigned i = 1; i < CurrentIncludeDepth; ++i)
      Msg += MSStyle ? ' ' : '.';

    if (!MSStyle)
      Msg += ' ';
  }

  Msg += Pathname;
  Msg += '\n';

  OutputFile->write(Msg.data(), Msg.size());
  OutputFile->flush();
}
/// Track the include depth across file changes and print a line for every
/// header that is entered once headers should be shown.
///
/// Only EnterFile and ExitFile reasons are acted upon. Leaving a file
/// decrements the depth and, the first time the depth drops back to 1, marks
/// the predefines as processed. Entering a file increments the depth and, if
/// the header should be shown, prints it through PrintHeaderInfo.
void HeaderIncludesCallback::FileChanged(SourceLocation Loc,
                                         FileChangeReason Reason,
                                       SrcMgr::CharacteristicKind NewFileType,
                                       FileID PrevFID) {
  // Unless we are exiting a #include, make sure to skip ahead to the line the
  // #include directive was at.
  PresumedLoc UserLoc = SM.getPresumedLoc(Loc);
  if (UserLoc.isInvalid())
    return;

  // Adjust the current include depth.
  if (Reason == PPCallbacks::EnterFile) {
    ++CurrentIncludeDepth;
  } else if (Reason == PPCallbacks::ExitFile) {
    if (CurrentIncludeDepth)
      --CurrentIncludeDepth;

    // We track when we are done with the predefines by watching for the first
    // place where we drop back to a nesting depth of 1.
    if (CurrentIncludeDepth == 1 && !HasProcessedPredefines)
      HasProcessedPredefines = true;

    return;
  } else
    return;

  // Only EnterFile reaches this point.

  // Show the header if we are (a) past the predefines, or (b) showing all
  // headers and in the predefines at a depth past the initial file and command
  // line buffers.
  bool ShowHeader = (HasProcessedPredefines ||
                     (ShowAllHeaders && CurrentIncludeDepth > 2));

  // Dump the header include information if we are past the predefines buffer
  // or are showing all headers. The formatting is shared with PrintHeaderInfo
  // instead of being duplicated here.
  if (ShowHeader)
    PrintHeaderInfo(OutputFile, UserLoc.getFilename(), ShowDepth,
                    CurrentIncludeDepth, MSStyle);
}
void Value::setName(const Twine &NewName) { // Fast path for common IRBuilder case of setName("") when there is no name. if (NewName.isTriviallyEmpty() && !hasName()) return; SmallString<256> NameData; NewName.toVector(NameData); const char *NameStr = NameData.data(); unsigned NameLen = NameData.size(); // Name isn't changing? if (getName() == StringRef(NameStr, NameLen)) return; assert(getType() != Type::getVoidTy(getContext()) && "Cannot assign a name to void values!"); // Get the symbol table to update for this object. ValueSymbolTable *ST; if (getSymTab(this, ST)) return; // Cannot set a name on this value (e.g. constant). if (!ST) { // No symbol table to update? Just do the change. if (NameLen == 0) { // Free the name for this value. Name->Destroy(); Name = 0; return; } if (Name) Name->Destroy(); // NOTE: Could optimize for the case the name is shrinking to not deallocate // then reallocated. // Create the new name. Name = ValueName::Create(NameStr, NameStr+NameLen); Name->setValue(this); return; } // NOTE: Could optimize for the case the name is shrinking to not deallocate // then reallocated. if (hasName()) { // Remove old name. ST->removeValueName(Name); Name->Destroy(); Name = 0; if (NameLen == 0) return; } // Name is changing to something new. Name = ST->createValueName(StringRef(NameStr, NameLen), this); }
/// Encode the given line/address delta pair with MCDwarfLineAddr::Encode and
/// expect the produced bytes to equal \p ExpectedEncoding.
void verifyEncoding(MCDwarfLineTableParams Params, int LineDelta,
                    int AddrDelta, ArrayRef<uint8_t> ExpectedEncoding) {
  SmallString<16> Bytes;
  raw_svector_ostream BytesOS(Bytes);
  MCDwarfLineAddr::Encode(getContext(), Params, LineDelta, AddrDelta,
                          BytesOS);

  // View the character buffer as raw bytes for the comparison.
  uint8_t *FirstByte = reinterpret_cast<uint8_t *>(Bytes.data());
  ArrayRef<uint8_t> Actual(FirstByte, Bytes.size());
  EXPECT_EQ(ExpectedEncoding, Actual);
}
/// Run the Objective-C migrations over the top-level declarations (when
/// MigrateProperty is set), then apply the collected edits, register the
/// rewritten file contents with the Remapper and flush it to MigrateDir.
void ObjCMigrateASTConsumer::HandleTranslationUnit(ASTContext &Ctx) {
  TranslationUnitDecl *TU = Ctx.getTranslationUnitDecl();

  if (MigrateProperty) {
    for (DeclContext::decl_iterator DeclIt = TU->decls_begin(),
                                    DeclEnd = TU->decls_end();
         DeclIt != DeclEnd; ++DeclIt) {
      if (ObjCInterfaceDecl *Interface =
              dyn_cast<ObjCInterfaceDecl>(*DeclIt)) {
        migrateObjCInterfaceDecl(Ctx, Interface);
      } else if (ObjCProtocolDecl *Protocol =
                     dyn_cast<ObjCProtocolDecl>(*DeclIt)) {
        ObjCProtocolDecls.insert(Protocol);
      } else if (const ObjCImplementationDecl *Impl =
                     dyn_cast<ObjCImplementationDecl>(*DeclIt)) {
        migrateProtocolConformance(Ctx, Impl);
      } else if (const EnumDecl *Enum = dyn_cast<EnumDecl>(*DeclIt)) {
        // An enum is only migrated together with a typedef that directly
        // follows it.
        DeclContext::decl_iterator Next = DeclIt;
        ++Next;
        if (Next != DeclEnd) {
          if (const TypedefDecl *Typedef = dyn_cast<TypedefDecl>(*Next))
            migrateNSEnumDecl(Ctx, Enum, Typedef);
        }
      }

      // migrate methods which can have instancetype as their result type.
      if (ObjCContainerDecl *Container =
              dyn_cast<ObjCContainerDecl>(*DeclIt))
        migrateInstanceType(Ctx, Container);
    }
  }

  Rewriter rewriter(Ctx.getSourceManager(), Ctx.getLangOpts());
  RewritesReceiver Rec(rewriter);
  Editor->applyRewrites(Rec);

  Rewriter::buffer_iterator BufIt = rewriter.buffer_begin();
  Rewriter::buffer_iterator BufEnd = rewriter.buffer_end();
  for (; BufIt != BufEnd; ++BufIt) {
    // Each entry maps a FileID to the rewritten buffer for that file.
    const FileEntry *Entry =
        Ctx.getSourceManager().getFileEntryForID(BufIt->first);
    assert(Entry);

    // Render the rewritten buffer into memory.
    SmallString<512> Rewritten;
    llvm::raw_svector_ostream RewrittenOS(Rewritten);
    BufIt->second.write(RewrittenOS);
    RewrittenOS.flush();

    llvm::MemoryBuffer *Copy =
        llvm::MemoryBuffer::getMemBufferCopy(Rewritten.str(),
                                             Entry->getName());

    // Remap the file under its fixed-up path.
    SmallString<64> FixedPath(Entry->getName());
    FileMgr.FixupRelativePath(FixedPath);
    Remapper.remap(FixedPath.str(), Copy);
  }

  if (!IsOutputFile) {
    Remapper.flushToDisk(MigrateDir, Ctx.getDiagnostics());
    return;
  }
  Remapper.flushToFile(MigrateDir, Ctx.getDiagnostics());
}
/// Run a single migration transform and record its rewrites in the Remapper.
///
/// A fresh compiler invocation is created from OrigCI with the current
/// remappings applied, the source is parsed into an ASTUnit while diagnostics
/// are captured, \p trans is invoked on a MigrationPass, and the resulting
/// rewritten buffers are registered with the Remapper.
///
/// \param trans The transform to run on the MigrationPass.
/// \param listener Passed to the RewritesApplicator that applies the
/// transform's rewrites.
///
/// \returns true on failure (the ASTUnit could not be built, a fatal error
/// occurred while parsing, or DiagClient reports errors afterwards); false on
/// success.
bool MigrationProcess::applyTransform(TransformFn trans,
                                      RewriteListener *listener) {
  OwningPtr<CompilerInvocation> CInvok;
  CInvok.reset(createInvocationForMigration(OrigCI));
  CInvok->getDiagnosticOpts().IgnoreWarnings = true;

  // Make the invocation see the files rewritten so far.
  Remapper.applyMappings(CInvok->getPreprocessorOpts());

  CapturedDiagList capturedDiags;
  std::vector<SourceLocation> ARCMTMacroLocs;

  assert(DiagClient);
  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
  IntrusiveRefCntPtr<DiagnosticsEngine> Diags(
      new DiagnosticsEngine(DiagID, DiagClient, /*ShouldOwnClient=*/false));

  // Filter of all diagnostics.
  CaptureDiagnosticConsumer errRec(*Diags, capturedDiags);
  Diags->setClient(&errRec, /*ShouldOwnClient=*/false);

  OwningPtr<ARCMTMacroTrackerAction> ASTAction;
  ASTAction.reset(new ARCMTMacroTrackerAction(ARCMTMacroLocs));

  // CInvok.take() releases the invocation from the OwningPtr before it is
  // handed to the ASTUnit loader.
  OwningPtr<ASTUnit> Unit(
      ASTUnit::LoadFromCompilerInvocationAction(CInvok.take(), Diags,
                                                ASTAction.get()));
  if (!Unit)
    return true;
  Unit->setOwnsRemappedFileBuffers(false); // FileRemapper manages that.

  // Don't filter diagnostics anymore.
  Diags->setClient(DiagClient, /*ShouldOwnClient=*/false);

  ASTContext &Ctx = Unit->getASTContext();

  if (Diags->hasFatalErrorOccurred()) {
    // Replay the captured diagnostics through the real client and bail out.
    Diags->Reset();
    DiagClient->BeginSourceFile(Ctx.getLangOpts(), &Unit->getPreprocessor());
    capturedDiags.reportDiagnostics(*Diags);
    DiagClient->EndSourceFile();
    return true;
  }

  // After parsing of source files ended, we want to reuse the
  // diagnostics objects to emit further diagnostics.
  // We call BeginSourceFile because DiagnosticConsumer requires that
  // diagnostics with source range information are emitted only in between
  // BeginSourceFile() and EndSourceFile().
  DiagClient->BeginSourceFile(Ctx.getLangOpts(), &Unit->getPreprocessor());

  Rewriter rewriter(Ctx.getSourceManager(), Ctx.getLangOpts());
  TransformActions TA(*Diags, capturedDiags, Ctx, Unit->getPreprocessor());
  MigrationPass pass(Ctx, OrigCI.getLangOpts()->getGC(),
                     Unit->getSema(), TA, ARCMTMacroLocs);

  trans(pass);

  {
    // Scoped so the applicator is destroyed before EndSourceFile() is called.
    RewritesApplicator applicator(rewriter, Ctx, listener);
    TA.applyRewrites(applicator);
  }

  DiagClient->EndSourceFile();

  if (DiagClient->getNumErrors())
    return true;

  // Register the rewritten contents of every touched file with the Remapper.
  for (Rewriter::buffer_iterator
        I = rewriter.buffer_begin(), E = rewriter.buffer_end(); I != E; ++I) {
    FileID FID = I->first;
    RewriteBuffer &buf = I->second;
    const FileEntry *file = Ctx.getSourceManager().getFileEntryForID(FID);
    assert(file);
    // The in-memory buffer is labelled "<original name>-trans".
    std::string newFname = file->getName();
    newFname += "-trans";
    SmallString<512> newText;
    llvm::raw_svector_ostream vecOS(newText);
    buf.write(vecOS);
    vecOS.flush();
    llvm::MemoryBuffer *memBuf = llvm::MemoryBuffer::getMemBufferCopy(
                   StringRef(newText.data(), newText.size()), newFname);
    // The remapping itself is keyed on the original (fixed-up) file path.
    SmallString<64> filePath(file->getName());
    Unit->getFileManager().FixupRelativePath(filePath);
    Remapper.remap(filePath.str(), memBuf);
  }

  return false;
}
/// Strip trailing words from \p name that merely restate \p typeName.
///
/// The camel-case words of the name and of the type name are matched from the
/// end. If any words match, the matched portion is removed from the name
/// depending on \p role and on the part of speech of the word preceding the
/// match.
///
/// \param name The name to shorten.
/// \param typeName The type name (and optional collection element type) to
/// match against. Taken by value; its Name may be shortened locally when a
/// type suffix is skipped.
/// \param role What kind of name \p name is; controls how aggressively type
/// information is stripped.
/// \param allPropertyNames Optional set of property names. For base names,
/// text that matches a property (singular or plural) is not stripped.
/// \param scratch Storage for a newly spliced name.
///
/// \returns The shortened name, or the original name if nothing should be
/// stripped (including when the result would be vacuous or, for base names
/// and properties, a keyword). For NameRole::Partial a complete match yields
/// the empty string.
static StringRef omitNeedlessWords(StringRef name,
                                   OmissionTypeName typeName,
                                   NameRole role,
                                   const InheritedNameSet *allPropertyNames,
                                   StringScratchSpace &scratch) {
  // If we have no name or no type name, there is nothing to do.
  if (name.empty() || typeName.empty()) return name;

  // Get the camel-case words in the name and type name.
  auto nameWords = camel_case::getWords(name);
  auto typeWords = camel_case::getWords(typeName.Name);

  // Match the last words in the type name to the last words in the
  // name.
  auto nameWordRevIter = nameWords.rbegin(),
    nameWordRevIterBegin = nameWordRevIter,
    firstMatchingNameWordRevIter = nameWordRevIter,
    nameWordRevIterEnd = nameWords.rend();
  auto typeWordRevIter = typeWords.rbegin(),
    typeWordRevIterEnd = typeWords.rend();

  // Records the position of the first (i.e. rightmost) matching name word.
  bool anyMatches = false;
  auto matched = [&] {
    if (anyMatches) return;

    anyMatches = true;
    firstMatchingNameWordRevIter = nameWordRevIter;
  };

  while (nameWordRevIter != nameWordRevIterEnd &&
         typeWordRevIter != typeWordRevIterEnd) {
    // If the names match, continue.
    auto nameWord = *nameWordRevIter;
    if (matchNameWordToTypeWord(nameWord, *typeWordRevIter)) {
      matched();
      ++nameWordRevIter;
      ++typeWordRevIter;
      continue;
    }

    // Special case: "Indexes" and "Indices" in the name match
    // "IndexSet" in the type.
    if ((matchNameWordToTypeWord(nameWord, "Indexes") ||
         matchNameWordToTypeWord(nameWord, "Indices")) &&
        *typeWordRevIter == "Set") {
      auto nextTypeWordRevIter = typeWordRevIter;
      ++nextTypeWordRevIter;
      if (nextTypeWordRevIter != typeWordRevIterEnd &&
          matchNameWordToTypeWord("Index", *nextTypeWordRevIter)) {
        matched();
        ++nameWordRevIter;
        typeWordRevIter = nextTypeWordRevIter;
        ++typeWordRevIter;
        continue;
      }
    }

    // Special case: "Index" in the name matches "Int" or "Integer" in the type.
    if (matchNameWordToTypeWord(nameWord, "Index") &&
        (matchNameWordToTypeWord("Int", *typeWordRevIter) ||
         matchNameWordToTypeWord("Integer", *typeWordRevIter))) {
      matched();
      ++nameWordRevIter;
      ++typeWordRevIter;
      continue;
    }

    // Special case: if the word in the name ends in 's', and we have
    // a collection element type, see if this is a plural.
    if (!typeName.CollectionElement.empty() && nameWord.size() > 2 &&
        nameWord.back() == 's' && role != NameRole::BaseNameSelf) {
      // Check <element name>s.
      auto shortenedNameWord
        = name.substr(0, nameWordRevIter.base().getPosition()-1);
      auto newShortenedNameWord
        = omitNeedlessWords(shortenedNameWord, typeName.CollectionElement,
                            NameRole::Partial, allPropertyNames, scratch);
      if (shortenedNameWord != newShortenedNameWord) {
        matched();
        unsigned targetSize = newShortenedNameWord.size();
        while (nameWordRevIter.base().getPosition() > targetSize)
          ++nameWordRevIter;
        continue;
      }
    }

    // If this is a skippable suffix, skip it and keep looking.
    if (nameWordRevIter == nameWordRevIterBegin) {
      if (auto withoutSuffix = skipTypeSuffix(typeName.Name)) {
        typeName.Name = *withoutSuffix;
        typeWords = camel_case::getWords(typeName.Name);
        typeWordRevIter = typeWords.rbegin();
        typeWordRevIterEnd = typeWords.rend();
        continue;
      }
    }

    // If we're matching the base name of a method against the type of
    // 'Self', and we haven't matched anything yet, skip over words in
    // the name.
    if (role == NameRole::BaseNameSelf && !anyMatches) {
      ++nameWordRevIter;
      continue;
    }

    break;
  }

  StringRef origName = name;

  // If we matched anything above, update the name appropriately.
  if (anyMatches) {
    // Handle complete name matches.
    if (nameWordRevIter == nameWordRevIterEnd) {
      // If we're doing a partial match, return the empty string.
      if (role == NameRole::Partial) return "";

      // Leave the name alone.
      return name;
    }

    // Don't strip just "Error".
    if (nameWordRevIter != nameWordRevIterBegin) {
      auto nameWordPrev = std::prev(nameWordRevIter);
      if (nameWordPrev == nameWordRevIterBegin && *nameWordPrev == "Error")
        return name;
    }

    switch (role) {
    case NameRole::Property:
      // Always strip off type information.
      name = name.substr(0, nameWordRevIter.base().getPosition());
      break;

    case NameRole::BaseNameSelf:
      switch (getPartOfSpeech(*nameWordRevIter)) {
      case PartOfSpeech::Verb: {
        // Splice together the parts before and after the matched
        // type. For example, if we matched "ViewController" in
        // "dismissViewControllerAnimated", stitch together
        // "dismissAnimated".
        SmallString<16> newName =
          name.substr(0, nameWordRevIter.base().getPosition());
        newName
          += name.substr(firstMatchingNameWordRevIter.base().getPosition());
        name = scratch.copyString(newName);
        break;
      }

      case PartOfSpeech::Preposition:
      case PartOfSpeech::Gerund:
      case PartOfSpeech::Unknown:
        return name;
      }
      break;

    case NameRole::BaseName:
    case NameRole::FirstParameter:
    case NameRole::Partial:
    case NameRole::SubsequentParameter:
      // Classify the part of speech of the word before the type
      // information we would strip off.
      switch (getPartOfSpeech(*nameWordRevIter)) {
      case PartOfSpeech::Preposition:
        if (role == NameRole::BaseName) {
          // Strip off the part of the name that is redundant with
          // type information, so long as there's something preceding the
          // preposition.
          if (std::next(nameWordRevIter) != nameWordRevIterEnd)
            name = name.substr(0, nameWordRevIter.base().getPosition());
          break;
        }

        SWIFT_FALLTHROUGH;

      case PartOfSpeech::Verb:
      case PartOfSpeech::Gerund:
        // Don't prune redundant type information from the base name if
        // there is a corresponding property (either singular or plural).
        // textMatchesPropertyName handles a null property set itself.
        if (role == NameRole::BaseName &&
            textMatchesPropertyName(
                name.substr(nameWordRevIter.base().getPosition()),
                allPropertyNames))
          return name;

        // Strip off the part of the name that is redundant with
        // type information.
        name = name.substr(0, nameWordRevIter.base().getPosition());
        break;

      case PartOfSpeech::Unknown:
        // Assume it's a noun or adjective; don't strip anything.
        break;
      }
      break;
    }
  }

  // If we ended up with a vacuous name like "get" or "set", do nothing.
  if (isVacuousName(name))
    return origName;

  switch (role) {
  case NameRole::BaseName:
  case NameRole::BaseNameSelf:
  case NameRole::Property:
    // If we ended up with a keyword for a property name or base name,
    // do nothing.
    if (isKeyword(name))
      return origName;
    break;

  case NameRole::SubsequentParameter:
  case NameRole::FirstParameter:
  case NameRole::Partial:
    break;
  }

  // We're done.
  return name;
}
/// Build a literal expression for a numeric-constant token.
///
/// Single-character tokens take a fast path through ActOnIntegerConstant.
/// Otherwise the token spelling is parsed with NumericLiteralParser and either
/// a FloatingLiteral or an IntegerLiteral is created.
///
/// \param Tok The numeric constant token.
/// \returns The literal expression, or ExprError() if the spelling is invalid,
/// the literal parser reports an error, or the literal is neither a floating
/// nor an integer literal.
C2::ExprResult C2Sema::ActOnNumericConstant(const Token& Tok) {
#ifdef SEMA_DEBUG
    std::cerr << COL_SEMA << "SEMA: numeric constant" << ANSI_NORMAL"\n";
#endif
    // Fast path for a single digit (which is quite common).  A single digit
    // cannot have a trigraph, escaped newline, radix prefix, or suffix.
    if (Tok.getLength() == 1) {
        const char Val = PP.getSpellingOfSingleCharacterNumericConstant(Tok);
        return ActOnIntegerConstant(Tok.getLocation(), Val-'0');
    }

    SmallString<128> SpellingBuffer;
    // NumericLiteralParser wants to overread by one character.  Add padding to
    // the buffer in case the token is copied to the buffer.  If getSpelling()
    // returns a StringRef to the memory buffer, it should have a null char at
    // the EOF, so it is also safe.
    SpellingBuffer.resize(Tok.getLength() + 1);

    // Get the spelling of the token, which eliminates trigraphs, etc.
    bool Invalid = false;
    StringRef TokSpelling = PP.getSpelling(Tok, SpellingBuffer, &Invalid);
    if (Invalid)
        return ExprError();

    NumericLiteralParser Literal(TokSpelling, Tok.getLocation(), PP);
    if (Literal.hadError)
        return ExprError();

    // User-defined literal suffixes are not handled; this only asserts.
    if (Literal.hasUDSuffix()) {
        assert(0 && "HUH?");
    }

    Expr* Res;

    if (Literal.isFloatingLiteral()) {
        // clang::Sema::BuildFloatingLiteral()
        // TEMP Hardcoded
        const llvm::fltSemantics& Format = llvm::APFloat::IEEEsingle;
        APFloat Val(Format);

        APFloat::opStatus result = Literal.GetFloatValue(Val);
        // Overflow is always an error, but underflow is only an error if
        // we underflowed to zero (APFloat reports denormals as underflow).
        if ((result & APFloat::opOverflow) ||
                ((result & APFloat::opUnderflow) && Val.isZero())) {
            // Diagnostics for this case are not implemented yet; the intended
            // code is kept disabled below.
            assert(0 && "TODO");
#if 0
            unsigned diagnostic;
            SmallString<20> buffer;
            if (result & APFloat::opOverflow) {
                diagnostic = diag::warn_float_overflow;
                APFloat::getLargest(Format).toString(buffer);
            } else {
                diagnostic = diag::warn_float_underflow;
                APFloat::getSmallest(Format).toString(buffer);
            }

            Diag(Tok.getLocation(), diagnostic)
                    << Ty
                    << StringRef(buffer.data(), buffer.size());
#endif
        }

        //bool isExact = (result == APFloat::opOK);
        //return FloatingLiteral::Create(S.Context, Val, isExact, Ty, Loc);
        Res = new FloatingLiteral(Tok.getLocation(), Val);

    } else if (!Literal.isIntegerLiteral()) {
        return ExprError();
    } else {
        // NOTE(review): 'ty' is not used by any enabled code in this function.
        QualType ty;

        const unsigned MaxWidth = 64; // for now limit to 64 bits
        llvm::APInt ResultVal(MaxWidth, 0);
        if (Literal.GetIntegerValue(ResultVal)) {
            // The error is reported, but an IntegerLiteral is still created
            // from ResultVal below.
            Diag(Tok.getLocation(), diag::err_integer_literal_too_large) << 1;
        } else {
            // Type/width selection is disabled for now.
#if 0
            // Octal, Hexadecimal, and integers with a U suffix are allowed to
            // be an unsigned.
            bool AllowUnsigned = Literal.isUnsigned || Literal.getRadix() != 10;

            // Check from smallest to largest, picking the smallest type we can.
            unsigned Width = 0;
            if (!Literal.isLong && !Literal.isLongLong) {
                // Are int/unsigned possibilities?
                unsigned IntSize = 64;

                // Does it fit in a unsigned?
                if (ResultVal.isIntN(IntSize)) {
                    // Does it fit in a signed int?
#if 0
                    if (!Literal.isUnsigned && ResultVal[IntSize-1] == 0)
                        Ty = Context.IntTy;
                    else if (AllowUnsigned)
                        Ty = Context.UnsignedIntTy;
#endif
                    Width = IntSize;
                }
            }

            // Check long long if needed.
            if (Width == 0) {
                if (ResultVal.isIntN(64)) {
#if 0
                    if (!Literal.isUnsigned && (ResultVal[LongLongSize-1] == 0 ||
                                                (getLangOpts().MicrosoftExt && Literal.isLongLong)))
                        Ty = Context.LongLongTy;
                    else if (AllowUnsigned)
                        Ty = Context.UnsignedLongLongTy;
#endif
                    Width = 64;
                }
            }

            if (Width == 0) {
                fprintf(stderr, "TOO LARGE\n");
                assert(0 && "TODO");
            }
            // set correct width
            if (ResultVal.getBitWidth() != Width) {
                ResultVal = ResultVal.trunc(Width);
            }
#endif
        }

        Res = new IntegerLiteral(Tok.getLocation(), ResultVal, Literal.getRadix());
    }
    return ExprResult(Res);
}
/// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of
/// tokens into the literal string token that should be produced by the C #
/// preprocessor operator.  If Charify is true, then it should be turned into
/// a character literal for the Microsoft charize (#@) extension.
///
/// \param ArgToks The argument tokens; iteration stops at the first tok::eof.
/// \param PP Preprocessor used for token spellings, diagnostics and for
/// creating the result string.
/// \param Charify Produce a character constant instead of a string literal.
/// \param ExpansionLocStart Forwarded to PP.CreateString.
/// \param ExpansionLocEnd Forwarded to PP.CreateString.
/// \returns The newly created string (or character) literal token.
Token MacroArgs::StringifyArgument(const Token *ArgToks,
                                   Preprocessor &PP, bool Charify,
                                   SourceLocation ExpansionLocStart,
                                   SourceLocation ExpansionLocEnd) {
  Token Tok;
  Tok.startToken();
  Tok.setKind(Charify ? tok::char_constant : tok::string_literal);

  // Remember the first token for the charify diagnostic below.
  const Token *ArgTokStart = ArgToks;

  // Stringify all the tokens.
  SmallString<128> Result;
  Result += "\"";

  bool isFirst = true;
  for (; ArgToks->isNot(tok::eof); ++ArgToks) {
    // Note: this 'Tok' shadows the result token declared above.
    const Token &Tok = *ArgToks;
    if (!isFirst && (Tok.hasLeadingSpace() || Tok.isAtStartOfLine()))
      Result += ' ';
    isFirst = false;

    // If this is a string or character constant, escape the token as specified
    // by 6.10.3.2p2.
    if (tok::isStringLiteral(Tok.getKind()) || // "foo", u8R"x(foo)x"_bar, etc.
        Tok.is(tok::char_constant) ||          // 'x'
        Tok.is(tok::wide_char_constant) ||     // L'x'.
        Tok.is(tok::utf8_char_constant) ||     // u8'x'.
        Tok.is(tok::utf16_char_constant) ||    // u'x'.
        Tok.is(tok::utf32_char_constant)) {    // U'x'.
      bool Invalid = false;
      std::string TokStr = PP.getSpelling(Tok, &Invalid);
      if (!Invalid) {
        std::string Str = Lexer::Stringify(TokStr);
        Result.append(Str.begin(), Str.end());
      }
    } else if (Tok.is(tok::code_completion)) {
      PP.CodeCompleteNaturalLanguage();
    } else {
      // Otherwise, just append the token.  Do some gymnastics to get the token
      // in place and avoid copies where possible.
      unsigned CurStrLen = Result.size();
      Result.resize(CurStrLen+Tok.getLength());
      const char *BufPtr = Result.data() + CurStrLen;
      bool Invalid = false;
      unsigned ActualTokLen = PP.getSpelling(Tok, BufPtr, &Invalid);

      // NOTE(review): when Invalid is set, the space reserved by the resize
      // above is left in Result as is - confirm this is intended.
      if (!Invalid) {
        // If getSpelling returned a pointer to an already uniqued version of
        // the string instead of filling in BufPtr, memcpy it onto our string.
        if (ActualTokLen && BufPtr != &Result[CurStrLen])
          memcpy(&Result[CurStrLen], BufPtr, ActualTokLen);

        // If the token was dirty, the spelling may be shorter than the token.
        if (ActualTokLen != Tok.getLength())
          Result.resize(CurStrLen+ActualTokLen);
      }
    }
  }

  // If the last character of the string is a \, and if it isn't escaped, this
  // is an invalid string literal, diagnose it as specified in C99.
  if (Result.back() == '\\') {
    // Count the number of consecutive \ characters.  If even, then they are
    // just escaped backslashes, otherwise it's an error.
    unsigned FirstNonSlash = Result.size()-2;
    // Guaranteed to find the starting " if nothing else.
    while (Result[FirstNonSlash] == '\\')
      --FirstNonSlash;
    if ((Result.size()-1-FirstNonSlash) & 1) {
      // Diagnose errors for things like: #define F(X) #X   /   F(\)
      PP.Diag(ArgToks[-1], diag::pp_invalid_string_literal);
      Result.pop_back();  // remove one of the \'s.
    }
  }
  Result += '"';

  // If this is the charify operation and the result is not a legal character
  // constant, diagnose it.
  if (Charify) {
    // First step, turn double quotes into single quotes:
    Result[0] = '\'';
    Result[Result.size()-1] = '\'';

    // Check for bogus character.
    bool isBad = false;
    if (Result.size() == 3)
      isBad = Result[1] == '\'';   // ''' is not legal. '\' already fixed above.
    else
      isBad = (Result.size() != 4 || Result[1] != '\\');  // Not '\x'

    if (isBad) {
      PP.Diag(ArgTokStart[0], diag::err_invalid_character_to_charify);
      Result = "' '";  // Use something arbitrary, but legal.
    }
  }

  PP.CreateString(Result, Tok,
                  ExpansionLocStart, ExpansionLocEnd);
  return Tok;
}
/// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ##
/// operator.  Read the ## and RHS, and paste the LHS/RHS together.  If there
/// are more ## after it, chomp them iteratively.  Return the result as Tok.
/// If this returns true, the caller should immediately return the token.
///
/// \param Tok On entry the LHS token; on exit the pasted result (or the
/// unmodified LHS if pasting failed).
/// \returns true if a token spelling was invalid or the Microsoft /##/ comment
/// paste was handled; false otherwise, including when the scratch buffer data
/// could not be obtained.
bool TokenLexer::PasteTokens(Token &Tok) {
  // MSVC: If previous token was pasted, this must be a recovery from an invalid
  // paste operation. Ignore spaces before this token to mimic MSVC output.
  // Required for generating valid UUID strings in some MS headers.
  if (PP.getLangOpts().MicrosoftExt && (CurToken >= 2) &&
      Tokens[CurToken - 2].is(tok::hashhash))
    Tok.clearFlag(Token::LeadingSpace);

  SmallString<128> Buffer;
  const char *ResultTokStrPtr = nullptr;
  SourceLocation StartLoc = Tok.getLocation();
  SourceLocation PasteOpLoc;
  do {
    // Consume the ## operator if any.
    PasteOpLoc = Tokens[CurToken].getLocation();
    if (Tokens[CurToken].is(tok::hashhash))
      ++CurToken;
    assert(!isAtEnd() && "No token on the RHS of a paste operator!");

    // Get the RHS token.
    const Token &RHS = Tokens[CurToken];

    // Allocate space for the result token.  This is guaranteed to be enough for
    // the two tokens.
    Buffer.resize(Tok.getLength() + RHS.getLength());

    // Get the spelling of the LHS token in Buffer.
    const char *BufPtr = &Buffer[0];
    bool Invalid = false;
    unsigned LHSLen = PP.getSpelling(Tok, BufPtr, &Invalid);
    if (BufPtr != &Buffer[0])   // Really, we want the chars in Buffer!
      memcpy(&Buffer[0], BufPtr, LHSLen);
    if (Invalid)
      return true;

    // Now the RHS spelling, placed directly after the LHS.
    BufPtr = Buffer.data() + LHSLen;
    unsigned RHSLen = PP.getSpelling(RHS, BufPtr, &Invalid);
    if (Invalid)
      return true;
    if (RHSLen && BufPtr != &Buffer[LHSLen])
      // Really, we want the chars in Buffer!
      memcpy(&Buffer[LHSLen], BufPtr, RHSLen);

    // Trim excess space.
    Buffer.resize(LHSLen+RHSLen);

    // Plop the pasted result (including the trailing newline and null) into a
    // scratch buffer where we can lex it.
    Token ResultTokTmp;
    ResultTokTmp.startToken();

    // Claim that the tmp token is a string_literal so that we can get the
    // character pointer back from CreateString in getLiteralData().
    ResultTokTmp.setKind(tok::string_literal);
    PP.CreateString(Buffer, ResultTokTmp);
    SourceLocation ResultTokLoc = ResultTokTmp.getLocation();
    ResultTokStrPtr = ResultTokTmp.getLiteralData();

    // Lex the resultant pasted token into Result.
    Token Result;

    if (Tok.isAnyIdentifier() && RHS.isAnyIdentifier()) {
      // Common paste case: identifier+identifier = identifier.  Avoid creating
      // a lexer and other overhead.
      PP.IncrementPasteCounter(true);
      Result.startToken();
      Result.setKind(tok::raw_identifier);
      Result.setRawIdentifierData(ResultTokStrPtr);
      Result.setLocation(ResultTokLoc);
      Result.setLength(LHSLen+RHSLen);
    } else {
      PP.IncrementPasteCounter(false);

      assert(ResultTokLoc.isFileID() &&
             "Should be a raw location into scratch buffer");
      SourceManager &SourceMgr = PP.getSourceManager();
      FileID LocFileID = SourceMgr.getFileID(ResultTokLoc);

      bool Invalid = false;
      const char *ScratchBufStart
        = SourceMgr.getBufferData(LocFileID, &Invalid).data();
      if (Invalid)
        return false;

      // Make a lexer to lex this string from.  Lex just this one token.
      // Make a lexer object so that we lex and expand the paste result.
      Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID),
               PP.getLangOpts(), ScratchBufStart,
               ResultTokStrPtr, ResultTokStrPtr+LHSLen+RHSLen);

      // Lex a token in raw mode.  This way it won't look up identifiers
      // automatically, lexing off the end will return an eof token, and
      // warnings are disabled.  This returns true if the result token is the
      // entire buffer.
      bool isInvalid = !TL.LexFromRawLexer(Result);

      // If we got an EOF token, we didn't form even ONE token.  For example, we
      // did "/ ## /" to get "//".
      isInvalid |= Result.is(tok::eof);

      // If pasting the two tokens didn't form a full new token, this is an
      // error.  This occurs with "x ## +"  and other stuff.  Return with Tok
      // unmodified and with RHS as the next token to lex.
      if (isInvalid) {
        // Explicitly convert the token location to have proper expansion
        // information so that the user knows where it came from.
        SourceManager &SM = PP.getSourceManager();
        SourceLocation Loc =
          SM.createExpansionLoc(PasteOpLoc, ExpandLocStart, ExpandLocEnd, 2);

        // Test for the Microsoft extension of /##/ turning into // here on the
        // error path.
        if (PP.getLangOpts().MicrosoftExt && Tok.is(tok::slash) &&
            RHS.is(tok::slash)) {
          HandleMicrosoftCommentPaste(Tok, Loc);
          return true;
        }

        // Do not emit the error when preprocessing assembler code.
        if (!PP.getLangOpts().AsmPreprocessor) {
          // If we're in microsoft extensions mode, downgrade this from a hard
          // error to an extension that defaults to an error.  This allows
          // disabling it.
          PP.Diag(Loc, PP.getLangOpts().MicrosoftExt ? diag::ext_pp_bad_paste_ms
                                                     : diag::err_pp_bad_paste)
              << Buffer;
        }

        // An error has occurred so exit loop.
        break;
      }

      // Turn ## into 'unknown' to avoid # ## # from looking like a paste
      // operator.
      if (Result.is(tok::hashhash))
        Result.setKind(tok::unknown);
    }

    // Transfer properties of the LHS over the Result.
    Result.setFlagValue(Token::StartOfLine , Tok.isAtStartOfLine());
    Result.setFlagValue(Token::LeadingSpace, Tok.hasLeadingSpace());

    // Finally, replace LHS with the result, consume the RHS, and iterate.
    ++CurToken;
    Tok = Result;
  } while (!isAtEnd() && Tokens[CurToken].is(tok::hashhash));

  SourceLocation EndLoc = Tokens[CurToken - 1].getLocation();

  // The token's current location indicate where the token was lexed from.  We
  // need this information to compute the spelling of the token, but any
  // diagnostics for the expanded token should appear as if the token was
  // expanded from the full ## expression. Pull this information together into
  // a new SourceLocation that captures all of this.
  SourceManager &SM = PP.getSourceManager();
  if (StartLoc.isFileID())
    StartLoc = getExpansionLocForMacroDefLoc(StartLoc);
  if (EndLoc.isFileID())
    EndLoc = getExpansionLocForMacroDefLoc(EndLoc);
  // Walk both ends up the expansion chain until they are in the same file ID
  // as MacroExpansionStart.
  FileID MacroFID = SM.getFileID(MacroExpansionStart);
  while (SM.getFileID(StartLoc) != MacroFID)
    StartLoc = SM.getImmediateExpansionRange(StartLoc).first;
  while (SM.getFileID(EndLoc) != MacroFID)
    EndLoc = SM.getImmediateExpansionRange(EndLoc).second;

  Tok.setLocation(SM.createExpansionLoc(Tok.getLocation(), StartLoc, EndLoc,
                                        Tok.getLength()));

  // Now that we got the result token, it will be subject to expansion.  Since
  // token pasting re-lexes the result token in raw mode, identifier information
  // isn't looked up.  As such, if the result is an identifier, look up id info.
  if (Tok.is(tok::raw_identifier)) {
    // Look up the identifier info for the token.  We disabled identifier lookup
    // by saying we're skipping contents, so we need to do this manually.
    PP.LookUpIdentifierInfo(Tok);
  }
  return false;
}
int main(int ac, char **av) { SourceMgr SrcMgr; //LLVMInitializeX86TargetInfo(); llvm::InitializeAllTargetInfos(); //LLVMInitializeX86AsmParser(); llvm::InitializeAllTargetMCs(); //LLVMInitializeX86TargetMC(); llvm::InitializeAllAsmParsers(); //LLVMInitializeX86AsmParser(); llvm::InitializeAllDisassemblers(); //LLVMInitializeX86Disassembler(); // arg0: // llvm::Target encapsulating the "x86_64-apple-darwin14.5.0" information // see /lib/Support/Triple.cpp for the details //spec = llvm::sys::getDefaultTargetTriple(); //std::string machSpec = "x86_64-apple-windows"; // will produce a COFF //std::string machSpec = "x86_64-apple-darwin14.5.0"; // will produce a Mach-O std::string machSpec = "arm-none-none-eabi"; // //std::string machSpec = "x86_64-apple-darwin"; //std::string machSpec = "x86_64-thumb-linux-gnu"; //std::string machSpec = "x86_64-unknown-linux-gnu"; printf("machine spec: %s\n", machSpec.c_str()); machSpec = Triple::normalize(machSpec); printf("machine spec (normalized): %s\n", machSpec.c_str()); Triple TheTriple(machSpec); // Get the target specific parser. 
std::string Error; const Target *TheTarget = TargetRegistry::lookupTarget(/*arch*/"", TheTriple, Error); if (!TheTarget) { errs() << Error; return -1; } machSpec = TheTriple.getTriple(); printf("machine spec (returned): %s\n", machSpec.c_str()); printf("Target.getName(): %s\n", TheTarget->getName()); printf("Target.getShortDescription(): %s\n", TheTarget->getShortDescription()); /* from the target we get almost everything */ std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(machSpec)); std::unique_ptr<MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, machSpec)); std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo()); /* describes target instruction set */ MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(machSpec, "", ""); /* subtarget instr set */ MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*MRI, machSpec, /* specific CPU */ ""); // arg0: // llvm::SourceMgr (Support/SourceMgr.h) that holds assembler source // has vector of llvm::SrcBuffer encaps (Support/MemoryBuffer.h) and vector of include dirs //std::string asmSrc = ".org 0x100, 0xAA\nfoo:\nxor %eax, %ebx\npush %rbp\njmp foo\nrdtsc\n"; //std::string asmSrc = ".text\n" "ldr pc, data_foo\n" "\n" "data_foo:\n" " .int 0x8\n" "\n" "loop:\n" "b loop\n"; //std::string asmSrc = ".text\n" "mov r2, r1\n"; std::string asmSrc = ".text\n" "ldr pc, data_foo\n" "data_foo:\n" ".int 0x8\n" "loop:\n" "b loop\n"; std::unique_ptr<MemoryBuffer> memBuf = MemoryBuffer::getMemBuffer(asmSrc); SrcMgr.AddNewSourceBuffer(std::move(memBuf), SMLoc()); // arg1: the machine code context MCObjectFileInfo MOFI; MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr); MOFI.InitMCObjectFileInfo(TheTriple, Reloc::Default, CodeModel::Default, Ctx); // this is the assembler interface // -methods per .s statements (emit bytes, handle directive, etc.) // -remembers current section // -implementations that write a .s, or .o in various formats // // 1. the output stream ... 
a formatted_raw_ostream wraps a raw_ostream to provide // tracking of line and column position for padding and shit // // but raw_ostream is abstract and is implemented by raw_fd_ostream, raw_string_ostream, etc. /* output stream: raw_svector_ostream is a raw_pwrite_stream is a raw_ostream since a SmallString is SmallVector (svector) we can use this and retrieve bytes later with its .data() method */ SmallString<1024> smallString; raw_svector_ostream rso(smallString); /* code emitter needs 1) instruction set info 2) register info */ MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx); MCStreamer *as = TheTarget->createMCObjectStreamer( TheTriple, /* Triple */ Ctx, /* the MCContext */ *MAB, /* the AsmBackend, (fixups, relaxation, objs and elfs) */ rso, /* output stream raw_pwrite_stream */ CE, /* code emitter */ *STI, /* subtarget info */ true, /* relax all fixups */ true, /* incremental linker compatible */ false /* DWARFMustBeAtTheEnd */ ); std::string abi = "none"; MCTargetOptions toptions; toptions.MCUseDwarfDirectory = false; toptions.ABIName = abi; printf("trying to assemble, let's go..\n"); AssembleInput(TheTarget, SrcMgr, Ctx, *as, *MAI, *STI, *MCII, toptions); printf("done with AssembleInput()\n"); /* dump to file for debugging */ FILE *fp; fp = fopen("out.bin", "wb"); fwrite(smallString.data(), 1, smallString.size(), fp); fclose(fp); //int n = smallString.size(); int codeOffset=0, codeSize = 0; char *data = smallString.data(); if(*(uint32_t *)data == 0xFEEDFACF) { unsigned int idx = 0; idx += 0x20; /* skip mach_header_64 to first command */ idx += 0x48; /* advance into segment_command_64 to first section */ idx += 0x28; /* advance into section_64 to size */ uint64_t scn_size = *(uint64_t *)(data + idx); idx += 0x8; /* advance into section_64 to offset */ uint64_t scn_offset = *(uint64_t *)(data + idx); codeOffset = scn_offset; codeSize = scn_size; } else if(0==memcmp(data, "\x7F" "ELF\x01\x01\x01\x00", 8)) { /* assume four sections: NULL, 
.strtab, .text, .symtab */ uint32_t e_shoff = *(uint32_t *)(data + 0x20); uint32_t sh_offset = *(uint32_t *)(data + e_shoff + 2*0x28 + 0x10); /* second shdr */ uint32_t sh_size = *(uint32_t *)(data + e_shoff + 2*0x28 + 0x14); /* second shdr */ codeOffset = sh_offset; codeSize = sh_size; } else { printf("ERROR: couldn't identify type of output file\n"); } dump_bytes((unsigned char *)data + codeOffset, codeSize, 0); return 0; }