void PPCPassConfig::addIRPasses() {
  if (TM->getOptLevel() != CodeGenOpt::None)
    addPass(createPPCBoolRetToIntPass());
  addPass(createAtomicExpandPass(&getPPCTargetMachine()));

  // For the BG/Q (or if explicitly requested), add explicit data prefetch
  // intrinsics.
  bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
                        getOptLevel() != CodeGenOpt::None;
  if (EnablePrefetch.getNumOccurrences() > 0)
    UsePrefetching = EnablePrefetch;
  if (UsePrefetching)
    addPass(createPPCLoopDataPrefetchPass());

  if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
    // Call SeparateConstOffsetFromGEP pass to extract constants within indices
    // and lower a GEP with multiple indices to either arithmetic operations or
    // multiple GEPs with single index.
    addPass(createSeparateConstOffsetFromGEPPass(TM, true));
    // Call EarlyCSE pass to find and remove subexpressions in the lowered
    // result.
    addPass(createEarlyCSEPass());
    // Do loop invariant code motion in case part of the lowered result is
    // invariant.
    addPass(createLICMPass());
  }

  TargetPassConfig::addIRPasses();
}
/// This routine adds optimization passes based on selected optimization level,
/// OptLevel.
///
/// OptLevel - Optimization Level
static void AddOptimizationPasses(PassManagerBase &MPM,
                                  FunctionPassManager &FPM,
                                  unsigned OptLevel, unsigned SizeLevel) {
  FPM.add(createVerifierPass());          // Verify that input is correct
  MPM.add(createDebugInfoVerifierPass()); // Verify that debug info is correct

  PassManagerBuilder Builder;
  Builder.OptLevel = OptLevel;
  Builder.SizeLevel = SizeLevel;

  if (DisableInline) {
    // No inlining pass
  } else if (OptLevel > 1) {
    Builder.Inliner = createFunctionInliningPass(OptLevel, SizeLevel);
  } else {
    Builder.Inliner = createAlwaysInlinerPass();
  }
  Builder.DisableUnitAtATime = !UnitAtATime;
  Builder.DisableUnrollLoops = (DisableLoopUnrolling.getNumOccurrences() > 0)
                                   ? DisableLoopUnrolling
                                   : OptLevel == 0;

  // This is final, unless there is a #pragma vectorize enable
  if (DisableLoopVectorization)
    Builder.LoopVectorize = false;
  // If option wasn't forced via cmd line (-vectorize-loops, -loop-vectorize)
  else if (!Builder.LoopVectorize)
    Builder.LoopVectorize = OptLevel > 1 && SizeLevel < 2;

  // When #pragma vectorize is on for SLP, do the same as above
  Builder.SLPVectorize =
      DisableSLPVectorization ? false : OptLevel > 1 && SizeLevel < 2;

  Builder.populateFunctionPassManager(FPM);
  Builder.populateModulePassManager(MPM);
}
int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv);
  if (Input.getNumOccurrences()) {
    OwningPtr<MemoryBuffer> Buf;
    if (MemoryBuffer::getFileOrSTDIN(Input, Buf))
      return 1;

    llvm::SourceMgr sm;
    if (DumpTokens) {
      yaml::dumpTokens(Buf->getBuffer(), outs());
    }

    if (DumpCanonical) {
      yaml::Stream stream(Buf->getBuffer(), sm);
      dumpStream(stream);
    }
  }

  if (Verify) {
    llvm::TimerGroup Group("YAML parser benchmark");
    benchmark(Group, "Fast", createJSONText(10, 500));
  } else if (!DumpCanonical && !DumpTokens) {
    llvm::TimerGroup Group("YAML parser benchmark");
    benchmark(Group, "Small Values", createJSONText(MemoryLimitMB, 5));
    benchmark(Group, "Medium Values", createJSONText(MemoryLimitMB, 500));
    benchmark(Group, "Large Values", createJSONText(MemoryLimitMB, 50000));
  }

  return 0;
}
/// AddOptimizationPasses - This routine adds optimization passes
/// based on selected optimization level, OptLevel. This routine
/// duplicates llvm-gcc behaviour.
///
/// OptLevel - Optimization Level
static void AddOptimizationPasses(PassManagerBase &MPM,
                                  FunctionPassManager &FPM,
                                  unsigned OptLevel, unsigned SizeLevel) {
  FPM.add(createVerifierPass()); // Verify that input is correct

  PassManagerBuilder Builder;
  Builder.OptLevel = OptLevel;
  Builder.SizeLevel = SizeLevel;

  if (DisableInline) {
    // No inlining pass
  } else if (OptLevel > 1) {
    unsigned Threshold = 225;
    if (SizeLevel == 1)      // -Os
      Threshold = 75;
    else if (SizeLevel == 2) // -Oz
      Threshold = 25;
    if (OptLevel > 2)
      Threshold = 275;
    Builder.Inliner = createFunctionInliningPass(Threshold);
  } else {
    Builder.Inliner = createAlwaysInlinerPass();
  }
  Builder.DisableUnitAtATime = !UnitAtATime;
  Builder.DisableUnrollLoops = (DisableLoopUnrolling.getNumOccurrences() > 0)
                                   ? DisableLoopUnrolling
                                   : OptLevel == 0;
  Builder.LoopVectorize = OptLevel > 1 && SizeLevel < 2;
  Builder.SLPVectorize = true;

  Builder.populateFunctionPassManager(FPM);
  Builder.populateModulePassManager(MPM);
}
unsigned Inliner::getInlineThreshold(CallSite CS) const {
  int thres = InlineThreshold; // -inline-threshold or else selected by
                               // overall opt level

  // If -inline-threshold is not given, listen to the optsize attribute when it
  // would decrease the threshold.
  Function *Caller = CS.getCaller();
  bool OptSize = Caller && !Caller->isDeclaration() &&
                 Caller->getAttributes().hasAttribute(
                     AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
  if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
      OptSizeThreshold < thres)
    thres = OptSizeThreshold;

  // Listen to the inlinehint attribute when it would increase the threshold
  // and the caller does not need to minimize its size.
  Function *Callee = CS.getCalledFunction();
  bool InlineHint = Callee && !Callee->isDeclaration() &&
                    Callee->getAttributes().hasAttribute(
                        AttributeSet::FunctionIndex, Attribute::InlineHint);
  if (InlineHint && HintThreshold > thres &&
      !Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                            Attribute::MinSize))
    thres = HintThreshold;

  // Listen to the cold attribute when it would decrease the threshold.
  bool ColdCallee = Callee && !Callee->isDeclaration() &&
                    Callee->getAttributes().hasAttribute(
                        AttributeSet::FunctionIndex, Attribute::Cold);
  if (ColdCallee && ColdThreshold < thres)
    thres = ColdThreshold;

  return thres;
}
thinlto_code_gen_t thinlto_create_codegen(void) {
  lto_initialize();
  ThinLTOCodeGenerator *CodeGen = new ThinLTOCodeGenerator();
  CodeGen->setTargetOptions(InitTargetOptionsFromCodeGenFlags());

  if (OptLevel.getNumOccurrences()) {
    if (OptLevel < '0' || OptLevel > '3')
      report_fatal_error("Optimization level must be between 0 and 3");
    CodeGen->setOptLevel(OptLevel - '0');
    switch (OptLevel) {
    case '0':
      CodeGen->setCodeGenOptLevel(CodeGenOpt::None);
      break;
    case '1':
      CodeGen->setCodeGenOptLevel(CodeGenOpt::Less);
      break;
    case '2':
      CodeGen->setCodeGenOptLevel(CodeGenOpt::Default);
      break;
    case '3':
      CodeGen->setCodeGenOptLevel(CodeGenOpt::Aggressive);
      break;
    }
  }
  return wrap(CodeGen);
}
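A note on the snippet above: OptLevel there is a single-character option, so it is range-checked as a character and converted with OptLevel - '0', which works because ASCII digit codes are contiguous. Below is a minimal self-contained sketch of the same validation, using a hypothetical -opt-level flag (not the API of any snippet in this collection):

#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;

// Hypothetical single-character optimization-level flag.
static cl::opt<char> OptChar("opt-level",
                             cl::desc("Optimization level, 0-3 (default: 2)"),
                             cl::init('2'));

// Range-check the raw character, then convert it to an integer level;
// '3' - '0' == 3 because ASCII digits are contiguous.
static unsigned parseOptChar() {
  if (OptChar < '0' || OptChar > '3')
    report_fatal_error("Optimization level must be between 0 and 3");
  return OptChar - '0';
}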
// Helper function to handle -of, -od, etc.
static void initFromString(char *&dest, const cl::opt<std::string> &src) {
  dest = 0;
  if (src.getNumOccurrences() != 0) {
    if (src.empty())
      error("Expected argument to '-%s'", src.ArgStr);
    dest = mem.strdup(src.c_str());
  }
}
RegBankSelect::RegBankSelect(Mode RunningMode)
    : MachineFunctionPass(ID), OptMode(RunningMode) {
  initializeRegBankSelectPass(*PassRegistry::getPassRegistry());
  if (RegBankSelectMode.getNumOccurrences() != 0) {
    OptMode = RegBankSelectMode;
    if (RegBankSelectMode != RunningMode)
      LLVM_DEBUG(dbgs() << "RegBankSelect mode overridden by command line\n");
  }
}
TargetMachine::TargetMachine(const Target &T, StringRef DataLayoutString,
                             const Triple &TT, StringRef CPU, StringRef FS,
                             const TargetOptions &Options)
    : TheTarget(T), DL(DataLayoutString), TargetTriple(TT), TargetCPU(CPU),
      TargetFS(FS), AsmInfo(nullptr), MRI(nullptr), MII(nullptr), STI(nullptr),
      RequireStructuredCFG(false), Options(Options) {
  if (EnableIPRA.getNumOccurrences())
    this->Options.EnableIPRA = EnableIPRA;
}
void BasicTTI::getUnrollingPreferences(Loop *L,
                                       UnrollingPreferences &UP) const {
  // This unrolling functionality is target independent, but to provide some
  // motivation for its intended use, for x86:

  // According to the Intel 64 and IA-32 Architectures Optimization Reference
  // Manual, Intel Core models and later have a loop stream detector (and
  // associated uop queue) that can benefit from partial unrolling.
  // The relevant requirements are:
  //  - The loop must have no more than 4 (8 for Nehalem and later) branches
  //    taken, and none of them may be calls.
  //  - The loop can have no more than 18 (28 for Nehalem and later) uops.

  // According to the Software Optimization Guide for AMD Family 15h
  // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
  // and loop buffer which can benefit from partial unrolling.
  // The relevant requirements are:
  //  - The loop must have fewer than 16 branches
  //  - The loop must have less than 40 uops in all executed loop branches

  // The number of taken branches in a loop is hard to estimate here, and
  // benchmarking has revealed that it is better not to be conservative when
  // estimating the branch count. As a result, we'll ignore the branch limits
  // until someone finds a case where it matters in practice.

  unsigned MaxOps;
  const TargetSubtargetInfo *ST = &TM->getSubtarget<TargetSubtargetInfo>();
  if (PartialUnrollingThreshold.getNumOccurrences() > 0)
    MaxOps = PartialUnrollingThreshold;
  else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
    MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
  else
    return;

  // Scan the loop: don't unroll loops with calls.
  for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
       ++I) {
    BasicBlock *BB = *I;

    for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
      if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
        ImmutableCallSite CS(J);
        if (const Function *F = CS.getCalledFunction()) {
          if (!TopTTI->isLoweredToCall(F))
            continue;
        }

        return;
      }
  }

  // Enable runtime and partial unrolling up to the specified size.
  UP.Partial = UP.Runtime = true;
  UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps;
}
static CodeGenOpt::Level GetCodeGenOptLevel() {
  if (CodeGenOptLevel.getNumOccurrences())
    return static_cast<CodeGenOpt::Level>(unsigned(CodeGenOptLevel));
  if (OptLevelO1)
    return CodeGenOpt::Less;
  if (OptLevelO2)
    return CodeGenOpt::Default;
  if (OptLevelO3)
    return CodeGenOpt::Aggressive;
  return CodeGenOpt::None;
}
static void processViewOptions() {
  if (!EnableAllViews.getNumOccurrences() &&
      !EnableAllStats.getNumOccurrences())
    return;

  if (EnableAllViews.getNumOccurrences()) {
    processOptionImpl(PrintSummaryView, EnableAllViews);
    processOptionImpl(PrintResourcePressureView, EnableAllViews);
    processOptionImpl(PrintTimelineView, EnableAllViews);
    processOptionImpl(PrintInstructionInfoView, EnableAllViews);
  }

  const cl::opt<bool> &Default =
      EnableAllViews.getPosition() < EnableAllStats.getPosition()
          ? EnableAllStats
          : EnableAllViews;
  processOptionImpl(PrintRegisterFileStats, Default);
  processOptionImpl(PrintDispatchStats, Default);
  processOptionImpl(PrintSchedulerStats, Default);
  processOptionImpl(PrintRetireStats, Default);
}
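The conflict resolution above compares cl::Option::getPosition(): when both -all-views and -all-stats are written, the one that appears later on the command line wins for the flags they share. Below is a minimal sketch of that idiom with hypothetical option names; getPosition() returns 0 for an option that never appeared, so an explicit flag always outranks an absent one:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Hypothetical pair of umbrella flags that can both drive the same views.
static cl::opt<bool> AllViews("all-views", cl::desc("Enable every view"));
static cl::opt<bool> AllStats("all-stats", cl::desc("Enable every statistic"));

// Pick whichever flag the user wrote later on the command line.
static const cl::opt<bool> &laterOfTwo() {
  return AllViews.getPosition() < AllStats.getPosition() ? AllStats : AllViews;
}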
unsigned PPCTTIImpl::getCacheLineSize() {
  // Check first if the user specified a custom line size.
  if (CacheLineSize.getNumOccurrences() > 0)
    return CacheLineSize;

  // On P7, P8 or P9 we have a cache line size of 128.
  unsigned Directive = ST->getDarwinDirective();
  if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
      Directive == PPC::DIR_PWR9)
    return 128;

  // On other processors return a default of 64 bytes.
  return 64;
}
R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
                                     StringRef CPU, StringRef FS,
                                     TargetOptions Options,
                                     Optional<Reloc::Model> RM,
                                     Optional<CodeModel::Model> CM,
                                     CodeGenOpt::Level OL, bool JIT)
    : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
  setRequiresStructuredCFG(true);

  // Override the default since calls aren't supported for r600.
  if (EnableFunctionCalls &&
      EnableAMDGPUFunctionCallsOpt.getNumOccurrences() == 0)
    EnableFunctionCalls = false;
}
/// Returns true if we can profitably unroll the multi-exit loop L. Currently,
/// we return true only if UnrollRuntimeMultiExit is set to true.
static bool canProfitablyUnrollMultiExitLoop(
    Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
    bool PreserveLCSSA, bool UseEpilogRemainder) {
#if !defined(NDEBUG)
  SmallVector<BasicBlock *, 8> OtherExitsDummyCheck;
  assert(canSafelyUnrollMultiExitLoop(L, OtherExitsDummyCheck, LatchExit,
                                      PreserveLCSSA, UseEpilogRemainder) &&
         "Should be safe to unroll before checking profitability!");
#endif

  // Priority goes to UnrollRuntimeMultiExit if it's supplied.
  return UnrollRuntimeMultiExit.getNumOccurrences() ? UnrollRuntimeMultiExit
                                                    : false;
}
unsigned Inliner::getInlineThreshold(CallSite CS) const {
  int thres = InlineThreshold; // -inline-threshold or else selected by
                               // overall opt level

  // If -inline-threshold is not given, listen to the optsize attribute when it
  // would decrease the threshold.
  Function *Caller = CS.getCaller();
  bool OptSize = Caller && !Caller->isDeclaration() &&
                 // FIXME: Use Function::optForSize().
                 Caller->hasFnAttribute(Attribute::OptimizeForSize);
  if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
      OptSizeThreshold < thres)
    thres = OptSizeThreshold;

  // Listen to the inlinehint attribute when it would increase the threshold
  // and the caller does not need to minimize its size.
  Function *Callee = CS.getCalledFunction();
  bool InlineHint = Callee && !Callee->isDeclaration() &&
                    Callee->hasFnAttribute(Attribute::InlineHint);
  if (InlineHint && HintThreshold > thres &&
      !Caller->hasFnAttribute(Attribute::MinSize))
    thres = HintThreshold;

  // Listen to the cold attribute when it would decrease the threshold.
  bool ColdCallee = Callee && !Callee->isDeclaration() &&
                    Callee->hasFnAttribute(Attribute::Cold);
  // Command line argument for InlineLimit will override the default
  // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
  // do not use the default cold threshold even if it is smaller.
  if ((InlineLimit.getNumOccurrences() == 0 ||
       ColdThreshold.getNumOccurrences() > 0) &&
      ColdCallee && ColdThreshold < thres)
    thres = ColdThreshold;

  return thres;
}
int main(int argc, char **argv) {
  // Print a stack trace if we signal out.
  sys::PrintStackTraceOnErrorSignal();
  PrettyStackTraceProgram X(argc, argv);

  llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
  cl::ParseCommandLineOptions(argc, argv, "llvm object size dumper\n");

  ToolName = argv[0];
  if (OutputFormatShort.getNumOccurrences())
    OutputFormat = OutputFormatShort;
  if (RadixShort.getNumOccurrences())
    Radix = RadixShort;

  for (unsigned i = 0; i < ArchFlags.size(); ++i) {
    if (ArchFlags[i] == "all") {
      ArchAll = true;
    } else {
      Triple T = MachOObjectFile::getArch(ArchFlags[i]);
      if (T.getArch() == Triple::UnknownArch) {
        outs() << ToolName << ": for the -arch option: Unknown architecture "
               << "named '" << ArchFlags[i] << "'";
        return 1;
      }
    }
  }

  if (InputFilenames.size() == 0)
    InputFilenames.push_back("a.out");

  moreThanOneFile = InputFilenames.size() > 1;

  std::for_each(InputFilenames.begin(), InputFilenames.end(),
                PrintFileSectionSizes);

  return 0;
}
// Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
                            TargetTransformInfo::UnrollingPreferences &UP) {
  UP.PeelCount = 0;
  if (!canPeel(L))
    return;

  // Only try to peel innermost loops.
  if (!L->empty())
    return;

  // If the user provided a peel count, use that.
  bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
  if (UserPeelCount) {
    DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
                 << " iterations.\n");
    UP.PeelCount = UnrollForcePeelCount;
    return;
  }

  // If we don't know the trip count, but have reason to believe the average
  // trip count is low, peeling should be beneficial, since we will usually
  // hit the peeled section.
  // We only do this in the presence of profile information, since otherwise
  // our estimates of the trip count are not reliable enough.
  if (UP.AllowPeeling && L->getHeader()->getParent()->getEntryCount()) {
    Optional<unsigned> PeelCount = getLoopEstimatedTripCount(L);
    if (!PeelCount)
      return;

    DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount
                 << "\n");

    if (*PeelCount) {
      if ((*PeelCount <= UnrollPeelMaxCount) &&
          (LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
        DEBUG(dbgs() << "Peeling first " << *PeelCount << " iterations.\n");
        UP.PeelCount = *PeelCount;
        return;
      }
      DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
      DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
      DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) << "\n");
      DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n");
    }
  }

  return;
}
/// This routine adds optimization passes based on selected optimization level,
/// OptLevel.
///
/// OptLevel - Optimization Level
static void AddOptimizationPasses(legacy::PassManagerBase &MPM,
                                  legacy::FunctionPassManager &FPM,
                                  TargetMachine *TM, unsigned OptLevel,
                                  unsigned SizeLevel) {
  if (!NoVerify || VerifyEach)
    FPM.add(createVerifierPass()); // Verify that input is correct

  PassManagerBuilder Builder;
  Builder.OptLevel = OptLevel;
  Builder.SizeLevel = SizeLevel;

  if (DisableInline) {
    // No inlining pass
  } else if (OptLevel > 1) {
    Builder.Inliner = createFunctionInliningPass(OptLevel, SizeLevel);
  } else {
    Builder.Inliner = createAlwaysInlinerLegacyPass();
  }
  Builder.DisableUnitAtATime = !UnitAtATime;
  Builder.DisableUnrollLoops = (DisableLoopUnrolling.getNumOccurrences() > 0)
                                   ? DisableLoopUnrolling
                                   : OptLevel == 0;

  // This is final, unless there is a #pragma vectorize enable
  if (DisableLoopVectorization)
    Builder.LoopVectorize = false;
  // If option wasn't forced via cmd line (-vectorize-loops, -loop-vectorize)
  else if (!Builder.LoopVectorize)
    Builder.LoopVectorize = OptLevel > 1 && SizeLevel < 2;

  // When #pragma vectorize is on for SLP, do the same as above
  Builder.SLPVectorize =
      DisableSLPVectorization ? false : OptLevel > 1 && SizeLevel < 2;

  // Add target-specific passes that need to run as early as possible.
  if (TM)
    Builder.addExtension(
        PassManagerBuilder::EP_EarlyAsPossible,
        [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
          TM->addEarlyAsPossiblePasses(PM);
        });

  if (Coroutines)
    addCoroutinePassesToExtensionPoints(Builder);

  Builder.populateFunctionPassManager(FPM);
  Builder.populateModulePassManager(MPM);
}
/// Returns true if we can profitably unroll the multi-exit loop L. Currently,
/// we return true only if UnrollRuntimeMultiExit is set to true.
static bool canProfitablyUnrollMultiExitLoop(
    Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
    bool PreserveLCSSA, bool UseEpilogRemainder) {
#if !defined(NDEBUG)
  SmallVector<BasicBlock *, 8> OtherExitsDummyCheck;
  assert(canSafelyUnrollMultiExitLoop(L, OtherExitsDummyCheck, LatchExit,
                                      PreserveLCSSA, UseEpilogRemainder) &&
         "Should be safe to unroll before checking profitability!");
#endif

  // Priority goes to UnrollRuntimeMultiExit if it's supplied.
  if (UnrollRuntimeMultiExit.getNumOccurrences())
    return UnrollRuntimeMultiExit;

  // The main pain point with multi-exit loop unrolling is that once unrolled,
  // we will not be able to merge all blocks into a straight line code.
  // There are branches within the unrolled loop that go to the OtherExits.
  // The second point is the increase in code size, but this is true
  // irrespective of multiple exits.

  // Note: Both the heuristics below are coarse grained. We are essentially
  // enabling unrolling of loops that have a single side exit other than the
  // normal LatchExit (i.e. exiting into a deoptimize block).
  // The heuristics considered are:
  // 1. low number of branches in the unrolled version.
  // 2. high predictability of these extra branches.
  // We avoid unrolling loops that have more than two exiting blocks. This
  // limits the total number of branches in the unrolled loop to be at most
  // the unroll factor (since one of the exiting blocks is the latch block).
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  if (ExitingBlocks.size() > 2)
    return false;

  // The second heuristic is that L has one exit other than the latchexit and
  // that exit is a deoptimize block. We know that deoptimize blocks are rarely
  // taken, which also implies the branch leading to the deoptimize block is
  // highly predictable.
  return (OtherExits.size() == 1 &&
          OtherExits[0]->getTerminatingDeoptimizeCall());
  // TODO: These can be fine-tuned further to consider code size or deopt
  // states that are captured by the deoptimize exit block.
  // Also, we can extend this to support more cases, if we actually
  // know of kinds of multiexit loops that would benefit from unrolling.
}
unsigned Inliner::getInlineThreshold(CallSite CS) const {
  int thres = InlineThreshold;

  // Listen to optsize when -inline-limit is not given.
  Function *Caller = CS.getCaller();
  if (Caller && !Caller->isDeclaration() &&
      Caller->hasFnAttr(Attribute::OptimizeForSize) &&
      InlineLimit.getNumOccurrences() == 0)
    thres = OptSizeThreshold;

  // Listen to inlinehint when it would increase the threshold.
  Function *Callee = CS.getCalledFunction();
  if (HintThreshold > thres && Callee && !Callee->isDeclaration() &&
      Callee->hasFnAttr(Attribute::InlineHint))
    thres = HintThreshold;

  return thres;
}
unsigned Inliner::getInlineThreshold(CallSite CS) const {
  int thres = InlineThreshold; // -inline-threshold or else selected by
                               // overall opt level

  // If -inline-threshold is not given, listen to the optsize attribute when it
  // would decrease the threshold.
  Function *Caller = CS.getCaller();
  bool OptSize = Caller && !Caller->isDeclaration() &&
                 Caller->hasFnAttr(Attribute::OptimizeForSize);
  if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
      OptSizeThreshold < thres)
    thres = OptSizeThreshold;

  // Listen to the inlinehint attribute when it would increase the threshold.
  Function *Callee = CS.getCalledFunction();
  bool InlineHint = Callee && !Callee->isDeclaration() &&
                    Callee->hasFnAttr(Attribute::InlineHint);
  if (InlineHint && HintThreshold > thres)
    thres = HintThreshold;

  return thres;
}
int main(int argc, char **argv) {
  // Print a stack trace if we signal out.
  sys::PrintStackTraceOnErrorSignal(argv[0]);
  PrettyStackTraceProgram X(argc, argv);
  llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
  cl::ParseCommandLineOptions(argc, argv, "llvm objcopy utility\n");
  ToolName = argv[0];
  if (InputFilename.empty()) {
    cl::PrintHelpMessage();
    return 2;
  }
  auto Reader = CreateReader();
  auto Obj = Reader->create();
  StringRef Output =
      OutputFilename.getNumOccurrences() ? OutputFilename : InputFilename;
  auto Writer = CreateWriter(*Obj, Output);
  HandleArgs(*Obj, *Reader);
  Writer->finalize();
  Writer->write();
}
static std::string getProgram(const char *name,
                              const cl::opt<std::string> &opt,
                              const char *envVar = 0) {
  std::string path;
  const char *prog = NULL;

  if (opt.getNumOccurrences() > 0 && opt.length() > 0 && (prog = opt.c_str()))
    path = findProgramByName(prog);

  if (path.empty() && envVar && (prog = getenv(envVar)))
    path = findProgramByName(prog);

  if (path.empty())
    path = findProgramByName(name);

  if (path.empty()) {
    error(Loc(), "failed to locate %s", name);
    fatal();
  }

  return path;
}
sys::Path getGcc() {
  const char *cc = NULL;

  if (gcc.getNumOccurrences() > 0 && gcc.length() > 0)
    cc = gcc.c_str();

  if (!cc)
    cc = getenv("CC");
  if (!cc)
    cc = "gcc";

  sys::Path path = sys::Program::FindProgramByName(cc);
  // If the lookup failed, fall back to the literal name. (The original
  // condition `path.empty() && !cc` made this block unreachable, since cc is
  // always non-null here; the error branch remains only as a safeguard.)
  if (path.empty()) {
    if (cc) {
      path.set(cc);
    } else {
      error("failed to locate gcc");
      fatal();
    }
  }

  return path;
}
sys::Path getProgram(const char *name, const cl::opt<std::string> &opt,
                     const char *envVar = 0) {
  const char *prog = NULL;

  // Read from the `opt` parameter, not the unrelated `gcc` option (the
  // original assigned gcc.c_str() here, an apparent copy-paste slip).
  if (opt.getNumOccurrences() > 0 && opt.length() > 0)
    prog = opt.c_str();

  if (!prog && envVar)
    prog = getenv(envVar);
  if (!prog)
    prog = name;

  sys::Path path = sys::Program::FindProgramByName(prog);
  // If the lookup failed, fall back to the literal name. (The original
  // condition `path.empty() && !prog` made this block unreachable, since prog
  // is always non-null here; the error branch remains only as a safeguard.)
  if (path.empty()) {
    if (prog) {
      path.set(prog);
    } else {
      error("failed to locate %s", name);
      fatal();
    }
  }

  return path;
}
//===----------------------------------------------------------------------===//
// main for opt
//
int main(int argc, char **argv) {
  sys::PrintStackTraceOnErrorSignal();
  llvm::PrettyStackTraceProgram X(argc, argv);

  // Enable debug stream buffering.
  EnableDebugBuffering = true;

  llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
  LLVMContext &Context = getGlobalContext();

  InitializeAllTargets();
  InitializeAllTargetMCs();
  InitializeAllAsmPrinters();

  // Initialize passes
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeCore(Registry);
  initializeScalarOpts(Registry);
  initializeObjCARCOpts(Registry);
  initializeVectorization(Registry);
  initializeIPO(Registry);
  initializeAnalysis(Registry);
  initializeIPA(Registry);
  initializeTransformUtils(Registry);
  initializeInstCombine(Registry);
  initializeInstrumentation(Registry);
  initializeTarget(Registry);
  // For codegen passes, only passes that do IR to IR transformation are
  // supported.
  initializeCodeGenPreparePass(Registry);
  initializeAtomicExpandPass(Registry);
  initializeRewriteSymbolsPass(Registry);

#ifdef LINK_POLLY_INTO_TOOLS
  polly::initializePollyPasses(Registry);
#endif

  cl::ParseCommandLineOptions(argc, argv,
    "llvm .bc -> .bc modular optimizer and analysis printer\n");

  if (AnalyzeOnly && NoOutput) {
    errs() << argv[0] << ": analyze mode conflicts with no-output mode.\n";
    return 1;
  }

  SMDiagnostic Err;

  // Load the input module...
  std::unique_ptr<Module> M = parseIRFile(InputFilename, Err, Context);

  if (!M) {
    Err.print(argv[0], errs());
    return 1;
  }

  // If we are supposed to override the target triple, do so now.
  if (!TargetTriple.empty())
    M->setTargetTriple(Triple::normalize(TargetTriple));

  // Figure out what stream we are supposed to write to...
  std::unique_ptr<tool_output_file> Out;
  if (NoOutput) {
    if (!OutputFilename.empty())
      errs() << "WARNING: The -o (output filename) option is ignored when\n"
                "the --disable-output option is used.\n";
  } else {
    // Default to standard output.
    if (OutputFilename.empty())
      OutputFilename = "-";

    std::error_code EC;
    Out.reset(new tool_output_file(OutputFilename, EC, sys::fs::F_None));
    if (EC) {
      errs() << EC.message() << '\n';
      return 1;
    }
  }

  // If the output is set to be emitted to standard out, and standard out is a
  // console, print out a warning message and refuse to do it. We don't
  // impress anyone by spewing tons of binary goo to a terminal.
  if (!Force && !NoOutput && !AnalyzeOnly && !OutputAssembly)
    if (CheckBitcodeOutputToConsole(Out->os(), !Quiet))
      NoOutput = true;

  if (PassPipeline.getNumOccurrences() > 0) {
    OutputKind OK = OK_NoOutput;
    if (!NoOutput)
      OK = OutputAssembly ? OK_OutputAssembly : OK_OutputBitcode;

    VerifierKind VK = VK_VerifyInAndOut;
    if (NoVerify)
      VK = VK_NoVerifier;
    else if (VerifyEach)
      VK = VK_VerifyEachPass;

    // The user has asked to use the new pass manager and provided a pipeline
    // string. Hand off the rest of the functionality to the new code for that
    // layer.
    return runPassPipeline(argv[0], Context, *M, Out.get(), PassPipeline, OK,
                           VK)
               ? 0
               : 1;
  }

  // Create a PassManager to hold and optimize the collection of passes we are
  // about to build.
  //
  PassManager Passes;

  // Add an appropriate TargetLibraryInfo pass for the module's triple.
  TargetLibraryInfo *TLI = new TargetLibraryInfo(Triple(M->getTargetTriple()));

  // The -disable-simplify-libcalls flag actually disables all builtin optzns.
  if (DisableSimplifyLibCalls)
    TLI->disableAllFunctions();
  Passes.add(TLI);

  // Add an appropriate DataLayout instance for this module.
  const DataLayout *DL = M->getDataLayout();
  if (!DL && !DefaultDataLayout.empty()) {
    M->setDataLayout(DefaultDataLayout);
    DL = M->getDataLayout();
  }

  if (DL)
    Passes.add(new DataLayoutPass());

  Triple ModuleTriple(M->getTargetTriple());
  TargetMachine *Machine = nullptr;
  if (ModuleTriple.getArch())
    Machine = GetTargetMachine(Triple(ModuleTriple));
  std::unique_ptr<TargetMachine> TM(Machine);

  // Add internal analysis passes from the target machine.
  if (TM)
    TM->addAnalysisPasses(Passes);

  std::unique_ptr<FunctionPassManager> FPasses;
  if (OptLevelO1 || OptLevelO2 || OptLevelOs || OptLevelOz || OptLevelO3) {
    FPasses.reset(new FunctionPassManager(M.get()));
    if (DL)
      FPasses->add(new DataLayoutPass());
    if (TM)
      TM->addAnalysisPasses(*FPasses);
  }

  if (PrintBreakpoints) {
    // Default to standard output.
    if (!Out) {
      if (OutputFilename.empty())
        OutputFilename = "-";

      std::error_code EC;
      Out = llvm::make_unique<tool_output_file>(OutputFilename, EC,
                                                sys::fs::F_None);
      if (EC) {
        errs() << EC.message() << '\n';
        return 1;
      }
    }
    Passes.add(createBreakpointPrinter(Out->os()));
    NoOutput = true;
  }

  // If the -strip-debug command line option was specified, add it.
  if (StripDebug)
    addPass(Passes, createStripSymbolsPass(true));

  // Create a new optimization pass for each one specified on the command line
  for (unsigned i = 0; i < PassList.size(); ++i) {
    if (StandardLinkOpts &&
        StandardLinkOpts.getPosition() < PassList.getPosition(i)) {
      AddStandardLinkPasses(Passes);
      StandardLinkOpts = false;
    }

    if (OptLevelO1 && OptLevelO1.getPosition() < PassList.getPosition(i)) {
      AddOptimizationPasses(Passes, *FPasses, 1, 0);
      OptLevelO1 = false;
    }

    if (OptLevelO2 && OptLevelO2.getPosition() < PassList.getPosition(i)) {
      AddOptimizationPasses(Passes, *FPasses, 2, 0);
      OptLevelO2 = false;
    }

    if (OptLevelOs && OptLevelOs.getPosition() < PassList.getPosition(i)) {
      AddOptimizationPasses(Passes, *FPasses, 2, 1);
      OptLevelOs = false;
    }

    if (OptLevelOz && OptLevelOz.getPosition() < PassList.getPosition(i)) {
      AddOptimizationPasses(Passes, *FPasses, 2, 2);
      OptLevelOz = false;
    }

    if (OptLevelO3 && OptLevelO3.getPosition() < PassList.getPosition(i)) {
      AddOptimizationPasses(Passes, *FPasses, 3, 0);
      OptLevelO3 = false;
    }

    const PassInfo *PassInf = PassList[i];
    Pass *P = nullptr;
    if (PassInf->getTargetMachineCtor())
      P = PassInf->getTargetMachineCtor()(TM.get());
    else if (PassInf->getNormalCtor())
      P = PassInf->getNormalCtor()();
    else
      errs() << argv[0] << ": cannot create pass: " << PassInf->getPassName()
             << "\n";
    if (P) {
      PassKind Kind = P->getPassKind();
      addPass(Passes, P);

      if (AnalyzeOnly) {
        switch (Kind) {
        case PT_BasicBlock:
          Passes.add(createBasicBlockPassPrinter(PassInf, Out->os(), Quiet));
          break;
        case PT_Region:
          Passes.add(createRegionPassPrinter(PassInf, Out->os(), Quiet));
          break;
        case PT_Loop:
          Passes.add(createLoopPassPrinter(PassInf, Out->os(), Quiet));
          break;
        case PT_Function:
          Passes.add(createFunctionPassPrinter(PassInf, Out->os(), Quiet));
          break;
        case PT_CallGraphSCC:
          Passes.add(createCallGraphPassPrinter(PassInf, Out->os(), Quiet));
          break;
        default:
          Passes.add(createModulePassPrinter(PassInf, Out->os(), Quiet));
          break;
        }
      }
    }

    if (PrintEachXForm)
      Passes.add(createPrintModulePass(errs()));
  }

  if (StandardLinkOpts) {
    AddStandardLinkPasses(Passes);
    StandardLinkOpts = false;
  }

  if (OptLevelO1)
    AddOptimizationPasses(Passes, *FPasses, 1, 0);

  if (OptLevelO2)
    AddOptimizationPasses(Passes, *FPasses, 2, 0);

  if (OptLevelOs)
    AddOptimizationPasses(Passes, *FPasses, 2, 1);

  if (OptLevelOz)
    AddOptimizationPasses(Passes, *FPasses, 2, 2);

  if (OptLevelO3)
    AddOptimizationPasses(Passes, *FPasses, 3, 0);
  if (OptLevelO1 || OptLevelO2 || OptLevelOs || OptLevelOz || OptLevelO3) {
    FPasses->doInitialization();
    for (Function &F : *M)
      FPasses->run(F);
    FPasses->doFinalization();
  }

  // Check that the module is well formed on completion of optimization
  if (!NoVerify && !VerifyEach) {
    Passes.add(createVerifierPass());
    Passes.add(createDebugInfoVerifierPass());
  }

  // Write bitcode or assembly to the output as the last step...
  if (!NoOutput && !AnalyzeOnly) {
    if (OutputAssembly)
      Passes.add(createPrintModulePass(Out->os()));
    else
      Passes.add(createBitcodeWriterPass(Out->os()));
  }

  // Before executing passes, print the final values of the LLVM options.
  cl::PrintOptionValues();

  // Now that we have all of the passes ready, run them.
  Passes.run(*M);

  // Declare success.
  if (!NoOutput || PrintBreakpoints)
    Out->keep();

  return 0;
}
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
bool PEI::runOnMachineFunction(MachineFunction &MF) {
  const Function &F = MF.getFunction();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();

  RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr;
  FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF);
  FrameIndexEliminationScavenging =
      (RS && !FrameIndexVirtualScavenging) ||
      TRI->requiresFrameIndexReplacementScavenging(MF);
  ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();

  // Calculate the MaxCallFrameSize and AdjustsStack variables for the
  // function's frame information. Also eliminates call frame pseudo
  // instructions.
  calculateCallFrameInfo(MF);

  // Determine placement of CSR spill/restore code and prolog/epilog code:
  // place all spills in the entry block, all restores in return blocks.
  calculateSaveRestoreBlocks(MF);

  // Handle CSR spilling and restoring, for targets that need it.
  if (MF.getTarget().usesPhysRegsForPEI())
    spillCalleeSavedRegs(MF);

  // Allow the target machine to make final modifications to the function
  // before the frame layout is finalized.
  TFI->processFunctionBeforeFrameFinalized(MF, RS);

  // Calculate actual frame offsets for all abstract stack objects...
  calculateFrameObjectOffsets(MF);

  // Add prolog and epilog code to the function. This function is required
  // to align the stack frame as necessary for any stack variables or
  // called functions. Because of this, calculateCalleeSavedRegisters()
  // must be called before this function in order to set the AdjustsStack
  // and MaxCallFrameSize variables.
  if (!F.hasFnAttribute(Attribute::Naked))
    insertPrologEpilogCode(MF);

  // Replace all MO_FrameIndex operands with physical register references
  // and actual offsets.
  //
  replaceFrameIndices(MF);

  // If register scavenging is needed, as we've enabled doing it as a
  // post-pass, scavenge the virtual registers that frame index elimination
  // inserted.
  if (TRI->requiresRegisterScavenging(MF) && FrameIndexVirtualScavenging)
    scavengeFrameVirtualRegs(MF, *RS);

  // Warn on stack size when it exceeds the given limit.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  uint64_t StackSize = MFI.getStackSize();
  if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) {
    DiagnosticInfoStackSize DiagStackSize(F, StackSize);
    F.getContext().diagnose(DiagStackSize);
  }

  ORE->emit([&]() {
    return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "StackSize",
                                             MF.getFunction().getSubprogram(),
                                             &MF.front())
           << ore::NV("NumStackBytes", StackSize)
           << " stack bytes in function";
  });

  delete RS;
  SaveBlocks.clear();
  RestoreBlocks.clear();
  MFI.setSavePoint(nullptr);
  MFI.setRestorePoint(nullptr);
  return true;
}
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
///
bool PEI::runOnMachineFunction(MachineFunction &Fn) {
  if (!SpillCalleeSavedRegisters) {
    const TargetMachine &TM = Fn.getTarget();
    if (!TM.usesPhysRegsForPEI()) {
      SpillCalleeSavedRegisters = [](MachineFunction &, RegScavenger *,
                                     unsigned &, unsigned &,
                                     const MBBVector &, const MBBVector &) {};
      ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger &) {};
    } else {
      SpillCalleeSavedRegisters = doSpillCalleeSavedRegs;
      ScavengeFrameVirtualRegs = scavengeFrameVirtualRegs;
      UsesCalleeSaves = true;
    }
  }

  const Function *F = Fn.getFunction();
  const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();

  RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : nullptr;
  FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
  FrameIndexEliminationScavenging =
      (RS && !FrameIndexVirtualScavenging) ||
      TRI->requiresFrameIndexReplacementScavenging(Fn);
  ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();

  // Calculate the MaxCallFrameSize and AdjustsStack variables for the
  // function's frame information. Also eliminates call frame pseudo
  // instructions.
  calculateCallFrameInfo(Fn);

  // Determine placement of CSR spill/restore code and prolog/epilog code:
  // place all spills in the entry block, all restores in return blocks.
  calculateSaveRestoreBlocks(Fn);

  // Handle CSR spilling and restoring, for targets that need it.
  SpillCalleeSavedRegisters(Fn, RS, MinCSFrameIndex, MaxCSFrameIndex,
                            SaveBlocks, RestoreBlocks);

  // Allow the target machine to make final modifications to the function
  // before the frame layout is finalized.
  TFI->processFunctionBeforeFrameFinalized(Fn, RS);

  // Calculate actual frame offsets for all abstract stack objects...
  calculateFrameObjectOffsets(Fn);

  // Add prolog and epilog code to the function. This function is required
  // to align the stack frame as necessary for any stack variables or
  // called functions. Because of this, calculateCalleeSavedRegisters()
  // must be called before this function in order to set the AdjustsStack
  // and MaxCallFrameSize variables.
  if (!F->hasFnAttribute(Attribute::Naked))
    insertPrologEpilogCode(Fn);

  // Replace all MO_FrameIndex operands with physical register references
  // and actual offsets.
  //
  replaceFrameIndices(Fn);

  // If register scavenging is needed, as we've enabled doing it as a
  // post-pass, scavenge the virtual registers that frame index elimination
  // inserted.
  if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) {
    ScavengeFrameVirtualRegs(Fn, *RS);

    // Clear any vregs created by virtual scavenging.
    Fn.getRegInfo().clearVirtRegs();
  }

  // Warn on stack size when it exceeds the given limit.
  MachineFrameInfo &MFI = Fn.getFrameInfo();
  uint64_t StackSize = MFI.getStackSize();
  if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) {
    DiagnosticInfoStackSize DiagStackSize(*F, StackSize);
    F->getContext().diagnose(DiagStackSize);
  }

  delete RS;
  SaveBlocks.clear();
  RestoreBlocks.clear();
  MFI.setSavePoint(nullptr);
  MFI.setRestorePoint(nullptr);
  return true;
}
static void processOptionImpl(cl::opt<bool> &O, const cl::opt<bool> &Default) {
  if (!O.getNumOccurrences() || O.getPosition() < Default.getPosition())
    O = Default.getValue();
}
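All of the snippets above lean on the same property of cl::opt: getNumOccurrences() counts how many times the option actually appeared on the command line, which is what lets code distinguish "left at the default" from "explicitly set to the default value". Below is a minimal sketch of that recurring pattern, using a hypothetical -enable-foo flag:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Hypothetical flag; cl::init sets the value used when it never appears.
static cl::opt<bool> EnableFoo("enable-foo", cl::desc("Enable foo"),
                               cl::init(false));

// An explicit -enable-foo or -enable-foo=false always wins, even when it
// matches the default; otherwise defer to a default computed elsewhere
// (e.g. from the target or the optimization level).
static bool shouldEnableFoo(bool ComputedDefault) {
  if (EnableFoo.getNumOccurrences() > 0)
    return EnableFoo;
  return ComputedDefault;
}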