Beispiel #1
0
void PPCPassConfig::addIRPasses() {
  if (TM->getOptLevel() != CodeGenOpt::None)
    addPass(createPPCBoolRetToIntPass());
  addPass(createAtomicExpandPass(&getPPCTargetMachine()));

  // For the BG/Q (or if explicitly requested), add explicit data prefetch
  // intrinsics.
  bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
                        getOptLevel() != CodeGenOpt::None;
  if (EnablePrefetch.getNumOccurrences() > 0)
    UsePrefetching = EnablePrefetch;
  if (UsePrefetching)
    addPass(createPPCLoopDataPrefetchPass());

  if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
    // Call SeparateConstOffsetFromGEP pass to extract constants within indices
    // and lower a GEP with multiple indices to either arithmetic operations or
    // multiple GEPs with single index.
    addPass(createSeparateConstOffsetFromGEPPass(TM, true));
    // Call EarlyCSE pass to find and remove subexpressions in the lowered
    // result.
    addPass(createEarlyCSEPass());
    // Do loop invariant code motion in case part of the lowered result is
    // invariant.
    addPass(createLICMPass());
  }

  TargetPassConfig::addIRPasses();
}
Beispiel #2
0
/// This routine adds optimization passes based on selected optimization level,
/// OptLevel.
///
/// OptLevel - Optimization Level
static void AddOptimizationPasses(PassManagerBase &MPM,FunctionPassManager &FPM,
                                  unsigned OptLevel, unsigned SizeLevel) {
  FPM.add(createVerifierPass());          // Verify that input is correct
  MPM.add(createDebugInfoVerifierPass()); // Verify that debug info is correct

  PassManagerBuilder Builder;
  Builder.OptLevel = OptLevel;
  Builder.SizeLevel = SizeLevel;

  if (DisableInline) {
    // No inlining pass
  } else if (OptLevel > 1) {
    Builder.Inliner = createFunctionInliningPass(OptLevel, SizeLevel);
  } else {
    Builder.Inliner = createAlwaysInlinerPass();
  }
  Builder.DisableUnitAtATime = !UnitAtATime;
  Builder.DisableUnrollLoops = (DisableLoopUnrolling.getNumOccurrences() > 0) ?
                               DisableLoopUnrolling : OptLevel == 0;

  // This is final, unless there is a #pragma vectorize enable
  if (DisableLoopVectorization)
    Builder.LoopVectorize = false;
  // If option wasn't forced via cmd line (-vectorize-loops, -loop-vectorize)
  else if (!Builder.LoopVectorize)
    Builder.LoopVectorize = OptLevel > 1 && SizeLevel < 2;

  // When #pragma vectorize is on for SLP, do the same as above
  Builder.SLPVectorize =
      DisableSLPVectorization ? false : OptLevel > 1 && SizeLevel < 2;

  Builder.populateFunctionPassManager(FPM);
  Builder.populateModulePassManager(MPM);
}
Beispiel #3
0
int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv);
  if (Input.getNumOccurrences()) {
    OwningPtr<MemoryBuffer> Buf;
    if (MemoryBuffer::getFileOrSTDIN(Input, Buf))
      return 1;

    llvm::SourceMgr sm;
    if (DumpTokens) {
      yaml::dumpTokens(Buf->getBuffer(), outs());
    }

    if (DumpCanonical) {
      yaml::Stream stream(Buf->getBuffer(), sm);
      dumpStream(stream);
    }
  }

  if (Verify) {
    llvm::TimerGroup Group("YAML parser benchmark");
    benchmark(Group, "Fast", createJSONText(10, 500));
  } else if (!DumpCanonical && !DumpTokens) {
    llvm::TimerGroup Group("YAML parser benchmark");
    benchmark(Group, "Small Values", createJSONText(MemoryLimitMB, 5));
    benchmark(Group, "Medium Values", createJSONText(MemoryLimitMB, 500));
    benchmark(Group, "Large Values", createJSONText(MemoryLimitMB, 50000));
  }

  return 0;
}
Beispiel #4
0
/// AddOptimizationPasses - This routine adds optimization passes
/// based on selected optimization level, OptLevel. This routine
/// duplicates llvm-gcc behaviour.
///
/// OptLevel - Optimization Level
static void AddOptimizationPasses(PassManagerBase &MPM,FunctionPassManager &FPM,
                                  unsigned OptLevel, unsigned SizeLevel) {
  FPM.add(createVerifierPass());                  // Verify that input is correct

  PassManagerBuilder Builder;
  Builder.OptLevel = OptLevel;
  Builder.SizeLevel = SizeLevel;

  if (DisableInline) {
    // No inlining pass
  } else if (OptLevel > 1) {
    unsigned Threshold = 225;
    if (SizeLevel == 1)      // -Os
      Threshold = 75;
    else if (SizeLevel == 2) // -Oz
      Threshold = 25;
    if (OptLevel > 2)
      Threshold = 275;
    Builder.Inliner = createFunctionInliningPass(Threshold);
  } else {
    Builder.Inliner = createAlwaysInlinerPass();
  }
  Builder.DisableUnitAtATime = !UnitAtATime;
  Builder.DisableUnrollLoops = (DisableLoopUnrolling.getNumOccurrences() > 0) ?
                               DisableLoopUnrolling : OptLevel == 0;

  Builder.LoopVectorize = OptLevel > 1 && SizeLevel < 2;
  Builder.SLPVectorize = true;

  Builder.populateFunctionPassManager(FPM);
  Builder.populateModulePassManager(MPM);
}
Beispiel #5
0
unsigned Inliner::getInlineThreshold(CallSite CS) const {
  int thres = InlineThreshold; // -inline-threshold or else selected by
                               // overall opt level

  // If -inline-threshold is not given, listen to the optsize attribute when it
  // would decrease the threshold.
  Function *Caller = CS.getCaller();
  bool OptSize = Caller && !Caller->isDeclaration() &&
    Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                         Attribute::OptimizeForSize);
  if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
      OptSizeThreshold < thres)
    thres = OptSizeThreshold;

  // Listen to the inlinehint attribute when it would increase the threshold
  // and the caller does not need to minimize its size.
  Function *Callee = CS.getCalledFunction();
  bool InlineHint = Callee && !Callee->isDeclaration() &&
    Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                         Attribute::InlineHint);
  if (InlineHint && HintThreshold > thres
      && !Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                               Attribute::MinSize))
    thres = HintThreshold;

  // Listen to the cold attribute when it would decrease the threshold.
  bool ColdCallee = Callee && !Callee->isDeclaration() &&
    Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                         Attribute::Cold);
  if (ColdCallee && ColdThreshold < thres)
    thres = ColdThreshold;

  return thres;
}
Beispiel #6
0
Datei: lto.cpp Projekt: efcs/llvm
thinlto_code_gen_t thinlto_create_codegen(void) {
  lto_initialize();
  ThinLTOCodeGenerator *CodeGen = new ThinLTOCodeGenerator();
  CodeGen->setTargetOptions(InitTargetOptionsFromCodeGenFlags());

  if (OptLevel.getNumOccurrences()) {
    if (OptLevel < '0' || OptLevel > '3')
      report_fatal_error("Optimization level must be between 0 and 3");
    CodeGen->setOptLevel(OptLevel - '0');
    switch (OptLevel) {
    case '0':
      CodeGen->setCodeGenOptLevel(CodeGenOpt::None);
      break;
    case '1':
      CodeGen->setCodeGenOptLevel(CodeGenOpt::Less);
      break;
    case '2':
      CodeGen->setCodeGenOptLevel(CodeGenOpt::Default);
      break;
    case '3':
      CodeGen->setCodeGenOptLevel(CodeGenOpt::Aggressive);
      break;
    }
  }
  return wrap(CodeGen);
}
Beispiel #7
0
// Helper function to handle -of, -od, etc.
static void initFromString(char*& dest, const cl::opt<std::string>& src) {
    dest = 0;
    if (src.getNumOccurrences() != 0) {
        if (src.empty())
            error("Expected argument to '-%s'", src.ArgStr);
        dest = mem.strdup(src.c_str());
    }
}
Beispiel #8
0
RegBankSelect::RegBankSelect(Mode RunningMode)
    : MachineFunctionPass(ID), OptMode(RunningMode) {
  initializeRegBankSelectPass(*PassRegistry::getPassRegistry());
  if (RegBankSelectMode.getNumOccurrences() != 0) {
    OptMode = RegBankSelectMode;
    if (RegBankSelectMode != RunningMode)
      LLVM_DEBUG(dbgs() << "RegBankSelect mode overrided by command line\n");
  }
}
TargetMachine::TargetMachine(const Target &T, StringRef DataLayoutString,
                             const Triple &TT, StringRef CPU, StringRef FS,
                             const TargetOptions &Options)
    : TheTarget(T), DL(DataLayoutString), TargetTriple(TT), TargetCPU(CPU),
      TargetFS(FS), AsmInfo(nullptr), MRI(nullptr), MII(nullptr), STI(nullptr),
      RequireStructuredCFG(false), Options(Options) {
  if (EnableIPRA.getNumOccurrences())
    this->Options.EnableIPRA = EnableIPRA;
}
void BasicTTI::getUnrollingPreferences(Loop *L,
                                       UnrollingPreferences &UP) const {
  // This unrolling functionality is target independent, but to provide some
  // motivation for its intended use, for x86:

  // According to the Intel 64 and IA-32 Architectures Optimization Reference
  // Manual, Intel Core models and later have a loop stream detector
  // (and associated uop queue) that can benefit from partial unrolling.
  // The relevant requirements are:
  //  - The loop must have no more than 4 (8 for Nehalem and later) branches
  //    taken, and none of them may be calls.
  //  - The loop can have no more than 18 (28 for Nehalem and later) uops.

  // According to the Software Optimization Guide for AMD Family 15h Processors,
  // models 30h-4fh (Steamroller and later) have a loop predictor and loop
  // buffer which can benefit from partial unrolling.
  // The relevant requirements are:
  //  - The loop must have fewer than 16 branches
  //  - The loop must have less than 40 uops in all executed loop branches

  // The number of taken branches in a loop is hard to estimate here, and
  // benchmarking has revealed that it is better not to be conservative when
  // estimating the branch count. As a result, we'll ignore the branch limits
  // until someone finds a case where it matters in practice.

  unsigned MaxOps;
  const TargetSubtargetInfo *ST = &TM->getSubtarget<TargetSubtargetInfo>();
  if (PartialUnrollingThreshold.getNumOccurrences() > 0)
    MaxOps = PartialUnrollingThreshold;
  else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
    MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
  else
    return;

  // Scan the loop: don't unroll loops with calls.
  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
       I != E; ++I) {
    BasicBlock *BB = *I;

    for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
      if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
        ImmutableCallSite CS(J);
        if (const Function *F = CS.getCalledFunction()) {
          if (!TopTTI->isLoweredToCall(F))
            continue;
        }

        return;
      }
  }

  // Enable runtime and partial unrolling up to the specified size.
  UP.Partial = UP.Runtime = true;
  UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps;
}
Beispiel #11
0
static CodeGenOpt::Level GetCodeGenOptLevel() {
  if (CodeGenOptLevel.getNumOccurrences())
    return static_cast<CodeGenOpt::Level>(unsigned(CodeGenOptLevel));
  if (OptLevelO1)
    return CodeGenOpt::Less;
  if (OptLevelO2)
    return CodeGenOpt::Default;
  if (OptLevelO3)
    return CodeGenOpt::Aggressive;
  return CodeGenOpt::None;
}
Beispiel #12
0
static void processViewOptions() {
  if (!EnableAllViews.getNumOccurrences() &&
      !EnableAllStats.getNumOccurrences())
    return;

  if (EnableAllViews.getNumOccurrences()) {
    processOptionImpl(PrintSummaryView, EnableAllViews);
    processOptionImpl(PrintResourcePressureView, EnableAllViews);
    processOptionImpl(PrintTimelineView, EnableAllViews);
    processOptionImpl(PrintInstructionInfoView, EnableAllViews);
  }

  const cl::opt<bool> &Default =
      EnableAllViews.getPosition() < EnableAllStats.getPosition()
          ? EnableAllStats
          : EnableAllViews;
  processOptionImpl(PrintRegisterFileStats, Default);
  processOptionImpl(PrintDispatchStats, Default);
  processOptionImpl(PrintSchedulerStats, Default);
  processOptionImpl(PrintRetireStats, Default);
}
unsigned PPCTTIImpl::getCacheLineSize() {
  // Check first if the user specified a custom line size.
  if (CacheLineSize.getNumOccurrences() > 0)
    return CacheLineSize;

  // On P7, P8 or P9 we have a cache line size of 128.
  unsigned Directive = ST->getDarwinDirective();
  if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
      Directive == PPC::DIR_PWR9)
    return 128;

  // On other processors return a default of 64 bytes.
  return 64;
}
Beispiel #14
0
R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
                                     StringRef CPU, StringRef FS,
                                     TargetOptions Options,
                                     Optional<Reloc::Model> RM,
                                     Optional<CodeModel::Model> CM,
                                     CodeGenOpt::Level OL, bool JIT)
    : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
  setRequiresStructuredCFG(true);

  // Override the default since calls aren't supported for r600.
  if (EnableFunctionCalls &&
      EnableAMDGPUFunctionCallsOpt.getNumOccurrences() == 0)
    EnableFunctionCalls = false;
}
Beispiel #15
0
/// Returns true if we can profitably unroll the multi-exit loop L. Currently,
/// we return true only if UnrollRuntimeMultiExit is set to true.
static bool canProfitablyUnrollMultiExitLoop(
    Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
    bool PreserveLCSSA, bool UseEpilogRemainder) {

#if !defined(NDEBUG)
  SmallVector<BasicBlock *, 8> OtherExitsDummyCheck;
  assert(canSafelyUnrollMultiExitLoop(L, OtherExitsDummyCheck, LatchExit,
                                      PreserveLCSSA, UseEpilogRemainder) &&
         "Should be safe to unroll before checking profitability!");
#endif
  // Priority goes to UnrollRuntimeMultiExit if it's supplied.
  return UnrollRuntimeMultiExit.getNumOccurrences() ? UnrollRuntimeMultiExit
                                                    : false;
}
Beispiel #16
0
unsigned Inliner::getInlineThreshold(CallSite CS) const {
  int thres = InlineThreshold; // -inline-threshold or else selected by
                               // overall opt level

  // If -inline-threshold is not given, listen to the optsize attribute when it
  // would decrease the threshold.
  Function *Caller = CS.getCaller();
  bool OptSize = Caller && !Caller->isDeclaration() &&
                 // FIXME: Use Function::optForSize().
                 Caller->hasFnAttribute(Attribute::OptimizeForSize);
  if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
      OptSizeThreshold < thres)
    thres = OptSizeThreshold;

  // Listen to the inlinehint attribute when it would increase the threshold
  // and the caller does not need to minimize its size.
  Function *Callee = CS.getCalledFunction();
  bool InlineHint = Callee && !Callee->isDeclaration() &&
                    Callee->hasFnAttribute(Attribute::InlineHint);
  if (InlineHint && HintThreshold > thres &&
      !Caller->hasFnAttribute(Attribute::MinSize))
    thres = HintThreshold;

  // Listen to the cold attribute when it would decrease the threshold.
  bool ColdCallee = Callee && !Callee->isDeclaration() &&
                    Callee->hasFnAttribute(Attribute::Cold);
  // Command line argument for InlineLimit will override the default
  // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
  // do not use the default cold threshold even if it is smaller.
  if ((InlineLimit.getNumOccurrences() == 0 ||
       ColdThreshold.getNumOccurrences() > 0) && ColdCallee &&
      ColdThreshold < thres)
    thres = ColdThreshold;

  return thres;
}
Beispiel #17
0
int main(int argc, char **argv) {
  // Print a stack trace if we signal out.
  sys::PrintStackTraceOnErrorSignal();
  PrettyStackTraceProgram X(argc, argv);

  llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
  cl::ParseCommandLineOptions(argc, argv, "llvm object size dumper\n");

  ToolName = argv[0];
  if (OutputFormatShort.getNumOccurrences())
    OutputFormat = OutputFormatShort;
  if (RadixShort.getNumOccurrences())
    Radix = RadixShort;

  for (unsigned i = 0; i < ArchFlags.size(); ++i) {
    if (ArchFlags[i] == "all") {
      ArchAll = true;
    } else {
      Triple T = MachOObjectFile::getArch(ArchFlags[i]);
      if (T.getArch() == Triple::UnknownArch) {
        outs() << ToolName << ": for the -arch option: Unknown architecture "
               << "named '" << ArchFlags[i] << "'";
        return 1;
      }
    }
  }

  if (InputFilenames.size() == 0)
    InputFilenames.push_back("a.out");

  moreThanOneFile = InputFilenames.size() > 1;
  std::for_each(InputFilenames.begin(), InputFilenames.end(),
                PrintFileSectionSizes);

  return 0;
}
Beispiel #18
0
// Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
                            TargetTransformInfo::UnrollingPreferences &UP) {
  UP.PeelCount = 0;
  if (!canPeel(L))
    return;

  // Only try to peel innermost loops.
  if (!L->empty())
    return;

  // If the user provided a peel count, use that.
  bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
  if (UserPeelCount) {
    DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
                 << " iterations.\n");
    UP.PeelCount = UnrollForcePeelCount;
    return;
  }

  // If we don't know the trip count, but have reason to believe the average
  // trip count is low, peeling should be beneficial, since we will usually
  // hit the peeled section.
  // We only do this in the presence of profile information, since otherwise
  // our estimates of the trip count are not reliable enough.
  if (UP.AllowPeeling && L->getHeader()->getParent()->getEntryCount()) {
    Optional<unsigned> PeelCount = getLoopEstimatedTripCount(L);
    if (!PeelCount)
      return;

    DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount
                 << "\n");

    if (*PeelCount) {
      if ((*PeelCount <= UnrollPeelMaxCount) &&
          (LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
        DEBUG(dbgs() << "Peeling first " << *PeelCount << " iterations.\n");
        UP.PeelCount = *PeelCount;
        return;
      }
      DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
      DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
      DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) << "\n");
      DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n");
    }
  }

  return;
}
Beispiel #19
0
/// This routine adds optimization passes based on selected optimization level,
/// OptLevel.
///
/// OptLevel - Optimization Level
static void AddOptimizationPasses(legacy::PassManagerBase &MPM,
                                  legacy::FunctionPassManager &FPM,
                                  TargetMachine *TM, unsigned OptLevel,
                                  unsigned SizeLevel) {
  if (!NoVerify || VerifyEach)
    FPM.add(createVerifierPass()); // Verify that input is correct

  PassManagerBuilder Builder;
  Builder.OptLevel = OptLevel;
  Builder.SizeLevel = SizeLevel;

  if (DisableInline) {
    // No inlining pass
  } else if (OptLevel > 1) {
    Builder.Inliner = createFunctionInliningPass(OptLevel, SizeLevel);
  } else {
    Builder.Inliner = createAlwaysInlinerLegacyPass();
  }
  Builder.DisableUnitAtATime = !UnitAtATime;
  Builder.DisableUnrollLoops = (DisableLoopUnrolling.getNumOccurrences() > 0) ?
                               DisableLoopUnrolling : OptLevel == 0;

  // This is final, unless there is a #pragma vectorize enable
  if (DisableLoopVectorization)
    Builder.LoopVectorize = false;
  // If option wasn't forced via cmd line (-vectorize-loops, -loop-vectorize)
  else if (!Builder.LoopVectorize)
    Builder.LoopVectorize = OptLevel > 1 && SizeLevel < 2;

  // When #pragma vectorize is on for SLP, do the same as above
  Builder.SLPVectorize =
      DisableSLPVectorization ? false : OptLevel > 1 && SizeLevel < 2;

  // Add target-specific passes that need to run as early as possible.
  if (TM)
    Builder.addExtension(
        PassManagerBuilder::EP_EarlyAsPossible,
        [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
          TM->addEarlyAsPossiblePasses(PM);
        });

  if (Coroutines)
    addCoroutinePassesToExtensionPoints(Builder);

  Builder.populateFunctionPassManager(FPM);
  Builder.populateModulePassManager(MPM);
}
Beispiel #20
0
/// Returns true if we can profitably unroll the multi-exit loop L. Currently,
/// we return true only if UnrollRuntimeMultiExit is set to true.
static bool canProfitablyUnrollMultiExitLoop(
    Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
    bool PreserveLCSSA, bool UseEpilogRemainder) {

#if !defined(NDEBUG)
  SmallVector<BasicBlock *, 8> OtherExitsDummyCheck;
  assert(canSafelyUnrollMultiExitLoop(L, OtherExitsDummyCheck, LatchExit,
                                      PreserveLCSSA, UseEpilogRemainder) &&
         "Should be safe to unroll before checking profitability!");
#endif

  // Priority goes to UnrollRuntimeMultiExit if it's supplied.
  if (UnrollRuntimeMultiExit.getNumOccurrences())
    return UnrollRuntimeMultiExit;

  // The main pain point with multi-exit loop unrolling is that once unrolled,
  // we will not be able to merge all blocks into a straight line code.
  // There are branches within the unrolled loop that go to the OtherExits.
  // The second point is the increase in code size, but this is true
  // irrespective of multiple exits.

  // Note: Both the heuristics below are coarse grained. We are essentially
  // enabling unrolling of loops that have a single side exit other than the
  // normal LatchExit (i.e. exiting into a deoptimize block).
  // The heuristics considered are:
  // 1. low number of branches in the unrolled version.
  // 2. high predictability of these extra branches.
  // We avoid unrolling loops that have more than two exiting blocks. This
  // limits the total number of branches in the unrolled loop to be atmost
  // the unroll factor (since one of the exiting blocks is the latch block).
  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  if (ExitingBlocks.size() > 2)
    return false;

  // The second heuristic is that L has one exit other than the latchexit and
  // that exit is a deoptimize block. We know that deoptimize blocks are rarely
  // taken, which also implies the branch leading to the deoptimize block is
  // highly predictable.
  return (OtherExits.size() == 1 &&
          OtherExits[0]->getTerminatingDeoptimizeCall());
  // TODO: These can be fine-tuned further to consider code size or deopt states
  // that are captured by the deoptimize exit block.
  // Also, we can extend this to support more cases, if we actually
  // know of kinds of multiexit loops that would benefit from unrolling.
}
Beispiel #21
0
unsigned Inliner::getInlineThreshold(CallSite CS) const {
  int thres = InlineThreshold;

  // Listen to optsize when -inline-limit is not given.
  Function *Caller = CS.getCaller();
  if (Caller && !Caller->isDeclaration() &&
      Caller->hasFnAttr(Attribute::OptimizeForSize) &&
      InlineLimit.getNumOccurrences() == 0)
    thres = OptSizeThreshold;

  // Listen to inlinehint when it would increase the threshold.
  Function *Callee = CS.getCalledFunction();
  if (HintThreshold > thres && Callee && !Callee->isDeclaration() &&
      Callee->hasFnAttr(Attribute::InlineHint))
    thres = HintThreshold;

  return thres;
}
Beispiel #22
0
unsigned Inliner::getInlineThreshold(CallSite CS) const {
  int thres = InlineThreshold; // -inline-threshold or else selected by
                               // overall opt level

  // If -inline-threshold is not given, listen to the optsize attribute when it
  // would decrease the threshold.
  Function *Caller = CS.getCaller();
  bool OptSize = Caller && !Caller->isDeclaration() &&
    Caller->hasFnAttr(Attribute::OptimizeForSize);
  if (!(InlineLimit.getNumOccurrences() > 0) && OptSize && OptSizeThreshold < thres)
    thres = OptSizeThreshold;

  // Listen to the inlinehint attribute when it would increase the threshold.
  Function *Callee = CS.getCalledFunction();
  bool InlineHint = Callee && !Callee->isDeclaration() &&
    Callee->hasFnAttr(Attribute::InlineHint);
  if (InlineHint && HintThreshold > thres)
    thres = HintThreshold;

  return thres;
}
Beispiel #23
0
int main(int argc, char **argv) {
  // Print a stack trace if we signal out.
  sys::PrintStackTraceOnErrorSignal(argv[0]);
  PrettyStackTraceProgram X(argc, argv);
  llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
  cl::ParseCommandLineOptions(argc, argv, "llvm objcopy utility\n");
  ToolName = argv[0];
  if (InputFilename.empty()) {
    cl::PrintHelpMessage();
    return 2;
  }

  auto Reader = CreateReader();
  auto Obj = Reader->create();
  StringRef Output =
      OutputFilename.getNumOccurrences() ? OutputFilename : InputFilename;
  auto Writer = CreateWriter(*Obj, Output);
  HandleArgs(*Obj, *Reader);
  Writer->finalize();
  Writer->write();
}
Beispiel #24
0
static std::string getProgram(const char *name, const cl::opt<std::string> &opt, const char *envVar = 0)
{
    std::string path;
    const char *prog = NULL;

    if (opt.getNumOccurrences() > 0 && opt.length() > 0 && (prog = opt.c_str()))
        path = findProgramByName(prog);

    if (path.empty() && envVar && (prog = getenv(envVar)))
        path = findProgramByName(prog);

    if (path.empty())
        path = findProgramByName(name);

    if (path.empty()) {
        error(Loc(), "failed to locate %s", name);
        fatal();
    }

    return path;
}
Beispiel #25
0
sys::Path getGcc() {
    const char *cc = NULL;
    
    if (gcc.getNumOccurrences() > 0 && gcc.length() > 0)
        cc = gcc.c_str();
    
    if (!cc)
        cc = getenv("CC");
    if (!cc)
        cc = "gcc";
    
    sys::Path path = sys::Program::FindProgramByName(cc);
    if (path.empty() && !cc) {
        if (cc) {
            path.set(cc);
        } else {
            error("failed to locate gcc");
            fatal();
        }
    }
    
    return path;
}
Beispiel #26
0
sys::Path getProgram(const char *name, const cl::opt<std::string> &opt, const char *envVar = 0)
{
    const char *prog = NULL;

    if (opt.getNumOccurrences() > 0 && opt.length() > 0)
        prog = gcc.c_str();

    if (!prog && envVar)
        prog = getenv(envVar);
    if (!prog)
        prog = name;

    sys::Path path = sys::Program::FindProgramByName(prog);
    if (path.empty() && !prog) {
        if (prog) {
            path.set(prog);
        } else {
            error("failed to locate %s", name);
            fatal();
        }
    }

    return path;
}
//===----------------------------------------------------------------------===//
// main for opt
//
int main(int argc, char **argv) {
  sys::PrintStackTraceOnErrorSignal();
  llvm::PrettyStackTraceProgram X(argc, argv);

  // Enable debug stream buffering.
  EnableDebugBuffering = true;

  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
  LLVMContext &Context = getGlobalContext();

  InitializeAllTargets();
  InitializeAllTargetMCs();
  InitializeAllAsmPrinters();

  // Initialize passes
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeCore(Registry);
  initializeScalarOpts(Registry);
  initializeObjCARCOpts(Registry);
  initializeVectorization(Registry);
  initializeIPO(Registry);
  initializeAnalysis(Registry);
  initializeIPA(Registry);
  initializeTransformUtils(Registry);
  initializeInstCombine(Registry);
  initializeInstrumentation(Registry);
  initializeTarget(Registry);
  // For codegen passes, only passes that do IR to IR transformation are
  // supported.
  initializeCodeGenPreparePass(Registry);
  initializeAtomicExpandPass(Registry);
  initializeRewriteSymbolsPass(Registry);

#ifdef LINK_POLLY_INTO_TOOLS
  polly::initializePollyPasses(Registry);
#endif

  cl::ParseCommandLineOptions(argc, argv,
    "llvm .bc -> .bc modular optimizer and analysis printer\n");

  if (AnalyzeOnly && NoOutput) {
    errs() << argv[0] << ": analyze mode conflicts with no-output mode.\n";
    return 1;
  }

  SMDiagnostic Err;

  // Load the input module...
  std::unique_ptr<Module> M = parseIRFile(InputFilename, Err, Context);

  if (!M) {
    Err.print(argv[0], errs());
    return 1;
  }

  // If we are supposed to override the target triple, do so now.
  if (!TargetTriple.empty())
    M->setTargetTriple(Triple::normalize(TargetTriple));

  // Figure out what stream we are supposed to write to...
  std::unique_ptr<tool_output_file> Out;
  if (NoOutput) {
    if (!OutputFilename.empty())
      errs() << "WARNING: The -o (output filename) option is ignored when\n"
                "the --disable-output option is used.\n";
  } else {
    // Default to standard output.
    if (OutputFilename.empty())
      OutputFilename = "-";

    std::error_code EC;
    Out.reset(new tool_output_file(OutputFilename, EC, sys::fs::F_None));
    if (EC) {
      errs() << EC.message() << '\n';
      return 1;
    }
  }

  // If the output is set to be emitted to standard out, and standard out is a
  // console, print out a warning message and refuse to do it.  We don't
  // impress anyone by spewing tons of binary goo to a terminal.
  if (!Force && !NoOutput && !AnalyzeOnly && !OutputAssembly)
    if (CheckBitcodeOutputToConsole(Out->os(), !Quiet))
      NoOutput = true;

  if (PassPipeline.getNumOccurrences() > 0) {
    OutputKind OK = OK_NoOutput;
    if (!NoOutput)
      OK = OutputAssembly ? OK_OutputAssembly : OK_OutputBitcode;

    VerifierKind VK = VK_VerifyInAndOut;
    if (NoVerify)
      VK = VK_NoVerifier;
    else if (VerifyEach)
      VK = VK_VerifyEachPass;

    // The user has asked to use the new pass manager and provided a pipeline
    // string. Hand off the rest of the functionality to the new code for that
    // layer.
    return runPassPipeline(argv[0], Context, *M, Out.get(), PassPipeline,
                           OK, VK)
               ? 0
               : 1;
  }

  // Create a PassManager to hold and optimize the collection of passes we are
  // about to build.
  //
  PassManager Passes;

  // Add an appropriate TargetLibraryInfo pass for the module's triple.
  TargetLibraryInfo *TLI = new TargetLibraryInfo(Triple(M->getTargetTriple()));

  // The -disable-simplify-libcalls flag actually disables all builtin optzns.
  if (DisableSimplifyLibCalls)
    TLI->disableAllFunctions();
  Passes.add(TLI);

  // Add an appropriate DataLayout instance for this module.
  const DataLayout *DL = M->getDataLayout();
  if (!DL && !DefaultDataLayout.empty()) {
    M->setDataLayout(DefaultDataLayout);
    DL = M->getDataLayout();
  }

  if (DL)
    Passes.add(new DataLayoutPass());

  Triple ModuleTriple(M->getTargetTriple());
  TargetMachine *Machine = nullptr;
  if (ModuleTriple.getArch())
    Machine = GetTargetMachine(Triple(ModuleTriple));
  std::unique_ptr<TargetMachine> TM(Machine);

  // Add internal analysis passes from the target machine.
  if (TM)
    TM->addAnalysisPasses(Passes);

  std::unique_ptr<FunctionPassManager> FPasses;
  if (OptLevelO1 || OptLevelO2 || OptLevelOs || OptLevelOz || OptLevelO3) {
    FPasses.reset(new FunctionPassManager(M.get()));
    if (DL)
      FPasses->add(new DataLayoutPass());
    if (TM)
      TM->addAnalysisPasses(*FPasses);

  }

  if (PrintBreakpoints) {
    // Default to standard output.
    if (!Out) {
      if (OutputFilename.empty())
        OutputFilename = "-";

      std::error_code EC;
      Out = llvm::make_unique<tool_output_file>(OutputFilename, EC,
                                                sys::fs::F_None);
      if (EC) {
        errs() << EC.message() << '\n';
        return 1;
      }
    }
    Passes.add(createBreakpointPrinter(Out->os()));
    NoOutput = true;
  }

  // If the -strip-debug command line option was specified, add it.
  if (StripDebug)
    addPass(Passes, createStripSymbolsPass(true));

  // Create a new optimization pass for each one specified on the command line
  for (unsigned i = 0; i < PassList.size(); ++i) {
    if (StandardLinkOpts &&
        StandardLinkOpts.getPosition() < PassList.getPosition(i)) {
      AddStandardLinkPasses(Passes);
      StandardLinkOpts = false;
    }

    if (OptLevelO1 && OptLevelO1.getPosition() < PassList.getPosition(i)) {
      AddOptimizationPasses(Passes, *FPasses, 1, 0);
      OptLevelO1 = false;
    }

    if (OptLevelO2 && OptLevelO2.getPosition() < PassList.getPosition(i)) {
      AddOptimizationPasses(Passes, *FPasses, 2, 0);
      OptLevelO2 = false;
    }

    if (OptLevelOs && OptLevelOs.getPosition() < PassList.getPosition(i)) {
      AddOptimizationPasses(Passes, *FPasses, 2, 1);
      OptLevelOs = false;
    }

    if (OptLevelOz && OptLevelOz.getPosition() < PassList.getPosition(i)) {
      AddOptimizationPasses(Passes, *FPasses, 2, 2);
      OptLevelOz = false;
    }

    if (OptLevelO3 && OptLevelO3.getPosition() < PassList.getPosition(i)) {
      AddOptimizationPasses(Passes, *FPasses, 3, 0);
      OptLevelO3 = false;
    }

    const PassInfo *PassInf = PassList[i];
    Pass *P = nullptr;
    if (PassInf->getTargetMachineCtor())
      P = PassInf->getTargetMachineCtor()(TM.get());
    else if (PassInf->getNormalCtor())
      P = PassInf->getNormalCtor()();
    else
      errs() << argv[0] << ": cannot create pass: "******"\n";
    if (P) {
      PassKind Kind = P->getPassKind();
      addPass(Passes, P);

      if (AnalyzeOnly) {
        switch (Kind) {
        case PT_BasicBlock:
          Passes.add(createBasicBlockPassPrinter(PassInf, Out->os(), Quiet));
          break;
        case PT_Region:
          Passes.add(createRegionPassPrinter(PassInf, Out->os(), Quiet));
          break;
        case PT_Loop:
          Passes.add(createLoopPassPrinter(PassInf, Out->os(), Quiet));
          break;
        case PT_Function:
          Passes.add(createFunctionPassPrinter(PassInf, Out->os(), Quiet));
          break;
        case PT_CallGraphSCC:
          Passes.add(createCallGraphPassPrinter(PassInf, Out->os(), Quiet));
          break;
        default:
          Passes.add(createModulePassPrinter(PassInf, Out->os(), Quiet));
          break;
        }
      }
    }

    if (PrintEachXForm)
      Passes.add(createPrintModulePass(errs()));
  }

  if (StandardLinkOpts) {
    AddStandardLinkPasses(Passes);
    StandardLinkOpts = false;
  }

  if (OptLevelO1)
    AddOptimizationPasses(Passes, *FPasses, 1, 0);

  if (OptLevelO2)
    AddOptimizationPasses(Passes, *FPasses, 2, 0);

  if (OptLevelOs)
    AddOptimizationPasses(Passes, *FPasses, 2, 1);

  if (OptLevelOz)
    AddOptimizationPasses(Passes, *FPasses, 2, 2);

  if (OptLevelO3)
    AddOptimizationPasses(Passes, *FPasses, 3, 0);

  if (OptLevelO1 || OptLevelO2 || OptLevelOs || OptLevelOz || OptLevelO3) {
    FPasses->doInitialization();
    for (Function &F : *M)
      FPasses->run(F);
    FPasses->doFinalization();
  }

  // Check that the module is well formed on completion of optimization
  if (!NoVerify && !VerifyEach) {
    Passes.add(createVerifierPass());
    Passes.add(createDebugInfoVerifierPass());
  }

  // Write bitcode or assembly to the output as the last step...
  if (!NoOutput && !AnalyzeOnly) {
    if (OutputAssembly)
      Passes.add(createPrintModulePass(Out->os()));
    else
      Passes.add(createBitcodeWriterPass(Out->os()));
  }

  // Before executing passes, print the final values of the LLVM options.
  cl::PrintOptionValues();

  // Now that we have all of the passes ready, run them.
  Passes.run(*M);

  // Declare success.
  if (!NoOutput || PrintBreakpoints)
    Out->keep();

  return 0;
}
Beispiel #28
0
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
bool PEI::runOnMachineFunction(MachineFunction &MF) {
  const Function &F = MF.getFunction();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();

  RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr;
  FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF);
  FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) ||
    TRI->requiresFrameIndexReplacementScavenging(MF);
  ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();

  // Calculate the MaxCallFrameSize and AdjustsStack variables for the
  // function's frame information. Also eliminates call frame pseudo
  // instructions.
  calculateCallFrameInfo(MF);

  // Determine placement of CSR spill/restore code and prolog/epilog code:
  // place all spills in the entry block, all restores in return blocks.
  calculateSaveRestoreBlocks(MF);

  // Handle CSR spilling and restoring, for targets that need it.
  if (MF.getTarget().usesPhysRegsForPEI())
    spillCalleeSavedRegs(MF);

  // Allow the target machine to make final modifications to the function
  // before the frame layout is finalized.
  TFI->processFunctionBeforeFrameFinalized(MF, RS);

  // Calculate actual frame offsets for all abstract stack objects...
  calculateFrameObjectOffsets(MF);

  // Add prolog and epilog code to the function.  This function is required
  // to align the stack frame as necessary for any stack variables or
  // called functions.  Because of this, calculateCalleeSavedRegisters()
  // must be called before this function in order to set the AdjustsStack
  // and MaxCallFrameSize variables.
  if (!F.hasFnAttribute(Attribute::Naked))
    insertPrologEpilogCode(MF);

  // Replace all MO_FrameIndex operands with physical register references
  // and actual offsets.
  //
  replaceFrameIndices(MF);

  // If register scavenging is needed, as we've enabled doing it as a
  // post-pass, scavenge the virtual registers that frame index elimination
  // inserted.
  if (TRI->requiresRegisterScavenging(MF) && FrameIndexVirtualScavenging)
    scavengeFrameVirtualRegs(MF, *RS);

  // Warn on stack size when we exceeds the given limit.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  uint64_t StackSize = MFI.getStackSize();
  if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) {
    DiagnosticInfoStackSize DiagStackSize(F, StackSize);
    F.getContext().diagnose(DiagStackSize);
  }
  ORE->emit([&]() {
    return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "StackSize",
                                             MF.getFunction().getSubprogram(),
                                             &MF.front())
           << ore::NV("NumStackBytes", StackSize) << " stack bytes in function";
  });

  delete RS;
  SaveBlocks.clear();
  RestoreBlocks.clear();
  MFI.setSavePoint(nullptr);
  MFI.setRestorePoint(nullptr);
  return true;
}
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
///
bool PEI::runOnMachineFunction(MachineFunction &Fn) {
  if (!SpillCalleeSavedRegisters) {
    const TargetMachine &TM = Fn.getTarget();
    if (!TM.usesPhysRegsForPEI()) {
      SpillCalleeSavedRegisters = [](MachineFunction &, RegScavenger *,
                                     unsigned &, unsigned &, const MBBVector &,
                                     const MBBVector &) {};
      ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger &) {};
    } else {
      SpillCalleeSavedRegisters = doSpillCalleeSavedRegs;
      ScavengeFrameVirtualRegs = scavengeFrameVirtualRegs;
      UsesCalleeSaves = true;
    }
  }

  const Function* F = Fn.getFunction();
  const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();

  RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : nullptr;
  FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
  FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) ||
    TRI->requiresFrameIndexReplacementScavenging(Fn);
  ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();

  // Calculate the MaxCallFrameSize and AdjustsStack variables for the
  // function's frame information. Also eliminates call frame pseudo
  // instructions.
  calculateCallFrameInfo(Fn);

  // Determine placement of CSR spill/restore code and prolog/epilog code:
  // place all spills in the entry block, all restores in return blocks.
  calculateSaveRestoreBlocks(Fn);

  // Handle CSR spilling and restoring, for targets that need it.
  SpillCalleeSavedRegisters(Fn, RS, MinCSFrameIndex, MaxCSFrameIndex,
                            SaveBlocks, RestoreBlocks);

  // Allow the target machine to make final modifications to the function
  // before the frame layout is finalized.
  TFI->processFunctionBeforeFrameFinalized(Fn, RS);

  // Calculate actual frame offsets for all abstract stack objects...
  calculateFrameObjectOffsets(Fn);

  // Add prolog and epilog code to the function.  This function is required
  // to align the stack frame as necessary for any stack variables or
  // called functions.  Because of this, calculateCalleeSavedRegisters()
  // must be called before this function in order to set the AdjustsStack
  // and MaxCallFrameSize variables.
  if (!F->hasFnAttribute(Attribute::Naked))
    insertPrologEpilogCode(Fn);

  // Replace all MO_FrameIndex operands with physical register references
  // and actual offsets.
  //
  replaceFrameIndices(Fn);

  // If register scavenging is needed, as we've enabled doing it as a
  // post-pass, scavenge the virtual registers that frame index elimination
  // inserted.
  if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) {
      ScavengeFrameVirtualRegs(Fn, *RS);

      // Clear any vregs created by virtual scavenging.
      Fn.getRegInfo().clearVirtRegs();
  }

  // Warn on stack size when we exceeds the given limit.
  MachineFrameInfo &MFI = Fn.getFrameInfo();
  uint64_t StackSize = MFI.getStackSize();
  if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) {
    DiagnosticInfoStackSize DiagStackSize(*F, StackSize);
    F->getContext().diagnose(DiagStackSize);
  }

  delete RS;
  SaveBlocks.clear();
  RestoreBlocks.clear();
  MFI.setSavePoint(nullptr);
  MFI.setRestorePoint(nullptr);
  return true;
}
Beispiel #30
0
static void processOptionImpl(cl::opt<bool> &O, const cl::opt<bool> &Default) {
  if (!O.getNumOccurrences() || O.getPosition() < Default.getPosition())
    O = Default.getValue();
}