Code example #1
File: Inliner.cpp  Project: chrislipa/fractalstream
// InlineCallIfPossible - If it is possible to inline the specified call site,
// do so and update the CallGraph for this operation.
static bool InlineCallIfPossible(CallSite CS, CallGraph &CG,
                                 const std::set<Function*> &SCCFunctions,
                                 const TargetData &TD) {
    Function *Callee = CS.getCalledFunction();
    Function *Caller = CS.getCaller();

    if (!InlineFunction(CS, &CG, &TD)) return false;

    // If the inlined function had a higher stack protection level than the
    // calling function, then bump up the caller's stack protection level.
    if (Callee->hasFnAttr(Attribute::StackProtectReq))
        Caller->addFnAttr(Attribute::StackProtectReq);
    else if (Callee->hasFnAttr(Attribute::StackProtect) &&
             !Caller->hasFnAttr(Attribute::StackProtectReq))
        Caller->addFnAttr(Attribute::StackProtect);

    // If we inlined the last possible call site to the function, delete the
    // function body now.
    if (Callee->use_empty() && Callee->hasLocalLinkage() &&
            !SCCFunctions.count(Callee)) {
        DOUT << "    -> Deleting dead function: " << Callee->getName() << "\n";
        CallGraphNode *CalleeNode = CG[Callee];

        // Remove any call graph edges from the callee to its callees.
        CalleeNode->removeAllCalledFunctions();

        // Remove the node for the callee from the call graph and delete it.
        delete CG.removeFunctionFromModule(CalleeNode);
        ++NumDeleted;
    }
    return true;
}
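
The attribute logic above implements a simple precedence: a callee that requires stack protection (StackProtectReq) forces that attribute onto the caller, while a callee that merely requests it (StackProtect) upgrades the caller only if the caller does not already require it. A minimal standalone sketch of that rule, using an invented enum rather than LLVM attributes:

#include <cassert>

// Illustrative stand-in for the stack-protector function attributes.
enum class SSPLevel { None, StackProtect, StackProtectReq };

// Mirrors the propagation in InlineCallIfPossible: after inlining, the caller
// ends up with at least the callee's stack-protection level.
static SSPLevel mergeSSPLevel(SSPLevel Caller, SSPLevel Callee) {
  if (Callee == SSPLevel::StackProtectReq)
    return SSPLevel::StackProtectReq;
  if (Callee == SSPLevel::StackProtect && Caller != SSPLevel::StackProtectReq)
    return SSPLevel::StackProtect;
  return Caller;
}

int main() {
  assert(mergeSSPLevel(SSPLevel::None, SSPLevel::StackProtectReq) ==
         SSPLevel::StackProtectReq);
  assert(mergeSSPLevel(SSPLevel::StackProtectReq, SSPLevel::StackProtect) ==
         SSPLevel::StackProtectReq);
  return 0;
}
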
Code example #2
File: Inliner.cpp  Project: AmesianX/dagger
unsigned Inliner::getInlineThreshold(CallSite CS) const {
  int thres = InlineThreshold; // -inline-threshold or else selected by
                               // overall opt level

  // If -inline-threshold is not given, listen to the optsize attribute when it
  // would decrease the threshold.
  Function *Caller = CS.getCaller();
  bool OptSize = Caller && !Caller->isDeclaration() &&
    Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                         Attribute::OptimizeForSize);
  if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
      OptSizeThreshold < thres)
    thres = OptSizeThreshold;

  // Listen to the inlinehint attribute when it would increase the threshold
  // and the caller does not need to minimize its size.
  Function *Callee = CS.getCalledFunction();
  bool InlineHint = Callee && !Callee->isDeclaration() &&
    Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                         Attribute::InlineHint);
  if (InlineHint && HintThreshold > thres
      && !Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                               Attribute::MinSize))
    thres = HintThreshold;

  // Listen to the cold attribute when it would decrease the threshold.
  bool ColdCallee = Callee && !Callee->isDeclaration() &&
    Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                         Attribute::Cold);
  if (ColdCallee && ColdThreshold < thres)
    thres = ColdThreshold;

  return thres;
}
Code example #3
File: Inliner.cpp  Project: bkaradzic/SwiftShader
/// If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
/// This function also does some basic book-keeping to update the IR.  The
/// InlinedArrayAllocas map keeps track of any allocas that are already
/// available from other functions inlined into the caller.  If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
static bool InlineCallIfPossible(
    CallSite CS, InlineFunctionInfo &IFI,
    InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory,
    bool InsertLifetime, function_ref<AAResults &(Function &)> &AARGetter,
    ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
  Function *Callee = CS.getCalledFunction();
  Function *Caller = CS.getCaller();

  AAResults &AAR = AARGetter(*Callee);

  // Try to inline the function.  Get the list of static allocas that were
  // inlined.
  if (!InlineFunction(CS, IFI, &AAR, InsertLifetime))
    return false;

  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
    ImportedFunctionsStats.recordInline(*Caller, *Callee);

  AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee);

  if (!DisableInlinedAllocaMerging)
    mergeInlinedArrayAllocas(Caller, IFI, InlinedArrayAllocas, InlineHistory);

  return true;
}
Code example #4
File: Inliner.cpp  Project: 8l/SPIRV-LLVM
unsigned Inliner::getInlineThreshold(CallSite CS) const {
  int thres = InlineThreshold; // -inline-threshold or else selected by
                               // overall opt level

  // If -inline-threshold is not given, listen to the optsize attribute when it
  // would decrease the threshold.
  Function *Caller = CS.getCaller();
  bool OptSize = Caller && !Caller->isDeclaration() &&
                 Caller->hasFnAttribute(Attribute::OptimizeForSize);
  if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
      OptSizeThreshold < thres)
    thres = OptSizeThreshold;

  // Listen to the inlinehint attribute when it would increase the threshold
  // and the caller does not need to minimize its size.
  Function *Callee = CS.getCalledFunction();
  bool InlineHint = Callee && !Callee->isDeclaration() &&
                    Callee->hasFnAttribute(Attribute::InlineHint);
  if (InlineHint && HintThreshold > thres &&
      !Caller->hasFnAttribute(Attribute::MinSize))
    thres = HintThreshold;

  // Listen to the cold attribute when it would decrease the threshold.
  bool ColdCallee = Callee && !Callee->isDeclaration() &&
                    Callee->hasFnAttribute(Attribute::Cold);
  // Command line argument for InlineLimit will override the default
  // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
  // do not use the default cold threshold even if it is smaller.
  if ((InlineLimit.getNumOccurrences() == 0 ||
       ColdThreshold.getNumOccurrences() > 0) && ColdCallee &&
      ColdThreshold < thres)
    thres = ColdThreshold;

  return thres;
}
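
Both getInlineThreshold variants above apply the same precedence: optsize can only lower the threshold (and only when -inline-threshold was not given explicitly), inlinehint can only raise it (and never for a minsize caller), and a cold callee can lower it again, with an explicit -inline-threshold suppressing the default cold threshold unless -inlinecold-threshold is also present. A standalone sketch of that selection order, with illustrative defaults in place of the real cl::opt values:

#include <iostream>

// Standalone model of the threshold selection above.  The numeric defaults are
// illustrative stand-ins for the pass's cl::opt values, not authoritative.
struct ThresholdOpts {
  int InlineThreshold = 225;       // -inline-threshold (or opt-level default)
  int OptSizeThreshold = 75;       // used when the caller is optsize
  int HintThreshold = 325;         // used when the callee has inlinehint
  int ColdThreshold = 45;          // used when the callee is cold
  bool InlineLimitGiven = false;   // was -inline-threshold passed explicitly?
  bool ColdThresholdGiven = false; // was -inlinecold-threshold passed?
};

static int selectThreshold(const ThresholdOpts &O, bool CallerOptSize,
                           bool CallerMinSize, bool CalleeInlineHint,
                           bool CalleeCold) {
  int Thres = O.InlineThreshold;
  // optsize only ever lowers the threshold, and only when no explicit
  // -inline-threshold was given.
  if (!O.InlineLimitGiven && CallerOptSize && O.OptSizeThreshold < Thres)
    Thres = O.OptSizeThreshold;
  // inlinehint only ever raises it, and never for a minsize caller.
  if (CalleeInlineHint && O.HintThreshold > Thres && !CallerMinSize)
    Thres = O.HintThreshold;
  // A cold callee lowers it again; an explicit -inline-threshold suppresses
  // the default cold threshold unless -inlinecold-threshold was also given.
  if ((!O.InlineLimitGiven || O.ColdThresholdGiven) && CalleeCold &&
      O.ColdThreshold < Thres)
    Thres = O.ColdThreshold;
  return Thres;
}

int main() {
  ThresholdOpts O;
  // An optsize caller pulls the threshold down to OptSizeThreshold.
  std::cout << selectThreshold(O, /*CallerOptSize=*/true,
                               /*CallerMinSize=*/false,
                               /*CalleeInlineHint=*/false, /*CalleeCold=*/false)
            << "\n"; // prints 75
  return 0;
}
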
Code example #5
unsigned Inliner::getInlineThreshold(CallSite CS) const {
  int Threshold = InlineThreshold; // -inline-threshold or else selected by
                                   // overall opt level

  // If -inline-threshold is not given, listen to the optsize attribute when it
  // would decrease the threshold.
  Function *Caller = CS.getCaller();
  bool OptSize = Caller && !Caller->isDeclaration() &&
                 // FIXME: Use Function::optForSize().
                 Caller->hasFnAttribute(Attribute::OptimizeForSize);
  if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
      OptSizeThreshold < Threshold)
    Threshold = OptSizeThreshold;

  Function *Callee = CS.getCalledFunction();
  if (!Callee || Callee->isDeclaration())
    return Threshold;

  // If profile information is available, use that to adjust threshold of hot
  // and cold functions.
  // FIXME: The heuristic used below for determining hotness and coldness are
  // based on preliminary SPEC tuning and may not be optimal. Replace this with
  // a well-tuned heuristic based on *callsite* hotness and not callee hotness.
  uint64_t FunctionCount = 0, MaxFunctionCount = 0;
  bool HasPGOCounts = false;
  if (Callee->getEntryCount() &&
      Callee->getParent()->getMaximumFunctionCount()) {
    HasPGOCounts = true;
    FunctionCount = Callee->getEntryCount().getValue();
    MaxFunctionCount =
        Callee->getParent()->getMaximumFunctionCount().getValue();
  }

  // Listen to the inlinehint attribute or profile based hotness information
  // when it would increase the threshold and the caller does not need to
  // minimize its size.
  bool InlineHint =
      Callee->hasFnAttribute(Attribute::InlineHint) ||
      (HasPGOCounts &&
       FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount));
  if (InlineHint && HintThreshold > Threshold &&
      !Caller->hasFnAttribute(Attribute::MinSize))
    Threshold = HintThreshold;

  // Listen to the cold attribute or profile based coldness information
  // when it would decrease the threshold.
  bool ColdCallee =
      Callee->hasFnAttribute(Attribute::Cold) ||
      (HasPGOCounts &&
       FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount));
  // Command line argument for InlineLimit will override the default
  // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
  // do not use the default cold threshold even if it is smaller.
  if ((InlineLimit.getNumOccurrences() == 0 ||
       ColdThreshold.getNumOccurrences() > 0) && ColdCallee &&
      ColdThreshold < Threshold)
    Threshold = ColdThreshold;

  return Threshold;
}
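
The profile-driven variant above treats the callee as hot when its entry count reaches 30% of the module's maximum function count and as cold when it falls to 1% of it or less. A standalone sketch of just that classification (cutoffs from the listing, function names invented):

#include <cstdint>
#include <iostream>

// The 0.3 and 0.01 cutoffs come from the listing above; the helper names are
// invented for this sketch.
static bool isHotByProfile(uint64_t FunctionCount, uint64_t MaxFunctionCount) {
  return FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount);
}

static bool isColdByProfile(uint64_t FunctionCount, uint64_t MaxFunctionCount) {
  return FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount);
}

int main() {
  // With a module maximum of 1000: 400 counts as hot, 5 counts as cold.
  std::cout << isHotByProfile(400, 1000) << " " << isColdByProfile(5, 1000)
            << "\n"; // prints "1 1"
  return 0;
}
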
Code example #6
File: Inliner.cpp  Project: chrislipa/fractalstream
/// shouldInline - Return true if the inliner should attempt to inline
/// at the given CallSite.
bool Inliner::shouldInline(CallSite CS) {
    InlineCost IC = getInlineCost(CS);
    float FudgeFactor = getInlineFudgeFactor(CS);

    if (IC.isAlways()) {
        DOUT << "    Inlining: cost=always"
             << ", Call: " << *CS.getInstruction();
        return true;
    }

    if (IC.isNever()) {
        DOUT << "    NOT Inlining: cost=never"
             << ", Call: " << *CS.getInstruction();
        return false;
    }

    int Cost = IC.getValue();
    int CurrentThreshold = InlineThreshold;
    Function *Fn = CS.getCaller();
    if (Fn && !Fn->isDeclaration()
            && Fn->hasFnAttr(Attribute::OptimizeForSize)
            && InlineThreshold != 50) {
        CurrentThreshold = 50;
    }

    if (Cost >= (int)(CurrentThreshold * FudgeFactor)) {
        DOUT << "    NOT Inlining: cost=" << Cost
             << ", Call: " << *CS.getInstruction();
        return false;
    } else {
        DOUT << "    Inlining: cost=" << Cost
             << ", Call: " << *CS.getInstruction();
        return true;
    }
}
Code example #7
File: InlineCost.cpp  Project: A2-Collaboration/root
InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
                                             int Threshold) {
  // Cannot inline indirect calls.
  if (!Callee)
    return llvm::InlineCost::getNever();

  // Calls to functions with always-inline attributes should be inlined
  // whenever possible.
  if (CS.hasFnAttr(Attribute::AlwaysInline)) {
    if (isInlineViable(*Callee))
      return llvm::InlineCost::getAlways();
    return llvm::InlineCost::getNever();
  }

  // Never inline functions with conflicting attributes (unless callee has
  // always-inline attribute).
  if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee))
    return llvm::InlineCost::getNever();

  // Don't inline this call if the caller has the optnone attribute.
  if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
    return llvm::InlineCost::getNever();

  // Don't inline functions which can be redefined at link-time to mean
  // something else.  Don't inline functions marked noinline or call sites
  // marked noinline.
  if (Callee->mayBeOverridden() ||
      Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline())
    return llvm::InlineCost::getNever();

  DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
        << "...\n");

  CallAnalyzer CA(Callee->getDataLayout(), TTIWP->getTTI(*Callee),
                  ACT->getAssumptionCache(*Callee), *Callee, Threshold);
  bool ShouldInline = CA.analyzeCall(CS);

  DEBUG(CA.dump());

  // Check if there was a reason to force inlining or no inlining.
  if (!ShouldInline && CA.getCost() < CA.getThreshold())
    return InlineCost::getNever();
  if (ShouldInline && CA.getCost() >= CA.getThreshold())
    return InlineCost::getAlways();

  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
}
Code example #8
File: InlineCost.cpp  Project: baatar/llvm-duetto
InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
                                             int Threshold) {
  // Cannot inline indirect calls.
  if (!Callee)
    return llvm::InlineCost::getNever();

  // Calls to functions with always-inline attributes should be inlined
  // whenever possible.
  if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                           Attribute::AlwaysInline)) {
    if (isInlineViable(*Callee))
      return llvm::InlineCost::getAlways();
    return llvm::InlineCost::getNever();
  }

  // Don't inline functions which can be redefined at link-time to mean
  // something else.  Don't inline functions marked noinline or call sites
  // marked noinline.
  if (Callee->mayBeOverridden() ||
      Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                           Attribute::NoInline) ||
      CS.isNoInline())
    return llvm::InlineCost::getNever();

  //DUETTO: Do not inline server/client methods called from the other side
  const Function* caller=CS.getCaller();
  if((caller->hasFnAttribute(Attribute::Client) && Callee->hasFnAttribute(Attribute::Server)) ||
     (caller->hasFnAttribute(Attribute::Server) && Callee->hasFnAttribute(Attribute::Client)))
  {
    return llvm::InlineCost::getNever();
  }

  DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
        << "...\n");

  CallAnalyzer CA(TD, *TTI, *Callee, Threshold);
  bool ShouldInline = CA.analyzeCall(CS);

  DEBUG(CA.dump());

  // Check if there was a reason to force inlining or no inlining.
  if (!ShouldInline && CA.getCost() < CA.getThreshold())
    return InlineCost::getNever();
  if (ShouldInline && CA.getCost() >= CA.getThreshold())
    return InlineCost::getAlways();

  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
}
Code example #9
File: Inliner.cpp  Project: CPFL/guc
unsigned Inliner::getInlineThreshold(CallSite CS) const {
  int thres = InlineThreshold;

  // Listen to optsize when -inline-limit is not given.
  Function *Caller = CS.getCaller();
  if (Caller && !Caller->isDeclaration() &&
      Caller->hasFnAttr(Attribute::OptimizeForSize) &&
      InlineLimit.getNumOccurrences() == 0)
    thres = OptSizeThreshold;

  // Listen to inlinehint when it would increase the threshold.
  Function *Callee = CS.getCalledFunction();
  if (HintThreshold > thres && Callee && !Callee->isDeclaration() &&
      Callee->hasFnAttr(Attribute::InlineHint))
    thres = HintThreshold;

  return thres;
}
Code example #10
File: Inliner.cpp  Project: MoSyncLabs/llvm-mirror
unsigned Inliner::getInlineThreshold(CallSite CS) const {
  int thres = InlineThreshold; // -inline-threshold or else selected by
                               // overall opt level

  // If -inline-threshold is not given, listen to the optsize attribute when it
  // would decrease the threshold.
  Function *Caller = CS.getCaller();
  bool OptSize = Caller && !Caller->isDeclaration() &&
    Caller->hasFnAttr(Attribute::OptimizeForSize);
  if (!(InlineLimit.getNumOccurrences() > 0) && OptSize && OptSizeThreshold < thres)
    thres = OptSizeThreshold;

  // Listen to the inlinehint attribute when it would increase the threshold.
  Function *Callee = CS.getCalledFunction();
  bool InlineHint = Callee && !Callee->isDeclaration() &&
    Callee->hasFnAttr(Attribute::InlineHint);
  if (InlineHint && HintThreshold > thres)
    thres = HintThreshold;

  return thres;
}
Code example #11
File: PartialInlining.cpp  Project: CTSRD-SOAAP/llvm
// Create the callsite to profile count map which is
// used to update the original function's entry count,
// after the function is partially inlined into the callsite.
void PartialInlinerImpl::computeCallsiteToProfCountMap(
    Function *DuplicateFunction,
    DenseMap<User *, uint64_t> &CallSiteToProfCountMap) {
  std::vector<User *> Users(DuplicateFunction->user_begin(),
                            DuplicateFunction->user_end());
  Function *CurrentCaller = nullptr;
  std::unique_ptr<BlockFrequencyInfo> TempBFI;
  BlockFrequencyInfo *CurrentCallerBFI = nullptr;

  auto ComputeCurrBFI = [&,this](Function *Caller) {
      // For the old pass manager:
      if (!GetBFI) {
        DominatorTree DT(*Caller);
        LoopInfo LI(DT);
        BranchProbabilityInfo BPI(*Caller, LI);
        TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
        CurrentCallerBFI = TempBFI.get();
      } else {
        // New pass manager:
        CurrentCallerBFI = &(*GetBFI)(*Caller);
      }
  };

  for (User *User : Users) {
    CallSite CS = getCallSite(User);
    Function *Caller = CS.getCaller();
    if (CurrentCaller != Caller) {
      CurrentCaller = Caller;
      ComputeCurrBFI(Caller);
    } else {
      assert(CurrentCallerBFI && "CallerBFI is not set");
    }
    BasicBlock *CallBB = CS.getInstruction()->getParent();
    auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
    if (Count)
      CallSiteToProfCountMap[User] = *Count;
    else
      CallSiteToProfCountMap[User] = 0;
  }
}
Code example #12
File: Inliner.cpp  Project: chrislipa/fractalstream
bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) {
    CallGraph &CG = getAnalysis<CallGraph>();

    std::set<Function*> SCCFunctions;
    DOUT << "Inliner visiting SCC:";
    for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
        Function *F = SCC[i]->getFunction();
        if (F) SCCFunctions.insert(F);
        DOUT << " " << (F ? F->getName() : "INDIRECTNODE");
    }

    // Scan through and identify all call sites ahead of time so that we only
    // inline call sites in the original functions, not call sites that result
    // from inlining other functions.
    std::vector<CallSite> CallSites;

    for (unsigned i = 0, e = SCC.size(); i != e; ++i)
        if (Function *F = SCC[i]->getFunction())
            for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
                for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
                    CallSite CS = CallSite::get(I);
                    if (CS.getInstruction() && (!CS.getCalledFunction() ||
                                                !CS.getCalledFunction()->isDeclaration()))
                        CallSites.push_back(CS);
                }

    DOUT << ": " << CallSites.size() << " call sites.\n";

    // Now that we have all of the call sites, move the ones to functions in the
    // current SCC to the end of the list.
    unsigned FirstCallInSCC = CallSites.size();
    for (unsigned i = 0; i < FirstCallInSCC; ++i)
        if (Function *F = CallSites[i].getCalledFunction())
            if (SCCFunctions.count(F))
                std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);

    // Now that we have all of the call sites, loop over them and inline them if
    // it looks profitable to do so.
    bool Changed = false;
    bool LocalChange;
    do {
        LocalChange = false;
        // Iterate over the outer loop because inlining functions can cause indirect
        // calls to become direct calls.
        for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi)
            if (Function *Callee = CallSites[CSi].getCalledFunction()) {
                // Calls to external functions are never inlinable.
                if (Callee->isDeclaration()) {
                    if (SCC.size() == 1) {
                        std::swap(CallSites[CSi], CallSites.back());
                        CallSites.pop_back();
                    } else {
                        // Keep the 'in SCC / not in SCC' boundary correct.
                        CallSites.erase(CallSites.begin()+CSi);
                    }
                    --CSi;
                    continue;
                }

                // If the policy determines that we should inline this function,
                // try to do so.
                CallSite CS = CallSites[CSi];
                if (shouldInline(CS)) {
                    Function *Caller = CS.getCaller();
                    // Attempt to inline the function...
                    if (InlineCallIfPossible(CS, CG, SCCFunctions,
                                             getAnalysis<TargetData>())) {
                        // Remove any cached cost info for this caller, as inlining the callee
                        // has increased the size of the caller (which may be the same as the
                        // callee).
                        resetCachedCostInfo(Caller);

                        // Remove this call site from the list.  If possible, use
                        // swap/pop_back for efficiency, but do not use it if doing so would
                        // move a call site to a function in this SCC before the
                        // 'FirstCallInSCC' barrier.
                        if (SCC.size() == 1) {
                            std::swap(CallSites[CSi], CallSites.back());
                            CallSites.pop_back();
                        } else {
                            CallSites.erase(CallSites.begin()+CSi);
                        }
                        --CSi;

                        ++NumInlined;
                        Changed = true;
                        LocalChange = true;
                    }
                }
            }
    } while (LocalChange);

    return Changed;
}
Code example #13
File: Inliner.cpp  Project: 8l/SPIRV-LLVM
/// Return true if the inliner should attempt to inline at the given CallSite.
bool Inliner::shouldInline(CallSite CS) {
  InlineCost IC = getInlineCost(CS);
  
  if (IC.isAlways()) {
    DEBUG(dbgs() << "    Inlining: cost=always"
          << ", Call: " << *CS.getInstruction() << "\n");
    emitAnalysis(CS, Twine(CS.getCalledFunction()->getName()) +
                         " should always be inlined (cost=always)");
    return true;
  }
  
  if (IC.isNever()) {
    DEBUG(dbgs() << "    NOT Inlining: cost=never"
          << ", Call: " << *CS.getInstruction() << "\n");
    emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
                           " should never be inlined (cost=never)"));
    return false;
  }
  
  Function *Caller = CS.getCaller();
  if (!IC) {
    DEBUG(dbgs() << "    NOT Inlining: cost=" << IC.getCost()
          << ", thres=" << (IC.getCostDelta() + IC.getCost())
          << ", Call: " << *CS.getInstruction() << "\n");
    emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
                           " too costly to inline (cost=") +
                         Twine(IC.getCost()) + ", threshold=" +
                         Twine(IC.getCostDelta() + IC.getCost()) + ")");
    return false;
  }
  
  // Try to detect the case where the current inlining candidate caller (call
  // it B) is a static or linkonce-ODR function and is an inlining candidate
  // elsewhere, and the current candidate callee (call it C) is large enough
  // that inlining it into B would make B too big to inline later. In these
  // circumstances it may be best not to inline C into B, but to inline B into
  // its callers.
  //
  // This only applies to static and linkonce-ODR functions because those are
  // expected to be available for inlining in the translation units where they
  // are used. Thus we will always have the opportunity to make local inlining
  // decisions. Importantly the linkonce-ODR linkage covers inline functions
  // and templates in C++.
  //
  // FIXME: All of this logic should be sunk into getInlineCost. It relies on
  // the internal implementation of the inline cost metrics rather than
  // treating them as truly abstract units etc.
  if (Caller->hasLocalLinkage() || Caller->hasLinkOnceODRLinkage()) {
    int TotalSecondaryCost = 0;
    // The candidate cost to be imposed upon the current function.
    int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1);
    // This bool tracks what happens if we do NOT inline C into B.
    bool callerWillBeRemoved = Caller->hasLocalLinkage();
    // This bool tracks what happens if we DO inline C into B.
    bool inliningPreventsSomeOuterInline = false;
    for (User *U : Caller->users()) {
      CallSite CS2(U);

      // If this isn't a call to Caller (it could be some other sort
      // of reference) skip it.  Such references will prevent the caller
      // from being removed.
      if (!CS2 || CS2.getCalledFunction() != Caller) {
        callerWillBeRemoved = false;
        continue;
      }

      InlineCost IC2 = getInlineCost(CS2);
      ++NumCallerCallersAnalyzed;
      if (!IC2) {
        callerWillBeRemoved = false;
        continue;
      }
      if (IC2.isAlways())
        continue;

      // See if inlining or original callsite would erase the cost delta of
      // this callsite. We subtract off the penalty for the call instruction,
      // which we would be deleting.
      if (IC2.getCostDelta() <= CandidateCost) {
        inliningPreventsSomeOuterInline = true;
        TotalSecondaryCost += IC2.getCost();
      }
    }
    // If all outer calls to Caller would get inlined, the cost for the last
    // one is set very low by getInlineCost, in anticipation that Caller will
    // be removed entirely.  We did not account for this above unless there
    // is only one caller of Caller.
    if (callerWillBeRemoved && !Caller->use_empty())
      TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;

    if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()) {
      DEBUG(dbgs() << "    NOT Inlining: " << *CS.getInstruction() <<
           " Cost = " << IC.getCost() <<
           ", outer Cost = " << TotalSecondaryCost << '\n');
      emitAnalysis(
          CS, Twine("Not inlining. Cost of inlining " +
                    CS.getCalledFunction()->getName() +
                    " increases the cost of inlining " +
                    CS.getCaller()->getName() + " in other contexts"));
      return false;
    }
  }

  DEBUG(dbgs() << "    Inlining: cost=" << IC.getCost()
        << ", thres=" << (IC.getCostDelta() + IC.getCost())
        << ", Call: " << *CS.getInstruction() << '\n');
  emitAnalysis(
      CS, CS.getCalledFunction()->getName() + Twine(" can be inlined into ") +
              CS.getCaller()->getName() + " with cost=" + Twine(IC.getCost()) +
              " (threshold=" + Twine(IC.getCostDelta() + IC.getCost()) + ")");
  return true;
}
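
The block in the middle of shouldInline decides whether inlining the callee (C) into a local or linkonce-ODR caller (B) should be deferred because it would make B too expensive to inline into its own callers. A standalone sketch of that decision, with plain structs in place of CallSite and InlineCost and illustrative constants in place of the InlineConstants values:

#include <iostream>
#include <vector>

// Stand-ins for a caller's outer call sites and their inline costs.  The
// struct, field names, and the 25/-15000 constants are illustrative, not the
// real CallSite/InlineCost/InlineConstants API.
struct OuterCall {
  bool IsCallToCaller;  // the use really is a call to B (not another reference)
  bool FitsThreshold;   // this outer call would currently be inlined
  bool IsAlways;        // cost=always
  int Cost;
  int CostDelta;        // threshold minus cost for that outer call site
};

static bool shouldDeferInlining(int CalleeCost, bool CallerIsLocal,
                                bool CallerHasUses,
                                const std::vector<OuterCall> &OuterCalls) {
  const int CallPenalty = 25;
  const int LastCallToStaticBonus = -15000;
  int TotalSecondaryCost = 0;
  // The growth inlining C would impose on each outer call of B (minus the
  // call instruction that would be deleted).
  int CandidateCost = CalleeCost - (CallPenalty + 1);
  bool CallerWillBeRemoved = CallerIsLocal;
  bool PreventsSomeOuterInline = false;
  for (const OuterCall &OC : OuterCalls) {
    if (!OC.IsCallToCaller || !OC.FitsThreshold) {
      CallerWillBeRemoved = false;
      continue;
    }
    if (OC.IsAlways)
      continue;
    // Would growing B by CandidateCost erase this outer call's cost headroom?
    if (OC.CostDelta <= CandidateCost) {
      PreventsSomeOuterInline = true;
      TotalSecondaryCost += OC.Cost;
    }
  }
  // If B would disappear entirely once its callers inline it, deferring is
  // even more attractive.
  if (CallerWillBeRemoved && CallerHasUses)
    TotalSecondaryCost += LastCallToStaticBonus;
  return PreventsSomeOuterInline && TotalSecondaryCost < CalleeCost;
}

int main() {
  std::vector<OuterCall> Outer = {{true, true, false, 120, 30}};
  // Inlining a callee of cost 100 would consume the 30-point headroom of the
  // outer call, so the heuristic prefers to defer.
  std::cout << shouldDeferInlining(100, true, true, Outer) << "\n"; // prints 1
  return 0;
}
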
Code example #14
File: Inliner.cpp  Project: 8l/SPIRV-LLVM
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
  AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
  const TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
  AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();

  SmallPtrSet<Function*, 8> SCCFunctions;
  DEBUG(dbgs() << "Inliner visiting SCC:");
  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
    Function *F = (*I)->getFunction();
    if (F) SCCFunctions.insert(F);
    DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
  }

  // Scan through and identify all call sites ahead of time so that we only
  // inline call sites in the original functions, not call sites that result
  // from inlining other functions.
  SmallVector<std::pair<CallSite, int>, 16> CallSites;
  
  // When inlining a callee produces new call sites, we want to keep track of
  // the fact that they were inlined from the callee.  This allows us to avoid
  // infinite inlining in some obscure cases.  To represent this, we use an
  // index into the InlineHistory vector.
  SmallVector<std::pair<Function*, int>, 8> InlineHistory;

  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
    Function *F = (*I)->getFunction();
    if (!F) continue;
    
    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
        CallSite CS(cast<Value>(I));
        // If this isn't a call, or it is a call to an intrinsic, it can
        // never be inlined.
        if (!CS || isa<IntrinsicInst>(I))
          continue;
        
        // If this is a direct call to an external function, we can never inline
        // it.  If it is an indirect call, inlining may resolve it to be a
        // direct call, so we keep it.
        if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration())
          continue;
        
        CallSites.push_back(std::make_pair(CS, -1));
      }
  }

  DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");

  // If there are no calls in this function, exit early.
  if (CallSites.empty())
    return false;
  
  // Now that we have all of the call sites, move the ones to functions in the
  // current SCC to the end of the list.
  unsigned FirstCallInSCC = CallSites.size();
  for (unsigned i = 0; i < FirstCallInSCC; ++i)
    if (Function *F = CallSites[i].first.getCalledFunction())
      if (SCCFunctions.count(F))
        std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);

  
  InlinedArrayAllocasTy InlinedArrayAllocas;
  InlineFunctionInfo InlineInfo(&CG, AA, ACT);

  // Now that we have all of the call sites, loop over them and inline them if
  // it looks profitable to do so.
  bool Changed = false;
  bool LocalChange;
  do {
    LocalChange = false;
    // Iterate over the outer loop because inlining functions can cause indirect
    // calls to become direct calls.
    for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
      CallSite CS = CallSites[CSi].first;
      
      Function *Caller = CS.getCaller();
      Function *Callee = CS.getCalledFunction();

      // If this call site is dead and it is to a readonly function, we should
      // just delete the call instead of trying to inline it, regardless of
      // size.  This happens because IPSCCP propagates the result out of the
      // call and then we're left with the dead call.
      if (isInstructionTriviallyDead(CS.getInstruction(), TLI)) {
        DEBUG(dbgs() << "    -> Deleting dead call: "
                     << *CS.getInstruction() << "\n");
        // Update the call graph by deleting the edge from Callee to Caller.
        CG[Caller]->removeCallEdgeFor(CS);
        CS.getInstruction()->eraseFromParent();
        ++NumCallsDeleted;
      } else {
        // We can only inline direct calls to non-declarations.
        if (!Callee || Callee->isDeclaration()) continue;
      
        // If this call site was obtained by inlining another function, verify
        // that the include path for the function did not include the callee
        // itself.  If so, we'd be recursively inlining the same function,
        // which would provide the same callsites, which would cause us to
        // infinitely inline.
        int InlineHistoryID = CallSites[CSi].second;
        if (InlineHistoryID != -1 &&
            InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
          continue;
        
        LLVMContext &CallerCtx = Caller->getContext();

        // Get DebugLoc to report. CS will be invalid after Inliner.
        DebugLoc DLoc = CS.getInstruction()->getDebugLoc();

        // If the policy determines that we should inline this function,
        // try to do so.
        if (!shouldInline(CS)) {
          emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
                                       Twine(Callee->getName() +
                                             " will not be inlined into " +
                                             Caller->getName()));
          continue;
        }

        // Attempt to inline the function.
        if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
                                  InlineHistoryID, InsertLifetime)) {
          emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
                                       Twine(Callee->getName() +
                                             " will not be inlined into " +
                                             Caller->getName()));
          continue;
        }
        ++NumInlined;

        // Report the inline decision.
        emitOptimizationRemark(
            CallerCtx, DEBUG_TYPE, *Caller, DLoc,
            Twine(Callee->getName() + " inlined into " + Caller->getName()));

        // If inlining this function gave us any new call sites, throw them
        // onto our worklist to process.  They are useful inline candidates.
        if (!InlineInfo.InlinedCalls.empty()) {
          // Create a new inline history entry for this, so that we remember
          // that these new callsites came about due to inlining Callee.
          int NewHistoryID = InlineHistory.size();
          InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));

          for (unsigned i = 0, e = InlineInfo.InlinedCalls.size();
               i != e; ++i) {
            Value *Ptr = InlineInfo.InlinedCalls[i];
            CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
          }
        }
      }
      
      // If we inlined or deleted the last possible call site to the function,
      // delete the function body now.
      if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
          // TODO: Can remove if in SCC now.
          !SCCFunctions.count(Callee) &&
          
          // The function may be apparently dead, but if there are indirect
          // callgraph references to the node, we cannot delete it yet, this
          // could invalidate the CGSCC iterator.
          CG[Callee]->getNumReferences() == 0) {
        DEBUG(dbgs() << "    -> Deleting dead function: "
              << Callee->getName() << "\n");
        CallGraphNode *CalleeNode = CG[Callee];
        
        // Remove any call graph edges from the callee to its callees.
        CalleeNode->removeAllCalledFunctions();
        
        // Remove the node for the callee from the call graph and delete it.
        delete CG.removeFunctionFromModule(CalleeNode);
        ++NumDeleted;
      }

      // Remove this call site from the list.  If possible, use 
      // swap/pop_back for efficiency, but do not use it if doing so would
      // move a call site to a function in this SCC before the
      // 'FirstCallInSCC' barrier.
      if (SCC.isSingular()) {
        CallSites[CSi] = CallSites.back();
        CallSites.pop_back();
      } else {
        CallSites.erase(CallSites.begin()+CSi);
      }
      --CSi;

      Changed = true;
      LocalChange = true;
    }
  } while (LocalChange);

  return Changed;
}
Code example #15
CallSite SancusModuleCreator::handleSancusCall(CallSite cs)
{
    assert(cs.arg_size() >= 2 && "sancus_call needs at least 2 arguments");

    auto numAsmArgs = cs.arg_size() - 2;

    if (numAsmArgs > 3)
    {
        report_fatal_error("Currently, maximum 3 arguments are supported by "
                           "sancus_call");
    }

    auto argTys = std::vector<Type*>{voidPtrTy, wordTy};
    auto entryVal = cs.getArgument(0);
    auto indexVal = cs.getArgument(1);
    auto args = std::vector<Value*>{entryVal, indexVal};

    auto inputConstraints = std::string{",r,r"};
    std::ostringstream argsAsm;
    auto regsUsage = unsigned{0x10};
    auto clobbers = std::string{",~{r6},~{r7},~{r8},~{r15}"};
    auto callerInfo = getSancusModuleInfo(cs.getCaller());

    for (decltype(numAsmArgs) i = 0; i < numAsmArgs; i++)
    {
        auto arg = cs.getArgument(i + 2);
        auto argTy = arg->getType();

        // TODO FunctionCcInfo should be used to check the argument. This is not
        // possible currently since it takes a Function* as argument.
        if (argTy->getPrimitiveSizeInBits() != 16 && !argTy->isPointerTy())
            report_fatal_error("Illegal argument type in call to sancus_call");

        inputConstraints += ",r";

        std::ostringstream regStream;
        regStream << "r" << 15 - i;
        auto regStr = regStream.str();

        if (regStr != "r15")
            clobbers += ",~{" + regStr + "}";

        argsAsm << "mov $" << i + 3 << ", " << regStr << "\n\t";
        args.push_back(arg);
        argTys.push_back(argTy);

        // see sm_exit.s to understand how this value is used.
        regsUsage >>= 1;
    }

    auto asmStr = Twine("push #1f\n\t"
                        "push r6\n\t"
                        "push r7\n\t"
                        "push r8\n\t"
                        "mov $2, r6\n\t"
                        "mov #" + Twine(regsUsage) + ", r7\n\t"
                        "mov $1, r8\n\t" + argsAsm.str() +
                        "br #" + callerInfo.getExitName() + "\n"
                        "1:\n\t"
                        "mov r15, $0"
                        ).str();

    auto constraintsStr = "=r" + inputConstraints + clobbers;
    auto resTy = wordTy;
    auto asmFuncTy = FunctionType::get(resTy, argTys, /*isVarArg=*/false);

    // NOTE hasSideEffects has to be true because the inline assembly may be
    // optimised away otherwise
    auto inlineAsm = InlineAsm::get(asmFuncTy, asmStr, constraintsStr,
                                    /*hasSideEffects=*/true);

    auto asmCall = CallInst::Create(inlineAsm, args);
    return CallSite(asmCall);
}
Code example #16
File: Inliner.cpp  Project: 8l/SPIRV-LLVM
static void emitAnalysis(CallSite CS, const Twine &Msg) {
  Function *Caller = CS.getCaller();
  LLVMContext &Ctx = Caller->getContext();
  DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
  emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg);
}
Code example #17
File: MemCpyOptimizer.cpp  Project: 8l/SPIRV-LLVM
/// processByValArgument - This is called on every byval argument in call sites.
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
  const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout();
  // Find out what feeds this byval argument.
  Value *ByValArg = CS.getArgument(ArgNo);
  Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
  uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
  MemDepResult DepInfo =
    MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
                                 true, CS.getInstruction(),
                                 CS.getInstruction()->getParent());
  if (!DepInfo.isClobber())
    return false;

  // If the byval argument isn't fed by a memcpy, ignore it.  If it is fed by
  // a memcpy, see if we can byval from the source of the memcpy instead of the
  // result.
  MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
  if (!MDep || MDep->isVolatile() ||
      ByValArg->stripPointerCasts() != MDep->getDest())
    return false;

  // The length of the memcpy must be larger or equal to the size of the byval.
  ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
  if (!C1 || C1->getValue().getZExtValue() < ByValSize)
    return false;

  // Get the alignment of the byval.  If the call doesn't specify the alignment,
  // then it is some target specific value that we can't know.
  unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
  if (ByValAlign == 0) return false;

  // If it is greater than the memcpy, then we check to see if we can force the
  // source of the memcpy to the alignment we need.  If we fail, we bail out.
  AssumptionCache &AC =
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
          *CS->getParent()->getParent());
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  if (MDep->getAlignment() < ByValAlign &&
      getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL,
                                 CS.getInstruction(), &AC, &DT) < ByValAlign)
    return false;

  // Verify that the copied-from memory doesn't change in between the memcpy and
  // the byval call.
  //    memcpy(a <- b)
  //    *b = 42;
  //    foo(*a)
  // It would be invalid to transform the second memcpy into foo(*b).
  //
  // NOTE: This is conservative, it will stop on any read from the source loc,
  // not just the defining memcpy.
  MemDepResult SourceDep =
    MD->getPointerDependencyFrom(AliasAnalysis::getLocationForSource(MDep),
                                 false, CS.getInstruction(), MDep->getParent());
  if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
    return false;

  Value *TmpCast = MDep->getSource();
  if (MDep->getSource()->getType() != ByValArg->getType())
    TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
                              "tmpcast", CS.getInstruction());

  DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n"
               << "  " << *MDep << "\n"
               << "  " << *CS.getInstruction() << "\n");

  // Otherwise we're good!  Update the byval argument.
  CS.setArgument(ArgNo, TmpCast);
  ++NumMemCpyInstr;
  return true;
}
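
At the source level, the rewrite above lets the byval argument copy directly from the memcpy's source instead of from the intermediate buffer; the dependence checks exist to prove the source is not modified in between. A rough C++ analogy of the before/after shapes (names invented; the real transformation operates on LLVM IR byval arguments, not on source code):

struct Big { char data[64]; };

static void takesByVal(Big arg) { (void)arg; } // models a byval call

// Before: the byval argument is fed by an explicit copy (the memcpy).
static void before(const Big *src) {
  Big tmp = *src;    // the memcpy that feeds the byval argument
  takesByVal(tmp);   // the call then copies 'tmp' again for byval
}

// After: the byval copy is taken straight from the memcpy's source.  This is
// only valid because the pass proved *src is not modified between the copy
// and the call.
static void after(const Big *src) {
  takesByVal(*src);
}

int main() {
  Big B = {};
  before(&B);
  after(&B);
  return 0;
}
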
Code example #18
File: Inliner.cpp  Project: 8l/SPIRV-LLVM
/// If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
/// This function also does some basic book-keeping to update the IR.  The
/// InlinedArrayAllocas map keeps track of any allocas that are already
/// available from other functions inlined into the caller.  If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
                                 InlinedArrayAllocasTy &InlinedArrayAllocas,
                                 int InlineHistory, bool InsertLifetime) {
  Function *Callee = CS.getCalledFunction();
  Function *Caller = CS.getCaller();

  // Try to inline the function.  Get the list of static allocas that were
  // inlined.
  if (!InlineFunction(CS, IFI, InsertLifetime))
    return false;

  AdjustCallerSSPLevel(Caller, Callee);

  // Look at all of the allocas that we inlined through this call site.  If we
  // have already inlined other allocas through other calls into this function,
  // then we know that they have disjoint lifetimes and that we can merge them.
  //
  // There are many heuristics possible for merging these allocas, and the
  // different options have different tradeoffs.  One thing that we *really*
  // don't want to hurt is SRoA: once inlining happens, often allocas are no
  // longer address taken and so they can be promoted.
  //
  // Our "solution" for that is to only merge allocas whose outermost type is an
  // array type.  These are usually not promoted because someone is using a
  // variable index into them.  These are also often the most important ones to
  // merge.
  //
  // A better solution would be to have real memory lifetime markers in the IR
  // and not have the inliner do any merging of allocas at all.  This would
  // allow the backend to do proper stack slot coloring of all allocas that
  // *actually make it to the backend*, which is really what we want.
  //
  // Because we don't have this information, we do this simple and useful hack.
  //
  SmallPtrSet<AllocaInst*, 16> UsedAllocas;
  
  // When processing our SCC, check to see if CS was inlined from some other
  // call site.  For example, if we're processing "A" in this code:
  //   A() { B() }
  //   B() { x = alloca ... C() }
  //   C() { y = alloca ... }
  // Assume that C was not inlined into B initially, and so we're processing A
  // and decide to inline B into A.  Doing this makes an alloca available for
  // reuse and makes a callsite (C) available for inlining.  When we process
  // the C call site we don't want to do any alloca merging between X and Y
  // because their scopes are not disjoint.  We could make this smarter by
  // keeping track of the inline history for each alloca in the
  // InlinedArrayAllocas but this isn't likely to be a significant win.
  if (InlineHistory != -1)  // Only do merging for top-level call sites in SCC.
    return true;
  
  // Loop over all the allocas we have so far and see if they can be merged with
  // a previously inlined alloca.  If not, remember that we had it.
  for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
       AllocaNo != e; ++AllocaNo) {
    AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
    
    // Don't bother trying to merge array allocations (they will usually be
    // canonicalized to be an allocation *of* an array), or allocations whose
    // type is not itself an array (because we're afraid of pessimizing SRoA).
    ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
    if (!ATy || AI->isArrayAllocation())
      continue;
    
    // Get the list of all available allocas for this array type.
    std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy];
    
    // Loop over the allocas in AllocasForType to see if we can reuse one.  Note
    // that we have to be careful not to reuse the same "available" alloca for
    // multiple different allocas that we just inlined, we use the 'UsedAllocas'
    // set to keep track of which "available" allocas are being used by this
    // function.  Also, AllocasForType can be empty of course!
    bool MergedAwayAlloca = false;
    for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
      AllocaInst *AvailableAlloca = AllocasForType[i];

      unsigned Align1 = AI->getAlignment(),
               Align2 = AvailableAlloca->getAlignment();
      
      // The available alloca has to be in the right function, not in some other
      // function in this SCC.
      if (AvailableAlloca->getParent() != AI->getParent())
        continue;
      
      // If the inlined function already uses this alloca then we can't reuse
      // it.
      if (!UsedAllocas.insert(AvailableAlloca).second)
        continue;
      
      // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
      // success!
      DEBUG(dbgs() << "    ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: "
                   << *AvailableAlloca << '\n');
      
      AI->replaceAllUsesWith(AvailableAlloca);

      if (Align1 != Align2) {
        if (!Align1 || !Align2) {
          const DataLayout &DL = Caller->getParent()->getDataLayout();
          unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType());

          Align1 = Align1 ? Align1 : TypeAlign;
          Align2 = Align2 ? Align2 : TypeAlign;
        }

        if (Align1 > Align2)
          AvailableAlloca->setAlignment(AI->getAlignment());
      }

      AI->eraseFromParent();
      MergedAwayAlloca = true;
      ++NumMergedAllocas;
      IFI.StaticAllocas[AllocaNo] = nullptr;
      break;
    }

    // If we already nuked the alloca, we're done with it.
    if (MergedAwayAlloca)
      continue;
    
    // If we were unable to merge away the alloca either because there are no
    // allocas of the right type available or because we reused them all
    // already, remember that this alloca came from an inlined function and mark
    // it used so we don't reuse it for other allocas from this inline
    // operation.
    AllocasForType.push_back(AI);
    UsedAllocas.insert(AI);
  }
  
  return true;
}
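
The merging loop above keeps, per array type, the allocas already materialized in the caller by earlier inlining and reuses one for each newly inlined alloca, as long as that candidate has not already been reused during the same inline operation. A toy sketch of that bookkeeping with plain containers (identifiers invented; the same-function and alignment checks are omitted):

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

// Toy model: "allocas" are plain ints grouped by a string standing in for
// their array type.
using AllocaId = int;
using AvailableAllocasTy = std::map<std::string, std::vector<AllocaId>>;

// Returns the alloca the new one is merged into, or the new alloca itself if
// nothing could be reused.
static AllocaId mergeOrRemember(AvailableAllocasTy &Available,
                                std::set<AllocaId> &UsedThisInline,
                                const std::string &ArrayType, AllocaId NewAI) {
  for (AllocaId Candidate : Available[ArrayType]) {
    // Each available alloca may be reused at most once per inline operation,
    // because allocas coming from the same inlined body overlap in lifetime.
    if (!UsedThisInline.insert(Candidate).second)
      continue;
    return Candidate; // reuse: the new alloca would be RAUW'd and erased
  }
  // Nothing reusable: remember the new alloca and mark it used.
  Available[ArrayType].push_back(NewAI);
  UsedThisInline.insert(NewAI);
  return NewAI;
}

int main() {
  AvailableAllocasTy Available;
  std::set<AllocaId> Used;
  std::cout << mergeOrRemember(Available, Used, "[16 x i8]", 1) << "\n"; // 1: kept
  Used.clear(); // a later, separate inline operation
  std::cout << mergeOrRemember(Available, Used, "[16 x i8]", 2) << "\n"; // 1: merged
  return 0;
}
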
Code example #19
File: Inliner.cpp  Project: CPFL/guc
/// shouldInline - Return true if the inliner should attempt to inline
/// at the given CallSite.
bool Inliner::shouldInline(CallSite CS) {
  InlineCost IC = getInlineCost(CS);
  
  if (IC.isAlways()) {
    DEBUG(dbgs() << "    Inlining: cost=always"
          << ", Call: " << *CS.getInstruction() << "\n");
    return true;
  }
  
  if (IC.isNever()) {
    DEBUG(dbgs() << "    NOT Inlining: cost=never"
          << ", Call: " << *CS.getInstruction() << "\n");
    return false;
  }
  
  int Cost = IC.getValue();
  Function *Caller = CS.getCaller();
  int CurrentThreshold = getInlineThreshold(CS);
  float FudgeFactor = getInlineFudgeFactor(CS);
  int AdjThreshold = (int)(CurrentThreshold * FudgeFactor);
  if (Cost >= AdjThreshold) {
    DEBUG(dbgs() << "    NOT Inlining: cost=" << Cost
          << ", thres=" << AdjThreshold
          << ", Call: " << *CS.getInstruction() << "\n");
    return false;
  }
  
  // Try to detect the case where the current inlining candidate caller
  // (call it B) is a static function and is an inlining candidate elsewhere,
  // and the current candidate callee (call it C) is large enough that
  // inlining it into B would make B too big to inline later.  In these
  // circumstances it may be best not to inline C into B, but to inline B
  // into its callers.
  if (Caller->hasLocalLinkage()) {
    int TotalSecondaryCost = 0;
    bool outerCallsFound = false;
    bool allOuterCallsWillBeInlined = true;
    bool someOuterCallWouldNotBeInlined = false;
    for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end(); 
         I != E; ++I) {
      CallSite CS2(*I);

      // If this isn't a call to Caller (it could be some other sort
      // of reference) skip it.
      if (!CS2 || CS2.getCalledFunction() != Caller)
        continue;

      InlineCost IC2 = getInlineCost(CS2);
      if (IC2.isNever())
        allOuterCallsWillBeInlined = false;
      if (IC2.isAlways() || IC2.isNever())
        continue;

      outerCallsFound = true;
      int Cost2 = IC2.getValue();
      int CurrentThreshold2 = getInlineThreshold(CS2);
      float FudgeFactor2 = getInlineFudgeFactor(CS2);

      if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2))
        allOuterCallsWillBeInlined = false;

      // See if we have this case.  We subtract off the penalty
      // for the call instruction, which we would be deleting.
      if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) &&
          Cost2 + Cost - (InlineConstants::CallPenalty + 1) >= 
                (int)(CurrentThreshold2 * FudgeFactor2)) {
        someOuterCallWouldNotBeInlined = true;
        TotalSecondaryCost += Cost2;
      }
    }
    // If all outer calls to Caller would get inlined, the cost for the last
    // one is set very low by getInlineCost, in anticipation that Caller will
    // be removed entirely.  We did not account for this above unless there
    // is only one caller of Caller.
    if (allOuterCallsWillBeInlined && Caller->use_begin() != Caller->use_end())
      TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;

    if (outerCallsFound && someOuterCallWouldNotBeInlined && 
        TotalSecondaryCost < Cost) {
      DEBUG(dbgs() << "    NOT Inlining: " << *CS.getInstruction() << 
           " Cost = " << Cost << 
           ", outer Cost = " << TotalSecondaryCost << '\n');
      return false;
    }
  }

  DEBUG(dbgs() << "    Inlining: cost=" << Cost
        << ", thres=" << AdjThreshold
        << ", Call: " << *CS.getInstruction() << '\n');
  return true;
}
Code example #20
File: Inliner.cpp  Project: CPFL/guc
/// InlineCallIfPossible - If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
/// This function also does some basic book-keeping to update the IR.  The
/// InlinedArrayAllocas map keeps track of any allocas that are already
/// available from other  functions inlined into the caller.  If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
                                 InlinedArrayAllocasTy &InlinedArrayAllocas) {
  Function *Callee = CS.getCalledFunction();
  Function *Caller = CS.getCaller();

  // Try to inline the function.  Get the list of static allocas that were
  // inlined.
  if (!InlineFunction(CS, IFI))
    return false;

  // If the inlined function had a higher stack protection level than the
  // calling function, then bump up the caller's stack protection level.
  if (Callee->hasFnAttr(Attribute::StackProtectReq))
    Caller->addFnAttr(Attribute::StackProtectReq);
  else if (Callee->hasFnAttr(Attribute::StackProtect) &&
           !Caller->hasFnAttr(Attribute::StackProtectReq))
    Caller->addFnAttr(Attribute::StackProtect);

  
  // Look at all of the allocas that we inlined through this call site.  If we
  // have already inlined other allocas through other calls into this function,
  // then we know that they have disjoint lifetimes and that we can merge them.
  //
  // There are many heuristics possible for merging these allocas, and the
  // different options have different tradeoffs.  One thing that we *really*
  // don't want to hurt is SRoA: once inlining happens, often allocas are no
  // longer address taken and so they can be promoted.
  //
  // Our "solution" for that is to only merge allocas whose outermost type is an
  // array type.  These are usually not promoted because someone is using a
  // variable index into them.  These are also often the most important ones to
  // merge.
  //
  // A better solution would be to have real memory lifetime markers in the IR
  // and not have the inliner do any merging of allocas at all.  This would
  // allow the backend to do proper stack slot coloring of all allocas that
  // *actually make it to the backend*, which is really what we want.
  //
  // Because we don't have this information, we do this simple and useful hack.
  //
  SmallPtrSet<AllocaInst*, 16> UsedAllocas;
  
  // Loop over all the allocas we have so far and see if they can be merged with
  // a previously inlined alloca.  If not, remember that we had it.
  for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
       AllocaNo != e; ++AllocaNo) {
    AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
    
    // Don't bother trying to merge array allocations (they will usually be
    // canonicalized to be an allocation *of* an array), or allocations whose
    // type is not itself an array (because we're afraid of pessimizing SRoA).
    const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
    if (ATy == 0 || AI->isArrayAllocation())
      continue;
    
    // Get the list of all available allocas for this array type.
    std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy];
    
    // Loop over the allocas in AllocasForType to see if we can reuse one.  Note
    // that we have to be careful not to reuse the same "available" alloca for
    // multiple different allocas that we just inlined, we use the 'UsedAllocas'
    // set to keep track of which "available" allocas are being used by this
    // function.  Also, AllocasForType can be empty of course!
    bool MergedAwayAlloca = false;
    for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
      AllocaInst *AvailableAlloca = AllocasForType[i];
      
      // The available alloca has to be in the right function, not in some other
      // function in this SCC.
      if (AvailableAlloca->getParent() != AI->getParent())
        continue;
      
      // If the inlined function already uses this alloca then we can't reuse
      // it.
      if (!UsedAllocas.insert(AvailableAlloca))
        continue;
      
      // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
      // success!
      DEBUG(dbgs() << "    ***MERGED ALLOCA: " << *AI);
      
      AI->replaceAllUsesWith(AvailableAlloca);
      AI->eraseFromParent();
      MergedAwayAlloca = true;
      ++NumMergedAllocas;
      break;
    }

    // If we already nuked the alloca, we're done with it.
    if (MergedAwayAlloca)
      continue;

    // If we were unable to merge away the alloca either because there are no
    // allocas of the right type available or because we reused them all
    // already, remember that this alloca came from an inlined function and mark
    // it used so we don't reuse it for other allocas from this inline
    // operation.
    AllocasForType.push_back(AI);
    UsedAllocas.insert(AI);
  }
  
  return true;
}
Code example #21
File: Inliner.cpp  Project: bkaradzic/SwiftShader
/// Return the cost only if the inliner should attempt to inline at the given
/// CallSite. If we return the cost, we will emit an optimisation remark later
/// using that cost, so we won't do so from this function.
static Optional<InlineCost>
shouldInline(CallSite CS, function_ref<InlineCost(CallSite CS)> GetInlineCost,
             OptimizationRemarkEmitter &ORE) {
  using namespace ore;

  InlineCost IC = GetInlineCost(CS);
  Instruction *Call = CS.getInstruction();
  Function *Callee = CS.getCalledFunction();
  Function *Caller = CS.getCaller();

  if (IC.isAlways()) {
    LLVM_DEBUG(dbgs() << "    Inlining: cost=always"
                      << ", Call: " << *CS.getInstruction() << "\n");
    return IC;
  }

  if (IC.isNever()) {
    LLVM_DEBUG(dbgs() << "    NOT Inlining: cost=never"
                      << ", Call: " << *CS.getInstruction() << "\n");
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
             << NV("Callee", Callee) << " not inlined into "
             << NV("Caller", Caller)
             << " because it should never be inlined (cost=never)";
    });
    return None;
  }

  if (!IC) {
    LLVM_DEBUG(dbgs() << "    NOT Inlining: cost=" << IC.getCost()
                      << ", thres=" << IC.getThreshold()
                      << ", Call: " << *CS.getInstruction() << "\n");
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
             << NV("Callee", Callee) << " not inlined into "
             << NV("Caller", Caller) << " because too costly to inline (cost="
             << NV("Cost", IC.getCost())
             << ", threshold=" << NV("Threshold", IC.getThreshold()) << ")";
    });
    return None;
  }

  int TotalSecondaryCost = 0;
  if (shouldBeDeferred(Caller, CS, IC, TotalSecondaryCost, GetInlineCost)) {
    LLVM_DEBUG(dbgs() << "    NOT Inlining: " << *CS.getInstruction()
                      << " Cost = " << IC.getCost()
                      << ", outer Cost = " << TotalSecondaryCost << '\n');
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts",
                                      Call)
             << "Not inlining. Cost of inlining " << NV("Callee", Callee)
             << " increases the cost of inlining " << NV("Caller", Caller)
             << " in other contexts";
    });

    // Do not inline this call site; the deferral remark was already emitted
    // above, so there is nothing further to report here.
    return None;
  }

  LLVM_DEBUG(dbgs() << "    Inlining: cost=" << IC.getCost()
                    << ", thres=" << IC.getThreshold()
                    << ", Call: " << *CS.getInstruction() << '\n');
  return IC;
}
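The control flow above folds three verdicts (always, never, and cost versus threshold) into one nullable return value. A minimal standalone sketch of that shape, using an invented Cost type in place of LLVM's InlineCost:

#include <iostream>
#include <optional>

// Simplified stand-in for an inline cost: a verdict kind plus cost/threshold.
enum class Kind { Always, Never, Variable };
struct Cost { Kind K; int Value; int Threshold; };

// Mirrors the shape of shouldInline above: a populated optional means
// "inline, and here is the cost to report later"; an empty optional means
// "do not inline" (the reason would already have been reported).
std::optional<Cost> decide(const Cost& IC) {
  if (IC.K == Kind::Always) return IC;
  if (IC.K == Kind::Never) return std::nullopt;
  if (IC.Value >= IC.Threshold) return std::nullopt;  // too costly
  return IC;                                          // profitable
}

int main() {
  Cost C{Kind::Variable, /*Value=*/40, /*Threshold=*/225};
  if (auto OC = decide(C))
    std::cout << "inline with cost=" << OC->Value << "\n";
  else
    std::cout << "skip\n";
}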
コード例 #22
0
ファイル: Inliner.cpp プロジェクト: bkaradzic/SwiftShader
static bool
inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
                std::function<AssumptionCache &(Function &)> GetAssumptionCache,
                ProfileSummaryInfo *PSI, TargetLibraryInfo &TLI,
                bool InsertLifetime,
                function_ref<InlineCost(CallSite CS)> GetInlineCost,
                function_ref<AAResults &(Function &)> AARGetter,
                ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
  SmallPtrSet<Function *, 8> SCCFunctions;
  LLVM_DEBUG(dbgs() << "Inliner visiting SCC:");
  for (CallGraphNode *Node : SCC) {
    Function *F = Node->getFunction();
    if (F)
      SCCFunctions.insert(F);
    LLVM_DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
  }

  // Scan through and identify all call sites ahead of time so that we only
  // inline call sites in the original functions, not call sites that result
  // from inlining other functions.
  SmallVector<std::pair<CallSite, int>, 16> CallSites;

  // When inlining a callee produces new call sites, we want to keep track of
  // the fact that they were inlined from the callee.  This allows us to avoid
  // infinite inlining in some obscure cases.  To represent this, we use an
  // index into the InlineHistory vector.
  SmallVector<std::pair<Function *, int>, 8> InlineHistory;

  for (CallGraphNode *Node : SCC) {
    Function *F = Node->getFunction();
    if (!F || F->isDeclaration())
      continue;

    OptimizationRemarkEmitter ORE(F);
    for (BasicBlock &BB : *F)
      for (Instruction &I : BB) {
        CallSite CS(cast<Value>(&I));
        // If this isn't a call, or it is a call to an intrinsic, it can
        // never be inlined.
        if (!CS || isa<IntrinsicInst>(I))
          continue;

        // If this is a direct call to an external function, we can never inline
        // it.  If it is an indirect call, inlining may resolve it to be a
        // direct call, so we keep it.
        if (Function *Callee = CS.getCalledFunction())
          if (Callee->isDeclaration()) {
            using namespace ore;

            ORE.emit([&]() {
              return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
                     << NV("Callee", Callee) << " will not be inlined into "
                     << NV("Caller", CS.getCaller())
                     << " because its definition is unavailable"
                     << setIsVerbose();
            });
            continue;
          }

        CallSites.push_back(std::make_pair(CS, -1));
      }
  }

  LLVM_DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");

  // If there are no call sites in this SCC, exit early.
  if (CallSites.empty())
    return false;

  // Now that we have all of the call sites, move the ones to functions in the
  // current SCC to the end of the list.
  unsigned FirstCallInSCC = CallSites.size();
  for (unsigned i = 0; i < FirstCallInSCC; ++i)
    if (Function *F = CallSites[i].first.getCalledFunction())
      if (SCCFunctions.count(F))
        std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);

  InlinedArrayAllocasTy InlinedArrayAllocas;
  InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache, PSI);

  // Now that we have all of the call sites, loop over them and inline them if
  // it looks profitable to do so.
  bool Changed = false;
  bool LocalChange;
  do {
    LocalChange = false;
    // Keep iterating (via the outer do-while loop) because inlining functions
    // can cause indirect calls to become direct calls.
    // CallSites may be modified inside the loop, so a range-based for loop
    // cannot be used.
    for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
      CallSite CS = CallSites[CSi].first;

      Function *Caller = CS.getCaller();
      Function *Callee = CS.getCalledFunction();

      // We can only inline direct calls to non-declarations.
      if (!Callee || Callee->isDeclaration())
        continue;

      Instruction *Instr = CS.getInstruction();

      bool IsTriviallyDead = isInstructionTriviallyDead(Instr, &TLI);

      int InlineHistoryID;
      if (!IsTriviallyDead) {
        // If this call site was obtained by inlining another function, verify
        // that the inline path for this call site did not already include the
        // callee itself.  If it did, we'd be recursively inlining the same
        // function, which would expose the same call sites and cause us to
        // inline infinitely.
        InlineHistoryID = CallSites[CSi].second;
        if (InlineHistoryID != -1 &&
            InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
          continue;
      }

      // FIXME for new PM: because of the old PM we currently generate ORE and
      // in turn BFI on demand.  With the new PM, the ORE dependency should
      // just become a regular analysis dependency.
      OptimizationRemarkEmitter ORE(Caller);

      Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
      // If the policy determines that we should not inline this call site,
      // move on to the next one.
      if (!OIC)
        continue;

      // If this call site is dead and it is to a readonly function, we should
      // just delete the call instead of trying to inline it, regardless of
      // size.  This happens because IPSCCP propagates the result out of the
      // call and then we're left with the dead call.
      if (IsTriviallyDead) {
        LLVM_DEBUG(dbgs() << "    -> Deleting dead call: " << *Instr << "\n");
        // Update the call graph by deleting the edge from Callee to Caller.
        CG[Caller]->removeCallEdgeFor(CS);
        Instr->eraseFromParent();
        ++NumCallsDeleted;
      } else {
        // Get DebugLoc to report. CS will be invalid after Inliner.
        DebugLoc DLoc = CS->getDebugLoc();
        BasicBlock *Block = CS.getParent();

        // Attempt to inline the function.
        using namespace ore;

        if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
                                  InlineHistoryID, InsertLifetime, AARGetter,
                                  ImportedFunctionsStats)) {
          ORE.emit([&]() {
            return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
                                            Block)
                   << NV("Callee", Callee) << " will not be inlined into "
                   << NV("Caller", Caller);
          });
          continue;
        }
        ++NumInlined;

        ORE.emit([&]() {
          bool AlwaysInline = OIC->isAlways();
          StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
          OptimizationRemark R(DEBUG_TYPE, RemarkName, DLoc, Block);
          R << NV("Callee", Callee) << " inlined into ";
          R << NV("Caller", Caller);
          if (AlwaysInline)
            R << " with cost=always";
          else {
            R << " with cost=" << NV("Cost", OIC->getCost());
            R << " (threshold=" << NV("Threshold", OIC->getThreshold());
            R << ")";
          }
          return R;
        });

        // If inlining this function gave us any new call sites, throw them
        // onto our worklist to process.  They are useful inline candidates.
        if (!InlineInfo.InlinedCalls.empty()) {
          // Create a new inline history entry for this, so that we remember
          // that these new callsites came about due to inlining Callee.
          int NewHistoryID = InlineHistory.size();
          InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));

          for (Value *Ptr : InlineInfo.InlinedCalls)
            CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
        }
      }

      // If we inlined or deleted the last possible call site to the function,
      // delete the function body now.
      if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
          // TODO: Can remove if in SCC now.
          !SCCFunctions.count(Callee) &&
          // The function may be apparently dead, but if there are indirect
          // callgraph references to the node, we cannot delete it yet, this
          // could invalidate the CGSCC iterator.
          CG[Callee]->getNumReferences() == 0) {
        LLVM_DEBUG(dbgs() << "    -> Deleting dead function: "
                          << Callee->getName() << "\n");
        CallGraphNode *CalleeNode = CG[Callee];

        // Remove any call graph edges from the callee to its callees.
        CalleeNode->removeAllCalledFunctions();

        // Remove the node for the callee from the call graph and delete it.
        delete CG.removeFunctionFromModule(CalleeNode);
        ++NumDeleted;
      }

      // Remove this call site from the list.  If possible, use
      // swap/pop_back for efficiency, but do not use it if doing so would
      // move a call site to a function in this SCC before the
      // 'FirstCallInSCC' barrier.
      if (SCC.isSingular()) {
        CallSites[CSi] = CallSites.back();
        CallSites.pop_back();
      } else {
        CallSites.erase(CallSites.begin() + CSi);
      }
      --CSi;

      Changed = true;
      LocalChange = true;
    }
  } while (LocalChange);

  return Changed;
}
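The InlineHistory vector used above stores (inlined function, parent entry index) pairs, so detecting would-be recursive inlining is a walk up that parent chain. A self-contained sketch of such a check, with invented names rather than the real InlineHistoryIncludes helper:

#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Each entry: the function that was inlined, and the index of the history
// entry for the call site it came from (-1 for an original call site).
using History = std::vector<std::pair<std::string, int>>;

// True if Callee already appears anywhere on the chain ending at Id, meaning
// inlining it again here could recurse forever.
bool historyIncludes(const std::string& Callee, int Id, const History& H) {
  while (Id != -1) {
    if (H[Id].first == Callee)
      return true;
    Id = H[Id].second;           // walk to the parent call site
  }
  return false;
}

int main() {
  History H;
  H.push_back({"f", -1});        // inlined f at an original call site
  H.push_back({"g", 0});         // inlined g via a call exposed by inlining f
  std::cout << historyIncludes("f", 1, H) << "\n";  // 1: would re-inline f
  std::cout << historyIncludes("h", 1, H) << "\n";  // 0: h is not on the chain
}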
コード例 #23
0
ファイル: Inliner.cpp プロジェクト: smibarber/llvm-cfcss
/// shouldInline - Return true if the inliner should attempt to inline
/// at the given CallSite.
bool Inliner::shouldInline(CallSite CS) {
  InlineCost IC = getInlineCost(CS);
  
  if (IC.isAlways()) {
    DEBUG(dbgs() << "    Inlining: cost=always"
          << ", Call: " << *CS.getInstruction() << "\n");
    return true;
  }
  
  if (IC.isNever()) {
    DEBUG(dbgs() << "    NOT Inlining: cost=never"
          << ", Call: " << *CS.getInstruction() << "\n");
    return false;
  }
  
  int Cost = IC.getValue();
  Function *Caller = CS.getCaller();
  int CurrentThreshold = getInlineThreshold(CS);
  float FudgeFactor = getInlineFudgeFactor(CS);
  int AdjThreshold = (int)(CurrentThreshold * FudgeFactor);
  if (Cost >= AdjThreshold) {
    DEBUG(dbgs() << "    NOT Inlining: cost=" << Cost
          << ", thres=" << AdjThreshold
          << ", Call: " << *CS.getInstruction() << "\n");
    return false;
  }
  
  // Try to detect the case where the current inlining candidate caller (call
  // it B) is a static or linkonce-ODR function and is an inlining candidate
  // elsewhere, and the current candidate callee (call it C) is large enough
  // that inlining it into B would make B too big to inline later. In these
  // circumstances it may be best not to inline C into B, but to inline B into
  // its callers.
  //
  // This only applies to static and linkonce-ODR functions because those are
  // expected to be available for inlining in the translation units where they
  // are used. Thus we will always have the opportunity to make local inlining
  // decisions. Importantly the linkonce-ODR linkage covers inline functions
  // and templates in C++.
  if (Caller->hasLocalLinkage() ||
      Caller->getLinkage() == GlobalValue::LinkOnceODRLinkage) {
    int TotalSecondaryCost = 0;
    bool outerCallsFound = false;
    // This bool tracks what happens if we do NOT inline C into B.
    bool callerWillBeRemoved = true;
    // This bool tracks what happens if we DO inline C into B.
    bool inliningPreventsSomeOuterInline = false;
    for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end(); 
         I != E; ++I) {
      CallSite CS2(*I);

      // If this isn't a call to Caller (it could be some other sort
      // of reference) skip it.  Such references will prevent the caller
      // from being removed.
      if (!CS2 || CS2.getCalledFunction() != Caller) {
        callerWillBeRemoved = false;
        continue;
      }

      InlineCost IC2 = getInlineCost(CS2);
      if (IC2.isNever())
        callerWillBeRemoved = false;
      if (IC2.isAlways() || IC2.isNever())
        continue;

      outerCallsFound = true;
      int Cost2 = IC2.getValue();
      int CurrentThreshold2 = getInlineThreshold(CS2);
      float FudgeFactor2 = getInlineFudgeFactor(CS2);

      if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2))
        callerWillBeRemoved = false;

      // See if we have this case.  We subtract off the penalty
      // for the call instruction, which we would be deleting.
      if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) &&
          Cost2 + Cost - (InlineConstants::CallPenalty + 1) >= 
                (int)(CurrentThreshold2 * FudgeFactor2)) {
        inliningPreventsSomeOuterInline = true;
        TotalSecondaryCost += Cost2;
      }
    }
    // If all outer calls to Caller would get inlined, the cost for the last
    // one is set very low by getInlineCost, in anticipation that Caller will
    // be removed entirely.  We did not account for this above unless there
    // is only one caller of Caller.
    if (callerWillBeRemoved && Caller->use_begin() != Caller->use_end())
      TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;

    if (outerCallsFound && inliningPreventsSomeOuterInline &&
        TotalSecondaryCost < Cost) {
      DEBUG(dbgs() << "    NOT Inlining: " << *CS.getInstruction() << 
           " Cost = " << Cost << 
           ", outer Cost = " << TotalSecondaryCost << '\n');
      return false;
    }
  }

  DEBUG(dbgs() << "    Inlining: cost=" << Cost
        << ", thres=" << AdjThreshold
        << ", Call: " << *CS.getInstruction() << '\n');
  return true;
}
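The deferral heuristic in this older variant is plain integer arithmetic over two call sites' costs and thresholds. A worked example with invented numbers (CallPenalty here merely stands in for InlineConstants::CallPenalty):

#include <iostream>

int main() {
  // All numbers are invented, only to show the comparison in shouldInline.
  const int CallPenalty = 25;
  int Cost = 120;        // cost of inlining C into B (the current candidate)
  int Cost2 = 150;       // cost of inlining B into one of its own callers
  int Threshold2 = 225;  // that outer call site's (fudged) threshold

  bool OuterStillFits = Cost2 < Threshold2;                    // 150 < 225
  bool OuterBreaksIfWeInline =
      Cost2 + Cost - (CallPenalty + 1) >= Threshold2;          // 244 >= 225
  std::cout << "defer inlining C into B: "
            << (OuterStillFits && OuterBreaksIfWeInline) << "\n";  // prints 1
}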
コード例 #24
0
ファイル: Inliner.cpp プロジェクト: bkaradzic/SwiftShader
PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
                                   CGSCCAnalysisManager &AM, LazyCallGraph &CG,
                                   CGSCCUpdateResult &UR) {
  const ModuleAnalysisManager &MAM =
      AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager();
  bool Changed = false;

  assert(InitialC.size() > 0 && "Cannot handle an empty SCC!");
  Module &M = *InitialC.begin()->getFunction().getParent();
  ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);

  if (!ImportedFunctionsStats &&
      InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) {
    ImportedFunctionsStats =
        llvm::make_unique<ImportedFunctionsInliningStatistics>();
    ImportedFunctionsStats->setModuleInfo(M);
  }

  // We use a single common worklist for calls across the entire SCC. We
  // process these in-order and append new calls introduced during inlining to
  // the end.
  //
  // Note that this particular order of processing is actually critical to
  // avoid very bad behaviors. Consider *highly connected* call graphs where
  // each function contains a small amount of code and a couple of calls to
  // other functions. Because the LLVM inliner is fundamentally a bottom-up
  // inliner, it can handle gracefully the fact that these all appear to be
  // reasonable inlining candidates as it will flatten things until they become
  // too big to inline, and then move on and flatten another batch.
  //
  // However, when processing call edges *within* an SCC we cannot rely on this
  // bottom-up behavior. As a consequence, with heavily connected *SCCs* of
  // functions we can end up incrementally inlining N calls into each of
  // N functions because each incremental inlining decision looks good and we
  // don't have a topological ordering to prevent explosions.
  //
  // To compensate for this, we don't process transitive edges made immediate
  // by inlining until we've done one pass of inlining across the entire SCC.
  // Large, highly connected SCCs still lead to some amount of code bloat in
  // this model, but it is uniformly spread across all the functions in the SCC
  // and eventually they all become too large to inline, rather than
  // incrementally making a single function grow in a superlinear fashion.
  SmallVector<std::pair<CallSite, int>, 16> Calls;

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG)
          .getManager();

  // Populate the initial list of calls in this SCC.
  for (auto &N : InitialC) {
    auto &ORE =
        FAM.getResult<OptimizationRemarkEmitterAnalysis>(N.getFunction());
    // We want to generally process call sites top-down in order for
    // simplifications stemming from replacing the call with the returned value
    // after inlining to be visible to subsequent inlining decisions.
    // FIXME: Using instructions sequence is a really bad way to do this.
    // Instead we should do an actual RPO walk of the function body.
    for (Instruction &I : instructions(N.getFunction()))
      if (auto CS = CallSite(&I))
        if (Function *Callee = CS.getCalledFunction()) {
          if (!Callee->isDeclaration())
            Calls.push_back({CS, -1});
          else if (!isa<IntrinsicInst>(I)) {
            using namespace ore;
            ORE.emit([&]() {
              return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
                     << NV("Callee", Callee) << " will not be inlined into "
                     << NV("Caller", CS.getCaller())
                     << " because its definition is unavailable"
                     << setIsVerbose();
            });
          }
        }
  }
  if (Calls.empty())
    return PreservedAnalyses::all();

  // Capture updatable variables for the current SCC and RefSCC.
  auto *C = &InitialC;
  auto *RC = &C->getOuterRefSCC();

  // When inlining a callee produces new call sites, we want to keep track of
  // the fact that they were inlined from the callee.  This allows us to avoid
  // infinite inlining in some obscure cases.  To represent this, we use an
  // index into the InlineHistory vector.
  SmallVector<std::pair<Function *, int>, 16> InlineHistory;

  // Track a set vector of inlined callees so that we can augment the caller
  // with all of their edges in the call graph before pruning out the ones that
  // got simplified away.
  SmallSetVector<Function *, 4> InlinedCallees;

  // Track the dead functions to delete once finished with inlining calls. We
  // defer deleting these to make it easier to handle the call graph updates.
  SmallVector<Function *, 4> DeadFunctions;

  // Loop forward over all of the calls. Note that we cannot cache the size as
  // inlining can introduce new calls that need to be processed.
  for (int i = 0; i < (int)Calls.size(); ++i) {
    // We expect the calls to typically be batched with sequences of calls that
    // have the same caller, so we first set up some shared infrastructure for
    // this caller. We also do any pruning we can at this layer on the caller
    // alone.
    Function &F = *Calls[i].first.getCaller();
    LazyCallGraph::Node &N = *CG.lookup(F);
    if (CG.lookupSCC(N) != C)
      continue;
    if (F.hasFnAttribute(Attribute::OptimizeNone))
      continue;

    LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n");

    // Get a FunctionAnalysisManager via a proxy for this particular node. We
    // do this each time we visit a node as the SCC may have changed and as
    // we're going to mutate this particular function we want to make sure the
    // proxy is in place to forward any invalidation events. We can use the
    // manager we get here for looking up results for functions other than this
    // node however because those functions aren't going to be mutated by this
    // pass.
    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG)
            .getManager();

    // Get the remarks emission analysis for the caller.
    auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);

    std::function<AssumptionCache &(Function &)> GetAssumptionCache =
        [&](Function &F) -> AssumptionCache & {
      return FAM.getResult<AssumptionAnalysis>(F);
    };
    auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
      return FAM.getResult<BlockFrequencyAnalysis>(F);
    };

    auto GetInlineCost = [&](CallSite CS) {
      Function &Callee = *CS.getCalledFunction();
      auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
      return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI},
                           PSI, &ORE);
    };

    // Now process as many calls as we have within this caller in the sequence.
    // We bail out as soon as the caller has to change so we can update the
    // call graph and prepare the context of that new caller.
    bool DidInline = false;
    for (; i < (int)Calls.size() && Calls[i].first.getCaller() == &F; ++i) {
      int InlineHistoryID;
      CallSite CS;
      std::tie(CS, InlineHistoryID) = Calls[i];
      Function &Callee = *CS.getCalledFunction();

      if (InlineHistoryID != -1 &&
          InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory))
        continue;

      // Check if this inlining may repeat breaking an SCC apart that has
      // already been split once before. In that case, inlining here may
      // trigger infinite inlining, much like is prevented within the inliner
      // itself by the InlineHistory above, but spread across CGSCC iterations
      // and thus hidden from the full inline history.
      if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
          UR.InlinedInternalEdges.count({&N, C})) {
        LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
                             "previously split out of this SCC by inlining: "
                          << F.getName() << " -> " << Callee.getName() << "\n");
        continue;
      }

      Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
      // Check whether we want to inline this callsite.
      if (!OIC)
        continue;

      // Setup the data structure used to plumb customization into the
      // `InlineFunction` routine.
      InlineFunctionInfo IFI(
          /*cg=*/nullptr, &GetAssumptionCache, PSI,
          &FAM.getResult<BlockFrequencyAnalysis>(*(CS.getCaller())),
          &FAM.getResult<BlockFrequencyAnalysis>(Callee));

      // Get DebugLoc to report. CS will be invalid after Inliner.
      DebugLoc DLoc = CS->getDebugLoc();
      BasicBlock *Block = CS.getParent();

      using namespace ore;

      if (!InlineFunction(CS, IFI)) {
        ORE.emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
                 << NV("Callee", &Callee) << " will not be inlined into "
                 << NV("Caller", &F);
        });
        continue;
      }
      DidInline = true;
      InlinedCallees.insert(&Callee);

      ORE.emit([&]() {
        bool AlwaysInline = OIC->isAlways();
        StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
        OptimizationRemark R(DEBUG_TYPE, RemarkName, DLoc, Block);
        R << NV("Callee", &Callee) << " inlined into ";
        R << NV("Caller", &F);
        if (AlwaysInline)
          R << " with cost=always";
        else {
          R << " with cost=" << NV("Cost", OIC->getCost());
          R << " (threshold=" << NV("Threshold", OIC->getThreshold());
          R << ")";
        }
        return R;
      });

      // Add any new callsites to defined functions to the worklist.
      if (!IFI.InlinedCallSites.empty()) {
        int NewHistoryID = InlineHistory.size();
        InlineHistory.push_back({&Callee, InlineHistoryID});
        for (CallSite &CS : reverse(IFI.InlinedCallSites))
          if (Function *NewCallee = CS.getCalledFunction())
            if (!NewCallee->isDeclaration())
              Calls.push_back({CS, NewHistoryID});
      }

      if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
        ImportedFunctionsStats->recordInline(F, Callee);

      // Merge the attributes based on the inlining.
      AttributeFuncs::mergeAttributesForInlining(F, Callee);

      // For local functions, check whether this makes the callee trivially
      // dead. In that case, we can drop the body of the function eagerly
      // which may reduce the number of callers of other functions to one,
      // changing inline cost thresholds.
      if (Callee.hasLocalLinkage()) {
        // To check this we also need to nuke any dead constant uses (perhaps
        // made dead by this operation on other functions).
        Callee.removeDeadConstantUsers();
        if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
          Calls.erase(
              std::remove_if(Calls.begin() + i + 1, Calls.end(),
                             [&Callee](const std::pair<CallSite, int> &Call) {
                               return Call.first.getCaller() == &Callee;
                             }),
              Calls.end());
          // Clear the body and queue the function itself for deletion when we
          // finish inlining and call graph updates.
          // Note that after this point, it is an error to do anything other
          // than use the callee's address or delete it.
          Callee.dropAllReferences();
          assert(find(DeadFunctions, &Callee) == DeadFunctions.end() &&
                 "Cannot put cause a function to become dead twice!");
          DeadFunctions.push_back(&Callee);
        }
      }
    }

    // Back the call index up by one to put us in a good position to go around
    // the outer loop.
    --i;

    if (!DidInline)
      continue;
    Changed = true;

    // Add all the inlined callees' edges as ref edges to the caller. These are
    // by definition trivial edges as we always have *some* transitive ref edge
    // chain. While in some cases these edges are direct calls inside the
    // callee, they have to be modeled in the inliner as reference edges as
    // there may be a reference edge anywhere along the chain from the current
    // caller to the callee that causes the whole thing to appear like
    // a (transitive) reference edge that will require promotion to a call edge
    // below.
    for (Function *InlinedCallee : InlinedCallees) {
      LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee);
      for (LazyCallGraph::Edge &E : *CalleeN)
        RC->insertTrivialRefEdge(N, E.getNode());
    }

    // At this point, since we have made changes we have at least removed
    // a call instruction. However, in the process we do some incremental
    // simplification of the surrounding code. This simplification can
    // essentially do all of the same things as a function pass and we can
    // re-use the exact same logic for updating the call graph to reflect the
    // change.
    LazyCallGraph::SCC *OldC = C;
    C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
    LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
    RC = &C->getOuterRefSCC();

    // If this causes an SCC to split apart into multiple smaller SCCs, there
    // is a subtle risk we need to prepare for. Other transformations may
    // expose an "infinite inlining" opportunity later, and because of the SCC
    // mutation, we will revisit this function and potentially re-inline. If we
    // do, and that re-inlining also has the potential to mutate the SCC
    // structure, the infinite inlining problem can manifest through infinite
    // SCC splits and merges. To avoid this, we capture the originating caller
    // node and the SCC containing the call edge. This is a slight over
    // approximation of the possible inlining decisions that must be avoided,
    // but is relatively efficient to store.
    // FIXME: This seems like a very heavyweight way of retaining the inline
    // history, we should look for a more efficient way of tracking it.
    if (C != OldC && llvm::any_of(InlinedCallees, [&](Function *Callee) {
          return CG.lookupSCC(*CG.lookup(*Callee)) == OldC;
        })) {
      LLVM_DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, "
                           "retaining this to avoid infinite inlining.\n");
      UR.InlinedInternalEdges.insert({&N, OldC});
    }
    InlinedCallees.clear();
  }

  // Now that we've finished inlining all of the calls across this SCC, delete
  // all of the trivially dead functions, updating the call graph and the CGSCC
  // pass manager in the process.
  //
  // Note that this walks a pointer set which has non-deterministic order but
  // that is OK as all we do is delete things and add pointers to unordered
  // sets.
  for (Function *DeadF : DeadFunctions) {
    // Get the necessary information out of the call graph and nuke the
    // function there. Also, clear out any cached analyses.
    auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(DeadC, CG)
            .getManager();
    FAM.clear(*DeadF, DeadF->getName());
    AM.clear(DeadC, DeadC.getName());
    auto &DeadRC = DeadC.getOuterRefSCC();
    CG.removeDeadFunction(*DeadF);

    // Mark the relevant parts of the call graph as invalid so we don't visit
    // them.
    UR.InvalidatedSCCs.insert(&DeadC);
    UR.InvalidatedRefSCCs.insert(&DeadRC);

    // And delete the actual function from the module.
    M.getFunctionList().erase(DeadF);
  }

  if (!Changed)
    return PreservedAnalyses::all();

  // Even if we change the IR, we update the core CGSCC data structures and so
  // can preserve the proxy to the function analysis manager.
  PreservedAnalyses PA;
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  return PA;
}
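The loop structure above, an outer loop over the worklist with an inner loop that drains every call sharing the current caller and then steps the index back by one, is a general batching idiom. A standalone sketch with invented data showing why the final --i is needed:

#include <iostream>
#include <string>
#include <vector>

struct Call { std::string Caller, Callee; };

int main() {
  std::vector<Call> Calls = {
      {"a", "x"}, {"a", "y"},   // two calls queued for caller a
      {"b", "x"},               // one call queued for caller b
  };

  for (int i = 0; i < (int)Calls.size(); ++i) {
    const std::string Caller = Calls[i].Caller;
    std::cout << "set up per-caller state for " << Caller << "\n";

    // Drain every queued call with the same caller in one batch; calls
    // appended during processing are also picked up because size() is re-read.
    for (; i < (int)Calls.size() && Calls[i].Caller == Caller; ++i)
      std::cout << "  process " << Caller << " -> " << Calls[i].Callee << "\n";

    // The inner loop overshot by one; step back so the outer ++i lands on the
    // first call of the next caller.
    --i;
  }
}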
コード例 #25
0
ファイル: PartialInlining.cpp プロジェクト: CTSRD-SOAAP/llvm
bool PartialInlinerImpl::shouldPartialInline(
    CallSite CS, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
    OptimizationRemarkEmitter &ORE) {

  using namespace ore;
  if (SkipCostAnalysis)
    return true;

  Instruction *Call = CS.getInstruction();
  Function *Callee = CS.getCalledFunction();
  assert(Callee == Cloner.ClonedFunc);

  Function *Caller = CS.getCaller();
  auto &CalleeTTI = (*GetTTI)(*Callee);
  InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
                                *GetAssumptionCache, GetBFI, PSI, &ORE);

  if (IC.isAlways()) {
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
             << NV("Callee", Cloner.OrigFunc)
             << " should always be fully inlined, not partially");
    return false;
  }

  if (IC.isNever()) {
    ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
             << NV("Caller", Caller)
             << " because it should never be inlined (cost=never)");
    return false;
  }

  if (!IC) {
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
             << NV("Caller", Caller) << " because too costly to inline (cost="
             << NV("Cost", IC.getCost()) << ", threshold="
             << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
    return false;
  }
  const DataLayout &DL = Caller->getParent()->getDataLayout();

  // The savings of eliminating the call:
  int NonWeightedSavings = getCallsiteCost(CS, DL);
  BlockFrequency NormWeightedSavings(NonWeightedSavings);

  // If the weighted savings are smaller than the weighted cost, return false.
  if (NormWeightedSavings < WeightedOutliningRcost) {
    ORE.emit(
        OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", Call)
        << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
        << NV("Caller", Caller) << " runtime overhead (overhead="
        << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
        << ", savings="
        << NV("Savings", (unsigned)NormWeightedSavings.getFrequency()) << ")"
        << " of making the outlined call is too high");

    return false;
  }

  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
           << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
           << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
           << " (threshold="
           << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
  return true;
}
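The final comparison above weighs the savings of eliminating the call against the outlined call's runtime overhead scaled by how often the outlined region executes. A small arithmetic sketch with invented numbers in place of the BlockFrequency math:

#include <iostream>

int main() {
  // Invented numbers; BlockFrequency is replaced by plain arithmetic.
  int OutliningRuntimeCost = 45;   // extra work for calling the outlined code
  int CallsiteSavings = 30;        // what eliminating the original call saves
  double OutlinedRegionRelFreq = 0.25;  // how often the outlined (cold) region
                                        // runs relative to the function entry

  double WeightedOutliningCost = OutliningRuntimeCost * OutlinedRegionRelFreq;
  // Mirrors "NormWeightedSavings < WeightedOutliningRcost" above: reject
  // partial inlining when the savings do not cover the weighted overhead.
  bool Rejected = CallsiteSavings < WeightedOutliningCost;  // 30 < 11.25 -> false
  std::cout << "rejected = " << Rejected << "\n";
}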
コード例 #26
0
ファイル: PartialInlining.cpp プロジェクト: CTSRD-SOAAP/llvm
bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
  int NonWeightedRcost;
  int SizeCost;

  if (Cloner.OutlinedFunc == nullptr)
    return false;

  std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);

  auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
  auto WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;

  // If the call sequence to the outlined function is larger than the original
  // outlined region, outlining does not increase the chances of inlining the
  // function (the inliner uses the size increase to model the cost of inlining
  // a callee).
  if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
    OptimizationRemarkEmitter ORE(Cloner.OrigFunc);
    DebugLoc DLoc;
    BasicBlock *Block;
    std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
                                        DLoc, Block)
             << ore::NV("Function", Cloner.OrigFunc)
             << " not partially inlined into callers (Original Size = "
             << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
             << ", Size of call sequence to outlined function = "
             << ore::NV("NewSize", SizeCost) << ")");
    return false;
  }

  assert(Cloner.OrigFunc->user_begin() == Cloner.OrigFunc->user_end() &&
         "F's users should all be replaced!");

  std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
                            Cloner.ClonedFunc->user_end());

  DenseMap<User *, uint64_t> CallSiteToProfCountMap;
  if (Cloner.OrigFunc->getEntryCount())
    computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);

  auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
  uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0);

  bool AnyInline = false;
  for (User *User : Users) {
    CallSite CS = getCallSite(User);

    if (IsLimitReached())
      continue;

    OptimizationRemarkEmitter ORE(CS.getCaller());

    if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE))
      continue;

    ORE.emit(
        OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction())
        << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
        << ore::NV("Caller", CS.getCaller()));

    InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
    InlineFunction(CS, IFI);

    // Now update the entry count:
    if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
      uint64_t CallSiteCount = CallSiteToProfCountMap[User];
      CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
    }

    AnyInline = true;
    NumPartialInlining++;
    // Update the stats
    NumPartialInlined++;
  }

  if (AnyInline) {
    Cloner.IsFunctionInlined = true;
    if (CalleeEntryCount)
      Cloner.OrigFunc->setEntryCount(CalleeEntryCountV);
  }

  return AnyInline;
}
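The entry-count update at the end subtracts each inlined call site's profile count from the callee's remaining entry count, clamping at zero. A minimal sketch with invented counts:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Invented profile data: the callee was entered 1000 times in total, and we
  // partially inline it at call sites observed 600 and 700 times.
  uint64_t CalleeEntryCount = 1000;
  std::vector<uint64_t> InlinedCallSiteCounts = {600, 700};

  for (uint64_t CallSiteCount : InlinedCallSiteCounts)
    CalleeEntryCount -= std::min(CalleeEntryCount, CallSiteCount);

  // 1000 - 600 = 400, then 400 - min(400, 700) = 0 (clamped, not underflowed).
  std::cout << "remaining entry count = " << CalleeEntryCount << "\n";
}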
コード例 #27
0
ファイル: Inliner.cpp プロジェクト: bryant/llvm
/// Return true if the inliner should attempt to inline at the given CallSite.
static bool shouldInline(CallSite CS,
                         function_ref<InlineCost(CallSite CS)> GetInlineCost,
                         OptimizationRemarkEmitter &ORE) {
  using namespace ore;
  InlineCost IC = GetInlineCost(CS);
  Instruction *Call = CS.getInstruction();
  Function *Callee = CS.getCalledFunction();

  if (IC.isAlways()) {
    DEBUG(dbgs() << "    Inlining: cost=always"
                 << ", Call: " << *CS.getInstruction() << "\n");
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
             << NV("Callee", Callee)
             << " should always be inlined (cost=always)");
    return true;
  }

  if (IC.isNever()) {
    DEBUG(dbgs() << "    NOT Inlining: cost=never"
                 << ", Call: " << *CS.getInstruction() << "\n");
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "NeverInline", Call)
             << NV("Callee", Callee)
             << " should never be inlined (cost=never)");
    return false;
  }

  Function *Caller = CS.getCaller();
  if (!IC) {
    DEBUG(dbgs() << "    NOT Inlining: cost=" << IC.getCost()
                 << ", thres=" << (IC.getCostDelta() + IC.getCost())
                 << ", Call: " << *CS.getInstruction() << "\n");
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
             << NV("Callee", Callee) << " too costly to inline (cost="
             << NV("Cost", IC.getCost()) << ", threshold="
             << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
    return false;
  }

  int TotalSecondaryCost = 0;
  if (shouldBeDeferred(Caller, CS, IC, TotalSecondaryCost, GetInlineCost)) {
    DEBUG(dbgs() << "    NOT Inlining: " << *CS.getInstruction()
                 << " Cost = " << IC.getCost()
                 << ", outer Cost = " << TotalSecondaryCost << '\n');
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE,
                                        "IncreaseCostInOtherContexts", Call)
             << "Not inlining. Cost of inlining " << NV("Callee", Callee)
             << " increases the cost of inlining " << NV("Caller", Caller)
             << " in other contexts");
    return false;
  }

  DEBUG(dbgs() << "    Inlining: cost=" << IC.getCost()
               << ", thres=" << (IC.getCostDelta() + IC.getCost())
               << ", Call: " << *CS.getInstruction() << '\n');
  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBeInlined", Call)
           << NV("Callee", Callee) << " can be inlined into "
           << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
           << " (threshold="
           << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
  return true;
}
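This variant never stores the threshold directly; the debug output and remarks reconstruct it as getCostDelta() + getCost(). A one-line arithmetic sketch of that relationship, with invented numbers:

#include <iostream>

int main() {
  // Invented numbers. The remarks above print getCostDelta() + getCost() as
  // the threshold, i.e. the cost delta is the remaining headroom below it.
  int Cost = 180;
  int CostDelta = 45;
  int Threshold = CostDelta + Cost;   // 225
  std::cout << "cost=" << Cost << " threshold=" << Threshold << "\n";
}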