Exemple #1
0
void SILPerformanceInliner::collectAppliesToInline(
    SILFunction *Caller, SmallVectorImpl<FullApplySite> &Applies,
    DominanceAnalysis *DA, SILLoopAnalysis *LA) {
  DominanceInfo *DT = DA->get(Caller);
  SILLoopInfo *LI = LA->get(Caller);

  ConstantTracker constTracker(Caller);
  DominanceOrder domOrder(&Caller->front(), DT, Caller->size());

  // Go through all instructions and find candidates for inlining.
  // We do this in dominance order for the constTracker.
  SmallVector<FullApplySite, 8> InitialCandidates;
  while (SILBasicBlock *block = domOrder.getNext()) {
    constTracker.beginBlock();
    unsigned loopDepth = LI->getLoopDepth(block);
    for (auto I = block->begin(), E = block->end(); I != E; ++I) {
      constTracker.trackInst(&*I);

      if (!FullApplySite::isa(&*I))
        continue;

      FullApplySite AI = FullApplySite(&*I);

      DEBUG(llvm::dbgs() << "    Check:" << *I);

      auto *Callee = getEligibleFunction(AI);
      if (Callee) {
        if (isProfitableToInline(AI, loopDepth, DA, LA, constTracker))
          InitialCandidates.push_back(AI);
      }
    }
    domOrder.pushChildrenIf(block, [&] (SILBasicBlock *child) {
      if (ColdBlockInfo::isSlowPath(block, child)) {
        // Handle cold blocks separately.
        visitColdBlocks(InitialCandidates, child, DT);
        return false;
      }
      return true;
    });
  }

  // Calculate how many times a callee is called from this caller.
  llvm::DenseMap<SILFunction *, unsigned> CalleeCount;
  for (auto AI : InitialCandidates) {
    SILFunction *Callee = AI.getCalleeFunction();
    assert(Callee && "apply_inst does not have a direct callee anymore");
    CalleeCount[Callee]++;
  }

  // Now copy each candidate callee that has a small enough number of
  // call sites into the final set of call sites.
  for (auto AI : InitialCandidates) {
    SILFunction *Callee = AI.getCalleeFunction();
    assert(Callee && "apply_inst does not have a direct callee anymore");

    const unsigned CallsToCalleeThreshold = 1024;
    if (CalleeCount[Callee] <= CallsToCalleeThreshold)
      Applies.push_back(AI);
  }
}
/// Return true if inlining this call site is profitable.
bool SILPerformanceInliner::isProfitableToInline(FullApplySite AI,
                                              unsigned loopDepthOfAI,
                                              DominanceAnalysis *DA,
                                              SILLoopAnalysis *LA,
                                              ConstantTracker &callerTracker,
                                              unsigned &NumCallerBlocks) {
  SILFunction *Callee = AI.getCalleeFunction();
  
  if (Callee->getInlineStrategy() == AlwaysInline)
    return true;
  
  ConstantTracker constTracker(Callee, &callerTracker, AI);
  
  DominanceInfo *DT = DA->get(Callee);
  SILLoopInfo *LI = LA->get(Callee);

  DominanceOrder domOrder(&Callee->front(), DT, Callee->size());
  
  // Calculate the inlining cost of the callee.
  unsigned CalleeCost = 0;
  unsigned Benefit = InlineCostThreshold > 0 ? InlineCostThreshold :
                                               RemovedCallBenefit;
  Benefit += loopDepthOfAI * LoopBenefitFactor;
  int testThreshold = TestThreshold;

  while (SILBasicBlock *block = domOrder.getNext()) {
    constTracker.beginBlock();
    unsigned loopDepth = LI->getLoopDepth(block);
    for (SILInstruction &I : *block) {
      constTracker.trackInst(&I);
      
      auto ICost = instructionInlineCost(I);
      
      if (testThreshold >= 0) {
        // We are in test-mode: use a simplified cost model.
        CalleeCost += testCost(&I);
      } else {
        // Use the regular cost model.
        CalleeCost += unsigned(ICost);
      }
      
      if (ApplyInst *AI = dyn_cast<ApplyInst>(&I)) {
        
        // Check if the callee is passed as an argument. If so, increase the
        // threshold, because inlining will (probably) eliminate the closure.
        SILInstruction *def = constTracker.getDefInCaller(AI->getCallee());
        if (def && (isa<FunctionRefInst>(def) || isa<PartialApplyInst>(def))) {

          DEBUG(llvm::dbgs() << "        Boost: apply const function at"
                             << *AI);
          Benefit += ConstCalleeBenefit + loopDepth * LoopBenefitFactor;
          testThreshold *= 2;
        }
      }
    }
    // Don't count costs in blocks which are dead after inlining.
    SILBasicBlock *takenBlock = getTakenBlock(block->getTerminator(),
                                              constTracker);
    if (takenBlock) {
      Benefit += ConstTerminatorBenefit + TestOpt;
      DEBUG(llvm::dbgs() << "      Take bb" << takenBlock->getDebugID() <<
            " of" << *block->getTerminator());
      domOrder.pushChildrenIf(block, [=] (SILBasicBlock *child) {
        return child->getSinglePredecessor() != block || child == takenBlock;
      });
    } else {
      domOrder.pushChildren(block);
    }
  }

  unsigned Threshold = Benefit; // The default.
  if (testThreshold >= 0) {
    // We are in testing mode.
    Threshold = testThreshold;
  } else if (AI.getFunction()->isThunk()) {
    // Only inline trivial functions into thunks (which will not increase the
    // code size).
    Threshold = TrivialFunctionThreshold;
  } else {
    // The default case.
    // We reduce the benefit if the caller is too large. For this we use a
    // cubic function on the number of caller blocks. This starts to prevent
    // inlining at about 800 - 1000 caller blocks.
    unsigned blockMinus =
      (NumCallerBlocks * NumCallerBlocks) / BlockLimitDenominator *
                          NumCallerBlocks / BlockLimitDenominator;
    if (Threshold > blockMinus + TrivialFunctionThreshold)
      Threshold -= blockMinus;
    else
      Threshold = TrivialFunctionThreshold;
  }

  if (CalleeCost > Threshold) {
    DEBUG(llvm::dbgs() << "        NO: Function too big to inline, "
          "cost: " << CalleeCost << ", threshold: " << Threshold << "\n");
    return false;
  }
  DEBUG(llvm::dbgs() << "        YES: ready to inline, "
        "cost: " << CalleeCost << ", threshold: " << Threshold << "\n");
  NumCallerBlocks += Callee->size();
  return true;
}
Exemple #3
0
bool SILPerformanceInliner::isProfitableToInline(FullApplySite AI,
                                                 Weight CallerWeight,
                                                 ConstantTracker &callerTracker,
                                                 int &NumCallerBlocks,
                                                 bool IsGeneric) {
  SILFunction *Callee = AI.getReferencedFunction();
  SILLoopInfo *LI = LA->get(Callee);
  ShortestPathAnalysis *SPA = getSPA(Callee, LI);
  assert(SPA->isValid());

  ConstantTracker constTracker(Callee, &callerTracker, AI);
  DominanceInfo *DT = DA->get(Callee);
  SILBasicBlock *CalleeEntry = &Callee->front();
  DominanceOrder domOrder(CalleeEntry, DT, Callee->size());

  // Calculate the inlining cost of the callee.
  int CalleeCost = 0;
  int Benefit = 0;
  
  // Start with a base benefit.
  int BaseBenefit = RemovedCallBenefit;
  const SILOptions &Opts = Callee->getModule().getOptions();
  
  // For some reason -Ounchecked can accept a higher base benefit without
  // increasing the code size too much.
  if (Opts.Optimization == SILOptions::SILOptMode::OptimizeUnchecked)
    BaseBenefit *= 2;

  CallerWeight.updateBenefit(Benefit, BaseBenefit);

  // Go through all blocks of the function, accumulate the cost and find
  // benefits.
  while (SILBasicBlock *block = domOrder.getNext()) {
    constTracker.beginBlock();
    Weight BlockW = SPA->getWeight(block, CallerWeight);

    for (SILInstruction &I : *block) {
      constTracker.trackInst(&I);
      
      CalleeCost += (int)instructionInlineCost(I);

      if (FullApplySite AI = FullApplySite::isa(&I)) {
        
        // Check if the callee is passed as an argument. If so, increase the
        // threshold, because inlining will (probably) eliminate the closure.
        SILInstruction *def = constTracker.getDefInCaller(AI.getCallee());
        if (def && (isa<FunctionRefInst>(def) || isa<PartialApplyInst>(def)))
          BlockW.updateBenefit(Benefit, RemovedClosureBenefit);
      } else if (auto *LI = dyn_cast<LoadInst>(&I)) {
        // Check if it's a load from a stack location in the caller. Such a load
        // might be optimized away if inlined.
        if (constTracker.isStackAddrInCaller(LI->getOperand()))
          BlockW.updateBenefit(Benefit, RemovedLoadBenefit);
      } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
        // Check if it's a store to a stack location in the caller. Such a load
        // might be optimized away if inlined.
        if (constTracker.isStackAddrInCaller(SI->getDest()))
          BlockW.updateBenefit(Benefit, RemovedStoreBenefit);
      } else if (isa<StrongReleaseInst>(&I) || isa<ReleaseValueInst>(&I)) {
        SILValue Op = stripCasts(I.getOperand(0));
        if (SILArgument *Arg = dyn_cast<SILArgument>(Op)) {
          if (Arg->isFunctionArg() && Arg->getArgumentConvention() ==
              SILArgumentConvention::Direct_Guaranteed) {
            BlockW.updateBenefit(Benefit, RefCountBenefit);
          }
        }
      } else if (auto *BI = dyn_cast<BuiltinInst>(&I)) {
        if (BI->getBuiltinInfo().ID == BuiltinValueKind::OnFastPath)
          BlockW.updateBenefit(Benefit, FastPathBuiltinBenefit);
      }
    }
    // Don't count costs in blocks which are dead after inlining.
    SILBasicBlock *takenBlock = constTracker.getTakenBlock(block->getTerminator());
    if (takenBlock) {
      BlockW.updateBenefit(Benefit, RemovedTerminatorBenefit);
      domOrder.pushChildrenIf(block, [=] (SILBasicBlock *child) {
        return child->getSinglePredecessor() != block || child == takenBlock;
      });
    } else {
      domOrder.pushChildren(block);
    }
  }

  if (AI.getFunction()->isThunk()) {
    // Only inline trivial functions into thunks (which will not increase the
    // code size).
    if (CalleeCost > TrivialFunctionThreshold)
      return false;

    DEBUG(
      
      dumpCaller(AI.getFunction());
      llvm::dbgs() << "    decision {" << CalleeCost << " into thunk} " <<
          Callee->getName() << '\n';
    );
    return true;
  }