Esempio n. 1
0
/// Return adjusted total frequency of \p BBs.
///
/// * If there is only one BB, sinking instruction will not introduce code
///   size increase. Thus there is no need to adjust the frequency.
/// * If there are more than one BB, sinking would lead to code size increase.
///   In this case, we add some "tax" to the total frequency to make it harder
///   to sink. E.g.
///     Freq(Preheader) = 100
///     Freq(BBs) = sum(50, 49) = 99
///   Even if Freq(BBs) < Freq(Preheader), we will not sink from Preheade to
///   BBs as the difference is too small to justify the code size increase.
///   To model this, The adjusted Freq(BBs) will be:
///     AdjustedFreq(BBs) = 99 / SinkFrequencyPercentThreshold%
static BlockFrequency adjustedSumFreq(SmallPtrSetImpl<BasicBlock *> &BBs,
                                      BlockFrequencyInfo &BFI) {
  BlockFrequency T = 0;
  for (BasicBlock *B : BBs)
    T += BFI.getBlockFreq(B);
  if (BBs.size() > 1)
    T /= BranchProbability(SinkFrequencyPercentThreshold, 100);
  return T;
}
Esempio n. 2
0
/// Merge an autorelease with a retain into a fused call.
bool ObjCARCContract::contractAutorelease(
    Function &F, Instruction *Autorelease, ARCInstKind Class,
    SmallPtrSetImpl<Instruction *> &DependingInstructions,
    SmallPtrSetImpl<const BasicBlock *> &Visited) {
  const Value *Arg = GetArgRCIdentityRoot(Autorelease);

  // Check that there are no instructions between the retain and the autorelease
  // (such as an autorelease_pop) which may change the count.
  CallInst *Retain = nullptr;
  if (Class == ARCInstKind::AutoreleaseRV)
    FindDependencies(RetainAutoreleaseRVDep, Arg,
                     Autorelease->getParent(), Autorelease,
                     DependingInstructions, Visited, PA);
  else
    FindDependencies(RetainAutoreleaseDep, Arg,
                     Autorelease->getParent(), Autorelease,
                     DependingInstructions, Visited, PA);

  Visited.clear();
  if (DependingInstructions.size() != 1) {
    DependingInstructions.clear();
    return false;
  }

  Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
  DependingInstructions.clear();

  if (!Retain || GetBasicARCInstKind(Retain) != ARCInstKind::Retain ||
      GetArgRCIdentityRoot(Retain) != Arg)
    return false;

  Changed = true;
  ++NumPeeps;

  LLVM_DEBUG(dbgs() << "    Fusing retain/autorelease!\n"
                       "        Autorelease:"
                    << *Autorelease
                    << "\n"
                       "        Retain: "
                    << *Retain << "\n");

  Function *Decl = EP.get(Class == ARCInstKind::AutoreleaseRV
                              ? ARCRuntimeEntryPointKind::RetainAutoreleaseRV
                              : ARCRuntimeEntryPointKind::RetainAutorelease);
  Retain->setCalledFunction(Decl);

  LLVM_DEBUG(dbgs() << "        New RetainAutorelease: " << *Retain << "\n");

  EraseInstruction(Autorelease);
  return true;
}
Esempio n. 3
0
/// Return a set of basic blocks to insert sinked instructions.
///
/// The returned set of basic blocks (BBsToSinkInto) should satisfy:
///
/// * Inside the loop \p L
/// * For each UseBB in \p UseBBs, there is at least one BB in BBsToSinkInto
///   that domintates the UseBB
/// * Has minimum total frequency that is no greater than preheader frequency
///
/// The purpose of the function is to find the optimal sinking points to
/// minimize execution cost, which is defined as "sum of frequency of
/// BBsToSinkInto".
/// As a result, the returned BBsToSinkInto needs to have minimum total
/// frequency.
/// Additionally, if the total frequency of BBsToSinkInto exceeds preheader
/// frequency, the optimal solution is not sinking (return empty set).
///
/// \p ColdLoopBBs is used to help find the optimal sinking locations.
/// It stores a list of BBs that is:
///
/// * Inside the loop \p L
/// * Has a frequency no larger than the loop's preheader
/// * Sorted by BB frequency
///
/// The complexity of the function is O(UseBBs.size() * ColdLoopBBs.size()).
/// To avoid expensive computation, we cap the maximum UseBBs.size() in its
/// caller.
static SmallPtrSet<BasicBlock *, 2>
findBBsToSinkInto(const Loop &L, const SmallPtrSetImpl<BasicBlock *> &UseBBs,
                  const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
                  DominatorTree &DT, BlockFrequencyInfo &BFI) {
  SmallPtrSet<BasicBlock *, 2> BBsToSinkInto;
  if (UseBBs.size() == 0)
    return BBsToSinkInto;

  BBsToSinkInto.insert(UseBBs.begin(), UseBBs.end());
  SmallPtrSet<BasicBlock *, 2> BBsDominatedByColdestBB;

  // For every iteration:
  //   * Pick the ColdestBB from ColdLoopBBs
  //   * Find the set BBsDominatedByColdestBB that satisfy:
  //     - BBsDominatedByColdestBB is a subset of BBsToSinkInto
  //     - Every BB in BBsDominatedByColdestBB is dominated by ColdestBB
  //   * If Freq(ColdestBB) < Freq(BBsDominatedByColdestBB), remove
  //     BBsDominatedByColdestBB from BBsToSinkInto, add ColdestBB to
  //     BBsToSinkInto
  for (BasicBlock *ColdestBB : ColdLoopBBs) {
    BBsDominatedByColdestBB.clear();
    for (BasicBlock *SinkedBB : BBsToSinkInto)
      if (DT.dominates(ColdestBB, SinkedBB))
        BBsDominatedByColdestBB.insert(SinkedBB);
    if (BBsDominatedByColdestBB.size() == 0)
      continue;
    if (adjustedSumFreq(BBsDominatedByColdestBB, BFI) >
        BFI.getBlockFreq(ColdestBB)) {
      for (BasicBlock *DominatedBB : BBsDominatedByColdestBB) {
        BBsToSinkInto.erase(DominatedBB);
      }
      BBsToSinkInto.insert(ColdestBB);
    }
  }

  // If the total frequency of BBsToSinkInto is larger than preheader frequency,
  // do not sink.
  if (adjustedSumFreq(BBsToSinkInto, BFI) >
      BFI.getBlockFreq(L.getLoopPreheader()))
    BBsToSinkInto.clear();
  return BBsToSinkInto;
}