コード例 #1
0
ファイル: BlockFrequencyInfo.cpp プロジェクト: JaredCJR/llvm
BlockFrequencyInfo BlockFrequencyAnalysis::run(Function &F,
                                               FunctionAnalysisManager &AM) {
  BlockFrequencyInfo BFI;
  BFI.calculate(F, AM.getResult<BranchProbabilityAnalysis>(F),
                AM.getResult<LoopAnalysis>(F));
  return BFI;
}
コード例 #2
0
ファイル: LoopSink.cpp プロジェクト: bryant/llvm
/// Return adjusted total frequency of \p BBs.
///
/// * If there is only one BB, sinking instruction will not introduce code
///   size increase. Thus there is no need to adjust the frequency.
/// * If there are more than one BB, sinking would lead to code size increase.
///   In this case, we add some "tax" to the total frequency to make it harder
///   to sink. E.g.
///     Freq(Preheader) = 100
///     Freq(BBs) = sum(50, 49) = 99
///   Even if Freq(BBs) < Freq(Preheader), we will not sink from Preheade to
///   BBs as the difference is too small to justify the code size increase.
///   To model this, The adjusted Freq(BBs) will be:
///     AdjustedFreq(BBs) = 99 / SinkFrequencyPercentThreshold%
static BlockFrequency adjustedSumFreq(SmallPtrSetImpl<BasicBlock *> &BBs,
                                      BlockFrequencyInfo &BFI) {
  BlockFrequency T = 0;
  for (BasicBlock *B : BBs)
    T += BFI.getBlockFreq(B);
  if (BBs.size() > 1)
    T /= BranchProbability(SinkFrequencyPercentThreshold, 100);
  return T;
}
コード例 #3
0
ファイル: LoopSink.cpp プロジェクト: bryant/llvm
/// Return a set of basic blocks to insert sinked instructions.
///
/// The returned set of basic blocks (BBsToSinkInto) should satisfy:
///
/// * Inside the loop \p L
/// * For each UseBB in \p UseBBs, there is at least one BB in BBsToSinkInto
///   that domintates the UseBB
/// * Has minimum total frequency that is no greater than preheader frequency
///
/// The purpose of the function is to find the optimal sinking points to
/// minimize execution cost, which is defined as "sum of frequency of
/// BBsToSinkInto".
/// As a result, the returned BBsToSinkInto needs to have minimum total
/// frequency.
/// Additionally, if the total frequency of BBsToSinkInto exceeds preheader
/// frequency, the optimal solution is not sinking (return empty set).
///
/// \p ColdLoopBBs is used to help find the optimal sinking locations.
/// It stores a list of BBs that is:
///
/// * Inside the loop \p L
/// * Has a frequency no larger than the loop's preheader
/// * Sorted by BB frequency
///
/// The complexity of the function is O(UseBBs.size() * ColdLoopBBs.size()).
/// To avoid expensive computation, we cap the maximum UseBBs.size() in its
/// caller.
static SmallPtrSet<BasicBlock *, 2>
findBBsToSinkInto(const Loop &L, const SmallPtrSetImpl<BasicBlock *> &UseBBs,
                  const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
                  DominatorTree &DT, BlockFrequencyInfo &BFI) {
  SmallPtrSet<BasicBlock *, 2> BBsToSinkInto;
  if (UseBBs.size() == 0)
    return BBsToSinkInto;

  BBsToSinkInto.insert(UseBBs.begin(), UseBBs.end());
  SmallPtrSet<BasicBlock *, 2> BBsDominatedByColdestBB;

  // For every iteration:
  //   * Pick the ColdestBB from ColdLoopBBs
  //   * Find the set BBsDominatedByColdestBB that satisfy:
  //     - BBsDominatedByColdestBB is a subset of BBsToSinkInto
  //     - Every BB in BBsDominatedByColdestBB is dominated by ColdestBB
  //   * If Freq(ColdestBB) < Freq(BBsDominatedByColdestBB), remove
  //     BBsDominatedByColdestBB from BBsToSinkInto, add ColdestBB to
  //     BBsToSinkInto
  for (BasicBlock *ColdestBB : ColdLoopBBs) {
    BBsDominatedByColdestBB.clear();
    for (BasicBlock *SinkedBB : BBsToSinkInto)
      if (DT.dominates(ColdestBB, SinkedBB))
        BBsDominatedByColdestBB.insert(SinkedBB);
    if (BBsDominatedByColdestBB.size() == 0)
      continue;
    if (adjustedSumFreq(BBsDominatedByColdestBB, BFI) >
        BFI.getBlockFreq(ColdestBB)) {
      for (BasicBlock *DominatedBB : BBsDominatedByColdestBB) {
        BBsToSinkInto.erase(DominatedBB);
      }
      BBsToSinkInto.insert(ColdestBB);
    }
  }

  // If the total frequency of BBsToSinkInto is larger than preheader frequency,
  // do not sink.
  if (adjustedSumFreq(BBsToSinkInto, BFI) >
      BFI.getBlockFreq(L.getLoopPreheader()))
    BBsToSinkInto.clear();
  return BBsToSinkInto;
}
コード例 #4
0
ファイル: PartialInlining.cpp プロジェクト: CTSRD-SOAAP/llvm
// Create the callsite to profile count map which is
// used to update the original function's entry count,
// after the function is partially inlined into the callsite.
void PartialInlinerImpl::computeCallsiteToProfCountMap(
    Function *DuplicateFunction,
    DenseMap<User *, uint64_t> &CallSiteToProfCountMap) {
  std::vector<User *> Users(DuplicateFunction->user_begin(),
                            DuplicateFunction->user_end());
  Function *CurrentCaller = nullptr;
  std::unique_ptr<BlockFrequencyInfo> TempBFI;
  BlockFrequencyInfo *CurrentCallerBFI = nullptr;

  auto ComputeCurrBFI = [&,this](Function *Caller) {
      // For the old pass manager:
      if (!GetBFI) {
        DominatorTree DT(*Caller);
        LoopInfo LI(DT);
        BranchProbabilityInfo BPI(*Caller, LI);
        TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
        CurrentCallerBFI = TempBFI.get();
      } else {
        // New pass manager:
        CurrentCallerBFI = &(*GetBFI)(*Caller);
      }
  };

  for (User *User : Users) {
    CallSite CS = getCallSite(User);
    Function *Caller = CS.getCaller();
    if (CurrentCaller != Caller) {
      CurrentCaller = Caller;
      ComputeCurrBFI(Caller);
    } else {
      assert(CurrentCallerBFI && "CallerBFI is not set");
    }
    BasicBlock *CallBB = CS.getInstruction()->getParent();
    auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
    if (Count)
      CallSiteToProfCountMap[User] = *Count;
    else
      CallSiteToProfCountMap[User] = 0;
  }
}
コード例 #5
0
/// Sinks instructions from loop's preheader to the loop body if the
/// sum frequency of inserted copy is smaller than preheader's frequency.
static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
        DominatorTree &DT,
        BlockFrequencyInfo &BFI,
        ScalarEvolution *SE) {
    BasicBlock *Preheader = L.getLoopPreheader();
    if (!Preheader)
        return false;

    // Enable LoopSink only when runtime profile is available.
    // With static profile, the sinking decision may be sub-optimal.
    if (!Preheader->getParent()->getEntryCount())
        return false;

    const BlockFrequency PreheaderFreq = BFI.getBlockFreq(Preheader);
    // If there are no basic blocks with lower frequency than the preheader then
    // we can avoid the detailed analysis as we will never find profitable sinking
    // opportunities.
    if (all_of(L.blocks(), [&](const BasicBlock *BB) {
    return BFI.getBlockFreq(BB) > PreheaderFreq;
    }))
    return false;

    bool Changed = false;
    AliasSetTracker CurAST(AA);

    // Compute alias set.
    for (BasicBlock *BB : L.blocks())
        CurAST.add(*BB);

    // Sort loop's basic blocks by frequency
    SmallVector<BasicBlock *, 10> ColdLoopBBs;
    SmallDenseMap<BasicBlock *, int, 16> LoopBlockNumber;
    int i = 0;
    for (BasicBlock *B : L.blocks())
        if (BFI.getBlockFreq(B) < BFI.getBlockFreq(L.getLoopPreheader())) {
            ColdLoopBBs.push_back(B);
            LoopBlockNumber[B] = ++i;
        }
    std::stable_sort(ColdLoopBBs.begin(), ColdLoopBBs.end(),
    [&](BasicBlock *A, BasicBlock *B) {
        return BFI.getBlockFreq(A) < BFI.getBlockFreq(B);
    });

    // Traverse preheader's instructions in reverse order becaue if A depends
    // on B (A appears after B), A needs to be sinked first before B can be
    // sinked.
    for (auto II = Preheader->rbegin(), E = Preheader->rend(); II != E;) {
        Instruction *I = &*II++;
        if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr))
            continue;
        if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI))
            Changed = true;
    }

    if (Changed && SE)
        SE->forgetLoopDispositions(&L);
    return Changed;
}
コード例 #6
0
ファイル: ConstantHoisting.cpp プロジェクト: jvesely/llvm
/// Given \p BBs as input, find another set of BBs which collectively
/// dominates \p BBs and have the minimal sum of frequencies. Return the BB
/// set found in \p BBs.
static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
                                 BasicBlock *Entry,
                                 SmallPtrSet<BasicBlock *, 8> &BBs) {
  assert(!BBs.count(Entry) && "Assume Entry is not in BBs");
  // Nodes on the current path to the root.
  SmallPtrSet<BasicBlock *, 8> Path;
  // Candidates includes any block 'BB' in set 'BBs' that is not strictly
  // dominated by any other blocks in set 'BBs', and all nodes in the path
  // in the dominator tree from Entry to 'BB'.
  SmallPtrSet<BasicBlock *, 16> Candidates;
  for (auto BB : BBs) {
    // Ignore unreachable basic blocks.
    if (!DT.isReachableFromEntry(BB))
      continue;
    Path.clear();
    // Walk up the dominator tree until Entry or another BB in BBs
    // is reached. Insert the nodes on the way to the Path.
    BasicBlock *Node = BB;
    // The "Path" is a candidate path to be added into Candidates set.
    bool isCandidate = false;
    do {
      Path.insert(Node);
      if (Node == Entry || Candidates.count(Node)) {
        isCandidate = true;
        break;
      }
      assert(DT.getNode(Node)->getIDom() &&
             "Entry doens't dominate current Node");
      Node = DT.getNode(Node)->getIDom()->getBlock();
    } while (!BBs.count(Node));

    // If isCandidate is false, Node is another Block in BBs dominating
    // current 'BB'. Drop the nodes on the Path.
    if (!isCandidate)
      continue;

    // Add nodes on the Path into Candidates.
    Candidates.insert(Path.begin(), Path.end());
  }

  // Sort the nodes in Candidates in top-down order and save the nodes
  // in Orders.
  unsigned Idx = 0;
  SmallVector<BasicBlock *, 16> Orders;
  Orders.push_back(Entry);
  while (Idx != Orders.size()) {
    BasicBlock *Node = Orders[Idx++];
    for (auto ChildDomNode : DT.getNode(Node)->getChildren()) {
      if (Candidates.count(ChildDomNode->getBlock()))
        Orders.push_back(ChildDomNode->getBlock());
    }
  }

  // Visit Orders in bottom-up order.
  using InsertPtsCostPair =
      std::pair<SmallPtrSet<BasicBlock *, 16>, BlockFrequency>;

  // InsertPtsMap is a map from a BB to the best insertion points for the
  // subtree of BB (subtree not including the BB itself).
  DenseMap<BasicBlock *, InsertPtsCostPair> InsertPtsMap;
  InsertPtsMap.reserve(Orders.size() + 1);
  for (auto RIt = Orders.rbegin(); RIt != Orders.rend(); RIt++) {
    BasicBlock *Node = *RIt;
    bool NodeInBBs = BBs.count(Node);
    SmallPtrSet<BasicBlock *, 16> &InsertPts = InsertPtsMap[Node].first;
    BlockFrequency &InsertPtsFreq = InsertPtsMap[Node].second;

    // Return the optimal insert points in BBs.
    if (Node == Entry) {
      BBs.clear();
      if (InsertPtsFreq > BFI.getBlockFreq(Node) ||
          (InsertPtsFreq == BFI.getBlockFreq(Node) && InsertPts.size() > 1))
        BBs.insert(Entry);
      else
        BBs.insert(InsertPts.begin(), InsertPts.end());
      break;
    }

    BasicBlock *Parent = DT.getNode(Node)->getIDom()->getBlock();
    // Initially, ParentInsertPts is empty and ParentPtsFreq is 0. Every child
    // will update its parent's ParentInsertPts and ParentPtsFreq.
    SmallPtrSet<BasicBlock *, 16> &ParentInsertPts = InsertPtsMap[Parent].first;
    BlockFrequency &ParentPtsFreq = InsertPtsMap[Parent].second;
    // Choose to insert in Node or in subtree of Node.
    // Don't hoist to EHPad because we may not find a proper place to insert
    // in EHPad.
    // If the total frequency of InsertPts is the same as the frequency of the
    // target Node, and InsertPts contains more than one nodes, choose hoisting
    // to reduce code size.
    if (NodeInBBs ||
        (!Node->isEHPad() &&
         (InsertPtsFreq > BFI.getBlockFreq(Node) ||
          (InsertPtsFreq == BFI.getBlockFreq(Node) && InsertPts.size() > 1)))) {
      ParentInsertPts.insert(Node);
      ParentPtsFreq += BFI.getBlockFreq(Node);
    } else {
      ParentInsertPts.insert(InsertPts.begin(), InsertPts.end());
      ParentPtsFreq += InsertPtsFreq;
    }
  }
}