/// Return adjusted total frequency of \p BBs. /// /// * If there is only one BB, sinking instruction will not introduce code /// size increase. Thus there is no need to adjust the frequency. /// * If there are more than one BB, sinking would lead to code size increase. /// In this case, we add some "tax" to the total frequency to make it harder /// to sink. E.g. /// Freq(Preheader) = 100 /// Freq(BBs) = sum(50, 49) = 99 /// Even if Freq(BBs) < Freq(Preheader), we will not sink from Preheade to /// BBs as the difference is too small to justify the code size increase. /// To model this, The adjusted Freq(BBs) will be: /// AdjustedFreq(BBs) = 99 / SinkFrequencyPercentThreshold% static BlockFrequency adjustedSumFreq(SmallPtrSetImpl<BasicBlock *> &BBs, BlockFrequencyInfo &BFI) { BlockFrequency T = 0; for (BasicBlock *B : BBs) T += BFI.getBlockFreq(B); if (BBs.size() > 1) T /= BranchProbability(SinkFrequencyPercentThreshold, 100); return T; }
/// Merge an autorelease with a retain into a fused call. bool ObjCARCContract::contractAutorelease( Function &F, Instruction *Autorelease, ARCInstKind Class, SmallPtrSetImpl<Instruction *> &DependingInstructions, SmallPtrSetImpl<const BasicBlock *> &Visited) { const Value *Arg = GetArgRCIdentityRoot(Autorelease); // Check that there are no instructions between the retain and the autorelease // (such as an autorelease_pop) which may change the count. CallInst *Retain = nullptr; if (Class == ARCInstKind::AutoreleaseRV) FindDependencies(RetainAutoreleaseRVDep, Arg, Autorelease->getParent(), Autorelease, DependingInstructions, Visited, PA); else FindDependencies(RetainAutoreleaseDep, Arg, Autorelease->getParent(), Autorelease, DependingInstructions, Visited, PA); Visited.clear(); if (DependingInstructions.size() != 1) { DependingInstructions.clear(); return false; } Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); DependingInstructions.clear(); if (!Retain || GetBasicARCInstKind(Retain) != ARCInstKind::Retain || GetArgRCIdentityRoot(Retain) != Arg) return false; Changed = true; ++NumPeeps; LLVM_DEBUG(dbgs() << " Fusing retain/autorelease!\n" " Autorelease:" << *Autorelease << "\n" " Retain: " << *Retain << "\n"); Function *Decl = EP.get(Class == ARCInstKind::AutoreleaseRV ? ARCRuntimeEntryPointKind::RetainAutoreleaseRV : ARCRuntimeEntryPointKind::RetainAutorelease); Retain->setCalledFunction(Decl); LLVM_DEBUG(dbgs() << " New RetainAutorelease: " << *Retain << "\n"); EraseInstruction(Autorelease); return true; }
/// Choose the basic blocks into which a sunk instruction should be inserted.
///
/// The result (BBsToSinkInto) is meant to satisfy all of:
///
/// * Every block is inside the loop \p L.
/// * Every block in \p UseBBs is dominated by at least one block of
///   BBsToSinkInto.
/// * The total frequency is minimal and does not exceed the frequency of the
///   loop preheader.
///
/// The cost being minimized is "sum of the frequencies of BBsToSinkInto", so
/// the function looks for the cheapest set of insertion points. If even the
/// best set found is more expensive than the preheader, not sinking is the
/// better choice and an empty set is returned. An empty \p UseBBs also yields
/// an empty set.
///
/// \p ColdLoopBBs guides the search. It is expected to list blocks that are:
///
/// * Inside the loop \p L
/// * No more frequent than the loop's preheader
/// * Sorted by block frequency
///
/// The running time is O(UseBBs.size() * ColdLoopBBs.size()); the caller caps
/// UseBBs.size() to keep this cheap.
static SmallPtrSet<BasicBlock *, 2>
findBBsToSinkInto(const Loop &L, const SmallPtrSetImpl<BasicBlock *> &UseBBs,
                  const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
                  DominatorTree &DT, BlockFrequencyInfo &BFI) {
  SmallPtrSet<BasicBlock *, 2> BBsToSinkInto;
  if (UseBBs.empty())
    return BBsToSinkInto;

  // Start from the trivial solution: sink into every using block.
  BBsToSinkInto.insert(UseBBs.begin(), UseBBs.end());

  // Walk the candidate cold blocks in the order given. For each candidate:
  //   1. Collect the current sink targets that the candidate dominates.
  //   2. If the candidate is cheaper than the (adjusted) combined frequency
  //      of those targets, replace them all by the candidate.
  SmallPtrSet<BasicBlock *, 2> Covered;
  for (BasicBlock *Candidate : ColdLoopBBs) {
    Covered.clear();
    for (BasicBlock *Target : BBsToSinkInto) {
      if (DT.dominates(Candidate, Target))
        Covered.insert(Target);
    }
    if (Covered.empty())
      continue;

    const bool CandidateIsCheaper =
        adjustedSumFreq(Covered, BFI) > BFI.getBlockFreq(Candidate);
    if (!CandidateIsCheaper)
      continue;

    for (BasicBlock *Replaced : Covered)
      BBsToSinkInto.erase(Replaced);
    BBsToSinkInto.insert(Candidate);
  }

  // Sinking only pays off if the chosen blocks together are no more expensive
  // than leaving the instruction in the preheader.
  const BlockFrequency PreheaderFreq = BFI.getBlockFreq(L.getLoopPreheader());
  if (adjustedSumFreq(BBsToSinkInto, BFI) > PreheaderFreq)
    BBsToSinkInto.clear();

  return BBsToSinkInto;
}