void BlockGenerator::createScalarFinalization(Region &R) { // The exit block of the __unoptimized__ region. BasicBlock *ExitBB = R.getExitingBlock(); // The merge block __just after__ the region and the optimized region. BasicBlock *MergeBB = R.getExit(); // The exit block of the __optimized__ region. BasicBlock *OptExitBB = *(pred_begin(MergeBB)); if (OptExitBB == ExitBB) OptExitBB = *(++pred_begin(MergeBB)); Builder.SetInsertPoint(OptExitBB->getTerminator()); for (const auto &EscapeMapping : EscapeMap) { // Extract the escaping instruction and the escaping users as well as the // alloca the instruction was demoted to. Instruction *EscapeInst = EscapeMapping.getFirst(); const auto &EscapeMappingValue = EscapeMapping.getSecond(); const EscapeUserVectorTy &EscapeUsers = EscapeMappingValue.second; AllocaInst *ScalarAddr = EscapeMappingValue.first; // Reload the demoted instruction in the optimized version of the SCoP. Instruction *EscapeInstReload = Builder.CreateLoad(ScalarAddr, EscapeInst->getName() + ".final_reload"); // Create the merge PHI that merges the optimized and unoptimized version. PHINode *MergePHI = PHINode::Create(EscapeInst->getType(), 2, EscapeInst->getName() + ".merge"); MergePHI->insertBefore(MergeBB->getFirstInsertionPt()); // Add the respective values to the merge PHI. MergePHI->addIncoming(EscapeInstReload, OptExitBB); MergePHI->addIncoming(EscapeInst, ExitBB); // The information of scalar evolution about the escaping instruction needs // to be revoked so the new merged instruction will be used. if (SE.isSCEVable(EscapeInst->getType())) SE.forgetValue(EscapeInst); // Replace all uses of the demoted instruction with the merge PHI. for (Instruction *EUser : EscapeUsers) EUser->replaceUsesOfWith(EscapeInst, MergePHI); } }
void RegionGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap, LoopToScevMapT <S) { assert(Stmt.isRegionStmt() && "Only region statements can be copied by the block generator"); // Forget all old mappings. BlockMap.clear(); RegionMaps.clear(); IncompletePHINodeMap.clear(); // The region represented by the statement. Region *R = Stmt.getRegion(); // Create a dedicated entry for the region where we can reload all demoted // inputs. BasicBlock *EntryBB = R->getEntry(); BasicBlock *EntryBBCopy = SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); EntryBBCopy->setName("polly.stmt." + EntryBB->getName() + ".entry"); Builder.SetInsertPoint(EntryBBCopy->begin()); for (auto PI = pred_begin(EntryBB), PE = pred_end(EntryBB); PI != PE; ++PI) if (!R->contains(*PI)) BlockMap[*PI] = EntryBBCopy; // Iterate over all blocks in the region in a breadth-first search. std::deque<BasicBlock *> Blocks; SmallPtrSet<BasicBlock *, 8> SeenBlocks; Blocks.push_back(EntryBB); SeenBlocks.insert(EntryBB); while (!Blocks.empty()) { BasicBlock *BB = Blocks.front(); Blocks.pop_front(); // First split the block and update dominance information. BasicBlock *BBCopy = splitBB(BB); BasicBlock *BBCopyIDom = repairDominance(BB, BBCopy); // In order to remap PHI nodes we store also basic block mappings. BlockMap[BB] = BBCopy; // Get the mapping for this block and initialize it with the mapping // available at its immediate dominator (in the new region). ValueMapT &RegionMap = RegionMaps[BBCopy]; RegionMap = RegionMaps[BBCopyIDom]; // Copy the block with the BlockGenerator. copyBB(Stmt, BB, BBCopy, RegionMap, GlobalMap, LTS); // In order to remap PHI nodes we store also basic block mappings. BlockMap[BB] = BBCopy; // Add values to incomplete PHI nodes waiting for this block to be copied. for (const PHINodePairTy &PHINodePair : IncompletePHINodeMap[BB]) addOperandToPHI(Stmt, PHINodePair.first, PHINodePair.second, BB, GlobalMap, LTS); IncompletePHINodeMap[BB].clear(); // And continue with new successors inside the region. for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; SI++) if (R->contains(*SI) && SeenBlocks.insert(*SI).second) Blocks.push_back(*SI); } // Now create a new dedicated region exit block and add it to the region map. BasicBlock *ExitBBCopy = SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); ExitBBCopy->setName("polly.stmt." + R->getExit()->getName() + ".exit"); BlockMap[R->getExit()] = ExitBBCopy; repairDominance(R->getExit(), ExitBBCopy); // As the block generator doesn't handle control flow we need to add the // region control flow by hand after all blocks have been copied. for (BasicBlock *BB : SeenBlocks) { BranchInst *BI = cast<BranchInst>(BB->getTerminator()); BasicBlock *BBCopy = BlockMap[BB]; Instruction *BICopy = BBCopy->getTerminator(); ValueMapT &RegionMap = RegionMaps[BBCopy]; RegionMap.insert(BlockMap.begin(), BlockMap.end()); Builder.SetInsertPoint(BBCopy); copyInstScalar(Stmt, BI, RegionMap, GlobalMap, LTS); BICopy->eraseFromParent(); } // Add counting PHI nodes to all loops in the region that can be used as // replacement for SCEVs refering to the old loop. for (BasicBlock *BB : SeenBlocks) { Loop *L = LI.getLoopFor(BB); if (L == nullptr || L->getHeader() != BB) continue; BasicBlock *BBCopy = BlockMap[BB]; Value *NullVal = Builder.getInt32(0); PHINode *LoopPHI = PHINode::Create(Builder.getInt32Ty(), 2, "polly.subregion.iv"); Instruction *LoopPHIInc = BinaryOperator::CreateAdd( LoopPHI, Builder.getInt32(1), "polly.subregion.iv.inc"); LoopPHI->insertBefore(BBCopy->begin()); LoopPHIInc->insertBefore(BBCopy->getTerminator()); for (auto *PredBB : make_range(pred_begin(BB), pred_end(BB))) { if (!R->contains(PredBB)) continue; if (L->contains(PredBB)) LoopPHI->addIncoming(LoopPHIInc, BlockMap[PredBB]); else LoopPHI->addIncoming(NullVal, BlockMap[PredBB]); } for (auto *PredBBCopy : make_range(pred_begin(BBCopy), pred_end(BBCopy))) if (LoopPHI->getBasicBlockIndex(PredBBCopy) < 0) LoopPHI->addIncoming(NullVal, PredBBCopy); LTS[L] = SE.getUnknown(LoopPHI); } // Add all mappings from the region to the global map so outside uses will use // the copied instructions. for (auto &BBMap : RegionMaps) GlobalMap.insert(BBMap.second.begin(), BBMap.second.end()); // Reset the old insert point for the build. Builder.SetInsertPoint(ExitBBCopy->begin()); }
/// RewriteLoopExitValues - Check to see if this loop has a computable /// loop-invariant execution count. If so, this means that we can compute the /// final value of any expressions that are recurrent in the loop, and /// substitute the exit values from the loop into any instructions outside of /// the loop that use the final values of the current expressions. /// /// This is mostly redundant with the regular IndVarSimplify activities that /// happen later, except that it's more powerful in some cases, because it's /// able to brute-force evaluate arbitrary instructions as long as they have /// constant operands at the beginning of the loop. void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { // Verify the input to the pass in already in LCSSA form. assert(L->isLCSSAForm(*DT)); SmallVector<BasicBlock*, 8> ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); // Find all values that are computed inside the loop, but used outside of it. // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan // the exit blocks of the loop to find them. for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBB = ExitBlocks[i]; // If there are no PHI nodes in this exit block, then no values defined // inside the loop are used on this path, skip it. PHINode *PN = dyn_cast<PHINode>(ExitBB->begin()); if (!PN) continue; unsigned NumPreds = PN->getNumIncomingValues(); // Iterate over all of the PHI nodes. BasicBlock::iterator BBI = ExitBB->begin(); while ((PN = dyn_cast<PHINode>(BBI++))) { if (PN->use_empty()) continue; // dead use, don't replace it // SCEV only supports integer expressions for now. if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy()) continue; // It's necessary to tell ScalarEvolution about this explicitly so that // it can walk the def-use list and forget all SCEVs, as it may not be // watching the PHI itself. Once the new exit value is in place, there // may not be a def-use connection between the loop and every instruction // which got a SCEVAddRecExpr for that loop. SE->forgetValue(PN); // Iterate over all of the values in all the PHI nodes. for (unsigned i = 0; i != NumPreds; ++i) { // If the value being merged in is not integer or is not defined // in the loop, skip it. Value *InVal = PN->getIncomingValue(i); if (!isa<Instruction>(InVal)) continue; // If this pred is for a subloop, not L itself, skip it. if (LI->getLoopFor(PN->getIncomingBlock(i)) != L) continue; // The Block is in a subloop, skip it. // Check that InVal is defined in the loop. Instruction *Inst = cast<Instruction>(InVal); if (!L->contains(Inst)) continue; // Okay, this instruction has a user outside of the current loop // and varies predictably *inside* the loop. Evaluate the value it // contains when the loop exits, if possible. const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); if (!ExitValue->isLoopInvariant(L)) continue; Changed = true; ++NumReplaced; Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' << " LoopVal = " << *Inst << "\n"); PN->setIncomingValue(i, ExitVal); // If this instruction is dead now, delete it. RecursivelyDeleteTriviallyDeadInstructions(Inst); if (NumPreds == 1) { // Completely replace a single-pred PHI. This is safe, because the // NewVal won't be variant in the loop, so we don't need an LCSSA phi // node anymore. PN->replaceAllUsesWith(ExitVal); RecursivelyDeleteTriviallyDeadInstructions(PN); } } if (NumPreds != 1) { // Clone the PHI and delete the original one. This lets IVUsers and // any other maps purge the original user from their records. PHINode *NewPN = cast<PHINode>(PN->clone()); NewPN->takeName(PN); NewPN->insertBefore(PN); PN->replaceAllUsesWith(NewPN); PN->eraseFromParent(); } } } }
bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { IU = &getAnalysis<IVUsers>(); LI = &getAnalysis<LoopInfo>(); SE = &getAnalysis<ScalarEvolution>(); DT = &getAnalysis<DominatorTree>(); Changed = false; // If there are any floating-point recurrences, attempt to // transform them to use integer recurrences. RewriteNonIntegerIVs(L); BasicBlock *ExitingBlock = L->getExitingBlock(); // may be null const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); // Create a rewriter object which we'll use to transform the code with. SCEVExpander Rewriter(*SE); // Check to see if this loop has a computable loop-invariant execution count. // If so, this means that we can compute the final value of any expressions // that are recurrent in the loop, and substitute the exit values from the // loop into any instructions outside of the loop that use the final values of // the current expressions. // if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) RewriteLoopExitValues(L, Rewriter); // Compute the type of the largest recurrence expression, and decide whether // a canonical induction variable should be inserted. const Type *LargestType = 0; bool NeedCannIV = false; if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { LargestType = BackedgeTakenCount->getType(); LargestType = SE->getEffectiveSCEVType(LargestType); // If we have a known trip count and a single exit block, we'll be // rewriting the loop exit test condition below, which requires a // canonical induction variable. if (ExitingBlock) NeedCannIV = true; } for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) { const Type *Ty = SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType()); if (!LargestType || SE->getTypeSizeInBits(Ty) > SE->getTypeSizeInBits(LargestType)) LargestType = Ty; NeedCannIV = true; } // Now that we know the largest of the induction variable expressions // in this loop, insert a canonical induction variable of the largest size. Value *IndVar = 0; if (NeedCannIV) { // Check to see if the loop already has any canonical-looking induction // variables. If any are present and wider than the planned canonical // induction variable, temporarily remove them, so that the Rewriter // doesn't attempt to reuse them. SmallVector<PHINode *, 2> OldCannIVs; while (PHINode *OldCannIV = L->getCanonicalInductionVariable()) { if (SE->getTypeSizeInBits(OldCannIV->getType()) > SE->getTypeSizeInBits(LargestType)) OldCannIV->removeFromParent(); else break; OldCannIVs.push_back(OldCannIV); } IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType); ++NumInserted; Changed = true; DEBUG(dbgs() << "INDVARS: New CanIV: " << *IndVar << '\n'); // Now that the official induction variable is established, reinsert // any old canonical-looking variables after it so that the IR remains // consistent. They will be deleted as part of the dead-PHI deletion at // the end of the pass. while (!OldCannIVs.empty()) { PHINode *OldCannIV = OldCannIVs.pop_back_val(); OldCannIV->insertBefore(L->getHeader()->getFirstNonPHI()); } } // If we have a trip count expression, rewrite the loop's exit condition // using it. We can currently only handle loops with a single exit. ICmpInst *NewICmp = 0; if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && !BackedgeTakenCount->isZero() && ExitingBlock) { assert(NeedCannIV && "LinearFunctionTestReplace requires a canonical induction variable"); // Can't rewrite non-branch yet. if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator())) NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, ExitingBlock, BI, Rewriter); } // Rewrite IV-derived expressions. Clears the rewriter cache. RewriteIVExpressions(L, Rewriter); // The Rewriter may not be used from this point on. // Loop-invariant instructions in the preheader that aren't used in the // loop may be sunk below the loop to reduce register pressure. SinkUnusedInvariants(L); // For completeness, inform IVUsers of the IV use in the newly-created // loop exit test instruction. if (NewICmp) IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0))); // Clean up dead instructions. Changed |= DeleteDeadPHIs(L->getHeader()); // Check a post-condition. assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!"); return Changed; }
/// For each (predecessor, conditions from predecessors) pair, it will split the /// basic block containing the call site, hook it up to the predecessor and /// replace the call instruction with new call instructions, which contain /// constraints based on the conditions from their predecessors. /// For example, in the IR below with an OR condition, the call-site can /// be split. In this case, Preds for Tail is [(Header, a == null), /// (TBB, a != null, b == null)]. Tail is replaced by 2 split blocks, containing /// CallInst1, which has constraints based on the conditions from Head and /// CallInst2, which has constraints based on the conditions coming from TBB. /// /// From : /// /// Header: /// %c = icmp eq i32* %a, null /// br i1 %c %Tail, %TBB /// TBB: /// %c2 = icmp eq i32* %b, null /// br i1 %c %Tail, %End /// Tail: /// %ca = call i1 @callee (i32* %a, i32* %b) /// /// to : /// /// Header: // PredBB1 is Header /// %c = icmp eq i32* %a, null /// br i1 %c %Tail-split1, %TBB /// TBB: // PredBB2 is TBB /// %c2 = icmp eq i32* %b, null /// br i1 %c %Tail-split2, %End /// Tail-split1: /// %ca1 = call @callee (i32* null, i32* %b) // CallInst1 /// br %Tail /// Tail-split2: /// %ca2 = call @callee (i32* nonnull %a, i32* null) // CallInst2 /// br %Tail /// Tail: /// %p = phi i1 [%ca1, %Tail-split1],[%ca2, %Tail-split2] /// /// Note that in case any arguments at the call-site are constrained by its /// predecessors, new call-sites with more constrained arguments will be /// created in createCallSitesOnPredicatedArgument(). static void splitCallSite( CallSite CS, const SmallVectorImpl<std::pair<BasicBlock *, ConditionsTy>> &Preds) { Instruction *Instr = CS.getInstruction(); BasicBlock *TailBB = Instr->getParent(); bool IsMustTailCall = CS.isMustTailCall(); PHINode *CallPN = nullptr; // `musttail` calls must be followed by optional `bitcast`, and `ret`. The // split blocks will be terminated right after that so there're no users for // this phi in a `TailBB`. if (!IsMustTailCall && Instr->getNumUses()) CallPN = PHINode::Create(Instr->getType(), Preds.size(), "phi.call"); DEBUG(dbgs() << "split call-site : " << *Instr << " into \n"); assert(Preds.size() == 2 && "The ValueToValueMaps array has size 2."); // ValueToValueMapTy is neither copy nor moveable, so we use a simple array // here. ValueToValueMapTy ValueToValueMaps[2]; for (unsigned i = 0; i < Preds.size(); i++) { BasicBlock *PredBB = Preds[i].first; BasicBlock *SplitBlock = DuplicateInstructionsInSplitBetween( TailBB, PredBB, &*std::next(Instr->getIterator()), ValueToValueMaps[i]); assert(SplitBlock && "Unexpected new basic block split."); Instruction *NewCI = &*std::prev(SplitBlock->getTerminator()->getIterator()); CallSite NewCS(NewCI); addConditions(NewCS, Preds[i].second); // Handle PHIs used as arguments in the call-site. for (PHINode &PN : TailBB->phis()) { unsigned ArgNo = 0; for (auto &CI : CS.args()) { if (&*CI == &PN) { NewCS.setArgument(ArgNo, PN.getIncomingValueForBlock(SplitBlock)); } ++ArgNo; } } DEBUG(dbgs() << " " << *NewCI << " in " << SplitBlock->getName() << "\n"); if (CallPN) CallPN->addIncoming(NewCI, SplitBlock); // Clone and place bitcast and return instructions before `TI` if (IsMustTailCall) copyMustTailReturn(SplitBlock, Instr, NewCI); } NumCallSiteSplit++; // FIXME: remove TI in `copyMustTailReturn` if (IsMustTailCall) { // Remove superfluous `br` terminators from the end of the Split blocks // NOTE: Removing terminator removes the SplitBlock from the TailBB's // predecessors. Therefore we must get complete list of Splits before // attempting removal. SmallVector<BasicBlock *, 2> Splits(predecessors((TailBB))); assert(Splits.size() == 2 && "Expected exactly 2 splits!"); for (unsigned i = 0; i < Splits.size(); i++) Splits[i]->getTerminator()->eraseFromParent(); // Erase the tail block once done with musttail patching TailBB->eraseFromParent(); return; } auto *OriginalBegin = &*TailBB->begin(); // Replace users of the original call with a PHI mering call-sites split. if (CallPN) { CallPN->insertBefore(OriginalBegin); Instr->replaceAllUsesWith(CallPN); } // Remove instructions moved to split blocks from TailBB, from the duplicated // call instruction to the beginning of the basic block. If an instruction // has any uses, add a new PHI node to combine the values coming from the // split blocks. The new PHI nodes are placed before the first original // instruction, so we do not end up deleting them. By using reverse-order, we // do not introduce unnecessary PHI nodes for def-use chains from the call // instruction to the beginning of the block. auto I = Instr->getReverseIterator(); while (I != TailBB->rend()) { Instruction *CurrentI = &*I++; if (!CurrentI->use_empty()) { // If an existing PHI has users after the call, there is no need to create // a new one. if (isa<PHINode>(CurrentI)) continue; PHINode *NewPN = PHINode::Create(CurrentI->getType(), Preds.size()); for (auto &Mapping : ValueToValueMaps) NewPN->addIncoming(Mapping[CurrentI], cast<Instruction>(Mapping[CurrentI])->getParent()); NewPN->insertBefore(&*TailBB->begin()); CurrentI->replaceAllUsesWith(NewPN); } CurrentI->eraseFromParent(); // We are done once we handled the first original instruction in TailBB. if (CurrentI == OriginalBegin) break; } }