bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA) { DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); DEBUG(L->dump()); DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" : dbgs() << "Using prolog remainder.\n"); // Make sure the loop is in canonical form. if (!L->isLoopSimplifyForm()) { DEBUG(dbgs() << "Not in simplify form!\n"); return false; } // Guaranteed by LoopSimplifyForm. BasicBlock *Latch = L->getLoopLatch(); BasicBlock *Header = L->getHeader(); BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex); // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the // targets of the Latch be an exit block out of the loop. This needs // to be guaranteed by the callers of UnrollRuntimeLoopRemainder. assert(!L->contains(LatchExit) && "one of the loop latch successors should be the exit block!"); // These are exit blocks other than the target of the latch exiting block. SmallVector<BasicBlock *, 4> OtherExits; bool isMultiExitUnrollingEnabled = canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder) && canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder); // Support only single exit and exiting block unless multi-exit loop unrolling is enabled. if (!isMultiExitUnrollingEnabled && (!L->getExitingBlock() || OtherExits.size())) { DEBUG( dbgs() << "Multiple exit/exiting blocks in loop and multi-exit unrolling not " "enabled!\n"); return false; } // Use Scalar Evolution to compute the trip count. This allows more loops to // be unrolled than relying on induction var simplification. if (!SE) return false; // Only unroll loops with a computable trip count, and the trip count needs // to be an int value (allowing a pointer type is a TODO item). // We calculate the backedge count by using getExitCount on the Latch block, // which is proven to be the only exiting block in this loop. This is same as // calculating getBackedgeTakenCount on the loop (which computes SCEV for all // exiting blocks). const SCEV *BECountSC = SE->getExitCount(L, Latch); if (isa<SCEVCouldNotCompute>(BECountSC) || !BECountSC->getType()->isIntegerTy()) { DEBUG(dbgs() << "Could not compute exit block SCEV\n"); return false; } unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth(); // Add 1 since the backedge count doesn't include the first loop iteration. const SCEV *TripCountSC = SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); if (isa<SCEVCouldNotCompute>(TripCountSC)) { DEBUG(dbgs() << "Could not compute trip count SCEV.\n"); return false; } BasicBlock *PreHeader = L->getLoopPreheader(); BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "loop-unroll"); if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) { DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); return false; } // This constraint lets us deal with an overflowing trip count easily; see the // comment on ModVal below. if (Log2_32(Count) > BEWidth) { DEBUG(dbgs() << "Count failed constraint on overflow trip count calculation.\n"); return false; } // Loop structure is the following: // // PreHeader // Header // ... // Latch // LatchExit BasicBlock *NewPreHeader; BasicBlock *NewExit = nullptr; BasicBlock *PrologExit = nullptr; BasicBlock *EpilogPreHeader = nullptr; BasicBlock *PrologPreHeader = nullptr; if (UseEpilogRemainder) { // If epilog remainder // Split PreHeader to insert a branch around loop for unrolling. NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI); NewPreHeader->setName(PreHeader->getName() + ".new"); // Split LatchExit to create phi nodes from branch above. SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit)); NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); // Split NewExit to insert epilog remainder loop. EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI); EpilogPreHeader->setName(Header->getName() + ".epil.preheader"); } else { // If prolog remainder // Split the original preheader twice to insert prolog remainder loop PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI); PrologPreHeader->setName(Header->getName() + ".prol.preheader"); PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(), DT, LI); PrologExit->setName(Header->getName() + ".prol.loopexit"); // Split PrologExit to get NewPreHeader. NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI); NewPreHeader->setName(PreHeader->getName() + ".new"); } // Loop structure should be the following: // Epilog Prolog // // PreHeader PreHeader // *NewPreHeader *PrologPreHeader // Header *PrologExit // ... *NewPreHeader // Latch Header // *NewExit ... // *EpilogPreHeader Latch // LatchExit LatchExit // Calculate conditions for branch around loop for unrolling // in epilog case and around prolog remainder loop in prolog case. // Compute the number of extra iterations required, which is: // extra iterations = run-time trip count % loop unroll factor PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR); Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR); IRBuilder<> B(PreHeaderBR); Value *ModVal; // Calculate ModVal = (BECount + 1) % Count. // Note that TripCount is BECount + 1. if (isPowerOf2_32(Count)) { // When Count is power of 2 we don't BECount for epilog case, however we'll // need it for a branch around unrolling loop for prolog case. ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter"); // 1. There are no iterations to be run in the prolog/epilog loop. // OR // 2. The addition computing TripCount overflowed. // // If (2) is true, we know that TripCount really is (1 << BEWidth) and so // the number of iterations that remain to be run in the original loop is a // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we // explicitly check this above). } else { // As (BECount + 1) can potentially unsigned overflow we count // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count. Value *ModValTmp = B.CreateURem(BECount, ConstantInt::get(BECount->getType(), Count)); Value *ModValAdd = B.CreateAdd(ModValTmp, ConstantInt::get(ModValTmp->getType(), 1)); // At that point (BECount % Count) + 1 could be equal to Count. // To handle this case we need to take mod by Count one more time. ModVal = B.CreateURem(ModValAdd, ConstantInt::get(BECount->getType(), Count), "xtraiter"); } Value *BranchVal = UseEpilogRemainder ? B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)) : B.CreateIsNotNull(ModVal, "lcmp.mod"); BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader; BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit; // Branch to either remainder (extra iterations) loop or unrolling loop. B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop); PreHeaderBR->eraseFromParent(); if (DT) { if (UseEpilogRemainder) DT->changeImmediateDominator(NewExit, PreHeader); else DT->changeImmediateDominator(PrologExit, PreHeader); } Function *F = Header->getParent(); // Get an ordered list of blocks in the loop to help with the ordering of the // cloned blocks in the prolog/epilog code LoopBlocksDFS LoopBlocks(L); LoopBlocks.perform(LI); // // For each extra loop iteration, create a copy of the loop's basic blocks // and generate a condition that branches to the copy depending on the // number of 'left over' iterations. // std::vector<BasicBlock *> NewBlocks; ValueToValueMapTy VMap; // For unroll factor 2 remainder loop will have 1 iterations. // Do not create 1 iteration loop. bool CreateRemainderLoop = (Count != 2); // Clone all the basic blocks in the loop. If Count is 2, we don't clone // the loop, otherwise we create a cloned loop to execute the extra // iterations. This function adds the appropriate CFG connections. BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; Loop *remainderLoop = CloneLoopBlocks( L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); // Insert the cloned blocks into the function. F->getBasicBlockList().splice(InsertBot->getIterator(), F->getBasicBlockList(), NewBlocks[0]->getIterator(), F->end()); // Now the loop blocks are cloned and the other exiting blocks from the // remainder are connected to the original Loop's exit blocks. The remaining // work is to update the phi nodes in the original loop, and take in the // values from the cloned region. Also update the dominator info for // OtherExits and their immediate successors, since we have new edges into // OtherExits. SmallSet<BasicBlock*, 8> ImmediateSuccessorsOfExitBlocks; for (auto *BB : OtherExits) { for (auto &II : *BB) { // Given we preserve LCSSA form, we know that the values used outside the // loop will be used through these phi nodes at the exit blocks that are // transformed below. if (!isa<PHINode>(II)) break; PHINode *Phi = cast<PHINode>(&II); unsigned oldNumOperands = Phi->getNumIncomingValues(); // Add the incoming values from the remainder code to the end of the phi // node. for (unsigned i =0; i < oldNumOperands; i++){ Value *newVal = VMap[Phi->getIncomingValue(i)]; // newVal can be a constant or derived from values outside the loop, and // hence need not have a VMap value. if (!newVal) newVal = Phi->getIncomingValue(i); Phi->addIncoming(newVal, cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)])); } } #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) for (BasicBlock *SuccBB : successors(BB)) { assert(!(any_of(OtherExits, [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) || SuccBB == LatchExit) && "Breaks the definition of dedicated exits!"); } #endif // Update the dominator info because the immediate dominator is no longer the // header of the original Loop. BB has edges both from L and remainder code. // Since the preheader determines which loop is run (L or directly jump to // the remainder code), we set the immediate dominator as the preheader. if (DT) { DT->changeImmediateDominator(BB, PreHeader); // Also update the IDom for immediate successors of BB. If the current // IDom is the header, update the IDom to be the preheader because that is // the nearest common dominator of all predecessors of SuccBB. We need to // check for IDom being the header because successors of exit blocks can // have edges from outside the loop, and we should not incorrectly update // the IDom in that case. for (BasicBlock *SuccBB: successors(BB)) if (ImmediateSuccessorsOfExitBlocks.insert(SuccBB).second) { if (DT->getNode(SuccBB)->getIDom()->getBlock() == Header) { assert(!SuccBB->getSinglePredecessor() && "BB should be the IDom then!"); DT->changeImmediateDominator(SuccBB, PreHeader); } } } } // Loop structure should be the following: // Epilog Prolog // // PreHeader PreHeader // NewPreHeader PrologPreHeader // Header PrologHeader // ... ... // Latch PrologLatch // NewExit PrologExit // EpilogPreHeader NewPreHeader // EpilogHeader Header // ... ... // EpilogLatch Latch // LatchExit LatchExit // Rewrite the cloned instruction operands to use the values created when the // clone is created. for (BasicBlock *BB : NewBlocks) { for (Instruction &I : *BB) { RemapInstruction(&I, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); } } if (UseEpilogRemainder) { // Connect the epilog code to the original loop and update the // PHI functions. ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader, NewPreHeader, VMap, DT, LI, PreserveLCSSA); // Update counter in loop for unrolling. // I should be multiply of Count. IRBuilder<> B2(NewPreHeader->getTerminator()); Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter"); BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); B2.SetInsertPoint(LatchBR); PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter", Header->getFirstNonPHI()); Value *IdxSub = B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".nsub"); Value *IdxCmp; if (LatchBR->getSuccessor(0) == Header) IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp"); else IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp"); NewIdx->addIncoming(TestVal, NewPreHeader); NewIdx->addIncoming(IdxSub, Latch); LatchBR->setCondition(IdxCmp); } else { // Connect the prolog code to the original loop and update the // PHI functions. ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader, NewPreHeader, VMap, DT, LI, PreserveLCSSA); } // If this loop is nested, then the loop unroller changes the code in the // parent loop, so the Scalar Evolution pass needs to be run again. if (Loop *ParentLoop = L->getParentLoop()) SE->forgetLoop(ParentLoop); // Canonicalize to LoopSimplifyForm both original and remainder loops. We // cannot rely on the LoopUnrollPass to do this because it only does // canonicalization for parent/subloops and not the sibling loops. if (OtherExits.size() > 0) { // Generate dedicated exit blocks for the original loop, to preserve // LoopSimplifyForm. formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA); // Generate dedicated exit blocks for the remainder loop if one exists, to // preserve LoopSimplifyForm. if (remainderLoop) formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA); } if (remainderLoop && UnrollRemainder) { DEBUG(dbgs() << "Unrolling remainder loop\n"); UnrollLoop(remainderLoop, /*Count*/Count - 1, /*TripCount*/Count - 1, /*Force*/false, /*AllowRuntime*/false, /*AllowExpensiveTripCount*/false, /*PreserveCondBr*/true, /*PreserveOnlyFirst*/false, /*TripMultiple*/1, /*PeelCount*/0, /*UnrollRemainder*/false, LI, SE, DT, AC, ORE, PreserveLCSSA); } NumRuntimeUnrolled++; return true; }
/// optimizeCheck - replace the given check CallInst with the check's fast /// version if all the source memory objects can be found and it is obvious /// that none of them have been freed at the point where the check is made. /// Returns the new call if possible and NULL otherwise. /// /// This currently works only with memory objects that can't be freed: /// * global variables /// * allocas that trivially have function scope /// * byval arguments /// bool ExactCheckOpt::optimizeCheck(CallInst *CI, CheckInfoType *Info) { // Examined values SmallSet<Value*, 16> Visited; // Potential memory objects SmallSet<Value*, 4> Objects; std::queue<Value*> Q; // Start from the the pointer operand Value *StartPtr = CI->getArgOperand(Info->PtrArgNo)->stripPointerCasts(); Q.push(StartPtr); // Use BFS to find all potential memory objects while(!Q.empty()) { Value *o = Q.front()->stripPointerCasts(); Q.pop(); if(Visited.count(o)) continue; Visited.insert(o); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(o)) { if (CE->getOpcode() == Instruction::GetElementPtr) { Q.push(CE->getOperand(0)); } else { // Exit early if any of the objects are unsupported. if (!isSimpleMemoryObject(o)) return false; Objects.insert(o); } } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(o)) { Q.push(GEP->getPointerOperand()); // It is fine to ignore the case of indexing into null with a pointer // because that case is invalid for LLVM-aware objects such as allocas, // globals, and objects pointed to by noalias pointers. } else if(PHINode *PHI = dyn_cast<PHINode>(o)) { for (unsigned i = 0, num = PHI->getNumIncomingValues(); i != num; ++i) Q.push(PHI->getIncomingValue(i)); } else if (SelectInst *SI = dyn_cast<SelectInst>(o)) { Q.push(SI->getTrueValue()); Q.push(SI->getFalseValue()); } else { // Exit early if any of the objects are unsupported. if (!isSimpleMemoryObject(o)) return false; Objects.insert(o); } } // Mapping from the initial value to the corresponding size and void pointer: // * memory object -> its size and pointer // * phi/select -> corresponding phi/select for the sizes and pointers // * anything else -> the corresponding size and pointer on the path std::map <Value*, PtrSizePair> M; Module &Mod = *CI->getParent()->getParent()->getParent(); Type *SizeTy = getSizeType(Info, Mod); // Add non-instruction non-constant allocation object pointers to the front // of the function's entry block. BasicBlock &EntryBlock = CI->getParent()->getParent()->getEntryBlock(); Instruction *FirstInsertionPoint = ++BasicBlock::iterator(EntryBlock.begin()); for (SmallSet<Value*, 16>::const_iterator It = Objects.begin(), E = Objects.end(); It != E; ++It) { // Obj is a memory object pointer: alloca, argument, load, callinst, etc. Value *Obj = *It; // Insert instruction-based allocation pointers just after the allocation. Instruction *InsertBefore = FirstInsertionPoint; if (Instruction *I = dyn_cast<Instruction>(Obj)) InsertBefore = ++BasicBlock::iterator(I); IRBuilder<> Builder(InsertBefore); SizeOffsetEvalType SizeOffset = ObjSizeEval->compute(Obj); assert(ObjSizeEval->bothKnown(SizeOffset)); assert(dyn_cast<ConstantInt>(SizeOffset.second)->isZero()); Value *Size = Builder.CreateIntCast(SizeOffset.first, SizeTy, /*isSigned=*/false); Value *Ptr = Builder.CreatePointerCast(Obj, VoidPtrTy); M[Obj] = std::make_pair(Ptr, Size); } // Create the rest of the size values and object pointers. // The phi nodes will be finished later. for (SmallSet<Value*, 16>::const_iterator I = Visited.begin(), E = Visited.end(); I != E; ++I) { getPtrAndSize(*I, SizeTy, M); } // Finalize the phi nodes. for (SmallSet<Value*, 16>::const_iterator I = Visited.begin(), E = Visited.end(); I != E; ++I) { if (PHINode *PHI = dyn_cast<PHINode>(*I)) { assert(M.count(PHI)); PHINode *PtrPHI = cast<PHINode>(M[PHI].first); PHINode *SizePHI = cast<PHINode>(M[PHI].second); for(unsigned i = 0, num = PHI->getNumIncomingValues(); i != num; ++i) { Value *IncomingValue = PHI->getIncomingValue(i)->stripPointerCasts(); assert(M.count(IncomingValue)); PtrPHI->addIncoming(M[IncomingValue].first, PHI->getIncomingBlock(i)); SizePHI->addIncoming(M[IncomingValue].second, PHI->getIncomingBlock(i)); } } } // Insert the fast version of the check just before the regular version. assert(M.count(StartPtr) && "The memory object and its size should be known"); createFastCheck(Info, CI, M[StartPtr].first, M[StartPtr].second); return true; }
void GNUstep::IMPCacher::CacheLookup(Instruction *lookup, Value *slot, Value *version, bool isSuperMessage) { // If this IMP is already cached, don't cache it again. if (lookup->getMetadata(IMPCacheFlagKind)) { return; } lookup->setMetadata(IMPCacheFlagKind, AlreadyCachedFlag); bool isInvoke = false; BasicBlock *beforeLookupBB = lookup->getParent(); BasicBlock *lookupBB = SplitBlock(beforeLookupBB, lookup, Owner); BasicBlock *lookupFinishedBB = lookupBB; BasicBlock *afterLookupBB; if (InvokeInst *inv = dyn_cast<InvokeInst>(lookup)) { afterLookupBB = inv->getNormalDest(); lookupFinishedBB = BasicBlock::Create(Context, "done_lookup", lookupBB->getParent()); CGBuilder B(lookupFinishedBB); B.CreateBr(afterLookupBB); inv->setNormalDest(lookupFinishedBB); isInvoke = true; } else { BasicBlock::iterator iter = lookup; iter++; afterLookupBB = SplitBlock(iter->getParent(), iter, Owner); } removeTerminator(beforeLookupBB); CGBuilder B = CGBuilder(beforeLookupBB); // Load the slot and check that neither it nor the version is 0. Value *versionValue = B.CreateLoad(version); Value *receiverPtr = lookup->getOperand(0); Value *receiver = receiverPtr; if (!isSuperMessage) { receiver = B.CreateLoad(receiverPtr); } // For small objects, we skip the cache entirely. // FIXME: Class messages are never to small objects... bool is64Bit = llvm::Module::Pointer64 == B.GetInsertBlock()->getParent()->getParent()->getPointerSize(); LLVMType *intPtrTy = is64Bit ? Type::getInt64Ty(Context) : Type::getInt32Ty(Context); // Receiver as an integer Value *receiverSmallObject = B.CreatePtrToInt(receiver, intPtrTy); // Receiver is a small object... receiverSmallObject = B.CreateAnd(receiverSmallObject, is64Bit ? 7 : 1); // Receiver is not a small object. receiverSmallObject = B.CreateICmpNE(receiverSmallObject, Constant::getNullValue(intPtrTy)); // Ideally, we'd call objc_msgSend() here, but for now just skip the cache // lookup Value *isCacheEmpty = B.CreateICmpEQ(versionValue, Constant::getNullValue(IntTy)); Value *receiverNil = B.CreateICmpEQ(receiver, Constant::getNullValue(receiver->getType())); isCacheEmpty = B.CreateOr(isCacheEmpty, receiverNil); isCacheEmpty = B.CreateOr(isCacheEmpty, receiverSmallObject); BasicBlock *cacheLookupBB = BasicBlock::Create(Context, "cache_check", lookupBB->getParent()); B.CreateCondBr(isCacheEmpty, lookupBB, cacheLookupBB); // Check the cache node is current B.SetInsertPoint(cacheLookupBB); Value *slotValue = B.CreateLoad(slot, "slot_value"); Value *slotVersion = B.CreateStructGEP(slotValue, 3); // Note: Volatile load because the slot version might have changed in // another thread. slotVersion = B.CreateLoad(slotVersion, true, "slot_version"); Value *slotCachedFor = B.CreateStructGEP(slotValue, 1); slotCachedFor = B.CreateLoad(slotCachedFor, true, "slot_owner"); Value *cls = B.CreateLoad(B.CreateBitCast(receiver, IdTy)); Value *isVersionCorrect = B.CreateICmpEQ(slotVersion, versionValue); Value *isOwnerCorrect = B.CreateICmpEQ(slotCachedFor, cls); Value *isSlotValid = B.CreateAnd(isVersionCorrect, isOwnerCorrect); // If this slot is still valid, skip the lookup. B.CreateCondBr(isSlotValid, afterLookupBB, lookupBB); // Perform the real lookup and cache the result removeTerminator(lookupFinishedBB); // Replace the looked up slot with the loaded one B.SetInsertPoint(afterLookupBB, afterLookupBB->begin()); PHINode *newLookup = IRBuilderCreatePHI(&B, lookup->getType(), 3, "new_lookup"); // Not volatile, so a redundant load elimination pass can do some phi // magic with this later. lookup->replaceAllUsesWith(newLookup); B.SetInsertPoint(lookupFinishedBB); Value * newReceiver = receiver; if (!isSuperMessage) { newReceiver = B.CreateLoad(receiverPtr); } BasicBlock *storeCacheBB = BasicBlock::Create(Context, "cache_store", lookupBB->getParent()); // Don't store the cached lookup if we are doing forwarding tricks. // Also skip caching small object messages for now Value *skipCacheWrite = B.CreateOr(B.CreateICmpNE(receiver, newReceiver), receiverSmallObject); skipCacheWrite = B.CreateOr(skipCacheWrite, receiverNil); B.CreateCondBr(skipCacheWrite, afterLookupBB, storeCacheBB); B.SetInsertPoint(storeCacheBB); // Store it even if the version is 0, because we always check that the // version is not 0 at the start and an occasional redundant store is // probably better than a branch every time. B.CreateStore(lookup, slot); B.CreateStore(B.CreateLoad(B.CreateStructGEP(lookup, 3)), version); cls = B.CreateLoad(B.CreateBitCast(receiver, IdTy)); B.CreateStore(cls, B.CreateStructGEP(lookup, 1)); B.CreateBr(afterLookupBB); newLookup->addIncoming(lookup, lookupFinishedBB); newLookup->addIncoming(slotValue, cacheLookupBB); newLookup->addIncoming(lookup, storeCacheBB); }
bool LoopInterchangeTransform::adjustLoopBranches() { DEBUG(dbgs() << "adjustLoopBranches called\n"); // Adjust the loop preheader BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor(); BasicBlock *InnerLoopLatchPredecessor = InnerLoopLatch->getUniquePredecessor(); BasicBlock *InnerLoopLatchSuccessor; BasicBlock *OuterLoopLatchSuccessor; BranchInst *OuterLoopLatchBI = dyn_cast<BranchInst>(OuterLoopLatch->getTerminator()); BranchInst *InnerLoopLatchBI = dyn_cast<BranchInst>(InnerLoopLatch->getTerminator()); BranchInst *OuterLoopHeaderBI = dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); BranchInst *InnerLoopHeaderBI = dyn_cast<BranchInst>(InnerLoopHeader->getTerminator()); if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor || !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI || !InnerLoopHeaderBI) return false; BranchInst *InnerLoopLatchPredecessorBI = dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator()); BranchInst *OuterLoopPredecessorBI = dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator()); if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI) return false; BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor(); if (!InnerLoopHeaderSuccessor) return false; // Adjust Loop Preheader and headers unsigned NumSucc = OuterLoopPredecessorBI->getNumSuccessors(); for (unsigned i = 0; i < NumSucc; ++i) { if (OuterLoopPredecessorBI->getSuccessor(i) == OuterLoopPreHeader) OuterLoopPredecessorBI->setSuccessor(i, InnerLoopPreHeader); } NumSucc = OuterLoopHeaderBI->getNumSuccessors(); for (unsigned i = 0; i < NumSucc; ++i) { if (OuterLoopHeaderBI->getSuccessor(i) == OuterLoopLatch) OuterLoopHeaderBI->setSuccessor(i, LoopExit); else if (OuterLoopHeaderBI->getSuccessor(i) == InnerLoopPreHeader) OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSuccessor); } // Adjust reduction PHI's now that the incoming block has changed. updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader, OuterLoopHeader); BranchInst::Create(OuterLoopPreHeader, InnerLoopHeaderBI); InnerLoopHeaderBI->eraseFromParent(); // -------------Adjust loop latches----------- if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader) InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1); else InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0); NumSucc = InnerLoopLatchPredecessorBI->getNumSuccessors(); for (unsigned i = 0; i < NumSucc; ++i) { if (InnerLoopLatchPredecessorBI->getSuccessor(i) == InnerLoopLatch) InnerLoopLatchPredecessorBI->setSuccessor(i, InnerLoopLatchSuccessor); } // Adjust PHI nodes in InnerLoopLatchSuccessor. Update all uses of PHI with // the value and remove this PHI node from inner loop. SmallVector<PHINode *, 8> LcssaVec; for (auto I = InnerLoopLatchSuccessor->begin(); isa<PHINode>(I); ++I) { PHINode *LcssaPhi = cast<PHINode>(I); LcssaVec.push_back(LcssaPhi); } for (auto I = LcssaVec.begin(), E = LcssaVec.end(); I != E; ++I) { PHINode *P = *I; Value *Incoming = P->getIncomingValueForBlock(InnerLoopLatch); P->replaceAllUsesWith(Incoming); P->eraseFromParent(); } if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader) OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1); else OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0); if (InnerLoopLatchBI->getSuccessor(1) == InnerLoopLatchSuccessor) InnerLoopLatchBI->setSuccessor(1, OuterLoopLatchSuccessor); else InnerLoopLatchBI->setSuccessor(0, OuterLoopLatchSuccessor); updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch); if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopLatchSuccessor) { OuterLoopLatchBI->setSuccessor(0, InnerLoopLatch); } else { OuterLoopLatchBI->setSuccessor(1, InnerLoopLatch); } return true; }
/// CleanupAndPrepareModules - Get the specified modules ready for code /// generator testing. /// static void CleanupAndPrepareModules(BugDriver &BD, Module *&Test, Module *Safe) { // Clean up the modules, removing extra cruft that we don't need anymore... Test = BD.performFinalCleanups(Test); // If we are executing the JIT, we have several nasty issues to take care of. if (!BD.isExecutingJIT()) return; // First, if the main function is in the Safe module, we must add a stub to // the Test module to call into it. Thus, we create a new function `main' // which just calls the old one. if (Function *oldMain = Safe->getFunction("main")) if (!oldMain->isDeclaration()) { // Rename it oldMain->setName("llvm_bugpoint_old_main"); // Create a NEW `main' function with same type in the test module. Function *newMain = Function::Create(oldMain->getFunctionType(), GlobalValue::ExternalLinkage, "main", Test); // Create an `oldmain' prototype in the test module, which will // corresponds to the real main function in the same module. Function *oldMainProto = Function::Create(oldMain->getFunctionType(), GlobalValue::ExternalLinkage, oldMain->getName(), Test); // Set up and remember the argument list for the main function. std::vector<Value*> args; for (Function::arg_iterator I = newMain->arg_begin(), E = newMain->arg_end(), OI = oldMain->arg_begin(); I != E; ++I, ++OI) { I->setName(OI->getName()); // Copy argument names from oldMain args.push_back(I); } // Call the old main function and return its result BasicBlock *BB = BasicBlock::Create(Safe->getContext(), "entry", newMain); CallInst *call = CallInst::Create(oldMainProto, args.begin(), args.end(), "", BB); // If the type of old function wasn't void, return value of call ReturnInst::Create(Safe->getContext(), call, BB); } // The second nasty issue we must deal with in the JIT is that the Safe // module cannot directly reference any functions defined in the test // module. Instead, we use a JIT API call to dynamically resolve the // symbol. // Add the resolver to the Safe module. // Prototype: void *getPointerToNamedFunction(const char* Name) Constant *resolverFunc = Safe->getOrInsertFunction("getPointerToNamedFunction", PointerType::getUnqual(Type::getInt8Ty(Safe->getContext())), PointerType::getUnqual(Type::getInt8Ty(Safe->getContext())), (Type *)0); // Use the function we just added to get addresses of functions we need. for (Module::iterator F = Safe->begin(), E = Safe->end(); F != E; ++F) { if (F->isDeclaration() && !F->use_empty() && &*F != resolverFunc && !F->isIntrinsic() /* ignore intrinsics */) { Function *TestFn = Test->getFunction(F->getName()); // Don't forward functions which are external in the test module too. if (TestFn && !TestFn->isDeclaration()) { // 1. Add a string constant with its name to the global file Constant *InitArray = ConstantArray::get(F->getContext(), F->getName()); GlobalVariable *funcName = new GlobalVariable(*Safe, InitArray->getType(), true /*isConstant*/, GlobalValue::InternalLinkage, InitArray, F->getName() + "_name"); // 2. Use `GetElementPtr *funcName, 0, 0' to convert the string to an // sbyte* so it matches the signature of the resolver function. // GetElementPtr *funcName, ulong 0, ulong 0 std::vector<Constant*> GEPargs(2, Constant::getNullValue(Type::getInt32Ty(F->getContext()))); Value *GEP = ConstantExpr::getGetElementPtr(funcName, &GEPargs[0], 2); std::vector<Value*> ResolverArgs; ResolverArgs.push_back(GEP); // Rewrite uses of F in global initializers, etc. to uses of a wrapper // function that dynamically resolves the calls to F via our JIT API if (!F->use_empty()) { // Create a new global to hold the cached function pointer. Constant *NullPtr = ConstantPointerNull::get(F->getType()); GlobalVariable *Cache = new GlobalVariable(*F->getParent(), F->getType(), false, GlobalValue::InternalLinkage, NullPtr,F->getName()+".fpcache"); // Construct a new stub function that will re-route calls to F const FunctionType *FuncTy = F->getFunctionType(); Function *FuncWrapper = Function::Create(FuncTy, GlobalValue::InternalLinkage, F->getName() + "_wrapper", F->getParent()); BasicBlock *EntryBB = BasicBlock::Create(F->getContext(), "entry", FuncWrapper); BasicBlock *DoCallBB = BasicBlock::Create(F->getContext(), "usecache", FuncWrapper); BasicBlock *LookupBB = BasicBlock::Create(F->getContext(), "lookupfp", FuncWrapper); // Check to see if we already looked up the value. Value *CachedVal = new LoadInst(Cache, "fpcache", EntryBB); Value *IsNull = new ICmpInst(*EntryBB, ICmpInst::ICMP_EQ, CachedVal, NullPtr, "isNull"); BranchInst::Create(LookupBB, DoCallBB, IsNull, EntryBB); // Resolve the call to function F via the JIT API: // // call resolver(GetElementPtr...) CallInst *Resolver = CallInst::Create(resolverFunc, ResolverArgs.begin(), ResolverArgs.end(), "resolver", LookupBB); // Cast the result from the resolver to correctly-typed function. CastInst *CastedResolver = new BitCastInst(Resolver, PointerType::getUnqual(F->getFunctionType()), "resolverCast", LookupBB); // Save the value in our cache. new StoreInst(CastedResolver, Cache, LookupBB); BranchInst::Create(DoCallBB, LookupBB); PHINode *FuncPtr = PHINode::Create(NullPtr->getType(), "fp", DoCallBB); FuncPtr->addIncoming(CastedResolver, LookupBB); FuncPtr->addIncoming(CachedVal, EntryBB); // Save the argument list. std::vector<Value*> Args; for (Function::arg_iterator i = FuncWrapper->arg_begin(), e = FuncWrapper->arg_end(); i != e; ++i) Args.push_back(i); // Pass on the arguments to the real function, return its result if (F->getReturnType() == Type::getVoidTy(F->getContext())) { CallInst::Create(FuncPtr, Args.begin(), Args.end(), "", DoCallBB); ReturnInst::Create(F->getContext(), DoCallBB); } else { CallInst *Call = CallInst::Create(FuncPtr, Args.begin(), Args.end(), "retval", DoCallBB); ReturnInst::Create(F->getContext(),Call, DoCallBB); } // Use the wrapper function instead of the old function F->replaceAllUsesWith(FuncWrapper); } } } } if (verifyModule(*Test) || verifyModule(*Safe)) { errs() << "Bugpoint has a bug, which corrupted a module!!\n"; abort(); } }
// First thing we need to do is scan the whole function for values that are // live across unwind edges. Each value that is live across an unwind edge // we spill into a stack location, guaranteeing that there is nothing live // across the unwind edge. This process also splits all critical edges // coming out of invoke's. void LowerInvoke:: splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) { // First step, split all critical edges from invoke instructions. for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { InvokeInst *II = Invokes[i]; SplitCriticalEdge(II, 0, this); SplitCriticalEdge(II, 1, this); assert(!isa<PHINode>(II->getNormalDest()) && !isa<PHINode>(II->getUnwindDest()) && "critical edge splitting left single entry phi nodes?"); } Function *F = Invokes.back()->getParent()->getParent(); // To avoid having to handle incoming arguments specially, we lower each arg // to a copy instruction in the entry block. This ensures that the argument // value itself cannot be live across the entry block. BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin(); while (isa<AllocaInst>(AfterAllocaInsertPt) && isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize())) ++AfterAllocaInsertPt; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI) { Type *Ty = AI->getType(); // Aggregate types can't be cast, but are legal argument types, so we have // to handle them differently. We use an extract/insert pair as a // lightweight method to achieve the same goal. if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) { Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt); Instruction *NI = InsertValueInst::Create(AI, EI, 0); NI->insertAfter(EI); AI->replaceAllUsesWith(NI); // Set the operand of the instructions back to the AllocaInst. EI->setOperand(0, AI); NI->setOperand(0, AI); } else { // This is always a no-op cast because we're casting AI to AI->getType() // so src and destination types are identical. BitCast is the only // possibility. CastInst *NC = new BitCastInst( AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); AI->replaceAllUsesWith(NC); // Set the operand of the cast instruction back to the AllocaInst. // Normally it's forbidden to replace a CastInst's operand because it // could cause the opcode to reflect an illegal conversion. However, // we're replacing it here with the same value it was constructed with. // We do this because the above replaceAllUsesWith() clobbered the // operand, but we want this one to remain. NC->setOperand(0, AI); } } // Finally, scan the code looking for instructions with bad live ranges. for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { // Ignore obvious cases we don't have to handle. In particular, most // instructions either have no uses or only have a single use inside the // current block. Ignore them quickly. Instruction *Inst = II; if (Inst->use_empty()) continue; if (Inst->hasOneUse() && cast<Instruction>(Inst->use_back())->getParent() == BB && !isa<PHINode>(Inst->use_back())) continue; // If this is an alloca in the entry block, it's not a real register // value. if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin()) continue; // Avoid iterator invalidation by copying users to a temporary vector. SmallVector<Instruction*,16> Users; for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); if (User->getParent() != BB || isa<PHINode>(User)) Users.push_back(User); } // Scan all of the uses and see if the live range is live across an unwind // edge. If we find a use live across an invoke edge, create an alloca // and spill the value. std::set<InvokeInst*> InvokesWithStoreInserted; // Find all of the blocks that this value is live in. std::set<BasicBlock*> LiveBBs; LiveBBs.insert(Inst->getParent()); while (!Users.empty()) { Instruction *U = Users.back(); Users.pop_back(); if (!isa<PHINode>(U)) { MarkBlocksLiveIn(U->getParent(), LiveBBs); } else { // Uses for a PHI node occur in their predecessor block. PHINode *PN = cast<PHINode>(U); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == Inst) MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); } } // Now that we know all of the blocks that this thing is live in, see if // it includes any of the unwind locations. bool NeedsSpill = false; for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { NeedsSpill = true; } } // If we decided we need a spill, do it. if (NeedsSpill) { ++NumSpilled; DemoteRegToStack(*Inst, true); } } }
// UnifyAllExitNodes - Unify all exit nodes of the CFG by creating a new // BasicBlock, and converting all returns to unconditional branches to this // new basic block. The singular exit node is returned. // // If there are no return stmts in the Function, a null pointer is returned. // bool UnifyFunctionExitNodes::runOnFunction(Function &F) { // Loop over all of the blocks in a function, tracking all of the blocks that // return. // std::vector<BasicBlock*> ReturningBlocks; std::vector<BasicBlock*> UnreachableBlocks; for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I) if (isa<ReturnInst>(I->getTerminator())) ReturningBlocks.push_back(I); else if (isa<UnreachableInst>(I->getTerminator())) UnreachableBlocks.push_back(I); // Then unreachable blocks. if (UnreachableBlocks.empty()) { UnreachableBlock = nullptr; } else if (UnreachableBlocks.size() == 1) { UnreachableBlock = UnreachableBlocks.front(); } else { UnreachableBlock = BasicBlock::Create(F.getContext(), "UnifiedUnreachableBlock", &F); new UnreachableInst(F.getContext(), UnreachableBlock); for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(), E = UnreachableBlocks.end(); I != E; ++I) { BasicBlock *BB = *I; BB->getInstList().pop_back(); // Remove the unreachable inst. BranchInst::Create(UnreachableBlock, BB); } } // Now handle return blocks. if (ReturningBlocks.empty()) { ReturnBlock = nullptr; return false; // No blocks return } else if (ReturningBlocks.size() == 1) { ReturnBlock = ReturningBlocks.front(); // Already has a single return block return false; } // Otherwise, we need to insert a new basic block into the function, add a PHI // nodes (if the function returns values), and convert all of the return // instructions into unconditional branches. // BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), "UnifiedReturnBlock", &F); PHINode *PN = nullptr; if (F.getReturnType()->isVoidTy()) { ReturnInst::Create(F.getContext(), nullptr, NewRetBlock); } else { // If the function doesn't return void... add a PHI node to the block... PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(), "UnifiedRetVal"); NewRetBlock->getInstList().push_back(PN); ReturnInst::Create(F.getContext(), PN, NewRetBlock); } // Loop over all of the blocks, replacing the return instruction with an // unconditional branch. // for (std::vector<BasicBlock*>::iterator I = ReturningBlocks.begin(), E = ReturningBlocks.end(); I != E; ++I) { BasicBlock *BB = *I; // Add an incoming element to the PHI node for every return instruction that // is merging into this new block... if (PN) PN->addIncoming(BB->getTerminator()->getOperand(0), BB); BB->getInstList().pop_back(); // Remove the return insn BranchInst::Create(NewRetBlock, BB); } ReturnBlock = NewRetBlock; return true; }
/// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and /// an unconditional branch in it. void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { BranchInst *BI = cast<BranchInst>(BB->getTerminator()); BasicBlock *DestBB = BI->getSuccessor(0); DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB); // If the destination block has a single pred, then this is a trivial edge, // just collapse it. if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) { if (SinglePred != DestBB) { // Remember if SinglePred was the entry block of the function. If so, we // will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); MergeBasicBlockIntoOnlyPred(DestBB, this); if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); return; } } // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB // to handle the new incoming edges it is about to have. PHINode *PN; for (BasicBlock::iterator BBI = DestBB->begin(); (PN = dyn_cast<PHINode>(BBI)); ++BBI) { // Remove the incoming value for BB, and remember it. Value *InVal = PN->removeIncomingValue(BB, false); // Two options: either the InVal is a phi node defined in BB or it is some // value that dominates BB. PHINode *InValPhi = dyn_cast<PHINode>(InVal); if (InValPhi && InValPhi->getParent() == BB) { // Add all of the input values of the input PHI as inputs of this phi. for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i) PN->addIncoming(InValPhi->getIncomingValue(i), InValPhi->getIncomingBlock(i)); } else { // Otherwise, add one instance of the dominating value for each edge that // we will be adding. if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) PN->addIncoming(InVal, BBPN->getIncomingBlock(i)); } else { for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) PN->addIncoming(InVal, *PI); } } } // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. BB->replaceAllUsesWith(DestBB); if (PFI) { PFI->replaceAllUses(BB, DestBB); PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB)); } BB->eraseFromParent(); ++NumElim; DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); }
void IrGen::visit(AstOperator& op) { const auto& astOperands = op.args().childs(); Value* llvmResult = nullptr; // unary non-arithmetic operators if (op.op() == AstOperator::eNot) { llvmResult = m_builder.CreateNot(callAcceptOn(*astOperands.front()), "not"); } else if (op.op() == AstOperator::eAddrOf) { astOperands.front()->accept(*this); llvmResult = astOperands.front()->ir().irAddrOfIrObject(); } else if (op.op() == AstOperator::eDeref) { op.ir().setAddrOfIrObject( callAcceptOn(*astOperands.front()), EInitStatus::eInitialized); } // binary logical short circuit operators else if (op.isBinaryLogicalShortCircuit()) { const auto opname = op.op() == AstOperator::eAnd ? string{"and"} : string{"or"}; Function* functionIr = m_builder.GetInsertBlock()->getParent(); BasicBlock* rhsBB = BasicBlock::Create(llvmContext, opname + "_rhs"); BasicBlock* mergeBB = BasicBlock::Create(llvmContext, opname + "_merge"); // current/lhs BB: auto llvmLhs = callAcceptOn(*astOperands.front()); assert(llvmLhs); if (op.op() == AstOperator::eAnd) { m_builder.CreateCondBr(llvmLhs, rhsBB, mergeBB); } else if (op.op() == AstOperator::eOr) { m_builder.CreateCondBr(llvmLhs, mergeBB, rhsBB); } else { assert(false); } BasicBlock* lhsLastBB = m_builder.GetInsertBlock(); // rhsBB: functionIr->getBasicBlockList().push_back(rhsBB); m_builder.SetInsertPoint(rhsBB); auto llvmRhs = callAcceptOn(*astOperands.back()); m_builder.CreateBr(mergeBB); BasicBlock* rhsLastBB = m_builder.GetInsertBlock(); // mergeBB: functionIr->getBasicBlockList().push_back(mergeBB); m_builder.SetInsertPoint(mergeBB); PHINode* phi = m_builder.CreatePHI(Type::getInt1Ty(llvmContext), 2, opname); assert(phi); phi->addIncoming(llvmLhs, lhsLastBB); phi->addIncoming(llvmRhs, rhsLastBB); llvmResult = phi; } // assignment operators else if (op.op() == AstOperator::eAssign || op.op() == AstOperator::eVoidAssign) { astOperands.front()->accept(*this); auto llvmRhs = callAcceptOn(*astOperands.back()); m_builder.CreateStore( llvmRhs, astOperands.front()->ir().irAddrOfIrObject()); if (op.op() == AstOperator::eVoidAssign) { llvmResult = m_abstractObject; // void } else { // op.object() is the same as astOperands.front().object(), so // 'returning the result' is now a nop. } } // binary arithmetic operators else if (astOperands.size() == 2) { auto llvmLhs = callAcceptOn(*astOperands.front()); auto llvmRhs = callAcceptOn(*astOperands.back()); if (astOperands.front()->objType().is(ObjType::eStoredAsIntegral)) { switch (op.op()) { // clang-format off case AstOperator::eSub : llvmResult = m_builder.CreateSub (llvmLhs, llvmRhs, "sub"); break; case AstOperator::eAdd : llvmResult = m_builder.CreateAdd (llvmLhs, llvmRhs, "add"); break; case AstOperator::eMul : llvmResult = m_builder.CreateMul (llvmLhs, llvmRhs, "mul"); break; case AstOperator::eDiv : llvmResult = m_builder.CreateSDiv (llvmLhs, llvmRhs, "div"); break; case AstOperator::eEqualTo : llvmResult = m_builder.CreateICmpEQ(llvmLhs, llvmRhs, "cmp"); break; default: assert(false); // clang-format on } } else { switch (op.op()) { // clang-format off case AstOperator::eSub : llvmResult = m_builder.CreateFSub (llvmLhs, llvmRhs, "fsub"); break; case AstOperator::eAdd : llvmResult = m_builder.CreateFAdd (llvmLhs, llvmRhs, "add"); break; case AstOperator::eMul : llvmResult = m_builder.CreateFMul (llvmLhs, llvmRhs, "mul"); break; case AstOperator::eDiv : llvmResult = m_builder.CreateFDiv (llvmLhs, llvmRhs, "div"); break; case AstOperator::eEqualTo : llvmResult = m_builder.CreateFCmpOEQ(llvmLhs, llvmRhs, "cmp"); break; default: assert(false); // clang-format on } } assert(llvmResult); } // unary arithmetic operators else { assert(astOperands.size() == 1); const auto& operand = astOperands.front(); const auto& objType = operand->objType(); auto llvmOperand = callAcceptOn(*operand); if (objType.is(ObjType::eStoredAsIntegral)) { auto llvmZero = ConstantInt::get(llvmContext, APInt(objType.size(), 0)); switch (op.op()) { case '-': llvmResult = m_builder.CreateSub(llvmZero, llvmOperand, "neg"); break; case '+': llvmResult = llvmOperand; break; default: assert(false); } } else { auto llvmZero = ConstantFP::get(llvmContext, APFloat(0.0)); switch (op.op()) { case '-': llvmResult = m_builder.CreateFSub(llvmZero, llvmOperand, "fneg"); break; case '+': llvmResult = llvmOperand; break; default: assert(false); } } assert(llvmResult); } if (llvmResult) { allocateAndInitLocalIrObjectFor(op, llvmResult); } }
/// Create a clone of the blocks in a loop and connect them together. /// If UnrollProlog is true, loop structure will not be cloned, otherwise a new /// loop will be created including all cloned blocks, and the iterator of it /// switches to count NewIter down to 0. /// static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, BasicBlock *InsertTop, BasicBlock *InsertBot, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, LoopInfo *LI) { BasicBlock *Preheader = L->getLoopPreheader(); BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); Loop *NewLoop = 0; Loop *ParentLoop = L->getParentLoop(); if (!UnrollProlog) { NewLoop = new Loop(); if (ParentLoop) ParentLoop->addChildLoop(NewLoop); else LI->addTopLevelLoop(NewLoop); } // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".prol", F); NewBlocks.push_back(NewBB); if (NewLoop) NewLoop->addBasicBlockToLoop(NewBB, *LI); else if (ParentLoop) ParentLoop->addBasicBlockToLoop(NewBB, *LI); VMap[*BB] = NewBB; if (Header == *BB) { // For the first block, add a CFG connection to this newly // created block. InsertTop->getTerminator()->setSuccessor(0, NewBB); } if (Latch == *BB) { // For the last block, if UnrollProlog is true, create a direct jump to // InsertBot. If not, create a loop back to cloned head. VMap.erase((*BB)->getTerminator()); BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]); BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator()); IRBuilder<> Builder(LatchBR); if (UnrollProlog) { Builder.CreateBr(InsertBot); } else { PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, "prol.iter", FirstLoopBB->getFirstNonPHI()); Value *IdxSub = Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".sub"); Value *IdxCmp = Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); NewIdx->addIncoming(NewIter, InsertTop); NewIdx->addIncoming(IdxSub, NewBB); } LatchBR->eraseFromParent(); } } // Change the incoming values to the ones defined in the preheader or // cloned loop. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *NewPHI = cast<PHINode>(VMap[I]); if (UnrollProlog) { VMap[I] = NewPHI->getIncomingValueForBlock(Preheader); cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); } else { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); idx = NewPHI->getBasicBlockIndex(Latch); Value *InVal = NewPHI->getIncomingValue(idx); NewPHI->setIncomingBlock(idx, NewLatch); if (VMap[InVal]) NewPHI->setIncomingValue(idx, VMap[InVal]); } } if (NewLoop) { // Add unroll disable metadata to disable future unrolling for this loop. SmallVector<Metadata *, 4> MDs; // Reserve first location for self reference to the LoopID metadata node. MDs.push_back(nullptr); MDNode *LoopID = NewLoop->getLoopID(); if (LoopID) { // First remove any existing loop unrolling metadata. for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { bool IsUnrollMetadata = false; MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); if (MD) { const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); } if (!IsUnrollMetadata) MDs.push_back(LoopID->getOperand(i)); } } LLVMContext &Context = NewLoop->getHeader()->getContext(); SmallVector<Metadata *, 1> DisableOperands; DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); MDNode *DisableNode = MDNode::get(Context, DisableOperands); MDs.push_back(DisableNode); MDNode *NewLoopID = MDNode::get(Context, MDs); // Set operand 0 to refer to the loop id itself. NewLoopID->replaceOperandWith(0, NewLoopID); NewLoop->setLoopID(NewLoopID); } }
/// Connect the unrolling prolog code to the original loop. /// The unrolling prolog code contains code to execute the /// 'extra' iterations if the run-time trip count modulo the /// unroll count is non-zero. /// /// This function performs the following: /// - Create PHI nodes at prolog end block to combine values /// that exit the prolog code and jump around the prolog. /// - Add a PHI operand to a PHI node at the loop exit block /// for values that exit the prolog and go around the loop. /// - Branch around the original loop if the trip count is less /// than the unroll factor. /// static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *LastPrologBB, BasicBlock *PrologEnd, BasicBlock *OrigPH, BasicBlock *NewPH, ValueToValueMapTy &VMap, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, Pass *P) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); // Create a PHI node for each outgoing value from the original loop // (which means it is an outgoing value from the prolog code too). // The new PHI node is inserted in the prolog end basic block. // The new PHI name is added as an operand of a PHI node in either // the loop header or the loop exit block. for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch); SBI != SBE; ++SBI) { for (BasicBlock::iterator BBI = (*SBI)->begin(); PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) { // Add a new PHI node to the prolog end block and add the // appropriate incoming values. PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr", PrologEnd->getTerminator()); // Adding a value to the new PHI node from the original loop preheader. // This is the value that skips all the prolog code. if (L->contains(PN)) { NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH); } else { NewPN->addIncoming(UndefValue::get(PN->getType()), OrigPH); } Value *V = PN->getIncomingValueForBlock(Latch); if (Instruction *I = dyn_cast<Instruction>(V)) { if (L->contains(I)) { V = VMap[I]; } } // Adding a value to the new PHI node from the last prolog block // that was created. NewPN->addIncoming(V, LastPrologBB); // Update the existing PHI node operand with the value from the // new PHI node. How this is done depends on if the existing // PHI node is in the original loop block, or the exit block. if (L->contains(PN)) { PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN); } else { PN->addIncoming(NewPN, PrologEnd); } } } // Create a branch around the orignal loop, which is taken if there are no // iterations remaining to be executed after running the prologue. Instruction *InsertPt = PrologEnd->getTerminator(); IRBuilder<> B(InsertPt); assert(Count != 0 && "nonsensical Count!"); // If BECount <u (Count - 1) then (BECount + 1) & (Count - 1) == (BECount + 1) // (since Count is a power of 2). This means %xtraiter is (BECount + 1) and // and all of the iterations of this loop were executed by the prologue. Note // that if BECount <u (Count - 1) then (BECount + 1) cannot unsigned-overflow. Value *BrLoopExit = B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)); BasicBlock *Exit = L->getUniqueExitBlock(); assert(Exit && "Loop must have a single exit block only"); // Split the exit to maintain loop canonicalization guarantees SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit)); SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", AA, DT, LI, P->mustPreserveAnalysisID(LCSSAID)); // Add the branch to the exit block (around the unrolled loop) B.CreateCondBr(BrLoopExit, Exit, NewPH); InsertPt->eraseFromParent(); }
/// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind /// edge and spill them. void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst*> Invokes) { // Finally, scan the code looking for instructions with bad live ranges. for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { for (BasicBlock::iterator II = BB->begin(), IIE = BB->end(); II != IIE; ++II) { // Ignore obvious cases we don't have to handle. In particular, most // instructions either have no uses or only have a single use inside the // current block. Ignore them quickly. Instruction *Inst = II; if (Inst->use_empty()) continue; if (Inst->hasOneUse() && cast<Instruction>(Inst->use_back())->getParent() == BB && !isa<PHINode>(Inst->use_back())) continue; // If this is an alloca in the entry block, it's not a real register // value. if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) if (isa<ConstantInt>(AI->getArraySize()) && BB == F.begin()) continue; // Avoid iterator invalidation by copying users to a temporary vector. SmallVector<Instruction*, 16> Users; for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); if (User->getParent() != BB || isa<PHINode>(User)) Users.push_back(User); } // Find all of the blocks that this value is live in. SmallPtrSet<BasicBlock*, 64> LiveBBs; LiveBBs.insert(Inst->getParent()); while (!Users.empty()) { Instruction *U = Users.back(); Users.pop_back(); if (!isa<PHINode>(U)) { MarkBlocksLiveIn(U->getParent(), LiveBBs); } else { // Uses for a PHI node occur in their predecessor block. PHINode *PN = cast<PHINode>(U); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == Inst) MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); } } // Now that we know all of the blocks that this thing is live in, see if // it includes any of the unwind locations. bool NeedsSpill = false; for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around " << UnwindBlock->getName() << "\n"); NeedsSpill = true; break; } } // If we decided we need a spill, do it. // FIXME: Spilling this way is overkill, as it forces all uses of // the value to be reloaded from the stack slot, even those that aren't // in the unwind blocks. We should be more selective. if (NeedsSpill) { DemoteRegToStack(*Inst, true); ++NumSpilled; } } } // Go through the landing pads and remove any PHIs there. for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); LandingPadInst *LPI = UnwindBlock->getLandingPadInst(); // Place PHIs into a set to avoid invalidating the iterator. SmallPtrSet<PHINode*, 8> PHIsToDemote; for (BasicBlock::iterator PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN) PHIsToDemote.insert(cast<PHINode>(PN)); if (PHIsToDemote.empty()) continue; // Demote the PHIs to the stack. for (SmallPtrSet<PHINode*, 8>::iterator I = PHIsToDemote.begin(), E = PHIsToDemote.end(); I != E; ++I) DemotePHIToStack(*I); // Move the landingpad instruction back to the top of the landing pad block. LPI->moveBefore(UnwindBlock->begin()); } }
// This function indicates the current limitations in the transform as a result // of which we do not proceed. bool LoopInterchangeLegality::currentLimitations() { BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); PHINode *InnerInductionVar; PHINode *OuterInductionVar; // We currently handle only 1 induction variable inside the loop. We also do // not handle reductions as of now. if (getPHICount(InnerLoopHeader) > 1) return true; if (getPHICount(OuterLoopHeader) > 1) return true; InnerInductionVar = getInductionVariable(InnerLoop, SE); OuterInductionVar = getInductionVariable(OuterLoop, SE); if (!OuterInductionVar || !InnerInductionVar) { DEBUG(dbgs() << "Induction variable not found\n"); return true; } // TODO: Triangular loops are not handled for now. if (!isLoopStructureUnderstood(InnerInductionVar)) { DEBUG(dbgs() << "Loop structure not understood by pass\n"); return true; } // TODO: Loops with LCSSA PHI's are currently not handled. if (isa<PHINode>(OuterLoopLatch->begin())) { DEBUG(dbgs() << "Found and LCSSA PHI in outer loop latch\n"); return true; } if (InnerLoopLatch != InnerLoopHeader && isa<PHINode>(InnerLoopLatch->begin())) { DEBUG(dbgs() << "Found and LCSSA PHI in inner loop latch\n"); return true; } // TODO: Current limitation: Since we split the inner loop latch at the point // were induction variable is incremented (induction.next); We cannot have // more than 1 user of induction.next since it would result in broken code // after split. // e.g. // for(i=0;i<N;i++) { // for(j = 0;j<M;j++) { // A[j+1][i+2] = A[j][i]+k; // } // } bool FoundInduction = false; Instruction *InnerIndexVarInc = nullptr; if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader) InnerIndexVarInc = dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1)); else InnerIndexVarInc = dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0)); if (!InnerIndexVarInc) return true; // Since we split the inner loop latch on this induction variable. Make sure // we do not have any instruction between the induction variable and branch // instruction. for (auto I = InnerLoopLatch->rbegin(), E = InnerLoopLatch->rend(); I != E && !FoundInduction; ++I) { if (isa<BranchInst>(*I) || isa<CmpInst>(*I) || isa<TruncInst>(*I)) continue; const Instruction &Ins = *I; // We found an instruction. If this is not induction variable then it is not // safe to split this loop latch. if (!Ins.isIdenticalTo(InnerIndexVarInc)) return true; else FoundInduction = true; } // The loop latch ended and we didnt find the induction variable return as // current limitation. if (!FoundInduction) return true; return false; }
/// Connect the unrolling prolog code to the original loop. /// The unrolling prolog code contains code to execute the /// 'extra' iterations if the run-time trip count modulo the /// unroll count is non-zero. /// /// This function performs the following: /// - Create PHI nodes at prolog end block to combine values /// that exit the prolog code and jump around the prolog. /// - Add a PHI operand to a PHI node at the loop exit block /// for values that exit the prolog and go around the loop. /// - Branch around the original loop if the trip count is less /// than the unroll factor. /// static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *PrologExit, BasicBlock *OriginalLoopLatchExit, BasicBlock *PreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]); // Create a PHI node for each outgoing value from the original loop // (which means it is an outgoing value from the prolog code too). // The new PHI node is inserted in the prolog end basic block. // The new PHI node value is added as an operand of a PHI node in either // the loop header or the loop exit block. for (BasicBlock *Succ : successors(Latch)) { for (Instruction &BBI : *Succ) { PHINode *PN = dyn_cast<PHINode>(&BBI); // Exit when we passed all PHI nodes. if (!PN) break; // Add a new PHI node to the prolog end block and add the // appropriate incoming values. PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr", PrologExit->getFirstNonPHI()); // Adding a value to the new PHI node from the original loop preheader. // This is the value that skips all the prolog code. if (L->contains(PN)) { NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader); } else { NewPN->addIncoming(UndefValue::get(PN->getType()), PreHeader); } Value *V = PN->getIncomingValueForBlock(Latch); if (Instruction *I = dyn_cast<Instruction>(V)) { if (L->contains(I)) { V = VMap.lookup(I); } } // Adding a value to the new PHI node from the last prolog block // that was created. NewPN->addIncoming(V, PrologLatch); // Update the existing PHI node operand with the value from the // new PHI node. How this is done depends on if the existing // PHI node is in the original loop block, or the exit block. if (L->contains(PN)) { PN->setIncomingValue(PN->getBasicBlockIndex(NewPreHeader), NewPN); } else { PN->addIncoming(NewPN, PrologExit); } } } // Make sure that created prolog loop is in simplified form SmallVector<BasicBlock *, 4> PrologExitPreds; Loop *PrologLoop = LI->getLoopFor(PrologLatch); if (PrologLoop) { for (BasicBlock *PredBB : predecessors(PrologExit)) if (PrologLoop->contains(PredBB)) PrologExitPreds.push_back(PredBB); SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI, PreserveLCSSA); } // Create a branch around the original loop, which is taken if there are no // iterations remaining to be executed after running the prologue. Instruction *InsertPt = PrologExit->getTerminator(); IRBuilder<> B(InsertPt); assert(Count != 0 && "nonsensical Count!"); // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1) // This means %xtraiter is (BECount + 1) and all of the iterations of this // loop were executed by the prologue. Note that if BECount <u (Count - 1) // then (BECount + 1) cannot unsigned-overflow. Value *BrLoopExit = B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)); // Split the exit to maintain loop canonicalization guarantees SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit)); SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); // Add the branch to the exit block (around the unrolled loop) B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader); InsertPt->eraseFromParent(); if (DT) DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit); }
Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // If there is no definition of the renamed variable in this block, just use // GetValueAtEndOfBlock to do our work. if (!HasValueForBlock(BB)) return GetValueAtEndOfBlock(BB); // Otherwise, we have the hard case. Get the live-in values for each // predecessor. SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues; Value *SingularValue = nullptr; // We can get our predecessor info by walking the pred_iterator list, but it // is relatively slow. If we already have PHI nodes in this block, walk one // of them to get the predecessor list instead. if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) { BasicBlock *PredBB = SomePhi->getIncomingBlock(i); Value *PredVal = GetValueAtEndOfBlock(PredBB); PredValues.push_back(std::make_pair(PredBB, PredVal)); // Compute SingularValue. if (i == 0) SingularValue = PredVal; else if (PredVal != SingularValue) SingularValue = nullptr; } } else { bool isFirstPred = true; for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *PredBB = *PI; Value *PredVal = GetValueAtEndOfBlock(PredBB); PredValues.push_back(std::make_pair(PredBB, PredVal)); // Compute SingularValue. if (isFirstPred) { SingularValue = PredVal; isFirstPred = false; } else if (PredVal != SingularValue) SingularValue = nullptr; } } // If there are no predecessors, just return undef. if (PredValues.empty()) return UndefValue::get(ProtoType); // Otherwise, if all the merged values are the same, just use it. if (SingularValue) return SingularValue; // Otherwise, we do need a PHI: check to see if we already have one available // in this block that produces the right value. if (isa<PHINode>(BB->begin())) { SmallDenseMap<BasicBlock*, Value*, 8> ValueMapping(PredValues.begin(), PredValues.end()); PHINode *SomePHI; for (BasicBlock::iterator It = BB->begin(); (SomePHI = dyn_cast<PHINode>(It)); ++It) { if (IsEquivalentPHI(SomePHI, ValueMapping)) return SomePHI; } } // Ok, we have no way out, insert a new one now. PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(), ProtoName, &BB->front()); // Fill in all the predecessors of the PHI. for (unsigned i = 0, e = PredValues.size(); i != e; ++i) InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first); // See if the PHI node can be merged to a single value. This can happen in // loop cases when we get a PHI of itself and one other value. if (Value *V = SimplifyInstruction(InsertedPHI, BB->getModule()->getDataLayout())) { InsertedPHI->eraseFromParent(); return V; } // Set the DebugLoc of the inserted PHI, if available. DebugLoc DL; if (const Instruction *I = BB->getFirstNonPHI()) DL = I->getDebugLoc(); InsertedPHI->setDebugLoc(DL); // If the client wants to know about all new instructions, tell it. if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); return InsertedPHI; }
/// SplitLandingPadPredecessors - This method transforms the landing pad, /// OrigBB, by introducing two new basic blocks into the function. One of those /// new basic blocks gets the predecessors listed in Preds. The other basic /// block gets the remaining predecessors of OrigBB. The landingpad instruction /// OrigBB is clone into both of the new basic blocks. The new blocks are given /// the suffixes 'Suffix1' and 'Suffix2', and are returned in the NewBBs vector. /// /// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, /// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. In particular, /// it does not preserve LoopSimplify (because it's complicated to handle the /// case where one of the edges being split is an exit of a loop with other /// exits). /// void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef<BasicBlock*> Preds, const char *Suffix1, const char *Suffix2, Pass *P, SmallVectorImpl<BasicBlock*> &NewBBs) { assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!"); // Create a new basic block for OrigBB's predecessors listed in Preds. Insert // it right before the original block. BasicBlock *NewBB1 = BasicBlock::Create(OrigBB->getContext(), OrigBB->getName() + Suffix1, OrigBB->getParent(), OrigBB); NewBBs.push_back(NewBB1); // The new block unconditionally branches to the old block. BranchInst *BI1 = BranchInst::Create(OrigBB, NewBB1); // Move the edges from Preds to point to NewBB1 instead of OrigBB. for (unsigned i = 0, e = Preds.size(); i != e; ++i) { // This is slightly more strict than necessary; the minimum requirement // is that there be no more than one indirectbr branching to BB. And // all BlockAddress uses would need to be updated. assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && "Cannot split an edge from an IndirectBrInst"); Preds[i]->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1); } // Update DominatorTree, LoopInfo, and LCCSA analysis information. bool HasLoopExit = false; UpdateAnalysisInformation(OrigBB, NewBB1, Preds, P, HasLoopExit); // Update the PHI nodes in OrigBB with the values coming from NewBB1. UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, P, HasLoopExit); // Move the remaining edges from OrigBB to point to NewBB2. SmallVector<BasicBlock*, 8> NewBB2Preds; for (pred_iterator i = pred_begin(OrigBB), e = pred_end(OrigBB); i != e; ) { BasicBlock *Pred = *i++; if (Pred == NewBB1) continue; assert(!isa<IndirectBrInst>(Pred->getTerminator()) && "Cannot split an edge from an IndirectBrInst"); NewBB2Preds.push_back(Pred); e = pred_end(OrigBB); } BasicBlock *NewBB2 = 0; if (!NewBB2Preds.empty()) { // Create another basic block for the rest of OrigBB's predecessors. NewBB2 = BasicBlock::Create(OrigBB->getContext(), OrigBB->getName() + Suffix2, OrigBB->getParent(), OrigBB); NewBBs.push_back(NewBB2); // The new block unconditionally branches to the old block. BranchInst *BI2 = BranchInst::Create(OrigBB, NewBB2); // Move the remaining edges from OrigBB to point to NewBB2. for (SmallVectorImpl<BasicBlock*>::iterator i = NewBB2Preds.begin(), e = NewBB2Preds.end(); i != e; ++i) (*i)->getTerminator()->replaceUsesOfWith(OrigBB, NewBB2); // Update DominatorTree, LoopInfo, and LCCSA analysis information. HasLoopExit = false; UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, P, HasLoopExit); // Update the PHI nodes in OrigBB with the values coming from NewBB2. UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, P, HasLoopExit); } LandingPadInst *LPad = OrigBB->getLandingPadInst(); Instruction *Clone1 = LPad->clone(); Clone1->setName(Twine("lpad") + Suffix1); NewBB1->getInstList().insert(NewBB1->getFirstInsertionPt(), Clone1); if (NewBB2) { Instruction *Clone2 = LPad->clone(); Clone2->setName(Twine("lpad") + Suffix2); NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2); // Create a PHI node for the two cloned landingpad instructions. PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad); PN->addIncoming(Clone1, NewBB1); PN->addIncoming(Clone2, NewBB2); LPad->replaceAllUsesWith(PN); LPad->eraseFromParent(); } else { // There is no second clone. Just replace the landing pad with the first // clone. LPad->replaceAllUsesWith(Clone1); LPad->eraseFromParent(); } }
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, /// except that it does some simple constant prop and DCE on the fly. The /// effect of this is to copy significantly less code in cases where (for /// example) a function call with constant arguments is inlined, and those /// constant arguments cause a significant amount of code in the callee to be /// dead. Since this doesn't produce an exact copy of the input, it can't be /// used for things like CloneFunction or CloneModule. void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, DenseMap<const Value*, Value*> &ValueMap, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, const TargetData *TD, Instruction *TheCall) { assert(NameSuffix && "NameSuffix cannot be null!"); #ifndef NDEBUG for (Function::const_arg_iterator II = OldFunc->arg_begin(), E = OldFunc->arg_end(); II != E; ++II) assert(ValueMap.count(II) && "No mapping from source argument specified!"); #endif PruningFunctionCloner PFC(NewFunc, OldFunc, ValueMap, Returns, NameSuffix, CodeInfo, TD); // Clone the entry block, and anything recursively reachable from it. std::vector<const BasicBlock*> CloneWorklist; CloneWorklist.push_back(&OldFunc->getEntryBlock()); while (!CloneWorklist.empty()) { const BasicBlock *BB = CloneWorklist.back(); CloneWorklist.pop_back(); PFC.CloneBlock(BB, CloneWorklist); } // Loop over all of the basic blocks in the old function. If the block was // reachable, we have cloned it and the old block is now in the value map: // insert it into the new function in the right order. If not, ignore it. // // Defer PHI resolution until rest of function is resolved. SmallVector<const PHINode*, 16> PHIToResolve; for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); BI != BE; ++BI) { BasicBlock *NewBB = cast_or_null<BasicBlock>(ValueMap[BI]); if (NewBB == 0) continue; // Dead block. // Add the new block to the new function. NewFunc->getBasicBlockList().push_back(NewBB); // Loop over all of the instructions in the block, fixing up operand // references as we go. This uses ValueMap to do all the hard work. // BasicBlock::iterator I = NewBB->begin(); LLVMContext &Context = OldFunc->getContext(); unsigned DbgKind = Context.getMetadata().getMDKind("dbg"); MDNode *TheCallMD = NULL; SmallVector<Value *, 4> MDVs; if (TheCall && TheCall->hasMetadata()) TheCallMD = Context.getMetadata().getMD(DbgKind, TheCall); // Handle PHI nodes specially, as we have to remove references to dead // blocks. if (PHINode *PN = dyn_cast<PHINode>(I)) { // Skip over all PHI nodes, remembering them for later. BasicBlock::const_iterator OldI = BI->begin(); for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) { if (I->hasMetadata()) { if (TheCallMD) { if (MDNode *IMD = Context.getMetadata().getMD(DbgKind, I)) { MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD, Context); Context.getMetadata().addMD(DbgKind, NewMD, I); } } else { // The cloned instruction has dbg info but the call instruction // does not have dbg info. Remove dbg info from cloned instruction. Context.getMetadata().removeMD(DbgKind, I); } } PHIToResolve.push_back(cast<PHINode>(OldI)); } } // Otherwise, remap the rest of the instructions normally. for (; I != NewBB->end(); ++I) { if (I->hasMetadata()) { if (TheCallMD) { if (MDNode *IMD = Context.getMetadata().getMD(DbgKind, I)) { MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD, Context); Context.getMetadata().addMD(DbgKind, NewMD, I); } } else { // The cloned instruction has dbg info but the call instruction // does not have dbg info. Remove dbg info from cloned instruction. Context.getMetadata().removeMD(DbgKind, I); } } RemapInstruction(I, ValueMap); } } // Defer PHI resolution until rest of function is resolved, PHI resolution // requires the CFG to be up-to-date. for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { const PHINode *OPN = PHIToResolve[phino]; unsigned NumPreds = OPN->getNumIncomingValues(); const BasicBlock *OldBB = OPN->getParent(); BasicBlock *NewBB = cast<BasicBlock>(ValueMap[OldBB]); // Map operands for blocks that are live and remove operands for blocks // that are dead. for (; phino != PHIToResolve.size() && PHIToResolve[phino]->getParent() == OldBB; ++phino) { OPN = PHIToResolve[phino]; PHINode *PN = cast<PHINode>(ValueMap[OPN]); for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(ValueMap[PN->getIncomingBlock(pred)])) { Value *InVal = MapValue(PN->getIncomingValue(pred), ValueMap); assert(InVal && "Unknown input value?"); PN->setIncomingValue(pred, InVal); PN->setIncomingBlock(pred, MappedBlock); } else { PN->removeIncomingValue(pred, false); --pred, --e; // Revisit the next entry. } } } // The loop above has removed PHI entries for those blocks that are dead // and has updated others. However, if a block is live (i.e. copied over) // but its terminator has been changed to not go to this block, then our // phi nodes will have invalid entries. Update the PHI nodes in this // case. PHINode *PN = cast<PHINode>(NewBB->begin()); NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB)); if (NumPreds != PN->getNumIncomingValues()) { assert(NumPreds < PN->getNumIncomingValues()); // Count how many times each predecessor comes to this block. std::map<BasicBlock*, unsigned> PredCount; for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); PI != E; ++PI) --PredCount[*PI]; // Figure out how many entries to remove from each PHI. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) ++PredCount[PN->getIncomingBlock(i)]; // At this point, the excess predecessor entries are positive in the // map. Loop over all of the PHIs and remove excess predecessor // entries. BasicBlock::iterator I = NewBB->begin(); for (; (PN = dyn_cast<PHINode>(I)); ++I) { for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(), E = PredCount.end(); PCI != E; ++PCI) { BasicBlock *Pred = PCI->first; for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove) PN->removeIncomingValue(Pred, false); } } } // If the loops above have made these phi nodes have 0 or 1 operand, // replace them with undef or the input value. We must do this for // correctness, because 0-operand phis are not valid. PN = cast<PHINode>(NewBB->begin()); if (PN->getNumIncomingValues() == 0) { BasicBlock::iterator I = NewBB->begin(); BasicBlock::const_iterator OldI = OldBB->begin(); while ((PN = dyn_cast<PHINode>(I++))) { Value *NV = UndefValue::get(PN->getType()); PN->replaceAllUsesWith(NV); assert(ValueMap[OldI] == PN && "ValueMap mismatch"); ValueMap[OldI] = NV; PN->eraseFromParent(); ++OldI; } } // NOTE: We cannot eliminate single entry phi nodes here, because of // ValueMap. Single entry phi nodes can have multiple ValueMap entries // pointing at them. Thus, deleting one would require scanning the ValueMap // to update any entries in it that would require that. This would be // really slow. } // Now that the inlined function body has been fully constructed, go through // and zap unconditional fall-through branches. This happen all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. Function::iterator I = cast<BasicBlock>(ValueMap[&OldFunc->getEntryBlock()]); while (I != NewFunc->end()) { BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); if (!BI || BI->isConditional()) { ++I; continue; } // Note that we can't eliminate uncond branches if the destination has // single-entry PHI nodes. Eliminating the single-entry phi nodes would // require scanning the ValueMap to update any entries that point to the phi // node. BasicBlock *Dest = BI->getSuccessor(0); if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) { ++I; continue; } // We know all single-entry PHI nodes in the inlined function have been // removed, so we just need to splice the blocks. BI->eraseFromParent(); // Move all the instructions in the succ to the pred. I->getInstList().splice(I->end(), Dest->getInstList()); // Make all PHI nodes that referred to Dest now refer to I as their source. Dest->replaceAllUsesWith(I); // Remove the dest block. Dest->eraseFromParent(); // Do not increment I, iteratively merge all things this block branches to. } }
/// Renames all variables in the specified BasicBlock. /// Only variables that need to be rename will be. /// void SSI::rename(BasicBlock *BB) { SmallPtrSet<Instruction*, 8> defined; // Iterate through instructions and make appropriate renaming. // For SSI_PHI (b = PHI()), store b at value_stack as a new // definition of the variable it represents. // For SSI_SIG (b = PHI(a)), substitute a with the current // value of a, present in the value_stack. // Then store bin the value_stack as the new definition of a. // For all other instructions (b = OP(a, c, d, ...)), we need to substitute // all operands with its current value, present in value_stack. for (BasicBlock::iterator begin = BB->begin(), end = BB->end(); begin != end; ++begin) { Instruction *I = begin; if (PHINode *PN = dyn_cast<PHINode>(I)) { // Treat PHI functions Instruction* position; // Treat SSI_PHI if ((position = getPositionPhi(PN))) { value_stack[position].push_back(PN); defined.insert(position); // Treat SSI_SIG } else if ((position = getPositionSigma(PN))) { substituteUse(I); value_stack[position].push_back(PN); defined.insert(position); } // Treat all other PHI functions else { substituteUse(I); } } // Treat all other functions else { substituteUse(I); } } // This loop iterates in all BasicBlocks that are successors of the current // BasicBlock. For each SSI_PHI instruction found, insert an operand. // This operand is the current operand in value_stack for the variable // in "position". And the BasicBlock this operand represents is the current // BasicBlock. for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) { BasicBlock *BB_succ = *SI; for (BasicBlock::iterator begin = BB_succ->begin(), notPhi = BB_succ->getFirstNonPHI(); begin != *notPhi; ++begin) { Instruction *I = begin; PHINode *PN = dyn_cast<PHINode>(I); Instruction* position; if (PN && ((position = getPositionPhi(PN)))) { PN->addIncoming(value_stack[position].back(), BB); } } } // This loop calls rename on all children from this block. This time children // refers to a successor block in the dominance tree. DomTreeNode *DTN = DT_->getNode(BB); for (DomTreeNode::iterator begin = DTN->begin(), end = DTN->end(); begin != end; ++begin) { DomTreeNodeBase<BasicBlock> *DTN_children = *begin; BasicBlock *BB_children = DTN_children->getBlock(); rename(BB_children); } // Now we remove all inserted definitions of a variable from the top of // the stack leaving the previous one as the top. for (SmallPtrSet<Instruction*, 8>::iterator DI = defined.begin(), DE = defined.end(); DI != DE; ++DI) value_stack[*DI].pop_back(); }
/// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present /// into calls to the appropriate _Unwind_Resume function. bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { bool UsesNewEH = false; SmallVector<ResumeInst*, 16> Resumes; for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { TerminatorInst *TI = I->getTerminator(); if (ResumeInst *RI = dyn_cast<ResumeInst>(TI)) Resumes.push_back(RI); else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) UsesNewEH = II->getUnwindDest()->isLandingPad(); } if (Resumes.empty()) return UsesNewEH; // Find the rewind function if we didn't already. if (!RewindFunction) { LLVMContext &Ctx = Resumes[0]->getContext(); FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), false); const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); RewindFunction = Fn.getParent()->getOrInsertFunction(RewindName, FTy); } // Create the basic block where the _Unwind_Resume call will live. LLVMContext &Ctx = Fn.getContext(); unsigned ResumesSize = Resumes.size(); if (ResumesSize == 1) { // Instead of creating a new BB and PHI node, just append the call to // _Unwind_Resume to the end of the single resume block. ResumeInst *RI = Resumes.front(); BasicBlock *UnwindBB = RI->getParent(); Value *ExnObj = GetExceptionObject(RI); // Call the _Unwind_Resume function. CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB); CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); // We never expect _Unwind_Resume to return. new UnreachableInst(Ctx, UnwindBB); return true; } BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn); PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesSize, "exn.obj", UnwindBB); // Extract the exception object from the ResumeInst and add it to the PHI node // that feeds the _Unwind_Resume call. for (SmallVectorImpl<ResumeInst*>::iterator I = Resumes.begin(), E = Resumes.end(); I != E; ++I) { ResumeInst *RI = *I; BasicBlock *Parent = RI->getParent(); BranchInst::Create(UnwindBB, Parent); Value *ExnObj = GetExceptionObject(RI); PN->addIncoming(ExnObj, Parent); ++NumResumesLowered; } // Call the function. CallInst *CI = CallInst::Create(RewindFunction, PN, "", UnwindBB); CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); // We never expect _Unwind_Resume to return. new UnreachableInst(Ctx, UnwindBB); return true; }
/// When there is a phi node that is created in a BasicBlock and it is used /// as an operand of another phi function used in the same BasicBlock, /// LLVM looks this as an error. So on the second phi, the first phi is called /// P and the BasicBlock it incomes is B. This P will be replaced by the value /// it has for BasicBlock B. It also includes undef values for predecessors /// that were not included in the phi. /// void SSI::fixPhis() { for (SmallPtrSet<PHINode *, 1>::iterator begin = phisToFix.begin(), end = phisToFix.end(); begin != end; ++begin) { PHINode *PN = *begin; for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) { PHINode *PN_father = dyn_cast<PHINode>(PN->getIncomingValue(i)); if (PN_father && PN->getParent() == PN_father->getParent() && !DT_->dominates(PN->getParent(), PN->getIncomingBlock(i))) { BasicBlock *BB = PN->getIncomingBlock(i); int pos = PN_father->getBasicBlockIndex(BB); PN->setIncomingValue(i, PN_father->getIncomingValue(pos)); } } } for (DenseMapIterator<PHINode *, Instruction*> begin = phis.begin(), end = phis.end(); begin != end; ++begin) { PHINode *PN = begin->first; BasicBlock *BB = PN->getParent(); pred_iterator PI = pred_begin(BB), PE = pred_end(BB); SmallVector<BasicBlock*, 8> Preds(PI, PE); for (unsigned size = Preds.size(); PI != PE && PN->getNumIncomingValues() != size; ++PI) { bool found = false; for (unsigned i = 0, pn_end = PN->getNumIncomingValues(); i < pn_end; ++i) { if (PN->getIncomingBlock(i) == *PI) { found = true; break; } } if (!found) { PN->addIncoming(UndefValue::get(PN->getType()), *PI); } } } }
/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi /// node) return blocks, merge them together to promote recursive block merging. static bool mergeEmptyReturnBlocks(Function &F) { bool Changed = false; BasicBlock *RetBlock = 0; // Scan all the blocks in the function, looking for empty return blocks. for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) { BasicBlock &BB = *BBI++; // Only look at return blocks. ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator()); if (Ret == 0) continue; // Only look at the block if it is empty or the only other thing in it is a // single PHI node that is the operand to the return. if (Ret != &BB.front()) { // Check for something else in the block. BasicBlock::iterator I = Ret; --I; // Skip over debug info. while (isa<DbgInfoIntrinsic>(I) && I != BB.begin()) --I; if (!isa<DbgInfoIntrinsic>(I) && (!isa<PHINode>(I) || I != BB.begin() || Ret->getNumOperands() == 0 || Ret->getOperand(0) != I)) continue; } // If this is the first returning block, remember it and keep going. if (RetBlock == 0) { RetBlock = &BB; continue; } // Otherwise, we found a duplicate return block. Merge the two. Changed = true; // Case when there is no input to the return or when the returned values // agree is trivial. Note that they can't agree if there are phis in the // blocks. if (Ret->getNumOperands() == 0 || Ret->getOperand(0) == cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0)) { BB.replaceAllUsesWith(RetBlock); BB.eraseFromParent(); continue; } // If the canonical return block has no PHI node, create one now. PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin()); if (RetBlockPHI == 0) { Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0); pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock); RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), std::distance(PB, PE), "merge", &RetBlock->front()); for (pred_iterator PI = PB; PI != PE; ++PI) RetBlockPHI->addIncoming(InVal, *PI); RetBlock->getTerminator()->setOperand(0, RetBlockPHI); } // Turn BB into a block that just unconditionally branches to the return // block. This handles the case when the two return blocks have a common // predecessor but that return different things. RetBlockPHI->addIncoming(Ret->getOperand(0), &BB); BB.getTerminator()->eraseFromParent(); BranchInst::Create(RetBlock, &BB); } return Changed; }
PHI_iterator(PHINode *P, bool) // end iterator : PHI(P), idx(PHI->getNumIncomingValues()) {}
// This function indicates the current limitations in the transform as a result // of which we do not proceed. bool LoopInterchangeLegality::currentLimitations() { BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); PHINode *InnerInductionVar; SmallVector<PHINode *, 8> Inductions; SmallVector<PHINode *, 8> Reductions; if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) return true; // TODO: Currently we handle only loops with 1 induction variable. if (Inductions.size() != 1) { DEBUG(dbgs() << "We currently only support loops with 1 induction variable." << "Failed to interchange due to current limitation\n"); return true; } if (Reductions.size() > 0) InnerLoopHasReduction = true; InnerInductionVar = Inductions.pop_back_val(); Reductions.clear(); if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) return true; // Outer loop cannot have reduction because then loops will not be tightly // nested. if (!Reductions.empty()) return true; // TODO: Currently we handle only loops with 1 induction variable. if (Inductions.size() != 1) return true; // TODO: Triangular loops are not handled for now. if (!isLoopStructureUnderstood(InnerInductionVar)) { DEBUG(dbgs() << "Loop structure not understood by pass\n"); return true; } // TODO: We only handle LCSSA PHI's corresponding to reduction for now. BasicBlock *LoopExitBlock = getLoopLatchExitBlock(OuterLoopLatch, OuterLoopHeader); if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true)) return true; LoopExitBlock = getLoopLatchExitBlock(InnerLoopLatch, InnerLoopHeader); if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false)) return true; // TODO: Current limitation: Since we split the inner loop latch at the point // were induction variable is incremented (induction.next); We cannot have // more than 1 user of induction.next since it would result in broken code // after split. // e.g. // for(i=0;i<N;i++) { // for(j = 0;j<M;j++) { // A[j+1][i+2] = A[j][i]+k; // } // } Instruction *InnerIndexVarInc = nullptr; if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader) InnerIndexVarInc = dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1)); else InnerIndexVarInc = dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0)); if (!InnerIndexVarInc) return true; // Since we split the inner loop latch on this induction variable. Make sure // we do not have any instruction between the induction variable and branch // instruction. bool FoundInduction = false; for (const Instruction &I : reverse(*InnerLoopLatch)) { if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I)) continue; // We found an instruction. If this is not induction variable then it is not // safe to split this loop latch. if (!I.isIdenticalTo(InnerIndexVarInc)) return true; FoundInduction = true; break; } // The loop latch ended and we didn't find the induction variable return as // current limitation. if (!FoundInduction) return true; return false; }
Value *getIncomingValue() { return PHI->getIncomingValue(idx); }
void PromoteMem2Reg::run() { Function &F = *DT.getRoot()->getParent(); if (AST) PointerAllocaValues.resize(Allocas.size()); AllocaDbgDeclares.resize(Allocas.size()); AllocaInfo Info; LargeBlockInfo LBI; for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { AllocaInst *AI = Allocas[AllocaNum]; assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!"); assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); removeLifetimeIntrinsicUsers(AI); if (AI->use_empty()) { // If there are no uses of the alloca, just delete it now. if (AST) AST->deleteValue(AI); AI->eraseFromParent(); // Remove the alloca from the Allocas list, since it has been processed RemoveFromAllocasList(AllocaNum); ++NumDeadAlloca; continue; } // Calculate the set of read and write-locations for each alloca. This is // analogous to finding the 'uses' and 'definitions' of each variable. Info.AnalyzeAlloca(AI); // If there is only a single store to this value, replace any loads of // it that are directly dominated by the definition with the value stored. if (Info.DefiningBlocks.size() == 1) { if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); ++NumSingleStore; continue; } } // If the alloca is only read and written in one basic block, just perform a // linear sweep over the block to eliminate it. if (Info.OnlyUsedInOneBlock) { promoteSingleBlockAlloca(AI, Info, LBI, AST); // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); continue; } // If we haven't computed dominator tree levels, do so now. if (DomLevels.empty()) { SmallVector<DomTreeNode *, 32> Worklist; DomTreeNode *Root = DT.getRootNode(); DomLevels[Root] = 0; Worklist.push_back(Root); while (!Worklist.empty()) { DomTreeNode *Node = Worklist.pop_back_val(); unsigned ChildLevel = DomLevels[Node] + 1; for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE; ++CI) { DomLevels[*CI] = ChildLevel; Worklist.push_back(*CI); } } } // If we haven't computed a numbering for the BB's in the function, do so // now. if (BBNumbers.empty()) { unsigned ID = 0; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) BBNumbers[I] = ID++; } // If we have an AST to keep updated, remember some pointer value that is // stored into the alloca. if (AST) PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal; // Remember the dbg.declare intrinsic describing this alloca, if any. if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare; // Keep the reverse mapping of the 'Allocas' array for the rename pass. AllocaLookup[Allocas[AllocaNum]] = AllocaNum; // At this point, we're committed to promoting the alloca using IDF's, and // the standard SSA construction algorithm. Determine which blocks need PHI // nodes and see if we can optimize out some work by avoiding insertion of // dead phi nodes. DetermineInsertionPoint(AI, AllocaNum, Info); } if (Allocas.empty()) return; // All of the allocas must have been trivial! LBI.clear(); // Set the incoming values for the basic block to be null values for all of // the alloca's. We do this in case there is a load of a value that has not // been stored yet. In this case, it will get this null value. // RenamePassData::ValVector Values(Allocas.size()); for (unsigned i = 0, e = Allocas.size(); i != e; ++i) Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); // Walks all basic blocks in the function performing the SSA rename algorithm // and inserting the phi nodes we marked as necessary // std::vector<RenamePassData> RenamePassWorkList; RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values)); do { RenamePassData RPD; RPD.swap(RenamePassWorkList.back()); RenamePassWorkList.pop_back(); // RenamePass may add new worklist entries. RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); } while (!RenamePassWorkList.empty()); // The renamer uses the Visited set to avoid infinite loops. Clear it now. Visited.clear(); // Remove the allocas themselves from the function. for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { Instruction *A = Allocas[i]; // If there are any uses of the alloca instructions left, they must be in // unreachable basic blocks that were not processed by walking the dominator // tree. Just delete the users now. if (!A->use_empty()) A->replaceAllUsesWith(UndefValue::get(A->getType())); if (AST) AST->deleteValue(A); A->eraseFromParent(); } // Remove alloca's dbg.declare instrinsics from the function. for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) DDI->eraseFromParent(); // Loop over all of the PHI nodes and see if there are any that we can get // rid of because they merge all of the same incoming values. This can // happen due to undef values coming into the PHI nodes. This process is // iterative, because eliminating one PHI node can cause others to be removed. bool EliminatedAPHI = true; while (EliminatedAPHI) { EliminatedAPHI = false; // Iterating over NewPhiNodes is deterministic, so it is safe to try to // simplify and RAUW them as we go. If it was not, we could add uses to // the values we replace with in a non-deterministic order, thus creating // non-deterministic def->use chains. for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) { if (AST && PN->getType()->isPointerTy()) AST->deleteValue(PN); PN->replaceAllUsesWith(V); PN->eraseFromParent(); NewPhiNodes.erase(I++); EliminatedAPHI = true; continue; } ++I; } } // At this point, the renamer has added entries to PHI nodes for all reachable // code. Unfortunately, there may be unreachable blocks which the renamer // hasn't traversed. If this is the case, the PHI nodes may not // have incoming values for all predecessors. Loop over all PHI nodes we have // created, inserting undef values if they are missing any incoming values. // for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) { // We want to do this once per basic block. As such, only process a block // when we find the PHI that is the first entry in the block. PHINode *SomePHI = I->second; BasicBlock *BB = SomePHI->getParent(); if (&BB->front() != SomePHI) continue; // Only do work here if there the PHI nodes are missing incoming values. We // know that all PHI nodes that were inserted in a block will have the same // number of incoming values, so we can just check any of them. if (SomePHI->getNumIncomingValues() == getNumPreds(BB)) continue; // Get the preds for BB. SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); // Ok, now we know that all of the PHI nodes are missing entries for some // basic blocks. Start by sorting the incoming predecessors for efficient // access. std::sort(Preds.begin(), Preds.end()); // Now we loop through all BB's which have entries in SomePHI and remove // them from the Preds list. for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) { // Do a log(n) search of the Preds list for the entry we want. SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound( Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i)); assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) && "PHI node has entry for a block which is not a predecessor!"); // Remove the entry Preds.erase(EntIt); } // At this point, the blocks left in the preds list must have dummy // entries inserted into every PHI nodes for the block. Update all the phi // nodes in this block that we are inserting (there could be phis before // mem2reg runs). unsigned NumBadPreds = SomePHI->getNumIncomingValues(); BasicBlock::iterator BBI = BB->begin(); while ((SomePHI = dyn_cast<PHINode>(BBI++)) && SomePHI->getNumIncomingValues() == NumBadPreds) { Value *UndefVal = UndefValue::get(SomePHI->getType()); for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred) SomePHI->addIncoming(UndefVal, Preds[pred]); } } NewPhiNodes.clear(); }
BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); }
void GNUstep::IMPCacher::SpeculativelyInline(Instruction *call, Function *function) { BasicBlock *beforeCallBB = call->getParent(); BasicBlock *callBB = SplitBlock(beforeCallBB, call, Owner); BasicBlock *inlineBB = BasicBlock::Create(Context, "inline", callBB->getParent()); BasicBlock::iterator iter = call; iter++; BasicBlock *afterCallBB = SplitBlock(iter->getParent(), iter, Owner); removeTerminator(beforeCallBB); // Put a branch before the call, testing whether the callee really is the // function IRBuilder<> B = IRBuilder<>(beforeCallBB); Value *callee = isa<CallInst>(call) ? cast<CallInst>(call)->getCalledValue() : cast<InvokeInst>(call)->getCalledValue(); const FunctionType *FTy = function->getFunctionType(); const FunctionType *calleeTy = cast<FunctionType>( cast<PointerType>(callee->getType())->getElementType()); if (calleeTy != FTy) { callee = B.CreateBitCast(callee, function->getType()); } Value *isInlineValid = B.CreateICmpEQ(callee, function); B.CreateCondBr(isInlineValid, inlineBB, callBB); // In the inline BB, add a copy of the call, but this time calling the real // version. Instruction *inlineCall = call->clone(); Value *inlineResult= inlineCall; inlineBB->getInstList().push_back(inlineCall); B.SetInsertPoint(inlineBB); if (calleeTy != FTy) { for (unsigned i=0 ; i<FTy->getNumParams() ; i++) { LLVMType *callType = calleeTy->getParamType(i); LLVMType *argType = FTy->getParamType(i); if (callType != argType) { inlineCall->setOperand(i, new BitCastInst(inlineCall->getOperand(i), argType, "", inlineCall)); } } if (FTy->getReturnType() != calleeTy->getReturnType()) { if (FTy->getReturnType() == Type::getVoidTy(Context)) { inlineResult = Constant::getNullValue(calleeTy->getReturnType()); } else { inlineResult = new BitCastInst(inlineCall, calleeTy->getReturnType(), "", inlineBB); } } } B.CreateBr(afterCallBB); // Unify the return values if (call->getType() != Type::getVoidTy(Context)) { PHINode *phi = CreatePHI(call->getType(), 2, "", afterCallBB->begin()); call->replaceAllUsesWith(phi); phi->addIncoming(call, callBB); phi->addIncoming(inlineResult, inlineBB); } // Really do the real inlining InlineFunctionInfo IFI(0, 0); if (CallInst *c = dyn_cast<CallInst>(inlineCall)) { c->setCalledFunction(function); InlineFunction(c, IFI); } else if (InvokeInst *c = dyn_cast<InvokeInst>(inlineCall)) { c->setCalledFunction(function); InlineFunction(c, IFI); } }
/// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source /// operands, i.e., it was just added. static PHINode *ValueIsNewPHI(Value *Val, SSAUpdater *Updater) { PHINode *PHI = ValueIsPHI(Val, Updater); if (PHI && PHI->getNumIncomingValues() == 0) return PHI; return nullptr; }
// Output for-loop as: // ... // start = startexpr // goto loop // loop: // variable = phi [start, loopheader], [nextvariable, loopend] // ... // bodyexpr // ... // loopend: // step = stepexpr // nextvariable = variable + step // endcond = endexpr // br endcond, loop, endloop // outloop: Value *ForExprAST::codegen() { // Emit the start code first, without 'variable' in scope. Value *StartVal = Start->codegen(); if (!StartVal) return nullptr; // Make the new basic block for the loop header, inserting after current // block. Function *TheFunction = Builder.GetInsertBlock()->getParent(); BasicBlock *PreheaderBB = Builder.GetInsertBlock(); BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); // Insert an explicit fall through from the current block to the LoopBB. Builder.CreateBr(LoopBB); // Start insertion in LoopBB. Builder.SetInsertPoint(LoopBB); // Start the PHI node with an entry for Start. PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str()); Variable->addIncoming(StartVal, PreheaderBB); // Within the loop, the variable is defined equal to the PHI node. If it // shadows an existing variable, we have to restore it, so save it now. Value *OldVal = NamedValues[VarName]; NamedValues[VarName] = Variable; // Emit the body of the loop. This, like any other expr, can change the // current BB. Note that we ignore the value computed by the body, but don't // allow an error. if (!Body->codegen()) return nullptr; // Emit the step value. Value *StepVal = nullptr; if (Step) { StepVal = Step->codegen(); if (!StepVal) return nullptr; } else { // If not specified, use 1.0. StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); } Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar"); // Compute the end condition. Value *EndCond = End->codegen(); if (!EndCond) return nullptr; // Convert condition to a bool by comparing equal to 0.0. EndCond = Builder.CreateFCmpONE( EndCond, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "loopcond"); // Create the "after loop" block and insert it. BasicBlock *LoopEndBB = Builder.GetInsertBlock(); BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); // Insert the conditional branch into the end of LoopEndBB. Builder.CreateCondBr(EndCond, LoopBB, AfterBB); // Any new code will be inserted in AfterBB. Builder.SetInsertPoint(AfterBB); // Add a new entry to the PHI node for the backedge. Variable->addIncoming(NextVar, LoopEndBB); // Restore the unshadowed variable. if (OldVal) NamedValues[VarName] = OldVal; else NamedValues.erase(VarName); // for expr always returns 0.0. return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); }
/// Create a clone of the blocks in a loop and connect them together. /// If CreateRemainderLoop is false, loop structure will not be cloned, /// otherwise a new loop will be created including all cloned blocks, and the /// iterator of it switches to count NewIter down to 0. /// The cloned blocks should be inserted between InsertTop and InsertBot. /// If loop structure is cloned InsertTop should be new preheader, InsertBot /// new loop exit. /// Return the new cloned loop that is created when CreateRemainderLoop is true. static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); Loop *ParentLoop = L->getParentLoop(); NewLoopsMap NewLoops; NewLoops[ParentLoop] = ParentLoop; if (!CreateRemainderLoop) NewLoops[L] = ParentLoop; // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); NewBlocks.push_back(NewBB); // If we're unrolling the outermost loop, there's no remainder loop, // and this block isn't in a nested loop, then the new block is not // in any loop. Otherwise, add it to loopinfo. if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop) addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); VMap[*BB] = NewBB; if (Header == *BB) { // For the first block, add a CFG connection to this newly // created block. InsertTop->getTerminator()->setSuccessor(0, NewBB); } if (DT) { if (Header == *BB) { // The header is dominated by the preheader. DT->addNewBlock(NewBB, InsertTop); } else { // Copy information from original loop to unrolled loop. BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock(); DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB])); } } if (Latch == *BB) { // For the last block, if CreateRemainderLoop is false, create a direct // jump to InsertBot. If not, create a loop back to cloned head. VMap.erase((*BB)->getTerminator()); BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]); BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator()); IRBuilder<> Builder(LatchBR); if (!CreateRemainderLoop) { Builder.CreateBr(InsertBot); } else { PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, suffix + ".iter", FirstLoopBB->getFirstNonPHI()); Value *IdxSub = Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".sub"); Value *IdxCmp = Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); NewIdx->addIncoming(NewIter, InsertTop); NewIdx->addIncoming(IdxSub, NewBB); } LatchBR->eraseFromParent(); } } // Change the incoming values to the ones defined in the preheader or // cloned loop. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *NewPHI = cast<PHINode>(VMap[&*I]); if (!CreateRemainderLoop) { if (UseEpilogRemainder) { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); NewPHI->removeIncomingValue(Latch, false); } else { VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); } } else { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); idx = NewPHI->getBasicBlockIndex(Latch); Value *InVal = NewPHI->getIncomingValue(idx); NewPHI->setIncomingBlock(idx, NewLatch); if (Value *V = VMap.lookup(InVal)) NewPHI->setIncomingValue(idx, V); } } if (CreateRemainderLoop) { Loop *NewLoop = NewLoops[L]; assert(NewLoop && "L should have been cloned"); // Only add loop metadata if the loop is not going to be completely // unrolled. if (UnrollRemainder) return NewLoop; // Add unroll disable metadata to disable future unrolling for this loop. SmallVector<Metadata *, 4> MDs; // Reserve first location for self reference to the LoopID metadata node. MDs.push_back(nullptr); MDNode *LoopID = NewLoop->getLoopID(); if (LoopID) { // First remove any existing loop unrolling metadata. for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { bool IsUnrollMetadata = false; MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); if (MD) { const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); } if (!IsUnrollMetadata) MDs.push_back(LoopID->getOperand(i)); } } LLVMContext &Context = NewLoop->getHeader()->getContext(); SmallVector<Metadata *, 1> DisableOperands; DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); MDNode *DisableNode = MDNode::get(Context, DisableOperands); MDs.push_back(DisableNode); MDNode *NewLoopID = MDNode::get(Context, MDs); // Set operand 0 to refer to the loop id itself. NewLoopID->replaceOperandWith(0, NewLoopID); NewLoop->setLoopID(NewLoopID); return NewLoop; } else return nullptr; }