/// Emit the control flow that diverts to the trap block when \p Or holds.
///
/// \param Or        Branch condition; when it is a ConstantInt the check is
///                  resolved statically.
/// \param IRB       Builder whose insert point marks where the block is split.
/// \param GetTrapBB Callback producing the trap block to branch to.
static void insertBoundsCheck(Value *Or, BuilderTy IRB, GetTrapBBT GetTrapBB) {
  // A constant condition lets us decide the outcome right here.
  ConstantInt *ConstCond = dyn_cast_or_null<ConstantInt>(Or);
  if (ConstCond) {
    ++ChecksSkipped;
    // Constant zero: the trap condition never holds, so emit nothing.
    if (ConstCond->getZExtValue() == 0)
      return;
  }
  ++ChecksAdded;

  // Split the block at the insert point and drop the fall-through branch that
  // splitBasicBlock left at the end of the first half.
  BasicBlock::iterator SplitPt = IRB.GetInsertPoint();
  BasicBlock *Head = SplitPt->getParent();
  BasicBlock *Tail = Head->splitBasicBlock(SplitPt);
  Head->getTerminator()->eraseFromParent();

  if (!ConstCond) {
    // Non-constant condition: trap when it is true, otherwise continue.
    BranchInst::Create(GetTrapBB(IRB), Tail, Or, Head);
    return;
  }

  // Constant non-zero condition: branch to the trap block unconditionally.
  // FIXME: We should really handle this differently to bypass the splitting
  // the block.
  BranchInst::Create(GetTrapBB(IRB), Head);
}
/// Route every use of \p Ptr through a cast into ADDRESS_SPACE_GLOBAL and
/// back to its original pointer type.
///
/// Does nothing when \p Ptr is already in ADDRESS_SPACE_GLOBAL. \p Ptr must be
/// an Argument or a non-terminator Instruction.
void NVPTXLowerArgs::markPointerAsGlobal(Value *Ptr) {
  Type *OriginalTy = Ptr->getType();
  if (OriginalTy->getPointerAddressSpace() == ADDRESS_SPACE_GLOBAL)
    return;

  // Pick the position for the addrspacecast pair.
  BasicBlock::iterator InsertPt;
  if (Argument *Arg = dyn_cast<Argument>(Ptr)) {
    // Arguments: insert at the start of the function's entry block.
    InsertPt = Arg->getParent()->getEntryBlock().begin();
  } else {
    // Instructions: insert immediately after the defining instruction.
    InsertPt = ++cast<Instruction>(Ptr)->getIterator();
    assert(InsertPt != InsertPt->getParent()->end() &&
           "We don't call this function with Ptr being a terminator.");
  }
  Instruction *InsertBefore = &*InsertPt;

  Type *GlobalTy = PointerType::get(OriginalTy->getPointerElementType(),
                                    ADDRESS_SPACE_GLOBAL);
  Instruction *PtrInGlobal =
      new AddrSpaceCastInst(Ptr, GlobalTy, Ptr->getName(), InsertBefore);
  Value *PtrInGeneric = new AddrSpaceCastInst(PtrInGlobal, OriginalTy,
                                              Ptr->getName(), InsertBefore);

  // Redirect all users of Ptr to the round-tripped pointer. That also rewrites
  // the operand of PtrInGlobal itself, so restore it to Ptr afterwards.
  Ptr->replaceAllUsesWith(PtrInGeneric);
  PtrInGlobal->setOperand(0, Ptr);
}
/// Clone instructions of \p bb that have more than one local use, redirecting
/// the first use of each to the clone, until a full sweep clones nothing.
///
/// PHI nodes and the induction instruction \p ind are never cloned.
void ModuloSchedulerDriverPass::duplicateValuesWithMultipleUses(BasicBlock* bb, Instruction* ind) {
  // Every clone may create new candidates, so sweep the block repeatedly
  // until one pass makes no change.
  for (;;) {
    bool clonedSomething = false;

    for (BasicBlock::iterator it = bb->begin(); it != bb->end(); ++it) {
      // Do not clone PHI nodes.
      if (isa<PHINode>(it))
        continue;
      // Do not clone the induction instruction.
      if (!(ind != it))
        continue;
      // Only values with more than one use inside this block are duplicated.
      if (!(instructionPriority::getLocalUses(it,bb) >1))
        continue;

      // Place the copy right in front of the original.
      Instruction* copy = it->clone();
      it->getParent()->getInstList().insert(it, copy);
      copy->setName("cloned");
      instructionPriority::replaceFirstUseOfWith(it, copy);

      clonedSomething = true;
    } // for each instruction

    if (!clonedSomething)
      break;
  }
}
/// runOnFunction - Top level algorithm. /// bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) { TD = getAnalysisIfAvailable<TargetData>(); bool Changed = false; std::vector<Instruction *> HalfPowrs; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { // Look for calls. bool IsHalfPowr = false; if (CallInst *CI = dyn_cast<CallInst>(I)) { // Look for direct calls and calls to non-external functions. Function *Callee = CI->getCalledFunction(); if (Callee && Callee->hasExternalLinkage()) { // Look for calls with well-known names. if (Callee->getName() == "__half_powrf4") IsHalfPowr = true; } } if (IsHalfPowr) HalfPowrs.push_back(I); // We're looking for sequences of up to three such calls, which we'll // simplify as a group. if ((!IsHalfPowr && !HalfPowrs.empty()) || HalfPowrs.size() == 3) { I = InlineHalfPowrs(HalfPowrs, I); E = I->getParent()->end(); HalfPowrs.clear(); Changed = true; } } assert(HalfPowrs.empty() && "Block had no terminator!"); } return Changed; }
// dceInstruction - Inspect the instruction at *BBI and figure out if it's // [trivially] dead. If so, remove the instruction and update the iterator // to point to the instruction that immediately succeeded the original // instruction. // bool llvm::dceInstruction(BasicBlock::iterator &BBI) { // Look for un"used" definitions... if (isInstructionTriviallyDead(BBI)) { BBI = BBI->getParent()->getInstList().erase(BBI); // Bye bye return true; } return false; }
bool LowerIntrinsics::IsRootLiveAt(Value *Root, BasicBlock::iterator II, GCRootMapType &GCRoots) { if (!isa<Argument>(Root) && !isa<Instruction>(Root)) return false; std::vector<Value *> &Ptrs = GCRoots[Root]; for (std::vector<Value *>::iterator RI = Ptrs.begin(), RE = Ptrs.end(); RI != RE; ++RI) { // Quick bail-out for calls that aren't even in the dominance subtree // of the root. if (isa<Instruction>(*RI) && !DT->dominates(cast<Instruction>(*RI), &*II)) continue; // We now need to determine whether the root is live. Since our // liveness works on basic blocks, we need a little special handling // here. // // First, we check to see whether there's a use after the call site. // If there is, the root is clearly live. SmallSet<Instruction *, 16> ImmediateSuccessors; BasicBlock::iterator SI = II, SE = II->getParent()->end(); for (++SI; SI != SE; ++SI) ImmediateSuccessors.insert(&*SI); bool IsUsedAfterCall = false; for (Value::use_iterator UI = (*RI)->use_begin(), UE = (*RI)->use_end(); UI != UE; ++UI) { if (isa<Instruction>(*UI) && ImmediateSuccessors.count(cast<Instruction>(*UI))) { IsUsedAfterCall = true; break; } } // If the root isn't used in this basic block, then we check to see // whether it's live-out of this block. If not, it's dead and we skip // it. if (!IsUsedAfterCall && !LV->isLiveOut(**RI, *II->getParent())) continue; return true; } return false; }
/// Demote escaping registers and then all PHI nodes of \p F to stack slots.
///
/// Declarations are left alone (returns false); every other function is
/// reported as modified.
bool RegToMem::runOnFunction(Function &F) {
  if (F.isDeclaration())
    return false;

  // All new allocas go into the entry block.
  BasicBlock *BBEntry = &F.getEntryBlock();
  assert(pred_begin(BBEntry) == pred_end(BBEntry) &&
         "Entry block to function must not have predecessors!");

  // Skip the leading allocas. A well-formed block always has a terminator,
  // so the scan stops before running off the end.
  BasicBlock::iterator FirstNonAlloca = BBEntry->begin();
  while (isa<AllocaInst>(FirstNonAlloca)) {
    ++FirstNonAlloca;
  }

  // A no-op cast placed there marks where new allocas are inserted.
  CastInst *AllocaInsertionPoint =
    new BitCastInst(Constant::getNullValue(Type::getInt32Ty(F.getContext())),
                    Type::getInt32Ty(F.getContext()),
                    "reg2mem alloca point", FirstNonAlloca);

  // Phase 1: gather escaping instructions, skipping allocas that already
  // live in the entry block.
  std::list<Instruction*> Escaped;
  for (Function::iterator BB = F.begin(), BBEnd = F.end(); BB != BBEnd; ++BB) {
    for (BasicBlock::iterator Inst = BB->begin(), InstEnd = BB->end();
         Inst != InstEnd; ++Inst) {
      const bool IsEntryAlloca =
        isa<AllocaInst>(Inst) && Inst->getParent() == BBEntry;
      if (!IsEntryAlloca && valueEscapes(Inst))
        Escaped.push_front(&*Inst);
    }
  }

  NumRegsDemoted += Escaped.size();
  for (std::list<Instruction*>::iterator It = Escaped.begin(),
       End = Escaped.end(); It != End; ++It)
    DemoteRegToStack(**It, false, AllocaInsertionPoint);

  // Phase 2: gather every PHI node that is present after phase 1.
  std::list<Instruction*> Phis;
  for (Function::iterator BB = F.begin(), BBEnd = F.end(); BB != BBEnd; ++BB) {
    for (BasicBlock::iterator Inst = BB->begin(), InstEnd = BB->end();
         Inst != InstEnd; ++Inst) {
      if (isa<PHINode>(Inst))
        Phis.push_front(&*Inst);
    }
  }

  NumPhisDemoted += Phis.size();
  for (std::list<Instruction*>::iterator It = Phis.begin(), End = Phis.end();
       It != End; ++It)
    DemotePHIToStack(cast<PHINode>(*It), AllocaInsertionPoint);

  return true;
}
/// doConstantPropagation - If an instruction references constants, try to fold /// them together... /// bool llvm::doConstantPropagation(BasicBlock::iterator &II) { if (Constant *C = ConstantFoldInstruction(II)) { // Replaces all of the uses of a variable with uses of the constant. II->replaceAllUsesWith(C); // Remove the instruction from the basic block... II = II->getParent()->getInstList().erase(II); return true; } return false; }
/// Repeatedly demote PHI nodes and escaping registers of \p f to the stack
/// until a full scan of the function finds none of either.
void fixStack(Function *f) {
  std::vector<PHINode *> phis;
  std::vector<Instruction *> regs;
  BasicBlock *bbEntry = f->begin();

  bool workRemaining = true;
  while (workRemaining) {
    phis.clear();
    regs.clear();

    // Collect the candidates of this round.
    for (Function::iterator i = f->begin(); i != f->end(); ++i) {
      for (BasicBlock::iterator j = i->begin(); j != i->end(); ++j) {
        if (isa<PHINode>(j)) {
          phis.push_back(cast<PHINode>(j));
          continue;
        }

        // Allocas that already sit in the entry block are never demoted.
        const bool entryAlloca = isa<AllocaInst>(j) && j->getParent() == bbEntry;
        if (entryAlloca)
          continue;

        if (valueEscapes(j) || j->isUsedOutsideOfBlock(i))
          regs.push_back(j);
      }
    }

    // Demote registers first, then PHI nodes; the entry block's terminator
    // is passed as the allocation point each time.
    for (unsigned int k = 0; k != regs.size(); ++k)
      DemoteRegToStack(*regs[k], f->begin()->getTerminator());

    for (unsigned int k = 0; k != phis.size(); ++k)
      DemotePHIToStack(phis[k], f->begin()->getTerminator());

    workRemaining = !regs.empty() || !phis.empty();
  }
}
bool LoopBarriers::ProcessLoop(Loop *L, LPPassManager &LPM) { bool isBLoop = false; bool changed = false; for (Loop::block_iterator i = L->block_begin(), e = L->block_end(); i != e && !isBLoop; ++i) { for (BasicBlock::iterator j = (*i)->begin(), e = (*i)->end(); j != e; ++j) { if (isa<BarrierInst>(j)) { isBLoop = true; break; } } } LLVMContext &LC = getGlobalContext(); IntegerType * IntTy = IntegerType::get(LC, 32); Value *Args = ConstantInt::get(IntTy, 0); for (Loop::block_iterator i = L->block_begin(), e = L->block_end(); i != e && isBLoop; ++i) { for (BasicBlock::iterator j = (*i)->begin(), e = (*i)->end(); j != e; ++j) { if (isa<BarrierInst>(j)) { BasicBlock *preheader = L->getLoopPreheader(); assert((preheader != NULL) && "Non-canonicalized loop found!\n"); Instruction *PhdrBarrierInst = BarrierInst::createBarrier(Args, preheader->getTerminator()); MDNode* PhdrAuxBarrierInfo = MDNode::get(LC, MDString::get(LC, "auxiliary phdr barrier")); PhdrBarrierInst->setMetadata("aux.phdr.barrier", PhdrAuxBarrierInfo); preheader->setName(preheader->getName() + ".loopbarrier"); BasicBlock *header = L->getHeader(); if (header->getFirstNonPHI() != &header->front()) { Instruction *HdrBarrierInst = BarrierInst::createBarrier(Args, header->getFirstNonPHI()); MDNode* HdrAuxBarrierInfo = MDNode::get(LC, MDString::get(LC, "auxiliary phihdr barrier")); HdrBarrierInst->setMetadata("aux.phihdr.barrier", HdrAuxBarrierInfo); header->setName(header->getName() + ".phibarrier"); } /* SmallVector<BasicBlock*, 8> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); */ BasicBlock *brexit = L->getExitingBlock(); if (brexit != NULL) { Instruction *ExitingBarrierInst = BarrierInst::createBarrier(Args, brexit->getTerminator()); MDNode* ExitingAuxBarrierInfo = MDNode::get(LC, MDString::get(LC, "auxiliary exiting barrier")); ExitingBarrierInst->setMetadata("aux.exiting.barrier", ExitingAuxBarrierInfo); brexit->setName(brexit->getName() + ".brexitbarrier"); } BasicBlock *latch = L->getLoopLatch(); if 
(latch != NULL && brexit != latch) { Instruction *LatchBarrierInst = BarrierInst::createBarrier(Args, latch->getTerminator()); MDNode* LatchAuxBarrierInfo = MDNode::get(LC, MDString::get(LC, "auxiliary latch barrier")); LatchBarrierInst->setMetadata("aux.latch.barrier", LatchAuxBarrierInfo); latch->setName(latch->getName() + ".latchbarrier"); return changed; } BasicBlock *Header = L->getHeader(); typedef GraphTraits<Inverse<BasicBlock *> > InvBlockTraits; InvBlockTraits::ChildIteratorType PI = InvBlockTraits::child_begin(Header); InvBlockTraits::ChildIteratorType PE = InvBlockTraits::child_end(Header); BasicBlock *Latch = NULL; for (; PI != PE; ++PI) { InvBlockTraits::NodeType *N = *PI; if (L->contains(N)) { Latch = N; if (DT->dominates(j->getParent(), Latch)) { BarrierInst::createBarrier(Args, Latch->getTerminator()); Latch->setName(Latch->getName() + ".latchbarrier"); } } } return true; } } } BasicBlock *preheader = L->getLoopPreheader(); assert((preheader != NULL) && "Non-canonicalized loop found!\n"); TerminatorInst *t = preheader->getTerminator(); Instruction *prev = NULL; if (&preheader->front() != t) { // If t is not the first/only instruction in the block, get the previous // instruction. prev = t->getPrevNode(); } if (prev && isa<BarrierInst>(prev)) { BasicBlock *new_b = SplitBlock(preheader, t, this); new_b->setName(preheader->getName() + ".postbarrier_dummy"); return true; } return changed; }
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true /// if unrolling was succesful, or false if the loop was unmodified. Unrolling /// can only fail when the loop's latch block is not terminated by a conditional /// branch instruction. However, if the trip count (and multiple) are not known, /// loop unrolling will mostly produce more code that is no faster. /// /// The LoopInfo Analysis that is passed will be kept consistent. /// /// If a LoopPassManager is passed in, and the loop is fully removed, it will be /// removed from the LoopPassManager as well. LPM can also be NULL. bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) { assert(L->isLCSSAForm()); BasicBlock *Header = L->getHeader(); BasicBlock *LatchBlock = L->getLoopLatch(); BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); if (!BI || BI->isUnconditional()) { // The loop-rotate pass can be helpful to avoid this in many cases. DOUT << " Can't unroll; loop not terminated by a conditional branch.\n"; return false; } // Find trip count unsigned TripCount = L->getSmallConstantTripCount(); // Find trip multiple if count is not available unsigned TripMultiple = 1; if (TripCount == 0) TripMultiple = L->getSmallConstantTripMultiple(); if (TripCount != 0) DOUT << " Trip Count = " << TripCount << "\n"; if (TripMultiple != 1) DOUT << " Trip Multiple = " << TripMultiple << "\n"; // Effectively "DCE" unrolled iterations that are beyond the tripcount // and will never be executed. if (TripCount != 0 && Count > TripCount) Count = TripCount; assert(Count > 0); assert(TripMultiple > 0); assert(TripCount == 0 || TripCount % TripMultiple == 0); // Are we eliminating the loop control altogether? bool CompletelyUnroll = Count == TripCount; // If we know the trip count, we know the multiple... unsigned BreakoutTrip = 0; if (TripCount != 0) { BreakoutTrip = TripCount % Count; TripMultiple = 0; } else { // Figure out what multiple to use. 
BreakoutTrip = TripMultiple = (unsigned)GreatestCommonDivisor64(Count, TripMultiple); } if (CompletelyUnroll) { DEBUG(errs() << "COMPLETELY UNROLLING loop %" << Header->getName() << " with trip count " << TripCount << "!\n"); } else { DEBUG(errs() << "UNROLLING loop %" << Header->getName() << " by " << Count); if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { DOUT << " with a breakout at trip " << BreakoutTrip; } else if (TripMultiple != 1) { DOUT << " with " << TripMultiple << " trips per branch"; } DOUT << "!\n"; } std::vector<BasicBlock*> LoopBlocks = L->getBlocks(); bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); // For the first iteration of the loop, we should use the precloned values for // PHI nodes. Insert associations now. typedef DenseMap<const Value*, Value*> ValueMapTy; ValueMapTy LastValueMap; std::vector<PHINode*> OrigPHINode; for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); OrigPHINode.push_back(PN); if (Instruction *I = dyn_cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock))) if (L->contains(I->getParent())) LastValueMap[I] = I; } std::vector<BasicBlock*> Headers; std::vector<BasicBlock*> Latches; Headers.push_back(Header); Latches.push_back(LatchBlock); for (unsigned It = 1; It != Count; ++It) { char SuffixBuffer[100]; sprintf(SuffixBuffer, ".%d", It); std::vector<BasicBlock*> NewBlocks; for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(), E = LoopBlocks.end(); BB != E; ++BB) { ValueMapTy ValueMap; BasicBlock *New = CloneBasicBlock(*BB, ValueMap, SuffixBuffer); Header->getParent()->getBasicBlockList().push_back(New); // Loop over all of the PHI nodes in the block, changing them to use the // incoming values from the previous block. 
if (*BB == Header) for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { PHINode *NewPHI = cast<PHINode>(ValueMap[OrigPHINode[i]]); Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); if (Instruction *InValI = dyn_cast<Instruction>(InVal)) if (It > 1 && L->contains(InValI->getParent())) InVal = LastValueMap[InValI]; ValueMap[OrigPHINode[i]] = InVal; New->getInstList().erase(NewPHI); } // Update our running map of newest clones LastValueMap[*BB] = New; for (ValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end(); VI != VE; ++VI) LastValueMap[VI->first] = VI->second; L->addBasicBlockToLoop(New, LI->getBase()); // Add phi entries for newly created values to all exit blocks except // the successor of the latch block. The successor of the exit block will // be updated specially after unrolling all the way. if (*BB != LatchBlock) for (Value::use_iterator UI = (*BB)->use_begin(), UE = (*BB)->use_end(); UI != UE;) { Instruction *UseInst = cast<Instruction>(*UI); ++UI; if (isa<PHINode>(UseInst) && !L->contains(UseInst->getParent())) { PHINode *phi = cast<PHINode>(UseInst); Value *Incoming = phi->getIncomingValueForBlock(*BB); phi->addIncoming(Incoming, New); } } // Keep track of new headers and latches as we create them, so that // we can insert the proper branches later. if (*BB == Header) Headers.push_back(New); if (*BB == LatchBlock) { Latches.push_back(New); // Also, clear out the new latch's back edge so that it doesn't look // like a new loop, so that it's amenable to being merged with adjacent // blocks later on. 
TerminatorInst *Term = New->getTerminator(); assert(L->contains(Term->getSuccessor(!ContinueOnTrue))); assert(Term->getSuccessor(ContinueOnTrue) == LoopExit); Term->setSuccessor(!ContinueOnTrue, NULL); } NewBlocks.push_back(New); } // Remap all instructions in the most recent iteration for (unsigned i = 0; i < NewBlocks.size(); ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) RemapInstruction(I, LastValueMap); } // The latch block exits the loop. If there are any PHI nodes in the // successor blocks, update them to use the appropriate values computed as the // last iteration of the loop. if (Count != 1) { SmallPtrSet<PHINode*, 8> Users; for (Value::use_iterator UI = LatchBlock->use_begin(), UE = LatchBlock->use_end(); UI != UE; ++UI) if (PHINode *phi = dyn_cast<PHINode>(*UI)) Users.insert(phi); BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]); for (SmallPtrSet<PHINode*,8>::iterator SI = Users.begin(), SE = Users.end(); SI != SE; ++SI) { PHINode *PN = *SI; Value *InVal = PN->removeIncomingValue(LatchBlock, false); // If this value was defined in the loop, take the value defined by the // last iteration of the loop. if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { if (L->contains(InValI->getParent())) InVal = LastValueMap[InVal]; } PN->addIncoming(InVal, LastIterationBB); } } // Now, if we're doing complete unrolling, loop over the PHI nodes in the // original block, setting them to their incoming values. if (CompletelyUnroll) { BasicBlock *Preheader = L->getLoopPreheader(); for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { PHINode *PN = OrigPHINode[i]; PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); Header->getInstList().erase(PN); } } // Now that all the basic blocks for the unrolled iterations are in place, // set up the branches to connect them. 
for (unsigned i = 0, e = Latches.size(); i != e; ++i) { // The original branch was replicated in each unrolled iteration. BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); // The branch destination. unsigned j = (i + 1) % e; BasicBlock *Dest = Headers[j]; bool NeedConditional = true; // For a complete unroll, make the last iteration end with a branch // to the exit block. if (CompletelyUnroll && j == 0) { Dest = LoopExit; NeedConditional = false; } // If we know the trip count or a multiple of it, we can safely use an // unconditional branch for some iterations. if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) { NeedConditional = false; } if (NeedConditional) { // Update the conditional branch's successor for the following // iteration. Term->setSuccessor(!ContinueOnTrue, Dest); } else { Term->setUnconditionalDest(Dest); // Merge adjacent basic blocks, if possible. if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) { std::replace(Latches.begin(), Latches.end(), Dest, Fold); std::replace(Headers.begin(), Headers.end(), Dest, Fold); } } } // At this point, the code is well formed. We now do a quick sweep over the // inserted code, doing constant propagation and dead code elimination as we // go. const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks(); for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(), BBE = NewLoopBlocks.end(); BB != BBE; ++BB) for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) { Instruction *Inst = I++; if (isInstructionTriviallyDead(Inst)) (*BB)->getInstList().erase(Inst); else if (Constant *C = ConstantFoldInstruction(Inst, Header->getContext())) { Inst->replaceAllUsesWith(C); (*BB)->getInstList().erase(Inst); } } NumCompletelyUnrolled += CompletelyUnroll; ++NumUnrolled; // Remove the loop from the LoopPassManager if it's completely removed. 
if (CompletelyUnroll && LPM != NULL) LPM->deleteLoopFromQueue(L); // If we didn't completely unroll the loop, it should still be in LCSSA form. if (!CompletelyUnroll) assert(L->isLCSSAForm()); return true; }
/// runOnFunction - Insert code to maintain the shadow stack. bool ShadowStackGCLowering::runOnFunction(Function &F) { // Quick exit for functions that do not use the shadow stack GC. if (!F.hasGC() || F.getGC() != std::string("shadow-stack")) return false; LLVMContext &Context = F.getContext(); // Find calls to llvm.gcroot. CollectRoots(F); // If there are no roots in this function, then there is no need to add a // stack map entry for it. if (Roots.empty()) return false; // Build the constant map and figure the type of the shadow stack entry. Value *FrameMap = GetFrameMap(F); Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F); // Build the shadow stack entry at the very start of the function. BasicBlock::iterator IP = F.getEntryBlock().begin(); IRBuilder<> AtEntry(IP->getParent(), IP); Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr, "gc_frame"); while (isa<AllocaInst>(IP)) ++IP; AtEntry.SetInsertPoint(IP->getParent(), IP); // Initialize the map pointer and load the current head of the shadow stack. Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, StackEntry, 0, 1, "gc_frame.map"); AtEntry.CreateStore(FrameMap, EntryMapPtr); // After all the allocas... for (unsigned I = 0, E = Roots.size(); I != E; ++I) { // For each root, find the corresponding slot in the aggregate... Value *SlotPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, StackEntry, 1 + I, "gc_root"); // And use it in lieu of the alloca. AllocaInst *OriginalAlloca = Roots[I].second; SlotPtr->takeName(OriginalAlloca); OriginalAlloca->replaceAllUsesWith(SlotPtr); } // Move past the original stores inserted by GCStrategy::InitRoots. This isn't // really necessary (the collector would never see the intermediate state at // runtime), but it's nicer not to push the half-initialized entry onto the // shadow stack. 
while (isa<StoreInst>(IP)) ++IP; AtEntry.SetInsertPoint(IP->getParent(), IP); // Push the entry onto the shadow stack. Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, StackEntry, 0, 0, "gc_frame.next"); Instruction *NewHeadVal = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, StackEntry, 0, "gc_newhead"); AtEntry.CreateStore(CurrentHead, EntryNextPtr); AtEntry.CreateStore(NewHeadVal, Head); // For each instruction that escapes... EscapeEnumerator EE(F, "gc_cleanup"); while (IRBuilder<> *AtExit = EE.Next()) { // Pop the entry from the shadow stack. Don't reuse CurrentHead from // AtEntry, since that would make the value live for the entire function. Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, ConcreteStackEntryTy, StackEntry, 0, 0, "gc_frame.next"); Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead"); AtExit->CreateStore(SavedHead, Head); } // Delete the original allocas (which are no longer used) and the intrinsic // calls (which are no longer valid). Doing this last avoids invalidating // iterators. for (unsigned I = 0, E = Roots.size(); I != E; ++I) { Roots[I].first->eraseFromParent(); Roots[I].second->eraseFromParent(); } Roots.clear(); return true; }
/// Wrap the lookup instruction \p lookup in control flow that first consults
/// a cached slot and only falls back to the real lookup when needed.
///
/// Emitted control flow:
///  - beforeLookupBB: goes straight to the real lookup when the loaded
///    version is zero, the receiver is null, or the receiver's low tag bits
///    are set; otherwise goes to "cache_check".
///  - "cache_check": loads the slot and skips the lookup when the slot's
///    version and owner fields match; otherwise performs the lookup.
///  - after the lookup: "cache_store" writes the result back unless the
///    receiver changed during the lookup or one of the skip conditions held.
///  - afterLookupBB: a PHI ("new_lookup") merges the three possible sources
///    and replaces the uses \p lookup had at that point.
///
/// \param lookup         The lookup call or invoke; operand 0 is the receiver
///                       (or a pointer to it, see \p isSuperMessage).
/// \param slot           Location holding the cached slot pointer.
/// \param version        Location holding the cached version.
/// \param isSuperMessage When false, operand 0 of \p lookup is loaded to
///                       obtain the receiver; when true it is used directly.
void GNUstep::IMPCacher::CacheLookup(Instruction *lookup, Value *slot, Value
    *version, bool isSuperMessage) {

  // If this IMP is already cached, don't cache it again.
  if (lookup->getMetadata(IMPCacheFlagKind)) { return; }

  // Mark the instruction so a later run leaves it alone.
  lookup->setMetadata(IMPCacheFlagKind, AlreadyCachedFlag);
  // NOTE(review): isInvoke is assigned below but never read in this function.
  bool isInvoke = false;

  // Isolate the lookup: everything before it stays in beforeLookupBB, the
  // lookup itself starts lookupBB.
  BasicBlock *beforeLookupBB = lookup->getParent();
  BasicBlock *lookupBB = SplitBlock(beforeLookupBB, lookup, Owner);
  BasicBlock *lookupFinishedBB = lookupBB;
  BasicBlock *afterLookupBB;

  if (InvokeInst *inv = dyn_cast<InvokeInst>(lookup)) {
    // An invoke already ends its block, so interpose a new "done_lookup"
    // block on its normal edge.
    afterLookupBB = inv->getNormalDest();
    lookupFinishedBB =
      BasicBlock::Create(Context, "done_lookup", lookupBB->getParent());
    CGBuilder B(lookupFinishedBB);
    B.CreateBr(afterLookupBB);
    inv->setNormalDest(lookupFinishedBB);
    isInvoke = true;
  } else {
    // A plain call: split again right after it.
    BasicBlock::iterator iter = lookup;
    iter++;
    afterLookupBB = SplitBlock(iter->getParent(), iter, Owner);
  }

  // Drop the branch SplitBlock left behind; a conditional one is built below.
  removeTerminator(beforeLookupBB);

  CGBuilder B = CGBuilder(beforeLookupBB);
  // Load the version and the receiver; the cache is bypassed when the version
  // is 0 or the receiver is null.
  Value *versionValue = B.CreateLoad(version);
  Value *receiverPtr = lookup->getOperand(0);
  Value *receiver = receiverPtr;
  if (!isSuperMessage) {
    receiver = B.CreateLoad(receiverPtr);
  }
  // For small objects, we skip the cache entirely.
  // FIXME: Class messages are never to small objects...
  bool is64Bit = llvm::Module::Pointer64 ==
    B.GetInsertBlock()->getParent()->getParent()->getPointerSize();
  LLVMType *intPtrTy = is64Bit ? Type::getInt64Ty(Context) :
    Type::getInt32Ty(Context);

  // Receiver as an integer
  Value *receiverSmallObject = B.CreatePtrToInt(receiver, intPtrTy);
  // Keep only the low tag bits: mask 7 on 64-bit, 1 on 32-bit.
  receiverSmallObject = B.CreateAnd(receiverSmallObject, is64Bit ? 7 : 1);
  // True when any tag bit is set, i.e. (going by the variable name) the
  // receiver is a small object.
  receiverSmallObject = B.CreateICmpNE(receiverSmallObject,
      Constant::getNullValue(intPtrTy));
  // Ideally, we'd call objc_msgSend() here, but for now just skip the cache
  // lookup

  // Bypass the cache when: version == 0, receiver == null, or tag bits set.
  Value *isCacheEmpty =
        B.CreateICmpEQ(versionValue, Constant::getNullValue(IntTy));
  Value *receiverNil =
       B.CreateICmpEQ(receiver, Constant::getNullValue(receiver->getType()));

  isCacheEmpty = B.CreateOr(isCacheEmpty, receiverNil);
  isCacheEmpty = B.CreateOr(isCacheEmpty, receiverSmallObject);

  BasicBlock *cacheLookupBB = BasicBlock::Create(Context, "cache_check",
      lookupBB->getParent());

  B.CreateCondBr(isCacheEmpty, lookupBB, cacheLookupBB);

  // Check the cache node is current
  B.SetInsertPoint(cacheLookupBB);
  Value *slotValue = B.CreateLoad(slot, "slot_value");
  // Struct field 3 of the slot is compared against the cached version.
  Value *slotVersion = B.CreateStructGEP(slotValue, 3);
  // Note: Volatile load because the slot version might have changed in
  // another thread.
  slotVersion = B.CreateLoad(slotVersion, true, "slot_version");
  // Struct field 1 of the slot is compared against the receiver's class.
  Value *slotCachedFor = B.CreateStructGEP(slotValue, 1);
  slotCachedFor = B.CreateLoad(slotCachedFor, true, "slot_owner");
  // The first word of the receiver is read as its class pointer.
  Value *cls = B.CreateLoad(B.CreateBitCast(receiver, IdTy));
  Value *isVersionCorrect = B.CreateICmpEQ(slotVersion, versionValue);
  Value *isOwnerCorrect = B.CreateICmpEQ(slotCachedFor, cls);
  Value *isSlotValid = B.CreateAnd(isVersionCorrect, isOwnerCorrect);
  // If this slot is still valid, skip the lookup.
  B.CreateCondBr(isSlotValid, afterLookupBB, lookupBB);

  // Perform the real lookup and cache the result
  removeTerminator(lookupFinishedBB);
  // Replace the looked up slot with the loaded one
  B.SetInsertPoint(afterLookupBB, afterLookupBB->begin());
  PHINode *newLookup = IRBuilderCreatePHI(&B, lookup->getType(), 3, "new_lookup");
  // Not volatile, so a redundant load elimination pass can do some phi
  // magic with this later.
  // The uses of lookup created after this point (the stores and the PHI's
  // incoming values below) are not affected by the replacement.
  lookup->replaceAllUsesWith(newLookup);

  B.SetInsertPoint(lookupFinishedBB);
  // Reload the receiver after the lookup so a change can be detected.
  Value * newReceiver = receiver;
  if (!isSuperMessage) {
    newReceiver = B.CreateLoad(receiverPtr);
  }
  BasicBlock *storeCacheBB = BasicBlock::Create(Context, "cache_store",
      lookupBB->getParent());

  // Don't store the cached lookup if we are doing forwarding tricks.
  // Also skip caching small object messages for now
  Value *skipCacheWrite =
    B.CreateOr(B.CreateICmpNE(receiver, newReceiver), receiverSmallObject);
  skipCacheWrite = B.CreateOr(skipCacheWrite, receiverNil);
  B.CreateCondBr(skipCacheWrite, afterLookupBB, storeCacheBB);
  B.SetInsertPoint(storeCacheBB);

  // Store it even if the version is 0, because we always check that the
  // version is not 0 at the start and an occasional redundant store is
  // probably better than a branch every time.
  B.CreateStore(lookup, slot);
  B.CreateStore(B.CreateLoad(B.CreateStructGEP(lookup, 3)), version);
  // Record the receiver's class in field 1 of the freshly looked-up slot.
  cls = B.CreateLoad(B.CreateBitCast(receiver, IdTy));
  B.CreateStore(cls, B.CreateStructGEP(lookup, 1));
  B.CreateBr(afterLookupBB);

  // Three ways into afterLookupBB: lookup done without storing, cache hit,
  // and lookup done with the result stored.
  newLookup->addIncoming(lookup, lookupFinishedBB);
  newLookup->addIncoming(slotValue, cacheLookupBB);
  newLookup->addIncoming(lookup, storeCacheBB);
}
/// Guard \p call with a runtime test of its callee against \p function and
/// inline \p function on the path where they match.
///
/// The original call stays on the mismatch path. When the call produces a
/// value, a PHI in the continuation block merges both paths and replaces the
/// call's previous uses.
void GNUstep::IMPCacher::SpeculativelyInline(Instruction *call, Function
    *function) {
  // Carve the call out into a block of its own: before | call | after.
  BasicBlock *headBB = call->getParent();
  BasicBlock *slowBB = SplitBlock(headBB, call, Owner);
  BasicBlock *fastBB = BasicBlock::Create(Context, "inline",
      slowBB->getParent());

  BasicBlock::iterator next = call;
  next++;

  BasicBlock *tailBB = SplitBlock(next->getParent(), next, Owner);

  removeTerminator(headBB);

  // In front of the call, test whether the callee really is the function.
  IRBuilder<> B = IRBuilder<>(headBB);
  Value *callee;
  if (CallInst *plainCall = dyn_cast<CallInst>(call)) {
    callee = plainCall->getCalledValue();
  } else {
    callee = cast<InvokeInst>(call)->getCalledValue();
  }

  const FunctionType *FTy = function->getFunctionType();
  const FunctionType *calleeTy = cast<FunctionType>(
      cast<PointerType>(callee->getType())->getElementType());
  const bool signaturesDiffer = calleeTy != FTy;
  if (signaturesDiffer) {
    callee = B.CreateBitCast(callee, function->getType());
  }

  Value *calleeMatches = B.CreateICmpEQ(callee, function);
  B.CreateCondBr(calleeMatches, fastBB, slowBB);

  // The fast path gets a copy of the call that will target the real
  // function.
  Instruction *fastCall = call->clone();
  Value *fastResult = fastCall;
  fastBB->getInstList().push_back(fastCall);

  B.SetInsertPoint(fastBB);

  if (signaturesDiffer) {
    // Cast each argument whose type differs to the function's parameter type.
    for (unsigned idx = 0; idx < FTy->getNumParams(); ++idx) {
      LLVMType *callType = calleeTy->getParamType(idx);
      LLVMType *argType = FTy->getParamType(idx);
      if (callType == argType)
        continue;
      fastCall->setOperand(idx,
          new BitCastInst(fastCall->getOperand(idx), argType, "", fastCall));
    }
    // Adapt the result to the type the call site expects.
    if (FTy->getReturnType() != calleeTy->getReturnType()) {
      if (FTy->getReturnType() != Type::getVoidTy(Context)) {
        fastResult = new BitCastInst(fastCall, calleeTy->getReturnType(), "",
            fastBB);
      } else {
        // The function yields nothing: substitute a null value.
        fastResult = Constant::getNullValue(calleeTy->getReturnType());
      }
    }
  }

  B.CreateBr(tailBB);

  // Merge the results of both paths.
  if (call->getType() != Type::getVoidTy(Context)) {
    PHINode *merged = CreatePHI(call->getType(), 2, "", tailBB->begin());
    call->replaceAllUsesWith(merged);
    merged->addIncoming(call, slowBB);
    merged->addIncoming(fastResult, fastBB);
  }

  // Retarget the copy at the function and inline it.
  InlineFunctionInfo IFI(0, 0);
  if (CallInst *asCall = dyn_cast<CallInst>(fastCall)) {
    asCall->setCalledFunction(function);
    InlineFunction(asCall, IFI);
  } else if (InvokeInst *asInvoke = dyn_cast<InvokeInst>(fastCall)) {
    asInvoke->setCalledFunction(function);
    InlineFunction(asInvoke, IFI);
  }
}
/// Try to completely unroll loop \p L, after first visiting all of its
/// subloops.
///
/// A loop is unrolled only when it consists of a single basic block ending in
/// a branch, has a non-zero constant trip count that fits in an unsigned, and
/// the product of ApproximateLoopSize(L) and the trip count does not exceed
/// UnrollThreshold.  On success the loop body is cloned TripCount-1 times, the
/// loop is removed from the loop info, and the preheader and the unrolled
/// block are folded into the exit block.
///
/// \param L  the loop to process.
/// \returns true if this loop was unrolled; otherwise whether any subloop was
///          changed.
bool LoopUnroll::visitLoop(Loop *L) {
  bool Changed = false;

  // Recurse through all subloops before we process this loop.  Copy the loop
  // list so that the child can update the loop tree if it needs to delete the
  // loop.
  std::vector<Loop*> SubLoops(L->begin(), L->end());
  for (unsigned i = 0, e = SubLoops.size(); i != e; ++i)
    Changed |= visitLoop(SubLoops[i]);

  // We only handle single basic block loops right now.
  if (L->getBlocks().size() != 1)
    return Changed;

  BasicBlock *BB = L->getHeader();
  BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
  if (BI == 0) return Changed;  // The loop block must end in a branch.

  ConstantInt *TripCountC = dyn_cast_or_null<ConstantInt>(L->getTripCount());
  if (!TripCountC) return Changed;  // Must have constant trip count!

  // Reject trip counts that are zero or that do not survive truncation to
  // unsigned.
  unsigned TripCount = TripCountC->getRawValue();
  if (TripCount != TripCountC->getRawValue() || TripCount == 0)
    return Changed; // More than 2^32 iterations???

  unsigned LoopSize = ApproximateLoopSize(L);
  DEBUG(std::cerr << "Loop Unroll: F[" << BB->getParent()->getName()
        << "] Loop %" << BB->getName() << " Loop Size = " << LoopSize
        << " Trip Count = " << TripCount << " - ");
  // Compute the unrolled size in 64 bits so the product cannot wrap.
  uint64_t Size = (uint64_t)LoopSize*(uint64_t)TripCount;
  if (Size > UnrollThreshold) {
    DEBUG(std::cerr << "TOO LARGE: " << Size << ">" << UnrollThreshold << "\n");
    return Changed;
  }
  DEBUG(std::cerr << "UNROLLING!\n");

  // The exit block is whichever successor of the branch is outside the loop:
  // contains(succ 0) converts to index 1 when successor 0 is the back edge.
  BasicBlock *LoopExit = BI->getSuccessor(L->contains(BI->getSuccessor(0)));

  // Create a new basic block to temporarily hold all of the cloned code.
  BasicBlock *NewBlock = new BasicBlock();

  // For the first iteration of the loop, we should use the precloned values for
  // PHI nodes.  Insert associations now.
  std::map<const Value*, Value*> LastValueMap;
  std::vector<PHINode*> OrigPHINode;
  for (BasicBlock::iterator I = BB->begin();
       PHINode *PN = dyn_cast<PHINode>(I); ++I) {
    OrigPHINode.push_back(PN);
    // Named distinctly from the loop iterator to avoid shadowing it.
    if (Instruction *InI =
            dyn_cast<Instruction>(PN->getIncomingValueForBlock(BB)))
      if (InI->getParent() == BB)
        LastValueMap[InI] = InI;
  }

  // Remove the exit branch from the loop
  BB->getInstList().erase(BI);

  assert(TripCount != 0 && "Trip count of 0 is impossible!");
  for (unsigned It = 1; It != TripCount; ++It) {
    // Suffix for the names of this iteration's clones.  'It' is unsigned, so
    // it is formatted with %u.
    char SuffixBuffer[100];
    sprintf(SuffixBuffer, ".%u", It);
    std::map<const Value*, Value*> ValueMap;
    BasicBlock *New = CloneBasicBlock(BB, ValueMap, SuffixBuffer);

    // Loop over all of the PHI nodes in the block, changing them to use the
    // incoming values from the previous block.
    for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
      PHINode *NewPHI = cast<PHINode>(ValueMap[OrigPHINode[i]]);
      Value *InVal = NewPHI->getIncomingValueForBlock(BB);
      if (Instruction *InValI = dyn_cast<Instruction>(InVal))
        if (InValI->getParent() == BB)
          InVal = LastValueMap[InValI];
      ValueMap[OrigPHINode[i]] = InVal;
      New->getInstList().erase(NewPHI);
    }

    for (BasicBlock::iterator I = New->begin(), E = New->end(); I != E; ++I)
      RemapInstruction(I, ValueMap);

    // Now that all of the instructions are remapped, splice them into the end
    // of the NewBlock.
    NewBlock->getInstList().splice(NewBlock->end(), New->getInstList());
    delete New;

    // LastValue map now contains values from this iteration.
    std::swap(LastValueMap, ValueMap);
  }

  // If there was more than one iteration, replace any uses of values computed
  // in the loop with values computed during the last iteration of the loop.
  if (TripCount != 1) {
    std::set<User*> Users;
    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
      Users.insert(I->use_begin(), I->use_end());

    // We don't want to reprocess entries with PHI nodes in them.  For this
    // reason, we look at each operand of each user exactly once, performing the
    // substitution exactly once.
    for (std::set<User*>::iterator UI = Users.begin(), E = Users.end(); UI != E;
         ++UI) {
      Instruction *I = cast<Instruction>(*UI);
      if (I->getParent() != BB && I->getParent() != NewBlock)
        RemapInstruction(I, LastValueMap);
    }
  }

  // Now that we cloned the block as many times as we needed, stitch the new
  // code into the original block and delete the temporary block.
  BB->getInstList().splice(BB->end(), NewBlock->getInstList());
  delete NewBlock;

  // Now loop over the PHI nodes in the original block, setting them to their
  // incoming values.
  BasicBlock *Preheader = L->getLoopPreheader();
  for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
    PHINode *PN = OrigPHINode[i];
    PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
    BB->getInstList().erase(PN);
  }

  // Finally, add an unconditional branch to the block to continue into the exit
  // block.
  new BranchInst(LoopExit, BB);

  // At this point, the code is well formed.  We now do a quick sweep over the
  // inserted code, doing constant propagation and dead code elimination as we
  // go.
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
    // Advance before a possible erase so the iterator stays valid.
    Instruction *Inst = I++;

    if (isInstructionTriviallyDead(Inst))
      BB->getInstList().erase(Inst);
    else if (Constant *C = ConstantFoldInstruction(Inst)) {
      Inst->replaceAllUsesWith(C);
      BB->getInstList().erase(Inst);
    }
  }

  // Update the loop information for this loop.
  Loop *Parent = L->getParentLoop();

  // Move all of the basic blocks in the loop into the parent loop.
  LI->changeLoopFor(BB, Parent);

  // Remove the loop from the parent.
  if (Parent)
    delete Parent->removeChildLoop(std::find(Parent->begin(), Parent->end(),L));
  else
    delete LI->removeLoop(std::find(LI->begin(), LI->end(), L));

  // FIXME: Should update dominator analyses

  // Now that everything is up-to-date that will be, we fold the loop block into
  // the preheader and exit block, updating our analyses as we go.  Everything
  // but the terminator of BB, and then of Preheader, is spliced onto the front
  // of LoopExit, so the preheader's code ends up first.
  LoopExit->getInstList().splice(LoopExit->begin(), BB->getInstList(),
                                 BB->getInstList().begin(),
                                 prior(BB->getInstList().end()));
  LoopExit->getInstList().splice(LoopExit->begin(), Preheader->getInstList(),
                                 Preheader->getInstList().begin(),
                                 prior(Preheader->getInstList().end()));

  // Make all other blocks in the program branch to LoopExit now instead of
  // Preheader.
  Preheader->replaceAllUsesWith(LoopExit);

  // Remove Preheader and BB from our analyses.
  LI->removeBlock(Preheader);
  LI->removeBlock(BB);

  // If the preheader was the entry block of this function, move the exit block
  // to be the new entry of the function.
  Function *F = LoopExit->getParent();
  if (Preheader == &F->front())
    F->getBasicBlockList().splice(F->begin(), F->getBasicBlockList(), LoopExit);

  // Actually delete the blocks now.
  F->getBasicBlockList().erase(Preheader);
  F->getBasicBlockList().erase(BB);

  ++NumUnrolled;
  return true;
}
// run - Run the transformation on the program. We grab the function // prototypes for longjmp and setjmp. If they are used in the program, // then we can go directly to the places they're at and transform them. bool LowerSetJmp::runOnModule(Module& M) { bool Changed = false; // These are what the functions are called. Function* SetJmp = M.getFunction("llvm.setjmp"); Function* LongJmp = M.getFunction("llvm.longjmp"); // This program doesn't have longjmp and setjmp calls. if ((!LongJmp || LongJmp->use_empty()) && (!SetJmp || SetJmp->use_empty())) return false; // Initialize some values and functions we'll need to transform the // setjmp/longjmp functions. doInitialization(M); if (SetJmp) { for (Value::use_iterator B = SetJmp->use_begin(), E = SetJmp->use_end(); B != E; ++B) { BasicBlock* BB = cast<Instruction>(*B)->getParent(); for (df_ext_iterator<BasicBlock*> I = df_ext_begin(BB, DFSBlocks), E = df_ext_end(BB, DFSBlocks); I != E; ++I) /* empty */; } while (!SetJmp->use_empty()) { assert(isa<CallInst>(SetJmp->use_back()) && "User of setjmp intrinsic not a call?"); TransformSetJmpCall(cast<CallInst>(SetJmp->use_back())); Changed = true; } } if (LongJmp) while (!LongJmp->use_empty()) { assert(isa<CallInst>(LongJmp->use_back()) && "User of longjmp intrinsic not a call?"); TransformLongJmpCall(cast<CallInst>(LongJmp->use_back())); Changed = true; } // Now go through the affected functions and convert calls and invokes // to new invokes... for (std::map<Function*, AllocaInst*>::iterator B = SJMap.begin(), E = SJMap.end(); B != E; ++B) { Function* F = B->first; for (Function::iterator BB = F->begin(), BE = F->end(); BB != BE; ++BB) for (BasicBlock::iterator IB = BB->begin(), IE = BB->end(); IB != IE; ) { visit(*IB++); if (IB != BB->end() && IB->getParent() != BB) break; // The next instruction got moved to a different block! } } DFSBlocks.clear(); SJMap.clear(); RethrowBBMap.clear(); PrelimBBMap.clear(); SwitchValMap.clear(); SetJmpIDMap.clear(); return Changed; }