/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
/// if unrolling was successful, or false if the loop was unmodified. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
///
/// TripCount is generally defined as the number of times the loop header
/// executes. UnrollLoop relaxes the definition to permit early exits: here
/// TripCount is the iteration on which control exits LatchBlock if no early
/// exits were taken. Note that UnrollLoop assumes that the loop counter test
/// terminates LatchBlock in order to remove unnecessary instances of the
/// test. In other words, control may exit the loop prior to TripCount
/// iterations via an early branch, but control may not exit the loop from the
/// LatchBlock's terminator prior to TripCount iterations.
///
/// Similarly, TripMultiple divides the number of times that the LatchBlock may
/// execute without exiting the loop.
///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
/// removed from the LoopPassManager as well. LPM can also be NULL.
///
/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
/// available it must also preserve those analyses.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
                      bool AllowRuntime, unsigned TripMultiple,
                      LoopInfo *LI, LPPassManager *LPM) {
  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
    return false;
  }

  BasicBlock *LatchBlock = L->getLoopLatch();
  if (!LatchBlock) {
    DEBUG(dbgs() << "  Can't unroll; loop exit-block-insertion failed.\n");
    return false;
  }

  BasicBlock *Header = L->getHeader();
  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());

  if (!BI || BI->isUnconditional()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
          "  Can't unroll; loop not terminated by a conditional branch.\n");
    return false;
  }

  if (Header->hasAddressTaken()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
          "  Won't unroll loop: address of header block is taken.\n");
    return false;
  }

  if (TripCount != 0)
    DEBUG(dbgs() << "  Trip Count = " << TripCount << "\n");
  if (TripMultiple != 1)
    DEBUG(dbgs() << "  Trip Multiple = " << TripMultiple << "\n");

  // Effectively "DCE" unrolled iterations that are beyond the tripcount
  // and will never be executed.
  if (TripCount != 0 && Count > TripCount)
    Count = TripCount;

  // Don't enter the unroll code if there is nothing to do. This way we don't
  // need to support "partial unrolling by 1".
  if (TripCount == 0 && Count < 2)
    return false;

  assert(Count > 0);
  assert(TripMultiple > 0);
  assert(TripCount == 0 || TripCount % TripMultiple == 0);

  // Are we eliminating the loop control altogether?
  bool CompletelyUnroll = Count == TripCount;

  // We assume a run-time trip count if the compiler cannot
  // figure out the loop trip count and the unroll-runtime
  // flag is specified.
  bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);

  if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
    return false;

  // Notify ScalarEvolution that the loop will be substantially changed,
  // if not outright eliminated.
  ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
  if (SE)
    SE->forgetLoop(L);

  // If we know the trip count, we know the multiple...
  unsigned BreakoutTrip = 0;
  if (TripCount != 0) {
    BreakoutTrip = TripCount % Count;
    TripMultiple = 0;
  } else {
    // Figure out what multiple to use.
    BreakoutTrip = TripMultiple =
      (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
  }

  if (CompletelyUnroll) {
    DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
          << " with trip count " << TripCount << "!\n");
  } else {
    DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
          << " by " << Count);
    if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
      DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
    } else if (TripMultiple != 1) {
      DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
    } else if (RuntimeTripCount) {
      DEBUG(dbgs() << " with run-time trip count");
    }
    DEBUG(dbgs() << "!\n");
  }

  std::vector<BasicBlock*> LoopBlocks = L->getBlocks();

  bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);

  // For the first iteration of the loop, we should use the precloned values for
  // PHI nodes.  Insert associations now.
  ValueToValueMapTy LastValueMap;
  std::vector<PHINode*> OrigPHINode;
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    OrigPHINode.push_back(cast<PHINode>(I));
  }

  std::vector<BasicBlock*> Headers;
  std::vector<BasicBlock*> Latches;
  Headers.push_back(Header);
  Latches.push_back(LatchBlock);

  // The current on-the-fly SSA update requires blocks to be processed in
  // reverse postorder so that LastValueMap contains the correct value at each
  // exit.
  LoopBlocksDFS DFS(L);
  DFS.perform(LI);

  // Stash the DFS iterators before adding blocks to the loop.
  LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();

  for (unsigned It = 1; It != Count; ++It) {
    std::vector<BasicBlock*> NewBlocks;

    for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
      ValueToValueMapTy VMap;
      BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
      Header->getParent()->getBasicBlockList().push_back(New);

      // Loop over all of the PHI nodes in the block, changing them to use the
      // incoming values from the previous block.
      if (*BB == Header)
        for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
          PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
          Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
          if (Instruction *InValI = dyn_cast<Instruction>(InVal))
            if (It > 1 && L->contains(InValI))
              InVal = LastValueMap[InValI];
          VMap[OrigPHINode[i]] = InVal;
          New->getInstList().erase(NewPHI);
        }

      // Update our running map of newest clones
      LastValueMap[*BB] = New;
      for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
           VI != VE; ++VI)
        LastValueMap[VI->first] = VI->second;

      L->addBasicBlockToLoop(New, LI->getBase());

      // Add phi entries for newly created values to all exit blocks.
      for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB);
           SI != SE; ++SI) {
        if (L->contains(*SI))
          continue;
        for (BasicBlock::iterator BBI = (*SI)->begin();
             PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
          Value *Incoming = phi->getIncomingValueForBlock(*BB);
          ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
          if (It != LastValueMap.end())
            Incoming = It->second;
          phi->addIncoming(Incoming, New);
        }
      }

      // Keep track of new headers and latches as we create them, so that
      // we can insert the proper branches later.
      if (*BB == Header)
        Headers.push_back(New);
      if (*BB == LatchBlock)
        Latches.push_back(New);

      NewBlocks.push_back(New);
    }

    // Remap all instructions in the most recent iteration
    for (unsigned i = 0; i < NewBlocks.size(); ++i)
      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
           E = NewBlocks[i]->end(); I != E; ++I)
        ::RemapInstruction(I, LastValueMap);
  }

  // Loop over the PHI nodes in the original block, setting incoming values.
  for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
    PHINode *PN = OrigPHINode[i];
    if (CompletelyUnroll) {
      PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
      Header->getInstList().erase(PN);
    } else if (Count > 1) {
      Value *InVal = PN->removeIncomingValue(LatchBlock, false);
      // If this value was defined in the loop, take the value defined by the
      // last iteration of the loop.
      if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
        if (L->contains(InValI))
          InVal = LastValueMap[InVal];
      }
      assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
      PN->addIncoming(InVal, Latches.back());
    }
  }

  // Now that all the basic blocks for the unrolled iterations are in place,
  // set up the branches to connect them.
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    // The original branch was replicated in each unrolled iteration.
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());

    // The branch destination.
    unsigned j = (i + 1) % e;
    BasicBlock *Dest = Headers[j];
    bool NeedConditional = true;

    if (RuntimeTripCount && j != 0) {
      NeedConditional = false;
    }

    // For a complete unroll, make the last iteration end with a branch
    // to the exit block.
    if (CompletelyUnroll && j == 0) {
      Dest = LoopExit;
      NeedConditional = false;
    }

    // If we know the trip count or a multiple of it, we can safely use an
    // unconditional branch for some iterations.
    if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
      NeedConditional = false;
    }

    if (NeedConditional) {
      // Update the conditional branch's successor for the following
      // iteration.
      Term->setSuccessor(!ContinueOnTrue, Dest);
    } else {
      // Remove phi operands at this loop exit
      if (Dest != LoopExit) {
        BasicBlock *BB = Latches[i];
        for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
             SI != SE; ++SI) {
          if (*SI == Headers[i])
            continue;
          for (BasicBlock::iterator BBI = (*SI)->begin();
               PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) {
            Phi->removeIncomingValue(BB, false);
          }
        }
      }
      // Replace the conditional branch with an unconditional one.
      BranchInst::Create(Dest, Term);
      Term->eraseFromParent();
    }
  }

  // Merge adjacent basic blocks, if possible.
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
    if (Term->isUnconditional()) {
      BasicBlock *Dest = Term->getSuccessor(0);
      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM))
        std::replace(Latches.begin(), Latches.end(), Dest, Fold);
    }
  }

  // FIXME: Reconstruct dom info, because it is not preserved properly.
  // Incrementally updating domtree after loop unrolling would be easy.
  if (DominatorTree *DT = LPM->getAnalysisIfAvailable<DominatorTree>())
    DT->runOnFunction(*L->getHeader()->getParent());

  // Simplify any new induction variables in the partially unrolled loop.
  if (SE && !CompletelyUnroll) {
    SmallVector<WeakVH, 16> DeadInsts;
    simplifyLoopIVs(L, SE, LPM, DeadInsts);

    // Aggressively clean up dead instructions that simplifyLoopIVs already
    // identified. Any remaining should be cleaned up below.
    while (!DeadInsts.empty())
      if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
        RecursivelyDeleteTriviallyDeadInstructions(Inst);
  }

  // At this point, the code is well formed.  We now do a quick sweep over the
  // inserted code, doing constant propagation and dead code elimination as we
  // go.
  const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
  for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
       BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
      Instruction *Inst = I++;

      if (isInstructionTriviallyDead(Inst))
        (*BB)->getInstList().erase(Inst);
      else if (Value *V = SimplifyInstruction(Inst))
        if (LI->replacementPreservesLCSSAForm(Inst, V)) {
          Inst->replaceAllUsesWith(V);
          (*BB)->getInstList().erase(Inst);
        }
    }

  NumCompletelyUnrolled += CompletelyUnroll;
  ++NumUnrolled;

  // Remove the loop from the LoopPassManager if it's completely removed.
  if (CompletelyUnroll && LPM != NULL)
    LPM->deleteLoopFromQueue(L);

  return true;
}
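// ---------------------------------------------------------------------------
// Hypothetical caller sketch (not part of this file): how a loop pass of this
// era might drive UnrollLoop. The trip-count queries mirror what the unroll
// pass itself does; `Count` and the surrounding pass boilerplate are assumed
// to be supplied by the caller.
// ---------------------------------------------------------------------------
static bool tryUnrollSketch(Loop *L, unsigned Count, LoopInfo *LI,
                            ScalarEvolution *SE, LPPassManager *LPM) {
  unsigned TripCount = 0;
  unsigned TripMultiple = 1;
  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
    TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
    TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
  }
  // AllowRuntime=false: only unroll when static trip information justifies it.
  return UnrollLoop(L, Count, TripCount, /*AllowRuntime=*/false, TripMultiple,
                    LI, LPM);
}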
/// \brief Recursively handle the condition leading to a loop
Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
                                                  llvm::Loop *L,
                                                  BranchInst *Term) {
  // Only search through PHI nodes which are inside the loop. If we try this
  // with PHI nodes that are outside of the loop, we end up inserting new PHI
  // nodes outside of the loop which depend on values defined inside the loop.
  // This will break the module with
  // 'Instruction does not dominate all users!' errors.
  PHINode *Phi = nullptr;
  if ((Phi = dyn_cast<PHINode>(Cond)) && L->contains(Phi)) {

    BasicBlock *Parent = Phi->getParent();
    PHINode *NewPhi = PHINode::Create(Int64, 0, "", &Parent->front());
    Value *Ret = NewPhi;

    // Handle all non-constant incoming values first
    for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
      Value *Incoming = Phi->getIncomingValue(i);
      BasicBlock *From = Phi->getIncomingBlock(i);
      if (isa<ConstantInt>(Incoming)) {
        NewPhi->addIncoming(Broken, From);
        continue;
      }

      Phi->setIncomingValue(i, BoolFalse);
      Value *PhiArg = handleLoopCondition(Incoming, Broken, L, Term);
      NewPhi->addIncoming(PhiArg, From);
    }

    BasicBlock *IDom = DT->getNode(Parent)->getIDom()->getBlock();

    for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {

      Value *Incoming = Phi->getIncomingValue(i);
      if (Incoming != BoolTrue)
        continue;

      BasicBlock *From = Phi->getIncomingBlock(i);
      if (From == IDom) {
        // We're in the following situation:
        //
        //   IDom/From
        //      |  \
        //      |   If-block
        //      |  /
        //    Parent
        //
        // where we want to break out of the loop if the If-block is not taken.
        // Due to the depth-first traversal, there should be an end.cf
        // intrinsic in Parent, and we insert an else.break before it.
        //
        // Note that the end.cf need not be the first non-phi instruction
        // of parent, particularly when we're dealing with a multi-level
        // break, but it should occur within a group of intrinsic calls
        // at the beginning of the block.
        CallInst *OldEnd = dyn_cast<CallInst>(Parent->getFirstInsertionPt());
        while (OldEnd && OldEnd->getCalledFunction() != EndCf)
          OldEnd = dyn_cast<CallInst>(OldEnd->getNextNode());
        if (OldEnd && OldEnd->getCalledFunction() == EndCf) {
          Value *Args[] = { OldEnd->getArgOperand(0), NewPhi };
          Ret = CallInst::Create(ElseBreak, Args, "", OldEnd);
          continue;
        }
      }
      TerminatorInst *Insert = From->getTerminator();
      Value *PhiArg = CallInst::Create(Break, Broken, "", Insert);
      NewPhi->setIncomingValue(i, PhiArg);
    }
    eraseIfUnused(Phi);
    return Ret;

  } else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
    BasicBlock *Parent = Inst->getParent();
    Instruction *Insert;
    if (L->contains(Inst)) {
      Insert = Parent->getTerminator();
    } else {
      Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime();
    }
    Value *Args[] = { Cond, Broken };
    return CallInst::Create(IfBreak, Args, "", Insert);

  // Insert IfBreak before TERM for constant COND.
  } else if (isa<ConstantInt>(Cond)) {
    Value *Args[] = { Cond, Broken };
    return CallInst::Create(IfBreak, Args, "", Term);

  } else {
    llvm_unreachable("Unhandled loop condition!");
  }
  return nullptr;
}
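// ---------------------------------------------------------------------------
// Illustrative sketch of the rewrite above (hand-written IR, value names
// invented; intrinsic names are those of this AMDGPU era and shown only for
// orientation). For a non-PHI, loop-contained condition the pass emits
//
//   %broken.new = call i64 @llvm.SI.if.break(i1 %cond, i64 %phi.broken)
//
// before the block terminator; the caller then feeds %broken.new into the
// llvm.SI.loop intrinsic that gates the loop's back edge.
// ---------------------------------------------------------------------------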
/// setupEntryBlockAndCallSites - Setup the entry block by creating and filling
/// the function context and marking the call sites with the appropriate
/// values. These values are used by the DWARF EH emitter.
bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
  SmallVector<ReturnInst *, 16> Returns;
  SmallVector<InvokeInst *, 16> Invokes;
  SmallSetVector<LandingPadInst *, 16> LPads;

  // Look through the terminators of the basic blocks to find invokes.
  for (BasicBlock &BB : F)
    if (auto *II = dyn_cast<InvokeInst>(BB.getTerminator())) {
      if (Function *Callee = II->getCalledFunction())
        if (Callee->getIntrinsicID() == Intrinsic::donothing) {
          // Remove the NOP invoke.
          BranchInst::Create(II->getNormalDest(), II);
          II->eraseFromParent();
          continue;
        }

      Invokes.push_back(II);
      LPads.insert(II->getUnwindDest()->getLandingPadInst());
    } else if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
      Returns.push_back(RI);
    }

  if (Invokes.empty())
    return false;

  NumInvokes += Invokes.size();

  lowerIncomingArguments(F);
  lowerAcrossUnwindEdges(F, Invokes);

  Value *FuncCtx =
      setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end()));
  BasicBlock *EntryBB = &F.front();
  IRBuilder<> Builder(EntryBB->getTerminator());

  // Get a reference to the jump buffer.
  Value *JBufPtr =
      Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 5, "jbuf_gep");

  // Save the frame pointer.
  Value *FramePtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 0,
                                               "jbuf_fp_gep");

  Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp");
  Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true);

  // Save the stack pointer.
  Value *StackPtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 2,
                                               "jbuf_sp_gep");

  Val = Builder.CreateCall(StackAddrFn, {}, "sp");
  Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true);

  // Call the setup_dispatch intrinsic. It fills in the rest of the jmpbuf.
  Builder.CreateCall(BuiltinSetupDispatchFn, {});

  // Store a pointer to the function context so that the back-end will know
  // where to look for it.
  Value *FuncCtxArg = Builder.CreateBitCast(FuncCtx, Builder.getInt8PtrTy());
  Builder.CreateCall(FuncCtxFn, FuncCtxArg);

  // At this point, we are all set up, update the invoke instructions to mark
  // their call_site values.
  for (unsigned I = 0, E = Invokes.size(); I != E; ++I) {
    insertCallSiteStore(Invokes[I], I + 1);

    ConstantInt *CallSiteNum =
        ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1);

    // Record the call site value for the back end so it stays associated with
    // the invoke.
    CallInst::Create(CallSiteFn, CallSiteNum, "", Invokes[I]);
  }

  // Mark call instructions that aren't nounwind as no-action (call_site ==
  // -1). Skip the entry block, as prior to then, no function context has been
  // created for this function and any unexpected exceptions thrown will go
  // directly to the caller's context, which is what we want anyway, so no need
  // to do anything here.
  for (BasicBlock &BB : F) {
    if (&BB == &F.front())
      continue;
    for (Instruction &I : BB)
      if (I.mayThrow())
        insertCallSiteStore(&I, -1);
  }

  // Register the function context and make sure it's known to not throw
  CallInst *Register =
      CallInst::Create(RegisterFn, FuncCtx, "", EntryBB->getTerminator());
  Register->setDoesNotThrow();

  // Following any allocas not in the entry block, update the saved SP in the
  // jmpbuf to the new value.
  for (BasicBlock &BB : F) {
    if (&BB == &F.front())
      continue;
    for (Instruction &I : BB) {
      if (auto *CI = dyn_cast<CallInst>(&I)) {
        if (CI->getCalledFunction() != StackRestoreFn)
          continue;
      } else if (!isa<AllocaInst>(&I)) {
        continue;
      }
      Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
      StackAddr->insertAfter(&I);
      Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
      StoreStackAddr->insertAfter(StackAddr);
    }
  }

  // Finally, for any returns from this function, if this function contains an
  // invoke, add a call to unregister the function context.
  for (ReturnInst *Return : Returns)
    CallInst::Create(UnregisterFn, FuncCtx, "", Return);

  return true;
}
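// ---------------------------------------------------------------------------
// Hedged sketch of what insertCallSiteStore (called above) is expected to do:
// volatile-store the call-site number into the call_site slot of the function
// context (field index 1 here is an assumption). Illustrative only -- the
// pass's real helper is a member function and may differ in detail.
// ---------------------------------------------------------------------------
static void insertCallSiteStoreSketch(Instruction *I, int Number,
                                      Value *FuncCtx,
                                      Type *FunctionContextTy) {
  IRBuilder<> Builder(I);
  Value *CallSite = Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx,
                                               0, 1, "call_site");
  Type *Int32Ty = Type::getInt32Ty(I->getContext());
  Builder.CreateStore(ConstantInt::get(Int32Ty, Number), CallSite,
                      /*isVolatile=*/true);
}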
// Note: branch conditions, by definition, only have a chain user.
// This is why it should not be saved in a map for recall.
Value* ARMIREmitter::visitBRCOND(const SDNode *N) {
  // Get the address
  const ConstantSDNode *DestNode = dyn_cast<ConstantSDNode>(N->getOperand(0));
  if (!DestNode) {
    printError("visitBRCOND: Not a constant integer for branch!");
    return NULL;
  }

  uint64_t DestInt = DestNode->getSExtValue();
  uint64_t PC = Dec->getDisassembler()->getDebugOffset(N->getDebugLoc());
  // Note: pipeline is 8 bytes
  uint64_t Tgt = PC + 8 + DestInt;

  Function *F = IRB->GetInsertBlock()->getParent();
  BasicBlock *CurBB = IRB->GetInsertBlock();

  BasicBlock *BBTgt = Dec->getOrCreateBasicBlock(Tgt, F);

  // Parse the branch condition code
  const ConstantSDNode *CCNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!CCNode) {
    printError("visitBRCOND: Condition code is not a constant integer!");
    return NULL;
  }
  ARMCC::CondCodes ARMcc = ARMCC::CondCodes(CCNode->getZExtValue());

  // Unconditional branch
  if (ARMcc == ARMCC::AL) {
    Instruction *Br = IRB->CreateBr(BBTgt);
    Br->setDebugLoc(N->getDebugLoc());
    return Br;
  }

  // If not a conditional branch, find the successor block and look at CC
  BasicBlock *NextBB = NULL;
  Function::iterator BI = F->begin(), BE = F->end();
  while (BI != BE && BI->getName() != CurBB->getName()) ++BI;
  ++BI;
  if (BI == BE) {             // NOTE: This should never happen...
    NextBB = Dec->getOrCreateBasicBlock("end", F);
  } else {
    NextBB = &(*BI);
  }

  SDNode *CPSR = N->getOperand(2)->getOperand(1).getNode();
  SDNode *CMPNode = NULL;
  for (SDNode::use_iterator I = CPSR->use_begin(), E = CPSR->use_end();
       I != E; ++I) {
    if (I->getOpcode() == ISD::CopyToReg) {
      CMPNode = I->getOperand(2).getNode();
    }
  }

  if (CMPNode == NULL) {
    errs() << "ARMIREmitter ERROR: Could not find CMP SDNode for ARMBRCond!\n";
    return NULL;
  }

  Value *Cmp = NULL;
  Value *LHS = visit(CMPNode->getOperand(0).getNode());
  Value *RHS = visit(CMPNode->getOperand(1).getNode());
  // See ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC); in ARMISelLowering.cpp
  // TODO: Add support for conditions that handle floating point
  switch(ARMcc) {
    default:
      printError("Unknown condition code");
      return NULL;
    case ARMCC::EQ:
      Cmp = IRB->CreateICmpEQ(LHS, RHS);
      break;
    case ARMCC::NE:
      Cmp = IRB->CreateICmpNE(LHS, RHS);
      break;
    case ARMCC::HS:
      // HS - unsigned higher or same (or carry set)
      Cmp = IRB->CreateICmpUGE(LHS, RHS);
      break;
    case ARMCC::LO:
      // LO - unsigned lower (or carry clear)
      Cmp = IRB->CreateICmpULT(LHS, RHS);
      break;
    case ARMCC::MI:
      // MI - minus (negative)
      printError("Condition code MI is not handled at this time!");
      return NULL;
      // break;
    case ARMCC::PL:
      // PL - plus (positive or zero)
      printError("Condition code PL is not handled at this time!");
      return NULL;
      // break;
    case ARMCC::VS:
      // VS - V Set (signed overflow)
      printError("Condition code VS is not handled at this time!");
      return NULL;
      // break;
    case ARMCC::VC:
      // VC - V clear (no signed overflow)
      printError("Condition code VC is not handled at this time!");
      return NULL;
      // break;
    case ARMCC::HI:
      // HI - unsigned higher
      Cmp = IRB->CreateICmpUGT(LHS, RHS);
      break;
    case ARMCC::LS:
      // LS - unsigned lower or same
      Cmp = IRB->CreateICmpULE(LHS, RHS);
      break;
    case ARMCC::GE:
      // GE - signed greater or equal
      Cmp = IRB->CreateICmpSGE(LHS, RHS);
      break;
    case ARMCC::LT:
      // LT - signed less than
      Cmp = IRB->CreateICmpSLT(LHS, RHS);
      break;
    case ARMCC::GT:
      // GT - signed greater than
      Cmp = IRB->CreateICmpSGT(LHS, RHS);
      break;
    case ARMCC::LE:
      // LE - signed less than or equal
      Cmp = IRB->CreateICmpSLE(LHS, RHS);
      break;
  }
  // Guard the cast: CreateICmp* may constant-fold, in which case Cmp is not
  // an Instruction and the previous unchecked dyn_cast would have returned
  // null and crashed.
  if (Instruction *CmpI = dyn_cast<Instruction>(Cmp))
    CmpI->setDebugLoc(N->getOperand(2)->getDebugLoc());

  // Conditional branch
  Instruction *Br = IRB->CreateCondBr(Cmp, BBTgt, NextBB);
  Br->setDebugLoc(N->getDebugLoc());
  return Br;
}
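// ---------------------------------------------------------------------------
// Refactoring sketch (hypothetical, not part of the decompiler's API): the
// condition-code switch above is a pure mapping and could be expressed as a
// table lookup from ARMCC::CondCodes to LLVM integer predicates.
// ---------------------------------------------------------------------------
static CmpInst::Predicate armCCToPredicateSketch(ARMCC::CondCodes CC) {
  switch (CC) {
  case ARMCC::EQ: return CmpInst::ICMP_EQ;
  case ARMCC::NE: return CmpInst::ICMP_NE;
  case ARMCC::HS: return CmpInst::ICMP_UGE;
  case ARMCC::LO: return CmpInst::ICMP_ULT;
  case ARMCC::HI: return CmpInst::ICMP_UGT;
  case ARMCC::LS: return CmpInst::ICMP_ULE;
  case ARMCC::GE: return CmpInst::ICMP_SGE;
  case ARMCC::LT: return CmpInst::ICMP_SLT;
  case ARMCC::GT: return CmpInst::ICMP_SGT;
  case ARMCC::LE: return CmpInst::ICMP_SLE;
  default:        return CmpInst::BAD_ICMP_PREDICATE; // MI/PL/VS/VC unhandled
  }
}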
/// NormalizeLandingPads - Normalize and discover landing pads, noting them
/// in the LandingPads set. A landing pad is normal if the only CFG edges
/// that end at it are unwind edges from invoke instructions. If we inlined
/// through an invoke we could have a normal branch from the previous
/// unwind block through to the landing pad for the original invoke.
/// Abnormal landing pads are fixed up by redirecting all unwind edges to
/// a new basic block which falls through to the original.
bool DwarfEHPrepare::NormalizeLandingPads() {
  bool Changed = false;
  const MCAsmInfo *MAI = TM->getMCAsmInfo();
  bool usingSjLjEH = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;

  for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
    TerminatorInst *TI = I->getTerminator();
    if (!isa<InvokeInst>(TI))
      continue;
    BasicBlock *LPad = TI->getSuccessor(1);
    // Skip landing pads that have already been normalized.
    if (LandingPads.count(LPad))
      continue;

    // Check that only invoke unwind edges end at the landing pad.
    bool OnlyUnwoundTo = true;
    bool SwitchOK = usingSjLjEH;
    for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad);
         PI != PE; ++PI) {
      TerminatorInst *PT = (*PI)->getTerminator();
      // The SjLj dispatch block uses a switch instruction. This is effectively
      // an unwind edge, so we can disregard it here. There will only ever
      // be one dispatch, however, so if there are multiple switches, one
      // of them truly is a normal edge, not an unwind edge.
      if (SwitchOK && isa<SwitchInst>(PT)) {
        SwitchOK = false;
        continue;
      }
      if (!isa<InvokeInst>(PT) || LPad == PT->getSuccessor(0)) {
        OnlyUnwoundTo = false;
        break;
      }
    }

    if (OnlyUnwoundTo) {
      // Only unwind edges lead to the landing pad.  Remember the landing pad.
      LandingPads.insert(LPad);
      continue;
    }

    // At least one normal edge ends at the landing pad.  Redirect the unwind
    // edges to a new basic block which falls through into this one.

    // Create the new basic block.
    BasicBlock *NewBB = BasicBlock::Create(F->getContext(),
                                           LPad->getName() + "_unwind_edge");

    // Insert it into the function right before the original landing pad.
    LPad->getParent()->getBasicBlockList().insert(LPad, NewBB);

    // Redirect unwind edges from the original landing pad to NewBB.
    for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) {
      TerminatorInst *PT = (*PI++)->getTerminator();
      if (isa<InvokeInst>(PT) && PT->getSuccessor(1) == LPad)
        // Unwind to the new block.
        PT->setSuccessor(1, NewBB);
    }

    // If there are any PHI nodes in LPad, we need to update them so that they
    // merge incoming values from NewBB instead.
    for (BasicBlock::iterator II = LPad->begin(); isa<PHINode>(II); ++II) {
      PHINode *PN = cast<PHINode>(II);
      pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB);

      // Check to see if all of the values coming in via unwind edges are the
      // same.  If so, we don't need to create a new PHI node.
      Value *InVal = PN->getIncomingValueForBlock(*PB);
      for (pred_iterator PI = PB; PI != PE; ++PI) {
        if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) {
          InVal = 0;
          break;
        }
      }

      if (InVal == 0) {
        // Different unwind edges have different values.  Create a new PHI node
        // in NewBB.
        PHINode *NewPN = PHINode::Create(PN->getType(),
                                         PN->getName()+".unwind", NewBB);
        // Add an entry for each unwind edge, using the value from the old PHI.
        for (pred_iterator PI = PB; PI != PE; ++PI)
          NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI);

        // Now use this new PHI as the common incoming value for NewBB in PN.
        InVal = NewPN;
      }

      // Revector exactly one entry in the PHI node to come from NewBB
      // and delete all other entries that come from unwind edges.  If
      // there are both normal and unwind edges from the same predecessor,
      // this leaves an entry for the normal edge.
      for (pred_iterator PI = PB; PI != PE; ++PI)
        PN->removeIncomingValue(*PI);
      PN->addIncoming(InVal, NewBB);
    }

    // Add a fallthrough from NewBB to the original landing pad.
    BranchInst::Create(LPad, NewBB);

    // Now update DominatorTree and DominanceFrontier analysis information.
    if (DT)
      DT->splitBlock(NewBB);
    if (DF)
      DF->splitBlock(NewBB);

    // Remember the newly constructed landing pad.  The original landing pad
    // LPad is no longer a landing pad now that all unwind edges have been
    // revectored to NewBB.
    LandingPads.insert(NewBB);
    ++NumLandingPadsSplit;
    Changed = true;
  }

  return Changed;
}
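// ---------------------------------------------------------------------------
// Sketch of the split performed above (comments only): an abnormal landing
// pad with a mix of unwind and normal predecessors
//
//   invoke1 --unwind--> LPad <--normal-- BB
//   invoke2 --unwind--/
//
// becomes
//
//   invoke1 --unwind--> NewBB --fallthrough--> LPad <--normal-- BB
//   invoke2 --unwind--/
//
// so that only NewBB (the block recorded as the landing pad) receives unwind
// edges.
// ---------------------------------------------------------------------------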
bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
  DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
  LoopInfo *LI = &getAnalysis<LoopInfo>();
  const TargetData *TD = getAnalysisIfAvailable<TargetData>();
  const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  array_pod_sort(ExitBlocks.begin(), ExitBlocks.end());

  SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;

  // The bit we are stealing from the pointer represents whether this basic
  // block is the header of a subloop, in which case we only process its phis.
  typedef PointerIntPair<BasicBlock*, 1> WorklistItem;
  SmallVector<WorklistItem, 16> VisitStack;
  SmallPtrSet<BasicBlock*, 32> Visited;

  bool Changed = false;
  bool LocalChanged;
  do {
    LocalChanged = false;

    VisitStack.clear();
    Visited.clear();

    VisitStack.push_back(WorklistItem(L->getHeader(), false));

    while (!VisitStack.empty()) {
      WorklistItem Item = VisitStack.pop_back_val();
      BasicBlock *BB = Item.getPointer();
      bool IsSubloopHeader = Item.getInt();

      // Simplify instructions in the current basic block.
      for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
        Instruction *I = BI++;

        // The first time through the loop ToSimplify is empty and we try to
        // simplify all instructions. On later iterations ToSimplify is not
        // empty and we only bother simplifying instructions that are in it.
        if (!ToSimplify->empty() && !ToSimplify->count(I))
          continue;

        // Don't bother simplifying unused instructions.
        if (!I->use_empty()) {
          Value *V = SimplifyInstruction(I, TD, TLI, DT);
          if (V && LI->replacementPreservesLCSSAForm(I, V)) {
            // Mark all uses for resimplification next time round the loop.
            for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
                 UI != UE; ++UI)
              Next->insert(cast<Instruction>(*UI));

            I->replaceAllUsesWith(V);
            LocalChanged = true;
            ++NumSimplified;
          }
        }
        LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I);

        if (IsSubloopHeader && !isa<PHINode>(I))
          break;
      }

      // Add all successors to the worklist, except for loop exit blocks and
      // the bodies of subloops. We visit the headers of loops so that we can
      // process their phis, but we contract the rest of the subloop body and
      // only follow edges leading back to the original loop.
      for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
           ++SI) {
        BasicBlock *SuccBB = *SI;
        if (!Visited.insert(SuccBB))
          continue;

        const Loop *SuccLoop = LI->getLoopFor(SuccBB);
        if (SuccLoop && SuccLoop->getHeader() == SuccBB
                     && L->contains(SuccLoop)) {
          VisitStack.push_back(WorklistItem(SuccBB, true));

          SmallVector<BasicBlock*, 8> SubLoopExitBlocks;
          SuccLoop->getExitBlocks(SubLoopExitBlocks);

          for (unsigned i = 0; i < SubLoopExitBlocks.size(); ++i) {
            BasicBlock *ExitBB = SubLoopExitBlocks[i];
            if (LI->getLoopFor(ExitBB) == L && Visited.insert(ExitBB))
              VisitStack.push_back(WorklistItem(ExitBB, false));
          }

          continue;
        }

        bool IsExitBlock = std::binary_search(ExitBlocks.begin(),
                                              ExitBlocks.end(), SuccBB);
        if (IsExitBlock)
          continue;

        VisitStack.push_back(WorklistItem(SuccBB, false));
      }
    }

    // Place the list of instructions to simplify on the next loop iteration
    // into ToSimplify.
    std::swap(ToSimplify, Next);
    Next->clear();

    Changed |= LocalChanged;
  } while (LocalChanged);

  return Changed;
}
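// ---------------------------------------------------------------------------
// Minimal sketch of the PointerIntPair trick behind WorklistItem above: the
// low bit of a sufficiently aligned pointer is borrowed to carry a flag.
// Illustrative only; `exampleWorklistItem` is not part of this pass.
// ---------------------------------------------------------------------------
#include "llvm/ADT/PointerIntPair.h"
static void exampleWorklistItem(BasicBlock *BB) {
  PointerIntPair<BasicBlock*, 1> Item(BB, /*IsSubloopHeader=*/true);
  BasicBlock *Ptr = Item.getPointer();  // original pointer, low bit masked off
  bool IsSubloopHeader = Item.getInt(); // the stolen bit
  (void)Ptr;
  (void)IsSubloopHeader;
}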
/// Create a clone of the blocks in a loop and connect them together.
/// If UnrollProlog is true, loop structure will not be cloned, otherwise a new
/// loop will be created including all cloned blocks, and the iterator of it
/// switches to count NewIter down to 0.
///
static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
                            BasicBlock *InsertTop, BasicBlock *InsertBot,
                            std::vector<BasicBlock *> &NewBlocks,
                            LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
                            LoopInfo *LI) {
  BasicBlock *Preheader = L->getLoopPreheader();
  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
  Function *F = Header->getParent();
  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
  Loop *NewLoop = nullptr;
  Loop *ParentLoop = L->getParentLoop();
  if (!UnrollProlog) {
    NewLoop = new Loop();
    if (ParentLoop)
      ParentLoop->addChildLoop(NewLoop);
    else
      LI->addTopLevelLoop(NewLoop);
  }

  // For each block in the original loop, create a new copy,
  // and update the value map with the newly created values.
  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
    BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".prol", F);
    NewBlocks.push_back(NewBB);

    if (NewLoop)
      NewLoop->addBasicBlockToLoop(NewBB, *LI);
    else if (ParentLoop)
      ParentLoop->addBasicBlockToLoop(NewBB, *LI);

    VMap[*BB] = NewBB;
    if (Header == *BB) {
      // For the first block, add a CFG connection to this newly
      // created block.
      InsertTop->getTerminator()->setSuccessor(0, NewBB);
    }
    if (Latch == *BB) {
      // For the last block, if UnrollProlog is true, create a direct jump to
      // InsertBot. If not, create a loop back to cloned head.
      VMap.erase((*BB)->getTerminator());
      BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
      BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
      IRBuilder<> Builder(LatchBR);
      if (UnrollProlog) {
        Builder.CreateBr(InsertBot);
      } else {
        PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, "prol.iter",
                                          FirstLoopBB->getFirstNonPHI());
        Value *IdxSub =
            Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
                              NewIdx->getName() + ".sub");
        Value *IdxCmp =
            Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
        Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
        NewIdx->addIncoming(NewIter, InsertTop);
        NewIdx->addIncoming(IdxSub, NewBB);
      }
      LatchBR->eraseFromParent();
    }
  }

  // Change the incoming values to the ones defined in the preheader or
  // cloned loop.
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
    if (UnrollProlog) {
      VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
      cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
    } else {
      unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
      NewPHI->setIncomingBlock(idx, InsertTop);
      BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
      idx = NewPHI->getBasicBlockIndex(Latch);
      Value *InVal = NewPHI->getIncomingValue(idx);
      NewPHI->setIncomingBlock(idx, NewLatch);
      if (VMap[InVal])
        NewPHI->setIncomingValue(idx, VMap[InVal]);
    }
  }
  if (NewLoop) {
    // Add unroll disable metadata to disable future unrolling for this loop.
    SmallVector<Metadata *, 4> MDs;
    // Reserve first location for self reference to the LoopID metadata node.
    MDs.push_back(nullptr);
    MDNode *LoopID = NewLoop->getLoopID();
    if (LoopID) {
      // First remove any existing loop unrolling metadata.
      for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
        bool IsUnrollMetadata = false;
        MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
        if (MD) {
          const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
          IsUnrollMetadata =
              S && S->getString().startswith("llvm.loop.unroll.");
        }
        if (!IsUnrollMetadata)
          MDs.push_back(LoopID->getOperand(i));
      }
    }

    LLVMContext &Context = NewLoop->getHeader()->getContext();
    SmallVector<Metadata *, 1> DisableOperands;
    DisableOperands.push_back(MDString::get(Context,
                                            "llvm.loop.unroll.disable"));
    MDNode *DisableNode = MDNode::get(Context, DisableOperands);
    MDs.push_back(DisableNode);

    MDNode *NewLoopID = MDNode::get(Context, MDs);
    // Set operand 0 to refer to the loop id itself.
    NewLoopID->replaceOperandWith(0, NewLoopID);
    NewLoop->setLoopID(NewLoopID);
  }
}
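// ---------------------------------------------------------------------------
// For reference, the metadata constructed above prints roughly like this in
// textual IR (sketch; node numbers are arbitrary):
//
//   br i1 %cmp, label %header, label %exit, !llvm.loop !0
//   ...
//   !0 = distinct !{!0, !1}
//   !1 = !{!"llvm.loop.unroll.disable"}
// ---------------------------------------------------------------------------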
/// UnswitchNontrivialCondition - We determined that the loop is profitable
/// to unswitch when LIC equal Val.  Split it into loop versions and test the
/// condition outside of either loop.  Return the loops created as Out1/Out2.
void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
                                               Loop *L) {
  Function *F = loopHeader->getParent();
  DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
        << loopHeader->getName() << " [" << L->getBlocks().size()
        << " blocks] in Function " << F->getName()
        << " when '" << *Val << "' == " << *LIC << "\n");

  if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
    SE->forgetLoop(L);

  LoopBlocks.clear();
  NewBlocks.clear();

  // First step, split the preheader and exit blocks, and add these blocks to
  // the LoopBlocks list.
  BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, this);
  LoopBlocks.push_back(NewPreheader);

  // We want the loop to come after the preheader, but before the exit blocks.
  LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  // Split all of the edges from inside the loop to their exit blocks.  Update
  // the appropriate Phi nodes as we do so.
  SplitExitEdges(L, ExitBlocks);

  // The exit blocks may have been changed due to edge splitting, recompute.
  ExitBlocks.clear();
  L->getUniqueExitBlocks(ExitBlocks);

  // Add exit blocks to the loop blocks.
  LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end());

  // Next step, clone all of the basic blocks that make up the loop (including
  // the loop preheader and exit blocks), keeping track of the mapping between
  // the instructions and blocks.
  NewBlocks.reserve(LoopBlocks.size());
  ValueToValueMapTy VMap;
  for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
    BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
    NewBlocks.push_back(NewBB);
    VMap[LoopBlocks[i]] = NewBB;  // Keep the BB mapping.
    LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
  }

  // Splice the newly inserted blocks into the function right before the
  // original preheader.
  F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(),
                                NewBlocks[0], F->end());

  // Now we create the new Loop object for the versioned loop.
  Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
  Loop *ParentLoop = L->getParentLoop();
  if (ParentLoop) {
    // Make sure to add the cloned preheader and exit blocks to the parent loop
    // as well.
    ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase());
  }

  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
    BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
    // The new exit block should be in the same loop as the old one.
    if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
      ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase());

    assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
           "Exit block should have been split to have one successor!");
    BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);

    // If the successor of the exit block had PHI nodes, add an entry for
    // NewExit.
    PHINode *PN;
    for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
      PN = cast<PHINode>(I);
      Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
      ValueToValueMapTy::iterator It = VMap.find(V);
      if (It != VMap.end()) V = It->second;
      PN->addIncoming(V, NewExit);
    }
  }

  // Rewrite the code to refer to itself.
  for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
    for (BasicBlock::iterator I = NewBlocks[i]->begin(),
         E = NewBlocks[i]->end(); I != E; ++I)
      RemapInstruction(I, VMap,
                       RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);

  // Rewrite the original preheader to select between versions of the loop.
  BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
  assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
         "Preheader splitting did not work correctly!");

  // Emit the new branch that selects between the two versions of this loop.
  EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR);
  LPM->deleteSimpleAnalysisValue(OldBR, L);
  OldBR->eraseFromParent();

  LoopProcessWorklist.push_back(NewLoop);
  redoLoop = true;

  // Keep a WeakVH holding onto LIC.  If the first call to RewriteLoopBody
  // deletes the instruction (for example by simplifying a PHI that feeds into
  // the condition that we're unswitching on), we don't rewrite the second
  // iteration.
  WeakVH LICHandle(LIC);

  // Now we rewrite the original code to know that the condition is true and
  // the new code to know that the condition is false.
  RewriteLoopBodyWithConditionConstant(L, LIC, Val, false);

  // It's possible that simplifying one loop could cause the other to be
  // changed to another value or a constant.  If it's a constant, don't
  // simplify it.
  if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
      LICHandle && !isa<Constant>(LICHandle))
    RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true);
}
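// ---------------------------------------------------------------------------
// Conceptually, for a loop-invariant condition LIC the machinery above
// performs this source-level transformation (sketch):
//
//   for (...) { if (LIC) { A; } else { B; } }
//
// becomes
//
//   if (LIC) { for (...) { A; } }
//   else     { for (...) { B; } }
//
// The two RewriteLoopBodyWithConditionConstant calls above then constant-fold
// LIC inside each version.
// ---------------------------------------------------------------------------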
// RewriteLoopBodyWithConditionConstant - We know either that the value LIC has
// the value specified by Val in the specified loop, or we know it does NOT have
// that value.  Rewrite any uses of LIC or of properties correlated to it.
void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
                                                        Constant *Val,
                                                        bool IsEqual) {
  assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");

  // FIXME: Support correlated properties, like:
  //  for (...)
  //    if (li1 < li2)
  //      ...
  //    if (li1 > li2)
  //      ...

  // FOLD boolean conditions (X|LIC), (X&LIC).  Fold conditional branches,
  // selects, switches.
  std::vector<Instruction*> Worklist;
  LLVMContext &Context = Val->getContext();

  // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC
  // in the loop with the appropriate one directly.
  if (IsEqual || (isa<ConstantInt>(Val) &&
      Val->getType()->isIntegerTy(1))) {
    Value *Replacement;
    if (IsEqual)
      Replacement = Val;
    else
      Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),
                                     !cast<ConstantInt>(Val)->getZExtValue());

    for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end();
         UI != E; ++UI) {
      Instruction *U = dyn_cast<Instruction>(*UI);
      if (!U || !L->contains(U))
        continue;
      U->replaceUsesOfWith(LIC, Replacement);
      Worklist.push_back(U);
    }
    SimplifyCode(Worklist, L);
    return;
  }

  // Otherwise, we don't know the precise value of LIC, but we do know that it
  // is certainly NOT "Val".  As such, simplify any uses in the loop that we
  // can.  This case occurs when we unswitch switch statements.
  for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end();
       UI != E; ++UI) {
    Instruction *U = dyn_cast<Instruction>(*UI);
    if (!U || !L->contains(U))
      continue;

    Worklist.push_back(U);

    // TODO: We could do other simplifications, for example, turning
    // 'icmp eq LIC, Val' -> false.

    // If we know that LIC is not Val, use this info to simplify code.
    SwitchInst *SI = dyn_cast<SwitchInst>(U);
    if (SI == 0 || !isa<ConstantInt>(Val)) continue;

    unsigned DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
    if (DeadCase == 0) continue;  // Default case is live for multiple values.

    // Found a dead case value.  Don't remove PHI nodes in the
    // successor if they become single-entry, those PHI nodes may
    // be in the Users list.

    BasicBlock *Switch = SI->getParent();
    BasicBlock *SISucc = SI->getSuccessor(DeadCase);
    BasicBlock *Latch = L->getLoopLatch();
    if (!SI->findCaseDest(SISucc)) continue;  // Edge is critical.
    // If the DeadCase successor dominates the loop latch, then the
    // transformation isn't safe since it will delete the sole predecessor edge
    // to the latch.
    if (Latch && DT->dominates(SISucc, Latch))
      continue;

    // FIXME: This is a hack.  We need to keep the successor around
    // and hooked up so as to preserve the loop structure, because
    // trying to update it is complicated.  So instead we preserve the
    // loop structure and put the block on a dead code path.
    SplitEdge(Switch, SISucc, this);
    // Compute the successors instead of relying on the return value
    // of SplitEdge, since it may have split the switch successor
    // after PHI nodes.
    BasicBlock *NewSISucc = SI->getSuccessor(DeadCase);
    BasicBlock *OldSISucc = *succ_begin(NewSISucc);
    // Create an "unreachable" destination.
    BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
                                           Switch->getParent(),
                                           OldSISucc);
    new UnreachableInst(Context, Abort);
    // Force the new case destination to branch to the "unreachable"
    // block while maintaining a (dead) CFG edge to the old block.
    NewSISucc->getTerminator()->eraseFromParent();
    BranchInst::Create(Abort, OldSISucc,
                       ConstantInt::getTrue(Context), NewSISucc);
    // Release the PHI operands for this edge.
    for (BasicBlock::iterator II = NewSISucc->begin();
         PHINode *PN = dyn_cast<PHINode>(II); ++II)
      PN->setIncomingValue(PN->getBasicBlockIndex(Switch),
                           UndefValue::get(PN->getType()));
    // Tell the domtree about the new block. We don't fully update the
    // domtree here -- instead we force it to do a full recomputation
    // after the pass is complete -- but we do need to inform it of
    // new blocks.
    if (DT)
      DT->addNewBlock(Abort, NewSISucc);
  }

  SimplifyCode(Worklist, L);
}
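// ---------------------------------------------------------------------------
// Sketch of the dead-switch-case rewiring above (comments only):
//
//   switch %LIC [ ... case Val: -> SISucc ... ]
//
// becomes, after SplitEdge and the forced branch,
//
//   switch %LIC [ ... case Val: -> NewSISucc ... ]
//   NewSISucc: br i1 true, label %Abort, label %OldSISucc
//   Abort:     unreachable
//
// The edge to OldSISucc is dead but keeps the loop structure intact.
// ---------------------------------------------------------------------------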
void PromoteMem2Reg::run() {
  Function &F = *DF.getRoot()->getParent();

  if (AST) PointerAllocaValues.resize(Allocas.size());
  AllocaDbgDeclares.resize(Allocas.size());

  AllocaInfo Info;
  LargeBlockInfo LBI;

  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
    AllocaInst *AI = Allocas[AllocaNum];

    assert(isAllocaPromotable(AI) &&
           "Cannot promote non-promotable alloca!");
    assert(AI->getParent()->getParent() == &F &&
           "All allocas should be in the same function, which is same as DF!");

    if (AI->use_empty()) {
      // If there are no uses of the alloca, just delete it now.
      if (AST) AST->deleteValue(AI);
      AI->eraseFromParent();

      // Remove the alloca from the Allocas list, since it has been processed
      RemoveFromAllocasList(AllocaNum);
      ++NumDeadAlloca;
      continue;
    }

    // Calculate the set of read and write-locations for each alloca.  This is
    // analogous to finding the 'uses' and 'definitions' of each variable.
    Info.AnalyzeAlloca(AI);

    // If there is only a single store to this value, replace any loads of
    // it that are directly dominated by the definition with the value stored.
    if (Info.DefiningBlocks.size() == 1) {
      RewriteSingleStoreAlloca(AI, Info, LBI);

      // Finally, after the scan, check to see if the store is all that is
      // left.
      if (Info.UsingBlocks.empty()) {
        // Record debuginfo for the store and remove the declaration's
        // debuginfo.
        if (DbgDeclareInst *DDI = Info.DbgDeclare) {
          ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore);
          DDI->eraseFromParent();
        }
        // Remove the (now dead) store and alloca.
        Info.OnlyStore->eraseFromParent();
        LBI.deleteValue(Info.OnlyStore);

        if (AST) AST->deleteValue(AI);
        AI->eraseFromParent();
        LBI.deleteValue(AI);

        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);

        ++NumSingleStore;
        continue;
      }
    }

    // If the alloca is only read and written in one basic block, just perform
    // a linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock) {
      PromoteSingleBlockAlloca(AI, Info, LBI);

      // Finally, after the scan, check to see if the stores are all that is
      // left.
      if (Info.UsingBlocks.empty()) {

        // Remove the (now dead) stores and alloca.
        while (!AI->use_empty()) {
          StoreInst *SI = cast<StoreInst>(AI->use_back());
          // Record debuginfo for the store before removing it.
          if (DbgDeclareInst *DDI = Info.DbgDeclare)
            ConvertDebugDeclareToDebugValue(DDI, SI);
          SI->eraseFromParent();
          LBI.deleteValue(SI);
        }

        if (AST) AST->deleteValue(AI);
        AI->eraseFromParent();
        LBI.deleteValue(AI);

        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);

        // The alloca's debuginfo can be removed as well.
        if (DbgDeclareInst *DDI = Info.DbgDeclare)
          DDI->eraseFromParent();

        ++NumLocalPromoted;
        continue;
      }
    }

    // If we haven't computed a numbering for the BB's in the function, do so
    // now.
    if (BBNumbers.empty()) {
      unsigned ID = 0;
      for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
        BBNumbers[I] = ID++;
    }

    // If we have an AST to keep updated, remember some pointer value that is
    // stored into the alloca.
    if (AST)
      PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;

    // Remember the dbg.declare intrinsic describing this alloca, if any.
    if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;

    // Keep the reverse mapping of the 'Allocas' array for the rename pass.
    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

    // At this point, we're committed to promoting the alloca using IDF's, and
    // the standard SSA construction algorithm.  Determine which blocks need
    // PHI nodes and see if we can optimize out some work by avoiding insertion
    // of dead phi nodes.
    DetermineInsertionPoint(AI, AllocaNum, Info);
  }

  if (Allocas.empty())
    return; // All of the allocas must have been trivial!

  LBI.clear();

  // Set the incoming values for the basic block to be null values for all of
  // the alloca's.  We do this in case there is a load of a value that has not
  // been stored yet.  In this case, it will get this null value.
  //
  RenamePassData::ValVector Values(Allocas.size());
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
    Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());

  // Walk all basic blocks in the function, performing the SSA rename algorithm
  // and inserting the phi nodes we marked as necessary
  //
  std::vector<RenamePassData> RenamePassWorkList;
  RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
  do {
    RenamePassData RPD;
    RPD.swap(RenamePassWorkList.back());
    RenamePassWorkList.pop_back();
    // RenamePass may add new worklist entries.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
  } while (!RenamePassWorkList.empty());

  // The renamer uses the Visited set to avoid infinite loops.  Clear it now.
  Visited.clear();

  // Remove the allocas themselves from the function.
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
    Instruction *A = Allocas[i];

    // If there are any uses of the alloca instructions left, they must be in
    // sections of dead code that were not processed on the dominance frontier.
    // Just delete the users now.
    //
    if (!A->use_empty())
      A->replaceAllUsesWith(UndefValue::get(A->getType()));
    if (AST) AST->deleteValue(A);
    A->eraseFromParent();
  }

  // Remove alloca's dbg.declare intrinsics from the function.
  for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
    if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
      DDI->eraseFromParent();

  // Loop over all of the PHI nodes and see if there are any that we can get
  // rid of because they merge all of the same incoming values.  This can
  // happen due to undef values coming into the PHI nodes.  This process is
  // iterative, because eliminating one PHI node can cause others to be
  // removed.
  bool EliminatedAPHI = true;
  while (EliminatedAPHI) {
    EliminatedAPHI = false;

    for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
           NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
      PHINode *PN = I->second;

      // If this PHI node merges one value and/or undefs, get the value.
      if (Value *V = PN->hasConstantValue(&DT)) {
        if (AST && PN->getType()->isPointerTy())
          AST->deleteValue(PN);
        PN->replaceAllUsesWith(V);
        PN->eraseFromParent();
        NewPhiNodes.erase(I++);
        EliminatedAPHI = true;
        continue;
      }
      ++I;
    }
  }

  // At this point, the renamer has added entries to PHI nodes for all
  // reachable code.  Unfortunately, there may be unreachable blocks which the
  // renamer hasn't traversed.  If this is the case, the PHI nodes may not
  // have incoming values for all predecessors.  Loop over all PHI nodes we
  // have created, inserting undef values if they are missing any incoming
  // values.
  //
  for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
         NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
    // We want to do this once per basic block.  As such, only process a block
    // when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)
      continue;

    // Only do work here if the PHI nodes are missing incoming values.  We
    // know that all PHI nodes that were inserted in a block will have the same
    // number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
      continue;

    // Get the preds for BB.
    SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));

    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks.  Start by sorting the incoming predecessors for efficient
    // access.
    std::sort(Preds.begin(), Preds.end());

    // Now we loop through all BB's which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVector<BasicBlock*, 16>::iterator EntIt =
        std::lower_bound(Preds.begin(), Preds.end(),
                         SomePHI->getIncomingBlock(i));
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry
      Preds.erase(EntIt);
    }

    // At this point, the blocks left in the preds list must have dummy
    // entries inserted into every PHI nodes for the block.  Update all the phi
    // nodes in this block that we are inserting (there could be phis before
    // mem2reg runs).
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = UndefValue::get(SomePHI->getType());
      for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
        SomePHI->addIncoming(UndefVal, Preds[pred]);
    }
  }

  NewPhiNodes.clear();
}
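// ---------------------------------------------------------------------------
// Hypothetical caller sketch: how the public PromoteMemToReg entry point of
// this DominanceFrontier era was typically driven. The signature is assumed
// from the surrounding code; a pass would obtain DT/DF from the pass manager.
// ---------------------------------------------------------------------------
static void promoteEntryBlockAllocas(Function &F, DominatorTree &DT,
                                     DominanceFrontier &DF) {
  std::vector<AllocaInst*> Allocas;
  BasicBlock &Entry = F.getEntryBlock();
  // Collect the promotable allocas; mem2reg only considers the entry block.
  for (BasicBlock::iterator I = Entry.begin(), E = Entry.end(); I != E; ++I)
    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
      if (isAllocaPromotable(AI))
        Allocas.push_back(AI);
  if (!Allocas.empty())
    PromoteMemToReg(Allocas, DT, DF);
}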
/// ComputeLiveInBlocks - Determine which blocks the value is live in.  These
/// are blocks which lead to uses.  Knowing this allows us to avoid inserting
/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi
/// nodes would be dead).
void PromoteMem2Reg::
ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                    const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
                    SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) {
  // To determine liveness, we must iterate through the predecessors of blocks
  // where the def is live.  Blocks are added to the worklist if we need to
  // check their predecessors.  Start with all the using blocks.
  SmallVector<BasicBlock*, 64> LiveInBlockWorklist;
  LiveInBlockWorklist.insert(LiveInBlockWorklist.end(),
                             Info.UsingBlocks.begin(), Info.UsingBlocks.end());

  // If any of the using blocks is also a definition block, check to see if the
  // definition occurs before or after the use.  If it happens before the use,
  // the value isn't really live-in.
  for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
    BasicBlock *BB = LiveInBlockWorklist[i];
    if (!DefBlocks.count(BB)) continue;

    // Okay, this is a block that both uses and defines the value.  If the
    // first reference to the alloca is a def (store), then we know it isn't
    // live-in.
    for (BasicBlock::iterator I = BB->begin(); ; ++I) {
      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        if (SI->getOperand(1) != AI) continue;

        // We found a store to the alloca before a load.  The alloca is not
        // actually live-in here.
        LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
        LiveInBlockWorklist.pop_back();
        --i, --e;
        break;
      }

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        if (LI->getOperand(0) != AI) continue;

        // Okay, we found a load before a store to the alloca.  It is actually
        // live into this block.
        break;
      }
    }
  }

  // Now that we have a set of blocks where the phi is live-in, recursively add
  // their predecessors until we find the full region the value is live.
  while (!LiveInBlockWorklist.empty()) {
    BasicBlock *BB = LiveInBlockWorklist.pop_back_val();

    // The block really is live in here, insert it into the set.  If already in
    // the set, then it has already been processed.
    if (!LiveInBlocks.insert(BB))
      continue;

    // Since the value is live into BB, it is either defined in a predecessor
    // or live into it too.  Add the preds to the worklist unless they are a
    // defining block.
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      BasicBlock *P = *PI;

      // The value is not live into a predecessor if it defines the value.
      if (DefBlocks.count(P))
        continue;

      // Otherwise it is, add to the worklist.
      LiveInBlockWorklist.push_back(P);
    }
  }
}
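// ---------------------------------------------------------------------------
// Worked micro-example (sketch) for the pruning loop above. Given
//
//   B:  store i32 %v, i32* %a    ; def comes first
//       %x = load i32, i32* %a   ; use after the def
//
// block B both uses and defines %a, but the first reference is the store, so
// %a is not live-in to B and B is removed from the worklist. If the load came
// first, B would stay and its predecessors would be explored.
// ---------------------------------------------------------------------------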
void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
                                                    bool UseCalls) {
  // Don't insert coverage for unreachable blocks: we will never call
  // __sanitizer_cov() for them, so counting them in
  // NumberOfInstrumentedBlocks() might complicate calculation of code coverage
  // percentage. Also, unreachable instructions frequently have no debug
  // locations.
  if (isa<UnreachableInst>(BB.getTerminator()))
    return;
  BasicBlock::iterator IP = BB.getFirstInsertionPt();
  bool IsEntryBB = &BB == &F.getEntryBlock();
  DebugLoc EntryLoc;
  if (IsEntryBB) {
    if (auto SP = getDISubprogram(&F))
      EntryLoc = DebugLoc::get(SP->getScopeLine(), 0, SP);
    // Keep static allocas and llvm.localescape calls in the entry block.  Even
    // if we aren't splitting the block, it's nice for allocas to be before
    // calls.
    IP = PrepareToSplitEntryBlock(BB, IP);
  } else {
    EntryLoc = IP->getDebugLoc();
  }

  IRBuilder<> IRB(&*IP);
  IRB.SetCurrentDebugLocation(EntryLoc);
  Value *GuardP = IRB.CreateAdd(
      IRB.CreatePointerCast(GuardArray, IntptrTy),
      ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4));
  Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
  GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy);
  if (Options.TraceBB) {
    IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP);
  } else if (UseCalls) {
    IRB.CreateCall(SanCovWithCheckFunction, GuardP);
  } else {
    LoadInst *Load = IRB.CreateLoad(GuardP);
    Load->setAtomic(Monotonic);
    Load->setAlignment(4);
    SetNoSanitizeMetadata(Load);
    Value *Cmp =
        IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load);
    Instruction *Ins = SplitBlockAndInsertIfThen(
        Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
    IRB.SetInsertPoint(Ins);
    IRB.SetCurrentDebugLocation(EntryLoc);
    // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC.
    IRB.CreateCall(SanCovFunction, GuardP);
    IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
  }

  if (Options.Use8bitCounters) {
    IRB.SetInsertPoint(&*IP);
    Value *P = IRB.CreateAdd(
        IRB.CreatePointerCast(EightBitCounterArray, IntptrTy),
        ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1));
    P = IRB.CreateIntToPtr(P, IRB.getInt8PtrTy());
    LoadInst *LI = IRB.CreateLoad(P);
    Value *Inc = IRB.CreateAdd(LI, ConstantInt::get(IRB.getInt8Ty(), 1));
    StoreInst *SI = IRB.CreateStore(Inc, P);
    SetNoSanitizeMetadata(LI);
    SetNoSanitizeMetadata(SI);
  }
}
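// ---------------------------------------------------------------------------
// Sketch of the fast-path instrumentation emitted above when UseCalls is false
// (hand-written textual IR; value names invented):
//
//   %guard = load atomic i32, i32* %guardp monotonic, align 4
//   %cmp = icmp sge i32 0, %guard
//   br i1 %cmp, label %instrument, label %cont, !prof !weights
// instrument:
//   call void @__sanitizer_cov(i32* %guardp)   ; records the caller PC
//   br label %cont
// ---------------------------------------------------------------------------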
/// InsertStackProtectors - Insert code into the prologue and epilogue of the /// function. /// /// - The prologue code loads and stores the stack guard onto the stack. /// - The epilogue checks the value stored in the prologue against the original /// value. It calls __stack_chk_fail if they differ. bool StackProtector::InsertStackProtectors() { bool HasPrologue = false; bool SupportsSelectionDAGSP = EnableSelectionDAGSP && !TM->Options.EnableFastISel; AllocaInst *AI = nullptr; // Place on stack that stores the stack guard. Value *StackGuardVar = nullptr; // The stack guard variable. for (Function::iterator I = F->begin(), E = F->end(); I != E;) { BasicBlock *BB = &*I++; ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); if (!RI) continue; if (!HasPrologue) { HasPrologue = true; SupportsSelectionDAGSP &= CreatePrologue(F, M, RI, TLI, Trip, AI, StackGuardVar); } if (SupportsSelectionDAGSP) { // Since we have a potential tail call, insert the special stack check // intrinsic. Instruction *InsertionPt = nullptr; if (CallInst *CI = FindPotentialTailCall(BB, RI, TLI)) { InsertionPt = CI; } else { InsertionPt = RI; // At this point we know that BB has a return statement so it *DOES* // have a terminator. assert(InsertionPt != nullptr && "BB must have a terminator instruction at this point."); } Function *Intrinsic = Intrinsic::getDeclaration(M, Intrinsic::stackprotectorcheck); CallInst::Create(Intrinsic, StackGuardVar, "", InsertionPt); } else { // If we do not support SelectionDAG based tail calls, generate IR level // tail calls. // // For each block with a return instruction, convert this: // // return: // ... // ret ... // // into this: // // return: // ... // %1 = load __stack_chk_guard // %2 = load StackGuardSlot // %3 = cmp i1 %1, %2 // br i1 %3, label %SP_return, label %CallStackCheckFailBlk // // SP_return: // ret ... // // CallStackCheckFailBlk: // call void @__stack_chk_fail() // unreachable // Create the FailBB. We duplicate the BB every time since the MI tail // merge pass will merge together all of the various BB into one including // fail BB generated by the stack protector pseudo instruction. BasicBlock *FailBB = CreateFailBB(); // Split the basic block before the return instruction. BasicBlock *NewBB = BB->splitBasicBlock(RI->getIterator(), "SP_return"); // Update the dominator tree if we need to. if (DT && DT->isReachableFromEntry(BB)) { DT->addNewBlock(NewBB, BB); DT->addNewBlock(FailBB, BB); } // Remove default branch instruction to the new BB. BB->getTerminator()->eraseFromParent(); // Move the newly created basic block to the point right after the old // basic block so that it's in the "fall through" position. NewBB->moveAfter(BB); // Generate the stack protector instructions in the old basic block. IRBuilder<> B(BB); LoadInst *LI1 = B.CreateLoad(StackGuardVar); LoadInst *LI2 = B.CreateLoad(AI); Value *Cmp = B.CreateICmpEQ(LI1, LI2); unsigned SuccessWeight = BranchProbabilityInfo::getBranchWeightStackProtector(true); unsigned FailureWeight = BranchProbabilityInfo::getBranchWeightStackProtector(false); MDNode *Weights = MDBuilder(F->getContext()) .createBranchWeights(SuccessWeight, FailureWeight); B.CreateCondBr(Cmp, NewBB, FailBB, Weights); } } // Return if we didn't modify any basic blocks. i.e., there are no return // statements in the function. return HasPrologue; }
/// Generates code to divide two unsigned scalar 32-bit integers. Returns the
/// quotient, rounded towards 0. Builder's insert point should be pointing where
/// the caller wants code generated, e.g. at the udiv instruction.
static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
                                           IRBuilder<> &Builder) {
  // The basic algorithm can be found in the compiler-rt project's
  // implementation of __udivsi3.c. Here, we do a lower-level IR based approach
  // that's been hand-tuned to lessen the amount of control flow involved.

  // Some helper values
  IntegerType *I32Ty = Builder.getInt32Ty();

  ConstantInt *Zero      = Builder.getInt32(0);
  ConstantInt *One       = Builder.getInt32(1);
  ConstantInt *ThirtyOne = Builder.getInt32(31);
  ConstantInt *NegOne    = ConstantInt::getSigned(I32Ty, -1);
  ConstantInt *True      = Builder.getTrue();

  BasicBlock *IBB = Builder.GetInsertBlock();
  Function *F = IBB->getParent();
  Function *CTLZi32 = Intrinsic::getDeclaration(F->getParent(),
                                                Intrinsic::ctlz, I32Ty);

  // Our CFG is going to look like:
  // +---------------------+
  // |    special-cases    |
  // |         ...         |
  // +---------------------+
  //  |          |
  //  |   +----------+
  //  |   |   bb1    |
  //  |   |   ...    |
  //  |   +----------+
  //  |    |      |
  //  |    |   +------------+
  //  |    |   | preheader  |
  //  |    |   |    ...     |
  //  |    |   +------------+
  //  |    |      |
  //  |    |      |        +---+
  //  |    |      |        |   |
  //  |    |   +------------+  |
  //  |    |   |  do-while  |  |
  //  |    |   |    ...     |  |
  //  |    |   +------------+  |
  //  |    |      |      |     |
  //  |  +-----------+   +-----+
  //  |  | loop-exit |
  //  |  |    ...    |
  //  |  +-----------+
  //  |      |
  // +-------+
  // |  ...  |
  // |  end  |
  // +-------+
  BasicBlock *SpecialCases = Builder.GetInsertBlock();
  SpecialCases->setName(Twine(SpecialCases->getName(), "_udiv-special-cases"));
  BasicBlock *End = SpecialCases->splitBasicBlock(Builder.GetInsertPoint(),
                                                  "udiv-end");
  BasicBlock *LoopExit  = BasicBlock::Create(Builder.getContext(),
                                             "udiv-loop-exit", F, End);
  BasicBlock *DoWhile   = BasicBlock::Create(Builder.getContext(),
                                             "udiv-do-while", F, End);
  BasicBlock *Preheader = BasicBlock::Create(Builder.getContext(),
                                             "udiv-preheader", F, End);
  BasicBlock *BB1       = BasicBlock::Create(Builder.getContext(),
                                             "udiv-bb1", F, End);

  // We'll be overwriting the terminator to insert our extra blocks
  SpecialCases->getTerminator()->eraseFromParent();

  // First off, check for special cases: dividend or divisor is zero, divisor
  // is greater than dividend, and divisor is 1.
// ; special-cases: // ; %ret0_1 = icmp eq i32 %divisor, 0 // ; %ret0_2 = icmp eq i32 %dividend, 0 // ; %ret0_3 = or i1 %ret0_1, %ret0_2 // ; %tmp0 = tail call i32 @llvm.ctlz.i32(i32 %divisor, i1 true) // ; %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %dividend, i1 true) // ; %sr = sub nsw i32 %tmp0, %tmp1 // ; %ret0_4 = icmp ugt i32 %sr, 31 // ; %ret0 = or i1 %ret0_3, %ret0_4 // ; %retDividend = icmp eq i32 %sr, 31 // ; %retVal = select i1 %ret0, i32 0, i32 %dividend // ; %earlyRet = or i1 %ret0, %retDividend // ; br i1 %earlyRet, label %end, label %bb1 Builder.SetInsertPoint(SpecialCases); Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero); Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero); Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2); Value *Tmp0 = Builder.CreateCall2(CTLZi32, Divisor, True); Value *Tmp1 = Builder.CreateCall2(CTLZi32, Dividend, True); Value *SR = Builder.CreateSub(Tmp0, Tmp1); Value *Ret0_4 = Builder.CreateICmpUGT(SR, ThirtyOne); Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4); Value *RetDividend = Builder.CreateICmpEQ(SR, ThirtyOne); Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend); Value *EarlyRet = Builder.CreateOr(Ret0, RetDividend); Builder.CreateCondBr(EarlyRet, End, BB1); // ; bb1: ; preds = %special-cases // ; %sr_1 = add i32 %sr, 1 // ; %tmp2 = sub i32 31, %sr // ; %q = shl i32 %dividend, %tmp2 // ; %skipLoop = icmp eq i32 %sr_1, 0 // ; br i1 %skipLoop, label %loop-exit, label %preheader Builder.SetInsertPoint(BB1); Value *SR_1 = Builder.CreateAdd(SR, One); Value *Tmp2 = Builder.CreateSub(ThirtyOne, SR); Value *Q = Builder.CreateShl(Dividend, Tmp2); Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero); Builder.CreateCondBr(SkipLoop, LoopExit, Preheader); // ; preheader: ; preds = %bb1 // ; %tmp3 = lshr i32 %dividend, %sr_1 // ; %tmp4 = add i32 %divisor, -1 // ; br label %do-while Builder.SetInsertPoint(Preheader); Value *Tmp3 = Builder.CreateLShr(Dividend, SR_1); Value *Tmp4 = Builder.CreateAdd(Divisor, NegOne); Builder.CreateBr(DoWhile); // ; do-while: ; preds = %do-while, %preheader // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ] // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ] // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ] // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ] // ; %tmp5 = shl i32 %r_1, 1 // ; %tmp6 = lshr i32 %q_2, 31 // ; %tmp7 = or i32 %tmp5, %tmp6 // ; %tmp8 = shl i32 %q_2, 1 // ; %q_1 = or i32 %carry_1, %tmp8 // ; %tmp9 = sub i32 %tmp4, %tmp7 // ; %tmp10 = ashr i32 %tmp9, 31 // ; %carry = and i32 %tmp10, 1 // ; %tmp11 = and i32 %tmp10, %divisor // ; %r = sub i32 %tmp7, %tmp11 // ; %sr_2 = add i32 %sr_3, -1 // ; %tmp12 = icmp eq i32 %sr_2, 0 // ; br i1 %tmp12, label %loop-exit, label %do-while Builder.SetInsertPoint(DoWhile); PHINode *Carry_1 = Builder.CreatePHI(I32Ty, 2); PHINode *SR_3 = Builder.CreatePHI(I32Ty, 2); PHINode *R_1 = Builder.CreatePHI(I32Ty, 2); PHINode *Q_2 = Builder.CreatePHI(I32Ty, 2); Value *Tmp5 = Builder.CreateShl(R_1, One); Value *Tmp6 = Builder.CreateLShr(Q_2, ThirtyOne); Value *Tmp7 = Builder.CreateOr(Tmp5, Tmp6); Value *Tmp8 = Builder.CreateShl(Q_2, One); Value *Q_1 = Builder.CreateOr(Carry_1, Tmp8); Value *Tmp9 = Builder.CreateSub(Tmp4, Tmp7); Value *Tmp10 = Builder.CreateAShr(Tmp9, 31); Value *Carry = Builder.CreateAnd(Tmp10, One); Value *Tmp11 = Builder.CreateAnd(Tmp10, Divisor); Value *R = Builder.CreateSub(Tmp7, Tmp11); Value *SR_2 = Builder.CreateAdd(SR_3, NegOne); Value *Tmp12 = Builder.CreateICmpEQ(SR_2, Zero); Builder.CreateCondBr(Tmp12, 
LoopExit, DoWhile); // ; loop-exit: ; preds = %do-while, %bb1 // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ] // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ] // ; %tmp13 = shl i32 %q_3, 1 // ; %q_4 = or i32 %carry_2, %tmp13 // ; br label %end Builder.SetInsertPoint(LoopExit); PHINode *Carry_2 = Builder.CreatePHI(I32Ty, 2); PHINode *Q_3 = Builder.CreatePHI(I32Ty, 2); Value *Tmp13 = Builder.CreateShl(Q_3, One); Value *Q_4 = Builder.CreateOr(Carry_2, Tmp13); Builder.CreateBr(End); // ; end: ; preds = %loop-exit, %special-cases // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ] // ; ret i32 %q_5 Builder.SetInsertPoint(End, End->begin()); PHINode *Q_5 = Builder.CreatePHI(I32Ty, 2); // Populate the Phis, since all values have now been created. Our Phis were: // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ] Carry_1->addIncoming(Zero, Preheader); Carry_1->addIncoming(Carry, DoWhile); // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ] SR_3->addIncoming(SR_1, Preheader); SR_3->addIncoming(SR_2, DoWhile); // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ] R_1->addIncoming(Tmp3, Preheader); R_1->addIncoming(R, DoWhile); // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ] Q_2->addIncoming(Q, Preheader); Q_2->addIncoming(Q_1, DoWhile); // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ] Carry_2->addIncoming(Zero, BB1); Carry_2->addIncoming(Carry, DoWhile); // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ] Q_3->addIncoming(Q, BB1); Q_3->addIncoming(Q_1, DoWhile); // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ] Q_5->addIncoming(Q_4, LoopExit); Q_5->addIncoming(RetVal, SpecialCases); return Q_5; }
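// A usage sketch (hypothetical helper; assumes it lives in the same file as
// the static routine above): generateUnsignedDivisionCode is meant to be
// dropped in at a scalar 32-bit udiv, with the udiv itself replaced by the
// returned quotient.
static void expandUDiv32(BinaryOperator *Div) {
  assert(Div->getOpcode() == Instruction::UDiv && "expected a udiv");
  IRBuilder<> Builder(Div);  // Generate the expansion right at the udiv.
  Value *Quotient = generateUnsignedDivisionCode(Div->getOperand(0),
                                                 Div->getOperand(1), Builder);
  Div->replaceAllUsesWith(Quotient);
  Div->eraseFromParent();
}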
bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();

  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
  //
  // The full expansion we produce is:
  //     [...]
  //     fence?
  // cmpxchg.start:
  //     %loaded = @load.linked(%addr)
  //     %should_store = icmp eq %loaded, %desired
  //     br i1 %should_store, label %cmpxchg.trystore,
  //                          label %cmpxchg.failure
  // cmpxchg.trystore:
  //     %stored = @store_conditional(%new, %addr)
  //     %success = icmp eq i32 %stored, 0
  //     br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure
  // cmpxchg.success:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.failure:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.end:
  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
  //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
  auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
  auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);

  // This grabs the DebugLoc from CI
  IRBuilder<> Builder(CI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
      Builder, Addr, MemOpOrder);
  Value *ShouldStore =
      Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");

  // If the cmpxchg doesn't actually need any ordering when it fails, we can
  // jump straight past that fence instruction (if it exists).
  Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);

  Builder.SetInsertPoint(TryStoreBB);
  Value *StoreSuccess =
      TM->getSubtargetImpl()->getTargetLowering()->emitStoreConditional(
          Builder, CI->getNewValOperand(), Addr, MemOpOrder);
  StoreSuccess = Builder.CreateICmpEQ(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : LoopBB);

  // Make sure later instructions don't get reordered with a fence if
  // necessary.
  Builder.SetInsertPoint(SuccessBB);
  insertTrailingFence(Builder, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(FailureBB);
  insertTrailingFence(Builder, FailureOrder);
  Builder.CreateBr(ExitBB);

  // Finally, we have control-flow based knowledge of whether the cmpxchg
  // succeeded or not. We expose this to later passes by converting any
  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.

  // Setup the builder so we can create any PHIs we need.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // Look for any users of the cmpxchg that are just comparing the loaded value
  // against the desired one, and replace them with the CFG-derived version.
  SmallVector<ExtractValueInst *, 2> PrunedInsts;
  for (auto User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;

    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else
      EV->replaceAllUsesWith(Success);

    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now that we're no longer iterating through
  // them.
  for (auto EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some use of the full struct return that we don't understand has
    // happened, so we've got to reconstruct it properly.
    Value *Res;
    Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);

    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}
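// A driver sketch for the expansion above (hypothetical, and assuming
// expandAtomicCmpXchg is callable from here): collect the cmpxchg instructions
// first, since the expansion splits blocks and would invalidate a plain
// instruction walk.
static bool expandAllCmpXchgs(Function &F, AtomicExpandLoadLinked &AELL) {
  SmallVector<AtomicCmpXchgInst *, 8> Worklist;
  for (BasicBlock &BB : F)
    for (Instruction &I : BB)
      if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(&I))
        Worklist.push_back(CXI);

  bool Changed = false;
  for (AtomicCmpXchgInst *CXI : Worklist)
    Changed |= AELL.expandAtomicCmpXchg(CXI);
  return Changed;
}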
/// SimplifyCode - Okay, now that we have simplified some instructions in the
/// loop, walk over it and constant prop, dce, and fold control flow where
/// possible. Note that this is effectively a very simple loop-structure-aware
/// optimizer. During processing of this loop, L could very well be deleted, so
/// it must not be used.
///
/// FIXME: When the loop optimizer is more mature, separate this out to a new
/// pass.
///
void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
  while (!Worklist.empty()) {
    Instruction *I = Worklist.back();
    Worklist.pop_back();

    // Simple DCE.
    if (isInstructionTriviallyDead(I)) {
      DEBUG(dbgs() << "Remove dead instruction: " << *I << "\n");

      // Add the instruction's operands to the worklist; they may be dead now.
      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
        if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
          Worklist.push_back(Use);
      LPM->deleteSimpleAnalysisValue(I, L);
      RemoveFromWorklist(I, Worklist);
      I->eraseFromParent();
      ++NumSimplify;
      continue;
    }

    // See if instruction simplification can hack this up.  This is common for
    // things like "select false, X, Y" after unswitching made the condition be
    // 'false'.
    if (Value *V = SimplifyInstruction(I, 0, DT))
      if (LI->replacementPreservesLCSSAForm(I, V)) {
        ReplaceUsesOfWith(I, V, Worklist, L, LPM);
        continue;
      }

    // Special case hacks that appear commonly in unswitched code.
    if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
      if (BI->isUnconditional()) {
        // If BI's parent is the only pred of the successor, fold the two
        // blocks together.
        BasicBlock *Pred = BI->getParent();
        BasicBlock *Succ = BI->getSuccessor(0);
        BasicBlock *SinglePred = Succ->getSinglePredecessor();
        if (!SinglePred) continue;  // Nothing to do.
        assert(SinglePred == Pred && "CFG broken");

        DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- "
              << Succ->getName() << "\n");

        // Resolve any single entry PHI nodes in Succ.
        while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
          ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM);

        // If Succ has any successors with PHI nodes, update them to have
        // entries coming from Pred instead of Succ.
        Succ->replaceAllUsesWith(Pred);

        // Move all of the successor contents from Succ to Pred.
        Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(),
                                   Succ->end());
        LPM->deleteSimpleAnalysisValue(BI, L);
        BI->eraseFromParent();
        RemoveFromWorklist(BI, Worklist);

        // Remove Succ from the loop tree.
        LI->removeBlock(Succ);
        LPM->deleteSimpleAnalysisValue(Succ, L);
        Succ->eraseFromParent();
        ++NumSimplify;
        continue;
      }

      if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())) {
        // Conditional branch.  Turn it into an unconditional branch, then
        // remove dead blocks.
        continue;  // FIXME: Enable.

        DEBUG(dbgs() << "Folded branch: " << *BI);
        BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue());
        BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue());
        DeadSucc->removePredecessor(BI->getParent(), true);
        Worklist.push_back(BranchInst::Create(LiveSucc, BI));
        LPM->deleteSimpleAnalysisValue(BI, L);
        BI->eraseFromParent();
        RemoveFromWorklist(BI, Worklist);
        ++NumSimplify;
        RemoveBlockIfDead(DeadSucc, Worklist, L);
      }
      continue;
    }
  }
}
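// To make the "select false" remark above concrete: after unswitching on a
// loop-invariant condition %c, the clone specialized for %c == false contains
// code like (illustrative IR, not taken from a specific test):
//
//   %x = select i1 false, i32 %a, i32 %b
//
// SimplifyInstruction folds this to plain %b, ReplaceUsesOfWith rewires the
// users, and the now-dead select is swept up by the simple DCE at the top of
// the worklist loop.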
/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2 }
/// into a phi node with a store in the successor.
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();

  // Check to see if the successor block has exactly two incoming edges.  If
  // so, see if the other predecessor contains a store to the same location.
  // If so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);

  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *P = *PI;
  BasicBlock *OtherBB = nullptr;

  if (P != StoreBB)
    OtherBB = P;

  if (++PI == pred_end(DestBB))
    return false;

  P = *PI;
  if (P != StoreBB) {
    if (OtherBB)
      return false;
    OtherBB = P;
  }
  if (++PI != pred_end(DestBB))
    return false;

  // Bail out if all the relevant blocks aren't distinct (this can happen,
  // for example, if SI is in an infinite loop).
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI(OtherBB->getTerminator());
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the 'if then
  // else' case.  There is an instruction before the branch.
  StoreInst *OtherStore = nullptr;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info.
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI == OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or is not the
    // right kind of store, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        !SI.isSameOperationAs(OtherStore))
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of
    // the destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle.  See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            !SI.isSameOperationAs(OtherStore))
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the transform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }

    // In order to eliminate the store in OtherBr, we have to make sure nothing
    // reads or overwrites the stored value in StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }

  // Insert a PHI node now if we need it.
Value *MergedVal = OtherStore->getOperand(0); if (MergedVal != SI.getOperand(0)) { PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge"); PN->addIncoming(SI.getOperand(0), SI.getParent()); PN->addIncoming(OtherStore->getOperand(0), OtherBB); MergedVal = InsertNewInstBefore(PN, DestBB->front()); } // Advance to a place where it is safe to insert the new store and // insert it. BBI = DestBB->getFirstInsertionPt(); StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(), SI.getAlignment(), SI.getOrdering(), SI.getSynchScope()); InsertNewInstBefore(NewSI, *BBI); NewSI->setDebugLoc(OtherStore->getDebugLoc()); // If the two stores had AA tags, merge them. AAMDNodes AATags; SI.getAAMetadata(AATags); if (AATags) { OtherStore->getAAMetadata(AATags, /* Merge = */ true); NewSI->setAAMetadata(AATags); } // Nuke the old stores. eraseInstFromFunction(SI); eraseInstFromFunction(*OtherStore); return true; }
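// A concrete before/after for the 'if then else' case handled above
// (illustrative IR; %P, %v1, %v2 are invented names):
//
// Before:
//   if.then:
//     store i32 %v1, i32* %P
//     br label %join
//   if.else:
//     store i32 %v2, i32* %P
//     br label %join
//
// After (both stores sink into the join block behind a phi, named
// "storemerge" as in the code above):
//   join:
//     %storemerge = phi i32 [ %v1, %if.then ], [ %v2, %if.else ]
//     store i32 %storemerge, i32* %P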
/// runOnLoop - Remove dead loops, by which we mean loops that do not impact
/// the observable behavior of the program other than finite running time. Note
/// we do ensure that this never removes a loop that might be infinite, as
/// doing so could change the halting/non-halting nature of a program.
/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
/// in order to make various safety checks work.
bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
  // We can only remove the loop if there is a preheader that we can
  // branch from after removing it.
  BasicBlock *preheader = L->getLoopPreheader();
  if (!preheader)
    return false;

  // If LoopSimplify form is not available, stay out of trouble.
  if (!L->hasDedicatedExits())
    return false;

  // We can't remove loops that contain subloops.  If the subloops were dead,
  // they would already have been removed in earlier executions of this pass.
  if (L->begin() != L->end())
    return false;

  SmallVector<BasicBlock*, 4> exitingBlocks;
  L->getExitingBlocks(exitingBlocks);

  SmallVector<BasicBlock*, 4> exitBlocks;
  L->getUniqueExitBlocks(exitBlocks);

  // We require that the loop only have a single exit block.  Otherwise, we'd
  // be in the situation of needing to be able to solve statically which exit
  // block will be branched to, or trying to preserve the branching logic in
  // a loop invariant manner.
  if (exitBlocks.size() != 1)
    return false;

  // Finally, we have to check that the loop really is dead.
  bool Changed = false;
  if (!isLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
    return Changed;

  // Don't remove loops for which we can't solve the trip count.
  // They could be infinite, in which case we'd be changing program behavior.
  ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
  const SCEV *S = SE.getMaxBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(S))
    return Changed;

  // Now that we know the removal is safe, remove the loop by changing the
  // branch from the preheader to go to the single exit block.
  BasicBlock *exitBlock = exitBlocks[0];

  // Because we're deleting a large chunk of code at once, the sequence in
  // which we remove things is very important to avoid invalidation issues.
  // Don't mess with this unless you have good reason and know what you're
  // doing.

  // Tell ScalarEvolution that the loop is deleted. Do this before
  // deleting the loop so that ScalarEvolution can look at the loop
  // to determine what it needs to clean up.
  SE.forgetLoop(L);

  // Connect the preheader directly to the exit block.
  TerminatorInst *TI = preheader->getTerminator();
  TI->replaceUsesOfWith(L->getHeader(), exitBlock);

  // Rewrite phis in the exit block to get their inputs from
  // the preheader instead of the exiting block.
  BasicBlock *exitingBlock = exitingBlocks[0];
  BasicBlock::iterator BI = exitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    int j = P->getBasicBlockIndex(exitingBlock);
    assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
    P->setIncomingBlock(j, preheader);
    for (unsigned i = 1; i < exitingBlocks.size(); ++i)
      P->removeIncomingValue(exitingBlocks[i]);
    ++BI;
  }

  // Update the dominator tree and remove the instructions and blocks that will
  // be deleted from the reference counting scheme.
  DominatorTree &DT = getAnalysis<DominatorTree>();
  SmallVector<DomTreeNode*, 8> ChildNodes;
  for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
       LI != LE; ++LI) {
    // Move all of the block's children to be children of the preheader, which
    // allows us to remove the domtree entry for the block.
    ChildNodes.insert(ChildNodes.begin(), DT[*LI]->begin(), DT[*LI]->end());
    for (SmallVector<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(),
         DE = ChildNodes.end(); DI != DE; ++DI) {
      DT.changeImmediateDominator(*DI, DT[preheader]);
    }

    ChildNodes.clear();
    DT.eraseNode(*LI);

    // Remove the block from the reference counting scheme, so that we can
    // delete it freely later.
    (*LI)->dropAllReferences();
  }

  // Erase the instructions and the blocks without having to worry
  // about ordering because we already dropped the references.
  // NOTE: This iteration is safe because erasing the block does not remove its
  // entry from the loop's block list.  We do that in the next section.
  for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
       LI != LE; ++LI)
    (*LI)->eraseFromParent();

  // Finally, remove the blocks from LoopInfo.  This has to happen late because
  // otherwise our loop iterators won't work.
  LoopInfo &loopInfo = getAnalysis<LoopInfo>();
  SmallPtrSet<BasicBlock*, 8> blocks;
  blocks.insert(L->block_begin(), L->block_end());
  for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(),
       E = blocks.end(); I != E; ++I)
    loopInfo.removeBlock(*I);

  // The last step is to inform the loop pass manager that we've
  // eliminated this loop.
  LPM.deleteLoopFromQueue(L);
  Changed = true;

  ++NumDeleted;
  return Changed;
}
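// For intuition, the kind of loop this pass removes, written as C source: the
// loop below has a computable trip count, no observable side effects, and no
// values live out of it, so the preheader can branch straight to the exit.
//
//   int tmp = 0;
//   for (int i = 0; i < 100; ++i)
//     tmp += i;                    // tmp is never read afterwards
//
// By contrast, "while (p) p = p->next;" is kept: its backedge-taken count is
// not computable, and deleting it could turn a non-terminating program into a
// terminating one.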
bool run() { // This enumerates the locals that we actually care about and packs them. So for example // if we use local 1, 3, 4, 5, 7, then we remap them: 1->0, 3->1, 4->2, 5->3, 7->4. We // treat a variable as being "used" if there exists an access to it (SetLocal, GetLocal, // Flush, PhantomLocal). BitVector usedLocals; // Collect those variables that are used from IR. bool hasNodesThatNeedFixup = false; for (BlockIndex blockIndex = m_graph.numBlocks(); blockIndex--;) { BasicBlock* block = m_graph.block(blockIndex); if (!block) continue; for (unsigned nodeIndex = block->size(); nodeIndex--;) { Node* node = block->at(nodeIndex); switch (node->op()) { case GetLocal: case SetLocal: case Flush: case PhantomLocal: { VariableAccessData* variable = node->variableAccessData(); if (variable->local().isArgument()) break; usedLocals.set(variable->local().toLocal()); break; } case GetLocalUnlinked: { VirtualRegister operand = node->unlinkedLocal(); if (operand.isArgument()) break; usedLocals.set(operand.toLocal()); hasNodesThatNeedFixup = true; break; } case LoadVarargs: case ForwardVarargs: { LoadVarargsData* data = node->loadVarargsData(); if (data->count.isLocal()) usedLocals.set(data->count.toLocal()); if (data->start.isLocal()) { // This part really relies on the contiguity of stack layout // assignments. ASSERT(VirtualRegister(data->start.offset() + data->limit - 1).isLocal()); for (unsigned i = data->limit; i--;) usedLocals.set(VirtualRegister(data->start.offset() + i).toLocal()); } // the else case shouldn't happen. hasNodesThatNeedFixup = true; break; } case PutStack: case GetStack: { StackAccessData* stack = node->stackAccessData(); if (stack->local.isArgument()) break; usedLocals.set(stack->local.toLocal()); break; } default: break; } } } for (InlineCallFrameSet::iterator iter = m_graph.m_plan.inlineCallFrames->begin(); !!iter; ++iter) { InlineCallFrame* inlineCallFrame = *iter; if (inlineCallFrame->isVarargs()) { usedLocals.set(VirtualRegister( JSStack::ArgumentCount + inlineCallFrame->stackOffset).toLocal()); } for (unsigned argument = inlineCallFrame->arguments.size(); argument-- > 1;) { usedLocals.set(VirtualRegister( virtualRegisterForArgument(argument).offset() + inlineCallFrame->stackOffset).toLocal()); } } Vector<unsigned> allocation(usedLocals.size()); m_graph.m_nextMachineLocal = 0; for (unsigned i = 0; i < usedLocals.size(); ++i) { if (!usedLocals.get(i)) { allocation[i] = UINT_MAX; continue; } allocation[i] = m_graph.m_nextMachineLocal++; } for (unsigned i = m_graph.m_variableAccessData.size(); i--;) { VariableAccessData* variable = &m_graph.m_variableAccessData[i]; if (!variable->isRoot()) continue; if (variable->local().isArgument()) { variable->machineLocal() = variable->local(); continue; } size_t local = variable->local().toLocal(); if (local >= allocation.size()) continue; if (allocation[local] == UINT_MAX) continue; variable->machineLocal() = assign(allocation, variable->local()); } for (StackAccessData* data : m_graph.m_stackAccessData) { if (!data->local.isLocal()) { data->machineLocal = data->local; continue; } if (static_cast<size_t>(data->local.toLocal()) >= allocation.size()) continue; if (allocation[data->local.toLocal()] == UINT_MAX) continue; data->machineLocal = assign(allocation, data->local); } // This register is never valid for DFG code blocks. 
codeBlock()->setActivationRegister(VirtualRegister()); if (LIKELY(!m_graph.hasDebuggerEnabled())) codeBlock()->setScopeRegister(VirtualRegister()); else codeBlock()->setScopeRegister(assign(allocation, codeBlock()->scopeRegister())); for (unsigned i = m_graph.m_inlineVariableData.size(); i--;) { InlineVariableData data = m_graph.m_inlineVariableData[i]; InlineCallFrame* inlineCallFrame = data.inlineCallFrame; if (inlineCallFrame->isVarargs()) { inlineCallFrame->argumentCountRegister = assign( allocation, VirtualRegister(inlineCallFrame->stackOffset + JSStack::ArgumentCount)); } for (unsigned argument = inlineCallFrame->arguments.size(); argument-- > 1;) { ArgumentPosition& position = m_graph.m_argumentPositions[ data.argumentPositionStart + argument]; VariableAccessData* variable = position.someVariable(); ValueSource source; if (!variable) source = ValueSource(SourceIsDead); else { source = ValueSource::forFlushFormat( variable->machineLocal(), variable->flushFormat()); } inlineCallFrame->arguments[argument] = source.valueRecovery(); } RELEASE_ASSERT(inlineCallFrame->isClosureCall == !!data.calleeVariable); if (inlineCallFrame->isClosureCall) { VariableAccessData* variable = data.calleeVariable->find(); ValueSource source = ValueSource::forFlushFormat( variable->machineLocal(), variable->flushFormat()); inlineCallFrame->calleeRecovery = source.valueRecovery(); } else RELEASE_ASSERT(inlineCallFrame->calleeRecovery.isConstant()); } // Fix GetLocalUnlinked's variable references. if (hasNodesThatNeedFixup) { for (BlockIndex blockIndex = m_graph.numBlocks(); blockIndex--;) { BasicBlock* block = m_graph.block(blockIndex); if (!block) continue; for (unsigned nodeIndex = block->size(); nodeIndex--;) { Node* node = block->at(nodeIndex); switch (node->op()) { case GetLocalUnlinked: { node->setUnlinkedMachineLocal(assign(allocation, node->unlinkedLocal())); break; } case LoadVarargs: case ForwardVarargs: { LoadVarargsData* data = node->loadVarargsData(); data->machineCount = assign(allocation, data->count); data->machineStart = assign(allocation, data->start); break; } default: break; } } } } return true; }
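// The allocation loop above is a straightforward compaction of a sparse used
// set. Here is a standalone sketch of just that mapping (std::vector<bool>
// standing in for the BitVector of used locals; not part of the phase):

#include <climits>
#include <vector>

// Map each used local index to a dense machine slot. Unused locals get
// UINT_MAX so that stray lookups are easy to catch.
static std::vector<unsigned> packLocals(const std::vector<bool>& usedLocals,
    unsigned& nextMachineLocal)
{
    std::vector<unsigned> allocation(usedLocals.size(), UINT_MAX);
    nextMachineLocal = 0;
    for (size_t i = 0; i < usedLocals.size(); ++i) {
        if (usedLocals[i])
            allocation[i] = nextMachineLocal++;
    }
    // E.g. used locals {1, 3, 4, 5, 7} map to 1->0, 3->1, 4->2, 5->3, 7->4.
    return allocation;
}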
void validate() { // NB. This code is not written for performance, since it is not intended to run // in release builds. // Validate that all local variables at the head of the root block are dead. BasicBlock* root = m_graph.m_blocks[0].get(); for (unsigned i = 0; i < root->variablesAtHead.numberOfLocals(); ++i) V_EQUAL((static_cast<VirtualRegister>(i), 0), static_cast<Node*>(0), root->variablesAtHead.local(i)); // Validate ref counts and uses. HashMap<Node*, unsigned> myRefCounts; for (BlockIndex blockIndex = 0; blockIndex < m_graph.m_blocks.size(); ++blockIndex) { BasicBlock* block = m_graph.m_blocks[blockIndex].get(); if (!block || !block->isReachable) continue; for (size_t i = 0; i < block->numNodes(); ++i) myRefCounts.add(block->node(i), 0); } HashSet<Node*> acceptableNodes; for (BlockIndex blockIndex = 0; blockIndex < m_graph.m_blocks.size(); ++blockIndex) { BasicBlock* block = m_graph.m_blocks[blockIndex].get(); if (!block || !block->isReachable) continue; for (size_t i = 0; i < block->numNodes(); ++i) { Node* node = block->node(i); acceptableNodes.add(node); if (!node->shouldGenerate()) continue; for (unsigned j = 0; j < m_graph.numChildren(node); ++j) { // Phi children in LoadStore form are invalid. if (m_graph.m_form == LoadStore && block->isPhiIndex(i)) continue; Edge edge = m_graph.child(node, j); if (!edge) continue; myRefCounts.find(edge.node())->value++; // Unless I'm a Flush, Phantom, GetLocal, or Phi, my children should hasResult(). switch (node->op()) { case Flush: case GetLocal: VALIDATE((node, edge), edge->hasVariableAccessData()); VALIDATE((node, edge), edge->variableAccessData() == node->variableAccessData()); break; case PhantomLocal: VALIDATE((node, edge), edge->hasVariableAccessData()); VALIDATE((node, edge), edge->variableAccessData() == node->variableAccessData()); VALIDATE((node, edge), edge->op() != SetLocal); break; case Phi: VALIDATE((node, edge), edge->hasVariableAccessData()); if (m_graph.m_unificationState == LocallyUnified) break; VALIDATE((node, edge), edge->variableAccessData() == node->variableAccessData()); break; case Phantom: switch (m_graph.m_form) { case LoadStore: if (j) { VALIDATE((node, edge), edge->hasResult()); break; } switch (edge->op()) { case Phi: case SetArgument: case SetLocal: break; default: VALIDATE((node, edge), edge->hasResult()); break; } break; case ThreadedCPS: VALIDATE((node, edge), edge->hasResult()); break; } break; default: VALIDATE((node, edge), edge->hasResult()); break; } } } } for (BlockIndex blockIndex = 0; blockIndex < m_graph.m_blocks.size(); ++blockIndex) { BasicBlock* block = m_graph.m_blocks[blockIndex].get(); if (!block || !block->isReachable) continue; HashSet<Node*> phisInThisBlock; HashSet<Node*> nodesInThisBlock; for (size_t i = 0; i < block->numNodes(); ++i) { Node* node = block->node(i); nodesInThisBlock.add(node); if (block->isPhiIndex(i)) phisInThisBlock.add(node); if (m_graph.m_refCountState == ExactRefCount) V_EQUAL((node), myRefCounts.get(node), node->adjustedRefCount()); else V_EQUAL((node), node->refCount(), 1); for (unsigned j = 0; j < m_graph.numChildren(node); ++j) { Edge edge = m_graph.child(node, j); if (!edge) continue; VALIDATE((node, edge), acceptableNodes.contains(edge.node())); } } for (size_t i = 0; i < block->phis.size(); ++i) { Node* node = block->phis[i]; ASSERT(phisInThisBlock.contains(node)); VALIDATE((node), node->op() == Phi); VirtualRegister local = node->local(); for (unsigned j = 0; j < m_graph.numChildren(node); ++j) { // Phi children in LoadStore form are invalid. 
if (m_graph.m_form == LoadStore && block->isPhiIndex(i)) continue; Edge edge = m_graph.child(node, j); if (!edge) continue; VALIDATE( (node, edge), edge->op() == SetLocal || edge->op() == SetArgument || edge->op() == Flush || edge->op() == Phi || edge->op() == ZombieHint || edge->op() == MovHint || edge->op() == MovHintAndCheck); if (phisInThisBlock.contains(edge.node())) continue; if (nodesInThisBlock.contains(edge.node())) { VALIDATE( (node, edge), edge->op() == SetLocal || edge->op() == ZombieHint || edge->op() == MovHint || edge->op() == MovHintAndCheck || edge->op() == SetArgument || edge->op() == Flush); continue; } // There must exist a predecessor block that has this node index in // its tail variables. bool found = false; for (unsigned k = 0; k < block->m_predecessors.size(); ++k) { BasicBlock* prevBlock = m_graph.m_blocks[block->m_predecessors[k]].get(); VALIDATE((Block, block->m_predecessors[k]), prevBlock); VALIDATE((Block, block->m_predecessors[k]), prevBlock->isReachable); Node* prevNode = prevBlock->variablesAtTail.operand(local); // If we have a Phi that is not referring to *this* block then all predecessors // must have that local available. VALIDATE((local, blockIndex, Block, block->m_predecessors[k]), prevNode); switch (prevNode->op()) { case GetLocal: case Flush: case PhantomLocal: prevNode = prevNode->child1().node(); break; default: break; } if (node->shouldGenerate()) { VALIDATE((local, block->m_predecessors[k], prevNode), prevNode->shouldGenerate()); } VALIDATE( (local, block->m_predecessors[k], prevNode), prevNode->op() == SetLocal || prevNode->op() == MovHint || prevNode->op() == MovHintAndCheck || prevNode->op() == ZombieHint || prevNode->op() == SetArgument || prevNode->op() == Phi); if (prevNode == edge.node()) { found = true; break; } // At this point it cannot refer into this block. 
VALIDATE((local, block->m_predecessors[k], prevNode), !prevBlock->isInBlock(edge.node())); } VALIDATE((node, edge), found); } } Operands<size_t> getLocalPositions( block->variablesAtHead.numberOfArguments(), block->variablesAtHead.numberOfLocals()); Operands<size_t> setLocalPositions( block->variablesAtHead.numberOfArguments(), block->variablesAtHead.numberOfLocals()); for (size_t i = 0; i < block->variablesAtHead.numberOfArguments(); ++i) { VALIDATE((static_cast<VirtualRegister>(argumentToOperand(i)), blockIndex), !block->variablesAtHead.argument(i) || block->variablesAtHead.argument(i)->hasVariableAccessData()); if (m_graph.m_form == ThreadedCPS) VALIDATE((static_cast<VirtualRegister>(argumentToOperand(i)), blockIndex), !block->variablesAtTail.argument(i) || block->variablesAtTail.argument(i)->hasVariableAccessData()); getLocalPositions.argument(i) = notSet; setLocalPositions.argument(i) = notSet; } for (size_t i = 0; i < block->variablesAtHead.numberOfLocals(); ++i) { VALIDATE((static_cast<VirtualRegister>(i), blockIndex), !block->variablesAtHead.local(i) || block->variablesAtHead.local(i)->hasVariableAccessData()); if (m_graph.m_form == ThreadedCPS) VALIDATE((static_cast<VirtualRegister>(i), blockIndex), !block->variablesAtTail.local(i) || block->variablesAtTail.local(i)->hasVariableAccessData()); getLocalPositions.local(i) = notSet; setLocalPositions.local(i) = notSet; } for (size_t i = 0; i < block->size(); ++i) { Node* node = block->at(i); ASSERT(nodesInThisBlock.contains(node)); VALIDATE((node), node->op() != Phi); for (unsigned j = 0; j < m_graph.numChildren(node); ++j) { Edge edge = m_graph.child(node, j); if (!edge) continue; VALIDATE((node, edge), nodesInThisBlock.contains(edge.node())); switch (node->op()) { case PhantomLocal: case GetLocal: case Flush: break; case Phantom: if (m_graph.m_form == LoadStore && !j) break; default: VALIDATE((node, edge), !phisInThisBlock.contains(edge.node())); break; } } if (!node->shouldGenerate()) continue; switch (node->op()) { case GetLocal: if (node->variableAccessData()->isCaptured()) break; // Ignore GetLocal's that we know to be dead, but that the graph // doesn't yet know to be dead. if (!myRefCounts.get(node)) break; if (m_graph.m_form == ThreadedCPS) VALIDATE((node, blockIndex), getLocalPositions.operand(node->local()) == notSet); getLocalPositions.operand(node->local()) = i; break; case SetLocal: if (node->variableAccessData()->isCaptured()) break; // Only record the first SetLocal. There may be multiple SetLocals // because of flushing. if (setLocalPositions.operand(node->local()) != notSet) break; setLocalPositions.operand(node->local()) = i; break; default: break; } } if (m_graph.m_form == LoadStore) continue; for (size_t i = 0; i < block->variablesAtHead.numberOfArguments(); ++i) { checkOperand( blockIndex, getLocalPositions, setLocalPositions, argumentToOperand(i)); } for (size_t i = 0; i < block->variablesAtHead.numberOfLocals(); ++i) { checkOperand( blockIndex, getLocalPositions, setLocalPositions, i); } } }
/// Insert code in the prolog code when unrolling a loop with a
/// run-time trip-count.
///
/// This method assumes that the loop unroll factor is the total number
/// of loop bodies in the loop after unrolling. (Some folks refer
/// to the unroll factor as the number of *extra* copies added).
/// We assume also that the loop unroll factor is a power-of-two. So, after
/// unrolling the loop, the number of loop bodies executed is 2,
/// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch
/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
/// the switch instruction is generated.
///
///        extraiters = tripcount % loopfactor
///        if (extraiters == 0) jump Loop:
///        else jump Prol
/// Prol:  LoopBody;
///        extraiters -= 1                 // Omitted if unroll factor is 2.
///        if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
///        if (tripcount < loopfactor) jump End
/// Loop:
/// ...
/// End:
///
bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
                                   bool AllowExpensiveTripCount, LoopInfo *LI,
                                   LPPassManager *LPM) {
  // For now, only unroll loops that contain a single exit.
  if (!L->getExitingBlock())
    return false;

  // Make sure the loop is in canonical form, and there is a single
  // exit block only.
  if (!L->isLoopSimplifyForm() || !L->getUniqueExitBlock())
    return false;

  // Use Scalar Evolution to compute the trip count.  This allows more loops
  // to be unrolled than relying on induction var simplification.
  if (!LPM)
    return false;
  auto *SEWP = LPM->getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
  if (!SEWP)
    return false;
  ScalarEvolution &SE = SEWP->getSE();

  // Only unroll loops with a computable trip count, and the trip count needs
  // to be an int value (allowing a pointer type is a TODO item).
  const SCEV *BECountSC = SE.getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BECountSC) ||
      !BECountSC->getType()->isIntegerTy())
    return false;

  unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();

  // Add 1 since the backedge count doesn't include the first loop iteration.
  const SCEV *TripCountSC =
      SE.getAddExpr(BECountSC, SE.getConstant(BECountSC->getType(), 1));
  if (isa<SCEVCouldNotCompute>(TripCountSC))
    return false;

  BasicBlock *Header = L->getHeader();
  const DataLayout &DL = Header->getModule()->getDataLayout();
  SCEVExpander Expander(SE, DL, "loop-unroll");
  if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L))
    return false;

  // We only handle cases when the unroll factor is a power of 2.
  // Count is the loop unroll factor, the number of extra copies added + 1.
  if (!isPowerOf2_32(Count))
    return false;

  // This constraint lets us deal with an overflowing trip count easily; see
  // the comment on ModVal below.
  if (Log2_32(Count) > BEWidth)
    return false;

  // If this loop is nested, then the loop unroller changes the code in the
  // parent loop, so the Scalar Evolution pass needs to be run again.
  if (Loop *ParentLoop = L->getParentLoop())
    SE.forgetLoop(ParentLoop);

  // Grab analyses that we preserve.
  auto *DTWP = LPM->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
  BasicBlock *PH = L->getLoopPreheader();
  BasicBlock *Latch = L->getLoopLatch();
  // It helps to split the original preheader twice, once for the end of the
  // prolog code and once for a new loop preheader.
  BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
  BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
  BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());

  // Compute the number of extra iterations required, which is:
  //  extra iterations = run-time trip count % loop unroll factor (== Count)
  Value *TripCount = Expander.expandCodeFor(TripCountSC,
                                            TripCountSC->getType(),
                                            PreHeaderBR);
  Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
                                          PreHeaderBR);

  IRBuilder<> B(PreHeaderBR);
  Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");

  // If ModVal is zero, we know that either
  //  1. there are no iterations to be run in the prologue loop
  // OR
  //  2. the addition computing TripCount overflowed
  //
  // If (2) is true, we know that TripCount really is (1 << BEWidth) and so the
  // number of iterations that remain to be run in the original loop is a
  // multiple of Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
  // explicitly check this above).

  Value *BranchVal = B.CreateIsNotNull(ModVal, "lcmp.mod");

  // Branch to either the extra iterations or the cloned/unrolled loop.
  // We will fix up the true branch label when adding loop body copies.
  B.CreateCondBr(BranchVal, PEnd, PEnd);
  assert(PreHeaderBR->isUnconditional() &&
         PreHeaderBR->getSuccessor(0) == PEnd &&
         "CFG edges in Preheader are not correct");
  PreHeaderBR->eraseFromParent();
  Function *F = Header->getParent();
  // Get an ordered list of blocks in the loop to help with the ordering of the
  // cloned blocks in the prolog code.
  LoopBlocksDFS LoopBlocks(L);
  LoopBlocks.perform(LI);

  //
  // For each extra loop iteration, create a copy of the loop's basic blocks
  // and generate a condition that branches to the copy depending on the
  // number of 'left over' iterations.
  //
  std::vector<BasicBlock *> NewBlocks;
  ValueToValueMapTy VMap;

  bool UnrollPrologue = Count == 2;

  // Clone all the basic blocks in the loop. If Count is 2, we don't clone
  // the loop, otherwise we create a cloned loop to execute the extra
  // iterations. This function adds the appropriate CFG connections.
  CloneLoopBlocks(L, ModVal, UnrollPrologue, PH, PEnd, NewBlocks, LoopBlocks,
                  VMap, LI);

  // Insert the cloned blocks into the function just before the original loop.
  F->getBasicBlockList().splice(PEnd->getIterator(), F->getBasicBlockList(),
                                NewBlocks[0]->getIterator(), F->end());

  // Rewrite the cloned instruction operands to use the values created when
  // the clone is created.
  for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
    for (BasicBlock::iterator I = NewBlocks[i]->begin(),
                              E = NewBlocks[i]->end();
         I != E; ++I) {
      RemapInstruction(&*I, VMap,
                       RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
    }
  }

  // Connect the prolog code to the original loop and update the PHI functions.
  BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]);
  ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap, DT, LI,
                LPM->getAsPass());
  NumRuntimeUnrolled++;
  return true;
}
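// Worked example of the prolog arithmetic above: with Count = 4 and a
// run-time trip count of 10, xtraiter = 10 & (4 - 1) = 2, so the prolog body
// runs twice and the unrolled loop then executes the remaining 8 iterations
// as 2 passes over the 4 copies of the body. With a trip count of 8, xtraiter
// is 0 and control branches straight to the unrolled loop, skipping the
// prolog entirely.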
/// ProcessInstruction - Given an instruction in the loop, check to see if it /// has any uses that are outside the current loop. If so, insert LCSSA PHI /// nodes and rewrite the uses. bool LCSSA::ProcessInstruction(Instruction *Inst, const SmallVectorImpl<BasicBlock*> &ExitBlocks) { SmallVector<Use*, 16> UsesToRewrite; BasicBlock *InstBB = Inst->getParent(); for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) { User *U = *UI; BasicBlock *UserBB = cast<Instruction>(U)->getParent(); if (PHINode *PN = dyn_cast<PHINode>(U)) UserBB = PN->getIncomingBlock(UI); if (InstBB != UserBB && !inLoop(UserBB)) UsesToRewrite.push_back(&UI.getUse()); } // If there are no uses outside the loop, exit with no change. if (UsesToRewrite.empty()) return false; ++NumLCSSA; // We are applying the transformation // Invoke instructions are special in that their result value is not available // along their unwind edge. The code below tests to see whether DomBB dominates // the value, so adjust DomBB to the normal destination block, which is // effectively where the value is first usable. BasicBlock *DomBB = Inst->getParent(); if (InvokeInst *Inv = dyn_cast<InvokeInst>(Inst)) DomBB = Inv->getNormalDest(); DomTreeNode *DomNode = DT->getNode(DomBB); SmallVector<PHINode*, 16> AddedPHIs; SSAUpdater SSAUpdate; SSAUpdate.Initialize(Inst->getType(), Inst->getName()); // Insert the LCSSA phi's into all of the exit blocks dominated by the // value, and add them to the Phi's map. for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(), BBE = ExitBlocks.end(); BBI != BBE; ++BBI) { BasicBlock *ExitBB = *BBI; if (!DT->dominates(DomNode, DT->getNode(ExitBB))) continue; // If we already inserted something for this BB, don't reprocess it. if (SSAUpdate.HasValueForBlock(ExitBB)) continue; PHINode *PN = PHINode::Create(Inst->getType(), PredCache.GetNumPreds(ExitBB), Inst->getName()+".lcssa", ExitBB->begin()); // Add inputs from inside the loop for this PHI. for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) { PN->addIncoming(Inst, *PI); // If the exit block has a predecessor not within the loop, arrange for // the incoming value use corresponding to that predecessor to be // rewritten in terms of a different LCSSA PHI. if (!inLoop(*PI)) UsesToRewrite.push_back( &PN->getOperandUse( PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1))); } AddedPHIs.push_back(PN); // Remember that this phi makes the value alive in this block. SSAUpdate.AddAvailableValue(ExitBB, PN); } // Rewrite all uses outside the loop in terms of the new PHIs we just // inserted. for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) { // If this use is in an exit block, rewrite to use the newly inserted PHI. // This is required for correctness because SSAUpdate doesn't handle uses in // the same block. It assumes the PHI we inserted is at the end of the // block. Instruction *User = cast<Instruction>(UsesToRewrite[i]->getUser()); BasicBlock *UserBB = User->getParent(); if (PHINode *PN = dyn_cast<PHINode>(User)) UserBB = PN->getIncomingBlock(*UsesToRewrite[i]); if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) { // Tell the VHs that the uses changed. This updates SCEV's caches. if (UsesToRewrite[i]->get()->hasValueHandle()) ValueHandleBase::ValueIsRAUWd(*UsesToRewrite[i], UserBB->begin()); UsesToRewrite[i]->set(UserBB->begin()); continue; } // Otherwise, do full PHI insertion. 
SSAUpdate.RewriteUse(*UsesToRewrite[i]); } // Remove PHI nodes that did not have any uses rewritten. for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) { if (AddedPHIs[i]->use_empty()) AddedPHIs[i]->eraseFromParent(); } return true; }
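// A small before/after for the rewrite above (illustrative IR). Inside the
// loop, %inc is used in the exit block, which violates LCSSA:
//
//   loop:
//     %inc = add i32 %iv, 1
//     br i1 %cond, label %loop, label %exit
//   exit:
//     %use = add i32 %inc, 10
//
// After ProcessInstruction, every use outside the loop is funneled through a
// single-input PHI in the exit block:
//
//   exit:
//     %inc.lcssa = phi i32 [ %inc, %loop ]
//     %use = add i32 %inc.lcssa, 10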
/// isLinkListLoop - Check whether pLoop traverses a linked list: look for
/// header PHI nodes of struct-pointer type whose incoming values from inside
/// the loop are all produced by dereferencing the list node (as decided by
/// isReachableThroughLinkListDereference). For each such PHI, record its
/// incoming values from outside the loop (the list heads) in
/// LinkListHeaderMapping.
bool isLinkListLoop(Loop *pLoop,
                    map<PHINode *, set<Value *> > &LinkListHeaderMapping) {
  BasicBlock *pHeader = pLoop->getHeader();

  // Collect the header PHIs that produce a pointer to a struct; these are the
  // candidate "current node" variables. PHIs are grouped at the top of the
  // block, so stop at the first non-PHI instruction.
  vector<PHINode *> vecToDo;
  for (BasicBlock::iterator II = pHeader->begin(); II != pHeader->end(); ++II) {
    if (!isa<PHINode>(II))
      break;
    if (PointerType *pPointerType = dyn_cast<PointerType>(II->getType()))
      if (isa<StructType>(pPointerType->getElementType()))
        vecToDo.push_back(cast<PHINode>(II));
  }

  if (vecToDo.empty())
    return false;

  for (vector<PHINode *>::iterator itVecPhi = vecToDo.begin(),
       itVecPhiEnd = vecToDo.end(); itVecPhi != itVecPhiEnd; ++itVecPhi) {
    PHINode *pPHI = *itVecPhi;

    // Every incoming value from inside the loop must be an instruction that
    // reaches the PHI through a chain of linked-list dereferences.
    bool bFlag = true;
    for (unsigned i = 0; i < pPHI->getNumIncomingValues(); ++i) {
      if (!pLoop->contains(pPHI->getIncomingBlock(i)))
        continue;
      Instruction *pInst = dyn_cast<Instruction>(pPHI->getIncomingValue(i));
      if (!pInst ||
          !isReachableThroughLinkListDereference(pPHI, pInst, pLoop)) {
        bFlag = false;
        break;
      }
    }

    // This PHI walks a list; the values flowing in from outside the loop are
    // the possible list heads.
    if (bFlag) {
      for (unsigned i = 0; i < pPHI->getNumIncomingValues(); ++i) {
        if (!pLoop->contains(pPHI->getIncomingBlock(i)))
          LinkListHeaderMapping[pPHI].insert(pPHI->getIncomingValue(i));
      }
    }
  }

  return !LinkListHeaderMapping.empty();
}
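// The shape this matcher is after, written as C source (illustrative): the
// header PHI for 'p' has type %struct.node*, its in-loop input is the load of
// p->next, and its out-of-loop input is 'head', which is what ends up recorded
// as the list header in LinkListHeaderMapping.
//
//   struct node { int val; struct node *next; };
//
//   for (struct node *p = head; p != 0; p = p->next)
//     sum += p->val;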
/// InlineFunction - This function inlines the called function into the basic /// block of the caller. This returns false if it is not possible to inline /// this call. The program is still in a well defined state if this occurs /// though. /// /// Note that this only does one level of inlining. For example, if the /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now /// exists in the instruction stream. Similarly this will inline a recursive /// function by one level. bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, bool InsertLifetime) { Instruction *TheCall = CS.getInstruction(); assert(TheCall->getParent() && TheCall->getParent()->getParent() && "Instruction not in function!"); // If IFI has any state in it, zap it before we fill it in. IFI.reset(); const Function *CalledFunc = CS.getCalledFunction(); if (CalledFunc == 0 || // Can't inline external function or indirect CalledFunc->isDeclaration() || // call, or call to a vararg function! CalledFunc->getFunctionType()->isVarArg()) return false; // If the call to the callee is not a tail call, we must clear the 'tail' // flags on any calls that we inline. bool MustClearTailCallFlags = !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall()); // If the call to the callee cannot throw, set the 'nounwind' flag on any // calls that we inline. bool MarkNoUnwind = CS.doesNotThrow(); BasicBlock *OrigBB = TheCall->getParent(); Function *Caller = OrigBB->getParent(); // GC poses two hazards to inlining, which only occur when the callee has GC: // 1. If the caller has no GC, then the callee's GC must be propagated to the // caller. // 2. If the caller has a differing GC, it is invalid to inline. if (CalledFunc->hasGC()) { if (!Caller->hasGC()) Caller->setGC(CalledFunc->getGC()); else if (CalledFunc->getGC() != Caller->getGC()) return false; } // Get the personality function from the callee if it contains a landing pad. Value *CalleePersonality = 0; for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I) if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { const BasicBlock *BB = II->getUnwindDest(); const LandingPadInst *LP = BB->getLandingPadInst(); CalleePersonality = LP->getPersonalityFn(); break; } // Find the personality function used by the landing pads of the caller. If it // exists, then check to see that it matches the personality function used in // the callee. if (CalleePersonality) { for (Function::const_iterator I = Caller->begin(), E = Caller->end(); I != E; ++I) if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { const BasicBlock *BB = II->getUnwindDest(); const LandingPadInst *LP = BB->getLandingPadInst(); // If the personality functions match, then we can perform the // inlining. Otherwise, we can't inline. // TODO: This isn't 100% true. Some personality functions are proper // supersets of others and can be used in place of the other. if (LP->getPersonalityFn() != CalleePersonality) return false; break; } } // Get an iterator to the last basic block in the function, which will have // the new function inlined after it. Function::iterator LastBlock = &Caller->back(); // Make sure to capture all of the return instructions from the cloned // function. SmallVector<ReturnInst*, 8> Returns; ClonedCodeInfo InlinedFunctionInfo; Function::iterator FirstNewBlock; { // Scope to destroy VMap after cloning. 
ValueToValueMapTy VMap; assert(CalledFunc->arg_size() == CS.arg_size() && "No varargs calls can be inlined!"); // Calculate the vector of arguments to pass into the function cloner, which // matches up the formal to the actual argument values. CallSite::arg_iterator AI = CS.arg_begin(); unsigned ArgNo = 0; for (Function::const_arg_iterator I = CalledFunc->arg_begin(), E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { Value *ActualArg = *AI; // When byval arguments actually inlined, we need to make the copy implied // by them explicit. However, we don't do this if the callee is readonly // or readnone, because the copy would be unneeded: the callee doesn't // modify the struct. if (CS.isByValArgument(ArgNo)) { ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, CalledFunc->getParamAlignment(ArgNo+1)); // Calls that we inline may use the new alloca, so we need to clear // their 'tail' flags if HandleByValArgument introduced a new alloca and // the callee has calls. MustClearTailCallFlags |= ActualArg != *AI; } VMap[I] = ActualArg; } // We want the inliner to prune the code as it copies. We would LOVE to // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", &InlinedFunctionInfo, IFI.TD, TheCall); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; // Update the callgraph if requested. if (IFI.CG) UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); // Update inlined instructions' line number information. fixupLineNumbers(Caller, FirstNewBlock, TheCall); } // If there are any alloca instructions in the block that used to be the entry // block for the callee, move them to the entry block of the caller. First // calculate which instruction they should be inserted before. We insert the // instructions at the end of the current alloca list. { BasicBlock::iterator InsertPoint = Caller->begin()->begin(); for (BasicBlock::iterator I = FirstNewBlock->begin(), E = FirstNewBlock->end(); I != E; ) { AllocaInst *AI = dyn_cast<AllocaInst>(I++); if (AI == 0) continue; // If the alloca is now dead, remove it. This often occurs due to code // specialization. if (AI->use_empty()) { AI->eraseFromParent(); continue; } if (!isa<Constant>(AI->getArraySize())) continue; // Keep track of the static allocas that we inline into the caller. IFI.StaticAllocas.push_back(AI); // Scan for the block of allocas that we can move over, and move them // all at once. while (isa<AllocaInst>(I) && isa<Constant>(cast<AllocaInst>(I)->getArraySize())) { IFI.StaticAllocas.push_back(cast<AllocaInst>(I)); ++I; } // Transfer all of the allocas over in a block. Using splice means // that the instructions aren't removed from the symbol table, then // reinserted. Caller->getEntryBlock().getInstList().splice(InsertPoint, FirstNewBlock->getInstList(), AI, I); } } // Leave lifetime markers for the static alloca's, scoping them to the // function we just inlined. if (InsertLifetime && !IFI.StaticAllocas.empty()) { IRBuilder<> builder(FirstNewBlock->begin()); for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { AllocaInst *AI = IFI.StaticAllocas[ai]; // If the alloca is already scoped to something smaller than the whole // function then there's no need to add redundant, less accurate markers. 
if (hasLifetimeMarkers(AI)) continue; // Try to determine the size of the allocation. ConstantInt *AllocaSize = 0; if (ConstantInt *AIArraySize = dyn_cast<ConstantInt>(AI->getArraySize())) { if (IFI.TD) { Type *AllocaType = AI->getAllocatedType(); uint64_t AllocaTypeSize = IFI.TD->getTypeAllocSize(AllocaType); uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); assert(AllocaArraySize > 0 && "array size of AllocaInst is zero"); // Check that array size doesn't saturate uint64_t and doesn't // overflow when it's multiplied by type size. if (AllocaArraySize != ~0ULL && UINT64_MAX / AllocaArraySize >= AllocaTypeSize) { AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), AllocaArraySize * AllocaTypeSize); } } } builder.CreateLifetimeStart(AI, AllocaSize); for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) { IRBuilder<> builder(Returns[ri]); builder.CreateLifetimeEnd(AI, AllocaSize); } } } // If the inlined code contained dynamic alloca instructions, wrap the inlined // code with llvm.stacksave/llvm.stackrestore intrinsics. if (InlinedFunctionInfo.ContainsDynamicAllocas) { Module *M = Caller->getParent(); // Get the two intrinsics we care about. Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); Function *StackRestore = Intrinsic::getDeclaration(M, Intrinsic::stackrestore); // Insert the llvm.stacksave. CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin()) .CreateCall(StackSave, "savedstack"); // Insert a call to llvm.stackrestore before any return instructions in the // inlined function. for (unsigned i = 0, e = Returns.size(); i != e; ++i) { IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr); } } // If we are inlining a tail call instruction through a call site that isn't // marked 'tail', we must remove the tail marker for any calls in the inlined // code. Also, calls inlined through a 'nounwind' call site should be marked // 'nounwind'. if (InlinedFunctionInfo.ContainsCalls && (MustClearTailCallFlags || MarkNoUnwind)) { for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (CallInst *CI = dyn_cast<CallInst>(I)) { if (MustClearTailCallFlags) CI->setTailCall(false); if (MarkNoUnwind) CI->setDoesNotThrow(); } } // If we are inlining for an invoke instruction, we must make sure to rewrite // any call instructions into invoke instructions. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo); // If we cloned in _exactly one_ basic block, and if that block ends in a // return instruction, we splice the body of the inlined callee directly into // the calling basic block. if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { // Move all of the instructions right before the call. OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(), FirstNewBlock->begin(), FirstNewBlock->end()); // Remove the cloned basic block. Caller->getBasicBlockList().pop_back(); // If the call site was an invoke instruction, add a branch to the normal // destination. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); NewBr->setDebugLoc(Returns[0]->getDebugLoc()); } // If the return instruction returned a value, replace uses of the call with // uses of the returned value.
if (!TheCall->use_empty()) { ReturnInst *R = Returns[0]; if (TheCall == R->getReturnValue()) TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); else TheCall->replaceAllUsesWith(R->getReturnValue()); } // Since we are now done with the Call/Invoke, we can delete it. TheCall->eraseFromParent(); // Since we are now done with the return instruction, delete it also. Returns[0]->eraseFromParent(); // We are now done with the inlining. return true; } // Otherwise, we have the normal case of more than one block to inline or // multiple return sites. // We want to clone the entire callee function into the hole between the // "starter" and "ender" blocks. How we accomplish this depends on whether // this is an invoke instruction or a call instruction. BasicBlock *AfterCallBB; BranchInst *CreatedBranchToNormalDest = NULL; if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { // Add an unconditional branch to make this look like the CallInst case... CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall); // Split the basic block. This guarantees that no PHI nodes will have to be // updated due to new incoming edges, and makes the invoke case more // symmetric to the call case. AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest, CalledFunc->getName()+".exit"); } else { // It's a call // If this is a call instruction, we need to split the basic block that // the call lives in. // AfterCallBB = OrigBB->splitBasicBlock(TheCall, CalledFunc->getName()+".exit"); } // Change the branch that used to go to AfterCallBB to branch to the first // basic block of the inlined function. // TerminatorInst *Br = OrigBB->getTerminator(); assert(Br && Br->getOpcode() == Instruction::Br && "splitBasicBlock broken!"); Br->setOperand(0, FirstNewBlock); // Now that the function is correct, make it a little bit nicer. In // particular, move the basic blocks inserted from the end of the function // into the space made by splitting the source basic block. Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(), FirstNewBlock, Caller->end()); // Handle all of the return instructions that we just cloned in, and eliminate // any users of the original call/invoke instruction. Type *RTy = CalledFunc->getReturnType(); PHINode *PHI = 0; if (Returns.size() > 1) { // The PHI node should go at the front of the new basic block to merge all // possible incoming values. if (!TheCall->use_empty()) { PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(), AfterCallBB->begin()); // Anything that used the result of the function call should now use the // PHI node as its operand. TheCall->replaceAllUsesWith(PHI); } // Loop over all of the return instructions adding entries to the PHI node // as appropriate. if (PHI) { for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; assert(RI->getReturnValue()->getType() == PHI->getType() && "Ret value not consistent in function!"); PHI->addIncoming(RI->getReturnValue(), RI->getParent()); } } // Add a branch to the merge point and remove return instructions. for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; BranchInst::Create(AfterCallBB, RI); RI->eraseFromParent(); } } else if (!Returns.empty()) { // Otherwise, if there is exactly one return value, just replace anything // using the return value of the call with the computed value.
if (!TheCall->use_empty()) { if (TheCall == Returns[0]->getReturnValue()) TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); else TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); } // Update PHI nodes that use the ReturnBB to use the AfterCallBB. BasicBlock *ReturnBB = Returns[0]->getParent(); ReturnBB->replaceAllUsesWith(AfterCallBB); // Splice the code from the return block into the block that it will return // to, which contains the code that was after the call. AfterCallBB->getInstList().splice(AfterCallBB->begin(), ReturnBB->getInstList()); if (CreatedBranchToNormalDest) CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); // Delete the return instruction and the now-empty ReturnBB. Returns[0]->eraseFromParent(); ReturnBB->eraseFromParent(); } else if (!TheCall->use_empty()) { // No returns, but something is using the return value of the call. Just // nuke the result. TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); } // Since we are now done with the Call/Invoke, we can delete it. TheCall->eraseFromParent(); // We should always be able to fold the entry block of the function into the // single predecessor of the block... assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!"); BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0); // Splice the callee's entry block into the calling block, right before the // unconditional branch. CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes OrigBB->getInstList().splice(Br, CalleeEntry->getInstList()); // Remove the unconditional branch. OrigBB->getInstList().erase(Br); // Now we can remove the CalleeEntry block, which is now empty. Caller->getBasicBlockList().erase(CalleeEntry); // If we inserted a phi node, check to see if it has a single value (e.g. all // the entries are the same or undef). If so, remove the PHI so it doesn't // block other optimizations. if (PHI) { if (Value *V = SimplifyInstruction(PHI, IFI.TD)) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); } } return true; }
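// Illustrative driver only -- not part of the inliner itself. A minimal
// sketch of how InlineFunction is typically used; the helper name and the
// collect-then-inline policy are assumptions for this example. Call sites
// are gathered up front because a successful InlineFunction call deletes
// the call/invoke and may split blocks, which would invalidate a naive
// walk. Note that this performs only one level of inlining: calls cloned
// in from a callee are not revisited.
static bool inlineAllDirectCallSites(Function &F, InlineFunctionInfo &IFI) {
  SmallVector<CallSite, 8> Sites;
  for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE; ++BB)
    for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I)
      if (isa<CallInst>(I) || isa<InvokeInst>(I))
        Sites.push_back(CallSite(&*I));

  bool Changed = false;
  for (unsigned i = 0, e = Sites.size(); i != e; ++i)
    // InlineFunction returns false and leaves the program untouched for
    // sites it cannot handle (indirect calls, declarations, varargs).
    Changed |= InlineFunction(Sites[i], IFI, /*InsertLifetime=*/true);
  return Changed;
}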
// UnifyAllExitNodes - Unify all exit nodes of the CFG by creating a new // BasicBlock, and converting all returns to unconditional branches to this // new basic block. The singular exit node is stored in the ReturnBlock member. // // If there are no return statements in the Function, ReturnBlock is set to // null. Returns true if the function was modified. // bool UnifyFunctionExitNodes::runOnFunction(Function &F) { // Loop over all of the blocks in a function, tracking all of the blocks that // return. // std::vector<BasicBlock*> ReturningBlocks; std::vector<BasicBlock*> UnwindingBlocks; std::vector<BasicBlock*> UnreachableBlocks; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) if (isa<ReturnInst>(I->getTerminator())) ReturningBlocks.push_back(I); else if (isa<UnwindInst>(I->getTerminator())) UnwindingBlocks.push_back(I); else if (isa<UnreachableInst>(I->getTerminator())) UnreachableBlocks.push_back(I); // Handle unwinding blocks first. if (UnwindingBlocks.empty()) { UnwindBlock = 0; } else if (UnwindingBlocks.size() == 1) { UnwindBlock = UnwindingBlocks.front(); } else { UnwindBlock = BasicBlock::Create(F.getContext(), "UnifiedUnwindBlock", &F); new UnwindInst(F.getContext(), UnwindBlock); for (std::vector<BasicBlock*>::iterator I = UnwindingBlocks.begin(), E = UnwindingBlocks.end(); I != E; ++I) { BasicBlock *BB = *I; BB->getInstList().pop_back(); // Remove the unwind insn BranchInst::Create(UnwindBlock, BB); } } // Then unreachable blocks. if (UnreachableBlocks.empty()) { UnreachableBlock = 0; } else if (UnreachableBlocks.size() == 1) { UnreachableBlock = UnreachableBlocks.front(); } else { UnreachableBlock = BasicBlock::Create(F.getContext(), "UnifiedUnreachableBlock", &F); new UnreachableInst(F.getContext(), UnreachableBlock); for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(), E = UnreachableBlocks.end(); I != E; ++I) { BasicBlock *BB = *I; BB->getInstList().pop_back(); // Remove the unreachable inst. BranchInst::Create(UnreachableBlock, BB); } } // Now handle return blocks. if (ReturningBlocks.empty()) { ReturnBlock = 0; return false; // No blocks return } else if (ReturningBlocks.size() == 1) { ReturnBlock = ReturningBlocks.front(); // Already has a single return block return false; } // Otherwise, we need to insert a new basic block into the function, add a PHI // node (if the function returns a value), and convert all of the return // instructions into unconditional branches. // BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), "UnifiedReturnBlock", &F); PHINode *PN = 0; if (F.getReturnType()->isVoidTy()) { ReturnInst::Create(F.getContext(), NULL, NewRetBlock); } else { // If the function doesn't return void... add a PHI node to the block... PN = PHINode::Create(F.getReturnType(), "UnifiedRetVal"); NewRetBlock->getInstList().push_back(PN); ReturnInst::Create(F.getContext(), PN, NewRetBlock); } // Loop over all of the blocks, replacing the return instruction with an // unconditional branch. // for (std::vector<BasicBlock*>::iterator I = ReturningBlocks.begin(), E = ReturningBlocks.end(); I != E; ++I) { BasicBlock *BB = *I; // Add an incoming element to the PHI node for every return instruction that // is merging into this new block... if (PN) PN->addIncoming(BB->getTerminator()->getOperand(0), BB); BB->getInstList().pop_back(); // Remove the return insn BranchInst::Create(NewRetBlock, BB); } ReturnBlock = NewRetBlock; return true; }
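// Illustrative only: how a client pass might consume the unified blocks
// computed above. The pass name and body are assumptions for this sketch;
// the getReturnBlock accessor is assumed to expose the ReturnBlock member
// set by runOnFunction.
namespace {
struct SingleExitClient : public FunctionPass {
  static char ID;
  SingleExitClient() : FunctionPass(ID) {}

  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    // Ensure exit nodes are unified before this pass runs.
    AU.addRequired<UnifyFunctionExitNodes>();
  }

  virtual bool runOnFunction(Function &F) {
    UnifyFunctionExitNodes &UFEN = getAnalysis<UnifyFunctionExitNodes>();
    if (BasicBlock *Ret = UFEN.getReturnBlock())
      errs() << F.getName() << " returns via " << Ret->getName() << "\n";
    return false; // Analysis-style client; no modification here.
  }
};
}
char SingleExitClient::ID = 0;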
// // Method: InsertFreesAtEnd() // // Description: // Insert free instructions so that the memory allocated by the specified // malloc instruction is freed on function exit. // void ConvertUnsafeAllocas::InsertFreesAtEnd(Instruction *MI) { assert (MI && "MI is NULL!\n"); // // Get the dominance frontier information about the malloc instruction's // basic block. We cache the information in case we end up processing // multiple instructions from the same function. // BasicBlock *currentBlock = MI->getParent(); Function *F = currentBlock->getParent(); DominanceFrontier *dfmt = &getAnalysis<DominanceFrontier>(*F); DominatorTree *domTree = &getAnalysis<DominatorTree>(*F); DominanceFrontier::const_iterator it = dfmt->find(currentBlock); #if 0 // // If the basic block has a dominance frontier, use it. // if (it != dfmt->end()) { const DominanceFrontier::DomSetType &S = it->second; if (S.size() > 0) { DominanceFrontier::DomSetType::iterator pCurrent = S.begin(), pEnd = S.end(); for (; pCurrent != pEnd; ++pCurrent) { BasicBlock *frontierBlock = *pCurrent; // One of its predecessors is dominated by currentBlock; // need to insert a free in that predecessor for (pred_iterator SI = pred_begin(frontierBlock), SE = pred_end(frontierBlock); SI != SE; ++SI) { BasicBlock *predecessorBlock = *SI; if (domTree->dominates (predecessorBlock, currentBlock)) { // Get the terminator Instruction *InsertPt = predecessorBlock->getTerminator(); new FreeInst(MI, InsertPt); } } } return; } } #endif // // The dominance frontier code above is disabled, so insert the frees before // all of the returns instead. // std::vector<Instruction*> FreePoints; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) if (isa<ReturnInst>(BB->getTerminator()) || isa<ResumeInst>(BB->getTerminator())) FreePoints.push_back(BB->getTerminator()); // // We have the Free points; now we construct the free instructions at each // of the points. // std::vector<Instruction*>::iterator fpI = FreePoints.begin(), fpE = FreePoints.end(); for (; fpI != fpE; ++fpI) { // // Determine whether the allocation dominates the return. If not, then // don't insert a free instruction for now. // Instruction *InsertPt = *fpI; if (domTree->dominates (MI->getParent(), InsertPt->getParent())) { CallInst::Create (kfree, MI, "", InsertPt); } else { ++MissingFrees; } } }
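// For orientation (illustrative C; the kfree callee matches the handle used
// above): given an allocation in the entry block,
//
//   char *p = kmalloc(n);   // the instruction MI
//   if (c) return;          // dominated by MI -> kfree(p) inserted before it
//   ...
//   return;                 // dominated by MI -> kfree(p) inserted before it
//
// every return (or resume) dominated by the allocation gets a preceding
// kfree of the pointer; exits the allocation does not dominate are only
// counted via MissingFrees, and no free is inserted for them.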
/// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind /// edge and spill them. void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst *> Invokes) { // Finally, scan the code looking for instructions with bad live ranges. for (BasicBlock &BB : F) { for (Instruction &Inst : BB) { // Ignore obvious cases we don't have to handle. In particular, most // instructions either have no uses or only have a single use inside the // current block. Ignore them quickly. if (Inst.use_empty()) continue; if (Inst.hasOneUse() && cast<Instruction>(Inst.user_back())->getParent() == &BB && !isa<PHINode>(Inst.user_back())) continue; // If this is an alloca in the entry block, it's not a real register // value. if (auto *AI = dyn_cast<AllocaInst>(&Inst)) if (AI->isStaticAlloca()) continue; // Avoid iterator invalidation by copying users to a temporary vector. SmallVector<Instruction *, 16> Users; for (User *U : Inst.users()) { Instruction *UI = cast<Instruction>(U); if (UI->getParent() != &BB || isa<PHINode>(UI)) Users.push_back(UI); } // Find all of the blocks that this value is live in. SmallPtrSet<BasicBlock *, 32> LiveBBs; LiveBBs.insert(&BB); while (!Users.empty()) { Instruction *U = Users.pop_back_val(); if (!isa<PHINode>(U)) { MarkBlocksLiveIn(U->getParent(), LiveBBs); } else { // Uses for a PHI node occur in their predecessor block. PHINode *PN = cast<PHINode>(U); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == &Inst) MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); } } // Now that we know all of the blocks that this thing is live in, see if // it includes any of the unwind locations. bool NeedsSpill = false; for (InvokeInst *Invoke : Invokes) { BasicBlock *UnwindBlock = Invoke->getUnwindDest(); if (UnwindBlock != &BB && LiveBBs.count(UnwindBlock)) { LLVM_DEBUG(dbgs() << "SJLJ Spill: " << Inst << " around " << UnwindBlock->getName() << "\n"); NeedsSpill = true; break; } } // If we decided we need a spill, do it. // FIXME: Spilling this way is overkill, as it forces all uses of // the value to be reloaded from the stack slot, even those that aren't // in the unwind blocks. We should be more selective. if (NeedsSpill) { DemoteRegToStack(Inst, true); ++NumSpilled; } } } // Go through the landing pads and remove any PHIs there. for (InvokeInst *Invoke : Invokes) { BasicBlock *UnwindBlock = Invoke->getUnwindDest(); LandingPadInst *LPI = UnwindBlock->getLandingPadInst(); // Place PHIs into a set to avoid invalidating the iterator. SmallPtrSet<PHINode *, 8> PHIsToDemote; for (BasicBlock::iterator PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN) PHIsToDemote.insert(cast<PHINode>(PN)); if (PHIsToDemote.empty()) continue; // Demote the PHIs to the stack. for (PHINode *PN : PHIsToDemote) DemotePHIToStack(PN); // Move the landingpad instruction back to the top of the landing pad block. LPI->moveBefore(&UnwindBlock->front()); } }
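// For reference, MarkBlocksLiveIn (defined elsewhere in this pass) is a
// backwards flood fill over the CFG. A minimal sketch consistent with its
// use above -- treat the exact signature as an assumption:
static void MarkBlocksLiveIn(BasicBlock *BB,
                             SmallPtrSetImpl<BasicBlock *> &LiveBBs) {
  // Stop once a block has already been marked live-in.
  if (!LiveBBs.insert(BB).second)
    return;
  // The value is live-in to BB, so it is live-out of every predecessor.
  for (BasicBlock *Pred : predecessors(BB))
    MarkBlocksLiveIn(Pred, LiveBBs);
}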
bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) { AtomicOrdering Order = AI->getOrdering(); Value *Addr = AI->getPointerOperand(); BasicBlock *BB = AI->getParent(); Function *F = BB->getParent(); LLVMContext &Ctx = F->getContext(); // Given: atomicrmw some_op iN* %addr, iN %incr ordering // // The standard expansion we produce is: // [...] // fence? // atomicrmw.start: // %loaded = @load.linked(%addr) // %new = some_op iN %loaded, %incr // %stored = @store_conditional(%new, %addr) // %try_again = icmp ne i32 %stored, 0 // br i1 %try_again, label %atomicrmw.start, label %atomicrmw.end // atomicrmw.end: // fence? // [...] BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); // This grabs the DebugLoc from AI. IRBuilder<> Builder(AI); // The split call above "helpfully" added a branch at the end of BB (to the // wrong place), but we might want a fence too. It's easiest to just remove // the branch entirely. std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order); Builder.CreateBr(LoopBB); // Start the main loop block now that we've taken care of the preliminaries. Builder.SetInsertPoint(LoopBB); Value *Loaded = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked( Builder, Addr, MemOpOrder); Value *NewVal; switch (AI->getOperation()) { case AtomicRMWInst::Xchg: NewVal = AI->getValOperand(); break; case AtomicRMWInst::Add: NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new"); break; case AtomicRMWInst::Sub: NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new"); break; case AtomicRMWInst::And: NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new"); break; case AtomicRMWInst::Nand: NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()), "new"); break; case AtomicRMWInst::Or: NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new"); break; case AtomicRMWInst::Xor: NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new"); break; case AtomicRMWInst::Max: NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand()); NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); break; case AtomicRMWInst::Min: NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand()); NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); break; case AtomicRMWInst::UMax: NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand()); NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); break; case AtomicRMWInst::UMin: NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand()); NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); break; default: llvm_unreachable("Unknown atomic op"); } Value *StoreSuccess = TM->getSubtargetImpl()->getTargetLowering()->emitStoreConditional( Builder, NewVal, Addr, MemOpOrder); Value *TryAgain = Builder.CreateICmpNE( StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); Builder.SetInsertPoint(ExitBB, ExitBB->begin()); insertTrailingFence(Builder, Order); AI->replaceAllUsesWith(Loaded); AI->eraseFromParent(); return true; }
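// As a concrete illustration of the expansion sketched in the comment above
// (assumed IR, with the load-linked/store-conditional hooks shown as ARM
// ldrex/strex intrinsics), "atomicrmw add i32* %p, i32 1 seq_cst" becomes
// roughly:
//
//   atomicrmw.start:
//     %loaded = call i32 @llvm.arm.ldrex.p0i32(i32* %p)
//     %new = add i32 %loaded, 1
//     %stored = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %p)
//     %tryagain = icmp ne i32 %stored, 0
//     br i1 %tryagain, label %atomicrmw.start, label %atomicrmw.end
//
// with any leading/trailing fences inserted around the loop as the
// insertLeadingFence/insertTrailingFence hooks dictate.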
bool LowerEmSetjmp::runOnModule(Module &M) { TheModule = &M; Function *Setjmp = TheModule->getFunction("setjmp"); Function *Longjmp = TheModule->getFunction("longjmp"); if (!Setjmp && !Longjmp) return false; Type *i32 = Type::getInt32Ty(M.getContext()); Type *Void = Type::getVoidTy(M.getContext()); // Add functions Function *EmSetjmp = NULL; if (Setjmp) { SmallVector<Type*, 2> EmSetjmpTypes; EmSetjmpTypes.push_back(Setjmp->getFunctionType()->getParamType(0)); EmSetjmpTypes.push_back(i32); // extra param that says which setjmp in the function it is FunctionType *EmSetjmpFunc = FunctionType::get(i32, EmSetjmpTypes, false); EmSetjmp = Function::Create(EmSetjmpFunc, GlobalValue::ExternalLinkage, "emscripten_setjmp", TheModule); } Function *EmLongjmp = Longjmp ? Function::Create(Longjmp->getFunctionType(), GlobalValue::ExternalLinkage, "emscripten_longjmp", TheModule) : NULL; SmallVector<Type*, 1> IntArgTypes; IntArgTypes.push_back(i32); FunctionType *IntIntFunc = FunctionType::get(i32, IntArgTypes, false); Function *CheckLongjmp = Function::Create(IntIntFunc, GlobalValue::ExternalLinkage, "emscripten_check_longjmp", TheModule); // gets control flow Function *GetLongjmpResult = Function::Create(IntIntFunc, GlobalValue::ExternalLinkage, "emscripten_get_longjmp_result", TheModule); // gets int value longjmp'd FunctionType *VoidFunc = FunctionType::get(Void, false); Function *PrepSetjmp = Function::Create(VoidFunc, GlobalValue::ExternalLinkage, "emscripten_prep_setjmp", TheModule); Function *CleanupSetjmp = Function::Create(VoidFunc, GlobalValue::ExternalLinkage, "emscripten_cleanup_setjmp", TheModule); Function *PreInvoke = TheModule->getFunction("emscripten_preinvoke"); if (!PreInvoke) PreInvoke = Function::Create(VoidFunc, GlobalValue::ExternalLinkage, "emscripten_preinvoke", TheModule); FunctionType *IntFunc = FunctionType::get(i32, false); Function *PostInvoke = TheModule->getFunction("emscripten_postinvoke"); if (!PostInvoke) PostInvoke = Function::Create(IntFunc, GlobalValue::ExternalLinkage, "emscripten_postinvoke", TheModule); // Process all callers of setjmp and longjmp. Start with setjmp. typedef std::vector<PHINode*> Phis; typedef std::map<Function*, Phis> FunctionPhisMap; FunctionPhisMap SetjmpOutputPhis; std::vector<Instruction*> ToErase; if (Setjmp) { for (Instruction::user_iterator UI = Setjmp->user_begin(), UE = Setjmp->user_end(); UI != UE; ++UI) { User *U = *UI; if (CallInst *CI = dyn_cast<CallInst>(U)) { BasicBlock *SJBB = CI->getParent(); // The tail is everything right after the call, and will be reached once when setjmp is // called, and later when longjmp returns to the setjmp BasicBlock *Tail = SplitBlock(SJBB, CI->getNextNode()); // Add a phi to the tail, which will be the output of setjmp, which indicates if this is the // first call or a longjmp back. 
// The phi directly uses the right value based on where we arrive from. PHINode *SetjmpOutput = PHINode::Create(i32, 2, "", Tail->getFirstNonPHI()); SetjmpOutput->addIncoming(ConstantInt::get(i32, 0), SJBB); // setjmp initial call returns 0 CI->replaceAllUsesWith(SetjmpOutput); // The proper output is now this, not the setjmp call itself // longjmps that return to this setjmp will add their values to this phi Phis& P = SetjmpOutputPhis[SJBB->getParent()]; P.push_back(SetjmpOutput); // fix call target SmallVector<Value *, 2> Args; Args.push_back(CI->getArgOperand(0)); Args.push_back(ConstantInt::get(i32, P.size())); // our index in the function is our place in the array + 1 CallInst::Create(EmSetjmp, Args, "", CI); ToErase.push_back(CI); } else { errs() << **UI << "\n"; report_fatal_error("bad use of setjmp, should only call it"); } } } // Update longjmp. FIXME: we could avoid throwing in longjmp as an optimization when longjmping back into the current function, perhaps? if (Longjmp) Longjmp->replaceAllUsesWith(EmLongjmp); // Update all setjmping functions for (FunctionPhisMap::iterator I = SetjmpOutputPhis.begin(); I != SetjmpOutputPhis.end(); I++) { Function *F = I->first; Phis& P = I->second; CallInst::Create(PrepSetjmp, "", F->begin()->begin()); // Update each call that can longjmp so it can return to a setjmp where relevant for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ) { BasicBlock *BB = BBI++; for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); Iter != E; ) { Instruction *I = Iter++; CallInst *CI; if ((CI = dyn_cast<CallInst>(I))) { Value *V = CI->getCalledValue(); if (V == PrepSetjmp || V == EmSetjmp || V == CheckLongjmp || V == GetLongjmpResult || V == PreInvoke || V == PostInvoke) continue; if (Function *CF = dyn_cast<Function>(V)) if (CF->isIntrinsic()) continue; // TODO: proper analysis of what can actually longjmp. Currently we assume anything but setjmp can. // This may longjmp, so we need to check if it did. Split at that point, and // envelop the call in pre/post invoke, if we need to CallInst *After; Instruction *Check = NULL; if (Iter != E && (After = dyn_cast<CallInst>(Iter)) && After->getCalledValue() == PostInvoke) { // use the pre|postinvoke that exceptions lowering already made Check = Iter++; } BasicBlock *Tail = SplitBlock(BB, Iter); // Iter already points to the next instruction, as we need TerminatorInst *TI = BB->getTerminator(); if (!Check) { // no existing pre|postinvoke, create our own CallInst::Create(PreInvoke, "", CI); Check = CallInst::Create(PostInvoke, "", TI); // CI is at the end of the block // If we are calling a function that is noreturn, we must remove that attribute. The code we // insert here does expect it to return, after we catch the exception.
if (CI->doesNotReturn()) { if (Function *F = dyn_cast<Function>(CI->getCalledValue())) { F->removeFnAttr(Attribute::NoReturn); } CI->setAttributes(CI->getAttributes().removeAttribute(TheModule->getContext(), AttributeSet::FunctionIndex, Attribute::NoReturn)); assert(!CI->doesNotReturn()); } } // We need to replace BB's terminator: SplitBlock makes BB branch straight to Tail, but we need to check whether a longjmp occurred and // go to the right setjmp-tail if so SmallVector<Value *, 1> Args; Args.push_back(Check); Instruction *LongjmpCheck = CallInst::Create(CheckLongjmp, Args, "", BB); Instruction *LongjmpResult = CallInst::Create(GetLongjmpResult, Args, "", BB); SwitchInst *SI = SwitchInst::Create(LongjmpCheck, Tail, 2, BB); // -1 means no longjmp happened, continue normally (will hit the default switch case). 0 means a longjmp that is not ours to handle, needs a rethrow. Otherwise // the case value is the index into P plus 1 (to avoid 0). for (unsigned i = 0; i < P.size(); i++) { SI->addCase(cast<ConstantInt>(ConstantInt::get(i32, i+1)), P[i]->getParent()); P[i]->addIncoming(LongjmpResult, BB); } ToErase.push_back(TI); // the new terminator is now the switch // We are splitting the block here, and must continue to find other calls in the block, which is now split, so continue // traversing in the Tail BB = Tail; Iter = BB->begin(); E = BB->end(); } else if (InvokeInst *CI = dyn_cast<InvokeInst>(I)) { // XXX check if target is setjmp (void)CI; report_fatal_error("TODO: invoke inside setjmping functions"); } } } // add a cleanup before each return for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ) { BasicBlock *BB = BBI++; TerminatorInst *TI = BB->getTerminator(); if (isa<ReturnInst>(TI)) { CallInst::Create(CleanupSetjmp, "", TI); } } } for (unsigned i = 0; i < ToErase.size(); i++) { ToErase[i]->eraseFromParent(); } // Finally, our modifications to the CFG can break dominance of SSA variables. For example, // if (x()) { .. setjmp() .. } // if (y()) { .. longjmp() .. } // We must split the longjmp block, and it can jump into the setjmp one. But that means that when // we split the setjmp block, its first part no longer dominates its second part - there is // a theoretically possible control flow path where x() is false, then y() is true and we // reach the second part of the setjmp block, without ever reaching the first part. So, // we recalculate regs vs. mem for (FunctionPhisMap::iterator I = SetjmpOutputPhis.begin(); I != SetjmpOutputPhis.end(); I++) { Function *F = I->first; doRegToMem(*F); doMemToReg(*F); } return true; }
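// For orientation, the rewrite corresponds to source like the following
// (illustrative C; the emscripten_* helpers are the ones created above):
//
//   if (!setjmp(buf)) {   // becomes emscripten_setjmp(buf, idx); the tail's
//     foo();              //   phi selects 0 here vs. the longjmp'd value
//   } else {
//     handle();           // reached when emscripten_check_longjmp reports a
//   }                     //   longjmp targeting this setjmp
//
// Each call that may longjmp (foo() here) is wrapped in emscripten_preinvoke/
// emscripten_postinvoke, followed by the check/get-result calls and the
// switch that dispatches back to the matching setjmp tail.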
/// isSafeToPromoteArgument - As you might guess from the name of this method, /// it checks to see if it is both safe and useful to promote the argument. /// This method limits promotion of aggregates to only promote up to three /// elements of the aggregate in order to avoid exploding the number of /// arguments passed in. bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { typedef std::set<IndicesVector> GEPIndicesSet; // Quick exit for unused arguments if (Arg->use_empty()) return true; // We can only promote this argument if all of the uses are loads, or are GEP // instructions (with constant indices) that are subsequently loaded. // // Promoting the argument causes it to be loaded in the caller // unconditionally. This is only safe if we can prove that either the load // would have happened in the callee anyway (i.e., there is a load in the entry // block) or the pointer passed in at every call site is guaranteed to be // valid. // In the former case, invalid loads can happen, but would have happened // anyway; in the latter case, invalid loads won't happen. This prevents us // from introducing an invalid load that wouldn't have happened in the // original code. // // This set will contain all sets of indices that are loaded in the entry // block, and thus are safe to unconditionally load in the caller. GEPIndicesSet SafeToUnconditionallyLoad; // This set contains all the sets of indices that we are planning to promote. // This makes it possible to limit the number of arguments added. GEPIndicesSet ToPromote; // If the pointer is always valid, any load with first index 0 is valid. if (isByVal || AllCallersPassInValidPointerForArgument(Arg)) SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); // First, iterate the entry block and mark loads of (geps of) arguments as // safe. BasicBlock *EntryBlock = Arg->getParent()->begin(); // Declare this here so we can reuse it IndicesVector Indices; for (BasicBlock::iterator I = EntryBlock->begin(), E = EntryBlock->end(); I != E; ++I) if (LoadInst *LI = dyn_cast<LoadInst>(I)) { Value *V = LI->getPointerOperand(); if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { V = GEP->getPointerOperand(); if (V == Arg) { // This load actually loads (part of) Arg? Check the indices then. Indices.reserve(GEP->getNumIndices()); for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); II != IE; ++II) if (ConstantInt *CI = dyn_cast<ConstantInt>(*II)) Indices.push_back(CI->getSExtValue()); else // We found a non-constant GEP index for this argument? Bail out // right away, can't promote this argument at all. return false; // Indices checked out, mark them as safe MarkIndicesSafe(Indices, SafeToUnconditionallyLoad); Indices.clear(); } } else if (V == Arg) { // Direct loads are equivalent to a GEP with a single 0 index. MarkIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad); } } // Now, iterate all uses of the argument to see if there are any uses that are // not (GEP+)loads, or any (GEP+)loads that are not safe to promote. SmallVector<LoadInst*, 16> Loads; IndicesVector Operands; for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end(); UI != E; ++UI) { User *U = *UI; Operands.clear(); if (LoadInst *LI = dyn_cast<LoadInst>(U)) { // Don't hack volatile/atomic loads if (!LI->isSimple()) return false; Loads.push_back(LI); // Direct loads are equivalent to a GEP with a zero index and then a load.
Operands.push_back(0); } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { if (GEP->use_empty()) { // Dead GEPs cause trouble later. Just remove them if we run into // them. getAnalysis<AliasAnalysis>().deleteValue(GEP); GEP->eraseFromParent(); // TODO: This runs the above loop over and over again for dead GEPs. // Couldn't we just increment the UI iterator earlier and erase the // use? return isSafeToPromoteArgument(Arg, isByVal); } // Ensure that all of the indices are constants. for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end(); i != e; ++i) if (ConstantInt *C = dyn_cast<ConstantInt>(*i)) Operands.push_back(C->getSExtValue()); else return false; // Not a constant operand GEP! // Ensure that the only users of the GEP are load instructions. for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end(); UI != E; ++UI) if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { // Don't hack volatile/atomic loads if (!LI->isSimple()) return false; Loads.push_back(LI); } else { // Uses other than a load? return false; } } else { return false; // Not a load or a GEP. } // Now, see if it is safe to promote this load / loads of this GEP. Loading // is safe if Operands, or a prefix of Operands, is marked as safe. if (!PrefixIn(Operands, SafeToUnconditionallyLoad)) return false; // See if we are already promoting a load with these indices. If so, there is // nothing to do; otherwise check that we aren't promoting too many elements. if (ToPromote.find(Operands) == ToPromote.end()) { if (maxElements > 0 && ToPromote.size() == maxElements) { DEBUG(dbgs() << "argpromotion not promoting argument '" << Arg->getName() << "' because it would require adding more " << "than " << maxElements << " arguments to the function.\n"); // We limit aggregate promotion to only promoting up to a fixed number // of elements of the aggregate. return false; } ToPromote.insert(Operands); } } if (Loads.empty()) return true; // No users, this is a dead argument. // Okay, now we know that the argument is only used by load instructions and // it is safe to unconditionally perform all of them. Use alias analysis to // check to see if the pointer is guaranteed to not be modified from entry of // the function to each of the load instructions. // Because there could be several/many load instructions, remember which // blocks we know to be transparent to the load. SmallPtrSet<BasicBlock*, 16> TranspBlocks; AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); for (unsigned i = 0, e = Loads.size(); i != e; ++i) { // Check to see if the load is invalidated from the start of the block to // the load itself. LoadInst *Load = Loads[i]; BasicBlock *BB = Load->getParent(); AliasAnalysis::Location Loc = AA.getLocation(Load); if (AA.canInstructionRangeModify(BB->front(), *Load, Loc)) return false; // Pointer is invalidated! // Now check every path from the entry block to the load for transparency. // To do this, we perform a depth first search on the inverse CFG from the // loading block. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *P = *PI; for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> > I = idf_ext_begin(P, TranspBlocks), E = idf_ext_end(P, TranspBlocks); I != E; ++I) if (AA.canBasicBlockModify(**I, Loc)) return false; } } // If the path from the entry of the function to each load is free of // instructions that potentially invalidate the load, we can make the // transformation! return true; }
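// Illustrative sketch of the rewrite this legality check enables (assumed
// IR, in the typed-pointer syntax of this code's vintage):
//
//   before:  define internal i32 @f(i32* %p) {
//              %v = load i32* %p            ; load in the entry block
//              ret i32 %v
//            }
//            %r = call i32 @f(i32* %q)
//
//   after:   define internal i32 @f(i32 %p.val) {
//              ret i32 %p.val
//            }
//            %q.val = load i32* %q          ; load hoisted into the caller
//            %r = call i32 @f(i32 %q.val)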