/// HandleFloatingPointIV - If the loop has floating induction variable /// then insert corresponding integer induction variable if possible. /// For example, /// for(double i = 0; i < 10000; ++i) /// bar(i) /// is converted into /// for(int i = 0; i < 10000; ++i) /// bar((double)i); /// void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); unsigned BackEdge = IncomingEdge^1; // Check incoming value. ConstantFP *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); int64_t InitValue; if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue)) return; // Check IV increment. Reject this PN if increment operation is not // an add or increment value can not be represented by an integer. BinaryOperator *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return; // If this is not an add of the PHI with a constantfp, or if the constant fp // is not an integer, bail out. ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); int64_t IncValue; if (IncValueVal == 0 || Incr->getOperand(0) != PN || !ConvertToSInt(IncValueVal->getValueAPF(), IncValue)) return; // Check Incr uses. One user is PN and the other user is an exit condition // used by the conditional terminator. Value::use_iterator IncrUse = Incr->use_begin(); Instruction *U1 = cast<Instruction>(*IncrUse++); if (IncrUse == Incr->use_end()) return; Instruction *U2 = cast<Instruction>(*IncrUse++); if (IncrUse != Incr->use_end()) return; // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't // only used by a branch, we can't transform it. 
FCmpInst *Compare = dyn_cast<FCmpInst>(U1); if (!Compare) Compare = dyn_cast<FCmpInst>(U2); if (Compare == 0 || !Compare->hasOneUse() || !isa<BranchInst>(Compare->use_back())) return; BranchInst *TheBr = cast<BranchInst>(Compare->use_back()); // We need to verify that the branch actually controls the iteration count // of the loop. If not, the new IV can overflow and no one will notice. // The branch block must be in the loop and one of the successors must be out // of the loop. assert(TheBr->isConditional() && "Can't use fcmp if not conditional"); if (!L->contains(TheBr->getParent()) || (L->contains(TheBr->getSuccessor(0)) && L->contains(TheBr->getSuccessor(1)))) return; // If it isn't a comparison with an integer-as-fp (the exit value), we can't // transform it. ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); int64_t ExitValue; if (ExitValueVal == 0 || !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) return; // Find new predicate for integer comparison. CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; switch (Compare->getPredicate()) { default: return; // Unknown comparison. case CmpInst::FCMP_OEQ: case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; case CmpInst::FCMP_ONE: case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break; case CmpInst::FCMP_OGT: case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break; case CmpInst::FCMP_OGE: case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break; case CmpInst::FCMP_OLT: case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break; case CmpInst::FCMP_OLE: case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; } // We convert the floating point induction variable to a signed i32 value if // we can. This is only safe if the comparison will not overflow in a way // that won't be trapped by the integer equivalent operations. Check for this // now. // TODO: We could use i64 if it is native and the range requires it. 
// The start/stride/exit values must all fit in signed i32. if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) return; // If not actually striding (add x, 0.0), avoid touching the code. if (IncValue == 0) return; // Positive and negative strides have different safety conditions. if (IncValue > 0) { // If we have a positive stride, we require the init to be less than the // exit value and an equality or less than comparison. if (InitValue >= ExitValue || NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE) return; uint32_t Range = uint32_t(ExitValue-InitValue); if (NewPred == CmpInst::ICMP_SLE) { // Normalize SLE -> SLT, check for infinite loop. if (++Range == 0) return; // Range overflows. } unsigned Leftover = Range % uint32_t(IncValue); // If this is an equality comparison, we require that the strided value // exactly land on the exit value, otherwise the IV condition will wrap // around and do things the fp IV wouldn't. if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && Leftover != 0) return; // If the stride would wrap around the i32 before exiting, we can't // transform the IV. if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) return; } else { // If we have a negative stride, we require the init to be greater than the // exit value and an equality or greater than comparison. if (InitValue >= ExitValue || NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE) return; uint32_t Range = uint32_t(InitValue-ExitValue); if (NewPred == CmpInst::ICMP_SGE) { // Normalize SGE -> SGT, check for infinite loop. if (++Range == 0) return; // Range overflows. } unsigned Leftover = Range % uint32_t(-IncValue); // If this is an equality comparison, we require that the strided value // exactly land on the exit value, otherwise the IV condition will wrap // around and do things the fp IV wouldn't. 
if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && Leftover != 0) return; // If the stride would wrap around the i32 before exiting, we can't // transform the IV. if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) return; } const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); // Insert new integer induction variable. PHINode *NewPHI = PHINode::Create(Int32Ty, PN->getName()+".int", PN); NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), PN->getIncomingBlock(IncomingEdge)); Value *NewAdd = BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), Incr->getName()+".int", Incr); NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd, ConstantInt::get(Int32Ty, ExitValue), Compare->getName()); // In the following deletions, PN may become dead and may be deleted. // Use a WeakVH to observe whether this happens. WeakVH WeakPH = PN; // Delete the old floating point exit comparison. The branch starts using the // new comparison. NewCompare->takeName(Compare); Compare->replaceAllUsesWith(NewCompare); RecursivelyDeleteTriviallyDeadInstructions(Compare); // Delete the old floating point increment. Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); RecursivelyDeleteTriviallyDeadInstructions(Incr); // If the FP induction variable still has uses, this is because something else // in the loop uses its value. In order to canonicalize the induction // variable, we chose to eliminate the IV and rewrite it in terms of an // int->fp cast. // // We give preference to sitofp over uitofp because it is faster on most // platforms. if (WeakPH) { Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", PN->getParent()->getFirstNonPHI()); PN->replaceAllUsesWith(Conv); RecursivelyDeleteTriviallyDeadInstructions(PN); } // Add a new IVUsers entry for the newly-created integer PHI. IU->AddUsersIfInteresting(NewPHI); }
// UnifyAllExitNodes - Unify all exit nodes of the CFG by creating a new // BasicBlock, and converting all returns to unconditional branches to this // new basic block. The singular exit node is returned. // // If there are no return stmts in the Function, a null pointer is returned. // bool UnifyFunctionExitNodes::runOnFunction(Function &F) { // Loop over all of the blocks in a function, tracking all of the blocks that // return. // std::vector<BasicBlock*> ReturningBlocks; std::vector<BasicBlock*> UnreachableBlocks; for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I) if (isa<ReturnInst>(I->getTerminator())) ReturningBlocks.push_back(I); else if (isa<UnreachableInst>(I->getTerminator())) UnreachableBlocks.push_back(I); // Then unreachable blocks. if (UnreachableBlocks.empty()) { UnreachableBlock = nullptr; } else if (UnreachableBlocks.size() == 1) { UnreachableBlock = UnreachableBlocks.front(); } else { UnreachableBlock = BasicBlock::Create(F.getContext(), "UnifiedUnreachableBlock", &F); new UnreachableInst(F.getContext(), UnreachableBlock); for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(), E = UnreachableBlocks.end(); I != E; ++I) { BasicBlock *BB = *I; BB->getInstList().pop_back(); // Remove the unreachable inst. BranchInst::Create(UnreachableBlock, BB); } } // Now handle return blocks. if (ReturningBlocks.empty()) { ReturnBlock = nullptr; return false; // No blocks return } else if (ReturningBlocks.size() == 1) { ReturnBlock = ReturningBlocks.front(); // Already has a single return block return false; } // Otherwise, we need to insert a new basic block into the function, add a PHI // nodes (if the function returns values), and convert all of the return // instructions into unconditional branches. 
// BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), "UnifiedReturnBlock", &F); PHINode *PN = nullptr; if (F.getReturnType()->isVoidTy()) { ReturnInst::Create(F.getContext(), nullptr, NewRetBlock); } else { // If the function doesn't return void... add a PHI node to the block... PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(), "UnifiedRetVal"); NewRetBlock->getInstList().push_back(PN); ReturnInst::Create(F.getContext(), PN, NewRetBlock); } // Loop over all of the blocks, replacing the return instruction with an // unconditional branch. // for (std::vector<BasicBlock*>::iterator I = ReturningBlocks.begin(), E = ReturningBlocks.end(); I != E; ++I) { BasicBlock *BB = *I; // Add an incoming element to the PHI node for every return instruction that // is merging into this new block... if (PN) PN->addIncoming(BB->getTerminator()->getOperand(0), BB); BB->getInstList().pop_back(); // Remove the return insn BranchInst::Create(NewRetBlock, BB); } ReturnBlock = NewRetBlock; return true; }
/// CleanupAndPrepareModules - Get the specified modules ready for code /// generator testing. /// static void CleanupAndPrepareModules(BugDriver &BD, Module *&Test, Module *Safe) { // Clean up the modules, removing extra cruft that we don't need anymore... Test = BD.performFinalCleanups(Test); // If we are executing the JIT, we have several nasty issues to take care of. if (!BD.isExecutingJIT()) return; // First, if the main function is in the Safe module, we must add a stub to // the Test module to call into it. Thus, we create a new function `main' // which just calls the old one. if (Function *oldMain = Safe->getFunction("main")) if (!oldMain->isDeclaration()) { // Rename it oldMain->setName("llvm_bugpoint_old_main"); // Create a NEW `main' function with same type in the test module. Function *newMain = Function::Create(oldMain->getFunctionType(), GlobalValue::ExternalLinkage, "main", Test); // Create an `oldmain' prototype in the test module, which will // corresponds to the real main function in the same module. Function *oldMainProto = Function::Create(oldMain->getFunctionType(), GlobalValue::ExternalLinkage, oldMain->getName(), Test); // Set up and remember the argument list for the main function. std::vector<Value*> args; for (Function::arg_iterator I = newMain->arg_begin(), E = newMain->arg_end(), OI = oldMain->arg_begin(); I != E; ++I, ++OI) { I->setName(OI->getName()); // Copy argument names from oldMain args.push_back(I); } // Call the old main function and return its result BasicBlock *BB = BasicBlock::Create(Safe->getContext(), "entry", newMain); CallInst *call = CallInst::Create(oldMainProto, args.begin(), args.end(), "", BB); // If the type of old function wasn't void, return value of call ReturnInst::Create(Safe->getContext(), call, BB); } // The second nasty issue we must deal with in the JIT is that the Safe // module cannot directly reference any functions defined in the test // module. 
Instead, we use a JIT API call to dynamically resolve the // symbol. // Add the resolver to the Safe module. // Prototype: void *getPointerToNamedFunction(const char* Name) Constant *resolverFunc = Safe->getOrInsertFunction("getPointerToNamedFunction", PointerType::getUnqual(Type::getInt8Ty(Safe->getContext())), PointerType::getUnqual(Type::getInt8Ty(Safe->getContext())), (Type *)0); // Use the function we just added to get addresses of functions we need. for (Module::iterator F = Safe->begin(), E = Safe->end(); F != E; ++F) { if (F->isDeclaration() && !F->use_empty() && &*F != resolverFunc && !F->isIntrinsic() /* ignore intrinsics */) { Function *TestFn = Test->getFunction(F->getName()); // Don't forward functions which are external in the test module too. if (TestFn && !TestFn->isDeclaration()) { // 1. Add a string constant with its name to the global file Constant *InitArray = ConstantArray::get(F->getContext(), F->getName()); GlobalVariable *funcName = new GlobalVariable(*Safe, InitArray->getType(), true /*isConstant*/, GlobalValue::InternalLinkage, InitArray, F->getName() + "_name"); // 2. Use `GetElementPtr *funcName, 0, 0' to convert the string to an // sbyte* so it matches the signature of the resolver function. // GetElementPtr *funcName, ulong 0, ulong 0 std::vector<Constant*> GEPargs(2, Constant::getNullValue(Type::getInt32Ty(F->getContext()))); Value *GEP = ConstantExpr::getGetElementPtr(funcName, &GEPargs[0], 2); std::vector<Value*> ResolverArgs; ResolverArgs.push_back(GEP); // Rewrite uses of F in global initializers, etc. to uses of a wrapper // function that dynamically resolves the calls to F via our JIT API if (!F->use_empty()) { // Create a new global to hold the cached function pointer. 
Constant *NullPtr = ConstantPointerNull::get(F->getType()); GlobalVariable *Cache = new GlobalVariable(*F->getParent(), F->getType(), false, GlobalValue::InternalLinkage, NullPtr,F->getName()+".fpcache"); // Construct a new stub function that will re-route calls to F const FunctionType *FuncTy = F->getFunctionType(); Function *FuncWrapper = Function::Create(FuncTy, GlobalValue::InternalLinkage, F->getName() + "_wrapper", F->getParent()); BasicBlock *EntryBB = BasicBlock::Create(F->getContext(), "entry", FuncWrapper); BasicBlock *DoCallBB = BasicBlock::Create(F->getContext(), "usecache", FuncWrapper); BasicBlock *LookupBB = BasicBlock::Create(F->getContext(), "lookupfp", FuncWrapper); // Check to see if we already looked up the value. Value *CachedVal = new LoadInst(Cache, "fpcache", EntryBB); Value *IsNull = new ICmpInst(*EntryBB, ICmpInst::ICMP_EQ, CachedVal, NullPtr, "isNull"); BranchInst::Create(LookupBB, DoCallBB, IsNull, EntryBB); // Resolve the call to function F via the JIT API: // // call resolver(GetElementPtr...) CallInst *Resolver = CallInst::Create(resolverFunc, ResolverArgs.begin(), ResolverArgs.end(), "resolver", LookupBB); // Cast the result from the resolver to correctly-typed function. CastInst *CastedResolver = new BitCastInst(Resolver, PointerType::getUnqual(F->getFunctionType()), "resolverCast", LookupBB); // Save the value in our cache. new StoreInst(CastedResolver, Cache, LookupBB); BranchInst::Create(DoCallBB, LookupBB); PHINode *FuncPtr = PHINode::Create(NullPtr->getType(), "fp", DoCallBB); FuncPtr->addIncoming(CastedResolver, LookupBB); FuncPtr->addIncoming(CachedVal, EntryBB); // Save the argument list. 
std::vector<Value*> Args; for (Function::arg_iterator i = FuncWrapper->arg_begin(), e = FuncWrapper->arg_end(); i != e; ++i) Args.push_back(i); // Pass on the arguments to the real function, return its result if (F->getReturnType() == Type::getVoidTy(F->getContext())) { CallInst::Create(FuncPtr, Args.begin(), Args.end(), "", DoCallBB); ReturnInst::Create(F->getContext(), DoCallBB); } else { CallInst *Call = CallInst::Create(FuncPtr, Args.begin(), Args.end(), "retval", DoCallBB); ReturnInst::Create(F->getContext(),Call, DoCallBB); } // Use the wrapper function instead of the old function F->replaceAllUsesWith(FuncWrapper); } } } } if (verifyModule(*Test) || verifyModule(*Safe)) { errs() << "Bugpoint has a bug, which corrupted a module!!\n"; abort(); } }
/// SplitLandingPadPredecessors - This method transforms the landing pad, /// OrigBB, by introducing two new basic blocks into the function. One of those /// new basic blocks gets the predecessors listed in Preds. The other basic /// block gets the remaining predecessors of OrigBB. The landingpad instruction /// OrigBB is clone into both of the new basic blocks. The new blocks are given /// the suffixes 'Suffix1' and 'Suffix2', and are returned in the NewBBs vector. /// /// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, /// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. In particular, /// it does not preserve LoopSimplify (because it's complicated to handle the /// case where one of the edges being split is an exit of a loop with other /// exits). /// void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef<BasicBlock*> Preds, const char *Suffix1, const char *Suffix2, Pass *P, SmallVectorImpl<BasicBlock*> &NewBBs) { assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!"); // Create a new basic block for OrigBB's predecessors listed in Preds. Insert // it right before the original block. BasicBlock *NewBB1 = BasicBlock::Create(OrigBB->getContext(), OrigBB->getName() + Suffix1, OrigBB->getParent(), OrigBB); NewBBs.push_back(NewBB1); // The new block unconditionally branches to the old block. BranchInst *BI1 = BranchInst::Create(OrigBB, NewBB1); // Move the edges from Preds to point to NewBB1 instead of OrigBB. for (unsigned i = 0, e = Preds.size(); i != e; ++i) { // This is slightly more strict than necessary; the minimum requirement // is that there be no more than one indirectbr branching to BB. And // all BlockAddress uses would need to be updated. assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && "Cannot split an edge from an IndirectBrInst"); Preds[i]->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1); } // Update DominatorTree, LoopInfo, and LCCSA analysis information. 
bool HasLoopExit = false; UpdateAnalysisInformation(OrigBB, NewBB1, Preds, P, HasLoopExit); // Update the PHI nodes in OrigBB with the values coming from NewBB1. UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, P, HasLoopExit); // Move the remaining edges from OrigBB to point to NewBB2. SmallVector<BasicBlock*, 8> NewBB2Preds; for (pred_iterator i = pred_begin(OrigBB), e = pred_end(OrigBB); i != e; ) { BasicBlock *Pred = *i++; if (Pred == NewBB1) continue; assert(!isa<IndirectBrInst>(Pred->getTerminator()) && "Cannot split an edge from an IndirectBrInst"); NewBB2Preds.push_back(Pred); e = pred_end(OrigBB); } BasicBlock *NewBB2 = 0; if (!NewBB2Preds.empty()) { // Create another basic block for the rest of OrigBB's predecessors. NewBB2 = BasicBlock::Create(OrigBB->getContext(), OrigBB->getName() + Suffix2, OrigBB->getParent(), OrigBB); NewBBs.push_back(NewBB2); // The new block unconditionally branches to the old block. BranchInst *BI2 = BranchInst::Create(OrigBB, NewBB2); // Move the remaining edges from OrigBB to point to NewBB2. for (SmallVectorImpl<BasicBlock*>::iterator i = NewBB2Preds.begin(), e = NewBB2Preds.end(); i != e; ++i) (*i)->getTerminator()->replaceUsesOfWith(OrigBB, NewBB2); // Update DominatorTree, LoopInfo, and LCCSA analysis information. HasLoopExit = false; UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, P, HasLoopExit); // Update the PHI nodes in OrigBB with the values coming from NewBB2. UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, P, HasLoopExit); } LandingPadInst *LPad = OrigBB->getLandingPadInst(); Instruction *Clone1 = LPad->clone(); Clone1->setName(Twine("lpad") + Suffix1); NewBB1->getInstList().insert(NewBB1->getFirstInsertionPt(), Clone1); if (NewBB2) { Instruction *Clone2 = LPad->clone(); Clone2->setName(Twine("lpad") + Suffix2); NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2); // Create a PHI node for the two cloned landingpad instructions. 
PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad); PN->addIncoming(Clone1, NewBB1); PN->addIncoming(Clone2, NewBB2); LPad->replaceAllUsesWith(PN); LPad->eraseFromParent(); } else { // There is no second clone. Just replace the landing pad with the first // clone. LPad->replaceAllUsesWith(Clone1); LPad->eraseFromParent(); } }
/// Return the value of the renamed variable that is live on entry to a use
/// located inside BB.  If BB has no definition of its own this is simply the
/// value at the end of BB; otherwise the live-out values of BB's predecessors
/// are merged, reusing an equivalent existing PHI node or inserting a new one
/// at the top of BB when the predecessors disagree.
Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
  // Easy case: nothing is defined in this block, so the end-of-block query
  // already gives the right answer.
  if (!HasValueForBlock(BB))
    return GetValueAtEndOfBlock(BB);

  // Hard case: collect the live-out value of every predecessor, and keep
  // track of whether they all turn out to be one and the same value.
  SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues;
  Value *SingularValue = nullptr;
  bool SeenAnyPred = false;

  auto RecordPred = [&](BasicBlock *PredBB) {
    Value *PredVal = GetValueAtEndOfBlock(PredBB);
    PredValues.push_back(std::make_pair(PredBB, PredVal));

    if (!SeenAnyPred) {
      SingularValue = PredVal;
      SeenAnyPred = true;
    } else if (PredVal != SingularValue) {
      SingularValue = nullptr;
    }
  };

  // Walking the pred_iterator list is relatively slow, so when the block
  // already starts with a PHI node, read the predecessor list off that PHI.
  if (PHINode *LeadingPhi = dyn_cast<PHINode>(BB->begin())) {
    const unsigned NumIncoming = LeadingPhi->getNumIncomingValues();
    for (unsigned Idx = 0; Idx != NumIncoming; ++Idx)
      RecordPred(LeadingPhi->getIncomingBlock(Idx));
  } else {
    for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI)
      RecordPred(*PI);
  }

  // No predecessors at all: the value is undefined.
  if (PredValues.empty())
    return UndefValue::get(ProtoType);

  // Every predecessor supplies the same value: no merge is needed.
  if (SingularValue)
    return SingularValue;

  // A PHI is required.  Before creating one, look through the PHIs already
  // at the top of the block for one that merges exactly these values.
  if (isa<PHINode>(BB->begin())) {
    SmallDenseMap<BasicBlock*, Value*, 8> ValueMapping(PredValues.begin(),
                                                       PredValues.end());
    BasicBlock::iterator It = BB->begin();
    while (PHINode *Candidate = dyn_cast<PHINode>(It)) {
      if (IsEquivalentPHI(Candidate, ValueMapping))
        return Candidate;
      ++It;
    }
  }

  // Nothing reusable: build a fresh PHI at the very front of the block and
  // give it one incoming entry per predecessor.
  PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(),
                                         ProtoName, &BB->front());
  for (const auto &Entry : PredValues)
    InsertedPHI->addIncoming(Entry.second, Entry.first);

  // The new PHI may fold to a single value (this can happen in loop cases
  // when we get a PHI of itself and one other value); if so, discard it.
  Value *Simplified =
      SimplifyInstruction(InsertedPHI, BB->getModule()->getDataLayout());
  if (Simplified) {
    InsertedPHI->eraseFromParent();
    return Simplified;
  }

  // Give the PHI the debug location of the first non-PHI instruction, if the
  // block has one; otherwise it gets a default-constructed location.
  DebugLoc DL;
  if (const Instruction *FirstNonPHI = BB->getFirstNonPHI())
    DL = FirstNonPHI->getDebugLoc();
  InsertedPHI->setDebugLoc(DL);

  // Report the new instruction to the client if it asked for that.
  if (InsertedPHIs)
    InsertedPHIs->push_back(InsertedPHI);

  DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
  return InsertedPHI;
}
/// Create a clone of the blocks in a loop and connect them together. /// If UnrollProlog is true, loop structure will not be cloned, otherwise a new /// loop will be created including all cloned blocks, and the iterator of it /// switches to count NewIter down to 0. /// static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, BasicBlock *InsertTop, BasicBlock *InsertBot, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, LoopInfo *LI) { BasicBlock *Preheader = L->getLoopPreheader(); BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); Loop *NewLoop = 0; Loop *ParentLoop = L->getParentLoop(); if (!UnrollProlog) { NewLoop = new Loop(); if (ParentLoop) ParentLoop->addChildLoop(NewLoop); else LI->addTopLevelLoop(NewLoop); } // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".prol", F); NewBlocks.push_back(NewBB); if (NewLoop) NewLoop->addBasicBlockToLoop(NewBB, *LI); else if (ParentLoop) ParentLoop->addBasicBlockToLoop(NewBB, *LI); VMap[*BB] = NewBB; if (Header == *BB) { // For the first block, add a CFG connection to this newly // created block. InsertTop->getTerminator()->setSuccessor(0, NewBB); } if (Latch == *BB) { // For the last block, if UnrollProlog is true, create a direct jump to // InsertBot. If not, create a loop back to cloned head. 
VMap.erase((*BB)->getTerminator()); BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]); BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator()); IRBuilder<> Builder(LatchBR); if (UnrollProlog) { Builder.CreateBr(InsertBot); } else { PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, "prol.iter", FirstLoopBB->getFirstNonPHI()); Value *IdxSub = Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".sub"); Value *IdxCmp = Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); NewIdx->addIncoming(NewIter, InsertTop); NewIdx->addIncoming(IdxSub, NewBB); } LatchBR->eraseFromParent(); } } // Change the incoming values to the ones defined in the preheader or // cloned loop. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *NewPHI = cast<PHINode>(VMap[I]); if (UnrollProlog) { VMap[I] = NewPHI->getIncomingValueForBlock(Preheader); cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); } else { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); idx = NewPHI->getBasicBlockIndex(Latch); Value *InVal = NewPHI->getIncomingValue(idx); NewPHI->setIncomingBlock(idx, NewLatch); if (VMap[InVal]) NewPHI->setIncomingValue(idx, VMap[InVal]); } } if (NewLoop) { // Add unroll disable metadata to disable future unrolling for this loop. SmallVector<Metadata *, 4> MDs; // Reserve first location for self reference to the LoopID metadata node. MDs.push_back(nullptr); MDNode *LoopID = NewLoop->getLoopID(); if (LoopID) { // First remove any existing loop unrolling metadata. 
for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { bool IsUnrollMetadata = false; MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); if (MD) { const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); } if (!IsUnrollMetadata) MDs.push_back(LoopID->getOperand(i)); } } LLVMContext &Context = NewLoop->getHeader()->getContext(); SmallVector<Metadata *, 1> DisableOperands; DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); MDNode *DisableNode = MDNode::get(Context, DisableOperands); MDs.push_back(DisableNode); MDNode *NewLoopID = MDNode::get(Context, MDs); // Set operand 0 to refer to the loop id itself. NewLoopID->replaceOperandWith(0, NewLoopID); NewLoop->setLoopID(NewLoopID); } }
/// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and /// an unconditional branch in it. void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { BranchInst *BI = cast<BranchInst>(BB->getTerminator()); BasicBlock *DestBB = BI->getSuccessor(0); DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB); // If the destination block has a single pred, then this is a trivial edge, // just collapse it. if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) { if (SinglePred != DestBB) { // Remember if SinglePred was the entry block of the function. If so, we // will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); MergeBasicBlockIntoOnlyPred(DestBB, this); if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); return; } } // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB // to handle the new incoming edges it is about to have. PHINode *PN; for (BasicBlock::iterator BBI = DestBB->begin(); (PN = dyn_cast<PHINode>(BBI)); ++BBI) { // Remove the incoming value for BB, and remember it. Value *InVal = PN->removeIncomingValue(BB, false); // Two options: either the InVal is a phi node defined in BB or it is some // value that dominates BB. PHINode *InValPhi = dyn_cast<PHINode>(InVal); if (InValPhi && InValPhi->getParent() == BB) { // Add all of the input values of the input PHI as inputs of this phi. for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i) PN->addIncoming(InValPhi->getIncomingValue(i), InValPhi->getIncomingBlock(i)); } else { // Otherwise, add one instance of the dominating value for each edge that // we will be adding. 
if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) PN->addIncoming(InVal, BBPN->getIncomingBlock(i)); } else { for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) PN->addIncoming(InVal, *PI); } } } // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. BB->replaceAllUsesWith(DestBB); if (PFI) { PFI->replaceAllUses(BB, DestBB); PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB)); } BB->eraseFromParent(); ++NumElim; DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); }
/// Rewrite the setjmp calls in \p F, and the calls in \p F that may longjmp.
///
/// In order, this function:
///  1. creates the initial %setjmpTable / %setjmpTableSize values in the entry
///     block,
///  2. for each direct call to setjmp located in \p F, splits the block after
///     the call, redirects the call's uses to a "setjmp.ret" PHI in the new
///     tail, and emits a call to SaveSetjmpF in its place,
///  3. for each call for which canLongjmp() holds, splits the block and emits a
///     switch that dispatches to the matching setjmp tail,
///  4. inserts a free of the table before every return instruction, and
///  5. rebuilds SSA form for the table values and then for the whole function.
///
/// \returns true unconditionally.
bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
  Module &M = *F.getParent();
  LLVMContext &C = F.getContext();
  IRBuilder<> IRB(C);
  SmallVector<Instruction *, 64> ToErase;
  // Vector of %setjmpTable values
  std::vector<Instruction *> SetjmpTableInsts;
  // Vector of %setjmpTableSize values
  std::vector<Instruction *> SetjmpTableSizeInsts;

  // Setjmp preparation

  // This instruction effectively means %setjmpTableSize = 4.
  // We create this as an instruction intentionally, and we don't want to fold
  // this instruction to a constant 4, because this value will be used in
  // SSAUpdater.AddAvailableValue(...) later.
  BasicBlock &EntryBB = F.getEntryBlock();
  BinaryOperator *SetjmpTableSize = BinaryOperator::Create(
      Instruction::Add, IRB.getInt32(4), IRB.getInt32(0), "setjmpTableSize",
      &*EntryBB.getFirstInsertionPt());
  // setjmpTable = (int *) malloc(40);
  Instruction *SetjmpTable = CallInst::CreateMalloc(
      SetjmpTableSize, IRB.getInt32Ty(), IRB.getInt32Ty(), IRB.getInt32(40),
      nullptr, nullptr, "setjmpTable");
  // setjmpTable[0] = 0;
  IRB.SetInsertPoint(SetjmpTableSize);
  IRB.CreateStore(IRB.getInt32(0), SetjmpTable);
  SetjmpTableInsts.push_back(SetjmpTable);
  SetjmpTableSizeInsts.push_back(SetjmpTableSize);

  // Setjmp transformation
  std::vector<PHINode *> SetjmpRetPHIs;
  Function *SetjmpF = M.getFunction("setjmp");
  // SetjmpF is dereferenced unconditionally below; make that precondition
  // explicit instead of crashing on a null pointer.
  assert(SetjmpF && "Module has no setjmp function to lower");
  for (User *U : SetjmpF->users()) {
    auto *CI = dyn_cast<CallInst>(U);
    if (!CI)
      report_fatal_error("Does not support indirect calls to setjmp");

    BasicBlock *BB = CI->getParent();
    if (BB->getParent() != &F) // in other function
      continue;

    // The tail is everything right after the call, and will be reached once
    // when setjmp is called, and later when longjmp returns to the setjmp
    BasicBlock *Tail = SplitBlock(BB, CI->getNextNode());
    // Add a phi to the tail, which will be the output of setjmp, which
    // indicates if this is the first call or a longjmp back. The phi directly
    // uses the right value based on where we arrive from
    IRB.SetInsertPoint(Tail->getFirstNonPHI());
    PHINode *SetjmpRet = IRB.CreatePHI(IRB.getInt32Ty(), 2, "setjmp.ret");

    // setjmp initial call returns 0
    SetjmpRet->addIncoming(IRB.getInt32(0), BB);
    // The proper output is now this, not the setjmp call itself
    CI->replaceAllUsesWith(SetjmpRet);
    // longjmp returns to the setjmp will add themselves to this phi
    SetjmpRetPHIs.push_back(SetjmpRet);

    // Fix call target
    // Our index in the function is our place in the array + 1 to avoid index
    // 0, because index 0 means the longjmp is not ours to handle.
    IRB.SetInsertPoint(CI);
    Value *Args[] = {CI->getArgOperand(0), IRB.getInt32(SetjmpRetPHIs.size()),
                     SetjmpTable, SetjmpTableSize};
    Instruction *NewSetjmpTable =
        IRB.CreateCall(SaveSetjmpF, Args, "setjmpTable");
    Instruction *NewSetjmpTableSize =
        IRB.CreateLoad(TempRet0GV, "setjmpTableSize");
    SetjmpTableInsts.push_back(NewSetjmpTable);
    SetjmpTableSizeInsts.push_back(NewSetjmpTableSize);
    ToErase.push_back(CI);
  }

  // Update each call that can longjmp so it can return to a setjmp where
  // relevant.

  // Because we are creating new BBs while processing and don't want to make
  // all these newly created BBs candidates again for longjmp processing, we
  // first make the vector of candidate BBs.
  std::vector<BasicBlock *> BBs;
  for (BasicBlock &BB : F)
    BBs.push_back(&BB);

  // BBs.size() will change within the loop, so we query it every time
  for (unsigned i = 0; i < BBs.size(); i++) {
    BasicBlock *BB = BBs[i];
    for (Instruction &I : *BB) {
      assert(!isa<InvokeInst>(&I));
      auto *CI = dyn_cast<CallInst>(&I);
      if (!CI)
        continue;

      const Value *Callee = CI->getCalledValue();
      if (!canLongjmp(M, Callee))
        continue;

      Value *Threw = nullptr;
      BasicBlock *Tail;
      if (Callee->getName().startswith(InvokePrefix)) {
        // If invoke wrapper has already been generated for this call in
        // previous EH phase, search for the load instruction
        // %__THREW__.val = __THREW__;
        // in postamble after the invoke wrapper call
        LoadInst *ThrewLI = nullptr;
        StoreInst *ThrewResetSI = nullptr;
        for (auto It = std::next(BasicBlock::iterator(CI)), IE = BB->end();
             It != IE; ++It) {
          if (auto *LI = dyn_cast<LoadInst>(It))
            if (auto *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand()))
              if (GV == ThrewGV) {
                Threw = ThrewLI = LI;
                break;
              }
        }
        // Check the load was found BEFORE ThrewLI is used to seed the next
        // search; asserting afterwards would be too late to catch a null.
        assert(Threw && ThrewLI && "Cannot find __THREW__ load after invoke");

        // Search for the store instruction after the load above
        // __THREW__ = 0;
        for (auto It = std::next(BasicBlock::iterator(ThrewLI)),
                  IE = BB->end();
             It != IE; ++It) {
          if (auto *SI = dyn_cast<StoreInst>(It))
            if (auto *GV = dyn_cast<GlobalVariable>(SI->getPointerOperand()))
              if (GV == ThrewGV && SI->getValueOperand() == IRB.getInt32(0)) {
                ThrewResetSI = SI;
                break;
              }
        }
        assert(ThrewResetSI && "Cannot find __THREW__ store after invoke");
        Tail = SplitBlock(BB, ThrewResetSI->getNextNode());

      } else {
        // Wrap call with invoke wrapper and generate preamble/postamble
        Threw = wrapInvoke(CI);
        ToErase.push_back(CI);
        Tail = SplitBlock(BB, CI->getNextNode());
      }

      // We need to replace the terminator in Tail - SplitBlock makes BB go
      // straight to Tail, we need to check if a longjmp occurred, and go to the
      // right setjmp-tail if so
      ToErase.push_back(BB->getTerminator());

      // Generate a function call to testSetjmp function and preamble/postamble
      // code to figure out (1) whether longjmp occurred (2) if longjmp
      // occurred, which setjmp it corresponds to
      Value *Label = nullptr;
      Value *LongjmpResult = nullptr;
      BasicBlock *EndBB = nullptr;
      wrapTestSetjmp(BB, CI, Threw, SetjmpTable, SetjmpTableSize, Label,
                     LongjmpResult, EndBB);
      assert(Label && LongjmpResult && EndBB);

      // Create switch instruction
      IRB.SetInsertPoint(EndBB);
      SwitchInst *SI = IRB.CreateSwitch(Label, Tail, SetjmpRetPHIs.size());
      // -1 means no longjmp happened, continue normally (will hit the default
      // switch case). 0 means a longjmp that is not ours to handle, needs a
      // rethrow. Otherwise the index is the same as the index in P+1 (to avoid
      // 0).
      for (unsigned PhiIdx = 0; PhiIdx < SetjmpRetPHIs.size(); PhiIdx++) {
        SI->addCase(IRB.getInt32(PhiIdx + 1),
                    SetjmpRetPHIs[PhiIdx]->getParent());
        SetjmpRetPHIs[PhiIdx]->addIncoming(LongjmpResult, EndBB);
      }

      // We are splitting the block here, and must continue to find other calls
      // in the block - which is now split. so continue to traverse in the Tail
      BBs.push_back(Tail);
    }
  }

  // Erase everything we no longer need in this function
  for (Instruction *I : ToErase)
    I->eraseFromParent();

  // Free setjmpTable buffer before each return instruction
  for (BasicBlock &BB : F) {
    TerminatorInst *TI = BB.getTerminator();
    if (isa<ReturnInst>(TI))
      CallInst::CreateFree(SetjmpTable, TI);
  }

  // Every call to saveSetjmp can change setjmpTable and setjmpTableSize
  // (when buffer reallocation occurs)
  // entry:
  //   setjmpTableSize = 4;
  //   setjmpTable = (int *) malloc(40);
  //   setjmpTable[0] = 0;
  // ...
  // somebb:
  //   setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize);
  //   setjmpTableSize = __tempRet0;
  // So we need to make sure the SSA for these variables is valid so that every
  // saveSetjmp and testSetjmp calls have the correct arguments.
  SSAUpdater SetjmpTableSSA;
  SSAUpdater SetjmpTableSizeSSA;
  SetjmpTableSSA.Initialize(Type::getInt32PtrTy(C), "setjmpTable");
  SetjmpTableSizeSSA.Initialize(Type::getInt32Ty(C), "setjmpTableSize");
  for (Instruction *I : SetjmpTableInsts)
    SetjmpTableSSA.AddAvailableValue(I->getParent(), I);
  for (Instruction *I : SetjmpTableSizeInsts)
    SetjmpTableSizeSSA.AddAvailableValue(I->getParent(), I);

  // Rewrite every use of the initial table value that lives outside the entry
  // block.
  for (auto UI = SetjmpTable->use_begin(), UE = SetjmpTable->use_end();
       UI != UE;) {
    // Grab the use before incrementing the iterator.
    Use &U = *UI;
    // Increment the iterator before removing the use from the list.
    ++UI;
    if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
      if (I->getParent() != &EntryBB)
        SetjmpTableSSA.RewriteUse(U);
  }
  // Same treatment for the initial table-size value.
  for (auto UI = SetjmpTableSize->use_begin(), UE = SetjmpTableSize->use_end();
       UI != UE;) {
    Use &U = *UI;
    ++UI;
    if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
      if (I->getParent() != &EntryBB)
        SetjmpTableSizeSSA.RewriteUse(U);
  }

  // Finally, our modifications to the cfg can break dominance of SSA variables.
  // For example, in this code,
  // if (x()) { .. setjmp() .. }
  // if (y()) { .. longjmp() .. }
  // We must split the longjmp block, and it can jump into the block split off
  // from the setjmp one. But that means that when we split the setjmp block,
  // its first part no longer dominates its second part - there is a
  // theoretically possible control flow path where x() is false, then y() is
  // true and we reach the second part of the setjmp block, without ever
  // reaching the first part. So, we rebuild SSA form here.
  rebuildSSA(F);
  return true;
}
/// InlineFunction - This function inlines the called function into the basic /// block of the caller. This returns false if it is not possible to inline /// this call. The program is still in a well defined state if this occurs /// though. /// /// Note that this only does one level of inlining. For example, if the /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now /// exists in the instruction stream. Similarly this will inline a recursive /// function by one level. bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, bool InsertLifetime) { Instruction *TheCall = CS.getInstruction(); assert(TheCall->getParent() && TheCall->getParent()->getParent() && "Instruction not in function!"); // If IFI has any state in it, zap it before we fill it in. IFI.reset(); const Function *CalledFunc = CS.getCalledFunction(); if (CalledFunc == 0 || // Can't inline external function or indirect CalledFunc->isDeclaration() || // call, or call to a vararg function! CalledFunc->getFunctionType()->isVarArg()) return false; // If the call to the callee is not a tail call, we must clear the 'tail' // flags on any calls that we inline. bool MustClearTailCallFlags = !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall()); // If the call to the callee cannot throw, set the 'nounwind' flag on any // calls that we inline. bool MarkNoUnwind = CS.doesNotThrow(); BasicBlock *OrigBB = TheCall->getParent(); Function *Caller = OrigBB->getParent(); // GC poses two hazards to inlining, which only occur when the callee has GC: // 1. If the caller has no GC, then the callee's GC must be propagated to the // caller. // 2. If the caller has a differing GC, it is invalid to inline. if (CalledFunc->hasGC()) { if (!Caller->hasGC()) Caller->setGC(CalledFunc->getGC()); else if (CalledFunc->getGC() != Caller->getGC()) return false; } // Get the personality function from the callee if it contains a landing pad. 
Value *CalleePersonality = 0; for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I) if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { const BasicBlock *BB = II->getUnwindDest(); const LandingPadInst *LP = BB->getLandingPadInst(); CalleePersonality = LP->getPersonalityFn(); break; } // Find the personality function used by the landing pads of the caller. If it // exists, then check to see that it matches the personality function used in // the callee. if (CalleePersonality) { for (Function::const_iterator I = Caller->begin(), E = Caller->end(); I != E; ++I) if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { const BasicBlock *BB = II->getUnwindDest(); const LandingPadInst *LP = BB->getLandingPadInst(); // If the personality functions match, then we can perform the // inlining. Otherwise, we can't inline. // TODO: This isn't 100% true. Some personality functions are proper // supersets of others and can be used in place of the other. if (LP->getPersonalityFn() != CalleePersonality) return false; break; } } // Get an iterator to the last basic block in the function, which will have // the new function inlined after it. Function::iterator LastBlock = &Caller->back(); // Make sure to capture all of the return instructions from the cloned // function. SmallVector<ReturnInst*, 8> Returns; ClonedCodeInfo InlinedFunctionInfo; Function::iterator FirstNewBlock; { // Scope to destroy VMap after cloning. ValueToValueMapTy VMap; assert(CalledFunc->arg_size() == CS.arg_size() && "No varargs calls can be inlined!"); // Calculate the vector of arguments to pass into the function cloner, which // matches up the formal to the actual argument values. 
CallSite::arg_iterator AI = CS.arg_begin(); unsigned ArgNo = 0; for (Function::const_arg_iterator I = CalledFunc->arg_begin(), E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { Value *ActualArg = *AI; // When byval arguments actually inlined, we need to make the copy implied // by them explicit. However, we don't do this if the callee is readonly // or readnone, because the copy would be unneeded: the callee doesn't // modify the struct. if (CS.isByValArgument(ArgNo)) { ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, CalledFunc->getParamAlignment(ArgNo+1)); // Calls that we inline may use the new alloca, so we need to clear // their 'tail' flags if HandleByValArgument introduced a new alloca and // the callee has calls. MustClearTailCallFlags |= ActualArg != *AI; } VMap[I] = ActualArg; } // We want the inliner to prune the code as it copies. We would LOVE to // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", &InlinedFunctionInfo, IFI.TD, TheCall); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; // Update the callgraph if requested. if (IFI.CG) UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); // Update inlined instructions' line number information. fixupLineNumbers(Caller, FirstNewBlock, TheCall); } // If there are any alloca instructions in the block that used to be the entry // block for the callee, move them to the entry block of the caller. First // calculate which instruction they should be inserted before. We insert the // instructions at the end of the current alloca list. 
{ BasicBlock::iterator InsertPoint = Caller->begin()->begin(); for (BasicBlock::iterator I = FirstNewBlock->begin(), E = FirstNewBlock->end(); I != E; ) { AllocaInst *AI = dyn_cast<AllocaInst>(I++); if (AI == 0) continue; // If the alloca is now dead, remove it. This often occurs due to code // specialization. if (AI->use_empty()) { AI->eraseFromParent(); continue; } if (!isa<Constant>(AI->getArraySize())) continue; // Keep track of the static allocas that we inline into the caller. IFI.StaticAllocas.push_back(AI); // Scan for the block of allocas that we can move over, and move them // all at once. while (isa<AllocaInst>(I) && isa<Constant>(cast<AllocaInst>(I)->getArraySize())) { IFI.StaticAllocas.push_back(cast<AllocaInst>(I)); ++I; } // Transfer all of the allocas over in a block. Using splice means // that the instructions aren't removed from the symbol table, then // reinserted. Caller->getEntryBlock().getInstList().splice(InsertPoint, FirstNewBlock->getInstList(), AI, I); } } // Leave lifetime markers for the static alloca's, scoping them to the // function we just inlined. if (InsertLifetime && !IFI.StaticAllocas.empty()) { IRBuilder<> builder(FirstNewBlock->begin()); for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { AllocaInst *AI = IFI.StaticAllocas[ai]; // If the alloca is already scoped to something smaller than the whole // function then there's no need to add redundant, less accurate markers. if (hasLifetimeMarkers(AI)) continue; builder.CreateLifetimeStart(AI); for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) { IRBuilder<> builder(Returns[ri]); builder.CreateLifetimeEnd(AI); } } } // If the inlined code contained dynamic alloca instructions, wrap the inlined // code with llvm.stacksave/llvm.stackrestore intrinsics. if (InlinedFunctionInfo.ContainsDynamicAllocas) { Module *M = Caller->getParent(); // Get the two intrinsics we care about. 
Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); // Insert the llvm.stacksave. CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin()) .CreateCall(StackSave, "savedstack"); // Insert a call to llvm.stackrestore before any return instructions in the // inlined function. for (unsigned i = 0, e = Returns.size(); i != e; ++i) { IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr); } } // If we are inlining tail call instruction through a call site that isn't // marked 'tail', we must remove the tail marker for any calls in the inlined // code. Also, calls inlined through a 'nounwind' call site should be marked // 'nounwind'. if (InlinedFunctionInfo.ContainsCalls && (MustClearTailCallFlags || MarkNoUnwind)) { for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (CallInst *CI = dyn_cast<CallInst>(I)) { if (MustClearTailCallFlags) CI->setTailCall(false); if (MarkNoUnwind) CI->setDoesNotThrow(); } } // If we are inlining for an invoke instruction, we must make sure to rewrite // any call instructions into invoke instructions. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo); // If we cloned in _exactly one_ basic block, and if that block ends in a // return instruction, we splice the body of the inlined callee directly into // the calling basic block. if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { // Move all of the instructions right before the call. OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(), FirstNewBlock->begin(), FirstNewBlock->end()); // Remove the cloned basic block. Caller->getBasicBlockList().pop_back(); // If the call site was an invoke instruction, add a branch to the normal // destination. 
if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) BranchInst::Create(II->getNormalDest(), TheCall); // If the return instruction returned a value, replace uses of the call with // uses of the returned value. if (!TheCall->use_empty()) { ReturnInst *R = Returns[0]; if (TheCall == R->getReturnValue()) TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); else TheCall->replaceAllUsesWith(R->getReturnValue()); } // Since we are now done with the Call/Invoke, we can delete it. TheCall->eraseFromParent(); // Since we are now done with the return instruction, delete it also. Returns[0]->eraseFromParent(); // We are now done with the inlining. return true; } // Otherwise, we have the normal case, of more than one block to inline or // multiple return sites. // We want to clone the entire callee function into the hole between the // "starter" and "ender" blocks. How we accomplish this depends on whether // this is an invoke instruction or a call instruction. BasicBlock *AfterCallBB; if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { // Add an unconditional branch to make this look like the CallInst case... BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); // Split the basic block. This guarantees that no PHI nodes will have to be // updated due to new incoming edges, and make the invoke case more // symmetric to the call case. AfterCallBB = OrigBB->splitBasicBlock(NewBr, CalledFunc->getName()+".exit"); } else { // It's a call // If this is a call instruction, we need to split the basic block that // the call lives in. // AfterCallBB = OrigBB->splitBasicBlock(TheCall, CalledFunc->getName()+".exit"); } // Change the branch that used to go to AfterCallBB to branch to the first // basic block of the inlined function. // TerminatorInst *Br = OrigBB->getTerminator(); assert(Br && Br->getOpcode() == Instruction::Br && "splitBasicBlock broken!"); Br->setOperand(0, FirstNewBlock); // Now that the function is correct, make it a little bit nicer. 
In // particular, move the basic blocks inserted from the end of the function // into the space made by splitting the source basic block. Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(), FirstNewBlock, Caller->end()); // Handle all of the return instructions that we just cloned in, and eliminate // any users of the original call/invoke instruction. Type *RTy = CalledFunc->getReturnType(); PHINode *PHI = 0; if (Returns.size() > 1) { // The PHI node should go at the front of the new basic block to merge all // possible incoming values. if (!TheCall->use_empty()) { PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(), AfterCallBB->begin()); // Anything that used the result of the function call should now use the // PHI node as their operand. TheCall->replaceAllUsesWith(PHI); } // Loop over all of the return instructions adding entries to the PHI node // as appropriate. if (PHI) { for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; assert(RI->getReturnValue()->getType() == PHI->getType() && "Ret value not consistent in function!"); PHI->addIncoming(RI->getReturnValue(), RI->getParent()); } } // Add a branch to the merge points and remove return instructions. for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; BranchInst::Create(AfterCallBB, RI); RI->eraseFromParent(); } } else if (!Returns.empty()) { // Otherwise, if there is exactly one return value, just replace anything // using the return value of the call with the computed value. if (!TheCall->use_empty()) { if (TheCall == Returns[0]->getReturnValue()) TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); else TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); } // Update PHI nodes that use the ReturnBB to use the AfterCallBB. 
BasicBlock *ReturnBB = Returns[0]->getParent(); ReturnBB->replaceAllUsesWith(AfterCallBB); // Splice the code from the return block into the block that it will return // to, which contains the code that was after the call. AfterCallBB->getInstList().splice(AfterCallBB->begin(), ReturnBB->getInstList()); // Delete the return instruction now and empty ReturnBB now. Returns[0]->eraseFromParent(); ReturnBB->eraseFromParent(); } else if (!TheCall->use_empty()) { // No returns, but something is using the return value of the call. Just // nuke the result. TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); } // Since we are now done with the Call/Invoke, we can delete it. TheCall->eraseFromParent(); // We should always be able to fold the entry block of the function into the // single predecessor of the block... assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!"); BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0); // Splice the code entry block into calling block, right before the // unconditional branch. CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes OrigBB->getInstList().splice(Br, CalleeEntry->getInstList()); // Remove the unconditional branch. OrigBB->getInstList().erase(Br); // Now we can remove the CalleeEntry block, which is now empty. Caller->getBasicBlockList().erase(CalleeEntry); // If we inserted a phi node, check to see if it has a single value (e.g. all // the entries are the same or undef). If so, remove the PHI so it doesn't // block other optimizations. if (PHI) { if (Value *V = SimplifyInstruction(PHI, IFI.TD)) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); } } return true; }
/// UnswitchNontrivialCondition - We determined that the loop is profitable /// to unswitch when LIC equal Val. Split it into loop versions and test the /// condition outside of either loop. Return the loops created as Out1/Out2. void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, Loop *L) { Function *F = loopHeader->getParent(); DEBUG(dbgs() << "loop-unswitch: Unswitching loop %" << loopHeader->getName() << " [" << L->getBlocks().size() << " blocks] in Function " << F->getName() << " when '" << *Val << "' == " << *LIC << "\n"); if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>()) SE->forgetLoop(L); LoopBlocks.clear(); NewBlocks.clear(); // First step, split the preheader and exit blocks, and add these blocks to // the LoopBlocks list. BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, this); LoopBlocks.push_back(NewPreheader); // We want the loop to come after the preheader, but before the exit blocks. LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end()); SmallVector<BasicBlock*, 8> ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); // Split all of the edges from inside the loop to their exit blocks. Update // the appropriate Phi nodes as we do so. SplitExitEdges(L, ExitBlocks); // The exit blocks may have been changed due to edge splitting, recompute. ExitBlocks.clear(); L->getUniqueExitBlocks(ExitBlocks); // Add exit blocks to the loop blocks. LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end()); // Next step, clone all of the basic blocks that make up the loop (including // the loop preheader and exit blocks), keeping track of the mapping between // the instructions and blocks. NewBlocks.reserve(LoopBlocks.size()); ValueToValueMapTy VMap; for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) { BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F); NewBlocks.push_back(NewBB); VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping. 
LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L); } // Splice the newly inserted blocks into the function right before the // original preheader. F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(), NewBlocks[0], F->end()); // Now we create the new Loop object for the versioned loop. Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM); // Recalculate unswitching quota, inherit simplified switches info for NewBB, // Probably clone more loop-unswitch related loop properties. BranchesInfo.cloneData(NewLoop, L, VMap); Loop *ParentLoop = L->getParentLoop(); if (ParentLoop) { // Make sure to add the cloned preheader and exit blocks to the parent loop // as well. ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase()); } for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]); // The new exit block should be in the same loop as the old one. if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i])) ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase()); assert(NewExit->getTerminator()->getNumSuccessors() == 1 && "Exit block should have been split to have one successor!"); BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0); // If the successor of the exit block had PHI nodes, add an entry for // NewExit. 
PHINode *PN; for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) { PN = cast<PHINode>(I); Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]); ValueToValueMapTy::iterator It = VMap.find(V); if (It != VMap.end()) V = It->second; PN->addIncoming(V, NewExit); } if (LandingPadInst *LPad = NewExit->getLandingPadInst()) { PN = PHINode::Create(LPad->getType(), 0, "", ExitSucc->getFirstInsertionPt()); for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc); I != E; ++I) { BasicBlock *BB = *I; LandingPadInst *LPI = BB->getLandingPadInst(); LPI->replaceAllUsesWith(PN); PN->addIncoming(LPI, BB); } } } // Rewrite the code to refer to itself. for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries); // Rewrite the original preheader to select between versions of the loop. BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator()); assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] && "Preheader splitting did not work correctly!"); // Emit the new branch that selects between the two versions of this loop. EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR); LPM->deleteSimpleAnalysisValue(OldBR, L); OldBR->eraseFromParent(); LoopProcessWorklist.push_back(NewLoop); redoLoop = true; // Keep a WeakVH holding onto LIC. If the first call to RewriteLoopBody // deletes the instruction (for example by simplifying a PHI that feeds into // the condition that we're unswitching on), we don't rewrite the second // iteration. WeakVH LICHandle(LIC); // Now we rewrite the original code to know that the condition is true and the // new code to know that the condition is false. 
RewriteLoopBodyWithConditionConstant(L, LIC, Val, false); // It's possible that simplifying one loop could cause the other to be // changed to another value or a constant. If its a constant, don't simplify // it. if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop && LICHandle && !isa<Constant>(LICHandle)) RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true); }
bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail, SmallVectorImpl<PHINode *> &ArgumentPHIs, bool CannotTailCallElimCallsMarkedTail) { // If we are introducing accumulator recursion to eliminate operations after // the call instruction that are both associative and commutative, the initial // value for the accumulator is placed in this variable. If this value is set // then we actually perform accumulator recursion elimination instead of // simple tail recursion elimination. If the operation is an LLVM instruction // (eg: "add") then it is recorded in AccumulatorRecursionInstr. If not, then // we are handling the case when the return instruction returns a constant C // which is different to the constant returned by other return instructions // (which is recorded in AccumulatorRecursionEliminationInitVal). This is a // special case of accumulator recursion, the operation being "return C". Value *AccumulatorRecursionEliminationInitVal = 0; Instruction *AccumulatorRecursionInstr = 0; // Ok, we found a potential tail call. We can currently only transform the // tail call if all of the instructions between the call and the return are // movable to above the call itself, leaving the call next to the return. // Check that this is the case now. BasicBlock::iterator BBI = CI; for (++BBI; &*BBI != Ret; ++BBI) { if (CanMoveAboveCall(BBI, CI)) continue; // If we can't move the instruction above the call, it might be because it // is an associative and commutative operation that could be transformed // using accumulator recursion elimination. Check to see if this is the // case, and if so, remember the initial accumulator value for later. if ((AccumulatorRecursionEliminationInitVal = CanTransformAccumulatorRecursion(BBI, CI))) { // Yes, this is accumulator recursion. Remember which instruction // accumulates. 
AccumulatorRecursionInstr = BBI; } else { return false; // Otherwise, we cannot eliminate the tail recursion! } } // We can only transform call/return pairs that either ignore the return value // of the call and return void, ignore the value of the call and return a // constant, return the value returned by the tail call, or that are being // accumulator recursion variable eliminated. if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI && !isa<UndefValue>(Ret->getReturnValue()) && AccumulatorRecursionEliminationInitVal == 0 && !getCommonReturnValue(0, CI)) { // One case remains that we are able to handle: the current return // instruction returns a constant, and all other return instructions // return a different constant. if (!isDynamicConstant(Ret->getReturnValue(), CI, Ret)) return false; // Current return instruction does not return a constant. // Check that all other return instructions return a common constant. If // so, record it in AccumulatorRecursionEliminationInitVal. AccumulatorRecursionEliminationInitVal = getCommonReturnValue(Ret, CI); if (!AccumulatorRecursionEliminationInitVal) return false; } BasicBlock *BB = Ret->getParent(); Function *F = BB->getParent(); // OK! We can transform this tail call. If this is the first one found, // create the new entry block, allowing us to branch back to the old entry. if (OldEntry == 0) { OldEntry = &F->getEntryBlock(); BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry); NewEntry->takeName(OldEntry); OldEntry->setName("tailrecurse"); BranchInst::Create(OldEntry, NewEntry); // If this tail call is marked 'tail' and if there are any allocas in the // entry block, move them up to the new entry block. TailCallsAreMarkedTail = CI->isTailCall(); if (TailCallsAreMarkedTail) // Move all fixed sized allocas from OldEntry to NewEntry. 
for (BasicBlock::iterator OEBI = OldEntry->begin(), E = OldEntry->end(), NEBI = NewEntry->begin(); OEBI != E; ) if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++)) if (isa<ConstantInt>(AI->getArraySize())) AI->moveBefore(NEBI); // Now that we have created a new block, which jumps to the entry // block, insert a PHI node for each argument of the function. // For now, we initialize each PHI to only have the real arguments // which are passed in. Instruction *InsertPos = OldEntry->begin(); for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { PHINode *PN = PHINode::Create(I->getType(), 2, I->getName() + ".tr", InsertPos); I->replaceAllUsesWith(PN); // Everyone use the PHI node now! PN->addIncoming(I, NewEntry); ArgumentPHIs.push_back(PN); } } // If this function has self recursive calls in the tail position where some // are marked tail and some are not, only transform one flavor or another. We // have to choose whether we move allocas in the entry block to the new entry // block or not, so we can't make a good choice for both. NOTE: We could do // slightly better here in the case that the function has no entry block // allocas. if (TailCallsAreMarkedTail && !CI->isTailCall()) return false; // Ok, now that we know we have a pseudo-entry block WITH all of the // required PHI nodes, add entries into the PHI node for the actual // parameters passed into the tail-recursive call. for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB); // If we are introducing an accumulator variable to eliminate the recursion, // do so now. Note that we _know_ that no subsequent tail recursion // eliminations will happen on this function because of the way the // accumulator recursion predicate is set up. // if (AccumulatorRecursionEliminationInitVal) { Instruction *AccRecInstr = AccumulatorRecursionInstr; // Start by inserting a new PHI node for the accumulator. 
pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry); PHINode *AccPN = PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(), std::distance(PB, PE) + 1, "accumulator.tr", OldEntry->begin()); // Loop over all of the predecessors of the tail recursion block. For the // real entry into the function we seed the PHI with the initial value, // computed earlier. For any other existing branches to this block (due to // other tail recursions eliminated) the accumulator is not modified. // Because we haven't added the branch in the current block to OldEntry yet, // it will not show up as a predecessor. for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; if (P == &F->getEntryBlock()) AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P); else AccPN->addIncoming(AccPN, P); } if (AccRecInstr) { // Add an incoming argument for the current block, which is computed by // our associative and commutative accumulator instruction. AccPN->addIncoming(AccRecInstr, BB); // Next, rewrite the accumulator recursion instruction so that it does not // use the result of the call anymore, instead, use the PHI node we just // inserted. AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN); } else { // Add an incoming argument for the current block, which is just the // constant returned by the current return instruction. AccPN->addIncoming(Ret->getReturnValue(), BB); } // Finally, rewrite any return instructions in the program to return the PHI // node instead of the "initval" that they do currently. This loop will // actually rewrite the return value we are destroying, but that's ok. for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator())) RI->setOperand(0, AccPN); ++NumAccumAdded; } // Now that all of the PHI nodes are in place, remove the call and // ret instructions, replacing them with an unconditional branch. 
BranchInst *NewBI = BranchInst::Create(OldEntry, Ret); NewBI->setDebugLoc(CI->getDebugLoc()); BB->getInstList().erase(Ret); // Remove return. BB->getInstList().erase(CI); // Remove call. ++NumEliminated; return true; }
/// SimplifyStoreAtEndOfBlock - Turn things like: /// if () { *P = v1; } else { *P = v2 } /// into a phi node with a store in the successor. /// /// Simplify things like: /// *P = v1; if () { *P = v2; } /// into a phi node with a store in the successor. /// bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { BasicBlock *StoreBB = SI.getParent(); // Check to see if the successor block has exactly two incoming edges. If // so, see if the other predecessor contains a store to the same location. // if so, insert a PHI node (if needed) and move the stores down. BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0); // Determine whether Dest has exactly two predecessors and, if so, compute // the other predecessor. pred_iterator PI = pred_begin(DestBB); BasicBlock *P = *PI; BasicBlock *OtherBB = nullptr; if (P != StoreBB) OtherBB = P; if (++PI == pred_end(DestBB)) return false; P = *PI; if (P != StoreBB) { if (OtherBB) return false; OtherBB = P; } if (++PI != pred_end(DestBB)) return false; // Bail out if all the relevant blocks aren't distinct (this can happen, // for example, if SI is in an infinite loop) if (StoreBB == DestBB || OtherBB == DestBB) return false; // Verify that the other block ends in a branch and is not otherwise empty. BasicBlock::iterator BBI(OtherBB->getTerminator()); BranchInst *OtherBr = dyn_cast<BranchInst>(BBI); if (!OtherBr || BBI == OtherBB->begin()) return false; // If the other block ends in an unconditional branch, check for the 'if then // else' case. there is an instruction before the branch. StoreInst *OtherStore = nullptr; if (OtherBr->isUnconditional()) { --BBI; // Skip over debugging info. while (isa<DbgInfoIntrinsic>(BBI) || (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { if (BBI==OtherBB->begin()) return false; --BBI; } // If this isn't a store, isn't a store to the same location, or is not the // right kind of store, bail out. 
OtherStore = dyn_cast<StoreInst>(BBI); if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) || !SI.isSameOperationAs(OtherStore)) return false; } else { // Otherwise, the other block ended with a conditional branch. If one of the // destinations is StoreBB, then we have the if/then case. if (OtherBr->getSuccessor(0) != StoreBB && OtherBr->getSuccessor(1) != StoreBB) return false; // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an // if/then triangle. See if there is a store to the same ptr as SI that // lives in OtherBB. for (;; --BBI) { // Check to see if we find the matching store. if ((OtherStore = dyn_cast<StoreInst>(BBI))) { if (OtherStore->getOperand(1) != SI.getOperand(1) || !SI.isSameOperationAs(OtherStore)) return false; break; } // If we find something that may be using or overwriting the stored // value, or if we run out of instructions, we can't do the xform. if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() || BBI == OtherBB->begin()) return false; } // In order to eliminate the store in OtherBr, we have to // make sure nothing reads or overwrites the stored value in // StoreBB. for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) { // FIXME: This should really be AA driven. if (I->mayReadFromMemory() || I->mayWriteToMemory()) return false; } } // Insert a PHI node now if we need it. Value *MergedVal = OtherStore->getOperand(0); if (MergedVal != SI.getOperand(0)) { PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge"); PN->addIncoming(SI.getOperand(0), SI.getParent()); PN->addIncoming(OtherStore->getOperand(0), OtherBB); MergedVal = InsertNewInstBefore(PN, DestBB->front()); } // Advance to a place where it is safe to insert the new store and // insert it. 
BBI = DestBB->getFirstInsertionPt(); StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(), SI.getAlignment(), SI.getOrdering(), SI.getSynchScope()); InsertNewInstBefore(NewSI, *BBI); NewSI->setDebugLoc(OtherStore->getDebugLoc()); // If the two stores had AA tags, merge them. AAMDNodes AATags; SI.getAAMetadata(AATags); if (AATags) { OtherStore->getAAMetadata(AATags, /* Merge = */ true); NewSI->setAAMetadata(AATags); } // Nuke the old stores. EraseInstFromFunction(SI); EraseInstFromFunction(*OtherStore); return true; }
// If we have a PHI node with a vector type that has only 2 uses: feed // itself and be an operand of extractelemnt at a constant location, // try to replace the PHI of the vector type with a PHI of a scalar type Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { // Verify that the PHI node has exactly 2 uses. Otherwise return NULL. if (!PN->hasNUses(2)) return NULL; // If so, it's known at this point that one operand is PHI and the other is // an extractelement node. Find the PHI user that is not the extractelement // node. Value::use_iterator iu = PN->use_begin(); Instruction *PHIUser = dyn_cast<Instruction>(*iu); if (PHIUser == cast<Instruction>(&EI)) PHIUser = cast<Instruction>(*(++iu)); // Verify that this PHI user has one use, which is the PHI itself, // and that it is a binary operation which is cheap to scalarize. // otherwise return NULL. if (!PHIUser->hasOneUse() || !(PHIUser->use_back() == PN) || !(isa<BinaryOperator>(PHIUser)) || !CheapToScalarize(PHIUser, true)) return NULL; // Create a scalar PHI node that will replace the vector PHI node // just before the current PHI node. PHINode * scalarPHI = cast<PHINode>( InsertNewInstWith(PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN)); // Scalarize each PHI operand. for (unsigned i=0; i < PN->getNumIncomingValues(); i++) { Value *PHIInVal = PN->getIncomingValue(i); BasicBlock *inBB = PN->getIncomingBlock(i); Value *Elt = EI.getIndexOperand(); // If the operand is the PHI induction variable: if (PHIInVal == PHIUser) { // Scalarize the binary operation. Its first operand is the // scalar PHI and the second operand is extracted from the other // vector operand. BinaryOperator *B0 = cast<BinaryOperator>(PHIUser); unsigned opId = (B0->getOperand(0) == PN) ? 
1: 0; Value *Op = Builder->CreateExtractElement( B0->getOperand(opId), Elt, B0->getOperand(opId)->getName()+".Elt"); Value *newPHIUser = InsertNewInstWith( BinaryOperator::Create(B0->getOpcode(), scalarPHI,Op), *B0); scalarPHI->addIncoming(newPHIUser, inBB); } else { // Scalarize PHI input: Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, ""); // Insert the new instruction into the predecessor basic block. Instruction *pos = dyn_cast<Instruction>(PHIInVal); BasicBlock::iterator InsertPos; if (pos && !isa<PHINode>(pos)) { InsertPos = pos; ++InsertPos; } else { InsertPos = inBB->getFirstInsertionPt(); } InsertNewInstWith(newEI, *InsertPos); scalarPHI->addIncoming(newEI, inBB); } } return ReplaceInstUsesWith(EI, scalarPHI); }
/// InsertUniqueBackedgeBlock - This method is called when the specified loop /// has more than one backedge in it. If this occurs, revector all of these /// backedges to target a new basic block and have that block branch to the loop /// header. This ensures that loops have exactly one backedge. /// BasicBlock * LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); // Get information about the loop BasicBlock *Header = L->getHeader(); Function *F = Header->getParent(); // Unique backedge insertion currently depends on having a preheader. if (!Preheader) return 0; // Figure out which basic blocks contain back-edges to the loop header. std::vector<BasicBlock*> BackedgeBlocks; for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){ BasicBlock *P = *I; // Indirectbr edges cannot be split, so we must fail if we find one. if (isa<IndirectBrInst>(P->getTerminator())) return 0; if (P != Preheader) BackedgeBlocks.push_back(P); } // Create and insert the new backedge block... BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(), Header->getName()+".backedge", F); BranchInst *BETerminator = BranchInst::Create(Header, BEBlock); DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block " << BEBlock->getName() << "\n"); // Move the new backedge block to right after the last backedge block. Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos; F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock); // Now that the block has been inserted into the function, create PHI nodes in // the backedge block which correspond to any PHI nodes in the header block. 
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(), PN->getName()+".be", BETerminator); if (AA) AA->copyValue(PN, NewPN); // Loop over the PHI node, moving all entries except the one for the // preheader over to the new PHI node. unsigned PreheaderIdx = ~0U; bool HasUniqueIncomingValue = true; Value *UniqueValue = 0; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *IBB = PN->getIncomingBlock(i); Value *IV = PN->getIncomingValue(i); if (IBB == Preheader) { PreheaderIdx = i; } else { NewPN->addIncoming(IV, IBB); if (HasUniqueIncomingValue) { if (UniqueValue == 0) UniqueValue = IV; else if (UniqueValue != IV) HasUniqueIncomingValue = false; } } } // Delete all of the incoming values from the old PN except the preheader's assert(PreheaderIdx != ~0U && "PHI has no preheader entry??"); if (PreheaderIdx != 0) { PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx)); PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx)); } // Nuke all entries except the zero'th. for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i) PN->removeIncomingValue(e-i, false); // Finally, add the newly constructed PHI node as the entry for the BEBlock. PN->addIncoming(NewPN, BEBlock); // As an optimization, if all incoming values in the new PhiNode (which is a // subset of the incoming values of the old PHI node) have the same value, // eliminate the PHI Node. if (HasUniqueIncomingValue) { NewPN->replaceAllUsesWith(UniqueValue); if (AA) AA->deleteValue(NewPN); BEBlock->getInstList().erase(NewPN); } } // Now that all of the PHI nodes have been inserted and adjusted, modify the // backedge blocks to just to the BEBlock instead of the header. 
for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) { TerminatorInst *TI = BackedgeBlocks[i]->getTerminator(); for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op) if (TI->getSuccessor(Op) == Header) TI->setSuccessor(Op, BEBlock); } //===--- Update all analyses which we must preserve now -----------------===// // Update Loop Information - we know that this block is now in the current // loop and all parent loops. L->addBasicBlockToLoop(BEBlock, LI->getBase()); // Update dominator information DT->splitBlock(BEBlock); return BEBlock; }
/// Connect the unrolling prolog code to the original loop. /// The unrolling prolog code contains code to execute the /// 'extra' iterations if the run-time trip count modulo the /// unroll count is non-zero. /// /// This function performs the following: /// - Create PHI nodes at prolog end block to combine values /// that exit the prolog code and jump around the prolog. /// - Add a PHI operand to a PHI node at the loop exit block /// for values that exit the prolog and go around the loop. /// - Branch around the original loop if the trip count is less /// than the unroll factor. /// static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *PrologExit, BasicBlock *OriginalLoopLatchExit, BasicBlock *PreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]); // Create a PHI node for each outgoing value from the original loop // (which means it is an outgoing value from the prolog code too). // The new PHI node is inserted in the prolog end basic block. // The new PHI node value is added as an operand of a PHI node in either // the loop header or the loop exit block. for (BasicBlock *Succ : successors(Latch)) { for (Instruction &BBI : *Succ) { PHINode *PN = dyn_cast<PHINode>(&BBI); // Exit when we passed all PHI nodes. if (!PN) break; // Add a new PHI node to the prolog end block and add the // appropriate incoming values. PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr", PrologExit->getFirstNonPHI()); // Adding a value to the new PHI node from the original loop preheader. // This is the value that skips all the prolog code. 
if (L->contains(PN)) { NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader); } else { NewPN->addIncoming(UndefValue::get(PN->getType()), PreHeader); } Value *V = PN->getIncomingValueForBlock(Latch); if (Instruction *I = dyn_cast<Instruction>(V)) { if (L->contains(I)) { V = VMap.lookup(I); } } // Adding a value to the new PHI node from the last prolog block // that was created. NewPN->addIncoming(V, PrologLatch); // Update the existing PHI node operand with the value from the // new PHI node. How this is done depends on if the existing // PHI node is in the original loop block, or the exit block. if (L->contains(PN)) { PN->setIncomingValue(PN->getBasicBlockIndex(NewPreHeader), NewPN); } else { PN->addIncoming(NewPN, PrologExit); } } } // Make sure that created prolog loop is in simplified form SmallVector<BasicBlock *, 4> PrologExitPreds; Loop *PrologLoop = LI->getLoopFor(PrologLatch); if (PrologLoop) { for (BasicBlock *PredBB : predecessors(PrologExit)) if (PrologLoop->contains(PredBB)) PrologExitPreds.push_back(PredBB); SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI, PreserveLCSSA); } // Create a branch around the original loop, which is taken if there are no // iterations remaining to be executed after running the prologue. Instruction *InsertPt = PrologExit->getTerminator(); IRBuilder<> B(InsertPt); assert(Count != 0 && "nonsensical Count!"); // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1) // This means %xtraiter is (BECount + 1) and all of the iterations of this // loop were executed by the prologue. Note that if BECount <u (Count - 1) // then (BECount + 1) cannot unsigned-overflow. 
Value *BrLoopExit = B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)); // Split the exit to maintain loop canonicalization guarantees SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit)); SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); // Add the branch to the exit block (around the unrolled loop) B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader); InsertPt->eraseFromParent(); if (DT) DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit); }
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true /// if unrolling was successful, or false if the loop was unmodified. Unrolling /// can only fail when the loop's latch block is not terminated by a conditional /// branch instruction. However, if the trip count (and multiple) are not known, /// loop unrolling will mostly produce more code that is no faster. /// /// TripCount is generally defined as the number of times the loop header /// executes. UnrollLoop relaxes the definition to permit early exits: here /// TripCount is the iteration on which control exits LatchBlock if no early /// exits were taken. Note that UnrollLoop assumes that the loop counter test /// terminates LatchBlock in order to remove unnecesssary instances of the /// test. In other words, control may exit the loop prior to TripCount /// iterations via an early branch, but control may not exit the loop from the /// LatchBlock's terminator prior to TripCount iterations. /// /// Similarly, TripMultiple divides the number of times that the LatchBlock may /// execute without exiting the loop. /// /// The LoopInfo Analysis that is passed will be kept consistent. /// /// If a LoopPassManager is passed in, and the loop is fully removed, it will be /// removed from the LoopPassManager as well. LPM can also be NULL. /// /// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are /// available it must also preserve those analyses. 
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime, unsigned TripMultiple, LoopInfo *LI, LPPassManager *LPM) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); return false; } BasicBlock *LatchBlock = L->getLoopLatch(); if (!LatchBlock) { DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n"); return false; } BasicBlock *Header = L->getHeader(); BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); if (!BI || BI->isUnconditional()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional branch.\n"); return false; } if (Header->hasAddressTaken()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Won't unroll loop: address of header block is taken.\n"); return false; } if (TripCount != 0) DEBUG(dbgs() << " Trip Count = " << TripCount << "\n"); if (TripMultiple != 1) DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n"); // Effectively "DCE" unrolled iterations that are beyond the tripcount // and will never be executed. if (TripCount != 0 && Count > TripCount) Count = TripCount; assert(Count > 0); assert(TripMultiple > 0); assert(TripCount == 0 || TripCount % TripMultiple == 0); // Are we eliminating the loop control altogether? bool CompletelyUnroll = Count == TripCount; // We assume a run-time trip count if the compiler cannot // figure out the loop trip count and the unroll-runtime // flag is specified. bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime); if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM)) return false; // Notify ScalarEvolution that the loop will be substantially changed, // if not outright eliminated. 
ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); if (SE) SE->forgetLoop(L); // If we know the trip count, we know the multiple... unsigned BreakoutTrip = 0; if (TripCount != 0) { BreakoutTrip = TripCount % Count; TripMultiple = 0; } else { // Figure out what multiple to use. BreakoutTrip = TripMultiple = (unsigned)GreatestCommonDivisor64(Count, TripMultiple); } if (CompletelyUnroll) { DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() << " with trip count " << TripCount << "!\n"); } else { DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " << Count); if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); } else if (TripMultiple != 1) { DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); } else if (RuntimeTripCount) { DEBUG(dbgs() << " with run-time trip count"); } DEBUG(dbgs() << "!\n"); } std::vector<BasicBlock*> LoopBlocks = L->getBlocks(); bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); // For the first iteration of the loop, we should use the precloned values for // PHI nodes. Insert associations now. ValueToValueMapTy LastValueMap; std::vector<PHINode*> OrigPHINode; for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { OrigPHINode.push_back(cast<PHINode>(I)); } std::vector<BasicBlock*> Headers; std::vector<BasicBlock*> Latches; Headers.push_back(Header); Latches.push_back(LatchBlock); // The current on-the-fly SSA update requires blocks to be processed in // reverse postorder so that LastValueMap contains the correct value at each // exit. LoopBlocksDFS DFS(L); DFS.perform(LI); // Stash the DFS iterators before adding blocks to the loop. 
LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); for (unsigned It = 1; It != Count; ++It) { std::vector<BasicBlock*> NewBlocks; for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { ValueToValueMapTy VMap; BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It)); Header->getParent()->getBasicBlockList().push_back(New); // Loop over all of the PHI nodes in the block, changing them to use the // incoming values from the previous block. if (*BB == Header) for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]); Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); if (Instruction *InValI = dyn_cast<Instruction>(InVal)) if (It > 1 && L->contains(InValI)) InVal = LastValueMap[InValI]; VMap[OrigPHINode[i]] = InVal; New->getInstList().erase(NewPHI); } // Update our running map of newest clones LastValueMap[*BB] = New; for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); VI != VE; ++VI) LastValueMap[VI->first] = VI->second; L->addBasicBlockToLoop(New, LI->getBase()); // Add phi entries for newly created values to all exit blocks. for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); SI != SE; ++SI) { if (L->contains(*SI)) continue; for (BasicBlock::iterator BBI = (*SI)->begin(); PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) { Value *Incoming = phi->getIncomingValueForBlock(*BB); ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); if (It != LastValueMap.end()) Incoming = It->second; phi->addIncoming(Incoming, New); } } // Keep track of new headers and latches as we create them, so that // we can insert the proper branches later. 
if (*BB == Header) Headers.push_back(New); if (*BB == LatchBlock) Latches.push_back(New); NewBlocks.push_back(New); } // Remap all instructions in the most recent iteration for (unsigned i = 0; i < NewBlocks.size(); ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) ::RemapInstruction(I, LastValueMap); } // Loop over the PHI nodes in the original block, setting incoming values. for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { PHINode *PN = OrigPHINode[i]; if (CompletelyUnroll) { PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); Header->getInstList().erase(PN); } else if (Count > 1) { Value *InVal = PN->removeIncomingValue(LatchBlock, false); // If this value was defined in the loop, take the value defined by the // last iteration of the loop. if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { if (L->contains(InValI)) InVal = LastValueMap[InVal]; } assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch"); PN->addIncoming(InVal, Latches.back()); } } // Now that all the basic blocks for the unrolled iterations are in place, // set up the branches to connect them. for (unsigned i = 0, e = Latches.size(); i != e; ++i) { // The original branch was replicated in each unrolled iteration. BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); // The branch destination. unsigned j = (i + 1) % e; BasicBlock *Dest = Headers[j]; bool NeedConditional = true; if (RuntimeTripCount && j != 0) { NeedConditional = false; } // For a complete unroll, make the last iteration end with a branch // to the exit block. if (CompletelyUnroll && j == 0) { Dest = LoopExit; NeedConditional = false; } // If we know the trip count or a multiple of it, we can safely use an // unconditional branch for some iterations. 
if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) { NeedConditional = false; } if (NeedConditional) { // Update the conditional branch's successor for the following // iteration. Term->setSuccessor(!ContinueOnTrue, Dest); } else { // Remove phi operands at this loop exit if (Dest != LoopExit) { BasicBlock *BB = Latches[i]; for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) { if (*SI == Headers[i]) continue; for (BasicBlock::iterator BBI = (*SI)->begin(); PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) { Phi->removeIncomingValue(BB, false); } } } // Replace the conditional branch with an unconditional one. BranchInst::Create(Dest, Term); Term->eraseFromParent(); } } // Merge adjacent basic blocks, if possible. for (unsigned i = 0, e = Latches.size(); i != e; ++i) { BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); if (Term->isUnconditional()) { BasicBlock *Dest = Term->getSuccessor(0); if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM)) std::replace(Latches.begin(), Latches.end(), Dest, Fold); } } // FIXME: Reconstruct dom info, because it is not preserved properly. // Incrementally updating domtree after loop unrolling would be easy. if (DominatorTree *DT = LPM->getAnalysisIfAvailable<DominatorTree>()) DT->runOnFunction(*L->getHeader()->getParent()); // Simplify any new induction variables in the partially unrolled loop. if (SE && !CompletelyUnroll) { SmallVector<WeakVH, 16> DeadInsts; simplifyLoopIVs(L, SE, LPM, DeadInsts); // Aggressively clean up dead instructions that simplifyLoopIVs already // identified. Any remaining should be cleaned up below. while (!DeadInsts.empty()) if (Instruction *Inst = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) RecursivelyDeleteTriviallyDeadInstructions(Inst); } // At this point, the code is well formed. We now do a quick sweep over the // inserted code, doing constant propagation and dead code elimination as we // go. 
const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks(); for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(), BBE = NewLoopBlocks.end(); BB != BBE; ++BB) for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) { Instruction *Inst = I++; if (isInstructionTriviallyDead(Inst)) (*BB)->getInstList().erase(Inst); else if (Value *V = SimplifyInstruction(Inst)) if (LI->replacementPreservesLCSSAForm(Inst, V)) { Inst->replaceAllUsesWith(V); (*BB)->getInstList().erase(Inst); } } NumCompletelyUnrolled += CompletelyUnroll; ++NumUnrolled; // Remove the loop from the LoopPassManager if it's completely removed. if (CompletelyUnroll && LPM != NULL) LPM->deleteLoopFromQueue(L); return true; }
/// optimizeCheck - replace the given check CallInst with the check's fast /// version if all the source memory objects can be found and it is obvious /// that none of them have been freed at the point where the check is made. /// Returns the new call if possible and NULL otherwise. /// /// This currently works only with memory objects that can't be freed: /// * global variables /// * allocas that trivially have function scope /// * byval arguments /// bool ExactCheckOpt::optimizeCheck(CallInst *CI, CheckInfoType *Info) { // Examined values SmallSet<Value*, 16> Visited; // Potential memory objects SmallSet<Value*, 4> Objects; std::queue<Value*> Q; // Start from the the pointer operand Value *StartPtr = CI->getArgOperand(Info->PtrArgNo)->stripPointerCasts(); Q.push(StartPtr); // Use BFS to find all potential memory objects while(!Q.empty()) { Value *o = Q.front()->stripPointerCasts(); Q.pop(); if(Visited.count(o)) continue; Visited.insert(o); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(o)) { if (CE->getOpcode() == Instruction::GetElementPtr) { Q.push(CE->getOperand(0)); } else { // Exit early if any of the objects are unsupported. if (!isSimpleMemoryObject(o)) return false; Objects.insert(o); } } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(o)) { Q.push(GEP->getPointerOperand()); // It is fine to ignore the case of indexing into null with a pointer // because that case is invalid for LLVM-aware objects such as allocas, // globals, and objects pointed to by noalias pointers. } else if(PHINode *PHI = dyn_cast<PHINode>(o)) { for (unsigned i = 0, num = PHI->getNumIncomingValues(); i != num; ++i) Q.push(PHI->getIncomingValue(i)); } else if (SelectInst *SI = dyn_cast<SelectInst>(o)) { Q.push(SI->getTrueValue()); Q.push(SI->getFalseValue()); } else { // Exit early if any of the objects are unsupported. 
if (!isSimpleMemoryObject(o)) return false; Objects.insert(o); } } // Mapping from the initial value to the corresponding size and void pointer: // * memory object -> its size and pointer // * phi/select -> corresponding phi/select for the sizes and pointers // * anything else -> the corresponding size and pointer on the path std::map <Value*, PtrSizePair> M; Module &Mod = *CI->getParent()->getParent()->getParent(); Type *SizeTy = getSizeType(Info, Mod); // Add non-instruction non-constant allocation object pointers to the front // of the function's entry block. BasicBlock &EntryBlock = CI->getParent()->getParent()->getEntryBlock(); Instruction *FirstInsertionPoint = ++BasicBlock::iterator(EntryBlock.begin()); for (SmallSet<Value*, 16>::const_iterator It = Objects.begin(), E = Objects.end(); It != E; ++It) { // Obj is a memory object pointer: alloca, argument, load, callinst, etc. Value *Obj = *It; // Insert instruction-based allocation pointers just after the allocation. Instruction *InsertBefore = FirstInsertionPoint; if (Instruction *I = dyn_cast<Instruction>(Obj)) InsertBefore = ++BasicBlock::iterator(I); IRBuilder<> Builder(InsertBefore); SizeOffsetEvalType SizeOffset = ObjSizeEval->compute(Obj); assert(ObjSizeEval->bothKnown(SizeOffset)); assert(dyn_cast<ConstantInt>(SizeOffset.second)->isZero()); Value *Size = Builder.CreateIntCast(SizeOffset.first, SizeTy, /*isSigned=*/false); Value *Ptr = Builder.CreatePointerCast(Obj, VoidPtrTy); M[Obj] = std::make_pair(Ptr, Size); } // Create the rest of the size values and object pointers. // The phi nodes will be finished later. for (SmallSet<Value*, 16>::const_iterator I = Visited.begin(), E = Visited.end(); I != E; ++I) { getPtrAndSize(*I, SizeTy, M); } // Finalize the phi nodes. 
for (SmallSet<Value*, 16>::const_iterator I = Visited.begin(), E = Visited.end(); I != E; ++I) { if (PHINode *PHI = dyn_cast<PHINode>(*I)) { assert(M.count(PHI)); PHINode *PtrPHI = cast<PHINode>(M[PHI].first); PHINode *SizePHI = cast<PHINode>(M[PHI].second); for(unsigned i = 0, num = PHI->getNumIncomingValues(); i != num; ++i) { Value *IncomingValue = PHI->getIncomingValue(i)->stripPointerCasts(); assert(M.count(IncomingValue)); PtrPHI->addIncoming(M[IncomingValue].first, PHI->getIncomingBlock(i)); SizePHI->addIncoming(M[IncomingValue].second, PHI->getIncomingBlock(i)); } } } // Insert the fast version of the check just before the regular version. assert(M.count(StartPtr) && "The memory object and its size should be known"); createFastCheck(Info, CI, M[StartPtr].first, M[StartPtr].second); return true; }
Value *ForExprAST::Codegen() {
  // Output this as:
  //   ...
  //   start = startexpr
  //   goto loop
  // loop:
  //   variable = phi [start, preheader], [nextvariable, loopend]
  //   ...
  //   bodyexpr
  //   ...
  // loopend:
  //   step = stepexpr
  //   nextvariable = variable + step
  //   endcond = endexpr
  //   br endcond, loop, afterloop
  // afterloop:
  //
  // Returns the constant 0.0 on success and null if the start, body, step or
  // end expression fails to generate code.

  // Emit the start code first, without 'variable' in scope.
  Value *StartVal = Start->Codegen();
  if (StartVal == 0) return 0;

  // Make the new basic block for the loop header, inserting after current
  // block.
  Function *TheFunction = Builder.GetInsertBlock()->getParent();
  BasicBlock *PreheaderBB = Builder.GetInsertBlock();
  BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop",
                                          TheFunction);

  // Insert an explicit fall through from the current block to the LoopBB.
  Builder.CreateBr(LoopBB);

  // Start insertion in LoopBB.
  Builder.SetInsertPoint(LoopBB);

  // Start the PHI node with an entry for Start.
  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
                                        VarName.c_str());
  Variable->addIncoming(StartVal, PreheaderBB);

  // Within the loop, the variable is defined equal to the PHI node.  If it
  // shadows an existing variable, we have to restore it, so save it now.
  Value *OldVal = NamedValues[VarName];
  NamedValues[VarName] = Variable;

  // Result stays null if any sub-expression below fails.  All paths fall
  // through to the restore code at the bottom, so a failure can no longer
  // leave the loop variable bound in NamedValues.
  Value *Result = 0;

  // Emit the body of the loop.  This, like any other expr, can change the
  // current BB.  Note that we ignore the value computed by the body, but don't
  // allow an error.
  if (Body->Codegen() != 0) {
    // Emit the step value.
    Value *StepVal;
    if (Step) {
      StepVal = Step->Codegen();
    } else {
      // If not specified, use 1.0.
      StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
    }

    if (StepVal != 0) {
      Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");

      // Compute the end condition.
      Value *EndCond = End->Codegen();
      if (EndCond != 0) {
        // Convert condition to a bool by comparing equal to 0.0.
        EndCond = Builder.CreateFCmpONE(
            EndCond, ConstantFP::get(getGlobalContext(), APFloat(0.0)),
            "loopcond");

        // Create the "after loop" block and insert it.
        BasicBlock *LoopEndBB = Builder.GetInsertBlock();
        BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(),
                                                 "afterloop", TheFunction);

        // Insert the conditional branch into the end of LoopEndBB.
        Builder.CreateCondBr(EndCond, LoopBB, AfterBB);

        // Any new code will be inserted in AfterBB.
        Builder.SetInsertPoint(AfterBB);

        // Add a new entry to the PHI node for the backedge.
        Variable->addIncoming(NextVar, LoopEndBB);

        // for expr always returns 0.0.
        Result = Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
      }
    }
  }

  // Restore the unshadowed variable (on success and on failure alike).
  if (OldVal)
    NamedValues[VarName] = OldVal;
  else
    NamedValues.erase(VarName);

  return Result;
}
/// Connect the unrolling prolog code to the original loop. /// The unrolling prolog code contains code to execute the /// 'extra' iterations if the run-time trip count modulo the /// unroll count is non-zero. /// /// This function performs the following: /// - Create PHI nodes at prolog end block to combine values /// that exit the prolog code and jump around the prolog. /// - Add a PHI operand to a PHI node at the loop exit block /// for values that exit the prolog and go around the loop. /// - Branch around the original loop if the trip count is less /// than the unroll factor. /// static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *LastPrologBB, BasicBlock *PrologEnd, BasicBlock *OrigPH, BasicBlock *NewPH, ValueToValueMapTy &VMap, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, Pass *P) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); // Create a PHI node for each outgoing value from the original loop // (which means it is an outgoing value from the prolog code too). // The new PHI node is inserted in the prolog end basic block. // The new PHI name is added as an operand of a PHI node in either // the loop header or the loop exit block. for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch); SBI != SBE; ++SBI) { for (BasicBlock::iterator BBI = (*SBI)->begin(); PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) { // Add a new PHI node to the prolog end block and add the // appropriate incoming values. PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr", PrologEnd->getTerminator()); // Adding a value to the new PHI node from the original loop preheader. // This is the value that skips all the prolog code. 
if (L->contains(PN)) { NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH); } else { NewPN->addIncoming(UndefValue::get(PN->getType()), OrigPH); } Value *V = PN->getIncomingValueForBlock(Latch); if (Instruction *I = dyn_cast<Instruction>(V)) { if (L->contains(I)) { V = VMap[I]; } } // Adding a value to the new PHI node from the last prolog block // that was created. NewPN->addIncoming(V, LastPrologBB); // Update the existing PHI node operand with the value from the // new PHI node. How this is done depends on if the existing // PHI node is in the original loop block, or the exit block. if (L->contains(PN)) { PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN); } else { PN->addIncoming(NewPN, PrologEnd); } } } // Create a branch around the orignal loop, which is taken if there are no // iterations remaining to be executed after running the prologue. Instruction *InsertPt = PrologEnd->getTerminator(); IRBuilder<> B(InsertPt); assert(Count != 0 && "nonsensical Count!"); // If BECount <u (Count - 1) then (BECount + 1) & (Count - 1) == (BECount + 1) // (since Count is a power of 2). This means %xtraiter is (BECount + 1) and // and all of the iterations of this loop were executed by the prologue. Note // that if BECount <u (Count - 1) then (BECount + 1) cannot unsigned-overflow. Value *BrLoopExit = B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)); BasicBlock *Exit = L->getUniqueExitBlock(); assert(Exit && "Loop must have a single exit block only"); // Split the exit to maintain loop canonicalization guarantees SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit)); SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", AA, DT, LI, P->mustPreserveAnalysisID(LCSSAID)); // Add the branch to the exit block (around the unrolled loop) B.CreateCondBr(BrLoopExit, Exit, NewPH); InsertPt->eraseFromParent(); }
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, bool PreserveLCSSA) { // for now, only unroll loops that contain a single exit if (!L->getExitingBlock()) return false; // Make sure the loop is in canonical form, and there is a single // exit block only. if (!L->isLoopSimplifyForm()) return false; BasicBlock *Exit = L->getUniqueExitBlock(); // successor out of loop if (!Exit) return false; // Use Scalar Evolution to compute the trip count. This allows more loops to // be unrolled than relying on induction var simplification. if (!SE) return false; // Only unroll loops with a computable trip count, and the trip count needs // to be an int value (allowing a pointer type is a TODO item). const SCEV *BECountSC = SE->getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BECountSC) || !BECountSC->getType()->isIntegerTy()) return false; unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth(); // Add 1 since the backedge count doesn't include the first loop iteration. const SCEV *TripCountSC = SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); if (isa<SCEVCouldNotCompute>(TripCountSC)) return false; BasicBlock *Header = L->getHeader(); BasicBlock *PreHeader = L->getLoopPreheader(); BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "loop-unroll"); if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) return false; // This constraint lets us deal with an overflowing trip count easily; see the // comment on ModVal below. if (Log2_32(Count) > BEWidth) return false; BasicBlock *Latch = L->getLoopLatch(); // Loop structure is the following: // // PreHeader // Header // ... 
// Latch // Exit BasicBlock *NewPreHeader; BasicBlock *NewExit = nullptr; BasicBlock *PrologExit = nullptr; BasicBlock *EpilogPreHeader = nullptr; BasicBlock *PrologPreHeader = nullptr; if (UseEpilogRemainder) { // If epilog remainder // Split PreHeader to insert a branch around loop for unrolling. NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI); NewPreHeader->setName(PreHeader->getName() + ".new"); // Split Exit to create phi nodes from branch above. SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); NewExit = SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); // Split NewExit to insert epilog remainder loop. EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI); EpilogPreHeader->setName(Header->getName() + ".epil.preheader"); } else { // If prolog remainder // Split the original preheader twice to insert prolog remainder loop PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI); PrologPreHeader->setName(Header->getName() + ".prol.preheader"); PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(), DT, LI); PrologExit->setName(Header->getName() + ".prol.loopexit"); // Split PrologExit to get NewPreHeader. NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI); NewPreHeader->setName(PreHeader->getName() + ".new"); } // Loop structure should be the following: // Epilog Prolog // // PreHeader PreHeader // *NewPreHeader *PrologPreHeader // Header *PrologExit // ... *NewPreHeader // Latch Header // *NewExit ... // *EpilogPreHeader Latch // Exit Exit // Calculate conditions for branch around loop for unrolling // in epilog case and around prolog remainder loop in prolog case. 
// Compute the number of extra iterations required, which is: // extra iterations = run-time trip count % loop unroll factor PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR); Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR); IRBuilder<> B(PreHeaderBR); Value *ModVal; // Calculate ModVal = (BECount + 1) % Count. // Note that TripCount is BECount + 1. if (isPowerOf2_32(Count)) { // When Count is power of 2 we don't BECount for epilog case, however we'll // need it for a branch around unrolling loop for prolog case. ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter"); // 1. There are no iterations to be run in the prolog/epilog loop. // OR // 2. The addition computing TripCount overflowed. // // If (2) is true, we know that TripCount really is (1 << BEWidth) and so // the number of iterations that remain to be run in the original loop is a // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we // explicitly check this above). } else { // As (BECount + 1) can potentially unsigned overflow we count // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count. Value *ModValTmp = B.CreateURem(BECount, ConstantInt::get(BECount->getType(), Count)); Value *ModValAdd = B.CreateAdd(ModValTmp, ConstantInt::get(ModValTmp->getType(), 1)); // At that point (BECount % Count) + 1 could be equal to Count. // To handle this case we need to take mod by Count one more time. ModVal = B.CreateURem(ModValAdd, ConstantInt::get(BECount->getType(), Count), "xtraiter"); } Value *BranchVal = UseEpilogRemainder ? B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)) : B.CreateIsNotNull(ModVal, "lcmp.mod"); BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader; BasicBlock *UnrollingLoop = UseEpilogRemainder ? 
NewPreHeader : PrologExit; // Branch to either remainder (extra iterations) loop or unrolling loop. B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop); PreHeaderBR->eraseFromParent(); Function *F = Header->getParent(); // Get an ordered list of blocks in the loop to help with the ordering of the // cloned blocks in the prolog/epilog code LoopBlocksDFS LoopBlocks(L); LoopBlocks.perform(LI); // // For each extra loop iteration, create a copy of the loop's basic blocks // and generate a condition that branches to the copy depending on the // number of 'left over' iterations. // std::vector<BasicBlock *> NewBlocks; ValueToValueMapTy VMap; // For unroll factor 2 remainder loop will have 1 iterations. // Do not create 1 iteration loop. bool CreateRemainderLoop = (Count != 2); // Clone all the basic blocks in the loop. If Count is 2, we don't clone // the loop, otherwise we create a cloned loop to execute the extra // iterations. This function adds the appropriate CFG connections. BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI); // Insert the cloned blocks into the function. F->getBasicBlockList().splice(InsertBot->getIterator(), F->getBasicBlockList(), NewBlocks[0]->getIterator(), F->end()); // Loop structure should be the following: // Epilog Prolog // // PreHeader PreHeader // NewPreHeader PrologPreHeader // Header PrologHeader // ... ... // Latch PrologLatch // NewExit PrologExit // EpilogPreHeader NewPreHeader // EpilogHeader Header // ... ... // EpilogLatch Latch // Exit Exit // Rewrite the cloned instruction operands to use the values created when the // clone is created. 
for (BasicBlock *BB : NewBlocks) { for (Instruction &I : *BB) { RemapInstruction(&I, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); } } if (UseEpilogRemainder) { // Connect the epilog code to the original loop and update the // PHI functions. ConnectEpilog(L, ModVal, NewExit, Exit, PreHeader, EpilogPreHeader, NewPreHeader, VMap, DT, LI, PreserveLCSSA); // Update counter in loop for unrolling. // I should be multiply of Count. IRBuilder<> B2(NewPreHeader->getTerminator()); Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter"); BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); B2.SetInsertPoint(LatchBR); PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter", Header->getFirstNonPHI()); Value *IdxSub = B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".nsub"); Value *IdxCmp; if (LatchBR->getSuccessor(0) == Header) IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp"); else IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp"); NewIdx->addIncoming(TestVal, NewPreHeader); NewIdx->addIncoming(IdxSub, Latch); LatchBR->setCondition(IdxCmp); } else { // Connect the prolog code to the original loop and update the // PHI functions. ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader, VMap, DT, LI, PreserveLCSSA); } // If this loop is nested, then the loop unroller changes the code in the // parent loop, so the Scalar Evolution pass needs to be run again. if (Loop *ParentLoop = L->getParentLoop()) SE->forgetLoop(ParentLoop); NumRuntimeUnrolled++; return true; }
/// Generates IR for the operator expression \p op.
///
/// Dispatches on the operator kind: unary non-arithmetic operators (not,
/// address-of, dereference), short-circuit 'and'/'or', assignment, binary
/// arithmetic/comparison and unary arithmetic.  Whenever a result value is
/// produced it is handed to allocateAndInitLocalIrObjectFor(); dereference
/// and non-void assignment produce no separate result value here.
void IrGen::visit(AstOperator& op) {
  const auto& astOperands = op.args().childs();
  Value* llvmResult = nullptr;

  // unary non-arithmetic operators
  if (op.op() == AstOperator::eNot) {
    llvmResult = m_builder.CreateNot(callAcceptOn(*astOperands.front()), "not");
  } else if (op.op() == AstOperator::eAddrOf) {
    astOperands.front()->accept(*this);
    llvmResult = astOperands.front()->ir().irAddrOfIrObject();
  } else if (op.op() == AstOperator::eDeref) {
    // The operand's value is the address of the object denoted by op.
    op.ir().setAddrOfIrObject(
      callAcceptOn(*astOperands.front()), EInitStatus::eInitialized);
  }

  // binary logical short circuit operators
  else if (op.isBinaryLogicalShortCircuit()) {
    const auto opname =
      op.op() == AstOperator::eAnd ? string{"and"} : string{"or"};
    Function* functionIr = m_builder.GetInsertBlock()->getParent();
    BasicBlock* rhsBB = BasicBlock::Create(llvmContext, opname + "_rhs");
    BasicBlock* mergeBB = BasicBlock::Create(llvmContext, opname + "_merge");

    // current/lhs BB: evaluate lhs, then either evaluate rhs or skip it
    auto llvmLhs = callAcceptOn(*astOperands.front());
    assert(llvmLhs);
    if (op.op() == AstOperator::eAnd) {
      m_builder.CreateCondBr(llvmLhs, rhsBB, mergeBB);
    } else if (op.op() == AstOperator::eOr) {
      m_builder.CreateCondBr(llvmLhs, mergeBB, rhsBB);
    } else {
      assert(false);
    }
    // The lhs may itself have created blocks, so take the phi's incoming
    // block from the builder.
    BasicBlock* lhsLastBB = m_builder.GetInsertBlock();

    // rhsBB:
    functionIr->getBasicBlockList().push_back(rhsBB);
    m_builder.SetInsertPoint(rhsBB);
    auto llvmRhs = callAcceptOn(*astOperands.back());
    m_builder.CreateBr(mergeBB);
    BasicBlock* rhsLastBB = m_builder.GetInsertBlock();

    // mergeBB: the result is lhs if rhs was skipped, rhs otherwise
    functionIr->getBasicBlockList().push_back(mergeBB);
    m_builder.SetInsertPoint(mergeBB);
    PHINode* phi = m_builder.CreatePHI(Type::getInt1Ty(llvmContext), 2, opname);
    assert(phi);
    phi->addIncoming(llvmLhs, lhsLastBB);
    phi->addIncoming(llvmRhs, rhsLastBB);
    llvmResult = phi;
  }

  // assignment operators
  else if (op.op() == AstOperator::eAssign ||
    op.op() == AstOperator::eVoidAssign) {
    astOperands.front()->accept(*this);
    auto llvmRhs = callAcceptOn(*astOperands.back());
    m_builder.CreateStore(
      llvmRhs, astOperands.front()->ir().irAddrOfIrObject());
    if (op.op() == AstOperator::eVoidAssign) {
      llvmResult = m_abstractObject; // void
    } else {
      // op.object() is the same as astOperands.front().object(), so
      // 'returning the result' is now a nop.
    }
  }

  // binary arithmetic operators
  else if (astOperands.size() == 2) {
    auto llvmLhs = callAcceptOn(*astOperands.front());
    auto llvmRhs = callAcceptOn(*astOperands.back());
    if (astOperands.front()->objType().is(ObjType::eStoredAsIntegral)) {
      switch (op.op()) {
        // clang-format off
      case AstOperator::eSub    : llvmResult = m_builder.CreateSub   (llvmLhs, llvmRhs, "sub"); break;
      case AstOperator::eAdd    : llvmResult = m_builder.CreateAdd   (llvmLhs, llvmRhs, "add"); break;
      case AstOperator::eMul    : llvmResult = m_builder.CreateMul   (llvmLhs, llvmRhs, "mul"); break;
      case AstOperator::eDiv    : llvmResult = m_builder.CreateSDiv  (llvmLhs, llvmRhs, "div"); break;
      case AstOperator::eEqualTo: llvmResult = m_builder.CreateICmpEQ(llvmLhs, llvmRhs, "cmp"); break;
      default: assert(false);
        // clang-format on
      }
    } else {
      switch (op.op()) {
        // clang-format off
      case AstOperator::eSub    : llvmResult = m_builder.CreateFSub   (llvmLhs, llvmRhs, "fsub"); break;
      case AstOperator::eAdd    : llvmResult = m_builder.CreateFAdd   (llvmLhs, llvmRhs, "add"); break;
      case AstOperator::eMul    : llvmResult = m_builder.CreateFMul   (llvmLhs, llvmRhs, "mul"); break;
      case AstOperator::eDiv    : llvmResult = m_builder.CreateFDiv   (llvmLhs, llvmRhs, "div"); break;
      case AstOperator::eEqualTo: llvmResult = m_builder.CreateFCmpOEQ(llvmLhs, llvmRhs, "cmp"); break;
      default: assert(false);
        // clang-format on
      }
    }
    assert(llvmResult);
  }

  // unary arithmetic operators
  else {
    assert(astOperands.size() == 1);
    const auto& operand = astOperands.front();
    const auto& objType = operand->objType();
    auto llvmOperand = callAcceptOn(*operand);
    if (objType.is(ObjType::eStoredAsIntegral)) {
      auto llvmZero = ConstantInt::get(llvmContext, APInt(objType.size(), 0));
      switch (op.op()) {
      case '-': llvmResult = m_builder.CreateSub(llvmZero, llvmOperand, "neg"); break;
      case '+': llvmResult = llvmOperand; break;
      default: assert(false);
      }
    } else {
      switch (op.op()) {
      // Emit a real floating point negation.  '0.0 - x' is not a negation:
      // it evaluates to +0.0 instead of -0.0 for x == +0.0, and a zero built
      // from APFloat(0.0) is always a double constant regardless of the
      // operand's type.
      case '-': llvmResult = m_builder.CreateFNeg(llvmOperand, "fneg"); break;
      case '+': llvmResult = llvmOperand; break;
      default: assert(false);
      }
    }
    assert(llvmResult);
  }

  if (llvmResult) {
    allocateAndInitLocalIrObjectFor(op, llvmResult);
  }
}
/// SplitBlockPredecessors - This method transforms BB by introducing a new /// basic block into the function, and moving some of the predecessors of BB to /// be predecessors of the new block. The new predecessors are indicated by the /// Preds array, which has NumPreds elements in it. The new block is given a /// suffix of 'Suffix'. /// /// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, /// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. /// In particular, it does not preserve LoopSimplify (because it's /// complicated to handle the case where one of the edges being split /// is an exit of a loop with other exits). /// BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds, unsigned NumPreds, const char *Suffix, Pass *P) { // Create new basic block, insert right before the original block. BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix, BB->getParent(), BB); // The new block unconditionally branches to the old block. BranchInst *BI = BranchInst::Create(BB, NewBB); LoopInfo *LI = P ? P->getAnalysisIfAvailable<LoopInfo>() : 0; Loop *L = LI ? LI->getLoopFor(BB) : 0; bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID); // Move the edges from Preds to point to NewBB instead of BB. // While here, if we need to preserve loop analyses, collect // some information about how this split will affect loops. bool HasLoopExit = false; bool IsLoopEntry = !!L; bool SplitMakesNewLoopHeader = false; for (unsigned i = 0; i != NumPreds; ++i) { // This is slightly more strict than necessary; the minimum requirement // is that there be no more than one indirectbr branching to BB. And // all BlockAddress uses would need to be updated. assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && "Cannot split an edge from an IndirectBrInst"); Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); if (LI) { // If we need to preserve LCSSA, determine if any of // the preds is a loop exit. 
if (PreserveLCSSA) if (Loop *PL = LI->getLoopFor(Preds[i])) if (!PL->contains(BB)) HasLoopExit = true; // If we need to preserve LoopInfo, note whether any of the // preds crosses an interesting loop boundary. if (L) { if (L->contains(Preds[i])) IsLoopEntry = false; else SplitMakesNewLoopHeader = true; } } } // Update dominator tree and dominator frontier if available. DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0; if (DT) DT->splitBlock(NewBB); if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0) DF->splitBlock(NewBB); // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI // node becomes an incoming value for BB's phi node. However, if the Preds // list is empty, we need to insert dummy entries into the PHI nodes in BB to // account for the newly created predecessor. if (NumPreds == 0) { // Insert dummy values as the incoming value. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); return NewBB; } AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; if (L) { if (IsLoopEntry) { // Add the new block to the nearest enclosing loop (and not an // adjacent loop). To find this, examine each of the predecessors and // determine which loops enclose them, and select the most-nested loop // which contains the loop containing the block being split. Loop *InnermostPredLoop = 0; for (unsigned i = 0; i != NumPreds; ++i) if (Loop *PredLoop = LI->getLoopFor(Preds[i])) { // Seek a loop which actually contains the block being split (to // avoid adjacent loops). while (PredLoop && !PredLoop->contains(BB)) PredLoop = PredLoop->getParentLoop(); // Select the most-nested of these loops which contains the block. 
if (PredLoop && PredLoop->contains(BB) && (!InnermostPredLoop || InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth())) InnermostPredLoop = PredLoop; } if (InnermostPredLoop) InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else { L->addBasicBlockToLoop(NewBB, LI->getBase()); if (SplitMakesNewLoopHeader) L->moveToHeader(NewBB); } } // Otherwise, create a new PHI node in NewBB for each PHI node in BB. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node, unless it's needed for LCSSA. Value *InVal = 0; if (!HasLoopExit) { InVal = PN->getIncomingValueForBlock(Preds[0]); for (unsigned i = 1; i != NumPreds; ++i) if (InVal != PN->getIncomingValueForBlock(Preds[i])) { InVal = 0; break; } } if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. for (unsigned i = 0; i != NumPreds; ++i) PN->removeIncomingValue(Preds[i], false); } else { // If the values coming into the block are not the same, we need a PHI. // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = PHINode::Create(PN->getType(), PN->getName()+".ph", BI); if (AA) AA->copyValue(PN, NewPHI); // Move all of the PHI values for 'Preds' to the new PHI. for (unsigned i = 0; i != NumPreds; ++i) { Value *V = PN->removeIncomingValue(Preds[i], false); NewPHI->addIncoming(V, Preds[i]); } InVal = NewPHI; } // Add an incoming value to the PHI node in the loop for the preheader // edge. PN->addIncoming(InVal, NewBB); } return NewBB; }
/// Renames all variables in the specified BasicBlock. /// Only variables that need to be rename will be. /// void SSI::rename(BasicBlock *BB) { SmallPtrSet<Instruction*, 8> defined; // Iterate through instructions and make appropriate renaming. // For SSI_PHI (b = PHI()), store b at value_stack as a new // definition of the variable it represents. // For SSI_SIG (b = PHI(a)), substitute a with the current // value of a, present in the value_stack. // Then store bin the value_stack as the new definition of a. // For all other instructions (b = OP(a, c, d, ...)), we need to substitute // all operands with its current value, present in value_stack. for (BasicBlock::iterator begin = BB->begin(), end = BB->end(); begin != end; ++begin) { Instruction *I = begin; if (PHINode *PN = dyn_cast<PHINode>(I)) { // Treat PHI functions Instruction* position; // Treat SSI_PHI if ((position = getPositionPhi(PN))) { value_stack[position].push_back(PN); defined.insert(position); // Treat SSI_SIG } else if ((position = getPositionSigma(PN))) { substituteUse(I); value_stack[position].push_back(PN); defined.insert(position); } // Treat all other PHI functions else { substituteUse(I); } } // Treat all other functions else { substituteUse(I); } } // This loop iterates in all BasicBlocks that are successors of the current // BasicBlock. For each SSI_PHI instruction found, insert an operand. // This operand is the current operand in value_stack for the variable // in "position". And the BasicBlock this operand represents is the current // BasicBlock. 
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) { BasicBlock *BB_succ = *SI; for (BasicBlock::iterator begin = BB_succ->begin(), notPhi = BB_succ->getFirstNonPHI(); begin != *notPhi; ++begin) { Instruction *I = begin; PHINode *PN = dyn_cast<PHINode>(I); Instruction* position; if (PN && ((position = getPositionPhi(PN)))) { PN->addIncoming(value_stack[position].back(), BB); } } } // This loop calls rename on all children from this block. This time children // refers to a successor block in the dominance tree. DomTreeNode *DTN = DT_->getNode(BB); for (DomTreeNode::iterator begin = DTN->begin(), end = DTN->end(); begin != end; ++begin) { DomTreeNodeBase<BasicBlock> *DTN_children = *begin; BasicBlock *BB_children = DTN_children->getBlock(); rename(BB_children); } // Now we remove all inserted definitions of a variable from the top of // the stack leaving the previous one as the top. for (SmallPtrSet<Instruction*, 8>::iterator DI = defined.begin(), DE = defined.end(); DI != DE; ++DI) value_stack[*DI].pop_back(); }
/// Connect the unrolling epilog code to the original loop. /// The unrolling epilog code contains code to execute the /// 'extra' iterations if the run-time trip count modulo the /// unroll count is non-zero. /// /// This function performs the following: /// - Update PHI nodes at the unrolling loop exit and epilog loop exit /// - Create PHI nodes at the unrolling loop exit to combine /// values that exit the unrolling loop code and jump around it. /// - Update PHI operands in the epilog loop by the new PHI nodes /// - Branch around the epilog loop if extra iters (ModVal) is zero. /// static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, BasicBlock *Exit, BasicBlock *PreHeader, BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]); // Loop structure should be the following: // // PreHeader // NewPreHeader // Header // ... // Latch // NewExit (PN) // EpilogPreHeader // EpilogHeader // ... // EpilogLatch // Exit (EpilogPN) // Update PHI nodes at NewExit and Exit. for (Instruction &BBI : *NewExit) { PHINode *PN = dyn_cast<PHINode>(&BBI); // Exit when we passed all PHI nodes. if (!PN) break; // PN should be used in another PHI located in Exit block as // Exit was split by SplitBlockPredecessors into Exit and NewExit // Basicaly it should look like: // NewExit: // PN = PHI [I, Latch] // ... // Exit: // EpilogPN = PHI [PN, EpilogPreHeader] // // There is EpilogPreHeader incoming block instead of NewExit as // NewExit was spilt 1 more time to get EpilogPreHeader. 
assert(PN->hasOneUse() && "The phi should have 1 use"); PHINode *EpilogPN = cast<PHINode> (PN->use_begin()->getUser()); assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block"); // Add incoming PreHeader from branch around the Loop PN->addIncoming(UndefValue::get(PN->getType()), PreHeader); Value *V = PN->getIncomingValueForBlock(Latch); Instruction *I = dyn_cast<Instruction>(V); if (I && L->contains(I)) // If value comes from an instruction in the loop add VMap value. V = VMap.lookup(I); // For the instruction out of the loop, constant or undefined value // insert value itself. EpilogPN->addIncoming(V, EpilogLatch); assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 && "EpilogPN should have EpilogPreHeader incoming block"); // Change EpilogPreHeader incoming block to NewExit. EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader), NewExit); // Now PHIs should look like: // NewExit: // PN = PHI [I, Latch], [undef, PreHeader] // ... // Exit: // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch] } // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader). // Update corresponding PHI nodes in epilog loop. for (BasicBlock *Succ : successors(Latch)) { // Skip this as we already updated phis in exit blocks. if (!L->contains(Succ)) continue; for (Instruction &BBI : *Succ) { PHINode *PN = dyn_cast<PHINode>(&BBI); // Exit when we passed all PHI nodes. if (!PN) break; // Add new PHI nodes to the loop exit block and update epilog // PHIs with the new PHI values. PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr", NewExit->getFirstNonPHI()); // Adding a value to the new PHI node from the unrolling loop preheader. NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader); // Adding a value to the new PHI node from the unrolling loop latch. 
NewPN->addIncoming(PN->getIncomingValueForBlock(Latch), Latch); // Update the existing PHI node operand with the value from the new PHI // node. Corresponding instruction in epilog loop should be PHI. PHINode *VPN = cast<PHINode>(VMap[&BBI]); VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN); } } Instruction *InsertPt = NewExit->getTerminator(); IRBuilder<> B(InsertPt); Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod"); assert(Exit && "Loop must have a single exit block only"); // Split the epilogue exit to maintain loop canonicalization guarantees SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, PreserveLCSSA); // Add the branch to the exit block (around the unrolling loop) B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit); InsertPt->eraseFromParent(); if (DT) DT->changeImmediateDominator(Exit, NewExit); // Split the main loop exit to maintain canonicalization guarantees. SmallVector<BasicBlock*, 4> NewExitPreds{Latch}; SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, PreserveLCSSA); }
/// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present /// into calls to the appropriate _Unwind_Resume function. bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { bool UsesNewEH = false; SmallVector<ResumeInst*, 16> Resumes; for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { TerminatorInst *TI = I->getTerminator(); if (ResumeInst *RI = dyn_cast<ResumeInst>(TI)) Resumes.push_back(RI); else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) UsesNewEH = II->getUnwindDest()->isLandingPad(); } if (Resumes.empty()) return UsesNewEH; // Find the rewind function if we didn't already. if (!RewindFunction) { LLVMContext &Ctx = Resumes[0]->getContext(); FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), false); const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); RewindFunction = Fn.getParent()->getOrInsertFunction(RewindName, FTy); } // Create the basic block where the _Unwind_Resume call will live. LLVMContext &Ctx = Fn.getContext(); unsigned ResumesSize = Resumes.size(); if (ResumesSize == 1) { // Instead of creating a new BB and PHI node, just append the call to // _Unwind_Resume to the end of the single resume block. ResumeInst *RI = Resumes.front(); BasicBlock *UnwindBB = RI->getParent(); Value *ExnObj = GetExceptionObject(RI); // Call the _Unwind_Resume function. CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB); CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); // We never expect _Unwind_Resume to return. new UnreachableInst(Ctx, UnwindBB); return true; } BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn); PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesSize, "exn.obj", UnwindBB); // Extract the exception object from the ResumeInst and add it to the PHI node // that feeds the _Unwind_Resume call. 
for (SmallVectorImpl<ResumeInst*>::iterator I = Resumes.begin(), E = Resumes.end(); I != E; ++I) { ResumeInst *RI = *I; BasicBlock *Parent = RI->getParent(); BranchInst::Create(UnwindBB, Parent); Value *ExnObj = GetExceptionObject(RI); PN->addIncoming(ExnObj, Parent); ++NumResumesLowered; } // Call the function. CallInst *CI = CallInst::Create(RewindFunction, PN, "", UnwindBB); CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); // We never expect _Unwind_Resume to return. new UnreachableInst(Ctx, UnwindBB); return true; }
/// Create a clone of the blocks in a loop and connect them together. /// If CreateRemainderLoop is false, loop structure will not be cloned, /// otherwise a new loop will be created including all cloned blocks, and the /// iterator of it switches to count NewIter down to 0. /// The cloned blocks should be inserted between InsertTop and InsertBot. /// If loop structure is cloned InsertTop should be new preheader, InsertBot /// new loop exit. /// Return the new cloned loop that is created when CreateRemainderLoop is true. static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); Loop *ParentLoop = L->getParentLoop(); NewLoopsMap NewLoops; NewLoops[ParentLoop] = ParentLoop; if (!CreateRemainderLoop) NewLoops[L] = ParentLoop; // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); NewBlocks.push_back(NewBB); // If we're unrolling the outermost loop, there's no remainder loop, // and this block isn't in a nested loop, then the new block is not // in any loop. Otherwise, add it to loopinfo. 
if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop) addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); VMap[*BB] = NewBB; if (Header == *BB) { // For the first block, add a CFG connection to this newly // created block. InsertTop->getTerminator()->setSuccessor(0, NewBB); } if (DT) { if (Header == *BB) { // The header is dominated by the preheader. DT->addNewBlock(NewBB, InsertTop); } else { // Copy information from original loop to unrolled loop. BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock(); DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB])); } } if (Latch == *BB) { // For the last block, if CreateRemainderLoop is false, create a direct // jump to InsertBot. If not, create a loop back to cloned head. VMap.erase((*BB)->getTerminator()); BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]); BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator()); IRBuilder<> Builder(LatchBR); if (!CreateRemainderLoop) { Builder.CreateBr(InsertBot); } else { PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, suffix + ".iter", FirstLoopBB->getFirstNonPHI()); Value *IdxSub = Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".sub"); Value *IdxCmp = Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); NewIdx->addIncoming(NewIter, InsertTop); NewIdx->addIncoming(IdxSub, NewBB); } LatchBR->eraseFromParent(); } } // Change the incoming values to the ones defined in the preheader or // cloned loop. 
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *NewPHI = cast<PHINode>(VMap[&*I]); if (!CreateRemainderLoop) { if (UseEpilogRemainder) { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); NewPHI->removeIncomingValue(Latch, false); } else { VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); } } else { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); idx = NewPHI->getBasicBlockIndex(Latch); Value *InVal = NewPHI->getIncomingValue(idx); NewPHI->setIncomingBlock(idx, NewLatch); if (Value *V = VMap.lookup(InVal)) NewPHI->setIncomingValue(idx, V); } } if (CreateRemainderLoop) { Loop *NewLoop = NewLoops[L]; assert(NewLoop && "L should have been cloned"); // Only add loop metadata if the loop is not going to be completely // unrolled. if (UnrollRemainder) return NewLoop; // Add unroll disable metadata to disable future unrolling for this loop. SmallVector<Metadata *, 4> MDs; // Reserve first location for self reference to the LoopID metadata node. MDs.push_back(nullptr); MDNode *LoopID = NewLoop->getLoopID(); if (LoopID) { // First remove any existing loop unrolling metadata. 
for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { bool IsUnrollMetadata = false; MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); if (MD) { const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); } if (!IsUnrollMetadata) MDs.push_back(LoopID->getOperand(i)); } } LLVMContext &Context = NewLoop->getHeader()->getContext(); SmallVector<Metadata *, 1> DisableOperands; DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); MDNode *DisableNode = MDNode::get(Context, DisableOperands); MDs.push_back(DisableNode); MDNode *NewLoopID = MDNode::get(Context, MDs); // Set operand 0 to refer to the loop id itself. NewLoopID->replaceOperandWith(0, NewLoopID); NewLoop->setLoopID(NewLoopID); return NewLoop; } else return nullptr; }
/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi /// node) return blocks, merge them together to promote recursive block merging. static bool mergeEmptyReturnBlocks(Function &F) { bool Changed = false; BasicBlock *RetBlock = 0; // Scan all the blocks in the function, looking for empty return blocks. for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) { BasicBlock &BB = *BBI++; // Only look at return blocks. ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator()); if (Ret == 0) continue; // Only look at the block if it is empty or the only other thing in it is a // single PHI node that is the operand to the return. if (Ret != &BB.front()) { // Check for something else in the block. BasicBlock::iterator I = Ret; --I; // Skip over debug info. while (isa<DbgInfoIntrinsic>(I) && I != BB.begin()) --I; if (!isa<DbgInfoIntrinsic>(I) && (!isa<PHINode>(I) || I != BB.begin() || Ret->getNumOperands() == 0 || Ret->getOperand(0) != I)) continue; } // If this is the first returning block, remember it and keep going. if (RetBlock == 0) { RetBlock = &BB; continue; } // Otherwise, we found a duplicate return block. Merge the two. Changed = true; // Case when there is no input to the return or when the returned values // agree is trivial. Note that they can't agree if there are phis in the // blocks. if (Ret->getNumOperands() == 0 || Ret->getOperand(0) == cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0)) { BB.replaceAllUsesWith(RetBlock); BB.eraseFromParent(); continue; } // If the canonical return block has no PHI node, create one now. 
PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin()); if (RetBlockPHI == 0) { Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0); pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock); RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), std::distance(PB, PE), "merge", &RetBlock->front()); for (pred_iterator PI = PB; PI != PE; ++PI) RetBlockPHI->addIncoming(InVal, *PI); RetBlock->getTerminator()->setOperand(0, RetBlockPHI); } // Turn BB into a block that just unconditionally branches to the return // block. This handles the case when the two return blocks have a common // predecessor but that return different things. RetBlockPHI->addIncoming(Ret->getOperand(0), &BB); BB.getTerminator()->eraseFromParent(); BranchInst::Create(RetBlock, &BB); } return Changed; }
/// Prepare loop \p L for unrolling by \p Count when the trip count is only
/// known at run time: emit code that computes the number of 'extra'
/// iterations (trip count modulo \p Count) and clone the loop body into a
/// remainder that runs them, either after the loop (epilog, when
/// \p UseEpilogRemainder is true) or before it (prolog).
///
/// Returns false, before modifying the IR, when:
///  - the loop is not in loop-simplify form,
///  - it has several exits/exiting blocks and multi-exit unrolling is not
///    both safe and profitable,
///  - \p SE is null or the latch exit count is not a computable integer,
///  - expanding the trip count is high cost and \p AllowExpensiveTripCount
///    is false,
///  - Log2(Count) exceeds the bit width of the backedge count.
///
/// Otherwise the CFG is rewritten, NumRuntimeUnrolled is incremented and true
/// is returned. When \p UnrollRemainder is set and a remainder loop was
/// created, that loop is itself unrolled through UnrollLoop.
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
                                      bool AllowExpensiveTripCount,
                                      bool UseEpilogRemainder,
                                      bool UnrollRemainder,
                                      LoopInfo *LI, ScalarEvolution *SE,
                                      DominatorTree *DT, AssumptionCache *AC,
                                      OptimizationRemarkEmitter *ORE,
                                      bool PreserveLCSSA) {
  DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
  DEBUG(L->dump());
  DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" :
        dbgs() << "Using prolog remainder.\n");

  // Make sure the loop is in canonical form.
  if (!L->isLoopSimplifyForm()) {
    DEBUG(dbgs() << "Not in simplify form!\n");
    return false;
  }

  // Guaranteed by LoopSimplifyForm.
  BasicBlock *Latch = L->getLoopLatch();
  BasicBlock *Header = L->getHeader();

  // The latch successor that is not the header is the latch exit.
  BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
  unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
  BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
  // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
  // targets of the Latch be an exit block out of the loop. This needs
  // to be guaranteed by the callers of UnrollRuntimeLoopRemainder.
  assert(!L->contains(LatchExit) &&
         "one of the loop latch successors should be the exit block!");
  // These are exit blocks other than the target of the latch exiting block.
  SmallVector<BasicBlock *, 4> OtherExits;
  bool isMultiExitUnrollingEnabled =
      canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
                                   UseEpilogRemainder) &&
      canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
                                       UseEpilogRemainder);
  // Support only a single exit and exiting block unless multi-exit loop
  // unrolling is enabled.
  if (!isMultiExitUnrollingEnabled &&
      (!L->getExitingBlock() || OtherExits.size())) {
    DEBUG(
        dbgs()
        << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
           "enabled!\n");
    return false;
  }
  // Use Scalar Evolution to compute the trip count. This allows more loops to
  // be unrolled than relying on induction var simplification.
  if (!SE)
    return false;

  // Only unroll loops with a computable trip count, and the trip count needs
  // to be an int value (allowing a pointer type is a TODO item).
  // We calculate the backedge count by using getExitCount on the Latch block,
  // which is proven to be the only exiting block in this loop. This is the
  // same as calculating getBackedgeTakenCount on the loop (which computes
  // SCEV for all exiting blocks).
  const SCEV *BECountSC = SE->getExitCount(L, Latch);
  if (isa<SCEVCouldNotCompute>(BECountSC) ||
      !BECountSC->getType()->isIntegerTy()) {
    DEBUG(dbgs() << "Could not compute exit block SCEV\n");
    return false;
  }

  unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();

  // Add 1 since the backedge count doesn't include the first loop iteration.
  const SCEV *TripCountSC =
      SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
  if (isa<SCEVCouldNotCompute>(TripCountSC)) {
    DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
    return false;
  }

  BasicBlock *PreHeader = L->getLoopPreheader();
  BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
  const DataLayout &DL = Header->getModule()->getDataLayout();
  SCEVExpander Expander(*SE, DL, "loop-unroll");
  if (!AllowExpensiveTripCount &&
      Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) {
    DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
    return false;
  }

  // This constraint lets us deal with an overflowing trip count easily; see the
  // comment on ModVal below.
  if (Log2_32(Count) > BEWidth) {
    DEBUG(dbgs()
          << "Count failed constraint on overflow trip count calculation.\n");
    return false;
  }

  // Loop structure is the following:
  //
  // PreHeader
  //   Header
  //   ...
  //   Latch
  // LatchExit

  // Only the blocks relevant to the chosen remainder kind are assigned below;
  // NewPreHeader is assigned on both paths.
  BasicBlock *NewPreHeader;
  BasicBlock *NewExit = nullptr;
  BasicBlock *PrologExit = nullptr;
  BasicBlock *EpilogPreHeader = nullptr;
  BasicBlock *PrologPreHeader = nullptr;

  if (UseEpilogRemainder) {
    // If epilog remainder
    // Split PreHeader to insert a branch around loop for unrolling.
    NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
    NewPreHeader->setName(PreHeader->getName() + ".new");
    // Split LatchExit to create phi nodes from branch above.
    SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
    NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa",
                                     DT, LI, PreserveLCSSA);
    // Split NewExit to insert epilog remainder loop.
    EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);
    EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
  } else {
    // If prolog remainder
    // Split the original preheader twice to insert prolog remainder loop
    PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
    PrologPreHeader->setName(Header->getName() + ".prol.preheader");
    PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
                            DT, LI);
    PrologExit->setName(Header->getName() + ".prol.loopexit");
    // Split PrologExit to get NewPreHeader.
    NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
    NewPreHeader->setName(PreHeader->getName() + ".new");
  }
  // Loop structure should be the following (blocks marked '*' are new):
  //  Epilog             Prolog
  //
  // PreHeader         PreHeader
  // *NewPreHeader     *PrologPreHeader
  //   Header          *PrologExit
  //   ...             *NewPreHeader
  //   Latch             Header
  // *NewExit            ...
  // *EpilogPreHeader    Latch
  // LatchExit         LatchExit

  // Calculate conditions for branch around loop for unrolling
  // in epilog case and around prolog remainder loop in prolog case.
  // Compute the number of extra iterations required, which is:
  //  extra iterations = run-time trip count % loop unroll factor
  // The preheader terminator is re-read here because the splitting above may
  // have replaced the one fetched earlier.
  PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
  Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
                                            PreHeaderBR);
  Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
                                          PreHeaderBR);
  IRBuilder<> B(PreHeaderBR);
  Value *ModVal;
  // Calculate ModVal = (BECount + 1) % Count.
  // Note that TripCount is BECount + 1.
  if (isPowerOf2_32(Count)) {
    // When Count is a power of 2 we don't need BECount for the epilog case;
    // however we'll need it for the branch around the unrolling loop in the
    // prolog case.
    ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
    // ModVal is zero when either:
    //  1. There are no iterations to be run in the prolog/epilog loop.
    // OR
    //  2. The addition computing TripCount overflowed.
    //
    // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
    // the number of iterations that remain to be run in the original loop is a
    // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
    // explicitly check this above).
  } else {
    // As (BECount + 1) can potentially unsigned overflow we count
    // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
    Value *ModValTmp = B.CreateURem(BECount,
                                    ConstantInt::get(BECount->getType(),
                                                     Count));
    Value *ModValAdd = B.CreateAdd(ModValTmp,
                                   ConstantInt::get(ModValTmp->getType(), 1));
    // At that point (BECount % Count) + 1 could be equal to Count.
    // To handle this case we need to take mod by Count one more time.
    ModVal = B.CreateURem(ModValAdd,
                          ConstantInt::get(BECount->getType(), Count),
                          "xtraiter");
  }
  // Epilog: go straight to the remainder when BECount < Count - 1.
  // Prolog: enter the remainder when there are extra iterations to run.
  Value *BranchVal =
      UseEpilogRemainder ? B.CreateICmpULT(BECount,
                                           ConstantInt::get(BECount->getType(),
                                                            Count - 1))
                         : B.CreateIsNotNull(ModVal, "lcmp.mod");
  BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
  BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
  // Branch to either remainder (extra iterations) loop or unrolling loop.
  B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
  PreHeaderBR->eraseFromParent();
  // The new conditional branch gives PreHeader a direct edge to NewExit
  // (epilog) or PrologExit (prolog), so PreHeader becomes their immediate
  // dominator.
  if (DT) {
    if (UseEpilogRemainder)
      DT->changeImmediateDominator(NewExit, PreHeader);
    else
      DT->changeImmediateDominator(PrologExit, PreHeader);
  }
  Function *F = Header->getParent();
  // Get an ordered list of blocks in the loop to help with the ordering of the
  // cloned blocks in the prolog/epilog code
  LoopBlocksDFS LoopBlocks(L);
  LoopBlocks.perform(LI);

  //
  // For each extra loop iteration, create a copy of the loop's basic blocks
  // and generate a condition that branches to the copy depending on the
  // number of 'left over' iterations.
  //
  std::vector<BasicBlock *> NewBlocks;
  ValueToValueMapTy VMap;

  // For unroll factor 2 the remainder has exactly 1 iteration.
  // Do not create a 1-iteration loop.
  bool CreateRemainderLoop = (Count != 2);

  // Clone all the basic blocks in the loop. If Count is 2, we don't clone
  // the loop, otherwise we create a cloned loop to execute the extra
  // iterations. This function adds the appropriate CFG connections.
  BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
  BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
  Loop *remainderLoop = CloneLoopBlocks(
      L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder,
      InsertTop, InsertBot,
      NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);

  // Insert the cloned blocks into the function: move everything from the
  // first cloned block to the end of the function in front of InsertBot.
  F->getBasicBlockList().splice(InsertBot->getIterator(),
                                F->getBasicBlockList(),
                                NewBlocks[0]->getIterator(),
                                F->end());

  // Now the loop blocks are cloned and the other exiting blocks from the
  // remainder are connected to the original Loop's exit blocks. The remaining
  // work is to update the phi nodes in the original loop, and take in the
  // values from the cloned region. Also update the dominator info for
  // OtherExits and their immediate successors, since we have new edges into
  // OtherExits.
  SmallSet<BasicBlock*, 8> ImmediateSuccessorsOfExitBlocks;
  for (auto *BB : OtherExits) {
   for (auto &II : *BB) {

     // Given we preserve LCSSA form, we know that the values used outside the
     // loop will be used through these phi nodes at the exit blocks that are
     // transformed below.
     if (!isa<PHINode>(II))
       break;
     PHINode *Phi = cast<PHINode>(&II);
     // Snapshot the count: addIncoming below grows the phi while we iterate.
     unsigned oldNumOperands = Phi->getNumIncomingValues();
     // Add the incoming values from the remainder code to the end of the phi
     // node.
     for (unsigned i =0; i < oldNumOperands; i++){
       Value *newVal = VMap[Phi->getIncomingValue(i)];
       // newVal can be a constant or derived from values outside the loop, and
       // hence need not have a VMap value.
       if (!newVal)
         newVal = Phi->getIncomingValue(i);
       Phi->addIncoming(newVal,
                           cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));
     }
   }
#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
    // No exit block may branch to another exit block or to LatchExit.
    for (BasicBlock *SuccBB : successors(BB)) {
      assert(!(any_of(OtherExits,
                      [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) ||
               SuccBB == LatchExit) &&
             "Breaks the definition of dedicated exits!");
    }
#endif
   // Update the dominator info because the immediate dominator is no longer the
   // header of the original Loop. BB has edges both from L and remainder code.
   // Since the preheader determines which loop is run (L or directly jump to
   // the remainder code), we set the immediate dominator as the preheader.
   if (DT) {
     DT->changeImmediateDominator(BB, PreHeader);
     // Also update the IDom for immediate successors of BB.  If the current
     // IDom is the header, update the IDom to be the preheader because that is
     // the nearest common dominator of all predecessors of SuccBB.  We need to
     // check for IDom being the header because successors of exit blocks can
     // have edges from outside the loop, and we should not incorrectly update
     // the IDom in that case.
     // The set ensures each successor is examined only once.
     for (BasicBlock *SuccBB: successors(BB))
       if (ImmediateSuccessorsOfExitBlocks.insert(SuccBB).second) {
         if (DT->getNode(SuccBB)->getIDom()->getBlock() == Header) {
           assert(!SuccBB->getSinglePredecessor() &&
                  "BB should be the IDom then!");
           DT->changeImmediateDominator(SuccBB, PreHeader);
         }
       }
    }
  }

  // Loop structure should be the following:
  //  Epilog             Prolog
  //
  // PreHeader         PreHeader
  // NewPreHeader      PrologPreHeader
  //   Header            PrologHeader
  //   ...               ...
  //   Latch             PrologLatch
  // NewExit           PrologExit
  // EpilogPreHeader   NewPreHeader
  //   EpilogHeader      Header
  //   ...               ...
  //   EpilogLatch       Latch
  // LatchExit         LatchExit

  // Rewrite the cloned instruction operands to use the values created when the
  // clone is created.
  for (BasicBlock *BB : NewBlocks) {
    for (Instruction &I : *BB) {
      RemapInstruction(&I, VMap,
                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
    }
  }

  if (UseEpilogRemainder) {
    // Connect the epilog code to the original loop and update the
    // PHI functions.
    ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader,
                  EpilogPreHeader, NewPreHeader, VMap, DT, LI,
                  PreserveLCSSA);

    // Update the counter in the loop for unrolling: replace the latch
    // condition with a new counter ("niter") that starts at
    // TripCount - ModVal and is decremented by 1 in the latch.
    IRBuilder<> B2(NewPreHeader->getTerminator());
    Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
    // Note: this local shadows the LatchBR declared at the top of the
    // function.
    BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
    B2.SetInsertPoint(LatchBR);
    PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
                                      Header->getFirstNonPHI());
    Value *IdxSub =
        B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
                     NewIdx->getName() + ".nsub");
    Value *IdxCmp;
    // Keep the branch polarity: continue looping while the counter is
    // non-zero, whichever successor slot holds the header.
    if (LatchBR->getSuccessor(0) == Header)
      IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
    else
      IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
    NewIdx->addIncoming(TestVal, NewPreHeader);
    NewIdx->addIncoming(IdxSub, Latch);
    LatchBR->setCondition(IdxCmp);
  } else {
    // Connect the prolog code to the original loop and update the
    // PHI functions.
    ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
                  NewPreHeader, VMap, DT, LI, PreserveLCSSA);
  }

  // If this loop is nested, then the loop unroller changes the code in the
  // parent loop, so the Scalar Evolution pass needs to be run again.
  if (Loop *ParentLoop = L->getParentLoop())
    SE->forgetLoop(ParentLoop);

  // Canonicalize to LoopSimplifyForm both original and remainder loops. We
  // cannot rely on the LoopUnrollPass to do this because it only does
  // canonicalization for parent/subloops and not the sibling loops.
  if (OtherExits.size() > 0) {
    // Generate dedicated exit blocks for the original loop, to preserve
    // LoopSimplifyForm.
    formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA);
    // Generate dedicated exit blocks for the remainder loop if one exists, to
    // preserve LoopSimplifyForm.
    if (remainderLoop)
      formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA);
  }

  // Optionally unroll the remainder loop, passing Count - 1 as both its
  // unroll count and its trip count.
  if (remainderLoop && UnrollRemainder) {
    DEBUG(dbgs() << "Unrolling remainder loop\n");
    UnrollLoop(remainderLoop, /*Count*/Count - 1, /*TripCount*/Count - 1,
               /*Force*/false, /*AllowRuntime*/false,
               /*AllowExpensiveTripCount*/false, /*PreserveCondBr*/true,
               /*PreserveOnlyFirst*/false, /*TripMultiple*/1,
               /*PeelCount*/0, /*UnrollRemainder*/false, LI, SE, DT, AC, ORE,
               PreserveLCSSA);
  }

  NumRuntimeUnrolled++;
  return true;
}
void PromoteMem2Reg::run() { Function &F = *DT.getRoot()->getParent(); if (AST) PointerAllocaValues.resize(Allocas.size()); AllocaDbgDeclares.resize(Allocas.size()); AllocaInfo Info; LargeBlockInfo LBI; for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { AllocaInst *AI = Allocas[AllocaNum]; assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!"); assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); removeLifetimeIntrinsicUsers(AI); if (AI->use_empty()) { // If there are no uses of the alloca, just delete it now. if (AST) AST->deleteValue(AI); AI->eraseFromParent(); // Remove the alloca from the Allocas list, since it has been processed RemoveFromAllocasList(AllocaNum); ++NumDeadAlloca; continue; } // Calculate the set of read and write-locations for each alloca. This is // analogous to finding the 'uses' and 'definitions' of each variable. Info.AnalyzeAlloca(AI); // If there is only a single store to this value, replace any loads of // it that are directly dominated by the definition with the value stored. if (Info.DefiningBlocks.size() == 1) { if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); ++NumSingleStore; continue; } } // If the alloca is only read and written in one basic block, just perform a // linear sweep over the block to eliminate it. if (Info.OnlyUsedInOneBlock) { promoteSingleBlockAlloca(AI, Info, LBI, AST); // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); continue; } // If we haven't computed dominator tree levels, do so now. 
if (DomLevels.empty()) { SmallVector<DomTreeNode *, 32> Worklist; DomTreeNode *Root = DT.getRootNode(); DomLevels[Root] = 0; Worklist.push_back(Root); while (!Worklist.empty()) { DomTreeNode *Node = Worklist.pop_back_val(); unsigned ChildLevel = DomLevels[Node] + 1; for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE; ++CI) { DomLevels[*CI] = ChildLevel; Worklist.push_back(*CI); } } } // If we haven't computed a numbering for the BB's in the function, do so // now. if (BBNumbers.empty()) { unsigned ID = 0; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) BBNumbers[I] = ID++; } // If we have an AST to keep updated, remember some pointer value that is // stored into the alloca. if (AST) PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal; // Remember the dbg.declare intrinsic describing this alloca, if any. if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare; // Keep the reverse mapping of the 'Allocas' array for the rename pass. AllocaLookup[Allocas[AllocaNum]] = AllocaNum; // At this point, we're committed to promoting the alloca using IDF's, and // the standard SSA construction algorithm. Determine which blocks need PHI // nodes and see if we can optimize out some work by avoiding insertion of // dead phi nodes. DetermineInsertionPoint(AI, AllocaNum, Info); } if (Allocas.empty()) return; // All of the allocas must have been trivial! LBI.clear(); // Set the incoming values for the basic block to be null values for all of // the alloca's. We do this in case there is a load of a value that has not // been stored yet. In this case, it will get this null value. 
// RenamePassData::ValVector Values(Allocas.size()); for (unsigned i = 0, e = Allocas.size(); i != e; ++i) Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); // Walks all basic blocks in the function performing the SSA rename algorithm // and inserting the phi nodes we marked as necessary // std::vector<RenamePassData> RenamePassWorkList; RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values)); do { RenamePassData RPD; RPD.swap(RenamePassWorkList.back()); RenamePassWorkList.pop_back(); // RenamePass may add new worklist entries. RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); } while (!RenamePassWorkList.empty()); // The renamer uses the Visited set to avoid infinite loops. Clear it now. Visited.clear(); // Remove the allocas themselves from the function. for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { Instruction *A = Allocas[i]; // If there are any uses of the alloca instructions left, they must be in // unreachable basic blocks that were not processed by walking the dominator // tree. Just delete the users now. if (!A->use_empty()) A->replaceAllUsesWith(UndefValue::get(A->getType())); if (AST) AST->deleteValue(A); A->eraseFromParent(); } // Remove alloca's dbg.declare instrinsics from the function. for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) DDI->eraseFromParent(); // Loop over all of the PHI nodes and see if there are any that we can get // rid of because they merge all of the same incoming values. This can // happen due to undef values coming into the PHI nodes. This process is // iterative, because eliminating one PHI node can cause others to be removed. bool EliminatedAPHI = true; while (EliminatedAPHI) { EliminatedAPHI = false; // Iterating over NewPhiNodes is deterministic, so it is safe to try to // simplify and RAUW them as we go. 
If it was not, we could add uses to // the values we replace with in a non-deterministic order, thus creating // non-deterministic def->use chains. for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) { if (AST && PN->getType()->isPointerTy()) AST->deleteValue(PN); PN->replaceAllUsesWith(V); PN->eraseFromParent(); NewPhiNodes.erase(I++); EliminatedAPHI = true; continue; } ++I; } } // At this point, the renamer has added entries to PHI nodes for all reachable // code. Unfortunately, there may be unreachable blocks which the renamer // hasn't traversed. If this is the case, the PHI nodes may not // have incoming values for all predecessors. Loop over all PHI nodes we have // created, inserting undef values if they are missing any incoming values. // for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) { // We want to do this once per basic block. As such, only process a block // when we find the PHI that is the first entry in the block. PHINode *SomePHI = I->second; BasicBlock *BB = SomePHI->getParent(); if (&BB->front() != SomePHI) continue; // Only do work here if there the PHI nodes are missing incoming values. We // know that all PHI nodes that were inserted in a block will have the same // number of incoming values, so we can just check any of them. if (SomePHI->getNumIncomingValues() == getNumPreds(BB)) continue; // Get the preds for BB. SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); // Ok, now we know that all of the PHI nodes are missing entries for some // basic blocks. Start by sorting the incoming predecessors for efficient // access. 
std::sort(Preds.begin(), Preds.end()); // Now we loop through all BB's which have entries in SomePHI and remove // them from the Preds list. for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) { // Do a log(n) search of the Preds list for the entry we want. SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound( Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i)); assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) && "PHI node has entry for a block which is not a predecessor!"); // Remove the entry Preds.erase(EntIt); } // At this point, the blocks left in the preds list must have dummy // entries inserted into every PHI nodes for the block. Update all the phi // nodes in this block that we are inserting (there could be phis before // mem2reg runs). unsigned NumBadPreds = SomePHI->getNumIncomingValues(); BasicBlock::iterator BBI = BB->begin(); while ((SomePHI = dyn_cast<PHINode>(BBI++)) && SomePHI->getNumIncomingValues() == NumBadPreds) { Value *UndefVal = UndefValue::get(SomePHI->getType()); for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred) SomePHI->addIncoming(UndefVal, Preds[pred]); } } NewPhiNodes.clear(); }
/// Lower a llvm.bitset.test call to its implementation. Returns the value to /// replace the call with. Value *LowerBitSets::lowerBitSetCall( CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI, Constant *CombinedGlobalIntAddr, const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) { Value *Ptr = CI->getArgOperand(0); const DataLayout &DL = M->getDataLayout(); if (BSI.containsValue(DL, GlobalLayout, Ptr)) return ConstantInt::getTrue(M->getContext()); Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd( CombinedGlobalIntAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset)); BasicBlock *InitialBB = CI->getParent(); IRBuilder<> B(CI); Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy); if (BSI.isSingleOffset()) return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt); Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt); Value *BitOffset; if (BSI.AlignLog2 == 0) { BitOffset = PtrOffset; } else { // We need to check that the offset both falls within our range and is // suitably aligned. We can check both properties at the same time by // performing a right rotate by log2(alignment) followed by an integer // comparison against the bitset size. The rotate will move the lower // order bits that need to be zero into the higher order bits of the // result, causing the comparison to fail if they are nonzero. The rotate // also conveniently gives us a bit offset to use during the load from // the bitset. Value *OffsetSHR = B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2)); Value *OffsetSHL = B.CreateShl( PtrOffset, ConstantInt::get(IntPtrTy, DL.getPointerSizeInBits(0) - BSI.AlignLog2)); BitOffset = B.CreateOr(OffsetSHR, OffsetSHL); } Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BSI.BitSize); Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst); // If the bit set is all ones, testing against it is unnecessary. 
if (BSI.isAllOnes()) return OffsetInRange; TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false); IRBuilder<> ThenB(Term); // Now that we know that the offset is in range and aligned, load the // appropriate bit from the bitset. Value *Bit = createBitSetTest(ThenB, BSI, BAI, BitOffset); // The value we want is 0 if we came directly from the initial block // (having failed the range or alignment checks), or the loaded bit if // we came from the block in which we loaded it. B.SetInsertPoint(CI); PHINode *P = B.CreatePHI(Int1Ty, 2); P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB); P->addIncoming(Bit, ThenB.GetInsertBlock()); return P; }