// We generate a loop of either of the following structures: // // BeforeBB BeforeBB // | | // v v // GuardBB PreHeaderBB // / | | _____ // __ PreHeaderBB | v \/ | // / \ / | HeaderBB latch // latch HeaderBB | |\ | // \ / \ / | \------/ // < \ / | // \ / v // ExitBB ExitBB // // depending on whether or not we know that it is executed at least once. If // not, GuardBB checks if the loop is executed at least once. If this is the // case we branch to PreHeaderBB and subsequently to the HeaderBB, which // contains the loop iv 'polly.indvar', the incremented loop iv // 'polly.indvar_next' as well as the condition to check if we execute another // iteration of the loop. After the loop has finished, we branch to ExitBB. Value *polly::createLoop(Value *LB, Value *UB, Value *Stride, PollyIRBuilder &Builder, Pass *P, LoopInfo &LI, DominatorTree &DT, BasicBlock *&ExitBB, ICmpInst::Predicate Predicate, LoopAnnotator *Annotator, bool Parallel, bool UseGuard) { Function *F = Builder.GetInsertBlock()->getParent(); LLVMContext &Context = F->getContext(); assert(LB->getType() == UB->getType() && "Types of loop bounds do not match"); IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType()); assert(LoopIVType && "UB is not integer?"); BasicBlock *BeforeBB = Builder.GetInsertBlock(); BasicBlock *GuardBB = UseGuard ? 
BasicBlock::Create(Context, "polly.loop_if", F) : nullptr; BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F); BasicBlock *PreHeaderBB = BasicBlock::Create(Context, "polly.loop_preheader", F); if (Annotator) { Annotator->Begin(HeaderBB); if (Parallel) Annotator->SetCurrentParallel(); } // Update LoopInfo Loop *OuterLoop = LI.getLoopFor(BeforeBB); Loop *NewLoop = new Loop(); if (OuterLoop) OuterLoop->addChildLoop(NewLoop); else LI.addTopLevelLoop(NewLoop); if (OuterLoop && GuardBB) OuterLoop->addBasicBlockToLoop(GuardBB, LI.getBase()); else if (OuterLoop) OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI.getBase()); NewLoop->addBasicBlockToLoop(HeaderBB, LI.getBase()); // ExitBB ExitBB = SplitBlock(BeforeBB, Builder.GetInsertPoint()++, P); ExitBB->setName("polly.loop_exit"); // BeforeBB if (GuardBB) { BeforeBB->getTerminator()->setSuccessor(0, GuardBB); DT.addNewBlock(GuardBB, BeforeBB); // GuardBB Builder.SetInsertPoint(GuardBB); Value *LoopGuard; LoopGuard = Builder.CreateICmp(Predicate, LB, UB); LoopGuard->setName("polly.loop_guard"); Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB); DT.addNewBlock(PreHeaderBB, GuardBB); } else { BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB); DT.addNewBlock(PreHeaderBB, BeforeBB); } // PreHeaderBB Builder.SetInsertPoint(PreHeaderBB); Builder.CreateBr(HeaderBB); // HeaderBB DT.addNewBlock(HeaderBB, PreHeaderBB); Builder.SetInsertPoint(HeaderBB); PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar"); IV->addIncoming(LB, PreHeaderBB); Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType); Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next"); Value *LoopCondition; UB = Builder.CreateSub(UB, Stride, "polly.adjust_ub"); LoopCondition = Builder.CreateICmp(Predicate, IV, UB); LoopCondition->setName("polly.loop_cond"); Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB); IV->addIncoming(IncrementedIV, HeaderBB); if (GuardBB) DT.changeImmediateDominator(ExitBB, 
GuardBB); else DT.changeImmediateDominator(ExitBB, BeforeBB); // The loop body should be added here. Builder.SetInsertPoint(HeaderBB->getFirstNonPHI()); return IV; }
/// Replace instructions that cannot plausibly execute inside the funclet they
/// were assigned to with UnreachableInst:
///  - call sites whose "funclet" operand bundle does not name this funclet's
///    pad (the rest of the block is cut off at that point), and
///  - terminators that leave the funclet in a way that does not match its pad.
/// Additionally, for the MSVC C++ personality, invokes that terminate a block
/// of a cleanuppad funclet lose their unwind edge.
void WinEHPrepare::removeImplausibleInstructions(Function &F) {
  for (auto &FuncletEntry : FuncletBlocks) {
    BasicBlock *PadBB = FuncletEntry.first;
    std::vector<BasicBlock *> &MemberBlocks = FuncletEntry.second;

    // FuncletPad is null when the block does not start with a funclet pad;
    // CatchPad / CleanupPad are null unless the pad is of that specific kind.
    auto *FuncletPad = dyn_cast<FuncletPadInst>(PadBB->getFirstNonPHI());
    auto *CatchPad = dyn_cast_or_null<CatchPadInst>(FuncletPad);
    auto *CleanupPad = dyn_cast_or_null<CleanupPadInst>(FuncletPad);

    for (BasicBlock *BB : MemberBlocks) {
      for (Instruction &I : *BB) {
        CallSite CS(&I);
        if (!CS)
          continue;

        // The pad named by the call site's "funclet" bundle (null if the call
        // site carries no such bundle) has to be this funclet's pad.
        Value *BundlePad = nullptr;
        if (auto Bundle = CS.getOperandBundle(LLVMContext::OB_funclet))
          BundlePad = Bundle->Inputs.front();
        if (BundlePad == FuncletPad)
          continue;

        // Skip call sites which are nounwind intrinsics or inline asm.
        // NOTE(review): Callee is only non-null when the called value is a
        // Function, so the isInlineAsm() term looks unreachable for actual
        // inline-asm callees -- confirm the intended behavior.
        auto *Callee =
            dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
        if (Callee) {
          const bool IsNoUnwindIntrinsic =
              Callee->isIntrinsic() && CS.doesNotThrow();
          if (IsNoUnwindIntrinsic || CS.isInlineAsm())
            continue;
        }

        // This call site was not part of this funclet, remove it.
        Instruction *Doomed = &I;
        if (CS.isInvoke()) {
          // Drop the unwind edge first; afterwards the instruction right
          // before the block terminator is expected to be the call that took
          // the invoke's place (the cast asserts this).
          removeUnwindEdge(BB);
          Doomed =
              cast<CallInst>(&*std::prev(BB->getTerminator()->getIterator()));
        }
        changeToUnreachable(Doomed, /*UseLLVMTrap=*/false);

        // Nothing but the unreachable is left in this block; stop scanning.
        break;
      }

      Instruction *Term = BB->getTerminator();

      // Decide whether the terminator is compatible with the funclet:
      //  - a ret cannot be reached from inside a catchpad/cleanuppad funclet,
      //  - a catchret must consume this funclet's catchpad token,
      //  - a cleanupret must consume this funclet's cleanuppad token.
      bool IsImplausible = false;
      if (isa<ReturnInst>(Term))
        IsImplausible = FuncletPad != nullptr;
      else if (auto *CatchRet = dyn_cast<CatchReturnInst>(Term))
        IsImplausible = CatchRet->getCatchPad() != CatchPad;
      else if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(Term))
        IsImplausible = CleanupRet->getCleanupPad() != CleanupPad;

      if (IsImplausible) {
        changeToUnreachable(Term, /*UseLLVMTrap=*/false);
        continue;
      }

      // Invokes within a cleanuppad for the MSVC++ personality never transfer
      // control to their unwind edge: the personality will terminate the
      // program.
      if (isa<InvokeInst>(Term) && Personality == EHPersonality::MSVC_CXX &&
          CleanupPad)
        removeUnwindEdge(BB);
    }
  }
}
/// Try to partially inline \p F.
///
/// \p F qualifies when its entry block ends in a conditional branch with
/// exactly one successor that terminates in a return. In that case a clone of
/// \p F is created, everything in the clone except the entry block, the
/// returning block and ... the PHI-free tail of the returning block is
/// outlined into a new function, and the remaining thin wrapper is inlined
/// into all direct call/invoke users.
///
/// \returns the outlined function, or nullptr if \p F is not a candidate.
Function *PartialInlinerImpl::unswitchFunction(Function *F) {
  // Candidate check: conditional branch in the entry block ...
  BasicBlock *Entry = &F->front();
  BranchInst *EntryBranch = dyn_cast<BranchInst>(Entry->getTerminator());
  if (!EntryBranch || EntryBranch->isUnconditional())
    return nullptr;

  // ... with exactly one successor that returns immediately.
  BasicBlock *ReturningSucc = nullptr;
  BasicBlock *OtherSucc = nullptr;
  unsigned NumReturningSuccs = 0;
  for (BasicBlock *Succ : successors(Entry)) {
    if (!isa<ReturnInst>(Succ->getTerminator())) {
      OtherSucc = Succ;
      continue;
    }
    ReturningSucc = Succ;
    ++NumReturningSuccs;
  }
  if (NumReturningSuccs != 1)
    return nullptr;

  // Work on an internal clone so the original stays untouched.
  ValueToValueMapTy VMap;
  Function *Clone = CloneFunction(F, VMap);
  Clone->setLinkage(GlobalValue::InternalLinkage);

  BasicBlock *ClonedEntry = cast<BasicBlock>(VMap[Entry]);
  BasicBlock *PreReturn = cast<BasicBlock>(VMap[ReturningSucc]);
  BasicBlock *ClonedBody = cast<BasicBlock>(VMap[OtherSucc]);

  // Point every user at the clone for now, so the regular inliner can be
  // used once the clone has been reshaped.
  F->replaceAllUsesWith(Clone);

  // PHI nodes in the return block may merge values from the entry block and
  // from blocks that are about to be extracted. Split them into two levels:
  // PreReturn keeps the PHIs for the to-be-extracted predecessors, while the
  // tail block gets new PHIs merging PreReturn's result with the entry
  // block's value.
  BasicBlock *ReturnTail =
      PreReturn->splitBasicBlock(PreReturn->getFirstNonPHI()->getIterator());

  Instruction *InsertBefore = &ReturnTail->front();
  for (BasicBlock::iterator It = PreReturn->begin(); It != PreReturn->end();
       ++It) {
    PHINode *OldPhi = dyn_cast<PHINode>(It);
    if (!OldPhi)
      break;

    PHINode *TailPhi = PHINode::Create(OldPhi->getType(), 2, "", InsertBefore);
    OldPhi->replaceAllUsesWith(TailPhi);
    InsertBefore = ReturnTail->getFirstNonPHI();

    TailPhi->addIncoming(&*It, PreReturn);
    TailPhi->addIncoming(OldPhi->getIncomingValueForBlock(ClonedEntry),
                         ClonedEntry);
    OldPhi->removeIncomingValue(ClonedEntry);
  }
  // The entry block now bypasses PreReturn and jumps straight to the tail.
  ClonedEntry->getTerminator()->replaceUsesOfWith(PreReturn, ReturnTail);

  // Everything except the entry block and the return tail gets extracted;
  // the non-returning successor goes first in the list.
  std::vector<BasicBlock *> ToExtract;
  ToExtract.push_back(ClonedBody);
  for (BasicBlock &BB : *Clone) {
    const bool AlreadyHandled =
        &BB == ClonedEntry || &BB == ReturnTail || &BB == ClonedBody;
    if (!AlreadyHandled)
      ToExtract.push_back(&BB);
  }

  // The CodeExtractor needs a dominator tree.
  DominatorTree DT;
  DT.recalculate(*Clone);

  // Outline the body of the `if`.
  Function *Outlined = CodeExtractor(ToExtract, &DT).extractCodeRegion();

  // Inline the remaining top-level test into all direct callers. The user
  // list is snapshotted first because inlining modifies it.
  std::vector<User *> CloneUsers(Clone->user_begin(), Clone->user_end());
  for (User *U : CloneUsers) {
    if (CallInst *Call = dyn_cast<CallInst>(U))
      InlineFunction(Call, IFI);
    else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(U))
      InlineFunction(Invoke, IFI);
  }

  // Whatever still refers to the clone (function pointers, etc.) goes back to
  // the original function, and the clone is discarded.
  Clone->replaceAllUsesWith(F);
  Clone->eraseFromParent();

  ++NumPartialInlined;
  return Outlined;
}
void DSWP::cleanup(Loop *L, LPPassManager &LPM) { // Move some instructions that may not have been inserted in the right // place, delete the old loop, and clean up our aux data structures for this // loop. /* * move the produce instructions, which have been inserted after the branch, * in front of it */ for (int i = 0; i < MAX_THREAD; i++) { for (Function::iterator bi = allFunc[i]->begin(), be = allFunc[i]->end(); bi != be; ++bi) { BasicBlock *bb = bi; TerminatorInst *term = NULL; for (BasicBlock::iterator ii = bb->begin(), ie = bb->end(); ii != ie; ++ii) { Instruction *inst = ii; if (isa<TerminatorInst>(inst)) { term = dyn_cast<TerminatorInst>(inst); break; } } if (term == NULL) { error("term cannot be null"); } while (true) { Instruction *last = &bb->getInstList().back(); if (isa<TerminatorInst>(last)) break; last->moveBefore(term); } } } /* * move the phi nodes to the top of the block */ for (int i = 0; i < MAX_THREAD; i++) { for (Function::iterator bi = allFunc[i]->begin(), be = allFunc[i]->end(); bi != be; ++bi) { BasicBlock *bb = bi; Instruction *first_nonphi = bb->getFirstNonPHI(); BasicBlock::iterator ii = bb->begin(), ie = bb->end(); // advance the iterator up to one past first_nonphi while (&(*ii) != first_nonphi) { ++ii; } ++ii; // move any phi nodes after the first nonphi to before it for (BasicBlock::iterator i_next; ii != ie; ii = i_next) { i_next = ii; ++i_next; Instruction *inst = ii; if (isa<PHINode>(inst)) { inst->moveBefore(first_nonphi); } } } } cout << "begin to delete loop" << endl; for (Loop::block_iterator bi = L->block_begin(), be = L->block_end(); bi != be; ++bi) { BasicBlock *BB = *bi; for (BasicBlock::iterator ii = BB->begin(), i_next, ie = BB->end(); ii != ie; ii = i_next) { i_next = ii; ++i_next; Instruction &inst = *ii; inst.replaceAllUsesWith(UndefValue::get(inst.getType())); inst.eraseFromParent(); } } // Delete the basic blocks only afterwards // so that backwards branch instructions don't break for (Loop::block_iterator bi = 
L->block_begin(), be = L->block_end(); bi != be; ++bi) { BasicBlock *BB = *bi; BB->eraseFromParent(); } LPM.deleteLoopFromQueue(L); }
/// Give every funclet its own private copy of each block that is currently
/// shared between several funclets (i.e. has more than one color in
/// BlockColors).
///
/// For each funclet in FuncletBlocks this
///   1. clones every multi-colored block of the funclet,
///   2. moves this funclet's color from the original block to the clone and
///      replaces the original by the clone in the funclet's block list,
///   3. remaps the funclet's instructions to the cloned values/blocks,
///   4. redirects catchrets that enter this funclet to the clones,
///   5. prunes and extends PHI nodes so they match the new edges, and
///   6. rewrites uses of cloned values outside the funclet via SSAUpdater.
void WinEHPrepare::cloneCommonBlocks(Function &F) {
  // We need to clone all blocks which belong to multiple funclets.  Values are
  // remapped throughout the funclet to propagate both the new instructions
  // *and* the new basic blocks themselves.
  for (auto &Funclets : FuncletBlocks) {
    BasicBlock *FuncletPadBB = Funclets.first;
    std::vector<BasicBlock *> &BlocksInFunclet = Funclets.second;
    // Token identifying this funclet: 'none' for the funclet rooted at the
    // function's entry block, otherwise the first non-PHI instruction of the
    // funclet's pad block.
    Value *FuncletToken;
    if (FuncletPadBB == &F.getEntryBlock())
      FuncletToken = ConstantTokenNone::get(F.getContext());
    else
      FuncletToken = FuncletPadBB->getFirstNonPHI();

    // (original block, clone) pairs created for this funclet, in creation
    // order, plus the value map shared by all clones of this funclet.
    std::vector<std::pair<BasicBlock *, BasicBlock *>> Orig2Clone;
    ValueToValueMapTy VMap;
    for (BasicBlock *BB : BlocksInFunclet) {
      ColorVector &ColorsForBB = BlockColors[BB];
      // We don't need to do anything if the block is monochromatic.
      size_t NumColorsForBB = ColorsForBB.size();
      if (NumColorsForBB == 1)
        continue;

      DEBUG_WITH_TYPE("winehprepare-coloring",
                      dbgs() << " Cloning block \'" << BB->getName()
                             << "\' for funclet \'" << FuncletPadBB->getName()
                             << "\'.\n");

      // Create a new basic block and copy instructions into it!
      BasicBlock *CBB =
          CloneBasicBlock(BB, VMap, Twine(".for.", FuncletPadBB->getName()));
      // Insert the clone immediately after the original to ensure determinism
      // and to keep the same relative ordering of any funclet's blocks.
      CBB->insertInto(&F, BB->getNextNode());

      // Add basic block mapping.
      VMap[BB] = CBB;

      // Record delta operations that we need to perform to our color mappings.
      // (BlocksInFunclet is being iterated, so it is not modified here.)
      Orig2Clone.emplace_back(BB, CBB);
    }

    // If nothing was cloned, we're done cloning in this funclet.
    if (Orig2Clone.empty())
      continue;

    // Update our color mappings to reflect that one block has lost a color and
    // another has gained a color.
    for (auto &BBMapping : Orig2Clone) {
      BasicBlock *OldBlock = BBMapping.first;
      BasicBlock *NewBlock = BBMapping.second;

      // The clone joins this funclet and is colored with it only.
      BlocksInFunclet.push_back(NewBlock);
      ColorVector &NewColors = BlockColors[NewBlock];
      assert(NewColors.empty() && "A new block should only have one color!");
      NewColors.push_back(FuncletPadBB);

      DEBUG_WITH_TYPE("winehprepare-coloring",
                      dbgs() << " Assigned color \'" << FuncletPadBB->getName()
                             << "\' to block \'" << NewBlock->getName()
                             << "\'.\n");

      // The original leaves this funclet and loses this funclet's color.
      BlocksInFunclet.erase(
          std::remove(BlocksInFunclet.begin(), BlocksInFunclet.end(), OldBlock),
          BlocksInFunclet.end());
      ColorVector &OldColors = BlockColors[OldBlock];
      OldColors.erase(
          std::remove(OldColors.begin(), OldColors.end(), FuncletPadBB),
          OldColors.end());

      DEBUG_WITH_TYPE("winehprepare-coloring",
                      dbgs() << " Removed color \'" << FuncletPadBB->getName()
                             << "\' from block \'" << OldBlock->getName()
                             << "\'.\n");
    }

    // Loop over all of the instructions in this funclet, fixing up operand
    // references as we go.  This uses VMap to do all the hard work.
    for (BasicBlock *BB : BlocksInFunclet)
      // Loop over all instructions, fixing each one as we find it...
      for (Instruction &I : *BB)
        RemapInstruction(&I, VMap,
                         RF_IgnoreMissingLocals | RF_NoModuleLevelChanges);

    // Catchrets targeting cloned blocks need to be updated separately from
    // the loop above because they are not in the current funclet.
    SmallVector<CatchReturnInst *, 2> FixupCatchrets;
    for (auto &BBMapping : Orig2Clone) {
      BasicBlock *OldBlock = BBMapping.first;
      BasicBlock *NewBlock = BBMapping.second;

      // Collect first and retarget afterwards: setSuccessor() changes the
      // predecessor list that is being walked.
      FixupCatchrets.clear();
      for (BasicBlock *Pred : predecessors(OldBlock))
        if (auto *CatchRet = dyn_cast<CatchReturnInst>(Pred->getTerminator()))
          if (CatchRet->getCatchSwitchParentPad() == FuncletToken)
            FixupCatchrets.push_back(CatchRet);

      for (CatchReturnInst *CatchRet : FixupCatchrets)
        CatchRet->setSuccessor(NewBlock);
    }

    // Drop the PHI entries that no longer correspond to an incoming edge.
    // An entry is removed when IsForOldBlock == "the edge comes from this
    // funclet": the original block loses the entries for edges out of this
    // funclet, the clone loses the entries for all other edges.
    auto UpdatePHIOnClonedBlock = [&](PHINode *PN, bool IsForOldBlock) {
      unsigned NumPreds = PN->getNumIncomingValues();
      for (unsigned PredIdx = 0, PredEnd = NumPreds; PredIdx != PredEnd;
           ++PredIdx) {
        BasicBlock *IncomingBlock = PN->getIncomingBlock(PredIdx);
        bool EdgeTargetsFunclet;
        if (auto *CRI =
                dyn_cast<CatchReturnInst>(IncomingBlock->getTerminator())) {
          // A catchret edge is attributed to the funclet named by the parent
          // pad of its catchswitch, not to the color of the catchret's block.
          EdgeTargetsFunclet = (CRI->getCatchSwitchParentPad() == FuncletToken);
        } else {
          ColorVector &IncomingColors = BlockColors[IncomingBlock];
          assert(!IncomingColors.empty() && "Block not colored!");
          assert((IncomingColors.size() == 1 ||
                  llvm::all_of(IncomingColors,
                               [&](BasicBlock *Color) {
                                 return Color != FuncletPadBB;
                               })) &&
                 "Cloning should leave this funclet's blocks monochromatic");
          EdgeTargetsFunclet = (IncomingColors.front() == FuncletPadBB);
        }
        if (IsForOldBlock != EdgeTargetsFunclet)
          continue;
        PN->removeIncomingValue(IncomingBlock, /*DeletePHIIfEmpty=*/false);
        // Revisit the next entry.
        // The removal changed which entry sits at PredIdx, so undo the
        // upcoming ++PredIdx and shrink the bound. For PredIdx == 0 the
        // unsigned decrement wraps and the loop increment wraps it back to 0.
        --PredIdx;
        --PredEnd;
      }
    };

    for (auto &BBMapping : Orig2Clone) {
      BasicBlock *OldBlock = BBMapping.first;
      BasicBlock *NewBlock = BBMapping.second;
      for (PHINode &OldPN : OldBlock->phis()) {
        UpdatePHIOnClonedBlock(&OldPN, /*IsForOldBlock=*/true);
      }
      for (PHINode &NewPN : NewBlock->phis()) {
        UpdatePHIOnClonedBlock(&NewPN, /*IsForOldBlock=*/false);
      }
    }

    // Check to see if SuccBB has PHI nodes. If so, we need to add entries to
    // the PHI nodes for NewBB now.
    for (auto &BBMapping : Orig2Clone) {
      BasicBlock *OldBlock = BBMapping.first;
      BasicBlock *NewBlock = BBMapping.second;
      for (BasicBlock *SuccBB : successors(NewBlock)) {
        for (PHINode &SuccPN : SuccBB->phis()) {
          // Ok, we have a PHI node.  Figure out what the incoming value was for
          // the OldBlock.
          int OldBlockIdx = SuccPN.getBasicBlockIndex(OldBlock);
          // No entry for OldBlock in this PHI: stop processing this
          // successor's PHIs altogether.
          if (OldBlockIdx == -1)
            break;
          Value *IV = SuccPN.getIncomingValue(OldBlockIdx);

          // Remap the value if necessary.
          if (auto *Inst = dyn_cast<Instruction>(IV)) {
            ValueToValueMapTy::iterator I = VMap.find(Inst);
            if (I != VMap.end())
              IV = I->second;
          }

          SuccPN.addIncoming(IV, NewBlock);
        }
      }
    }

    for (ValueToValueMapTy::value_type VT : VMap) {
      // If there were values defined in BB that are used outside the funclet,
      // then we now have to update all uses of the value to use either the
      // original value, the cloned value, or some PHI derived value.  This can
      // require arbitrary PHI insertion, of which we are prepared to do, clean
      // these up now.
      SmallVector<Use *, 16> UsesToRename;

      // VMap also holds the block-to-block entries added above; only
      // instruction entries are relevant here.
      auto *OldI = dyn_cast<Instruction>(const_cast<Value *>(VT.first));
      if (!OldI)
        continue;
      auto *NewI = cast<Instruction>(VT.second);
      // Scan all uses of this instruction to see if it is used outside of its
      // funclet, and if so, record them in UsesToRename.
      for (Use &U : OldI->uses()) {
        Instruction *UserI = cast<Instruction>(U.getUser());
        BasicBlock *UserBB = UserI->getParent();
        ColorVector &ColorsForUserBB = BlockColors[UserBB];
        assert(!ColorsForUserBB.empty());
        // "Outside" means: the user's block is shared, or its single color is
        // a different funclet.
        if (ColorsForUserBB.size() > 1 ||
            *ColorsForUserBB.begin() != FuncletPadBB)
          UsesToRename.push_back(&U);
      }

      // If there are no uses outside the block, we're done with this
      // instruction.
      if (UsesToRename.empty())
        continue;

      // We found a use of OldI outside of the funclet.  Rename all uses of OldI
      // that are outside its funclet to be uses of the appropriate PHI node
      // etc.
      SSAUpdater SSAUpdate;
      SSAUpdate.Initialize(OldI->getType(), OldI->getName());
      SSAUpdate.AddAvailableValue(OldI->getParent(), OldI);
      SSAUpdate.AddAvailableValue(NewI->getParent(), NewI);

      while (!UsesToRename.empty())
        SSAUpdate.RewriteUseAfterInsertions(*UsesToRename.pop_back_val());
    }
  }
}
/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop /// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), /// sink it into user blocks to reduce the number of virtual /// registers that must be created and coalesced. /// /// Return true if any changes are made. /// static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ // If this is a noop copy, EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(CI->getType()); // This is an fp<->int conversion? if (SrcVT.isInteger() != DstVT.isInteger()) return false; // If this is an extension, it will be a zero or sign extension, which // isn't a noop. if (SrcVT.bitsLT(DstVT)) return false; // If these values will be promoted, find out what they will be promoted // to. This helps us consider truncates on PPC as noop copies when they // are. if (TLI.getTypeAction(CI->getContext(), SrcVT) == TargetLowering::Promote) SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); if (TLI.getTypeAction(CI->getContext(), DstVT) == TargetLowering::Promote) DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); // If, after promotion, these are the same types, this is a noop copy. if (SrcVT != DstVT) return false; BasicBlock *DefBB = CI->getParent(); /// InsertedCasts - Only insert a cast in each block once. DenseMap<BasicBlock*, CastInst*> InsertedCasts; bool MadeChange = false; for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); UI != E; ) { Use &TheUse = UI.getUse(); Instruction *User = cast<Instruction>(*UI); // Figure out which BB this cast is used in. For PHI's this is the // appropriate predecessor block. BasicBlock *UserBB = User->getParent(); if (PHINode *PN = dyn_cast<PHINode>(User)) { UserBB = PN->getIncomingBlock(UI); } // Preincrement use iterator so we don't invalidate it. ++UI; // If this user is in the same block as the cast, don't change the cast. 
if (UserBB == DefBB) continue; // If we have already inserted a cast into this block, use it. CastInst *&InsertedCast = InsertedCasts[UserBB]; if (!InsertedCast) { BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI(); InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", InsertPt); MadeChange = true; } // Replace a use of the cast with a use of the new cast. TheUse = InsertedCast; } // If we removed all uses, nuke the cast. if (CI->use_empty()) { CI->eraseFromParent(); MadeChange = true; } return MadeChange; }
bool ProfilingPass::runOnFunction(Function &F) { LLVMContext& context = F.getContext(); Module *m = F.getParent(); string funcname = F.getNameStr(); string injectfunc("injectFault"); if((profileoption == 'p' || profileoption == 'c' || profileoption == 'i') && (funcname.find(injectfunc) != string::npos)) return false; std::vector<Instruction*> insert_worklist; for (inst_iterator In = inst_begin(F), E = inst_end(F); In != E; ++In) { Instruction *I = dyn_cast<Instruction>(&*In); //errs()<<*I<<"\n"; if(profileoption == 'b') { if(CmpInst *ci = dyn_cast<CmpInst>(I)) if(is_used_by_branch(ci)){ vector<const Type*> argTypes(1); argTypes[0] = Type::getInt32Ty(context); // enum for the options FunctionType* countFuncType = FunctionType::get( Type::getVoidTy(context), argTypes, 0 ); Constant* countFunc = m->getOrInsertFunction("doProfiling", countFuncType); // get the injection function vector<Value*> countArgs(1); const IntegerType* itype = IntegerType::get(context,32); Value* branchVal = ConstantInt::get(itype, BRANCH ); countArgs[0] = branchVal; //enum for branch CallInst::Create( countFunc, countArgs.begin(),countArgs.end(), "", I); } } else if(profileoption == 'd') { //see if the current instruction is a cmp instruction that leads to a conditional branch //add the instrumentation to the defs of this cmp instruction //--> Static time deduction since branch not known if executed or not CmpInst *ci = dyn_cast<CmpInst>(I); //errs() <<"reached here:\n"; if(!ci) continue; //traverse def-use chain //int is_used_by_branch = 0; if(!is_used_by_branch(I)) continue; //the defines of this instruction I --> would be injectFaultCalls. 
for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) { Instruction *v = dyn_cast<Instruction>(*i); if(!v) continue; //errs() <<"reached here:"<< *v << "\n"; //do profiling for the def vector<const Type*> argTypes(1); argTypes[0] = Type::getInt32Ty(context); // enum for the options FunctionType* countFuncType = FunctionType::get( Type::getVoidTy(context), argTypes, 0 ); Constant* countFunc = m->getOrInsertFunction("doProfiling", countFuncType); // get the injection function vector<Value*> countArgs(1); const IntegerType* itype = IntegerType::get(context,32); Value* defVal = ConstantInt::get(itype, DEF ); countArgs[0] = defVal; //enum for branch Instruction *beforeInst; if(isa<PHINode>(v)) { BasicBlock *bb = v->getParent(); beforeInst = bb->getFirstNonPHI(); } else beforeInst = v; CallInst::Create( countFunc, countArgs.begin(),countArgs.end(), "", beforeInst); // insert the profiling call before the def:v } } else if(profileoption == 'a' ) { Instruction *beforeInst; //consider all instructions profiling vector<const Type*> argTypes(1); argTypes[0] = Type::getInt32Ty(context); // enum for the options FunctionType* countFuncType = FunctionType::get( Type::getVoidTy(context), argTypes, 0 ); Constant* countFunc = m->getOrInsertFunction("doProfiling", countFuncType); // get the injection function vector<Value*> countArgs(1); const IntegerType* itype = IntegerType::get(context,32); Value* allVal = ConstantInt::get(itype, ALL ); if(isa<PHINode>(I)) { BasicBlock *bb = I->getParent(); beforeInst = bb->getFirstNonPHI(); } else beforeInst = I; countArgs[0] = allVal; //enum for All inst CallInst::Create( countFunc, countArgs.begin(),countArgs.end(), "", beforeInst); // Insert the inject call before the instruction } //in fact, here we only use backwardslice ('s') else if(profileoption == 's') { const Type* returnType = I->getType(); if (returnType->isVoidTy() || !filter(I))//Here we can insert a new filter /////////////////////////////////////////////// { 
//errs()<<"filter not passed\n"; continue; } //for injection into all instructions except invoke instructions (these are same as unconditional branch instructions with exception handling mechanism) if((isa<InvokeInst>(I)) #ifdef EXCLUDE_CASTINST || (isa<CastInst>(I)) #endif ) // cast instruction added by Jiesheng continue; //errs()<<"filter passed\n"; Instruction *beforeInst; if(isa<PHINode>(I)) { BasicBlock *bb = I->getParent(); beforeInst = bb->getFirstNonPHI(); } else beforeInst = I; insert_worklist.push_back(beforeInst); } } while(!insert_worklist.empty()) { Instruction* beforeInst = insert_worklist.back(); insert_worklist.pop_back(); vector<const Type*> argTypes(1); argTypes[0] = Type::getInt32Ty(context); // enum for the options FunctionType* countFuncType = FunctionType::get( Type::getVoidTy(context), argTypes, 0 ); Constant* countFunc = m->getOrInsertFunction("doProfiling", countFuncType); vector<Value*> countArgs(1); const IntegerType* itype = IntegerType::get(context,32); Value* allVal = ConstantInt::get(itype, BACKWARD_SLICE ); countArgs[0] = allVal; //enum for All inst CallInst::Create( countFunc, countArgs.begin(),countArgs.end(), "", beforeInst); } return true; }
/// Run the ARC contraction on \p F.
///
/// Every instruction is first offered to tryToPeepholeInstruction(). For the
/// instructions it declines, uses of the call's first argument (and of
/// values obtained by stripping no-op casts from it, equivalent PHIs, and
/// bitcast users of it) that are dominated by the call are rewritten to use
/// the call's return value instead. Finally, the collected objc_storeStrong
/// calls are marked as tail calls when that is safe for this function.
///
/// \returns the Changed member, which is reset on entry and set by the
///          argument-use rewriting below.
bool ObjCARCContract::runOnFunction(Function &F) {
  if (!EnableARCOpts)
    return false;

  // If nothing in the Module uses ARC, don't do anything.
  if (!Run)
    return false;

  Changed = false;
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();

  PA.setAA(&getAnalysis<AAResultsWrapperPass>().getAAResults());

  // Funclet colors are only computed for functions with a scoped EH
  // personality; otherwise the map stays empty.
  DenseMap<BasicBlock *, ColorVector> BlockColors;
  if (F.hasPersonalityFn() &&
      isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
    BlockColors = colorEHFunclets(F);

  LLVM_DEBUG(llvm::dbgs() << "**** ObjCARC Contract ****\n");

  // Track whether it's ok to mark objc_storeStrong calls with the "tail"
  // keyword. Be conservative if the function has variadic arguments.
  // It seems that functions which "return twice" are also unsafe for the
  // "tail" argument, because they are setjmp, which could need to
  // return to an earlier stack state.
  bool TailOkForStoreStrongs =
      !F.isVarArg() && !F.callsFunctionThatReturnsTwice();

  // For ObjC library calls which return their argument, replace uses of the
  // argument with uses of the call return value, if it dominates the use. This
  // reduces register pressure.
  SmallPtrSet<Instruction *, 4> DependingInstructions;
  SmallPtrSet<const BasicBlock *, 4> Visited;
  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E;) {
    // Advance the iterator before processing; it is also handed to
    // tryToPeepholeInstruction() below.
    Instruction *Inst = &*I++;

    LLVM_DEBUG(dbgs() << "Visiting: " << *Inst << "\n");

    // First try to peephole Inst. If there is nothing further we can do in
    // terms of undoing objc-arc-expand, process the next inst.
    if (tryToPeepholeInstruction(F, Inst, I, DependingInstructions, Visited,
                                 TailOkForStoreStrongs, BlockColors))
      continue;

    // Otherwise, try to undo objc-arc-expand.

    // Don't use GetArgRCIdentityRoot because we don't want to look through bitcasts
    // and such; to do the replacement, the argument must have type i8*.

    // Function for replacing uses of Arg dominated by Inst.
    auto ReplaceArgUses = [Inst, this](Value *Arg) {
      // If we're compiling bugpointed code, don't get in trouble.
      if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
        return;

      // Look through the uses of the pointer.
      for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
           UI != UE; ) {
        // Increment UI now, because we may unlink its element.
        Use &U = *UI++;
        unsigned OperandNo = U.getOperandNo();

        // If the call's return value dominates a use of the call's argument
        // value, rewrite the use to use the return value. We check for
        // reachability here because an unreachable call is considered to
        // trivially dominate itself, which would lead us to rewriting its
        // argument in terms of its return value, which would lead to
        // infinite loops in GetArgRCIdentityRoot.
        if (!DT->isReachableFromEntry(U) || !DT->dominates(Inst, U))
          continue;

        Changed = true;
        Instruction *Replacement = Inst;
        Type *UseTy = U.get()->getType();
        if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) {
          // For PHI nodes, insert the bitcast in the predecessor block.
          unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
          BasicBlock *IncomingBB = PHI->getIncomingBlock(ValNo);
          if (Replacement->getType() != UseTy) {
            // A catchswitch is both a pad and a terminator, meaning a basic
            // block with a catchswitch has no insertion point. Keep going up
            // the dominator tree until we find a non-catchswitch.
            BasicBlock *InsertBB = IncomingBB;
            while (isa<CatchSwitchInst>(InsertBB->getFirstNonPHI())) {
              InsertBB = DT->getNode(InsertBB)->getIDom()->getBlock();
            }

            assert(DT->dominates(Inst, &InsertBB->back()) &&
                   "Invalid insertion point for bitcast");
            // The bitcast is placed in front of InsertBB's last instruction.
            Replacement =
                new BitCastInst(Replacement, UseTy, "", &InsertBB->back());
          }

          // While we're here, rewrite all edges for this PHI, rather
          // than just one use at a time, to minimize the number of
          // bitcasts we emit.
          for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
            if (PHI->getIncomingBlock(i) == IncomingBB) {
              // Keep the UI iterator valid.
              // If the use that UI currently points at is the one about to be
              // rewritten, step past it first.
              if (UI != UE &&
                  &PHI->getOperandUse(
                      PHINode::getOperandNumForIncomingValue(i)) == &*UI)
                ++UI;
              PHI->setIncomingValue(i, Replacement);
            }
        } else {
          // Non-PHI user: a needed bitcast goes directly in front of the user.
          if (Replacement->getType() != UseTy)
            Replacement = new BitCastInst(Replacement, UseTy, "",
                                          cast<Instruction>(U.getUser()));
          U.set(Replacement);
        }
      }
    };

    // Inst is treated as a call from here on; cast<> asserts this.
    Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
    Value *OrigArg = Arg;

    // TODO: Change this to a do-while.
    for (;;) {
      ReplaceArgUses(Arg);

      // If Arg is a no-op casted pointer, strip one level of casts and iterate.
      if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg))
        Arg = BI->getOperand(0);
      else if (isa<GEPOperator>(Arg) &&
               cast<GEPOperator>(Arg)->hasAllZeroIndices())
        Arg = cast<GEPOperator>(Arg)->getPointerOperand();
      else if (isa<GlobalAlias>(Arg) &&
               !cast<GlobalAlias>(Arg)->isInterposable())
        Arg = cast<GlobalAlias>(Arg)->getAliasee();
      else {
        // If Arg is a PHI node, get PHIs that are equivalent to it and replace
        // their uses.
        if (PHINode *PN = dyn_cast<PHINode>(Arg)) {
          SmallVector<Value *, 1> PHIList;
          getEquivalentPHIs(*PN, PHIList);
          for (Value *PHI : PHIList)
            ReplaceArgUses(PHI);
        }
        break;
      }
    }

    // Replace bitcast users of Arg that are dominated by Inst.
    SmallVector<BitCastInst *, 2> BitCastUsers;

    // Add all bitcast users of the function argument first.
    for (User *U : OrigArg->users())
      if (auto *BC = dyn_cast<BitCastInst>(U))
        BitCastUsers.push_back(BC);

    // Replace the bitcasts with the call return. Iterate until list is empty.
    while (!BitCastUsers.empty()) {
      auto *BC = BitCastUsers.pop_back_val();
      // Queue bitcasts of this bitcast before its uses are rewritten.
      for (User *U : BC->users())
        if (auto *B = dyn_cast<BitCastInst>(U))
          BitCastUsers.push_back(B);

      ReplaceArgUses(BC);
    }
  }

  // If this function has no escaping allocas or suspicious vararg usage,
  // objc_storeStrong calls can be marked with the "tail" keyword.
  if (TailOkForStoreStrongs)
    for (CallInst *CI : StoreStrongCalls)
      CI->setTailCall();
  // The list is emptied either way.
  StoreStrongCalls.clear();

  return Changed;
}
/// Create a clone of the blocks in a loop and connect them together. /// If CreateRemainderLoop is false, loop structure will not be cloned, /// otherwise a new loop will be created including all cloned blocks, and the /// iterator of it switches to count NewIter down to 0. /// The cloned blocks should be inserted between InsertTop and InsertBot. /// If loop structure is cloned InsertTop should be new preheader, InsertBot /// new loop exit. /// Return the new cloned loop that is created when CreateRemainderLoop is true. static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); Loop *ParentLoop = L->getParentLoop(); NewLoopsMap NewLoops; NewLoops[ParentLoop] = ParentLoop; if (!CreateRemainderLoop) NewLoops[L] = ParentLoop; // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); NewBlocks.push_back(NewBB); // If we're unrolling the outermost loop, there's no remainder loop, // and this block isn't in a nested loop, then the new block is not // in any loop. Otherwise, add it to loopinfo. 
if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop) addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); VMap[*BB] = NewBB; if (Header == *BB) { // For the first block, add a CFG connection to this newly // created block. InsertTop->getTerminator()->setSuccessor(0, NewBB); } if (DT) { if (Header == *BB) { // The header is dominated by the preheader. DT->addNewBlock(NewBB, InsertTop); } else { // Copy information from original loop to unrolled loop. BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock(); DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB])); } } if (Latch == *BB) { // For the last block, if CreateRemainderLoop is false, create a direct // jump to InsertBot. If not, create a loop back to cloned head. VMap.erase((*BB)->getTerminator()); BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]); BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator()); IRBuilder<> Builder(LatchBR); if (!CreateRemainderLoop) { Builder.CreateBr(InsertBot); } else { PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, suffix + ".iter", FirstLoopBB->getFirstNonPHI()); Value *IdxSub = Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".sub"); Value *IdxCmp = Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); NewIdx->addIncoming(NewIter, InsertTop); NewIdx->addIncoming(IdxSub, NewBB); } LatchBR->eraseFromParent(); } } // Change the incoming values to the ones defined in the preheader or // cloned loop. 
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *NewPHI = cast<PHINode>(VMap[&*I]); if (!CreateRemainderLoop) { if (UseEpilogRemainder) { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); NewPHI->removeIncomingValue(Latch, false); } else { VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); } } else { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); idx = NewPHI->getBasicBlockIndex(Latch); Value *InVal = NewPHI->getIncomingValue(idx); NewPHI->setIncomingBlock(idx, NewLatch); if (Value *V = VMap.lookup(InVal)) NewPHI->setIncomingValue(idx, V); } } if (CreateRemainderLoop) { Loop *NewLoop = NewLoops[L]; MDNode *LoopID = NewLoop->getLoopID(); assert(NewLoop && "L should have been cloned"); // Only add loop metadata if the loop is not going to be completely // unrolled. if (UnrollRemainder) return NewLoop; Optional<MDNode *> NewLoopID = makeFollowupLoopID( LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder}); if (NewLoopID.hasValue()) { NewLoop->setLoopID(NewLoopID.getValue()); // Do not setLoopAlreadyUnrolled if loop attributes have been defined // explicitly. return NewLoop; } // Add unroll disable metadata to disable future unrolling for this loop. NewLoop->setLoopAlreadyUnrolled(); return NewLoop; } else return nullptr; }
// Clone the basic block that contains `inst`, rewrite the clone so that it no
// longer needs its PHI nodes, and move the clone's ordinary instructions in
// front of the terminator of `header`.
//
//   inst      - instruction whose parent block is cloned; the value that the
//               clone map holds for it is returned.
//   induction - instruction whose cloned PHI is replaced by
//               subscripts::incrementValue(<value incoming from header>, offset).
//   header    - block receiving the instructions; must have a terminator.
//   offset    - amount handed to incrementValue, also used in the clone's
//               "___<offset>___" name suffix.
Value* ModuloSchedulerDriverPass::copyLoopBodyToHeader(Instruction* inst, Instruction* induction, BasicBlock* header, int offset){
  typedef DenseMap<const Value *, Value *> CloneMap;

  // The block we are going to duplicate.
  BasicBlock *body = inst->getParent();

  assert(header && "Header is null");
  assert(header->getTerminator() && "Header has no terminator");

  // Original value -> cloned value.
  CloneMap ValueMap;

  // Clone the block, tagging the new names with the offset.
  stringstream iname;
  iname<<"___"<<offset<<"___";
  BasicBlock* newBB = CloneBasicBlock(body, ValueMap, iname.str().c_str());

  // The cloned instructions still refer to the originals; redirect every
  // operand that has a clone to that clone.
  for (BasicBlock::iterator cur = newBB->begin(); cur != newBB->end(); ++cur) {
    for (Instruction::op_iterator op = cur->op_begin(); op != cur->op_end(); ++op) {
      CloneMap::iterator hit = ValueMap.find(*op);
      if (hit == ValueMap.end())
        continue;
      cur->replaceUsesOfWith(*op, hit->second);
    }
  }

  // Get rid of the need for the PHI nodes.
  for (BasicBlock::iterator cur = newBB->begin(); cur != newBB->end(); ++cur) {
    PHINode *phi = dyn_cast<PHINode>(cur);
    if (!phi)
      continue;

    // The value flowing in from the header stands in for the PHI.
    Value* prevalue = phi->getIncomingValue(phi->getBasicBlockIndex(header));
    assert(prevalue && "no prevalue. Don't know what to do");

    if (ValueMap[induction] != phi) {
      // Ordinary variable or constant: no index adjustment needed.
      phi->replaceAllUsesWith(prevalue);
      continue;
    }

    // This PHI is the induction index, A[PHI(i,0)]; turn it into
    // A[i + offset]. The increment is placed just ahead of the PHI.
    Instruction *add = subscripts::incrementValue(prevalue, offset);
    phi->getParent()->getInstList().insert(phi, add);
    phi->replaceAllUsesWith(add);
  }

  // Drain the clone: stores are dropped, every other non-PHI,
  // non-terminator instruction is moved into the header.
  for (;;) {
    Instruction *front = newBB->getFirstNonPHI();
    if (front->isTerminator())
      break;

    if (isa<StoreInst>(front))
      front->eraseFromParent();
    else
      front->moveBefore(header->getTerminator());
  }

  newBB->dropAllReferences();
  return ValueMap[inst];
}
/// InlineHalfPowrs - Inline a sequence of adjacent half_powr calls, rearranging /// their control flow to better facilitate subsequent optimization. Instruction * SimplifyHalfPowrLibCalls:: InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs, Instruction *InsertPt) { std::vector<BasicBlock *> Bodies; BasicBlock *NewBlock = 0; for (unsigned i = 0, e = HalfPowrs.size(); i != e; ++i) { CallInst *Call = cast<CallInst>(HalfPowrs[i]); Function *Callee = Call->getCalledFunction(); // Minimally sanity-check the CFG of half_powr to ensure that it contains // the kind of code we expect. If we're running this pass, we have // reason to believe it will be what we expect. Function::iterator I = Callee->begin(); BasicBlock *Prologue = I++; if (I == Callee->end()) break; BasicBlock *SubnormalHandling = I++; if (I == Callee->end()) break; BasicBlock *Body = I++; if (I != Callee->end()) break; if (SubnormalHandling->getSinglePredecessor() != Prologue) break; BranchInst *PBI = dyn_cast<BranchInst>(Prologue->getTerminator()); if (!PBI || !PBI->isConditional()) break; BranchInst *SNBI = dyn_cast<BranchInst>(SubnormalHandling->getTerminator()); if (!SNBI || SNBI->isConditional()) break; if (!isa<ReturnInst>(Body->getTerminator())) break; Instruction *NextInst = llvm::next(BasicBlock::iterator(Call)); // Inline the call, taking care of what code ends up where. NewBlock = SplitBlock(NextInst->getParent(), NextInst, this); InlineFunctionInfo IFI(0, TD); bool B = InlineFunction(Call, IFI); assert(B && "half_powr didn't inline?"); B=B; BasicBlock *NewBody = NewBlock->getSinglePredecessor(); assert(NewBody); Bodies.push_back(NewBody); } if (!NewBlock) return InsertPt; // Put the code for all the bodies into one block, to facilitate // subsequent optimization. 
(void)SplitEdge(NewBlock->getSinglePredecessor(), NewBlock, this); for (unsigned i = 0, e = Bodies.size(); i != e; ++i) { BasicBlock *Body = Bodies[i]; Instruction *FNP = Body->getFirstNonPHI(); // Splice the insts from body into NewBlock. NewBlock->getInstList().splice(NewBlock->begin(), Body->getInstList(), FNP, Body->getTerminator()); } return NewBlock->begin(); }
/// PromoteAliasSet - Try to promote memory values to scalars by sinking
/// stores out of the loop and moving loads to before the loop.  We do this by
/// looping over the stores in the loop, looking for stores to Must pointers
/// which are loop invariant.
///
/// Outline of what the body does:
///   1. Reject alias sets that are forwarding, never modified, not must-alias,
///      volatile, or whose first pointer is not loop invariant.
///   2. Collect every in-loop load/store user of the set's pointers, giving up
///      if the pointers differ in type, if a pointer is itself stored as a
///      value, or if any in-loop user is not a load or store.
///   3. Require at least one of those users to be safe to execute
///      unconditionally.
///   4. Feed the per-block definitions to SSAUpdater, insert a load before the
///      preheader's terminator and a store at the top of every unique exit
///      block, rewrite the loads, and erase the old loads/stores.
///
void LICM::PromoteAliasSet(AliasSet &AS) {
  // We can promote this alias set if it has a store, if it is a "Must" alias
  // set, if the pointer is loop invariant, and if we are not eliminating any
  // volatile loads or stores.
  if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
      AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
    return;

  // NOTE(review): the guard above has already dereferenced AS.begin() by the
  // time this assert runs, so it cannot protect that access.
  assert(!AS.empty() &&
         "Must alias set should have at least one pointer element in it!");
  Value *SomePtr = AS.begin()->getValue();

  // It isn't safe to promote a load/store from the loop if the load/store is
  // conditional.  For example, turning:
  //
  //    for () { if (c) *P += 1; }
  //
  // into:
  //
  //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
  //
  // is not safe, because *P may only be valid to access if 'c' is true.
  //
  // It is safe to promote P if all uses are direct load/stores and if at
  // least one is guaranteed to be executed.
  bool GuaranteedToExecute = false;

  // All in-loop loads and stores through the set's pointers, in discovery
  // order, plus the set of those pointers for quick membership tests.
  SmallVector<Instruction*, 64> LoopUses;
  SmallPtrSet<Value*, 4> PointerMustAliases;

  // Walk every pointer in the alias set and gather its in-loop users.
  for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
    Value *ASIV = ASI->getValue();
    PointerMustAliases.insert(ASIV);

    // Check that all of the pointers in the alias set have the same type.  We
    // cannot (yet) promote a memory location that is loaded and stored in
    // different sizes.
    if (SomePtr->getType() != ASIV->getType())
      return;

    for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end();
         UI != UE; ++UI) {
      // Ignore instructions that are outside the loop.
      Instruction *Use = dyn_cast<Instruction>(*UI);
      if (!Use || !CurLoop->contains(Use))
        continue;

      // If there is a non-load/store instruction in the loop, we can't promote
      // it.
      if (isa<LoadInst>(Use))
        assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
      else if (isa<StoreInst>(Use)) {
        assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
        // Operand 0 of a store is the value being stored: the pointer itself
        // is being written to memory here, so give up.
        if (Use->getOperand(0) == ASIV) return;
      } else
        return; // Not a load or store.

      // One unconditionally-safe user is enough; stop asking once found.
      if (!GuaranteedToExecute)
        GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);

      LoopUses.push_back(Use);
    }
  }

  // If there isn't a guaranteed-to-execute instruction, we can't promote.
  if (!GuaranteedToExecute)
    return;

  // Otherwise, this is safe to promote, let's do it!
  DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
  Changed = true;
  ++NumPromoted;

  // We use the SSAUpdater interface to insert phi nodes as required.
  SmallVector<PHINode*, 16> NewPHIs;
  SSAUpdater SSA(&NewPHIs);

  // It wants to know some value of the same type as what we'll be inserting:
  // either the first load itself or the value operand of the first store.
  Value *SomeValue;
  if (isa<LoadInst>(LoopUses[0]))
    SomeValue = LoopUses[0];
  else
    SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0);
  SSA.Initialize(SomeValue->getType(), SomeValue->getName());

  // First step: bucket up uses of the pointers by the block they occur in.
  // This is important because we have to handle multiple defs/uses in a block
  // ourselves: SSAUpdater is purely for cross-block references.
  // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element.
  DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock;
  for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
    Instruction *User = LoopUses[i];
    UsesByBlock[User->getParent()].push_back(User);
  }

  // Okay, now we can iterate over all the blocks in the loop with uses,
  // processing them.  Keep track of which loads are loading a live-in value.
  SmallVector<LoadInst*, 32> LiveInLoads;
  // Load -> value it was replaced with (filled in here and in the live-in
  // rewrite further down; consulted during the final cleanup).
  DenseMap<Value*, Value*> ReplacedLoads;

  for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) {
    Instruction *User = LoopUses[LoopUse];
    std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()];

    // If this block has already been processed, ignore this repeat use.
    // (A processed block is marked by clearing its bucket.)
    if (BlockUses.empty()) continue;

    // Okay, this is the first use in the block.  If this block just has a
    // single user in it, we can rewrite it trivially.
    if (BlockUses.size() == 1) {
      // If it is a store, it is a trivial def of the value in the block.
      if (isa<StoreInst>(User)) {
        SSA.AddAvailableValue(User->getParent(),
                              cast<StoreInst>(User)->getOperand(0));
      } else {
        // Otherwise it is a load, queue it to rewrite as a live-in load.
        LiveInLoads.push_back(cast<LoadInst>(User));
      }
      BlockUses.clear();
      continue;
    }

    // Otherwise, check to see if this block is all loads.  If so, we can queue
    // them all as live in loads.
    bool HasStore = false;
    for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
      if (isa<StoreInst>(BlockUses[i])) {
        HasStore = true;
        break;
      }
    }

    if (!HasStore) {
      for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
        LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
      BlockUses.clear();
      continue;
    }

    // Otherwise, we have mixed loads and stores (or just a bunch of stores).
    // Since SSAUpdater is purely for cross-block values, we need to determine
    // the order of these instructions in the block.  If the first use in the
    // block is a load, then it uses the live in value.  The last store defines
    // the live out value.  We handle this by doing a linear scan of the block.
    BasicBlock *BB = User->getParent();
    Value *StoredValue = 0;
    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
      if (LoadInst *L = dyn_cast<LoadInst>(II)) {
        // If this is a load from an unrelated pointer, ignore it.
        if (!PointerMustAliases.count(L->getOperand(0))) continue;

        // If we haven't seen a store yet, this is a live in use, otherwise
        // use the stored value.
        if (StoredValue) {
          L->replaceAllUsesWith(StoredValue);
          ReplacedLoads[L] = StoredValue;
        } else {
          LiveInLoads.push_back(L);
        }
        continue;
      }

      if (StoreInst *S = dyn_cast<StoreInst>(II)) {
        // If this is a store to an unrelated pointer, ignore it.
        // (Operand 1 of a store is the address; operand 0 is the value.)
        if (!PointerMustAliases.count(S->getOperand(1))) continue;

        // Remember that this is the active value in the block.
        StoredValue = S->getOperand(0);
      }
    }

    // The last stored value that happened is the live-out for the block.
    assert(StoredValue && "Already checked that there is a store in block");
    SSA.AddAvailableValue(BB, StoredValue);
    BlockUses.clear();
  }

  // Now that all the intra-loop values are classified, set up the preheader.
  // It gets a load of the pointer we're promoting, and it is the live-out value
  // from the preheader.
  // NOTE(review): Preheader is not declared in this function; presumably a
  // member holding the current loop's preheader - confirm.
  LoadInst *PreheaderLoad =
    new LoadInst(SomePtr,SomePtr->getName()+".promoted",
                 Preheader->getTerminator());
  SSA.AddAvailableValue(Preheader, PreheaderLoad);

  // Now that the preheader is good to go, set up the exit blocks.  Each exit
  // block gets a store of the live-out values that feed them.  Since we've
  // already told the SSA updater about the defs in the loop and the preheader
  // definition, it is all set and we can start using it.
  SmallVector<BasicBlock*, 8> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);
  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
    BasicBlock *ExitBlock = ExitBlocks[i];
    Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
    // The store goes right after the exit block's PHI nodes.
    Instruction *InsertPos = ExitBlock->getFirstNonPHI();
    new StoreInst(LiveInValue, SomePtr, InsertPos);
  }

  // Okay, now we rewrite all loads that use live-in values in the loop,
  // inserting PHI nodes as necessary.
  for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
    LoadInst *ALoad = LiveInLoads[i];
    Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
    ALoad->replaceAllUsesWith(NewVal);
    CurAST->copyValue(ALoad, NewVal);
    ReplacedLoads[ALoad] = NewVal;
  }

  // If the preheader load is itself a pointer, we need to tell alias analysis
  // about the new pointer we created in the preheader block and about any PHI
  // nodes that just got inserted.
  if (PreheaderLoad->getType()->isPointerTy()) {
    // Copy any value stored to or loaded from a must-alias of the pointer.
    CurAST->copyValue(SomeValue, PreheaderLoad);
    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
      CurAST->copyValue(SomeValue, NewPHIs[i]);
  }

  // Now that everything is rewritten, delete the old instructions from the body
  // of the loop.  They should all be dead now.
  for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
    Instruction *User = LoopUses[i];

    // If this is a load that still has uses, then the load must have been added
    // as a live value in the SSAUpdate data structure for a block (e.g. because
    // the loaded value was stored later).  In this case, we need to recursively
    // propagate the updates until we get to the real value.
    if (!User->use_empty()) {
      Value *NewVal = ReplacedLoads[User];
      assert(NewVal && "not a replaced load?");

      // Propagate down to the ultimate replacee.  The intermediate loads
      // could theoretically already have been deleted, so we don't want to
      // dereference the Value*'s.
      DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
      while (RLI != ReplacedLoads.end()) {
        NewVal = RLI->second;
        RLI = ReplacedLoads.find(NewVal);
      }

      User->replaceAllUsesWith(NewVal);
      CurAST->copyValue(User, NewVal);
    }

    // Drop the instruction from the alias set tracker before erasing it.
    CurAST->deleteValue(User);
    User->eraseFromParent();
  }

  // Phew, we're done!
}
/// sink - When an instruction is found to only be used outside of the loop, /// this function moves it to the exit blocks and patches up SSA form as needed. /// This method is guaranteed to remove the original instruction from its /// position, and may either delete it or move it to outside of the loop. /// void LICM::sink(Instruction &I) { DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); SmallVector<BasicBlock*, 8> ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); if (isa<LoadInst>(I)) ++NumMovedLoads; else if (isa<CallInst>(I)) ++NumMovedCalls; ++NumSunk; Changed = true; // The case where there is only a single exit node of this loop is common // enough that we handle it as a special (more efficient) case. It is more // efficient to handle because there are no PHI nodes that need to be placed. if (ExitBlocks.size() == 1) { if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[0], I.getParent())) { // Instruction is not used, just delete it. CurAST->deleteValue(&I); // If I has users in unreachable blocks, eliminate. // If I is not void type then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. if (!I.use_empty()) I.replaceAllUsesWith(UndefValue::get(I.getType())); I.eraseFromParent(); } else { // Move the instruction to the start of the exit block, after any PHI // nodes in it. I.moveBefore(ExitBlocks[0]->getFirstNonPHI()); // This instruction is no longer in the AST for the current loop, because // we just sunk it out of the loop. If we just sunk it into an outer // loop, we will rediscover the operation when we process it. CurAST->deleteValue(&I); } return; } if (ExitBlocks.empty()) { // The instruction is actually dead if there ARE NO exit blocks. CurAST->deleteValue(&I); // If I has users in unreachable blocks, eliminate. // If I is not void type then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. 
if (!I.use_empty()) I.replaceAllUsesWith(UndefValue::get(I.getType())); I.eraseFromParent(); return; } // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the // hard work of inserting PHI nodes as necessary. SmallVector<PHINode*, 8> NewPHIs; SSAUpdater SSA(&NewPHIs); if (!I.use_empty()) SSA.Initialize(I.getType(), I.getName()); // Insert a copy of the instruction in each exit block of the loop that is // dominated by the instruction. Each exit block is known to only be in the // ExitBlocks list once. BasicBlock *InstOrigBB = I.getParent(); unsigned NumInserted = 0; for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = ExitBlocks[i]; if (!isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB)) continue; // Insert the code after the last PHI node. BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI(); // If this is the first exit block processed, just move the original // instruction, otherwise clone the original instruction and insert // the copy. Instruction *New; if (NumInserted++ == 0) { I.moveBefore(InsertPt); New = &I; } else { New = I.clone(); if (!I.getName().empty()) New->setName(I.getName()+".le"); ExitBlock->getInstList().insert(InsertPt, New); } // Now that we have inserted the instruction, inform SSAUpdater. if (!I.use_empty()) SSA.AddAvailableValue(ExitBlock, New); } // If the instruction doesn't dominate any exit blocks, it must be dead. if (NumInserted == 0) { CurAST->deleteValue(&I); if (!I.use_empty()) I.replaceAllUsesWith(UndefValue::get(I.getType())); I.eraseFromParent(); return; } // Next, rewrite uses of the instruction, inserting PHI nodes as needed. for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) { // Grab the use before incrementing the iterator. Use &U = UI.getUse(); // Increment the iterator before removing the use from the list. ++UI; SSA.RewriteUseAfterInsertions(U); } // Update CurAST for NewPHIs if I had pointer type. 
if (I.getType()->isPointerTy()) for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) CurAST->copyValue(&I, NewPHIs[i]); // Finally, remove the instruction from CurAST. It is no longer in the loop. CurAST->deleteValue(&I); }
/// OptimizeExtUses - Given an extension instruction I whose result and whose
/// source operand are both used outside of I's block, rewrite the
/// out-of-block uses of the source to use a truncate of I instead, so that
/// only the extended value has to stay live across blocks.
///
/// The rewrite is skipped when the source has a single use, when the target
/// says the truncate is not free, when the source is not an instruction
/// defined in I's block, when I has no user outside its block, or when any
/// out-of-block user of the source is a PHI, load or store.
///
/// At most one truncate is created per user block; it is inserted at that
/// block's first non-PHI instruction.
///
/// Returns true if at least one use was rewritten.
bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
  BasicBlock *DefBB = I->getParent();

  // If both result of the {s|z}xt and its source are live out, rewrite all
  // other uses of the source with result of extension.
  Value *Src = I->getOperand(0);
  if (Src->hasOneUse())
    return false;

  // Only do this xform if truncating is free.
  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
    return false;

  // Only safe to perform the optimization if the source is also defined in
  // this block.
  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
    return false;

  // The extension result must be used in some other block.
  bool DefIsLiveOut = false;
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    DefIsLiveOut = true;
    break;
  }
  if (!DefIsLiveOut)
    return false;

  // Make sure none of the out-of-block uses are PHI nodes, loads or stores.
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    // Be conservative. We don't want this xform to end up introducing
    // reloads just before load / store instructions.
    if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
      return false;
  }

  // InsertedTruncs - Only insert one trunc in each block once.
  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;

  bool MadeChange = false;
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Step to the next use of Src *before* TheUse may be rewritten.
    // Assigning to TheUse unlinks it from Src's use list and links it into
    // the truncate's use list, so advancing afterwards would leave Src's
    // list and skip the remaining uses.
    ++UI;

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;

    // Both src and def are live in this block. Rewrite the use.
    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];

    if (!InsertedTrunc) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
    }

    // Replace a use of the {s|z}ext source with a use of the result.
    TheUse = InsertedTrunc;

    MadeChange = true;
  }

  return MadeChange;
}
/// For every conditional branch whose condition is an integer compare, look
/// at each compare operand that is a load. For such an operand, each branch
/// successor that has exactly one predecessor gets
///   - a one-entry PHI ("piFunc_t" for the true successor, "piFunc_f" for the
///     false successor) of the loaded value, placed at the top of the block;
///   - a store of that PHI back to the load's address, placed at the block's
///     first non-PHI position.
/// As soon as a successor with more than one predecessor is encountered, the
/// rest of that branch (including the second operand) is skipped; whatever
/// was already inserted for it stays in place.
///
/// \returns true if any instruction was inserted, false if the module was
///          left untouched.
virtual bool runOnModule(Module &M) {
  bool Changed = false;

  // First go through and grab all of the conditional branches; they are
  // collected up front because the blocks are modified below.
  std::vector<BranchInst*> vCondBranch;
  for(Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) {
    // Leave declarations alone.
    if (MI->isDeclaration())
      continue;

    for(Function::iterator b = MI->begin(), end = MI->end(); b != end; ++b) {
      BranchInst *bi = dyn_cast<BranchInst>(b->getTerminator());
      if (bi && bi->isConditional())
        vCondBranch.push_back(bi);
    }
  }

  for(std::vector<BranchInst*>::iterator i = vCondBranch.begin(),
      e = vCondBranch.end(); i != e; ++i) {
    DEBUG_PRINT("Examining a conditional branch!");

    CmpInst *cmp = dyn_cast<CmpInst>((*i)->getCondition());
    if (!cmp)
      continue;
    if (cmp->getNumOperands() < 2)
      continue;
    if (!cmp->isIntPredicate())
      continue;

    // Successor 0 is taken when the condition is true, successor 1 otherwise.
    BasicBlock *targets[2] = { (*i)->getSuccessor(0), (*i)->getSuccessor(1) };
    const char *piNames[2] = { "piFunc_t", "piFunc_f" };

    IRBuilder<> builder(targets[0]->getFirstNonPHI());

    // Set once a successor with several predecessors is met; nothing more is
    // done for this branch after that.
    bool abandoned = false;
    for (unsigned opIdx = 0; opIdx != 2 && !abandoned; ++opIdx) {
      // Only loaded operands are handled (a load is never a Constant).
      LoadInst *load = dyn_cast<LoadInst>(cmp->getOperand(opIdx));
      if (!load)
        continue;

      for (unsigned t = 0; t != 2; ++t) {
        BasicBlock *target = targets[t];

        // Require a unique predecessor for the one-entry PHI.
        pred_iterator PI = pred_begin(target);
        BasicBlock *Pred = *PI;
        if (++PI != pred_end(target)) {
          abandoned = true;
          break;
        }

        PHINode *pi = PHINode::Create(load->getType(), 1, piNames[t],
                                      target->begin());
        pi->addIncoming(load, Pred);

        // Write the PHI back through the pointer the operand was loaded from.
        builder.SetInsertPoint(target->getFirstNonPHI());
        builder.CreateStore(pi, load->getPointerOperand());
        Changed = true;
      }
    }
  }

  DEBUG_PRINT("Done!");
  return Changed;
}
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA, Loop **ResultLoop) { LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); LLVM_DEBUG(L->dump()); LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" : dbgs() << "Using prolog remainder.\n"); // Make sure the loop is in canonical form. if (!L->isLoopSimplifyForm()) { LLVM_DEBUG(dbgs() << "Not in simplify form!\n"); return false; } // Guaranteed by LoopSimplifyForm. BasicBlock *Latch = L->getLoopLatch(); BasicBlock *Header = L->getHeader(); BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); if (!LatchBR || LatchBR->isUnconditional()) { // The loop-rotate pass can be helpful to avoid this in many cases. LLVM_DEBUG( dbgs() << "Loop latch not terminated by a conditional branch.\n"); return false; } unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex); if (L->contains(LatchExit)) { // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the // targets of the Latch be an exit block out of the loop. LLVM_DEBUG( dbgs() << "One of the loop latch successors must be the exit block.\n"); return false; } // These are exit blocks other than the target of the latch exiting block. SmallVector<BasicBlock *, 4> OtherExits; bool isMultiExitUnrollingEnabled = canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder) && canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder); // Support only single exit and exiting block unless multi-exit loop unrolling is enabled. 
if (!isMultiExitUnrollingEnabled && (!L->getExitingBlock() || OtherExits.size())) { LLVM_DEBUG( dbgs() << "Multiple exit/exiting blocks in loop and multi-exit unrolling not " "enabled!\n"); return false; } // Use Scalar Evolution to compute the trip count. This allows more loops to // be unrolled than relying on induction var simplification. if (!SE) return false; // Only unroll loops with a computable trip count, and the trip count needs // to be an int value (allowing a pointer type is a TODO item). // We calculate the backedge count by using getExitCount on the Latch block, // which is proven to be the only exiting block in this loop. This is same as // calculating getBackedgeTakenCount on the loop (which computes SCEV for all // exiting blocks). const SCEV *BECountSC = SE->getExitCount(L, Latch); if (isa<SCEVCouldNotCompute>(BECountSC) || !BECountSC->getType()->isIntegerTy()) { LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n"); return false; } unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth(); // Add 1 since the backedge count doesn't include the first loop iteration. const SCEV *TripCountSC = SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); if (isa<SCEVCouldNotCompute>(TripCountSC)) { LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n"); return false; } BasicBlock *PreHeader = L->getLoopPreheader(); BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "loop-unroll"); if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) { LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); return false; } // This constraint lets us deal with an overflowing trip count easily; see the // comment on ModVal below. 
if (Log2_32(Count) > BEWidth) { LLVM_DEBUG( dbgs() << "Count failed constraint on overflow trip count calculation.\n"); return false; } // Loop structure is the following: // // PreHeader // Header // ... // Latch // LatchExit BasicBlock *NewPreHeader; BasicBlock *NewExit = nullptr; BasicBlock *PrologExit = nullptr; BasicBlock *EpilogPreHeader = nullptr; BasicBlock *PrologPreHeader = nullptr; if (UseEpilogRemainder) { // If epilog remainder // Split PreHeader to insert a branch around loop for unrolling. NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI); NewPreHeader->setName(PreHeader->getName() + ".new"); // Split LatchExit to create phi nodes from branch above. SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit)); NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI, nullptr, PreserveLCSSA); // NewExit gets its DebugLoc from LatchExit, which is not part of the // original Loop. // Fix this by setting Loop's DebugLoc to NewExit. auto *NewExitTerminator = NewExit->getTerminator(); NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc()); // Split NewExit to insert epilog remainder loop. EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI); EpilogPreHeader->setName(Header->getName() + ".epil.preheader"); } else { // If prolog remainder // Split the original preheader twice to insert prolog remainder loop PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI); PrologPreHeader->setName(Header->getName() + ".prol.preheader"); PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(), DT, LI); PrologExit->setName(Header->getName() + ".prol.loopexit"); // Split PrologExit to get NewPreHeader. NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI); NewPreHeader->setName(PreHeader->getName() + ".new"); } // Loop structure should be the following: // Epilog Prolog // // PreHeader PreHeader // *NewPreHeader *PrologPreHeader // Header *PrologExit // ... 
*NewPreHeader // Latch Header // *NewExit ... // *EpilogPreHeader Latch // LatchExit LatchExit // Calculate conditions for branch around loop for unrolling // in epilog case and around prolog remainder loop in prolog case. // Compute the number of extra iterations required, which is: // extra iterations = run-time trip count % loop unroll factor PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR); Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR); IRBuilder<> B(PreHeaderBR); Value *ModVal; // Calculate ModVal = (BECount + 1) % Count. // Note that TripCount is BECount + 1. if (isPowerOf2_32(Count)) { // When Count is power of 2 we don't BECount for epilog case, however we'll // need it for a branch around unrolling loop for prolog case. ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter"); // 1. There are no iterations to be run in the prolog/epilog loop. // OR // 2. The addition computing TripCount overflowed. // // If (2) is true, we know that TripCount really is (1 << BEWidth) and so // the number of iterations that remain to be run in the original loop is a // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we // explicitly check this above). } else { // As (BECount + 1) can potentially unsigned overflow we count // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count. Value *ModValTmp = B.CreateURem(BECount, ConstantInt::get(BECount->getType(), Count)); Value *ModValAdd = B.CreateAdd(ModValTmp, ConstantInt::get(ModValTmp->getType(), 1)); // At that point (BECount % Count) + 1 could be equal to Count. // To handle this case we need to take mod by Count one more time. ModVal = B.CreateURem(ModValAdd, ConstantInt::get(BECount->getType(), Count), "xtraiter"); } Value *BranchVal = UseEpilogRemainder ? 
B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)) : B.CreateIsNotNull(ModVal, "lcmp.mod"); BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader; BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit; // Branch to either remainder (extra iterations) loop or unrolling loop. B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop); PreHeaderBR->eraseFromParent(); if (DT) { if (UseEpilogRemainder) DT->changeImmediateDominator(NewExit, PreHeader); else DT->changeImmediateDominator(PrologExit, PreHeader); } Function *F = Header->getParent(); // Get an ordered list of blocks in the loop to help with the ordering of the // cloned blocks in the prolog/epilog code LoopBlocksDFS LoopBlocks(L); LoopBlocks.perform(LI); // // For each extra loop iteration, create a copy of the loop's basic blocks // and generate a condition that branches to the copy depending on the // number of 'left over' iterations. // std::vector<BasicBlock *> NewBlocks; ValueToValueMapTy VMap; // For unroll factor 2 remainder loop will have 1 iterations. // Do not create 1 iteration loop. bool CreateRemainderLoop = (Count != 2); // Clone all the basic blocks in the loop. If Count is 2, we don't clone // the loop, otherwise we create a cloned loop to execute the extra // iterations. This function adds the appropriate CFG connections. BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; Loop *remainderLoop = CloneLoopBlocks( L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); // Insert the cloned blocks into the function. 
F->getBasicBlockList().splice(InsertBot->getIterator(), F->getBasicBlockList(), NewBlocks[0]->getIterator(), F->end()); // Now the loop blocks are cloned and the other exiting blocks from the // remainder are connected to the original Loop's exit blocks. The remaining // work is to update the phi nodes in the original loop, and take in the // values from the cloned region. for (auto *BB : OtherExits) { for (auto &II : *BB) { // Given we preserve LCSSA form, we know that the values used outside the // loop will be used through these phi nodes at the exit blocks that are // transformed below. if (!isa<PHINode>(II)) break; PHINode *Phi = cast<PHINode>(&II); unsigned oldNumOperands = Phi->getNumIncomingValues(); // Add the incoming values from the remainder code to the end of the phi // node. for (unsigned i =0; i < oldNumOperands; i++){ Value *newVal = VMap.lookup(Phi->getIncomingValue(i)); // newVal can be a constant or derived from values outside the loop, and // hence need not have a VMap value. Also, since lookup already generated // a default "null" VMap entry for this value, we need to populate that // VMap entry correctly, with the mapped entry being itself. if (!newVal) { newVal = Phi->getIncomingValue(i); VMap[Phi->getIncomingValue(i)] = Phi->getIncomingValue(i); } Phi->addIncoming(newVal, cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)])); } } #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) for (BasicBlock *SuccBB : successors(BB)) { assert(!(any_of(OtherExits, [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) || SuccBB == LatchExit) && "Breaks the definition of dedicated exits!"); } #endif } // Update the immediate dominator of the exit blocks and blocks that are // reachable from the exit blocks. This is needed because we now have paths // from both the original loop and the remainder code reaching the exit // blocks. 
While the IDom of these exit blocks were from the original loop, // now the IDom is the preheader (which decides whether the original loop or // remainder code should run). if (DT && !L->getExitingBlock()) { SmallVector<BasicBlock *, 16> ChildrenToUpdate; // NB! We have to examine the dom children of all loop blocks, not just // those which are the IDom of the exit blocks. This is because blocks // reachable from the exit blocks can have their IDom as the nearest common // dominator of the exit blocks. for (auto *BB : L->blocks()) { auto *DomNodeBB = DT->getNode(BB); for (auto *DomChild : DomNodeBB->getChildren()) { auto *DomChildBB = DomChild->getBlock(); if (!L->contains(LI->getLoopFor(DomChildBB))) ChildrenToUpdate.push_back(DomChildBB); } } for (auto *BB : ChildrenToUpdate) DT->changeImmediateDominator(BB, PreHeader); } // Loop structure should be the following: // Epilog Prolog // // PreHeader PreHeader // NewPreHeader PrologPreHeader // Header PrologHeader // ... ... // Latch PrologLatch // NewExit PrologExit // EpilogPreHeader NewPreHeader // EpilogHeader Header // ... ... // EpilogLatch Latch // LatchExit LatchExit // Rewrite the cloned instruction operands to use the values created when the // clone is created. for (BasicBlock *BB : NewBlocks) { for (Instruction &I : *BB) { RemapInstruction(&I, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); } } if (UseEpilogRemainder) { // Connect the epilog code to the original loop and update the // PHI functions. ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader, NewPreHeader, VMap, DT, LI, PreserveLCSSA); // Update counter in loop for unrolling. // I should be multiply of Count. 
IRBuilder<> B2(NewPreHeader->getTerminator()); Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter"); BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); B2.SetInsertPoint(LatchBR); PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter", Header->getFirstNonPHI()); Value *IdxSub = B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".nsub"); Value *IdxCmp; if (LatchBR->getSuccessor(0) == Header) IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp"); else IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp"); NewIdx->addIncoming(TestVal, NewPreHeader); NewIdx->addIncoming(IdxSub, Latch); LatchBR->setCondition(IdxCmp); } else { // Connect the prolog code to the original loop and update the // PHI functions. ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader, NewPreHeader, VMap, DT, LI, PreserveLCSSA); } // If this loop is nested, then the loop unroller changes the code in the any // of its parent loops, so the Scalar Evolution pass needs to be run again. SE->forgetTopmostLoop(L); // Verify that the Dom Tree is correct. #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) if (DT) assert(DT->verify(DominatorTree::VerificationLevel::Full)); #endif // Canonicalize to LoopSimplifyForm both original and remainder loops. We // cannot rely on the LoopUnrollPass to do this because it only does // canonicalization for parent/subloops and not the sibling loops. if (OtherExits.size() > 0) { // Generate dedicated exit blocks for the original loop, to preserve // LoopSimplifyForm. formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA); // Generate dedicated exit blocks for the remainder loop if one exists, to // preserve LoopSimplifyForm. 
if (remainderLoop) formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA); } auto UnrollResult = LoopUnrollResult::Unmodified; if (remainderLoop && UnrollRemainder) { LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n"); UnrollResult = UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1, /*Force*/ false, /*AllowRuntime*/ false, /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true, /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1, /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC, /*ORE*/ nullptr, PreserveLCSSA); } if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled) *ResultLoop = remainderLoop; NumRuntimeUnrolled++; return true; }
/// Partially inlines \p F when its entry block is a two-way test where
/// exactly one successor returns immediately.
///
/// The function is cloned, every block of the clone except the entry block,
/// the returning block and the first block of the non-returning path... is
/// moved together with that first block into a new function by the
/// CodeExtractor, and the remaining thin clone is inlined into all direct
/// call and invoke users. Remaining users are pointed back at \p F.
///
/// \returns whatever the CodeExtractor produced for the extracted region, or
///          0 if \p F does not have the required shape.
Function* PartialInliner::unswitchFunction(Function* F) {
  // Candidate check: the entry block must end in a conditional branch.
  BasicBlock* origEntry = F->begin();
  BranchInst *entryBranch = dyn_cast<BranchInst>(origEntry->getTerminator());
  if (!entryBranch || entryBranch->isUnconditional())
    return 0;

  // Classify the successors of the entry branch: exactly one of them has to
  // be terminated by a return.
  BasicBlock* origReturn = 0;
  BasicBlock* origBody = 0;
  unsigned numReturning = 0;
  for (unsigned Idx = 0, NumSucc = entryBranch->getNumSuccessors();
       Idx != NumSucc; ++Idx) {
    BasicBlock *Succ = entryBranch->getSuccessor(Idx);
    if (isa<ReturnInst>(Succ->getTerminator())) {
      origReturn = Succ;
      ++numReturning;
    } else {
      origBody = Succ;
    }
  }
  if (numReturning != 1)
    return 0;

  // Work on a private copy of the function.
  ValueToValueMapTy VMap;
  Function* clone = CloneFunction(F, VMap, /*ModuleLevelChanges=*/false);
  clone->setLinkage(GlobalValue::InternalLinkage);
  F->getParent()->getFunctionList().push_back(clone);

  BasicBlock* cloneEntry = cast<BasicBlock>(VMap[origEntry]);
  BasicBlock* cloneReturn = cast<BasicBlock>(VMap[origReturn]);
  BasicBlock* cloneBody = cast<BasicBlock>(VMap[origBody]);

  // Redirect every use to the copy so the regular inliner can be used on it
  // once the copy has been reshaped.
  F->replaceAllUsesWith(clone);

  // PHIs in the return block may merge values from the entry block and from
  // the region about to be extracted. Split each of them in two: the old PHI
  // stays in the upper half (part of the extracted region), a new PHI in the
  // lower half merges it with the value coming straight from the entry block.
  BasicBlock* phiHalf = cloneReturn;
  cloneReturn = cloneReturn->splitBasicBlock(cloneReturn->getFirstNonPHI());
  BasicBlock::iterator InsertPos = cloneReturn->begin();
  for (BasicBlock::iterator PI = phiHalf->begin(); PI != phiHalf->end();
       ++PI) {
    PHINode* upperPhi = dyn_cast<PHINode>(PI);
    if (!upperPhi)
      break;

    PHINode* lowerPhi = PHINode::Create(upperPhi->getType(), 2, "", InsertPos);
    // Must happen before upperPhi becomes an operand of lowerPhi.
    upperPhi->replaceAllUsesWith(lowerPhi);
    InsertPos = cloneReturn->getFirstNonPHI();

    lowerPhi->addIncoming(upperPhi, phiHalf);
    lowerPhi->addIncoming(upperPhi->getIncomingValueForBlock(cloneEntry),
                          cloneEntry);
    upperPhi->removeIncomingValue(cloneEntry);
  }
  cloneEntry->getTerminator()->replaceUsesOfWith(phiHalf, cloneReturn);

  // Everything except the entry block and the final return block is
  // extracted; the head of the non-returning path goes first.
  std::vector<BasicBlock*> region;
  region.push_back(cloneBody);
  for (Function::iterator BI = clone->begin(), BE = clone->end();
       BI != BE; ++BI) {
    BasicBlock *Candidate = &*BI;
    if (Candidate == cloneEntry || Candidate == cloneReturn ||
        Candidate == cloneBody)
      continue;
    region.push_back(Candidate);
  }

  // The CodeExtractor needs a dominator tree.
  DominatorTree DT;
  DT.runOnFunction(*clone);

  // Extract the body of the if.
  Function* extracted = CodeExtractor(region, &DT).extractCodeRegion();

  InlineFunctionInfo IFI;

  // Inline the remaining top-level test into all direct callers. The user
  // list is snapshotted because inlining edits the use list.
  std::vector<User*> callers(clone->use_begin(), clone->use_end());
  for (std::vector<User*>::size_type Idx = 0; Idx != callers.size(); ++Idx) {
    User *U = callers[Idx];
    if (CallInst *CI = dyn_cast<CallInst>(U))
      InlineFunction(CI, IFI);
    else if (InvokeInst *II = dyn_cast<InvokeInst>(U))
      InlineFunction(II, IFI);
  }

  // The copy has served its purpose: send the remaining users (function
  // pointers, etc.) back to the original and delete the copy.
  clone->replaceAllUsesWith(F);
  clone->eraseFromParent();

  ++NumPartialInlined;

  return extracted;
}
void LoopInterchangeTransform::splitOuterLoopLatch() { BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); BasicBlock *OuterLatchLcssaPhiBlock = OuterLoopLatch; OuterLoopLatch = SplitBlock(OuterLatchLcssaPhiBlock, OuterLoopLatch->getFirstNonPHI(), DT, LI); }