/// Many allocas are only used within a single basic block. If this is the /// case, avoid traversing the CFG and inserting a lot of potentially useless /// PHI nodes by just performing a single linear pass over the basic block /// using the Alloca. /// /// If we cannot promote this alloca (because it is read before it is written), /// return false. This is necessary in cases where, due to control flow, the /// alloca is undefined only on some control flow paths. e.g. code like /// this is correct in LLVM IR: /// // A is an alloca with no stores so far /// for (...) { /// int t = *A; /// if (!first_iteration) /// use(t); /// *A = 42; /// } static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, LargeBlockInfo &LBI, const DataLayout &DL, DominatorTree &DT, AssumptionCache *AC) { // The trickiest case to handle is when we have large blocks. Because of this, // this code is optimized assuming that large blocks happen. This does not // significantly pessimize the small block case. This uses LargeBlockInfo to // make it efficient to get the index of various operations in the block. // Walk the use-def list of the alloca, getting the locations of all stores. using StoresByIndexTy = SmallVector<std::pair<unsigned, StoreInst *>, 64>; StoresByIndexTy StoresByIndex; for (User *U : AI->users()) if (StoreInst *SI = dyn_cast<StoreInst>(U)) StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); // Sort the stores by their index, making it efficient to do a lookup with a // binary search. llvm::sort(StoresByIndex, less_first()); // Walk all of the loads from this alloca, replacing them with the nearest // store above them, if any. for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { LoadInst *LI = dyn_cast<LoadInst>(*UI++); if (!LI) continue; unsigned LoadIdx = LBI.getInstructionIndex(LI); // Find the nearest store that has a lower index than this load. StoresByIndexTy::iterator I = std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), std::make_pair(LoadIdx, static_cast<StoreInst *>(nullptr)), less_first()); if (I == StoresByIndex.begin()) { if (StoresByIndex.empty()) // If there are no stores, the load takes the undef value. LI->replaceAllUsesWith(UndefValue::get(LI->getType())); else // There is no store before this load, bail out (load may be affected // by the following stores - see main comment). return false; } else { // Otherwise, there was a store before this load, the load takes its value. // Note, if the load was marked as nonnull we don't want to lose that // information when we erase it. So we preserve it with an assume. Value *ReplVal = std::prev(I)->second->getOperand(0); if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT)) addAssumeNonNull(AC, LI); // If the replacement value is the load, this must occur in unreachable // code. if (ReplVal == LI) ReplVal = UndefValue::get(LI->getType()); LI->replaceAllUsesWith(ReplVal); } LI->eraseFromParent(); LBI.deleteValue(LI); } // Remove the (now dead) stores and alloca. while (!AI->use_empty()) { StoreInst *SI = cast<StoreInst>(AI->user_back()); // Record debuginfo for the store before removing it. for (DbgVariableIntrinsic *DII : Info.DbgDeclares) { DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); ConvertDebugDeclareToDebugValue(DII, SI, DIB); } SI->eraseFromParent(); LBI.deleteValue(SI); } AI->eraseFromParent(); LBI.deleteValue(AI); // The alloca's debuginfo can be removed as well. for (DbgVariableIntrinsic *DII : Info.DbgDeclares) { DII->eraseFromParent(); LBI.deleteValue(DII); } ++NumLocalPromoted; return true; }
/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic /// block. If this is the case, avoid traversing the CFG and inserting a lot of /// potentially useless PHI nodes by just performing a single linear pass over /// the basic block using the Alloca. /// /// If we cannot promote this alloca (because it is read before it is written), /// return true. This is necessary in cases where, due to control flow, the /// alloca is potentially undefined on some control flow paths. e.g. code like /// this is potentially correct: /// /// for (...) { if (c) { A = undef; undef = B; } } /// /// ... so long as A is not used before undef is set. /// void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI) { // The trickiest case to handle is when we have large blocks. Because of this, // this code is optimized assuming that large blocks happen. This does not // significantly pessimize the small block case. This uses LargeBlockInfo to // make it efficient to get the index of various operations in the block. // Clear out UsingBlocks. We will reconstruct it here if needed. Info.UsingBlocks.clear(); // Walk the use-def list of the alloca, getting the locations of all stores. typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy; StoresByIndexTy StoresByIndex; for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ++UI) if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); // If there are no stores to the alloca, just replace any loads with undef. if (StoresByIndex.empty()) { for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) { LI->replaceAllUsesWith(UndefValue::get(LI->getType())); if (AST && LI->getType()->isPointerTy()) AST->deleteValue(LI); LBI.deleteValue(LI); LI->eraseFromParent(); } return; } // Sort the stores by their index, making it efficient to do a lookup with a // binary search. std::sort(StoresByIndex.begin(), StoresByIndex.end()); // Walk all of the loads from this alloca, replacing them with the nearest // store above them, if any. for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { LoadInst *LI = dyn_cast<LoadInst>(*UI++); if (!LI) continue; unsigned LoadIdx = LBI.getInstructionIndex(LI); // Find the nearest store that has a lower than this load. StoresByIndexTy::iterator I = std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)), StoreIndexSearchPredicate()); // If there is no store before this load, then we can't promote this load. if (I == StoresByIndex.begin()) { // Can't handle this load, bail out. Info.UsingBlocks.push_back(LI->getParent()); continue; } // Otherwise, there was a store before this load, the load takes its value. --I; LI->replaceAllUsesWith(I->second->getOperand(0)); if (AST && LI->getType()->isPointerTy()) AST->deleteValue(LI); LI->eraseFromParent(); LBI.deleteValue(LI); } }
/// Many allocas are only used within a single basic block. If this is the /// case, avoid traversing the CFG and inserting a lot of potentially useless /// PHI nodes by just performing a single linear pass over the basic block /// using the Alloca. /// /// If we cannot promote this alloca (because it is read before it is written), /// return false. This is necessary in cases where, due to control flow, the /// alloca is undefined only on some control flow paths. e.g. code like /// this is correct in LLVM IR: /// // A is an alloca with no stores so far /// for (...) { /// int t = *A; /// if (!first_iteration) /// use(t); /// *A = 42; /// } static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, LargeBlockInfo &LBI, AliasSetTracker *AST) { // The trickiest case to handle is when we have large blocks. Because of this, // this code is optimized assuming that large blocks happen. This does not // significantly pessimize the small block case. This uses LargeBlockInfo to // make it efficient to get the index of various operations in the block. // Walk the use-def list of the alloca, getting the locations of all stores. typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy; StoresByIndexTy StoresByIndex; for (User *U : AI->users()) if (StoreInst *SI = dyn_cast<StoreInst>(U)) StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); // Sort the stores by their index, making it efficient to do a lookup with a // binary search. std::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first()); // Walk all of the loads from this alloca, replacing them with the nearest // store above them, if any. for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { LoadInst *LI = dyn_cast<LoadInst>(*UI++); if (!LI) continue; unsigned LoadIdx = LBI.getInstructionIndex(LI); // Find the nearest store that has a lower index than this load. StoresByIndexTy::iterator I = std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), std::make_pair(LoadIdx, static_cast<StoreInst *>(nullptr)), less_first()); if (I == StoresByIndex.begin()) { if (StoresByIndex.empty()) // If there are no stores, the load takes the undef value. LI->replaceAllUsesWith(UndefValue::get(LI->getType())); else // There is no store before this load, bail out (load may be affected // by the following stores - see main comment). return false; } else // Otherwise, there was a store before this load, the load takes its value. LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0)); if (AST && LI->getType()->isPointerTy()) AST->deleteValue(LI); LI->eraseFromParent(); LBI.deleteValue(LI); } // Remove the (now dead) stores and alloca. while (!AI->use_empty()) { StoreInst *SI = cast<StoreInst>(AI->user_back()); // Record debuginfo for the store before removing it. if (DbgDeclareInst *DDI = Info.DbgDeclare) { DIBuilder DIB(*AI->getParent()->getParent()->getParent(), /*AllowUnresolved*/ false); ConvertDebugDeclareToDebugValue(DDI, SI, DIB); } SI->eraseFromParent(); LBI.deleteValue(SI); } if (AST) AST->deleteValue(AI); AI->eraseFromParent(); LBI.deleteValue(AI); // The alloca's debuginfo can be removed as well. if (DbgDeclareInst *DDI = Info.DbgDeclare) { DDI->eraseFromParent(); LBI.deleteValue(DDI); } ++NumLocalPromoted; return true; }