/// RewriteSingleStoreAlloca - If there is only a single store to this value, /// replace any loads of it that are directly dominated by the definition with /// the value stored. void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI) { StoreInst *OnlyStore = Info.OnlyStore; bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); BasicBlock *StoreBB = OnlyStore->getParent(); int StoreIndex = -1; // Clear out UsingBlocks. We will reconstruct it here if needed. Info.UsingBlocks.clear(); for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) { Instruction *UserInst = cast<Instruction>(*UI++); if (!isa<LoadInst>(UserInst)) { assert(UserInst == OnlyStore && "Should only have load/stores"); continue; } LoadInst *LI = cast<LoadInst>(UserInst); // Okay, if we have a load from the alloca, we want to replace it with the // only value stored to the alloca. We can do this if the value is // dominated by the store. If not, we use the rest of the mem2reg machinery // to insert the phi nodes as needed. if (!StoringGlobalVal) { // Non-instructions are always dominated. if (LI->getParent() == StoreBB) { // If we have a use that is in the same block as the store, compare the // indices of the two instructions to see which one came first. If the // load came before the store, we can't handle it. if (StoreIndex == -1) StoreIndex = LBI.getInstructionIndex(OnlyStore); if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { // Can't handle this load, bail out. Info.UsingBlocks.push_back(StoreBB); continue; } } else if (LI->getParent() != StoreBB && !dominates(StoreBB, LI->getParent())) { // If the load and store are in different blocks, use BB dominance to // check their relationships. If the store doesn't dom the use, bail // out. Info.UsingBlocks.push_back(LI->getParent()); continue; } } // Otherwise, we *can* safely rewrite this load. Value *ReplVal = OnlyStore->getOperand(0); // If the replacement value is the load, this must occur in unreachable // code. if (ReplVal == LI) ReplVal = UndefValue::get(LI->getType()); LI->replaceAllUsesWith(ReplVal); if (AST && LI->getType()->isPointerTy()) AST->deleteValue(LI); LI->eraseFromParent(); LBI.deleteValue(LI); } }
bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) { if (!WidenLoads) return false; if ((I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS || I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) && canWidenScalarExtLoad(I)) { IRBuilder<> Builder(&I); Builder.SetCurrentDebugLocation(I.getDebugLoc()); Type *I32Ty = Builder.getInt32Ty(); Type *PT = PointerType::get(I32Ty, I.getPointerAddressSpace()); Value *BitCast= Builder.CreateBitCast(I.getPointerOperand(), PT); LoadInst *WidenLoad = Builder.CreateLoad(BitCast); WidenLoad->copyMetadata(I); // If we have range metadata, we need to convert the type, and not make // assumptions about the high bits. if (auto *Range = WidenLoad->getMetadata(LLVMContext::MD_range)) { ConstantInt *Lower = mdconst::extract<ConstantInt>(Range->getOperand(0)); if (Lower->getValue().isNullValue()) { WidenLoad->setMetadata(LLVMContext::MD_range, nullptr); } else { Metadata *LowAndHigh[] = { ConstantAsMetadata::get(ConstantInt::get(I32Ty, Lower->getValue().zext(32))), // Don't make assumptions about the high bits. ConstantAsMetadata::get(ConstantInt::get(I32Ty, 0)) }; WidenLoad->setMetadata(LLVMContext::MD_range, MDNode::get(Mod->getContext(), LowAndHigh)); } } int TySize = Mod->getDataLayout().getTypeSizeInBits(I.getType()); Type *IntNTy = Builder.getIntNTy(TySize); Value *ValTrunc = Builder.CreateTrunc(WidenLoad, IntNTy); Value *ValOrig = Builder.CreateBitCast(ValTrunc, I.getType()); I.replaceAllUsesWith(ValOrig); I.eraseFromParent(); return true; } return false; }
/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic /// block. If this is the case, avoid traversing the CFG and inserting a lot of /// potentially useless PHI nodes by just performing a single linear pass over /// the basic block using the Alloca. /// /// If we cannot promote this alloca (because it is read before it is written), /// return true. This is necessary in cases where, due to control flow, the /// alloca is potentially undefined on some control flow paths. e.g. code like /// this is potentially correct: /// /// for (...) { if (c) { A = undef; undef = B; } } /// /// ... so long as A is not used before undef is set. /// void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI) { // The trickiest case to handle is when we have large blocks. Because of this, // this code is optimized assuming that large blocks happen. This does not // significantly pessimize the small block case. This uses LargeBlockInfo to // make it efficient to get the index of various operations in the block. // Clear out UsingBlocks. We will reconstruct it here if needed. Info.UsingBlocks.clear(); // Walk the use-def list of the alloca, getting the locations of all stores. typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy; StoresByIndexTy StoresByIndex; for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ++UI) if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); // If there are no stores to the alloca, just replace any loads with undef. if (StoresByIndex.empty()) { for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) { LI->replaceAllUsesWith(UndefValue::get(LI->getType())); if (AST && LI->getType()->isPointerTy()) AST->deleteValue(LI); LBI.deleteValue(LI); LI->eraseFromParent(); } return; } // Sort the stores by their index, making it efficient to do a lookup with a // binary search. std::sort(StoresByIndex.begin(), StoresByIndex.end()); // Walk all of the loads from this alloca, replacing them with the nearest // store above them, if any. for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { LoadInst *LI = dyn_cast<LoadInst>(*UI++); if (!LI) continue; unsigned LoadIdx = LBI.getInstructionIndex(LI); // Find the nearest store that has a lower than this load. StoresByIndexTy::iterator I = std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)), StoreIndexSearchPredicate()); // If there is no store before this load, then we can't promote this load. if (I == StoresByIndex.begin()) { // Can't handle this load, bail out. Info.UsingBlocks.push_back(LI->getParent()); continue; } // Otherwise, there was a store before this load, the load takes its value. --I; LI->replaceAllUsesWith(I->second->getOperand(0)); if (AST && LI->getType()->isPointerTy()) AST->deleteValue(LI); LI->eraseFromParent(); LBI.deleteValue(LI); } }
/// tryAggregating - When scanning forward over instructions, we look for /// other loads or stores that could be aggregated with this one. /// Returns the last instruction added (if one was added) since we might have /// removed some loads or stores and that might invalidate an iterator. Instruction *AggregateGlobalOpsOpt::tryAggregating(Instruction *StartInst, Value *StartPtr, bool DebugThis) { if (TD == 0) return 0; Module* M = StartInst->getParent()->getParent()->getParent(); LLVMContext& Context = StartInst->getContext(); Type* int8Ty = Type::getInt8Ty(Context); Type* sizeTy = Type::getInt64Ty(Context); Type* globalInt8PtrTy = int8Ty->getPointerTo(globalSpace); bool isLoad = isa<LoadInst>(StartInst); bool isStore = isa<StoreInst>(StartInst); Instruction *lastAddedInsn = NULL; Instruction *LastLoadOrStore = NULL; SmallVector<Instruction*, 8> toRemove; // Okay, so we now have a single global load/store. Scan to find // all subsequent stores of the same value to offset from the same pointer. // Join these together into ranges, so we can decide whether contiguous blocks // are stored. MemOpRanges Ranges(*TD); // Put the first store in since we want to preserve the order. Ranges.addInst(0, StartInst); BasicBlock::iterator BI = StartInst; for (++BI; !isa<TerminatorInst>(BI); ++BI) { if( isGlobalLoadOrStore(BI, globalSpace, isLoad, isStore) ) { // OK! } else { // If the instruction is readnone, ignore it, otherwise bail out. We // don't even allow readonly here because we don't want something like: // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). if (BI->mayWriteToMemory()) break; if (isStore && BI->mayReadFromMemory()) break; continue; } if ( isStore && isa<StoreInst>(BI) ) { StoreInst *NextStore = cast<StoreInst>(BI); // If this is a store, see if we can merge it in. if (!NextStore->isSimple()) break; // Check to see if this store is to a constant offset from the start ptr. int64_t Offset; if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD)) break; Ranges.addStore(Offset, NextStore); LastLoadOrStore = NextStore; } else { LoadInst *NextLoad = cast<LoadInst>(BI); if (!NextLoad->isSimple()) break; // Check to see if this load is to a constant offset from the start ptr. int64_t Offset; if (!IsPointerOffset(StartPtr, NextLoad->getPointerOperand(), Offset, *TD)) break; Ranges.addLoad(Offset, NextLoad); LastLoadOrStore = NextLoad; } } // If we have no ranges, then we just had a single store with nothing that // could be merged in. This is a very common case of course. if (!Ranges.moreThanOneOp()) return 0; // Divide the instructions between StartInst and LastLoadOrStore into // addressing, memops, and uses of memops (uses of loads) reorderAddressingMemopsUses(StartInst, LastLoadOrStore, DebugThis); Instruction* insertBefore = StartInst; IRBuilder<> builder(insertBefore); // Now that we have full information about ranges, loop over the ranges and // emit memcpy's for anything big enough to be worthwhile. for (MemOpRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { const MemOpRange &Range = *I; Value* oldBaseI = NULL; Value* newBaseI = NULL; if (Range.TheStores.size() == 1) continue; // Don't bother if there's only one thing... builder.SetInsertPoint(insertBefore); // Otherwise, we do want to transform this! Create a new memcpy. // Get the starting pointer of the block. StartPtr = Range.StartPtr; if( DebugThis ) { errs() << "base is:"; StartPtr->dump(); } // Determine alignment unsigned Alignment = Range.Alignment; if (Alignment == 0) { Type *EltType = cast<PointerType>(StartPtr->getType())->getElementType(); Alignment = TD->getABITypeAlignment(EltType); } Instruction *alloc = NULL; Value *globalPtr = NULL; // create temporary alloca space to communicate to/from. alloc = makeAlloca(int8Ty, "agg.tmp", insertBefore, Range.End-Range.Start, Alignment); // Generate the old and new base pointers before we output // anything else. { Type* iPtrTy = TD->getIntPtrType(alloc->getType()); Type* iNewBaseTy = TD->getIntPtrType(alloc->getType()); oldBaseI = builder.CreatePtrToInt(StartPtr, iPtrTy, "agg.tmp.oldb.i"); newBaseI = builder.CreatePtrToInt(alloc, iNewBaseTy, "agg.tmp.newb.i"); } // If storing, do the stores we had into our alloca'd region. if( isStore ) { for (SmallVector<Instruction*, 16>::const_iterator SI = Range.TheStores.begin(), SE = Range.TheStores.end(); SI != SE; ++SI) { StoreInst* oldStore = cast<StoreInst>(*SI); if( DebugThis ) { errs() << "have store in range:"; oldStore->dump(); } Value* ptrToAlloc = rebasePointer(oldStore->getPointerOperand(), StartPtr, alloc, "agg.tmp", &builder, *TD, oldBaseI, newBaseI); // Old load must not be volatile or atomic... or we shouldn't have put // it in ranges assert(!(oldStore->isVolatile() || oldStore->isAtomic())); StoreInst* newStore = builder.CreateStore(oldStore->getValueOperand(), ptrToAlloc); newStore->setAlignment(oldStore->getAlignment()); newStore->takeName(oldStore); } } // cast the pointer that was load/stored to i8 if necessary. if( StartPtr->getType()->getPointerElementType() == int8Ty ) { globalPtr = StartPtr; } else { globalPtr = builder.CreatePointerCast(StartPtr, globalInt8PtrTy, "agg.cast"); } // Get a Constant* for the length. Constant* len = ConstantInt::get(sizeTy, Range.End-Range.Start, false); // Now add the memcpy instruction unsigned addrSpaceDst,addrSpaceSrc; addrSpaceDst = addrSpaceSrc = 0; if( isStore ) addrSpaceDst = globalSpace; if( isLoad ) addrSpaceSrc = globalSpace; Type *types[3]; types[0] = PointerType::get(int8Ty, addrSpaceDst); types[1] = PointerType::get(int8Ty, addrSpaceSrc); types[2] = sizeTy; Function *func = Intrinsic::getDeclaration(M, Intrinsic::memcpy, types); Value* args[5]; // dst src len alignment isvolatile if( isStore ) { // it's a store (ie put) args[0] = globalPtr; args[1] = alloc; } else { // it's a load (ie get) args[0] = alloc; args[1] = globalPtr; } args[2] = len; // alignment args[3] = ConstantInt::get(Type::getInt32Ty(Context), 0, false); // isvolatile args[4] = ConstantInt::get(Type::getInt1Ty(Context), 0, false); Instruction* aMemCpy = builder.CreateCall(func, args); /* DEBUG(dbgs() << "Replace ops:\n"; for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) dbgs() << *Range.TheStores[i] << '\n'; dbgs() << "With: " << *AMemSet << '\n'); */ if (!Range.TheStores.empty()) aMemCpy->setDebugLoc(Range.TheStores[0]->getDebugLoc()); lastAddedInsn = aMemCpy; // If loading, load from the memcpy'd region if( isLoad ) { for (SmallVector<Instruction*, 16>::const_iterator SI = Range.TheStores.begin(), SE = Range.TheStores.end(); SI != SE; ++SI) { LoadInst* oldLoad = cast<LoadInst>(*SI); if( DebugThis ) { errs() << "have load in range:"; oldLoad->dump(); } Value* ptrToAlloc = rebasePointer(oldLoad->getPointerOperand(), StartPtr, alloc, "agg.tmp", &builder, *TD, oldBaseI, newBaseI); // Old load must not be volatile or atomic... or we shouldn't have put // it in ranges assert(!(oldLoad->isVolatile() || oldLoad->isAtomic())); LoadInst* newLoad = builder.CreateLoad(ptrToAlloc); newLoad->setAlignment(oldLoad->getAlignment()); oldLoad->replaceAllUsesWith(newLoad); newLoad->takeName(oldLoad); lastAddedInsn = newLoad; } } // Save old loads/stores for removal for (SmallVector<Instruction*, 16>::const_iterator SI = Range.TheStores.begin(), SE = Range.TheStores.end(); SI != SE; ++SI) { Instruction* insn = *SI; toRemove.push_back(insn); } } // Zap all the old loads/stores for (SmallVector<Instruction*, 16>::const_iterator SI = toRemove.begin(), SE = toRemove.end(); SI != SE; ++SI) { (*SI)->eraseFromParent(); } return lastAddedInsn; }
/// DoPromotion - This method actually performs the promotion of the specified /// arguments, and returns the new function. At this point, we know that it's /// safe to do so. CallGraphNode *ArgPromotion::DoPromotion(Function *F, SmallPtrSet<Argument*, 8> &ArgsToPromote, SmallPtrSet<Argument*, 8> &ByValArgsToTransform) { // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments. const FunctionType *FTy = F->getFunctionType(); std::vector<const Type*> Params; typedef std::set<IndicesVector> ScalarizeTable; // ScalarizedElements - If we are promoting a pointer that has elements // accessed out of it, keep track of which elements are accessed so that we // can add one argument for each. // // Arguments that are directly loaded will have a zero element value here, to // handle cases where there are both a direct load and GEP accesses. // std::map<Argument*, ScalarizeTable> ScalarizedElements; // OriginalLoads - Keep track of a representative load instruction from the // original function so that we can tell the alias analysis implementation // what the new GEP/Load instructions we are inserting look like. std::map<IndicesVector, LoadInst*> OriginalLoads; // Attributes - Keep track of the parameter attributes for the arguments // that we are *not* promoting. For the ones that we do promote, the parameter // attributes are lost SmallVector<AttributeWithIndex, 8> AttributesVec; const AttrListPtr &PAL = F->getAttributes(); // Add any return attributes. if (Attributes attrs = PAL.getRetAttributes()) AttributesVec.push_back(AttributeWithIndex::get(0, attrs)); // First, determine the new argument list unsigned ArgIndex = 1; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++ArgIndex) { if (ByValArgsToTransform.count(I)) { // Simple byval argument? Just add all the struct element types. const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); const StructType *STy = cast<StructType>(AgTy); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) Params.push_back(STy->getElementType(i)); ++NumByValArgsPromoted; } else if (!ArgsToPromote.count(I)) { // Unchanged argument Params.push_back(I->getType()); if (Attributes attrs = PAL.getParamAttributes(ArgIndex)) AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs)); } else if (I->use_empty()) { // Dead argument (which are always marked as promotable) ++NumArgumentsDead; } else { // Okay, this is being promoted. This means that the only uses are loads // or GEPs which are only used by loads // In this table, we will track which indices are loaded from the argument // (where direct loads are tracked as no indices). ScalarizeTable &ArgIndices = ScalarizedElements[I]; for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); assert(isa<LoadInst>(User) || isa<GetElementPtrInst>(User)); IndicesVector Indices; Indices.reserve(User->getNumOperands() - 1); // Since loads will only have a single operand, and GEPs only a single // non-index operand, this will record direct loads without any indices, // and gep+loads with the GEP indices. for (User::op_iterator II = User->op_begin() + 1, IE = User->op_end(); II != IE; ++II) Indices.push_back(cast<ConstantInt>(*II)->getSExtValue()); // GEPs with a single 0 index can be merged with direct loads if (Indices.size() == 1 && Indices.front() == 0) Indices.clear(); ArgIndices.insert(Indices); LoadInst *OrigLoad; if (LoadInst *L = dyn_cast<LoadInst>(User)) OrigLoad = L; else // Take any load, we will use it only to update Alias Analysis OrigLoad = cast<LoadInst>(User->use_back()); OriginalLoads[Indices] = OrigLoad; } // Add a parameter to the function for each element passed in. for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { // not allowed to dereference ->begin() if size() is 0 Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), SI->begin(), SI->end())); assert(Params.back()); } if (ArgIndices.size() == 1 && ArgIndices.begin()->empty()) ++NumArgumentsPromoted; else ++NumAggregatesPromoted; } } // Add any function attributes. if (Attributes attrs = PAL.getFnAttributes()) AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); const Type *RetTy = FTy->getReturnType(); // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which // have zero fixed arguments. bool ExtraArgHack = false; if (Params.empty() && FTy->isVarArg()) { ExtraArgHack = true; Params.push_back(Type::getInt32Ty(F->getContext())); } // Construct the new function type using the new arguments. FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); // Create the new function body and insert it into the module. Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName()); NF->copyAttributesFrom(F); DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" << "From: " << *F); // Recompute the parameter attributes list based on the new arguments for // the function. NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); AttributesVec.clear(); F->getParent()->getFunctionList().insert(F, NF); NF->takeName(F); // Get the alias analysis information that we need to update to reflect our // changes. AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); // Get the callgraph information that we need to update to reflect our // changes. CallGraph &CG = getAnalysis<CallGraph>(); // Get a new callgraph node for NF. CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); // Loop over all of the callers of the function, transforming the call sites // to pass in the loaded pointers. // SmallVector<Value*, 16> Args; while (!F->use_empty()) { CallSite CS = CallSite::get(F->use_back()); assert(CS.getCalledFunction() == F); Instruction *Call = CS.getInstruction(); const AttrListPtr &CallPAL = CS.getAttributes(); // Add any return attributes. if (Attributes attrs = CallPAL.getRetAttributes()) AttributesVec.push_back(AttributeWithIndex::get(0, attrs)); // Loop over the operands, inserting GEP and loads in the caller as // appropriate. CallSite::arg_iterator AI = CS.arg_begin(); ArgIndex = 1; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++AI, ++ArgIndex) if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) { Args.push_back(*AI); // Unmodified argument if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex)) AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); } else if (ByValArgsToTransform.count(I)) { // Emit a GEP and load for each element of the struct. const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); const StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2, (*AI)->getName()+"."+utostr(i), Call); // TODO: Tell AA about the new values? Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call)); } } else if (!I->use_empty()) { // Non-dead argument: insert GEPs and loads as appropriate. ScalarizeTable &ArgIndices = ScalarizedElements[I]; // Store the Value* version of the indices in here, but declare it now // for reuse. std::vector<Value*> Ops; for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { Value *V = *AI; LoadInst *OrigLoad = OriginalLoads[*SI]; if (!SI->empty()) { Ops.reserve(SI->size()); const Type *ElTy = V->getType(); for (IndicesVector::const_iterator II = SI->begin(), IE = SI->end(); II != IE; ++II) { // Use i32 to index structs, and i64 for others (pointers/arrays). // This satisfies GEP constraints. const Type *IdxTy = (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext()) : Type::getInt64Ty(F->getContext())); Ops.push_back(ConstantInt::get(IdxTy, *II)); // Keep track of the type we're currently indexing. ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II); } // And create a GEP to extract those indices. V = GetElementPtrInst::Create(V, Ops.begin(), Ops.end(), V->getName()+".idx", Call); Ops.clear(); AA.copyValue(OrigLoad->getOperand(0), V); } // Since we're replacing a load make sure we take the alignment // of the previous load. LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call); newLoad->setAlignment(OrigLoad->getAlignment()); Args.push_back(newLoad); AA.copyValue(OrigLoad, Args.back()); } } if (ExtraArgHack) Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext()))); // Push any varargs arguments on the list. for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { Args.push_back(*AI); if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex)) AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); } // Add any function attributes. if (Attributes attrs = CallPAL.getFnAttributes()) AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), Args.begin(), Args.end(), "", Call); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); } else { New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); if (cast<CallInst>(Call)->isTailCall()) cast<CallInst>(New)->setTailCall(); } Args.clear(); AttributesVec.clear(); // Update the alias analysis implementation to know that we are replacing // the old call with a new one. AA.replaceWithNewValue(Call, New); // Update the callgraph to know that the callsite has been transformed. CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; CalleeNode->replaceCallEdge(Call, New, NF_CGN); if (!Call->use_empty()) { Call->replaceAllUsesWith(New); New->takeName(Call); } // Finally, remove the old call from the program, reducing the use-count of // F. Call->eraseFromParent(); } // Since we have now created the new function, splice the body of the old // function right into the new function, leaving the old rotting hulk of the // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); // Loop over the argument list, transfering uses of the old arguments over to // the new arguments, also transfering over the names as well. // for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I) { if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) { // If this is an unmodified argument, move the name and users over to the // new version. I->replaceAllUsesWith(I2); I2->takeName(I); AA.replaceWithNewValue(I, I2); ++I2; continue; } if (ByValArgsToTransform.count(I)) { // In the callee, we create an alloca, and store each of the new incoming // arguments into the alloca. Instruction *InsertPt = NF->begin()->begin(); // Just add all the struct element types. const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt); const StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2, TheAlloca->getName()+"."+Twine(i), InsertPt); I2->setName(I->getName()+"."+Twine(i)); new StoreInst(I2++, Idx, InsertPt); } // Anything that used the arg should now use the alloca. I->replaceAllUsesWith(TheAlloca); TheAlloca->takeName(I); AA.replaceWithNewValue(I, TheAlloca); continue; } if (I->use_empty()) { AA.deleteValue(I); continue; } // Otherwise, if we promoted this argument, then all users are load // instructions (or GEPs with only load users), and all loads should be // using the new argument that we added. ScalarizeTable &ArgIndices = ScalarizedElements[I]; while (!I->use_empty()) { if (LoadInst *LI = dyn_cast<LoadInst>(I->use_back())) { assert(ArgIndices.begin()->empty() && "Load element should sort to front!"); I2->setName(I->getName()+".val"); LI->replaceAllUsesWith(I2); AA.replaceWithNewValue(LI, I2); LI->eraseFromParent(); DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() << "' in function '" << F->getName() << "'\n"); } else { GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back()); IndicesVector Operands; Operands.reserve(GEP->getNumIndices()); for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); II != IE; ++II) Operands.push_back(cast<ConstantInt>(*II)->getSExtValue()); // GEPs with a single 0 index can be merged with direct loads if (Operands.size() == 1 && Operands.front() == 0) Operands.clear(); Function::arg_iterator TheArg = I2; for (ScalarizeTable::iterator It = ArgIndices.begin(); *It != Operands; ++It, ++TheArg) { assert(It != ArgIndices.end() && "GEP not handled??"); } std::string NewName = I->getName(); for (unsigned i = 0, e = Operands.size(); i != e; ++i) { NewName += "." + utostr(Operands[i]); } NewName += ".val"; TheArg->setName(NewName); DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() << "' of function '" << NF->getName() << "'\n"); // All of the uses must be load instructions. Replace them all with // the argument specified by ArgNo. while (!GEP->use_empty()) { LoadInst *L = cast<LoadInst>(GEP->use_back()); L->replaceAllUsesWith(TheArg); AA.replaceWithNewValue(L, TheArg); L->eraseFromParent(); } AA.deleteValue(GEP); GEP->eraseFromParent(); } } // Increment I2 past all of the arguments added for this promoted pointer. for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i) ++I2; } // Notify the alias analysis implementation that we inserted a new argument. if (ExtraArgHack) AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())), NF->arg_begin()); // Tell the alias analysis that the old function is about to disappear. AA.replaceWithNewValue(F, NF); NF_CGN->stealCalledFunctionsFrom(CG[F]); // Now that the old function is dead, delete it. If there is a dangling // reference to the CallgraphNode, just leave the dead function around for // someone else to nuke. CallGraphNode *CGN = CG[F]; if (CGN->getNumReferences() == 0) delete CG.removeFunctionFromModule(CGN); else F->setLinkage(Function::ExternalLinkage); return NF_CGN; }
/// Many allocas are only used within a single basic block. If this is the /// case, avoid traversing the CFG and inserting a lot of potentially useless /// PHI nodes by just performing a single linear pass over the basic block /// using the Alloca. /// /// If we cannot promote this alloca (because it is read before it is written), /// return true. This is necessary in cases where, due to control flow, the /// alloca is potentially undefined on some control flow paths. e.g. code like /// this is potentially correct: /// /// for (...) { if (c) { A = undef; undef = B; } } /// /// ... so long as A is not used before undef is set. static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, LargeBlockInfo &LBI, AliasSetTracker *AST) { // The trickiest case to handle is when we have large blocks. Because of this, // this code is optimized assuming that large blocks happen. This does not // significantly pessimize the small block case. This uses LargeBlockInfo to // make it efficient to get the index of various operations in the block. // Walk the use-def list of the alloca, getting the locations of all stores. typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy; StoresByIndexTy StoresByIndex; for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ++UI) if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); // Sort the stores by their index, making it efficient to do a lookup with a // binary search. std::sort(StoresByIndex.begin(), StoresByIndex.end(), StoreIndexSearchPredicate()); // Walk all of the loads from this alloca, replacing them with the nearest // store above them, if any. for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { LoadInst *LI = dyn_cast<LoadInst>(*UI++); if (!LI) continue; unsigned LoadIdx = LBI.getInstructionIndex(LI); // Find the nearest store that has a lower index than this load. StoresByIndexTy::iterator I = std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), std::make_pair(LoadIdx, static_cast<StoreInst *>(0)), StoreIndexSearchPredicate()); if (I == StoresByIndex.begin()) // If there is no store before this load, the load takes the undef value. LI->replaceAllUsesWith(UndefValue::get(LI->getType())); else // Otherwise, there was a store before this load, the load takes its value. LI->replaceAllUsesWith(llvm::prior(I)->second->getOperand(0)); if (AST && LI->getType()->isPointerTy()) AST->deleteValue(LI); LI->eraseFromParent(); LBI.deleteValue(LI); } // Remove the (now dead) stores and alloca. while (!AI->use_empty()) { StoreInst *SI = cast<StoreInst>(AI->use_back()); // Record debuginfo for the store before removing it. if (DbgDeclareInst *DDI = Info.DbgDeclare) { DIBuilder DIB(*AI->getParent()->getParent()->getParent()); ConvertDebugDeclareToDebugValue(DDI, SI, DIB); } SI->eraseFromParent(); LBI.deleteValue(SI); } if (AST) AST->deleteValue(AI); AI->eraseFromParent(); LBI.deleteValue(AI); // The alloca's debuginfo can be removed as well. if (DbgDeclareInst *DDI = Info.DbgDeclare) DDI->eraseFromParent(); ++NumLocalPromoted; }
/// \brief Rewrite as many loads as possible given a single store. /// /// When there is only a single store, we can use the domtree to trivially /// replace all of the dominated loads with the stored value. Do so, and return /// true if this has successfully promoted the alloca entirely. If this returns /// false there were some loads which were not dominated by the single store /// and thus must be phi-ed with undef. We fall back to the standard alloca /// promotion algorithm in that case. static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI, DominatorTree &DT, AliasSetTracker *AST) { StoreInst *OnlyStore = Info.OnlyStore; bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); BasicBlock *StoreBB = OnlyStore->getParent(); int StoreIndex = -1; // Clear out UsingBlocks. We will reconstruct it here if needed. Info.UsingBlocks.clear(); for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { Instruction *UserInst = cast<Instruction>(*UI++); if (!isa<LoadInst>(UserInst)) { assert(UserInst == OnlyStore && "Should only have load/stores"); continue; } LoadInst *LI = cast<LoadInst>(UserInst); // Okay, if we have a load from the alloca, we want to replace it with the // only value stored to the alloca. We can do this if the value is // dominated by the store. If not, we use the rest of the mem2reg machinery // to insert the phi nodes as needed. if (!StoringGlobalVal) { // Non-instructions are always dominated. if (LI->getParent() == StoreBB) { // If we have a use that is in the same block as the store, compare the // indices of the two instructions to see which one came first. If the // load came before the store, we can't handle it. if (StoreIndex == -1) StoreIndex = LBI.getInstructionIndex(OnlyStore); if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { // Can't handle this load, bail out. Info.UsingBlocks.push_back(StoreBB); continue; } } else if (LI->getParent() != StoreBB && !DT.dominates(StoreBB, LI->getParent())) { // If the load and store are in different blocks, use BB dominance to // check their relationships. If the store doesn't dom the use, bail // out. Info.UsingBlocks.push_back(LI->getParent()); continue; } } // Otherwise, we *can* safely rewrite this load. Value *ReplVal = OnlyStore->getOperand(0); // If the replacement value is the load, this must occur in unreachable // code. if (ReplVal == LI) ReplVal = UndefValue::get(LI->getType()); LI->replaceAllUsesWith(ReplVal); if (AST && LI->getType()->isPointerTy()) AST->deleteValue(LI); LI->eraseFromParent(); LBI.deleteValue(LI); } // Finally, after the scan, check to see if the store is all that is left. if (!Info.UsingBlocks.empty()) return false; // If not, we'll have to fall back for the remainder. // Record debuginfo for the store and remove the declaration's // debuginfo. if (DbgDeclareInst *DDI = Info.DbgDeclare) { DIBuilder DIB(*AI->getParent()->getParent()->getParent()); ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB); DDI->eraseFromParent(); } // Remove the (now dead) store and alloca. Info.OnlyStore->eraseFromParent(); LBI.deleteValue(Info.OnlyStore); if (AST) AST->deleteValue(AI); AI->eraseFromParent(); LBI.deleteValue(AI); return true; }
/// updateCallSites - Update all sites that call F to use NF. CallGraphNode *SRETPromotion::updateCallSites(Function *F, Function *NF) { CallGraph &CG = getAnalysis<CallGraph>(); SmallVector<Value*, 16> Args; // Attributes - Keep track of the parameter attributes for the arguments. SmallVector<AttributeWithIndex, 8> ArgAttrsVec; // Get a new callgraph node for NF. CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); while (!F->use_empty()) { CallSite CS(*F->use_begin()); Instruction *Call = CS.getInstruction(); const AttrListPtr &PAL = F->getAttributes(); // Add any return attributes. if (Attributes attrs = PAL.getRetAttributes()) ArgAttrsVec.push_back(AttributeWithIndex::get(0, attrs)); // Copy arguments, however skip first one. CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); Value *FirstCArg = *AI; ++AI; // 0th parameter attribute is reserved for return type. // 1th parameter attribute is for first 1st sret argument. unsigned ParamIndex = 2; while (AI != AE) { Args.push_back(*AI); if (Attributes Attrs = PAL.getParamAttributes(ParamIndex)) ArgAttrsVec.push_back(AttributeWithIndex::get(ParamIndex - 1, Attrs)); ++ParamIndex; ++AI; } // Add any function attributes. if (Attributes attrs = PAL.getFnAttributes()) ArgAttrsVec.push_back(AttributeWithIndex::get(~0, attrs)); AttrListPtr NewPAL = AttrListPtr::get(ArgAttrsVec.begin(), ArgAttrsVec.end()); // Build new call instruction. Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), Args.begin(), Args.end(), "", Call); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(NewPAL); } else { New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); cast<CallInst>(New)->setAttributes(NewPAL); if (cast<CallInst>(Call)->isTailCall()) cast<CallInst>(New)->setTailCall(); } Args.clear(); ArgAttrsVec.clear(); New->takeName(Call); // Update the callgraph to know that the callsite has been transformed. CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; CalleeNode->removeCallEdgeFor(Call); CalleeNode->addCalledFunction(New, NF_CGN); // Update all users of sret parameter to extract value using extractvalue. for (Value::use_iterator UI = FirstCArg->use_begin(), UE = FirstCArg->use_end(); UI != UE; ) { User *U2 = *UI++; CallInst *C2 = dyn_cast<CallInst>(U2); if (C2 && (C2 == Call)) continue; GetElementPtrInst *UGEP = cast<GetElementPtrInst>(U2); ConstantInt *Idx = cast<ConstantInt>(UGEP->getOperand(2)); Value *GR = ExtractValueInst::Create(New, Idx->getZExtValue(), "evi", UGEP); while(!UGEP->use_empty()) { // isSafeToUpdateAllCallers has checked that all GEP uses are // LoadInsts LoadInst *L = cast<LoadInst>(*UGEP->use_begin()); L->replaceAllUsesWith(GR); L->eraseFromParent(); } UGEP->eraseFromParent(); continue; } Call->eraseFromParent(); } return NF_CGN; }
void LoadAndStorePromoter:: run(const SmallVectorImpl<Instruction*> &Insts) const { // First step: bucket up uses of the alloca by the block they occur in. // This is important because we have to handle multiple defs/uses in a block // ourselves: SSAUpdater is purely for cross-block references. // FIXME: Want a TinyVector<Instruction*> since there is often 0/1 element. DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock; for (unsigned i = 0, e = Insts.size(); i != e; ++i) { Instruction *User = Insts[i]; UsesByBlock[User->getParent()].push_back(User); } // Okay, now we can iterate over all the blocks in the function with uses, // processing them. Keep track of which loads are loading a live-in value. // Walk the uses in the use-list order to be determinstic. SmallVector<LoadInst*, 32> LiveInLoads; DenseMap<Value*, Value*> ReplacedLoads; for (unsigned i = 0, e = Insts.size(); i != e; ++i) { Instruction *User = Insts[i]; BasicBlock *BB = User->getParent(); std::vector<Instruction*> &BlockUses = UsesByBlock[BB]; // If this block has already been processed, ignore this repeat use. if (BlockUses.empty()) continue; // Okay, this is the first use in the block. If this block just has a // single user in it, we can rewrite it trivially. if (BlockUses.size() == 1) { // If it is a store, it is a trivial def of the value in the block. if (StoreInst *SI = dyn_cast<StoreInst>(User)) { updateDebugInfo(SI); SSA.AddAvailableValue(BB, SI->getOperand(0)); } else // Otherwise it is a load, queue it to rewrite as a live-in load. LiveInLoads.push_back(cast<LoadInst>(User)); BlockUses.clear(); continue; } // Otherwise, check to see if this block is all loads. bool HasStore = false; for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) { if (isa<StoreInst>(BlockUses[i])) { HasStore = true; break; } } // If so, we can queue them all as live in loads. We don't have an // efficient way to tell which on is first in the block and don't want to // scan large blocks, so just add all loads as live ins. if (!HasStore) { for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) LiveInLoads.push_back(cast<LoadInst>(BlockUses[i])); BlockUses.clear(); continue; } // Otherwise, we have mixed loads and stores (or just a bunch of stores). // Since SSAUpdater is purely for cross-block values, we need to determine // the order of these instructions in the block. If the first use in the // block is a load, then it uses the live in value. The last store defines // the live out value. We handle this by doing a linear scan of the block. Value *StoredValue = 0; for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { if (LoadInst *L = dyn_cast<LoadInst>(II)) { // If this is a load from an unrelated pointer, ignore it. if (!isInstInList(L, Insts)) continue; // If we haven't seen a store yet, this is a live in use, otherwise // use the stored value. if (StoredValue) { replaceLoadWithValue(L, StoredValue); L->replaceAllUsesWith(StoredValue); ReplacedLoads[L] = StoredValue; } else { LiveInLoads.push_back(L); } continue; } if (StoreInst *SI = dyn_cast<StoreInst>(II)) { // If this is a store to an unrelated pointer, ignore it. if (!isInstInList(SI, Insts)) continue; updateDebugInfo(SI); // Remember that this is the active value in the block. StoredValue = SI->getOperand(0); } } // The last stored value that happened is the live-out for the block. assert(StoredValue && "Already checked that there is a store in block"); SSA.AddAvailableValue(BB, StoredValue); BlockUses.clear(); } // Okay, now we rewrite all loads that use live-in values in the loop, // inserting PHI nodes as necessary. for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) { LoadInst *ALoad = LiveInLoads[i]; Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent()); replaceLoadWithValue(ALoad, NewVal); // Avoid assertions in unreachable code. if (NewVal == ALoad) NewVal = UndefValue::get(NewVal->getType()); ALoad->replaceAllUsesWith(NewVal); ReplacedLoads[ALoad] = NewVal; } // Allow the client to do stuff before we start nuking things. doExtraRewritesBeforeFinalDeletion(); // Now that everything is rewritten, delete the old instructions from the // function. They should all be dead now. for (unsigned i = 0, e = Insts.size(); i != e; ++i) { Instruction *User = Insts[i]; // If this is a load that still has uses, then the load must have been added // as a live value in the SSAUpdate data structure for a block (e.g. because // the loaded value was stored later). In this case, we need to recursively // propagate the updates until we get to the real value. if (!User->use_empty()) { Value *NewVal = ReplacedLoads[User]; assert(NewVal && "not a replaced load?"); // Propagate down to the ultimate replacee. The intermediately loads // could theoretically already have been deleted, so we don't want to // dereference the Value*'s. DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal); while (RLI != ReplacedLoads.end()) { NewVal = RLI->second; RLI = ReplacedLoads.find(NewVal); } replaceLoadWithValue(cast<LoadInst>(User), NewVal); User->replaceAllUsesWith(NewVal); } instructionDeleted(User); User->eraseFromParent(); } }
/// Many allocas are only used within a single basic block. If this is the /// case, avoid traversing the CFG and inserting a lot of potentially useless /// PHI nodes by just performing a single linear pass over the basic block /// using the Alloca. /// /// If we cannot promote this alloca (because it is read before it is written), /// return false. This is necessary in cases where, due to control flow, the /// alloca is undefined only on some control flow paths. e.g. code like /// this is correct in LLVM IR: /// // A is an alloca with no stores so far /// for (...) { /// int t = *A; /// if (!first_iteration) /// use(t); /// *A = 42; /// } static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, LargeBlockInfo &LBI, const DataLayout &DL, DominatorTree &DT, AssumptionCache *AC) { // The trickiest case to handle is when we have large blocks. Because of this, // this code is optimized assuming that large blocks happen. This does not // significantly pessimize the small block case. This uses LargeBlockInfo to // make it efficient to get the index of various operations in the block. // Walk the use-def list of the alloca, getting the locations of all stores. using StoresByIndexTy = SmallVector<std::pair<unsigned, StoreInst *>, 64>; StoresByIndexTy StoresByIndex; for (User *U : AI->users()) if (StoreInst *SI = dyn_cast<StoreInst>(U)) StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); // Sort the stores by their index, making it efficient to do a lookup with a // binary search. llvm::sort(StoresByIndex, less_first()); // Walk all of the loads from this alloca, replacing them with the nearest // store above them, if any. for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { LoadInst *LI = dyn_cast<LoadInst>(*UI++); if (!LI) continue; unsigned LoadIdx = LBI.getInstructionIndex(LI); // Find the nearest store that has a lower index than this load. StoresByIndexTy::iterator I = std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), std::make_pair(LoadIdx, static_cast<StoreInst *>(nullptr)), less_first()); if (I == StoresByIndex.begin()) { if (StoresByIndex.empty()) // If there are no stores, the load takes the undef value. LI->replaceAllUsesWith(UndefValue::get(LI->getType())); else // There is no store before this load, bail out (load may be affected // by the following stores - see main comment). return false; } else { // Otherwise, there was a store before this load, the load takes its value. // Note, if the load was marked as nonnull we don't want to lose that // information when we erase it. So we preserve it with an assume. Value *ReplVal = std::prev(I)->second->getOperand(0); if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT)) addAssumeNonNull(AC, LI); // If the replacement value is the load, this must occur in unreachable // code. if (ReplVal == LI) ReplVal = UndefValue::get(LI->getType()); LI->replaceAllUsesWith(ReplVal); } LI->eraseFromParent(); LBI.deleteValue(LI); } // Remove the (now dead) stores and alloca. while (!AI->use_empty()) { StoreInst *SI = cast<StoreInst>(AI->user_back()); // Record debuginfo for the store before removing it. for (DbgVariableIntrinsic *DII : Info.DbgDeclares) { DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); ConvertDebugDeclareToDebugValue(DII, SI, DIB); } SI->eraseFromParent(); LBI.deleteValue(SI); } AI->eraseFromParent(); LBI.deleteValue(AI); // The alloca's debuginfo can be removed as well. for (DbgVariableIntrinsic *DII : Info.DbgDeclares) { DII->eraseFromParent(); LBI.deleteValue(DII); } ++NumLocalPromoted; return true; }
/// Rewrite as many loads as possible given a single store. /// /// When there is only a single store, we can use the domtree to trivially /// replace all of the dominated loads with the stored value. Do so, and return /// true if this has successfully promoted the alloca entirely. If this returns /// false there were some loads which were not dominated by the single store /// and thus must be phi-ed with undef. We fall back to the standard alloca /// promotion algorithm in that case. static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI, const DataLayout &DL, DominatorTree &DT, AssumptionCache *AC) { StoreInst *OnlyStore = Info.OnlyStore; bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); BasicBlock *StoreBB = OnlyStore->getParent(); int StoreIndex = -1; // Clear out UsingBlocks. We will reconstruct it here if needed. Info.UsingBlocks.clear(); for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { Instruction *UserInst = cast<Instruction>(*UI++); if (!isa<LoadInst>(UserInst)) { assert(UserInst == OnlyStore && "Should only have load/stores"); continue; } LoadInst *LI = cast<LoadInst>(UserInst); // Okay, if we have a load from the alloca, we want to replace it with the // only value stored to the alloca. We can do this if the value is // dominated by the store. If not, we use the rest of the mem2reg machinery // to insert the phi nodes as needed. if (!StoringGlobalVal) { // Non-instructions are always dominated. if (LI->getParent() == StoreBB) { // If we have a use that is in the same block as the store, compare the // indices of the two instructions to see which one came first. If the // load came before the store, we can't handle it. if (StoreIndex == -1) StoreIndex = LBI.getInstructionIndex(OnlyStore); if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { // Can't handle this load, bail out. Info.UsingBlocks.push_back(StoreBB); continue; } } else if (LI->getParent() != StoreBB && !DT.dominates(StoreBB, LI->getParent())) { // If the load and store are in different blocks, use BB dominance to // check their relationships. If the store doesn't dom the use, bail // out. Info.UsingBlocks.push_back(LI->getParent()); continue; } } // Otherwise, we *can* safely rewrite this load. Value *ReplVal = OnlyStore->getOperand(0); // If the replacement value is the load, this must occur in unreachable // code. if (ReplVal == LI) ReplVal = UndefValue::get(LI->getType()); // If the load was marked as nonnull we don't want to lose // that information when we erase this Load. So we preserve // it with an assume. if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT)) addAssumeNonNull(AC, LI); LI->replaceAllUsesWith(ReplVal); LI->eraseFromParent(); LBI.deleteValue(LI); } // Finally, after the scan, check to see if the store is all that is left. if (!Info.UsingBlocks.empty()) return false; // If not, we'll have to fall back for the remainder. // Record debuginfo for the store and remove the declaration's // debuginfo. for (DbgVariableIntrinsic *DII : Info.DbgDeclares) { DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB); DII->eraseFromParent(); LBI.deleteValue(DII); } // Remove the (now dead) store and alloca. Info.OnlyStore->eraseFromParent(); LBI.deleteValue(Info.OnlyStore); AI->eraseFromParent(); LBI.deleteValue(AI); return true; }
/// PromoteAliasSet - Try to promote memory values to scalars by sinking /// stores out of the loop and moving loads to before the loop. We do this by /// looping over the stores in the loop, looking for stores to Must pointers /// which are loop invariant. /// void LICM::PromoteAliasSet(AliasSet &AS) { // We can promote this alias set if it has a store, if it is a "Must" alias // set, if the pointer is loop invariant, and if we are not eliminating any // volatile loads or stores. if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue())) return; assert(!AS.empty() && "Must alias set should have at least one pointer element in it!"); Value *SomePtr = AS.begin()->getValue(); // It isn't safe to promote a load/store from the loop if the load/store is // conditional. For example, turning: // // for () { if (c) *P += 1; } // // into: // // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; // // is not safe, because *P may only be valid to access if 'c' is true. // // It is safe to promote P if all uses are direct load/stores and if at // least one is guaranteed to be executed. bool GuaranteedToExecute = false; SmallVector<Instruction*, 64> LoopUses; SmallPtrSet<Value*, 4> PointerMustAliases; // Check that all of the pointers in the alias set have the same type. We // cannot (yet) promote a memory location that is loaded and stored in // different sizes. for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) { Value *ASIV = ASI->getValue(); PointerMustAliases.insert(ASIV); // Check that all of the pointers in the alias set have the same type. We // cannot (yet) promote a memory location that is loaded and stored in // different sizes. if (SomePtr->getType() != ASIV->getType()) return; for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end(); UI != UE; ++UI) { // Ignore instructions that are outside the loop. Instruction *Use = dyn_cast<Instruction>(*UI); if (!Use || !CurLoop->contains(Use)) continue; // If there is an non-load/store instruction in the loop, we can't promote // it. if (isa<LoadInst>(Use)) assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken"); else if (isa<StoreInst>(Use)) { assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken"); if (Use->getOperand(0) == ASIV) return; } else return; // Not a load or store. if (!GuaranteedToExecute) GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use); LoopUses.push_back(Use); } } // If there isn't a guaranteed-to-execute instruction, we can't promote. if (!GuaranteedToExecute) return; // Otherwise, this is safe to promote, lets do it! DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n'); Changed = true; ++NumPromoted; // We use the SSAUpdater interface to insert phi nodes as required. SmallVector<PHINode*, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); // It wants to know some value of the same type as what we'll be inserting. Value *SomeValue; if (isa<LoadInst>(LoopUses[0])) SomeValue = LoopUses[0]; else SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0); SSA.Initialize(SomeValue->getType(), SomeValue->getName()); // First step: bucket up uses of the pointers by the block they occur in. // This is important because we have to handle multiple defs/uses in a block // ourselves: SSAUpdater is purely for cross-block references. // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element. DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock; for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) { Instruction *User = LoopUses[i]; UsesByBlock[User->getParent()].push_back(User); } // Okay, now we can iterate over all the blocks in the loop with uses, // processing them. Keep track of which loads are loading a live-in value. SmallVector<LoadInst*, 32> LiveInLoads; DenseMap<Value*, Value*> ReplacedLoads; for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) { Instruction *User = LoopUses[LoopUse]; std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()]; // If this block has already been processed, ignore this repeat use. if (BlockUses.empty()) continue; // Okay, this is the first use in the block. If this block just has a // single user in it, we can rewrite it trivially. if (BlockUses.size() == 1) { // If it is a store, it is a trivial def of the value in the block. if (isa<StoreInst>(User)) { SSA.AddAvailableValue(User->getParent(), cast<StoreInst>(User)->getOperand(0)); } else { // Otherwise it is a load, queue it to rewrite as a live-in load. LiveInLoads.push_back(cast<LoadInst>(User)); } BlockUses.clear(); continue; } // Otherwise, check to see if this block is all loads. If so, we can queue // them all as live in loads. bool HasStore = false; for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) { if (isa<StoreInst>(BlockUses[i])) { HasStore = true; break; } } if (!HasStore) { for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) LiveInLoads.push_back(cast<LoadInst>(BlockUses[i])); BlockUses.clear(); continue; } // Otherwise, we have mixed loads and stores (or just a bunch of stores). // Since SSAUpdater is purely for cross-block values, we need to determine // the order of these instructions in the block. If the first use in the // block is a load, then it uses the live in value. The last store defines // the live out value. We handle this by doing a linear scan of the block. BasicBlock *BB = User->getParent(); Value *StoredValue = 0; for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { if (LoadInst *L = dyn_cast<LoadInst>(II)) { // If this is a load from an unrelated pointer, ignore it. if (!PointerMustAliases.count(L->getOperand(0))) continue; // If we haven't seen a store yet, this is a live in use, otherwise // use the stored value. if (StoredValue) { L->replaceAllUsesWith(StoredValue); ReplacedLoads[L] = StoredValue; } else { LiveInLoads.push_back(L); } continue; } if (StoreInst *S = dyn_cast<StoreInst>(II)) { // If this is a store to an unrelated pointer, ignore it. if (!PointerMustAliases.count(S->getOperand(1))) continue; // Remember that this is the active value in the block. StoredValue = S->getOperand(0); } } // The last stored value that happened is the live-out for the block. assert(StoredValue && "Already checked that there is a store in block"); SSA.AddAvailableValue(BB, StoredValue); BlockUses.clear(); } // Now that all the intra-loop values are classified, set up the preheader. // It gets a load of the pointer we're promoting, and it is the live-out value // from the preheader. LoadInst *PreheaderLoad = new LoadInst(SomePtr,SomePtr->getName()+".promoted", Preheader->getTerminator()); SSA.AddAvailableValue(Preheader, PreheaderLoad); // Now that the preheader is good to go, set up the exit blocks. Each exit // block gets a store of the live-out values that feed them. Since we've // already told the SSA updater about the defs in the loop and the preheader // definition, it is all set and we can start using it. SmallVector<BasicBlock*, 8> ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = ExitBlocks[i]; Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); Instruction *InsertPos = ExitBlock->getFirstNonPHI(); new StoreInst(LiveInValue, SomePtr, InsertPos); } // Okay, now we rewrite all loads that use live-in values in the loop, // inserting PHI nodes as necessary. for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) { LoadInst *ALoad = LiveInLoads[i]; Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent()); ALoad->replaceAllUsesWith(NewVal); CurAST->copyValue(ALoad, NewVal); ReplacedLoads[ALoad] = NewVal; } // If the preheader load is itself a pointer, we need to tell alias analysis // about the new pointer we created in the preheader block and about any PHI // nodes that just got inserted. if (PreheaderLoad->getType()->isPointerTy()) { // Copy any value stored to or loaded from a must-alias of the pointer. CurAST->copyValue(SomeValue, PreheaderLoad); for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) CurAST->copyValue(SomeValue, NewPHIs[i]); } // Now that everything is rewritten, delete the old instructions from the body // of the loop. They should all be dead now. for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) { Instruction *User = LoopUses[i]; // If this is a load that still has uses, then the load must have been added // as a live value in the SSAUpdate data structure for a block (e.g. because // the loaded value was stored later). In this case, we need to recursively // propagate the updates until we get to the real value. if (!User->use_empty()) { Value *NewVal = ReplacedLoads[User]; assert(NewVal && "not a replaced load?"); // Propagate down to the ultimate replacee. The intermediately loads // could theoretically already have been deleted, so we don't want to // dereference the Value*'s. DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal); while (RLI != ReplacedLoads.end()) { NewVal = RLI->second; RLI = ReplacedLoads.find(NewVal); } User->replaceAllUsesWith(NewVal); CurAST->copyValue(User, NewVal); } CurAST->deleteValue(User); User->eraseFromParent(); } // fwew, we're done! }
/// DoPromotion - This method actually performs the promotion of the specified /// arguments, and returns the new function. At this point, we know that it's /// safe to do so. static Function * doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, SmallPtrSetImpl<Argument *> &ByValArgsToTransform, Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>> ReplaceCallSite) { // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments. FunctionType *FTy = F->getFunctionType(); std::vector<Type *> Params; using ScalarizeTable = std::set<std::pair<Type *, IndicesVector>>; // ScalarizedElements - If we are promoting a pointer that has elements // accessed out of it, keep track of which elements are accessed so that we // can add one argument for each. // // Arguments that are directly loaded will have a zero element value here, to // handle cases where there are both a direct load and GEP accesses. std::map<Argument *, ScalarizeTable> ScalarizedElements; // OriginalLoads - Keep track of a representative load instruction from the // original function so that we can tell the alias analysis implementation // what the new GEP/Load instructions we are inserting look like. // We need to keep the original loads for each argument and the elements // of the argument that are accessed. std::map<std::pair<Argument *, IndicesVector>, LoadInst *> OriginalLoads; // Attribute - Keep track of the parameter attributes for the arguments // that we are *not* promoting. For the ones that we do promote, the parameter // attributes are lost SmallVector<AttributeSet, 8> ArgAttrVec; AttributeList PAL = F->getAttributes(); // First, determine the new argument list unsigned ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++ArgNo) { if (ByValArgsToTransform.count(&*I)) { // Simple byval argument? Just add all the struct element types. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); StructType *STy = cast<StructType>(AgTy); Params.insert(Params.end(), STy->element_begin(), STy->element_end()); ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(), AttributeSet()); ++NumByValArgsPromoted; } else if (!ArgsToPromote.count(&*I)) { // Unchanged argument Params.push_back(I->getType()); ArgAttrVec.push_back(PAL.getParamAttributes(ArgNo)); } else if (I->use_empty()) { // Dead argument (which are always marked as promotable) ++NumArgumentsDead; // There may be remaining metadata uses of the argument for things like // llvm.dbg.value. Replace them with undef. I->replaceAllUsesWith(UndefValue::get(I->getType())); } else { // Okay, this is being promoted. This means that the only uses are loads // or GEPs which are only used by loads // In this table, we will track which indices are loaded from the argument // (where direct loads are tracked as no indices). ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; for (User *U : I->users()) { Instruction *UI = cast<Instruction>(U); Type *SrcTy; if (LoadInst *L = dyn_cast<LoadInst>(UI)) SrcTy = L->getType(); else SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType(); IndicesVector Indices; Indices.reserve(UI->getNumOperands() - 1); // Since loads will only have a single operand, and GEPs only a single // non-index operand, this will record direct loads without any indices, // and gep+loads with the GEP indices. for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end(); II != IE; ++II) Indices.push_back(cast<ConstantInt>(*II)->getSExtValue()); // GEPs with a single 0 index can be merged with direct loads if (Indices.size() == 1 && Indices.front() == 0) Indices.clear(); ArgIndices.insert(std::make_pair(SrcTy, Indices)); LoadInst *OrigLoad; if (LoadInst *L = dyn_cast<LoadInst>(UI)) OrigLoad = L; else // Take any load, we will use it only to update Alias Analysis OrigLoad = cast<LoadInst>(UI->user_back()); OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad; } // Add a parameter to the function for each element passed in. for (const auto &ArgIndex : ArgIndices) { // not allowed to dereference ->begin() if size() is 0 Params.push_back(GetElementPtrInst::getIndexedType( cast<PointerType>(I->getType()->getScalarType())->getElementType(), ArgIndex.second)); ArgAttrVec.push_back(AttributeSet()); assert(Params.back()); } if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty()) ++NumArgumentsPromoted; else ++NumAggregatesPromoted; } } Type *RetTy = FTy->getReturnType(); // Construct the new function type using the new arguments. FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); // Create the new function body and insert it into the module. Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName()); NF->copyAttributesFrom(F); // Patch the pointer to LLVM function in debug info descriptor. NF->setSubprogram(F->getSubprogram()); F->setSubprogram(nullptr); DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" << "From: " << *F); // Recompute the parameter attributes list based on the new arguments for // the function. NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttributes(), PAL.getRetAttributes(), ArgAttrVec)); ArgAttrVec.clear(); F->getParent()->getFunctionList().insert(F->getIterator(), NF); NF->takeName(F); // Loop over all of the callers of the function, transforming the call sites // to pass in the loaded pointers. // SmallVector<Value *, 16> Args; while (!F->use_empty()) { CallSite CS(F->user_back()); assert(CS.getCalledFunction() == F); Instruction *Call = CS.getInstruction(); const AttributeList &CallPAL = CS.getAttributes(); // Loop over the operands, inserting GEP and loads in the caller as // appropriate. CallSite::arg_iterator AI = CS.arg_begin(); ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++AI, ++ArgNo) if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { Args.push_back(*AI); // Unmodified argument ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo)); } else if (ByValArgsToTransform.count(&*I)) { // Emit a GEP and load for each element of the struct. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr}; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create( STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call); // TODO: Tell AA about the new values? Args.push_back(new LoadInst(Idx, Idx->getName() + ".val", Call)); ArgAttrVec.push_back(AttributeSet()); } } else if (!I->use_empty()) { // Non-dead argument: insert GEPs and loads as appropriate. ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; // Store the Value* version of the indices in here, but declare it now // for reuse. std::vector<Value *> Ops; for (const auto &ArgIndex : ArgIndices) { Value *V = *AI; LoadInst *OrigLoad = OriginalLoads[std::make_pair(&*I, ArgIndex.second)]; if (!ArgIndex.second.empty()) { Ops.reserve(ArgIndex.second.size()); Type *ElTy = V->getType(); for (auto II : ArgIndex.second) { // Use i32 to index structs, and i64 for others (pointers/arrays). // This satisfies GEP constraints. Type *IdxTy = (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext()) : Type::getInt64Ty(F->getContext())); Ops.push_back(ConstantInt::get(IdxTy, II)); // Keep track of the type we're currently indexing. if (auto *ElPTy = dyn_cast<PointerType>(ElTy)) ElTy = ElPTy->getElementType(); else ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II); } // And create a GEP to extract those indices. V = GetElementPtrInst::Create(ArgIndex.first, V, Ops, V->getName() + ".idx", Call); Ops.clear(); } // Since we're replacing a load make sure we take the alignment // of the previous load. LoadInst *newLoad = new LoadInst(V, V->getName() + ".val", Call); newLoad->setAlignment(OrigLoad->getAlignment()); // Transfer the AA info too. AAMDNodes AAInfo; OrigLoad->getAAMetadata(AAInfo); newLoad->setAAMetadata(AAInfo); Args.push_back(newLoad); ArgAttrVec.push_back(AttributeSet()); } } // Push any varargs arguments on the list. for (; AI != CS.arg_end(); ++AI, ++ArgNo) { Args.push_back(*AI); ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo)); } SmallVector<OperandBundleDef, 1> OpBundles; CS.getOperandBundlesAsDefs(OpBundles); CallSite NewCS; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), Args, OpBundles, "", Call); } else { auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", Call); NewCall->setTailCallKind(cast<CallInst>(Call)->getTailCallKind()); NewCS = NewCall; } NewCS.setCallingConv(CS.getCallingConv()); NewCS.setAttributes( AttributeList::get(F->getContext(), CallPAL.getFnAttributes(), CallPAL.getRetAttributes(), ArgAttrVec)); NewCS->setDebugLoc(Call->getDebugLoc()); uint64_t W; if (Call->extractProfTotalWeight(W)) NewCS->setProfWeight(W); Args.clear(); ArgAttrVec.clear(); // Update the callgraph to know that the callsite has been transformed. if (ReplaceCallSite) (*ReplaceCallSite)(CS, NewCS); if (!Call->use_empty()) { Call->replaceAllUsesWith(NewCS.getInstruction()); NewCS->takeName(Call); } // Finally, remove the old call from the program, reducing the use-count of // F. Call->eraseFromParent(); } const DataLayout &DL = F->getParent()->getDataLayout(); // Since we have now created the new function, splice the body of the old // function right into the new function, leaving the old rotting hulk of the // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); // Loop over the argument list, transferring uses of the old arguments over to // the new arguments, also transferring over the names as well. for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I) { if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { // If this is an unmodified argument, move the name and users over to the // new version. I->replaceAllUsesWith(&*I2); I2->takeName(&*I); ++I2; continue; } if (ByValArgsToTransform.count(&*I)) { // In the callee, we create an alloca, and store each of the new incoming // arguments into the alloca. Instruction *InsertPt = &NF->begin()->front(); // Just add all the struct element types. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr, I->getParamAlignment(), "", InsertPt); StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr}; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create( AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i), InsertPt); I2->setName(I->getName() + "." + Twine(i)); new StoreInst(&*I2++, Idx, InsertPt); } // Anything that used the arg should now use the alloca. I->replaceAllUsesWith(TheAlloca); TheAlloca->takeName(&*I); // If the alloca is used in a call, we must clear the tail flag since // the callee now uses an alloca from the caller. for (User *U : TheAlloca->users()) { CallInst *Call = dyn_cast<CallInst>(U); if (!Call) continue; Call->setTailCall(false); } continue; } if (I->use_empty()) continue; // Otherwise, if we promoted this argument, then all users are load // instructions (or GEPs with only load users), and all loads should be // using the new argument that we added. ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; while (!I->use_empty()) { if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) { assert(ArgIndices.begin()->second.empty() && "Load element should sort to front!"); I2->setName(I->getName() + ".val"); LI->replaceAllUsesWith(&*I2); LI->eraseFromParent(); DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() << "' in function '" << F->getName() << "'\n"); } else { GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back()); IndicesVector Operands; Operands.reserve(GEP->getNumIndices()); for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); II != IE; ++II) Operands.push_back(cast<ConstantInt>(*II)->getSExtValue()); // GEPs with a single 0 index can be merged with direct loads if (Operands.size() == 1 && Operands.front() == 0) Operands.clear(); Function::arg_iterator TheArg = I2; for (ScalarizeTable::iterator It = ArgIndices.begin(); It->second != Operands; ++It, ++TheArg) { assert(It != ArgIndices.end() && "GEP not handled??"); } std::string NewName = I->getName(); for (unsigned i = 0, e = Operands.size(); i != e; ++i) { NewName += "." + utostr(Operands[i]); } NewName += ".val"; TheArg->setName(NewName); DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() << "' of function '" << NF->getName() << "'\n"); // All of the uses must be load instructions. Replace them all with // the argument specified by ArgNo. while (!GEP->use_empty()) { LoadInst *L = cast<LoadInst>(GEP->user_back()); L->replaceAllUsesWith(&*TheArg); L->eraseFromParent(); } GEP->eraseFromParent(); } } // Increment I2 past all of the arguments added for this promoted pointer. std::advance(I2, ArgIndices.size()); } return NF; }
Function * futamurize( const Function * orig_func, DenseMap<const Value*, Value*> &argmap, std::set<const unsigned char *> &constant_addresses_set ) { LLVMContext &context = getGlobalContext(); // Make a copy of the function, removing constant arguments Function * specialized_func = CloneFunction( orig_func, argmap ); specialized_func->setName( orig_func->getNameStr() + "_1" ); // add it to our module LLVM_Module->getFunctionList().push_back( specialized_func ); printf("\nspecialized_func = %p <%s>\n", specialized_func, specialized_func->getName().data()); //~ specialized_func->dump(); // Optimize it FunctionPassManager PM( LLVM_Module ); createStandardFunctionPasses( &PM, 3 ); PM.add(createScalarReplAggregatesPass()); // Break up aggregate allocas PM.add(createInstructionCombiningPass()); // Cleanup for scalarrepl. PM.add(createJumpThreadingPass()); // Thread jumps. PM.add(createCFGSimplificationPass()); // Merge & remove BBs PM.add(createInstructionCombiningPass()); // Combine silly seq's PM.add(createTailCallEliminationPass()); // Eliminate tail calls PM.add(createCFGSimplificationPass()); // Merge & remove BBs PM.add(createReassociatePass()); // Reassociate expressions PM.add(createLoopRotatePass()); // Rotate Loop PM.add(createLICMPass()); // Hoist loop invariants PM.add(createLoopUnswitchPass( false )); PM.add(createInstructionCombiningPass()); PM.add(createIndVarSimplifyPass()); // Canonicalize indvars PM.add(createLoopDeletionPass()); // Delete dead loops PM.add(createLoopUnroll2Pass()); // Unroll small loops PM.add(createInstructionCombiningPass()); // Clean up after the unroller PM.add(createGVNPass()); // Remove redundancies PM.add(createMemCpyOptPass()); // Remove memcpy / form memset PM.add(createSCCPPass()); // Constant prop with SCCP PM.add(createPromoteMemoryToRegisterPass()); PM.add(createConstantPropagationPass()); PM.add(createDeadStoreEliminationPass()); PM.add(createAggressiveDCEPass()); PM.add(new MemoryDependenceAnalysis()); //~ PM.add(createAAEvalPass()); const PassInfo * pinfo = Pass::lookupPassInfo( "print-alias-sets" ); if( !pinfo ) { printf( "print-alias-sets not found\n" ); exit(-1); } PM.add( pinfo->createPass() ); FunctionPassManager PM_Inline( LLVM_Module ); PM_Inline.add(createSingleFunctionInliningPass()); bool Changed = false; int iterations = 2; int inline_iterations = 6; do { Changed = false; // first do some optimizations PM.doInitialization(); PM.run( *specialized_func ); PM.doFinalization(); // Load from Constant Memory detection const TargetData *TD = LLVM_EE->getTargetData(); for (inst_iterator I = inst_begin(specialized_func), E = inst_end(specialized_func); I != E; ++I) { Instruction * inst = (Instruction *) &*I; // get all Load instructions LoadInst * load = dyn_cast<LoadInst>( inst ); if( !load ) continue; if( load->isVolatile() ) continue; if (load->use_empty()) continue; // Don't muck with dead instructions... // get the address loaded by load instruction Value *ptr_value = load->getPointerOperand(); // we're only interested in constant addresses ConstantExpr * ptr_constant_expr = dyn_cast<ConstantExpr>( ptr_value ); if( !ptr_constant_expr ) continue; ptr_constant_expr->dump(); // compute real address of constant pointer expression Constant * ptr_constant = ConstantFoldConstantExpression( ptr_constant_expr, TD ); if( !ptr_constant ) continue; ptr_constant->dump(); // convert to int constant ConstantInt *int_constant = dyn_cast<ConstantInt>( ConstantExpr::getPtrToInt( ptr_constant, Type::getInt64Ty( context ))); if( !int_constant ) continue; int_constant->dump(); // get data size int data_length = TD->getTypeAllocSize( load->getType() ); ptr_value->getType()->dump(); // get real address (at last !) const unsigned char * c_ptr = (const unsigned char *) int_constant->getLimitedValue(); printf( "%ld %d %d\n", c_ptr, constant_addresses_set.count( c_ptr ), data_length ); // check what's in this address int isconst = 1; for( int offset=0; offset<data_length; offset++ ) isconst &= constant_addresses_set.count( c_ptr + offset ); if( !isconst ) continue; printf( "It is constant.\n" ); // make a LLVM const with the data Constant *new_constant = NULL; switch( data_length ) { case 1: new_constant = ConstantInt::get( Type::getInt8Ty( context ), *(uint8_t*)c_ptr, false /* signed */ ); break; case 2: new_constant = ConstantInt::get( Type::getInt16Ty( context ), *(uint16_t*)c_ptr, false /* signed */ ); break; case 4: new_constant = ConstantInt::get( Type::getInt32Ty( context ), *(uint32_t*)c_ptr, false /* signed */ ); break; case 8: new_constant = ConstantInt::get( Type::getInt64Ty( context ), *(uint64_t*)c_ptr, false /* signed */ ); break; default: { StringRef const_data ( (const char *) c_ptr, data_length ); new_constant = ConstantArray::get( context, const_data, false /* dont add terminating null */ ); } } if( !new_constant ) continue; new_constant->dump(); //~ // get the type that is loaded const Type *Ty = load->getType(); // do we need a cast ? if( load->getType() != new_constant->getType() ) { new_constant = ConstantExpr::getBitCast( new_constant, Ty ); new_constant->dump(); } // zap the load and replace with constant address load->replaceAllUsesWith( new_constant ); printf( "\nREPLACED :...\n" ); load->dump(); new_constant->dump(); Changed = true; } if( Changed ) continue; // re-optimize and do another pass of constant load elimination // if we can't do anything else, do an inlining pass if( inline_iterations > 0 ) { inline_iterations --; PM_Inline.doInitialization(); Changed |= PM_Inline.run( *specialized_func ); PM_Inline.doFinalization(); //~ for( int i=0; i<3; i++ ) { PM.doInitialization(); Changed |= PM.run( *specialized_func ); PM.doFinalization(); } } if( iterations>0 && !Changed ) iterations--; } while( Changed || iterations>0 ); return specialized_func; }