/// isSafeToPromoteArgument - As you might guess from the name of this method, /// it checks to see if it is both safe and useful to promote the argument. /// This method limits promotion of aggregates to only promote up to three /// elements of the aggregate in order to avoid exploding the number of /// arguments passed in. bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { typedef std::set<IndicesVector> GEPIndicesSet; // Quick exit for unused arguments if (Arg->use_empty()) return true; // We can only promote this argument if all of the uses are loads, or are GEP // instructions (with constant indices) that are subsequently loaded. // // Promoting the argument causes it to be loaded in the caller // unconditionally. This is only safe if we can prove that either the load // would have happened in the callee anyway (ie, there is a load in the entry // block) or the pointer passed in at every call site is guaranteed to be // valid. // In the former case, invalid loads can happen, but would have happened // anyway, in the latter case, invalid loads won't happen. This prevents us // from introducing an invalid load that wouldn't have happened in the // original code. // // This set will contain all sets of indices that are loaded in the entry // block, and thus are safe to unconditionally load in the caller. GEPIndicesSet SafeToUnconditionallyLoad; // This set contains all the sets of indices that we are planning to promote. // This makes it possible to limit the number of arguments added. GEPIndicesSet ToPromote; // If the pointer is always valid, any load with first index 0 is valid. if (isByVal || AllCalleesPassInValidPointerForArgument(Arg)) SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); // First, iterate the entry block and mark loads of (geps of) arguments as // safe. BasicBlock *EntryBlock = Arg->getParent()->begin(); // Declare this here so we can reuse it IndicesVector Indices; for (BasicBlock::iterator I = EntryBlock->begin(), E = EntryBlock->end(); I != E; ++I) if (LoadInst *LI = dyn_cast<LoadInst>(I)) { Value *V = LI->getPointerOperand(); if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { V = GEP->getPointerOperand(); if (V == Arg) { // This load actually loads (part of) Arg? Check the indices then. Indices.reserve(GEP->getNumIndices()); for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); II != IE; ++II) if (ConstantInt *CI = dyn_cast<ConstantInt>(*II)) Indices.push_back(CI->getSExtValue()); else // We found a non-constant GEP index for this argument? Bail out // right away, can't promote this argument at all. return false; // Indices checked out, mark them as safe MarkIndicesSafe(Indices, SafeToUnconditionallyLoad); Indices.clear(); } } else if (V == Arg) { // Direct loads are equivalent to a GEP with a single 0 index. MarkIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad); } } // Now, iterate all uses of the argument to see if there are any uses that are // not (GEP+)loads, or any (GEP+)loads that are not safe to promote. SmallVector<LoadInst*, 16> Loads; IndicesVector Operands; for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end(); UI != E; ++UI) { User *U = *UI; Operands.clear(); if (LoadInst *LI = dyn_cast<LoadInst>(U)) { if (LI->isVolatile()) return false; // Don't hack volatile loads Loads.push_back(LI); // Direct loads are equivalent to a GEP with a zero index and then a load. Operands.push_back(0); } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { if (GEP->use_empty()) { // Dead GEP's cause trouble later. Just remove them if we run into // them. getAnalysis<AliasAnalysis>().deleteValue(GEP); GEP->eraseFromParent(); // TODO: This runs the above loop over and over again for dead GEPs // Couldn't we just do increment the UI iterator earlier and erase the // use? return isSafeToPromoteArgument(Arg, isByVal); } // Ensure that all of the indices are constants. for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end(); i != e; ++i) if (ConstantInt *C = dyn_cast<ConstantInt>(*i)) Operands.push_back(C->getSExtValue()); else return false; // Not a constant operand GEP! // Ensure that the only users of the GEP are load instructions. for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end(); UI != E; ++UI) if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { if (LI->isVolatile()) return false; // Don't hack volatile loads Loads.push_back(LI); } else { // Other uses than load? return false; } } else { return false; // Not a load or a GEP. } // Now, see if it is safe to promote this load / loads of this GEP. Loading // is safe if Operands, or a prefix of Operands, is marked as safe. if (!PrefixIn(Operands, SafeToUnconditionallyLoad)) return false; // See if we are already promoting a load with these indices. If not, check // to make sure that we aren't promoting too many elements. If so, nothing // to do. if (ToPromote.find(Operands) == ToPromote.end()) { if (maxElements > 0 && ToPromote.size() == maxElements) { DEBUG(dbgs() << "argpromotion not promoting argument '" << Arg->getName() << "' because it would require adding more " << "than " << maxElements << " arguments to the function.\n"); // We limit aggregate promotion to only promoting up to a fixed number // of elements of the aggregate. return false; } ToPromote.insert(Operands); } } if (Loads.empty()) return true; // No users, this is a dead argument. // Okay, now we know that the argument is only used by load instructions and // it is safe to unconditionally perform all of them. Use alias analysis to // check to see if the pointer is guaranteed to not be modified from entry of // the function to each of the load instructions. // Because there could be several/many load instructions, remember which // blocks we know to be transparent to the load. SmallPtrSet<BasicBlock*, 16> TranspBlocks; AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); TargetData *TD = getAnalysisIfAvailable<TargetData>(); if (!TD) return false; // Without TargetData, assume the worst. for (unsigned i = 0, e = Loads.size(); i != e; ++i) { // Check to see if the load is invalidated from the start of the block to // the load itself. LoadInst *Load = Loads[i]; BasicBlock *BB = Load->getParent(); const PointerType *LoadTy = cast<PointerType>(Load->getPointerOperand()->getType()); unsigned LoadSize =(unsigned)TD->getTypeStoreSize(LoadTy->getElementType()); if (AA.canInstructionRangeModify(BB->front(), *Load, Arg, LoadSize)) return false; // Pointer is invalidated! // Now check every path from the entry block to the load for transparency. // To do this, we perform a depth first search on the inverse CFG from the // loading block. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *P = *PI; for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> > I = idf_ext_begin(P, TranspBlocks), E = idf_ext_end(P, TranspBlocks); I != E; ++I) if (AA.canBasicBlockModify(**I, Arg, LoadSize)) return false; } } // If the path from the entry of the function to each load is free of // instructions that potentially invalidate the load, we can make the // transformation! return true; }
/// processStore - When GVN is scanning forward over instructions, we look for /// some other patterns to fold away. In particular, this looks for stores to /// neighboring locations of memory. If it sees enough consequtive ones /// (currently 4) it attempts to merge them together into a memcpy/memset. bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (SI->isVolatile()) return false; TargetData *TD = getAnalysisIfAvailable<TargetData>(); if (!TD) return false; // Detect cases where we're performing call slot forwarding, but // happen to be using a load-store pair to implement it, rather than // a memcpy. if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) { if (!LI->isVolatile() && LI->hasOneUse()) { MemDepResult dep = MD->getDependency(LI); CallInst *C = 0; if (dep.isClobber() && !isa<MemCpyInst>(dep.getInst())) C = dyn_cast<CallInst>(dep.getInst()); if (C) { bool changed = performCallSlotOptzn(LI, SI->getPointerOperand()->stripPointerCasts(), LI->getPointerOperand()->stripPointerCasts(), TD->getTypeStoreSize(SI->getOperand(0)->getType()), C); if (changed) { MD->removeInstruction(SI); SI->eraseFromParent(); LI->eraseFromParent(); ++NumMemCpyInstr; return true; } } } } LLVMContext &Context = SI->getContext(); // There are two cases that are interesting for this code to handle: memcpy // and memset. Right now we only handle memset. // Ensure that the value being stored is something that can be memset'able a // byte at a time like "0" or "-1" or any width, as well as things like // 0xA0A0A0A0 and 0.0. Value *ByteVal = isBytewiseValue(SI->getOperand(0)); if (!ByteVal) return false; AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); Module *M = SI->getParent()->getParent()->getParent(); // Okay, so we now have a single store that can be splatable. Scan to find // all subsequent stores of the same value to offset from the same pointer. // Join these together into ranges, so we can decide whether contiguous blocks // are stored. MemsetRanges Ranges(*TD); Value *StartPtr = SI->getPointerOperand(); BasicBlock::iterator BI = SI; for (++BI; !isa<TerminatorInst>(BI); ++BI) { if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) { // If the call is readnone, ignore it, otherwise bail out. We don't even // allow readonly here because we don't want something like: // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). if (AA.getModRefBehavior(CallSite(BI)) == AliasAnalysis::DoesNotAccessMemory) continue; // TODO: If this is a memset, try to join it in. break; } else if (isa<VAArgInst>(BI) || isa<LoadInst>(BI)) break; // If this is a non-store instruction it is fine, ignore it. StoreInst *NextStore = dyn_cast<StoreInst>(BI); if (NextStore == 0) continue; // If this is a store, see if we can merge it in. if (NextStore->isVolatile()) break; // Check to see if this stored value is of the same byte-splattable value. if (ByteVal != isBytewiseValue(NextStore->getOperand(0))) break; // Check to see if this store is to a constant offset from the start ptr. int64_t Offset; if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD)) break; Ranges.addStore(Offset, NextStore); } // If we have no ranges, then we just had a single store with nothing that // could be merged in. This is a very common case of course. if (Ranges.empty()) return false; // If we had at least one store that could be merged in, add the starting // store as well. We try to avoid this unless there is at least something // interesting as a small compile-time optimization. Ranges.addStore(0, SI); // Now that we have full information about ranges, loop over the ranges and // emit memset's for anything big enough to be worthwhile. bool MadeChange = false; for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { const MemsetRange &Range = *I; if (Range.TheStores.size() == 1) continue; // If it is profitable to lower this range to memset, do so now. if (!Range.isProfitableToUseMemset(*TD)) continue; // Otherwise, we do want to transform this! Create a new memset. We put // the memset right before the first instruction that isn't part of this // memset block. This ensure that the memset is dominated by any addressing // instruction needed by the start of the block. BasicBlock::iterator InsertPt = BI; // Get the starting pointer of the block. StartPtr = Range.StartPtr; // Determine alignment unsigned Alignment = Range.Alignment; if (Alignment == 0) { const Type *EltType = cast<PointerType>(StartPtr->getType())->getElementType(); Alignment = TD->getABITypeAlignment(EltType); } // Cast the start ptr to be i8* as memset requires. const PointerType* StartPTy = cast<PointerType>(StartPtr->getType()); const PointerType *i8Ptr = Type::getInt8PtrTy(Context, StartPTy->getAddressSpace()); if (StartPTy!= i8Ptr) StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(), InsertPt); Value *Ops[] = { StartPtr, ByteVal, // Start, value // size ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start), // align ConstantInt::get(Type::getInt32Ty(Context), Alignment), // volatile ConstantInt::getFalse(Context), }; const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() }; Function *MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2); Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt); DEBUG(dbgs() << "Replace stores:\n"; for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) dbgs() << *Range.TheStores[i] << '\n'; dbgs() << "With: " << *C << '\n'); C=C; // Don't invalidate the iterator BBI = BI; // Zap all the stores. for (SmallVector<StoreInst*, 16>::const_iterator SI = Range.TheStores.begin(), SE = Range.TheStores.end(); SI != SE; ++SI) (*SI)->eraseFromParent(); ++NumMemSetInfer; MadeChange = true; } return MadeChange; }