/// tryMergingIntoMemset - When scanning forward over instructions, we look for
/// some other patterns to fold away. In particular, this looks for stores to
/// neighboring locations of memory. If it sees enough consecutive ones, it
/// attempts to merge them together into a memcpy/memset.
Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
                                             Value *StartPtr, Value *ByteVal) {
  if (TD == 0) return 0;

  // Okay, so we now have a single store that can be splatable.  Scan to find
  // all subsequent stores of the same value to offset from the same pointer.
  // Join these together into ranges, so we can decide whether contiguous
  // blocks are stored.
  MemsetRanges Ranges(*TD);

  BasicBlock::iterator BI = StartInst;
  for (++BI; !isa<TerminatorInst>(BI); ++BI) {
    if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
      // If the instruction is readnone, ignore it, otherwise bail out.  We
      // don't even allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
        break;
      continue;
    }

    if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
      // If this is a store, see if we can merge it in.
      if (!NextStore->isSimple()) break;

      // Check to see if this stored value is of the same byte-splattable
      // value.
      if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      int64_t Offset;
      if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(),
                           Offset, *TD))
        break;

      Ranges.addStore(Offset, NextStore);
    } else {
      MemSetInst *MSI = cast<MemSetInst>(BI);

      if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
          !isa<ConstantInt>(MSI->getLength()))
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      int64_t Offset;
      if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD))
        break;

      Ranges.addMemSet(Offset, MSI);
    }
  }

  // If we have no ranges, then we just had a single store with nothing that
  // could be merged in.  This is a very common case of course.
  if (Ranges.empty())
    return 0;

  // If we had at least one store that could be merged in, add the starting
  // store as well.  We try to avoid this unless there is at least something
  // interesting as a small compile-time optimization.
  Ranges.addInst(0, StartInst);

  // If we create any memsets, we put it right before the first instruction
  // that isn't part of the memset block.  This ensures that the memset is
  // dominated by any addressing instruction needed by the start of the block.
  IRBuilder<> Builder(BI);

  // Now that we have full information about ranges, loop over the ranges and
  // emit memsets for anything big enough to be worthwhile.
  Instruction *AMemSet = 0;
  for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
       I != E; ++I) {
    const MemsetRange &Range = *I;

    if (Range.TheStores.size() == 1) continue;

    // If it is profitable to lower this range to memset, do so now.
    if (!Range.isProfitableToUseMemset(*TD))
      continue;

    // Otherwise, we do want to transform this!  Create a new memset.
    // Get the starting pointer of the block.
    StartPtr = Range.StartPtr;

    // Determine alignment
    unsigned Alignment = Range.Alignment;
    if (Alignment == 0) {
      Type *EltType =
        cast<PointerType>(StartPtr->getType())->getElementType();
      Alignment = TD->getABITypeAlignment(EltType);
    }

    AMemSet =
      Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);

    DEBUG(dbgs() << "Replace stores:\n";
          for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
            dbgs() << *Range.TheStores[i] << '\n';
          dbgs() << "With: " << *AMemSet << '\n');

    if (!Range.TheStores.empty())
      AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());

    // Zap all the stores.
    for (SmallVector<Instruction*, 16>::const_iterator
           SI = Range.TheStores.begin(),
           SE = Range.TheStores.end(); SI != SE; ++SI) {
      MD->removeInstruction(*SI);
      (*SI)->eraseFromParent();
    }
    ++NumMemSetInfer;
  }

  return AMemSet;
}
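// The snippet below is a minimal, self-contained sketch of the range-joining
// idea used by tryMergingIntoMemset: byte offsets from a common base pointer
// are sorted and folded into contiguous [Start, End) intervals, and only
// intervals that absorbed more than one original store are worth lowering to
// a memset. The SimpleRange/joinOffsets names are illustrative only and are
// not part of the MemsetRanges class used above.
#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

struct SimpleRange {
  int64_t Start, End;   // Half-open byte interval [Start, End).
  unsigned NumStores;   // How many original stores landed in this interval.
};

// Join (Offset, SizeInBytes) pairs into maximal contiguous ranges.
static std::vector<SimpleRange>
joinOffsets(std::vector<std::pair<int64_t, int64_t>> Stores) {
  std::sort(Stores.begin(), Stores.end());
  std::vector<SimpleRange> Ranges;
  for (const auto &S : Stores) {
    if (!Ranges.empty() && S.first <= Ranges.back().End) {
      // Overlaps or abuts the previous range: grow it.
      Ranges.back().End = std::max(Ranges.back().End, S.first + S.second);
      ++Ranges.back().NumStores;
    } else {
      Ranges.push_back({S.first, S.first + S.second, 1});
    }
  }
  return Ranges;
}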
bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
  SmallVector<LoadInst *, 4> aggrLoads;
  SmallVector<MemTransferInst *, 4> aggrMemcpys;
  SmallVector<MemSetInst *, 4> aggrMemsets;

  DataLayout *TD = &getAnalysis<DataLayout>();
  LLVMContext &Context = F.getParent()->getContext();

  //
  // Collect all the aggrLoads, aggrMemcpys and aggrMemsets.
  //
  //const BasicBlock *firstBB = &F.front(); // first BB in F
  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
    //BasicBlock *bb = BI;
    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
         ++II) {
      if (LoadInst *load = dyn_cast<LoadInst>(II)) {
        if (load->hasOneUse() == false)
          continue;

        if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize)
          continue;

        User *use = *(load->use_begin());
        if (StoreInst *store = dyn_cast<StoreInst>(use)) {
          if (store->getOperand(0) != load) //getValueOperand
            continue;
          aggrLoads.push_back(load);
        }
      } else if (MemTransferInst *intr = dyn_cast<MemTransferInst>(II)) {
        Value *len = intr->getLength();
        // If the number of elements being copied is greater
        // than MaxAggrCopySize, lower it to a loop.
        if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
          if (len_int->getZExtValue() >= MaxAggrCopySize) {
            aggrMemcpys.push_back(intr);
          }
        } else {
          // Turn variable-length memcpy/memmove into a loop.
          aggrMemcpys.push_back(intr);
        }
      } else if (MemSetInst *memsetintr = dyn_cast<MemSetInst>(II)) {
        Value *len = memsetintr->getLength();
        if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
          if (len_int->getZExtValue() >= MaxAggrCopySize) {
            aggrMemsets.push_back(memsetintr);
          }
        } else {
          // Turn variable-length memset into a loop.
          aggrMemsets.push_back(memsetintr);
        }
      }
    }
  }
  if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) &&
      (aggrMemsets.size() == 0))
    return false;

  //
  // Do the transformation of an aggr load/copy/set to a loop.
  //
  for (unsigned i = 0, e = aggrLoads.size(); i != e; ++i) {
    LoadInst *load = aggrLoads[i];
    StoreInst *store = dyn_cast<StoreInst>(*load->use_begin());
    Value *srcAddr = load->getOperand(0);
    Value *dstAddr = store->getOperand(1);
    unsigned numLoads = TD->getTypeStoreSize(load->getType());
    Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads);

    convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(),
                          store->isVolatile(), Context, F);

    store->eraseFromParent();
    load->eraseFromParent();
  }

  for (unsigned i = 0, e = aggrMemcpys.size(); i != e; ++i) {
    MemTransferInst *cpy = aggrMemcpys[i];
    Value *len = cpy->getLength();
    // llvm 2.7 version of memcpy does not have a volatile
    // operand yet. So always making it non-volatile
    // optimistically, so that we don't see unnecessary
    // st.volatile in ptx.
    convertTransferToLoop(cpy, cpy->getSource(), cpy->getDest(), len, false,
                          false, Context, F);
    cpy->eraseFromParent();
  }

  for (unsigned i = 0, e = aggrMemsets.size(); i != e; ++i) {
    MemSetInst *memsetinst = aggrMemsets[i];
    Value *len = memsetinst->getLength();
    Value *val = memsetinst->getValue();
    convertMemSetToLoop(memsetinst, memsetinst->getDest(), len, val, Context,
                        F);
    memsetinst->eraseFromParent();
  }

  return true;
}
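// convertTransferToLoop and convertMemSetToLoop (defined elsewhere in this
// file) build the replacement loop in IR with an IRBuilder. The sketch below
// only restates, in plain C++, the byte-wise loops that the emitted IR is
// expected to compute; the helper names here are illustrative and are not the
// pass's own helpers.
#include <cstddef>
#include <cstdint>

static void transferAsLoop(unsigned char *Dst, const unsigned char *Src,
                           size_t Len) {
  for (size_t I = 0; I != Len; ++I)
    Dst[I] = Src[I];   // One element copied per iteration of the emitted loop.
}

static void memsetAsLoop(unsigned char *Dst, uint8_t Val, size_t Len) {
  for (size_t I = 0; I != Len; ++I)
    Dst[I] = Val;      // One element stored per iteration of the emitted loop.
}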
void Lint::visitCallSite(CallSite CS) {
  Instruction &I = *CS.getInstruction();
  Value *Callee = CS.getCalledValue();

  visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr,
                       MemRef::Callee);

  if (Function *F = dyn_cast<Function>(findValue(Callee,
                                                 /*OffsetOk=*/false))) {
    Assert(CS.getCallingConv() == F->getCallingConv(),
           "Undefined behavior: Caller and callee calling convention differ",
           &I);

    FunctionType *FT = F->getFunctionType();
    unsigned NumActualArgs = CS.arg_size();

    Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
                          : FT->getNumParams() == NumActualArgs,
           "Undefined behavior: Call argument count mismatches callee "
           "argument count", &I);

    Assert(FT->getReturnType() == I.getType(),
           "Undefined behavior: Call return type mismatches "
           "callee return type", &I);

    // Check argument types (in case the callee was casted) and attributes.
    // TODO: Verify that caller and callee attributes are compatible.
    Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
    CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
    for (; AI != AE; ++AI) {
      Value *Actual = *AI;
      if (PI != PE) {
        Argument *Formal = &*PI++;
        Assert(Formal->getType() == Actual->getType(),
               "Undefined behavior: Call argument type mismatches "
               "callee parameter type", &I);

        // Check that noalias arguments don't alias other arguments. This is
        // not fully precise because we don't know the sizes of the
        // dereferenced memory regions.
        if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
          for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
            if (AI != BI && (*BI)->getType()->isPointerTy()) {
              AliasResult Result = AA->alias(*AI, *BI);
              Assert(Result != MustAlias && Result != PartialAlias,
                     "Unusual: noalias argument aliases another argument", &I);
            }

        // Check that an sret argument points to valid memory.
        if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
          Type *Ty =
            cast<PointerType>(Formal->getType())->getElementType();
          visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty),
                               DL->getABITypeAlignment(Ty), Ty,
                               MemRef::Read | MemRef::Write);
        }
      }
    }
  }

  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
    for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
         AI != AE; ++AI) {
      Value *Obj = findValue(*AI, /*OffsetOk=*/true);
      Assert(!isa<AllocaInst>(Obj),
             "Undefined behavior: Call with \"tail\" keyword references "
             "alloca", &I);
    }

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
    switch (II->getIntrinsicID()) {
    default: break;

    // TODO: Check more intrinsics

    case Intrinsic::memcpy: {
      MemCpyInst *MCI = cast<MemCpyInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize,
                           MCI->getAlignment(), nullptr, MemRef::Write);
      visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize,
                           MCI->getAlignment(), nullptr, MemRef::Read);

      // Check that the memcpy arguments don't overlap. The AliasAnalysis API
      // isn't expressive enough for what we really want to do. Known partial
      // overlap is not distinguished from the case where nothing is known.
      uint64_t Size = 0;
      if (const ConstantInt *Len =
              dyn_cast<ConstantInt>(findValue(MCI->getLength(),
                                              /*OffsetOk=*/false)))
        if (Len->getValue().isIntN(32))
          Size = Len->getValue().getZExtValue();
      Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
                 MustAlias,
             "Undefined behavior: memcpy source and destination overlap", &I);
      break;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MMI = cast<MemMoveInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MMI->getDest(), MemoryLocation::UnknownSize,
                           MMI->getAlignment(), nullptr, MemRef::Write);
      visitMemoryReference(I, MMI->getSource(), MemoryLocation::UnknownSize,
                           MMI->getAlignment(), nullptr, MemRef::Read);
      break;
    }
    case Intrinsic::memset: {
      MemSetInst *MSI = cast<MemSetInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MSI->getDest(), MemoryLocation::UnknownSize,
                           MSI->getAlignment(), nullptr, MemRef::Write);
      break;
    }

    case Intrinsic::vastart:
      Assert(I.getParent()->getParent()->isVarArg(),
             "Undefined behavior: va_start called in a non-varargs function",
             &I);

      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
                           nullptr, MemRef::Read | MemRef::Write);
      break;
    case Intrinsic::vacopy:
      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
                           nullptr, MemRef::Write);
      visitMemoryReference(I, CS.getArgument(1), MemoryLocation::UnknownSize, 0,
                           nullptr, MemRef::Read);
      break;
    case Intrinsic::vaend:
      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
                           nullptr, MemRef::Read | MemRef::Write);
      break;

    case Intrinsic::stackrestore:
      // Stackrestore doesn't read or write memory, but it sets the
      // stack pointer, which the compiler may read from or write to
      // at any time, so check it for both readability and writeability.
      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
                           nullptr, MemRef::Read | MemRef::Write);
      break;
    }
}
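// The memcpy case above can only ask AliasAnalysis whether the source and
// destination MustAlias; it cannot express "these two Size-byte regions are
// disjoint". For reference, the underlying property being linted is the one
// sketched below on raw addresses. The helper is illustrative only and is not
// part of Lint.
#include <cstddef>
#include <cstdint>

static bool regionsOverlap(uintptr_t A, uintptr_t B, size_t Size) {
  // Two half-open regions [A, A+Size) and [B, B+Size) overlap iff each one
  // starts before the other one ends.
  return A < B + Size && B < A + Size;
}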
void Lint::visitCallSite(CallSite CS) {
  Instruction &I = *CS.getInstruction();
  Value *Callee = CS.getCalledValue();

  // TODO: Check function alignment?
  visitMemoryReference(I, Callee, 0, 0);

  if (Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) {
    Assert1(CS.getCallingConv() == F->getCallingConv(),
            "Undefined behavior: Caller and callee calling convention differ",
            &I);

    const FunctionType *FT = F->getFunctionType();
    unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());

    Assert1(FT->isVarArg() ?
              FT->getNumParams() <= NumActualArgs :
              FT->getNumParams() == NumActualArgs,
            "Undefined behavior: Call argument count mismatches callee "
            "argument count", &I);

    // TODO: Check argument types (in case the callee was casted)
    // TODO: Check ABI-significant attributes.
    // TODO: Check noalias attribute.
    // TODO: Check sret attribute.
  }

  // TODO: Check the "tail" keyword constraints.

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
    switch (II->getIntrinsicID()) {
    default: break;

    // TODO: Check more intrinsics

    case Intrinsic::memcpy: {
      MemCpyInst *MCI = cast<MemCpyInst>(&I);
      visitMemoryReference(I, MCI->getSource(), MCI->getAlignment(), 0);
      visitMemoryReference(I, MCI->getDest(), MCI->getAlignment(), 0);

      // Check that the memcpy arguments don't overlap. The AliasAnalysis API
      // isn't expressive enough for what we really want to do. Known partial
      // overlap is not distinguished from the case where nothing is known.
      unsigned Size = 0;
      if (const ConstantInt *Len =
            dyn_cast<ConstantInt>(MCI->getLength()->stripPointerCasts()))
        if (Len->getValue().isIntN(32))
          Size = Len->getValue().getZExtValue();
      Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
              AliasAnalysis::MustAlias,
              "Undefined behavior: memcpy source and destination overlap", &I);
      break;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MMI = cast<MemMoveInst>(&I);
      visitMemoryReference(I, MMI->getSource(), MMI->getAlignment(), 0);
      visitMemoryReference(I, MMI->getDest(), MMI->getAlignment(), 0);
      break;
    }
    case Intrinsic::memset: {
      MemSetInst *MSI = cast<MemSetInst>(&I);
      visitMemoryReference(I, MSI->getDest(), MSI->getAlignment(), 0);
      break;
    }

    case Intrinsic::vastart:
      Assert1(I.getParent()->getParent()->isVarArg(),
              "Undefined behavior: va_start called in a non-varargs function",
              &I);

      visitMemoryReference(I, CS.getArgument(0), 0, 0);
      break;
    case Intrinsic::vacopy:
      visitMemoryReference(I, CS.getArgument(0), 0, 0);
      visitMemoryReference(I, CS.getArgument(1), 0, 0);
      break;
    case Intrinsic::vaend:
      visitMemoryReference(I, CS.getArgument(0), 0, 0);
      break;
    case Intrinsic::stackrestore:
      visitMemoryReference(I, CS.getArgument(0), 0, 0);
      break;
    }
}
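// Both versions of visitCallSite above enforce the same argument-count rule:
// a varargs callee may receive extra arguments, while a non-varargs callee
// must receive exactly as many arguments as it declares parameters. A
// standalone restatement of that rule (the helper name is illustrative only):
static bool callArgCountIsValid(bool CalleeIsVarArg, unsigned NumParams,
                                unsigned NumActualArgs) {
  return CalleeIsVarArg ? NumParams <= NumActualArgs
                        : NumParams == NumActualArgs;
}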