bool IRTranslator::translateStore(const StoreInst &SI) {
  assert(SI.isSimple() && "only simple stores are supported at the moment");

  MachineFunction &MF = MIRBuilder.getMF();
  unsigned Val = getOrCreateVReg(*SI.getValueOperand());
  unsigned Addr = getOrCreateVReg(*SI.getPointerOperand());
  LLT VTy{*SI.getValueOperand()->getType()},
      PTy{*SI.getPointerOperand()->getType()};

  MIRBuilder.buildStore(
      VTy, PTy, Val, Addr,
      *MF.getMachineMemOperand(MachinePointerInfo(SI.getPointerOperand()),
                               MachineMemOperand::MOStore,
                               VTy.getSizeInBits() / 8, getMemOpAlignment(SI)));
  return true;
}
bool Scalarizer::visitStoreInst(StoreInst &SI) {
  if (!ScalarizeLoadStore)
    return false;
  if (!SI.isSimple())
    return false;

  VectorLayout Layout;
  Value *FullValue = SI.getValueOperand();
  if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout))
    return false;

  unsigned NumElems = Layout.VecTy->getNumElements();
  IRBuilder<> Builder(SI.getParent(), &SI);
  Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
  Scatterer Val = scatter(&SI, FullValue);

  ValueVector Stores;
  Stores.resize(NumElems);
  for (unsigned I = 0; I < NumElems; ++I) {
    unsigned Align = Layout.getElemAlign(I);
    Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align);
  }
  transferMetadata(&SI, Stores);
  return true;
}
void InstrumentMemoryAccesses::visitStoreInst(StoreInst &SI) {
  // Instrument a store instruction with a store check.
  uint64_t Bytes = TD->getTypeStoreSize(SI.getValueOperand()->getType());
  Value *AccessSize = ConstantInt::get(SizeTy, Bytes);
  instrument(SI.getPointerOperand(), AccessSize, StoreCheckFunction, SI);
  ++StoresInstrumented;
}
void Interpreter::visitStoreInst(StoreInst &I) {
  ExecutionContext &SF = ECStack.back();
  GenericValue Val = getOperandValue(I.getOperand(0), SF);
  GenericValue SRC = getOperandValue(I.getPointerOperand(), SF);
  StoreValueToMemory(Val, (GenericValue *)GVTOP(SRC),
                     I.getOperand(0)->getType());
}
/*
 * Build information about functions that store to their pointer arguments.
 * For simplicity, we only consider a function to store to an argument
 * if it has exactly one StoreInst to that argument and the argument
 * has no other use.
 */
int DeadStoreEliminationPass::getFnThatStoreOnArgs(Module &M) {
  int numStores = 0;
  DEBUG(errs() << "Getting functions that store on arguments...\n");

  for (Module::iterator F = M.begin(); F != M.end(); ++F) {
    if (F->arg_empty() || F->isDeclaration())
      continue;

    // Collect the pointer arguments.
    std::set<Value*> args;
    for (Function::arg_iterator formalArgIter = F->arg_begin();
         formalArgIter != F->arg_end(); ++formalArgIter) {
      Value *formalArg = formalArgIter;
      if (formalArg->getType()->isPointerTy()) {
        args.insert(formalArg);
      }
    }

    // Find stores to those arguments.
    for (Function::iterator BB = F->begin(); BB != F->end(); ++BB) {
      for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
        Instruction *inst = I;
        if (!isa<StoreInst>(inst))
          continue;
        StoreInst *SI = dyn_cast<StoreInst>(inst);
        Value *ptrOp = SI->getPointerOperand();

        if (args.count(ptrOp) && ptrOp->hasNUses(1)) {
          fnThatStoreOnArgs[F].insert(ptrOp);
          numStores++;
          DEBUG(errs() << " " << F->getName() << " stores on argument "
                       << ptrOp->getName() << "\n");
        }
      }
    }
  }
  DEBUG(errs() << "\n");
  return numStores;
}
static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
  // FIXME: We could probably with some care handle both volatile and atomic
  // stores here but it isn't clear that this is important.
  if (!SI.isSimple())
    return false;

  Value *V = SI.getValueOperand();
  Type *T = V->getType();

  if (!T->isAggregateType())
    return false;

  if (auto *ST = dyn_cast<StructType>(T)) {
    // If the struct has only one element, we unpack.
    unsigned Count = ST->getNumElements();
    if (Count == 1) {
      V = IC.Builder->CreateExtractValue(V, 0);
      combineStoreToNewValue(IC, SI, V);
      return true;
    }

    // We don't want to break up stores with padding here, as we'd lose the
    // knowledge that padding exists for the rest of the pipeline.
    const DataLayout &DL = IC.getDataLayout();
    auto *SL = DL.getStructLayout(ST);
    if (SL->hasPadding())
      return false;

    SmallString<16> EltName = V->getName();
    EltName += ".elt";
    auto *Addr = SI.getPointerOperand();
    SmallString<16> AddrName = Addr->getName();
    AddrName += ".repack";
    auto *IdxType = Type::getInt32Ty(ST->getContext());
    auto *Zero = ConstantInt::get(IdxType, 0);
    for (unsigned i = 0; i < Count; i++) {
      Value *Indices[2] = {
        Zero,
        ConstantInt::get(IdxType, i),
      };
      auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
                                                AddrName);
      auto *Val = IC.Builder->CreateExtractValue(V, i, EltName);
      IC.Builder->CreateStore(Val, Ptr);
    }

    return true;
  }

  if (auto *AT = dyn_cast<ArrayType>(T)) {
    // If the array has only one element, we unpack.
    if (AT->getNumElements() == 1) {
      V = IC.Builder->CreateExtractValue(V, 0);
      combineStoreToNewValue(IC, SI, V);
      return true;
    }
  }

  return false;
}
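As an illustration of the struct path above, a store of a two-element struct with no padding is rewritten into one extractvalue, one inbounds GEP, and one scalar store per field. This is a hypothetical IR sketch: the value names %agg and %p are invented for the example; only the ".elt"/".repack" suffixes come from the code.
;   store { i32, i32 } %agg, { i32, i32 }* %p
; becomes:
;   %agg.elt = extractvalue { i32, i32 } %agg, 0
;   %p.repack = getelementptr inbounds { i32, i32 }, { i32, i32 }* %p, i32 0, i32 0
;   store i32 %agg.elt, i32* %p.repack
;   %agg.elt1 = extractvalue { i32, i32 } %agg, 1
;   %p.repack1 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %p, i32 0, i32 1
;   store i32 %agg.elt1, i32* %p.repack1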
void PropagateJuliaAddrspaces::visitStoreInst(StoreInst &SI) {
  unsigned AS = SI.getPointerAddressSpace();
  if (!isSpecialAS(AS))
    return;
  Value *Replacement = LiftPointer(SI.getPointerOperand(),
                                   SI.getValueOperand()->getType(), &SI);
  if (!Replacement)
    return;
  SI.setOperand(StoreInst::getPointerOperandIndex(), Replacement);
}
/// \brief Combine stores to match the type of value being stored.
///
/// The core idea here is that the memory does not have any intrinsic type and
/// where we can we should match the type of a store to the type of value being
/// stored.
///
/// However, this routine must never change the width of a store or the number
/// of stores as that would introduce a semantic change. This combine is
/// expected to be a semantic no-op which just allows stores to more closely
/// model the types of their incoming values.
///
/// Currently, we also refuse to change the precise type used for an atomic or
/// volatile store. This is debatable, and might be reasonable to change later.
/// However, it is risky in case some backend or other part of LLVM is relying
/// on the exact type stored to select appropriate atomic operations.
///
/// \returns true if the store was successfully combined away. This indicates
/// the caller must erase the store instruction. We have to let the caller
/// erase the store instruction as otherwise there is no way to signal whether
/// it was combined or not: IC.EraseInstFromFunction returns a null pointer.
static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
  // FIXME: We could probably with some care handle both volatile and atomic
  // stores here but it isn't clear that this is important.
  if (!SI.isSimple())
    return false;

  Value *Ptr = SI.getPointerOperand();
  Value *V = SI.getValueOperand();
  unsigned AS = SI.getPointerAddressSpace();
  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
  SI.getAllMetadata(MD);

  // Fold away bit casts of the stored value by storing the original type.
  if (auto *BC = dyn_cast<BitCastInst>(V)) {
    V = BC->getOperand(0);
    StoreInst *NewStore = IC.Builder->CreateAlignedStore(
        V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
        SI.getAlignment());
    for (const auto &MDPair : MD) {
      unsigned ID = MDPair.first;
      MDNode *N = MDPair.second;
      // Note, essentially every kind of metadata should be preserved here!
      // This routine is supposed to clone a store instruction changing *only
      // its type*. The only metadata it makes sense to drop is metadata which
      // is invalidated when the pointer type changes. This should essentially
      // never be the case in LLVM, but we explicitly switch over only known
      // metadata to be conservatively correct. If you are adding metadata to
      // LLVM which pertains to stores, you almost certainly want to add it
      // here.
      switch (ID) {
      case LLVMContext::MD_dbg:
      case LLVMContext::MD_tbaa:
      case LLVMContext::MD_prof:
      case LLVMContext::MD_fpmath:
      case LLVMContext::MD_tbaa_struct:
      case LLVMContext::MD_alias_scope:
      case LLVMContext::MD_noalias:
      case LLVMContext::MD_nontemporal:
      case LLVMContext::MD_mem_parallel_loop_access:
      case LLVMContext::MD_nonnull:
        // All of these directly apply.
        NewStore->setMetadata(ID, N);
        break;

      case LLVMContext::MD_invariant_load:
      case LLVMContext::MD_range:
        // These don't apply to stores, so drop them.
        break;
      }
    }
    return true;
  }

  // FIXME: We should also canonicalize stores of vectors when their elements
  // are cast to other types.
  return false;
}
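A minimal IR sketch of the bitcast fold above (value names are invented for the illustration and are not taken from the source): a store of a bitcast value becomes a store of the original value through a pointer bitcast to the matching type, with the listed metadata kinds copied onto the new store.
;   %f = bitcast i32 %x to float
;   store float %f, float* %p
; becomes:
;   %p.cast = bitcast float* %p to i32*
;   store i32 %x, i32* %p.cast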
// -- handle store instruction --
void UnsafeTypeCastingCheck::handleStoreInstruction (Instruction *inst) {
  StoreInst *sinst = dyn_cast<StoreInst>(inst);
  if (sinst == NULL)
    utccAbort("handleStoreInstruction cannot process with a non-store instruction");
  Value *pt = sinst->getPointerOperand();
  Value *vl = sinst->getValueOperand();
  UTCC_TYPE ptt = queryPointedType(pt);
  UTCC_TYPE vlt = queryExprType(vl);
  setPointedType(pt, vlt);
}
bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
    Instruction *Addr) const {
  AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets());
  Function *Func = (*Blocks.begin())->getParent();
  for (BasicBlock &BB : *Func) {
    if (Blocks.count(&BB))
      continue;
    for (Instruction &II : BB) {
      if (isa<DbgInfoIntrinsic>(II))
        continue;

      unsigned Opcode = II.getOpcode();
      Value *MemAddr = nullptr;
      switch (Opcode) {
      case Instruction::Store:
      case Instruction::Load: {
        if (Opcode == Instruction::Store) {
          StoreInst *SI = cast<StoreInst>(&II);
          MemAddr = SI->getPointerOperand();
        } else {
          LoadInst *LI = cast<LoadInst>(&II);
          MemAddr = LI->getPointerOperand();
        }
        // Global variable can not be aliased with locals.
        if (dyn_cast<Constant>(MemAddr))
          break;
        Value *Base = MemAddr->stripInBoundsConstantOffsets();
        if (!dyn_cast<AllocaInst>(Base) || Base == AI)
          return false;
        break;
      }
      default: {
        IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
        if (IntrInst) {
          if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
              IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
            break;
          return false;
        }
        // Treat all the other cases conservatively if it has side effects.
        if (II.mayHaveSideEffects())
          return false;
      }
      }
    }
  }
  return true;
}
// Not an instruction handled below to turn into a vector.
//
// TODO: Check isTriviallyVectorizable for calls and handle other
// instructions.
static bool canVectorizeInst(Instruction *Inst, User *User) {
  switch (Inst->getOpcode()) {
  case Instruction::Load:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
    return true;
  case Instruction::Store: {
    // Must be the stored pointer operand, not a stored value.
    StoreInst *SI = cast<StoreInst>(Inst);
    return SI->getPointerOperand() == User;
  }
  default:
    return false;
  }
}
/// \brief Check whether a loop instruction is safe for loop versioning.
/// Returns true if it is safe, false otherwise.
/// The checks are:
/// 1) All loads and stores in the loop body must be non-atomic and non-volatile.
/// 2) Function calls must be safe, i.e. must not access memory.
/// 3) The loop body must not contain any may-throw instruction.
bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) {
  assert(I != nullptr && "Null instruction found!");
  // Check function call safety.
  if (isa<CallInst>(I) && !AA->doesNotAccessMemory(CallSite(I))) {
    DEBUG(dbgs() << " Unsafe call site found.\n");
    return false;
  }
  // Avoid loops with any possibility of throwing.
  if (I->mayThrow()) {
    DEBUG(dbgs() << " May throw instruction found in loop body\n");
    return false;
  }
  // If the current instruction is a load, make sure it's a simple load
  // (non-atomic and non-volatile).
  if (I->mayReadFromMemory()) {
    LoadInst *Ld = dyn_cast<LoadInst>(I);
    if (!Ld || !Ld->isSimple()) {
      DEBUG(dbgs() << " Found a non-simple load.\n");
      return false;
    }
    LoadAndStoreCounter++;
    collectStridedAccess(Ld);
    Value *Ptr = Ld->getPointerOperand();
    // Check loop invariance.
    if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop))
      InvariantCounter++;
  }
  // If the current instruction is a store, make sure it's a simple store
  // (non-atomic and non-volatile).
  else if (I->mayWriteToMemory()) {
    StoreInst *St = dyn_cast<StoreInst>(I);
    if (!St || !St->isSimple()) {
      DEBUG(dbgs() << " Found a non-simple store.\n");
      return false;
    }
    LoadAndStoreCounter++;
    collectStridedAccess(St);
    Value *Ptr = St->getPointerOperand();
    // Check loop invariance.
    if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop))
      InvariantCounter++;
    IsReadOnlyLoop = false;
  }
  return true;
}
void TracingNoGiri::visitStoreInst(StoreInst &SI) {
  instrumentLock(&SI);

  // Cast the pointer into a void pointer type.
  Value *Pointer = SI.getPointerOperand();
  Pointer = castTo(Pointer, VoidPtrType, Pointer->getName(), &SI);
  // Get the size of the stored data.
  uint64_t size = TD->getTypeStoreSize(SI.getOperand(0)->getType());
  Value *StoreSize = ConstantInt::get(Int64Type, size);
  // Get the ID of the store instruction.
  Value *StoreID = ConstantInt::get(Int32Type, lsNumPass->getID(&SI));
  // Create the call to the run-time to record the store instruction.
  std::vector<Value *> args = make_vector<Value *>(StoreID, Pointer, StoreSize, 0);
  CallInst::Create(RecordStore, args, "", &SI);

  instrumentUnlock(&SI);
  ++NumStores; // Update statistics
}
void GCInvariantVerifier::visitStoreInst(StoreInst &SI) {
  Type *VTy = SI.getValueOperand()->getType();
  if (VTy->isPointerTy()) {
    /* We currently don't obey this for arguments. That's ok - they're
       externally rooted. */
    if (!isa<Argument>(SI.getValueOperand())) {
      unsigned AS = cast<PointerType>(VTy)->getAddressSpace();
      Check(AS != AddressSpace::CalleeRooted &&
            AS != AddressSpace::Derived,
            "Illegal store of decayed value", &SI);
    }
  }
  VTy = SI.getPointerOperand()->getType();
  if (VTy->isPointerTy()) {
    unsigned AS = cast<PointerType>(VTy)->getAddressSpace();
    Check(AS != AddressSpace::CalleeRooted,
          "Illegal store to callee rooted value", &SI);
  }
}
// Not an instruction handled below to turn into a vector.
//
// TODO: Check isTriviallyVectorizable for calls and handle other
// instructions.
static bool canVectorizeInst(Instruction *Inst, User *User) {
  switch (Inst->getOpcode()) {
  case Instruction::Load: {
    LoadInst *LI = cast<LoadInst>(Inst);
    // Currently only handle the case where the Pointer Operand is a GEP,
    // so check for that case.
    return isa<GetElementPtrInst>(LI->getPointerOperand()) && !LI->isVolatile();
  }
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
    return true;
  case Instruction::Store: {
    // Must be the stored pointer operand, not a stored value, plus
    // since it should be canonical form, the User should be a GEP.
    StoreInst *SI = cast<StoreInst>(Inst);
    return (SI->getPointerOperand() == User) &&
           isa<GetElementPtrInst>(User) && !SI->isVolatile();
  }
  default:
    return false;
  }
}
StructuredModuleEditor::ValueList StructuredModuleEditor::getUseChain( Value *V) { ValueList Vals; for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; ++UI) { Value *ValueToPush; Instruction *Inst = dyn_cast<Instruction>(*UI); if (Inst && Inst->getOpcode() == Instruction::Store) { StoreInst *StInst = dyn_cast<StoreInst>(Inst); Value *Storee = StInst->getPointerOperand(); ValueToPush = Storee; } else ValueToPush = *UI; Vals.push_back(ValueToPush); } return Vals; }
void ExecutorUtil::checkStoreInst(Instruction *inst, std::vector<GlobalSharedTaint> &glSet, std::vector<GlobalSharedTaint> &sharedSet, AliasAnalysis &AA, RelFlowSet &flowSet) { bool relToShared = false; StoreInst *store = dyn_cast<StoreInst>(inst); Value *pointer = store->getPointerOperand(); for (unsigned i = 0; i < sharedSet.size(); i++) { if (ExecutorUtil::findValueFromTaintSet(pointer, sharedSet[i].instSet, sharedSet[i].valueSet)) { // Related to shared if (Verbose > 0) { std::cout << "shared store inst: " << std::endl; inst->dump(); } flowSet.sharedWriteVec.insert(sharedSet[i].gv); relToShared = true; break; } } if (!relToShared) { for (unsigned i = 0; i < glSet.size(); i++) { if (ExecutorUtil::findValueFromTaintSet(pointer, glSet[i].instSet, glSet[i].valueSet)) { // Related to global if (Verbose > 0) { std::cout << "global store inst: " << std::endl; inst->dump(); } flowSet.globalWriteVec.insert(glSet[i].gv); break; } } } }
// Lowers this interleaved access group into X86-specific // instructions/intrinsics. bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() { SmallVector<Instruction *, 4> DecomposedVectors; SmallVector<Value *, 4> TransposedVectors; VectorType *ShuffleTy = Shuffles[0]->getType(); if (isa<LoadInst>(Inst)) { // Try to generate target-sized register(/instruction). decompose(Inst, Factor, ShuffleTy, DecomposedVectors); Type *ShuffleEltTy = Inst->getType(); unsigned NumSubVecElems = ShuffleEltTy->getVectorNumElements() / Factor; // Perform matrix-transposition in order to compute interleaved // results by generating some sort of (optimized) target-specific // instructions. switch (NumSubVecElems) { default: return false; case 4: transpose_4x4(DecomposedVectors, TransposedVectors); break; case 8: case 16: case 32: deinterleave8bitStride3(DecomposedVectors, TransposedVectors, NumSubVecElems); break; } // Now replace the unoptimized-interleaved-vectors with the // transposed-interleaved vectors. for (unsigned i = 0, e = Shuffles.size(); i < e; ++i) Shuffles[i]->replaceAllUsesWith(TransposedVectors[Indices[i]]); return true; } Type *ShuffleEltTy = ShuffleTy->getVectorElementType(); unsigned NumSubVecElems = ShuffleTy->getVectorNumElements() / Factor; // Lower the interleaved stores: // 1. Decompose the interleaved wide shuffle into individual shuffle // vectors. decompose(Shuffles[0], Factor, VectorType::get(ShuffleEltTy, NumSubVecElems), DecomposedVectors); // 2. Transpose the interleaved-vectors into vectors of contiguous // elements. switch (NumSubVecElems) { case 4: transpose_4x4(DecomposedVectors, TransposedVectors); break; case 16: case 32: interleave8bitStride4(DecomposedVectors, TransposedVectors, NumSubVecElems); break; default: return false; } // 3. Concatenate the contiguous-vectors back into a wide vector. Value *WideVec = concatenateVectors(Builder, TransposedVectors); // 4. Generate a store instruction for wide-vec. StoreInst *SI = cast<StoreInst>(Inst); Builder.CreateAlignedStore(WideVec, SI->getPointerOperand(), SI->getAlignment()); return true; }
void Lint::visitStoreInst(StoreInst &I) { visitMemoryReference(I, I.getPointerOperand(), DL->getTypeStoreSize(I.getOperand(0)->getType()), I.getAlignment(), I.getOperand(0)->getType(), MemRef::Write); }
bool runOnModule(Module &M) override { // LOAD: look at each generated function, each a load is followed by writing // to a pointer argument with attribute denoting it to be write channel "CHANNELWR" // we change the load: // 0. replace the original CHANNELWR channel with an address port and a size port (optional) // 1. in the absence of burst, replace the load instruction with an address write to // the address port // 2. in the presence of burst, move load outside of the involved loop and make one // address write + one size write // 3. add in new function to read memory and write to fifo....the same fifo the downstream // guys are reading -- this newly added function would break them into reasonable bursts // STORE: let's not do this first // 0. replace the original store with an address port and a size port(optional) and a data port // 1. in the case of burst, address req get moved outside but actual data is written into the // data port as they get created // newly created memory access function errs()<<"into func run\n"; std::vector<Function*> memoryAccessFunctions; // top level functions layout pipeline accessed at the end std::vector<Function*> pipelineLevelFunctions; std::vector<Function*> topLevelFunctions; for(auto funcIter = M.begin(); funcIter!=M.end(); funcIter++) { Function& curFunc = *funcIter; if(!curFunc.hasFnAttribute(GENERATEDATTR)) { if(curFunc.hasFnAttribute(TRANSFORMEDATTR)) topLevelFunctions.push_back(funcIter); continue; } LoopInfo* funcLI=&getAnalysis<LoopInfo>(curFunc); // iterate through the basicblocks and see if the loaded value // is written to a channel out put -- we do not convert stores // the argument involved here are all old arguments std::map<Instruction*, Argument*> load2Port; std::set<Argument*> addressArg; std::set<Argument*> burstedArg; std::map<Instruction*, Argument*> store2Port; for(auto bbIter = curFunc.begin(); bbIter!= curFunc.end(); bbIter++) { BasicBlock& curBB = *bbIter; for(auto insIter = curBB.begin(); insIter!=curBB.end(); insIter++) { Instruction& curIns = *insIter; if(isa<LoadInst>(curIns)) { LoadInst& li = cast<LoadInst>(curIns); // we check if the result of this is directly written to an output port // using store int numUser = std::distance(curIns.user_begin(),curIns.user_end()); if(numUser==1 ) { auto soleUserIter = curIns.user_begin(); if(isa<StoreInst>(*soleUserIter)) { StoreInst* si = cast<StoreInst>(*soleUserIter); Value* written2 = si->getPointerOperand(); if(isa<Argument>(*written2)) { Argument& channelArg = cast<Argument>(*written2); // make sure this is wrchannel if(isArgChannel(&channelArg)) { load2Port[&li] = &channelArg; addressArg.insert(&channelArg); if(burstAccess&& analyzeLoadBurstable(&li,funcLI)) burstedArg.insert(&channelArg); } } } } } //FIXME: not doing storeInst else if(isa<StoreInst>(curIns)) { } } } // now we have the loadInst which will be converted to pipelined mem access // we need to create a bunch of new functions -- we then use these new functions // in our new top levels --- after which everything original is deleted std::string functionName = curFunc.getName(); functionName += "MemTrans"; Type* rtType = curFunc.getReturnType(); // old to new argument map std::map<Argument*,Argument*> oldDataFifoArg2newAddrArg; // these are arguments of the newly created function std::map<Argument*,Argument*> addressArg2SizeArg; std::vector<Type*> paramsType; for(auto argIter = curFunc.arg_begin();argIter!=curFunc.arg_end();argIter++) { Argument* curArg = &cast<Argument>(*argIter); 
paramsType.push_back(curArg->getType()); } for(int numBurstSize = 0; numBurstSize<burstedArg.size();numBurstSize++) { paramsType.push_back(PointerType::get(Type::getInt32Ty(M.getContext()),0)); } FunctionType* newFuncType = FunctionType::get(rtType,ArrayRef<Type*>(paramsType),false); Constant* newFunc = M.getOrInsertFunction(functionName, newFuncType ); Function* memTransFunc = cast<Function>(newFunc); auto newArgIter = memTransFunc->arg_begin(); for(auto oldArgIter = curFunc.arg_begin(); oldArgIter!=curFunc.arg_end(); oldArgIter++, newArgIter++) { Argument* oldArg = &cast<Argument>(*oldArgIter); Argument* newArg = &cast<Argument>(*newArgIter); oldDataFifoArg2newAddrArg[oldArg] = newArg; } auto burstedArgIter = burstedArg.begin(); while(newArgIter!=memTransFunc->arg_end()) { Argument* newBurstArg = &cast<Argument>(*newArgIter); Argument* originalDataFifoArg = *burstedArgIter; Argument* newAddressArg = oldDataFifoArg2newAddrArg[originalDataFifoArg]; addressArg2SizeArg[newAddressArg] = newBurstArg; newArgIter++; burstedArgIter++; } // if bursted access is in a loop, we want to take it out of the loop // make it pre-header -- to do this, we associate each loadIns with std::map<BasicBlock*,std::vector<Instruction*>*> bb2BurstedLoads; for(auto load2PortIter = load2Port.begin(); load2PortIter!=load2Port.end(); load2PortIter++) { Instruction* ldInst = load2PortIter->first; Argument* ldArg = load2PortIter->second; BasicBlock* ldParent = ldInst->getParent(); if(burstedArg.count(ldArg)) { // this load is to be bursted if(!bb2BurstedLoads.count(ldParent)) bb2BurstedLoads[ldParent] = new std::vector<Instruction*>(); bb2BurstedLoads[ldParent]->push_back(ldInst); } } // now memTransFunc is the new function // we will now populate it, we mirror everybb std::map<BasicBlock*,BasicBlock*> oldBB2NewBB; // also we need a few preheaders to do the burst load for(auto bbIter = curFunc.begin(); bbIter!= curFunc.end(); bbIter++) { BasicBlock& oldBB = *bbIter; } // FIXME: release bb2BurstedLoads } return false; }
/// processStore - When GVN is scanning forward over instructions, we look for
/// some other patterns to fold away. In particular, this looks for stores to
/// neighboring locations of memory. If it sees enough consecutive ones
/// (currently 4) it attempts to merge them together into a memcpy/memset.
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
  if (SI->isVolatile()) return false;

  LLVMContext &Context = SI->getContext();

  // There are two cases that are interesting for this code to handle: memcpy
  // and memset.  Right now we only handle memset.

  // Ensure that the value being stored is something that can be memset'able a
  // byte at a time like "0" or "-1" or any width, as well as things like
  // 0xA0A0A0A0 and 0.0.
  Value *ByteVal = isBytewiseValue(SI->getOperand(0));
  if (!ByteVal)
    return false;

  TargetData *TD = getAnalysisIfAvailable<TargetData>();
  if (!TD) return false;
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  Module *M = SI->getParent()->getParent()->getParent();

  // Okay, so we now have a single store that can be splatable.  Scan to find
  // all subsequent stores of the same value to offset from the same pointer.
  // Join these together into ranges, so we can decide whether contiguous
  // blocks are stored.
  MemsetRanges Ranges(*TD);

  Value *StartPtr = SI->getPointerOperand();

  BasicBlock::iterator BI = SI;
  for (++BI; !isa<TerminatorInst>(BI); ++BI) {
    if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
      // If the call is readnone, ignore it, otherwise bail out.  We don't even
      // allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (AA.getModRefBehavior(CallSite::get(BI)) ==
            AliasAnalysis::DoesNotAccessMemory)
        continue;

      // TODO: If this is a memset, try to join it in.
      break;
    } else if (isa<VAArgInst>(BI) || isa<LoadInst>(BI))
      break;

    // If this is a non-store instruction it is fine, ignore it.
    StoreInst *NextStore = dyn_cast<StoreInst>(BI);
    if (NextStore == 0) continue;

    // If this is a store, see if we can merge it in.
    if (NextStore->isVolatile()) break;

    // Check to see if this stored value is of the same byte-splattable value.
    if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
      break;

    // Check to see if this store is to a constant offset from the start ptr.
    int64_t Offset;
    if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))
      break;

    Ranges.addStore(Offset, NextStore);
  }

  // If we have no ranges, then we just had a single store with nothing that
  // could be merged in.  This is a very common case of course.
  if (Ranges.empty())
    return false;

  // If we had at least one store that could be merged in, add the starting
  // store as well.  We try to avoid this unless there is at least something
  // interesting as a small compile-time optimization.
  Ranges.addStore(0, SI);

  // Now that we have full information about ranges, loop over the ranges and
  // emit memset's for anything big enough to be worthwhile.
  bool MadeChange = false;
  for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
       I != E; ++I) {
    const MemsetRange &Range = *I;

    if (Range.TheStores.size() == 1) continue;

    // If it is profitable to lower this range to memset, do so now.
    if (!Range.isProfitableToUseMemset(*TD))
      continue;

    // Otherwise, we do want to transform this!  Create a new memset.  We put
    // the memset right before the first instruction that isn't part of this
    // memset block.  This ensures that the memset is dominated by any
    // addressing instruction needed by the start of the block.
    BasicBlock::iterator InsertPt = BI;

    // Get the starting pointer of the block.
    StartPtr = Range.StartPtr;

    // Determine alignment
    unsigned Alignment = Range.Alignment;
    if (Alignment == 0) {
      const Type *EltType =
        cast<PointerType>(StartPtr->getType())->getElementType();
      Alignment = TD->getABITypeAlignment(EltType);
    }

    // Cast the start ptr to be i8* as memset requires.
    const PointerType* StartPTy = cast<PointerType>(StartPtr->getType());
    const PointerType *i8Ptr = Type::getInt8PtrTy(Context,
                                                  StartPTy->getAddressSpace());
    if (StartPTy != i8Ptr)
      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(),
                                 InsertPt);

    Value *Ops[] = {
      StartPtr, ByteVal,                                // Start, value
      // size
      ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start),
      // align
      ConstantInt::get(Type::getInt32Ty(Context), Alignment),
      // volatile
      ConstantInt::get(Type::getInt1Ty(Context), 0),
    };
    const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };

    Function *MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);

    Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt);
    DEBUG(dbgs() << "Replace stores:\n";
          for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
            dbgs() << *Range.TheStores[i];
          dbgs() << "With: " << *C); C=C;

    // Don't invalidate the iterator
    BBI = BI;

    // Zap all the stores.
    for (SmallVector<StoreInst*, 16>::const_iterator
           SI = Range.TheStores.begin(), SE = Range.TheStores.end();
         SI != SE; ++SI)
      (*SI)->eraseFromParent();
    ++NumMemSetInfer;
    MadeChange = true;
  }

  return MadeChange;
}
/// /// runOnFunction /// bool LongLongMemAccessLowering::runOnFunction(Function &F) { std::vector<StoreInst *> storeInstV; std::vector<LoadInst *> loadInstV; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE; ++BI) { Instruction *inst = BI; if (StoreInst *storeInst = dyn_cast<StoreInst>(inst)) { if (!storeInst->getValueOperand()->getType()->isIntegerTy(64)) { continue; } assert(cast<PointerType>(storeInst->getPointerOperand()->getType()) ->getElementType()->isIntegerTy(64)); storeInstV.push_back(storeInst); } else if (LoadInst *loadInst = dyn_cast<LoadInst>(inst)) { if (!loadInst->getType()->isIntegerTy(64)) { continue; } assert(cast<PointerType>(loadInst->getPointerOperand()->getType()) ->getElementType()->isIntegerTy(64)); loadInstV.push_back(loadInst); } } } for (unsigned i = 0; i < storeInstV.size(); ++i) { StoreInst *inst = storeInstV.at(i); Value *storeVal = inst->getValueOperand(); Value *storePointer = inst->getPointerOperand(); // Insert new instructions BitCastInst *storeAddrLo = new BitCastInst(storePointer, Type::getInt32PtrTy(F.getContext()), "", inst); ConstantInt *offsetConst = ConstantInt::getSigned(Type::getInt32Ty(F.getContext()), 1); std::vector<Value *> gepIdxList(1, offsetConst); GetElementPtrInst *storeAddrHi = GetElementPtrInst::Create(storeAddrLo, gepIdxList, "", inst); TruncInst *storeValLo = new TruncInst(storeVal, Type::getInt32Ty(F.getContext()), "", inst); Value *aShrOffset = ConstantInt::getSigned(Type::getInt64Ty(F.getContext()), 32); BinaryOperator *storeValAShr = BinaryOperator::Create(Instruction::AShr, storeVal, aShrOffset, "", inst); TruncInst *storeValHi = new TruncInst(storeValAShr, Type::getInt32Ty(F.getContext()), "", inst); StoreInst *storeLo = new StoreInst(storeValLo, storeAddrLo, inst); StoreInst *storeHi = new StoreInst(storeValHi, storeAddrHi, inst); storeLo->setAlignment(4); storeHi->setAlignment(4); // Remove inst inst->eraseFromParent(); } for (unsigned i = 0; i < loadInstV.size(); ++i) { LoadInst *inst = loadInstV.at(i); Value *loadPointer = inst->getPointerOperand(); // Insert new instructions BitCastInst *loadAddrLo = new BitCastInst(loadPointer, Type::getInt32PtrTy(F.getContext()), "", inst); ConstantInt *offsetConst = ConstantInt::getSigned(Type::getInt32Ty(F.getContext()), 1); std::vector<Value *> gepIdxList(1, offsetConst); GetElementPtrInst *loadAddrHi = GetElementPtrInst::Create(loadAddrLo, gepIdxList, "", inst); LoadInst *loadLo = new LoadInst(loadAddrLo, "", inst); LoadInst *loadHi = new LoadInst(loadAddrHi, "", inst); ZExtInst *loadLoLL = new ZExtInst(loadLo, Type::getInt64Ty(F.getContext()), "", inst); ZExtInst *loadHiLL = new ZExtInst(loadHi, Type::getInt64Ty(F.getContext()), "", inst); Value *shlOffset = ConstantInt::getSigned(Type::getInt64Ty(F.getContext()), 32); BinaryOperator *loadHiLLShl = BinaryOperator::Create(Instruction::Shl, loadHiLL, shlOffset, "", inst); BinaryOperator *loadValue = BinaryOperator::Create(Instruction::Or, loadLoLL, loadHiLLShl, ""); // Replace inst with new "loaded" value, the old value is deleted ReplaceInstWithInst(inst, loadValue); } return true; // function is modified }
/* * Clone a given function removing dead stores */ Function* DeadStoreEliminationPass::cloneFunctionWithoutDeadStore(Function *Fn, Instruction* caller, std::string suffix) { Function *NF = Function::Create(Fn->getFunctionType(), Fn->getLinkage()); NF->copyAttributesFrom(Fn); // Copy the parameter names, to ease function inspection afterwards. Function::arg_iterator NFArg = NF->arg_begin(); for (Function::arg_iterator Arg = Fn->arg_begin(), ArgEnd = Fn->arg_end(); Arg != ArgEnd; ++Arg, ++NFArg) { NFArg->setName(Arg->getName()); } // To avoid name collision, we should select another name. NF->setName(Fn->getName() + suffix); // Fill clone content ValueToValueMapTy VMap; SmallVector<ReturnInst*, 8> Returns; Function::arg_iterator NI = NF->arg_begin(); for (Function::arg_iterator I = Fn->arg_begin(); NI != NF->arg_end(); ++I, ++NI) { VMap[I] = NI; } CloneAndPruneFunctionInto(NF, Fn, VMap, false, Returns); // Remove dead stores std::set<Value*> deadArgs = deadArguments[caller]; std::set<Value*> removeStoresTo; Function::arg_iterator NFArgIter = NF->arg_begin(); for (Function::arg_iterator FnArgIter = Fn->arg_begin(); FnArgIter != Fn->arg_end(); ++FnArgIter, ++NFArgIter) { Value *FnArg = FnArgIter; if (deadArgs.count(FnArg)) { removeStoresTo.insert(NFArgIter); } } std::vector<Instruction*> toRemove; for (Function::iterator BB = NF->begin(); BB != NF->end(); ++BB) { for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { Instruction *inst = I; if (!isa<StoreInst>(inst)) continue; StoreInst *SI = dyn_cast<StoreInst>(inst); Value *ptrOp = SI->getPointerOperand(); if (removeStoresTo.count(ptrOp)) { DEBUG(errs() << "will remove this store: " << *inst << "\n"); toRemove.push_back(inst); } } } for (std::vector<Instruction*>::iterator it = toRemove.begin(); it != toRemove.end(); ++it) { Instruction* inst = *it; inst->eraseFromParent(); RemovedStores++; } // Insert the clone function before the original Fn->getParent()->getFunctionList().insert(Fn, NF); return NF; }
/// tryAggregating - When scanning forward over instructions, we look for /// other loads or stores that could be aggregated with this one. /// Returns the last instruction added (if one was added) since we might have /// removed some loads or stores and that might invalidate an iterator. Instruction *AggregateGlobalOpsOpt::tryAggregating(Instruction *StartInst, Value *StartPtr, bool DebugThis) { if (TD == 0) return 0; Module* M = StartInst->getParent()->getParent()->getParent(); LLVMContext& Context = StartInst->getContext(); Type* int8Ty = Type::getInt8Ty(Context); Type* sizeTy = Type::getInt64Ty(Context); Type* globalInt8PtrTy = int8Ty->getPointerTo(globalSpace); bool isLoad = isa<LoadInst>(StartInst); bool isStore = isa<StoreInst>(StartInst); Instruction *lastAddedInsn = NULL; Instruction *LastLoadOrStore = NULL; SmallVector<Instruction*, 8> toRemove; // Okay, so we now have a single global load/store. Scan to find // all subsequent stores of the same value to offset from the same pointer. // Join these together into ranges, so we can decide whether contiguous blocks // are stored. MemOpRanges Ranges(*TD); // Put the first store in since we want to preserve the order. Ranges.addInst(0, StartInst); BasicBlock::iterator BI = StartInst; for (++BI; !isa<TerminatorInst>(BI); ++BI) { if( isGlobalLoadOrStore(BI, globalSpace, isLoad, isStore) ) { // OK! } else { // If the instruction is readnone, ignore it, otherwise bail out. We // don't even allow readonly here because we don't want something like: // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). if (BI->mayWriteToMemory()) break; if (isStore && BI->mayReadFromMemory()) break; continue; } if ( isStore && isa<StoreInst>(BI) ) { StoreInst *NextStore = cast<StoreInst>(BI); // If this is a store, see if we can merge it in. if (!NextStore->isSimple()) break; // Check to see if this store is to a constant offset from the start ptr. int64_t Offset; if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD)) break; Ranges.addStore(Offset, NextStore); LastLoadOrStore = NextStore; } else { LoadInst *NextLoad = cast<LoadInst>(BI); if (!NextLoad->isSimple()) break; // Check to see if this load is to a constant offset from the start ptr. int64_t Offset; if (!IsPointerOffset(StartPtr, NextLoad->getPointerOperand(), Offset, *TD)) break; Ranges.addLoad(Offset, NextLoad); LastLoadOrStore = NextLoad; } } // If we have no ranges, then we just had a single store with nothing that // could be merged in. This is a very common case of course. if (!Ranges.moreThanOneOp()) return 0; // Divide the instructions between StartInst and LastLoadOrStore into // addressing, memops, and uses of memops (uses of loads) reorderAddressingMemopsUses(StartInst, LastLoadOrStore, DebugThis); Instruction* insertBefore = StartInst; IRBuilder<> builder(insertBefore); // Now that we have full information about ranges, loop over the ranges and // emit memcpy's for anything big enough to be worthwhile. for (MemOpRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { const MemOpRange &Range = *I; Value* oldBaseI = NULL; Value* newBaseI = NULL; if (Range.TheStores.size() == 1) continue; // Don't bother if there's only one thing... builder.SetInsertPoint(insertBefore); // Otherwise, we do want to transform this! Create a new memcpy. // Get the starting pointer of the block. 
StartPtr = Range.StartPtr; if( DebugThis ) { errs() << "base is:"; StartPtr->dump(); } // Determine alignment unsigned Alignment = Range.Alignment; if (Alignment == 0) { Type *EltType = cast<PointerType>(StartPtr->getType())->getElementType(); Alignment = TD->getABITypeAlignment(EltType); } Instruction *alloc = NULL; Value *globalPtr = NULL; // create temporary alloca space to communicate to/from. alloc = makeAlloca(int8Ty, "agg.tmp", insertBefore, Range.End-Range.Start, Alignment); // Generate the old and new base pointers before we output // anything else. { Type* iPtrTy = TD->getIntPtrType(alloc->getType()); Type* iNewBaseTy = TD->getIntPtrType(alloc->getType()); oldBaseI = builder.CreatePtrToInt(StartPtr, iPtrTy, "agg.tmp.oldb.i"); newBaseI = builder.CreatePtrToInt(alloc, iNewBaseTy, "agg.tmp.newb.i"); } // If storing, do the stores we had into our alloca'd region. if( isStore ) { for (SmallVector<Instruction*, 16>::const_iterator SI = Range.TheStores.begin(), SE = Range.TheStores.end(); SI != SE; ++SI) { StoreInst* oldStore = cast<StoreInst>(*SI); if( DebugThis ) { errs() << "have store in range:"; oldStore->dump(); } Value* ptrToAlloc = rebasePointer(oldStore->getPointerOperand(), StartPtr, alloc, "agg.tmp", &builder, *TD, oldBaseI, newBaseI); // Old load must not be volatile or atomic... or we shouldn't have put // it in ranges assert(!(oldStore->isVolatile() || oldStore->isAtomic())); StoreInst* newStore = builder.CreateStore(oldStore->getValueOperand(), ptrToAlloc); newStore->setAlignment(oldStore->getAlignment()); newStore->takeName(oldStore); } } // cast the pointer that was load/stored to i8 if necessary. if( StartPtr->getType()->getPointerElementType() == int8Ty ) { globalPtr = StartPtr; } else { globalPtr = builder.CreatePointerCast(StartPtr, globalInt8PtrTy, "agg.cast"); } // Get a Constant* for the length. Constant* len = ConstantInt::get(sizeTy, Range.End-Range.Start, false); // Now add the memcpy instruction unsigned addrSpaceDst,addrSpaceSrc; addrSpaceDst = addrSpaceSrc = 0; if( isStore ) addrSpaceDst = globalSpace; if( isLoad ) addrSpaceSrc = globalSpace; Type *types[3]; types[0] = PointerType::get(int8Ty, addrSpaceDst); types[1] = PointerType::get(int8Ty, addrSpaceSrc); types[2] = sizeTy; Function *func = Intrinsic::getDeclaration(M, Intrinsic::memcpy, types); Value* args[5]; // dst src len alignment isvolatile if( isStore ) { // it's a store (ie put) args[0] = globalPtr; args[1] = alloc; } else { // it's a load (ie get) args[0] = alloc; args[1] = globalPtr; } args[2] = len; // alignment args[3] = ConstantInt::get(Type::getInt32Ty(Context), 0, false); // isvolatile args[4] = ConstantInt::get(Type::getInt1Ty(Context), 0, false); Instruction* aMemCpy = builder.CreateCall(func, args); /* DEBUG(dbgs() << "Replace ops:\n"; for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) dbgs() << *Range.TheStores[i] << '\n'; dbgs() << "With: " << *AMemSet << '\n'); */ if (!Range.TheStores.empty()) aMemCpy->setDebugLoc(Range.TheStores[0]->getDebugLoc()); lastAddedInsn = aMemCpy; // If loading, load from the memcpy'd region if( isLoad ) { for (SmallVector<Instruction*, 16>::const_iterator SI = Range.TheStores.begin(), SE = Range.TheStores.end(); SI != SE; ++SI) { LoadInst* oldLoad = cast<LoadInst>(*SI); if( DebugThis ) { errs() << "have load in range:"; oldLoad->dump(); } Value* ptrToAlloc = rebasePointer(oldLoad->getPointerOperand(), StartPtr, alloc, "agg.tmp", &builder, *TD, oldBaseI, newBaseI); // Old load must not be volatile or atomic... 
or we shouldn't have put // it in ranges assert(!(oldLoad->isVolatile() || oldLoad->isAtomic())); LoadInst* newLoad = builder.CreateLoad(ptrToAlloc); newLoad->setAlignment(oldLoad->getAlignment()); oldLoad->replaceAllUsesWith(newLoad); newLoad->takeName(oldLoad); lastAddedInsn = newLoad; } } // Save old loads/stores for removal for (SmallVector<Instruction*, 16>::const_iterator SI = Range.TheStores.begin(), SE = Range.TheStores.end(); SI != SE; ++SI) { Instruction* insn = *SI; toRemove.push_back(insn); } } // Zap all the old loads/stores for (SmallVector<Instruction*, 16>::const_iterator SI = toRemove.begin(), SE = toRemove.end(); SI != SE; ++SI) { (*SI)->eraseFromParent(); } return lastAddedInsn; }
static StoreInst *findSafeStoreForStoreStrongContraction(LoadInst *Load,
                                                         Instruction *Release,
                                                         ProvenanceAnalysis &PA,
                                                         AliasAnalysis *AA) {
  StoreInst *Store = nullptr;
  bool SawRelease = false;

  // Get the location associated with Load.
  MemoryLocation Loc = MemoryLocation::get(Load);

  // Walk down to find the store and the release, which may be in either order.
  for (auto I = std::next(BasicBlock::iterator(Load)),
            E = Load->getParent()->end();
       I != E; ++I) {
    // If we found the store we were looking for and saw the release, break.
    // There is no more work to be done.
    if (Store && SawRelease)
      break;

    // Now we know that we have not seen either the store or the release. If I
    // is the release, mark that we saw the release and continue.
    Instruction *Inst = &*I;
    if (Inst == Release) {
      SawRelease = true;
      continue;
    }

    // Otherwise, we check if Inst is a "good" store. Grab the instruction
    // class of Inst.
    ARCInstKind Class = GetBasicARCInstKind(Inst);

    // If Inst is an unrelated retain, we don't care about it.
    //
    // TODO: This is one area where the optimization could be made more
    // aggressive.
    if (IsRetain(Class))
      continue;

    // If we have seen the store, but not the release...
    if (Store) {
      // We need to make sure that it is safe to move the release from its
      // current position to the store. This implies proving that any
      // instruction in between Store and the Release conservatively can not
      // use the RCIdentityRoot of Release. If we can prove that we can ignore
      // Inst, continue...
      if (!CanUse(Inst, Load, PA, Class)) {
        continue;
      }

      // Otherwise, be conservative and return nullptr.
      return nullptr;
    }

    // Ok, now we know we have not seen a store yet. See if Inst can write to
    // our load location; if it can not, just ignore the instruction.
    if (!(AA->getModRefInfo(Inst, Loc) & MRI_Mod))
      continue;

    Store = dyn_cast<StoreInst>(Inst);

    // If Inst can, then check whether Inst is a simple store. If Inst is not a
    // store, or is a store that is not simple, then something we do not
    // understand is writing to this memory, implying we can not move the load
    // over the write to any subsequent store that we may find.
    if (!Store || !Store->isSimple())
      return nullptr;

    // Then make sure that the pointer we are storing to is Ptr. If so, we
    // found our Store!
    if (Store->getPointerOperand() == Loc.Ptr)
      continue;

    // Otherwise, we have an unknown store to some other ptr that clobbers
    // Loc.Ptr. Bail!
    return nullptr;
  }

  // If we did not find the store or did not see the release, fail.
  if (!Store || !SawRelease)
    return nullptr;

  // We succeeded!
  return Store;
}
BlockFlow::BlockFlow(BasicBlock *b, std::vector<BoundsCheck*> *chks, ConstraintGraph *graph, std::map<BasicBlock*,BlockFlow*> *f) { blk = b; flows = f; checks = chks; cg = graph; numInsts = 0; isEntry = false; outSet.allChecks = true; killAll = false; killAllLoc = 0; for (BasicBlock::iterator i = blk->begin(), e = blk->end(); i != e; ++i) { Instruction *inst = &*i; numInsts++; instructions.push_back(inst); instLoc[i] = numInsts; StoreInst *SI = dyn_cast<StoreInst>(inst); if (isa<CallInst>(inst)) { killAll = true; killAllLoc = numInsts; } if (SI != NULL) { storeSet.insert(SI->getPointerOperand()); Value *to = SI->getPointerOperand(); Type* T = to->getType(); bool isPointer = T->isPointerTy() && T->getContainedType(0)->isPointerTy(); if (isPointer) { killAll = true; killAllLoc = numInsts; } lastStoreLoc[to] = numInsts; } } #if DEBUG_GLOBAL errs() << "Identifying Downward Exposed Bounds Checks:" << blk->getName() << "\n"; #endif for (std::vector<BoundsCheck*>::iterator it = checks->begin(), et = checks->end(); it != et; it++) { BoundsCheck *chk = *it; if (!chk->stillExists()) continue; // May require fixing later? unsigned int loc = instLoc[chk->getInsertPoint()]; Value *var = chk->getVariable(); if (var == NULL) { var = chk->getIndex(); if (var == NULL) { errs() << "Could not identify index value for following check:\n"; chk->print(); continue; } } /** bool downwardExposed = true; // Find downward exposed checks // Inefficient, basically go through the remaining instructions // and see if there is a store to the same location for (unsigned int i = loc; i <= numInsts; i++) { Instruction *inst = instructions.at(i-1); StoreInst *SI = dyn_cast<StoreInst>(inst); if (isa<CallInst>(inst)) { downwardExposed = false; #if DEBUG_GLOBAL errs() << "Following Check is not downward exposed\n"; chk->print(); #endif break; } if (SI != NULL) { if (var == SI->getPointerOperand()) { downwardExposed = false; #if DEBUG_GLOBAL errs() << "Following Check is not downward exposed\n"; chk->print(); #endif break; } else { Value *to = SI->getPointerOperand(); Type* T = to->getType(); bool isPointer = T->isPointerTy() && T->getContainedType(0)->isPointerTy(); if (isPointer) { downwardExposed = false; #if DEBUG_GLOBAL errs() << "Following Check is not downward exposed\n"; chk->print(); #endif break; } } } } if (!downwardExposed) continue; **/ if (loc < killAllLoc) { continue; } bool blkHasStore = false; if (lastStoreLoc.find(var) != lastStoreLoc.end()) { blkHasStore = true; } GlobalCheck *gCheck; // Insert lower bounds check if downward exposed, and index <= to variable it references if (chk->hasLowerBoundsCheck()){ if (chk->comparisonKnown && chk->comparedToVar <= 0) { bool add = true; ConstraintGraph::CompareEnum varChange = cg->identifyMemoryChange(var); // Check if there is a store instruction after the check if (blkHasStore && (lastStoreLoc[var] > loc)) { // If index variable becomes smaller across basic block, can't add lower bounds check if (varChange == ConstraintGraph::LESS_THAN || varChange == ConstraintGraph::UNKNOWN) { add = false; } } for (std::vector<GlobalCheck*>::iterator gi = globalChecks.begin(), ge = globalChecks.end(); gi != ge; gi++) { GlobalCheck *c = *gi; if (!c->isUpper && (c->var == var)) { add = false; } } if (add) { #if DEBUG_GLOBAL errs() << "==============================" << "\n"; errs() << "Adding Lower-Bound Check to GEN set:\n"; chk->print(); #endif gCheck = new GlobalCheck(chk, var, NULL, false, loc); globalChecks.push_back(gCheck); } } } // Insert upper bounds check if downward exposed, 
and index >= variable it references if (chk->hasUpperBoundsCheck()){ if (chk->comparisonKnown && chk->comparedToVar >= 0) { bool add = true; ConstraintGraph::CompareEnum varChange = cg->identifyMemoryChange(var); // Check if there is a store instruction after the check if (blkHasStore && (lastStoreLoc[var] > loc)) { // If index variable becomes bigger across basic block, can't add upper bounds check if (varChange == ConstraintGraph::GREATER_THAN || varChange == ConstraintGraph::UNKNOWN) { add = false; } } for (std::vector<GlobalCheck*>::iterator gi = globalChecks.begin(), ge = globalChecks.end(); gi != ge; gi++) { GlobalCheck *c = *gi; if (c->isUpper && (c->var == var)) { add = false; } } if (add) { #if DEBUG_GLOBAL errs() << "==============================" << "\n"; errs() << "Adding Exposed Upper-Bound Check to GEN set:\n"; chk->print(); #endif gCheck = new GlobalCheck(chk, var, chk->getUpperBound(), true, loc); globalChecks.push_back(gCheck); } } } } }
/// Attempt to merge an objc_release with a store, load, and objc_retain to form /// an objc_storeStrong. This can be a little tricky because the instructions /// don't always appear in order, and there may be unrelated intervening /// instructions. void ObjCARCContract::ContractRelease(Instruction *Release, inst_iterator &Iter) { LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release)); if (!Load || !Load->isSimple()) return; // For now, require everything to be in one basic block. BasicBlock *BB = Release->getParent(); if (Load->getParent() != BB) return; // Walk down to find the store and the release, which may be in either order. BasicBlock::iterator I = Load, End = BB->end(); ++I; AliasAnalysis::Location Loc = AA->getLocation(Load); StoreInst *Store = 0; bool SawRelease = false; for (; !Store || !SawRelease; ++I) { if (I == End) return; Instruction *Inst = I; if (Inst == Release) { SawRelease = true; continue; } InstructionClass Class = GetBasicInstructionClass(Inst); // Unrelated retains are harmless. if (IsRetain(Class)) continue; if (Store) { // The store is the point where we're going to put the objc_storeStrong, // so make sure there are no uses after it. if (CanUse(Inst, Load, PA, Class)) return; } else if (AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod) { // We are moving the load down to the store, so check for anything // else which writes to the memory between the load and the store. Store = dyn_cast<StoreInst>(Inst); if (!Store || !Store->isSimple()) return; if (Store->getPointerOperand() != Loc.Ptr) return; } } Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand()); // Walk up to find the retain. I = Store; BasicBlock::iterator Begin = BB->begin(); while (I != Begin && GetBasicInstructionClass(I) != IC_Retain) --I; Instruction *Retain = I; if (GetBasicInstructionClass(Retain) != IC_Retain) return; if (GetObjCArg(Retain) != New) return; Changed = true; ++NumStoreStrongs; LLVMContext &C = Release->getContext(); Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); Type *I8XX = PointerType::getUnqual(I8X); Value *Args[] = { Load->getPointerOperand(), New }; if (Args[0]->getType() != I8XX) Args[0] = new BitCastInst(Args[0], I8XX, "", Store); if (Args[1]->getType() != I8X) Args[1] = new BitCastInst(Args[1], I8X, "", Store); CallInst *StoreStrong = CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()), Args, "", Store); StoreStrong->setDoesNotThrow(); StoreStrong->setDebugLoc(Store->getDebugLoc()); // We can't set the tail flag yet, because we haven't yet determined // whether there are any escaping allocas. Remember this call, so that // we can set the tail flag once we know it's safe. StoreStrongCalls.insert(StoreStrong); if (&*Iter == Store) ++Iter; Store->eraseFromParent(); Release->eraseFromParent(); EraseInstruction(Retain); if (Load->use_empty()) Load->eraseFromParent(); }
static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) { ArrayType *AllocaTy = dyn_cast<ArrayType>(Alloca->getAllocatedType()); DEBUG(dbgs() << "Alloca candidate for vectorization\n"); // FIXME: There is no reason why we can't support larger arrays, we // are just being conservative for now. // FIXME: We also reject alloca's of the form [ 2 x [ 2 x i32 ]] or equivalent. Potentially these // could also be promoted but we don't currently handle this case if (!AllocaTy || AllocaTy->getNumElements() > 4 || AllocaTy->getNumElements() < 2 || !VectorType::isValidElementType(AllocaTy->getElementType())) { DEBUG(dbgs() << " Cannot convert type to vector\n"); return false; } std::map<GetElementPtrInst*, Value*> GEPVectorIdx; std::vector<Value*> WorkList; for (User *AllocaUser : Alloca->users()) { GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser); if (!GEP) { if (!canVectorizeInst(cast<Instruction>(AllocaUser), Alloca)) return false; WorkList.push_back(AllocaUser); continue; } Value *Index = GEPToVectorIndex(GEP); // If we can't compute a vector index from this GEP, then we can't // promote this alloca to vector. if (!Index) { DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP << '\n'); return false; } GEPVectorIdx[GEP] = Index; for (User *GEPUser : AllocaUser->users()) { if (!canVectorizeInst(cast<Instruction>(GEPUser), AllocaUser)) return false; WorkList.push_back(GEPUser); } } VectorType *VectorTy = arrayTypeToVecType(AllocaTy); DEBUG(dbgs() << " Converting alloca to vector " << *AllocaTy << " -> " << *VectorTy << '\n'); for (Value *V : WorkList) { Instruction *Inst = cast<Instruction>(V); IRBuilder<> Builder(Inst); switch (Inst->getOpcode()) { case Instruction::Load: { Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS); Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand(); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy); Value *VecValue = Builder.CreateLoad(BitCast); Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index); Inst->replaceAllUsesWith(ExtractElement); Inst->eraseFromParent(); break; } case Instruction::Store: { Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS); StoreInst *SI = cast<StoreInst>(Inst); Value *Ptr = SI->getPointerOperand(); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy); Value *VecValue = Builder.CreateLoad(BitCast); Value *NewVecValue = Builder.CreateInsertElement(VecValue, SI->getValueOperand(), Index); Builder.CreateStore(NewVecValue, BitCast); Inst->eraseFromParent(); break; } case Instruction::BitCast: case Instruction::AddrSpaceCast: break; default: llvm_unreachable("Inconsistency in instructions promotable to vector"); } } return true; }
void Lint::visitStoreInst(StoreInst &I) { visitMemoryReference(I, I.getPointerOperand(), I.getAlignment(), I.getOperand(0)->getType()); }
Value *BoUpSLP::vectorizeTree(ValueList &VL, int VF) { Type *ScalarTy = VL[0]->getType(); if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) ScalarTy = SI->getValueOperand()->getType(); VectorType *VecTy = VectorType::get(ScalarTy, VF); // Check if all of the operands are constants or identical. bool AllConst = true; bool AllSameScalar = true; for (unsigned i = 0, e = VF; i < e; ++i) { AllConst &= !!dyn_cast<Constant>(VL[i]); AllSameScalar &= (VL[0] == VL[i]); // Must have a single use. Instruction *I = dyn_cast<Instruction>(VL[i]); if (I && (I->getNumUses() > 1 || I->getParent() != BB)) return Scalarize(VL, VecTy); } // Is this a simple vector constant. if (AllConst || AllSameScalar) return Scalarize(VL, VecTy); // Scalarize unknown structures. Instruction *VL0 = dyn_cast<Instruction>(VL[0]); if (!VL0) return Scalarize(VL, VecTy); unsigned Opcode = VL0->getOpcode(); for (unsigned i = 0, e = VF; i < e; ++i) { Instruction *I = dyn_cast<Instruction>(VL[i]); // If not all of the instructions are identical then we have to scalarize. if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy); } switch (Opcode) { case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: case Instruction::FSub: case Instruction::Mul: case Instruction::FMul: case Instruction::UDiv: case Instruction::SDiv: case Instruction::FDiv: case Instruction::URem: case Instruction::SRem: case Instruction::FRem: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: case Instruction::And: case Instruction::Or: case Instruction::Xor: { ValueList LHSVL, RHSVL; for (int i = 0; i < VF; ++i) { RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0)); LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1)); } Value *RHS = vectorizeTree(RHSVL, VF); Value *LHS = vectorizeTree(LHSVL, VF); IRBuilder<> Builder(GetLastInstr(VL, VF)); BinaryOperator *BinOp = dyn_cast<BinaryOperator>(VL0); return Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS); } case Instruction::Load: { LoadInst *LI = dyn_cast<LoadInst>(VL0); unsigned Alignment = LI->getAlignment(); // Check if all of the loads are consecutive. for (unsigned i = 1, e = VF; i < e; ++i) if (!isConsecutiveAccess(VL[i-1], VL[i])) return Scalarize(VL, VecTy); IRBuilder<> Builder(GetLastInstr(VL, VF)); Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(), VecTy->getPointerTo()); LI = Builder.CreateLoad(VecPtr); LI->setAlignment(Alignment); return LI; } case Instruction::Store: { StoreInst *SI = dyn_cast<StoreInst>(VL0); unsigned Alignment = SI->getAlignment(); ValueList ValueOp; for (int i = 0; i < VF; ++i) ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand()); Value *VecValue = vectorizeTree(ValueOp, VF); IRBuilder<> Builder(GetLastInstr(VL, VF)); Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(), VecTy->getPointerTo()); Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment); for (int i = 0; i < VF; ++i) cast<Instruction>(VL[i])->eraseFromParent(); return 0; } default: return Scalarize(VL, VecTy); } }