static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) { // FIXME: We could probably with some care handle both volatile and atomic // stores here but it isn't clear that this is important. if (!SI.isSimple()) return false; Value *V = SI.getValueOperand(); Type *T = V->getType(); if (!T->isAggregateType()) return false; if (auto *ST = dyn_cast<StructType>(T)) { // If the struct only has one element, we unpack. if (ST->getNumElements() == 1) { V = IC.Builder->CreateExtractValue(V, 0); combineStoreToNewValue(IC, SI, V); return true; } } if (auto *AT = dyn_cast<ArrayType>(T)) { // If the array only has one element, we unpack. if (AT->getNumElements() == 1) { V = IC.Builder->CreateExtractValue(V, 0); combineStoreToNewValue(IC, SI, V); return true; } } return false; }
void InstrumentMemoryAccesses::visitStoreInst(StoreInst &SI) { // Instrument a store instruction with a store check. uint64_t Bytes = TD->getTypeStoreSize(SI.getValueOperand()->getType()); Value *AccessSize = ConstantInt::get(SizeTy, Bytes); instrument(SI.getPointerOperand(), AccessSize, StoreCheckFunction, SI); ++StoresInstrumented; }
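The load-side visitor would be the mirror image of the store check above; a minimal sketch, assuming the pass also declares LoadCheckFunction and a LoadsInstrumented counter as counterparts to the members used here:

void InstrumentMemoryAccesses::visitLoadInst(LoadInst &LI) {
  // Instrument a load instruction with a load check, mirroring the store path.
  // LoadCheckFunction and LoadsInstrumented are assumed counterparts not shown above.
  uint64_t Bytes = TD->getTypeStoreSize(LI.getType());
  Value *AccessSize = ConstantInt::get(SizeTy, Bytes);
  instrument(LI.getPointerOperand(), AccessSize, LoadCheckFunction, LI);
  ++LoadsInstrumented;
}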
void TempScopInfo::buildAccessFunctions(Region &R, ParamSetType &Params, BasicBlock &BB) { AccFuncSetType Functions; for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) { Instruction &Inst = *I; if (isa<LoadInst>(&Inst) || isa<StoreInst>(&Inst)) { // Create the SCEVAffFunc. if (LoadInst *ld = dyn_cast<LoadInst>(&Inst)) { unsigned size = TD->getTypeStoreSize(ld->getType()); Functions.push_back( std::make_pair(SCEVAffFunc(SCEVAffFunc::ReadMem, size), &Inst)); } else {//Else it must be a StoreInst. StoreInst *st = cast<StoreInst>(&Inst); unsigned size = TD->getTypeStoreSize(st->getValueOperand()->getType()); Functions.push_back( std::make_pair(SCEVAffFunc(SCEVAffFunc::WriteMem, size), &Inst)); } Value *Ptr = getPointerOperand(Inst); buildAffineFunction(SE->getSCEV(Ptr), Functions.back().first, R, Params); } } if (Functions.empty()) return; AccFuncSetType &Accs = AccFuncMap[&BB]; Accs.insert(Accs.end(), Functions.begin(), Functions.end()); }
bool IRTranslator::translateStore(const StoreInst &SI) { assert(SI.isSimple() && "only simple stores are supported at the moment"); MachineFunction &MF = MIRBuilder.getMF(); unsigned Val = getOrCreateVReg(*SI.getValueOperand()); unsigned Addr = getOrCreateVReg(*SI.getPointerOperand()); LLT VTy{*SI.getValueOperand()->getType()}, PTy{*SI.getPointerOperand()->getType()}; MIRBuilder.buildStore( VTy, PTy, Val, Addr, *MF.getMachineMemOperand(MachinePointerInfo(SI.getPointerOperand()), MachineMemOperand::MOStore, VTy.getSizeInBits() / 8, getMemOpAlignment(SI))); return true; }
bool Scalarizer::visitStoreInst(StoreInst &SI) { if (!ScalarizeLoadStore) return false; if (!SI.isSimple()) return false; VectorLayout Layout; Value *FullValue = SI.getValueOperand(); if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout)) return false; unsigned NumElems = Layout.VecTy->getNumElements(); IRBuilder<> Builder(SI.getParent(), &SI); Scatterer Ptr = scatter(&SI, SI.getPointerOperand()); Scatterer Val = scatter(&SI, FullValue); ValueVector Stores; Stores.resize(NumElems); for (unsigned I = 0; I < NumElems; ++I) { unsigned Align = Layout.getElemAlign(I); Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align); } transferMetadata(&SI, Stores); return true; }
static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) { // FIXME: We could probably with some care handle both volatile and atomic // stores here but it isn't clear that this is important. if (!SI.isSimple()) return false; Value *V = SI.getValueOperand(); Type *T = V->getType(); if (!T->isAggregateType()) return false; if (auto *ST = dyn_cast<StructType>(T)) { // If the struct only has one element, we unpack. unsigned Count = ST->getNumElements(); if (Count == 1) { V = IC.Builder->CreateExtractValue(V, 0); combineStoreToNewValue(IC, SI, V); return true; } // We don't want to break stores with padding here as we'd lose // the knowledge that padding exists for the rest of the pipeline. const DataLayout &DL = IC.getDataLayout(); auto *SL = DL.getStructLayout(ST); if (SL->hasPadding()) return false; SmallString<16> EltName = V->getName(); EltName += ".elt"; auto *Addr = SI.getPointerOperand(); SmallString<16> AddrName = Addr->getName(); AddrName += ".repack"; auto *IdxType = Type::getInt32Ty(ST->getContext()); auto *Zero = ConstantInt::get(IdxType, 0); for (unsigned i = 0; i < Count; i++) { Value *Indices[2] = { Zero, ConstantInt::get(IdxType, i), }; auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), AddrName); auto *Val = IC.Builder->CreateExtractValue(V, i, EltName); IC.Builder->CreateStore(Val, Ptr); } return true; } if (auto *AT = dyn_cast<ArrayType>(T)) { // If the array only has one element, we unpack. if (AT->getNumElements() == 1) { V = IC.Builder->CreateExtractValue(V, 0); combineStoreToNewValue(IC, SI, V); return true; } } return false; }
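The array branch above only handles the single-element case; it could be generalized element by element exactly like the struct branch. A hedged sketch (not the snippet's actual code), reusing only the Builder calls already seen above; arrays have no padding, so no layout check is needed:

  if (auto *AT = dyn_cast<ArrayType>(T)) {
    // Sketch: unpack every array element, mirroring the struct loop above.
    unsigned Count = AT->getNumElements();
    if (Count == 1) {
      V = IC.Builder->CreateExtractValue(V, 0);
      combineStoreToNewValue(IC, SI, V);
      return true;
    }
    auto *Addr = SI.getPointerOperand();
    auto *IdxType = Type::getInt64Ty(AT->getContext());
    auto *Zero = ConstantInt::get(IdxType, 0);
    for (unsigned i = 0; i < Count; i++) {
      Value *Indices[2] = { Zero, ConstantInt::get(IdxType, i) };
      auto *Ptr = IC.Builder->CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
                                                Addr->getName() + ".repack");
      auto *Val = IC.Builder->CreateExtractValue(V, i, V->getName() + ".elt");
      IC.Builder->CreateStore(Val, Ptr);
    }
    return true;
  }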
void PropagateJuliaAddrspaces::visitStoreInst(StoreInst &SI) { unsigned AS = SI.getPointerAddressSpace(); if (!isSpecialAS(AS)) return; Value *Replacement = LiftPointer(SI.getPointerOperand(), SI.getValueOperand()->getType(), &SI); if (!Replacement) return; SI.setOperand(StoreInst::getPointerOperandIndex(), Replacement); }
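The same pass presumably handles loads symmetrically; a sketch of what the matching load visitor could look like, assuming the same isSpecialAS/LiftPointer helpers used above:

void PropagateJuliaAddrspaces::visitLoadInst(LoadInst &LI) {
  // Mirror of the store case: lift the pointer operand out of the special
  // address space and rewrite the load to use the lifted pointer.
  unsigned AS = LI.getPointerAddressSpace();
  if (!isSpecialAS(AS))
    return;
  Value *Replacement = LiftPointer(LI.getPointerOperand(), LI.getType(), &LI);
  if (!Replacement)
    return;
  LI.setOperand(LoadInst::getPointerOperandIndex(), Replacement);
}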
/// \brief Combine stores to match the type of value being stored. /// /// The core idea here is that the memory does not have any intrinsic type and /// where we can we should match the type of a store to the type of value being /// stored. /// /// However, this routine must never change the width of a store or the number of /// stores as that would introduce a semantic change. This combine is expected to /// be a semantic no-op which just allows stores to more closely model the types /// of their incoming values. /// /// Currently, we also refuse to change the precise type used for an atomic or /// volatile store. This is debatable, and might be reasonable to change later. /// However, it is risky in case some backend or other part of LLVM is relying /// on the exact type stored to select appropriate atomic operations. /// /// \returns true if the store was successfully combined away. This indicates /// the caller must erase the store instruction. We have to let the caller erase /// the store instruction as otherwise there is no way to signal whether it was /// combined or not: IC.EraseInstFromFunction returns a null pointer. static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) { // FIXME: We could probably with some care handle both volatile and atomic // stores here but it isn't clear that this is important. if (!SI.isSimple()) return false; Value *Ptr = SI.getPointerOperand(); Value *V = SI.getValueOperand(); unsigned AS = SI.getPointerAddressSpace(); SmallVector<std::pair<unsigned, MDNode *>, 8> MD; SI.getAllMetadata(MD); // Fold away bit casts of the stored value by storing the original type. if (auto *BC = dyn_cast<BitCastInst>(V)) { V = BC->getOperand(0); StoreInst *NewStore = IC.Builder->CreateAlignedStore( V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)), SI.getAlignment()); for (const auto &MDPair : MD) { unsigned ID = MDPair.first; MDNode *N = MDPair.second; // Note, essentially every kind of metadata should be preserved here! This // routine is supposed to clone a store instruction changing *only its // type*. The only metadata it makes sense to drop is metadata which is // invalidated when the pointer type changes. This should essentially // never be the case in LLVM, but we explicitly switch over only known // metadata to be conservatively correct. If you are adding metadata to // LLVM which pertains to stores, you almost certainly want to add it // here. switch (ID) { case LLVMContext::MD_dbg: case LLVMContext::MD_tbaa: case LLVMContext::MD_prof: case LLVMContext::MD_fpmath: case LLVMContext::MD_tbaa_struct: case LLVMContext::MD_alias_scope: case LLVMContext::MD_noalias: case LLVMContext::MD_nontemporal: case LLVMContext::MD_mem_parallel_loop_access: case LLVMContext::MD_nonnull: // All of these directly apply. NewStore->setMetadata(ID, N); break; case LLVMContext::MD_invariant_load: case LLVMContext::MD_range: break; } } return true; } // FIXME: We should also canonicalize loads of vectors when their elements are // cast to other types. return false; }
void GCInvariantVerifier::visitStoreInst(StoreInst &SI) { Type *VTy = SI.getValueOperand()->getType(); if (VTy->isPointerTy()) { /* We currently don't obey this for arguments. That's ok - they're externally rooted. */ if (!isa<Argument>(SI.getValueOperand())) { unsigned AS = cast<PointerType>(VTy)->getAddressSpace(); Check(AS != AddressSpace::CalleeRooted && AS != AddressSpace::Derived, "Illegal store of decayed value", &SI); } } VTy = SI.getPointerOperand()->getType(); if (VTy->isPointerTy()) { unsigned AS = cast<PointerType>(VTy)->getAddressSpace(); Check(AS != AddressSpace::CalleeRooted, "Illegal store to callee rooted value", &SI); } }
IRAccess TempScopInfo::buildIRAccess(Instruction *Inst, Loop *L, Region *R, const ScopDetection::BoxedLoopsSetTy *BoxedLoops) { unsigned Size; Type *SizeType; enum IRAccess::TypeKind Type; if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) { SizeType = Load->getType(); Size = TD->getTypeStoreSize(SizeType); Type = IRAccess::READ; } else { StoreInst *Store = cast<StoreInst>(Inst); SizeType = Store->getValueOperand()->getType(); Size = TD->getTypeStoreSize(SizeType); Type = IRAccess::MUST_WRITE; } const SCEV *AccessFunction = SE->getSCEVAtScope(getPointerOperand(*Inst), L); const SCEVUnknown *BasePointer = dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFunction)); assert(BasePointer && "Could not find base pointer"); AccessFunction = SE->getMinusSCEV(AccessFunction, BasePointer); auto AccItr = InsnToMemAcc.find(Inst); if (PollyDelinearize && AccItr != InsnToMemAcc.end()) return IRAccess(Type, BasePointer->getValue(), AccessFunction, Size, true, AccItr->second.DelinearizedSubscripts, AccItr->second.Shape->DelinearizedSizes); // Check if the access depends on a loop contained in a non-affine subregion. bool isVariantInNonAffineLoop = false; if (BoxedLoops) { SetVector<const Loop *> Loops; findLoops(AccessFunction, Loops); for (const Loop *L : Loops) if (BoxedLoops->count(L)) isVariantInNonAffineLoop = true; } bool IsAffine = !isVariantInNonAffineLoop && isAffineExpr(R, AccessFunction, *SE, BasePointer->getValue()); SmallVector<const SCEV *, 4> Subscripts, Sizes; Subscripts.push_back(AccessFunction); Sizes.push_back(SE->getConstant(ZeroOffset->getType(), Size)); if (!IsAffine && Type == IRAccess::MUST_WRITE) Type = IRAccess::MAY_WRITE; return IRAccess(Type, BasePointer->getValue(), AccessFunction, Size, IsAffine, Subscripts, Sizes); }
// -- handle store instruction -- void UnsafeTypeCastingCheck::handleStoreInstruction (Instruction *inst) { StoreInst *sinst = dyn_cast<StoreInst>(inst); if (sinst == NULL) utccAbort("handleStoreInstruction cannot process with a non-store instruction"); Value *pt = sinst->getPointerOperand(); Value *vl = sinst->getValueOperand(); UTCC_TYPE ptt = queryPointedType(pt); UTCC_TYPE vlt = queryExprType(vl); setPointedType(pt, vlt); }
void MutationGen::genSTDStore(Instruction * inst, StringRef fname, int index){ StoreInst *st = cast<StoreInst>(inst); Type* t = st->getValueOperand()->getType(); if(isSupportedType(t)){ std::stringstream ss; ss<<"STD:"<<std::string(fname)<<":"<<index<< ":"<<inst->getOpcode() << ":"<<0<<'\n'; ofresult<<ss.str(); ofresult.flush(); muts_num++; } }
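isSupportedType is not shown in this snippet; a purely illustrative stand-in, assuming the mutation generator only cares about scalar integer and floating-point stored values (the real predicate may differ):

static bool isSupportedType(Type *T) {
  // Hypothetical helper: accept scalar integer and floating-point value types.
  return T->isIntegerTy() || T->isFloatingPointTy();
}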
/// \brief Combine stores to match the type of value being stored. /// /// The core idea here is that the memory does not have any intrinsic type and /// where we can we should match the type of a store to the type of value being /// stored. /// /// However, this routine must never change the width of a store or the number of /// stores as that would introduce a semantic change. This combine is expected to /// be a semantic no-op which just allows stores to more closely model the types /// of their incoming values. /// /// Currently, we also refuse to change the precise type used for an atomic or /// volatile store. This is debatable, and might be reasonable to change later. /// However, it is risky in case some backend or other part of LLVM is relying /// on the exact type stored to select appropriate atomic operations. /// /// \returns true if the store was successfully combined away. This indicates /// the caller must erase the store instruction. We have to let the caller erase /// the store instruction as otherwise there is no way to signal whether it was /// combined or not: IC.EraseInstFromFunction returns a null pointer. static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) { // FIXME: We could probably with some care handle both volatile and atomic // stores here but it isn't clear that this is important. if (!SI.isSimple()) return false; Value *V = SI.getValueOperand(); // Fold away bit casts of the stored value by storing the original type. if (auto *BC = dyn_cast<BitCastInst>(V)) { V = BC->getOperand(0); combineStoreToNewValue(IC, SI, V); return true; } // FIXME: We should also canonicalize loads of vectors when their elements are // cast to other types. return false; }
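combineStoreToNewValue itself is not included in this snippet. Judging from the earlier, inlined variant of this combine (which rebuilds the store by hand and copies metadata), a plausible sketch of the helper is the following; the metadata handling is simplified here, whereas the inlined version switches over known metadata kinds:

static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI,
                                         Value *V) {
  // Sketch only: rebuild the store with the new value's type, bitcasting the
  // pointer to match, and carry over alignment and (simplified) metadata.
  Value *Ptr = SI.getPointerOperand();
  unsigned AS = SI.getPointerAddressSpace();
  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
  SI.getAllMetadata(MD);
  StoreInst *NewStore = IC.Builder->CreateAlignedStore(
      V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
      SI.getAlignment());
  for (const auto &MDPair : MD)
    NewStore->setMetadata(MDPair.first, MDPair.second);
  return NewStore;
}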
void TempScopInfo::buildAccessFunctions(Region &R, BasicBlock &BB) { AccFuncSetType Functions; for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) { Instruction &Inst = *I; if (isa<LoadInst>(&Inst) || isa<StoreInst>(&Inst)) { unsigned Size; enum IRAccess::TypeKind Type; if (LoadInst *Load = dyn_cast<LoadInst>(&Inst)) { Size = TD->getTypeStoreSize(Load->getType()); Type = IRAccess::READ; } else { StoreInst *Store = cast<StoreInst>(&Inst); Size = TD->getTypeStoreSize(Store->getValueOperand()->getType()); Type = IRAccess::WRITE; } const SCEV *AccessFunction = SE->getSCEV(getPointerOperand(Inst)); const SCEVUnknown *BasePointer = dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFunction)); assert(BasePointer && "Could not find base pointer"); AccessFunction = SE->getMinusSCEV(AccessFunction, BasePointer); bool IsAffine = isAffineExpr(&R, AccessFunction, *SE, BasePointer->getValue()); Functions.push_back( std::make_pair(IRAccess(Type, BasePointer->getValue(), AccessFunction, Size, IsAffine), &Inst)); } } if (Functions.empty()) return; AccFuncSetType &Accs = AccFuncMap[&BB]; Accs.insert(Accs.end(), Functions.begin(), Functions.end()); }
/// store {atomic|volatile} T %val, T* %ptr memory_order, align sizeof(T) /// becomes: /// call void @llvm.nacl.atomic.store.i<size>(%val, %ptr, memory_order) void AtomicVisitor::visitStoreInst(StoreInst &I) { return; // XXX EMSCRIPTEN if (I.isSimple()) return; PointerHelper<StoreInst> PH(*this, I); const NaCl::AtomicIntrinsics::AtomicIntrinsic *Intrinsic = findAtomicIntrinsic(I, Intrinsic::nacl_atomic_store, PH.PET); checkAlignment(I, I.getAlignment(), PH.BitSize / CHAR_BIT); Value *V = I.getValueOperand(); if (!V->getType()->isIntegerTy()) { // The store isn't of an integer type. We define atomics in terms of // integers, so bitcast the value to store to an integer of the // proper width. CastInst *Cast = createCast(I, V, Type::getIntNTy(C, PH.BitSize), V->getName() + ".cast"); Cast->setDebugLoc(I.getDebugLoc()); V = Cast; } checkSizeMatchesType(I, PH.BitSize, V->getType()); Value *Args[] = {V, PH.P, freezeMemoryOrder(I, I.getOrdering())}; replaceInstructionWithIntrinsicCall(I, Intrinsic, PH.OriginalPET, PH.PET, Args); }
void Executor::executeStore(Instruction *i, Value *addr) { if (DisabledSymbolicExeCurRun) { return; } assert(i && "Expecting an instruction!"); StoreInst *store = (StoreInst*) i; Value *val = store->getValueOperand(); // val is a constant value if (isa<ConstantInt>(val)) { // P = P - ptr PMAP->remove(addr); } // val in domain(S) else if (SMAP->contains(val)) { SymbolPtr Sval = SMAP->get(val); // P' = P[ptr->S(val)] PMAP->update(addr, Sval); } else { // S = S - val SMAP->remove(val); // P = P - ptr PMAP->remove(addr); } }
unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const { if (!VTTI) return -1; switch (I->getOpcode()) { case Instruction::Ret: case Instruction::PHI: case Instruction::Br: { return VTTI->getCFInstrCost(I->getOpcode()); } case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: case Instruction::FSub: case Instruction::Mul: case Instruction::FMul: case Instruction::UDiv: case Instruction::SDiv: case Instruction::FDiv: case Instruction::URem: case Instruction::SRem: case Instruction::FRem: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: case Instruction::And: case Instruction::Or: case Instruction::Xor: { return VTTI->getArithmeticInstrCost(I->getOpcode(), I->getType()); } case Instruction::Select: { SelectInst *SI = cast<SelectInst>(I); Type *CondTy = SI->getCondition()->getType(); return VTTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy); } case Instruction::ICmp: case Instruction::FCmp: { Type *ValTy = I->getOperand(0)->getType(); return VTTI->getCmpSelInstrCost(I->getOpcode(), ValTy); } case Instruction::Store: { StoreInst *SI = cast<StoreInst>(I); Type *ValTy = SI->getValueOperand()->getType(); return VTTI->getMemoryOpCost(I->getOpcode(), ValTy, SI->getAlignment(), SI->getPointerAddressSpace()); } case Instruction::Load: { LoadInst *LI = cast<LoadInst>(I); return VTTI->getMemoryOpCost(I->getOpcode(), I->getType(), LI->getAlignment(), LI->getPointerAddressSpace()); } case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::FPExt: case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::SIToFP: case Instruction::UIToFP: case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: { Type *SrcTy = I->getOperand(0)->getType(); return VTTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy); } case Instruction::ExtractElement: { ExtractElementInst * EEI = cast<ExtractElementInst>(I); ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); unsigned Idx = -1; if (CI) Idx = CI->getZExtValue(); return VTTI->getVectorInstrCost(I->getOpcode(), EEI->getOperand(0)->getType(), Idx); } case Instruction::InsertElement: { InsertElementInst * IE = cast<InsertElementInst>(I); ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); unsigned Idx = -1; if (CI) Idx = CI->getZExtValue(); return VTTI->getVectorInstrCost(I->getOpcode(), IE->getType(), Idx); } default: // We don't have any information on this instruction. return -1; } }
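A typical way to consume this analysis is to walk a function and accumulate the per-instruction costs, treating the -1 sentinel as "unknown"; a minimal sketch, assuming the caller already holds a reference to the analysis:

static unsigned getTotalFunctionCost(const CostModelAnalysis &CM, Function &F) {
  // Sum modeled costs over all instructions, skipping the ones the model
  // cannot classify (getInstructionCost returns -1 for those).
  unsigned Total = 0;
  for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE; ++BB)
    for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) {
      unsigned Cost = CM.getInstructionCost(&*I);
      if (Cost != (unsigned)-1)
        Total += Cost;
    }
  return Total;
}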
/// Attempt to merge an objc_release with a store, load, and objc_retain to form /// an objc_storeStrong. An objc_storeStrong: /// /// objc_storeStrong(i8** %old_ptr, i8* new_value) /// /// is equivalent to the following IR sequence: /// /// ; Load old value. /// %old_value = load i8** %old_ptr (1) /// /// ; Increment the new value and then release the old value. This must occur /// ; in order in case old_value releases new_value in its destructor causing /// ; us to potentially have a dangling ptr. /// tail call i8* @objc_retain(i8* %new_value) (2) /// tail call void @objc_release(i8* %old_value) (3) /// /// ; Store the new_value into old_ptr /// store i8* %new_value, i8** %old_ptr (4) /// /// The safety of this optimization is based around the following /// considerations: /// /// 1. We are forming the store strong at the store. Thus to perform this /// optimization it must be safe to move the retain, load, and release to /// (4). /// 2. We need to make sure that any re-orderings of (1), (2), (3), (4) are /// safe. void ObjCARCContract::tryToContractReleaseIntoStoreStrong(Instruction *Release, inst_iterator &Iter) { // See if we are releasing something that we just loaded. auto *Load = dyn_cast<LoadInst>(GetArgRCIdentityRoot(Release)); if (!Load || !Load->isSimple()) return; // For now, require everything to be in one basic block. BasicBlock *BB = Release->getParent(); if (Load->getParent() != BB) return; // First scan down the BB from Load, looking for a store of the RCIdentityRoot // of Load's StoreInst *Store = findSafeStoreForStoreStrongContraction(Load, Release, PA, AA); // If we fail, bail. if (!Store) return; // Then find what new_value's RCIdentity Root is. Value *New = GetRCIdentityRoot(Store->getValueOperand()); // Then walk up the BB and look for a retain on New without any intervening // instructions which conservatively might decrement ref counts. Instruction *Retain = findRetainForStoreStrongContraction(New, Store, Release, PA); // If we fail, bail. if (!Retain) return; Changed = true; ++NumStoreStrongs; DEBUG( llvm::dbgs() << " Contracting retain, release into objc_storeStrong.\n" << " Old:\n" << " Store: " << *Store << "\n" << " Release: " << *Release << "\n" << " Retain: " << *Retain << "\n" << " Load: " << *Load << "\n"); LLVMContext &C = Release->getContext(); Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); Type *I8XX = PointerType::getUnqual(I8X); Value *Args[] = { Load->getPointerOperand(), New }; if (Args[0]->getType() != I8XX) Args[0] = new BitCastInst(Args[0], I8XX, "", Store); if (Args[1]->getType() != I8X) Args[1] = new BitCastInst(Args[1], I8X, "", Store); Constant *Decl = EP.get(ARCRuntimeEntryPointKind::StoreStrong); CallInst *StoreStrong = CallInst::Create(Decl, Args, "", Store); StoreStrong->setDoesNotThrow(); StoreStrong->setDebugLoc(Store->getDebugLoc()); // We can't set the tail flag yet, because we haven't yet determined // whether there are any escaping allocas. Remember this call, so that // we can set the tail flag once we know it's safe. StoreStrongCalls.insert(StoreStrong); DEBUG(llvm::dbgs() << " New Store Strong: " << *StoreStrong << "\n"); if (&*Iter == Store) ++Iter; Store->eraseFromParent(); Release->eraseFromParent(); EraseInstruction(Retain); if (Load->use_empty()) Load->eraseFromParent(); }
/// tryAggregating - When scanning forward over instructions, we look for /// other loads or stores that could be aggregated with this one. /// Returns the last instruction added (if one was added) since we might have /// removed some loads or stores and that might invalidate an iterator. Instruction *AggregateGlobalOpsOpt::tryAggregating(Instruction *StartInst, Value *StartPtr, bool DebugThis) { if (TD == 0) return 0; Module* M = StartInst->getParent()->getParent()->getParent(); LLVMContext& Context = StartInst->getContext(); Type* int8Ty = Type::getInt8Ty(Context); Type* sizeTy = Type::getInt64Ty(Context); Type* globalInt8PtrTy = int8Ty->getPointerTo(globalSpace); bool isLoad = isa<LoadInst>(StartInst); bool isStore = isa<StoreInst>(StartInst); Instruction *lastAddedInsn = NULL; Instruction *LastLoadOrStore = NULL; SmallVector<Instruction*, 8> toRemove; // Okay, so we now have a single global load/store. Scan to find // all subsequent stores of the same value to offset from the same pointer. // Join these together into ranges, so we can decide whether contiguous blocks // are stored. MemOpRanges Ranges(*TD); // Put the first store in since we want to preserve the order. Ranges.addInst(0, StartInst); BasicBlock::iterator BI = StartInst; for (++BI; !isa<TerminatorInst>(BI); ++BI) { if( isGlobalLoadOrStore(BI, globalSpace, isLoad, isStore) ) { // OK! } else { // If the instruction is readnone, ignore it, otherwise bail out. We // don't even allow readonly here because we don't want something like: // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). if (BI->mayWriteToMemory()) break; if (isStore && BI->mayReadFromMemory()) break; continue; } if ( isStore && isa<StoreInst>(BI) ) { StoreInst *NextStore = cast<StoreInst>(BI); // If this is a store, see if we can merge it in. if (!NextStore->isSimple()) break; // Check to see if this store is to a constant offset from the start ptr. int64_t Offset; if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD)) break; Ranges.addStore(Offset, NextStore); LastLoadOrStore = NextStore; } else { LoadInst *NextLoad = cast<LoadInst>(BI); if (!NextLoad->isSimple()) break; // Check to see if this load is to a constant offset from the start ptr. int64_t Offset; if (!IsPointerOffset(StartPtr, NextLoad->getPointerOperand(), Offset, *TD)) break; Ranges.addLoad(Offset, NextLoad); LastLoadOrStore = NextLoad; } } // If we have no ranges, then we just had a single store with nothing that // could be merged in. This is a very common case of course. if (!Ranges.moreThanOneOp()) return 0; // Divide the instructions between StartInst and LastLoadOrStore into // addressing, memops, and uses of memops (uses of loads) reorderAddressingMemopsUses(StartInst, LastLoadOrStore, DebugThis); Instruction* insertBefore = StartInst; IRBuilder<> builder(insertBefore); // Now that we have full information about ranges, loop over the ranges and // emit memcpy's for anything big enough to be worthwhile. for (MemOpRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { const MemOpRange &Range = *I; Value* oldBaseI = NULL; Value* newBaseI = NULL; if (Range.TheStores.size() == 1) continue; // Don't bother if there's only one thing... builder.SetInsertPoint(insertBefore); // Otherwise, we do want to transform this! Create a new memcpy. // Get the starting pointer of the block. 
StartPtr = Range.StartPtr; if( DebugThis ) { errs() << "base is:"; StartPtr->dump(); } // Determine alignment unsigned Alignment = Range.Alignment; if (Alignment == 0) { Type *EltType = cast<PointerType>(StartPtr->getType())->getElementType(); Alignment = TD->getABITypeAlignment(EltType); } Instruction *alloc = NULL; Value *globalPtr = NULL; // create temporary alloca space to communicate to/from. alloc = makeAlloca(int8Ty, "agg.tmp", insertBefore, Range.End-Range.Start, Alignment); // Generate the old and new base pointers before we output // anything else. { Type* iPtrTy = TD->getIntPtrType(alloc->getType()); Type* iNewBaseTy = TD->getIntPtrType(alloc->getType()); oldBaseI = builder.CreatePtrToInt(StartPtr, iPtrTy, "agg.tmp.oldb.i"); newBaseI = builder.CreatePtrToInt(alloc, iNewBaseTy, "agg.tmp.newb.i"); } // If storing, do the stores we had into our alloca'd region. if( isStore ) { for (SmallVector<Instruction*, 16>::const_iterator SI = Range.TheStores.begin(), SE = Range.TheStores.end(); SI != SE; ++SI) { StoreInst* oldStore = cast<StoreInst>(*SI); if( DebugThis ) { errs() << "have store in range:"; oldStore->dump(); } Value* ptrToAlloc = rebasePointer(oldStore->getPointerOperand(), StartPtr, alloc, "agg.tmp", &builder, *TD, oldBaseI, newBaseI); // Old load must not be volatile or atomic... or we shouldn't have put // it in ranges assert(!(oldStore->isVolatile() || oldStore->isAtomic())); StoreInst* newStore = builder.CreateStore(oldStore->getValueOperand(), ptrToAlloc); newStore->setAlignment(oldStore->getAlignment()); newStore->takeName(oldStore); } } // cast the pointer that was load/stored to i8 if necessary. if( StartPtr->getType()->getPointerElementType() == int8Ty ) { globalPtr = StartPtr; } else { globalPtr = builder.CreatePointerCast(StartPtr, globalInt8PtrTy, "agg.cast"); } // Get a Constant* for the length. Constant* len = ConstantInt::get(sizeTy, Range.End-Range.Start, false); // Now add the memcpy instruction unsigned addrSpaceDst,addrSpaceSrc; addrSpaceDst = addrSpaceSrc = 0; if( isStore ) addrSpaceDst = globalSpace; if( isLoad ) addrSpaceSrc = globalSpace; Type *types[3]; types[0] = PointerType::get(int8Ty, addrSpaceDst); types[1] = PointerType::get(int8Ty, addrSpaceSrc); types[2] = sizeTy; Function *func = Intrinsic::getDeclaration(M, Intrinsic::memcpy, types); Value* args[5]; // dst src len alignment isvolatile if( isStore ) { // it's a store (ie put) args[0] = globalPtr; args[1] = alloc; } else { // it's a load (ie get) args[0] = alloc; args[1] = globalPtr; } args[2] = len; // alignment args[3] = ConstantInt::get(Type::getInt32Ty(Context), 0, false); // isvolatile args[4] = ConstantInt::get(Type::getInt1Ty(Context), 0, false); Instruction* aMemCpy = builder.CreateCall(func, args); /* DEBUG(dbgs() << "Replace ops:\n"; for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) dbgs() << *Range.TheStores[i] << '\n'; dbgs() << "With: " << *AMemSet << '\n'); */ if (!Range.TheStores.empty()) aMemCpy->setDebugLoc(Range.TheStores[0]->getDebugLoc()); lastAddedInsn = aMemCpy; // If loading, load from the memcpy'd region if( isLoad ) { for (SmallVector<Instruction*, 16>::const_iterator SI = Range.TheStores.begin(), SE = Range.TheStores.end(); SI != SE; ++SI) { LoadInst* oldLoad = cast<LoadInst>(*SI); if( DebugThis ) { errs() << "have load in range:"; oldLoad->dump(); } Value* ptrToAlloc = rebasePointer(oldLoad->getPointerOperand(), StartPtr, alloc, "agg.tmp", &builder, *TD, oldBaseI, newBaseI); // Old load must not be volatile or atomic... 
or we shouldn't have put // it in ranges assert(!(oldLoad->isVolatile() || oldLoad->isAtomic())); LoadInst* newLoad = builder.CreateLoad(ptrToAlloc); newLoad->setAlignment(oldLoad->getAlignment()); oldLoad->replaceAllUsesWith(newLoad); newLoad->takeName(oldLoad); lastAddedInsn = newLoad; } } // Save old loads/stores for removal for (SmallVector<Instruction*, 16>::const_iterator SI = Range.TheStores.begin(), SE = Range.TheStores.end(); SI != SE; ++SI) { Instruction* insn = *SI; toRemove.push_back(insn); } } // Zap all the old loads/stores for (SmallVector<Instruction*, 16>::const_iterator SI = toRemove.begin(), SE = toRemove.end(); SI != SE; ++SI) { (*SI)->eraseFromParent(); } return lastAddedInsn; }
static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) { ArrayType *AllocaTy = dyn_cast<ArrayType>(Alloca->getAllocatedType()); DEBUG(dbgs() << "Alloca candidate for vectorization\n"); // FIXME: There is no reason why we can't support larger arrays, we // are just being conservative for now. // FIXME: We also reject alloca's of the form [ 2 x [ 2 x i32 ]] or equivalent. Potentially these // could also be promoted but we don't currently handle this case if (!AllocaTy || AllocaTy->getNumElements() > 4 || AllocaTy->getNumElements() < 2 || !VectorType::isValidElementType(AllocaTy->getElementType())) { DEBUG(dbgs() << " Cannot convert type to vector\n"); return false; } std::map<GetElementPtrInst*, Value*> GEPVectorIdx; std::vector<Value*> WorkList; for (User *AllocaUser : Alloca->users()) { GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser); if (!GEP) { if (!canVectorizeInst(cast<Instruction>(AllocaUser), Alloca)) return false; WorkList.push_back(AllocaUser); continue; } Value *Index = GEPToVectorIndex(GEP); // If we can't compute a vector index from this GEP, then we can't // promote this alloca to vector. if (!Index) { DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP << '\n'); return false; } GEPVectorIdx[GEP] = Index; for (User *GEPUser : AllocaUser->users()) { if (!canVectorizeInst(cast<Instruction>(GEPUser), AllocaUser)) return false; WorkList.push_back(GEPUser); } } VectorType *VectorTy = arrayTypeToVecType(AllocaTy); DEBUG(dbgs() << " Converting alloca to vector " << *AllocaTy << " -> " << *VectorTy << '\n'); for (Value *V : WorkList) { Instruction *Inst = cast<Instruction>(V); IRBuilder<> Builder(Inst); switch (Inst->getOpcode()) { case Instruction::Load: { Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS); Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand(); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy); Value *VecValue = Builder.CreateLoad(BitCast); Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index); Inst->replaceAllUsesWith(ExtractElement); Inst->eraseFromParent(); break; } case Instruction::Store: { Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS); StoreInst *SI = cast<StoreInst>(Inst); Value *Ptr = SI->getPointerOperand(); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy); Value *VecValue = Builder.CreateLoad(BitCast); Value *NewVecValue = Builder.CreateInsertElement(VecValue, SI->getValueOperand(), Index); Builder.CreateStore(NewVecValue, BitCast); Inst->eraseFromParent(); break; } case Instruction::BitCast: case Instruction::AddrSpaceCast: break; default: llvm_unreachable("Inconsistency in instructions promotable to vector"); } } return true; }
/// /// runOnFunction /// bool LongLongMemAccessLowering::runOnFunction(Function &F) { std::vector<StoreInst *> storeInstV; std::vector<LoadInst *> loadInstV; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE; ++BI) { Instruction *inst = BI; if (StoreInst *storeInst = dyn_cast<StoreInst>(inst)) { if (!storeInst->getValueOperand()->getType()->isIntegerTy(64)) { continue; } assert(cast<PointerType>(storeInst->getPointerOperand()->getType()) ->getElementType()->isIntegerTy(64)); storeInstV.push_back(storeInst); } else if (LoadInst *loadInst = dyn_cast<LoadInst>(inst)) { if (!loadInst->getType()->isIntegerTy(64)) { continue; } assert(cast<PointerType>(loadInst->getPointerOperand()->getType()) ->getElementType()->isIntegerTy(64)); loadInstV.push_back(loadInst); } } } for (unsigned i = 0; i < storeInstV.size(); ++i) { StoreInst *inst = storeInstV.at(i); Value *storeVal = inst->getValueOperand(); Value *storePointer = inst->getPointerOperand(); // Insert new instructions BitCastInst *storeAddrLo = new BitCastInst(storePointer, Type::getInt32PtrTy(F.getContext()), "", inst); ConstantInt *offsetConst = ConstantInt::getSigned(Type::getInt32Ty(F.getContext()), 1); std::vector<Value *> gepIdxList(1, offsetConst); GetElementPtrInst *storeAddrHi = GetElementPtrInst::Create(storeAddrLo, gepIdxList, "", inst); TruncInst *storeValLo = new TruncInst(storeVal, Type::getInt32Ty(F.getContext()), "", inst); Value *aShrOffset = ConstantInt::getSigned(Type::getInt64Ty(F.getContext()), 32); BinaryOperator *storeValAShr = BinaryOperator::Create(Instruction::AShr, storeVal, aShrOffset, "", inst); TruncInst *storeValHi = new TruncInst(storeValAShr, Type::getInt32Ty(F.getContext()), "", inst); StoreInst *storeLo = new StoreInst(storeValLo, storeAddrLo, inst); StoreInst *storeHi = new StoreInst(storeValHi, storeAddrHi, inst); storeLo->setAlignment(4); storeHi->setAlignment(4); // Remove inst inst->eraseFromParent(); } for (unsigned i = 0; i < loadInstV.size(); ++i) { LoadInst *inst = loadInstV.at(i); Value *loadPointer = inst->getPointerOperand(); // Insert new instructions BitCastInst *loadAddrLo = new BitCastInst(loadPointer, Type::getInt32PtrTy(F.getContext()), "", inst); ConstantInt *offsetConst = ConstantInt::getSigned(Type::getInt32Ty(F.getContext()), 1); std::vector<Value *> gepIdxList(1, offsetConst); GetElementPtrInst *loadAddrHi = GetElementPtrInst::Create(loadAddrLo, gepIdxList, "", inst); LoadInst *loadLo = new LoadInst(loadAddrLo, "", inst); LoadInst *loadHi = new LoadInst(loadAddrHi, "", inst); ZExtInst *loadLoLL = new ZExtInst(loadLo, Type::getInt64Ty(F.getContext()), "", inst); ZExtInst *loadHiLL = new ZExtInst(loadHi, Type::getInt64Ty(F.getContext()), "", inst); Value *shlOffset = ConstantInt::getSigned(Type::getInt64Ty(F.getContext()), 32); BinaryOperator *loadHiLLShl = BinaryOperator::Create(Instruction::Shl, loadHiLL, shlOffset, "", inst); BinaryOperator *loadValue = BinaryOperator::Create(Instruction::Or, loadLoLL, loadHiLLShl, ""); // Replace inst with new "loaded" value, the old value is deleted ReplaceInstWithInst(inst, loadValue); } return true; // function is modified }
/// Attempt to merge an objc_release with a store, load, and objc_retain to form /// an objc_storeStrong. This can be a little tricky because the instructions /// don't always appear in order, and there may be unrelated intervening /// instructions. void ObjCARCContract::ContractRelease(Instruction *Release, inst_iterator &Iter) { LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release)); if (!Load || !Load->isSimple()) return; // For now, require everything to be in one basic block. BasicBlock *BB = Release->getParent(); if (Load->getParent() != BB) return; // Walk down to find the store and the release, which may be in either order. BasicBlock::iterator I = Load, End = BB->end(); ++I; AliasAnalysis::Location Loc = AA->getLocation(Load); StoreInst *Store = 0; bool SawRelease = false; for (; !Store || !SawRelease; ++I) { if (I == End) return; Instruction *Inst = I; if (Inst == Release) { SawRelease = true; continue; } InstructionClass Class = GetBasicInstructionClass(Inst); // Unrelated retains are harmless. if (IsRetain(Class)) continue; if (Store) { // The store is the point where we're going to put the objc_storeStrong, // so make sure there are no uses after it. if (CanUse(Inst, Load, PA, Class)) return; } else if (AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod) { // We are moving the load down to the store, so check for anything // else which writes to the memory between the load and the store. Store = dyn_cast<StoreInst>(Inst); if (!Store || !Store->isSimple()) return; if (Store->getPointerOperand() != Loc.Ptr) return; } } Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand()); // Walk up to find the retain. I = Store; BasicBlock::iterator Begin = BB->begin(); while (I != Begin && GetBasicInstructionClass(I) != IC_Retain) --I; Instruction *Retain = I; if (GetBasicInstructionClass(Retain) != IC_Retain) return; if (GetObjCArg(Retain) != New) return; Changed = true; ++NumStoreStrongs; LLVMContext &C = Release->getContext(); Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); Type *I8XX = PointerType::getUnqual(I8X); Value *Args[] = { Load->getPointerOperand(), New }; if (Args[0]->getType() != I8XX) Args[0] = new BitCastInst(Args[0], I8XX, "", Store); if (Args[1]->getType() != I8X) Args[1] = new BitCastInst(Args[1], I8X, "", Store); CallInst *StoreStrong = CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()), Args, "", Store); StoreStrong->setDoesNotThrow(); StoreStrong->setDebugLoc(Store->getDebugLoc()); // We can't set the tail flag yet, because we haven't yet determined // whether there are any escaping allocas. Remember this call, so that // we can set the tail flag once we know it's safe. StoreStrongCalls.insert(StoreStrong); if (&*Iter == Store) ++Iter; Store->eraseFromParent(); Release->eraseFromParent(); EraseInstruction(Retain); if (Load->use_empty()) Load->eraseFromParent(); }