bool AMDGPUCodeGenPrepare::canWidenScalarExtLoad(LoadInst &I) const {
  Type *Ty = I.getType();
  const DataLayout &DL = Mod->getDataLayout();
  int TySize = DL.getTypeSizeInBits(Ty);
  unsigned Align = I.getAlignment() ?
                   I.getAlignment() : DL.getABITypeAlignment(Ty);

  return I.isSimple() && TySize < 32 && Align >= 4 && DA->isUniform(&I);
}
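// The check above leans on LoadInst::isSimple(). As a point of reference (a
// minimal sketch, not code from this file), "simple" for a load or store means
// it is neither volatile nor atomic; the free-standing helper below is an
// assumed restatement of that predicate, with a hypothetical name.
static bool isPlainLoad(const LoadInst &LI) {
  // Mirrors what isSimple() guarantees: no volatile semantics, no ordering.
  return !LI.isVolatile() && !LI.isAtomic();
}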
/// %res = load {atomic|volatile} T* %ptr memory_order, align sizeof(T)
/// becomes:
///   %res = call T @llvm.nacl.atomic.load.i<size>(%ptr, memory_order)
void AtomicVisitor::visitLoadInst(LoadInst &I) {
  return; // XXX EMSCRIPTEN: the early return disables this lowering here.
  if (I.isSimple())
    return;
  PointerHelper<LoadInst> PH(*this, I);
  const NaCl::AtomicIntrinsics::AtomicIntrinsic *Intrinsic =
      findAtomicIntrinsic(I, Intrinsic::nacl_atomic_load, PH.PET);
  checkAlignment(I, I.getAlignment(), PH.BitSize / CHAR_BIT);
  Value *Args[] = {PH.P, freezeMemoryOrder(I, I.getOrdering())};
  replaceInstructionWithIntrinsicCall(I, Intrinsic, PH.OriginalPET, PH.PET,
                                      Args);
}
bool CallAnalyzer::visitLoad(LoadInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }
    disableSROA(CostIt);
  }
  return false;
}
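// The cost model treats stores to an SROA candidate with the same bookkeeping.
// The following is a hedged sketch of such a counterpart (assuming the same
// lookupSROAArgAndCost / accumulateSROACost / disableSROA helpers are in
// scope), not a verbatim copy of any in-tree visitStore.
bool CallAnalyzer::visitStore(StoreInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      // A simple store into an SROA candidate is expected to vanish, so its
      // instruction cost counts as savings.
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }
    // A volatile or atomic store keeps the alloca alive as a memory object.
    disableSROA(CostIt);
  }
  return false;
}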
bool IRTranslator::translateLoad(const LoadInst &LI) {
  assert(LI.isSimple() && "only simple loads are supported at the moment");

  MachineFunction &MF = MIRBuilder.getMF();
  unsigned Res = getOrCreateVReg(LI);
  unsigned Addr = getOrCreateVReg(*LI.getPointerOperand());
  LLT VTy{*LI.getType()}, PTy{*LI.getPointerOperand()->getType()};

  MIRBuilder.buildLoad(
      VTy, PTy, Res, Addr,
      *MF.getMachineMemOperand(MachinePointerInfo(LI.getPointerOperand()),
                               MachineMemOperand::MOLoad,
                               VTy.getSizeInBits() / 8,
                               getMemOpAlignment(LI)));
  return true;
}
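// Because of the assert above, callers must not hand volatile or atomic loads
// to translateLoad. A hedged sketch of such a guard (tryTranslateLoad is a
// hypothetical name, not part of IRTranslator):
bool IRTranslator::tryTranslateLoad(const LoadInst &LI) {
  // Bail out on non-simple loads so the caller can fall back to another
  // instruction selector instead of tripping the assertion.
  if (!LI.isSimple())
    return false;
  return translateLoad(LI);
}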
/// \brief Check whether a loop instruction is safe for loop versioning.
/// Returns true if it is safe, false otherwise.
/// The checks are:
/// 1) All loads and stores in the loop body are non-atomic and non-volatile.
/// 2) Function calls are safe, i.e. they do not access memory.
/// 3) The loop body has no instruction that may throw.
bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) {
  assert(I != nullptr && "Null instruction found!");
  // Check function call safety.
  if (isa<CallInst>(I) && !AA->doesNotAccessMemory(CallSite(I))) {
    DEBUG(dbgs() << "  Unsafe call site found.\n");
    return false;
  }
  // Avoid loops with a possibility of throwing.
  if (I->mayThrow()) {
    DEBUG(dbgs() << "  May-throw instruction found in loop body\n");
    return false;
  }
  // If the current instruction reads memory, make sure it is a simple load
  // (non-atomic & non-volatile).
  if (I->mayReadFromMemory()) {
    LoadInst *Ld = dyn_cast<LoadInst>(I);
    if (!Ld || !Ld->isSimple()) {
      DEBUG(dbgs() << "  Found a non-simple load.\n");
      return false;
    }
    LoadAndStoreCounter++;
    collectStridedAccess(Ld);
    Value *Ptr = Ld->getPointerOperand();
    // Check loop invariance of the address.
    if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop))
      InvariantCounter++;
  }
  // If the current instruction writes memory, make sure it is a simple store
  // (non-atomic & non-volatile).
  else if (I->mayWriteToMemory()) {
    StoreInst *St = dyn_cast<StoreInst>(I);
    if (!St || !St->isSimple()) {
      DEBUG(dbgs() << "  Found a non-simple store.\n");
      return false;
    }
    LoadAndStoreCounter++;
    collectStridedAccess(St);
    Value *Ptr = St->getPointerOperand();
    // Check loop invariance of the address.
    if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop))
      InvariantCounter++;
    IsReadOnlyLoop = false;
  }
  return true;
}
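// The per-instruction predicate above is only useful when driven over every
// instruction of the loop. The driver below is an illustrative sketch
// (legalLoopInstructions is assumed here, and a real pass would also reset
// and inspect its counters); it shows the intended all-or-nothing check.
bool LoopVersioningLICM::legalLoopInstructions() {
  for (BasicBlock *BB : CurLoop->getBlocks())
    for (Instruction &I : *BB)
      if (!instructionSafeForVersioning(&I))
        return false; // one unsafe instruction disqualifies the whole loop
  return true;
}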
bool Scalarizer::visitLoadInst(LoadInst &LI) {
  if (!ScalarizeLoadStore)
    return false;
  if (!LI.isSimple())
    return false;

  VectorLayout Layout;
  if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout))
    return false;

  unsigned NumElems = Layout.VecTy->getNumElements();
  IRBuilder<> Builder(LI.getParent(), &LI);
  Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
  ValueVector Res;
  Res.resize(NumElems);

  for (unsigned I = 0; I < NumElems; ++I)
    Res[I] = Builder.CreateAlignedLoad(Ptr[I], Layout.getElemAlign(I),
                                       LI.getName() + ".i" + Twine(I));
  gather(&LI, Res);
  return true;
}
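// Store scalarization is the mirror image: scatter both the pointer and the
// stored value, then emit one element-wise store per lane. This is a hedged
// sketch reusing the helpers above, not the in-tree visitStoreInst; metadata
// propagation is deliberately omitted.
bool Scalarizer::visitStoreInst(StoreInst &SI) {
  if (!ScalarizeLoadStore)
    return false;
  if (!SI.isSimple())
    return false;

  VectorLayout Layout;
  Value *FullValue = SI.getValueOperand();
  if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout))
    return false;

  unsigned NumElems = Layout.VecTy->getNumElements();
  IRBuilder<> Builder(SI.getParent(), &SI);
  Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
  Scatterer Val = scatter(&SI, FullValue);

  for (unsigned I = 0; I < NumElems; ++I)
    Builder.CreateAlignedStore(Val[I], Ptr[I], Layout.getElemAlign(I));
  return true;
}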
/// tryAggregating - When scanning forward over instructions, we look for
/// other loads or stores that could be aggregated with this one.
/// Returns the last instruction added (if one was added) since we might have
/// removed some loads or stores and that might invalidate an iterator.
Instruction *AggregateGlobalOpsOpt::tryAggregating(Instruction *StartInst,
                                                   Value *StartPtr,
                                                   bool DebugThis) {
  if (TD == 0)
    return 0;

  Module* M = StartInst->getParent()->getParent()->getParent();
  LLVMContext& Context = StartInst->getContext();

  Type* int8Ty = Type::getInt8Ty(Context);
  Type* sizeTy = Type::getInt64Ty(Context);
  Type* globalInt8PtrTy = int8Ty->getPointerTo(globalSpace);
  bool isLoad = isa<LoadInst>(StartInst);
  bool isStore = isa<StoreInst>(StartInst);
  Instruction *lastAddedInsn = NULL;
  Instruction *LastLoadOrStore = NULL;

  SmallVector<Instruction*, 8> toRemove;

  // Okay, so we now have a single global load/store. Scan to find
  // all subsequent stores of the same value to offset from the same pointer.
  // Join these together into ranges, so we can decide whether contiguous
  // blocks are stored.
  MemOpRanges Ranges(*TD);

  // Put the first store in since we want to preserve the order.
  Ranges.addInst(0, StartInst);

  BasicBlock::iterator BI = StartInst;
  for (++BI; !isa<TerminatorInst>(BI); ++BI) {
    if( isGlobalLoadOrStore(BI, globalSpace, isLoad, isStore) ) {
      // OK!
    } else {
      // If the instruction is readnone, ignore it, otherwise bail out.  We
      // don't even allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (BI->mayWriteToMemory())
        break;
      if (isStore && BI->mayReadFromMemory())
        break;
      continue;
    }

    if ( isStore && isa<StoreInst>(BI) ) {
      StoreInst *NextStore = cast<StoreInst>(BI);
      // If this is a store, see if we can merge it in.
      if (!NextStore->isSimple()) break;

      // Check to see if this store is to a constant offset from the start ptr.
      int64_t Offset;
      if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(),
                           Offset, *TD))
        break;

      Ranges.addStore(Offset, NextStore);
      LastLoadOrStore = NextStore;
    } else {
      LoadInst *NextLoad = cast<LoadInst>(BI);
      if (!NextLoad->isSimple()) break;

      // Check to see if this load is to a constant offset from the start ptr.
      int64_t Offset;
      if (!IsPointerOffset(StartPtr, NextLoad->getPointerOperand(),
                           Offset, *TD))
        break;

      Ranges.addLoad(Offset, NextLoad);
      LastLoadOrStore = NextLoad;
    }
  }

  // If we have no ranges, then we just had a single store with nothing that
  // could be merged in.  This is a very common case of course.
  if (!Ranges.moreThanOneOp())
    return 0;

  // Divide the instructions between StartInst and LastLoadOrStore into
  // addressing, memops, and uses of memops (uses of loads).
  reorderAddressingMemopsUses(StartInst, LastLoadOrStore, DebugThis);

  Instruction* insertBefore = StartInst;
  IRBuilder<> builder(insertBefore);

  // Now that we have full information about ranges, loop over the ranges and
  // emit memcpy's for anything big enough to be worthwhile.
  for (MemOpRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
       I != E; ++I) {
    const MemOpRange &Range = *I;
    Value* oldBaseI = NULL;
    Value* newBaseI = NULL;

    if (Range.TheStores.size() == 1)
      continue; // Don't bother if there's only one thing...

    builder.SetInsertPoint(insertBefore);

    // Otherwise, we do want to transform this!  Create a new memcpy.
    // Get the starting pointer of the block.
    StartPtr = Range.StartPtr;

    if( DebugThis ) {
      errs() << "base is:";
      StartPtr->dump();
    }

    // Determine alignment.
    unsigned Alignment = Range.Alignment;
    if (Alignment == 0) {
      Type *EltType =
        cast<PointerType>(StartPtr->getType())->getElementType();
      Alignment = TD->getABITypeAlignment(EltType);
    }

    Instruction *alloc = NULL;
    Value *globalPtr = NULL;

    // Create temporary alloca space to communicate to/from.
    alloc = makeAlloca(int8Ty, "agg.tmp", insertBefore,
                       Range.End-Range.Start, Alignment);

    // Generate the old and new base pointers before we output
    // anything else.
    {
      Type* iPtrTy = TD->getIntPtrType(alloc->getType());
      Type* iNewBaseTy = TD->getIntPtrType(alloc->getType());
      oldBaseI = builder.CreatePtrToInt(StartPtr, iPtrTy, "agg.tmp.oldb.i");
      newBaseI = builder.CreatePtrToInt(alloc, iNewBaseTy, "agg.tmp.newb.i");
    }

    // If storing, do the stores we had into our alloca'd region.
    if( isStore ) {
      for (SmallVector<Instruction*, 16>::const_iterator
           SI = Range.TheStores.begin(),
           SE = Range.TheStores.end(); SI != SE; ++SI) {
        StoreInst* oldStore = cast<StoreInst>(*SI);

        if( DebugThis ) {
          errs() << "have store in range:";
          oldStore->dump();
        }

        Value* ptrToAlloc = rebasePointer(oldStore->getPointerOperand(),
                                          StartPtr, alloc, "agg.tmp",
                                          &builder, *TD, oldBaseI, newBaseI);
        // The old store must not be volatile or atomic, or we shouldn't have
        // put it in ranges.
        assert(!(oldStore->isVolatile() || oldStore->isAtomic()));
        StoreInst* newStore =
          builder.CreateStore(oldStore->getValueOperand(), ptrToAlloc);
        newStore->setAlignment(oldStore->getAlignment());
        newStore->takeName(oldStore);
      }
    }

    // Cast the pointer that was loaded/stored to i8 if necessary.
    if( StartPtr->getType()->getPointerElementType() == int8Ty ) {
      globalPtr = StartPtr;
    } else {
      globalPtr = builder.CreatePointerCast(StartPtr, globalInt8PtrTy,
                                            "agg.cast");
    }

    // Get a Constant* for the length.
    Constant* len = ConstantInt::get(sizeTy, Range.End-Range.Start, false);

    // Now add the memcpy instruction.
    unsigned addrSpaceDst,addrSpaceSrc;
    addrSpaceDst = addrSpaceSrc = 0;
    if( isStore ) addrSpaceDst = globalSpace;
    if( isLoad ) addrSpaceSrc = globalSpace;

    Type *types[3];
    types[0] = PointerType::get(int8Ty, addrSpaceDst);
    types[1] = PointerType::get(int8Ty, addrSpaceSrc);
    types[2] = sizeTy;

    Function *func = Intrinsic::getDeclaration(M, Intrinsic::memcpy, types);

    Value* args[5]; // dst src len alignment isvolatile
    if( isStore ) {
      // it's a store (ie put)
      args[0] = globalPtr;
      args[1] = alloc;
    } else {
      // it's a load (ie get)
      args[0] = alloc;
      args[1] = globalPtr;
    }
    args[2] = len;
    // alignment
    args[3] = ConstantInt::get(Type::getInt32Ty(Context), 0, false);
    // isvolatile
    args[4] = ConstantInt::get(Type::getInt1Ty(Context), 0, false);

    Instruction* aMemCpy = builder.CreateCall(func, args);

    /*
    DEBUG(dbgs() << "Replace ops:\n";
      for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
        dbgs() << *Range.TheStores[i] << '\n';
      dbgs() << "With: " << *AMemSet << '\n');
    */

    if (!Range.TheStores.empty())
      aMemCpy->setDebugLoc(Range.TheStores[0]->getDebugLoc());

    lastAddedInsn = aMemCpy;

    // If loading, load from the memcpy'd region.
    if( isLoad ) {
      for (SmallVector<Instruction*, 16>::const_iterator
           SI = Range.TheStores.begin(),
           SE = Range.TheStores.end(); SI != SE; ++SI) {
        LoadInst* oldLoad = cast<LoadInst>(*SI);
        if( DebugThis ) {
          errs() << "have load in range:";
          oldLoad->dump();
        }

        Value* ptrToAlloc = rebasePointer(oldLoad->getPointerOperand(),
                                          StartPtr, alloc, "agg.tmp",
                                          &builder, *TD, oldBaseI, newBaseI);
        // The old load must not be volatile or atomic, or we shouldn't have
        // put it in ranges.
        assert(!(oldLoad->isVolatile() || oldLoad->isAtomic()));
        LoadInst* newLoad = builder.CreateLoad(ptrToAlloc);
        newLoad->setAlignment(oldLoad->getAlignment());
        oldLoad->replaceAllUsesWith(newLoad);
        newLoad->takeName(oldLoad);
        lastAddedInsn = newLoad;
      }
    }

    // Save old loads/stores for removal.
    for (SmallVector<Instruction*, 16>::const_iterator
         SI = Range.TheStores.begin(),
         SE = Range.TheStores.end(); SI != SE; ++SI) {
      Instruction* insn = *SI;
      toRemove.push_back(insn);
    }
  }

  // Zap all the old loads/stores.
  for (SmallVector<Instruction*, 16>::const_iterator
       SI = toRemove.begin(),
       SE = toRemove.end(); SI != SE; ++SI) {
    (*SI)->eraseFromParent();
  }

  return lastAddedInsn;
}
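// The return value exists so a caller can re-seat its iterator after
// tryAggregating has erased the original loads and stores. An illustrative
// driver over one basic block (scanBasicBlock is an assumed name, not part of
// the original pass):
void AggregateGlobalOpsOpt::scanBasicBlock(BasicBlock *BB, bool DebugThis) {
  for (BasicBlock::iterator BI = BB->begin();
       !isa<TerminatorInst>(BI); ++BI) {
    Value *Ptr = NULL;
    if (LoadInst *LD = dyn_cast<LoadInst>(BI))
      Ptr = LD->getPointerOperand();
    else if (StoreInst *ST = dyn_cast<StoreInst>(BI))
      Ptr = ST->getPointerOperand();
    if (!Ptr)
      continue;
    // Resume scanning from the last instruction the aggregation added; the
    // instructions before it (including the start point) may have been erased.
    if (Instruction *Last = tryAggregating(BI, Ptr, DebugThis))
      BI = Last;
  }
}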
/// \brief Compute the reduction possible for a given instruction if we are
/// able to SROA an alloca.
///
/// The reduction for this instruction is added to the SROAReduction output
/// parameter. Returns false if this instruction is expected to defeat SROA in
/// general.
static bool countCodeReductionForSROAInst(Instruction *I,
                                          SmallVectorImpl<Value *> &Worklist,
                                          unsigned &SROAReduction) {
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (!LI->isSimple())
      return false;
    SROAReduction += InlineConstants::InstrCost;
    return true;
  }

  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (!SI->isSimple())
      return false;
    SROAReduction += InlineConstants::InstrCost;
    return true;
  }

  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
    // If the GEP has variable indices, we won't be able to do much with it.
    if (!GEP->hasAllConstantIndices())
      return false;
    // A non-zero GEP will likely become a mask operation after SROA.
    if (GEP->hasAllZeroIndices())
      SROAReduction += InlineConstants::InstrCost;
    Worklist.push_back(GEP);
    return true;
  }

  if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
    // Track pointer through bitcasts.
    Worklist.push_back(BCI);
    SROAReduction += InlineConstants::InstrCost;
    return true;
  }

  // We just look for non-constant operands to ICmp instructions as those will
  // defeat SROA. The actual reduction for these happens even without SROA.
  if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
    return isa<Constant>(ICI->getOperand(1));

  if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
    // SROA can handle a select of alloca iff all uses of the alloca are
    // loads, and dereferenceable. We assume it's dereferenceable since
    // we're told the input is an alloca.
    for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
         UI != UE; ++UI) {
      LoadInst *LI = dyn_cast<LoadInst>(*UI);
      if (LI == 0 || !LI->isSimple())
        return false;
    }
    // We don't know whether we'll be deleting the rest of the chain of
    // instructions from the SelectInst on, because we don't know whether
    // the other side of the select is also an alloca or not.
    return true;
  }

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default:
      return false;
    case Intrinsic::memset:
    case Intrinsic::memcpy:
    case Intrinsic::memmove:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // SROA can usually chew through these intrinsics.
      SROAReduction += InlineConstants::InstrCost;
      return true;
    }
  }

  // If there is some other strange instruction, we're not going to be
  // able to do much if we inline this.
  return false;
}
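// The Worklist parameter above lets a caller chase pointer-like users (GEPs
// and bitcasts) transitively from the original alloca. An illustrative driver,
// assuming we start from an alloca (countCodeReductionForSROA is a name used
// only for this sketch):
static unsigned countCodeReductionForSROA(AllocaInst *AI) {
  unsigned SROAReduction = 0;
  SmallVector<Value *, 8> Worklist;
  Worklist.push_back(AI);
  do {
    Value *V = Worklist.pop_back_val();
    for (Value::use_iterator UI = V->use_begin(), UE = V->use_end();
         UI != UE; ++UI) {
      Instruction *I = dyn_cast<Instruction>(*UI);
      // Any non-instruction user, or any SROA-defeating user, cancels the
      // expected savings entirely.
      if (!I || !countCodeReductionForSROAInst(I, Worklist, SROAReduction))
        return 0;
    }
  } while (!Worklist.empty());
  return SROAReduction;
}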
/// Attempt to merge an objc_release with a store, load, and objc_retain to
/// form an objc_storeStrong. This can be a little tricky because the
/// instructions don't always appear in order, and there may be unrelated
/// intervening instructions.
void ObjCARCContract::ContractRelease(Instruction *Release,
                                      inst_iterator &Iter) {
  LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release));
  if (!Load || !Load->isSimple()) return;

  // For now, require everything to be in one basic block.
  BasicBlock *BB = Release->getParent();
  if (Load->getParent() != BB) return;

  // Walk down to find the store and the release, which may be in either order.
  BasicBlock::iterator I = Load, End = BB->end();
  ++I;
  AliasAnalysis::Location Loc = AA->getLocation(Load);
  StoreInst *Store = 0;
  bool SawRelease = false;
  for (; !Store || !SawRelease; ++I) {
    if (I == End)
      return;

    Instruction *Inst = I;
    if (Inst == Release) {
      SawRelease = true;
      continue;
    }

    InstructionClass Class = GetBasicInstructionClass(Inst);

    // Unrelated retains are harmless.
    if (IsRetain(Class))
      continue;

    if (Store) {
      // The store is the point where we're going to put the objc_storeStrong,
      // so make sure there are no uses after it.
      if (CanUse(Inst, Load, PA, Class))
        return;
    } else if (AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod) {
      // We are moving the load down to the store, so check for anything
      // else which writes to the memory between the load and the store.
      Store = dyn_cast<StoreInst>(Inst);
      if (!Store || !Store->isSimple()) return;
      if (Store->getPointerOperand() != Loc.Ptr) return;
    }
  }

  Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand());

  // Walk up to find the retain.
  I = Store;
  BasicBlock::iterator Begin = BB->begin();
  while (I != Begin && GetBasicInstructionClass(I) != IC_Retain)
    --I;
  Instruction *Retain = I;
  if (GetBasicInstructionClass(Retain) != IC_Retain) return;
  if (GetObjCArg(Retain) != New) return;

  Changed = true;
  ++NumStoreStrongs;

  LLVMContext &C = Release->getContext();
  Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
  Type *I8XX = PointerType::getUnqual(I8X);

  Value *Args[] = { Load->getPointerOperand(), New };
  if (Args[0]->getType() != I8XX)
    Args[0] = new BitCastInst(Args[0], I8XX, "", Store);
  if (Args[1]->getType() != I8X)
    Args[1] = new BitCastInst(Args[1], I8X, "", Store);
  CallInst *StoreStrong =
    CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()),
                     Args, "", Store);
  StoreStrong->setDoesNotThrow();
  StoreStrong->setDebugLoc(Store->getDebugLoc());

  // We can't set the tail flag yet, because we haven't yet determined
  // whether there are any escaping allocas. Remember this call, so that
  // we can set the tail flag once we know it's safe.
  StoreStrongCalls.insert(StoreStrong);

  if (&*Iter == Store) ++Iter;
  Store->eraseFromParent();
  Release->eraseFromParent();
  EraseInstruction(Retain);
  if (Load->use_empty())
    Load->eraseFromParent();
}
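// An illustrative call site (assumed, not copied from the pass): while walking
// the function, hand each release to ContractRelease and let it adjust the
// iterator if it is about to erase the instruction the iterator points at.
void ObjCARCContract::contractReleases(Function &F) {
  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ) {
    Instruction *Inst = &*I++;
    if (GetBasicInstructionClass(Inst) == IC_Release)
      ContractRelease(Inst, I);
  }
}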
bool runOnFunction(Function &F) override {
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  DependenceAnalysis *DA = &(getAnalysis<DependenceAnalysis>());

  // Iterate over basic blocks.
  Function *func = &F;
  unsigned bb_num = 0;
  for (Function::iterator BB = func->begin(), BE = func->end();
       BB != BE; ++BB) {
    errs() << "BB-" << bb_num << "\n";
    bb_num++;

    // Iterate over instructions, collecting simple loads and stores.
    unsigned inst_num = 0;
    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
      Instruction *Ins = dyn_cast<Instruction>(I);
      if (!Ins)
        return false;
      LoadInst *Ld = dyn_cast<LoadInst>(I);
      StoreInst *St = dyn_cast<StoreInst>(I);
      if (!St && !Ld)
        continue;
      if (Ld && !Ld->isSimple())
        return false;
      if (St && !St->isSimple())
        return false;
      inst_num++;
      MemInstr.push_back(&*I);
      errs() << "MemInst-" << inst_num << ":" << *I << "\n";
    }

    ValueVector::iterator I, IE, J, JE;
    for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
      for (J = I, JE = MemInstr.end(); J != JE; ++J) {
        std::vector<char> Dep;
        Instruction *Src = dyn_cast<Instruction>(*I);
        Instruction *Des = dyn_cast<Instruction>(*J);
        if (Src == Des)
          continue;
        if (isa<LoadInst>(Src) && isa<LoadInst>(Des))
          continue;
        if (auto D = DA->depends(Src, Des, true)) {
          errs() << "Found Dependency between:\nSrc:" << *Src << "\nDes:"
                 << *Des << "\n";
          if (D->isFlow()) {
            errs() << "Flow dependence not handled\n";
            return false;
          }
          if (D->isAnti()) {
            errs() << "Found Anti dependence \n";
            AliasAnalysis::AliasResult AA_dep = AA.alias(Src, Des);
            AliasAnalysis::AliasResult AA_dep_1 = AA.alias(Des, Src);
            errs() << "The Ld->St alias result is " << AA_dep << "\n";
            errs() << "The St->Ld alias result is " << AA_dep_1 << "\n";
            unsigned Levels = D->getLevels();
            errs() << "levels = " << Levels << "\n";
            char Direction;
            for (unsigned II = 1; II <= Levels; ++II) {
              const SCEV *Distance = D->getDistance(II);
              const SCEVConstant *SCEVConst =
                  dyn_cast_or_null<SCEVConstant>(Distance);
              if (SCEVConst) {
                const ConstantInt *CI = SCEVConst->getValue();
                //int64_t it_dist = CI->getUniqueInteger().getSExtValue();
                //int it_dist = CI->getUniqueInteger().getSExtValue();
                unsigned it_dist =
                    abs(CI->getUniqueInteger().getSExtValue());
                errs() << "distance is not null\n";
                //errs() << "distance = " << *CI << "\n";
                errs() << "distance = " << it_dist << "\n";
                if (CI->isNegative())
                  Direction = '<';
                else if (CI->isZero())
                  Direction = '=';
                else
                  Direction = '>';
                Dep.push_back(Direction);
              } else if (D->isScalar(II)) {
                Direction = 'S';
                Dep.push_back(Direction);
              } else {
                unsigned Dir = D->getDirection(II);
                if (Dir == Dependence::DVEntry::LT ||
                    Dir == Dependence::DVEntry::LE)
                  Direction = '<';
                else if (Dir == Dependence::DVEntry::GT ||
                         Dir == Dependence::DVEntry::GE)
                  Direction = '>';
                else if (Dir == Dependence::DVEntry::EQ)
                  Direction = '=';
                else
                  Direction = '*';
                Dep.push_back(Direction);
              }
            }
          }
        }
      }
    }
  }
  errs() << "------Hello World!--------\n";
  return false;
}
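// The function above is written as a legacy-pass runOnFunction. A hedged
// sketch of the surrounding boilerplate (the pass name, the ValueVector
// typedef, and the registration string are assumptions for illustration):
namespace {
typedef SmallVector<Value *, 16> ValueVector;

struct MemDepPrinter : public FunctionPass {
  static char ID;
  ValueVector MemInstr; // loads/stores collected by runOnFunction

  MemDepPrinter() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AliasAnalysis>();
    AU.addRequired<DependenceAnalysis>();
    AU.setPreservesAll();
  }

  bool runOnFunction(Function &F) override; // body as shown above
};
} // end anonymous namespace

char MemDepPrinter::ID = 0;
static RegisterPass<MemDepPrinter>
    X("print-memdeps", "Print dependences between loads and stores");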