void Closure::unpack_struct(Scope<Value *> &dst,
                            Value *src,
                            IRBuilder<> *builder) {
    // src should be a pointer to a struct of the type returned by build_type
    int idx = 0;
    LLVMContext &context = builder->getContext();
    vector<string> nm = names();
    for (size_t i = 0; i < nm.size(); i++) {
        Value *ptr = builder->CreateConstInBoundsGEP2_32(src, 0, idx++);
        LoadInst *load = builder->CreateLoad(ptr);
        if (load->getType()->isPointerTy()) {
            // Give it a unique type so that tbaa tells llvm that this can't alias anything
            load->setMetadata("tbaa",
                              MDNode::get(context,
                                          vec<Value *>(MDString::get(context, nm[i]))));
        }
        dst.push(nm[i], load);
        load->setName(nm[i]);
    }
}
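// The interesting detail above is the per-field TBAA tag: each pointer load
// gets a single-node TBAA tree rooted at the field's own name, and since
// distinct TBAA roots are assumed not to alias, LLVM treats loads of
// different closure fields as non-aliasing. A minimal standalone sketch of
// just that tagging step, assuming a recent LLVM where metadata operands are
// `Metadata *` (the helper name `tag_noalias_load` is ours, not Halide's):

#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include <string>

// Illustrative helper, not part of Halide: tag a pointer-typed load with a
// one-off TBAA root named after the closure field, so TBAA concludes it
// cannot alias any load tagged with a different root.
static void tag_noalias_load(llvm::LoadInst *load, const std::string &field_name) {
    llvm::LLVMContext &context = load->getContext();
    llvm::Metadata *md_args[] = {llvm::MDString::get(context, field_name)};
    load->setMetadata("tbaa", llvm::MDNode::get(context, md_args));
}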
/// Check if the value in memory at `memAddr` was changed when accessing `val`,
/// and store the result into `flag`.
void RedoBBBuilder::insertCheck(Value *val, Value *memAddr, Value *flag) {
    for (auto it = val->use_begin(), ite = val->use_end(); it != ite; it++) {
        if (StoreInst *SI = dyn_cast<StoreInst>(*it)) {
            // skip those not in the current top loop
            if (!isCurrentTopLoop(*SI)) {
                continue;
            }
            // skip instructions we're not interested in
            if (!shouldCheck(*SI)) {
                continue;
            }

            // see if we have already checked this store for this memAddr
            CmpInst *chkRes = 0;
            if (StoreToCheckMap.count(SI)) {
                for (auto pair : StoreToCheckMap[SI]) {
                    if (pair.first == memAddr) {
                        chkRes = pair.second;
                        DEBUG(dbgs() << "    Found existing check '" << chkRes->getName()
                                     << "' for (" << *SI << " ) in "
                                     << SI->getParent()->getName() << "\n");
                    }
                }
            }

            if (!chkRes) {
                // check whether the value at the memory address changed across
                // the store:
                //
                //      %orig = load %memAddr
                //      ; the STORE we are checking
                //      %modified = load %memAddr
                //      %cmp = icmp ne, %orig, %modified
                LoadInst *orig = new LoadInst(memAddr, "", SI);
                Instruction *next = SI->getNextNode();
                LoadInst *modified = new LoadInst(memAddr, "", next);
                chkRes = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_NE,
                                         orig, modified, "chk", next);

                CheckingInstrs.insert(orig);
                CheckingInstrs.insert(modified);
                CheckingInstrs.insert(chkRes);

                // set consistent names
                orig->setName(chkRes->getName() + ".orig");
                modified->setName(chkRes->getName() + ".mod");

                StoreToCheckMap[SI].push_back({memAddr, chkRes});

                DEBUG(dbgs() << "    Inserted check '" << chkRes->getName()
                             << "' for (" << *SI << " ) in "
                             << SI->getParent()->getName() << "\n");
            }

            // see if we have already stored the check result to this flag
            Check pair = {memAddr, chkRes};
            for (auto f : CheckToFlagMap[pair]) {
                if (f == flag) {
                    DEBUG(dbgs() << "    Existing flag store found\n");
                    return;
                }
            }

            DEBUG(dbgs() << "    Check result stored to " << flag->getName() << "\n");

            // merge the old value and the new value with `or`:
            //
            //      %oldflgval = load %flag
            //      %newflgval = or %oldflgval, %chkRes
            //      store %newflgval, %flag

            // the next instr after the STORE we are checking
            Instruction *next = chkRes->getNextNode();
            LoadInst *oldflgval = new LoadInst(flag, flag->getName() + ".oldval", next);
            auto *newflgval = BinaryOperator::Create(Instruction::Or, oldflgval, chkRes,
                                                     flag->getName() + ".newval", next);
            StoreInst *st = new StoreInst(newflgval, flag, next);

            CheckingInstrs.insert(oldflgval);
            CheckingInstrs.insert(newflgval);
            CheckingInstrs.insert(st);

            CheckToFlagMap[pair].push_back(flag);
        }
    }
}
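// insertCheck relies on several class members whose declarations don't appear
// here. One plausible shape for them, inferred from the usage above -- these
// are assumptions, not the actual RedoBBBuilder declarations:

#include "llvm/IR/Instructions.h"
#include <map>
#include <set>
#include <utility>
#include <vector>

// (memAddr, comparison result) -- brace-initialized as `{memAddr, chkRes}` above.
typedef std::pair<llvm::Value *, llvm::CmpInst *> Check;

// Each instrumented store, mapped to the checks already created for it,
// one per distinct memory address being watched.
std::map<llvm::StoreInst *, std::vector<Check>> StoreToCheckMap;

// Each check, mapped to the flags its result has been OR-ed into, so a
// given (store, memAddr, flag) triple is only instrumented once.
std::map<Check, std::vector<llvm::Value *>> CheckToFlagMap;

// Every instruction this builder inserted, so later passes can skip them.
std::set<llvm::Instruction *> CheckingInstrs;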
bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
    return false;

  auto &TPC = getAnalysis<TargetPassConfig>();

  const TargetMachine &TM = TPC.getTM<TargetMachine>();
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
  LLVMContext &Ctx = F.getParent()->getContext();
  const DataLayout &DL = F.getParent()->getDataLayout();
  BasicBlock &EntryBlock = *F.begin();
  IRBuilder<> Builder(&*EntryBlock.begin());

  const unsigned KernArgBaseAlign = 16; // FIXME: Increase if necessary
  const uint64_t BaseOffset = ST.getExplicitKernelArgOffset(F);

  unsigned MaxAlign;
  // FIXME: Alignment is broken with explicit arg offset.
  const uint64_t TotalKernArgSize = ST.getKernArgSegmentSize(F, MaxAlign);
  if (TotalKernArgSize == 0)
    return false;

  CallInst *KernArgSegment =
      Builder.CreateIntrinsic(Intrinsic::amdgcn_kernarg_segment_ptr, {}, {},
                              nullptr, F.getName() + ".kernarg.segment");

  KernArgSegment->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
  KernArgSegment->addAttribute(AttributeList::ReturnIndex,
    Attribute::getWithDereferenceableBytes(Ctx, TotalKernArgSize));

  unsigned AS = KernArgSegment->getType()->getPointerAddressSpace();
  uint64_t ExplicitArgOffset = 0;

  for (Argument &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    unsigned Align = DL.getABITypeAlignment(ArgTy);
    unsigned Size = DL.getTypeSizeInBits(ArgTy);
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);

    uint64_t EltOffset = alignTo(ExplicitArgOffset, Align) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, Align) + AllocSize;

    if (Arg.use_empty())
      continue;

    if (PointerType *PT = dyn_cast<PointerType>(ArgTy)) {
      // FIXME: Hack. We rely on AssertZext to be able to fold DS addressing
      // modes on SI to know the high bits are 0 so pointer adds don't wrap. We
      // can't represent this with range metadata because it's only allowed for
      // integer types.
      if ((PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
           PT->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) &&
          ST.getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS)
        continue;

      // FIXME: We can replace this with equivalent alias.scope/noalias
      // metadata, but this appears to be a lot of work.
      if (Arg.hasNoAliasAttr())
        continue;
    }

    VectorType *VT = dyn_cast<VectorType>(ArgTy);
    bool IsV3 = VT && VT->getNumElements() == 3;
    bool DoShiftOpt = Size < 32 && !ArgTy->isAggregateType();

    VectorType *V4Ty = nullptr;

    int64_t AlignDownOffset = alignDown(EltOffset, 4);
    int64_t OffsetDiff = EltOffset - AlignDownOffset;
    unsigned AdjustedAlign = MinAlign(DoShiftOpt ? AlignDownOffset : EltOffset,
                                      KernArgBaseAlign);

    Value *ArgPtr;
    Type *AdjustedArgTy;
    if (DoShiftOpt) { // FIXME: Handle aggregate types
      // Since we don't have sub-dword scalar loads, avoid doing an extload by
      // loading earlier than the argument address, and extracting the relevant
      // bits.
      //
      // Additionally widen any sub-dword load to i32 even if suitably aligned,
      // so that CSE between different argument loads works easily.
      ArgPtr = Builder.CreateConstInBoundsGEP1_64(
          Builder.getInt8Ty(), KernArgSegment, AlignDownOffset,
          Arg.getName() + ".kernarg.offset.align.down");
      AdjustedArgTy = Builder.getInt32Ty();
    } else {
      ArgPtr = Builder.CreateConstInBoundsGEP1_64(
          Builder.getInt8Ty(), KernArgSegment, EltOffset,
          Arg.getName() + ".kernarg.offset");
      AdjustedArgTy = ArgTy;
    }

    if (IsV3 && Size >= 32) {
      V4Ty = VectorType::get(VT->getVectorElementType(), 4);
      // Use the hack that clang uses to avoid SelectionDAG ruining v3 loads
      AdjustedArgTy = V4Ty;
    }

    ArgPtr = Builder.CreateBitCast(ArgPtr, AdjustedArgTy->getPointerTo(AS),
                                   ArgPtr->getName() + ".cast");
    LoadInst *Load =
        Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign);
    Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {}));

    MDBuilder MDB(Ctx);

    if (isa<PointerType>(ArgTy)) {
      if (Arg.hasNonNullAttr())
        Load->setMetadata(LLVMContext::MD_nonnull, MDNode::get(Ctx, {}));

      uint64_t DerefBytes = Arg.getDereferenceableBytes();
      if (DerefBytes != 0) {
        Load->setMetadata(
          LLVMContext::MD_dereferenceable,
          MDNode::get(Ctx,
                      MDB.createConstant(
                        ConstantInt::get(Builder.getInt64Ty(), DerefBytes))));
      }

      uint64_t DerefOrNullBytes = Arg.getDereferenceableOrNullBytes();
      if (DerefOrNullBytes != 0) {
        Load->setMetadata(
          LLVMContext::MD_dereferenceable_or_null,
          MDNode::get(Ctx,
                      MDB.createConstant(ConstantInt::get(Builder.getInt64Ty(),
                                                          DerefOrNullBytes))));
      }

      unsigned ParamAlign = Arg.getParamAlignment();
      if (ParamAlign != 0) {
        Load->setMetadata(
          LLVMContext::MD_align,
          MDNode::get(Ctx,
                      MDB.createConstant(ConstantInt::get(Builder.getInt64Ty(),
                                                          ParamAlign))));
      }
    }

    // TODO: Convert noalias arg to !noalias

    if (DoShiftOpt) {
      Value *ExtractBits = OffsetDiff == 0 ?
        Load : Builder.CreateLShr(Load, OffsetDiff * 8);

      IntegerType *ArgIntTy = Builder.getIntNTy(Size);
      Value *Trunc = Builder.CreateTrunc(ExtractBits, ArgIntTy);
      Value *NewVal = Builder.CreateBitCast(Trunc, ArgTy,
                                            Arg.getName() + ".load");
      Arg.replaceAllUsesWith(NewVal);
    } else if (IsV3) {
      Value *Shuf = Builder.CreateShuffleVector(Load, UndefValue::get(V4Ty),
                                                {0, 1, 2},
                                                Arg.getName() + ".load");
      Arg.replaceAllUsesWith(Shuf);
    } else {
      Load->setName(Arg.getName() + ".load");
      Arg.replaceAllUsesWith(Load);
    }
  }

  KernArgSegment->addAttribute(
    AttributeList::ReturnIndex,
    Attribute::getWithAlignment(Ctx, std::max(KernArgBaseAlign, MaxAlign)));

  return true;
}
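// What the DoShiftOpt path computes is easier to see stripped of the IR
// plumbing. Below is a scalar model of the widened load plus lshr/trunc,
// assuming little-endian byte order as on AMDGPU; the function name and
// parameters are illustrative, not part of the pass:

#include <cstdint>
#include <cstring>

// Scalar model of the DoShiftOpt path: rather than a sub-dword extending
// load at EltOffset, load the aligned 32-bit dword containing the argument,
// then shift the wanted bytes down and truncate.
static uint16_t load_short_arg(const uint8_t *kernarg_segment,
                               uint64_t elt_offset) {
    uint64_t align_down = elt_offset & ~UINT64_C(3); // alignDown(EltOffset, 4)
    uint64_t offset_diff = elt_offset - align_down;  // byte position in dword

    uint32_t dword;                                  // the widened i32 load
    std::memcpy(&dword, kernarg_segment + align_down, sizeof(dword));

    return static_cast<uint16_t>(dword >> (offset_diff * 8)); // lshr + trunc
}

// As the comment in the pass notes, widening to i32 even for suitably aligned
// sub-dword arguments also lets CSE merge loads of neighboring arguments.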
void Closure::unpack_struct(Scope<Value *> &dst,
                            llvm::Type *
#if LLVM_VERSION >= 37
                            type
#endif
                            ,
                            Value *src,
                            IRBuilder<> *builder) {
    // `type` is the struct type returned by build_type; src should be a
    // pointer to a struct of that type
    int idx = 0;
    LLVMContext &context = builder->getContext();
    vector<string> nm = names();
    for (size_t i = 0; i < nm.size(); i++) {
#if LLVM_VERSION >= 37
        Value *ptr = builder->CreateConstInBoundsGEP2_32(type, src, 0, idx++);
#else
        Value *ptr = builder->CreateConstInBoundsGEP2_32(src, 0, idx++);
#endif
        LoadInst *load = builder->CreateLoad(ptr);
        if (load->getType()->isPointerTy()) {
            // Give it a unique type so that tbaa tells llvm that this can't alias anything
            LLVMMDNodeArgumentType md_args[] = {MDString::get(context, nm[i])};
            load->setMetadata("tbaa", MDNode::get(context, md_args));
        }
        dst.push(nm[i], load);
        load->setName(nm[i]);
    }
}
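// The `#if` blocks exist because, starting with LLVM 3.7, the IRBuilder GEP
// helpers take the source element type explicitly (part of the move toward
// typeless pointers). If such call sites multiply, the conditional can be
// folded into one small wrapper; a sketch of that idea (ours, not Halide's):

#include "llvm/IR/IRBuilder.h"

// Illustrative wrapper, not part of Halide: hide the LLVM 3.7 API change
// behind a single helper so each call site stays free of #if blocks.
static llvm::Value *const_struct_gep(llvm::IRBuilder<> *builder,
                                     llvm::Type *type, llvm::Value *src,
                                     unsigned idx) {
#if LLVM_VERSION >= 37
    // LLVM >= 3.7: the GEP helper takes the source element type explicitly.
    return builder->CreateConstInBoundsGEP2_32(type, src, 0, idx);
#else
    // Older LLVM infers the element type from src's pointer type; `type` is
    // unused here.
    (void)type;
    return builder->CreateConstInBoundsGEP2_32(src, 0, idx);
#endif
}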