bool GlobalMerge::doInitialization(Module &M) {
  DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
                                                        BSSGlobals;
  const DataLayout *TD = TLI->getDataLayout();
  unsigned MaxOffset = TLI->getMaximalGlobalOffset();
  bool Changed = false;

  // Grab all non-const globals.
  for (Module::global_iterator I = M.global_begin(),
         E = M.global_end(); I != E; ++I) {
    // Merge is safe for "normal" internal globals only
    if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
      continue;

    PointerType *PT = dyn_cast<PointerType>(I->getType());
    assert(PT && "Global variable is not a pointer!");

    unsigned AddressSpace = PT->getAddressSpace();

    // Ignore fancy-aligned globals for now.
    unsigned Alignment = TD->getPreferredAlignment(I);
    Type *Ty = I->getType()->getElementType();
    if (Alignment > TD->getABITypeAlignment(Ty))
      continue;

    // Ignore all 'special' globals.
    if (I->getName().startswith("llvm.") ||
        I->getName().startswith(".llvm."))
      continue;

    if (TD->getTypeAllocSize(Ty) < MaxOffset) {
      if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine())
          .isBSSLocal())
        BSSGlobals[AddressSpace].push_back(I);
      else if (I->isConstant())
        ConstGlobals[AddressSpace].push_back(I);
      else
        Globals[AddressSpace].push_back(I);
    }
  }

  for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
       I = Globals.begin(), E = Globals.end(); I != E; ++I)
    if (I->second.size() > 1)
      Changed |= doMerge(I->second, M, false, I->first);

  for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
       I = BSSGlobals.begin(), E = BSSGlobals.end(); I != E; ++I)
    if (I->second.size() > 1)
      Changed |= doMerge(I->second, M, false, I->first);

  // FIXME: This currently breaks the EH processing due to way how the
  // typeinfo detection works. We might want to detect the TIs and ignore
  // them in the future.
  // if (ConstGlobals.size() > 1)
  //   Changed |= doMerge(ConstGlobals, M, true);

  return Changed;
}
/// InstCombineLoadCast - Fold 'load (cast P)' -> 'cast (load P)' when possible.
static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
                                        const DataLayout *DL) {
  User *CI = cast<User>(LI.getOperand(0));
  Value *CastOp = CI->getOperand(0);

  PointerType *DestTy = cast<PointerType>(CI->getType());
  Type *DestPTy = DestTy->getElementType();
  if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {

    // If the address spaces don't match, don't eliminate the cast.
    if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
      return 0;

    Type *SrcPTy = SrcTy->getElementType();

    if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
        DestPTy->isVectorTy()) {
      // If the source is an array, the code below will not succeed.  Check to
      // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
      // constants.
      if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
        if (Constant *CSrc = dyn_cast<Constant>(CastOp))
          if (ASrcTy->getNumElements() != 0) {
            Type *IdxTy = DL ? DL->getIntPtrType(SrcTy)
                             : Type::getInt64Ty(SrcTy->getContext());
            Value *Idx = Constant::getNullValue(IdxTy);
            Value *Idxs[2] = { Idx, Idx };
            CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
            SrcTy = cast<PointerType>(CastOp->getType());
            SrcPTy = SrcTy->getElementType();
          }

      if (IC.getDataLayout() &&
          (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
           SrcPTy->isVectorTy()) &&
          // Do not allow turning this into a load of an integer, which is then
          // casted to a pointer, this pessimizes pointer analysis a lot.
          (SrcPTy->isPtrOrPtrVectorTy() ==
           LI.getType()->isPtrOrPtrVectorTy()) &&
          IC.getDataLayout()->getTypeSizeInBits(SrcPTy) ==
              IC.getDataLayout()->getTypeSizeInBits(DestPTy)) {

        // Okay, we are casting from one integer or pointer type to another of
        // the same size.  Instead of casting the pointer before the load, cast
        // the result of the loaded value.
        LoadInst *NewLoad =
          IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
        NewLoad->setAlignment(LI.getAlignment());
        NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
        // Now cast the result of the load.
        return new BitCastInst(NewLoad, LI.getType());
      }
    }
  }
  return 0;
}
bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
  if (!TM || skipFunction(F))
    return false;

  FunctionType *FTy = F.getFunctionType();

  // If the function has any arguments in the local address space, then it's
  // possible these arguments require the entire local memory space, so
  // we cannot use local memory in the pass.
  for (Type *ParamTy : FTy->params()) {
    PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
    if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
      LocalMemAvailable = 0;
      DEBUG(dbgs() << "Function has local memory argument. Promoting to "
                      "local memory disabled.\n");
      return false;
    }
  }

  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
  LocalMemAvailable = ST.getLocalMemorySize();
  if (LocalMemAvailable == 0)
    return false;

  // Check how much local memory is being used by global objects
  for (GlobalVariable &GV : Mod->globals()) {
    if (GV.getType()->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
      continue;

    for (User *U : GV.users()) {
      Instruction *Use = dyn_cast<Instruction>(U);
      if (!Use)
        continue;

      if (Use->getParent()->getParent() == &F) {
        LocalMemAvailable -=
          Mod->getDataLayout().getTypeAllocSize(GV.getValueType());
        break;
      }
    }
  }

  LocalMemAvailable = std::max(0, LocalMemAvailable);

  DEBUG(dbgs() << LocalMemAvailable << " bytes free in local memory.\n");

  BasicBlock &EntryBB = *F.begin();
  for (auto I = EntryBB.begin(), E = EntryBB.end(); I != E; ) {
    AllocaInst *AI = dyn_cast<AllocaInst>(I);

    ++I;
    if (AI)
      handleAlloca(*AI);
  }

  return true;
}
// Decides whether V is an addrspacecast and shortcutting V in load/store is
// valid and beneficial.
static bool isEliminableAddrSpaceCast(Value *V) {
  // Returns false if V is not even an addrspacecast.
  Operator *Cast = dyn_cast<Operator>(V);
  if (Cast == nullptr || Cast->getOpcode() != Instruction::AddrSpaceCast)
    return false;

  Value *Src = Cast->getOperand(0);
  PointerType *SrcTy = cast<PointerType>(Src->getType());
  PointerType *DestTy = cast<PointerType>(Cast->getType());
  // TODO: For now, we only handle the case where the addrspacecast only
  // changes the address space but not the type. If the type also changes, we
  // could still get rid of the addrspacecast by adding an extra bitcast, but
  // we rarely see such scenarios.
  if (SrcTy->getElementType() != DestTy->getElementType())
    return false;

  // Checks whether the addrspacecast is from a non-generic address space to
  // the generic address space.
  return (SrcTy->getAddressSpace() != AddressSpace::ADDRESS_SPACE_GENERIC &&
          DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC);
}
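// Illustrative note (not part of the original source): on NVPTX the generic
// address space is 0, so a hypothetical typed-pointer IR snippet like the
// following would pass the checks above, because the cast changes only the
// address space (shared -> generic) and not the pointee type:
//
//   %p = addrspacecast float addrspace(3)* %buf to float*
//   %v = load float, float* %p
//
// Shortcutting would let the load address %buf directly in addrspace(3). A
// cast whose pointee type also changed, or one casting generic -> specific,
// would be rejected.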
Value *IRBuilderBase::getCastedInt8PtrValue(Value *Ptr) {
  PointerType *PT = cast<PointerType>(Ptr->getType());
  if (PT->getElementType()->isIntegerTy(8))
    return Ptr;

  // Otherwise, we need to insert a bitcast.
  PT = getInt8PtrTy(PT->getAddressSpace());
  BitCastInst *BCI = new BitCastInst(Ptr, PT, "");
  BB->getInstList().insert(InsertPt, BCI);
  SetInstDebugLocation(BCI);
  return BCI;
}
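// Illustrative note (an observation about typical usage, not taken from this
// snippet): IRBuilder's memory-intrinsic helpers such as CreateMemSet and
// CreateMemCpy route their pointer operands through getCastedInt8PtrValue, so
// a caller may pass, say, an i32* and the builder inserts the i8* bitcast in
// the pointer's own address space before forming the intrinsic call. The
// address space is preserved rather than defaulting to 0.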
bool AMDGPURewriteOutArguments::isOutArgumentCandidate(Argument &Arg) const {
  const unsigned MaxOutArgSizeBytes = 4 * MaxNumRetRegs;
  PointerType *ArgTy = dyn_cast<PointerType>(Arg.getType());

  // TODO: It might be useful for any out arguments, not just privates.
  if (!ArgTy || (ArgTy->getAddressSpace() != DL->getAllocaAddrSpace() &&
                 !AnyAddressSpace) ||
      Arg.hasByValAttr() || Arg.hasStructRetAttr() ||
      DL->getTypeStoreSize(ArgTy->getPointerElementType()) >
          MaxOutArgSizeBytes) {
    return false;
  }

  return checkArgumentUses(Arg);
}
bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
  const FunctionType *FTy = F.getFunctionType();

  LocalMemAvailable = ST.getLocalMemorySize();

  // If the function has any arguments in the local address space, then it's
  // possible these arguments require the entire local memory space, so
  // we cannot use local memory in the pass.
  for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
    const Type *ParamTy = FTy->getParamType(i);
    if (ParamTy->isPointerTy() &&
        ParamTy->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
      LocalMemAvailable = 0;
      DEBUG(dbgs() << "Function has local memory argument. Promoting to "
                      "local memory disabled.\n");
      break;
    }
  }

  if (LocalMemAvailable > 0) {
    // Check how much local memory is being used by global objects
    for (Module::global_iterator I = Mod->global_begin(),
                                 E = Mod->global_end(); I != E; ++I) {
      GlobalVariable *GV = I;
      PointerType *GVTy = GV->getType();

      if (GVTy->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
        continue;

      for (Value::use_iterator U = GV->use_begin(),
                               UE = GV->use_end(); U != UE; ++U) {
        Instruction *Use = dyn_cast<Instruction>(*U);
        if (!Use)
          continue;
        if (Use->getParent()->getParent() == &F)
          LocalMemAvailable -=
              Mod->getDataLayout()->getTypeAllocSize(GVTy->getElementType());
      }
    }
  }

  LocalMemAvailable = std::max(0, LocalMemAvailable);
  DEBUG(dbgs() << LocalMemAvailable << " bytes free in local memory.\n");

  visit(F);

  return false;
}
bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
  if (!TM || F.hasFnAttribute(Attribute::OptimizeNone))
    return false;

  FunctionType *FTy = F.getFunctionType();

  // If the function has any arguments in the local address space, then it's
  // possible these arguments require the entire local memory space, so
  // we cannot use local memory in the pass.
  for (Type *ParamTy : FTy->params()) {
    PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
    if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
      LocalMemAvailable = 0;
      DEBUG(dbgs() << "Function has local memory argument. Promoting to "
                      "local memory disabled.\n");
      return false;
    }
  }

  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
  LocalMemAvailable = ST.getLocalMemorySize();
  if (LocalMemAvailable == 0)
    return false;

  // Check how much local memory is being used by global objects
  for (GlobalVariable &GV : Mod->globals()) {
    if (GV.getType()->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
      continue;

    for (Use &U : GV.uses()) {
      Instruction *Use = dyn_cast<Instruction>(U);
      if (!Use)
        continue;

      if (Use->getParent()->getParent() == &F)
        LocalMemAvailable -=
          Mod->getDataLayout().getTypeAllocSize(GV.getValueType());
    }
  }

  LocalMemAvailable = std::max(0, LocalMemAvailable);

  DEBUG(dbgs() << LocalMemAvailable << " bytes free in local memory.\n");

  visit(F);

  return true;
}
Value *GenericToNVVM::getOrInsertCVTA(Module *M, Function *F,
                                      GlobalVariable *GV,
                                      IRBuilder<> &Builder) {
  PointerType *GVType = GV->getType();
  Value *CVTA = nullptr;

  // See if the address space conversion requires the operand to be bitcast
  // to i8 addrspace(n)* first.
  EVT ExtendedGVType = EVT::getEVT(GVType->getElementType(), true);
  if (!ExtendedGVType.isInteger() && !ExtendedGVType.isFloatingPoint()) {
    // A bitcast to i8 addrspace(n)* on the operand is needed.
    LLVMContext &Context = M->getContext();
    unsigned int AddrSpace = GVType->getAddressSpace();
    Type *DestTy = PointerType::get(Type::getInt8Ty(Context), AddrSpace);
    CVTA = Builder.CreateBitCast(GV, DestTy, "cvta");
    // Insert the address space conversion.
    Type *ResultType =
        PointerType::get(Type::getInt8Ty(Context), llvm::ADDRESS_SPACE_GENERIC);
    SmallVector<Type *, 2> ParamTypes;
    ParamTypes.push_back(ResultType);
    ParamTypes.push_back(DestTy);
    Function *CVTAFunction = Intrinsic::getDeclaration(
        M, Intrinsic::nvvm_ptr_global_to_gen, ParamTypes);
    CVTA = Builder.CreateCall(CVTAFunction, CVTA, "cvta");
    // Another bitcast from i8 * to <the element type of GVType> * is
    // required.
    DestTy = PointerType::get(GVType->getElementType(),
                              llvm::ADDRESS_SPACE_GENERIC);
    CVTA = Builder.CreateBitCast(CVTA, DestTy, "cvta");
  } else {
    // A simple CVTA is enough.
    SmallVector<Type *, 2> ParamTypes;
    ParamTypes.push_back(PointerType::get(GVType->getElementType(),
                                          llvm::ADDRESS_SPACE_GENERIC));
    ParamTypes.push_back(GVType);
    Function *CVTAFunction = Intrinsic::getDeclaration(
        M, Intrinsic::nvvm_ptr_global_to_gen, ParamTypes);
    CVTA = Builder.CreateCall(CVTAFunction, GV, "cvta");
  }

  return CVTA;
}
static Value *getFieldAddress(IRBuilder<> &Builder, Value *Base,
                              uint32_t Offset, Type *FieldTy) {
  // The base value should be an i8* or i8[]*.
  assert(Base->getType()->isPointerTy());
  assert(Base->getType()->getPointerElementType()->isIntegerTy(8) ||
         Base->getType()
             ->getPointerElementType()
             ->getArrayElementType()
             ->isIntegerTy(8));

  Type *Int32Ty = Type::getInt32Ty(Builder.getContext());
  Value *Indices[] = {ConstantInt::get(Int32Ty, 0),
                      ConstantInt::get(Int32Ty, Offset)};
  Value *Address = Builder.CreateInBoundsGEP(Base, Indices);

  PointerType *AddressTy = cast<PointerType>(Address->getType());
  if (AddressTy->getElementType() != FieldTy) {
    AddressTy = PointerType::get(FieldTy, AddressTy->getAddressSpace());
    Address = Builder.CreatePointerCast(Address, AddressTy);
  }
  return Address;
}
bool GlobalMerge::doInitialization(Module &M) {
  if (!EnableGlobalMerge)
    return false;

  IsMachO = Triple(M.getTargetTriple()).isOSBinFormatMachO();

  auto &DL = M.getDataLayout();
  DenseMap<unsigned, SmallVector<GlobalVariable *, 16>> Globals, ConstGlobals,
                                                        BSSGlobals;
  bool Changed = false;
  setMustKeepGlobalVariables(M);

  // Grab all non-const globals.
  for (auto &GV : M.globals()) {
    // Merge is safe for "normal" internal or external globals only
    if (GV.isDeclaration() || GV.isThreadLocal() ||
        GV.hasSection() || GV.hasImplicitSection())
      continue;

    // It's not safe to merge globals that may be preempted
    if (TM && !TM->shouldAssumeDSOLocal(M, &GV))
      continue;

    if (!(MergeExternalGlobals && GV.hasExternalLinkage()) &&
        !GV.hasInternalLinkage())
      continue;

    PointerType *PT = dyn_cast<PointerType>(GV.getType());
    assert(PT && "Global variable is not a pointer!");

    unsigned AddressSpace = PT->getAddressSpace();

    // Ignore fancy-aligned globals for now.
    unsigned Alignment = DL.getPreferredAlignment(&GV);
    Type *Ty = GV.getValueType();
    if (Alignment > DL.getABITypeAlignment(Ty))
      continue;

    // Ignore all 'special' globals.
    if (GV.getName().startswith("llvm.") ||
        GV.getName().startswith(".llvm."))
      continue;

    // Ignore all "required" globals:
    if (isMustKeepGlobalVariable(&GV))
      continue;

    if (DL.getTypeAllocSize(Ty) < MaxOffset) {
      if (TM &&
          TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSSLocal())
        BSSGlobals[AddressSpace].push_back(&GV);
      else if (GV.isConstant())
        ConstGlobals[AddressSpace].push_back(&GV);
      else
        Globals[AddressSpace].push_back(&GV);
    }
  }

  for (auto &P : Globals)
    if (P.second.size() > 1)
      Changed |= doMerge(P.second, M, false, P.first);

  for (auto &P : BSSGlobals)
    if (P.second.size() > 1)
      Changed |= doMerge(P.second, M, false, P.first);

  if (EnableGlobalMergeOnConst)
    for (auto &P : ConstGlobals)
      if (P.second.size() > 1)
        Changed |= doMerge(P.second, M, true, P.first);

  return Changed;
}
bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  // TODO: Could probably handle variadic functions.
  if (F.isVarArg() || F.hasStructRetAttr() ||
      AMDGPU::isEntryFunctionCC(F.getCallingConv()))
    return false;

  MDA = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();

  unsigned ReturnNumRegs = 0;
  SmallSet<int, 4> OutArgIndexes;
  SmallVector<Type *, 4> ReturnTypes;
  Type *RetTy = F.getReturnType();
  if (!RetTy->isVoidTy()) {
    ReturnNumRegs = DL->getTypeStoreSize(RetTy) / 4;

    if (ReturnNumRegs >= MaxNumRetRegs)
      return false;

    ReturnTypes.push_back(RetTy);
  }

  SmallVector<Argument *, 4> OutArgs;
  for (Argument &Arg : F.args()) {
    if (isOutArgumentCandidate(Arg)) {
      LLVM_DEBUG(dbgs() << "Found possible out argument " << Arg
                        << " in function " << F.getName() << '\n');
      OutArgs.push_back(&Arg);
    }
  }

  if (OutArgs.empty())
    return false;

  using ReplacementVec = SmallVector<std::pair<Argument *, Value *>, 4>;

  DenseMap<ReturnInst *, ReplacementVec> Replacements;

  SmallVector<ReturnInst *, 4> Returns;
  for (BasicBlock &BB : F) {
    if (ReturnInst *RI = dyn_cast<ReturnInst>(&BB.back()))
      Returns.push_back(RI);
  }

  if (Returns.empty())
    return false;

  bool Changing;

  do {
    Changing = false;

    // Keep retrying if we are able to successfully eliminate an argument. This
    // helps with cases with multiple arguments which may alias, such as in a
    // sincos implementation. If we have 2 stores to arguments, on the first
    // attempt the MDA query will succeed for the second store but not the
    // first. On the second iteration we've removed that out clobbering
    // argument (by effectively moving it into another function) and will find
    // the second argument is OK to move.
    for (Argument *OutArg : OutArgs) {
      bool ThisReplaceable = true;

      SmallVector<std::pair<ReturnInst *, StoreInst *>, 4> ReplaceableStores;

      Type *ArgTy = OutArg->getType()->getPointerElementType();

      // Skip this argument if converting it will push us over the register
      // count to return limit.

      // TODO: This is an approximation. When legalized this could be more. We
      // can ask TLI for exactly how many.
      unsigned ArgNumRegs = DL->getTypeStoreSize(ArgTy) / 4;
      if (ArgNumRegs + ReturnNumRegs > MaxNumRetRegs)
        continue;

      // An argument is convertible only if all exit blocks are able to replace
      // it.
      for (ReturnInst *RI : Returns) {
        BasicBlock *BB = RI->getParent();

        MemDepResult Q = MDA->getPointerDependencyFrom(MemoryLocation(OutArg),
                                                       true, BB->end(), BB, RI);
        StoreInst *SI = nullptr;
        if (Q.isDef())
          SI = dyn_cast<StoreInst>(Q.getInst());

        if (SI) {
          LLVM_DEBUG(dbgs() << "Found out argument store: " << *SI << '\n');
          ReplaceableStores.emplace_back(RI, SI);
        } else {
          ThisReplaceable = false;
          break;
        }
      }

      if (!ThisReplaceable)
        continue; // Try the next argument candidate.

      for (std::pair<ReturnInst *, StoreInst *> Store : ReplaceableStores) {
        Value *ReplVal = Store.second->getValueOperand();

        auto &ValVec = Replacements[Store.first];
        if (llvm::find_if(ValVec,
              [OutArg](const std::pair<Argument *, Value *> &Entry) {
                return Entry.first == OutArg;}) != ValVec.end()) {
          LLVM_DEBUG(dbgs()
                     << "Saw multiple out arg stores" << *OutArg << '\n');
          // It is possible to see stores to the same argument multiple times,
          // but we expect these would have been optimized out already.
          ThisReplaceable = false;
          break;
        }

        ValVec.emplace_back(OutArg, ReplVal);
        Store.second->eraseFromParent();
      }

      if (ThisReplaceable) {
        ReturnTypes.push_back(ArgTy);
        OutArgIndexes.insert(OutArg->getArgNo());
        ++NumOutArgumentsReplaced;
        Changing = true;
      }
    }
  } while (Changing);

  if (Replacements.empty())
    return false;

  LLVMContext &Ctx = F.getParent()->getContext();
  StructType *NewRetTy = StructType::create(Ctx, ReturnTypes, F.getName());

  FunctionType *NewFuncTy = FunctionType::get(NewRetTy,
                                              F.getFunctionType()->params(),
                                              F.isVarArg());

  LLVM_DEBUG(dbgs() << "Computed new return type: " << *NewRetTy << '\n');

  Function *NewFunc = Function::Create(NewFuncTy, Function::PrivateLinkage,
                                       F.getName() + ".body");
  F.getParent()->getFunctionList().insert(F.getIterator(), NewFunc);
  NewFunc->copyAttributesFrom(&F);
  NewFunc->setComdat(F.getComdat());

  // We want to preserve the function and param attributes, but need to strip
  // off any return attributes, e.g. zeroext doesn't make sense with a struct.
  NewFunc->stealArgumentListFrom(F);

  AttrBuilder RetAttrs;
  RetAttrs.addAttribute(Attribute::SExt);
  RetAttrs.addAttribute(Attribute::ZExt);
  RetAttrs.addAttribute(Attribute::NoAlias);
  NewFunc->removeAttributes(AttributeList::ReturnIndex, RetAttrs);
  // TODO: How to preserve metadata?

  // Move the body of the function into the new rewritten function, and replace
  // this function with a stub.
  NewFunc->getBasicBlockList().splice(NewFunc->begin(), F.getBasicBlockList());

  for (std::pair<ReturnInst *, ReplacementVec> &Replacement : Replacements) {
    ReturnInst *RI = Replacement.first;
    IRBuilder<> B(RI);
    B.SetCurrentDebugLocation(RI->getDebugLoc());

    int RetIdx = 0;
    Value *NewRetVal = UndefValue::get(NewRetTy);

    Value *RetVal = RI->getReturnValue();
    if (RetVal)
      NewRetVal = B.CreateInsertValue(NewRetVal, RetVal, RetIdx++);

    for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second) {
      Argument *Arg = ReturnPoint.first;
      Value *Val = ReturnPoint.second;

      Type *EltTy = Arg->getType()->getPointerElementType();
      if (Val->getType() != EltTy) {
        Type *EffectiveEltTy = EltTy;
        if (StructType *CT = dyn_cast<StructType>(EltTy)) {
          assert(CT->getNumElements() == 1);
          EffectiveEltTy = CT->getElementType(0);
        }

        if (DL->getTypeSizeInBits(EffectiveEltTy) !=
            DL->getTypeSizeInBits(Val->getType())) {
          assert(isVec3ToVec4Shuffle(EffectiveEltTy, Val->getType()));
          Val = B.CreateShuffleVector(Val, UndefValue::get(Val->getType()),
                                      { 0, 1, 2 });
        }

        Val = B.CreateBitCast(Val, EffectiveEltTy);

        // Re-create single element composite.
        if (EltTy != EffectiveEltTy)
          Val = B.CreateInsertValue(UndefValue::get(EltTy), Val, 0);
      }

      NewRetVal = B.CreateInsertValue(NewRetVal, Val, RetIdx++);
    }

    if (RetVal)
      RI->setOperand(0, NewRetVal);
    else {
      B.CreateRet(NewRetVal);
      RI->eraseFromParent();
    }
  }

  SmallVector<Value *, 16> StubCallArgs;
  for (Argument &Arg : F.args()) {
    if (OutArgIndexes.count(Arg.getArgNo())) {
      // It's easier to preserve the type of the argument list. We rely on
      // DeadArgumentElimination to take care of these.
      StubCallArgs.push_back(UndefValue::get(Arg.getType()));
    } else {
      StubCallArgs.push_back(&Arg);
    }
  }

  BasicBlock *StubBB = BasicBlock::Create(Ctx, "", &F);
  IRBuilder<> B(StubBB);
  CallInst *StubCall = B.CreateCall(NewFunc, StubCallArgs);

  int RetIdx = RetTy->isVoidTy() ? 0 : 1;
  for (Argument &Arg : F.args()) {
    if (!OutArgIndexes.count(Arg.getArgNo()))
      continue;

    PointerType *ArgType = cast<PointerType>(Arg.getType());
    auto *EltTy = ArgType->getElementType();
    unsigned Align = Arg.getParamAlignment();
    if (Align == 0)
      Align = DL->getABITypeAlignment(EltTy);

    Value *Val = B.CreateExtractValue(StubCall, RetIdx++);
    Type *PtrTy = Val->getType()->getPointerTo(ArgType->getAddressSpace());

    // We can peek through bitcasts, so the type may not match.
    Value *PtrVal = B.CreateBitCast(&Arg, PtrTy);

    B.CreateAlignedStore(Val, PtrVal, Align);
  }

  if (!RetTy->isVoidTy()) {
    B.CreateRet(B.CreateExtractValue(StubCall, 0));
  } else {
    B.CreateRetVoid();
  }

  // The function is now a stub we want to inline.
  F.addFnAttr(Attribute::AlwaysInline);

  ++NumOutArgumentFunctionsReplaced;
  return true;
}
bool GlobalMerge::doInitialization(Module &M) {
  if (!EnableGlobalMerge)
    return false;

  auto &DL = M.getDataLayout();
  DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
                                                        BSSGlobals;
  bool Changed = false;
  setMustKeepGlobalVariables(M);

  // Grab all non-const globals.
  for (Module::global_iterator I = M.global_begin(),
         E = M.global_end(); I != E; ++I) {
    // Merge is safe for "normal" internal or external globals only
    if (I->isDeclaration() || I->isThreadLocal() || I->hasSection())
      continue;

    if (!(EnableGlobalMergeOnExternal && I->hasExternalLinkage()) &&
        !I->hasInternalLinkage())
      continue;

    PointerType *PT = dyn_cast<PointerType>(I->getType());
    assert(PT && "Global variable is not a pointer!");

    unsigned AddressSpace = PT->getAddressSpace();

    // Ignore fancy-aligned globals for now.
    unsigned Alignment = DL.getPreferredAlignment(I);
    Type *Ty = I->getType()->getElementType();
    if (Alignment > DL.getABITypeAlignment(Ty))
      continue;

    // Ignore all 'special' globals.
    if (I->getName().startswith("llvm.") ||
        I->getName().startswith(".llvm."))
      continue;

    // Ignore all "required" globals:
    if (isMustKeepGlobalVariable(I))
      continue;

    if (DL.getTypeAllocSize(Ty) < MaxOffset) {
      if (TargetLoweringObjectFile::getKindForGlobal(I, *TM).isBSSLocal())
        BSSGlobals[AddressSpace].push_back(I);
      else if (I->isConstant())
        ConstGlobals[AddressSpace].push_back(I);
      else
        Globals[AddressSpace].push_back(I);
    }
  }

  for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
       I = Globals.begin(), E = Globals.end(); I != E; ++I)
    if (I->second.size() > 1)
      Changed |= doMerge(I->second, M, false, I->first);

  for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
       I = BSSGlobals.begin(), E = BSSGlobals.end(); I != E; ++I)
    if (I->second.size() > 1)
      Changed |= doMerge(I->second, M, false, I->first);

  if (EnableGlobalMergeOnConst)
    for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
         I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I)
      if (I->second.size() > 1)
        Changed |= doMerge(I->second, M, true, I->first);

  return Changed;
}
bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
  FunctionType *FTy = F.getFunctionType();
  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);

  // If the function has any arguments in the local address space, then it's
  // possible these arguments require the entire local memory space, so
  // we cannot use local memory in the pass.
  for (Type *ParamTy : FTy->params()) {
    PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
    if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) {
      LocalMemLimit = 0;
      DEBUG(dbgs() << "Function has local memory argument. Promoting to "
                      "local memory disabled.\n");
      return false;
    }
  }

  LocalMemLimit = ST.getLocalMemorySize();
  if (LocalMemLimit == 0)
    return false;

  const DataLayout &DL = Mod->getDataLayout();

  // Check how much local memory is being used by global objects
  CurrentLocalMemUsage = 0;
  for (GlobalVariable &GV : Mod->globals()) {
    if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS)
      continue;

    for (const User *U : GV.users()) {
      const Instruction *Use = dyn_cast<Instruction>(U);
      if (!Use)
        continue;

      if (Use->getParent()->getParent() == &F) {
        unsigned Align = GV.getAlignment();
        if (Align == 0)
          Align = DL.getABITypeAlignment(GV.getValueType());

        // FIXME: Try to account for padding here. The padding is currently
        // determined from the inverse order of uses in the function. I'm not
        // sure if the use list order is in any way connected to this, so the
        // total reported size is likely incorrect.
        uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
        CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Align);
        CurrentLocalMemUsage += AllocSize;
        break;
      }
    }
  }

  unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage,
                                                          F);

  // Restrict local memory usage so that we don't drastically reduce occupancy,
  // unless it is already significantly reduced.

  // TODO: Have some sort of hint or other heuristics to guess occupancy based
  // on other factors.
  unsigned OccupancyHint = ST.getWavesPerEU(F).second;
  if (OccupancyHint == 0)
    OccupancyHint = 7;

  // Clamp to max value.
  OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerEU());

  // Check the hint but ignore it if it's obviously wrong from the existing LDS
  // usage.
  MaxOccupancy = std::min(OccupancyHint, MaxOccupancy);

  // Round up to the next tier of usage.
  unsigned MaxSizeWithWaveCount =
    ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F);

  // Program is possibly broken by using more local mem than available.
  if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
    return false;

  LocalMemLimit = MaxSizeWithWaveCount;

  DEBUG(
    dbgs() << F.getName() << " uses " << CurrentLocalMemUsage
           << " bytes of LDS\n"
           << " Rounding size to " << MaxSizeWithWaveCount
           << " with a maximum occupancy of " << MaxOccupancy << '\n'
           << " and " << (LocalMemLimit - CurrentLocalMemUsage)
           << " available for promotion\n"
  );

  return true;
}
/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
/// when possible.  This makes it generally easy to do alias analysis and/or
/// SROA/mem2reg of the memory object.
static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
  User *CI = cast<User>(SI.getOperand(1));
  Value *CastOp = CI->getOperand(0);

  Type *DestPTy = CI->getType()->getPointerElementType();
  PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
  if (!SrcTy) return nullptr;

  Type *SrcPTy = SrcTy->getElementType();

  if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
    return nullptr;

  /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
  /// to its first element.  This allows us to handle things like:
  ///   store i32 xxx, (bitcast {foo*, float}* %P to i32*)
  /// on 32-bit hosts.
  SmallVector<Value*, 4> NewGEPIndices;

  // If the source is an array, the code below will not succeed.  Check to
  // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
  // constants.
  if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
    // Index through pointer.
    Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
    NewGEPIndices.push_back(Zero);

    while (1) {
      if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
        if (!STy->getNumElements()) /* Struct can be empty {} */
          break;
        NewGEPIndices.push_back(Zero);
        SrcPTy = STy->getElementType(0);
      } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
        NewGEPIndices.push_back(Zero);
        SrcPTy = ATy->getElementType();
      } else {
        break;
      }
    }

    SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
  }

  if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
    return nullptr;

  // If the pointers point into different address spaces don't do the
  // transformation.
  if (SrcTy->getAddressSpace() != CI->getType()->getPointerAddressSpace())
    return nullptr;

  // If the pointers point to values of different sizes don't do the
  // transformation.
  if (!IC.getDataLayout() ||
      IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
      IC.getDataLayout()->getTypeSizeInBits(DestPTy))
    return nullptr;

  // If the pointers point to pointers to different address spaces don't do the
  // transformation. It is not safe to introduce an addrspacecast instruction
  // in this case since, depending on the target, addrspacecast may not be a
  // no-op cast.
  if (SrcPTy->isPointerTy() && DestPTy->isPointerTy() &&
      SrcPTy->getPointerAddressSpace() != DestPTy->getPointerAddressSpace())
    return nullptr;

  // Okay, we are casting from one integer or pointer type to another of
  // the same size.  Instead of casting the pointer before
  // the store, cast the value to be stored.
  Value *NewCast;
  Instruction::CastOps opcode = Instruction::BitCast;
  Type* CastSrcTy = DestPTy;
  Type* CastDstTy = SrcPTy;
  if (CastDstTy->isPointerTy()) {
    if (CastSrcTy->isIntegerTy())
      opcode = Instruction::IntToPtr;
  } else if (CastDstTy->isIntegerTy()) {
    if (CastSrcTy->isPointerTy())
      opcode = Instruction::PtrToInt;
  }

  // SIOp0 is a pointer to aggregate and this is a store to the first field,
  // emit a GEP to index into its first field.
  if (!NewGEPIndices.empty())
    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);

  Value *SIOp0 = SI.getOperand(0);
  NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
                                   SIOp0->getName()+".c");
  SI.setOperand(0, NewCast);
  SI.setOperand(1, CastOp);
  return &SI;
}
/// Constants comparison:
/// 1. Check whether type of L constant could be losslessly bitcasted to R
/// type.
/// 2. Compare constant contents.
/// For more details see declaration comments.
int FunctionComparator::cmpConstants(const Constant *L,
                                     const Constant *R) const {
  Type *TyL = L->getType();
  Type *TyR = R->getType();

  // Check whether types are bitcastable. This part is just re-factored
  // Type::canLosslesslyBitCastTo method, but instead of returning true/false,
  // we also pack into result which type is "less" for us.

  int TypesRes = cmpTypes(TyL, TyR);
  if (TypesRes != 0) {
    // Types are different, but check whether we can bitcast them.
    if (!TyL->isFirstClassType()) {
      if (TyR->isFirstClassType())
        return -1;
      // Neither TyL nor TyR are values of first class type. Return the result
      // of comparing the types
      return TypesRes;
    }
    if (!TyR->isFirstClassType()) {
      if (TyL->isFirstClassType())
        return 1;
      return TypesRes;
    }

    // Vector -> Vector conversions are always lossless if the two vector types
    // have the same size, otherwise not.
    unsigned TyLWidth = 0;
    unsigned TyRWidth = 0;

    if (auto *VecTyL = dyn_cast<VectorType>(TyL))
      TyLWidth = VecTyL->getBitWidth();
    if (auto *VecTyR = dyn_cast<VectorType>(TyR))
      TyRWidth = VecTyR->getBitWidth();

    if (TyLWidth != TyRWidth)
      return cmpNumbers(TyLWidth, TyRWidth);

    // Zero bit-width means neither TyL nor TyR are vectors.
    if (!TyLWidth) {
      PointerType *PTyL = dyn_cast<PointerType>(TyL);
      PointerType *PTyR = dyn_cast<PointerType>(TyR);
      if (PTyL && PTyR) {
        unsigned AddrSpaceL = PTyL->getAddressSpace();
        unsigned AddrSpaceR = PTyR->getAddressSpace();
        if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR))
          return Res;
      }
      if (PTyL)
        return 1;
      if (PTyR)
        return -1;

      // TyL and TyR aren't vectors, nor pointers. We don't know how to
      // bitcast them.
      return TypesRes;
    }
  }

  // OK, types are bitcastable, now check constant contents.

  if (L->isNullValue() && R->isNullValue())
    return TypesRes;
  if (L->isNullValue() && !R->isNullValue())
    return 1;
  if (!L->isNullValue() && R->isNullValue())
    return -1;

  auto GlobalValueL = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(L));
  auto GlobalValueR = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(R));
  if (GlobalValueL && GlobalValueR) {
    return cmpGlobalValues(GlobalValueL, GlobalValueR);
  }

  if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
    return Res;

  if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
    const auto *SeqR = cast<ConstantDataSequential>(R);
    // This handles ConstantDataArray and ConstantDataVector. Note that we
    // compare the two raw data arrays, which might differ depending on the
    // host endianness. This isn't a problem though, because the endianness of
    // a module will affect the order of the constants, but this order is the
    // same for a given input module and host platform.
    return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues());
  }

  switch (L->getValueID()) {
  case Value::UndefValueVal:
  case Value::ConstantTokenNoneVal:
    return TypesRes;
  case Value::ConstantIntVal: {
    const APInt &LInt = cast<ConstantInt>(L)->getValue();
    const APInt &RInt = cast<ConstantInt>(R)->getValue();
    return cmpAPInts(LInt, RInt);
  }
  case Value::ConstantFPVal: {
    const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
    const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
    return cmpAPFloats(LAPF, RAPF);
  }
  case Value::ConstantArrayVal: {
    const ConstantArray *LA = cast<ConstantArray>(L);
    const ConstantArray *RA = cast<ConstantArray>(R);
    uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements();
    uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (uint64_t i = 0; i < NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)),
                                 cast<Constant>(RA->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantStructVal: {
    const ConstantStruct *LS = cast<ConstantStruct>(L);
    const ConstantStruct *RS = cast<ConstantStruct>(R);
    unsigned NumElementsL = cast<StructType>(TyL)->getNumElements();
    unsigned NumElementsR = cast<StructType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (unsigned i = 0; i != NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)),
                                 cast<Constant>(RS->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantVectorVal: {
    const ConstantVector *LV = cast<ConstantVector>(L);
    const ConstantVector *RV = cast<ConstantVector>(R);
    unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements();
    unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (uint64_t i = 0; i < NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)),
                                 cast<Constant>(RV->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantExprVal: {
    const ConstantExpr *LE = cast<ConstantExpr>(L);
    const ConstantExpr *RE = cast<ConstantExpr>(R);
    unsigned NumOperandsL = LE->getNumOperands();
    unsigned NumOperandsR = RE->getNumOperands();
    if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
      return Res;
    for (unsigned i = 0; i < NumOperandsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)),
                                 cast<Constant>(RE->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::BlockAddressVal: {
    const BlockAddress *LBA = cast<BlockAddress>(L);
    const BlockAddress *RBA = cast<BlockAddress>(R);
    if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
      return Res;
    if (LBA->getFunction() == RBA->getFunction()) {
      // They are BBs in the same function. Order by which comes first in the
      // BB order of the function. This order is deterministic.
      Function* F = LBA->getFunction();
      BasicBlock *LBB = LBA->getBasicBlock();
      BasicBlock *RBB = RBA->getBasicBlock();
      if (LBB == RBB)
        return 0;
      for(BasicBlock &BB : F->getBasicBlockList()) {
        if (&BB == LBB) {
          assert(&BB != RBB);
          return -1;
        }
        if (&BB == RBB)
          return 1;
      }
      llvm_unreachable("Basic Block Address does not point to a basic block "
                       "in its function.");
      return -1;
    } else {
      // cmpValues said the functions are the same. So because they aren't
      // literally the same pointer, they must respectively be the left and
      // right functions.
      assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
      // cmpValues will tell us if these are equivalent BasicBlocks, in the
      // context of their respective functions.
      return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
    }
  }
  default: // Unknown constant, abort.
    DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
    llvm_unreachable("Constant ValueID not recognized.");
    return -1;
  }
}
/// cmpType - compares two types,
/// defines total ordering among the types set.
/// See method declaration comments for more details.
int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
  PointerType *PTyL = dyn_cast<PointerType>(TyL);
  PointerType *PTyR = dyn_cast<PointerType>(TyR);

  const DataLayout &DL = FnL->getParent()->getDataLayout();
  if (PTyL && PTyL->getAddressSpace() == 0)
    TyL = DL.getIntPtrType(TyL);
  if (PTyR && PTyR->getAddressSpace() == 0)
    TyR = DL.getIntPtrType(TyR);

  if (TyL == TyR)
    return 0;

  if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID()))
    return Res;

  switch (TyL->getTypeID()) {
  default:
    llvm_unreachable("Unknown type!");
    // Fall through in Release mode.
    LLVM_FALLTHROUGH;
  case Type::IntegerTyID:
    return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
                      cast<IntegerType>(TyR)->getBitWidth());
  // TyL == TyR would have returned true earlier, because types are uniqued.
  case Type::VoidTyID:
  case Type::FloatTyID:
  case Type::DoubleTyID:
  case Type::X86_FP80TyID:
  case Type::FP128TyID:
  case Type::PPC_FP128TyID:
  case Type::LabelTyID:
  case Type::MetadataTyID:
  case Type::TokenTyID:
    return 0;

  case Type::PointerTyID: {
    assert(PTyL && PTyR && "Both types must be pointers here.");
    return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace());
  }

  case Type::StructTyID: {
    StructType *STyL = cast<StructType>(TyL);
    StructType *STyR = cast<StructType>(TyR);
    if (STyL->getNumElements() != STyR->getNumElements())
      return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());

    if (STyL->isPacked() != STyR->isPacked())
      return cmpNumbers(STyL->isPacked(), STyR->isPacked());

    for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
      if (int Res = cmpTypes(STyL->getElementType(i), STyR->getElementType(i)))
        return Res;
    }
    return 0;
  }

  case Type::FunctionTyID: {
    FunctionType *FTyL = cast<FunctionType>(TyL);
    FunctionType *FTyR = cast<FunctionType>(TyR);
    if (FTyL->getNumParams() != FTyR->getNumParams())
      return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams());

    if (FTyL->isVarArg() != FTyR->isVarArg())
      return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());

    if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType()))
      return Res;

    for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
      if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i)))
        return Res;
    }
    return 0;
  }

  case Type::ArrayTyID:
  case Type::VectorTyID: {
    auto *STyL = cast<SequentialType>(TyL);
    auto *STyR = cast<SequentialType>(TyR);
    if (STyL->getNumElements() != STyR->getNumElements())
      return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
    return cmpTypes(STyL->getElementType(), STyR->getElementType());
  }
  }
}
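// Illustrative note (not part of the original source): because address-space-0
// pointer types are first rewritten to DL.getIntPtrType(...) above, two such
// pointers with different pointee types, e.g. i8* and i32* on a 64-bit target,
// both become i64 and compare equal here, while two pointers in different
// non-zero address spaces are still distinguished by the PointerTyID case,
// which orders them by address space number.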
GlobalAlias *GlobalAlias::create(LinkageTypes Link, const Twine &Name,
                                 GlobalObject *Aliasee) {
  PointerType *PTy = Aliasee->getType();
  return create(PTy->getElementType(), PTy->getAddressSpace(), Link, Name,
                Aliasee);
}
/// cmpType - compares two types,
/// defines total ordering among the types set.
/// See method declaration comments for more details.
int FunctionComparator::cmpType(Type *TyL, Type *TyR) const {
  PointerType *PTyL = dyn_cast<PointerType>(TyL);
  PointerType *PTyR = dyn_cast<PointerType>(TyR);

  if (DL) {
    if (PTyL && PTyL->getAddressSpace() == 0) TyL = DL->getIntPtrType(TyL);
    if (PTyR && PTyR->getAddressSpace() == 0) TyR = DL->getIntPtrType(TyR);
  }

  if (TyL == TyR)
    return 0;

  if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID()))
    return Res;

  switch (TyL->getTypeID()) {
  default:
    llvm_unreachable("Unknown type!");
    // Fall through in Release mode.
  case Type::IntegerTyID:
  case Type::VectorTyID:
    // TyL == TyR would have returned true earlier.
    return cmpNumbers((uint64_t)TyL, (uint64_t)TyR);

  case Type::VoidTyID:
  case Type::FloatTyID:
  case Type::DoubleTyID:
  case Type::X86_FP80TyID:
  case Type::FP128TyID:
  case Type::PPC_FP128TyID:
  case Type::LabelTyID:
  case Type::MetadataTyID:
    return 0;

  case Type::PointerTyID: {
    assert(PTyL && PTyR && "Both types must be pointers here.");
    return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace());
  }

  case Type::StructTyID: {
    StructType *STyL = cast<StructType>(TyL);
    StructType *STyR = cast<StructType>(TyR);
    if (STyL->getNumElements() != STyR->getNumElements())
      return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());

    if (STyL->isPacked() != STyR->isPacked())
      return cmpNumbers(STyL->isPacked(), STyR->isPacked());

    for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
      if (int Res = cmpType(STyL->getElementType(i), STyR->getElementType(i)))
        return Res;
    }
    return 0;
  }

  case Type::FunctionTyID: {
    FunctionType *FTyL = cast<FunctionType>(TyL);
    FunctionType *FTyR = cast<FunctionType>(TyR);
    if (FTyL->getNumParams() != FTyR->getNumParams())
      return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams());

    if (FTyL->isVarArg() != FTyR->isVarArg())
      return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());

    if (int Res = cmpType(FTyL->getReturnType(), FTyR->getReturnType()))
      return Res;

    for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
      if (int Res = cmpType(FTyL->getParamType(i), FTyR->getParamType(i)))
        return Res;
    }
    return 0;
  }

  case Type::ArrayTyID: {
    ArrayType *ATyL = cast<ArrayType>(TyL);
    ArrayType *ATyR = cast<ArrayType>(TyR);
    if (ATyL->getNumElements() != ATyR->getNumElements())
      return cmpNumbers(ATyL->getNumElements(), ATyR->getNumElements());
    return cmpType(ATyL->getElementType(), ATyR->getElementType());
  }
  }
}
unsigned GlobalValue::getAddressSpace() const {
  PointerType *PtrTy = getType();
  return PtrTy->getAddressSpace();
}