/// CreateGlobalString - Make a new global variable with an initializer that /// has array of i8 type filled in with the nul terminated string value /// specified. If Name is specified, it is the name of the global variable /// created. Value *IRBuilderBase::CreateGlobalString(StringRef Str, const Twine &Name) { Constant *StrConstant = ConstantDataArray::getString(Context, Str); Module &M = *BB->getParent()->getParent(); GlobalVariable *GV = new GlobalVariable(M, StrConstant->getType(), true, GlobalValue::PrivateLinkage, StrConstant); GV->setName(Name); GV->setUnnamedAddr(true); return GV; }
GlobalVariable *GCOVProfiler::getEdgeStateValue() { GlobalVariable *GV = M->getGlobalVariable("__llvm_gcov_global_state_pred"); if (!GV) { GV = new GlobalVariable(*M, Type::getInt32Ty(*Ctx), false, GlobalValue::InternalLinkage, ConstantInt::get(Type::getInt32Ty(*Ctx), 0xffffffff), "__llvm_gcov_global_state_pred"); GV->setUnnamedAddr(true); } return GV; }
/// CreateGlobalString - Make a new global variable with an initializer that /// has array of i8 type filled in with the nul terminated string value /// specified. If Name is specified, it is the name of the global variable /// created. GlobalVariable *IRBuilderBase::CreateGlobalString(StringRef Str, const Twine &Name, unsigned AddressSpace) { Constant *StrConstant = ConstantDataArray::getString(Context, Str); Module &M = *BB->getParent()->getParent(); GlobalVariable *GV = new GlobalVariable(M, StrConstant->getType(), true, GlobalValue::PrivateLinkage, StrConstant, Name, nullptr, GlobalVariable::NotThreadLocal, AddressSpace); GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); return GV; }
static Constant *getGlobalString(LLVMContext &C, Module &M, const StringRef &str) { Constant *strArray = ConstantDataArray::getString(C, str); GlobalVariable *strVar = new GlobalVariable(M, strArray->getType(), true, GlobalValue::PrivateLinkage, strArray, ""); strVar->setUnnamedAddr(true); strVar->setAlignment(1); std::vector<Value *> params; params.push_back(ConstantInt::get(TypeBuilder<types::i<32>, true>::get(C), 0)); params.push_back(ConstantInt::get(TypeBuilder<types::i<32>, true>::get(C), 0)); return ConstantExpr::getInBoundsGetElementPtr(strVar, params); }
// All edges with successors that aren't branches are "complex", because it // requires complex logic to pick which counter to update. GlobalVariable *GCOVProfiler::buildEdgeLookupTable( Function *F, GlobalVariable *Counters, const UniqueVector<BasicBlock *> &Preds, const UniqueVector<BasicBlock *> &Succs) { // TODO: support invoke, threads. We rely on the fact that nothing can modify // the whole-Module pred edge# between the time we set it and the time we next // read it. Threads and invoke make this untrue. // emit [(succs * preds) x i64*], logically [succ x [pred x i64*]]. Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx); ArrayType *EdgeTableTy = ArrayType::get( Int64PtrTy, Succs.size() * Preds.size()); Constant **EdgeTable = new Constant*[Succs.size() * Preds.size()]; Constant *NullValue = Constant::getNullValue(Int64PtrTy); for (int i = 0, ie = Succs.size() * Preds.size(); i != ie; ++i) EdgeTable[i] = NullValue; unsigned Edge = 0; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { TerminatorInst *TI = BB->getTerminator(); int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors(); if (Successors > 1 && !isa<BranchInst>(TI) && !isa<ReturnInst>(TI)) { for (int i = 0; i != Successors; ++i) { BasicBlock *Succ = TI->getSuccessor(i); IRBuilder<> builder(Succ); Value *Counter = builder.CreateConstInBoundsGEP2_64(Counters, 0, Edge + i); EdgeTable[((Succs.idFor(Succ)-1) * Preds.size()) + (Preds.idFor(BB)-1)] = cast<Constant>(Counter); } } Edge += Successors; } ArrayRef<Constant*> V(&EdgeTable[0], Succs.size() * Preds.size()); GlobalVariable *EdgeTableGV = new GlobalVariable( *M, EdgeTableTy, true, GlobalValue::InternalLinkage, ConstantArray::get(EdgeTableTy, V), "__llvm_gcda_edge_table"); EdgeTableGV->setUnnamedAddr(true); return EdgeTableGV; }
void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { // Array allocations are probably not worth handling, since an allocation of // the array type is the canonical form. if (!I.isStaticAlloca() || I.isArrayAllocation()) return; IRBuilder<> Builder(&I); // First try to replace the alloca with a vector Type *AllocaTy = I.getAllocatedType(); DEBUG(dbgs() << "Trying to promote " << I << '\n'); if (tryPromoteAllocaToVector(&I)) return; DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n"); const Function &ContainingFunction = *I.getParent()->getParent(); // FIXME: We should also try to get this value from the reqd_work_group_size // function attribute if it is available. unsigned WorkGroupSize = AMDGPU::getMaximumWorkGroupSize(ContainingFunction); int AllocaSize = WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy); if (AllocaSize > LocalMemAvailable) { DEBUG(dbgs() << " Not enough local memory to promote alloca.\n"); return; } std::vector<Value*> WorkList; if (!collectUsesWithPtrTypes(&I, WorkList)) { DEBUG(dbgs() << " Do not know how to convert all uses\n"); return; } DEBUG(dbgs() << "Promoting alloca to local memory\n"); LocalMemAvailable -= AllocaSize; Function *F = I.getParent()->getParent(); Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize); GlobalVariable *GV = new GlobalVariable( *Mod, GVTy, false, GlobalValue::InternalLinkage, UndefValue::get(GVTy), Twine(F->getName()) + Twine('.') + I.getName(), nullptr, GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS); GV->setUnnamedAddr(true); GV->setAlignment(I.getAlignment()); Value *TCntY, *TCntZ; std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder); Value *TIdX = getWorkitemID(Builder, 0); Value *TIdY = getWorkitemID(Builder, 1); Value *TIdZ = getWorkitemID(Builder, 2); Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true); Tmp0 = Builder.CreateMul(Tmp0, TIdX); Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true); Value *TID = Builder.CreateAdd(Tmp0, Tmp1); TID = Builder.CreateAdd(TID, TIdZ); Value *Indices[] = { Constant::getNullValue(Type::getInt32Ty(Mod->getContext())), TID }; Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices); I.mutateType(Offset->getType()); I.replaceAllUsesWith(Offset); I.eraseFromParent(); for (Value *V : WorkList) { CallInst *Call = dyn_cast<CallInst>(V); if (!Call) { Type *EltTy = V->getType()->getPointerElementType(); PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS); // The operand's value should be corrected on its own. if (isa<AddrSpaceCastInst>(V)) continue; // FIXME: It doesn't really make sense to try to do this for all // instructions. V->mutateType(NewTy); continue; } IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call); if (!Intr) { // FIXME: What is this for? It doesn't make sense to promote arbitrary // function calls. If the call is to a defined function that can also be // promoted, we should be able to do this once that function is also // rewritten. std::vector<Type*> ArgTypes; for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands(); ArgIdx != ArgEnd; ++ArgIdx) { ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType()); } Function *F = Call->getCalledFunction(); FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes, F->isVarArg()); Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(), NewType, F->getAttributes()); Function *NewF = cast<Function>(C); Call->setCalledFunction(NewF); continue; } Builder.SetInsertPoint(Intr); switch (Intr->getIntrinsicID()) { case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: // These intrinsics are for address space 0 only Intr->eraseFromParent(); continue; case Intrinsic::memcpy: { MemCpyInst *MemCpy = cast<MemCpyInst>(Intr); Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(), MemCpy->getLength(), MemCpy->getAlignment(), MemCpy->isVolatile()); Intr->eraseFromParent(); continue; } case Intrinsic::memmove: { MemMoveInst *MemMove = cast<MemMoveInst>(Intr); Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getRawSource(), MemMove->getLength(), MemMove->getAlignment(), MemMove->isVolatile()); Intr->eraseFromParent(); continue; } case Intrinsic::memset: { MemSetInst *MemSet = cast<MemSetInst>(Intr); Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(), MemSet->getLength(), MemSet->getAlignment(), MemSet->isVolatile()); Intr->eraseFromParent(); continue; } case Intrinsic::invariant_start: case Intrinsic::invariant_end: case Intrinsic::invariant_group_barrier: Intr->eraseFromParent(); // FIXME: I think the invariant marker should still theoretically apply, // but the intrinsics need to be changed to accept pointers with any // address space. continue; case Intrinsic::objectsize: { Value *Src = Intr->getOperand(0); Type *SrcTy = Src->getType()->getPointerElementType(); Function *ObjectSize = Intrinsic::getDeclaration(Mod, Intrinsic::objectsize, { Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) } ); CallInst *NewCall = Builder.CreateCall(ObjectSize, { Src, Intr->getOperand(1) }); Intr->replaceAllUsesWith(NewCall); Intr->eraseFromParent(); continue; } default: Intr->dump(); llvm_unreachable("Don't know how to promote alloca intrinsic use."); } } }
// FIXME: Should try to pick the most likely to be profitable allocas first. bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) { // Array allocations are probably not worth handling, since an allocation of // the array type is the canonical form. if (!I.isStaticAlloca() || I.isArrayAllocation()) return false; IRBuilder<> Builder(&I); // First try to replace the alloca with a vector Type *AllocaTy = I.getAllocatedType(); DEBUG(dbgs() << "Trying to promote " << I << '\n'); if (tryPromoteAllocaToVector(&I, AS)) return true; // Promoted to vector. const Function &ContainingFunction = *I.getParent()->getParent(); CallingConv::ID CC = ContainingFunction.getCallingConv(); // Don't promote the alloca to LDS for shader calling conventions as the work // item ID intrinsics are not supported for these calling conventions. // Furthermore not all LDS is available for some of the stages. switch (CC) { case CallingConv::AMDGPU_KERNEL: case CallingConv::SPIR_KERNEL: break; default: DEBUG(dbgs() << " promote alloca to LDS not supported with calling convention.\n"); return false; } // Not likely to have sufficient local memory for promotion. if (!SufficientLDS) return false; const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(ContainingFunction); unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second; const DataLayout &DL = Mod->getDataLayout(); unsigned Align = I.getAlignment(); if (Align == 0) Align = DL.getABITypeAlignment(I.getAllocatedType()); // FIXME: This computed padding is likely wrong since it depends on inverse // usage order. // // FIXME: It is also possible that if we're allowed to use all of the memory // could could end up using more than the maximum due to alignment padding. uint32_t NewSize = alignTo(CurrentLocalMemUsage, Align); uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy); NewSize += AllocSize; if (NewSize > LocalMemLimit) { DEBUG(dbgs() << " " << AllocSize << " bytes of local memory not available to promote\n"); return false; } CurrentLocalMemUsage = NewSize; std::vector<Value*> WorkList; if (!collectUsesWithPtrTypes(&I, &I, WorkList)) { DEBUG(dbgs() << " Do not know how to convert all uses\n"); return false; } DEBUG(dbgs() << "Promoting alloca to local memory\n"); Function *F = I.getParent()->getParent(); Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize); GlobalVariable *GV = new GlobalVariable( *Mod, GVTy, false, GlobalValue::InternalLinkage, UndefValue::get(GVTy), Twine(F->getName()) + Twine('.') + I.getName(), nullptr, GlobalVariable::NotThreadLocal, AS.LOCAL_ADDRESS); GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); GV->setAlignment(I.getAlignment()); Value *TCntY, *TCntZ; std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder); Value *TIdX = getWorkitemID(Builder, 0); Value *TIdY = getWorkitemID(Builder, 1); Value *TIdZ = getWorkitemID(Builder, 2); Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true); Tmp0 = Builder.CreateMul(Tmp0, TIdX); Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true); Value *TID = Builder.CreateAdd(Tmp0, Tmp1); TID = Builder.CreateAdd(TID, TIdZ); Value *Indices[] = { Constant::getNullValue(Type::getInt32Ty(Mod->getContext())), TID }; Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices); I.mutateType(Offset->getType()); I.replaceAllUsesWith(Offset); I.eraseFromParent(); for (Value *V : WorkList) { CallInst *Call = dyn_cast<CallInst>(V); if (!Call) { if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) { Value *Src0 = CI->getOperand(0); Type *EltTy = Src0->getType()->getPointerElementType(); PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS); if (isa<ConstantPointerNull>(CI->getOperand(0))) CI->setOperand(0, ConstantPointerNull::get(NewTy)); if (isa<ConstantPointerNull>(CI->getOperand(1))) CI->setOperand(1, ConstantPointerNull::get(NewTy)); continue; } // The operand's value should be corrected on its own and we don't want to // touch the users. if (isa<AddrSpaceCastInst>(V)) continue; Type *EltTy = V->getType()->getPointerElementType(); PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS); // FIXME: It doesn't really make sense to try to do this for all // instructions. V->mutateType(NewTy); // Adjust the types of any constant operands. if (SelectInst *SI = dyn_cast<SelectInst>(V)) { if (isa<ConstantPointerNull>(SI->getOperand(1))) SI->setOperand(1, ConstantPointerNull::get(NewTy)); if (isa<ConstantPointerNull>(SI->getOperand(2))) SI->setOperand(2, ConstantPointerNull::get(NewTy)); } else if (PHINode *Phi = dyn_cast<PHINode>(V)) { for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) { if (isa<ConstantPointerNull>(Phi->getIncomingValue(I))) Phi->setIncomingValue(I, ConstantPointerNull::get(NewTy)); } } continue; } IntrinsicInst *Intr = cast<IntrinsicInst>(Call); Builder.SetInsertPoint(Intr); switch (Intr->getIntrinsicID()) { case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: // These intrinsics are for address space 0 only Intr->eraseFromParent(); continue; case Intrinsic::memcpy: { MemCpyInst *MemCpy = cast<MemCpyInst>(Intr); Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getDestAlignment(), MemCpy->getRawSource(), MemCpy->getSourceAlignment(), MemCpy->getLength(), MemCpy->isVolatile()); Intr->eraseFromParent(); continue; } case Intrinsic::memmove: { MemMoveInst *MemMove = cast<MemMoveInst>(Intr); Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getDestAlignment(), MemMove->getRawSource(), MemMove->getSourceAlignment(), MemMove->getLength(), MemMove->isVolatile()); Intr->eraseFromParent(); continue; } case Intrinsic::memset: { MemSetInst *MemSet = cast<MemSetInst>(Intr); Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(), MemSet->getLength(), MemSet->getDestAlignment(), MemSet->isVolatile()); Intr->eraseFromParent(); continue; } case Intrinsic::invariant_start: case Intrinsic::invariant_end: case Intrinsic::invariant_group_barrier: Intr->eraseFromParent(); // FIXME: I think the invariant marker should still theoretically apply, // but the intrinsics need to be changed to accept pointers with any // address space. continue; case Intrinsic::objectsize: { Value *Src = Intr->getOperand(0); Type *SrcTy = Src->getType()->getPointerElementType(); Function *ObjectSize = Intrinsic::getDeclaration(Mod, Intrinsic::objectsize, { Intr->getType(), PointerType::get(SrcTy, AS.LOCAL_ADDRESS) } ); CallInst *NewCall = Builder.CreateCall( ObjectSize, {Src, Intr->getOperand(1), Intr->getOperand(2)}); Intr->replaceAllUsesWith(NewCall); Intr->eraseFromParent(); continue; } default: Intr->print(errs()); llvm_unreachable("Don't know how to promote alloca intrinsic use."); } } return true; }
bool ConstantMerge::runOnModule(Module &M) { TD = getAnalysisIfAvailable<TargetData>(); // Find all the globals that are marked "used". These cannot be merged. SmallPtrSet<const GlobalValue*, 8> UsedGlobals; FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals); FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals); // Map unique <constants, has-unknown-alignment> pairs to globals. We don't // want to merge globals of unknown alignment with those of explicit // alignment. If we have TargetData, we always know the alignment. DenseMap<PointerIntPair<Constant*, 1, bool>, GlobalVariable*> CMap; // Replacements - This vector contains a list of replacements to perform. SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements; bool MadeChange = false; // Iterate constant merging while we are still making progress. Merging two // constants together may allow us to merge other constants together if the // second level constants have initializers which point to the globals that // were just merged. while (1) { // First: Find the canonical constants others will be merged with. for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); GVI != E; ) { GlobalVariable *GV = GVI++; // If this GV is dead, remove it. GV->removeDeadConstantUsers(); if (GV->use_empty() && GV->hasLocalLinkage()) { GV->eraseFromParent(); continue; } // Only process constants with initializers in the default address space. if (!GV->isConstant() || !GV->hasDefinitiveInitializer() || GV->getType()->getAddressSpace() != 0 || GV->hasSection() || // Don't touch values marked with attribute(used). UsedGlobals.count(GV)) continue; // This transformation is legal for weak ODR globals in the sense it // doesn't change semantics, but we really don't want to perform it // anyway; it's likely to pessimize code generation, and some tools // (like the Darwin linker in cases involving CFString) don't expect it. if (GV->isWeakForLinker()) continue; Constant *Init = GV->getInitializer(); // Check to see if the initializer is already known. PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV)); GlobalVariable *&Slot = CMap[Pair]; // If this is the first constant we find or if the old one is local, // replace with the current one. If the current is externally visible // it cannot be replace, but can be the canonical constant we merge with. if (Slot == 0 || IsBetterCannonical(*GV, *Slot)) Slot = GV; } // Second: identify all globals that can be merged together, filling in // the Replacements vector. We cannot do the replacement in this pass // because doing so may cause initializers of other globals to be rewritten, // invalidating the Constant* pointers in CMap. for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); GVI != E; ) { GlobalVariable *GV = GVI++; // Only process constants with initializers in the default address space. if (!GV->isConstant() || !GV->hasDefinitiveInitializer() || GV->getType()->getAddressSpace() != 0 || GV->hasSection() || // Don't touch values marked with attribute(used). UsedGlobals.count(GV)) continue; // We can only replace constant with local linkage. if (!GV->hasLocalLinkage()) continue; Constant *Init = GV->getInitializer(); // Check to see if the initializer is already known. PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV)); GlobalVariable *Slot = CMap[Pair]; if (!Slot || Slot == GV) continue; if (!Slot->hasUnnamedAddr() && !GV->hasUnnamedAddr()) continue; if (!GV->hasUnnamedAddr()) Slot->setUnnamedAddr(false); // Make all uses of the duplicate constant use the canonical version. Replacements.push_back(std::make_pair(GV, Slot)); } if (Replacements.empty()) return MadeChange; CMap.clear(); // Now that we have figured out which replacements must be made, do them all // now. This avoid invalidating the pointers in CMap, which are unneeded // now. for (unsigned i = 0, e = Replacements.size(); i != e; ++i) { // Bump the alignment if necessary. if (Replacements[i].first->getAlignment() || Replacements[i].second->getAlignment()) { Replacements[i].second->setAlignment(std::max( Replacements[i].first->getAlignment(), Replacements[i].second->getAlignment())); } // Eliminate any uses of the dead global. Replacements[i].first->replaceAllUsesWith(Replacements[i].second); // Delete the global value from the module. assert(Replacements[i].first->hasLocalLinkage() && "Refusing to delete an externally visible global variable."); Replacements[i].first->eraseFromParent(); } NumMerged += Replacements.size(); Replacements.clear(); } }