void CGCXXABI::ReadArrayCookie(CodeGenFunction &CGF, Address ptr, const CXXDeleteExpr *expr, QualType eltTy, llvm::Value *&numElements, llvm::Value *&allocPtr, CharUnits &cookieSize) { // Derive a char* in the same address space as the pointer. ptr = CGF.Builder.CreateElementBitCast(ptr, CGF.Int8Ty); // If we don't need an array cookie, bail out early. if (!requiresArrayCookie(expr, eltTy)) { allocPtr = ptr.getPointer(); numElements = nullptr; cookieSize = CharUnits::Zero(); return; } cookieSize = getArrayCookieSizeImpl(eltTy); Address allocAddr = CGF.Builder.CreateConstInBoundsByteGEP(ptr, -cookieSize); allocPtr = allocAddr.getPointer(); numElements = readArrayCookieImpl(CGF, allocAddr, cookieSize); }
DominatingValue<RValue>::saved_type DominatingValue<RValue>::saved_type::save(CodeGenFunction &CGF, RValue rv) { if (rv.isScalar()) { llvm::Value *V = rv.getScalarVal(); // These automatically dominate and don't need to be saved. if (!DominatingLLVMValue::needsSaving(V)) return saved_type(V, ScalarLiteral); // Everything else needs an alloca. Address addr = CGF.CreateDefaultAlignTempAlloca(V->getType(), "saved-rvalue"); CGF.Builder.CreateStore(V, addr); return saved_type(addr.getPointer(), ScalarAddress); } if (rv.isComplex()) { CodeGenFunction::ComplexPairTy V = rv.getComplexVal(); llvm::Type *ComplexTy = llvm::StructType::get(V.first->getType(), V.second->getType(), (void*) nullptr); Address addr = CGF.CreateDefaultAlignTempAlloca(ComplexTy, "saved-complex"); CGF.Builder.CreateStore(V.first, CGF.Builder.CreateStructGEP(addr, 0, CharUnits())); CharUnits offset = CharUnits::fromQuantity( CGF.CGM.getDataLayout().getTypeAllocSize(V.first->getType())); CGF.Builder.CreateStore(V.second, CGF.Builder.CreateStructGEP(addr, 1, offset)); return saved_type(addr.getPointer(), ComplexAddress); } assert(rv.isAggregate()); Address V = rv.getAggregateAddress(); // TODO: volatile? if (!DominatingLLVMValue::needsSaving(V.getPointer())) return saved_type(V.getPointer(), AggregateLiteral, V.getAlignment().getQuantity()); Address addr = CGF.CreateTempAlloca(V.getType(), CGF.getPointerAlign(), "saved-rvalue"); CGF.Builder.CreateStore(V.getPointer(), addr); return saved_type(addr.getPointer(), AggregateAddress, V.getAlignment().getQuantity()); }
static void createStoreInstBefore(llvm::Value *value, Address addr, llvm::Instruction *beforeInst) { auto store = new llvm::StoreInst(value, addr.getPointer(), beforeInst); store->setAlignment(addr.getAlignment().getQuantity()); }
// CUDA 9.0+ uses new way to launch kernels. Parameters are packed in a local // array and kernels are launched using cudaLaunchKernel(). void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args) { // Build the shadow stack entry at the very start of the function. // Calculate amount of space we will need for all arguments. If we have no // args, allocate a single pointer so we still have a valid pointer to the // argument array that we can pass to runtime, even if it will be unused. Address KernelArgs = CGF.CreateTempAlloca( VoidPtrTy, CharUnits::fromQuantity(16), "kernel_args", llvm::ConstantInt::get(SizeTy, std::max<size_t>(1, Args.size()))); // Store pointers to the arguments in a locally allocated launch_args. for (unsigned i = 0; i < Args.size(); ++i) { llvm::Value* VarPtr = CGF.GetAddrOfLocalVar(Args[i]).getPointer(); llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, VoidPtrTy); CGF.Builder.CreateDefaultAlignedStore( VoidVarPtr, CGF.Builder.CreateConstGEP1_32(KernelArgs.getPointer(), i)); } llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); // Lookup cudaLaunchKernel function. // cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, // void **args, size_t sharedMem, // cudaStream_t stream); TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); IdentifierInfo &cudaLaunchKernelII = CGM.getContext().Idents.get("cudaLaunchKernel"); FunctionDecl *cudaLaunchKernelFD = nullptr; for (const auto &Result : DC->lookup(&cudaLaunchKernelII)) { if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result)) cudaLaunchKernelFD = FD; } if (cudaLaunchKernelFD == nullptr) { CGM.Error(CGF.CurFuncDecl->getLocation(), "Can't find declaration for cudaLaunchKernel()"); return; } // Create temporary dim3 grid_dim, block_dim. ParmVarDecl *GridDimParam = cudaLaunchKernelFD->getParamDecl(1); QualType Dim3Ty = GridDimParam->getType(); Address GridDim = CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "grid_dim"); Address BlockDim = CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "block_dim"); Address ShmemSize = CGF.CreateTempAlloca(SizeTy, CGM.getSizeAlign(), "shmem_size"); Address Stream = CGF.CreateTempAlloca(VoidPtrTy, CGM.getPointerAlign(), "stream"); llvm::FunctionCallee cudaPopConfigFn = CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, {/*gridDim=*/GridDim.getType(), /*blockDim=*/BlockDim.getType(), /*ShmemSize=*/ShmemSize.getType(), /*Stream=*/Stream.getType()}, /*isVarArg=*/false), "__cudaPopCallConfiguration"); CGF.EmitRuntimeCallOrInvoke(cudaPopConfigFn, {GridDim.getPointer(), BlockDim.getPointer(), ShmemSize.getPointer(), Stream.getPointer()}); // Emit the call to cudaLaunch llvm::Value *Kernel = CGF.Builder.CreatePointerCast(CGF.CurFn, VoidPtrTy); CallArgList LaunchKernelArgs; LaunchKernelArgs.add(RValue::get(Kernel), cudaLaunchKernelFD->getParamDecl(0)->getType()); LaunchKernelArgs.add(RValue::getAggregate(GridDim), Dim3Ty); LaunchKernelArgs.add(RValue::getAggregate(BlockDim), Dim3Ty); LaunchKernelArgs.add(RValue::get(KernelArgs.getPointer()), cudaLaunchKernelFD->getParamDecl(3)->getType()); LaunchKernelArgs.add(RValue::get(CGF.Builder.CreateLoad(ShmemSize)), cudaLaunchKernelFD->getParamDecl(4)->getType()); LaunchKernelArgs.add(RValue::get(CGF.Builder.CreateLoad(Stream)), cudaLaunchKernelFD->getParamDecl(5)->getType()); QualType QT = cudaLaunchKernelFD->getType(); QualType CQT = QT.getCanonicalType(); llvm::Type *Ty = CGM.getTypes().ConvertType(CQT); llvm::FunctionType *FTy = dyn_cast<llvm::FunctionType>(Ty); const CGFunctionInfo &FI = CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD); llvm::FunctionCallee cudaLaunchKernelFn = CGM.CreateRuntimeFunction(FTy, "cudaLaunchKernel"); CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(), LaunchKernelArgs); CGF.EmitBranch(EndBlock); CGF.EmitBlock(EndBlock); }