// CopyObject - Utility to copy an object. Calls copy constructor as necessary. // DestPtr is casted to the right type. static void CopyObject(CodeGenFunction &CGF, const Expr *E, llvm::Value *DestPtr, llvm::Value *ExceptionPtrPtr) { QualType ObjectType = E->getType(); // Store the throw exception in the exception object. if (!CGF.hasAggregateLLVMType(ObjectType)) { llvm::Value *Value = CGF.EmitScalarExpr(E); const llvm::Type *ValuePtrTy = Value->getType()->getPointerTo(); CGF.Builder.CreateStore(Value, CGF.Builder.CreateBitCast(DestPtr, ValuePtrTy)); } else { const llvm::Type *Ty = CGF.ConvertType(ObjectType)->getPointerTo(); const CXXRecordDecl *RD = cast<CXXRecordDecl>(ObjectType->getAs<RecordType>()->getDecl()); llvm::Value *This = CGF.Builder.CreateBitCast(DestPtr, Ty); if (RD->hasTrivialCopyConstructor()) { CGF.EmitAggExpr(E, This, false); } else if (CXXConstructorDecl *CopyCtor = RD->getCopyConstructor(CGF.getContext(), 0)) { llvm::BasicBlock *PrevLandingPad = CGF.getInvokeDest(); if (CGF.Exceptions) { CodeGenFunction::EHCleanupBlock Cleanup(CGF); llvm::Constant *FreeExceptionFn = getFreeExceptionFn(CGF); // Load the exception pointer. llvm::Value *ExceptionPtr = CGF.Builder.CreateLoad(ExceptionPtrPtr); CGF.Builder.CreateCall(FreeExceptionFn, ExceptionPtr); } llvm::Value *Src = CGF.EmitLValue(E).getAddress(); CGF.setInvokeDest(PrevLandingPad); llvm::BasicBlock *TerminateHandler = CGF.getTerminateHandler(); PrevLandingPad = CGF.getInvokeDest(); CGF.setInvokeDest(TerminateHandler); // Stolen from EmitClassAggrMemberwiseCopy llvm::Value *Callee = CGF.CGM.GetAddrOfCXXConstructor(CopyCtor, Ctor_Complete); CallArgList CallArgs; CallArgs.push_back(std::make_pair(RValue::get(This), CopyCtor->getThisType(CGF.getContext()))); // Push the Src ptr. CallArgs.push_back(std::make_pair(RValue::get(Src), CopyCtor->getParamDecl(0)->getType())); QualType ResultType = CopyCtor->getType()->getAs<FunctionType>()->getResultType(); CGF.EmitCall(CGF.CGM.getTypes().getFunctionInfo(ResultType, CallArgs), Callee, CallArgs, CopyCtor); CGF.setInvokeDest(PrevLandingPad); } else llvm_unreachable("uncopyable object"); } }
static RValue emitAtomicLibcall(CodeGenFunction &CGF, StringRef fnName, QualType resultType, CallArgList &args) { const CGFunctionInfo &fnInfo = CGF.CGM.getTypes().arrangeFreeFunctionCall(resultType, args, FunctionType::ExtInfo(), RequiredArgs::All); llvm::FunctionType *fnTy = CGF.CGM.getTypes().GetFunctionType(fnInfo); llvm::Constant *fn = CGF.CGM.CreateRuntimeFunction(fnTy, fnName); return CGF.EmitCall(fnInfo, fn, ReturnValueSlot(), args); }
// CopyObject - Utility to copy an object. Calls copy constructor as necessary. // N is casted to the right type. static void CopyObject(CodeGenFunction &CGF, QualType ObjectType, bool WasPointer, bool WasPointerReference, llvm::Value *E, llvm::Value *N) { // Store the throw exception in the exception object. if (WasPointer || !CGF.hasAggregateLLVMType(ObjectType)) { llvm::Value *Value = E; if (!WasPointer) Value = CGF.Builder.CreateLoad(Value); const llvm::Type *ValuePtrTy = Value->getType()->getPointerTo(0); if (WasPointerReference) { llvm::Value *Tmp = CGF.CreateTempAlloca(Value->getType(), "catch.param"); CGF.Builder.CreateStore(Value, Tmp); Value = Tmp; ValuePtrTy = Value->getType()->getPointerTo(0); } N = CGF.Builder.CreateBitCast(N, ValuePtrTy); CGF.Builder.CreateStore(Value, N); } else { const llvm::Type *Ty = CGF.ConvertType(ObjectType)->getPointerTo(0); const CXXRecordDecl *RD; RD = cast<CXXRecordDecl>(ObjectType->getAs<RecordType>()->getDecl()); llvm::Value *This = CGF.Builder.CreateBitCast(N, Ty); if (RD->hasTrivialCopyConstructor()) { CGF.EmitAggregateCopy(This, E, ObjectType); } else if (CXXConstructorDecl *CopyCtor = RD->getCopyConstructor(CGF.getContext(), 0)) { llvm::Value *Src = E; // Stolen from EmitClassAggrMemberwiseCopy llvm::Value *Callee = CGF.CGM.GetAddrOfCXXConstructor(CopyCtor, Ctor_Complete); CallArgList CallArgs; CallArgs.push_back(std::make_pair(RValue::get(This), CopyCtor->getThisType(CGF.getContext()))); // Push the Src ptr. CallArgs.push_back(std::make_pair(RValue::get(Src), CopyCtor->getParamDecl(0)->getType())); const FunctionProtoType *FPT = CopyCtor->getType()->getAs<FunctionProtoType>(); CGF.EmitCall(CGF.CGM.getTypes().getFunctionInfo(CallArgs, FPT), Callee, ReturnValueSlot(), CallArgs, CopyCtor); } else llvm_unreachable("uncopyable object"); } }
RValue EmitUPCCall(CodeGenFunction &CGF, llvm::StringRef Name, QualType ResultTy, const CallArgList& Args) { ASTContext &Context = CGF.CGM.getContext(); llvm::SmallVector<QualType, 5> ArgTypes; for (CallArgList::const_iterator iter = Args.begin(), end = Args.end(); iter != end; ++iter) { ArgTypes.push_back(iter->Ty); } QualType FuncType = Context.getFunctionType(ResultTy, ArgTypes, FunctionProtoType::ExtProtoInfo()); const CGFunctionInfo &Info = CGF.getTypes().arrangeFreeFunctionCall(Args, FuncType->castAs<FunctionType>()); llvm::FunctionType * FTy = cast<llvm::FunctionType>(CGF.ConvertType(FuncType)); llvm::Value * Fn = CGF.CGM.CreateRuntimeFunction(FTy, Name); return CGF.EmitCall(Info, Fn, ReturnValueSlot(), Args); }
// CUDA 9.0+ uses new way to launch kernels. Parameters are packed in a local // array and kernels are launched using cudaLaunchKernel(). void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args) { // Build the shadow stack entry at the very start of the function. // Calculate amount of space we will need for all arguments. If we have no // args, allocate a single pointer so we still have a valid pointer to the // argument array that we can pass to runtime, even if it will be unused. Address KernelArgs = CGF.CreateTempAlloca( VoidPtrTy, CharUnits::fromQuantity(16), "kernel_args", llvm::ConstantInt::get(SizeTy, std::max<size_t>(1, Args.size()))); // Store pointers to the arguments in a locally allocated launch_args. for (unsigned i = 0; i < Args.size(); ++i) { llvm::Value* VarPtr = CGF.GetAddrOfLocalVar(Args[i]).getPointer(); llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, VoidPtrTy); CGF.Builder.CreateDefaultAlignedStore( VoidVarPtr, CGF.Builder.CreateConstGEP1_32(KernelArgs.getPointer(), i)); } llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); // Lookup cudaLaunchKernel function. // cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, // void **args, size_t sharedMem, // cudaStream_t stream); TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); IdentifierInfo &cudaLaunchKernelII = CGM.getContext().Idents.get("cudaLaunchKernel"); FunctionDecl *cudaLaunchKernelFD = nullptr; for (const auto &Result : DC->lookup(&cudaLaunchKernelII)) { if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result)) cudaLaunchKernelFD = FD; } if (cudaLaunchKernelFD == nullptr) { CGM.Error(CGF.CurFuncDecl->getLocation(), "Can't find declaration for cudaLaunchKernel()"); return; } // Create temporary dim3 grid_dim, block_dim. ParmVarDecl *GridDimParam = cudaLaunchKernelFD->getParamDecl(1); QualType Dim3Ty = GridDimParam->getType(); Address GridDim = CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "grid_dim"); Address BlockDim = CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "block_dim"); Address ShmemSize = CGF.CreateTempAlloca(SizeTy, CGM.getSizeAlign(), "shmem_size"); Address Stream = CGF.CreateTempAlloca(VoidPtrTy, CGM.getPointerAlign(), "stream"); llvm::FunctionCallee cudaPopConfigFn = CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, {/*gridDim=*/GridDim.getType(), /*blockDim=*/BlockDim.getType(), /*ShmemSize=*/ShmemSize.getType(), /*Stream=*/Stream.getType()}, /*isVarArg=*/false), "__cudaPopCallConfiguration"); CGF.EmitRuntimeCallOrInvoke(cudaPopConfigFn, {GridDim.getPointer(), BlockDim.getPointer(), ShmemSize.getPointer(), Stream.getPointer()}); // Emit the call to cudaLaunch llvm::Value *Kernel = CGF.Builder.CreatePointerCast(CGF.CurFn, VoidPtrTy); CallArgList LaunchKernelArgs; LaunchKernelArgs.add(RValue::get(Kernel), cudaLaunchKernelFD->getParamDecl(0)->getType()); LaunchKernelArgs.add(RValue::getAggregate(GridDim), Dim3Ty); LaunchKernelArgs.add(RValue::getAggregate(BlockDim), Dim3Ty); LaunchKernelArgs.add(RValue::get(KernelArgs.getPointer()), cudaLaunchKernelFD->getParamDecl(3)->getType()); LaunchKernelArgs.add(RValue::get(CGF.Builder.CreateLoad(ShmemSize)), cudaLaunchKernelFD->getParamDecl(4)->getType()); LaunchKernelArgs.add(RValue::get(CGF.Builder.CreateLoad(Stream)), cudaLaunchKernelFD->getParamDecl(5)->getType()); QualType QT = cudaLaunchKernelFD->getType(); QualType CQT = QT.getCanonicalType(); llvm::Type *Ty = CGM.getTypes().ConvertType(CQT); llvm::FunctionType *FTy = dyn_cast<llvm::FunctionType>(Ty); const CGFunctionInfo &FI = CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD); llvm::FunctionCallee cudaLaunchKernelFn = CGM.CreateRuntimeFunction(FTy, "cudaLaunchKernel"); CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(), LaunchKernelArgs); CGF.EmitBranch(EndBlock); CGF.EmitBlock(EndBlock); }