// Launches `kernel` via checked_launch_kernel() with `device` selected as the current CUDA device.
//
// When the CUDA runtime is available (__cuda_lib_has_cudart):
//   * in the host pass, the current device is switched to `device` if it differs,
//     the kernel is launched, and the previously current device is selected again;
//   * in the device pass (__CUDA_ARCH__ defined), the device cannot be switched, so a
//     mismatch between `device` and the current device is reported as cudaErrorNotSupported.
// When the CUDA runtime is not available, cudaErrorNotSupported is reported with a message
// that depends on whether this is the host or the device compilation pass.
//
// Every failure is reported through throw_on_error(); how it surfaces the error is defined elsewhere.
//
// NOTE(review): if throw_on_error()/checked_launch_kernel() report failure by throwing,
// the original device is not restored on that path -- confirm whether callers rely on it.
__host__ __device__ void checked_launch_kernel_on_device(void* kernel, ::dim3 grid_dim, ::dim3 block_dim, int shared_memory_size, cudaStream_t stream, int device, const Args&... args)
{
#if __cuda_lib_has_cudart
  // record the current device
  int current_device = 0;
  throw_on_error(cudaGetDevice(&current_device), "cuda::detail::checked_launch_kernel_on_device(): cudaGetDevice()");

  if(current_device != device)
  {
#  ifndef __CUDA_ARCH__
    // host code may switch devices before launching
    throw_on_error(cudaSetDevice(device), "cuda::detail::checked_launch_kernel_on_device(): cudaSetDevice()");
#  else
    // device code cannot change the current device
    throw_on_error(cudaErrorNotSupported, "cuda::detail::checked_launch_kernel_on_device(): CUDA kernel launch only allowed on the current device in __device__ code");
#  endif // __CUDA_ARCH__
  }
#else
  // the error message we return depends on how the program was compiled
  const char* error_message =
#  ifndef __CUDA_ARCH__
    "cuda::detail::checked_launch_kernel_on_device(): CUDA kernel launch from host requires nvcc"
#  else
    "cuda::detail::checked_launch_kernel_on_device(): CUDA kernel launch from device requires arch=sm_35 or better and rdc=true"
#  endif
  ;

  throw_on_error(cudaErrorNotSupported, error_message);
#endif // __cuda_lib_has_cudart

  checked_launch_kernel(kernel, grid_dim, block_dim, shared_memory_size, stream, args...);

#if __cuda_lib_has_cudart
  // restore the device (only host code can have changed it above)
#  ifndef __CUDA_ARCH__
  if(current_device != device)
  {
    throw_on_error(cudaSetDevice(current_device), "cuda::detail::checked_launch_kernel_on_device(): cudaSetDevice()");
  }
#  endif // __CUDA_ARCH__
#else
  throw_on_error(cudaErrorNotSupported, "cuda::detail::checked_launch_kernel_on_device(): cudaSetDevice requires CUDART");
#endif // __cuda_lib_has_cudart
}
// Launches `kernel` via checked_launch_kernel() on `stream`, ordered after `event`.
//
// When the CUDA runtime is available (__cuda_lib_has_cudart) and `event` is non-null,
// cudaStreamWaitEvent() is first called on `stream` with `event`. When the runtime is
// not available, cudaErrorNotSupported is reported instead, with a message chosen by
// compilation pass (host vs. device).
//
// Failures are reported through throw_on_error(); how it surfaces the error is defined elsewhere.
__host__ __device__ void checked_launch_kernel_after_event(void* kernel, ::dim3 grid_dim, ::dim3 block_dim, int shared_memory_size, cudaStream_t stream, cudaEvent_t event, const Args&... args)
{
#if !__cuda_lib_has_cudart
  // no CUDA runtime: the diagnostic depends on how the program was compiled
#  ifdef __CUDA_ARCH__
  throw_on_error(cudaErrorNotSupported, "cuda::detail::checked_launch_kernel_after_event(): CUDA kernel launch from device requires arch=sm_35 or better and rdc=true");
#  else
  throw_on_error(cudaErrorNotSupported, "cuda::detail::checked_launch_kernel_after_event(): CUDA kernel launch from host requires nvcc");
#  endif // __CUDA_ARCH__
#else
  // a null event means there is nothing to wait for
  if(event)
  {
    // make the upcoming launch on `stream` wait on the event
    throw_on_error(cudaStreamWaitEvent(stream, event, 0), "cuda::detail::checked_launch_kernel_after_event(): cudaStreamWaitEvent()");
  }
#endif // !__cuda_lib_has_cudart

  checked_launch_kernel(kernel, grid_dim, block_dim, shared_memory_size, stream, args...);
}