Exemplo n.º 1
0
__host__ __device__
cudaError_t triple_chevrons(void* kernel, ::dim3 grid_dim, ::dim3 block_dim, int shared_memory_size, cudaStream_t stream, const Args&... args)
{
  // reference the kernel to encourage the compiler not to optimize it away
  workaround_unused_variable_warning(kernel);

#if __cuda_lib_has_cudart
#  ifndef __CUDA_ARCH__
  cudaConfigureCall(grid_dim, block_dim, shared_memory_size, stream);
  setup_kernel_arguments(0, args...);
  return cudaLaunch(kernel);
#  else
  // XXX generalize to multiple arguments
  if(sizeof...(Args) != 1)
  {
    return cudaErrorNotSupported;
  }

  using Arg = typename first_type<Args...>::type;

  void *param_buffer = cudaGetParameterBuffer(std::alignment_of<Arg>::value, sizeof(Arg));
  std::memcpy(param_buffer, &first_parameter(args...), sizeof(Arg));
  return cudaLaunchDevice(kernel, param_buffer, grid_dim, block_dim, shared_memory_size, stream);
#  endif // __CUDA_ARCH__
#else // __cuda_lib_has_cudart
  return cudaErrorNotSupported;
#endif
}
Exemplo n.º 2
0
        __host__ __device__
        static void supported_path(unsigned int num_blocks, unsigned int block_size, size_t num_dynamic_smem_bytes, cudaStream_t stream, task_type task)
        {
#if __BULK_HAS_CUDART__
#  ifndef __CUDA_ARCH__
          cudaConfigureCall(dim3(num_blocks), dim3(block_size), num_dynamic_smem_bytes, stream);
          cudaSetupArgument(task, 0);
          bulk::detail::throw_on_error(cudaLaunch(super_t::global_function_pointer()), "after cudaLaunch in triple_chevron_launcher::launch()");
#  else
          void *param_buffer = cudaGetParameterBuffer(alignment_of<task_type>::value, sizeof(task_type));
          std::memcpy(param_buffer, &task, sizeof(task_type));
          bulk::detail::throw_on_error(cudaLaunchDevice(reinterpret_cast<void*>(super_t::global_function_pointer()), param_buffer, dim3(num_blocks), dim3(block_size), num_dynamic_smem_bytes, stream),
                                       "after cudaLaunchDevice in triple_chevron_launcher::launch()");
#  endif // __CUDA_ARCH__
#endif // __BULK_HAS_CUDART__
        }