Пример #1
0
__host__ __device__
cudaError_t triple_chevrons(void* kernel, ::dim3 grid_dim, ::dim3 block_dim, int shared_memory_size, cudaStream_t stream, const Args&... args)
{
  // reference the kernel to encourage the compiler not to optimize it away
  workaround_unused_variable_warning(kernel);

#if __cuda_lib_has_cudart
#  ifndef __CUDA_ARCH__
  cudaConfigureCall(grid_dim, block_dim, shared_memory_size, stream);
  setup_kernel_arguments(0, args...);
  return cudaLaunch(kernel);
#  else
  // XXX generalize to multiple arguments
  if(sizeof...(Args) != 1)
  {
    return cudaErrorNotSupported;
  }

  using Arg = typename first_type<Args...>::type;

  void *param_buffer = cudaGetParameterBuffer(std::alignment_of<Arg>::value, sizeof(Arg));
  std::memcpy(param_buffer, &first_parameter(args...), sizeof(Arg));
  return cudaLaunchDevice(kernel, param_buffer, grid_dim, block_dim, shared_memory_size, stream);
#  endif // __CUDA_ARCH__
#else // __cuda_lib_has_cudart
  return cudaErrorNotSupported;
#endif
}
Пример #2
0
void setup_kernel_arguments(size_t offset, const Arg1& arg1, const Args&... args)
{
  offset = align_up<Arg1>(offset);

  cudaSetupArgument(arg1, offset);
  setup_kernel_arguments(offset + sizeof(Arg1), args...);
}