Example #1
0
    /*! \brief Enqueue a host callback on this stream.
     *
     *  Forwards \a callback and \a userData to cudaStreamAddCallback for
     *  m_stream, passing 0 as the flags argument.
     *
     *  When NDEBUG is not defined, the compute capability of the device that
     *  is current on the calling thread is queried first and asserted to be
     *  at least 1.1.
     *
     *  \param callback Function handed to cudaStreamAddCallback.
     *  \param userData Opaque pointer passed through to cudaStreamAddCallback unchanged.
     */
    void Stream::addCallback( cudaStreamCallback_t callback, void * userData )
    {
#if !defined(NDEBUG)
      // Debug-only sanity check on the current device's compute capability.
      int currentDevice;
      CUDA_VERIFY( cudaGetDevice( &currentDevice ) );

      int ccMajor;
      CUDA_VERIFY( cudaDeviceGetAttribute( &ccMajor, cudaDevAttrComputeCapabilityMajor, currentDevice ) );

      int ccMinor;
      CUDA_VERIFY( cudaDeviceGetAttribute( &ccMinor, cudaDevAttrComputeCapabilityMinor, currentDevice ) );

      // Accept anything from 2.x upwards, or 1.x with a minor revision of at least 1.
      DP_ASSERT( ( ccMajor >= 2 ) || ( ( ccMajor == 1 ) && ( ccMinor >= 1 ) ) );
#endif

      // The trailing 0 is the flags argument of cudaStreamAddCallback.
      CUDA_VERIFY( cudaStreamAddCallback( m_stream, callback, userData, 0 ) );
    }
Example #2
0
/**
 * Returns the compute capability of the current CUDA device, encoded as
 * (10 * major + minor), e.g. 75 for compute capability 7.5.
 *
 * Returns 0 when the code is built without OSKAR_HAVE_CUDA, or when any of
 * the CUDA runtime queries fails. Failures are no longer ignored: previously
 * a failed cudaGetDevice() left the device index at 0 and the attributes of
 * device 0 were reported, and a single failed attribute query produced a
 * partial value.
 */
int oskar_device_compute_capability(void)
{
    int version = 0;
#ifdef OSKAR_HAVE_CUDA
    int major = 0, minor = 0, device = 0;

    /* Stop at the first failing call so a half-filled result is never
     * combined into a plausible-looking version number. */
    if (cudaGetDevice(&device) != cudaSuccess)
    {
        return 0;
    }
    if (cudaDeviceGetAttribute(&major,
            cudaDevAttrComputeCapabilityMajor, device) != cudaSuccess)
    {
        return 0;
    }
    if (cudaDeviceGetAttribute(&minor,
            cudaDevAttrComputeCapabilityMinor, device) != cudaSuccess)
    {
        return 0;
    }
    version = 10 * major + minor;
#endif
    return version;
}
// Returns the product of
//   - the maximum number of active blocks per multiprocessor reported by
//     cudaOccupancyMaxActiveBlocksPerMultiprocessor() for the
//     bulk_then_execute kernel instantiated from the argument types, and
//   - the multiprocessor count of `device`.
//
// Only `device` and `block_dim` are read at run time; the remaining
// parameters are not used in the body beyond their types, which select the
// kernel instantiation.
//
// Errors from either CUDA runtime call are reported through
// detail::throw_on_error().
__host__ __device__
int max_grid_size_of_bulk_then_execute_concurrent_kernel(const agency::cuda::device_id& device, const Function& f, Shape block_dim, const agency::cuda::async_future<T>& predecessor, const ResultFactory& result_factory, const OuterFactory& outer_factory, const InnerFactory& inner_factory)
{
  const size_t block_dimension = agency::detail::shape_size<Shape>::value;
  constexpr auto kernel = detail::bulk_then_execute_kernel<block_dimension,Function,T,ResultFactory,OuterFactory,InnerFactory>::value;

  // The last parameter of cudaOccupancyMaxActiveBlocksPerMultiprocessor() is the
  // per-block dynamic shared memory size in bytes, not a device ordinal. The
  // device handle was previously passed here, which skewed the query on any
  // device whose ordinal is nonzero.
  // NOTE(review): assumes the kernel is launched without dynamic shared memory;
  // confirm against the launch site and pass the real size if it differs.
  const size_t dynamic_shared_memory_size = 0;

  // NOTE(review): this occupancy call takes no device parameter, so it presumably
  // reports for whichever device is current on the calling thread, while the
  // multiprocessor count below is read from `device` -- confirm callers make
  // `device` current before calling.
  int max_active_blocks_per_multiprocessor = 0;
  detail::throw_on_error(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_active_blocks_per_multiprocessor, kernel, agency::detail::shape_cast<int>(block_dim), dynamic_shared_memory_size), "cuda::detail::max_grid_size_of_bulk_then_execute_concurrent_kernel(): CUDA error after cudaOccupancyMaxActiveBlocksPerMultiprocessor()");

  int num_multiprocessors = 0;
  detail::throw_on_error(cudaDeviceGetAttribute(&num_multiprocessors, cudaDevAttrMultiProcessorCount, device.native_handle()), "cuda::detail::max_grid_size_of_bulk_then_execute_concurrent_kernel(): CUDA error after cudaDeviceGetAttribute()");

  return max_active_blocks_per_multiprocessor * num_multiprocessors;
}