/**
 * Constructs the detector and verifies that a usable CUDA device is present.
 *
 * Stores the configuration, queries the CUDA runtime for the device count,
 * the currently selected device and its properties (cached in m_deviceProp),
 * and asserts that the device offers at least compute capability 1.1.
 * Finally the m_maxmin buffer is initialized and zeroed.
 *
 * @param config the detector configuration; copied into m_config.
 *
 * Any failed check is reported through the ASRL_ASSERT_* macros.
 */
GpuSurfDetectorInternal::GpuSurfDetectorInternal(GpuSurfConfiguration config) :
  m_initialized(false),
  m_config(config)
{
  // Initialized so that a failing query never leaves indeterminate values behind.
  int deviceCount = 0;
  int device = 0;

  cudaError_t err = cudaGetDeviceCount(&deviceCount);
  ASRL_ASSERT_EQ(err,cudaSuccess, "Unable to get the CUDA device count: " << cudaGetErrorString(err));
  ASRL_ASSERT_GT(deviceCount,0,"There are no CUDA capable devices present");

  err = cudaGetDevice(&device);
  ASRL_ASSERT_EQ(err,cudaSuccess, "Unable to get the CUDA device: " << cudaGetErrorString(err));
  //std::cout << "Found device " << device << std::endl;

  err = cudaGetDeviceProperties(&m_deviceProp,device);
  ASRL_ASSERT_EQ(err,cudaSuccess, "Unable to get the CUDA device properties: " << cudaGetErrorString(err));

  // Some more checking...
  // Compare major.minor as a single number. Checking "major >= 1" and
  // "minor >= 1" separately would wrongly reject every x.0 device
  // (2.0, 3.0, 5.0, ...), all of which exceed the 1.1 requirement.
  const int computeCapability = m_deviceProp.major * 10 + m_deviceProp.minor;
  ASRL_ASSERT_GE(computeCapability,11,"Minimum compute capability 1.1 is necessary");

  // Set up the m_maxmin buffer for ASRL_SURF_MAX_CANDIDATES entries and clear it.
  // NOTE(review): the 'false' argument presumably disables page-locked host
  // memory -- confirm against the init() declaration.
  m_maxmin.init(ASRL_SURF_MAX_CANDIDATES,false);
  m_maxmin.memset(0);
}
/**
 * Queues a device-to-host copy of the buffer on the given stream.
 *
 * At most m_size elements are transferred: a larger request is clamped to
 * the size of the array. The copy is issued with cudaMemcpyAsync, so the
 * host data should only be read once work on the stream has completed.
 *
 * @param stream    the CUDA stream on which the copy is enqueued.
 * @param nElements the number of elements to copy from m_device to m_host.
 *
 * Asserts that the array is non-empty, that the host memory is page-locked,
 * and that the copy request was accepted by the CUDA runtime.
 */
void CudaSynchronizedMemory<T>::pullFromDeviceAsync(cudaStream_t stream, size_t nElements)
{
  ASRL_ASSERT_GT(m_size,0, "The array is empty");
  ASRL_ASSERT(m_pageLocked, "Asynchronous transfer is only valid for page-locked host memory");

  // Never copy past the end of the array.
  const size_t elementCount = (nElements > m_size) ? m_size : nElements;
  const size_t byteCount = elementCount * sizeof(T);

  const cudaError_t err = cudaMemcpyAsync((void*) m_host,
                                          (void*) m_device,
                                          byteCount,
                                          cudaMemcpyDeviceToHost,
                                          stream);

  ASRL_ASSERT_EQ(err,cudaSuccess, "Unable to copy " << typeid(T).name() << " array of size " << m_size << " from device. Stream " << stream << ": (" << err << "): " << cudaGetErrorString(err));
}