Impl()
{
    // Initialize CUDA.
    check_cuda_error(cuInit(0));

    int device_count;
    check_cuda_error(cuDeviceGetCount(&device_count));

    m_devices.reserve(static_cast<std::size_t>(device_count));
    m_contexts.resize(static_cast<std::size_t>(device_count), nullptr);

    for (int i = 0; i < device_count; ++i)
        m_devices.emplace_back(i);
}
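These snippets all funnel status codes through a check_cuda_error helper whose definition is not shown. A minimal sketch of what it might look like, assuming overloads for the driver API (CUresult), the runtime API (cudaError_t), and a no-argument form that checks the runtime's sticky last-error state; the real definitions may differ:

#include <cuda.h>
#include <cuda_runtime.h>
#include <cstdio>
#include <cstdlib>

// Hypothetical error-checking helpers; sketch only.
inline void check_cuda_error(CUresult result)
{
    if (result != CUDA_SUCCESS)
    {
        const char* msg = nullptr;
        cuGetErrorString(result, &msg);
        std::fprintf(stderr, "CUDA driver error: %s\n", msg ? msg : "unknown");
        std::abort();
    }
}

inline void check_cuda_error(cudaError_t err)
{
    if (err != cudaSuccess)
    {
        std::fprintf(stderr, "CUDA runtime error: %s\n", cudaGetErrorString(err));
        std::abort();
    }
}

// No-argument form: check (and clear) the runtime's last error.
inline void check_cuda_error()
{
    check_cuda_error(cudaGetLastError());
}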
template <typename A>
void bc2s_feature<A>::bind(const cuda_gpu&)
{
    // Bind the two scale buffers to their CUDA texture references.
    bindTexture2d(s1(), bc2s_tex_s1);
    bindTexture2d(s2(), bc2s_tex_s2);
    check_cuda_error();
}
/*
 * Advance the simulation by <n> generations by mapping the OpenGL pixel
 * buffer objects for writing from CUDA, executing the kernel <n> times, and
 * unmapping the pixel buffer objects.
 */
void advance_generations(unsigned long n)
{
    uint8_t* device_bufs[2];
    size_t size;

    DEBUG2("Mapping CUDA resources and retrieving device buffer pointers\n");
    cudaGraphicsMapResources(2, cuda_graphics_resources, (cudaStream_t)0);
    cudaGraphicsResourceGetMappedPointer((void**)&device_bufs[0], &size,
                                         cuda_graphics_resources[0]);
    cudaGraphicsResourceGetMappedPointer((void**)&device_bufs[1], &size,
                                         cuda_graphics_resources[1]);
    check_cuda_error();

    while (n--) {
        DEBUG2("Launching kernel (grid.width = %u, grid.height = %u)\n",
               grid.width, grid.height);
        launch_kernel(device_bufs[grid.which_buf],
                      device_bufs[!grid.which_buf],
                      grid.width, grid.height);
        grid.which_buf ^= 1;
    }

    DEBUG2("Unmapping CUDA resources\n");
    cudaGraphicsUnmapResources(2, cuda_graphics_resources, (cudaStream_t)0);
    cudaStreamSynchronize(0);
}
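launch_kernel itself is not shown. A minimal sketch of a plausible implementation, assuming a one-byte-per-cell Game of Life grid with toroidal wrap-around, and assuming the (src, dst) argument order used in the call above:

#include <cstdint>

// Hypothetical Life kernel: one thread per cell, reading the previous
// generation from src and writing the next generation to dst.
__global__ void life_kernel(const uint8_t* src, uint8_t* dst,
                            unsigned width, unsigned height)
{
    unsigned x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x >= width || y >= height)
        return;

    // Count live neighbors with wrap-around at the grid edges.
    unsigned neighbors = 0;
    for (int dy = -1; dy <= 1; ++dy)
        for (int dx = -1; dx <= 1; ++dx)
        {
            if (dx == 0 && dy == 0)
                continue;
            int nx = (int(x) + dx + int(width)) % int(width);
            int ny = (int(y) + dy + int(height)) % int(height);
            neighbors += src[ny * width + nx] ? 1u : 0u;
        }

    uint8_t alive = src[y * width + x];
    dst[y * width + x] = (neighbors == 3 || (alive && neighbors == 2)) ? 0xFF : 0;
}

static void launch_kernel(const uint8_t* src, uint8_t* dst,
                          unsigned width, unsigned height)
{
    dim3 block(16, 16);
    dim3 grid_dim((width + block.x - 1) / block.x,
                  (height + block.y - 1) / block.y);
    life_kernel<<<grid_dim, block>>>(src, dst, width, height);
}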
void init_parallel_component()
{
    /*DEBUG("Looking for CUDA devices\n");*/
    /*device_query();*/
    DEBUG("Setting CUDA GL device\n");
    cudaGLSetGLDevice(0);
    check_cuda_error();
}
CUcontext CUDADeviceList::get_primary_context(const CUDADevice& device) const
{
    assert(device.m_cuda_device_number < size());
    assert(device.m_compute_mode != CU_COMPUTEMODE_PROHIBITED);

    const CUdevice dev = device.m_cuda_device_number;

    // Retain a primary context for the device if we haven't already,
    // and make it current.
    if (impl->m_contexts[dev] == nullptr)
    {
        CUcontext ctx;
        check_cuda_error(cuDevicePrimaryCtxRetain(&ctx, dev));

        // todo: set context flags here?
        //cuDevicePrimaryCtxSetFlags(dev, ...);

        check_cuda_error(cuCtxPushCurrent(ctx));
        impl->m_contexts[dev] = ctx;
    }

    return impl->m_contexts[dev];
}
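A hypothetical caller, assuming the usual driver-API pattern of pushing the context before issuing work and popping it afterwards (CUDADeviceList and CUDADevice come from the snippet above):

#include <cuda.h>

// Sketch only: make the device's primary context current, do driver-API
// work, then pop it again.
void with_primary_context(const CUDADeviceList& list, const CUDADevice& device)
{
    CUcontext ctx = list.get_primary_context(device);
    check_cuda_error(cuCtxPushCurrent(ctx));

    // ... cuMemAlloc(), cuLaunchKernel(), etc. ...

    CUcontext popped = nullptr;
    check_cuda_error(cuCtxPopCurrent(&popped));
}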
void create_grid(unsigned width, unsigned height, uint8_t* data)
{
    grid.which_buf = 0;
    grid.width = width;
    grid.height = height;
    size_t size = (size_t)width * (size_t)height;

    DEBUG("Creating %u by %u grid\n", width, height);

    // Use the caller's initial grid if one was supplied (ownership is
    // transferred: the buffer is freed below); otherwise allocate a zeroed
    // grid and seed it with an R-pentomino.
    uint8_t* buf;
    if (data) {
        buf = data;
    } else {
        DEBUG("Allocating host buffer of size %lu\n", (unsigned long)size);
        buf = (uint8_t*)zalloc(size);
        add_R_pentonimo(buf, width, height);
    }

    DEBUG("Generating OpenGL buffer object names\n");
    glGenBuffers(2, &pixel_buffer_objects[0]);

    DEBUG("Binding and initializing OpenGL buffer objects\n");
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pixel_buffer_objects[0]);
    glBufferData(GL_PIXEL_UNPACK_BUFFER, size, buf, GL_DYNAMIC_DRAW);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pixel_buffer_objects[1]);
    memset(buf, 0, size);
    glBufferData(GL_PIXEL_UNPACK_BUFFER, size, buf, GL_DYNAMIC_DRAW);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

    DEBUG("Registering OpenGL buffer objects with CUDA\n");
    cudaGraphicsGLRegisterBuffer(&cuda_graphics_resources[0],
                                 pixel_buffer_objects[0],
                                 cudaGraphicsMapFlagsNone);
    cudaGraphicsGLRegisterBuffer(&cuda_graphics_resources[1],
                                 pixel_buffer_objects[1],
                                 cudaGraphicsMapFlagsNone);
    check_cuda_error();

    DEBUG("Freeing host buffer\n");
    free(buf);
}
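zalloc and add_R_pentonimo are not shown either. A sketch of plausible definitions, assuming a checked zero-initialized allocation and an R-pentomino stamped at the center of a grid of at least 3x3 cells; the real implementations may place the pattern elsewhere:

#include <cstdint>
#include <cstdlib>

// Hypothetical checked zero-initialized allocation.
static void* zalloc(size_t size)
{
    void* p = calloc(1, size);
    if (!p)
        abort();
    return p;
}

// Hypothetical seeding helper: stamp an R-pentomino at the grid center.
static void add_R_pentonimo(uint8_t* buf, unsigned width, unsigned height)
{
    // R-pentomino:
    //   .XX
    //   XX.
    //   .X.
    static const uint8_t shape[3][3] = {
        { 0, 1, 1 },
        { 1, 1, 0 },
        { 0, 1, 0 },
    };
    unsigned x0 = width / 2 - 1;
    unsigned y0 = height / 2 - 1;
    for (unsigned y = 0; y < 3; ++y)
        for (unsigned x = 0; x < 3; ++x)
            if (shape[y][x])
                buf[(y0 + y) * width + (x0 + x)] = 0xFF;
}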
inline void ffast4_feature<V, T>::update(const image2d_V& in,
                                         const image2d_V& in_s2)
{
    frame_cpt_++;
    swap_buffers();

    dim3 dimblock(16, 16, 1);
    if (T == CPU)
        dimblock = dim3(in.ncols(), 2, 1);
    dim3 dimgrid = grid_dimension(in.domain(), dimblock);

    blurred_s1_ = in;
    blurred_s2_ = in_s2;
    //local_jet_static2_<0,0,1, 0,0,2, 6>::run(in, blurred_s1_, blurred_s2_, tmp_, pertinence2_);

    // if (!(frame_cpt_ % 5))
    {
        if (target == unsigned(GPU))
        {
            bindTexture2d(blurred_s1_, s1_tex);
            bindTexture2d(blurred_s2_, s2_tex);
        }

        pw_call<FFAST4_sig(target, V)>(flag<target>(), dimgrid, dimblock,
                                       color_blurred_, blurred_s1_, blurred_s2_,
                                       //*f_,
                                       pertinence_, grad_thresh_);

        // filter_pertinence<i_float1><<<dimgrid, dimblock>>>
        //   (pertinence_, pertinence2_);
        // copy(pertinence2_, pertinence_);

        if (target == unsigned(GPU))
        {
            cudaUnbindTexture(s1_tex);
            cudaUnbindTexture(s2_tex);
            check_cuda_error();
        }
    }
}
void cuda_event_create(cudaEvent_t* event)
{
    assert(event);
    check_cuda_error(cudaEventCreate(event));
}

void cuda_device_synchronize()
{
    check_cuda_error(cudaDeviceSynchronize());
}

void cuda_set_device(int device)
{
    check_cuda_error(cudaSetDevice(device));
}

void cuda_choose_device(int* device, const struct cudaDeviceProp* prop)
{
    check_cuda_error(cudaChooseDevice(device, prop));
}

void cuda_get_device_properties(struct cudaDeviceProp* prop, int device)
{
    check_cuda_error(cudaGetDeviceProperties(prop, device));
}

float cuda_event_elapsed_time(cudaEvent_t start, cudaEvent_t end)
{
    float ms;
    check_cuda_error(cudaEventElapsedTime(&ms, start, end));
    return ms;
}

void cuda_stream_synchronize(cudaStream_t stream)
{
    check_cuda_error(cudaStreamSynchronize(stream));
}

void cuda_event_record(cudaEvent_t event, cudaStream_t stream)
{
    check_cuda_error(cudaEventRecord(event, stream));
}

void cuda_event_synchronize(cudaEvent_t event)
{
    check_cuda_error(cudaEventSynchronize(event));
}
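A hypothetical usage of the event wrappers above, timing a stretch of work on the default stream (sketch only):

#include <cuda_runtime.h>

// Measure elapsed GPU time between two events on the default stream.
float time_gpu_work()
{
    cudaEvent_t start, stop;
    cuda_event_create(&start);
    cuda_event_create(&stop);

    cuda_event_record(start, 0);
    // ... launch kernels here ...
    cuda_event_record(stop, 0);

    // Wait until the stop event has actually been reached on the GPU.
    cuda_event_synchronize(stop);
    float ms = cuda_event_elapsed_time(start, stop);

    check_cuda_error(cudaEventDestroy(start));
    check_cuda_error(cudaEventDestroy(stop));
    return ms;
}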
void cuda_check_last_error()
{
    check_cuda_error(cudaPeekAtLastError());
}

void cuda_stream_create(cudaStream_t* stream)
{
    check_cuda_error(cudaStreamCreate(stream));
}

int cuda_get_device_count()
{
    int count;
    check_cuda_error(cudaGetDeviceCount(&count));
    return count;
}
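And a hypothetical usage of the device wrappers, picking a device that matches a requested compute capability and making it current (sketch only; the capability request is an assumption):

#include <cuda_runtime.h>
#include <cstring>

// Choose the best match for a requested compute capability and select it.
void select_device()
{
    int count = cuda_get_device_count();
    if (count == 0)
        return;

    struct cudaDeviceProp prop;
    memset(&prop, 0, sizeof(prop));
    prop.major = 3; // request at least compute capability 3.x

    int device = 0;
    cuda_choose_device(&device, &prop);
    cuda_get_device_properties(&prop, device);
    cuda_set_device(device);
}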