void OpenCLCommandQueue :: create(OpenCLContext &ctx, const OpenCLDevice &dev, bool ordered, bool profiling) { destroy(); detach(); cl_command_queue_properties properties = 0; if(profiling) { properties |= CL_QUEUE_PROFILING_ENABLE; } if(! ordered) { if(GET_FLAG(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, dev.get_queue_properties())) { properties |= CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; } else { opencl_error(USER_OPENCL_ERROR, "Device doesn't support out of order execution ... disabling"); } } cl_int res = CL_SUCCESS; cl_command_queue command_queue = clCreateCommandQueue( ctx.get_context(), dev.get_device(), properties, &res ); if(opencl_error(res, "clCreateCommandQueue error creating command queue")) { return; } mCommandQueue = command_queue; ctx.attach_resource(this); }
/// Partitions the device into multiple sub-devices according to /// \p properties. /// /// \opencl_version_warning{1,2} std::vector<device> partition(const cl_device_partition_property *properties) const { // get sub-device count uint_ count = 0; int_ ret = clCreateSubDevices(m_id, properties, 0, 0, &count); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } // get sub-device ids std::vector<cl_device_id> ids(count); ret = clCreateSubDevices(m_id, properties, count, &ids[0], 0); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } // convert ids to device objects std::vector<device> devices(count); for(size_t i = 0; i < count; i++){ devices[i] = device(ids[i], false); } return devices; }
bool load_binary(const string& kernel_path, const string& clbin) { /* read binary into memory */ vector<uint8_t> binary; if(!path_read_binary(clbin, binary)) { opencl_error(string_printf("OpenCL failed to read cached binary %s.", clbin.c_str())); return false; } /* create program */ cl_int status; size_t size = binary.size(); const uint8_t *bytes = &binary[0]; cpProgram = clCreateProgramWithBinary(cxContext, 1, &cdDevice, &size, &bytes, &status, &ciErr); if(opencl_error(status) || opencl_error(ciErr)) { opencl_error(string_printf("OpenCL failed create program from cached binary %s.", clbin.c_str())); return false; } if(!build_kernel(kernel_path)) return false; return true; }
bool opencl_version_check() { char version[256]; int major, minor, req_major = 1, req_minor = 1; clGetPlatformInfo(cpPlatform, CL_PLATFORM_VERSION, sizeof(version), &version, NULL); if(sscanf(version, "OpenCL %d.%d", &major, &minor) < 2) { opencl_error(string_printf("OpenCL: failed to parse platform version string (%s).", version)); return false; } if(!((major == req_major && minor >= req_minor) || (major > req_major))) { opencl_error(string_printf("OpenCL: platform version 1.1 or later required, found %d.%d", major, minor)); return false; } clGetDeviceInfo(cdDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(version), &version, NULL); if(sscanf(version, "OpenCL C %d.%d", &major, &minor) < 2) { opencl_error(string_printf("OpenCL: failed to parse OpenCL C version string (%s).", version)); return false; } if(!((major == req_major && minor >= req_minor) || (major > req_major))) { opencl_error(string_printf("OpenCL: C version 1.1 or later required, found %d.%d", major, minor)); return false; } /* we don't check CL_DEVICE_VERSION since for e.g. nvidia sm 1.3 cards this is 1.0 even if the language features are there, just limited shared memory */ return true; }
bool opencl_version_check() { char version[256]; int major, minor, req_major = 1, req_minor = 1; clGetPlatformInfo(cpPlatform, CL_PLATFORM_VERSION, sizeof(version), &version, NULL); if(sscanf(version, "OpenCL %d.%d", &major, &minor) < 2) { opencl_error(string_printf("OpenCL: failed to parse platform version string (%s).", version)); return false; } if(!((major == req_major && minor >= req_minor) || (major > req_major))) { opencl_error(string_printf("OpenCL: platform version 1.1 or later required, found %d.%d", major, minor)); return false; } clGetDeviceInfo(cdDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(version), &version, NULL); if(sscanf(version, "OpenCL C %d.%d", &major, &minor) < 2) { opencl_error(string_printf("OpenCL: failed to parse OpenCL C version string (%s).", version)); return false; } if(!((major == req_major && minor >= req_minor) || (major > req_major))) { opencl_error(string_printf("OpenCL: C version 1.1 or later required, found %d.%d", major, minor)); return false; } return true; }
static cl_image_format format_from_array(AlloArray *array, bool normalized = true) { cl_image_format format; switch(array->header.components) { case 1: format.image_channel_order = CL_A; break; case 2: format.image_channel_order = CL_RA; break; case 3: format.image_channel_order = CL_RGB; break; case 4: format.image_channel_order = CL_RGBA; break; default: opencl_error(USER_OPENCL_ERROR, "format_from_array invalid number of components"); break; } switch(array->header.type) { /* floating point numbers */ case AlloFloat32Ty: format.image_channel_data_type = CL_FLOAT; break; /* signed integers */ AlloSInt8Ty: if(normalized){ format.image_channel_data_type = CL_SNORM_INT8; break; } else { format.image_channel_data_type = CL_SIGNED_INT8; break; } AlloSInt16Ty: if(normalized){ format.image_channel_data_type = CL_SNORM_INT16; break; } else { format.image_channel_data_type = CL_SIGNED_INT16; break; } AlloSInt32Ty: format.image_channel_data_type = CL_SIGNED_INT32; break; /* unsigned integers */ AlloUInt8Ty: if(normalized){ format.image_channel_data_type = CL_UNORM_INT8; break; } else { format.image_channel_data_type = CL_UNSIGNED_INT8; break; } AlloUInt16Ty: if(normalized){ format.image_channel_data_type = CL_UNORM_INT16; break; } else { format.image_channel_data_type = CL_UNSIGNED_INT16; break; } AlloUInt32Ty: format.image_channel_data_type = CL_UNSIGNED_INT32; break; default: opencl_error(USER_OPENCL_ERROR, "format_from_array invalid type"); break; } return format; }
/// Links the programs in \p programs with \p options in \p context. /// /// \opencl_version_warning{1,2} /// /// \see_opencl_ref{clLinkProgram} static program link(const std::vector<program> &programs, const context &context, const std::string &options = std::string()) { const char *options_string = 0; if(!options.empty()){ options_string = options.c_str(); } cl_int ret; cl_program program_ = clLinkProgram( context.get(), 0, 0, options_string, static_cast<uint_>(programs.size()), reinterpret_cast<const cl_program*>(&programs[0]), 0, 0, &ret ); if(!program_){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return program(program_, false); }
void OpenCLImage2D :: create( OpenCLContext &ctx, cl_mem_flags usage, const cl_image_format *format, size_t width, size_t height, size_t rowstride, void *ptr ) { destroy(); detach(); usage = OpenCLMemoryBuffer::check_memory_flags(usage, ptr); cl_int res = CL_SUCCESS; cl_mem mem = clCreateImage2D( ctx.get_context(), usage, format, width, height, rowstride, ptr, &res ); if(opencl_error(res, "clCreateImage2D error creating buffer")) { return; } mMem = mem; ctx.attach_resource(this); }
OpenCLEvent OpenCLImage2D :: enqueue_read( OpenCLCommandQueue &queue, bool block, const size_t origin[3], const size_t region[3], size_t rowstride, void *ptr ) { cl_event event = 0; cl_int res = clEnqueueReadImage( queue.get_command_queue(), mMem, block ? CL_TRUE : CL_FALSE, origin, region, rowstride, 0, ptr, 0, NULL, &event ); if(opencl_error(res, "clEnqueueReadImage error enqueuing read event")) { return OpenCLEvent(); } return OpenCLEvent(event); }
/// Enqueues a command to copy data from \p src_image to \p dst_image. /// /// \see_opencl_ref{clEnqueueCopyImage} event enqueue_copy_image(const image3d &src_image, const image3d &dst_image, const size_t src_origin[3], const size_t dst_origin[3], const size_t region[3], const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(src_image.get_context() == this->get_context()); BOOST_ASSERT(dst_image.get_context() == this->get_context()); BOOST_ASSERT_MSG(src_image.get_format() == dst_image.get_format(), "Source and destination image formats must match."); event event_; cl_int ret = clEnqueueCopyImage( m_queue, src_image.get(), dst_image.get(), src_origin, dst_origin, region, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; }
/// Enqueues a command to read data from \p image to host memory. /// /// \see_opencl_ref{clEnqueueReadImage} void enqueue_read_image(const image2d &image, const size_t origin[2], const size_t region[2], size_t row_pitch, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(image.get_context() == this->get_context()); const size_t origin3[3] = { origin[0], origin[1], size_t(0) }; const size_t region3[3] = { region[0], region[1], size_t(1) }; cl_int ret = clEnqueueReadImage( m_queue, image.get(), CL_TRUE, origin3, region3, row_pitch, 0, host_ptr, events.size(), events.get_event_ptr(), 0 ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } }
/// Enqueues a command to fill \p buffer with \p pattern. /// /// \see_opencl_ref{clEnqueueFillBuffer} /// /// \opencl_version_warning{1,2} /// /// \see fill() event enqueue_fill_buffer(const buffer &buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(offset + size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); event event_; cl_int ret = clEnqueueFillBuffer( m_queue, buffer.get(), pattern, pattern_size, offset, size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; }
/// Enqueues a command to copy data from \p src_buffer to \p dst_image. /// /// \see_opencl_ref{clEnqueueCopyBufferToImage} event enqueue_copy_buffer_to_image(const buffer &src_buffer, const image3d &dst_image, size_t src_offset, const size_t dst_origin[3], const size_t region[3], const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(src_buffer.get_context() == this->get_context()); BOOST_ASSERT(dst_image.get_context() == this->get_context()); event event_; cl_int ret = clEnqueueCopyBufferToImage( m_queue, src_buffer.get(), dst_image.get(), src_offset, dst_origin, region, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; }
rcl_status cl_release_kernel(struct client_state* state, kernel_t kernel) { struct kernel_state* kernel_state; cl_int retval; uint32_t i; if (!vector_valid_idx(&state->kernels, kernel)) { log_print(log_error, "Kernel %" PRIu32 " not found", kernel); return RCL_INVALID_KERNEL; } kernel_state = *vector_element(&state->kernels, kernel, struct kernel_state*); retval = clReleaseKernel(kernel_state->id); if (retval) return opencl_error(retval); vector_remove(&state->kernels, kernel); for (i = 0; i < kernel_state->argument_count; i++) { if (!kernel_state->arguments[i].is_set || kernel_state->arguments[i].is_buffer) { continue; } free(kernel_state->arguments[i].argument.value.value); } free(kernel_state->arguments); free(kernel_state->name); free(kernel_state); return RCL_OK; }
rcl_status cl_build_program(struct client_state* state, program_t program_id, const char* options) { cl_int error; struct program_state* program_state; if (!vector_valid_idx(&state->programs, program_id)) { log_print(log_error, "Program %" PRIu32 " not found", program_id); return RCL_INVALID_PROGRAM; } log_print(log_notice, "Building program %" PRIu32 " with flags \"%s\"", program_id, options); program_state = *vector_element(&state->programs, program_id, struct program_state*); free(program_state->flags); program_state->flags = NULL; if (options) { program_state->flags = strdup(options); if (!program_state->flags) return RCL_HOST_RESOURCE; } error = clBuildProgram(program_state->id, 1, &device_id, options, NULL, NULL); if (error != CL_SUCCESS) { log_print(log_error, "Error building program: %s", clerror_name(error)); return opencl_error(error); } return error; }
/// Enqueues a command to write data from host memory to \p buffer. /// The copy is performed asynchronously. /// /// \see_opencl_ref{clEnqueueWriteBuffer} /// /// \see copy_async() event enqueue_write_buffer_async(const buffer &buffer, size_t offset, size_t size, const void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueWriteBuffer( m_queue, buffer.get(), CL_FALSE, offset, size, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; }
/// Enqueues a command to copy data from \p src_buffer to /// \p dst_buffer. /// /// \see_opencl_ref{clEnqueueCopyBuffer} /// /// \see copy() event enqueue_copy_buffer(const buffer &src_buffer, const buffer &dst_buffer, size_t src_offset, size_t dst_offset, size_t size, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(src_offset + size <= src_buffer.size()); BOOST_ASSERT(dst_offset + size <= dst_buffer.size()); BOOST_ASSERT(src_buffer.get_context() == this->get_context()); BOOST_ASSERT(dst_buffer.get_context() == this->get_context()); event event_; cl_int ret = clEnqueueCopyBuffer( m_queue, src_buffer.get(), dst_buffer.get(), src_offset, dst_offset, size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; }
/// Enqueues a command to fill \p image with \p fill_color. /// /// \see_opencl_ref{clEnqueueFillImage} /// /// \opencl_version_warning{1,2} event enqueue_fill_image(const image3d &image, const void *fill_color, const size_t origin[3], const size_t region[3], const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(image.get_context() == this->get_context()); event event_; cl_int ret = clEnqueueFillImage( m_queue, image.get(), fill_color, origin, region, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; }
/// Enqueues a command to map \p buffer into the host address space. /// /// \see_opencl_ref{clEnqueueMapBuffer} void* enqueue_map_buffer(const buffer &buffer, cl_map_flags flags, size_t offset, size_t size, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(offset + size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); cl_int ret = 0; void *pointer = clEnqueueMapBuffer( m_queue, buffer.get(), CL_TRUE, flags, offset, size, events.size(), events.get_event_ptr(), 0, &ret ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return pointer; }
/// Enqueues a command to migrate \p mem_objects. /// /// \see_opencl_ref{clEnqueueMigrateMemObjects} /// /// \opencl_version_warning{1,2} event enqueue_migrate_memory_objects(uint_ num_mem_objects, const cl_mem *mem_objects, cl_mem_migration_flags flags, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueMigrateMemObjects( m_queue, num_mem_objects, mem_objects, flags, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; }
/// Enqueues a command to write data from host memory to \p image. /// /// \see_opencl_ref{clEnqueueWriteImage} void enqueue_write_image(const image3d &image, const size_t origin[3], const size_t region[3], size_t input_row_pitch, size_t input_slice_pitch, const void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(image.get_context() == this->get_context()); cl_int ret = clEnqueueWriteImage( m_queue, image.get(), CL_TRUE, origin, region, input_row_pitch, input_slice_pitch, host_ptr, events.size(), events.get_event_ptr(), 0 ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } }
/// Enqueues a kernel for execution. /// /// \see_opencl_ref{clEnqueueNDRangeKernel} event enqueue_nd_range_kernel(const kernel &kernel, size_t work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(kernel.get_context() == this->get_context()); event event_; cl_int ret = clEnqueueNDRangeKernel( m_queue, kernel, static_cast<cl_uint>(work_dim), global_work_offset, global_work_size, local_work_size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; }
/// Enqueues a command to copy data from \p src_image to \p dst_buffer. /// /// \see_opencl_ref{clEnqueueCopyImageToBuffer} event enqueue_copy_image_to_buffer(const image2d &src_image, const buffer &dst_buffer, const size_t src_origin[2], const size_t region[2], size_t dst_offset, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(src_image.get_context() == this->get_context()); BOOST_ASSERT(dst_buffer.get_context() == this->get_context()); const size_t src_origin3[3] = { src_origin[0], src_origin[1], size_t(0) }; const size_t region3[3] = { region[0], region[1], size_t(1) }; event event_; cl_int ret = clEnqueueCopyImageToBuffer( m_queue, src_image.get(), dst_buffer.get(), src_origin3, region3, dst_offset, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; }
/// Enqueues a kernel to execute using a single work-item. /// /// \see_opencl_ref{clEnqueueTask} event enqueue_task(const kernel &kernel, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(kernel.get_context() == this->get_context()); event event_; // clEnqueueTask() was deprecated in OpenCL 2.0. In that case we // just forward to the equivalent clEnqueueNDRangeKernel() call. #ifdef CL_VERSION_2_0 size_t one = 1; cl_int ret = clEnqueueNDRangeKernel( m_queue, kernel, 1, 0, &one, &one, events.size(), events.get_event_ptr(), &event_.get() ); #else cl_int ret = clEnqueueTask( m_queue, kernel, events.size(), events.get_event_ptr(), &event_.get() ); #endif if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; }
void OpenCLImage2D :: create( OpenCLContext &ctx, cl_mem_flags usage, AlloArray *array ) { destroy(); detach(); usage = OpenCLMemoryBuffer::check_memory_flags(usage, array->data.ptr); bool at_least_2d = array->header.dimcount >= 2; size_t width = array->header.dim[0]; size_t height = at_least_2d ? array->header.dim[1] : 1; size_t rowstride = at_least_2d ? array->header.stride[1] : allo_array_size(array); cl_image_format format = OpenCLImageFormat::format_from_array(array); cl_int res = CL_SUCCESS; cl_mem mem = clCreateImage2D( ctx.get_context(), usage, &format, width, height, rowstride, array->data.ptr, &res ); if(opencl_error(res, "clCreateImage2D error creating buffer")) { return; } mMem = mem; ctx.attach_resource(this); }
/// Enqueues a function to execute on the host. event enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void *), void *args, size_t cb_args, uint_ num_mem_objects, const cl_mem *mem_list, const void **args_mem_loc, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueNativeKernel( m_queue, user_func, args, cb_args, num_mem_objects, mem_list, args_mem_loc, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; }
/// Builds the program with \p options. /// /// If the program fails to compile, this function will throw an /// opencl_error exception. /// \code /// try { /// // attempt to compile to program /// program.build(); /// } /// catch(boost::compute::opencl_error &e){ /// // program failed to compile, print out the build log /// std::cout << program.build_log() << std::endl; /// } /// \endcode /// /// \see_opencl_ref{clBuildProgram} void build(const std::string &options = std::string()) { const char *options_string = 0; if(!options.empty()){ options_string = options.c_str(); } cl_int ret = clBuildProgram(m_program, 0, 0, options_string, 0, 0); #ifdef BOOST_COMPUTE_DEBUG_KERNEL_COMPILATION if(ret != CL_SUCCESS){ // print the error, source code and build log std::cerr << "Boost.Compute: " << "kernel compilation failed (" << ret << ")\n" << "--- source ---\n" << source() << "\n--- build log ---\n" << build_log() << std::endl; } #endif if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } }
/// Enqueues a command to fill \p size bytes of data at \p svm_ptr with /// \p pattern. /// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMMemFill} event enqueue_svm_fill(void *svm_ptr, const void *pattern, size_t pattern_size, size_t size, const wait_list &events = wait_list()) { event event_; cl_int ret = clEnqueueSVMMemFill( m_queue, svm_ptr, pattern, pattern_size, size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; }
/// Blocks until the actions corresponding to the event have /// completed. void wait() const { cl_int ret = clWaitForEvents(1, &m_event); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } }
/// Enqueues a command to read data from \p buffer to host memory. /// /// \see_opencl_ref{clEnqueueReadBuffer} /// /// \see copy() void enqueue_read_buffer(const buffer &buffer, size_t offset, size_t size, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); cl_int ret = clEnqueueReadBuffer( m_queue, buffer.get(), CL_TRUE, offset, size, host_ptr, events.size(), events.get_event_ptr(), 0 ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } }