/// Enqueues a command to copy data from \p src_image to \p dst_buffer. /// /// \see_opencl_ref{clEnqueueCopyImageToBuffer} event enqueue_copy_image_to_buffer(const image2d &src_image, const buffer &dst_buffer, const size_t src_origin[2], const size_t region[2], size_t dst_offset, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(src_image.get_context() == this->get_context()); BOOST_ASSERT(dst_buffer.get_context() == this->get_context()); const size_t src_origin3[3] = { src_origin[0], src_origin[1], size_t(0) }; const size_t region3[3] = { region[0], region[1], size_t(1) }; event event_; cl_int ret = clEnqueueCopyImageToBuffer( m_queue, src_image.get(), dst_buffer.get(), src_origin3, region3, dst_offset, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; }
// Times one image-to-buffer copy performed with clEnqueueCopyImageToBuffer()
// and reports the measurement through tlog.
//
// The copy uses imageOrigin / imageRegion (declared outside this function)
// as the source origin and region, and a destination offset of 0. The timed
// interval covers the enqueue, the clFlush() and the wait performed by
// spinForEventsComplete().
void timedImageBufferCLCopy( cl_command_queue queue,
                             cl_mem srcImg,
                             cl_mem dstBuf )
{
    CPerfCounter stopwatch;
    cl_event copyDone;

    stopwatch.Start();

    cl_int status = clEnqueueCopyImageToBuffer( queue,
                                                srcImg,
                                                dstBuf,
                                                imageOrigin,
                                                imageRegion,
                                                0,
                                                0, NULL,
                                                &copyDone );
    ASSERT_CL_RETURN( status );

    // Submit the command, then wait on its event before stopping the clock.
    clFlush( queue );
    spinForEventsComplete( 1, &copyDone );

    stopwatch.Stop();

    tlog->Timer( "%32s  %lf s   %8.2lf GB/s\n",
                 "clEnqueueCopyImageToBuffer():",
                 stopwatch.GetElapsedTime(),
                 nBytesRegion,
                 1 );
}
/* WINAPI entry point for clEnqueueCopyImageToBuffer.
 *
 * Emits a trace line, then forwards every argument unchanged to
 * clEnqueueCopyImageToBuffer() and returns that call's result. */
cl_int WINAPI wine_clEnqueueCopyImageToBuffer(cl_command_queue command_queue,
                                              cl_mem src_image,
                                              cl_mem dst_buffer,
                                              size_t * src_origin,
                                              size_t * region,
                                              size_t dst_offset,
                                              cl_uint num_events_in_wait_list,
                                              cl_event * event_wait_list,
                                              cl_event * event)
{
    TRACE("\n");
    return clEnqueueCopyImageToBuffer(command_queue,
                                      src_image,
                                      dst_buffer,
                                      src_origin,
                                      region,
                                      dst_offset,
                                      num_events_in_wait_list,
                                      event_wait_list,
                                      event);
}
int main(void) { cl_int err; cl_platform_id platforms[MAX_PLATFORMS]; cl_uint nplatforms; cl_device_id devices[MAX_DEVICES]; cl_uint ndevices; cl_uint i, j; size_t el, row, col; CHECK_CL_ERROR(clGetPlatformIDs(MAX_PLATFORMS, platforms, &nplatforms)); for (i = 0; i < nplatforms; i++) { CHECK_CL_ERROR(clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, MAX_DEVICES, devices, &ndevices)); /* Only test the devices we actually have room for */ if (ndevices > MAX_DEVICES) ndevices = MAX_DEVICES; for (j = 0; j < ndevices; j++) { /* skip devices that do not support images */ cl_bool has_img; CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_IMAGE_SUPPORT, sizeof(has_img), &has_img, NULL)); if (!has_img) continue; cl_context context = clCreateContext(NULL, 1, &devices[j], NULL, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateContext"); cl_command_queue queue = clCreateCommandQueue(context, devices[j], 0, &err); CHECK_OPENCL_ERROR_IN("clCreateCommandQueue"); cl_ulong alloc; size_t max_height; size_t max_width; #define MAXALLOC (1024U*1024U) CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(alloc), &alloc, NULL)); CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(max_width), &max_width, NULL)); CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(max_height), &max_height, NULL)); while (alloc > MAXALLOC) alloc /= 2; // fit at least one max_width inside the alloc (shrink max_width for this) while (max_width*pixel_size > alloc) max_width /= 2; // round number of elements to next multiple of max_width elements const size_t nels = (alloc/pixel_size/max_width)*max_width; const size_t buf_size = nels*pixel_size; cl_image_desc img_desc; memset(&img_desc, 0, sizeof(img_desc)); img_desc.image_type = CL_MEM_OBJECT_IMAGE2D; img_desc.image_width = max_width; img_desc.image_height = nels/max_width; img_desc.image_depth = 1; cl_ushort null_pixel[4] = {0, 0, 0, 0}; cl_ushort *host_buf = malloc(buf_size); 
TEST_ASSERT(host_buf); for (el = 0; el < nels; el+=4) { host_buf[el] = el & CHANNEL_MAX; host_buf[el+1] = (CHANNEL_MAX - el) & CHANNEL_MAX; host_buf[el+2] = (CHANNEL_MAX/((el & 1) + 1) - el) & CHANNEL_MAX; host_buf[el+3] = (CHANNEL_MAX - el/((el & 1) + 1)) & CHANNEL_MAX; } cl_mem buf = clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateBuffer"); cl_mem img = clCreateImage(context, CL_MEM_READ_WRITE, &img_format, &img_desc, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateImage"); CHECK_CL_ERROR(clEnqueueWriteBuffer(queue, buf, CL_TRUE, 0, buf_size, host_buf, 0, NULL, NULL)); const size_t offset = 0; const size_t origin[] = {0, 0, 0}; const size_t region[] = {img_desc.image_width, img_desc.image_height, 1}; CHECK_CL_ERROR(clEnqueueCopyBufferToImage(queue, buf, img, offset, origin, region, 0, NULL, NULL)); size_t row_pitch, slice_pitch; cl_ushort *img_map = clEnqueueMapImage(queue, img, CL_TRUE, CL_MAP_READ, origin, region, &row_pitch, &slice_pitch, 0, NULL, NULL, &err); CHECK_OPENCL_ERROR_IN("clEnqueueMapImage"); CHECK_CL_ERROR(clFinish(queue)); for (row = 0; row < img_desc.image_height; ++row) { for (col = 0; col < img_desc.image_width; ++col) { cl_ushort *img_pixel = (cl_ushort*)((char*)img_map + row*row_pitch) + col*4; cl_ushort *buf_pixel = host_buf + (row*img_desc.image_width + col)*4; if (memcmp(img_pixel, buf_pixel, pixel_size) != 0) printf("%zu %zu %zu : %x %x %x %x | %x %x %x %x\n", row, col, (size_t)(buf_pixel - host_buf), buf_pixel[0], buf_pixel[1], buf_pixel[2], buf_pixel[3], img_pixel[0], img_pixel[1], img_pixel[2], img_pixel[3]); TEST_ASSERT(memcmp(img_pixel, buf_pixel, pixel_size) == 0); } } CHECK_CL_ERROR(clEnqueueUnmapMemObject(queue, img, img_map, 0, NULL, NULL)); /* Clear the buffer, and ensure it has been cleared */ CHECK_CL_ERROR(clEnqueueFillBuffer(queue, buf, null_pixel, sizeof(null_pixel), 0, buf_size, 0, NULL, NULL)); cl_ushort *buf_map = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_READ, 0, 
buf_size, 0, NULL, NULL, &err); CHECK_OPENCL_ERROR_IN("clEnqueueMapBuffer"); CHECK_CL_ERROR(clFinish(queue)); for (el = 0; el < nels; ++el) { #if 0 // debug if (buf_map[el] != 0) { printf("%zu/%zu => %u\n", el, nels, buf_map[el]); } #endif TEST_ASSERT(buf_map[el] == 0); } CHECK_CL_ERROR(clEnqueueUnmapMemObject(queue, buf, buf_map, 0, NULL, NULL)); /* Copy data from image to buffer, and check that it's again equal to the original buffer */ CHECK_CL_ERROR(clEnqueueCopyImageToBuffer(queue, img, buf, origin, region, offset, 0, NULL, NULL)); buf_map = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_READ, 0, buf_size, 0, NULL, NULL, &err); CHECK_CL_ERROR(clFinish(queue)); TEST_ASSERT(memcmp(buf_map, host_buf, buf_size) == 0); CHECK_CL_ERROR ( clEnqueueUnmapMemObject (queue, buf, buf_map, 0, NULL, NULL)); CHECK_CL_ERROR (clFinish (queue)); free(host_buf); CHECK_CL_ERROR (clReleaseMemObject (img)); CHECK_CL_ERROR (clReleaseMemObject (buf)); CHECK_CL_ERROR (clReleaseCommandQueue (queue)); CHECK_CL_ERROR (clReleaseContext (context)); } } return EXIT_SUCCESS; }
END_TEST

// Checks copies between a buffer and a 2D image, both created with
// CL_MEM_USE_HOST_PTR over local arrays:
//   1. a 2x2 block is copied from the buffer (offset 1) into the 3x3 image
//      at (1, 0),
//   2. a 2x2 block is copied back from the image at (1, 1) into the buffer
//      (offset 1) and compared with the expected bytes,
//   3. the image is mapped from origin (0, 0, 0) and the returned pointer
//      must be the host array itself.
START_TEST (test_copy_image_buffer)
{
    cl_platform_id platform = 0;
    cl_device_id device;
    cl_context ctx;
    cl_command_queue queue;
    cl_mem image, buffer;
    cl_int result;
    cl_event event;

    unsigned char image_buffer[3*3*4] = {
        255, 0, 0, 0,       0, 255, 0, 0,       0, 0, 255, 0,
        128, 0, 0, 0,       0, 128, 0, 0,       0, 0, 128, 0,
        64, 0, 0, 0,        0, 64, 0, 0,        0, 0, 64, 0
    };

    // Square that will be put in image_buffer at (1, 0)
    unsigned char buffer_buffer[2*2*4+1] = {
        33, // Oh, a padding !
        255, 255, 255, 0,   255, 0, 255, 0,
        0, 255, 255, 0,     255, 255, 0, 0
    };

    // What we must get once re-reading 2x2 rect at (1, 1)
    unsigned char correct_data[2*2*4] = {
        0, 255, 255, 0,     255, 255, 0, 0,
        0, 64, 0, 0,        0, 0, 64, 0
    };

    cl_image_format fmt;

    fmt.image_channel_data_type = CL_UNORM_INT8;
    fmt.image_channel_order = CL_RGBA;

    size_t origin[3] = {1, 0, 0};
    size_t region[3] = {2, 2, 1};

    result = clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, 0);
    fail_if(
        result != CL_SUCCESS,
        "unable to get the default device"
    );

    ctx = clCreateContext(0, 1, &device, 0, 0, &result);
    fail_if(
        result != CL_SUCCESS || ctx == 0,
        "unable to create a valid context"
    );

    queue = clCreateCommandQueue(ctx, device, 0, &result);
    fail_if(
        result != CL_SUCCESS || queue == 0,
        "cannot create a command queue"
    );

    image = clCreateImage2D(ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &fmt,
                            3, 3, 0, image_buffer, &result);
    fail_if(
        result != CL_SUCCESS,
        "unable to create a 3x3 bgra image"
    );

    buffer = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                            sizeof(buffer_buffer), buffer_buffer, &result);
    fail_if(
        result != CL_SUCCESS,
        "unable to create a buffer object"
    );

    // Write buffer in image
    result = clEnqueueCopyBufferToImage(queue, buffer, image, 1, origin, region,
                                        0, 0, &event);
    fail_if(
        result != CL_SUCCESS,
        "unable to queue a copy buffer to image event, buffer offset 1, image 2x2 @ (1, 0)"
    );

    result = clWaitForEvents(1, &event);
    fail_if(
        result != CL_SUCCESS,
        "cannot wait for event"
    );

    clReleaseEvent(event);

    // Read it back into buffer, again with an offset
    origin[1] = 1;
    result = clEnqueueCopyImageToBuffer(queue, image, buffer, origin, region, 1,
                                        0, 0, &event);
    fail_if(
        result != CL_SUCCESS,
        "unable to queue a copy image to buffer event, buffer offset 1, image 2x2 @ (1, 1)"
    );

    result = clWaitForEvents(1, &event);
    fail_if(
        result != CL_SUCCESS,
        "cannot wait for event"
    );

    clReleaseEvent(event);

    fail_if(
        std::memcmp(buffer_buffer + 1, correct_data, sizeof(correct_data)) != 0,
        "copying data around isn't working the expected way"
    );

    // Map the image and check pointers
    unsigned char *mapped;
    size_t row_pitch;

    origin[0] = 0;
    origin[1] = 0;
    origin[2] = 0;

    mapped = (unsigned char *)clEnqueueMapImage(queue, image, 1, CL_MAP_READ,
                                                origin, region, &row_pitch, 0,
                                                0, 0, 0, &result);
    fail_if(
        result != CL_SUCCESS,
        "unable to map an image"
    );
    fail_if(
        mapped != image_buffer,
        "mapped aread doesn't match host ptr"
    );

    // Give the mapping back before the image is released, and drain the
    // queue so that no command can still refer to the local host arrays
    // once this test returns.
    result = clEnqueueUnmapMemObject(queue, image, mapped, 0, 0, 0);
    fail_if(
        result != CL_SUCCESS,
        "unable to unmap the image"
    );

    result = clFinish(queue);
    fail_if(
        result != CL_SUCCESS,
        "unable to finish the command queue"
    );

    clReleaseMemObject(image);
    clReleaseMemObject(buffer);
    clReleaseCommandQueue(queue);
    clReleaseContext(ctx);
}