/* Java->C glue code:
 *   Java package: com.jogamp.opencl.impl.CLImpl
 *   Java method:  java.nio.ByteBuffer clEnqueueMapImage(long command_queue, long image,
 *                 int blocking_map, long map_flags, PointerBuffer origin, PointerBuffer range,
 *                 PointerBuffer image_row_pitch, PointerBuffer image_slice_pitch,
 *                 int num_events_in_wait_list, PointerBuffer event_wait_list,
 *                 PointerBuffer event, java.nio.IntBuffer errcode_ret)
 *   C function:   void * clEnqueueMapImage(cl_command_queue, cl_mem, uint32_t, uint64_t,
 *                 const size_t *, const size_t *, size_t *, size_t *, uint32_t,
 *                 cl_event *, cl_event *, int32_t *);
 *
 * Maps an image region and wraps the mapped host pointer in a direct ByteBuffer
 * sized from the image element size, depth, pitches, and requested range.
 */
JNIEXPORT jobject JNICALL
Java_com_jogamp_opencl_impl_CLImpl_clEnqueueMapImage0__JJIJLjava_lang_Object_2ILjava_lang_Object_2ILjava_lang_Object_2ILjava_lang_Object_2IILjava_lang_Object_2ILjava_lang_Object_2ILjava_lang_Object_2I
  (JNIEnv *env, jobject _unused,
   jlong command_queue, jlong image, jint blocking_map, jlong map_flags,
   jobject origin, jint origin_byte_offset,
   jobject range, jint range_byte_offset,
   jobject image_row_pitch, jint image_row_pitch_byte_offset,
   jobject image_slice_pitch, jint image_slice_pitch_byte_offset,
   jint num_events_in_wait_list,
   jobject event_wait_list, jint event_wait_list_byte_offset,
   jobject event, jint event_byte_offset,
   jobject errcode_ret, jint errcode_ret_byte_offset)
{
    size_t * _origin_ptr = NULL;
    size_t * _range_ptr = NULL;
    size_t * _image_row_pitch_ptr = NULL;
    size_t * _image_slice_pitch_ptr = NULL;
    cl_event * _event_wait_list_ptr = NULL;
    cl_event * _event_ptr = NULL;
    int32_t * _errcode_ret_ptr = NULL;
    /* FIX: the original declared `elements` and `depth` as size_t pointers
     * initialized to NULL and passed them straight to clGetImageInfo, which
     * wrote through (and the code later read through) NULL pointers.
     * Use plain stack storage and pass its address instead. */
    size_t elements = 0;
    size_t depth = 0;
    size_t pixels;
    cl_int status;
    void * _res;

    /* Resolve every direct NIO buffer to a raw pointer, honoring the
     * per-buffer byte offsets supplied by the Java side. */
    if (origin != NULL) {
        _origin_ptr = (size_t *) (((char*) (*env)->GetDirectBufferAddress(env, origin)) + origin_byte_offset);
    }
    if (range != NULL) {
        _range_ptr = (size_t *) (((char*) (*env)->GetDirectBufferAddress(env, range)) + range_byte_offset);
    }
    if (image_row_pitch != NULL) {
        _image_row_pitch_ptr = (size_t *) (((char*) (*env)->GetDirectBufferAddress(env, image_row_pitch)) + image_row_pitch_byte_offset);
    }
    if (image_slice_pitch != NULL) {
        _image_slice_pitch_ptr = (size_t *) (((char*) (*env)->GetDirectBufferAddress(env, image_slice_pitch)) + image_slice_pitch_byte_offset);
    }
    if (event_wait_list != NULL) {
        _event_wait_list_ptr = (cl_event *) (((char*) (*env)->GetDirectBufferAddress(env, event_wait_list)) + event_wait_list_byte_offset);
    }
    if (event != NULL) {
        _event_ptr = (cl_event *) (((char*) (*env)->GetDirectBufferAddress(env, event)) + event_byte_offset);
    }
    if (errcode_ret != NULL) {
        _errcode_ret_ptr = (int32_t *) (((char*) (*env)->GetDirectBufferAddress(env, errcode_ret)) + errcode_ret_byte_offset);
    }

    _res = clEnqueueMapImage((cl_command_queue) (intptr_t) command_queue,
                             (cl_mem) (intptr_t) image,
                             (uint32_t) blocking_map,
                             (uint64_t) map_flags,
                             (size_t *) _origin_ptr,
                             (size_t *) _range_ptr,
                             (size_t *) _image_row_pitch_ptr,
                             (size_t *) _image_slice_pitch_ptr,
                             (uint32_t) num_events_in_wait_list,
                             (cl_event *) _event_wait_list_ptr,
                             (cl_event *) _event_ptr,
                             (int32_t *) _errcode_ret_ptr);
    if (_res == NULL) return NULL;

    /* Compute the size of the mapped region so it can be wrapped in a
     * ByteBuffer: query bytes-per-element and the image depth. */
    status  = clGetImageInfo((cl_mem) (intptr_t) image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), (void *) &elements, NULL);
    status |= clGetImageInfo((cl_mem) (intptr_t) image, CL_IMAGE_DEPTH, sizeof(size_t), (void *) &depth, NULL);
    if (status != CL_SUCCESS) return NULL;

    /* The size computation below requires the range and the returned pitches;
     * bail out rather than dereference NULL if the caller omitted them. */
    if (_range_ptr == NULL || _image_row_pitch_ptr == NULL) return NULL;

    if (depth == 0) {
        /* 2D image */
        pixels = (*_image_row_pitch_ptr) * _range_ptr[1] + _range_ptr[0];
    } else {
        /* 3D image: also needs the slice pitch */
        if (_image_slice_pitch_ptr == NULL) return NULL;
        pixels = (*_image_slice_pitch_ptr) * _range_ptr[2]
               + (*_image_row_pitch_ptr) * _range_ptr[1]
               + _range_ptr[0];
    }
    return (*env)->NewDirectByteBuffer(env, _res, pixels * elements);
}
/* Wine thunk: forwards clEnqueueMapImage() straight to the native
 * OpenCL implementation, passing every argument through unchanged. */
void * WINAPI wine_clEnqueueMapImage(cl_command_queue command_queue, cl_mem image,
                                     cl_bool blocking_map, cl_map_flags map_flags,
                                     size_t * origin, size_t * region,
                                     size_t * image_row_pitch, size_t * image_slice_pitch,
                                     cl_uint num_events_in_wait_list,
                                     cl_event * event_wait_list, cl_event * event,
                                     cl_int * errcode_ret)
{
    TRACE("\n");
    return clEnqueueMapImage(command_queue, image, blocking_map, map_flags,
                             origin, region, image_row_pitch, image_slice_pitch,
                             num_events_in_wait_list, event_wait_list, event,
                             errcode_ret);
}
/**
 * \brief ocl::Image::mapAsync Maps the Image into the host memory.
 *
 * No data transfer is performed. Note that in order to map data of the Image the active queue
 * must be a cpu and must have been allocated with the Image access mode AllocHost. You cannot
 * modify the Image with OpenCL until unmap.
 * \param ptr is returned and contains the address of a pointer to the host memory.
 * \param origin is the 3D offset in bytes from which the image is read.
 * \param region is the 3D region in bytes to be mapped.
 * \param access specifies in what way the host_mem is used.
 * \param list contains all events for which this command has to wait.
 * \return event which can be integrated into other EventList
 */
ocl::Event ocl::Image::mapAsync(void **ptr, size_t *origin, const size_t *region, Memory::Access access, const EventList &list) const
{
    TRUE_ASSERT(this->activeQueue().device().isCpu(), "Device " << this->activeQueue().device().name() << " is not a cpu!");
    cl_int status;
    cl_event event_id;
    cl_map_flags flags = access;
    *ptr = clEnqueueMapImage(this->activeQueue().id(), this->id(), CL_TRUE, flags,
                             origin, region, 0, 0,
                             list.size(), list.events().data(), &event_id, &status);
    OPENCL_SAFE_CALL( status );
    /* FIX: the original asserted `ptr != NULL`, which only checks the
     * out-parameter itself (non-NULL for any sane caller) and never the
     * result of the map; check the pointer the map call produced. */
    TRUE_ASSERT(*ptr != NULL, "Could not map image!");
    return ocl::Event(event_id, this->context());
}
/** * \brief ocl::Image::map Maps the Image into the host memory. * * No data transfer is performed. Note that in order to map data of the Image the active queue must be a cpu and must have been allocated * with the Image access mode AllocHost. You cannot modify the Image with OpenCL until unmap. * \param origin is the 3D offset in bytes from which the image is read. * \param region is the 3D region in bytes to be mapped. * \param access specifies in what way the host_mem is used. * \return a void pointer to the mapped host memory location. */ void * ocl::Image::map(size_t *origin, const size_t *region, Memory::Access access) const { TRUE_ASSERT(this->activeQueue().device().isCpu(), "Device " << this->activeQueue().device().name() << " is not a cpu!"); cl_int status; cl_map_flags flags = access; void *pointer = clEnqueueMapImage(this->activeQueue().id(), this->id(), CL_TRUE, flags, origin, region, 0, 0, 0, NULL, NULL, &status); OPENCL_SAFE_CALL (status ) ; TRUE_ASSERT(pointer != NULL, "Could not map image!"); OPENCL_SAFE_CALL( clFinish(this->activeQueue().id()) ); return pointer; }
void run() { cl_int retval; void *destination_data = NULL; const int work_dimensions = 2; size_t image_dimensions[3] = { bitmapwidth, bitmapheight, 0 }; size_t image_origin[3] = { 0, 0, 0 }; size_t image_pitch = 0; // set kernel arguments. these match the kernel function declaration clSetKernelArg(kernel, 0, sizeof(cl_mem), &srcimage); clSetKernelArg(kernel, 1, sizeof(cl_mem), &destimage); // execute the kernel function printf("Applying function '%s' to image\n", kernel_fn_name); // 1st param: command queue // 2nd param: kernel to execute // 3rd param: amount of work dimensions to use (1-3) // 4th param: global work offset, not used, set to NULL // 5th param: global work size, work dimension range // 6th param: local work size, or NULL if OpenCL divides global work size to local // 7th param: num events in wait list // 8th param: event wait list // 9th param: event clEnqueueNDRangeKernel( command_queue, kernel, work_dimensions, NULL, image_dimensions, NULL, 0, NULL, NULL); // add image data mapping to command queue // image data must be mapped to access it // 1st param: command queue // 2nd param: image to map // 3rd param: run blocking or in non-blocking mode // 4th param: image origin to map from // 5th param: image dimensions to map // 6th param: mapped horline size // 7th param: mapped 3D image slice size // 8th param: num events in wait list // 9th param: event wait list // 10th param: event // 11th param: error code on return, or NULL if not used destination_data = clEnqueueMapImage( command_queue, destimage, CL_FALSE, CL_MAP_READ, image_origin, image_dimensions, &image_pitch, NULL, 0, NULL, NULL, &retval); // run queue until all items are finished clFinish(command_queue); // write destination image data to bitmap file const char *output_name = "output.bmp"; writeBitmapFile(output_name, destination_data, bitmapwidth, bitmapheight); printf("Wrote image '%s'\n", output_name); // unmap destination image // 1st param: command queue // 2nd param: image to unmap // 
3rd param: pointer received from mapping function // 4th param: num events in wait list // 5th param: event wait list // 6th param: event clEnqueueUnmapMemObject( command_queue, destimage, destination_data, 0, NULL, NULL); // run queue until all items are finished clFinish(command_queue); printf("All done.\n"); }
int main(void) { cl_int err; cl_platform_id platforms[MAX_PLATFORMS]; cl_uint nplatforms; cl_device_id devices[MAX_DEVICES]; cl_uint ndevices; cl_uint i, j; size_t el, row, col; CHECK_CL_ERROR(clGetPlatformIDs(MAX_PLATFORMS, platforms, &nplatforms)); for (i = 0; i < nplatforms; i++) { CHECK_CL_ERROR(clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, MAX_DEVICES, devices, &ndevices)); /* Only test the devices we actually have room for */ if (ndevices > MAX_DEVICES) ndevices = MAX_DEVICES; for (j = 0; j < ndevices; j++) { /* skip devices that do not support images */ cl_bool has_img; CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_IMAGE_SUPPORT, sizeof(has_img), &has_img, NULL)); if (!has_img) continue; cl_context context = clCreateContext(NULL, 1, &devices[j], NULL, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateContext"); cl_command_queue queue = clCreateCommandQueue(context, devices[j], 0, &err); CHECK_OPENCL_ERROR_IN("clCreateCommandQueue"); cl_ulong alloc; size_t max_height; size_t max_width; #define MAXALLOC (1024U*1024U) CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(alloc), &alloc, NULL)); CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(max_width), &max_width, NULL)); CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(max_height), &max_height, NULL)); while (alloc > MAXALLOC) alloc /= 2; // fit at least one max_width inside the alloc (shrink max_width for this) while (max_width*pixel_size > alloc) max_width /= 2; // round number of elements to next multiple of max_width elements const size_t nels = (alloc/pixel_size/max_width)*max_width; const size_t buf_size = nels*pixel_size; cl_image_desc img_desc; memset(&img_desc, 0, sizeof(img_desc)); img_desc.image_type = CL_MEM_OBJECT_IMAGE2D; img_desc.image_width = max_width; img_desc.image_height = nels/max_width; img_desc.image_depth = 1; cl_ushort null_pixel[4] = {0, 0, 0, 0}; cl_ushort *host_buf = malloc(buf_size); 
TEST_ASSERT(host_buf); for (el = 0; el < nels; el+=4) { host_buf[el] = el & CHANNEL_MAX; host_buf[el+1] = (CHANNEL_MAX - el) & CHANNEL_MAX; host_buf[el+2] = (CHANNEL_MAX/((el & 1) + 1) - el) & CHANNEL_MAX; host_buf[el+3] = (CHANNEL_MAX - el/((el & 1) + 1)) & CHANNEL_MAX; } cl_mem buf = clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateBuffer"); cl_mem img = clCreateImage(context, CL_MEM_READ_WRITE, &img_format, &img_desc, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateImage"); CHECK_CL_ERROR(clEnqueueWriteBuffer(queue, buf, CL_TRUE, 0, buf_size, host_buf, 0, NULL, NULL)); const size_t offset = 0; const size_t origin[] = {0, 0, 0}; const size_t region[] = {img_desc.image_width, img_desc.image_height, 1}; CHECK_CL_ERROR(clEnqueueCopyBufferToImage(queue, buf, img, offset, origin, region, 0, NULL, NULL)); size_t row_pitch, slice_pitch; cl_ushort *img_map = clEnqueueMapImage(queue, img, CL_TRUE, CL_MAP_READ, origin, region, &row_pitch, &slice_pitch, 0, NULL, NULL, &err); CHECK_OPENCL_ERROR_IN("clEnqueueMapImage"); CHECK_CL_ERROR(clFinish(queue)); for (row = 0; row < img_desc.image_height; ++row) { for (col = 0; col < img_desc.image_width; ++col) { cl_ushort *img_pixel = (cl_ushort*)((char*)img_map + row*row_pitch) + col*4; cl_ushort *buf_pixel = host_buf + (row*img_desc.image_width + col)*4; if (memcmp(img_pixel, buf_pixel, pixel_size) != 0) printf("%zu %zu %zu : %x %x %x %x | %x %x %x %x\n", row, col, (size_t)(buf_pixel - host_buf), buf_pixel[0], buf_pixel[1], buf_pixel[2], buf_pixel[3], img_pixel[0], img_pixel[1], img_pixel[2], img_pixel[3]); TEST_ASSERT(memcmp(img_pixel, buf_pixel, pixel_size) == 0); } } CHECK_CL_ERROR(clEnqueueUnmapMemObject(queue, img, img_map, 0, NULL, NULL)); /* Clear the buffer, and ensure it has been cleared */ CHECK_CL_ERROR(clEnqueueFillBuffer(queue, buf, null_pixel, sizeof(null_pixel), 0, buf_size, 0, NULL, NULL)); cl_ushort *buf_map = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_READ, 0, 
buf_size, 0, NULL, NULL, &err); CHECK_OPENCL_ERROR_IN("clEnqueueMapBuffer"); CHECK_CL_ERROR(clFinish(queue)); for (el = 0; el < nels; ++el) { #if 0 // debug if (buf_map[el] != 0) { printf("%zu/%zu => %u\n", el, nels, buf_map[el]); } #endif TEST_ASSERT(buf_map[el] == 0); } CHECK_CL_ERROR(clEnqueueUnmapMemObject(queue, buf, buf_map, 0, NULL, NULL)); /* Copy data from image to buffer, and check that it's again equal to the original buffer */ CHECK_CL_ERROR(clEnqueueCopyImageToBuffer(queue, img, buf, origin, region, offset, 0, NULL, NULL)); buf_map = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_READ, 0, buf_size, 0, NULL, NULL, &err); CHECK_CL_ERROR(clFinish(queue)); TEST_ASSERT(memcmp(buf_map, host_buf, buf_size) == 0); CHECK_CL_ERROR ( clEnqueueUnmapMemObject (queue, buf, buf_map, 0, NULL, NULL)); CHECK_CL_ERROR (clFinish (queue)); free(host_buf); CHECK_CL_ERROR (clReleaseMemObject (img)); CHECK_CL_ERROR (clReleaseMemObject (buf)); CHECK_CL_ERROR (clReleaseCommandQueue (queue)); CHECK_CL_ERROR (clReleaseContext (context)); } } return EXIT_SUCCESS; }
bool CL_Image3D::Map(const CL_CommandQueue* pCommandQueue, CL_MapAccess AccessType, size_t uOriginX, size_t uOriginY, size_t uOriginZ, size_t uWidth, size_t uHeight, size_t uDepth, void** ppMappedOutput, bool bIsBlocking, CL_Event* pNewEvent, const CL_EventPool* pWaitList) { CL_CPP_CONDITIONAL_RETURN_FALSE(!m_Image); CL_CPP_CONDITIONAL_RETURN_FALSE(!pCommandQueue); CL_CPP_CONDITIONAL_RETURN_FALSE(!ppMappedOutput); cl_map_flags uFlagParam = 0; // Determine the access type. switch(AccessType) { case CL_MapAccess_Read: uFlagParam = CL_MAP_READ; break; case CL_MapAccess_Write: uFlagParam = CL_MAP_WRITE; break; default: return false; } const size_t uOrigin[3] = { uOriginX, uOriginY, uOriginZ }; const size_t uRegion[3] = { uWidth, uHeight, uDepth }; cl_uint uNumWaitEvents = pWaitList ? pWaitList->GetNumEvents() : 0; const cl_event* pWaitEvents = pWaitList ? pWaitList->GetEventPool() : NULL; cl_event NewEvent = NULL; // These must be specified in a call to clEnqueueMapImage(), otherwise the operation will fail. // We don't need to do anything with them since we already know the values of both. size_t uOutputRowPitch = 0; size_t uOutputSlicePitch = 0; const cl_command_queue CommandQueue = pCommandQueue->GetCommandQueue(); // Map a location in host memory onto the buffer object. cl_int iErrorCode = CL_SUCCESS; void* pMap = clEnqueueMapImage(CommandQueue, m_Image, (bIsBlocking) ? CL_TRUE : CL_FALSE, uFlagParam, uOrigin, uRegion, &uOutputRowPitch, &uOutputSlicePitch, uNumWaitEvents, pWaitEvents, &NewEvent, &iErrorCode); CL_CPP_CATCH_ERROR(iErrorCode); CL_CPP_CONDITIONAL_RETURN_FALSE(!pMap); (*ppMappedOutput) = pMap; if(NewEvent) { if(pNewEvent) pNewEvent->SetEvent(NewEvent); clReleaseEvent(NewEvent); } return true; }
int main(int argc, char** argv) { GLFWwindow* glfwwindow; {//OpenGL/GLFW Init if (!glfwInit()) { std::cerr << "Error: GLFW init failed" << std::endl; exit(EXIT_FAILURE); } glfwwindow = glfwCreateWindow(200, 200, "Nvidia interop bug demo", nullptr, nullptr); glfwMakeContextCurrent(glfwwindow); glewInit(); std::cout << "OpenGL Info: " << (char*)glGetString(GL_VENDOR) << " " << (char*)glGetString(GL_RENDERER) << std::endl; } cl_context clcontext; cl_command_queue clqueue; {//OpenCL init cl_platform_id platform = nullptr; {//Platform init cl_uint numPlatforms; clGetPlatformIDs(0, nullptr, &numPlatforms); if (numPlatforms == 0) { std::cerr << "Error: No OpenCL platforms available" << std::endl; return EXIT_FAILURE; } cl_platform_id* all_platforms = new cl_platform_id[numPlatforms]; clGetPlatformIDs(numPlatforms, all_platforms, nullptr); for (size_t i = 0; i < numPlatforms; i++) //Select Nvidia out of the platforms { char name[300]; clGetPlatformInfo(all_platforms[i], CL_PLATFORM_NAME, sizeof(name), &name, nullptr); std::string namestring(name); if (namestring.find("NVIDIA") != std::string::npos || namestring.find("Nvidia") != std::string::npos) platform = all_platforms[i]; } if (platform == nullptr) { std::cerr << "No Nvidia OpenCL platform found, will default to platform 0 "; } delete[] all_platforms; } { //Create shared context cl_context_properties properties[7]; properties[0] = CL_CONTEXT_PLATFORM; //This is different for other operating systems than Windows properties[1] = (cl_context_properties)platform; properties[2] = CL_GL_CONTEXT_KHR; properties[3] = (cl_context_properties)wglGetCurrentContext(); properties[4] = CL_WGL_HDC_KHR; properties[5] = (cl_context_properties)wglGetCurrentDC(); properties[6] = 0; clcontext = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, nullptr, nullptr, nullptr); } cl_device_id cldevice; { //Create cldevice cl_device_id* devices; cl_command_queue commandQueue = nullptr; size_t numDevices = 0; // First get the size of the 
devices buffer clGetContextInfo(clcontext, CL_CONTEXT_DEVICES, 0, nullptr, &numDevices); if (numDevices == 0) { std::cerr << "Error: No OpenCL devices available" << std::endl; return EXIT_FAILURE; } devices = new cl_device_id[numDevices]; clGetContextInfo(clcontext, CL_CONTEXT_DEVICES, numDevices, devices, nullptr); cldevice = devices[0]; delete[] devices; } { //Create CL command queue clqueue = clCreateCommandQueue(clcontext, cldevice, 0, nullptr); } char platformname[300]; char devicename[300]; clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(platformname), &platformname, nullptr); clGetDeviceInfo(cldevice, CL_DEVICE_NAME, sizeof(devicename), &devicename, nullptr); std::cout << "OpenCL platform " << platformname << " device " << devicename << std::endl; } size_t size = 200 * 200 * 4; //w=200, h=200, 4 bytes per channel char* databuffer = new char[size]; GLuint glbuffer, gltexture; cl_mem unsharedbuffer, sharedbuffer, unsharedtexture, sharedtexture; { //Init test data glGenBuffers(1, &glbuffer); glBindBuffer(GL_ARRAY_BUFFER, glbuffer); glBufferData(GL_ARRAY_BUFFER, size, databuffer, GL_STREAM_DRAW); glBindBuffer(GL_ARRAY_BUFFER, GL_NONE); glGenTextures(1, &gltexture); glBindTexture(GL_TEXTURE_2D, gltexture); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 200, 200, 0, GL_RGBA, GL_UNSIGNED_BYTE, databuffer); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); //Intel needs this for shared textures glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); //Intel needs this for shared textures glBindTexture(GL_TEXTURE_2D, GL_NONE); sharedtexture = clCreateFromGLTexture(clcontext, CL_MEM_READ_WRITE, GL_TEXTURE_2D, 0, gltexture, nullptr); sharedbuffer = clCreateFromGLBuffer(clcontext, CL_MEM_READ_WRITE, glbuffer, nullptr); unsharedbuffer = clCreateBuffer(clcontext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size, databuffer, nullptr); cl_image_format imgformat; cl_image_desc desc; imgformat.image_channel_data_type = CL_UNSIGNED_INT8; 
imgformat.image_channel_order = CL_RGBA; desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = 200; desc.image_height = 200; desc.image_depth = 1; desc.image_array_size = 1; desc.image_row_pitch = 0; desc.image_slice_pitch = 0; desc.num_mip_levels = 0; desc.num_samples = 0; desc.buffer = nullptr; unsharedtexture = clCreateImage(clcontext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &imgformat, &desc, databuffer, nullptr); } { const size_t origin[3] = { 0, 0, 0 }; const size_t region[3] = { 200, 200, 1 }; size_t pitch; // //MAIN PART BEGINS HERE // { //OpenGL buffer std::cout << "Mapping buffer with OpenGL: "; glBindBuffer(GL_ARRAY_BUFFER, glbuffer); void* glmapptr = glMapBuffer(GL_ARRAY_BUFFER, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT); glUnmapBuffer(GL_ARRAY_BUFFER); glBindBuffer(GL_ARRAY_BUFFER, GL_NONE); std::cout << "OK" << std::endl; glFinish(); } { //OpenCL unshared texture std::cout << "Mapping unshared texture with OpenCL: "; void* unsimgptr = clEnqueueMapImage(clqueue, unsharedtexture, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, origin, region, &pitch, nullptr, 0, nullptr, nullptr, nullptr); //This API call works fine for unshared objects clEnqueueUnmapMemObject(clqueue, unsharedtexture, unsimgptr, 0, nullptr, nullptr); std::cout << "OK" << std::endl; } { //OpenCL shared texture std::cout << "Mapping shared texture with OpenCL: "; clEnqueueAcquireGLObjects(clqueue, 1, &sharedtexture, 0, nullptr, nullptr); void* shdimgptr = clEnqueueMapImage(clqueue, unsharedtexture, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, origin, region, &pitch, nullptr, 0, nullptr, nullptr, nullptr); //This API call works fine shared objects clEnqueueUnmapMemObject(clqueue, unsharedtexture, shdimgptr, 0, nullptr, nullptr); clEnqueueReleaseGLObjects(clqueue, 1, &sharedtexture, 0, nullptr, nullptr); std::cout << "OK" << std::endl; } { //OpenCL unshared buffer std::cout << "Mapping unshared buffer with OpenCL: "; void* unsbufptr = clEnqueueMapBuffer(clqueue, unsharedbuffer, CL_TRUE, CL_MAP_READ | 
CL_MAP_WRITE, 0, size, 0, nullptr, nullptr, nullptr); //This API call works fine for unshared buffers clEnqueueUnmapMemObject(clqueue, unsharedbuffer, unsbufptr, 0, nullptr, nullptr); std::cout << "OK" << std::endl; } { //OpenCL shared buffer std::cout << "Mapping shared buffer with OpenCL (EXPECTING CRASH ON NVIDIA SYSTEMS): " << std::endl; clEnqueueAcquireGLObjects(clqueue, 1, &sharedbuffer, 0, nullptr, nullptr); // //CRITICAL PART BEGINS HERE // void* shdbufptr = clEnqueueMapBuffer(clqueue, sharedbuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, nullptr, nullptr, nullptr); //On Nvidia systems when using shared objects, error 0xC0000005 occurs in ntdll.dll: write access violation at position 0xSOMETHING //This leaves my application in an unusable state //But it works fine everywhere else (tested on ARM, AMD, Intel systems) // //CRITICAL PART ENDS HERE // std::cout << "did not fail" << std::endl; clEnqueueUnmapMemObject(clqueue, sharedbuffer, shdbufptr, 0, nullptr, nullptr); clEnqueueReleaseGLObjects(clqueue, 1, &sharedbuffer, 0, nullptr, nullptr); std::cout << "OK" << std::endl; } // //MAIN PART ENDS HERE // } clFinish(clqueue); delete[] databuffer; clReleaseMemObject(sharedbuffer); clReleaseMemObject(unsharedbuffer); clReleaseMemObject(sharedtexture); clReleaseMemObject(unsharedtexture); clReleaseCommandQueue(clqueue); clReleaseContext(clcontext); glDeleteTextures(1, &gltexture); glDeleteBuffers(1, &glbuffer); glfwDestroyWindow(glfwwindow); glfwTerminate(); return EXIT_SUCCESS; }
END_TEST
// Tests buffer<->image copies with byte offsets, then verifies that mapping a
// CL_MEM_USE_HOST_PTR image returns the original host pointer.
START_TEST (test_copy_image_buffer)
{
    cl_platform_id platform = 0;
    cl_device_id device;
    cl_context ctx;
    cl_command_queue queue;
    cl_mem image, buffer;
    cl_int result;
    cl_event event;
    // 3x3 RGBA image backing store (host pointer reused by the image below).
    unsigned char image_buffer[3*3*4] = {
        255, 0, 0, 0,    0, 255, 0, 0,    0, 0, 255, 0,
        128, 0, 0, 0,    0, 128, 0, 0,    0, 0, 128, 0,
        64, 0, 0, 0,     0, 64, 0, 0,     0, 0, 64, 0
    };
    // Square that will be put in image_buffer at (1, 0)
    unsigned char buffer_buffer[2*2*4+1] = {
        33, // Oh, a padding !
        255, 255, 255, 0,    255, 0, 255, 0,
        0, 255, 255, 0,      255, 255, 0, 0
    };
    // What we must get once re-reading 2x2 rect at (1, 1)
    unsigned char correct_data[2*2*4] = {
        0, 255, 255, 0,      255, 255, 0, 0,
        0, 64, 0, 0,         0, 0, 64, 0
    };
    cl_image_format fmt;
    fmt.image_channel_data_type = CL_UNORM_INT8;
    fmt.image_channel_order = CL_RGBA;
    size_t origin[3] = {1, 0, 0};
    size_t region[3] = {2, 2, 1};
    result = clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, 0);
    fail_if( result != CL_SUCCESS, "unable to get the default device" );
    ctx = clCreateContext(0, 1, &device, 0, 0, &result);
    fail_if( result != CL_SUCCESS || ctx == 0, "unable to create a valid context" );
    queue = clCreateCommandQueue(ctx, device, 0, &result);
    fail_if( result != CL_SUCCESS || queue == 0, "cannot create a command queue" );
    // Both objects use the host arrays above directly (CL_MEM_USE_HOST_PTR),
    // so the copies below are observable in host memory.
    image = clCreateImage2D(ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &fmt, 3, 3, 0, image_buffer, &result);
    fail_if( result != CL_SUCCESS, "unable to create a 3x3 bgra image" );
    buffer = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(buffer_buffer), buffer_buffer, &result);
    fail_if( result != CL_SUCCESS, "unable to create a buffer object" );
    // Write buffer in image (buffer read starts at byte offset 1, past the padding byte)
    result = clEnqueueCopyBufferToImage(queue, buffer, image, 1, origin, region, 0, 0, &event);
    fail_if( result != CL_SUCCESS, "unable to queue a copy buffer to image event, buffer offset 1, image 2x2 @ (1, 0)" );
    result = clWaitForEvents(1, &event);
    fail_if( result != CL_SUCCESS, "cannot wait for event" );
    clReleaseEvent(event);
    // Read it back into buffer, again with an offset
    origin[1] = 1;
    result = clEnqueueCopyImageToBuffer(queue, image, buffer, origin, region, 1, 0, 0, &event);
    fail_if( result != CL_SUCCESS, "unable to queue a copy image to buffer event, buffer offset 1, image 2x2 @ (1, 1)" );
    result = clWaitForEvents(1, &event);
    fail_if( result != CL_SUCCESS, "cannot wait for event" );
    fail_if( std::memcmp(buffer_buffer + 1, correct_data, sizeof(correct_data)) != 0,
             "copying data around isn't working the expected way" );
    // Map the image and check pointers: for a USE_HOST_PTR image a blocking
    // map is expected to return the original host pointer.
    unsigned char *mapped;
    size_t row_pitch;
    origin[0] = 0;
    origin[1] = 0;
    origin[2] = 0;
    mapped = (unsigned char *)clEnqueueMapImage(queue, image, 1, CL_MAP_READ, origin, region, &row_pitch, 0, 0, 0, 0, &result);
    fail_if( result != CL_SUCCESS, "unable to map an image" );
    fail_if( mapped != image_buffer, "mapped aread doesn't match host ptr" );
    clReleaseEvent(event);
    clReleaseMemObject(image);
    clReleaseMemObject(buffer);
    clReleaseCommandQueue(queue);
    clReleaseContext(ctx);
}
// Times a mapped write to an image: non-blocking map, host-side memset of the
// mapped region, then unmap. Each phase's bandwidth is reported via tlog.
void timedImageMappedWrite( cl_command_queue queue, cl_mem image, unsigned char v )
{
    CPerfCounter mapTimer, writeTimer, unmapTimer;
    cl_int ret;
    cl_event ev;
    void *ptr;

    mapTimer.Reset();
    writeTimer.Reset();
    unmapTimer.Reset();

    // Map read/write unless the global option restricts us to write-only.
    cl_map_flags mapFlag = mapRW ? (CL_MAP_READ | CL_MAP_WRITE) : CL_MAP_WRITE;

    size_t rowPitch;
    mapTimer.Start();
    ptr = clEnqueueMapImage( queue, image, CL_FALSE, mapFlag,
                             imageOrigin, imageRegion,
                             &rowPitch, NULL,
                             0, NULL, &ev, &ret );
    ASSERT_CL_RETURN( ret );
    clFlush( queue );
    spinForEventsComplete( 1, &ev );
    mapTimer.Stop();

    writeTimer.Start();
    memset2DPitch( ptr, v,
                   imageRegion[0] * nChannels * nBytesPerChannel,
                   imageRegion[1], rowPitch );
    writeTimer.Stop();

    unmapTimer.Start();
    ret = clEnqueueUnmapMemObject( queue, image, (void *) ptr, 0, NULL, &ev );
    ASSERT_CL_RETURN( ret );
    clFlush( queue );
    spinForEventsComplete( 1, &ev );
    unmapTimer.Stop();

    const char *msg = mapRW ? "clEnqueueMapImage(READ|WRITE):"
                            : "clEnqueueMapImage(WRITE):";
    tlog->Timer( "%32s %lf s [ %8.2lf GB/s ]\n", msg, mapTimer.GetElapsedTime(), nBytesRegion, 1 );
    tlog->Timer( "%32s %lf s %8.2lf GB/s\n", "memset():", writeTimer.GetElapsedTime(), nBytesRegion, 1 );
    tlog->Timer( "%32s %lf s [ %8.2lf GB/s ]\n", "clEnqueueUnmapMemObject():", unmapTimer.GetElapsedTime(), nBytesRegion, 1 );
}
// Times a mapped read from an image: non-blocking map, host-side verification
// read of the mapped region, then unmap. Sets the global vFailure flag if the
// verification fails; bandwidths are reported via tlog.
void timedImageMappedRead( cl_command_queue queue, cl_mem image, unsigned char v )
{
    CPerfCounter mapTimer, readTimer, unmapTimer;
    cl_int ret;
    cl_event ev;
    void *ptr;

    mapTimer.Reset();
    readTimer.Reset();
    unmapTimer.Reset();

    // Map read/write unless the global option restricts us to read-only.
    cl_map_flags mapFlag = mapRW ? (CL_MAP_READ | CL_MAP_WRITE) : CL_MAP_READ;

    size_t rowPitch;
    mapTimer.Start();
    ptr = clEnqueueMapImage( queue, image, CL_FALSE, mapFlag,
                             imageOrigin, imageRegion,
                             &rowPitch, NULL,
                             0, NULL, &ev, &ret );
    ASSERT_CL_RETURN( ret );
    clFlush( queue );
    spinForEventsComplete( 1, &ev );
    mapTimer.Stop();

    readTimer.Start();
    bool verify = readmem2DPitch( ptr, v, rowPitch , (int) imageRegion[1] );
    readTimer.Stop();

    unmapTimer.Start();
    ret = clEnqueueUnmapMemObject( queue, image, (void *) ptr, 0, NULL, &ev );
    ASSERT_CL_RETURN( ret );
    clFlush( queue );
    spinForEventsComplete( 1, &ev );
    unmapTimer.Stop();

    const char *msg = mapRW ? "clEnqueueMapImage(READ|WRITE):"
                            : "clEnqueueMapImage(READ):";
    tlog->Timer( "%32s %lf s [ %8.2lf GB/s ]\n", msg, mapTimer.GetElapsedTime(), nBytesRegion, 1 );
    tlog->Timer( "%32s %lf s %8.2lf GB/s\n", "CPU read:", readTimer.GetElapsedTime(), nBytesRegion, 1 );
    if( verify )
        tlog->Msg( "%32s\n", "verification ok" );
    else
    {
        tlog->Error( "%32s\n", "verification FAILED" );
        vFailure = true;
    }
    tlog->Timer( "%32s %lf s [ %8.2lf GB/s ]\n", "clEnqueueUnmapMemObject():", unmapTimer.GetElapsedTime(), nBytesRegion, 1 );
}
/** * \related cl_Mem_Object_t * * This function map OpenCL Image into Host-accessible memory & returns pointer * to mapped memory region * @param[in,out] self pointer to structure, in which 'Map' function pointer * is defined to point on this function. * @param[in] blocking_map flag of type 'cl_bool' that denotes, should operation * be blocking or not. * @param [in] map_flags mapping flags, that denotes how memory object should be * mapped * @param[in] time_mode enumeration, that denotes how time measurement should be * performed * @param[out] evt_to_generate pointer to OpenCL event that will be generated * at the end of operation. * * @return pointer to Host-accessible region of memory in case of success, NULL * pointer otherwise. In that case function sets error value, which is available * through cl_Error_t structure, defined by pointer 'self->error' * * @see cl_err_codes.h for detailed error description. * @see 'cl_Error_t' structure for error handling. */ static void* Image_Map( scow_Mem_Object *self, cl_bool blocking_map, cl_map_flags map_flags, TIME_STUDY_MODE time_mode, cl_event *evt_to_generate, cl_command_queue explicit_queue) { cl_int ret; cl_event mapping_ready, *p_mapping_ready; const size_t origin[3] = { 0, 0, 0 }, region[3] = { self->width, self->height, 1 }; OCL_CHECK_EXISTENCE(self, NULL); if (blocking_map > CL_TRUE) { self->error->Set_Last_Code(self->error, INVALID_BLOCKING_FLAG); return NULL; } (evt_to_generate != NULL) ? (p_mapping_ready = evt_to_generate) : (p_mapping_ready = &mapping_ready); // We can't map the object, that is already mapped if (self->mapped_to_region != NULL) { self->error->Set_Last_Code(self->error, BUFFER_IN_USE); return VOID_MEM_OBJ_PTR; } cl_command_queue q = (explicit_queue == NULL) ? (self->parent_thread->q_data_dtoh) : (explicit_queue); /* Save mapped pointer inside a structure in case if memory object is being * destroyed without unmapping it at first. 
*/ self->mapped_to_region = clEnqueueMapImage(q, self->cl_mem_object, blocking_map, map_flags, origin, region, &self->row_pitch, NULL, 0, NULL, p_mapping_ready, &ret); OCL_DIE_ON_ERROR(ret, CL_SUCCESS, self->error->Set_Last_Code(self->error, ret), NULL); switch (time_mode) { case MEASURE: self->timer->current_time_device = Gather_Time_uS(p_mapping_ready); self->timer->total_time_device += self->timer->current_time_device; break; case DONT_MEASURE: break; default: break; } if (p_mapping_ready != evt_to_generate){ clReleaseEvent(*p_mapping_ready); } return self->mapped_to_region; }
int ImageOverlap::runCLKernels() { cl_int status; //wait for fill end status=clEnqueueMarkerWithWaitList(commandQueue[2],2,eventlist,&enqueueEvent); CHECK_OPENCL_ERROR(status,"clEnqueueMarkerWithWaitList failed.(commandQueue[2])"); // Set appropriate arguments to the kernelOverLap // map buffer image status = clSetKernelArg( kernelOverLap, 0, sizeof(cl_mem), &mapImage); CHECK_OPENCL_ERROR(status,"clSetKernelArg failed. (mapImage)"); // fill Buffer image status = clSetKernelArg( kernelOverLap, 1, sizeof(cl_mem), &fillImage); CHECK_OPENCL_ERROR(status,"clSetKernelArg failed. (fillImage)"); // fill Buffer image status = clSetKernelArg( kernelOverLap, 2, sizeof(cl_mem), &outputImage); CHECK_OPENCL_ERROR(status,"clSetKernelArg failed. (outputImage)"); // Enqueue a kernel run call. size_t globalThreads[] = {width, height}; size_t localThreads[] = {blockSizeX, blockSizeY}; status = clEnqueueNDRangeKernel( commandQueue[2], kernelOverLap, 2, NULL, globalThreads, localThreads, 1, &enqueueEvent, NULL); CHECK_OPENCL_ERROR(status,"clEnqueueNDRangeKernel failed."); // Enqueue Read Image size_t origin[] = {0, 0, 0}; size_t region[] = {width, height, 1}; size_t rowPitch; size_t slicePitch; // Read copy outputImageData = (cl_uchar4*)clEnqueueMapImage( commandQueue[2], outputImage, CL_FALSE, mapFlag, origin, region, &rowPitch, &slicePitch, 0, NULL, NULL, &status ); CHECK_OPENCL_ERROR(status,"clEnqueueMapImage failed.(commandQueue[2])"); clFlush(commandQueue[0]); clFlush(commandQueue[1]); status = clEnqueueUnmapMemObject(commandQueue[2],outputImage,(void*)outputImageData,NULL,0,NULL); CHECK_OPENCL_ERROR(status,"clEnqueueUnmapMemObject failed.(outputImage)"); // Wait for the read buffer to finish execution status = clFinish(commandQueue[2]); CHECK_OPENCL_ERROR(status,"clFinish failed.(commandQueue[2])"); return SDK_SUCCESS; }