Example #1
/*   Java->C glue code:
 *   Java package: com.jogamp.opencl.impl.CLImpl
 *    Java method: java.nio.ByteBuffer clEnqueueMapImage(long command_queue, long image, int blocking_map, long map_flags, com.jogamp.gluegen.runtime.PointerBuffer origin, com.jogamp.gluegen.runtime.PointerBuffer range, com.jogamp.gluegen.runtime.PointerBuffer image_row_pitch, com.jogamp.gluegen.runtime.PointerBuffer image_slice_pitch, int num_events_in_wait_list, com.jogamp.gluegen.runtime.PointerBuffer event_wait_list, com.jogamp.gluegen.runtime.PointerBuffer event, java.nio.IntBuffer errcode_ret)
 *     C function: void *  clEnqueueMapImage(cl_command_queue command_queue, cl_mem image, uint32_t blocking_map, uint64_t map_flags, const size_t * , const size_t * , size_t *  image_row_pitch, size_t *  image_slice_pitch, uint32_t num_events_in_wait_list, cl_event *  event_wait_list, cl_event *  event, int32_t *  errcode_ret);
 */
JNIEXPORT jobject JNICALL
Java_com_jogamp_opencl_impl_CLImpl_clEnqueueMapImage0__JJIJLjava_lang_Object_2ILjava_lang_Object_2ILjava_lang_Object_2ILjava_lang_Object_2IILjava_lang_Object_2ILjava_lang_Object_2ILjava_lang_Object_2I(JNIEnv *env, jobject _unused, jlong command_queue, jlong image, jint blocking_map, jlong map_flags, jobject origin, jint origin_byte_offset, jobject range, jint range_byte_offset, jobject image_row_pitch, jint image_row_pitch_byte_offset, jobject image_slice_pitch, jint image_slice_pitch_byte_offset, jint num_events_in_wait_list, jobject event_wait_list, jint event_wait_list_byte_offset, jobject event, jint event_byte_offset, jobject errcode_ret, jint errcode_ret_byte_offset) {

  size_t * _origin_ptr = NULL;
  size_t * _range_ptr = NULL;
  size_t * _image_row_pitch_ptr = NULL;
  size_t * _image_slice_pitch_ptr = NULL;
  cl_event * _event_wait_list_ptr = NULL;
  cl_event * _event_ptr = NULL;
  int32_t * _errcode_ret_ptr = NULL;
  size_t elements = 0;
  size_t depth = 0;
  size_t mapped_size;
  cl_int status;

  void *  _res;

    if (origin != NULL) {
        _origin_ptr = (size_t *) (((char*) (*env)->GetDirectBufferAddress(env, origin)) + origin_byte_offset);
    }
    if (range != NULL) {
        _range_ptr = (size_t *) (((char*) (*env)->GetDirectBufferAddress(env, range)) + range_byte_offset);
    }
    if (image_row_pitch != NULL) {
        _image_row_pitch_ptr = (size_t *) (((char*) (*env)->GetDirectBufferAddress(env, image_row_pitch)) + image_row_pitch_byte_offset);
    }
    if (image_slice_pitch != NULL) {
        _image_slice_pitch_ptr = (size_t *) (((char*) (*env)->GetDirectBufferAddress(env, image_slice_pitch)) + image_slice_pitch_byte_offset);
    }
    if (event_wait_list != NULL) {
        _event_wait_list_ptr = (cl_event *) (((char*) (*env)->GetDirectBufferAddress(env, event_wait_list)) + event_wait_list_byte_offset);
    }
    if (event != NULL) {
        _event_ptr = (cl_event *) (((char*) (*env)->GetDirectBufferAddress(env, event)) + event_byte_offset);
    }
    if (errcode_ret != NULL) {
        _errcode_ret_ptr = (int32_t *) (((char*) (*env)->GetDirectBufferAddress(env, errcode_ret)) + errcode_ret_byte_offset);
    }

  _res = clEnqueueMapImage((cl_command_queue) (intptr_t) command_queue, (cl_mem) (intptr_t) image, (uint32_t) blocking_map, (uint64_t) map_flags, (size_t *) _origin_ptr, (size_t *) _range_ptr, (size_t *) _image_row_pitch_ptr, (size_t *) _image_slice_pitch_ptr, (uint32_t) num_events_in_wait_list, (cl_event *) _event_wait_list_ptr, (cl_event *) _event_ptr, (int32_t *) _errcode_ret_ptr);
  if (_res == NULL) return NULL;

  // query element size and depth, then calculate the size of the mapped region
  status  = clGetImageInfo((cl_mem) (intptr_t) image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), (void *) &elements, NULL);
  status |= clGetImageInfo((cl_mem) (intptr_t) image, CL_IMAGE_DEPTH, sizeof(size_t), (void *) &depth, NULL);

  if(status != CL_SUCCESS) return NULL;

  if(depth == 0) { // 2D: CL_IMAGE_DEPTH returns 0 for non-3D images
      mapped_size = (*_image_row_pitch_ptr)   * (_range_ptr[1] - 1) + _range_ptr[0] * elements;
  }else{           // 3D
      mapped_size = (*_image_slice_pitch_ptr) * (_range_ptr[2] - 1)
                  + (*_image_row_pitch_ptr)   * (_range_ptr[1] - 1) + _range_ptr[0] * elements;
  }

  return (*env)->NewDirectByteBuffer(env, _res, mapped_size);
}
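The row pitch returned by clEnqueueMapImage is what makes the mapped bytes addressable from the host. A minimal sketch of how a caller would locate a pixel inside a mapped 2D region (the helper name and parameters are illustrative, not part of the glue code above):

/* Sketch: address of pixel (x, y) inside a mapped 2D region. 'mapped' is the
 * pointer returned by clEnqueueMapImage, 'row_pitch' the value it reported,
 * and 'element_size' the CL_IMAGE_ELEMENT_SIZE of the image. */
static void *pixel_address(void *mapped, size_t row_pitch, size_t element_size,
                           size_t x, size_t y)
{
    return (char *) mapped + y * row_pitch + x * element_size;
}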
Example #2
File: opencl.c Project: YongHaoWu/wine-hub
void * WINAPI wine_clEnqueueMapImage(cl_command_queue command_queue, cl_mem image, cl_bool blocking_map,
                                     cl_map_flags map_flags, size_t * origin, size_t * region,
                                     size_t * image_row_pitch, size_t * image_slice_pitch,
                                     cl_uint num_events_in_wait_list, cl_event * event_wait_list, cl_event * event, cl_int * errcode_ret)
{
    void * ret;
    TRACE("\n");
    ret = clEnqueueMapImage(command_queue, image, blocking_map, map_flags, origin, region, image_row_pitch, image_slice_pitch, num_events_in_wait_list, event_wait_list, event, errcode_ret);
    return ret;
}
Example #3
/**
 * \brief ocl::Image::map Maps the Image into the host memory.
 *
 *  No data transfer is performed. Note that in order to map data of the Image, the device of the active queue must be a CPU and the Image
 *  must have been allocated with the access mode AllocHost. You cannot modify the Image with OpenCL until it is unmapped.
 * \param ptr receives the address of the mapped host memory.
 * \param origin is the 3D offset (in pixels) from which the image is read.
 * \param region is the 3D region (in pixels) to be mapped.
 * \param access specifies in what way the host_mem is used.
 * \param list contains all events for which this command has to wait.
 * \return event which can be integrated into other EventList
 */
ocl::Event ocl::Image::mapAsync(void **ptr, size_t *origin, const size_t *region, Memory::Access access, const EventList &list) const
{
    TRUE_ASSERT(this->activeQueue().device().isCpu(), "Device " << this->activeQueue().device().name() << " is not a cpu!");
    cl_int status;
    cl_event event_id;
    cl_map_flags flags = access;
    *ptr = clEnqueueMapImage(this->activeQueue().id(), this->id(), CL_TRUE, flags,
                                      origin, region, 0, 0, list.size(), list.events().data(), &event_id, &status);
    OPENCL_SAFE_CALL (status ) ;
    TRUE_ASSERT(*ptr != NULL, "Could not map image!");
    return ocl::Event(event_id, this->context());
}
Example #4
/**
 * \brief ocl::Image::map Maps the Image into the host memory.
 *
 *  No data transfer is performed. Note that in order to map data of the Image, the device of the active queue must be a CPU and the Image
 *  must have been allocated with the access mode AllocHost. You cannot modify the Image with OpenCL until it is unmapped.
 * \param origin is the 3D offset (in pixels) from which the image is read.
 * \param region is the 3D region (in pixels) to be mapped.
 * \param access specifies in what way the host_mem is used.
 * \return a void pointer to the mapped host memory location.
 */
void * ocl::Image::map(size_t *origin, const size_t *region, Memory::Access access) const
{
    TRUE_ASSERT(this->activeQueue().device().isCpu(), "Device " << this->activeQueue().device().name() << " is not a cpu!");
    cl_int status;
    cl_map_flags flags = access;
    void *pointer = clEnqueueMapImage(this->activeQueue().id(), this->id(), CL_TRUE, flags,
                                      origin, region, 0, 0, 0, NULL, NULL, &status);
    OPENCL_SAFE_CALL (status ) ;
    TRUE_ASSERT(pointer != NULL, "Could not map image!");
    OPENCL_SAFE_CALL( clFinish(this->activeQueue().id()) );
    return pointer;
}
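As the comment block notes, the Image stays mapped until it is unmapped. In raw OpenCL that is a clEnqueueUnmapMemObject call on the same queue and memory object; a minimal sketch under that assumption (the parameters stand for the values the wrapper holds internally, i.e. activeQueue().id(), id(), and the pointer returned by map()):

// Sketch of the raw OpenCL unmap that must eventually follow a successful map.
static void unmapImage(cl_command_queue queue, cl_mem image_mem, void *mapped_ptr)
{
    cl_int err = clEnqueueUnmapMemObject(queue, image_mem, mapped_ptr,
                                         0, NULL, NULL);
    OPENCL_SAFE_CALL(err);  // same error-checking macro as used above
    // Once the unmap command completes, mapped_ptr must no longer be dereferenced.
}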
Example #5
File: main.c Project: syakesaba/Myn900
void run()
{
	cl_int retval;
	void *destination_data = NULL;
	const int work_dimensions = 2;
	size_t image_dimensions[3] = { bitmapwidth, bitmapheight, 1 }; // region[2] must be 1 for a 2D image
	size_t image_origin[3] = { 0, 0, 0 };
	size_t image_pitch = 0;


	// set kernel arguments. these match the kernel function declaration
	clSetKernelArg(kernel, 0, sizeof(cl_mem), &srcimage);
	clSetKernelArg(kernel, 1, sizeof(cl_mem), &destimage);


	// execute the kernel function
	printf("Applying function '%s' to image\n", kernel_fn_name);

	// 1st param: command queue
	// 2nd param: kernel to execute
	// 3rd param: amount of work dimensions to use (1-3)
	// 4th param: global work offset, not used, set to NULL
	// 5th param: global work size, work dimension range
	// 6th param: local work size, or NULL if OpenCL divides global work size to local
	// 7th param: num events in wait list
	// 8th param: event wait list
	// 9th param: event
	clEnqueueNDRangeKernel(	command_queue,
							kernel,
							work_dimensions,
							NULL,
							image_dimensions,
							NULL,
							0,
							NULL,
							NULL);


	// add image data mapping to command queue
	// image data must be mapped to access it

	// 1st param: command queue
	// 2nd param: image to map
	// 3rd param: run blocking or in non-blocking mode
	// 4th param: image origin to map from
	// 5th param: image dimensions to map
	// 6th param: returns the row pitch (bytes per row) of the mapped data
	// 7th param: returns the slice pitch (3D images only), or NULL
	// 8th param: num events in wait list
	// 9th param: event wait list
	// 10th param: event
	// 11th param: error code on return, or NULL if not used
	destination_data = clEnqueueMapImage(	command_queue,
											destimage,
											CL_FALSE,
											CL_MAP_READ,
											image_origin,
											image_dimensions,
											&image_pitch,
											NULL,
											0,
											NULL,
											NULL,
											&retval);


	// run queue until all items are finished
	clFinish(command_queue);

	// write destination image data to bitmap file
	const char *output_name = "output.bmp";
	writeBitmapFile(output_name, destination_data, bitmapwidth, bitmapheight);
	printf("Wrote image '%s'\n", output_name);

	// unmap destination image

	// 1st param: command queue
	// 2nd param: image to unmap
	// 3rd param: pointer received from mapping function
	// 4th param: num events in wait list
	// 5th param: event wait list
	// 6th param: event
	clEnqueueUnmapMemObject(	command_queue,
								destimage,
								destination_data,
								0,
								NULL,
								NULL);

	// run queue until all items are finished
	clFinish(command_queue);

	printf("All done.\n");
}
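Because the map above is enqueued with CL_FALSE, the data only becomes safe to read after clFinish(). An alternative sketch that waits on the map's own event instead of draining the whole queue (it reuses the variables declared in run() above):

	// Sketch: non-blocking map followed by a wait on its event rather than clFinish().
	cl_event map_event;
	destination_data = clEnqueueMapImage(command_queue, destimage, CL_FALSE,
	                                     CL_MAP_READ, image_origin, image_dimensions,
	                                     &image_pitch, NULL,
	                                     0, NULL, &map_event, &retval);
	clWaitForEvents(1, &map_event);	// destination_data is now valid on the host
	clReleaseEvent(map_event);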
Example #6
int
main(void)
{
  cl_int err;
  cl_platform_id platforms[MAX_PLATFORMS];
  cl_uint nplatforms;
  cl_device_id devices[MAX_DEVICES];
  cl_uint ndevices;
  cl_uint i, j;
  size_t el, row, col;

  CHECK_CL_ERROR(clGetPlatformIDs(MAX_PLATFORMS, platforms, &nplatforms));

  for (i = 0; i < nplatforms; i++)
  {
    CHECK_CL_ERROR(clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, MAX_DEVICES,
      devices, &ndevices));

    /* Only test the devices we actually have room for */
    if (ndevices > MAX_DEVICES)
      ndevices = MAX_DEVICES;

    for (j = 0; j < ndevices; j++)
    {
      /* skip devices that do not support images */
      cl_bool has_img;
      CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_IMAGE_SUPPORT, sizeof(has_img), &has_img, NULL));
      if (!has_img)
        continue;

      cl_context context = clCreateContext(NULL, 1, &devices[j], NULL, NULL, &err);
      CHECK_OPENCL_ERROR_IN("clCreateContext");
      cl_command_queue queue = clCreateCommandQueue(context, devices[j], 0, &err);
      CHECK_OPENCL_ERROR_IN("clCreateCommandQueue");

      cl_ulong alloc;
      size_t max_height;
      size_t max_width;
#define MAXALLOC (1024U*1024U)

      CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_MAX_MEM_ALLOC_SIZE,
          sizeof(alloc), &alloc, NULL));
      CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_IMAGE2D_MAX_WIDTH,
          sizeof(max_width), &max_width, NULL));
      CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_IMAGE2D_MAX_HEIGHT,
          sizeof(max_height), &max_height, NULL));


      while (alloc > MAXALLOC)
        alloc /= 2;

      // fit at least one max_width inside the alloc (shrink max_width for this)
      while (max_width*pixel_size > alloc)
        max_width /= 2;

      // round the element count down to a whole multiple of max_width
      const size_t nels = (alloc/pixel_size/max_width)*max_width;
      const size_t buf_size = nels*pixel_size;

      cl_image_desc img_desc;
      memset(&img_desc, 0, sizeof(img_desc));
      img_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
      img_desc.image_width = max_width;
      img_desc.image_height = nels/max_width;
      img_desc.image_depth = 1;

      cl_ushort null_pixel[4] = {0, 0, 0, 0};
      cl_ushort *host_buf = malloc(buf_size);
      TEST_ASSERT(host_buf);

      for (el = 0; el < nels; el+=4) {
        host_buf[el] = el & CHANNEL_MAX;
        host_buf[el+1] = (CHANNEL_MAX - el) & CHANNEL_MAX;
        host_buf[el+2] = (CHANNEL_MAX/((el & 1) + 1) - el) & CHANNEL_MAX;
        host_buf[el+3] = (CHANNEL_MAX - el/((el & 1) + 1)) & CHANNEL_MAX;
      }

      cl_mem buf = clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err);
      CHECK_OPENCL_ERROR_IN("clCreateBuffer");
      cl_mem img = clCreateImage(context, CL_MEM_READ_WRITE, &img_format, &img_desc, NULL, &err);
      CHECK_OPENCL_ERROR_IN("clCreateImage");

      CHECK_CL_ERROR(clEnqueueWriteBuffer(queue, buf, CL_TRUE, 0, buf_size, host_buf, 0, NULL, NULL));

      const size_t offset = 0;
      const size_t origin[] = {0, 0, 0};
      const size_t region[] = {img_desc.image_width, img_desc.image_height, 1};

      CHECK_CL_ERROR(clEnqueueCopyBufferToImage(queue, buf, img, offset, origin, region, 0, NULL, NULL));

      size_t row_pitch, slice_pitch;
      cl_ushort *img_map = clEnqueueMapImage(queue, img, CL_TRUE, CL_MAP_READ, origin, region,
        &row_pitch, &slice_pitch, 0, NULL, NULL, &err);
      CHECK_OPENCL_ERROR_IN("clEnqueueMapImage");

      CHECK_CL_ERROR(clFinish(queue));

      for (row = 0; row < img_desc.image_height; ++row) {
        for (col = 0; col < img_desc.image_width; ++col) {
          cl_ushort *img_pixel = (cl_ushort*)((char*)img_map + row*row_pitch) + col*4;
          cl_ushort *buf_pixel = host_buf + (row*img_desc.image_width + col)*4;

          if (memcmp(img_pixel, buf_pixel, pixel_size) != 0)
            printf("%zu %zu %zu : %x %x %x %x | %x %x %x %x\n",
              row, col, (size_t)(buf_pixel - host_buf),
              buf_pixel[0],
              buf_pixel[1],
              buf_pixel[2],
              buf_pixel[3],
              img_pixel[0],
              img_pixel[1],
              img_pixel[2],
              img_pixel[3]);

          TEST_ASSERT(memcmp(img_pixel, buf_pixel, pixel_size) == 0);

        }
      }

      CHECK_CL_ERROR(clEnqueueUnmapMemObject(queue, img, img_map, 0, NULL, NULL));

      /* Clear the buffer, and ensure it has been cleared */
      CHECK_CL_ERROR(clEnqueueFillBuffer(queue, buf, null_pixel, sizeof(null_pixel), 0, buf_size, 0, NULL, NULL));
      cl_ushort *buf_map = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_READ, 0, buf_size, 0, NULL, NULL, &err);
      CHECK_OPENCL_ERROR_IN("clEnqueueMapBuffer");

      CHECK_CL_ERROR(clFinish(queue));

      for (el = 0; el < nels; ++el) {
#if 0 // debug
        if (buf_map[el] != 0) {
          printf("%zu/%zu => %u\n", el, nels, buf_map[el]);
        }
#endif
        TEST_ASSERT(buf_map[el] == 0);
      }

      CHECK_CL_ERROR(clEnqueueUnmapMemObject(queue, buf, buf_map, 0, NULL, NULL));

      /* Copy data from image to buffer, and check that it's again equal to the original buffer */
      CHECK_CL_ERROR(clEnqueueCopyImageToBuffer(queue, img, buf, origin, region, offset, 0, NULL, NULL));
      buf_map = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_READ, 0, buf_size, 0, NULL, NULL, &err);
      CHECK_CL_ERROR(clFinish(queue));

      TEST_ASSERT(memcmp(buf_map, host_buf, buf_size) == 0);

      CHECK_CL_ERROR (
          clEnqueueUnmapMemObject (queue, buf, buf_map, 0, NULL, NULL));
      CHECK_CL_ERROR (clFinish (queue));

      free(host_buf);
      CHECK_CL_ERROR (clReleaseMemObject (img));
      CHECK_CL_ERROR (clReleaseMemObject (buf));
      CHECK_CL_ERROR (clReleaseCommandQueue (queue));
      CHECK_CL_ERROR (clReleaseContext (context));
    }
  }
  return EXIT_SUCCESS;
}
Example #7
bool CL_Image3D::Map(const CL_CommandQueue* pCommandQueue, CL_MapAccess AccessType, size_t uOriginX, size_t uOriginY, size_t uOriginZ, size_t uWidth, size_t uHeight, size_t uDepth, void** ppMappedOutput, bool bIsBlocking, CL_Event* pNewEvent, const CL_EventPool* pWaitList)
{
	CL_CPP_CONDITIONAL_RETURN_FALSE(!m_Image);
	CL_CPP_CONDITIONAL_RETURN_FALSE(!pCommandQueue);
	CL_CPP_CONDITIONAL_RETURN_FALSE(!ppMappedOutput);

	cl_map_flags uFlagParam = 0;

	//	Determine the access type.
	switch(AccessType)
	{
		case CL_MapAccess_Read: uFlagParam = CL_MAP_READ; break;
		case CL_MapAccess_Write: uFlagParam = CL_MAP_WRITE; break;

		default:
			return false;
	}


	const size_t uOrigin[3] =
	{
		uOriginX,
		uOriginY,
		uOriginZ
	};

	const size_t uRegion[3] =
	{
		uWidth,
		uHeight,
		uDepth
	};

	cl_uint uNumWaitEvents = pWaitList ? pWaitList->GetNumEvents() : 0;
	const cl_event* pWaitEvents = pWaitList ? pWaitList->GetEventPool() : NULL;
	cl_event NewEvent = NULL;

	//	The row and slice pitch pointers must be passed to clEnqueueMapImage(), otherwise the call fails.
	//	We don't use the returned values any further here.
	size_t uOutputRowPitch = 0;
	size_t uOutputSlicePitch = 0;

	const cl_command_queue CommandQueue = pCommandQueue->GetCommandQueue();

	//	Map the image into host-accessible memory.
	cl_int iErrorCode = CL_SUCCESS;
	void* pMap = clEnqueueMapImage(CommandQueue, m_Image, (bIsBlocking) ? CL_TRUE : CL_FALSE, uFlagParam, uOrigin, uRegion, &uOutputRowPitch, &uOutputSlicePitch, uNumWaitEvents, pWaitEvents, &NewEvent, &iErrorCode);

	CL_CPP_CATCH_ERROR(iErrorCode);
	CL_CPP_CONDITIONAL_RETURN_FALSE(!pMap);

	(*ppMappedOutput) = pMap;

	if(NewEvent)
	{
		if(pNewEvent)
			pNewEvent->SetEvent(NewEvent);

		clReleaseEvent(NewEvent);
	}

	return true;
}
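With this many positional parameters a call site is easy to get wrong; a hypothetical caller might look like the sketch below (the helper, object, and extent names are assumptions, only the Map() signature and CL_MapAccess_Read come from the code above):

//	Hypothetical caller: blocking, read-only map of a whole uWidth x uHeight x uDepth image.
bool MapWholeImageForRead(CL_Image3D& Image, const CL_CommandQueue* pQueue,
                          size_t uWidth, size_t uHeight, size_t uDepth, void** ppMapped)
{
	CL_Event MapEvent;
	return Image.Map(pQueue, CL_MapAccess_Read,
	                 0, 0, 0,                 //	origin (x, y, z)
	                 uWidth, uHeight, uDepth, //	region to map
	                 ppMapped,
	                 true,                    //	blocking map
	                 &MapEvent,
	                 NULL);                   //	no wait list
}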
Example #8
int main(int argc, char** argv)
{
	GLFWwindow* glfwwindow;
	{//OpenGL/GLFW Init
		if (!glfwInit()) {
			std::cerr << "Error: GLFW init failed" << std::endl;
			exit(EXIT_FAILURE);
		}
		glfwwindow = glfwCreateWindow(200, 200, "Nvidia interop bug demo", nullptr, nullptr);
		glfwMakeContextCurrent(glfwwindow);
		glewInit();
		std::cout << "OpenGL Info: " << (char*)glGetString(GL_VENDOR) << " " << (char*)glGetString(GL_RENDERER) << std::endl;
	}
	cl_context clcontext;
	cl_command_queue clqueue;
	{//OpenCL init
		cl_platform_id platform = nullptr;
		{//Platform init
			cl_uint numPlatforms;
			clGetPlatformIDs(0, nullptr, &numPlatforms);
			if (numPlatforms == 0)
			{
				std::cerr << "Error: No OpenCL platforms available" << std::endl;
				return EXIT_FAILURE;
			}
			cl_platform_id* all_platforms = new cl_platform_id[numPlatforms];
			clGetPlatformIDs(numPlatforms, all_platforms, nullptr);
			for (size_t i = 0; i < numPlatforms; i++) //Select Nvidia out of the platforms
			{
				char name[300];
				clGetPlatformInfo(all_platforms[i], CL_PLATFORM_NAME, sizeof(name), &name, nullptr);
				std::string namestring(name);
				if (namestring.find("NVIDIA") != std::string::npos || namestring.find("Nvidia") != std::string::npos)
					platform = all_platforms[i];	
			}
			if (platform == nullptr) {
				std::cerr << "No Nvidia OpenCL platform found, will default to platform 0" << std::endl;
				platform = all_platforms[0];
			}

			delete[] all_platforms;
		}
		{ //Create shared context
			cl_context_properties properties[7];
			properties[0] = CL_CONTEXT_PLATFORM; //This is different for other operating systems than Windows
			properties[1] = (cl_context_properties)platform;
			properties[2] = CL_GL_CONTEXT_KHR;
			properties[3] = (cl_context_properties)wglGetCurrentContext();
			properties[4] = CL_WGL_HDC_KHR;
			properties[5] = (cl_context_properties)wglGetCurrentDC();
			properties[6] = 0;

			clcontext = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, nullptr, nullptr, nullptr);
		}
		cl_device_id cldevice;
		{ //Create cldevice
			cl_device_id* devices;
			cl_command_queue commandQueue = nullptr;
			size_t numDevices = 0;

			// First get the size of the devices buffer
			clGetContextInfo(clcontext, CL_CONTEXT_DEVICES, 0, nullptr, &numDevices);

			if (numDevices == 0)
			{
				std::cerr << "Error: No OpenCL devices available" << std::endl;
				return EXIT_FAILURE;
			}
			devices = new cl_device_id[numDevices];
			clGetContextInfo(clcontext, CL_CONTEXT_DEVICES, numDevices, devices, nullptr);
			cldevice = devices[0];
			delete[] devices;
		}
		{ //Create CL command queue
			clqueue = clCreateCommandQueue(clcontext, cldevice, 0, nullptr);
		}
		char platformname[300];
		char devicename[300];
		clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(platformname), &platformname, nullptr);
		clGetDeviceInfo(cldevice, CL_DEVICE_NAME, sizeof(devicename), &devicename, nullptr);
		std::cout << "OpenCL platform " << platformname << " device " << devicename << std::endl;
	}
	size_t size = 200 * 200 * 4; //w=200, h=200, 4 bytes per pixel (RGBA, 1 byte per channel)
	char* databuffer = new char[size];
	GLuint glbuffer, gltexture;
	cl_mem unsharedbuffer, sharedbuffer, unsharedtexture, sharedtexture;
	{ //Init test data
		glGenBuffers(1, &glbuffer);
		glBindBuffer(GL_ARRAY_BUFFER, glbuffer);
		glBufferData(GL_ARRAY_BUFFER, size, databuffer, GL_STREAM_DRAW);
		glBindBuffer(GL_ARRAY_BUFFER, GL_NONE);

		glGenTextures(1, &gltexture);
		glBindTexture(GL_TEXTURE_2D, gltexture);
		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 200, 200, 0, GL_RGBA, GL_UNSIGNED_BYTE, databuffer);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); //Intel needs this for shared textures
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); //Intel needs this for shared textures
		glBindTexture(GL_TEXTURE_2D, GL_NONE);

		sharedtexture = clCreateFromGLTexture(clcontext, CL_MEM_READ_WRITE, GL_TEXTURE_2D, 0, gltexture, nullptr);
		sharedbuffer = clCreateFromGLBuffer(clcontext, CL_MEM_READ_WRITE, glbuffer, nullptr);

		unsharedbuffer = clCreateBuffer(clcontext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size, databuffer, nullptr);
		cl_image_format imgformat;
		cl_image_desc desc;
		imgformat.image_channel_data_type = CL_UNSIGNED_INT8;
		imgformat.image_channel_order = CL_RGBA;
		desc.image_type = CL_MEM_OBJECT_IMAGE2D;
		desc.image_width = 200;
		desc.image_height = 200;
		desc.image_depth = 1;
		desc.image_array_size = 1;
		desc.image_row_pitch = 0;
		desc.image_slice_pitch = 0;
		desc.num_mip_levels = 0;
		desc.num_samples = 0;
		desc.buffer = nullptr;
		unsharedtexture = clCreateImage(clcontext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &imgformat, &desc, databuffer, nullptr);
	}
	{
		const size_t origin[3] = { 0, 0, 0 };
		const size_t region[3] = { 200, 200, 1 };
		size_t pitch;
		//
		//MAIN PART BEGINS HERE
		//
		{ //OpenGL buffer
			std::cout << "Mapping buffer with OpenGL: ";
			glBindBuffer(GL_ARRAY_BUFFER, glbuffer);
			void* glmapptr = glMapBuffer(GL_ARRAY_BUFFER, GL_READ_WRITE);
			glUnmapBuffer(GL_ARRAY_BUFFER);
			glBindBuffer(GL_ARRAY_BUFFER, GL_NONE);
			std::cout << "OK" << std::endl;
			glFinish();
		}
		{ //OpenCL unshared texture
			std::cout << "Mapping unshared texture with OpenCL: ";
			void* unsimgptr = clEnqueueMapImage(clqueue, unsharedtexture, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, origin, region, &pitch, nullptr, 0, nullptr, nullptr, nullptr); //This API call works fine for unshared objects
			clEnqueueUnmapMemObject(clqueue, unsharedtexture, unsimgptr, 0, nullptr, nullptr);
			std::cout << "OK" << std::endl;
		}
		{ //OpenCL shared texture
			std::cout << "Mapping shared texture with OpenCL: ";
			clEnqueueAcquireGLObjects(clqueue, 1, &sharedtexture, 0, nullptr, nullptr);
			void* shdimgptr = clEnqueueMapImage(clqueue, sharedtexture, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, origin, region, &pitch, nullptr, 0, nullptr, nullptr, nullptr); //This API call works fine for shared objects
			clEnqueueUnmapMemObject(clqueue, sharedtexture, shdimgptr, 0, nullptr, nullptr);
			clEnqueueReleaseGLObjects(clqueue, 1, &sharedtexture, 0, nullptr, nullptr);
			std::cout << "OK" << std::endl;
		}
		{ //OpenCL unshared buffer
			std::cout << "Mapping unshared buffer with OpenCL: ";
			void* unsbufptr = clEnqueueMapBuffer(clqueue, unsharedbuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, nullptr, nullptr, nullptr); //This API call works fine for unshared buffers
			clEnqueueUnmapMemObject(clqueue, unsharedbuffer, unsbufptr, 0, nullptr, nullptr);
			std::cout << "OK" << std::endl;
		}
		{ //OpenCL shared buffer
			std::cout << "Mapping shared buffer with OpenCL (EXPECTING CRASH ON NVIDIA SYSTEMS): " << std::endl;
			clEnqueueAcquireGLObjects(clqueue, 1, &sharedbuffer, 0, nullptr, nullptr);
			//
			//CRITICAL PART BEGINS HERE
			//

			void* shdbufptr = clEnqueueMapBuffer(clqueue, sharedbuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, nullptr, nullptr, nullptr);
			//On Nvidia systems when using shared objects, error 0xC0000005 occurs in ntdll.dll: write access violation at position 0xSOMETHING
			//This leaves my application in an unusable state
			//But it works fine everywhere else (tested on ARM, AMD, Intel systems)

			//
			//CRITICAL PART ENDS HERE
			//
			std::cout << "did not fail" << std::endl;
			clEnqueueUnmapMemObject(clqueue, sharedbuffer, shdbufptr, 0, nullptr, nullptr);
			clEnqueueReleaseGLObjects(clqueue, 1, &sharedbuffer, 0, nullptr, nullptr);
			std::cout << "OK" << std::endl;
		}
		//
		//MAIN PART ENDS HERE
		//
	}
	clFinish(clqueue);

	delete[] databuffer;
	clReleaseMemObject(sharedbuffer);
	clReleaseMemObject(unsharedbuffer);
	clReleaseMemObject(sharedtexture);
	clReleaseMemObject(unsharedtexture);

	clReleaseCommandQueue(clqueue);
	clReleaseContext(clcontext);

	glDeleteTextures(1, &gltexture);
	glDeleteBuffers(1, &glbuffer);

	glfwDestroyWindow(glfwwindow);
	glfwTerminate();
	return EXIT_SUCCESS;
}
Example #9
END_TEST

START_TEST (test_copy_image_buffer)
{
    cl_platform_id platform = 0;
    cl_device_id device;
    cl_context ctx;
    cl_command_queue queue;
    cl_mem image, buffer;
    cl_int result;
    cl_event event;

    unsigned char image_buffer[3*3*4] = {
        255, 0, 0, 0,       0, 255, 0, 0,       0, 0, 255, 0,
        128, 0, 0, 0,       0, 128, 0, 0,       0, 0, 128, 0,
        64, 0, 0, 0,        0, 64, 0, 0,        0, 0, 64, 0
    };

    // Square that will be put in image_buffer at (1, 0)
    unsigned char buffer_buffer[2*2*4+1] = {
        33, // a padding byte
        255, 255, 255, 0,   255, 0, 255, 0,
        0, 255, 255, 0,     255, 255, 0, 0
    };

    // What we must get once re-reading 2x2 rect at (1, 1)
    unsigned char correct_data[2*2*4] = {
        0, 255, 255, 0,     255, 255, 0, 0,
        0, 64, 0, 0,        0, 0, 64, 0
    };

    cl_image_format fmt;

    fmt.image_channel_data_type = CL_UNORM_INT8;
    fmt.image_channel_order = CL_RGBA;

    size_t origin[3] = {1, 0, 0};
    size_t region[3] = {2, 2, 1};

    result = clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, 0);
    fail_if(
        result != CL_SUCCESS,
        "unable to get the default device"
    );

    ctx = clCreateContext(0, 1, &device, 0, 0, &result);
    fail_if(
        result != CL_SUCCESS || ctx == 0,
        "unable to create a valid context"
    );

    queue = clCreateCommandQueue(ctx, device, 0, &result);
    fail_if(
        result != CL_SUCCESS || queue == 0,
        "cannot create a command queue"
    );

    image = clCreateImage2D(ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &fmt,
                            3, 3, 0, image_buffer, &result);
    fail_if(
        result != CL_SUCCESS,
        "unable to create a 3x3 bgra image"
    );

    buffer = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                            sizeof(buffer_buffer), buffer_buffer, &result);
    fail_if(
        result != CL_SUCCESS,
        "unable to create a buffer object"
    );

    // Write buffer in image
    result = clEnqueueCopyBufferToImage(queue, buffer, image, 1, origin, region,
                                        0, 0, &event);
    fail_if(
        result != CL_SUCCESS,
        "unable to queue a copy buffer to image event, buffer offset 1, image 2x2 @ (1, 0)"
    );

    result = clWaitForEvents(1, &event);
    fail_if(
        result != CL_SUCCESS,
        "cannot wait for event"
    );

    clReleaseEvent(event);

    // Read it back into buffer, again with an offset
    origin[1] = 1;
    result = clEnqueueCopyImageToBuffer(queue, image, buffer, origin, region, 1,
                                        0, 0, &event);
    fail_if(
        result != CL_SUCCESS,
        "unable to queue a copy image to buffer event, buffer offset 1, image 2x2 @ (1, 1)"
    );

    result = clWaitForEvents(1, &event);
    fail_if(
        result != CL_SUCCESS,
        "cannot wait for event"
    );

    fail_if(
        std::memcmp(buffer_buffer + 1, correct_data, sizeof(correct_data)) != 0,
        "copying data around isn't working the expected way"
    );

    // Map the image and check pointers
    unsigned char *mapped;
    size_t row_pitch;

    origin[0] = 0;
    origin[1] = 0;
    origin[2] = 0;

    mapped = (unsigned char *)clEnqueueMapImage(queue, image, 1, CL_MAP_READ,
                                                origin, region, &row_pitch, 0, 0,
                                                0, 0, &result);
    fail_if(
        result != CL_SUCCESS,
        "unable to map an image"
    );
    fail_if(
        mapped != image_buffer,
        "mapped aread doesn't match host ptr"
    );

    clReleaseEvent(event);
    clReleaseMemObject(image);
    clReleaseMemObject(buffer);
    clReleaseCommandQueue(queue);
    clReleaseContext(ctx);
}
void timedImageMappedWrite( cl_command_queue queue,
                            cl_mem image,
                            unsigned char v )
{
    CPerfCounter t1, t2, t3;
    cl_int ret;
    cl_event ev;
    void *ptr;
    cl_map_flags mapFlag = CL_MAP_READ | CL_MAP_WRITE;

    t1.Reset();
    t2.Reset();
    t3.Reset();

    if( !mapRW )
        mapFlag = CL_MAP_WRITE;

    size_t  rowPitch;

    t1.Start();

    ptr = clEnqueueMapImage( queue,
                             image, 
                             CL_FALSE, 
                             mapFlag,
                             imageOrigin, imageRegion,
                             &rowPitch, NULL,
                             0, NULL,
                             &ev,
                             &ret );
    ASSERT_CL_RETURN( ret );

    clFlush( queue );
    spinForEventsComplete( 1, &ev );

    t1.Stop();
    t2.Start();

    memset2DPitch( ptr, v, 
                 imageRegion[0] * nChannels * nBytesPerChannel,
                 imageRegion[1], rowPitch );

    t2.Stop();
    t3.Start();

    ret = clEnqueueUnmapMemObject( queue,
                                   image,
                                   (void *) ptr,
                                   0, NULL, &ev );
    ASSERT_CL_RETURN( ret );

    clFlush( queue );
    spinForEventsComplete( 1, &ev );

    t3.Stop();

    const char *msg;

    if( mapRW )
        msg = "clEnqueueMapImage(READ|WRITE):";
    else
        msg = "clEnqueueMapImage(WRITE):";

    tlog->Timer( "%32s  %lf s [ %8.2lf GB/s ]\n", msg, t1.GetElapsedTime(), nBytesRegion, 1 );
    tlog->Timer( "%32s  %lf s   %8.2lf GB/s\n", "memset():", t2.GetElapsedTime(), nBytesRegion, 1 );    
    tlog->Timer( "%32s  %lf s [ %8.2lf GB/s ]\n", "clEnqueueUnmapMemObject():", t3.GetElapsedTime(), nBytesRegion, 1 );
}
void timedImageMappedRead( cl_command_queue queue,
                           cl_mem image,
                           unsigned char v )
{
    CPerfCounter t1, t2, t3;
    cl_int ret;
    cl_event ev;
    void *ptr;
    cl_map_flags mapFlag = CL_MAP_READ | CL_MAP_WRITE;

    t1.Reset();
    t2.Reset();
    t3.Reset();

     if( !mapRW )
        mapFlag = CL_MAP_READ;

    size_t  rowPitch;

    t1.Start();

    ptr = clEnqueueMapImage( queue,
                             image, 
                             CL_FALSE, 
                             mapFlag,
                             imageOrigin, imageRegion,
                             &rowPitch, NULL,
                             0, NULL,
                             &ev,
                             &ret );
    ASSERT_CL_RETURN( ret );

    clFlush( queue );
    spinForEventsComplete( 1, &ev );

    t1.Stop();
    t2.Start();

    bool verify = readmem2DPitch( ptr, v, rowPitch , (int) imageRegion[1] );

    t2.Stop();
    t3.Start();

    ret = clEnqueueUnmapMemObject( queue,
                                   image,
                                   (void *) ptr,
                                   0, NULL, &ev );
    ASSERT_CL_RETURN( ret );

    clFlush( queue );
    spinForEventsComplete( 1, &ev );

    t3.Stop();

    const char *msg;

    if( mapRW )
        msg = "clEnqueueMapImage(READ|WRITE):";
    else
        msg = "clEnqueueMapImage(READ):";

    tlog->Timer( "%32s  %lf s [ %8.2lf GB/s ]\n", msg, t1.GetElapsedTime(), nBytesRegion, 1 );
    tlog->Timer( "%32s  %lf s   %8.2lf GB/s\n", "CPU read:", t2.GetElapsedTime(), nBytesRegion, 1 ); 

    if( verify )
        tlog->Msg( "%32s\n", "verification ok" );
    else
    {
        tlog->Error( "%32s\n", "verification FAILED" );
        vFailure = true;
    }

    tlog->Timer( "%32s  %lf s [ %8.2lf GB/s ]\n", "clEnqueueUnmapMemObject():", t3.GetElapsedTime(), nBytesRegion, 1 );
}
/**
 * \related cl_Mem_Object_t
 *
 * This function maps an OpenCL Image into host-accessible memory and returns a
 * pointer to the mapped memory region.
 * @param[in,out] self  pointer to the structure in which the 'Map' function
 * pointer is defined to point to this function.
 * @param[in] blocking_map flag of type 'cl_bool' that denotes whether the
 * operation should be blocking or not.
 * @param[in] map_flags mapping flags that denote how the memory object should
 * be mapped.
 * @param[in] time_mode enumeration that denotes how time measurement should be
 * performed.
 * @param[out] evt_to_generate pointer to the OpenCL event that will be generated
 * at the end of the operation.
 * @param[in] explicit_queue command queue to use; if NULL, the object's default
 * data-transfer queue is used.
 *
 * @return pointer to a host-accessible region of memory in case of success, NULL
 * pointer otherwise. In that case the function sets an error value, which is
 * available through the cl_Error_t structure pointed to by 'self->error'.
 *
 * @see cl_err_codes.h for detailed error description.
 * @see 'cl_Error_t' structure for error handling.
 */
static void* Image_Map(
    scow_Mem_Object     *self, 
    cl_bool             blocking_map,
    cl_map_flags        map_flags, 
    TIME_STUDY_MODE     time_mode,
    cl_event            *evt_to_generate, 
    cl_command_queue    explicit_queue)
{
    cl_int ret;

    cl_event mapping_ready, *p_mapping_ready;

    const size_t origin[3] =
    { 0, 0, 0 }, region[3] =
    { self->width, self->height, 1 };

    OCL_CHECK_EXISTENCE(self, NULL);

    if (blocking_map > CL_TRUE)
    {
        self->error->Set_Last_Code(self->error, INVALID_BLOCKING_FLAG);
        return NULL;
    }

    (evt_to_generate != NULL) ?
            (p_mapping_ready = evt_to_generate) : (p_mapping_ready =
                    &mapping_ready);

    // We can't map the object, that is already mapped
    if (self->mapped_to_region != NULL)
    {
        self->error->Set_Last_Code(self->error, BUFFER_IN_USE);
        return VOID_MEM_OBJ_PTR;
    }

    cl_command_queue q =
            (explicit_queue == NULL) ?
                    (self->parent_thread->q_data_dtoh) : (explicit_queue);

    /* Save mapped pointer inside a structure in case if memory object is being
     * destroyed without unmapping it at first.
     */
    self->mapped_to_region = clEnqueueMapImage(q, self->cl_mem_object,
            blocking_map, map_flags, origin, region, &self->row_pitch, NULL, 0,
            NULL, p_mapping_ready, &ret);

    OCL_DIE_ON_ERROR(ret, CL_SUCCESS,
            self->error->Set_Last_Code(self->error, ret), NULL);

    switch (time_mode)
    {
    case MEASURE:
        self->timer->current_time_device = Gather_Time_uS(p_mapping_ready);
        self->timer->total_time_device += self->timer->current_time_device;
        break;

    case DONT_MEASURE:
        break;

    default:
        break;
    }

    if (p_mapping_ready != evt_to_generate){
        clReleaseEvent(*p_mapping_ready);
    }

    return self->mapped_to_region;
}
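A hypothetical caller of Image_Map() might look like the sketch below; only the signature, the default-queue behaviour and the DONT_MEASURE mode come from the excerpt above, the helper name is illustrative:

/* Sketch: blocking, read-only map through the structure's 'Map' function pointer,
 * without time measurement, on the object's default data-transfer queue. */
static void* map_for_reading(scow_Mem_Object *img, cl_event *map_done)
{
    void *host_ptr = img->Map(img, CL_TRUE, CL_MAP_READ, DONT_MEASURE,
                              map_done, NULL /* default queue */);
    /* On failure Map() returns NULL and stores the error code in img->error. */
    return host_ptr;
}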
int 
ImageOverlap::runCLKernels()
{
    cl_int status;
	
	//wait for fill end
	status=clEnqueueMarkerWithWaitList(commandQueue[2],2,eventlist,&enqueueEvent);
	CHECK_OPENCL_ERROR(status,"clEnqueueMarkerWithWaitList failed.(commandQueue[2])");

    // Set appropriate arguments to the kernelOverLap

    // map buffer image
	status = clSetKernelArg(
		kernelOverLap,
		0,
		sizeof(cl_mem),
		&mapImage);
    CHECK_OPENCL_ERROR(status,"clSetKernelArg failed. (mapImage)");

    // fill Buffer image
    status = clSetKernelArg(
        kernelOverLap,
        1,
        sizeof(cl_mem),
        &fillImage);
    CHECK_OPENCL_ERROR(status,"clSetKernelArg failed. (fillImage)");

	// output image
	status = clSetKernelArg(
		kernelOverLap,
		2,
		sizeof(cl_mem),
		&outputImage);
	CHECK_OPENCL_ERROR(status,"clSetKernelArg failed. (outputImage)");

    // Enqueue a kernel run call.
    size_t globalThreads[] = {width, height};
    size_t localThreads[] = {blockSizeX, blockSizeY};

    status = clEnqueueNDRangeKernel(
        commandQueue[2],
        kernelOverLap,
        2,
        NULL,
        globalThreads,
        localThreads,
        1,
        &enqueueEvent,
        NULL);
    CHECK_OPENCL_ERROR(status,"clEnqueueNDRangeKernel failed.");

    // Enqueue Read Image
    size_t origin[] = {0, 0, 0};
    size_t region[] = {width, height, 1};
	size_t  rowPitch; 
	size_t  slicePitch;
    // Read copy
	outputImageData = (cl_uchar4*)clEnqueueMapImage( commandQueue[2],
		outputImage, 
		CL_FALSE, 
		mapFlag,
		origin, region,
		&rowPitch, &slicePitch,
		0, NULL,
		NULL,
		&status );
     CHECK_OPENCL_ERROR(status,"clEnqueueMapImage failed.(commandQueue[2])");

	clFlush(commandQueue[0]);
	clFlush(commandQueue[1]);
    
	status = clEnqueueUnmapMemObject(commandQueue[2],outputImage,(void*)outputImageData,0,NULL,NULL);
	CHECK_OPENCL_ERROR(status,"clEnqueueUnmapMemObject failed.(outputImage)");

	// Wait for the map and unmap commands to finish execution
    status = clFinish(commandQueue[2]);
    CHECK_OPENCL_ERROR(status,"clFinish failed.(commandQueue[2])");


	return SDK_SUCCESS;
}