예제 #1
0
void procOCL_OCV(int texIn, int texOut, int w, int h)
{
    LOGD("Processing OpenCL via OpenCV");
    if(!haveOpenCL)
    {
        LOGE("OpenCL isn't initialized");
        return;
    }

    int64_t t = getTimeMs();
    cl::ImageGL imgIn (theContext, CL_MEM_READ_ONLY,  GL_TEXTURE_2D, 0, texIn);
    std::vector < cl::Memory > images(1, imgIn);
    theQueue.enqueueAcquireGLObjects(&images);
    theQueue.finish();
    cv::UMat uIn, uOut, uTmp;
    cv::ocl::convertFromImage(imgIn(), uIn);
    LOGD("loading texture data to OpenCV UMat costs %d ms", getTimeInterval(t));
    theQueue.enqueueReleaseGLObjects(&images);

    t = getTimeMs();
    //cv::blur(uIn, uOut, cv::Size(5, 5));
    cv::Laplacian(uIn, uTmp, CV_8U);
    cv:multiply(uTmp, 10, uOut);
    cv::ocl::finish();
    LOGD("OpenCV processing costs %d ms", getTimeInterval(t));

    t = getTimeMs();
    cl::ImageGL imgOut(theContext, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, texOut);
    images.clear();
    images.push_back(imgOut);
    theQueue.enqueueAcquireGLObjects(&images);
    cl_mem clBuffer = (cl_mem)uOut.handle(cv::ACCESS_READ);
    cl_command_queue q = (cl_command_queue)cv::ocl::Queue::getDefault().ptr();
    size_t offset = 0;
    size_t origin[3] = { 0, 0, 0 };
    size_t region[3] = { w, h, 1 };
    CV_Assert(clEnqueueCopyBufferToImage (q, clBuffer, imgOut(), offset, origin, region, 0, NULL, NULL) == CL_SUCCESS);
    theQueue.enqueueReleaseGLObjects(&images);
    cv::ocl::finish();
    LOGD("uploading results to texture costs %d ms", getTimeInterval(t));
}
예제 #2
0
/*!
    Copies the contents of this buffer, starting at \a offset to
    \a rect within \a dest.  Returns true if the copy was successful;
    false otherwise.

    This function will block until the request finishes.
    The request is executed on the active command queue for context().

    \sa copyToAsync()
*/
bool QCLBuffer::copyTo
(size_t offset, const QCLImage2D &dest, const QRect &rect)
{
    const size_t dst_origin[3] = {static_cast<size_t>(rect.x()),
                                  static_cast<size_t>(rect.y()), 0
                                 };
    const size_t region[3] = {static_cast<size_t>(rect.width()),
                              static_cast<size_t>(rect.height()), 1
                             };
    cl_event event;
    cl_int error = clEnqueueCopyBufferToImage
                   (context()->activeQueue(), memoryId(), dest.memoryId(),
                    offset, dst_origin, region, 0, 0, &event);
    context()->reportError("QCLBuffer::copyTo(QCLImage2D):", error);
    if (error == CL_SUCCESS) {
        clWaitForEvents(1, &event);
        clReleaseEvent(event);
        return true;
    } else {
        return false;
    }
}
예제 #3
0
/*!
    \param src Valid device buffer
    \param dst Empty device image
    \param mem_size Size of data to copy
*/
void cl_copyBufferToImage(cl_mem buffer, cl_mem image, int height, int width)
{
    static int eventCnt = 0;

    cl_event* eventPtr = NULL, event;

    if(eventsEnabled) {
        eventPtr = &event;
    }

    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {width, height, 1};

    cl_int status;
    status = clEnqueueCopyBufferToImage(commandQueue, buffer, image, 0,
        origin, region, 0, NULL, eventPtr);
    cl_errChk(status, "Copying buffer to image", true);

    if(eventsEnabled) {
        char* eventStr = catStringWithInt("copyBufferToImage", eventCnt++);
        events->newIOEvent(*eventPtr, eventStr);
    }
}
예제 #4
0
/*!
    Copies the contents of this buffer, starting at \a offset to
    \a rect within \a dest.  Returns an event object that can be used
    to wait for the request to finish.

    The request will not start until all of the events in \a after
    have been signaled as finished.  The request is executed on
    the active command queue for context().

    \sa copyTo()
*/
QCLEvent QCLBuffer::copyToAsync
(size_t offset, const QCLImage2D &dest, const QRect &rect,
 const QCLEventList &after)
{
//    const size_t dst_origin[3] = {rect.x(), rect.y(), 0};
//    const size_t region[3] = {rect.width(), rect.height(), 1};
    const size_t dst_origin[3] = {static_cast<size_t>(rect.x()),
                                  static_cast<size_t>(rect.y()), 0
                                 };
    const size_t region[3] = {static_cast<size_t>(rect.width()),
                              static_cast<size_t>(rect.height()), 1
                             };
    cl_event event;
    cl_int error = clEnqueueCopyBufferToImage
                   (context()->activeQueue(), memoryId(), dest.memoryId(),
                    offset, dst_origin, region,
                    after.size(), after.eventData(), &event);
    context()->reportError("QCLBuffer::copyToAsync(QCLImage2D):", error);
    if (error == CL_SUCCESS)
        return QCLEvent(event);
    else
        return QCLEvent();
}
예제 #5
0
int bindTexture(int offset, cl_mem* texture, cl_mem memory, size_t size, cl_channel_type dataType)
{
    size_t origin[3];
    origin[0] = 0;
    origin[1] = 0;
    origin[2] = 0;
    size_t region[3];
    region[0] = size <= MaxImageWidth ? size : MaxImageWidth;
    region[1] = (size + MaxImageWidth - 1) / MaxImageWidth;
    region[2] = 1;

    if(region[0] == 0)
        region[0] = 1;
    if(region[1] == 0)
        region[1] = 1;

    cl_image_format format;
    format.image_channel_order = CL_R;
    format.image_channel_data_type = dataType;

    int err = CL_SUCCESS;
//#ifdef CL_VERSION_1_2
//    cl_image_desc desc = {0};
//    desc.image_width = region[0];
//    desc.image_height = region[1];
//
//    *texture = clCreateImage(context, CL_MEM_READ_ONLY, &format, &desc, NULL, &err);
//#else
    *texture = clCreateImage2D(context, CL_MEM_READ_ONLY, &format, region[0], region[1], 0, NULL, &err);
//#endif
    CHKERR(err, "Unable to create texture!");

    if(size != 0)
	    err = clEnqueueCopyBufferToImage(commands, memory, *texture, 0, origin, region, 0, NULL, NULL);
    CHKERR(err, "Unable to buffer texture!");
    return CL_SUCCESS;
}
예제 #6
0
//-----------------------------------------------------------------------------
//! Run the CL part of the computation
//-----------------------------------------------------------------------------
void RunKernels()
{
    static float t = 0.0f;

	// ----------------------------------------------------------------
    // populate the 2d texture
    {
		// set global and local work item dimensions
		szLocalWorkSize[0] = 16;
		szLocalWorkSize[1] = 16;
		szGlobalWorkSize[0] = shrRoundUp((int)szLocalWorkSize[0], g_texture_2d.width);
		szGlobalWorkSize[1] = shrRoundUp((int)szLocalWorkSize[1], g_texture_2d.height);

		// set the args values
#ifdef USE_STAGING_BUFFER
		ciErrNum |= clSetKernelArg(ckKernel_tex2d, 0, sizeof(g_texture_2d.clMem), (void *) &(g_texture_2d.clMem));
#else
		ciErrNum |= clSetKernelArg(ckKernel_tex2d, 0, sizeof(g_texture_2d.clTexture), (void *) &(g_texture_2d.clTexture));
#endif
		ciErrNum |= clSetKernelArg(ckKernel_tex2d, 1, sizeof(g_texture_2d.clTexture), (void *) &(g_texture_2d.clTexture));
		ciErrNum |= clSetKernelArg(ckKernel_tex2d, 2, sizeof(g_texture_2d.width), &g_texture_2d.width);
		ciErrNum |= clSetKernelArg(ckKernel_tex2d, 3, sizeof(g_texture_2d.height), &g_texture_2d.height);
		ciErrNum |= clSetKernelArg(ckKernel_tex2d, 4, sizeof(g_texture_2d.pitch), &g_texture_2d.pitch);
		ciErrNum |= clSetKernelArg(ckKernel_tex2d, 5, sizeof(t), &t);
		oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
	    
		// launch computation kernel
		ciErrNum |= clEnqueueNDRangeKernel(cqCommandQueue, ckKernel_tex2d, 2, NULL,
										  szGlobalWorkSize, szLocalWorkSize, 
										 0, NULL, NULL);
		oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);

#ifdef USE_STAGING_BUFFER
		size_t dst[3] = { 0, 0, 0};
		size_t region[3] = { g_texture_2d.width, g_texture_2d.height, 1};
		ciErrNum |= clEnqueueCopyBufferToImage(cqCommandQueue,
                   g_texture_2d.clMem		/* src_buffer */,
                   g_texture_2d.clTexture	/* dst_image */, 
                   0						/* src_offset */,
                   dst						/* dst_origin[3] */,
                   region					/* region[3] */, 
                   0						/* num_events_in_wait_list */,
                   NULL						/* event_wait_list */,
                   NULL						/* event */);
		oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
#endif
    }
	// ----------------------------------------------------------------
    // populate the volume texture
    {
		// set global and local work item dimensions
		szLocalWorkSize[0] = 16;
		szLocalWorkSize[1] = 16;
		szGlobalWorkSize[0] = shrRoundUp((int)szLocalWorkSize[0], g_texture_vol.width);
		szGlobalWorkSize[1] = shrRoundUp((int)szLocalWorkSize[1], g_texture_vol.height);

		// set the args values
		ciErrNum |= clSetKernelArg(ckKernel_texvolume, 0, sizeof(g_texture_vol.clMem), (void *) &(g_texture_vol.clMem));
		ciErrNum |= clSetKernelArg(ckKernel_texvolume, 1, sizeof(g_texture_vol.width), &g_texture_vol.width);
		ciErrNum |= clSetKernelArg(ckKernel_texvolume, 2, sizeof(g_texture_vol.height), &g_texture_vol.height);
		ciErrNum |= clSetKernelArg(ckKernel_texvolume, 3, sizeof(g_texture_vol.depth), &g_texture_vol.depth);
		ciErrNum |= clSetKernelArg(ckKernel_texvolume, 4, sizeof(g_texture_vol.pitch), &g_texture_vol.pitch);
		ciErrNum |= clSetKernelArg(ckKernel_texvolume, 5, sizeof(g_texture_vol.pitchslice), &g_texture_vol.pitchslice);
		oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
	    
		// launch computation kernel
		ciErrNum |= clEnqueueNDRangeKernel(cqCommandQueue, ckKernel_texvolume, 2, NULL,
										  szGlobalWorkSize, szLocalWorkSize, 
										 0, NULL, NULL);
		oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);

		// ONLY staging buffer works, for volume texture
		// do the copy here
		size_t dst[3] = { 0, 0, 0};
		size_t region[3] = { g_texture_vol.width, g_texture_vol.height, g_texture_vol.depth};
		ciErrNum |= clEnqueueCopyBufferToImage(cqCommandQueue,
                   g_texture_vol.clMem		/* src_buffer */,
                   g_texture_vol.clTexture	/* dst_image */, 
                   0						/* src_offset */,
                   dst						/* dst_origin[3] */,
                   region					/* region[3] */, 
                   0						/* num_events_in_wait_list */,
                   NULL						/* event_wait_list */,
                   NULL						/* event */);
		oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
    }

	// ----------------------------------------------------------------
    // populate the faces of the cube map
    for (int face = 0; face < 6; ++face)
    {
		// set global and local work item dimensions
		szLocalWorkSize[0] = 16;
		szLocalWorkSize[1] = 16;
		szGlobalWorkSize[0] = shrRoundUp((int)szLocalWorkSize[0], g_texture_cube.size);
		szGlobalWorkSize[1] = shrRoundUp((int)szLocalWorkSize[1], g_texture_cube.size);

		// set the args values
#ifdef USE_STAGING_BUFFER
		ciErrNum |= clSetKernelArg(ckKernel_texcube, 0, sizeof(g_texture_cube.clMem[face]), (void *) &(g_texture_cube.clMem[face]));
#else
		ciErrNum |= clSetKernelArg(ckKernel_texcube, 0, sizeof(g_texture_cube.clTexture[face]), (void *) &(g_texture_cube.clTexture[face]));
#endif
		ciErrNum |= clSetKernelArg(ckKernel_texcube, 1, sizeof(g_texture_cube.size), &g_texture_cube.size);
		ciErrNum |= clSetKernelArg(ckKernel_texcube, 2, sizeof(g_texture_cube.pitch), &g_texture_cube.pitch);
		ciErrNum |= clSetKernelArg(ckKernel_texcube, 3, sizeof(int), &face);
		ciErrNum |= clSetKernelArg(ckKernel_texcube, 4, sizeof(t), &t);
		oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
	    
		// launch computation kernel
		ciErrNum |= clEnqueueNDRangeKernel(cqCommandQueue, ckKernel_texcube, 2, NULL,
										  szGlobalWorkSize, szLocalWorkSize, 
										 0, NULL, NULL);
		oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);

#ifdef USE_STAGING_BUFFER
		size_t dst[3] = { 0, 0, 0};
		size_t region[3] = { g_texture_cube.size, g_texture_cube.size, 1};
		ciErrNum |= clEnqueueCopyBufferToImage(cqCommandQueue,
                   g_texture_cube.clMem[face]/* src_buffer */,
                   g_texture_cube.clTexture[face]/* dst_image */, 
                   0						/* src_offset */,
                   dst						/* dst_origin[3] */,
                   region					/* region[3] */, 
                   0						/* num_events_in_wait_list */,
                   NULL						/* event_wait_list */,
                   NULL						/* event */);
		oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
#endif
    }
    t += 0.1f;
}
예제 #7
0
int
main(void)
{
  cl_int err;
  cl_platform_id platforms[MAX_PLATFORMS];
  cl_uint nplatforms;
  cl_device_id devices[MAX_DEVICES];
  cl_uint ndevices;
  cl_uint i, j;
  size_t el, row, col;

  CHECK_CL_ERROR(clGetPlatformIDs(MAX_PLATFORMS, platforms, &nplatforms));

  for (i = 0; i < nplatforms; i++)
  {
    CHECK_CL_ERROR(clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, MAX_DEVICES,
      devices, &ndevices));

    /* Only test the devices we actually have room for */
    if (ndevices > MAX_DEVICES)
      ndevices = MAX_DEVICES;

    for (j = 0; j < ndevices; j++)
    {
      /* skip devices that do not support images */
      cl_bool has_img;
      CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_IMAGE_SUPPORT, sizeof(has_img), &has_img, NULL));
      if (!has_img)
        continue;

      cl_context context = clCreateContext(NULL, 1, &devices[j], NULL, NULL, &err);
      CHECK_OPENCL_ERROR_IN("clCreateContext");
      cl_command_queue queue = clCreateCommandQueue(context, devices[j], 0, &err);
      CHECK_OPENCL_ERROR_IN("clCreateCommandQueue");

      cl_ulong alloc;
      size_t max_height;
      size_t max_width;
#define MAXALLOC (1024U*1024U)

      CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_MAX_MEM_ALLOC_SIZE,
          sizeof(alloc), &alloc, NULL));
      CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_IMAGE2D_MAX_WIDTH,
          sizeof(max_width), &max_width, NULL));
      CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_IMAGE2D_MAX_HEIGHT,
          sizeof(max_height), &max_height, NULL));


      while (alloc > MAXALLOC)
        alloc /= 2;

      // fit at least one max_width inside the alloc (shrink max_width for this)
      while (max_width*pixel_size > alloc)
        max_width /= 2;

      // round number of elements to next multiple of max_width elements
      const size_t nels = (alloc/pixel_size/max_width)*max_width;
      const size_t buf_size = nels*pixel_size;

      cl_image_desc img_desc;
      memset(&img_desc, 0, sizeof(img_desc));
      img_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
      img_desc.image_width = max_width;
      img_desc.image_height = nels/max_width;
      img_desc.image_depth = 1;

      cl_ushort null_pixel[4] = {0, 0, 0, 0};
      cl_ushort *host_buf = malloc(buf_size);
      TEST_ASSERT(host_buf);

      for (el = 0; el < nels; el+=4) {
        host_buf[el] = el & CHANNEL_MAX;
        host_buf[el+1] = (CHANNEL_MAX - el) & CHANNEL_MAX;
        host_buf[el+2] = (CHANNEL_MAX/((el & 1) + 1) - el) & CHANNEL_MAX;
        host_buf[el+3] = (CHANNEL_MAX - el/((el & 1) + 1)) & CHANNEL_MAX;
      }

      cl_mem buf = clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err);
      CHECK_OPENCL_ERROR_IN("clCreateBuffer");
      cl_mem img = clCreateImage(context, CL_MEM_READ_WRITE, &img_format, &img_desc, NULL, &err);
      CHECK_OPENCL_ERROR_IN("clCreateImage");

      CHECK_CL_ERROR(clEnqueueWriteBuffer(queue, buf, CL_TRUE, 0, buf_size, host_buf, 0, NULL, NULL));

      const size_t offset = 0;
      const size_t origin[] = {0, 0, 0};
      const size_t region[] = {img_desc.image_width, img_desc.image_height, 1};

      CHECK_CL_ERROR(clEnqueueCopyBufferToImage(queue, buf, img, offset, origin, region, 0, NULL, NULL));

      size_t row_pitch, slice_pitch;
      cl_ushort *img_map = clEnqueueMapImage(queue, img, CL_TRUE, CL_MAP_READ, origin, region,
        &row_pitch, &slice_pitch, 0, NULL, NULL, &err);
      CHECK_OPENCL_ERROR_IN("clEnqueueMapImage");

      CHECK_CL_ERROR(clFinish(queue));

      for (row = 0; row < img_desc.image_height; ++row) {
        for (col = 0; col < img_desc.image_width; ++col) {
          cl_ushort *img_pixel = (cl_ushort*)((char*)img_map + row*row_pitch) + col*4;
          cl_ushort *buf_pixel = host_buf + (row*img_desc.image_width + col)*4;

          if (memcmp(img_pixel, buf_pixel, pixel_size) != 0)
            printf("%zu %zu %zu : %x %x %x %x | %x %x %x %x\n",
              row, col, (size_t)(buf_pixel - host_buf),
              buf_pixel[0],
              buf_pixel[1],
              buf_pixel[2],
              buf_pixel[3],
              img_pixel[0],
              img_pixel[1],
              img_pixel[2],
              img_pixel[3]);

          TEST_ASSERT(memcmp(img_pixel, buf_pixel, pixel_size) == 0);

        }
      }

      CHECK_CL_ERROR(clEnqueueUnmapMemObject(queue, img, img_map, 0, NULL, NULL));

      /* Clear the buffer, and ensure it has been cleared */
      CHECK_CL_ERROR(clEnqueueFillBuffer(queue, buf, null_pixel, sizeof(null_pixel), 0, buf_size, 0, NULL, NULL));
      cl_ushort *buf_map = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_READ, 0, buf_size, 0, NULL, NULL, &err);
      CHECK_OPENCL_ERROR_IN("clEnqueueMapBuffer");

      CHECK_CL_ERROR(clFinish(queue));

      for (el = 0; el < nels; ++el) {
#if 0 // debug
        if (buf_map[el] != 0) {
          printf("%zu/%zu => %u\n", el, nels, buf_map[el]);
        }
#endif
        TEST_ASSERT(buf_map[el] == 0);
      }

      CHECK_CL_ERROR(clEnqueueUnmapMemObject(queue, buf, buf_map, 0, NULL, NULL));

      /* Copy data from image to buffer, and check that it's again equal to the original buffer */
      CHECK_CL_ERROR(clEnqueueCopyImageToBuffer(queue, img, buf, origin, region, offset, 0, NULL, NULL));
      buf_map = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_READ, 0, buf_size, 0, NULL, NULL, &err);
      CHECK_CL_ERROR(clFinish(queue));

      TEST_ASSERT(memcmp(buf_map, host_buf, buf_size) == 0);

      CHECK_CL_ERROR (
          clEnqueueUnmapMemObject (queue, buf, buf_map, 0, NULL, NULL));
      CHECK_CL_ERROR (clFinish (queue));

      free(host_buf);
      CHECK_CL_ERROR (clReleaseMemObject (img));
      CHECK_CL_ERROR (clReleaseMemObject (buf));
      CHECK_CL_ERROR (clReleaseCommandQueue (queue));
      CHECK_CL_ERROR (clReleaseContext (context));
    }
  }
  return EXIT_SUCCESS;
}
예제 #8
0
       cl_mem bindTexture(const oclMat &mat)
        {
            cl_mem texture;
            cl_image_format format;
            int err;
            int depth    = mat.depth();
            int channels = mat.channels();

            switch(depth)
            {
            case CV_8U:
                format.image_channel_data_type = CL_UNSIGNED_INT8;
                break;
            case CV_32S:
                format.image_channel_data_type = CL_UNSIGNED_INT32;
                break;
            case CV_32F:
                format.image_channel_data_type = CL_FLOAT;
                break;
            default:
                throw std::exception();
                break;
            }
            switch(channels)
            {
            case 1:
                format.image_channel_order     = CL_R;
                break;
            case 3:
                format.image_channel_order     = CL_RGB;
                break;
            case 4:
                format.image_channel_order     = CL_RGBA;
                break;
            default:
                throw std::exception();
                break;
            }
#if CL_VERSION_1_2
            cl_image_desc desc;
            desc.image_type       = CL_MEM_OBJECT_IMAGE2D;
            desc.image_width      = mat.cols;
            desc.image_height     = mat.rows;
            desc.image_depth      = 0;
            desc.image_array_size = 1;
            desc.image_row_pitch  = 0;
            desc.image_slice_pitch = 0;
            desc.buffer           = NULL;
            desc.num_mip_levels   = 0;
            desc.num_samples      = 0;
            texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
#else
            texture = clCreateImage2D(
                mat.clCxt->impl->clContext,
                CL_MEM_READ_WRITE,
                &format,
                mat.cols,
                mat.rows,
                0,
                NULL,
                &err);
#endif
            size_t origin[] = { 0, 0, 0 };
            size_t region[] = { mat.cols, mat.rows, 1 };

            cl_mem devData;
            if (mat.cols * mat.elemSize() != mat.step)
            {
                devData = clCreateBuffer(mat.clCxt->impl->clContext, CL_MEM_READ_ONLY, mat.cols * mat.rows
                    * mat.elemSize(), NULL, NULL);
                const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1};
                clEnqueueCopyBufferRect(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, devData, origin, origin,
                    regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL);
            }
            else
            {
                devData = (cl_mem)mat.data;
            }

            clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, devData, texture, 0, origin, region, 0, NULL, 0);
            if ((mat.cols * mat.elemSize() != mat.step))
            {
                clFinish(mat.clCxt->impl->clCmdQueue);
                clReleaseMemObject(devData);
            }

            openCLSafeCall(err);
            return texture;
        }
예제 #9
0
파일: noise.c 프로젝트: voxels/Daytum
static int
Recompute(void)
{
    void *values[10];
    size_t sizes[10];
    size_t global[2];
    size_t local[2];

    int arg = 0;
    int err = 0;
    float bias[2] = { fabs(Bias[0]), fabs(Bias[1]) };
    float scale[2] = { fabs(Scale), fabs(Scale) };

    unsigned int v = 0, s = 0;
    values[v++] = &ComputeResult;
    values[v++] = bias;
    values[v++] = scale;
    if(ActiveKernel > 0)
    {
        values[v++] = &Lacunarity;
        values[v++] = &Increment;
        values[v++] = &Octaves;
    }
    values[v++] = &Amplitude;

    sizes[s++] = sizeof(cl_mem);
    sizes[s++] = sizeof(float) * 2;
    sizes[s++] = sizeof(float) * 2;
    if(ActiveKernel > 0)
    {
        sizes[s++] = sizeof(float);
        sizes[s++] = sizeof(float);
        sizes[s++] = sizeof(float);
    }
    sizes[s++] = sizeof(float);

    err = CL_SUCCESS;
    for (arg = 0; arg < s; arg++)
    {
        err |= clSetKernelArg(ComputeKernels[ActiveKernel], arg, sizes[arg], values[arg]);
    }

    if (err)
        return -10;

    global[0] = Width;
    global[1] = Height;

    local[0] = ComputeKernelWorkGroupSizes[ActiveKernel];
    local[1] = 1;

#if DEBUG_INFO
    if(FrameCount <= 1)
        printf("Global[%4d %4d] Local[%4d %4d]\n", 
            (int)global[0], (int)global[1],
            (int)local[0], (int)local[1]);
#endif

    err = clEnqueueNDRangeKernel(ComputeCommands, ComputeKernels[ActiveKernel], 2, NULL, global, local, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        printf("Failed to enqueue kernel! %d\n", err);
        return EXIT_FAILURE;
    }

#if USE_GL_ATTACHMENTS

    err = clEnqueueAcquireGLObjects(ComputeCommands, 1, &ComputeImage, 0, 0, 0);
    if (err != CL_SUCCESS)
    {
        printf("Failed to acquire GL object! %d\n", err);
        return EXIT_FAILURE;
    }

    size_t origin[] = { 0, 0, 0 };
    size_t region[] = { Width, Height, 1 };

    err = clEnqueueCopyBufferToImage(ComputeCommands, ComputeResult, ComputeImage, 
                                     0, origin, region, 0, NULL, 0);
    
    if(err != CL_SUCCESS)
    {
        printf("Failed to copy buffer to image! %d\n", err);
        return EXIT_FAILURE;
    }
        
    err = clEnqueueReleaseGLObjects(ComputeCommands, 1, &ComputeImage, 0, 0, 0);
    if (err != CL_SUCCESS)
    {
        printf("Failed to release GL object! %d\n", err);
        return EXIT_FAILURE;
    }

#else
    err = clEnqueueReadBuffer( ComputeCommands, ComputeResult, CL_TRUE, 0, 
                               Width * Height * TextureTypeSize * 4, 
                               HostImageBuffer, 0, NULL, NULL );      
    if (err)
        return -5;
#endif
        
    return CL_SUCCESS;
}
예제 #10
0
/*
 * Update Texture - requires valid OpenGL Context
 */
void TextureUpdate::updateTexture(const Measurement::ImageMeasurement& image) {
#ifdef HAVE_OPENCV
    // access OCL Manager and initialize if needed
    Vision::OpenCLManager& oclManager = Vision::OpenCLManager::singleton();

    if (!image) {
        // LOG4CPP_WARN ??
        return;
    }


    // if OpenCL is enabled and image is on GPU, then use OCL codepath
    bool image_isOnGPU = oclManager.isInitialized() & image->isOnGPU();

    if ( m_bTextureInitialized )
    {
        // check if received image fits into the allocated texture

        // find out texture format
        int umatConvertCode = -1;
        GLenum glFormat = GL_LUMINANCE;
        GLenum glDatatype = GL_UNSIGNED_BYTE;
        int numOfChannels = 1;
        Image::ImageFormatProperties fmtSrc, fmtDst;
        image->getFormatProperties(fmtSrc);
        image->getFormatProperties(fmtDst);

        getImageFormat(fmtSrc, fmtDst, image_isOnGPU, umatConvertCode, glFormat, glDatatype);

        if (image_isOnGPU) {
#ifdef HAVE_OPENCL

            glBindTexture( GL_TEXTURE_2D, m_texture );

            // @todo this probably causes unwanted delay - .. except when executed on gpu ...
            if (umatConvertCode != -1) {
                cv::cvtColor(image->uMat(), m_convertedImage, umatConvertCode );
            } else {
                m_convertedImage = image->uMat();
            }

            cv::ocl::finish();
            glFinish();

            cl_command_queue commandQueue = oclManager.getCommandQueue();
            cl_int err;

            clFinish(commandQueue);

            err = clEnqueueAcquireGLObjects(commandQueue, 1, &(m_clImage), 0, NULL, NULL);
            if(err != CL_SUCCESS)
            {
                LOG4CPP_ERROR( logger, "error at  clEnqueueAcquireGLObjects:" << getOpenCLErrorString(err) );
            }

            cl_mem clBuffer = (cl_mem) m_convertedImage.handle(cv::ACCESS_READ);
            cl_command_queue cv_ocl_queue = (cl_command_queue)cv::ocl::Queue::getDefault().ptr();

            size_t offset = 0;
            size_t dst_origin[3] = {0, 0, 0};
            size_t region[3] = {static_cast<size_t>(m_convertedImage.cols), static_cast<size_t>(m_convertedImage.rows), 1};

            err = clEnqueueCopyBufferToImage(cv_ocl_queue, clBuffer, m_clImage, offset, dst_origin, region, 0, NULL, NULL);
            if (err != CL_SUCCESS)
            {
                LOG4CPP_ERROR( logger, "error at  clEnqueueCopyBufferToImage:" << getOpenCLErrorString(err) );
            }

            err = clEnqueueReleaseGLObjects(commandQueue, 1, &m_clImage, 0, NULL, NULL);
            if(err != CL_SUCCESS)
            {
                LOG4CPP_ERROR( logger, "error at  clEnqueueReleaseGLObjects:" << getOpenCLErrorString(err) );
            }
            cv::ocl::finish();


#else // HAVE_OPENCL
            LOG4CPP_ERROR( logger, "Image isOnGPU but OpenCL is disabled!!");
#endif // HAVE_OPENCL
        } else {
            // load image from CPU buffer into texture
            glBindTexture( GL_TEXTURE_2D, m_texture );
            glTexSubImage2D( GL_TEXTURE_2D, 0, 0, 0, image->width(), image->height(),
                    glFormat, glDatatype, image->Mat().data );

        }

    }
#endif // HAVE_OPENCV
}
예제 #11
0
파일: mcwutil.cpp 프로젝트: DevShah/18551
        cl_mem bindTexture(const oclMat &mat)
        {
            cl_mem texture;
            cl_image_format format;
            int err;
            int depth    = mat.depth();
            int channels = mat.oclchannels();

            switch(depth)
            {
            case CV_8U:
                format.image_channel_data_type = CL_UNSIGNED_INT8;
                break;
            case CV_32S:
                format.image_channel_data_type = CL_UNSIGNED_INT32;
                break;
            case CV_32F:
                format.image_channel_data_type = CL_FLOAT;
                break;
            default:
                CV_Error(-1, "Image forma is not supported");
                break;
            }
            switch(channels)
            {
            case 1:
                format.image_channel_order     = CL_R;
                break;
            case 3:
                format.image_channel_order     = CL_RGB;
                break;
            case 4:
                format.image_channel_order     = CL_RGBA;
                break;
            default:
                CV_Error(-1, "Image format is not supported");
                break;
            }
#ifdef CL_VERSION_1_2
            //this enables backwards portability to
            //run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
            if(Context::getContext()->supportsFeature(FEATURE_CL_VER_1_2))
            {
                cl_image_desc desc;
                desc.image_type       = CL_MEM_OBJECT_IMAGE2D;
                desc.image_width      = mat.cols;
                desc.image_height     = mat.rows;
                desc.image_depth      = 0;
                desc.image_array_size = 1;
                desc.image_row_pitch  = 0;
                desc.image_slice_pitch = 0;
                desc.buffer           = NULL;
                desc.num_mip_levels   = 0;
                desc.num_samples      = 0;
                texture = clCreateImage(*(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
            }
            else
#endif
            {
                texture = clCreateImage2D(
                    *(cl_context*)mat.clCxt->getOpenCLContextPtr(),
                    CL_MEM_READ_WRITE,
                    &format,
                    mat.cols,
                    mat.rows,
                    0,
                    NULL,
                    &err);
            }
            size_t origin[] = { 0, 0, 0 };
            size_t region[] = { mat.cols, mat.rows, 1 };

            cl_mem devData;
            if (mat.cols * mat.elemSize() != mat.step)
            {
                devData = clCreateBuffer(*(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_ONLY, mat.cols * mat.rows
                    * mat.elemSize(), NULL, NULL);
                const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1};
                clEnqueueCopyBufferRect(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr(), (cl_mem)mat.data, devData, origin, origin,
                    regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL);
                clFlush(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr());
            }
            else
            {
                devData = (cl_mem)mat.data;
            }

            clEnqueueCopyBufferToImage(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr(), devData, texture, 0, origin, region, 0, NULL, 0);
            if ((mat.cols * mat.elemSize() != mat.step))
            {
                clFlush(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr());
                clReleaseMemObject(devData);
            }

            openCLSafeCall(err);
            return texture;
        }
예제 #12
0
END_TEST

START_TEST (test_copy_image_buffer)
{
    cl_platform_id platform = 0;
    cl_device_id device;
    cl_context ctx;
    cl_command_queue queue;
    cl_mem image, buffer;
    cl_int result;
    cl_event event;

    unsigned char image_buffer[3*3*4] = {
        255, 0, 0, 0,       0, 255, 0, 0,       0, 0, 255, 0,
        128, 0, 0, 0,       0, 128, 0, 0,       0, 0, 128, 0,
        64, 0, 0, 0,        0, 64, 0, 0,        0, 0, 64, 0
    };

    // Square that will be put in image_buffer at (1, 0)
    unsigned char buffer_buffer[2*2*4+1] = {
        33, // Oh, a padding !
        255, 255, 255, 0,   255, 0, 255, 0,
        0, 255, 255, 0,     255, 255, 0, 0
    };

    // What we must get once re-reading 2x2 rect at (1, 1)
    unsigned char correct_data[2*2*4] = {
        0, 255, 255, 0,     255, 255, 0, 0,
        0, 64, 0, 0,        0, 0, 64, 0
    };

    cl_image_format fmt;

    fmt.image_channel_data_type = CL_UNORM_INT8;
    fmt.image_channel_order = CL_RGBA;

    size_t origin[3] = {1, 0, 0};
    size_t region[3] = {2, 2, 1};

    result = clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, 0);
    fail_if(
        result != CL_SUCCESS,
        "unable to get the default device"
    );

    ctx = clCreateContext(0, 1, &device, 0, 0, &result);
    fail_if(
        result != CL_SUCCESS || ctx == 0,
        "unable to create a valid context"
    );

    queue = clCreateCommandQueue(ctx, device, 0, &result);
    fail_if(
        result != CL_SUCCESS || queue == 0,
        "cannot create a command queue"
    );

    image = clCreateImage2D(ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &fmt,
                            3, 3, 0, image_buffer, &result);
    fail_if(
        result != CL_SUCCESS,
        "unable to create a 3x3 bgra image"
    );

    buffer = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                            sizeof(buffer_buffer), buffer_buffer, &result);
    fail_if(
        result != CL_SUCCESS,
        "unable to create a buffer object"
    );

    // Write buffer in image
    result = clEnqueueCopyBufferToImage(queue, buffer, image, 1, origin, region,
                                        0, 0, &event);
    fail_if(
        result != CL_SUCCESS,
        "unable to queue a copy buffer to image event, buffer offset 1, image 2x2 @ (1, 0)"
    );

    result = clWaitForEvents(1, &event);
    fail_if(
        result != CL_SUCCESS,
        "cannot wait for event"
    );

    clReleaseEvent(event);

    // Read it back into buffer, again with an offset
    origin[1] = 1;
    result = clEnqueueCopyImageToBuffer(queue, image, buffer, origin, region, 1,
                                        0, 0, &event);
    fail_if(
        result != CL_SUCCESS,
        "unable to queue a copy image to buffer event, buffer offset 1, image 2x2 @ (1, 1)"
    );

    result = clWaitForEvents(1, &event);
    fail_if(
        result != CL_SUCCESS,
        "cannot wait for event"
    );

    fail_if(
        std::memcmp(buffer_buffer + 1, correct_data, sizeof(correct_data)) != 0,
        "copying data around isn't working the expected way"
    );

    // Map the image and check pointers
    unsigned char *mapped;
    size_t row_pitch;

    origin[0] = 0;
    origin[1] = 0;
    origin[2] = 0;

    mapped = (unsigned char *)clEnqueueMapImage(queue, image, 1, CL_MAP_READ,
                                                origin, region, &row_pitch, 0, 0,
                                                0, 0, &result);
    fail_if(
        result != CL_SUCCESS,
        "unable to map an image"
    );
    fail_if(
        mapped != image_buffer,
        "mapped aread doesn't match host ptr"
    );

    clReleaseEvent(event);
    clReleaseMemObject(image);
    clReleaseMemObject(buffer);
    clReleaseCommandQueue(queue);
    clReleaseContext(ctx);
}
예제 #13
0
void OpenCLExecuter::ocl_filter_shared(void)
{
	cl_int err;							// debugging variables
	size_t szParmDataBytes;				// Byte size of context information        
	cl_mem src_buffer;					// OpenCL device source buffer
	cl_mem dst_buffer;					// OpenCL device source buffer
	cl_sampler sampler;					// OpenCL sampler
	cl_kernel ckKernel;					// OpenCL kernel

	int iNumElements = volobj->texwidth*volobj->texheight*volobj->texdepth; // Length of float arrays to process

	// set Local work size dimensions
//	size_t local_threads[3] ={256,256,64};
	// set Global work size dimensions
//	size_t global_threads[3] ={roundup((int) volobj->texwidth/local_threads[0], 0)*local_threads[0], roundup((int) volobj->texheight/local_threads[1], 0)*local_threads[1], roundup((int) volobj->texdepth/local_threads[2], 0)*local_threads[2]};

	// set Global work size dimensions
	size_t global_threads[3] ={volobj->texwidth, volobj->texheight, volobj->texdepth};

	// allocate the source buffer memory object
	src_buffer = clCreateFromGLTexture3D (ocl_wrapper->context, CL_MEM_READ_WRITE, GL_TEXTURE_3D, 0, volobj->TEXTURE3D_RED, &err);
	printf("OPENCL: clCreateFromGLTexture3D: %s\n", ocl_wrapper->get_error(err));

	// allocate the destination buffer memory object
	dst_buffer = clCreateBuffer (ocl_wrapper->context, CL_MEM_READ_WRITE,  sizeof(unsigned char) * iNumElements, NULL, &err);
	printf("OPENCL: clCreateBuffer: %s\n", ocl_wrapper->get_error(err));

	// create a sampler object
	sampler = clCreateSampler(ocl_wrapper->context, CL_FALSE, CL_ADDRESS_CLAMP, CL_FILTER_NEAREST, &err);
	printf("OPENCL: clCreateSampler: %s\n", ocl_wrapper->get_error(err));
 
    // Create the kernel
	ckKernel = clCreateKernel (cpProgram, "myFunc", &err);
	printf("OPENCL: clCreateKernel: %s\n", ocl_wrapper->get_error(err));
  
	// Set the Argument values
	err = clSetKernelArg (ckKernel, 0, sizeof(cl_mem), (void*)&src_buffer);
	printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
	err = clSetKernelArg (ckKernel, 1, sizeof(cl_mem), (void*)&dst_buffer);
	printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
	err = clSetKernelArg (ckKernel, 2, sizeof(sampler), (void*)&sampler);
	printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));

	size_t local;
	err = clGetKernelWorkGroupInfo(ckKernel, ocl_wrapper->devices[ocl_wrapper->deviceUsed], CL_KERNEL_LOCAL_MEM_SIZE , sizeof(local), &local, NULL);
	printf("OPENCL: clGetKernelWorkGroupInfo (kernel memory): %s\n", ocl_wrapper->get_error(err));
	printf("OPENCL: Kernel local memory use: %d Bytes\n", (int)local);

	// grab input data from OpenGL, compute, copy the results back to OpenGL
	// Runs asynchronous to host, up until blocking clFinish at the end

	glFinish();
	glFlush();
	
	// grab the OpenGL texture object for read/writing from OpenCL
	err = clEnqueueAcquireGLObjects(ocl_wrapper->commandQue, 1, &src_buffer, 0,NULL,NULL);
	printf("OPENCL: clEnqueueAcquireGLObjects: %s\n", ocl_wrapper->get_error(err));

	// Execute a kernel
	err = clEnqueueNDRangeKernel (ocl_wrapper->commandQue, ckKernel, 3, NULL, global_threads, NULL, 0, NULL, NULL);
	printf("OPENCL: clEnqueueNDRangeKernel: %s\n", ocl_wrapper->get_error(err));

	/*
	// Blocking read of results from GPU to Host
	int size = volobj->texwidth*volobj->texheight*volobj->texdepth;
	unsigned char* result = new unsigned char[size];
	err = clEnqueueReadBuffer (ocl_wrapper->commandQue, dst_buffer, CL_TRUE, 0, sizeof(unsigned char) * iNumElements, result, 0, NULL, NULL);
	printf("OPENCL: clEnqueueReadBuffer: %s\n", ocl_wrapper->get_error(err));
	for(int i=0; i<size; i++) volobj->texture3d[3*i+0] = result[i];
	delete[] result;
	*/

	// copy OpenCL buffer to OpenGl texture
	size_t corigin[3] = {0,0,0};
	size_t cdimensions[3] = {(unsigned int)volobj->texwidth, (unsigned int)volobj->texheight, (unsigned int)volobj->texdepth};
	err = clEnqueueCopyBufferToImage(ocl_wrapper->commandQue , dst_buffer, src_buffer, 0, corigin, cdimensions, 0, NULL, NULL);
	printf("OPENCL: clEnqueueCopyBufferToImage: %s\n", ocl_wrapper->get_error(err));

	//make sure we block until we are done.
	//err = clFinish(ocl_wrapper->commandQue);
	//printf("OPENCL: clFinish: %s\n", ocl_wrapper->get_error(err));
	
	//release opengl objects now
	err = clEnqueueReleaseGLObjects(ocl_wrapper->commandQue, 1, &src_buffer, 0,0,0);
	printf("OPENCL: clEnqueueAcquireGLObjects: %s\n", ocl_wrapper->get_error(err));

	// Cleanup allocated objects
	printf("OPENCL: Releasing kernel memory\n");
    if(ckKernel)clReleaseKernel(ckKernel); 
   
    //need to release any other OpenCL memory objects here
    if(src_buffer)clReleaseMemObject(src_buffer);
    if(dst_buffer)clReleaseMemObject(dst_buffer);
}