Beispiel #1
0
PIGLIT_CL_API_TEST_CONFIG_END


#if defined(CL_VERSION_1_2)
static bool
test(cl_command_queue queue, cl_mem image,
     const void *fill_color, size_t *origin, size_t *region,
     cl_uint num_events_in_wait_list,
     const cl_event *event_wait_list,
     cl_event *event,
     cl_int expected_error, enum piglit_result* result,
     const char* test_str) {
	cl_int errNo;

	errNo = clEnqueueFillImage(queue, image,
	                           fill_color, origin, region,
	                           num_events_in_wait_list, event_wait_list,
	                           event);

	if(!piglit_cl_check_error(errNo, expected_error)) {
		fprintf(stderr, "Failed (error code: %s): %s.\n",
		        piglit_cl_get_error_name(errNo), test_str);
		piglit_merge_result(result, PIGLIT_FAIL);
		return false;
	}

	return true;
}
Beispiel #2
0
    /// Enqueues a command to fill \p image with \p fill_color.
    ///
    /// \see_opencl_ref{clEnqueueFillImage}
    ///
    /// \opencl_version_warning{1,2}
    event enqueue_fill_image(const image3d &image,
                             const void *fill_color,
                             const size_t origin[3],
                             const size_t region[3],
                             const wait_list &events = wait_list())
    {
        BOOST_ASSERT(m_queue != 0);
        BOOST_ASSERT(image.get_context() == this->get_context());

        event event_;

        cl_int ret = clEnqueueFillImage(
            m_queue,
            image.get(),
            fill_color,
            origin,
            region,
            events.size(),
            events.get_event_ptr(),
            &event_.get()
        );

        if(ret != CL_SUCCESS){
            BOOST_THROW_EXCEPTION(opencl_error(ret));
        }

        return event_;
    }
JNIEXPORT jint JNICALL Java_org_lwjgl_opencl_CL12_nclEnqueueFillImage(JNIEnv *env, jclass clazz, jlong command_queue, jlong image, jlong fill_color, jlong origin, jlong region, jint num_events_in_wait_list, jlong event_wait_list, jlong event, jlong function_pointer) {
	const cl_void *fill_color_address = (const cl_void *)(intptr_t)fill_color;
	const size_t *origin_address = (const size_t *)(intptr_t)origin;
	const size_t *region_address = (const size_t *)(intptr_t)region;
	const cl_event *event_wait_list_address = (const cl_event *)(intptr_t)event_wait_list;
	cl_event *event_address = (cl_event *)(intptr_t)event;
	clEnqueueFillImagePROC clEnqueueFillImage = (clEnqueueFillImagePROC)((intptr_t)function_pointer);
	cl_int __result = clEnqueueFillImage((cl_command_queue)(intptr_t)command_queue, (cl_mem)(intptr_t)image, fill_color_address, origin_address, region_address, num_events_in_wait_list, event_wait_list_address, event_address);
	return __result;
}
Beispiel #4
0
int main(int argc, char **argv)
{
  /* test name */
  char name[] = "test_image_query_funcs";
  size_t global_work_size[1] = { 1 }, local_work_size[1]= { 1 };
  size_t srcdir_length, name_length, filename_size;
  char *filename = NULL;
  char *source = NULL;
  cl_device_id devices[1];
  cl_context context = NULL;
  cl_command_queue queue = NULL;
  cl_program program = NULL;
  cl_kernel kernel = NULL;
  cl_int err;

  /* image parameters */
  cl_uchar4 *imageData;
  cl_image_format image_format;
  cl_image_desc image2_desc, image3_desc;

  printf("Running test %s...\n", name);

  memset(&image2_desc, 0, sizeof(cl_image_desc));
  image2_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
  image2_desc.image_width = 2;
  image2_desc.image_height = 4;

  memset(&image3_desc, 0, sizeof(cl_image_desc));
  image3_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
  image3_desc.image_width = 2;
  image3_desc.image_height = 4;
  image3_desc.image_depth = 8;

  image_format.image_channel_order = CL_RGBA;
  image_format.image_channel_data_type = CL_UNSIGNED_INT8;
  imageData = (cl_uchar4*)malloc (4 * 4 * sizeof(cl_uchar4));

  TEST_ASSERT (imageData != NULL && "out of host memory\n");
  memset (imageData, 1, 4*4*sizeof(cl_uchar4));

  /* determine file name of kernel source to load */
  srcdir_length = strlen(SRCDIR);
  name_length = strlen(name);
  filename_size = srcdir_length + name_length + 16;
  filename = (char *)malloc(filename_size + 1);
  TEST_ASSERT (filename != NULL && "out of host memory\n");

  snprintf(filename, filename_size, "%s/%s.cl", SRCDIR, name);

  /* read source code */
  source = poclu_read_file (filename);
  TEST_ASSERT (source != NULL && "Kernel .cl not found.");

  /* setup an OpenCL context and command queue using default device */
  context = poclu_create_any_context();
  TEST_ASSERT (context != NULL && "clCreateContextFromType call failed\n");

  cl_sampler external_sampler = clCreateSampler (
      context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &err);
  CHECK_OPENCL_ERROR_IN ("clCreateSampler");

  CHECK_CL_ERROR (clGetContextInfo (context, CL_CONTEXT_DEVICES,
                                    sizeof (cl_device_id), devices, NULL));

  queue = clCreateCommandQueue (context, devices[0], 0, &err);
  CHECK_OPENCL_ERROR_IN ("clCreateCommandQueue");

  /* Create image */
  cl_mem image2
      = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                       &image_format, &image2_desc, imageData, &err);
  CHECK_OPENCL_ERROR_IN ("clCreateImage image2");

  cl_mem image3
      = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                       &image_format, &image3_desc, imageData, &err);
  CHECK_OPENCL_ERROR_IN ("clCreateImage image3");

  unsigned color[4] = { 2, 9, 11, 7 };
  size_t orig[3] = { 0, 0, 0 };
  size_t reg[3] = { 2, 4, 1 };
  err = clEnqueueFillImage (queue, image2, color, orig, reg, 0, NULL, NULL);
  CHECK_OPENCL_ERROR_IN ("clCreateImage image3");

  /* create and build program */
  program = clCreateProgramWithSource (context, 1, (const char **)&source,
                                       NULL, &err);
  CHECK_OPENCL_ERROR_IN ("clCreateProgramWithSource");

  err = clBuildProgram (program, 0, NULL, NULL, NULL, NULL);
  CHECK_OPENCL_ERROR_IN ("clBuildProgram");

  /* execute the kernel with give name */
  kernel = clCreateKernel (program, name, NULL);
  CHECK_OPENCL_ERROR_IN ("clCreateKernel");

  err = clSetKernelArg (kernel, 0, sizeof (cl_mem), &image2);
  CHECK_OPENCL_ERROR_IN ("clSetKernelArg 0");

  err = clSetKernelArg (kernel, 1, sizeof (cl_mem), &image3);
  CHECK_OPENCL_ERROR_IN ("clSetKernelArg 1");

  err = clSetKernelArg (kernel, 2, sizeof (cl_sampler), &external_sampler);
  CHECK_OPENCL_ERROR_IN ("clSetKernelArg 2");

  err = clEnqueueNDRangeKernel (queue, kernel, 1, NULL, global_work_size,
                                local_work_size, 0, NULL, NULL);
  CHECK_OPENCL_ERROR_IN ("clEnqueueNDRangeKernel");

  err = clFinish (queue);
  CHECK_OPENCL_ERROR_IN ("clFinish");

  clReleaseMemObject (image2);
  clReleaseMemObject (image3);
  clReleaseKernel (kernel);
  clReleaseProgram (program);
  clReleaseCommandQueue (queue);
  clReleaseSampler (external_sampler);
  clUnloadCompiler ();
  clReleaseContext (context);
  free (source);
  free (filename);
  free (imageData);

  printf("OK\n");
  return 0;
}
Beispiel #5
0
void RayCastRenderer::calcNormals(void)
{
  //F();

  // synchronize with OpenGL
  utils::ocl::GLSyncHandler sync(m_queue, 1, &m_data_cl_img.get());
  if (!sync) return;

  size_t global_work_size[3] = { m_data_width, m_data_height, m_data_depth };
  size_t cell_cnt = m_data_width * m_data_height * m_data_depth;
  cl_int err = CL_SUCCESS;

  //*** calculate particle counts
  utils::ocl::KernelArgs(m_calculate_particle_cnts_kernel,
                         "ray_cast_renderer_calculate_particle_cnts")
      .arg(m_cell_starts_buf)
      .arg(m_cell_ends_buf)
      .arg(m_cell_cnts_buf);
  err = clEnqueueNDRangeKernel(m_queue, m_calculate_particle_cnts_kernel, 1,
                               nullptr, &cell_cnt, nullptr,
                               0, nullptr, nullptr);
  if (err != CL_SUCCESS)
  {
    WARNM("RayCastRenderer: Failed to enqueue ray_cast_renderer_calculate_particle_cnts kernel"
          << boost::compute::opencl_error::to_string(err)
          << "(" << err << ")");
  }

  //*** calculate gradients
  utils::ocl::KernelArgs(m_calculate_gradients_kernel,
                         "ray_cast_renderer_calculate_gradients")
      .arg(m_cell_cnts_buf)
      .arg(m_grads_x_buf)
      .arg(m_grads_y_buf)
      .arg(m_grads_z_buf)
      .arg<cl_uint>(m_data_width)
      .arg<cl_uint>(m_data_height)
      .arg<cl_uint>(m_data_depth);
  err = clEnqueueNDRangeKernel(m_queue, m_calculate_gradients_kernel, 3,
                               nullptr, global_work_size, nullptr,
                               0, nullptr, nullptr);
  if (err != CL_SUCCESS)
  {
    WARNM("RayCastRenderer: Failed to enqueue ray_cast_renderer_calculate_gradients kernel"
          << boost::compute::opencl_error::to_string(err)
          << "(" << err << ")");
  }

  //*** calculate normals
  utils::ocl::KernelArgs(m_calculate_normals_kernel,
                         "ray_cast_renderer_calculate_normals")
      .arg(m_cell_cnts_buf)
      .arg(m_grads_x_buf)
      .arg(m_grads_y_buf)
      .arg(m_grads_z_buf)
      .arg(m_data_cl_img)
      .arg<cl_uint>(m_data_width)
      .arg<cl_uint>(m_data_height)
      .arg<cl_uint>(m_data_depth);
  err = clEnqueueNDRangeKernel(m_queue, m_calculate_normals_kernel, 3,
                               nullptr, global_work_size, nullptr,
                               0, nullptr, nullptr);
  if (err != CL_SUCCESS)
  {
    WARNM("RayCastRenderer: Failed to enqueue ray_cast_renderer_calculate_normals kernel"
          << boost::compute::opencl_error::to_string(err)
          << "(" << err << ")");
  }

#if 0
  cl_float4 fill_color = { 1.0f, 1.0f, 1.0f, 1.0f };
  size_t origin[3] = { 0, 0, 0 };
  size_t region[3] = { m_data_width, m_data_height, m_data_depth };
  err = clEnqueueFillImage(m_queue, m_data_cl_img.get(),
                           &fill_color, origin, region,
                           0, nullptr, nullptr);
  if (err != CL_SUCCESS)
  {
    WARNM("RayCastRenderer: Failed to fill image"
          << boost::compute::opencl_error::to_string(err)
          << "(" << err << ")");
  }
#endif
}
Beispiel #6
0
/*!
 * @function clut_blurImage_local_unlimited
 * Blurs the image at [filename] with a filter of size [filter_size], and saves the result
 * to the file "output_unlimited.png". This function should be optimized to run on
 * local memory.
 * @param filename
 * The name of the file.
 * @param filter_size
 * The size of the blur filter.
 * @return
 * 0 on success, non-0 on failure.
 */
int clut_blurImage_local_unlimited(const cl_device_id device, const char * const filename, const unsigned int filter_size)
{
	const char * const fname = "clut_blurImage_local";
	int return_value = 1;
	cl_int ret;

	if (NULL == filename) {
		Debug_out(DEBUG_HOMEWORK, "%s: NULL pointer argument.\n", fname);
		goto error1;
	}

	/* compute work group size */
	size_t local_width, local_height;
	if (0 != clut_getMaxWGSize(device, &local_width, &local_height)) {
		Debug_out(DEBUG_HOMEWORK, "%s: Unable to get work group sizes.\n", fname);
		goto error1;
	}
	Debug_out(DEBUG_HOMEWORK, "%s: Max work group size is [%zu]x[%zu].\n", fname, local_width, local_height);

	/* openCL wants to know the size of __local statically allocated arrays at compile time,
	 * so the local size must be set with a #define */
	char *flags = calloc(128, sizeof(char));
	if (NULL == flags) {
		Debug_out(DEBUG_HOMEWORK, "%s: A calloc failed.\n", fname);
		goto error1;
	}
	sprintf(flags, "-D LOCAL_WIDTH=%zu -D LOCAL_HEIGHT=%zu -D FILTER_SIZE=%d", local_width, local_height, filter_size);
	Debug_out(DEBUG_HOMEWORK, "%s: Local flags are: '%s'.\n", fname, flags);

	/* Create context */
	cl_context context = clCreateContext(NULL, 1, &device, clut_contextCallback, "clut_blurImage_local_unlimited", &ret);
	CLUT_CHECK_ERROR(ret, "Unable to create context", error2);
	Debug_out(DEBUG_HOMEWORK, "%s: Created context successfully.\n", fname);

	/* Create program */
	cl_program program = clut_createProgramFromFile(context, "homework_unlimited.cl", flags);
	if (NULL == program) {
		Debug_out(DEBUG_HOMEWORK, "%s: Unable to create program.\n", fname);
		goto error3;
	}
	Debug_out(DEBUG_HOMEWORK, "%s: Program created.\n", fname);

	/* Create kernel */
	cl_kernel kernel = clCreateKernel(program, "blurImage_local_unlimited", &ret);
	CLUT_CHECK_ERROR(ret, "Unable to create kernel", error4);
	Debug_out(DEBUG_HOMEWORK, "%s: Kernel created.\n", fname);

	/* Create command_queue */
	cl_command_queue command_queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &ret);
	CLUT_CHECK_ERROR(ret, "Unable to create command queue", error5);
	Debug_out(DEBUG_HOMEWORK, "%s: Command queue created.\n", fname);

	/* open source image */
	int width, height;
	cl_mem source_image = clut_loadImageFromFile(context, filename, &width, &height);
	if (NULL == source_image) {
		Debug_out(DEBUG_HOMEWORK, "%s: Unable to read source image.\n", fname);
		goto error6;
	}

	if ((filter_size > (unsigned int) width) || (filter_size > (unsigned int) height)) {
		Debug_out(DEBUG_HOMEWORK, "%s: Filter does not fit in image.\n", fname);
		goto error7;
	}

	/* crate destination image */
	cl_image_format image_format = {0, 0};

	cl_image_desc image_desc = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	image_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
//	image_desc.image_width		= 0;
//	image_desc.image_height		= 0;
//	image_desc.image_depth		= 0; /* only for 3D images */
//	image_desc.image_array_size	= 0; /* only for image arrays */
//	image_desc.image_row_pitch	= 0;
//	image_desc.image_slice_pitch	= 0; /* only for 3D images */
//	image_desc.num_mip_levels	= 0; /* mandatory */
//	image_desc.num_samples		= 0; /* mandatory */
//	image_desc.buffer		= NULL; /* only for 1D image buffers */

	ret = clGetImageInfo(source_image, CL_IMAGE_FORMAT, sizeof(image_format), &image_format, NULL);
	CLUT_CHECK_ERROR(ret, "Unable to get source image format information", error7);

	int components = clut_getImageFormatComponents(image_format);
	if (0 > components) {
		Debug_out(DEBUG_HOMEWORK, "%s: Unknown components for source image.\n", fname);
		goto error7;
	}
	Debug_out(DEBUG_HOMEWORK, "%s: Source image has %d components.\n", fname, components);

	image_desc.image_width = width - filter_size + 1;
	image_desc.image_height = height - filter_size + 1;
	image_desc.image_row_pitch = image_desc.image_width * components;

	cl_mem result_image = clCreateImage(context, CL_MEM_WRITE_ONLY, &image_format, &image_desc, NULL, &ret);
	CLUT_CHECK_ERROR(ret, "Unable to create second image", error7);

	/* fill result image with black */
	const unsigned int fill_color[4] = { 0, 0, 0, 255 };
	const size_t fill_origin[3] = { 0, 0, 0 };
	const size_t fill_region[3] = { width - filter_size + 1, height - filter_size + 1, 1 };
	ret = clEnqueueFillImage(command_queue, result_image, fill_color, fill_origin, fill_region, 0, NULL, NULL);
	CLUT_CHECK_ERROR(ret, "Unable to fill result image", error8);

	Debug_out(DEBUG_HOMEWORK, "%s: Images created.\n", fname);

	/* create filter matrix */
	unsigned char *filter_matrix = createFilterMatrix(filter_size);
	if (NULL == filter_matrix) {
		Debug_out(DEBUG_HOMEWORK, "%s: Unable to create filter matrix.\n", fname);
		goto error8;
	}
	Debug_out(DEBUG_HOMEWORK, "%s: Filter matrix created.\n", fname);
//	printFilterMatrix(filter_matrix, filter_size);

	/* copy filter matrix to device */
	cl_mem filter_matrix_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, filter_size * filter_size, filter_matrix, &ret);
	CLUT_CHECK_ERROR(ret, "Unable to create filter matrix buffer on device", error9);

	/* set kernel arguments */
	ret = clSetKernelArg(kernel, 0, sizeof(source_image), (void *) &source_image);
	CLUT_CHECK_ERROR(ret, "Unable to set source image argument", error10);
	Debug_out(DEBUG_HOMEWORK, "%s: Source image argument set.\n", fname);
	ret = clSetKernelArg(kernel, 1, sizeof(result_image), (void *) &result_image);
	CLUT_CHECK_ERROR(ret, "Unable to set result image argument", error10);
	Debug_out(DEBUG_HOMEWORK, "%s: Result image argument set.\n", fname);
	ret = clSetKernelArg(kernel, 2, sizeof(filter_matrix_buffer), (void *) &filter_matrix_buffer);
	CLUT_CHECK_ERROR(ret, "Unable to set filter matrix argument", error10);
	Debug_out(DEBUG_HOMEWORK, "%s: Filter matrix argument set.\n", fname);

	Debug_out(DEBUG_HOMEWORK, "%s: All kernel arguments set.\n", fname);

	const size_t work_size[2] = {
		COMPUTE_GLOBAL_SIZE(height - filter_size + 1, local_height),
		COMPUTE_GLOBAL_SIZE(width - filter_size + 1, local_width) };
	const size_t wg_size[2] = { local_height, local_width };
	Debug_out(DEBUG_HOMEWORK, "%s: work size is [%zu]x[%zu].\n", fname, work_size[0], work_size[1]);

	/* run kernel */
	cl_event kernel_event;
	ret = clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, work_size, wg_size, 0, NULL, &kernel_event);
	CLUT_CHECK_ERROR(ret, "Unable to enqueue kernel", error10);

	ret = clFinish(command_queue);
	CLUT_CHECK_ERROR(ret, "Unable to finish commands in queue", error10);
	Debug_out(DEBUG_HOMEWORK, "%s: Kernel executed.\n", fname);
	ret = clWaitForEvents(1, &kernel_event);
	CLUT_CHECK_ERROR(ret, "Unable to wait for kernel event", error10);

	/* check that kernel executed correctly */
	cl_int kernel_ret;
	ret = clGetEventInfo(kernel_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(kernel_ret), &kernel_ret, NULL);
	CLUT_CHECK_ERROR(ret, "Unable to get kernel status", error10);
	Debug_out(DEBUG_HOMEWORK, "%s: Kernel status is %d.\n", fname, kernel_ret);
	if (CL_COMPLETE != kernel_ret) {
		Debug_out(DEBUG_HOMEWORK, "%s: kernel execution failed: %s.\n", fname, clut_getErrorDescription(kernel_ret));
		goto error10;
	}

	cl_ulong end_time;
	ret = clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_END, sizeof(end_time), &end_time, NULL);
	CLUT_CHECK_ERROR(ret, "Unable to get kernel event end time", error10);
	if (0 == end_time) {
		Debug_out(DEBUG_HOMEWORK, "%s: kernel execution took 0 seconds.\n", fname);
		goto error10;
	}

	cl_double time_double = clut_getEventDuration(kernel_event);
	cl_ulong time_ulong = clut_getEventDuration_ns(kernel_event);
	Debug_out(DEBUG_HOMEWORK, "%s: Blurring took %f seconds (%lld nanoseconds).\n", fname, time_double, time_ulong);

	/* save image back to file */
	clut_saveImageToFile("output_unlimited.png", command_queue, result_image);

	/* output filter size, local width, local height, and duration in nanoseconds for profiling */
	printf("%d,%zu,%zu,%lld\n", filter_size, local_width, local_height, clut_getEventDuration_ns(kernel_event));

	return_value = 0;

error10:
	clReleaseMemObject(filter_matrix_buffer);
error9:
	free(filter_matrix);
error8:
	clReleaseMemObject(result_image);
error7:
	clReleaseMemObject(source_image);
error6:
	clReleaseCommandQueue(command_queue);
error5:
	clReleaseKernel(kernel);
error4:
	clReleaseProgram(program);
error3:
	clReleaseContext(context);
error2:
	free(flags);
error1:
	return return_value;
}
Beispiel #7
0
int main(int argc, char *argv[])
{
    cl_int ret;
    
    /* get platform ID */
    cl_platform_id platform_id;
    ret = clGetPlatformIDs(1, &platform_id, NULL);
    assert(CL_SUCCESS == ret);

    /* get device IDs */
    cl_device_id device_id;
    ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, 1, &device_id, NULL);
	assert(CL_SUCCESS == ret);
    
    /* create context */
    cl_context context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
    assert(CL_SUCCESS == ret);

    /* create command queue */
    cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
    assert(CL_SUCCESS == ret);

    /* create image object */
    cl_image_format format;
    format.image_channel_order = CL_R;
    format.image_channel_data_type = CL_UNSIGNED_INT8;
    
    cl_image_desc desc;
	memset(&desc, 0, sizeof(desc));
    desc.image_type = CL_MEM_OBJECT_IMAGE2D;
    desc.image_width  = IMAGE_W;
    desc.image_height = IMAGE_H;
    
    cl_mem image = clCreateImage(context, 0, &format, &desc, NULL, &ret);
    assert(CL_SUCCESS == ret);

	/* filling background image */
    {
        const size_t origin[] = {0, 0, 0};
        const size_t region[] = {IMAGE_W, IMAGE_H, 1};
 		cl_uchar4 fill_color;
		fill_color.s[0] = 0;
		fill_color.s[1] = 0;
		fill_color.s[2] = 0;
		fill_color.s[3] = 0;
        ret = clEnqueueFillImage(command_queue, image, &fill_color, origin, region, 0, NULL, NULL);
        assert(CL_SUCCESS == ret);
    }

    /* filling front image */
    {
        const size_t origin[] = {(IMAGE_W*1)/4, (IMAGE_H*1)/4, 0};
        const size_t region[] = {(IMAGE_W*2)/4, (IMAGE_H*2)/4, 1};
        cl_uchar4 fill_color;
		fill_color.s[0] = 255;
		fill_color.s[1] = 0;
		fill_color.s[2] = 0;
		fill_color.s[3] = 0;
        ret = clEnqueueFillImage(command_queue, image, &fill_color, origin, region, 0, NULL, NULL);
        assert(CL_SUCCESS == ret);
    }

    /* reading image */
    cl_uchar *data = NULL;
    {
        size_t num_channels = 1;
        data = static_cast<cl_uchar*>(ALIGNED_MALLOC(IMAGE_W*IMAGE_H*sizeof(cl_uchar), num_channels*sizeof(cl_uchar)));
		assert(NULL != data);
		std::fill(&data[0], &data[IMAGE_W*IMAGE_H], 128);
        
        const size_t origin[] = {0, 0, 0};
        const size_t region[] = {IMAGE_W, IMAGE_H, 1};
        ret = clEnqueueReadImage(command_queue, image, CL_TRUE, origin, region, IMAGE_W*sizeof(cl_uchar), 0, data, 0, NULL, NULL);
        assert(CL_SUCCESS == ret);
    }

    /* print image */
    for (unsigned int h=0; h<IMAGE_H; ++h)
    {
        for (unsigned int w=0; w<IMAGE_W; ++w)
        {
            std::cout << std::setw(5) << std::right << static_cast<int>(data[h*IMAGE_W+w]);
        }
        std::cout << std::endl;
    }

    /* finalizing */
    ALIGNED_FREE(data);

    clReleaseMemObject(image);
    
    clReleaseCommandQueue(command_queue);
    clReleaseContext(context);

    return 0;
}
int 
ImageOverlap::setupCL()
{
    cl_int status = CL_SUCCESS;
    cl_device_type dType;

    if(deviceType.compare("cpu") == 0)
    {
        dType = CL_DEVICE_TYPE_CPU;
    }
    else //deviceType = "gpu" 
	{
		dType = CL_DEVICE_TYPE_GPU;
		if(isThereGPU() == false)
		{
			std::cout << "GPU not found. Falling back to CPU device" << std::endl;
			dType = CL_DEVICE_TYPE_CPU;
		}
	}

    /*
     * Have a look at the available platforms and pick either
     * the AMD one if available or a reasonable default.
     */
    cl_platform_id platform = NULL;
    int retValue = sampleCommon->getPlatform(platform, platformId, isPlatformEnabled());
    CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::getPlatform() failed");

    // Display available devices.
    retValue = sampleCommon->displayDevices(platform, dType);
    CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::displayDevices() failed");

    // If we could find our platform, use it. Otherwise use just available platform.
    cl_context_properties cps[3] = 
    {
        CL_CONTEXT_PLATFORM, 
        (cl_context_properties)platform, 
        0
    };

    context = clCreateContextFromType(
        cps,
        dType,
        NULL,
        NULL,
        &status);
    CHECK_OPENCL_ERROR(status, "clCreateContextFromType failed.");

    // getting device on which to run the sample
    status = sampleCommon->getDevices(context, &devices, deviceId, isDeviceIdEnabled());
    CHECK_ERROR(status, SDK_SUCCESS, "sampleCommon::getDevices() failed");

    status = deviceInfo.setDeviceInfo(devices[deviceId]);
    CHECK_OPENCL_ERROR(status, "deviceInfo.setDeviceInfo failed");

    if(!deviceInfo.imageSupport)
    {
        OPENCL_EXPECTED_ERROR(" Expected Error: Device does not support Images");
    }
	 
	blockSizeX = deviceInfo.maxWorkGroupSize<GROUP_SIZE?deviceInfo.maxWorkGroupSize:GROUP_SIZE;

    // Create command queue
	cl_command_queue_properties prop = 0;
	for(int i=0;i<3;i++)
	{
		commandQueue[i] = clCreateCommandQueue(
			context,
			devices[deviceId],
			prop,
			&status);
		 CHECK_OPENCL_ERROR(status,"clCreateCommandQueuefailed.");
	}

    // Create and initialize image objects

	// Create map image
	mapImage = clCreateImage(context,
		CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
		&imageFormat,
		&image_desc,
		mapImageData,
		&status);
	CHECK_OPENCL_ERROR(status,"clCreateBuffer failed. (mapImage)");
	int color[4] = {0,0,80,255};
	size_t origin[3] = {300,300,0};
	size_t region[3] = {100,100,1};
	status = clEnqueueFillImage(commandQueue[0], mapImage, color, origin, region, NULL, NULL, &eventlist[0]);

    // Create fill image
	fillImage = clCreateImage(context,
		CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
		&imageFormat,
		&image_desc,
		fillImageData,
		&status);
	CHECK_OPENCL_ERROR(status,"clCreateBuffer failed. (fillImage)");

	color[0] = 80;
	color[1] = 0;
	color[2] = 0;
	color[3] = 0;
	origin[0] = 50;
	origin[1] = 50;
	status = clEnqueueFillImage(commandQueue[1], fillImage, color, origin, region, NULL, NULL, &eventlist[1]);
	
	//Create output image
	outputImage = clCreateImage(context,
		CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
		&imageFormat,
		&image_desc,
		NULL,
		&status);
	CHECK_OPENCL_ERROR(status,"clCreateBuffer failed. (outputImage)");

    // create a CL program using the kernel source 
    streamsdk::buildProgramData buildData;
    buildData.kernelName = std::string("ImageOverlap_Kernels.cl");
    buildData.devices = devices;
    buildData.deviceId = deviceId;
    buildData.flagsStr = std::string("");
    if(isLoadBinaryEnabled())
        buildData.binaryName = std::string(loadBinary.c_str());

    if(isComplierFlagsSpecified())
        buildData.flagsFileName = std::string(flags.c_str());

    retValue = sampleCommon->buildOpenCLProgram(program, context, buildData);
    CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::buildOpenCLProgram() failed");

    // get a kernel object handle for a kernel with the given name 
	kernelOverLap = clCreateKernel(program, "OverLap", &status);
	CHECK_OPENCL_ERROR(status,"clCreateKernel failed.(OverLap)");

    return SDK_SUCCESS;
}