cl_int debug_voronoi_features(Opencl_Runtime& runtime, cl_mem voronoi_image, cl_mem marks_image, unsigned int color, unsigned int border) { size_t width; clGetImageInfo(voronoi_image, CL_IMAGE_WIDTH, sizeof(width), &width, 0); size_t height; clGetImageInfo(voronoi_image, CL_IMAGE_HEIGHT, sizeof(height), &height, 0); size_t global_work_size[] = { width, height }; cl_kernel kernel = runtime.kernels[kernel_id_mark_features_debug]; cl_uint color_value = static_cast<cl_uint>(color); cl_int border_value = static_cast<cl_int>(border); cl_uint width_value = static_cast<cl_uint>(width); cl_uint height_value = static_cast<cl_uint>(height); clSetKernelArg(kernel, 0, sizeof(cl_mem), &marks_image); clSetKernelArg(kernel, 1, sizeof(cl_mem), &voronoi_image); clSetKernelArg(kernel, 2, sizeof(cl_uint), &color_value); clSetKernelArg(kernel, 3, sizeof(cl_int), &border_value); clSetKernelArg(kernel, 4, sizeof(cl_uint), &width_value); clSetKernelArg(kernel, 5, sizeof(cl_uint), &height_value); return clEnqueueNDRangeKernel(runtime.queue, kernel, 2, 0, global_work_size, 0, 0, 0, 0); }
void print_cl_image_2d_info(const cl_mem& mem) { // Query image created from texture CLInfo* clinfo = CLInfo::instance(); if (!clinfo->initialized()) return; cl_int err; size_t img_width, img_height, img_depth; cl_image_format img_format; size_t bytes_written; err = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(size_t), &img_width, &bytes_written); if (error_cl(err, "clGetImageInfo CL_IMAGE_WIDTH")) return; err = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(size_t), &img_height, &bytes_written); if (error_cl(err, "clGetImageInfo CL_IMAGE_HEIGHT")) return; err = clGetImageInfo(mem, CL_IMAGE_DEPTH, sizeof(size_t), &img_depth, &bytes_written); if (error_cl(err, "clGetImageInfo CL_IMAGE_DEPTH")) return; err = clGetImageInfo(mem, CL_IMAGE_FORMAT, sizeof(cl_image_format), &img_format, &bytes_written); if (error_cl(err, "clGetImageInfo CL_IMAGE_FORMAT")) return; std::cout << "OpenCL image from texture info: " << std::endl; std::cout << "\tTexture image width: " << img_width << std::endl; std::cout << "\tTexture image height: " << img_height << std::endl; std::cout << "\tTexture image depth: " << img_depth << std::endl; std::cout << "\tTexture image channel order: " << img_format.image_channel_order << std::endl; std::cout << "\tTexture image channel data type: " << img_format.image_channel_data_type << std::endl; }
/* Java->C glue code: * Java package: com.jogamp.opencl.impl.CLImpl * Java method: java.nio.ByteBuffer clEnqueueMapImage(long command_queue, long image, int blocking_map, long map_flags, com.jogamp.gluegen.runtime.PointerBuffer origin, com.jogamp.gluegen.runtime.PointerBuffer range, com.jogamp.gluegen.runtime.PointerBuffer image_row_pitch, com.jogamp.gluegen.runtime.PointerBuffer image_slice_pitch, int num_events_in_wait_list, com.jogamp.gluegen.runtime.PointerBuffer event_wait_list, com.jogamp.gluegen.runtime.PointerBuffer event, java.nio.IntBuffer errcode_ret) * C function: void * clEnqueueMapImage(cl_command_queue command_queue, cl_mem image, uint32_t blocking_map, uint64_t map_flags, const size_t * , const size_t * , size_t * image_row_pitch, size_t * image_slice_pitch, uint32_t num_events_in_wait_list, cl_event * event_wait_list, cl_event * event, int32_t * errcode_ret); */ JNIEXPORT jobject JNICALL Java_com_jogamp_opencl_impl_CLImpl_clEnqueueMapImage0__JJIJLjava_lang_Object_2ILjava_lang_Object_2ILjava_lang_Object_2ILjava_lang_Object_2IILjava_lang_Object_2ILjava_lang_Object_2ILjava_lang_Object_2I(JNIEnv *env, jobject _unused, jlong command_queue, jlong image, jint blocking_map, jlong map_flags, jobject origin, jint origin_byte_offset, jobject range, jint range_byte_offset, jobject image_row_pitch, jint image_row_pitch_byte_offset, jobject image_slice_pitch, jint image_slice_pitch_byte_offset, jint num_events_in_wait_list, jobject event_wait_list, jint event_wait_list_byte_offset, jobject event, jint event_byte_offset, jobject errcode_ret, jint errcode_ret_byte_offset) { size_t * _origin_ptr = NULL; size_t * _range_ptr = NULL; size_t * _image_row_pitch_ptr = NULL; size_t * _image_slice_pitch_ptr = NULL; cl_event * _event_wait_list_ptr = NULL; cl_event * _event_ptr = NULL; int32_t * _errcode_ret_ptr = NULL; size_t * elements = NULL; size_t * depth = NULL; size_t pixels; cl_int status; void * _res; if (origin != NULL) { _origin_ptr = (size_t *) (((char*) (*env)->GetDirectBufferAddress(env, origin)) + origin_byte_offset); } if (range != NULL) { _range_ptr = (size_t *) (((char*) (*env)->GetDirectBufferAddress(env, range)) + range_byte_offset); } if (image_row_pitch != NULL) { _image_row_pitch_ptr = (size_t *) (((char*) (*env)->GetDirectBufferAddress(env, image_row_pitch)) + image_row_pitch_byte_offset); } if (image_slice_pitch != NULL) { _image_slice_pitch_ptr = (size_t *) (((char*) (*env)->GetDirectBufferAddress(env, image_slice_pitch)) + image_slice_pitch_byte_offset); } if (event_wait_list != NULL) { _event_wait_list_ptr = (cl_event *) (((char*) (*env)->GetDirectBufferAddress(env, event_wait_list)) + event_wait_list_byte_offset); } if (event != NULL) { _event_ptr = (cl_event *) (((char*) (*env)->GetDirectBufferAddress(env, event)) + event_byte_offset); } if (errcode_ret != NULL) { _errcode_ret_ptr = (int32_t *) (((char*) (*env)->GetDirectBufferAddress(env, errcode_ret)) + errcode_ret_byte_offset); } _res = clEnqueueMapImage((cl_command_queue) (intptr_t) command_queue, (cl_mem) (intptr_t) image, (uint32_t) blocking_map, (uint64_t) map_flags, (size_t *) _origin_ptr, (size_t *) _range_ptr, (size_t *) _image_row_pitch_ptr, (size_t *) _image_slice_pitch_ptr, (uint32_t) num_events_in_wait_list, (cl_event *) _event_wait_list_ptr, (cl_event *) _event_ptr, (int32_t *) _errcode_ret_ptr); if (_res == NULL) return NULL; // calculate buffer size status = clGetImageInfo((cl_mem) (intptr_t) image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), (void *) elements, NULL); status |= clGetImageInfo((cl_mem) (intptr_t) image, CL_IMAGE_DEPTH, sizeof(size_t), (void *) depth, NULL); if(status != CL_SUCCESS) return NULL; if(*depth == 0) { // 2D pixels = (*_image_row_pitch_ptr) * _range_ptr[1] + _range_ptr[0]; }else{ // 3D pixels = (*_image_slice_pitch_ptr) * _range_ptr[2] + (*_image_row_pitch_ptr) * _range_ptr[1] + _range_ptr[0]; } return (*env)->NewDirectByteBuffer(env, _res, pixels * (*elements)); }
cl_int WINAPI wine_clGetImageInfo(cl_mem image, cl_image_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int ret; TRACE("\n"); ret = clGetImageInfo(image, param_name, param_value_size, param_value, param_value_size_ret); return ret; }
/* static */ cl_int MemoryObjectWrapper::imageInfoHelper (Wrapper const* aInstance, int aName, size_t aSize, void* aValueOut, size_t* aSizeOut) { cl_int err = CL_SUCCESS; MemoryObjectWrapper const* instance = dynamic_cast<MemoryObjectWrapper const*>(aInstance); VALIDATE_ARG_POINTER (instance, &err, err); return clGetImageInfo (instance->getWrapped (), aName, aSize, aValueOut, aSizeOut); }
static VALUE rcl_mem_image_info(VALUE self, VALUE param_name) { EXPECT_RCL_CONST(param_name); cl_image_info ii = FIX2UINT(param_name); cl_mem m = MemoryPtr(self); cl_image_format imgfmt; cl_int res = clGetImageInfo(m, ii, sizeof(cl_image_format), (void *)&imgfmt, NULL); CHECK_AND_RAISE(res); switch (ii) { case CL_IMAGE_FORMAT: return RImageFormat(&imgfmt); case CL_IMAGE_ELEMENT_SIZE: case CL_IMAGE_ROW_PITCH: case CL_IMAGE_SLICE_PITCH: case CL_IMAGE_WIDTH: case CL_IMAGE_HEIGHT: case CL_IMAGE_DEPTH: return ULONG2NUM(*(size_t *)&imgfmt); } return Qnil; }
cl_int mark_voronoi_features(Opencl_Runtime& runtime, cl_mem voronoi_image) { cl_int error_code = allocate_voronoi_features(runtime, voronoi_image); CORRIDORMAP_CHECK_OCL(error_code); size_t width; clGetImageInfo(voronoi_image, CL_IMAGE_WIDTH, sizeof(width), &width, 0); size_t height; clGetImageInfo(voronoi_image, CL_IMAGE_HEIGHT, sizeof(height), &height, 0); size_t global_work_size[] = { width, height }; cl_kernel kernel = runtime.kernels[kernel_id_mark_features]; clSetKernelArg(kernel, 0, sizeof(cl_mem), &voronoi_image); clSetKernelArg(kernel, 1, sizeof(cl_mem), &runtime.voronoi_vertices_img); clSetKernelArg(kernel, 2, sizeof(cl_mem), &runtime.voronoi_edges_img); return clEnqueueNDRangeKernel(runtime.queue, kernel, 2, 0, global_work_size, 0, 0, 0, 0); }
bool CLImage::get_cl_image_info (cl_image_info param_name, size_t param_size, void *param, size_t *param_size_ret) { cl_mem mem_id = get_mem_id (); cl_int error_code = CL_SUCCESS; if (!mem_id) return false; error_code = clGetImageInfo (mem_id, param_name, param_size, param, param_size_ret); XCAM_FAIL_RETURN( WARNING, error_code == CL_SUCCESS, false, "clGetImageInfo failed on param:%d, errno:%d", param_name, error_code); return true; }
sge::opencl::memory_object::image::volume::volume( context::object &_context, memory_object::flags_field const &_mem_flags, renderer::texture::volume &_renderer_texture) : impl_(), image_format_(), size_( fcppt::math::dim::structure_cast< sge::opencl::dim3, fcppt::cast::size_fun >( _renderer_texture.size() ) ) { cl_int error_code; impl_ = clCreateFromGLTexture3D( _context.impl(), sge::opencl::impl::memory_object::to_opencl_mem_flags( _mem_flags), dynamic_cast<renderer::opengl::texture::base &>( _renderer_texture).type().get(), // mip level 0, dynamic_cast<renderer::opengl::texture::base &>( _renderer_texture).id().get(), &error_code); opencl::impl::handle_error( error_code, FCPPT_TEXT("clCreateFromGLTexture3D()")); error_code = clGetImageInfo( impl_, CL_IMAGE_FORMAT, sizeof(cl_image_format), &image_format_, nullptr); opencl::impl::handle_error( error_code, FCPPT_TEXT("clGetImageInfo(image format)")); }
bool CL_Image3D::Create(cl_uint uWidth, cl_uint uHeight, cl_uint uDepth, cl_uint uRowPitch, void* pImgInput, CL_ImageOrder OrderType, CL_ImageChannel ChannelType, CL_MemAccess AccessType, CL_MemStorage StorageType) { CL_CPP_CONDITIONAL_RETURN_FALSE(m_Image); CL_CPP_CONDITIONAL_RETURN_FALSE(!m_pContextRef); CL_CPP_CONDITIONAL_RETURN_FALSE(!m_pContextRef->IsValid()); // CL_CPP_CONDITIONAL_RETURN_FALSE((StorageType == CL_MemStorage_UseHostInput || StorageType == CL_MemStorage_CopyInputToDevice) && !pImgInput); cl_mem_flags uMemFlags = 0; cl_image_format ImgFormat; // Determine the access flags. switch(AccessType) { case CL_MemAccess_ReadOnly: uMemFlags = CL_MEM_READ_ONLY; break; case CL_MemAccess_WriteOnly: uMemFlags = CL_MEM_WRITE_ONLY; break; case CL_MemAccess_ReadWrite: uMemFlags = CL_MEM_READ_WRITE; break; default: return false; } // Determine the storage flags. switch(StorageType) { case CL_MemStorage_AllocateOnDevice: /* default setting, do nothing */ break; case CL_MemStorage_AllocateOnHost: uMemFlags |= CL_MEM_ALLOC_HOST_PTR; break; case CL_MemStorage_UseHostInput: uMemFlags |= CL_MEM_USE_HOST_PTR; break; case CL_MemStorage_CopyInputToDevice: uMemFlags |= CL_MEM_COPY_HOST_PTR; break; default: return false; } // Determine the image channel order. switch(OrderType) { case CL_ImageOrder_R: ImgFormat.image_channel_order = CL_R; break; case CL_ImageOrder_A: ImgFormat.image_channel_order = CL_A; break; case CL_ImageOrder_RG: ImgFormat.image_channel_order = CL_RG; break; case CL_ImageOrder_RA: ImgFormat.image_channel_order = CL_RA; break; case CL_ImageOrder_RGB: ImgFormat.image_channel_order = CL_RGB; break; case CL_ImageOrder_RGBA: ImgFormat.image_channel_order = CL_RGBA; break; case CL_ImageOrder_BGRA: ImgFormat.image_channel_order = CL_BGRA; break; case CL_ImageOrder_ARGB: ImgFormat.image_channel_order = CL_ARGB; break; case CL_ImageOrder_Intensity: ImgFormat.image_channel_order = CL_INTENSITY; break; case CL_ImageOrder_Luminance: ImgFormat.image_channel_order = CL_LUMINANCE; break; #ifdef CL_VERSION_1_1 case CL_ImageOrder_Rx: ImgFormat.image_channel_order = CL_Rx; break; case CL_ImageOrder_RGx: ImgFormat.image_channel_order = CL_RGx; break; case CL_ImageOrder_RGBx: ImgFormat.image_channel_order = CL_RGBx; break; #endif } // Determine the image channel data type. switch(ChannelType) { case CL_ImageChannel_Norm_Int8: ImgFormat.image_channel_data_type = CL_SNORM_INT8; break; case CL_ImageChannel_Norm_Int16: ImgFormat.image_channel_data_type = CL_SNORM_INT16; break; case CL_ImageChannel_Norm_UInt8: ImgFormat.image_channel_data_type = CL_UNORM_INT8; break; case CL_ImageChannel_Norm_UInt16: ImgFormat.image_channel_data_type = CL_UNORM_INT16; break; case CL_ImageChannel_Norm_UShort_555: ImgFormat.image_channel_data_type = CL_UNORM_SHORT_555; break; case CL_ImageChannel_Norm_UShort_565: ImgFormat.image_channel_data_type = CL_UNORM_SHORT_565; break; case CL_ImageChannel_Norm_UInt_101010: ImgFormat.image_channel_data_type = CL_UNORM_INT_101010; break; case CL_ImageChannel_Int8: ImgFormat.image_channel_data_type = CL_SIGNED_INT8; break; case CL_ImageChannel_Int16: ImgFormat.image_channel_data_type = CL_SIGNED_INT16; break; case CL_ImageChannel_Int32: ImgFormat.image_channel_data_type = CL_SIGNED_INT32; break; case CL_ImageChannel_UInt8: ImgFormat.image_channel_data_type = CL_UNSIGNED_INT8; break; case CL_ImageChannel_UInt16: ImgFormat.image_channel_data_type = CL_UNSIGNED_INT16; break; case CL_ImageChannel_UInt32: ImgFormat.image_channel_data_type = CL_UNSIGNED_INT32; break; case CL_ImageChannel_Float16: ImgFormat.image_channel_data_type = CL_HALF_FLOAT; break; case CL_ImageChannel_Float32: ImgFormat.image_channel_data_type = CL_FLOAT; break; } cl_context Context = m_pContextRef->GetContext(); cl_int iErrorCode = CL_SUCCESS; cl_uint uSlicePitch = uRowPitch * uHeight; // Create the image object. #if defined(CL_VERSION_1_2) cl_image_desc ImgDesc; ImgDesc.image_width = uWidth; ImgDesc.image_height = uHeight; ImgDesc.image_depth = uDepth; ImgDesc.image_array_size = 1; ImgDesc.image_row_pitch = (pImgInput) ? uRowPitch : 0; ImgDesc.image_slice_pitch = (pImgInput) ? uSlicePitch : 0; ImgDesc.num_mip_levels = 0; ImgDesc.num_samples = 0; ImgDesc.buffer = NULL; m_Image = clCreateImage(Context, uMemFlags, &ImgFormat, &ImgDesc, pImgInput, &iErrorCode); #else m_Image = clCreateImage3D(Context, uMemFlags, &ImgFormat, uWidth, uHeight, uDepth, (pImgInput) ? uRowPitch : 0, (pImgInput) ? uSlicePitch : 0, pImgInput, &iErrorCode); #endif CL_CPP_CATCH_ERROR(iErrorCode); CL_CPP_ON_ERROR_RETURN_FALSE(iErrorCode); m_uWidth = uWidth; m_uHeight = uHeight; m_uDepth = uDepth; m_uRowPitch = uRowPitch; m_uSlicePitch = uSlicePitch; m_uTotalSize = m_uSlicePitch * m_uHeight; // Ask OpenCL for the sizeo of each individual element in this image. clGetImageInfo(m_Image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &m_uElementSize, NULL); m_ImageOrder = OrderType; m_ImageChannel = ChannelType; m_MemAccess = AccessType; m_MemStorage = StorageType; return true; }
static void* piglit_cl_get_info(void* fn_ptr, void* obj, cl_uint param) { cl_int errNo; size_t param_size; void* param_ptr = NULL; /* get param size */ if(fn_ptr == clGetPlatformInfo) { errNo = clGetPlatformInfo(*(cl_platform_id*)obj, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetDeviceInfo) { errNo = clGetDeviceInfo(*(cl_device_id*)obj, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetContextInfo) { errNo = clGetContextInfo(*(cl_context*)obj, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetCommandQueueInfo) { errNo = clGetCommandQueueInfo(*(cl_command_queue*)obj, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetMemObjectInfo) { errNo = clGetMemObjectInfo(*(cl_mem*)obj, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetImageInfo) { errNo = clGetImageInfo(*(cl_mem*)obj, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetSamplerInfo) { errNo = clGetSamplerInfo(*(cl_sampler*)obj, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetProgramInfo) { errNo = clGetProgramInfo(*(cl_program*)obj, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetProgramBuildInfo) { errNo = clGetProgramBuildInfo(((struct _program_build_info_args*)obj)->program, ((struct _program_build_info_args*)obj)->device, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetKernelInfo) { errNo = clGetKernelInfo(*(cl_kernel*)obj, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetKernelWorkGroupInfo) { errNo = clGetKernelWorkGroupInfo(((struct _kernel_work_group_info_args*)obj)->kernel, ((struct _kernel_work_group_info_args*)obj)->device, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetEventInfo) { errNo = clGetEventInfo(*(cl_event*)obj, param, 0, NULL, ¶m_size); } else if(fn_ptr == clGetEventProfilingInfo) { errNo = clGetEventProfilingInfo(*(cl_event*)obj, param, 0, NULL, ¶m_size); } else { fprintf(stderr, "Trying to get %s information from undefined function.\n", piglit_cl_get_enum_name(param)); piglit_report_result(PIGLIT_FAIL); } if(errNo == CL_SUCCESS) { param_ptr = calloc(param_size, sizeof(char)); /* retrieve param */ if(fn_ptr == clGetPlatformInfo) { errNo = clGetPlatformInfo(*(cl_platform_id*)obj, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetDeviceInfo) { errNo = clGetDeviceInfo(*(cl_device_id*)obj, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetContextInfo) { errNo = clGetContextInfo(*(cl_context*)obj, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetCommandQueueInfo) { errNo = clGetCommandQueueInfo(*(cl_command_queue*)obj, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetMemObjectInfo) { errNo = clGetMemObjectInfo(*(cl_mem*)obj, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetImageInfo) { errNo = clGetImageInfo(*(cl_mem*)obj, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetSamplerInfo) { errNo = clGetSamplerInfo(*(cl_sampler*)obj, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetProgramInfo) { errNo = clGetProgramInfo(*(cl_program*)obj, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetProgramBuildInfo) { errNo = clGetProgramBuildInfo(((struct _program_build_info_args*)obj)->program, ((struct _program_build_info_args*)obj)->device, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetKernelInfo) { errNo = clGetKernelInfo(*(cl_kernel*)obj, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetKernelWorkGroupInfo) { errNo = clGetKernelWorkGroupInfo(((struct _kernel_work_group_info_args*)obj)->kernel, ((struct _kernel_work_group_info_args*)obj)->device, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetEventInfo) { errNo = clGetEventInfo(*(cl_event*)obj, param, param_size, param_ptr, NULL); } else if(fn_ptr == clGetEventProfilingInfo) { errNo = clGetEventProfilingInfo(*(cl_event*)obj, param, param_size, param_ptr, NULL); } if(errNo != CL_SUCCESS) { free(param_ptr); param_ptr = NULL; } } if(param_ptr == NULL) { fprintf(stderr, "Unable to get %s information (Error: %s)\n", piglit_cl_get_enum_name(param), piglit_cl_get_error_name(errNo)); piglit_report_result(PIGLIT_FAIL); } return param_ptr; }
/*! * @function clut_blurImage_local_unlimited * Blurs the image at [filename] with a filter of size [filter_size], and saves the result * to the file "output_unlimited.png". This function should be optimized to run on * local memory. * @param filename * The name of the file. * @param filter_size * The size of the blur filter. * @return * 0 on success, non-0 on failure. */ int clut_blurImage_local_unlimited(const cl_device_id device, const char * const filename, const unsigned int filter_size) { const char * const fname = "clut_blurImage_local"; int return_value = 1; cl_int ret; if (NULL == filename) { Debug_out(DEBUG_HOMEWORK, "%s: NULL pointer argument.\n", fname); goto error1; } /* compute work group size */ size_t local_width, local_height; if (0 != clut_getMaxWGSize(device, &local_width, &local_height)) { Debug_out(DEBUG_HOMEWORK, "%s: Unable to get work group sizes.\n", fname); goto error1; } Debug_out(DEBUG_HOMEWORK, "%s: Max work group size is [%zu]x[%zu].\n", fname, local_width, local_height); /* openCL wants to know the size of __local statically allocated arrays at compile time, * so the local size must be set with a #define */ char *flags = calloc(128, sizeof(char)); if (NULL == flags) { Debug_out(DEBUG_HOMEWORK, "%s: A calloc failed.\n", fname); goto error1; } sprintf(flags, "-D LOCAL_WIDTH=%zu -D LOCAL_HEIGHT=%zu -D FILTER_SIZE=%d", local_width, local_height, filter_size); Debug_out(DEBUG_HOMEWORK, "%s: Local flags are: '%s'.\n", fname, flags); /* Create context */ cl_context context = clCreateContext(NULL, 1, &device, clut_contextCallback, "clut_blurImage_local_unlimited", &ret); CLUT_CHECK_ERROR(ret, "Unable to create context", error2); Debug_out(DEBUG_HOMEWORK, "%s: Created context successfully.\n", fname); /* Create program */ cl_program program = clut_createProgramFromFile(context, "homework_unlimited.cl", flags); if (NULL == program) { Debug_out(DEBUG_HOMEWORK, "%s: Unable to create program.\n", fname); goto error3; } Debug_out(DEBUG_HOMEWORK, "%s: Program created.\n", fname); /* Create kernel */ cl_kernel kernel = clCreateKernel(program, "blurImage_local_unlimited", &ret); CLUT_CHECK_ERROR(ret, "Unable to create kernel", error4); Debug_out(DEBUG_HOMEWORK, "%s: Kernel created.\n", fname); /* Create command_queue */ cl_command_queue command_queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &ret); CLUT_CHECK_ERROR(ret, "Unable to create command queue", error5); Debug_out(DEBUG_HOMEWORK, "%s: Command queue created.\n", fname); /* open source image */ int width, height; cl_mem source_image = clut_loadImageFromFile(context, filename, &width, &height); if (NULL == source_image) { Debug_out(DEBUG_HOMEWORK, "%s: Unable to read source image.\n", fname); goto error6; } if ((filter_size > (unsigned int) width) || (filter_size > (unsigned int) height)) { Debug_out(DEBUG_HOMEWORK, "%s: Filter does not fit in image.\n", fname); goto error7; } /* crate destination image */ cl_image_format image_format = {0, 0}; cl_image_desc image_desc = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; image_desc.image_type = CL_MEM_OBJECT_IMAGE2D; // image_desc.image_width = 0; // image_desc.image_height = 0; // image_desc.image_depth = 0; /* only for 3D images */ // image_desc.image_array_size = 0; /* only for image arrays */ // image_desc.image_row_pitch = 0; // image_desc.image_slice_pitch = 0; /* only for 3D images */ // image_desc.num_mip_levels = 0; /* mandatory */ // image_desc.num_samples = 0; /* mandatory */ // image_desc.buffer = NULL; /* only for 1D image buffers */ ret = clGetImageInfo(source_image, CL_IMAGE_FORMAT, sizeof(image_format), &image_format, NULL); CLUT_CHECK_ERROR(ret, "Unable to get source image format information", error7); int components = clut_getImageFormatComponents(image_format); if (0 > components) { Debug_out(DEBUG_HOMEWORK, "%s: Unknown components for source image.\n", fname); goto error7; } Debug_out(DEBUG_HOMEWORK, "%s: Source image has %d components.\n", fname, components); image_desc.image_width = width - filter_size + 1; image_desc.image_height = height - filter_size + 1; image_desc.image_row_pitch = image_desc.image_width * components; cl_mem result_image = clCreateImage(context, CL_MEM_WRITE_ONLY, &image_format, &image_desc, NULL, &ret); CLUT_CHECK_ERROR(ret, "Unable to create second image", error7); /* fill result image with black */ const unsigned int fill_color[4] = { 0, 0, 0, 255 }; const size_t fill_origin[3] = { 0, 0, 0 }; const size_t fill_region[3] = { width - filter_size + 1, height - filter_size + 1, 1 }; ret = clEnqueueFillImage(command_queue, result_image, fill_color, fill_origin, fill_region, 0, NULL, NULL); CLUT_CHECK_ERROR(ret, "Unable to fill result image", error8); Debug_out(DEBUG_HOMEWORK, "%s: Images created.\n", fname); /* create filter matrix */ unsigned char *filter_matrix = createFilterMatrix(filter_size); if (NULL == filter_matrix) { Debug_out(DEBUG_HOMEWORK, "%s: Unable to create filter matrix.\n", fname); goto error8; } Debug_out(DEBUG_HOMEWORK, "%s: Filter matrix created.\n", fname); // printFilterMatrix(filter_matrix, filter_size); /* copy filter matrix to device */ cl_mem filter_matrix_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, filter_size * filter_size, filter_matrix, &ret); CLUT_CHECK_ERROR(ret, "Unable to create filter matrix buffer on device", error9); /* set kernel arguments */ ret = clSetKernelArg(kernel, 0, sizeof(source_image), (void *) &source_image); CLUT_CHECK_ERROR(ret, "Unable to set source image argument", error10); Debug_out(DEBUG_HOMEWORK, "%s: Source image argument set.\n", fname); ret = clSetKernelArg(kernel, 1, sizeof(result_image), (void *) &result_image); CLUT_CHECK_ERROR(ret, "Unable to set result image argument", error10); Debug_out(DEBUG_HOMEWORK, "%s: Result image argument set.\n", fname); ret = clSetKernelArg(kernel, 2, sizeof(filter_matrix_buffer), (void *) &filter_matrix_buffer); CLUT_CHECK_ERROR(ret, "Unable to set filter matrix argument", error10); Debug_out(DEBUG_HOMEWORK, "%s: Filter matrix argument set.\n", fname); Debug_out(DEBUG_HOMEWORK, "%s: All kernel arguments set.\n", fname); const size_t work_size[2] = { COMPUTE_GLOBAL_SIZE(height - filter_size + 1, local_height), COMPUTE_GLOBAL_SIZE(width - filter_size + 1, local_width) }; const size_t wg_size[2] = { local_height, local_width }; Debug_out(DEBUG_HOMEWORK, "%s: work size is [%zu]x[%zu].\n", fname, work_size[0], work_size[1]); /* run kernel */ cl_event kernel_event; ret = clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, work_size, wg_size, 0, NULL, &kernel_event); CLUT_CHECK_ERROR(ret, "Unable to enqueue kernel", error10); ret = clFinish(command_queue); CLUT_CHECK_ERROR(ret, "Unable to finish commands in queue", error10); Debug_out(DEBUG_HOMEWORK, "%s: Kernel executed.\n", fname); ret = clWaitForEvents(1, &kernel_event); CLUT_CHECK_ERROR(ret, "Unable to wait for kernel event", error10); /* check that kernel executed correctly */ cl_int kernel_ret; ret = clGetEventInfo(kernel_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(kernel_ret), &kernel_ret, NULL); CLUT_CHECK_ERROR(ret, "Unable to get kernel status", error10); Debug_out(DEBUG_HOMEWORK, "%s: Kernel status is %d.\n", fname, kernel_ret); if (CL_COMPLETE != kernel_ret) { Debug_out(DEBUG_HOMEWORK, "%s: kernel execution failed: %s.\n", fname, clut_getErrorDescription(kernel_ret)); goto error10; } cl_ulong end_time; ret = clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_END, sizeof(end_time), &end_time, NULL); CLUT_CHECK_ERROR(ret, "Unable to get kernel event end time", error10); if (0 == end_time) { Debug_out(DEBUG_HOMEWORK, "%s: kernel execution took 0 seconds.\n", fname); goto error10; } cl_double time_double = clut_getEventDuration(kernel_event); cl_ulong time_ulong = clut_getEventDuration_ns(kernel_event); Debug_out(DEBUG_HOMEWORK, "%s: Blurring took %f seconds (%lld nanoseconds).\n", fname, time_double, time_ulong); /* save image back to file */ clut_saveImageToFile("output_unlimited.png", command_queue, result_image); /* output filter size, local width, local height, and duration in nanoseconds for profiling */ printf("%d,%zu,%zu,%lld\n", filter_size, local_width, local_height, clut_getEventDuration_ns(kernel_event)); return_value = 0; error10: clReleaseMemObject(filter_matrix_buffer); error9: free(filter_matrix); error8: clReleaseMemObject(result_image); error7: clReleaseMemObject(source_image); error6: clReleaseCommandQueue(command_queue); error5: clReleaseKernel(kernel); error4: clReleaseProgram(program); error3: clReleaseContext(context); error2: free(flags); error1: return return_value; }
/*! * @function clut_blurImage * Blurs the image at [filename] with a filter of size [filter_size], and saves the result * to the file "output.png". * @param filename * The name of the file. * @param filter_size * The size of the blur filter. * @return * 0 on success, non-0 on failure. */ int clut_blurImage(const cl_device_id device, const char * const filename, const unsigned int filter_size) { const char * const fname = "clut_blurImage"; int return_value = 1; cl_int ret; if (NULL == filename) { Debug_out(DEBUG_HOMEWORK, "%s: NULL pointer argument.\n", fname); goto error1; } /* Create context */ cl_context context = clCreateContext(NULL, 1, &device, clut_contextCallback, "clut_blurImage", &ret); CLUT_CHECK_ERROR(ret, "Unable to create context", error1); Debug_out(DEBUG_HOMEWORK, "%s: Created context successfully.\n", fname); /* Create program */ cl_program program = clut_createProgramFromFile(context, "homework_global.cl", NULL); if (NULL == program) { Debug_out(DEBUG_HOMEWORK, "%s: Unable to create program.\n", fname); goto error3; } Debug_out(DEBUG_HOMEWORK, "%s: Program created.\n", fname); /* Create kernel */ cl_kernel kernel = clCreateKernel(program, "blurImage", &ret); CLUT_CHECK_ERROR(ret, "Unable to create kernel", error3); Debug_out(DEBUG_HOMEWORK, "%s: Kernel created.\n", fname); /* Create command_queue */ cl_command_queue command_queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &ret); CLUT_CHECK_ERROR(ret, "Unable to create command queue", error4); Debug_out(DEBUG_HOMEWORK, "%s: Command queue created.\n", fname); /* load source image */ int width, height; cl_mem source_image = clut_loadImageFromFile(context, filename, &width, &height); if (NULL == source_image) { Debug_out(DEBUG_HOMEWORK, "%s: Unable to read source image.\n", fname); goto error5; } if ((filter_size > (unsigned int) width) || (filter_size > (unsigned int) height)) { Debug_out(DEBUG_HOMEWORK, "%s: Filter does not fit in image.\n", fname); goto error6; } /* create destination image */ cl_image_format image_format = {0, 0}; cl_image_desc image_desc = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; image_desc.image_type = CL_MEM_OBJECT_IMAGE2D; // image_desc.image_width = 0; // image_desc.image_height = 0; // image_desc.image_depth = 0; /* only for 3D images */ // image_desc.image_array_size = 0; /* only for image arrays */ // image_desc.image_row_pitch = 0; // image_desc.image_slice_pitch = 0; /* only for 3D images */ // image_desc.num_mip_levels = 0; /* mandatory */ // image_desc.num_samples = 0; /* mandatory */ // image_desc.buffer = NULL; /* only for 1D image buffers */ image_desc.image_width = width - filter_size + 1; image_desc.image_height = height - filter_size + 1; ret = clGetImageInfo(source_image, CL_IMAGE_FORMAT, sizeof(image_format), &image_format, NULL); CLUT_CHECK_ERROR(ret, "Unable to get source image format information", error6); cl_mem result_image = clCreateImage(context, CL_MEM_WRITE_ONLY, &image_format, &image_desc, NULL, &ret); CLUT_CHECK_ERROR(ret, "Unable to create second image", error6); Debug_out(DEBUG_HOMEWORK, "%s: Images created.\n", fname); /* create filter matrix */ unsigned char *filter_matrix = createFilterMatrix(filter_size); if (NULL == filter_matrix) { Debug_out(DEBUG_HOMEWORK, "%s: Unable to create filter matrix.\n", fname); goto error7; } Debug_out(DEBUG_HOMEWORK, "%s: Filter matrix created.\n", fname); // printFilterMatrix(filter_matrix, filter_size); /* copy filter matrix to device */ cl_mem filter_matrix_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, filter_size * filter_size, filter_matrix, &ret); CLUT_CHECK_ERROR(ret, "Unable to create filter matrix buffer on device", error8); /* set kernel arguments */ ret = clSetKernelArg(kernel, 0, sizeof(source_image), (void *) &source_image); CLUT_CHECK_ERROR(ret, "Unable to set source image argument", error9); Debug_out(DEBUG_HOMEWORK, "%s: Source image argument set.\n", fname); ret = clSetKernelArg(kernel, 1, sizeof(result_image), (void *) &result_image); CLUT_CHECK_ERROR(ret, "Unable to set result image argument", error9); Debug_out(DEBUG_HOMEWORK, "%s: Result image argument set.\n", fname); ret = clSetKernelArg(kernel, 2, sizeof(filter_size), (void *) &filter_size); CLUT_CHECK_ERROR(ret, "Unable to set filter size argument", error9); Debug_out(DEBUG_HOMEWORK, "%s: Filter size argument set.\n", fname); ret = clSetKernelArg(kernel, 3, sizeof(filter_matrix_buffer), (void *) &filter_matrix_buffer); CLUT_CHECK_ERROR(ret, "Unable to set filter matrix argument", error9); Debug_out(DEBUG_HOMEWORK, "%s: Filter matrix argument set.\n", fname); Debug_out(DEBUG_HOMEWORK, "%s: All kernel arguments set.\n", fname); /* run kernel */ cl_event kernel_event; const size_t work_size[2] = { height - filter_size + 1, width - filter_size + 1}; ret = clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, work_size, NULL, 0, NULL, &kernel_event); CLUT_CHECK_ERROR(ret, "Unable to enqueue kernel", error9); ret = clFinish(command_queue); CLUT_CHECK_ERROR(ret, "Unable to finish commands in queue", error9); Debug_out(DEBUG_HOMEWORK, "%s: Kernel executed.\n", fname); ret = clWaitForEvents(1, &kernel_event); CLUT_CHECK_ERROR(ret, "Unable to wait for kernel event", error9); /* check that kernel executed correctly */ cl_int kernel_ret; ret = clGetEventInfo(kernel_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(kernel_ret), &kernel_ret, NULL); CLUT_CHECK_ERROR(ret, "Unable to get kernel status", error9); Debug_out(DEBUG_HOMEWORK, "%s: Kernel status is %d.\n", fname, kernel_ret); if (CL_COMPLETE != kernel_ret) { Debug_out(DEBUG_HOMEWORK, "%s: kernel execution failed: %s.\n", fname, clut_getErrorDescription(kernel_ret)); goto error9; } cl_ulong end_time; ret = clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_END, sizeof(end_time), &end_time, NULL); CLUT_CHECK_ERROR(ret, "Unable to get kernel event end time", error9); if (0 == end_time) { Debug_out(DEBUG_HOMEWORK, "%s: kernel execution took 0 seconds.\n", fname); goto error9; } cl_double time_double = clut_getEventDuration(kernel_event); cl_ulong time_ulong = clut_getEventDuration_ns(kernel_event); Debug_out(DEBUG_HOMEWORK, "%s: Blurring took %f seconds (%lld nanoseconds).\n", fname, time_double, time_ulong); /* save image */ clut_saveImageToFile("output.png", command_queue, result_image); /* print filter size and duration in nanoseconds for profiling */ printf("%d,%llu\n", filter_size, clut_getEventDuration_ns(kernel_event)); return_value = 0; error9: clReleaseMemObject(filter_matrix_buffer); error8: free(filter_matrix); error7: clReleaseMemObject(result_image); error6: clReleaseMemObject(source_image); error5: clReleaseCommandQueue(command_queue); error4: clReleaseKernel(kernel); error3: clReleaseProgram(program); error2: clReleaseContext(context); error1: return return_value; }
int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx, size_t *work_size, AVFrame *frame, int plane, int block_alignment) { cl_mem image; cl_mem_object_type type; size_t width, height; cl_int cle; if (frame->format != AV_PIX_FMT_OPENCL) { av_log(avctx, AV_LOG_ERROR, "Invalid frame format %s, " "opencl required.\n", av_get_pix_fmt_name(frame->format)); return AVERROR(EINVAL); } image = (cl_mem)frame->data[plane]; if (!image) { av_log(avctx, AV_LOG_ERROR, "Plane %d required but not set.\n", plane); return AVERROR(EINVAL); } cle = clGetMemObjectInfo(image, CL_MEM_TYPE, sizeof(type), &type, NULL); if (cle != CL_SUCCESS) { av_log(avctx, AV_LOG_ERROR, "Failed to query object type of " "plane %d: %d.\n", plane, cle); return AVERROR_UNKNOWN; } if (type != CL_MEM_OBJECT_IMAGE2D) { av_log(avctx, AV_LOG_ERROR, "Plane %d is not a 2D image.\n", plane); return AVERROR(EINVAL); } cle = clGetImageInfo(image, CL_IMAGE_WIDTH, sizeof(size_t), &width, NULL); if (cle != CL_SUCCESS) { av_log(avctx, AV_LOG_ERROR, "Failed to query plane %d width: %d.\n", plane, cle); return AVERROR_UNKNOWN; } cle = clGetImageInfo(image, CL_IMAGE_HEIGHT, sizeof(size_t), &height, NULL); if (cle != CL_SUCCESS) { av_log(avctx, AV_LOG_ERROR, "Failed to query plane %d height: %d.\n", plane, cle); return AVERROR_UNKNOWN; } if (block_alignment) { width = FFALIGN(width, block_alignment); height = FFALIGN(height, block_alignment); } work_size[0] = width; work_size[1] = height; return 0; }