static gboolean gegl_cl_device_has_extension (cl_device_id device, const char *extension_name) { cl_int cl_err; size_t string_len = 0; gchar *device_ext_string = NULL; gchar **extensions; gboolean found = FALSE; if (!extension_name) return FALSE; cl_err= gegl_clGetDeviceInfo (device, CL_DEVICE_EXTENSIONS, 0, NULL, &string_len); CL_CHECK_ONLY (cl_err); if (!string_len) return FALSE; device_ext_string = g_malloc0 (string_len); cl_err = gegl_clGetDeviceInfo (device, CL_DEVICE_EXTENSIONS, string_len, device_ext_string, NULL); CL_CHECK_ONLY (cl_err); extensions = g_strsplit (device_ext_string, " ", 0); for (gint i = 0; extensions[i] && !found; ++i) { if (!strcmp (extensions[i], extension_name)) found = TRUE; } g_free (device_ext_string); g_strfreev (extensions); return found; }
static gboolean cl_buffer_get_min_max (cl_mem in_tex, size_t global_worksize, const GeglRectangle *roi, gfloat min[4], gfloat max[4]) { cl_int cl_err = 0; size_t local_ws, max_local_ws; size_t work_groups; size_t global_ws; cl_mem cl_aux_min = NULL; cl_mem cl_aux_max = NULL; cl_mem cl_min_max = NULL; cl_int n_pixels = (cl_int)global_worksize; cl_float4 min_max_buf[2]; if (global_worksize < 1) { min[0] = min[1] = min[2] = min[3] = G_MAXFLOAT; max[0] = max[1] = max[2] = max[3] = -G_MAXFLOAT; return FALSE; } cl_err = gegl_clGetDeviceInfo (gegl_cl_get_device (), CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof (size_t), &max_local_ws, NULL); CL_CHECK; max_local_ws = MIN (max_local_ws, MIN (cl_data->work_group_size[0], cl_data->work_group_size[1])); /* Needs to be a power of two */ local_ws = 256; while (local_ws > max_local_ws) local_ws /= 2; work_groups = MIN ((global_worksize + local_ws - 1) / local_ws, local_ws); global_ws = work_groups * local_ws; cl_aux_min = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_READ_WRITE, local_ws * sizeof(cl_float4), NULL, &cl_err); CL_CHECK; cl_aux_max = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_READ_WRITE, local_ws * sizeof(cl_float4), NULL, &cl_err); CL_CHECK; cl_min_max = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_WRITE_ONLY, 2 * sizeof(cl_float4), NULL, &cl_err); CL_CHECK; /* The full initialization is done in the two_stages_local_min_max_reduce kernel */ #if 0 cl_err = gegl_clSetKernelArg(cl_data->kernel[3], 0, sizeof(cl_mem), (void*)&cl_aux_min); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[3], 1, sizeof(cl_mem), (void*)&cl_aux_max); CL_CHECK; cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (), cl_data->kernel[3], 1, NULL, &local_ws, &local_ws, 0, NULL, NULL); CL_CHECK; #endif cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem), (void*)&cl_aux_min); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_mem), (void*)&cl_aux_max); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_float4) * local_ws, NULL); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_float4) * local_ws, NULL); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_int), (void*)&n_pixels); CL_CHECK; cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (), cl_data->kernel[0], 1, NULL, &global_ws, &local_ws, 0, NULL, NULL); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem), (void*)&cl_aux_min); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_mem), (void*)&cl_aux_max); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_mem), (void*)&cl_min_max); CL_CHECK; /* Only one work group */ cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (), cl_data->kernel[1], 1, NULL, &local_ws, &local_ws, 0, NULL, NULL); CL_CHECK; /* Read the memory buffer, probably better to keep it in GPU memory */ cl_err = gegl_clEnqueueReadBuffer (gegl_cl_get_command_queue (), cl_min_max, CL_TRUE, 0, 2 * sizeof (cl_float4), &min_max_buf, 0, NULL, NULL); CL_CHECK; min[0] = min_max_buf[0].x; min[1] = min_max_buf[0].y; min[2] = min_max_buf[0].z; min[3] = min_max_buf[0].w; max[0] = min_max_buf[1].x; max[1] = min_max_buf[1].y; max[2] = min_max_buf[1].z; max[3] = min_max_buf[1].w; cl_err = gegl_clReleaseMemObject (cl_aux_min); CL_CHECK_ONLY (cl_err); cl_err = gegl_clReleaseMemObject (cl_aux_max); CL_CHECK_ONLY (cl_err); cl_err = gegl_clReleaseMemObject (cl_min_max); CL_CHECK_ONLY (cl_err); return FALSE; error: if (cl_aux_min) gegl_clReleaseMemObject (cl_aux_min); if (cl_aux_max) gegl_clReleaseMemObject (cl_aux_max); if (cl_min_max) gegl_clReleaseMemObject (cl_min_max); return TRUE; }
static gboolean cl_process (GeglOperation *self, cl_mem in_tex, cl_mem out_tex, size_t global_worksize, const GeglRectangle *roi, gint level) { GeglChantO *o = GEGL_CHANT_PROPERTIES (self); gint num_sampling_points; gdouble *xs, *ys; gfloat *ysf = NULL; cl_mem cl_curve = NULL; cl_ulong cl_max_constant_size; cl_int cl_err = 0; num_sampling_points = o->sampling_points; if (!cl_data) { const char *kernel_name[] = {"cl_contrast_curve",NULL}; cl_data = gegl_cl_compile_and_build (contrast_curve_cl_source, kernel_name); } if (!cl_data) return TRUE; if (num_sampling_points > 0) { xs = g_new (gdouble, num_sampling_points); ys = g_new (gdouble, num_sampling_points); gegl_curve_calc_values (o->curve, 0.0, 1.0, num_sampling_points, xs, ys); g_free (xs); /*We need to downscale the array to pass it to the GPU*/ ysf = g_new (gfloat, num_sampling_points); copy_double_array_to_float_array (ys, ysf, num_sampling_points); g_free (ys); cl_err = gegl_clGetDeviceInfo (gegl_cl_get_device (), CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof (cl_ulong), &cl_max_constant_size, NULL); CL_CHECK; GEGL_NOTE (GEGL_DEBUG_OPENCL, "Max Constant Mem Size: %lu bytes", (unsigned long) cl_max_constant_size); if (sizeof (cl_float) * num_sampling_points < cl_max_constant_size) { cl_curve = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, num_sampling_points * sizeof (cl_float), ysf, &cl_err); CL_CHECK; cl_err = gegl_clSetKernelArg (cl_data->kernel[0], 0, sizeof (cl_mem), (void*) &in_tex); CL_CHECK; cl_err = gegl_clSetKernelArg (cl_data->kernel[0], 1, sizeof (cl_mem), (void*) &out_tex); CL_CHECK; cl_err = gegl_clSetKernelArg (cl_data->kernel[0], 2, sizeof (cl_mem), (void*) &cl_curve); CL_CHECK; cl_err = gegl_clSetKernelArg (cl_data->kernel[0], 3, sizeof (gint), (void*) &num_sampling_points); CL_CHECK; cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (), cl_data->kernel[0], 1, NULL, &global_worksize, NULL, 0, NULL, NULL); CL_CHECK; cl_err = gegl_clFinish (gegl_cl_get_command_queue ()); CL_CHECK; cl_err = gegl_clReleaseMemObject (cl_curve); CL_CHECK_ONLY (cl_err); } else { /*If the curve size doesn't fit constant memory is better to use CPU*/ GEGL_NOTE (GEGL_DEBUG_OPENCL, "Not enough constant memory for the curve"); g_free (ysf); return TRUE; } g_free (ysf); return FALSE; error: if (ysf) g_free (ysf); if (cl_curve) gegl_clReleaseMemObject (cl_curve); return TRUE; } else /*If the curve doesn't have a lookup table is better to use CPU*/ { GEGL_NOTE (GEGL_DEBUG_OPENCL, "Curve not suitable to be computed in the GPU"); return TRUE; } }