Beispiel #1
0
static gboolean
gegl_cl_device_has_extension (cl_device_id device, const char *extension_name)
{
  cl_int     cl_err;
  size_t     string_len = 0;
  gchar     *device_ext_string = NULL;
  gchar    **extensions;
  gboolean   found = FALSE;

  if (!extension_name)
    return FALSE;

  cl_err= gegl_clGetDeviceInfo (device, CL_DEVICE_EXTENSIONS,
                                0, NULL, &string_len);
  CL_CHECK_ONLY (cl_err);

  if (!string_len)
    return FALSE;

  device_ext_string = g_malloc0 (string_len);

  cl_err = gegl_clGetDeviceInfo (device, CL_DEVICE_EXTENSIONS,
                                 string_len, device_ext_string, NULL);
  CL_CHECK_ONLY (cl_err);

  extensions = g_strsplit (device_ext_string, " ", 0);

  for (gint i = 0; extensions[i] && !found; ++i)
    {
      if (!strcmp (extensions[i], extension_name))
        found = TRUE;
    }

  g_free (device_ext_string);
  g_strfreev (extensions);

  return found;
}
Beispiel #2
0
static gboolean
cl_buffer_get_min_max (cl_mem               in_tex,
                       size_t               global_worksize,
                       const GeglRectangle *roi,
                       gfloat              min[4],
                       gfloat              max[4])
{
  cl_int   cl_err      = 0;
  size_t   local_ws, max_local_ws;
  size_t   work_groups;
  size_t   global_ws;
  cl_mem   cl_aux_min  = NULL;
  cl_mem   cl_aux_max  = NULL;
  cl_mem   cl_min_max  = NULL;
  cl_int   n_pixels    = (cl_int)global_worksize;
  cl_float4 min_max_buf[2];

  if (global_worksize < 1)
    {
      min[0] = min[1] = min[2] = min[3] =  G_MAXFLOAT;
      max[0] = max[1] = max[2] = max[3] = -G_MAXFLOAT;
      return FALSE;
    }

  cl_err = gegl_clGetDeviceInfo (gegl_cl_get_device (),
                                 CL_DEVICE_MAX_WORK_GROUP_SIZE,
                                 sizeof (size_t), &max_local_ws, NULL);
  CL_CHECK;

  max_local_ws = MIN (max_local_ws,
                      MIN (cl_data->work_group_size[0],
                           cl_data->work_group_size[1]));

  /* Needs to be a power of two */
  local_ws = 256;
  while (local_ws > max_local_ws)
    local_ws /= 2;

  work_groups = MIN ((global_worksize + local_ws - 1) / local_ws, local_ws);
  global_ws = work_groups * local_ws;


  cl_aux_min = gegl_clCreateBuffer (gegl_cl_get_context (),
                                    CL_MEM_READ_WRITE,
                                    local_ws * sizeof(cl_float4),
                                    NULL, &cl_err);
  CL_CHECK;
  cl_aux_max = gegl_clCreateBuffer (gegl_cl_get_context (),
                                    CL_MEM_READ_WRITE,
                                    local_ws * sizeof(cl_float4),
                                    NULL, &cl_err);
  CL_CHECK;
  cl_min_max = gegl_clCreateBuffer (gegl_cl_get_context (),
                                    CL_MEM_WRITE_ONLY,
                                    2 * sizeof(cl_float4),
                                    NULL, &cl_err);
  CL_CHECK;

  /* The full initialization is done in the two_stages_local_min_max_reduce
     kernel */
#if 0
  cl_err = gegl_clSetKernelArg(cl_data->kernel[3], 0, sizeof(cl_mem),
                               (void*)&cl_aux_min);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[3], 1, sizeof(cl_mem),
                               (void*)&cl_aux_max);
  CL_CHECK;

  cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
                                       cl_data->kernel[3], 1,
                                       NULL, &local_ws, &local_ws,
                                       0, NULL, NULL);
  CL_CHECK;
#endif

  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem),
                               (void*)&in_tex);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem),
                               (void*)&cl_aux_min);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_mem),
                               (void*)&cl_aux_max);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3,
                               sizeof(cl_float4) * local_ws, NULL);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4,
                               sizeof(cl_float4) * local_ws, NULL);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_int),
                               (void*)&n_pixels);
  CL_CHECK;

  cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
                                       cl_data->kernel[0], 1,
                                       NULL, &global_ws, &local_ws,
                                       0, NULL, NULL);
  CL_CHECK;

  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem),
                               (void*)&cl_aux_min);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_mem),
                               (void*)&cl_aux_max);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_mem),
                               (void*)&cl_min_max);
  CL_CHECK;

  /* Only one work group */
  cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (),
                                        cl_data->kernel[1], 1,
                                        NULL, &local_ws, &local_ws,
                                        0, NULL, NULL);
  CL_CHECK;

  /* Read the memory buffer, probably better to keep it in GPU memory */
  cl_err = gegl_clEnqueueReadBuffer (gegl_cl_get_command_queue (),
                                     cl_min_max, CL_TRUE, 0,
                                     2 * sizeof (cl_float4), &min_max_buf, 0,
                                     NULL, NULL);
  CL_CHECK;

  min[0] = min_max_buf[0].x;
  min[1] = min_max_buf[0].y;
  min[2] = min_max_buf[0].z;
  min[3] = min_max_buf[0].w;

  max[0] = min_max_buf[1].x;
  max[1] = min_max_buf[1].y;
  max[2] = min_max_buf[1].z;
  max[3] = min_max_buf[1].w;

  cl_err = gegl_clReleaseMemObject (cl_aux_min);
  CL_CHECK_ONLY (cl_err);

  cl_err = gegl_clReleaseMemObject (cl_aux_max);
  CL_CHECK_ONLY (cl_err);

  cl_err = gegl_clReleaseMemObject (cl_min_max);
  CL_CHECK_ONLY (cl_err);

  return FALSE;

error:
  if (cl_aux_min)
    gegl_clReleaseMemObject (cl_aux_min);
  if (cl_aux_max)
    gegl_clReleaseMemObject (cl_aux_max);
  if (cl_min_max)
    gegl_clReleaseMemObject (cl_min_max);
  return TRUE;
}
Beispiel #3
0
static gboolean
cl_process (GeglOperation       *self,
            cl_mem               in_tex,
            cl_mem               out_tex,
            size_t               global_worksize,
            const GeglRectangle *roi,
            gint                 level)
{
  GeglChantO *o = GEGL_CHANT_PROPERTIES (self);
  gint       num_sampling_points;
  gdouble    *xs, *ys;
  gfloat     *ysf = NULL;
  cl_mem     cl_curve = NULL;
  cl_ulong   cl_max_constant_size;
  cl_int     cl_err = 0;

  num_sampling_points = o->sampling_points;

  if (!cl_data)
    {
      const char *kernel_name[] = {"cl_contrast_curve",NULL};
      cl_data = gegl_cl_compile_and_build (contrast_curve_cl_source,
                                           kernel_name);
    }
  if (!cl_data)
    return TRUE;

  if (num_sampling_points > 0)
    {
      xs = g_new (gdouble, num_sampling_points);
      ys = g_new (gdouble, num_sampling_points);

      gegl_curve_calc_values (o->curve, 0.0, 1.0, num_sampling_points, xs, ys);
      g_free (xs);

      /*We need to downscale the array to pass it to the GPU*/
      ysf = g_new (gfloat, num_sampling_points);
      copy_double_array_to_float_array (ys, ysf, num_sampling_points);
      g_free (ys);

      cl_err = gegl_clGetDeviceInfo (gegl_cl_get_device (),
                                     CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
                                     sizeof (cl_ulong),
                                     &cl_max_constant_size,
                                     NULL);
      CL_CHECK;

      GEGL_NOTE (GEGL_DEBUG_OPENCL, "Max Constant Mem Size: %lu bytes",
                 (unsigned long) cl_max_constant_size);

      if (sizeof (cl_float) * num_sampling_points < cl_max_constant_size)
        {
          cl_curve = gegl_clCreateBuffer (gegl_cl_get_context (),
                                          CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
                                          num_sampling_points * sizeof (cl_float),
                                          ysf, &cl_err);
          CL_CHECK;
          cl_err = gegl_clSetKernelArg (cl_data->kernel[0], 0, sizeof (cl_mem),
                                        (void*) &in_tex);
          CL_CHECK;
          cl_err = gegl_clSetKernelArg (cl_data->kernel[0], 1, sizeof (cl_mem),
                                        (void*) &out_tex);
          CL_CHECK;
          cl_err = gegl_clSetKernelArg (cl_data->kernel[0], 2, sizeof (cl_mem),
                                        (void*) &cl_curve);
          CL_CHECK;
          cl_err = gegl_clSetKernelArg (cl_data->kernel[0], 3, sizeof (gint),
                                        (void*) &num_sampling_points);
          CL_CHECK;
          cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (),
                                                cl_data->kernel[0], 1,
                                                NULL, &global_worksize, NULL,
                                                0, NULL, NULL);
          CL_CHECK;

          cl_err = gegl_clFinish (gegl_cl_get_command_queue ());
          CL_CHECK;

          cl_err = gegl_clReleaseMemObject (cl_curve);
          CL_CHECK_ONLY (cl_err);
        }
      else
        {
          /*If the curve size doesn't fit constant memory is better to use CPU*/
          GEGL_NOTE (GEGL_DEBUG_OPENCL,
                     "Not enough constant memory for the curve");
          g_free (ysf);
          return TRUE;
        }

      g_free (ysf);
      return FALSE;
error:
      if (ysf)
        g_free (ysf);
      if (cl_curve)
        gegl_clReleaseMemObject (cl_curve);

      return TRUE;
    }
  else  /*If the curve doesn't have a lookup table is better to use CPU*/
    {
      GEGL_NOTE (GEGL_DEBUG_OPENCL,
                 "Curve not suitable to be computed in the GPU");
      return TRUE;
    }
}