Ejemplo n.º 1
0
static inline gboolean
_gegl_buffer_cl_cache_flush2 (GeglTileHandlerCache *cache,
                              const GeglRectangle  *roi)
{
  size_t size;
  GList *elem;
  GeglRectangle tmp;
  cl_int cl_err = 0;

  gpointer data;
  gboolean need_cl = FALSE;

  for (elem=cache_entries; elem; elem=elem->next)
    {
      CacheEntry *entry = elem->data;

      if (entry->valid && entry->tile_storage->cache == cache
          && (!roi || gegl_rectangle_intersect (&tmp, roi, &entry->roi)))
        {
          entry->valid = FALSE;
          entry->used ++;

          gegl_cl_color_babl (entry->buffer->soft_format, &size);

          data = g_malloc(entry->roi.width * entry->roi.height * size);

          cl_err = gegl_clEnqueueReadBuffer(gegl_cl_get_command_queue(),
                                            entry->tex, CL_TRUE, 0, entry->roi.width * entry->roi.height * size, data,
                                            0, NULL, NULL);
          /* tile-ize */
          gegl_buffer_set (entry->buffer, &entry->roi, 0, entry->buffer->soft_format, data, GEGL_AUTO_ROWSTRIDE);

          entry->used --;
          need_cl = TRUE;

          g_free(data);

          CL_CHECK;
        }
    }

  if (need_cl)
    {
      cl_err = gegl_clFinish (gegl_cl_get_command_queue ());
      CL_CHECK;

      g_mutex_lock (&cache_mutex);

      while (cache_entry_find_invalid (&data))
        {
          CacheEntry *entry = data;

#if 1
          GEGL_NOTE (GEGL_DEBUG_OPENCL, "Removing from cl-cache: %p %s {%d %d %d %d}", entry->buffer, babl_get_name(entry->buffer->soft_format),
                                                                                       entry->roi.x, entry->roi.y, entry->roi.width, entry->roi.height);
#endif

          gegl_clReleaseMemObject(entry->tex);

          memset (entry, 0x0, sizeof (CacheEntry));

          g_slice_free (CacheEntry, data);
          cache_entries = g_list_remove (cache_entries, data);
        }

      g_mutex_unlock (&cache_mutex);
    }

  return TRUE;

error:

  g_mutex_lock (&cache_mutex);

  while (cache_entry_find_invalid (&data))
    {
      g_slice_free (CacheEntry, data);
      cache_entries = g_list_remove (cache_entries, data);
    }

  g_mutex_unlock (&cache_mutex);

  /* XXX : result is corrupted */
  return FALSE;
}
Ejemplo n.º 2
0
static gboolean
cl_buffer_get_min_max (cl_mem               in_tex,
                       size_t               global_worksize,
                       const GeglRectangle *roi,
                       gfloat              min[4],
                       gfloat              max[4])
{
  cl_int   cl_err      = 0;
  size_t   local_ws, max_local_ws;
  size_t   work_groups;
  size_t   global_ws;
  cl_mem   cl_aux_min  = NULL;
  cl_mem   cl_aux_max  = NULL;
  cl_mem   cl_min_max  = NULL;
  cl_int   n_pixels    = (cl_int)global_worksize;
  cl_float4 min_max_buf[2];

  if (global_worksize < 1)
    {
      min[0] = min[1] = min[2] = min[3] =  G_MAXFLOAT;
      max[0] = max[1] = max[2] = max[3] = -G_MAXFLOAT;
      return FALSE;
    }

  cl_err = gegl_clGetDeviceInfo (gegl_cl_get_device (),
                                 CL_DEVICE_MAX_WORK_GROUP_SIZE,
                                 sizeof (size_t), &max_local_ws, NULL);
  CL_CHECK;

  max_local_ws = MIN (max_local_ws,
                      MIN (cl_data->work_group_size[0],
                           cl_data->work_group_size[1]));

  /* Needs to be a power of two */
  local_ws = 256;
  while (local_ws > max_local_ws)
    local_ws /= 2;

  work_groups = MIN ((global_worksize + local_ws - 1) / local_ws, local_ws);
  global_ws = work_groups * local_ws;


  cl_aux_min = gegl_clCreateBuffer (gegl_cl_get_context (),
                                    CL_MEM_READ_WRITE,
                                    local_ws * sizeof(cl_float4),
                                    NULL, &cl_err);
  CL_CHECK;
  cl_aux_max = gegl_clCreateBuffer (gegl_cl_get_context (),
                                    CL_MEM_READ_WRITE,
                                    local_ws * sizeof(cl_float4),
                                    NULL, &cl_err);
  CL_CHECK;
  cl_min_max = gegl_clCreateBuffer (gegl_cl_get_context (),
                                    CL_MEM_WRITE_ONLY,
                                    2 * sizeof(cl_float4),
                                    NULL, &cl_err);
  CL_CHECK;

  /* The full initialization is done in the two_stages_local_min_max_reduce
     kernel */
#if 0
  cl_err = gegl_clSetKernelArg(cl_data->kernel[3], 0, sizeof(cl_mem),
                               (void*)&cl_aux_min);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[3], 1, sizeof(cl_mem),
                               (void*)&cl_aux_max);
  CL_CHECK;

  cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
                                       cl_data->kernel[3], 1,
                                       NULL, &local_ws, &local_ws,
                                       0, NULL, NULL);
  CL_CHECK;
#endif

  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem),
                               (void*)&in_tex);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem),
                               (void*)&cl_aux_min);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_mem),
                               (void*)&cl_aux_max);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3,
                               sizeof(cl_float4) * local_ws, NULL);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4,
                               sizeof(cl_float4) * local_ws, NULL);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_int),
                               (void*)&n_pixels);
  CL_CHECK;

  cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
                                       cl_data->kernel[0], 1,
                                       NULL, &global_ws, &local_ws,
                                       0, NULL, NULL);
  CL_CHECK;

  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem),
                               (void*)&cl_aux_min);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_mem),
                               (void*)&cl_aux_max);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_mem),
                               (void*)&cl_min_max);
  CL_CHECK;

  /* Only one work group */
  cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (),
                                        cl_data->kernel[1], 1,
                                        NULL, &local_ws, &local_ws,
                                        0, NULL, NULL);
  CL_CHECK;

  /* Read the memory buffer, probably better to keep it in GPU memory */
  cl_err = gegl_clEnqueueReadBuffer (gegl_cl_get_command_queue (),
                                     cl_min_max, CL_TRUE, 0,
                                     2 * sizeof (cl_float4), &min_max_buf, 0,
                                     NULL, NULL);
  CL_CHECK;

  min[0] = min_max_buf[0].x;
  min[1] = min_max_buf[0].y;
  min[2] = min_max_buf[0].z;
  min[3] = min_max_buf[0].w;

  max[0] = min_max_buf[1].x;
  max[1] = min_max_buf[1].y;
  max[2] = min_max_buf[1].z;
  max[3] = min_max_buf[1].w;

  cl_err = gegl_clReleaseMemObject (cl_aux_min);
  CL_CHECK_ONLY (cl_err);

  cl_err = gegl_clReleaseMemObject (cl_aux_max);
  CL_CHECK_ONLY (cl_err);

  cl_err = gegl_clReleaseMemObject (cl_min_max);
  CL_CHECK_ONLY (cl_err);

  return FALSE;

error:
  if (cl_aux_min)
    gegl_clReleaseMemObject (cl_aux_min);
  if (cl_aux_max)
    gegl_clReleaseMemObject (cl_aux_max);
  if (cl_min_max)
    gegl_clReleaseMemObject (cl_min_max);
  return TRUE;
}