Ejemplo n.º 1
0
static gboolean
cl_process (GeglOperation       *operation,
            cl_mem               in_buf,
            cl_mem               out_buf,
            const size_t         n_pixels,
            const GeglRectangle *roi,
            gint                 level)
{
  GeglOperationClass *operation_class = GEGL_OPERATION_GET_CLASS (operation);
  GeglClRunData *cl_data = operation_class->cl_data;
  GeglProperties *o = GEGL_PROPERTIES (operation);
  const size_t gbl_size[2] = {roi->width, roi->height};
  const size_t gbl_off[2]  = {roi->x, roi->y};
  cl_int cl_err = 0;
  cl_mem filter_pat = NULL;

  if (!cl_data)
    goto error;
  filter_pat = gegl_clCreateBuffer (gegl_cl_get_context (),
                                    CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                    pattern_width[o->pattern] *
                                    pattern_height[o->pattern] * sizeof(cl_int),
                                    (void*)pattern[o->pattern],
                                    &cl_err);
  CL_CHECK;
  cl_err = gegl_cl_set_kernel_args (cl_data->kernel[0],
                                    sizeof(cl_mem), &in_buf,
                                    sizeof(cl_mem), &out_buf,
                                    sizeof(cl_mem), &filter_pat,
                                    sizeof(cl_int), &pattern_width[o->pattern],
                                    sizeof(cl_int), &pattern_height[o->pattern],
                                    sizeof(cl_int), &o->additive,
                                    sizeof(cl_int), &o->rotated,
                                    NULL);
  CL_CHECK;
  cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (),
                                        cl_data->kernel[0], 2,
                                        gbl_off, gbl_size, NULL,
                                        0, NULL, NULL);
  CL_CHECK;
  cl_err = gegl_clFinish (gegl_cl_get_command_queue ());
  CL_CHECK;
  cl_err = gegl_clReleaseMemObject (filter_pat);
  CL_CHECK;
  return FALSE;
  error:
    if (filter_pat)
      gegl_clReleaseMemObject (filter_pat);
    return TRUE;
}
Ejemplo n.º 2
0
static gboolean
cl_bilateral (cl_mem                in_tex,
              cl_mem                out_tex,
              const GeglRectangle  *roi,
              const GeglRectangle  *src_rect,
              gint                  s_sigma,
              gfloat                r_sigma)
{
  cl_int cl_err = 0;

  gint c;

  const gint width    = src_rect->width;
  const gint height   = src_rect->height;

  const gint sw = (width -1) / s_sigma + 1;
  const gint sh = (height-1) / s_sigma + 1;
  const gint depth = (int)(1.0f / r_sigma) + 1;

  size_t global_ws[2];
  size_t local_ws[2];

  cl_mem grid = NULL;
  cl_mem blur[4] = {NULL, NULL, NULL, NULL};

  if (!cl_data)
    {
      const char *kernel_name[] = {"bilateral_downsample",
                                   "bilateral_blur",
                                   "bilateral_interpolate",
                                   NULL};
      cl_data = gegl_cl_compile_and_build (bilateral_filter_fast_cl_source, kernel_name);
    }

  if (!cl_data)
    return 1;


  grid = gegl_clCreateBuffer (gegl_cl_get_context (),
                              CL_MEM_READ_WRITE,
                              sw * sh * depth * sizeof(cl_float8),
                              NULL,
                              &cl_err);
  CL_CHECK;

  for(c = 0; c < 4; c++)
    {
      blur[c] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                     CL_MEM_READ_WRITE,
                                     sw * sh * depth * sizeof(cl_float2),
                                     NULL, &cl_err);
      CL_CHECK;
    }

  local_ws[0] = 8;
  local_ws[1] = 8;

  global_ws[0] = ((sw + local_ws[0] - 1)/local_ws[0])*local_ws[0];
  global_ws[1] = ((sh + local_ws[1] - 1)/local_ws[1])*local_ws[1];

  gegl_cl_set_kernel_args (cl_data->kernel[0],
                           sizeof(cl_mem),   &in_tex,
                           sizeof(cl_mem),   &grid,
                           sizeof(cl_int),   &width,
                           sizeof(cl_int),   &height,
                           sizeof(cl_int),   &sw,
                           sizeof(cl_int),   &sh,
                           sizeof(cl_int),   &depth,
                           sizeof(cl_int),   &s_sigma,
                           sizeof(cl_float), &r_sigma,
                           NULL);
  CL_CHECK;

  cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (),
                                        cl_data->kernel[0], 2,
                                        NULL, global_ws, local_ws,
                                        0, NULL, NULL);
  CL_CHECK;

  local_ws[0] = 16;
  local_ws[1] = 16;

  global_ws[0] = ((sw + local_ws[0] - 1)/local_ws[0])*local_ws[0];
  global_ws[1] = ((sh + local_ws[1] - 1)/local_ws[1])*local_ws[1];

  gegl_cl_set_kernel_args (cl_data->kernel[1],
                           sizeof(cl_mem), &grid,
                           sizeof(cl_mem), &blur[0],
                           sizeof(cl_mem), &blur[1],
                           sizeof(cl_mem), &blur[2],
                           sizeof(cl_mem), &blur[3],
                           sizeof(cl_int), &sw,
                           sizeof(cl_int), &sh,
                           sizeof(cl_int), &depth,
                           NULL);

  cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (),
                                        cl_data->kernel[1], 2,
                                        NULL, global_ws, local_ws,
                                        0, NULL, NULL);
  CL_CHECK;

  global_ws[0] = width;
  global_ws[1] = height;

  gegl_cl_set_kernel_args (cl_data->kernel[2],
                           sizeof(cl_mem),   &in_tex,
                           sizeof(cl_mem),   &blur[0],
                           sizeof(cl_mem),   &blur[1],
                           sizeof(cl_mem),   &blur[2],
                           sizeof(cl_mem),   &blur[3],
                           sizeof(cl_mem),   &out_tex,
                           sizeof(cl_int),   &width,
                           sizeof(cl_int),   &sw,
                           sizeof(cl_int),   &sh,
                           sizeof(cl_int),   &depth,
                           sizeof(cl_int),   &s_sigma,
                           sizeof(cl_float), &r_sigma,
                           NULL);
  CL_CHECK;

  cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
                                       cl_data->kernel[2], 2,
                                       NULL, global_ws, NULL,
                                       0, NULL, NULL);
  CL_CHECK;

  cl_err = gegl_clFinish (gegl_cl_get_command_queue ());
  CL_CHECK;

  cl_err = gegl_clReleaseMemObject (grid);
  CL_CHECK_ONLY (cl_err);

  for(c = 0; c < 4; c++)
    {
      cl_err = gegl_clReleaseMemObject (blur[c]);
      CL_CHECK_ONLY (cl_err);
    }

  return FALSE;

error:
  if (grid)
    gegl_clReleaseMemObject (grid);

  for (c = 0; c < 4; c++)
    {
      if (blur[c])
        gegl_clReleaseMemObject (blur[c]);
    }

  return TRUE;
}
gboolean
gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
{
  GeglBufferClIterators *i = (gpointer)iterator;
  gboolean result = FALSE;
  gint no, j;
  cl_int cl_err = 0;

  if (i->is_finished)
    g_error ("%s called on finished buffer iterator", G_STRFUNC);
  if (i->iteration_no == 0)
    {
      for (no=0; no<i->iterators;no++)
        {
          if (i->buffer[no])
            {
              gint j;
              gboolean found = FALSE;
              for (j=0; j<no; j++)
                if (i->buffer[no]==i->buffer[j])
                  {
                    found = TRUE;
                    break;
                  }
              if (!found)
                gegl_buffer_lock (i->buffer[no]);

              if (i->flags[no] == GEGL_CL_BUFFER_WRITE
                  || (i->flags[no] == GEGL_CL_BUFFER_READ
                      && (i->area[no][0] > 0 || i->area[no][1] > 0 || i->area[no][2] > 0 || i->area[no][3] > 0)))
                {
                  gegl_buffer_cl_cache_flush (i->buffer[no], &i->rect[no]);
                }
            }
        }
    }
  else
    {
      /* complete pending write work */
      for (no=0; no<i->iterators;no++)
        {
          if (i->flags[no] == GEGL_CL_BUFFER_WRITE)
            {
              /* Wait Processing */
              cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
              if (cl_err != CL_SUCCESS) CL_ERROR;

              /* color conversion in the GPU (output) */
              if (i->conv[no] == GEGL_CL_COLOR_CONVERT)
                for (j=0; j < i->n; j++)
                  {
                    cl_err = gegl_cl_color_conv (i->tex_op[no][j], i->tex_buf[no][j], i->size[no][j],
                                                 i->format[no], i->buffer[no]->soft_format);
                    if (cl_err == FALSE) CL_ERROR;
                  }

              /* Wait Processing */
              cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
              if (cl_err != CL_SUCCESS) CL_ERROR;

              /* GPU -> CPU */
              for (j=0; j < i->n; j++)
                {
                  gpointer data;

                  /* tile-ize */
                  if (i->conv[no] == GEGL_CL_COLOR_NOT_SUPPORTED)
                    {
                      data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_op[no][j], CL_TRUE,
                                                     CL_MAP_READ,
                                                     0, i->size[no][j] * i->op_cl_format_size [no],
                                                     0, NULL, NULL, &cl_err);
                      if (cl_err != CL_SUCCESS) CL_ERROR;

                      /* color conversion using BABL */
                      gegl_buffer_set (i->buffer[no], &i->roi[no][j], 0, i->format[no], data, GEGL_AUTO_ROWSTRIDE);

                      cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_op[no][j], data,
                                                             0, NULL, NULL);
                      if (cl_err != CL_SUCCESS) CL_ERROR;
                    }
                  else
#ifdef OPENCL_USE_CACHE
                    {
                      gegl_buffer_cl_cache_new (i->buffer[no], &i->roi[no][j], i->tex_buf[no][j]);
                      /* don't release this texture */
                      i->tex_buf[no][j] = NULL;
                    }
#else
                    {
                      data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE,
                                                     CL_MAP_READ,
                                                     0, i->size[no][j] * i->buf_cl_format_size [no],
                                                     0, NULL, NULL, &cl_err);
                      if (cl_err != CL_SUCCESS) CL_ERROR;

                      /* color conversion using BABL */
                      gegl_buffer_set (i->buffer[no], &i->roi[no][j], i->format[no], data, GEGL_AUTO_ROWSTRIDE);

                      cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_buf[no][j], data,
                                                             0, NULL, NULL);
                      if (cl_err != CL_SUCCESS) CL_ERROR;
                    }
#endif
                }
            }
        }

      /* Run! */
      cl_err = gegl_clFinish(gegl_cl_get_command_queue());
      if (cl_err != CL_SUCCESS) CL_ERROR;

      for (no=0; no < i->iterators; no++)
        for (j=0; j < i->n; j++)
          {
            if (i->tex_buf_from_cache [no][j])
              {
                gboolean ok = gegl_buffer_cl_cache_release (i->tex_buf[no][j]);
                g_assert (ok);
              }

            if (i->tex_buf[no][j] && !i->tex_buf_from_cache [no][j])
              gegl_clReleaseMemObject (i->tex_buf[no][j]);

            if (i->tex_op [no][j])
              gegl_clReleaseMemObject (i->tex_op [no][j]);

            i->tex    [no][j] = NULL;
            i->tex_buf[no][j] = NULL;
            i->tex_op [no][j] = NULL;
          }
    }

  g_assert (i->iterators > 0);
  result = (i->roi_no >= i->rois)? FALSE : TRUE;

  i->n = MIN(GEGL_CL_NTEX, i->rois - i->roi_no);

  /* then we iterate all */
  for (no=0; no<i->iterators;no++)
    {
      for (j = 0; j < i->n; j++)
        {
          GeglRectangle r = {i->rect[no].x + i->roi_all[i->roi_no+j].x - i->area[no][0],
                             i->rect[no].y + i->roi_all[i->roi_no+j].y - i->area[no][2],
                             i->roi_all[i->roi_no+j].width             + i->area[no][0] + i->area[no][1],
                             i->roi_all[i->roi_no+j].height            + i->area[no][2] + i->area[no][3]};
          i->roi [no][j] = r;
          i->size[no][j] = r.width * r.height;
        }

      if (i->flags[no] == GEGL_CL_BUFFER_READ)
        {
          for (j=0; j < i->n; j++)
            {
              gpointer data;

              /* un-tile */
              switch (i->conv[no])
                {
                  case GEGL_CL_COLOR_NOT_SUPPORTED:

                    {
                    gegl_buffer_cl_cache_flush (i->buffer[no], &i->roi[no][j]);

                    g_assert (i->tex_op[no][j] == NULL);
                    i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                            CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY,
                                                            i->size[no][j] * i->op_cl_format_size [no],
                                                            NULL, &cl_err);
                    if (cl_err != CL_SUCCESS) CL_ERROR;

                    /* pre-pinned memory */
                    data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_op[no][j], CL_TRUE,
                                                   CL_MAP_WRITE,
                                                   0, i->size[no][j] * i->op_cl_format_size [no],
                                                   0, NULL, NULL, &cl_err);
                    if (cl_err != CL_SUCCESS) CL_ERROR;

                    /* color conversion using BABL */
                    gegl_buffer_get (i->buffer[no], &i->roi[no][j], 1.0, i->format[no], data, GEGL_AUTO_ROWSTRIDE, GEGL_ABYSS_NONE);

                    cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_op[no][j], data,
                                                               0, NULL, NULL);
                    if (cl_err != CL_SUCCESS) CL_ERROR;

                    i->tex[no][j] = i->tex_op[no][j];

                    break;
                    }

                  case GEGL_CL_COLOR_EQUAL:

                    {
                    i->tex_buf[no][j] = gegl_buffer_cl_cache_get (i->buffer[no], &i->roi[no][j]);

                    if (i->tex_buf[no][j])
                      i->tex_buf_from_cache [no][j] = TRUE; /* don't free texture from cache */
                    else
                      {
                        gegl_buffer_cl_cache_flush (i->buffer[no], &i->roi[no][j]);

                        g_assert (i->tex_buf[no][j] == NULL);
                        i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                                 CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY,
                                                                 i->size[no][j] * i->buf_cl_format_size [no],
                                                                 NULL, &cl_err);
                        if (cl_err != CL_SUCCESS) CL_ERROR;

                        /* pre-pinned memory */
                        data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE,
                                                       CL_MAP_WRITE,
                                                       0, i->size[no][j] * i->buf_cl_format_size [no],
                                                       0, NULL, NULL, &cl_err);
                        if (cl_err != CL_SUCCESS) CL_ERROR;

                        /* color conversion will be performed in the GPU later */
                        gegl_buffer_cl_worker_transf (i->buffer[no], data, i->buf_cl_format_size [no], i->roi[no][j], FALSE);

                        cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_buf[no][j], data,
                                                               0, NULL, NULL);
                        if (cl_err != CL_SUCCESS) CL_ERROR;
                      }

                    i->tex[no][j] = i->tex_buf[no][j];

                    break;
                    }

                  case GEGL_CL_COLOR_CONVERT:

                    {
                    i->tex_buf[no][j] = gegl_buffer_cl_cache_get (i->buffer[no], &i->roi[no][j]);

                    if (i->tex_buf[no][j])
                      i->tex_buf_from_cache [no][j] = TRUE; /* don't free texture from cache */
                    else
                      {
                        gegl_buffer_cl_cache_flush (i->buffer[no], &i->roi[no][j]);

                        g_assert (i->tex_buf[no][j] == NULL);
                        i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                                 CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY,
                                                                 i->size[no][j] * i->buf_cl_format_size [no],
                                                                 NULL, &cl_err);
                        if (cl_err != CL_SUCCESS) CL_ERROR;

                        /* pre-pinned memory */
                        data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE,
                                                       CL_MAP_WRITE,
                                                       0, i->size[no][j] * i->buf_cl_format_size [no],
                                                       0, NULL, NULL, &cl_err);
                        if (cl_err != CL_SUCCESS) CL_ERROR;

                        /* color conversion will be performed in the GPU later */
                        /* get buffer data using multiple worker threads
                           to increase bandwidth */
                        gegl_buffer_cl_worker_transf (i->buffer[no], data, i->buf_cl_format_size [no], i->roi[no][j], FALSE);

                        cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_buf[no][j], data,
                                                               0, NULL, NULL);
                        if (cl_err != CL_SUCCESS) CL_ERROR;
                      }

                    g_assert (i->tex_op[no][j] == NULL);
                    i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                            CL_MEM_READ_WRITE,
                                                            i->size[no][j] * i->op_cl_format_size [no],
                                                            NULL, &cl_err);
                    if (cl_err != CL_SUCCESS) CL_ERROR;

                    /* color conversion in the GPU (input) */
                    g_assert (i->tex_buf[no][j] && i->tex_op[no][j]);
                    cl_err = gegl_cl_color_conv (i->tex_buf[no][j], i->tex_op[no][j], i->size[no][j],
                                                 i->buffer[no]->soft_format, i->format[no]);
                    if (cl_err == FALSE) CL_ERROR;

                    i->tex[no][j] = i->tex_op[no][j];

                    break;
                    }
                }
            }

          /* Wait Processing */
          cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
          if (cl_err != CL_SUCCESS) CL_ERROR;
        }
      else if (i->flags[no] == GEGL_CL_BUFFER_WRITE)
        {
          for (j=0; j < i->n; j++)
            {
              switch (i->conv[no])
                {
                  case GEGL_CL_COLOR_NOT_SUPPORTED:

                  {
                  g_assert (i->tex_op[no][j] == NULL);
                  i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                          CL_MEM_ALLOC_HOST_PTR | CL_MEM_WRITE_ONLY,
                                                          i->size[no][j] * i->op_cl_format_size [no],
                                                          NULL, &cl_err);
                  if (cl_err != CL_SUCCESS) CL_ERROR;

                  i->tex[no][j] = i->tex_op[no][j];

                  break;
                  }

                  case GEGL_CL_COLOR_EQUAL:

                  {
                  g_assert (i->tex_buf[no][j] == NULL);
                  i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                           CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, /* cache */
                                                           i->size[no][j] * i->buf_cl_format_size [no],
                                                           NULL, &cl_err);
                  if (cl_err != CL_SUCCESS) CL_ERROR;

                  i->tex[no][j] = i->tex_buf[no][j];

                  break;
                  }

                  case GEGL_CL_COLOR_CONVERT:

                  {
                  g_assert (i->tex_buf[no][j] == NULL);
                  i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                           CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, /* cache */
                                                           i->size[no][j] * i->buf_cl_format_size [no],
                                                           NULL, &cl_err);
                  if (cl_err != CL_SUCCESS) CL_ERROR;

                  g_assert (i->tex_op[no][j] == NULL);
                  i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                          CL_MEM_READ_WRITE,
                                                          i->size[no][j] * i->op_cl_format_size [no],
                                                          NULL, &cl_err);
                  if (cl_err != CL_SUCCESS) CL_ERROR;

                  i->tex[no][j] = i->tex_op[no][j];

                  break;
                  }
               }
            }
        }
      else if (i->flags[no] == GEGL_CL_BUFFER_AUX)
        {
          for (j=0; j < i->n; j++)
            {
              g_assert (i->tex_op[no][j] == NULL);
              i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                      CL_MEM_READ_WRITE,
                                                      i->size[no][j] * i->op_cl_format_size [no],
                                                      NULL, &cl_err);
              if (cl_err != CL_SUCCESS) CL_ERROR;

              i->tex[no][j] = i->tex_op[no][j];
            }
        }
    }

  i->roi_no += i->n;

  i->iteration_no++;

  if (result == FALSE)
    {
      for (no=0; no<i->iterators;no++)
        {
          if (i->buffer[no])
            {
              gint j;
              gboolean found = FALSE;
              for (j=0; j<no; j++)
                if (i->buffer[no]==i->buffer[j])
                  {
                    found = TRUE;
                    break;
                  }
              if (!found)
                gegl_buffer_unlock (i->buffer[no]);

              g_object_unref (i->buffer[no]);
            }
        }

      i->is_finished = TRUE;

      g_free (i->roi_all);
      g_slice_free (GeglBufferClIterators, i);
    }

  *err = FALSE;
  return result;

error:

  for (no=0; no<i->iterators;no++)
    for (j=0; j < i->n; j++)
      {
        if (i->tex_buf[no][j]) gegl_clReleaseMemObject (i->tex_buf[no][j]);
        if (i->tex_op [no][j]) gegl_clReleaseMemObject (i->tex_op [no][j]);

        i->tex    [no][j] = NULL;
        i->tex_buf[no][j] = NULL;
        i->tex_op [no][j] = NULL;
      }

  *err = TRUE;
  return FALSE;
}
Ejemplo n.º 4
0
static gboolean
cl_buffer_get_min_max (cl_mem               in_tex,
                       size_t               global_worksize,
                       const GeglRectangle *roi,
                       gfloat              min[4],
                       gfloat              max[4])
{
  cl_int   cl_err      = 0;
  size_t   local_ws, max_local_ws;
  size_t   work_groups;
  size_t   global_ws;
  cl_mem   cl_aux_min  = NULL;
  cl_mem   cl_aux_max  = NULL;
  cl_mem   cl_min_max  = NULL;
  cl_int   n_pixels    = (cl_int)global_worksize;
  cl_float4 min_max_buf[2];

  if (global_worksize < 1)
    {
      min[0] = min[1] = min[2] = min[3] =  G_MAXFLOAT;
      max[0] = max[1] = max[2] = max[3] = -G_MAXFLOAT;
      return FALSE;
    }

  cl_err = gegl_clGetDeviceInfo (gegl_cl_get_device (),
                                 CL_DEVICE_MAX_WORK_GROUP_SIZE,
                                 sizeof (size_t), &max_local_ws, NULL);
  CL_CHECK;

  max_local_ws = MIN (max_local_ws,
                      MIN (cl_data->work_group_size[0],
                           cl_data->work_group_size[1]));

  /* Needs to be a power of two */
  local_ws = 256;
  while (local_ws > max_local_ws)
    local_ws /= 2;

  work_groups = MIN ((global_worksize + local_ws - 1) / local_ws, local_ws);
  global_ws = work_groups * local_ws;


  cl_aux_min = gegl_clCreateBuffer (gegl_cl_get_context (),
                                    CL_MEM_READ_WRITE,
                                    local_ws * sizeof(cl_float4),
                                    NULL, &cl_err);
  CL_CHECK;
  cl_aux_max = gegl_clCreateBuffer (gegl_cl_get_context (),
                                    CL_MEM_READ_WRITE,
                                    local_ws * sizeof(cl_float4),
                                    NULL, &cl_err);
  CL_CHECK;
  cl_min_max = gegl_clCreateBuffer (gegl_cl_get_context (),
                                    CL_MEM_WRITE_ONLY,
                                    2 * sizeof(cl_float4),
                                    NULL, &cl_err);
  CL_CHECK;

  /* The full initialization is done in the two_stages_local_min_max_reduce
     kernel */
#if 0
  cl_err = gegl_clSetKernelArg(cl_data->kernel[3], 0, sizeof(cl_mem),
                               (void*)&cl_aux_min);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[3], 1, sizeof(cl_mem),
                               (void*)&cl_aux_max);
  CL_CHECK;

  cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
                                       cl_data->kernel[3], 1,
                                       NULL, &local_ws, &local_ws,
                                       0, NULL, NULL);
  CL_CHECK;
#endif

  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem),
                               (void*)&in_tex);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem),
                               (void*)&cl_aux_min);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_mem),
                               (void*)&cl_aux_max);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3,
                               sizeof(cl_float4) * local_ws, NULL);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4,
                               sizeof(cl_float4) * local_ws, NULL);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_int),
                               (void*)&n_pixels);
  CL_CHECK;

  cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
                                       cl_data->kernel[0], 1,
                                       NULL, &global_ws, &local_ws,
                                       0, NULL, NULL);
  CL_CHECK;

  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem),
                               (void*)&cl_aux_min);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_mem),
                               (void*)&cl_aux_max);
  CL_CHECK;
  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_mem),
                               (void*)&cl_min_max);
  CL_CHECK;

  /* Only one work group */
  cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (),
                                        cl_data->kernel[1], 1,
                                        NULL, &local_ws, &local_ws,
                                        0, NULL, NULL);
  CL_CHECK;

  /* Read the memory buffer, probably better to keep it in GPU memory */
  cl_err = gegl_clEnqueueReadBuffer (gegl_cl_get_command_queue (),
                                     cl_min_max, CL_TRUE, 0,
                                     2 * sizeof (cl_float4), &min_max_buf, 0,
                                     NULL, NULL);
  CL_CHECK;

  min[0] = min_max_buf[0].x;
  min[1] = min_max_buf[0].y;
  min[2] = min_max_buf[0].z;
  min[3] = min_max_buf[0].w;

  max[0] = min_max_buf[1].x;
  max[1] = min_max_buf[1].y;
  max[2] = min_max_buf[1].z;
  max[3] = min_max_buf[1].w;

  cl_err = gegl_clReleaseMemObject (cl_aux_min);
  CL_CHECK_ONLY (cl_err);

  cl_err = gegl_clReleaseMemObject (cl_aux_max);
  CL_CHECK_ONLY (cl_err);

  cl_err = gegl_clReleaseMemObject (cl_min_max);
  CL_CHECK_ONLY (cl_err);

  return FALSE;

error:
  if (cl_aux_min)
    gegl_clReleaseMemObject (cl_aux_min);
  if (cl_aux_max)
    gegl_clReleaseMemObject (cl_aux_max);
  if (cl_min_max)
    gegl_clReleaseMemObject (cl_min_max);
  return TRUE;
}
Ejemplo n.º 5
0
static gboolean
cl_process (GeglOperation       *self,
            cl_mem               in_tex,
            cl_mem               out_tex,
            size_t               global_worksize,
            const GeglRectangle *roi,
            gint                 level)
{
  GeglChantO *o = GEGL_CHANT_PROPERTIES (self);
  gint       num_sampling_points;
  gdouble    *xs, *ys;
  gfloat     *ysf = NULL;
  cl_mem     cl_curve = NULL;
  cl_ulong   cl_max_constant_size;
  cl_int     cl_err = 0;

  num_sampling_points = o->sampling_points;

  if (!cl_data)
    {
      const char *kernel_name[] = {"cl_contrast_curve",NULL};
      cl_data = gegl_cl_compile_and_build (contrast_curve_cl_source,
                                           kernel_name);
    }
  if (!cl_data)
    return TRUE;

  if (num_sampling_points > 0)
    {
      xs = g_new (gdouble, num_sampling_points);
      ys = g_new (gdouble, num_sampling_points);

      gegl_curve_calc_values (o->curve, 0.0, 1.0, num_sampling_points, xs, ys);
      g_free (xs);

      /*We need to downscale the array to pass it to the GPU*/
      ysf = g_new (gfloat, num_sampling_points);
      copy_double_array_to_float_array (ys, ysf, num_sampling_points);
      g_free (ys);

      cl_err = gegl_clGetDeviceInfo (gegl_cl_get_device (),
                                     CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
                                     sizeof (cl_ulong),
                                     &cl_max_constant_size,
                                     NULL);
      CL_CHECK;

      GEGL_NOTE (GEGL_DEBUG_OPENCL, "Max Constant Mem Size: %lu bytes",
                 (unsigned long) cl_max_constant_size);

      if (sizeof (cl_float) * num_sampling_points < cl_max_constant_size)
        {
          cl_curve = gegl_clCreateBuffer (gegl_cl_get_context (),
                                          CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
                                          num_sampling_points * sizeof (cl_float),
                                          ysf, &cl_err);
          CL_CHECK;
          cl_err = gegl_clSetKernelArg (cl_data->kernel[0], 0, sizeof (cl_mem),
                                        (void*) &in_tex);
          CL_CHECK;
          cl_err = gegl_clSetKernelArg (cl_data->kernel[0], 1, sizeof (cl_mem),
                                        (void*) &out_tex);
          CL_CHECK;
          cl_err = gegl_clSetKernelArg (cl_data->kernel[0], 2, sizeof (cl_mem),
                                        (void*) &cl_curve);
          CL_CHECK;
          cl_err = gegl_clSetKernelArg (cl_data->kernel[0], 3, sizeof (gint),
                                        (void*) &num_sampling_points);
          CL_CHECK;
          cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (),
                                                cl_data->kernel[0], 1,
                                                NULL, &global_worksize, NULL,
                                                0, NULL, NULL);
          CL_CHECK;

          cl_err = gegl_clFinish (gegl_cl_get_command_queue ());
          CL_CHECK;

          cl_err = gegl_clReleaseMemObject (cl_curve);
          CL_CHECK_ONLY (cl_err);
        }
      else
        {
          /*If the curve size doesn't fit constant memory is better to use CPU*/
          GEGL_NOTE (GEGL_DEBUG_OPENCL,
                     "Not enough constant memory for the curve");
          g_free (ysf);
          return TRUE;
        }

      g_free (ysf);
      return FALSE;
error:
      if (ysf)
        g_free (ysf);
      if (cl_curve)
        gegl_clReleaseMemObject (cl_curve);

      return TRUE;
    }
  else  /*If the curve doesn't have a lookup table is better to use CPU*/
    {
      GEGL_NOTE (GEGL_DEBUG_OPENCL,
                 "Curve not suitable to be computed in the GPU");
      return TRUE;
    }
}
Ejemplo n.º 6
0
static gboolean
fir_cl_process (GeglBuffer            *input,
                GeglBuffer            *output,
                const GeglRectangle   *result,
                const Babl            *format,
                gfloat                *cmatrix,
                gint                   clen,
                GeglOrientation        orientation,
                GeglAbyssPolicy        abyss)
{
  gboolean              err = FALSE;
  cl_int                cl_err;
  cl_mem                cl_cmatrix = NULL;
  GeglBufferClIterator *i;
  gint                  read;
  gint                  left, right, top, bottom;

  if (orientation == GEGL_ORIENTATION_HORIZONTAL)
    {
      right = left = clen / 2;
      top = bottom = 0;
    }
  else
    {
      right = left = 0;
      top = bottom = clen / 2;
    }

  i = gegl_buffer_cl_iterator_new (output,
                                   result,
                                   format,
                                   GEGL_CL_BUFFER_WRITE);

  read = gegl_buffer_cl_iterator_add_2 (i,
                                        input,
                                        result,
                                        format,
                                        GEGL_CL_BUFFER_READ,
                                        left, right,
                                        top, bottom,
                                        abyss);

  cl_cmatrix = gegl_clCreateBuffer (gegl_cl_get_context(),
                                    CL_MEM_COPY_HOST_PTR | CL_MEM_READ_ONLY,
                                    clen * sizeof(cl_float), cmatrix, &cl_err);
  CL_CHECK;

  while (gegl_buffer_cl_iterator_next (i, &err) && !err)
    {
      err = cl_gaussian_blur(i->tex[read],
                             i->tex[0],
                             &i->roi[0],
                             cl_cmatrix,
                             clen,
                             orientation);

      if (err)
        {
          gegl_buffer_cl_iterator_stop (i);
          break;
        }
    }

  cl_err = gegl_clReleaseMemObject (cl_cmatrix);
  CL_CHECK;

  cl_cmatrix = NULL;

  return !err;

error:
  if (cl_cmatrix)
    gegl_clReleaseMemObject (cl_cmatrix);

  return FALSE;
}