Example #1
0
/*
 * WARNING: unlike commit_params, which is thread safe wrt gui thread and
 * pipes, this function lives in the pipeline thread, and NOT thread safe!
 */
static void commit_params_late(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece)
{
  dt_iop_levels_data_t *d = (dt_iop_levels_data_t *)piece->data;
  dt_iop_levels_gui_data_t *g = (dt_iop_levels_gui_data_t *)self->gui_data;

  if(d->mode == LEVELS_MODE_AUTOMATIC)
  {
    if(g && piece->pipe->type == DT_DEV_PIXELPIPE_FULL)
    {
      dt_pthread_mutex_lock(&g->lock);
      const uint64_t hash = g->hash;
      dt_pthread_mutex_unlock(&g->lock);

      // note that the case 'hash == 0' on first invocation in a session implies that d->levels[]
      // contains NANs which initiates special handling below to avoid inconsistent results. in all
      // other cases we make sure that the preview pipe has left us with proper readings for
      // g->auto_levels[]. if data are not yet there we need to wait (with timeout).
      if(hash != 0 && !dt_dev_sync_pixelpipe_hash(self->dev, piece->pipe, 0, self->priority, &g->lock, &g->hash))
        dt_control_log(_("inconsistent output"));

      dt_pthread_mutex_lock(&g->lock);
      d->levels[0] = g->auto_levels[0];
      d->levels[1] = g->auto_levels[1];
      d->levels[2] = g->auto_levels[2];
      dt_pthread_mutex_unlock(&g->lock);

      compute_lut(piece);
    }

    if(piece->pipe->type == DT_DEV_PIXELPIPE_PREVIEW || isnan(d->levels[0]) || isnan(d->levels[1])
       || isnan(d->levels[2]))
    {
      dt_iop_levels_compute_levels_automatic(piece);
      compute_lut(piece);
    }

    if(g && piece->pipe->type == DT_DEV_PIXELPIPE_PREVIEW && d->mode == LEVELS_MODE_AUTOMATIC)
    {
      uint64_t hash = dt_dev_hash_plus(self->dev, piece->pipe, 0, self->priority);
      dt_pthread_mutex_lock(&g->lock);
      g->auto_levels[0] = d->levels[0];
      g->auto_levels[1] = d->levels[1];
      g->auto_levels[2] = d->levels[2];
      g->hash = hash;
      dt_pthread_mutex_unlock(&g->lock);
    }
  }
}
Example #2
0
static inline void process_drago(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece,
                                 const void *const ivoid, void *const ovoid, const dt_iop_roi_t *const roi_in,
                                 const dt_iop_roi_t *const roi_out, dt_iop_global_tonemap_data_t *data)
{
  dt_iop_global_tonemap_gui_data_t *g = (dt_iop_global_tonemap_gui_data_t *)self->gui_data;
  float *in = (float *)ivoid;
  float *out = (float *)ovoid;
  const int ch = piece->colors;

  /* precalcs */
  const float eps = 0.0001f;
  float lwmax;
  float tmp_lwmax = NAN;

  // Drago needs the absolute Lmax value of the image. In pixelpipe FULL we can not reliably get this value
  // as the pixelpipe might only see part of the image (region of interest). Therefore we try to get lwmax from
  // the PREVIEW pixelpipe which luckily stores it for us.
  if(self->dev->gui_attached && g && piece->pipe->type == DT_DEV_PIXELPIPE_FULL)
  {
    dt_pthread_mutex_lock(&g->lock);
    const uint64_t hash = g->hash;
    dt_pthread_mutex_unlock(&g->lock);

    // note that the case 'hash == 0' on first invocation in a session implies that g->lwmax
    // is NAN which initiates special handling below to avoid inconsistent results. in all
    // other cases we make sure that the preview pipe has left us with proper readings for
    // lwmax. if data are not yet there we need to wait (with timeout).
    if(hash != 0 && !dt_dev_sync_pixelpipe_hash(self->dev, piece->pipe, 0, self->priority, &g->lock, &g->hash))
      dt_control_log(_("inconsistent output"));

    dt_pthread_mutex_lock(&g->lock);
    tmp_lwmax = g->lwmax;
    dt_pthread_mutex_unlock(&g->lock);
  }

  // in all other cases we calculate lwmax here
  if(isnan(tmp_lwmax))
  {
    lwmax = eps;
    for(size_t k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
    {
      float *inp = in + ch * k;
      lwmax = fmaxf(lwmax, (inp[0] * 0.01f));
    }
  }
  else
  {
    lwmax = tmp_lwmax;
  }

  // PREVIEW pixelpipe stores lwmax
  if(self->dev->gui_attached && g && piece->pipe->type == DT_DEV_PIXELPIPE_PREVIEW)
  {
    uint64_t hash = dt_dev_hash_plus(self->dev, piece->pipe, 0, self->priority);
    dt_pthread_mutex_lock(&g->lock);
    g->lwmax = lwmax;
    g->hash = hash;
    dt_pthread_mutex_unlock(&g->lock);
  }

  const float ldc = data->drago.max_light * 0.01 / log10f(lwmax + 1);
  const float bl = logf(fmaxf(eps, data->drago.bias)) / logf(0.5);

#ifdef _OPENMP
#pragma omp parallel for default(none) shared(in, out, lwmax) schedule(static)
#endif
  for(size_t k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
  {
    float *inp = in + ch * k;
    float *outp = out + ch * k;
    float lw = inp[0] * 0.01f;
    outp[0] = 100.0f
              * (ldc * logf(fmaxf(eps, lw + 1.0f)) / logf(fmaxf(eps, 2.0f + (powf(lw / lwmax, bl)) * 8.0f)));
    outp[1] = inp[1];
    outp[2] = inp[2];
  }
}
Example #3
0
int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
               const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  dt_iop_global_tonemap_data_t *d = (dt_iop_global_tonemap_data_t *)piece->data;
  dt_iop_global_tonemap_global_data_t *gd = (dt_iop_global_tonemap_global_data_t *)self->data;
  dt_iop_global_tonemap_gui_data_t *g = (dt_iop_global_tonemap_gui_data_t *)self->gui_data;
  dt_bilateral_cl_t *b = NULL;

  cl_int err = -999;
  cl_mem dev_m = NULL;
  cl_mem dev_r = NULL;
  float *maximum = NULL;
  const int devid = piece->pipe->devid;
  int gtkernel = -1;

  const int width = roi_out->width;
  const int height = roi_out->height;
  float parameters[4] = { 0.0f };

  switch(d->operator)
  {
    case OPERATOR_REINHARD:
      gtkernel = gd->kernel_global_tonemap_reinhard;
      break;
    case OPERATOR_DRAGO:
      gtkernel = gd->kernel_global_tonemap_drago;
      break;
    case OPERATOR_FILMIC:
      gtkernel = gd->kernel_global_tonemap_filmic;
      break;
  }

  if(d->operator== OPERATOR_DRAGO)
  {
    const float eps = 0.0001f;
    float tmp_lwmax = NAN;

    // see comments in process() about lwmax value
    if(self->dev->gui_attached && g && piece->pipe->type == DT_DEV_PIXELPIPE_FULL)
    {
      dt_pthread_mutex_lock(&g->lock);
      const uint64_t hash = g->hash;
      dt_pthread_mutex_unlock(&g->lock);

      if(hash != 0 && !dt_dev_sync_pixelpipe_hash(self->dev, piece->pipe, 0, self->priority, &g->lock, &g->hash))
        dt_control_log(_("inconsistent output"));

      dt_pthread_mutex_lock(&g->lock);
      tmp_lwmax = g->lwmax;
      dt_pthread_mutex_unlock(&g->lock);
    }

    if(isnan(tmp_lwmax))
    {
      dt_opencl_local_buffer_t flocopt
        = (dt_opencl_local_buffer_t){ .xoffset = 0, .xfactor = 1, .yoffset = 0, .yfactor = 1,
                                      .cellsize = sizeof(float), .overhead = 0,
                                      .sizex = 1 << 4, .sizey = 1 << 4 };

      if(!dt_opencl_local_buffer_opt(devid, gd->kernel_pixelmax_first, &flocopt))
        goto error;

      const size_t bwidth = ROUNDUP(width, flocopt.sizex);
      const size_t bheight = ROUNDUP(height, flocopt.sizey);

      const int bufsize = (bwidth / flocopt.sizex) * (bheight / flocopt.sizey);

      dt_opencl_local_buffer_t slocopt
        = (dt_opencl_local_buffer_t){ .xoffset = 0, .xfactor = 1, .yoffset = 0, .yfactor = 1,
                                      .cellsize = sizeof(float), .overhead = 0,
                                      .sizex = 1 << 16, .sizey = 1 };

      if(!dt_opencl_local_buffer_opt(devid, gd->kernel_pixelmax_second, &slocopt))
        goto error;

      const int reducesize = MIN(REDUCESIZE, ROUNDUP(bufsize, slocopt.sizex) / slocopt.sizex);

      size_t sizes[3];
      size_t local[3];

      dev_m = dt_opencl_alloc_device_buffer(devid, (size_t)bufsize * sizeof(float));
      if(dev_m == NULL) goto error;

      dev_r = dt_opencl_alloc_device_buffer(devid, (size_t)reducesize * sizeof(float));
      if(dev_r == NULL) goto error;

      sizes[0] = bwidth;
      sizes[1] = bheight;
      sizes[2] = 1;
      local[0] = flocopt.sizex;
      local[1] = flocopt.sizey;
      local[2] = 1;
      dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_first, 0, sizeof(cl_mem), &dev_in);
      dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_first, 1, sizeof(int), &width);
      dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_first, 2, sizeof(int), &height);
      dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_first, 3, sizeof(cl_mem), &dev_m);
      dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_first, 4, flocopt.sizex * flocopt.sizey * sizeof(float), NULL);
      err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_pixelmax_first, sizes, local);
      if(err != CL_SUCCESS) goto error;

      sizes[0] = reducesize * slocopt.sizex;
      sizes[1] = 1;
      sizes[2] = 1;
      local[0] = slocopt.sizex;
      local[1] = 1;
      local[2] = 1;
      dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_second, 0, sizeof(cl_mem), &dev_m);
      dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_second, 1, sizeof(cl_mem), &dev_r);
      dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_second, 2, sizeof(int), &bufsize);
      dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_second, 3, slocopt.sizex * sizeof(float), NULL);
      err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_pixelmax_second, sizes, local);
      if(err != CL_SUCCESS) goto error;

      maximum = dt_alloc_align(16, reducesize * sizeof(float));
      err = dt_opencl_read_buffer_from_device(devid, (void *)maximum, dev_r, 0,
                                            (size_t)reducesize * sizeof(float), CL_TRUE);
      if(err != CL_SUCCESS) goto error;

      dt_opencl_release_mem_object(dev_r);
      dt_opencl_release_mem_object(dev_m);
      dev_r = dev_m = NULL;

      for(int k = 1; k < reducesize; k++)
      {
        float mine = maximum[0];
        float other = maximum[k];
        maximum[0] = (other > mine) ? other : mine;
      }

      tmp_lwmax = MAX(eps, (maximum[0] * 0.01f));

      dt_free_align(maximum);
      maximum = NULL;
    }

    const float lwmax = tmp_lwmax;
    const float ldc = d->drago.max_light * 0.01f / log10f(lwmax + 1.0f);
    const float bl = logf(MAX(eps, d->drago.bias)) / logf(0.5f);

    parameters[0] = eps;
    parameters[1] = ldc;
    parameters[2] = bl;
    parameters[3] = lwmax;

    if(self->dev->gui_attached && g && piece->pipe->type == DT_DEV_PIXELPIPE_PREVIEW)
    {
      uint64_t hash = dt_dev_hash_plus(self->dev, piece->pipe, 0, self->priority);
      dt_pthread_mutex_lock(&g->lock);
      g->lwmax = lwmax;
      g->hash = hash;
      dt_pthread_mutex_unlock(&g->lock);
    }
  }

  const float scale = piece->iscale / roi_in->scale;
  const float sigma_r = 8.0f; // does not depend on scale
  const float iw = piece->buf_in.width / scale;
  const float ih = piece->buf_in.height / scale;
  const float sigma_s = fminf(iw, ih) * 0.03f;

  if(d->detail != 0.0f)
  {
    b = dt_bilateral_init_cl(devid, roi_in->width, roi_in->height, sigma_s, sigma_r);
    if(!b) goto error;
    // get detail from unchanged input buffer
    err = dt_bilateral_splat_cl(b, dev_in);
    if(err != CL_SUCCESS) goto error;
  }

  size_t sizes[2] = { ROUNDUPWD(width), ROUNDUPHT(height) };
  dt_opencl_set_kernel_arg(devid, gtkernel, 0, sizeof(cl_mem), &dev_in);
  dt_opencl_set_kernel_arg(devid, gtkernel, 1, sizeof(cl_mem), &dev_out);
  dt_opencl_set_kernel_arg(devid, gtkernel, 2, sizeof(int), &width);
  dt_opencl_set_kernel_arg(devid, gtkernel, 3, sizeof(int), &height);
  dt_opencl_set_kernel_arg(devid, gtkernel, 4, 4 * sizeof(float), &parameters);
  err = dt_opencl_enqueue_kernel_2d(devid, gtkernel, sizes);
  if(err != CL_SUCCESS) goto error;

  if(d->detail != 0.0f)
  {
    err = dt_bilateral_blur_cl(b);
    if(err != CL_SUCCESS) goto error;
    // and apply it to output buffer after logscale
    err = dt_bilateral_slice_to_output_cl(b, dev_in, dev_out, d->detail);
    if(err != CL_SUCCESS) goto error;
    dt_bilateral_free_cl(b);
  }

  return TRUE;

error:
  if(b) dt_bilateral_free_cl(b);
  dt_opencl_release_mem_object(dev_m);
  dt_opencl_release_mem_object(dev_r);
  dt_free_align(maximum);
  dt_print(DT_DEBUG_OPENCL, "[opencl_global_tonemap] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}
#endif


void tiling_callback(struct dt_iop_module_t *self, struct dt_dev_pixelpipe_iop_t *piece,
                     const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out,
                     struct dt_develop_tiling_t *tiling)
{
  dt_iop_global_tonemap_data_t *d = (dt_iop_global_tonemap_data_t *)piece->data;

  const float scale = piece->iscale / roi_in->scale;
  const float iw = piece->buf_in.width / scale;
  const float ih = piece->buf_in.height / scale;
  const float sigma_s = fminf(iw, ih) * 0.03f;
  const float sigma_r = 8.0f;
  const int detail = (d->detail != 0.0f);

  const int width = roi_in->width;
  const int height = roi_in->height;
  const int channels = piece->colors;

  const size_t basebuffer = width * height * channels * sizeof(float);

  tiling->factor = 2.0f + (detail ? (float)dt_bilateral_memory_use2(width, height, sigma_s, sigma_r) / basebuffer : 0.0f);
  tiling->maxbuf
      = (detail ? MAX(1.0f, (float)dt_bilateral_singlebuffer_size2(width, height, sigma_s, sigma_r) / basebuffer) : 1.0f);
  tiling->overhead = 0;
  tiling->overlap = (detail ? ceilf(4 * sigma_s) : 0);
  tiling->xalign = 1;
  tiling->yalign = 1;
  return;
}

void commit_params(struct dt_iop_module_t *self, dt_iop_params_t *p1, dt_dev_pixelpipe_t *pipe,
                   dt_dev_pixelpipe_iop_t *piece)
{
  dt_iop_global_tonemap_params_t *p = (dt_iop_global_tonemap_params_t *)p1;
  dt_iop_global_tonemap_data_t *d = (dt_iop_global_tonemap_data_t *)piece->data;

  d->operator= p->operator;
  d->drago.bias = p->drago.bias;
  d->drago.max_light = p->drago.max_light;
  d->detail = p->detail;

  // drago needs the maximum L-value of the whole image so it must not use tiling
  if(d->operator == OPERATOR_DRAGO) piece->process_tiling_ready = 0;

#ifdef HAVE_OPENCL
  if(d->detail != 0.0f)
    piece->process_cl_ready = (piece->process_cl_ready && !(darktable.opencl->avoid_atomics));
#endif
}
Example #4
0
void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const void *const i, void *const o,
             const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  // this is called for preview and full pipe separately, each with its own pixelpipe piece.
  // get our data struct:
  dt_iop_bilat_data_t *d = (dt_iop_bilat_data_t *)piece->data;
  dt_iop_bilat_gui_data_t *g = self->gui_data;
  // the total scale is composed of scale before input to the pipeline (iscale),
  // and the scale of the roi.
  const float scale = piece->iscale / roi_in->scale;
  const float sigma_r = d->sigma_r; // does not depend on scale
  const float sigma_s = d->sigma_s / scale;

  if(d->mode == s_mode_bilateral)
  {
    dt_bilateral_t *b = dt_bilateral_init(roi_in->width, roi_in->height, sigma_s, sigma_r);
    dt_bilateral_splat(b, (float *)i);
    dt_bilateral_blur(b);
    dt_bilateral_slice(b, (float *)i, (float *)o, d->detail);
    dt_bilateral_free(b);
  }
  else // s_mode_local_laplacian
  {
    local_laplacian_boundary_t b = {0};
    if(self->dev->gui_attached && g && piece->pipe->type == DT_DEV_PIXELPIPE_PREVIEW)
    {
      b.mode = 1;
    }
    else if(self->dev->gui_attached && g && piece->pipe->type == DT_DEV_PIXELPIPE_FULL)
    {
      // full pipeline working on ROI needs boundary conditions from preview pipe
      // only do this if roi covers less than 90% of full width
      if(MIN(roi_in->width/roi_in->scale / piece->buf_in.width,
             roi_in->height/roi_in->scale / piece->buf_in.height) < 0.9)
      {
        dt_pthread_mutex_lock(&g->lock);
        const uint64_t hash = g->hash;
        dt_pthread_mutex_unlock(&g->lock);
        if(hash != 0 && !dt_dev_sync_pixelpipe_hash(self->dev, piece->pipe, 0, self->priority, &g->lock, &g->hash))
        {
          // TODO: remove this debug output at some point:
          dt_control_log(_("local laplacian: inconsistent output"));
        }
        else
        {
          dt_pthread_mutex_lock(&g->lock);
          // grab preview pipe buffers here:
          b = g->ll_boundary;
          dt_pthread_mutex_unlock(&g->lock);
          if(b.wd > 0 && b.ht > 0) b.mode = 2;
        }
      }
    }

    b.roi = roi_in;
    b.buf = &piece->buf_in;
    // also lock the ll_boundary in case we're using it.
    // could get away without this if the preview pipe didn't also free the data below.
    const int lockit = self->dev->gui_attached && g && piece->pipe->type == DT_DEV_PIXELPIPE_FULL;
    if(lockit)
    {
      dt_pthread_mutex_lock(&g->lock);
      local_laplacian_sse2(i, o, roi_in->width, roi_in->height, d->midtone, d->sigma_s, d->sigma_r, d->detail, &b);
      dt_pthread_mutex_unlock(&g->lock);
    }
    else local_laplacian_sse2(i, o, roi_in->width, roi_in->height, d->midtone, d->sigma_s, d->sigma_r, d->detail, &b);

    // preview pixelpipe stores values.
    if(self->dev->gui_attached && g && piece->pipe->type == DT_DEV_PIXELPIPE_PREVIEW)
    {
      uint64_t hash = dt_dev_hash_plus(self->dev, piece->pipe, 0, self->priority);
      dt_pthread_mutex_lock(&g->lock);
      // store buffer pointers on gui struct. maybe need to swap/free old ones
      local_laplacian_boundary_free(&g->ll_boundary);
      g->ll_boundary = b;
      g->hash = hash;
      dt_pthread_mutex_unlock(&g->lock);
    }
  }

  if(piece->pipe->mask_display & DT_DEV_PIXELPIPE_DISPLAY_MASK) dt_iop_alpha_copy(i, o, roi_in->width, roi_in->height);
}