Example #1
0
void process (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, void *ivoid, void *ovoid, const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_lowpass_data_t *data = (dt_iop_lowpass_data_t *)piece->data;
  float *in  = (float *)ivoid;
  float *out = (float *)ovoid;


  const int width = roi_in->width;
  const int height = roi_in->height;
  const int ch = piece->colors;

  const int   use_bilateral = data->radius < 0 ? 1 : 0;
  const float radius = fmax(0.1f, fabs(data->radius));
  const float sigma = radius * roi_in->scale / piece ->iscale;
  const int order = data->order;

  const float Labmax[] = { 100.0f, 128.0f, 128.0f, 1.0f };
  const float Labmin[] = { 0.0f, -128.0f, -128.0f, 0.0f };


  if(!use_bilateral)
  {
    dt_gaussian_t *g = dt_gaussian_init(width, height, ch, Labmax, Labmin, sigma, order);
    if(!g) return;
    dt_gaussian_blur_4c(g, in, out);
    dt_gaussian_free(g);
  }
  else
  {
    const float sigma_r = 100.0f;// d->sigma_r; // does not depend on scale
    const float sigma_s = sigma;
    const float detail = -1.0f; // we want the bilateral base layer

    dt_bilateral_t *b = dt_bilateral_init(width, height, sigma_s, sigma_r);
    if(!b) return;
    dt_bilateral_splat(b, in);
    dt_bilateral_blur(b);
    dt_bilateral_slice(b, in, out, detail);
    dt_bilateral_free(b);
  }

  // some aliased pointers for compilers that don't yet understand operators on __m128
  const float *const Labminf = (float *)&Labmin;
  const float *const Labmaxf = (float *)&Labmax;
#ifdef _OPENMP
  #pragma omp parallel for default(none) shared(in,out,data,roi_out) schedule(static)
#endif
  for(int k=0; k<roi_out->width*roi_out->height; k++)
  {
    out[k*ch+0] = (out[k*ch+0] < 100.0f) ? data->table[CLAMP((int)(out[k*ch+0]/100.0f*0x10000ul), 0, 0xffff)] :
                  dt_iop_eval_exp(data->unbounded_coeffs, out[k*ch+0]/100.0f);
    out[k*ch+1] = CLAMPF(out[k*ch+1]*data->saturation, Labminf[1], Labmaxf[1]);
    out[k*ch+2] = CLAMPF(out[k*ch+2]*data->saturation, Labminf[2], Labmaxf[2]);
    out[k*ch+3] = in[k*ch+3];
  }
}
Example #2
0
void process (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, void *ivoid, void *ovoid, const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  float *in;
  float *out;
  dt_iop_zonesystem_gui_data_t *g = NULL;
  dt_iop_zonesystem_data_t *data = (dt_iop_zonesystem_data_t*)piece->data;

  const int width = roi_out->width;
  const int height = roi_out->height;

  guchar *in_buffer = NULL, *out_buffer = NULL;
  if( self->dev->gui_attached && piece->pipe->type == DT_DEV_PIXELPIPE_PREVIEW )
  {
    g = (dt_iop_zonesystem_gui_data_t *)self->gui_data;
    dt_pthread_mutex_lock(&g->lock);
    if(g->in_preview_buffer)
      g_free (g->in_preview_buffer);
    if(g->out_preview_buffer)
      g_free (g->out_preview_buffer);

    in_buffer = g->in_preview_buffer = g_malloc ((size_t)width*height);
    out_buffer = g->out_preview_buffer = g_malloc ((size_t)width*height);
    g->preview_width = width;
    g->preview_height = height;

    dt_pthread_mutex_unlock(&g->lock);
  }

  /* calculate zonemap */
  const int size = data->size;
  float zonemap[MAX_ZONE_SYSTEM_SIZE]= {-1};
  _iop_zonesystem_calculate_zonemap (data, zonemap);
  const int ch = piece->colors;


  /* process the image */
  in  = (float *)ivoid;
  out = (float *)ovoid;

  const float rzscale = (size-1)/100.0f;

  float zonemap_offset[MAX_ZONE_SYSTEM_SIZE]= {-1};
  float zonemap_scale[MAX_ZONE_SYSTEM_SIZE]= {-1};

  // precompute scale and offset
  for (int k=0; k < size-1; k++) zonemap_scale[k]  = (zonemap[k+1]-zonemap[k])*(size-1);
  for (int k=0; k < size-1; k++) zonemap_offset[k] = 100.0f * ((k+1)*zonemap[k] - k*zonemap[k+1]) ;

#ifdef _OPENMP
  #pragma omp parallel for default(none) shared(in, out, zonemap_scale,zonemap_offset) schedule(static)
#endif
  for (int j=0; j<height; j++)
    for (int i=0; i<width; i++)
    {
      /* remap lightness into zonemap and apply lightness */
      const float *inp = in + ch*((size_t)j*width+i);
      float *outp = out + ch*((size_t)j*width+i);

      const int rz = CLAMPS(inp[0]*rzscale, 0, size-2);  // zone index

      const float zs = ((rz > 0) ? (zonemap_offset[rz]/inp[0]) : 0) + zonemap_scale[rz];

      _mm_stream_ps(outp,_mm_mul_ps(_mm_load_ps(inp),_mm_set1_ps(zs)));
    }

  _mm_sfence();

  if(piece->pipe->mask_display)
    dt_iop_alpha_copy(ivoid, ovoid, width, height);


  /* if gui and have buffer lets gaussblur and fill buffer with zone indexes */
  if( self->dev->gui_attached && g && in_buffer && out_buffer)
  {

    float Lmax[] = { 100.0f };
    float Lmin[] = { 0.0f };

    /* setup gaussian kernel */
    const int radius = 8;
    const float sigma = 2.5*(radius*roi_in->scale/piece->iscale);

    dt_gaussian_t *gauss = dt_gaussian_init(width, height, 1, Lmax, Lmin, sigma, DT_IOP_GAUSSIAN_ZERO);

    float *tmp = g_malloc((size_t)width*height*sizeof(float));

    if(gauss && tmp)
    {
#ifdef _OPENMP
      #pragma omp parallel for default(none) shared(ivoid, tmp) schedule(static)
#endif
      for(size_t k=0; k<(size_t)width*height; k++)
        tmp[k] = ((float *)ivoid)[ch*k];

      dt_gaussian_blur(gauss, tmp, tmp);

      /* create zonemap preview for input */
      dt_pthread_mutex_lock(&g->lock);
#ifdef _OPENMP
      #pragma omp parallel for default(none) shared(tmp,in_buffer) schedule(static)
#endif
      for (size_t k=0; k<(size_t)width*height; k++)
      {
        in_buffer[k] = CLAMPS(tmp[k]*(size-1)/100.0f, 0, size-2);
      }
      dt_pthread_mutex_unlock(&g->lock);


#ifdef _OPENMP
      #pragma omp parallel for default(none) shared(ovoid, tmp) schedule(static)
#endif
      for(size_t k=0; k<(size_t)width*height; k++)
        tmp[k] = ((float *)ovoid)[ch*k];

      dt_gaussian_blur(gauss, tmp, tmp);


      /* create zonemap preview for output */
      dt_pthread_mutex_lock(&g->lock);
#ifdef _OPENMP
      #pragma omp parallel for default(none) shared(tmp,out_buffer) schedule(static)
#endif
      for (size_t k=0; k<(size_t)width*height; k++)
      {
        out_buffer[k] = CLAMPS(tmp[k]*(size-1)/100.0f, 0, size-2);
      }
      dt_pthread_mutex_unlock(&g->lock);
    }

    if (tmp) g_free(tmp);
    if (gauss) dt_gaussian_free(gauss);
  }
}
Example #3
0
static void process_common_cleanup(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece,
                                   const void *const ivoid, void *const ovoid,
                                   const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  dt_iop_zonesystem_data_t *d = (dt_iop_zonesystem_data_t *)piece->data;
  dt_iop_zonesystem_gui_data_t *g = (dt_iop_zonesystem_gui_data_t *)self->gui_data;

  const int width = roi_out->width;
  const int height = roi_out->height;
  const int ch = piece->colors;
  const int size = d->params.size;

  if(piece->pipe->mask_display) dt_iop_alpha_copy(ivoid, ovoid, width, height);

  /* if gui and have buffer lets gaussblur and fill buffer with zone indexes */
  if(self->dev->gui_attached && piece->pipe->type == DT_DEV_PIXELPIPE_PREVIEW && g && g->in_preview_buffer
     && g->out_preview_buffer)
  {
    float Lmax[] = { 100.0f };
    float Lmin[] = { 0.0f };

    /* setup gaussian kernel */
    const int radius = 8;
    const float sigma = 2.5 * (radius * roi_in->scale / piece->iscale);

    dt_gaussian_t *gauss = dt_gaussian_init(width, height, 1, Lmax, Lmin, sigma, DT_IOP_GAUSSIAN_ZERO);

    float *tmp = g_malloc_n((size_t)width * height, sizeof(float));

    if(gauss && tmp)
    {
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(tmp) schedule(static)
#endif
      for(size_t k = 0; k < (size_t)width * height; k++) tmp[k] = ((float *)ivoid)[ch * k];

      dt_gaussian_blur(gauss, tmp, tmp);

      /* create zonemap preview for input */
      dt_pthread_mutex_lock(&g->lock);
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(tmp, g) schedule(static)
#endif
      for(size_t k = 0; k < (size_t)width * height; k++)
      {
        g->in_preview_buffer[k] = CLAMPS(tmp[k] * (size - 1) / 100.0f, 0, size - 2);
      }
      dt_pthread_mutex_unlock(&g->lock);


#ifdef _OPENMP
#pragma omp parallel for default(none) shared(tmp) schedule(static)
#endif
      for(size_t k = 0; k < (size_t)width * height; k++) tmp[k] = ((float *)ovoid)[ch * k];

      dt_gaussian_blur(gauss, tmp, tmp);


      /* create zonemap preview for output */
      dt_pthread_mutex_lock(&g->lock);
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(tmp, g) schedule(static)
#endif
      for(size_t k = 0; k < (size_t)width * height; k++)
      {
        g->out_preview_buffer[k] = CLAMPS(tmp[k] * (size - 1) / 100.0f, 0, size - 2);
      }
      dt_pthread_mutex_unlock(&g->lock);
    }

    g_free(tmp);
    if(gauss) dt_gaussian_free(gauss);
  }
}
Example #4
0
// the basis of how the following algorithm works comes from rawtherapee (http://rawtherapee.com/)
// defringe -- thanks to Emil Martinec <*****@*****.**> for that
// quite some modifications were done though:
// 1. use a fibonacci lattice instead of full window, to speed things up
// 2. option for local averaging or static (RT used the global/region one)
// 3. additional condition to reduce sharp edged artifacts, by blurring pixels near pixels over threshold,
// this really helps improving the filter with thick fringes
// -----------------------------------------------------------------------------------------
// in the following you will also see some more "magic numbers",
// most are chosen arbitrarily and/or by experiment/trial+error ... I am sorry ;-)
// and having everything user-defineable would be just too much
// -----------------------------------------------------------------------------------------
void process(struct dt_iop_module_t *module, dt_dev_pixelpipe_iop_t *piece, void *i, void *o,
             const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_defringe_data_t *d = (dt_iop_defringe_data_t *)piece->data;
  assert(dt_iop_module_colorspace(module) == iop_cs_Lab);

  const int order = 1; // 0,1,2
  const float sigma = fmax(0.1f, fabs(d->radius)) * roi_in->scale / piece->iscale;
  const float Labmax[] = { 100.0f, 128.0f, 128.0f, 1.0f };
  const float Labmin[] = { 0.0f, -128.0f, -128.0f, 0.0f };
  const int ch = piece->colors;

  const int radius = ceil(2.0 * ceilf(sigma));

  // save the fibonacci lattices in them later
  int *xy_avg = NULL;
  int *xy_artifact = NULL;
  int *xy_small = NULL;

  if(roi_out->width < 2 * radius + 1 || roi_out->height < 2 * radius + 1) goto ERROR_EXIT;

  float avg_edge_chroma = 0.0;

  float *const in = (float *const)i;
  float *const out = (float *const)o;
  int width = roi_in->width;
  int height = roi_in->height;

  dt_gaussian_t *gauss = NULL;
  gauss = dt_gaussian_init(width, height, ch, Labmax, Labmin, sigma, order);
  if(!gauss)
  {
    fprintf(stderr, "Error allocating memory for gaussian blur in: defringe module\n");
    goto ERROR_EXIT;
  }
  dt_gaussian_blur(gauss, in, out);
  dt_gaussian_free(gauss);

  // Pre-Compute Fibonacci Lattices
  int *tmp;

  int samples_wish = radius * radius;
  int sampleidx_avg;
  // select samples by fibonacci number
  if(samples_wish > 89)
  {
    sampleidx_avg = 12; // 144 samples
  }
  else if(samples_wish > 55)
  {
    sampleidx_avg = 11; // 89 samples
  }
  else if(samples_wish > 34)
  {
    sampleidx_avg = 10; // ..you get the idea
  }
  else if(samples_wish > 21)
  {
    sampleidx_avg = 9;
  }
  else if(samples_wish > 13)
  {
    sampleidx_avg = 8;
  }
  else
  { // don't use less than 13 samples
    sampleidx_avg = 7;
  }
  const int sampleidx_small = sampleidx_avg - 1;

  const int small_radius = MAX(radius, 3);
  const int avg_radius = 24 + radius * 4;

  const int samples_small = fib[sampleidx_small];
  const int samples_avg = fib[sampleidx_avg];

  // precompute all required fibonacci lattices:
  if((xy_avg = malloc((size_t)2 * sizeof(int) * samples_avg)))
  {
    tmp = xy_avg;
    for(int u = 0; u < samples_avg; u++)
    {
      int dx, dy;
      fib_latt(&dx, &dy, avg_radius, u, sampleidx_avg);
      *tmp++ = dx;
      *tmp++ = dy;
    }
  }
  else
  {
    fprintf(stderr, "Error allocating memory for fibonacci lattice in: defringe module\n");
    goto ERROR_EXIT;
  }

  if((xy_small = malloc((size_t)2 * sizeof(int) * samples_small)))
  {
    tmp = xy_small;
    for(int u = 0; u < samples_small; u++)
    {
      int dx, dy;
      fib_latt(&dx, &dy, small_radius, u, sampleidx_small);
      *tmp++ = dx;
      *tmp++ = dy;
    }
  }
  else
  {
    fprintf(stderr, "Error allocating memory for fibonacci lattice in: defringe module\n");
    goto ERROR_EXIT;
  }

#ifdef _OPENMP
#pragma omp parallel for default(none) shared(width, height,                                                 \
                                              d) reduction(+ : avg_edge_chroma) schedule(static)
#endif
  for(int v = 0; v < height; v++)
  {
    for(int t = 0; t < width; t++)
    {
      // edge-detect on color channels
      // method: difference of original to gaussian blurred image:
      float a = in[(size_t)v * width * ch + t * ch + 1] - out[(size_t)v * width * ch + t * ch + 1];
      float b = in[(size_t)v * width * ch + t * ch + 2] - out[(size_t)v * width * ch + t * ch + 2];

      float edge = (a * a + b * b); // range up to 2*(256)^2 -> approx. 0 to 131072

      // save local edge chroma in out[.. +3] , this is later compared with threshold
      out[(size_t)v * width * ch + t * ch + 3] = edge;
      // the average chroma of the edge-layer in the roi
      if(MODE_GLOBAL_AVERAGE == d->op_mode) avg_edge_chroma += edge;
    }
  }

  float thresh;
  if(MODE_GLOBAL_AVERAGE == d->op_mode)
  {
    avg_edge_chroma = avg_edge_chroma / (width * height) + 10.0 * FLT_EPSILON;
    thresh = fmax(0.1f, 4.0 * d->thresh * avg_edge_chroma / MAGIC_THRESHOLD_COEFF);
  }
  else
  {
    // this fixed value will later be changed when doing local averaging, or kept as-is in "static" mode
    avg_edge_chroma = MAGIC_THRESHOLD_COEFF;
    thresh = fmax(0.1f, d->thresh);
  }

#ifdef _OPENMP
// dynamically/guided scheduled due to possible uneven edge-chroma distribution (thanks to rawtherapee code
// for this hint!)
#pragma omp parallel for default(none) shared(width, height, d, xy_small, xy_avg, xy_artifact)               \
    firstprivate(thresh, avg_edge_chroma) schedule(guided, 32)
#endif
  for(int v = 0; v < height; v++)
  {
    for(int t = 0; t < width; t++)
    {
      float local_thresh = thresh;
      // think of compiler setting "-funswitch-loops" to maybe improve these things:
      if(MODE_LOCAL_AVERAGE == d->op_mode && out[(size_t)v * width * ch + t * ch + 3] > thresh)
      {
        float local_avg = 0.0;
        // use some and not all values from the neigbourhood to speed things up:
        const int *tmp = xy_avg;
        for(int u = 0; u < samples_avg; u++)
        {
          int dx = *tmp++;
          int dy = *tmp++;
          int x = MAX(0, MIN(width - 1, t + dx));
          int y = MAX(0, MIN(height - 1, v + dy));
          local_avg += out[(size_t)y * width * ch + x * ch + 3];
        }
        avg_edge_chroma = fmax(0.01f, (float)local_avg / samples_avg);
        local_thresh = fmax(0.1f, 4.0 * d->thresh * avg_edge_chroma / MAGIC_THRESHOLD_COEFF);
      }

      if(out[(size_t)v * width * ch + t * ch + 3] > local_thresh
         // reduces artifacts ("region growing by 1 pixel"):
         || out[(size_t)MAX(0, (v - 1)) * width * ch + MAX(0, (t - 1)) * ch + 3] > local_thresh
         || out[(size_t)MAX(0, (v - 1)) * width * ch + t * ch + 3] > local_thresh
         || out[(size_t)MAX(0, (v - 1)) * width * ch + MIN(width - 1, (t + 1)) * ch + 3] > local_thresh
         || out[(size_t)v * width * ch + MAX(0, (t - 1)) * ch + 3] > local_thresh
         || out[(size_t)v * width * ch + MIN(width - 1, (t + 1)) * ch + 3] > local_thresh
         || out[(size_t)MIN(height - 1, (v + 1)) * width * ch + MAX(0, (t - 1)) * ch + 3] > local_thresh
         || out[(size_t)MIN(height - 1, (v + 1)) * width * ch + t * ch + 3] > local_thresh
         || out[(size_t)MIN(height - 1, (v + 1)) * width * ch + MIN(width - 1, (t + 1)) * ch + 3]
            > local_thresh)
      {
        float atot = 0, btot = 0;
        float norm = 0;
        float weight;
        // it seems better to use only some pixels from a larger window instead of all pixels from a smaller
        // window
        // we use a fibonacci lattice for that, samples amount need to be a fibonacci number, this can then be
        // scaled to
        // a certain radius

        // use some neighbourhood pixels for lowest chroma average
        const int *tmp = xy_small;
        for(int u = 0; u < samples_small; u++)
        {
          int dx = *tmp++;
          int dy = *tmp++;
          int x = MAX(0, MIN(width - 1, t + dx));
          int y = MAX(0, MIN(height - 1, v + dy));
          // inverse chroma weighted average of neigbouring pixels inside window
          // also taking average edge chromaticity into account (either global or local average)
          weight = 1.0 / (out[(size_t)y * width * ch + x * ch + 3] + avg_edge_chroma);
          atot += weight * in[(size_t)y * width * ch + x * ch + 1];
          btot += weight * in[(size_t)y * width * ch + x * ch + 2];
          norm += weight;
        }
        // here we could try using a "balance" between original and changed value, this could be used to
        // reduce artifcats
        // but on first tries, results weren't very convincing, and there are blend settings available anyway
        // in dt
        // float balance = (out[v*width*ch +t*ch +3]-thresh)/out[v*width*ch +t*ch +3];
        double a = (atot / norm); // *balance + in[v*width*ch + t*ch +1]*(1.0-balance);
        double b = (btot / norm); // *balance + in[v*width*ch + t*ch +2]*(1.0-balance);
        // if (a < -128.0 || a > 127.0) CLIP(a,-128.0,127.0);
        // if (b < -128.0 || b > 127.0) CLIP(b,-128.0,127.0);
        out[(size_t)v * width * ch + t * ch + 1] = a;
        out[(size_t)v * width * ch + t * ch + 2] = b;
      }
      else
      {
        out[(size_t)v * width * ch + t * ch + 1] = in[(size_t)v * width * ch + t * ch + 1];
        out[(size_t)v * width * ch + t * ch + 2] = in[(size_t)v * width * ch + t * ch + 2];
      }
      out[(size_t)v * width * ch + t * ch] = in[(size_t)v * width * ch + t * ch];
    }
  }

  if(piece->pipe->mask_display) dt_iop_alpha_copy(i, o, roi_out->width, roi_out->height);

  goto FINISH_PROCESS;

ERROR_EXIT:
  memcpy(o, i, (size_t)sizeof(float) * ch * roi_out->width * roi_out->height);

FINISH_PROCESS:
  free(xy_artifact);
  free(xy_small);
  free(xy_avg);
}