Exemple #1
0
int
output_bpp(dt_iop_module_t *module, dt_dev_pixelpipe_t *pipe, dt_dev_pixelpipe_iop_t *piece)
{
  // this is bytes per pixel, so it has to be 4*sizeof(float) or sizeof(float) for raw images.
  if(!dt_dev_pixelpipe_uses_downsampled_input(pipe) && piece->pipe->image.filters && piece->pipe->image.bpp != 4) return sizeof(uint16_t);
  if(!dt_dev_pixelpipe_uses_downsampled_input(pipe) && piece->pipe->image.filters && piece->pipe->image.bpp == 4) return sizeof(float);
  return 4*sizeof(float);
}
Exemple #2
0
int
process_cl (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out, const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
    dt_iop_temperature_data_t *d = (dt_iop_temperature_data_t *)piece->data;
    dt_iop_temperature_global_data_t *gd = (dt_iop_temperature_global_data_t *)self->data;

    const int devid = piece->pipe->devid;
    const int filters = dt_image_flipped_filter(&piece->pipe->image);
    float coeffs[3] = {d->coeffs[0], d->coeffs[1], d->coeffs[2]};
    cl_mem dev_coeffs = NULL;
    cl_int err = -999;
    int kernel = -1;

    if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && filters && piece->pipe->image.bpp != 4)
    {
        kernel = gd->kernel_whitebalance_1ui;
        for(int k=0; k<3; k++) coeffs[k] /= 65535.0f;
    }
    else if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && filters && piece->pipe->image.bpp == 4)
    {
        kernel = gd->kernel_whitebalance_1f;
    }
    else
    {
        kernel = gd->kernel_whitebalance_4f;
    }

    dev_coeffs = dt_opencl_copy_host_to_device_constant(devid, sizeof(float)*3, coeffs);
    if (dev_coeffs == NULL) goto error;

    const int width = roi_in->width;
    const int height = roi_in->height;

    size_t sizes[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1};
    dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), (void *)&dev_in);
    dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(cl_mem), (void *)&dev_out);
    dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), (void *)&width);
    dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), (void *)&height);
    dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(cl_mem), (void *)&dev_coeffs);
    dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(uint32_t), (void *)&filters);
    dt_opencl_set_kernel_arg(devid, kernel, 6, sizeof(uint32_t), (void *)&roi_out->x);
    dt_opencl_set_kernel_arg(devid, kernel, 7, sizeof(uint32_t), (void *)&roi_out->y);
    err = dt_opencl_enqueue_kernel_2d(devid, kernel, sizes);
    if(err != CL_SUCCESS) goto error;

    dt_opencl_release_mem_object(dev_coeffs);
    for(int k=0; k<3; k++)
        piece->pipe->processed_maximum[k] = d->coeffs[k] * piece->pipe->processed_maximum[k];
    return TRUE;

error:
    if (dev_coeffs != NULL) dt_opencl_release_mem_object(dev_coeffs);
    dt_print(DT_DEBUG_OPENCL, "[opencl_white_balance] couldn't enqueue kernel! %d\n", err);
    return FALSE;
}
Exemple #3
0
static void process_clip_plain(dt_dev_pixelpipe_iop_t *piece, const void *const ivoid, void *const ovoid,
                               const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out,
                               const float clip)
{
  const float *const in = (const float *const)ivoid;
  float *const out = (float *const)ovoid;

  if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && dt_image_filter(&piece->pipe->image))
  { // raw mosaic
#ifdef _OPENMP
#pragma omp parallel for SIMD() default(none) schedule(static)
#endif
    for(size_t k = 0; k < (size_t)roi_out->width * roi_out->height; k++)
    {
      out[k] = MIN(clip, in[k]);
    }
  }
  else
  {
    const int ch = piece->colors;

#ifdef _OPENMP
#pragma omp parallel for SIMD() default(none) schedule(static)
#endif
    for(size_t k = 0; k < (size_t)ch * roi_out->width * roi_out->height; k++)
    {
      out[k] = MIN(clip, in[k]);
    }
  }
}
Exemple #4
0
int output_bpp(dt_iop_module_t *module, dt_dev_pixelpipe_t *pipe, dt_dev_pixelpipe_iop_t *piece)
{
  if(!dt_dev_pixelpipe_uses_downsampled_input(pipe) && (pipe->image.flags & DT_IMAGE_RAW))
    return sizeof(float);
  else
    return 4 * sizeof(float);
}
Exemple #5
0
int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
               const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  dt_iop_highlights_data_t *d = (dt_iop_highlights_data_t *)piece->data;
  dt_iop_highlights_global_data_t *gd = (dt_iop_highlights_global_data_t *)self->data;

  cl_int err = -999;
  const int devid = piece->pipe->devid;
  const int width = roi_in->width;
  const int height = roi_in->height;

  size_t sizes[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1 };
  const float clip = d->clip
                     * fminf(piece->pipe->processed_maximum[0],
                             fminf(piece->pipe->processed_maximum[1], piece->pipe->processed_maximum[2]));
  const int filters = dt_image_filter(&piece->pipe->image);
  if(dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) || !filters)
  {
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f_clip, 0, sizeof(cl_mem), (void *)&dev_in);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f_clip, 1, sizeof(cl_mem), (void *)&dev_out);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f_clip, 2, sizeof(int), (void *)&width);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f_clip, 3, sizeof(int), (void *)&height);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f_clip, 4, sizeof(int), (void *)&d->mode);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f_clip, 5, sizeof(float), (void *)&clip);
    err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_highlights_4f_clip, sizes);
    if(err != CL_SUCCESS) goto error;
  }
  else
  {
    const int kernel
        = (d->mode == DT_IOP_HIGHLIGHTS_LCH) ? gd->kernel_highlights_1f_lch : gd->kernel_highlights_1f_clip;

    dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), (void *)&dev_in);
    dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(cl_mem), (void *)&dev_out);
    dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), (void *)&width);
    dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), (void *)&height);
    dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(float), (void *)&clip);
    dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(int), (void *)&roi_out->x);
    dt_opencl_set_kernel_arg(devid, kernel, 6, sizeof(int), (void *)&roi_out->y);
    dt_opencl_set_kernel_arg(devid, kernel, 7, sizeof(int), (void *)&filters);
    err = dt_opencl_enqueue_kernel_2d(devid, kernel, sizes);
    if(err != CL_SUCCESS) goto error;
  }

  // update processed maximum
  const float m = fmaxf(fmaxf(
        piece->pipe->processed_maximum[0],
        piece->pipe->processed_maximum[1]),
      piece->pipe->processed_maximum[2]);
  for(int k=0;k<3;k++) piece->pipe->processed_maximum[k] = m;

  return TRUE;

error:
  dt_print(DT_DEBUG_OPENCL, "[opencl_highlights] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}
Exemple #6
0
void commit_params (struct dt_iop_module_t *self, dt_iop_params_t *params, dt_dev_pixelpipe_t *pipe, dt_dev_pixelpipe_iop_t *piece)
{
  dt_iop_hotpixels_params_t *p = (dt_iop_hotpixels_params_t *)params;
  dt_iop_hotpixels_data_t *d = (dt_iop_hotpixels_data_t *)piece->data;
  d->filters = dt_image_flipped_filter(&pipe->image);
  d->multiplier = p->strength/2.0;
  d->threshold = p->threshold;
  d->permissive = p->permissive;
  d->markfixed = p->markfixed && (pipe->type != DT_DEV_PIXELPIPE_EXPORT) && (pipe->type != DT_DEV_PIXELPIPE_THUMBNAIL);
  if (!(pipe->image.flags & DT_IMAGE_RAW)|| dt_dev_pixelpipe_uses_downsampled_input(pipe) || p->strength == 0.0)
    piece->enabled = 0;
}
Exemple #7
0
void
commit_params(
  struct dt_iop_module_t *self,
  dt_iop_params_t *params,
  dt_dev_pixelpipe_t *pipe,
  dt_dev_pixelpipe_iop_t *piece)
{
  if(!(pipe->image.flags & DT_IMAGE_RAW) ||
      dt_dev_pixelpipe_uses_downsampled_input(pipe) ||
      !dt_image_filter(&piece->pipe->image) ||
      piece->pipe->image.bpp != sizeof(uint16_t))
    piece->enabled = 0;
}
Exemple #8
0
void commit_params(dt_iop_module_t *self, dt_iop_params_t *params, dt_dev_pixelpipe_t *pipe,
                   dt_dev_pixelpipe_iop_t *piece)
{
  const dt_iop_rawprepare_params_t *const p = (dt_iop_rawprepare_params_t *)params;
  dt_iop_rawprepare_data_t *d = (dt_iop_rawprepare_data_t *)piece->data;

  d->x = p->x;
  d->y = p->y;
  d->width = p->width;
  d->height = p->height;

  if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && piece->pipe->filters)
  {
    const float white = (float)p->raw_white_point;

    for(int i = 0; i < 4; i++)
    {
      d->sub[i] = (float)p->raw_black_level_separate[i];
      d->div[i] = (white - d->sub[i]);
    }
  }
  else
  {
    const float white = (float)p->raw_white_point / (float)UINT16_MAX;
    float black = 0;
    for(int i = 0; i < 4; i++)
    {
      black += p->raw_black_level_separate[i] / (float)UINT16_MAX;
    }
    black /= 4.0f;

    for(int i = 0; i < 4; i++)
    {
      d->sub[i] = black;
      d->div[i] = (white - black);
    }
  }

  if(!dt_image_is_raw(&piece->pipe->image) || piece->pipe->image.bpp == sizeof(float)) piece->enabled = 0;
}
Exemple #9
0
static void process_clip_sse2(dt_dev_pixelpipe_iop_t *piece, const void *const ivoid, void *const ovoid,
                              const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out,
                              const float clip)
{
  if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && dt_image_filter(&piece->pipe->image))
  { // raw mosaic
    const __m128 clipm = _mm_set1_ps(clip);
    const size_t n = (size_t)roi_out->height * roi_out->width;
    float *const out = (float *)ovoid;
    float *const in = (float *)ivoid;
#ifdef _OPENMP
#pragma omp parallel for schedule(static) default(none)
#endif
    for(size_t j = 0; j < (n & ~3u); j += 4) _mm_stream_ps(out + j, _mm_min_ps(clipm, _mm_load_ps(in + j)));
    _mm_sfence();
    // lets see if there's a non-multiple of four rest to process:
    if(n & 3)
      for(size_t j = n & ~3u; j < n; j++) out[j] = MIN(clip, in[j]);
  }
  else
  {
    const __m128 clipm = _mm_set1_ps(clip);
    const int ch = piece->colors;

#ifdef _OPENMP
#pragma omp parallel for default(none) schedule(static)
#endif
    for(int j = 0; j < roi_out->height; j++)
    {
      float *out = (float *)ovoid + (size_t)ch * roi_out->width * j;
      float *in = (float *)ivoid + (size_t)ch * roi_in->width * j;
      for(int i = 0; i < roi_out->width; i++, in += ch, out += ch)
      {
        _mm_stream_ps(out, _mm_min_ps(clipm, _mm_set_ps(in[3], in[2], in[1], in[0])));
      }
    }
    _mm_sfence();
  }
}
Exemple #10
0
void process (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, void *ivoid, void *ovoid, const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
    const int filters = dt_image_flipped_filter(&piece->pipe->image);
    dt_iop_temperature_data_t *d = (dt_iop_temperature_data_t *)piece->data;
    if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && filters && piece->pipe->image.bpp != 4)
    {
        const float coeffsi[3] = {d->coeffs[0]/65535.0f, d->coeffs[1]/65535.0f, d->coeffs[2]/65535.0f};
#ifdef _OPENMP
        #pragma omp parallel for default(none) shared(roi_out, ivoid, ovoid, d) schedule(static)
#endif
        for(int j=0; j<roi_out->height; j++)
        {
            int i=0;
            const uint16_t *in = ((uint16_t *)ivoid) + j*roi_out->width;
            float *out = ((float*)ovoid) + j*roi_out->width;

            // process unaligned pixels
            for ( ; i < ((4-(j*roi_out->width & 3)) & 3) ; i++,out++,in++)
                *out = *in * coeffsi[FC(j+roi_out->y, i+roi_out->x, filters)];

            const __m128 coeffs = _mm_set_ps(coeffsi[FC(j+roi_out->y, roi_out->x+i+3, filters)],
                                             coeffsi[FC(j+roi_out->y, roi_out->x+i+2, filters)],
                                             coeffsi[FC(j+roi_out->y, roi_out->x+i+1, filters)],
                                             coeffsi[FC(j+roi_out->y, roi_out->x+i  , filters)]);

            // process aligned pixels with SSE
            for( ; i < roi_out->width - 3 ; i+=4,out+=4,in+=4)
            {
                _mm_stream_ps(out,_mm_mul_ps(coeffs,_mm_set_ps(in[3],in[2],in[1],in[0])));
            }

            // process the rest
            for( ; i<roi_out->width; i++,out++,in++)
                *out = *in * coeffsi[FC(j+roi_out->y, i+roi_out->x, filters)];
        }
        _mm_sfence();
    }
    else if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && filters && piece->pipe->image.bpp == 4)
    {
#ifdef _OPENMP
        #pragma omp parallel for default(none) shared(roi_out, ivoid, ovoid, d) schedule(static)
#endif
        for(int j=0; j<roi_out->height; j++)
        {
            const float *in = ((float *)ivoid) + j*roi_out->width;
            float *out = ((float*)ovoid) + j*roi_out->width;
            for(int i=0; i<roi_out->width; i++,out++,in++)
                *out = *in * d->coeffs[FC(j+roi_out->x, i+roi_out->y, filters)];
        }
    }
    else
    {
        const int ch = piece->colors;
#ifdef _OPENMP
        #pragma omp parallel for default(none) shared(roi_out, ivoid, ovoid, d) schedule(static)
#endif
        for(int k=0; k<roi_out->height; k++)
        {
            const float *in = ((float*)ivoid) + ch*k*roi_out->width;
            float *out = ((float*)ovoid) + ch*k*roi_out->width;
            for (int j=0; j<roi_out->width; j++,in+=ch,out+=ch)
                for(int c=0; c<3; c++) out[c] = in[c]*d->coeffs[c];
        }
    }
    for(int k=0; k<3; k++)
        piece->pipe->processed_maximum[k] = d->coeffs[k] * piece->pipe->processed_maximum[k];
}
Exemple #11
0
void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, void *ivoid, void *ovoid,
             const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  const int filters = dt_image_filter(&piece->pipe->image);
  dt_iop_highlights_data_t *data = (dt_iop_highlights_data_t *)piece->data;

  const float clip = data->clip
                     * fminf(piece->pipe->processed_maximum[0],
                             fminf(piece->pipe->processed_maximum[1], piece->pipe->processed_maximum[2]));
  // const int ch = piece->colors;
  if(dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) || !filters)
  {
    const __m128 clipm = _mm_set1_ps(clip);
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none) shared(ovoid, ivoid, roi_in, roi_out, data, piece)
#endif
    for(int j = 0; j < roi_out->height; j++)
    {
      float *out = (float *)ovoid + (size_t)4 * roi_out->width * j;
      float *in = (float *)ivoid + (size_t)4 * roi_in->width * j;
      for(int i = 0; i < roi_out->width; i++)
      {
        _mm_stream_ps(out, _mm_min_ps(clipm, _mm_set_ps(in[3], in[2], in[1], in[0])));
        in += 4;
        out += 4;
      }
    }
    _mm_sfence();
    return;
  }

  switch(data->mode)
  {
    case DT_IOP_HIGHLIGHTS_INPAINT: // a1ex's (magiclantern) idea of color inpainting:
    {
      const float clips[4] = { 0.987 * data->clip * piece->pipe->processed_maximum[0],
                               0.987 * data->clip * piece->pipe->processed_maximum[1],
                               0.987 * data->clip * piece->pipe->processed_maximum[2], clip };

      if(filters == 9u)
      {
        const dt_image_t *img = &self->dev->image_storage;
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none) shared(ovoid, ivoid, roi_in, roi_out, img)
#endif
        for(int j = 0; j < roi_out->height; j++)
        {
          _interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 0, 1, j, clips, img->xtrans, 0);
          _interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 0, -1, j, clips, img->xtrans, 1);
        }
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none) shared(ovoid, ivoid, roi_in, roi_out, img)
#endif
        for(int i = 0; i < roi_out->width; i++)
        {
          _interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 1, 1, i, clips, img->xtrans, 2);
          _interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 1, -1, i, clips, img->xtrans, 3);
        }
        break;
      }

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none) shared(ovoid, ivoid, roi_in, roi_out, data, piece)
#endif
      for(int j = 0; j < roi_out->height; j++)
      {
        _interpolate_color(ivoid, ovoid, roi_out, 0, 1, j, clips, filters, 0);
        _interpolate_color(ivoid, ovoid, roi_out, 0, -1, j, clips, filters, 1);
      }

// up/down directions
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none) shared(ovoid, ivoid, roi_in, roi_out, data, piece)
#endif
      for(int i = 0; i < roi_out->width; i++)
      {
        _interpolate_color(ivoid, ovoid, roi_out, 1, 1, i, clips, filters, 2);
        _interpolate_color(ivoid, ovoid, roi_out, 1, -1, i, clips, filters, 3);
      }
      break;
    }
    case DT_IOP_HIGHLIGHTS_LCH:
      if(filters == 9u)
      {
        process_lch_xtrans(ivoid, ovoid, roi_out->width, roi_out->height, clip);
        break;
      }
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none) shared(ovoid, ivoid, roi_in, roi_out, data, piece)
#endif
      for(int j = 0; j < roi_out->height; j++)
      {
        float *out = (float *)ovoid + (size_t)roi_out->width * j;
        float *in = (float *)ivoid + (size_t)roi_out->width * j;
        for(int i = 0; i < roi_out->width; i++)
        {
          if(i == 0 || i == roi_out->width - 1 || j == 0 || j == roi_out->height - 1)
          {
            // fast path for border
            out[0] = in[0];
          }
          else
          {
            // analyse one bayer block to get same number of rggb pixels each time
            const float near_clip = 0.96f * clip;
            const float post_clip = 1.10f * clip;
            float blend = 0.0f;
            float mean = 0.0f;
            for(int jj = 0; jj <= 1; jj++)
            {
              for(int ii = 0; ii <= 1; ii++)
              {
                const float val = in[(size_t)jj * roi_out->width + ii];
                mean += val * 0.25f;
                blend += (fminf(post_clip, val) - near_clip) / (post_clip - near_clip);
              }
            }
            blend = CLAMP(blend, 0.0f, 1.0f);
            if(blend > 0)
            {
              // recover:
              out[0] = blend * mean + (1.f - blend) * in[0];
            }
            else
              out[0] = in[0];
          }
          out++;
          in++;
        }
      }
      break;
    default:
    case DT_IOP_HIGHLIGHTS_CLIP:
    {
      const __m128 clipm = _mm_set1_ps(clip);
      const size_t n = (size_t)roi_out->height * roi_out->width;
      float *const out = (float *)ovoid;
      float *const in = (float *)ivoid;
#ifdef _OPENMP
#pragma omp parallel for schedule(static) default(none)
#endif
      for(size_t j = 0; j < (n & ~3u); j += 4) _mm_stream_ps(out + j, _mm_min_ps(clipm, _mm_load_ps(in + j)));
      _mm_sfence();
      // lets see if there's a non-multiple of four rest to process:
      if(n & 3)
        for(size_t j = n & ~3u; j < n; j++) out[j] = MIN(clip, in[j]);
      break;
    }
  }

  if(piece->pipe->mask_display) dt_iop_alpha_copy(ivoid, ovoid, roi_out->width, roi_out->height);
}
Exemple #12
0
void process (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, void *ivoid, void *ovoid, const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_invert_data_t *d = (dt_iop_invert_data_t *)piece->data;
  const float film_rgb[3] = {d->color[0], d->color[1], d->color[2]};

  //FIXME: it could be wise to make this a NOP when picking colors. not sure about that though.
//   if(self->request_color_pick){
  // do nothing
//   }

  const int filters = dt_image_flipped_filter(&piece->pipe->image);

  if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && filters && piece->pipe->image.bpp != 4)
  {
    const float *const m = piece->pipe->processed_maximum;
    const int32_t film_rgb_i[3] = {m[0]*film_rgb[0]*65535, m[1]*film_rgb[1]*65535, m[2]*film_rgb[2]*65535};
#ifdef _OPENMP
    #pragma omp parallel for default(none) shared(roi_out, ivoid, ovoid, /*film_rgb_i, min, max, res*/) schedule(static)
#endif
    for(int j=0; j<roi_out->height; j++)
    {
      const uint16_t *in = ((uint16_t*)ivoid) + j*roi_out->width;
      uint16_t *out = ((uint16_t*)ovoid) + j*roi_out->width;
      for(int i=0; i<roi_out->width; i++,out++,in++)
      {
        *out = CLAMP(film_rgb_i[FC(j+roi_out->x, i+roi_out->y, filters)] - (int32_t)in[0], 0, 0xffff);
      }
    }

    for(int k=0; k<3; k++)
      piece->pipe->processed_maximum[k] = 1.0f;
  }
  else if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && filters && piece->pipe->image.bpp == 4)
  {
#ifdef _OPENMP
    #pragma omp parallel for default(none) shared(roi_out, ivoid, ovoid) schedule(static)
#endif
    for(int j=0; j<roi_out->height; j++)
    {
      const float *in = ((float *)ivoid) + j*roi_out->width;
      float *out = ((float*)ovoid) + j*roi_out->width;
      for(int i=0; i<roi_out->width; i++,out++,in++)
      {
        *out = CLAMP(film_rgb[FC(j+roi_out->x, i+roi_out->y, filters)] - *in/(float)0xffff, 0, 1.0f);
      }
    }
    for(int k=0; k<3; k++)
      piece->pipe->processed_maximum[k] = 1.0f;
  }
  else
  {
    const int ch = piece->colors;
#ifdef _OPENMP
    #pragma omp parallel for default(none) shared(roi_out, ivoid, ovoid) schedule(static)
#endif
    for(int k=0; k<roi_out->height; k++)
    {
      const float *in = ((float*)ivoid) + ch*k*roi_out->width;
      float *out = ((float*)ovoid) + ch*k*roi_out->width;
      for (int j=0; j<roi_out->width; j++,in+=ch,out+=ch)
        for(int c=0; c<3; c++) out[c] = film_rgb[c] - in[c];
    }
  }
}
Exemple #13
0
void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const void *const ivoid,
             void *const ovoid, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  const int filters = dt_image_filter(&piece->pipe->image);
  dt_iop_highlights_data_t *data = (dt_iop_highlights_data_t *)piece->data;

  const float clip
      = data->clip * fminf(piece->pipe->processed_maximum[0],
                           fminf(piece->pipe->processed_maximum[1], piece->pipe->processed_maximum[2]));
  // const int ch = piece->colors;
  if(dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) || !filters)
  {
    process_clip(piece, ivoid, ovoid, roi_in, roi_out, clip);
    for(int k=0;k<3;k++)
      piece->pipe->processed_maximum[k] = fminf(piece->pipe->processed_maximum[0],
          fminf(piece->pipe->processed_maximum[1], piece->pipe->processed_maximum[2]));
    return;
  }

  switch(data->mode)
  {
    case DT_IOP_HIGHLIGHTS_INPAINT: // a1ex's (magiclantern) idea of color inpainting:
    {
      const float clips[4] = { 0.987 * data->clip * piece->pipe->processed_maximum[0],
                               0.987 * data->clip * piece->pipe->processed_maximum[1],
                               0.987 * data->clip * piece->pipe->processed_maximum[2], clip };

      if(filters == 9u)
      {
        const uint8_t(*const xtrans)[6] = (const uint8_t(*const)[6])self->dev->image_storage.xtrans;
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none)
#endif
        for(int j = 0; j < roi_out->height; j++)
        {
          interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 0, 1, j, clips, xtrans, 0);
          interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 0, -1, j, clips, xtrans, 1);
        }
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none)
#endif
        for(int i = 0; i < roi_out->width; i++)
        {
          interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 1, 1, i, clips, xtrans, 2);
          interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 1, -1, i, clips, xtrans, 3);
        }
      }
      else
      {
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none) shared(data, piece)
#endif
        for(int j = 0; j < roi_out->height; j++)
        {
          interpolate_color(ivoid, ovoid, roi_out, 0, 1, j, clips, filters, 0);
          interpolate_color(ivoid, ovoid, roi_out, 0, -1, j, clips, filters, 1);
        }

// up/down directions
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none) shared(data, piece)
#endif
        for(int i = 0; i < roi_out->width; i++)
        {
          interpolate_color(ivoid, ovoid, roi_out, 1, 1, i, clips, filters, 2);
          interpolate_color(ivoid, ovoid, roi_out, 1, -1, i, clips, filters, 3);
        }
      }
      break;
    }
    case DT_IOP_HIGHLIGHTS_LCH:
      if(filters == 9u)
        process_lch_xtrans(self, ivoid, ovoid, roi_in, roi_out, clip);
      else
        process_lch_bayer(self, piece, ivoid, ovoid, roi_in, roi_out, clip);
      break;
    default:
    case DT_IOP_HIGHLIGHTS_CLIP:
      process_clip(piece, ivoid, ovoid, roi_in, roi_out, clip);
      break;
  }

  // update processed maximum
  const float m = fmaxf(fmaxf(
        piece->pipe->processed_maximum[0],
        piece->pipe->processed_maximum[1]),
      piece->pipe->processed_maximum[2]);
  for(int k=0;k<3;k++) piece->pipe->processed_maximum[k] = m;

  if(piece->pipe->mask_display) dt_iop_alpha_copy(ivoid, ovoid, roi_out->width, roi_out->height);
}
Exemple #14
0
static gboolean draw(GtkWidget *widget, cairo_t *cr, dt_iop_module_t *self)
{
  if(darktable.gui->reset) return FALSE;
  if(self->picked_color_max[0] < 0.0f) return FALSE;
  if(self->request_color_pick == DT_REQUEST_COLORPICK_OFF) return FALSE;
  dt_iop_invert_gui_data_t *g = (dt_iop_invert_gui_data_t *)self->gui_data;
  dt_iop_invert_params_t *p = (dt_iop_invert_params_t *)self->params;

  if(fabsf(p->color[0] - self->picked_color[0]) < 0.0001f
     && fabsf(p->color[1] - self->picked_color[1]) < 0.0001f
     && fabsf(p->color[2] - self->picked_color[2]) < 0.0001f)
  {
    // interrupt infinite loops
    return FALSE;
  }

  p->color[0] = self->picked_color[0];
  p->color[1] = self->picked_color[1];
  p->color[2] = self->picked_color[2];
  GdkRGBA color = (GdkRGBA){.red = p->color[0], .green = p->color[1], .blue = p->color[2], .alpha = 1.0 };
  gtk_color_chooser_set_rgba(GTK_COLOR_CHOOSER(g->colorpicker), &color);

  dt_dev_add_history_item(darktable.develop, self, TRUE);
  return FALSE;
}

static void colorpicker_callback(GtkColorButton *widget, dt_iop_module_t *self)
{
  if(self->dt->gui->reset) return;
  dt_iop_invert_gui_data_t *g = (dt_iop_invert_gui_data_t *)self->gui_data;
  dt_iop_invert_params_t *p = (dt_iop_invert_params_t *)self->params;

  // turn off the other color picker so that this tool actually works ...
  gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(g->picker), FALSE);

  GdkRGBA c;
  gtk_color_chooser_get_rgba(GTK_COLOR_CHOOSER(widget), &c);
  p->color[0] = c.red;
  p->color[1] = c.green;
  p->color[2] = c.blue;

  dt_dev_add_history_item(darktable.develop, self, TRUE);
}

void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, void *ivoid, void *ovoid,
             const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_invert_data_t *d = (dt_iop_invert_data_t *)piece->data;

  const float *const m = piece->pipe->processed_maximum;

  float film_rgb[4] = { d->color[0], d->color[1], d->color[2], 0.0f };

  // Convert the RGB color to CYGM only if we're not in the preview pipe (which is already RGB)
  if((self->dev->image_storage.flags & DT_IMAGE_4BAYER) && !dt_dev_pixelpipe_uses_downsampled_input(piece->pipe))
    dt_colorspaces_rgb_to_cygm(film_rgb, 1, d->RGB_to_CAM);

  const float film_rgb_f[4] = { film_rgb[0] * m[0], film_rgb[1] * m[1], film_rgb[2] * m[2], film_rgb[3] * m[3] };

  // FIXME: it could be wise to make this a NOP when picking colors. not sure about that though.
  //   if(self->request_color_pick){
  // do nothing
  //   }

  const int filters = dt_image_filter(&piece->pipe->image);
  const uint8_t (*const xtrans)[6] = (const uint8_t (*const)[6]) self->dev->image_storage.xtrans;

  if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && (filters == 9u))
  { // xtrans float mosaiced
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(roi_out, ivoid, ovoid) schedule(static)
#endif
    for(int j = 0; j < roi_out->height; j++)
    {
      const float *in = ((float *)ivoid) + (size_t)j * roi_out->width;
      float *out = ((float *)ovoid) + (size_t)j * roi_out->width;
      for(int i = 0; i < roi_out->width; i++, out++, in++)
        *out = CLAMP(film_rgb_f[FCxtrans(j, i, roi_out, xtrans)] - *in, 0.0f, 1.0f);
    }

    for(int k = 0; k < 4; k++) piece->pipe->processed_maximum[k] = 1.0f;
  }
  else if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && filters)
  { // bayer float mosaiced

    const __m128 val_min = _mm_setzero_ps();
    const __m128 val_max = _mm_set1_ps(1.0f);

#ifdef _OPENMP
#pragma omp parallel for default(none) shared(roi_out, ivoid, ovoid) schedule(static)
#endif
    for(int j = 0; j < roi_out->height; j++)
    {
      const float *in = ((float *)ivoid) + (size_t)j * roi_out->width;
      float *out = ((float *)ovoid) + (size_t)j * roi_out->width;

      int i = 0;
      int alignment = ((4 - (j * roi_out->width & (4 - 1))) & (4 - 1));

      // process unaligned pixels
      for(; i < alignment; i++, out++, in++)
        *out = CLAMP(film_rgb_f[FC(j + roi_out->y, i + roi_out->x, filters)] - *in, 0.0f, 1.0f);

      const __m128 film = _mm_set_ps(film_rgb_f[FC(j + roi_out->y, roi_out->x + i + 3, filters)],
                                     film_rgb_f[FC(j + roi_out->y, roi_out->x + i + 2, filters)],
                                     film_rgb_f[FC(j + roi_out->y, roi_out->x + i + 1, filters)],
                                     film_rgb_f[FC(j + roi_out->y, roi_out->x + i, filters)]);

      // process aligned pixels with SSE
      for(; i < roi_out->width - (4 - 1); i += 4, in += 4, out += 4)
      {
        const __m128 input = _mm_load_ps(in);
        const __m128 subtracted = _mm_sub_ps(film, input);
        _mm_stream_ps(out, _mm_max_ps(_mm_min_ps(subtracted, val_max), val_min));
      }

      // process the rest
      for(; i < roi_out->width; i++, out++, in++)
        *out = CLAMP(film_rgb_f[FC(j + roi_out->y, i + roi_out->x, filters)] - *in, 0.0f, 1.0f);
    }
    _mm_sfence();

    for(int k = 0; k < 4; k++) piece->pipe->processed_maximum[k] = 1.0f;
  }
  else
  { // non-mosaiced
    const int ch = piece->colors;

    const __m128 film = _mm_set_ps(1.0f, film_rgb[2], film_rgb[1], film_rgb[0]);

#ifdef _OPENMP
#pragma omp parallel for default(none) shared(roi_out, ivoid, ovoid) schedule(static)
#endif
    for(int k = 0; k < roi_out->height; k++)
    {
      const float *in = ((float *)ivoid) + (size_t)ch * k * roi_out->width;
      float *out = ((float *)ovoid) + (size_t)ch * k * roi_out->width;
      for(int j = 0; j < roi_out->width; j++, in += ch, out += ch)
      {
        const __m128 input = _mm_load_ps(in);
        const __m128 subtracted = _mm_sub_ps(film, input);
        _mm_stream_ps(out, subtracted);
      }
    }
    _mm_sfence();

    if(piece->pipe->mask_display) dt_iop_alpha_copy(ivoid, ovoid, roi_out->width, roi_out->height);
  }
}
Exemple #15
0
void process (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, void *ivoid, void *ovoid, const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  const int filters = dt_image_filter(&piece->pipe->image);
  uint8_t (*const xtrans)[6] = self->dev->image_storage.xtrans;
  dt_iop_temperature_data_t *d = (dt_iop_temperature_data_t *)piece->data;
  if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && filters == 9u)
  { // xtrans float mosaiced
#ifdef _OPENMP
    #pragma omp parallel for default(none) shared(roi_out, ivoid, ovoid, d) schedule(static)
#endif
    for(int j=0; j<roi_out->height; j++)
    {
      const float *in = ((float *)ivoid) + (size_t)j*roi_out->width;
      float *out = ((float*)ovoid) + (size_t)j*roi_out->width;
      for(int i=0; i<roi_out->width; i++,out++,in++)
        *out = *in * d->coeffs[FCxtrans(j,i,roi_out,xtrans)];
    }
  }
  else if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && filters)
  { // bayer float mosaiced
#ifdef _OPENMP
    #pragma omp parallel for default(none) shared(roi_out, ivoid, ovoid, d) schedule(static)
#endif
    for(int j=0; j<roi_out->height; j++)
    {
      const float *in = ((float *)ivoid) + (size_t)j*roi_out->width;
      float *out = ((float*)ovoid) + (size_t)j*roi_out->width;

      int i = 0;
      int alignment = ((4 - (j * roi_out->width & (4 - 1))) & (4 - 1));

      // process unaligned pixels
      for ( ; i < alignment ; i++, out++, in++)
        *out = *in * d->coeffs[FC(j+roi_out->y, i+roi_out->x, filters)];

      const __m128 coeffs = _mm_set_ps(d->coeffs[FC(j+roi_out->y, roi_out->x+i+3, filters)],
                                       d->coeffs[FC(j+roi_out->y, roi_out->x+i+2, filters)],
                                       d->coeffs[FC(j+roi_out->y, roi_out->x+i+1, filters)],
                                       d->coeffs[FC(j+roi_out->y, roi_out->x+i  , filters)]);

      // process aligned pixels with SSE
      for( ; i < roi_out->width - (4-1); i+=4,in+=4,out+=4)
      {
        const __m128 input = _mm_load_ps(in);

        const __m128 multiplied = _mm_mul_ps(input, coeffs);

        _mm_stream_ps(out, multiplied);
      }

      // process the rest
      for( ; i<roi_out->width; i++,out++,in++)
        *out = *in * d->coeffs[FC(j+roi_out->y, i+roi_out->x, filters)];
    }
    _mm_sfence();
  }
  else
  { // non-mosaiced
    const int ch = piece->colors;

    const __m128 coeffs = _mm_set_ps(1.0f,
                                     d->coeffs[2],
                                     d->coeffs[1],
                                     d->coeffs[0]);

#ifdef _OPENMP
    #pragma omp parallel for default(none) shared(roi_out, ivoid, ovoid, d) schedule(static)
#endif
    for(int k=0; k<roi_out->height; k++)
    {
      const float *in = ((float*)ivoid) + (size_t)ch*k*roi_out->width;
      float *out = ((float*)ovoid) + (size_t)ch*k*roi_out->width;
      for (int j=0; j<roi_out->width; j++,in+=ch,out+=ch)
      {
        const __m128 input = _mm_load_ps(in);
        const __m128 multiplied = _mm_mul_ps(input, coeffs);
        _mm_stream_ps(out, multiplied);
      }
    }
    _mm_sfence();

    if(piece->pipe->mask_display)
      dt_iop_alpha_copy(ivoid, ovoid, roi_out->width, roi_out->height);
  }
  for(int k=0; k<3; k++)
    piece->pipe->processed_maximum[k] = d->coeffs[k] * piece->pipe->processed_maximum[k];
}
Exemple #16
0
int process_cl(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
               const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  dt_iop_rawprepare_data_t *d = (dt_iop_rawprepare_data_t *)piece->data;
  dt_iop_rawprepare_global_data_t *gd = (dt_iop_rawprepare_global_data_t *)self->data;

  const int devid = piece->pipe->devid;
  cl_mem dev_sub = NULL;
  cl_mem dev_div = NULL;
  cl_int err = -999;

  int kernel = -1;

  if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && piece->pipe->filters)
  {
    kernel = gd->kernel_rawprepare_1f;
  }
  else
  {
    kernel = gd->kernel_rawprepare_4f;
  }

  const float scale = roi_in->scale / piece->iscale;
  const int csx = (int)roundf((float)d->x * scale), csy = (int)roundf((float)d->y * scale);

  dev_sub = dt_opencl_copy_host_to_device_constant(devid, sizeof(float) * 4, d->sub);
  if(dev_sub == NULL) goto error;

  dev_div = dt_opencl_copy_host_to_device_constant(devid, sizeof(float) * 4, d->div);
  if(dev_div == NULL) goto error;

  const int width = roi_out->width;
  const int height = roi_out->height;

  size_t sizes[] = { ROUNDUPWD(roi_in->width), ROUNDUPHT(roi_in->height), 1 };
  dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), (void *)&dev_in);
  dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(cl_mem), (void *)&dev_out);
  dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), (void *)&(width));
  dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), (void *)&(height));
  dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(int), (void *)&csx);
  dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(int), (void *)&csy);
  dt_opencl_set_kernel_arg(devid, kernel, 6, sizeof(cl_mem), (void *)&dev_sub);
  dt_opencl_set_kernel_arg(devid, kernel, 7, sizeof(cl_mem), (void *)&dev_div);
  dt_opencl_set_kernel_arg(devid, kernel, 8, sizeof(uint32_t), (void *)&roi_out->x);
  dt_opencl_set_kernel_arg(devid, kernel, 9, sizeof(uint32_t), (void *)&roi_out->y);
  err = dt_opencl_enqueue_kernel_2d(devid, kernel, sizes);
  if(err != CL_SUCCESS) goto error;

  dt_opencl_release_mem_object(dev_sub);
  dt_opencl_release_mem_object(dev_div);

  if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && piece->pipe->filters)
  {
    piece->pipe->filters = dt_rawspeed_crop_dcraw_filters(self->dev->image_storage.filters, csx, csy);
    adjust_xtrans_filters(piece->pipe, csx, csy);
  }

  return TRUE;

error:
  if(dev_sub != NULL) dt_opencl_release_mem_object(dev_sub);
  if(dev_div != NULL) dt_opencl_release_mem_object(dev_div);
  dt_print(DT_DEBUG_OPENCL, "[opencl_rawprepare] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}
Exemple #17
0
void process_sse2(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const void *const ivoid,
                  void *const ovoid, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  const dt_iop_rawprepare_data_t *const d = (dt_iop_rawprepare_data_t *)piece->data;

  // fprintf(stderr, "roi in %d %d %d %d\n", roi_in->x, roi_in->y, roi_in->width, roi_in->height);
  // fprintf(stderr, "roi out %d %d %d %d\n", roi_out->x, roi_out->y, roi_out->width, roi_out->height);

  const float scale = roi_in->scale / piece->iscale;
  const int csx = (int)roundf((float)d->x * scale), csy = (int)roundf((float)d->y * scale);

  if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && piece->pipe->filters)
  { // raw mosaic
#ifdef _OPENMP
#pragma omp parallel for default(none) schedule(static)
#endif
    for(int j = 0; j < roi_out->height; j++)
    {
      const uint16_t *in = ((uint16_t *)ivoid) + ((size_t)roi_in->width * (j + csy) + csx);
      float *out = ((float *)ovoid) + (size_t)roi_out->width * j;

      int i = 0;

      // FIXME: figure alignment!  !!! replace with for !!!
      while((!dt_is_aligned(in, 16) || !dt_is_aligned(out, 16)) && (i < roi_out->width))
      {
        const int id = BL(roi_out, d, j, i);
        *out = (((float)(*in)) - d->sub[id]) / d->div[id];
        i++;
        in++;
        out++;
      }

      const __m128 sub = _mm_set_ps(d->sub[BL(roi_out, d, j, i + 3)], d->sub[BL(roi_out, d, j, i + 2)],
                                    d->sub[BL(roi_out, d, j, i + 1)], d->sub[BL(roi_out, d, j, i)]);

      const __m128 div = _mm_set_ps(d->div[BL(roi_out, d, j, i + 3)], d->div[BL(roi_out, d, j, i + 2)],
                                    d->div[BL(roi_out, d, j, i + 1)], d->div[BL(roi_out, d, j, i)]);

      // process aligned pixels with SSE
      for(; i < roi_out->width - (8 - 1); i += 8, in += 8)
      {
        const __m128i input = _mm_load_si128((__m128i *)in);

        __m128i ilo = _mm_unpacklo_epi16(input, _mm_set1_epi16(0));
        __m128i ihi = _mm_unpackhi_epi16(input, _mm_set1_epi16(0));

        __m128 flo = _mm_cvtepi32_ps(ilo);
        __m128 fhi = _mm_cvtepi32_ps(ihi);

        flo = _mm_div_ps(_mm_sub_ps(flo, sub), div);
        fhi = _mm_div_ps(_mm_sub_ps(fhi, sub), div);

        _mm_stream_ps(out, flo);
        out += 4;
        _mm_stream_ps(out, fhi);
        out += 4;
      }

      // process the rest
      for(; i < roi_out->width; i++, in++, out++)
      {
        const int id = BL(roi_out, d, j, i);
        *out = MAX(0.0f, ((float)(*in)) - d->sub[id]) / d->div[id];
      }
    }

    piece->pipe->filters = dt_rawspeed_crop_dcraw_filters(self->dev->image_storage.filters, csx, csy);
    adjust_xtrans_filters(piece->pipe, csx, csy);
  }
  else
  { // pre-downsampled buffer that needs black/white scaling

    const __m128 sub = _mm_load_ps(d->sub), div = _mm_load_ps(d->div);

#ifdef _OPENMP
#pragma omp parallel for default(none) schedule(static)
#endif
    for(int j = 0; j < roi_out->height; j++)
    {
      const float *in = ((float *)ivoid) + (size_t)4 * (roi_in->width * (j + csy) + csx);
      float *out = ((float *)ovoid) + (size_t)4 * roi_out->width * j;

      // process aligned pixels with SSE
      for(int i = 0; i < roi_out->width; i++, in += 4, out += 4)
      {
        const __m128 input = _mm_load_ps(in);

        const __m128 scaled = _mm_div_ps(_mm_sub_ps(input, sub), div);

        _mm_stream_ps(out, scaled);
      }
    }
  }
  _mm_sfence();
}
Exemple #18
0
void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const void *const ivoid,
             void *const ovoid, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  const dt_iop_rawprepare_data_t *const d = (dt_iop_rawprepare_data_t *)piece->data;

  // fprintf(stderr, "roi in %d %d %d %d\n", roi_in->x, roi_in->y, roi_in->width, roi_in->height);
  // fprintf(stderr, "roi out %d %d %d %d\n", roi_out->x, roi_out->y, roi_out->width, roi_out->height);

  const float scale = roi_in->scale / piece->iscale;
  const int csx = (int)roundf((float)d->x * scale), csy = (int)roundf((float)d->y * scale);

  if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && piece->pipe->filters)
  { // raw mosaic

    const uint16_t *const in = (const uint16_t *const)ivoid;
    float *const out = (float *const)ovoid;

#ifdef _OPENMP
#pragma omp parallel for SIMD() default(none) schedule(static) collapse(2)
#endif
    for(int j = 0; j < roi_out->height; j++)
    {
      for(int i = 0; i < roi_out->width; i++)
      {
        const size_t pin = (size_t)(roi_in->width * (j + csy) + csx) + i;
        const size_t pout = (size_t)j * roi_out->width + i;

        const int id = BL(roi_out, d, j, i);
        out[pout] = (in[pin] - d->sub[id]) / d->div[id];
      }
    }

    piece->pipe->filters = dt_rawspeed_crop_dcraw_filters(self->dev->image_storage.filters, csx, csy);
    adjust_xtrans_filters(piece->pipe, csx, csy);
  }
  else
  { // pre-downsampled buffer that needs black/white scaling

    const float *const in = (const float *const)ivoid;
    float *const out = (float *const)ovoid;

    const float sub = d->sub[0], div = d->div[0];

    const int ch = piece->colors;

#ifdef _OPENMP
#pragma omp parallel for SIMD() default(none) schedule(static) collapse(3)
#endif
    for(int j = 0; j < roi_out->height; j++)
    {
      for(int i = 0; i < roi_out->width; i++)
      {
        for(int c = 0; c < ch; c++)
        {
          const size_t pin = (size_t)ch * (roi_in->width * (j + csy) + csx + i) + c;
          const size_t pout = (size_t)ch * (j * roi_out->width + i) + c;

          out[pout] = (in[pin] - sub) / div;
        }
      }
    }
  }
}