/* Plain-C highlight clipping: copies the input buffer to the output buffer
 * while limiting every float to at most `clip`.
 *
 * On the raw-mosaic path (full-size input and a non-zero sensor filter) the
 * buffer holds one float per pixel; otherwise it holds piece->colors floats
 * per pixel. In both cases the buffer is walked as one flat float array of
 * roi_out->width * roi_out->height pixels. roi_in is not used.
 */
static void process_clip_plain(dt_dev_pixelpipe_iop_t *piece, const void *const ivoid, void *const ovoid,
                               const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out,
                               const float clip)
{
  const float *const in = (const float *const)ivoid;
  float *const out = (float *const)ovoid;

  const int raw_mosaic
      = !dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && dt_image_filter(&piece->pipe->image);

  // floats per pixel: a single sample for mosaiced data, piece->colors otherwise
  const size_t floats_per_pixel = raw_mosaic ? (size_t)1 : (size_t)piece->colors;
  const size_t total = floats_per_pixel * roi_out->width * roi_out->height;

#ifdef _OPENMP
#pragma omp parallel for SIMD() default(none) schedule(static)
#endif
  for(size_t idx = 0; idx < total; idx++)
  {
    out[idx] = MIN(clip, in[idx]);
  }
}
/* OpenCL implementation of the highlights module.
 *
 * Picks one of three kernels:
 *   - kernel_highlights_4f_clip when the pipe works on downsampled input or
 *     the image has no sensor filter (the mode is handed to the kernel),
 *   - kernel_highlights_1f_lch for mosaiced input in LCH mode,
 *   - kernel_highlights_1f_clip for mosaiced input in any other mode.
 *
 * The clipping threshold is d->clip scaled by the smallest of the first three
 * processed maxima. On success the first three processed maxima are all set
 * to their largest value and TRUE is returned; if the kernel could not be
 * enqueued a debug message is printed and FALSE is returned.
 */
int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
               const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  dt_iop_highlights_data_t *d = (dt_iop_highlights_data_t *)piece->data;
  dt_iop_highlights_global_data_t *gd = (dt_iop_highlights_global_data_t *)self->data;

  const int devid = piece->pipe->devid;
  const int width = roi_in->width;
  const int height = roi_in->height;
  size_t sizes[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1 };

  float *const pmax = piece->pipe->processed_maximum;
  const float clip = d->clip * fminf(pmax[0], fminf(pmax[1], pmax[2]));

  const int filters = dt_image_filter(&piece->pipe->image);

  cl_int err = -999;

  if(dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) || !filters)
  {
    // four floats per pixel: one kernel handles every mode
    const int kernel = gd->kernel_highlights_4f_clip;
    dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), (void *)&dev_in);
    dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(cl_mem), (void *)&dev_out);
    dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), (void *)&width);
    dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), (void *)&height);
    dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(int), (void *)&d->mode);
    dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(float), (void *)&clip);
    err = dt_opencl_enqueue_kernel_2d(devid, kernel, sizes);
  }
  else
  {
    // mosaiced data: the kernel also needs the roi origin and the filter pattern
    const int kernel
        = (d->mode == DT_IOP_HIGHLIGHTS_LCH) ? gd->kernel_highlights_1f_lch : gd->kernel_highlights_1f_clip;
    dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), (void *)&dev_in);
    dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(cl_mem), (void *)&dev_out);
    dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), (void *)&width);
    dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), (void *)&height);
    dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(float), (void *)&clip);
    dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(int), (void *)&roi_out->x);
    dt_opencl_set_kernel_arg(devid, kernel, 6, sizeof(int), (void *)&roi_out->y);
    dt_opencl_set_kernel_arg(devid, kernel, 7, sizeof(int), (void *)&filters);
    err = dt_opencl_enqueue_kernel_2d(devid, kernel, sizes);
  }

  if(err != CL_SUCCESS)
  {
    dt_print(DT_DEBUG_OPENCL, "[opencl_highlights] couldn't enqueue kernel! %d\n", err);
    return FALSE;
  }

  // update processed maximum: all three channels take the largest value
  const float highest = fmaxf(fmaxf(pmax[0], pmax[1]), pmax[2]);
  for(int c = 0; c < 3; c++) pmax[c] = highest;

  return TRUE;
}
/* Copies the hot-pixel parameters into the per-piece data and decides whether
 * the piece is enabled for this pipe.
 *
 * The piece is disabled when the image is not flagged as raw, when the pipe
 * uses downsampled input, or when the strength is zero. Marking of fixed
 * pixels is only kept for pipes that are neither export nor thumbnail pipes.
 */
void commit_params(struct dt_iop_module_t *self, dt_iop_params_t *params, dt_dev_pixelpipe_t *pipe,
                   dt_dev_pixelpipe_iop_t *piece)
{
  const dt_iop_hotpixels_params_t *const src = (dt_iop_hotpixels_params_t *)params;
  dt_iop_hotpixels_data_t *const dst = (dt_iop_hotpixels_data_t *)piece->data;

  const int interactive_pipe
      = (pipe->type != DT_DEV_PIXELPIPE_EXPORT) && (pipe->type != DT_DEV_PIXELPIPE_THUMBNAIL);

  dst->filters = dt_image_filter(&pipe->image);
  dst->multiplier = src->strength / 2.0;
  dst->threshold = src->threshold;
  dst->permissive = src->permissive;
  dst->markfixed = src->markfixed && interactive_pipe;

  const int nothing_to_do = !(pipe->image.flags & DT_IMAGE_RAW)
                            || dt_dev_pixelpipe_uses_downsampled_input(pipe)
                            || src->strength == 0.0;
  if(nothing_to_do)
  {
    piece->enabled = 0;
  }
}
/* Disables the piece unless all of the following hold: the image is flagged
 * as raw, the pipe does not use downsampled input, the image has a non-zero
 * sensor filter, and the image's bpp equals sizeof(uint16_t).
 * No parameters are copied here.
 */
void commit_params(struct dt_iop_module_t *self, dt_iop_params_t *params, dt_dev_pixelpipe_t *pipe,
                   dt_dev_pixelpipe_iop_t *piece)
{
  // the checks run in this order and stop at the first one that fails
  const int applicable = (pipe->image.flags & DT_IMAGE_RAW)
                         && !dt_dev_pixelpipe_uses_downsampled_input(pipe)
                         && dt_image_filter(&piece->pipe->image)
                         && piece->pipe->image.bpp == sizeof(uint16_t);

  if(!applicable)
  {
    piece->enabled = 0;
  }
}
/* OpenCL implementation of the white balance (temperature) module.
 *
 * Uploads the three channel coefficients to the device, runs
 * kernel_whitebalance_1f for full-size mosaiced input or
 * kernel_whitebalance_4f otherwise, and on success scales the first three
 * processed maxima by the matching coefficient.
 *
 * Returns TRUE on success. If the coefficient buffer could not be created or
 * the kernel could not be enqueued, a debug message is printed and FALSE is
 * returned; the coefficient buffer is released on every path where it exists.
 */
int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
               const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_temperature_data_t *d = (dt_iop_temperature_data_t *)piece->data;
  dt_iop_temperature_global_data_t *gd = (dt_iop_temperature_global_data_t *)self->data;

  const int devid = piece->pipe->devid;
  const int filters = dt_image_filter(&piece->pipe->image);
  cl_int err = -999;

  const int kernel = (!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && filters)
                         ? gd->kernel_whitebalance_1f
                         : gd->kernel_whitebalance_4f;

  cl_mem dev_coeffs = dt_opencl_copy_host_to_device_constant(devid, sizeof(float) * 3, d->coeffs);
  if(dev_coeffs == NULL)
  {
    dt_print(DT_DEBUG_OPENCL, "[opencl_white_balance] couldn't enqueue kernel! %d\n", err);
    return FALSE;
  }

  const int width = roi_in->width;
  const int height = roi_in->height;
  size_t sizes[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1 };

  dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), (void *)&dev_in);
  dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(cl_mem), (void *)&dev_out);
  dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), (void *)&width);
  dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), (void *)&height);
  dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(cl_mem), (void *)&dev_coeffs);
  dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(uint32_t), (void *)&filters);
  dt_opencl_set_kernel_arg(devid, kernel, 6, sizeof(uint32_t), (void *)&roi_out->x);
  dt_opencl_set_kernel_arg(devid, kernel, 7, sizeof(uint32_t), (void *)&roi_out->y);
  err = dt_opencl_enqueue_kernel_2d(devid, kernel, sizes);

  // the coefficient buffer is no longer needed, whatever the outcome
  dt_opencl_release_mem_object(dev_coeffs);

  if(err != CL_SUCCESS)
  {
    dt_print(DT_DEBUG_OPENCL, "[opencl_white_balance] couldn't enqueue kernel! %d\n", err);
    return FALSE;
  }

  float *const pmax = piece->pipe->processed_maximum;
  for(int c = 0; c < 3; c++) pmax[c] = d->coeffs[c] * pmax[c];

  return TRUE;
}
/* SSE highlight clipping: writes min(clip, input) to the output buffer.
 *
 * Raw-mosaic path (full-size input and a non-zero sensor filter): the buffer
 * is treated as one flat array of roi_out->width * roi_out->height floats.
 * Groups of four floats are handled with aligned loads and streaming stores;
 * any remaining 1..3 floats are handled by a scalar tail loop.
 *
 * Other path: one 4-float store per pixel, stepping by piece->colors floats.
 * NOTE(review): each pixel reads in[0..3] and stores four floats, so this
 * presumably expects piece->colors == 4 -- confirm against callers.
 *
 * _mm_load_ps/_mm_stream_ps require 16-byte aligned addresses; the buffers
 * are assumed to be allocated accordingly by the caller -- TODO confirm.
 */
static void process_clip_sse2(dt_dev_pixelpipe_iop_t *piece, const void *const ivoid, void *const ovoid,
                              const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out,
                              const float clip)
{
  const __m128 clipm = _mm_set1_ps(clip);

  if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && dt_image_filter(&piece->pipe->image))
  { // raw mosaic
    const size_t n = (size_t)roi_out->height * roi_out->width;
    // Round down to a multiple of four. The mask must be size_t wide: with
    // `~3u` the 32-bit mask is zero-extended and wipes the upper bits of n.
    const size_t n_vec = n & ~(size_t)3;
    float *const out = (float *)ovoid;
    const float *const in = (const float *)ivoid;

#ifdef _OPENMP
#pragma omp parallel for schedule(static) default(none)
#endif
    for(size_t j = 0; j < n_vec; j += 4)
      _mm_stream_ps(out + j, _mm_min_ps(clipm, _mm_load_ps(in + j)));
    _mm_sfence();

    // scalar tail for a non-multiple-of-four rest
    for(size_t j = n_vec; j < n; j++)
      out[j] = MIN(clip, in[j]);
  }
  else
  {
    const int ch = piece->colors;

#ifdef _OPENMP
#pragma omp parallel for default(none) schedule(static)
#endif
    for(int j = 0; j < roi_out->height; j++)
    {
      float *out = (float *)ovoid + (size_t)ch * roi_out->width * j;
      const float *in = (const float *)ivoid + (size_t)ch * roi_in->width * j;
      for(int i = 0; i < roi_out->width; i++, in += ch, out += ch)
      {
        _mm_stream_ps(out, _mm_min_ps(clipm, _mm_set_ps(in[3], in[2], in[1], in[0])));
      }
    }
    _mm_sfence();
  }
}
/* CPU entry point of the highlights module.
 *
 * The clipping threshold is data->clip scaled by the smallest of the first
 * three processed maxima.
 *
 * Downsampled or non-mosaiced input is simply clipped; afterwards the first
 * three processed maxima are set to their minimum and the function returns.
 *
 * Mosaiced input is dispatched on data->mode:
 *   - DT_IOP_HIGHLIGHTS_INPAINT: four directional interpolation passes
 *     (rows forward/backward, then columns forward/backward), with separate
 *     helpers for X-Trans (filters == 9u) and Bayer sensors;
 *   - DT_IOP_HIGHLIGHTS_LCH: process_lch_xtrans or process_lch_bayer;
 *   - anything else (including DT_IOP_HIGHLIGHTS_CLIP): process_clip.
 * Afterwards the first three processed maxima are set to their maximum and,
 * if the pipe displays a mask, the alpha channel is copied from the input.
 */
void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const void *const ivoid,
             void *const ovoid, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  const int filters = dt_image_filter(&piece->pipe->image);
  dt_iop_highlights_data_t *data = (dt_iop_highlights_data_t *)piece->data;
  float *const pmax = piece->pipe->processed_maximum;

  const float clip = data->clip * fminf(pmax[0], fminf(pmax[1], pmax[2]));

  if(dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) || !filters)
  {
    process_clip(piece, ivoid, ovoid, roi_in, roi_out, clip);

    const float lowest = fminf(pmax[0], fminf(pmax[1], pmax[2]));
    for(int c = 0; c < 3; c++) pmax[c] = lowest;
    return;
  }

  if(data->mode == DT_IOP_HIGHLIGHTS_INPAINT)
  {
    // a1ex's (magiclantern) idea of color inpainting:
    const float clips[4] = { 0.987 * data->clip * piece->pipe->processed_maximum[0],
                             0.987 * data->clip * piece->pipe->processed_maximum[1],
                             0.987 * data->clip * piece->pipe->processed_maximum[2], clip };

    if(filters == 9u)
    {
      const uint8_t(*const xtrans)[6] = (const uint8_t(*const)[6])self->dev->image_storage.xtrans;

      // left/right directions
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none)
#endif
      for(int row = 0; row < roi_out->height; row++)
      {
        interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 0, 1, row, clips, xtrans, 0);
        interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 0, -1, row, clips, xtrans, 1);
      }

      // up/down directions
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none)
#endif
      for(int col = 0; col < roi_out->width; col++)
      {
        interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 1, 1, col, clips, xtrans, 2);
        interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 1, -1, col, clips, xtrans, 3);
      }
    }
    else
    {
      // left/right directions
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none) shared(data, piece)
#endif
      for(int row = 0; row < roi_out->height; row++)
      {
        interpolate_color(ivoid, ovoid, roi_out, 0, 1, row, clips, filters, 0);
        interpolate_color(ivoid, ovoid, roi_out, 0, -1, row, clips, filters, 1);
      }

      // up/down directions
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none) shared(data, piece)
#endif
      for(int col = 0; col < roi_out->width; col++)
      {
        interpolate_color(ivoid, ovoid, roi_out, 1, 1, col, clips, filters, 2);
        interpolate_color(ivoid, ovoid, roi_out, 1, -1, col, clips, filters, 3);
      }
    }
  }
  else if(data->mode == DT_IOP_HIGHLIGHTS_LCH)
  {
    if(filters == 9u)
      process_lch_xtrans(self, ivoid, ovoid, roi_in, roi_out, clip);
    else
      process_lch_bayer(self, piece, ivoid, ovoid, roi_in, roi_out, clip);
  }
  else
  {
    // DT_IOP_HIGHLIGHTS_CLIP and any unknown mode
    process_clip(piece, ivoid, ovoid, roi_in, roi_out, clip);
  }

  // update processed maximum: all three channels take the largest value
  const float highest = fmaxf(fmaxf(pmax[0], pmax[1]), pmax[2]);
  for(int c = 0; c < 3; c++) pmax[c] = highest;

  if(piece->pipe->mask_display) dt_iop_alpha_copy(ivoid, ovoid, roi_out->width, roi_out->height);
}
/* LCh-style highlight reconstruction on Bayer-mosaiced float data.
 *
 * For every pixel except those in the last column and last row, the 2x2
 * block starting at that pixel is sampled (one red, one blue and two green
 * samples, told apart with FC()). If no sample exceeds `clip` the pixel is
 * copied unchanged. Otherwise lightness is taken from the unclipped values
 * (using the larger green), the two chroma-like components are rescaled to
 * the magnitude they have after clipping (using the smaller green), and the
 * result is transformed back to RGB; the channel belonging to the current
 * pixel is written out.
 *
 * Pixels in the last column or last row are simply clipped to `clip`.
 * Both buffers are indexed with roi_out->width as row stride; `self` and
 * `roi_in` are not used.
 */
static void process_lch_bayer(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const void *const ivoid,
                              void *const ovoid, const dt_iop_roi_t *const roi_in,
                              const dt_iop_roi_t *const roi_out, const float clip)
{
  const int filters = dt_image_filter(&piece->pipe->image);

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none)
#endif
  for(int j = 0; j < roi_out->height; j++)
  {
    float *const out_row = (float *)ovoid + (size_t)roi_out->width * j;
    const float *const in_row = (const float *)ivoid + (size_t)roi_out->width * j;

    for(int i = 0; i < roi_out->width; i++)
    {
      const float *const px = in_row + i;

      if(i == roi_out->width - 1 || j == roi_out->height - 1)
      {
        // fast path for border: no complete 2x2 block available
        out_row[i] = MIN(clip, px[0]);
        continue;
      }

      // sample 1 bayer block. thus we will have 2 green values.
      int clipped = 0;
      float R = 0.0f, Gmin = FLT_MAX, Gmax = -FLT_MAX, B = 0.0f;
      for(int jj = 0; jj <= 1; jj++)
      {
        for(int ii = 0; ii <= 1; ii++)
        {
          const float val = px[(size_t)jj * roi_out->width + ii];
          if(val > clip) clipped = 1;

          const int c = FC(j + jj + roi_out->y, i + ii + roi_out->x, filters);
          if(c == 0)
          {
            R = val;
          }
          else if(c == 1)
          {
            Gmin = MIN(Gmin, val);
            Gmax = MAX(Gmax, val);
          }
          else if(c == 2)
          {
            B = val;
          }
        }
      }

      if(!clipped)
      {
        out_row[i] = px[0];
        continue;
      }

      // clipped counterparts of the block's channels
      const float Ro = MIN(R, clip);
      const float Go = MIN(Gmin, clip);
      const float Bo = MIN(B, clip);

      const float L = (R + Gmax + B) / 3.0f;

      float C = SQRT3 * (R - Gmax);
      float H = 2.0f * B - Gmax - R;

      const float Co = SQRT3 * (Ro - Go);
      const float Ho = 2.0f * Bo - Go - Ro;

      if(R != Gmax && Gmax != B)
      {
        // shrink the chroma vector to the length it has after clipping
        const float ratio = sqrtf((Co * Co + Ho * Ho) / (C * C + H * H));
        C *= ratio;
        H *= ratio;
      }

      /*
       * backtransform proof, sage:
       *
       * R,G,B,L,C,H = var('R,G,B,L,C,H')
       * solve([L==(R+G+B)/3, C==sqrt(3)*(R-G), H==2*B-G-R], R, G, B)
       *
       * result:
       * [[R == 1/6*sqrt(3)*C - 1/6*H + L, G == -1/6*sqrt(3)*C - 1/6*H + L, B == 1/3*H + L]]
       */
      float RGB[3] = { 0.0f, 0.0f, 0.0f };
      RGB[0] = L - H / 6.0f + C / SQRT12;
      RGB[1] = L - H / 6.0f - C / SQRT12;
      RGB[2] = L + H / 3.0f;

      out_row[i] = RGB[FC(j + roi_out->y, i + roi_out->x, filters)];
    }
  }
}
/* CPU entry point of the white balance (temperature) module: multiplies
 * every sample by the coefficient of its color channel.
 *
 * Three layouts are handled:
 *   - X-Trans mosaic (full-size input, filters == 9u): scalar loop, channel
 *     looked up with FCxtrans();
 *   - Bayer mosaic (full-size input, any other non-zero filters): scalar
 *     head until the running index is a multiple of four floats, SSE body,
 *     scalar tail; channel looked up with FC();
 *   - non-mosaiced: one SSE multiply per pixel with (c0, c1, c2, 1.0), then
 *     the alpha channel is copied from the input when a mask is displayed.
 *
 * Finally the first three processed maxima are scaled by the coefficients.
 */
void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, void *ivoid, void *ovoid,
             const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  const int filters = dt_image_filter(&piece->pipe->image);
  uint8_t(*const xtrans)[6] = self->dev->image_storage.xtrans;
  dt_iop_temperature_data_t *d = (dt_iop_temperature_data_t *)piece->data;

  // true when we are looking at full-size sensor data with a color filter array
  const int mosaiced = !dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && filters;

  if(mosaiced && filters == 9u)
  { // xtrans float mosaiced
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(roi_out, ivoid, ovoid, d) schedule(static)
#endif
    for(int j = 0; j < roi_out->height; j++)
    {
      const float *const in_row = ((float *)ivoid) + (size_t)j * roi_out->width;
      float *const out_row = ((float *)ovoid) + (size_t)j * roi_out->width;

      for(int i = 0; i < roi_out->width; i++)
        out_row[i] = in_row[i] * d->coeffs[FCxtrans(j, i, roi_out, xtrans)];
    }
  }
  else if(mosaiced)
  { // bayer float mosaiced
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(roi_out, ivoid, ovoid, d) schedule(static)
#endif
    for(int j = 0; j < roi_out->height; j++)
    {
      const float *const in_row = ((float *)ivoid) + (size_t)j * roi_out->width;
      float *const out_row = ((float *)ovoid) + (size_t)j * roi_out->width;

      // number of leading floats before the row's index reaches a multiple of four
      const int alignment = ((4 - (j * roi_out->width & (4 - 1))) & (4 - 1));
      int i = 0;

      // process unaligned pixels
      for(; i < alignment; i++)
        out_row[i] = in_row[i] * d->coeffs[FC(j + roi_out->y, i + roi_out->x, filters)];

      // coefficients of the four consecutive pixels starting at the first aligned one
      const __m128 coeffs = _mm_set_ps(d->coeffs[FC(j + roi_out->y, roi_out->x + i + 3, filters)],
                                       d->coeffs[FC(j + roi_out->y, roi_out->x + i + 2, filters)],
                                       d->coeffs[FC(j + roi_out->y, roi_out->x + i + 1, filters)],
                                       d->coeffs[FC(j + roi_out->y, roi_out->x + i, filters)]);

      // process aligned pixels with SSE
      for(; i < roi_out->width - (4 - 1); i += 4)
      {
        const __m128 scaled = _mm_mul_ps(_mm_load_ps(in_row + i), coeffs);
        _mm_stream_ps(out_row + i, scaled);
      }

      // process the rest
      for(; i < roi_out->width; i++)
        out_row[i] = in_row[i] * d->coeffs[FC(j + roi_out->y, i + roi_out->x, filters)];
    }
    _mm_sfence();
  }
  else
  { // non-mosaiced
    const int ch = piece->colors;
    const __m128 coeffs = _mm_set_ps(1.0f, d->coeffs[2], d->coeffs[1], d->coeffs[0]);

#ifdef _OPENMP
#pragma omp parallel for default(none) shared(roi_out, ivoid, ovoid, d) schedule(static)
#endif
    for(int row = 0; row < roi_out->height; row++)
    {
      const float *in = ((float *)ivoid) + (size_t)ch * row * roi_out->width;
      float *out = ((float *)ovoid) + (size_t)ch * row * roi_out->width;

      for(int col = 0; col < roi_out->width; col++, in += ch, out += ch)
      {
        const __m128 scaled = _mm_mul_ps(_mm_load_ps(in), coeffs);
        _mm_stream_ps(out, scaled);
      }
    }
    _mm_sfence();

    if(piece->pipe->mask_display) dt_iop_alpha_copy(ivoid, ovoid, roi_out->width, roi_out->height);
  }

  float *const pmax = piece->pipe->processed_maximum;
  for(int c = 0; c < 3; c++) pmax[c] = d->coeffs[c] * pmax[c];
}
static gboolean draw(GtkWidget *widget, cairo_t *cr, dt_iop_module_t *self) { if(darktable.gui->reset) return FALSE; if(self->picked_color_max[0] < 0.0f) return FALSE; if(self->request_color_pick == DT_REQUEST_COLORPICK_OFF) return FALSE; dt_iop_invert_gui_data_t *g = (dt_iop_invert_gui_data_t *)self->gui_data; dt_iop_invert_params_t *p = (dt_iop_invert_params_t *)self->params; if(fabsf(p->color[0] - self->picked_color[0]) < 0.0001f && fabsf(p->color[1] - self->picked_color[1]) < 0.0001f && fabsf(p->color[2] - self->picked_color[2]) < 0.0001f) { // interrupt infinite loops return FALSE; } p->color[0] = self->picked_color[0]; p->color[1] = self->picked_color[1]; p->color[2] = self->picked_color[2]; GdkRGBA color = (GdkRGBA){.red = p->color[0], .green = p->color[1], .blue = p->color[2], .alpha = 1.0 }; gtk_color_chooser_set_rgba(GTK_COLOR_CHOOSER(g->colorpicker), &color); dt_dev_add_history_item(darktable.develop, self, TRUE); return FALSE; } static void colorpicker_callback(GtkColorButton *widget, dt_iop_module_t *self) { if(self->dt->gui->reset) return; dt_iop_invert_gui_data_t *g = (dt_iop_invert_gui_data_t *)self->gui_data; dt_iop_invert_params_t *p = (dt_iop_invert_params_t *)self->params; // turn off the other color picker so that this tool actually works ... 
gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(g->picker), FALSE); GdkRGBA c; gtk_color_chooser_get_rgba(GTK_COLOR_CHOOSER(widget), &c); p->color[0] = c.red; p->color[1] = c.green; p->color[2] = c.blue; dt_dev_add_history_item(darktable.develop, self, TRUE); } void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, void *ivoid, void *ovoid, const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out) { dt_iop_invert_data_t *d = (dt_iop_invert_data_t *)piece->data; const float *const m = piece->pipe->processed_maximum; float film_rgb[4] = { d->color[0], d->color[1], d->color[2], 0.0f }; // Convert the RGB color to CYGM only if we're not in the preview pipe (which is already RGB) if((self->dev->image_storage.flags & DT_IMAGE_4BAYER) && !dt_dev_pixelpipe_uses_downsampled_input(piece->pipe)) dt_colorspaces_rgb_to_cygm(film_rgb, 1, d->RGB_to_CAM); const float film_rgb_f[4] = { film_rgb[0] * m[0], film_rgb[1] * m[1], film_rgb[2] * m[2], film_rgb[3] * m[3] }; // FIXME: it could be wise to make this a NOP when picking colors. not sure about that though. 
// if(self->request_color_pick){ // do nothing // } const int filters = dt_image_filter(&piece->pipe->image); const uint8_t (*const xtrans)[6] = (const uint8_t (*const)[6]) self->dev->image_storage.xtrans; if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && (filters == 9u)) { // xtrans float mosaiced #ifdef _OPENMP #pragma omp parallel for default(none) shared(roi_out, ivoid, ovoid) schedule(static) #endif for(int j = 0; j < roi_out->height; j++) { const float *in = ((float *)ivoid) + (size_t)j * roi_out->width; float *out = ((float *)ovoid) + (size_t)j * roi_out->width; for(int i = 0; i < roi_out->width; i++, out++, in++) *out = CLAMP(film_rgb_f[FCxtrans(j, i, roi_out, xtrans)] - *in, 0.0f, 1.0f); } for(int k = 0; k < 4; k++) piece->pipe->processed_maximum[k] = 1.0f; } else if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && filters) { // bayer float mosaiced const __m128 val_min = _mm_setzero_ps(); const __m128 val_max = _mm_set1_ps(1.0f); #ifdef _OPENMP #pragma omp parallel for default(none) shared(roi_out, ivoid, ovoid) schedule(static) #endif for(int j = 0; j < roi_out->height; j++) { const float *in = ((float *)ivoid) + (size_t)j * roi_out->width; float *out = ((float *)ovoid) + (size_t)j * roi_out->width; int i = 0; int alignment = ((4 - (j * roi_out->width & (4 - 1))) & (4 - 1)); // process unaligned pixels for(; i < alignment; i++, out++, in++) *out = CLAMP(film_rgb_f[FC(j + roi_out->y, i + roi_out->x, filters)] - *in, 0.0f, 1.0f); const __m128 film = _mm_set_ps(film_rgb_f[FC(j + roi_out->y, roi_out->x + i + 3, filters)], film_rgb_f[FC(j + roi_out->y, roi_out->x + i + 2, filters)], film_rgb_f[FC(j + roi_out->y, roi_out->x + i + 1, filters)], film_rgb_f[FC(j + roi_out->y, roi_out->x + i, filters)]); // process aligned pixels with SSE for(; i < roi_out->width - (4 - 1); i += 4, in += 4, out += 4) { const __m128 input = _mm_load_ps(in); const __m128 subtracted = _mm_sub_ps(film, input); _mm_stream_ps(out, _mm_max_ps(_mm_min_ps(subtracted, 
val_max), val_min)); } // process the rest for(; i < roi_out->width; i++, out++, in++) *out = CLAMP(film_rgb_f[FC(j + roi_out->y, i + roi_out->x, filters)] - *in, 0.0f, 1.0f); } _mm_sfence(); for(int k = 0; k < 4; k++) piece->pipe->processed_maximum[k] = 1.0f; } else { // non-mosaiced const int ch = piece->colors; const __m128 film = _mm_set_ps(1.0f, film_rgb[2], film_rgb[1], film_rgb[0]); #ifdef _OPENMP #pragma omp parallel for default(none) shared(roi_out, ivoid, ovoid) schedule(static) #endif for(int k = 0; k < roi_out->height; k++) { const float *in = ((float *)ivoid) + (size_t)ch * k * roi_out->width; float *out = ((float *)ovoid) + (size_t)ch * k * roi_out->width; for(int j = 0; j < roi_out->width; j++, in += ch, out += ch) { const __m128 input = _mm_load_ps(in); const __m128 subtracted = _mm_sub_ps(film, input); _mm_stream_ps(out, subtracted); } } _mm_sfence(); if(piece->pipe->mask_display) dt_iop_alpha_copy(ivoid, ovoid, roi_out->width, roi_out->height); } }
/* CPU entry point of the highlights module (SSE variant).
 *
 * The clipping threshold is data->clip scaled by the smallest of the first
 * three processed maxima.
 *
 * Downsampled or non-mosaiced input: every pixel (four floats) is clipped
 * with SSE and the function returns immediately.
 *
 * Mosaiced input is dispatched on data->mode:
 *   - DT_IOP_HIGHLIGHTS_INPAINT: four directional interpolation passes
 *     (rows forward/backward, then columns forward/backward), with separate
 *     helpers for X-Trans (filters == 9u) and Bayer sensors;
 *   - DT_IOP_HIGHLIGHTS_LCH: process_lch_xtrans for X-Trans; for Bayer each
 *     non-border pixel is blended towards the mean of the 2x2 block that
 *     starts at it, with a weight that grows as the block's samples pass
 *     0.96 * clip and saturates at 1.10 * clip; border pixels are copied;
 *   - anything else (including DT_IOP_HIGHLIGHTS_CLIP): the flat float
 *     buffer is clipped, four floats at a time with SSE plus a scalar tail.
 * Afterwards the alpha channel is copied from the input if the pipe displays
 * a mask.
 *
 * _mm_load_ps/_mm_stream_ps require 16-byte aligned addresses; the buffers
 * are assumed to be allocated accordingly by the caller -- TODO confirm.
 */
void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, void *ivoid, void *ovoid,
             const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  const int filters = dt_image_filter(&piece->pipe->image);
  dt_iop_highlights_data_t *data = (dt_iop_highlights_data_t *)piece->data;

  const float clip
      = data->clip * fminf(piece->pipe->processed_maximum[0],
                           fminf(piece->pipe->processed_maximum[1], piece->pipe->processed_maximum[2]));

  if(dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) || !filters)
  {
    // four floats per pixel: plain clipping, one SSE store per pixel
    const __m128 clipm = _mm_set1_ps(clip);
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none) shared(ovoid, ivoid, roi_in, roi_out, data, piece)
#endif
    for(int j = 0; j < roi_out->height; j++)
    {
      float *out = (float *)ovoid + (size_t)4 * roi_out->width * j;
      float *in = (float *)ivoid + (size_t)4 * roi_in->width * j;
      for(int i = 0; i < roi_out->width; i++)
      {
        _mm_stream_ps(out, _mm_min_ps(clipm, _mm_set_ps(in[3], in[2], in[1], in[0])));
        in += 4;
        out += 4;
      }
    }
    _mm_sfence();
    return;
  }

  switch(data->mode)
  {
    case DT_IOP_HIGHLIGHTS_INPAINT: // a1ex's (magiclantern) idea of color inpainting:
    {
      const float clips[4] = { 0.987 * data->clip * piece->pipe->processed_maximum[0],
                               0.987 * data->clip * piece->pipe->processed_maximum[1],
                               0.987 * data->clip * piece->pipe->processed_maximum[2], clip };

      if(filters == 9u)
      {
        const dt_image_t *img = &self->dev->image_storage;

        // left/right directions
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none) shared(ovoid, ivoid, roi_in, roi_out, img)
#endif
        for(int j = 0; j < roi_out->height; j++)
        {
          _interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 0, 1, j, clips, img->xtrans, 0);
          _interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 0, -1, j, clips, img->xtrans, 1);
        }

        // up/down directions
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none) shared(ovoid, ivoid, roi_in, roi_out, img)
#endif
        for(int i = 0; i < roi_out->width; i++)
        {
          _interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 1, 1, i, clips, img->xtrans, 2);
          _interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 1, -1, i, clips, img->xtrans, 3);
        }
        break;
      }

      // left/right directions
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none) shared(ovoid, ivoid, roi_in, roi_out, data, piece)
#endif
      for(int j = 0; j < roi_out->height; j++)
      {
        _interpolate_color(ivoid, ovoid, roi_out, 0, 1, j, clips, filters, 0);
        _interpolate_color(ivoid, ovoid, roi_out, 0, -1, j, clips, filters, 1);
      }

      // up/down directions
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none) shared(ovoid, ivoid, roi_in, roi_out, data, piece)
#endif
      for(int i = 0; i < roi_out->width; i++)
      {
        _interpolate_color(ivoid, ovoid, roi_out, 1, 1, i, clips, filters, 2);
        _interpolate_color(ivoid, ovoid, roi_out, 1, -1, i, clips, filters, 3);
      }
      break;
    }

    case DT_IOP_HIGHLIGHTS_LCH:
      if(filters == 9u)
      {
        process_lch_xtrans(ivoid, ovoid, roi_out->width, roi_out->height, clip);
        break;
      }
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none) shared(ovoid, ivoid, roi_in, roi_out, data, piece)
#endif
      for(int j = 0; j < roi_out->height; j++)
      {
        float *out = (float *)ovoid + (size_t)roi_out->width * j;
        float *in = (float *)ivoid + (size_t)roi_out->width * j;
        for(int i = 0; i < roi_out->width; i++)
        {
          if(i == 0 || i == roi_out->width - 1 || j == 0 || j == roi_out->height - 1)
          {
            // fast path for border
            out[0] = in[0];
          }
          else
          {
            // analyse one bayer block to get same number of rggb pixels each time
            const float near_clip = 0.96f * clip;
            const float post_clip = 1.10f * clip;
            float blend = 0.0f;
            float mean = 0.0f;
            for(int jj = 0; jj <= 1; jj++)
            {
              for(int ii = 0; ii <= 1; ii++)
              {
                const float val = in[(size_t)jj * roi_out->width + ii];
                mean += val * 0.25f;
                blend += (fminf(post_clip, val) - near_clip) / (post_clip - near_clip);
              }
            }
            blend = CLAMP(blend, 0.0f, 1.0f);
            if(blend > 0)
            {
              // recover: mix the block mean into the pixel
              out[0] = blend * mean + (1.f - blend) * in[0];
            }
            else
              out[0] = in[0];
          }
          out++;
          in++;
        }
      }
      break;

    default:
    case DT_IOP_HIGHLIGHTS_CLIP:
    {
      const __m128 clipm = _mm_set1_ps(clip);
      const size_t n = (size_t)roi_out->height * roi_out->width;
      // Round down to a multiple of four. The mask must be size_t wide: with
      // `~3u` the 32-bit mask is zero-extended and wipes the upper bits of n.
      const size_t n_vec = n & ~(size_t)3;
      float *const out = (float *)ovoid;
      const float *const in = (const float *)ivoid;
#ifdef _OPENMP
#pragma omp parallel for schedule(static) default(none)
#endif
      for(size_t j = 0; j < n_vec; j += 4)
        _mm_stream_ps(out + j, _mm_min_ps(clipm, _mm_load_ps(in + j)));
      _mm_sfence();

      // scalar tail for a non-multiple-of-four rest
      for(size_t j = n_vec; j < n; j++)
        out[j] = MIN(clip, in[j]);
      break;
    }
  }

  if(piece->pipe->mask_display) dt_iop_alpha_copy(ivoid, ovoid, roi_out->width, roi_out->height);
}