/**
 * Reconstruct clipped highlights for one region of interest.
 *
 * The clipping threshold is data->clip scaled by the smallest of the three
 * pipe->dsc.processed_maximum entries.
 *
 * - filters == 0: the buffer is handed to process_clip() and every
 *   processed_maximum entry is lowered to the smallest of the three.
 *   (Presumably this is the non-mosaiced input case -- confirm against caller.)
 * - otherwise data->mode selects inpainting, LCh reconstruction or plain
 *   clipping; filters == 9u routes to the X-Trans helpers, any other value to
 *   the Bayer helpers. Afterwards every processed_maximum entry is raised to
 *   the largest of the three, and the alpha channel is copied from input to
 *   output when the pipe displays a mask.
 *
 * @param self    module instance, forwarded to the LCh helpers
 * @param piece   pixelpipe piece; supplies the parameters (piece->data) and
 *                pipe->dsc, whose processed_maximum is rewritten here
 * @param ivoid   input pixel buffer (read only)
 * @param ovoid   output pixel buffer
 * @param roi_in  region of interest of the input buffer
 * @param roi_out region of interest of the output buffer
 */
void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const void *const ivoid,
             void *const ovoid, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  const uint32_t filters = piece->pipe->dsc.filters;
  dt_iop_highlights_data_t *data = (dt_iop_highlights_data_t *)piece->data;

  const float clip = data->clip
                     * fminf(piece->pipe->dsc.processed_maximum[0],
                             fminf(piece->pipe->dsc.processed_maximum[1],
                                   piece->pipe->dsc.processed_maximum[2]));

  if(!filters)
  {
    process_clip(piece, ivoid, ovoid, roi_in, roi_out, clip);

    // evaluated after process_clip() on purpose: it receives piece and could touch the maxima
    const float lowest = fminf(piece->pipe->dsc.processed_maximum[0],
                               fminf(piece->pipe->dsc.processed_maximum[1],
                                     piece->pipe->dsc.processed_maximum[2]));
    for(int k = 0; k < 3; k++) piece->pipe->dsc.processed_maximum[k] = lowest;
    return;
  }

  switch(data->mode)
  {
    case DT_IOP_HIGHLIGHTS_INPAINT: // a1ex's (magiclantern) idea of color inpainting:
    {
      // per-channel thresholds sit slightly below the clip level; the 4th entry is the common threshold
      const float clips[4] = { 0.987 * data->clip * piece->pipe->dsc.processed_maximum[0],
                               0.987 * data->clip * piece->pipe->dsc.processed_maximum[1],
                               0.987 * data->clip * piece->pipe->dsc.processed_maximum[2], clip };

      /*
       * The parallel loops below deliberately carry no default(none): everything
       * they read is const-qualified, and whether such variables are predetermined
       * shared or have to be listed explicitly differs between OpenMP versions.
       * Implicit sharing is accepted by all of them and is semantically identical.
       */
      if(filters == 9u)
      {
        const uint8_t(*const xtrans)[6] = (const uint8_t(*const)[6])piece->pipe->dsc.xtrans;

        // one forward and one backward pass per row ...
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic)
#endif
        for(int j = 0; j < roi_out->height; j++)
        {
          interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 0, 1, j, clips, xtrans, 0);
          interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 0, -1, j, clips, xtrans, 1);
        }

        // ... followed by one forward and one backward pass per column
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic)
#endif
        for(int i = 0; i < roi_out->width; i++)
        {
          interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 1, 1, i, clips, xtrans, 2);
          interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 1, -1, i, clips, xtrans, 3);
        }
      }
      else
      {
        // row passes
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic)
#endif
        for(int j = 0; j < roi_out->height; j++)
        {
          interpolate_color(ivoid, ovoid, roi_out, 0, 1, j, clips, filters, 0);
          interpolate_color(ivoid, ovoid, roi_out, 0, -1, j, clips, filters, 1);
        }

        // up/down directions
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic)
#endif
        for(int i = 0; i < roi_out->width; i++)
        {
          interpolate_color(ivoid, ovoid, roi_out, 1, 1, i, clips, filters, 2);
          interpolate_color(ivoid, ovoid, roi_out, 1, -1, i, clips, filters, 3);
        }
      }
      break;
    }

    case DT_IOP_HIGHLIGHTS_LCH:
      if(filters == 9u)
        process_lch_xtrans(self, piece, ivoid, ovoid, roi_in, roi_out, clip);
      else
        process_lch_bayer(self, piece, ivoid, ovoid, roi_in, roi_out, clip);
      break;

    default:
    case DT_IOP_HIGHLIGHTS_CLIP:
      process_clip(piece, ivoid, ovoid, roi_in, roi_out, clip);
      break;
  }

  // update processed maximum: all three entries become the largest of them
  const float m = fmaxf(fmaxf(piece->pipe->dsc.processed_maximum[0], piece->pipe->dsc.processed_maximum[1]),
                        piece->pipe->dsc.processed_maximum[2]);
  for(int k = 0; k < 3; k++) piece->pipe->dsc.processed_maximum[k] = m;

  if(piece->pipe->mask_display & DT_DEV_PIXELPIPE_DISPLAY_MASK)
    dt_iop_alpha_copy(ivoid, ovoid, roi_out->width, roi_out->height);
}
/**
 * Reconstruct clipped highlights for one region of interest (SSE variant).
 *
 * The clipping threshold is data->clip scaled by the smallest of the three
 * pipe->processed_maximum entries.
 *
 * - When the pipe uses downsampled input or dt_image_filter() reports no
 *   filter pattern, the buffers are handled as 4 floats per pixel and every
 *   float (including the 4th) is clamped to the threshold; the function then
 *   returns without the alpha copy below.
 * - Otherwise the buffers are handled as 1 float per pixel and data->mode
 *   selects inpainting, LCh-style blending or plain clipping; filters == 9u
 *   routes to the X-Trans helpers. When pipe->mask_display is set the alpha
 *   channel is copied from input to output afterwards.
 *
 * NOTE(review): the plain clipping path uses aligned SSE loads, so ivoid is
 * assumed to be 16-byte aligned -- confirm against the buffer allocator.
 *
 * @param self    module instance; its dev->image_storage supplies the X-Trans layout
 * @param piece   pixelpipe piece; supplies the parameters (piece->data) and the pipe
 * @param ivoid   input pixel buffer
 * @param ovoid   output pixel buffer
 * @param roi_in  region of interest of the input buffer
 * @param roi_out region of interest of the output buffer
 */
void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, void *ivoid, void *ovoid,
             const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  const int filters = dt_image_filter(&piece->pipe->image);
  dt_iop_highlights_data_t *data = (dt_iop_highlights_data_t *)piece->data;

  const float clip = data->clip
                     * fminf(piece->pipe->processed_maximum[0],
                             fminf(piece->pipe->processed_maximum[1], piece->pipe->processed_maximum[2]));

  /*
   * None of the parallel loops in this function carries default(none): they read
   * const-qualified locals (clip, clips, filters, ...), and whether such variables
   * are predetermined shared or have to be listed explicitly differs between
   * OpenMP versions. Implicit sharing is accepted by all of them and is
   * semantically identical.
   */

  if(dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) || !filters)
  {
    const __m128 clipm = _mm_set1_ps(clip);
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic)
#endif
    for(int j = 0; j < roi_out->height; j++)
    {
      float *out = (float *)ovoid + (size_t)4 * roi_out->width * j;
      float *in = (float *)ivoid + (size_t)4 * roi_in->width * j;
      for(int i = 0; i < roi_out->width; i++)
      {
        // element-wise min over all four floats of the pixel, written with a streaming store
        _mm_stream_ps(out, _mm_min_ps(clipm, _mm_set_ps(in[3], in[2], in[1], in[0])));
        in += 4;
        out += 4;
      }
    }
    _mm_sfence(); // make the streaming stores visible before returning
    return;
  }

  switch(data->mode)
  {
    case DT_IOP_HIGHLIGHTS_INPAINT: // a1ex's (magiclantern) idea of color inpainting:
    {
      // per-channel thresholds sit slightly below the clip level; the 4th entry is the common threshold
      const float clips[4] = { 0.987 * data->clip * piece->pipe->processed_maximum[0],
                               0.987 * data->clip * piece->pipe->processed_maximum[1],
                               0.987 * data->clip * piece->pipe->processed_maximum[2], clip };

      if(filters == 9u)
      {
        const dt_image_t *img = &self->dev->image_storage;

        // one forward and one backward pass per row ...
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic)
#endif
        for(int j = 0; j < roi_out->height; j++)
        {
          _interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 0, 1, j, clips, img->xtrans, 0);
          _interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 0, -1, j, clips, img->xtrans, 1);
        }

        // ... followed by one forward and one backward pass per column
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic)
#endif
        for(int i = 0; i < roi_out->width; i++)
        {
          _interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 1, 1, i, clips, img->xtrans, 2);
          _interpolate_color_xtrans(ivoid, ovoid, roi_in, roi_out, 1, -1, i, clips, img->xtrans, 3);
        }
      }
      else
      {
        // row passes
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic)
#endif
        for(int j = 0; j < roi_out->height; j++)
        {
          _interpolate_color(ivoid, ovoid, roi_out, 0, 1, j, clips, filters, 0);
          _interpolate_color(ivoid, ovoid, roi_out, 0, -1, j, clips, filters, 1);
        }

        // up/down directions
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic)
#endif
        for(int i = 0; i < roi_out->width; i++)
        {
          _interpolate_color(ivoid, ovoid, roi_out, 1, 1, i, clips, filters, 2);
          _interpolate_color(ivoid, ovoid, roi_out, 1, -1, i, clips, filters, 3);
        }
      }
      break;
    }

    case DT_IOP_HIGHLIGHTS_LCH:
    {
      if(filters == 9u)
      {
        process_lch_xtrans(ivoid, ovoid, roi_out->width, roi_out->height, clip);
      }
      else
      {
        // blending ramps from 0 at near_clip up to 1 per sample at post_clip
        const float near_clip = 0.96f * clip;
        const float post_clip = 1.10f * clip;
        const float ramp = post_clip - near_clip;

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic)
#endif
        for(int j = 0; j < roi_out->height; j++)
        {
          float *out = (float *)ovoid + (size_t)roi_out->width * j;
          float *in = (float *)ivoid + (size_t)roi_out->width * j;
          for(int i = 0; i < roi_out->width; i++, out++, in++)
          {
            const int on_border = (i == 0 || i == roi_out->width - 1 || j == 0 || j == roi_out->height - 1);
            if(on_border)
            {
              // fast path for border; also keeps the 2x2 read below inside the buffer
              out[0] = in[0];
              continue;
            }

            // analyse one bayer block to get same number of rggb pixels each time
            float blend = 0.0f;
            float mean = 0.0f;
            for(int jj = 0; jj <= 1; jj++)
            {
              for(int ii = 0; ii <= 1; ii++)
              {
                const float val = in[(size_t)jj * roi_out->width + ii];
                mean += val * 0.25f;
                blend += (fminf(post_clip, val) - near_clip) / ramp;
              }
            }
            blend = CLAMP(blend, 0.0f, 1.0f);

            // recover: mix the block mean into the pixel according to how close the block is to clipping
            if(blend > 0)
              out[0] = blend * mean + (1.f - blend) * in[0];
            else
              out[0] = in[0];
          }
        }
      }
      break;
    }

    default:
    case DT_IOP_HIGHLIGHTS_CLIP:
    {
      const __m128 clipm = _mm_set1_ps(clip);
      const size_t n = (size_t)roi_out->height * roi_out->width;
      float *const out = (float *)ovoid;
      float *const in = (float *)ivoid;

      // Largest multiple of four not above n. The mask must be as wide as size_t:
      // an unsigned-int mask would zero-extend and drop the upper bits of n.
      const size_t n_vec = n & ~(size_t)3;

#ifdef _OPENMP
#pragma omp parallel for schedule(static)
#endif
      for(size_t j = 0; j < n_vec; j += 4) _mm_stream_ps(out + j, _mm_min_ps(clipm, _mm_load_ps(in + j)));
      _mm_sfence();

      // scalar tail for a non-multiple-of-four rest
      for(size_t j = n_vec; j < n; j++) out[j] = MIN(clip, in[j]);
      break;
    }
  }

  if(piece->pipe->mask_display) dt_iop_alpha_copy(ivoid, ovoid, roi_out->width, roi_out->height);
}