/*
 * Low-pass filter: blur the input, then remap lightness and scale chroma.
 *
 * The image is blurred into `ovoid` with a gaussian (data->radius >= 0) or a
 * bilateral filter (data->radius < 0).  The blur width is
 * max(0.1, |data->radius|) * roi_in->scale / piece->iscale.
 *
 * Afterwards every output pixel is post-processed:
 *   - channel 0: values below 100 are looked up in data->table (65536 entries
 *     are addressed, index = value / 100 * 0x10000 clamped to [0, 0xffff]);
 *     other values go through dt_iop_eval_exp(data->unbounded_coeffs, ...),
 *   - channels 1 and 2: multiplied by data->saturation, clamped to [-128, 128],
 *   - channel 3: copied from the input.
 *
 * If the blur context cannot be created, the input is passed through
 * unchanged rather than leaving the output buffer unwritten.
 *
 * NOTE(review): the per-pixel code touches channel index 3 and the gaussian
 * path calls dt_gaussian_blur_4c(), so piece->colors is presumably 4 -- confirm.
 * NOTE(review): the blur is sized from roi_in while the per-pixel pass is
 * sized from roi_out; both are assumed to describe the same region.
 */
void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, void *ivoid, void *ovoid,
             const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_lowpass_data_t *data = (dt_iop_lowpass_data_t *)piece->data;
  float *in = (float *)ivoid;
  float *out = (float *)ovoid;

  const int width = roi_in->width;
  const int height = roi_in->height;
  const int ch = piece->colors;

  // counted in size_t: width * height * ch does not fit an int for very large buffers
  const size_t npixels = (size_t)roi_out->width * roi_out->height;

  const int use_bilateral = data->radius < 0 ? 1 : 0;
  const float radius = fmax(0.1f, fabs(data->radius));
  const float sigma = radius * roi_in->scale / piece->iscale;
  const int order = data->order;

  const float Labmax[] = { 100.0f, 128.0f, 128.0f, 1.0f };
  const float Labmin[] = { 0.0f, -128.0f, -128.0f, 0.0f };

  int blurred = 0;

  if(!use_bilateral)
  {
    dt_gaussian_t *g = dt_gaussian_init(width, height, ch, Labmax, Labmin, sigma, order);
    if(g)
    {
      dt_gaussian_blur_4c(g, in, out);
      dt_gaussian_free(g);
      blurred = 1;
    }
  }
  else
  {
    const float sigma_r = 100.0f; // does not depend on scale
    const float sigma_s = sigma;
    const float detail = -1.0f; // we want the bilateral base layer

    dt_bilateral_t *b = dt_bilateral_init(width, height, sigma_s, sigma_r);
    if(b)
    {
      dt_bilateral_splat(b, in);
      dt_bilateral_blur(b);
      dt_bilateral_slice(b, in, out, detail);
      dt_bilateral_free(b);
      blurred = 1;
    }
  }

  if(!blurred)
  {
    // no blur context available: hand the input through untouched
    for(size_t k = 0; k < npixels * ch; k++) out[k] = in[k];
    return;
  }

#ifdef _OPENMP
#pragma omp parallel for default(none) shared(in, out, data) schedule(static)
#endif
  for(size_t k = 0; k < npixels; k++)
  {
    float *const px = out + k * ch;
    const float L = px[0];

    px[0] = (L < 100.0f) ? data->table[CLAMP((int)(L / 100.0f * 0x10000ul), 0, 0xffff)]
                         : dt_iop_eval_exp(data->unbounded_coeffs, L / 100.0f);
    px[1] = CLAMPF(px[1] * data->saturation, Labmin[1], Labmax[1]);
    px[2] = CLAMPF(px[2] * data->saturation, Labmin[2], Labmax[2]);
    px[3] = in[k * ch + 3];
  }
}
/*
 * Zone system: rescale each pixel according to the zone its lightness falls
 * into and, on the GUI preview pipe, publish zone-index maps of the blurred
 * input and output.
 *
 * Steps:
 *   1. On the preview pipe with a GUI attached, (re)allocate the two
 *      width*height byte preview buffers in the gui data under g->lock.
 *   2. Build the zone map and derive a per-zone scale and offset from it.
 *   3. For every pixel compute a factor from the zone of its first channel
 *      and multiply the four floats at that pixel by it (SSE load/stream).
 *   4. Optionally copy alpha when the pipe displays a mask.
 *   5. If preview buffers were set up in step 1, blur channel 0 of the input
 *      and of the output and store the resulting zone index per pixel.
 *
 * NOTE(review): _mm_load_ps/_mm_stream_ps handle four floats at 16-byte
 * aligned addresses, so this presumably relies on piece->colors == 4 and
 * aligned pipeline buffers -- confirm.
 */
void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, void *ivoid, void *ovoid,
             const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  float *in = (float *)ivoid;
  float *out = (float *)ovoid;
  dt_iop_zonesystem_data_t *data = (dt_iop_zonesystem_data_t *)piece->data;
  dt_iop_zonesystem_gui_data_t *g = NULL;
  guchar *in_buffer = NULL, *out_buffer = NULL;

  const int width = roi_out->width;
  const int height = roi_out->height;
  const size_t npixels = (size_t)width * height;

  if(self->dev->gui_attached && piece->pipe->type == DT_DEV_PIXELPIPE_PREVIEW)
  {
    g = (dt_iop_zonesystem_gui_data_t *)self->gui_data;

    dt_pthread_mutex_lock(&g->lock);
    // g_free() accepts NULL, so stale buffers can be released unconditionally
    g_free(g->in_preview_buffer);
    g_free(g->out_preview_buffer);
    in_buffer = g->in_preview_buffer = g_malloc((size_t)width * height);
    out_buffer = g->out_preview_buffer = g_malloc((size_t)width * height);
    g->preview_width = width;
    g->preview_height = height;
    dt_pthread_mutex_unlock(&g->lock);
  }

  /* calculate zonemap */
  const int size = data->size;
  float zonemap[MAX_ZONE_SYSTEM_SIZE] = { -1 };
  _iop_zonesystem_calculate_zonemap(data, zonemap);

  const int ch = piece->colors;
  const float rzscale = (size - 1) / 100.0f;

  /* precompute scale and offset for every zone */
  float zonemap_offset[MAX_ZONE_SYSTEM_SIZE] = { -1 };
  float zonemap_scale[MAX_ZONE_SYSTEM_SIZE] = { -1 };
  for(int k = 0; k < size - 1; k++)
  {
    zonemap_scale[k] = (zonemap[k + 1] - zonemap[k]) * (size - 1);
    zonemap_offset[k] = 100.0f * ((k + 1) * zonemap[k] - k * zonemap[k + 1]);
  }

  /* process the image */
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(in, out, zonemap_scale, zonemap_offset) schedule(static)
#endif
  for(int j = 0; j < height; j++)
  {
    for(int i = 0; i < width; i++)
    {
      /* remap lightness into zonemap and apply lightness */
      const size_t px = ch * ((size_t)j * width + i);
      const float *src = in + px;
      float *dst = out + px;

      const int zone = CLAMPS(src[0] * rzscale, 0, size - 2); // zone index
      const float gain = ((zone > 0) ? (zonemap_offset[zone] / src[0]) : 0) + zonemap_scale[zone];

      _mm_stream_ps(dst, _mm_mul_ps(_mm_load_ps(src), _mm_set1_ps(gain)));
    }
  }
  _mm_sfence();

  if(piece->pipe->mask_display) dt_iop_alpha_copy(ivoid, ovoid, width, height);

  /* without gui preview buffers there is nothing left to do */
  if(!(self->dev->gui_attached && g && in_buffer && out_buffer)) return;

  /* gaussblur the lightness and fill the buffers with zone indexes */
  float Lmax[] = { 100.0f };
  float Lmin[] = { 0.0f };

  /* setup gaussian kernel */
  const int radius = 8;
  const float sigma = 2.5 * (radius * roi_in->scale / piece->iscale);

  dt_gaussian_t *gauss = dt_gaussian_init(width, height, 1, Lmax, Lmin, sigma, DT_IOP_GAUSSIAN_ZERO);
  float *tmp = g_malloc((size_t)width * height * sizeof(float));

  if(gauss && tmp)
  {
    /* gather channel 0 of the input into a planar buffer and blur it in place */
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(ivoid, tmp) schedule(static)
#endif
    for(size_t k = 0; k < npixels; k++) tmp[k] = ((float *)ivoid)[ch * k];

    dt_gaussian_blur(gauss, tmp, tmp);

    /* create zonemap preview for input */
    dt_pthread_mutex_lock(&g->lock);
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(tmp, in_buffer) schedule(static)
#endif
    for(size_t k = 0; k < npixels; k++)
    {
      in_buffer[k] = CLAMPS(tmp[k] * (size - 1) / 100.0f, 0, size - 2);
    }
    dt_pthread_mutex_unlock(&g->lock);

    /* same again for channel 0 of the output */
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(ovoid, tmp) schedule(static)
#endif
    for(size_t k = 0; k < npixels; k++) tmp[k] = ((float *)ovoid)[ch * k];

    dt_gaussian_blur(gauss, tmp, tmp);

    /* create zonemap preview for output */
    dt_pthread_mutex_lock(&g->lock);
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(tmp, out_buffer) schedule(static)
#endif
    for(size_t k = 0; k < npixels; k++)
    {
      out_buffer[k] = CLAMPS(tmp[k] * (size - 1) / 100.0f, 0, size - 2);
    }
    dt_pthread_mutex_unlock(&g->lock);
  }

  g_free(tmp);
  if(gauss) dt_gaussian_free(gauss);
}
/*
 * Shared tail of the zone system processing paths.
 *
 * Copies alpha from input to output when the pipe displays a mask.  Then, if
 * a GUI is attached, this is the preview pipe and both preview buffers exist
 * in the gui data, it blurs channel 0 of the input and of the output and
 * stores, per pixel, the zone index
 *   CLAMPS(L * (size - 1) / 100, 0, size - 2)
 * into g->in_preview_buffer and g->out_preview_buffer.  The buffer writes are
 * done while holding g->lock.
 *
 * NOTE(review): the preview buffers are assumed to hold at least
 * roi_out->width * roi_out->height entries; they are allocated outside this
 * function -- confirm against the caller.
 */
static void process_common_cleanup(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece,
                                   const void *const ivoid, void *const ovoid,
                                   const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  dt_iop_zonesystem_data_t *d = (dt_iop_zonesystem_data_t *)piece->data;
  dt_iop_zonesystem_gui_data_t *g = (dt_iop_zonesystem_gui_data_t *)self->gui_data;

  const int width = roi_out->width;
  const int height = roi_out->height;
  const int ch = piece->colors;
  const int size = d->params.size;
  const size_t npixels = (size_t)width * height;

  if(piece->pipe->mask_display) dt_iop_alpha_copy(ivoid, ovoid, width, height);

  /* the zone previews are only produced for the gui preview pipe with buffers in place */
  if(!(self->dev->gui_attached && piece->pipe->type == DT_DEV_PIXELPIPE_PREVIEW && g
       && g->in_preview_buffer && g->out_preview_buffer))
    return;

  float Lmax[] = { 100.0f };
  float Lmin[] = { 0.0f };

  /* setup gaussian kernel */
  const int radius = 8;
  const float sigma = 2.5 * (radius * roi_in->scale / piece->iscale);

  dt_gaussian_t *gauss = dt_gaussian_init(width, height, 1, Lmax, Lmin, sigma, DT_IOP_GAUSSIAN_ZERO);
  float *tmp = g_malloc_n((size_t)width * height, sizeof(float));

  if(gauss && tmp)
  {
    /* gather channel 0 of the input into a planar buffer and blur it in place */
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(tmp) schedule(static)
#endif
    for(size_t k = 0; k < npixels; k++) tmp[k] = ((const float *)ivoid)[ch * k];

    dt_gaussian_blur(gauss, tmp, tmp);

    /* create zonemap preview for input */
    dt_pthread_mutex_lock(&g->lock);
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(tmp, g) schedule(static)
#endif
    for(size_t k = 0; k < npixels; k++)
    {
      g->in_preview_buffer[k] = CLAMPS(tmp[k] * (size - 1) / 100.0f, 0, size - 2);
    }
    dt_pthread_mutex_unlock(&g->lock);

    /* same again for channel 0 of the output */
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(tmp) schedule(static)
#endif
    for(size_t k = 0; k < npixels; k++) tmp[k] = ((const float *)ovoid)[ch * k];

    dt_gaussian_blur(gauss, tmp, tmp);

    /* create zonemap preview for output */
    dt_pthread_mutex_lock(&g->lock);
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(tmp, g) schedule(static)
#endif
    for(size_t k = 0; k < npixels; k++)
    {
      g->out_preview_buffer[k] = CLAMPS(tmp[k] * (size - 1) / 100.0f, 0, size - 2);
    }
    dt_pthread_mutex_unlock(&g->lock);
  }

  g_free(tmp);
  if(gauss) dt_gaussian_free(gauss);
}
// the basis of how the following algorithm works comes from rawtherapee (http://rawtherapee.com/) // defringe -- thanks to Emil Martinec <*****@*****.**> for that // quite some modifications were done though: // 1. use a fibonacci lattice instead of full window, to speed things up // 2. option for local averaging or static (RT used the global/region one) // 3. additional condition to reduce sharp edged artifacts, by blurring pixels near pixels over threshold, // this really helps improving the filter with thick fringes // ----------------------------------------------------------------------------------------- // in the following you will also see some more "magic numbers", // most are chosen arbitrarily and/or by experiment/trial+error ... I am sorry ;-) // and having everything user-defineable would be just too much // ----------------------------------------------------------------------------------------- void process(struct dt_iop_module_t *module, dt_dev_pixelpipe_iop_t *piece, void *i, void *o, const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out) { dt_iop_defringe_data_t *d = (dt_iop_defringe_data_t *)piece->data; assert(dt_iop_module_colorspace(module) == iop_cs_Lab); const int order = 1; // 0,1,2 const float sigma = fmax(0.1f, fabs(d->radius)) * roi_in->scale / piece->iscale; const float Labmax[] = { 100.0f, 128.0f, 128.0f, 1.0f }; const float Labmin[] = { 0.0f, -128.0f, -128.0f, 0.0f }; const int ch = piece->colors; const int radius = ceil(2.0 * ceilf(sigma)); // save the fibonacci lattices in them later int *xy_avg = NULL; int *xy_artifact = NULL; int *xy_small = NULL; if(roi_out->width < 2 * radius + 1 || roi_out->height < 2 * radius + 1) goto ERROR_EXIT; float avg_edge_chroma = 0.0; float *const in = (float *const)i; float *const out = (float *const)o; int width = roi_in->width; int height = roi_in->height; dt_gaussian_t *gauss = NULL; gauss = dt_gaussian_init(width, height, ch, Labmax, Labmin, sigma, order); if(!gauss) { fprintf(stderr, "Error 
allocating memory for gaussian blur in: defringe module\n"); goto ERROR_EXIT; } dt_gaussian_blur(gauss, in, out); dt_gaussian_free(gauss); // Pre-Compute Fibonacci Lattices int *tmp; int samples_wish = radius * radius; int sampleidx_avg; // select samples by fibonacci number if(samples_wish > 89) { sampleidx_avg = 12; // 144 samples } else if(samples_wish > 55) { sampleidx_avg = 11; // 89 samples } else if(samples_wish > 34) { sampleidx_avg = 10; // ..you get the idea } else if(samples_wish > 21) { sampleidx_avg = 9; } else if(samples_wish > 13) { sampleidx_avg = 8; } else { // don't use less than 13 samples sampleidx_avg = 7; } const int sampleidx_small = sampleidx_avg - 1; const int small_radius = MAX(radius, 3); const int avg_radius = 24 + radius * 4; const int samples_small = fib[sampleidx_small]; const int samples_avg = fib[sampleidx_avg]; // precompute all required fibonacci lattices: if((xy_avg = malloc((size_t)2 * sizeof(int) * samples_avg))) { tmp = xy_avg; for(int u = 0; u < samples_avg; u++) { int dx, dy; fib_latt(&dx, &dy, avg_radius, u, sampleidx_avg); *tmp++ = dx; *tmp++ = dy; } } else { fprintf(stderr, "Error allocating memory for fibonacci lattice in: defringe module\n"); goto ERROR_EXIT; } if((xy_small = malloc((size_t)2 * sizeof(int) * samples_small))) { tmp = xy_small; for(int u = 0; u < samples_small; u++) { int dx, dy; fib_latt(&dx, &dy, small_radius, u, sampleidx_small); *tmp++ = dx; *tmp++ = dy; } } else { fprintf(stderr, "Error allocating memory for fibonacci lattice in: defringe module\n"); goto ERROR_EXIT; } #ifdef _OPENMP #pragma omp parallel for default(none) shared(width, height, \ d) reduction(+ : avg_edge_chroma) schedule(static) #endif for(int v = 0; v < height; v++) { for(int t = 0; t < width; t++) { // edge-detect on color channels // method: difference of original to gaussian blurred image: float a = in[(size_t)v * width * ch + t * ch + 1] - out[(size_t)v * width * ch + t * ch + 1]; float b = in[(size_t)v * width * ch + t * ch + 
2] - out[(size_t)v * width * ch + t * ch + 2]; float edge = (a * a + b * b); // range up to 2*(256)^2 -> approx. 0 to 131072 // save local edge chroma in out[.. +3] , this is later compared with threshold out[(size_t)v * width * ch + t * ch + 3] = edge; // the average chroma of the edge-layer in the roi if(MODE_GLOBAL_AVERAGE == d->op_mode) avg_edge_chroma += edge; } } float thresh; if(MODE_GLOBAL_AVERAGE == d->op_mode) { avg_edge_chroma = avg_edge_chroma / (width * height) + 10.0 * FLT_EPSILON; thresh = fmax(0.1f, 4.0 * d->thresh * avg_edge_chroma / MAGIC_THRESHOLD_COEFF); } else { // this fixed value will later be changed when doing local averaging, or kept as-is in "static" mode avg_edge_chroma = MAGIC_THRESHOLD_COEFF; thresh = fmax(0.1f, d->thresh); } #ifdef _OPENMP // dynamically/guided scheduled due to possible uneven edge-chroma distribution (thanks to rawtherapee code // for this hint!) #pragma omp parallel for default(none) shared(width, height, d, xy_small, xy_avg, xy_artifact) \ firstprivate(thresh, avg_edge_chroma) schedule(guided, 32) #endif for(int v = 0; v < height; v++) { for(int t = 0; t < width; t++) { float local_thresh = thresh; // think of compiler setting "-funswitch-loops" to maybe improve these things: if(MODE_LOCAL_AVERAGE == d->op_mode && out[(size_t)v * width * ch + t * ch + 3] > thresh) { float local_avg = 0.0; // use some and not all values from the neigbourhood to speed things up: const int *tmp = xy_avg; for(int u = 0; u < samples_avg; u++) { int dx = *tmp++; int dy = *tmp++; int x = MAX(0, MIN(width - 1, t + dx)); int y = MAX(0, MIN(height - 1, v + dy)); local_avg += out[(size_t)y * width * ch + x * ch + 3]; } avg_edge_chroma = fmax(0.01f, (float)local_avg / samples_avg); local_thresh = fmax(0.1f, 4.0 * d->thresh * avg_edge_chroma / MAGIC_THRESHOLD_COEFF); } if(out[(size_t)v * width * ch + t * ch + 3] > local_thresh // reduces artifacts ("region growing by 1 pixel"): || out[(size_t)MAX(0, (v - 1)) * width * ch + MAX(0, (t - 1)) * ch 
+ 3] > local_thresh || out[(size_t)MAX(0, (v - 1)) * width * ch + t * ch + 3] > local_thresh || out[(size_t)MAX(0, (v - 1)) * width * ch + MIN(width - 1, (t + 1)) * ch + 3] > local_thresh || out[(size_t)v * width * ch + MAX(0, (t - 1)) * ch + 3] > local_thresh || out[(size_t)v * width * ch + MIN(width - 1, (t + 1)) * ch + 3] > local_thresh || out[(size_t)MIN(height - 1, (v + 1)) * width * ch + MAX(0, (t - 1)) * ch + 3] > local_thresh || out[(size_t)MIN(height - 1, (v + 1)) * width * ch + t * ch + 3] > local_thresh || out[(size_t)MIN(height - 1, (v + 1)) * width * ch + MIN(width - 1, (t + 1)) * ch + 3] > local_thresh) { float atot = 0, btot = 0; float norm = 0; float weight; // it seems better to use only some pixels from a larger window instead of all pixels from a smaller // window // we use a fibonacci lattice for that, samples amount need to be a fibonacci number, this can then be // scaled to // a certain radius // use some neighbourhood pixels for lowest chroma average const int *tmp = xy_small; for(int u = 0; u < samples_small; u++) { int dx = *tmp++; int dy = *tmp++; int x = MAX(0, MIN(width - 1, t + dx)); int y = MAX(0, MIN(height - 1, v + dy)); // inverse chroma weighted average of neigbouring pixels inside window // also taking average edge chromaticity into account (either global or local average) weight = 1.0 / (out[(size_t)y * width * ch + x * ch + 3] + avg_edge_chroma); atot += weight * in[(size_t)y * width * ch + x * ch + 1]; btot += weight * in[(size_t)y * width * ch + x * ch + 2]; norm += weight; } // here we could try using a "balance" between original and changed value, this could be used to // reduce artifcats // but on first tries, results weren't very convincing, and there are blend settings available anyway // in dt // float balance = (out[v*width*ch +t*ch +3]-thresh)/out[v*width*ch +t*ch +3]; double a = (atot / norm); // *balance + in[v*width*ch + t*ch +1]*(1.0-balance); double b = (btot / norm); // *balance + in[v*width*ch + t*ch 
+2]*(1.0-balance); // if (a < -128.0 || a > 127.0) CLIP(a,-128.0,127.0); // if (b < -128.0 || b > 127.0) CLIP(b,-128.0,127.0); out[(size_t)v * width * ch + t * ch + 1] = a; out[(size_t)v * width * ch + t * ch + 2] = b; } else { out[(size_t)v * width * ch + t * ch + 1] = in[(size_t)v * width * ch + t * ch + 1]; out[(size_t)v * width * ch + t * ch + 2] = in[(size_t)v * width * ch + t * ch + 2]; } out[(size_t)v * width * ch + t * ch] = in[(size_t)v * width * ch + t * ch]; } } if(piece->pipe->mask_display) dt_iop_alpha_copy(i, o, roi_out->width, roi_out->height); goto FINISH_PROCESS; ERROR_EXIT: memcpy(o, i, (size_t)sizeof(float) * ch * roi_out->width * roi_out->height); FINISH_PROCESS: free(xy_artifact); free(xy_small); free(xy_avg); }