/* * WARNING: unlike commit_params, which is thread safe wrt gui thread and * pipes, this function lives in the pipeline thread, and NOT thread safe! */ static void commit_params_late(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece) { dt_iop_levels_data_t *d = (dt_iop_levels_data_t *)piece->data; dt_iop_levels_gui_data_t *g = (dt_iop_levels_gui_data_t *)self->gui_data; if(d->mode == LEVELS_MODE_AUTOMATIC) { if(g && piece->pipe->type == DT_DEV_PIXELPIPE_FULL) { dt_pthread_mutex_lock(&g->lock); const uint64_t hash = g->hash; dt_pthread_mutex_unlock(&g->lock); // note that the case 'hash == 0' on first invocation in a session implies that d->levels[] // contains NANs which initiates special handling below to avoid inconsistent results. in all // other cases we make sure that the preview pipe has left us with proper readings for // g->auto_levels[]. if data are not yet there we need to wait (with timeout). if(hash != 0 && !dt_dev_sync_pixelpipe_hash(self->dev, piece->pipe, 0, self->priority, &g->lock, &g->hash)) dt_control_log(_("inconsistent output")); dt_pthread_mutex_lock(&g->lock); d->levels[0] = g->auto_levels[0]; d->levels[1] = g->auto_levels[1]; d->levels[2] = g->auto_levels[2]; dt_pthread_mutex_unlock(&g->lock); compute_lut(piece); } if(piece->pipe->type == DT_DEV_PIXELPIPE_PREVIEW || isnan(d->levels[0]) || isnan(d->levels[1]) || isnan(d->levels[2])) { dt_iop_levels_compute_levels_automatic(piece); compute_lut(piece); } if(g && piece->pipe->type == DT_DEV_PIXELPIPE_PREVIEW && d->mode == LEVELS_MODE_AUTOMATIC) { uint64_t hash = dt_dev_hash_plus(self->dev, piece->pipe, 0, self->priority); dt_pthread_mutex_lock(&g->lock); g->auto_levels[0] = d->levels[0]; g->auto_levels[1] = d->levels[1]; g->auto_levels[2] = d->levels[2]; g->hash = hash; dt_pthread_mutex_unlock(&g->lock); } } }
static inline void process_drago(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const void *const ivoid, void *const ovoid, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out, dt_iop_global_tonemap_data_t *data) { dt_iop_global_tonemap_gui_data_t *g = (dt_iop_global_tonemap_gui_data_t *)self->gui_data; float *in = (float *)ivoid; float *out = (float *)ovoid; const int ch = piece->colors; /* precalcs */ const float eps = 0.0001f; float lwmax; float tmp_lwmax = NAN; // Drago needs the absolute Lmax value of the image. In pixelpipe FULL we can not reliably get this value // as the pixelpipe might only see part of the image (region of interest). Therefore we try to get lwmax from // the PREVIEW pixelpipe which luckily stores it for us. if(self->dev->gui_attached && g && piece->pipe->type == DT_DEV_PIXELPIPE_FULL) { dt_pthread_mutex_lock(&g->lock); const uint64_t hash = g->hash; dt_pthread_mutex_unlock(&g->lock); // note that the case 'hash == 0' on first invocation in a session implies that g->lwmax // is NAN which initiates special handling below to avoid inconsistent results. in all // other cases we make sure that the preview pipe has left us with proper readings for // lwmax. if data are not yet there we need to wait (with timeout). if(hash != 0 && !dt_dev_sync_pixelpipe_hash(self->dev, piece->pipe, 0, self->priority, &g->lock, &g->hash)) dt_control_log(_("inconsistent output")); dt_pthread_mutex_lock(&g->lock); tmp_lwmax = g->lwmax; dt_pthread_mutex_unlock(&g->lock); } // in all other cases we calculate lwmax here if(isnan(tmp_lwmax)) { lwmax = eps; for(size_t k = 0; k < (size_t)roi_out->width * roi_out->height; k++) { float *inp = in + ch * k; lwmax = fmaxf(lwmax, (inp[0] * 0.01f)); } } else { lwmax = tmp_lwmax; } // PREVIEW pixelpipe stores lwmax if(self->dev->gui_attached && g && piece->pipe->type == DT_DEV_PIXELPIPE_PREVIEW) { uint64_t hash = dt_dev_hash_plus(self->dev, piece->pipe, 0, self->priority); dt_pthread_mutex_lock(&g->lock); g->lwmax = lwmax; g->hash = hash; dt_pthread_mutex_unlock(&g->lock); } const float ldc = data->drago.max_light * 0.01 / log10f(lwmax + 1); const float bl = logf(fmaxf(eps, data->drago.bias)) / logf(0.5); #ifdef _OPENMP #pragma omp parallel for default(none) shared(in, out, lwmax) schedule(static) #endif for(size_t k = 0; k < (size_t)roi_out->width * roi_out->height; k++) { float *inp = in + ch * k; float *outp = out + ch * k; float lw = inp[0] * 0.01f; outp[0] = 100.0f * (ldc * logf(fmaxf(eps, lw + 1.0f)) / logf(fmaxf(eps, 2.0f + (powf(lw / lwmax, bl)) * 8.0f))); outp[1] = inp[1]; outp[2] = inp[2]; } }
int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out) { dt_iop_global_tonemap_data_t *d = (dt_iop_global_tonemap_data_t *)piece->data; dt_iop_global_tonemap_global_data_t *gd = (dt_iop_global_tonemap_global_data_t *)self->data; dt_iop_global_tonemap_gui_data_t *g = (dt_iop_global_tonemap_gui_data_t *)self->gui_data; dt_bilateral_cl_t *b = NULL; cl_int err = -999; cl_mem dev_m = NULL; cl_mem dev_r = NULL; float *maximum = NULL; const int devid = piece->pipe->devid; int gtkernel = -1; const int width = roi_out->width; const int height = roi_out->height; float parameters[4] = { 0.0f }; switch(d->operator) { case OPERATOR_REINHARD: gtkernel = gd->kernel_global_tonemap_reinhard; break; case OPERATOR_DRAGO: gtkernel = gd->kernel_global_tonemap_drago; break; case OPERATOR_FILMIC: gtkernel = gd->kernel_global_tonemap_filmic; break; } if(d->operator== OPERATOR_DRAGO) { const float eps = 0.0001f; float tmp_lwmax = NAN; // see comments in process() about lwmax value if(self->dev->gui_attached && g && piece->pipe->type == DT_DEV_PIXELPIPE_FULL) { dt_pthread_mutex_lock(&g->lock); const uint64_t hash = g->hash; dt_pthread_mutex_unlock(&g->lock); if(hash != 0 && !dt_dev_sync_pixelpipe_hash(self->dev, piece->pipe, 0, self->priority, &g->lock, &g->hash)) dt_control_log(_("inconsistent output")); dt_pthread_mutex_lock(&g->lock); tmp_lwmax = g->lwmax; dt_pthread_mutex_unlock(&g->lock); } if(isnan(tmp_lwmax)) { dt_opencl_local_buffer_t flocopt = (dt_opencl_local_buffer_t){ .xoffset = 0, .xfactor = 1, .yoffset = 0, .yfactor = 1, .cellsize = sizeof(float), .overhead = 0, .sizex = 1 << 4, .sizey = 1 << 4 }; if(!dt_opencl_local_buffer_opt(devid, gd->kernel_pixelmax_first, &flocopt)) goto error; const size_t bwidth = ROUNDUP(width, flocopt.sizex); const size_t bheight = ROUNDUP(height, flocopt.sizey); const int bufsize = (bwidth / flocopt.sizex) * (bheight / flocopt.sizey); dt_opencl_local_buffer_t slocopt = (dt_opencl_local_buffer_t){ .xoffset = 0, .xfactor = 1, .yoffset = 0, .yfactor = 1, .cellsize = sizeof(float), .overhead = 0, .sizex = 1 << 16, .sizey = 1 }; if(!dt_opencl_local_buffer_opt(devid, gd->kernel_pixelmax_second, &slocopt)) goto error; const int reducesize = MIN(REDUCESIZE, ROUNDUP(bufsize, slocopt.sizex) / slocopt.sizex); size_t sizes[3]; size_t local[3]; dev_m = dt_opencl_alloc_device_buffer(devid, (size_t)bufsize * sizeof(float)); if(dev_m == NULL) goto error; dev_r = dt_opencl_alloc_device_buffer(devid, (size_t)reducesize * sizeof(float)); if(dev_r == NULL) goto error; sizes[0] = bwidth; sizes[1] = bheight; sizes[2] = 1; local[0] = flocopt.sizex; local[1] = flocopt.sizey; local[2] = 1; dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_first, 0, sizeof(cl_mem), &dev_in); dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_first, 1, sizeof(int), &width); dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_first, 2, sizeof(int), &height); dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_first, 3, sizeof(cl_mem), &dev_m); dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_first, 4, flocopt.sizex * flocopt.sizey * sizeof(float), NULL); err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_pixelmax_first, sizes, local); if(err != CL_SUCCESS) goto error; sizes[0] = reducesize * slocopt.sizex; sizes[1] = 1; sizes[2] = 1; local[0] = slocopt.sizex; local[1] = 1; local[2] = 1; dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_second, 0, sizeof(cl_mem), &dev_m); dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_second, 1, sizeof(cl_mem), &dev_r); dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_second, 2, sizeof(int), &bufsize); dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_second, 3, slocopt.sizex * sizeof(float), NULL); err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_pixelmax_second, sizes, local); if(err != CL_SUCCESS) goto error; maximum = dt_alloc_align(16, reducesize * sizeof(float)); err = dt_opencl_read_buffer_from_device(devid, (void *)maximum, dev_r, 0, (size_t)reducesize * sizeof(float), CL_TRUE); if(err != CL_SUCCESS) goto error; dt_opencl_release_mem_object(dev_r); dt_opencl_release_mem_object(dev_m); dev_r = dev_m = NULL; for(int k = 1; k < reducesize; k++) { float mine = maximum[0]; float other = maximum[k]; maximum[0] = (other > mine) ? other : mine; } tmp_lwmax = MAX(eps, (maximum[0] * 0.01f)); dt_free_align(maximum); maximum = NULL; } const float lwmax = tmp_lwmax; const float ldc = d->drago.max_light * 0.01f / log10f(lwmax + 1.0f); const float bl = logf(MAX(eps, d->drago.bias)) / logf(0.5f); parameters[0] = eps; parameters[1] = ldc; parameters[2] = bl; parameters[3] = lwmax; if(self->dev->gui_attached && g && piece->pipe->type == DT_DEV_PIXELPIPE_PREVIEW) { uint64_t hash = dt_dev_hash_plus(self->dev, piece->pipe, 0, self->priority); dt_pthread_mutex_lock(&g->lock); g->lwmax = lwmax; g->hash = hash; dt_pthread_mutex_unlock(&g->lock); } } const float scale = piece->iscale / roi_in->scale; const float sigma_r = 8.0f; // does not depend on scale const float iw = piece->buf_in.width / scale; const float ih = piece->buf_in.height / scale; const float sigma_s = fminf(iw, ih) * 0.03f; if(d->detail != 0.0f) { b = dt_bilateral_init_cl(devid, roi_in->width, roi_in->height, sigma_s, sigma_r); if(!b) goto error; // get detail from unchanged input buffer err = dt_bilateral_splat_cl(b, dev_in); if(err != CL_SUCCESS) goto error; } size_t sizes[2] = { ROUNDUPWD(width), ROUNDUPHT(height) }; dt_opencl_set_kernel_arg(devid, gtkernel, 0, sizeof(cl_mem), &dev_in); dt_opencl_set_kernel_arg(devid, gtkernel, 1, sizeof(cl_mem), &dev_out); dt_opencl_set_kernel_arg(devid, gtkernel, 2, sizeof(int), &width); dt_opencl_set_kernel_arg(devid, gtkernel, 3, sizeof(int), &height); dt_opencl_set_kernel_arg(devid, gtkernel, 4, 4 * sizeof(float), ¶meters); err = dt_opencl_enqueue_kernel_2d(devid, gtkernel, sizes); if(err != CL_SUCCESS) goto error; if(d->detail != 0.0f) { err = dt_bilateral_blur_cl(b); if(err != CL_SUCCESS) goto error; // and apply it to output buffer after logscale err = dt_bilateral_slice_to_output_cl(b, dev_in, dev_out, d->detail); if(err != CL_SUCCESS) goto error; dt_bilateral_free_cl(b); } return TRUE; error: if(b) dt_bilateral_free_cl(b); dt_opencl_release_mem_object(dev_m); dt_opencl_release_mem_object(dev_r); dt_free_align(maximum); dt_print(DT_DEBUG_OPENCL, "[opencl_global_tonemap] couldn't enqueue kernel! %d\n", err); return FALSE; } #endif void tiling_callback(struct dt_iop_module_t *self, struct dt_dev_pixelpipe_iop_t *piece, const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out, struct dt_develop_tiling_t *tiling) { dt_iop_global_tonemap_data_t *d = (dt_iop_global_tonemap_data_t *)piece->data; const float scale = piece->iscale / roi_in->scale; const float iw = piece->buf_in.width / scale; const float ih = piece->buf_in.height / scale; const float sigma_s = fminf(iw, ih) * 0.03f; const float sigma_r = 8.0f; const int detail = (d->detail != 0.0f); const int width = roi_in->width; const int height = roi_in->height; const int channels = piece->colors; const size_t basebuffer = width * height * channels * sizeof(float); tiling->factor = 2.0f + (detail ? (float)dt_bilateral_memory_use2(width, height, sigma_s, sigma_r) / basebuffer : 0.0f); tiling->maxbuf = (detail ? MAX(1.0f, (float)dt_bilateral_singlebuffer_size2(width, height, sigma_s, sigma_r) / basebuffer) : 1.0f); tiling->overhead = 0; tiling->overlap = (detail ? ceilf(4 * sigma_s) : 0); tiling->xalign = 1; tiling->yalign = 1; return; } void commit_params(struct dt_iop_module_t *self, dt_iop_params_t *p1, dt_dev_pixelpipe_t *pipe, dt_dev_pixelpipe_iop_t *piece) { dt_iop_global_tonemap_params_t *p = (dt_iop_global_tonemap_params_t *)p1; dt_iop_global_tonemap_data_t *d = (dt_iop_global_tonemap_data_t *)piece->data; d->operator= p->operator; d->drago.bias = p->drago.bias; d->drago.max_light = p->drago.max_light; d->detail = p->detail; // drago needs the maximum L-value of the whole image so it must not use tiling if(d->operator == OPERATOR_DRAGO) piece->process_tiling_ready = 0; #ifdef HAVE_OPENCL if(d->detail != 0.0f) piece->process_cl_ready = (piece->process_cl_ready && !(darktable.opencl->avoid_atomics)); #endif }
void process_sse2(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, const void *const i, void *const o, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out) { // this is called for preview and full pipe separately, each with its own pixelpipe piece. // get our data struct: dt_iop_bilat_data_t *d = (dt_iop_bilat_data_t *)piece->data; dt_iop_bilat_gui_data_t *g = self->gui_data; // the total scale is composed of scale before input to the pipeline (iscale), // and the scale of the roi. const float scale = piece->iscale / roi_in->scale; const float sigma_r = d->sigma_r; // does not depend on scale const float sigma_s = d->sigma_s / scale; if(d->mode == s_mode_bilateral) { dt_bilateral_t *b = dt_bilateral_init(roi_in->width, roi_in->height, sigma_s, sigma_r); dt_bilateral_splat(b, (float *)i); dt_bilateral_blur(b); dt_bilateral_slice(b, (float *)i, (float *)o, d->detail); dt_bilateral_free(b); } else // s_mode_local_laplacian { local_laplacian_boundary_t b = {0}; if(self->dev->gui_attached && g && piece->pipe->type == DT_DEV_PIXELPIPE_PREVIEW) { b.mode = 1; } else if(self->dev->gui_attached && g && piece->pipe->type == DT_DEV_PIXELPIPE_FULL) { // full pipeline working on ROI needs boundary conditions from preview pipe // only do this if roi covers less than 90% of full width if(MIN(roi_in->width/roi_in->scale / piece->buf_in.width, roi_in->height/roi_in->scale / piece->buf_in.height) < 0.9) { dt_pthread_mutex_lock(&g->lock); const uint64_t hash = g->hash; dt_pthread_mutex_unlock(&g->lock); if(hash != 0 && !dt_dev_sync_pixelpipe_hash(self->dev, piece->pipe, 0, self->priority, &g->lock, &g->hash)) { // TODO: remove this debug output at some point: dt_control_log(_("local laplacian: inconsistent output")); } else { dt_pthread_mutex_lock(&g->lock); // grab preview pipe buffers here: b = g->ll_boundary; dt_pthread_mutex_unlock(&g->lock); if(b.wd > 0 && b.ht > 0) b.mode = 2; } } } b.roi = roi_in; b.buf = &piece->buf_in; // also lock the ll_boundary in case we're using it. // could get away without this if the preview pipe didn't also free the data below. const int lockit = self->dev->gui_attached && g && piece->pipe->type == DT_DEV_PIXELPIPE_FULL; if(lockit) { dt_pthread_mutex_lock(&g->lock); local_laplacian_sse2(i, o, roi_in->width, roi_in->height, d->midtone, d->sigma_s, d->sigma_r, d->detail, &b); dt_pthread_mutex_unlock(&g->lock); } else local_laplacian_sse2(i, o, roi_in->width, roi_in->height, d->midtone, d->sigma_s, d->sigma_r, d->detail, &b); // preview pixelpipe stores values. if(self->dev->gui_attached && g && piece->pipe->type == DT_DEV_PIXELPIPE_PREVIEW) { uint64_t hash = dt_dev_hash_plus(self->dev, piece->pipe, 0, self->priority); dt_pthread_mutex_lock(&g->lock); // store buffer pointers on gui struct. maybe need to swap/free old ones local_laplacian_boundary_free(&g->ll_boundary); g->ll_boundary = b; g->hash = hash; dt_pthread_mutex_unlock(&g->lock); } } if(piece->pipe->mask_display & DT_DEV_PIXELPIPE_DISPLAY_MASK) dt_iop_alpha_copy(i, o, roi_in->width, roi_in->height); }