int process_cl (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out, const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out) { dt_iop_nlmeans_params_t *d = (dt_iop_nlmeans_params_t *)piece->data; dt_iop_nlmeans_global_data_t *gd = (dt_iop_nlmeans_global_data_t *)self->data; const int devid = piece->pipe->devid; const int width = roi_in->width; const int height = roi_in->height; cl_mem dev_U4 = NULL; cl_int err = -999; const int P = ceilf(d->radius * roi_in->scale / piece->iscale); // pixel filter size const int K = ceilf(7 * roi_in->scale / piece->iscale); // nbhood const float sharpness = 100000.0f/(1.0f+d->strength); if(P < 1) { size_t origin[] = { 0, 0, 0}; size_t region[] = { width, height, 1}; err = dt_opencl_enqueue_copy_image(devid, dev_in, dev_out, origin, origin, region); if (err != CL_SUCCESS) goto error; return TRUE; } float max_L = 120.0f, max_C = 512.0f; float nL = 1.0f/max_L, nC = 1.0f/max_C; float nL2 = nL*nL, nC2 = nC*nC; //float weight[4] = { powf(d->luma, 0.6), powf(d->chroma, 0.6), powf(d->chroma, 0.6), 1.0f }; float weight[4] = { d->luma, d->chroma, d->chroma, 1.0f }; dev_U4 = dt_opencl_alloc_device(devid, roi_out->width, roi_out->height, sizeof(float)); if (dev_U4 == NULL) goto error; // prepare local work group size_t maxsizes[3] = { 0 }; // the maximum dimensions for a work group size_t workgroupsize = 0; // the maximum number of items in a work group unsigned long localmemsize = 0; // the maximum amount of local memory we can use size_t kernelworkgroupsize = 0; // the maximum amount of items in work group of the kernel // assuming this is the same for nlmeans_horiz and nlmeans_vert // make sure blocksize is not too large int blocksize = BLOCKSIZE; if(dt_opencl_get_work_group_limits(devid, maxsizes, &workgroupsize, &localmemsize) == CL_SUCCESS && dt_opencl_get_kernel_work_group_size(devid, gd->kernel_nlmeans_horiz, &kernelworkgroupsize) == CL_SUCCESS) { // reduce blocksize step by step until it fits to 
limits while(blocksize > maxsizes[0] || blocksize > maxsizes[1] || blocksize > kernelworkgroupsize || blocksize > workgroupsize || (blocksize+2*P)*sizeof(float) > localmemsize) { if(blocksize == 1) break; blocksize >>= 1; } } else {
int process_cl (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out, const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out) { dt_iop_sharpen_data_t *d = (dt_iop_sharpen_data_t *)piece->data; dt_iop_sharpen_global_data_t *gd = (dt_iop_sharpen_global_data_t *)self->data; cl_mem dev_m = NULL; cl_mem dev_tmp = NULL; cl_int err = -999; const int devid = piece->pipe->devid; const int width = roi_in->width; const int height = roi_in->height; const int rad = MIN(MAXR, ceilf(d->radius * roi_in->scale / piece->iscale)); const int wd = 2*rad+1; float mat[wd]; if(rad == 0) { size_t origin[] = {0, 0, 0}; size_t region[] = {width, height, 1}; err = dt_opencl_enqueue_copy_image(devid, dev_in, dev_out, origin, origin, region); if (err != CL_SUCCESS) goto error; return TRUE; } // init gaussian kernel float *m = mat + rad; const float sigma2 = (1.0f/(2.5*2.5))*(d->radius*roi_in->scale/piece->iscale)*(d->radius*roi_in->scale/piece->iscale); float weight = 0.0f; for(int l=-rad; l<=rad; l++) weight += m[l] = expf(- (l*l)/(2.f*sigma2)); for(int l=-rad; l<=rad; l++) m[l] /= weight; // prepare local work group size_t maxsizes[3] = { 0 }; // the maximum dimensions for a work group size_t workgroupsize = 0; // the maximum number of items in a work group unsigned long localmemsize = 0; // the maximum amount of local memory we can use size_t kernelworkgroupsize = 0; // the maximum amount of items in work group for this kernel // make sure blocksize is not too large int blocksize = BLOCKSIZE; if(dt_opencl_get_work_group_limits(devid, maxsizes, &workgroupsize, &localmemsize) == CL_SUCCESS && dt_opencl_get_kernel_work_group_size(devid, gd->kernel_sharpen_hblur, &kernelworkgroupsize) == CL_SUCCESS) { // reduce blocksize step by step until it fits to limits while(blocksize > maxsizes[0] || blocksize > maxsizes[1] || blocksize > kernelworkgroupsize || blocksize > workgroupsize || (blocksize+2*rad)*sizeof(float) > localmemsize) { if(blocksize == 1) break; 
blocksize >>= 1; } } else {
/**
 * OpenCL path of the overexposed module.
 *
 * When dev->overexposed.enabled is false or no GUI is attached, dev_in is
 * copied to dev_out unchanged. Otherwise kernel_overexposed is enqueued over
 * the roi_out area with the lower/upper values (each divided by 100) and the
 * two colors of the selected color scheme.
 *
 * Returns TRUE on success; on an enqueue failure the error code is logged
 * and FALSE is returned.
 */
int process_cl (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out, const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_develop_t *develop = self->dev;
  dt_iop_overexposed_global_data_t *global = (dt_iop_overexposed_global_data_t *)self->data;

  const int devid = piece->pipe->devid;
  const int width = roi_out->width;
  const int height = roi_out->height;
  cl_int err = -999;

  const int indicator_active = develop->overexposed.enabled && develop->gui_attached;

  if(!indicator_active)
  {
    // nothing to mark: hand the image through untouched
    size_t origin[] = { 0, 0, 0 };
    size_t region[] = { width, height, 1 };
    err = dt_opencl_enqueue_copy_image(devid, dev_in, dev_out, origin, origin, region);
  }
  else
  {
    const float lower = develop->overexposed.lower / 100.0f;
    const float upper = develop->overexposed.upper / 100.0f;

    // entry [0] of a scheme is the color for the upper bound, entry [1] for the lower bound
    const int scheme = develop->overexposed.colorscheme;
    const float *upper_color = dt_iop_overexposed_colors[scheme][0];
    const float *lower_color = dt_iop_overexposed_colors[scheme][1];

    size_t sizes[2] = { ROUNDUPWD(width), ROUNDUPHT(height) };

    dt_opencl_set_kernel_arg(devid, global->kernel_overexposed, 0, sizeof(cl_mem), &dev_in);
    dt_opencl_set_kernel_arg(devid, global->kernel_overexposed, 1, sizeof(cl_mem), &dev_out);
    dt_opencl_set_kernel_arg(devid, global->kernel_overexposed, 2, sizeof(int), &width);
    dt_opencl_set_kernel_arg(devid, global->kernel_overexposed, 3, sizeof(int), &height);
    dt_opencl_set_kernel_arg(devid, global->kernel_overexposed, 4, sizeof(float), &lower);
    dt_opencl_set_kernel_arg(devid, global->kernel_overexposed, 5, sizeof(float), &upper);
    dt_opencl_set_kernel_arg(devid, global->kernel_overexposed, 6, 4*sizeof(float), lower_color);
    dt_opencl_set_kernel_arg(devid, global->kernel_overexposed, 7, 4*sizeof(float), upper_color);
    err = dt_opencl_enqueue_kernel_2d(devid, global->kernel_overexposed, sizes);
  }

  if(err == CL_SUCCESS) return TRUE;

  dt_print(DT_DEBUG_OPENCL, "[opencl_overexposed] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}
/**
 * OpenCL path of the mask manager: a plain pass-through.
 *
 * Copies the roi_in->width x roi_in->height area from dev_in to dev_out,
 * both starting at the origin.
 *
 * Returns TRUE on success; on failure the error code is logged and FALSE
 * is returned.
 */
int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  const int devid = piece->pipe->devid;

  // the same origin array serves as source and destination offset
  size_t origin[] = { 0, 0, 0 };
  size_t extent[] = { roi_in->width, roi_in->height, 1 };

  const cl_int err = dt_opencl_enqueue_copy_image(devid, dev_in, dev_out, origin, origin, extent);
  if(err == CL_SUCCESS) return TRUE;

  dt_print(DT_DEBUG_OPENCL, "[opencl_mask_manage] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}
/**
 * OpenCL path of the borders module.
 *
 * First enqueues kernel_borders_fill on dev_out with the output dimensions
 * and the configured color (alpha forced to 1.0), then copies the input
 * image from dev_in into dev_out at the computed offset.
 *
 * Returns TRUE on success; if either enqueue fails the error code is logged
 * and FALSE is returned.
 */
int process_cl (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out, const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_borders_data_t *data = (dt_iop_borders_data_t *)piece->data;
  dt_iop_borders_global_data_t *global = (dt_iop_borders_global_data_t *)self->data;

  const int devid = piece->pipe->devid;
  const int width = roi_out->width;
  const int height = roi_out->height;

  // size difference between output and input buffer, multiplied by roi_in->scale
  const int border_w = (piece->buf_out.width - piece->buf_in.width ) * roi_in->scale;
  const int border_h = (piece->buf_out.height - piece->buf_in.height) * roi_in->scale;

  // half of that difference, shifted by the roi_out position and never negative
  const int image_x = MAX(border_w/2 - roi_out->x, 0);
  const int image_y = MAX(border_h/2 - roi_out->y, 0);

  const float fill[4] = {data->color[0], data->color[1], data->color[2], 1.0f};
  size_t sizes[2] = { ROUNDUPWD(width), ROUNDUPHT(height) };

  dt_opencl_set_kernel_arg(devid, global->kernel_borders_fill, 0, sizeof(cl_mem), &dev_out);
  dt_opencl_set_kernel_arg(devid, global->kernel_borders_fill, 1, sizeof(int), &width);
  dt_opencl_set_kernel_arg(devid, global->kernel_borders_fill, 2, sizeof(int), &height);
  dt_opencl_set_kernel_arg(devid, global->kernel_borders_fill, 3, 4*sizeof(float), &fill);
  cl_int err = dt_opencl_enqueue_kernel_2d(devid, global->kernel_borders_fill, sizes);

  if(err == CL_SUCCESS)
  {
    // place the original input on top of the filled buffer
    size_t src_origin[] = { 0, 0, 0};
    size_t dst_origin[] = { image_x, image_y, 0};
    size_t extent[] = { roi_in->width, roi_in->height, 1};
    err = dt_opencl_enqueue_copy_image(devid, dev_in, dev_out, src_origin, dst_origin, extent);
  }

  if(err == CL_SUCCESS) return TRUE;

  dt_print(DT_DEBUG_OPENCL, "[opencl_borders] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}
/**
 * OpenCL path of the non-local means module (single-kernel variant).
 *
 * The pixel filter size and the neighbourhood size are derived from the
 * constants 3 and 7 scaled by roi_in->scale / piece->iscale. If the filter
 * size is <= 1, dev_in is copied to dev_out unchanged. Otherwise
 * kernel_nlmeans is enqueued with both sizes and the squared normalisation
 * factors for the luma and chroma parameters.
 *
 * Returns TRUE on success; on an enqueue failure the error code is logged
 * and FALSE is returned.
 */
int process_cl (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out, const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_nlmeans_params_t *params = (dt_iop_nlmeans_params_t *)piece->data;
  dt_iop_nlmeans_global_data_t *global = (dt_iop_nlmeans_global_data_t *)self->data;

  const int devid = piece->pipe->devid;
  const int width = roi_in->width;
  const int height = roi_in->height;
  cl_int err = -999;

  const int patch = ceilf(3 * roi_in->scale / piece->iscale); // pixel filter size
  const int nbhood = ceilf(7 * roi_in->scale / piece->iscale); // neighbourhood

  if(patch <= 1)
  {
    // filter size too small at this scale: pass the image through
    size_t origin[] = { 0, 0, 0};
    size_t region[] = { width, height, 1};
    err = dt_opencl_enqueue_copy_image(devid, dev_in, dev_out, origin, origin, region);
  }
  else
  {
    const float max_L = 100.0f;
    const float max_C = 256.0f;
    const float inv_L = 1.0f/(params->luma*max_L);
    const float inv_C = 1.0f/(params->chroma*max_C);
    // the kernel receives the squared reciprocals
    const float norm_L = inv_L * inv_L;
    const float norm_C = inv_C * inv_C;

    size_t sizes[] = { ROUNDUP(width, 4), ROUNDUP(height, 4), 1};

    dt_opencl_set_kernel_arg(devid, global->kernel_nlmeans, 0, sizeof(cl_mem), (void *)&dev_in);
    dt_opencl_set_kernel_arg(devid, global->kernel_nlmeans, 1, sizeof(cl_mem), (void *)&dev_out);
    dt_opencl_set_kernel_arg(devid, global->kernel_nlmeans, 2, sizeof(int), (void *)&width);
    dt_opencl_set_kernel_arg(devid, global->kernel_nlmeans, 3, sizeof(int), (void *)&height);
    dt_opencl_set_kernel_arg(devid, global->kernel_nlmeans, 4, sizeof(int32_t), (void *)&patch);
    dt_opencl_set_kernel_arg(devid, global->kernel_nlmeans, 5, sizeof(int32_t), (void *)&nbhood);
    dt_opencl_set_kernel_arg(devid, global->kernel_nlmeans, 6, sizeof(float), (void *)&norm_L);
    dt_opencl_set_kernel_arg(devid, global->kernel_nlmeans, 7, sizeof(float), (void *)&norm_C);
    err = dt_opencl_enqueue_kernel_2d(devid, global->kernel_nlmeans, sizes);
  }

  if(err == CL_SUCCESS) return TRUE;

  dt_print(DT_DEBUG_OPENCL, "[opencl_nlmeans] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}
/**
 * OpenCL path of the borders module, including the optional frame line.
 *
 * Steps, each one an enqueue on dev_out:
 *   1. kernel_borders_fill over the whole output roi with the border color;
 *   2. if the frame line size is non-zero: kernel_borders_fill with the frame
 *      color on the outer frame rectangle, then again with the border color
 *      on the inner frame rectangle;
 *   3. copy of the input image from dev_in to its offset inside dev_out.
 *
 * NOTE(review): kernel args 1-4 are presumably the x/y offset and
 * width/height of the rectangle to fill - confirm against the kernel source.
 *
 * Returns TRUE on success; if any enqueue fails the error code is logged
 * and FALSE is returned.
 */
int process_cl (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out, const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_borders_data_t *data = (dt_iop_borders_data_t *)piece->data;
  dt_iop_borders_global_data_t *global = (dt_iop_borders_global_data_t *)self->data;

  cl_int err = -999;
  const int devid = piece->pipe->devid;
  const int width = roi_out->width;
  const int height = roi_out->height;

  // size difference between output and input buffer, multiplied by roi_in->scale
  const int total_w = (piece->buf_out.width - piece->buf_in.width ) * roi_in->scale;
  const int total_h = (piece->buf_out.height - piece->buf_in.height) * roi_in->scale;

  // split of that total into the four sides according to pos_v / pos_h
  const int top = total_h*data->pos_v;
  const int bottom = total_h - top;
  const int left = total_w*data->pos_h;
  const int right = total_w - left;

  // image offset relative to the output roi, never negative
  const int image_x = MAX(left - roi_out->x, 0);
  const int image_y = MAX(top - roi_out->y, 0);

  // ----- step 1: border fill
  const float border_col[4] = {data->color[0], data->color[1], data->color[2], 1.0f};
  size_t sizes[2] = { ROUNDUPWD(width), ROUNDUPHT(height) };
  const int zero = 0;

  dt_opencl_set_kernel_arg(devid, global->kernel_borders_fill, 0, sizeof(cl_mem), &dev_out);
  dt_opencl_set_kernel_arg(devid, global->kernel_borders_fill, 1, sizeof(int), &zero);
  dt_opencl_set_kernel_arg(devid, global->kernel_borders_fill, 2, sizeof(int), &zero);
  dt_opencl_set_kernel_arg(devid, global->kernel_borders_fill, 3, sizeof(int), &width);
  dt_opencl_set_kernel_arg(devid, global->kernel_borders_fill, 4, sizeof(int), &height);
  dt_opencl_set_kernel_arg(devid, global->kernel_borders_fill, 5, 4*sizeof(float), &border_col);
  err = dt_opencl_enqueue_kernel_2d(devid, global->kernel_borders_fill, sizes);
  if(err != CL_SUCCESS) goto error;

  // ----- step 2: frame line
  const int narrowest = MIN(MIN(top, bottom), MIN(left, right));
  const int line_size = narrowest * data->frame_size;

  if(line_size != 0)
  {
    const float line_col[4] = {data->frame_color[0], data->frame_color[1], data->frame_color[2], 1.0f};

    // image offset without the clamp to zero
    const int raw_x = left - roi_out->x;
    const int raw_y = top - roi_out->y;

    // distance between the image and the inner edge of the frame line
    const int gap = (narrowest - line_size) * data->frame_offset;

    // top-left corners of the inner and outer frame rectangles
    const int in_x0 = MAX(image_x - gap, 0);
    const int out_x0 = MAX(in_x0 - line_size, 0);
    const int in_y0 = MAX(image_y - gap, 0);
    const int out_y0 = MAX(in_y0 - line_size, 0);

    // unclipped extents of both rectangles
    const int in_full_w = floor((piece->buf_in.width * roi_in->scale) + gap*2);
    const int in_full_h = floor((piece->buf_in.height * roi_in->scale) + gap*2);
    const int out_full_w = in_full_w + line_size*2;
    const int out_full_h = in_full_h + line_size*2;

    // bottom-right corners, clipped to the output roi
    const int in_x1 = CLAMP(raw_x - gap + in_full_w, 0, roi_out->width);
    const int in_y1 = CLAMP(raw_y - gap + in_full_h, 0, roi_out->height);

    // with a frame offset of exactly 1.0 the outer edge is pinned to the roi edge
    const int pinned = (data->frame_offset == 1.0f);
    const int out_x1 = pinned ? (roi_out->width) : CLAMP(raw_x - gap - line_size + out_full_w, 0, roi_out->width);
    const int out_y1 = pinned ? (roi_out->height) : CLAMP(raw_y - gap - line_size + out_full_h, 0, roi_out->height);

    const int in_w = in_x1 - in_x0;
    const int in_h = in_y1 - in_y0;
    const int out_w = out_x1 - out_x0;
    const int out_h = out_y1 - out_y0;

    // outer rectangle in frame color
    dt_opencl_set_kernel_arg(devid, global->kernel_borders_fill, 0, sizeof(cl_mem), &dev_out);
    dt_opencl_set_kernel_arg(devid, global->kernel_borders_fill, 1, sizeof(int), &out_x0);
    dt_opencl_set_kernel_arg(devid, global->kernel_borders_fill, 2, sizeof(int), &out_y0);
    dt_opencl_set_kernel_arg(devid, global->kernel_borders_fill, 3, sizeof(int), &out_w);
    dt_opencl_set_kernel_arg(devid, global->kernel_borders_fill, 4, sizeof(int), &out_h);
    dt_opencl_set_kernel_arg(devid, global->kernel_borders_fill, 5, 4*sizeof(float), &line_col);
    err = dt_opencl_enqueue_kernel_2d(devid, global->kernel_borders_fill, sizes);
    if(err != CL_SUCCESS) goto error;

    // inner rectangle back in border color
    dt_opencl_set_kernel_arg(devid, global->kernel_borders_fill, 0, sizeof(cl_mem), &dev_out);
    dt_opencl_set_kernel_arg(devid, global->kernel_borders_fill, 1, sizeof(int), &in_x0);
    dt_opencl_set_kernel_arg(devid, global->kernel_borders_fill, 2, sizeof(int), &in_y0);
    dt_opencl_set_kernel_arg(devid, global->kernel_borders_fill, 3, sizeof(int), &in_w);
    dt_opencl_set_kernel_arg(devid, global->kernel_borders_fill, 4, sizeof(int), &in_h);
    dt_opencl_set_kernel_arg(devid, global->kernel_borders_fill, 5, 4*sizeof(float), &border_col);
    err = dt_opencl_enqueue_kernel_2d(devid, global->kernel_borders_fill, sizes);
    if(err != CL_SUCCESS) goto error;
  }

  // ----- step 3: place the original input inside the border
  size_t src_origin[] = { 0, 0, 0};
  size_t dst_origin[] = { image_x, image_y, 0};
  size_t extent[] = { roi_in->width, roi_in->height, 1};

  err = dt_opencl_enqueue_copy_image(devid, dev_in, dev_out, src_origin, dst_origin, extent);
  if(err != CL_SUCCESS) goto error;

  return TRUE;

error:
  dt_print(DT_DEBUG_OPENCL, "[opencl_borders] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}
/**
 * OpenCL path of the output color profile module.
 *
 * If the target type is DT_COLORSPACE_LAB the input is copied to dev_out
 * unchanged. Otherwise the 3x3 color matrix, the three 256x256 float lookup
 * tables and the 3x3 unbounded coefficients are uploaded to the device and
 * kernel_colorout is enqueued over the roi_in area.
 *
 * All device buffers uploaded here are released before returning, on
 * success as well as on failure.
 *
 * Returns TRUE on success; on an upload or enqueue failure the error code
 * is logged and FALSE is returned.
 */
int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  dt_iop_colorout_data_t *data = (dt_iop_colorout_data_t *)piece->data;
  dt_iop_colorout_global_data_t *global = (dt_iop_colorout_global_data_t *)self->data;

  cl_mem dev_matrix = NULL;
  cl_mem dev_lut_r = NULL;
  cl_mem dev_lut_g = NULL;
  cl_mem dev_lut_b = NULL;
  cl_mem dev_unbounded = NULL;

  cl_int err = -999;
  int succeeded = FALSE;
  const int devid = piece->pipe->devid;
  const int width = roi_in->width;
  const int height = roi_in->height;

  if(data->type == DT_COLORSPACE_LAB)
  {
    // Lab output: no conversion, pass the image through
    size_t origin[] = { 0, 0, 0 };
    size_t region[] = { roi_in->width, roi_in->height, 1 };
    err = dt_opencl_enqueue_copy_image(devid, dev_in, dev_out, origin, origin, region);
    if(err != CL_SUCCESS) goto cleanup;
    return TRUE;
  }

  size_t sizes[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1 };

  // upload matrix, per-channel tables and coefficients; stop at the first failure
  dev_matrix = dt_opencl_copy_host_to_device_constant(devid, sizeof(float) * 9, data->cmatrix);
  if(dev_matrix == NULL) goto cleanup;

  dev_lut_r = dt_opencl_copy_host_to_device(devid, data->lut[0], 256, 256, sizeof(float));
  if(dev_lut_r == NULL) goto cleanup;

  dev_lut_g = dt_opencl_copy_host_to_device(devid, data->lut[1], 256, 256, sizeof(float));
  if(dev_lut_g == NULL) goto cleanup;

  dev_lut_b = dt_opencl_copy_host_to_device(devid, data->lut[2], 256, 256, sizeof(float));
  if(dev_lut_b == NULL) goto cleanup;

  dev_unbounded = dt_opencl_copy_host_to_device_constant(devid, sizeof(float) * 3 * 3, (float *)data->unbounded_coeffs);
  if(dev_unbounded == NULL) goto cleanup;

  dt_opencl_set_kernel_arg(devid, global->kernel_colorout, 0, sizeof(cl_mem), (void *)&dev_in);
  dt_opencl_set_kernel_arg(devid, global->kernel_colorout, 1, sizeof(cl_mem), (void *)&dev_out);
  dt_opencl_set_kernel_arg(devid, global->kernel_colorout, 2, sizeof(int), (void *)&width);
  dt_opencl_set_kernel_arg(devid, global->kernel_colorout, 3, sizeof(int), (void *)&height);
  dt_opencl_set_kernel_arg(devid, global->kernel_colorout, 4, sizeof(cl_mem), (void *)&dev_matrix);
  dt_opencl_set_kernel_arg(devid, global->kernel_colorout, 5, sizeof(cl_mem), (void *)&dev_lut_r);
  dt_opencl_set_kernel_arg(devid, global->kernel_colorout, 6, sizeof(cl_mem), (void *)&dev_lut_g);
  dt_opencl_set_kernel_arg(devid, global->kernel_colorout, 7, sizeof(cl_mem), (void *)&dev_lut_b);
  dt_opencl_set_kernel_arg(devid, global->kernel_colorout, 8, sizeof(cl_mem), (void *)&dev_unbounded);
  err = dt_opencl_enqueue_kernel_2d(devid, global->kernel_colorout, sizes);
  if(err != CL_SUCCESS) goto cleanup;

  succeeded = TRUE;

cleanup:
  // shared by the success and failure paths; handles not yet uploaded are still NULL
  dt_opencl_release_mem_object(dev_matrix);
  dt_opencl_release_mem_object(dev_lut_r);
  dt_opencl_release_mem_object(dev_lut_g);
  dt_opencl_release_mem_object(dev_lut_b);
  dt_opencl_release_mem_object(dev_unbounded);

  if(succeeded) return TRUE;

  dt_print(DT_DEBUG_OPENCL, "[opencl_colorout] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}