/*
 * OpenCL code path of the relight module.
 *
 * Hands the input and output images, the ROI dimensions and the three
 * relight parameters (center, width, ev) read from piece->data to the
 * relight kernel and enqueues it.
 *
 * self     module instance; self->data holds the global kernel handles
 * piece    pixelpipe piece; piece->data holds the committed parameters
 * dev_in   input image on the device
 * dev_out  output image on the device
 * roi_in   input region; its width/height size the kernel launch
 * roi_out  output region (not read here)
 *
 * Returns TRUE once the kernel has been enqueued, FALSE if enqueueing
 * failed (the OpenCL error code is logged via dt_print).
 */
int process_cl (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
                const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_relight_data_t *data = (dt_iop_relight_data_t *)piece->data;
  dt_iop_relight_global_data_t *gd = (dt_iop_relight_global_data_t *)self->data;

  cl_int err = -999;
  const int devid = piece->pipe->devid;
  const int width = roi_in->width;
  const int height = roi_in->height;

  /* scalar kernel arguments are copied to locals so their addresses can be passed */
  const float center = data->center;
  const float wings = data->width;
  const float ev = data->ev;

  /* launch size: ROI dimensions passed through ROUNDUPWD / ROUNDUPHT */
  size_t sizes[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1};

  dt_opencl_set_kernel_arg(devid, gd->kernel_relight, 0, sizeof(cl_mem), (void *)&dev_in);
  dt_opencl_set_kernel_arg(devid, gd->kernel_relight, 1, sizeof(cl_mem), (void *)&dev_out);
  dt_opencl_set_kernel_arg(devid, gd->kernel_relight, 2, sizeof(int), (void *)&width);
  dt_opencl_set_kernel_arg(devid, gd->kernel_relight, 3, sizeof(int), (void *)&height);
  dt_opencl_set_kernel_arg(devid, gd->kernel_relight, 4, sizeof(float), (void *)&center);
  dt_opencl_set_kernel_arg(devid, gd->kernel_relight, 5, sizeof(float), (void *)&wings);
  dt_opencl_set_kernel_arg(devid, gd->kernel_relight, 6, sizeof(float), (void *)&ev);

  err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_relight, sizes);
  if(err != CL_SUCCESS) goto error;
  return TRUE;

error:
  dt_print(DT_DEBUG_OPENCL, "[opencl_relight] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}
/*
 * OpenCL code path of the highlights module (single-kernel variant).
 *
 * The clipping threshold is the configured clip value multiplied by the
 * smallest of the three processed maxima of the pipe. It is passed to the
 * highlights kernel together with the mode and the three blend factors.
 *
 * Returns TRUE when the kernel was enqueued, FALSE otherwise (the OpenCL
 * error code is logged).
 */
int process_cl (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
                const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_highlights_data_t *params = (dt_iop_highlights_data_t *)piece->data;
  dt_iop_highlights_global_data_t *global = (dt_iop_highlights_global_data_t *)self->data;

  cl_int status = -999;
  const int devid = piece->pipe->devid;
  const int width = roi_in->width;
  const int height = roi_in->height;
  const int kernel = global->kernel_highlights;

  size_t work_size[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1};

  /* threshold relative to the lowest per-channel maximum */
  const float clip = params->clip
                     * fminf(piece->pipe->processed_maximum[0],
                             fminf(piece->pipe->processed_maximum[1], piece->pipe->processed_maximum[2]));

  /* images and dimensions */
  dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), (void *)&dev_in);
  dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(cl_mem), (void *)&dev_out);
  dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), (void *)&width);
  dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), (void *)&height);
  /* module parameters */
  dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(int), (void *)&params->mode);
  dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(float), (void *)&clip);
  dt_opencl_set_kernel_arg(devid, kernel, 6, sizeof(float), (void *)&params->blendL);
  dt_opencl_set_kernel_arg(devid, kernel, 7, sizeof(float), (void *)&params->blendC);
  dt_opencl_set_kernel_arg(devid, kernel, 8, sizeof(float), (void *)&params->blendh);

  status = dt_opencl_enqueue_kernel_2d(devid, kernel, work_size);
  if(status == CL_SUCCESS) return TRUE;

  dt_print(DT_DEBUG_OPENCL, "[opencl_highlights] couldn't enqueue kernel! %d\n", status);
  return FALSE;
}
/*
 * OpenCL code path of the exposure module.
 *
 * Calls commit_params_late() first, then derives the white point from the
 * exposure value and passes black and scale = 1 / (white - black) to the
 * exposure kernel. On success the pipe's three processed maxima are
 * multiplied by the same scale.
 *
 * Returns TRUE when the kernel was enqueued, FALSE otherwise (the OpenCL
 * error code is logged and the processed maxima are left untouched).
 */
int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
               const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_exposure_data_t *params = (dt_iop_exposure_data_t *)piece->data;
  dt_iop_exposure_global_data_t *global = (dt_iop_exposure_global_data_t *)self->data;

  /* must run before the parameters are read below */
  commit_params_late(self, piece);

  cl_int status = -999;
  const float black = params->black;
  const float white = exposure2white(params->exposure);
  const float scale = 1.0 / (white - black);

  const int devid = piece->pipe->devid;
  const int width = roi_in->width;
  const int height = roi_in->height;
  const int kernel = global->kernel_exposure;

  size_t work_size[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1 };

  dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), (void *)&dev_in);
  dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(cl_mem), (void *)&dev_out);
  dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), (void *)&width);
  dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), (void *)&height);
  dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(float), (void *)&black);
  dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(float), (void *)&scale);

  status = dt_opencl_enqueue_kernel_2d(devid, kernel, work_size);
  if(status != CL_SUCCESS)
  {
    dt_print(DT_DEBUG_OPENCL, "[opencl_exposure] couldn't enqueue kernel! %d\n", status);
    return FALSE;
  }

  /* apply the same scale to the recorded per-channel maxima */
  for(int ch = 0; ch < 3; ch++)
  {
    piece->pipe->processed_maximum[ch] *= scale;
  }
  return TRUE;
}
/*
 * OpenCL code path of the colorcorrection module.
 *
 * Passes the saturation and the a/b scale and base values from piece->data
 * to the colorcorrection kernel, launched over the output ROI.
 *
 * Returns TRUE when the kernel was enqueued, FALSE otherwise (the OpenCL
 * error code is logged).
 */
int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
               const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_colorcorrection_data_t *params = (dt_iop_colorcorrection_data_t *)piece->data;
  dt_iop_colorcorrection_global_data_t *global = (dt_iop_colorcorrection_global_data_t *)self->data;

  cl_int status = -999;
  const int devid = piece->pipe->devid;
  const int kernel = global->kernel_colorcorrection;

  /* this module sizes the launch from the output ROI */
  const int width = roi_out->width;
  const int height = roi_out->height;
  size_t work_size[2] = { ROUNDUPWD(width), ROUNDUPHT(height) };

  dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), &dev_in);
  dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(cl_mem), &dev_out);
  dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), &width);
  dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), &height);
  dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(float), &params->saturation);
  dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(float), &params->a_scale);
  dt_opencl_set_kernel_arg(devid, kernel, 6, sizeof(float), &params->a_base);
  dt_opencl_set_kernel_arg(devid, kernel, 7, sizeof(float), &params->b_scale);
  dt_opencl_set_kernel_arg(devid, kernel, 8, sizeof(float), &params->b_base);

  status = dt_opencl_enqueue_kernel_2d(devid, kernel, work_size);
  if(status == CL_SUCCESS) return TRUE;

  dt_print(DT_DEBUG_OPENCL, "[opencl_colorcorrection] couldn't enqueue kernel! %d\n", status);
  return FALSE;
}
/*
 * OpenCL code path of the colorcontrast module.
 *
 * Builds two four-float vectors from the committed parameters:
 *   scale  = { 1, a_steepness, b_steepness, 1 }
 *   offset = { 0, a_offset,    b_offset,    0 }
 * and passes them by value (4 * sizeof(float) each) to the kernel.
 *
 * Returns TRUE when the kernel was enqueued, FALSE otherwise (the OpenCL
 * error code is logged).
 */
int process_cl (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
                const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_colorcontrast_data_t *params = (dt_iop_colorcontrast_data_t *)piece->data;
  dt_iop_colorcontrast_global_data_t *global = (dt_iop_colorcontrast_global_data_t *)self->data;

  cl_int status = -999;
  const int devid = piece->pipe->devid;
  const int kernel = global->kernel_colorcontrast;
  const int width = roi_in->width;
  const int height = roi_in->height;

  /* first and last components are the neutral values 1 / 0 */
  float steepness[4] = { 1.0f, params->a_steepness, params->b_steepness, 1.0f };
  float shift[4] = { 0.0f, params->a_offset, params->b_offset, 0.0f };

  size_t work_size[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1};

  dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), (void *)&dev_in);
  dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(cl_mem), (void *)&dev_out);
  dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), (void *)&width);
  dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), (void *)&height);
  dt_opencl_set_kernel_arg(devid, kernel, 4, 4*sizeof(float), (void *)&steepness);
  dt_opencl_set_kernel_arg(devid, kernel, 5, 4*sizeof(float), (void *)&shift);

  status = dt_opencl_enqueue_kernel_2d(devid, kernel, work_size);
  if(status == CL_SUCCESS) return TRUE;

  dt_print(DT_DEBUG_OPENCL, "[opencl_colorcontrast] couldn't enqueue kernel! %d\n", status);
  return FALSE;
}
/*
 * OpenCL code path of the overexposed module (thresholds taken from the
 * piece's own parameters).
 *
 * The lower and upper values from piece->data are divided by 100 and passed
 * to the overexposed kernel, launched over the output ROI.
 *
 * Returns TRUE when the kernel was enqueued, FALSE otherwise (the OpenCL
 * error code is logged).
 */
int process_cl (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
                const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_overexposed_data_t *params = (dt_iop_overexposed_data_t *)piece->data;
  dt_iop_overexposed_global_data_t *global = (dt_iop_overexposed_global_data_t *)self->data;

  cl_int status = -999;
  const int devid = piece->pipe->devid;
  const int kernel = global->kernel_overexposed;

  const int width = roi_out->width;
  const int height = roi_out->height;

  /* stored values are divided by 100 before being handed to the kernel */
  const float lower = params->lower / 100.0f;
  const float upper = params->upper / 100.0f;

  size_t work_size[2] = { ROUNDUPWD(width), ROUNDUPHT(height) };

  dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), &dev_in);
  dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(cl_mem), &dev_out);
  dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), &width);
  dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), &height);
  dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(float), &lower);
  dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(float), &upper);

  status = dt_opencl_enqueue_kernel_2d(devid, kernel, work_size);
  if(status == CL_SUCCESS) return TRUE;

  dt_print(DT_DEBUG_OPENCL, "[opencl_overexposed] couldn't enqueue kernel! %d\n", status);
  return FALSE;
}
/*
 * OpenCL code path of the colorchecker module.
 *
 * Packs the per-patch source Lab values and the L/a/b coefficients into one
 * host buffer of four-float records, uploads it as a constant buffer and
 * runs the colorchecker kernel over the output ROI.
 *
 * Buffer layout (each record is 4 floats, the 4th is always 0):
 *   records [0, num_patches)                   source_Lab[3n .. 3n+2]
 *   records [num_patches, 2 * num_patches + 4) coeff_L[n], coeff_a[n], coeff_b[n]
 *
 * Returns TRUE when the kernel was enqueued. Returns FALSE if the host
 * buffer could not be allocated, the upload failed or enqueueing failed;
 * in every case the host buffer and any device buffer are released and the
 * error code is logged.
 */
int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
               const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  dt_iop_colorchecker_data_t *d = (dt_iop_colorchecker_data_t *)piece->data;
  dt_iop_colorchecker_global_data_t *gd = (dt_iop_colorchecker_global_data_t *)self->data;

  const int devid = piece->pipe->devid;
  const int width = roi_out->width;
  const int height = roi_out->height;
  const int num_patches = d->num_patches;

  cl_int err = -999;
  cl_mem dev_params = NULL;
  size_t sizes[3] = { ROUNDUPWD(width), ROUNDUPHT(height), 1 };

  const size_t params_size = (size_t)(4 * (2 * num_patches + 4)) * sizeof(float);
  float *params = malloc(params_size);
  /* the packing loops below write through params, so bail out on allocation failure */
  if(params == NULL) goto error;

  float *idx = params;

  // re-arrange data->source_Lab and data->coeff_{L,a,b} into float4
  for(int n = 0; n < num_patches; n++, idx += 4)
  {
    idx[0] = d->source_Lab[3 * n];
    idx[1] = d->source_Lab[3 * n + 1];
    idx[2] = d->source_Lab[3 * n + 2];
    idx[3] = 0.0f;
  }

  for(int n = 0; n < num_patches + 4; n++, idx += 4)
  {
    idx[0] = d->coeff_L[n];
    idx[1] = d->coeff_a[n];
    idx[2] = d->coeff_b[n];
    idx[3] = 0.0f;
  }

  dev_params = dt_opencl_copy_host_to_device_constant(devid, params_size, params);
  if(dev_params == NULL) goto error;

  dt_opencl_set_kernel_arg(devid, gd->kernel_colorchecker, 0, sizeof(cl_mem), (void *)&dev_in);
  dt_opencl_set_kernel_arg(devid, gd->kernel_colorchecker, 1, sizeof(cl_mem), (void *)&dev_out);
  dt_opencl_set_kernel_arg(devid, gd->kernel_colorchecker, 2, sizeof(int), (void *)&width);
  dt_opencl_set_kernel_arg(devid, gd->kernel_colorchecker, 3, sizeof(int), (void *)&height);
  dt_opencl_set_kernel_arg(devid, gd->kernel_colorchecker, 4, sizeof(int), (void *)&num_patches);
  dt_opencl_set_kernel_arg(devid, gd->kernel_colorchecker, 5, sizeof(cl_mem), (void *)&dev_params);

  err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_colorchecker, sizes);
  if(err != CL_SUCCESS) goto error;

  dt_opencl_release_mem_object(dev_params);
  free(params);
  return TRUE;

error:
  /* free(NULL) is a no-op, so this is safe on the allocation-failure path */
  free(params);
  if(dev_params != NULL) dt_opencl_release_mem_object(dev_params);
  dt_print(DT_DEBUG_OPENCL, "[opencl_colorchecker] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}
/*
 * OpenCL code path of the highlights module (4f_clip / 1f_lch / 1f_clip
 * kernels).
 *
 * Kernel selection:
 *   - pipe uses downsampled input, or the image has no filter pattern:
 *     the 4f clip kernel, which takes (mode, clip);
 *   - otherwise the 1f kernel matching the mode (LCH or clip), which takes
 *     (clip, roi_out x, roi_out y, filters).
 *
 * After a successful enqueue all three processed maxima of the pipe are set
 * to the largest of them.
 *
 * Returns TRUE when the kernel was enqueued, FALSE otherwise (the OpenCL
 * error code is logged and the processed maxima are left untouched).
 */
int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
               const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  dt_iop_highlights_data_t *params = (dt_iop_highlights_data_t *)piece->data;
  dt_iop_highlights_global_data_t *global = (dt_iop_highlights_global_data_t *)self->data;

  cl_int status = -999;
  const int devid = piece->pipe->devid;
  const int width = roi_in->width;
  const int height = roi_in->height;
  size_t work_size[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1 };

  /* threshold relative to the lowest per-channel maximum */
  const float clip = params->clip
                     * fminf(piece->pipe->processed_maximum[0],
                             fminf(piece->pipe->processed_maximum[1], piece->pipe->processed_maximum[2]));

  const int filters = dt_image_filter(&piece->pipe->image);
  const int four_channel = dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) || !filters;

  int kernel;
  if(four_channel)
    kernel = global->kernel_highlights_4f_clip;
  else if(params->mode == DT_IOP_HIGHLIGHTS_LCH)
    kernel = global->kernel_highlights_1f_lch;
  else
    kernel = global->kernel_highlights_1f_clip;

  /* arguments 0-3 are common to every kernel */
  dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), (void *)&dev_in);
  dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(cl_mem), (void *)&dev_out);
  dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), (void *)&width);
  dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), (void *)&height);

  if(four_channel)
  {
    dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(int), (void *)&params->mode);
    dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(float), (void *)&clip);
  }
  else
  {
    dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(float), (void *)&clip);
    dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(int), (void *)&roi_out->x);
    dt_opencl_set_kernel_arg(devid, kernel, 6, sizeof(int), (void *)&roi_out->y);
    dt_opencl_set_kernel_arg(devid, kernel, 7, sizeof(int), (void *)&filters);
  }

  status = dt_opencl_enqueue_kernel_2d(devid, kernel, work_size);
  if(status != CL_SUCCESS)
  {
    dt_print(DT_DEBUG_OPENCL, "[opencl_highlights] couldn't enqueue kernel! %d\n", status);
    return FALSE;
  }

  // update processed maximum
  const float peak = fmaxf(fmaxf(piece->pipe->processed_maximum[0], piece->pipe->processed_maximum[1]),
                           piece->pipe->processed_maximum[2]);
  for(int ch = 0; ch < 3; ch++)
  {
    piece->pipe->processed_maximum[ch] = peak;
  }
  return TRUE;
}
/*
 * OpenCL code path of the temperature (white balance) module.
 *
 * Kernel selection:
 *   - full (not downsampled) input with a filter pattern and bpp != 4:
 *     the 1ui kernel, with the three coefficients divided by 65535;
 *   - full input with a filter pattern and bpp == 4: the 1f kernel;
 *   - everything else: the 4f kernel.
 *
 * The three coefficients are uploaded as a constant buffer. After a
 * successful enqueue each processed maximum of the pipe is multiplied by
 * the corresponding (unscaled) coefficient from piece->data.
 *
 * Returns TRUE when the kernel was enqueued. Returns FALSE if the upload or
 * the enqueue failed; the device buffer is released in either case and the
 * error code is logged.
 */
int process_cl (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
                const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_temperature_data_t *params = (dt_iop_temperature_data_t *)piece->data;
  dt_iop_temperature_global_data_t *global = (dt_iop_temperature_global_data_t *)self->data;

  const int devid = piece->pipe->devid;
  const int filters = dt_image_flipped_filter(&piece->pipe->image);
  const int width = roi_in->width;
  const int height = roi_in->height;
  size_t work_size[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1};

  float multipliers[3] = {params->coeffs[0], params->coeffs[1], params->coeffs[2]};
  cl_mem dev_multipliers = NULL;
  cl_int status = -999;
  int succeeded = FALSE;
  int kernel = -1;

  if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && filters && piece->pipe->image.bpp != 4)
  {
    kernel = global->kernel_whitebalance_1ui;
    for(int ch = 0; ch < 3; ch++)
    {
      multipliers[ch] /= 65535.0f;
    }
  }
  else if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && filters && piece->pipe->image.bpp == 4)
  {
    kernel = global->kernel_whitebalance_1f;
  }
  else
  {
    kernel = global->kernel_whitebalance_4f;
  }

  dev_multipliers = dt_opencl_copy_host_to_device_constant(devid, sizeof(float)*3, multipliers);
  if(dev_multipliers != NULL)
  {
    dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), (void *)&dev_in);
    dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(cl_mem), (void *)&dev_out);
    dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), (void *)&width);
    dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), (void *)&height);
    dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(cl_mem), (void *)&dev_multipliers);
    dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(uint32_t), (void *)&filters);
    dt_opencl_set_kernel_arg(devid, kernel, 6, sizeof(uint32_t), (void *)&roi_out->x);
    dt_opencl_set_kernel_arg(devid, kernel, 7, sizeof(uint32_t), (void *)&roi_out->y);

    status = dt_opencl_enqueue_kernel_2d(devid, kernel, work_size);
    if(status == CL_SUCCESS) succeeded = TRUE;

    /* the buffer is released whether or not the enqueue worked */
    dt_opencl_release_mem_object(dev_multipliers);
  }

  if(!succeeded)
  {
    dt_print(DT_DEBUG_OPENCL, "[opencl_white_balance] couldn't enqueue kernel! %d\n", status);
    return FALSE;
  }

  /* uses the original coefficients, not the 1/65535-scaled copies */
  for(int ch = 0; ch < 3; ch++)
  {
    piece->pipe->processed_maximum[ch] = params->coeffs[ch] * piece->pipe->processed_maximum[ch];
  }
  return TRUE;
}
/*
 * OpenCL code path of the channelmixer module.
 *
 * Uploads the red, green and blue mixing tables (CHANNEL_SIZE floats each)
 * as constant buffers and runs the channelmixer kernel. gray_mix_mode is
 * TRUE when any of the three CHANNEL_GRAY entries is non-zero.
 *
 * Returns TRUE when the kernel was enqueued. Returns FALSE if any upload or
 * the enqueue failed; every buffer that was created is released on both
 * paths and the error code is logged on failure.
 */
int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
               const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  dt_iop_channelmixer_data_t *mix = (dt_iop_channelmixer_data_t *)piece->data;
  dt_iop_channelmixer_global_data_t *global = (dt_iop_channelmixer_global_data_t *)self->data;

  cl_mem dev_red = NULL;
  cl_mem dev_green = NULL;
  cl_mem dev_blue = NULL;
  cl_int status = -999;
  int succeeded = FALSE;

  const int devid = piece->pipe->devid;
  const int kernel = global->kernel_channelmixer;
  const int width = roi_in->width;
  const int height = roi_in->height;

  /* gray mixing is off only when all three gray weights are exactly zero */
  const int gray_mix_mode = (mix->red[CHANNEL_GRAY] == 0.0f && mix->green[CHANNEL_GRAY] == 0.0f
                             && mix->blue[CHANNEL_GRAY] == 0.0f)
                                ? FALSE
                                : TRUE;

  size_t work_size[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1 };

  dev_red = dt_opencl_copy_host_to_device_constant(devid, sizeof(float) * CHANNEL_SIZE, mix->red);
  if(dev_red == NULL) goto cleanup;

  dev_green = dt_opencl_copy_host_to_device_constant(devid, sizeof(float) * CHANNEL_SIZE, mix->green);
  if(dev_green == NULL) goto cleanup;

  dev_blue = dt_opencl_copy_host_to_device_constant(devid, sizeof(float) * CHANNEL_SIZE, mix->blue);
  if(dev_blue == NULL) goto cleanup;

  dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), (void *)&dev_in);
  dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(cl_mem), (void *)&dev_out);
  dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), (void *)&width);
  dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), (void *)&height);
  dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(int), (void *)&gray_mix_mode);
  dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(cl_mem), (void *)&dev_red);
  dt_opencl_set_kernel_arg(devid, kernel, 6, sizeof(cl_mem), (void *)&dev_green);
  dt_opencl_set_kernel_arg(devid, kernel, 7, sizeof(cl_mem), (void *)&dev_blue);

  status = dt_opencl_enqueue_kernel_2d(devid, kernel, work_size);
  if(status == CL_SUCCESS) succeeded = TRUE;

cleanup:
  /* shared by the success and failure paths */
  if(dev_red != NULL) dt_opencl_release_mem_object(dev_red);
  if(dev_green != NULL) dt_opencl_release_mem_object(dev_green);
  if(dev_blue != NULL) dt_opencl_release_mem_object(dev_blue);

  if(succeeded) return TRUE;

  dt_print(DT_DEBUG_OPENCL, "[opencl_channelmixer] couldn't enqueue kernel! %d\n", status);
  return FALSE;
}
/*
 * OpenCL code path of the zonesystem module.
 *
 * Computes the zone map from the committed parameters, derives a per-zone
 * scale and offset table from it, uploads both tables as constant buffers
 * and runs the zonesystem kernel.
 *
 * For k in [0, size - 1):
 *   scale[k]  = (zonemap[k + 1] - zonemap[k]) * (size - 1)
 *   offset[k] = 100 * ((k + 1) * zonemap[k] - k * zonemap[k + 1])
 *
 * Returns TRUE when the kernel was enqueued. Returns FALSE if an upload or
 * the enqueue failed; every buffer that was created is released and the
 * error code is logged.
 */
int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
               const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  dt_iop_zonesystem_data_t *data = (dt_iop_zonesystem_data_t *)piece->data;
  dt_iop_zonesystem_global_data_t *gd = (dt_iop_zonesystem_global_data_t *)self->data;

  /* both handles start out NULL because the error path tests each of them */
  cl_mem dev_zmo = NULL;
  cl_mem dev_zms = NULL;
  cl_int err = -999;

  const int devid = piece->pipe->devid;
  const int width = roi_in->width;
  const int height = roi_in->height;

  /* calculate zonemap */
  const int size = data->params.size;
  /* note: these initializers set element 0 to -1 and zero-fill the rest */
  float zonemap[MAX_ZONE_SYSTEM_SIZE] = { -1 };
  float zonemap_offset[ROUNDUP(MAX_ZONE_SYSTEM_SIZE, 16)] = { -1 };
  float zonemap_scale[ROUNDUP(MAX_ZONE_SYSTEM_SIZE, 16)] = { -1 };

  _iop_zonesystem_calculate_zonemap(&(data->params), zonemap);

  /* precompute scale and offset */
  for(int k = 0; k < size - 1; k++) zonemap_scale[k] = (zonemap[k + 1] - zonemap[k]) * (size - 1);
  for(int k = 0; k < size - 1; k++)
    zonemap_offset[k] = 100.0f * ((k + 1) * zonemap[k] - k * zonemap[k + 1]);

  dev_zmo = dt_opencl_copy_host_to_device_constant(devid, sizeof(float) * ROUNDUP(MAX_ZONE_SYSTEM_SIZE, 16),
                                                   zonemap_offset);
  if(dev_zmo == NULL) goto error;
  dev_zms = dt_opencl_copy_host_to_device_constant(devid, sizeof(float) * ROUNDUP(MAX_ZONE_SYSTEM_SIZE, 16),
                                                   zonemap_scale);
  if(dev_zms == NULL) goto error;

  size_t sizes[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1 };

  dt_opencl_set_kernel_arg(devid, gd->kernel_zonesystem, 0, sizeof(cl_mem), (void *)&dev_in);
  dt_opencl_set_kernel_arg(devid, gd->kernel_zonesystem, 1, sizeof(cl_mem), (void *)&dev_out);
  dt_opencl_set_kernel_arg(devid, gd->kernel_zonesystem, 2, sizeof(int), (void *)&width);
  dt_opencl_set_kernel_arg(devid, gd->kernel_zonesystem, 3, sizeof(int), (void *)&height);
  dt_opencl_set_kernel_arg(devid, gd->kernel_zonesystem, 4, sizeof(int), (void *)&size);
  dt_opencl_set_kernel_arg(devid, gd->kernel_zonesystem, 5, sizeof(cl_mem), (void *)&dev_zmo);
  dt_opencl_set_kernel_arg(devid, gd->kernel_zonesystem, 6, sizeof(cl_mem), (void *)&dev_zms);

  err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_zonesystem, sizes);
  if(err != CL_SUCCESS) goto error;

  dt_opencl_release_mem_object(dev_zmo);
  dt_opencl_release_mem_object(dev_zms);
  return TRUE;

error:
  if(dev_zmo != NULL) dt_opencl_release_mem_object(dev_zmo);
  if(dev_zms != NULL) dt_opencl_release_mem_object(dev_zms);
  dt_print(DT_DEBUG_OPENCL, "[opencl_zonesystem] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}
/*
 * OpenCL code path of the invert module.
 *
 * Copies the four film colour components from piece->data. When the image
 * has a filter pattern, the 1f kernel is used and each component is
 * multiplied by the matching processed maximum; otherwise the 4f kernel is
 * used with the components unchanged. The colour is uploaded as a constant
 * buffer and the kernel is run.
 *
 * After a successful enqueue all four processed maxima are reset to 1.0.
 *
 * Returns TRUE when the kernel was enqueued. Returns FALSE if the upload or
 * the enqueue failed; the device buffer is released if it was created and
 * the error code is logged.
 */
int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
               const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  dt_iop_invert_data_t *d = (dt_iop_invert_data_t *)piece->data;
  dt_iop_invert_global_data_t *gd = (dt_iop_invert_global_data_t *)self->data;

  const int devid = piece->pipe->devid;
  const uint32_t filters = piece->pipe->dsc.filters;
  cl_mem dev_color = NULL;
  cl_int err = -999;
  int kernel = -1;

  float film_rgb_f[4] = { d->color[0], d->color[1], d->color[2], d->color[3] };

  if(filters)
  {
    kernel = gd->kernel_invert_1f;

    const float *const m = piece->pipe->dsc.processed_maximum;
    for(int c = 0; c < 4; c++) film_rgb_f[c] *= m[c];
  }
  else
  {
    kernel = gd->kernel_invert_4f;
  }

  /* NOTE(review): four components are prepared above but only three floats
     are uploaded here - confirm the kernels never read a fourth component */
  dev_color = dt_opencl_copy_host_to_device_constant(devid, sizeof(float) * 3, film_rgb_f);
  if(dev_color == NULL) goto error;

  const int width = roi_in->width;
  const int height = roi_in->height;

  size_t sizes[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1 };
  dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), (void *)&dev_in);
  dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(cl_mem), (void *)&dev_out);
  dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), (void *)&width);
  dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), (void *)&height);
  dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(cl_mem), (void *)&dev_color);
  dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(uint32_t), (void *)&filters);
  dt_opencl_set_kernel_arg(devid, kernel, 6, sizeof(uint32_t), (void *)&roi_out->x);
  dt_opencl_set_kernel_arg(devid, kernel, 7, sizeof(uint32_t), (void *)&roi_out->y);
  err = dt_opencl_enqueue_kernel_2d(devid, kernel, sizes);
  if(err != CL_SUCCESS) goto error;

  dt_opencl_release_mem_object(dev_color);
  for(int k = 0; k < 4; k++) piece->pipe->dsc.processed_maximum[k] = 1.0f;
  return TRUE;

error:
  /* dev_color is still NULL when the upload itself failed */
  if(dev_color != NULL) dt_opencl_release_mem_object(dev_color);
  dt_print(DT_DEBUG_OPENCL, "[opencl_invert] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}
/*
 * OpenCL code path of the colorout module.
 *
 * Uploads five helper buffers and runs the colorout kernel:
 *   - the 9-float colour matrix (constant buffer),
 *   - the three lookup tables lut[0..2], each as a 256 x 256 float image,
 *   - the 3 x 3 unbounded coefficients (constant buffer).
 *
 * Returns TRUE when the kernel was enqueued. Returns FALSE if any upload or
 * the enqueue failed; every buffer that was created is released on both
 * paths and the error code is logged on failure.
 */
int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
               const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_colorout_data_t *params = (dt_iop_colorout_data_t *)piece->data;
  dt_iop_colorout_global_data_t *global = (dt_iop_colorout_global_data_t *)self->data;

  cl_mem dev_matrix = NULL;
  cl_mem dev_lut_r = NULL;
  cl_mem dev_lut_g = NULL;
  cl_mem dev_lut_b = NULL;
  cl_mem dev_unbounded = NULL;
  cl_int status = -999;
  int succeeded = FALSE;

  const int devid = piece->pipe->devid;
  const int kernel = global->kernel_colorout;
  const int width = roi_in->width;
  const int height = roi_in->height;

  size_t work_size[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1 };

  /* uploads happen one at a time; the first failure aborts the rest */
  dev_matrix = dt_opencl_copy_host_to_device_constant(devid, sizeof(float) * 9, params->cmatrix);
  if(dev_matrix == NULL) goto cleanup;

  dev_lut_r = dt_opencl_copy_host_to_device(devid, params->lut[0], 256, 256, sizeof(float));
  if(dev_lut_r == NULL) goto cleanup;

  dev_lut_g = dt_opencl_copy_host_to_device(devid, params->lut[1], 256, 256, sizeof(float));
  if(dev_lut_g == NULL) goto cleanup;

  dev_lut_b = dt_opencl_copy_host_to_device(devid, params->lut[2], 256, 256, sizeof(float));
  if(dev_lut_b == NULL) goto cleanup;

  dev_unbounded = dt_opencl_copy_host_to_device_constant(devid, sizeof(float) * 3 * 3,
                                                         (float *)params->unbounded_coeffs);
  if(dev_unbounded == NULL) goto cleanup;

  dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), (void *)&dev_in);
  dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(cl_mem), (void *)&dev_out);
  dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), (void *)&width);
  dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), (void *)&height);
  dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(cl_mem), (void *)&dev_matrix);
  dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(cl_mem), (void *)&dev_lut_r);
  dt_opencl_set_kernel_arg(devid, kernel, 6, sizeof(cl_mem), (void *)&dev_lut_g);
  dt_opencl_set_kernel_arg(devid, kernel, 7, sizeof(cl_mem), (void *)&dev_lut_b);
  dt_opencl_set_kernel_arg(devid, kernel, 8, sizeof(cl_mem), (void *)&dev_unbounded);

  status = dt_opencl_enqueue_kernel_2d(devid, kernel, work_size);
  if(status == CL_SUCCESS) succeeded = TRUE;

cleanup:
  /* shared by the success and failure paths */
  if(dev_matrix != NULL) dt_opencl_release_mem_object(dev_matrix);
  if(dev_lut_r != NULL) dt_opencl_release_mem_object(dev_lut_r);
  if(dev_lut_g != NULL) dt_opencl_release_mem_object(dev_lut_g);
  if(dev_lut_b != NULL) dt_opencl_release_mem_object(dev_lut_b);
  if(dev_unbounded != NULL) dt_opencl_release_mem_object(dev_unbounded);

  if(succeeded) return TRUE;

  dt_print(DT_DEBUG_OPENCL, "[opencl_colorout] couldn't enqueue kernel! %d\n", status);
  return FALSE;
}
/*
 * OpenCL code path of the tonecurve module.
 *
 * Uploads the L, a and b curve tables (each as a 256 x 256 float image) and
 * the three unbounded coefficients (constant buffer), then runs the
 * tonecurve kernel with the autoscale_ab flag.
 *
 * Returns TRUE when the kernel was enqueued. Returns FALSE if any upload or
 * the enqueue failed; every buffer that was created is released and the
 * error code is logged.
 */
int process_cl (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
                const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_tonecurve_data_t *d = (dt_iop_tonecurve_data_t *)piece->data;
  dt_iop_tonecurve_global_data_t *gd = (dt_iop_tonecurve_global_data_t *)self->data;

  /* every handle starts out NULL because the error path tests each of them */
  cl_mem dev_L = NULL;
  cl_mem dev_a = NULL;
  cl_mem dev_b = NULL;
  cl_mem dev_coeffs = NULL;

  cl_int err = -999;
  const int devid = piece->pipe->devid;
  const int width = roi_in->width;
  const int height = roi_in->height;
  const int autoscale_ab = d->autoscale_ab;

  size_t sizes[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1};

  /* all three tables are uploaded before any result is checked */
  dev_L = dt_opencl_copy_host_to_device(devid, d->table[ch_L], 256, 256, sizeof(float));
  dev_a = dt_opencl_copy_host_to_device(devid, d->table[ch_a], 256, 256, sizeof(float));
  dev_b = dt_opencl_copy_host_to_device(devid, d->table[ch_b], 256, 256, sizeof(float));
  if (dev_L == NULL || dev_a == NULL || dev_b == NULL) goto error;

  dev_coeffs = dt_opencl_copy_host_to_device_constant(devid, sizeof(float)*3, d->unbounded_coeffs);
  if (dev_coeffs == NULL) goto error;

  dt_opencl_set_kernel_arg(devid, gd->kernel_tonecurve, 0, sizeof(cl_mem), (void *)&dev_in);
  dt_opencl_set_kernel_arg(devid, gd->kernel_tonecurve, 1, sizeof(cl_mem), (void *)&dev_out);
  dt_opencl_set_kernel_arg(devid, gd->kernel_tonecurve, 2, sizeof(int), (void *)&width);
  dt_opencl_set_kernel_arg(devid, gd->kernel_tonecurve, 3, sizeof(int), (void *)&height);
  dt_opencl_set_kernel_arg(devid, gd->kernel_tonecurve, 4, sizeof(cl_mem), (void *)&dev_L);
  dt_opencl_set_kernel_arg(devid, gd->kernel_tonecurve, 5, sizeof(cl_mem), (void *)&dev_a);
  dt_opencl_set_kernel_arg(devid, gd->kernel_tonecurve, 6, sizeof(cl_mem), (void *)&dev_b);
  dt_opencl_set_kernel_arg(devid, gd->kernel_tonecurve, 7, sizeof(int), (void *)&autoscale_ab);
  dt_opencl_set_kernel_arg(devid, gd->kernel_tonecurve, 8, sizeof(cl_mem), (void *)&dev_coeffs);

  err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_tonecurve, sizes);
  if(err != CL_SUCCESS) goto error;

  dt_opencl_release_mem_object(dev_L);
  dt_opencl_release_mem_object(dev_a);
  dt_opencl_release_mem_object(dev_b);
  dt_opencl_release_mem_object(dev_coeffs);
  return TRUE;

error:
  if (dev_L != NULL) dt_opencl_release_mem_object(dev_L);
  if (dev_a != NULL) dt_opencl_release_mem_object(dev_a);
  if (dev_b != NULL) dt_opencl_release_mem_object(dev_b);
  if (dev_coeffs != NULL) dt_opencl_release_mem_object(dev_coeffs);
  dt_print(DT_DEBUG_OPENCL, "[opencl_tonecurve] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}
/*
 * OpenCL code path of the colorize module.
 *
 * Converts the configured hue / saturation / lightness to RGB with
 * hsl2rgb(), multiplies by a fixed 3 x 3 matrix to obtain XYZ and converts
 * that to Lab with dt_XYZ_to_Lab(). The resulting L, a, b values and the
 * source lightness mix (divided by 100) are passed to the colorize kernel.
 *
 * Returns TRUE when the kernel was enqueued, FALSE otherwise (the OpenCL
 * error code is logged).
 */
int process_cl (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
                const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_colorize_data_t *params = (dt_iop_colorize_data_t *)piece->data;
  dt_iop_colorize_global_data_t *global = (dt_iop_colorize_global_data_t *)self->data;

  cl_int status = -999;
  const int devid = piece->pipe->devid;
  const int kernel = global->kernel_colorize;
  const int width = roi_in->width;
  const int height = roi_in->height;

  /* create Lab */
  float rgb[3]={0};
  float XYZ[3]={0};
  float Lab[3]={0};
  hsl2rgb(rgb,params->hue, params->saturation, params->lightness/100.0);

  XYZ[0] = (rgb[0] * 0.5767309) + (rgb[1] * 0.1855540) + (rgb[2] * 0.1881852);
  XYZ[1] = (rgb[0] * 0.2973769) + (rgb[1] * 0.6273491) + (rgb[2] * 0.0752741);
  XYZ[2] = (rgb[0] * 0.0270343) + (rgb[1] * 0.0706872) + (rgb[2] * 0.9911085);

  dt_XYZ_to_Lab(XYZ,Lab);

  /* scalar copies whose addresses are handed to the kernel */
  const float target_L = Lab[0];
  const float target_a = Lab[1];
  const float target_b = Lab[2];
  const float mix = params->source_lightness_mix/100.0f;

  size_t work_size[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1};

  dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), (void *)&dev_in);
  dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(cl_mem), (void *)&dev_out);
  dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), (void *)&width);
  dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), (void *)&height);
  dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(float), (void *)&mix);
  dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(float), (void *)&target_L);
  dt_opencl_set_kernel_arg(devid, kernel, 6, sizeof(float), (void *)&target_a);
  dt_opencl_set_kernel_arg(devid, kernel, 7, sizeof(float), (void *)&target_b);

  status = dt_opencl_enqueue_kernel_2d(devid, kernel, work_size);
  if(status == CL_SUCCESS) return TRUE;

  dt_print(DT_DEBUG_OPENCL, "[opencl_colorize] couldn't enqueue kernel! %d\n", status);
  return FALSE;
}
/*
 * OpenCL code path of the overexposed module (settings taken from the
 * develop object).
 *
 * When the indicator is disabled or no GUI is attached, the input image is
 * copied to the output unchanged. Otherwise the overexposed kernel is run
 * with the lower / upper thresholds (divided by 100) and the two marker
 * colours of the selected colour scheme.
 *
 * Returns TRUE when the copy or the kernel was enqueued, FALSE otherwise
 * (the OpenCL error code is logged; both failure cases share one message).
 */
int process_cl (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
                const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_develop_t *develop = self->dev;
  dt_iop_overexposed_global_data_t *global = (dt_iop_overexposed_global_data_t *)self->data;

  cl_int status = -999;
  const int devid = piece->pipe->devid;

  const int width = roi_out->width;
  const int height = roi_out->height;

  const float lower = develop->overexposed.lower / 100.0f;
  const float upper = develop->overexposed.upper / 100.0f;

  /* entry [0] of a scheme is the upper colour, entry [1] the lower colour */
  const int colorscheme = develop->overexposed.colorscheme;
  const float *upper_color = dt_iop_overexposed_colors[colorscheme][0];
  const float *lower_color = dt_iop_overexposed_colors[colorscheme][1];

  if(!develop->overexposed.enabled || !develop->gui_attached)
  {
    /* pass-through: plain image copy instead of the kernel */
    size_t origin[] = { 0, 0, 0};
    size_t region[] = { width, height, 1};
    status = dt_opencl_enqueue_copy_image(devid, dev_in, dev_out, origin, origin, region);
  }
  else
  {
    const int kernel = global->kernel_overexposed;
    size_t work_size[2] = { ROUNDUPWD(width), ROUNDUPHT(height) };

    dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), &dev_in);
    dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(cl_mem), &dev_out);
    dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), &width);
    dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), &height);
    dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(float), &lower);
    dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(float), &upper);
    dt_opencl_set_kernel_arg(devid, kernel, 6, 4*sizeof(float), lower_color);
    dt_opencl_set_kernel_arg(devid, kernel, 7, 4*sizeof(float), upper_color);

    status = dt_opencl_enqueue_kernel_2d(devid, kernel, work_size);
  }

  if(status == CL_SUCCESS) return TRUE;

  dt_print(DT_DEBUG_OPENCL, "[opencl_overexposed] couldn't enqueue kernel! %d\n", status);
  return FALSE;
}
/* OpenCL path of highlight handling.
 *
 * The clipping level is the module's clip factor times the smallest of the
 * pipe's three processed maxima.  Preview pipes and images without a filter
 * pattern go through the "4f" kernel; everything else uses the "1f" kernel,
 * which additionally receives the roi origin and the filter pattern.
 *
 * Returns TRUE when the chosen kernel was enqueued, FALSE otherwise. */
int process_cl (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
                const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_highlights_data_t *d = (dt_iop_highlights_data_t *)piece->data;
  dt_iop_highlights_global_data_t *gd = (dt_iop_highlights_global_data_t *)self->data;

  cl_int err = -999;
  const int devid = piece->pipe->devid;
  const int width = roi_in->width;
  const int height = roi_in->height;

  size_t sizes[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1 };

  const float clip = d->clip * fminf(piece->pipe->processed_maximum[0],
                                     fminf(piece->pipe->processed_maximum[1],
                                           piece->pipe->processed_maximum[2]));

  const int filters = dt_image_flipped_filter(&piece->pipe->image);
  const int use_4f = (piece->pipe->type == DT_DEV_PIXELPIPE_PREVIEW || !filters);

  if(use_4f)
  {
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f, 0, sizeof(cl_mem), (void *)&dev_in);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f, 1, sizeof(cl_mem), (void *)&dev_out);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f, 2, sizeof(int), (void *)&width);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f, 3, sizeof(int), (void *)&height);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f, 4, sizeof(int), (void *)&d->mode);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f, 5, sizeof(float), (void *)&clip);
    err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_highlights_4f, sizes);
  }
  else
  {
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f, 0, sizeof(cl_mem), (void *)&dev_in);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f, 1, sizeof(cl_mem), (void *)&dev_out);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f, 2, sizeof(int), (void *)&width);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f, 3, sizeof(int), (void *)&height);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f, 4, sizeof(int), (void *)&d->mode);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f, 5, sizeof(float), (void *)&clip);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f, 6, sizeof(int), (void *)&roi_out->x);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f, 7, sizeof(int), (void *)&roi_out->y);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f, 8, sizeof(int), (void *)&filters);
    err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_highlights_1f, sizes);
  }

  if(err == CL_SUCCESS) return TRUE;

  dt_print(DT_DEBUG_OPENCL, "[opencl_highlights] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}
/* OpenCL path of the color balance module.
 *
 * Prepares four-component lift, gain and inverse-gamma vectors on the host
 * (per-channel value times the channel factor; fourth component is 0) and
 * passes them to the colorbalance kernel.  A gamma of exactly 0 is replaced
 * by an inverse of 1000000 to avoid dividing by zero.
 *
 * Returns TRUE when the kernel was enqueued, FALSE otherwise. */
int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
               const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_colorbalance_data_t *d = (dt_iop_colorbalance_data_t *)piece->data;
  dt_iop_colorbalance_global_data_t *gd = (dt_iop_colorbalance_global_data_t *)self->data;

  const int devid = piece->pipe->devid;
  const int width = roi_in->width;
  const int height = roi_in->height;

  const float lift[4] = { 2.0f - (d->lift[CHANNEL_RED] * d->lift[CHANNEL_FACTOR]),
                          2.0f - (d->lift[CHANNEL_GREEN] * d->lift[CHANNEL_FACTOR]),
                          2.0f - (d->lift[CHANNEL_BLUE] * d->lift[CHANNEL_FACTOR]),
                          0.0f };

  const float gamma[4] = { d->gamma[CHANNEL_RED] * d->gamma[CHANNEL_FACTOR],
                           d->gamma[CHANNEL_GREEN] * d->gamma[CHANNEL_FACTOR],
                           d->gamma[CHANNEL_BLUE] * d->gamma[CHANNEL_FACTOR],
                           0.0f };

  const float gamma_inv[4] = { (gamma[0] != 0.0f) ? 1.0f / gamma[0] : 1000000.0f,
                               (gamma[1] != 0.0f) ? 1.0f / gamma[1] : 1000000.0f,
                               (gamma[2] != 0.0f) ? 1.0f / gamma[2] : 1000000.0f,
                               0.0f };

  const float gain[4] = { d->gain[CHANNEL_RED] * d->gain[CHANNEL_FACTOR],
                          d->gain[CHANNEL_GREEN] * d->gain[CHANNEL_FACTOR],
                          d->gain[CHANNEL_BLUE] * d->gain[CHANNEL_FACTOR],
                          0.0f };

  size_t sizes[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1 };

  dt_opencl_set_kernel_arg(devid, gd->kernel_colorbalance, 0, sizeof(cl_mem), (void *)&dev_in);
  dt_opencl_set_kernel_arg(devid, gd->kernel_colorbalance, 1, sizeof(cl_mem), (void *)&dev_out);
  dt_opencl_set_kernel_arg(devid, gd->kernel_colorbalance, 2, sizeof(int), (void *)&width);
  dt_opencl_set_kernel_arg(devid, gd->kernel_colorbalance, 3, sizeof(int), (void *)&height);
  dt_opencl_set_kernel_arg(devid, gd->kernel_colorbalance, 4, 4 * sizeof(float), (void *)&lift);
  dt_opencl_set_kernel_arg(devid, gd->kernel_colorbalance, 5, 4 * sizeof(float), (void *)&gain);
  dt_opencl_set_kernel_arg(devid, gd->kernel_colorbalance, 6, 4 * sizeof(float), (void *)&gamma_inv);

  const cl_int err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_colorbalance, sizes);
  if(err == CL_SUCCESS) return TRUE;

  dt_print(DT_DEBUG_OPENCL, "[opencl_colorbalance] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}
/* OpenCL path of the levels module.
 *
 * In automatic mode the parameters are (re)committed first via
 * commit_params_late().  The lookup table is then uploaded to the device as a
 * 256x256 float image and the levels kernel is run with the black point
 * (levels[0]), white point (levels[2]) and the inverse gamma.
 *
 * Returns TRUE on success; FALSE if the lookup table could not be uploaded or
 * the kernel could not be enqueued.  The device lookup table is released on
 * every path on which it was created. */
int process_cl(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
               const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  dt_iop_levels_data_t *d = (dt_iop_levels_data_t *)piece->data;
  dt_iop_levels_global_data_t *gd = (dt_iop_levels_global_data_t *)self->data;

  if(d->mode == LEVELS_MODE_AUTOMATIC)
  {
    commit_params_late(self, piece);
  }

  cl_int err = -999;
  const int devid = piece->pipe->devid;
  const int width = roi_out->width;
  const int height = roi_out->height;

  cl_mem dev_lut = dt_opencl_copy_host_to_device(devid, d->lut, 256, 256, sizeof(float));

  if(dev_lut != NULL)
  {
    size_t sizes[2] = { ROUNDUPWD(width), ROUNDUPHT(height) };

    dt_opencl_set_kernel_arg(devid, gd->kernel_levels, 0, sizeof(cl_mem), &dev_in);
    dt_opencl_set_kernel_arg(devid, gd->kernel_levels, 1, sizeof(cl_mem), &dev_out);
    dt_opencl_set_kernel_arg(devid, gd->kernel_levels, 2, sizeof(int), &width);
    dt_opencl_set_kernel_arg(devid, gd->kernel_levels, 3, sizeof(int), &height);
    dt_opencl_set_kernel_arg(devid, gd->kernel_levels, 4, sizeof(cl_mem), &dev_lut);
    dt_opencl_set_kernel_arg(devid, gd->kernel_levels, 5, sizeof(float), &d->levels[0]);
    dt_opencl_set_kernel_arg(devid, gd->kernel_levels, 6, sizeof(float), &d->levels[2]);
    dt_opencl_set_kernel_arg(devid, gd->kernel_levels, 7, sizeof(float), &d->in_inv_gamma);

    err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_levels, sizes);

    /* the table is no longer needed by the host, whatever the outcome */
    dt_opencl_release_mem_object(dev_lut);

    if(err == CL_SUCCESS) return TRUE;
  }

  dt_print(DT_DEBUG_OPENCL, "[opencl_levels] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}
/* OpenCL path of the borders module (centred border variant).
 *
 * Step 1 fills the whole output roi with the border colour.
 * Step 2 copies the input image into the output at the offset left by half of
 * the total border size (clamped at 0 after subtracting the roi origin).
 *
 * Returns TRUE if both steps were enqueued, FALSE otherwise. */
int process_cl (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
                const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_borders_data_t *d = (dt_iop_borders_data_t *)piece->data;
  dt_iop_borders_global_data_t *gd = (dt_iop_borders_global_data_t *)self->data;

  const int devid = piece->pipe->devid;
  const int width = roi_out->width;
  const int height = roi_out->height;

  /* total border size at the current scale, and where the image starts */
  const int border_w = (piece->buf_out.width - piece->buf_in.width ) * roi_in->scale;
  const int border_h = (piece->buf_out.height - piece->buf_in.height) * roi_in->scale;
  const int image_x = MAX(border_w/2 - roi_out->x, 0);
  const int image_y = MAX(border_h/2 - roi_out->y, 0);

  const float col[4] = { d->color[0], d->color[1], d->color[2], 1.0f };

  size_t sizes[2] = { ROUNDUPWD(width), ROUNDUPHT(height) };

  dt_opencl_set_kernel_arg(devid, gd->kernel_borders_fill, 0, sizeof(cl_mem), &dev_out);
  dt_opencl_set_kernel_arg(devid, gd->kernel_borders_fill, 1, sizeof(int), &width);
  dt_opencl_set_kernel_arg(devid, gd->kernel_borders_fill, 2, sizeof(int), &height);
  dt_opencl_set_kernel_arg(devid, gd->kernel_borders_fill, 3, 4*sizeof(float), &col);

  cl_int err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_borders_fill, sizes);

  if(err == CL_SUCCESS)
  {
    /* place the original input inside the freshly filled output */
    size_t iorigin[] = { 0, 0, 0 };
    size_t oorigin[] = { image_x, image_y, 0 };
    size_t region[] = { roi_in->width, roi_in->height, 1 };

    err = dt_opencl_enqueue_copy_image(devid, dev_in, dev_out, iorigin, oorigin, region);
  }

  if(err == CL_SUCCESS) return TRUE;

  dt_print(DT_DEBUG_OPENCL, "[opencl_borders] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}
/* OpenCL path of the lowlight module.
 *
 * Converts the "scotopic white" Lab colour (L = 100, a = 0, b = -blueness) to
 * XYZ on the host, uploads the module's lookup table as a 256x256 float image
 * and runs the lowlight kernel with both.
 *
 * Returns TRUE on success; FALSE if the lookup table could not be uploaded or
 * the kernel could not be enqueued. */
int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
               const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  dt_iop_lowlight_data_t *d = (dt_iop_lowlight_data_t *)piece->data;
  dt_iop_lowlight_global_data_t *gd = (dt_iop_lowlight_global_data_t *)self->data;

  cl_mem dev_m = NULL;
  cl_int err = -999;
  const int devid = piece->pipe->devid;

  const int width = roi_out->width;
  const int height = roi_out->height;

  // scotopic white, blue saturated
  float Lab_sw[3] = { 100.0f, 0.0f, -d->blueness };
  // The kernel argument below is 4 floats wide; the conversion is fed a
  // 3-component input, so the fourth component is zeroed explicitly rather
  // than being handed to the device with an indeterminate value.
  float XYZ_sw[4] = { 0.0f };
  dt_Lab_to_XYZ(Lab_sw, XYZ_sw);

  dev_m = dt_opencl_copy_host_to_device(devid, d->lut, 256, 256, sizeof(float));
  if(dev_m == NULL) goto error;

  size_t sizes[2] = { ROUNDUPWD(width), ROUNDUPHT(height) };
  dt_opencl_set_kernel_arg(devid, gd->kernel_lowlight, 0, sizeof(cl_mem), &dev_in);
  dt_opencl_set_kernel_arg(devid, gd->kernel_lowlight, 1, sizeof(cl_mem), &dev_out);
  dt_opencl_set_kernel_arg(devid, gd->kernel_lowlight, 2, sizeof(int), &width);
  dt_opencl_set_kernel_arg(devid, gd->kernel_lowlight, 3, sizeof(int), &height);
  dt_opencl_set_kernel_arg(devid, gd->kernel_lowlight, 4, 4 * sizeof(float), &XYZ_sw);
  dt_opencl_set_kernel_arg(devid, gd->kernel_lowlight, 5, sizeof(cl_mem), &dev_m);

  err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_lowlight, sizes);
  if(err != CL_SUCCESS) goto error;

  dt_opencl_release_mem_object(dev_m);
  return TRUE;

error:
  dt_opencl_release_mem_object(dev_m);
  dt_print(DT_DEBUG_OPENCL, "[opencl_lowlight] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}
/* OpenCL path of the split toning module.
 *
 * Forwards the shadow/highlight hue and saturation, the balance and a scaled
 * compression value to the splittoning kernel.  The compression is divided by
 * 110 (not 100) and halved so that full compression is never reached.
 *
 * Returns TRUE when the kernel was enqueued, FALSE otherwise. */
int process_cl (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
                const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_splittoning_data_t *d = (dt_iop_splittoning_data_t *)piece->data;
  dt_iop_splittoning_global_data_t *gd = (dt_iop_splittoning_global_data_t *)self->data;

  const int devid = piece->pipe->devid;
  const int width = roi_out->width;
  const int height = roi_out->height;

  /* kernel parameters, copied into locals so their addresses can be passed */
  const float compress = (d->compress/110.0)/2.0; // never allow 100% compression
  const float balance = d->balance;
  const float shadow_hue = d->shadow_hue;
  const float shadow_saturation = d->shadow_saturation;
  const float highlight_hue = d->highlight_hue;
  const float highlight_saturation = d->highlight_saturation;

  size_t sizes[2] = { ROUNDUPWD(width), ROUNDUPHT(height) };

  dt_opencl_set_kernel_arg(devid, gd->kernel_splittoning, 0, sizeof(cl_mem), &dev_in);
  dt_opencl_set_kernel_arg(devid, gd->kernel_splittoning, 1, sizeof(cl_mem), &dev_out);
  dt_opencl_set_kernel_arg(devid, gd->kernel_splittoning, 2, sizeof(int), &width);
  dt_opencl_set_kernel_arg(devid, gd->kernel_splittoning, 3, sizeof(int), &height);
  dt_opencl_set_kernel_arg(devid, gd->kernel_splittoning, 4, sizeof(float), &compress);
  dt_opencl_set_kernel_arg(devid, gd->kernel_splittoning, 5, sizeof(float), &balance);
  dt_opencl_set_kernel_arg(devid, gd->kernel_splittoning, 6, sizeof(float), &shadow_hue);
  dt_opencl_set_kernel_arg(devid, gd->kernel_splittoning, 7, sizeof(float), &shadow_saturation);
  dt_opencl_set_kernel_arg(devid, gd->kernel_splittoning, 8, sizeof(float), &highlight_hue);
  dt_opencl_set_kernel_arg(devid, gd->kernel_splittoning, 9, sizeof(float), &highlight_saturation);

  const cl_int err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_splittoning, sizes);
  if(err == CL_SUCCESS) return TRUE;

  dt_print(DT_DEBUG_OPENCL, "[opencl_splittoning] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}
int process_cl (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out, const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out) { dt_iop_borders_data_t *d = (dt_iop_borders_data_t *)piece->data; dt_iop_borders_global_data_t *gd = (dt_iop_borders_global_data_t *)self->data; cl_int err = -999; const int devid = piece->pipe->devid; const int width = roi_out->width; const int height = roi_out->height; const int border_tot_width = (piece->buf_out.width - piece->buf_in.width ) * roi_in->scale; const int border_tot_height = (piece->buf_out.height - piece->buf_in.height) * roi_in->scale; const int border_size_t = border_tot_height*d->pos_v; const int border_size_b = border_tot_height - border_size_t; const int border_size_l = border_tot_width*d->pos_h; const int border_size_r = border_tot_width - border_size_l; const int border_in_x = MAX(border_size_l - roi_out->x, 0); const int border_in_y = MAX(border_size_t - roi_out->y, 0); // ----- Filling border const float col[4] = {d->color[0], d->color[1], d->color[2], 1.0f}; size_t sizes[2] = { ROUNDUPWD(width), ROUNDUPHT(height) }; const int zero = 0; dt_opencl_set_kernel_arg(devid, gd->kernel_borders_fill, 0, sizeof(cl_mem), &dev_out); dt_opencl_set_kernel_arg(devid, gd->kernel_borders_fill, 1, sizeof(int), &zero); dt_opencl_set_kernel_arg(devid, gd->kernel_borders_fill, 2, sizeof(int), &zero); dt_opencl_set_kernel_arg(devid, gd->kernel_borders_fill, 3, sizeof(int), &width); dt_opencl_set_kernel_arg(devid, gd->kernel_borders_fill, 4, sizeof(int), &height); dt_opencl_set_kernel_arg(devid, gd->kernel_borders_fill, 5, 4*sizeof(float), &col); err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_borders_fill, sizes); if(err != CL_SUCCESS) goto error; // ----- Frame line const int border_min_size = MIN(MIN(border_size_t, border_size_b), MIN(border_size_l, border_size_r)); const int frame_size = border_min_size * d->frame_size; if (frame_size != 0) { const float col_frame[4] = {d->frame_color[0], 
d->frame_color[1], d->frame_color[2], 1.0f}; const int image_lx = border_size_l - roi_out->x; const int image_ty = border_size_t - roi_out->y; const int frame_space = border_min_size - frame_size; const int frame_offset = frame_space * d->frame_offset; const int frame_tl_in_x = MAX(border_in_x - frame_offset, 0); const int frame_tl_out_x = MAX(frame_tl_in_x - frame_size, 0); const int frame_tl_in_y = MAX(border_in_y - frame_offset, 0); const int frame_tl_out_y = MAX(frame_tl_in_y - frame_size, 0); const int frame_in_width = floor((piece->buf_in.width * roi_in->scale) + frame_offset*2); const int frame_in_height = floor((piece->buf_in.height * roi_in->scale) + frame_offset*2); const int frame_out_width = frame_in_width + frame_size*2; const int frame_out_height = frame_in_height + frame_size*2; const int frame_br_in_x = CLAMP(image_lx - frame_offset + frame_in_width, 0, roi_out->width); const int frame_br_in_y = CLAMP(image_ty - frame_offset + frame_in_height, 0, roi_out->height); // ... if 100% frame_offset we ensure frame_line "stick" the out border const int frame_br_out_x = (d->frame_offset == 1.0f) ? (roi_out->width) : CLAMP(image_lx - frame_offset - frame_size + frame_out_width, 0, roi_out->width); const int frame_br_out_y = (d->frame_offset == 1.0f) ? 
(roi_out->height) : CLAMP(image_ty - frame_offset - frame_size + frame_out_height, 0, roi_out->height); const int roi_frame_in_width = frame_br_in_x - frame_tl_in_x; const int roi_frame_in_height = frame_br_in_y - frame_tl_in_y; const int roi_frame_out_width = frame_br_out_x - frame_tl_out_x; const int roi_frame_out_height = frame_br_out_y - frame_tl_out_y; dt_opencl_set_kernel_arg(devid, gd->kernel_borders_fill, 0, sizeof(cl_mem), &dev_out); dt_opencl_set_kernel_arg(devid, gd->kernel_borders_fill, 1, sizeof(int), &frame_tl_out_x); dt_opencl_set_kernel_arg(devid, gd->kernel_borders_fill, 2, sizeof(int), &frame_tl_out_y); dt_opencl_set_kernel_arg(devid, gd->kernel_borders_fill, 3, sizeof(int), &roi_frame_out_width); dt_opencl_set_kernel_arg(devid, gd->kernel_borders_fill, 4, sizeof(int), &roi_frame_out_height); dt_opencl_set_kernel_arg(devid, gd->kernel_borders_fill, 5, 4*sizeof(float), &col_frame); err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_borders_fill, sizes); if(err != CL_SUCCESS) goto error; dt_opencl_set_kernel_arg(devid, gd->kernel_borders_fill, 0, sizeof(cl_mem), &dev_out); dt_opencl_set_kernel_arg(devid, gd->kernel_borders_fill, 1, sizeof(int), &frame_tl_in_x); dt_opencl_set_kernel_arg(devid, gd->kernel_borders_fill, 2, sizeof(int), &frame_tl_in_y); dt_opencl_set_kernel_arg(devid, gd->kernel_borders_fill, 3, sizeof(int), &roi_frame_in_width); dt_opencl_set_kernel_arg(devid, gd->kernel_borders_fill, 4, sizeof(int), &roi_frame_in_height); dt_opencl_set_kernel_arg(devid, gd->kernel_borders_fill, 5, 4*sizeof(float), &col); err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_borders_fill, sizes); if(err != CL_SUCCESS) goto error; } size_t iorigin[] = { 0, 0, 0}; size_t oorigin[] = { border_in_x, border_in_y, 0}; size_t region[] = { roi_in->width, roi_in->height, 1}; // copy original input from dev_in -> dev_out as starting point err = dt_opencl_enqueue_copy_image(devid, dev_in, dev_out, iorigin, oorigin, region); if(err != CL_SUCCESS) goto error; return 
TRUE; error: dt_print(DT_DEBUG_OPENCL, "[opencl_borders] couldn't enqueue kernel! %d\n", err); return FALSE; }
int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out) { dt_iop_global_tonemap_data_t *d = (dt_iop_global_tonemap_data_t *)piece->data; dt_iop_global_tonemap_global_data_t *gd = (dt_iop_global_tonemap_global_data_t *)self->data; dt_iop_global_tonemap_gui_data_t *g = (dt_iop_global_tonemap_gui_data_t *)self->gui_data; dt_bilateral_cl_t *b = NULL; cl_int err = -999; cl_mem dev_m = NULL; cl_mem dev_r = NULL; float *maximum = NULL; const int devid = piece->pipe->devid; int gtkernel = -1; const int width = roi_out->width; const int height = roi_out->height; float parameters[4] = { 0.0f }; switch(d->operator) { case OPERATOR_REINHARD: gtkernel = gd->kernel_global_tonemap_reinhard; break; case OPERATOR_DRAGO: gtkernel = gd->kernel_global_tonemap_drago; break; case OPERATOR_FILMIC: gtkernel = gd->kernel_global_tonemap_filmic; break; } if(d->operator== OPERATOR_DRAGO) { const float eps = 0.0001f; float tmp_lwmax = NAN; // see comments in process() about lwmax value if(self->dev->gui_attached && g && piece->pipe->type == DT_DEV_PIXELPIPE_FULL) { dt_pthread_mutex_lock(&g->lock); const uint64_t hash = g->hash; dt_pthread_mutex_unlock(&g->lock); if(hash != 0 && !dt_dev_sync_pixelpipe_hash(self->dev, piece->pipe, 0, self->priority, &g->lock, &g->hash)) dt_control_log(_("inconsistent output")); dt_pthread_mutex_lock(&g->lock); tmp_lwmax = g->lwmax; dt_pthread_mutex_unlock(&g->lock); } if(isnan(tmp_lwmax)) { dt_opencl_local_buffer_t flocopt = (dt_opencl_local_buffer_t){ .xoffset = 0, .xfactor = 1, .yoffset = 0, .yfactor = 1, .cellsize = sizeof(float), .overhead = 0, .sizex = 1 << 4, .sizey = 1 << 4 }; if(!dt_opencl_local_buffer_opt(devid, gd->kernel_pixelmax_first, &flocopt)) goto error; const size_t bwidth = ROUNDUP(width, flocopt.sizex); const size_t bheight = ROUNDUP(height, flocopt.sizey); const int bufsize = (bwidth / flocopt.sizex) * (bheight / 
flocopt.sizey); dt_opencl_local_buffer_t slocopt = (dt_opencl_local_buffer_t){ .xoffset = 0, .xfactor = 1, .yoffset = 0, .yfactor = 1, .cellsize = sizeof(float), .overhead = 0, .sizex = 1 << 16, .sizey = 1 }; if(!dt_opencl_local_buffer_opt(devid, gd->kernel_pixelmax_second, &slocopt)) goto error; const int reducesize = MIN(REDUCESIZE, ROUNDUP(bufsize, slocopt.sizex) / slocopt.sizex); size_t sizes[3]; size_t local[3]; dev_m = dt_opencl_alloc_device_buffer(devid, (size_t)bufsize * sizeof(float)); if(dev_m == NULL) goto error; dev_r = dt_opencl_alloc_device_buffer(devid, (size_t)reducesize * sizeof(float)); if(dev_r == NULL) goto error; sizes[0] = bwidth; sizes[1] = bheight; sizes[2] = 1; local[0] = flocopt.sizex; local[1] = flocopt.sizey; local[2] = 1; dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_first, 0, sizeof(cl_mem), &dev_in); dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_first, 1, sizeof(int), &width); dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_first, 2, sizeof(int), &height); dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_first, 3, sizeof(cl_mem), &dev_m); dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_first, 4, flocopt.sizex * flocopt.sizey * sizeof(float), NULL); err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_pixelmax_first, sizes, local); if(err != CL_SUCCESS) goto error; sizes[0] = reducesize * slocopt.sizex; sizes[1] = 1; sizes[2] = 1; local[0] = slocopt.sizex; local[1] = 1; local[2] = 1; dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_second, 0, sizeof(cl_mem), &dev_m); dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_second, 1, sizeof(cl_mem), &dev_r); dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_second, 2, sizeof(int), &bufsize); dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_second, 3, slocopt.sizex * sizeof(float), NULL); err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_pixelmax_second, sizes, local); if(err != CL_SUCCESS) goto error; maximum = dt_alloc_align(16, 
reducesize * sizeof(float)); err = dt_opencl_read_buffer_from_device(devid, (void *)maximum, dev_r, 0, (size_t)reducesize * sizeof(float), CL_TRUE); if(err != CL_SUCCESS) goto error; dt_opencl_release_mem_object(dev_r); dt_opencl_release_mem_object(dev_m); dev_r = dev_m = NULL; for(int k = 1; k < reducesize; k++) { float mine = maximum[0]; float other = maximum[k]; maximum[0] = (other > mine) ? other : mine; } tmp_lwmax = MAX(eps, (maximum[0] * 0.01f)); dt_free_align(maximum); maximum = NULL; } const float lwmax = tmp_lwmax; const float ldc = d->drago.max_light * 0.01f / log10f(lwmax + 1.0f); const float bl = logf(MAX(eps, d->drago.bias)) / logf(0.5f); parameters[0] = eps; parameters[1] = ldc; parameters[2] = bl; parameters[3] = lwmax; if(self->dev->gui_attached && g && piece->pipe->type == DT_DEV_PIXELPIPE_PREVIEW) { uint64_t hash = dt_dev_hash_plus(self->dev, piece->pipe, 0, self->priority); dt_pthread_mutex_lock(&g->lock); g->lwmax = lwmax; g->hash = hash; dt_pthread_mutex_unlock(&g->lock); } } const float scale = piece->iscale / roi_in->scale; const float sigma_r = 8.0f; // does not depend on scale const float iw = piece->buf_in.width / scale; const float ih = piece->buf_in.height / scale; const float sigma_s = fminf(iw, ih) * 0.03f; if(d->detail != 0.0f) { b = dt_bilateral_init_cl(devid, roi_in->width, roi_in->height, sigma_s, sigma_r); if(!b) goto error; // get detail from unchanged input buffer err = dt_bilateral_splat_cl(b, dev_in); if(err != CL_SUCCESS) goto error; } size_t sizes[2] = { ROUNDUPWD(width), ROUNDUPHT(height) }; dt_opencl_set_kernel_arg(devid, gtkernel, 0, sizeof(cl_mem), &dev_in); dt_opencl_set_kernel_arg(devid, gtkernel, 1, sizeof(cl_mem), &dev_out); dt_opencl_set_kernel_arg(devid, gtkernel, 2, sizeof(int), &width); dt_opencl_set_kernel_arg(devid, gtkernel, 3, sizeof(int), &height); dt_opencl_set_kernel_arg(devid, gtkernel, 4, 4 * sizeof(float), ¶meters); err = dt_opencl_enqueue_kernel_2d(devid, gtkernel, sizes); if(err != CL_SUCCESS) goto 
error; if(d->detail != 0.0f) { err = dt_bilateral_blur_cl(b); if(err != CL_SUCCESS) goto error; // and apply it to output buffer after logscale err = dt_bilateral_slice_to_output_cl(b, dev_in, dev_out, d->detail); if(err != CL_SUCCESS) goto error; dt_bilateral_free_cl(b); } return TRUE; error: if(b) dt_bilateral_free_cl(b); dt_opencl_release_mem_object(dev_m); dt_opencl_release_mem_object(dev_r); dt_free_align(maximum); dt_print(DT_DEBUG_OPENCL, "[opencl_global_tonemap] couldn't enqueue kernel! %d\n", err); return FALSE; } #endif void tiling_callback(struct dt_iop_module_t *self, struct dt_dev_pixelpipe_iop_t *piece, const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out, struct dt_develop_tiling_t *tiling) { dt_iop_global_tonemap_data_t *d = (dt_iop_global_tonemap_data_t *)piece->data; const float scale = piece->iscale / roi_in->scale; const float iw = piece->buf_in.width / scale; const float ih = piece->buf_in.height / scale; const float sigma_s = fminf(iw, ih) * 0.03f; const float sigma_r = 8.0f; const int detail = (d->detail != 0.0f); const int width = roi_in->width; const int height = roi_in->height; const int channels = piece->colors; const size_t basebuffer = width * height * channels * sizeof(float); tiling->factor = 2.0f + (detail ? (float)dt_bilateral_memory_use2(width, height, sigma_s, sigma_r) / basebuffer : 0.0f); tiling->maxbuf = (detail ? MAX(1.0f, (float)dt_bilateral_singlebuffer_size2(width, height, sigma_s, sigma_r) / basebuffer) : 1.0f); tiling->overhead = 0; tiling->overlap = (detail ? 
ceilf(4 * sigma_s) : 0); tiling->xalign = 1; tiling->yalign = 1; return; } void commit_params(struct dt_iop_module_t *self, dt_iop_params_t *p1, dt_dev_pixelpipe_t *pipe, dt_dev_pixelpipe_iop_t *piece) { dt_iop_global_tonemap_params_t *p = (dt_iop_global_tonemap_params_t *)p1; dt_iop_global_tonemap_data_t *d = (dt_iop_global_tonemap_data_t *)piece->data; d->operator= p->operator; d->drago.bias = p->drago.bias; d->drago.max_light = p->drago.max_light; d->detail = p->detail; // drago needs the maximum L-value of the whole image so it must not use tiling if(d->operator == OPERATOR_DRAGO) piece->process_tiling_ready = 0; #ifdef HAVE_OPENCL if(d->detail != 0.0f) piece->process_cl_ready = (piece->process_cl_ready && !(darktable.opencl->avoid_atomics)); #endif }
/* OpenCL path of highlight reconstruction.
 *
 * Dispatches to one of four kernels:
 *   - no filter pattern (non-raw):        4f clip kernel
 *   - raw, clip mode:                     1f clip kernel
 *   - raw, LCH mode, filters != 9u:       1f LCH bayer kernel
 *   - raw, LCH mode, filters == 9u:       1f LCH xtrans kernel (local memory)
 * After a successful run all three processed maxima of the pipe are set to
 * the largest of them.
 *
 * Returns TRUE on success, FALSE if a buffer could not be created or a kernel
 * could not be enqueued. */
int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
               const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  dt_iop_highlights_data_t *d = (dt_iop_highlights_data_t *)piece->data;
  dt_iop_highlights_global_data_t *gd = (dt_iop_highlights_global_data_t *)self->data;

  cl_int err = -999;
  cl_mem dev_xtrans = NULL;

  const int devid = piece->pipe->devid;
  const int width = roi_in->width;
  const int height = roi_in->height;

  /* clipping threshold: clip factor times the smallest processed maximum */
  const float clip = d->clip * fminf(piece->pipe->dsc.processed_maximum[0],
                                     fminf(piece->pipe->dsc.processed_maximum[1],
                                           piece->pipe->dsc.processed_maximum[2]));

  const uint32_t filters = piece->pipe->dsc.filters;

  if(!filters)
  {
    /* non-raw input: dedicated kernel that only clips */
    size_t sizes[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1 };
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f_clip, 0, sizeof(cl_mem), (void *)&dev_in);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f_clip, 1, sizeof(cl_mem), (void *)&dev_out);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f_clip, 2, sizeof(int), (void *)&width);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f_clip, 3, sizeof(int), (void *)&height);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f_clip, 4, sizeof(int), (void *)&d->mode);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f_clip, 5, sizeof(float), (void *)&clip);
    err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_highlights_4f_clip, sizes);
    if(err != CL_SUCCESS) goto error;
  }
  else if(d->mode == DT_IOP_HIGHLIGHTS_CLIP)
  {
    /* raw input, clip mode (bayer and xtrans alike) */
    size_t sizes[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1 };
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_clip, 0, sizeof(cl_mem), (void *)&dev_in);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_clip, 1, sizeof(cl_mem), (void *)&dev_out);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_clip, 2, sizeof(int), (void *)&width);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_clip, 3, sizeof(int), (void *)&height);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_clip, 4, sizeof(float), (void *)&clip);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_clip, 5, sizeof(int), (void *)&roi_out->x);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_clip, 6, sizeof(int), (void *)&roi_out->y);
    dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_clip, 7, sizeof(int), (void *)&filters);
    err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_highlights_1f_clip, sizes);
    if(err != CL_SUCCESS) goto error;
  }
  else if(d->mode == DT_IOP_HIGHLIGHTS_LCH)
  {
    if(filters != 9u)
    {
      /* bayer raw, LCH mode */
      size_t sizes[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1 };
      dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_bayer, 0, sizeof(cl_mem), (void *)&dev_in);
      dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_bayer, 1, sizeof(cl_mem), (void *)&dev_out);
      dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_bayer, 2, sizeof(int), (void *)&width);
      dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_bayer, 3, sizeof(int), (void *)&height);
      dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_bayer, 4, sizeof(float), (void *)&clip);
      dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_bayer, 5, sizeof(int), (void *)&roi_out->x);
      dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_bayer, 6, sizeof(int), (void *)&roi_out->y);
      dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_bayer, 7, sizeof(int), (void *)&filters);
      err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_highlights_1f_lch_bayer, sizes);
      if(err != CL_SUCCESS) goto error;
    }
    else
    {
      /* xtrans raw, LCH mode: work-group size is negotiated, 1x1 as fallback */
      int blocksizex = 1;
      int blocksizey = 1;

      dt_opencl_local_buffer_t locopt
          = (dt_opencl_local_buffer_t){ .xoffset = 2 * 2, .xfactor = 1, .yoffset = 2 * 2, .yfactor = 1,
                                        .cellsize = sizeof(float), .overhead = 0,
                                        .sizex = 1 << 8, .sizey = 1 << 8 };

      if(dt_opencl_local_buffer_opt(devid, gd->kernel_highlights_1f_lch_xtrans, &locopt))
      {
        blocksizex = locopt.sizex;
        blocksizey = locopt.sizey;
      }

      dev_xtrans = dt_opencl_copy_host_to_device_constant(devid, sizeof(piece->pipe->dsc.xtrans),
                                                          piece->pipe->dsc.xtrans);
      if(dev_xtrans == NULL) goto error;

      size_t sizes[] = { ROUNDUP(width, blocksizex), ROUNDUP(height, blocksizey), 1 };
      size_t local[] = { blocksizex, blocksizey, 1 };
      dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_xtrans, 0, sizeof(cl_mem), (void *)&dev_in);
      dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_xtrans, 1, sizeof(cl_mem), (void *)&dev_out);
      dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_xtrans, 2, sizeof(int), (void *)&width);
      dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_xtrans, 3, sizeof(int), (void *)&height);
      dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_xtrans, 4, sizeof(float), (void *)&clip);
      dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_xtrans, 5, sizeof(int), (void *)&roi_out->x);
      dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_xtrans, 6, sizeof(int), (void *)&roi_out->y);
      dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_xtrans, 7, sizeof(cl_mem), (void *)&dev_xtrans);
      /* local scratch: one work-group tile plus a 4-cell margin in each dimension */
      dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_xtrans, 8,
                               (blocksizex + 4) * (blocksizey + 4) * sizeof(float), NULL);
      err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_highlights_1f_lch_xtrans, sizes, local);
      if(err != CL_SUCCESS) goto error;
    }
  }

  /* update processed maximum: all channels now share the largest value */
  const float m = fmaxf(fmaxf(piece->pipe->dsc.processed_maximum[0], piece->pipe->dsc.processed_maximum[1]),
                        piece->pipe->dsc.processed_maximum[2]);
  for(int k = 0; k < 3; k++) piece->pipe->dsc.processed_maximum[k] = m;

  dt_opencl_release_mem_object(dev_xtrans);
  return TRUE;

error:
  dt_opencl_release_mem_object(dev_xtrans);
  dt_print(DT_DEBUG_OPENCL, "[opencl_highlights] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}
/**
 * OpenCL code path of the monochrome module.
 *
 * Runs three stages on the device:
 *   1. kernel_monochrome_filter writes a filtered version of dev_in into a
 *      temporary device buffer,
 *   2. that buffer is smoothed in place with the bilateral filter
 *      (splat / blur / slice),
 *   3. kernel_monochrome combines dev_in and the smoothed buffer into dev_out.
 *
 * @param self     module instance; self->data holds the global kernel handles
 * @param piece    pixelpipe piece; piece->data holds the module parameters
 * @param dev_in   input device buffer
 * @param dev_out  output device buffer
 * @param roi_in   input region of interest (sizes the temporary buffer and the bilateral grid)
 * @param roi_out  output region of interest (sizes the kernel launches)
 * @return TRUE on success, FALSE if an allocation or any OpenCL step failed.
 */
int process_cl (struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
                const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  dt_iop_monochrome_data_t *d = (dt_iop_monochrome_data_t *)piece->data;
  dt_iop_monochrome_global_data_t *gd = (dt_iop_monochrome_global_data_t *)self->data;

  cl_int err = -999;
  // Both resources start out as NULL so that the shared cleanup path below can
  // be reached from any point without touching indeterminate pointers.
  cl_mem dev_tmp = NULL;
  dt_bilateral_cl_t *b = NULL;

  const int devid = piece->pipe->devid;
  const int width = roi_out->width;
  const int height = roi_out->height;
  const float sigma2 = (d->size*128.0)*(d->size*128.0f);

  const float scale = piece->iscale/roi_in->scale;
  const float sigma_r = 250.0f; // does not depend on scale
  const float sigma_s = 20.0f / scale;
  const float detail = -1.0f; // bilateral base layer

  // Three entries for consistency with the other process_cl implementations;
  // a 2d enqueue only needs the first two.
  size_t sizes[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1 };

  dev_tmp = dt_opencl_alloc_device(devid, roi_in->width, roi_in->height, 4*sizeof(float));
  // Bail out before the buffer is bound as a kernel argument: the results of
  // dt_opencl_set_kernel_arg are not checked below, so a failed allocation
  // must be caught here.
  if(dev_tmp == NULL) goto error;

  b = dt_bilateral_init_cl(devid, roi_in->width, roi_in->height, sigma_s, sigma_r);
  if(!b) goto error;

  // stage 1: filter the input into the temporary buffer
  dt_opencl_set_kernel_arg(devid, gd->kernel_monochrome_filter, 0, sizeof(cl_mem), &dev_in);
  dt_opencl_set_kernel_arg(devid, gd->kernel_monochrome_filter, 1, sizeof(cl_mem), &dev_tmp);
  dt_opencl_set_kernel_arg(devid, gd->kernel_monochrome_filter, 2, sizeof(int), &width);
  dt_opencl_set_kernel_arg(devid, gd->kernel_monochrome_filter, 3, sizeof(int), &height);
  dt_opencl_set_kernel_arg(devid, gd->kernel_monochrome_filter, 4, sizeof(float), &d->a);
  dt_opencl_set_kernel_arg(devid, gd->kernel_monochrome_filter, 5, sizeof(float), &d->b);
  dt_opencl_set_kernel_arg(devid, gd->kernel_monochrome_filter, 6, sizeof(float), &sigma2);
  err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_monochrome_filter, sizes);
  if(err != CL_SUCCESS) goto error;

  // stage 2: bilateral smoothing of the temporary buffer, in place
  err = dt_bilateral_splat_cl(b, dev_tmp);
  if(err != CL_SUCCESS) goto error;
  err = dt_bilateral_blur_cl(b);
  if(err != CL_SUCCESS) goto error;
  err = dt_bilateral_slice_cl(b, dev_tmp, dev_tmp, detail);
  if(err != CL_SUCCESS) goto error;

  dt_bilateral_free_cl(b);
  b = NULL; // make sure we don't do double cleanup in case the next few lines err out

  // stage 3: combine the input and the smoothed buffer into the output
  dt_opencl_set_kernel_arg(devid, gd->kernel_monochrome, 0, sizeof(cl_mem), &dev_in);
  dt_opencl_set_kernel_arg(devid, gd->kernel_monochrome, 1, sizeof(cl_mem), &dev_tmp);
  dt_opencl_set_kernel_arg(devid, gd->kernel_monochrome, 2, sizeof(cl_mem), &dev_out);
  dt_opencl_set_kernel_arg(devid, gd->kernel_monochrome, 3, sizeof(int), &width);
  dt_opencl_set_kernel_arg(devid, gd->kernel_monochrome, 4, sizeof(int), &height);
  dt_opencl_set_kernel_arg(devid, gd->kernel_monochrome, 5, sizeof(float), &d->a);
  dt_opencl_set_kernel_arg(devid, gd->kernel_monochrome, 6, sizeof(float), &d->b);
  dt_opencl_set_kernel_arg(devid, gd->kernel_monochrome, 7, sizeof(float), &sigma2);
  dt_opencl_set_kernel_arg(devid, gd->kernel_monochrome, 8, sizeof(float), &d->highlights);
  err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_monochrome, sizes);
  if(err != CL_SUCCESS) goto error;

  dt_opencl_release_mem_object(dev_tmp);
  return TRUE;

error:
  if(dev_tmp != NULL) dt_opencl_release_mem_object(dev_tmp);
  // b may be NULL here (allocation or init failure, or already freed above);
  // dt_bilateral_free_cl is called with NULL on those paths, as it already was
  // for the init-failure case.
  dt_bilateral_free_cl(b);
  dt_print(DT_DEBUG_OPENCL, "[opencl_monochrome] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}
int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out) { dt_iop_highlights_data_t *d = (dt_iop_highlights_data_t *)piece->data; dt_iop_highlights_global_data_t *gd = (dt_iop_highlights_global_data_t *)self->data; cl_int err = -999; cl_mem dev_xtrans = NULL; const int devid = piece->pipe->devid; const int width = roi_in->width; const int height = roi_in->height; const float clip = d->clip * fminf(piece->pipe->dsc.processed_maximum[0], fminf(piece->pipe->dsc.processed_maximum[1], piece->pipe->dsc.processed_maximum[2])); const uint32_t filters = piece->pipe->dsc.filters; if(!filters) { // non-raw images use dedicated kernel which just clips size_t sizes[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1 }; dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f_clip, 0, sizeof(cl_mem), (void *)&dev_in); dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f_clip, 1, sizeof(cl_mem), (void *)&dev_out); dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f_clip, 2, sizeof(int), (void *)&width); dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f_clip, 3, sizeof(int), (void *)&height); dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f_clip, 4, sizeof(int), (void *)&d->mode); dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_4f_clip, 5, sizeof(float), (void *)&clip); err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_highlights_4f_clip, sizes); if(err != CL_SUCCESS) goto error; } else if(d->mode == DT_IOP_HIGHLIGHTS_CLIP) { // raw images with clip mode (both bayer and xtrans) size_t sizes[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1 }; dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_clip, 0, sizeof(cl_mem), (void *)&dev_in); dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_clip, 1, sizeof(cl_mem), (void *)&dev_out); dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_clip, 2, sizeof(int), (void *)&width); 
dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_clip, 3, sizeof(int), (void *)&height); dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_clip, 4, sizeof(float), (void *)&clip); dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_clip, 5, sizeof(int), (void *)&roi_out->x); dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_clip, 6, sizeof(int), (void *)&roi_out->y); dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_clip, 7, sizeof(int), (void *)&filters); err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_highlights_1f_clip, sizes); if(err != CL_SUCCESS) goto error; } else if(d->mode == DT_IOP_HIGHLIGHTS_LCH && filters != 9u) { // bayer sensor raws with LCH mode size_t sizes[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1 }; dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_bayer, 0, sizeof(cl_mem), (void *)&dev_in); dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_bayer, 1, sizeof(cl_mem), (void *)&dev_out); dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_bayer, 2, sizeof(int), (void *)&width); dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_bayer, 3, sizeof(int), (void *)&height); dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_bayer, 4, sizeof(float), (void *)&clip); dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_bayer, 5, sizeof(int), (void *)&roi_out->x); dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_bayer, 6, sizeof(int), (void *)&roi_out->y); dt_opencl_set_kernel_arg(devid, gd->kernel_highlights_1f_lch_bayer, 7, sizeof(int), (void *)&filters); err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_highlights_1f_lch_bayer, sizes); if(err != CL_SUCCESS) goto error; } else if(d->mode == DT_IOP_HIGHLIGHTS_LCH && filters == 9u) { // xtrans sensor raws with LCH mode // we use local buffering for speed reasons; determine suited work group size size_t maxsizes[3] = { 0 }; // the maximum dimensions for a work group size_t workgroupsize = 0; // the 
maximum number of items in a work group unsigned long localmemsize = 0; // the maximum amount of local memory we can use size_t kernelworkgroupsize = 0; // the maximum amount of items in work group for this kernel int blocksizex = 1 << 8; int blocksizey = 1 << 8; if(dt_opencl_get_work_group_limits(devid, maxsizes, &workgroupsize, &localmemsize) == CL_SUCCESS && dt_opencl_get_kernel_work_group_size(devid, gd->kernel_highlights_1f_lch_xtrans, &kernelworkgroupsize) == CL_SUCCESS) { while(maxsizes[0] < blocksizex || maxsizes[1] < blocksizey || localmemsize < (blocksizex + 4) * (blocksizey + 4) * sizeof(float) || workgroupsize < blocksizex * blocksizey || kernelworkgroupsize < blocksizex * blocksizey) { if(blocksizex == 1 && blocksizey == 1) break; if(blocksizex > blocksizey) blocksizex >>= 1; else blocksizey >>= 1; } }
int process_cl(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out) { dt_iop_rawprepare_data_t *d = (dt_iop_rawprepare_data_t *)piece->data; dt_iop_rawprepare_global_data_t *gd = (dt_iop_rawprepare_global_data_t *)self->data; const int devid = piece->pipe->devid; cl_mem dev_sub = NULL; cl_mem dev_div = NULL; cl_int err = -999; int kernel = -1; if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && piece->pipe->filters) { kernel = gd->kernel_rawprepare_1f; } else { kernel = gd->kernel_rawprepare_4f; } const float scale = roi_in->scale / piece->iscale; const int csx = (int)roundf((float)d->x * scale), csy = (int)roundf((float)d->y * scale); dev_sub = dt_opencl_copy_host_to_device_constant(devid, sizeof(float) * 4, d->sub); if(dev_sub == NULL) goto error; dev_div = dt_opencl_copy_host_to_device_constant(devid, sizeof(float) * 4, d->div); if(dev_div == NULL) goto error; const int width = roi_out->width; const int height = roi_out->height; size_t sizes[] = { ROUNDUPWD(roi_in->width), ROUNDUPHT(roi_in->height), 1 }; dt_opencl_set_kernel_arg(devid, kernel, 0, sizeof(cl_mem), (void *)&dev_in); dt_opencl_set_kernel_arg(devid, kernel, 1, sizeof(cl_mem), (void *)&dev_out); dt_opencl_set_kernel_arg(devid, kernel, 2, sizeof(int), (void *)&(width)); dt_opencl_set_kernel_arg(devid, kernel, 3, sizeof(int), (void *)&(height)); dt_opencl_set_kernel_arg(devid, kernel, 4, sizeof(int), (void *)&csx); dt_opencl_set_kernel_arg(devid, kernel, 5, sizeof(int), (void *)&csy); dt_opencl_set_kernel_arg(devid, kernel, 6, sizeof(cl_mem), (void *)&dev_sub); dt_opencl_set_kernel_arg(devid, kernel, 7, sizeof(cl_mem), (void *)&dev_div); dt_opencl_set_kernel_arg(devid, kernel, 8, sizeof(uint32_t), (void *)&roi_out->x); dt_opencl_set_kernel_arg(devid, kernel, 9, sizeof(uint32_t), (void *)&roi_out->y); err = dt_opencl_enqueue_kernel_2d(devid, kernel, sizes); if(err != CL_SUCCESS) goto error; 
dt_opencl_release_mem_object(dev_sub); dt_opencl_release_mem_object(dev_div); if(!dt_dev_pixelpipe_uses_downsampled_input(piece->pipe) && piece->pipe->filters) { piece->pipe->filters = dt_rawspeed_crop_dcraw_filters(self->dev->image_storage.filters, csx, csy); adjust_xtrans_filters(piece->pipe, csx, csy); } return TRUE; error: if(dev_sub != NULL) dt_opencl_release_mem_object(dev_sub); if(dev_div != NULL) dt_opencl_release_mem_object(dev_div); dt_print(DT_DEBUG_OPENCL, "[opencl_rawprepare] couldn't enqueue kernel! %d\n", err); return FALSE; }
/**
 * OpenCL code path of the graduated density module.
 *
 * All geometry is reduced on the host to three scalars (a base value plus one
 * increment per x and per y step) which are handed to the kernel together
 * with the filter colour and density. One of two kernels is launched,
 * selected by the sign of the density.
 *
 * @param self     module instance; self->data holds the global kernel handles
 * @param piece    pixelpipe piece; piece->data holds the module parameters
 * @param dev_in   input device buffer
 * @param dev_out  output device buffer
 * @param roi_in   input region of interest (size and origin of the processed area)
 * @param roi_out  output region of interest (provides the scale)
 * @return TRUE on success, FALSE if the kernel could not be enqueued.
 */
int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
               const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  dt_iop_graduatednd_data_t *data = (dt_iop_graduatednd_data_t *)piece->data;
  dt_iop_graduatednd_global_data_t *gd = (dt_iop_graduatednd_global_data_t *)self->data;

  const int devid = piece->pipe->devid;
  const int width = roi_in->width;
  const int height = roi_in->height;

  // origin of the roi and half extents of the scaled full input buffer
  const int origin_x = (roi_in->x);
  const int origin_y = (roi_in->y);
  const float full_w = piece->buf_in.width * roi_out->scale;
  const float full_h = piece->buf_in.height * roi_out->scale;
  const float half_w = full_w / 2.0;
  const float half_h = full_h / 2.0;
  const float half_w_inv = 1.0 / half_w;
  const float half_h_inv = 1.0 / half_h;

  // rotation is given in degrees; negate and convert to radians
  const float angle = (-data->rotation / 180) * M_PI;
  const float sin_a = sin(angle);
  const float cos_a = cos(angle);

  // half diagonal of the image, expressed in units of the half height
  const float radius = sqrt((half_h * half_h) + (half_w * half_w)) / half_h;
  const float offset = data->offset / 100.0 * 2;
  const float density = data->density;

#if 1
  const float filter_compression
      = 1.0 / radius / (1.0 - (0.5 + (data->compression / 100.0) * 0.9 / 2.0)) * 0.5;
#else
  const float compression = data->compression / 100.0f;
  const float t = 1.0f - .8f / (.8f + compression);
  const float c = 1.0f + 1000.0f * powf(4.0, compression);
#endif

  // value at the roi origin and its change per pixel step in x and y
  const float length_base
      = (sin_a * (-1.0 + origin_x * half_w_inv) - cos_a * (-1.0 + origin_y * half_h_inv) - 1.0 + offset)
        * filter_compression;
  const float length_inc_y = -cos_a * half_h_inv * filter_compression;
  const float length_inc_x = sin_a * half_w_inv * filter_compression;

  size_t grid[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1 };

  // positive and non-positive densities are handled by separate kernels
  int kernel;
  if(density > 0)
    kernel = gd->kernel_graduatedndp;
  else
    kernel = gd->kernel_graduatedndm;

  int argn = 0;
  dt_opencl_set_kernel_arg(devid, kernel, argn++, sizeof(cl_mem), (void *)&dev_in);
  dt_opencl_set_kernel_arg(devid, kernel, argn++, sizeof(cl_mem), (void *)&dev_out);
  dt_opencl_set_kernel_arg(devid, kernel, argn++, sizeof(int), (void *)&width);
  dt_opencl_set_kernel_arg(devid, kernel, argn++, sizeof(int), (void *)&height);
  dt_opencl_set_kernel_arg(devid, kernel, argn++, 4 * sizeof(float), (void *)data->color);
  dt_opencl_set_kernel_arg(devid, kernel, argn++, sizeof(float), (void *)&density);
  dt_opencl_set_kernel_arg(devid, kernel, argn++, sizeof(float), (void *)&length_base);
  dt_opencl_set_kernel_arg(devid, kernel, argn++, sizeof(float), (void *)&length_inc_x);
  dt_opencl_set_kernel_arg(devid, kernel, argn++, sizeof(float), (void *)&length_inc_y);

  const cl_int err = dt_opencl_enqueue_kernel_2d(devid, kernel, grid);
  if(err == CL_SUCCESS) return TRUE;

  dt_print(DT_DEBUG_OPENCL, "[opencl_graduatednd] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}