static gboolean cl_gaussian_blur (cl_mem in_tex, cl_mem out_tex, const GeglRectangle *roi, cl_mem cl_cmatrix, gint clen, GeglOrientation orientation) { cl_int cl_err = 0; size_t global_ws[2]; gint kernel_num; if (!cl_data) { const char *kernel_name[] = {"fir_ver_blur", "fir_hor_blur", NULL}; cl_data = gegl_cl_compile_and_build (gblur_1d_cl_source, kernel_name); } if (!cl_data) return TRUE; if (orientation == GEGL_ORIENTATION_VERTICAL) kernel_num = 0; else kernel_num = 1; global_ws[0] = roi->width; global_ws[1] = roi->height; cl_err = gegl_cl_set_kernel_args (cl_data->kernel[kernel_num], sizeof(cl_mem), (void*)&in_tex, sizeof(cl_mem), (void*)&out_tex, sizeof(cl_mem), (void*)&cl_cmatrix, sizeof(cl_int), (void*)&clen, NULL); CL_CHECK; cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (), cl_data->kernel[kernel_num], 2, NULL, global_ws, NULL, 0, NULL, NULL); CL_CHECK; cl_err = gegl_clFinish (gegl_cl_get_command_queue ()); CL_CHECK; return FALSE; error: return TRUE; }
static gboolean cl_process (GeglOperation *operation, cl_mem in_buf, cl_mem out_buf, const size_t n_pixels, const GeglRectangle *roi, gint level) { GeglOperationClass *operation_class = GEGL_OPERATION_GET_CLASS (operation); GeglClRunData *cl_data = operation_class->cl_data; GeglProperties *o = GEGL_PROPERTIES (operation); const size_t gbl_size[2] = {roi->width, roi->height}; const size_t gbl_off[2] = {roi->x, roi->y}; cl_int cl_err = 0; cl_mem filter_pat = NULL; if (!cl_data) goto error; filter_pat = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, pattern_width[o->pattern] * pattern_height[o->pattern] * sizeof(cl_int), (void*)pattern[o->pattern], &cl_err); CL_CHECK; cl_err = gegl_cl_set_kernel_args (cl_data->kernel[0], sizeof(cl_mem), &in_buf, sizeof(cl_mem), &out_buf, sizeof(cl_mem), &filter_pat, sizeof(cl_int), &pattern_width[o->pattern], sizeof(cl_int), &pattern_height[o->pattern], sizeof(cl_int), &o->additive, sizeof(cl_int), &o->rotated, NULL); CL_CHECK; cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (), cl_data->kernel[0], 2, gbl_off, gbl_size, NULL, 0, NULL, NULL); CL_CHECK; cl_err = gegl_clFinish (gegl_cl_get_command_queue ()); CL_CHECK; cl_err = gegl_clReleaseMemObject (filter_pat); CL_CHECK; return FALSE; error: if (filter_pat) gegl_clReleaseMemObject (filter_pat); return TRUE; }
static gboolean cl_process (GeglOperation *operation, cl_mem out_tex, const GeglRectangle *roi) { GeglProperties *o = GEGL_PROPERTIES (operation); const size_t gbl_size[] = { roi->width, roi->height }; cl_int cl_err = 0; cl_int offset_x; cl_int offset_y; cl_int width; cl_int height; cl_float3 sedges; cl_float3 contours; cl_float3 frequency; cl_float brightness; cl_float polarization; cl_float scattering; cl_int iterations; cl_float weird_factor; if (!cl_data) { const char *kernel_name[] = { "cl_diffraction_patterns", NULL }; cl_data = gegl_cl_compile_and_build (diffraction_patterns_cl_source, kernel_name); if (!cl_data) return TRUE; } offset_x = roi->x; offset_y = roi->y; width = o->width; height = o->height; sedges.s[0] = o->red_sedges; sedges.s[1] = o->green_sedges; sedges.s[2] = o->blue_sedges; contours.s[0] = o->red_contours; contours.s[1] = o->green_contours; contours.s[2] = o->blue_contours; frequency.s[0] = o->red_frequency; frequency.s[1] = o->green_frequency; frequency.s[2] = o->blue_frequency; brightness = o->brightness; polarization = o->polarization; scattering = o->scattering; iterations = ITERATIONS; weird_factor = WEIRD_FACTOR; cl_err = gegl_cl_set_kernel_args (cl_data->kernel[0], sizeof(cl_mem), &out_tex, sizeof(cl_int), &offset_x, sizeof(cl_int), &offset_y, sizeof(cl_int), &width, sizeof(cl_int), &height, sizeof(cl_float3), &sedges, sizeof(cl_float3), &contours, sizeof(cl_float3), &frequency, sizeof(cl_float), &brightness, sizeof(cl_float), &polarization, sizeof(cl_float), &scattering, sizeof(cl_int), &iterations, sizeof(cl_float), &weird_factor, NULL); CL_CHECK; cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (), cl_data->kernel[0], 2, NULL, gbl_size, NULL, 0, NULL, NULL); CL_CHECK; cl_err = gegl_clFinish (gegl_cl_get_command_queue ()); CL_CHECK; return FALSE; error: return TRUE; }
static gboolean cl_bilateral (cl_mem in_tex, cl_mem out_tex, const GeglRectangle *roi, const GeglRectangle *src_rect, gint s_sigma, gfloat r_sigma) { cl_int cl_err = 0; gint c; const gint width = src_rect->width; const gint height = src_rect->height; const gint sw = (width -1) / s_sigma + 1; const gint sh = (height-1) / s_sigma + 1; const gint depth = (int)(1.0f / r_sigma) + 1; size_t global_ws[2]; size_t local_ws[2]; cl_mem grid = NULL; cl_mem blur[4] = {NULL, NULL, NULL, NULL}; if (!cl_data) { const char *kernel_name[] = {"bilateral_downsample", "bilateral_blur", "bilateral_interpolate", NULL}; cl_data = gegl_cl_compile_and_build (bilateral_filter_fast_cl_source, kernel_name); } if (!cl_data) return 1; grid = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_READ_WRITE, sw * sh * depth * sizeof(cl_float8), NULL, &cl_err); CL_CHECK; for(c = 0; c < 4; c++) { blur[c] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_READ_WRITE, sw * sh * depth * sizeof(cl_float2), NULL, &cl_err); CL_CHECK; } local_ws[0] = 8; local_ws[1] = 8; global_ws[0] = ((sw + local_ws[0] - 1)/local_ws[0])*local_ws[0]; global_ws[1] = ((sh + local_ws[1] - 1)/local_ws[1])*local_ws[1]; gegl_cl_set_kernel_args (cl_data->kernel[0], sizeof(cl_mem), &in_tex, sizeof(cl_mem), &grid, sizeof(cl_int), &width, sizeof(cl_int), &height, sizeof(cl_int), &sw, sizeof(cl_int), &sh, sizeof(cl_int), &depth, sizeof(cl_int), &s_sigma, sizeof(cl_float), &r_sigma, NULL); CL_CHECK; cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (), cl_data->kernel[0], 2, NULL, global_ws, local_ws, 0, NULL, NULL); CL_CHECK; local_ws[0] = 16; local_ws[1] = 16; global_ws[0] = ((sw + local_ws[0] - 1)/local_ws[0])*local_ws[0]; global_ws[1] = ((sh + local_ws[1] - 1)/local_ws[1])*local_ws[1]; gegl_cl_set_kernel_args (cl_data->kernel[1], sizeof(cl_mem), &grid, sizeof(cl_mem), &blur[0], sizeof(cl_mem), &blur[1], sizeof(cl_mem), &blur[2], sizeof(cl_mem), &blur[3], sizeof(cl_int), &sw, sizeof(cl_int), &sh, sizeof(cl_int), &depth, NULL); cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (), cl_data->kernel[1], 2, NULL, global_ws, local_ws, 0, NULL, NULL); CL_CHECK; global_ws[0] = width; global_ws[1] = height; gegl_cl_set_kernel_args (cl_data->kernel[2], sizeof(cl_mem), &in_tex, sizeof(cl_mem), &blur[0], sizeof(cl_mem), &blur[1], sizeof(cl_mem), &blur[2], sizeof(cl_mem), &blur[3], sizeof(cl_mem), &out_tex, sizeof(cl_int), &width, sizeof(cl_int), &sw, sizeof(cl_int), &sh, sizeof(cl_int), &depth, sizeof(cl_int), &s_sigma, sizeof(cl_float), &r_sigma, NULL); CL_CHECK; cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (), cl_data->kernel[2], 2, NULL, global_ws, NULL, 0, NULL, NULL); CL_CHECK; cl_err = gegl_clFinish (gegl_cl_get_command_queue ()); CL_CHECK; cl_err = gegl_clReleaseMemObject (grid); CL_CHECK_ONLY (cl_err); for(c = 0; c < 4; c++) { cl_err = gegl_clReleaseMemObject (blur[c]); CL_CHECK_ONLY (cl_err); } return FALSE; error: if (grid) gegl_clReleaseMemObject (grid); for (c = 0; c < 4; c++) { if (blur[c]) gegl_clReleaseMemObject (blur[c]); } return TRUE; }
static inline gboolean _gegl_buffer_cl_cache_flush2 (GeglTileHandlerCache *cache, const GeglRectangle *roi) { size_t size; GList *elem; GeglRectangle tmp; cl_int cl_err = 0; gpointer data; gboolean need_cl = FALSE; for (elem=cache_entries; elem; elem=elem->next) { CacheEntry *entry = elem->data; if (entry->valid && entry->tile_storage->cache == cache && (!roi || gegl_rectangle_intersect (&tmp, roi, &entry->roi))) { entry->valid = FALSE; entry->used ++; gegl_cl_color_babl (entry->buffer->soft_format, &size); data = g_malloc(entry->roi.width * entry->roi.height * size); cl_err = gegl_clEnqueueReadBuffer(gegl_cl_get_command_queue(), entry->tex, CL_TRUE, 0, entry->roi.width * entry->roi.height * size, data, 0, NULL, NULL); /* tile-ize */ gegl_buffer_set (entry->buffer, &entry->roi, 0, entry->buffer->soft_format, data, GEGL_AUTO_ROWSTRIDE); entry->used --; need_cl = TRUE; g_free(data); CL_CHECK; } } if (need_cl) { cl_err = gegl_clFinish (gegl_cl_get_command_queue ()); CL_CHECK; g_mutex_lock (&cache_mutex); while (cache_entry_find_invalid (&data)) { CacheEntry *entry = data; #if 1 GEGL_NOTE (GEGL_DEBUG_OPENCL, "Removing from cl-cache: %p %s {%d %d %d %d}", entry->buffer, babl_get_name(entry->buffer->soft_format), entry->roi.x, entry->roi.y, entry->roi.width, entry->roi.height); #endif gegl_clReleaseMemObject(entry->tex); memset (entry, 0x0, sizeof (CacheEntry)); g_slice_free (CacheEntry, data); cache_entries = g_list_remove (cache_entries, data); } g_mutex_unlock (&cache_mutex); } return TRUE; error: g_mutex_lock (&cache_mutex); while (cache_entry_find_invalid (&data)) { g_slice_free (CacheEntry, data); cache_entries = g_list_remove (cache_entries, data); } g_mutex_unlock (&cache_mutex); /* XXX : result is corrupted */ return FALSE; }
gboolean gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err) { GeglBufferClIterators *i = (gpointer)iterator; gboolean result = FALSE; gint no, j; cl_int cl_err = 0; if (i->is_finished) g_error ("%s called on finished buffer iterator", G_STRFUNC); if (i->iteration_no == 0) { for (no=0; no<i->iterators;no++) { if (i->buffer[no]) { gint j; gboolean found = FALSE; for (j=0; j<no; j++) if (i->buffer[no]==i->buffer[j]) { found = TRUE; break; } if (!found) gegl_buffer_lock (i->buffer[no]); if (i->flags[no] == GEGL_CL_BUFFER_WRITE || (i->flags[no] == GEGL_CL_BUFFER_READ && (i->area[no][0] > 0 || i->area[no][1] > 0 || i->area[no][2] > 0 || i->area[no][3] > 0))) { gegl_buffer_cl_cache_flush (i->buffer[no], &i->rect[no]); } } } } else { /* complete pending write work */ for (no=0; no<i->iterators;no++) { if (i->flags[no] == GEGL_CL_BUFFER_WRITE) { /* Wait Processing */ cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue()); if (cl_err != CL_SUCCESS) CL_ERROR; /* color conversion in the GPU (output) */ if (i->conv[no] == GEGL_CL_COLOR_CONVERT) for (j=0; j < i->n; j++) { cl_err = gegl_cl_color_conv (i->tex_op[no][j], i->tex_buf[no][j], i->size[no][j], i->format[no], i->buffer[no]->soft_format); if (cl_err == FALSE) CL_ERROR; } /* Wait Processing */ cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue()); if (cl_err != CL_SUCCESS) CL_ERROR; /* GPU -> CPU */ for (j=0; j < i->n; j++) { gpointer data; /* tile-ize */ if (i->conv[no] == GEGL_CL_COLOR_NOT_SUPPORTED) { data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_op[no][j], CL_TRUE, CL_MAP_READ, 0, i->size[no][j] * i->op_cl_format_size [no], 0, NULL, NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* color conversion using BABL */ gegl_buffer_set (i->buffer[no], &i->roi[no][j], 0, i->format[no], data, GEGL_AUTO_ROWSTRIDE); cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_op[no][j], data, 0, NULL, NULL); if (cl_err != CL_SUCCESS) CL_ERROR; } else #ifdef OPENCL_USE_CACHE { gegl_buffer_cl_cache_new (i->buffer[no], &i->roi[no][j], i->tex_buf[no][j]); /* don't release this texture */ i->tex_buf[no][j] = NULL; } #else { data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE, CL_MAP_READ, 0, i->size[no][j] * i->buf_cl_format_size [no], 0, NULL, NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* color conversion using BABL */ gegl_buffer_set (i->buffer[no], &i->roi[no][j], i->format[no], data, GEGL_AUTO_ROWSTRIDE); cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_buf[no][j], data, 0, NULL, NULL); if (cl_err != CL_SUCCESS) CL_ERROR; } #endif } } } /* Run! */ cl_err = gegl_clFinish(gegl_cl_get_command_queue()); if (cl_err != CL_SUCCESS) CL_ERROR; for (no=0; no < i->iterators; no++) for (j=0; j < i->n; j++) { if (i->tex_buf_from_cache [no][j]) { gboolean ok = gegl_buffer_cl_cache_release (i->tex_buf[no][j]); g_assert (ok); } if (i->tex_buf[no][j] && !i->tex_buf_from_cache [no][j]) gegl_clReleaseMemObject (i->tex_buf[no][j]); if (i->tex_op [no][j]) gegl_clReleaseMemObject (i->tex_op [no][j]); i->tex [no][j] = NULL; i->tex_buf[no][j] = NULL; i->tex_op [no][j] = NULL; } } g_assert (i->iterators > 0); result = (i->roi_no >= i->rois)? FALSE : TRUE; i->n = MIN(GEGL_CL_NTEX, i->rois - i->roi_no); /* then we iterate all */ for (no=0; no<i->iterators;no++) { for (j = 0; j < i->n; j++) { GeglRectangle r = {i->rect[no].x + i->roi_all[i->roi_no+j].x - i->area[no][0], i->rect[no].y + i->roi_all[i->roi_no+j].y - i->area[no][2], i->roi_all[i->roi_no+j].width + i->area[no][0] + i->area[no][1], i->roi_all[i->roi_no+j].height + i->area[no][2] + i->area[no][3]}; i->roi [no][j] = r; i->size[no][j] = r.width * r.height; } if (i->flags[no] == GEGL_CL_BUFFER_READ) { for (j=0; j < i->n; j++) { gpointer data; /* un-tile */ switch (i->conv[no]) { case GEGL_CL_COLOR_NOT_SUPPORTED: { gegl_buffer_cl_cache_flush (i->buffer[no], &i->roi[no][j]); g_assert (i->tex_op[no][j] == NULL); i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY, i->size[no][j] * i->op_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* pre-pinned memory */ data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_op[no][j], CL_TRUE, CL_MAP_WRITE, 0, i->size[no][j] * i->op_cl_format_size [no], 0, NULL, NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* color conversion using BABL */ gegl_buffer_get (i->buffer[no], &i->roi[no][j], 1.0, i->format[no], data, GEGL_AUTO_ROWSTRIDE, GEGL_ABYSS_NONE); cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_op[no][j], data, 0, NULL, NULL); if (cl_err != CL_SUCCESS) CL_ERROR; i->tex[no][j] = i->tex_op[no][j]; break; } case GEGL_CL_COLOR_EQUAL: { i->tex_buf[no][j] = gegl_buffer_cl_cache_get (i->buffer[no], &i->roi[no][j]); if (i->tex_buf[no][j]) i->tex_buf_from_cache [no][j] = TRUE; /* don't free texture from cache */ else { gegl_buffer_cl_cache_flush (i->buffer[no], &i->roi[no][j]); g_assert (i->tex_buf[no][j] == NULL); i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY, i->size[no][j] * i->buf_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* pre-pinned memory */ data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE, CL_MAP_WRITE, 0, i->size[no][j] * i->buf_cl_format_size [no], 0, NULL, NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* color conversion will be performed in the GPU later */ gegl_buffer_cl_worker_transf (i->buffer[no], data, i->buf_cl_format_size [no], i->roi[no][j], FALSE); cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_buf[no][j], data, 0, NULL, NULL); if (cl_err != CL_SUCCESS) CL_ERROR; } i->tex[no][j] = i->tex_buf[no][j]; break; } case GEGL_CL_COLOR_CONVERT: { i->tex_buf[no][j] = gegl_buffer_cl_cache_get (i->buffer[no], &i->roi[no][j]); if (i->tex_buf[no][j]) i->tex_buf_from_cache [no][j] = TRUE; /* don't free texture from cache */ else { gegl_buffer_cl_cache_flush (i->buffer[no], &i->roi[no][j]); g_assert (i->tex_buf[no][j] == NULL); i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY, i->size[no][j] * i->buf_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* pre-pinned memory */ data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE, CL_MAP_WRITE, 0, i->size[no][j] * i->buf_cl_format_size [no], 0, NULL, NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* color conversion will be performed in the GPU later */ /* get buffer data using multiple worker threads to increase bandwidth */ gegl_buffer_cl_worker_transf (i->buffer[no], data, i->buf_cl_format_size [no], i->roi[no][j], FALSE); cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_buf[no][j], data, 0, NULL, NULL); if (cl_err != CL_SUCCESS) CL_ERROR; } g_assert (i->tex_op[no][j] == NULL); i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_READ_WRITE, i->size[no][j] * i->op_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* color conversion in the GPU (input) */ g_assert (i->tex_buf[no][j] && i->tex_op[no][j]); cl_err = gegl_cl_color_conv (i->tex_buf[no][j], i->tex_op[no][j], i->size[no][j], i->buffer[no]->soft_format, i->format[no]); if (cl_err == FALSE) CL_ERROR; i->tex[no][j] = i->tex_op[no][j]; break; } } } /* Wait Processing */ cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue()); if (cl_err != CL_SUCCESS) CL_ERROR; } else if (i->flags[no] == GEGL_CL_BUFFER_WRITE) { for (j=0; j < i->n; j++) { switch (i->conv[no]) { case GEGL_CL_COLOR_NOT_SUPPORTED: { g_assert (i->tex_op[no][j] == NULL); i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_ALLOC_HOST_PTR | CL_MEM_WRITE_ONLY, i->size[no][j] * i->op_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; i->tex[no][j] = i->tex_op[no][j]; break; } case GEGL_CL_COLOR_EQUAL: { g_assert (i->tex_buf[no][j] == NULL); i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, /* cache */ i->size[no][j] * i->buf_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; i->tex[no][j] = i->tex_buf[no][j]; break; } case GEGL_CL_COLOR_CONVERT: { g_assert (i->tex_buf[no][j] == NULL); i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, /* cache */ i->size[no][j] * i->buf_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; g_assert (i->tex_op[no][j] == NULL); i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_READ_WRITE, i->size[no][j] * i->op_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; i->tex[no][j] = i->tex_op[no][j]; break; } } } } else if (i->flags[no] == GEGL_CL_BUFFER_AUX) { for (j=0; j < i->n; j++) { g_assert (i->tex_op[no][j] == NULL); i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_READ_WRITE, i->size[no][j] * i->op_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; i->tex[no][j] = i->tex_op[no][j]; } } } i->roi_no += i->n; i->iteration_no++; if (result == FALSE) { for (no=0; no<i->iterators;no++) { if (i->buffer[no]) { gint j; gboolean found = FALSE; for (j=0; j<no; j++) if (i->buffer[no]==i->buffer[j]) { found = TRUE; break; } if (!found) gegl_buffer_unlock (i->buffer[no]); g_object_unref (i->buffer[no]); } } i->is_finished = TRUE; g_free (i->roi_all); g_slice_free (GeglBufferClIterators, i); } *err = FALSE; return result; error: for (no=0; no<i->iterators;no++) for (j=0; j < i->n; j++) { if (i->tex_buf[no][j]) gegl_clReleaseMemObject (i->tex_buf[no][j]); if (i->tex_op [no][j]) gegl_clReleaseMemObject (i->tex_op [no][j]); i->tex [no][j] = NULL; i->tex_buf[no][j] = NULL; i->tex_op [no][j] = NULL; } *err = TRUE; return FALSE; }
static gboolean cl_process (GeglOperation *self, cl_mem in_tex, cl_mem out_tex, size_t global_worksize, const GeglRectangle *roi, gint level) { GeglChantO *o = GEGL_CHANT_PROPERTIES (self); gint num_sampling_points; gdouble *xs, *ys; gfloat *ysf = NULL; cl_mem cl_curve = NULL; cl_ulong cl_max_constant_size; cl_int cl_err = 0; num_sampling_points = o->sampling_points; if (!cl_data) { const char *kernel_name[] = {"cl_contrast_curve",NULL}; cl_data = gegl_cl_compile_and_build (contrast_curve_cl_source, kernel_name); } if (!cl_data) return TRUE; if (num_sampling_points > 0) { xs = g_new (gdouble, num_sampling_points); ys = g_new (gdouble, num_sampling_points); gegl_curve_calc_values (o->curve, 0.0, 1.0, num_sampling_points, xs, ys); g_free (xs); /*We need to downscale the array to pass it to the GPU*/ ysf = g_new (gfloat, num_sampling_points); copy_double_array_to_float_array (ys, ysf, num_sampling_points); g_free (ys); cl_err = gegl_clGetDeviceInfo (gegl_cl_get_device (), CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof (cl_ulong), &cl_max_constant_size, NULL); CL_CHECK; GEGL_NOTE (GEGL_DEBUG_OPENCL, "Max Constant Mem Size: %lu bytes", (unsigned long) cl_max_constant_size); if (sizeof (cl_float) * num_sampling_points < cl_max_constant_size) { cl_curve = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, num_sampling_points * sizeof (cl_float), ysf, &cl_err); CL_CHECK; cl_err = gegl_clSetKernelArg (cl_data->kernel[0], 0, sizeof (cl_mem), (void*) &in_tex); CL_CHECK; cl_err = gegl_clSetKernelArg (cl_data->kernel[0], 1, sizeof (cl_mem), (void*) &out_tex); CL_CHECK; cl_err = gegl_clSetKernelArg (cl_data->kernel[0], 2, sizeof (cl_mem), (void*) &cl_curve); CL_CHECK; cl_err = gegl_clSetKernelArg (cl_data->kernel[0], 3, sizeof (gint), (void*) &num_sampling_points); CL_CHECK; cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (), cl_data->kernel[0], 1, NULL, &global_worksize, NULL, 0, NULL, NULL); CL_CHECK; cl_err = gegl_clFinish (gegl_cl_get_command_queue ()); CL_CHECK; cl_err = gegl_clReleaseMemObject (cl_curve); CL_CHECK_ONLY (cl_err); } else { /*If the curve size doesn't fit constant memory is better to use CPU*/ GEGL_NOTE (GEGL_DEBUG_OPENCL, "Not enough constant memory for the curve"); g_free (ysf); return TRUE; } g_free (ysf); return FALSE; error: if (ysf) g_free (ysf); if (cl_curve) gegl_clReleaseMemObject (cl_curve); return TRUE; } else /*If the curve doesn't have a lookup table is better to use CPU*/ { GEGL_NOTE (GEGL_DEBUG_OPENCL, "Curve not suitable to be computed in the GPU"); return TRUE; } }
static gboolean cl_process (GeglOperation *operation, cl_mem in, cl_mem out, size_t global_worksize, const GeglRectangle *roi, gint level) { GeglProperties *o = GEGL_PROPERTIES (operation); GeglRectangle *wr = gegl_operation_source_get_bounding_box (operation, "input"); cl_int cl_err = 0; cl_mem cl_random_data = NULL; cl_int x_offset = roi->x; cl_int y_offset = roi->y; cl_int roi_width = roi->width; cl_int wr_width = wr->width; cl_ushort4 rand; cl_int holdness; cl_float hue_distance; cl_float saturation_distance; cl_float value_distance; gegl_cl_random_get_ushort4 (o->rand, &rand); if (!cl_data) { const char *kernel_name[] = { "cl_noise_hsv", NULL }; cl_data = gegl_cl_compile_and_build (noise_hsv_cl_source, kernel_name); } if (!cl_data) return TRUE; cl_random_data = gegl_cl_load_random_data (&cl_err); CL_CHECK; holdness = o->holdness; hue_distance = o->hue_distance / 360.0; saturation_distance = o->saturation_distance; value_distance = o->value_distance; gegl_cl_set_kernel_args (cl_data->kernel[0], sizeof(cl_mem), &in, sizeof(cl_mem), &out, sizeof(cl_mem), &cl_random_data, sizeof(cl_ushort4), &rand, sizeof(cl_int), &x_offset, sizeof(cl_int), &y_offset, sizeof(cl_int), &roi_width, sizeof(cl_int), &wr_width, sizeof(cl_int), &holdness, sizeof(cl_float), &hue_distance, sizeof(cl_float), &saturation_distance, sizeof(cl_float), &value_distance, NULL); CL_CHECK; cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (), cl_data->kernel[0], 1, NULL, &global_worksize, NULL, 0, NULL, NULL); CL_CHECK; cl_err = gegl_clFinish (gegl_cl_get_command_queue ()); CL_CHECK; return FALSE; error: return TRUE; }