static gboolean cl_process (GeglOperation *operation, cl_mem in_buf, cl_mem out_buf, const size_t n_pixels, const GeglRectangle *roi, gint level) { GeglOperationClass *operation_class = GEGL_OPERATION_GET_CLASS (operation); GeglClRunData *cl_data = operation_class->cl_data; GeglProperties *o = GEGL_PROPERTIES (operation); const size_t gbl_size[2] = {roi->width, roi->height}; const size_t gbl_off[2] = {roi->x, roi->y}; cl_int cl_err = 0; cl_mem filter_pat = NULL; if (!cl_data) goto error; filter_pat = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, pattern_width[o->pattern] * pattern_height[o->pattern] * sizeof(cl_int), (void*)pattern[o->pattern], &cl_err); CL_CHECK; cl_err = gegl_cl_set_kernel_args (cl_data->kernel[0], sizeof(cl_mem), &in_buf, sizeof(cl_mem), &out_buf, sizeof(cl_mem), &filter_pat, sizeof(cl_int), &pattern_width[o->pattern], sizeof(cl_int), &pattern_height[o->pattern], sizeof(cl_int), &o->additive, sizeof(cl_int), &o->rotated, NULL); CL_CHECK; cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (), cl_data->kernel[0], 2, gbl_off, gbl_size, NULL, 0, NULL, NULL); CL_CHECK; cl_err = gegl_clFinish (gegl_cl_get_command_queue ()); CL_CHECK; cl_err = gegl_clReleaseMemObject (filter_pat); CL_CHECK; return FALSE; error: if (filter_pat) gegl_clReleaseMemObject (filter_pat); return TRUE; }
void gegl_buffer_cl_cache_invalidate (GeglBuffer *buffer, const GeglRectangle *roi) { GeglRectangle tmp; GList *elem; gpointer data; for (elem=cache_entries; elem; elem=elem->next) { CacheEntry *e = elem->data; if (e->valid && e->buffer == buffer && (!roi || gegl_rectangle_intersect (&tmp, roi, &e->roi))) { g_assert (e->used == 0); gegl_clReleaseMemObject (e->tex); e->valid = FALSE; } } g_mutex_lock (&cache_mutex); while (cache_entry_find_invalid (&data)) { CacheEntry *entry = data; memset(entry, 0x0, sizeof (CacheEntry)); g_slice_free (CacheEntry, data); cache_entries = g_list_remove (cache_entries, data); } g_mutex_unlock (&cache_mutex); #if 0 g_printf ("-- "); for (elem=cache_entry; elem; elem=elem->next) { CacheEntry *e = elem->data; g_printf ("%p %p {%d, %d, %d, %d} %d | ", e->tex, e->buffer, e->roi.x, e->roi.y, e->roi.width, e->roi.height, e->valid); } g_printf ("\n"); #endif }
static gboolean cl_bilateral (cl_mem in_tex, cl_mem out_tex, const GeglRectangle *roi, const GeglRectangle *src_rect, gint s_sigma, gfloat r_sigma) { cl_int cl_err = 0; gint c; const gint width = src_rect->width; const gint height = src_rect->height; const gint sw = (width -1) / s_sigma + 1; const gint sh = (height-1) / s_sigma + 1; const gint depth = (int)(1.0f / r_sigma) + 1; size_t global_ws[2]; size_t local_ws[2]; cl_mem grid = NULL; cl_mem blur[4] = {NULL, NULL, NULL, NULL}; if (!cl_data) { const char *kernel_name[] = {"bilateral_downsample", "bilateral_blur", "bilateral_interpolate", NULL}; cl_data = gegl_cl_compile_and_build (bilateral_filter_fast_cl_source, kernel_name); } if (!cl_data) return 1; grid = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_READ_WRITE, sw * sh * depth * sizeof(cl_float8), NULL, &cl_err); CL_CHECK; for(c = 0; c < 4; c++) { blur[c] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_READ_WRITE, sw * sh * depth * sizeof(cl_float2), NULL, &cl_err); CL_CHECK; } local_ws[0] = 8; local_ws[1] = 8; global_ws[0] = ((sw + local_ws[0] - 1)/local_ws[0])*local_ws[0]; global_ws[1] = ((sh + local_ws[1] - 1)/local_ws[1])*local_ws[1]; gegl_cl_set_kernel_args (cl_data->kernel[0], sizeof(cl_mem), &in_tex, sizeof(cl_mem), &grid, sizeof(cl_int), &width, sizeof(cl_int), &height, sizeof(cl_int), &sw, sizeof(cl_int), &sh, sizeof(cl_int), &depth, sizeof(cl_int), &s_sigma, sizeof(cl_float), &r_sigma, NULL); CL_CHECK; cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (), cl_data->kernel[0], 2, NULL, global_ws, local_ws, 0, NULL, NULL); CL_CHECK; local_ws[0] = 16; local_ws[1] = 16; global_ws[0] = ((sw + local_ws[0] - 1)/local_ws[0])*local_ws[0]; global_ws[1] = ((sh + local_ws[1] - 1)/local_ws[1])*local_ws[1]; gegl_cl_set_kernel_args (cl_data->kernel[1], sizeof(cl_mem), &grid, sizeof(cl_mem), &blur[0], sizeof(cl_mem), &blur[1], sizeof(cl_mem), &blur[2], sizeof(cl_mem), &blur[3], sizeof(cl_int), &sw, sizeof(cl_int), &sh, sizeof(cl_int), &depth, NULL); cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (), cl_data->kernel[1], 2, NULL, global_ws, local_ws, 0, NULL, NULL); CL_CHECK; global_ws[0] = width; global_ws[1] = height; gegl_cl_set_kernel_args (cl_data->kernel[2], sizeof(cl_mem), &in_tex, sizeof(cl_mem), &blur[0], sizeof(cl_mem), &blur[1], sizeof(cl_mem), &blur[2], sizeof(cl_mem), &blur[3], sizeof(cl_mem), &out_tex, sizeof(cl_int), &width, sizeof(cl_int), &sw, sizeof(cl_int), &sh, sizeof(cl_int), &depth, sizeof(cl_int), &s_sigma, sizeof(cl_float), &r_sigma, NULL); CL_CHECK; cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (), cl_data->kernel[2], 2, NULL, global_ws, NULL, 0, NULL, NULL); CL_CHECK; cl_err = gegl_clFinish (gegl_cl_get_command_queue ()); CL_CHECK; cl_err = gegl_clReleaseMemObject (grid); CL_CHECK_ONLY (cl_err); for(c = 0; c < 4; c++) { cl_err = gegl_clReleaseMemObject (blur[c]); CL_CHECK_ONLY (cl_err); } return FALSE; error: if (grid) gegl_clReleaseMemObject (grid); for (c = 0; c < 4; c++) { if (blur[c]) gegl_clReleaseMemObject (blur[c]); } return TRUE; }
static inline gboolean _gegl_buffer_cl_cache_flush2 (GeglTileHandlerCache *cache, const GeglRectangle *roi) { size_t size; GList *elem; GeglRectangle tmp; cl_int cl_err = 0; gpointer data; gboolean need_cl = FALSE; for (elem=cache_entries; elem; elem=elem->next) { CacheEntry *entry = elem->data; if (entry->valid && entry->tile_storage->cache == cache && (!roi || gegl_rectangle_intersect (&tmp, roi, &entry->roi))) { entry->valid = FALSE; entry->used ++; gegl_cl_color_babl (entry->buffer->soft_format, &size); data = g_malloc(entry->roi.width * entry->roi.height * size); cl_err = gegl_clEnqueueReadBuffer(gegl_cl_get_command_queue(), entry->tex, CL_TRUE, 0, entry->roi.width * entry->roi.height * size, data, 0, NULL, NULL); /* tile-ize */ gegl_buffer_set (entry->buffer, &entry->roi, 0, entry->buffer->soft_format, data, GEGL_AUTO_ROWSTRIDE); entry->used --; need_cl = TRUE; g_free(data); CL_CHECK; } } if (need_cl) { cl_err = gegl_clFinish (gegl_cl_get_command_queue ()); CL_CHECK; g_mutex_lock (&cache_mutex); while (cache_entry_find_invalid (&data)) { CacheEntry *entry = data; #if 1 GEGL_NOTE (GEGL_DEBUG_OPENCL, "Removing from cl-cache: %p %s {%d %d %d %d}", entry->buffer, babl_get_name(entry->buffer->soft_format), entry->roi.x, entry->roi.y, entry->roi.width, entry->roi.height); #endif gegl_clReleaseMemObject(entry->tex); memset (entry, 0x0, sizeof (CacheEntry)); g_slice_free (CacheEntry, data); cache_entries = g_list_remove (cache_entries, data); } g_mutex_unlock (&cache_mutex); } return TRUE; error: g_mutex_lock (&cache_mutex); while (cache_entry_find_invalid (&data)) { g_slice_free (CacheEntry, data); cache_entries = g_list_remove (cache_entries, data); } g_mutex_unlock (&cache_mutex); /* XXX : result is corrupted */ return FALSE; }
gboolean gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err) { GeglBufferClIterators *i = (gpointer)iterator; gboolean result = FALSE; gint no, j; cl_int cl_err = 0; if (i->is_finished) g_error ("%s called on finished buffer iterator", G_STRFUNC); if (i->iteration_no == 0) { for (no=0; no<i->iterators;no++) { if (i->buffer[no]) { gint j; gboolean found = FALSE; for (j=0; j<no; j++) if (i->buffer[no]==i->buffer[j]) { found = TRUE; break; } if (!found) gegl_buffer_lock (i->buffer[no]); if (i->flags[no] == GEGL_CL_BUFFER_WRITE || (i->flags[no] == GEGL_CL_BUFFER_READ && (i->area[no][0] > 0 || i->area[no][1] > 0 || i->area[no][2] > 0 || i->area[no][3] > 0))) { gegl_buffer_cl_cache_flush (i->buffer[no], &i->rect[no]); } } } } else { /* complete pending write work */ for (no=0; no<i->iterators;no++) { if (i->flags[no] == GEGL_CL_BUFFER_WRITE) { /* Wait Processing */ cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue()); if (cl_err != CL_SUCCESS) CL_ERROR; /* color conversion in the GPU (output) */ if (i->conv[no] == GEGL_CL_COLOR_CONVERT) for (j=0; j < i->n; j++) { cl_err = gegl_cl_color_conv (i->tex_op[no][j], i->tex_buf[no][j], i->size[no][j], i->format[no], i->buffer[no]->soft_format); if (cl_err == FALSE) CL_ERROR; } /* Wait Processing */ cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue()); if (cl_err != CL_SUCCESS) CL_ERROR; /* GPU -> CPU */ for (j=0; j < i->n; j++) { gpointer data; /* tile-ize */ if (i->conv[no] == GEGL_CL_COLOR_NOT_SUPPORTED) { data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_op[no][j], CL_TRUE, CL_MAP_READ, 0, i->size[no][j] * i->op_cl_format_size [no], 0, NULL, NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* color conversion using BABL */ gegl_buffer_set (i->buffer[no], &i->roi[no][j], 0, i->format[no], data, GEGL_AUTO_ROWSTRIDE); cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_op[no][j], data, 0, NULL, NULL); if (cl_err != CL_SUCCESS) CL_ERROR; } else #ifdef OPENCL_USE_CACHE { gegl_buffer_cl_cache_new (i->buffer[no], &i->roi[no][j], i->tex_buf[no][j]); /* don't release this texture */ i->tex_buf[no][j] = NULL; } #else { data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE, CL_MAP_READ, 0, i->size[no][j] * i->buf_cl_format_size [no], 0, NULL, NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* color conversion using BABL */ gegl_buffer_set (i->buffer[no], &i->roi[no][j], i->format[no], data, GEGL_AUTO_ROWSTRIDE); cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_buf[no][j], data, 0, NULL, NULL); if (cl_err != CL_SUCCESS) CL_ERROR; } #endif } } } /* Run! */ cl_err = gegl_clFinish(gegl_cl_get_command_queue()); if (cl_err != CL_SUCCESS) CL_ERROR; for (no=0; no < i->iterators; no++) for (j=0; j < i->n; j++) { if (i->tex_buf_from_cache [no][j]) { gboolean ok = gegl_buffer_cl_cache_release (i->tex_buf[no][j]); g_assert (ok); } if (i->tex_buf[no][j] && !i->tex_buf_from_cache [no][j]) gegl_clReleaseMemObject (i->tex_buf[no][j]); if (i->tex_op [no][j]) gegl_clReleaseMemObject (i->tex_op [no][j]); i->tex [no][j] = NULL; i->tex_buf[no][j] = NULL; i->tex_op [no][j] = NULL; } } g_assert (i->iterators > 0); result = (i->roi_no >= i->rois)? FALSE : TRUE; i->n = MIN(GEGL_CL_NTEX, i->rois - i->roi_no); /* then we iterate all */ for (no=0; no<i->iterators;no++) { for (j = 0; j < i->n; j++) { GeglRectangle r = {i->rect[no].x + i->roi_all[i->roi_no+j].x - i->area[no][0], i->rect[no].y + i->roi_all[i->roi_no+j].y - i->area[no][2], i->roi_all[i->roi_no+j].width + i->area[no][0] + i->area[no][1], i->roi_all[i->roi_no+j].height + i->area[no][2] + i->area[no][3]}; i->roi [no][j] = r; i->size[no][j] = r.width * r.height; } if (i->flags[no] == GEGL_CL_BUFFER_READ) { for (j=0; j < i->n; j++) { gpointer data; /* un-tile */ switch (i->conv[no]) { case GEGL_CL_COLOR_NOT_SUPPORTED: { gegl_buffer_cl_cache_flush (i->buffer[no], &i->roi[no][j]); g_assert (i->tex_op[no][j] == NULL); i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY, i->size[no][j] * i->op_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* pre-pinned memory */ data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_op[no][j], CL_TRUE, CL_MAP_WRITE, 0, i->size[no][j] * i->op_cl_format_size [no], 0, NULL, NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* color conversion using BABL */ gegl_buffer_get (i->buffer[no], &i->roi[no][j], 1.0, i->format[no], data, GEGL_AUTO_ROWSTRIDE, GEGL_ABYSS_NONE); cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_op[no][j], data, 0, NULL, NULL); if (cl_err != CL_SUCCESS) CL_ERROR; i->tex[no][j] = i->tex_op[no][j]; break; } case GEGL_CL_COLOR_EQUAL: { i->tex_buf[no][j] = gegl_buffer_cl_cache_get (i->buffer[no], &i->roi[no][j]); if (i->tex_buf[no][j]) i->tex_buf_from_cache [no][j] = TRUE; /* don't free texture from cache */ else { gegl_buffer_cl_cache_flush (i->buffer[no], &i->roi[no][j]); g_assert (i->tex_buf[no][j] == NULL); i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY, i->size[no][j] * i->buf_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* pre-pinned memory */ data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE, CL_MAP_WRITE, 0, i->size[no][j] * i->buf_cl_format_size [no], 0, NULL, NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* color conversion will be performed in the GPU later */ gegl_buffer_cl_worker_transf (i->buffer[no], data, i->buf_cl_format_size [no], i->roi[no][j], FALSE); cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_buf[no][j], data, 0, NULL, NULL); if (cl_err != CL_SUCCESS) CL_ERROR; } i->tex[no][j] = i->tex_buf[no][j]; break; } case GEGL_CL_COLOR_CONVERT: { i->tex_buf[no][j] = gegl_buffer_cl_cache_get (i->buffer[no], &i->roi[no][j]); if (i->tex_buf[no][j]) i->tex_buf_from_cache [no][j] = TRUE; /* don't free texture from cache */ else { gegl_buffer_cl_cache_flush (i->buffer[no], &i->roi[no][j]); g_assert (i->tex_buf[no][j] == NULL); i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY, i->size[no][j] * i->buf_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* pre-pinned memory */ data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE, CL_MAP_WRITE, 0, i->size[no][j] * i->buf_cl_format_size [no], 0, NULL, NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* color conversion will be performed in the GPU later */ /* get buffer data using multiple worker threads to increase bandwidth */ gegl_buffer_cl_worker_transf (i->buffer[no], data, i->buf_cl_format_size [no], i->roi[no][j], FALSE); cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_buf[no][j], data, 0, NULL, NULL); if (cl_err != CL_SUCCESS) CL_ERROR; } g_assert (i->tex_op[no][j] == NULL); i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_READ_WRITE, i->size[no][j] * i->op_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* color conversion in the GPU (input) */ g_assert (i->tex_buf[no][j] && i->tex_op[no][j]); cl_err = gegl_cl_color_conv (i->tex_buf[no][j], i->tex_op[no][j], i->size[no][j], i->buffer[no]->soft_format, i->format[no]); if (cl_err == FALSE) CL_ERROR; i->tex[no][j] = i->tex_op[no][j]; break; } } } /* Wait Processing */ cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue()); if (cl_err != CL_SUCCESS) CL_ERROR; } else if (i->flags[no] == GEGL_CL_BUFFER_WRITE) { for (j=0; j < i->n; j++) { switch (i->conv[no]) { case GEGL_CL_COLOR_NOT_SUPPORTED: { g_assert (i->tex_op[no][j] == NULL); i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_ALLOC_HOST_PTR | CL_MEM_WRITE_ONLY, i->size[no][j] * i->op_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; i->tex[no][j] = i->tex_op[no][j]; break; } case GEGL_CL_COLOR_EQUAL: { g_assert (i->tex_buf[no][j] == NULL); i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, /* cache */ i->size[no][j] * i->buf_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; i->tex[no][j] = i->tex_buf[no][j]; break; } case GEGL_CL_COLOR_CONVERT: { g_assert (i->tex_buf[no][j] == NULL); i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, /* cache */ i->size[no][j] * i->buf_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; g_assert (i->tex_op[no][j] == NULL); i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_READ_WRITE, i->size[no][j] * i->op_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; i->tex[no][j] = i->tex_op[no][j]; break; } } } } else if (i->flags[no] == GEGL_CL_BUFFER_AUX) { for (j=0; j < i->n; j++) { g_assert (i->tex_op[no][j] == NULL); i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_READ_WRITE, i->size[no][j] * i->op_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; i->tex[no][j] = i->tex_op[no][j]; } } } i->roi_no += i->n; i->iteration_no++; if (result == FALSE) { for (no=0; no<i->iterators;no++) { if (i->buffer[no]) { gint j; gboolean found = FALSE; for (j=0; j<no; j++) if (i->buffer[no]==i->buffer[j]) { found = TRUE; break; } if (!found) gegl_buffer_unlock (i->buffer[no]); g_object_unref (i->buffer[no]); } } i->is_finished = TRUE; g_free (i->roi_all); g_slice_free (GeglBufferClIterators, i); } *err = FALSE; return result; error: for (no=0; no<i->iterators;no++) for (j=0; j < i->n; j++) { if (i->tex_buf[no][j]) gegl_clReleaseMemObject (i->tex_buf[no][j]); if (i->tex_op [no][j]) gegl_clReleaseMemObject (i->tex_op [no][j]); i->tex [no][j] = NULL; i->tex_buf[no][j] = NULL; i->tex_op [no][j] = NULL; } *err = TRUE; return FALSE; }
static gboolean cl_buffer_get_min_max (cl_mem in_tex, size_t global_worksize, const GeglRectangle *roi, gfloat min[4], gfloat max[4]) { cl_int cl_err = 0; size_t local_ws, max_local_ws; size_t work_groups; size_t global_ws; cl_mem cl_aux_min = NULL; cl_mem cl_aux_max = NULL; cl_mem cl_min_max = NULL; cl_int n_pixels = (cl_int)global_worksize; cl_float4 min_max_buf[2]; if (global_worksize < 1) { min[0] = min[1] = min[2] = min[3] = G_MAXFLOAT; max[0] = max[1] = max[2] = max[3] = -G_MAXFLOAT; return FALSE; } cl_err = gegl_clGetDeviceInfo (gegl_cl_get_device (), CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof (size_t), &max_local_ws, NULL); CL_CHECK; max_local_ws = MIN (max_local_ws, MIN (cl_data->work_group_size[0], cl_data->work_group_size[1])); /* Needs to be a power of two */ local_ws = 256; while (local_ws > max_local_ws) local_ws /= 2; work_groups = MIN ((global_worksize + local_ws - 1) / local_ws, local_ws); global_ws = work_groups * local_ws; cl_aux_min = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_READ_WRITE, local_ws * sizeof(cl_float4), NULL, &cl_err); CL_CHECK; cl_aux_max = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_READ_WRITE, local_ws * sizeof(cl_float4), NULL, &cl_err); CL_CHECK; cl_min_max = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_WRITE_ONLY, 2 * sizeof(cl_float4), NULL, &cl_err); CL_CHECK; /* The full initialization is done in the two_stages_local_min_max_reduce kernel */ #if 0 cl_err = gegl_clSetKernelArg(cl_data->kernel[3], 0, sizeof(cl_mem), (void*)&cl_aux_min); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[3], 1, sizeof(cl_mem), (void*)&cl_aux_max); CL_CHECK; cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (), cl_data->kernel[3], 1, NULL, &local_ws, &local_ws, 0, NULL, NULL); CL_CHECK; #endif cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem), (void*)&cl_aux_min); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_mem), (void*)&cl_aux_max); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_float4) * local_ws, NULL); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_float4) * local_ws, NULL); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_int), (void*)&n_pixels); CL_CHECK; cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (), cl_data->kernel[0], 1, NULL, &global_ws, &local_ws, 0, NULL, NULL); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem), (void*)&cl_aux_min); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_mem), (void*)&cl_aux_max); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_mem), (void*)&cl_min_max); CL_CHECK; /* Only one work group */ cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (), cl_data->kernel[1], 1, NULL, &local_ws, &local_ws, 0, NULL, NULL); CL_CHECK; /* Read the memory buffer, probably better to keep it in GPU memory */ cl_err = gegl_clEnqueueReadBuffer (gegl_cl_get_command_queue (), cl_min_max, CL_TRUE, 0, 2 * sizeof (cl_float4), &min_max_buf, 0, NULL, NULL); CL_CHECK; min[0] = min_max_buf[0].x; min[1] = min_max_buf[0].y; min[2] = min_max_buf[0].z; min[3] = min_max_buf[0].w; max[0] = min_max_buf[1].x; max[1] = min_max_buf[1].y; max[2] = min_max_buf[1].z; max[3] = min_max_buf[1].w; cl_err = gegl_clReleaseMemObject (cl_aux_min); CL_CHECK_ONLY (cl_err); cl_err = gegl_clReleaseMemObject (cl_aux_max); CL_CHECK_ONLY (cl_err); cl_err = gegl_clReleaseMemObject (cl_min_max); CL_CHECK_ONLY (cl_err); return FALSE; error: if (cl_aux_min) gegl_clReleaseMemObject (cl_aux_min); if (cl_aux_max) gegl_clReleaseMemObject (cl_aux_max); if (cl_min_max) gegl_clReleaseMemObject (cl_min_max); return TRUE; }
static gboolean cl_process (GeglOperation *self, cl_mem in_tex, cl_mem out_tex, size_t global_worksize, const GeglRectangle *roi, gint level) { GeglChantO *o = GEGL_CHANT_PROPERTIES (self); gint num_sampling_points; gdouble *xs, *ys; gfloat *ysf = NULL; cl_mem cl_curve = NULL; cl_ulong cl_max_constant_size; cl_int cl_err = 0; num_sampling_points = o->sampling_points; if (!cl_data) { const char *kernel_name[] = {"cl_contrast_curve",NULL}; cl_data = gegl_cl_compile_and_build (contrast_curve_cl_source, kernel_name); } if (!cl_data) return TRUE; if (num_sampling_points > 0) { xs = g_new (gdouble, num_sampling_points); ys = g_new (gdouble, num_sampling_points); gegl_curve_calc_values (o->curve, 0.0, 1.0, num_sampling_points, xs, ys); g_free (xs); /*We need to downscale the array to pass it to the GPU*/ ysf = g_new (gfloat, num_sampling_points); copy_double_array_to_float_array (ys, ysf, num_sampling_points); g_free (ys); cl_err = gegl_clGetDeviceInfo (gegl_cl_get_device (), CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof (cl_ulong), &cl_max_constant_size, NULL); CL_CHECK; GEGL_NOTE (GEGL_DEBUG_OPENCL, "Max Constant Mem Size: %lu bytes", (unsigned long) cl_max_constant_size); if (sizeof (cl_float) * num_sampling_points < cl_max_constant_size) { cl_curve = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, num_sampling_points * sizeof (cl_float), ysf, &cl_err); CL_CHECK; cl_err = gegl_clSetKernelArg (cl_data->kernel[0], 0, sizeof (cl_mem), (void*) &in_tex); CL_CHECK; cl_err = gegl_clSetKernelArg (cl_data->kernel[0], 1, sizeof (cl_mem), (void*) &out_tex); CL_CHECK; cl_err = gegl_clSetKernelArg (cl_data->kernel[0], 2, sizeof (cl_mem), (void*) &cl_curve); CL_CHECK; cl_err = gegl_clSetKernelArg (cl_data->kernel[0], 3, sizeof (gint), (void*) &num_sampling_points); CL_CHECK; cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (), cl_data->kernel[0], 1, NULL, &global_worksize, NULL, 0, NULL, NULL); CL_CHECK; cl_err = gegl_clFinish (gegl_cl_get_command_queue ()); CL_CHECK; cl_err = gegl_clReleaseMemObject (cl_curve); CL_CHECK_ONLY (cl_err); } else { /*If the curve size doesn't fit constant memory is better to use CPU*/ GEGL_NOTE (GEGL_DEBUG_OPENCL, "Not enough constant memory for the curve"); g_free (ysf); return TRUE; } g_free (ysf); return FALSE; error: if (ysf) g_free (ysf); if (cl_curve) gegl_clReleaseMemObject (cl_curve); return TRUE; } else /*If the curve doesn't have a lookup table is better to use CPU*/ { GEGL_NOTE (GEGL_DEBUG_OPENCL, "Curve not suitable to be computed in the GPU"); return TRUE; } }
static gboolean fir_cl_process (GeglBuffer *input, GeglBuffer *output, const GeglRectangle *result, const Babl *format, gfloat *cmatrix, gint clen, GeglOrientation orientation, GeglAbyssPolicy abyss) { gboolean err = FALSE; cl_int cl_err; cl_mem cl_cmatrix = NULL; GeglBufferClIterator *i; gint read; gint left, right, top, bottom; if (orientation == GEGL_ORIENTATION_HORIZONTAL) { right = left = clen / 2; top = bottom = 0; } else { right = left = 0; top = bottom = clen / 2; } i = gegl_buffer_cl_iterator_new (output, result, format, GEGL_CL_BUFFER_WRITE); read = gegl_buffer_cl_iterator_add_2 (i, input, result, format, GEGL_CL_BUFFER_READ, left, right, top, bottom, abyss); cl_cmatrix = gegl_clCreateBuffer (gegl_cl_get_context(), CL_MEM_COPY_HOST_PTR | CL_MEM_READ_ONLY, clen * sizeof(cl_float), cmatrix, &cl_err); CL_CHECK; while (gegl_buffer_cl_iterator_next (i, &err) && !err) { err = cl_gaussian_blur(i->tex[read], i->tex[0], &i->roi[0], cl_cmatrix, clen, orientation); if (err) { gegl_buffer_cl_iterator_stop (i); break; } } cl_err = gegl_clReleaseMemObject (cl_cmatrix); CL_CHECK; cl_cmatrix = NULL; return !err; error: if (cl_cmatrix) gegl_clReleaseMemObject (cl_cmatrix); return FALSE; }