static cl_int cl_edge_laplace (cl_mem in_tex, cl_mem aux_tex, cl_mem out_tex, const GeglRectangle *src_rect, const GeglRectangle *roi, gint radius) { cl_int cl_err = 0; size_t global_ws[2]; if (!cl_data) { const char *kernel_name[] = {"pre_edgelaplace", "knl_edgelaplace", NULL}; cl_data = gegl_cl_compile_and_build (kernel_source, kernel_name); } if (!cl_data) return 1; global_ws[0] = roi->width; global_ws[1] = roi->height; cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex); cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem), (void*)&aux_tex); if (cl_err != CL_SUCCESS) return cl_err; cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (), cl_data->kernel[0], 2, NULL, global_ws, NULL, 0, NULL, NULL); if (cl_err != CL_SUCCESS) return cl_err; cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue()); if (CL_SUCCESS != cl_err) return cl_err; cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem), (void*)&aux_tex); cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_mem), (void*)&out_tex); if (cl_err != CL_SUCCESS) return cl_err; cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (), cl_data->kernel[1], 2, NULL, global_ws, NULL, 0, NULL, NULL); if (cl_err != CL_SUCCESS) return cl_err; return cl_err; }
static void motion_blur_cl (GeglBuffer *src, const GeglRectangle *src_rect, GeglBuffer *dst, const GeglRectangle *dst_rect, const int num_steps, const float offset_x, const float offset_y) { const Babl * in_format = babl_format("RaGaBaA float"); const Babl *out_format = babl_format("RaGaBaA float"); /* AreaFilter general processing flow. Loading data and making the necessary color space conversion. */ #include "gegl-cl-operation-area-filter-fw1.h" /////////////////////////////////////////////////////////////////////////// /* Algorithm specific processing flow. Build kernels, setting parameters, and running them. */ if (!cl_data) { const char *kernel_name[] = { "motion_blur_CL", NULL }; cl_data = gegl_cl_compile_and_build(kernel_source, kernel_name); } if (!cl_data) CL_ERROR; cl_int cl_src_width = src_rect->width; cl_int cl_src_height = src_rect->height; cl_int cl_src_x = src_rect->x; cl_int cl_src_y = src_rect->y; cl_int cl_dst_x = dst_rect->x; cl_int cl_dst_y = dst_rect->y; cl_int cl_num_steps = num_steps; cl_float cl_offset_x = offset_x; cl_float cl_offset_y = offset_y; CL_SAFE_CALL(errcode = gegl_clSetKernelArg( cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&src_mem)); CL_SAFE_CALL(errcode = gegl_clSetKernelArg( cl_data->kernel[0], 1, sizeof(cl_int), (void*)&cl_src_width)); CL_SAFE_CALL(errcode = gegl_clSetKernelArg( cl_data->kernel[0], 2, sizeof(cl_int), (void*)&cl_src_height)); CL_SAFE_CALL(errcode = gegl_clSetKernelArg( cl_data->kernel[0], 3, sizeof(cl_int), (void*)&cl_src_x)); CL_SAFE_CALL(errcode = gegl_clSetKernelArg( cl_data->kernel[0], 4, sizeof(cl_int), (void*)&cl_src_y)); CL_SAFE_CALL(errcode = gegl_clSetKernelArg( cl_data->kernel[0], 5, sizeof(cl_mem), (void*)&dst_mem)); CL_SAFE_CALL(errcode = gegl_clSetKernelArg( cl_data->kernel[0], 6, sizeof(cl_int), (void*)&cl_dst_x)); CL_SAFE_CALL(errcode = gegl_clSetKernelArg( cl_data->kernel[0], 7, sizeof(cl_int), (void*)&cl_dst_y)); CL_SAFE_CALL(errcode = gegl_clSetKernelArg( cl_data->kernel[0], 8, sizeof(cl_int), (void*)&cl_num_steps)); CL_SAFE_CALL(errcode = gegl_clSetKernelArg( cl_data->kernel[0], 9, sizeof(cl_float), (void*)&cl_offset_x)); CL_SAFE_CALL(errcode = gegl_clSetKernelArg( cl_data->kernel[0], 10, sizeof(cl_float), (void*)&cl_offset_y)); CL_SAFE_CALL(errcode = gegl_clEnqueueNDRangeKernel( gegl_cl_get_command_queue(), cl_data->kernel[0], 2, NULL, gbl_size, NULL, 0, NULL, NULL)); errcode = gegl_clEnqueueBarrier(gegl_cl_get_command_queue()); if (CL_SUCCESS != errcode) CL_ERROR; /////////////////////////////////////////////////////////////////////////// /* AreaFilter general processing flow. Making the necessary color space conversion and Saving data. */ #include "gegl-cl-operation-area-filter-fw2.h" }
gboolean gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err) { GeglBufferClIterators *i = (gpointer)iterator; gboolean result = FALSE; gint no, j; cl_int cl_err = 0; if (i->is_finished) g_error ("%s called on finished buffer iterator", G_STRFUNC); if (i->iteration_no == 0) { for (no=0; no<i->iterators;no++) { if (i->buffer[no]) { gint j; gboolean found = FALSE; for (j=0; j<no; j++) if (i->buffer[no]==i->buffer[j]) { found = TRUE; break; } if (!found) gegl_buffer_lock (i->buffer[no]); if (i->flags[no] == GEGL_CL_BUFFER_WRITE || (i->flags[no] == GEGL_CL_BUFFER_READ && (i->area[no][0] > 0 || i->area[no][1] > 0 || i->area[no][2] > 0 || i->area[no][3] > 0))) { gegl_buffer_cl_cache_flush (i->buffer[no], &i->rect[no]); } } } } else { /* complete pending write work */ for (no=0; no<i->iterators;no++) { if (i->flags[no] == GEGL_CL_BUFFER_WRITE) { /* Wait Processing */ cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue()); if (cl_err != CL_SUCCESS) CL_ERROR; /* color conversion in the GPU (output) */ if (i->conv[no] == GEGL_CL_COLOR_CONVERT) for (j=0; j < i->n; j++) { cl_err = gegl_cl_color_conv (i->tex_op[no][j], i->tex_buf[no][j], i->size[no][j], i->format[no], i->buffer[no]->soft_format); if (cl_err == FALSE) CL_ERROR; } /* Wait Processing */ cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue()); if (cl_err != CL_SUCCESS) CL_ERROR; /* GPU -> CPU */ for (j=0; j < i->n; j++) { gpointer data; /* tile-ize */ if (i->conv[no] == GEGL_CL_COLOR_NOT_SUPPORTED) { data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_op[no][j], CL_TRUE, CL_MAP_READ, 0, i->size[no][j] * i->op_cl_format_size [no], 0, NULL, NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* color conversion using BABL */ gegl_buffer_set (i->buffer[no], &i->roi[no][j], 0, i->format[no], data, GEGL_AUTO_ROWSTRIDE); cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_op[no][j], data, 0, NULL, NULL); if (cl_err != CL_SUCCESS) CL_ERROR; } else #ifdef OPENCL_USE_CACHE { gegl_buffer_cl_cache_new (i->buffer[no], &i->roi[no][j], i->tex_buf[no][j]); /* don't release this texture */ i->tex_buf[no][j] = NULL; } #else { data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE, CL_MAP_READ, 0, i->size[no][j] * i->buf_cl_format_size [no], 0, NULL, NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* color conversion using BABL */ gegl_buffer_set (i->buffer[no], &i->roi[no][j], i->format[no], data, GEGL_AUTO_ROWSTRIDE); cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_buf[no][j], data, 0, NULL, NULL); if (cl_err != CL_SUCCESS) CL_ERROR; } #endif } } } /* Run! */ cl_err = gegl_clFinish(gegl_cl_get_command_queue()); if (cl_err != CL_SUCCESS) CL_ERROR; for (no=0; no < i->iterators; no++) for (j=0; j < i->n; j++) { if (i->tex_buf_from_cache [no][j]) { gboolean ok = gegl_buffer_cl_cache_release (i->tex_buf[no][j]); g_assert (ok); } if (i->tex_buf[no][j] && !i->tex_buf_from_cache [no][j]) gegl_clReleaseMemObject (i->tex_buf[no][j]); if (i->tex_op [no][j]) gegl_clReleaseMemObject (i->tex_op [no][j]); i->tex [no][j] = NULL; i->tex_buf[no][j] = NULL; i->tex_op [no][j] = NULL; } } g_assert (i->iterators > 0); result = (i->roi_no >= i->rois)? FALSE : TRUE; i->n = MIN(GEGL_CL_NTEX, i->rois - i->roi_no); /* then we iterate all */ for (no=0; no<i->iterators;no++) { for (j = 0; j < i->n; j++) { GeglRectangle r = {i->rect[no].x + i->roi_all[i->roi_no+j].x - i->area[no][0], i->rect[no].y + i->roi_all[i->roi_no+j].y - i->area[no][2], i->roi_all[i->roi_no+j].width + i->area[no][0] + i->area[no][1], i->roi_all[i->roi_no+j].height + i->area[no][2] + i->area[no][3]}; i->roi [no][j] = r; i->size[no][j] = r.width * r.height; } if (i->flags[no] == GEGL_CL_BUFFER_READ) { for (j=0; j < i->n; j++) { gpointer data; /* un-tile */ switch (i->conv[no]) { case GEGL_CL_COLOR_NOT_SUPPORTED: { gegl_buffer_cl_cache_flush (i->buffer[no], &i->roi[no][j]); g_assert (i->tex_op[no][j] == NULL); i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY, i->size[no][j] * i->op_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* pre-pinned memory */ data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_op[no][j], CL_TRUE, CL_MAP_WRITE, 0, i->size[no][j] * i->op_cl_format_size [no], 0, NULL, NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* color conversion using BABL */ gegl_buffer_get (i->buffer[no], &i->roi[no][j], 1.0, i->format[no], data, GEGL_AUTO_ROWSTRIDE, GEGL_ABYSS_NONE); cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_op[no][j], data, 0, NULL, NULL); if (cl_err != CL_SUCCESS) CL_ERROR; i->tex[no][j] = i->tex_op[no][j]; break; } case GEGL_CL_COLOR_EQUAL: { i->tex_buf[no][j] = gegl_buffer_cl_cache_get (i->buffer[no], &i->roi[no][j]); if (i->tex_buf[no][j]) i->tex_buf_from_cache [no][j] = TRUE; /* don't free texture from cache */ else { gegl_buffer_cl_cache_flush (i->buffer[no], &i->roi[no][j]); g_assert (i->tex_buf[no][j] == NULL); i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY, i->size[no][j] * i->buf_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* pre-pinned memory */ data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE, CL_MAP_WRITE, 0, i->size[no][j] * i->buf_cl_format_size [no], 0, NULL, NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* color conversion will be performed in the GPU later */ gegl_buffer_cl_worker_transf (i->buffer[no], data, i->buf_cl_format_size [no], i->roi[no][j], FALSE); cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_buf[no][j], data, 0, NULL, NULL); if (cl_err != CL_SUCCESS) CL_ERROR; } i->tex[no][j] = i->tex_buf[no][j]; break; } case GEGL_CL_COLOR_CONVERT: { i->tex_buf[no][j] = gegl_buffer_cl_cache_get (i->buffer[no], &i->roi[no][j]); if (i->tex_buf[no][j]) i->tex_buf_from_cache [no][j] = TRUE; /* don't free texture from cache */ else { gegl_buffer_cl_cache_flush (i->buffer[no], &i->roi[no][j]); g_assert (i->tex_buf[no][j] == NULL); i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY, i->size[no][j] * i->buf_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* pre-pinned memory */ data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE, CL_MAP_WRITE, 0, i->size[no][j] * i->buf_cl_format_size [no], 0, NULL, NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* color conversion will be performed in the GPU later */ /* get buffer data using multiple worker threads to increase bandwidth */ gegl_buffer_cl_worker_transf (i->buffer[no], data, i->buf_cl_format_size [no], i->roi[no][j], FALSE); cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_buf[no][j], data, 0, NULL, NULL); if (cl_err != CL_SUCCESS) CL_ERROR; } g_assert (i->tex_op[no][j] == NULL); i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_READ_WRITE, i->size[no][j] * i->op_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; /* color conversion in the GPU (input) */ g_assert (i->tex_buf[no][j] && i->tex_op[no][j]); cl_err = gegl_cl_color_conv (i->tex_buf[no][j], i->tex_op[no][j], i->size[no][j], i->buffer[no]->soft_format, i->format[no]); if (cl_err == FALSE) CL_ERROR; i->tex[no][j] = i->tex_op[no][j]; break; } } } /* Wait Processing */ cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue()); if (cl_err != CL_SUCCESS) CL_ERROR; } else if (i->flags[no] == GEGL_CL_BUFFER_WRITE) { for (j=0; j < i->n; j++) { switch (i->conv[no]) { case GEGL_CL_COLOR_NOT_SUPPORTED: { g_assert (i->tex_op[no][j] == NULL); i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_ALLOC_HOST_PTR | CL_MEM_WRITE_ONLY, i->size[no][j] * i->op_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; i->tex[no][j] = i->tex_op[no][j]; break; } case GEGL_CL_COLOR_EQUAL: { g_assert (i->tex_buf[no][j] == NULL); i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, /* cache */ i->size[no][j] * i->buf_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; i->tex[no][j] = i->tex_buf[no][j]; break; } case GEGL_CL_COLOR_CONVERT: { g_assert (i->tex_buf[no][j] == NULL); i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, /* cache */ i->size[no][j] * i->buf_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; g_assert (i->tex_op[no][j] == NULL); i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_READ_WRITE, i->size[no][j] * i->op_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; i->tex[no][j] = i->tex_op[no][j]; break; } } } } else if (i->flags[no] == GEGL_CL_BUFFER_AUX) { for (j=0; j < i->n; j++) { g_assert (i->tex_op[no][j] == NULL); i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_READ_WRITE, i->size[no][j] * i->op_cl_format_size [no], NULL, &cl_err); if (cl_err != CL_SUCCESS) CL_ERROR; i->tex[no][j] = i->tex_op[no][j]; } } } i->roi_no += i->n; i->iteration_no++; if (result == FALSE) { for (no=0; no<i->iterators;no++) { if (i->buffer[no]) { gint j; gboolean found = FALSE; for (j=0; j<no; j++) if (i->buffer[no]==i->buffer[j]) { found = TRUE; break; } if (!found) gegl_buffer_unlock (i->buffer[no]); g_object_unref (i->buffer[no]); } } i->is_finished = TRUE; g_free (i->roi_all); g_slice_free (GeglBufferClIterators, i); } *err = FALSE; return result; error: for (no=0; no<i->iterators;no++) for (j=0; j < i->n; j++) { if (i->tex_buf[no][j]) gegl_clReleaseMemObject (i->tex_buf[no][j]); if (i->tex_op [no][j]) gegl_clReleaseMemObject (i->tex_op [no][j]); i->tex [no][j] = NULL; i->tex_buf[no][j] = NULL; i->tex_op [no][j] = NULL; } *err = TRUE; return FALSE; }