static inline gboolean _gegl_buffer_cl_cache_flush2 (GeglTileHandlerCache *cache, const GeglRectangle *roi) { size_t size; GList *elem; GeglRectangle tmp; cl_int cl_err = 0; gpointer data; gboolean need_cl = FALSE; for (elem=cache_entries; elem; elem=elem->next) { CacheEntry *entry = elem->data; if (entry->valid && entry->tile_storage->cache == cache && (!roi || gegl_rectangle_intersect (&tmp, roi, &entry->roi))) { entry->valid = FALSE; entry->used ++; gegl_cl_color_babl (entry->buffer->soft_format, &size); data = g_malloc(entry->roi.width * entry->roi.height * size); cl_err = gegl_clEnqueueReadBuffer(gegl_cl_get_command_queue(), entry->tex, CL_TRUE, 0, entry->roi.width * entry->roi.height * size, data, 0, NULL, NULL); /* tile-ize */ gegl_buffer_set (entry->buffer, &entry->roi, 0, entry->buffer->soft_format, data, GEGL_AUTO_ROWSTRIDE); entry->used --; need_cl = TRUE; g_free(data); CL_CHECK; } } if (need_cl) { cl_err = gegl_clFinish (gegl_cl_get_command_queue ()); CL_CHECK; g_mutex_lock (&cache_mutex); while (cache_entry_find_invalid (&data)) { CacheEntry *entry = data; #if 1 GEGL_NOTE (GEGL_DEBUG_OPENCL, "Removing from cl-cache: %p %s {%d %d %d %d}", entry->buffer, babl_get_name(entry->buffer->soft_format), entry->roi.x, entry->roi.y, entry->roi.width, entry->roi.height); #endif gegl_clReleaseMemObject(entry->tex); memset (entry, 0x0, sizeof (CacheEntry)); g_slice_free (CacheEntry, data); cache_entries = g_list_remove (cache_entries, data); } g_mutex_unlock (&cache_mutex); } return TRUE; error: g_mutex_lock (&cache_mutex); while (cache_entry_find_invalid (&data)) { g_slice_free (CacheEntry, data); cache_entries = g_list_remove (cache_entries, data); } g_mutex_unlock (&cache_mutex); /* XXX : result is corrupted */ return FALSE; }
static gboolean cl_buffer_get_min_max (cl_mem in_tex, size_t global_worksize, const GeglRectangle *roi, gfloat min[4], gfloat max[4]) { cl_int cl_err = 0; size_t local_ws, max_local_ws; size_t work_groups; size_t global_ws; cl_mem cl_aux_min = NULL; cl_mem cl_aux_max = NULL; cl_mem cl_min_max = NULL; cl_int n_pixels = (cl_int)global_worksize; cl_float4 min_max_buf[2]; if (global_worksize < 1) { min[0] = min[1] = min[2] = min[3] = G_MAXFLOAT; max[0] = max[1] = max[2] = max[3] = -G_MAXFLOAT; return FALSE; } cl_err = gegl_clGetDeviceInfo (gegl_cl_get_device (), CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof (size_t), &max_local_ws, NULL); CL_CHECK; max_local_ws = MIN (max_local_ws, MIN (cl_data->work_group_size[0], cl_data->work_group_size[1])); /* Needs to be a power of two */ local_ws = 256; while (local_ws > max_local_ws) local_ws /= 2; work_groups = MIN ((global_worksize + local_ws - 1) / local_ws, local_ws); global_ws = work_groups * local_ws; cl_aux_min = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_READ_WRITE, local_ws * sizeof(cl_float4), NULL, &cl_err); CL_CHECK; cl_aux_max = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_READ_WRITE, local_ws * sizeof(cl_float4), NULL, &cl_err); CL_CHECK; cl_min_max = gegl_clCreateBuffer (gegl_cl_get_context (), CL_MEM_WRITE_ONLY, 2 * sizeof(cl_float4), NULL, &cl_err); CL_CHECK; /* The full initialization is done in the two_stages_local_min_max_reduce kernel */ #if 0 cl_err = gegl_clSetKernelArg(cl_data->kernel[3], 0, sizeof(cl_mem), (void*)&cl_aux_min); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[3], 1, sizeof(cl_mem), (void*)&cl_aux_max); CL_CHECK; cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (), cl_data->kernel[3], 1, NULL, &local_ws, &local_ws, 0, NULL, NULL); CL_CHECK; #endif cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem), (void*)&cl_aux_min); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_mem), (void*)&cl_aux_max); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_float4) * local_ws, NULL); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_float4) * local_ws, NULL); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_int), (void*)&n_pixels); CL_CHECK; cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (), cl_data->kernel[0], 1, NULL, &global_ws, &local_ws, 0, NULL, NULL); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem), (void*)&cl_aux_min); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_mem), (void*)&cl_aux_max); CL_CHECK; cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_mem), (void*)&cl_min_max); CL_CHECK; /* Only one work group */ cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (), cl_data->kernel[1], 1, NULL, &local_ws, &local_ws, 0, NULL, NULL); CL_CHECK; /* Read the memory buffer, probably better to keep it in GPU memory */ cl_err = gegl_clEnqueueReadBuffer (gegl_cl_get_command_queue (), cl_min_max, CL_TRUE, 0, 2 * sizeof (cl_float4), &min_max_buf, 0, NULL, NULL); CL_CHECK; min[0] = min_max_buf[0].x; min[1] = min_max_buf[0].y; min[2] = min_max_buf[0].z; min[3] = min_max_buf[0].w; max[0] = min_max_buf[1].x; max[1] = min_max_buf[1].y; max[2] = min_max_buf[1].z; max[3] = min_max_buf[1].w; cl_err = gegl_clReleaseMemObject (cl_aux_min); CL_CHECK_ONLY (cl_err); cl_err = gegl_clReleaseMemObject (cl_aux_max); CL_CHECK_ONLY (cl_err); cl_err = gegl_clReleaseMemObject (cl_min_max); CL_CHECK_ONLY (cl_err); return FALSE; error: if (cl_aux_min) gegl_clReleaseMemObject (cl_aux_min); if (cl_aux_max) gegl_clReleaseMemObject (cl_aux_max); if (cl_min_max) gegl_clReleaseMemObject (cl_min_max); return TRUE; }