/** * ufo_op_set: * @arg: A #UfoBuffer * @value: Value to fill @arg with * @resources: #UfoResources object * @command_queue: A valid cl_command_queue * * Fill a buffer with a value using OpenCL. * * Returns: (transfer full): Event of the set operation */ gpointer ufo_op_set (UfoBuffer *arg, gfloat value, UfoResources *resources, gpointer command_queue) { UfoRequisition requisition; cl_kernel kernel; cl_mem d_arg; cl_event event; GError *error = NULL; static GMutex mutex; ufo_buffer_get_requisition (arg, &requisition); d_arg = ufo_buffer_get_device_image (arg, command_queue); kernel = ufo_resources_get_cached_kernel (resources, OPS_FILENAME, "operation_set", &error); if (error) { g_error ("%s\n", error->message); return NULL; } g_mutex_lock (&mutex); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 0, sizeof(void *), (void *) &d_arg)); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 1, sizeof(gfloat), (void *) &value)); UFO_RESOURCES_CHECK_CLERR (clEnqueueNDRangeKernel (command_queue, kernel, requisition.n_dims, NULL, requisition.dims, NULL, 0, NULL, &event)); g_mutex_unlock (&mutex); return event; }
static gchar * get_device_build_options (UfoResourcesPrivate *priv, guint device_index, const gchar *additional) { GString *opts; gsize size; gchar *name; g_assert (device_index < priv->n_devices); opts = g_string_new (priv->build_opts->str); if (additional != NULL) g_string_append (opts, additional); UFO_RESOURCES_CHECK_CLERR (clGetDeviceInfo (priv->devices[device_index], CL_DEVICE_NAME, 0, NULL, &size)); name = g_malloc0 (size); UFO_RESOURCES_CHECK_CLERR (clGetDeviceInfo (priv->devices[device_index], CL_DEVICE_NAME, size, name, NULL)); g_string_append_printf (opts, " -DDEVICE=%s", escape_device_name (name)); g_free (name); g_list_foreach (priv->include_paths, (GFunc) append_include_path, opts); return g_string_free (opts, FALSE); }
/** * ufo_op_gradient_descent: * @arg: A #UfoBuffer * @out: A #UfoBuffer * @resources: #UfoResources object * @command_queue: A valid cl_command_queue * * Returns: (transfer full): Event of the POSC operation */ gpointer ufo_op_gradient_descent (UfoBuffer *arg, UfoBuffer *out, UfoResources *resources, gpointer command_queue) { UfoRequisition arg_requisition; cl_event event; GError *error = NULL; static GMutex mutex; ufo_buffer_get_requisition (arg, &arg_requisition); ufo_buffer_resize (out, &arg_requisition); cl_mem d_arg = ufo_buffer_get_device_image (arg, command_queue); cl_mem d_out = ufo_buffer_get_device_image (out, command_queue); cl_kernel kernel = ufo_resources_get_cached_kernel (resources, OPS_FILENAME, "descent_grad", &error); if (error) { g_error ("%s\n", error->message); } g_mutex_lock (&mutex); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg(kernel, 0, sizeof(void *), (void *) &d_arg)); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg(kernel, 1, sizeof(void *), (void *) &d_out)); UFO_RESOURCES_CHECK_CLERR (clEnqueueNDRangeKernel (command_queue, kernel, arg_requisition.n_dims, NULL, arg_requisition.dims, NULL, 0, NULL, &event)); g_mutex_unlock (&mutex); return event; }
static void ufo_resources_finalize (GObject *object) { UfoResourcesPrivate *priv; priv = UFO_RESOURCES_GET_PRIVATE (object); g_clear_error (&priv->construct_error); g_hash_table_destroy (priv->kernel_cache); list_free_full (&priv->kernel_paths, (GFunc) g_free); list_free_full (&priv->include_paths, (GFunc) g_free); list_free_full (&priv->kernels, (GFunc) release_kernel); list_free_full (&priv->programs, (GFunc) release_program); for (guint i = 0; i < priv->n_devices; i++) UFO_RESOURCES_CHECK_CLERR (clReleaseCommandQueue (priv->command_queues[i])); if (priv->context) UFO_RESOURCES_CHECK_CLERR (clReleaseContext (priv->context)); g_string_free (priv->build_opts, TRUE); g_free (priv->devices); g_free (priv->command_queues); priv->kernels = NULL; priv->devices = NULL; G_OBJECT_CLASS (ufo_resources_parent_class)->finalize (object); g_debug ("UfoResources: finalized"); }
/** * ufo_gpu_node_get_info: * @node: A #UfoGpuNodeInfo * @info: Information to be queried * * Return information about the associated OpenCL device. * * Returns: (transfer full): Information about @info. */ GValue * ufo_gpu_node_get_info (UfoGpuNode *node, UfoGpuNodeInfo info) { UfoGpuNodePrivate *priv; GValue *value; cl_ulong ulong_value; priv = UFO_GPU_NODE_GET_PRIVATE (node); value = g_new0 (GValue, 1); memset (value, 0, sizeof (GValue)); g_value_init (value, G_TYPE_ULONG); switch (info) { case UFO_GPU_NODE_INFO_GLOBAL_MEM_SIZE: UFO_RESOURCES_CHECK_CLERR (clGetDeviceInfo (priv->device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof (cl_ulong), &ulong_value, NULL)); break; case UFO_GPU_NODE_INFO_LOCAL_MEM_SIZE: UFO_RESOURCES_CHECK_CLERR (clGetDeviceInfo (priv->device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof (cl_ulong), &ulong_value, NULL)); break; } g_value_set_ulong (value, ulong_value); return value; }
/** * ufo_op_mul_rows: * @arg1: A #UfoBuffer * @arg2: A #UfoBuffer * @offset: Offset * @n: n ? * @out: A #UfoBuffer * @resources: #UfoResources object * @command_queue: A valid cl_command_queue * * @out = @arg1 - @modifier * @arg2 * * Returns: (transfer full): Event of the add operation */ gpointer ufo_op_mul_rows (UfoBuffer *arg1, UfoBuffer *arg2, UfoBuffer *out, guint offset, guint n, UfoResources *resources, gpointer command_queue) { cl_event event; UfoRequisition arg1_requisition, arg2_requisition, out_requisition; GError *error = NULL; static GMutex mutex; ufo_buffer_get_requisition (arg1, &arg1_requisition); ufo_buffer_get_requisition (arg2, &arg2_requisition); ufo_buffer_get_requisition (out, &out_requisition); if (arg1_requisition.dims[0] != arg2_requisition.dims[0] || arg1_requisition.dims[0] != out_requisition.dims[0]) { g_error ("Number of columns is different."); return NULL; } if (arg1_requisition.dims[1] < offset + n || arg2_requisition.dims[1] < offset + n || out_requisition.dims[1] < offset + n) { g_error ("Rows are not enough."); return NULL; } cl_mem d_arg1 = ufo_buffer_get_device_image (arg1, command_queue); cl_mem d_arg2 = ufo_buffer_get_device_image (arg2, command_queue); cl_mem d_out = ufo_buffer_get_device_image (out, command_queue); cl_kernel kernel = ufo_resources_get_cached_kernel (resources, OPS_FILENAME, "op_mulRows", &error); if (error != NULL) { g_error ("Error: %s\n", error->message); return NULL; } g_mutex_lock (&mutex); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 0, sizeof(void *), (void *) &d_arg1)); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 1, sizeof(void *), (void *) &d_arg2)); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 2, sizeof(void *), (void *) &d_out)); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 3, sizeof(unsigned int), (void *) &offset)); UfoRequisition operation_requisition = out_requisition; operation_requisition.dims[1] = n; UFO_RESOURCES_CHECK_CLERR (clEnqueueNDRangeKernel (command_queue, kernel, operation_requisition.n_dims, NULL, operation_requisition.dims, NULL, 0, NULL, &event)); g_mutex_unlock (&mutex); return event; }
static cl_event operation2 (const gchar *kernel_name, UfoBuffer *arg1, UfoBuffer *arg2, gfloat modifier, UfoBuffer *out, UfoResources *resources, gpointer command_queue) { UfoRequisition arg1_requisition, arg2_requisition, out_requisition; cl_event event; GError *error = NULL; static GMutex mutex; ufo_buffer_get_requisition (arg1, &arg1_requisition); ufo_buffer_get_requisition (arg2, &arg2_requisition); ufo_buffer_get_requisition (out, &out_requisition); if ((arg1_requisition.dims[0] != arg2_requisition.dims[0] && arg1_requisition.dims[0] != out_requisition.dims[0]) || (arg1_requisition.dims[1] != arg2_requisition.dims[1] && arg1_requisition.dims[1] != out_requisition.dims[1])) { g_error ("Incorrect volume size."); return NULL; } cl_mem d_arg1 = ufo_buffer_get_device_image (arg1, command_queue); cl_mem d_arg2 = ufo_buffer_get_device_image (arg2, command_queue); cl_mem d_out = ufo_buffer_get_device_image (out, command_queue); cl_kernel kernel = ufo_resources_get_cached_kernel (resources, OPS_FILENAME, kernel_name, &error); if (error) { g_error ("%s\n", error->message); return NULL; } g_mutex_lock (&mutex); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg(kernel, 0, sizeof(void *), (void *) &d_arg1)); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg(kernel, 1, sizeof(void *), (void *) &d_arg2)); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg(kernel, 2, sizeof(gfloat), (void *) &modifier)); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg(kernel, 3, sizeof(void *), (void *) &d_out)); UFO_RESOURCES_CHECK_CLERR (clEnqueueNDRangeKernel (command_queue, kernel, arg1_requisition.n_dims, NULL, arg1_requisition.dims, NULL, 0, NULL, &event)); g_mutex_unlock (&mutex); return event; }
static gboolean ufo_null_task_process (UfoTask *task, UfoBuffer **inputs, UfoBuffer *output, UfoRequisition *requisition) { UfoNullTaskPrivate *priv; priv = UFO_NULL_TASK_GET_PRIVATE (task); if (priv->force_download) { gfloat *host_array; host_array = ufo_buffer_get_host_array (inputs[0], NULL); host_array[0] = 0.0; } if (priv->finish) { UfoGpuNode *gpu; gpu = UFO_GPU_NODE (ufo_task_node_get_proc_node (UFO_TASK_NODE (task))); UFO_RESOURCES_CHECK_CLERR (clFinish (ufo_gpu_node_get_cmd_queue (gpu))); } return TRUE; }
static gboolean platform_vendor_has_prefix (cl_platform_id platform, const gchar *prefix) { gboolean has_prefix; gchar *str; gsize size; UFO_RESOURCES_CHECK_CLERR (clGetPlatformInfo (platform, CL_PLATFORM_VENDOR, 0, NULL, &size)); str = g_malloc0 (size); UFO_RESOURCES_CHECK_CLERR (clGetPlatformInfo (platform, CL_PLATFORM_VENDOR, size, str, NULL)); has_prefix = g_str_has_prefix (str, prefix); g_free (str); return has_prefix; }
static void ufo_gpu_node_finalize (GObject *object) { UfoGpuNodePrivate *priv; priv = UFO_GPU_NODE_GET_PRIVATE (object); if (priv->cmd_queue != NULL) { g_debug ("Release cmd_queue=%p", (gpointer) priv->cmd_queue); UFO_RESOURCES_CHECK_CLERR (clReleaseCommandQueue (priv->cmd_queue)); priv->cmd_queue = NULL; UFO_RESOURCES_CHECK_CLERR (clReleaseContext (priv->context)); } G_OBJECT_CLASS (ufo_gpu_node_parent_class)->finalize (object); }
UfoNode * ufo_gpu_node_new (gpointer context, gpointer device) { UfoGpuNode *node; cl_int errcode; cl_command_queue_properties queue_properties; g_return_val_if_fail (context != NULL && device != NULL, NULL); queue_properties = CL_QUEUE_PROFILING_ENABLE; node = UFO_GPU_NODE (g_object_new (UFO_TYPE_GPU_NODE, NULL)); node->priv->context = context; node->priv->device = device; node->priv->cmd_queue = clCreateCommandQueue (context, device, queue_properties, &errcode); UFO_RESOURCES_CHECK_CLERR (errcode); UFO_RESOURCES_CHECK_CLERR (clRetainContext (context)); return UFO_NODE (node); }
static cl_kernel create_kernel (UfoResourcesPrivate *priv, cl_program program, const gchar *kernel_name, GError **error) { cl_kernel kernel; gchar *name; cl_int errcode = CL_SUCCESS; if (kernel_name == NULL) { gchar *source; gsize size; UFO_RESOURCES_CHECK_CLERR (clGetProgramInfo (program, CL_PROGRAM_SOURCE, 0, NULL, &size)); source = g_malloc0 (size); UFO_RESOURCES_CHECK_CLERR (clGetProgramInfo (program, CL_PROGRAM_SOURCE, size, source, NULL)); name = get_first_kernel_name (source); g_free (source); } else { name = g_strdup (kernel_name); } kernel = clCreateKernel (program, name, &errcode); g_free (name); if (kernel == NULL || errcode != CL_SUCCESS) { g_set_error (error, UFO_RESOURCES_ERROR, UFO_RESOURCES_ERROR_CREATE_KERNEL, "Failed to create kernel `%s`: %s", kernel_name, ufo_resources_clerr (errcode)); return NULL; } priv->kernels = g_list_append (priv->kernels, kernel); return kernel; }
static void ufo_fftmult_task_setup (UfoTask *task, UfoResources *resources, GError **error) { UfoFftmultTaskPrivate *priv; priv = UFO_FFTMULT_TASK_GET_PRIVATE (task); priv->resources = resources; priv->k_fftmult = ufo_resources_get_kernel (resources, "fftmult.cl", "mult", error); if (priv->k_fftmult != NULL) UFO_RESOURCES_CHECK_CLERR (clRetainKernel (priv->k_fftmult)); }
static gboolean platform_has_gpus (cl_platform_id platform) { cl_uint n_devices = 0; cl_int err; err = clGetDeviceIDs (platform, CL_DEVICE_TYPE_GPU, 0, NULL, &n_devices); if (err != CL_DEVICE_NOT_FOUND) UFO_RESOURCES_CHECK_CLERR (err); return n_devices > 0; }
static cl_platform_id get_preferably_gpu_based_platform (void) { cl_platform_id *platforms; cl_uint n_platforms; cl_platform_id candidate = 0; UFO_RESOURCES_CHECK_CLERR (clGetPlatformIDs (0, NULL, &n_platforms)); platforms = g_malloc0 (n_platforms * sizeof (cl_platform_id)); UFO_RESOURCES_CHECK_CLERR (clGetPlatformIDs (n_platforms, platforms, NULL)); if (n_platforms > 0) candidate = platforms[0]; for (guint i = 0; i < n_platforms; i++) { if (platform_has_gpus (platforms[i])) { candidate = platforms[i]; break; } } g_free (platforms); return candidate; }
cl_int ufo_fft_update (UfoFft *fft, cl_context context, cl_command_queue queue, UfoFftParameter *param) { gboolean changed; cl_int error; error = CL_SUCCESS; changed = param->size[0] != fft->seen.size[0] || param->size[1] != fft->seen.size[1]; if (changed) memcpy (&fft->seen, param, sizeof (UfoFftParameter)); #ifdef HAVE_AMD if (fft->amd_plan == 0 || changed) { /* we use param->dimension to index into this array! */ clfftDim dimension[4] = { 0, CLFFT_1D, CLFFT_2D, CLFFT_3D }; if (fft->amd_plan != 0) { clfftDestroyPlan (&fft->amd_plan); fft->amd_plan = 0; } UFO_RESOURCES_CHECK_CLERR (clfftCreateDefaultPlan (&fft->amd_plan, context, dimension[param->dimensions], param->size)); UFO_RESOURCES_CHECK_CLERR (clfftSetPlanBatchSize (fft->amd_plan, param->batch)); UFO_RESOURCES_CHECK_CLERR (clfftSetPlanPrecision (fft->amd_plan, CLFFT_SINGLE)); UFO_RESOURCES_CHECK_CLERR (clfftSetLayout (fft->amd_plan, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED)); UFO_RESOURCES_CHECK_CLERR (clfftSetResultLocation (fft->amd_plan, param->zeropad ? CLFFT_INPLACE : CLFFT_OUTOFPLACE)); UFO_RESOURCES_CHECK_CLERR (clfftBakePlan (fft->amd_plan, 1, &queue, NULL, NULL)); } #else if (fft->apple_plan == NULL || changed) { clFFT_Dim3 size; /* we use param->dimension to index into this array! */ clFFT_Dimension dimension[4] = { 0, clFFT_1D, clFFT_2D, clFFT_3D }; size.x = param->size[0]; size.y = param->size[1]; size.z = param->size[2]; if (fft->apple_plan != NULL) { clFFT_DestroyPlan (fft->apple_plan); fft->apple_plan = NULL; } fft->apple_plan = clFFT_CreatePlan (context, size, dimension[param->dimensions], clFFT_InterleavedComplexFormat, &error); } #endif return error; }
UfoFft * ufo_fft_new (void) { UfoFft *fft; fft = g_malloc0 (sizeof (UfoFft)); #ifdef HAVE_AMD UFO_RESOURCES_CHECK_CLERR (clfftSetup (&fft->amd_setup)); g_mutex_lock (&amd_mutex); ffts_created = g_list_append (ffts_created, fft); g_mutex_unlock (&amd_mutex); g_debug ("INFO Create new plan using AMD FFT"); #else g_debug ("INFO Create new plan using Apple FFT"); #endif return fft; }
static void handle_get_num_devices (UfoDaemon *daemon) { UfoDaemonPrivate *priv = UFO_DAEMON_GET_PRIVATE (daemon); cl_context context; UfoMessage *msg = ufo_message_new (UFO_MESSAGE_ACK, sizeof (guint16)); cl_uint *num_devices = g_malloc (sizeof (cl_uint)); context = ufo_scheduler_get_context (priv->scheduler); UFO_RESOURCES_CHECK_CLERR (clGetContextInfo (context, CL_CONTEXT_NUM_DEVICES, sizeof (cl_uint), num_devices, NULL)); *(guint16 *) msg->data = (guint16) *num_devices; ufo_messenger_send_blocking (priv->msger, msg, 0); ufo_message_free (msg); }
static void handle_build_error (cl_program program, cl_device_id device, cl_int errcode, GError **error) { const gsize LOG_SIZE = 4096; gchar *log; g_set_error (error, UFO_RESOURCES_ERROR, UFO_RESOURCES_ERROR_BUILD_PROGRAM, "Failed to build OpenCL program: %s", ufo_resources_clerr (errcode)); log = g_malloc0 (LOG_SIZE * sizeof (char)); UFO_RESOURCES_CHECK_CLERR (clGetProgramBuildInfo (program, device, CL_PROGRAM_BUILD_LOG, LOG_SIZE, log, NULL)); g_print ("\n=== Build log ===%s\n\n", log); g_free (log); }
static void release_program (cl_program program) { UFO_RESOURCES_CHECK_CLERR (clReleaseProgram (program)); }
static void release_kernel (cl_kernel kernel) { UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (kernel)); }