int starpu_opencl_load_kernel(cl_kernel *kernel, cl_command_queue *queue, struct starpu_opencl_program *opencl_programs, char *kernel_name, int devid) { int err; cl_device_id device; cl_context context; cl_program program; starpu_opencl_get_device(devid, &device); starpu_opencl_get_context(devid, &context); starpu_opencl_get_queue(devid, queue); program = opencl_programs->programs[devid]; if (!program) { _STARPU_DISP("Program not available\n"); return CL_INVALID_PROGRAM; } // Create the compute kernel in the program we wish to run *kernel = clCreateKernel(program, kernel_name, &err); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); return CL_SUCCESS; }
void soclEnqueueNDRangeKernel_task(void *descr[], void *args) { command_ndrange_kernel cmd = (command_ndrange_kernel)args; cl_command_queue cq; int wid; cl_int err; cl_event ev = command_event_get(cmd); ev->prof_start = _socl_nanotime(); gc_entity_release(ev); wid = starpu_worker_get_id(); starpu_opencl_get_queue(wid, &cq); DEBUG_MSG("[worker %d] [kernel %d] Executing kernel...\n", wid, cmd->kernel->id); int range = starpu_worker_get_range(); /* Set arguments */ { unsigned int i; int buf = 0; for (i=0; i<cmd->num_args; i++) { switch (cmd->arg_types[i]) { case Null: err = clSetKernelArg(cmd->kernel->cl_kernels[range], i, cmd->arg_sizes[i], NULL); break; case Buffer: { cl_mem mem; mem = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[buf]); err = clSetKernelArg(cmd->kernel->cl_kernels[range], i, cmd->arg_sizes[i], &mem); buf++; } break; case Immediate: err = clSetKernelArg(cmd->kernel->cl_kernels[range], i, cmd->arg_sizes[i], cmd->args[i]); break; } if (err != CL_SUCCESS) { DEBUG_CL("clSetKernelArg", err); DEBUG_ERROR("Aborting\n"); } } } /* Calling Kernel */ cl_event event; err = clEnqueueNDRangeKernel(cq, cmd->kernel->cl_kernels[range], cmd->work_dim, cmd->global_work_offset, cmd->global_work_size, cmd->local_work_size, 0, NULL, &event); if (err != CL_SUCCESS) { ERROR_MSG("Worker[%d] Unable to Enqueue kernel (error %d)\n", wid, err); DEBUG_CL("clEnqueueNDRangeKernel", err); DEBUG_MSG("Workdim %d, global_work_offset %p, global_work_size %p, local_work_size %p\n", cmd->work_dim, cmd->global_work_offset, cmd->global_work_size, cmd->local_work_size); DEBUG_MSG("Global work size: %ld %ld %ld\n", cmd->global_work_size[0], (cmd->work_dim > 1 ? cmd->global_work_size[1] : 1), (cmd->work_dim > 2 ? cmd->global_work_size[2] : 1)); if (cmd->local_work_size != NULL) DEBUG_MSG("Local work size: %ld %ld %ld\n", cmd->local_work_size[0], (cmd->work_dim > 1 ? cmd->local_work_size[1] : 1), (cmd->work_dim > 2 ? cmd->local_work_size[2] : 1)); } else { /* Waiting for kernel to terminate */ clWaitForEvents(1, &event); clReleaseEvent(event); } }