void opencl_codelet(void *descr[], void *_args) { cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[0]); cl_kernel kernel; cl_command_queue queue; cl_event event; int id, devid, err; id = starpu_worker_get_id(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "incrementer", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(val), &val); if (err) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=4; size_t local, s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local=global; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); starpu_opencl_release_kernel(kernel); } }
void axpy_opencl(void *buffers[], void *_args) { TYPE *alpha = _args; int id, devid; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_event event; unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); cl_mem x = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); cl_mem y = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[1]); id = starpu_worker_get_id(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "_axpy_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(x), &x); err|= clSetKernelArg(kernel, 1, sizeof(y), &y); err|= clSetKernelArg(kernel, 2, sizeof(n), &n); err|= clSetKernelArg(kernel, 3, sizeof(*alpha), alpha); if (err) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=n; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local=global; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); }
void redux_opencl_func(void *buffers[], void *args) { int id, devid; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_event event; cl_mem dota = (cl_mem) STARPU_VARIABLE_GET_PTR(buffers[0]); cl_mem dotb = (cl_mem) STARPU_VARIABLE_GET_PTR(buffers[1]); id = starpu_worker_get_id(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &_opencl_program, "_redux_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(dota), &dota); err|= clSetKernelArg(kernel, 1, sizeof(dotb), &dotb); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=1; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local=global; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); }
int starpu_opencl_load_opencl_from_string(char *opencl_program_source, struct starpu_opencl_program *opencl_programs) { unsigned int dev; unsigned int nb_devices; nb_devices = _starpu_opencl_get_device_count(); // Iterate over each device for(dev = 0; dev < nb_devices; dev ++) { cl_device_id device; cl_context context; cl_program program; cl_int err; starpu_opencl_get_device(dev, &device); starpu_opencl_get_context(dev, &context); opencl_programs->programs[dev] = NULL; if (context == NULL) continue; // Create the compute program from the source buffer program = clCreateProgramWithSource(context, 1, (const char **) &opencl_program_source, NULL, &err); if (!program || err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); // Build the program executable err = clBuildProgram(program, 1, &device, "-Werror -cl-mad-enable", NULL, NULL); if (err != CL_SUCCESS) { size_t len; static char buffer[4096]; _STARPU_DISP("Error: Failed to build program executable!\n"); clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); _STARPU_DISP("<%s>\n", buffer); return EXIT_FAILURE; } // Store program opencl_programs->programs[dev] = program; } return EXIT_SUCCESS; }
int starpu_opencl_load_kernel(cl_kernel *kernel, cl_command_queue *queue, struct starpu_opencl_program *opencl_programs, char *kernel_name, int devid) { int err; cl_device_id device; cl_context context; cl_program program; starpu_opencl_get_device(devid, &device); starpu_opencl_get_context(devid, &context); starpu_opencl_get_queue(devid, queue); program = opencl_programs->programs[devid]; if (!program) { _STARPU_DISP("Program not available\n"); return CL_INVALID_PROGRAM; } // Create the compute kernel in the program we wish to run *kernel = clCreateKernel(program, kernel_name, &err); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); return CL_SUCCESS; }
void cpu_to_opencl_opencl_func(void *buffers[], void *args) { (void) args; int id, devid; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_event event; unsigned n = CUSTOM_GET_NX(buffers[0]); n*=2; struct point *aop; aop = (struct point *) CUSTOM_GET_CPU_PTR(buffers[0]); id = starpu_worker_get_id(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &_opencl_conversion_program, "custom_opencl_conversion", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); void *x = CUSTOM_GET_OPENCL_X_PTR(buffers[0]); if (starpu_opencl_set_kernel_args(&err, &kernel, sizeof(aop), &aop, sizeof(x), &x, sizeof(n), &n, 0) != 3) { STARPU_OPENCL_REPORT_ERROR(err); assert(0); } { size_t global=n; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local = global; err = clEnqueueNDRangeKernel( queue, kernel, 1, /* work_dim */ NULL, /* global_work_offset */ &global, /* global_work_size */ &local, /* local_work_size */ 0, /* num_events_in_wait_list */ NULL, /* event_wait_list */ NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); }
int starpu_opencl_load_binary_opencl(const char *kernel_id, struct starpu_opencl_program *opencl_programs) { unsigned int dev; unsigned int nb_devices; nb_devices = _starpu_opencl_get_device_count(); // Iterate over each device for(dev = 0; dev < nb_devices; dev ++) { cl_device_id device; cl_context context; cl_program program; cl_int err; char *binary; char binary_file_name[1024]; size_t length; cl_int binary_status; opencl_programs->programs[dev] = NULL; starpu_opencl_get_device(dev, &device); starpu_opencl_get_context(dev, &context); if (context == NULL) { _STARPU_DEBUG("[%u] is not a valid OpenCL context\n", dev); continue; } // Load the binary buffer err = _starpu_opencl_get_binary_name(binary_file_name, 1024, kernel_id, dev, device); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); binary = _starpu_opencl_load_program_binary(binary_file_name, &length); // Create the compute program from the binary buffer program = clCreateProgramWithBinary(context, 1, &device, &length, (const unsigned char **) &binary, &binary_status, &err); if (!program || err != CL_SUCCESS) { _STARPU_DISP("Error: Failed to load program binary!\n"); return EXIT_FAILURE; } // Build the program executable err = clBuildProgram(program, 1, &device, NULL, NULL, NULL); // Get the status { cl_build_status status; size_t len; static char buffer[4096] = ""; clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); if (len > 2) _STARPU_DISP("Compilation output\n%s\n", buffer); clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, NULL); if (err != CL_SUCCESS || status != CL_BUILD_SUCCESS) { _STARPU_DISP("Error: Failed to build program executable!\n"); _STARPU_DISP("clBuildProgram: %d - clGetProgramBuildInfo: %d\n", err, status); return EXIT_FAILURE; } } // Store program opencl_programs->programs[dev] = program; } return 0; }
static int _starpu_opencl_compile_or_load_opencl_from_string(const char *opencl_program_source, const char* build_options, struct starpu_opencl_program *opencl_programs, const char* source_file_name) { unsigned int dev; unsigned int nb_devices; nb_devices = _starpu_opencl_get_device_count(); // Iterate over each device for(dev = 0; dev < nb_devices; dev ++) { cl_device_id device; cl_context context; cl_program program; cl_int err; if (opencl_programs) opencl_programs->programs[dev] = NULL; starpu_opencl_get_device(dev, &device); starpu_opencl_get_context(dev, &context); if (context == NULL) { _STARPU_DEBUG("[%u] is not a valid OpenCL context\n", dev); continue; } // Create the compute program from the source buffer program = clCreateProgramWithSource(context, 1, (const char **) &opencl_program_source, NULL, &err); if (!program || err != CL_SUCCESS) { _STARPU_DISP("Error: Failed to load program source with options %s!\n", build_options); return EXIT_FAILURE; } // Build the program executable err = clBuildProgram(program, 1, &device, build_options, NULL, NULL); // Get the status { cl_build_status status; size_t len; static char buffer[4096] = ""; clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); if (len > 2) _STARPU_DISP("Compilation output\n%s\n", buffer); clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, NULL); if (err != CL_SUCCESS || status != CL_BUILD_SUCCESS) { _STARPU_DISP("Error: Failed to build program executable!\n"); _STARPU_DISP("clBuildProgram: %d - clGetProgramBuildInfo: %d\n", err, status); return EXIT_FAILURE; } } // Store program if (opencl_programs) opencl_programs->programs[dev] = program; else { char binary_file_name[1024]; char *binary; size_t binary_len; FILE *fh; err = _starpu_opencl_get_binary_name(binary_file_name, 1024, source_file_name, dev, device); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binary_len, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); binary = malloc(binary_len); err = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(binary), &binary, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); fh = fopen(binary_file_name, "w"); if (fh == NULL) { _STARPU_DISP("Error: Failed to open file <%s>\n", binary_file_name); perror("fopen"); return EXIT_FAILURE; } fwrite(binary, binary_len, 1, fh); fclose(fh); free(binary); _STARPU_DEBUG("File <%s> created\n", binary_file_name); } } return EXIT_SUCCESS; }
void cpu_to_opencl_opencl_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; (void) args; int id, devid, ret; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_event event; unsigned n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); cl_mem src = (cl_mem) STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]); cl_mem dst = (cl_mem) STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]); id = starpu_worker_get_id(); devid = starpu_worker_get_devid(id); ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_conversion_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_conversion_program, "cpu_to_opencl_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(src), &src); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 1, sizeof(dst), &dst); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 2, sizeof(n), &n); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=n; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local = global; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } clFinish(queue); starpu_opencl_collect_stats(event); clReleaseEvent(event); starpu_opencl_release_kernel(kernel); ret = starpu_opencl_unload_opencl(&opencl_conversion_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); }
void multiformat_scal_opencl_func(void *buffers[], void *args) { (void) args; int id, devid; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_event event; unsigned n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); cl_mem val = (cl_mem)STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]); id = starpu_worker_get_id(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "multiformat_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(val), &val); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 1, sizeof(n), &n); if (err) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=n; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local = global; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } clFinish(queue); starpu_opencl_collect_stats(event); clReleaseEvent(event); starpu_opencl_release_kernel(kernel); }
void test_variable_opencl_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; int id, devid, ret; int factor = *(int *) args; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_event event; ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); cl_mem val = (cl_mem)STARPU_VARIABLE_GET_PTR(buffers[0]); cl_context context; id = starpu_worker_get_id(); devid = starpu_worker_get_devid(id); starpu_opencl_get_context(devid, &context); cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(int), &variable_config.copy_failed, &err); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "variable_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(val), &val); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 1, sizeof(fail), &fail); if (err) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 2, sizeof(factor), &factor); if (err) STARPU_OPENCL_REPORT_ERROR(err); { size_t global = 1; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local = global; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } err = clEnqueueReadBuffer(queue, fail, CL_TRUE, 0, sizeof(int), &variable_config.copy_failed, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); starpu_opencl_collect_stats(event); clReleaseEvent(event); starpu_opencl_release_kernel(kernel); ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); return; }