static __isl_give isl_printer *opencl_print_kernel(struct gpu_prog *prog, struct ppcg_kernel *kernel, __isl_take isl_printer *p) { isl_ctx *ctx = isl_ast_node_get_ctx(kernel->tree); isl_ast_print_options *print_options; print_options = isl_ast_print_options_alloc(ctx); print_options = isl_ast_print_options_set_print_user(print_options, &opencl_print_kernel_stmt, NULL); p = isl_printer_set_output_format(p, ISL_FORMAT_C); p = opencl_print_kernel_header(p, prog, kernel); p = isl_printer_print_str(p, "{"); p = isl_printer_end_line(p); p = isl_printer_indent(p, 4); p = opencl_print_kernel_iterators(p, kernel); p = opencl_print_kernel_vars(p, kernel); p = isl_printer_end_line(p); p = isl_ast_op_type_print_macro(isl_ast_op_fdiv_q, p); p = ppcg_print_macros(p, kernel->tree); p = isl_ast_node_print(kernel->tree, p, print_options); p = isl_printer_indent(p, -4); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "}"); p = isl_printer_end_line(p); return p; }
static void print_kernel(struct gpu_prog *prog, struct ppcg_kernel *kernel, struct cuda_info *cuda) { isl_ctx *ctx = isl_ast_node_get_ctx(kernel->tree); isl_ast_print_options *print_options; isl_printer *p; print_kernel_headers(prog, kernel, cuda); fprintf(cuda->kernel_c, "{\n"); print_kernel_iterators(cuda->kernel_c, kernel); p = isl_printer_to_file(ctx, cuda->kernel_c); p = isl_printer_set_output_format(p, ISL_FORMAT_C); p = isl_printer_indent(p, 4); p = print_kernel_vars(p, kernel); p = isl_printer_end_line(p); p = ppcg_set_macro_names(p); p = gpu_print_macros(p, kernel->tree); //p = print_surface_read_to_temp(p, kernel); print_options = isl_ast_print_options_alloc(ctx); print_options = isl_ast_print_options_set_print_user(print_options, &print_kernel_stmt, NULL); p = isl_ast_node_print(kernel->tree, p, print_options); isl_printer_free(p); fprintf(cuda->kernel_c, "}\n"); }
/* Print the user statement of the host code to "p". * * The host code may contain original user statements, kernel launches and * statements that copy data to/from the device. * The original user statements and the kernel launches have * an associated annotation, while the data copy statements do not. * The latter are handled by print_to_from_device. * The annotation on the user statements is called "user". * * In case of a kernel launch, print a block of statements that * defines the grid and the work group and then launches the kernel. * * A grid is composed of many work groups (blocks), each work group holds * many work-items (threads). * * global_work_size[kernel->n_block] represents the total number of work * items. It points to an array of kernel->n_block unsigned * values that describe the total number of work-items that will execute * the kernel. The total number of work-items is computed as: * global_work_size[0] *...* global_work_size[kernel->n_block - 1]. * * The size of each work group (i.e. the number of work-items in each work * group) is described using block_size[kernel->n_block]. The total * number of work-items in a block (work-group) is computed as: * block_size[0] *... * block_size[kernel->n_block - 1]. * * For more information check: * http://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/clEnqueueNDRangeKernel.html */ static __isl_give isl_printer *opencl_print_host_user( __isl_take isl_printer *p, __isl_take isl_ast_print_options *print_options, __isl_keep isl_ast_node *node, void *user) { isl_id *id; int is_user; struct ppcg_kernel *kernel; struct ppcg_kernel_stmt *stmt; struct print_host_user_data_opencl *data; isl_ast_print_options_free(print_options); data = (struct print_host_user_data_opencl *) user; id = isl_ast_node_get_annotation(node); if (!id) return print_to_from_device(p, node, data->prog); is_user = !strcmp(isl_id_get_name(id), "user"); kernel = is_user ? NULL : isl_id_get_user(id); stmt = is_user ? isl_id_get_user(id) : NULL; isl_id_free(id); if (is_user) return ppcg_kernel_print_domain(p, stmt); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "{"); p = isl_printer_end_line(p); p = isl_printer_indent(p, 2); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "size_t global_work_size["); if (kernel->n_block > 0) p = isl_printer_print_int(p, kernel->n_block); else p = isl_printer_print_int(p, 1); p = isl_printer_print_str(p, "] = {"); p = opencl_print_total_number_of_work_items_as_list(p, kernel); p = isl_printer_print_str(p, "};"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "size_t block_size["); if (kernel->n_block > 0) p = isl_printer_print_int(p, kernel->n_block); else p = isl_printer_print_int(p, 1); p = isl_printer_print_str(p, "] = {"); p = opencl_print_block_sizes(p, kernel); p = isl_printer_print_str(p, "};"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "cl_kernel kernel"); p = isl_printer_print_int(p, kernel->id); p = isl_printer_print_str(p, " = clCreateKernel(program, \"kernel"); p = isl_printer_print_int(p, kernel->id); p = isl_printer_print_str(p, "\", &err);"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "openclCheckReturn(err);"); p = isl_printer_end_line(p); opencl_set_kernel_arguments(p, data->prog, kernel); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "openclCheckReturn(clEnqueueNDRangeKernel" "(queue, kernel"); p = isl_printer_print_int(p, kernel->id); p = isl_printer_print_str(p, ", "); if (kernel->n_block > 0) p = isl_printer_print_int(p, kernel->n_block); else p = isl_printer_print_int(p, 1); p = isl_printer_print_str(p, ", NULL, global_work_size, " "block_size, " "0, NULL, NULL));"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "openclCheckReturn(" "clReleaseKernel(kernel"); p = isl_printer_print_int(p, kernel->id); p = isl_printer_print_str(p, "));"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "clFinish(queue);"); p = isl_printer_end_line(p); p = isl_printer_indent(p, -2); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "}"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_end_line(p); data->opencl->kprinter = opencl_print_kernel(data->prog, kernel, data->opencl->kprinter); return p; }