/* This function prints the i'th block size multiplied by the i'th grid size, * where i (a parameter to this function) is one of the possible dimensions of * grid sizes and block sizes. * If the dimension of block sizes is not equal to the dimension of grid sizes * the output is calculated as follows: * * Suppose that: * block_sizes[dim1] is the list of blocks sizes and it contains dim1 elements. * grid_sizes[dim2] is the list of grid sizes and it contains dim2 elements. * * The output is: * If (i > dim2) then the output is block_sizes[i] * If (i > dim1) then the output is grid_sizes[i] */ static __isl_give isl_printer *opencl_print_total_number_of_work_items_for_dim( __isl_take isl_printer *p, struct ppcg_kernel *kernel, int i) { int grid_dim, block_dim; isl_pw_aff *bound_grid; grid_dim = isl_multi_pw_aff_dim(kernel->grid_size, isl_dim_set); block_dim = kernel->n_block; if (i < min(grid_dim, block_dim)) { bound_grid = isl_multi_pw_aff_get_pw_aff(kernel->grid_size, i); p = isl_printer_print_str(p, "("); p = isl_printer_print_pw_aff(p, bound_grid); p = isl_printer_print_str(p, ") * "); p = isl_printer_print_int(p, kernel->block_dim[i]); isl_pw_aff_free(bound_grid); } else if (i >= grid_dim) p = isl_printer_print_int(p, kernel->block_dim[i]); else { bound_grid = isl_multi_pw_aff_get_pw_aff(kernel->grid_size, i); p = isl_printer_print_pw_aff(p, bound_grid); isl_pw_aff_free(bound_grid); } return p; }
/* Print a list of iterators of type "type" with names "ids" to "p". * Each iterator is assigned the corresponding opencl identifier returned * by the function "opencl_id". * Unlike the equivalent function in the CUDA backend which prints iterators * in reverse order to promote coalescing, this function does not print * iterators in reverse order. The OpenCL backend currently does not take * into account any coalescing considerations. */ static __isl_give isl_printer *print_iterators(__isl_take isl_printer *p, const char *type, __isl_keep isl_id_list *ids, const char *opencl_id) { int i, n; n = isl_id_list_n_id(ids); if (n <= 0) return p; p = isl_printer_start_line(p); p = isl_printer_print_str(p, type); p = isl_printer_print_str(p, " "); for (i = 0; i < n; ++i) { isl_id *id; if (i) p = isl_printer_print_str(p, ", "); id = isl_id_list_get_id(ids, i); p = isl_printer_print_id(p, id); isl_id_free(id); p = isl_printer_print_str(p, " = "); p = isl_printer_print_str(p, opencl_id); p = isl_printer_print_str(p, "("); p = isl_printer_print_int(p, i); p = isl_printer_print_str(p, ")"); } p = isl_printer_print_str(p, ";"); p = isl_printer_end_line(p); return p; }
/* Print the header of the given kernel. */ static __isl_give isl_printer *print_kernel_header(__isl_take isl_printer *p, struct gpu_prog *prog, struct ppcg_kernel *kernel) { p = isl_printer_start_line(p); p = isl_printer_print_str(p, "__global__ void kernel"); p = isl_printer_print_int(p, kernel->id); p = isl_printer_print_str(p, "("); p = print_kernel_arguments(p, prog, kernel, 1); p = isl_printer_print_str(p, ")"); return p; }
/* Print the grid definition. */ static __isl_give isl_printer *print_grid(__isl_take isl_printer *p, struct ppcg_kernel *kernel) { p = isl_printer_start_line(p); p = isl_printer_print_str(p, "dim3 k"); p = isl_printer_print_int(p, kernel->id); p = isl_printer_print_str(p, "_dimGrid"); p = print_grid_size(p, kernel); p = isl_printer_print_str(p, ";"); p = isl_printer_end_line(p); return p; }
/* Print a call to the OpenCL clSetKernelArg() function which sets * the arguments of the kernel. arg_name and arg_index are the name and the * index of the kernel argument. The index of the leftmost argument of * the kernel is 0 whereas the index of the rightmost argument of the kernel * is n - 1, where n is the total number of the kernel arguments. * read_only_scalar is a boolean that indicates whether the argument is a read * only scalar. */ static __isl_give isl_printer *opencl_set_kernel_argument( __isl_take isl_printer *p, int kernel_id, const char *arg_name, int arg_index, int read_only_scalar) { p = isl_printer_start_line(p); p = isl_printer_print_str(p, "openclCheckReturn(clSetKernelArg(kernel"); p = isl_printer_print_int(p, kernel_id); p = isl_printer_print_str(p, ", "); p = isl_printer_print_int(p, arg_index); p = isl_printer_print_str(p, ", sizeof("); if (read_only_scalar) { p = isl_printer_print_str(p, arg_name); p = isl_printer_print_str(p, "), &"); } else p = isl_printer_print_str(p, "cl_mem), (void *) &dev_"); p = isl_printer_print_str(p, arg_name); p = isl_printer_print_str(p, "));"); p = isl_printer_end_line(p); return p; }
/* Print the block sizes as a list of the sizes in each * dimension. */ static __isl_give isl_printer *opencl_print_block_sizes( __isl_take isl_printer *p, struct ppcg_kernel *kernel) { int i; if (kernel->n_block > 0) for (i = 0; i < kernel->n_block; ++i) { if (i) p = isl_printer_print_str(p, ", "); p = isl_printer_print_int(p, kernel->block_dim[i]); } else p = isl_printer_print_str(p, "1"); return p; }
/* Print the user statement of the host code to "p". * * The host code may contain original user statements, kernel launches, * statements that copy data to/from the device and statements * the initialize or clear the device. * The original user statements and the kernel launches have * an associated annotation, while the other statements do not. * The latter are handled by print_device_node. * The annotation on the user statements is called "user". * * In case of a kernel launch, print a block of statements that * defines the grid and the block and then launches the kernel. */ static __isl_give isl_printer *print_host_user(__isl_take isl_printer *p, __isl_take isl_ast_print_options *print_options, __isl_keep isl_ast_node *node, void *user) { isl_id *id; int is_user; struct ppcg_kernel *kernel; struct ppcg_kernel_stmt *stmt; struct print_host_user_data *data; isl_ast_print_options_free(print_options); data = (struct print_host_user_data *) user; id = isl_ast_node_get_annotation(node); if (!id) { //p = isl_printer_print_str(p,"marker_NO_ID_CASE"); return print_device_node(p, node, data->prog); } is_user = !strcmp(isl_id_get_name(id), "user"); kernel = is_user ? NULL : isl_id_get_user(id); stmt = is_user ? isl_id_get_user(id) : NULL; isl_id_free(id); if (is_user) return ppcg_kernel_print_domain(p, stmt); p = ppcg_start_block(p); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "dim3 k"); p = isl_printer_print_int(p, kernel->id); p = isl_printer_print_str(p, "_dimBlock"); print_reverse_list(isl_printer_get_file(p), kernel->n_block, kernel->block_dim); p = isl_printer_print_str(p, ";"); p = isl_printer_end_line(p); p = print_grid(p, kernel); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "kernel"); p = isl_printer_print_int(p, kernel->id); p = isl_printer_print_str(p, " <<<k"); p = isl_printer_print_int(p, kernel->id); p = isl_printer_print_str(p, "_dimGrid, k"); p = isl_printer_print_int(p, kernel->id); p = isl_printer_print_str(p, "_dimBlock>>> ("); p = print_kernel_arguments(p, data->prog, kernel, 0); p = isl_printer_print_str(p, ");"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "cudaCheckKernel();"); p = isl_printer_end_line(p); p = ppcg_end_block(p); p = isl_printer_start_line(p); p = isl_printer_end_line(p); p = copy_data_from_device_to_device(p,kernel); printf("printing kernel"); print_kernel(data->prog, kernel, data->cuda); printf("printing kernel done"); return p; }
/* Print the user statement of the host code to "p". * * The host code may contain original user statements, kernel launches and * statements that copy data to/from the device. * The original user statements and the kernel launches have * an associated annotation, while the data copy statements do not. * The latter are handled by print_to_from_device. * The annotation on the user statements is called "user". * * In case of a kernel launch, print a block of statements that * defines the grid and the work group and then launches the kernel. * * A grid is composed of many work groups (blocks), each work group holds * many work-items (threads). * * global_work_size[kernel->n_block] represents the total number of work * items. It points to an array of kernel->n_block unsigned * values that describe the total number of work-items that will execute * the kernel. The total number of work-items is computed as: * global_work_size[0] *...* global_work_size[kernel->n_block - 1]. * * The size of each work group (i.e. the number of work-items in each work * group) is described using block_size[kernel->n_block]. The total * number of work-items in a block (work-group) is computed as: * block_size[0] *... * block_size[kernel->n_block - 1]. * * For more information check: * http://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/clEnqueueNDRangeKernel.html */ static __isl_give isl_printer *opencl_print_host_user( __isl_take isl_printer *p, __isl_take isl_ast_print_options *print_options, __isl_keep isl_ast_node *node, void *user) { isl_id *id; int is_user; struct ppcg_kernel *kernel; struct ppcg_kernel_stmt *stmt; struct print_host_user_data_opencl *data; isl_ast_print_options_free(print_options); data = (struct print_host_user_data_opencl *) user; id = isl_ast_node_get_annotation(node); if (!id) return print_to_from_device(p, node, data->prog); is_user = !strcmp(isl_id_get_name(id), "user"); kernel = is_user ? NULL : isl_id_get_user(id); stmt = is_user ? isl_id_get_user(id) : NULL; isl_id_free(id); if (is_user) return ppcg_kernel_print_domain(p, stmt); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "{"); p = isl_printer_end_line(p); p = isl_printer_indent(p, 2); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "size_t global_work_size["); if (kernel->n_block > 0) p = isl_printer_print_int(p, kernel->n_block); else p = isl_printer_print_int(p, 1); p = isl_printer_print_str(p, "] = {"); p = opencl_print_total_number_of_work_items_as_list(p, kernel); p = isl_printer_print_str(p, "};"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "size_t block_size["); if (kernel->n_block > 0) p = isl_printer_print_int(p, kernel->n_block); else p = isl_printer_print_int(p, 1); p = isl_printer_print_str(p, "] = {"); p = opencl_print_block_sizes(p, kernel); p = isl_printer_print_str(p, "};"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "cl_kernel kernel"); p = isl_printer_print_int(p, kernel->id); p = isl_printer_print_str(p, " = clCreateKernel(program, \"kernel"); p = isl_printer_print_int(p, kernel->id); p = isl_printer_print_str(p, "\", &err);"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "openclCheckReturn(err);"); p = isl_printer_end_line(p); opencl_set_kernel_arguments(p, data->prog, kernel); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "openclCheckReturn(clEnqueueNDRangeKernel" "(queue, kernel"); p = isl_printer_print_int(p, kernel->id); p = isl_printer_print_str(p, ", "); if (kernel->n_block > 0) p = isl_printer_print_int(p, kernel->n_block); else p = isl_printer_print_int(p, 1); p = isl_printer_print_str(p, ", NULL, global_work_size, " "block_size, " "0, NULL, NULL));"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "openclCheckReturn(" "clReleaseKernel(kernel"); p = isl_printer_print_int(p, kernel->id); p = isl_printer_print_str(p, "));"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "clFinish(queue);"); p = isl_printer_end_line(p); p = isl_printer_indent(p, -2); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "}"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_end_line(p); data->opencl->kprinter = opencl_print_kernel(data->prog, kernel, data->opencl->kprinter); return p; }
/* Create an OpenCL device, context, command queue and build the kernel. * input is the name of the input file provided to ppcg. */ static __isl_give isl_printer *opencl_setup(__isl_take isl_printer *p, const char *input, struct opencl_info *info) { p = isl_printer_start_line(p); p = isl_printer_print_str(p, "cl_device_id device;"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "cl_context context;"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "cl_program program;"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "cl_command_queue queue;"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "cl_int err;"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "device = opencl_create_device("); p = isl_printer_print_int(p, info->options->opencl_use_gpu); p = isl_printer_print_str(p, ");"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "context = clCreateContext(NULL, 1, " "&device, NULL, NULL, &err);"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "openclCheckReturn(err);"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "queue = clCreateCommandQueue" "(context, device, 0, &err);"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "openclCheckReturn(err);"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_print_str(p, "program = "); if (info->options->opencl_embed_kernel_code) { p = isl_printer_print_str(p, "opencl_build_program_from_string(" "context, device, kernel_code, " "sizeof(kernel_code), \""); } else { p = isl_printer_print_str(p, "opencl_build_program_from_file(" "context, device, \""); p = isl_printer_print_str(p, info->kernel_c_name); p = isl_printer_print_str(p, "\", \""); } if (info->options->opencl_compiler_options) p = isl_printer_print_str(p, info->options->opencl_compiler_options); p = isl_printer_print_str(p, "\");"); p = isl_printer_end_line(p); p = isl_printer_start_line(p); p = isl_printer_end_line(p); return p; }