示例#1
0
/* Print a statement for copying an array to or from the device,
 * or for initializing or clearing the device.
 * The statement identifier of a copying node is called
 * "to_device_<array name>" or "from_device_<array name>" and
 * its user pointer points to the gpu_array_info of the array
 * that needs to be copied.
 * The node for initializing the device is called "init_device".
 * The node for clearing the device is called "clear_device".
 *
 * Extract the array (if any) from the identifier and call
 * init_device, clear_device, copy_array_to_device or copy_array_from_device.
 */
static __isl_give isl_printer *print_device_node(__isl_take isl_printer *p,
	__isl_keep isl_ast_node *node, struct gpu_prog *prog)
{
	isl_ast_expr *expr, *arg;
	isl_id *id;
	const char *name;
	struct gpu_array_info *array;

	expr = isl_ast_node_user_get_expr(node);
	arg = isl_ast_expr_get_op_arg(expr, 0);
	id = isl_ast_expr_get_id(arg);
	name = isl_id_get_name(id);
	array = isl_id_get_user(id);
	isl_id_free(id);
	isl_ast_expr_free(arg);
	isl_ast_expr_free(expr);

	if (!name)
		return isl_printer_free(p);
	if (!strcmp(name, "init_device"))
		return init_device(p, prog);
	if (!strcmp(name, "clear_device"))
		return clear_device(p, prog);
	if (!array)
		return isl_printer_free(p);

	if (!prefixcmp(name, "to_device"))
		return copy_array_to_device(p, array);
	else
		return copy_array_from_device(p, array);
}
示例#2
0
/* This function is called for each user statement in the AST,
 * i.e., for each kernel body statement, copy statement or sync statement.
 */
static __isl_give isl_printer *opencl_print_kernel_stmt(
	__isl_take isl_printer *p,
	__isl_take isl_ast_print_options *print_options,
	__isl_keep isl_ast_node *node, void *user)
{
	isl_id *id;
	struct ppcg_kernel_stmt *stmt;

	id = isl_ast_node_get_annotation(node);
	stmt = isl_id_get_user(id);
	isl_id_free(id);

	isl_ast_print_options_free(print_options);

	switch (stmt->type) {
	case ppcg_kernel_copy:
		return ppcg_kernel_print_copy(p, stmt);
	case ppcg_kernel_sync:
		return opencl_print_sync(p, stmt);
	case ppcg_kernel_domain:
		return print_opencl_kernel_domain(p, stmt);
	}

	return p;
}
示例#3
0
/* Print a list of iterators of type "type" with names "ids" to "p".
 * Each iterator is assigned the corresponding opencl identifier returned
 * by the function "opencl_id".
 * Unlike the equivalent function in the CUDA backend which prints iterators
 * in reverse order to promote coalescing, this function does not print
 * iterators in reverse order.  The OpenCL backend currently does not take
 * into account any coalescing considerations.
 */
static __isl_give isl_printer *print_iterators(__isl_take isl_printer *p,
	const char *type, __isl_keep isl_id_list *ids, const char *opencl_id)
{
	int i, n;

	n = isl_id_list_n_id(ids);
	if (n <= 0)
		return p;
	p = isl_printer_start_line(p);
	p = isl_printer_print_str(p, type);
	p = isl_printer_print_str(p, " ");
	for (i = 0; i < n; ++i) {
		isl_id *id;

		if (i)
			p = isl_printer_print_str(p, ", ");
		id = isl_id_list_get_id(ids, i);
		p = isl_printer_print_id(p, id);
		isl_id_free(id);
		p = isl_printer_print_str(p, " = ");
		p = isl_printer_print_str(p, opencl_id);
		p = isl_printer_print_str(p, "(");
		p = isl_printer_print_int(p, i);
		p = isl_printer_print_str(p, ")");
	}
	p = isl_printer_print_str(p, ";");
	p = isl_printer_end_line(p);

	return p;
}
示例#4
0
文件: Cloog.cpp 项目: CIB/polly
CloogInput *Cloog::buildCloogInput() {
  // XXX: We do not copy the context of the scop, but use an unconstrained
  //      context. This 'hack' is necessary as the context may contain bounds
  //      on parameters such as [n] -> {:0 <= n < 2^32}. Those large
  //      integers will cause CLooG to construct a clast that contains
  //      expressions that include these large integers. Such expressions can
  //      possibly not be evaluated correctly with i64 types. The cloog
  //      based code generation backend, however, can not derive types
  //      automatically and just assumes i64 types. Hence, it will break or
  //      generate incorrect code.
  //      This hack does not remove all possibilities of incorrectly generated
  //      code, but it is ensures that for most problems the problems do not
  //      show up. The correct solution, will be to automatically derive the
  //      minimal types for each expression. This could be added to CLooG and it
  //      will be available in the isl based code generation.
  isl_set *EmptyContext = isl_set_universe(S->getParamSpace());
  CloogDomain *Context = cloog_domain_from_isl_set(EmptyContext);
  CloogUnionDomain *Statements = buildCloogUnionDomain();

  isl_set *ScopContext = S->getContext();

  for (unsigned i = 0; i < isl_set_dim(ScopContext, isl_dim_param); i++) {
    isl_id *id = isl_set_get_dim_id(ScopContext, isl_dim_param, i);
    Statements = cloog_union_domain_set_name(Statements, CLOOG_PARAM, i,
                                             isl_id_get_name(id));
    isl_id_free(id);
  }

  isl_set_free(ScopContext);

  CloogInput *Input = cloog_input_alloc(Context, Statements);
  return Input;
}
示例#5
0
expression_ptr cpp_from_isl::process_expr(isl_ast_expr * ast_expr)
{
    expression_ptr expr;

    auto type = isl_ast_expr_get_type(ast_expr);

    switch(type)
    {
    case isl_ast_expr_op:
    {
        expr = process_op(ast_expr);
        break;
    }
    case isl_ast_expr_id:
    {
        auto id = isl_ast_expr_get_id(ast_expr);
        string name(isl_id_get_name(id));
        isl_id_free(id);

        if (m_id_func)
            expr = m_id_func(name);
        if (!expr)
            expr = make_shared<id_expression>(name);
        break;
    }
    case isl_ast_expr_int:
    {
        auto val = isl_ast_expr_get_val(ast_expr);
        if (isl_val_is_int(val) != isl_bool_true)
            throw error("Value is not an integer.");
        int ival = isl_val_get_num_si(val);
        isl_val_free(val);
        expr = literal(ival);
        break;
    }
    default:
        throw error("Unexpected AST expression type.");
    }

    return expr;
}
示例#6
0
/* Print a list of iterators of type "type" with names "ids" to "out".
 * Each iterator is assigned one of the cuda identifiers in cuda_dims.
 * In particular, the last iterator is assigned the x identifier
 * (the first in the list of cuda identifiers).
 */
static void print_iterators(FILE *out, const char *type,
	__isl_keep isl_id_list *ids, const char *cuda_dims[])
{
	int i, n;

	n = isl_id_list_n_id(ids);
	if (n <= 0)
		return;
	print_indent(out, 4);
	fprintf(out, "%s ", type);
	for (i = 0; i < n; ++i) {
		isl_id *id;

		if (i)
			fprintf(out, ", ");
		id = isl_id_list_get_id(ids, i);
		fprintf(out, "%s = %s", isl_id_get_name(id),
			cuda_dims[n - 1 - i]);
		isl_id_free(id);
	}
	fprintf(out, ";\n");
}
示例#7
0
/* Print the user statement of the host code to "p".
 *
 * The host code may contain original user statements, kernel launches,
 * statements that copy data to/from the device and statements
 * the initialize or clear the device.
 * The original user statements and the kernel launches have
 * an associated annotation, while the other statements do not.
 * The latter are handled by print_device_node.
 * The annotation on the user statements is called "user".
 *
 * In case of a kernel launch, print a block of statements that
 * defines the grid and the block and then launches the kernel.
 */
static __isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
	__isl_take isl_ast_print_options *print_options,
	__isl_keep isl_ast_node *node, void *user)
{
	isl_id *id;
	int is_user;
	struct ppcg_kernel *kernel;
	struct ppcg_kernel_stmt *stmt;
	struct print_host_user_data *data;

	isl_ast_print_options_free(print_options);

	data = (struct print_host_user_data *) user;

	id = isl_ast_node_get_annotation(node);
	if (!id)
	{
		//p = isl_printer_print_str(p,"marker_NO_ID_CASE");
		return print_device_node(p, node, data->prog);
	}	
	is_user = !strcmp(isl_id_get_name(id), "user");
	kernel = is_user ? NULL : isl_id_get_user(id);
	stmt = is_user ? isl_id_get_user(id) : NULL;
	isl_id_free(id);

	if (is_user)
		return ppcg_kernel_print_domain(p, stmt);

	p = ppcg_start_block(p);

	p = isl_printer_start_line(p);
	p = isl_printer_print_str(p, "dim3 k");
	p = isl_printer_print_int(p, kernel->id);
	p = isl_printer_print_str(p, "_dimBlock");
	print_reverse_list(isl_printer_get_file(p),
				kernel->n_block, kernel->block_dim);
	p = isl_printer_print_str(p, ";");
	p = isl_printer_end_line(p);

	p = print_grid(p, kernel);

	p = isl_printer_start_line(p);
	p = isl_printer_print_str(p, "kernel");
	p = isl_printer_print_int(p, kernel->id);
	p = isl_printer_print_str(p, " <<<k");
	p = isl_printer_print_int(p, kernel->id);
	p = isl_printer_print_str(p, "_dimGrid, k");
	p = isl_printer_print_int(p, kernel->id);
	p = isl_printer_print_str(p, "_dimBlock>>> (");
	p = print_kernel_arguments(p, data->prog, kernel, 0);
	p = isl_printer_print_str(p, ");");
	p = isl_printer_end_line(p);

	p = isl_printer_start_line(p);
	p = isl_printer_print_str(p, "cudaCheckKernel();");
	p = isl_printer_end_line(p);

	p = ppcg_end_block(p);

	p = isl_printer_start_line(p);
	p = isl_printer_end_line(p);

	p = copy_data_from_device_to_device(p,kernel); 

	printf("printing kernel");

	print_kernel(data->prog, kernel, data->cuda);

	printf("printing kernel done");

	return p;
}
示例#8
0
/* Print the user statement of the host code to "p".
 *
 * The host code may contain original user statements, kernel launches and
 * statements that copy data to/from the device.
 * The original user statements and the kernel launches have
 * an associated annotation, while the data copy statements do not.
 * The latter are handled by print_to_from_device.
 * The annotation on the user statements is called "user".
 *
 * In case of a kernel launch, print a block of statements that
 * defines the grid and the work group and then launches the kernel.
 *
 * A grid is composed of many work groups (blocks), each work group holds
 * many work-items (threads).
 *
 * global_work_size[kernel->n_block] represents the total number of work
 * items.  It points to an array of kernel->n_block unsigned
 * values that describe the total number of work-items that will execute
 * the kernel.  The total number of work-items is computed as:
 * global_work_size[0] *...* global_work_size[kernel->n_block - 1].
 *
 * The size of each work group (i.e. the number of work-items in each work
 * group) is described using block_size[kernel->n_block].  The total
 * number of work-items in a block (work-group) is computed as:
 * block_size[0] *... * block_size[kernel->n_block - 1].
 *
 * For more information check:
 * http://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/clEnqueueNDRangeKernel.html
 */
static __isl_give isl_printer *opencl_print_host_user(
	__isl_take isl_printer *p,
	__isl_take isl_ast_print_options *print_options,
	__isl_keep isl_ast_node *node, void *user)
{
	isl_id *id;
	int is_user;
	struct ppcg_kernel *kernel;
	struct ppcg_kernel_stmt *stmt;
	struct print_host_user_data_opencl *data;

	isl_ast_print_options_free(print_options);

	data = (struct print_host_user_data_opencl *) user;

	id = isl_ast_node_get_annotation(node);
	if (!id)
		return print_to_from_device(p, node, data->prog);

	is_user = !strcmp(isl_id_get_name(id), "user");
	kernel = is_user ? NULL : isl_id_get_user(id);
	stmt = is_user ? isl_id_get_user(id) : NULL;
	isl_id_free(id);

	if (is_user)
		return ppcg_kernel_print_domain(p, stmt);

	p = isl_printer_start_line(p);
	p = isl_printer_print_str(p, "{");
	p = isl_printer_end_line(p);
	p = isl_printer_indent(p, 2);

	p = isl_printer_start_line(p);
	p = isl_printer_print_str(p, "size_t global_work_size[");

	if (kernel->n_block > 0)
		p = isl_printer_print_int(p, kernel->n_block);
	else
		p = isl_printer_print_int(p, 1);

	p = isl_printer_print_str(p, "] = {");
	p = opencl_print_total_number_of_work_items_as_list(p, kernel);
	p = isl_printer_print_str(p, "};");
	p = isl_printer_end_line(p);

	p = isl_printer_start_line(p);
	p = isl_printer_print_str(p, "size_t block_size[");

	if (kernel->n_block > 0)
		p = isl_printer_print_int(p, kernel->n_block);
	else
		p = isl_printer_print_int(p, 1);

	p = isl_printer_print_str(p, "] = {");
	p = opencl_print_block_sizes(p, kernel);
	p = isl_printer_print_str(p, "};");
	p = isl_printer_end_line(p);

	p = isl_printer_start_line(p);
	p = isl_printer_print_str(p, "cl_kernel kernel");
	p = isl_printer_print_int(p, kernel->id);
	p = isl_printer_print_str(p, " = clCreateKernel(program, \"kernel");
	p = isl_printer_print_int(p, kernel->id);
	p = isl_printer_print_str(p, "\", &err);");
	p = isl_printer_end_line(p);
	p = isl_printer_start_line(p);
	p = isl_printer_print_str(p, "openclCheckReturn(err);");
	p = isl_printer_end_line(p);

	opencl_set_kernel_arguments(p, data->prog, kernel);

	p = isl_printer_start_line(p);
	p = isl_printer_print_str(p, "openclCheckReturn(clEnqueueNDRangeKernel"
		"(queue, kernel");
	p = isl_printer_print_int(p, kernel->id);
	p = isl_printer_print_str(p, ", ");
	if (kernel->n_block > 0)
		p = isl_printer_print_int(p, kernel->n_block);
	else
		p = isl_printer_print_int(p, 1);

	p = isl_printer_print_str(p, ", NULL, global_work_size, "
					"block_size, "
					"0, NULL, NULL));");
	p = isl_printer_end_line(p);
	p = isl_printer_start_line(p);
	p = isl_printer_print_str(p, "openclCheckReturn("
					"clReleaseKernel(kernel");
	p = isl_printer_print_int(p, kernel->id);
	p = isl_printer_print_str(p, "));");
	p = isl_printer_end_line(p);
	p = isl_printer_start_line(p);
	p = isl_printer_print_str(p, "clFinish(queue);");
	p = isl_printer_end_line(p);
	p = isl_printer_indent(p, -2);
	p = isl_printer_start_line(p);
	p = isl_printer_print_str(p, "}");
	p = isl_printer_end_line(p);

	p = isl_printer_start_line(p);
	p = isl_printer_end_line(p);

	data->opencl->kprinter = opencl_print_kernel(data->prog, kernel,
						data->opencl->kprinter);

	return p;
}
示例#9
0
isl_ast_node * ast_gen::after_for(isl_ast_node *node, isl_ast_build * builder)
{
    if (verbose<ast_gen>::enabled())
        cout << "-- After for" << endl;

    bool is_deepest_loop = m_deepest_loop == m_current_loop;

    bool is_requested_parallel = false;

    {
        auto iter_expr = isl_ast_node_for_get_iterator(node);
        auto id = isl_ast_expr_get_id(iter_expr);
        if (verbose<ast_gen>::enabled())
            cout << "   Loop iter: " << isl_id_get_name(id) << endl;
        auto data = isl_id_get_user(id);
        if (data == &m_parallel_loop_id)
        {
            if (verbose<ast_gen>::enabled())
                cout << "   Requested as parallel" << endl;
            is_requested_parallel = true;
        }
        id = isl_id_free(id);
        isl_ast_expr_free(iter_expr);
    }

    auto id = isl_ast_node_get_annotation(node);

    auto info = ast_node_info::get_from_id(id);

    // Mark loop parallel if parallelizable and
    // either requested by user or outermost parallizable.

    if (!m_options.parallel)
    {
        if (verbose<ast_gen>::enabled())
            cout << "   Explicit parallelization not enabled." << endl;
    }
    else if (!info->is_parallelizable)
    {
        if (verbose<ast_gen>::enabled())
            cout << "   Not parallelizable." << endl;
    }
    else if (m_options.parallel_dim < 0)
    {
        if (m_num_parallelizable_loops != 1)
        {
            if (verbose<ast_gen>::enabled())
                cout << "   Not the outermost parallelizable loop." << endl;
        }
        else
        {
            info->is_parallel = true;
        }
    }
    else
    {
        if (!is_requested_parallel)
        {
            if (verbose<ast_gen>::enabled())
                cout << "   Not the requested parallel loop." << endl;
        }
        else
        {
            if (verbose<ast_gen>::enabled())
                cout << "   Parallelized." << endl;

            info->is_parallel = true;
        }
    }

    // Mark loop vectorized if parallelizable and deepest.

    if (m_options.vectorize && is_deepest_loop && info->is_parallelizable)
    {
        if (verbose<ast_gen>::enabled())
            cout << "-- Loop vectorized." << endl;

        info->is_vector = true;
    }

    if (info->is_parallel || info->is_vector)
    {
        store_parallel_accesses_for_current_dimension(builder);
    }

    if (info->is_parallelizable)
        --m_num_parallelizable_loops;

    --m_current_loop;

    id = isl_id_free(id);

    return node;
}