/*
 * Fallback for nodes this visitor has no dedicated handler for.
 *
 * Emits a warning carrying the node kind and its locus, and returns -1.
 */
int SuitableAlignmentVisitor::unhandled_node(const Nodecl::NodeclBase& n)
{
    // Keep the locus string alive in a named local while the message is built
    const std::string node_locus = n.get_locus_str();

    WARNING_MESSAGE("Suitable Alignment Visitor: Unknown node '%s' at '%s'\n",
            ast_print_node_type(n.get_kind()),
            node_locus.c_str());

    return -1;
}
/*
 * Create wrapper function for HLS to unpack streamed arguments
 *
 * The generated function is always named 'core_hw_accelerator' and:
 *  - receives an input stream and/or an output stream (only when
 *    get_inout_decl yields a non-empty declaration for them), each with
 *    the pragmas needed for streaming
 *  - receives one scalar parameter per data item that has no copies,
 *    IN THE SAME ORDER AS THE ORIGINAL FUNCTION, since scalar parameter
 *    passing is generated from the original function task parameters
 *  - declares a local array for every copied data item, unpacks the input
 *    stream into those arrays, calls the original function and packs the
 *    results back into the output stream
 *
 * func_symbol: symbol of the function task to be wrapped. A running error is
 *              raised if it is not a function.
 * data_items:  outline data items of the task, used to build the parameter
 *              list, the local declarations and the copy loops.
 *
 * Returns the tree obtained by parsing the generated wrapper at global level,
 * using the scope of func_symbol as reference scope.
 *
 * FIXME We suppose that all the input or the output arrays are of the same
 * type. Otherwise we must convert (~cast, raw type conversion) for each type
 */
Nodecl::NodeclBase DeviceFPGA::gen_hls_wrapper(const Symbol &func_symbol, ObjectList<OutlineDataItem*>& data_items)
{
    // Check that we are calling a function task
    // (this check may also be performed earlier in the code)
    if (!func_symbol.is_function())
    {
        running_error("Only function-tasks are supperted at this moment");
    }

    std::string in_dec, out_dec;
    get_inout_decl(data_items, in_dec, out_dec);

    Source pragmas_src;

    // Calling task_has_scalars here is not optimal, but it is much simpler and
    // more readable than checking inside another loop
    if (task_has_scalars(data_items))
    {
        pragmas_src
            << "#pragma HLS resource core=AXI_SLAVE variable=return metadata=\"-bus_bundle AXIlite\" "
            << "port_map={{ap_start START} {ap_done DONE} {ap_idle IDLE} {ap_return RETURN}}\n";
    }

    Source args;
    if (in_dec != "")
    {
        args << in_dec << hls_in;

        // Add the stream parameter pragmas
        pragmas_src
            << "#pragma HLS resource core=AXI4Stream variable=" << hls_in << "\n"
            << "#pragma HLS interface ap_fifo port=" << hls_in << "\n"
            ;
    }

    if (out_dec != "")
    {
        args.append_with_separator(out_dec + hls_out, ",");
        pragmas_src
            << "#pragma HLS resource core=AXI4Stream variable=" << hls_out << "\n"
            << "#pragma HLS interface ap_fifo port=" << hls_out << "\n"
            ;
    }

    /*
     * Generate wrapper code
     * We are going to keep the original parameter names for the original function
     *
     * Input/output parameters are received concatenated one after another.
     * The wrapper must create local variables for each input/output and unpack
     * streamed input/output data into those local variables.
     *
     * Scalar parameters are passed through as they are, no unpacking is needed
     */
    Source in_copies, out_copies;
    Source fun_params;
    Source local_decls;

    // Running positions inside the input and the output streams
    int in_offset = 0;
    int out_offset = 0;

    for (ObjectList<OutlineDataItem*>::iterator it = data_items.begin();
            it != data_items.end();
            it++)
    {
        const std::string &field_name = (*it)->get_field_name();
        const Scope &scope = (*it)->get_symbol().get_scope();
        const ObjectList<OutlineDataItem::CopyItem> &copies = (*it)->get_copies();

        // Every data item is forwarded to the original function by name
        fun_params.append_with_separator(field_name, ",");

        if (!copies.empty())
        {
            Nodecl::NodeclBase expr = copies.front().expression;
            if (copies.size() > 1)
            {
                internal_error("Only one copy per object (in/out/inout) is allowed (%s)",
                        expr.get_locus_str().c_str());
            }

            /*
             * Emit copy code
             *  - Create local variable (size known at compile time)
             *  - Create copy loop + update stream offset
             */

            // Get copy size (must be known at compile time)
            int n_elements = get_copy_elements(expr);

            const Type &field_type = (*it)->get_field_type();
            Type elem_type;
            if (field_type.is_pointer())
            {
                elem_type = field_type.points_to();
            }
            else if (field_type.is_array())
            {
                elem_type = field_type.array_element();
            }
            else
            {
                // The argument is a string (the locus), so it must be printed with %s
                internal_error("invalid type for input/output, only pointer and array is allowed (%s)",
                        expr.get_locus_str().c_str());
            }

            std::string par_simple_decl = elem_type.get_simple_declaration(scope, field_name);
            local_decls
                << par_simple_decl << "[" << n_elements << "];\n";

            if (copies.front().directionality == OutlineDataItem::COPY_IN
                    or copies.front().directionality == OutlineDataItem::COPY_INOUT)
            {
                in_copies
                    << "for (" << HLS_I << "=0;"
                    <<         HLS_I << "<" << n_elements << "; "
                    <<         HLS_I << "++)"
                    << "{"
                    << "  " << field_name << "[" << HLS_I << "] = " << hls_in << "[" << HLS_I << "+" << in_offset << "];"
                    << "}"
                    ;
                in_offset += n_elements;
            }

            if (copies.front().directionality == OutlineDataItem::COPY_OUT
                    or copies.front().directionality == OutlineDataItem::COPY_INOUT)
            {
                out_copies
                    << "for (" << HLS_I << "=0;"
                    <<         HLS_I << "<" << n_elements << "; "
                    <<         HLS_I << "++)"
                    << "{"
                    << "  " << hls_out << "[" << HLS_I << "+" << out_offset << "] = " << field_name << "[" << HLS_I << "];"
                    << "}"
                    ;
                out_offset += n_elements;
            }
        }
        else
        {
            // Generate scalar parameter code
            Source par_src;
            par_src
                << (*it)->get_field_type().get_simple_declaration(scope, field_name)
                ;
            args.append_with_separator(par_src, ",");

            pragmas_src
                << "#pragma HLS INTERFACE ap_none port=" << field_name << "\n"
                << "#pragma AP resource core=AXI_SLAVE variable=" << field_name << " metadata=\"-bus_bundle AXIlite\"\n"
                ;
        }
    }

    Source wrapper_src;
    wrapper_src
        << "void core_hw_accelerator(" << args << "){"
        ;

    // Loop index shared by all the generated copy loops
    local_decls
        << "unsigned int " << HLS_I << ";";

    wrapper_src
        << pragmas_src
        << local_decls
        << in_copies
        << func_symbol.get_name() << "(" << fun_params << ");"
        << out_copies
        << "}"
        ;

    // Parse the generated source
    ReferenceScope refscope(func_symbol.get_scope());
    Nodecl::NodeclBase wrapper_node = wrapper_src.parse_global(refscope);

    return wrapper_node;
}
void LoweringVisitor::reduction_initialization_code( OutlineInfo& outline_info, Nodecl::NodeclBase ref_tree, Nodecl::NodeclBase construct) { ERROR_CONDITION(ref_tree.is_null(), "Invalid tree", 0); if (!Nanos::Version::interface_is_at_least("master", 5023)) { running_error("%s: error: a newer version of Nanos++ (>=5023) is required for reductions support\n", construct.get_locus_str().c_str()); } TL::ObjectList<OutlineDataItem*> reduction_items = outline_info.get_data_items().filter( predicate(lift_pointer(functor(&OutlineDataItem::is_reduction)))); ERROR_CONDITION (reduction_items.empty(), "No reductions to process", 0); Source result; Source reduction_declaration, thread_initializing_reduction_info, thread_fetching_reduction_info; result << reduction_declaration << "{" << as_type(get_bool_type()) << " red_single_guard;" << "nanos_err_t err;" << "err = nanos_enter_sync_init(&red_single_guard);" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << "if (red_single_guard)" << "{" << "int nanos_num_threads = nanos_omp_get_num_threads();" << thread_initializing_reduction_info << "err = nanos_release_sync_init();" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << "}" << "else" << "{" << "err = nanos_wait_sync_init();" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << thread_fetching_reduction_info << "}" << "}" ; for (TL::ObjectList<OutlineDataItem*>::iterator it = reduction_items.begin(); it != reduction_items.end(); it++) { std::string nanos_red_name = "nanos_red_" + (*it)->get_symbol().get_name(); std::pair<OpenMP::Reduction*, TL::Type> reduction_info = (*it)->get_reduction_info(); OpenMP::Reduction* reduction = reduction_info.first; TL::Type reduction_type = reduction_info.second; if (reduction_type.is_any_reference()) reduction_type = reduction_type.references_to(); TL::Type reduction_element_type = reduction_type; if (IS_FORTRAN_LANGUAGE) { while (reduction_element_type.is_fortran_array()) reduction_element_type = 
reduction_element_type.array_element(); } else { while (reduction_element_type.is_array()) reduction_element_type = reduction_element_type.array_element(); } Source element_size; if (IS_FORTRAN_LANGUAGE) { if (reduction_type.is_fortran_array()) { // We need to parse this bit in Fortran Source number_of_bytes; number_of_bytes << "SIZE(" << (*it)->get_symbol().get_name() << ") * " << reduction_element_type.get_size(); element_size << as_expression(number_of_bytes.parse_expression(construct)); } else { element_size << "sizeof(" << as_type(reduction_type) << ")"; } } else { element_size << "sizeof(" << as_type(reduction_type) << ")"; } reduction_declaration << "nanos_reduction_t* " << nanos_red_name << ";" ; Source allocate_private_buffer, cleanup_code; Source num_scalars; TL::Symbol basic_reduction_function, vector_reduction_function; create_reduction_function(reduction, construct, reduction_type, basic_reduction_function, vector_reduction_function); (*it)->reduction_set_basic_function(basic_reduction_function); thread_initializing_reduction_info << "err = nanos_malloc((void**)&" << nanos_red_name << ", sizeof(nanos_reduction_t), " << "\"" << construct.get_filename() << "\", " << construct.get_line() << ");" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << nanos_red_name << "->original = (void*)" << (reduction_type.is_array() ? "" : "&") << (*it)->get_symbol().get_name() << ";" << allocate_private_buffer << nanos_red_name << "->vop = " << (vector_reduction_function.is_valid() ? 
as_symbol(vector_reduction_function) : "0") << ";" << nanos_red_name << "->bop = (void(*)(void*,void*,int))" << as_symbol(basic_reduction_function) << ";" << nanos_red_name << "->element_size = " << element_size << ";" << nanos_red_name << "->num_scalars = " << num_scalars << ";" << cleanup_code << "err = nanos_register_reduction(" << nanos_red_name << ");" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" ; if (IS_C_LANGUAGE || IS_CXX_LANGUAGE) { if (reduction_type.is_array()) { num_scalars << "sizeof(" << as_type(reduction_type) << ") / sizeof(" << as_type(reduction_element_type) <<")"; } else { num_scalars << "1"; } allocate_private_buffer << "err = nanos_malloc(&" << nanos_red_name << "->privates, sizeof(" << as_type(reduction_type) << ") * nanos_num_threads, " << "\"" << construct.get_filename() << "\", " << construct.get_line() << ");" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << nanos_red_name << "->descriptor = " << nanos_red_name << "->privates;" << "rdv_" << (*it)->get_field_name() << " = (" << as_type( (*it)->get_private_type().get_pointer_to() ) << ")" << nanos_red_name << "->privates;" ; thread_fetching_reduction_info << "err = nanos_reduction_get(&" << nanos_red_name << ", " << (reduction_type.is_array() ? 
"" : "&") << (*it)->get_symbol().get_name() << ");" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << "rdv_" << (*it)->get_field_name() << " = (" << as_type( (*it)->get_private_type().get_pointer_to() ) << ")" << nanos_red_name << "->privates;" ; cleanup_code << nanos_red_name << "->cleanup = nanos_free0;" ; } else if (IS_FORTRAN_LANGUAGE) { Type private_reduction_vector_type; Source extra_dims; { TL::Type t = (*it)->get_symbol().get_type().no_ref(); int rank = 0; if (t.is_fortran_array()) { rank = t.fortran_rank(); } if (rank != 0) { // We need to parse this bit in Fortran Source size_call; size_call << "SIZE(" << (*it)->get_symbol().get_name() << ")"; num_scalars << as_expression(size_call.parse_expression(construct)); } else { num_scalars << "1"; } private_reduction_vector_type = fortran_get_n_ranked_type_with_descriptor( get_void_type(), rank + 1, construct.retrieve_context().get_decl_context()); int i; for (i = 0; i < rank; i++) { Source lbound_src; lbound_src << "LBOUND(" << (*it)->get_symbol().get_name() << ", DIM = " << (rank - i) << ")"; Source ubound_src; ubound_src << "UBOUND(" << (*it)->get_symbol().get_name() << ", DIM = " << (rank - i) << ")"; extra_dims << "[" << as_expression(lbound_src.parse_expression(construct)) << ":" << as_expression(ubound_src.parse_expression(construct)) << "]"; t = t.array_element(); } } allocate_private_buffer << "@FORTRAN_ALLOCATE@((*rdv_" << (*it)->get_field_name() << ")[0:(nanos_num_threads-1)]" << extra_dims <<");" << nanos_red_name << "->privates = &(*rdv_" << (*it)->get_field_name() << ");" << "err = nanos_malloc(&" << nanos_red_name << "->descriptor, sizeof(" << as_type(private_reduction_vector_type) << "), " << "\"" << construct.get_filename() << "\", " << construct.get_line() << ");" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << "err = nanos_memcpy(" << nanos_red_name << "->descriptor, " "&rdv_" << (*it)->get_field_name() << ", sizeof(" << as_type(private_reduction_vector_type) << "));" << "if 
(err != NANOS_OK)" << "nanos_handle_error(err);" ; thread_fetching_reduction_info << "err = nanos_reduction_get(&" << nanos_red_name << ", &" << (*it)->get_symbol().get_name() << ");" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << "err = nanos_memcpy(" << "&rdv_" << (*it)->get_field_name() << "," << nanos_red_name << "->descriptor, " << "sizeof(" << as_type(private_reduction_vector_type) << "));" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" ; TL::Symbol reduction_cleanup = create_reduction_cleanup_function(reduction, construct); cleanup_code << nanos_red_name << "->cleanup = " << as_symbol(reduction_cleanup) << ";" ; } else { internal_error("Code unreachable", 0); } } FORTRAN_LANGUAGE() { Source::source_language = SourceLanguage::C; } ref_tree.replace(result.parse_statement(ref_tree)); FORTRAN_LANGUAGE() { Source::source_language = SourceLanguage::Current; } }