TL::Symbol LoweringVisitor::create_reduction_cleanup_function(OpenMP::Reduction* red, Nodecl::NodeclBase construct) { if (IS_C_LANGUAGE || IS_CXX_LANGUAGE) { internal_error("Currently only valid in Fortran", 0); } reduction_map_t::iterator it = _reduction_cleanup_map.find(red); if (it != _reduction_cleanup_map.end()) { return it->second; } std::string fun_name; { std::stringstream ss; ss << "nanos_cleanup_" << red << "_" << simple_hash_str(construct.get_filename().c_str()); fun_name = ss.str(); } Source src; src << "SUBROUTINE " << fun_name << "(X)\n" << as_type(red->get_type()) << ", POINTER :: X(:)\n" << "DEALLOCATE(X)\n" << "END SUBROUTINE\n" ; Nodecl::NodeclBase function_code = src.parse_global(construct); TL::Symbol function_sym = ReferenceScope(construct).get_scope().get_symbol_from_name(fun_name); ERROR_CONDITION(!function_sym.is_valid(), "Symbol %s not found", fun_name.c_str()); _reduction_cleanup_map[red] = function_sym; Nodecl::Utils::append_to_enclosing_top_level_location(construct, function_code); Nodecl::Utils::Fortran::append_used_modules(construct.retrieve_context(), function_sym.get_related_scope()); return function_sym; }
static TL::Symbol create_initializer_function_fortran( OpenMP::Reduction* red, TL::Type reduction_type, Nodecl::NodeclBase construct) { std::string fun_name; { std::stringstream ss; ss << "nanos_ini_" << red << "_" << reduction_type.get_internal_type() << "_" << simple_hash_str(construct.get_filename().c_str()); fun_name = ss.str(); } Nodecl::NodeclBase initializer = red->get_initializer().shallow_copy(); TL::Type omp_out_type = reduction_type, omp_ori_type = reduction_type; // These sources are only used in array reductions TL::Source omp_out_extra_attributes, extra_stuff_array_red; if (reduction_type.is_array()) { Source dims_descr; TL::Type t = reduction_type; int rank = 0; if (t.is_fortran_array()) { rank = t.fortran_rank(); } dims_descr << "("; omp_out_extra_attributes << ", POINTER, DIMENSION("; int i; for (i = 0; i < rank; i++) { if (i != 0) { dims_descr << ","; omp_out_extra_attributes << ","; } dims_descr << "LBOUND(omp_orig, DIM = " << (rank - i) << ")" << ":" << "UBOUND(omp_orig, DIM = " << (rank - i) << ")" ; omp_out_extra_attributes << ":"; t = t.array_element(); } dims_descr << ")"; omp_out_extra_attributes << ")"; omp_out_type = t; extra_stuff_array_red << "ALLOCATE(omp_out" << dims_descr <<")\n"; } Source src; src << "SUBROUTINE " << fun_name << "(omp_out, omp_orig)\n" << "IMPLICIT NONE\n" << as_type(omp_out_type) << omp_out_extra_attributes << " :: omp_out\n" << as_type(omp_ori_type) << " :: omp_orig\n" << extra_stuff_array_red << "omp_out = " << as_expression(initializer) << "\n" << "END SUBROUTINE " << fun_name << "\n" ; TL::Scope global_scope = construct.retrieve_context().get_global_scope(); Nodecl::NodeclBase function_code = src.parse_global(global_scope); TL::Symbol function_sym = global_scope.get_symbol_from_name(fun_name); ERROR_CONDITION(!function_sym.is_valid(), "Symbol %s not found", fun_name.c_str()); // As the initializer function is needed during the instantiation of // the task, this function should be inserted before the construct Nodecl::Utils::prepend_to_enclosing_top_level_location(construct, function_code); return function_sym; }
static TL::Symbol create_initializer_function_c( OpenMP::Reduction* red, TL::Type reduction_type, Nodecl::NodeclBase construct) { std::string fun_name; { std::stringstream ss; ss << "nanos_ini_" << red << "_" << reduction_type.get_internal_type() << "_" << simple_hash_str(construct.get_filename().c_str()); fun_name = ss.str(); } Nodecl::NodeclBase function_body; Source src; src << "void " << fun_name << "(" << as_type(reduction_type.no_ref().get_lvalue_reference_to()) << " omp_priv," << as_type(reduction_type.no_ref().get_lvalue_reference_to()) << " omp_orig)" << "{" << statement_placeholder(function_body) << "}" ; Nodecl::NodeclBase function_code = src.parse_global(construct.retrieve_context().get_global_scope()); TL::Scope inside_function = ReferenceScope(function_body).get_scope(); TL::Symbol param_omp_priv = inside_function.get_symbol_from_name("omp_priv"); ERROR_CONDITION(!param_omp_priv.is_valid(), "Symbol omp_priv not found", 0); TL::Symbol param_omp_orig = inside_function.get_symbol_from_name("omp_orig"); ERROR_CONDITION(!param_omp_orig.is_valid(), "Symbol omp_orig not found", 0); TL::Symbol function_sym = inside_function.get_symbol_from_name(fun_name); ERROR_CONDITION(!function_sym.is_valid(), "Symbol %s not found", fun_name.c_str()); Nodecl::NodeclBase initializer = red->get_initializer().shallow_copy(); if (initializer.is<Nodecl::StructuredValue>()) { Nodecl::StructuredValue structured_value = initializer.as<Nodecl::StructuredValue>(); if (structured_value.get_form().is<Nodecl::StructuredValueBracedImplicit>()) { structured_value.set_form(Nodecl::StructuredValueCompoundLiteral::make()); } } Nodecl::Utils::SimpleSymbolMap translation_map; translation_map.add_map(red->get_omp_priv(), param_omp_priv); translation_map.add_map(red->get_omp_orig(), param_omp_orig); Nodecl::NodeclBase new_initializer = Nodecl::Utils::deep_copy(initializer, inside_function, translation_map); if (red->get_is_initialization()) { // The original initializer was something like 'omp_priv = expr1', but the // new_initializer only represents the lhs expression (in our example, expr1). // For this reason we create manually an assignment expression. Nodecl::NodeclBase param_omp_priv_ref = Nodecl::Symbol::make(param_omp_priv); param_omp_priv_ref.set_type(param_omp_priv.get_type()); function_body.replace( Nodecl::List::make( Nodecl::ExpressionStatement::make( Nodecl::Assignment::make( param_omp_priv_ref, new_initializer, param_omp_priv_ref.get_type().no_ref()) ))); } else { function_body.replace( Nodecl::List::make(Nodecl::ExpressionStatement::make(new_initializer))); } // As the initializer function is needed during the instantiation of // the task, this function should be inserted before the construct Nodecl::Utils::prepend_to_enclosing_top_level_location(construct, function_code); return function_sym; }
void LoweringVisitor::reduction_initialization_code( OutlineInfo& outline_info, Nodecl::NodeclBase ref_tree, Nodecl::NodeclBase construct) { ERROR_CONDITION(ref_tree.is_null(), "Invalid tree", 0); if (!Nanos::Version::interface_is_at_least("master", 5023)) { running_error("%s: error: a newer version of Nanos++ (>=5023) is required for reductions support\n", construct.get_locus_str().c_str()); } TL::ObjectList<OutlineDataItem*> reduction_items = outline_info.get_data_items().filter( predicate(lift_pointer(functor(&OutlineDataItem::is_reduction)))); ERROR_CONDITION (reduction_items.empty(), "No reductions to process", 0); Source result; Source reduction_declaration, thread_initializing_reduction_info, thread_fetching_reduction_info; result << reduction_declaration << "{" << as_type(get_bool_type()) << " red_single_guard;" << "nanos_err_t err;" << "err = nanos_enter_sync_init(&red_single_guard);" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << "if (red_single_guard)" << "{" << "int nanos_num_threads = nanos_omp_get_num_threads();" << thread_initializing_reduction_info << "err = nanos_release_sync_init();" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << "}" << "else" << "{" << "err = nanos_wait_sync_init();" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << thread_fetching_reduction_info << "}" << "}" ; for (TL::ObjectList<OutlineDataItem*>::iterator it = reduction_items.begin(); it != reduction_items.end(); it++) { std::string nanos_red_name = "nanos_red_" + (*it)->get_symbol().get_name(); std::pair<OpenMP::Reduction*, TL::Type> reduction_info = (*it)->get_reduction_info(); OpenMP::Reduction* reduction = reduction_info.first; TL::Type reduction_type = reduction_info.second; if (reduction_type.is_any_reference()) reduction_type = reduction_type.references_to(); TL::Type reduction_element_type = reduction_type; if (IS_FORTRAN_LANGUAGE) { while (reduction_element_type.is_fortran_array()) reduction_element_type = reduction_element_type.array_element(); } else { while (reduction_element_type.is_array()) reduction_element_type = reduction_element_type.array_element(); } Source element_size; if (IS_FORTRAN_LANGUAGE) { if (reduction_type.is_fortran_array()) { // We need to parse this bit in Fortran Source number_of_bytes; number_of_bytes << "SIZE(" << (*it)->get_symbol().get_name() << ") * " << reduction_element_type.get_size(); element_size << as_expression(number_of_bytes.parse_expression(construct)); } else { element_size << "sizeof(" << as_type(reduction_type) << ")"; } } else { element_size << "sizeof(" << as_type(reduction_type) << ")"; } reduction_declaration << "nanos_reduction_t* " << nanos_red_name << ";" ; Source allocate_private_buffer, cleanup_code; Source num_scalars; TL::Symbol basic_reduction_function, vector_reduction_function; create_reduction_function(reduction, construct, reduction_type, basic_reduction_function, vector_reduction_function); (*it)->reduction_set_basic_function(basic_reduction_function); thread_initializing_reduction_info << "err = nanos_malloc((void**)&" << nanos_red_name << ", sizeof(nanos_reduction_t), " << "\"" << construct.get_filename() << "\", " << construct.get_line() << ");" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << nanos_red_name << "->original = (void*)" << (reduction_type.is_array() ? "" : "&") << (*it)->get_symbol().get_name() << ";" << allocate_private_buffer << nanos_red_name << "->vop = " << (vector_reduction_function.is_valid() ? as_symbol(vector_reduction_function) : "0") << ";" << nanos_red_name << "->bop = (void(*)(void*,void*,int))" << as_symbol(basic_reduction_function) << ";" << nanos_red_name << "->element_size = " << element_size << ";" << nanos_red_name << "->num_scalars = " << num_scalars << ";" << cleanup_code << "err = nanos_register_reduction(" << nanos_red_name << ");" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" ; if (IS_C_LANGUAGE || IS_CXX_LANGUAGE) { if (reduction_type.is_array()) { num_scalars << "sizeof(" << as_type(reduction_type) << ") / sizeof(" << as_type(reduction_element_type) <<")"; } else { num_scalars << "1"; } allocate_private_buffer << "err = nanos_malloc(&" << nanos_red_name << "->privates, sizeof(" << as_type(reduction_type) << ") * nanos_num_threads, " << "\"" << construct.get_filename() << "\", " << construct.get_line() << ");" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << nanos_red_name << "->descriptor = " << nanos_red_name << "->privates;" << "rdv_" << (*it)->get_field_name() << " = (" << as_type( (*it)->get_private_type().get_pointer_to() ) << ")" << nanos_red_name << "->privates;" ; thread_fetching_reduction_info << "err = nanos_reduction_get(&" << nanos_red_name << ", " << (reduction_type.is_array() ? "" : "&") << (*it)->get_symbol().get_name() << ");" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << "rdv_" << (*it)->get_field_name() << " = (" << as_type( (*it)->get_private_type().get_pointer_to() ) << ")" << nanos_red_name << "->privates;" ; cleanup_code << nanos_red_name << "->cleanup = nanos_free0;" ; } else if (IS_FORTRAN_LANGUAGE) { Type private_reduction_vector_type; Source extra_dims; { TL::Type t = (*it)->get_symbol().get_type().no_ref(); int rank = 0; if (t.is_fortran_array()) { rank = t.fortran_rank(); } if (rank != 0) { // We need to parse this bit in Fortran Source size_call; size_call << "SIZE(" << (*it)->get_symbol().get_name() << ")"; num_scalars << as_expression(size_call.parse_expression(construct)); } else { num_scalars << "1"; } private_reduction_vector_type = fortran_get_n_ranked_type_with_descriptor( get_void_type(), rank + 1, construct.retrieve_context().get_decl_context()); int i; for (i = 0; i < rank; i++) { Source lbound_src; lbound_src << "LBOUND(" << (*it)->get_symbol().get_name() << ", DIM = " << (rank - i) << ")"; Source ubound_src; ubound_src << "UBOUND(" << (*it)->get_symbol().get_name() << ", DIM = " << (rank - i) << ")"; extra_dims << "[" << as_expression(lbound_src.parse_expression(construct)) << ":" << as_expression(ubound_src.parse_expression(construct)) << "]"; t = t.array_element(); } } allocate_private_buffer << "@FORTRAN_ALLOCATE@((*rdv_" << (*it)->get_field_name() << ")[0:(nanos_num_threads-1)]" << extra_dims <<");" << nanos_red_name << "->privates = &(*rdv_" << (*it)->get_field_name() << ");" << "err = nanos_malloc(&" << nanos_red_name << "->descriptor, sizeof(" << as_type(private_reduction_vector_type) << "), " << "\"" << construct.get_filename() << "\", " << construct.get_line() << ");" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << "err = nanos_memcpy(" << nanos_red_name << "->descriptor, " "&rdv_" << (*it)->get_field_name() << ", sizeof(" << as_type(private_reduction_vector_type) << "));" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" ; thread_fetching_reduction_info << "err = nanos_reduction_get(&" << nanos_red_name << ", &" << (*it)->get_symbol().get_name() << ");" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << "err = nanos_memcpy(" << "&rdv_" << (*it)->get_field_name() << "," << nanos_red_name << "->descriptor, " << "sizeof(" << as_type(private_reduction_vector_type) << "));" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" ; TL::Symbol reduction_cleanup = create_reduction_cleanup_function(reduction, construct); cleanup_code << nanos_red_name << "->cleanup = " << as_symbol(reduction_cleanup) << ";" ; } else { internal_error("Code unreachable", 0); } } FORTRAN_LANGUAGE() { Source::source_language = SourceLanguage::C; } ref_tree.replace(result.parse_statement(ref_tree)); FORTRAN_LANGUAGE() { Source::source_language = SourceLanguage::Current; } }
TL::Symbol LoweringVisitor::create_basic_reduction_function_fortran(OpenMP::Reduction* red, Nodecl::NodeclBase construct) { reduction_map_t::iterator it = _basic_reduction_map_openmp.find(red); if (it != _basic_reduction_map_openmp.end()) { return it->second; } std::string fun_name; { std::stringstream ss; ss << "nanos_red_" << red << "_" << simple_hash_str(construct.get_filename().c_str()); fun_name = ss.str(); } Nodecl::NodeclBase function_body; Source src; src << "SUBROUTINE " << fun_name << "(omp_out, omp_in, num_scalars)\n" << "IMPLICIT NONE\n" << as_type(red->get_type()) << " :: omp_out(num_scalars)\n" << as_type(red->get_type()) << " :: omp_in(num_scalars)\n" << "INTEGER, VALUE :: num_scalars\n" << "INTEGER :: I\n" << statement_placeholder(function_body) << "\n" << "END SUBROUTINE " << fun_name << "\n"; ; Nodecl::NodeclBase function_code = src.parse_global(construct); TL::Scope inside_function = ReferenceScope(function_body).get_scope(); TL::Symbol param_omp_in = inside_function.get_symbol_from_name("omp_in"); ERROR_CONDITION(!param_omp_in.is_valid(), "Symbol omp_in not found", 0); TL::Symbol param_omp_out = inside_function.get_symbol_from_name("omp_out"); ERROR_CONDITION(!param_omp_out.is_valid(), "Symbol omp_out not found", 0); TL::Symbol function_sym = inside_function.get_symbol_from_name(fun_name); ERROR_CONDITION(!function_sym.is_valid(), "Symbol %s not found", fun_name.c_str()); TL::Symbol index = inside_function.get_symbol_from_name("i"); ERROR_CONDITION(!index.is_valid(), "Symbol %s not found", "i"); TL::Symbol num_scalars = inside_function.get_symbol_from_name("num_scalars"); ERROR_CONDITION(!num_scalars.is_valid(), "Symbol %s not found", "num_scalars"); Nodecl::NodeclBase num_scalars_ref = Nodecl::Symbol::make(num_scalars); num_scalars_ref.set_type(num_scalars.get_type().no_ref().get_lvalue_reference_to()); Nodecl::Symbol nodecl_index = Nodecl::Symbol::make(index); nodecl_index.set_type(index.get_type().get_lvalue_reference_to()); Nodecl::NodeclBase loop_header = Nodecl::RangeLoopControl::make( nodecl_index, const_value_to_nodecl(const_value_get_signed_int(1)), num_scalars_ref, Nodecl::NodeclBase::null()); Nodecl::NodeclBase expanded_combiner = red->get_combiner().shallow_copy(); BasicReductionExpandVisitor expander_visitor( red->get_omp_in(), param_omp_in, red->get_omp_out(), param_omp_out, index); expander_visitor.walk(expanded_combiner); function_body.replace( Nodecl::ForStatement::make(loop_header, Nodecl::List::make( Nodecl::ExpressionStatement::make( expanded_combiner)), Nodecl::NodeclBase::null())); _basic_reduction_map_openmp[red] = function_sym; if (IS_FORTRAN_LANGUAGE) { Nodecl::Utils::Fortran::append_used_modules(construct.retrieve_context(), function_sym.get_related_scope()); } Nodecl::Utils::append_to_enclosing_top_level_location(construct, function_code); return function_sym; }