/// Fallback visitor for nodes without a dedicated handler.
///
/// When the node's type is a vector type, the node is replaced in place by a
/// shallow copy whose type is a vector of `int` with `_vector_num_elements`
/// elements, and then every child of the node is walked. Nodes whose type is
/// not a vector are left untouched and their children are not visited.
///
/// @param node  Node being visited.
/// @return      A default-constructed `Ret`.
Nodecl::NodeclVisitor<void>::Ret AVX2StrideVisitorConv::unhandled_node(const Nodecl::NodeclBase& node)
{
    if (!node.get_type().is_vector())
        return Ret();

    TL::Type int_vector_type =
        TL::Type::get_int_type().get_vector_of_elements(_vector_num_elements);

    Nodecl::NodeclBase retyped = node.shallow_copy().as<Nodecl::NodeclBase>();
    retyped.set_type(int_vector_type);

    // TODO better
    node.replace(retyped);

    // The children are fetched after the replacement, as before.
    Nodecl::NodeclBase::Children kids = node.children();
    for (Nodecl::NodeclBase::Children::iterator child = kids.begin();
            child != kids.end();
            ++child)
    {
        walk(*child);
    }

    return Ret();
}
/// Rewrites a call to the `ompss_opencl_deallocate` intrinsic into a Fortran
/// `CALL NANOS_OPENCL_DEALLOCATE_FORTRAN(<ptr_of>(<arg>))` statement.
///
/// @param function_call  The intrinsic call; it must carry exactly one
///                       Fortran actual argument.
/// @param expr_stmt      The statement that contains the call; it is replaced
///                       by the newly parsed statement.
///
/// The symbol behind the argument must be either an allocatable Fortran array
/// or a pointer to a Fortran array; anything else is an internal error.
static void handle_ompss_opencl_deallocate_intrinsic(
        Nodecl::FunctionCall function_call,
        Nodecl::NodeclBase expr_stmt)
{
    Nodecl::List call_args = function_call.get_arguments().as<Nodecl::List>();
    ERROR_CONDITION(call_args.size() != 1,
            "More than one argument in ompss_opencl_deallocate call", 0);

    Nodecl::NodeclBase wrapped_arg = call_args[0];
    ERROR_CONDITION(!wrapped_arg.is<Nodecl::FortranActualArgument>(),
            "Unexpected tree", 0);

    Nodecl::NodeclBase arg =
        wrapped_arg.as<Nodecl::FortranActualArgument>().get_argument();

    TL::Symbol array_sym = ::fortran_data_ref_get_symbol(arg.get_internal_nodecl());

    bool is_allocatable_array =
        array_sym.get_type().is_fortran_array()
        && array_sym.is_allocatable();
    bool is_pointer_to_array =
        array_sym.get_type().is_pointer()
        && array_sym.get_type().points_to().is_fortran_array();

    ERROR_CONDITION(
            !(is_allocatable_array || is_pointer_to_array),
            "The argument of 'ompss_opencl_deallocate' intrinsic must be "
            "an allocatable array or a pointer to an array\n", 0);

    // Replace the current intrinsic call by a call to the Nanos++ API
    TL::Symbol ptr_of_arr_sym =
        get_function_ptr_of(array_sym, expr_stmt.retrieve_context());

    TL::Source replacement_src;
    replacement_src
        << "CALL NANOS_OPENCL_DEALLOCATE_FORTRAN("
        <<      ptr_of_arr_sym.get_name() << "(" << as_expression(arg) << "))\n"
        ;

    expr_stmt.replace(replacement_src.parse_statement(expr_stmt));
}
/// Parses the wrapper code for a task with reductions and plugs a rewritten
/// copy of the task body into its placeholder.
///
/// @param construct              Reference tree used to parse `new_stmts_src`.
/// @param task_statements        Original task body; it is shallow-copied, not
///                               modified.
/// @param task_placeholder       Placeholder tree that receives the rewritten
///                               copy. Do not remove the reference.
/// @param new_stmts_src          Source to parse (it should be a const
///                               reference). It is parsed as C when the
///                               current language is Fortran.
/// @param reduction_symbols_map  Maps each reduction symbol to the name of
///                               the variable holding its storage address.
/// @return The tree obtained from parsing `new_stmts_src`.
///
/// Each storage name is looked up in the scope of `task_placeholder`; a
/// failed lookup is an internal error. Each reduction symbol is associated
/// with a dereference of its storage symbol, and `ReplaceReductionSymbols`
/// walks the copied body with that association.
Nodecl::NodeclBase handle_task_statements(
        Nodecl::NodeclBase construct,
        Nodecl::NodeclBase task_statements,
        Nodecl::NodeclBase& task_placeholder, // Do not remove the reference
        TL::Source &new_stmts_src, // It should be a const reference
        const std::map<TL::Symbol, std::string> &reduction_symbols_map)
{
    if (IS_FORTRAN_LANGUAGE)
        Source::source_language = SourceLanguage::C;

    Nodecl::NodeclBase parsed_wrapper = new_stmts_src.parse_statement(construct);

    if (IS_FORTRAN_LANGUAGE)
        Source::source_language = SourceLanguage::Current;

    TL::Scope placeholder_scope = ReferenceScope(task_placeholder).get_scope();

    typedef std::map<TL::Symbol, std::string>::const_iterator name_iterator_t;

    std::map<TL::Symbol, Nodecl::NodeclBase> symbol_to_storage_access;
    for (name_iterator_t entry = reduction_symbols_map.begin();
            entry != reduction_symbols_map.end();
            ++entry)
    {
        TL::Symbol storage_sym = placeholder_scope.get_symbol_from_name(entry->second);
        ERROR_CONDITION(!storage_sym.is_valid(), "This symbol is not valid", 0);

        symbol_to_storage_access[entry->first] =
            Nodecl::Dereference::make(
                    storage_sym.make_nodecl(/* set_ref_type */ true, storage_sym.get_locus()),
                    storage_sym.get_type().points_to());
    }

    ReplaceReductionSymbols replacer(symbol_to_storage_access);

    Nodecl::NodeclBase rewritten_body = task_statements.shallow_copy();
    replacer.walk(rewritten_body);

    task_placeholder.replace(rewritten_body);

    return parsed_wrapper;
}
bool LoweringVisitor::handle_reductions_on_task( Nodecl::NodeclBase construct, OutlineInfo& outline_info, Nodecl::NodeclBase statements, bool generate_final_stmts, Nodecl::NodeclBase& final_statements) { int num_reductions = 0; TL::Source reductions_stuff, final_clause_stuff, // This source represents an expression which is used to check if // we can do an optimization in the final code. This optimization // consists on calling the original code (with a serial closure) if // we are in a final context and the reduction variables that we // are using have not been registered previously final_clause_opt_expr, extra_array_red_memcpy; std::map<TL::Symbol, std::string> reduction_symbols_map; TL::ObjectList<OutlineDataItem*> data_items = outline_info.get_data_items(); for (TL::ObjectList<OutlineDataItem*>::iterator it = data_items.begin(); it != data_items.end(); it++) { if (!(*it)->is_reduction()) continue; std::pair<TL::OpenMP::Reduction*, TL::Type> red_info_pair = (*it)->get_reduction_info(); TL::OpenMP::Reduction* reduction_info = red_info_pair.first; TL::Type reduction_type = red_info_pair.second.no_ref(); TL::Symbol reduction_item = (*it)->get_symbol(); TL::Type reduction_item_type = reduction_item.get_type().no_ref(); std::string storage_var_name = (*it)->get_field_name() + "_storage"; TL::Type storage_var_type = reduction_type.get_pointer_to(); TL::Symbol reduction_function, reduction_function_original_var, initializer_function; // Checking if the current reduction type has been treated before // Note that if that happens we can reuse the combiner and // initializer function. 
// // C/C++: note that if the type of the list item is an array type, // we regiter the reduction over its element type TL::Type registered_reduction_type = reduction_type; while (!IS_FORTRAN_LANGUAGE && registered_reduction_type.is_array()) { registered_reduction_type = registered_reduction_type.array_element(); } LoweringVisitor::reduction_task_map_t::iterator task_red_info = _task_reductions_map.find(std::make_pair(reduction_info, registered_reduction_type)); if (task_red_info != _task_reductions_map.end()) { reduction_function = task_red_info->second._reducer; reduction_function_original_var = task_red_info->second._reducer_orig_var; initializer_function = task_red_info->second._initializer; } else { create_reduction_functions(reduction_info, construct, registered_reduction_type, reduction_item, reduction_function, reduction_function_original_var); create_initializer_function(reduction_info, construct, registered_reduction_type, initializer_function); _task_reductions_map.insert( std::make_pair( std::make_pair(reduction_info, registered_reduction_type), TaskReductionsInfo(reduction_function, reduction_function_original_var, initializer_function) )); } // Mandatory TL::Sources to be filled by any reduction TL::Source orig_address, // address of the original reduction variable storage_var; // variable which holds the address of the storage // Specific TL::Sources to be filled only by Fortran array reduction TL::Source extra_array_red_decl; if (IS_C_LANGUAGE || IS_CXX_LANGUAGE) { storage_var << storage_var_name; orig_address << (reduction_item_type.is_pointer() ? 
"" : "&") << (*it)->get_field_name(); final_clause_stuff << "if (" << storage_var_name << " == 0)" << "{" << storage_var_name << " = " << "(" << as_type(storage_var_type) << ")" << orig_address << ";" << "}" ; } else { orig_address << "&" << (*it)->get_field_name(); if (reduction_item_type.is_array()) { size_t size_of_array_descriptor = fortran_size_of_array_descriptor( fortran_get_rank0_type(reduction_item_type.get_internal_type()), fortran_get_rank_of_type(reduction_item_type.get_internal_type())); storage_var << storage_var_name << "_indirect"; extra_array_red_decl << "void *" << storage_var << ";"; extra_array_red_memcpy << "nanos_err = nanos_memcpy(" << "(void **) &" << storage_var_name << "," << storage_var << "," << size_of_array_descriptor << ");" ; final_clause_stuff << "if (" << storage_var << " == 0)" << "{" << "nanos_err = nanos_memcpy(" << "(void **) &" << storage_var_name << "," << "(void *) "<< orig_address << "," << size_of_array_descriptor << ");" << "}" << "else" << "{" << extra_array_red_memcpy << "}" ; } else { // We need to convert a void* type into a pointer to the reduction type. // As a void* in FORTRAN is represented as an INTEGER(8), we cannot do this // conversion directly in the FORTRAN source. For this reason we introduce // a new function that will be defined in a C file. 
TL::Symbol func = TL::Nanox::get_function_ptr_conversion( // Destination reduction_item_type.get_pointer_to(), // Origin TL::Type::get_void_type().get_pointer_to(), construct.retrieve_context()); storage_var << storage_var_name; final_clause_stuff << "if (" << storage_var << " == 0)" << "{" << storage_var_name << " = " << func.get_name() << "(" << orig_address << ");" << "}" ; } } if (num_reductions > 0) final_clause_opt_expr << " && "; final_clause_opt_expr << storage_var << " == 0 "; num_reductions++; reductions_stuff << extra_array_red_decl << as_type(storage_var_type) << " " << storage_var_name << ";" << "nanos_err = nanos_task_reduction_get_thread_storage(" << "(void *)" << orig_address << "," << "(void **) &" << storage_var << ");" ; reduction_symbols_map[reduction_item] = storage_var_name; } if (num_reductions != 0) { // Generating the final code if needed if (generate_final_stmts) { std::map<Nodecl::NodeclBase, Nodecl::NodeclBase>::iterator it4 = _final_stmts_map.find(construct); ERROR_CONDITION(it4 == _final_stmts_map.end(), "Unreachable code", 0); Nodecl::NodeclBase placeholder; TL::Source new_statements_src; new_statements_src << "{" << "nanos_err_t nanos_err;" << reductions_stuff << "if (" << final_clause_opt_expr << ")" << "{" << as_statement(it4->second) << "}" << "else" << "{" << final_clause_stuff << statement_placeholder(placeholder) << "}" << "}" ; final_statements = handle_task_statements( construct, statements, placeholder, new_statements_src, reduction_symbols_map); } // Generating the task code { TL::Source new_statements_src; Nodecl::NodeclBase placeholder; new_statements_src << "{" << "nanos_err_t nanos_err;" << reductions_stuff << extra_array_red_memcpy << statement_placeholder(placeholder) << "}" ; Nodecl::NodeclBase new_statements = handle_task_statements( construct, statements, placeholder, new_statements_src, reduction_symbols_map); statements.replace(new_statements); } } ERROR_CONDITION(num_reductions != 0 && 
!Nanos::Version::interface_is_at_least("task_reduction", 1001), "The version of the runtime begin used does not support task reductions", 0); return (num_reductions != 0); }
void LoweringVisitor::loop_spawn_worksharing(OutlineInfo& outline_info, Nodecl::NodeclBase construct, Nodecl::List distribute_environment, Nodecl::RangeLoopControl& range, const std::string& outline_name, TL::Symbol structure_symbol, TL::Symbol slicer_descriptor, Nodecl::NodeclBase task_label) { Symbol enclosing_function = Nodecl::Utils::get_enclosing_function(construct); Nodecl::OpenMP::Schedule schedule = distribute_environment.find_first<Nodecl::OpenMP::Schedule>(); ERROR_CONDITION(schedule.is_null(), "Schedule tree is missing", 0); Nodecl::NodeclBase lower = range.get_lower(); Nodecl::NodeclBase upper = range.get_upper(); Nodecl::NodeclBase step = range.get_step(); Source struct_size, dynamic_size, struct_arg_type_name; struct_arg_type_name << ((structure_symbol.get_type().is_template_specialized_type() && structure_symbol.get_type().is_dependent()) ? "typename " : "") << structure_symbol.get_qualified_name(enclosing_function.get_scope()) ; struct_size << "sizeof( " << struct_arg_type_name << " )" << dynamic_size; Source immediate_decl; allocate_immediate_structure( structure_symbol.get_user_defined_type(), outline_info, struct_arg_type_name, struct_size, // out immediate_decl, dynamic_size); Source call_outline_function; Source schedule_setup; schedule_setup << "int nanos_chunk;" ; if (schedule.get_text() == "runtime") { schedule_setup << "nanos_omp_sched_t nanos_runtime_sched;" << "nanos_err = nanos_omp_get_schedule(&nanos_runtime_sched, &nanos_chunk);" << "if (nanos_err != NANOS_OK)" << "nanos_handle_error(nanos_err);" << "nanos_ws_t current_ws_policy = nanos_omp_find_worksharing(nanos_runtime_sched);" ; } else { Source schedule_name; if (Nanos::Version::interface_is_at_least("openmp", 8)) { schedule_name << "nanos_omp_sched_" << schedule.get_text(); } else { // We used nanos_omp_sched in versions prior to 8 schedule_name << "omp_sched_" << schedule.get_text(); } schedule_setup << "nanos_ws_t current_ws_policy = nanos_omp_find_worksharing(" << schedule_name 
<< ");" << "if (current_ws_policy == 0)" << "nanos_handle_error(NANOS_UNIMPLEMENTED);" << "nanos_chunk = " << as_expression(schedule.get_chunk()) << ";" ; } Source worksharing_creation; if (IS_CXX_LANGUAGE) { worksharing_creation << as_statement(Nodecl::CxxDef::make(Nodecl::NodeclBase::null(), slicer_descriptor)); } worksharing_creation << "nanos_err = nanos_worksharing_create(" << "&" << as_symbol(slicer_descriptor) << "," << "current_ws_policy," << "(void**)&nanos_setup_info_loop," << "&single_guard);" << "if (nanos_err != NANOS_OK)" << "nanos_handle_error(nanos_err);" ; Nodecl::NodeclBase fill_outline_arguments_tree, fill_immediate_arguments_tree; TL::Source pm_specific_code; if (!_lowering->in_ompss_mode()) { // OpenMP pm_specific_code << immediate_decl << statement_placeholder(fill_immediate_arguments_tree) << "smp_" << outline_name << "(imm_args);" ; } else { // OmpSs std::string wd_description = (!task_label.is_null()) ? task_label.get_text() : enclosing_function.get_name(); Source const_wd_info; const_wd_info << fill_const_wd_info(struct_arg_type_name, /* is_untied */ false, /* mandatory_creation */ true, /* is_function_task */ false, wd_description, outline_info, construct); std::string dyn_props_var = "nanos_wd_dyn_props"; Source dynamic_wd_info; dynamic_wd_info << "nanos_wd_dyn_props_t " << dyn_props_var << ";"; fill_dynamic_properties(dyn_props_var, /* priority_expr */ nodecl_null(), /* final_expr */ nodecl_null(), /* is_implicit */ 0, dynamic_wd_info); pm_specific_code << struct_arg_type_name << " *ol_args = (" << struct_arg_type_name <<"*) 0;" << const_wd_info << "nanos_wd_t nanos_wd_ = (nanos_wd_t) 0;" << dynamic_wd_info << "static nanos_slicer_t replicate = (nanos_slicer_t)0;" << "if (replicate == (nanos_slicer_t)0)" << "replicate = nanos_find_slicer(\"replicate\");" << "if (replicate == (nanos_slicer_t)0)" << "nanos_handle_error(NANOS_UNIMPLEMENTED);" << "nanos_err = nanos_create_sliced_wd(&nanos_wd_, " << "nanos_wd_const_data.base.num_devices, 
nanos_wd_const_data.devices, " << "(size_t)" << struct_size << ", nanos_wd_const_data.base.data_alignment, " << "(void**)&ol_args, nanos_current_wd(), replicate," << "&nanos_wd_const_data.base.props, &" << dyn_props_var << ", 0, (nanos_copy_data_t**)0," << "0, (nanos_region_dimension_internal_t**)0" << ");" << "if (nanos_err != NANOS_OK)" << "nanos_handle_error(nanos_err);" << statement_placeholder(fill_outline_arguments_tree) << "nanos_err = nanos_submit(nanos_wd_, 0, (nanos_data_access_t *) 0, (nanos_team_t) 0);" << "if (nanos_err != NANOS_OK)" << "nanos_handle_error(nanos_err);" ; } TL::Source implicit_barrier_or_tw; if (!distribute_environment.find_first<Nodecl::OpenMP::BarrierAtEnd>().is_null()) { implicit_barrier_or_tw << get_implicit_sync_end_construct_source(); } Source spawn_code; spawn_code << "{" << as_type(get_bool_type()) << " single_guard;" << "nanos_err_t nanos_err;" << schedule_setup << "nanos_ws_info_loop_t nanos_setup_info_loop;" << "nanos_setup_info_loop.lower_bound = " << as_expression(lower) << ";" << "nanos_setup_info_loop.upper_bound = " << as_expression(upper) << ";" << "nanos_setup_info_loop.loop_step = " << as_expression(step) << ";" << "nanos_setup_info_loop.chunk_size = nanos_chunk;" << worksharing_creation << pm_specific_code << implicit_barrier_or_tw << "}" ; Source fill_outline_arguments, fill_immediate_arguments; fill_arguments(construct, outline_info, fill_outline_arguments, fill_immediate_arguments); if (IS_FORTRAN_LANGUAGE) Source::source_language = SourceLanguage::C; Nodecl::NodeclBase spawn_code_tree = spawn_code.parse_statement(construct); if (IS_FORTRAN_LANGUAGE) Source::source_language = SourceLanguage::Current; Nodecl::NodeclBase arguments_tree; TL::Source *fill_arguments; if (!_lowering->in_ompss_mode()) { // OpenMP arguments_tree = fill_immediate_arguments_tree; fill_arguments = &fill_immediate_arguments; } else { // OmpSs arguments_tree = fill_outline_arguments_tree; fill_arguments = &fill_outline_arguments; } // Now 
attach the slicer symbol to its final scope (see tl-lower-for-worksharing.cpp) const decl_context_t* spawn_inner_context = arguments_tree.retrieve_context().get_decl_context(); slicer_descriptor.get_internal_symbol()->decl_context = spawn_inner_context; ::insert_entry(spawn_inner_context->current_scope, slicer_descriptor.get_internal_symbol()); // Parse the arguments Nodecl::NodeclBase new_tree = fill_arguments->parse_statement(arguments_tree); arguments_tree.replace(new_tree); // Finally, replace the construct by the tree that represents the spawn code construct.replace(spawn_code_tree); }
static void handle_ompss_opencl_allocate_intrinsic( Nodecl::FunctionCall function_call, std::map<std::pair<TL::Type, std::pair<int, bool> > , Symbol> &declared_ocl_allocate_functions, Nodecl::NodeclBase expr_stmt) { Nodecl::List arguments = function_call.get_arguments().as<Nodecl::List>(); ERROR_CONDITION(arguments.size() != 1, "More than one argument in 'ompss_opencl_allocate' call\n", 0); Nodecl::NodeclBase actual_argument = arguments[0]; ERROR_CONDITION(!actual_argument.is<Nodecl::FortranActualArgument>(), "Unexpected tree\n", 0); Nodecl::NodeclBase arg = actual_argument.as<Nodecl::FortranActualArgument>().get_argument(); ERROR_CONDITION(!arg.is<Nodecl::ArraySubscript>(), "Unreachable code\n", 0); Nodecl::NodeclBase subscripted = arg.as<Nodecl::ArraySubscript>().get_subscripted(); TL::Symbol subscripted_symbol = ::fortran_data_ref_get_symbol(subscripted.get_internal_nodecl()); ERROR_CONDITION( !(subscripted_symbol.get_type().is_fortran_array() && subscripted_symbol.is_allocatable()) && !(subscripted_symbol.get_type().is_pointer() && subscripted_symbol.get_type().points_to().is_fortran_array()), "The argument of 'ompss_opencl_allocate' intrinsic must be " "an allocatable array or a pointer to an array with all its bounds specified\n", 0); TL::Type array_type; int num_dimensions; bool is_allocatable; if (subscripted_symbol.is_allocatable()) { array_type = subscripted_symbol.get_type(); num_dimensions = subscripted_symbol.get_type().get_num_dimensions(); is_allocatable = true; } else { array_type = subscripted_symbol.get_type().points_to(); num_dimensions = array_type.get_num_dimensions(); is_allocatable = false; } TL::Type element_type = array_type; while (element_type.is_array()) { element_type = element_type.array_element(); } ERROR_CONDITION(!array_type.is_array(), "This type should be an array type", 0); std::pair<TL::Type, std::pair<int, bool> > key = std::make_pair(element_type, std::make_pair(num_dimensions, is_allocatable)); std::map<std::pair<TL::Type, 
std::pair<int, bool> > , Symbol>::iterator it_new_fun = declared_ocl_allocate_functions.find(key); // Reuse the auxiliar function if it already exists Symbol new_function_sym; if (it_new_fun != declared_ocl_allocate_functions.end()) { new_function_sym = it_new_fun->second; } else { new_function_sym = create_new_function_opencl_allocate( expr_stmt, subscripted_symbol, element_type, num_dimensions, is_allocatable); declared_ocl_allocate_functions[key] = new_function_sym; } // Replace the current intrinsic call by a call to the new function TL::Source actual_arg_array; Nodecl::NodeclBase subscripted_lvalue = subscripted.shallow_copy(); subscripted_lvalue.set_type(subscripted_symbol.get_type().no_ref().get_lvalue_reference_to()); actual_arg_array << as_expression(subscripted_lvalue); TL::Source actual_arg_bounds; Nodecl::List subscripts = arg.as<Nodecl::ArraySubscript>().get_subscripts().as<Nodecl::List>(); for (Nodecl::List::reverse_iterator it = subscripts.rbegin(); it != subscripts.rend(); it++) { Nodecl::NodeclBase subscript = *it, lower, upper; if (it != subscripts.rbegin()) actual_arg_bounds << ", "; if (subscript.is<Nodecl::Range>()) { lower = subscript.as<Nodecl::Range>().get_lower(); upper = subscript.as<Nodecl::Range>().get_upper(); } else { lower = nodecl_make_integer_literal( fortran_get_default_integer_type(), const_value_get_signed_int(1), make_locus("", 0, 0)); upper = subscript; } actual_arg_bounds << as_expression(lower) << "," << as_expression(upper); } TL::Source new_function_call; new_function_call << "CALL " << as_symbol(new_function_sym) << "(" << actual_arg_array << ", " << actual_arg_bounds << ")\n" ; expr_stmt.replace(new_function_call.parse_statement(expr_stmt)); }
/// Replaces `ref_tree` by the code that stores each thread-private reduction
/// value (`rdp_<symbol>`) into the slot of the reduction vector
/// (`rdv_<field>`) indexed by `nanos_omp_get_thread_num()`.
///
/// @param outline_info  Outline data items; only those flagged as reductions
///                      are considered.
/// @param ref_tree      Tree to be replaced; it must not be null. It is also
///                      the context used for parsing.
///
/// In C/C++ an array is copied with `__builtin_memcpy` and anything else by
/// assignment. In Fortran an array-section assignment is emitted, with one
/// `:` per rank of the reduction symbol before the thread index.
void LoweringVisitor::perform_partial_reduction(OutlineInfo& outline_info, Nodecl::NodeclBase ref_tree)
{
    ERROR_CONDITION(ref_tree.is_null(), "Invalid tree", 0);

    Source reduction_code;

    TL::ObjectList<OutlineDataItem*> reduction_items = outline_info.get_data_items().filter(
            predicate(lift_pointer(functor(&OutlineDataItem::is_reduction))));

    // With no reduction items the loop does not run and the source stays empty.
    for (TL::ObjectList<OutlineDataItem*>::iterator it = reduction_items.begin();
            it != reduction_items.end();
            ++it)
    {
        OutlineDataItem* item = *it;

        if (IS_C_LANGUAGE || IS_CXX_LANGUAGE)
        {
            if (!item->get_private_type().is_array())
            {
                reduction_code
                    << "rdv_" << item->get_field_name() << "[nanos_omp_get_thread_num()] "
                    << "= rdp_" << item->get_symbol().get_name() << ";"
                    ;
            }
            else
            {
                reduction_code
                    << "__builtin_memcpy(rdv_" << item->get_field_name() << "[nanos_omp_get_thread_num()],"
                    << "rdp_" << item->get_symbol().get_name() << ","
                    << " sizeof(" << as_type(item->get_private_type()) << "));"
                    ;
            }
        }
        else if (IS_FORTRAN_LANGUAGE)
        {
            TL::Type symbol_type = item->get_symbol().get_type().no_ref();

            int rank = 0;
            if (symbol_type.is_fortran_array())
            {
                rank = symbol_type.fortran_rank();
            }

            Source extra_dims;
            for (int dim = 0; dim < rank; dim++)
            {
                extra_dims << ":,";
            }

            reduction_code
                << "rdv_" << item->get_field_name() << "( " << extra_dims << "nanos_omp_get_thread_num() ) = rdp_" << item->get_symbol().get_name() << "\n"
                ;
        }
        else
        {
            internal_error("Code unreachable", 0);
        }
    }

    ref_tree.replace(reduction_code.parse_statement(ref_tree));
}
void LoweringVisitor::reduction_initialization_code( OutlineInfo& outline_info, Nodecl::NodeclBase ref_tree, Nodecl::NodeclBase construct) { ERROR_CONDITION(ref_tree.is_null(), "Invalid tree", 0); if (!Nanos::Version::interface_is_at_least("master", 5023)) { running_error("%s: error: a newer version of Nanos++ (>=5023) is required for reductions support\n", construct.get_locus_str().c_str()); } TL::ObjectList<OutlineDataItem*> reduction_items = outline_info.get_data_items().filter( predicate(lift_pointer(functor(&OutlineDataItem::is_reduction)))); ERROR_CONDITION (reduction_items.empty(), "No reductions to process", 0); Source result; Source reduction_declaration, thread_initializing_reduction_info, thread_fetching_reduction_info; result << reduction_declaration << "{" << as_type(get_bool_type()) << " red_single_guard;" << "nanos_err_t err;" << "err = nanos_enter_sync_init(&red_single_guard);" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << "if (red_single_guard)" << "{" << "int nanos_num_threads = nanos_omp_get_num_threads();" << thread_initializing_reduction_info << "err = nanos_release_sync_init();" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << "}" << "else" << "{" << "err = nanos_wait_sync_init();" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << thread_fetching_reduction_info << "}" << "}" ; for (TL::ObjectList<OutlineDataItem*>::iterator it = reduction_items.begin(); it != reduction_items.end(); it++) { std::string nanos_red_name = "nanos_red_" + (*it)->get_symbol().get_name(); std::pair<OpenMP::Reduction*, TL::Type> reduction_info = (*it)->get_reduction_info(); OpenMP::Reduction* reduction = reduction_info.first; TL::Type reduction_type = reduction_info.second; if (reduction_type.is_any_reference()) reduction_type = reduction_type.references_to(); TL::Type reduction_element_type = reduction_type; if (IS_FORTRAN_LANGUAGE) { while (reduction_element_type.is_fortran_array()) reduction_element_type = 
reduction_element_type.array_element(); } else { while (reduction_element_type.is_array()) reduction_element_type = reduction_element_type.array_element(); } Source element_size; if (IS_FORTRAN_LANGUAGE) { if (reduction_type.is_fortran_array()) { // We need to parse this bit in Fortran Source number_of_bytes; number_of_bytes << "SIZE(" << (*it)->get_symbol().get_name() << ") * " << reduction_element_type.get_size(); element_size << as_expression(number_of_bytes.parse_expression(construct)); } else { element_size << "sizeof(" << as_type(reduction_type) << ")"; } } else { element_size << "sizeof(" << as_type(reduction_type) << ")"; } reduction_declaration << "nanos_reduction_t* " << nanos_red_name << ";" ; Source allocate_private_buffer, cleanup_code; Source num_scalars; TL::Symbol basic_reduction_function, vector_reduction_function; create_reduction_function(reduction, construct, reduction_type, basic_reduction_function, vector_reduction_function); (*it)->reduction_set_basic_function(basic_reduction_function); thread_initializing_reduction_info << "err = nanos_malloc((void**)&" << nanos_red_name << ", sizeof(nanos_reduction_t), " << "\"" << construct.get_filename() << "\", " << construct.get_line() << ");" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << nanos_red_name << "->original = (void*)" << (reduction_type.is_array() ? "" : "&") << (*it)->get_symbol().get_name() << ";" << allocate_private_buffer << nanos_red_name << "->vop = " << (vector_reduction_function.is_valid() ? 
as_symbol(vector_reduction_function) : "0") << ";" << nanos_red_name << "->bop = (void(*)(void*,void*,int))" << as_symbol(basic_reduction_function) << ";" << nanos_red_name << "->element_size = " << element_size << ";" << nanos_red_name << "->num_scalars = " << num_scalars << ";" << cleanup_code << "err = nanos_register_reduction(" << nanos_red_name << ");" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" ; if (IS_C_LANGUAGE || IS_CXX_LANGUAGE) { if (reduction_type.is_array()) { num_scalars << "sizeof(" << as_type(reduction_type) << ") / sizeof(" << as_type(reduction_element_type) <<")"; } else { num_scalars << "1"; } allocate_private_buffer << "err = nanos_malloc(&" << nanos_red_name << "->privates, sizeof(" << as_type(reduction_type) << ") * nanos_num_threads, " << "\"" << construct.get_filename() << "\", " << construct.get_line() << ");" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << nanos_red_name << "->descriptor = " << nanos_red_name << "->privates;" << "rdv_" << (*it)->get_field_name() << " = (" << as_type( (*it)->get_private_type().get_pointer_to() ) << ")" << nanos_red_name << "->privates;" ; thread_fetching_reduction_info << "err = nanos_reduction_get(&" << nanos_red_name << ", " << (reduction_type.is_array() ? 
"" : "&") << (*it)->get_symbol().get_name() << ");" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << "rdv_" << (*it)->get_field_name() << " = (" << as_type( (*it)->get_private_type().get_pointer_to() ) << ")" << nanos_red_name << "->privates;" ; cleanup_code << nanos_red_name << "->cleanup = nanos_free0;" ; } else if (IS_FORTRAN_LANGUAGE) { Type private_reduction_vector_type; Source extra_dims; { TL::Type t = (*it)->get_symbol().get_type().no_ref(); int rank = 0; if (t.is_fortran_array()) { rank = t.fortran_rank(); } if (rank != 0) { // We need to parse this bit in Fortran Source size_call; size_call << "SIZE(" << (*it)->get_symbol().get_name() << ")"; num_scalars << as_expression(size_call.parse_expression(construct)); } else { num_scalars << "1"; } private_reduction_vector_type = fortran_get_n_ranked_type_with_descriptor( get_void_type(), rank + 1, construct.retrieve_context().get_decl_context()); int i; for (i = 0; i < rank; i++) { Source lbound_src; lbound_src << "LBOUND(" << (*it)->get_symbol().get_name() << ", DIM = " << (rank - i) << ")"; Source ubound_src; ubound_src << "UBOUND(" << (*it)->get_symbol().get_name() << ", DIM = " << (rank - i) << ")"; extra_dims << "[" << as_expression(lbound_src.parse_expression(construct)) << ":" << as_expression(ubound_src.parse_expression(construct)) << "]"; t = t.array_element(); } } allocate_private_buffer << "@FORTRAN_ALLOCATE@((*rdv_" << (*it)->get_field_name() << ")[0:(nanos_num_threads-1)]" << extra_dims <<");" << nanos_red_name << "->privates = &(*rdv_" << (*it)->get_field_name() << ");" << "err = nanos_malloc(&" << nanos_red_name << "->descriptor, sizeof(" << as_type(private_reduction_vector_type) << "), " << "\"" << construct.get_filename() << "\", " << construct.get_line() << ");" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << "err = nanos_memcpy(" << nanos_red_name << "->descriptor, " "&rdv_" << (*it)->get_field_name() << ", sizeof(" << as_type(private_reduction_vector_type) << "));" << "if 
(err != NANOS_OK)" << "nanos_handle_error(err);" ; thread_fetching_reduction_info << "err = nanos_reduction_get(&" << nanos_red_name << ", &" << (*it)->get_symbol().get_name() << ");" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" << "err = nanos_memcpy(" << "&rdv_" << (*it)->get_field_name() << "," << nanos_red_name << "->descriptor, " << "sizeof(" << as_type(private_reduction_vector_type) << "));" << "if (err != NANOS_OK)" << "nanos_handle_error(err);" ; TL::Symbol reduction_cleanup = create_reduction_cleanup_function(reduction, construct); cleanup_code << nanos_red_name << "->cleanup = " << as_symbol(reduction_cleanup) << ";" ; } else { internal_error("Code unreachable", 0); } } FORTRAN_LANGUAGE() { Source::source_language = SourceLanguage::C; } ref_tree.replace(result.parse_statement(ref_tree)); FORTRAN_LANGUAGE() { Source::source_language = SourceLanguage::Current; } }
void LoweringVisitor::perform_partial_reduction_slicer(OutlineInfo& outline_info, Nodecl::NodeclBase ref_tree, Nodecl::Utils::SimpleSymbolMap*& symbol_map) { ERROR_CONDITION(ref_tree.is_null(), "Invalid tree", 0); TL::ObjectList<OutlineDataItem*> reduction_items = outline_info.get_data_items().filter( lift_pointer<bool, OutlineDataItem>(&OutlineDataItem::is_reduction)); if (!reduction_items.empty()) { TL::ObjectList<Nodecl::NodeclBase> reduction_stmts; Nodecl::Utils::SimpleSymbolMap* simple_symbol_map = new Nodecl::Utils::SimpleSymbolMap(symbol_map); symbol_map = simple_symbol_map; for (TL::ObjectList<OutlineDataItem*>::iterator it = reduction_items.begin(); it != reduction_items.end(); it++) { scope_entry_t* shared_symbol = (*it)->get_symbol().get_internal_symbol(); // We need this to avoid the original symbol be replaced // incorrectly scope_entry_t* shared_symbol_proxy = NEW0(scope_entry_t); shared_symbol_proxy->symbol_name = UNIQUESTR_LITERAL("<<reduction-variable>>"); // Crude way to ensure it is replaced shared_symbol_proxy->kind = shared_symbol->kind; symbol_entity_specs_copy_from(shared_symbol_proxy, shared_symbol); shared_symbol_proxy->decl_context = shared_symbol->decl_context; shared_symbol_proxy->type_information = shared_symbol->type_information; shared_symbol_proxy->locus = shared_symbol->locus; simple_symbol_map->add_map( shared_symbol_proxy, (*it)->reduction_get_shared_symbol_in_outline() ); Source reduction_code; Nodecl::NodeclBase partial_reduction_code; reduction_code << "{" << "nanos_lock_t* red_lock;" << "nanos_err_t nanos_err;" << "nanos_err = nanos_get_lock_address(" << ((*it)->get_private_type().is_array() ? 
"" : "&") << as_symbol( shared_symbol_proxy ) << ", &red_lock);" << "if (nanos_err != NANOS_OK) nanos_handle_error(nanos_err);" << "nanos_err = nanos_set_lock(red_lock);" << "if (nanos_err != NANOS_OK) nanos_handle_error(nanos_err);" << statement_placeholder(partial_reduction_code) << "nanos_err = nanos_unset_lock(red_lock);" << "if (nanos_err != NANOS_OK) nanos_handle_error(nanos_err);" << "}" ; FORTRAN_LANGUAGE() { Source::source_language = SourceLanguage::C; } Nodecl::NodeclBase statement = reduction_code.parse_statement(ref_tree); FORTRAN_LANGUAGE() { Source::source_language = SourceLanguage::Current; } ERROR_CONDITION(!statement.is<Nodecl::List>(), "Expecting a list", 0); reduction_stmts.append(statement.as<Nodecl::List>()[0]); TL::Type elemental_type = (*it)->get_private_type(); while (elemental_type.is_array()) elemental_type = elemental_type.array_element(); Source partial_reduction_code_src; if (IS_C_LANGUAGE || IS_CXX_LANGUAGE) { partial_reduction_code_src << as_symbol( (*it)->reduction_get_basic_function() ) << "(" // This will be the reduction shared << ((*it)->get_private_type().is_array() ? "" : "&") << as_symbol( shared_symbol_proxy ) << ", " // This will be the reduction private var << ((*it)->get_private_type().is_array() ? "" : "&") << as_symbol( (*it)->get_symbol() ) << ", " << ((*it)->get_private_type().is_array() ? 
( "sizeof(" + as_type( (*it)->get_private_type()) + ")" "/ sizeof(" + as_type(elemental_type) + ")" ) : "1") << ");" ; } else if (IS_FORTRAN_LANGUAGE) { // We use an ELEMENTAL call here partial_reduction_code_src << "CALL " << as_symbol ( (*it)->reduction_get_basic_function() ) << "(" // This will be the reduction shared << as_symbol( shared_symbol_proxy ) << ", " // This will be the reduction private var << as_symbol( (*it)->get_symbol() ) << ")" ; } else { internal_error("Code unreachable", 0); } partial_reduction_code.replace( partial_reduction_code_src.parse_statement(partial_reduction_code)); } ref_tree.replace( Nodecl::CompoundStatement::make( Nodecl::List::make(reduction_stmts), Nodecl::NodeclBase::null() ) ); }