// Sets up the vector counterpart of a scalar reduction.
//
// - vector_symbol receives as its value a VectorPromotion of a shallow copy
//   of reduction_initializer, typed with vector_symbol's own type, and an
//   ObjectInit for vector_symbol is appended to pre_nodecls.
// - When reduction_name is "+" (resp. "-"), an ExpressionStatement wrapping a
//   VectorReductionAdd (resp. VectorReductionMinus) node, built from the
//   scalar symbol, the vector symbol and the scalar symbol's type, is
//   appended to post_nodecls.
// - Any other reduction_name appends nothing to post_nodecls.
//
// reduction_type is not read by this function.
void VectorizerVectorReduction::vectorize_reduction(const TL::Symbol& scalar_symbol,
        TL::Symbol& vector_symbol,
        const Nodecl::NodeclBase& reduction_initializer,
        const std::string& reduction_name,
        const TL::Type& reduction_type,
        Nodecl::List& pre_nodecls,
        Nodecl::List& post_nodecls)
{
    // Step 1: initialize the vector symbol with the promoted initializer
    vector_symbol.set_value(
            Nodecl::VectorPromotion::make(
                reduction_initializer.shallow_copy(),
                vector_symbol.get_type()));

    // The initialization itself is materialized through an ObjectInit node
    Nodecl::ObjectInit vector_init =
        Nodecl::ObjectInit::make(vector_symbol);
    pre_nodecls.append(vector_init);

    // Step 2: emit the vector reduction that runs after the loop
    if (reduction_name == "+")
    {
        Nodecl::ExpressionStatement add_stmt =
            Nodecl::ExpressionStatement::make(
                    Nodecl::VectorReductionAdd::make(
                        scalar_symbol.make_nodecl(true),
                        vector_symbol.make_nodecl(true),
                        scalar_symbol.get_type()));

        post_nodecls.append(add_stmt);
    }
    else if (reduction_name == "-")
    {
        Nodecl::ExpressionStatement minus_stmt =
            Nodecl::ExpressionStatement::make(
                    Nodecl::VectorReductionMinus::make(
                        scalar_symbol.make_nodecl(true),
                        vector_symbol.make_nodecl(true),
                        scalar_symbol.get_type()));

        post_nodecls.append(minus_stmt);
    }
}
// Creates a new function symbol that mirrors `function`: same name, same
// return type, same parameter types, and parameter names taken from the
// names of the symbols related to `function`.
//
// The actual creation is delegated to SymbolUtils::new_function_symbol.
TL::Symbol new_function_symbol(TL::Symbol function)
{
    // Collect the parameter names from the related symbols of the function
    TL::ObjectList<TL::Symbol> related_symbols = function.get_related_symbols();
    TL::ObjectList<std::string> parameter_names;
    for (TL::ObjectList<TL::Symbol>::iterator related = related_symbols.begin();
            related != related_symbols.end();
            ++related)
    {
        parameter_names.append(related->get_name());
    }

    TL::ObjectList<TL::Type> parameter_types = function.get_type().parameters();

    return SymbolUtils::new_function_symbol(
            function,
            function.get_name(),
            function.get_type().returns(),
            parameter_names,
            parameter_types);
}
void LoweringVisitor::loop_spawn_worksharing(OutlineInfo& outline_info, Nodecl::NodeclBase construct, Nodecl::List distribute_environment, Nodecl::RangeLoopControl& range, const std::string& outline_name, TL::Symbol structure_symbol, TL::Symbol slicer_descriptor, Nodecl::NodeclBase task_label) { Symbol enclosing_function = Nodecl::Utils::get_enclosing_function(construct); Nodecl::OpenMP::Schedule schedule = distribute_environment.find_first<Nodecl::OpenMP::Schedule>(); ERROR_CONDITION(schedule.is_null(), "Schedule tree is missing", 0); Nodecl::NodeclBase lower = range.get_lower(); Nodecl::NodeclBase upper = range.get_upper(); Nodecl::NodeclBase step = range.get_step(); Source struct_size, dynamic_size, struct_arg_type_name; struct_arg_type_name << ((structure_symbol.get_type().is_template_specialized_type() && structure_symbol.get_type().is_dependent()) ? "typename " : "") << structure_symbol.get_qualified_name(enclosing_function.get_scope()) ; struct_size << "sizeof( " << struct_arg_type_name << " )" << dynamic_size; Source immediate_decl; allocate_immediate_structure( structure_symbol.get_user_defined_type(), outline_info, struct_arg_type_name, struct_size, // out immediate_decl, dynamic_size); Source call_outline_function; Source schedule_setup; schedule_setup << "int nanos_chunk;" ; if (schedule.get_text() == "runtime") { schedule_setup << "nanos_omp_sched_t nanos_runtime_sched;" << "nanos_err = nanos_omp_get_schedule(&nanos_runtime_sched, &nanos_chunk);" << "if (nanos_err != NANOS_OK)" << "nanos_handle_error(nanos_err);" << "nanos_ws_t current_ws_policy = nanos_omp_find_worksharing(nanos_runtime_sched);" ; } else { Source schedule_name; if (Nanos::Version::interface_is_at_least("openmp", 8)) { schedule_name << "nanos_omp_sched_" << schedule.get_text(); } else { // We used nanos_omp_sched in versions prior to 8 schedule_name << "omp_sched_" << schedule.get_text(); } schedule_setup << "nanos_ws_t current_ws_policy = nanos_omp_find_worksharing(" << schedule_name 
<< ");" << "if (current_ws_policy == 0)" << "nanos_handle_error(NANOS_UNIMPLEMENTED);" << "nanos_chunk = " << as_expression(schedule.get_chunk()) << ";" ; } Source worksharing_creation; if (IS_CXX_LANGUAGE) { worksharing_creation << as_statement(Nodecl::CxxDef::make(Nodecl::NodeclBase::null(), slicer_descriptor)); } worksharing_creation << "nanos_err = nanos_worksharing_create(" << "&" << as_symbol(slicer_descriptor) << "," << "current_ws_policy," << "(void**)&nanos_setup_info_loop," << "&single_guard);" << "if (nanos_err != NANOS_OK)" << "nanos_handle_error(nanos_err);" ; Nodecl::NodeclBase fill_outline_arguments_tree, fill_immediate_arguments_tree; TL::Source pm_specific_code; if (!_lowering->in_ompss_mode()) { // OpenMP pm_specific_code << immediate_decl << statement_placeholder(fill_immediate_arguments_tree) << "smp_" << outline_name << "(imm_args);" ; } else { // OmpSs std::string wd_description = (!task_label.is_null()) ? task_label.get_text() : enclosing_function.get_name(); Source const_wd_info; const_wd_info << fill_const_wd_info(struct_arg_type_name, /* is_untied */ false, /* mandatory_creation */ true, /* is_function_task */ false, wd_description, outline_info, construct); std::string dyn_props_var = "nanos_wd_dyn_props"; Source dynamic_wd_info; dynamic_wd_info << "nanos_wd_dyn_props_t " << dyn_props_var << ";"; fill_dynamic_properties(dyn_props_var, /* priority_expr */ nodecl_null(), /* final_expr */ nodecl_null(), /* is_implicit */ 0, dynamic_wd_info); pm_specific_code << struct_arg_type_name << " *ol_args = (" << struct_arg_type_name <<"*) 0;" << const_wd_info << "nanos_wd_t nanos_wd_ = (nanos_wd_t) 0;" << dynamic_wd_info << "static nanos_slicer_t replicate = (nanos_slicer_t)0;" << "if (replicate == (nanos_slicer_t)0)" << "replicate = nanos_find_slicer(\"replicate\");" << "if (replicate == (nanos_slicer_t)0)" << "nanos_handle_error(NANOS_UNIMPLEMENTED);" << "nanos_err = nanos_create_sliced_wd(&nanos_wd_, " << "nanos_wd_const_data.base.num_devices, 
nanos_wd_const_data.devices, " << "(size_t)" << struct_size << ", nanos_wd_const_data.base.data_alignment, " << "(void**)&ol_args, nanos_current_wd(), replicate," << "&nanos_wd_const_data.base.props, &" << dyn_props_var << ", 0, (nanos_copy_data_t**)0," << "0, (nanos_region_dimension_internal_t**)0" << ");" << "if (nanos_err != NANOS_OK)" << "nanos_handle_error(nanos_err);" << statement_placeholder(fill_outline_arguments_tree) << "nanos_err = nanos_submit(nanos_wd_, 0, (nanos_data_access_t *) 0, (nanos_team_t) 0);" << "if (nanos_err != NANOS_OK)" << "nanos_handle_error(nanos_err);" ; } TL::Source implicit_barrier_or_tw; if (!distribute_environment.find_first<Nodecl::OpenMP::BarrierAtEnd>().is_null()) { implicit_barrier_or_tw << get_implicit_sync_end_construct_source(); } Source spawn_code; spawn_code << "{" << as_type(get_bool_type()) << " single_guard;" << "nanos_err_t nanos_err;" << schedule_setup << "nanos_ws_info_loop_t nanos_setup_info_loop;" << "nanos_setup_info_loop.lower_bound = " << as_expression(lower) << ";" << "nanos_setup_info_loop.upper_bound = " << as_expression(upper) << ";" << "nanos_setup_info_loop.loop_step = " << as_expression(step) << ";" << "nanos_setup_info_loop.chunk_size = nanos_chunk;" << worksharing_creation << pm_specific_code << implicit_barrier_or_tw << "}" ; Source fill_outline_arguments, fill_immediate_arguments; fill_arguments(construct, outline_info, fill_outline_arguments, fill_immediate_arguments); if (IS_FORTRAN_LANGUAGE) Source::source_language = SourceLanguage::C; Nodecl::NodeclBase spawn_code_tree = spawn_code.parse_statement(construct); if (IS_FORTRAN_LANGUAGE) Source::source_language = SourceLanguage::Current; Nodecl::NodeclBase arguments_tree; TL::Source *fill_arguments; if (!_lowering->in_ompss_mode()) { // OpenMP arguments_tree = fill_immediate_arguments_tree; fill_arguments = &fill_immediate_arguments; } else { // OmpSs arguments_tree = fill_outline_arguments_tree; fill_arguments = &fill_outline_arguments; } // Now 
attach the slicer symbol to its final scope (see tl-lower-for-worksharing.cpp) const decl_context_t* spawn_inner_context = arguments_tree.retrieve_context().get_decl_context(); slicer_descriptor.get_internal_symbol()->decl_context = spawn_inner_context; ::insert_entry(spawn_inner_context->current_scope, slicer_descriptor.get_internal_symbol()); // Parse the arguments Nodecl::NodeclBase new_tree = fill_arguments->parse_statement(arguments_tree); arguments_tree.replace(new_tree); // Finally, replace the construct by the tree that represents the spawn code construct.replace(spawn_code_tree); }
// Generates the blocked version of the loop nest held in _for_nest_info.
//
// _nesting is set to the smaller of the number of blocking factors and the
// number of loops in the nest. For each of the first _nesting loops two `for`
// headers are generated:
//   1. an inter-block loop over a fresh variable "_blk_<name>" that advances
//      by step * factor, and
//   2. an intra-block loop over the original induction variable, starting at
//      "_blk_<name>" and bounded by the minimum of
//      "_blk_<name> + step * (factor - 1)" and the original upper bound.
// The body of loop number _nesting - 1 is placed inside the innermost header.
//
// NOTE(review): the TL::Source objects created with `new` are never deleted in
// this function - presumably they must outlive it because `result` refers to
// them; confirm against TL::Source semantics.
// NOTE(review): when _nesting is 0 the final nest_loops[_nesting - 1] indexes
// before the first element - confirm callers never reach this with an empty
// nest or an empty factor list.
TL::Source LoopBlocking::do_blocking()
{
    Source result, block_loops;

    result
        << block_loops
        ;

    ObjectList<ForStatement> nest_loops = _for_nest_info.get_nest_list();

    _nesting = std::min(_nest_factors.size(), nest_loops.size());

    // Source where the next (more deeply nested) loop header is written
    TL::Source *innermost = &block_loops;

    // First pass: one inter-block loop per nest level, using "_blk_" variables
    ObjectList<TL::Expression>::iterator current_factor = _nest_factors.begin();
    ObjectList<TL::ForStatement>::iterator current_for = nest_loops.begin();
    for (int depth = 0;
            depth < _nesting;
            ++depth, ++current_for, ++current_factor)
    {
        TL::IdExpression induction_var = current_for->get_induction_variable();
        TL::Symbol sym = induction_var.get_symbol();
        TL::Type type = sym.get_type();

        std::string block_var = "_blk_" + sym.get_name();

        TL::Source *next_part = new TL::Source();
        (*innermost)
            << "for("
            << type.get_declaration(sym.get_scope(), block_var)
            << " = " << current_for->get_lower_bound() << ";"
            << block_var << current_for->get_bound_operator() << current_for->get_upper_bound() << ";"
            << block_var << "+= ( " << current_for->get_step() << ") * " << current_factor->prettyprint() << ")"
            << (*next_part)
            ;

        innermost = next_part;
    }

    // Second pass: one intra-block loop per nest level
    current_factor = _nest_factors.begin();
    current_for = nest_loops.begin();
    for (int depth = 0;
            depth < _nesting;
            ++depth, ++current_for, ++current_factor)
    {
        TL::IdExpression induction_var = current_for->get_induction_variable();
        TL::Symbol sym = induction_var.get_symbol();
        TL::Type type = sym.get_type();

        std::string iter_var = induction_var.prettyprint();
        std::string iter_init = iter_var;

        // A loop that declared its iterator in the for-init must declare it
        // again in the generated intra-block loop
        AST_t loop_init = current_for->get_iterating_init();
        if (Declaration::predicate(loop_init))
        {
            iter_init = type.get_declaration(sym.get_scope(), iter_var);
        }

        std::string block_var = "_blk_" + sym.get_name();

        // Upper limit of the intra-block loop; filled in after it is referenced
        TL::Source min_code;

        TL::Source *next_part = new TL::Source();
        (*innermost)
            << "for("
            << iter_init << " = " << block_var << ";"
            << iter_var << current_for->get_bound_operator() << min_code << ";"
            << iter_var << "+= ( " << current_for->get_step() << "))"
            << (*next_part)
            ;

        // min(block_end, loop_end) written as a conditional expression
        TL::Source block_end, loop_end;
        min_code
            << "((" << block_end << ") < (" << loop_end << ") ? (" << block_end << ") : (" << loop_end << "))"
            ;

        block_end
            << block_var << " + (" << current_for->get_step() << ") * ("
            << current_factor->prettyprint() << " - 1 )";
        loop_end << current_for->get_upper_bound();

        innermost = next_part;
    }

    // Finally the body of the innermost blocked loop
    (*innermost)
        << nest_loops[_nesting - 1].get_loop_body()
        ;

    return result;
}
// Generates the unrolled version of _for_stmt.
//
// Non-regular loops are delegated to silly_unroll(). Otherwise the result is
// a compound statement made of, in order: an optional declaration of the
// induction variable, `before_main`, the main loop, `after_main` and the
// epilogue loop.
//
// - _factor > 1: the main loop advances by step * _factor and stops
//   (_factor - 1) steps before the upper bound; the epilogue loop continues
//   from the induction variable's current value with the original step. If
//   the original loop declared its induction variable in the for-init, that
//   declaration is hoisted before the loops. With _remove_tasks set this path
//   is not supported and running_error is called.
// - otherwise: the original loop header is reused as is and no epilogue or
//   hoisted declaration is generated.
//
// The loop body is replicated by simple_replication when _ignore_omp is set
// or the body contains no relevant OpenMP, and by omp_replication otherwise.
TL::Source LoopUnroll::do_unroll()
{
    if (!_for_stmt.regular_loop())
    {
        return silly_unroll();
    }

    // Get parts of the loop
    IdExpression induction_var = _for_stmt.get_induction_variable();
    Expression lower_bound = _for_stmt.get_lower_bound();
    Expression upper_bound = _for_stmt.get_upper_bound();
    Expression step = _for_stmt.get_step();
    TL::Source operator_bound = _for_stmt.get_bound_operator();

    Statement loop_body = _for_stmt.get_loop_body();

    TL::Source result, epilogue, main, induction_var_decl, before_main, after_main;

    // Skeleton of the result; the parts are filled in below
    result
        << "{"
        << induction_var_decl
        << before_main
        << main
        << after_main
        << epilogue
        << "}"
        ;

    Source replicated_body;
    Source epilogue_body;
    if (_factor > 1)
    {
        AST_t init = _for_stmt.get_iterating_init();
        if (Declaration::predicate(init))
        {
            TL::Symbol sym = induction_var.get_symbol();
            TL::Type type = sym.get_type();
            // Declare it since it will have local scope
            induction_var_decl
                << type.get_declaration(sym.get_scope(), sym.get_name()) << ";"
                ;
        }

        main
            << "for (" << induction_var << " = " << lower_bound << ";"
            << induction_var << operator_bound << "((" << upper_bound << ") - (" << _factor << " - 1)* (" << step << "));"
            << induction_var << "+= (" << step << ") * " << _factor << ")"
            << "{"
            << replicated_body
            << "}"
            ;

        // FIXME - It could help to initialize here another variable and make both loops independent
        epilogue
            << "for ( ; " // No initialization, keep using the old induction var
            << induction_var << operator_bound << upper_bound << ";"
            << induction_var << "+= (" << step << "))"
            << epilogue_body
            ;

        if (!_remove_tasks)
        {
            epilogue_body << loop_body;
        }
        else
        {
            std::cerr << "Do not create task " << __FILE__ << ":" << __LINE__ << std::endl;
            running_error("Path not supported yet", 0);
            // epilogue_body
            //     << loop_body.get_ast().prettyprint_with_callback(functor(ignore_tasks));
        }
    }
    else
    {
        // Leave it as is
        main
            << "for(" << _for_stmt.get_iterating_init().prettyprint()
            << _for_stmt.get_iterating_condition() << ";"
            << _for_stmt.get_iterating_expression() << ")"
            << "{"
            << replicated_body
            << "}"
            ;
    }

    // Replicate the body. The OpenMP check is always evaluated, as before.
    const bool consider_omp = TaskAggregation::contains_relevant_openmp(loop_body);

    if (_ignore_omp || !consider_omp)
    {
        simple_replication(_factor, replicated_body, epilogue_body,
                induction_var, loop_body);
    }
    else
    {
        omp_replication(_factor, replicated_body, epilogue_body,
                induction_var, loop_body, before_main, after_main);
    }

    return result;
}
// Creates a new function symbol called `name` in the scope of
// `current_function`.
//
// Parameters:
//   current_function - function whose declaration context hosts the new
//                      symbol; in Fortran, a nested function is first
//                      replaced by the symbol related to its scope
//   name             - name of the new function
//   return_type      - return type of the new function
//   parameter_names  - names of the parameters; must have as many elements
//                      as parameter_types (checked with ERROR_CONDITION)
//   parameter_types  - types of the parameters (registered unqualified)
//
// The new symbol is marked static. It additionally becomes a public member of
// the class of current_function when that one is a member, is marked inline
// when current_function is inline and, in Fortran, is registered as a module
// procedure of the module that contains current_function. When the type of
// current_function is a template specialized type, a template symbol is
// created and the returned symbol is the one of its primary type.
TL::Symbol new_function_symbol(
        TL::Symbol current_function,
        const std::string& name,
        TL::Type return_type,
        TL::ObjectList<std::string> parameter_names,
        TL::ObjectList<TL::Type> parameter_types)
{
    if (IS_FORTRAN_LANGUAGE && current_function.is_nested_function())
    {
        // Work with the enclosing function instead
        current_function = current_function.get_scope().get_related_symbol();
    }

    decl_context_t decl_context = current_function.get_scope().get_decl_context();

    ERROR_CONDITION(parameter_names.size() != parameter_types.size(), "Mismatch between names and types", 0);

    // Context in which the parameters of the new function live
    decl_context_t function_context;
    if (IS_FORTRAN_LANGUAGE)
    {
        function_context = new_program_unit_context(decl_context);
    }
    else
    {
        function_context = new_function_context(decl_context);
        function_context = new_block_context(function_context);
    }

    // Build the function type, creating one symbol per parameter on the way
    int num_parameters = 0;
    scope_entry_t** parameter_list = NULL;

    parameter_info_t* p_types = new parameter_info_t[parameter_types.size()];
    parameter_info_t* current_info = &(p_types[0]);
    TL::ObjectList<TL::Type>::iterator current_type = parameter_types.begin();
    for (TL::ObjectList<std::string>::iterator current_name = parameter_names.begin();
            current_name != parameter_names.end();
            ++current_name, ++current_info, ++current_type)
    {
        scope_entry_t* param = new_symbol(function_context,
                function_context.current_scope,
                current_name->c_str());
        param->entity_specs.is_user_declared = 1;
        param->kind = SK_VARIABLE;
        param->locus = make_locus("", 0, 0);

        param->defined = 1;

        param->type_information = get_unqualified_type(current_type->get_internal_type());

        P_LIST_ADD(parameter_list, num_parameters, param);

        current_info->is_ellipsis = 0;
        current_info->nonadjusted_type_info = NULL;
        current_info->type_info = get_indirect_type(param);
    }

    type_t *function_type = get_new_function_type(
            return_type.get_internal_type(),
            p_types,
            parameter_types.size());

    // The parameter info array is only needed to build the function type
    delete[] p_types;

    // Now, we can create the new function symbol
    scope_entry_t* new_function_sym = NULL;
    if (current_function.get_type().is_template_specialized_type())
    {
        scope_entry_t* new_template_sym = new_symbol(
                decl_context, decl_context.current_scope, name.c_str());
        new_template_sym->kind = SK_TEMPLATE;
        new_template_sym->locus = make_locus("", 0, 0);

        new_template_sym->type_information = get_new_template_type(
                decl_context.template_parameters,
                function_type,
                uniquestr(name.c_str()),
                decl_context, make_locus("", 0, 0));

        template_type_set_related_symbol(new_template_sym->type_information, new_template_sym);

        // The new function is the primary template specialization
        new_function_sym = named_type_get_symbol(
                template_type_get_primary_type(
                    new_template_sym->type_information));
    }
    else
    {
        new_function_sym = new_symbol(decl_context, decl_context.current_scope, name.c_str());
        new_function_sym->entity_specs.is_user_declared = 1;
        new_function_sym->kind = SK_FUNCTION;
        new_function_sym->locus = make_locus("", 0, 0);
        new_function_sym->type_information = function_type;
    }

    // Link the function scopes and the new symbol in both directions
    function_context.function_scope->related_entry = new_function_sym;
    function_context.block_scope->related_entry = new_function_sym;

    new_function_sym->related_decl_context = function_context;

    new_function_sym->entity_specs.related_symbols = parameter_list;
    new_function_sym->entity_specs.num_related_symbols = num_parameters;
    for (int position = 0; position < new_function_sym->entity_specs.num_related_symbols; ++position)
    {
        symbol_set_as_parameter_of_function(
                new_function_sym->entity_specs.related_symbols[position],
                new_function_sym,
                /* parameter position */ position);
    }

    // Make it static
    new_function_sym->entity_specs.is_static = 1;

    // Make it member if the enclosing function is member
    if (current_function.is_member())
    {
        new_function_sym->entity_specs.is_member = 1;
        new_function_sym->entity_specs.class_type = current_function.get_class_type().get_internal_type();

        new_function_sym->entity_specs.access = AS_PUBLIC;

        ::class_type_add_member(new_function_sym->entity_specs.class_type, new_function_sym);
    }

    if (current_function.is_inline())
    {
        new_function_sym->entity_specs.is_inline = 1;
    }

    // new_function_sym->entity_specs.is_defined_inside_class_specifier =
    //     current_function.get_internal_symbol()->entity_specs.is_defined_inside_class_specifier;

    if (IS_FORTRAN_LANGUAGE && current_function.is_in_module())
    {
        // Register the new function as a module procedure of the same module
        scope_entry_t* module_sym = current_function.in_module().get_internal_symbol();
        new_function_sym->entity_specs.in_module = module_sym;
        P_LIST_ADD(
                module_sym->entity_specs.related_symbols,
                module_sym->entity_specs.num_related_symbols,
                new_function_sym);
        new_function_sym->entity_specs.is_module_procedure = 1;
    }

    return new_function_sym;
}
// Returns the Fortran subroutine that combines two arrays of partial results
// of the reduction `red`, creating it the first time it is requested.
//
// Results are cached in _basic_reduction_map_openmp keyed by `red`; a cached
// symbol is returned without generating anything. Otherwise a subroutine
//     SUBROUTINE <name>(omp_out, omp_in, num_scalars)
// is parsed at global level, where <name> is built from "nanos_red_", the
// streamed value of the `red` pointer and a hash of the file name of
// `construct`. Its body is a loop of index I from 1 to num_scalars whose
// single statement is a copy of the reduction combiner rewritten by
// BasicReductionExpandVisitor over the parameters omp_in/omp_out and the loop
// index. The new function is appended to the enclosing top level location of
// `construct`.
TL::Symbol LoweringVisitor::create_basic_reduction_function_fortran(OpenMP::Reduction* red, Nodecl::NodeclBase construct)
{
    // Reuse the function if this reduction has already been handled
    reduction_map_t::iterator cached = _basic_reduction_map_openmp.find(red);
    if (cached != _basic_reduction_map_openmp.end())
    {
        return cached->second;
    }

    std::string fun_name;
    {
        std::stringstream name_builder;
        name_builder << "nanos_red_" << red << "_" << simple_hash_str(construct.get_filename().c_str());
        fun_name = name_builder.str();
    }

    // Placeholder that is later replaced by the combining loop
    Nodecl::NodeclBase function_body;

    Source src;
    src << "SUBROUTINE " << fun_name << "(omp_out, omp_in, num_scalars)\n";
    src << "IMPLICIT NONE\n";
    src << as_type(red->get_type()) << " :: omp_out(num_scalars)\n";
    src << as_type(red->get_type()) << " :: omp_in(num_scalars)\n";
    src << "INTEGER, VALUE :: num_scalars\n";
    src << "INTEGER :: I\n";
    src << statement_placeholder(function_body) << "\n";
    src << "END SUBROUTINE " << fun_name << "\n";

    Nodecl::NodeclBase function_code = src.parse_global(construct);

    // Recover the symbols declared by the subroutine that was just parsed
    TL::Scope inside_function = ReferenceScope(function_body).get_scope();

    TL::Symbol param_omp_in = inside_function.get_symbol_from_name("omp_in");
    ERROR_CONDITION(!param_omp_in.is_valid(), "Symbol omp_in not found", 0);

    TL::Symbol param_omp_out = inside_function.get_symbol_from_name("omp_out");
    ERROR_CONDITION(!param_omp_out.is_valid(), "Symbol omp_out not found", 0);

    TL::Symbol function_sym = inside_function.get_symbol_from_name(fun_name);
    ERROR_CONDITION(!function_sym.is_valid(), "Symbol %s not found", fun_name.c_str());

    TL::Symbol index = inside_function.get_symbol_from_name("i");
    ERROR_CONDITION(!index.is_valid(), "Symbol %s not found", "i");

    TL::Symbol num_scalars = inside_function.get_symbol_from_name("num_scalars");
    ERROR_CONDITION(!num_scalars.is_valid(), "Symbol %s not found", "num_scalars");

    // References used by the loop header: I = 1, num_scalars
    Nodecl::NodeclBase num_scalars_ref = Nodecl::Symbol::make(num_scalars);
    num_scalars_ref.set_type(num_scalars.get_type().no_ref().get_lvalue_reference_to());

    Nodecl::Symbol nodecl_index = Nodecl::Symbol::make(index);
    nodecl_index.set_type(index.get_type().get_lvalue_reference_to());

    Nodecl::NodeclBase loop_header = Nodecl::RangeLoopControl::make(
            nodecl_index,
            const_value_to_nodecl(const_value_get_signed_int(1)),
            num_scalars_ref,
            Nodecl::NodeclBase::null());

    // Rewrite a copy of the combiner in terms of the subroutine parameters
    Nodecl::NodeclBase expanded_combiner = red->get_combiner().shallow_copy();
    BasicReductionExpandVisitor expander_visitor(
            red->get_omp_in(),
            param_omp_in,
            red->get_omp_out(),
            param_omp_out,
            index);
    expander_visitor.walk(expanded_combiner);

    function_body.replace(
            Nodecl::ForStatement::make(
                loop_header,
                Nodecl::List::make(
                    Nodecl::ExpressionStatement::make(
                        expanded_combiner)),
                Nodecl::NodeclBase::null()));

    _basic_reduction_map_openmp[red] = function_sym;

    if (IS_FORTRAN_LANGUAGE)
    {
        Nodecl::Utils::Fortran::append_used_modules(
                construct.retrieve_context(),
                function_sym.get_related_scope());
    }

    Nodecl::Utils::append_to_enclosing_top_level_location(construct, function_code);

    return function_sym;
}