// Example lowering of an OpenMP task: the task construct is replaced by a
// compound statement that prints a message to stderr before and after the
// original task body.
//
// For this example we assume the user #include'd <stdio.h>.
void LoweringVisitor::visit_post(const Nodecl::OpenMP::Task& task)
{
    std::cerr << __PRETTY_FUNCTION__ << std::endl;

    // Slot for the task body; it does not refer to a valid tree until the
    // source below has been parsed.
    Nodecl::NodeclBase body_slot;

    Source wrapper_src;
    wrapper_src
        << "{"
        <<     "fprintf(stderr, \"Before the task\\n\");"
        <<     statement_placeholder(body_slot)
        <<     "fprintf(stderr, \"After the task\\n\");"
        << "}"
        ;

    // The task node itself provides the context needed for parsing.
    Nodecl::NodeclBase wrapper_tree = wrapper_src.parse_statement(task);

    // After parse_statement, body_slot is an empty statement (';') inside
    // wrapper_tree, so wrapper_tree is a sort of skeleton. Put the statements
    // of the task in that slot. This is crude, but fine here because no
    // symbolic bindings are being changed.
    body_slot.replace(task.get_statements());

    // Finally swap the original task for the wrapper, which now contains the
    // task body.
    task.replace(wrapper_tree);
}
bool LoweringVisitor::handle_reductions_on_task( Nodecl::NodeclBase construct, OutlineInfo& outline_info, Nodecl::NodeclBase statements, bool generate_final_stmts, Nodecl::NodeclBase& final_statements) { int num_reductions = 0; TL::Source reductions_stuff, final_clause_stuff, // This source represents an expression which is used to check if // we can do an optimization in the final code. This optimization // consists on calling the original code (with a serial closure) if // we are in a final context and the reduction variables that we // are using have not been registered previously final_clause_opt_expr, extra_array_red_memcpy; std::map<TL::Symbol, std::string> reduction_symbols_map; TL::ObjectList<OutlineDataItem*> data_items = outline_info.get_data_items(); for (TL::ObjectList<OutlineDataItem*>::iterator it = data_items.begin(); it != data_items.end(); it++) { if (!(*it)->is_reduction()) continue; std::pair<TL::OpenMP::Reduction*, TL::Type> red_info_pair = (*it)->get_reduction_info(); TL::OpenMP::Reduction* reduction_info = red_info_pair.first; TL::Type reduction_type = red_info_pair.second.no_ref(); TL::Symbol reduction_item = (*it)->get_symbol(); TL::Type reduction_item_type = reduction_item.get_type().no_ref(); std::string storage_var_name = (*it)->get_field_name() + "_storage"; TL::Type storage_var_type = reduction_type.get_pointer_to(); TL::Symbol reduction_function, reduction_function_original_var, initializer_function; // Checking if the current reduction type has been treated before // Note that if that happens we can reuse the combiner and // initializer function. 
// // C/C++: note that if the type of the list item is an array type, // we regiter the reduction over its element type TL::Type registered_reduction_type = reduction_type; while (!IS_FORTRAN_LANGUAGE && registered_reduction_type.is_array()) { registered_reduction_type = registered_reduction_type.array_element(); } LoweringVisitor::reduction_task_map_t::iterator task_red_info = _task_reductions_map.find(std::make_pair(reduction_info, registered_reduction_type)); if (task_red_info != _task_reductions_map.end()) { reduction_function = task_red_info->second._reducer; reduction_function_original_var = task_red_info->second._reducer_orig_var; initializer_function = task_red_info->second._initializer; } else { create_reduction_functions(reduction_info, construct, registered_reduction_type, reduction_item, reduction_function, reduction_function_original_var); create_initializer_function(reduction_info, construct, registered_reduction_type, initializer_function); _task_reductions_map.insert( std::make_pair( std::make_pair(reduction_info, registered_reduction_type), TaskReductionsInfo(reduction_function, reduction_function_original_var, initializer_function) )); } // Mandatory TL::Sources to be filled by any reduction TL::Source orig_address, // address of the original reduction variable storage_var; // variable which holds the address of the storage // Specific TL::Sources to be filled only by Fortran array reduction TL::Source extra_array_red_decl; if (IS_C_LANGUAGE || IS_CXX_LANGUAGE) { storage_var << storage_var_name; orig_address << (reduction_item_type.is_pointer() ? 
"" : "&") << (*it)->get_field_name(); final_clause_stuff << "if (" << storage_var_name << " == 0)" << "{" << storage_var_name << " = " << "(" << as_type(storage_var_type) << ")" << orig_address << ";" << "}" ; } else { orig_address << "&" << (*it)->get_field_name(); if (reduction_item_type.is_array()) { size_t size_of_array_descriptor = fortran_size_of_array_descriptor( fortran_get_rank0_type(reduction_item_type.get_internal_type()), fortran_get_rank_of_type(reduction_item_type.get_internal_type())); storage_var << storage_var_name << "_indirect"; extra_array_red_decl << "void *" << storage_var << ";"; extra_array_red_memcpy << "nanos_err = nanos_memcpy(" << "(void **) &" << storage_var_name << "," << storage_var << "," << size_of_array_descriptor << ");" ; final_clause_stuff << "if (" << storage_var << " == 0)" << "{" << "nanos_err = nanos_memcpy(" << "(void **) &" << storage_var_name << "," << "(void *) "<< orig_address << "," << size_of_array_descriptor << ");" << "}" << "else" << "{" << extra_array_red_memcpy << "}" ; } else { // We need to convert a void* type into a pointer to the reduction type. // As a void* in FORTRAN is represented as an INTEGER(8), we cannot do this // conversion directly in the FORTRAN source. For this reason we introduce // a new function that will be defined in a C file. 
TL::Symbol func = TL::Nanox::get_function_ptr_conversion( // Destination reduction_item_type.get_pointer_to(), // Origin TL::Type::get_void_type().get_pointer_to(), construct.retrieve_context()); storage_var << storage_var_name; final_clause_stuff << "if (" << storage_var << " == 0)" << "{" << storage_var_name << " = " << func.get_name() << "(" << orig_address << ");" << "}" ; } } if (num_reductions > 0) final_clause_opt_expr << " && "; final_clause_opt_expr << storage_var << " == 0 "; num_reductions++; reductions_stuff << extra_array_red_decl << as_type(storage_var_type) << " " << storage_var_name << ";" << "nanos_err = nanos_task_reduction_get_thread_storage(" << "(void *)" << orig_address << "," << "(void **) &" << storage_var << ");" ; reduction_symbols_map[reduction_item] = storage_var_name; } if (num_reductions != 0) { // Generating the final code if needed if (generate_final_stmts) { std::map<Nodecl::NodeclBase, Nodecl::NodeclBase>::iterator it4 = _final_stmts_map.find(construct); ERROR_CONDITION(it4 == _final_stmts_map.end(), "Unreachable code", 0); Nodecl::NodeclBase placeholder; TL::Source new_statements_src; new_statements_src << "{" << "nanos_err_t nanos_err;" << reductions_stuff << "if (" << final_clause_opt_expr << ")" << "{" << as_statement(it4->second) << "}" << "else" << "{" << final_clause_stuff << statement_placeholder(placeholder) << "}" << "}" ; final_statements = handle_task_statements( construct, statements, placeholder, new_statements_src, reduction_symbols_map); } // Generating the task code { TL::Source new_statements_src; Nodecl::NodeclBase placeholder; new_statements_src << "{" << "nanos_err_t nanos_err;" << reductions_stuff << extra_array_red_memcpy << statement_placeholder(placeholder) << "}" ; Nodecl::NodeclBase new_statements = handle_task_statements( construct, statements, placeholder, new_statements_src, reduction_symbols_map); statements.replace(new_statements); } } ERROR_CONDITION(num_reductions != 0 && 
!Nanos::Version::interface_is_at_least("task_reduction", 1001), "The version of the runtime begin used does not support task reductions", 0); return (num_reductions != 0); }
// Creates the C/C++ function that initializes the private copy of a reduction.
//
// The generated function has the form
//
//     void nanos_ini_<red>_<type>_<hash>(T& omp_priv, T& omp_orig) { ... }
//
// where T is 'reduction_type' without reference. Its body is built from the
// initializer of 'red', with the omp_priv/omp_orig symbols of the reduction
// mapped to the parameters of the new function. The function is prepended to
// the top level location that encloses 'construct'.
//
// Returns the symbol of the new function.
static TL::Symbol create_initializer_function_c(
        OpenMP::Reduction* red,
        TL::Type reduction_type,
        Nodecl::NodeclBase construct)
{
    // The name is made of the reduction, the internal type and a hash of the
    // file name of the construct
    std::stringstream name_builder;
    name_builder
        << "nanos_ini_" << red
        << "_" << reduction_type.get_internal_type()
        << "_" << simple_hash_str(construct.get_filename().c_str());
    std::string fun_name = name_builder.str();

    // Both parameters are lvalue references to the reduction type
    TL::Type ref_param_type = reduction_type.no_ref().get_lvalue_reference_to();

    Nodecl::NodeclBase body_slot;
    Source function_src;
    function_src
        << "void " << fun_name << "("
        <<      as_type(ref_param_type) << " omp_priv,"
        <<      as_type(ref_param_type) << " omp_orig)"
        << "{"
        <<    statement_placeholder(body_slot)
        << "}"
        ;

    Nodecl::NodeclBase function_code =
        function_src.parse_global(construct.retrieve_context().get_global_scope());

    // Look up the entities that the parse has just created
    TL::Scope function_scope = ReferenceScope(body_slot).get_scope();

    TL::Symbol param_omp_priv = function_scope.get_symbol_from_name("omp_priv");
    ERROR_CONDITION(!param_omp_priv.is_valid(), "Symbol omp_priv not found", 0);

    TL::Symbol param_omp_orig = function_scope.get_symbol_from_name("omp_orig");
    ERROR_CONDITION(!param_omp_orig.is_valid(), "Symbol omp_orig not found", 0);

    TL::Symbol function_sym = function_scope.get_symbol_from_name(fun_name);
    ERROR_CONDITION(!function_sym.is_valid(), "Symbol %s not found", fun_name.c_str());

    // Work on a copy of the initializer. A braced-implicit structured value
    // is turned into a compound literal.
    Nodecl::NodeclBase initializer = red->get_initializer().shallow_copy();
    if (initializer.is<Nodecl::StructuredValue>())
    {
        Nodecl::StructuredValue braced_value = initializer.as<Nodecl::StructuredValue>();
        if (braced_value.get_form().is<Nodecl::StructuredValueBracedImplicit>())
        {
            braced_value.set_form(Nodecl::StructuredValueCompoundLiteral::make());
        }
    }

    // Make the initializer refer to the parameters of the new function
    Nodecl::Utils::SimpleSymbolMap param_map;
    param_map.add_map(red->get_omp_priv(), param_omp_priv);
    param_map.add_map(red->get_omp_orig(), param_omp_orig);

    Nodecl::NodeclBase new_initializer =
        Nodecl::Utils::deep_copy(initializer, function_scope, param_map);

    Nodecl::NodeclBase body_expr;
    if (red->get_is_initialization())
    {
        // The original initializer was something like 'omp_priv = expr1', but
        // new_initializer only represents the right-hand side (expr1 in the
        // example), so the assignment to omp_priv is built by hand.
        Nodecl::NodeclBase omp_priv_ref = Nodecl::Symbol::make(param_omp_priv);
        omp_priv_ref.set_type(param_omp_priv.get_type());

        body_expr = Nodecl::Assignment::make(
                omp_priv_ref,
                new_initializer,
                omp_priv_ref.get_type().no_ref());
    }
    else
    {
        body_expr = new_initializer;
    }

    body_slot.replace(
            Nodecl::List::make(Nodecl::ExpressionStatement::make(body_expr)));

    // As the initializer function is needed during the instantiation of
    // the task, this function should be inserted before the construct
    Nodecl::Utils::prepend_to_enclosing_top_level_location(construct, function_code);

    return function_sym;
}
void LoweringVisitor::visit(const Nodecl::OpenMP::Parallel& construct) { Nodecl::NodeclBase num_replicas = construct.get_num_replicas(); Nodecl::NodeclBase environment = construct.get_environment(); Nodecl::NodeclBase statements = construct.get_statements(); ERROR_CONDITION (_lowering->in_ompss_mode(), "A parallel reached Nanos++ lowering but we are in OmpSs mode", 0); walk(statements); // Get the new statements statements = construct.get_statements(); ParallelEnvironmentVisitor parallel_environment; parallel_environment.walk(environment); Scope enclosing_scope = construct.retrieve_context(); Symbol function_symbol = Nodecl::Utils::get_enclosing_function(construct); OutlineInfo outline_info(*_lowering, environment, function_symbol); Nodecl::NodeclBase task_label = construct.get_environment().as<Nodecl::List>() .find_first<Nodecl::OmpSs::TaskLabel>(); // Handle the special object 'this' if (IS_CXX_LANGUAGE && !function_symbol.is_static() && function_symbol.is_member()) { TL::Symbol this_symbol = enclosing_scope.get_symbol_this(); ERROR_CONDITION(!this_symbol.is_valid(), "Invalid symbol", 0); Nodecl::NodeclBase sym_ref = Nodecl::Symbol::make(this_symbol); sym_ref.set_type(this_symbol.get_type()); // The object 'this' may already have an associated OutlineDataItem OutlineDataItem& argument_outline_data_item = outline_info.get_entity_for_symbol(this_symbol); argument_outline_data_item.set_is_cxx_this(true); // ERROR_CONDITION(argument_outline_data_item.get_sharing() == OutlineDataItem::SHARING_UNDEFINED, // "This does not have any data-sharing\n", 0); // This is a special kind of shared if (argument_outline_data_item.get_sharing() == OutlineDataItem::SHARING_UNDEFINED) argument_outline_data_item.set_sharing(OutlineDataItem::SHARING_CAPTURE_ADDRESS); argument_outline_data_item.set_base_address_expression(sym_ref); } TL::Symbol structure_symbol = declare_argument_structure(outline_info, construct); Source outline_source, reduction_code_src, reduction_initialization_src; 
Nodecl::NodeclBase inner_placeholder; outline_source << "nanos_err_t nanos_err;" << "nanos_err = nanos_omp_set_implicit(nanos_current_wd());" << "if (nanos_err != NANOS_OK) nanos_handle_error(nanos_err);" << "nanos_err = nanos_enter_team();" << "if (nanos_err != NANOS_OK) nanos_handle_error(nanos_err);" << reduction_initialization_src << statement_placeholder(inner_placeholder) << reduction_code_src << "nanos_err = nanos_omp_barrier();" << "if (nanos_err != NANOS_OK) nanos_handle_error(nanos_err);" << "nanos_err = nanos_leave_team();" << "if (nanos_err != NANOS_OK) nanos_handle_error(nanos_err);" ; Nodecl::NodeclBase reduction_initialization, reduction_code; if (there_are_reductions(outline_info)) { reduction_initialization_src << statement_placeholder(reduction_initialization); reduction_code_src << statement_placeholder(reduction_code); } // Outline DeviceHandler device_handler = DeviceHandler::get_device_handler(); const TargetInformation& target_info = outline_info.get_target_information(function_symbol); std::string outline_name = target_info.get_outline_name(); CreateOutlineInfo info( _lowering, outline_name, outline_info.get_data_items(), target_info, /* original statements */ statements, /* current task statements */ statements, task_label, structure_symbol, /* called_task */ TL::Symbol::invalid()); // List of device names const TL::ObjectList<std::string>& device_names = target_info.get_device_names(); for (TL::ObjectList<std::string>::const_iterator it = device_names.begin(); it != device_names.end(); it++) { std::string device_name = *it; DeviceProvider* device = device_handler.get_device(device_name); ERROR_CONDITION(device == NULL, " Device '%s' has not been loaded.", device_name.c_str()); Nodecl::NodeclBase outline_placeholder, output_statements; Nodecl::Utils::SimpleSymbolMap *symbol_map = NULL; device->create_outline(info, outline_placeholder, output_statements, symbol_map); if (IS_FORTRAN_LANGUAGE) { Source::source_language = SourceLanguage::C; } 
outline_placeholder.replace(outline_source.parse_statement(outline_placeholder)); if (IS_FORTRAN_LANGUAGE) { Source::source_language = SourceLanguage::Current; } if (there_are_reductions(outline_info)) { reduction_initialization_code(outline_info, reduction_initialization, construct); perform_partial_reduction(outline_info, reduction_code); } Nodecl::Utils::LabelSymbolMap label_symbol_map(symbol_map, output_statements, outline_placeholder); Nodecl::NodeclBase outline_statements_code = Nodecl::Utils::deep_copy(output_statements, outline_placeholder, label_symbol_map); delete symbol_map; inner_placeholder.replace(outline_statements_code); } // This function replaces the current construct parallel_spawn(outline_info, construct, num_replicas, parallel_environment.if_condition, outline_name, structure_symbol, task_label); }
void OpenMPTransform::adf_task_postorder(PragmaCustomConstruct adf_construct) { PragmaCustomClause exit_condition_clause = adf_construct.get_clause("exit_condition"); PragmaCustomClause trigger_set = adf_construct.get_clause("trigger_set"); PragmaCustomClause group_name_clause = adf_construct.get_clause("name"); bool group_name_given = group_name_clause.is_defined(); std::string group_name = "default"; if (group_name_given) { group_name = group_name_clause.get_expression_list()[0].prettyprint(); } Source main_code_layout; AST_t inner_tree, exit_condition_placeholder, trigger_set_placeholder; main_code_layout << "{" << "Transaction * __t = createtx(\"" << adf_construct.get_ast().get_file() << "\"," << adf_construct.get_ast().get_line() <<");" << "addTransactionToADFGroup(\"" << group_name << "\", __t);" << statement_placeholder(trigger_set_placeholder) << "while(1)" << "{" << "starttx(__t);" << "if (__t->status == 18 /*TS_ADF_FINISH*/) " << " break; " << statement_placeholder(exit_condition_placeholder) << "if((__t->nestingLevel > 0) || (0 == setjmp(__t->context)))" << "{" << statement_placeholder(inner_tree) << "if (committx(__t) == 0)" << "{" << "retrytx(__t);" << "break;" << "}" << "}" << "}" << "destroytx(__t);" << "}" ; AST_t code_layout_tree = main_code_layout.parse_statement( adf_construct.get_ast(), adf_construct.get_scope_link()); // empty ObjectList<Symbol> unmanaged_symbols; ObjectList<Symbol> local_symbols; // XXX - Currently using /dev/null as the filter and log files std::fstream stm_log_file; stm_log_file.open("/dev/null", std::ios_base::out | std::ios_base::trunc); STMExpressionReplacement expression_replacement(unmanaged_symbols, local_symbols, "/dev/null", "normal", "/dev/null", "normal", stm_log_file); // STMize exit condition if (exit_condition_clause.is_defined()) { Expression exit_condition = exit_condition_clause.get_expression_list()[0]; // Duplicate but by means of parsing (this updates the semantic information) Source src = 
exit_condition.prettyprint(); AST_t tree = src.parse_expression(inner_tree, adf_construct.get_scope_link()); Expression expr(tree, adf_construct.get_scope_link()); expression_replacement.replace_expression(expr); Source exit_condition_src; exit_condition_src << "if (" << expr.prettyprint() << ")" << "break;" ; AST_t exit_condition_tree = exit_condition_src.parse_statement(exit_condition_placeholder, adf_construct.get_scope_link()); exit_condition_placeholder.replace(exit_condition_tree); } // Main code { // Duplicate with new contextual info Source src = adf_construct.get_statement().prettyprint(); AST_t task_tree = src.parse_statement(inner_tree, adf_construct.get_scope_link()); ObjectList<AST_t> expressions = task_tree.depth_subtrees(Expression::predicate, AST_t::NON_RECURSIVE); for (ObjectList<AST_t>::iterator it = expressions.begin(); it != expressions.end(); it++) { Expression expression(*it, scope_link); expression_replacement.replace_expression(expression); } inner_tree.replace(task_tree); } // Trigger set registration due to 'exit_condition' Source trigger_set_registration; bool have_some_trigger_set = false; if (exit_condition_clause.is_defined()) { ObjectList<IdExpression> id_expression_list = exit_condition_clause.id_expressions(); for (ObjectList<IdExpression>::iterator it = id_expression_list.begin(); it != id_expression_list.end(); it++) { Symbol sym = it->get_symbol(); Type type = sym.get_type(); if (!type.is_array()) { trigger_set_registration << "add_scalar_to_trigger_set(__t, " << "&" << it->prettyprint() << ", " << "sizeof(" << it->prettyprint() << ")" << ");" ; } else { std::cerr << it->get_ast().get_locus() << ": error: exit condition expression '" << it->prettyprint() << "' involves an array. 
This is not yet supported" << std::endl; } have_some_trigger_set = true; } // Trigger set registration due to 'trigger_set' if (trigger_set.is_defined()) { ObjectList<Expression> trigger_set_expr = trigger_set.get_expression_list(); for (ObjectList<Expression>::iterator it = trigger_set_expr.begin(); it != trigger_set_expr.end(); it++) { Expression &trigger_expr(*it); // This should be improved to handle cases like 'a[2]' if (trigger_expr.is_id_expression()) { trigger_set_registration << "add_scalar_to_trigger_set(__t, " << "&" << trigger_expr.prettyprint() << ", " << "sizeof(" << trigger_expr.prettyprint() << ")" << ");" ; } else if (trigger_expr.is_array_section_range()) { ObjectList<Expression> lower_bounds; ObjectList<Expression> upper_bounds; Expression basic_expr = compute_bounds_of_sectioned_expression(trigger_expr, lower_bounds, upper_bounds); // FIXME - Do not be so restrictive, pointers to arrays are useful as well :) if (!basic_expr.is_id_expression()) { std::cerr << basic_expr.get_ast().get_locus() << ": error: invalid trigger set specification '" << trigger_expr.prettyprint() << "' since the basic expression '" << basic_expr.prettyprint() << "' is not an id-expression" << std::endl; set_phase_status(PHASE_STATUS_ERROR); return; } IdExpression id_expression = basic_expr.get_id_expression(); Symbol sym = id_expression.get_symbol(); if (!sym.is_valid()) { std::cerr << id_expression.get_ast().get_locus() << ": error: unknown entity '" << id_expression.prettyprint() << "'" << std::endl; set_phase_status(PHASE_STATUS_ERROR); return; } Type type = sym.get_type(); ObjectList<AST_t> array_dimensions = compute_array_dimensions_of_type(type); if (array_dimensions.size() != lower_bounds.size() || lower_bounds.size() != upper_bounds.size()) { std::cerr << id_expression.get_ast().get_locus() << ": error: mismatch between array type and array section" << std::endl; set_phase_status(PHASE_STATUS_ERROR); return; } trigger_set_registration << 
"add_range_to_trigger_set(__t, " << basic_expr.prettyprint() << "," << array_dimensions.size() << "," << "sizeof(" << get_basic_type(type).get_declaration(sym.get_scope(), "") << ")" ; // First add dimensions for (ObjectList<AST_t>::iterator it = array_dimensions.begin(); it != array_dimensions.end(); it++) { trigger_set_registration << "," << it->prettyprint(); } // Now lower and upper bounds { ObjectList<Expression>::iterator it_l = lower_bounds.begin(); ObjectList<Expression>::iterator it_u = upper_bounds.begin(); while (it_l != lower_bounds.end()) { trigger_set_registration << "," << it_l->prettyprint(); trigger_set_registration << "," << it_u->prettyprint(); it_u++; it_l++; } } // Final parenthesis and semicolon trigger_set_registration << ");" ; } have_some_trigger_set = true; } } } if (have_some_trigger_set) { AST_t trigger_registration_tree = trigger_set_registration.parse_statement(trigger_set_placeholder, adf_construct.get_scope_link()); trigger_set_placeholder.replace(trigger_registration_tree); } // Replace it all adf_construct.get_ast().replace(code_layout_tree); }
void LoweringVisitor::loop_spawn_worksharing(OutlineInfo& outline_info, Nodecl::NodeclBase construct, Nodecl::List distribute_environment, Nodecl::RangeLoopControl& range, const std::string& outline_name, TL::Symbol structure_symbol, TL::Symbol slicer_descriptor, Nodecl::NodeclBase task_label) { Symbol enclosing_function = Nodecl::Utils::get_enclosing_function(construct); Nodecl::OpenMP::Schedule schedule = distribute_environment.find_first<Nodecl::OpenMP::Schedule>(); ERROR_CONDITION(schedule.is_null(), "Schedule tree is missing", 0); Nodecl::NodeclBase lower = range.get_lower(); Nodecl::NodeclBase upper = range.get_upper(); Nodecl::NodeclBase step = range.get_step(); Source struct_size, dynamic_size, struct_arg_type_name; struct_arg_type_name << ((structure_symbol.get_type().is_template_specialized_type() && structure_symbol.get_type().is_dependent()) ? "typename " : "") << structure_symbol.get_qualified_name(enclosing_function.get_scope()) ; struct_size << "sizeof( " << struct_arg_type_name << " )" << dynamic_size; Source immediate_decl; allocate_immediate_structure( structure_symbol.get_user_defined_type(), outline_info, struct_arg_type_name, struct_size, // out immediate_decl, dynamic_size); Source call_outline_function; Source schedule_setup; schedule_setup << "int nanos_chunk;" ; if (schedule.get_text() == "runtime") { schedule_setup << "nanos_omp_sched_t nanos_runtime_sched;" << "nanos_err = nanos_omp_get_schedule(&nanos_runtime_sched, &nanos_chunk);" << "if (nanos_err != NANOS_OK)" << "nanos_handle_error(nanos_err);" << "nanos_ws_t current_ws_policy = nanos_omp_find_worksharing(nanos_runtime_sched);" ; } else { Source schedule_name; if (Nanos::Version::interface_is_at_least("openmp", 8)) { schedule_name << "nanos_omp_sched_" << schedule.get_text(); } else { // We used nanos_omp_sched in versions prior to 8 schedule_name << "omp_sched_" << schedule.get_text(); } schedule_setup << "nanos_ws_t current_ws_policy = nanos_omp_find_worksharing(" << schedule_name 
<< ");" << "if (current_ws_policy == 0)" << "nanos_handle_error(NANOS_UNIMPLEMENTED);" << "nanos_chunk = " << as_expression(schedule.get_chunk()) << ";" ; } Source worksharing_creation; if (IS_CXX_LANGUAGE) { worksharing_creation << as_statement(Nodecl::CxxDef::make(Nodecl::NodeclBase::null(), slicer_descriptor)); } worksharing_creation << "nanos_err = nanos_worksharing_create(" << "&" << as_symbol(slicer_descriptor) << "," << "current_ws_policy," << "(void**)&nanos_setup_info_loop," << "&single_guard);" << "if (nanos_err != NANOS_OK)" << "nanos_handle_error(nanos_err);" ; Nodecl::NodeclBase fill_outline_arguments_tree, fill_immediate_arguments_tree; TL::Source pm_specific_code; if (!_lowering->in_ompss_mode()) { // OpenMP pm_specific_code << immediate_decl << statement_placeholder(fill_immediate_arguments_tree) << "smp_" << outline_name << "(imm_args);" ; } else { // OmpSs std::string wd_description = (!task_label.is_null()) ? task_label.get_text() : enclosing_function.get_name(); Source const_wd_info; const_wd_info << fill_const_wd_info(struct_arg_type_name, /* is_untied */ false, /* mandatory_creation */ true, /* is_function_task */ false, wd_description, outline_info, construct); std::string dyn_props_var = "nanos_wd_dyn_props"; Source dynamic_wd_info; dynamic_wd_info << "nanos_wd_dyn_props_t " << dyn_props_var << ";"; fill_dynamic_properties(dyn_props_var, /* priority_expr */ nodecl_null(), /* final_expr */ nodecl_null(), /* is_implicit */ 0, dynamic_wd_info); pm_specific_code << struct_arg_type_name << " *ol_args = (" << struct_arg_type_name <<"*) 0;" << const_wd_info << "nanos_wd_t nanos_wd_ = (nanos_wd_t) 0;" << dynamic_wd_info << "static nanos_slicer_t replicate = (nanos_slicer_t)0;" << "if (replicate == (nanos_slicer_t)0)" << "replicate = nanos_find_slicer(\"replicate\");" << "if (replicate == (nanos_slicer_t)0)" << "nanos_handle_error(NANOS_UNIMPLEMENTED);" << "nanos_err = nanos_create_sliced_wd(&nanos_wd_, " << "nanos_wd_const_data.base.num_devices, 
nanos_wd_const_data.devices, " << "(size_t)" << struct_size << ", nanos_wd_const_data.base.data_alignment, " << "(void**)&ol_args, nanos_current_wd(), replicate," << "&nanos_wd_const_data.base.props, &" << dyn_props_var << ", 0, (nanos_copy_data_t**)0," << "0, (nanos_region_dimension_internal_t**)0" << ");" << "if (nanos_err != NANOS_OK)" << "nanos_handle_error(nanos_err);" << statement_placeholder(fill_outline_arguments_tree) << "nanos_err = nanos_submit(nanos_wd_, 0, (nanos_data_access_t *) 0, (nanos_team_t) 0);" << "if (nanos_err != NANOS_OK)" << "nanos_handle_error(nanos_err);" ; } TL::Source implicit_barrier_or_tw; if (!distribute_environment.find_first<Nodecl::OpenMP::BarrierAtEnd>().is_null()) { implicit_barrier_or_tw << get_implicit_sync_end_construct_source(); } Source spawn_code; spawn_code << "{" << as_type(get_bool_type()) << " single_guard;" << "nanos_err_t nanos_err;" << schedule_setup << "nanos_ws_info_loop_t nanos_setup_info_loop;" << "nanos_setup_info_loop.lower_bound = " << as_expression(lower) << ";" << "nanos_setup_info_loop.upper_bound = " << as_expression(upper) << ";" << "nanos_setup_info_loop.loop_step = " << as_expression(step) << ";" << "nanos_setup_info_loop.chunk_size = nanos_chunk;" << worksharing_creation << pm_specific_code << implicit_barrier_or_tw << "}" ; Source fill_outline_arguments, fill_immediate_arguments; fill_arguments(construct, outline_info, fill_outline_arguments, fill_immediate_arguments); if (IS_FORTRAN_LANGUAGE) Source::source_language = SourceLanguage::C; Nodecl::NodeclBase spawn_code_tree = spawn_code.parse_statement(construct); if (IS_FORTRAN_LANGUAGE) Source::source_language = SourceLanguage::Current; Nodecl::NodeclBase arguments_tree; TL::Source *fill_arguments; if (!_lowering->in_ompss_mode()) { // OpenMP arguments_tree = fill_immediate_arguments_tree; fill_arguments = &fill_immediate_arguments; } else { // OmpSs arguments_tree = fill_outline_arguments_tree; fill_arguments = &fill_outline_arguments; } // Now 
attach the slicer symbol to its final scope (see tl-lower-for-worksharing.cpp) const decl_context_t* spawn_inner_context = arguments_tree.retrieve_context().get_decl_context(); slicer_descriptor.get_internal_symbol()->decl_context = spawn_inner_context; ::insert_entry(spawn_inner_context->current_scope, slicer_descriptor.get_internal_symbol()); // Parse the arguments Nodecl::NodeclBase new_tree = fill_arguments->parse_statement(arguments_tree); arguments_tree.replace(new_tree); // Finally, replace the construct by the tree that represents the spawn code construct.replace(spawn_code_tree); }
// Creates the Fortran subroutine that allocates an array for OpenCL.
//
// The subroutine is named NANOX_OPENCL_ALLOCATE_INTERNAL_<symbol> and takes
// the array (ARR) followed by a lower and an upper bound per dimension
// (LB1, UB1, LB2, UB2, ...). It allocates a temporary array TMP with those
// bounds, copies the array descriptor of TMP into ARR with NANOS_MEMCPY,
// calls NANOS_OPENCL_ALLOCATE_FORTRAN and deallocates TMP.
//
// Parameters:
//   expr_stmt          - statement whose context is used; the new function is
//                        prepended to its enclosing top level location
//   subscripted_symbol - symbol of the array; it gives the array descriptor
//                        size and the suffix of the function name
//   element_type       - type of the elements of ARR and TMP
//   num_dimensions     - number of dimensions of the array
//   is_allocatable     - ARR is declared ALLOCATABLE if true, POINTER if not
//
// Returns the symbol of the new subroutine.
static Symbol create_new_function_opencl_allocate(
        Nodecl::NodeclBase expr_stmt,
        Symbol subscripted_symbol,
        Type element_type,
        int num_dimensions,
        bool is_allocatable)
{
    std::string arr_attribute = is_allocatable ? "ALLOCATABLE" : "POINTER";

    // Three comma-separated lists with one entry per dimension:
    //   dummy_arguments_bounds: LB1, UB1, LB2, UB2, ...
    //   dimension_attr:         DIMENSION(:, :, ...)
    //   allocate_dims:          LB1:UB1, LB2:UB2, ...
    TL::Source dummy_arguments_bounds, dimension_attr, allocate_dims;

    dimension_attr << "DIMENSION(";
    for (int dim = 1; dim <= num_dimensions; ++dim)
    {
        const bool needs_separator = (dim > 1);
        if (needs_separator)
        {
            allocate_dims << ", ";
            dummy_arguments_bounds <<", ";
            dimension_attr << ", ";
        }

        dummy_arguments_bounds <<"LB" << dim <<", " << "UB" << dim;
        dimension_attr << ":";
        allocate_dims << "LB" << dim << ":" << "UB" << dim;
    }
    dimension_attr << ")";

    size_t size_of_array_descriptor = fortran_size_of_array_descriptor(
            fortran_get_rank0_type(subscripted_symbol.get_type().get_internal_type()),
            fortran_get_rank_of_type(subscripted_symbol.get_type().get_internal_type()));

    // The name ends with the address of the internal symbol
    TL::Source new_function_name;
    new_function_name
        << "NANOX_OPENCL_ALLOCATE_INTERNAL_"
        << (ptrdiff_t) subscripted_symbol.get_internal_symbol()
        ;

    Nodecl::NodeclBase body_slot;
    TL::Source subroutine_src;
    subroutine_src
        << "SUBROUTINE " << new_function_name << "(ARR, " << dummy_arguments_bounds << ")\n"
        <<      as_type(element_type) << ", " << dimension_attr << ", " << arr_attribute << " :: ARR\n"
        <<      as_type(element_type) << ", " << dimension_attr << ", ALLOCATABLE :: TMP\n"
        <<      "INTEGER :: " << dummy_arguments_bounds << "\n"
        <<      "INTEGER :: ERR \n"
        <<      "ALLOCATE(TMP(" << allocate_dims << "))\n"
        <<      statement_placeholder(body_slot)
        <<      "DEALLOCATE(TMP)\n"
        << "END SUBROUTINE " << new_function_name << "\n"
        ;

    Nodecl::NodeclBase function_code =
        subroutine_src.parse_global(expr_stmt.retrieve_context().get_global_scope());

    // The symbols are looked up by their lowercase names
    TL::Scope function_scope = ReferenceScope(body_slot).get_scope();
    TL::Symbol new_function_sym =
        function_scope.get_symbol_from_name(strtolower(new_function_name.get_source().c_str()));
    TL::Symbol arr_sym = function_scope.get_symbol_from_name("arr");
    TL::Symbol tmp_sym = function_scope.get_symbol_from_name("tmp");

    // Functions used in the body to pass ARR and TMP to the runtime calls
    TL::Symbol ptr_of_arr_sym = get_function_ptr_of(arr_sym, function_scope);
    TL::Symbol ptr_of_tmp_sym = get_function_ptr_of(tmp_sym, function_scope);

    TL::Source body_src;
    body_src
        << "ERR = NANOS_MEMCPY("
        <<      ptr_of_arr_sym.get_name() << "(ARR),"
        <<      ptr_of_tmp_sym.get_name() << "(TMP),"
        <<      "INT(" << size_of_array_descriptor << "," << type_get_size(get_ptrdiff_t_type()) << "))\n"
        << "CALL NANOS_OPENCL_ALLOCATE_FORTRAN("
        <<      "SIZEOF(TMP),"
        <<      ptr_of_arr_sym.get_name() << "(ARR))\n"
        ;

    body_slot.replace(body_src.parse_statement(function_scope));

    Nodecl::Utils::prepend_to_enclosing_top_level_location(expr_stmt, function_code);

    return new_function_sym;
}
// Returns the Fortran subroutine that combines the elements of a reduction,
// creating it the first time it is requested for 'red'.
//
// The subroutine has the form
//
//     SUBROUTINE nanos_red_<red>_<hash>(omp_out, omp_in, num_scalars)
//
// and applies the combiner of 'red' inside a loop that goes from 1 to
// num_scalars. The symbol is cached in _basic_reduction_map_openmp and the
// subroutine is appended to the top level location that encloses 'construct'.
TL::Symbol LoweringVisitor::create_basic_reduction_function_fortran(OpenMP::Reduction* red, Nodecl::NodeclBase construct)
{
    // Reuse the function if this reduction already has one
    reduction_map_t::iterator cached = _basic_reduction_map_openmp.find(red);
    if (cached != _basic_reduction_map_openmp.end())
        return cached->second;

    // The name is made of the reduction and a hash of the file name of the
    // construct
    std::stringstream name_builder;
    name_builder
        << "nanos_red_" << red
        << "_" << simple_hash_str(construct.get_filename().c_str());
    std::string fun_name = name_builder.str();

    Nodecl::NodeclBase body_slot;
    Source subroutine_src;
    subroutine_src
        << "SUBROUTINE " << fun_name << "(omp_out, omp_in, num_scalars)\n"
        <<    "IMPLICIT NONE\n"
        <<    as_type(red->get_type()) << " :: omp_out(num_scalars)\n"
        <<    as_type(red->get_type()) << " :: omp_in(num_scalars)\n"
        <<    "INTEGER, VALUE :: num_scalars\n"
        <<    "INTEGER :: I\n"
        <<    statement_placeholder(body_slot) << "\n"
        << "END SUBROUTINE " << fun_name << "\n"
        ;

    Nodecl::NodeclBase function_code = subroutine_src.parse_global(construct);

    // Look up the entities that the parse has just created
    TL::Scope function_scope = ReferenceScope(body_slot).get_scope();

    TL::Symbol param_omp_in = function_scope.get_symbol_from_name("omp_in");
    ERROR_CONDITION(!param_omp_in.is_valid(), "Symbol omp_in not found", 0);

    TL::Symbol param_omp_out = function_scope.get_symbol_from_name("omp_out");
    ERROR_CONDITION(!param_omp_out.is_valid(), "Symbol omp_out not found", 0);

    TL::Symbol function_sym = function_scope.get_symbol_from_name(fun_name);
    ERROR_CONDITION(!function_sym.is_valid(), "Symbol %s not found", fun_name.c_str());

    TL::Symbol index = function_scope.get_symbol_from_name("i");
    ERROR_CONDITION(!index.is_valid(), "Symbol %s not found", "i");

    TL::Symbol num_scalars = function_scope.get_symbol_from_name("num_scalars");
    ERROR_CONDITION(!num_scalars.is_valid(), "Symbol %s not found", "num_scalars");

    // Loop control: the index goes from 1 to num_scalars, with no step given
    Nodecl::NodeclBase upper_bound = Nodecl::Symbol::make(num_scalars);
    upper_bound.set_type(num_scalars.get_type().no_ref().get_lvalue_reference_to());

    Nodecl::Symbol induction_var = Nodecl::Symbol::make(index);
    induction_var.set_type(index.get_type().get_lvalue_reference_to());

    Nodecl::NodeclBase loop_header = Nodecl::RangeLoopControl::make(
            induction_var,
            const_value_to_nodecl(const_value_get_signed_int(1)),
            upper_bound,
            Nodecl::NodeclBase::null());

    // Rewrite a copy of the combiner in terms of the parameters of the new
    // subroutine and the loop index
    Nodecl::NodeclBase expanded_combiner = red->get_combiner().shallow_copy();
    BasicReductionExpandVisitor expander_visitor(
            red->get_omp_in(),
            param_omp_in,
            red->get_omp_out(),
            param_omp_out,
            index);
    expander_visitor.walk(expanded_combiner);

    Nodecl::NodeclBase loop_body =
        Nodecl::List::make(Nodecl::ExpressionStatement::make(expanded_combiner));

    body_slot.replace(
            Nodecl::ForStatement::make(
                loop_header,
                loop_body,
                Nodecl::NodeclBase::null()));

    _basic_reduction_map_openmp[red] = function_sym;

    if (IS_FORTRAN_LANGUAGE)
    {
        // The new subroutine gets the used modules of the context of the
        // construct
        Nodecl::Utils::Fortran::append_used_modules(
                construct.retrieve_context(),
                function_sym.get_related_scope());
    }

    Nodecl::Utils::append_to_enclosing_top_level_location(construct, function_code);

    return function_sym;
}
void LoweringVisitor::perform_partial_reduction_slicer(OutlineInfo& outline_info, Nodecl::NodeclBase ref_tree, Nodecl::Utils::SimpleSymbolMap*& symbol_map) { ERROR_CONDITION(ref_tree.is_null(), "Invalid tree", 0); TL::ObjectList<OutlineDataItem*> reduction_items = outline_info.get_data_items().filter( lift_pointer<bool, OutlineDataItem>(&OutlineDataItem::is_reduction)); if (!reduction_items.empty()) { TL::ObjectList<Nodecl::NodeclBase> reduction_stmts; Nodecl::Utils::SimpleSymbolMap* simple_symbol_map = new Nodecl::Utils::SimpleSymbolMap(symbol_map); symbol_map = simple_symbol_map; for (TL::ObjectList<OutlineDataItem*>::iterator it = reduction_items.begin(); it != reduction_items.end(); it++) { scope_entry_t* shared_symbol = (*it)->get_symbol().get_internal_symbol(); // We need this to avoid the original symbol be replaced // incorrectly scope_entry_t* shared_symbol_proxy = NEW0(scope_entry_t); shared_symbol_proxy->symbol_name = UNIQUESTR_LITERAL("<<reduction-variable>>"); // Crude way to ensure it is replaced shared_symbol_proxy->kind = shared_symbol->kind; symbol_entity_specs_copy_from(shared_symbol_proxy, shared_symbol); shared_symbol_proxy->decl_context = shared_symbol->decl_context; shared_symbol_proxy->type_information = shared_symbol->type_information; shared_symbol_proxy->locus = shared_symbol->locus; simple_symbol_map->add_map( shared_symbol_proxy, (*it)->reduction_get_shared_symbol_in_outline() ); Source reduction_code; Nodecl::NodeclBase partial_reduction_code; reduction_code << "{" << "nanos_lock_t* red_lock;" << "nanos_err_t nanos_err;" << "nanos_err = nanos_get_lock_address(" << ((*it)->get_private_type().is_array() ? 
"" : "&") << as_symbol( shared_symbol_proxy ) << ", &red_lock);" << "if (nanos_err != NANOS_OK) nanos_handle_error(nanos_err);" << "nanos_err = nanos_set_lock(red_lock);" << "if (nanos_err != NANOS_OK) nanos_handle_error(nanos_err);" << statement_placeholder(partial_reduction_code) << "nanos_err = nanos_unset_lock(red_lock);" << "if (nanos_err != NANOS_OK) nanos_handle_error(nanos_err);" << "}" ; FORTRAN_LANGUAGE() { Source::source_language = SourceLanguage::C; } Nodecl::NodeclBase statement = reduction_code.parse_statement(ref_tree); FORTRAN_LANGUAGE() { Source::source_language = SourceLanguage::Current; } ERROR_CONDITION(!statement.is<Nodecl::List>(), "Expecting a list", 0); reduction_stmts.append(statement.as<Nodecl::List>()[0]); TL::Type elemental_type = (*it)->get_private_type(); while (elemental_type.is_array()) elemental_type = elemental_type.array_element(); Source partial_reduction_code_src; if (IS_C_LANGUAGE || IS_CXX_LANGUAGE) { partial_reduction_code_src << as_symbol( (*it)->reduction_get_basic_function() ) << "(" // This will be the reduction shared << ((*it)->get_private_type().is_array() ? "" : "&") << as_symbol( shared_symbol_proxy ) << ", " // This will be the reduction private var << ((*it)->get_private_type().is_array() ? "" : "&") << as_symbol( (*it)->get_symbol() ) << ", " << ((*it)->get_private_type().is_array() ? 
( "sizeof(" + as_type( (*it)->get_private_type()) + ")" "/ sizeof(" + as_type(elemental_type) + ")" ) : "1") << ");" ; } else if (IS_FORTRAN_LANGUAGE) { // We use an ELEMENTAL call here partial_reduction_code_src << "CALL " << as_symbol ( (*it)->reduction_get_basic_function() ) << "(" // This will be the reduction shared << as_symbol( shared_symbol_proxy ) << ", " // This will be the reduction private var << as_symbol( (*it)->get_symbol() ) << ")" ; } else { internal_error("Code unreachable", 0); } partial_reduction_code.replace( partial_reduction_code_src.parse_statement(partial_reduction_code)); } ref_tree.replace( Nodecl::CompoundStatement::make( Nodecl::List::make(reduction_stmts), Nodecl::NodeclBase::null() ) ); }
// Applies the 'collapse' clause of a loop construct before any other
// processing of it.
//
// If 'construct' has no 'collapse' clause nothing is done. Otherwise the
// clause argument is validated (exactly one argument, a constant integer
// expression, strictly positive) and the statement of the construct is
// required to be a for-statement; any violation is reported through
// running_error. The loop nest is then collapsed with HLT::LoopCollapse and
// the construct is rewritten as
//
//     {
//         <header produced by the collapse transformation>
//         #pragma omp <original pragma line, filtered by remove_collapse_clause> firstprivate(<ancillary names>)
//         <collapsed loop>
//     }
//
// construct : in/out. On return it designates the newly created pragma
//             instead of the original one.
void Core::collapse_loop_first(PragmaCustomConstruct& construct)
{
    PragmaCustomClause collapse = construct.get_clause("collapse");

    // Constructs without 'collapse' are left as they are
    if (!collapse.is_defined())
        return;

    ObjectList<Expression> expr_list = collapse.get_expression_list();
    if (expr_list.size() != 1)
    {
        running_error("%s: error: 'collapse' clause must have one argument\n",
                construct.get_ast().get_locus().c_str());
    }

    Expression &expr = expr_list.front();
    if (!expr.is_constant())
    {
        running_error("%s: error: 'collapse' clause argument '%s' is not a constant expression\n",
                expr.get_ast().get_locus().c_str(),
                expr.prettyprint().c_str());
    }

    // Initialised so that the check below never reads an indeterminate value
    // should the evaluation leave the flag untouched
    bool valid = false;
    int nest_level = expr.evaluate_constant_int_expression(valid);
    if (!valid)
    {
        running_error("%s: error: 'collapse' clause argument '%s' is not a constant expression\n",
                expr.get_ast().get_locus().c_str(),
                expr.prettyprint().c_str());
    }

    if (nest_level <= 0)
    {
        running_error("%s: error: nesting level of 'collapse' clause must be a nonzero positive integer\n",
                expr.get_ast().get_locus().c_str());
    }

    if (!ForStatement::predicate(construct.get_statement().get_ast()))
    {
        running_error("%s: error: collapsed '#pragma omp for' or '#pragma omp parallel for' require a for-statement\n",
                construct.get_statement().get_ast().get_locus().c_str());
    }

    ForStatement for_stmt(construct.get_statement().get_ast(),
            construct.get_scope_link());
    HLT::LoopCollapse loop_collapse(for_stmt);

    // 'header' and 'ancillary_names' are handed to the transformation, which
    // is expected to fill them -- NOTE(review): confirm against
    // HLT::LoopCollapse
    ObjectList<std::string> ancillary_names;

    Source header;
    loop_collapse
        .set_nesting_level(nest_level)
        .set_split_transform(header)
        .set_induction_private(true)
        .keep_ancillary_names(ancillary_names);

    Source collapsed_for = loop_collapse;

    // First parse the enclosing compound statement, leaving a placeholder
    // where the new pragma will go
    Source transformed_code;
    AST_t pragma_placeholder;
    transformed_code
        << "{"
        << header
        << statement_placeholder(pragma_placeholder)
        << "}"
        ;

    AST_t tree = transformed_code.parse_statement(construct.get_ast(), construct.get_scope_link());

    // Then build the pragma plus the collapsed loop. 'pragma_line' and
    // 'new_firstprivate_entities' are inserted here and given their contents
    // right below, before the source is parsed.
    Source new_firstprivate_entities;
    Source pragma_line;
    Source omp_part_src;
    omp_part_src
        << "#pragma omp " << pragma_line << new_firstprivate_entities << "\n"
        << collapsed_for
        ;

    // Only emit the clause when there is something to list, and keep it
    // separated by a blank from whatever text the pragma line ends with
    if (!ancillary_names.empty())
    {
        new_firstprivate_entities
            << " firstprivate(" << concat_strings(ancillary_names, ",") << ")";
    }

    pragma_line << construct.get_pragma_line().prettyprint_with_callback(functor(remove_collapse_clause));

    AST_t omp_part_tree = omp_part_src.parse_statement(pragma_placeholder,
            construct.get_scope_link());

    // Replace the pragma part
    pragma_placeholder.replace(omp_part_tree);

    // Replace the whole construct
    construct.get_ast().replace(tree);

    // Now overwrite the old construct with this new one
    construct = PragmaCustomConstruct(pragma_placeholder, construct.get_scope_link());
}