void LoweringVisitor::loop_spawn_worksharing(OutlineInfo& outline_info,
            Nodecl::NodeclBase construct,
            Nodecl::List distribute_environment,
            Nodecl::RangeLoopControl& range,
            const std::string& outline_name,
            TL::Symbol structure_symbol,
            TL::Symbol slicer_descriptor,
            Nodecl::NodeclBase task_label)
    {
        Symbol enclosing_function = Nodecl::Utils::get_enclosing_function(construct);

        Nodecl::OpenMP::Schedule schedule = distribute_environment.find_first<Nodecl::OpenMP::Schedule>();
        ERROR_CONDITION(schedule.is_null(), "Schedule tree is missing", 0);

        Nodecl::NodeclBase lower = range.get_lower();
        Nodecl::NodeclBase upper = range.get_upper();
        Nodecl::NodeclBase step = range.get_step();

        Source struct_size, dynamic_size, struct_arg_type_name;

        struct_arg_type_name
            << ((structure_symbol.get_type().is_template_specialized_type()
                        &&  structure_symbol.get_type().is_dependent()) ? "typename " : "")
            << structure_symbol.get_qualified_name(enclosing_function.get_scope())
            ;

        struct_size << "sizeof( " << struct_arg_type_name << " )" << dynamic_size;

        Source immediate_decl;
        allocate_immediate_structure(
                structure_symbol.get_user_defined_type(),
                outline_info,
                struct_arg_type_name,
                struct_size,
                // out
                immediate_decl,
                dynamic_size);


        Source call_outline_function;

        Source schedule_setup;
        schedule_setup
            <<     "int nanos_chunk;"
            ;
        if (schedule.get_text() == "runtime")
        {
            schedule_setup
                <<     "nanos_omp_sched_t nanos_runtime_sched;"
                <<     "nanos_err = nanos_omp_get_schedule(&nanos_runtime_sched, &nanos_chunk);"
                <<     "if (nanos_err != NANOS_OK)"
                <<         "nanos_handle_error(nanos_err);"
                <<     "nanos_ws_t current_ws_policy = nanos_omp_find_worksharing(nanos_runtime_sched);"
                ;
        }
        else
        {
            Source schedule_name;

            if (Nanos::Version::interface_is_at_least("openmp", 8))
            {
                schedule_name << "nanos_omp_sched_" << schedule.get_text();
            }
            else
            {
                // We used nanos_omp_sched in versions prior to 8
                schedule_name << "omp_sched_" << schedule.get_text();
            }

            schedule_setup
                <<     "nanos_ws_t current_ws_policy = nanos_omp_find_worksharing(" << schedule_name << ");"
                <<     "if (current_ws_policy == 0)"
                <<         "nanos_handle_error(NANOS_UNIMPLEMENTED);"
                <<     "nanos_chunk = " << as_expression(schedule.get_chunk()) << ";"
            ;
        }


        Source worksharing_creation;
        if (IS_CXX_LANGUAGE)
        {
            worksharing_creation
                << as_statement(Nodecl::CxxDef::make(Nodecl::NodeclBase::null(), slicer_descriptor));
        }
        worksharing_creation
            <<     "nanos_err = nanos_worksharing_create("
            <<                      "&" << as_symbol(slicer_descriptor) << ","
            <<                      "current_ws_policy,"
            <<                      "(void**)&nanos_setup_info_loop,"
            <<                      "&single_guard);"
            <<     "if (nanos_err != NANOS_OK)"
            <<         "nanos_handle_error(nanos_err);"
            ;

        Nodecl::NodeclBase fill_outline_arguments_tree, fill_immediate_arguments_tree;

        TL::Source pm_specific_code;
        if (!_lowering->in_ompss_mode())
        {
            // OpenMP
            pm_specific_code
                << immediate_decl
                << statement_placeholder(fill_immediate_arguments_tree)
                << "smp_" << outline_name << "(imm_args);"
                ;
        }
        else
        {
            // OmpSs
            std::string wd_description =
                (!task_label.is_null()) ? task_label.get_text() : enclosing_function.get_name();

            Source const_wd_info;
            const_wd_info
                << fill_const_wd_info(struct_arg_type_name,
                        /* is_untied */ false,
                        /* mandatory_creation */ true,
                        /* is_function_task */ false,
                        wd_description,
                        outline_info,
                        construct);

            std::string dyn_props_var = "nanos_wd_dyn_props";

            Source dynamic_wd_info;
            dynamic_wd_info << "nanos_wd_dyn_props_t " << dyn_props_var << ";";

            fill_dynamic_properties(dyn_props_var,
                    /* priority_expr */ nodecl_null(), /* final_expr */ nodecl_null(), /* is_implicit */ 0, dynamic_wd_info);

            pm_specific_code
                <<  struct_arg_type_name << " *ol_args = (" << struct_arg_type_name <<"*) 0;"
                <<  const_wd_info
                <<  "nanos_wd_t nanos_wd_ = (nanos_wd_t) 0;"
                <<  dynamic_wd_info
                <<  "static nanos_slicer_t replicate = (nanos_slicer_t)0;"
                <<  "if (replicate == (nanos_slicer_t)0)"
                <<      "replicate = nanos_find_slicer(\"replicate\");"
                <<  "if (replicate == (nanos_slicer_t)0)"
                <<      "nanos_handle_error(NANOS_UNIMPLEMENTED);"
                <<  "nanos_err = nanos_create_sliced_wd(&nanos_wd_, "
                <<                                "nanos_wd_const_data.base.num_devices, nanos_wd_const_data.devices, "
                <<                                "(size_t)" << struct_size << ",  nanos_wd_const_data.base.data_alignment, "
                <<                                "(void**)&ol_args, nanos_current_wd(), replicate,"
                <<                                "&nanos_wd_const_data.base.props, &" << dyn_props_var << ", 0, (nanos_copy_data_t**)0,"
                <<                                "0, (nanos_region_dimension_internal_t**)0"
                <<                                ");"
                <<  "if (nanos_err != NANOS_OK)"
                <<      "nanos_handle_error(nanos_err);"
                <<  statement_placeholder(fill_outline_arguments_tree)
                <<  "nanos_err = nanos_submit(nanos_wd_, 0, (nanos_data_access_t *) 0, (nanos_team_t) 0);"
                <<  "if (nanos_err != NANOS_OK)"
                <<      "nanos_handle_error(nanos_err);"
                ;

        }

        TL::Source implicit_barrier_or_tw;
        if (!distribute_environment.find_first<Nodecl::OpenMP::BarrierAtEnd>().is_null())
        {
            implicit_barrier_or_tw << get_implicit_sync_end_construct_source();
        }

        Source spawn_code;
        spawn_code
            << "{"
            <<      as_type(get_bool_type()) << " single_guard;"
            <<      "nanos_err_t nanos_err;"
            <<      schedule_setup
            <<      "nanos_ws_info_loop_t nanos_setup_info_loop;"
            <<      "nanos_setup_info_loop.lower_bound = " << as_expression(lower) << ";"
            <<      "nanos_setup_info_loop.upper_bound = " << as_expression(upper) << ";"
            <<      "nanos_setup_info_loop.loop_step = "   << as_expression(step)  << ";"
            <<      "nanos_setup_info_loop.chunk_size = nanos_chunk;"
            <<      worksharing_creation
            <<      pm_specific_code
            <<      implicit_barrier_or_tw
            << "}"
            ;

        Source fill_outline_arguments, fill_immediate_arguments;
        fill_arguments(construct, outline_info, fill_outline_arguments, fill_immediate_arguments);

        if (IS_FORTRAN_LANGUAGE)
            Source::source_language = SourceLanguage::C;

        Nodecl::NodeclBase spawn_code_tree = spawn_code.parse_statement(construct);

        if (IS_FORTRAN_LANGUAGE)
            Source::source_language = SourceLanguage::Current;

        Nodecl::NodeclBase arguments_tree;
        TL::Source *fill_arguments;
        if (!_lowering->in_ompss_mode())
        {
            // OpenMP
            arguments_tree = fill_immediate_arguments_tree;
            fill_arguments = &fill_immediate_arguments;
        }
        else
        {
            // OmpSs
            arguments_tree = fill_outline_arguments_tree;
            fill_arguments = &fill_outline_arguments;
        }

        // Now attach the slicer symbol to its final scope (see tl-lower-for-worksharing.cpp)
        const decl_context_t* spawn_inner_context = arguments_tree.retrieve_context().get_decl_context();
        slicer_descriptor.get_internal_symbol()->decl_context = spawn_inner_context;
        ::insert_entry(spawn_inner_context->current_scope, slicer_descriptor.get_internal_symbol());

        // Parse the arguments
        Nodecl::NodeclBase new_tree = fill_arguments->parse_statement(arguments_tree);
        arguments_tree.replace(new_tree);

        // Finally, replace the construct by the tree that represents the spawn code
        construct.replace(spawn_code_tree);
    }
    bool LoweringVisitor::handle_reductions_on_task(
            Nodecl::NodeclBase construct,
            OutlineInfo& outline_info,
            Nodecl::NodeclBase statements,
            bool generate_final_stmts,
            Nodecl::NodeclBase& final_statements)
    {
        int num_reductions = 0;

        TL::Source
            reductions_stuff,
            final_clause_stuff,
            // This source represents an expression which is used to check if
            // we can do an optimization in the final code. This optimization
            // consists on calling the original code (with a serial closure) if
            // we are in a final context and the reduction variables that we
            // are using have not been registered previously
            final_clause_opt_expr,
            extra_array_red_memcpy;

        std::map<TL::Symbol, std::string> reduction_symbols_map;

        TL::ObjectList<OutlineDataItem*> data_items = outline_info.get_data_items();
        for (TL::ObjectList<OutlineDataItem*>::iterator it = data_items.begin();
                it != data_items.end();
                it++)
        {
           if (!(*it)->is_reduction())
              continue;

            std::pair<TL::OpenMP::Reduction*, TL::Type> red_info_pair = (*it)->get_reduction_info();
            TL::OpenMP::Reduction* reduction_info = red_info_pair.first;
            TL::Type reduction_type = red_info_pair.second.no_ref();

            TL::Symbol reduction_item = (*it)->get_symbol();
            TL::Type reduction_item_type = reduction_item.get_type().no_ref();

            std::string storage_var_name = (*it)->get_field_name() + "_storage";
            TL::Type storage_var_type = reduction_type.get_pointer_to();


            TL::Symbol reduction_function, reduction_function_original_var, initializer_function;

            // Checking if the current reduction type has been treated before
            // Note that if that happens we can reuse the combiner and
            // initializer function.
            //
            // C/C++: note that if the type of the list item is an array type,
            // we regiter the reduction over its element type
            TL::Type registered_reduction_type = reduction_type;
            while (!IS_FORTRAN_LANGUAGE
                    && registered_reduction_type.is_array())
            {
                registered_reduction_type = registered_reduction_type.array_element();
            }

            LoweringVisitor::reduction_task_map_t::iterator task_red_info =
               _task_reductions_map.find(std::make_pair(reduction_info, registered_reduction_type));

            if (task_red_info != _task_reductions_map.end())
            {
              reduction_function = task_red_info->second._reducer;
              reduction_function_original_var = task_red_info->second._reducer_orig_var;
              initializer_function = task_red_info->second._initializer;
            }
            else
            {
               create_reduction_functions(reduction_info,
                     construct,
                     registered_reduction_type,
                     reduction_item,
                     reduction_function,
                     reduction_function_original_var);

               create_initializer_function(reduction_info,
                     construct,
                     registered_reduction_type,
                     initializer_function);

               _task_reductions_map.insert(
                       std::make_pair(
                           std::make_pair(reduction_info, registered_reduction_type),
                           TaskReductionsInfo(reduction_function, reduction_function_original_var, initializer_function)
                           ));
            }

            // Mandatory TL::Sources to be filled by any reduction
            TL::Source
                orig_address, // address of the original reduction variable
                storage_var; // variable which holds the address of the storage

            // Specific TL::Sources to be filled only by Fortran array reduction
            TL::Source extra_array_red_decl;

            if (IS_C_LANGUAGE || IS_CXX_LANGUAGE)
            {
                storage_var << storage_var_name;
                orig_address << (reduction_item_type.is_pointer() ? "" : "&") << (*it)->get_field_name();

                final_clause_stuff
                    << "if (" << storage_var_name << " == 0)"
                    << "{"
                    <<     storage_var_name  << " = "
                    <<        "(" << as_type(storage_var_type) << ")" << orig_address << ";"
                    << "}"
                    ;
            }
            else
            {
               orig_address <<  "&" << (*it)->get_field_name();
                if (reduction_item_type.is_array())
                {
                    size_t size_of_array_descriptor =
                        fortran_size_of_array_descriptor(
                                fortran_get_rank0_type(reduction_item_type.get_internal_type()),
                                fortran_get_rank_of_type(reduction_item_type.get_internal_type()));


                    storage_var << storage_var_name << "_indirect";
                    extra_array_red_decl << "void *" << storage_var << ";";

                    extra_array_red_memcpy
                        << "nanos_err = nanos_memcpy("
                        <<      "(void **) &" << storage_var_name << ","
                        <<      storage_var << ","
                        <<      size_of_array_descriptor << ");"
                            ;

                    final_clause_stuff
                        << "if (" << storage_var << " == 0)"
                        << "{"
                        <<     "nanos_err = nanos_memcpy("
                        <<         "(void **) &" << storage_var_name << ","
                        <<         "(void *) "<< orig_address << ","
                        <<         size_of_array_descriptor << ");"
                        << "}"
                        << "else"
                        << "{"
                        <<     extra_array_red_memcpy
                        << "}"
                        ;
                }
                else
                {
                    // We need to convert a void* type into a pointer to the reduction type.
                    // As a void* in FORTRAN is represented as an INTEGER(8), we cannot do this
                    // conversion directly in the FORTRAN source. For this reason we introduce
                    // a new function that will be defined in a C file.
                    TL::Symbol func = TL::Nanox::get_function_ptr_conversion(
                            // Destination
                            reduction_item_type.get_pointer_to(),
                            // Origin
                            TL::Type::get_void_type().get_pointer_to(),
                            construct.retrieve_context());

                    storage_var << storage_var_name;

                    final_clause_stuff
                        << "if (" << storage_var << " == 0)"
                        << "{"
                        <<     storage_var_name << " = " << func.get_name() << "(" <<  orig_address << ");"
                        << "}"
                        ;
                }
            }

            if (num_reductions > 0)
                final_clause_opt_expr << " && ";
            final_clause_opt_expr << storage_var << " == 0 ";
            num_reductions++;

            reductions_stuff
                << extra_array_red_decl
                << as_type(storage_var_type) << " " << storage_var_name << ";"
                << "nanos_err = nanos_task_reduction_get_thread_storage("
                <<         "(void *)" << orig_address  << ","
                <<         "(void **) &" << storage_var << ");"
                ;

            reduction_symbols_map[reduction_item] = storage_var_name;
        }

        if (num_reductions != 0)
        {
            // Generating the final code if needed
            if (generate_final_stmts)
            {
                std::map<Nodecl::NodeclBase, Nodecl::NodeclBase>::iterator it4 = _final_stmts_map.find(construct);
                ERROR_CONDITION(it4 == _final_stmts_map.end(), "Unreachable code", 0);

                Nodecl::NodeclBase placeholder;
                TL::Source new_statements_src;
                new_statements_src
                    << "{"
                    <<      "nanos_err_t nanos_err;"
                    <<      reductions_stuff
                    <<      "if (" << final_clause_opt_expr  << ")"
                    <<      "{"
                    <<          as_statement(it4->second)
                    <<      "}"
                    <<      "else"
                    <<      "{"
                    <<          final_clause_stuff
                    <<          statement_placeholder(placeholder)
                    <<      "}"
                    << "}"
                    ;

                final_statements = handle_task_statements(
                      construct, statements, placeholder, new_statements_src, reduction_symbols_map);
            }

            // Generating the task code
            {
                TL::Source new_statements_src;
                Nodecl::NodeclBase placeholder;
                new_statements_src
                    << "{"
                    <<      "nanos_err_t nanos_err;"
                    <<      reductions_stuff
                    <<      extra_array_red_memcpy
                    <<      statement_placeholder(placeholder)
                    << "}"
                    ;

                Nodecl::NodeclBase new_statements = handle_task_statements(
                      construct, statements, placeholder, new_statements_src, reduction_symbols_map);
                statements.replace(new_statements);
            }
        }

        ERROR_CONDITION(num_reductions != 0 &&
                !Nanos::Version::interface_is_at_least("task_reduction", 1001),
                "The version of the runtime begin used does not support task reductions", 0);

        return (num_reductions != 0);
    }
Beispiel #3
0
void LoweringVisitor::visit(const Nodecl::OpenMP::TaskExpression& task_expr)
{
    Nodecl::NodeclBase join_task = task_expr.get_join_task();
    Nodecl::List task_calls = task_expr.get_task_calls().as<Nodecl::List>();

    if (!_lowering->final_clause_transformation_disabled()
            && Nanos::Version::interface_is_at_least("master", 5024))
    {
        ERROR_CONDITION(!task_expr.get_parent().is<Nodecl::ExpressionStatement>(), "Unexpected node", 0);
        Nodecl::NodeclBase expr_stmt = task_expr.get_parent();

        TL::Source code;
        code
            << "{"
            <<      as_type(TL::Type::get_bool_type()) << "mcc_is_in_final;"
            <<      "nanos_err_t mcc_err_in_final = nanos_in_final(&mcc_is_in_final);"
            <<      "if (mcc_err_in_final != NANOS_OK) nanos_handle_error(mcc_err_in_final);"
            <<      "if (mcc_is_in_final)"
            <<      "{"
            <<          as_statement(task_expr.get_sequential_code())
            <<      "}"
            <<      "else"
            <<      "{"
            <<          as_statement(Nodecl::ExpressionStatement::make(task_expr))
            <<      "}"
            << "}"
            ;

        std::cout << "In expression\n";

        if (IS_FORTRAN_LANGUAGE)
            Source::source_language = SourceLanguage::C;

        Nodecl::NodeclBase if_else_tree = code.parse_statement(expr_stmt);

        if (IS_FORTRAN_LANGUAGE)
            Source::source_language = SourceLanguage::Current;

        expr_stmt.replace(if_else_tree);
    }

    Nodecl::NodeclBase placeholder_task_expr_transformation;
    if (join_task.is<Nodecl::OpenMP::Task>())
    {
        // Note: don't walk over the OpenMP::Task node because Its visitor ignores
        // the placeholder and sets to false the 'inside_task_expression' boolean
        visit_task(
                join_task.as<Nodecl::OpenMP::Task>(),
                /* inside_task_expression */ true,
                &placeholder_task_expr_transformation);
    }
    else if (join_task.is<Nodecl::ExpressionStatement>() &&
            join_task.as<Nodecl::ExpressionStatement>().get_nest().is<Nodecl::OpenMP::TaskCall>())
    {
        visit_task_call(
                join_task.as<Nodecl::ExpressionStatement>().get_nest().as<Nodecl::OpenMP::TaskCall>(),
                /* inside_task_expression */ true,
                &placeholder_task_expr_transformation);
    }
    else
    {
        internal_error("Unreachable code", 0);
    }


    // Note: don't walk over the OpenMP::TaskCall because It's visitor sets to
    // false the 'inside_task_expression' boolean
    for (Nodecl::List::iterator it = task_calls.begin();
            it != task_calls.end();
            ++it)
    {
        Nodecl::ExpressionStatement current_expr_stmt = it->as<Nodecl::ExpressionStatement>();
        Nodecl::OpenMP::TaskCall current_task_call = current_expr_stmt.get_nest().as<Nodecl::OpenMP::TaskCall>();

        visit_task_call(current_task_call,
                /* inside_task_expression */ true,
                /* placeholder_task_expr_transformation */ NULL);

        Nodecl::Utils::prepend_items_before(placeholder_task_expr_transformation, *it);
    }

    ERROR_CONDITION(!task_expr.get_parent().is<Nodecl::ExpressionStatement>(), "Unexpected node", 0);
    Nodecl::NodeclBase expr_stmt = task_expr.get_parent();
    Nodecl::Utils::prepend_items_before(expr_stmt, task_expr.get_join_task());

    // Finally, remove from the tree the TaskExpression node
    Nodecl::Utils::remove_from_enclosing_list(expr_stmt);
}