static void handle_ompss_opencl_deallocate_intrinsic(
            Nodecl::FunctionCall function_call,
            Nodecl::NodeclBase expr_stmt)
    {
        Nodecl::List arguments = function_call.get_arguments().as<Nodecl::List>();
        ERROR_CONDITION(arguments.size() != 1, "More than one argument in ompss_opencl_deallocate call", 0);

        Nodecl::NodeclBase actual_argument = arguments[0];
        ERROR_CONDITION(!actual_argument.is<Nodecl::FortranActualArgument>(), "Unexpected tree", 0);

        Nodecl::NodeclBase arg = actual_argument.as<Nodecl::FortranActualArgument>().get_argument();
        TL::Symbol array_sym = ::fortran_data_ref_get_symbol(arg.get_internal_nodecl());

        ERROR_CONDITION(
                !(array_sym.get_type().is_fortran_array()
                    && array_sym.is_allocatable())
                &&
                !(array_sym.get_type().is_pointer()
                    && array_sym.get_type().points_to().is_fortran_array()),
                "The argument of 'ompss_opencl_deallocate' intrinsic must be "
                "an allocatable array or a pointer to an array\n", 0);

        // Replace the current intrinsic call by a call to the Nanos++ API
        TL::Symbol ptr_of_arr_sym = get_function_ptr_of(array_sym, expr_stmt.retrieve_context());

        TL::Source new_function_call;
        new_function_call
            << "CALL NANOS_OPENCL_DEALLOCATE_FORTRAN("
            <<      ptr_of_arr_sym.get_name() << "("<< as_expression(arg) << "))\n"
            ;

        expr_stmt.replace(new_function_call.parse_statement(expr_stmt));
    }
Beispiel #2
0
    TL::Symbol LoweringVisitor::create_reduction_cleanup_function(OpenMP::Reduction* red, Nodecl::NodeclBase construct)
    {
        if (IS_C_LANGUAGE || IS_CXX_LANGUAGE)
        {
            internal_error("Currently only valid in Fortran", 0);
        }
        reduction_map_t::iterator it = _reduction_cleanup_map.find(red);
        if (it != _reduction_cleanup_map.end())
        {
            return it->second;
        }

        std::string fun_name;
        {
            std::stringstream ss;
            ss << "nanos_cleanup_" << red << "_" << simple_hash_str(construct.get_filename().c_str());
            fun_name = ss.str();
        }


        Source src;
        src << "SUBROUTINE " << fun_name << "(X)\n"
            <<     as_type(red->get_type()) << ", POINTER ::  X(:)\n"
            <<     "DEALLOCATE(X)\n"
            << "END SUBROUTINE\n"
            ;

        Nodecl::NodeclBase function_code = src.parse_global(construct);

        TL::Symbol function_sym = ReferenceScope(construct).get_scope().get_symbol_from_name(fun_name);
        ERROR_CONDITION(!function_sym.is_valid(), "Symbol %s not found", fun_name.c_str());

        _reduction_cleanup_map[red] = function_sym;

        Nodecl::Utils::append_to_enclosing_top_level_location(construct, function_code);

        Nodecl::Utils::Fortran::append_used_modules(construct.retrieve_context(), function_sym.get_related_scope());

        return function_sym;
    }
    void LoweringVisitor::register_reductions(
          Nodecl::NodeclBase construct, OutlineInfo& outline_info, TL::Source& src)
    {
        TL::ObjectList<OutlineDataItem*> data_items = outline_info.get_data_items();
        for (TL::ObjectList<OutlineDataItem*>::iterator it = data_items.begin();
                it != data_items.end();
                it++)
        {
           if (!(*it)->is_reduction())
              continue;

            std::pair<TL::OpenMP::Reduction*, TL::Type> red_info_pair = (*it)->get_reduction_info();
            TL::OpenMP::Reduction* reduction_info = red_info_pair.first;
            TL::Type reduction_type = red_info_pair.second.no_ref();

            ERROR_CONDITION(!Nanos::Version::interface_is_at_least("task_reduction", 1001),
                  "The version of the runtime being used does not support task reductions", 0);

            TL::Symbol reduction_item = (*it)->get_symbol();

            ERROR_CONDITION(reduction_type.is_array()
                  && (IS_C_LANGUAGE || IS_CXX_LANGUAGE)
                  && !Nanos::Version::interface_is_at_least("task_reduction", 1002),
                  "The version of the runtime being used does not support array reductions in C/C++", 0);

            // Note that at this point all the reduction must be registered.
            // For C/C++ array reductions, the registered_reduction type is the
            // element type
            TL::Type registered_reduction_type = reduction_type;
            while (!IS_FORTRAN_LANGUAGE
                    && registered_reduction_type.is_array())
            {
                registered_reduction_type = registered_reduction_type.array_element();
            }

            LoweringVisitor::reduction_task_map_t::iterator task_red_info =
               _task_reductions_map.find(std::make_pair(reduction_info, registered_reduction_type));

            ERROR_CONDITION(task_red_info == _task_reductions_map.end(),
                  "Unregistered task reduction\n", 0);

            TL::Symbol reduction_function = task_red_info->second._reducer;
            TL::Symbol reduction_function_original_var = task_red_info->second._reducer_orig_var;
            TL::Symbol initializer_function = task_red_info->second._initializer;

            // Common case: the runtime will host the private copies of the list item
            if (!(IS_FORTRAN_LANGUAGE && reduction_type.is_array()))
            {
               // Array Reductions in C/C++ are defined over the elements of the array
               TL::Source reduction_size_src_opt;
               TL::Type element_type = registered_reduction_type;

               reduction_size_src_opt << "sizeof(" << as_type(reduction_type) <<"),";

               TL::Source item_address =
                  (reduction_item.get_type().is_pointer() ? "" : "&") + (*it)->get_field_name();

               src
                  << "nanos_err = nanos_task_reduction_register("
                  <<      "(void *) " << item_address << ","          // object address
                  <<      reduction_size_src_opt                      // whole reduction size
                  <<      "sizeof(" << as_type(element_type) << "),"  // element size
                  <<      "(void (*)(void *, void *)) &"
                  <<          initializer_function.get_name() << ","  // initializer
                  <<      "(void (*)(void *, void *)) &"
                  <<          reduction_function.get_name() << ");"   // reducer
                  ;
            }
            else
            {
               // Specific case for Fortran Array Reductions: the runtime will
               // host a private array descriptor for each thread. Later, in
               // the initializer function, this array descriptors will be
               // initialized and their array storage will be allocated
               TL::Source target_address;
               size_t size_array_descriptor =
                  fortran_size_of_array_descriptor(
                        fortran_get_rank0_type(reduction_type.get_internal_type()),
                        fortran_get_rank_of_type(reduction_type.get_internal_type()));

               if (reduction_type.array_requires_descriptor())
               {
                  TL::Symbol ptr_of_sym = get_function_ptr_of(reduction_item, construct.retrieve_context());
                  target_address << ptr_of_sym.get_name() << "( " << (*it)->get_symbol().get_name() << ")";
               }
               else
               {
                  target_address << "(void *) &" << (*it)->get_field_name();
               }

               src
                  << "nanos_err = nanos_task_fortran_array_reduction_register("
                  <<      target_address << ","                                  // Address to the array descriptor
                  <<      "(void *) & " << (*it)->get_field_name() << ","        // Address to the storage
                  <<      size_array_descriptor << ","                           // size
                  <<      "(void (*)(void *, void *)) &"
                  <<          initializer_function.get_name() << ","             // initializer
                  <<      "(void (*)(void *, void *)) &"
                  <<          reduction_function.get_name() << ","               // reducer
                  <<      "(void (*)(void *, void *)) &"
                  <<          reduction_function_original_var.get_name() << ");" // reducer ori
                  ;
            }
        }
    }
    bool LoweringVisitor::handle_reductions_on_task(
            Nodecl::NodeclBase construct,
            OutlineInfo& outline_info,
            Nodecl::NodeclBase statements,
            bool generate_final_stmts,
            Nodecl::NodeclBase& final_statements)
    {
        int num_reductions = 0;

        TL::Source
            reductions_stuff,
            final_clause_stuff,
            // This source represents an expression which is used to check if
            // we can do an optimization in the final code. This optimization
            // consists on calling the original code (with a serial closure) if
            // we are in a final context and the reduction variables that we
            // are using have not been registered previously
            final_clause_opt_expr,
            extra_array_red_memcpy;

        std::map<TL::Symbol, std::string> reduction_symbols_map;

        TL::ObjectList<OutlineDataItem*> data_items = outline_info.get_data_items();
        for (TL::ObjectList<OutlineDataItem*>::iterator it = data_items.begin();
                it != data_items.end();
                it++)
        {
           if (!(*it)->is_reduction())
              continue;

            std::pair<TL::OpenMP::Reduction*, TL::Type> red_info_pair = (*it)->get_reduction_info();
            TL::OpenMP::Reduction* reduction_info = red_info_pair.first;
            TL::Type reduction_type = red_info_pair.second.no_ref();

            TL::Symbol reduction_item = (*it)->get_symbol();
            TL::Type reduction_item_type = reduction_item.get_type().no_ref();

            std::string storage_var_name = (*it)->get_field_name() + "_storage";
            TL::Type storage_var_type = reduction_type.get_pointer_to();


            TL::Symbol reduction_function, reduction_function_original_var, initializer_function;

            // Checking if the current reduction type has been treated before
            // Note that if that happens we can reuse the combiner and
            // initializer function.
            //
            // C/C++: note that if the type of the list item is an array type,
            // we regiter the reduction over its element type
            TL::Type registered_reduction_type = reduction_type;
            while (!IS_FORTRAN_LANGUAGE
                    && registered_reduction_type.is_array())
            {
                registered_reduction_type = registered_reduction_type.array_element();
            }

            LoweringVisitor::reduction_task_map_t::iterator task_red_info =
               _task_reductions_map.find(std::make_pair(reduction_info, registered_reduction_type));

            if (task_red_info != _task_reductions_map.end())
            {
              reduction_function = task_red_info->second._reducer;
              reduction_function_original_var = task_red_info->second._reducer_orig_var;
              initializer_function = task_red_info->second._initializer;
            }
            else
            {
               create_reduction_functions(reduction_info,
                     construct,
                     registered_reduction_type,
                     reduction_item,
                     reduction_function,
                     reduction_function_original_var);

               create_initializer_function(reduction_info,
                     construct,
                     registered_reduction_type,
                     initializer_function);

               _task_reductions_map.insert(
                       std::make_pair(
                           std::make_pair(reduction_info, registered_reduction_type),
                           TaskReductionsInfo(reduction_function, reduction_function_original_var, initializer_function)
                           ));
            }

            // Mandatory TL::Sources to be filled by any reduction
            TL::Source
                orig_address, // address of the original reduction variable
                storage_var; // variable which holds the address of the storage

            // Specific TL::Sources to be filled only by Fortran array reduction
            TL::Source extra_array_red_decl;

            if (IS_C_LANGUAGE || IS_CXX_LANGUAGE)
            {
                storage_var << storage_var_name;
                orig_address << (reduction_item_type.is_pointer() ? "" : "&") << (*it)->get_field_name();

                final_clause_stuff
                    << "if (" << storage_var_name << " == 0)"
                    << "{"
                    <<     storage_var_name  << " = "
                    <<        "(" << as_type(storage_var_type) << ")" << orig_address << ";"
                    << "}"
                    ;
            }
            else
            {
               orig_address <<  "&" << (*it)->get_field_name();
                if (reduction_item_type.is_array())
                {
                    size_t size_of_array_descriptor =
                        fortran_size_of_array_descriptor(
                                fortran_get_rank0_type(reduction_item_type.get_internal_type()),
                                fortran_get_rank_of_type(reduction_item_type.get_internal_type()));


                    storage_var << storage_var_name << "_indirect";
                    extra_array_red_decl << "void *" << storage_var << ";";

                    extra_array_red_memcpy
                        << "nanos_err = nanos_memcpy("
                        <<      "(void **) &" << storage_var_name << ","
                        <<      storage_var << ","
                        <<      size_of_array_descriptor << ");"
                            ;

                    final_clause_stuff
                        << "if (" << storage_var << " == 0)"
                        << "{"
                        <<     "nanos_err = nanos_memcpy("
                        <<         "(void **) &" << storage_var_name << ","
                        <<         "(void *) "<< orig_address << ","
                        <<         size_of_array_descriptor << ");"
                        << "}"
                        << "else"
                        << "{"
                        <<     extra_array_red_memcpy
                        << "}"
                        ;
                }
                else
                {
                    // We need to convert a void* type into a pointer to the reduction type.
                    // As a void* in FORTRAN is represented as an INTEGER(8), we cannot do this
                    // conversion directly in the FORTRAN source. For this reason we introduce
                    // a new function that will be defined in a C file.
                    TL::Symbol func = TL::Nanox::get_function_ptr_conversion(
                            // Destination
                            reduction_item_type.get_pointer_to(),
                            // Origin
                            TL::Type::get_void_type().get_pointer_to(),
                            construct.retrieve_context());

                    storage_var << storage_var_name;

                    final_clause_stuff
                        << "if (" << storage_var << " == 0)"
                        << "{"
                        <<     storage_var_name << " = " << func.get_name() << "(" <<  orig_address << ");"
                        << "}"
                        ;
                }
            }

            if (num_reductions > 0)
                final_clause_opt_expr << " && ";
            final_clause_opt_expr << storage_var << " == 0 ";
            num_reductions++;

            reductions_stuff
                << extra_array_red_decl
                << as_type(storage_var_type) << " " << storage_var_name << ";"
                << "nanos_err = nanos_task_reduction_get_thread_storage("
                <<         "(void *)" << orig_address  << ","
                <<         "(void **) &" << storage_var << ");"
                ;

            reduction_symbols_map[reduction_item] = storage_var_name;
        }

        if (num_reductions != 0)
        {
            // Generating the final code if needed
            if (generate_final_stmts)
            {
                std::map<Nodecl::NodeclBase, Nodecl::NodeclBase>::iterator it4 = _final_stmts_map.find(construct);
                ERROR_CONDITION(it4 == _final_stmts_map.end(), "Unreachable code", 0);

                Nodecl::NodeclBase placeholder;
                TL::Source new_statements_src;
                new_statements_src
                    << "{"
                    <<      "nanos_err_t nanos_err;"
                    <<      reductions_stuff
                    <<      "if (" << final_clause_opt_expr  << ")"
                    <<      "{"
                    <<          as_statement(it4->second)
                    <<      "}"
                    <<      "else"
                    <<      "{"
                    <<          final_clause_stuff
                    <<          statement_placeholder(placeholder)
                    <<      "}"
                    << "}"
                    ;

                final_statements = handle_task_statements(
                      construct, statements, placeholder, new_statements_src, reduction_symbols_map);
            }

            // Generating the task code
            {
                TL::Source new_statements_src;
                Nodecl::NodeclBase placeholder;
                new_statements_src
                    << "{"
                    <<      "nanos_err_t nanos_err;"
                    <<      reductions_stuff
                    <<      extra_array_red_memcpy
                    <<      statement_placeholder(placeholder)
                    << "}"
                    ;

                Nodecl::NodeclBase new_statements = handle_task_statements(
                      construct, statements, placeholder, new_statements_src, reduction_symbols_map);
                statements.replace(new_statements);
            }
        }

        ERROR_CONDITION(num_reductions != 0 &&
                !Nanos::Version::interface_is_at_least("task_reduction", 1001),
                "The version of the runtime begin used does not support task reductions", 0);

        return (num_reductions != 0);
    }
    static TL::Symbol create_initializer_function_fortran(
            OpenMP::Reduction* red,
            TL::Type reduction_type,
            Nodecl::NodeclBase construct)
    {
        std::string fun_name;
        {
            std::stringstream ss;
            ss << "nanos_ini_" << red << "_" << reduction_type.get_internal_type() << "_" << simple_hash_str(construct.get_filename().c_str());
            fun_name = ss.str();
        }

        Nodecl::NodeclBase initializer = red->get_initializer().shallow_copy();


        TL::Type omp_out_type = reduction_type,
                 omp_ori_type = reduction_type;

        // These sources are only used in array reductions
        TL::Source omp_out_extra_attributes,
            extra_stuff_array_red;

        if (reduction_type.is_array())
        {
            Source dims_descr;
            TL::Type t = reduction_type;
            int rank = 0;
            if (t.is_fortran_array())
            {
                rank = t.fortran_rank();
            }

            dims_descr << "(";
            omp_out_extra_attributes << ", POINTER, DIMENSION(";

            int i;
            for (i = 0; i < rank; i++)
            {
                if (i != 0)
                {
                    dims_descr << ",";
                    omp_out_extra_attributes << ",";
                }

                dims_descr << "LBOUND(omp_orig, DIM = " << (rank - i) << ")"
                    << ":"
                    << "UBOUND(omp_orig, DIM = " << (rank - i) << ")"
                    ;

                omp_out_extra_attributes << ":";
                t = t.array_element();
            }

            dims_descr << ")";
            omp_out_extra_attributes << ")";

            omp_out_type = t;

            extra_stuff_array_red << "ALLOCATE(omp_out" << dims_descr <<")\n";
        }

        Source src;
        src << "SUBROUTINE " << fun_name << "(omp_out, omp_orig)\n"
            <<    "IMPLICIT NONE\n"
            <<    as_type(omp_out_type) << omp_out_extra_attributes << " ::  omp_out\n"
            <<    as_type(omp_ori_type) <<  " :: omp_orig\n"
            <<    extra_stuff_array_red
            <<    "omp_out = " << as_expression(initializer) << "\n"
            << "END SUBROUTINE " << fun_name << "\n"
            ;

        TL::Scope global_scope = construct.retrieve_context().get_global_scope();
        Nodecl::NodeclBase function_code = src.parse_global(global_scope);
        TL::Symbol function_sym = global_scope.get_symbol_from_name(fun_name);

        ERROR_CONDITION(!function_sym.is_valid(), "Symbol %s not found", fun_name.c_str());

        // As the initializer function is needed during the instantiation of
        // the task, this function should be inserted before the construct
        Nodecl::Utils::prepend_to_enclosing_top_level_location(construct,
                function_code);

        return function_sym;
    }
    static TL::Symbol create_initializer_function_c(
            OpenMP::Reduction* red,
            TL::Type reduction_type,
            Nodecl::NodeclBase construct)
    {
        std::string fun_name;
        {
            std::stringstream ss;
            ss << "nanos_ini_" << red << "_" << reduction_type.get_internal_type() << "_" << simple_hash_str(construct.get_filename().c_str());
            fun_name = ss.str();
        }

        Nodecl::NodeclBase function_body;
        Source src;
        src << "void " << fun_name << "("
            <<      as_type(reduction_type.no_ref().get_lvalue_reference_to()) << " omp_priv,"
            <<      as_type(reduction_type.no_ref().get_lvalue_reference_to()) << " omp_orig)"
            << "{"
            <<    statement_placeholder(function_body)
            << "}"
            ;

        Nodecl::NodeclBase function_code = src.parse_global(construct.retrieve_context().get_global_scope());

        TL::Scope inside_function = ReferenceScope(function_body).get_scope();
        TL::Symbol param_omp_priv = inside_function.get_symbol_from_name("omp_priv");
        ERROR_CONDITION(!param_omp_priv.is_valid(), "Symbol omp_priv not found", 0);

        TL::Symbol param_omp_orig = inside_function.get_symbol_from_name("omp_orig");
        ERROR_CONDITION(!param_omp_orig.is_valid(), "Symbol omp_orig not found", 0);

        TL::Symbol function_sym = inside_function.get_symbol_from_name(fun_name);
        ERROR_CONDITION(!function_sym.is_valid(), "Symbol %s not found", fun_name.c_str());

        Nodecl::NodeclBase initializer = red->get_initializer().shallow_copy();
        if (initializer.is<Nodecl::StructuredValue>())
        {
            Nodecl::StructuredValue structured_value = initializer.as<Nodecl::StructuredValue>();
            if (structured_value.get_form().is<Nodecl::StructuredValueBracedImplicit>())
            {
                structured_value.set_form(Nodecl::StructuredValueCompoundLiteral::make());
            }
        }

        Nodecl::Utils::SimpleSymbolMap translation_map;

        translation_map.add_map(red->get_omp_priv(), param_omp_priv);
        translation_map.add_map(red->get_omp_orig(), param_omp_orig);

        Nodecl::NodeclBase new_initializer = Nodecl::Utils::deep_copy(initializer, inside_function, translation_map);

        if (red->get_is_initialization())
        {
            // The original initializer was something like 'omp_priv = expr1', but the
            // new_initializer only represents the lhs expression (in our example, expr1).
            // For this reason we create manually an assignment expression.
            Nodecl::NodeclBase param_omp_priv_ref = Nodecl::Symbol::make(param_omp_priv);
            param_omp_priv_ref.set_type(param_omp_priv.get_type());

            function_body.replace(
                    Nodecl::List::make(
                        Nodecl::ExpressionStatement::make(
                            Nodecl::Assignment::make(
                                param_omp_priv_ref,
                                new_initializer,
                                param_omp_priv_ref.get_type().no_ref())
                            )));
        }
        else
        {
            function_body.replace(
                    Nodecl::List::make(Nodecl::ExpressionStatement::make(new_initializer)));
        }

        // As the initializer function is needed during the instantiation of
        // the task, this function should be inserted before the construct
        Nodecl::Utils::prepend_to_enclosing_top_level_location(construct,
                function_code);

        return function_sym;
    }
Beispiel #7
0
 ReferenceScope::ReferenceScope(Nodecl::NodeclBase n)
     : _scope(n.retrieve_context())
 {
 }
    static Symbol create_new_function_opencl_allocate(
            Nodecl::NodeclBase expr_stmt,
            Symbol subscripted_symbol,
            Type element_type,
            int num_dimensions,
            bool is_allocatable)
    {
        std::string alloca_or_pointer = is_allocatable ? "ALLOCATABLE" : "POINTER";

        TL::Source dummy_arguments_bounds, dimension_attr, allocate_dims;
        dimension_attr << "DIMENSION(";
        for (int i = 1; i <= num_dimensions; ++i)
        {
            if (i != 1)
            {
                allocate_dims << ", ";
                dummy_arguments_bounds <<", ";
                dimension_attr << ", ";
            }

            dummy_arguments_bounds <<"LB" << i <<", " << "UB" << i;
            dimension_attr << ":";
            allocate_dims << "LB" << i << ":" << "UB" << i;
        }
        dimension_attr << ")";

        size_t size_of_array_descriptor =
            fortran_size_of_array_descriptor(
                    fortran_get_rank0_type(subscripted_symbol.get_type().get_internal_type()),
                    fortran_get_rank_of_type(subscripted_symbol.get_type().get_internal_type()));

        TL::Source new_function_name;
        new_function_name
            << "NANOX_OPENCL_ALLOCATE_INTERNAL_"
            << (ptrdiff_t) subscripted_symbol.get_internal_symbol()
            ;

        Nodecl::NodeclBase nodecl_body;
        TL::Source new_function;
        new_function
            << "SUBROUTINE " << new_function_name << "(ARR, " << dummy_arguments_bounds  << ")\n"
            <<      as_type(element_type) << ", " << dimension_attr << ", " << alloca_or_pointer << " :: ARR\n"
            <<      as_type(element_type) << ", " << dimension_attr << ", ALLOCATABLE :: TMP\n"
            <<      "INTEGER :: " << dummy_arguments_bounds << "\n"
            <<      "INTEGER :: ERR \n"
            <<      "ALLOCATE(TMP(" << allocate_dims << "))\n"
            <<      statement_placeholder(nodecl_body)
            <<      "DEALLOCATE(TMP)\n"
            << "END SUBROUTINE " << new_function_name << "\n"
            ;

        Nodecl::NodeclBase function_code = new_function.parse_global(expr_stmt.retrieve_context().get_global_scope());

        TL::Scope inside_function = ReferenceScope(nodecl_body).get_scope();
        TL::Symbol new_function_sym = inside_function.get_symbol_from_name(strtolower(new_function_name.get_source().c_str()));
        TL::Symbol arr_sym = inside_function.get_symbol_from_name("arr");
        TL::Symbol tmp_sym = inside_function.get_symbol_from_name("tmp");
        TL::Symbol ptr_of_arr_sym = get_function_ptr_of(arr_sym, inside_function);
        TL::Symbol ptr_of_tmp_sym = get_function_ptr_of(tmp_sym, inside_function);

        TL::Source aux;
        aux
            <<  "ERR = NANOS_MEMCPY("
            <<          ptr_of_arr_sym.get_name() << "(ARR),"
            <<          ptr_of_tmp_sym.get_name() << "(TMP),"
            <<          "INT(" << size_of_array_descriptor << "," << type_get_size(get_ptrdiff_t_type()) << "))\n"

            <<  "CALL NANOS_OPENCL_ALLOCATE_FORTRAN("
            <<          "SIZEOF(TMP),"
            <<          ptr_of_arr_sym.get_name() << "(ARR))\n"
            ;

        nodecl_body.replace(aux.parse_statement(inside_function));
        Nodecl::Utils::prepend_to_enclosing_top_level_location(expr_stmt, function_code);

        return new_function_sym;
    }
Beispiel #9
0
    void LoweringVisitor::reduction_initialization_code(
            OutlineInfo& outline_info,
            Nodecl::NodeclBase ref_tree,
            Nodecl::NodeclBase construct)
    {
        ERROR_CONDITION(ref_tree.is_null(), "Invalid tree", 0);

        if (!Nanos::Version::interface_is_at_least("master", 5023))
        {
            running_error("%s: error: a newer version of Nanos++ (>=5023) is required for reductions support\n",
                    construct.get_locus_str().c_str());
        }

        TL::ObjectList<OutlineDataItem*> reduction_items = outline_info.get_data_items().filter(
                predicate(lift_pointer(functor(&OutlineDataItem::is_reduction))));
        ERROR_CONDITION (reduction_items.empty(), "No reductions to process", 0);

        Source result;

        Source reduction_declaration,
               thread_initializing_reduction_info,
               thread_fetching_reduction_info;

        result
            << reduction_declaration
            << "{"
            << as_type(get_bool_type()) << " red_single_guard;"
            << "nanos_err_t err;"
            << "err = nanos_enter_sync_init(&red_single_guard);"
            << "if (err != NANOS_OK)"
            <<     "nanos_handle_error(err);"
            << "if (red_single_guard)"
            << "{"
            <<    "int nanos_num_threads = nanos_omp_get_num_threads();"
            <<    thread_initializing_reduction_info
            <<    "err = nanos_release_sync_init();"
            <<    "if (err != NANOS_OK)"
            <<        "nanos_handle_error(err);"
            << "}"
            << "else"
            << "{"
            <<    "err = nanos_wait_sync_init();"
            <<    "if (err != NANOS_OK)"
            <<        "nanos_handle_error(err);"
            <<    thread_fetching_reduction_info
            << "}"
            << "}"
            ;

        for (TL::ObjectList<OutlineDataItem*>::iterator it = reduction_items.begin();
                it != reduction_items.end();
                it++)
        {
            std::string nanos_red_name = "nanos_red_" + (*it)->get_symbol().get_name();

            std::pair<OpenMP::Reduction*, TL::Type> reduction_info = (*it)->get_reduction_info();
            OpenMP::Reduction* reduction = reduction_info.first;
            TL::Type reduction_type = reduction_info.second;

            if (reduction_type.is_any_reference())
                reduction_type = reduction_type.references_to();

            TL::Type reduction_element_type = reduction_type;
            if (IS_FORTRAN_LANGUAGE)
            {
                while (reduction_element_type.is_fortran_array())
                    reduction_element_type = reduction_element_type.array_element();
            }
            else
            {
                while (reduction_element_type.is_array())
                    reduction_element_type = reduction_element_type.array_element();
            }

            Source element_size;
            if (IS_FORTRAN_LANGUAGE)
            {
                if (reduction_type.is_fortran_array())
                {
                    // We need to parse this bit in Fortran
                    Source number_of_bytes;
                    number_of_bytes << "SIZE(" << (*it)->get_symbol().get_name() << ") * " << reduction_element_type.get_size();

                    element_size << as_expression(number_of_bytes.parse_expression(construct));
                }
                else
                {
                    element_size << "sizeof(" << as_type(reduction_type) << ")";
                }
            }
            else
            {
                element_size << "sizeof(" << as_type(reduction_type) << ")";
            }

            reduction_declaration
                << "nanos_reduction_t* " << nanos_red_name << ";"
                ;

            Source allocate_private_buffer, cleanup_code;

            Source num_scalars;

            TL::Symbol basic_reduction_function, vector_reduction_function;
            create_reduction_function(reduction, construct, reduction_type, basic_reduction_function, vector_reduction_function);
            (*it)->reduction_set_basic_function(basic_reduction_function);

            thread_initializing_reduction_info
                << "err = nanos_malloc((void**)&" << nanos_red_name << ", sizeof(nanos_reduction_t), " 
                << "\"" << construct.get_filename() << "\", " << construct.get_line() << ");"
                << "if (err != NANOS_OK)"
                <<     "nanos_handle_error(err);"
                << nanos_red_name << "->original = (void*)" 
                <<            (reduction_type.is_array() ? "" : "&") << (*it)->get_symbol().get_name() << ";"
                << allocate_private_buffer
                << nanos_red_name << "->vop = "
                <<      (vector_reduction_function.is_valid() ? as_symbol(vector_reduction_function) : "0") << ";"
                << nanos_red_name << "->bop = (void(*)(void*,void*,int))" << as_symbol(basic_reduction_function) << ";"
                << nanos_red_name << "->element_size = " << element_size << ";"
                << nanos_red_name << "->num_scalars = " << num_scalars << ";"
                << cleanup_code
                << "err = nanos_register_reduction(" << nanos_red_name << ");"
                << "if (err != NANOS_OK)"
                <<     "nanos_handle_error(err);"
                ;

            if (IS_C_LANGUAGE
                    || IS_CXX_LANGUAGE)
            {
                if (reduction_type.is_array())
                {
                    num_scalars << "sizeof(" << as_type(reduction_type) << ") / sizeof(" << as_type(reduction_element_type) <<")";
                }
                else
                {
                    num_scalars << "1";
                }

                allocate_private_buffer
                    << "err = nanos_malloc(&" << nanos_red_name << "->privates, sizeof(" << as_type(reduction_type) << ") * nanos_num_threads, "
                    << "\"" << construct.get_filename() << "\", " << construct.get_line() << ");"
                    << "if (err != NANOS_OK)"
                    <<     "nanos_handle_error(err);"
                    << nanos_red_name << "->descriptor = " << nanos_red_name << "->privates;"
                    << "rdv_" << (*it)->get_field_name() << " = (" <<  as_type( (*it)->get_private_type().get_pointer_to() ) << ")" << nanos_red_name << "->privates;"
                    ;


                thread_fetching_reduction_info
                    << "err = nanos_reduction_get(&" << nanos_red_name << ", " 
                    << (reduction_type.is_array() ? "" : "&") << (*it)->get_symbol().get_name() << ");"

                    << "if (err != NANOS_OK)"
                    <<     "nanos_handle_error(err);"
                    << "rdv_" << (*it)->get_field_name() << " = (" <<  as_type( (*it)->get_private_type().get_pointer_to() ) << ")" << nanos_red_name << "->privates;"
                    ;
                cleanup_code
                    << nanos_red_name << "->cleanup = nanos_free0;"
                    ;
            }
            else if (IS_FORTRAN_LANGUAGE)
            {

                Type private_reduction_vector_type;

                Source extra_dims;
                {
                    TL::Type t = (*it)->get_symbol().get_type().no_ref();
                    int rank = 0;
                    if (t.is_fortran_array())
                    {
                        rank = t.fortran_rank();
                    }

                    if (rank != 0)
                    {
                        // We need to parse this bit in Fortran
                        Source size_call;
                        size_call << "SIZE(" << (*it)->get_symbol().get_name() << ")";

                        num_scalars << as_expression(size_call.parse_expression(construct));
                    }
                    else
                    {
                        num_scalars << "1";
                    }
                    private_reduction_vector_type = fortran_get_n_ranked_type_with_descriptor(
                            get_void_type(), rank + 1, construct.retrieve_context().get_decl_context());

                    int i;
                    for (i = 0; i < rank; i++)
                    {
                        Source lbound_src;
                        lbound_src << "LBOUND(" << (*it)->get_symbol().get_name() << ", DIM = " << (rank - i) << ")";
                        Source ubound_src;
                        ubound_src << "UBOUND(" << (*it)->get_symbol().get_name() << ", DIM = " << (rank - i) << ")";

                        extra_dims 
                            << "["
                            << as_expression(lbound_src.parse_expression(construct))
                            << ":"
                            << as_expression(ubound_src.parse_expression(construct))
                            << "]";

                        t = t.array_element();
                    }
                }

                allocate_private_buffer
                    << "@FORTRAN_ALLOCATE@((*rdv_" << (*it)->get_field_name() << ")[0:(nanos_num_threads-1)]" << extra_dims <<");"
                    << nanos_red_name << "->privates = &(*rdv_" << (*it)->get_field_name() << ");"
                    << "err = nanos_malloc(&" << nanos_red_name << "->descriptor, sizeof(" << as_type(private_reduction_vector_type) << "), "
                    << "\"" << construct.get_filename() << "\", " << construct.get_line() << ");"
                    << "if (err != NANOS_OK)"
                    <<     "nanos_handle_error(err);"
                    << "err = nanos_memcpy(" << nanos_red_name << "->descriptor, "
                    "&rdv_" << (*it)->get_field_name() << ", sizeof(" << as_type(private_reduction_vector_type) << "));"
                    << "if (err != NANOS_OK)"
                    <<     "nanos_handle_error(err);"
                    ;

                thread_fetching_reduction_info
                    << "err = nanos_reduction_get(&" << nanos_red_name << ", &" << (*it)->get_symbol().get_name() << ");"
                    << "if (err != NANOS_OK)"
                    <<     "nanos_handle_error(err);"
                    << "err = nanos_memcpy("
                    << "&rdv_" << (*it)->get_field_name() << ","
                    << nanos_red_name << "->descriptor, "
                    << "sizeof(" << as_type(private_reduction_vector_type) << "));"
                    << "if (err != NANOS_OK)"
                    <<     "nanos_handle_error(err);"
                    ;

                TL::Symbol reduction_cleanup = create_reduction_cleanup_function(reduction, construct);
                cleanup_code
                    << nanos_red_name << "->cleanup = " << as_symbol(reduction_cleanup) << ";"
                    ;
            }
            else
            {
                internal_error("Code unreachable", 0);
            }
        }

        FORTRAN_LANGUAGE()
        {
            Source::source_language = SourceLanguage::C;
        }
        ref_tree.replace(result.parse_statement(ref_tree));
        FORTRAN_LANGUAGE()
        {
            Source::source_language = SourceLanguage::Current;
        }
    }
Beispiel #10
0
    TL::Symbol LoweringVisitor::create_basic_reduction_function_fortran(OpenMP::Reduction* red, Nodecl::NodeclBase construct)
    {
        reduction_map_t::iterator it = _basic_reduction_map_openmp.find(red);
        if (it != _basic_reduction_map_openmp.end())
        {
            return it->second;
        }

        std::string fun_name;
        {
            std::stringstream ss;
            ss << "nanos_red_" << red << "_" << simple_hash_str(construct.get_filename().c_str());
            fun_name = ss.str();
        }

        Nodecl::NodeclBase function_body;
        Source src;

        src << "SUBROUTINE " << fun_name << "(omp_out, omp_in, num_scalars)\n"
            <<    "IMPLICIT NONE\n"
            <<    as_type(red->get_type()) << " :: omp_out(num_scalars)\n" 
            <<    as_type(red->get_type()) << " :: omp_in(num_scalars)\n"
            <<    "INTEGER, VALUE :: num_scalars\n"
            <<    "INTEGER :: I\n"
            <<    statement_placeholder(function_body) << "\n"
            << "END SUBROUTINE " << fun_name << "\n";
        ;

        Nodecl::NodeclBase function_code = src.parse_global(construct);

        TL::Scope inside_function = ReferenceScope(function_body).get_scope();
        TL::Symbol param_omp_in = inside_function.get_symbol_from_name("omp_in");
        ERROR_CONDITION(!param_omp_in.is_valid(), "Symbol omp_in not found", 0);
        TL::Symbol param_omp_out = inside_function.get_symbol_from_name("omp_out");
        ERROR_CONDITION(!param_omp_out.is_valid(), "Symbol omp_out not found", 0);

        TL::Symbol function_sym = inside_function.get_symbol_from_name(fun_name);
        ERROR_CONDITION(!function_sym.is_valid(), "Symbol %s not found", fun_name.c_str());

        TL::Symbol index = inside_function.get_symbol_from_name("i");
        ERROR_CONDITION(!index.is_valid(), "Symbol %s not found", "i");
        TL::Symbol num_scalars = inside_function.get_symbol_from_name("num_scalars");
        ERROR_CONDITION(!num_scalars.is_valid(), "Symbol %s not found", "num_scalars");

        Nodecl::NodeclBase num_scalars_ref = Nodecl::Symbol::make(num_scalars);

        num_scalars_ref.set_type(num_scalars.get_type().no_ref().get_lvalue_reference_to());

        Nodecl::Symbol nodecl_index = Nodecl::Symbol::make(index);
        nodecl_index.set_type(index.get_type().get_lvalue_reference_to());

        Nodecl::NodeclBase loop_header = Nodecl::RangeLoopControl::make(
                nodecl_index,
                const_value_to_nodecl(const_value_get_signed_int(1)),
                num_scalars_ref,
                Nodecl::NodeclBase::null());

        Nodecl::NodeclBase expanded_combiner =
            red->get_combiner().shallow_copy();
        BasicReductionExpandVisitor expander_visitor(
                red->get_omp_in(),
                param_omp_in,
                red->get_omp_out(),
                param_omp_out,
                index);
        expander_visitor.walk(expanded_combiner);

        function_body.replace(
                Nodecl::ForStatement::make(loop_header,
                    Nodecl::List::make(
                        Nodecl::ExpressionStatement::make(
                            expanded_combiner)),
                    Nodecl::NodeclBase::null()));

        _basic_reduction_map_openmp[red] = function_sym;

        if (IS_FORTRAN_LANGUAGE)
        {
            Nodecl::Utils::Fortran::append_used_modules(construct.retrieve_context(),
                    function_sym.get_related_scope());
        }

        Nodecl::Utils::append_to_enclosing_top_level_location(construct, function_code);

        return function_sym;
    }