Nodecl::NodeclVisitor<void>::Ret AVX2StrideVisitorConv::unhandled_node(const Nodecl::NodeclBase& node) 
        {
            //printf("Unsupported %d: %s\n", _vector_num_elements, node.prettyprint().c_str()); 
            
            if (node.get_type().is_vector())
            {

                Nodecl::NodeclBase new_node = node.shallow_copy().as<Nodecl::NodeclBase>();

                new_node.set_type(TL::Type::get_int_type().get_vector_of_elements(
                            _vector_num_elements));

                // TODO better
                node.replace(new_node);

                Nodecl::NodeclBase::Children children = node.children();
                for(Nodecl::NodeclBase::Children::iterator it = children.begin();
                        it != children.end();
                        it ++)
                {
                    walk(*it);
                }
            }
            return Ret(); 
        }
    static void handle_ompss_opencl_deallocate_intrinsic(
            Nodecl::FunctionCall function_call,
            Nodecl::NodeclBase expr_stmt)
    {
        Nodecl::List arguments = function_call.get_arguments().as<Nodecl::List>();
        ERROR_CONDITION(arguments.size() != 1, "More than one argument in ompss_opencl_deallocate call", 0);

        Nodecl::NodeclBase actual_argument = arguments[0];
        ERROR_CONDITION(!actual_argument.is<Nodecl::FortranActualArgument>(), "Unexpected tree", 0);

        Nodecl::NodeclBase arg = actual_argument.as<Nodecl::FortranActualArgument>().get_argument();
        TL::Symbol array_sym = ::fortran_data_ref_get_symbol(arg.get_internal_nodecl());

        ERROR_CONDITION(
                !(array_sym.get_type().is_fortran_array()
                    && array_sym.is_allocatable())
                &&
                !(array_sym.get_type().is_pointer()
                    && array_sym.get_type().points_to().is_fortran_array()),
                "The argument of 'ompss_opencl_deallocate' intrinsic must be "
                "an allocatable array or a pointer to an array\n", 0);

        // Replace the current intrinsic call by a call to the Nanos++ API
        TL::Symbol ptr_of_arr_sym = get_function_ptr_of(array_sym, expr_stmt.retrieve_context());

        TL::Source new_function_call;
        new_function_call
            << "CALL NANOS_OPENCL_DEALLOCATE_FORTRAN("
            <<      ptr_of_arr_sym.get_name() << "("<< as_expression(arg) << "))\n"
            ;

        expr_stmt.replace(new_function_call.parse_statement(expr_stmt));
    }
    Nodecl::NodeclBase handle_task_statements(
          Nodecl::NodeclBase construct,
          Nodecl::NodeclBase task_statements,
          Nodecl::NodeclBase& task_placeholder, // Do not remove the reference
          TL::Source &new_stmts_src,            // It should be a const reference
          const std::map<TL::Symbol, std::string> &reduction_symbols_map)
    {
       if (IS_FORTRAN_LANGUAGE)
          Source::source_language = SourceLanguage::C;

       Nodecl::NodeclBase new_statements = new_stmts_src.parse_statement(construct);

       if (IS_FORTRAN_LANGUAGE)
          Source::source_language = SourceLanguage::Current;

       TL::Scope new_scope = ReferenceScope(task_placeholder).get_scope();
       std::map<TL::Symbol, Nodecl::NodeclBase> reduction_symbol_to_nodecl_map;
       for (std::map<TL::Symbol, std::string>::const_iterator it = reduction_symbols_map.begin();
             it != reduction_symbols_map.end();
             ++it)
       {
          TL::Symbol reduction_sym = it->first;
          std::string storage_name = it->second;
          TL::Symbol storage_sym = new_scope.get_symbol_from_name(storage_name);
          ERROR_CONDITION(!storage_sym.is_valid(), "This symbol is not valid", 0);

          Nodecl::NodeclBase deref_storage = Nodecl::Dereference::make(
                storage_sym.make_nodecl(/* set_ref_type */ true, storage_sym.get_locus()),
                storage_sym.get_type().points_to());

          reduction_symbol_to_nodecl_map[reduction_sym] = deref_storage;
       }

       ReplaceReductionSymbols visitor(reduction_symbol_to_nodecl_map);
       Nodecl::NodeclBase copied_statements = task_statements.shallow_copy();
       visitor.walk(copied_statements);
       task_placeholder.replace(copied_statements);

       return new_statements;
    }
    bool LoweringVisitor::handle_reductions_on_task(
            Nodecl::NodeclBase construct,
            OutlineInfo& outline_info,
            Nodecl::NodeclBase statements,
            bool generate_final_stmts,
            Nodecl::NodeclBase& final_statements)
    {
        int num_reductions = 0;

        TL::Source
            reductions_stuff,
            final_clause_stuff,
            // This source represents an expression which is used to check if
            // we can do an optimization in the final code. This optimization
            // consists on calling the original code (with a serial closure) if
            // we are in a final context and the reduction variables that we
            // are using have not been registered previously
            final_clause_opt_expr,
            extra_array_red_memcpy;

        std::map<TL::Symbol, std::string> reduction_symbols_map;

        TL::ObjectList<OutlineDataItem*> data_items = outline_info.get_data_items();
        for (TL::ObjectList<OutlineDataItem*>::iterator it = data_items.begin();
                it != data_items.end();
                it++)
        {
           if (!(*it)->is_reduction())
              continue;

            std::pair<TL::OpenMP::Reduction*, TL::Type> red_info_pair = (*it)->get_reduction_info();
            TL::OpenMP::Reduction* reduction_info = red_info_pair.first;
            TL::Type reduction_type = red_info_pair.second.no_ref();

            TL::Symbol reduction_item = (*it)->get_symbol();
            TL::Type reduction_item_type = reduction_item.get_type().no_ref();

            std::string storage_var_name = (*it)->get_field_name() + "_storage";
            TL::Type storage_var_type = reduction_type.get_pointer_to();


            TL::Symbol reduction_function, reduction_function_original_var, initializer_function;

            // Checking if the current reduction type has been treated before
            // Note that if that happens we can reuse the combiner and
            // initializer function.
            //
            // C/C++: note that if the type of the list item is an array type,
            // we regiter the reduction over its element type
            TL::Type registered_reduction_type = reduction_type;
            while (!IS_FORTRAN_LANGUAGE
                    && registered_reduction_type.is_array())
            {
                registered_reduction_type = registered_reduction_type.array_element();
            }

            LoweringVisitor::reduction_task_map_t::iterator task_red_info =
               _task_reductions_map.find(std::make_pair(reduction_info, registered_reduction_type));

            if (task_red_info != _task_reductions_map.end())
            {
              reduction_function = task_red_info->second._reducer;
              reduction_function_original_var = task_red_info->second._reducer_orig_var;
              initializer_function = task_red_info->second._initializer;
            }
            else
            {
               create_reduction_functions(reduction_info,
                     construct,
                     registered_reduction_type,
                     reduction_item,
                     reduction_function,
                     reduction_function_original_var);

               create_initializer_function(reduction_info,
                     construct,
                     registered_reduction_type,
                     initializer_function);

               _task_reductions_map.insert(
                       std::make_pair(
                           std::make_pair(reduction_info, registered_reduction_type),
                           TaskReductionsInfo(reduction_function, reduction_function_original_var, initializer_function)
                           ));
            }

            // Mandatory TL::Sources to be filled by any reduction
            TL::Source
                orig_address, // address of the original reduction variable
                storage_var; // variable which holds the address of the storage

            // Specific TL::Sources to be filled only by Fortran array reduction
            TL::Source extra_array_red_decl;

            if (IS_C_LANGUAGE || IS_CXX_LANGUAGE)
            {
                storage_var << storage_var_name;
                orig_address << (reduction_item_type.is_pointer() ? "" : "&") << (*it)->get_field_name();

                final_clause_stuff
                    << "if (" << storage_var_name << " == 0)"
                    << "{"
                    <<     storage_var_name  << " = "
                    <<        "(" << as_type(storage_var_type) << ")" << orig_address << ";"
                    << "}"
                    ;
            }
            else
            {
               orig_address <<  "&" << (*it)->get_field_name();
                if (reduction_item_type.is_array())
                {
                    size_t size_of_array_descriptor =
                        fortran_size_of_array_descriptor(
                                fortran_get_rank0_type(reduction_item_type.get_internal_type()),
                                fortran_get_rank_of_type(reduction_item_type.get_internal_type()));


                    storage_var << storage_var_name << "_indirect";
                    extra_array_red_decl << "void *" << storage_var << ";";

                    extra_array_red_memcpy
                        << "nanos_err = nanos_memcpy("
                        <<      "(void **) &" << storage_var_name << ","
                        <<      storage_var << ","
                        <<      size_of_array_descriptor << ");"
                            ;

                    final_clause_stuff
                        << "if (" << storage_var << " == 0)"
                        << "{"
                        <<     "nanos_err = nanos_memcpy("
                        <<         "(void **) &" << storage_var_name << ","
                        <<         "(void *) "<< orig_address << ","
                        <<         size_of_array_descriptor << ");"
                        << "}"
                        << "else"
                        << "{"
                        <<     extra_array_red_memcpy
                        << "}"
                        ;
                }
                else
                {
                    // We need to convert a void* type into a pointer to the reduction type.
                    // As a void* in FORTRAN is represented as an INTEGER(8), we cannot do this
                    // conversion directly in the FORTRAN source. For this reason we introduce
                    // a new function that will be defined in a C file.
                    TL::Symbol func = TL::Nanox::get_function_ptr_conversion(
                            // Destination
                            reduction_item_type.get_pointer_to(),
                            // Origin
                            TL::Type::get_void_type().get_pointer_to(),
                            construct.retrieve_context());

                    storage_var << storage_var_name;

                    final_clause_stuff
                        << "if (" << storage_var << " == 0)"
                        << "{"
                        <<     storage_var_name << " = " << func.get_name() << "(" <<  orig_address << ");"
                        << "}"
                        ;
                }
            }

            if (num_reductions > 0)
                final_clause_opt_expr << " && ";
            final_clause_opt_expr << storage_var << " == 0 ";
            num_reductions++;

            reductions_stuff
                << extra_array_red_decl
                << as_type(storage_var_type) << " " << storage_var_name << ";"
                << "nanos_err = nanos_task_reduction_get_thread_storage("
                <<         "(void *)" << orig_address  << ","
                <<         "(void **) &" << storage_var << ");"
                ;

            reduction_symbols_map[reduction_item] = storage_var_name;
        }

        if (num_reductions != 0)
        {
            // Generating the final code if needed
            if (generate_final_stmts)
            {
                std::map<Nodecl::NodeclBase, Nodecl::NodeclBase>::iterator it4 = _final_stmts_map.find(construct);
                ERROR_CONDITION(it4 == _final_stmts_map.end(), "Unreachable code", 0);

                Nodecl::NodeclBase placeholder;
                TL::Source new_statements_src;
                new_statements_src
                    << "{"
                    <<      "nanos_err_t nanos_err;"
                    <<      reductions_stuff
                    <<      "if (" << final_clause_opt_expr  << ")"
                    <<      "{"
                    <<          as_statement(it4->second)
                    <<      "}"
                    <<      "else"
                    <<      "{"
                    <<          final_clause_stuff
                    <<          statement_placeholder(placeholder)
                    <<      "}"
                    << "}"
                    ;

                final_statements = handle_task_statements(
                      construct, statements, placeholder, new_statements_src, reduction_symbols_map);
            }

            // Generating the task code
            {
                TL::Source new_statements_src;
                Nodecl::NodeclBase placeholder;
                new_statements_src
                    << "{"
                    <<      "nanos_err_t nanos_err;"
                    <<      reductions_stuff
                    <<      extra_array_red_memcpy
                    <<      statement_placeholder(placeholder)
                    << "}"
                    ;

                Nodecl::NodeclBase new_statements = handle_task_statements(
                      construct, statements, placeholder, new_statements_src, reduction_symbols_map);
                statements.replace(new_statements);
            }
        }

        ERROR_CONDITION(num_reductions != 0 &&
                !Nanos::Version::interface_is_at_least("task_reduction", 1001),
                "The version of the runtime begin used does not support task reductions", 0);

        return (num_reductions != 0);
    }
    void LoweringVisitor::loop_spawn_worksharing(OutlineInfo& outline_info,
            Nodecl::NodeclBase construct,
            Nodecl::List distribute_environment,
            Nodecl::RangeLoopControl& range,
            const std::string& outline_name,
            TL::Symbol structure_symbol,
            TL::Symbol slicer_descriptor,
            Nodecl::NodeclBase task_label)
    {
        Symbol enclosing_function = Nodecl::Utils::get_enclosing_function(construct);

        Nodecl::OpenMP::Schedule schedule = distribute_environment.find_first<Nodecl::OpenMP::Schedule>();
        ERROR_CONDITION(schedule.is_null(), "Schedule tree is missing", 0);

        Nodecl::NodeclBase lower = range.get_lower();
        Nodecl::NodeclBase upper = range.get_upper();
        Nodecl::NodeclBase step = range.get_step();

        Source struct_size, dynamic_size, struct_arg_type_name;

        struct_arg_type_name
            << ((structure_symbol.get_type().is_template_specialized_type()
                        &&  structure_symbol.get_type().is_dependent()) ? "typename " : "")
            << structure_symbol.get_qualified_name(enclosing_function.get_scope())
            ;

        struct_size << "sizeof( " << struct_arg_type_name << " )" << dynamic_size;

        Source immediate_decl;
        allocate_immediate_structure(
                structure_symbol.get_user_defined_type(),
                outline_info,
                struct_arg_type_name,
                struct_size,
                // out
                immediate_decl,
                dynamic_size);


        Source call_outline_function;

        Source schedule_setup;
        schedule_setup
            <<     "int nanos_chunk;"
            ;
        if (schedule.get_text() == "runtime")
        {
            schedule_setup
                <<     "nanos_omp_sched_t nanos_runtime_sched;"
                <<     "nanos_err = nanos_omp_get_schedule(&nanos_runtime_sched, &nanos_chunk);"
                <<     "if (nanos_err != NANOS_OK)"
                <<         "nanos_handle_error(nanos_err);"
                <<     "nanos_ws_t current_ws_policy = nanos_omp_find_worksharing(nanos_runtime_sched);"
                ;
        }
        else
        {
            Source schedule_name;

            if (Nanos::Version::interface_is_at_least("openmp", 8))
            {
                schedule_name << "nanos_omp_sched_" << schedule.get_text();
            }
            else
            {
                // We used nanos_omp_sched in versions prior to 8
                schedule_name << "omp_sched_" << schedule.get_text();
            }

            schedule_setup
                <<     "nanos_ws_t current_ws_policy = nanos_omp_find_worksharing(" << schedule_name << ");"
                <<     "if (current_ws_policy == 0)"
                <<         "nanos_handle_error(NANOS_UNIMPLEMENTED);"
                <<     "nanos_chunk = " << as_expression(schedule.get_chunk()) << ";"
            ;
        }


        Source worksharing_creation;
        if (IS_CXX_LANGUAGE)
        {
            worksharing_creation
                << as_statement(Nodecl::CxxDef::make(Nodecl::NodeclBase::null(), slicer_descriptor));
        }
        worksharing_creation
            <<     "nanos_err = nanos_worksharing_create("
            <<                      "&" << as_symbol(slicer_descriptor) << ","
            <<                      "current_ws_policy,"
            <<                      "(void**)&nanos_setup_info_loop,"
            <<                      "&single_guard);"
            <<     "if (nanos_err != NANOS_OK)"
            <<         "nanos_handle_error(nanos_err);"
            ;

        Nodecl::NodeclBase fill_outline_arguments_tree, fill_immediate_arguments_tree;

        TL::Source pm_specific_code;
        if (!_lowering->in_ompss_mode())
        {
            // OpenMP
            pm_specific_code
                << immediate_decl
                << statement_placeholder(fill_immediate_arguments_tree)
                << "smp_" << outline_name << "(imm_args);"
                ;
        }
        else
        {
            // OmpSs
            std::string wd_description =
                (!task_label.is_null()) ? task_label.get_text() : enclosing_function.get_name();

            Source const_wd_info;
            const_wd_info
                << fill_const_wd_info(struct_arg_type_name,
                        /* is_untied */ false,
                        /* mandatory_creation */ true,
                        /* is_function_task */ false,
                        wd_description,
                        outline_info,
                        construct);

            std::string dyn_props_var = "nanos_wd_dyn_props";

            Source dynamic_wd_info;
            dynamic_wd_info << "nanos_wd_dyn_props_t " << dyn_props_var << ";";

            fill_dynamic_properties(dyn_props_var,
                    /* priority_expr */ nodecl_null(), /* final_expr */ nodecl_null(), /* is_implicit */ 0, dynamic_wd_info);

            pm_specific_code
                <<  struct_arg_type_name << " *ol_args = (" << struct_arg_type_name <<"*) 0;"
                <<  const_wd_info
                <<  "nanos_wd_t nanos_wd_ = (nanos_wd_t) 0;"
                <<  dynamic_wd_info
                <<  "static nanos_slicer_t replicate = (nanos_slicer_t)0;"
                <<  "if (replicate == (nanos_slicer_t)0)"
                <<      "replicate = nanos_find_slicer(\"replicate\");"
                <<  "if (replicate == (nanos_slicer_t)0)"
                <<      "nanos_handle_error(NANOS_UNIMPLEMENTED);"
                <<  "nanos_err = nanos_create_sliced_wd(&nanos_wd_, "
                <<                                "nanos_wd_const_data.base.num_devices, nanos_wd_const_data.devices, "
                <<                                "(size_t)" << struct_size << ",  nanos_wd_const_data.base.data_alignment, "
                <<                                "(void**)&ol_args, nanos_current_wd(), replicate,"
                <<                                "&nanos_wd_const_data.base.props, &" << dyn_props_var << ", 0, (nanos_copy_data_t**)0,"
                <<                                "0, (nanos_region_dimension_internal_t**)0"
                <<                                ");"
                <<  "if (nanos_err != NANOS_OK)"
                <<      "nanos_handle_error(nanos_err);"
                <<  statement_placeholder(fill_outline_arguments_tree)
                <<  "nanos_err = nanos_submit(nanos_wd_, 0, (nanos_data_access_t *) 0, (nanos_team_t) 0);"
                <<  "if (nanos_err != NANOS_OK)"
                <<      "nanos_handle_error(nanos_err);"
                ;

        }

        TL::Source implicit_barrier_or_tw;
        if (!distribute_environment.find_first<Nodecl::OpenMP::BarrierAtEnd>().is_null())
        {
            implicit_barrier_or_tw << get_implicit_sync_end_construct_source();
        }

        Source spawn_code;
        spawn_code
            << "{"
            <<      as_type(get_bool_type()) << " single_guard;"
            <<      "nanos_err_t nanos_err;"
            <<      schedule_setup
            <<      "nanos_ws_info_loop_t nanos_setup_info_loop;"
            <<      "nanos_setup_info_loop.lower_bound = " << as_expression(lower) << ";"
            <<      "nanos_setup_info_loop.upper_bound = " << as_expression(upper) << ";"
            <<      "nanos_setup_info_loop.loop_step = "   << as_expression(step)  << ";"
            <<      "nanos_setup_info_loop.chunk_size = nanos_chunk;"
            <<      worksharing_creation
            <<      pm_specific_code
            <<      implicit_barrier_or_tw
            << "}"
            ;

        Source fill_outline_arguments, fill_immediate_arguments;
        fill_arguments(construct, outline_info, fill_outline_arguments, fill_immediate_arguments);

        if (IS_FORTRAN_LANGUAGE)
            Source::source_language = SourceLanguage::C;

        Nodecl::NodeclBase spawn_code_tree = spawn_code.parse_statement(construct);

        if (IS_FORTRAN_LANGUAGE)
            Source::source_language = SourceLanguage::Current;

        Nodecl::NodeclBase arguments_tree;
        TL::Source *fill_arguments;
        if (!_lowering->in_ompss_mode())
        {
            // OpenMP
            arguments_tree = fill_immediate_arguments_tree;
            fill_arguments = &fill_immediate_arguments;
        }
        else
        {
            // OmpSs
            arguments_tree = fill_outline_arguments_tree;
            fill_arguments = &fill_outline_arguments;
        }

        // Now attach the slicer symbol to its final scope (see tl-lower-for-worksharing.cpp)
        const decl_context_t* spawn_inner_context = arguments_tree.retrieve_context().get_decl_context();
        slicer_descriptor.get_internal_symbol()->decl_context = spawn_inner_context;
        ::insert_entry(spawn_inner_context->current_scope, slicer_descriptor.get_internal_symbol());

        // Parse the arguments
        Nodecl::NodeclBase new_tree = fill_arguments->parse_statement(arguments_tree);
        arguments_tree.replace(new_tree);

        // Finally, replace the construct by the tree that represents the spawn code
        construct.replace(spawn_code_tree);
    }
    static void handle_ompss_opencl_allocate_intrinsic(
            Nodecl::FunctionCall function_call,
            std::map<std::pair<TL::Type, std::pair<int, bool> > , Symbol> &declared_ocl_allocate_functions,
            Nodecl::NodeclBase expr_stmt)
    {
        Nodecl::List arguments = function_call.get_arguments().as<Nodecl::List>();
        ERROR_CONDITION(arguments.size() != 1, "More than one argument in 'ompss_opencl_allocate' call\n", 0);

        Nodecl::NodeclBase actual_argument = arguments[0];
        ERROR_CONDITION(!actual_argument.is<Nodecl::FortranActualArgument>(), "Unexpected tree\n", 0);

        Nodecl::NodeclBase arg = actual_argument.as<Nodecl::FortranActualArgument>().get_argument();
        ERROR_CONDITION(!arg.is<Nodecl::ArraySubscript>(), "Unreachable code\n", 0);

        Nodecl::NodeclBase subscripted = arg.as<Nodecl::ArraySubscript>().get_subscripted();
        TL::Symbol subscripted_symbol = ::fortran_data_ref_get_symbol(subscripted.get_internal_nodecl());

        ERROR_CONDITION(
                !(subscripted_symbol.get_type().is_fortran_array()
                    && subscripted_symbol.is_allocatable())
                &&
                !(subscripted_symbol.get_type().is_pointer()
                    && subscripted_symbol.get_type().points_to().is_fortran_array()),
                "The argument of 'ompss_opencl_allocate' intrinsic must be "
                "an allocatable array or a pointer to an array with all its bounds specified\n", 0);

        TL::Type array_type;
        int num_dimensions;
        bool is_allocatable;
        if (subscripted_symbol.is_allocatable())
        {
            array_type = subscripted_symbol.get_type();
            num_dimensions = subscripted_symbol.get_type().get_num_dimensions();
            is_allocatable = true;
        }
        else
        {
            array_type = subscripted_symbol.get_type().points_to();
            num_dimensions = array_type.get_num_dimensions();
            is_allocatable = false;
        }

        TL::Type element_type = array_type;
        while (element_type.is_array())
        {
            element_type = element_type.array_element();
        }

        ERROR_CONDITION(!array_type.is_array(), "This type should be an array type", 0);

        std::pair<TL::Type, std::pair<int, bool> > key =
            std::make_pair(element_type, std::make_pair(num_dimensions, is_allocatable));

        std::map<std::pair<TL::Type, std::pair<int, bool> > , Symbol>::iterator it_new_fun =
            declared_ocl_allocate_functions.find(key);

        // Reuse the auxiliar function if it already exists
        Symbol new_function_sym;
        if (it_new_fun != declared_ocl_allocate_functions.end())
        {
            new_function_sym = it_new_fun->second;
        }
        else
        {
            new_function_sym = create_new_function_opencl_allocate(
                    expr_stmt, subscripted_symbol, element_type, num_dimensions, is_allocatable);

            declared_ocl_allocate_functions[key] = new_function_sym;
        }

        // Replace the current intrinsic call by a call to the new function
        TL::Source actual_arg_array;
        Nodecl::NodeclBase subscripted_lvalue = subscripted.shallow_copy();
        subscripted_lvalue.set_type(subscripted_symbol.get_type().no_ref().get_lvalue_reference_to());

        actual_arg_array << as_expression(subscripted_lvalue);

        TL::Source actual_arg_bounds;
        Nodecl::List subscripts = arg.as<Nodecl::ArraySubscript>().get_subscripts().as<Nodecl::List>();
        for (Nodecl::List::reverse_iterator it = subscripts.rbegin();
                it != subscripts.rend();
                it++)
        {
            Nodecl::NodeclBase subscript = *it, lower, upper;

            if (it != subscripts.rbegin())
                actual_arg_bounds << ", ";

            if (subscript.is<Nodecl::Range>())
            {
                lower = subscript.as<Nodecl::Range>().get_lower();
                upper = subscript.as<Nodecl::Range>().get_upper();
            }
            else
            {
                lower = nodecl_make_integer_literal(
                        fortran_get_default_integer_type(),
                        const_value_get_signed_int(1), make_locus("", 0, 0));
                upper = subscript;
            }
            actual_arg_bounds << as_expression(lower) << "," << as_expression(upper);
        }

        TL::Source new_function_call;
        new_function_call
            << "CALL " << as_symbol(new_function_sym) << "("
            <<  actual_arg_array  << ", "
            <<  actual_arg_bounds << ")\n"
            ;

        expr_stmt.replace(new_function_call.parse_statement(expr_stmt));

    }
Example #7
0
    void LoweringVisitor::perform_partial_reduction(OutlineInfo& outline_info, Nodecl::NodeclBase ref_tree)
    {
        ERROR_CONDITION(ref_tree.is_null(), "Invalid tree", 0);

        Source reduction_code;

        TL::ObjectList<OutlineDataItem*> reduction_items = outline_info.get_data_items().filter(
                predicate(lift_pointer(functor(&OutlineDataItem::is_reduction))));
        if (!reduction_items.empty())
        {
            for (TL::ObjectList<OutlineDataItem*>::iterator it = reduction_items.begin();
                    it != reduction_items.end();
                    it++)
            {
                if (IS_C_LANGUAGE || IS_CXX_LANGUAGE)
                {
                    if ((*it)->get_private_type().is_array())
                    {
                        reduction_code
                            << "__builtin_memcpy(rdv_" << (*it)->get_field_name() << "[nanos_omp_get_thread_num()],"
                            << "rdp_" << (*it)->get_symbol().get_name() << ","
                            << " sizeof(" << as_type((*it)->get_private_type()) << "));"
                            ;
                    }
                    else
                    {
                        reduction_code
                            << "rdv_" << (*it)->get_field_name() << "[nanos_omp_get_thread_num()] "
                            << "= rdp_" << (*it)->get_symbol().get_name() << ";"
                            ;
                    }
                }
                else if (IS_FORTRAN_LANGUAGE)
                {
                    Source extra_dims;
                    {
                        TL::Type t = (*it)->get_symbol().get_type().no_ref();
                        int rank = 0;
                        if (t.is_fortran_array())
                        {
                            rank = t.fortran_rank();
                        }

                        int i;
                        for (i = 0; i < rank; i++)
                        {
                            extra_dims << ":,";
                        }
                    }

                    reduction_code
                        << "rdv_" << (*it)->get_field_name() << "( " << extra_dims << "nanos_omp_get_thread_num() ) = rdp_" << (*it)->get_symbol().get_name() << "\n"
                        ;
                }
                else
                {
                    internal_error("Code unreachable", 0);
                }
            }
        }

        ref_tree.replace(reduction_code.parse_statement(ref_tree));
    }
Example #8
0
    void LoweringVisitor::reduction_initialization_code(
            OutlineInfo& outline_info,
            Nodecl::NodeclBase ref_tree,
            Nodecl::NodeclBase construct)
    {
        ERROR_CONDITION(ref_tree.is_null(), "Invalid tree", 0);

        if (!Nanos::Version::interface_is_at_least("master", 5023))
        {
            running_error("%s: error: a newer version of Nanos++ (>=5023) is required for reductions support\n",
                    construct.get_locus_str().c_str());
        }

        TL::ObjectList<OutlineDataItem*> reduction_items = outline_info.get_data_items().filter(
                predicate(lift_pointer(functor(&OutlineDataItem::is_reduction))));
        ERROR_CONDITION (reduction_items.empty(), "No reductions to process", 0);

        Source result;

        Source reduction_declaration,
               thread_initializing_reduction_info,
               thread_fetching_reduction_info;

        result
            << reduction_declaration
            << "{"
            << as_type(get_bool_type()) << " red_single_guard;"
            << "nanos_err_t err;"
            << "err = nanos_enter_sync_init(&red_single_guard);"
            << "if (err != NANOS_OK)"
            <<     "nanos_handle_error(err);"
            << "if (red_single_guard)"
            << "{"
            <<    "int nanos_num_threads = nanos_omp_get_num_threads();"
            <<    thread_initializing_reduction_info
            <<    "err = nanos_release_sync_init();"
            <<    "if (err != NANOS_OK)"
            <<        "nanos_handle_error(err);"
            << "}"
            << "else"
            << "{"
            <<    "err = nanos_wait_sync_init();"
            <<    "if (err != NANOS_OK)"
            <<        "nanos_handle_error(err);"
            <<    thread_fetching_reduction_info
            << "}"
            << "}"
            ;

        for (TL::ObjectList<OutlineDataItem*>::iterator it = reduction_items.begin();
                it != reduction_items.end();
                it++)
        {
            std::string nanos_red_name = "nanos_red_" + (*it)->get_symbol().get_name();

            std::pair<OpenMP::Reduction*, TL::Type> reduction_info = (*it)->get_reduction_info();
            OpenMP::Reduction* reduction = reduction_info.first;
            TL::Type reduction_type = reduction_info.second;

            if (reduction_type.is_any_reference())
                reduction_type = reduction_type.references_to();

            TL::Type reduction_element_type = reduction_type;
            if (IS_FORTRAN_LANGUAGE)
            {
                while (reduction_element_type.is_fortran_array())
                    reduction_element_type = reduction_element_type.array_element();
            }
            else
            {
                while (reduction_element_type.is_array())
                    reduction_element_type = reduction_element_type.array_element();
            }

            Source element_size;
            if (IS_FORTRAN_LANGUAGE)
            {
                if (reduction_type.is_fortran_array())
                {
                    // We need to parse this bit in Fortran
                    Source number_of_bytes;
                    number_of_bytes << "SIZE(" << (*it)->get_symbol().get_name() << ") * " << reduction_element_type.get_size();

                    element_size << as_expression(number_of_bytes.parse_expression(construct));
                }
                else
                {
                    element_size << "sizeof(" << as_type(reduction_type) << ")";
                }
            }
            else
            {
                element_size << "sizeof(" << as_type(reduction_type) << ")";
            }

            reduction_declaration
                << "nanos_reduction_t* " << nanos_red_name << ";"
                ;

            Source allocate_private_buffer, cleanup_code;

            Source num_scalars;

            TL::Symbol basic_reduction_function, vector_reduction_function;
            create_reduction_function(reduction, construct, reduction_type, basic_reduction_function, vector_reduction_function);
            (*it)->reduction_set_basic_function(basic_reduction_function);

            thread_initializing_reduction_info
                << "err = nanos_malloc((void**)&" << nanos_red_name << ", sizeof(nanos_reduction_t), " 
                << "\"" << construct.get_filename() << "\", " << construct.get_line() << ");"
                << "if (err != NANOS_OK)"
                <<     "nanos_handle_error(err);"
                << nanos_red_name << "->original = (void*)" 
                <<            (reduction_type.is_array() ? "" : "&") << (*it)->get_symbol().get_name() << ";"
                << allocate_private_buffer
                << nanos_red_name << "->vop = "
                <<      (vector_reduction_function.is_valid() ? as_symbol(vector_reduction_function) : "0") << ";"
                << nanos_red_name << "->bop = (void(*)(void*,void*,int))" << as_symbol(basic_reduction_function) << ";"
                << nanos_red_name << "->element_size = " << element_size << ";"
                << nanos_red_name << "->num_scalars = " << num_scalars << ";"
                << cleanup_code
                << "err = nanos_register_reduction(" << nanos_red_name << ");"
                << "if (err != NANOS_OK)"
                <<     "nanos_handle_error(err);"
                ;

            if (IS_C_LANGUAGE
                    || IS_CXX_LANGUAGE)
            {
                if (reduction_type.is_array())
                {
                    num_scalars << "sizeof(" << as_type(reduction_type) << ") / sizeof(" << as_type(reduction_element_type) <<")";
                }
                else
                {
                    num_scalars << "1";
                }

                allocate_private_buffer
                    << "err = nanos_malloc(&" << nanos_red_name << "->privates, sizeof(" << as_type(reduction_type) << ") * nanos_num_threads, "
                    << "\"" << construct.get_filename() << "\", " << construct.get_line() << ");"
                    << "if (err != NANOS_OK)"
                    <<     "nanos_handle_error(err);"
                    << nanos_red_name << "->descriptor = " << nanos_red_name << "->privates;"
                    << "rdv_" << (*it)->get_field_name() << " = (" <<  as_type( (*it)->get_private_type().get_pointer_to() ) << ")" << nanos_red_name << "->privates;"
                    ;


                thread_fetching_reduction_info
                    << "err = nanos_reduction_get(&" << nanos_red_name << ", " 
                    << (reduction_type.is_array() ? "" : "&") << (*it)->get_symbol().get_name() << ");"

                    << "if (err != NANOS_OK)"
                    <<     "nanos_handle_error(err);"
                    << "rdv_" << (*it)->get_field_name() << " = (" <<  as_type( (*it)->get_private_type().get_pointer_to() ) << ")" << nanos_red_name << "->privates;"
                    ;
                cleanup_code
                    << nanos_red_name << "->cleanup = nanos_free0;"
                    ;
            }
            else if (IS_FORTRAN_LANGUAGE)
            {

                Type private_reduction_vector_type;

                Source extra_dims;
                {
                    TL::Type t = (*it)->get_symbol().get_type().no_ref();
                    int rank = 0;
                    if (t.is_fortran_array())
                    {
                        rank = t.fortran_rank();
                    }

                    if (rank != 0)
                    {
                        // We need to parse this bit in Fortran
                        Source size_call;
                        size_call << "SIZE(" << (*it)->get_symbol().get_name() << ")";

                        num_scalars << as_expression(size_call.parse_expression(construct));
                    }
                    else
                    {
                        num_scalars << "1";
                    }
                    private_reduction_vector_type = fortran_get_n_ranked_type_with_descriptor(
                            get_void_type(), rank + 1, construct.retrieve_context().get_decl_context());

                    int i;
                    for (i = 0; i < rank; i++)
                    {
                        Source lbound_src;
                        lbound_src << "LBOUND(" << (*it)->get_symbol().get_name() << ", DIM = " << (rank - i) << ")";
                        Source ubound_src;
                        ubound_src << "UBOUND(" << (*it)->get_symbol().get_name() << ", DIM = " << (rank - i) << ")";

                        extra_dims 
                            << "["
                            << as_expression(lbound_src.parse_expression(construct))
                            << ":"
                            << as_expression(ubound_src.parse_expression(construct))
                            << "]";

                        t = t.array_element();
                    }
                }

                allocate_private_buffer
                    << "@FORTRAN_ALLOCATE@((*rdv_" << (*it)->get_field_name() << ")[0:(nanos_num_threads-1)]" << extra_dims <<");"
                    << nanos_red_name << "->privates = &(*rdv_" << (*it)->get_field_name() << ");"
                    << "err = nanos_malloc(&" << nanos_red_name << "->descriptor, sizeof(" << as_type(private_reduction_vector_type) << "), "
                    << "\"" << construct.get_filename() << "\", " << construct.get_line() << ");"
                    << "if (err != NANOS_OK)"
                    <<     "nanos_handle_error(err);"
                    << "err = nanos_memcpy(" << nanos_red_name << "->descriptor, "
                    "&rdv_" << (*it)->get_field_name() << ", sizeof(" << as_type(private_reduction_vector_type) << "));"
                    << "if (err != NANOS_OK)"
                    <<     "nanos_handle_error(err);"
                    ;

                thread_fetching_reduction_info
                    << "err = nanos_reduction_get(&" << nanos_red_name << ", &" << (*it)->get_symbol().get_name() << ");"
                    << "if (err != NANOS_OK)"
                    <<     "nanos_handle_error(err);"
                    << "err = nanos_memcpy("
                    << "&rdv_" << (*it)->get_field_name() << ","
                    << nanos_red_name << "->descriptor, "
                    << "sizeof(" << as_type(private_reduction_vector_type) << "));"
                    << "if (err != NANOS_OK)"
                    <<     "nanos_handle_error(err);"
                    ;

                TL::Symbol reduction_cleanup = create_reduction_cleanup_function(reduction, construct);
                cleanup_code
                    << nanos_red_name << "->cleanup = " << as_symbol(reduction_cleanup) << ";"
                    ;
            }
            else
            {
                internal_error("Code unreachable", 0);
            }
        }

        FORTRAN_LANGUAGE()
        {
            Source::source_language = SourceLanguage::C;
        }
        ref_tree.replace(result.parse_statement(ref_tree));
        FORTRAN_LANGUAGE()
        {
            Source::source_language = SourceLanguage::Current;
        }
    }
Example #9
0
    void LoweringVisitor::perform_partial_reduction_slicer(OutlineInfo& outline_info,
            Nodecl::NodeclBase ref_tree,
            Nodecl::Utils::SimpleSymbolMap*& symbol_map)
    {
        ERROR_CONDITION(ref_tree.is_null(), "Invalid tree", 0);

        TL::ObjectList<OutlineDataItem*> reduction_items = outline_info.get_data_items().filter(
               lift_pointer<bool, OutlineDataItem>(&OutlineDataItem::is_reduction));
        if (!reduction_items.empty())
        {
            TL::ObjectList<Nodecl::NodeclBase> reduction_stmts;

            Nodecl::Utils::SimpleSymbolMap* simple_symbol_map = new Nodecl::Utils::SimpleSymbolMap(symbol_map);
            symbol_map = simple_symbol_map;

            for (TL::ObjectList<OutlineDataItem*>::iterator it = reduction_items.begin();
                    it != reduction_items.end();
                    it++)
            {
                scope_entry_t* shared_symbol = (*it)->get_symbol().get_internal_symbol();

                // We need this to avoid the original symbol be replaced
                // incorrectly
                scope_entry_t* shared_symbol_proxy = NEW0(scope_entry_t);
                shared_symbol_proxy->symbol_name = UNIQUESTR_LITERAL("<<reduction-variable>>"); // Crude way to ensure it is replaced
                shared_symbol_proxy->kind = shared_symbol->kind;
                symbol_entity_specs_copy_from(shared_symbol_proxy, shared_symbol);
                shared_symbol_proxy->decl_context = shared_symbol->decl_context;
                shared_symbol_proxy->type_information = shared_symbol->type_information;
                shared_symbol_proxy->locus = shared_symbol->locus;

                simple_symbol_map->add_map( shared_symbol_proxy,
                        (*it)->reduction_get_shared_symbol_in_outline() );

                Source reduction_code;
                Nodecl::NodeclBase partial_reduction_code;
                reduction_code
                    << "{"
                    << "nanos_lock_t* red_lock;"
                    << "nanos_err_t nanos_err;"
                    << "nanos_err = nanos_get_lock_address("
                    <<       ((*it)->get_private_type().is_array() ? "" : "&")
                    <<             as_symbol( shared_symbol_proxy ) << ", &red_lock);"
                    << "if (nanos_err != NANOS_OK) nanos_handle_error(nanos_err);"

                    << "nanos_err = nanos_set_lock(red_lock);"
                    << "if (nanos_err != NANOS_OK) nanos_handle_error(nanos_err);"
                    << statement_placeholder(partial_reduction_code)
                    << "nanos_err = nanos_unset_lock(red_lock);"
                    << "if (nanos_err != NANOS_OK) nanos_handle_error(nanos_err);"
                    << "}"
                    ;

                FORTRAN_LANGUAGE()
                {
                    Source::source_language = SourceLanguage::C;
                }
                Nodecl::NodeclBase statement = reduction_code.parse_statement(ref_tree);
                FORTRAN_LANGUAGE()
                {
                    Source::source_language = SourceLanguage::Current;
                }

                ERROR_CONDITION(!statement.is<Nodecl::List>(), "Expecting a list", 0);
                reduction_stmts.append(statement.as<Nodecl::List>()[0]);

                TL::Type elemental_type = (*it)->get_private_type();
                while (elemental_type.is_array())
                    elemental_type = elemental_type.array_element();

                Source partial_reduction_code_src;
                if (IS_C_LANGUAGE || IS_CXX_LANGUAGE)
                {
                    partial_reduction_code_src
                        << as_symbol( (*it)->reduction_get_basic_function() ) << "("
                        // This will be the reduction shared
                        <<       ((*it)->get_private_type().is_array() ? "" : "&")
                        <<       as_symbol( shared_symbol_proxy ) << ", "
                        // This will be the reduction private var
                        <<       ((*it)->get_private_type().is_array() ? "" : "&")
                        <<       as_symbol( (*it)->get_symbol() ) << ", "
                        <<    ((*it)->get_private_type().is_array() ?
                               (
                                  "sizeof(" + as_type( (*it)->get_private_type()) + ")"
                                   "/ sizeof(" + as_type(elemental_type) + ")"
                                )
                                : "1")
                        << ");"
                        ;

                }
                else if (IS_FORTRAN_LANGUAGE)
                {
                    // We use an ELEMENTAL call here
                    partial_reduction_code_src
                        << "CALL " << as_symbol ( (*it)->reduction_get_basic_function() ) << "("
                        // This will be the reduction shared
                        <<    as_symbol( shared_symbol_proxy ) << ", "
                        // This will be the reduction private var
                        <<    as_symbol( (*it)->get_symbol() )
                        << ")"
                        ;
                }
                else
                {
                    internal_error("Code unreachable", 0);
                }

                partial_reduction_code.replace(
                        partial_reduction_code_src.parse_statement(partial_reduction_code));
            }
            ref_tree.replace(
                    Nodecl::CompoundStatement::make(
                        Nodecl::List::make(reduction_stmts),
                        Nodecl::NodeclBase::null()
                        )
                    );
        }