void visit(const Nodecl::ObjectInit& node)
        {
            // The initializer is not reached through the node itself: it is
            // read from the symbol attached to the node and walked only when
            // it exists.
            TL::Symbol initialized_symbol = node.get_symbol();
            if (!initialized_symbol.get_value().is_null())
            {
                walk(initialized_symbol.get_value());
            }
        }
    // Visits an object initialization: walks the value stored in the
    // initialized symbol, if the symbol has one.
    void VectorizerVisitorPostprocessor::visit(const Nodecl::ObjectInit& n)
    {
        Nodecl::NodeclBase initializer = n.get_symbol().get_value();

        // Nothing to traverse when the symbol has no value
        if (initializer.is_null())
            return;

        walk(initializer);
    }
예제 #3
0
 // Visits a symbol reference. When the symbol has no data-sharing recorded
 // (ignoring the DS_IMPLICIT bit), it is registered as an implicit
 // firstprivate and a warning is printed to stderr.
 void visit(const Nodecl::Symbol& node)
 {
     TL::Symbol referenced = node.get_symbol();

     // Mask out DS_IMPLICIT before comparing; enclosing data-sharing
     // environments are not consulted
     const bool has_data_sharing =
         (_data_sharing.get_data_sharing(referenced, /* check_enclosing */ false) & ~DS_IMPLICIT)
         != DS_UNDEFINED;
     if (has_data_sharing)
         return;

     // Record the symbol as an implicit firstprivate and tell the user
     _data_sharing.set_data_sharing(referenced,
             TL::OpenMP::DataSharingAttribute( DS_FIRSTPRIVATE | DS_IMPLICIT) );
     std::cerr << node.get_locus_str()
         << ": warning: assuming '"
         << referenced.get_qualified_name()
         << "' as firstprivate"
         << std::endl;
 }
예제 #4
0
        // Visits an object initialization: passes the initialized symbol to
        // fix_mask_symbol and then walks the symbol's initializer, if any.
        void SSEVectorLegalization::visit(const Nodecl::ObjectInit& node) 
        {
            TL::Symbol sym = node.get_symbol();
            fix_mask_symbol(sym);

            // The initializer is read after fix_mask_symbol has seen the symbol
            Nodecl::NodeclBase init = sym.get_value();
            if (!init.is_null())
            {
                walk(init);
            }
        }
예제 #5
0
    // Registers a Fortran module among the modules used by a scope.
    //
    // module: must be a valid symbol that is a Fortran module (checked with
    //         ERROR_CONDITION).
    // scope:  scope whose "used modules" info receives the module.
    //
    // The module is added once to the related symbols of the used-modules
    // info and, unless it is flagged as builtin, it is loaded by name.
    void Fortran::append_module_to_scope(TL::Symbol module,
            TL::Scope scope)
    {
        ERROR_CONDITION(!(module.is_valid() && module.is_fortran_module()),
                "Symbol must be a Fortran module", 0);

        scope_entry_t* module_entry = module.get_internal_symbol();

        scope_entry_t* used_modules_info
            = ::get_or_create_used_modules_symbol_info(scope.get_decl_context());

        P_LIST_ADD_ONCE(used_modules_info->entity_specs.related_symbols,
                used_modules_info->entity_specs.num_related_symbols,
                module_entry);

        // Builtin modules are not loaded
        if (module_entry->entity_specs.is_builtin)
            return;

        fortran_load_module(module_entry->symbol_name, /* intrinsic */ 0, make_locus("", 0, 0));
    }
예제 #6
0
        // Visits an object initialization: when the node has a symbol and
        // that symbol has an initializer, the initializer is walked.
        void NeonVectorBackend::visit(const Nodecl::ObjectInit& n)
        {
            // Nodes without a symbol have nothing to traverse
            if (!n.has_symbol())
                return;

            TL::Symbol sym = n.get_symbol();

            Nodecl::NodeclBase init = sym.get_value();
            if (!init.is_null())
            {
                walk(init);
            }
        }
예제 #7
0
    // Creates a function-code tree for function_symbol whose body contains a
    // single empty statement.
    //
    // function_symbol: function that receives the body; it is marked as
    //                  defined and its function_code is set to the new tree.
    // function_code:   (out) the new FunctionCode, or TemplateFunctionCode
    //                  when the function is dependent.
    // empty_stmt:      (out) the empty statement placed inside the body.
    void build_empty_body_for_function(
            TL::Symbol function_symbol,
            Nodecl::NodeclBase &function_code,
            Nodecl::NodeclBase &empty_stmt)
    {
        empty_stmt = Nodecl::EmptyStatement::make(make_locus("", 0, 0));
        Nodecl::List body_statements = Nodecl::List::make(empty_stmt);

        // In C and C++ the statement list is wrapped in a compound statement
        const bool wrap_in_compound = IS_C_LANGUAGE || IS_CXX_LANGUAGE;
        if (wrap_in_compound)
        {
            Nodecl::CompoundStatement braces = Nodecl::CompoundStatement::make(
                    body_statements,
                    /* destructors */ Nodecl::NodeclBase::null(),
                    make_locus("", 0, 0));
            body_statements = Nodecl::List::make(braces);
        }

        Nodecl::NodeclBase body_context = Nodecl::Context::make(
                body_statements,
                function_symbol.get_related_scope(),
                make_locus("", 0, 0));

        function_symbol.get_internal_symbol()->defined = 1;

        if (!function_symbol.is_dependent_function())
        {
            function_code = Nodecl::FunctionCode::make(
                    body_context,
                    /* initializers */ Nodecl::NodeclBase::null(),
                    function_symbol,
                    make_locus("", 0, 0));
        }
        else
        {
            function_code = Nodecl::TemplateFunctionCode::make(
                    body_context,
                    /* initializers */ Nodecl::NodeclBase::null(),
                    function_symbol,
                    make_locus("", 0, 0));
        }

        // Link the symbol back to the tree that was just built
        function_symbol.get_internal_symbol()->entity_specs.function_code =
            function_code.get_internal_nodecl();
    }
예제 #8
0
    // Creates a new function symbol through SymbolUtils::new_function_symbol
    // using the name, return type and parameter types of 'function'. The
    // parameter names are the names of the related symbols of 'function'.
    TL::Symbol new_function_symbol(TL::Symbol function)
    {
        // Collect the names of the related symbols, in order
        TL::ObjectList<TL::Symbol> related = function.get_related_symbols();
        TL::ObjectList<std::string> parameter_names;

        TL::ObjectList<TL::Symbol>::iterator current = related.begin();
        while (current != related.end())
        {
            parameter_names.append(current->get_name());
            current++;
        }

        TL::ObjectList<TL::Type> parameter_types = function.get_type().parameters();

        return SymbolUtils::new_function_symbol(
                function,
                function.get_name(),
                function.get_type().returns(),
                parameter_names,
                parameter_types);
    }
예제 #9
0
// Appends a deep copy of every tree in stuff_to_be_copied to _fpga_file_code.
// Function definitions (FunctionCode / TemplateFunctionCode) are copied under
// a new function symbol named "<original name>_hls"; any other tree is copied
// without a symbol map.
void DeviceFPGA::copy_stuff_to_device_file(
        const TL::ObjectList<Nodecl::NodeclBase>& stuff_to_be_copied)
{
    typedef TL::ObjectList<Nodecl::NodeclBase>::const_iterator item_iterator;

    for (item_iterator item = stuff_to_be_copied.begin();
            item != stuff_to_be_copied.end();
            ++item)
    {
        const bool is_function_definition =
            item->is<Nodecl::FunctionCode>()
            || item->is<Nodecl::TemplateFunctionCode>();

        if (!is_function_definition)
        {
            _fpga_file_code.append(Nodecl::Utils::deep_copy(*item, *item));
            continue;
        }

        // Map the original function symbol to its "_hls" counterpart so the
        // copy refers to the new symbol
        TL::Symbol original = item->get_symbol();
        TL::Symbol hls_function =
            SymbolUtils::new_function_symbol(original, original.get_name() + "_hls");

        Nodecl::Utils::SimpleSymbolMap renaming;
        renaming.add_map(original, hls_function);

        _fpga_file_code.append(Nodecl::Utils::deep_copy(*item, *item, renaming));
    }
}
예제 #10
0
        // Handles a SimdFunction node: the node is replaced by the function
        // code it wraps, a deep copy of that code is vectorized and renamed
        // to "__<name>_<device>_<vector length>", registered as a vector
        // version of the function and appended as a sibling.
        //
        // The statements are order-sensitive (names are re-read after
        // set_name), so the sequence below must not be rearranged.
        void SimdVisitor::visit(const Nodecl::OpenMP::SimdFunction& simd_node)
        {
            Nodecl::FunctionCode scalar_code =
                simd_node.get_statement().as<Nodecl::FunctionCode>();

            // Drop the SimdFunction wrapper, keeping only the function code
            simd_node.replace(scalar_code);

            TL::Symbol scalar_sym = scalar_code.get_symbol();

            Nodecl::FunctionCode simd_code =
                Nodecl::Utils::deep_copy(scalar_code, scalar_code)
                .as<Nodecl::FunctionCode>();

            // Vectorize the copy
            _vectorizer.vectorize(simd_code,
                    _device_name,
                    _vector_length,
                    NULL);

            // Name of the vector version
            std::stringstream simd_name;
            simd_name << "__" << scalar_sym.get_name()
                << "_" << _device_name
                << "_" << _vector_length;

            simd_code.get_symbol().set_name(simd_name.str());

            // Register the SIMD version in the vector function versioning
            _vectorizer.add_vector_function_version(
                    scalar_sym.get_name(),
                    simd_code,
                    _device_name,
                    _vector_length,
                    NULL,
                    TL::Vectorization::SIMD_FUNC_PRIORITY);

            // Place the vectorized function code next to the scalar one
            simd_node.append_sibling(simd_code);
        }
예제 #11
0
        // Emits the trees needed to perform a reduction through a vector
        // symbol.
        //
        // scalar_symbol:         original reduction symbol.
        // vector_symbol:         vector counterpart; its value is set to a
        //                        VectorPromotion of the initializer.
        // reduction_initializer: initial value (a shallow copy is promoted).
        // reduction_name:        reduction operator; only "+" and "-" produce
        //                        a post statement, anything else adds nothing
        //                        to post_nodecls.
        // reduction_type:        not read by this function.
        // pre_nodecls:           receives the ObjectInit of vector_symbol.
        // post_nodecls:          receives the vector reduction statement.
        void VectorizerVectorReduction::vectorize_reduction(const TL::Symbol& scalar_symbol,
                TL::Symbol& vector_symbol,
                const Nodecl::NodeclBase& reduction_initializer,
                const std::string& reduction_name,
                const TL::Type& reduction_type,
                Nodecl::List& pre_nodecls,
                Nodecl::List& post_nodecls)
        {
            // Step 1: initialize the vector symbol and declare it
            vector_symbol.set_value(
                    Nodecl::VectorPromotion::make(
                        reduction_initializer.shallow_copy(),
                        vector_symbol.get_type()));

            Nodecl::ObjectInit vector_init = Nodecl::ObjectInit::make(vector_symbol);
            pre_nodecls.append(vector_init);

            // Step 2: fold the vector back into the scalar after the loop
            if (reduction_name == "+")
            {
                Nodecl::ExpressionStatement add_stmt =
                    Nodecl::ExpressionStatement::make(
                            Nodecl::VectorReductionAdd::make(
                                scalar_symbol.make_nodecl(true),
                                vector_symbol.make_nodecl(true),
                                scalar_symbol.get_type()));

                post_nodecls.append(add_stmt);
            }
            else if (reduction_name == "-")
            {
                Nodecl::ExpressionStatement minus_stmt =
                    Nodecl::ExpressionStatement::make(
                            Nodecl::VectorReductionMinus::make(
                                scalar_symbol.make_nodecl(true),
                                vector_symbol.make_nodecl(true),
                                scalar_symbol.get_type()));

                post_nodecls.append(minus_stmt);
            }
        }
예제 #12
0
    // Replaces 'construct' with generated code that creates a Nanos++ loop
    // worksharing and runs the outlined loop.
    //
    // outline_info:           data items of the outline; forwarded to the
    //                         argument-filling helpers.
    // construct:              tree to be replaced by the spawn code.
    // distribute_environment: must contain a Schedule node; a BarrierAtEnd
    //                         node, if present, adds the end-of-construct
    //                         synchronization.
    // range:                  provides lower bound, upper bound and step.
    // outline_name:           the OpenMP path calls "smp_<outline_name>".
    // structure_symbol:       symbol of the argument structure type.
    // slicer_descriptor:      symbol passed to nanos_worksharing_create; it is
    //                         inserted into the scope of the generated code.
    // task_label:             optional description of the work descriptor
    //                         (OmpSs path); the enclosing function name is
    //                         used when it is null.
    //
    // NOTE(review): several Source objects are streamed into others before
    // being filled (e.g. dynamic_size), so this presumably relies on Source
    // evaluating nested sources lazily - confirm before reordering anything.
    void LoweringVisitor::loop_spawn_worksharing(OutlineInfo& outline_info,
            Nodecl::NodeclBase construct,
            Nodecl::List distribute_environment,
            Nodecl::RangeLoopControl& range,
            const std::string& outline_name,
            TL::Symbol structure_symbol,
            TL::Symbol slicer_descriptor,
            Nodecl::NodeclBase task_label)
    {
        Symbol enclosing_function = Nodecl::Utils::get_enclosing_function(construct);

        // A schedule is mandatory
        Nodecl::OpenMP::Schedule schedule = distribute_environment.find_first<Nodecl::OpenMP::Schedule>();
        ERROR_CONDITION(schedule.is_null(), "Schedule tree is missing", 0);

        Nodecl::NodeclBase lower = range.get_lower();
        Nodecl::NodeclBase upper = range.get_upper();
        Nodecl::NodeclBase step = range.get_step();

        Source struct_size, dynamic_size, struct_arg_type_name;

        // Name of the argument structure type; dependent template
        // specializations need the 'typename' keyword
        struct_arg_type_name
            << ((structure_symbol.get_type().is_template_specialized_type()
                        &&  structure_symbol.get_type().is_dependent()) ? "typename " : "")
            << structure_symbol.get_qualified_name(enclosing_function.get_scope())
            ;

        // dynamic_size is filled by allocate_immediate_structure below
        struct_size << "sizeof( " << struct_arg_type_name << " )" << dynamic_size;

        Source immediate_decl;
        allocate_immediate_structure(
                structure_symbol.get_user_defined_type(),
                outline_info,
                struct_arg_type_name,
                struct_size,
                // out
                immediate_decl,
                dynamic_size);


        Source call_outline_function;

        // Code that selects the worksharing policy and the chunk size
        Source schedule_setup;
        schedule_setup
            <<     "int nanos_chunk;"
            ;
        if (schedule.get_text() == "runtime")
        {
            // schedule(runtime): policy and chunk are queried at run time
            schedule_setup
                <<     "nanos_omp_sched_t nanos_runtime_sched;"
                <<     "nanos_err = nanos_omp_get_schedule(&nanos_runtime_sched, &nanos_chunk);"
                <<     "if (nanos_err != NANOS_OK)"
                <<         "nanos_handle_error(nanos_err);"
                <<     "nanos_ws_t current_ws_policy = nanos_omp_find_worksharing(nanos_runtime_sched);"
                ;
        }
        else
        {
            Source schedule_name;

            if (Nanos::Version::interface_is_at_least("openmp", 8))
            {
                schedule_name << "nanos_omp_sched_" << schedule.get_text();
            }
            else
            {
                // Interfaces older than version 8 use the 'omp_sched_' prefix
                schedule_name << "omp_sched_" << schedule.get_text();
            }

            schedule_setup
                <<     "nanos_ws_t current_ws_policy = nanos_omp_find_worksharing(" << schedule_name << ");"
                <<     "if (current_ws_policy == 0)"
                <<         "nanos_handle_error(NANOS_UNIMPLEMENTED);"
                <<     "nanos_chunk = " << as_expression(schedule.get_chunk()) << ";"
            ;
        }


        // Code that creates the worksharing; in C++ the slicer descriptor is
        // explicitly defined first
        Source worksharing_creation;
        if (IS_CXX_LANGUAGE)
        {
            worksharing_creation
                << as_statement(Nodecl::CxxDef::make(Nodecl::NodeclBase::null(), slicer_descriptor));
        }
        worksharing_creation
            <<     "nanos_err = nanos_worksharing_create("
            <<                      "&" << as_symbol(slicer_descriptor) << ","
            <<                      "current_ws_policy,"
            <<                      "(void**)&nanos_setup_info_loop,"
            <<                      "&single_guard);"
            <<     "if (nanos_err != NANOS_OK)"
            <<         "nanos_handle_error(nanos_err);"
            ;

        // Placeholders for the code that fills the arguments; only the one
        // matching the programming model is streamed into the generated code
        Nodecl::NodeclBase fill_outline_arguments_tree, fill_immediate_arguments_tree;

        // Programming-model specific part of the generated code
        TL::Source pm_specific_code;
        if (!_lowering->in_ompss_mode())
        {
            // OpenMP: call the outline function directly with immediate arguments
            pm_specific_code
                << immediate_decl
                << statement_placeholder(fill_immediate_arguments_tree)
                << "smp_" << outline_name << "(imm_args);"
                ;
        }
        else
        {
            // OmpSs: create a sliced work descriptor using the "replicate"
            // slicer and submit it
            std::string wd_description =
                (!task_label.is_null()) ? task_label.get_text() : enclosing_function.get_name();

            Source const_wd_info;
            const_wd_info
                << fill_const_wd_info(struct_arg_type_name,
                        /* is_untied */ false,
                        /* mandatory_creation */ true,
                        /* is_function_task */ false,
                        wd_description,
                        outline_info,
                        construct);

            std::string dyn_props_var = "nanos_wd_dyn_props";

            Source dynamic_wd_info;
            dynamic_wd_info << "nanos_wd_dyn_props_t " << dyn_props_var << ";";

            fill_dynamic_properties(dyn_props_var,
                    /* priority_expr */ nodecl_null(), /* final_expr */ nodecl_null(), /* is_implicit */ 0, dynamic_wd_info);

            pm_specific_code
                <<  struct_arg_type_name << " *ol_args = (" << struct_arg_type_name <<"*) 0;"
                <<  const_wd_info
                <<  "nanos_wd_t nanos_wd_ = (nanos_wd_t) 0;"
                <<  dynamic_wd_info
                <<  "static nanos_slicer_t replicate = (nanos_slicer_t)0;"
                <<  "if (replicate == (nanos_slicer_t)0)"
                <<      "replicate = nanos_find_slicer(\"replicate\");"
                <<  "if (replicate == (nanos_slicer_t)0)"
                <<      "nanos_handle_error(NANOS_UNIMPLEMENTED);"
                <<  "nanos_err = nanos_create_sliced_wd(&nanos_wd_, "
                <<                                "nanos_wd_const_data.base.num_devices, nanos_wd_const_data.devices, "
                <<                                "(size_t)" << struct_size << ",  nanos_wd_const_data.base.data_alignment, "
                <<                                "(void**)&ol_args, nanos_current_wd(), replicate,"
                <<                                "&nanos_wd_const_data.base.props, &" << dyn_props_var << ", 0, (nanos_copy_data_t**)0,"
                <<                                "0, (nanos_region_dimension_internal_t**)0"
                <<                                ");"
                <<  "if (nanos_err != NANOS_OK)"
                <<      "nanos_handle_error(nanos_err);"
                <<  statement_placeholder(fill_outline_arguments_tree)
                <<  "nanos_err = nanos_submit(nanos_wd_, 0, (nanos_data_access_t *) 0, (nanos_team_t) 0);"
                <<  "if (nanos_err != NANOS_OK)"
                <<      "nanos_handle_error(nanos_err);"
                ;

        }

        // Synchronization at the end of the construct, only when requested
        TL::Source implicit_barrier_or_tw;
        if (!distribute_environment.find_first<Nodecl::OpenMP::BarrierAtEnd>().is_null())
        {
            implicit_barrier_or_tw << get_implicit_sync_end_construct_source();
        }

        // Whole block that will replace the construct
        Source spawn_code;
        spawn_code
            << "{"
            <<      as_type(get_bool_type()) << " single_guard;"
            <<      "nanos_err_t nanos_err;"
            <<      schedule_setup
            <<      "nanos_ws_info_loop_t nanos_setup_info_loop;"
            <<      "nanos_setup_info_loop.lower_bound = " << as_expression(lower) << ";"
            <<      "nanos_setup_info_loop.upper_bound = " << as_expression(upper) << ";"
            <<      "nanos_setup_info_loop.loop_step = "   << as_expression(step)  << ";"
            <<      "nanos_setup_info_loop.chunk_size = nanos_chunk;"
            <<      worksharing_creation
            <<      pm_specific_code
            <<      implicit_barrier_or_tw
            << "}"
            ;

        Source fill_outline_arguments, fill_immediate_arguments;
        fill_arguments(construct, outline_info, fill_outline_arguments, fill_immediate_arguments);

        // The generated code is C: in Fortran, switch the parsing language
        // for this parse only
        if (IS_FORTRAN_LANGUAGE)
            Source::source_language = SourceLanguage::C;

        Nodecl::NodeclBase spawn_code_tree = spawn_code.parse_statement(construct);

        if (IS_FORTRAN_LANGUAGE)
            Source::source_language = SourceLanguage::Current;

        // Pick the placeholder and the source that match the programming model.
        // Note: from here on the local pointer 'fill_arguments' hides the
        // fill_arguments function called above.
        Nodecl::NodeclBase arguments_tree;
        TL::Source *fill_arguments;
        if (!_lowering->in_ompss_mode())
        {
            // OpenMP
            arguments_tree = fill_immediate_arguments_tree;
            fill_arguments = &fill_immediate_arguments;
        }
        else
        {
            // OmpSs
            arguments_tree = fill_outline_arguments_tree;
            fill_arguments = &fill_outline_arguments;
        }

        // Now attach the slicer symbol to its final scope (see tl-lower-for-worksharing.cpp)
        const decl_context_t* spawn_inner_context = arguments_tree.retrieve_context().get_decl_context();
        slicer_descriptor.get_internal_symbol()->decl_context = spawn_inner_context;
        ::insert_entry(spawn_inner_context->current_scope, slicer_descriptor.get_internal_symbol());

        // Parse the arguments in the context of the placeholder and put the
        // result in its place
        Nodecl::NodeclBase new_tree = fill_arguments->parse_statement(arguments_tree);
        arguments_tree.replace(new_tree);

        // Finally, replace the construct by the tree that represents the spawn code
        construct.replace(spawn_code_tree);
    }
예제 #13
0
// Builds the source code of the blocked version of the loop nest.
//
// For the first _nesting loops of the nest (the minimum between the number
// of blocking factors and the number of loops) it emits:
//   1. an outer "block" loop per original loop, iterating a new variable
//      "_blk_<var>" from the lower bound with stride step * factor;
//   2. an inner loop per original loop, iterating the original induction
//      variable from "_blk_<var>" up to the minimum of
//      "_blk_<var> + step * (factor - 1)" and the original upper bound;
//   3. the body of loop number _nesting - 1 of the nest.
//
// Returns the Source holding the generated code.
//
// NOTE(review): sources are streamed into their parents before being filled
// (block_loops, min_code, a, b), so this presumably relies on TL::Source
// evaluating nested sources lazily - confirm before reordering statements.
TL::Source LoopBlocking::do_blocking()
{
    Source result, block_loops;

    result
        << block_loops
        ;

    ObjectList<ForStatement> nest_loops = _for_nest_info.get_nest_list();

    // Only block as many loops as there are both factors and loops
    _nesting = std::min(_nest_factors.size(), nest_loops.size());

    // Source where the next (more nested) loop must be written
    TL::Source *current_innermost_part = &block_loops;
    // For every loop declare its block loop variable and the inter-block loop
    ObjectList<TL::Expression>::iterator current_factor = _nest_factors.begin();
    ObjectList<TL::ForStatement>::iterator current_for = nest_loops.begin();
    for (int current_nest = 0;
            current_nest < _nesting;
            current_nest++, current_for++, current_factor++)
    {
        TL::IdExpression induction_var = current_for->get_induction_variable();
        TL::Symbol sym = induction_var.get_symbol();
        TL::Type type = sym.get_type();

        std::string var = "_blk_" + sym.get_name();

        // NOTE(review): allocated with 'new' and never deleted in this
        // function; the nested source must at least outlive the parent that
        // refers to it - confirm the intended ownership.
        TL::Source *new_innermost_part = new TL::Source();
        (*current_innermost_part)
            << "for(" << type.get_declaration(sym.get_scope(), var) << " = " << current_for->get_lower_bound() << ";"
                      << var << current_for->get_bound_operator() << current_for->get_upper_bound() << ";"
                      << var << "+= ( " << current_for->get_step() << ") * " << current_factor->prettyprint() << ")" 
            << (*new_innermost_part)
            ;

        current_innermost_part = new_innermost_part;
    }

    // Now for every loop, declare the intra-loop
    current_factor = _nest_factors.begin();
    current_for = nest_loops.begin();
    for (int current_nest = 0;
            current_nest < _nesting;
            current_nest++, current_for++, current_factor++)
    {
        TL::IdExpression induction_var = current_for->get_induction_variable();
        TL::Symbol sym = induction_var.get_symbol();
        TL::Type type = sym.get_type();

        std::string var = induction_var.prettyprint();
        std::string init_var = var;
        // If the loop declares the iterator in the for statement
        // declare it again
        AST_t loop_init = current_for->get_iterating_init();
        if (Declaration::predicate(loop_init))
        {
            // Fix init_var to be a declaration
            init_var = type.get_declaration(sym.get_scope(), var);
        }

        std::string blk_var = "_blk_" + sym.get_name();

        // Upper limit of the intra-block loop; its text is defined below
        TL::Source min_code;

        // NOTE(review): same undeleted allocation as in the first loop
        TL::Source *new_innermost_part = new TL::Source();
        (*current_innermost_part)
            << "for(" << init_var << " = " << blk_var << ";"
                      << var << current_for->get_bound_operator() << min_code  << ";"
                      << var << "+= ( " << current_for->get_step() << "))" 
            << (*new_innermost_part)
            ;

        // min_code is "min(a, b)" written as a conditional expression
        TL::Source a, b;
        min_code
            << "((" << a << ") < (" << b << ") ? (" << a << ") : (" << b << "))"
            ;

        // a: last iteration of the current block; b: original upper bound
        a << blk_var << " + (" << current_for->get_step() << ") * (" << current_factor->prettyprint() << " - 1 )";
        b << current_for->get_upper_bound();

        current_innermost_part = new_innermost_part;
    }

    // And now the innermost loop
    // NOTE(review): when _nesting is 0 this indexes nest_loops with
    // _nesting - 1 - confirm callers never reach this with an empty nest
    // or an empty factor list.
    (*current_innermost_part)
        << nest_loops[_nesting - 1].get_loop_body()
        ;

    return result;
}
// Old version - Deprecated. Kept here for compatibility with old runtimes (Nanos++ 0.7)
void DeviceOpenCL::old_generate_ndrange_code(
        const TL::Symbol& called_task,
        const TL::Symbol& unpacked_function,
        const TargetInformation& target_info,
        const std::string filename,
        const std::string kernel_name,
        const TL::ObjectList<OutlineDataItem*>& data_items,
        Nodecl::Utils::SimpleSymbolMap* called_fun_to_outline_data_map,
        Nodecl::Utils::SimpleSymbolMap* outline_data_to_unpacked_fun_map,
        // Out
        TL::Source& code_ndrange)
{
    // The arguments of the clauses 'ndrange' and 'shmem' must be updated because
    // they are not expressed in terms of the unpacked function parameters
    TL::ObjectList<Nodecl::NodeclBase> new_ndrange, new_shmem;
    update_ndrange_and_shmem_expressions(
            unpacked_function.get_related_scope(),
            target_info,
            outline_data_to_unpacked_fun_map,
            new_ndrange,
            new_shmem);

    int num_args_ndrange = new_ndrange.size();

    TL::Source code_ndrange_aux;
    Nodecl::Utils::SimpleSymbolMap called_fun_to_unpacked_fun_map;

    const std::map<TL::Symbol, TL::Symbol>* called_fun_to_outline_data_map_simple =
        called_fun_to_outline_data_map->get_simple_symbol_map();
    for (std::map<TL::Symbol, TL::Symbol>::const_iterator it = called_fun_to_outline_data_map_simple->begin();
            it != called_fun_to_outline_data_map_simple->end();
            it++)
    {
        TL::Symbol key = it->first;
        TL::Symbol value = outline_data_to_unpacked_fun_map->map(it->second.get_internal_symbol());
        called_fun_to_unpacked_fun_map.add_map(key, value);
    }

    bool dim_const = new_ndrange[0].is_constant();

    char is_null_ended = 0;
    bool check_dim = !(new_ndrange[num_args_ndrange - 1].is_constant()
            && const_value_is_string(new_ndrange[num_args_ndrange - 1].get_constant())
            && (strcmp(const_value_string_unpack_to_string(new_ndrange[num_args_ndrange-1].get_constant(), &is_null_ended),
                    "noCheckDim") == 0));

    int num_dim = 0;
    if (dim_const)
    {
        num_dim = const_value_cast_to_4(new_ndrange[0].get_constant());

        ERROR_CONDITION(num_dim < 1 || num_dim > 3,
                "invalid number of dimensions for 'ndrange' clause. Valid values: 1, 2 and 3." , 0);

        ERROR_CONDITION((((num_dim * 3) + 1 + !check_dim) != num_args_ndrange)
                && (((num_dim * 2) + 1 + !check_dim) != num_args_ndrange),
                "invalid number of arguments for 'ndrange' clause", 0);
    }

    std::string compiler_opts;
    if (CURRENT_CONFIGURATION->opencl_build_options != NULL)
    {
        compiler_opts = std::string(CURRENT_CONFIGURATION->opencl_build_options);
    }

    //Create OCL Kernel
    code_ndrange_aux << "nanos_err_t nanos_err;"
                     << "void* ompss_kernel_ocl = nanos_create_current_kernel(\""
                     <<         kernel_name << "\",\""
                     <<         filename << "\",\""
                     <<         compiler_opts << "\");";

    //Prepare setArgs
    unsigned int index_local = 0;
    TL::ObjectList<TL::Symbol> parameters_called = called_task.get_function_parameters();
    for (unsigned int i = 0; i < parameters_called.size(); ++i)
    {
        TL::Symbol unpacked_argument = called_fun_to_unpacked_fun_map.map(parameters_called[i]);

        // The attribute __global is deduced: the current argument will be __global if it has any copies
        bool is_global = false;
        if (unpacked_argument.get_type().no_ref().is_pointer()
                || unpacked_argument.get_type().no_ref().is_array())
        {
            for (TL::ObjectList<OutlineDataItem*>::const_iterator it = data_items.begin();
                    it != data_items.end() && !is_global;
                    ++it)
            {
                TL::Symbol outline_data_item_sym = (*it)->get_symbol();

                // If the outline data item has not a valid symbol, skip it
                if (!outline_data_item_sym.is_valid())
                    continue;

                // If the symbol of the current outline data item is not the
                // same as the unpacked_argument, skip it
                if(outline_data_to_unpacked_fun_map->map(outline_data_item_sym.get_internal_symbol()) != unpacked_argument)
                    continue;

                is_global = !((*it)->get_copies().empty());
            }
        }

        bool is_local = !is_global && unpacked_argument.get_type().no_ref().is_pointer();

        if (is_global)
        {
            code_ndrange_aux
                << "nanos_err = nanos_opencl_set_bufferarg("
                <<      "ompss_kernel_ocl, "
                <<      i << ", "
                <<      as_symbol(unpacked_argument) <<");";
        }
        else if (is_local)
        {
            TL::Source sizeof_arg;
            if (index_local >= new_shmem.size())
            {
                warn_printf_at(called_task.get_locus(),
                        "the size of the local symbol '%s' has not been specified in the 'shmem' clause, assuming zero\n",
                        unpacked_argument.get_name().c_str());

                sizeof_arg << "0";
            }
            else
            {
                sizeof_arg << as_expression(new_shmem[index_local]);
            }

            code_ndrange_aux << "nanos_err = nanos_opencl_set_arg("
                <<      "ompss_kernel_ocl, "
                <<      i << ", "
                <<      sizeof_arg << ", "
                <<      "0);";
            ++index_local;
        }
        else
        {
            code_ndrange_aux << "nanos_err = nanos_opencl_set_arg("
                <<      "ompss_kernel_ocl, "
                <<      i << ", "
                <<      "sizeof(" << as_type(unpacked_argument.get_type().no_ref()) << "), "
                <<      "&" << as_symbol(unpacked_argument) <<");";
        }
    }


    //Build arrays with information from ndrange clause or pointing to the ndrange pointers
    if (!dim_const)
    {
        if (IS_FORTRAN_LANGUAGE)
        {
            internal_error("The number of dimensions is non-constant. This feature is not implemented yet in Fortran.", 0);
        }

        //Prepare ndrange calc pointers and arrays
        code_ndrange_aux
            << "int num_dim = " << as_expression(new_ndrange[0]) <<";"
            << "size_t offset_tmp[num_dim];"
            << "size_t offset_arr[num_dim];"
            << "size_t local_size_arr[num_dim];"
            << "size_t global_size_arr[num_dim];"
            << "size_t* local_size_ptr;"
            << "size_t* offset_ptr;"
            << "size_t* global_size_ptr;"
            << "size_t* final_local_size_ptr;"
            << as_type(TL::Type::get_bool_type()) << " local_size_zero = 0;"
            << "int i = 0;"
            ;
        if (num_args_ndrange == 3)
        {
            code_ndrange_aux
                << "for (i = 0; i < num_dim; ++i)"
                << "{"
                <<     "offset_tmp[i] = 0;"
                << "}"
                << "offset_ptr = offset_tmp;"
                << "global_size_ptr = " << as_expression(new_ndrange[1]) << ";"
                << "local_size_ptr = " << as_expression(new_ndrange[2]) << ";"
                ;
        }
        else if (num_args_ndrange == 4)
        {
            code_ndrange_aux
                << "offset_ptr = " << as_expression(new_ndrange[1]) << ";"
                << "global_size_ptr = " << as_expression(new_ndrange[2]) << ";"
                << "local_size_ptr = " << as_expression(new_ndrange[3]) << ";"
                ;
        }
        else
        {
            WARNING_MESSAGE("Invalid number of parameters for ndrange, when number of dimensions is not const, it must be 3 or 4",0);
        }

        //Check if local_size has zeros
        code_ndrange_aux
            << "for (i = 0; i < num_dim; ++i)"
            << "{"
            <<     "if (local_size_ptr[i] == 0)"
            <<     "{"
            <<         "local_size_zero = 1;"
            <<     "}"
            << " }"
            << "if (local_size_zero)"
            << "{"
            <<     "for (i = 0; i < num_dim; ++i)"
            <<     "{"
            <<         "local_size_ptr[i] = 1;"
            <<     "}"
            << "}"
            ;

        //Now do the rounding
        if (check_dim)
        {
            code_ndrange_aux
                << "for (i = 0; i < num_dim; ++i)"
                << "{"
                <<     "offset_arr[i] = offset_ptr[i];"

                <<     "local_size_arr[i] = (global_size_ptr[i] < local_size_ptr[i]) ? "
                <<         "global_size_ptr[i] : local_size_ptr[i];"

                <<     "global_size_arr[i] = (global_size_ptr[i] < local_size_ptr[i]) ? "
                <<         "global_size_ptr[i] : global_size_ptr[i] + ("
                <<             "(global_size_ptr[i] % local_size_ptr[i] == 0) ? "
                <<                  "0 : (local_size_ptr[i] - global_size_ptr[i] % local_size_ptr[i]));"
                << "}"
                ;
        }

        if (check_dim)
        {
            code_ndrange_aux
                << "if (local_size_zero)"
                << "{"
                <<     "final_local_size_ptr = 0;"
                << "}"
                << "else"
                << "{"
                <<     "final_local_size_ptr = local_size_arr;"
                << "}"
                //Launch kernel/ it will be freed inside, with ndrange calculated inside the checkDim loop
                << "nanos_exec_kernel(ompss_kernel_ocl, num_dim, offset_arr, final_local_size_ptr, global_size_arr);";
            ;
        }
        else
        {
            code_ndrange_aux
                << "if (local_size_zero)"
                << "{"
                <<     "final_local_size_ptr = 0;"
                << "}"
                << "else"
                << "{"
                <<     "final_local_size_ptr = local_size_ptr;"
                << "}"
                << "nanos_exec_kernel(ompss_kernel_ocl, num_dim, offset_ptr, final_local_size_ptr, global_size_ptr);"
                ;
        }
    }
    else
    {
        int num_dim_offset = num_dim;

        //Prepare ndrange calc pointers and arrays
        code_ndrange_aux
            << "int num_dim = " << as_expression(new_ndrange[0]) <<";"
            << "size_t offset_arr[num_dim];"
            << "size_t local_size_arr[num_dim];"
            << "size_t global_size_arr[num_dim];"
            << as_type(TL::Type::get_bool_type()) << " local_size_zero;"
            << "local_size_zero = 0;"
            ;

        for (int i = 1; i <= num_dim; ++i)
        {
            if (((num_dim * 3) + 1 + !check_dim) != num_args_ndrange)
            {
                num_dim_offset = 0;
                code_ndrange_aux << "offset_arr[" << i-1 << "] = 0;";
            }
            else
            {
                code_ndrange_aux
                    << "offset_arr[" << i-1 << "] = " << as_expression(new_ndrange[i]) << ";";
            }

            code_ndrange_aux
                << "local_size_arr[" << i-1 << "] = " << as_expression(new_ndrange[num_dim + num_dim_offset + i]) << ";"
                << "if (local_size_arr[" << i - 1 << "] == 0)"
                << "{"
                <<      "local_size_zero = 1;"
                << "}"
                << "global_size_arr[" << i-1 << "] = " << as_expression(new_ndrange[num_dim_offset + i]) << ";"
                ;
        }

        //Now do the rounding
        if (check_dim)
        {
            code_ndrange_aux
                << "if (!local_size_zero)"
                << "{"
                <<      "int i;"
                <<      "for (i = 0; i < num_dim; i = i + 1)"
                <<      "{"
                <<           "if (global_size_arr[i] < local_size_arr[i])"
                <<           "{"
                <<               "local_size_arr[i] = global_size_arr[i];"
                <<           "}"
                <<           "else"
                <<           "{"
                <<               "if (global_size_arr[i] % local_size_arr[i] != 0)"
                <<               "{"
                <<                   "global_size_arr[i] = global_size_arr[i]"
                <<                       " + (local_size_arr[i] - global_size_arr[i] % local_size_arr[i]);"
                <<               "}"
                <<           "}"
                <<      "}"
                << "}"
                ;
        }

        code_ndrange_aux
            << "if (local_size_zero)"
            << "{"
            //Launch kernel/ it will be freed inside, with ndrange calculated inside the checkDim loop
            <<      "nanos_err = nanos_exec_kernel(ompss_kernel_ocl, num_dim, offset_arr, 0, global_size_arr);"
            << "}"
            << "else"
            << "{"
            //Launch kernel/ it will be freed inside, with ndrange calculated inside the checkDim loop
            <<      "nanos_err = nanos_exec_kernel(ompss_kernel_ocl, num_dim, offset_arr, local_size_arr, global_size_arr);"
            << "}"
            ;
    }

    if (IS_FORTRAN_LANGUAGE)
    {
        Source::source_language = SourceLanguage::C;

        Nodecl::NodeclBase code_ndrange_tree = code_ndrange_aux.parse_statement(unpacked_function.get_related_scope());

        Source::source_language = SourceLanguage::Current;

        code_ndrange << as_statement(code_ndrange_tree);
    }
    else
    {
        code_ndrange << code_ndrange_aux;
    }
}
예제 #15
0
    // Creates a new function symbol called 'name' in the declaration context
    // of 'current_function' and returns it.
    //
    //  - return_type: result type used to build the function type
    //  - parameter_names / parameter_types: must have the same number of
    //    elements; one SK_VARIABLE symbol is created per (name, type) pair
    //    inside the context of the new function
    //
    // The new symbol is always marked static. It is also marked as member,
    // inline and (Fortran only) module procedure when 'current_function' is.
    // If the type of 'current_function' is a template specialized type, a
    // SK_TEMPLATE symbol is created with that name and the symbol of its
    // primary specialization is the one returned.
    TL::Symbol new_function_symbol(
            TL::Symbol current_function,
            const std::string& name,
            TL::Type return_type,
            TL::ObjectList<std::string> parameter_names,
            TL::ObjectList<TL::Type> parameter_types)
    {
        // In Fortran, a nested function is replaced by the symbol related to
        // its scope (the enclosing function)
        if (IS_FORTRAN_LANGUAGE && current_function.is_nested_function())
        {
            current_function = current_function.get_scope().get_related_symbol();
        }

        decl_context_t enclosing_context = current_function.get_scope().get_decl_context();

        ERROR_CONDITION(parameter_names.size() != parameter_types.size(), "Mismatch between names and types", 0);

        // Context where the parameters of the new function will live
        decl_context_t body_context;
        if (!IS_FORTRAN_LANGUAGE)
        {
            body_context = new_block_context(new_function_context(enclosing_context));
        }
        else
        {
            body_context = new_program_unit_context(enclosing_context);
        }

        // Create one symbol per parameter and, at the same time, fill the
        // parameter descriptors needed to build the function type
        int num_parameters = 0;
        scope_entry_t** parameter_list = NULL;

        parameter_info_t* param_infos = new parameter_info_t[parameter_types.size()];
        for (unsigned int idx = 0; idx < parameter_names.size(); ++idx)
        {
            scope_entry_t* param_sym = new_symbol(
                    body_context,
                    body_context.current_scope,
                    parameter_names[idx].c_str());

            param_sym->kind = SK_VARIABLE;
            param_sym->entity_specs.is_user_declared = 1;
            param_sym->defined = 1;
            param_sym->locus = make_locus("", 0, 0);
            param_sym->type_information =
                get_unqualified_type(parameter_types[idx].get_internal_type());

            P_LIST_ADD(parameter_list, num_parameters, param_sym);

            parameter_info_t& info = param_infos[idx];
            info.is_ellipsis = 0;
            info.nonadjusted_type_info = NULL;
            info.type_info = get_indirect_type(param_sym);
        }

        type_t* function_type = get_new_function_type(
                return_type.get_internal_type(),
                param_infos,
                parameter_types.size());

        // The descriptors are only needed while the function type is built
        delete[] param_infos;

        // Create the symbol of the function itself
        scope_entry_t* result_sym = NULL;
        if (current_function.get_type().is_template_specialized_type())
        {
            // Templated case: create a template symbol first...
            scope_entry_t* template_sym = new_symbol(
                    enclosing_context, enclosing_context.current_scope, name.c_str());
            template_sym->kind = SK_TEMPLATE;
            template_sym->locus = make_locus("", 0, 0);

            template_sym->type_information = get_new_template_type(
                    enclosing_context.template_parameters,
                    function_type,
                    uniquestr(name.c_str()),
                    enclosing_context, make_locus("", 0, 0));

            template_type_set_related_symbol(template_sym->type_information, template_sym);

            // ...and use its primary specialization as the new function
            result_sym = named_type_get_symbol(
                    template_type_get_primary_type(
                        template_sym->type_information));
        }
        else
        {
            result_sym = new_symbol(enclosing_context, enclosing_context.current_scope, name.c_str());
            result_sym->entity_specs.is_user_declared = 1;
            result_sym->kind = SK_FUNCTION;
            result_sym->locus = make_locus("", 0, 0);
            result_sym->type_information = function_type;
        }

        // Link the scopes of the body and the function symbol in both directions
        body_context.function_scope->related_entry = result_sym;
        body_context.block_scope->related_entry = result_sym;
        result_sym->related_decl_context = body_context;

        // Attach the parameters to the function
        result_sym->entity_specs.related_symbols = parameter_list;
        result_sym->entity_specs.num_related_symbols = num_parameters;
        for (int position = 0; position < result_sym->entity_specs.num_related_symbols; ++position)
        {
            symbol_set_as_parameter_of_function(
                    result_sym->entity_specs.related_symbols[position],
                    result_sym,
                    /* parameter position */ position);
        }

        // The new function is always static
        result_sym->entity_specs.is_static = 1;

        // If the enclosing function is a member, so is the new one: it becomes
        // a public member of the same class
        if (current_function.is_member())
        {
            result_sym->entity_specs.is_member = 1;
            result_sym->entity_specs.class_type = current_function.get_class_type().get_internal_type();
            result_sym->entity_specs.access = AS_PUBLIC;

            ::class_type_add_member(result_sym->entity_specs.class_type, result_sym);
        }

        if (current_function.is_inline())
        {
            result_sym->entity_specs.is_inline = 1;
        }

        // Note: is_defined_inside_class_specifier is deliberately not
        // propagated from current_function

        // Fortran: a function created from a module procedure is registered in
        // the same module
        if (IS_FORTRAN_LANGUAGE && current_function.is_in_module())
        {
            scope_entry_t* owner_module = current_function.in_module().get_internal_symbol();
            result_sym->entity_specs.in_module = owner_module;
            P_LIST_ADD(
                    owner_module->entity_specs.related_symbols,
                    owner_module->entity_specs.num_related_symbols,
                    result_sym);
            result_sym->entity_specs.is_module_procedure = 1;
        }

        return result_sym;
    }
예제 #16
0
    // Returns the Fortran subroutine used to combine the partial results of
    // the reduction 'red'. The subroutine receives two arrays (omp_out and
    // omp_in) of 'num_scalars' elements and runs the combiner of 'red' inside
    // a loop from 1 to num_scalars.
    //
    // The subroutine is created only once per reduction: later calls return
    // the symbol stored in _basic_reduction_map_openmp. On creation, the new
    // code is appended at the top level location enclosing 'construct'.
    TL::Symbol LoweringVisitor::create_basic_reduction_function_fortran(OpenMP::Reduction* red, Nodecl::NodeclBase construct)
    {
        // Was this reduction already lowered?
        reduction_map_t::iterator cached = _basic_reduction_map_openmp.find(red);
        if (cached != _basic_reduction_map_openmp.end())
            return cached->second;

        // The name is built from the reduction pointer and a hash of the
        // name of the file of the construct
        std::stringstream name_builder;
        name_builder << "nanos_red_" << red << "_" << simple_hash_str(construct.get_filename().c_str());
        const std::string fun_name = name_builder.str();

        // Skeleton of the subroutine; its body is filled in below through the
        // placeholder
        Nodecl::NodeclBase function_body;
        Source src;
        src << "SUBROUTINE " << fun_name << "(omp_out, omp_in, num_scalars)\n"
            <<    "IMPLICIT NONE\n"
            <<    as_type(red->get_type()) << " :: omp_out(num_scalars)\n"
            <<    as_type(red->get_type()) << " :: omp_in(num_scalars)\n"
            <<    "INTEGER, VALUE :: num_scalars\n"
            <<    "INTEGER :: I\n"
            <<    statement_placeholder(function_body) << "\n"
            << "END SUBROUTINE " << fun_name << "\n";

        Nodecl::NodeclBase function_code = src.parse_global(construct);

        // Retrieve the symbols declared by the skeleton
        TL::Scope body_scope = ReferenceScope(function_body).get_scope();

        TL::Symbol param_omp_in = body_scope.get_symbol_from_name("omp_in");
        ERROR_CONDITION(!param_omp_in.is_valid(), "Symbol omp_in not found", 0);

        TL::Symbol param_omp_out = body_scope.get_symbol_from_name("omp_out");
        ERROR_CONDITION(!param_omp_out.is_valid(), "Symbol omp_out not found", 0);

        TL::Symbol function_sym = body_scope.get_symbol_from_name(fun_name);
        ERROR_CONDITION(!function_sym.is_valid(), "Symbol %s not found", fun_name.c_str());

        TL::Symbol loop_index = body_scope.get_symbol_from_name("i");
        ERROR_CONDITION(!loop_index.is_valid(), "Symbol %s not found", "i");

        TL::Symbol num_scalars = body_scope.get_symbol_from_name("num_scalars");
        ERROR_CONDITION(!num_scalars.is_valid(), "Symbol %s not found", "num_scalars");

        // Loop control: I = 1, num_scalars
        Nodecl::Symbol index_ref = Nodecl::Symbol::make(loop_index);
        index_ref.set_type(loop_index.get_type().get_lvalue_reference_to());

        Nodecl::NodeclBase upper_bound_ref = Nodecl::Symbol::make(num_scalars);
        upper_bound_ref.set_type(num_scalars.get_type().no_ref().get_lvalue_reference_to());

        Nodecl::NodeclBase loop_control = Nodecl::RangeLoopControl::make(
                index_ref,
                const_value_to_nodecl(const_value_get_signed_int(1)),
                upper_bound_ref,
                Nodecl::NodeclBase::null());

        // Loop body: a copy of the combiner processed by the expander visitor.
        // NOTE(review): the visitor presumably rewrites omp_in/omp_out of the
        // reduction into the parameters indexed by the loop index -- confirm
        // against BasicReductionExpandVisitor
        Nodecl::NodeclBase combiner_copy = red->get_combiner().shallow_copy();
        BasicReductionExpandVisitor combiner_expander(
                red->get_omp_in(),
                param_omp_in,
                red->get_omp_out(),
                param_omp_out,
                loop_index);
        combiner_expander.walk(combiner_copy);

        Nodecl::NodeclBase loop_statement = Nodecl::ForStatement::make(
                loop_control,
                Nodecl::List::make(
                    Nodecl::ExpressionStatement::make(combiner_copy)),
                Nodecl::NodeclBase::null());
        function_body.replace(loop_statement);

        // Remember the function for the next time this reduction is found
        _basic_reduction_map_openmp[red] = function_sym;

        if (IS_FORTRAN_LANGUAGE)
        {
            Nodecl::Utils::Fortran::append_used_modules(
                    construct.retrieve_context(),
                    function_sym.get_related_scope());
        }

        Nodecl::Utils::append_to_enclosing_top_level_location(construct, function_code);

        return function_sym;
    }
예제 #17
0
파일: tl-source.cpp 프로젝트: bsc-pm/mcxx
 // Returns the text that symbol_to_source generates for the internal symbol
 // wrapped by 's'
 std::string as_symbol(TL::Symbol s)
 {
     scope_entry_t* internal_sym = s.get_internal_symbol();
     return symbol_to_source(internal_sym);
 }
예제 #18
0
 // Lowers an expression statement. In Fortran, statements that are calls to
 // the intrinsics 'ompss_opencl_allocate' and 'ompss_opencl_deallocate' are
 // handed to their specific handlers first. In every case the nest of the
 // statement is walked afterwards.
 void LoweringVisitor::visit(const Nodecl::ExpressionStatement& expr_stmt)
 {
     Nodecl::NodeclBase nest = expr_stmt.get_nest();

     const bool is_fortran_call =
         IS_FORTRAN_LANGUAGE && nest.is<Nodecl::FunctionCall>();

     if (is_fortran_call)
     {
         Nodecl::FunctionCall call = nest.as<Nodecl::FunctionCall>();
         Nodecl::NodeclBase callee = call.get_called();

         if (callee.is<Nodecl::Symbol>())
         {
             TL::Symbol callee_sym = callee.as<Nodecl::Symbol>().get_symbol();

             // Only two intrinsics need a special treatment
             if (callee_sym.is_intrinsic())
             {
                 if (callee_sym.get_name() == "ompss_opencl_allocate")
                 {
                     // The call to the intrinsic is replaced by a call to a
                     // new function that:
                     //  1. allocates a temporary array with descriptor
                     //  2. copies that descriptor to the address of the array
                     //  3. asks the Nanos++ API to allocate the buffer in the
                     //     shared memory
                     //  4. deallocates the temporary array
                     //
                     // For instance, this code:
                     //
                     //    INTEGER, ALLOCATABLE :: V(:)
                     //    OMPSS_OPENCL_ALLOCATE(V(10))
                     //
                     // becomes:
                     //
                     //    SUBROUTINE NANOX_OPENCL_ALLOCATE_INTERNAL(ARR, LB1, UB1)
                     //        INTEGER, ALLOCATABLE :: ARR(:)
                     //        INTEGER :: LB1, UB1, ERR
                     //        INTEGER, ALLOCATABLE :: TMP(:)
                     //
                     //        ALLOCATE(TMP(LB1:UB1))
                     //
                     //        ERR = NANOS_MEMCPY(
                     //                MERCURIUM_GET_ADDRESS_OF(ARR),
                     //                MERCURIUM_GET_ADDRESS_OF(TMP),
                     //                48)
                     //
                     //        CALL NANOS_OPENCL_ALLOCATE_FORTRAN(
                     //            SIZEOF(TMP),
                     //            MERCURIUM_GET_ADDRESS_OF(ARR))
                     //
                     //        DEALLOCATE(TMP)
                     //    END SUBROUTINE NANOX_OPENCL_ALLOCATE_INTERNAL
                     //
                     //    INTEGER, ALLOCATABLE :: V(:)
                     //    CALL NANOX_OPENCL_ALLOCATE_INTERNAL(V, 1, 10)
                     //
                     // See https://pm.bsc.es/projects/mcxx/ticket/1994
                     handle_ompss_opencl_allocate_intrinsic(
                             call,
                             _declared_ocl_allocate_functions,
                             expr_stmt);
                 }
                 else if (callee_sym.get_name() == "ompss_opencl_deallocate")
                 {
                     // Simpler case: the call to the intrinsic is directly
                     // replaced by a call to the Nanos++ API. This code:
                     //
                     //    INTEGER, ALLOCATABLE :: V(:)
                     //    OMPSS_OPENCL_DEALLOCATE(V)
                     //
                     // becomes:
                     //
                     //    INTEGER, ALLOCATABLE :: V(:)
                     //    CALL NANOS_OPENCL_ALLOCATE_FORTRAN(MERCURIUM_GET_ADDRESS_OF(V))
                     //
                     // NOTE(review): the API name above is the one written in
                     // the original note; it looks copied from the allocate
                     // case, confirm the name of the deallocation entry point
                     handle_ompss_opencl_deallocate_intrinsic(call, expr_stmt);
                 }
             }
         }
     }

     // The nest is read again from the statement instead of reusing 'nest':
     // the handlers above receive expr_stmt and may have changed it
     walk(expr_stmt.get_nest());
 }
// Generates, into 'code_ndrange', the code that creates an OpenCL kernel, sets
// its arguments and launches it according to the 'ndrange' clause.
//
//  - called_task: task function; its parameters are the kernel arguments
//  - unpacked_function: function where the generated code will live; its
//    related scope is used to update the clause expressions and, in Fortran,
//    to parse the generated C code
//  - target_info: source of the 'ndrange' and 'shmem' expressions
//  - filename, kernel_name: strings passed to nanos_create_current_kernel
//  - data_items: outline data items, used to know which arguments have copies
//  - called_fun_to_outline_data_map, outline_data_to_unpacked_fun_map: symbol
//    maps that are composed to go from task parameters to unpacked parameters
//  - code_ndrange (out): receives the generated code
//
// If the 'opencl' interface of Nanos++ is older than 1003 all the work is
// delegated to old_generate_ndrange_code.
void DeviceOpenCL::generate_ndrange_code(
        const TL::Symbol& called_task,
        const TL::Symbol& unpacked_function,
        const TargetInformation& target_info,
        const std::string filename,
        const std::string kernel_name,
        const TL::ObjectList<OutlineDataItem*>& data_items,
        Nodecl::Utils::SimpleSymbolMap* called_fun_to_outline_data_map,
        Nodecl::Utils::SimpleSymbolMap* outline_data_to_unpacked_fun_map,
        // Out
        TL::Source& code_ndrange)
{
    if (!Nanos::Version::interface_is_at_least("opencl", 1003))
    {
        old_generate_ndrange_code(
                called_task,
                unpacked_function,
                target_info,
                filename,
                kernel_name,
                data_items,
                called_fun_to_outline_data_map,
                outline_data_to_unpacked_fun_map,
                code_ndrange);
        return;
    }

    // The expressions of 'ndrange' and 'shmem' are not written in terms of
    // the parameters of the unpacked function: update them
    TL::ObjectList<Nodecl::NodeclBase> new_ndrange, new_shmem;
    update_ndrange_and_shmem_expressions(
            unpacked_function.get_related_scope(),
            target_info,
            outline_data_to_unpacked_fun_map,
            new_ndrange,
            new_shmem);

    TL::Source code_ndrange_aux;

    // Compose both maps: parameter of the task -> parameter of the unpacked function
    Nodecl::Utils::SimpleSymbolMap called_fun_to_unpacked_fun_map;
    const std::map<TL::Symbol, TL::Symbol>* task_to_outline_data =
        called_fun_to_outline_data_map->get_simple_symbol_map();
    for (std::map<TL::Symbol, TL::Symbol>::const_iterator entry = task_to_outline_data->begin();
            entry != task_to_outline_data->end();
            ++entry)
    {
        TL::Symbol task_param = entry->first;
        TL::Symbol unpacked_param =
            outline_data_to_unpacked_fun_map->map(entry->second.get_internal_symbol());
        called_fun_to_unpacked_fun_map.add_map(task_param, unpacked_param);
    }

    // Syntax of the clause:
    //
    //     ndrange(N, global-list [, local-list])
    //
    // where every list has N elements and N is a constant
    Nodecl::NodeclBase num_dims_expr = new_ndrange[0];
    if (!num_dims_expr.is_constant())
    {
        fatal_printf_at(num_dims_expr.get_locus(),
                "first argument in 'ndrange' clause must be constant\n");
    }

    // Drop N: from now on new_ndrange only contains the lists
    new_ndrange.erase(new_ndrange.begin());

    // Only 1, 2 or 3 dimensions are accepted
    int num_dims = const_value_cast_to_signed_int(num_dims_expr.get_constant());
    if (num_dims < 1 || num_dims > 3)
    {
        fatal_printf_at(num_dims_expr.get_locus(),
                "number of dimensions for 'ndrange' clause is not 1, 2 or 3\n");
    }

    // The lists must have exactly N or 2*N expressions in total
    const int remaining_args = static_cast<int>(new_ndrange.size());
    const bool only_global_sizes = (remaining_args == num_dims);
    const bool global_and_local_sizes = (remaining_args == num_dims * 2);
    if (!only_global_sizes && !global_and_local_sizes)
    {
        fatal_printf_at(num_dims_expr.get_locus(),
                "a 'ndrange(%d, argument-list)' clause requires %d or %d arguments in argument-list\n",
                num_dims,
                num_dims ,
                num_dims * 2);
    }

    // Build options come from the configuration, if any
    std::string compiler_options;
    if (CURRENT_CONFIGURATION->opencl_build_options != NULL)
    {
        compiler_options = std::string(CURRENT_CONFIGURATION->opencl_build_options);
    }

    // Kernel creation
    code_ndrange_aux
        << "nanos_err_t nanos_err;"
        << "void* ompss_kernel_ocl = nanos_create_current_kernel(\""
        <<     kernel_name << "\",\""
        <<     filename << "\",\""
        <<     compiler_options << "\");";

    // One 'set argument' call per parameter of the task
    unsigned int index_local = 0;
    TL::ObjectList<TL::Symbol> parameters_called = called_task.get_function_parameters();
    for (unsigned int i = 0; i < parameters_called.size(); ++i)
    {
        TL::Symbol unpacked_argument = called_fun_to_unpacked_fun_map.map(parameters_called[i]);
        TL::Type argument_type = unpacked_argument.get_type().no_ref();

        // __global is deduced: a pointer or array argument is __global when
        // an outline data item mapped to it has copies
        bool is_global = false;
        if (argument_type.is_pointer() || argument_type.is_array())
        {
            for (TL::ObjectList<OutlineDataItem*>::const_iterator it = data_items.begin();
                    it != data_items.end();
                    ++it)
            {
                OutlineDataItem* item = *it;
                TL::Symbol item_sym = item->get_symbol();

                // Items without a valid symbol are not considered
                if (!item_sym.is_valid())
                    continue;

                // Neither are the items mapped to a different argument
                if (outline_data_to_unpacked_fun_map->map(item_sym.get_internal_symbol()) != unpacked_argument)
                    continue;

                if (!item->get_copies().empty())
                {
                    is_global = true;
                    break;
                }
            }
        }

        // Pointers without copies are treated as local memory
        const bool is_local = !is_global && argument_type.is_pointer();

        if (is_global)
        {
            code_ndrange_aux
                << "nanos_err = nanos_opencl_set_bufferarg(ompss_kernel_ocl, "
                <<      i << ", "
                <<      as_symbol(unpacked_argument) << ");";
        }
        else if (is_local)
        {
            // The size of each local argument is taken, in order, from 'shmem'
            TL::Source sizeof_arg;
            if (index_local < new_shmem.size())
            {
                sizeof_arg << as_expression(new_shmem[index_local]);
            }
            else
            {
                warn_printf_at(called_task.get_locus(),
                        "the size of the local symbol '%s' has not been specified in the 'shmem' clause, assuming zero\n",
                        unpacked_argument.get_name().c_str());

                sizeof_arg << "0";
            }

            code_ndrange_aux
                << "nanos_err = nanos_opencl_set_arg(ompss_kernel_ocl, "
                <<      i << ", "
                <<      sizeof_arg << ", 0);";
            ++index_local;
        }
        else
        {
            // Anything else is passed by value
            code_ndrange_aux
                << "nanos_err = nanos_opencl_set_arg(ompss_kernel_ocl, "
                <<      i << ", "
                <<      "sizeof(" << as_type(argument_type) << "), "
                <<      "&" << as_symbol(unpacked_argument) << ");";
        }
    }

    // Split the expressions of the clause into global and local sizes
    TL::ObjectList<Nodecl::NodeclBase> global_list;
    TL::ObjectList<Nodecl::NodeclBase> local_list;
    if (global_and_local_sizes)
    {
        // ndrange(N, global-list, local-list)
        for (int dim = 0; dim < num_dims; ++dim)
        {
            global_list.append(new_ndrange[dim]);
        }
        for (int dim = num_dims; dim < num_dims * 2; ++dim)
        {
            local_list.append(new_ndrange[dim]);
        }
    }
    else if (only_global_sizes)
    {
        // ndrange(N, global-list): no local sizes were given
        for (int dim = 0; dim < num_dims; ++dim)
        {
            global_list.append(new_ndrange[dim]);
        }
    }
    else
    {
        internal_error("Code unreachable", 0);
    }

    const bool there_is_local_size = !local_list.empty();

    // Declare the arrays of sizes...
    if (there_is_local_size)
    {
        code_ndrange_aux << "size_t local_size_arr[" << num_dims << "];";
    }
    code_ndrange_aux << "size_t global_size_arr[" << num_dims << "];";

    // ...and initialize them, dimension by dimension
    for (int dim = 0; dim < num_dims; ++dim)
    {
        if (there_is_local_size)
        {
            code_ndrange_aux
                << "local_size_arr[" << dim << "] = " << as_expression(local_list[dim]) << ";";
        }
        code_ndrange_aux
            << "global_size_arr[" << dim << "] = " << as_expression(global_list[dim]) << ";";
    }

    // Launch the kernel
    if (there_is_local_size)
    {
        code_ndrange_aux
            << "nanos_err = nanos_exec_kernel(ompss_kernel_ocl, " << num_dims
            << ", local_size_arr, global_size_arr);";
    }
    else
    {
        // Without local sizes the runtime chooses the best local size
        code_ndrange_aux
            << "nanos_err = nanos_profile_exec_kernel(ompss_kernel_ocl, " << num_dims
            << ", global_size_arr);";
    }

    if (!IS_FORTRAN_LANGUAGE)
    {
        code_ndrange << code_ndrange_aux;
    }
    else
    {
        // The code above is C: in Fortran it is parsed as C and inserted as a tree
        Source::source_language = SourceLanguage::C;

        Nodecl::NodeclBase code_ndrange_tree =
            code_ndrange_aux.parse_statement(unpacked_function.get_related_scope());

        Source::source_language = SourceLanguage::Current;

        code_ndrange << as_statement(code_ndrange_tree);
    }
}
예제 #20
0
// Builds the source of the unrolled version of _for_stmt.
//
// Non regular loops are delegated to silly_unroll(). For regular loops the
// result is a compound statement with this layout:
//
//     {
//         <declaration of the induction variable, if the loop declared it>
//         <before_main>
//         <main loop, advancing _factor steps per iteration>
//         <after_main>
//         <epilogue loop, running the remaining iterations>
//     }
//
// When _factor is not greater than 1 the loop header is emitted as it was
// written and no epilogue is generated. The replicated bodies are produced by
// simple_replication or omp_replication.
TL::Source LoopUnroll::do_unroll()
{
    if (!_for_stmt.regular_loop())
    {
        return silly_unroll();
    }

    // Parts of the loop
    IdExpression induction_var = _for_stmt.get_induction_variable();
    Expression lower_bound = _for_stmt.get_lower_bound();
    Expression upper_bound = _for_stmt.get_upper_bound();
    Expression step = _for_stmt.get_step();
    TL::Source operator_bound = _for_stmt.get_bound_operator();

    Statement loop_body = _for_stmt.get_loop_body();

    // The pieces are inserted in the result now and filled in afterwards
    TL::Source result, epilogue,
        main_loop, induction_var_decl,
        before_main, after_main;

    result
        << "{"
        << induction_var_decl
        << before_main
        << main_loop
        << after_main
        << epilogue
        << "}"
        ;

    Source replicated_body;
    Source epilogue_body;
    if (_factor > 1)
    {
        AST_t init = _for_stmt.get_iterating_init();
        if (Declaration::predicate(init))
        {
            // The loop declared its own induction variable: since the
            // epilogue keeps using it, declare it in the enclosing block
            TL::Symbol sym = induction_var.get_symbol();
            TL::Type type = sym.get_type();
            induction_var_decl
                << type.get_declaration(sym.get_scope(), sym.get_name()) << ";"
                ;
        }

        // Main loop: stops (_factor - 1) steps before the upper bound and
        // advances _factor steps at a time
        main_loop
            << "for (" << induction_var << " = " << lower_bound << ";"
            << induction_var << operator_bound << "((" << upper_bound << ") - (" << _factor << " - 1)* (" << step << "));"
            << induction_var << "+= (" << step << ") * " << _factor << ")"
            << "{"
            << replicated_body
            << "}"
            ;

        // FIXME - It could help to initialize here another variable and make both loops independent
        epilogue
            << "for ( ; "  // No initialization, keep using the old induction var
            << induction_var << operator_bound << upper_bound << ";"
            << induction_var << "+= (" << step << "))"
            << epilogue_body
            ;

        if (!_remove_tasks)
        {
            epilogue_body << loop_body;
        }
        else
        {
            // Removing the tasks of the epilogue is not implemented
            std::cerr << "Do not create task " << __FILE__ << ":" << __LINE__ << std::endl;
            running_error("Path not supported yet", 0);
        }
    }
    else
    {
        // Nothing to unroll: keep the original loop header
        main_loop << "for(" << _for_stmt.get_iterating_init().prettyprint()
            << _for_stmt.get_iterating_condition() << ";"
            << _for_stmt.get_iterating_expression() << ")"
            << "{"
            << replicated_body
            << "}"
            ;
    }

    // Replicate the body. The OpenMP aware replication is only used when the
    // body contains relevant OpenMP and it has not been disabled
    const bool consider_omp = TaskAggregation::contains_relevant_openmp(loop_body);

    if (_ignore_omp || !consider_omp)
    {
        simple_replication(_factor, replicated_body, epilogue_body,
                induction_var, loop_body);
    }
    else
    {
        omp_replication(_factor, replicated_body, epilogue_body,
                induction_var, loop_body, before_main, after_main);
    }

    return result;
}
    bool LoweringVisitor::handle_reductions_on_task(
            Nodecl::NodeclBase construct,
            OutlineInfo& outline_info,
            Nodecl::NodeclBase statements,
            bool generate_final_stmts,
            Nodecl::NodeclBase& final_statements)
    {
        int num_reductions = 0;

        TL::Source
            reductions_stuff,
            final_clause_stuff,
            // This source represents an expression which is used to check if
            // we can do an optimization in the final code. This optimization
            // consists on calling the original code (with a serial closure) if
            // we are in a final context and the reduction variables that we
            // are using have not been registered previously
            final_clause_opt_expr,
            extra_array_red_memcpy;

        std::map<TL::Symbol, std::string> reduction_symbols_map;

        TL::ObjectList<OutlineDataItem*> data_items = outline_info.get_data_items();
        for (TL::ObjectList<OutlineDataItem*>::iterator it = data_items.begin();
                it != data_items.end();
                it++)
        {
           if (!(*it)->is_reduction())
              continue;

            std::pair<TL::OpenMP::Reduction*, TL::Type> red_info_pair = (*it)->get_reduction_info();
            TL::OpenMP::Reduction* reduction_info = red_info_pair.first;
            TL::Type reduction_type = red_info_pair.second.no_ref();

            TL::Symbol reduction_item = (*it)->get_symbol();
            TL::Type reduction_item_type = reduction_item.get_type().no_ref();

            std::string storage_var_name = (*it)->get_field_name() + "_storage";
            TL::Type storage_var_type = reduction_type.get_pointer_to();


            TL::Symbol reduction_function, reduction_function_original_var, initializer_function;

            // Checking if the current reduction type has been treated before
            // Note that if that happens we can reuse the combiner and
            // initializer function.
            //
            // C/C++: note that if the type of the list item is an array type,
            // we regiter the reduction over its element type
            TL::Type registered_reduction_type = reduction_type;
            while (!IS_FORTRAN_LANGUAGE
                    && registered_reduction_type.is_array())
            {
                registered_reduction_type = registered_reduction_type.array_element();
            }

            LoweringVisitor::reduction_task_map_t::iterator task_red_info =
               _task_reductions_map.find(std::make_pair(reduction_info, registered_reduction_type));

            if (task_red_info != _task_reductions_map.end())
            {
              reduction_function = task_red_info->second._reducer;
              reduction_function_original_var = task_red_info->second._reducer_orig_var;
              initializer_function = task_red_info->second._initializer;
            }
            else
            {
               create_reduction_functions(reduction_info,
                     construct,
                     registered_reduction_type,
                     reduction_item,
                     reduction_function,
                     reduction_function_original_var);

               create_initializer_function(reduction_info,
                     construct,
                     registered_reduction_type,
                     initializer_function);

               _task_reductions_map.insert(
                       std::make_pair(
                           std::make_pair(reduction_info, registered_reduction_type),
                           TaskReductionsInfo(reduction_function, reduction_function_original_var, initializer_function)
                           ));
            }

            // Mandatory TL::Sources to be filled by any reduction
            TL::Source
                orig_address, // address of the original reduction variable
                storage_var; // variable which holds the address of the storage

            // Specific TL::Sources to be filled only by Fortran array reduction
            TL::Source extra_array_red_decl;

            if (IS_C_LANGUAGE || IS_CXX_LANGUAGE)
            {
                storage_var << storage_var_name;
                orig_address << (reduction_item_type.is_pointer() ? "" : "&") << (*it)->get_field_name();

                final_clause_stuff
                    << "if (" << storage_var_name << " == 0)"
                    << "{"
                    <<     storage_var_name  << " = "
                    <<        "(" << as_type(storage_var_type) << ")" << orig_address << ";"
                    << "}"
                    ;
            }
            else
            {
               orig_address <<  "&" << (*it)->get_field_name();
                if (reduction_item_type.is_array())
                {
                    size_t size_of_array_descriptor =
                        fortran_size_of_array_descriptor(
                                fortran_get_rank0_type(reduction_item_type.get_internal_type()),
                                fortran_get_rank_of_type(reduction_item_type.get_internal_type()));


                    storage_var << storage_var_name << "_indirect";
                    extra_array_red_decl << "void *" << storage_var << ";";

                    extra_array_red_memcpy
                        << "nanos_err = nanos_memcpy("
                        <<      "(void **) &" << storage_var_name << ","
                        <<      storage_var << ","
                        <<      size_of_array_descriptor << ");"
                            ;

                    final_clause_stuff
                        << "if (" << storage_var << " == 0)"
                        << "{"
                        <<     "nanos_err = nanos_memcpy("
                        <<         "(void **) &" << storage_var_name << ","
                        <<         "(void *) "<< orig_address << ","
                        <<         size_of_array_descriptor << ");"
                        << "}"
                        << "else"
                        << "{"
                        <<     extra_array_red_memcpy
                        << "}"
                        ;
                }
                else
                {
                    // We need to convert a void* type into a pointer to the reduction type.
                    // As a void* in FORTRAN is represented as an INTEGER(8), we cannot do this
                    // conversion directly in the FORTRAN source. For this reason we introduce
                    // a new function that will be defined in a C file.
                    TL::Symbol func = TL::Nanox::get_function_ptr_conversion(
                            // Destination
                            reduction_item_type.get_pointer_to(),
                            // Origin
                            TL::Type::get_void_type().get_pointer_to(),
                            construct.retrieve_context());

                    storage_var << storage_var_name;

                    final_clause_stuff
                        << "if (" << storage_var << " == 0)"
                        << "{"
                        <<     storage_var_name << " = " << func.get_name() << "(" <<  orig_address << ");"
                        << "}"
                        ;
                }
            }

            if (num_reductions > 0)
                final_clause_opt_expr << " && ";
            final_clause_opt_expr << storage_var << " == 0 ";
            num_reductions++;

            reductions_stuff
                << extra_array_red_decl
                << as_type(storage_var_type) << " " << storage_var_name << ";"
                << "nanos_err = nanos_task_reduction_get_thread_storage("
                <<         "(void *)" << orig_address  << ","
                <<         "(void **) &" << storage_var << ");"
                ;

            reduction_symbols_map[reduction_item] = storage_var_name;
        }

        if (num_reductions != 0)
        {
            // Generating the final code if needed
            if (generate_final_stmts)
            {
                std::map<Nodecl::NodeclBase, Nodecl::NodeclBase>::iterator it4 = _final_stmts_map.find(construct);
                ERROR_CONDITION(it4 == _final_stmts_map.end(), "Unreachable code", 0);

                Nodecl::NodeclBase placeholder;
                TL::Source new_statements_src;
                new_statements_src
                    << "{"
                    <<      "nanos_err_t nanos_err;"
                    <<      reductions_stuff
                    <<      "if (" << final_clause_opt_expr  << ")"
                    <<      "{"
                    <<          as_statement(it4->second)
                    <<      "}"
                    <<      "else"
                    <<      "{"
                    <<          final_clause_stuff
                    <<          statement_placeholder(placeholder)
                    <<      "}"
                    << "}"
                    ;

                final_statements = handle_task_statements(
                      construct, statements, placeholder, new_statements_src, reduction_symbols_map);
            }

            // Generating the task code
            {
                TL::Source new_statements_src;
                Nodecl::NodeclBase placeholder;
                new_statements_src
                    << "{"
                    <<      "nanos_err_t nanos_err;"
                    <<      reductions_stuff
                    <<      extra_array_red_memcpy
                    <<      statement_placeholder(placeholder)
                    << "}"
                    ;

                Nodecl::NodeclBase new_statements = handle_task_statements(
                      construct, statements, placeholder, new_statements_src, reduction_symbols_map);
                statements.replace(new_statements);
            }
        }

        ERROR_CONDITION(num_reductions != 0 &&
                !Nanos::Version::interface_is_at_least("task_reduction", 1001),
                "The version of the runtime begin used does not support task reductions", 0);

        return (num_reductions != 0);
    }
예제 #22
0
 // Predicate over a symbol: forwards the symbol's filename to
 // CheckIfInCudaCompiler::check and returns that result as a bool.
 // NOTE(review): presumably this tells whether the file the symbol comes
 // from is handled by the CUDA compiler -- confirm against
 // CheckIfInCudaCompiler.
 static bool check_symbol(TL::Symbol s)
 {
     const bool filename_matches = CheckIfInCudaCompiler::check(s.get_filename());
     return filename_matches;
 }