예제 #1
0
        // Handles a function marked as SIMD.
        //
        // The SimdFunction node is replaced by the FunctionCode it wraps. A deep
        // copy of that function is then vectorized, renamed to
        // __<name>_<device>_<vector length>, registered in the vectorizer as a
        // vector version of the scalar function and appended as a sibling.
        void SimdVisitor::visit(const Nodecl::OpenMP::SimdFunction& simd_node)
        {
            Nodecl::FunctionCode scalar_code =
                simd_node.get_statement().as<Nodecl::FunctionCode>();

            // The SimdFunction wrapper is dropped: only the function code stays
            simd_node.replace(scalar_code);

            TL::Symbol scalar_sym = scalar_code.get_symbol();

            // The vector version is built on a deep copy of the scalar code
            Nodecl::FunctionCode simd_code =
                Nodecl::Utils::deep_copy(scalar_code, scalar_code)
                .as<Nodecl::FunctionCode>();

            _vectorizer.vectorize(simd_code, _device_name, _vector_length, NULL);

            // Rename the symbol of the copy
            std::stringstream simd_name;
            simd_name << "__" << scalar_sym.get_name()
                << "_" << _device_name
                << "_" << _vector_length;

            simd_code.get_symbol().set_name(simd_name.str());

            // Register the copy as a SIMD version of the scalar function. The
            // name of the scalar symbol is queried again at this point, after
            // the rename above, exactly like the lookup key has always been
            // computed.
            _vectorizer.add_vector_function_version(
                    scalar_sym.get_name(),
                    simd_code,
                    _device_name,
                    _vector_length,
                    NULL,
                    TL::Vectorization::SIMD_FUNC_PRIORITY);

            // The vector version goes right after the node that was replaced
            simd_node.append_sibling(simd_code);
        }
예제 #2
0
    // Creates a new function symbol by forwarding to
    // SymbolUtils::new_function_symbol with the name, return type and
    // parameter types of 'function'. The parameter names are the names of the
    // related symbols of 'function'.
    TL::Symbol new_function_symbol(TL::Symbol function)
    {
        TL::Type function_type = function.get_type();
        TL::ObjectList<TL::Type> types = function_type.parameters();

        // Collect the name of every related symbol, in order
        TL::ObjectList<std::string> names;
        TL::ObjectList<TL::Symbol> related = function.get_related_symbols();
        for (TL::ObjectList<TL::Symbol>::iterator sym_it = related.begin();
                sym_it != related.end();
                ++sym_it)
        {
            names.append(sym_it->get_name());
        }

        return SymbolUtils::new_function_symbol(
                function,
                function.get_name(),
                function_type.returns(),
                names,
                types);
    }
예제 #3
0
// Appends a deep copy of every node of 'stuff_to_be_copied' to _fpga_file_code.
//
// Function codes (plain or template) are copied through a symbol map that maps
// the symbol of the function to a new function symbol named "<name>_hls". Any
// other node is copied without a symbol map.
void DeviceFPGA::copy_stuff_to_device_file(
        const TL::ObjectList<Nodecl::NodeclBase>& stuff_to_be_copied)
{
    typedef TL::ObjectList<Nodecl::NodeclBase>::const_iterator node_iterator;

    for (node_iterator node = stuff_to_be_copied.begin();
            node != stuff_to_be_copied.end();
            ++node)
    {
        const bool is_function_code = node->is<Nodecl::FunctionCode>()
            || node->is<Nodecl::TemplateFunctionCode>();

        if (!is_function_code)
        {
            // Nothing to rename: plain copy
            _fpga_file_code.append(Nodecl::Utils::deep_copy(*node, *node));
            continue;
        }

        TL::Symbol original = node->get_symbol();
        TL::Symbol hls_version = SymbolUtils::new_function_symbol(
                original, original.get_name() + "_hls");

        // The copy uses the "_hls" symbol wherever the original one appears
        Nodecl::Utils::SimpleSymbolMap original_to_hls;
        original_to_hls.add_map(original, hls_version);

        _fpga_file_code.append(
                Nodecl::Utils::deep_copy(*node, *node, original_to_hls));
    }
}
    // Generates the code that a task needs to deal with its reductions.
    //
    // Every data item of 'outline_info' that is a reduction is processed:
    //  - its reducer and initializer functions are created, or reused from
    //    _task_reductions_map if the same (reduction, registered type) pair
    //    was already treated
    //  - the generated code declares a '<field name>_storage' variable and
    //    calls nanos_task_reduction_get_thread_storage
    //  - the reduction symbol is mapped to the name of its storage variable
    //
    // If at least one reduction was found, 'statements' is replaced by the
    // statements returned by handle_task_statements and, when
    // 'generate_final_stmts' is true, 'final_statements' is assigned the code
    // built for the final context.
    //
    // Parameters:
    //  - construct: node forwarded to the helper functions, used to retrieve
    //    a context and used as key of _final_stmts_map
    //  - outline_info: provides the data items to be inspected
    //  - statements: statements of the task, replaced if there are reductions
    //  - generate_final_stmts: whether 'final_statements' has to be computed
    //  - final_statements: output, only written if there are reductions and
    //    'generate_final_stmts' is true
    //
    // Returns true if at least one reduction data item was found.
    bool LoweringVisitor::handle_reductions_on_task(
            Nodecl::NodeclBase construct,
            OutlineInfo& outline_info,
            Nodecl::NodeclBase statements,
            bool generate_final_stmts,
            Nodecl::NodeclBase& final_statements)
    {
        int num_reductions = 0;

        // These sources are shared by all the reductions of the task: every
        // iteration of the loop below appends to them.
        // NOTE(review): extra_array_red_memcpy is also appended into
        // final_clause_stuff inside the loop while it keeps growing; how that
        // behaves with several Fortran array reductions depends on TL::Source
        // semantics that are not visible here -- confirm.
        TL::Source
            reductions_stuff,
            final_clause_stuff,
            // This source represents an expression which is used to check if
            // we can do an optimization in the final code. This optimization
            // consists on calling the original code (with a serial closure) if
            // we are in a final context and the reduction variables that we
            // are using have not been registered previously
            final_clause_opt_expr,
            extra_array_red_memcpy;

        // Maps every reduction symbol to the name of its storage variable
        std::map<TL::Symbol, std::string> reduction_symbols_map;

        TL::ObjectList<OutlineDataItem*> data_items = outline_info.get_data_items();
        for (TL::ObjectList<OutlineDataItem*>::iterator it = data_items.begin();
                it != data_items.end();
                it++)
        {
           // Only reduction data items are of interest
           if (!(*it)->is_reduction())
              continue;

            std::pair<TL::OpenMP::Reduction*, TL::Type> red_info_pair = (*it)->get_reduction_info();
            TL::OpenMP::Reduction* reduction_info = red_info_pair.first;
            TL::Type reduction_type = red_info_pair.second.no_ref();

            TL::Symbol reduction_item = (*it)->get_symbol();
            TL::Type reduction_item_type = reduction_item.get_type().no_ref();

            // The storage variable is a pointer to the reduction type
            std::string storage_var_name = (*it)->get_field_name() + "_storage";
            TL::Type storage_var_type = reduction_type.get_pointer_to();


            TL::Symbol reduction_function, reduction_function_original_var, initializer_function;

            // Checking if the current reduction type has been treated before
            // Note that if that happens we can reuse the combiner and
            // initializer function.
            //
            // C/C++: note that if the type of the list item is an array type,
            // we register the reduction over its element type
            TL::Type registered_reduction_type = reduction_type;
            while (!IS_FORTRAN_LANGUAGE
                    && registered_reduction_type.is_array())
            {
                registered_reduction_type = registered_reduction_type.array_element();
            }

            LoweringVisitor::reduction_task_map_t::iterator task_red_info =
               _task_reductions_map.find(std::make_pair(reduction_info, registered_reduction_type));

            if (task_red_info != _task_reductions_map.end())
            {
              // Already treated: reuse the functions created back then
              reduction_function = task_red_info->second._reducer;
              reduction_function_original_var = task_red_info->second._reducer_orig_var;
              initializer_function = task_red_info->second._initializer;
            }
            else
            {
               // First time: create the functions and remember them
               create_reduction_functions(reduction_info,
                     construct,
                     registered_reduction_type,
                     reduction_item,
                     reduction_function,
                     reduction_function_original_var);

               create_initializer_function(reduction_info,
                     construct,
                     registered_reduction_type,
                     initializer_function);

               _task_reductions_map.insert(
                       std::make_pair(
                           std::make_pair(reduction_info, registered_reduction_type),
                           TaskReductionsInfo(reduction_function, reduction_function_original_var, initializer_function)
                           ));
            }

            // Mandatory TL::Sources to be filled by any reduction
            TL::Source
                orig_address, // address of the original reduction variable
                storage_var; // variable which holds the address of the storage

            // Specific TL::Sources to be filled only by Fortran array reduction
            TL::Source extra_array_red_decl;

            if (IS_C_LANGUAGE || IS_CXX_LANGUAGE)
            {
                storage_var << storage_var_name;
                // A pointer item is used as it is, otherwise its address is taken
                orig_address << (reduction_item_type.is_pointer() ? "" : "&") << (*it)->get_field_name();

                // In the final code a null storage is redirected to the
                // original variable
                final_clause_stuff
                    << "if (" << storage_var_name << " == 0)"
                    << "{"
                    <<     storage_var_name  << " = "
                    <<        "(" << as_type(storage_var_type) << ")" << orig_address << ";"
                    << "}"
                    ;
            }
            else
            {
               orig_address <<  "&" << (*it)->get_field_name();
                if (reduction_item_type.is_array())
                {
                    size_t size_of_array_descriptor =
                        fortran_size_of_array_descriptor(
                                fortran_get_rank0_type(reduction_item_type.get_internal_type()),
                                fortran_get_rank_of_type(reduction_item_type.get_internal_type()));


                    // Fortran arrays use an extra 'void *' variable, named
                    // '<storage>_indirect', which is the one passed to the
                    // runtime. The generated nanos_memcpy call below receives
                    // the address of the storage variable, the indirect
                    // variable and the size of the array descriptor.
                    storage_var << storage_var_name << "_indirect";
                    extra_array_red_decl << "void *" << storage_var << ";";

                    extra_array_red_memcpy
                        << "nanos_err = nanos_memcpy("
                        <<      "(void **) &" << storage_var_name << ","
                        <<      storage_var << ","
                        <<      size_of_array_descriptor << ");"
                            ;

                    final_clause_stuff
                        << "if (" << storage_var << " == 0)"
                        << "{"
                        <<     "nanos_err = nanos_memcpy("
                        <<         "(void **) &" << storage_var_name << ","
                        <<         "(void *) "<< orig_address << ","
                        <<         size_of_array_descriptor << ");"
                        << "}"
                        << "else"
                        << "{"
                        <<     extra_array_red_memcpy
                        << "}"
                        ;
                }
                else
                {
                    // We need to convert a void* type into a pointer to the reduction type.
                    // As a void* in FORTRAN is represented as an INTEGER(8), we cannot do this
                    // conversion directly in the FORTRAN source. For this reason we introduce
                    // a new function that will be defined in a C file.
                    TL::Symbol func = TL::Nanox::get_function_ptr_conversion(
                            // Destination
                            reduction_item_type.get_pointer_to(),
                            // Origin
                            TL::Type::get_void_type().get_pointer_to(),
                            construct.retrieve_context());

                    storage_var << storage_var_name;

                    final_clause_stuff
                        << "if (" << storage_var << " == 0)"
                        << "{"
                        <<     storage_var_name << " = " << func.get_name() << "(" <<  orig_address << ");"
                        << "}"
                        ;
                }
            }

            // The optimization expression is the conjunction of
            // '<storage var> == 0' for all the reductions
            if (num_reductions > 0)
                final_clause_opt_expr << " && ";
            final_clause_opt_expr << storage_var << " == 0 ";
            num_reductions++;

            // Declarations and runtime call emitted for this reduction
            reductions_stuff
                << extra_array_red_decl
                << as_type(storage_var_type) << " " << storage_var_name << ";"
                << "nanos_err = nanos_task_reduction_get_thread_storage("
                <<         "(void *)" << orig_address  << ","
                <<         "(void **) &" << storage_var << ");"
                ;

            reduction_symbols_map[reduction_item] = storage_var_name;
        }

        if (num_reductions != 0)
        {
            // Generating the final code if needed
            if (generate_final_stmts)
            {
                // The construct must have an entry in _final_stmts_map
                std::map<Nodecl::NodeclBase, Nodecl::NodeclBase>::iterator it4 = _final_stmts_map.find(construct);
                ERROR_CONDITION(it4 == _final_stmts_map.end(), "Unreachable code", 0);

                // If all the storages are null the statements stored in
                // _final_stmts_map are emitted, otherwise the storages are
                // fixed (final_clause_stuff) before the placeholder
                Nodecl::NodeclBase placeholder;
                TL::Source new_statements_src;
                new_statements_src
                    << "{"
                    <<      "nanos_err_t nanos_err;"
                    <<      reductions_stuff
                    <<      "if (" << final_clause_opt_expr  << ")"
                    <<      "{"
                    <<          as_statement(it4->second)
                    <<      "}"
                    <<      "else"
                    <<      "{"
                    <<          final_clause_stuff
                    <<          statement_placeholder(placeholder)
                    <<      "}"
                    << "}"
                    ;

                final_statements = handle_task_statements(
                      construct, statements, placeholder, new_statements_src, reduction_symbols_map);
            }

            // Generating the task code
            {
                TL::Source new_statements_src;
                Nodecl::NodeclBase placeholder;
                new_statements_src
                    << "{"
                    <<      "nanos_err_t nanos_err;"
                    <<      reductions_stuff
                    <<      extra_array_red_memcpy
                    <<      statement_placeholder(placeholder)
                    << "}"
                    ;

                Nodecl::NodeclBase new_statements = handle_task_statements(
                      construct, statements, placeholder, new_statements_src, reduction_symbols_map);
                statements.replace(new_statements);
            }
        }

        // Task reductions require version 1001 or later of the
        // "task_reduction" interface of the runtime. This check is performed
        // after the code above has already been generated.
        // NOTE(review): "begin used" in the message below looks like a typo
        // of "being used"; the string is left untouched here.
        ERROR_CONDITION(num_reductions != 0 &&
                !Nanos::Version::interface_is_at_least("task_reduction", 1001),
                "The version of the runtime begin used does not support task reductions", 0);

        return (num_reductions != 0);
    }
예제 #5
0
// Builds the source of the blocked version of the loop nest.
//
// Only the first _nesting loops are blocked, _nesting being the minimum of the
// number of blocking factors and the number of loops of the nest. The result
// is laid out as follows:
//
//  - one loop per blocked loop over a new variable '_blk_<induction var>',
//    which advances by step * factor
//  - one loop per blocked loop over the original induction variable, which
//    starts at '_blk_<induction var>' and is bounded by the minimum of
//    '_blk_<induction var> + step * (factor - 1)' and the original upper bound
//  - the body of the loop at position _nesting - 1
//
// Each loop is written into the source appended at the end of the previous
// one, so the sources are appended while still empty and filled afterwards.
TL::Source LoopBlocking::do_blocking()
{
    Source blocked_nest, outermost_part;

    blocked_nest
        << outermost_part
        ;

    ObjectList<ForStatement> loops = _for_nest_info.get_nest_list();

    _nesting = std::min(_nest_factors.size(), loops.size());

    // Source where the next loop has to be written
    TL::Source *pending_part = &outermost_part;

    // First pass: the loops that move from block to block
    ObjectList<TL::Expression>::iterator factor_it = _nest_factors.begin();
    ObjectList<TL::ForStatement>::iterator loop_it = loops.begin();
    for (int depth = 0;
            depth < _nesting;
            ++depth, ++loop_it, ++factor_it)
    {
        TL::IdExpression induction_expr = loop_it->get_induction_variable();
        TL::Symbol induction_sym = induction_expr.get_symbol();
        TL::Type induction_type = induction_sym.get_type();

        std::string block_var = "_blk_" + induction_sym.get_name();

        TL::Source *inner_part = new TL::Source();
        (*pending_part)
            << "for(" << induction_type.get_declaration(induction_sym.get_scope(), block_var) << " = " << loop_it->get_lower_bound() << ";"
            << block_var << loop_it->get_bound_operator() << loop_it->get_upper_bound() << ";"
            << block_var << "+= ( " << loop_it->get_step() << ") * " << factor_it->prettyprint() << ")"
            << (*inner_part)
            ;

        pending_part = inner_part;
    }

    // Second pass: the loops that iterate inside every block
    factor_it = _nest_factors.begin();
    loop_it = loops.begin();
    for (int depth = 0;
            depth < _nesting;
            ++depth, ++loop_it, ++factor_it)
    {
        TL::IdExpression induction_expr = loop_it->get_induction_variable();
        TL::Symbol induction_sym = induction_expr.get_symbol();
        TL::Type induction_type = induction_sym.get_type();

        std::string induction_name = induction_expr.prettyprint();

        // A loop that declares its induction variable in the init part needs
        // the declaration to be emitted again
        std::string init_part = induction_name;
        AST_t original_init = loop_it->get_iterating_init();
        if (Declaration::predicate(original_init))
        {
            init_part = induction_type.get_declaration(induction_sym.get_scope(), induction_name);
        }

        std::string block_var = "_blk_" + induction_sym.get_name();

        // Upper bound of the loop, filled once the loop has been emitted
        TL::Source bound_code;

        TL::Source *inner_part = new TL::Source();
        (*pending_part)
            << "for(" << init_part << " = " << block_var << ";"
            << induction_name << loop_it->get_bound_operator() << bound_code  << ";"
            << induction_name << "+= ( " << loop_it->get_step() << "))"
            << (*inner_part)
            ;

        // bound_code is the minimum of the end of the block and the upper
        // bound of the original loop
        TL::Source block_end, loop_end;
        bound_code
            << "((" << block_end << ") < (" << loop_end << ") ? (" << block_end << ") : (" << loop_end << "))"
            ;

        block_end << block_var << " + (" << loop_it->get_step() << ") * (" << factor_it->prettyprint() << " - 1 )";
        loop_end << loop_it->get_upper_bound();

        pending_part = inner_part;
    }

    // Finally, the body of the innermost blocked loop
    (*pending_part)
        << loops[_nesting - 1].get_loop_body()
        ;

    return blocked_nest;
}
// Builds the source of the unrolled version of _for_stmt.
//
// Loops that are not regular are delegated to silly_unroll(). For regular
// loops the result is a compound statement made of these parts, in order:
//
//   {
//      <declaration of the induction variable>  (only if _factor > 1 and the
//                                                loop declared it in its init)
//      <before_main>                            (only filled by omp_replication)
//      <main loop>
//      <after_main>                             (only filled by omp_replication)
//      <epilogue loop>                          (only if _factor > 1)
//   }
//
// With _factor > 1 the main loop advances by step * _factor and stops
// (_factor - 1) * step before the upper bound, and the epilogue loop runs the
// remaining iterations reusing the same induction variable. Otherwise the
// header of the original loop is kept as it is.
//
// The parts are appended to the result while they are still empty and are
// filled afterwards; this presumably relies on TL::Source keeping track of the
// appended sources (its definition is not visible here).
//
// Returns the source of the unrolled loop.
TL::Source LoopUnroll::do_unroll()
{
    if (!_for_stmt.regular_loop())
    {
        return silly_unroll();
    }

    // Get parts of the loop
    IdExpression induction_var = _for_stmt.get_induction_variable();
    Expression lower_bound = _for_stmt.get_lower_bound();
    Expression upper_bound = _for_stmt.get_upper_bound();
    Expression step = _for_stmt.get_step();
    TL::Source operator_bound = _for_stmt.get_bound_operator();

    Statement loop_body = _for_stmt.get_loop_body();

    TL::Source result, epilogue,
        main, induction_var_decl,
        before_main, after_main;

    result
        << "{"
        << induction_var_decl
        << before_main
        << main
        << after_main
        << epilogue
        << "}"
        ;

    Source replicated_body;
    Source epilogue_body;
    if (_factor > 1)
    {
        AST_t init = _for_stmt.get_iterating_init();
        if (Declaration::predicate(init))
        {
            TL::Symbol sym = induction_var.get_symbol();
            TL::Type type = sym.get_type();
            // Declare it since it will have local scope
            induction_var_decl
                << type.get_declaration(sym.get_scope(), sym.get_name()) << ";"
                ;
        }

        main
            << "for (" << induction_var << " = " << lower_bound << ";"
            << induction_var << operator_bound << "((" << upper_bound << ") - (" << _factor << " - 1)* (" << step << "));"
            << induction_var << "+= (" << step << ") * " << _factor << ")"
            << "{"
            << replicated_body
            << "}"
            ;

        // FIXME - It could help to initialize here another variable and make both loops independent
        epilogue
            << "for ( ; "  // No initialization, keep using the old induction var
            << induction_var << operator_bound << upper_bound << ";"
            << induction_var << "+= (" << step << "))"
            << epilogue_body
            ;

        if (!_remove_tasks)
        {
            epilogue_body << loop_body;
        }
        else
        {
            // Removing tasks from the epilogue is not implemented
            std::cerr << "Do not create task " << __FILE__ << ":" << __LINE__ << std::endl;
            running_error("Path not supported yet", 0);
            // epilogue_body << loop_body.get_ast().prettyprint_with_callback(functor(ignore_tasks));
        }
    }
    else
    {
        // Leave it as is
        main << "for(" << _for_stmt.get_iterating_init().prettyprint()
            << _for_stmt.get_iterating_condition() << ";"
            << _for_stmt.get_iterating_expression() << ")"
            << "{"
            << replicated_body
            << "}"
            ;
    }

    // Replicate the body. OpenMP-aware replication is used only if the body
    // contains relevant OpenMP and it has not been requested to ignore it.
    const bool consider_omp = TaskAggregation::contains_relevant_openmp(loop_body);

    if (_ignore_omp || !consider_omp)
    {
        simple_replication(_factor, replicated_body, epilogue_body,
                induction_var, loop_body);
    }
    else
    {
        omp_replication(_factor, replicated_body, epilogue_body,
                induction_var, loop_body, before_main, after_main);
    }

    return result;
}
예제 #7
0
 // Visits an expression statement.
 //
 // In Fortran, a statement that is a call to one of the intrinsics
 // 'ompss_opencl_allocate' or 'ompss_opencl_deallocate' is handed to the
 // handler of that intrinsic. In every case the nested expression of the
 // statement is walked at the end.
 void LoweringVisitor::visit(const Nodecl::ExpressionStatement& expr_stmt)
 {
     Nodecl::NodeclBase nested_expr = expr_stmt.get_nest();
     if (IS_FORTRAN_LANGUAGE
             && nested_expr.is<Nodecl::FunctionCall>())
     {
         Nodecl::FunctionCall call = nested_expr.as<Nodecl::FunctionCall>();
         Nodecl::NodeclBase callee = call.get_called();

         // Only calls to intrinsic symbols can be one of the two intrinsics
         // handled here
         if (callee.is<Nodecl::Symbol>()
                 && callee.as<Nodecl::Symbol>().get_symbol().is_intrinsic())
         {
             const std::string intrinsic_name =
                 callee.as<Nodecl::Symbol>().get_symbol().get_name();

             if (intrinsic_name == "ompss_opencl_allocate")
             {
                 // The call to the intrinsic becomes a call to a new
                 // function that performs these steps:
                 //  - allocate a temporary array with descriptor
                 //  - copy the array descriptor to the address of the array
                 //  - call the Nanos++ API to allocate the buffer in the
                 //    shared memory
                 //  - deallocate the temporary array with descriptor
                 //
                 // For instance, this code:
                 //
                 //    ...
                 //    INTEGER, ALLOCATABLE :: V(:)
                 //    OMPSS_OPENCL_ALLOCATE(V(10))
                 //    ...
                 //
                 // ends up as:
                 //
                 //    SUBROUTINE NANOX_OPENCL_ALLOCATE_INTERNAL(ARR, LB1, UB1)
                 //        INTEGER, ALLOCATABLE :: ARR(:)
                 //        INTEGER :: LB1, UB1, ERR
                 //        INTEGER, ALLOCATABLE :: TMP(:)
                 //
                 //        ALLOCATE(TMP(LB1:UB1))
                 //
                 //        ERR = NANOS_MEMCPY(
                 //                MERCURIUM_GET_ADDRESS_OF(ARR),
                 //                MERCURIUM_GET_ADDRESS_OF(TMP),
                 //                48)
                 //
                 //        CALL NANOS_OPENCL_ALLOCATE_FORTRAN(
                 //            SIZEOF(TMP),
                 //            MERCURIUM_GET_ADDRESS_OF(ARR))
                 //
                 //        DEALLOCATE(TMP)
                 //    END SUBROUTINE NANOX_OPENCL_ALLOCATE_INTERNAL
                 //
                 //    ...
                 //    INTEGER, ALLOCATABLE :: V(:)
                 //    CALL NANOX_OPENCL_ALLOCATE_INTERNAL(V, 1, 10)
                 //    ...
                 //
                 // More details: https://pm.bsc.es/projects/mcxx/ticket/1994
                 handle_ompss_opencl_allocate_intrinsic(
                         call,
                         _declared_ocl_allocate_functions,
                         expr_stmt);
             }
             else if (intrinsic_name == "ompss_opencl_deallocate")
             {
                 // This case is simpler than the allocation: the call to
                 // the intrinsic is just replaced by a call to the Nanos++
                 // API. For instance, this code:
                 //
                 //    ...
                 //    INTEGER, ALLOCATABLE :: V(:)
                 //    ...
                 //    OMPSS_OPENCL_DEALLOCATE(V)
                 //    ...
                 //
                 // ends up as:
                 //
                 //    ...
                 //    INTEGER, ALLOCATABLE :: V(:)
                 //    ...
                 //    CALL NANOS_OPENCL_ALLOCATE_FORTRAN(MERCURIUM_GET_ADDRESS_OF(V))
                 //    ...
                 //
                 // NOTE(review): the routine named in the example above is
                 // the ALLOCATE one; presumably the deallocation routine was
                 // meant -- confirm against the handler.
                 handle_ompss_opencl_deallocate_intrinsic(call, expr_stmt);
             }
         }
     }

     // The nest is queried again instead of reusing the node obtained at the
     // beginning of the function
     walk(expr_stmt.get_nest());
 }