// Lowers a SimdFunction node: the wrapper node is replaced by the scalar
// function it contains, a vectorized deep copy of that function is created,
// renamed to "__<name>_<device>_<vector length>", registered as a vector
// version of the scalar function and finally appended as a sibling.
void SimdVisitor::visit(const Nodecl::OpenMP::SimdFunction& simd_node)
{
    Nodecl::FunctionCode scalar_code =
        simd_node.get_statement().as<Nodecl::FunctionCode>();

    // The SimdFunction wrapper is dropped in favour of the plain function code
    simd_node.replace(scalar_code);

    TL::Symbol sym = scalar_code.get_symbol();

    // Work on a copy so the scalar version is kept untouched
    Nodecl::FunctionCode simd_code =
        Nodecl::Utils::deep_copy(scalar_code, scalar_code)
        .as<Nodecl::FunctionCode>();

    // Vectorize the copy for the configured device and vector length
    _vectorizer.vectorize(simd_code,
            _device_name,
            _vector_length,
            NULL);

    // Build the name of the vector version and give it to the copy's symbol
    std::stringstream simd_name;
    simd_name << "__" << sym.get_name();
    simd_name << "_" << _device_name;
    simd_name << "_" << _vector_length;

    simd_code.get_symbol().set_name(simd_name.str());

    // Register the SIMD version in the vector function versioning
    _vectorizer.add_vector_function_version(
            sym.get_name(),
            simd_code,
            _device_name,
            _vector_length,
            NULL,
            TL::Vectorization::SIMD_FUNC_PRIORITY);

    // Emit the vectorized function next to the scalar one
    simd_node.append_sibling(simd_code);
}
// Creates a new function symbol through SymbolUtils::new_function_symbol
// using the name, return type, parameter types and the names of the related
// symbols of 'function'.
TL::Symbol new_function_symbol(TL::Symbol function)
{
    TL::Type function_type = function.get_type();
    TL::ObjectList<TL::Type> parameter_types = function_type.parameters();

    // The parameter names are taken from the symbols related to the function
    TL::ObjectList<TL::Symbol> related_symbols = function.get_related_symbols();
    TL::ObjectList<std::string> parameter_names;
    for (TL::ObjectList<TL::Symbol>::iterator param = related_symbols.begin();
            param != related_symbols.end();
            ++param)
    {
        parameter_names.append(param->get_name());
    }

    return SymbolUtils::new_function_symbol(
            function,
            function.get_name(),
            function_type.returns(),
            parameter_names,
            parameter_types);
}
// Appends a deep copy of every given node to _fpga_file_code.
// Function definitions (plain or template) are copied under a new symbol
// whose name is the original name plus the "_hls" suffix; any other node is
// copied as is.
void DeviceFPGA::copy_stuff_to_device_file(
        const TL::ObjectList<Nodecl::NodeclBase>& stuff_to_be_copied)
{
    typedef TL::ObjectList<Nodecl::NodeclBase>::const_iterator node_iterator;

    for (node_iterator node = stuff_to_be_copied.begin();
            node != stuff_to_be_copied.end();
            ++node)
    {
        const bool is_function_definition =
            node->is<Nodecl::FunctionCode>()
            || node->is<Nodecl::TemplateFunctionCode>();

        if (!is_function_definition)
        {
            _fpga_file_code.append(Nodecl::Utils::deep_copy(*node, *node));
            continue;
        }

        // Map the original function symbol to its "_hls" counterpart so the
        // copied code refers to the new symbol
        TL::Symbol original_function = node->get_symbol();
        TL::Symbol hls_function = SymbolUtils::new_function_symbol(
                original_function,
                original_function.get_name() + "_hls");

        Nodecl::Utils::SimpleSymbolMap symbol_map;
        symbol_map.add_map(original_function, hls_function);

        _fpga_file_code.append(
                Nodecl::Utils::deep_copy(*node, *node, symbol_map));
    }
}
bool LoweringVisitor::handle_reductions_on_task( Nodecl::NodeclBase construct, OutlineInfo& outline_info, Nodecl::NodeclBase statements, bool generate_final_stmts, Nodecl::NodeclBase& final_statements) { int num_reductions = 0; TL::Source reductions_stuff, final_clause_stuff, // This source represents an expression which is used to check if // we can do an optimization in the final code. This optimization // consists on calling the original code (with a serial closure) if // we are in a final context and the reduction variables that we // are using have not been registered previously final_clause_opt_expr, extra_array_red_memcpy; std::map<TL::Symbol, std::string> reduction_symbols_map; TL::ObjectList<OutlineDataItem*> data_items = outline_info.get_data_items(); for (TL::ObjectList<OutlineDataItem*>::iterator it = data_items.begin(); it != data_items.end(); it++) { if (!(*it)->is_reduction()) continue; std::pair<TL::OpenMP::Reduction*, TL::Type> red_info_pair = (*it)->get_reduction_info(); TL::OpenMP::Reduction* reduction_info = red_info_pair.first; TL::Type reduction_type = red_info_pair.second.no_ref(); TL::Symbol reduction_item = (*it)->get_symbol(); TL::Type reduction_item_type = reduction_item.get_type().no_ref(); std::string storage_var_name = (*it)->get_field_name() + "_storage"; TL::Type storage_var_type = reduction_type.get_pointer_to(); TL::Symbol reduction_function, reduction_function_original_var, initializer_function; // Checking if the current reduction type has been treated before // Note that if that happens we can reuse the combiner and // initializer function. 
// // C/C++: note that if the type of the list item is an array type, // we regiter the reduction over its element type TL::Type registered_reduction_type = reduction_type; while (!IS_FORTRAN_LANGUAGE && registered_reduction_type.is_array()) { registered_reduction_type = registered_reduction_type.array_element(); } LoweringVisitor::reduction_task_map_t::iterator task_red_info = _task_reductions_map.find(std::make_pair(reduction_info, registered_reduction_type)); if (task_red_info != _task_reductions_map.end()) { reduction_function = task_red_info->second._reducer; reduction_function_original_var = task_red_info->second._reducer_orig_var; initializer_function = task_red_info->second._initializer; } else { create_reduction_functions(reduction_info, construct, registered_reduction_type, reduction_item, reduction_function, reduction_function_original_var); create_initializer_function(reduction_info, construct, registered_reduction_type, initializer_function); _task_reductions_map.insert( std::make_pair( std::make_pair(reduction_info, registered_reduction_type), TaskReductionsInfo(reduction_function, reduction_function_original_var, initializer_function) )); } // Mandatory TL::Sources to be filled by any reduction TL::Source orig_address, // address of the original reduction variable storage_var; // variable which holds the address of the storage // Specific TL::Sources to be filled only by Fortran array reduction TL::Source extra_array_red_decl; if (IS_C_LANGUAGE || IS_CXX_LANGUAGE) { storage_var << storage_var_name; orig_address << (reduction_item_type.is_pointer() ? 
"" : "&") << (*it)->get_field_name(); final_clause_stuff << "if (" << storage_var_name << " == 0)" << "{" << storage_var_name << " = " << "(" << as_type(storage_var_type) << ")" << orig_address << ";" << "}" ; } else { orig_address << "&" << (*it)->get_field_name(); if (reduction_item_type.is_array()) { size_t size_of_array_descriptor = fortran_size_of_array_descriptor( fortran_get_rank0_type(reduction_item_type.get_internal_type()), fortran_get_rank_of_type(reduction_item_type.get_internal_type())); storage_var << storage_var_name << "_indirect"; extra_array_red_decl << "void *" << storage_var << ";"; extra_array_red_memcpy << "nanos_err = nanos_memcpy(" << "(void **) &" << storage_var_name << "," << storage_var << "," << size_of_array_descriptor << ");" ; final_clause_stuff << "if (" << storage_var << " == 0)" << "{" << "nanos_err = nanos_memcpy(" << "(void **) &" << storage_var_name << "," << "(void *) "<< orig_address << "," << size_of_array_descriptor << ");" << "}" << "else" << "{" << extra_array_red_memcpy << "}" ; } else { // We need to convert a void* type into a pointer to the reduction type. // As a void* in FORTRAN is represented as an INTEGER(8), we cannot do this // conversion directly in the FORTRAN source. For this reason we introduce // a new function that will be defined in a C file. 
TL::Symbol func = TL::Nanox::get_function_ptr_conversion( // Destination reduction_item_type.get_pointer_to(), // Origin TL::Type::get_void_type().get_pointer_to(), construct.retrieve_context()); storage_var << storage_var_name; final_clause_stuff << "if (" << storage_var << " == 0)" << "{" << storage_var_name << " = " << func.get_name() << "(" << orig_address << ");" << "}" ; } } if (num_reductions > 0) final_clause_opt_expr << " && "; final_clause_opt_expr << storage_var << " == 0 "; num_reductions++; reductions_stuff << extra_array_red_decl << as_type(storage_var_type) << " " << storage_var_name << ";" << "nanos_err = nanos_task_reduction_get_thread_storage(" << "(void *)" << orig_address << "," << "(void **) &" << storage_var << ");" ; reduction_symbols_map[reduction_item] = storage_var_name; } if (num_reductions != 0) { // Generating the final code if needed if (generate_final_stmts) { std::map<Nodecl::NodeclBase, Nodecl::NodeclBase>::iterator it4 = _final_stmts_map.find(construct); ERROR_CONDITION(it4 == _final_stmts_map.end(), "Unreachable code", 0); Nodecl::NodeclBase placeholder; TL::Source new_statements_src; new_statements_src << "{" << "nanos_err_t nanos_err;" << reductions_stuff << "if (" << final_clause_opt_expr << ")" << "{" << as_statement(it4->second) << "}" << "else" << "{" << final_clause_stuff << statement_placeholder(placeholder) << "}" << "}" ; final_statements = handle_task_statements( construct, statements, placeholder, new_statements_src, reduction_symbols_map); } // Generating the task code { TL::Source new_statements_src; Nodecl::NodeclBase placeholder; new_statements_src << "{" << "nanos_err_t nanos_err;" << reductions_stuff << extra_array_red_memcpy << statement_placeholder(placeholder) << "}" ; Nodecl::NodeclBase new_statements = handle_task_statements( construct, statements, placeholder, new_statements_src, reduction_symbols_map); statements.replace(new_statements); } } ERROR_CONDITION(num_reductions != 0 && 
!Nanos::Version::interface_is_at_least("task_reduction", 1001), "The version of the runtime begin used does not support task reductions", 0); return (num_reductions != 0); }
// Builds the source of the blocked version of the loop nest.
//
// Only the first _nesting loops are blocked, where _nesting is the minimum
// between the number of blocking factors and the number of loops in the nest.
// For each of them an outer inter-block loop over a "_blk_<var>" variable is
// emitted first; then, inside, one intra-block loop per original induction
// variable; the body of the last blocked loop goes in the innermost position.
//
// NOTE(review): the TL::Source objects created with 'new' are not deleted in
// this function -- confirm whether 'result' needs them to stay alive.
// NOTE(review): 'nest_loops[_nesting - 1]' assumes _nesting is at least 1.
TL::Source LoopBlocking::do_blocking()
{
    Source result, block_loops;

    result
        << block_loops
        ;

    ObjectList<ForStatement> nest_loops = _for_nest_info.get_nest_list();

    _nesting = std::min(_nest_factors.size(), nest_loops.size());

    // Place where the next generated loop has to be written
    TL::Source *innermost = &block_loops;

    ObjectList<TL::Expression>::iterator factor_it = _nest_factors.begin();
    ObjectList<TL::ForStatement>::iterator loop_it = nest_loops.begin();

    // Inter-block loops: one per blocked loop, each declaring its own
    // "_blk_" variable and advancing by step * factor
    for (int level = 0; level < _nesting; ++level, ++loop_it, ++factor_it)
    {
        TL::IdExpression induction_var = loop_it->get_induction_variable();
        TL::Symbol sym = induction_var.get_symbol();
        TL::Type type = sym.get_type();

        std::string var = "_blk_" + sym.get_name();

        TL::Source *nested_part = new TL::Source();

        (*innermost)
            << "for(" << type.get_declaration(sym.get_scope(), var)
            << " = " << loop_it->get_lower_bound()
            << ";";
        (*innermost)
            << var << loop_it->get_bound_operator() << loop_it->get_upper_bound()
            << ";";
        (*innermost)
            << var << "+= ( " << loop_it->get_step() << ") * " << factor_it->prettyprint()
            << ")"
            << (*nested_part)
            ;

        innermost = nested_part;
    }

    // Intra-block loops: start over from the outermost loop
    factor_it = _nest_factors.begin();
    loop_it = nest_loops.begin();

    for (int level = 0; level < _nesting; ++level, ++loop_it, ++factor_it)
    {
        TL::IdExpression induction_var = loop_it->get_induction_variable();
        TL::Symbol sym = induction_var.get_symbol();
        TL::Type type = sym.get_type();

        std::string var = induction_var.prettyprint();
        std::string init_var = var;

        // When the original loop declares its iterator in the for statement,
        // the intra-block loop has to declare it as well
        AST_t loop_init = loop_it->get_iterating_init();
        if (Declaration::predicate(loop_init))
        {
            init_var = type.get_declaration(sym.get_scope(), var);
        }

        std::string blk_var = "_blk_" + sym.get_name();

        // Upper limit of the block; it is filled in after being referenced
        TL::Source min_code;

        TL::Source *nested_part = new TL::Source();

        (*innermost)
            << "for(" << init_var << " = " << blk_var
            << ";";
        (*innermost)
            << var << loop_it->get_bound_operator() << min_code
            << ";";
        (*innermost)
            << var << "+= ( " << loop_it->get_step() << "))"
            << (*nested_part)
            ;

        // min(last iteration of the block, upper bound of the original loop)
        TL::Source a, b;
        min_code
            << "((" << a << ") < (" << b << ") ? (" << a << ") : (" << b << "))"
            ;

        a << blk_var << " + (" << loop_it->get_step() << ") * (" << factor_it->prettyprint() << " - 1 )";
        b << loop_it->get_upper_bound();

        innermost = nested_part;
    }

    // Finally, the body of the innermost blocked loop
    (*innermost)
        << nest_loops[_nesting - 1].get_loop_body()
        ;

    return result;
}
// Builds the source of the unrolled version of _for_stmt.
//
// Non regular loops are delegated to silly_unroll(). Otherwise, when the
// unroll factor is greater than 1, the result is a compound statement with a
// main loop that advances 'step * _factor' per iteration and runs the
// replicated body, followed by an epilogue loop that keeps using the same
// induction variable to run the remaining iterations. With a factor of 1 (or
// less) the original loop header is kept as is.
//
// The replicated body, the epilogue body and the code around the main loop
// are filled in by simple_replication() or omp_replication().
TL::Source LoopUnroll::do_unroll()
{
    if (!_for_stmt.regular_loop())
    {
        return silly_unroll();
    }

    // Get parts of the loop
    IdExpression induction_var = _for_stmt.get_induction_variable();
    Expression lower_bound = _for_stmt.get_lower_bound();
    Expression upper_bound = _for_stmt.get_upper_bound();
    Expression step = _for_stmt.get_step();
    TL::Source operator_bound = _for_stmt.get_bound_operator();

    Statement loop_body = _for_stmt.get_loop_body();

    TL::Source result, epilogue,
        main, induction_var_decl,
        before_main, after_main;

    // Skeleton of the result; its parts are filled in below
    result
        << "{"
        << induction_var_decl
        << before_main
        << main
        << after_main
        << epilogue
        << "}"
        ;

    Source replicated_body;
    Source epilogue_body;
    if (_factor > 1)
    {
        AST_t init = _for_stmt.get_iterating_init();
        if (Declaration::predicate(init))
        {
            TL::Symbol sym = induction_var.get_symbol();
            TL::Type type = sym.get_type();
            // Declare it since it will have local scope
            induction_var_decl
                << type.get_declaration(sym.get_scope(), sym.get_name()) << ";"
                ;
        }

        // The main loop stops (_factor - 1) steps before the upper bound so
        // every iteration can run the whole replicated body
        main
            << "for (" << induction_var << " = " << lower_bound << ";"
            << induction_var << operator_bound << "((" << upper_bound << ") - (" << _factor << " - 1)* (" << step << "));"
            << induction_var << "+= (" << step << ") * " << _factor << ")"
            << "{"
            << replicated_body
            << "}"
            ;

        // FIXME - It could help to initialize here another variable and make both loops independent
        epilogue
            << "for ( ; " // No initialization, keep using the old induction var
            << induction_var << operator_bound << upper_bound << ";"
            << induction_var << "+= (" << step << "))"
            << epilogue_body
            ;

        if (!_remove_tasks)
        {
            epilogue_body << loop_body;
        }
        else
        {
            std::cerr << "Do not create task " << __FILE__ << ":" << __LINE__ << std::endl;
            running_error("Path not supported yet", 0);
            // epilogue_body << loop_body.get_ast().prettyprint_with_callback(functor(ignore_tasks));
        }
    }
    else
    {
        // Leave it as is
        main
            << "for(" << _for_stmt.get_iterating_init().prettyprint()
            << _for_stmt.get_iterating_condition() << ";"
            << _for_stmt.get_iterating_expression() << ")"
            << "{"
            << replicated_body
            << "}"
            ;
    }

    // Replicate the body
    bool consider_omp = false;
    if (TaskAggregation::contains_relevant_openmp(loop_body))
    {
        consider_omp = true;
    }

    if (_ignore_omp || !consider_omp)
    {
        simple_replication(_factor, replicated_body, epilogue_body,
                induction_var, loop_body);
    }
    else
    {
        omp_replication(_factor, replicated_body, epilogue_body,
                induction_var, loop_body, before_main, after_main);
    }

    return result;
}
void LoweringVisitor::visit(const Nodecl::ExpressionStatement& expr_stmt) { Nodecl::NodeclBase nest = expr_stmt.get_nest(); if (IS_FORTRAN_LANGUAGE && nest.is<Nodecl::FunctionCall>()) { Nodecl::FunctionCall function_call = nest.as<Nodecl::FunctionCall>(); if (function_call.get_called().is<Nodecl::Symbol>()) { TL::Symbol sym = function_call.get_called().as<Nodecl::Symbol>().get_symbol(); // We are only interested in two intrinsic symbols if (sym.is_intrinsic()) { if(sym.get_name() == "ompss_opencl_allocate") { // We replace the intrinsic call by a call to a new function which: // - allocates a new temporary array with descriptor // - copies the array descriptor to the address of the array // - calls to the Nanos++ API to allocate the buffer in the shared memory // - deallocates the temporary array with descriptor // // Example: // // ... // INTEGER, ALLOCATABLE :: V(:) // OMPSS_OPENCL_ALLOCATE(V(10)) // ... // // Is transformed into: // // SUBROUTINE NANOX_OPENCL_ALLOCATE_INTERNAL(ARR, LB1, UB1) // INTEGER, ALLOCATABLE :: ARR(:) // INTEGER :: LB1, UB1, ERR // INTEGER, ALLOCATABLE :: TMP(:) // // ALLOCATE(TMP(LB1:UB1)) // // ERR = NANOS_MEMCPY( // MERCURIUM_GET_ADDRESS_OF(ARR), // MERCURIUM_GET_ADDRESS_OF(TMP), // 48) // // CALL NANOS_OPENCL_ALLOCATE_FORTRAN( // SIZEOF(TMP), // MERCURIUM_GET_ADDRESS_OF(ARR)) // // DEALLOCATE(TMP) // END SUBROUTINE NANOX_OPENCL_ALLOCATE_INTERNAL // // ... // INTEGER, ALLOCATABLE :: V(:) // CALL NANOX_OPENCL_ALLOCATE_INTERNAL(V, 1, 10) // ... // // For more information: https://pm.bsc.es/projects/mcxx/ticket/1994 handle_ompss_opencl_allocate_intrinsic( function_call, _declared_ocl_allocate_functions, expr_stmt); } else if (sym.get_name() == "ompss_opencl_deallocate") { // The transformation applied to this intrinsic is more // simple than the other one, we only need to replace // the call to the intrinsic by a call to the Nanos++ // API: // // ... // INTEGER, ALLOCATABLE :: V(:) // ... // OMPSS_OPENCL_DEALLOCATE(V) // ... 
// // Is transformed into: // // ... // INTEGER, ALLOCATABLE :: V(:) // ... // CALL NANOS_OPENCL_ALLOCATE_FORTRAN(MERCURIUM_GET_ADDRESS_OF(V)) // ... handle_ompss_opencl_deallocate_intrinsic(function_call, expr_stmt); } } } } walk(expr_stmt.get_nest()); }