// Visits an object initialization node: when the initialized symbol has a
// (non-null) value attached, that value tree is traversed.
void visit(const Nodecl::ObjectInit& node)
{
    TL::Symbol initialized = node.get_symbol();

    if (!initialized.get_value().is_null())
    {
        walk(initialized.get_value());
    }
}
// Post-processing of an ObjectInit: the initializer stored in the symbol
// (if there is one) is walked by this visitor.
void VectorizerVisitorPostprocessor::visit(const Nodecl::ObjectInit& n)
{
    Nodecl::NodeclBase initializer = n.get_symbol().get_value();

    // Nothing to traverse for symbols without an initializer
    if (initializer.is_null())
        return;

    walk(initializer);
}
// Visits a symbol reference. If the symbol has no data-sharing attribute
// (ignoring the DS_IMPLICIT bit, and without looking at enclosing levels),
// it is recorded as an implicit firstprivate and a warning is printed.
void visit(const Nodecl::Symbol& node)
{
    TL::Symbol sym = node.get_symbol();

    const bool sharing_is_undefined =
        ((_data_sharing.get_data_sharing(sym, /* check_enclosing */ false) & ~DS_IMPLICIT)
         == DS_UNDEFINED);

    if (!sharing_is_undefined)
        return;

    // Mark this as an implicit firstprivate
    _data_sharing.set_data_sharing(
            sym,
            TL::OpenMP::DataSharingAttribute(DS_FIRSTPRIVATE | DS_IMPLICIT));

    std::cerr << node.get_locus_str()
        << ": warning: assuming '"
        << sym.get_qualified_name()
        << "' as firstprivate"
        << std::endl;
}
// Legalizes an object initialization for SSE.
//
// The mask symbol fix-up (fix_mask_symbol) is applied to the initialized
// symbol first; the initializer is read from the symbol only afterwards and,
// when present, it is traversed by this visitor.
void SSEVectorLegalization::visit(const Nodecl::ObjectInit& node)
{
    TL::Symbol sym = node.get_symbol();

    fix_mask_symbol(sym);

    // Vectorizing initialization
    Nodecl::NodeclBase init = sym.get_value();
    if (!init.is_null())
    {
        walk(init);
    }
}
// Registers a Fortran module as "used" in the given scope.
//
// module: must be a valid symbol that is a Fortran module (checked with
//         ERROR_CONDITION).
// scope:  scope whose used-modules information receives the module.
//
// The module is added (once) to the related symbols of the used-modules
// entry of the scope. Modules that are not flagged as builtin are also
// loaded through fortran_load_module.
void Fortran::append_module_to_scope(TL::Symbol module, TL::Scope scope)
{
    ERROR_CONDITION(!(module.is_valid() && module.is_fortran_module()),
            "Symbol must be a Fortran module", 0);

    scope_entry_t* module_entry = module.get_internal_symbol();

    scope_entry_t* used_modules_info =
        ::get_or_create_used_modules_symbol_info(scope.get_decl_context());

    P_LIST_ADD_ONCE(used_modules_info->entity_specs.related_symbols,
            used_modules_info->entity_specs.num_related_symbols,
            module_entry);

    if (module_entry->entity_specs.is_builtin)
        return;

    fortran_load_module(module_entry->symbol_name,
            /* intrinsic */ 0,
            make_locus("", 0, 0));
}
// Handles an object initialization in the NEON backend.
//
// Nodes without an associated symbol are ignored. Otherwise the initializer
// stored in the symbol, when there is one, is traversed by this visitor.
void NeonVectorBackend::visit(const Nodecl::ObjectInit& n)
{
    if (n.has_symbol())
    {
        TL::Symbol sym = n.get_symbol();

        // Vectorizing initialization
        Nodecl::NodeclBase init = sym.get_value();
        if (!init.is_null())
        {
            walk(init);
        }
    }
}
// Creates a function definition whose body is a single empty statement.
//
// function_symbol: function being defined; it is marked as defined and its
//                  function_code entity spec is set to the created tree.
// function_code:   (out) the created FunctionCode, or TemplateFunctionCode
//                  when the function is dependent.
// empty_stmt:      (out) the EmptyStatement placed inside the body.
//
// In C and C++ the empty statement is additionally wrapped in a
// CompoundStatement.
void build_empty_body_for_function(
        TL::Symbol function_symbol,
        Nodecl::NodeclBase &function_code,
        Nodecl::NodeclBase &empty_stmt)
{
    empty_stmt = Nodecl::EmptyStatement::make(make_locus("", 0, 0));

    Nodecl::List body = Nodecl::List::make(empty_stmt);
    if (IS_C_LANGUAGE || IS_CXX_LANGUAGE)
    {
        body = Nodecl::List::make(
                Nodecl::CompoundStatement::make(
                    body,
                    /* destructors */ Nodecl::NodeclBase::null(),
                    make_locus("", 0, 0)));
    }

    Nodecl::NodeclBase context = Nodecl::Context::make(
            body,
            function_symbol.get_related_scope(),
            make_locus("", 0, 0));

    function_symbol.get_internal_symbol()->defined = 1;

    if (!function_symbol.is_dependent_function())
    {
        function_code = Nodecl::FunctionCode::make(
                context,
                /* initializers */ Nodecl::NodeclBase::null(),
                function_symbol,
                make_locus("", 0, 0));
    }
    else
    {
        function_code = Nodecl::TemplateFunctionCode::make(
                context,
                /* initializers */ Nodecl::NodeclBase::null(),
                function_symbol,
                make_locus("", 0, 0));
    }

    function_symbol.get_internal_symbol()->entity_specs.function_code =
        function_code.get_internal_nodecl();
}
// Creates a new function symbol modelled after 'function': same name, same
// return type, same parameter types, and parameter names taken from the
// names of the related symbols of 'function'.
TL::Symbol new_function_symbol(TL::Symbol function)
{
    TL::ObjectList<TL::Type> param_types = function.get_type().parameters();

    // Collect the parameter names out of the related symbols
    TL::ObjectList<TL::Symbol> related = function.get_related_symbols();
    TL::ObjectList<std::string> param_names;
    for (TL::ObjectList<TL::Symbol>::iterator related_it = related.begin();
            related_it != related.end();
            ++related_it)
    {
        param_names.append(related_it->get_name());
    }

    return SymbolUtils::new_function_symbol(
            function,
            function.get_name(),
            function.get_type().returns(),
            param_names,
            param_types);
}
// Appends a deep copy of every given tree to the FPGA file code.
//
// Function definitions (FunctionCode / TemplateFunctionCode) are copied
// under a new function symbol named "<original name>_hls"; the copy is made
// with a symbol map that maps the original function to the new one.
// Any other tree is copied as is.
void DeviceFPGA::copy_stuff_to_device_file(
        const TL::ObjectList<Nodecl::NodeclBase>& stuff_to_be_copied)
{
    typedef TL::ObjectList<Nodecl::NodeclBase>::const_iterator node_iterator;

    for (node_iterator node = stuff_to_be_copied.begin();
            node != stuff_to_be_copied.end();
            ++node)
    {
        const bool is_function_definition =
            node->is<Nodecl::FunctionCode>()
            || node->is<Nodecl::TemplateFunctionCode>();

        if (!is_function_definition)
        {
            _fpga_file_code.append(Nodecl::Utils::deep_copy(*node, *node));
            continue;
        }

        TL::Symbol original = node->get_symbol();
        TL::Symbol hls_version =
            SymbolUtils::new_function_symbol(original, original.get_name() + "_hls");

        Nodecl::Utils::SimpleSymbolMap function_map;
        function_map.add_map(original, hls_version);

        _fpga_file_code.append(Nodecl::Utils::deep_copy(*node, *node, function_map));
    }
}
// Lowers a SIMD function construct.
//
// The SimdFunction node is replaced by the scalar function code it wraps.
// A deep copy of that code is vectorized, renamed to
// "__<name>_<device>_<vector length>", registered as a vector version of
// the function and appended as a sibling of the scalar function.
void SimdVisitor::visit(const Nodecl::OpenMP::SimdFunction& simd_node)
{
    Nodecl::FunctionCode scalar_code =
        simd_node.get_statement().as<Nodecl::FunctionCode>();

    // The construct itself disappears; only the scalar function remains
    simd_node.replace(scalar_code);

    TL::Symbol scalar_sym = scalar_code.get_symbol();

    Nodecl::FunctionCode vector_code =
        Nodecl::Utils::deep_copy(scalar_code, scalar_code)
        .as<Nodecl::FunctionCode>();

    _vectorizer.vectorize(vector_code, _device_name, _vector_length, NULL);

    // Name of the vector version
    std::stringstream vector_name;
    vector_name << "__" << scalar_sym.get_name()
        << "_" << _device_name
        << "_" << _vector_length;
    vector_code.get_symbol().set_name(vector_name.str());

    // NOTE(review): the name used for the registration is read from the
    // scalar symbol *after* the rename above. If the copy shares its symbol
    // with the scalar function this is the new name - confirm this is the
    // intended key.
    _vectorizer.add_vector_function_version(
            scalar_sym.get_name(),
            vector_code,
            _device_name,
            _vector_length,
            NULL,
            TL::Vectorization::SIMD_FUNC_PRIORITY);

    // The vector version is emitted next to the scalar one
    simd_node.append_sibling(vector_code);
}
// Emits the code needed to vectorize one reduction.
//
// scalar_symbol:         original (scalar) reduction symbol.
// vector_symbol:         vector counterpart; its value is set to a vector
//                        promotion of a copy of the reduction initializer.
// reduction_initializer: initial value of the reduction.
// reduction_name:        reduction operator; only "+" and "-" produce a
//                        final reduction instruction, any other name adds
//                        nothing to post_nodecls.
// reduction_type:        not used by this function.
// pre_nodecls:           receives the ObjectInit of the vector symbol.
// post_nodecls:          receives the statement that reduces the vector
//                        symbol into the scalar symbol.
void VectorizerVectorReduction::vectorize_reduction(const TL::Symbol& scalar_symbol,
        TL::Symbol& vector_symbol,
        const Nodecl::NodeclBase& reduction_initializer,
        const std::string& reduction_name,
        const TL::Type& reduction_type,
        Nodecl::List& pre_nodecls,
        Nodecl::List& post_nodecls)
{
    // Step 1: initialize the vector symbol and declare it before the loop
    vector_symbol.set_value(
            Nodecl::VectorPromotion::make(
                reduction_initializer.shallow_copy(),
                vector_symbol.get_type()));

    pre_nodecls.append(Nodecl::ObjectInit::make(vector_symbol));

    // Step 2: build the vector reduction instruction for the operator
    Nodecl::NodeclBase reduction_instruction;
    if (reduction_name == "+")
    {
        reduction_instruction = Nodecl::VectorReductionAdd::make(
                scalar_symbol.make_nodecl(true),
                vector_symbol.make_nodecl(true),
                scalar_symbol.get_type());
    }
    else if (reduction_name == "-")
    {
        reduction_instruction = Nodecl::VectorReductionMinus::make(
                scalar_symbol.make_nodecl(true),
                vector_symbol.make_nodecl(true),
                scalar_symbol.get_type());
    }

    if (!reduction_instruction.is_null())
    {
        post_nodecls.append(
                Nodecl::ExpressionStatement::make(reduction_instruction));
    }
}
// Replaces a worksharing loop construct by the code that creates the Nanos++
// worksharing and spawns the outlined loop body.
//
// outline_info:           outline information of the construct.
// construct:              tree of the construct; it is replaced by the spawn
//                         code at the end.
// distribute_environment: environment of the loop; it must contain a
//                         Schedule node and may contain a BarrierAtEnd node.
// range:                  loop control providing lower, upper and step.
// outline_name:           suffix of the outline function ("smp_<name>") that
//                         is called directly in OpenMP mode.
// structure_symbol:       symbol of the arguments structure type.
// slicer_descriptor:      symbol passed (by address) to
//                         nanos_worksharing_create; it is attached to the
//                         scope of the generated code.
// task_label:             optional label; when null, the name of the
//                         enclosing function is used as description (OmpSs
//                         mode only).
void LoweringVisitor::loop_spawn_worksharing(OutlineInfo& outline_info,
        Nodecl::NodeclBase construct,
        Nodecl::List distribute_environment,
        Nodecl::RangeLoopControl& range,
        const std::string& outline_name,
        TL::Symbol structure_symbol,
        TL::Symbol slicer_descriptor,
        Nodecl::NodeclBase task_label)
{
    Symbol enclosing_function = Nodecl::Utils::get_enclosing_function(construct);

    Nodecl::OpenMP::Schedule schedule = distribute_environment.find_first<Nodecl::OpenMP::Schedule>();
    ERROR_CONDITION(schedule.is_null(), "Schedule tree is missing", 0);

    Nodecl::NodeclBase lower = range.get_lower();
    Nodecl::NodeclBase upper = range.get_upper();
    Nodecl::NodeclBase step = range.get_step();

    Source struct_size, dynamic_size, struct_arg_type_name;

    // Name of the arguments structure type as seen from the enclosing
    // function ("typename" is required for dependent template specializations)
    struct_arg_type_name
        << ((structure_symbol.get_type().is_template_specialized_type()
                    && structure_symbol.get_type().is_dependent()) ? "typename " : "")
        << structure_symbol.get_qualified_name(enclosing_function.get_scope())
        ;

    // dynamic_size is still empty here; it is passed below as an output of
    // allocate_immediate_structure
    struct_size << "sizeof( " << struct_arg_type_name << " )" << dynamic_size;

    Source immediate_decl;
    allocate_immediate_structure(
            structure_symbol.get_user_defined_type(),
            outline_info,
            struct_arg_type_name,
            struct_size,
            // out
            immediate_decl,
            dynamic_size);

    // NOTE(review): call_outline_function is never used in this function
    Source call_outline_function;

    // Code that selects the worksharing policy and the chunk size
    Source schedule_setup;

    schedule_setup
        << "int nanos_chunk;"
        ;

    if (schedule.get_text() == "runtime")
    {
        // schedule(runtime): both the policy and the chunk are obtained
        // from the runtime when the generated code runs
        schedule_setup
            << "nanos_omp_sched_t nanos_runtime_sched;"
            << "nanos_err = nanos_omp_get_schedule(&nanos_runtime_sched, &nanos_chunk);"
            << "if (nanos_err != NANOS_OK)"
            <<     "nanos_handle_error(nanos_err);"
            << "nanos_ws_t current_ws_policy = nanos_omp_find_worksharing(nanos_runtime_sched);"
            ;
    }
    else
    {
        Source schedule_name;

        if (Nanos::Version::interface_is_at_least("openmp", 8))
        {
            schedule_name << "nanos_omp_sched_" << schedule.get_text();
        }
        else
        {
            // Interfaces older than 8 use the "omp_sched_" prefix
            schedule_name << "omp_sched_" << schedule.get_text();
        }

        schedule_setup
            << "nanos_ws_t current_ws_policy = nanos_omp_find_worksharing(" << schedule_name << ");"
            << "if (current_ws_policy == 0)"
            <<     "nanos_handle_error(NANOS_UNIMPLEMENTED);"
            << "nanos_chunk = " << as_expression(schedule.get_chunk()) << ";"
            ;
    }

    Source worksharing_creation;

    if (IS_CXX_LANGUAGE)
    {
        // In C++ a definition of the slicer descriptor is emitted explicitly
        worksharing_creation
            << as_statement(Nodecl::CxxDef::make(Nodecl::NodeclBase::null(), slicer_descriptor));
    }

    worksharing_creation
        <<     "nanos_err = nanos_worksharing_create("
        <<                 "&" << as_symbol(slicer_descriptor) << ","
        <<                 "current_ws_policy,"
        <<                 "(void**)&nanos_setup_info_loop,"
        <<                 "&single_guard);"
        <<     "if (nanos_err != NANOS_OK)"
        <<         "nanos_handle_error(nanos_err);"
        ;

    // Placeholders of the statements that fill the arguments structure;
    // only one of them is used, depending on the programming model
    Nodecl::NodeclBase fill_outline_arguments_tree, fill_immediate_arguments_tree;

    TL::Source pm_specific_code;
    if (!_lowering->in_ompss_mode())
    {
        // OpenMP: the outline function is called directly with the
        // immediate arguments
        pm_specific_code
            << immediate_decl
            << statement_placeholder(fill_immediate_arguments_tree)
            << "smp_" << outline_name << "(imm_args);"
            ;
    }
    else
    {
        // OmpSs: a sliced work descriptor using the "replicate" slicer is
        // created and submitted
        std::string wd_description =
            (!task_label.is_null()) ? task_label.get_text() : enclosing_function.get_name();

        Source const_wd_info;
        const_wd_info
            << fill_const_wd_info(struct_arg_type_name,
                    /* is_untied */ false,
                    /* mandatory_creation */ true,
                    /* is_function_task */ false,
                    wd_description,
                    outline_info,
                    construct);

        std::string dyn_props_var = "nanos_wd_dyn_props";
        Source dynamic_wd_info;
        dynamic_wd_info << "nanos_wd_dyn_props_t " << dyn_props_var << ";";

        fill_dynamic_properties(dyn_props_var,
                /* priority_expr */ nodecl_null(),
                /* final_expr */ nodecl_null(),
                /* is_implicit */ 0,
                dynamic_wd_info);

        pm_specific_code
            <<     struct_arg_type_name << " *ol_args = (" << struct_arg_type_name <<"*) 0;"
            <<     const_wd_info
            <<     "nanos_wd_t nanos_wd_ = (nanos_wd_t) 0;"
            <<     dynamic_wd_info
            <<     "static nanos_slicer_t replicate = (nanos_slicer_t)0;"
            <<     "if (replicate == (nanos_slicer_t)0)"
            <<         "replicate = nanos_find_slicer(\"replicate\");"
            <<     "if (replicate == (nanos_slicer_t)0)"
            <<         "nanos_handle_error(NANOS_UNIMPLEMENTED);"
            <<     "nanos_err = nanos_create_sliced_wd(&nanos_wd_, "
            <<           "nanos_wd_const_data.base.num_devices, nanos_wd_const_data.devices, "
            <<           "(size_t)" << struct_size << ", nanos_wd_const_data.base.data_alignment, "
            <<           "(void**)&ol_args, nanos_current_wd(), replicate,"
            <<           "&nanos_wd_const_data.base.props, &" << dyn_props_var << ", 0, (nanos_copy_data_t**)0,"
            <<           "0, (nanos_region_dimension_internal_t**)0"
            <<           ");"
            <<     "if (nanos_err != NANOS_OK)"
            <<         "nanos_handle_error(nanos_err);"
            <<     statement_placeholder(fill_outline_arguments_tree)
            <<     "nanos_err = nanos_submit(nanos_wd_, 0, (nanos_data_access_t *) 0, (nanos_team_t) 0);"
            <<     "if (nanos_err != NANOS_OK)"
            <<         "nanos_handle_error(nanos_err);"
            ;
    }

    // Synchronization at the end of the construct, only when the environment
    // contains a BarrierAtEnd node
    TL::Source implicit_barrier_or_tw;
    if (!distribute_environment.find_first<Nodecl::OpenMP::BarrierAtEnd>().is_null())
    {
        implicit_barrier_or_tw << get_implicit_sync_end_construct_source();
    }

    // Whole spawn code. nanos_err and single_guard are declared before
    // schedule_setup and worksharing_creation, which reference them
    Source spawn_code;
    spawn_code
        << "{"
        <<     as_type(get_bool_type()) << " single_guard;"
        <<     "nanos_err_t nanos_err;"
        <<     schedule_setup
        <<     "nanos_ws_info_loop_t nanos_setup_info_loop;"
        <<     "nanos_setup_info_loop.lower_bound = " << as_expression(lower) << ";"
        <<     "nanos_setup_info_loop.upper_bound = " << as_expression(upper) << ";"
        <<     "nanos_setup_info_loop.loop_step = "  << as_expression(step) << ";"
        <<     "nanos_setup_info_loop.chunk_size = nanos_chunk;"
        <<     worksharing_creation
        <<     pm_specific_code
        <<     implicit_barrier_or_tw
        << "}"
        ;

    Source fill_outline_arguments, fill_immediate_arguments;

    // This calls the member function fill_arguments; the local pointer with
    // the same name is declared further below
    fill_arguments(construct, outline_info, fill_outline_arguments, fill_immediate_arguments);

    // The spawn code is written in C, so in Fortran the source language is
    // switched to C just for this parse
    if (IS_FORTRAN_LANGUAGE)
        Source::source_language = SourceLanguage::C;

    Nodecl::NodeclBase spawn_code_tree = spawn_code.parse_statement(construct);

    if (IS_FORTRAN_LANGUAGE)
        Source::source_language = SourceLanguage::Current;

    // Choose the placeholder and the source that match the programming model
    Nodecl::NodeclBase arguments_tree;
    TL::Source *fill_arguments;
    if (!_lowering->in_ompss_mode())
    {
        // OpenMP
        arguments_tree = fill_immediate_arguments_tree;
        fill_arguments = &fill_immediate_arguments;
    }
    else
    {
        // OmpSs
        arguments_tree = fill_outline_arguments_tree;
        fill_arguments = &fill_outline_arguments;
    }

    // Now attach the slicer symbol to its final scope (see tl-lower-for-worksharing.cpp)
    const decl_context_t* spawn_inner_context = arguments_tree.retrieve_context().get_decl_context();
    slicer_descriptor.get_internal_symbol()->decl_context = spawn_inner_context;
    ::insert_entry(spawn_inner_context->current_scope, slicer_descriptor.get_internal_symbol());

    // Parse the arguments
    Nodecl::NodeclBase new_tree = fill_arguments->parse_statement(arguments_tree);
    arguments_tree.replace(new_tree);

    // Finally, replace the construct by the tree that represents the spawn code
    construct.replace(spawn_code_tree);
}
// Builds the source of the blocked version of the loop nest.
//
// The number of blocked loops (_nesting) is the minimum between the number
// of blocking factors and the number of loops of the nest. For each of them
// an outer loop over blocks is generated (its variable is "_blk_<name>" and
// it advances step * factor), followed by the inner loops that
// iterate inside one block. The body of the last blocked loop is placed
// innermost.
//
// Returns the source with the whole blocked nest.
TL::Source LoopBlocking::do_blocking()
{
    Source result, block_loops;

    // block_loops is appended while still empty and is filled afterwards
    result
        << block_loops
        ;

    ObjectList<ForStatement> nest_loops = _for_nest_info.get_nest_list();

    _nesting = std::min(_nest_factors.size(), nest_loops.size());

    // Source where the next (more nested) loop has to be written
    TL::Source *current_innermost_part = &block_loops;
    // For every loop declare its block loop variable and the inter-block loop
    ObjectList<TL::Expression>::iterator current_factor = _nest_factors.begin();
    ObjectList<TL::ForStatement>::iterator current_for = nest_loops.begin();
    for (int current_nest = 0;
            current_nest < _nesting;
            current_nest++, current_for++, current_factor++)
    {
        TL::IdExpression induction_var = current_for->get_induction_variable();
        TL::Symbol sym = induction_var.get_symbol();
        TL::Type type = sym.get_type();

        std::string var = "_blk_" + sym.get_name();

        // Allocated on the heap and never deleted in this function: it is
        // appended to the enclosing source now and written in a later
        // iteration
        TL::Source *new_innermost_part = new TL::Source();
        (*current_innermost_part)
            << "for(" << type.get_declaration(sym.get_scope(), var) << " = " << current_for->get_lower_bound() << ";"
            << var << current_for->get_bound_operator() << current_for->get_upper_bound() << ";"
            << var << "+= ( " << current_for->get_step() << ") * " << current_factor->prettyprint() << ")"
            << (*new_innermost_part)
            ;

        current_innermost_part = new_innermost_part;
    }

    // Now for every loop, declare the intra-loop
    current_factor = _nest_factors.begin();
    current_for = nest_loops.begin();
    for (int current_nest = 0;
            current_nest < _nesting;
            current_nest++, current_for++, current_factor++)
    {
        TL::IdExpression induction_var = current_for->get_induction_variable();
        TL::Symbol sym = induction_var.get_symbol();
        TL::Type type = sym.get_type();

        std::string var = induction_var.prettyprint();
        std::string init_var = var;
        // If the loop declares the iterator in the for statement
        // declare it again
        AST_t loop_init = current_for->get_iterating_init();
        if (Declaration::predicate(loop_init))
        {
            // Fix init_var to be a declaration
            init_var = type.get_declaration(sym.get_scope(), var);
        }

        std::string blk_var = "_blk_" + sym.get_name();

        // Upper limit of the intra-block loop; appended below while empty
        // and filled right after
        TL::Source min_code;

        TL::Source *new_innermost_part = new TL::Source();
        (*current_innermost_part)
            << "for(" << init_var << " = " << blk_var << ";"
            << var << current_for->get_bound_operator() << min_code << ";"
            << var << "+= ( " << current_for->get_step() << "))"
            << (*new_innermost_part)
            ;

        // min(a, b) where a is the last iteration of the block and b is the
        // upper bound of the original loop
        TL::Source a, b;
        min_code
            << "((" << a << ") < (" << b << ") ? (" << a << ") : (" << b << "))"
            ;

        a << blk_var << " + (" << current_for->get_step() << ") * (" << current_factor->prettyprint() << " - 1 )";
        b << current_for->get_upper_bound();

        current_innermost_part = new_innermost_part;
    }

    // And now the innermost loop
    // NOTE(review): when _nesting is 0 (no factors or no loops) the index
    // _nesting - 1 is not a valid position - confirm callers rule this out
    (*current_innermost_part)
        << nest_loops[_nesting - 1].get_loop_body()
        ;

    return result;
}
// Old version - Deprecated. Kept here for compatibility with old runtimes (Nanos++ 0.7)
//
// Generates the code that creates an OpenCL kernel, sets its arguments and
// launches it according to the 'ndrange' and 'shmem' clauses.
//
// called_task:        task function; its parameters are the kernel arguments.
// unpacked_function:  unpacked function where the generated code will live.
// target_info:        target information holding the clauses.
// filename:           file that contains the kernel.
// kernel_name:        name of the kernel.
// data_items:         outline data items, used to find out which arguments
//                     have copies.
// called_fun_to_outline_data_map, outline_data_to_unpacked_fun_map:
//                     symbol maps that are composed to translate parameters
//                     of the called task into parameters of the unpacked
//                     function.
// code_ndrange:       (out) receives the generated code.
//
// The first 'ndrange' argument is the number of dimensions. A last argument
// equal to the constant string "noCheckDim" disables the rounding of the
// global size to a multiple of the local size.
void DeviceOpenCL::old_generate_ndrange_code(
        const TL::Symbol& called_task,
        const TL::Symbol& unpacked_function,
        const TargetInformation& target_info,
        const std::string filename,
        const std::string kernel_name,
        const TL::ObjectList<OutlineDataItem*>& data_items,
        Nodecl::Utils::SimpleSymbolMap* called_fun_to_outline_data_map,
        Nodecl::Utils::SimpleSymbolMap* outline_data_to_unpacked_fun_map,
        // Out
        TL::Source& code_ndrange)
{
    // The arguments of the clauses 'ndrange' and 'shmem' must be updated because
    // they are not expressed in terms of the unpacked function parameters
    TL::ObjectList<Nodecl::NodeclBase> new_ndrange, new_shmem;
    update_ndrange_and_shmem_expressions(
            unpacked_function.get_related_scope(),
            target_info,
            outline_data_to_unpacked_fun_map,
            new_ndrange,
            new_shmem);

    int num_args_ndrange = new_ndrange.size();

    TL::Source code_ndrange_aux;
    Nodecl::Utils::SimpleSymbolMap called_fun_to_unpacked_fun_map;

    // Compose both maps: called function parameter -> outline data item ->
    // unpacked function parameter
    const std::map<TL::Symbol, TL::Symbol>* called_fun_to_outline_data_map_simple =
        called_fun_to_outline_data_map->get_simple_symbol_map();
    for (std::map<TL::Symbol, TL::Symbol>::const_iterator it = called_fun_to_outline_data_map_simple->begin();
            it != called_fun_to_outline_data_map_simple->end();
            it++)
    {
        TL::Symbol key = it->first;
        TL::Symbol value = outline_data_to_unpacked_fun_map->map(it->second.get_internal_symbol());
        called_fun_to_unpacked_fun_map.add_map(key, value);
    }

    // NOTE(review): new_ndrange is indexed without checking that it is not
    // empty - presumably callers guarantee an 'ndrange' clause; confirm
    bool dim_const = new_ndrange[0].is_constant();

    // The rounding is done unless the last argument is the constant string
    // "noCheckDim"
    char is_null_ended = 0;
    bool check_dim = !(new_ndrange[num_args_ndrange - 1].is_constant()
            && const_value_is_string(new_ndrange[num_args_ndrange - 1].get_constant())
            && (strcmp(const_value_string_unpack_to_string(new_ndrange[num_args_ndrange-1].get_constant(), &is_null_ended),
                    "noCheckDim") == 0));

    int num_dim = 0;
    if (dim_const)
    {
        num_dim = const_value_cast_to_4(new_ndrange[0].get_constant());

        ERROR_CONDITION(num_dim < 1 || num_dim > 3,
                "invalid number of dimensions for 'ndrange' clause. Valid values: 1, 2 and 3." ,
                0);

        // Accepted forms: dimensions + (offset, global, local) per dimension
        // or dimensions + (global, local) per dimension, plus the optional
        // "noCheckDim" argument
        ERROR_CONDITION((((num_dim * 3) + 1 + !check_dim) != num_args_ndrange)
                && (((num_dim * 2) + 1 + !check_dim) != num_args_ndrange),
                "invalid number of arguments for 'ndrange' clause",
                0);
    }

    std::string compiler_opts;
    if (CURRENT_CONFIGURATION->opencl_build_options != NULL)
    {
        compiler_opts = std::string(CURRENT_CONFIGURATION->opencl_build_options);
    }

    //Create OCL Kernel
    code_ndrange_aux
        << "nanos_err_t nanos_err;"
        << "void* ompss_kernel_ocl = nanos_create_current_kernel(\""
        <<         kernel_name << "\",\""
        <<         filename << "\",\""
        <<         compiler_opts << "\");";

    //Prepare setArgs
    // index_local counts the local arguments seen so far; it selects the
    // matching expression of the 'shmem' clause
    unsigned int index_local = 0;
    TL::ObjectList<TL::Symbol> parameters_called = called_task.get_function_parameters();
    for (unsigned int i = 0; i < parameters_called.size(); ++i)
    {
        TL::Symbol unpacked_argument = called_fun_to_unpacked_fun_map.map(parameters_called[i]);

        // The attribute __global is deduced: the current argument will be __global if it has any copies
        bool is_global = false;
        if (unpacked_argument.get_type().no_ref().is_pointer()
                || unpacked_argument.get_type().no_ref().is_array())
        {
            for (TL::ObjectList<OutlineDataItem*>::const_iterator it = data_items.begin();
                    it != data_items.end() && !is_global;
                    ++it)
            {
                TL::Symbol outline_data_item_sym = (*it)->get_symbol();

                // If the outline data item has not a valid symbol, skip it
                if (!outline_data_item_sym.is_valid())
                    continue;

                // If the symbol of the current outline data item is not the
                // same as the unpacked_argument, skip it
                if(outline_data_to_unpacked_fun_map->map(outline_data_item_sym.get_internal_symbol()) != unpacked_argument)
                    continue;

                is_global = !((*it)->get_copies().empty());
            }
        }

        // A pointer argument without copies is handled as a local argument
        bool is_local = !is_global && unpacked_argument.get_type().no_ref().is_pointer();

        if (is_global)
        {
            code_ndrange_aux
                << "nanos_err = nanos_opencl_set_bufferarg("
                <<      "ompss_kernel_ocl, "
                <<      i << ", "
                <<      as_symbol(unpacked_argument) <<");";
        }
        else if (is_local)
        {
            // Only a size is passed, together with a null argument address
            TL::Source sizeof_arg;
            if (index_local >= new_shmem.size())
            {
                warn_printf_at(called_task.get_locus(),
                        "the size of the local symbol '%s' has not been specified in the 'shmem' clause, assuming zero\n",
                        unpacked_argument.get_name().c_str());

                sizeof_arg << "0";
            }
            else
            {
                sizeof_arg << as_expression(new_shmem[index_local]);
            }

            code_ndrange_aux
                << "nanos_err = nanos_opencl_set_arg("
                <<      "ompss_kernel_ocl, "
                <<      i << ", "
                <<      sizeof_arg << ", "
                <<      "0);";

            ++index_local;
        }
        else
        {
            // Any other argument is passed by address with the size of its type
            code_ndrange_aux
                << "nanos_err = nanos_opencl_set_arg("
                <<      "ompss_kernel_ocl, "
                <<      i << ", "
                <<      "sizeof(" << as_type(unpacked_argument.get_type().no_ref()) << "), "
                <<      "&" << as_symbol(unpacked_argument) <<");";
        }
    }

    //Build arrays with information from ndrange clause or pointing to the ndrange pointers
    if (!dim_const)
    {
        // Number of dimensions only known at run time: the remaining
        // 'ndrange' arguments are used as pointers to the sizes
        if (IS_FORTRAN_LANGUAGE)
        {
            internal_error("The number of dimensions is non-constant. This feature is not implemented yet in Fortran.", 0);
        }

        //Prepare ndrange calc pointers and arrays
        code_ndrange_aux
            << "int num_dim = " << as_expression(new_ndrange[0]) <<";"
            << "size_t offset_tmp[num_dim];"
            << "size_t offset_arr[num_dim];"
            << "size_t local_size_arr[num_dim];"
            << "size_t global_size_arr[num_dim];"
            << "size_t* local_size_ptr;"
            << "size_t* offset_ptr;"
            << "size_t* global_size_ptr;"
            << "size_t* final_local_size_ptr;"
            << as_type(TL::Type::get_bool_type()) << " local_size_zero = 0;"
            << "int i = 0;"
            ;

        // NOTE(review): these counts do not discount a trailing "noCheckDim"
        // argument - confirm how that option is meant to combine with a
        // non-constant number of dimensions
        if (num_args_ndrange == 3)
        {
            // (dimensions, global, local): offsets default to zero
            code_ndrange_aux
                << "for (i = 0; i < num_dim; ++i)"
                << "{"
                <<      "offset_tmp[i] = 0;"
                << "}"
                << "offset_ptr = offset_tmp;"
                << "global_size_ptr = " << as_expression(new_ndrange[1]) << ";"
                << "local_size_ptr = " << as_expression(new_ndrange[2]) << ";"
                ;
        }
        else if (num_args_ndrange == 4)
        {
            // (dimensions, offset, global, local)
            code_ndrange_aux
                << "offset_ptr = " << as_expression(new_ndrange[1]) << ";"
                << "global_size_ptr = " << as_expression(new_ndrange[2]) << ";"
                << "local_size_ptr = " << as_expression(new_ndrange[3]) << ";"
                ;
        }
        else
        {
            // Only a warning: the code below is still generated although the
            // pointers have not been assigned
            WARNING_MESSAGE("Invalid number of parameters for ndrange, when number of dimensions is not const, it must be 3 or 4",0);
        }

        //Check if local_size has zeros
        code_ndrange_aux
            << "for (i = 0; i < num_dim; ++i)"
            << "{"
            <<      "if (local_size_ptr[i] == 0)"
            <<      "{"
            <<          "local_size_zero = 1;"
            <<      "}"
            << " }"
            << "if (local_size_zero)"
            << "{"
            <<      "for (i = 0; i < num_dim; ++i)"
            <<      "{"
            <<          "local_size_ptr[i] = 1;"
            <<      "}"
            << "}"
            ;

        //Now do the rounding
        if (check_dim)
        {
            // The local size is clamped to the global size and the global
            // size is rounded up to a multiple of the local size
            code_ndrange_aux
                << "for (i = 0; i < num_dim; ++i)"
                << "{"
                <<      "offset_arr[i] = offset_ptr[i];"
                <<      "local_size_arr[i] = (global_size_ptr[i] < local_size_ptr[i]) ? "
                <<          "global_size_ptr[i] : local_size_ptr[i];"
                <<      "global_size_arr[i] = (global_size_ptr[i] < local_size_ptr[i]) ? "
                <<          "global_size_ptr[i] : global_size_ptr[i] + ("
                <<              "(global_size_ptr[i] % local_size_ptr[i] == 0) ? "
                <<                  "0 : (local_size_ptr[i] - global_size_ptr[i] % local_size_ptr[i]));"
                << "}"
                ;
        }

        if (check_dim)
        {
            code_ndrange_aux
                << "if (local_size_zero)"
                << "{"
                <<      "final_local_size_ptr = 0;"
                << "}"
                << "else"
                << "{"
                <<      "final_local_size_ptr = local_size_arr;"
                << "}"
                //Launch kernel/ it will be freed inside, with ndrange calculated inside the checkDim loop
                << "nanos_exec_kernel(ompss_kernel_ocl, num_dim, offset_arr, final_local_size_ptr, global_size_arr);";
            ;
        }
        else
        {
            // No rounding: the sizes given by the user are passed untouched
            code_ndrange_aux
                << "if (local_size_zero)"
                << "{"
                <<      "final_local_size_ptr = 0;"
                << "}"
                << "else"
                << "{"
                <<      "final_local_size_ptr = local_size_ptr;"
                << "}"
                << "nanos_exec_kernel(ompss_kernel_ocl, num_dim, offset_ptr, final_local_size_ptr, global_size_ptr);"
                ;
        }
    }
    else
    {
        // Number of dimensions known at compile time: one assignment per
        // dimension is emitted.
        // num_dim_offset is the number of offset arguments that precede the
        // global sizes (num_dim when offsets are given, 0 otherwise)
        int num_dim_offset = num_dim;

        //Prepare ndrange calc pointers and arrays
        code_ndrange_aux
            << "int num_dim = " << as_expression(new_ndrange[0]) <<";"
            << "size_t offset_arr[num_dim];"
            << "size_t local_size_arr[num_dim];"
            << "size_t global_size_arr[num_dim];"
            << as_type(TL::Type::get_bool_type()) << " local_size_zero;"
            << "local_size_zero = 0;"
            ;

        for (int i = 1; i <= num_dim; ++i)
        {
            if (((num_dim * 3) + 1 + !check_dim) != num_args_ndrange)
            {
                // No offsets in the clause
                num_dim_offset = 0;
                code_ndrange_aux
                    << "offset_arr[" << i-1 << "] = 0;";
            }
            else
            {
                code_ndrange_aux
                    << "offset_arr[" << i-1 << "] = "
                    << as_expression(new_ndrange[i])
                    << ";";
            }

            code_ndrange_aux
                << "local_size_arr[" << i-1 << "] = "
                << as_expression(new_ndrange[num_dim + num_dim_offset + i])
                << ";"
                << "if (local_size_arr[" << i - 1 << "] == 0)"
                << "{"
                <<      "local_size_zero = 1;"
                << "}"
                << "global_size_arr[" << i-1 << "] = "
                << as_expression(new_ndrange[num_dim_offset + i])
                << ";"
                ;
        }

        //Now do the rounding
        if (check_dim)
        {
            code_ndrange_aux
                << "if (!local_size_zero)"
                << "{"
                <<      "int i;"
                <<      "for (i = 0; i < num_dim; i = i + 1)"
                <<      "{"
                <<          "if (global_size_arr[i] < local_size_arr[i])"
                <<          "{"
                <<              "local_size_arr[i] = global_size_arr[i];"
                <<          "}"
                <<          "else"
                <<          "{"
                <<              "if (global_size_arr[i] % local_size_arr[i] != 0)"
                <<              "{"
                <<                  "global_size_arr[i] = global_size_arr[i]"
                <<                      " + (local_size_arr[i] - global_size_arr[i] % local_size_arr[i]);"
                <<              "}"
                <<          "}"
                <<      "}"
                << "}"
                ;
        }

        // When some local size is zero no local size is passed to the runtime
        code_ndrange_aux
            << "if (local_size_zero)"
            << "{"
            //Launch kernel/ it will be freed inside, with ndrange calculated inside the checkDim loop
            <<      "nanos_err = nanos_exec_kernel(ompss_kernel_ocl, num_dim, offset_arr, 0, global_size_arr);"
            << "}"
            << "else"
            << "{"
            //Launch kernel/ it will be freed inside, with ndrange calculated inside the checkDim loop
            <<      "nanos_err = nanos_exec_kernel(ompss_kernel_ocl, num_dim, offset_arr, local_size_arr, global_size_arr);"
            << "}"
            ;
    }

    if (IS_FORTRAN_LANGUAGE)
    {
        // The generated code is C: it is parsed as C and the resulting tree
        // is what gets appended to the output
        Source::source_language = SourceLanguage::C;
        Nodecl::NodeclBase code_ndrange_tree = code_ndrange_aux.parse_statement(unpacked_function.get_related_scope());
        Source::source_language = SourceLanguage::Current;
        code_ndrange << as_statement(code_ndrange_tree);
    }
    else
    {
        code_ndrange << code_ndrange_aux;
    }
}
// Creates a new function symbol in the scope of 'current_function'.
//
// current_function: function used as reference. In Fortran, when it is a
//                   nested function, the symbol related to its scope is
//                   used instead.
// name:             name of the new function.
// return_type:      return type of the new function.
// parameter_names:  names of the parameters.
// parameter_types:  types of the parameters; must have as many elements as
//                   parameter_names (checked with ERROR_CONDITION).
//
// The new function is static. It is made a public member of the class of
// current_function if the latter is a member, inline if the latter is
// inline and, in Fortran, a module procedure of the module that contains
// current_function. When the type of current_function is a template
// specialized type a new template is created and the returned symbol is
// its primary specialization.
TL::Symbol new_function_symbol(
        TL::Symbol current_function,
        const std::string& name,
        TL::Type return_type,
        TL::ObjectList<std::string> parameter_names,
        TL::ObjectList<TL::Type> parameter_types)
{
    if (IS_FORTRAN_LANGUAGE && current_function.is_nested_function())
    {
        // Get the enclosing function
        current_function = current_function.get_scope().get_related_symbol();
    }

    decl_context_t decl_context = current_function.get_scope().get_decl_context();

    ERROR_CONDITION(parameter_names.size() != parameter_types.size(), "Mismatch between names and types", 0);

    // Context where the parameters of the new function are declared
    decl_context_t function_context;
    if (IS_FORTRAN_LANGUAGE)
    {
        function_context = new_program_unit_context(decl_context);
    }
    else
    {
        function_context = new_function_context(decl_context);
        function_context = new_block_context(function_context);
    }

    // Build the function type
    int num_parameters = 0;
    scope_entry_t** parameter_list = NULL;

    // Temporary array describing the parameter types; released right after
    // the function type has been created
    parameter_info_t* p_types = new parameter_info_t[parameter_types.size()];
    parameter_info_t* it_ptypes = &(p_types[0]);
    TL::ObjectList<TL::Type>::iterator type_it = parameter_types.begin();
    for (TL::ObjectList<std::string>::iterator it = parameter_names.begin();
            it != parameter_names.end();
            it++, it_ptypes++, type_it++)
    {
        scope_entry_t* param = new_symbol(function_context, function_context.current_scope, it->c_str());
        param->entity_specs.is_user_declared = 1;
        param->kind = SK_VARIABLE;
        param->locus = make_locus("", 0, 0);

        param->defined = 1;

        param->type_information = get_unqualified_type(type_it->get_internal_type());

        P_LIST_ADD(parameter_list, num_parameters, param);

        it_ptypes->is_ellipsis = 0;
        it_ptypes->nonadjusted_type_info = NULL;
        it_ptypes->type_info = get_indirect_type(param);
    }

    type_t *function_type = get_new_function_type(
            return_type.get_internal_type(),
            p_types,
            parameter_types.size());

    delete[] p_types;

    // Now, we can create the new function symbol
    scope_entry_t* new_function_sym = NULL;
    if (!current_function.get_type().is_template_specialized_type())
    {
        new_function_sym = new_symbol(decl_context, decl_context.current_scope, name.c_str());
        new_function_sym->entity_specs.is_user_declared = 1;
        new_function_sym->kind = SK_FUNCTION;
        new_function_sym->locus = make_locus("", 0, 0);
        new_function_sym->type_information = function_type;
    }
    else
    {
        scope_entry_t* new_template_sym = new_symbol(
                decl_context, decl_context.current_scope, name.c_str());
        new_template_sym->kind = SK_TEMPLATE;
        new_template_sym->locus = make_locus("", 0, 0);

        new_template_sym->type_information = get_new_template_type(
                decl_context.template_parameters,
                function_type,
                uniquestr(name.c_str()),
                decl_context, make_locus("", 0, 0));

        template_type_set_related_symbol(new_template_sym->type_information, new_template_sym);

        // The new function is the primary template specialization
        new_function_sym = named_type_get_symbol(
                template_type_get_primary_type(
                    new_template_sym->type_information));
    }

    // Link the scopes of the function context with the new function
    function_context.function_scope->related_entry = new_function_sym;
    function_context.block_scope->related_entry = new_function_sym;

    new_function_sym->related_decl_context = function_context;

    // The parameters created above become the related symbols of the function
    new_function_sym->entity_specs.related_symbols = parameter_list;
    new_function_sym->entity_specs.num_related_symbols = num_parameters;
    for (int i = 0; i < new_function_sym->entity_specs.num_related_symbols; ++i)
    {
        symbol_set_as_parameter_of_function(
                new_function_sym->entity_specs.related_symbols[i],
                new_function_sym, /* parameter position */ i);
    }

    // Make it static
    new_function_sym->entity_specs.is_static = 1;

    // Make it member if the enclosing function is member
    if (current_function.is_member())
    {
        new_function_sym->entity_specs.is_member = 1;
        new_function_sym->entity_specs.class_type = current_function.get_class_type().get_internal_type();

        new_function_sym->entity_specs.access = AS_PUBLIC;

        ::class_type_add_member(new_function_sym->entity_specs.class_type, new_function_sym);
    }

    if (current_function.is_inline())
        new_function_sym->entity_specs.is_inline = 1;

    // Disabled code, kept as found:
    // new_function_sym->entity_specs.is_defined_inside_class_specifier =
    //     current_function.get_internal_symbol()->entity_specs.is_defined_inside_class_specifier;

    if (IS_FORTRAN_LANGUAGE && current_function.is_in_module())
    {
        // Register the new function as a procedure of the same module
        scope_entry_t* module_sym = current_function.in_module().get_internal_symbol();
        new_function_sym->entity_specs.in_module = module_sym;
        P_LIST_ADD(
                module_sym->entity_specs.related_symbols,
                module_sym->entity_specs.num_related_symbols,
                new_function_sym);
        new_function_sym->entity_specs.is_module_procedure = 1;
    }

    return new_function_sym;
}
// Returns the Fortran subroutine that combines two arrays of partial
// results of the given reduction, creating it the first time it is needed.
//
// red:       reduction whose combiner is expanded inside the subroutine.
// construct: construct that requires the function; it gives the parsing
//            context, the file name used in the subroutine name and the
//            place where the new function code is appended.
//
// The generated subroutine has the form
//     SUBROUTINE <name>(omp_out, omp_in, num_scalars)
// and its body is a loop from 1 to num_scalars that applies the combiner.
// Created functions are cached per reduction in _basic_reduction_map_openmp.
TL::Symbol LoweringVisitor::create_basic_reduction_function_fortran(OpenMP::Reduction* red, Nodecl::NodeclBase construct)
{
    // Reuse the function if it was already created for this reduction
    reduction_map_t::iterator cached = _basic_reduction_map_openmp.find(red);
    if (cached != _basic_reduction_map_openmp.end())
        return cached->second;

    // The name is built from the reduction pointer and a hash of the file name
    std::stringstream name_builder;
    name_builder << "nanos_red_" << red << "_" << simple_hash_str(construct.get_filename().c_str());
    const std::string fun_name = name_builder.str();

    Nodecl::NodeclBase function_body;
    Source src;
    src << "SUBROUTINE " << fun_name << "(omp_out, omp_in, num_scalars)\n"
        <<    "IMPLICIT NONE\n"
        <<    as_type(red->get_type()) << " :: omp_out(num_scalars)\n"
        <<    as_type(red->get_type()) << " :: omp_in(num_scalars)\n"
        <<    "INTEGER, VALUE :: num_scalars\n"
        <<    "INTEGER :: I\n"
        <<    statement_placeholder(function_body) << "\n"
        << "END SUBROUTINE " << fun_name << "\n";

    Nodecl::NodeclBase function_code = src.parse_global(construct);

    // Look up the symbols declared by the parsed subroutine
    TL::Scope body_scope = ReferenceScope(function_body).get_scope();

    TL::Symbol param_omp_in = body_scope.get_symbol_from_name("omp_in");
    ERROR_CONDITION(!param_omp_in.is_valid(), "Symbol omp_in not found", 0);

    TL::Symbol param_omp_out = body_scope.get_symbol_from_name("omp_out");
    ERROR_CONDITION(!param_omp_out.is_valid(), "Symbol omp_out not found", 0);

    TL::Symbol function_sym = body_scope.get_symbol_from_name(fun_name);
    ERROR_CONDITION(!function_sym.is_valid(), "Symbol %s not found", fun_name.c_str());

    TL::Symbol index = body_scope.get_symbol_from_name("i");
    ERROR_CONDITION(!index.is_valid(), "Symbol %s not found", "i");

    TL::Symbol num_scalars = body_scope.get_symbol_from_name("num_scalars");
    ERROR_CONDITION(!num_scalars.is_valid(), "Symbol %s not found", "num_scalars");

    // Loop control: i = 1 .. num_scalars (no explicit step)
    Nodecl::NodeclBase upper_bound = Nodecl::Symbol::make(num_scalars);
    upper_bound.set_type(num_scalars.get_type().no_ref().get_lvalue_reference_to());

    Nodecl::Symbol induction_ref = Nodecl::Symbol::make(index);
    induction_ref.set_type(index.get_type().get_lvalue_reference_to());

    Nodecl::NodeclBase loop_header = Nodecl::RangeLoopControl::make(
            induction_ref,
            const_value_to_nodecl(const_value_get_signed_int(1)),
            upper_bound,
            Nodecl::NodeclBase::null());

    // The combiner is copied and rewritten in terms of the parameters of the
    // subroutine and the loop index
    Nodecl::NodeclBase expanded_combiner = red->get_combiner().shallow_copy();
    BasicReductionExpandVisitor combiner_expander(
            red->get_omp_in(),
            param_omp_in,
            red->get_omp_out(),
            param_omp_out,
            index);
    combiner_expander.walk(expanded_combiner);

    Nodecl::List loop_body = Nodecl::List::make(
            Nodecl::ExpressionStatement::make(expanded_combiner));

    function_body.replace(
            Nodecl::ForStatement::make(
                loop_header,
                loop_body,
                Nodecl::NodeclBase::null()));

    _basic_reduction_map_openmp[red] = function_sym;

    if (IS_FORTRAN_LANGUAGE)
    {
        // The new subroutine uses the same modules as the context of the construct
        Nodecl::Utils::Fortran::append_used_modules(
                construct.retrieve_context(),
                function_sym.get_related_scope());
    }

    Nodecl::Utils::append_to_enclosing_top_level_location(construct, function_code);

    return function_sym;
}
// Returns the source text produced by symbol_to_source for the internal
// symbol of 's', so the symbol can be embedded in a source being built.
std::string as_symbol(TL::Symbol s)
{
    const std::string symbol_text = symbol_to_source(s.get_internal_symbol());
    return symbol_text;
}
void LoweringVisitor::visit(const Nodecl::ExpressionStatement& expr_stmt) { Nodecl::NodeclBase nest = expr_stmt.get_nest(); if (IS_FORTRAN_LANGUAGE && nest.is<Nodecl::FunctionCall>()) { Nodecl::FunctionCall function_call = nest.as<Nodecl::FunctionCall>(); if (function_call.get_called().is<Nodecl::Symbol>()) { TL::Symbol sym = function_call.get_called().as<Nodecl::Symbol>().get_symbol(); // We are only interested in two intrinsic symbols if (sym.is_intrinsic()) { if(sym.get_name() == "ompss_opencl_allocate") { // We replace the intrinsic call by a call to a new function which: // - allocates a new temporary array with descriptor // - copies the array descriptor to the address of the array // - calls to the Nanos++ API to allocate the buffer in the shared memory // - deallocates the temporary array with descriptor // // Example: // // ... // INTEGER, ALLOCATABLE :: V(:) // OMPSS_OPENCL_ALLOCATE(V(10)) // ... // // Is transformed into: // // SUBROUTINE NANOX_OPENCL_ALLOCATE_INTERNAL(ARR, LB1, UB1) // INTEGER, ALLOCATABLE :: ARR(:) // INTEGER :: LB1, UB1, ERR // INTEGER, ALLOCATABLE :: TMP(:) // // ALLOCATE(TMP(LB1:UB1)) // // ERR = NANOS_MEMCPY( // MERCURIUM_GET_ADDRESS_OF(ARR), // MERCURIUM_GET_ADDRESS_OF(TMP), // 48) // // CALL NANOS_OPENCL_ALLOCATE_FORTRAN( // SIZEOF(TMP), // MERCURIUM_GET_ADDRESS_OF(ARR)) // // DEALLOCATE(TMP) // END SUBROUTINE NANOX_OPENCL_ALLOCATE_INTERNAL // // ... // INTEGER, ALLOCATABLE :: V(:) // CALL NANOX_OPENCL_ALLOCATE_INTERNAL(V, 1, 10) // ... // // For more information: https://pm.bsc.es/projects/mcxx/ticket/1994 handle_ompss_opencl_allocate_intrinsic( function_call, _declared_ocl_allocate_functions, expr_stmt); } else if (sym.get_name() == "ompss_opencl_deallocate") { // The transformation applied to this intrinsic is more // simple than the other one, we only need to replace // the call to the intrinsic by a call to the Nanos++ // API: // // ... // INTEGER, ALLOCATABLE :: V(:) // ... // OMPSS_OPENCL_DEALLOCATE(V) // ... 
// // Is transformed into: // // ... // INTEGER, ALLOCATABLE :: V(:) // ... // CALL NANOS_OPENCL_ALLOCATE_FORTRAN(MERCURIUM_GET_ADDRESS_OF(V)) // ... handle_ompss_opencl_deallocate_intrinsic(function_call, expr_stmt); } } } } walk(expr_stmt.get_nest()); }
void DeviceOpenCL::generate_ndrange_code( const TL::Symbol& called_task, const TL::Symbol& unpacked_function, const TargetInformation& target_info, const std::string filename, const std::string kernel_name, const TL::ObjectList<OutlineDataItem*>& data_items, Nodecl::Utils::SimpleSymbolMap* called_fun_to_outline_data_map, Nodecl::Utils::SimpleSymbolMap* outline_data_to_unpacked_fun_map, // Out TL::Source& code_ndrange) { if (!Nanos::Version::interface_is_at_least("opencl", 1003)) { return old_generate_ndrange_code( called_task, unpacked_function, target_info, filename, kernel_name, data_items, called_fun_to_outline_data_map, outline_data_to_unpacked_fun_map, code_ndrange); } // The arguments of the clauses 'ndrange' and 'shmem' must be updated because // they are not expressed in terms of the unpacked function parameters TL::ObjectList<Nodecl::NodeclBase> new_ndrange, new_shmem; update_ndrange_and_shmem_expressions( unpacked_function.get_related_scope(), target_info, outline_data_to_unpacked_fun_map, new_ndrange, new_shmem); // Prepare mapping for the call to the kernel TL::Source code_ndrange_aux; Nodecl::Utils::SimpleSymbolMap called_fun_to_unpacked_fun_map; const std::map<TL::Symbol, TL::Symbol>* called_fun_to_outline_data_map_simple = called_fun_to_outline_data_map->get_simple_symbol_map(); for (std::map<TL::Symbol, TL::Symbol>::const_iterator it = called_fun_to_outline_data_map_simple->begin(); it != called_fun_to_outline_data_map_simple->end(); it++) { TL::Symbol key = it->first; TL::Symbol value = outline_data_to_unpacked_fun_map->map(it->second.get_internal_symbol()); called_fun_to_unpacked_fun_map.add_map(key, value); } // The syntax of ndrange is // // ndrange(N, global-list [, local-list]) // // Each X-list has as much as N elements Nodecl::NodeclBase num_dims_expr = new_ndrange[0]; // N must be a constant if (!num_dims_expr.is_constant()) { fatal_printf_at(num_dims_expr.get_locus(), "first argument in 'ndrange' clause must be constant\n"); } // At this 
point we can remove "N" from the new_ndrange list (pop_front) new_ndrange.erase(new_ndrange.begin()); // N must be between 1 and 3 int num_dims = const_value_cast_to_signed_int(num_dims_expr.get_constant()); if (num_dims < 1 || num_dims > 3) { fatal_printf_at(num_dims_expr.get_locus(), "number of dimensions for 'ndrange' clause is not 1, 2 or 3\n"); } // Checking the number of remaining expressions in the new_ndrange list if (num_dims != (int)new_ndrange.size() && (num_dims * 2) != (int)new_ndrange.size()) { fatal_printf_at(num_dims_expr.get_locus(), "a 'ndrange(%d, argument-list)' clause requires %d or %d arguments in argument-list\n", num_dims, num_dims , num_dims * 2); } std::string compiler_options; if (CURRENT_CONFIGURATION->opencl_build_options != NULL) { compiler_options = std::string(CURRENT_CONFIGURATION->opencl_build_options); } // Create OpenCL kernel code_ndrange_aux << "nanos_err_t nanos_err;" << "void* ompss_kernel_ocl = nanos_create_current_kernel(\"" << kernel_name << "\",\"" << filename << "\"," << "\"" << compiler_options << "\");"; // Prepare setArgs TL::ObjectList<Nodecl::NodeclBase> global_list; TL::ObjectList<Nodecl::NodeclBase> local_list; unsigned int index_local = 0; TL::ObjectList<TL::Symbol> parameters_called = called_task.get_function_parameters(); for (unsigned int i = 0; i < parameters_called.size(); ++i) { TL::Symbol unpacked_argument = called_fun_to_unpacked_fun_map.map(parameters_called[i]); // The attribute __global is deduced: the current argument will be __global if it has any copies bool is_global = false; if (unpacked_argument.get_type().no_ref().is_pointer() || unpacked_argument.get_type().no_ref().is_array()) { for (TL::ObjectList<OutlineDataItem*>::const_iterator it = data_items.begin(); it != data_items.end() && !is_global; ++it) { TL::Symbol outline_data_item_sym = (*it)->get_symbol(); // If the outline data item has not a valid symbol, skip it if (!outline_data_item_sym.is_valid()) continue; // If the symbol of the 
current outline data item is not the // same as the unpacked_argument, skip it if(outline_data_to_unpacked_fun_map->map(outline_data_item_sym.get_internal_symbol()) != unpacked_argument) continue; is_global = !((*it)->get_copies().empty()); } } bool is_local = !is_global && unpacked_argument.get_type().no_ref().is_pointer(); if (is_global) { code_ndrange_aux << "nanos_err = nanos_opencl_set_bufferarg(" << "ompss_kernel_ocl, " << i << ", " << as_symbol(unpacked_argument) <<");"; } else if (is_local) { TL::Source sizeof_arg; if (index_local >= new_shmem.size()) { warn_printf_at(called_task.get_locus(), "the size of the local symbol '%s' has not been specified in the 'shmem' clause, assuming zero\n", unpacked_argument.get_name().c_str()); sizeof_arg << "0"; } else { sizeof_arg << as_expression(new_shmem[index_local]); } code_ndrange_aux << "nanos_err = nanos_opencl_set_arg(" << "ompss_kernel_ocl, " << i << ", " << sizeof_arg << ", " << "0);"; ++index_local; } else { code_ndrange_aux << "nanos_err = nanos_opencl_set_arg(" << "ompss_kernel_ocl, " << i << ", " << "sizeof(" << as_type(unpacked_argument.get_type().no_ref()) << "), " << "&" << as_symbol(unpacked_argument) <<");"; } } //Build arrays with information from ndrange clause or pointing to the ndrange pointers if (num_dims * 2 == (int)new_ndrange.size()) { // ndrange(global-list, local-list) int i = 0; for (; i < num_dims; i++) { global_list.append(new_ndrange[i]); } for (; i < num_dims*2; i++) { local_list.append(new_ndrange[i]); } } // locals are not specified here else if (num_dims == (int)new_ndrange.size()) { // ndrange(global-list) int i = 0; for (int k = 0; k < num_dims; k++, i++) { global_list.append(new_ndrange[i]); } } else { internal_error("Code unreachable", 0); } bool there_is_local_size = !local_list.empty(); // Prepare ndrange calc pointers and arrays if (there_is_local_size) { code_ndrange_aux << "size_t local_size_arr[" << num_dims << "];" ; } code_ndrange_aux << "size_t global_size_arr[" << 
num_dims << "];" ; for (int i = 0; i < num_dims; i++) { if (there_is_local_size) { code_ndrange_aux << "local_size_arr[" << i << "] = " << as_expression(local_list[i]) << ";" ; } code_ndrange_aux << "global_size_arr[" << i << "] = " << as_expression(global_list[i]) << ";" ; } if (there_is_local_size) { // Launch kernel/ it will be freed inside, with ndrange calculated inside the checkDim loop code_ndrange_aux << "nanos_err = nanos_exec_kernel(ompss_kernel_ocl, " << num_dims << ", local_size_arr, global_size_arr);" ; } else { // Let the runtime choose the best local size code_ndrange_aux << "nanos_err = nanos_profile_exec_kernel(ompss_kernel_ocl, " << num_dims << ", global_size_arr);" ; } if (IS_FORTRAN_LANGUAGE) { Source::source_language = SourceLanguage::C; Nodecl::NodeclBase code_ndrange_tree = code_ndrange_aux.parse_statement(unpacked_function.get_related_scope()); Source::source_language = SourceLanguage::Current; code_ndrange << as_statement(code_ndrange_tree); } else { code_ndrange << code_ndrange_aux; } }
// Builds the source of the unrolled version of _for_stmt.
//
// Loops that are not regular are delegated to silly_unroll(). Otherwise the
// result has the shape
//
//     { <induction var declaration> <before_main> <main loop> <after_main> <epilogue loop> }
//
// where, for _factor > 1, the main loop advances '_factor' steps per
// iteration over the replicated body and the epilogue loop runs the remaining
// iterations with the original step. For any other factor the loop header is
// kept as it was.
//
// Returns the Source holding the generated code.
TL::Source LoopUnroll::do_unroll()
{
    if (!_for_stmt.regular_loop())
    {
        return silly_unroll();
    }

    // Get parts of the loop
    IdExpression induction_var = _for_stmt.get_induction_variable();
    Expression lower_bound = _for_stmt.get_lower_bound();
    Expression upper_bound = _for_stmt.get_upper_bound();
    Expression step = _for_stmt.get_step();
    TL::Source operator_bound = _for_stmt.get_bound_operator();

    Statement loop_body = _for_stmt.get_loop_body();

    TL::Source result, epilogue, main, induction_var_decl,
        before_main, after_main;

    // The layout of the result is fixed here, before its parts are filled in
    // by the code below.
    // NOTE(review): this relies on the parts inserted into 'result' being
    // picked up after later insertions into them - confirm with TL::Source.
    result
        << "{"
        << induction_var_decl
        << before_main
        << main
        << after_main
        << epilogue
        << "}"
        ;

    Source replicated_body;
    Source epilogue_body;
    if (_factor > 1)
    {
        AST_t init = _for_stmt.get_iterating_init();
        if (Declaration::predicate(init))
        {
            TL::Symbol sym = induction_var.get_symbol();
            TL::Type type = sym.get_type();
            // Declare it since it will have local scope
            induction_var_decl
                << type.get_declaration(sym.get_scope(), sym.get_name()) << ";"
                ;
        }

        // Main loop: the upper bound is shortened by (_factor - 1) steps and
        // the induction variable advances _factor steps at a time
        main
            << "for (" << induction_var << " = " << lower_bound << ";"
            << induction_var << operator_bound << "((" << upper_bound << ") - (" << _factor << " - 1)* (" << step << "));"
            << induction_var << "+= (" << step << ") * " << _factor << ")"
            << "{"
            << replicated_body
            << "}"
            ;

        // FIXME - It could help to initialize here another variable and make both loops independent
        epilogue
            << "for ( ; "  // No initialization, keep using the old induction var
            << induction_var << operator_bound << upper_bound << ";"
            << induction_var << "+= (" << step << "))"
            << epilogue_body
            ;

        if (!_remove_tasks)
        {
            epilogue_body << loop_body;
        }
        else
        {
            std::cerr << "Do not create task " << __FILE__ << ":" << __LINE__ << std::endl;
            running_error("Path not supported yet", 0);
            // epilogue_body << loop_body.get_ast().prettyprint_with_callback(functor(ignore_tasks));
        }
    }
    else
    {
        // Leave it as is
        main
            << "for(" << _for_stmt.get_iterating_init().prettyprint()
            << _for_stmt.get_iterating_condition() << ";"
            << _for_stmt.get_iterating_expression() << ")"
            << "{"
            << replicated_body
            << "}"
            ;
    }

    // Replicate the body. The OpenMP check is always evaluated, as before,
    // even when _ignore_omp makes its result irrelevant.
    const bool consider_omp = TaskAggregation::contains_relevant_openmp(loop_body);

    if (_ignore_omp || !consider_omp)
    {
        simple_replication(_factor, replicated_body, epilogue_body,
                induction_var, loop_body);
    }
    else
    {
        omp_replication(_factor, replicated_body, epilogue_body,
                induction_var, loop_body, before_main, after_main);
    }

    return result;
}
bool LoweringVisitor::handle_reductions_on_task( Nodecl::NodeclBase construct, OutlineInfo& outline_info, Nodecl::NodeclBase statements, bool generate_final_stmts, Nodecl::NodeclBase& final_statements) { int num_reductions = 0; TL::Source reductions_stuff, final_clause_stuff, // This source represents an expression which is used to check if // we can do an optimization in the final code. This optimization // consists on calling the original code (with a serial closure) if // we are in a final context and the reduction variables that we // are using have not been registered previously final_clause_opt_expr, extra_array_red_memcpy; std::map<TL::Symbol, std::string> reduction_symbols_map; TL::ObjectList<OutlineDataItem*> data_items = outline_info.get_data_items(); for (TL::ObjectList<OutlineDataItem*>::iterator it = data_items.begin(); it != data_items.end(); it++) { if (!(*it)->is_reduction()) continue; std::pair<TL::OpenMP::Reduction*, TL::Type> red_info_pair = (*it)->get_reduction_info(); TL::OpenMP::Reduction* reduction_info = red_info_pair.first; TL::Type reduction_type = red_info_pair.second.no_ref(); TL::Symbol reduction_item = (*it)->get_symbol(); TL::Type reduction_item_type = reduction_item.get_type().no_ref(); std::string storage_var_name = (*it)->get_field_name() + "_storage"; TL::Type storage_var_type = reduction_type.get_pointer_to(); TL::Symbol reduction_function, reduction_function_original_var, initializer_function; // Checking if the current reduction type has been treated before // Note that if that happens we can reuse the combiner and // initializer function. 
// // C/C++: note that if the type of the list item is an array type, // we regiter the reduction over its element type TL::Type registered_reduction_type = reduction_type; while (!IS_FORTRAN_LANGUAGE && registered_reduction_type.is_array()) { registered_reduction_type = registered_reduction_type.array_element(); } LoweringVisitor::reduction_task_map_t::iterator task_red_info = _task_reductions_map.find(std::make_pair(reduction_info, registered_reduction_type)); if (task_red_info != _task_reductions_map.end()) { reduction_function = task_red_info->second._reducer; reduction_function_original_var = task_red_info->second._reducer_orig_var; initializer_function = task_red_info->second._initializer; } else { create_reduction_functions(reduction_info, construct, registered_reduction_type, reduction_item, reduction_function, reduction_function_original_var); create_initializer_function(reduction_info, construct, registered_reduction_type, initializer_function); _task_reductions_map.insert( std::make_pair( std::make_pair(reduction_info, registered_reduction_type), TaskReductionsInfo(reduction_function, reduction_function_original_var, initializer_function) )); } // Mandatory TL::Sources to be filled by any reduction TL::Source orig_address, // address of the original reduction variable storage_var; // variable which holds the address of the storage // Specific TL::Sources to be filled only by Fortran array reduction TL::Source extra_array_red_decl; if (IS_C_LANGUAGE || IS_CXX_LANGUAGE) { storage_var << storage_var_name; orig_address << (reduction_item_type.is_pointer() ? 
"" : "&") << (*it)->get_field_name(); final_clause_stuff << "if (" << storage_var_name << " == 0)" << "{" << storage_var_name << " = " << "(" << as_type(storage_var_type) << ")" << orig_address << ";" << "}" ; } else { orig_address << "&" << (*it)->get_field_name(); if (reduction_item_type.is_array()) { size_t size_of_array_descriptor = fortran_size_of_array_descriptor( fortran_get_rank0_type(reduction_item_type.get_internal_type()), fortran_get_rank_of_type(reduction_item_type.get_internal_type())); storage_var << storage_var_name << "_indirect"; extra_array_red_decl << "void *" << storage_var << ";"; extra_array_red_memcpy << "nanos_err = nanos_memcpy(" << "(void **) &" << storage_var_name << "," << storage_var << "," << size_of_array_descriptor << ");" ; final_clause_stuff << "if (" << storage_var << " == 0)" << "{" << "nanos_err = nanos_memcpy(" << "(void **) &" << storage_var_name << "," << "(void *) "<< orig_address << "," << size_of_array_descriptor << ");" << "}" << "else" << "{" << extra_array_red_memcpy << "}" ; } else { // We need to convert a void* type into a pointer to the reduction type. // As a void* in FORTRAN is represented as an INTEGER(8), we cannot do this // conversion directly in the FORTRAN source. For this reason we introduce // a new function that will be defined in a C file. 
TL::Symbol func = TL::Nanox::get_function_ptr_conversion( // Destination reduction_item_type.get_pointer_to(), // Origin TL::Type::get_void_type().get_pointer_to(), construct.retrieve_context()); storage_var << storage_var_name; final_clause_stuff << "if (" << storage_var << " == 0)" << "{" << storage_var_name << " = " << func.get_name() << "(" << orig_address << ");" << "}" ; } } if (num_reductions > 0) final_clause_opt_expr << " && "; final_clause_opt_expr << storage_var << " == 0 "; num_reductions++; reductions_stuff << extra_array_red_decl << as_type(storage_var_type) << " " << storage_var_name << ";" << "nanos_err = nanos_task_reduction_get_thread_storage(" << "(void *)" << orig_address << "," << "(void **) &" << storage_var << ");" ; reduction_symbols_map[reduction_item] = storage_var_name; } if (num_reductions != 0) { // Generating the final code if needed if (generate_final_stmts) { std::map<Nodecl::NodeclBase, Nodecl::NodeclBase>::iterator it4 = _final_stmts_map.find(construct); ERROR_CONDITION(it4 == _final_stmts_map.end(), "Unreachable code", 0); Nodecl::NodeclBase placeholder; TL::Source new_statements_src; new_statements_src << "{" << "nanos_err_t nanos_err;" << reductions_stuff << "if (" << final_clause_opt_expr << ")" << "{" << as_statement(it4->second) << "}" << "else" << "{" << final_clause_stuff << statement_placeholder(placeholder) << "}" << "}" ; final_statements = handle_task_statements( construct, statements, placeholder, new_statements_src, reduction_symbols_map); } // Generating the task code { TL::Source new_statements_src; Nodecl::NodeclBase placeholder; new_statements_src << "{" << "nanos_err_t nanos_err;" << reductions_stuff << extra_array_red_memcpy << statement_placeholder(placeholder) << "}" ; Nodecl::NodeclBase new_statements = handle_task_statements( construct, statements, placeholder, new_statements_src, reduction_symbols_map); statements.replace(new_statements); } } ERROR_CONDITION(num_reductions != 0 && 
!Nanos::Version::interface_is_at_least("task_reduction", 1001), "The version of the runtime begin used does not support task reductions", 0); return (num_reductions != 0); }
// Returns the result of CheckIfInCudaCompiler::check applied to the file name
// of the symbol 's'.
static bool check_symbol(TL::Symbol s)
{
    const bool accepted = CheckIfInCudaCompiler::check(s.get_filename());
    return accepted;
}