void core(vcl_size_t /*kernel_id*/, utils::kernel_generation_stream& stream, statements_type const & statements, std::vector<detail::mapping_type> const & mapping) const { stream << "for(unsigned int i = get_global_id(0) ; i < N ; i += get_global_size(0))" << std::endl; stream << "{" << std::endl; stream.inc_tab(); //Fetches entries to registers std::set<std::string> fetched; for(std::vector<detail::mapping_type>::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it) for(detail::mapping_type::const_reverse_iterator iit = it->rbegin() ; iit != it->rend() ; ++iit) //Useless to fetch cpu scalars into registers if(detail::mapped_handle * p = dynamic_cast<detail::mapped_handle *>(iit->second.get())) p->fetch( std::make_pair("i","0"), vector_size_, fetched, stream); //Generates all the expression, in order vcl_size_t i = 0; for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ std::string str; detail::traverse(it->first, it->second, detail::expression_generation_traversal(std::make_pair("i","0"), -1, str, mapping[i++])); stream << str << ";" << std::endl; } //Writes back for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it) //Gets the mapped object at the LHS of each expression if(detail::mapped_handle * p = dynamic_cast<detail::mapped_handle *>(at(mapping.at(std::distance(statements.begin(),it)), std::make_pair(&it->second, detail::LHS_NODE_TYPE)).get())) p->write_back( std::make_pair("i", "0"), fetched, stream); stream.dec_tab(); stream << "}" << std::endl; }
void core(unsigned int /*kernel_id*/, utils::kernel_generation_stream& stream, statements_container const & statements, std::vector<mapping_type> const & mapping) const { statements_container::data_type::const_iterator sit; std::vector<mapping_type>::const_iterator mit; stream << "for(unsigned int i = get_global_id(0) ; i < N ; i += get_global_size(0))" << std::endl; stream << "{" << std::endl; stream.inc_tab(); //Registers already allocated std::set<std::string> cache; //Fetch std::string rhs_suffix = "reg"; std::string lhs_suffix = statements.order()==statements_container::INDEPENDENT?"tmp":rhs_suffix; for(mit = mapping.begin(), sit = statements.data().begin() ; sit != statements.data().end() ; ++sit, ++mit) { tree_parsing::read_write(tree_parsing::read_write_traversal::FETCH, parameters_.simd_width, lhs_suffix, cache, *sit, sit->root(), index_tuple("i", "N"), stream, *mit, tree_parsing::LHS_NODE_TYPE); tree_parsing::read_write(tree_parsing::read_write_traversal::FETCH, parameters_.simd_width, rhs_suffix, cache, *sit, sit->root(), index_tuple("i", "N"), stream, *mit, tree_parsing::RHS_NODE_TYPE); } //Generates all the expression, in order for(mit = mapping.begin(), sit = statements.data().begin() ; sit != statements.data().end() ; ++sit, ++mit) stream << tree_parsing::evaluate_expression(*sit, sit->root(), index_tuple("i", "N"), 0, *mit, tree_parsing::PARENT_NODE_TYPE) << ";" << std::endl; //Write back for(mit = mapping.begin(), sit = statements.data().begin() ; sit != statements.data().end() ; ++sit, ++mit) { tree_parsing::read_write(tree_parsing::read_write_traversal::WRITE_BACK, parameters_.simd_width, lhs_suffix, cache,*sit, sit->root(), index_tuple("i", "N"), stream, *mit, tree_parsing::LHS_NODE_TYPE); } stream.dec_tab(); stream << "}" << std::endl; }
void generate_body_impl(unsigned int i, utils::kernel_generation_stream& kss){ saxpy_vector_profile const * casted_prof = static_cast<saxpy_vector_profile const *>(prof_); symbolic_vector_base * first_vector = static_cast<symbolic_vector_base*>(&(*expressions_.begin())->lhs()); unsigned int n_unroll = casted_prof->loop_unroll(); kss << "int i = get_global_id(0)" ; if(n_unroll>1) kss << "*" << n_unroll; kss << ";" << std::endl; kss << "if(i<" << first_vector->size() << "){" << std::endl; kss.inc_tab(); //Set access indices for(std::list<tools::shared_ptr<symbolic_binary_expression_tree_infos_base> >::iterator it=expressions_.begin() ; it!=expressions_.end();++it) for(unsigned int j=0 ; j < n_unroll ; ++j){ if(j==0) (*it)->access_index(j,"i","0"); else (*it)->access_index(j,"i + " + utils::to_string(j),"0"); (*it)->fetch(j,kss); } //Compute expressions for(std::list<tools::shared_ptr<symbolic_binary_expression_tree_infos_base> >::iterator it=expressions_.begin() ; it!=expressions_.end();++it) for(unsigned int j=0 ; j < n_unroll ; ++j) kss << (*it)->generate(j) << ";" << std::endl; for(std::list<tools::shared_ptr<symbolic_binary_expression_tree_infos_base> >::iterator it=expressions_.begin() ; it!=expressions_.end();++it) for(unsigned int j=0 ; j < n_unroll ; ++j) (*it)->write_back(j,kss); kss << "}" << std::endl; kss.dec_tab(); for(unsigned int i=0 ; i < n_unroll ; ++i) for(std::list<tools::shared_ptr<symbolic_binary_expression_tree_infos_base> >::iterator it = expressions_.begin(); it != expressions_.end() ; ++it) (*it)->clear_private_value(i); }
inline void reduce_1d_local_memory(utils::kernel_generation_stream & stream, std::size_t size, std::vector<std::string> const & bufs, std::vector<scheduler::op_element> const & rops) { //Reduce local memory for(std::size_t stride = size/2 ; stride>0 ; stride /=2){ stream << "barrier(CLK_LOCAL_MEM_FENCE); " << std::endl; stream << "if(lid < " << stride << "){" << std::endl; stream.inc_tab(); for(std::size_t k = 0 ; k < bufs.size() ; ++k){ std::string acc = bufs[k] + "[lid]"; std::string str = bufs[k] + "[lid + " + utils::to_string(stride) + "]"; compute_reduction(stream,acc,str,rops[k]); } stream.dec_tab(); stream << "}" << std::endl; } }
void core(vcl_size_t /*kernel_id*/, utils::kernel_generation_stream& stream, statements_type const & statements, std::vector<detail::mapping_type> const & mapping) const { for(std::vector<detail::mapping_type>::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it){ for(detail::mapping_type::const_iterator iit = it->begin() ; iit != it->end() ; ++iit){ if(detail::mapped_matrix * p = dynamic_cast<detail::mapped_matrix*>(iit->second.get())) p->bind_sizes("M","N"); } } stream << "for(unsigned int i = get_global_id(0) ; i < M ; i += get_global_size(0))" << std::endl; stream << "{" << std::endl; stream.inc_tab(); stream << "for(unsigned int j = get_global_id(1) ; j < N ; j += get_global_size(1))" << std::endl; stream << "{" << std::endl; stream.inc_tab(); //Fetches entries to registers std::set<std::string> fetched; for(std::vector<detail::mapping_type>::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it) for(detail::mapping_type::const_reverse_iterator it2 = it->rbegin() ; it2 != it->rend() ; ++it2) if(detail::mapped_matrix * p = dynamic_cast<detail::mapped_matrix *>(it2->second.get())) p->fetch(std::make_pair("i", "j"), vector_size_, fetched, stream); vcl_size_t i = 0; for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ std::string str; detail::traverse(it->first, it->second, detail::expression_generation_traversal(std::make_pair("i", "j"), -1, str, mapping[i++])); stream << str << ";" << std::endl; } //Writes back for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ if(detail::mapped_handle * p = dynamic_cast<detail::mapped_handle *>(at(mapping.at(std::distance(statements.begin(),it)), std::make_pair(&it->second,detail::LHS_NODE_TYPE)).get())) p->write_back(std::make_pair("i", "j"), fetched, stream); } stream.dec_tab(); stream << "}" << std::endl; stream.dec_tab(); stream << "}" << std::endl; }
virtual void operator()(utils::kernel_generation_stream & stream, std::size_t device_offset, statements_type const & statements) const { std::vector<detail::mapping_type> mapping(statements.size()); ///Get Prototype, initialize mapping std::string prototype; std::set<std::string> already_generated; kernel_arguments(statements, prototype); { std::map<void *, std::size_t> memory; unsigned int current_arg = 0; std::size_t i = 0; for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it) detail::traverse(it->first, it->second, detail::map_functor(memory,current_arg,mapping[i++])); } for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ detail::traverse(it->first, it->second, detail::prototype_generation_traversal(already_generated, prototype, vectorization(), mapping[std::distance(statements.begin(), it)])); } prototype.erase(prototype.size()-1); //Last comma pruned //Generate for(std::size_t n = 0 ; n < num_kernels() ; ++n){ //stream << "__attribute__((vec_type_hint()))" << std::endl; stream << " __attribute__((reqd_work_group_size(" << local_size_1_ << "," << local_size_2_ << "," << 1 << ")))" << std::endl; stream << "__kernel " << "void " << "kernel_" << device_offset << "_" << n << "(" << std::endl; stream << prototype << std::endl; stream << ")" << std::endl; //core: stream << "{" << std::endl; stream.inc_tab(); core(n, stream, statements, mapping); stream.dec_tab(); stream << "}" << std::endl; } }
void operator()(utils::kernel_generation_stream& kss) { unsigned int n_unroll = profile_->loop_unroll(); symbolic_vector_base * first_vector = NULL; symbolic_matrix_base * first_matrix = NULL; if(vector_expressions_.size()) first_vector = static_cast<symbolic_vector_base*>(&(*vector_expressions_.begin())->lhs()); if(matrix_expressions_.size()) first_matrix = static_cast<symbolic_matrix_base*>(&(*matrix_expressions_.begin())->lhs()); if(first_vector) { kss << "int i = get_global_id(0)" ; if(n_unroll>1) kss << "*" << n_unroll; kss << ";" << std::endl; // kss << "if(i < " << first_vector->size() << "){" << std::endl; kss.inc_tab(); //Set access indices for(typename std::list<symbolic_binary_vector_expression_base*>::iterator it=vector_expressions_.begin() ; it!=vector_expressions_.end(); ++it) { for(unsigned int j=0 ; j < n_unroll ; ++j) { if(j==0) (*it)->access_index(j,"i","0"); else (*it)->access_index(j,"i + " + utils::to_string(j),"0"); (*it)->fetch(j,kss); } } //Compute expressions for(typename std::list<symbolic_binary_vector_expression_base*>::iterator it=vector_expressions_.begin() ; it!=vector_expressions_.end(); ++it) { for(unsigned int j=0 ; j < n_unroll ; ++j) { kss << (*it)->generate(j) << ";" << std::endl; } } for(typename std::list<symbolic_binary_vector_expression_base*>::iterator it=vector_expressions_.begin() ; it!=vector_expressions_.end(); ++it) { for(unsigned int j=0 ; j < n_unroll ; ++j) { (*it)->write_back(j,kss); } } kss.dec_tab(); // kss << "}" << std::endl; } if(first_matrix) { if(first_matrix->is_rowmajor()) { kss << "unsigned int r = get_global_id(0)/" << first_matrix->internal_size2() << ";" << std::endl; kss << "unsigned int c = get_global_id(0)%" << first_matrix->internal_size2() << ";" << std::endl; } else { kss << "unsigned int r = get_global_id(0)%" << first_matrix->internal_size1() << ";" << std::endl; kss << "unsigned int c = get_global_id(0)/" << first_matrix->internal_size1() << ";" << std::endl; } kss << "if(r < " << first_matrix->internal_size1() << "){" << std::endl; kss.inc_tab(); //Set access indices for(typename std::list<symbolic_binary_matrix_expression_base*>::iterator it=matrix_expressions_.begin() ; it!=matrix_expressions_.end(); ++it) { (*it)->access_index(0,"r","c"); (*it)->fetch(0,kss); } //Compute expressions for(std::list<symbolic_binary_matrix_expression_base*>::iterator it = matrix_expressions_.begin(); it!=matrix_expressions_.end(); ++it) kss << (*it)->generate(0) << ";" << std::endl; for(typename std::list<symbolic_binary_matrix_expression_base*>::iterator it=matrix_expressions_.begin() ; it!=matrix_expressions_.end(); ++it) (*it)->write_back(0,kss); kss.dec_tab(); kss << "}" << std::endl; } for(unsigned int i=0 ; i < n_unroll ; ++i) { for(std::list<symbolic_binary_vector_expression_base*>::iterator it = vector_expressions_.begin(); it != vector_expressions_.end() ; ++it) (*it)->clear_private_value(i); for(std::list<symbolic_binary_matrix_expression_base*>::iterator it = matrix_expressions_.begin(); it != matrix_expressions_.end() ; ++it) (*it)->clear_private_value(i); for(std::list<symbolic_binary_scalar_expression_base*>::iterator it = scalar_expressions_.begin() ; it != scalar_expressions_.end(); ++it) (*it)->clear_private_value(i); } }