void core(vcl_size_t /*kernel_id*/, utils::kernel_generation_stream& stream, statements_type const & statements, std::vector<detail::mapping_type> const & mapping) const { stream << "for(unsigned int i = get_global_id(0) ; i < N ; i += get_global_size(0))" << std::endl; stream << "{" << std::endl; stream.inc_tab(); //Fetches entries to registers std::set<std::string> fetched; for(std::vector<detail::mapping_type>::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it) for(detail::mapping_type::const_reverse_iterator iit = it->rbegin() ; iit != it->rend() ; ++iit) //Useless to fetch cpu scalars into registers if(detail::mapped_handle * p = dynamic_cast<detail::mapped_handle *>(iit->second.get())) p->fetch( std::make_pair("i","0"), vector_size_, fetched, stream); //Generates all the expression, in order vcl_size_t i = 0; for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ std::string str; detail::traverse(it->first, it->second, detail::expression_generation_traversal(std::make_pair("i","0"), -1, str, mapping[i++])); stream << str << ";" << std::endl; } //Writes back for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it) //Gets the mapped object at the LHS of each expression if(detail::mapped_handle * p = dynamic_cast<detail::mapped_handle *>(at(mapping.at(std::distance(statements.begin(),it)), std::make_pair(&it->second, detail::LHS_NODE_TYPE)).get())) p->write_back( std::make_pair("i", "0"), fetched, stream); stream.dec_tab(); stream << "}" << std::endl; }
void core(vcl_size_t /*kernel_id*/, utils::kernel_generation_stream& stream, statements_type const & statements, std::vector<detail::mapping_type> const & mapping) const { for(std::vector<detail::mapping_type>::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it){ for(detail::mapping_type::const_iterator iit = it->begin() ; iit != it->end() ; ++iit){ if(detail::mapped_matrix * p = dynamic_cast<detail::mapped_matrix*>(iit->second.get())) p->bind_sizes("M","N"); } } stream << "for(unsigned int i = get_global_id(0) ; i < M ; i += get_global_size(0))" << std::endl; stream << "{" << std::endl; stream.inc_tab(); stream << "for(unsigned int j = get_global_id(1) ; j < N ; j += get_global_size(1))" << std::endl; stream << "{" << std::endl; stream.inc_tab(); //Fetches entries to registers std::set<std::string> fetched; for(std::vector<detail::mapping_type>::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it) for(detail::mapping_type::const_reverse_iterator it2 = it->rbegin() ; it2 != it->rend() ; ++it2) if(detail::mapped_matrix * p = dynamic_cast<detail::mapped_matrix *>(it2->second.get())) p->fetch(std::make_pair("i", "j"), vector_size_, fetched, stream); vcl_size_t i = 0; for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ std::string str; detail::traverse(it->first, it->second, detail::expression_generation_traversal(std::make_pair("i", "j"), -1, str, mapping[i++])); stream << str << ";" << std::endl; } //Writes back for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ if(detail::mapped_handle * p = dynamic_cast<detail::mapped_handle *>(at(mapping.at(std::distance(statements.begin(),it)), std::make_pair(&it->second,detail::LHS_NODE_TYPE)).get())) p->write_back(std::make_pair("i", "j"), fetched, stream); } stream.dec_tab(); stream << "}" << std::endl; stream.dec_tab(); stream << "}" << std::endl; }
virtual void operator()(utils::kernel_generation_stream & stream, std::size_t device_offset, statements_type const & statements) const { std::vector<detail::mapping_type> mapping(statements.size()); ///Get Prototype, initialize mapping std::string prototype; std::set<std::string> already_generated; kernel_arguments(statements, prototype); { std::map<void *, std::size_t> memory; unsigned int current_arg = 0; std::size_t i = 0; for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it) detail::traverse(it->first, it->second, detail::map_functor(memory,current_arg,mapping[i++])); } for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ detail::traverse(it->first, it->second, detail::prototype_generation_traversal(already_generated, prototype, vectorization(), mapping[std::distance(statements.begin(), it)])); } prototype.erase(prototype.size()-1); //Last comma pruned //Generate for(std::size_t n = 0 ; n < num_kernels() ; ++n){ //stream << "__attribute__((vec_type_hint()))" << std::endl; stream << " __attribute__((reqd_work_group_size(" << local_size_1_ << "," << local_size_2_ << "," << 1 << ")))" << std::endl; stream << "__kernel " << "void " << "kernel_" << device_offset << "_" << n << "(" << std::endl; stream << prototype << std::endl; stream << ")" << std::endl; //core: stream << "{" << std::endl; stream.inc_tab(); core(n, stream, statements, mapping); stream.dec_tab(); stream << "}" << std::endl; } }