void enqueue(std::string const & kernel_prefix, std::vector<lazy_program_compiler> & programs, statements_container const & statements) { viennacl::kernel & kernel = programs[0].program().kernel(kernel_prefix); kernel.local_work_size(0, p_.local_size_0); kernel.local_work_size(1, p_.local_size_1); kernel.global_work_size(0,p_.local_size_0*p_.num_groups_0); kernel.global_work_size(1,p_.local_size_1*p_.num_groups_1); scheduler::statement_node const & root = statements.data().front().array()[statements.data().front().root()]; unsigned int current_arg = 0; if (up_to_internal_size_) { kernel.arg(current_arg++, cl_uint(utils::call_on_matrix(root.lhs, utils::internal_size1_fun()))); kernel.arg(current_arg++, cl_uint(utils::call_on_matrix(root.lhs, utils::internal_size2_fun()))); } else { kernel.arg(current_arg++, cl_uint(utils::call_on_matrix(root.lhs, utils::size1_fun()))); kernel.arg(current_arg++, cl_uint(utils::call_on_matrix(root.lhs, utils::size2_fun()))); } set_arguments(statements, kernel, current_arg); kernel.enqueue(); }
// Sets the launch geometry of kernel `k` and binds its size argument.
//
// Dimension 0 of the global work size is local_size_0 * num_groups (both from
// `parameters_`); dimension 1 is fixed to 1. One kernel argument is bound at
// slot `n_arg`: the vector size of the first statement divided by
// `parameters_.simd_width`. `n_arg` is advanced by one so the caller can keep
// binding arguments after it. The kernel id and the context are not used.
//
// NOTE(review): the division presumably truncates (the size is a cl_uint) and
// assumes simd_width is non-zero - confirm against the parameter validation.
void configure_impl(vcl_size_t /*kernel_id*/, viennacl::ocl::context & /*context*/, statements_container const & statements, viennacl::ocl::kernel & k, unsigned int & n_arg) const
{
  k.global_work_size(0, parameters_.local_size_0*parameters_.num_groups);
  k.global_work_size(1, 1);

  cl_uint vector_size = static_cast<cl_uint>(get_vector_size(statements.data().front()));

  // Claim the current slot and advance the caller's counter before binding.
  unsigned int const slot = n_arg++;
  k.arg(slot, vector_size/parameters_.simd_width);
}
void core(unsigned int /*kernel_id*/, utils::kernel_generation_stream& stream, statements_container const & statements, std::vector<mapping_type> const & mapping) const { statements_container::data_type::const_iterator sit; std::vector<mapping_type>::const_iterator mit; stream << "for(unsigned int i = get_global_id(0) ; i < N ; i += get_global_size(0))" << std::endl; stream << "{" << std::endl; stream.inc_tab(); //Registers already allocated std::set<std::string> cache; //Fetch std::string rhs_suffix = "reg"; std::string lhs_suffix = statements.order()==statements_container::INDEPENDENT?"tmp":rhs_suffix; for(mit = mapping.begin(), sit = statements.data().begin() ; sit != statements.data().end() ; ++sit, ++mit) { tree_parsing::read_write(tree_parsing::read_write_traversal::FETCH, parameters_.simd_width, lhs_suffix, cache, *sit, sit->root(), index_tuple("i", "N"), stream, *mit, tree_parsing::LHS_NODE_TYPE); tree_parsing::read_write(tree_parsing::read_write_traversal::FETCH, parameters_.simd_width, rhs_suffix, cache, *sit, sit->root(), index_tuple("i", "N"), stream, *mit, tree_parsing::RHS_NODE_TYPE); } //Generates all the expression, in order for(mit = mapping.begin(), sit = statements.data().begin() ; sit != statements.data().end() ; ++sit, ++mit) stream << tree_parsing::evaluate_expression(*sit, sit->root(), index_tuple("i", "N"), 0, *mit, tree_parsing::PARENT_NODE_TYPE) << ";" << std::endl; //Write back for(mit = mapping.begin(), sit = statements.data().begin() ; sit != statements.data().end() ; ++sit, ++mit) { tree_parsing::read_write(tree_parsing::read_write_traversal::WRITE_BACK, parameters_.simd_width, lhs_suffix, cache,*sit, sit->root(), index_tuple("i", "N"), stream, *mit, tree_parsing::LHS_NODE_TYPE); } stream.dec_tab(); stream << "}" << std::endl; }