/** @brief Sets the global work sizes of a kernel and binds its size argument.
 *
 * Dimension 0 of the global work size is parameters_.local_size_0 * parameters_.num_groups,
 * dimension 1 is fixed to 1. The vector size reported by get_vector_size() for the first
 * statement is narrowed to cl_uint, divided by parameters_.simd_width (integer division)
 * and passed to the kernel as its next argument.
 *
 * The kernel id and the OpenCL context are accepted for interface reasons but not used.
 *
 * @param statements  Statement container; its first entry supplies the vector size.
 *                    Assumed to be non-empty, since front() is called unconditionally.
 * @param k           Kernel whose range and argument are set.
 * @param n_arg       Index of the next kernel argument to bind; incremented by one.
 */
void configure_impl(vcl_size_t /*kernel_id*/,
                    viennacl::ocl::context & /*context*/,
                    statements_container const & statements,
                    viennacl::ocl::kernel & k,
                    unsigned int & n_arg) const
{
  // Launch range: everything lives in dimension 0, dimension 1 is degenerate.
  k.global_work_size(0, parameters_.local_size_0 * parameters_.num_groups);
  k.global_work_size(1, 1);

  // Element count of the first statement's vector, as the kernel-side integer type.
  cl_uint element_count = static_cast<cl_uint>(get_vector_size(statements.data().front()));

  // The kernel receives the count in units of simd_width elements.
  k.arg(n_arg++, element_count / parameters_.simd_width);
}
/** @brief Configures the one-dimensional ND-range of a kernel from the operand it works on.
 *
 * The local work size in dimension 0 is always set to group_size0_. The global work size
 * in dimension 0 depends on the dynamic type of p:
 *  - symbolic_vector_base: real_size() / (vectorization_ * loop_unroll_)
 *  - symbolic_matrix_base: (real_size1() * real_size2()) / (vectorization_ * loop_unroll_)
 * In both cases the quotient is handed to viennacl::tools::roundUpToNextMultiple<cl_uint>
 * together with group_size0_ (presumably rounding it up to a multiple of the group size).
 * For any other node type only the local work size is set.
 *
 * @param k  Kernel whose work sizes are set.
 * @param p  Expression-tree node used to derive the global work size.
 */
void config_nd_range(viennacl::ocl::kernel & k, symbolic_expression_tree_base* p)
{
  k.local_work_size(0, group_size0_);

  // Note: now using for-loop for good performance on CPU
  // (This note must stay on its own line: as a trailing '//' comment on a line that
  //  also carries the matrix branch it would comment that branch out.)
  if (symbolic_vector_base* vec = dynamic_cast<symbolic_vector_base*>(p))
  {
    k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<cl_uint>(vec->real_size()/(vectorization_*loop_unroll_), group_size0_));
  }
  else if (symbolic_matrix_base* mat = dynamic_cast<symbolic_matrix_base*>(p))
  {
    // Multiplication binds left-to-right with the division: (size1 * size2) / (vectorization * unroll).
    k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<cl_uint>(mat->real_size1() * mat->real_size2()/(vectorization_*loop_unroll_), group_size0_));
  }
}
/** @brief Sets the work sizes of a kernel and binds the vector size argument.
 *
 * Local sizes are delegated to configure_local_sizes(). The global work size is
 * local_size_1_ * num_groups_ in dimension 0 and 1 in dimension 1. The internal size of
 * the left-hand side of the first statement's node is divided by vector_size_
 * (integer division), converted to cl_uint and passed as the next kernel argument.
 *
 * @param kernel_id   Identifier forwarded to configure_local_sizes().
 * @param statements  Statement list; the node of its first entry supplies the size.
 *                    Assumed to be non-empty, since front() is called unconditionally.
 * @param k           Kernel whose range and argument are set.
 * @param n_arg       Index of the next kernel argument to bind; incremented by one.
 */
void configure_range_enqueue_arguments(vcl_size_t kernel_id,
                                       statements_type const & statements,
                                       viennacl::ocl::kernel & k,
                                       unsigned int & n_arg) const
{
  configure_local_sizes(k, kernel_id);

  // Launch range: all work items are laid out along dimension 0.
  k.global_work_size(0, local_size_1_ * num_groups_);
  k.global_work_size(1, 1);

  // The left-hand side of the first statement determines the problem size.
  scheduler::statement_node const & root = statements.front().second;
  viennacl::vcl_size_t internal_length = utils::call_on_vector(root.lhs, utils::internal_size_fun());

  // The kernel receives the size in units of vector_size_ elements.
  k.arg(n_arg++, static_cast<cl_uint>(internal_length / vector_size_));
}
void configure_range_enqueue_arguments(std::size_t kernel_id, statements_type const & statements, viennacl::ocl::kernel & k, unsigned int & n_arg) const{ configure_local_sizes(k, kernel_id); k.global_work_size(0,group_size_row_*num_groups_row_); k.global_work_size(1,group_size_col_*num_groups_col_); scheduler::statement_node const & first_node = statements.front().second; k.arg(n_arg++, cl_uint(utils::call_on_matrix(first_node.lhs, utils::internal_size1_fun()))); k.arg(n_arg++, cl_uint(utils::call_on_matrix(first_node.lhs, utils::internal_size2_fun()))); }