/**
 * @brief Sets the dimension-0 local and global work sizes of a kernel from the operand it works on.
 *
 * The local work size in dimension 0 is always set to group_size0_. The global
 * work size depends on the dynamic type of p:
 *  - symbolic_vector_base: real_size() / (vectorization_ * loop_unroll_)
 *  - symbolic_matrix_base: real_size1() * real_size2() / (vectorization_ * loop_unroll_)
 * In both cases the value is passed through roundUpToNextMultiple<cl_uint>
 * together with group_size0_ before being handed to the kernel.
 *
 * If p is neither of these two types, only the local work size is set and the
 * global work size is left untouched.
 *
 * @param k  Kernel whose work sizes are configured
 * @param p  Expression node used to pick the element count
 */
void config_nd_range(viennacl::ocl::kernel & k, symbolic_expression_tree_base* p)
{
  k.local_work_size(0, group_size0_);

  // Vector operand: one-dimensional element count.
  symbolic_vector_base* vec = dynamic_cast<symbolic_vector_base*>(p);
  if(vec)
  {
    //Note: now using for-loop for good performance on CPU
    k.global_work_size(0,
                       viennacl::tools::roundUpToNextMultiple<cl_uint>(vec->real_size()/(vectorization_*loop_unroll_),
                                                                       group_size0_));
    return;
  }

  // Matrix operand: element count is the product of both dimensions.
  symbolic_matrix_base* mat = dynamic_cast<symbolic_matrix_base*>(p);
  if(mat)
  {
    k.global_work_size(0,
                       viennacl::tools::roundUpToNextMultiple<cl_uint>(mat->real_size1() * mat->real_size2()/(vectorization_*loop_unroll_),
                                                                       group_size0_));
  }
}
/**
 * @brief Applies the stored two-dimensional local work sizes to a kernel.
 *
 * Dimension 0 receives local_size_1_ and dimension 1 receives local_size_2_.
 * The second argument (a kernel id) is accepted but not used.
 *
 * @param k  Kernel whose local work sizes are set
 */
void configure_local_sizes(viennacl::ocl::kernel & k, std::size_t /*kernel_id*/) const
{
  k.local_work_size(0, local_size_1_);
  k.local_work_size(1, local_size_2_);
}