예제 #1
0
 /** @brief Configures the one-dimensional ND-range of a kernel for the given operand.
  *
  * The local work size in dimension 0 is always set to group_size0_. If p is a
  * symbolic vector or a symbolic matrix, the global work size in dimension 0 is set to
  * the operand's element count divided by (vectorization_ * loop_unroll_), rounded up
  * to a multiple of group_size0_. For any other operand type (or a null pointer) the
  * global work size is left unchanged.
  *
  * @param k  The kernel whose work sizes are configured
  * @param p  The symbolic operand that determines the amount of work
  */
 void config_nd_range(viennacl::ocl::kernel & k, symbolic_expression_tree_base* p) {
     k.local_work_size(0,group_size0_);

     cl_uint work_items = 0;
     if(symbolic_vector_base* vec = dynamic_cast<symbolic_vector_base*>(p)) {
         //Note: now using for-loop for good performance on CPU
         work_items = static_cast<cl_uint>(vec->real_size()/(vectorization_*loop_unroll_));
     }
     else if(symbolic_matrix_base * mat = dynamic_cast<symbolic_matrix_base*>(p)) {
         work_items = static_cast<cl_uint>(mat->real_size1() * mat->real_size2()/(vectorization_*loop_unroll_));
     }
     else {
         return; // neither vector nor matrix: keep the kernel's current global work size
     }

     cl_uint global_size = viennacl::tools::roundUpToNextMultiple<cl_uint>(work_items,group_size0_);

     // The integer division above truncates, so an operand smaller than one
     // vectorized/unrolled step (or an empty one) yields zero work items. A global work
     // size of zero is rejected by OpenCL (CL_INVALID_GLOBAL_WORK_SIZE before 2.1), so
     // launch at least one work-group.
     // NOTE(review): assumes the generated kernel bounds-checks its indices, as it
     // already must for the work items added by rounding up - confirm.
     if(global_size == 0) {
         global_size = static_cast<cl_uint>(group_size0_);
     }

     k.global_work_size(0,global_size);
 }
 /** @brief Applies the profile's two-dimensional local work size to a kernel.
  *
  * Dimension 0 receives local_size_1_ and dimension 1 receives local_size_2_.
  * The second argument (a kernel id) is accepted but not used.
  *
  * @param k  The kernel whose local work sizes are set
  */
 void configure_local_sizes(viennacl::ocl::kernel & k, std::size_t /*kernel_id*/) const {
   const int first_dimension  = 0;
   const int second_dimension = 1;

   k.local_work_size(first_dimension,  local_size_1_);
   k.local_work_size(second_dimension, local_size_2_);
 }