/** @brief Sets the global work sizes of the kernel and binds one size argument.
 *
 * The first two parameters (kernel id and OpenCL context) are unnamed and unused here.
 *
 * @param statements  Container of statements; only the first element of statements.data() is inspected
 * @param k           Kernel whose global work sizes and argument are set
 * @param n_arg       Index of the next kernel argument to bind; incremented by one on return
 */
void configure_impl(vcl_size_t /*kernel_id*/, viennacl::ocl::context & /*context*/, statements_container const & statements, viennacl::ocl::kernel & k, unsigned int & n_arg)  const
{
  // Dimension 0 spans local_size_0 * num_groups work items; dimension 1 is fixed to a single item.
  k.global_work_size(0, parameters_.local_size_0 * parameters_.num_groups);
  k.global_work_size(1, 1);

  // The kernel receives the size reported by get_vector_size() divided by simd_width (integer division).
  cl_uint const vector_length = static_cast<cl_uint>(get_vector_size(statements.data().front()));
  k.arg(n_arg++, vector_length / parameters_.simd_width);
}
Beispiel #2
0
 /** @brief Configures the local and global work size (dimension 0) of a kernel for a vector or matrix expression.
  *
  * The local work size is always set to group_size0_. The global work size is only set if
  * p can be downcast to symbolic_vector_base or symbolic_matrix_base; for any other type it is left untouched.
  *
  * @param k  Kernel to configure
  * @param p  Expression tree node; its dynamic type selects how the element count is obtained
  */
 void config_nd_range(viennacl::ocl::kernel & k, symbolic_expression_tree_base* p) {
     k.local_work_size(0,group_size0_);

     // Vector case: element count is real_size().
     symbolic_vector_base * as_vector = dynamic_cast<symbolic_vector_base*>(p);
     if(as_vector) {
         // Element count is divided by vectorization_*loop_unroll_, then rounded up to a multiple of group_size0_.
         // Original author's note: now using a for-loop for good performance on CPU.
         k.global_work_size(0,
                            viennacl::tools::roundUpToNextMultiple<cl_uint>(as_vector->real_size()/(vectorization_*loop_unroll_),
                                                                            group_size0_));
         return;
     }

     // Matrix case: element count is real_size1() * real_size2().
     symbolic_matrix_base * as_matrix = dynamic_cast<symbolic_matrix_base*>(p);
     if(as_matrix) {
         k.global_work_size(0,
                            viennacl::tools::roundUpToNextMultiple<cl_uint>(as_matrix->real_size1() * as_matrix->real_size2()/(vectorization_*loop_unroll_),
                                                                            group_size0_));
     }
 }
Beispiel #3
0
        /** @brief Sets the work sizes of the kernel and binds one size argument derived from the first statement.
         *
         * @param kernel_id   Forwarded to configure_local_sizes()
         * @param statements  Statement list; only the 'second' member of the front element is inspected
         * @param k           Kernel being configured
         * @param n_arg       Index of the next kernel argument to bind; incremented by one on return
         */
        void configure_range_enqueue_arguments(vcl_size_t kernel_id, statements_type  const & statements, viennacl::ocl::kernel & k, unsigned int & n_arg)  const{
          configure_local_sizes(k, kernel_id);

          // Dimension 0 spans local_size_1_ * num_groups_ work items; dimension 1 is fixed to a single item.
          k.global_work_size(0, local_size_1_ * num_groups_);
          k.global_work_size(1, 1);

          // Query the size of the left-hand side operand of the first statement node
          // and pass it to the kernel divided by vector_size_ (integer division).
          scheduler::statement_node const & leading_node = statements.front().second;
          viennacl::vcl_size_t const lhs_internal_size = utils::call_on_vector(leading_node.lhs, utils::internal_size_fun());
          k.arg(n_arg++, static_cast<cl_uint>(lhs_internal_size / vector_size_));
        }
Beispiel #4
0
        /** @brief Sets the two-dimensional work sizes of the kernel and binds two matrix size arguments.
         *
         * @param kernel_id   Forwarded to configure_local_sizes()
         * @param statements  Statement list; only the 'second' member of the front element is inspected
         * @param k           Kernel being configured
         * @param n_arg       Index of the next kernel argument to bind; incremented by two on return
         */
        void configure_range_enqueue_arguments(std::size_t kernel_id, statements_type  const & statements, viennacl::ocl::kernel & k, unsigned int & n_arg)  const{
          configure_local_sizes(k, kernel_id);

          // Dimension 0 uses the row group parameters, dimension 1 the column group parameters.
          k.global_work_size(0, group_size_row_ * num_groups_row_);
          k.global_work_size(1, group_size_col_ * num_groups_col_);

          scheduler::statement_node const & leading_node = statements.front().second;

          // First argument: result of internal_size1_fun applied to the left-hand side operand.
          cl_uint const lhs_internal_size1 = static_cast<cl_uint>(utils::call_on_matrix(leading_node.lhs, utils::internal_size1_fun()));
          k.arg(n_arg++, lhs_internal_size1);

          // Second argument: result of internal_size2_fun applied to the same operand.
          cl_uint const lhs_internal_size2 = static_cast<cl_uint>(utils::call_on_matrix(leading_node.lhs, utils::internal_size2_fun()));
          k.arg(n_arg++, lhs_internal_size2);
        }