void kk_matvec(AType A, XType x, YType y, int rows_per_thread, int team_size, int vector_length) {
  // Concrete types the SpMV functor is instantiated with; they are derived
  // from the scalar type of x and the execution space of A.
  typedef typename XType::non_const_value_type Scalar;
  typedef typename AType::execution_space execution_space;
  typedef KokkosSparse::CrsMatrix<const Scalar, int, execution_space, void, int> matrix_type;
  typedef typename Kokkos::View<Scalar*, Kokkos::LayoutLeft, execution_space> y_type;
  typedef typename Kokkos::View<const Scalar*, Kokkos::LayoutLeft, execution_space,
                                Kokkos::MemoryRandomAccess> x_type;
  typedef Kokkos::TeamPolicy<Kokkos::Schedule<ScheduleType> > policy_type;

  // Must run before team_size / vector_length are consumed below.
  // NOTE(review): launch_parameters presumably may adjust team_size and/or
  // vector_length through reference parameters -- confirm against its definition.
  const int rows_per_team = launch_parameters<execution_space>(
      A.numRows(), A.nnz(), rows_per_thread, team_size, vector_length);

  // Functor is built with scaling factors 1.0 and 0.0.
  // NOTE(review): presumably y = 1.0*A*x + 0.0*y -- confirm against SPMV_Functor.
  double alpha = 1.0;
  double beta  = 0.0;
  SPMV_Functor<matrix_type, x_type, y_type, 0, false> spmv(alpha, A, x, beta, y, rows_per_team);

  // One team per chunk of rows_per_team rows (ceiling division over y's length).
  int league_size = (y.extent(0) + rows_per_team - 1) / rows_per_team;

  // A positive team_size is passed through; otherwise Kokkos::AUTO is requested.
  policy_type policy = (team_size > 0)
                           ? policy_type(league_size, team_size, vector_length)
                           : policy_type(league_size, Kokkos::AUTO, vector_length);

  Kokkos::parallel_for(policy, spmv);
}
void kk_inspector_matvec(AType A, XType x, YType y, int rows_per_thread, int team_size, int vector_length) { typedef typename XType::non_const_value_type Scalar; typedef typename AType::execution_space execution_space; typedef KokkosSparse::CrsMatrix<const Scalar,int,execution_space,void,int> matrix_type ; typedef typename Kokkos::View<Scalar*,Kokkos::LayoutLeft,execution_space> y_type; typedef typename Kokkos::View<const Scalar*,Kokkos::LayoutLeft,execution_space,Kokkos::MemoryRandomAccess > x_type; //int rows_per_team = launch_parameters<execution_space>(A.numRows(),A.nnz(),rows_per_thread,team_size,vector_length); //static int worksets = (y.extent(0)+rows_per_team-1)/rows_per_team; static int worksets = std::is_same<Schedule,Kokkos::Static>::value ? team_size>0?execution_space::concurrency()/team_size:execution_space::concurrency() : //static team_size>0?execution_space::concurrency()*32/team_size:execution_space::concurrency()*32 ; //dynamic static Kokkos::View<int*> workset_offsets; if(workset_offsets.extent(0) == 0) { workset_offsets = Kokkos::View<int*> ("WorksetOffsets",worksets+1); const size_t nnz = A.nnz(); int nnz_per_workset = (nnz+worksets-1)/worksets; workset_offsets(0) = 0; int ws = 1; for(int row = 0; row<A.numRows(); row++) { if(A.graph.row_map(row) > ws*nnz_per_workset) { workset_offsets(ws) = row; ws++; } } if(workset_offsets(ws-1) < A.numRows()) { workset_offsets(ws) = A.numRows(); } printf("Worksets: %i %i\n",worksets,ws); worksets = ws; } double s_a = 1.0; double s_b = 0.0; SPMV_Inspector_Functor<matrix_type,x_type,y_type,0,false,int> func (s_a,A,x,workset_offsets,s_b,y); Kokkos::TeamPolicy<Kokkos::Schedule<Schedule> > policy(1,1); if(team_size>0) policy = Kokkos::TeamPolicy<Kokkos::Schedule<Schedule> >(worksets,team_size,vector_length); else policy = Kokkos::TeamPolicy<Kokkos::Schedule<Schedule> >(worksets,Kokkos::AUTO,vector_length); Kokkos::parallel_for("KokkosSparse::PerfTest::SpMV_Inspector", policy,func); }