Пример #1
0
// Runs SPMV_Functor over A, x and y with a Kokkos team policy.
//
// The functor is built with the scalar coefficients 1.0 and 0.0
// (presumably y = 1.0*A*x + 0.0*y -- confirm against SPMV_Functor).
//
// A                matrix; provides numRows(), nnz() and execution_space.
// x, y             vectors handed to the functor; y.extent(0) determines
//                  how many teams are launched.
// rows_per_thread  forwarded to launch_parameters<execution_space>().
// team_size        forwarded to launch_parameters; afterwards a value > 0
//                  is used as the team size, anything else selects
//                  Kokkos::AUTO.
// vector_length    forwarded to launch_parameters and to the team policy.
void kk_matvec(AType A, XType x, YType y, int rows_per_thread, int team_size, int vector_length) {

  using Scalar          = typename XType::non_const_value_type;
  using execution_space = typename AType::execution_space;
  using matrix_type     = KokkosSparse::CrsMatrix<const Scalar,int,execution_space,void,int>;
  using y_type          = Kokkos::View<Scalar*,Kokkos::LayoutLeft,execution_space>;
  using x_type          = Kokkos::View<const Scalar*,Kokkos::LayoutLeft,execution_space,Kokkos::MemoryRandomAccess >;
  using policy_type     = Kokkos::TeamPolicy<Kokkos::Schedule<ScheduleType> >;

  // Must run before team_size is consulted below: the helper receives
  // team_size and vector_length and returns the rows handled per team.
  int rows_in_team = launch_parameters<execution_space>(A.numRows(),A.nnz(),rows_per_thread,team_size,vector_length);

  double alpha = 1.0;
  double beta  = 0.0;
  SPMV_Functor<matrix_type,x_type,y_type,0,false> spmv (alpha,A,x,beta,y,rows_in_team);

  // One team per group of rows_in_team rows, rounded up.
  int league_size = (y.extent(0)+rows_in_team-1)/rows_in_team;

  // Placeholder policy; replaced by the real launch configuration below.
  policy_type launch_policy(1,1);

  if(team_size<=0)
    launch_policy = policy_type(league_size,Kokkos::AUTO,vector_length);
  else
    launch_policy = policy_type(league_size,team_size,vector_length);

  Kokkos::parallel_for(launch_policy,spmv);
}
// Runs SPMV_Inspector_Functor over A, x and y, using worksets whose row
// boundaries are computed once and then cached.
//
// On the first call (detected by the cached offsets View having extent 0)
// the rows of A are split into worksets: walking A.graph.row_map, a new
// workset is started at every row whose row_map value exceeds the next
// multiple of ceil(nnz / worksets). The initial workset count comes from
// execution_space::concurrency(): that value (divided by team_size when
// team_size > 0) for Kokkos::Static scheduling, and 32 times as many
// otherwise. After the inspection `worksets` holds the number of worksets
// actually produced, which is also the league size of the launch.
//
// NOTE: `worksets` and `workset_offsets` are function-local statics. Later
// calls reuse them unchanged, regardless of the arguments passed.
//
// NOTE(review): the offsets are written with operator() from this function,
// which presumably requires Kokkos::View<int*>'s default memory space to be
// host accessible -- confirm for device builds.
//
// A                matrix; provides numRows(), nnz(), graph.row_map and
//                  execution_space.
// x, y             vectors handed to the functor.
// rows_per_thread  unused by this variant.
// team_size        > 0: used as team size and to scale the initial workset
//                  count; otherwise Kokkos::AUTO is used.
// vector_length    forwarded to the team policy.
void kk_inspector_matvec(AType A, XType x, YType y, int rows_per_thread, int team_size, int vector_length) {

  using Scalar          = typename XType::non_const_value_type;
  using execution_space = typename AType::execution_space;
  using matrix_type     = KokkosSparse::CrsMatrix<const Scalar,int,execution_space,void,int>;
  using y_type          = Kokkos::View<Scalar*,Kokkos::LayoutLeft,execution_space>;
  using x_type          = Kokkos::View<const Scalar*,Kokkos::LayoutLeft,execution_space,Kokkos::MemoryRandomAccess >;
  using policy_type     = Kokkos::TeamPolicy<Kokkos::Schedule<Schedule> >;

  // Initial workset count, evaluated once. Clamped to at least one workset:
  // a team_size larger than the concurrency would otherwise yield 0 and make
  // the nnz_per_workset computation below divide by zero.
  static int worksets = [team_size]() -> int {
    const int oversubscription = std::is_same<Schedule,Kokkos::Static>::value ? 1 : 32;
    const auto slots = execution_space::concurrency()*oversubscription;
    const int initial = team_size>0 ? slots/team_size : slots;
    return initial>0 ? initial : 1;
  }();

  static Kokkos::View<int*> workset_offsets;
  if(workset_offsets.extent(0) == 0) {
    // Inspector phase: runs only while the cached View is still empty.
    workset_offsets = Kokkos::View<int*> ("WorksetOffsets",worksets+1);
    const size_t nnz = A.nnz();
    int nnz_per_workset = (nnz+worksets-1)/worksets;
    workset_offsets(0) = 0;
    int ws = 1;
    for(int row = 0; row<A.numRows(); row++) {
      // Close the current workset once the row offset passes its nnz budget.
      if(A.graph.row_map(row) > ws*nnz_per_workset) {
        workset_offsets(ws) = row;
        ws++;
      }
    }
    // Terminate the last workset at the end of the matrix.
    if(workset_offsets(ws-1) < A.numRows()) {
      workset_offsets(ws) = A.numRows();
    }
    printf("Worksets: %i %i\n",worksets,ws);
    // From here on only the worksets actually produced are launched.
    worksets = ws;
  }

  double s_a = 1.0;
  double s_b = 0.0;
  SPMV_Inspector_Functor<matrix_type,x_type,y_type,0,false,int> func (s_a,A,x,workset_offsets,s_b,y);

  // Placeholder policy; replaced by the real launch configuration below.
  policy_type policy(1,1);

  if(team_size>0)
    policy = policy_type(worksets,team_size,vector_length);
  else
    policy = policy_type(worksets,Kokkos::AUTO,vector_length);

  Kokkos::parallel_for("KokkosSparse::PerfTest::SpMV_Inspector", policy,func);
}