Пример #1
0
int test_crs_matrix_test_singlevec(int numRows, int numCols, int nnz, int test, const char* filename, const bool binaryfile) {
        typedef Kokkos::CrsMatrix<Scalar,int,execution_space,void,int> matrix_type ;
        typedef typename Kokkos::View<Scalar*,Kokkos::LayoutLeft,execution_space> mv_type;
        typedef typename Kokkos::View<Scalar*,Kokkos::LayoutLeft,execution_space,Kokkos::MemoryRandomAccess > mv_random_read_type;
        typedef typename mv_type::HostMirror h_mv_type;

        Scalar* val = NULL;
        int* row = NULL;
        int* col = NULL;

        srand(17312837);
        if(filename==NULL)
          nnz = SparseMatrix_generate<Scalar,int>(numRows,numCols,nnz,nnz/numRows*0.2,numRows*0.01,val,row,col);
        else
          if(!binaryfile)
            nnz = SparseMatrix_MatrixMarket_read<Scalar,int>(filename,numRows,numCols,nnz,val,row,col);
          else
            nnz = SparseMatrix_ReadBinaryGraph<Scalar,int>(filename,numRows,numCols,nnz,val,row,col);

        matrix_type A("CRS::A",numRows,numCols,nnz,val,row,col,false);

        mv_type x("X",numCols);
        mv_random_read_type t_x(x);
        mv_type y("Y",numRows);
        h_mv_type h_x = Kokkos::create_mirror_view(x);
        h_mv_type h_y = Kokkos::create_mirror_view(y);
        h_mv_type h_y_compare = Kokkos::create_mirror(y);
    typename matrix_type::StaticCrsGraphType::HostMirror h_graph = Kokkos::create_mirror(A.graph);
    typename matrix_type::values_type::HostMirror h_values = Kokkos::create_mirror_view(A.values);

    //Kokkos::deep_copy(h_graph.row_map,A.graph.row_map);
          //h_a(k) = (Scalar) (1.0*(rand()%40)-20.);
          for(int i=0; i<numCols;i++) {
                  h_x(i) = (Scalar) (1.0*(rand()%40)-20.);
                  h_y(i) = (Scalar) (1.0*(rand()%40)-20.);
          }
        for(int i=0;i<numRows;i++) {
                int start = h_graph.row_map(i);
                int end = h_graph.row_map(i+1);
                for(int j=start;j<end;j++) {
                   h_values(j) = h_graph.entries(j) + i;
                }
            h_y_compare(i) = 0;
                for(int j=start;j<end;j++) {
                   Scalar val = h_graph.entries(j) + i;
                   int idx = h_graph.entries(j);
                     h_y_compare(i)+=val*h_x(idx);
                }
        }

        Kokkos::deep_copy(x,h_x);
        Kokkos::deep_copy(y,h_y);
        Kokkos::deep_copy(A.graph.entries,h_graph.entries);
        Kokkos::deep_copy(A.values,h_values);
        /*for(int i=0;i<numRows;i++)
                for(int k = 0; k<numVecs; k++) {
          //error[k]+=(h_y_compare(i,k)-h_y(i,k))*(h_y_compare(i,k)-h_y(i,k));
          printf("%i %i %lf %lf %lf\n",i,k,h_y_compare(i,k),h_y(i,k),h_x(i,k));
                }*/
    typename Kokkos::CrsMatrix<Scalar,int,execution_space,void,int>::values_type x1("X1",numCols);
    typename Kokkos::CrsMatrix<Scalar,int,execution_space,void,int>::values_type y1("Y1",numRows);
#ifdef NEWKERNEL
          KokkosSparse::spmv("N",1.0,A,x1,0.0,y1);
#else
          Kokkos::MV_Multiply(y1,A,x1);
#endif

#ifdef NEWKERNEL
          KokkosSparse::spmv("N",1.0,A,x,0.0,y);
#else
          Kokkos::MV_Multiply(y,A,x);
#endif
        execution_space::fence();
        Kokkos::deep_copy(h_y,y);
        Scalar error = 0;
        Scalar sum = 0;
        for(int i=0;i<numRows;i++) {
          error+=(h_y_compare(i)-h_y(i))*(h_y_compare(i)-h_y(i));
          sum += h_y_compare(i)*h_y_compare(i);
         // printf("%i %i %lf %lf %lf\n",i,k,h_y_compare(i,k),h_y(i,k),h_x(i,k));
                }

        //for(int i=0;i<A.nnz;i++) printf("%i %lf\n",h_graph.entries(i),h_values(i));
    int num_errors = 0;
    double total_error = 0;
    double total_sum = 0;
                num_errors += (error/(sum==0?1:sum))>1e-5?1:0;
                total_error += error;
                total_sum += sum;

    int loop = 100;
    Kokkos::Impl::Timer timer;

        for(int i=0;i<loop;i++)
#ifdef NEWKERNEL
          KokkosSparse::spmv("N",1.0,A,x,0.0,y);
#else
        Kokkos::MV_Multiply(y,A,x);
#endif
        execution_space::fence();
        double time = timer.seconds();
        double matrix_size = 1.0*((nnz*(sizeof(Scalar)+sizeof(int)) + numRows*sizeof(int)))/1024/1024;
        double vector_size = 2.0*numRows*sizeof(Scalar)/1024/1024;
        double vector_readwrite = (nnz+numCols)*sizeof(Scalar)/1024/1024;

        double problem_size = matrix_size+vector_size;
    printf("%i %i %i %i %6.2lf MB %6.2lf GB/s %6.2lf GFlop/s %6.3lf ms %i\n",nnz, numRows,numCols,1,problem_size,(matrix_size+vector_readwrite)/time*loop/1024, 2.0*nnz*loop/time/1e9, time/loop*1000, num_errors);
        return (int)total_error;
}
Пример #2
0
int test_crs_matrix_test_singlevec(int numRows, int numCols, int nnz, int test, const char* filename, const bool binaryfile) {
	typedef KokkosArray::CrsMatrix<Scalar,int,device_type> matrix_type ;
	typedef typename KokkosArray::View<Scalar*,KokkosArray::LayoutLeft,device_type> mv_type;
	typedef typename KokkosArray::View<Scalar*,KokkosArray::LayoutLeft,device_type,KokkosArray::MemoryRandomRead> mv_random_read_type;
	typedef typename mv_type::HostMirror h_mv_type;

	Scalar* val = NULL;
	int* row = NULL;
	int* col = NULL;

	srand(17312837);
	if(filename==NULL)
	  nnz = SparseMatrix_generate<Scalar,int>(numRows,numCols,nnz,nnz/numRows*0.2,numRows*0.01,val,row,col);
	else
	  if(!binaryfile)
	    nnz = SparseMatrix_MatrixMarket_read<Scalar,int>(filename,numRows,numCols,nnz,val,row,col);
	  else
	    nnz = SparseMatrix_ReadBinaryGraph<Scalar,int>(filename,numRows,numCols,nnz,val,row,col);

	matrix_type A("CRS::A",numRows,numCols,nnz,val,row,col,false);

	mv_type x("X",numCols);
	mv_random_read_type t_x(x);
	mv_type y("Y",numRows);
	h_mv_type h_x = KokkosArray::create_mirror_view(x);
	h_mv_type h_y = KokkosArray::create_mirror_view(y);
	h_mv_type h_y_compare = KokkosArray::create_mirror(y);
    typename matrix_type::CrsArrayType::HostMirror h_graph = KokkosArray::create_mirror(A.graph);
    typename matrix_type::values_type::HostMirror h_values = KokkosArray::create_mirror_view(A.values);

    //KokkosArray::deep_copy(h_graph.row_map,A.graph.row_map);
	  //h_a(k) = (Scalar) (1.0*(rand()%40)-20.);
	  for(int i=0; i<numCols;i++) {
		  h_x(i) = (Scalar) (1.0*(rand()%40)-20.);
		  h_y(i) = (Scalar) (1.0*(rand()%40)-20.);
	  }
	for(int i=0;i<numRows;i++) {
		int start = h_graph.row_map(i);
		int end = h_graph.row_map(i+1);
		for(int j=start;j<end;j++) {
		   h_values(j) = h_graph.entries(j) + i;
		}
  	    h_y_compare(i) = 0;
		for(int j=start;j<end;j++) {
		   Scalar val = h_graph.entries(j) + i;
		   int idx = h_graph.entries(j);
  		     h_y_compare(i)+=val*h_x(idx);
		}
	}

	KokkosArray::deep_copy(x,h_x);
	KokkosArray::deep_copy(y,h_y);
	KokkosArray::deep_copy(A.graph.entries,h_graph.entries);
	KokkosArray::deep_copy(A.values,h_values);
	/*for(int i=0;i<numRows;i++)
		for(int k = 0; k<numVecs; k++) {
          //error[k]+=(h_y_compare(i,k)-h_y(i,k))*(h_y_compare(i,k)-h_y(i,k));
          printf("%i %i %lf %lf %lf\n",i,k,h_y_compare(i,k),h_y(i,k),h_x(i,k));
		}*/
    typename KokkosArray::CrsMatrix<Scalar,int,device_type>::values_type x1("X1",numCols);
    typename KokkosArray::CrsMatrix<Scalar,int,device_type>::values_type y1("Y1",numRows);
    KokkosArray::MV_Multiply(0.0,y1,1.0,A,x1);

	KokkosArray::MV_Multiply(0.0,y,1.0,A,x);
	device_type::fence();
	KokkosArray::deep_copy(h_y,y);
	Scalar error = 0;
	Scalar sum = 0;
	for(int i=0;i<numRows;i++) {
          error+=(h_y_compare(i)-h_y(i))*(h_y_compare(i)-h_y(i));
          sum += h_y_compare(i)*h_y_compare(i);
         // printf("%i %i %lf %lf %lf\n",i,k,h_y_compare(i,k),h_y(i,k),h_x(i,k));
		}

	//for(int i=0;i<A.nnz;i++) printf("%i %lf\n",h_graph.entries(i),h_values(i));
    int num_errors = 0;
    double total_error = 0;
    double total_sum = 0;
		num_errors += (error/(sum==0?1:sum))>1e-5?1:0;
		total_error += error;
		total_sum += sum;

    int loop = 10;
	timespec starttime,endtime;
    clock_gettime(CLOCK_REALTIME,&starttime);

	for(int i=0;i<loop;i++)
		KokkosArray::MV_Multiply(0.0,y,1.0,A,t_x);
	device_type::fence();
	clock_gettime(CLOCK_REALTIME,&endtime);
	double time = endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
	double matrix_size = 1.0*((nnz*(sizeof(Scalar)+sizeof(int)) + numRows*sizeof(int)))/1024/1024;
	double vector_size = 2.0*numRows*sizeof(Scalar)/1024/1024;
	double vector_readwrite = 2.0*nnz*sizeof(Scalar)/1024/1024;

	double problem_size = matrix_size+vector_size;
    printf("%i %i %i %i %6.2lf MB %6.2lf GB/s %6.2lf ms %i\n",nnz, numRows,numCols,1,problem_size,(matrix_size+vector_readwrite)/time*loop/1024, time/loop*1000, num_errors);
	return (int)total_error;
}