int test_crs_matrix_test_singlevec(int numRows, int numCols, int nnz, int test, const char* filename, const bool binaryfile) { typedef Kokkos::CrsMatrix<Scalar,int,execution_space,void,int> matrix_type ; typedef typename Kokkos::View<Scalar*,Kokkos::LayoutLeft,execution_space> mv_type; typedef typename Kokkos::View<Scalar*,Kokkos::LayoutLeft,execution_space,Kokkos::MemoryRandomAccess > mv_random_read_type; typedef typename mv_type::HostMirror h_mv_type; Scalar* val = NULL; int* row = NULL; int* col = NULL; srand(17312837); if(filename==NULL) nnz = SparseMatrix_generate<Scalar,int>(numRows,numCols,nnz,nnz/numRows*0.2,numRows*0.01,val,row,col); else if(!binaryfile) nnz = SparseMatrix_MatrixMarket_read<Scalar,int>(filename,numRows,numCols,nnz,val,row,col); else nnz = SparseMatrix_ReadBinaryGraph<Scalar,int>(filename,numRows,numCols,nnz,val,row,col); matrix_type A("CRS::A",numRows,numCols,nnz,val,row,col,false); mv_type x("X",numCols); mv_random_read_type t_x(x); mv_type y("Y",numRows); h_mv_type h_x = Kokkos::create_mirror_view(x); h_mv_type h_y = Kokkos::create_mirror_view(y); h_mv_type h_y_compare = Kokkos::create_mirror(y); typename matrix_type::StaticCrsGraphType::HostMirror h_graph = Kokkos::create_mirror(A.graph); typename matrix_type::values_type::HostMirror h_values = Kokkos::create_mirror_view(A.values); //Kokkos::deep_copy(h_graph.row_map,A.graph.row_map); //h_a(k) = (Scalar) (1.0*(rand()%40)-20.); for(int i=0; i<numCols;i++) { h_x(i) = (Scalar) (1.0*(rand()%40)-20.); h_y(i) = (Scalar) (1.0*(rand()%40)-20.); } for(int i=0;i<numRows;i++) { int start = h_graph.row_map(i); int end = h_graph.row_map(i+1); for(int j=start;j<end;j++) { h_values(j) = h_graph.entries(j) + i; } h_y_compare(i) = 0; for(int j=start;j<end;j++) { Scalar val = h_graph.entries(j) + i; int idx = h_graph.entries(j); h_y_compare(i)+=val*h_x(idx); } } Kokkos::deep_copy(x,h_x); Kokkos::deep_copy(y,h_y); Kokkos::deep_copy(A.graph.entries,h_graph.entries); Kokkos::deep_copy(A.values,h_values); /*for(int i=0;i<numRows;i++) for(int k = 0; k<numVecs; k++) { //error[k]+=(h_y_compare(i,k)-h_y(i,k))*(h_y_compare(i,k)-h_y(i,k)); printf("%i %i %lf %lf %lf\n",i,k,h_y_compare(i,k),h_y(i,k),h_x(i,k)); }*/ typename Kokkos::CrsMatrix<Scalar,int,execution_space,void,int>::values_type x1("X1",numCols); typename Kokkos::CrsMatrix<Scalar,int,execution_space,void,int>::values_type y1("Y1",numRows); #ifdef NEWKERNEL KokkosSparse::spmv("N",1.0,A,x1,0.0,y1); #else Kokkos::MV_Multiply(y1,A,x1); #endif #ifdef NEWKERNEL KokkosSparse::spmv("N",1.0,A,x,0.0,y); #else Kokkos::MV_Multiply(y,A,x); #endif execution_space::fence(); Kokkos::deep_copy(h_y,y); Scalar error = 0; Scalar sum = 0; for(int i=0;i<numRows;i++) { error+=(h_y_compare(i)-h_y(i))*(h_y_compare(i)-h_y(i)); sum += h_y_compare(i)*h_y_compare(i); // printf("%i %i %lf %lf %lf\n",i,k,h_y_compare(i,k),h_y(i,k),h_x(i,k)); } //for(int i=0;i<A.nnz;i++) printf("%i %lf\n",h_graph.entries(i),h_values(i)); int num_errors = 0; double total_error = 0; double total_sum = 0; num_errors += (error/(sum==0?1:sum))>1e-5?1:0; total_error += error; total_sum += sum; int loop = 100; Kokkos::Impl::Timer timer; for(int i=0;i<loop;i++) #ifdef NEWKERNEL KokkosSparse::spmv("N",1.0,A,x,0.0,y); #else Kokkos::MV_Multiply(y,A,x); #endif execution_space::fence(); double time = timer.seconds(); double matrix_size = 1.0*((nnz*(sizeof(Scalar)+sizeof(int)) + numRows*sizeof(int)))/1024/1024; double vector_size = 2.0*numRows*sizeof(Scalar)/1024/1024; double vector_readwrite = (nnz+numCols)*sizeof(Scalar)/1024/1024; double problem_size = matrix_size+vector_size; printf("%i %i %i %i %6.2lf MB %6.2lf GB/s %6.2lf GFlop/s %6.3lf ms %i\n",nnz, numRows,numCols,1,problem_size,(matrix_size+vector_readwrite)/time*loop/1024, 2.0*nnz*loop/time/1e9, time/loop*1000, num_errors); return (int)total_error; }
int test_crs_matrix_test_singlevec(int numRows, int numCols, int nnz, int test, const char* filename, const bool binaryfile) { typedef KokkosArray::CrsMatrix<Scalar,int,device_type> matrix_type ; typedef typename KokkosArray::View<Scalar*,KokkosArray::LayoutLeft,device_type> mv_type; typedef typename KokkosArray::View<Scalar*,KokkosArray::LayoutLeft,device_type,KokkosArray::MemoryRandomRead> mv_random_read_type; typedef typename mv_type::HostMirror h_mv_type; Scalar* val = NULL; int* row = NULL; int* col = NULL; srand(17312837); if(filename==NULL) nnz = SparseMatrix_generate<Scalar,int>(numRows,numCols,nnz,nnz/numRows*0.2,numRows*0.01,val,row,col); else if(!binaryfile) nnz = SparseMatrix_MatrixMarket_read<Scalar,int>(filename,numRows,numCols,nnz,val,row,col); else nnz = SparseMatrix_ReadBinaryGraph<Scalar,int>(filename,numRows,numCols,nnz,val,row,col); matrix_type A("CRS::A",numRows,numCols,nnz,val,row,col,false); mv_type x("X",numCols); mv_random_read_type t_x(x); mv_type y("Y",numRows); h_mv_type h_x = KokkosArray::create_mirror_view(x); h_mv_type h_y = KokkosArray::create_mirror_view(y); h_mv_type h_y_compare = KokkosArray::create_mirror(y); typename matrix_type::CrsArrayType::HostMirror h_graph = KokkosArray::create_mirror(A.graph); typename matrix_type::values_type::HostMirror h_values = KokkosArray::create_mirror_view(A.values); //KokkosArray::deep_copy(h_graph.row_map,A.graph.row_map); //h_a(k) = (Scalar) (1.0*(rand()%40)-20.); for(int i=0; i<numCols;i++) { h_x(i) = (Scalar) (1.0*(rand()%40)-20.); h_y(i) = (Scalar) (1.0*(rand()%40)-20.); } for(int i=0;i<numRows;i++) { int start = h_graph.row_map(i); int end = h_graph.row_map(i+1); for(int j=start;j<end;j++) { h_values(j) = h_graph.entries(j) + i; } h_y_compare(i) = 0; for(int j=start;j<end;j++) { Scalar val = h_graph.entries(j) + i; int idx = h_graph.entries(j); h_y_compare(i)+=val*h_x(idx); } } KokkosArray::deep_copy(x,h_x); KokkosArray::deep_copy(y,h_y); KokkosArray::deep_copy(A.graph.entries,h_graph.entries); KokkosArray::deep_copy(A.values,h_values); /*for(int i=0;i<numRows;i++) for(int k = 0; k<numVecs; k++) { //error[k]+=(h_y_compare(i,k)-h_y(i,k))*(h_y_compare(i,k)-h_y(i,k)); printf("%i %i %lf %lf %lf\n",i,k,h_y_compare(i,k),h_y(i,k),h_x(i,k)); }*/ typename KokkosArray::CrsMatrix<Scalar,int,device_type>::values_type x1("X1",numCols); typename KokkosArray::CrsMatrix<Scalar,int,device_type>::values_type y1("Y1",numRows); KokkosArray::MV_Multiply(0.0,y1,1.0,A,x1); KokkosArray::MV_Multiply(0.0,y,1.0,A,x); device_type::fence(); KokkosArray::deep_copy(h_y,y); Scalar error = 0; Scalar sum = 0; for(int i=0;i<numRows;i++) { error+=(h_y_compare(i)-h_y(i))*(h_y_compare(i)-h_y(i)); sum += h_y_compare(i)*h_y_compare(i); // printf("%i %i %lf %lf %lf\n",i,k,h_y_compare(i,k),h_y(i,k),h_x(i,k)); } //for(int i=0;i<A.nnz;i++) printf("%i %lf\n",h_graph.entries(i),h_values(i)); int num_errors = 0; double total_error = 0; double total_sum = 0; num_errors += (error/(sum==0?1:sum))>1e-5?1:0; total_error += error; total_sum += sum; int loop = 10; timespec starttime,endtime; clock_gettime(CLOCK_REALTIME,&starttime); for(int i=0;i<loop;i++) KokkosArray::MV_Multiply(0.0,y,1.0,A,t_x); device_type::fence(); clock_gettime(CLOCK_REALTIME,&endtime); double time = endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000; double matrix_size = 1.0*((nnz*(sizeof(Scalar)+sizeof(int)) + numRows*sizeof(int)))/1024/1024; double vector_size = 2.0*numRows*sizeof(Scalar)/1024/1024; double vector_readwrite = 2.0*nnz*sizeof(Scalar)/1024/1024; double problem_size = matrix_size+vector_size; printf("%i %i %i %i %6.2lf MB %6.2lf GB/s %6.2lf ms %i\n",nnz, numRows,numCols,1,problem_size,(matrix_size+vector_readwrite)/time*loop/1024, time/loop*1000, num_errors); return (int)total_error; }