/*
 * Driver for a single sparse matrix-vector product.
 *
 * Populates the inputs through the fill/init helpers, calls spmv() once and
 * prints the N entries of the result vector on one line.
 */
int main(){
    /* Sparse matrix storage (presumably compressed-row: values, column
     * indices and N+1 row offsets -- confirm against initMat). */
    TYPE nzval[NNZ];
    /* Input vector and result vector. */
    TYPE x[N];
    TYPE y[N];
    int colind[NNZ];
    int rowptr[N+1];
    int entry;

    /* Fixed seed for the drand48 family so that runs are repeatable. */
    srand48(8650341L);

    /* Build the operands. */
    fillVal(nzval);
    fill(x);
    initMat(colind, rowptr);
    initOut(y);

    /* y is produced from the matrix and x. */
    spmv(nzval, colind, rowptr, x, y);

    /* Dump the result, space separated, framed by blank lines. */
    printf("\n");
    /* NOTE(review): "%d" is only valid if TYPE is int -- confirm. */
    for (entry = 0; entry < N; entry++) {
        printf("%d ", y[entry]);
    }
    printf("\n");

    return 0;
}
void spmv(const char mode[], const AlphaType& alpha, const AMatrix& A, const XVector& x, const BetaType& beta, const YVector& y) { typedef typename Kokkos::Impl::if_c<XVector::rank==2,RANK_TWO,RANK_ONE>::type RANK_SPECIALISE; spmv(mode,alpha,A,x,beta,y,RANK_SPECIALISE()); }
virtual void solve(int steps = 100000, T tolerance = (T)1e-6){ cgfassert(this->mat->getWidth() == this->b->getSize()); cgfassert(this->mat->getWidth() == this->mat->getHeight()); cgfassert(this->mat->getWidth() == this->x->getSize()); this->iterations = 0; for(int i=0;i<this->mat->getHeight();i++){ (*C)[i] = (T)1.0/Sqrt(Fabs((*this->mat)[i][i])); } /*r = Ax*/ spmv(*r, *(this->mat), *(this->x)); /*r = b - r*/; Vector<T>::sub(*r, *(this->b), *r); /*w = C * r*/ Vector<T>::mul(*w, *C, *r); /*v = C * w*/ Vector<T>::mul(*v, *C, *w); /*s1 = w * w*/ Vector<T>::mul(*scratch1, *w, *w); T alpha; /*alpha = sum(s1)*/ alpha = scratch1->sum(); int k=0; while(k<steps){ /*s1 = v * v*/ this->iterations++; Vector<T>::mul(*scratch1, *v, *v); T residual; residual = scratch1->sum(); if(Sqrt(Fabs(residual)) < (tolerance*bnorm /*+ tolerance*/)){ warning("CG::Success in %d iterations, %10.10e, %10.10e", k, residual, Sqrt(residual)); return; } /*u = A*v*/ spmv(*u, *(this->mat), *v); /*s1 = v * u*/ Vector<T>::mul(*scratch1, *v, *u); T divider; /*divider = sum(s1)*/ divider = scratch1->sum(); T t = alpha/divider; /*x = x + t*v*/ /*r = r - t*u*/ /*w = C * r*/ Vector<T>::mfadd(*(this->x), t, *v, *(this->x)); Vector<T>::mfadd(*r, -t, *u, *r); Vector<T>::mul (*w, *C, *r); /*s1 = w*w*/ Vector<T>::mul(*scratch1, *w, *w); /*beta = sum(s1)*/ T beta = scratch1->sum(); #if 1 if(beta < (tolerance*bnorm + tolerance)){ T rl = r->length2(); if(Sqrt(rl)<(tolerance*bnorm + tolerance)){ warning("CG::Success in %d iterations, %10.10e, %10.10e", k, rl, Sqrt(Fabs(rl))); return; } } #endif T s = beta/alpha; /*s1 = C * w*/ Vector<T>::mul(*scratch1, *C, *w); /*v = s1 + s * v*/ Vector<T>::mfadd(*v, s, *v, *scratch1); alpha = beta; k++; } message("Unsuccesfull"); throw new SolutionNotFoundException(__LINE__, __FILE__, "Number of iterations exceeded."); }
// Self-test of the solve path.
//
// Builds a known solution x_known (all ones), forms a right-hand side y from
// it with spmv on the BTF blocks (only when Options.btf is BASKER_TRUE; in the
// other case y is left at zero), applies the inverse pivot permutation gpermi
// to y, solves with serial_solve / serial_btf_solve, and compares the computed
// x against x_known.
//
// The reported error is the mean absolute difference sum|x_known(i)-x(i)|/gn.
// Because x_known is all ones this equals the relative 1-norm error. Signed
// differences must not be summed here: errors of opposite sign would cancel
// and a wrong solution could be reported as passing.
//
// Prints the test points, the error and "TEST PASSED" when the error is below
// 1e-2. Always returns 0; the outcome is only visible in the printed output.
BASKER_INLINE
int Basker<Int,Entry,Exe_Space>::test_solve()
{
  ENTRY_1DARRAY x_known;
  ENTRY_1DARRAY x;
  ENTRY_1DARRAY y;

#ifdef BASKER_DEBUG_SOLVE_RHS
  printf("test_solve called \n");
  printf("Global pivot permuation\n");
  printVec(gperm, gn);
  printf("\n");
  printf("Global pivot permutation inverse\n");
  printVec(gpermi, gn);
  printf("\n");
#endif

  // Known solution: every entry is one.
  BASKER_ASSERT(gn > 0, "solve testsolve gn");
  MALLOC_ENTRY_1DARRAY(x_known, gn);
  init_value(x_known, gn , (Entry)1.0);

  //temp
  for(Int i = 0; i < gn; i++)
  {
    //x_known(i) = (Entry)(i+1);
    x_known(i) = (Entry) 1.0;
  }
  //JDB: used for other test
  //permute(x_known, order_csym_array, gn);

  // Work vectors: x receives the computed solution, y holds the RHS.
  MALLOC_ENTRY_1DARRAY(x, gn);
  init_value(x, gn, (Entry) 0.0);
  BASKER_ASSERT(gm > 0, "solve testsolve gm");
  MALLOC_ENTRY_1DARRAY(y, gm);
  init_value(y, gm, (Entry) 0.0);

  if(btf_nblks > 0)
  {
    sort_matrix(BTF_C);
    //printMTX("C_BEFORE_SOLVE.mtx", BTF_C);
  }

  // Form the right-hand side from the known solution, block by block.
  // NOTE(review): the successive spmv calls presumably accumulate into y --
  // confirm against spmv's definition.
  if(Options.btf == BASKER_TRUE)
  {
    //printf("btf_tabs_offset: %d ", btf_tabs_offset);
    //printf("btf_nblks: %d \n", btf_nblks);
    if(btf_tabs_offset != 0)
    {
      spmv(BTF_A, x_known,y);
      if(btf_nblks> 1)
      {
        spmv(BTF_B, x_known, y);
      }
    }
    if(btf_nblks > 1)
    {
      spmv(BTF_C, x_known, y);
    }
    //return -1;
  }
  else
  {
    // The non-BTF product is disabled, so y stays zero on this path.
    //spmv(BTF_A, x_known,y);
  }

  // Pivot permutation of the RHS: scatter y through gpermi into x, then copy
  // back into y and clear x again for the solve.
  //printVec("gperm.csc", gpermi, gn);
  for(Int i = 0; i < gn; i++)
  {
    x(gpermi(i)) = y(i);
  }
  for(Int i = 0; i < gn; i++)
  {
    y(i) = x(i);
    x(i) = 0;
  }

#ifdef BASKER_DEBUG_SOLVE_RHS
  printf("\n\n");
  //printf("Known Solution: \n");
  //for(Int i = 0; i < gn; i++)
  //  {
  //    printf("%f, " , x_known(i));
  //  }
  printf("\n\n");
  printf("RHS: \n");
  for(Int i =0; i < gm; i++)
  {
    printf("%d %f,\n ", i, y(i));
  }
  printf("\n\n");
#endif

  // Solve for x from the permuted right-hand side.
  if(Options.btf == BASKER_FALSE)
  {
    if(btf_tabs_offset != 0)
    {
      serial_solve(y,x);
    }
  }
  else
  {
    //A\y -> y
    serial_btf_solve(y,x);
  }

  // Mean absolute error against the known solution (see header comment for
  // why the differences are not summed with their sign).
  Entry diff = 0.0;
  for(Int i = 0; i < gn; i++)
  {
    const Entry err = x_known(i) - x(i);
    diff += (err < (Entry) 0.0) ? -err : err;
  }
  diff = diff/(Entry) gn;

#ifdef BASKER_DEBUG_SOLVE_RHS
  printf("\n\n");
  printf("Solve Compare: \n");
  for(Int i = 0; i < gn; i++)
  {
    printf("%d %f %f \n", i, x_known(i), x(i));
  }
  printf("\n\n");
#endif

  // Spot-check a few entries; indices 10 and 24 are only printed for gn > 24.
  printf("\n Test Points \n");
  printf("i: %d x: %f %f \n", 0, x_known(0), x(0));
  if(gn > 24)
  {
    printf("i: %d x: %f %f \n", 10, x_known(10), x(10));
    printf("i: %d x: %f %f \n", 24, x_known(24), x(24));
  }
  printf("\n");
  printf("TEST_SOLVE: ||x-x||/||x| = %e", diff);
  printf("\n");

  if((diff > -1e-2) && (diff < 1e-2))
  {
    printf("TEST PASSED \n");
  }

  return 0;
}//end test_solve
/*
 * Benchmark entry point taking an untyped argument block.
 *
 * vargs is reinterpreted as a struct bench_args_t and its val, cols,
 * rowDelimiters, vec and out members are handed to spmv() in that order.
 */
void run_benchmark( void *vargs ) {
  struct bench_args_t *bench = (struct bench_args_t *)vargs;

  spmv( bench->val,
        bench->cols,
        bench->rowDelimiters,
        bench->vec,
        bench->out );
}
void spmv(const char mode[], const AlphaType& alpha_in, const AMatrix& A, const XVector& x, const BetaType& beta_in, const YVector& y, const RANK_TWO) { typedef typename Impl::GetCoeffView<AlphaType, typename XVector::device_type>::view_type alpha_view_type; typedef typename Impl::GetCoeffView<BetaType, typename XVector::device_type>::view_type beta_view_type; //alpha_view_type alpha = Impl::GetCoeffView<AlphaType, typename XVector::device_type>::get_view(alpha_in,x.dimension_1()); //beta_view_type beta = Impl::GetCoeffView<AlphaType, typename XVector::device_type>::get_view(beta_in, x.dimension_1()); #ifdef KOKKOS_HAVE_CXX11 // Make sure that both x and y have the same rank. static_assert (XVector::rank == YVector::rank, "KokkosBlas::spmv: Vector ranks do not match."); // Make sure that y is non-const. static_assert (Kokkos::Impl::is_same<typename YVector::value_type, typename YVector::non_const_value_type>::value, "KokkosBlas::spmv: Output Vector must be non-const."); #else // We prefer to use C++11 static_assert, because it doesn't give // "unused typedef" warnings, like the constructs below do. // // Make sure that both x and y have the same rank. typedef typename Kokkos::Impl::StaticAssert<XVector::rank == YVector::rank>::type Blas1_spmv_vector_ranks_do_not_match; #endif // KOKKOS_HAVE_CXX11 // Check compatibility of dimensions at run time. 
if((mode[0]==NoTranspose[0])||(mode[0]==Conjugate[0])) { if ((x.dimension_1 () != y.dimension_1 ()) || (static_cast<size_t> (A.numCols ()) > static_cast<size_t> (x.dimension_0 ())) || (static_cast<size_t> (A.numRows ()) > static_cast<size_t> (y.dimension_0 ()))) { std::ostringstream os; os << "KokkosBlas::spmv: Dimensions do not match: " << ", A: " << A.numRows () << " x " << A.numCols() << ", x: " << x.dimension_0 () << " x " << x.dimension_1 () << ", y: " << y.dimension_0 () << " x " << y.dimension_1 (); Kokkos::Impl::throw_runtime_exception (os.str ()); } } else { if ((x.dimension_1 () != y.dimension_1 ()) || (static_cast<size_t> (A.numCols ()) > static_cast<size_t> (y.dimension_0 ())) || (static_cast<size_t> (A.numRows ()) > static_cast<size_t> (x.dimension_0 ()))) { std::ostringstream os; os << "KokkosBlas::spmv: Dimensions do not match (transpose): " << ", A: " << A.numRows () << " x " << A.numCols() << ", x: " << x.dimension_0 () << " x " << x.dimension_1 () << ", y: " << y.dimension_0 () << " x " << y.dimension_1 (); Kokkos::Impl::throw_runtime_exception (os.str ()); } } typedef KokkosSparse::CrsMatrix<typename AMatrix::const_value_type, typename AMatrix::const_ordinal_type, typename AMatrix::device_type, typename AMatrix::memory_traits, typename AMatrix::const_size_type> AMatrix_Internal; AMatrix_Internal A_i = A; // Call single vector version if appropriate if( x.dimension_1() == 1) { typedef Kokkos::View<typename XVector::const_value_type*, typename Kokkos::Impl::if_c<Kokkos::Impl::is_same<typename YVector::array_layout,Kokkos::LayoutLeft>::value, Kokkos::LayoutLeft,Kokkos::LayoutStride>::type, typename XVector::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged|Kokkos::RandomAccess> > XVector_SubInternal; typedef Kokkos::View<typename YVector::non_const_value_type*, typename Kokkos::Impl::if_c<Kokkos::Impl::is_same<typename YVector::array_layout,Kokkos::LayoutLeft>::value, Kokkos::LayoutLeft,Kokkos::LayoutStride>::type, typename YVector::device_type, 
Kokkos::MemoryTraits<Kokkos::Unmanaged> > YVector_SubInternal; XVector_SubInternal x_i = Kokkos::subview(x,Kokkos::ALL(),0); YVector_SubInternal y_i = Kokkos::subview(y,Kokkos::ALL(),0); alpha_view_type alpha = Impl::GetCoeffView<AlphaType, typename XVector::device_type>::get_view(alpha_in,x.dimension_1()); beta_view_type beta = Impl::GetCoeffView<BetaType, typename XVector::device_type>::get_view(beta_in, x.dimension_1()); typename alpha_view_type::non_const_type::HostMirror h_alpha = Kokkos::create_mirror_view(alpha); Kokkos::deep_copy(h_alpha,alpha); typename beta_view_type::non_const_type::HostMirror h_beta = Kokkos::create_mirror_view(beta); Kokkos::deep_copy(h_beta,beta); spmv(mode,h_alpha(0),A,x_i,h_beta(0),y_i); return; } else { typedef Kokkos::View<typename XVector::const_value_type**, typename XVector::array_layout, typename XVector::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged|Kokkos::RandomAccess> > XVector_Internal; typedef Kokkos::View<typename YVector::non_const_value_type**, typename YVector::array_layout, typename YVector::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> > YVector_Internal; XVector_Internal x_i = x; YVector_Internal y_i = y; typedef Kokkos::View<typename alpha_view_type::const_value_type*, typename alpha_view_type::array_layout, typename alpha_view_type::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> > alpha_view_type_Internal; typedef Kokkos::View<typename beta_view_type::const_value_type*, typename beta_view_type::array_layout, typename beta_view_type::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> > beta_view_type_Internal; //alpha_view_type_Internal alpha_c = alpha; //beta_view_type_Internal beta_c = beta; return Impl::SPMV_MV<typename alpha_view_type_Internal::value_type*, typename alpha_view_type_Internal::array_layout, typename alpha_view_type_Internal::device_type, typename alpha_view_type_Internal::memory_traits, typename AMatrix_Internal::value_type, typename AMatrix_Internal::ordinal_type, typename 
AMatrix_Internal::device_type, typename AMatrix_Internal::memory_traits, typename AMatrix_Internal::size_type, typename XVector_Internal::value_type**, typename XVector_Internal::array_layout, typename XVector_Internal::device_type, typename XVector_Internal::memory_traits, typename beta_view_type_Internal::value_type*, typename beta_view_type_Internal::array_layout, typename beta_view_type_Internal::device_type, typename beta_view_type_Internal::memory_traits, typename YVector_Internal::value_type**, typename YVector_Internal::array_layout, typename YVector_Internal::device_type, typename YVector_Internal::memory_traits>::spmv_mv(mode,alpha_in,A,x,beta_in,y); } }