// Benchmark driver for the (preconditioned) CG example.
//
// Obtains a reference vector from create_x_vector and a right-hand side from
// create_y_vector (presumably b = A * x_original -- confirm against those
// helpers), runs pcgsolve() once with a default-configured Gauss-Seidel
// handle, and prints the statistics stored in CGSolveResult together with the
// time measured around the call.
//
// crsmat: the sparse matrix to solve with; its row count sizes every vector.
void run_experiment(
    crsMat_t crsmat){

  typedef typename crsMat_t::values_type values_view_t;

  typedef KokkosKernels::Experimental::KokkosKernelsHandle
      < typename crsMat_t::StaticCrsGraphType::row_map_type,
        typename crsMat_t::StaticCrsGraphType::entries_type,
        values_view_t,
        ExecSpace, ExecSpace, ExecSpace > handle_t;

  // Solver limits handed to every pcgsolve() call in this function.
  const unsigned max_iterations = 100000;
  const double   tolerance      = 1e-7;

  idx num_rows = crsmat.numRows();

  // Reference vector; it is dumped to the output before the right-hand side
  // is derived from it.
  values_view_t x_reference = create_x_vector<values_view_t>(num_rows, MAXVAL);
  KokkosKernels::Experimental::Util::print_1Dview(x_reference);
  values_view_t rhs = create_y_vector(crsmat, x_reference);

  // Vector the solver writes its answer into.
  values_view_t x_solution("kok_x_vector", num_rows);

  double elapsed = 0;
  KokkosKernels::Experimental::Example::CGSolveResult stats;

  handle_t handle;
  handle.create_gs_handle();

  // The final argument switches the preconditioner on (the disabled
  // "no Preconditioner" run below passes false instead).
  Kokkos::Impl::Timer solve_timer;
  KokkosKernels::Experimental::Example::pcgsolve(
      handle, crsmat, rhs, x_solution,
      max_iterations, tolerance,
      &stats, true);
  Kokkos::fence();
  elapsed = solve_timer.seconds();

  std::cout << "DEFAULT SOLVE:";
  std::cout << "\n\t(P)CG_NUM_ITER [" << stats.iteration << "]";
  std::cout << "\n\tMATVEC_TIME [" << stats.matvec_time << "]";
  std::cout << "\n\tCG_RESIDUAL [" << stats.norm_res << "]";
  std::cout << "\n\tCG_ITERATION_TIME [" << stats.iter_time << "]";
  std::cout << "\n\tPRECONDITIONER_TIME [" << stats.precond_time << "]";
  std::cout << "\n\tPRECONDITIONER_INIT_TIME [" << stats.precond_init_time << "]";
  // +1 in the divisor: the per-iteration figure is averaged over
  // (iteration count + 1) applications.
  std::cout << "\n\tPRECOND_APPLY_TIME_PER_ITER ["
            << stats.precond_time / (stats.iteration + 1) << "]";
  std::cout << "\n\tSOLVE_TIME [" << elapsed << "]";
  std::cout << std::endl;

  /*
  Disabled variants of the same experiment (permuted SGS, team SGS and plain
  CG without a preconditioner). Kept for reference.

  handle.destroy_gs_handle();
  handle.create_gs_handle(KokkosKernels::Experimental::Graph::GS_PERMUTED);

  x_solution = values_view_t("kok_x_vector", num_rows);
  solve_timer.reset();
  KokkosKernels::Experimental::Example::pcgsolve(
      handle, crsmat, rhs, x_solution,
      max_iterations, tolerance,
      &stats, true);
  Kokkos::fence();
  elapsed = solve_timer.seconds();
  std::cout << "\nPERMUTED SGS SOLVE:"
      << "\n\t(P)CG_NUM_ITER [" << stats.iteration << "]"
      << "\n\tMATVEC_TIME [" << stats.matvec_time << "]"
      << "\n\tCG_RESIDUAL [" << stats.norm_res << "]"
      << "\n\tCG_ITERATION_TIME [" << stats.iter_time << "]"
      << "\n\tPRECONDITIONER_TIME [" << stats.precond_time << "]"
      << "\n\tPRECONDITIONER_INIT_TIME [" << stats.precond_init_time << "]"
      << "\n\tPRECOND_APPLY_TIME_PER_ITER [" << stats.precond_time / (stats.iteration + 1) << "]"
      << "\n\tSOLVE_TIME [" << elapsed << "]"
      << std::endl;

  handle.destroy_gs_handle();
  handle.create_gs_handle(KokkosKernels::Experimental::Graph::GS_TEAM);

  x_solution = values_view_t("kok_x_vector", num_rows);
  solve_timer.reset();
  KokkosKernels::Experimental::Example::pcgsolve(
      handle, crsmat, rhs, x_solution,
      max_iterations, tolerance,
      &stats, true);
  Kokkos::fence();
  elapsed = solve_timer.seconds();
  std::cout << "\nTEAM SGS SOLVE:"
      << "\n\t(P)CG_NUM_ITER [" << stats.iteration << "]"
      << "\n\tMATVEC_TIME [" << stats.matvec_time << "]"
      << "\n\tCG_RESIDUAL [" << stats.norm_res << "]"
      << "\n\tCG_ITERATION_TIME [" << stats.iter_time << "]"
      << "\n\tPRECONDITIONER_TIME [" << stats.precond_time << "]"
      << "\n\tPRECONDITIONER_INIT_TIME [" << stats.precond_init_time << "]"
      << "\n\tPRECOND_APPLY_TIME_PER_ITER [" << stats.precond_time / (stats.iteration + 1) << "]"
      << "\n\tSOLVE_TIME [" << elapsed << "]"
      << std::endl;

  x_solution = values_view_t("kok_x_vector", num_rows);
  solve_timer.reset();
  KokkosKernels::Experimental::Example::pcgsolve(
      handle, crsmat, rhs, x_solution,
      max_iterations, tolerance,
      &stats, false);
  Kokkos::fence();
  elapsed = solve_timer.seconds();
  std::cout << "\nCG SOLVE (With no Preconditioner):"
      << "\n\t(P)CG_NUM_ITER [" << stats.iteration << "]"
      << "\n\tMATVEC_TIME [" << stats.matvec_time << "]"
      << "\n\tCG_RESIDUAL [" << stats.norm_res << "]"
      << "\n\tCG_ITERATION_TIME [" << stats.iter_time << "]"
      << "\n\tPRECONDITIONER_TIME [" << stats.precond_time << "]"
      << "\n\tPRECONDITIONER_INIT_TIME [" << stats.precond_init_time << "]"
      << "\n\tPRECOND_APPLY_TIME_PER_ITER [" << stats.precond_time / (stats.iteration + 1) << "]"
      << "\n\tSOLVE_TIME [" << elapsed << "]"
      << std::endl;
  */
}
inline void pcgsolve( //const ImportType & import, KernelHandle &kh , const CrsMatrix <typename KernelHandle::nonzero_value_type , typename KernelHandle::row_index_type, typename KernelHandle::HandleExecSpace > & A , const Kokkos::View <typename KernelHandle::nonzero_value_type *, typename KernelHandle::HandleExecSpace> & b , const Kokkos::View <typename KernelHandle::nonzero_value_type * , typename KernelHandle::HandleExecSpace > & x , const size_t maximum_iteration = 200 , const double tolerance = std::numeric_limits<double>::epsilon() , CGSolveResult * result = 0 , bool use_sgs = true ) { typedef typename KernelHandle::HandleExecSpace Space; //typedef typename KernelHandle::nonzero_value_type MScalar; typedef typename KernelHandle::nonzero_value_type VScalar; //typedef typename KernelHandle::row_index_type Idx_Type; //typedef typename KernelHandle::idx_array_type idx_array_type; typedef typename Kokkos::View< VScalar * , Space > VectorType ; //const size_t count_owned = import.count_owned ; //const size_t count_total = import.count_owned + import.count_receive; const size_t count_owned = A.graph.nv; const size_t count_total = count_owned; size_t iteration = 0 ; double iter_time = 0 ; double matvec_time = 0 ; double norm_res = 0 ; double precond_time = 0; double precond_init_time = 0; Kokkos::Impl::Timer wall_clock ; Kokkos::Impl::Timer timer; // Need input vector to matvec to be owned + received VectorType pAll ( "cg::p" , count_total ); VectorType p = Kokkos::subview( pAll , std::pair<size_t,size_t>(0,count_owned) ); VectorType r ( "cg::r" , count_owned ); VectorType Ap( "cg::Ap", count_owned ); /* r = b - A * x ; */ /* p = x */ Kokkos::deep_copy( p , x ); ///* import p */ import( pAll ); /* Ap = A * p */ multiply( count_owned , Ap , A , pAll ); /* r = b - Ap */ waxpby( count_owned , r , 1.0 , b , -1.0 , Ap ); /* p = r */ Kokkos::deep_copy( p , r ); //double old_rdot = Kokkos::Example::all_reduce( dot( count_owned , r , r ) , import.comm ); double old_rdot = 
dot( count_owned , r , r ); norm_res = sqrt( old_rdot ); int apply_count = 1; VectorType z; //double precond_old_rdot = Kokkos::Example::all_reduce( dot( count_owned , r , z ) , import.comm ); double precond_old_rdot = 1; #ifdef PRECOND_NORM double precond_norm_res = 1; #endif Kokkos::deep_copy( p , z ); //typename KernelHandle::GaussSeidelHandleType *gsHandler; bool owner_handle = false; if (use_sgs){ if (kh.get_gs_handle() == NULL){ owner_handle = true; kh.create_gs_handle(); } //gsHandler = kh.get_gs_handle(); timer.reset(); KokkosKernels::Experimental::Graph::gauss_seidel_numeric (&kh, count_owned, count_owned, A.graph.row_map, A.graph.entries, A.coeff); Space::fence(); precond_init_time += timer.seconds(); z = VectorType( "pcg::z" , count_owned ); Space::fence(); timer.reset(); KokkosKernels::Experimental::Graph::symmetric_gauss_seidel_apply (&kh, count_owned, count_owned, A.graph.row_map, A.graph.entries, A.coeff, z, r, true, apply_count); Space::fence(); precond_time += timer.seconds(); //double precond_old_rdot = Kokkos::Example::all_reduce( dot( count_owned , r , z ) , import.comm ); precond_old_rdot = dot( count_owned , r , z ); #ifdef PRECOND_NORM precond_norm_res = sqrt( precond_old_rdot ); #endif Kokkos::deep_copy( p , z ); } iteration = 0 ; #ifdef PRINTRES std::cout << "norm_res:" << norm_res << " old_rdot:" << old_rdot<< std::endl; #ifdef PRECOND_NORM if (use_sgs) std::cout << "precond_norm_res:" << precond_norm_res << " precond_old_rdot:" << precond_old_rdot<< std::endl; #endif #endif while ( tolerance < norm_res && iteration < maximum_iteration ) { /* pAp_dot = dot( p , Ap = A * p ) */ timer.reset(); ///* import p */ import( pAll ); /* Ap = A * p */ multiply( count_owned , Ap , A , pAll ); Space::fence(); matvec_time += timer.seconds(); //const double pAp_dot = Kokkos::Example::all_reduce( dot( count_owned , p , Ap ) , import.comm ); const double pAp_dot = dot( count_owned , p , Ap ) ; double alpha = 0; if (use_sgs){ alpha = precond_old_rdot / 
pAp_dot ; } else { alpha = old_rdot / pAp_dot ; } /* x += alpha * p ; */ waxpby( count_owned , x , alpha, p , 1.0 , x ); /* r += -alpha * Ap ; */ waxpby( count_owned , r , -alpha, Ap , 1.0 , r ); //const double r_dot = Kokkos::Example::all_reduce( dot( count_owned , r , r ) , import.comm ); const double r_dot = dot( count_owned , r , r ); const double beta_original = r_dot / old_rdot ; double precond_r_dot = 1; double precond_beta = 1; if (use_sgs){ Space::fence(); timer.reset(); KokkosKernels::Experimental::Graph::symmetric_gauss_seidel_apply(&kh, count_owned, count_owned, A.graph.row_map, A.graph.entries, A.coeff, z, r, true, apply_count); Space::fence(); precond_time += timer.seconds(); //const double precond_r_dot = Kokkos::Example::all_reduce( dot( count_owned , r , z ) , import.comm ); precond_r_dot = dot( count_owned , r , z ); precond_beta = precond_r_dot / precond_old_rdot ; } double beta = 1; if (!use_sgs){ beta = beta_original; /* p = r + beta * p ; */ waxpby( count_owned , p , 1.0 , r , beta , p ); } else { beta = precond_beta; waxpby( count_owned , p , 1.0 , z , beta , p ); } #ifdef PRINTRES std::cout << "\tbeta_original:" << beta_original << std::endl; if (use_sgs) std::cout << "\tprecond_beta:" << precond_beta << std::endl; #endif norm_res = sqrt( old_rdot = r_dot ); #ifdef PRECOND_NORM if (use_sgs){ precond_norm_res = sqrt( precond_old_rdot = precond_r_dot ); } #else precond_old_rdot = precond_r_dot; #endif #ifdef PRINTRES std::cout << "\tnorm_res:" << norm_res << " old_rdot:" << old_rdot<< std::endl; #ifdef PRECOND_NORM if (use_sgs) std::cout << "\tprecond_norm_res:" << precond_norm_res << " precond_old_rdot:" << precond_old_rdot<< std::endl; #endif #endif ++iteration ; } Space::fence(); iter_time = wall_clock.seconds(); if ( 0 != result ) { result->iteration = iteration ; result->iter_time = iter_time ; result->matvec_time = matvec_time ; result->norm_res = norm_res ; result->precond_time = precond_time; result->precond_init_time = 
precond_init_time; } if (use_sgs & owner_handle ){ kh.destroy_gs_handle(); } }
void run_experiment( int repeatcount, idx nv, idx ne, idx_array_type kok_xadj, idx_edge_array_type kok_adj, value_array_type kok_mtx_vals ){ value_array_type kok_x_original = create_x_vector(nv, MAXVAL); value_array_type kok_b_vector = create_b_vector( nv, kok_xadj, kok_adj, kok_mtx_vals, kok_x_original); Experimental::KokkosKernels::Example::CrsMatrix<wt, idx, MyExecSpace> A(nv ,ne, kok_xadj, kok_adj, kok_mtx_vals); //create X vector value_array_type kok_x_vector("kok_x_vector", nv); double gs_time = 0; const unsigned cg_iteration_limit = 100000; const double cg_iteration_tolerance = 1e-7 ; Experimental::KokkosKernels::Example::CGSolveResult cg_result ; typedef Experimental::KokkosKernels::KokkosKernelsHandle <idx_array_type,idx_edge_array_type, value_array_type, MyExecSpace, TemporaryWorkSpace,PersistentWorkSpace > KernelHandle; KernelHandle kh; kh.set_row_map(A.graph.row_map); kh.set_entries(A.graph.entries); kh.set_values(A.coeff); Kokkos::Impl::Timer timer1; Experimental::KokkosKernels::Example::pcgsolve( kh , A , kok_b_vector , kok_x_vector , cg_iteration_limit , cg_iteration_tolerance , & cg_result , true ); Kokkos::fence(); gs_time = timer1.seconds(); std::cout << " cg_iteration[" << cg_result.iteration << "]" << " matvec_time[" << cg_result.matvec_time << "]" << " cg_residual[" << cg_result.norm_res << "]" << " cg_iter_time[" << cg_result.iter_time << "]" << " precond_time[" << cg_result.precond_time << "]" << " precond_init_time[" << cg_result.precond_init_time << "]" << " precond_time/iter[" << cg_result.precond_time / (cg_result.iteration + 1) << "]" << " GSTIME[" << gs_time<< "]" << std::endl ; kh.create_graph_coloring_handle(Experimental::KokkosKernels::Graph::COLORING_VB); Experimental::KokkosKernels::Graph::graph_color_solve<KernelHandle> (&kh); kok_x_vector = value_array_type("kok_x_vector", nv); timer1.reset(); Experimental::KokkosKernels::Example::pcgsolve( kh , A , kok_b_vector , kok_x_vector , cg_iteration_limit , cg_iteration_tolerance , & 
cg_result , true ); Kokkos::fence(); gs_time = timer1.seconds(); std::cout << "\n\nCOLORING_VB PRECALL:\n cg_iteration[" << cg_result.iteration << "]" << " matvec_time[" << cg_result.matvec_time << "]" << " cg_residual[" << cg_result.norm_res << "]" << " cg_iter_time[" << cg_result.iter_time << "]" << " precond_time[" << cg_result.precond_time << "]" << " precond_init_time[" << cg_result.precond_init_time << "]" << " precond_time/iter[" << cg_result.precond_time / (cg_result.iteration + 1) << "]" << " GSTIME[" << gs_time<< "]" << " numColor[" << kh.get_graph_coloring_handle()->get_num_colors()<<"]" << std::endl ; kh.destroy_graph_coloring_handle(); kh.create_graph_coloring_handle(Experimental::KokkosKernels::Graph::COLORING_EB); Experimental::KokkosKernels::Graph::graph_color_solve<KernelHandle> (&kh); kok_x_vector = value_array_type("kok_x_vector", nv); timer1.reset(); Experimental::KokkosKernels::Example::pcgsolve( kh , A , kok_b_vector , kok_x_vector , cg_iteration_limit , cg_iteration_tolerance , & cg_result , true ); Kokkos::fence(); gs_time = timer1.seconds(); std::cout << "\n\nCOLORING_EB PRECALL:\n cg_iteration[" << cg_result.iteration << "]" << " matvec_time[" << cg_result.matvec_time << "]" << " cg_residual[" << cg_result.norm_res << "]" << " cg_iter_time[" << cg_result.iter_time << "]" << " precond_time[" << cg_result.precond_time << "]" << " precond_init_time[" << cg_result.precond_init_time << "]" << " precond_time/iter[" << cg_result.precond_time / (cg_result.iteration + 1) << "]" << " GSTIME[" << gs_time<< "]" << " numColor[" << kh.get_graph_coloring_handle()->get_num_colors()<<"]" << std::endl ; kh.destroy_graph_coloring_handle(); kh.destroy_gs_handle(); kh.create_gs_handle(Experimental::KokkosKernels::Graph::GS_PERMUTED); kok_x_vector = value_array_type("kok_x_vector", nv); timer1.reset(); Experimental::KokkosKernels::Example::pcgsolve( kh , A , kok_b_vector , kok_x_vector , cg_iteration_limit , cg_iteration_tolerance , & cg_result , true ); 
Kokkos::fence(); gs_time = timer1.seconds(); std::cout << "\n\nPERMUTED:\n cg_iteration[" << cg_result.iteration << "]" << " matvec_time[" << cg_result.matvec_time << "]" << " cg_residual[" << cg_result.norm_res << "]" << " cg_iter_time[" << cg_result.iter_time << "]" << " precond_time[" << cg_result.precond_time << "]" << " precond_init_time[" << cg_result.precond_init_time << "]" << " precond_time/iter[" << cg_result.precond_time / (cg_result.iteration + 1) << "]" << " GSTIME[" << gs_time<< "]" << std::endl ; kh.destroy_graph_coloring_handle(); kh.destroy_gs_handle(); kh.create_gs_handle(Experimental::KokkosKernels::Graph::GS_TEAM); kok_x_vector = value_array_type("kok_x_vector", nv); timer1.reset(); Experimental::KokkosKernels::Example::pcgsolve( kh , A , kok_b_vector , kok_x_vector , cg_iteration_limit , cg_iteration_tolerance , & cg_result , true ); Kokkos::fence(); gs_time = timer1.seconds(); std::cout << "\n\nGSTEAM:\n cg_iteration[" << cg_result.iteration << "]" << " matvec_time[" << cg_result.matvec_time << "]" << " cg_residual[" << cg_result.norm_res << "]" << " cg_iter_time[" << cg_result.iter_time << "]" << " precond_time[" << cg_result.precond_time << "]" << " precond_init_time[" << cg_result.precond_init_time << "]" << " precond_time/iter[" << cg_result.precond_time / (cg_result.iteration + 1) << "]" << " GSTIME[" << gs_time<< "]" << std::endl ; }