Exemplo n.º 1
0
// Conjugate-gradient solve of A * x = b, optionally preconditioned with
// symmetric Gauss-Seidel (SGS).
//
// NOTE(review): the template header declaring `KernelHandle` is not visible in
// this excerpt; it is presumably declared immediately above -- confirm.
//
// Parameters:
//   kh                 Kernel handle. When use_sgs is true and the handle has
//                      no Gauss-Seidel sub-handle yet, one is created here and
//                      destroyed again before returning; an existing one is
//                      reused and left in place.
//   A                  System matrix (A.graph.nv rows; A.graph.row_map,
//                      A.graph.entries and A.coeff are handed to the kernels).
//   b                  Right-hand side.
//   x                  Initial guess on entry; updated in place by the
//                      iteration.
//   maximum_iteration  Upper bound on the number of CG iterations.
//   tolerance          Iteration continues while tolerance < sqrt(dot(r, r)).
//   result             Optional; when non-null receives the iteration count,
//                      timings and the final residual norm.
//   use_sgs            true: precondition with SGS; false: plain CG.
//
// Timing notes: iter_time is read from a timer constructed before the work
// vectors are allocated, so it covers setup and preconditioner initialisation
// as well as the iteration loop.
//
// The distributed pieces (import of off-process entries, all_reduce of dot
// products) are disabled in this version; all rows are treated as owned.
inline
void pcgsolve( //const ImportType & import,
              KernelHandle &kh
            ,  const CrsMatrix <typename KernelHandle::nonzero_value_type , typename KernelHandle::row_index_type, typename KernelHandle::HandleExecSpace >      & A
            , const Kokkos::View <typename KernelHandle::nonzero_value_type *,
                                  typename KernelHandle::HandleExecSpace> & b
            , const Kokkos::View <typename KernelHandle::nonzero_value_type * ,
                                  typename KernelHandle::HandleExecSpace > & x
            , const size_t  maximum_iteration = 200
            , const double  tolerance = std::numeric_limits<double>::epsilon()
            , CGSolveResult * result = 0
            , bool use_sgs = true
            )
{
  typedef typename KernelHandle::HandleExecSpace Space;
  typedef typename KernelHandle::nonzero_value_type VScalar;
  typedef typename Kokkos::View< VScalar * , Space >  VectorType ;

  // No import step: the "total" vector length equals the owned length.
  const size_t count_owned = A.graph.nv;
  const size_t count_total = count_owned;

  size_t  iteration = 0 ;
  double  iter_time = 0 ;
  double  matvec_time = 0 ;
  double  norm_res = 0 ;
  double  precond_time = 0 ;
  double  precond_init_time = 0 ;

  Kokkos::Impl::Timer wall_clock ;  // never reset: total elapsed time
  Kokkos::Impl::Timer timer ;       // reset around each timed section

  // The matvec input is sized count_total; p is the owned prefix of it.
  VectorType pAll ( "cg::p" , count_total );
  VectorType p = Kokkos::subview( pAll , std::pair<size_t,size_t>(0,count_owned) );
  VectorType r ( "cg::r" , count_owned );
  VectorType Ap( "cg::Ap", count_owned );

  /* r = b - A * x ; */

  /* p  = x       */  Kokkos::deep_copy( p , x );
  /* Ap = A * p   */  multiply( count_owned , Ap , A , pAll );
  /* r = b - Ap   */  waxpby( count_owned , r , 1.0 , b , -1.0 , Ap );
  /* p  = r       */  Kokkos::deep_copy( p , r );

  double old_rdot = dot( count_owned , r , r );
  norm_res = sqrt( old_rdot );

  int apply_count = 1;

  // z holds the preconditioned residual. It is only allocated on the SGS path,
  // so it must not be copied into p before that point: for plain CG the
  // initial search direction is r, which p already contains.
  VectorType z;
  double precond_old_rdot = 1;
#ifdef PRECOND_NORM
  double precond_norm_res  = 1;
#endif

  bool owner_handle = false;
  if (use_sgs){
    if (kh.get_gs_handle() == NULL){
      // Remember that this call created the handle so it can release it.
      owner_handle = true;
      kh.create_gs_handle();
    }

    timer.reset();
    KokkosKernels::Experimental::Graph::gauss_seidel_numeric
      (&kh, count_owned, count_owned, A.graph.row_map, A.graph.entries, A.coeff);
    Space::fence();
    precond_init_time += timer.seconds();

    z = VectorType( "pcg::z" , count_owned );
    Space::fence();

    timer.reset();
    KokkosKernels::Experimental::Graph::symmetric_gauss_seidel_apply
        (&kh, count_owned, count_owned, A.graph.row_map, A.graph.entries, A.coeff, z, r, true, apply_count);
    Space::fence();
    precond_time += timer.seconds();

    precond_old_rdot = dot( count_owned , r , z );
#ifdef PRECOND_NORM
    precond_norm_res  = sqrt( precond_old_rdot );
#endif

    /* p = z : preconditioned initial search direction */
    Kokkos::deep_copy( p , z );
  }

  iteration = 0 ;

#ifdef PRINTRES
  std::cout << "norm_res:" << norm_res << " old_rdot:" << old_rdot<<  std::endl;
#ifdef PRECOND_NORM
  if (use_sgs)
  std::cout << "precond_norm_res:" << precond_norm_res << " precond_old_rdot:" << precond_old_rdot<<  std::endl;
#endif
#endif

  while ( tolerance < norm_res && iteration < maximum_iteration ) {

    /* pAp_dot = dot( p , Ap = A * p ) */
    timer.reset();
    /* Ap = A * p  */  multiply( count_owned , Ap , A , pAll );
    Space::fence();
    matvec_time += timer.seconds();

    const double pAp_dot = dot( count_owned , p , Ap ) ;

    // Step length: the numerator is dot(r, z) with SGS, dot(r, r) without.
    const double alpha = ( use_sgs ? precond_old_rdot : old_rdot ) / pAp_dot ;

    /* x +=  alpha * p ;  */ waxpby( count_owned , x ,  alpha, p  , 1.0 , x );
    /* r += -alpha * Ap ; */ waxpby( count_owned , r , -alpha, Ap , 1.0 , r );

    const double r_dot = dot( count_owned , r , r );
    const double beta_original  = r_dot / old_rdot ;

    double precond_r_dot = 1;
    double precond_beta = 1;
    if (use_sgs){
      Space::fence();
      timer.reset();
      KokkosKernels::Experimental::Graph::symmetric_gauss_seidel_apply
          (&kh, count_owned, count_owned, A.graph.row_map, A.graph.entries, A.coeff, z, r, true, apply_count);
      Space::fence();
      precond_time += timer.seconds();

      precond_r_dot = dot( count_owned , r , z );
      precond_beta  = precond_r_dot / precond_old_rdot ;
    }

    if (use_sgs){
      /* p = z + beta * p ; */ waxpby( count_owned , p , 1.0 , z , precond_beta , p );
    }
    else {
      /* p = r + beta * p ; */ waxpby( count_owned , p , 1.0 , r , beta_original , p );
    }

#ifdef PRINTRES
    std::cout << "\tbeta_original:" << beta_original <<  std::endl;

    if (use_sgs)
    std::cout << "\tprecond_beta:" << precond_beta <<  std::endl;
#endif

    // Convergence is always judged on the unpreconditioned residual norm.
    old_rdot = r_dot;
    norm_res = sqrt( old_rdot );

    // Without SGS both values are the constant 1, so this is a no-op there.
    precond_old_rdot = precond_r_dot;
#ifdef PRECOND_NORM
    if (use_sgs){
      precond_norm_res = sqrt( precond_old_rdot );
    }
#endif

#ifdef PRINTRES
    std::cout << "\tnorm_res:" << norm_res << " old_rdot:" << old_rdot<<  std::endl;
#ifdef PRECOND_NORM

    if (use_sgs)
    std::cout << "\tprecond_norm_res:" << precond_norm_res << " precond_old_rdot:" << precond_old_rdot<<  std::endl;
#endif
#endif
    ++iteration ;
  }

  Space::fence();
  iter_time = wall_clock.seconds();

  if ( 0 != result ) {
    result->iteration   = iteration ;
    result->iter_time   = iter_time ;
    result->matvec_time = matvec_time ;
    result->norm_res    = norm_res ;
    result->precond_time = precond_time;
    result->precond_init_time = precond_init_time;
  }

  // Only tear down a Gauss-Seidel handle that this call created.
  if (use_sgs && owner_handle){
    kh.destroy_gs_handle();
  }
}