//============================================================================
// Solve A*x = rhs with the conjugate-residual (CR) iteration.
//
//   A                        square operator (A.m == A.n is asserted)
//   rhs                      right-hand side, n entries
//   result                   solution, n entries; used as the starting point
//                            when use_given_initial_guess is true, otherwise
//                            it is zeroed before iterating
//   preconditioner           optional n-by-n operator applied to the residual
//                            (pass 0 for none)
//   use_given_initial_guess  see `result`
//
// Returns the solver status; the same value is stored in the `status` member
// on every exit path. The members `iteration` and `residual_norm` (max-norm of
// the current residual) are updated as the solve proceeds.
KrylovSolverStatus MINRES_CR_Solver::
solve(const LinearOperator &A, const double *rhs, double *result,
      const LinearOperator *preconditioner, bool use_given_initial_guess)
{
   const int n=A.m;
   assert(A.n==n);
   assert(preconditioner==0 || (preconditioner->m==n && preconditioner->n==n));
   // all work vectors share the same length, so checking one is enough
   if((int)s.size()!=n){
      r.resize(n);
      z.resize(n);
      q.resize(n);
      s.resize(n);
      t.resize(n);
   }
   // convergence tolerance, relative to the largest entry of rhs
   double tol=tolerance_factor*BLAS::abs_max(n, rhs);
   // initial guess
   if(use_given_initial_guess){
      A.apply_and_subtract(result, rhs, &r[0]);
   }else{
      BLAS::set_zero(n, result);
      BLAS::copy(n, rhs, &r[0]);
   }
   // check instant convergence
   iteration=0;
   residual_norm=BLAS::abs_max(r);
   if(residual_norm==0) return status=KRYLOV_CONVERGED;
   // set up CR: s is the first search direction (preconditioned residual),
   // t = A*s. The preconditioned residual must land in s, not z, because s is
   // what gets multiplied by A on the next line.
   double rho;
   if(preconditioner) preconditioner->apply(r, s); else BLAS::copy(r, s);
   A.apply(s, t);
   rho=BLAS::dot(r, t);
   // rho!=rho is a NaN test
   if(rho==0 || rho!=rho) return status=KRYLOV_BREAKDOWN;
   // and iterate
   for(iteration=1; iteration<max_iterations; ++iteration){
      double alpha;
      double tt=BLAS::dot(t, t);
      if(tt==0 || tt!=tt) return status=KRYLOV_BREAKDOWN;
      alpha=rho/tt;
      BLAS::add_scaled(n, alpha, &s[0], result); // result+=alpha*s
      BLAS::add_scaled(-alpha, t, r);            // r-=alpha*t
      residual_norm=BLAS::abs_max(r);
      if(residual_norm<=tol) return status=KRYLOV_CONVERGED;
      if(preconditioner) preconditioner->apply(r, z);
      else               BLAS::copy(r, z);
      A.apply(z, q);
      double rho_new=BLAS::dot(r, q);
      if(rho_new==0 || rho_new!=rho_new) return status=KRYLOV_BREAKDOWN;
      double beta=rho_new/rho;
      BLAS::add_scaled(beta, s, z); s.swap(z); // s=beta*s+z
      BLAS::add_scaled(beta, t, q); t.swap(q); // t=beta*t+q
      rho=rho_new;
   }
   return status=KRYLOV_EXCEEDED_MAX_ITERATIONS;
}
//============================================================================
// Solve the normal equations A^T*A*x = A^T*rhs with conjugate gradients
// (CGNR); A may be rectangular (m rows, n columns).
//
//   A                        m-by-n operator
//   rhs                      right-hand side, m entries
//   result                   solution, n entries; used as the starting point
//                            when use_given_initial_guess is true, otherwise
//                            it is zeroed before iterating
//   preconditioner           optional n-by-n operator applied to the
//                            normal-equations residual (pass 0 for none)
//   use_given_initial_guess  see `result`
//
// Returns the solver status, which is also stored in the `status` member.
// `iteration` and `residual_norm` (max-norm of A^T*(rhs - A*result)) are
// updated as the solve proceeds. Every 5000 iterations the current residual
// norm is printed to std::cout.
KrylovSolverStatus CGNR_Solver::
solve(const LinearOperator &A, const double *rhs, double *result,
      const LinearOperator *preconditioner, bool use_given_initial_guess)
{
   const int m=A.m, n=A.n;
   assert(preconditioner==0 || (preconditioner->m==n && preconditioner->n==n));
   // r, z and s have n entries while u has m entries. Size each vector on
   // every call: guarding on s.size() alone would leave u at a stale length
   // when a later operator has the same column count but a different row
   // count. Resizing to the current length leaves the contents untouched.
   r.resize(n);
   z.resize(n);
   s.resize(n);
   u.resize(m);
   // convergence tolerance, relative to the largest entry of A^T*rhs
   A.apply_transpose(rhs, &r[0]); // form A^T*rhs in r
   double tol=tolerance_factor*BLAS::abs_max(r);
   // initial guess
   if(use_given_initial_guess){
      A.apply_and_subtract(result, rhs, &u[0]);
      A.apply_transpose(u, r);
   }else{
      // with a zero guess the residual is A^T*rhs, which r already holds
      BLAS::set_zero(n, result);
   }
   // check instant convergence
   iteration=0;
   residual_norm=BLAS::abs_max(r);
   if(residual_norm==0) return status=KRYLOV_CONVERGED;
   // set up CG
   double rho;
   if(preconditioner) preconditioner->apply(r, z); else BLAS::copy(r, z);
   rho=BLAS::dot(r, z);
   // rho!=rho is a NaN test
   if(rho<=0 || rho!=rho) return status=KRYLOV_BREAKDOWN;
   BLAS::copy(z, s);
   // and iterate
   for(iteration=1; iteration<max_iterations; ++iteration){
      double alpha;
      A.apply(s, u);            // u=A*s
      A.apply_transpose(u, z);  // z=A^T*A*s
      double sz=BLAS::dot(u, u); // equals s^T*(A^T*A)*s
      if(sz<=0 || sz!=sz) return status=KRYLOV_BREAKDOWN;
      alpha=rho/sz;
      BLAS::add_scaled(n, alpha, &s[0], result); // result+=alpha*s
      BLAS::add_scaled(-alpha, z, r);            // r-=alpha*z
      residual_norm=BLAS::abs_max(r);
      if(residual_norm<=tol) return status=KRYLOV_CONVERGED;
      if(preconditioner) preconditioner->apply(r, z); else BLAS::copy(r, z);
      double rho_new=BLAS::dot(r, z);
      if(rho_new<=0 || rho_new!=rho_new) return status=KRYLOV_BREAKDOWN;
      double beta=rho_new/rho;
      BLAS::add_scaled(beta, s, z); s.swap(z); // s=beta*s+z
      rho=rho_new;
      
      // periodic progress report
      if ( iteration % 5000 == 0 )
      {
         std::cout << "CGNR_Solver --- residual_norm: " << residual_norm << std::endl;
      }
      
   }
   return status=KRYLOV_EXCEEDED_MAX_ITERATIONS;
}
  // Solve A x = b with right-preconditioned GMRES (single cycle, no restart).
  //
  //   x     in: initial guess when useInitialGuess_ is set (otherwise zeroed);
  //         out: x plus the correction built from the preconditioned basis
  //   A     operator, applied through A.apply(out, in, tol)
  //   b     right-hand side
  //   M     preconditioner, applied through M.applyInverse(out, in, tol)
  //   iter  out: 0-based index of the iteration that met the tolerance, or
  //         maxit_ when the iteration limit was reached
  //   flag  out: 0 on convergence, 1 when the limit was reached without
  //         meeting the tolerance
  //
  // The iteration stops once the estimated residual norm drops to
  // min(absTol_, relTol_*||r0||). If the limit is reached first, x still
  // receives the last computed correction and flag is set to 1.
  void run( Vector<Real> &x, LinearOperator<Real> &A, const Vector<Real> &b,
            LinearOperator<Real> &M, int &iter, int &flag ) {

    using Teuchos::RCP;
 
    flag = 0; 

    Real zero = 0.0;
    Real one =  1.0;

    // Work vectors are cloned once and reused on later calls.
    if ( !isInitialized_ ) {
      r_  = b.clone();
      w_  = b.clone();
      z_  = x.clone();

      isInitialized_ = true;
    }

    Real itol  = std::sqrt(ROL_EPSILON<Real>()); 

    // Compute initial residual
    if(useInitialGuess_) {
    
      A.apply(*r_,x,itol);
      r_->scale(-1.0);
      r_->plus(b);       // r = b-Ax
 
    }
    else {
      x.zero();
      r_->set(b);
    }

    Real temp  = 0;

    std::vector<RCP<Vector<Real > > > V;  // Krylov basis
    std::vector<RCP<Vector<Real > > > Z;  // preconditioned basis vectors

    (*res_)[0] = r_->norm();

    // A zero residual means x already solves the system. Returning here also
    // avoids dividing by zero when normalizing the first basis vector.
    if( (*res_)[0] == zero ) {
      iter = 0;
      return;
    }
     
    Real rtol  = std::min(absTol_,relTol_*(*res_)[0]);

    V.push_back(b.clone());
    (V[0])->set(*r_);
    (V[0])->scale(one/(*res_)[0]);    

    (*s_)(0) = (*res_)[0];

    bool converged = false;

    for( iter=0; iter<maxit_; ++iter ) {

//      std::cout << (*res_)[iter] << std::endl;

      if( useInexact_ ) {
        itol = rtol/(maxit_*(*res_)[iter]); 
      }

      Z.push_back(x.clone());

      // Apply right preconditioner
      M.applyInverse(*(Z[iter]),*(V[iter]),itol);

      // Apply operator
      A.apply(*w_,*(Z[iter]),itol);

      // Evaluate coefficients and orthogonalize using Gram-Schmidt
      for( int k=0; k<=iter; ++k ) {
        (*H_)(k,iter) = w_->dot(*(V[k]));
        w_->axpy( -(*H_)(k,iter), *(V[k]) );
      } 
     
      (*H_)(iter+1,iter) = w_->norm();

      V.push_back( b.clone() );
      (V[iter+1])->set(*w_);
      (V[iter+1])->scale(one/((*H_)(iter+1,iter)));

      // Apply the previously computed Givens rotations to the new column
      for( int k=0; k<=iter-1; ++k ) {
        temp            =  (*cs_)(k)*(*H_)(k,iter) + (*sn_)(k)*(*H_)(k+1,iter);
        (*H_)(k+1,iter) = -(*sn_)(k)*(*H_)(k,iter) + (*cs_)(k)*(*H_)(k+1,iter); 
        (*H_)(k,iter)   = temp;
      } 

      // Form i-th rotation matrix; the two non-trivial branches divide by the
      // larger-magnitude entry
      if( (*H_)(iter+1,iter) == zero ) {
        (*cs_)(iter) = one;
        (*sn_)(iter) = zero;
      }
      else if ( std::abs((*H_)(iter+1,iter)) > std::abs((*H_)(iter,iter)) ) { 
        temp = (*H_)(iter,iter) / (*H_)(iter+1,iter);
        (*sn_)(iter) = one / std::sqrt( one + temp*temp );
        (*cs_)(iter) = temp*(*sn_)(iter); 
      }
      else {
        temp = (*H_)(iter+1,iter) / (*H_)(iter,iter);
        (*cs_)(iter) = one / std::sqrt( one + temp*temp );
        (*sn_)(iter) = temp*(*cs_)(iter);  
      }
     
      // Approximate residual norm
      temp               = (*cs_)(iter)*(*s_)(iter);
      (*s_)(iter+1)      = -(*sn_)(iter)*(*s_)(iter);
      (*s_)(iter)        = temp;
      (*H_)(iter,iter)   = (*cs_)(iter)*(*H_)(iter,iter) + (*sn_)(iter)*(*H_)(iter+1,iter);
      (*H_)(iter+1,iter) = zero;
      (*res_)[iter+1]    = std::abs((*s_)(iter+1));
  
      // Update solution approximation: solve the leading (iter+1)x(iter+1)
      // upper-triangular system H y = s with LAPACK's scaled triangular solve.
      const char uplo = 'U';
      const char trans = 'N';
      const char diag = 'N';
      const char normin = 'N';
      Real scaling = zero;
      int info = 0;
      *y_ = *s_;
      lapack_.LATRS(uplo, trans, diag, normin, iter+1, H_->values(), maxit_+1, y_->values(), &scaling, cnorm_->values(), &info);

      // z = sum_k y_k Z_k is the correction to x for the current iterate
      z_->zero();

      for( int k=0; k<=iter;++k ) {
        z_->axpy((*y_)(k),*(Z[k]));
      }

      if( (*res_)[iter+1] <= rtol ) {
        // Update solution vector
        x.plus(*z_);  
        converged = true;
        break;
      }
    } // loop over iter

    // The loop ran out of iterations (iter == maxit_ here). This cannot be
    // detected inside the loop body, where iter < maxit_ always holds.
    if( !converged ) {
      flag = 1;
      // Keep the last correction instead of discarding all the work; it is
      // only available if at least one iteration ran.
      if( iter > 0 ) {
        x.plus(*z_);
      }
    }

  }