Пример #1
0
static int CVDiagSetup(CVodeMem cv_mem, int convfail, N_Vector ypred,
                       N_Vector fpred, booleantype *jcurPtr, N_Vector vtemp1,
                       N_Vector vtemp2, N_Vector vtemp3)
{
  realtype r;
  N_Vector ftemp, y;
  booleantype invOK;
  CVDiagMem cvdiag_mem;
  int retval;

  cvdiag_mem = (CVDiagMem) lmem;

  /* Rename work vectors for use as temporary values of y and f */
  ftemp = vtemp1;
  y     = vtemp2;

  /* Form y with perturbation = FRACT*(func. iter. correction) */
  r = FRACT * rl1;
  N_VLinearSum(h, fpred, -ONE, zn[1], ftemp);
  N_VLinearSum(r, ftemp, ONE, ypred, y);

  /* Evaluate f at perturbed y */
  retval = f(tn, y, M, cv_mem->cv_user_data);
  nfeDI++;
  if (retval < 0) {
    cvProcessError(cv_mem, CVDIAG_RHSFUNC_UNRECVR, "CVDIAG", "CVDiagSetup", MSGDG_RHSFUNC_FAILED);
    last_flag = CVDIAG_RHSFUNC_UNRECVR;
    return(-1);
  }
  if (retval > 0) {
    last_flag = CVDIAG_RHSFUNC_RECVR;
    return(1);
  }

  /* Construct M = I - gamma*J with J = diag(deltaf_i/deltay_i) */
  N_VLinearSum(ONE, M, -ONE, fpred, M);
  N_VLinearSum(FRACT, ftemp, -h, M, M);
  N_VProd(ftemp, ewt, y);
  /* Protect against deltay_i being at roundoff level */
  N_VCompare(uround, y, bit);
  N_VAddConst(bit, -ONE, bitcomp);
  N_VProd(ftemp, bit, y);
  N_VLinearSum(FRACT, y, -ONE, bitcomp, y);
  N_VDiv(M, y, M);
  N_VProd(M, bit, M);
  N_VLinearSum(ONE, M, -ONE, bitcomp, M);

  /* Invert M with test for zero components */
  invOK = N_VInvTest(M, M);
  if (!invOK) {
    last_flag = CVDIAG_INV_FAIL;
    return(1);
  }

  /* Set jcur = TRUE, save gamma in gammasv, and return */
  *jcurPtr = TRUE;
  gammasv = gamma;
  last_flag = CVDIAG_SUCCESS;
  return(0);
}
Пример #2
0
static real KINScSteplength(KINMem kin_mem, N_Vector ucur,
                            N_Vector ss, N_Vector usc)
{
  N_VInv(usc, vtemp1);
  N_VAbs(ucur, vtemp2);
  N_VLinearSum(ONE, vtemp1, ONE, vtemp2, vtemp1);
  N_VDiv(ss, vtemp1, vtemp1);
  return(N_VMaxNorm(vtemp1));
}
void N_VDiv_SensWrapper(N_Vector x, N_Vector y, N_Vector z)
{
  int i;

  for (i=0; i < NV_NVECS_SW(x); i++)
    N_VDiv(NV_VEC_SW(x,i), NV_VEC_SW(y,i), NV_VEC_SW(z,i));

  return;
}
Пример #4
0
static int KINConstraint(KINMem kin_mem)
{
  real mxchange;

  N_VLinearSum(ONE, uu, ONE, pp, vtemp1);

  /*  this vector.c routine returns TRUE if all products v1[i]*v2[i] are
   *  positive (with the proviso that all products which would result from
   *  v1[i]=0.  are ignored) , and FALSE otherwise (e.g. at least one such
   *  product is negative) */

  if (N_VConstrProdPos(constraints, vtemp1))
    return(0);

  N_VDiv(pp, uu, vtemp2);
  mxchange = N_VMaxNorm(vtemp2);

  if (mxchange >= relu)
  {
    stepl = POINT9 * relu / mxchange;
    return(1);
  }
  return(0);
}
Пример #5
0
int SUNLinSolSolve_SPBCGS(SUNLinearSolver S, SUNMatrix A, N_Vector x, 
                          N_Vector b, realtype delta)
{
  /* local data and shortcut variables */
  realtype alpha, beta, omega, omega_denom, beta_num, beta_denom, r_norm, rho;
  N_Vector r_star, r, p, q, u, Ap, vtemp;
  booleantype preOnLeft, preOnRight, scale_x, scale_b, converged;
  int l, l_max, ier;
  void *A_data, *P_data;
  N_Vector sx, sb;
  ATimesFn atimes;
  PSolveFn psolve;
  realtype *res_norm;
  int *nli;

  /* local variables for fused vector operations */
  realtype cv[3];
  N_Vector Xv[3];
  
  /* Make local shorcuts to solver variables. */
  if (S == NULL) return(SUNLS_MEM_NULL);
  l_max        = SPBCGS_CONTENT(S)->maxl;
  r_star       = SPBCGS_CONTENT(S)->r_star;
  r            = SPBCGS_CONTENT(S)->r;
  p            = SPBCGS_CONTENT(S)->p;
  q            = SPBCGS_CONTENT(S)->q;
  u            = SPBCGS_CONTENT(S)->u;
  Ap           = SPBCGS_CONTENT(S)->Ap;
  vtemp        = SPBCGS_CONTENT(S)->vtemp;
  sb           = SPBCGS_CONTENT(S)->s1;
  sx           = SPBCGS_CONTENT(S)->s2;
  A_data       = SPBCGS_CONTENT(S)->ATData;
  P_data       = SPBCGS_CONTENT(S)->PData;
  atimes       = SPBCGS_CONTENT(S)->ATimes;
  psolve       = SPBCGS_CONTENT(S)->Psolve;
  nli          = &(SPBCGS_CONTENT(S)->numiters);
  res_norm     = &(SPBCGS_CONTENT(S)->resnorm);

  /* Initialize counters and convergence flag */
  *nli = 0;
  converged = SUNFALSE;

  /* set booleantype flags for internal solver options */
  preOnLeft  = ( (PRETYPE(S) == PREC_LEFT) || 
                 (PRETYPE(S) == PREC_BOTH) );
  preOnRight = ( (PRETYPE(S) == PREC_RIGHT) || 
                 (PRETYPE(S) == PREC_BOTH) );
  scale_x = (sx != NULL);
  scale_b = (sb != NULL);

  /* Set r_star to initial (unscaled) residual r_0 = b - A*x_0 */

  if (N_VDotProd(x, x) == ZERO) N_VScale(ONE, b, r_star);
  else {
    ier = atimes(A_data, x, r_star);
    if (ier != 0) {
      LASTFLAG(S) = (ier < 0) ?
        SUNLS_ATIMES_FAIL_UNREC : SUNLS_ATIMES_FAIL_REC;
      return(LASTFLAG(S));
    }
    N_VLinearSum(ONE, b, -ONE, r_star, r_star);
  }

  /* Apply left preconditioner and b-scaling to r_star = r_0 */

  if (preOnLeft) {
    ier = psolve(P_data, r_star, r, delta, PREC_LEFT);
    if (ier != 0) {
      LASTFLAG(S) = (ier < 0) ?
        SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC;
      return(LASTFLAG(S));
    }
  }
  else N_VScale(ONE, r_star, r);

  if (scale_b) N_VProd(sb, r, r_star);
  else N_VScale(ONE, r, r_star);

  /* Initialize beta_denom to the dot product of r0 with r0 */

  beta_denom = N_VDotProd(r_star, r_star);

  /* Set r_norm to L2 norm of r_star = sb P1_inv r_0, and
     return if small */

  *res_norm = r_norm = rho = SUNRsqrt(beta_denom);
  if (r_norm <= delta) {
    LASTFLAG(S) = SUNLS_SUCCESS;
    return(LASTFLAG(S));
  }

  /* Copy r_star to r and p */

  N_VScale(ONE, r_star, r);
  N_VScale(ONE, r_star, p);

  /* Begin main iteration loop */

  for(l = 0; l < l_max; l++) {

    (*nli)++;

    /* Generate Ap = A-tilde p, where A-tilde = sb P1_inv A P2_inv sx_inv */

    /*   Apply x-scaling: vtemp = sx_inv p */

    if (scale_x) N_VDiv(p, sx, vtemp);
    else N_VScale(ONE, p, vtemp);

    /*   Apply right preconditioner: vtemp = P2_inv sx_inv p */

    if (preOnRight) {
      N_VScale(ONE, vtemp, Ap);
      ier = psolve(P_data, Ap, vtemp, delta, PREC_RIGHT);
      if (ier != 0) {
        LASTFLAG(S) = (ier < 0) ?
          SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC;
        return(LASTFLAG(S));
      }
    }

    /*   Apply A: Ap = A P2_inv sx_inv p */

    ier = atimes(A_data, vtemp, Ap );
    if (ier != 0) {
      LASTFLAG(S) = (ier < 0) ?
        SUNLS_ATIMES_FAIL_UNREC : SUNLS_ATIMES_FAIL_REC;
      return(LASTFLAG(S));
    }

    /*   Apply left preconditioner: vtemp = P1_inv A P2_inv sx_inv p */

    if (preOnLeft) {
      ier = psolve(P_data, Ap, vtemp, delta, PREC_LEFT);
      if (ier != 0) {
        LASTFLAG(S) = (ier < 0) ?
          SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC;
        return(LASTFLAG(S));
      }
    }
    else N_VScale(ONE, Ap, vtemp);

    /*   Apply b-scaling: Ap = sb P1_inv A P2_inv sx_inv p */

    if (scale_b) N_VProd(sb, vtemp, Ap);
    else N_VScale(ONE, vtemp, Ap);


    /* Calculate alpha = <r,r_star>/<Ap,r_star> */

    alpha = ((beta_denom / N_VDotProd(Ap, r_star)));

    /* Update q = r - alpha*Ap = r - alpha*(sb P1_inv A P2_inv sx_inv p) */

    N_VLinearSum(ONE, r, -alpha, Ap, q);

    /* Generate u = A-tilde q */

    /*   Apply x-scaling: vtemp = sx_inv q */

    if (scale_x) N_VDiv(q, sx, vtemp);
    else N_VScale(ONE, q, vtemp);

    /*   Apply right preconditioner: vtemp = P2_inv sx_inv q */

    if (preOnRight) {
      N_VScale(ONE, vtemp, u);
      ier = psolve(P_data, u, vtemp, delta, PREC_RIGHT);
      if (ier != 0) {
        LASTFLAG(S) = (ier < 0) ?
          SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC;
        return(LASTFLAG(S));
      }
    }

    /*   Apply A: u = A P2_inv sx_inv u */

    ier = atimes(A_data, vtemp, u );
    if (ier != 0) {
      LASTFLAG(S) = (ier < 0) ?
        SUNLS_ATIMES_FAIL_UNREC : SUNLS_ATIMES_FAIL_REC;
      return(LASTFLAG(S));
    }

    /*   Apply left preconditioner: vtemp = P1_inv A P2_inv sx_inv p */

    if (preOnLeft) {
      ier = psolve(P_data, u, vtemp, delta, PREC_LEFT);
      if (ier != 0) {
        LASTFLAG(S) = (ier < 0) ?
          SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC;
        return(LASTFLAG(S));
      }
    }
    else N_VScale(ONE, u, vtemp);

    /*   Apply b-scaling: u = sb P1_inv A P2_inv sx_inv u */

    if (scale_b) N_VProd(sb, vtemp, u);
    else N_VScale(ONE, vtemp, u);


    /* Calculate omega = <u,q>/<u,u> */

    omega_denom = N_VDotProd(u, u);
    if (omega_denom == ZERO) omega_denom = ONE;
    omega = (N_VDotProd(u, q) / omega_denom);

    /* Update x = x + alpha*p + omega*q */
    cv[0] = ONE;
    Xv[0] = x;

    cv[1] = alpha;
    Xv[1] = p;

    cv[2] = omega;
    Xv[2] = q;

    ier = N_VLinearCombination(3, cv, Xv, x);
    if (ier != SUNLS_SUCCESS) return(SUNLS_VECTOROP_ERR);

    /* Update the residual r = q - omega*u */

    N_VLinearSum(ONE, q, -omega, u, r);

    /* Set rho = norm(r) and check convergence */

    *res_norm = rho = SUNRsqrt(N_VDotProd(r, r));
    if (rho <= delta) {
      converged = SUNTRUE;
      break;
    }

    /* Not yet converged, continue iteration */
    /* Update beta = <rnew,r_star> / <rold,r_start> * alpha / omega */

    beta_num = N_VDotProd(r, r_star);
    beta = ((beta_num / beta_denom) * (alpha / omega));

    /* Update p = r + beta*(p - omega*Ap) = beta*p - beta*omega*Ap + r */
    cv[0] = beta;
    Xv[0] = p;

    cv[1] = -alpha*(beta_num / beta_denom);
    Xv[1] = Ap;

    cv[2] = ONE;
    Xv[2] = r;

    ier = N_VLinearCombination(3, cv, Xv, p);
    if (ier != SUNLS_SUCCESS) return(SUNLS_VECTOROP_ERR);

    /* udpate beta_denom for next iteration */
    beta_denom = beta_num;
  }

  /* Main loop finished */

  if ((converged == SUNTRUE) || (rho < r_norm)) {

    /* Apply the x-scaling and right preconditioner: x = P2_inv sx_inv x */

    if (scale_x) N_VDiv(x, sx, x);
    if (preOnRight) {
      ier = psolve(P_data, x, vtemp, delta, PREC_RIGHT);
      if (ier != 0) {
        LASTFLAG(S) = (ier < 0) ?
          SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC;
        return(LASTFLAG(S));
      }
      N_VScale(ONE, vtemp, x);
    }

    if (converged == SUNTRUE) 
      LASTFLAG(S) = SUNLS_SUCCESS;
    else 
      LASTFLAG(S) = SUNLS_RES_REDUCED;
    return(LASTFLAG(S));
    
  }
  else {
    LASTFLAG(S) = SUNLS_CONV_FAIL;
    return(LASTFLAG(S));
  }
}
Пример #6
0
int SpgmrSolve(SpgmrMem mem, void *A_data, N_Vector x, N_Vector b,
               int pretype, int gstype, real delta, int max_restarts,
	       void *P_data, N_Vector sx, N_Vector sb, ATimesFn atimes,
	       PSolveFn psolve, real *res_norm, int *nli, int *nps)
{
  N_Vector *V, xcor, vtemp;
  real **Hes, *givens, *yg;
  real s_r0_norm, beta, rotation_product, r_norm, s_product, rho;
  boole preOnLeft, preOnRight, scale_x, scale_b, converged;
  int i, j, k, l, l_plus_1, l_max, krydim, ier, ntries;

  if (mem == NULL) return(SPGMR_MEM_NULL);

  /* Make local copies of mem variables */
  l_max  = mem->l_max;
  V      = mem->V;
  Hes    = mem->Hes;
  givens = mem->givens;
  xcor   = mem->xcor;
  yg     = mem->yg;
  vtemp  = mem->vtemp;

  *nli = *nps = 0;     /* Initialize counters */
  converged = FALSE;   /* Initialize converged flag */

  if (max_restarts < 0) max_restarts = 0;

  if ((pretype != LEFT) && (pretype != RIGHT) && (pretype != BOTH))
    pretype = NONE;
  
  preOnLeft  = ((pretype == LEFT) || (pretype == BOTH));
  preOnRight = ((pretype == RIGHT) || (pretype == BOTH));
  scale_x    = (sx != NULL);
  scale_b    = (sb != NULL);

  /* Set vtemp and V[0] to initial (unscaled) residual r_0 = b - A*x_0  */

  if (N_VDotProd(x, x) == ZERO) {
    N_VScale(ONE, b, vtemp);
  } else {
    if (atimes(A_data, x, vtemp) != 0)
      return(SPGMR_ATIMES_FAIL);
    N_VLinearSum(ONE, b, -ONE, vtemp, vtemp);
  }
  N_VScale(ONE, vtemp, V[0]);

  /* Apply b-scaling to vtemp, get L2 norm of sb r_0, and return if small */
/*
  if (scale_b) N_VProd(sb, vtemp, vtemp);
  s_r0_norm = RSqrt(N_VDotProd(vtemp, vtemp));
  if (s_r0_norm <= delta) return(SPGMR_SUCCESS);
*/  
  /* Apply left preconditioner and b-scaling to V[0] = r_0 */
  
  if (preOnLeft) {
    ier = psolve(P_data, V[0], vtemp, LEFT);
    (*nps)++;
    if (ier != 0)
      return((ier < 0) ? SPGMR_PSOLVE_FAIL_UNREC : SPGMR_PSOLVE_FAIL_REC);
  } else {
    N_VScale(ONE, V[0], vtemp);
  }
  
  if (scale_b) {
    N_VProd(sb, vtemp, V[0]);   
  } else {
    N_VScale(ONE, vtemp, V[0]);
  }

  /* Set r_norm = beta to L2 norm of V[0] = sb P1_inv r_0, and
     return if small  */

  *res_norm = r_norm = beta = RSqrt(N_VDotProd(V[0], V[0])); 
  if (r_norm <= delta)
    return(SPGMR_SUCCESS);

  /* Set xcor = 0 */

  N_VConst(ZERO, xcor);


  /* Begin outer iterations: up to (max_restarts + 1) attempts */
  
  for (ntries = 0; ntries <= max_restarts; ntries++) {

    /* Initialize the Hessenberg matrix Hes and Givens rotation
       product.  Normalize the initial vector V[0].             */
   
    for (i=0; i <= l_max; i++)
      for (j=0; j < l_max; j++)
	Hes[i][j] = ZERO;

    rotation_product = ONE;
    
    N_VScale(ONE/r_norm, V[0], V[0]);

    /* Inner loop: generate Krylov sequence and Arnoldi basis */
    
    for(l=0; l < l_max; l++) {

      (*nli)++;

      krydim = l_plus_1 = l + 1;
      
      /* Generate A-tilde V[l], where A-tilde = sb P1_inv A P2_inv sx_inv */

      /* Apply x-scaling: vtemp = sx_inv V[l] */
      if (scale_x) {
	N_VDiv(V[l], sx, vtemp);
      } else {
	N_VScale(ONE, V[l], vtemp);
      }

      /* Apply right precoditioner: vtemp = P2_inv sx_inv V[l] */ 
      N_VScale(ONE, vtemp, V[l_plus_1]);
      if (preOnRight) {
	ier = psolve(P_data, V[l_plus_1], vtemp, RIGHT);
	(*nps)++;
	if (ier != 0)
	  return((ier < 0) ? SPGMR_PSOLVE_FAIL_UNREC : SPGMR_PSOLVE_FAIL_REC);
      }

      /* Apply A: V[l+1] = A P2_inv sx_inv V[l] */
      if (atimes(A_data, vtemp, V[l_plus_1] ) != 0)
	return(SPGMR_ATIMES_FAIL);

      /* Apply left preconditioning: vtemp = P1_inv A P2_inv sx_inv V[l] */
      if (preOnLeft) {
	ier = psolve(P_data, V[l_plus_1], vtemp, LEFT);
	(*nps)++;
	if (ier != 0)
	  return((ier < 0) ? SPGMR_PSOLVE_FAIL_UNREC : SPGMR_PSOLVE_FAIL_REC);
      } else {
	N_VScale(ONE, V[l_plus_1], vtemp);
      }

      /* Apply b-scaling: V[l+1] = sb P1_inv A P2_inv sx_inv V[l] */
      if (scale_b) {
	N_VProd(sb, vtemp, V[l_plus_1]);
      } else {
	N_VScale(ONE, vtemp, V[l_plus_1]);
      }
      
      /*  Orthogonalize V[l+1] against previous V[i]: V[l+1] = w_tilde. */

      if (gstype == CLASSICAL_GS) {
	if (ClassicalGS(V, Hes, l_plus_1, l_max, &(Hes[l_plus_1][l]),
			vtemp, yg) != 0)
	  return(SPGMR_GS_FAIL);
      } else {
	if (ModifiedGS(V, Hes, l_plus_1, l_max, &(Hes[l_plus_1][l])) != 0) 
	  return(SPGMR_GS_FAIL);
      }

      /*  Update the QR factorization of Hes  */

      if(QRfact(krydim, Hes, givens, l) != 0 )
	return(SPGMR_QRFACT_FAIL);

      /*  Update residual norm estimate; break if convergence test passes */
      
      rotation_product *= givens[2*l+1];
    
      if ((*res_norm = rho = ABS(rotation_product*r_norm)) <= delta) {
	converged = TRUE;
	break;
      }
      
      /* Normalize V[l+1] with norm value from the Gram-Schmidt */
      N_VScale(ONE/Hes[l_plus_1][l], V[l_plus_1], V[l_plus_1]);
    }
    
    /* Inner loop is done.  Compute the new correction vector xcor */

    /* Construct g, then solve for y */
    yg[0] = r_norm;
    for (i=1; i <= krydim; i++) yg[i]=ZERO;
    if (QRsol(krydim, Hes, givens, yg) != 0)
      return(SPGMR_QRSOL_FAIL);
    
    /* Add correction vector V_l y to xcor */
    for (k=0; k < krydim; k++)
      N_VLinearSum(yg[k], V[k], ONE, xcor, xcor);

    /* If converged, construct the final solution vector x */
    if (converged) {
     
      /* Apply x-scaling and right precond.: vtemp = P2_inv sx_inv xcor */
  
      if (scale_x) N_VDiv(xcor, sx, xcor);
      if (preOnRight) {
	ier = psolve(P_data, xcor, vtemp, RIGHT);
	(*nps)++;
	if (ier != 0)
	   return((ier < 0) ? SPGMR_PSOLVE_FAIL_UNREC : SPGMR_PSOLVE_FAIL_REC);
      } else {
	N_VScale(ONE, xcor, vtemp);
      }

      /* Add correction to initial x to get final solution x, and return */
      N_VLinearSum(ONE, x, ONE, vtemp, x);

      return(SPGMR_SUCCESS);
    }

    /* Not yet converged; if allowed, prepare for restart */

    if (ntries == max_restarts) break;

    /* Construct last column of Q in yg */
    s_product = ONE;
    for (i=krydim; i > 0; i--) {
      yg[i] = s_product*givens[2*i-2];
      s_product *= givens[2*i-1];
    }
    yg[0] = s_product;

    /* Scale r_norm and yg */
    r_norm *= s_product;
    for (i=0; i <= krydim; i++)
      yg[i] *= r_norm;
    r_norm = ABS(r_norm);

    /* Multiply yg by V_(krydim+1) to get last residual vector; restart */
    N_VScale(yg[0], V[0], V[0]);
    for( k=1; k <= krydim; k++)
      N_VLinearSum(yg[k], V[k], ONE, V[0], V[0]);

  }

  /* Failed to converge, even after allowed restarts.
     If the residual norm was reduced below its initial value, compute
     and return x anyway.  Otherwise return failure flag.              */

  if (rho < beta) {

    /* Apply the x-scaling and right precond.: vtemp = P2_inv sx_inv xcor */
    
    if (scale_x) N_VDiv(xcor, sx, xcor);
    if (preOnRight) {
      ier = psolve(P_data, xcor, vtemp, RIGHT);
      (*nps)++;
      if (ier != 0)
	return((ier < 0) ? SPGMR_PSOLVE_FAIL_UNREC : SPGMR_PSOLVE_FAIL_REC);
    } else {
      N_VScale(ONE, xcor, vtemp);
    }

    /* Add vtemp to initial x to get final solution x, and return */
    N_VLinearSum(ONE, x, ONE, vtemp, x);
    
    return(SPGMR_RES_REDUCED);
  }

  return(SPGMR_CONV_FAIL); 
}
Пример #7
0
int SpbcgSolve(SpbcgMem mem, void *A_data, N_Vector x, N_Vector b,
               int pretype, realtype delta, void *P_data, N_Vector sx,
               N_Vector sb, ATimesFn atimes, PSolveFn psolve,
               realtype *res_norm, int *nli, int *nps)
{
  realtype alpha, beta, omega, omega_denom, beta_num, beta_denom, r_norm, rho;
  N_Vector r_star, r, p, q, u, Ap, vtemp;
  booleantype preOnLeft, preOnRight, scale_x, scale_b, converged;
  int l, l_max, ier;

  if (mem == NULL) return(SPBCG_MEM_NULL);

  /* Make local copies of mem variables */

  l_max  = mem->l_max;
  r_star = mem->r_star;
  r      = mem->r;
  p      = mem->p;
  q      = mem->q;
  u      = mem->u;
  Ap     = mem->Ap;
  vtemp  = mem->vtemp;

  *nli = *nps = 0;    /* Initialize counters */
  converged = FALSE;  /* Initialize converged flag */

  if ((pretype != PREC_LEFT) && (pretype != PREC_RIGHT) && (pretype != PREC_BOTH)) pretype = PREC_NONE;

  preOnLeft  = ((pretype == PREC_BOTH) || (pretype == PREC_LEFT));
  preOnRight = ((pretype == PREC_BOTH) || (pretype == PREC_RIGHT));

  scale_x = (sx != NULL);
  scale_b = (sb != NULL);

  /* Set r_star to initial (unscaled) residual r_0 = b - A*x_0 */

  if (N_VDotProd(x, x) == ZERO) N_VScale(ONE, b, r_star);
  else {
    ier = atimes(A_data, x, r_star);
    if (ier != 0)
      return((ier < 0) ? SPBCG_ATIMES_FAIL_UNREC : SPBCG_ATIMES_FAIL_REC);
    N_VLinearSum(ONE, b, -ONE, r_star, r_star);
  }

  /* Apply left preconditioner and b-scaling to r_star = r_0 */

  if (preOnLeft) {
    ier = psolve(P_data, r_star, r, PREC_LEFT);
    (*nps)++;
    if (ier != 0) return((ier < 0) ? SPBCG_PSOLVE_FAIL_UNREC : SPBCG_PSOLVE_FAIL_REC);
  }
  else N_VScale(ONE, r_star, r);

  if (scale_b) N_VProd(sb, r, r_star);
  else N_VScale(ONE, r, r_star);

  /* Initialize beta_denom to the dot product of r0 with r0 */

  beta_denom = N_VDotProd(r_star, r_star);

  /* Set r_norm to L2 norm of r_star = sb P1_inv r_0, and
     return if small */

  *res_norm = r_norm = rho = SUNRsqrt(beta_denom);
  if (r_norm <= delta) return(SPBCG_SUCCESS);

  /* Copy r_star to r and p */

  N_VScale(ONE, r_star, r);
  N_VScale(ONE, r_star, p);

  /* Begin main iteration loop */

  for(l = 0; l < l_max; l++) {

    (*nli)++;

    /* Generate Ap = A-tilde p, where A-tilde = sb P1_inv A P2_inv sx_inv */

    /*   Apply x-scaling: vtemp = sx_inv p */

    if (scale_x) N_VDiv(p, sx, vtemp);
    else N_VScale(ONE, p, vtemp);

    /*   Apply right preconditioner: vtemp = P2_inv sx_inv p */

    if (preOnRight) {
      N_VScale(ONE, vtemp, Ap);
      ier = psolve(P_data, Ap, vtemp, PREC_RIGHT);
      (*nps)++;
      if (ier != 0) return((ier < 0) ? SPBCG_PSOLVE_FAIL_UNREC : SPBCG_PSOLVE_FAIL_REC);
    }

    /*   Apply A: Ap = A P2_inv sx_inv p */

    ier = atimes(A_data, vtemp, Ap );
    if (ier != 0)
      return((ier < 0) ? SPBCG_ATIMES_FAIL_UNREC : SPBCG_ATIMES_FAIL_REC);

    /*   Apply left preconditioner: vtemp = P1_inv A P2_inv sx_inv p */

    if (preOnLeft) {
      ier = psolve(P_data, Ap, vtemp, PREC_LEFT);
      (*nps)++;
      if (ier != 0) return((ier < 0) ? SPBCG_PSOLVE_FAIL_UNREC : SPBCG_PSOLVE_FAIL_REC);
    }
    else N_VScale(ONE, Ap, vtemp);

    /*   Apply b-scaling: Ap = sb P1_inv A P2_inv sx_inv p */

    if (scale_b) N_VProd(sb, vtemp, Ap);
    else N_VScale(ONE, vtemp, Ap);


    /* Calculate alpha = <r,r_star>/<Ap,r_star> */

    alpha = ((beta_denom / N_VDotProd(Ap, r_star)));

    /* Update q = r - alpha*Ap = r - alpha*(sb P1_inv A P2_inv sx_inv p) */

    N_VLinearSum(ONE, r, -alpha, Ap, q);

    /* Generate u = A-tilde q */

    /*   Apply x-scaling: vtemp = sx_inv q */

    if (scale_x) N_VDiv(q, sx, vtemp);
    else N_VScale(ONE, q, vtemp);

    /*   Apply right preconditioner: vtemp = P2_inv sx_inv q */

    if (preOnRight) {
      N_VScale(ONE, vtemp, u);
      ier = psolve(P_data, u, vtemp, PREC_RIGHT);
      (*nps)++;
      if (ier != 0) return((ier < 0) ? SPBCG_PSOLVE_FAIL_UNREC : SPBCG_PSOLVE_FAIL_REC);
    }

    /*   Apply A: u = A P2_inv sx_inv u */

    ier = atimes(A_data, vtemp, u );
    if (ier != 0)
      return((ier < 0) ? SPBCG_ATIMES_FAIL_UNREC : SPBCG_ATIMES_FAIL_REC);

    /*   Apply left preconditioner: vtemp = P1_inv A P2_inv sx_inv p */

    if (preOnLeft) {
      ier = psolve(P_data, u, vtemp, PREC_LEFT);
      (*nps)++;
      if (ier != 0) return((ier < 0) ? SPBCG_PSOLVE_FAIL_UNREC : SPBCG_PSOLVE_FAIL_REC);
    }
    else N_VScale(ONE, u, vtemp);

    /*   Apply b-scaling: u = sb P1_inv A P2_inv sx_inv u */

    if (scale_b) N_VProd(sb, vtemp, u);
    else N_VScale(ONE, vtemp, u);


    /* Calculate omega = <u,q>/<u,u> */

    omega_denom = N_VDotProd(u, u);
    if (omega_denom == ZERO) omega_denom = ONE;
    omega = (N_VDotProd(u, q) / omega_denom);

    /* Update x = x + alpha*p + omega*q */

    N_VLinearSum(alpha, p, omega, q, vtemp);
    N_VLinearSum(ONE, x, ONE, vtemp, x);

    /* Update the residual r = q - omega*u */

    N_VLinearSum(ONE, q, -omega, u, r);

    /* Set rho = norm(r) and check convergence */

    *res_norm = rho = SUNRsqrt(N_VDotProd(r, r));
    if (rho <= delta) {
      converged = TRUE;
      break;
    }

    /* Not yet converged, continue iteration */
    /* Update beta = <rnew,r_star> / <rold,r_start> * alpha / omega */

    beta_num = N_VDotProd(r, r_star);
    beta = ((beta_num / beta_denom) * (alpha / omega));
    beta_denom = beta_num;

    /* Update p = r + beta*(p - omega*Ap) */

    N_VLinearSum(ONE, p, -omega, Ap, vtemp);
    N_VLinearSum(ONE, r, beta, vtemp, p);

  }

  /* Main loop finished */

  if ((converged == TRUE) || (rho < r_norm)) {

    /* Apply the x-scaling and right preconditioner: x = P2_inv sx_inv x */

    if (scale_x) N_VDiv(x, sx, x);
    if (preOnRight) {
      ier = psolve(P_data, x, vtemp, PREC_RIGHT);
      (*nps)++;
      if (ier != 0) return((ier < 0) ? SPBCG_PSOLVE_FAIL_UNREC : SPBCG_PSOLVE_FAIL_REC);
      N_VScale(ONE, vtemp, x);
    }

    if (converged == TRUE) return(SPBCG_SUCCESS);
    else return(SPBCG_RES_REDUCED);
  }
  else return(SPBCG_CONV_FAIL);
}
Пример #8
0
/*----------------------------------------------------------------
 Function : SpfgmrSolve
 ---------------------------------------------------------------*/
int SpfgmrSolve(SpfgmrMem mem, void *A_data, N_Vector x, 
		N_Vector b, int pretype, int gstype, realtype delta, 
		int max_restarts, int maxit, void *P_data, 
		N_Vector s1, N_Vector s2, ATimesFn atimes, 
		PSolveFn psolve, realtype *res_norm, int *nli, int *nps)
{
  N_Vector *V, *Z, xcor, vtemp;
  realtype **Hes, *givens, *yg;
  realtype beta, rotation_product, r_norm, s_product, rho;
  booleantype preOnRight, scale1, scale2, converged;
  int i, j, k, l, l_max, krydim, ier, ntries;

  if (mem == NULL) return(SPFGMR_MEM_NULL);

  /* Initialize some variables */
  krydim = 0;

  /* Make local copies of mem variables. */
  l_max  = mem->l_max;
  V      = mem->V;
  Z      = mem->Z;
  Hes    = mem->Hes;
  givens = mem->givens;
  xcor   = mem->xcor;
  yg     = mem->yg;
  vtemp  = mem->vtemp;

  *nli = *nps = 0;    /* Initialize counters */
  converged = SUNFALSE;  /* Initialize converged flag */

  /* If maxit is greater than l_max, then set maxit=l_max */
  if (maxit > l_max)  maxit = l_max;

  /* Check for legal value of max_restarts */
  if (max_restarts < 0)  max_restarts = 0;

  /* Set preconditioning flag (enabling any preconditioner implies right 
     preconditioning, since FGMRES does not support left preconditioning) */
  preOnRight = ((pretype == PREC_RIGHT) || (pretype == PREC_BOTH) || (pretype == PREC_LEFT));

  /* Set scaling flags */
  scale1 = (s1 != NULL);
  scale2 = (s2 != NULL);

  /* Set vtemp to initial (unscaled) residual r_0 = b - A*x_0. */
  if (N_VDotProd(x, x) == ZERO) {
    N_VScale(ONE, b, vtemp);
  } else {
    ier = atimes(A_data, x, vtemp);
    if (ier != 0)
      return((ier < 0) ? SPFGMR_ATIMES_FAIL_UNREC : SPFGMR_ATIMES_FAIL_REC);
    N_VLinearSum(ONE, b, -ONE, vtemp, vtemp);
  }

  /* Apply left scaling to vtemp = r_0 to fill V[0]. */
  if (scale1) {
    N_VProd(s1, vtemp, V[0]);   
  } else {
    N_VScale(ONE, vtemp, V[0]);
  }

  /* Set r_norm = beta to L2 norm of V[0] = s1 r_0, and return if small */
  *res_norm = r_norm = beta = SUNRsqrt(N_VDotProd(V[0], V[0]));
  if (r_norm <= delta)
    return(SPFGMR_SUCCESS);

  /* Initialize rho to avoid compiler warning message */
  rho = beta;

  /* Set xcor = 0. */
  N_VConst(ZERO, xcor);

  /* Begin outer iterations: up to (max_restarts + 1) attempts. */
  for (ntries=0; ntries<=max_restarts; ntries++) {
    
    /* Initialize the Hessenberg matrix Hes and Givens rotation
       product.  Normalize the initial vector V[0].             */
    for (i=0; i<=l_max; i++)
      for (j=0; j<l_max; j++)
        Hes[i][j] = ZERO;
    rotation_product = ONE;
    N_VScale(ONE/r_norm, V[0], V[0]);
    
    /* Inner loop: generate Krylov sequence and Arnoldi basis. */
    for (l=0; l<maxit; l++) {
      
      (*nli)++;
      
      krydim = l + 1;
      
      /* Generate A-tilde V[l], where A-tilde = s1 A P_inv s2_inv. */

      /*   Apply right scaling: vtemp = s2_inv V[l]. */
      if (scale2) N_VDiv(V[l], s2, vtemp);
      else N_VScale(ONE, V[l], vtemp);
      
      /*   Apply right preconditioner: vtemp = Z[l] = P_inv s2_inv V[l]. */ 
      if (preOnRight) {
        N_VScale(ONE, vtemp, V[l+1]);
        ier = psolve(P_data, V[l+1], vtemp, delta, PREC_RIGHT);
        (*nps)++;
        if (ier != 0)
          return((ier < 0) ? SPFGMR_PSOLVE_FAIL_UNREC : SPFGMR_PSOLVE_FAIL_REC);
      }
      N_VScale(ONE, vtemp, Z[l]);
      
      /*   Apply A: V[l+1] = A P_inv s2_inv V[l]. */
      ier = atimes(A_data, vtemp, V[l+1]);
      if (ier != 0)
        return((ier < 0) ? SPFGMR_ATIMES_FAIL_UNREC : SPFGMR_ATIMES_FAIL_REC);

      /*   Apply left scaling: V[l+1] = s1 A P_inv s2_inv V[l]. */
      if (scale1)  N_VProd(s1, V[l+1], V[l+1]);
      
      /* Orthogonalize V[l+1] against previous V[i]: V[l+1] = w_tilde. */
      if (gstype == CLASSICAL_GS) {
        if (ClassicalGS(V, Hes, l+1, l_max, &(Hes[l+1][l]),
                        vtemp, yg) != 0)
          return(SPFGMR_GS_FAIL);
      } else {
        if (ModifiedGS(V, Hes, l+1, l_max, &(Hes[l+1][l])) != 0) 
          return(SPFGMR_GS_FAIL);
      }
      
      /* Update the QR factorization of Hes. */
      if(QRfact(krydim, Hes, givens, l) != 0 )
        return(SPFGMR_QRFACT_FAIL);
      
      /* Update residual norm estimate; break if convergence test passes. */
      rotation_product *= givens[2*l+1];
      *res_norm = rho = SUNRabs(rotation_product*r_norm);
      if (rho <= delta) { converged = SUNTRUE; break; }
      
      /* Normalize V[l+1] with norm value from the Gram-Schmidt routine. */
      N_VScale(ONE/Hes[l+1][l], V[l+1], V[l+1]);
    }
    
    /* Inner loop is done.  Compute the new correction vector xcor. */
    
    /*   Construct g, then solve for y. */
    yg[0] = r_norm;
    for (i=1; i<=krydim; i++)  yg[i]=ZERO;
    if (QRsol(krydim, Hes, givens, yg) != 0)
      return(SPFGMR_QRSOL_FAIL);
    
    /*   Add correction vector Z_l y to xcor. */
    for (k=0; k<krydim; k++)
      N_VLinearSum(yg[k], Z[k], ONE, xcor, xcor);
    
    /* If converged, construct the final solution vector x and return. */
    if (converged) {
      N_VLinearSum(ONE, x, ONE, xcor, x);
      return(SPFGMR_SUCCESS);
    }
    
    /* Not yet converged; if allowed, prepare for restart. */
    if (ntries == max_restarts) break;
    
    /* Construct last column of Q in yg. */
    s_product = ONE;
    for (i=krydim; i>0; i--) {
      yg[i] = s_product*givens[2*i-2];
      s_product *= givens[2*i-1];
    }
    yg[0] = s_product;
    
    /* Scale r_norm and yg. */
    r_norm *= s_product;
    for (i=0; i<=krydim; i++)
      yg[i] *= r_norm;
    r_norm = SUNRabs(r_norm);
    
    /* Multiply yg by V_(krydim+1) to get last residual vector; restart. */
    N_VScale(yg[0], V[0], V[0]);
    for (k=1; k<=krydim; k++)
      N_VLinearSum(yg[k], V[k], ONE, V[0], V[0]);
    
  }
  
  /* Failed to converge, even after allowed restarts.
     If the residual norm was reduced below its initial value, compute
     and return x anyway.  Otherwise return failure flag. */
  if (rho < beta) {
    N_VLinearSum(ONE, x, ONE, xcor, x);
    return(SPFGMR_RES_REDUCED);
  }

  return(SPFGMR_CONV_FAIL); 
}
int SptfqmrSolve(SptfqmrMem mem, void *A_data, N_Vector x, N_Vector b,
		 int pretype, realtype delta, void *P_data, N_Vector sx,
		 N_Vector sb, ATimesFn atimes, PSolveFn psolve,
		 realtype *res_norm, int *nli, int *nps)
{
  realtype alpha, tau, eta, beta, c, sigma, v_bar, omega;
  realtype rho[2];
  realtype r_init_norm, r_curr_norm;
  realtype temp_val;
  booleantype preOnLeft, preOnRight, scale_x, scale_b, converged;
  booleantype b_ok;
  int n, m, ier;

  /* Exit immediately if memory pointer is NULL */
  if (mem == NULL) return(SPTFQMR_MEM_NULL);

  temp_val = r_curr_norm = -ONE;  /* Initialize to avoid compiler warnings */

  *nli = *nps = 0;    /* Initialize counters */
  converged = FALSE;  /* Initialize convergence flag */
  b_ok = FALSE;

  if ((pretype != PREC_LEFT)  &&
      (pretype != PREC_RIGHT) &&
      (pretype != PREC_BOTH)) pretype = PREC_NONE;

  preOnLeft  = ((pretype == PREC_BOTH) || (pretype == PREC_LEFT));
  preOnRight = ((pretype == PREC_BOTH) || (pretype == PREC_RIGHT));

  scale_x = (sx != NULL);
  scale_b = (sb != NULL);

  /* Set r_star to initial (unscaled) residual r_star = r_0 = b - A*x_0 */
  /* NOTE: if x == 0 then just set residual to b and continue */
  if (N_VDotProd(x, x) == ZERO) N_VScale(ONE, b, r_star);
  else {
    ier = atimes(A_data, x, r_star);
    if (ier != 0)
      return((ier < 0) ? SPTFQMR_ATIMES_FAIL_UNREC : SPTFQMR_ATIMES_FAIL_REC);
    N_VLinearSum(ONE, b, -ONE, r_star, r_star);
  }

  /* Apply left preconditioner and b-scaling to r_star (or really just r_0) */
  if (preOnLeft) {
    ier = psolve(P_data, r_star, vtemp1, PREC_LEFT);
    (*nps)++;
    if (ier != 0)
      return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC);
  }
  else N_VScale(ONE, r_star, vtemp1);
  if (scale_b) N_VProd(sb, vtemp1, r_star);
  else N_VScale(ONE, vtemp1, r_star);

  /* Initialize rho[0] */
  /* NOTE: initialized here to reduce number of computations - avoid need
           to compute r_star^T*r_star twice, and avoid needlessly squaring
           values */
  rho[0] = N_VDotProd(r_star, r_star);

  /* Compute norm of initial residual (r_0) to see if we really need
     to do anything */
  *res_norm = r_init_norm = RSqrt(rho[0]);
  if (r_init_norm <= delta) return(SPTFQMR_SUCCESS);

  /* Set v_ = A*r_0 (preconditioned and scaled) */
  if (scale_x) N_VDiv(r_star, sx, vtemp1);
  else N_VScale(ONE, r_star, vtemp1);
  if (preOnRight) {
    N_VScale(ONE, vtemp1, v_);
    ier = psolve(P_data, v_, vtemp1, PREC_RIGHT);
    (*nps)++;
    if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC);
  }
  ier = atimes(A_data, vtemp1, v_);
  if (ier != 0)
    return((ier < 0) ? SPTFQMR_ATIMES_FAIL_UNREC : SPTFQMR_ATIMES_FAIL_REC);
  if (preOnLeft) {
    ier = psolve(P_data, v_, vtemp1, PREC_LEFT);
    (*nps)++;
    if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC);
  }
  else N_VScale(ONE, v_, vtemp1);
  if (scale_b) N_VProd(sb, vtemp1, v_);
  else N_VScale(ONE, vtemp1, v_);

  /* Initialize remaining variables */
  N_VScale(ONE, r_star, r_[0]);
  N_VScale(ONE, r_star, u_);
  N_VScale(ONE, r_star, p_);
  N_VConst(ZERO, d_);

  tau = r_init_norm;
  v_bar = eta = ZERO;

  /* START outer loop */
  for (n = 0; n < l_max; ++n) {

    /* Increment linear iteration counter */
    (*nli)++;

    /* sigma = r_star^T*v_ */
    sigma = N_VDotProd(r_star, v_);

    /* alpha = rho[0]/sigma */
    alpha = rho[0]/sigma;

    /* q_ = u_-alpha*v_ */
    N_VLinearSum(ONE, u_, -alpha, v_, q_);

    /* r_[1] = r_[0]-alpha*A*(u_+q_) */
    N_VLinearSum(ONE, u_, ONE, q_, r_[1]);
    if (scale_x) N_VDiv(r_[1], sx, r_[1]);
    if (preOnRight) {
      N_VScale(ONE, r_[1], vtemp1);
      ier = psolve(P_data, vtemp1, r_[1], PREC_RIGHT);
      (*nps)++;
      if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC);
    }
    ier = atimes(A_data, r_[1], vtemp1);
    if (ier != 0)
      return((ier < 0) ? SPTFQMR_ATIMES_FAIL_UNREC : SPTFQMR_ATIMES_FAIL_REC);
    if (preOnLeft) {
      ier = psolve(P_data, vtemp1, r_[1], PREC_LEFT);
      (*nps)++;
      if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC);
    }
    else N_VScale(ONE, vtemp1, r_[1]);
    if (scale_b) N_VProd(sb, r_[1], vtemp1);
    else N_VScale(ONE, r_[1], vtemp1);
    N_VLinearSum(ONE, r_[0], -alpha, vtemp1, r_[1]);

    /* START inner loop */
    for (m = 0; m < 2; ++m) {

      /* d_ = [*]+(v_bar^2*eta/alpha)*d_ */
      /* NOTES:
       *   (1) [*] = u_ if m == 0, and q_ if m == 1
       *   (2) using temp_val reduces the number of required computations
       *       if the inner loop is executed twice
       */
      if (m == 0) {
	temp_val = RSqrt(N_VDotProd(r_[1], r_[1]));
	omega = RSqrt(RSqrt(N_VDotProd(r_[0], r_[0]))*temp_val);
	N_VLinearSum(ONE, u_, SQR(v_bar)*eta/alpha, d_, d_);
      }
      else {
	omega = temp_val;
	N_VLinearSum(ONE, q_, SQR(v_bar)*eta/alpha, d_, d_);
      }

      /* v_bar = omega/tau */
      v_bar = omega/tau;

      /* c = (1+v_bar^2)^(-1/2) */
      c = ONE / RSqrt(ONE+SQR(v_bar));

      /* tau = tau*v_bar*c */
      tau = tau*v_bar*c;

      /* eta = c^2*alpha */
      eta = SQR(c)*alpha;

      /* x = x+eta*d_ */
      N_VLinearSum(ONE, x, eta, d_, x);

      /* Check for convergence... */
      /* NOTE: just use approximation to norm of residual, if possible */
      *res_norm = r_curr_norm = tau*RSqrt(m+1);

      /* Exit inner loop if iteration has converged based upon approximation
	 to norm of current residual */
      if (r_curr_norm <= delta) {
	converged = TRUE;
	break;
      }

      /* Decide if actual norm of residual vector should be computed */
      /* NOTES:
       *   (1) if r_curr_norm > delta, then check if actual residual norm
       *       is OK (recall we first compute an approximation)
       *   (2) if r_curr_norm >= r_init_norm and m == 1 and n == l_max, then
       *       compute actual residual norm to see if the iteration can be
       *       saved
       *   (3) the scaled and preconditioned right-hand side of the given
       *       linear system (denoted by b) is only computed once, and the
       *       result is stored in vtemp3 so it can be reused - reduces the
       *       number of psovles if using left preconditioning
       */
      if ((r_curr_norm > delta) ||
	  (r_curr_norm >= r_init_norm && m == 1 && n == l_max)) {

	/* Compute norm of residual ||b-A*x||_2 (preconditioned and scaled) */
	if (scale_x) N_VDiv(x, sx, vtemp1);
	else N_VScale(ONE, x, vtemp1);
	if (preOnRight) {
	  ier = psolve(P_data, vtemp1, vtemp2, PREC_RIGHT);
	  (*nps)++;
	  if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_UNREC);
	  N_VScale(ONE, vtemp2, vtemp1);
	}
	ier = atimes(A_data, vtemp1, vtemp2);
        if (ier != 0)
          return((ier < 0) ? SPTFQMR_ATIMES_FAIL_UNREC : SPTFQMR_ATIMES_FAIL_REC);
	if (preOnLeft) {
	  ier = psolve(P_data, vtemp2, vtemp1, PREC_LEFT);
	  (*nps)++;
	  if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC);
	}
	else N_VScale(ONE, vtemp2, vtemp1);
	if (scale_b) N_VProd(sb, vtemp1, vtemp2);
	else N_VScale(ONE, vtemp1, vtemp2);
	/* Only precondition and scale b once (result saved for reuse) */
	if (!b_ok) {
	  b_ok = TRUE;
	  if (preOnLeft) {
	    ier = psolve(P_data, b, vtemp3, PREC_LEFT);
	    (*nps)++;
	    if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC);
	  }
	  else N_VScale(ONE, b, vtemp3);
	  if (scale_b) N_VProd(sb, vtemp3, vtemp3);
	}
	N_VLinearSum(ONE, vtemp3, -ONE, vtemp2, vtemp1);
	*res_norm = r_curr_norm = RSqrt(N_VDotProd(vtemp1, vtemp1));

	/* Exit inner loop if inequality condition is satisfied 
	   (meaning exit if we have converged) */
	if (r_curr_norm <= delta) {
	  converged = TRUE;
	  break;
	}

      }

    }  /* END inner loop */

    /* If converged, then exit outer loop as well */
    if (converged == TRUE) break;

    /* rho[1] = r_star^T*r_[1] */
    rho[1] = N_VDotProd(r_star, r_[1]);

    /* beta = rho[1]/rho[0] */
    beta = rho[1]/rho[0];

    /* u_ = r_[1]+beta*q_ */
    N_VLinearSum(ONE, r_[1], beta, q_, u_);

    /* p_ = u_+beta*(q_+beta*p_) */
    N_VLinearSum(beta, q_, SQR(beta), p_, p_);
    N_VLinearSum(ONE, u_, ONE, p_, p_);

    /* v_ = A*p_ */
    if (scale_x) N_VDiv(p_, sx, vtemp1);
    else N_VScale(ONE, p_, vtemp1);
    if (preOnRight) {
      N_VScale(ONE, vtemp1, v_);
      ier = psolve(P_data, v_, vtemp1, PREC_RIGHT);
      (*nps)++;
      if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC);
    }
    ier = atimes(A_data, vtemp1, v_);
    if (ier != 0)
      return((ier < 0) ? SPTFQMR_ATIMES_FAIL_UNREC : SPTFQMR_ATIMES_FAIL_REC);
    if (preOnLeft) {
      ier = psolve(P_data, v_, vtemp1, PREC_LEFT);
      (*nps)++;
      if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC);
    }
    else N_VScale(ONE, v_, vtemp1);
    if (scale_b) N_VProd(sb, vtemp1, v_);
    else N_VScale(ONE, vtemp1, v_);

    /* Shift variable values */
    /* NOTE: reduces storage requirements */
    N_VScale(ONE, r_[1], r_[0]);
    rho[0] = rho[1];

  }  /* END outer loop */

  /* Determine return value */
  /* If iteration converged or residual was reduced, then return current iterate (x) */
  if ((converged == TRUE) || (r_curr_norm < r_init_norm)) {
    if (scale_x) N_VDiv(x, sx, x);
    if (preOnRight) {
      ier = psolve(P_data, x, vtemp1, PREC_RIGHT);
      (*nps)++;
      if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_UNREC);
      N_VScale(ONE, vtemp1, x);
    }
    if (converged == TRUE) return(SPTFQMR_SUCCESS);
    else return(SPTFQMR_RES_REDUCED);
  }
  /* Otherwise, return error code */
  else return(SPTFQMR_CONV_FAIL);
}
/* ----------------------------------------------------------------------
 * SUNLinSol_SPGMR Linear Solver Testing Routine
 *
 * We run multiple tests to exercise this solver:
 * 1. simple tridiagonal system (no preconditioning)
 * 2. simple tridiagonal system (Jacobi preconditioning)
 * 3. tridiagonal system w/ scale vector s1 (no preconditioning)
 * 4. tridiagonal system w/ scale vector s1 (Jacobi preconditioning)
 * 5. tridiagonal system w/ scale vector s2 (no preconditioning)
 * 6. tridiagonal system w/ scale vector s2 (Jacobi preconditioning)
 *
 * Note: We construct a tridiagonal matrix Ahat, a random solution xhat, 
 *       and a corresponding rhs vector bhat = Ahat*xhat, such that each 
 *       of these is unit-less.  To test row/column scaling, we use the 
 *       matrix A = S1-inverse Ahat S2, rhs vector b = S1-inverse bhat, 
 *       and solution vector x = (S2-inverse) xhat; hence the linear 
 *       system has rows scaled by S1-inverse and columns scaled by S2, 
 *       where S1 and S2 are the diagonal matrices with entries from the 
 *       vectors s1 and s2, the 'scaling' vectors supplied to SPGMR 
 *       having strictly positive entries.  When this is combined with 
 *       preconditioning, assume that Phat is the desired preconditioner 
 *       for Ahat, then our preconditioning matrix P \approx A should be
 *         left prec:  P-inverse \approx S1-inverse Ahat-inverse S1
 *         right prec:  P-inverse \approx S2-inverse Ahat-inverse S2.
 *       Here we use a diagonal preconditioner D, so the S*-inverse 
 *       and S* in the product cancel one another.
 * --------------------------------------------------------------------*/
int main(int argc, char *argv[]) 
{
  int             fails=0;          /* counter for test failures */
  int             passfail=0;       /* overall pass/fail flag    */
  SUNLinearSolver LS;               /* linear solver object      */
  N_Vector        xhat, x, b;       /* test vectors              */
  UserData        ProbData;         /* problem data structure    */
  int             gstype, pretype, maxl, print_timing;
  sunindextype    i;
  realtype        *vecdata;
  double          tol;

  /* check inputs: local problem size, timing flag */
  if (argc < 7) {
    printf("ERROR: SIX (6) Inputs required:\n");
    printf("  Problem size should be >0\n");
    printf("  Gram-Schmidt orthogonalization type should be 1 or 2\n");
    printf("  Preconditioning type should be 1 or 2\n");
    printf("  Maximum Krylov subspace dimension should be >0\n");
    printf("  Solver tolerance should be >0\n");
    printf("  timing output flag should be 0 or 1 \n");
    return 1;
  }
  ProbData.N = atol(argv[1]);
  problem_size = ProbData.N;
  if (ProbData.N <= 0) {
    printf("ERROR: Problem size must be a positive integer\n");
    return 1; 
  }
  gstype = atoi(argv[2]);
  if ((gstype < 1) || (gstype > 2)) {
    printf("ERROR: Gram-Schmidt orthogonalization type must be either 1 or 2\n");
    return 1; 
  }
  pretype = atoi(argv[3]);
  if ((pretype < 1) || (pretype > 2)) {
    printf("ERROR: Preconditioning type must be either 1 or 2\n");
    return 1; 
  }
  maxl = atoi(argv[4]);
  if (maxl <= 0) {
    printf("ERROR: Maximum Krylov subspace dimension must be a positive integer\n");
    return 1; 
  }
  tol = atof(argv[5]);
  if (tol <= ZERO) {
    printf("ERROR: Solver tolerance must be a positive real number\n");
    return 1; 
  }
  print_timing = atoi(argv[6]);
  SetTiming(print_timing);

  printf("\nSPGMR linear solver test:\n");
  printf("  Problem size = %ld\n", (long int) ProbData.N);
  printf("  Gram-Schmidt orthogonalization type = %i\n", gstype);
  printf("  Preconditioning type = %i\n", pretype);
  printf("  Maximum Krylov subspace dimension = %i\n", maxl);
  printf("  Solver Tolerance = %"GSYM"\n", tol);
  printf("  timing output flag = %i\n\n", print_timing);
  
  /* Create vectors */
  x = N_VNew_Serial(ProbData.N);
  if (check_flag(x, "N_VNew_Serial", 0)) return 1;
  xhat = N_VNew_Serial(ProbData.N);
  if (check_flag(xhat, "N_VNew_Serial", 0)) return 1;
  b = N_VNew_Serial(ProbData.N);
  if (check_flag(b, "N_VNew_Serial", 0)) return 1;
  ProbData.d = N_VNew_Serial(ProbData.N);
  if (check_flag(ProbData.d, "N_VNew_Serial", 0)) return 1;
  ProbData.s1 = N_VNew_Serial(ProbData.N);
  if (check_flag(ProbData.s1, "N_VNew_Serial", 0)) return 1;
  ProbData.s2 = N_VNew_Serial(ProbData.N);
  if (check_flag(ProbData.s2, "N_VNew_Serial", 0)) return 1;

  /* Fill xhat vector with uniform random data in [1,2] */
  vecdata = N_VGetArrayPointer(xhat);
  for (i=0; i<ProbData.N; i++) 
    vecdata[i] = ONE + urand();

  /* Fill Jacobi vector with matrix diagonal */
  N_VConst(FIVE, ProbData.d);
  
  /* Create SPGMR linear solver */
  LS = SUNLinSol_SPGMR(x, pretype, maxl);
  fails += Test_SUNLinSolGetType(LS, SUNLINEARSOLVER_ITERATIVE, 0);
  fails += Test_SUNLinSolSetATimes(LS, &ProbData, ATimes, 0);
  fails += Test_SUNLinSolSetPreconditioner(LS, &ProbData, PSetup, PSolve, 0);
  fails += Test_SUNLinSolSetScalingVectors(LS, ProbData.s1, ProbData.s2, 0);
  fails += Test_SUNLinSolInitialize(LS, 0);
  fails += Test_SUNLinSolSpace(LS, 0);
  fails += SUNLinSol_SPGMRSetGSType(LS, gstype);  
  if (fails) {
    printf("FAIL: SUNLinSol_SPGMR module failed %i initialization tests\n\n", fails);
    return 1;
  } else {
    printf("SUCCESS: SUNLinSol_SPGMR module passed all initialization tests\n\n");
  }

  
  /*** Test 1: simple Poisson-like solve (no preconditioning) ***/

  /* set scaling vectors */
  N_VConst(ONE, ProbData.s1);
  N_VConst(ONE, ProbData.s2);

  /* Fill x vector with scaled version */
  N_VDiv(xhat,ProbData.s2,x);

  /* Fill b vector with result of matrix-vector product */
  fails = ATimes(&ProbData, x, b);
  if (check_flag(&fails, "ATimes", 1)) return 1;

  /* Run tests with this setup */
  fails += SUNLinSol_SPGMRSetPrecType(LS, PREC_NONE);  
  fails += Test_SUNLinSolSetup(LS, NULL, 0);
  fails += Test_SUNLinSolSolve(LS, NULL, x, b, tol, 0);
  fails += Test_SUNLinSolLastFlag(LS, 0);
  fails += Test_SUNLinSolNumIters(LS, 0);
  fails += Test_SUNLinSolResNorm(LS, 0);
  fails += Test_SUNLinSolResid(LS, 0);
  
  /* Print result */
  if (fails) {
    printf("FAIL: SUNLinSol_SPGMR module, problem 1, failed %i tests\n\n", fails);
    passfail += 1;
  } else {
    printf("SUCCESS: SUNLinSol_SPGMR module, problem 1, passed all tests\n\n");
  }


  /*** Test 2: simple Poisson-like solve (Jacobi preconditioning) ***/

  /* set scaling vectors */
  N_VConst(ONE,  ProbData.s1);
  N_VConst(ONE,  ProbData.s2);

  /* Fill x vector with scaled version */
  N_VDiv(xhat,ProbData.s2,x);

  /* Fill b vector with result of matrix-vector product */
  fails = ATimes(&ProbData, x, b);
  if (check_flag(&fails, "ATimes", 1)) return 1;

  /* Run tests with this setup */
  fails += SUNLinSol_SPGMRSetPrecType(LS, pretype);  
  fails += Test_SUNLinSolSetup(LS, NULL, 0);
  fails += Test_SUNLinSolSolve(LS, NULL, x, b, tol, 0);
  fails += Test_SUNLinSolLastFlag(LS, 0);
  fails += Test_SUNLinSolNumIters(LS, 0);
  fails += Test_SUNLinSolResNorm(LS, 0);
  fails += Test_SUNLinSolResid(LS, 0);

  /* Print result */
  if (fails) {
    printf("FAIL: SUNLinSol_SPGMR module, problem 2, failed %i tests\n\n", fails);
    passfail += 1;
  } else {
    printf("SUCCESS: SUNLinSol_SPGMR module, problem 2, passed all tests\n\n");
  }
  

  /*** Test 3: Poisson-like solve w/ scaled rows (no preconditioning) ***/

  /* set scaling vectors */
  vecdata = N_VGetArrayPointer(ProbData.s1);
  for (i=0; i<ProbData.N; i++)
    vecdata[i] = ONE + THOUSAND*urand();
  N_VConst(ONE, ProbData.s2);

  /* Fill x vector with scaled version */
  N_VDiv(xhat,ProbData.s2,x);

  /* Fill b vector with result of matrix-vector product */
  fails = ATimes(&ProbData, x, b);
  if (check_flag(&fails, "ATimes", 1)) return 1;

  /* Run tests with this setup */
  fails += SUNLinSol_SPGMRSetPrecType(LS, PREC_NONE);  
  fails += Test_SUNLinSolSetup(LS, NULL, 0);
  fails += Test_SUNLinSolSolve(LS, NULL, x, b, tol, 0);
  fails += Test_SUNLinSolLastFlag(LS, 0);
  fails += Test_SUNLinSolNumIters(LS, 0);
  fails += Test_SUNLinSolResNorm(LS, 0);
  fails += Test_SUNLinSolResid(LS, 0);

  /* Print result */
  if (fails) {
    printf("FAIL: SUNLinSol_SPGMR module, problem 3, failed %i tests\n\n", fails);
    passfail += 1;
  } else {
    printf("SUCCESS: SUNLinSol_SPGMR module, problem 3, passed all tests\n\n");
  }


  /*** Test 4: Poisson-like solve w/ scaled rows (Jacobi preconditioning) ***/

  /* set scaling vectors */
  vecdata = N_VGetArrayPointer(ProbData.s1);
  for (i=0; i<ProbData.N; i++)
    vecdata[i] = ONE + THOUSAND*urand();
  N_VConst(ONE, ProbData.s2);

  /* Fill x vector with scaled version */
  N_VDiv(xhat,ProbData.s2,x);

  /* Fill b vector with result of matrix-vector product */
  fails = ATimes(&ProbData, x, b);
  if (check_flag(&fails, "ATimes", 1)) return 1;

  /* Run tests with this setup */
  fails += SUNLinSol_SPGMRSetPrecType(LS, pretype);  
  fails += Test_SUNLinSolSetup(LS, NULL, 0);
  fails += Test_SUNLinSolSolve(LS, NULL, x, b, tol, 0);
  fails += Test_SUNLinSolLastFlag(LS, 0);
  fails += Test_SUNLinSolNumIters(LS, 0);
  fails += Test_SUNLinSolResNorm(LS, 0);
  fails += Test_SUNLinSolResid(LS, 0);

  /* Print result */
  if (fails) {
    printf("FAIL: SUNLinSol_SPGMR module, problem 4, failed %i tests\n\n", fails);
    passfail += 1;
  } else {
    printf("SUCCESS: SUNLinSol_SPGMR module, problem 4, passed all tests\n\n");
  }


  /*** Test 5: Poisson-like solve w/ scaled columns (no preconditioning) ***/

  /* set scaling vectors */
  N_VConst(ONE, ProbData.s1);
  vecdata = N_VGetArrayPointer(ProbData.s2);
  for (i=0; i<ProbData.N; i++)
    vecdata[i] = ONE + THOUSAND*urand();

  /* Fill x vector with scaled version */
  N_VDiv(xhat,ProbData.s2,x);

  /* Fill b vector with result of matrix-vector product */
  fails = ATimes(&ProbData, x, b);
  if (check_flag(&fails, "ATimes", 1)) return 1;

  /* Run tests with this setup */
  fails += SUNLinSol_SPGMRSetPrecType(LS, PREC_NONE);
  fails += Test_SUNLinSolSetup(LS, NULL, 0);
  fails += Test_SUNLinSolSolve(LS, NULL, x, b, tol, 0);
  fails += Test_SUNLinSolLastFlag(LS, 0);
  fails += Test_SUNLinSolNumIters(LS, 0);
  fails += Test_SUNLinSolResNorm(LS, 0);
  fails += Test_SUNLinSolResid(LS, 0);

  /* Print result */
  if (fails) {
    printf("FAIL: SUNLinSol_SPGMR module, problem 5, failed %i tests\n\n", fails);
    passfail += 1;
  } else {
    printf("SUCCESS: SUNLinSol_SPGMR module, problem 5, passed all tests\n\n");
  }


  /*** Test 6: Poisson-like solve w/ scaled columns (Jacobi preconditioning) ***/

  /* set scaling vector, Jacobi solver vector */
  N_VConst(ONE, ProbData.s1);
  vecdata = N_VGetArrayPointer(ProbData.s2);
  for (i=0; i<ProbData.N; i++)
    vecdata[i] = ONE + THOUSAND*urand();

  /* Fill x vector with scaled version */
  N_VDiv(xhat,ProbData.s2,x);

  /* Fill b vector with result of matrix-vector product */
  fails = ATimes(&ProbData, x, b);
  if (check_flag(&fails, "ATimes", 1)) return 1;

  /* Run tests with this setup */
  fails += SUNLinSol_SPGMRSetPrecType(LS, pretype);  
  fails += Test_SUNLinSolSetup(LS, NULL, 0);
  fails += Test_SUNLinSolSolve(LS, NULL, x, b, tol, 0);
  fails += Test_SUNLinSolLastFlag(LS, 0);
  fails += Test_SUNLinSolNumIters(LS, 0);
  fails += Test_SUNLinSolResNorm(LS, 0);
  fails += Test_SUNLinSolResid(LS, 0);

  /* Print result */
  if (fails) {
    printf("FAIL: SUNLinSol_SPGMR module, problem 6, failed %i tests\n\n", fails);
    passfail += 1;
  } else {
    printf("SUCCESS: SUNLinSol_SPGMR module, problem 6, passed all tests\n\n");
  }


  /* Free solver and vectors */
  SUNLinSolFree(LS);
  N_VDestroy(x);
  N_VDestroy(xhat);
  N_VDestroy(b);
  N_VDestroy(ProbData.d);
  N_VDestroy(ProbData.s1);
  N_VDestroy(ProbData.s2);

  return(passfail);
}