Ejemplo n.º 1
0
Archivo: cgs.cpp Proyecto: ryseto/demsd
/* Ref: Weiss, Algorithm 11 CGS
 * INPUT
 *   n : dimension of the problem
 *   b [n] : r-h-s vector
 *   atimes (int n, static double *x, double *b, void *param) :
 *        calc matrix-vector product A.x = b.
 *   atimes_param : parameters for atimes().
 *   it : struct iter. following entries are used
 *        it->max = kend : max of iteration
 *        it->eps = eps  : criteria for |r^2|/|b^2|
 * OUTPUT
 *   returned value : 0 == success, otherwise (-1) == failed
 *   x [n] : solution
 *   it->niter : # of iteration
 *   it->res2  : |r^2| / |b^2|
 */
int
cgs (int n, const double *b, double *x,
     void (*atimes) (int, const double *, double *, void *),
     void *atimes_param,
     struct iter *it)
{
#ifndef HAVE_CBLAS_H
# ifdef HAVE_BLAS_H
  /* use Fortran BLAS routines */

  int i_1 = 1;
  double d_m1 = -1.0;
  double d_2 = 2.0;

# endif // !HAVE_BLAS_H
#endif // !HAVE_CBLAS_H

  int ret = -1;
  double eps2 = it->eps * it->eps;
  int itmax = it->max;

  double *r  = (double *)malloc (sizeof (double) * n);
  double *r0 = (double *)malloc (sizeof (double) * n);
  double *p  = (double *)malloc (sizeof (double) * n);
  double *u  = (double *)malloc (sizeof (double) * n);
  double *ap = (double *)malloc (sizeof (double) * n);
  double *q  = (double *)malloc (sizeof (double) * n);
  double *t  = (double *)malloc (sizeof (double) * n);
  CHECK_MALLOC (r,  "cgs");
  CHECK_MALLOC (r0, "cgs");
  CHECK_MALLOC (p,  "cgs");
  CHECK_MALLOC (u,  "cgs");
  CHECK_MALLOC (ap, "cgs");
  CHECK_MALLOC (q,  "cgs");
  CHECK_MALLOC (t,  "cgs");


  double r0ap;
  double rho, rho1;
  double delta;
  double beta;

  double res2 = 0.0;

#ifdef HAVE_CBLAS_H
  /**
   * ATLAS version
   */

  double b2 = cblas_ddot (n, b, 1, b, 1); // (b,b)
  eps2 *= b2;

  // initial residue
  atimes (n, x, r, atimes_param); // r = A.x
  cblas_daxpy (n, -1.0, b, 1, r, 1); // r = A.x - b

  cblas_dcopy (n, r, 1, r0, 1); // r0* = r
  cblas_dcopy (n, r, 1, p, 1); // p = r
  cblas_dcopy (n, r, 1, u, 1); // u = r

  rho = cblas_ddot (n, r0, 1, r, 1); // rho = (r0*, r)

  int i;
  for (i = 0; i < itmax; i ++)
    {
      atimes (n, p, ap, atimes_param); // ap = A.p
      r0ap = cblas_ddot (n, r0, 1, ap, 1); // r0ap = (r0*, A.p)
      delta = - rho / r0ap;

      cblas_dcopy (n, u, 1, q, 1); // q = u
      cblas_dscal (n, 2.0, q, 1); // q = 2 u
      cblas_daxpy (n, delta, ap, 1, q, 1); // q = 2 u + delta A.p

      atimes (n, q, t, atimes_param); // t = A.q

      cblas_daxpy (n, delta, t, 1, r, 1); // r = r + delta t
      cblas_daxpy (n, delta, q, 1, x, 1); // x = x + delta q

      res2 = cblas_ddot (n, r, 1, r, 1);
      if (it->debug == 2)
	{
	  fprintf (it->out, "libiter-cgs %d %e\n", i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}

      rho1 = cblas_ddot (n, r0, 1, r, 1); // rho = (r0*, r)
      beta = rho1 / rho;
      rho = rho1;

      // here t is not used so that this is used for working area.
      double *qu = t;
      cblas_dcopy (n, q, 1, qu, 1); // qu = q
      cblas_daxpy (n, -1.0, u, 1, qu, 1); // qu = q - u
      cblas_dcopy (n, r, 1, u, 1); // u = r
      cblas_daxpy (n, beta, qu, 1, u, 1); // u = r + beta (q - u)

      cblas_daxpy (n, beta, p, 1, qu, 1); // qu = q - u + beta * p
      cblas_dcopy (n, u, 1, p, 1); // p = u
      cblas_daxpy (n, beta, qu, 1, p, 1); // p = u + beta (q - u + b * p)
    }

#else // !HAVE_CBLAS_H
# ifdef HAVE_BLAS_H
  /**
   * BLAS version
   */

  double b2 = ddot_ (&n, b, &i_1, b, &i_1); // (b,b)
  eps2 *= b2;

  // initial residue
  atimes (n, x, r, atimes_param); // r = A.x
  daxpy_ (&n, &d_m1, b, &i_1, r, &i_1); // r = A.x - b

  dcopy_ (&n, r, &i_1, r0, &i_1); // r0* = r
  dcopy_ (&n, r, &i_1, p, &i_1); // p = r
  dcopy_ (&n, r, &i_1, u, &i_1); // u = r

  rho = ddot_ (&n, r0, &i_1, r, &i_1); // rho = (r0*, r)

  int i;
  for (i = 0; i < itmax; i ++)
    {
      atimes (n, p, ap, atimes_param); // ap = A.p
      r0ap = ddot_ (&n, r0, &i_1, ap, &i_1); // r0ap = (r0*, A.p)
      delta = - rho / r0ap;

      dcopy_ (&n, u, &i_1, q, &i_1); // q = u
      dscal_ (&n, &d_2, q, &i_1); // q = 2 u
      daxpy_ (&n, &delta, ap, &i_1, q, &i_1); // q = 2 u + delta A.p

      atimes (n, q, t, atimes_param); // t = A.q

      daxpy_ (&n, &delta, t, &i_1, r, &i_1); // r = r + delta t
      daxpy_ (&n, &delta, q, &i_1, x, &i_1); // x = x + delta q

      res2 = ddot_ (&n, r, &i_1, r, &i_1);
      if (it->debug == 2)
	{
	  fprintf (it->out, "libiter-cgs %d %e\n", i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}

      rho1 = ddot_ (&n, r0, &i_1, r, &i_1); // rho = (r0*, r)
      beta = rho1 / rho;
      rho = rho1;

      // here t is not used so that this is used for working area.
      double *qu = t;
      dcopy_ (&n, q, &i_1, qu, &i_1); // qu = q
      daxpy_ (&n, &d_m1, u, &i_1, qu, &i_1); // qu = q - u
      dcopy_ (&n, r, &i_1, u, &i_1); // u = r
      daxpy_ (&n, &beta, qu, &i_1, u, &i_1); // u = r + beta (q - u)

      daxpy_ (&n, &beta, p, &i_1, qu, &i_1); // qu = q - u + beta * p
      dcopy_ (&n, u, &i_1, p, &i_1); // p = u
      daxpy_ (&n, &beta, qu, &i_1, p, &i_1); // p = u + beta (q - u + b * p)
    }

# else // !HAVE_BLAS_H
  /**
   * local BLAS version
   */

  double b2 = my_ddot (n, b, 1, b, 1); // (b,b)
  eps2 *= b2;

  // initial residue
  atimes (n, x, r, atimes_param); // r = A.x
  my_daxpy (n, -1.0, b, 1, r, 1); // r = A.x - b

  my_dcopy (n, r, 1, r0, 1); // r0* = r
  my_dcopy (n, r, 1, p, 1); // p = r
  my_dcopy (n, r, 1, u, 1); // u = r

  rho = my_ddot (n, r0, 1, r, 1); // rho = (r0*, r)

  int i;
  for (i = 0; i < itmax; i ++)
    {
      atimes (n, p, ap, atimes_param); // ap = A.p
      r0ap = my_ddot (n, r0, 1, ap, 1); // r0ap = (r0*, A.p)
      delta = - rho / r0ap;

      my_dcopy (n, u, 1, q, 1); // q = u
      my_dscal (n, 2.0, q, 1); // q = 2 u
      my_daxpy (n, delta, ap, 1, q, 1); // q = 2 u + delta A.p

      atimes (n, q, t, atimes_param); // t = A.q

      my_daxpy (n, delta, t, 1, r, 1); // r = r + delta t
      my_daxpy (n, delta, q, 1, x, 1); // x = x + delta q

      res2 = my_ddot (n, r, 1, r, 1);
      if (it->debug == 2)
	{
	  fprintf (it->out, "libiter-cgs %d %e\n", i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}

      rho1 = my_ddot (n, r0, 1, r, 1); // rho = (r0*, r)
      beta = rho1 / rho;
      rho = rho1;

      // here t is not used so that this is used for working area.
      double *qu = t;
      my_dcopy (n, q, 1, qu, 1); // qu = q
      my_daxpy (n, -1.0, u, 1, qu, 1); // qu = q - u
      my_dcopy (n, r, 1, u, 1); // u = r
      my_daxpy (n, beta, qu, 1, u, 1); // u = r + beta (q - u)

      my_daxpy (n, beta, p, 1, qu, 1); // qu = q - u + beta * p
      my_dcopy (n, u, 1, p, 1); // p = u
      my_daxpy (n, beta, qu, 1, p, 1); // p = u + beta (q - u + b * p)
    }

# endif // !HAVE_BLAS_H
#endif // !HAVE_CBLAS_H

  free (r);
  free (r0);
  free (p);
  free (u);
  free (ap);
  free (q);
  free (t);

  if (it->debug == 1)
    {
      fprintf (it->out, "libiter-cgs it= %d res^2= %e\n", i, res2);
    }

  it->niter = i;
  it->res2  = res2 / b2;
  return (ret);
}
Ejemplo n.º 2
0
/* Ref: Weiss, Algorithm 12 BiCGSTAB
 * INPUT
 *   n : dimension of the problem
 *   b [n] : r-h-s vector
 *   atimes (int n, static double *x, double *b, void *param) :
 *        calc matrix-vector product A.x = b.
 *   atimes_param : parameters for atimes().
 *   it : struct iter. following entries are used
 *        it->max = kend : max of iteration
 *        it->eps = eps  : criteria for |r^2|/|b^2|
 * OUTPUT
 *   returned value : 0 == success, otherwise (-1) == failed
 *   x [n] : solution
 *   it->niter : # of iteration
 *   it->res2  : |r^2| / |b^2|
 */
int
bicgstab (int n, const double *b, double *x,
	  void (*atimes) (int, const double *, double *, void *),
	  void *atimes_param,
	  struct iter *it)
{
#ifndef HAVE_CBLAS_H
# ifdef HAVE_BLAS_H
  /* use Fortran BLAS routines */

  int i_1 = 1;
  double d_1 = 1.0;
  double d_m1 = -1.0;

# endif // !HAVE_BLAS_H
#endif // !HAVE_CBLAS_H

  int ret = -1;
  double eps2 = it->eps * it->eps;
  int itmax = it->max;

  double *r  = (double *)malloc (sizeof (double) * n);
  double *rs = (double *)malloc (sizeof (double) * n);
  double *p  = (double *)malloc (sizeof (double) * n);
  double *ap = (double *)malloc (sizeof (double) * n);
  double *s  = (double *)malloc (sizeof (double) * n);
  double *t  = (double *)malloc (sizeof (double) * n);
  CHECK_MALLOC (r,  "bicgstab");
  CHECK_MALLOC (rs, "bicgstab");
  CHECK_MALLOC (p,  "bicgstab");
  CHECK_MALLOC (ap, "bicgstab");
  CHECK_MALLOC (s,  "bicgstab");
  CHECK_MALLOC (t,  "bicgstab");

  double rsap; // (r*, A.p)
  double st;
  double t2;

  double rho, rho1;
  double delta;
  double gamma;
  double beta;

  double res2 = 0.0;

#ifdef HAVE_CBLAS_H
  /**
   * ATLAS version
   */

  double b2 = cblas_ddot (n, b, 1, b, 1); // (b,b)
  eps2 *= b2;

  atimes (n, x, r, atimes_param);    // r = A.x ...
  cblas_daxpy (n, -1.0, b, 1, r, 1); //         - b

  cblas_dcopy (n, r, 1, rs, 1); // r* = r
  cblas_dcopy (n, r, 1, p, 1);  // p  = r

  rho = cblas_ddot (n, rs, 1, r, 1); // rho = (r*, r)

  int i;
  for (i = 0; i < itmax; i ++)
    {
      atimes (n, p, ap, atimes_param); // ap = A.p
      rsap = cblas_ddot (n, rs, 1, ap, 1); // rsap = (r*, A.p)
      delta = - rho / rsap;

      cblas_dcopy (n, r, 1, s, 1);         // s = r ...
      cblas_daxpy (n, delta, ap, 1, s, 1); //   + delta A.p
      atimes (n, s, t, atimes_param); // t = A.s

      st = cblas_ddot (n, s, 1, t, 1); // st = (s, t)
      t2 = cblas_ddot (n, t, 1, t, 1); // t2 = (t, t)
      gamma = - st / t2;

      cblas_dcopy (n, s, 1, r, 1);        // r = s ...
      cblas_daxpy (n, gamma, t, 1, r, 1); //   + gamma t

      cblas_daxpy (n, delta, p, 1, x, 1); // x = x + delta p...
      cblas_daxpy (n, gamma, s, 1, x, 1); //       + gamma s

      res2 = cblas_ddot (n, r, 1, r, 1);
      if (it->debug == 2)
	{
	  fprintf (it->out,
		   "libiter-bicgstab(cblas) %d %e\n",
		   i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}

      rho1 = cblas_ddot (n, rs, 1, r, 1); // rho = (r*, r)
      beta = rho1 / rho * delta / gamma;
      rho = rho1;

      cblas_daxpy (n, gamma, ap, 1, p, 1); // p = p + gamma A.p
      cblas_dscal (n, beta, p, 1);         // p = beta (p + gamma A.p)
      cblas_daxpy (n, 1.0, r, 1, p, 1); // p = r + beta(p + gamma A.p)
    }

#else // !HAVE_CBLAS_H
# ifdef HAVE_BLAS_H
  /**
   * BLAS version
   */

  double b2 = ddot_ (&n, b, &i_1, b, &i_1); // (b,b)
  eps2 *= b2;

  atimes (n, x, r, atimes_param);       // r = A.x ...
  daxpy_ (&n, &d_m1, b, &i_1, r, &i_1); //         - b

  dcopy_ (&n, r, &i_1, rs, &i_1); // r* = r
  dcopy_ (&n, r, &i_1, p, &i_1);  // p  = r

  rho = ddot_ (&n, rs, &i_1, r, &i_1); // rho = (r*, r)

  int i;
  for (i = 0; i < itmax; i ++)
    {
      atimes (n, p, ap, atimes_param); // ap = A.p
      rsap = ddot_ (&n, rs, &i_1, ap, &i_1); // rsap = (r*, A.p)
      delta = - rho / rsap;

      dcopy_ (&n, r, &i_1, s, &i_1);          // s = r ...
      daxpy_ (&n, &delta, ap, &i_1, s, &i_1); //   + delta A.p
      atimes (n, s, t, atimes_param); // t = A.s

      st = ddot_ (&n, s, &i_1, t, &i_1); // st = (s, t)
      t2 = ddot_ (&n, t, &i_1, t, &i_1); // t2 = (t, t)
      gamma = - st / t2;

      dcopy_ (&n, s, &i_1, r, &i_1);         // r = s ...
      daxpy_ (&n, &gamma, t, &i_1, r, &i_1); //   + gamma t

      daxpy_ (&n, &delta, p, &i_1, x, &i_1); // x = x + delta p...
      daxpy_ (&n, &gamma, s, &i_1, x, &i_1); //       + gamma s

      res2 = ddot_ (&n, r, &i_1, r, &i_1);
      if (it->debug == 2)
	{
	  fprintf (it->out,
		   "libiter-bicgstab(blas) %d %e\n",
		   i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}
      if (res2 > 1.0e20)
	{
	  // already too big residual
	  break;
	}

      rho1 = ddot_ (&n, rs, &i_1, r, &i_1); // rho = (r*, r)
      beta = rho1 / rho * delta / gamma;
      rho = rho1;

      daxpy_ (&n, &gamma, ap, &i_1, p, &i_1); // p = p + gamma A.p
      dscal_ (&n, &beta, p, &i_1);            // p = beta (p + gamma A.p)
      daxpy_ (&n, &d_1, r, &i_1, p, &i_1); // p = r + beta(p + gamma A.p)
    }

# else // !HAVE_BLAS_H
  /**
   * local BLAS version
   */

  double b2 = my_ddot (n, b, 1, b, 1); // (b,b)
  eps2 *= b2;

  atimes (n, x, r, atimes_param);    // r = A.x ...
  my_daxpy (n, -1.0, b, 1, r, 1); //         - b

  my_dcopy (n, r, 1, rs, 1); // r* = r
  my_dcopy (n, r, 1, p, 1);  // p = r

  rho = my_ddot (n, rs, 1, r, 1); // rho = (r*, r)

  int i;
  for (i = 0; i < itmax; i ++)
    {
      atimes (n, p, ap, atimes_param); // ap = A.p
      rsap = my_ddot (n, rs, 1, ap, 1); // rsap = (r*, A.p)
      delta = - rho / rsap;

      my_dcopy (n, r, 1, s, 1);         // s = r ...
      my_daxpy (n, delta, ap, 1, s, 1); //   + delta A.p
      atimes (n, s, t, atimes_param); // t = A.s

      st = my_ddot (n, s, 1, t, 1); // st = (s, t)
      t2 = my_ddot (n, t, 1, t, 1); // t2 = (t, t)
      gamma = - st / t2;

      my_dcopy (n, s, 1, r, 1);        // r = s ...
      my_daxpy (n, gamma, t, 1, r, 1); //   + gamma t

      my_daxpy (n, delta, p, 1, x, 1); // x = x + delta p...
      my_daxpy (n, gamma, s, 1, x, 1); //       + gamma s

      res2 = my_ddot (n, r, 1, r, 1);
      if (it->debug == 2)
	{
	  fprintf (it->out,
		   "libiter-bicgstab(myblas) %d %e\n",
		   i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}

      rho1 = my_ddot (n, rs, 1, r, 1); // rho = (r*, r)
      beta = rho1 / rho * delta / gamma;
      rho = rho1;

      my_daxpy (n, gamma, ap, 1, p, 1); // p = p + gamma A.p
      my_dscal (n, beta, p, 1);         // p = beta (p + gamma A.p)
      my_daxpy (n, 1.0, r, 1, p, 1); // p = r + beta(p + gamma A.p)
    }

# endif // !HAVE_BLAS_H
#endif // !HAVE_CBLAS_H

  free (r);
  free (rs);
  free (p);
  free (ap);
  free (s);
  free (t);

  if (it->debug == 1)
    {
      fprintf (it->out, "libiter-bicgstab %d %e\n", i, res2 / b2);
    }

  it->niter = i;
  it->res2  = res2 / b2;
  return (ret);
}
Ejemplo n.º 3
0
void PCBCGSolver::linbcg(unsigned long n, double b[], double x[], int itol, double tol,
                         int itmax, int *iter, double *err) {

    unsigned long j;
    double ak,akden,bk,bkden,bknum,bnrm,dxnrm,xnrm,zm1nrm,znrm;
    double *p,*pp,*r,*rr,*z,*zz;

    p=new double[n+1];
    pp=new double[n+1];
    r=new double[n+1];
    rr=new double[n+1];
    z=new double[n+1];
    zz=new double[n+1];

    *iter=0;
    atimes(n,x,r,0);
    for (j=1; j<=n; j++) {
        r[j]=b[j]-r[j];
        rr[j]=r[j];
    }
    znrm=1.0;
    if (itol == 1) bnrm=snrm(n,b,itol);
    else if (itol == 2) {
        asolve(n,b,z,0);
        bnrm=snrm(n,z,itol);
    }
    else if (itol == 3 || itol == 4) {
        asolve(n,b,z,0);
        bnrm=snrm(n,z,itol);
        asolve(n,r,z,0);
        znrm=snrm(n,z,itol);
    } else printf("illegal itol in linbcg");
    asolve(n,r,z,0);
    while (*iter <= itmax) {
        ++(*iter);
        zm1nrm=znrm;
        asolve(n,rr,zz,1);
        for (bknum=0.0,j=1; j<=n; j++) bknum += z[j]*rr[j];
        if (*iter == 1) {
            for (j=1; j<=n; j++) {
                p[j]=z[j];
                pp[j]=zz[j];
            }
        }
        else {
            bk=bknum/bkden;
            for (j=1; j<=n; j++) {
                p[j]=bk*p[j]+z[j];
                pp[j]=bk*pp[j]+zz[j];
            }
        }
        bkden=bknum;
        atimes(n,p,z,0);
        for (akden=0.0,j=1; j<=n; j++) akden += z[j]*pp[j];
        ak=bknum/akden;
        atimes(n,pp,zz,1);
        for (j=1; j<=n; j++) {
            x[j] += ak*p[j];
            r[j] -= ak*z[j];
            rr[j] -= ak*zz[j];
        }
        asolve(n,r,z,0);
        if (itol == 1 || itol == 2) {
            znrm=1.0;
            *err=snrm(n,r,itol)/bnrm;
        } else if (itol == 3 || itol == 4) {
            znrm=snrm(n,z,itol);
            if (fabs(zm1nrm-znrm) > EPS*znrm) {
                dxnrm=fabs(ak)*snrm(n,p,itol);
                *err=znrm/fabs(zm1nrm-znrm)*dxnrm;
            } else {
                *err=znrm/bnrm;
                continue;
            }
            xnrm=snrm(n,x,itol);
            if (*err <= 0.5*xnrm) *err /= xnrm;
            else {
                *err=znrm/bnrm;
                continue;
            }
        }
        //printf("iter=%4d err=%12.6f\n",*iter,*err);
        if (*err <= tol) break;
    }

    delete [] p;
    delete [] pp;
    delete [] r;
    delete [] rr;
    delete [] z;
    delete [] zz;
    p = NULL;
    pp = NULL;
    r = NULL;
    rr = NULL;
    z = NULL;
    zz = NULL;
}
Ejemplo n.º 4
0
void linbcg(unsigned long n, double b[], double x[], int itol, double tol,
	int itmax, int *iter, double *err)
{
	void asolve(unsigned long n, double b[], double x[], int itrnsp);
	void atimes(unsigned long n, double x[], double r[], int itrnsp);
	double snrm(unsigned long n, double sx[], int itol);
	unsigned long j;
	double ak,akden,bk,bkden,bknum,bnrm,dxnrm,xnrm,zm1nrm,znrm;
	double *p,*pp,*r,*rr,*z,*zz;

	p=dvector(1,n);
	pp=dvector(1,n);
	r=dvector(1,n);
	rr=dvector(1,n);
	z=dvector(1,n);
	zz=dvector(1,n);

	*iter=0;
	atimes(n,x,r,0);
	for (j=1;j<=n;j++) {
		r[j]=b[j]-r[j];
		rr[j]=r[j];
	}
	znrm=1.0;
	if (itol == 1) bnrm=snrm(n,b,itol);
	else if (itol == 2) {
		asolve(n,b,z,0);
		bnrm=snrm(n,z,itol);
	}
	else if (itol == 3 || itol == 4) {
		asolve(n,b,z,0);
		bnrm=snrm(n,z,itol);
		asolve(n,r,z,0);
		znrm=snrm(n,z,itol);
	} else nrerror("illegal itol in linbcg");
	asolve(n,r,z,0);
	while (*iter <= itmax) {
		++(*iter);
		zm1nrm=znrm;
		asolve(n,rr,zz,1);
		for (bknum=0.0,j=1;j<=n;j++) bknum += z[j]*rr[j];
		if (*iter == 1) {
			for (j=1;j<=n;j++) {
				p[j]=z[j];
				pp[j]=zz[j];
			}
		}
		else {
			bk=bknum/bkden;
			for (j=1;j<=n;j++) {
				p[j]=bk*p[j]+z[j];
				pp[j]=bk*pp[j]+zz[j];
			}
		}
		bkden=bknum;
		atimes(n,p,z,0);
		for (akden=0.0,j=1;j<=n;j++) akden += z[j]*pp[j];
		ak=bknum/akden;
		atimes(n,pp,zz,1);
		for (j=1;j<=n;j++) {
			x[j] += ak*p[j];
			r[j] -= ak*z[j];
			rr[j] -= ak*zz[j];
		}
		asolve(n,r,z,0);
		if (itol == 1 || itol == 2) {
			znrm=1.0;
			*err=snrm(n,r,itol)/bnrm;
		} else if (itol == 3 || itol == 4) {
			znrm=snrm(n,z,itol);
			if (fabs(zm1nrm-znrm) > EPS*znrm) {
				dxnrm=fabs(ak)*snrm(n,p,itol);
				*err=znrm/fabs(zm1nrm-znrm)*dxnrm;
			} else {
				*err=znrm/bnrm;
				continue;
			}
			xnrm=snrm(n,x,itol);
			if (*err <= 0.5*xnrm) *err /= xnrm;
			else {
				*err=znrm/bnrm;
				continue;
			}
		}
		printf("iter=%4d err=%12.6f\n",*iter,*err);
	if (*err <= tol) break;
	}

	free_dvector(p,1,n);
	free_dvector(pp,1,n);
	free_dvector(r,1,n);
	free_dvector(rr,1,n);
	free_dvector(z,1,n);
	free_dvector(zz,1,n);
}
Ejemplo n.º 5
0
/* BICO -- Weiss' Algorithm 9
 * INPUT
 *   n : dimension of the problem
 *   b[n] : r-h-s vector
 *   atimes (int n, static double *x, double *b, void *param) :
 *        calc matrix-vector product A.x = b.
 *   atimes_t (int n, static double *x, double *b, void *param) :
 *        calc matrix-vector product A^T.x = b.
 *   atimes_param : parameters for atimes() and atimes_t().
 *   it : struct iter. following entries are used
 *        it->max = kend : max of iteration
 *        it->eps = eps  : criteria for |r^2|/|b^2|
 * OUTPUT
 *   returned value : 0 == success, otherwise (-1) == failed
 *   x[n] : solution
 *   it->niter : # of iteration
 *   it->res2  : |r^2| / |b^2|
 */
int
bico (int n, const double *b, double *x,
      void (*atimes) (int, const double *, double *, void *),
      void (*atimes_t) (int, const double *, double *, void *),
      void *atimes_param,
      struct iter *it)
{
  int ret = -1;
  int itmax = it->max;
  double eps2 = it->eps * it->eps;

  int i_1 = 1;
  double d_1 = 1.0;
  double d_m1 = -1.0;

  double *xt   = (double *)malloc (sizeof (double) * n);
  double *r    = (double *)malloc (sizeof (double) * n);
  double *rt   = (double *)malloc (sizeof (double) * n);
  double *rs   = (double *)malloc (sizeof (double) * n);
  double *p    = (double *)malloc (sizeof (double) * n);
  double *ps   = (double *)malloc (sizeof (double) * n);
  double *atps = (double *)malloc (sizeof (double) * n);
  double *ap   = (double *)malloc (sizeof (double) * n);
  double *rtmr = (double *)malloc (sizeof (double) * n);
  CHECK_MALLOC (xt,   "bico");
  CHECK_MALLOC (r,    "bico");
  CHECK_MALLOC (rt,   "bico");
  CHECK_MALLOC (rs,   "bico");
  CHECK_MALLOC (p,    "bico");
  CHECK_MALLOC (ps,   "bico");
  CHECK_MALLOC (atps, "bico");
  CHECK_MALLOC (ap,   "bico");
  CHECK_MALLOC (rtmr, "bico");

  double b2 = ddot_ (&n, b, &i_1, b, &i_1);
  eps2 *= b2;
  double res2 = 0.0;

  dcopy_ (&n, x, &i_1, xt, &i_1); // xt = x
  
  atimes (n, x, r, atimes_param); // r = A.x
  daxpy_ (&n, &d_m1, b, &i_1, r, &i_1); // r = A.x - b

  dcopy_ (&n, r, &i_1, rt, &i_1); // rt = r
  dcopy_ (&n, r, &i_1, rs, &i_1); // r* = r
  dcopy_ (&n, r, &i_1, p,  &i_1); // p  = r
  dcopy_ (&n, r, &i_1, ps, &i_1); // p* = r


  int i;
  for (i = 0; i < itmax; i ++)
    {
      atimes_t (n, ps, atps, atimes_param); // atps = At.p*
      double patps = ddot_ (&n, p, &i_1, atps, &i_1); // patps = (p, At.p*)
      double rtrs = ddot_ (&n, rt, &i_1, rs, &i_1); // rtrs = (rt, r*)
      double delta = - rtrs / patps;

      atimes (n, p, ap, atimes_param); // ap = A.p
      daxpy_ (&n, &delta, ap, &i_1, rt, &i_1); // rt += delta A.ps
      daxpy_ (&n, &delta, atps, &i_1, rs, &i_1); // r* += delta At.ps

      double rtrs1 = ddot_ (&n, rt, &i_1, rs, &i_1); // rtrs = (rt, r*) for news
      double beta = rtrs1 / rtrs;
      rtrs = rtrs1;

      daxpy_ (&n, &delta, p, &i_1, xt, &i_1); // xt += delta p(old)

      dscal_ (&n, &beta, p, &i_1); // p = beta p(old)
      daxpy_ (&n, &d_1, rt, &i_1, p, &i_1); // p = rt + beta p(old)

      dscal_ (&n, &beta, ps, &i_1); // p* = beta p*(old)
      daxpy_ (&n, &d_1, rs, &i_1, ps, &i_1); // p* = r* + beta p*(old)

      dcopy_ (&n, rt, &i_1, rtmr, &i_1); // rtmr = rt
      daxpy_ (&n, &d_m1, r, &i_1, rtmr, &i_1); // rtmr = rt - r
      // rtmr2 = (rt - r, rt - r)
      double rtmr2 = ddot_ (&n, rtmr, &i_1, rtmr, &i_1);
      double rrtmr = ddot_ (&n, r, &i_1, rtmr, &i_1); // rrtmr = (r, rt - r)
      double gamma = - rrtmr / rtmr2;

      double d_1_gamma = 1.0 - gamma;
      dscal_ (&n, &d_1_gamma, x, &i_1); // x = (1-gamma) x(old)
      daxpy_ (&n, &gamma, xt, &i_1, x, &i_1); // x += gamma(xt - x(old))

      dscal_ (&n, &d_1_gamma, r, &i_1); // r = (1-gamma) r(old)
      daxpy_ (&n, &gamma, rt, &i_1, r, &i_1); // r += gamma(rt - r(old))

      res2 = ddot_ (&n, r, &i_1, r, &i_1);
      if (it->debug == 2)
	{
	  fprintf (it->out, "libiter-bico %d %e\n", i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}
    }

  free (xt);
  free (r);
  free (rt);
  free (rs);
  free (p);
  free (ps);
  free (atps);
  free (ap);
  free (rtmr);

  if (it->debug == 1)
    {
      fprintf (it->out, "libiter-bico %d %e\n", i, res2 / b2);
    }

  it->niter = i;
  it->res2  = res2 / b2;
  return (ret);
}
Ejemplo n.º 6
0
/* obtain min and max of real part eigenvalues by dsaupd_()
 * INPUT
 *  n : dimension of the matrix
 *  atimes (n, x, b, user_data) : routine to calc A.x and return b[]
 *  user_data : pointer to be passed to solver and atimes routines
 *  eps : required precision
 * OUTPUT
 *  l[2] : l[0] = min
 *         l[1] = max
 */
void dsaupd_wrap_min_max (int n, double *l,
			  void (*atimes)
			  (int, const double *, double *, void *),
			  void *user_data,
			  double eps)
{
  char bmat[2] = "I"; // standard eigenvalue problem A*x = lambda*x
  char SA[3] = "SA"; // compute the NEV smallest (algebraic) eigenvalues.
  char LA[3] = "LA"; // compute the NEV largest (algebraic) eigenvalues.
  int nev = 1;

  double *resid = (double *)malloc (sizeof (double) * n);
  CHECK_MALLOC (resid, "dsaupd_wrap_min_max");

  int ncv;
  /*
  //ncv = 2 * nev + 1;
  ncv = 2 * nev * 14;
  if (ncv < 4) ncv = 4;
  if (ncv > n) ncv = n;
  */
  ncv = n;
  //fprintf (stderr, "# n = %d, nev = %d, ncv = %d\n", n, nev, ncv);

  int ldv = n;
  double *v = (double *)malloc (sizeof (double) * ldv*ncv);
  CHECK_MALLOC (v, "dsaupd_wrap_min_max");

  int iparam[11];
  int ishift = 1;     // exact shifts
  int maxitr = 3 * n; // max iterations of Arnoldi steps
  int mode   = 1;     // type of eigenproblem
  iparam[0] = ishift; // IPARAM(1) = ISHIFT
  iparam[2] = maxitr; // IPARAM(3) = MXITER
  iparam[6] = mode;   // IPARAM(7) = MODE


  int ipntr[11];

  int lworkl = ncv*(ncv+8);
  double *workd = (double *)malloc (sizeof (double) * 3 * n);
  double *workl = (double *)malloc (sizeof (double) * lworkl);
  CHECK_MALLOC (workd, "dsaupd_wrap_min_max");
  CHECK_MALLOC (workl, "dsaupd_wrap_min_max");



  // for post-process
  int rvec = 0;  // false? (no eigenvectors)
  char howmny[2] = "A"; // Compute NEV Ritz vectors; 
  int *select = (int *)malloc (sizeof (int) * ncv);
  double *d = (double *)malloc (sizeof (double) * nev);
  double *z = (double *)malloc (sizeof (double) * n * nev);
  CHECK_MALLOC (select, "dsaupd_wrap_min_max");
  CHECK_MALLOC (d, "dsaupd_wrap_min_max");
  CHECK_MALLOC (z, "dsaupd_wrap_min_max");
  int ldz = n;
  double sigma;
  int ierr;


  // The Smallest Eigenvalue
  int ido = 0; // restart
  int info = 0; // a randomly initial residual vector is used.
  dsaupd_(&ido, bmat, &n, SA, &nev, &eps, resid, 
	  &ncv, v, &ldv, iparam, ipntr, workd, workl,
	  &lworkl, &info);

  while (ido == -1 || ido == 1)
    {
      atimes (n,
	      workd + ipntr[0] - 1, // workd(ipntr(1))
	      workd + ipntr[1] - 1, // workd(ipntr(2))
	      user_data);
      dsaupd_(&ido, bmat, &n, SA, &nev, &eps, resid, 
	      &ncv, v, &ldv, iparam, ipntr, workd, workl,
	      &lworkl, &info);
    }

  if (info < 0)
    {
      fprintf (stdout, "Error with dsaupd;\n");
      dsaupd_info (stderr, info);
    }
  else
    {
      dseupd_(&rvec, howmny, select, d, z, &ldz, 
	      &sigma, bmat, &n, SA, &nev, &eps, 
	      resid, &ncv, v, &ldv, iparam, ipntr, workd, workl,
	      &lworkl, &ierr);

      if (ierr != 0)
	{
	  fprintf (stdout, "Error with dseupd;\n");
	  dseupd_info (stdout, ierr);
	}
      else if (info != 0)
	{
	  dsaupd_info (stderr, info);
	}

      /*
      int nconv  = iparam[4];

      fprintf (stdout, " _NDRV1 \n");
      fprintf (stdout, " ====== \n\n");
      fprintf (stdout, " Size of the matrix is %d\n", n);
      fprintf (stdout, " The number of Ritz values requested is %d\n", nev);
      fprintf (stdout, " The number of Arnoldi vectors generated"
	       " (NCV) is %d\n", ncv);
      fprintf (stdout, " What portion of the spectrum: %s\n", SA);
      fprintf (stdout, " The number of converged Ritz values is %d\n",
	       nconv);
      fprintf (stdout, " The number of Implicit Arnoldi update"
	       " iterations taken is %d\n", iparam[2]);
      fprintf (stdout, " The number of OP*x is %d\n", iparam[8]);
      fprintf (stdout, " The convergence criterion is %e\n", eps);


      fprintf (stdout, "d [0] = %e + i %e\n", dr[0], di[0]);
      */
      l[0] = d[0];
    }

  // The Largest Eigenvalue
  ido = 0;
  info = 0; // a randomly initial residual vector is used.
  //info = 1;
  /* RESID contains the initial residual vector,
   * possibly from a previous run.
   */
  dsaupd_(&ido, bmat, &n, LA, &nev, &eps, resid, 
	  &ncv, v, &ldv, iparam, ipntr, workd, workl,
	  &lworkl, &info);

  while (ido == -1 || ido == 1)
    {
      atimes (n,
	      workd + ipntr[0] - 1, // workd(ipntr(1))
	      workd + ipntr[1] - 1, // workd(ipntr(2))
	      user_data);
      dsaupd_(&ido, bmat, &n, LA, &nev, &eps, resid, 
	      &ncv, v, &ldv, iparam, ipntr, workd, workl,
	      &lworkl, &info);
    }

  if (info < 0)
    {
      fprintf (stdout, "Error with dsaupd;\n");
      dsaupd_info (stderr, info);
    }
  else
    {
      dseupd_(&rvec, howmny, select, d, z, &ldz, 
	      &sigma, bmat, &n, LA, &nev, &eps, 
	      resid, &ncv, v, &ldv, iparam, ipntr, workd, workl,
	      &lworkl, &ierr);

      if (ierr != 0)
	{
	  fprintf (stdout, "Error with dseupd;\n");
	  dseupd_info (stdout, ierr);
	}
      else if (info != 0)
	{
	  dsaupd_info (stderr, info);
	}

      /*
      int nconv  = iparam[4];
      fprintf (stdout, "nconv = %d\n", nconv);

      fprintf (stdout, " _NDRV1 \n");
      fprintf (stdout, " ====== \n\n");
      fprintf (stdout, " Size of the matrix is %d\n", n);
      fprintf (stdout, " The number of Ritz values requested is %d\n", nev);
      fprintf (stdout, " The number of Arnoldi vectors generated"
	       " (NCV) is %d\n", ncv);
      fprintf (stdout, " What portion of the spectrum: %s\n", LA);
      fprintf (stdout, " The number of converged Ritz values is %d\n",
	       nconv);
      fprintf (stdout, " The number of Implicit Arnoldi update"
	       " iterations taken is %d\n", iparam[2]);
      fprintf (stdout, " The number of OP*x is %d\n", iparam[8]);
      fprintf (stdout, " The convergence criterion is %e\n", eps);


      fprintf (stdout, "d [0] = %e + i %e\n", dr[0], di[0]);
      */
      l[1] = d[0];
    }


  free (resid);
  free (workd);
  free (workl);
  free (v);

  free (select);
  free (d);
  free (z);
}
Ejemplo n.º 7
0
Archivo: cg.c Proyecto: kichiki/libiter
/* Classical CG method -- Weiss' Algorithm 2
 * INPUT
 *   n : dimension of the problem
 *   b [n] : r-h-s vector
 *   atimes (int n, static double *x, double *b, void *param) :
 *        calc matrix-vector product A.x = b.
 *   atimes_param : parameters for atimes().
 *   it : struct iter. following entries are used
 *        it->max = kend : max of iteration
 *        it->eps = eps  : criteria for |r^2|/|b^2|
 * OUTPUT
 *   returned value : 0 == success, otherwise (-1) == failed
 *   x [n] : solution
 *   it->niter : # of iteration
 *   it->res2  : |r^2| / |b^2|
 */
int
cg (int n, const double *b, double *x,
    void (*atimes) (int, const double *, double *, void *),
    void *atimes_param,
    struct iter *it)
{
#ifndef HAVE_CBLAS_H
# ifdef HAVE_BLAS_H
  /* use Fortran BLAS routines */

  int i_1 = 1;
  double d_1 = 1.0;
  double d_m1 = -1.0;

# endif // !HAVE_BLAS_H
#endif // !HAVE_CBLAS_H

  int ret = -1;
  double eps2 = it->eps * it->eps;
  int itmax = it->max;

  double *p  = (double *)malloc (sizeof (double) * n);
  double *r  = (double *)malloc (sizeof (double) * n);
  double *ap = (double *)malloc (sizeof (double) * n);
  CHECK_MALLOC (p,  "cg");
  CHECK_MALLOC (r,  "cg");
  CHECK_MALLOC (ap, "cg");

  double r2;
  double res2 = 0.0;
  double pap;

  double gamma;
  double beta;

  int i;

#ifdef HAVE_CBLAS_H
  /**
   * ATLAS version
   */

  double b2 = cblas_ddot (n, b, 1, b, 1); // (b,b)
  eps2 *= b2;

  atimes (n, x, r, atimes_param); // r = A.x
  cblas_daxpy (n, -1.0, b, 1, r, 1); // r = A.x - b
  
  cblas_dcopy (n, r, 1, p, 1); // p = r

  for (i = 0; i < itmax; i ++)
    {
      r2 = cblas_ddot (n, r, 1, r, 1); // r2 = (r, r)
      
      atimes (n, p, ap, atimes_param); // ap = A.p
      pap = cblas_ddot (n, p, 1, ap, 1); // pap = (p, A.p)

      gamma = - r2 / pap; // gamma = - (r, r) / (p, A.p)
      
      cblas_daxpy (n, gamma, p, 1, x, 1); // x += gamma p
      cblas_daxpy (n, gamma, ap, 1, r, 1); // r += gamma Ap

      // new norm of r
      res2 = cblas_ddot (n, r, 1, r, 1); // (r, r)
      if (it->debug == 2)
	{
	  fprintf (it->out, "libiter-cg %d %e\n", i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}

      beta = res2 / r2; // beta = (r, r) / (r0, r0)
      
      cblas_dscal (n, beta, p, 1); // p *= beta
      cblas_daxpy (n, 1.0, r, 1, p, 1); // p = r + beta p

      r2 = res2;
    }

#else // !HAVE_CBLAS_H
# ifdef HAVE_BLAS_H
  /**
   * BLAS version
   */

  double b2 = ddot_ (&n, b, &i_1, b, &i_1); // (b,b)
  eps2 *= b2;

  atimes (n, x, r, atimes_param); // r = A.x
  daxpy_ (&n, &d_m1, b, &i_1, r, &i_1); // r = A.x - b
  
  dcopy_ (&n, r, &i_1, p, &i_1); // p = r

  for (i = 0; i < itmax; i ++)
    {
      r2 = ddot_ (&n, r, &i_1, r, &i_1); // r2 = (r, r)
      
      atimes (n, p, ap, atimes_param); // ap = A.p
      pap = ddot_ (&n, p, &i_1, ap, &i_1); // pap = (p, A.p)

      gamma = - r2 / pap; // gamma = - (r, r) / (p, A.p)
      
      daxpy_ (&n, &gamma, p, &i_1, x, &i_1); // x += gamma p
      daxpy_ (&n, &gamma, ap, &i_1, r, &i_1); // r += gamma Ap

      // new norm of r
      res2 = ddot_ (&n, r, &i_1, r, &i_1); // (r, r)
      if (it->debug == 2)
	{
	  fprintf (it->out, "libiter-cg %d %e\n", i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}

      beta = res2 / r2; // beta = (r, r) / (r0, r0)
      
      dscal_ (&n, &beta, p, &i_1); // p *= beta
      daxpy_ (&n, &d_1, r, &i_1, p, &i_1); // p = r + beta p

      r2 = res2;
    }

# else // !HAVE_BLAS_H
  /**
   * local BLAS version
   */

  double b2 = my_ddot (n, b, 1, b, 1); // (b,b)
  eps2 *= b2;

  atimes (n, x, r, atimes_param); // r = A.x
  my_daxpy (n, -1.0, b, 1, r, 1); // r = A.x - b
  
  my_dcopy (n, r, 1, p, 1); // p = r

  for (i = 0; i < itmax; i ++)
    {
      r2 = my_ddot (n, r, 1, r, 1); // r2 = (r, r)
      
      atimes (n, p, ap, atimes_param); // ap = A.p
      pap = my_ddot (n, p, 1, ap, 1); // pap = (p, A.p)

      gamma = - r2 / pap; // gamma = - (r, r) / (p, A.p)
      
      my_daxpy (n, gamma, p, 1, x, 1); // x += gamma p
      my_daxpy (n, gamma, ap, 1, r, 1); // r += gamma Ap

      // new norm of r
      res2 = my_ddot (n, r, 1, r, 1); // (r, r)
      if (it->debug == 2)
	{
	  fprintf (it->out, "libiter-cg %d %e\n", i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}

      beta = res2 / r2; // beta = (r, r) / (r0, r0)
      
      my_dscal (n, beta, p, 1); // p *= beta
      my_daxpy (n, 1.0, r, 1, p, 1); // p = r + beta p

      r2 = res2;
    }

# endif // !HAVE_BLAS_H
#endif // !HAVE_CBLAS_H

  free (p);
  free (r);
  free (ap);

  if (it->debug == 1)
    {
      fprintf (it->out, "libiter-cg it= %d res^2= %e\n", i, res2 / b2);
    }

  it->niter = i;
  it->res2  = res2 / b2;
  return (ret);
}