Пример #1
0
/* Ref: Weiss, Algorithm 12 BiCGSTAB
 * INPUT
 *   n : dimension of the problem
 *   b [n] : r-h-s vector
 *   atimes (int n, static double *x, double *b, void *param) :
 *        calc matrix-vector product A.x = b.
 *   atimes_param : parameters for atimes().
 *   it : struct iter. following entries are used
 *        it->max = kend : max of iteration
 *        it->eps = eps  : criteria for |r^2|/|b^2|
 * OUTPUT
 *   returned value : 0 == success, otherwise (-1) == failed
 *   x [n] : solution
 *   it->niter : # of iteration
 *   it->res2  : |r^2| / |b^2|
 */
int
bicgstab (int n, const double *b, double *x,
	  void (*atimes) (int, const double *, double *, void *),
	  void *atimes_param,
	  struct iter *it)
{
#ifndef HAVE_CBLAS_H
# ifdef HAVE_BLAS_H
  /* use Fortran BLAS routines */

  int i_1 = 1;
  double d_1 = 1.0;
  double d_m1 = -1.0;

# endif // !HAVE_BLAS_H
#endif // !HAVE_CBLAS_H

  int ret = -1;
  double eps2 = it->eps * it->eps;
  int itmax = it->max;

  double *r  = (double *)malloc (sizeof (double) * n);
  double *rs = (double *)malloc (sizeof (double) * n);
  double *p  = (double *)malloc (sizeof (double) * n);
  double *ap = (double *)malloc (sizeof (double) * n);
  double *s  = (double *)malloc (sizeof (double) * n);
  double *t  = (double *)malloc (sizeof (double) * n);
  CHECK_MALLOC (r,  "bicgstab");
  CHECK_MALLOC (rs, "bicgstab");
  CHECK_MALLOC (p,  "bicgstab");
  CHECK_MALLOC (ap, "bicgstab");
  CHECK_MALLOC (s,  "bicgstab");
  CHECK_MALLOC (t,  "bicgstab");

  double rsap; // (r*, A.p)
  double st;
  double t2;

  double rho, rho1;
  double delta;
  double gamma;
  double beta;

  double res2 = 0.0;

#ifdef HAVE_CBLAS_H
  /**
   * ATLAS version
   */

  double b2 = cblas_ddot (n, b, 1, b, 1); // (b,b)
  eps2 *= b2;

  atimes (n, x, r, atimes_param);    // r = A.x ...
  cblas_daxpy (n, -1.0, b, 1, r, 1); //         - b

  cblas_dcopy (n, r, 1, rs, 1); // r* = r
  cblas_dcopy (n, r, 1, p, 1);  // p  = r

  rho = cblas_ddot (n, rs, 1, r, 1); // rho = (r*, r)

  int i;
  for (i = 0; i < itmax; i ++)
    {
      atimes (n, p, ap, atimes_param); // ap = A.p
      rsap = cblas_ddot (n, rs, 1, ap, 1); // rsap = (r*, A.p)
      delta = - rho / rsap;

      cblas_dcopy (n, r, 1, s, 1);         // s = r ...
      cblas_daxpy (n, delta, ap, 1, s, 1); //   + delta A.p
      atimes (n, s, t, atimes_param); // t = A.s

      st = cblas_ddot (n, s, 1, t, 1); // st = (s, t)
      t2 = cblas_ddot (n, t, 1, t, 1); // t2 = (t, t)
      gamma = - st / t2;

      cblas_dcopy (n, s, 1, r, 1);        // r = s ...
      cblas_daxpy (n, gamma, t, 1, r, 1); //   + gamma t

      cblas_daxpy (n, delta, p, 1, x, 1); // x = x + delta p...
      cblas_daxpy (n, gamma, s, 1, x, 1); //       + gamma s

      res2 = cblas_ddot (n, r, 1, r, 1);
      if (it->debug == 2)
	{
	  fprintf (it->out,
		   "libiter-bicgstab(cblas) %d %e\n",
		   i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}

      rho1 = cblas_ddot (n, rs, 1, r, 1); // rho = (r*, r)
      beta = rho1 / rho * delta / gamma;
      rho = rho1;

      cblas_daxpy (n, gamma, ap, 1, p, 1); // p = p + gamma A.p
      cblas_dscal (n, beta, p, 1);         // p = beta (p + gamma A.p)
      cblas_daxpy (n, 1.0, r, 1, p, 1); // p = r + beta(p + gamma A.p)
    }

#else // !HAVE_CBLAS_H
# ifdef HAVE_BLAS_H
  /**
   * BLAS version
   */

  double b2 = ddot_ (&n, b, &i_1, b, &i_1); // (b,b)
  eps2 *= b2;

  atimes (n, x, r, atimes_param);       // r = A.x ...
  daxpy_ (&n, &d_m1, b, &i_1, r, &i_1); //         - b

  dcopy_ (&n, r, &i_1, rs, &i_1); // r* = r
  dcopy_ (&n, r, &i_1, p, &i_1);  // p  = r

  rho = ddot_ (&n, rs, &i_1, r, &i_1); // rho = (r*, r)

  int i;
  for (i = 0; i < itmax; i ++)
    {
      atimes (n, p, ap, atimes_param); // ap = A.p
      rsap = ddot_ (&n, rs, &i_1, ap, &i_1); // rsap = (r*, A.p)
      delta = - rho / rsap;

      dcopy_ (&n, r, &i_1, s, &i_1);          // s = r ...
      daxpy_ (&n, &delta, ap, &i_1, s, &i_1); //   + delta A.p
      atimes (n, s, t, atimes_param); // t = A.s

      st = ddot_ (&n, s, &i_1, t, &i_1); // st = (s, t)
      t2 = ddot_ (&n, t, &i_1, t, &i_1); // t2 = (t, t)
      gamma = - st / t2;

      dcopy_ (&n, s, &i_1, r, &i_1);         // r = s ...
      daxpy_ (&n, &gamma, t, &i_1, r, &i_1); //   + gamma t

      daxpy_ (&n, &delta, p, &i_1, x, &i_1); // x = x + delta p...
      daxpy_ (&n, &gamma, s, &i_1, x, &i_1); //       + gamma s

      res2 = ddot_ (&n, r, &i_1, r, &i_1);
      if (it->debug == 2)
	{
	  fprintf (it->out,
		   "libiter-bicgstab(blas) %d %e\n",
		   i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}
      if (res2 > 1.0e20)
	{
	  // already too big residual
	  break;
	}

      rho1 = ddot_ (&n, rs, &i_1, r, &i_1); // rho = (r*, r)
      beta = rho1 / rho * delta / gamma;
      rho = rho1;

      daxpy_ (&n, &gamma, ap, &i_1, p, &i_1); // p = p + gamma A.p
      dscal_ (&n, &beta, p, &i_1);            // p = beta (p + gamma A.p)
      daxpy_ (&n, &d_1, r, &i_1, p, &i_1); // p = r + beta(p + gamma A.p)
    }

# else // !HAVE_BLAS_H
  /**
   * local BLAS version
   */

  double b2 = my_ddot (n, b, 1, b, 1); // (b,b)
  eps2 *= b2;

  atimes (n, x, r, atimes_param);    // r = A.x ...
  my_daxpy (n, -1.0, b, 1, r, 1); //         - b

  my_dcopy (n, r, 1, rs, 1); // r* = r
  my_dcopy (n, r, 1, p, 1);  // p = r

  rho = my_ddot (n, rs, 1, r, 1); // rho = (r*, r)

  int i;
  for (i = 0; i < itmax; i ++)
    {
      atimes (n, p, ap, atimes_param); // ap = A.p
      rsap = my_ddot (n, rs, 1, ap, 1); // rsap = (r*, A.p)
      delta = - rho / rsap;

      my_dcopy (n, r, 1, s, 1);         // s = r ...
      my_daxpy (n, delta, ap, 1, s, 1); //   + delta A.p
      atimes (n, s, t, atimes_param); // t = A.s

      st = my_ddot (n, s, 1, t, 1); // st = (s, t)
      t2 = my_ddot (n, t, 1, t, 1); // t2 = (t, t)
      gamma = - st / t2;

      my_dcopy (n, s, 1, r, 1);        // r = s ...
      my_daxpy (n, gamma, t, 1, r, 1); //   + gamma t

      my_daxpy (n, delta, p, 1, x, 1); // x = x + delta p...
      my_daxpy (n, gamma, s, 1, x, 1); //       + gamma s

      res2 = my_ddot (n, r, 1, r, 1);
      if (it->debug == 2)
	{
	  fprintf (it->out,
		   "libiter-bicgstab(myblas) %d %e\n",
		   i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}

      rho1 = my_ddot (n, rs, 1, r, 1); // rho = (r*, r)
      beta = rho1 / rho * delta / gamma;
      rho = rho1;

      my_daxpy (n, gamma, ap, 1, p, 1); // p = p + gamma A.p
      my_dscal (n, beta, p, 1);         // p = beta (p + gamma A.p)
      my_daxpy (n, 1.0, r, 1, p, 1); // p = r + beta(p + gamma A.p)
    }

# endif // !HAVE_BLAS_H
#endif // !HAVE_CBLAS_H

  free (r);
  free (rs);
  free (p);
  free (ap);
  free (s);
  free (t);

  if (it->debug == 1)
    {
      fprintf (it->out, "libiter-bicgstab %d %e\n", i, res2 / b2);
    }

  it->niter = i;
  it->res2  = res2 / b2;
  return (ret);
}
Пример #2
0
/* N>1 */
double
fit_N_point_em_f(hpixelf *parr, int npix, int Nf, double *freqs, double *bmaj, double *bmin, double *bpa, double ref_freq, int maxiter, int max_em_iter, double *ll, double *mm, double *sI, double *sP, int N, int Nh, hpoint *hull){

 int ci,cj,ck;
 double *p,*p1,*p2, // params m x 1
     *x; // observed data n x 1, the image pixel fluxes
 double *xdummy, *xsub; //extra arrays
 int m,n;
 double *b; /* affine combination */

 double opts[CLM_OPTS_SZ], info[CLM_INFO_SZ];

 double l_min,l_max,m_min,m_max,sumI;
 double fraction;

 double penalty; /* penalty for solutions with components out of pixel range */

 fit_double_point_dataf lmdata;

 /* for initial average pixel fit */
 hpixel *parrav;
 double mean_bmaj,mean_bmin,mean_bpa,mean_err;


 /***** first do a fit for average pixesl, using average PSF ******/
 if ((parrav=(hpixel*)calloc((size_t)(npix),sizeof(hpixel)))==0) {
     fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
     exit(1);
 }
 for (ci=0; ci<npix; ++ci) {
   parrav[ci].x=parr[ci].x;
   parrav[ci].y=parr[ci].y;
   parrav[ci].l=parr[ci].l;
   parrav[ci].m=parr[ci].m;
   parrav[ci].ra=parr[ci].ra;
   parrav[ci].dec=parr[ci].dec;
   parrav[ci].sI=0.0;
   for (cj=0; cj<Nf; ++cj) {
    parrav[ci].sI+=parr[ci].sI[cj];
   }
   parrav[ci].sI/=(double)Nf;
 }
 mean_bmaj=mean_bmin=mean_bpa=0.0;
 for (cj=0; cj<Nf; ++cj) {
  mean_bmaj+=bmaj[cj];
  mean_bmin+=bmin[cj];
  mean_bpa+=bpa[cj];
 }
 mean_bmaj/=(double)Nf;
 mean_bmin/=(double)Nf;
 mean_bpa/=(double)Nf;
 

 /* we get mean_err=2*3*N+npix*log(error) */
 mean_err=fit_N_point_em(parrav, npix, mean_bmaj, mean_bmin, mean_bpa, maxiter, max_em_iter, ll, mm, sI, N, Nh, hull);

 free(parrav);


 opts[0]=CLM_INIT_MU; opts[1]=1E-15; opts[2]=1E-15; opts[3]=1E-15;
  opts[4]=-CLM_DIFF_DELTA; 

 m=4; /* 1x1 flux, 3x1 spec index for each component */
 n=Nf*npix; /* no of pixels */

 if ((p=(double*)calloc((size_t)(m),sizeof(double)))==0) {
     fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
     exit(1);
 }
 if ((p1=(double*)calloc((size_t)(m),sizeof(double)))==0) {
     fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
     exit(1);
 }
 if ((p2=(double*)calloc((size_t)(m),sizeof(double)))==0) {
     fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
     exit(1);
 }
 if ((x=(double*)calloc((size_t)(n),sizeof(double)))==0) {
     fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
     exit(1);
 }
 if ((xsub=(double*)calloc((size_t)(n),sizeof(double)))==0) {
     fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
     exit(1);
 }
 if ((xdummy=(double*)calloc((size_t)(n),sizeof(double)))==0) {
     fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
     exit(1);
 }
 if ((b=(double*)calloc((size_t)(N),sizeof(double)))==0) {
     fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
     exit(1);
 }



 l_min=m_min=INFINITY_L;
 l_max=m_max=-INFINITY_L;
 sumI=0.0;
 /* only use valid pixels for initial conditions */
 for (ci=0; ci<npix; ci++) {
   if (parr[ci].ra!=-1 && parr[ci].m!=-1) {
       if (l_min>parr[ci].l) {
          l_min=parr[ci].l;
       }
       if (l_max<parr[ci].l) {
         l_max=parr[ci].l;
       }
       if (m_min>parr[ci].m) {
          m_min=parr[ci].m;
       }
       if (m_max<parr[ci].m) {
         m_max=parr[ci].m;
       }
   }
 }
 ck=0;
 for (cj=0; cj<Nf; ++cj) {
  for (ci=0; ci<npix; ci++) {
       x[ck++]=parr[ci].sI[cj];
       sumI+=parr[ci].sI[cj];
  }
 }
 sumI/=(double)n;

 fraction=1.0;///(double)N;
/**********************************/
 for (ci=0; ci<N; ci++) {
  sP[ci]=0.0;
  sP[ci+N]=0.0;
  sP[ci+2*N]=0.0;

  b[ci]=fraction;    
 }
 
  lmdata.Nf=Nf;
  lmdata.parr=parr;
  lmdata.freqs=freqs;
  lmdata.bmaj=bmaj;
  lmdata.bmin=bmin;
  lmdata.bpa=bpa;
  lmdata.ref_freq=ref_freq;

 double aic1,aic2,aic3;
 for (ci=0; ci<max_em_iter; ci++) {
   for (cj=0; cj<N; cj++) {
     /* calculate contribution from hidden data, subtract from x */
    memcpy(xdummy,x,(size_t)(n)*sizeof(double));
    for (ck=0; ck<N; ck++) {
     if (ck!=cj) {
       lmdata.ll=&ll[ck]; /* pointer to positions */
       lmdata.mm=&mm[ck];
       p[0]=sI[ck];
       p[1]=sP[ck];
       p[2]=sP[ck+N];
       p[3]=sP[ck+2*N];

       mylm_fit_single_pf(p, xsub, m, n, (void*)&lmdata);
       /* xdummy=xdummy-b*xsub */
       my_daxpy(n, xsub, -b[ck], xdummy);
     }
    }

    lmdata.ll=&ll[cj]; /* pointer to positions */
    lmdata.mm=&mm[cj];
    p[0]=p1[0]=p2[0]=sI[cj];
    p[1]=p1[1]=p2[1]=sP[cj];
    p[2]=p2[2]=sP[cj+N]; p1[2]=0.0;
    p[3]=sP[cj+2*N]; p1[3]=p2[3]=0.0;

    //ret=dlevmar_dif(mylm_fit_single_pf, p, xdummy, m, n, maxiter, opts, info, NULL, NULL, (void*)&lmdata);  // no Jacobian
    clevmar_der_single_nocuda(mylm_fit_single_pf, NULL, p, xdummy, m, n, maxiter, opts, info, 2, (void*)&lmdata);  // no Jacobian
/* penalize only 1/10 of parameters */
    aic3=0.3+log(info[1]);
    clevmar_der_single_nocuda(mylm_fit_single_pf_2d, NULL, p2, xdummy, m-1, n, maxiter, opts, info, 2, (void*)&lmdata);  // no Jacobian
    aic2=0.2+log(info[1]);
    clevmar_der_single_nocuda(mylm_fit_single_pf_1d, NULL, p1, xdummy, m-2, n, maxiter, opts, info, 2, (void*)&lmdata);  // no Jacobian
    aic1=0.1+log(info[1]);
    /* choose one with minimum error */
    if (aic3<aic2) {
     if (aic3<aic1) {
       /* 3d */
       sI[cj]=p[0];
       sP[cj]=p[1];
       sP[cj+N]=p[2];
       sP[cj+2*N]=p[3];
     } else {
       /* 1d */
       sI[cj]=p1[0];
       sP[cj]=p1[1];
       sP[cj+N]=p1[2];
       sP[cj+2*N]=p1[3];
     }
   } else {
     if (aic2<aic1) {
       /* 2d */
       sI[cj]=p2[0];
       sP[cj]=p2[1];
       sP[cj+N]=p2[2];
       sP[cj+2*N]=p2[3];
     } else {
       /* 1d */
       sI[cj]=p1[0];
       sP[cj]=p1[1];
       sP[cj+N]=p1[2];
       sP[cj+2*N]=p1[3];
     }
   }
  }
 }
/**********************************/



#ifdef DEBUG
  print_levmar_info(info[0],info[1],(int)info[5], (int)info[6], (int)info[7], (int)info[8], (int)info[9]);
  printf("Levenberg-Marquardt returned %d in %g iter, reason %g\nSolution: ", ret, info[5], info[6]);
#endif
 /* check for solutions such that l_min <= ll <= l_max and m_min <= mm <= m_max */
 penalty=0.0;
 for (ci=0; ci<N; ci++) {
   /* position out of range */
   if (ll[ci]<l_min || ll[ci]>l_max || mm[ci]<m_min || mm[ci]>m_max) {
    penalty+=INFINITY_L;
   }
   /* spec index too high to be true */
   if (fabs(sP[ci])>20.0) {
    penalty+=INFINITY_L;
   }
 }

 /* calculate error */
 memcpy(xdummy,x,(size_t)(n)*sizeof(double));
 for (cj=0; cj<N; cj++) {
    for (ck=0; ck<N; ck++) {
       lmdata.ll=&ll[ck]; /* pointer to positions */
       lmdata.mm=&mm[ck];
       p[0]=sI[ck];
       p[1]=sP[ck];
       p[2]=sP[ck+N];
       p[3]=sP[ck+2*N];

       mylm_fit_single_pf(p, xsub, m, n, (void*)&lmdata);
       /* xdummy=xdummy-b*xsub */
       my_daxpy(n, xsub, -1.0, xdummy);
    }
 }
 /*sumI=0.0;
 for (ci=0; ci<n; ++ci ){
  sumI+=xdummy[ci]*xdummy[ci];
 } */
 sumI=my_dnrm2(n,xdummy);
 sumI=sumI*sumI;
 free(p);
 free(p1);
 free(p2);
 free(x);
 free(xdummy);
 free(xsub);
 free(b);

 /* AIC, 4*N parms */
 //return 2*4*N+npix*Nf*log(sumI)+penalty;
 return 2*4*N+Nf*(mean_err-2*3*N)+log(sumI)*npix*Nf+penalty;
}
Пример #3
0
/* Ref: Weiss, Algorithm 11 CGS
 * INPUT
 *   n : dimension of the problem
 *   b [n] : r-h-s vector
 *   atimes (int n, static double *x, double *b, void *param) :
 *        calc matrix-vector product A.x = b.
 *   atimes_param : parameters for atimes().
 *   it : struct iter. following entries are used
 *        it->max = kend : max of iteration
 *        it->eps = eps  : criteria for |r^2|/|b^2|
 * OUTPUT
 *   returned value : 0 == success, otherwise (-1) == failed
 *   x [n] : solution
 *   it->niter : # of iteration
 *   it->res2  : |r^2| / |b^2|
 */
int
cgs (int n, const double *b, double *x,
     void (*atimes) (int, const double *, double *, void *),
     void *atimes_param,
     struct iter *it)
{
#ifndef HAVE_CBLAS_H
# ifdef HAVE_BLAS_H
  /* use Fortran BLAS routines */

  int i_1 = 1;
  double d_m1 = -1.0;
  double d_2 = 2.0;

# endif // !HAVE_BLAS_H
#endif // !HAVE_CBLAS_H

  int ret = -1;
  double eps2 = it->eps * it->eps;
  int itmax = it->max;

  double *r  = (double *)malloc (sizeof (double) * n);
  double *r0 = (double *)malloc (sizeof (double) * n);
  double *p  = (double *)malloc (sizeof (double) * n);
  double *u  = (double *)malloc (sizeof (double) * n);
  double *ap = (double *)malloc (sizeof (double) * n);
  double *q  = (double *)malloc (sizeof (double) * n);
  double *t  = (double *)malloc (sizeof (double) * n);
  CHECK_MALLOC (r,  "cgs");
  CHECK_MALLOC (r0, "cgs");
  CHECK_MALLOC (p,  "cgs");
  CHECK_MALLOC (u,  "cgs");
  CHECK_MALLOC (ap, "cgs");
  CHECK_MALLOC (q,  "cgs");
  CHECK_MALLOC (t,  "cgs");


  double r0ap;
  double rho, rho1;
  double delta;
  double beta;

  double res2 = 0.0;

#ifdef HAVE_CBLAS_H
  /**
   * ATLAS version
   */

  double b2 = cblas_ddot (n, b, 1, b, 1); // (b,b)
  eps2 *= b2;

  // initial residue
  atimes (n, x, r, atimes_param); // r = A.x
  cblas_daxpy (n, -1.0, b, 1, r, 1); // r = A.x - b

  cblas_dcopy (n, r, 1, r0, 1); // r0* = r
  cblas_dcopy (n, r, 1, p, 1); // p = r
  cblas_dcopy (n, r, 1, u, 1); // u = r

  rho = cblas_ddot (n, r0, 1, r, 1); // rho = (r0*, r)

  int i;
  for (i = 0; i < itmax; i ++)
    {
      atimes (n, p, ap, atimes_param); // ap = A.p
      r0ap = cblas_ddot (n, r0, 1, ap, 1); // r0ap = (r0*, A.p)
      delta = - rho / r0ap;

      cblas_dcopy (n, u, 1, q, 1); // q = u
      cblas_dscal (n, 2.0, q, 1); // q = 2 u
      cblas_daxpy (n, delta, ap, 1, q, 1); // q = 2 u + delta A.p

      atimes (n, q, t, atimes_param); // t = A.q

      cblas_daxpy (n, delta, t, 1, r, 1); // r = r + delta t
      cblas_daxpy (n, delta, q, 1, x, 1); // x = x + delta q

      res2 = cblas_ddot (n, r, 1, r, 1);
      if (it->debug == 2)
	{
	  fprintf (it->out, "libiter-cgs %d %e\n", i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}

      rho1 = cblas_ddot (n, r0, 1, r, 1); // rho = (r0*, r)
      beta = rho1 / rho;
      rho = rho1;

      // here t is not used so that this is used for working area.
      double *qu = t;
      cblas_dcopy (n, q, 1, qu, 1); // qu = q
      cblas_daxpy (n, -1.0, u, 1, qu, 1); // qu = q - u
      cblas_dcopy (n, r, 1, u, 1); // u = r
      cblas_daxpy (n, beta, qu, 1, u, 1); // u = r + beta (q - u)

      cblas_daxpy (n, beta, p, 1, qu, 1); // qu = q - u + beta * p
      cblas_dcopy (n, u, 1, p, 1); // p = u
      cblas_daxpy (n, beta, qu, 1, p, 1); // p = u + beta (q - u + b * p)
    }

#else // !HAVE_CBLAS_H
# ifdef HAVE_BLAS_H
  /**
   * BLAS version
   */

  double b2 = ddot_ (&n, b, &i_1, b, &i_1); // (b,b)
  eps2 *= b2;

  // initial residue
  atimes (n, x, r, atimes_param); // r = A.x
  daxpy_ (&n, &d_m1, b, &i_1, r, &i_1); // r = A.x - b

  dcopy_ (&n, r, &i_1, r0, &i_1); // r0* = r
  dcopy_ (&n, r, &i_1, p, &i_1); // p = r
  dcopy_ (&n, r, &i_1, u, &i_1); // u = r

  rho = ddot_ (&n, r0, &i_1, r, &i_1); // rho = (r0*, r)

  int i;
  for (i = 0; i < itmax; i ++)
    {
      atimes (n, p, ap, atimes_param); // ap = A.p
      r0ap = ddot_ (&n, r0, &i_1, ap, &i_1); // r0ap = (r0*, A.p)
      delta = - rho / r0ap;

      dcopy_ (&n, u, &i_1, q, &i_1); // q = u
      dscal_ (&n, &d_2, q, &i_1); // q = 2 u
      daxpy_ (&n, &delta, ap, &i_1, q, &i_1); // q = 2 u + delta A.p

      atimes (n, q, t, atimes_param); // t = A.q

      daxpy_ (&n, &delta, t, &i_1, r, &i_1); // r = r + delta t
      daxpy_ (&n, &delta, q, &i_1, x, &i_1); // x = x + delta q

      res2 = ddot_ (&n, r, &i_1, r, &i_1);
      if (it->debug == 2)
	{
	  fprintf (it->out, "libiter-cgs %d %e\n", i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}

      rho1 = ddot_ (&n, r0, &i_1, r, &i_1); // rho = (r0*, r)
      beta = rho1 / rho;
      rho = rho1;

      // here t is not used so that this is used for working area.
      double *qu = t;
      dcopy_ (&n, q, &i_1, qu, &i_1); // qu = q
      daxpy_ (&n, &d_m1, u, &i_1, qu, &i_1); // qu = q - u
      dcopy_ (&n, r, &i_1, u, &i_1); // u = r
      daxpy_ (&n, &beta, qu, &i_1, u, &i_1); // u = r + beta (q - u)

      daxpy_ (&n, &beta, p, &i_1, qu, &i_1); // qu = q - u + beta * p
      dcopy_ (&n, u, &i_1, p, &i_1); // p = u
      daxpy_ (&n, &beta, qu, &i_1, p, &i_1); // p = u + beta (q - u + b * p)
    }

# else // !HAVE_BLAS_H
  /**
   * local BLAS version
   */

  double b2 = my_ddot (n, b, 1, b, 1); // (b,b)
  eps2 *= b2;

  // initial residue
  atimes (n, x, r, atimes_param); // r = A.x
  my_daxpy (n, -1.0, b, 1, r, 1); // r = A.x - b

  my_dcopy (n, r, 1, r0, 1); // r0* = r
  my_dcopy (n, r, 1, p, 1); // p = r
  my_dcopy (n, r, 1, u, 1); // u = r

  rho = my_ddot (n, r0, 1, r, 1); // rho = (r0*, r)

  int i;
  for (i = 0; i < itmax; i ++)
    {
      atimes (n, p, ap, atimes_param); // ap = A.p
      r0ap = my_ddot (n, r0, 1, ap, 1); // r0ap = (r0*, A.p)
      delta = - rho / r0ap;

      my_dcopy (n, u, 1, q, 1); // q = u
      my_dscal (n, 2.0, q, 1); // q = 2 u
      my_daxpy (n, delta, ap, 1, q, 1); // q = 2 u + delta A.p

      atimes (n, q, t, atimes_param); // t = A.q

      my_daxpy (n, delta, t, 1, r, 1); // r = r + delta t
      my_daxpy (n, delta, q, 1, x, 1); // x = x + delta q

      res2 = my_ddot (n, r, 1, r, 1);
      if (it->debug == 2)
	{
	  fprintf (it->out, "libiter-cgs %d %e\n", i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}

      rho1 = my_ddot (n, r0, 1, r, 1); // rho = (r0*, r)
      beta = rho1 / rho;
      rho = rho1;

      // here t is not used so that this is used for working area.
      double *qu = t;
      my_dcopy (n, q, 1, qu, 1); // qu = q
      my_daxpy (n, -1.0, u, 1, qu, 1); // qu = q - u
      my_dcopy (n, r, 1, u, 1); // u = r
      my_daxpy (n, beta, qu, 1, u, 1); // u = r + beta (q - u)

      my_daxpy (n, beta, p, 1, qu, 1); // qu = q - u + beta * p
      my_dcopy (n, u, 1, p, 1); // p = u
      my_daxpy (n, beta, qu, 1, p, 1); // p = u + beta (q - u + b * p)
    }

# endif // !HAVE_BLAS_H
#endif // !HAVE_CBLAS_H

  free (r);
  free (r0);
  free (p);
  free (u);
  free (ap);
  free (q);
  free (t);

  if (it->debug == 1)
    {
      fprintf (it->out, "libiter-cgs it= %d res^2= %e\n", i, res2);
    }

  it->niter = i;
  it->res2  = res2 / b2;
  return (ret);
}
Пример #4
0
/* Classical CG method -- Weiss' Algorithm 2
 * INPUT
 *   n : dimension of the problem
 *   b [n] : r-h-s vector
 *   atimes (int n, static double *x, double *b, void *param) :
 *        calc matrix-vector product A.x = b.
 *   atimes_param : parameters for atimes().
 *   it : struct iter. following entries are used
 *        it->max = kend : max of iteration
 *        it->eps = eps  : criteria for |r^2|/|b^2|
 * OUTPUT
 *   returned value : 0 == success, otherwise (-1) == failed
 *   x [n] : solution
 *   it->niter : # of iteration
 *   it->res2  : |r^2| / |b^2|
 */
int
cg (int n, const double *b, double *x,
    void (*atimes) (int, const double *, double *, void *),
    void *atimes_param,
    struct iter *it)
{
#ifndef HAVE_CBLAS_H
# ifdef HAVE_BLAS_H
  /* use Fortran BLAS routines */

  int i_1 = 1;
  double d_1 = 1.0;
  double d_m1 = -1.0;

# endif // !HAVE_BLAS_H
#endif // !HAVE_CBLAS_H

  int ret = -1;
  double eps2 = it->eps * it->eps;
  int itmax = it->max;

  double *p  = (double *)malloc (sizeof (double) * n);
  double *r  = (double *)malloc (sizeof (double) * n);
  double *ap = (double *)malloc (sizeof (double) * n);
  CHECK_MALLOC (p,  "cg");
  CHECK_MALLOC (r,  "cg");
  CHECK_MALLOC (ap, "cg");

  double r2;
  double res2 = 0.0;
  double pap;

  double gamma;
  double beta;

  int i;

#ifdef HAVE_CBLAS_H
  /**
   * ATLAS version
   */

  double b2 = cblas_ddot (n, b, 1, b, 1); // (b,b)
  eps2 *= b2;

  atimes (n, x, r, atimes_param); // r = A.x
  cblas_daxpy (n, -1.0, b, 1, r, 1); // r = A.x - b
  
  cblas_dcopy (n, r, 1, p, 1); // p = r

  for (i = 0; i < itmax; i ++)
    {
      r2 = cblas_ddot (n, r, 1, r, 1); // r2 = (r, r)
      
      atimes (n, p, ap, atimes_param); // ap = A.p
      pap = cblas_ddot (n, p, 1, ap, 1); // pap = (p, A.p)

      gamma = - r2 / pap; // gamma = - (r, r) / (p, A.p)
      
      cblas_daxpy (n, gamma, p, 1, x, 1); // x += gamma p
      cblas_daxpy (n, gamma, ap, 1, r, 1); // r += gamma Ap

      // new norm of r
      res2 = cblas_ddot (n, r, 1, r, 1); // (r, r)
      if (it->debug == 2)
	{
	  fprintf (it->out, "libiter-cg %d %e\n", i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}

      beta = res2 / r2; // beta = (r, r) / (r0, r0)
      
      cblas_dscal (n, beta, p, 1); // p *= beta
      cblas_daxpy (n, 1.0, r, 1, p, 1); // p = r + beta p

      r2 = res2;
    }

#else // !HAVE_CBLAS_H
# ifdef HAVE_BLAS_H
  /**
   * BLAS version
   */

  double b2 = ddot_ (&n, b, &i_1, b, &i_1); // (b,b)
  eps2 *= b2;

  atimes (n, x, r, atimes_param); // r = A.x
  daxpy_ (&n, &d_m1, b, &i_1, r, &i_1); // r = A.x - b
  
  dcopy_ (&n, r, &i_1, p, &i_1); // p = r

  for (i = 0; i < itmax; i ++)
    {
      r2 = ddot_ (&n, r, &i_1, r, &i_1); // r2 = (r, r)
      
      atimes (n, p, ap, atimes_param); // ap = A.p
      pap = ddot_ (&n, p, &i_1, ap, &i_1); // pap = (p, A.p)

      gamma = - r2 / pap; // gamma = - (r, r) / (p, A.p)
      
      daxpy_ (&n, &gamma, p, &i_1, x, &i_1); // x += gamma p
      daxpy_ (&n, &gamma, ap, &i_1, r, &i_1); // r += gamma Ap

      // new norm of r
      res2 = ddot_ (&n, r, &i_1, r, &i_1); // (r, r)
      if (it->debug == 2)
	{
	  fprintf (it->out, "libiter-cg %d %e\n", i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}

      beta = res2 / r2; // beta = (r, r) / (r0, r0)
      
      dscal_ (&n, &beta, p, &i_1); // p *= beta
      daxpy_ (&n, &d_1, r, &i_1, p, &i_1); // p = r + beta p

      r2 = res2;
    }

# else // !HAVE_BLAS_H
  /**
   * local BLAS version
   */

  double b2 = my_ddot (n, b, 1, b, 1); // (b,b)
  eps2 *= b2;

  atimes (n, x, r, atimes_param); // r = A.x
  my_daxpy (n, -1.0, b, 1, r, 1); // r = A.x - b
  
  my_dcopy (n, r, 1, p, 1); // p = r

  for (i = 0; i < itmax; i ++)
    {
      r2 = my_ddot (n, r, 1, r, 1); // r2 = (r, r)
      
      atimes (n, p, ap, atimes_param); // ap = A.p
      pap = my_ddot (n, p, 1, ap, 1); // pap = (p, A.p)

      gamma = - r2 / pap; // gamma = - (r, r) / (p, A.p)
      
      my_daxpy (n, gamma, p, 1, x, 1); // x += gamma p
      my_daxpy (n, gamma, ap, 1, r, 1); // r += gamma Ap

      // new norm of r
      res2 = my_ddot (n, r, 1, r, 1); // (r, r)
      if (it->debug == 2)
	{
	  fprintf (it->out, "libiter-cg %d %e\n", i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}

      beta = res2 / r2; // beta = (r, r) / (r0, r0)
      
      my_dscal (n, beta, p, 1); // p *= beta
      my_daxpy (n, 1.0, r, 1, p, 1); // p = r + beta p

      r2 = res2;
    }

# endif // !HAVE_BLAS_H
#endif // !HAVE_CBLAS_H

  free (p);
  free (r);
  free (ap);

  if (it->debug == 1)
    {
      fprintf (it->out, "libiter-cg it= %d res^2= %e\n", i, res2 / b2);
    }

  it->niter = i;
  it->res2  = res2 / b2;
  return (ret);
}