/* Ref: Weiss, Algorithm 11 CGS * INPUT * n : dimension of the problem * b [n] : r-h-s vector * atimes (int n, static double *x, double *b, void *param) : * calc matrix-vector product A.x = b. * atimes_param : parameters for atimes(). * it : struct iter. following entries are used * it->max = kend : max of iteration * it->eps = eps : criteria for |r^2|/|b^2| * OUTPUT * returned value : 0 == success, otherwise (-1) == failed * x [n] : solution * it->niter : # of iteration * it->res2 : |r^2| / |b^2| */ int cgs (int n, const double *b, double *x, void (*atimes) (int, const double *, double *, void *), void *atimes_param, struct iter *it) { #ifndef HAVE_CBLAS_H # ifdef HAVE_BLAS_H /* use Fortran BLAS routines */ int i_1 = 1; double d_m1 = -1.0; double d_2 = 2.0; # endif // !HAVE_BLAS_H #endif // !HAVE_CBLAS_H int ret = -1; double eps2 = it->eps * it->eps; int itmax = it->max; double *r = (double *)malloc (sizeof (double) * n); double *r0 = (double *)malloc (sizeof (double) * n); double *p = (double *)malloc (sizeof (double) * n); double *u = (double *)malloc (sizeof (double) * n); double *ap = (double *)malloc (sizeof (double) * n); double *q = (double *)malloc (sizeof (double) * n); double *t = (double *)malloc (sizeof (double) * n); CHECK_MALLOC (r, "cgs"); CHECK_MALLOC (r0, "cgs"); CHECK_MALLOC (p, "cgs"); CHECK_MALLOC (u, "cgs"); CHECK_MALLOC (ap, "cgs"); CHECK_MALLOC (q, "cgs"); CHECK_MALLOC (t, "cgs"); double r0ap; double rho, rho1; double delta; double beta; double res2 = 0.0; #ifdef HAVE_CBLAS_H /** * ATLAS version */ double b2 = cblas_ddot (n, b, 1, b, 1); // (b,b) eps2 *= b2; // initial residue atimes (n, x, r, atimes_param); // r = A.x cblas_daxpy (n, -1.0, b, 1, r, 1); // r = A.x - b cblas_dcopy (n, r, 1, r0, 1); // r0* = r cblas_dcopy (n, r, 1, p, 1); // p = r cblas_dcopy (n, r, 1, u, 1); // u = r rho = cblas_ddot (n, r0, 1, r, 1); // rho = (r0*, r) int i; for (i = 0; i < itmax; i ++) { atimes (n, p, ap, atimes_param); // ap = A.p r0ap = cblas_ddot (n, r0, 1, ap, 1); // r0ap = (r0*, A.p) delta = - rho / r0ap; cblas_dcopy (n, u, 1, q, 1); // q = u cblas_dscal (n, 2.0, q, 1); // q = 2 u cblas_daxpy (n, delta, ap, 1, q, 1); // q = 2 u + delta A.p atimes (n, q, t, atimes_param); // t = A.q cblas_daxpy (n, delta, t, 1, r, 1); // r = r + delta t cblas_daxpy (n, delta, q, 1, x, 1); // x = x + delta q res2 = cblas_ddot (n, r, 1, r, 1); if (it->debug == 2) { fprintf (it->out, "libiter-cgs %d %e\n", i, res2 / b2); } if (res2 <= eps2) { ret = 0; // success break; } rho1 = cblas_ddot (n, r0, 1, r, 1); // rho = (r0*, r) beta = rho1 / rho; rho = rho1; // here t is not used so that this is used for working area. double *qu = t; cblas_dcopy (n, q, 1, qu, 1); // qu = q cblas_daxpy (n, -1.0, u, 1, qu, 1); // qu = q - u cblas_dcopy (n, r, 1, u, 1); // u = r cblas_daxpy (n, beta, qu, 1, u, 1); // u = r + beta (q - u) cblas_daxpy (n, beta, p, 1, qu, 1); // qu = q - u + beta * p cblas_dcopy (n, u, 1, p, 1); // p = u cblas_daxpy (n, beta, qu, 1, p, 1); // p = u + beta (q - u + b * p) } #else // !HAVE_CBLAS_H # ifdef HAVE_BLAS_H /** * BLAS version */ double b2 = ddot_ (&n, b, &i_1, b, &i_1); // (b,b) eps2 *= b2; // initial residue atimes (n, x, r, atimes_param); // r = A.x daxpy_ (&n, &d_m1, b, &i_1, r, &i_1); // r = A.x - b dcopy_ (&n, r, &i_1, r0, &i_1); // r0* = r dcopy_ (&n, r, &i_1, p, &i_1); // p = r dcopy_ (&n, r, &i_1, u, &i_1); // u = r rho = ddot_ (&n, r0, &i_1, r, &i_1); // rho = (r0*, r) int i; for (i = 0; i < itmax; i ++) { atimes (n, p, ap, atimes_param); // ap = A.p r0ap = ddot_ (&n, r0, &i_1, ap, &i_1); // r0ap = (r0*, A.p) delta = - rho / r0ap; dcopy_ (&n, u, &i_1, q, &i_1); // q = u dscal_ (&n, &d_2, q, &i_1); // q = 2 u daxpy_ (&n, &delta, ap, &i_1, q, &i_1); // q = 2 u + delta A.p atimes (n, q, t, atimes_param); // t = A.q daxpy_ (&n, &delta, t, &i_1, r, &i_1); // r = r + delta t daxpy_ (&n, &delta, q, &i_1, x, &i_1); // x = x + delta q res2 = ddot_ (&n, r, &i_1, r, &i_1); if (it->debug == 2) { fprintf (it->out, "libiter-cgs %d %e\n", i, res2 / b2); } if (res2 <= eps2) { ret = 0; // success break; } rho1 = ddot_ (&n, r0, &i_1, r, &i_1); // rho = (r0*, r) beta = rho1 / rho; rho = rho1; // here t is not used so that this is used for working area. double *qu = t; dcopy_ (&n, q, &i_1, qu, &i_1); // qu = q daxpy_ (&n, &d_m1, u, &i_1, qu, &i_1); // qu = q - u dcopy_ (&n, r, &i_1, u, &i_1); // u = r daxpy_ (&n, &beta, qu, &i_1, u, &i_1); // u = r + beta (q - u) daxpy_ (&n, &beta, p, &i_1, qu, &i_1); // qu = q - u + beta * p dcopy_ (&n, u, &i_1, p, &i_1); // p = u daxpy_ (&n, &beta, qu, &i_1, p, &i_1); // p = u + beta (q - u + b * p) } # else // !HAVE_BLAS_H /** * local BLAS version */ double b2 = my_ddot (n, b, 1, b, 1); // (b,b) eps2 *= b2; // initial residue atimes (n, x, r, atimes_param); // r = A.x my_daxpy (n, -1.0, b, 1, r, 1); // r = A.x - b my_dcopy (n, r, 1, r0, 1); // r0* = r my_dcopy (n, r, 1, p, 1); // p = r my_dcopy (n, r, 1, u, 1); // u = r rho = my_ddot (n, r0, 1, r, 1); // rho = (r0*, r) int i; for (i = 0; i < itmax; i ++) { atimes (n, p, ap, atimes_param); // ap = A.p r0ap = my_ddot (n, r0, 1, ap, 1); // r0ap = (r0*, A.p) delta = - rho / r0ap; my_dcopy (n, u, 1, q, 1); // q = u my_dscal (n, 2.0, q, 1); // q = 2 u my_daxpy (n, delta, ap, 1, q, 1); // q = 2 u + delta A.p atimes (n, q, t, atimes_param); // t = A.q my_daxpy (n, delta, t, 1, r, 1); // r = r + delta t my_daxpy (n, delta, q, 1, x, 1); // x = x + delta q res2 = my_ddot (n, r, 1, r, 1); if (it->debug == 2) { fprintf (it->out, "libiter-cgs %d %e\n", i, res2 / b2); } if (res2 <= eps2) { ret = 0; // success break; } rho1 = my_ddot (n, r0, 1, r, 1); // rho = (r0*, r) beta = rho1 / rho; rho = rho1; // here t is not used so that this is used for working area. double *qu = t; my_dcopy (n, q, 1, qu, 1); // qu = q my_daxpy (n, -1.0, u, 1, qu, 1); // qu = q - u my_dcopy (n, r, 1, u, 1); // u = r my_daxpy (n, beta, qu, 1, u, 1); // u = r + beta (q - u) my_daxpy (n, beta, p, 1, qu, 1); // qu = q - u + beta * p my_dcopy (n, u, 1, p, 1); // p = u my_daxpy (n, beta, qu, 1, p, 1); // p = u + beta (q - u + b * p) } # endif // !HAVE_BLAS_H #endif // !HAVE_CBLAS_H free (r); free (r0); free (p); free (u); free (ap); free (q); free (t); if (it->debug == 1) { fprintf (it->out, "libiter-cgs it= %d res^2= %e\n", i, res2); } it->niter = i; it->res2 = res2 / b2; return (ret); }
/* Ref: Weiss, Algorithm 12 BiCGSTAB * INPUT * n : dimension of the problem * b [n] : r-h-s vector * atimes (int n, static double *x, double *b, void *param) : * calc matrix-vector product A.x = b. * atimes_param : parameters for atimes(). * it : struct iter. following entries are used * it->max = kend : max of iteration * it->eps = eps : criteria for |r^2|/|b^2| * OUTPUT * returned value : 0 == success, otherwise (-1) == failed * x [n] : solution * it->niter : # of iteration * it->res2 : |r^2| / |b^2| */ int bicgstab (int n, const double *b, double *x, void (*atimes) (int, const double *, double *, void *), void *atimes_param, struct iter *it) { #ifndef HAVE_CBLAS_H # ifdef HAVE_BLAS_H /* use Fortran BLAS routines */ int i_1 = 1; double d_1 = 1.0; double d_m1 = -1.0; # endif // !HAVE_BLAS_H #endif // !HAVE_CBLAS_H int ret = -1; double eps2 = it->eps * it->eps; int itmax = it->max; double *r = (double *)malloc (sizeof (double) * n); double *rs = (double *)malloc (sizeof (double) * n); double *p = (double *)malloc (sizeof (double) * n); double *ap = (double *)malloc (sizeof (double) * n); double *s = (double *)malloc (sizeof (double) * n); double *t = (double *)malloc (sizeof (double) * n); CHECK_MALLOC (r, "bicgstab"); CHECK_MALLOC (rs, "bicgstab"); CHECK_MALLOC (p, "bicgstab"); CHECK_MALLOC (ap, "bicgstab"); CHECK_MALLOC (s, "bicgstab"); CHECK_MALLOC (t, "bicgstab"); double rsap; // (r*, A.p) double st; double t2; double rho, rho1; double delta; double gamma; double beta; double res2 = 0.0; #ifdef HAVE_CBLAS_H /** * ATLAS version */ double b2 = cblas_ddot (n, b, 1, b, 1); // (b,b) eps2 *= b2; atimes (n, x, r, atimes_param); // r = A.x ... cblas_daxpy (n, -1.0, b, 1, r, 1); // - b cblas_dcopy (n, r, 1, rs, 1); // r* = r cblas_dcopy (n, r, 1, p, 1); // p = r rho = cblas_ddot (n, rs, 1, r, 1); // rho = (r*, r) int i; for (i = 0; i < itmax; i ++) { atimes (n, p, ap, atimes_param); // ap = A.p rsap = cblas_ddot (n, rs, 1, ap, 1); // rsap = (r*, A.p) delta = - rho / rsap; cblas_dcopy (n, r, 1, s, 1); // s = r ... cblas_daxpy (n, delta, ap, 1, s, 1); // + delta A.p atimes (n, s, t, atimes_param); // t = A.s st = cblas_ddot (n, s, 1, t, 1); // st = (s, t) t2 = cblas_ddot (n, t, 1, t, 1); // t2 = (t, t) gamma = - st / t2; cblas_dcopy (n, s, 1, r, 1); // r = s ... cblas_daxpy (n, gamma, t, 1, r, 1); // + gamma t cblas_daxpy (n, delta, p, 1, x, 1); // x = x + delta p... cblas_daxpy (n, gamma, s, 1, x, 1); // + gamma s res2 = cblas_ddot (n, r, 1, r, 1); if (it->debug == 2) { fprintf (it->out, "libiter-bicgstab(cblas) %d %e\n", i, res2 / b2); } if (res2 <= eps2) { ret = 0; // success break; } rho1 = cblas_ddot (n, rs, 1, r, 1); // rho = (r*, r) beta = rho1 / rho * delta / gamma; rho = rho1; cblas_daxpy (n, gamma, ap, 1, p, 1); // p = p + gamma A.p cblas_dscal (n, beta, p, 1); // p = beta (p + gamma A.p) cblas_daxpy (n, 1.0, r, 1, p, 1); // p = r + beta(p + gamma A.p) } #else // !HAVE_CBLAS_H # ifdef HAVE_BLAS_H /** * BLAS version */ double b2 = ddot_ (&n, b, &i_1, b, &i_1); // (b,b) eps2 *= b2; atimes (n, x, r, atimes_param); // r = A.x ... daxpy_ (&n, &d_m1, b, &i_1, r, &i_1); // - b dcopy_ (&n, r, &i_1, rs, &i_1); // r* = r dcopy_ (&n, r, &i_1, p, &i_1); // p = r rho = ddot_ (&n, rs, &i_1, r, &i_1); // rho = (r*, r) int i; for (i = 0; i < itmax; i ++) { atimes (n, p, ap, atimes_param); // ap = A.p rsap = ddot_ (&n, rs, &i_1, ap, &i_1); // rsap = (r*, A.p) delta = - rho / rsap; dcopy_ (&n, r, &i_1, s, &i_1); // s = r ... daxpy_ (&n, &delta, ap, &i_1, s, &i_1); // + delta A.p atimes (n, s, t, atimes_param); // t = A.s st = ddot_ (&n, s, &i_1, t, &i_1); // st = (s, t) t2 = ddot_ (&n, t, &i_1, t, &i_1); // t2 = (t, t) gamma = - st / t2; dcopy_ (&n, s, &i_1, r, &i_1); // r = s ... daxpy_ (&n, &gamma, t, &i_1, r, &i_1); // + gamma t daxpy_ (&n, &delta, p, &i_1, x, &i_1); // x = x + delta p... daxpy_ (&n, &gamma, s, &i_1, x, &i_1); // + gamma s res2 = ddot_ (&n, r, &i_1, r, &i_1); if (it->debug == 2) { fprintf (it->out, "libiter-bicgstab(blas) %d %e\n", i, res2 / b2); } if (res2 <= eps2) { ret = 0; // success break; } if (res2 > 1.0e20) { // already too big residual break; } rho1 = ddot_ (&n, rs, &i_1, r, &i_1); // rho = (r*, r) beta = rho1 / rho * delta / gamma; rho = rho1; daxpy_ (&n, &gamma, ap, &i_1, p, &i_1); // p = p + gamma A.p dscal_ (&n, &beta, p, &i_1); // p = beta (p + gamma A.p) daxpy_ (&n, &d_1, r, &i_1, p, &i_1); // p = r + beta(p + gamma A.p) } # else // !HAVE_BLAS_H /** * local BLAS version */ double b2 = my_ddot (n, b, 1, b, 1); // (b,b) eps2 *= b2; atimes (n, x, r, atimes_param); // r = A.x ... my_daxpy (n, -1.0, b, 1, r, 1); // - b my_dcopy (n, r, 1, rs, 1); // r* = r my_dcopy (n, r, 1, p, 1); // p = r rho = my_ddot (n, rs, 1, r, 1); // rho = (r*, r) int i; for (i = 0; i < itmax; i ++) { atimes (n, p, ap, atimes_param); // ap = A.p rsap = my_ddot (n, rs, 1, ap, 1); // rsap = (r*, A.p) delta = - rho / rsap; my_dcopy (n, r, 1, s, 1); // s = r ... my_daxpy (n, delta, ap, 1, s, 1); // + delta A.p atimes (n, s, t, atimes_param); // t = A.s st = my_ddot (n, s, 1, t, 1); // st = (s, t) t2 = my_ddot (n, t, 1, t, 1); // t2 = (t, t) gamma = - st / t2; my_dcopy (n, s, 1, r, 1); // r = s ... my_daxpy (n, gamma, t, 1, r, 1); // + gamma t my_daxpy (n, delta, p, 1, x, 1); // x = x + delta p... my_daxpy (n, gamma, s, 1, x, 1); // + gamma s res2 = my_ddot (n, r, 1, r, 1); if (it->debug == 2) { fprintf (it->out, "libiter-bicgstab(myblas) %d %e\n", i, res2 / b2); } if (res2 <= eps2) { ret = 0; // success break; } rho1 = my_ddot (n, rs, 1, r, 1); // rho = (r*, r) beta = rho1 / rho * delta / gamma; rho = rho1; my_daxpy (n, gamma, ap, 1, p, 1); // p = p + gamma A.p my_dscal (n, beta, p, 1); // p = beta (p + gamma A.p) my_daxpy (n, 1.0, r, 1, p, 1); // p = r + beta(p + gamma A.p) } # endif // !HAVE_BLAS_H #endif // !HAVE_CBLAS_H free (r); free (rs); free (p); free (ap); free (s); free (t); if (it->debug == 1) { fprintf (it->out, "libiter-bicgstab %d %e\n", i, res2 / b2); } it->niter = i; it->res2 = res2 / b2; return (ret); }
void PCBCGSolver::linbcg(unsigned long n, double b[], double x[], int itol, double tol, int itmax, int *iter, double *err) { unsigned long j; double ak,akden,bk,bkden,bknum,bnrm,dxnrm,xnrm,zm1nrm,znrm; double *p,*pp,*r,*rr,*z,*zz; p=new double[n+1]; pp=new double[n+1]; r=new double[n+1]; rr=new double[n+1]; z=new double[n+1]; zz=new double[n+1]; *iter=0; atimes(n,x,r,0); for (j=1; j<=n; j++) { r[j]=b[j]-r[j]; rr[j]=r[j]; } znrm=1.0; if (itol == 1) bnrm=snrm(n,b,itol); else if (itol == 2) { asolve(n,b,z,0); bnrm=snrm(n,z,itol); } else if (itol == 3 || itol == 4) { asolve(n,b,z,0); bnrm=snrm(n,z,itol); asolve(n,r,z,0); znrm=snrm(n,z,itol); } else printf("illegal itol in linbcg"); asolve(n,r,z,0); while (*iter <= itmax) { ++(*iter); zm1nrm=znrm; asolve(n,rr,zz,1); for (bknum=0.0,j=1; j<=n; j++) bknum += z[j]*rr[j]; if (*iter == 1) { for (j=1; j<=n; j++) { p[j]=z[j]; pp[j]=zz[j]; } } else { bk=bknum/bkden; for (j=1; j<=n; j++) { p[j]=bk*p[j]+z[j]; pp[j]=bk*pp[j]+zz[j]; } } bkden=bknum; atimes(n,p,z,0); for (akden=0.0,j=1; j<=n; j++) akden += z[j]*pp[j]; ak=bknum/akden; atimes(n,pp,zz,1); for (j=1; j<=n; j++) { x[j] += ak*p[j]; r[j] -= ak*z[j]; rr[j] -= ak*zz[j]; } asolve(n,r,z,0); if (itol == 1 || itol == 2) { znrm=1.0; *err=snrm(n,r,itol)/bnrm; } else if (itol == 3 || itol == 4) { znrm=snrm(n,z,itol); if (fabs(zm1nrm-znrm) > EPS*znrm) { dxnrm=fabs(ak)*snrm(n,p,itol); *err=znrm/fabs(zm1nrm-znrm)*dxnrm; } else { *err=znrm/bnrm; continue; } xnrm=snrm(n,x,itol); if (*err <= 0.5*xnrm) *err /= xnrm; else { *err=znrm/bnrm; continue; } } //printf("iter=%4d err=%12.6f\n",*iter,*err); if (*err <= tol) break; } delete [] p; delete [] pp; delete [] r; delete [] rr; delete [] z; delete [] zz; p = NULL; pp = NULL; r = NULL; rr = NULL; z = NULL; zz = NULL; }
void linbcg(unsigned long n, double b[], double x[], int itol, double tol, int itmax, int *iter, double *err) { void asolve(unsigned long n, double b[], double x[], int itrnsp); void atimes(unsigned long n, double x[], double r[], int itrnsp); double snrm(unsigned long n, double sx[], int itol); unsigned long j; double ak,akden,bk,bkden,bknum,bnrm,dxnrm,xnrm,zm1nrm,znrm; double *p,*pp,*r,*rr,*z,*zz; p=dvector(1,n); pp=dvector(1,n); r=dvector(1,n); rr=dvector(1,n); z=dvector(1,n); zz=dvector(1,n); *iter=0; atimes(n,x,r,0); for (j=1;j<=n;j++) { r[j]=b[j]-r[j]; rr[j]=r[j]; } znrm=1.0; if (itol == 1) bnrm=snrm(n,b,itol); else if (itol == 2) { asolve(n,b,z,0); bnrm=snrm(n,z,itol); } else if (itol == 3 || itol == 4) { asolve(n,b,z,0); bnrm=snrm(n,z,itol); asolve(n,r,z,0); znrm=snrm(n,z,itol); } else nrerror("illegal itol in linbcg"); asolve(n,r,z,0); while (*iter <= itmax) { ++(*iter); zm1nrm=znrm; asolve(n,rr,zz,1); for (bknum=0.0,j=1;j<=n;j++) bknum += z[j]*rr[j]; if (*iter == 1) { for (j=1;j<=n;j++) { p[j]=z[j]; pp[j]=zz[j]; } } else { bk=bknum/bkden; for (j=1;j<=n;j++) { p[j]=bk*p[j]+z[j]; pp[j]=bk*pp[j]+zz[j]; } } bkden=bknum; atimes(n,p,z,0); for (akden=0.0,j=1;j<=n;j++) akden += z[j]*pp[j]; ak=bknum/akden; atimes(n,pp,zz,1); for (j=1;j<=n;j++) { x[j] += ak*p[j]; r[j] -= ak*z[j]; rr[j] -= ak*zz[j]; } asolve(n,r,z,0); if (itol == 1 || itol == 2) { znrm=1.0; *err=snrm(n,r,itol)/bnrm; } else if (itol == 3 || itol == 4) { znrm=snrm(n,z,itol); if (fabs(zm1nrm-znrm) > EPS*znrm) { dxnrm=fabs(ak)*snrm(n,p,itol); *err=znrm/fabs(zm1nrm-znrm)*dxnrm; } else { *err=znrm/bnrm; continue; } xnrm=snrm(n,x,itol); if (*err <= 0.5*xnrm) *err /= xnrm; else { *err=znrm/bnrm; continue; } } printf("iter=%4d err=%12.6f\n",*iter,*err); if (*err <= tol) break; } free_dvector(p,1,n); free_dvector(pp,1,n); free_dvector(r,1,n); free_dvector(rr,1,n); free_dvector(z,1,n); free_dvector(zz,1,n); }
/* BICO -- Weiss' Algorithm 9 * INPUT * n : dimension of the problem * b[n] : r-h-s vector * atimes (int n, static double *x, double *b, void *param) : * calc matrix-vector product A.x = b. * atimes_t (int n, static double *x, double *b, void *param) : * calc matrix-vector product A^T.x = b. * atimes_param : parameters for atimes() and atimes_t(). * it : struct iter. following entries are used * it->max = kend : max of iteration * it->eps = eps : criteria for |r^2|/|b^2| * OUTPUT * returned value : 0 == success, otherwise (-1) == failed * x[n] : solution * it->niter : # of iteration * it->res2 : |r^2| / |b^2| */ int bico (int n, const double *b, double *x, void (*atimes) (int, const double *, double *, void *), void (*atimes_t) (int, const double *, double *, void *), void *atimes_param, struct iter *it) { int ret = -1; int itmax = it->max; double eps2 = it->eps * it->eps; int i_1 = 1; double d_1 = 1.0; double d_m1 = -1.0; double *xt = (double *)malloc (sizeof (double) * n); double *r = (double *)malloc (sizeof (double) * n); double *rt = (double *)malloc (sizeof (double) * n); double *rs = (double *)malloc (sizeof (double) * n); double *p = (double *)malloc (sizeof (double) * n); double *ps = (double *)malloc (sizeof (double) * n); double *atps = (double *)malloc (sizeof (double) * n); double *ap = (double *)malloc (sizeof (double) * n); double *rtmr = (double *)malloc (sizeof (double) * n); CHECK_MALLOC (xt, "bico"); CHECK_MALLOC (r, "bico"); CHECK_MALLOC (rt, "bico"); CHECK_MALLOC (rs, "bico"); CHECK_MALLOC (p, "bico"); CHECK_MALLOC (ps, "bico"); CHECK_MALLOC (atps, "bico"); CHECK_MALLOC (ap, "bico"); CHECK_MALLOC (rtmr, "bico"); double b2 = ddot_ (&n, b, &i_1, b, &i_1); eps2 *= b2; double res2 = 0.0; dcopy_ (&n, x, &i_1, xt, &i_1); // xt = x atimes (n, x, r, atimes_param); // r = A.x daxpy_ (&n, &d_m1, b, &i_1, r, &i_1); // r = A.x - b dcopy_ (&n, r, &i_1, rt, &i_1); // rt = r dcopy_ (&n, r, &i_1, rs, &i_1); // r* = r dcopy_ (&n, r, &i_1, p, &i_1); // p = r dcopy_ (&n, r, &i_1, ps, &i_1); // p* = r int i; for (i = 0; i < itmax; i ++) { atimes_t (n, ps, atps, atimes_param); // atps = At.p* double patps = ddot_ (&n, p, &i_1, atps, &i_1); // patps = (p, At.p*) double rtrs = ddot_ (&n, rt, &i_1, rs, &i_1); // rtrs = (rt, r*) double delta = - rtrs / patps; atimes (n, p, ap, atimes_param); // ap = A.p daxpy_ (&n, &delta, ap, &i_1, rt, &i_1); // rt += delta A.ps daxpy_ (&n, &delta, atps, &i_1, rs, &i_1); // r* += delta At.ps double rtrs1 = ddot_ (&n, rt, &i_1, rs, &i_1); // rtrs = (rt, r*) for news double beta = rtrs1 / rtrs; rtrs = rtrs1; daxpy_ (&n, &delta, p, &i_1, xt, &i_1); // xt += delta p(old) dscal_ (&n, &beta, p, &i_1); // p = beta p(old) daxpy_ (&n, &d_1, rt, &i_1, p, &i_1); // p = rt + beta p(old) dscal_ (&n, &beta, ps, &i_1); // p* = beta p*(old) daxpy_ (&n, &d_1, rs, &i_1, ps, &i_1); // p* = r* + beta p*(old) dcopy_ (&n, rt, &i_1, rtmr, &i_1); // rtmr = rt daxpy_ (&n, &d_m1, r, &i_1, rtmr, &i_1); // rtmr = rt - r // rtmr2 = (rt - r, rt - r) double rtmr2 = ddot_ (&n, rtmr, &i_1, rtmr, &i_1); double rrtmr = ddot_ (&n, r, &i_1, rtmr, &i_1); // rrtmr = (r, rt - r) double gamma = - rrtmr / rtmr2; double d_1_gamma = 1.0 - gamma; dscal_ (&n, &d_1_gamma, x, &i_1); // x = (1-gamma) x(old) daxpy_ (&n, &gamma, xt, &i_1, x, &i_1); // x += gamma(xt - x(old)) dscal_ (&n, &d_1_gamma, r, &i_1); // r = (1-gamma) r(old) daxpy_ (&n, &gamma, rt, &i_1, r, &i_1); // r += gamma(rt - r(old)) res2 = ddot_ (&n, r, &i_1, r, &i_1); if (it->debug == 2) { fprintf (it->out, "libiter-bico %d %e\n", i, res2 / b2); } if (res2 <= eps2) { ret = 0; // success break; } } free (xt); free (r); free (rt); free (rs); free (p); free (ps); free (atps); free (ap); free (rtmr); if (it->debug == 1) { fprintf (it->out, "libiter-bico %d %e\n", i, res2 / b2); } it->niter = i; it->res2 = res2 / b2; return (ret); }
/* obtain min and max of real part eigenvalues by dsaupd_() * INPUT * n : dimension of the matrix * atimes (n, x, b, user_data) : routine to calc A.x and return b[] * user_data : pointer to be passed to solver and atimes routines * eps : required precision * OUTPUT * l[2] : l[0] = min * l[1] = max */ void dsaupd_wrap_min_max (int n, double *l, void (*atimes) (int, const double *, double *, void *), void *user_data, double eps) { char bmat[2] = "I"; // standard eigenvalue problem A*x = lambda*x char SA[3] = "SA"; // compute the NEV smallest (algebraic) eigenvalues. char LA[3] = "LA"; // compute the NEV largest (algebraic) eigenvalues. int nev = 1; double *resid = (double *)malloc (sizeof (double) * n); CHECK_MALLOC (resid, "dsaupd_wrap_min_max"); int ncv; /* //ncv = 2 * nev + 1; ncv = 2 * nev * 14; if (ncv < 4) ncv = 4; if (ncv > n) ncv = n; */ ncv = n; //fprintf (stderr, "# n = %d, nev = %d, ncv = %d\n", n, nev, ncv); int ldv = n; double *v = (double *)malloc (sizeof (double) * ldv*ncv); CHECK_MALLOC (v, "dsaupd_wrap_min_max"); int iparam[11]; int ishift = 1; // exact shifts int maxitr = 3 * n; // max iterations of Arnoldi steps int mode = 1; // type of eigenproblem iparam[0] = ishift; // IPARAM(1) = ISHIFT iparam[2] = maxitr; // IPARAM(3) = MXITER iparam[6] = mode; // IPARAM(7) = MODE int ipntr[11]; int lworkl = ncv*(ncv+8); double *workd = (double *)malloc (sizeof (double) * 3 * n); double *workl = (double *)malloc (sizeof (double) * lworkl); CHECK_MALLOC (workd, "dsaupd_wrap_min_max"); CHECK_MALLOC (workl, "dsaupd_wrap_min_max"); // for post-process int rvec = 0; // false? (no eigenvectors) char howmny[2] = "A"; // Compute NEV Ritz vectors; int *select = (int *)malloc (sizeof (int) * ncv); double *d = (double *)malloc (sizeof (double) * nev); double *z = (double *)malloc (sizeof (double) * n * nev); CHECK_MALLOC (select, "dsaupd_wrap_min_max"); CHECK_MALLOC (d, "dsaupd_wrap_min_max"); CHECK_MALLOC (z, "dsaupd_wrap_min_max"); int ldz = n; double sigma; int ierr; // The Smallest Eigenvalue int ido = 0; // restart int info = 0; // a randomly initial residual vector is used. dsaupd_(&ido, bmat, &n, SA, &nev, &eps, resid, &ncv, v, &ldv, iparam, ipntr, workd, workl, &lworkl, &info); while (ido == -1 || ido == 1) { atimes (n, workd + ipntr[0] - 1, // workd(ipntr(1)) workd + ipntr[1] - 1, // workd(ipntr(2)) user_data); dsaupd_(&ido, bmat, &n, SA, &nev, &eps, resid, &ncv, v, &ldv, iparam, ipntr, workd, workl, &lworkl, &info); } if (info < 0) { fprintf (stdout, "Error with dsaupd;\n"); dsaupd_info (stderr, info); } else { dseupd_(&rvec, howmny, select, d, z, &ldz, &sigma, bmat, &n, SA, &nev, &eps, resid, &ncv, v, &ldv, iparam, ipntr, workd, workl, &lworkl, &ierr); if (ierr != 0) { fprintf (stdout, "Error with dseupd;\n"); dseupd_info (stdout, ierr); } else if (info != 0) { dsaupd_info (stderr, info); } /* int nconv = iparam[4]; fprintf (stdout, " _NDRV1 \n"); fprintf (stdout, " ====== \n\n"); fprintf (stdout, " Size of the matrix is %d\n", n); fprintf (stdout, " The number of Ritz values requested is %d\n", nev); fprintf (stdout, " The number of Arnoldi vectors generated" " (NCV) is %d\n", ncv); fprintf (stdout, " What portion of the spectrum: %s\n", SA); fprintf (stdout, " The number of converged Ritz values is %d\n", nconv); fprintf (stdout, " The number of Implicit Arnoldi update" " iterations taken is %d\n", iparam[2]); fprintf (stdout, " The number of OP*x is %d\n", iparam[8]); fprintf (stdout, " The convergence criterion is %e\n", eps); fprintf (stdout, "d [0] = %e + i %e\n", dr[0], di[0]); */ l[0] = d[0]; } // The Largest Eigenvalue ido = 0; info = 0; // a randomly initial residual vector is used. //info = 1; /* RESID contains the initial residual vector, * possibly from a previous run. */ dsaupd_(&ido, bmat, &n, LA, &nev, &eps, resid, &ncv, v, &ldv, iparam, ipntr, workd, workl, &lworkl, &info); while (ido == -1 || ido == 1) { atimes (n, workd + ipntr[0] - 1, // workd(ipntr(1)) workd + ipntr[1] - 1, // workd(ipntr(2)) user_data); dsaupd_(&ido, bmat, &n, LA, &nev, &eps, resid, &ncv, v, &ldv, iparam, ipntr, workd, workl, &lworkl, &info); } if (info < 0) { fprintf (stdout, "Error with dsaupd;\n"); dsaupd_info (stderr, info); } else { dseupd_(&rvec, howmny, select, d, z, &ldz, &sigma, bmat, &n, LA, &nev, &eps, resid, &ncv, v, &ldv, iparam, ipntr, workd, workl, &lworkl, &ierr); if (ierr != 0) { fprintf (stdout, "Error with dseupd;\n"); dseupd_info (stdout, ierr); } else if (info != 0) { dsaupd_info (stderr, info); } /* int nconv = iparam[4]; fprintf (stdout, "nconv = %d\n", nconv); fprintf (stdout, " _NDRV1 \n"); fprintf (stdout, " ====== \n\n"); fprintf (stdout, " Size of the matrix is %d\n", n); fprintf (stdout, " The number of Ritz values requested is %d\n", nev); fprintf (stdout, " The number of Arnoldi vectors generated" " (NCV) is %d\n", ncv); fprintf (stdout, " What portion of the spectrum: %s\n", LA); fprintf (stdout, " The number of converged Ritz values is %d\n", nconv); fprintf (stdout, " The number of Implicit Arnoldi update" " iterations taken is %d\n", iparam[2]); fprintf (stdout, " The number of OP*x is %d\n", iparam[8]); fprintf (stdout, " The convergence criterion is %e\n", eps); fprintf (stdout, "d [0] = %e + i %e\n", dr[0], di[0]); */ l[1] = d[0]; } free (resid); free (workd); free (workl); free (v); free (select); free (d); free (z); }
/* Classical CG method -- Weiss' Algorithm 2 * INPUT * n : dimension of the problem * b [n] : r-h-s vector * atimes (int n, static double *x, double *b, void *param) : * calc matrix-vector product A.x = b. * atimes_param : parameters for atimes(). * it : struct iter. following entries are used * it->max = kend : max of iteration * it->eps = eps : criteria for |r^2|/|b^2| * OUTPUT * returned value : 0 == success, otherwise (-1) == failed * x [n] : solution * it->niter : # of iteration * it->res2 : |r^2| / |b^2| */ int cg (int n, const double *b, double *x, void (*atimes) (int, const double *, double *, void *), void *atimes_param, struct iter *it) { #ifndef HAVE_CBLAS_H # ifdef HAVE_BLAS_H /* use Fortran BLAS routines */ int i_1 = 1; double d_1 = 1.0; double d_m1 = -1.0; # endif // !HAVE_BLAS_H #endif // !HAVE_CBLAS_H int ret = -1; double eps2 = it->eps * it->eps; int itmax = it->max; double *p = (double *)malloc (sizeof (double) * n); double *r = (double *)malloc (sizeof (double) * n); double *ap = (double *)malloc (sizeof (double) * n); CHECK_MALLOC (p, "cg"); CHECK_MALLOC (r, "cg"); CHECK_MALLOC (ap, "cg"); double r2; double res2 = 0.0; double pap; double gamma; double beta; int i; #ifdef HAVE_CBLAS_H /** * ATLAS version */ double b2 = cblas_ddot (n, b, 1, b, 1); // (b,b) eps2 *= b2; atimes (n, x, r, atimes_param); // r = A.x cblas_daxpy (n, -1.0, b, 1, r, 1); // r = A.x - b cblas_dcopy (n, r, 1, p, 1); // p = r for (i = 0; i < itmax; i ++) { r2 = cblas_ddot (n, r, 1, r, 1); // r2 = (r, r) atimes (n, p, ap, atimes_param); // ap = A.p pap = cblas_ddot (n, p, 1, ap, 1); // pap = (p, A.p) gamma = - r2 / pap; // gamma = - (r, r) / (p, A.p) cblas_daxpy (n, gamma, p, 1, x, 1); // x += gamma p cblas_daxpy (n, gamma, ap, 1, r, 1); // r += gamma Ap // new norm of r res2 = cblas_ddot (n, r, 1, r, 1); // (r, r) if (it->debug == 2) { fprintf (it->out, "libiter-cg %d %e\n", i, res2 / b2); } if (res2 <= eps2) { ret = 0; // success break; } beta = res2 / r2; // beta = (r, r) / (r0, r0) cblas_dscal (n, beta, p, 1); // p *= beta cblas_daxpy (n, 1.0, r, 1, p, 1); // p = r + beta p r2 = res2; } #else // !HAVE_CBLAS_H # ifdef HAVE_BLAS_H /** * BLAS version */ double b2 = ddot_ (&n, b, &i_1, b, &i_1); // (b,b) eps2 *= b2; atimes (n, x, r, atimes_param); // r = A.x daxpy_ (&n, &d_m1, b, &i_1, r, &i_1); // r = A.x - b dcopy_ (&n, r, &i_1, p, &i_1); // p = r for (i = 0; i < itmax; i ++) { r2 = ddot_ (&n, r, &i_1, r, &i_1); // r2 = (r, r) atimes (n, p, ap, atimes_param); // ap = A.p pap = ddot_ (&n, p, &i_1, ap, &i_1); // pap = (p, A.p) gamma = - r2 / pap; // gamma = - (r, r) / (p, A.p) daxpy_ (&n, &gamma, p, &i_1, x, &i_1); // x += gamma p daxpy_ (&n, &gamma, ap, &i_1, r, &i_1); // r += gamma Ap // new norm of r res2 = ddot_ (&n, r, &i_1, r, &i_1); // (r, r) if (it->debug == 2) { fprintf (it->out, "libiter-cg %d %e\n", i, res2 / b2); } if (res2 <= eps2) { ret = 0; // success break; } beta = res2 / r2; // beta = (r, r) / (r0, r0) dscal_ (&n, &beta, p, &i_1); // p *= beta daxpy_ (&n, &d_1, r, &i_1, p, &i_1); // p = r + beta p r2 = res2; } # else // !HAVE_BLAS_H /** * local BLAS version */ double b2 = my_ddot (n, b, 1, b, 1); // (b,b) eps2 *= b2; atimes (n, x, r, atimes_param); // r = A.x my_daxpy (n, -1.0, b, 1, r, 1); // r = A.x - b my_dcopy (n, r, 1, p, 1); // p = r for (i = 0; i < itmax; i ++) { r2 = my_ddot (n, r, 1, r, 1); // r2 = (r, r) atimes (n, p, ap, atimes_param); // ap = A.p pap = my_ddot (n, p, 1, ap, 1); // pap = (p, A.p) gamma = - r2 / pap; // gamma = - (r, r) / (p, A.p) my_daxpy (n, gamma, p, 1, x, 1); // x += gamma p my_daxpy (n, gamma, ap, 1, r, 1); // r += gamma Ap // new norm of r res2 = my_ddot (n, r, 1, r, 1); // (r, r) if (it->debug == 2) { fprintf (it->out, "libiter-cg %d %e\n", i, res2 / b2); } if (res2 <= eps2) { ret = 0; // success break; } beta = res2 / r2; // beta = (r, r) / (r0, r0) my_dscal (n, beta, p, 1); // p *= beta my_daxpy (n, 1.0, r, 1, p, 1); // p = r + beta p r2 = res2; } # endif // !HAVE_BLAS_H #endif // !HAVE_CBLAS_H free (p); free (r); free (ap); if (it->debug == 1) { fprintf (it->out, "libiter-cg it= %d res^2= %e\n", i, res2 / b2); } it->niter = i; it->res2 = res2 / b2; return (ret); }