static void test_mgcr(ITER *ip, int i, MAT *Q, MAT *R) #endif { VEC vt, vt1; static MAT *R1 = MNULL; static VEC *r = VNULL, *r1 = VNULL; VEC *rr; int k, j; Real sm; /* check Q*Q^T = I */ vt.dim = vt.max_dim = ip->b->dim; vt1.dim = vt1.max_dim = ip->b->dim; Q = m_resize(Q, i + 1, ip->b->dim); R1 = m_resize(R1, i + 1, i + 1); r = v_resize(r, ip->b->dim); r1 = v_resize(r1, ip->b->dim); MEM_STAT_REG(R1, TYPE_MAT); MEM_STAT_REG(r, TYPE_VEC); MEM_STAT_REG(r1, TYPE_VEC); m_zero(R1); for (k = 1; k <= i; k++) for (j = 1; j <= i; j++) { vt.ve = Q->me[k]; vt1.ve = Q->me[j]; R1->me[k][j] = in_prod(&vt, &vt1); } for (j = 1; j <= i; j++) R1->me[j][j] -= 1.0; #ifndef MEX if (m_norm_inf(R1) > MACHEPS * ip->b->dim) printf(" ! (mgcr:) m_norm_inf(Q*Q^T) = %g\n", m_norm_inf(R1)); #endif /* check (r_i,Ap_j) = 0 for j <= i */ ip->Ax(ip->A_par, ip->x, r); v_sub(ip->b, r, r); rr = r; if (ip->Bx) { ip->Bx(ip->B_par, r, r1); rr = r1; } #ifndef MEX printf(" ||r|| = %g\n", v_norm2(rr)); #endif sm = 0.0; for (j = 1; j <= i; j++) { vt.ve = Q->me[j]; sm = max(sm, in_prod(&vt,rr)); } #ifndef MEX if (sm >= MACHEPS * ip->b->dim) printf(" ! (mgcr:) max_j (r,Ap_j) = %g\n", sm); #endif }
static double calc_ll(MAT *Vw, MAT *X, VEC *y, int n) { /* * calculate negative log-likelyhood */ static MAT *M1 = MNULL; static VEC *res = VNULL, *tmp = VNULL; double zQz; volatile double ldet; int i; IminAw->m -= n; /* |B'(I-A)Vw(I-A)'B|, pretty inefficiently, can 4 x as fast: */ /* M1 = m_mlt(IminAw, Vw, M1); M2 = mmtr_mlt(M1, IminAw, M2); */ M1 = XVXt_mlt(IminAw, Vw, M1); LDLfactor(M1); for (i = 0, ldet = 0.0; i < M1->m; i++) { assert(M1->me[i][i] > 0.0); ldet += log(M1->me[i][i]); } /* y'B'A'(B'A'Vw A B)-1 A B y */ res = mv_mlt(IminAw, y, res); /* the m-n residuals B(I-A)'Y */ tmp = LDLsolve(M1, res, tmp); /* M1 tmp = res -> tmp = M1-1 res */ zQz = in_prod(res, tmp); /* res' M1inv res */ IminAw->m += n; return 0.5 * ((Vw->m - n)*log(2*PI) + ldet + zQz); }
static void calc_rhs_Tr_m(int n_models, MAT **Vk,MAT *VinvIminAw, VEC *y, VEC *rhs, MAT *Tr_m) { int j, k; MAT **Pr = NULL, *Tmp = MNULL; VEC *v_tmp = VNULL, *v_tmp2; Pr = (MAT **) emalloc(n_models * sizeof(MAT *)); v_tmp2 = vm_mlt(VinvIminAw, y, VNULL); /* Vw-(I-Aw)Y == Y'(I-Aw)'Vw- */ for (j = 0; j < n_models; j++) { Pr[j] = m_mlt(Vk[j], VinvIminAw, MNULL); Tmp = m_mlt(Pr[j], Pr[j], Tmp); Tr_m->me[j][j] = trace_matrix(Tmp); /* diagonal */ /* using Tr(A B) == Tr(B A) */ for (k = 0; k < j; k++) { /* we did Pr[k] and Pr[j], so */ Tmp = m_mlt(Pr[j], Pr[k], Tmp); /* off-diagonal */ Tr_m->me[j][k] = Tr_m->me[k][j] = trace_matrix(Tmp); } v_tmp = vm_mlt(Vk[j], v_tmp2, v_tmp); /* Vw-1(I-Aw)Y */ rhs->ve[j] = in_prod(v_tmp2, v_tmp); } for (j = 0; j < n_models; j++) m_free(Pr[j]); efree(Pr); m_free(Tmp); v_free(v_tmp); v_free(v_tmp2); return; }
VEC *iter_mgcr(ITER *ip) #endif { STATIC VEC *As=VNULL, *beta=VNULL, *alpha=VNULL, *z=VNULL; STATIC MAT *N=MNULL, *H=MNULL; VEC *rr, v, s; /* additional pointer and structures */ Real nres; /* norm of a residual */ Real dd; /* coefficient d_i */ int i,j; int done; /* if TRUE then stop the iterative process */ int dim; /* dimension of the problem */ /* ip cannot be NULL */ if (ip == INULL) error(E_NULL,"mgcr"); /* Ax, b and stopping criterion must be given */ if (! ip->Ax || ! ip->b || ! ip->stop_crit) error(E_NULL,"mgcr"); /* at least one direction vector must exist */ if ( ip->k <= 0) error(E_BOUNDS,"mgcr"); /* if the vector x is given then b and x must have the same dimension */ if ( ip->x && ip->x->dim != ip->b->dim) error(E_SIZES,"mgcr"); if (ip->eps <= 0.0) ip->eps = MACHEPS; dim = ip->b->dim; As = v_resize(As,dim); alpha = v_resize(alpha,ip->k); beta = v_resize(beta,ip->k); MEM_STAT_REG(As,TYPE_VEC); MEM_STAT_REG(alpha,TYPE_VEC); MEM_STAT_REG(beta,TYPE_VEC); H = m_resize(H,ip->k,ip->k); N = m_resize(N,ip->k,dim); MEM_STAT_REG(H,TYPE_MAT); MEM_STAT_REG(N,TYPE_MAT); /* if a preconditioner is defined */ if (ip->Bx) { z = v_resize(z,dim); MEM_STAT_REG(z,TYPE_VEC); } /* if x is NULL then it is assumed that x has entries with value zero */ if ( ! ip->x ) { ip->x = v_get(ip->b->dim); ip->shared_x = FALSE; } /* v and s are additional pointers to rows of N */ /* they must have the same dimension as rows of N */ v.dim = v.max_dim = s.dim = s.max_dim = dim; done = FALSE; for (ip->steps = 0; ip->steps < ip->limit; ) { (*ip->Ax)(ip->A_par,ip->x,As); /* As = A*x */ v_sub(ip->b,As,As); /* As = b - A*x */ rr = As; /* rr is an additional pointer */ /* if a preconditioner is defined */ if (ip->Bx) { (*ip->Bx)(ip->B_par,As,z); /* z = B*(b-A*x) */ rr = z; } /* norm of the residual */ nres = v_norm2(rr); dd = nres; /* dd = ||r_i|| */ /* check if the norm of the residual is zero */ if (ip->steps == 0) { /* information for a user */ if (ip->info) (*ip->info)(ip,nres,As,rr); ip->init_res = fabs(nres); } if (nres == 0.0) { /* iterative process is finished */ done = TRUE; break; } /* save this residual in the first row of N */ v.ve = N->me[0]; v_copy(rr,&v); for (i = 0; i < ip->k && ip->steps < ip->limit; i++) { ip->steps++; v.ve = N->me[i]; /* pointer to a row of N (=s_i) */ /* note that we must use here &v, not v */ (*ip->Ax)(ip->A_par,&v,As); rr = As; /* As = A*s_i */ if (ip->Bx) { (*ip->Bx)(ip->B_par,As,z); /* z = B*A*s_i */ rr = z; } if (i < ip->k - 1) { s.ve = N->me[i+1]; /* pointer to a row of N (=s_{i+1}) */ v_copy(rr,&s); /* s_{i+1} = B*A*s_i */ for (j = 0; j <= i-1; j++) { v.ve = N->me[j+1]; /* pointer to a row of N (=s_{j+1}) */ /* beta->ve[j] = in_prod(&v,rr); */ /* beta_{j,i} */ /* modified Gram-Schmidt algorithm */ beta->ve[j] = in_prod(&v,&s); /* beta_{j,i} */ /* s_{i+1} -= beta_{j,i}*s_{j+1} */ v_mltadd(&s,&v,- beta->ve[j],&s); } /* beta_{i,i} = ||s_{i+1}||_2 */ beta->ve[i] = nres = v_norm2(&s); if ( nres <= MACHEPS*ip->init_res) { /* s_{i+1} == 0 */ i--; done = TRUE; break; } sv_mlt(1.0/nres,&s,&s); /* normalize s_{i+1} */ v.ve = N->me[0]; alpha->ve[i] = in_prod(&v,&s); /* alpha_i = (s_0 , s_{i+1}) */ } else { for (j = 0; j <= i-1; j++) { v.ve = N->me[j+1]; /* pointer to a row of N (=s_{j+1}) */ beta->ve[j] = in_prod(&v,rr); /* beta_{j,i} */ } nres = in_prod(rr,rr); /* rr = B*A*s_{k-1} */ for (j = 0; j <= i-1; j++) nres -= beta->ve[j]*beta->ve[j]; if (sqrt(fabs(nres)) <= MACHEPS*ip->init_res) { /* s_k is zero */ i--; done = TRUE; break; } if (nres < 0.0) { /* do restart */ i--; ip->steps--; break; } beta->ve[i] = sqrt(nres); /* beta_{k-1,k-1} */ v.ve = N->me[0]; alpha->ve[i] = in_prod(&v,rr); for (j = 0; j <= i-1; j++) alpha->ve[i] -= beta->ve[j]*alpha->ve[j]; alpha->ve[i] /= beta->ve[i]; /* alpha_{k-1} */ } set_col(H,i,beta); /* other method of computing dd */ /* if (fabs((double)alpha->ve[i]) > dd) { nres = - dd*dd + alpha->ve[i]*alpha->ve[i]; nres = sqrt((double) nres); if (ip->info) (*ip->info)(ip,-nres,VNULL,VNULL); break; } */ /* to avoid overflow/underflow in computing dd */ /* dd *= cos(asin((double)(alpha->ve[i]/dd))); */ nres = alpha->ve[i]/dd; if (fabs(nres-1.0) <= MACHEPS*ip->init_res) dd = 0.0; else { nres = 1.0 - nres*nres; if (nres < 0.0) { nres = sqrt((double) -nres); if (ip->info) (*ip->info)(ip,-dd*nres,VNULL,VNULL); break; } dd *= sqrt((double) nres); } if (ip->info) (*ip->info)(ip,dd,VNULL,VNULL); if ( ip->stop_crit(ip,dd,VNULL,VNULL) ) { /* stopping criterion is satisfied */ done = TRUE; break; } } /* end of for */ if (i >= ip->k) i = ip->k - 1; /* use (i+1) by (i+1) submatrix of H */ H = m_resize(H,i+1,i+1); alpha = v_resize(alpha,i+1); Usolve(H,alpha,alpha,0.0); /* c_i is saved in alpha */ for (j = 0; j <= i; j++) { v.ve = N->me[j]; v_mltadd(ip->x,&v,alpha->ve[j],ip->x); } if (done) break; /* stop the iterative process */ alpha = v_resize(alpha,ip->k); H = m_resize(H,ip->k,ip->k); } /* end of while */ #ifdef THREADSAFE V_FREE(As); V_FREE(beta); V_FREE(alpha); V_FREE(z); M_FREE(N); M_FREE(H); #endif return ip->x; /* return the solution */ }
VEC *iter_gmres(ITER *ip) #endif { STATIC VEC *u=VNULL, *r=VNULL, *rhs = VNULL; STATIC VEC *givs=VNULL, *givc=VNULL, *z = VNULL; STATIC MAT *Q = MNULL, *R = MNULL; VEC *rr, v, v1; /* additional pointers (not real vectors) */ int i,j, done; Real nres; /* Real last_h; */ if (ip == INULL) error(E_NULL,"iter_gmres"); if ( ! ip->Ax || ! ip->b ) error(E_NULL,"iter_gmres"); if ( ! ip->stop_crit ) error(E_NULL,"iter_gmres"); if ( ip->k <= 0 ) error(E_BOUNDS,"iter_gmres"); if (ip->x != VNULL && ip->x->dim != ip->b->dim) error(E_SIZES,"iter_gmres"); if (ip->eps <= 0.0) ip->eps = MACHEPS; r = v_resize(r,ip->k+1); u = v_resize(u,ip->b->dim); rhs = v_resize(rhs,ip->k+1); givs = v_resize(givs,ip->k); /* Givens rotations */ givc = v_resize(givc,ip->k); MEM_STAT_REG(r,TYPE_VEC); MEM_STAT_REG(u,TYPE_VEC); MEM_STAT_REG(rhs,TYPE_VEC); MEM_STAT_REG(givs,TYPE_VEC); MEM_STAT_REG(givc,TYPE_VEC); R = m_resize(R,ip->k+1,ip->k); Q = m_resize(Q,ip->k,ip->b->dim); MEM_STAT_REG(R,TYPE_MAT); MEM_STAT_REG(Q,TYPE_MAT); if (ip->x == VNULL) { /* ip->x == 0 */ ip->x = v_get(ip->b->dim); ip->shared_x = FALSE; } v.dim = v.max_dim = ip->b->dim; /* v and v1 are pointers to rows */ v1.dim = v1.max_dim = ip->b->dim; /* of matrix Q */ if (ip->Bx != (Fun_Ax)NULL) { /* if precondition is defined */ z = v_resize(z,ip->b->dim); MEM_STAT_REG(z,TYPE_VEC); } done = FALSE; for (ip->steps = 0; ip->steps < ip->limit; ) { /* restart */ ip->Ax(ip->A_par,ip->x,u); /* u = A*x */ v_sub(ip->b,u,u); /* u = b - A*x */ rr = u; /* rr is a pointer only */ if (ip->Bx) { (ip->Bx)(ip->B_par,u,z); /* tmp = B*(b-A*x) */ rr = z; } nres = v_norm2(rr); if (ip->steps == 0) { if (ip->info) ip->info(ip,nres,VNULL,VNULL); ip->init_res = nres; } if ( nres == 0.0 ) { done = TRUE; break; } v.ve = Q->me[0]; sv_mlt(1.0/nres,rr,&v); v_zero(r); v_zero(rhs); rhs->ve[0] = nres; for ( i = 0; i < ip->k && ip->steps < ip->limit; i++ ) { ip->steps++; v.ve = Q->me[i]; (ip->Ax)(ip->A_par,&v,u); rr = u; if (ip->Bx) { (ip->Bx)(ip->B_par,u,z); rr = z; } if (i < ip->k - 1) { v1.ve = Q->me[i+1]; v_copy(rr,&v1); for (j = 0; j <= i; j++) { v.ve = Q->me[j]; /* r->ve[j] = in_prod(&v,rr); */ /* modified Gram-Schmidt algorithm */ r->ve[j] = in_prod(&v,&v1); v_mltadd(&v1,&v,-r->ve[j],&v1); } r->ve[i+1] = nres = v_norm2(&v1); if (nres <= MACHEPS*ip->init_res) { for (j = 0; j < i; j++) rot_vec(r,j,j+1,givc->ve[j],givs->ve[j],r); set_col(R,i,r); done = TRUE; break; } sv_mlt(1.0/nres,&v1,&v1); } else { /* i == ip->k - 1 */ /* Q->me[ip->k] need not be computed */ for (j = 0; j <= i; j++) { v.ve = Q->me[j]; r->ve[j] = in_prod(&v,rr); } nres = in_prod(rr,rr) - in_prod(r,r); if (sqrt(fabs(nres)) <= MACHEPS*ip->init_res) { for (j = 0; j < i; j++) rot_vec(r,j,j+1,givc->ve[j],givs->ve[j],r); set_col(R,i,r); done = TRUE; break; } if (nres < 0.0) { /* do restart */ i--; ip->steps--; break; } r->ve[i+1] = sqrt(nres); } /* QR update */ /* last_h = r->ve[i+1]; */ /* for test only */ for (j = 0; j < i; j++) rot_vec(r,j,j+1,givc->ve[j],givs->ve[j],r); givens(r->ve[i],r->ve[i+1],&givc->ve[i],&givs->ve[i]); rot_vec(r,i,i+1,givc->ve[i],givs->ve[i],r); rot_vec(rhs,i,i+1,givc->ve[i],givs->ve[i],rhs); set_col(R,i,r); nres = fabs((double) rhs->ve[i+1]); if (ip->info) ip->info(ip,nres,VNULL,VNULL); if ( ip->stop_crit(ip,nres,VNULL,VNULL) ) { done = TRUE; break; } } /* use ixi submatrix of R */ if (i >= ip->k) i = ip->k - 1; R = m_resize(R,i+1,i+1); rhs = v_resize(rhs,i+1); /* test only */ /* test_gmres(ip,i,Q,R,givc,givs,last_h); */ Usolve(R,rhs,rhs,0.0); /* solve a system: R*x = rhs */ /* new approximation */ for (j = 0; j <= i; j++) { v.ve = Q->me[j]; v_mltadd(ip->x,&v,rhs->ve[j],ip->x); } if (done) break; /* back to old dimensions */ rhs = v_resize(rhs,ip->k+1); R = m_resize(R,ip->k+1,ip->k); } #ifdef THREADSAFE V_FREE(u); V_FREE(r); V_FREE(rhs); V_FREE(givs); V_FREE(givc); V_FREE(z); M_FREE(Q); M_FREE(R); #endif return ip->x; }
VEC *iter_cgs(ITER *ip, VEC *r0) #endif { STATIC VEC *p = VNULL, *q = VNULL, *r = VNULL, *u = VNULL; STATIC VEC *v = VNULL, *z = VNULL; VEC *tmp; Real alpha, beta, nres, rho, old_rho, sigma, inner; if (ip == INULL) error(E_NULL,"iter_cgs"); if (!ip->Ax || !ip->b || !r0) error(E_NULL,"iter_cgs"); if ( ip->x == ip->b ) error(E_INSITU,"iter_cgs"); if (!ip->stop_crit) error(E_NULL,"iter_cgs"); if ( r0->dim != ip->b->dim ) error(E_SIZES,"iter_cgs"); if ( ip->eps <= 0.0 ) ip->eps = MACHEPS; p = v_resize(p,ip->b->dim); q = v_resize(q,ip->b->dim); r = v_resize(r,ip->b->dim); u = v_resize(u,ip->b->dim); v = v_resize(v,ip->b->dim); MEM_STAT_REG(p,TYPE_VEC); MEM_STAT_REG(q,TYPE_VEC); MEM_STAT_REG(r,TYPE_VEC); MEM_STAT_REG(u,TYPE_VEC); MEM_STAT_REG(v,TYPE_VEC); if (ip->Bx) { z = v_resize(z,ip->b->dim); MEM_STAT_REG(z,TYPE_VEC); } if (ip->x != VNULL) { if (ip->x->dim != ip->b->dim) error(E_SIZES,"iter_cgs"); ip->Ax(ip->A_par,ip->x,v); /* v = A*x */ if (ip->Bx) { v_sub(ip->b,v,v); /* v = b - A*x */ (ip->Bx)(ip->B_par,v,r); /* r = B*(b-A*x) */ } else v_sub(ip->b,v,r); /* r = b-A*x */ } else { /* ip->x == 0 */ ip->x = v_get(ip->b->dim); /* x == 0 */ ip->shared_x = FALSE; if (ip->Bx) (ip->Bx)(ip->B_par,ip->b,r); /* r = B*b */ else v_copy(ip->b,r); /* r = b */ } v_zero(p); v_zero(q); old_rho = 1.0; for (ip->steps = 0; ip->steps <= ip->limit; ip->steps++) { inner = in_prod(r,r); nres = sqrt(fabs(inner)); if (ip->steps == 0) ip->init_res = nres; if (ip->info) ip->info(ip,nres,r,VNULL); if ( ip->stop_crit(ip,nres,r,VNULL) ) break; rho = in_prod(r0,r); if ( old_rho == 0.0 ) error(E_BREAKDOWN,"iter_cgs"); beta = rho/old_rho; v_mltadd(r,q,beta,u); v_mltadd(q,p,beta,v); v_mltadd(u,v,beta,p); (ip->Ax)(ip->A_par,p,q); if (ip->Bx) { (ip->Bx)(ip->B_par,q,z); tmp = z; } else tmp = q; sigma = in_prod(r0,tmp); if ( sigma == 0.0 ) error(E_BREAKDOWN,"iter_cgs"); alpha = rho/sigma; v_mltadd(u,tmp,-alpha,q); v_add(u,q,v); (ip->Ax)(ip->A_par,v,u); if (ip->Bx) { (ip->Bx)(ip->B_par,u,z); tmp = z; } else tmp = u; v_mltadd(r,tmp,-alpha,r); v_mltadd(ip->x,v,alpha,ip->x); old_rho = rho; } #ifdef THREADSAFE V_FREE(p); V_FREE(q); V_FREE(r); V_FREE(u); V_FREE(v); V_FREE(z); #endif return ip->x; }
MAT *iter_arnoldi(ITER *ip, Real *h_rem, MAT *Q, MAT *H) #endif { STATIC VEC *u=VNULL, *r=VNULL; VEC v; /* auxiliary vector */ int i,j; Real h_val, c; if (ip == INULL) error(E_NULL,"iter_arnoldi"); if ( ! ip->Ax || ! Q || ! ip->x ) error(E_NULL,"iter_arnoldi"); if ( ip->k <= 0 ) error(E_BOUNDS,"iter_arnoldi"); if ( Q->n != ip->x->dim || Q->m != ip->k ) error(E_SIZES,"iter_arnoldi"); m_zero(Q); H = m_resize(H,ip->k,ip->k); m_zero(H); u = v_resize(u,ip->x->dim); r = v_resize(r,ip->k); MEM_STAT_REG(u,TYPE_VEC); MEM_STAT_REG(r,TYPE_VEC); v.dim = v.max_dim = ip->x->dim; c = v_norm2(ip->x); if ( c <= 0.0) return H; else { v.ve = Q->me[0]; sv_mlt(1.0/c,ip->x,&v); } v_zero(r); for ( i = 0; i < ip->k; i++ ) { v.ve = Q->me[i]; u = (ip->Ax)(ip->A_par,&v,u); for (j = 0; j <= i; j++) { v.ve = Q->me[j]; /* modified Gram-Schmidt */ r->ve[j] = in_prod(&v,u); v_mltadd(u,&v,-r->ve[j],u); } h_val = v_norm2(u); /* if u == 0 then we have an exact subspace */ if ( h_val <= 0.0 ) { *h_rem = h_val; return H; } set_col(H,i,r); if ( i == ip->k-1 ) { *h_rem = h_val; continue; } /* H->me[i+1][i] = h_val; */ m_set_val(H,i+1,i,h_val); v.ve = Q->me[i+1]; sv_mlt(1.0/h_val,u,&v); } #ifdef THREADSAFE V_FREE(u); V_FREE(r); #endif return H; }
MAT *iter_arnoldi_iref(ITER *ip, Real *h_rem, MAT *Q, MAT *H) #endif { STATIC VEC *u=VNULL, *r=VNULL, *s=VNULL, *tmp=VNULL; VEC v; /* auxiliary vector */ int i,j; Real h_val, c; if (ip == INULL) error(E_NULL,"iter_arnoldi_iref"); if ( ! ip->Ax || ! Q || ! ip->x ) error(E_NULL,"iter_arnoldi_iref"); if ( ip->k <= 0 ) error(E_BOUNDS,"iter_arnoldi_iref"); if ( Q->n != ip->x->dim || Q->m != ip->k ) error(E_SIZES,"iter_arnoldi_iref"); m_zero(Q); H = m_resize(H,ip->k,ip->k); m_zero(H); u = v_resize(u,ip->x->dim); r = v_resize(r,ip->k); s = v_resize(s,ip->k); tmp = v_resize(tmp,ip->x->dim); MEM_STAT_REG(u,TYPE_VEC); MEM_STAT_REG(r,TYPE_VEC); MEM_STAT_REG(s,TYPE_VEC); MEM_STAT_REG(tmp,TYPE_VEC); v.dim = v.max_dim = ip->x->dim; c = v_norm2(ip->x); if ( c <= 0.0) return H; else { v.ve = Q->me[0]; sv_mlt(1.0/c,ip->x,&v); } v_zero(r); v_zero(s); for ( i = 0; i < ip->k; i++ ) { v.ve = Q->me[i]; u = (ip->Ax)(ip->A_par,&v,u); for (j = 0; j <= i; j++) { v.ve = Q->me[j]; /* modified Gram-Schmidt */ r->ve[j] = in_prod(&v,u); v_mltadd(u,&v,-r->ve[j],u); } h_val = v_norm2(u); /* if u == 0 then we have an exact subspace */ if ( h_val <= 0.0 ) { *h_rem = h_val; return H; } /* iterative refinement -- ensures near orthogonality */ do { v_zero(tmp); for (j = 0; j <= i; j++) { v.ve = Q->me[j]; s->ve[j] = in_prod(&v,u); v_mltadd(tmp,&v,s->ve[j],tmp); } v_sub(u,tmp,u); v_add(r,s,r); } while ( v_norm2(s) > 0.1*(h_val = v_norm2(u)) ); /* now that u is nearly orthogonal to Q, update H */ set_col(H,i,r); /* check once again if h_val is zero */ if ( h_val <= 0.0 ) { *h_rem = h_val; return H; } if ( i == ip->k-1 ) { *h_rem = h_val; continue; } /* H->me[i+1][i] = h_val; */ m_set_val(H,i+1,i,h_val); v.ve = Q->me[i+1]; sv_mlt(1.0/h_val,u,&v); } #ifdef THREADSAFE V_FREE(u); V_FREE(r); V_FREE(s); V_FREE(tmp); #endif return H; }
VEC *iter_cgne(ITER *ip) #endif { STATIC VEC *r = VNULL, *p = VNULL, *q = VNULL, *z = VNULL; Real alpha, beta, inner, old_inner, nres; VEC *rr1; /* pointer only */ if (ip == INULL) error(E_NULL,"iter_cgne"); if (!ip->Ax || ! ip->ATx || !ip->b) error(E_NULL,"iter_cgne"); if ( ip->x == ip->b ) error(E_INSITU,"iter_cgne"); if (!ip->stop_crit) error(E_NULL,"iter_cgne"); if ( ip->eps <= 0.0 ) ip->eps = MACHEPS; r = v_resize(r,ip->b->dim); p = v_resize(p,ip->b->dim); q = v_resize(q,ip->b->dim); MEM_STAT_REG(r,TYPE_VEC); MEM_STAT_REG(p,TYPE_VEC); MEM_STAT_REG(q,TYPE_VEC); z = v_resize(z,ip->b->dim); MEM_STAT_REG(z,TYPE_VEC); if (ip->x) { if (ip->x->dim != ip->b->dim) error(E_SIZES,"iter_cgne"); ip->Ax(ip->A_par,ip->x,p); /* p = A*x */ v_sub(ip->b,p,z); /* z = b - A*x */ } else { /* ip->x == 0 */ ip->x = v_get(ip->b->dim); ip->shared_x = FALSE; v_copy(ip->b,z); } rr1 = z; if (ip->Bx) { (ip->Bx)(ip->B_par,rr1,p); rr1 = p; } (ip->ATx)(ip->AT_par,rr1,r); /* r = A^T*B*(b-A*x) */ old_inner = 0.0; for ( ip->steps = 0; ip->steps <= ip->limit; ip->steps++ ) { rr1 = r; if ( ip->Bx ) { (ip->Bx)(ip->B_par,r,z); /* rr = B*r */ rr1 = z; } inner = in_prod(r,rr1); nres = sqrt(fabs(inner)); if (ip->info) ip->info(ip,nres,r,rr1); if (ip->steps == 0) ip->init_res = nres; if ( ip->stop_crit(ip,nres,r,rr1) ) break; if ( ip->steps ) /* if ( ip->steps > 0 ) ... */ { beta = inner/old_inner; p = v_mltadd(rr1,p,beta,p); } else /* if ( ip->steps == 0 ) ... */ { beta = 0.0; p = v_copy(rr1,p); old_inner = 0.0; } (ip->Ax)(ip->A_par,p,q); /* q = A*p */ if (ip->Bx) { (ip->Bx)(ip->B_par,q,z); (ip->ATx)(ip->AT_par,z,q); rr1 = q; /* q = A^T*B*A*p */ } else { (ip->ATx)(ip->AT_par,q,z); /* z = A^T*A*p */ rr1 = z; } alpha = inner/in_prod(rr1,p); v_mltadd(ip->x,p,alpha,ip->x); v_mltadd(r,rr1,-alpha,r); old_inner = inner; } #ifdef THREADSAFE V_FREE(r); V_FREE(p); V_FREE(q); V_FREE(z); #endif return ip->x; }
VEC *iter_cg1(ITER *ip) #endif { STATIC VEC *r = VNULL, *p = VNULL, *q = VNULL, *z = VNULL; Real alpha; double inner,nres; VEC *rr; /* rr == r or rr == z */ if (ip == INULL) error(E_NULL,"iter_cg"); if (!ip->Ax || !ip->b) error(E_NULL,"iter_cg"); if ( ip->x == ip->b ) error(E_INSITU,"iter_cg"); if (!ip->stop_crit) error(E_NULL,"iter_cg"); if ( ip->eps <= 0.0 ) ip->eps = MACHEPS; r = v_resize(r,ip->b->dim); p = v_resize(p,ip->b->dim); q = v_resize(q,ip->b->dim); MEM_STAT_REG(r,TYPE_VEC); MEM_STAT_REG(p,TYPE_VEC); MEM_STAT_REG(q,TYPE_VEC); if (ip->Bx != (Fun_Ax)NULL) { z = v_resize(z,ip->b->dim); MEM_STAT_REG(z,TYPE_VEC); rr = z; } else rr = r; if (ip->x != VNULL) { if (ip->x->dim != ip->b->dim) error(E_SIZES,"iter_cg"); ip->Ax(ip->A_par,ip->x,p); /* p = A*x */ v_sub(ip->b,p,r); /* r = b - A*x */ } else { /* ip->x == 0 */ ip->x = v_get(ip->b->dim); ip->shared_x = FALSE; v_copy(ip->b,r); } if (ip->Bx) (ip->Bx)(ip->B_par,r,p); else v_copy(r,p); inner = in_prod(p,r); nres = sqrt(fabs(inner)); if (ip->info) ip->info(ip,nres,r,p); if ( nres == 0.0) return ip->x; for ( ip->steps = 0; ip->steps <= ip->limit; ip->steps++ ) { ip->Ax(ip->A_par,p,q); inner = in_prod(q,p); if (sqrt(fabs(inner)) <= MACHEPS*ip->init_res) error(E_BREAKDOWN,"iter_cg1"); alpha = in_prod(p,r)/inner; v_mltadd(ip->x,p,alpha,ip->x); v_mltadd(r,q,-alpha,r); rr = r; if (ip->Bx) { ip->Bx(ip->B_par,r,z); rr = z; } nres = in_prod(r,rr); if (nres < 0.0) { warning(WARN_RES_LESS_0,"iter_cg"); break; } nres = sqrt(fabs(nres)); if (ip->info) ip->info(ip,nres,r,z); if (ip->steps == 0) ip->init_res = nres; if ( ip->stop_crit(ip,nres,r,z) ) break; alpha = -in_prod(rr,q)/inner; v_mltadd(rr,p,alpha,p); } #ifdef THREADSAFE V_FREE(r); V_FREE(p); V_FREE(q); V_FREE(z); #endif return ip->x; }
void iter_lanczos(ITER *ip, VEC *a, VEC *b, Real *beta2, MAT *Q) #endif { int j; STATIC VEC *v = VNULL, *w = VNULL, *tmp = VNULL; Real alpha, beta, c; if ( ! ip ) error(E_NULL,"iter_lanczos"); if ( ! ip->Ax || ! ip->x || ! a || ! b ) error(E_NULL,"iter_lanczos"); if ( ip->k <= 0 ) error(E_BOUNDS,"iter_lanczos"); if ( Q && ( Q->n < ip->x->dim || Q->m < ip->k ) ) error(E_SIZES,"iter_lanczos"); a = v_resize(a,(unsigned int)ip->k); b = v_resize(b,(unsigned int)(ip->k-1)); v = v_resize(v,ip->x->dim); w = v_resize(w,ip->x->dim); tmp = v_resize(tmp,ip->x->dim); MEM_STAT_REG(v,TYPE_VEC); MEM_STAT_REG(w,TYPE_VEC); MEM_STAT_REG(tmp,TYPE_VEC); beta = 1.0; v_zero(a); v_zero(b); if (Q) m_zero(Q); /* normalise x as w */ c = v_norm2(ip->x); if (c <= MACHEPS) { /* ip->x == 0 */ *beta2 = 0.0; return; } else sv_mlt(1.0/c,ip->x,w); (ip->Ax)(ip->A_par,w,v); for ( j = 0; j < ip->k; j++ ) { /* store w in Q if Q not NULL */ if ( Q ) set_row(Q,j,w); alpha = in_prod(w,v); a->ve[j] = alpha; v_mltadd(v,w,-alpha,v); beta = v_norm2(v); if ( beta == 0.0 ) { *beta2 = 0.0; return; } if ( j < ip->k-1 ) b->ve[j] = beta; v_copy(w,tmp); sv_mlt(1/beta,v,w); sv_mlt(-beta,tmp,v); (ip->Ax)(ip->A_par,w,tmp); v_add(v,tmp,v); } *beta2 = beta; #ifdef THREADSAFE V_FREE(v); V_FREE(w); V_FREE(tmp); #endif }
VEC *iter_cg(ITER *ip) #endif { STATIC VEC *r = VNULL, *p = VNULL, *q = VNULL, *z = VNULL; Real alpha, beta, inner, old_inner, nres; VEC *rr; /* rr == r or rr == z */ if (ip == INULL) error(E_NULL,"iter_cg"); if (!ip->Ax || !ip->b) error(E_NULL,"iter_cg"); if ( ip->x == ip->b ) error(E_INSITU,"iter_cg"); if (!ip->stop_crit) error(E_NULL,"iter_cg"); if ( ip->eps <= 0.0 ) ip->eps = MACHEPS; r = v_resize(r,ip->b->dim); p = v_resize(p,ip->b->dim); q = v_resize(q,ip->b->dim); MEM_STAT_REG(r,TYPE_VEC); MEM_STAT_REG(p,TYPE_VEC); MEM_STAT_REG(q,TYPE_VEC); if (ip->Bx != (Fun_Ax)NULL) { z = v_resize(z,ip->b->dim); MEM_STAT_REG(z,TYPE_VEC); rr = z; } else rr = r; if (ip->x != VNULL) { if (ip->x->dim != ip->b->dim) error(E_SIZES,"iter_cg"); ip->Ax(ip->A_par,ip->x,p); /* p = A*x */ v_sub(ip->b,p,r); /* r = b - A*x */ } else { /* ip->x == 0 */ ip->x = v_get(ip->b->dim); ip->shared_x = FALSE; v_copy(ip->b,r); } old_inner = 0.0; for ( ip->steps = 0; ip->steps <= ip->limit; ip->steps++ ) { if ( ip->Bx ) (ip->Bx)(ip->B_par,r,rr); /* rr = B*r */ inner = in_prod(rr,r); nres = sqrt(fabs(inner)); if (ip->info) ip->info(ip,nres,r,rr); if (ip->steps == 0) ip->init_res = nres; if ( ip->stop_crit(ip,nres,r,rr) ) break; if ( ip->steps ) /* if ( ip->steps > 0 ) ... */ { beta = inner/old_inner; p = v_mltadd(rr,p,beta,p); } else /* if ( ip->steps == 0 ) ... */ { beta = 0.0; p = v_copy(rr,p); old_inner = 0.0; } (ip->Ax)(ip->A_par,p,q); /* q = A*p */ alpha = in_prod(p,q); if (sqrt(fabs(alpha)) <= MACHEPS*ip->init_res) error(E_BREAKDOWN,"iter_cg"); alpha = inner/alpha; v_mltadd(ip->x,p,alpha,ip->x); v_mltadd(r,q,-alpha,r); old_inner = inner; } #ifdef THREADSAFE V_FREE(r); V_FREE(p); V_FREE(q); V_FREE(z); #endif return ip->x; }