/* v_lincomb -- returns sum_i a[i].v[i], a[i] real, v[i] vectors */ VEC *v_lincomb(int n,VEC *v[],Real a[],VEC *out) /* number of a's and v's */ { int i; if ( ! a || ! v ) error(E_NULL,"v_lincomb"); if ( n <= 0 ) return VNULL; for ( i = 1; i < n; i++ ) if ( out == v[i] ) error(E_INSITU,"v_lincomb"); out = sv_mlt(a[0],v[0],out); for ( i = 1; i < n; i++ ) { if ( ! v[i] ) error(E_NULL,"v_lincomb"); if ( v[i]->dim != out->dim ) error(E_SIZES,"v_lincomb"); out = v_mltadd(out,v[i],a[i],out); } return out; }
/* v_linlist -- linear combinations taken from a list of arguments; calling: v_linlist(out,v1,a1,v2,a2,...,vn,an,NULL); where vi are vectors (VEC *) and ai are numbers (double) */ VEC *v_linlist(VEC *out,VEC *v1,double a1,...) { va_list ap; VEC *par; double a_par; if ( ! v1 ) return VNULL; va_start(ap, a1); out = sv_mlt(a1,v1,out); while (par = va_arg(ap,VEC *)) { /* NULL ends the list*/ a_par = va_arg(ap,double); if (a_par == 0.0) continue; if ( out == par ) error(E_INSITU,"v_linlist"); if ( out->dim != par->dim ) error(E_SIZES,"v_linlist"); if (a_par == 1.0) out = v_add(out,par,out); else if (a_par == -1.0) out = v_sub(out,par,out); else out = v_mltadd(out,par,a_par,out); } va_end(ap); return out; }
/* v_mltadd -- scalar/vector multiplication and addition -- out = v1 + scale.v2 */ VEC *v_mltadd(VEC *v1,VEC *v2,double scale,VEC *out) { /* register u_int dim, i; */ /* Real *out_ve, *v1_ve, *v2_ve; */ if ( v1==(VEC *)NULL || v2==(VEC *)NULL ) error(E_NULL,"v_mltadd"); if ( v1->dim != v2->dim ) error(E_SIZES,"v_mltadd"); if ( scale == 0.0 ) return v_copy(v1,out); if ( scale == 1.0 ) return v_add(v1,v2,out); if ( v2 != out ) { tracecatch(out = v_copy(v1,out),"v_mltadd"); /* dim = v1->dim; */ __mltadd__(out->ve,v2->ve,scale,(int)(v1->dim)); } else { tracecatch(out = sv_mlt(scale,v2,out),"v_mltadd"); out = v_add(v1,out,out); } /************************************************************ out_ve = out->ve; v1_ve = v1->ve; v2_ve = v2->ve; for ( i=0; i < dim ; i++ ) out->ve[i] = v1->ve[i] + scale*v2->ve[i]; (*out_ve++) = (*v1_ve++) + scale*(*v2_ve++); ************************************************************/ return (out); }
/**********************Normalize vector******************************* ********************************************************************/ void normalize_vec(VEC *vec) { double norm; norm = v_norm2(vec); if(norm!=0) { sv_mlt((1/norm), vec, vec); } }
//-------------------------------------------------------------------------- void Hqp_IpRedSpBKP::step(const Hqp_Program *qp, const VEC *z, const VEC *w, const VEC *r1, const VEC *r2, const VEC *r3, const VEC *r4, VEC *dx, VEC *dy, VEC *dz, VEC *dw) { VEC v; assert((int)r1->dim == _n && (int)dx->dim == _n); assert((int)r2->dim == _me && (int)dy->dim == _me); assert((int)r3->dim == _m && (int)dz->dim == _m); assert((int)r4->dim == _m && (int)dw->dim == _m); // augment, copy, scale and permutate [r1;r2;r3] into _r12 // calculate, permutate and scale x // augment r1 // temporary store (W^{-1}r_4 + ZW^{-1}r_3) in dz v_part(_r12, 0, _n, &v); v_slash(w, r4, dw); v_star(_zw, r3, dz); v_add(dw, dz, dz); sp_mv_mlt(_CT, dz, &v); v_sub(r1, &v, &v); v_star(&v, _scale, &v); v_copy(r2, v_part(_r12, _n, _me, &v)); px_vec(_J2QP, _r12, _r12); spBKPsolve(_J, _pivot, _r12, _xy); px_vec(_QP2J, _xy, _xy); v_star(v_part(_xy, 0, _n, &v), _scale, dx); v_copy(v_part(_xy, _n, _me, &v), dy); sp_vm_mlt(_CT, dx, dw); v_star(_zw, dw, dw); v_sub(dz, dw, dz); // calculate dw sv_mlt(-1.0, r3, dw); sp_mv_mltadd(dw, dx, qp->C, 1.0, dw); // usage of _CT is numerically worse! //sp_vm_mltadd(dw, dx, _CT, 1.0, dw); }
VEC *iter_mgcr(ITER *ip) #endif { STATIC VEC *As=VNULL, *beta=VNULL, *alpha=VNULL, *z=VNULL; STATIC MAT *N=MNULL, *H=MNULL; VEC *rr, v, s; /* additional pointer and structures */ Real nres; /* norm of a residual */ Real dd; /* coefficient d_i */ int i,j; int done; /* if TRUE then stop the iterative process */ int dim; /* dimension of the problem */ /* ip cannot be NULL */ if (ip == INULL) error(E_NULL,"mgcr"); /* Ax, b and stopping criterion must be given */ if (! ip->Ax || ! ip->b || ! ip->stop_crit) error(E_NULL,"mgcr"); /* at least one direction vector must exist */ if ( ip->k <= 0) error(E_BOUNDS,"mgcr"); /* if the vector x is given then b and x must have the same dimension */ if ( ip->x && ip->x->dim != ip->b->dim) error(E_SIZES,"mgcr"); if (ip->eps <= 0.0) ip->eps = MACHEPS; dim = ip->b->dim; As = v_resize(As,dim); alpha = v_resize(alpha,ip->k); beta = v_resize(beta,ip->k); MEM_STAT_REG(As,TYPE_VEC); MEM_STAT_REG(alpha,TYPE_VEC); MEM_STAT_REG(beta,TYPE_VEC); H = m_resize(H,ip->k,ip->k); N = m_resize(N,ip->k,dim); MEM_STAT_REG(H,TYPE_MAT); MEM_STAT_REG(N,TYPE_MAT); /* if a preconditioner is defined */ if (ip->Bx) { z = v_resize(z,dim); MEM_STAT_REG(z,TYPE_VEC); } /* if x is NULL then it is assumed that x has entries with value zero */ if ( ! ip->x ) { ip->x = v_get(ip->b->dim); ip->shared_x = FALSE; } /* v and s are additional pointers to rows of N */ /* they must have the same dimension as rows of N */ v.dim = v.max_dim = s.dim = s.max_dim = dim; done = FALSE; for (ip->steps = 0; ip->steps < ip->limit; ) { (*ip->Ax)(ip->A_par,ip->x,As); /* As = A*x */ v_sub(ip->b,As,As); /* As = b - A*x */ rr = As; /* rr is an additional pointer */ /* if a preconditioner is defined */ if (ip->Bx) { (*ip->Bx)(ip->B_par,As,z); /* z = B*(b-A*x) */ rr = z; } /* norm of the residual */ nres = v_norm2(rr); dd = nres; /* dd = ||r_i|| */ /* check if the norm of the residual is zero */ if (ip->steps == 0) { /* information for a user */ if (ip->info) (*ip->info)(ip,nres,As,rr); ip->init_res = fabs(nres); } if (nres == 0.0) { /* iterative process is finished */ done = TRUE; break; } /* save this residual in the first row of N */ v.ve = N->me[0]; v_copy(rr,&v); for (i = 0; i < ip->k && ip->steps < ip->limit; i++) { ip->steps++; v.ve = N->me[i]; /* pointer to a row of N (=s_i) */ /* note that we must use here &v, not v */ (*ip->Ax)(ip->A_par,&v,As); rr = As; /* As = A*s_i */ if (ip->Bx) { (*ip->Bx)(ip->B_par,As,z); /* z = B*A*s_i */ rr = z; } if (i < ip->k - 1) { s.ve = N->me[i+1]; /* pointer to a row of N (=s_{i+1}) */ v_copy(rr,&s); /* s_{i+1} = B*A*s_i */ for (j = 0; j <= i-1; j++) { v.ve = N->me[j+1]; /* pointer to a row of N (=s_{j+1}) */ /* beta->ve[j] = in_prod(&v,rr); */ /* beta_{j,i} */ /* modified Gram-Schmidt algorithm */ beta->ve[j] = in_prod(&v,&s); /* beta_{j,i} */ /* s_{i+1} -= beta_{j,i}*s_{j+1} */ v_mltadd(&s,&v,- beta->ve[j],&s); } /* beta_{i,i} = ||s_{i+1}||_2 */ beta->ve[i] = nres = v_norm2(&s); if ( nres <= MACHEPS*ip->init_res) { /* s_{i+1} == 0 */ i--; done = TRUE; break; } sv_mlt(1.0/nres,&s,&s); /* normalize s_{i+1} */ v.ve = N->me[0]; alpha->ve[i] = in_prod(&v,&s); /* alpha_i = (s_0 , s_{i+1}) */ } else { for (j = 0; j <= i-1; j++) { v.ve = N->me[j+1]; /* pointer to a row of N (=s_{j+1}) */ beta->ve[j] = in_prod(&v,rr); /* beta_{j,i} */ } nres = in_prod(rr,rr); /* rr = B*A*s_{k-1} */ for (j = 0; j <= i-1; j++) nres -= beta->ve[j]*beta->ve[j]; if (sqrt(fabs(nres)) <= MACHEPS*ip->init_res) { /* s_k is zero */ i--; done = TRUE; break; } if (nres < 0.0) { /* do restart */ i--; ip->steps--; break; } beta->ve[i] = sqrt(nres); /* beta_{k-1,k-1} */ v.ve = N->me[0]; alpha->ve[i] = in_prod(&v,rr); for (j = 0; j <= i-1; j++) alpha->ve[i] -= beta->ve[j]*alpha->ve[j]; alpha->ve[i] /= beta->ve[i]; /* alpha_{k-1} */ } set_col(H,i,beta); /* other method of computing dd */ /* if (fabs((double)alpha->ve[i]) > dd) { nres = - dd*dd + alpha->ve[i]*alpha->ve[i]; nres = sqrt((double) nres); if (ip->info) (*ip->info)(ip,-nres,VNULL,VNULL); break; } */ /* to avoid overflow/underflow in computing dd */ /* dd *= cos(asin((double)(alpha->ve[i]/dd))); */ nres = alpha->ve[i]/dd; if (fabs(nres-1.0) <= MACHEPS*ip->init_res) dd = 0.0; else { nres = 1.0 - nres*nres; if (nres < 0.0) { nres = sqrt((double) -nres); if (ip->info) (*ip->info)(ip,-dd*nres,VNULL,VNULL); break; } dd *= sqrt((double) nres); } if (ip->info) (*ip->info)(ip,dd,VNULL,VNULL); if ( ip->stop_crit(ip,dd,VNULL,VNULL) ) { /* stopping criterion is satisfied */ done = TRUE; break; } } /* end of for */ if (i >= ip->k) i = ip->k - 1; /* use (i+1) by (i+1) submatrix of H */ H = m_resize(H,i+1,i+1); alpha = v_resize(alpha,i+1); Usolve(H,alpha,alpha,0.0); /* c_i is saved in alpha */ for (j = 0; j <= i; j++) { v.ve = N->me[j]; v_mltadd(ip->x,&v,alpha->ve[j],ip->x); } if (done) break; /* stop the iterative process */ alpha = v_resize(alpha,ip->k); H = m_resize(H,ip->k,ip->k); } /* end of while */ #ifdef THREADSAFE V_FREE(As); V_FREE(beta); V_FREE(alpha); V_FREE(z); M_FREE(N); M_FREE(H); #endif return ip->x; /* return the solution */ }
VEC *iter_gmres(ITER *ip) #endif { STATIC VEC *u=VNULL, *r=VNULL, *rhs = VNULL; STATIC VEC *givs=VNULL, *givc=VNULL, *z = VNULL; STATIC MAT *Q = MNULL, *R = MNULL; VEC *rr, v, v1; /* additional pointers (not real vectors) */ int i,j, done; Real nres; /* Real last_h; */ if (ip == INULL) error(E_NULL,"iter_gmres"); if ( ! ip->Ax || ! ip->b ) error(E_NULL,"iter_gmres"); if ( ! ip->stop_crit ) error(E_NULL,"iter_gmres"); if ( ip->k <= 0 ) error(E_BOUNDS,"iter_gmres"); if (ip->x != VNULL && ip->x->dim != ip->b->dim) error(E_SIZES,"iter_gmres"); if (ip->eps <= 0.0) ip->eps = MACHEPS; r = v_resize(r,ip->k+1); u = v_resize(u,ip->b->dim); rhs = v_resize(rhs,ip->k+1); givs = v_resize(givs,ip->k); /* Givens rotations */ givc = v_resize(givc,ip->k); MEM_STAT_REG(r,TYPE_VEC); MEM_STAT_REG(u,TYPE_VEC); MEM_STAT_REG(rhs,TYPE_VEC); MEM_STAT_REG(givs,TYPE_VEC); MEM_STAT_REG(givc,TYPE_VEC); R = m_resize(R,ip->k+1,ip->k); Q = m_resize(Q,ip->k,ip->b->dim); MEM_STAT_REG(R,TYPE_MAT); MEM_STAT_REG(Q,TYPE_MAT); if (ip->x == VNULL) { /* ip->x == 0 */ ip->x = v_get(ip->b->dim); ip->shared_x = FALSE; } v.dim = v.max_dim = ip->b->dim; /* v and v1 are pointers to rows */ v1.dim = v1.max_dim = ip->b->dim; /* of matrix Q */ if (ip->Bx != (Fun_Ax)NULL) { /* if precondition is defined */ z = v_resize(z,ip->b->dim); MEM_STAT_REG(z,TYPE_VEC); } done = FALSE; for (ip->steps = 0; ip->steps < ip->limit; ) { /* restart */ ip->Ax(ip->A_par,ip->x,u); /* u = A*x */ v_sub(ip->b,u,u); /* u = b - A*x */ rr = u; /* rr is a pointer only */ if (ip->Bx) { (ip->Bx)(ip->B_par,u,z); /* tmp = B*(b-A*x) */ rr = z; } nres = v_norm2(rr); if (ip->steps == 0) { if (ip->info) ip->info(ip,nres,VNULL,VNULL); ip->init_res = nres; } if ( nres == 0.0 ) { done = TRUE; break; } v.ve = Q->me[0]; sv_mlt(1.0/nres,rr,&v); v_zero(r); v_zero(rhs); rhs->ve[0] = nres; for ( i = 0; i < ip->k && ip->steps < ip->limit; i++ ) { ip->steps++; v.ve = Q->me[i]; (ip->Ax)(ip->A_par,&v,u); rr = u; if (ip->Bx) { (ip->Bx)(ip->B_par,u,z); rr = z; } if (i < ip->k - 1) { v1.ve = Q->me[i+1]; v_copy(rr,&v1); for (j = 0; j <= i; j++) { v.ve = Q->me[j]; /* r->ve[j] = in_prod(&v,rr); */ /* modified Gram-Schmidt algorithm */ r->ve[j] = in_prod(&v,&v1); v_mltadd(&v1,&v,-r->ve[j],&v1); } r->ve[i+1] = nres = v_norm2(&v1); if (nres <= MACHEPS*ip->init_res) { for (j = 0; j < i; j++) rot_vec(r,j,j+1,givc->ve[j],givs->ve[j],r); set_col(R,i,r); done = TRUE; break; } sv_mlt(1.0/nres,&v1,&v1); } else { /* i == ip->k - 1 */ /* Q->me[ip->k] need not be computed */ for (j = 0; j <= i; j++) { v.ve = Q->me[j]; r->ve[j] = in_prod(&v,rr); } nres = in_prod(rr,rr) - in_prod(r,r); if (sqrt(fabs(nres)) <= MACHEPS*ip->init_res) { for (j = 0; j < i; j++) rot_vec(r,j,j+1,givc->ve[j],givs->ve[j],r); set_col(R,i,r); done = TRUE; break; } if (nres < 0.0) { /* do restart */ i--; ip->steps--; break; } r->ve[i+1] = sqrt(nres); } /* QR update */ /* last_h = r->ve[i+1]; */ /* for test only */ for (j = 0; j < i; j++) rot_vec(r,j,j+1,givc->ve[j],givs->ve[j],r); givens(r->ve[i],r->ve[i+1],&givc->ve[i],&givs->ve[i]); rot_vec(r,i,i+1,givc->ve[i],givs->ve[i],r); rot_vec(rhs,i,i+1,givc->ve[i],givs->ve[i],rhs); set_col(R,i,r); nres = fabs((double) rhs->ve[i+1]); if (ip->info) ip->info(ip,nres,VNULL,VNULL); if ( ip->stop_crit(ip,nres,VNULL,VNULL) ) { done = TRUE; break; } } /* use ixi submatrix of R */ if (i >= ip->k) i = ip->k - 1; R = m_resize(R,i+1,i+1); rhs = v_resize(rhs,i+1); /* test only */ /* test_gmres(ip,i,Q,R,givc,givs,last_h); */ Usolve(R,rhs,rhs,0.0); /* solve a system: R*x = rhs */ /* new approximation */ for (j = 0; j <= i; j++) { v.ve = Q->me[j]; v_mltadd(ip->x,&v,rhs->ve[j],ip->x); } if (done) break; /* back to old dimensions */ rhs = v_resize(rhs,ip->k+1); R = m_resize(R,ip->k+1,ip->k); } #ifdef THREADSAFE V_FREE(u); V_FREE(r); V_FREE(rhs); V_FREE(givs); V_FREE(givc); V_FREE(z); M_FREE(Q); M_FREE(R); #endif return ip->x; }
MAT *iter_arnoldi(ITER *ip, Real *h_rem, MAT *Q, MAT *H) #endif { STATIC VEC *u=VNULL, *r=VNULL; VEC v; /* auxiliary vector */ int i,j; Real h_val, c; if (ip == INULL) error(E_NULL,"iter_arnoldi"); if ( ! ip->Ax || ! Q || ! ip->x ) error(E_NULL,"iter_arnoldi"); if ( ip->k <= 0 ) error(E_BOUNDS,"iter_arnoldi"); if ( Q->n != ip->x->dim || Q->m != ip->k ) error(E_SIZES,"iter_arnoldi"); m_zero(Q); H = m_resize(H,ip->k,ip->k); m_zero(H); u = v_resize(u,ip->x->dim); r = v_resize(r,ip->k); MEM_STAT_REG(u,TYPE_VEC); MEM_STAT_REG(r,TYPE_VEC); v.dim = v.max_dim = ip->x->dim; c = v_norm2(ip->x); if ( c <= 0.0) return H; else { v.ve = Q->me[0]; sv_mlt(1.0/c,ip->x,&v); } v_zero(r); for ( i = 0; i < ip->k; i++ ) { v.ve = Q->me[i]; u = (ip->Ax)(ip->A_par,&v,u); for (j = 0; j <= i; j++) { v.ve = Q->me[j]; /* modified Gram-Schmidt */ r->ve[j] = in_prod(&v,u); v_mltadd(u,&v,-r->ve[j],u); } h_val = v_norm2(u); /* if u == 0 then we have an exact subspace */ if ( h_val <= 0.0 ) { *h_rem = h_val; return H; } set_col(H,i,r); if ( i == ip->k-1 ) { *h_rem = h_val; continue; } /* H->me[i+1][i] = h_val; */ m_set_val(H,i+1,i,h_val); v.ve = Q->me[i+1]; sv_mlt(1.0/h_val,u,&v); } #ifdef THREADSAFE V_FREE(u); V_FREE(r); #endif return H; }
MAT *iter_arnoldi_iref(ITER *ip, Real *h_rem, MAT *Q, MAT *H) #endif { STATIC VEC *u=VNULL, *r=VNULL, *s=VNULL, *tmp=VNULL; VEC v; /* auxiliary vector */ int i,j; Real h_val, c; if (ip == INULL) error(E_NULL,"iter_arnoldi_iref"); if ( ! ip->Ax || ! Q || ! ip->x ) error(E_NULL,"iter_arnoldi_iref"); if ( ip->k <= 0 ) error(E_BOUNDS,"iter_arnoldi_iref"); if ( Q->n != ip->x->dim || Q->m != ip->k ) error(E_SIZES,"iter_arnoldi_iref"); m_zero(Q); H = m_resize(H,ip->k,ip->k); m_zero(H); u = v_resize(u,ip->x->dim); r = v_resize(r,ip->k); s = v_resize(s,ip->k); tmp = v_resize(tmp,ip->x->dim); MEM_STAT_REG(u,TYPE_VEC); MEM_STAT_REG(r,TYPE_VEC); MEM_STAT_REG(s,TYPE_VEC); MEM_STAT_REG(tmp,TYPE_VEC); v.dim = v.max_dim = ip->x->dim; c = v_norm2(ip->x); if ( c <= 0.0) return H; else { v.ve = Q->me[0]; sv_mlt(1.0/c,ip->x,&v); } v_zero(r); v_zero(s); for ( i = 0; i < ip->k; i++ ) { v.ve = Q->me[i]; u = (ip->Ax)(ip->A_par,&v,u); for (j = 0; j <= i; j++) { v.ve = Q->me[j]; /* modified Gram-Schmidt */ r->ve[j] = in_prod(&v,u); v_mltadd(u,&v,-r->ve[j],u); } h_val = v_norm2(u); /* if u == 0 then we have an exact subspace */ if ( h_val <= 0.0 ) { *h_rem = h_val; return H; } /* iterative refinement -- ensures near orthogonality */ do { v_zero(tmp); for (j = 0; j <= i; j++) { v.ve = Q->me[j]; s->ve[j] = in_prod(&v,u); v_mltadd(tmp,&v,s->ve[j],tmp); } v_sub(u,tmp,u); v_add(r,s,r); } while ( v_norm2(s) > 0.1*(h_val = v_norm2(u)) ); /* now that u is nearly orthogonal to Q, update H */ set_col(H,i,r); /* check once again if h_val is zero */ if ( h_val <= 0.0 ) { *h_rem = h_val; return H; } if ( i == ip->k-1 ) { *h_rem = h_val; continue; } /* H->me[i+1][i] = h_val; */ m_set_val(H,i+1,i,h_val); v.ve = Q->me[i+1]; sv_mlt(1.0/h_val,u,&v); } #ifdef THREADSAFE V_FREE(u); V_FREE(r); V_FREE(s); V_FREE(tmp); #endif return H; }
VEC *iter_lsqr(ITER *ip) #endif { STATIC VEC *u = VNULL, *v = VNULL, *w = VNULL, *tmp = VNULL; Real alpha, beta, phi, phi_bar; Real rho, rho_bar, rho_max, theta, nres; Real s, c; /* for Givens' rotations */ int m, n; if ( ! ip || ! ip->b || !ip->Ax || !ip->ATx ) error(E_NULL,"iter_lsqr"); if ( ip->x == ip->b ) error(E_INSITU,"iter_lsqr"); if (!ip->stop_crit || !ip->x) error(E_NULL,"iter_lsqr"); if ( ip->eps <= 0.0 ) ip->eps = MACHEPS; m = ip->b->dim; n = ip->x->dim; u = v_resize(u,(unsigned int)m); v = v_resize(v,(unsigned int)n); w = v_resize(w,(unsigned int)n); tmp = v_resize(tmp,(unsigned int)n); MEM_STAT_REG(u,TYPE_VEC); MEM_STAT_REG(v,TYPE_VEC); MEM_STAT_REG(w,TYPE_VEC); MEM_STAT_REG(tmp,TYPE_VEC); if (ip->x != VNULL) { ip->Ax(ip->A_par,ip->x,u); /* u = A*x */ v_sub(ip->b,u,u); /* u = b-A*x */ } else { /* ip->x == 0 */ ip->x = v_get(ip->b->dim); ip->shared_x = FALSE; v_copy(ip->b,u); /* u = b */ } beta = v_norm2(u); if ( beta == 0.0 ) return ip->x; sv_mlt(1.0/beta,u,u); (ip->ATx)(ip->AT_par,u,v); alpha = v_norm2(v); if ( alpha == 0.0 ) return ip->x; sv_mlt(1.0/alpha,v,v); v_copy(v,w); phi_bar = beta; rho_bar = alpha; rho_max = 1.0; for (ip->steps = 0; ip->steps <= ip->limit; ip->steps++) { tmp = v_resize(tmp,m); (ip->Ax)(ip->A_par,v,tmp); v_mltadd(tmp,u,-alpha,u); beta = v_norm2(u); sv_mlt(1.0/beta,u,u); tmp = v_resize(tmp,n); (ip->ATx)(ip->AT_par,u,tmp); v_mltadd(tmp,v,-beta,v); alpha = v_norm2(v); sv_mlt(1.0/alpha,v,v); rho = sqrt(rho_bar*rho_bar+beta*beta); if ( rho > rho_max ) rho_max = rho; c = rho_bar/rho; s = beta/rho; theta = s*alpha; rho_bar = -c*alpha; phi = c*phi_bar; phi_bar = s*phi_bar; /* update ip->x & w */ if ( rho == 0.0 ) error(E_BREAKDOWN,"iter_lsqr"); v_mltadd(ip->x,w,phi/rho,ip->x); v_mltadd(v,w,-theta/rho,w); nres = fabs(phi_bar*alpha*c)*rho_max; if (ip->info) ip->info(ip,nres,w,VNULL); if (ip->steps == 0) ip->init_res = nres; if ( ip->stop_crit(ip,nres,w,VNULL) ) break; } #ifdef THREADSAFE V_FREE(u); V_FREE(v); V_FREE(w); V_FREE(tmp); #endif return ip->x; }
void Sy_m( VEC *x, VEC *Torq_ext, VEC *out) { static MAT *iI = {MNULL}; MAT *sk_om = {MNULL}; VEC *q, *w, *q_dot, *w_dot, *v_res1, *v_res2; int i; if ( x == VNULL || out == VNULL ){ error(E_NULL,"f"); } if ( x->dim != 13 || out->dim != 13){ error(E_SIZES,"f"); } q = v_get(4); for (i=0; i<4; i++) { q->ve[i] = x->ve[i]; } w = v_get(3); for (i=4; i<7; i++) { w->ve[i-4] = x->ve[i]; } v_res1 = v_get(3); v_zero(v_res1); v_res2 = v_get(3); v_zero(v_res2); q_dot = v_get(4); v_zero(q_dot); w_dot = v_get(3); v_zero(w_dot); if(iI == MNULL){ iI = m_get(3,3); m_zero(iI); } sk_om = m_get(4,4); skew_mat(sk_om, w); mv_mlt(sk_om, q, q_dot); sv_mlt(0.5, q_dot, q); for (i=0; i<4; i++) { out->ve[i] = q->ve[i]; } /*****wd********/ m_resize(sk_om, 3, 3); mv_mlt(inertia, w, v_res1); mv_mlt(sk_om, v_res1, v_res2); v_sub(Torq_ext, v_res2, v_res1); m_inverse(inertia,iI); mv_mlt(iI, v_res1, w_dot); for (i=4; i<7; i++) { out->ve[i] = w_dot->ve[i-4]; } for (i=7; i<13; i++) { out->ve[i] = 0; } M_FREE(sk_om); //M_FREE(iI); V_FREE(q); V_FREE(w); V_FREE(q_dot); V_FREE(w_dot); V_FREE(v_res1); V_FREE(v_res2); }
void iter_lanczos(ITER *ip, VEC *a, VEC *b, Real *beta2, MAT *Q) #endif { int j; STATIC VEC *v = VNULL, *w = VNULL, *tmp = VNULL; Real alpha, beta, c; if ( ! ip ) error(E_NULL,"iter_lanczos"); if ( ! ip->Ax || ! ip->x || ! a || ! b ) error(E_NULL,"iter_lanczos"); if ( ip->k <= 0 ) error(E_BOUNDS,"iter_lanczos"); if ( Q && ( Q->n < ip->x->dim || Q->m < ip->k ) ) error(E_SIZES,"iter_lanczos"); a = v_resize(a,(unsigned int)ip->k); b = v_resize(b,(unsigned int)(ip->k-1)); v = v_resize(v,ip->x->dim); w = v_resize(w,ip->x->dim); tmp = v_resize(tmp,ip->x->dim); MEM_STAT_REG(v,TYPE_VEC); MEM_STAT_REG(w,TYPE_VEC); MEM_STAT_REG(tmp,TYPE_VEC); beta = 1.0; v_zero(a); v_zero(b); if (Q) m_zero(Q); /* normalise x as w */ c = v_norm2(ip->x); if (c <= MACHEPS) { /* ip->x == 0 */ *beta2 = 0.0; return; } else sv_mlt(1.0/c,ip->x,w); (ip->Ax)(ip->A_par,w,v); for ( j = 0; j < ip->k; j++ ) { /* store w in Q if Q not NULL */ if ( Q ) set_row(Q,j,w); alpha = in_prod(w,v); a->ve[j] = alpha; v_mltadd(v,w,-alpha,v); beta = v_norm2(v); if ( beta == 0.0 ) { *beta2 = 0.0; return; } if ( j < ip->k-1 ) b->ve[j] = beta; v_copy(w,tmp); sv_mlt(1/beta,v,w); sv_mlt(-beta,tmp,v); (ip->Ax)(ip->A_par,w,tmp); v_add(v,tmp,v); } *beta2 = beta; #ifdef THREADSAFE V_FREE(v); V_FREE(w); V_FREE(tmp); #endif }
static int fit_GaussNewton(VARIOGRAM *vp, PERM *p, LM *lm, int iter, int *bounded) { double s = 0.0, x, y, z; int i, j, n_fit, model, fit_ranges = 0; IVEC *fit = NULL; VEC *start = NULL; if (p->size == 0) return 1; fit = iv_resize(fit, 2 * vp->n_models); /* index fit parameters: parameter fit->ive[j] corresponds to model i */ for (i = n_fit = 0; i < vp->n_models; i++) { if (vp->part[i].fit_sill) fit->ive[n_fit++] = i; if (vp->part[i].fit_range) { fit->ive[n_fit++] = i + vp->n_models; /* large -->> ranges */ fit_ranges = 1; } } if (n_fit == 0) { iv_free(fit); return 0; } fit = iv_resize(fit, n_fit); /* shrink to fit */ lm->X = m_resize(lm->X, p->size, n_fit); lm->y = v_resize(lm->y, p->size); start = v_resize(start, n_fit); for (i = 0; i < n_fit; i++) { if (fit->ive[i] < vp->n_models) { model = fit->ive[i]; start->ve[i] = vp->part[model].sill; } else { model = fit->ive[i] - vp->n_models; start->ve[i] = vp->part[model].range[0]; } } for (i = 0; i < p->size; i++) { x = vp->ev->direction.x * vp->ev->dist[p->pe[i]]; y = vp->ev->direction.y * vp->ev->dist[p->pe[i]]; z = vp->ev->direction.z * vp->ev->dist[p->pe[i]]; /* fill y with current residuals: */ if (is_variogram(vp)) s = get_semivariance(vp, x, y, z); else s = get_covariance(vp, x, y, z); lm->y->ve[i] = vp->ev->gamma[p->pe[i]] - s; /* fill X: */ for (j = 0; j < n_fit; j++) { /* cols */ if (fit->ive[j] < vp->n_models) { model = fit->ive[j]; ME(lm->X, i, j) = (is_variogram(vp) ? UnitSemivariance(vp->part[model],x,y,z) : UnitCovariance(vp->part[model],x,y,z)); } else { model = fit->ive[j] - vp->n_models; ME(lm->X, i, j) = (is_variogram(vp) ? da_Semivariance(vp->part[model],x,y,z) : -da_Semivariance(vp->part[model],x,y,z)); } } } if (iter == 0 && fill_weights(vp, p, lm)) { iv_free(fit); v_free(start); return 1; } lm->has_intercept = 1; /* does not affect the fit */ lm = calc_lm(lm); /* solve WLS eqs. for beta */ if (DEBUG_FIT) { Rprintf("beta: "); v_logoutput(lm->beta); } if (lm->is_singular) { iv_free(fit); v_free(start); return 1; } if (fit_ranges) { s = v_norm2(lm->beta) / v_norm2(start); if (s > 0.2) { /* don't allow steps > 20% ---- */ sv_mlt(0.2 / s, lm->beta, lm->beta); *bounded = 1; } else *bounded = 0; /* a `free', voluntary step */ } else /* we're basically doing linear regression here: */ *bounded = 0; for (i = 0; i < n_fit; i++) { if (fit->ive[i] < vp->n_models) { model = fit->ive[i]; vp->part[model].sill = start->ve[i] + lm->beta->ve[i]; } else { model = fit->ive[i] - vp->n_models;; vp->part[model].range[0] = start->ve[i] + lm->beta->ve[i]; } } iv_free(fit); v_free(start); return 0; }
double QRcondest(const MAT *QR) #endif { STATIC VEC *y=VNULL; Real norm1, norm2, sum, tmp1, tmp2; int i, j, limit; if ( QR == MNULL ) error(E_NULL,"QRcondest"); limit = min(QR->m,QR->n); for ( i = 0; i < limit; i++ ) if ( QR->me[i][i] == 0.0 ) return HUGE_VAL; y = v_resize(y,limit); MEM_STAT_REG(y,TYPE_VEC); /* use the trick for getting a unit vector y with ||R.y||_inf small from the LU condition estimator */ for ( i = 0; i < limit; i++ ) { sum = 0.0; for ( j = 0; j < i; j++ ) sum -= QR->me[j][i]*y->ve[j]; sum -= (sum < 0.0) ? 1.0 : -1.0; y->ve[i] = sum / QR->me[i][i]; } UTmlt(QR,y,y); /* now apply inverse power method to R^T.R */ for ( i = 0; i < 3; i++ ) { tmp1 = v_norm2(y); sv_mlt(1/tmp1,y,y); UTsolve(QR,y,y,0.0); tmp2 = v_norm2(y); sv_mlt(1/v_norm2(y),y,y); Usolve(QR,y,y,0.0); } /* now compute approximation for ||R^{-1}||_2 */ norm1 = sqrt(tmp1)*sqrt(tmp2); /* now use complementary approach to compute approximation to ||R||_2 */ for ( i = limit-1; i >= 0; i-- ) { sum = 0.0; for ( j = i+1; j < limit; j++ ) sum += QR->me[i][j]*y->ve[j]; y->ve[i] = (sum >= 0.0) ? 1.0 : -1.0; y->ve[i] = (QR->me[i][i] >= 0.0) ? y->ve[i] : - y->ve[i]; } /* now apply power method to R^T.R */ for ( i = 0; i < 3; i++ ) { tmp1 = v_norm2(y); sv_mlt(1/tmp1,y,y); Umlt(QR,y,y); tmp2 = v_norm2(y); sv_mlt(1/tmp2,y,y); UTmlt(QR,y,y); } norm2 = sqrt(tmp1)*sqrt(tmp2); /* printf("QRcondest: norm1 = %g, norm2 = %g\n",norm1,norm2); */ #ifdef THREADSAFE V_FREE(y); #endif return norm1*norm2; }