static void gr_transform(SCF *scf, int k, double un[]) { int n = scf->n; double *f = scf->f; double *u = scf->u; int j, k1, kj, kk, n1, nj; double c, s; xassert(1 <= k && k <= n); /* main elimination loop */ for (k = k; k < n; k++) { /* determine location of U[k,k] */ kk = u_loc(scf, k, k); /* determine location of F[k,1] */ k1 = f_loc(scf, k, 1); /* determine location of F[n,1] */ n1 = f_loc(scf, n, 1); /* if both U[k,k] and U[n,k] are too small in the magnitude, replace them by exact zero */ if (fabs(u[kk]) < eps && fabs(un[k]) < eps) u[kk] = un[k] = 0.0; /* if U[n,k] is already zero, elimination is not needed */ if (un[k] == 0.0) continue; /* compute the parameters of Givens plane rotation */ givens(u[kk], un[k], &c, &s); /* apply Givens rotation to k-th and n-th rows of matrix U */ for (j = k, kj = kk; j <= n; j++, kj++) { double ukj = u[kj], unj = un[j]; u[kj] = c * ukj - s * unj; un[j] = s * ukj + c * unj; } /* apply Givens rotation to k-th and n-th rows of matrix F to keep the main equality F * C = U * P */ for (j = 1, kj = k1, nj = n1; j <= n; j++, kj++, nj++) { double fkj = f[kj], fnj = f[nj]; f[kj] = c * fkj - s * fnj; f[nj] = s * fkj + c * fnj; } } /* if U[n,n] is too small in the magnitude, replace it by exact zero */ if (fabs(un[n]) < eps) un[n] = 0.0; /* store U[n,n] in a proper location */ u[u_loc(scf, n, n)] = un[n]; return; }
void givensReduce(dense_bilinear &UH, dense_bilinear & rots){ fpp x1, x2, c, s; x1 = 0.0; x2 = 0.0; c = 0.0; s = 0.0; int i, j; i = 0; j = 0; if ((UH.get_rows() != (rots.get_rows() + 1)) || (rots.get_cols() != 2)) { std::cerr << "Givens reduce incompatibility! UH:(" << UH.get_rows() << "," << UH.get_cols() << "), rots:(" << rots.get_rows() << "," << rots.get_cols() << ")." << std::endl; exit(-1); } for(i = 0; i < (UH.get_rows() - 1); i++){ givens(UH(i,i), UH(i+1,i), c, s); rots(i,0) = c; rots(i,1) = s; for(j = i; j < UH.get_cols(); j++){ x1 = UH(i,j); x2 = UH(i+1,j); UH(i,j) = c*x1 - s*x2; UH(i+1,j) = s*x1 + c*x2; } } }
VEC *iter_gmres(ITER *ip) #endif { STATIC VEC *u=VNULL, *r=VNULL, *rhs = VNULL; STATIC VEC *givs=VNULL, *givc=VNULL, *z = VNULL; STATIC MAT *Q = MNULL, *R = MNULL; VEC *rr, v, v1; /* additional pointers (not real vectors) */ int i,j, done; Real nres; /* Real last_h; */ if (ip == INULL) error(E_NULL,"iter_gmres"); if ( ! ip->Ax || ! ip->b ) error(E_NULL,"iter_gmres"); if ( ! ip->stop_crit ) error(E_NULL,"iter_gmres"); if ( ip->k <= 0 ) error(E_BOUNDS,"iter_gmres"); if (ip->x != VNULL && ip->x->dim != ip->b->dim) error(E_SIZES,"iter_gmres"); if (ip->eps <= 0.0) ip->eps = MACHEPS; r = v_resize(r,ip->k+1); u = v_resize(u,ip->b->dim); rhs = v_resize(rhs,ip->k+1); givs = v_resize(givs,ip->k); /* Givens rotations */ givc = v_resize(givc,ip->k); MEM_STAT_REG(r,TYPE_VEC); MEM_STAT_REG(u,TYPE_VEC); MEM_STAT_REG(rhs,TYPE_VEC); MEM_STAT_REG(givs,TYPE_VEC); MEM_STAT_REG(givc,TYPE_VEC); R = m_resize(R,ip->k+1,ip->k); Q = m_resize(Q,ip->k,ip->b->dim); MEM_STAT_REG(R,TYPE_MAT); MEM_STAT_REG(Q,TYPE_MAT); if (ip->x == VNULL) { /* ip->x == 0 */ ip->x = v_get(ip->b->dim); ip->shared_x = FALSE; } v.dim = v.max_dim = ip->b->dim; /* v and v1 are pointers to rows */ v1.dim = v1.max_dim = ip->b->dim; /* of matrix Q */ if (ip->Bx != (Fun_Ax)NULL) { /* if precondition is defined */ z = v_resize(z,ip->b->dim); MEM_STAT_REG(z,TYPE_VEC); } done = FALSE; for (ip->steps = 0; ip->steps < ip->limit; ) { /* restart */ ip->Ax(ip->A_par,ip->x,u); /* u = A*x */ v_sub(ip->b,u,u); /* u = b - A*x */ rr = u; /* rr is a pointer only */ if (ip->Bx) { (ip->Bx)(ip->B_par,u,z); /* tmp = B*(b-A*x) */ rr = z; } nres = v_norm2(rr); if (ip->steps == 0) { if (ip->info) ip->info(ip,nres,VNULL,VNULL); ip->init_res = nres; } if ( nres == 0.0 ) { done = TRUE; break; } v.ve = Q->me[0]; sv_mlt(1.0/nres,rr,&v); v_zero(r); v_zero(rhs); rhs->ve[0] = nres; for ( i = 0; i < ip->k && ip->steps < ip->limit; i++ ) { ip->steps++; v.ve = Q->me[i]; (ip->Ax)(ip->A_par,&v,u); rr = u; if (ip->Bx) { (ip->Bx)(ip->B_par,u,z); rr = z; } if (i < ip->k - 1) { v1.ve = Q->me[i+1]; v_copy(rr,&v1); for (j = 0; j <= i; j++) { v.ve = Q->me[j]; /* r->ve[j] = in_prod(&v,rr); */ /* modified Gram-Schmidt algorithm */ r->ve[j] = in_prod(&v,&v1); v_mltadd(&v1,&v,-r->ve[j],&v1); } r->ve[i+1] = nres = v_norm2(&v1); if (nres <= MACHEPS*ip->init_res) { for (j = 0; j < i; j++) rot_vec(r,j,j+1,givc->ve[j],givs->ve[j],r); set_col(R,i,r); done = TRUE; break; } sv_mlt(1.0/nres,&v1,&v1); } else { /* i == ip->k - 1 */ /* Q->me[ip->k] need not be computed */ for (j = 0; j <= i; j++) { v.ve = Q->me[j]; r->ve[j] = in_prod(&v,rr); } nres = in_prod(rr,rr) - in_prod(r,r); if (sqrt(fabs(nres)) <= MACHEPS*ip->init_res) { for (j = 0; j < i; j++) rot_vec(r,j,j+1,givc->ve[j],givs->ve[j],r); set_col(R,i,r); done = TRUE; break; } if (nres < 0.0) { /* do restart */ i--; ip->steps--; break; } r->ve[i+1] = sqrt(nres); } /* QR update */ /* last_h = r->ve[i+1]; */ /* for test only */ for (j = 0; j < i; j++) rot_vec(r,j,j+1,givc->ve[j],givs->ve[j],r); givens(r->ve[i],r->ve[i+1],&givc->ve[i],&givs->ve[i]); rot_vec(r,i,i+1,givc->ve[i],givs->ve[i],r); rot_vec(rhs,i,i+1,givc->ve[i],givs->ve[i],rhs); set_col(R,i,r); nres = fabs((double) rhs->ve[i+1]); if (ip->info) ip->info(ip,nres,VNULL,VNULL); if ( ip->stop_crit(ip,nres,VNULL,VNULL) ) { done = TRUE; break; } } /* use ixi submatrix of R */ if (i >= ip->k) i = ip->k - 1; R = m_resize(R,i+1,i+1); rhs = v_resize(rhs,i+1); /* test only */ /* test_gmres(ip,i,Q,R,givc,givs,last_h); */ Usolve(R,rhs,rhs,0.0); /* solve a system: R*x = rhs */ /* new approximation */ for (j = 0; j <= i; j++) { v.ve = Q->me[j]; v_mltadd(ip->x,&v,rhs->ve[j],ip->x); } if (done) break; /* back to old dimensions */ rhs = v_resize(rhs,ip->k+1); R = m_resize(R,ip->k+1,ip->k); } #ifdef THREADSAFE V_FREE(u); V_FREE(r); V_FREE(rhs); V_FREE(givs); V_FREE(givc); V_FREE(z); M_FREE(Q); M_FREE(R); #endif return ip->x; }
void SparseMatrix::apply_givens(int row, int col, double* c_givens, double* s_givens) { requireDebug(row>=0 && row<_num_rows && col>=0 && col<_num_cols, "SparseMatrix::apply_givens: index outside matrix."); requireDebug(row>col, "SparseMatrix::apply_givens: can only zero entries below the diagonal."); const SparseVector& row_top = *_rows[col]; const SparseVector& row_bot = *_rows[row]; double a = row_top(col); double b = row_bot(col); double c, s; givens(a, b, c, s); if (c_givens) *c_givens = c; if (s_givens) *s_givens = s; int n = row_bot.nnz() + row_top.nnz(); SparseVector_p new_row_top = new SparseVector(n); SparseVector_p new_row_bot = new SparseVector(n); SparseVectorIter iter_top(row_top); SparseVectorIter iter_bot(row_bot); bool top_valid = iter_top.valid(); bool bot_valid = iter_bot.valid(); while (top_valid || bot_valid) { double val_top = 0.; double val_bot = 0.; int idx_top = (top_valid)?iter_top.get(val_top):-1; int idx_bot = (bot_valid)?iter_bot.get(val_bot):-1; int idx; if (idx_bot<0) { idx = idx_top; } else if (idx_top<0) { idx = idx_bot; } else { idx = min(idx_top, idx_bot); } if (top_valid) { if (idx==idx_top) { iter_top.next(); } else { val_top = 0.; } } if (bot_valid) { if (idx==idx_bot) { iter_bot.next(); } else { val_bot = 0.; } } double new_val_top = c*val_top - s*val_bot; double new_val_bot = s*val_top + c*val_bot; // remove numerically zero values to keep sparsity if (fabs(new_val_top) >= NUMERICAL_ZERO) { // append for O(1) operation - even O(log n) is too // slow here, because this is called extremely often! new_row_top->append(idx, new_val_top); } if (fabs(new_val_bot) >= NUMERICAL_ZERO) { new_row_bot->append(idx, new_val_bot); } top_valid = iter_top.valid(); bot_valid = iter_bot.valid(); } delete _rows[col]; delete _rows[row]; _rows[col] = new_row_top; _rows[row] = new_row_bot; _rows[row]->remove(col); // by definition, this entry is exactly 0 }
void eqnsys<nr_type_t>::diagonalize_svd (void) { bool split; int i, l, j, its, k, n, MaxIters = 30; nr_double_t an, f, g, h, d, c, s, b, a; // find largest bidiagonal value for (an = 0, i = 0; i < N; i++) an = MAX (an, fabs (S_(i)) + fabs (E_(i))); // diagonalize the bidiagonal matrix (stored as super-diagonal // vector E and diagonal vector S) for (k = N - 1; k >= 0; k--) { // loop over singular values for (its = 0; its <= MaxIters; its++) { split = true; // check for a zero entry along the super-diagonal E, if there // is one, it is possible to QR iterate on two separate matrices // above and below it for (n = 0, l = k; l >= 1; l--) { // note that E_(0) is always zero n = l - 1; if (fabs (E_(l)) + an == an) { split = false; break; } if (fabs (S_(n)) + an == an) break; } // if there is a zero on the diagonal S, it is possible to zero // out the corresponding super-diagonal E entry to its right if (split) { // cancellation of E_(l), if l > 0 c = 0.0; s = 1.0; for (i = l; i <= k; i++) { f = -s * E_(i); E_(i) *= c; if (fabs (f) + an == an) break; g = S_(i); S_(i) = givens (f, g, c, s); // apply inverse rotation to U givens_apply_u (n, i, c, s); } } d = S_(k); // convergence if (l == k) { // singular value is made non-negative if (d < 0.0) { S_(k) = -d; for (j = 0; j < N; j++) V_(k, j) = -V_(k, j); } break; } if (its == MaxIters) { logprint (LOG_ERROR, "WARNING: no convergence in %d SVD iterations\n", MaxIters); } // shift from bottom 2-by-2 minor a = S_(l); n = k - 1; b = S_(n); g = E_(n); h = E_(k); // compute QR shift value (as close as possible to the largest // eigenvalue of the 2-by-2 minor matrix) f = (b - d) * (b + d) + (g - h) * (g + h); f /= 2.0 * h * b; f += sign_(f) * xhypot (f, 1.0); f = ((a - d) * (a + d) + h * (b / f - h)) / a; // f => (B_{ll}^2 - u) / B_{ll} // u => eigenvalue of T = B' * B nearer T_{22} (Wilkinson shift) // next QR transformation c = s = 1.0; for (j = l; j <= n; j++) { i = j + 1; g = E_(i); b = S_(i); h = s * g; // h => right-hand non-zero to annihilate g *= c; E_(j) = givens (f, h, c, s); // perform the rotation f = a * c + g * s; g = g * c - a * s; h = b * s; b *= c; // here: +- -+ // | f g | = B * V'_j (also first V'_1) // | h b | // +- -+ // accumulate the rotation in V' givens_apply_v (j, i, c, s); d = S_(j) = xhypot (f, h); // rotation can be arbitrary if d = 0 if (d != 0.0) { // d => non-zero result on diagonal d = 1.0 / d; // rotation coefficients to annihilate the lower non-zero c = f * d; s = h * d; } f = c * g + s * b; a = c * b - s * g; // here: +- -+ // | d f | => U_j * B // | 0 a | // +- -+ // accumulate rotation into U givens_apply_u (j, i, c, s); } E_(l) = 0; E_(k) = f; S_(k) = a; } } }
VEC *bisvd(VEC *d, VEC *f, MAT *U, MAT *V) #endif { int i, j, n; int i_min, i_max, split; Real c, s, shift, size, z; Real d_tmp, diff, t11, t12, t22, *d_ve, *f_ve; if ( ! d || ! f ) error(E_NULL,"bisvd"); if ( d->dim != f->dim + 1 ) error(E_SIZES,"bisvd"); n = d->dim; if ( ( U && U->n < n ) || ( V && V->m < n ) ) error(E_SIZES,"bisvd"); if ( ( U && U->m != U->n ) || ( V && V->m != V->n ) ) error(E_SQUARE,"bisvd"); if ( n == 1 ) { if ( d->ve[0] < 0.0 ) { d->ve[0] = - d->ve[0]; if ( U != MNULL ) sm_mlt(-1.0,U,U); } return d; } d_ve = d->ve; f_ve = f->ve; size = v_norm_inf(d) + v_norm_inf(f); i_min = 0; while ( i_min < n ) /* outer while loop */ { /* find i_max to suit; submatrix i_min..i_max should be irreducible */ i_max = n - 1; for ( i = i_min; i < n - 1; i++ ) if ( d_ve[i] == 0.0 || f_ve[i] == 0.0 ) { i_max = i; if ( f_ve[i] != 0.0 ) { /* have to ``chase'' f[i] element out of matrix */ z = f_ve[i]; f_ve[i] = 0.0; for ( j = i; j < n-1 && z != 0.0; j++ ) { givens(d_ve[j+1],z, &c, &s); s = -s; d_ve[j+1] = c*d_ve[j+1] - s*z; if ( j+1 < n-1 ) { z = s*f_ve[j+1]; f_ve[j+1] = c*f_ve[j+1]; } if ( U ) rot_rows(U,i,j+1,c,s,U); } } break; } if ( i_max <= i_min ) { i_min = i_max + 1; continue; } /* printf("bisvd: i_min = %d, i_max = %d\n",i_min,i_max); */ split = FALSE; while ( ! split ) { /* compute shift */ t11 = d_ve[i_max-1]*d_ve[i_max-1] + (i_max > i_min+1 ? f_ve[i_max-2]*f_ve[i_max-2] : 0.0); t12 = d_ve[i_max-1]*f_ve[i_max-1]; t22 = d_ve[i_max]*d_ve[i_max] + f_ve[i_max-1]*f_ve[i_max-1]; /* use e-val of [[t11,t12],[t12,t22]] matrix closest to t22 */ diff = (t11-t22)/2; shift = t22 - t12*t12/(diff + sgn(diff)*sqrt(diff*diff+t12*t12)); /* initial Givens' rotation */ givens(d_ve[i_min]*d_ve[i_min]-shift, d_ve[i_min]*f_ve[i_min], &c, &s); /* do initial Givens' rotations */ d_tmp = c*d_ve[i_min] + s*f_ve[i_min]; f_ve[i_min] = c*f_ve[i_min] - s*d_ve[i_min]; d_ve[i_min] = d_tmp; z = s*d_ve[i_min+1]; d_ve[i_min+1] = c*d_ve[i_min+1]; if ( V ) rot_rows(V,i_min,i_min+1,c,s,V); /* 2nd Givens' rotation */ givens(d_ve[i_min],z, &c, &s); d_ve[i_min] = c*d_ve[i_min] + s*z; d_tmp = c*d_ve[i_min+1] - s*f_ve[i_min]; f_ve[i_min] = s*d_ve[i_min+1] + c*f_ve[i_min]; d_ve[i_min+1] = d_tmp; if ( i_min+1 < i_max ) { z = s*f_ve[i_min+1]; f_ve[i_min+1] = c*f_ve[i_min+1]; } if ( U ) rot_rows(U,i_min,i_min+1,c,s,U); for ( i = i_min+1; i < i_max; i++ ) { /* get Givens' rotation for zeroing z */ givens(f_ve[i-1],z, &c, &s); f_ve[i-1] = c*f_ve[i-1] + s*z; d_tmp = c*d_ve[i] + s*f_ve[i]; f_ve[i] = c*f_ve[i] - s*d_ve[i]; d_ve[i] = d_tmp; z = s*d_ve[i+1]; d_ve[i+1] = c*d_ve[i+1]; if ( V ) rot_rows(V,i,i+1,c,s,V); /* get 2nd Givens' rotation */ givens(d_ve[i],z, &c, &s); d_ve[i] = c*d_ve[i] + s*z; d_tmp = c*d_ve[i+1] - s*f_ve[i]; f_ve[i] = c*f_ve[i] + s*d_ve[i+1]; d_ve[i+1] = d_tmp; if ( i+1 < i_max ) { z = s*f_ve[i+1]; f_ve[i+1] = c*f_ve[i+1]; } if ( U ) rot_rows(U,i,i+1,c,s,U); } /* should matrix be split? */ for ( i = i_min; i < i_max; i++ ) if ( fabs(f_ve[i]) < MACHEPS*(fabs(d_ve[i])+fabs(d_ve[i+1])) ) { split = TRUE; f_ve[i] = 0.0; } else if ( fabs(d_ve[i]) < MACHEPS*size ) { split = TRUE; d_ve[i] = 0.0; } /* printf("bisvd: d =\n"); v_output(d); */ /* printf("bisvd: f = \n"); v_output(f); */ } } fixsvd(d,U,V); return d; }