VEC *pxinv_vec(PERM *px, const VEC *x, VEC *out) { unsigned int i, size; if ( ! px || ! x ) error(E_NULL,"pxinv_vec"); if ( px->size > x->dim ) error(E_SIZES,"pxinv_vec"); if ( ! out || out->dim < x->dim ) out = v_resize(out,x->dim); size = px->size; if ( size == 0 ) return v_copy(x,out); if ( out != x ) { for ( i=0; i<size; i++ ) if ( px->pe[i] >= size ) error(E_BOUNDS,"pxinv_vec"); else out->ve[px->pe[i]] = x->ve[i]; } else { px_inv(px,px); px_vec(px,x,out); px_inv(px,px); } return out; }
VEC *pxinv_vec(PERM *px, const VEC *x, VEC *out) #endif { unsigned int i, size; if ( ! px || ! x ) error(E_NULL,"pxinv_vec"); if ( px->size > x->dim ) error(E_SIZES,"pxinv_vec"); /* if ( x == out ) error(E_INSITU,"pxinv_vec"); */ if ( ! out || out->dim < x->dim ) out = v_resize(out,x->dim); size = px->size; if ( size == 0 ) return v_copy(x,out); if ( out != x ) { for ( i=0; i<size; i++ ) if ( px->pe[i] >= size ) error(E_BOUNDS,"pxinv_vec"); else out->ve[px->pe[i]] = x->ve[i]; } else { /* in situ algorithm --- cheat's way out */ px_inv(px,px); px_vec(px,x,out); px_inv(px,px); } return out; }
//-------------------------------------------------------------------------- void Hqp_IpRedSpBKP::step(const Hqp_Program *qp, const VEC *z, const VEC *w, const VEC *r1, const VEC *r2, const VEC *r3, const VEC *r4, VEC *dx, VEC *dy, VEC *dz, VEC *dw) { VEC v; assert((int)r1->dim == _n && (int)dx->dim == _n); assert((int)r2->dim == _me && (int)dy->dim == _me); assert((int)r3->dim == _m && (int)dz->dim == _m); assert((int)r4->dim == _m && (int)dw->dim == _m); // augment, copy, scale and permutate [r1;r2;r3] into _r12 // calculate, permutate and scale x // augment r1 // temporary store (W^{-1}r_4 + ZW^{-1}r_3) in dz v_part(_r12, 0, _n, &v); v_slash(w, r4, dw); v_star(_zw, r3, dz); v_add(dw, dz, dz); sp_mv_mlt(_CT, dz, &v); v_sub(r1, &v, &v); v_star(&v, _scale, &v); v_copy(r2, v_part(_r12, _n, _me, &v)); px_vec(_J2QP, _r12, _r12); spBKPsolve(_J, _pivot, _r12, _xy); px_vec(_QP2J, _xy, _xy); v_star(v_part(_xy, 0, _n, &v), _scale, dx); v_copy(v_part(_xy, _n, _me, &v), dy); sp_vm_mlt(_CT, dx, dw); v_star(_zw, dw, dw); v_sub(dz, dw, dz); // calculate dw sv_mlt(-1.0, r3, dw); sp_mv_mltadd(dw, dx, qp->C, 1.0, dw); // usage of _CT is numerically worse! //sp_vm_mltadd(dw, dx, _CT, 1.0, dw); }
VEC *bdLUsolve(const BAND *bA, PERM *pivot, const VEC *b, VEC *x) #endif { int i,j,l,n,n1,pi,lb,ub,jmin, maxj; Real c; Real **bA_v; if ( bA==(BAND *)NULL || b==(VEC *)NULL || pivot==(PERM *)NULL ) error(E_NULL,"bdLUsolve"); if ( bA->mat->n != b->dim || bA->mat->n != pivot->size) error(E_SIZES,"bdLUsolve"); lb = bA->lb; ub = bA->ub; n = b->dim; n1 = n-1; bA_v = bA->mat->me; x = v_resize(x,b->dim); px_vec(pivot,b,x); /* solve Lx = b; implicit diagonal = 1 L is not permuted, therefore it must be permuted now */ px_inv(pivot,pivot); for (j=0; j < n; j++) { jmin = j+1; c = x->ve[j]; maxj = max(0,j+lb-n1); for (i=jmin,l=lb-1; l >= maxj; i++,l--) { if ( (pi = pivot->pe[i]) < jmin) pi = pivot->pe[i] = pivot->pe[pi]; x->ve[pi] -= bA_v[l][j]*c; } } /* solve Ux = b; explicit diagonal */ x->ve[n1] /= bA_v[lb][n1]; for (i=n-2; i >= 0; i--) { c = x->ve[i]; for (j=min(n1,i+ub), l=lb+j-i; j > i; j--,l--) c -= bA_v[l][j]*x->ve[j]; x->ve[i] = c/bA_v[lb][i]; } return (x); }
VEC *LUsolve(const MAT *LU, PERM *pivot, const VEC *b, VEC *x) #endif { if ( ! LU || ! b || ! pivot ) error(E_NULL,"LUsolve"); if ( LU->m != LU->n || LU->n != b->dim ) error(E_SIZES,"LUsolve"); x = v_resize(x,b->dim); px_vec(pivot,b,x); /* x := P.b */ Lsolve(LU,x,x,1.0); /* implicit diagonal = 1 */ Usolve(LU,x,x,0.0); /* explicit diagonal */ return (x); }
VEC *spLUsolve(const SPMAT *A, PERM *pivot, const VEC *b, VEC *x) #endif { int i, idx, len, lim; Real sum, *x_ve; SPROW *r; row_elt *elt; if ( ! A || ! b ) error(E_NULL,"spLUsolve"); if ( (pivot != PNULL && A->m != pivot->size) || A->m != b->dim ) error(E_SIZES,"spLUsolve"); if ( ! x || x->dim != A->n ) x = v_resize(x,A->n); if ( pivot != PNULL ) x = px_vec(pivot,b,x); else x = v_copy(b,x); x_ve = x->ve; lim = min(A->m,A->n); for ( i = 0; i < lim; i++ ) { sum = x_ve[i]; r = &(A->row[i]); len = r->len; elt = r->elt; for ( idx = 0; idx < len && elt->col < i; idx++, elt++ ) sum -= elt->val*x_ve[elt->col]; x_ve[i] = sum; } for ( i = lim-1; i >= 0; i-- ) { sum = x_ve[i]; r = &(A->row[i]); len = r->len; elt = &(r->elt[len-1]); for ( idx = len-1; idx >= 0 && elt->col > i; idx--, elt-- ) sum -= elt->val*x_ve[elt->col]; if ( idx < 0 || elt->col != i || elt->val == 0.0 ) error(E_SING,"spLUsolve"); x_ve[i] = sum/elt->val; } return x; }
/* BKPsolve -- solves A.x = b where A has been factored a la BKPfactor() -- returns x, which is created if NULL */ extern VEC *BKPsolve(MAT *A, PERM *pivot, PERM *block, VEC *b, VEC *x) { static VEC *tmp=VNULL; /* dummy storage needed */ int i, j, n, onebyone; Real **A_me, a11, a12, a22, b1, b2, det, sum, *tmp_ve, tmp_diag; if ( ! A || ! pivot || ! block || ! b ) error(E_NULL,"BKPsolve"); if ( A->m != A->n ) error(E_SQUARE,"BKPsolve"); n = A->n; if ( b->dim != n || pivot->size != n || block->size != n ) error(E_SIZES,"BKPsolve"); x = v_resize(x,n); tmp = v_resize(tmp,n); MEM_STAT_REG(tmp,TYPE_VEC); A_me = A->me; tmp_ve = tmp->ve; px_vec(pivot,b,tmp); /* solve for lower triangular part */ for ( i = 0; i < n; i++ ) { sum = v_entry(tmp,i); if ( block->pe[i] < i ) for ( j = 0; j < i-1; j++ ) sum -= m_entry(A,i,j)*v_entry(tmp,j); else for ( j = 0; j < i; j++ ) sum -= m_entry(A,i,j)*v_entry(tmp,j); v_set_val(tmp,i,sum); } /* printf("# BKPsolve: solving L part: tmp =\n"); v_output(tmp); */ /* solve for diagonal part */ for ( i = 0; i < n; i = onebyone ? i+1 : i+2 ) { onebyone = ( block->pe[i] == i ); if ( onebyone ) { tmp_diag = m_entry(A,i,i); if ( tmp_diag == 0.0 ) error(E_SING,"BKPsolve"); /* tmp_ve[i] /= tmp_diag; */ v_set_val(tmp,i,v_entry(tmp,i) / tmp_diag); } else { a11 = m_entry(A,i,i); a22 = m_entry(A,i+1,i+1); a12 = m_entry(A,i+1,i); b1 = v_entry(tmp,i); b2 = v_entry(tmp,i+1); det = a11*a22-a12*a12; /* < 0 : see BKPfactor() */ if ( det == 0.0 ) error(E_SING,"BKPsolve"); det = 1/det; v_set_val(tmp,i,det*(a22*b1-a12*b2)); v_set_val(tmp,i+1,det*(a11*b2-a12*b1)); } } /* printf("# BKPsolve: solving D part: tmp =\n"); v_output(tmp); */ /* solve for transpose of lower traingular part */ for ( i = n-1; i >= 0; i-- ) { /* use symmetry of factored form to get stride 1 */ sum = v_entry(tmp,i); if ( block->pe[i] > i ) for ( j = i+2; j < n; j++ ) sum -= m_entry(A,i,j)*v_entry(tmp,j); else for ( j = i+1; j < n; j++ ) sum -= m_entry(A,i,j)*v_entry(tmp,j); v_set_val(tmp,i,sum); } /* printf("# BKPsolve: solving L^T part: tmp =\n");v_output(tmp); */ /* and do final permutation */ x = pxinv_vec(pivot,tmp,x); return x; }