/************************************************************************* Dense solver. This subroutine finds solution of the linear system A*X=B with non-square, possibly degenerate A. System is solved in the least squares sense, and general least squares solution X = X0 + CX*y which minimizes |A*X-B| is returned. If A is non-degenerate, solution in the usual sense is returned Additional features include: * iterative improvement INPUT PARAMETERS A - array[0..NRows-1,0..NCols-1], system matrix NRows - vertical size of A NCols - horizontal size of A B - array[0..NCols-1], right part Threshold- a number in [0,1]. Singular values beyond Threshold are considered zero. Set it to 0.0, if you don't understand what it means, so the solver will choose good value on its own. OUTPUT PARAMETERS Info - return code: * -4 SVD subroutine failed * -1 if NRows<=0 or NCols<=0 or Threshold<0 was passed * 1 if task is solved Rep - solver report, see below for more info X - array[0..N-1,0..M-1], it contains: * solution of A*X=B if A is non-singular (well-conditioned or ill-conditioned, but not very close to singular) * zeros, if A is singular or VERY close to singular (in this case Info=-3). SOLVER REPORT Subroutine sets following fields of the Rep structure: * R2 reciprocal of condition number: 1/cond(A), 2-norm. * N = NCols * K dim(Null(A)) * CX array[0..N-1,0..K-1], kernel of A. Columns of CX store such vectors that A*CX[i]=0. -- ALGLIB -- Copyright 24.08.2009 by Bochkanov Sergey *************************************************************************/ void rmatrixsolvels(const ap::real_2d_array& a, int nrows, int ncols, const ap::real_1d_array& b, double threshold, int& info, densesolverlsreport& rep, ap::real_1d_array& x) { ap::real_1d_array sv; ap::real_2d_array u; ap::real_2d_array vt; ap::real_1d_array rp; ap::real_1d_array utb; ap::real_1d_array sutb; ap::real_1d_array tmp; ap::real_1d_array ta; ap::real_1d_array tx; ap::real_1d_array buf; ap::real_1d_array w; int i; int j; int nsv; int kernelidx; double v; double verr; bool svdfailed; bool zeroa; int rfs; int nrfs; bool terminatenexttime; bool smallerr; if( nrows<=0||ncols<=0||ap::fp_less(threshold,0) ) { info = -1; return; } if( ap::fp_eq(threshold,0) ) { threshold = 1000*ap::machineepsilon; } // // Factorize A first // svdfailed = !rmatrixsvd(a, nrows, ncols, 1, 2, 2, sv, u, vt); zeroa = ap::fp_eq(sv(0),0); if( svdfailed||zeroa ) { if( svdfailed ) { info = -4; } else { info = 1; } x.setlength(ncols); for(i = 0; i <= ncols-1; i++) { x(i) = 0; } rep.n = ncols; rep.k = ncols; rep.cx.setlength(ncols, ncols); for(i = 0; i <= ncols-1; i++) { for(j = 0; j <= ncols-1; j++) { if( i==j ) { rep.cx(i,j) = 1; } else { rep.cx(i,j) = 0; } } } rep.r2 = 0; return; } nsv = ap::minint(ncols, nrows); if( nsv==ncols ) { rep.r2 = sv(nsv-1)/sv(0); } else { rep.r2 = 0; } rep.n = ncols; info = 1; // // Iterative improvement of xc combined with solution: // 1. xc = 0 // 2. calculate r = bc-A*xc using extra-precise dot product // 3. solve A*y = r // 4. update x:=x+r // 5. goto 2 // // This cycle is executed until one of two things happens: // 1. maximum number of iterations reached // 2. last iteration decreased error to the lower limit // utb.setlength(nsv); sutb.setlength(nsv); x.setlength(ncols); tmp.setlength(ncols); ta.setlength(ncols+1); tx.setlength(ncols+1); buf.setlength(ncols+1); for(i = 0; i <= ncols-1; i++) { x(i) = 0; } kernelidx = nsv; for(i = 0; i <= nsv-1; i++) { if( ap::fp_less_eq(sv(i),threshold*sv(0)) ) { kernelidx = i; break; } } rep.k = ncols-kernelidx; nrfs = densesolverrfsmaxv2(ncols, rep.r2); terminatenexttime = false; rp.setlength(nrows); for(rfs = 0; rfs <= nrfs; rfs++) { if( terminatenexttime ) { break; } // // calculate right part // if( rfs==0 ) { ap::vmove(&rp(0), &b(0), ap::vlen(0,nrows-1)); } else { smallerr = true; for(i = 0; i <= nrows-1; i++) { ap::vmove(&ta(0), &a(i, 0), ap::vlen(0,ncols-1)); ta(ncols) = -1; ap::vmove(&tx(0), &x(0), ap::vlen(0,ncols-1)); tx(ncols) = b(i); xdot(ta, tx, ncols+1, buf, v, verr); rp(i) = -v; smallerr = smallerr&&ap::fp_less(fabs(v),4*verr); } if( smallerr ) { terminatenexttime = true; } } // // solve A*dx = rp // for(i = 0; i <= ncols-1; i++) { tmp(i) = 0; } for(i = 0; i <= nsv-1; i++) { utb(i) = 0; } for(i = 0; i <= nrows-1; i++) { v = rp(i); ap::vadd(&utb(0), &u(i, 0), ap::vlen(0,nsv-1), v); } for(i = 0; i <= nsv-1; i++) { if( i<kernelidx ) { sutb(i) = utb(i)/sv(i); } else { sutb(i) = 0; } } for(i = 0; i <= nsv-1; i++) { v = sutb(i); ap::vadd(&tmp(0), &vt(i, 0), ap::vlen(0,ncols-1), v); } // // update x: x:=x+dx // ap::vadd(&x(0), &tmp(0), ap::vlen(0,ncols-1)); } // // fill CX // if( rep.k>0 ) { rep.cx.setlength(ncols, rep.k); for(i = 0; i <= rep.k-1; i++) { ap::vmove(rep.cx.getcolumn(i, 0, ncols-1), vt.getrow(kernelidx+i, 0, ncols-1)); } } }
/************************************************************************* Internal fitting subroutine *************************************************************************/ static void lsfitlinearinternal(const ap::real_1d_array& y, const ap::real_1d_array& w, const ap::real_2d_array& fmatrix, int n, int m, int& info, ap::real_1d_array& c, lsfitreport& rep) { double threshold; ap::real_2d_array ft; ap::real_2d_array q; ap::real_2d_array l; ap::real_2d_array r; ap::real_1d_array b; ap::real_1d_array wmod; ap::real_1d_array tau; int i; int j; double v; ap::real_1d_array sv; ap::real_2d_array u; ap::real_2d_array vt; ap::real_1d_array tmp; ap::real_1d_array utb; ap::real_1d_array sutb; int relcnt; if( n<1||m<1 ) { info = -1; return; } info = 1; threshold = sqrt(ap::machineepsilon); // // Degenerate case, needs special handling // if( n<m ) { // // Create design matrix. // ft.setlength(n, m); b.setlength(n); wmod.setlength(n); for(j = 0; j <= n-1; j++) { v = w(j); ap::vmove(&ft(j, 0), 1, &fmatrix(j, 0), 1, ap::vlen(0,m-1), v); b(j) = w(j)*y(j); wmod(j) = 1; } // // LQ decomposition and reduction to M=N // c.setlength(m); for(i = 0; i <= m-1; i++) { c(i) = 0; } rep.taskrcond = 0; rmatrixlq(ft, n, m, tau); rmatrixlqunpackq(ft, n, m, tau, n, q); rmatrixlqunpackl(ft, n, m, l); lsfitlinearinternal(b, wmod, l, n, n, info, tmp, rep); if( info<=0 ) { return; } for(i = 0; i <= n-1; i++) { v = tmp(i); ap::vadd(&c(0), 1, &q(i, 0), 1, ap::vlen(0,m-1), v); } return; } // // N>=M. Generate design matrix and reduce to N=M using // QR decomposition. // ft.setlength(n, m); b.setlength(n); for(j = 0; j <= n-1; j++) { v = w(j); ap::vmove(&ft(j, 0), 1, &fmatrix(j, 0), 1, ap::vlen(0,m-1), v); b(j) = w(j)*y(j); } rmatrixqr(ft, n, m, tau); rmatrixqrunpackq(ft, n, m, tau, m, q); rmatrixqrunpackr(ft, n, m, r); tmp.setlength(m); for(i = 0; i <= m-1; i++) { tmp(i) = 0; } for(i = 0; i <= n-1; i++) { v = b(i); ap::vadd(&tmp(0), 1, &q(i, 0), 1, ap::vlen(0,m-1), v); } b.setlength(m); ap::vmove(&b(0), 1, &tmp(0), 1, ap::vlen(0,m-1)); // // R contains reduced MxM design upper triangular matrix, // B contains reduced Mx1 right part. // // Determine system condition number and decide // should we use triangular solver (faster) or // SVD-based solver (more stable). // // We can use LU-based RCond estimator for this task. // rep.taskrcond = rmatrixlurcondinf(r, m); if( ap::fp_greater(rep.taskrcond,threshold) ) { // // use QR-based solver // c.setlength(m); c(m-1) = b(m-1)/r(m-1,m-1); for(i = m-2; i >= 0; i--) { v = ap::vdotproduct(&r(i, i+1), 1, &c(i+1), 1, ap::vlen(i+1,m-1)); c(i) = (b(i)-v)/r(i,i); } } else { // // use SVD-based solver // if( !rmatrixsvd(r, m, m, 1, 1, 2, sv, u, vt) ) { info = -4; return; } utb.setlength(m); sutb.setlength(m); for(i = 0; i <= m-1; i++) { utb(i) = 0; } for(i = 0; i <= m-1; i++) { v = b(i); ap::vadd(&utb(0), 1, &u(i, 0), 1, ap::vlen(0,m-1), v); } if( ap::fp_greater(sv(0),0) ) { rep.taskrcond = sv(m-1)/sv(0); for(i = 0; i <= m-1; i++) { if( ap::fp_greater(sv(i),threshold*sv(0)) ) { sutb(i) = utb(i)/sv(i); } else { sutb(i) = 0; } } } else { rep.taskrcond = 0; for(i = 0; i <= m-1; i++) { sutb(i) = 0; } } c.setlength(m); for(i = 0; i <= m-1; i++) { c(i) = 0; } for(i = 0; i <= m-1; i++) { v = sutb(i); ap::vadd(&c(0), 1, &vt(i, 0), 1, ap::vlen(0,m-1), v); } } // // calculate errors // rep.rmserror = 0; rep.avgerror = 0; rep.avgrelerror = 0; rep.maxerror = 0; relcnt = 0; for(i = 0; i <= n-1; i++) { v = ap::vdotproduct(&fmatrix(i, 0), 1, &c(0), 1, ap::vlen(0,m-1)); rep.rmserror = rep.rmserror+ap::sqr(v-y(i)); rep.avgerror = rep.avgerror+fabs(v-y(i)); if( ap::fp_neq(y(i),0) ) { rep.avgrelerror = rep.avgrelerror+fabs(v-y(i))/fabs(y(i)); relcnt = relcnt+1; } rep.maxerror = ap::maxreal(rep.maxerror, fabs(v-y(i))); } rep.rmserror = sqrt(rep.rmserror/n); rep.avgerror = rep.avgerror/n; if( relcnt!=0 ) { rep.avgrelerror = rep.avgrelerror/relcnt; } }