void lbfgslincomb(const int& n, const double& da, const ap::real_1d_array& dx, int sx, ap::real_1d_array& dy, int sy) { int fx; int fy; fx = sx+n-1; fy = sy+n-1; ap::vadd(dy.getvector(sy, fy), dx.getvector(sx, fx), da); }
double lbfgsdotproduct(const int& n, const ap::real_1d_array& dx, int sx, const ap::real_1d_array& dy, int sy) { double result; double v; int fx; int fy; fx = sx+n-1; fy = sy+n-1; v = ap::vdotproduct(dx.getvector(sx, fx), dy.getvector(sy, fy)); result = v; return result; }
/************************************************************************* Dense solver. Similar to RMatrixSolveM() but solves task with one right part (where b/x are vectors, not matrices). See RMatrixSolveM() description for more information about subroutine parameters. -- ALGLIB -- Copyright 24.08.2009 by Bochkanov Sergey *************************************************************************/ void rmatrixsolve(const ap::real_2d_array& a, int n, const ap::real_1d_array& b, int& info, densesolverreport& rep, ap::real_1d_array& x) { ap::real_2d_array bm; ap::real_2d_array xm; if( n<=0 ) { info = -1; return; } bm.setlength(n, 1); ap::vmove(bm.getcolumn(0, 0, n-1), b.getvector(0, n-1)); rmatrixsolvem(a, n, bm, 1, info, rep, xm); x.setlength(n); ap::vmove(x.getvector(0, n-1), xm.getcolumn(0, 0, n-1)); }
/************************************************************************* Obsolete 1-based subroutine *************************************************************************/ void shermanmorrisonupdateuv(ap::real_2d_array& inva, int n, const ap::real_1d_array& u, const ap::real_1d_array& v) { ap::real_1d_array t1; ap::real_1d_array t2; int i; int j; double lambda; double vt; t1.setbounds(1, n); t2.setbounds(1, n); // // T1 = InvA * U // Lambda = v * T1 // for(i = 1; i <= n; i++) { vt = ap::vdotproduct(&inva(i, 1), &u(1), ap::vlen(1,n)); t1(i) = vt; } lambda = ap::vdotproduct(&v(1), &t1(1), ap::vlen(1,n)); // // T2 = v*InvA // for(j = 1; j <= n; j++) { vt = ap::vdotproduct(v.getvector(1, n), inva.getcolumn(j, 1, n)); t2(j) = vt; } // // InvA = InvA - correction // for(i = 1; i <= n; i++) { vt = t1(i)/(1+lambda); ap::vsub(&inva(i, 1), &t2(1), ap::vlen(1,n), vt); } }
/************************************************************************* Inverse matrix update by the Sherman-Morrison formula The algorithm computes the inverse of matrix A+u*v’ by using the given matrix A^-1 and the vectors u and v. Input parameters: InvA - inverse of matrix A. Array whose indexes range within [0..N-1, 0..N-1]. N - size of matrix A. U - the vector modifying the matrix. Array whose index ranges within [0..N-1]. V - the vector modifying the matrix. Array whose index ranges within [0..N-1]. Output parameters: InvA - inverse of matrix A + u*v'. -- ALGLIB -- Copyright 2005 by Bochkanov Sergey *************************************************************************/ void rmatrixinvupdateuv(ap::real_2d_array& inva, int n, const ap::real_1d_array& u, const ap::real_1d_array& v) { ap::real_1d_array t1; ap::real_1d_array t2; int i; int j; double lambda; double vt; t1.setbounds(0, n-1); t2.setbounds(0, n-1); // // T1 = InvA * U // Lambda = v * T1 // for(i = 0; i <= n-1; i++) { vt = ap::vdotproduct(&inva(i, 0), &u(0), ap::vlen(0,n-1)); t1(i) = vt; } lambda = ap::vdotproduct(&v(0), &t1(0), ap::vlen(0,n-1)); // // T2 = v*InvA // for(j = 0; j <= n-1; j++) { vt = ap::vdotproduct(v.getvector(0, n-1), inva.getcolumn(j, 0, n-1)); t2(j) = vt; } // // InvA = InvA - correction // for(i = 0; i <= n-1; i++) { vt = t1(i)/(1+lambda); ap::vsub(&inva(i, 0), &t2(0), ap::vlen(0,n-1), vt); } }
/************************************************************************* Application of a sequence of elementary rotations to a matrix The algorithm post-multiplies the matrix by a sequence of rotation transformations which is given by arrays C and S. Depending on the value of the IsForward parameter either 1 and 2, 3 and 4 and so on (if IsForward=true) rows are rotated, or the rows N and N-1, N-2 and N-3 and so on are rotated. Not the whole matrix but only a part of it is transformed (rows from M1 to M2, columns from N1 to N2). Only the elements of this submatrix are changed. Input parameters: IsForward - the sequence of the rotation application. M1,M2 - the range of rows to be transformed. N1, N2 - the range of columns to be transformed. C,S - transformation coefficients. Array whose index ranges within [1..N2-N1]. A - processed matrix. WORK - working array whose index ranges within [M1..M2]. Output parameters: A - transformed matrix. Utility subroutine. *************************************************************************/ void applyrotationsfromtheright(bool isforward, int m1, int m2, int n1, int n2, const ap::real_1d_array& c, const ap::real_1d_array& s, ap::real_2d_array& a, ap::real_1d_array& work) { int j; int jp1; double ctemp; double stemp; double temp; // // Form A * P' // if( isforward ) { if( m1!=m2 ) { // // Common case: M1<>M2 // for(j = n1; j <= n2-1; j++) { ctemp = c(j-n1+1); stemp = s(j-n1+1); if( ctemp!=1||stemp!=0 ) { jp1 = j+1; ap::vmove(work.getvector(m1, m2), a.getcolumn(jp1, m1, m2), ctemp); ap::vsub(work.getvector(m1, m2), a.getcolumn(j, m1, m2), stemp); ap::vmul(a.getcolumn(j, m1, m2), ctemp); ap::vadd(a.getcolumn(j, m1, m2), a.getcolumn(jp1, m1, m2), stemp); ap::vmove(a.getcolumn(jp1, m1, m2), work.getvector(m1, m2)); } } } else { // // Special case: M1=M2 // for(j = n1; j <= n2-1; j++) { ctemp = c(j-n1+1); stemp = s(j-n1+1); if( ctemp!=1||stemp!=0 ) { temp = a(m1,j+1); a(m1,j+1) = ctemp*temp-stemp*a(m1,j); a(m1,j) = stemp*temp+ctemp*a(m1,j); } } } } else { if( m1!=m2 ) { // // Common case: M1<>M2 // for(j = n2-1; j >= n1; j--) { ctemp = c(j-n1+1); stemp = s(j-n1+1); if( ctemp!=1||stemp!=0 ) { jp1 = j+1; ap::vmove(work.getvector(m1, m2), a.getcolumn(jp1, m1, m2), ctemp); ap::vsub(work.getvector(m1, m2), a.getcolumn(j, m1, m2), stemp); ap::vmul(a.getcolumn(j, m1, m2), ctemp); ap::vadd(a.getcolumn(j, m1, m2), a.getcolumn(jp1, m1, m2), stemp); ap::vmove(a.getcolumn(jp1, m1, m2), work.getvector(m1, m2)); } } } else { // // Special case: M1=M2 // for(j = n2-1; j >= n1; j--) { ctemp = c(j-n1+1); stemp = s(j-n1+1); if( ctemp!=1||stemp!=0 ) { temp = a(m1,j+1); a(m1,j+1) = ctemp*temp-stemp*a(m1,j); a(m1,j) = stemp*temp+ctemp*a(m1,j); } } } } }
/************************************************************************* LIMITED MEMORY BFGS METHOD FOR LARGE SCALE OPTIMIZATION JORGE NOCEDAL The subroutine minimizes function F(x) of N arguments by using a quasi- Newton method (LBFGS scheme) which is optimized to use a minimum amount of memory. The subroutine generates the approximation of an inverse Hessian matrix by using information about the last M steps of the algorithm (instead of N). It lessens a required amount of memory from a value of order N^2 to a value of order 2*N*M. This subroutine uses the FuncGrad subroutine which calculates the value of the function F and gradient G in point X. The programmer should define the FuncGrad subroutine by himself. It should be noted that the subroutine doesn't need to waste time for memory allocation of array G, because the memory is allocated in calling the subroutine. Setting a dimension of array G each time when calling a subroutine will excessively slow down an algorithm. The programmer could also redefine the LBFGSNewIteration subroutine which is called on each new step. The current point X, the function value F and the gradient G are passed into this subroutine. It is reasonable to redefine the subroutine for better debugging, for example, to visualize the solution process. Input parameters: N - problem dimension. N>0 M - number of corrections in the BFGS scheme of Hessian approximation update. Recommended value: 3<=M<=7. The smaller value causes worse convergence, the bigger will not cause a considerably better convergence, but will cause a fall in the performance. M<=N. X - initial solution approximation. Array whose index ranges from 1 to N. EpsG - positive number which defines a precision of search. The subroutine finishes its work if the condition ||G|| < EpsG is satisfied, where ||.|| means Euclidian norm, G - gradient, X - current approximation. EpsF - positive number which defines a precision of search. The subroutine finishes its work if on iteration number k+1 the condition |F(k+1)-F(k)| <= EpsF*max{|F(k)|, |F(k+1)|, 1} is satisfied. EpsX - positive number which defines a precision of search. The subroutine finishes its work if on iteration number k+1 the condition |X(k+1)-X(k)| <= EpsX is fulfilled. MaxIts- maximum number of iterations. If MaxIts=0, the number of iterations is unlimited. Output parameters: X - solution approximation. Array whose index ranges from 1 to N. Info- a return code: * -1 wrong parameters were specified, * 0 interrupted by user, * 1 relative function decreasing is less or equal to EpsF, * 2 step is less or equal EpsX, * 4 gradient norm is less or equal to EpsG, * 5 number of iterations exceeds MaxIts. FuncGrad routine description. User-defined. Input parameters: X - array whose index ranges from 1 to N. Output parameters: F - function value at X. G - function gradient. Array whose index ranges from 1 to N. The memory for array G has already been allocated in the calling subroutine, and it isn't necessary to allocate it in the FuncGrad subroutine. *************************************************************************/ void lbfgsminimize(const int& n, const int& m, ap::real_1d_array& x, funcgrad_t funcgrad, newiteration_t newiteration, void *params, const double& epsg, const double& epsf, const double& epsx, const int& maxits, int& info) { ap::real_1d_array w; double f; double fold; double tf; double v; ap::real_1d_array xold; ap::real_1d_array g; ap::real_1d_array diag; double gnorm; double stp1; double ftol; double stp; double ys; double yy; double sq; double yr; double beta; // double xnorm; int iter; int nfun; int point; int ispt; int iypt; int maxfev; int bound; int npt=0; //to avoid warning about not-initialize int cp; int i; int nfev; int inmc; int iycn; int iscn; double xtol; double gtol; double stpmin; double stpmax; w.setbounds(1, n*(2*m+1)+2*m); g.setbounds(1, n); xold.setbounds(1, n); diag.setbounds(1, n); (*funcgrad)(x, f, g, params); iter = 0; info = 0; if( n<=0||m<=0||m>n||epsg<0||epsf<0||epsx<0||maxits<0 ) { info = -1; return; } nfun = 1; point = 0; for(i = 1; i <= n; i++) { diag(i) = 1; } xtol = 100*ap::machineepsilon; gtol = 0.9; stpmin = pow(double(10), double(-20)); stpmax = pow(double(10), double(20)); ispt = n+2*m; iypt = ispt+n*m; for(i = 1; i <= n; i++) { w(ispt+i) = -g(i); } gnorm = sqrt(lbfgsdotproduct(n, g, 1, g, 1)); stp1 = 1/gnorm; ftol = 0.0001; maxfev = 20; while(true) { ap::vmove(xold.getvector(1, n), x.getvector(1, n)); fold = f; iter = iter+1; info = 0; bound = iter-1; if( iter!=1 ) { if( iter>m ) { bound = m; } ys = lbfgsdotproduct(n, w, iypt+npt+1, w, ispt+npt+1); yy = lbfgsdotproduct(n, w, iypt+npt+1, w, iypt+npt+1); for(i = 1; i <= n; i++) { diag(i) = ys/yy; } cp = point; if( point==0 ) { cp = m; } w(n+cp) = 1/ys; for(i = 1; i <= n; i++) { w(i) = -g(i); } cp = point; for(i = 1; i <= bound; i++) { cp = cp-1; if( cp==-1 ) { cp = m-1; } sq = lbfgsdotproduct(n, w, ispt+cp*n+1, w, 1); inmc = n+m+cp+1; iycn = iypt+cp*n; w(inmc) = w(n+cp+1)*sq; lbfgslincomb(n, -w(inmc), w, iycn+1, w, 1); } for(i = 1; i <= n; i++) { w(i) = diag(i)*w(i); } for(i = 1; i <= bound; i++) { yr = lbfgsdotproduct(n, w, iypt+cp*n+1, w, 1); beta = w(n+cp+1)*yr; inmc = n+m+cp+1; beta = w(inmc)-beta; iscn = ispt+cp*n; lbfgslincomb(n, beta, w, iscn+1, w, 1); cp = cp+1; if( cp==m ) { cp = 0; } } for(i = 1; i <= n; i++) { w(ispt+point*n+i) = w(i); } } nfev = 0; stp = 1; if( iter==1 ) { stp = stp1; } for(i = 1; i <= n; i++) { w(i) = g(i); } lbfgsmcsrch(n, x, f, g, w, ispt+point*n+1, stp, ftol, xtol, maxfev, info, nfev, diag, gtol, stpmin, stpmax, funcgrad, params); if( info!=1 ) { if( info==0 ) { info = -1; return; } } nfun = nfun+nfev; npt = point*n; for(i = 1; i <= n; i++) { w(ispt+npt+i) = stp*w(ispt+npt+i); w(iypt+npt+i) = g(i)-w(i); } point = point+1; if( point==m ) { point = 0; } if( iter>maxits&&maxits>0 ) { info = 5; return; } if (newiteration != NULL) { (*newiteration)(iter, x, f, g, params); } gnorm = sqrt(lbfgsdotproduct(n, g, 1, g, 1)); if( gnorm<=epsg ) { info = 4; return; } tf = ap::maxreal(fabs(fold), ap::maxreal(fabs(f), 1.0)); if( fold-f<=epsf*tf ) { info = 1; return; } ap::vsub(xold.getvector(1, n), x.getvector(1, n)); v = sqrt(lbfgsdotproduct(n, xold, 1, xold, 1)); if( v<=epsx ) { info = 2; return; } } }
/************************************************************************* Obsolete 1-based subroutine *************************************************************************/ void totridiagonal(ap::real_2d_array& a, int n, bool isupper, ap::real_1d_array& tau, ap::real_1d_array& d, ap::real_1d_array& e) { int i; int ip1; int im1; int nmi; int nm1; double alpha; double taui; double v; ap::real_1d_array t; ap::real_1d_array t2; ap::real_1d_array t3; if( n<=0 ) { return; } t.setbounds(1, n); t2.setbounds(1, n); t3.setbounds(1, n); tau.setbounds(1, ap::maxint(1, n-1)); d.setbounds(1, n); e.setbounds(1, ap::maxint(1, n-1)); if( isupper ) { // // Reduce the upper triangle of A // for(i = n-1; i >= 1; i--) { // // Generate elementary reflector H(i) = I - tau * v * v' // to annihilate A(1:i-1,i+1) // // DLARFG( I, A( I, I+1 ), A( 1, I+1 ), 1, TAUI ); // ip1 = i+1; im1 = i-1; if( i>=2 ) { ap::vmove(t.getvector(2, i), a.getcolumn(ip1, 1, im1)); } t(1) = a(i,ip1); generatereflection(t, i, taui); if( i>=2 ) { ap::vmove(a.getcolumn(ip1, 1, im1), t.getvector(2, i)); } a(i,ip1) = t(1); e(i) = a(i,i+1); if( taui!=0 ) { // // Apply H(i) from both sides to A(1:i,1:i) // a(i,i+1) = 1; // // Compute x := tau * A * v storing x in TAU(1:i) // // DSYMV( UPLO, I, TAUI, A, LDA, A( 1, I+1 ), 1, ZERO, TAU, 1 ); // ip1 = i+1; ap::vmove(t.getvector(1, i), a.getcolumn(ip1, 1, i)); symmetricmatrixvectormultiply(a, isupper, 1, i, t, taui, tau); // // Compute w := x - 1/2 * tau * (x'*v) * v // ip1 = i+1; v = ap::vdotproduct(tau.getvector(1, i), a.getcolumn(ip1, 1, i)); alpha = -0.5*taui*v; ap::vadd(tau.getvector(1, i), a.getcolumn(ip1, 1, i), alpha); // // Apply the transformation as a rank-2 update: // A := A - v * w' - w * v' // // DSYR2( UPLO, I, -ONE, A( 1, I+1 ), 1, TAU, 1, A, LDA ); // ap::vmove(t.getvector(1, i), a.getcolumn(ip1, 1, i)); symmetricrank2update(a, isupper, 1, i, t, tau, t2, double(-1)); a(i,i+1) = e(i); } d(i+1) = a(i+1,i+1); tau(i) = taui; } d(1) = a(1,1); } else { // // Reduce the lower triangle of A // for(i = 1; i <= n-1; i++) { // // Generate elementary reflector H(i) = I - tau * v * v' // to annihilate A(i+2:n,i) // //DLARFG( N-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1, TAUI ); // nmi = n-i; ip1 = i+1; ap::vmove(t.getvector(1, nmi), a.getcolumn(i, ip1, n)); generatereflection(t, nmi, taui); ap::vmove(a.getcolumn(i, ip1, n), t.getvector(1, nmi)); e(i) = a(i+1,i); if( taui!=0 ) { // // Apply H(i) from both sides to A(i+1:n,i+1:n) // a(i+1,i) = 1; // // Compute x := tau * A * v storing y in TAU(i:n-1) // //DSYMV( UPLO, N-I, TAUI, A( I+1, I+1 ), LDA, A( I+1, I ), 1, ZERO, TAU( I ), 1 ); // ip1 = i+1; nmi = n-i; nm1 = n-1; ap::vmove(t.getvector(1, nmi), a.getcolumn(i, ip1, n)); symmetricmatrixvectormultiply(a, isupper, i+1, n, t, taui, t2); ap::vmove(&tau(i), &t2(1), ap::vlen(i,nm1)); // // Compute w := x - 1/2 * tau * (x'*v) * v // nm1 = n-1; ip1 = i+1; v = ap::vdotproduct(tau.getvector(i, nm1), a.getcolumn(i, ip1, n)); alpha = -0.5*taui*v; ap::vadd(tau.getvector(i, nm1), a.getcolumn(i, ip1, n), alpha); // // Apply the transformation as a rank-2 update: // A := A - v * w' - w * v' // //DSYR2( UPLO, N-I, -ONE, A( I+1, I ), 1, TAU( I ), 1, A( I+1, I+1 ), LDA ); // nm1 = n-1; nmi = n-i; ip1 = i+1; ap::vmove(t.getvector(1, nmi), a.getcolumn(i, ip1, n)); ap::vmove(&t2(1), &tau(i), ap::vlen(1,nmi)); symmetricrank2update(a, isupper, i+1, n, t, t2, t3, double(-1)); a(i+1,i) = e(i); } d(i) = a(i,i); tau(i) = taui; } d(n) = a(n,n); } }
/************************************************************************* Reduction of a symmetric matrix which is given by its higher or lower triangular part to a tridiagonal matrix using orthogonal similarity transformation: Q'*A*Q=T. Input parameters: A - matrix to be transformed array with elements [0..N-1, 0..N-1]. N - size of matrix A. IsUpper - storage format. If IsUpper = True, then matrix A is given by its upper triangle, and the lower triangle is not used and not modified by the algorithm, and vice versa if IsUpper = False. Output parameters: A - matrices T and Q in compact form (see lower) Tau - array of factors which are forming matrices H(i) array with elements [0..N-2]. D - main diagonal of symmetric matrix T. array with elements [0..N-1]. E - secondary diagonal of symmetric matrix T. array with elements [0..N-2]. If IsUpper=True, the matrix Q is represented as a product of elementary reflectors Q = H(n-2) . . . H(2) H(0). Each H(i) has the form H(i) = I - tau * v * v' where tau is a real scalar, and v is a real vector with v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in A(0:i-1,i+1), and tau in TAU(i). If IsUpper=False, the matrix Q is represented as a product of elementary reflectors Q = H(0) H(2) . . . H(n-2). Each H(i) has the form H(i) = I - tau * v * v' where tau is a real scalar, and v is a real vector with v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i), and tau in TAU(i). The contents of A on exit are illustrated by the following examples with n = 5: if UPLO = 'U': if UPLO = 'L': ( d e v1 v2 v3 ) ( d ) ( d e v2 v3 ) ( e d ) ( d e v3 ) ( v0 e d ) ( d e ) ( v0 v1 e d ) ( d ) ( v0 v1 v2 e d ) where d and e denote diagonal and off-diagonal elements of T, and vi denotes an element of the vector defining H(i). -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1992 *************************************************************************/ void smatrixtd(ap::real_2d_array& a, int n, bool isupper, ap::real_1d_array& tau, ap::real_1d_array& d, ap::real_1d_array& e) { int i; double alpha; double taui; double v; ap::real_1d_array t; ap::real_1d_array t2; ap::real_1d_array t3; if( n<=0 ) { return; } t.setbounds(1, n); t2.setbounds(1, n); t3.setbounds(1, n); if( n>1 ) { tau.setbounds(0, n-2); } d.setbounds(0, n-1); if( n>1 ) { e.setbounds(0, n-2); } if( isupper ) { // // Reduce the upper triangle of A // for(i = n-2; i >= 0; i--) { // // Generate elementary reflector H() = E - tau * v * v' // if( i>=1 ) { ap::vmove(t.getvector(2, i+1), a.getcolumn(i+1, 0, i-1)); } t(1) = a(i,i+1); generatereflection(t, i+1, taui); if( i>=1 ) { ap::vmove(a.getcolumn(i+1, 0, i-1), t.getvector(2, i+1)); } a(i,i+1) = t(1); e(i) = a(i,i+1); if( taui!=0 ) { // // Apply H from both sides to A // a(i,i+1) = 1; // // Compute x := tau * A * v storing x in TAU // ap::vmove(t.getvector(1, i+1), a.getcolumn(i+1, 0, i)); symmetricmatrixvectormultiply(a, isupper, 0, i, t, taui, t3); ap::vmove(&tau(0), &t3(1), ap::vlen(0,i)); // // Compute w := x - 1/2 * tau * (x'*v) * v // v = ap::vdotproduct(tau.getvector(0, i), a.getcolumn(i+1, 0, i)); alpha = -0.5*taui*v; ap::vadd(tau.getvector(0, i), a.getcolumn(i+1, 0, i), alpha); // // Apply the transformation as a rank-2 update: // A := A - v * w' - w * v' // ap::vmove(t.getvector(1, i+1), a.getcolumn(i+1, 0, i)); ap::vmove(&t3(1), &tau(0), ap::vlen(1,i+1)); symmetricrank2update(a, isupper, 0, i, t, t3, t2, double(-1)); a(i,i+1) = e(i); } d(i+1) = a(i+1,i+1); tau(i) = taui; } d(0) = a(0,0); } else { // // Reduce the lower triangle of A // for(i = 0; i <= n-2; i++) { // // Generate elementary reflector H = E - tau * v * v' // ap::vmove(t.getvector(1, n-i-1), a.getcolumn(i, i+1, n-1)); generatereflection(t, n-i-1, taui); ap::vmove(a.getcolumn(i, i+1, n-1), t.getvector(1, n-i-1)); e(i) = a(i+1,i); if( taui!=0 ) { // // Apply H from both sides to A // a(i+1,i) = 1; // // Compute x := tau * A * v storing y in TAU // ap::vmove(t.getvector(1, n-i-1), a.getcolumn(i, i+1, n-1)); symmetricmatrixvectormultiply(a, isupper, i+1, n-1, t, taui, t2); ap::vmove(&tau(i), &t2(1), ap::vlen(i,n-2)); // // Compute w := x - 1/2 * tau * (x'*v) * v // v = ap::vdotproduct(tau.getvector(i, n-2), a.getcolumn(i, i+1, n-1)); alpha = -0.5*taui*v; ap::vadd(tau.getvector(i, n-2), a.getcolumn(i, i+1, n-1), alpha); // // Apply the transformation as a rank-2 update: // A := A - v * w' - w * v' // // ap::vmove(t.getvector(1, n-i-1), a.getcolumn(i, i+1, n-1)); ap::vmove(&t2(1), &tau(i), ap::vlen(1,n-i-1)); symmetricrank2update(a, isupper, i+1, n-1, t, t2, t3, double(-1)); a(i+1,i) = e(i); } d(i) = a(i,i); tau(i) = taui; } d(n-1) = a(n-1,n-1); } }
/************************************************************************* Solving a system of linear equations with a system matrix given by its Cholesky decomposition. The algorithm solves systems with a square matrix only. Input parameters: A - Cholesky decomposition of a system matrix (the result of the SMatrixCholesky subroutine). B - right side of a system. Array whose index ranges within [0..N-1]. N - size of matrix A. IsUpper - points to the triangle of matrix A in which the Cholesky decomposition is stored. If IsUpper=True, the Cholesky decomposition has the form of U'*U, and the upper triangle of matrix A stores matrix U (in that case, the lower triangle isn’t used and isn’t changed by the subroutine) Similarly, if IsUpper = False, the Cholesky decomposition has the form of L*L', and the lower triangle stores matrix L. Output parameters: X - solution of a system. Array whose index ranges within [0..N-1]. Result: True, if the system is not singular. X contains the solution. False, if the system is singular (there is a zero element on the main diagonal). In this case, X doesn't contain a solution. -- ALGLIB -- Copyright 2005-2008 by Bochkanov Sergey *************************************************************************/ bool spdmatrixcholeskysolve(const ap::real_2d_array& a, ap::real_1d_array b, int n, bool isupper, ap::real_1d_array& x) { bool result; int i; double v; ap::ap_error::make_assertion(n>0, "Error: N<=0 in SolveSystemCholesky"); // // det(A)=0? // result = true; for(i = 0; i <= n-1; i++) { if( ap::fp_eq(a(i,i),0) ) { result = false; return result; } } // // det(A)<>0 // x.setbounds(0, n-1); if( isupper ) { // // A = U'*U, solve U'*y = b first // b(0) = b(0)/a(0,0); for(i = 1; i <= n-1; i++) { v = ap::vdotproduct(a.getcolumn(i, 0, i-1), b.getvector(0, i-1)); b(i) = (b(i)-v)/a(i,i); } // // Solve U*x = y // b(n-1) = b(n-1)/a(n-1,n-1); for(i = n-2; i >= 0; i--) { v = ap::vdotproduct(&a(i, i+1), &b(i+1), ap::vlen(i+1,n-1)); b(i) = (b(i)-v)/a(i,i); } ap::vmove(&x(0), &b(0), ap::vlen(0,n-1)); } else { // // A = L*L', solve L'*y = b first // b(0) = b(0)/a(0,0); for(i = 1; i <= n-1; i++) { v = ap::vdotproduct(&a(i, 0), &b(0), ap::vlen(0,i-1)); b(i) = (b(i)-v)/a(i,i); } // // Solve L'*x = y // b(n-1) = b(n-1)/a(n-1,n-1); for(i = n-2; i >= 0; i--) { v = ap::vdotproduct(a.getcolumn(i, i+1, n-1), b.getvector(i+1, n-1)); b(i) = (b(i)-v)/a(i,i); } ap::vmove(&x(0), &b(0), ap::vlen(0,n-1)); } return result; }
bool solvesystemcholesky(const ap::real_2d_array& a, ap::real_1d_array b, int n, bool isupper, ap::real_1d_array& x) { bool result; int i; int im1; int ip1; double v; ap::ap_error::make_assertion(n>0, "Error: N<=0 in SolveSystemCholesky"); // // det(A)=0? // result = true; for(i = 1; i <= n; i++) { if( ap::fp_eq(a(i,i),0) ) { result = false; return result; } } // // det(A)<>0 // x.setbounds(1, n); if( isupper ) { // // A = U'*U, solve U'*y = b first // b(1) = b(1)/a(1,1); for(i = 2; i <= n; i++) { im1 = i-1; v = ap::vdotproduct(a.getcolumn(i, 1, im1), b.getvector(1, im1)); b(i) = (b(i)-v)/a(i,i); } // // Solve U*x = y // b(n) = b(n)/a(n,n); for(i = n-1; i >= 1; i--) { ip1 = i+1; v = ap::vdotproduct(&a(i, ip1), &b(ip1), ap::vlen(ip1,n)); b(i) = (b(i)-v)/a(i,i); } ap::vmove(&x(1), &b(1), ap::vlen(1,n)); } else { // // A = L*L', solve L'*y = b first // b(1) = b(1)/a(1,1); for(i = 2; i <= n; i++) { im1 = i-1; v = ap::vdotproduct(&a(i, 1), &b(1), ap::vlen(1,im1)); b(i) = (b(i)-v)/a(i,i); } // // Solve L'*x = y // b(n) = b(n)/a(n,n); for(i = n-1; i >= 1; i--) { ip1 = i+1; v = ap::vdotproduct(a.getcolumn(i, ip1, n), b.getvector(ip1, n)); b(i) = (b(i)-v)/a(i,i); } ap::vmove(&x(1), &b(1), ap::vlen(1,n)); } return result; }
/************************************************************************* Obsolete 1-based subroutine. See RMatrixTRSafeSolve for 0-based replacement. *************************************************************************/ void safesolvetriangular(const ap::real_2d_array& a, int n, ap::real_1d_array& x, double& s, bool isupper, bool istrans, bool isunit, bool normin, ap::real_1d_array& cnorm) { int i; int imax; int j; int jfirst; int jinc; int jlast; int jm1; int jp1; int ip1; int im1; int k; int flg; double v; double vd; double bignum; double grow; double rec; double smlnum; double sumj; double tjj; double tjjs; double tmax; double tscal; double uscal; double xbnd; double xj; double xmax; bool notran; bool upper; bool nounit; upper = isupper; notran = !istrans; nounit = !isunit; // // Quick return if possible // if( n==0 ) { return; } // // Determine machine dependent parameters to control overflow. // smlnum = ap::minrealnumber/(ap::machineepsilon*2); bignum = 1/smlnum; s = 1; if( !normin ) { cnorm.setbounds(1, n); // // Compute the 1-norm of each column, not including the diagonal. // if( upper ) { // // A is upper triangular. // for(j = 1; j <= n; j++) { v = 0; for(k = 1; k <= j-1; k++) { v = v+fabs(a(k,j)); } cnorm(j) = v; } } else { // // A is lower triangular. // for(j = 1; j <= n-1; j++) { v = 0; for(k = j+1; k <= n; k++) { v = v+fabs(a(k,j)); } cnorm(j) = v; } cnorm(n) = 0; } } // // Scale the column norms by TSCAL if the maximum element in CNORM is // greater than BIGNUM. // imax = 1; for(k = 2; k <= n; k++) { if( ap::fp_greater(cnorm(k),cnorm(imax)) ) { imax = k; } } tmax = cnorm(imax); if( ap::fp_less_eq(tmax,bignum) ) { tscal = 1; } else { tscal = 1/(smlnum*tmax); ap::vmul(&cnorm(1), ap::vlen(1,n), tscal); } // // Compute a bound on the computed solution vector to see if the // Level 2 BLAS routine DTRSV can be used. // j = 1; for(k = 2; k <= n; k++) { if( ap::fp_greater(fabs(x(k)),fabs(x(j))) ) { j = k; } } xmax = fabs(x(j)); xbnd = xmax; if( notran ) { // // Compute the growth in A * x = b. // if( upper ) { jfirst = n; jlast = 1; jinc = -1; } else { jfirst = 1; jlast = n; jinc = 1; } if( ap::fp_neq(tscal,1) ) { grow = 0; } else { if( nounit ) { // // A is non-unit triangular. // // Compute GROW = 1/G(j) and XBND = 1/M(j). // Initially, G(0) = max{x(i), i=1,...,n}. // grow = 1/ap::maxreal(xbnd, smlnum); xbnd = grow; j = jfirst; while(jinc>0&&j<=jlast||jinc<0&&j>=jlast) { // // Exit the loop if the growth factor is too small. // if( ap::fp_less_eq(grow,smlnum) ) { break; } // // M(j) = G(j-1) / abs(A(j,j)) // tjj = fabs(a(j,j)); xbnd = ap::minreal(xbnd, ap::minreal(double(1), tjj)*grow); if( ap::fp_greater_eq(tjj+cnorm(j),smlnum) ) { // // G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) // grow = grow*(tjj/(tjj+cnorm(j))); } else { // // G(j) could overflow, set GROW to 0. // grow = 0; } if( j==jlast ) { grow = xbnd; } j = j+jinc; } } else { // // A is unit triangular. // // Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. // grow = ap::minreal(double(1), 1/ap::maxreal(xbnd, smlnum)); j = jfirst; while(jinc>0&&j<=jlast||jinc<0&&j>=jlast) { // // Exit the loop if the growth factor is too small. // if( ap::fp_less_eq(grow,smlnum) ) { break; } // // G(j) = G(j-1)*( 1 + CNORM(j) ) // grow = grow*(1/(1+cnorm(j))); j = j+jinc; } } } } else { // // Compute the growth in A' * x = b. // if( upper ) { jfirst = 1; jlast = n; jinc = 1; } else { jfirst = n; jlast = 1; jinc = -1; } if( ap::fp_neq(tscal,1) ) { grow = 0; } else { if( nounit ) { // // A is non-unit triangular. // // Compute GROW = 1/G(j) and XBND = 1/M(j). // Initially, M(0) = max{x(i), i=1,...,n}. // grow = 1/ap::maxreal(xbnd, smlnum); xbnd = grow; j = jfirst; while(jinc>0&&j<=jlast||jinc<0&&j>=jlast) { // // Exit the loop if the growth factor is too small. // if( ap::fp_less_eq(grow,smlnum) ) { break; } // // G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) // xj = 1+cnorm(j); grow = ap::minreal(grow, xbnd/xj); // // M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) // tjj = fabs(a(j,j)); if( ap::fp_greater(xj,tjj) ) { xbnd = xbnd*(tjj/xj); } if( j==jlast ) { grow = ap::minreal(grow, xbnd); } j = j+jinc; } } else { // // A is unit triangular. // // Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. // grow = ap::minreal(double(1), 1/ap::maxreal(xbnd, smlnum)); j = jfirst; while(jinc>0&&j<=jlast||jinc<0&&j>=jlast) { // // Exit the loop if the growth factor is too small. // if( ap::fp_less_eq(grow,smlnum) ) { break; } // // G(j) = ( 1 + CNORM(j) )*G(j-1) // xj = 1+cnorm(j); grow = grow/xj; j = j+jinc; } } } } if( ap::fp_greater(grow*tscal,smlnum) ) { // // Use the Level 2 BLAS solve if the reciprocal of the bound on // elements of X is not too small. // if( upper&¬ran||!upper&&!notran ) { if( nounit ) { vd = a(n,n); } else { vd = 1; } x(n) = x(n)/vd; for(i = n-1; i >= 1; i--) { ip1 = i+1; if( upper ) { v = ap::vdotproduct(&a(i, ip1), &x(ip1), ap::vlen(ip1,n)); } else { v = ap::vdotproduct(a.getcolumn(i, ip1, n), x.getvector(ip1, n)); } if( nounit ) { vd = a(i,i); } else { vd = 1; } x(i) = (x(i)-v)/vd; } } else { if( nounit ) { vd = a(1,1); } else { vd = 1; } x(1) = x(1)/vd; for(i = 2; i <= n; i++) { im1 = i-1; if( upper ) { v = ap::vdotproduct(a.getcolumn(i, 1, im1), x.getvector(1, im1)); } else { v = ap::vdotproduct(&a(i, 1), &x(1), ap::vlen(1,im1)); } if( nounit ) { vd = a(i,i); } else { vd = 1; } x(i) = (x(i)-v)/vd; } } } else { // // Use a Level 1 BLAS solve, scaling intermediate results. // if( ap::fp_greater(xmax,bignum) ) { // // Scale X so that its components are less than or equal to // BIGNUM in absolute value. // s = bignum/xmax; ap::vmul(&x(1), ap::vlen(1,n), s); xmax = bignum; } if( notran ) { // // Solve A * x = b // j = jfirst; while(jinc>0&&j<=jlast||jinc<0&&j>=jlast) { // // Compute x(j) = b(j) / A(j,j), scaling x if necessary. // xj = fabs(x(j)); flg = 0; if( nounit ) { tjjs = a(j,j)*tscal; } else { tjjs = tscal; if( ap::fp_eq(tscal,1) ) { flg = 100; } } if( flg!=100 ) { tjj = fabs(tjjs); if( ap::fp_greater(tjj,smlnum) ) { // // abs(A(j,j)) > SMLNUM: // if( ap::fp_less(tjj,1) ) { if( ap::fp_greater(xj,tjj*bignum) ) { // // Scale x by 1/b(j). // rec = 1/xj; ap::vmul(&x(1), ap::vlen(1,n), rec); s = s*rec; xmax = xmax*rec; } } x(j) = x(j)/tjjs; xj = fabs(x(j)); } else { if( ap::fp_greater(tjj,0) ) { // // 0 < abs(A(j,j)) <= SMLNUM: // if( ap::fp_greater(xj,tjj*bignum) ) { // // Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM // to avoid overflow when dividing by A(j,j). // rec = tjj*bignum/xj; if( ap::fp_greater(cnorm(j),1) ) { // // Scale by 1/CNORM(j) to avoid overflow when // multiplying x(j) times column j. // rec = rec/cnorm(j); } ap::vmul(&x(1), ap::vlen(1,n), rec); s = s*rec; xmax = xmax*rec; } x(j) = x(j)/tjjs; xj = fabs(x(j)); } else { // // A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and // scale = 0, and compute a solution to A*x = 0. // for(i = 1; i <= n; i++) { x(i) = 0; } x(j) = 1; xj = 1; s = 0; xmax = 0; } } } // // Scale x if necessary to avoid overflow when adding a // multiple of column j of A. // if( ap::fp_greater(xj,1) ) { rec = 1/xj; if( ap::fp_greater(cnorm(j),(bignum-xmax)*rec) ) { // // Scale x by 1/(2*abs(x(j))). // rec = rec*0.5; ap::vmul(&x(1), ap::vlen(1,n), rec); s = s*rec; } } else { if( ap::fp_greater(xj*cnorm(j),bignum-xmax) ) { // // Scale x by 1/2. // ap::vmul(&x(1), ap::vlen(1,n), 0.5); s = s*0.5; } } if( upper ) { if( j>1 ) { // // Compute the update // x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) // v = x(j)*tscal; jm1 = j-1; ap::vsub(x.getvector(1, jm1), a.getcolumn(j, 1, jm1), v); i = 1; for(k = 2; k <= j-1; k++) { if( ap::fp_greater(fabs(x(k)),fabs(x(i))) ) { i = k; } } xmax = fabs(x(i)); } } else { if( j<n ) { // // Compute the update // x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) // jp1 = j+1; v = x(j)*tscal; ap::vsub(x.getvector(jp1, n), a.getcolumn(j, jp1, n), v); i = j+1; for(k = j+2; k <= n; k++) { if( ap::fp_greater(fabs(x(k)),fabs(x(i))) ) { i = k; } } xmax = fabs(x(i)); } } j = j+jinc; } } else { // // Solve A' * x = b // j = jfirst; while(jinc>0&&j<=jlast||jinc<0&&j>=jlast) { // // Compute x(j) = b(j) - sum A(k,j)*x(k). // k<>j // xj = fabs(x(j)); uscal = tscal; rec = 1/ap::maxreal(xmax, double(1)); if( ap::fp_greater(cnorm(j),(bignum-xj)*rec) ) { // // If x(j) could overflow, scale x by 1/(2*XMAX). // rec = rec*0.5; if( nounit ) { tjjs = a(j,j)*tscal; } else { tjjs = tscal; } tjj = fabs(tjjs); if( ap::fp_greater(tjj,1) ) { // // Divide by A(j,j) when scaling x if A(j,j) > 1. // rec = ap::minreal(double(1), rec*tjj); uscal = uscal/tjjs; } if( ap::fp_less(rec,1) ) { ap::vmul(&x(1), ap::vlen(1,n), rec); s = s*rec; xmax = xmax*rec; } } sumj = 0; if( ap::fp_eq(uscal,1) ) { // // If the scaling needed for A in the dot product is 1, // call DDOT to perform the dot product. // if( upper ) { if( j>1 ) { jm1 = j-1; sumj = ap::vdotproduct(a.getcolumn(j, 1, jm1), x.getvector(1, jm1)); } else { sumj = 0; } } else { if( j<n ) { jp1 = j+1; sumj = ap::vdotproduct(a.getcolumn(j, jp1, n), x.getvector(jp1, n)); } } } else { // // Otherwise, use in-line code for the dot product. // if( upper ) { for(i = 1; i <= j-1; i++) { v = a(i,j)*uscal; sumj = sumj+v*x(i); } } else { if( j<n ) { for(i = j+1; i <= n; i++) { v = a(i,j)*uscal; sumj = sumj+v*x(i); } } } } if( ap::fp_eq(uscal,tscal) ) { // // Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) // was not used to scale the dotproduct. // x(j) = x(j)-sumj; xj = fabs(x(j)); flg = 0; if( nounit ) { tjjs = a(j,j)*tscal; } else { tjjs = tscal; if( ap::fp_eq(tscal,1) ) { flg = 150; } } // // Compute x(j) = x(j) / A(j,j), scaling if necessary. // if( flg!=150 ) { tjj = fabs(tjjs); if( ap::fp_greater(tjj,smlnum) ) { // // abs(A(j,j)) > SMLNUM: // if( ap::fp_less(tjj,1) ) { if( ap::fp_greater(xj,tjj*bignum) ) { // // Scale X by 1/abs(x(j)). // rec = 1/xj; ap::vmul(&x(1), ap::vlen(1,n), rec); s = s*rec; xmax = xmax*rec; } } x(j) = x(j)/tjjs; } else { if( ap::fp_greater(tjj,0) ) { // // 0 < abs(A(j,j)) <= SMLNUM: // if( ap::fp_greater(xj,tjj*bignum) ) { // // Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. // rec = tjj*bignum/xj; ap::vmul(&x(1), ap::vlen(1,n), rec); s = s*rec; xmax = xmax*rec; } x(j) = x(j)/tjjs; } else { // // A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and // scale = 0, and compute a solution to A'*x = 0. // for(i = 1; i <= n; i++) { x(i) = 0; } x(j) = 1; s = 0; xmax = 0; } } } } else { // // Compute x(j) := x(j) / A(j,j) - sumj if the dot // product has already been divided by 1/A(j,j). // x(j) = x(j)/tjjs-sumj; } xmax = ap::maxreal(xmax, fabs(x(j))); j = j+jinc; } } s = s/tscal; } // // Scale the column norms by 1/TSCAL for return. // if( ap::fp_neq(tscal,1) ) { v = 1/tscal; ap::vmul(&cnorm(1), ap::vlen(1,n), v); } }