bool testblas(bool silent) { bool result; int pass; int passcount; int n; int i; int i1; int i2; int j; int j1; int j2; int l; int k; int r; int i3; int j3; int col1; int col2; int row1; int row2; ap::real_1d_array x1; ap::real_1d_array x2; ap::real_2d_array a; ap::real_2d_array b; ap::real_2d_array c1; ap::real_2d_array c2; double err; double e1; double e2; double e3; double v; double scl1; double scl2; double scl3; bool was1; bool was2; bool trans1; bool trans2; double threshold; bool n2errors; bool hsnerrors; bool amaxerrors; bool mverrors; bool iterrors; bool cterrors; bool mmerrors; bool waserrors; n2errors = false; amaxerrors = false; hsnerrors = false; mverrors = false; iterrors = false; cterrors = false; mmerrors = false; waserrors = false; threshold = 10000*ap::machineepsilon; // // Test Norm2 // passcount = 1000; e1 = 0; e2 = 0; e3 = 0; scl2 = 0.5*ap::maxrealnumber; scl3 = 2*ap::minrealnumber; for(pass = 1; pass <= passcount; pass++) { n = 1+ap::randominteger(1000); i1 = ap::randominteger(10); i2 = n+i1-1; x1.setbounds(i1, i2); x2.setbounds(i1, i2); for(i = i1; i <= i2; i++) { x1(i) = 2*ap::randomreal()-1; } v = 0; for(i = i1; i <= i2; i++) { v = v+ap::sqr(x1(i)); } v = sqrt(v); e1 = ap::maxreal(e1, fabs(v-vectornorm2(x1, i1, i2))); for(i = i1; i <= i2; i++) { x2(i) = scl2*x1(i); } e2 = ap::maxreal(e2, fabs(v*scl2-vectornorm2(x2, i1, i2))); for(i = i1; i <= i2; i++) { x2(i) = scl3*x1(i); } e3 = ap::maxreal(e3, fabs(v*scl3-vectornorm2(x2, i1, i2))); } e2 = e2/scl2; e3 = e3/scl3; n2errors = ap::fp_greater_eq(e1,threshold)||ap::fp_greater_eq(e2,threshold)||ap::fp_greater_eq(e3,threshold); // // Testing VectorAbsMax, Column/Row AbsMax // x1.setbounds(1, 5); x1(1) = 2.0; x1(2) = 0.2; x1(3) = -1.3; x1(4) = 0.7; x1(5) = -3.0; amaxerrors = vectoridxabsmax(x1, 1, 5)!=5||vectoridxabsmax(x1, 1, 4)!=1||vectoridxabsmax(x1, 2, 4)!=3; n = 30; x1.setbounds(1, n); a.setbounds(1, n, 1, n); for(i = 1; i <= n; i++) { for(j = 1; j <= n; j++) { a(i,j) = 2*ap::randomreal()-1; } } was1 = false; was2 = false; for(pass = 1; pass <= 1000; pass++) { j = 1+ap::randominteger(n); i1 = 1+ap::randominteger(n); i2 = i1+ap::randominteger(n+1-i1); ap::vmove(x1.getvector(i1, i2), a.getcolumn(j, i1, i2)); if( vectoridxabsmax(x1, i1, i2)!=columnidxabsmax(a, i1, i2, j) ) { was1 = true; } i = 1+ap::randominteger(n); j1 = 1+ap::randominteger(n); j2 = j1+ap::randominteger(n+1-j1); ap::vmove(&x1(j1), &a(i, j1), ap::vlen(j1,j2)); if( vectoridxabsmax(x1, j1, j2)!=rowidxabsmax(a, j1, j2, i) ) { was2 = true; } } amaxerrors = amaxerrors||was1||was2; // // Testing upper Hessenberg 1-norm // a.setbounds(1, 3, 1, 3); x1.setbounds(1, 3); a(1,1) = 2; a(1,2) = 3; a(1,3) = 1; a(2,1) = 4; a(2,2) = -5; a(2,3) = 8; a(3,1) = 99; a(3,2) = 3; a(3,3) = 1; hsnerrors = ap::fp_greater(fabs(upperhessenberg1norm(a, 1, 3, 1, 3, x1)-11),threshold); // // Testing MatrixVectorMultiply // a.setbounds(2, 3, 3, 5); x1.setbounds(1, 3); x2.setbounds(1, 2); a(2,3) = 2; a(2,4) = -1; a(2,5) = -1; a(3,3) = 1; a(3,4) = -2; a(3,5) = 2; x1(1) = 1; x1(2) = 2; x1(3) = 1; x2(1) = -1; x2(2) = -1; matrixvectormultiply(a, 2, 3, 3, 5, false, x1, 1, 3, 1.0, x2, 1, 2, 1.0); matrixvectormultiply(a, 2, 3, 3, 5, true, x2, 1, 2, 1.0, x1, 1, 3, 1.0); e1 = fabs(x1(1)+5)+fabs(x1(2)-8)+fabs(x1(3)+1)+fabs(x2(1)+2)+fabs(x2(2)+2); x1(1) = 1; x1(2) = 2; x1(3) = 1; x2(1) = -1; x2(2) = -1; matrixvectormultiply(a, 2, 3, 3, 5, false, x1, 1, 3, 1.0, x2, 1, 2, 0.0); matrixvectormultiply(a, 2, 3, 3, 5, true, x2, 1, 2, 1.0, x1, 1, 3, 0.0); e2 = fabs(x1(1)+3)+fabs(x1(2)-3)+fabs(x1(3)+1)+fabs(x2(1)+1)+fabs(x2(2)+1); mverrors = ap::fp_greater_eq(e1+e2,threshold); // // testing inplace transpose // n = 10; a.setbounds(1, n, 1, n); b.setbounds(1, n, 1, n); x1.setbounds(1, n-1); for(i = 1; i <= n; i++) { for(j = 1; j <= n; j++) { a(i,j) = ap::randomreal(); } } passcount = 10000; was1 = false; for(pass = 1; pass <= passcount; pass++) { i1 = 1+ap::randominteger(n); i2 = i1+ap::randominteger(n-i1+1); j1 = 1+ap::randominteger(n-(i2-i1)); j2 = j1+(i2-i1); copymatrix(a, i1, i2, j1, j2, b, i1, i2, j1, j2); inplacetranspose(b, i1, i2, j1, j2, x1); for(i = i1; i <= i2; i++) { for(j = j1; j <= j2; j++) { if( ap::fp_neq(a(i,j),b(i1+(j-j1),j1+(i-i1))) ) { was1 = true; } } } } iterrors = was1; // // testing copy and transpose // n = 10; a.setbounds(1, n, 1, n); b.setbounds(1, n, 1, n); for(i = 1; i <= n; i++) { for(j = 1; j <= n; j++) { a(i,j) = ap::randomreal(); } } passcount = 10000; was1 = false; for(pass = 1; pass <= passcount; pass++) { i1 = 1+ap::randominteger(n); i2 = i1+ap::randominteger(n-i1+1); j1 = 1+ap::randominteger(n); j2 = j1+ap::randominteger(n-j1+1); copyandtranspose(a, i1, i2, j1, j2, b, j1, j2, i1, i2); for(i = i1; i <= i2; i++) { for(j = j1; j <= j2; j++) { if( ap::fp_neq(a(i,j),b(j,i)) ) { was1 = true; } } } } cterrors = was1; // // Testing MatrixMatrixMultiply // n = 10; a.setbounds(1, 2*n, 1, 2*n); b.setbounds(1, 2*n, 1, 2*n); c1.setbounds(1, 2*n, 1, 2*n); c2.setbounds(1, 2*n, 1, 2*n); x1.setbounds(1, n); x2.setbounds(1, n); for(i = 1; i <= 2*n; i++) { for(j = 1; j <= 2*n; j++) { a(i,j) = ap::randomreal(); b(i,j) = ap::randomreal(); } } passcount = 1000; was1 = false; for(pass = 1; pass <= passcount; pass++) { for(i = 1; i <= 2*n; i++) { for(j = 1; j <= 2*n; j++) { c1(i,j) = 2.1*i+3.1*j; c2(i,j) = c1(i,j); } } l = 1+ap::randominteger(n); k = 1+ap::randominteger(n); r = 1+ap::randominteger(n); i1 = 1+ap::randominteger(n); j1 = 1+ap::randominteger(n); i2 = 1+ap::randominteger(n); j2 = 1+ap::randominteger(n); i3 = 1+ap::randominteger(n); j3 = 1+ap::randominteger(n); trans1 = ap::fp_greater(ap::randomreal(),0.5); trans2 = ap::fp_greater(ap::randomreal(),0.5); if( trans1 ) { col1 = l; row1 = k; } else { col1 = k; row1 = l; } if( trans2 ) { col2 = k; row2 = r; } else { col2 = r; row2 = k; } scl1 = ap::randomreal(); scl2 = ap::randomreal(); matrixmatrixmultiply(a, i1, i1+row1-1, j1, j1+col1-1, trans1, b, i2, i2+row2-1, j2, j2+col2-1, trans2, scl1, c1, i3, i3+l-1, j3, j3+r-1, scl2, x1); naivematrixmatrixmultiply(a, i1, i1+row1-1, j1, j1+col1-1, trans1, b, i2, i2+row2-1, j2, j2+col2-1, trans2, scl1, c2, i3, i3+l-1, j3, j3+r-1, scl2); err = 0; for(i = 1; i <= l; i++) { for(j = 1; j <= r; j++) { err = ap::maxreal(err, fabs(c1(i3+i-1,j3+j-1)-c2(i3+i-1,j3+j-1))); } } if( ap::fp_greater(err,threshold) ) { was1 = true; break; } } mmerrors = was1; // // report // waserrors = n2errors||amaxerrors||hsnerrors||mverrors||iterrors||cterrors||mmerrors; if( !silent ) { printf("TESTING BLAS\n"); printf("VectorNorm2: "); if( n2errors ) { printf("FAILED\n"); } else { printf("OK\n"); } printf("AbsMax (vector/row/column): "); if( amaxerrors ) { printf("FAILED\n"); } else { printf("OK\n"); } printf("UpperHessenberg1Norm: "); if( hsnerrors ) { printf("FAILED\n"); } else { printf("OK\n"); } printf("MatrixVectorMultiply: "); if( mverrors ) { printf("FAILED\n"); } else { printf("OK\n"); } printf("InplaceTranspose: "); if( iterrors ) { printf("FAILED\n"); } else { printf("OK\n"); } printf("CopyAndTranspose: "); if( cterrors ) { printf("FAILED\n"); } else { printf("OK\n"); } printf("MatrixMatrixMultiply: "); if( mmerrors ) { printf("FAILED\n"); } else { printf("OK\n"); } if( waserrors ) { printf("TEST FAILED\n"); } else { printf("TEST PASSED\n"); } printf("\n\n"); } result = !waserrors; return result; }
/************************************************************************* Singular value decomposition of a rectangular matrix. The algorithm calculates the singular value decomposition of a matrix of size MxN: A = U * S * V^T The algorithm finds the singular values and, optionally, matrices U and V^T. The algorithm can find both first min(M,N) columns of matrix U and rows of matrix V^T (singular vectors), and matrices U and V^T wholly (of sizes MxM and NxN respectively). Take into account that the subroutine does not return matrix V but V^T. Input parameters: A - matrix to be decomposed. Array whose indexes range within [0..M-1, 0..N-1]. M - number of rows in matrix A. N - number of columns in matrix A. UNeeded - 0, 1 or 2. See the description of the parameter U. VTNeeded - 0, 1 or 2. See the description of the parameter VT. AdditionalMemory - If the parameter: * equals 0, the algorithm doesn’t use additional memory (lower requirements, lower performance). * equals 1, the algorithm uses additional memory of size min(M,N)*min(M,N) of real numbers. It often speeds up the algorithm. * equals 2, the algorithm uses additional memory of size M*min(M,N) of real numbers. It allows to get a maximum performance. The recommended value of the parameter is 2. Output parameters: W - contains singular values in descending order. U - if UNeeded=0, U isn't changed, the left singular vectors are not calculated. if Uneeded=1, U contains left singular vectors (first min(M,N) columns of matrix U). Array whose indexes range within [0..M-1, 0..Min(M,N)-1]. if UNeeded=2, U contains matrix U wholly. Array whose indexes range within [0..M-1, 0..M-1]. VT - if VTNeeded=0, VT isn’t changed, the right singular vectors are not calculated. if VTNeeded=1, VT contains right singular vectors (first min(M,N) rows of matrix V^T). Array whose indexes range within [0..min(M,N)-1, 0..N-1]. if VTNeeded=2, VT contains matrix V^T wholly. Array whose indexes range within [0..N-1, 0..N-1]. -- ALGLIB -- Copyright 2005 by Bochkanov Sergey *************************************************************************/ bool rmatrixsvd(ap::real_2d_array a, int m, int n, int uneeded, int vtneeded, int additionalmemory, ap::real_1d_array& w, ap::real_2d_array& u, ap::real_2d_array& vt) { bool result; ap::real_1d_array tauq; ap::real_1d_array taup; ap::real_1d_array tau; ap::real_1d_array e; ap::real_1d_array work; ap::real_2d_array t2; bool isupper; int minmn; int ncu; int nrvt; int nru; int ncvt; int i; int j; result = true; if( m==0||n==0 ) { return result; } ap::ap_error::make_assertion(uneeded>=0&&uneeded<=2, "SVDDecomposition: wrong parameters!"); ap::ap_error::make_assertion(vtneeded>=0&&vtneeded<=2, "SVDDecomposition: wrong parameters!"); ap::ap_error::make_assertion(additionalmemory>=0&&additionalmemory<=2, "SVDDecomposition: wrong parameters!"); // // initialize // minmn = ap::minint(m, n); w.setbounds(1, minmn); ncu = 0; nru = 0; if( uneeded==1 ) { nru = m; ncu = minmn; u.setbounds(0, nru-1, 0, ncu-1); } if( uneeded==2 ) { nru = m; ncu = m; u.setbounds(0, nru-1, 0, ncu-1); } nrvt = 0; ncvt = 0; if( vtneeded==1 ) { nrvt = minmn; ncvt = n; vt.setbounds(0, nrvt-1, 0, ncvt-1); } if( vtneeded==2 ) { nrvt = n; ncvt = n; vt.setbounds(0, nrvt-1, 0, ncvt-1); } // // M much larger than N // Use bidiagonal reduction with QR-decomposition // if( ap::fp_greater(m,1.6*n) ) { if( uneeded==0 ) { // // No left singular vectors to be computed // rmatrixqr(a, m, n, tau); for(i = 0; i <= n-1; i++) { for(j = 0; j <= i-1; j++) { a(i,j) = 0; } } rmatrixbd(a, n, n, tauq, taup); rmatrixbdunpackpt(a, n, n, taup, nrvt, vt); rmatrixbdunpackdiagonals(a, n, n, isupper, w, e); result = rmatrixbdsvd(w, e, n, isupper, false, u, 0, a, 0, vt, ncvt); return result; } else { // // Left singular vectors (may be full matrix U) to be computed // rmatrixqr(a, m, n, tau); rmatrixqrunpackq(a, m, n, tau, ncu, u); for(i = 0; i <= n-1; i++) { for(j = 0; j <= i-1; j++) { a(i,j) = 0; } } rmatrixbd(a, n, n, tauq, taup); rmatrixbdunpackpt(a, n, n, taup, nrvt, vt); rmatrixbdunpackdiagonals(a, n, n, isupper, w, e); if( additionalmemory<1 ) { // // No additional memory can be used // rmatrixbdmultiplybyq(a, n, n, tauq, u, m, n, true, false); result = rmatrixbdsvd(w, e, n, isupper, false, u, m, a, 0, vt, ncvt); } else { // // Large U. Transforming intermediate matrix T2 // work.setbounds(1, ap::maxint(m, n)); rmatrixbdunpackq(a, n, n, tauq, n, t2); copymatrix(u, 0, m-1, 0, n-1, a, 0, m-1, 0, n-1); inplacetranspose(t2, 0, n-1, 0, n-1, work); result = rmatrixbdsvd(w, e, n, isupper, false, u, 0, t2, n, vt, ncvt); matrixmatrixmultiply(a, 0, m-1, 0, n-1, false, t2, 0, n-1, 0, n-1, true, 1.0, u, 0, m-1, 0, n-1, 0.0, work); } return result; } } // // N much larger than M // Use bidiagonal reduction with LQ-decomposition // if( ap::fp_greater(n,1.6*m) ) { if( vtneeded==0 ) { // // No right singular vectors to be computed // rmatrixlq(a, m, n, tau); for(i = 0; i <= m-1; i++) { for(j = i+1; j <= m-1; j++) { a(i,j) = 0; } } rmatrixbd(a, m, m, tauq, taup); rmatrixbdunpackq(a, m, m, tauq, ncu, u); rmatrixbdunpackdiagonals(a, m, m, isupper, w, e); work.setbounds(1, m); inplacetranspose(u, 0, nru-1, 0, ncu-1, work); result = rmatrixbdsvd(w, e, m, isupper, false, a, 0, u, nru, vt, 0); inplacetranspose(u, 0, nru-1, 0, ncu-1, work); return result; } else { // // Right singular vectors (may be full matrix VT) to be computed // rmatrixlq(a, m, n, tau); rmatrixlqunpackq(a, m, n, tau, nrvt, vt); for(i = 0; i <= m-1; i++) { for(j = i+1; j <= m-1; j++) { a(i,j) = 0; } } rmatrixbd(a, m, m, tauq, taup); rmatrixbdunpackq(a, m, m, tauq, ncu, u); rmatrixbdunpackdiagonals(a, m, m, isupper, w, e); work.setbounds(1, ap::maxint(m, n)); inplacetranspose(u, 0, nru-1, 0, ncu-1, work); if( additionalmemory<1 ) { // // No additional memory available // rmatrixbdmultiplybyp(a, m, m, taup, vt, m, n, false, true); result = rmatrixbdsvd(w, e, m, isupper, false, a, 0, u, nru, vt, n); } else { // // Large VT. Transforming intermediate matrix T2 // rmatrixbdunpackpt(a, m, m, taup, m, t2); result = rmatrixbdsvd(w, e, m, isupper, false, a, 0, u, nru, t2, m); copymatrix(vt, 0, m-1, 0, n-1, a, 0, m-1, 0, n-1); matrixmatrixmultiply(t2, 0, m-1, 0, m-1, false, a, 0, m-1, 0, n-1, false, 1.0, vt, 0, m-1, 0, n-1, 0.0, work); } inplacetranspose(u, 0, nru-1, 0, ncu-1, work); return result; } } // // M<=N // We can use inplace transposition of U to get rid of columnwise operations // if( m<=n ) { rmatrixbd(a, m, n, tauq, taup); rmatrixbdunpackq(a, m, n, tauq, ncu, u); rmatrixbdunpackpt(a, m, n, taup, nrvt, vt); rmatrixbdunpackdiagonals(a, m, n, isupper, w, e); work.setbounds(1, m); inplacetranspose(u, 0, nru-1, 0, ncu-1, work); result = rmatrixbdsvd(w, e, minmn, isupper, false, a, 0, u, nru, vt, ncvt); inplacetranspose(u, 0, nru-1, 0, ncu-1, work); return result; } // // Simple bidiagonal reduction // rmatrixbd(a, m, n, tauq, taup); rmatrixbdunpackq(a, m, n, tauq, ncu, u); rmatrixbdunpackpt(a, m, n, taup, nrvt, vt); rmatrixbdunpackdiagonals(a, m, n, isupper, w, e); if( additionalmemory<2||uneeded==0 ) { // // We cant use additional memory or there is no need in such operations // result = rmatrixbdsvd(w, e, minmn, isupper, false, u, nru, a, 0, vt, ncvt); } else { // // We can use additional memory // t2.setbounds(0, minmn-1, 0, m-1); copyandtranspose(u, 0, m-1, 0, minmn-1, t2, 0, minmn-1, 0, m-1); result = rmatrixbdsvd(w, e, minmn, isupper, false, u, 0, t2, m, vt, ncvt); copyandtranspose(t2, 0, minmn-1, 0, m-1, u, 0, m-1, 0, minmn-1); } return result; }