void internalschurdecomposition(ap::real_2d_array& h, int n, int tneeded, int zneeded, ap::real_1d_array& wr, ap::real_1d_array& wi, ap::real_2d_array& z, int& info) { ap::real_1d_array work; int i; int i1; int i2; int ierr; int ii; int itemp; int itn; int its; int j; int k; int l; int maxb; int nr; int ns; int nv; double absw; double ovfl; double smlnum; double tau; double temp; double tst1; double ulp; double unfl; ap::real_2d_array s; ap::real_1d_array v; ap::real_1d_array vv; ap::real_1d_array workc1; ap::real_1d_array works1; ap::real_1d_array workv3; ap::real_1d_array tmpwr; ap::real_1d_array tmpwi; bool initz; bool wantt; bool wantz; double cnst; bool failflag; int p1; int p2; int p3; int p4; double vt; // // Set the order of the multi-shift QR algorithm to be used. // If you want to tune algorithm, change this values // ns = 12; maxb = 50; // // Now 2 < NS <= MAXB < NH. // maxb = ap::maxint(3, maxb); ns = ap::minint(maxb, ns); // // Initialize // cnst = 1.5; work.setbounds(1, ap::maxint(n, 1)); s.setbounds(1, ns, 1, ns); v.setbounds(1, ns+1); vv.setbounds(1, ns+1); wr.setbounds(1, ap::maxint(n, 1)); wi.setbounds(1, ap::maxint(n, 1)); workc1.setbounds(1, 1); works1.setbounds(1, 1); workv3.setbounds(1, 3); tmpwr.setbounds(1, ap::maxint(n, 1)); tmpwi.setbounds(1, ap::maxint(n, 1)); ap::ap_error::make_assertion(n>=0, "InternalSchurDecomposition: incorrect N!"); ap::ap_error::make_assertion(tneeded==0||tneeded==1, "InternalSchurDecomposition: incorrect TNeeded!"); ap::ap_error::make_assertion(zneeded==0||zneeded==1||zneeded==2, "InternalSchurDecomposition: incorrect ZNeeded!"); wantt = tneeded==1; initz = zneeded==2; wantz = zneeded!=0; info = 0; // // Initialize Z, if necessary // if( initz ) { z.setbounds(1, n, 1, n); for(i = 1; i <= n; i++) { for(j = 1; j <= n; j++) { if( i==j ) { z(i,j) = 1; } else { z(i,j) = 0; } } } } // // Quick return if possible // if( n==0 ) { return; } if( n==1 ) { wr(1) = h(1,1); wi(1) = 0; return; } // // Set rows and columns 1 to N to zero below the first // subdiagonal. // for(j = 1; j <= n-2; j++) { for(i = j+2; i <= n; i++) { h(i,j) = 0; } } // // Test if N is sufficiently small // if( ns<=2||ns>n||maxb>=n ) { // // Use the standard double-shift algorithm // internalauxschur(wantt, wantz, n, 1, n, h, wr, wi, 1, n, z, work, workv3, workc1, works1, info); // // fill entries under diagonal blocks of T with zeros // if( wantt ) { j = 1; while(j<=n) { if( wi(j)==0 ) { for(i = j+1; i <= n; i++) { h(i,j) = 0; } j = j+1; } else { for(i = j+2; i <= n; i++) { h(i,j) = 0; h(i,j+1) = 0; } j = j+2; } } } return; } unfl = ap::minrealnumber; ovfl = 1/unfl; ulp = 2*ap::machineepsilon; smlnum = unfl*(n/ulp); // // I1 and I2 are the indices of the first row and last column of H // to which transformations must be applied. If eigenvalues only are // being computed, I1 and I2 are set inside the main loop. // if( wantt ) { i1 = 1; i2 = n; } // // ITN is the total number of multiple-shift QR iterations allowed. // itn = 30*n; // // The main loop begins here. I is the loop index and decreases from // IHI to ILO in steps of at most MAXB. Each iteration of the loop // works with the active submatrix in rows and columns L to I. // Eigenvalues I+1 to IHI have already converged. Either L = ILO or // H(L,L-1) is negligible so that the matrix splits. // i = n; while(true) { l = 1; if( i<1 ) { // // fill entries under diagonal blocks of T with zeros // if( wantt ) { j = 1; while(j<=n) { if( wi(j)==0 ) { for(i = j+1; i <= n; i++) { h(i,j) = 0; } j = j+1; } else { for(i = j+2; i <= n; i++) { h(i,j) = 0; h(i,j+1) = 0; } j = j+2; } } } // // Exit // return; } // // Perform multiple-shift QR iterations on rows and columns ILO to I // until a submatrix of order at most MAXB splits off at the bottom // because a subdiagonal element has become negligible. // failflag = true; for(its = 0; its <= itn; its++) { // // Look for a single small subdiagonal element. // for(k = i; k >= l+1; k--) { tst1 = fabs(h(k-1,k-1))+fabs(h(k,k)); if( tst1==0 ) { tst1 = upperhessenberg1norm(h, l, i, l, i, work); } if( fabs(h(k,k-1))<=ap::maxreal(ulp*tst1, smlnum) ) { break; } } l = k; if( l>1 ) { // // H(L,L-1) is negligible. // h(l,l-1) = 0; } // // Exit from loop if a submatrix of order <= MAXB has split off. // if( l>=i-maxb+1 ) { failflag = false; break; } // // Now the active submatrix is in rows and columns L to I. If // eigenvalues only are being computed, only the active submatrix // need be transformed. // if( !wantt ) { i1 = l; i2 = i; } if( its==20||its==30 ) { // // Exceptional shifts. // for(ii = i-ns+1; ii <= i; ii++) { wr(ii) = cnst*(fabs(h(ii,ii-1))+fabs(h(ii,ii))); wi(ii) = 0; } } else { // // Use eigenvalues of trailing submatrix of order NS as shifts. // copymatrix(h, i-ns+1, i, i-ns+1, i, s, 1, ns, 1, ns); internalauxschur(false, false, ns, 1, ns, s, tmpwr, tmpwi, 1, ns, z, work, workv3, workc1, works1, ierr); for(p1 = 1; p1 <= ns; p1++) { wr(i-ns+p1) = tmpwr(p1); wi(i-ns+p1) = tmpwi(p1); } if( ierr>0 ) { // // If DLAHQR failed to compute all NS eigenvalues, use the // unconverged diagonal elements as the remaining shifts. // for(ii = 1; ii <= ierr; ii++) { wr(i-ns+ii) = s(ii,ii); wi(i-ns+ii) = 0; } } } // // Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) // where G is the Hessenberg submatrix H(L:I,L:I) and w is // the vector of shifts (stored in WR and WI). The result is // stored in the local array V. // v(1) = 1; for(ii = 2; ii <= ns+1; ii++) { v(ii) = 0; } nv = 1; for(j = i-ns+1; j <= i; j++) { if( wi(j)>=0 ) { if( wi(j)==0 ) { // // real shift // p1 = nv+1; ap::vmove(&vv(1), &v(1), ap::vlen(1,p1)); matrixvectormultiply(h, l, l+nv, l, l+nv-1, false, vv, 1, nv, 1.0, v, 1, nv+1, -wr(j)); nv = nv+1; } else { if( wi(j)>0 ) { // // complex conjugate pair of shifts // p1 = nv+1; ap::vmove(&vv(1), &v(1), ap::vlen(1,p1)); matrixvectormultiply(h, l, l+nv, l, l+nv-1, false, v, 1, nv, 1.0, vv, 1, nv+1, -2*wr(j)); itemp = vectoridxabsmax(vv, 1, nv+1); temp = 1/ap::maxreal(fabs(vv(itemp)), smlnum); p1 = nv+1; ap::vmul(&vv(1), ap::vlen(1,p1), temp); absw = pythag2(wr(j), wi(j)); temp = temp*absw*absw; matrixvectormultiply(h, l, l+nv+1, l, l+nv, false, vv, 1, nv+1, 1.0, v, 1, nv+2, temp); nv = nv+2; } } // // Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, // reset it to the unit vector. // itemp = vectoridxabsmax(v, 1, nv); temp = fabs(v(itemp)); if( temp==0 ) { v(1) = 1; for(ii = 2; ii <= nv; ii++) { v(ii) = 0; } } else { temp = ap::maxreal(temp, smlnum); vt = 1/temp; ap::vmul(&v(1), ap::vlen(1,nv), vt); } } } // // Multiple-shift QR step // for(k = l; k <= i-1; k++) { // // The first iteration of this loop determines a reflection G // from the vector V and applies it from left and right to H, // thus creating a nonzero bulge below the subdiagonal. // // Each subsequent iteration determines a reflection G to // restore the Hessenberg form in the (K-1)th column, and thus // chases the bulge one step toward the bottom of the active // submatrix. NR is the order of G. // nr = ap::minint(ns+1, i-k+1); if( k>l ) { p1 = k-1; p2 = k+nr-1; ap::vmove(v.getvector(1, nr), h.getcolumn(p1, k, p2)); } generatereflection(v, nr, tau); if( k>l ) { h(k,k-1) = v(1); for(ii = k+1; ii <= i; ii++) { h(ii,k-1) = 0; } } v(1) = 1; // // Apply G from the left to transform the rows of the matrix in // columns K to I2. // applyreflectionfromtheleft(h, tau, v, k, k+nr-1, k, i2, work); // // Apply G from the right to transform the columns of the // matrix in rows I1 to min(K+NR,I). // applyreflectionfromtheright(h, tau, v, i1, ap::minint(k+nr, i), k, k+nr-1, work); if( wantz ) { // // Accumulate transformations in the matrix Z // applyreflectionfromtheright(z, tau, v, 1, n, k, k+nr-1, work); } } } // // Failure to converge in remaining number of iterations // if( failflag ) { info = i; return; } // // A submatrix of order <= MAXB in rows and columns L to I has split // off. Use the double-shift QR algorithm to handle it. // internalauxschur(wantt, wantz, n, l, i, h, wr, wi, 1, n, z, work, workv3, workc1, works1, info); if( info>0 ) { return; } // // Decrement number of remaining iterations, and return to start of // the main loop with a new value of I. // itn = itn-its; i = l-1; } }
static void internalauxschur(bool wantt, bool wantz, int n, int ilo, int ihi, ap::real_2d_array& h, ap::real_1d_array& wr, ap::real_1d_array& wi, int iloz, int ihiz, ap::real_2d_array& z, ap::real_1d_array& work, ap::real_1d_array& workv3, ap::real_1d_array& workc1, ap::real_1d_array& works1, int& info) { int i; int i1; int i2; int itn; int its; int j; int k; int l; int m; int nh; int nr; int nz; double ave; double cs; double disc; double h00; double h10; double h11; double h12; double h21; double h22; double h33; double h33s; double h43h34; double h44; double h44s; double ovfl; double s; double smlnum; double sn; double sum; double t1; double t2; double t3; double tst1; double unfl; double v1; double v2; double v3; bool failflag; double dat1; double dat2; int p1; double him1im1; double him1i; double hiim1; double hii; double wrim1; double wri; double wiim1; double wii; double ulp; info = 0; dat1 = 0.75; dat2 = -0.4375; ulp = ap::machineepsilon; // // Quick return if possible // if( n==0 ) { return; } if( ilo==ihi ) { wr(ilo) = h(ilo,ilo); wi(ilo) = 0; return; } nh = ihi-ilo+1; nz = ihiz-iloz+1; // // Set machine-dependent constants for the stopping criterion. // If norm(H) <= sqrt(OVFL), overflow should not occur. // unfl = ap::minrealnumber; ovfl = 1/unfl; smlnum = unfl*(nh/ulp); // // I1 and I2 are the indices of the first row and last column of H // to which transformations must be applied. If eigenvalues only are // being computed, I1 and I2 are set inside the main loop. // if( wantt ) { i1 = 1; i2 = n; } // // ITN is the total number of QR iterations allowed. // itn = 30*nh; // // The main loop begins here. I is the loop index and decreases from // IHI to ILO in steps of 1 or 2. Each iteration of the loop works // with the active submatrix in rows and columns L to I. // Eigenvalues I+1 to IHI have already converged. Either L = ILO or // H(L,L-1) is negligible so that the matrix splits. // i = ihi; while(true) { l = ilo; if( i<ilo ) { return; } // // Perform QR iterations on rows and columns ILO to I until a // submatrix of order 1 or 2 splits off at the bottom because a // subdiagonal element has become negligible. // failflag = true; for(its = 0; its <= itn; its++) { // // Look for a single small subdiagonal element. // for(k = i; k >= l+1; k--) { tst1 = fabs(h(k-1,k-1))+fabs(h(k,k)); if( tst1==0 ) { tst1 = upperhessenberg1norm(h, l, i, l, i, work); } if( fabs(h(k,k-1))<=ap::maxreal(ulp*tst1, smlnum) ) { break; } } l = k; if( l>ilo ) { // // H(L,L-1) is negligible // h(l,l-1) = 0; } // // Exit from loop if a submatrix of order 1 or 2 has split off. // if( l>=i-1 ) { failflag = false; break; } // // Now the active submatrix is in rows and columns L to I. If // eigenvalues only are being computed, only the active submatrix // need be transformed. // if( !wantt ) { i1 = l; i2 = i; } if( its==10||its==20 ) { // // Exceptional shift. // s = fabs(h(i,i-1))+fabs(h(i-1,i-2)); h44 = dat1*s+h(i,i); h33 = h44; h43h34 = dat2*s*s; } else { // // Prepare to use Francis' double shift // (i.e. 2nd degree generalized Rayleigh quotient) // h44 = h(i,i); h33 = h(i-1,i-1); h43h34 = h(i,i-1)*h(i-1,i); s = h(i-1,i-2)*h(i-1,i-2); disc = (h33-h44)*0.5; disc = disc*disc+h43h34; if( disc>0 ) { // // Real roots: use Wilkinson's shift twice // disc = sqrt(disc); ave = 0.5*(h33+h44); if( fabs(h33)-fabs(h44)>0 ) { h33 = h33*h44-h43h34; h44 = h33/(extschursign(disc, ave)+ave); } else { h44 = extschursign(disc, ave)+ave; } h33 = h44; h43h34 = 0; } } // // Look for two consecutive small subdiagonal elements. // for(m = i-2; m >= l; m--) { // // Determine the effect of starting the double-shift QR // iteration at row M, and see if this would make H(M,M-1) // negligible. // h11 = h(m,m); h22 = h(m+1,m+1); h21 = h(m+1,m); h12 = h(m,m+1); h44s = h44-h11; h33s = h33-h11; v1 = (h33s*h44s-h43h34)/h21+h12; v2 = h22-h11-h33s-h44s; v3 = h(m+2,m+1); s = fabs(v1)+fabs(v2)+fabs(v3); v1 = v1/s; v2 = v2/s; v3 = v3/s; workv3(1) = v1; workv3(2) = v2; workv3(3) = v3; if( m==l ) { break; } h00 = h(m-1,m-1); h10 = h(m,m-1); tst1 = fabs(v1)*(fabs(h00)+fabs(h11)+fabs(h22)); if( fabs(h10)*(fabs(v2)+fabs(v3))<=ulp*tst1 ) { break; } } // // Double-shift QR step // for(k = m; k <= i-1; k++) { // // The first iteration of this loop determines a reflection G // from the vector V and applies it from left and right to H, // thus creating a nonzero bulge below the subdiagonal. // // Each subsequent iteration determines a reflection G to // restore the Hessenberg form in the (K-1)th column, and thus // chases the bulge one step toward the bottom of the active // submatrix. NR is the order of G. // nr = ap::minint(3, i-k+1); if( k>m ) { for(p1 = 1; p1 <= nr; p1++) { workv3(p1) = h(k+p1-1,k-1); } } generatereflection(workv3, nr, t1); if( k>m ) { h(k,k-1) = workv3(1); h(k+1,k-1) = 0; if( k<i-1 ) { h(k+2,k-1) = 0; } } else { if( m>l ) { h(k,k-1) = -h(k,k-1); } } v2 = workv3(2); t2 = t1*v2; if( nr==3 ) { v3 = workv3(3); t3 = t1*v3; // // Apply G from the left to transform the rows of the matrix // in columns K to I2. // for(j = k; j <= i2; j++) { sum = h(k,j)+v2*h(k+1,j)+v3*h(k+2,j); h(k,j) = h(k,j)-sum*t1; h(k+1,j) = h(k+1,j)-sum*t2; h(k+2,j) = h(k+2,j)-sum*t3; } // // Apply G from the right to transform the columns of the // matrix in rows I1 to min(K+3,I). // for(j = i1; j <= ap::minint(k+3, i); j++) { sum = h(j,k)+v2*h(j,k+1)+v3*h(j,k+2); h(j,k) = h(j,k)-sum*t1; h(j,k+1) = h(j,k+1)-sum*t2; h(j,k+2) = h(j,k+2)-sum*t3; } if( wantz ) { // // Accumulate transformations in the matrix Z // for(j = iloz; j <= ihiz; j++) { sum = z(j,k)+v2*z(j,k+1)+v3*z(j,k+2); z(j,k) = z(j,k)-sum*t1; z(j,k+1) = z(j,k+1)-sum*t2; z(j,k+2) = z(j,k+2)-sum*t3; } } } else { if( nr==2 ) { // // Apply G from the left to transform the rows of the matrix // in columns K to I2. // for(j = k; j <= i2; j++) { sum = h(k,j)+v2*h(k+1,j); h(k,j) = h(k,j)-sum*t1; h(k+1,j) = h(k+1,j)-sum*t2; } // // Apply G from the right to transform the columns of the // matrix in rows I1 to min(K+3,I). // for(j = i1; j <= i; j++) { sum = h(j,k)+v2*h(j,k+1); h(j,k) = h(j,k)-sum*t1; h(j,k+1) = h(j,k+1)-sum*t2; } if( wantz ) { // // Accumulate transformations in the matrix Z // for(j = iloz; j <= ihiz; j++) { sum = z(j,k)+v2*z(j,k+1); z(j,k) = z(j,k)-sum*t1; z(j,k+1) = z(j,k+1)-sum*t2; } } } } } } if( failflag ) { // // Failure to converge in remaining number of iterations // info = i; return; } if( l==i ) { // // H(I,I-1) is negligible: one eigenvalue has converged. // wr(i) = h(i,i); wi(i) = 0; } else { if( l==i-1 ) { // // H(I-1,I-2) is negligible: a pair of eigenvalues have converged. // // Transform the 2-by-2 submatrix to standard Schur form, // and compute and store the eigenvalues. // him1im1 = h(i-1,i-1); him1i = h(i-1,i); hiim1 = h(i,i-1); hii = h(i,i); aux2x2schur(him1im1, him1i, hiim1, hii, wrim1, wiim1, wri, wii, cs, sn); wr(i-1) = wrim1; wi(i-1) = wiim1; wr(i) = wri; wi(i) = wii; h(i-1,i-1) = him1im1; h(i-1,i) = him1i; h(i,i-1) = hiim1; h(i,i) = hii; if( wantt ) { // // Apply the transformation to the rest of H. // if( i2>i ) { workc1(1) = cs; works1(1) = sn; applyrotationsfromtheleft(true, i-1, i, i+1, i2, workc1, works1, h, work); } workc1(1) = cs; works1(1) = sn; applyrotationsfromtheright(true, i1, i-2, i-1, i, workc1, works1, h, work); } if( wantz ) { // // Apply the transformation to Z. // workc1(1) = cs; works1(1) = sn; applyrotationsfromtheright(true, iloz, iloz+nz-1, i-1, i, workc1, works1, z, work); } } } // // Decrement number of remaining iterations, and return to start of // the main loop with new value of I. // itn = itn-its; i = l-1; } }
bool testblas(bool silent) { bool result; int pass; int passcount; int n; int i; int i1; int i2; int j; int j1; int j2; int l; int k; int r; int i3; int j3; int col1; int col2; int row1; int row2; ap::real_1d_array x1; ap::real_1d_array x2; ap::real_2d_array a; ap::real_2d_array b; ap::real_2d_array c1; ap::real_2d_array c2; double err; double e1; double e2; double e3; double v; double scl1; double scl2; double scl3; bool was1; bool was2; bool trans1; bool trans2; double threshold; bool n2errors; bool hsnerrors; bool amaxerrors; bool mverrors; bool iterrors; bool cterrors; bool mmerrors; bool waserrors; n2errors = false; amaxerrors = false; hsnerrors = false; mverrors = false; iterrors = false; cterrors = false; mmerrors = false; waserrors = false; threshold = 10000*ap::machineepsilon; // // Test Norm2 // passcount = 1000; e1 = 0; e2 = 0; e3 = 0; scl2 = 0.5*ap::maxrealnumber; scl3 = 2*ap::minrealnumber; for(pass = 1; pass <= passcount; pass++) { n = 1+ap::randominteger(1000); i1 = ap::randominteger(10); i2 = n+i1-1; x1.setbounds(i1, i2); x2.setbounds(i1, i2); for(i = i1; i <= i2; i++) { x1(i) = 2*ap::randomreal()-1; } v = 0; for(i = i1; i <= i2; i++) { v = v+ap::sqr(x1(i)); } v = sqrt(v); e1 = ap::maxreal(e1, fabs(v-vectornorm2(x1, i1, i2))); for(i = i1; i <= i2; i++) { x2(i) = scl2*x1(i); } e2 = ap::maxreal(e2, fabs(v*scl2-vectornorm2(x2, i1, i2))); for(i = i1; i <= i2; i++) { x2(i) = scl3*x1(i); } e3 = ap::maxreal(e3, fabs(v*scl3-vectornorm2(x2, i1, i2))); } e2 = e2/scl2; e3 = e3/scl3; n2errors = ap::fp_greater_eq(e1,threshold)||ap::fp_greater_eq(e2,threshold)||ap::fp_greater_eq(e3,threshold); // // Testing VectorAbsMax, Column/Row AbsMax // x1.setbounds(1, 5); x1(1) = 2.0; x1(2) = 0.2; x1(3) = -1.3; x1(4) = 0.7; x1(5) = -3.0; amaxerrors = vectoridxabsmax(x1, 1, 5)!=5||vectoridxabsmax(x1, 1, 4)!=1||vectoridxabsmax(x1, 2, 4)!=3; n = 30; x1.setbounds(1, n); a.setbounds(1, n, 1, n); for(i = 1; i <= n; i++) { for(j = 1; j <= n; j++) { a(i,j) = 2*ap::randomreal()-1; } } was1 = false; was2 = false; for(pass = 1; pass <= 1000; pass++) { j = 1+ap::randominteger(n); i1 = 1+ap::randominteger(n); i2 = i1+ap::randominteger(n+1-i1); ap::vmove(x1.getvector(i1, i2), a.getcolumn(j, i1, i2)); if( vectoridxabsmax(x1, i1, i2)!=columnidxabsmax(a, i1, i2, j) ) { was1 = true; } i = 1+ap::randominteger(n); j1 = 1+ap::randominteger(n); j2 = j1+ap::randominteger(n+1-j1); ap::vmove(&x1(j1), &a(i, j1), ap::vlen(j1,j2)); if( vectoridxabsmax(x1, j1, j2)!=rowidxabsmax(a, j1, j2, i) ) { was2 = true; } } amaxerrors = amaxerrors||was1||was2; // // Testing upper Hessenberg 1-norm // a.setbounds(1, 3, 1, 3); x1.setbounds(1, 3); a(1,1) = 2; a(1,2) = 3; a(1,3) = 1; a(2,1) = 4; a(2,2) = -5; a(2,3) = 8; a(3,1) = 99; a(3,2) = 3; a(3,3) = 1; hsnerrors = ap::fp_greater(fabs(upperhessenberg1norm(a, 1, 3, 1, 3, x1)-11),threshold); // // Testing MatrixVectorMultiply // a.setbounds(2, 3, 3, 5); x1.setbounds(1, 3); x2.setbounds(1, 2); a(2,3) = 2; a(2,4) = -1; a(2,5) = -1; a(3,3) = 1; a(3,4) = -2; a(3,5) = 2; x1(1) = 1; x1(2) = 2; x1(3) = 1; x2(1) = -1; x2(2) = -1; matrixvectormultiply(a, 2, 3, 3, 5, false, x1, 1, 3, 1.0, x2, 1, 2, 1.0); matrixvectormultiply(a, 2, 3, 3, 5, true, x2, 1, 2, 1.0, x1, 1, 3, 1.0); e1 = fabs(x1(1)+5)+fabs(x1(2)-8)+fabs(x1(3)+1)+fabs(x2(1)+2)+fabs(x2(2)+2); x1(1) = 1; x1(2) = 2; x1(3) = 1; x2(1) = -1; x2(2) = -1; matrixvectormultiply(a, 2, 3, 3, 5, false, x1, 1, 3, 1.0, x2, 1, 2, 0.0); matrixvectormultiply(a, 2, 3, 3, 5, true, x2, 1, 2, 1.0, x1, 1, 3, 0.0); e2 = fabs(x1(1)+3)+fabs(x1(2)-3)+fabs(x1(3)+1)+fabs(x2(1)+1)+fabs(x2(2)+1); mverrors = ap::fp_greater_eq(e1+e2,threshold); // // testing inplace transpose // n = 10; a.setbounds(1, n, 1, n); b.setbounds(1, n, 1, n); x1.setbounds(1, n-1); for(i = 1; i <= n; i++) { for(j = 1; j <= n; j++) { a(i,j) = ap::randomreal(); } } passcount = 10000; was1 = false; for(pass = 1; pass <= passcount; pass++) { i1 = 1+ap::randominteger(n); i2 = i1+ap::randominteger(n-i1+1); j1 = 1+ap::randominteger(n-(i2-i1)); j2 = j1+(i2-i1); copymatrix(a, i1, i2, j1, j2, b, i1, i2, j1, j2); inplacetranspose(b, i1, i2, j1, j2, x1); for(i = i1; i <= i2; i++) { for(j = j1; j <= j2; j++) { if( ap::fp_neq(a(i,j),b(i1+(j-j1),j1+(i-i1))) ) { was1 = true; } } } } iterrors = was1; // // testing copy and transpose // n = 10; a.setbounds(1, n, 1, n); b.setbounds(1, n, 1, n); for(i = 1; i <= n; i++) { for(j = 1; j <= n; j++) { a(i,j) = ap::randomreal(); } } passcount = 10000; was1 = false; for(pass = 1; pass <= passcount; pass++) { i1 = 1+ap::randominteger(n); i2 = i1+ap::randominteger(n-i1+1); j1 = 1+ap::randominteger(n); j2 = j1+ap::randominteger(n-j1+1); copyandtranspose(a, i1, i2, j1, j2, b, j1, j2, i1, i2); for(i = i1; i <= i2; i++) { for(j = j1; j <= j2; j++) { if( ap::fp_neq(a(i,j),b(j,i)) ) { was1 = true; } } } } cterrors = was1; // // Testing MatrixMatrixMultiply // n = 10; a.setbounds(1, 2*n, 1, 2*n); b.setbounds(1, 2*n, 1, 2*n); c1.setbounds(1, 2*n, 1, 2*n); c2.setbounds(1, 2*n, 1, 2*n); x1.setbounds(1, n); x2.setbounds(1, n); for(i = 1; i <= 2*n; i++) { for(j = 1; j <= 2*n; j++) { a(i,j) = ap::randomreal(); b(i,j) = ap::randomreal(); } } passcount = 1000; was1 = false; for(pass = 1; pass <= passcount; pass++) { for(i = 1; i <= 2*n; i++) { for(j = 1; j <= 2*n; j++) { c1(i,j) = 2.1*i+3.1*j; c2(i,j) = c1(i,j); } } l = 1+ap::randominteger(n); k = 1+ap::randominteger(n); r = 1+ap::randominteger(n); i1 = 1+ap::randominteger(n); j1 = 1+ap::randominteger(n); i2 = 1+ap::randominteger(n); j2 = 1+ap::randominteger(n); i3 = 1+ap::randominteger(n); j3 = 1+ap::randominteger(n); trans1 = ap::fp_greater(ap::randomreal(),0.5); trans2 = ap::fp_greater(ap::randomreal(),0.5); if( trans1 ) { col1 = l; row1 = k; } else { col1 = k; row1 = l; } if( trans2 ) { col2 = k; row2 = r; } else { col2 = r; row2 = k; } scl1 = ap::randomreal(); scl2 = ap::randomreal(); matrixmatrixmultiply(a, i1, i1+row1-1, j1, j1+col1-1, trans1, b, i2, i2+row2-1, j2, j2+col2-1, trans2, scl1, c1, i3, i3+l-1, j3, j3+r-1, scl2, x1); naivematrixmatrixmultiply(a, i1, i1+row1-1, j1, j1+col1-1, trans1, b, i2, i2+row2-1, j2, j2+col2-1, trans2, scl1, c2, i3, i3+l-1, j3, j3+r-1, scl2); err = 0; for(i = 1; i <= l; i++) { for(j = 1; j <= r; j++) { err = ap::maxreal(err, fabs(c1(i3+i-1,j3+j-1)-c2(i3+i-1,j3+j-1))); } } if( ap::fp_greater(err,threshold) ) { was1 = true; break; } } mmerrors = was1; // // report // waserrors = n2errors||amaxerrors||hsnerrors||mverrors||iterrors||cterrors||mmerrors; if( !silent ) { printf("TESTING BLAS\n"); printf("VectorNorm2: "); if( n2errors ) { printf("FAILED\n"); } else { printf("OK\n"); } printf("AbsMax (vector/row/column): "); if( amaxerrors ) { printf("FAILED\n"); } else { printf("OK\n"); } printf("UpperHessenberg1Norm: "); if( hsnerrors ) { printf("FAILED\n"); } else { printf("OK\n"); } printf("MatrixVectorMultiply: "); if( mverrors ) { printf("FAILED\n"); } else { printf("OK\n"); } printf("InplaceTranspose: "); if( iterrors ) { printf("FAILED\n"); } else { printf("OK\n"); } printf("CopyAndTranspose: "); if( cterrors ) { printf("FAILED\n"); } else { printf("OK\n"); } printf("MatrixMatrixMultiply: "); if( mmerrors ) { printf("FAILED\n"); } else { printf("OK\n"); } if( waserrors ) { printf("TEST FAILED\n"); } else { printf("TEST PASSED\n"); } printf("\n\n"); } result = !waserrors; return result; }