Beispiel #1
0
void internalschurdecomposition(ap::real_2d_array& h,
     int n,
     int tneeded,
     int zneeded,
     ap::real_1d_array& wr,
     ap::real_1d_array& wi,
     ap::real_2d_array& z,
     int& info)
{
    ap::real_1d_array work;
    int i;
    int i1;
    int i2;
    int ierr;
    int ii;
    int itemp;
    int itn;
    int its;
    int j;
    int k;
    int l;
    int maxb;
    int nr;
    int ns;
    int nv;
    double absw;
    double ovfl;
    double smlnum;
    double tau;
    double temp;
    double tst1;
    double ulp;
    double unfl;
    ap::real_2d_array s;
    ap::real_1d_array v;
    ap::real_1d_array vv;
    ap::real_1d_array workc1;
    ap::real_1d_array works1;
    ap::real_1d_array workv3;
    ap::real_1d_array tmpwr;
    ap::real_1d_array tmpwi;
    bool initz;
    bool wantt;
    bool wantz;
    double cnst;
    bool failflag;
    int p1;
    int p2;
    int p3;
    int p4;
    double vt;

    
    //
    // Set the order of the multi-shift QR algorithm to be used.
    // If you want to tune algorithm, change this values
    //
    ns = 12;
    maxb = 50;
    
    //
    // Now 2 < NS <= MAXB < NH.
    //
    maxb = ap::maxint(3, maxb);
    ns = ap::minint(maxb, ns);
    
    //
    // Initialize
    //
    cnst = 1.5;
    work.setbounds(1, ap::maxint(n, 1));
    s.setbounds(1, ns, 1, ns);
    v.setbounds(1, ns+1);
    vv.setbounds(1, ns+1);
    wr.setbounds(1, ap::maxint(n, 1));
    wi.setbounds(1, ap::maxint(n, 1));
    workc1.setbounds(1, 1);
    works1.setbounds(1, 1);
    workv3.setbounds(1, 3);
    tmpwr.setbounds(1, ap::maxint(n, 1));
    tmpwi.setbounds(1, ap::maxint(n, 1));
    ap::ap_error::make_assertion(n>=0, "InternalSchurDecomposition: incorrect N!");
    ap::ap_error::make_assertion(tneeded==0||tneeded==1, "InternalSchurDecomposition: incorrect TNeeded!");
    ap::ap_error::make_assertion(zneeded==0||zneeded==1||zneeded==2, "InternalSchurDecomposition: incorrect ZNeeded!");
    wantt = tneeded==1;
    initz = zneeded==2;
    wantz = zneeded!=0;
    info = 0;
    
    //
    // Initialize Z, if necessary
    //
    if( initz )
    {
        z.setbounds(1, n, 1, n);
        for(i = 1; i <= n; i++)
        {
            for(j = 1; j <= n; j++)
            {
                if( i==j )
                {
                    z(i,j) = 1;
                }
                else
                {
                    z(i,j) = 0;
                }
            }
        }
    }
    
    //
    // Quick return if possible
    //
    if( n==0 )
    {
        return;
    }
    if( n==1 )
    {
        wr(1) = h(1,1);
        wi(1) = 0;
        return;
    }
    
    //
    // Set rows and columns 1 to N to zero below the first
    // subdiagonal.
    //
    for(j = 1; j <= n-2; j++)
    {
        for(i = j+2; i <= n; i++)
        {
            h(i,j) = 0;
        }
    }
    
    //
    // Test if N is sufficiently small
    //
    if( ns<=2||ns>n||maxb>=n )
    {
        
        //
        // Use the standard double-shift algorithm
        //
        internalauxschur(wantt, wantz, n, 1, n, h, wr, wi, 1, n, z, work, workv3, workc1, works1, info);
        
        //
        // fill entries under diagonal blocks of T with zeros
        //
        if( wantt )
        {
            j = 1;
            while(j<=n)
            {
                if( wi(j)==0 )
                {
                    for(i = j+1; i <= n; i++)
                    {
                        h(i,j) = 0;
                    }
                    j = j+1;
                }
                else
                {
                    for(i = j+2; i <= n; i++)
                    {
                        h(i,j) = 0;
                        h(i,j+1) = 0;
                    }
                    j = j+2;
                }
            }
        }
        return;
    }
    unfl = ap::minrealnumber;
    ovfl = 1/unfl;
    ulp = 2*ap::machineepsilon;
    smlnum = unfl*(n/ulp);
    
    //
    // I1 and I2 are the indices of the first row and last column of H
    // to which transformations must be applied. If eigenvalues only are
    // being computed, I1 and I2 are set inside the main loop.
    //
    if( wantt )
    {
        i1 = 1;
        i2 = n;
    }
    
    //
    // ITN is the total number of multiple-shift QR iterations allowed.
    //
    itn = 30*n;
    
    //
    // The main loop begins here. I is the loop index and decreases from
    // IHI to ILO in steps of at most MAXB. Each iteration of the loop
    // works with the active submatrix in rows and columns L to I.
    // Eigenvalues I+1 to IHI have already converged. Either L = ILO or
    // H(L,L-1) is negligible so that the matrix splits.
    //
    i = n;
    while(true)
    {
        l = 1;
        if( i<1 )
        {
            
            //
            // fill entries under diagonal blocks of T with zeros
            //
            if( wantt )
            {
                j = 1;
                while(j<=n)
                {
                    if( wi(j)==0 )
                    {
                        for(i = j+1; i <= n; i++)
                        {
                            h(i,j) = 0;
                        }
                        j = j+1;
                    }
                    else
                    {
                        for(i = j+2; i <= n; i++)
                        {
                            h(i,j) = 0;
                            h(i,j+1) = 0;
                        }
                        j = j+2;
                    }
                }
            }
            
            //
            // Exit
            //
            return;
        }
        
        //
        // Perform multiple-shift QR iterations on rows and columns ILO to I
        // until a submatrix of order at most MAXB splits off at the bottom
        // because a subdiagonal element has become negligible.
        //
        failflag = true;
        for(its = 0; its <= itn; its++)
        {
            
            //
            // Look for a single small subdiagonal element.
            //
            for(k = i; k >= l+1; k--)
            {
                tst1 = fabs(h(k-1,k-1))+fabs(h(k,k));
                if( tst1==0 )
                {
                    tst1 = upperhessenberg1norm(h, l, i, l, i, work);
                }
                if( fabs(h(k,k-1))<=ap::maxreal(ulp*tst1, smlnum) )
                {
                    break;
                }
            }
            l = k;
            if( l>1 )
            {
                
                //
                // H(L,L-1) is negligible.
                //
                h(l,l-1) = 0;
            }
            
            //
            // Exit from loop if a submatrix of order <= MAXB has split off.
            //
            if( l>=i-maxb+1 )
            {
                failflag = false;
                break;
            }
            
            //
            // Now the active submatrix is in rows and columns L to I. If
            // eigenvalues only are being computed, only the active submatrix
            // need be transformed.
            //
            if( !wantt )
            {
                i1 = l;
                i2 = i;
            }
            if( its==20||its==30 )
            {
                
                //
                // Exceptional shifts.
                //
                for(ii = i-ns+1; ii <= i; ii++)
                {
                    wr(ii) = cnst*(fabs(h(ii,ii-1))+fabs(h(ii,ii)));
                    wi(ii) = 0;
                }
            }
            else
            {
                
                //
                // Use eigenvalues of trailing submatrix of order NS as shifts.
                //
                copymatrix(h, i-ns+1, i, i-ns+1, i, s, 1, ns, 1, ns);
                internalauxschur(false, false, ns, 1, ns, s, tmpwr, tmpwi, 1, ns, z, work, workv3, workc1, works1, ierr);
                for(p1 = 1; p1 <= ns; p1++)
                {
                    wr(i-ns+p1) = tmpwr(p1);
                    wi(i-ns+p1) = tmpwi(p1);
                }
                if( ierr>0 )
                {
                    
                    //
                    // If DLAHQR failed to compute all NS eigenvalues, use the
                    // unconverged diagonal elements as the remaining shifts.
                    //
                    for(ii = 1; ii <= ierr; ii++)
                    {
                        wr(i-ns+ii) = s(ii,ii);
                        wi(i-ns+ii) = 0;
                    }
                }
            }
            
            //
            // Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns))
            // where G is the Hessenberg submatrix H(L:I,L:I) and w is
            // the vector of shifts (stored in WR and WI). The result is
            // stored in the local array V.
            //
            v(1) = 1;
            for(ii = 2; ii <= ns+1; ii++)
            {
                v(ii) = 0;
            }
            nv = 1;
            for(j = i-ns+1; j <= i; j++)
            {
                if( wi(j)>=0 )
                {
                    if( wi(j)==0 )
                    {
                        
                        //
                        // real shift
                        //
                        p1 = nv+1;
                        ap::vmove(&vv(1), &v(1), ap::vlen(1,p1));
                        matrixvectormultiply(h, l, l+nv, l, l+nv-1, false, vv, 1, nv, 1.0, v, 1, nv+1, -wr(j));
                        nv = nv+1;
                    }
                    else
                    {
                        if( wi(j)>0 )
                        {
                            
                            //
                            // complex conjugate pair of shifts
                            //
                            p1 = nv+1;
                            ap::vmove(&vv(1), &v(1), ap::vlen(1,p1));
                            matrixvectormultiply(h, l, l+nv, l, l+nv-1, false, v, 1, nv, 1.0, vv, 1, nv+1, -2*wr(j));
                            itemp = vectoridxabsmax(vv, 1, nv+1);
                            temp = 1/ap::maxreal(fabs(vv(itemp)), smlnum);
                            p1 = nv+1;
                            ap::vmul(&vv(1), ap::vlen(1,p1), temp);
                            absw = pythag2(wr(j), wi(j));
                            temp = temp*absw*absw;
                            matrixvectormultiply(h, l, l+nv+1, l, l+nv, false, vv, 1, nv+1, 1.0, v, 1, nv+2, temp);
                            nv = nv+2;
                        }
                    }
                    
                    //
                    // Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero,
                    // reset it to the unit vector.
                    //
                    itemp = vectoridxabsmax(v, 1, nv);
                    temp = fabs(v(itemp));
                    if( temp==0 )
                    {
                        v(1) = 1;
                        for(ii = 2; ii <= nv; ii++)
                        {
                            v(ii) = 0;
                        }
                    }
                    else
                    {
                        temp = ap::maxreal(temp, smlnum);
                        vt = 1/temp;
                        ap::vmul(&v(1), ap::vlen(1,nv), vt);
                    }
                }
            }
            
            //
            // Multiple-shift QR step
            //
            for(k = l; k <= i-1; k++)
            {
                
                //
                // The first iteration of this loop determines a reflection G
                // from the vector V and applies it from left and right to H,
                // thus creating a nonzero bulge below the subdiagonal.
                //
                // Each subsequent iteration determines a reflection G to
                // restore the Hessenberg form in the (K-1)th column, and thus
                // chases the bulge one step toward the bottom of the active
                // submatrix. NR is the order of G.
                //
                nr = ap::minint(ns+1, i-k+1);
                if( k>l )
                {
                    p1 = k-1;
                    p2 = k+nr-1;
                    ap::vmove(v.getvector(1, nr), h.getcolumn(p1, k, p2));
                }
                generatereflection(v, nr, tau);
                if( k>l )
                {
                    h(k,k-1) = v(1);
                    for(ii = k+1; ii <= i; ii++)
                    {
                        h(ii,k-1) = 0;
                    }
                }
                v(1) = 1;
                
                //
                // Apply G from the left to transform the rows of the matrix in
                // columns K to I2.
                //
                applyreflectionfromtheleft(h, tau, v, k, k+nr-1, k, i2, work);
                
                //
                // Apply G from the right to transform the columns of the
                // matrix in rows I1 to min(K+NR,I).
                //
                applyreflectionfromtheright(h, tau, v, i1, ap::minint(k+nr, i), k, k+nr-1, work);
                if( wantz )
                {
                    
                    //
                    // Accumulate transformations in the matrix Z
                    //
                    applyreflectionfromtheright(z, tau, v, 1, n, k, k+nr-1, work);
                }
            }
        }
        
        //
        // Failure to converge in remaining number of iterations
        //
        if( failflag )
        {
            info = i;
            return;
        }
        
        //
        // A submatrix of order <= MAXB in rows and columns L to I has split
        // off. Use the double-shift QR algorithm to handle it.
        //
        internalauxschur(wantt, wantz, n, l, i, h, wr, wi, 1, n, z, work, workv3, workc1, works1, info);
        if( info>0 )
        {
            return;
        }
        
        //
        // Decrement number of remaining iterations, and return to start of
        // the main loop with a new value of I.
        //
        itn = itn-its;
        i = l-1;
    }
}
Beispiel #2
0
static void internalauxschur(bool wantt,
     bool wantz,
     int n,
     int ilo,
     int ihi,
     ap::real_2d_array& h,
     ap::real_1d_array& wr,
     ap::real_1d_array& wi,
     int iloz,
     int ihiz,
     ap::real_2d_array& z,
     ap::real_1d_array& work,
     ap::real_1d_array& workv3,
     ap::real_1d_array& workc1,
     ap::real_1d_array& works1,
     int& info)
{
    int i;
    int i1;
    int i2;
    int itn;
    int its;
    int j;
    int k;
    int l;
    int m;
    int nh;
    int nr;
    int nz;
    double ave;
    double cs;
    double disc;
    double h00;
    double h10;
    double h11;
    double h12;
    double h21;
    double h22;
    double h33;
    double h33s;
    double h43h34;
    double h44;
    double h44s;
    double ovfl;
    double s;
    double smlnum;
    double sn;
    double sum;
    double t1;
    double t2;
    double t3;
    double tst1;
    double unfl;
    double v1;
    double v2;
    double v3;
    bool failflag;
    double dat1;
    double dat2;
    int p1;
    double him1im1;
    double him1i;
    double hiim1;
    double hii;
    double wrim1;
    double wri;
    double wiim1;
    double wii;
    double ulp;

    info = 0;
    dat1 = 0.75;
    dat2 = -0.4375;
    ulp = ap::machineepsilon;
    
    //
    // Quick return if possible
    //
    if( n==0 )
    {
        return;
    }
    if( ilo==ihi )
    {
        wr(ilo) = h(ilo,ilo);
        wi(ilo) = 0;
        return;
    }
    nh = ihi-ilo+1;
    nz = ihiz-iloz+1;
    
    //
    // Set machine-dependent constants for the stopping criterion.
    // If norm(H) <= sqrt(OVFL), overflow should not occur.
    //
    unfl = ap::minrealnumber;
    ovfl = 1/unfl;
    smlnum = unfl*(nh/ulp);
    
    //
    // I1 and I2 are the indices of the first row and last column of H
    // to which transformations must be applied. If eigenvalues only are
    // being computed, I1 and I2 are set inside the main loop.
    //
    if( wantt )
    {
        i1 = 1;
        i2 = n;
    }
    
    //
    // ITN is the total number of QR iterations allowed.
    //
    itn = 30*nh;
    
    //
    // The main loop begins here. I is the loop index and decreases from
    // IHI to ILO in steps of 1 or 2. Each iteration of the loop works
    // with the active submatrix in rows and columns L to I.
    // Eigenvalues I+1 to IHI have already converged. Either L = ILO or
    // H(L,L-1) is negligible so that the matrix splits.
    //
    i = ihi;
    while(true)
    {
        l = ilo;
        if( i<ilo )
        {
            return;
        }
        
        //
        // Perform QR iterations on rows and columns ILO to I until a
        // submatrix of order 1 or 2 splits off at the bottom because a
        // subdiagonal element has become negligible.
        //
        failflag = true;
        for(its = 0; its <= itn; its++)
        {
            
            //
            // Look for a single small subdiagonal element.
            //
            for(k = i; k >= l+1; k--)
            {
                tst1 = fabs(h(k-1,k-1))+fabs(h(k,k));
                if( tst1==0 )
                {
                    tst1 = upperhessenberg1norm(h, l, i, l, i, work);
                }
                if( fabs(h(k,k-1))<=ap::maxreal(ulp*tst1, smlnum) )
                {
                    break;
                }
            }
            l = k;
            if( l>ilo )
            {
                
                //
                // H(L,L-1) is negligible
                //
                h(l,l-1) = 0;
            }
            
            //
            // Exit from loop if a submatrix of order 1 or 2 has split off.
            //
            if( l>=i-1 )
            {
                failflag = false;
                break;
            }
            
            //
            // Now the active submatrix is in rows and columns L to I. If
            // eigenvalues only are being computed, only the active submatrix
            // need be transformed.
            //
            if( !wantt )
            {
                i1 = l;
                i2 = i;
            }
            if( its==10||its==20 )
            {
                
                //
                // Exceptional shift.
                //
                s = fabs(h(i,i-1))+fabs(h(i-1,i-2));
                h44 = dat1*s+h(i,i);
                h33 = h44;
                h43h34 = dat2*s*s;
            }
            else
            {
                
                //
                // Prepare to use Francis' double shift
                // (i.e. 2nd degree generalized Rayleigh quotient)
                //
                h44 = h(i,i);
                h33 = h(i-1,i-1);
                h43h34 = h(i,i-1)*h(i-1,i);
                s = h(i-1,i-2)*h(i-1,i-2);
                disc = (h33-h44)*0.5;
                disc = disc*disc+h43h34;
                if( disc>0 )
                {
                    
                    //
                    // Real roots: use Wilkinson's shift twice
                    //
                    disc = sqrt(disc);
                    ave = 0.5*(h33+h44);
                    if( fabs(h33)-fabs(h44)>0 )
                    {
                        h33 = h33*h44-h43h34;
                        h44 = h33/(extschursign(disc, ave)+ave);
                    }
                    else
                    {
                        h44 = extschursign(disc, ave)+ave;
                    }
                    h33 = h44;
                    h43h34 = 0;
                }
            }
            
            //
            // Look for two consecutive small subdiagonal elements.
            //
            for(m = i-2; m >= l; m--)
            {
                
                //
                // Determine the effect of starting the double-shift QR
                // iteration at row M, and see if this would make H(M,M-1)
                // negligible.
                //
                h11 = h(m,m);
                h22 = h(m+1,m+1);
                h21 = h(m+1,m);
                h12 = h(m,m+1);
                h44s = h44-h11;
                h33s = h33-h11;
                v1 = (h33s*h44s-h43h34)/h21+h12;
                v2 = h22-h11-h33s-h44s;
                v3 = h(m+2,m+1);
                s = fabs(v1)+fabs(v2)+fabs(v3);
                v1 = v1/s;
                v2 = v2/s;
                v3 = v3/s;
                workv3(1) = v1;
                workv3(2) = v2;
                workv3(3) = v3;
                if( m==l )
                {
                    break;
                }
                h00 = h(m-1,m-1);
                h10 = h(m,m-1);
                tst1 = fabs(v1)*(fabs(h00)+fabs(h11)+fabs(h22));
                if( fabs(h10)*(fabs(v2)+fabs(v3))<=ulp*tst1 )
                {
                    break;
                }
            }
            
            //
            // Double-shift QR step
            //
            for(k = m; k <= i-1; k++)
            {
                
                //
                // The first iteration of this loop determines a reflection G
                // from the vector V and applies it from left and right to H,
                // thus creating a nonzero bulge below the subdiagonal.
                //
                // Each subsequent iteration determines a reflection G to
                // restore the Hessenberg form in the (K-1)th column, and thus
                // chases the bulge one step toward the bottom of the active
                // submatrix. NR is the order of G.
                //
                nr = ap::minint(3, i-k+1);
                if( k>m )
                {
                    for(p1 = 1; p1 <= nr; p1++)
                    {
                        workv3(p1) = h(k+p1-1,k-1);
                    }
                }
                generatereflection(workv3, nr, t1);
                if( k>m )
                {
                    h(k,k-1) = workv3(1);
                    h(k+1,k-1) = 0;
                    if( k<i-1 )
                    {
                        h(k+2,k-1) = 0;
                    }
                }
                else
                {
                    if( m>l )
                    {
                        h(k,k-1) = -h(k,k-1);
                    }
                }
                v2 = workv3(2);
                t2 = t1*v2;
                if( nr==3 )
                {
                    v3 = workv3(3);
                    t3 = t1*v3;
                    
                    //
                    // Apply G from the left to transform the rows of the matrix
                    // in columns K to I2.
                    //
                    for(j = k; j <= i2; j++)
                    {
                        sum = h(k,j)+v2*h(k+1,j)+v3*h(k+2,j);
                        h(k,j) = h(k,j)-sum*t1;
                        h(k+1,j) = h(k+1,j)-sum*t2;
                        h(k+2,j) = h(k+2,j)-sum*t3;
                    }
                    
                    //
                    // Apply G from the right to transform the columns of the
                    // matrix in rows I1 to min(K+3,I).
                    //
                    for(j = i1; j <= ap::minint(k+3, i); j++)
                    {
                        sum = h(j,k)+v2*h(j,k+1)+v3*h(j,k+2);
                        h(j,k) = h(j,k)-sum*t1;
                        h(j,k+1) = h(j,k+1)-sum*t2;
                        h(j,k+2) = h(j,k+2)-sum*t3;
                    }
                    if( wantz )
                    {
                        
                        //
                        // Accumulate transformations in the matrix Z
                        //
                        for(j = iloz; j <= ihiz; j++)
                        {
                            sum = z(j,k)+v2*z(j,k+1)+v3*z(j,k+2);
                            z(j,k) = z(j,k)-sum*t1;
                            z(j,k+1) = z(j,k+1)-sum*t2;
                            z(j,k+2) = z(j,k+2)-sum*t3;
                        }
                    }
                }
                else
                {
                    if( nr==2 )
                    {
                        
                        //
                        // Apply G from the left to transform the rows of the matrix
                        // in columns K to I2.
                        //
                        for(j = k; j <= i2; j++)
                        {
                            sum = h(k,j)+v2*h(k+1,j);
                            h(k,j) = h(k,j)-sum*t1;
                            h(k+1,j) = h(k+1,j)-sum*t2;
                        }
                        
                        //
                        // Apply G from the right to transform the columns of the
                        // matrix in rows I1 to min(K+3,I).
                        //
                        for(j = i1; j <= i; j++)
                        {
                            sum = h(j,k)+v2*h(j,k+1);
                            h(j,k) = h(j,k)-sum*t1;
                            h(j,k+1) = h(j,k+1)-sum*t2;
                        }
                        if( wantz )
                        {
                            
                            //
                            // Accumulate transformations in the matrix Z
                            //
                            for(j = iloz; j <= ihiz; j++)
                            {
                                sum = z(j,k)+v2*z(j,k+1);
                                z(j,k) = z(j,k)-sum*t1;
                                z(j,k+1) = z(j,k+1)-sum*t2;
                            }
                        }
                    }
                }
            }
        }
        if( failflag )
        {
            
            //
            // Failure to converge in remaining number of iterations
            //
            info = i;
            return;
        }
        if( l==i )
        {
            
            //
            // H(I,I-1) is negligible: one eigenvalue has converged.
            //
            wr(i) = h(i,i);
            wi(i) = 0;
        }
        else
        {
            if( l==i-1 )
            {
                
                //
                // H(I-1,I-2) is negligible: a pair of eigenvalues have converged.
                //
                //        Transform the 2-by-2 submatrix to standard Schur form,
                //        and compute and store the eigenvalues.
                //
                him1im1 = h(i-1,i-1);
                him1i = h(i-1,i);
                hiim1 = h(i,i-1);
                hii = h(i,i);
                aux2x2schur(him1im1, him1i, hiim1, hii, wrim1, wiim1, wri, wii, cs, sn);
                wr(i-1) = wrim1;
                wi(i-1) = wiim1;
                wr(i) = wri;
                wi(i) = wii;
                h(i-1,i-1) = him1im1;
                h(i-1,i) = him1i;
                h(i,i-1) = hiim1;
                h(i,i) = hii;
                if( wantt )
                {
                    
                    //
                    // Apply the transformation to the rest of H.
                    //
                    if( i2>i )
                    {
                        workc1(1) = cs;
                        works1(1) = sn;
                        applyrotationsfromtheleft(true, i-1, i, i+1, i2, workc1, works1, h, work);
                    }
                    workc1(1) = cs;
                    works1(1) = sn;
                    applyrotationsfromtheright(true, i1, i-2, i-1, i, workc1, works1, h, work);
                }
                if( wantz )
                {
                    
                    //
                    // Apply the transformation to Z.
                    //
                    workc1(1) = cs;
                    works1(1) = sn;
                    applyrotationsfromtheright(true, iloz, iloz+nz-1, i-1, i, workc1, works1, z, work);
                }
            }
        }
        
        //
        // Decrement number of remaining iterations, and return to start of
        // the main loop with new value of I.
        //
        itn = itn-its;
        i = l-1;
    }
}
bool testblas(bool silent)
{
    bool result;
    int pass;
    int passcount;
    int n;
    int i;
    int i1;
    int i2;
    int j;
    int j1;
    int j2;
    int l;
    int k;
    int r;
    int i3;
    int j3;
    int col1;
    int col2;
    int row1;
    int row2;
    ap::real_1d_array x1;
    ap::real_1d_array x2;
    ap::real_2d_array a;
    ap::real_2d_array b;
    ap::real_2d_array c1;
    ap::real_2d_array c2;
    double err;
    double e1;
    double e2;
    double e3;
    double v;
    double scl1;
    double scl2;
    double scl3;
    bool was1;
    bool was2;
    bool trans1;
    bool trans2;
    double threshold;
    bool n2errors;
    bool hsnerrors;
    bool amaxerrors;
    bool mverrors;
    bool iterrors;
    bool cterrors;
    bool mmerrors;
    bool waserrors;

    n2errors = false;
    amaxerrors = false;
    hsnerrors = false;
    mverrors = false;
    iterrors = false;
    cterrors = false;
    mmerrors = false;
    waserrors = false;
    threshold = 10000*ap::machineepsilon;
    
    //
    // Test Norm2
    //
    passcount = 1000;
    e1 = 0;
    e2 = 0;
    e3 = 0;
    scl2 = 0.5*ap::maxrealnumber;
    scl3 = 2*ap::minrealnumber;
    for(pass = 1; pass <= passcount; pass++)
    {
        n = 1+ap::randominteger(1000);
        i1 = ap::randominteger(10);
        i2 = n+i1-1;
        x1.setbounds(i1, i2);
        x2.setbounds(i1, i2);
        for(i = i1; i <= i2; i++)
        {
            x1(i) = 2*ap::randomreal()-1;
        }
        v = 0;
        for(i = i1; i <= i2; i++)
        {
            v = v+ap::sqr(x1(i));
        }
        v = sqrt(v);
        e1 = ap::maxreal(e1, fabs(v-vectornorm2(x1, i1, i2)));
        for(i = i1; i <= i2; i++)
        {
            x2(i) = scl2*x1(i);
        }
        e2 = ap::maxreal(e2, fabs(v*scl2-vectornorm2(x2, i1, i2)));
        for(i = i1; i <= i2; i++)
        {
            x2(i) = scl3*x1(i);
        }
        e3 = ap::maxreal(e3, fabs(v*scl3-vectornorm2(x2, i1, i2)));
    }
    e2 = e2/scl2;
    e3 = e3/scl3;
    n2errors = ap::fp_greater_eq(e1,threshold)||ap::fp_greater_eq(e2,threshold)||ap::fp_greater_eq(e3,threshold);
    
    //
    // Testing VectorAbsMax, Column/Row AbsMax
    //
    x1.setbounds(1, 5);
    x1(1) = 2.0;
    x1(2) = 0.2;
    x1(3) = -1.3;
    x1(4) = 0.7;
    x1(5) = -3.0;
    amaxerrors = vectoridxabsmax(x1, 1, 5)!=5||vectoridxabsmax(x1, 1, 4)!=1||vectoridxabsmax(x1, 2, 4)!=3;
    n = 30;
    x1.setbounds(1, n);
    a.setbounds(1, n, 1, n);
    for(i = 1; i <= n; i++)
    {
        for(j = 1; j <= n; j++)
        {
            a(i,j) = 2*ap::randomreal()-1;
        }
    }
    was1 = false;
    was2 = false;
    for(pass = 1; pass <= 1000; pass++)
    {
        j = 1+ap::randominteger(n);
        i1 = 1+ap::randominteger(n);
        i2 = i1+ap::randominteger(n+1-i1);
        ap::vmove(x1.getvector(i1, i2), a.getcolumn(j, i1, i2));
        if( vectoridxabsmax(x1, i1, i2)!=columnidxabsmax(a, i1, i2, j) )
        {
            was1 = true;
        }
        i = 1+ap::randominteger(n);
        j1 = 1+ap::randominteger(n);
        j2 = j1+ap::randominteger(n+1-j1);
        ap::vmove(&x1(j1), &a(i, j1), ap::vlen(j1,j2));
        if( vectoridxabsmax(x1, j1, j2)!=rowidxabsmax(a, j1, j2, i) )
        {
            was2 = true;
        }
    }
    amaxerrors = amaxerrors||was1||was2;
    
    //
    // Testing upper Hessenberg 1-norm
    //
    a.setbounds(1, 3, 1, 3);
    x1.setbounds(1, 3);
    a(1,1) = 2;
    a(1,2) = 3;
    a(1,3) = 1;
    a(2,1) = 4;
    a(2,2) = -5;
    a(2,3) = 8;
    a(3,1) = 99;
    a(3,2) = 3;
    a(3,3) = 1;
    hsnerrors = ap::fp_greater(fabs(upperhessenberg1norm(a, 1, 3, 1, 3, x1)-11),threshold);
    
    //
    // Testing MatrixVectorMultiply
    //
    a.setbounds(2, 3, 3, 5);
    x1.setbounds(1, 3);
    x2.setbounds(1, 2);
    a(2,3) = 2;
    a(2,4) = -1;
    a(2,5) = -1;
    a(3,3) = 1;
    a(3,4) = -2;
    a(3,5) = 2;
    x1(1) = 1;
    x1(2) = 2;
    x1(3) = 1;
    x2(1) = -1;
    x2(2) = -1;
    matrixvectormultiply(a, 2, 3, 3, 5, false, x1, 1, 3, 1.0, x2, 1, 2, 1.0);
    matrixvectormultiply(a, 2, 3, 3, 5, true, x2, 1, 2, 1.0, x1, 1, 3, 1.0);
    e1 = fabs(x1(1)+5)+fabs(x1(2)-8)+fabs(x1(3)+1)+fabs(x2(1)+2)+fabs(x2(2)+2);
    x1(1) = 1;
    x1(2) = 2;
    x1(3) = 1;
    x2(1) = -1;
    x2(2) = -1;
    matrixvectormultiply(a, 2, 3, 3, 5, false, x1, 1, 3, 1.0, x2, 1, 2, 0.0);
    matrixvectormultiply(a, 2, 3, 3, 5, true, x2, 1, 2, 1.0, x1, 1, 3, 0.0);
    e2 = fabs(x1(1)+3)+fabs(x1(2)-3)+fabs(x1(3)+1)+fabs(x2(1)+1)+fabs(x2(2)+1);
    mverrors = ap::fp_greater_eq(e1+e2,threshold);
    
    //
    // testing inplace transpose
    //
    n = 10;
    a.setbounds(1, n, 1, n);
    b.setbounds(1, n, 1, n);
    x1.setbounds(1, n-1);
    for(i = 1; i <= n; i++)
    {
        for(j = 1; j <= n; j++)
        {
            a(i,j) = ap::randomreal();
        }
    }
    passcount = 10000;
    was1 = false;
    for(pass = 1; pass <= passcount; pass++)
    {
        i1 = 1+ap::randominteger(n);
        i2 = i1+ap::randominteger(n-i1+1);
        j1 = 1+ap::randominteger(n-(i2-i1));
        j2 = j1+(i2-i1);
        copymatrix(a, i1, i2, j1, j2, b, i1, i2, j1, j2);
        inplacetranspose(b, i1, i2, j1, j2, x1);
        for(i = i1; i <= i2; i++)
        {
            for(j = j1; j <= j2; j++)
            {
                if( ap::fp_neq(a(i,j),b(i1+(j-j1),j1+(i-i1))) )
                {
                    was1 = true;
                }
            }
        }
    }
    iterrors = was1;
    
    //
    // testing copy and transpose
    //
    n = 10;
    a.setbounds(1, n, 1, n);
    b.setbounds(1, n, 1, n);
    for(i = 1; i <= n; i++)
    {
        for(j = 1; j <= n; j++)
        {
            a(i,j) = ap::randomreal();
        }
    }
    passcount = 10000;
    was1 = false;
    for(pass = 1; pass <= passcount; pass++)
    {
        i1 = 1+ap::randominteger(n);
        i2 = i1+ap::randominteger(n-i1+1);
        j1 = 1+ap::randominteger(n);
        j2 = j1+ap::randominteger(n-j1+1);
        copyandtranspose(a, i1, i2, j1, j2, b, j1, j2, i1, i2);
        for(i = i1; i <= i2; i++)
        {
            for(j = j1; j <= j2; j++)
            {
                if( ap::fp_neq(a(i,j),b(j,i)) )
                {
                    was1 = true;
                }
            }
        }
    }
    cterrors = was1;
    
    //
    // Testing MatrixMatrixMultiply
    //
    n = 10;
    a.setbounds(1, 2*n, 1, 2*n);
    b.setbounds(1, 2*n, 1, 2*n);
    c1.setbounds(1, 2*n, 1, 2*n);
    c2.setbounds(1, 2*n, 1, 2*n);
    x1.setbounds(1, n);
    x2.setbounds(1, n);
    for(i = 1; i <= 2*n; i++)
    {
        for(j = 1; j <= 2*n; j++)
        {
            a(i,j) = ap::randomreal();
            b(i,j) = ap::randomreal();
        }
    }
    passcount = 1000;
    was1 = false;
    for(pass = 1; pass <= passcount; pass++)
    {
        for(i = 1; i <= 2*n; i++)
        {
            for(j = 1; j <= 2*n; j++)
            {
                c1(i,j) = 2.1*i+3.1*j;
                c2(i,j) = c1(i,j);
            }
        }
        l = 1+ap::randominteger(n);
        k = 1+ap::randominteger(n);
        r = 1+ap::randominteger(n);
        i1 = 1+ap::randominteger(n);
        j1 = 1+ap::randominteger(n);
        i2 = 1+ap::randominteger(n);
        j2 = 1+ap::randominteger(n);
        i3 = 1+ap::randominteger(n);
        j3 = 1+ap::randominteger(n);
        trans1 = ap::fp_greater(ap::randomreal(),0.5);
        trans2 = ap::fp_greater(ap::randomreal(),0.5);
        if( trans1 )
        {
            col1 = l;
            row1 = k;
        }
        else
        {
            col1 = k;
            row1 = l;
        }
        if( trans2 )
        {
            col2 = k;
            row2 = r;
        }
        else
        {
            col2 = r;
            row2 = k;
        }
        scl1 = ap::randomreal();
        scl2 = ap::randomreal();
        matrixmatrixmultiply(a, i1, i1+row1-1, j1, j1+col1-1, trans1, b, i2, i2+row2-1, j2, j2+col2-1, trans2, scl1, c1, i3, i3+l-1, j3, j3+r-1, scl2, x1);
        naivematrixmatrixmultiply(a, i1, i1+row1-1, j1, j1+col1-1, trans1, b, i2, i2+row2-1, j2, j2+col2-1, trans2, scl1, c2, i3, i3+l-1, j3, j3+r-1, scl2);
        err = 0;
        for(i = 1; i <= l; i++)
        {
            for(j = 1; j <= r; j++)
            {
                err = ap::maxreal(err, fabs(c1(i3+i-1,j3+j-1)-c2(i3+i-1,j3+j-1)));
            }
        }
        if( ap::fp_greater(err,threshold) )
        {
            was1 = true;
            break;
        }
    }
    mmerrors = was1;
    
    //
    // report
    //
    waserrors = n2errors||amaxerrors||hsnerrors||mverrors||iterrors||cterrors||mmerrors;
    if( !silent )
    {
        printf("TESTING BLAS\n");
        printf("VectorNorm2:                             ");
        if( n2errors )
        {
            printf("FAILED\n");
        }
        else
        {
            printf("OK\n");
        }
        printf("AbsMax (vector/row/column):              ");
        if( amaxerrors )
        {
            printf("FAILED\n");
        }
        else
        {
            printf("OK\n");
        }
        printf("UpperHessenberg1Norm:                    ");
        if( hsnerrors )
        {
            printf("FAILED\n");
        }
        else
        {
            printf("OK\n");
        }
        printf("MatrixVectorMultiply:                    ");
        if( mverrors )
        {
            printf("FAILED\n");
        }
        else
        {
            printf("OK\n");
        }
        printf("InplaceTranspose:                        ");
        if( iterrors )
        {
            printf("FAILED\n");
        }
        else
        {
            printf("OK\n");
        }
        printf("CopyAndTranspose:                        ");
        if( cterrors )
        {
            printf("FAILED\n");
        }
        else
        {
            printf("OK\n");
        }
        printf("MatrixMatrixMultiply:                    ");
        if( mmerrors )
        {
            printf("FAILED\n");
        }
        else
        {
            printf("OK\n");
        }
        if( waserrors )
        {
            printf("TEST FAILED\n");
        }
        else
        {
            printf("TEST PASSED\n");
        }
        printf("\n\n");
    }
    result = !waserrors;
    return result;
}