static void testrotations()
{
    ap::real_2d_array al1;
    ap::real_2d_array al2;
    ap::real_2d_array ar1;
    ap::real_2d_array ar2;
    ap::real_1d_array cl;
    ap::real_1d_array sl;
    ap::real_1d_array cr;
    ap::real_1d_array sr;
    ap::real_1d_array w;
    int m;
    int n;
    int maxmn;
    double t;
    int pass;
    int passcount;
    int i;
    int j;
    double err;
    double maxerr;
    bool isforward;

    passcount = 1000;
    maxerr = 0;
    for(pass = 1; pass <= passcount; pass++)
    {
        
        //
        // settings
        //
        m = 2+ap::randominteger(50);
        n = 2+ap::randominteger(50);
        isforward = ap::randomreal()>0.5;
        maxmn = ap::maxint(m, n);
        al1.setbounds(1, m, 1, n);
        al2.setbounds(1, m, 1, n);
        ar1.setbounds(1, m, 1, n);
        ar2.setbounds(1, m, 1, n);
        cl.setbounds(1, m-1);
        sl.setbounds(1, m-1);
        cr.setbounds(1, n-1);
        sr.setbounds(1, n-1);
        w.setbounds(1, maxmn);
        
        //
        // matrices and rotaions
        //
        for(i = 1; i <= m; i++)
        {
            for(j = 1; j <= n; j++)
            {
                al1(i,j) = 2*ap::randomreal()-1;
                al2(i,j) = al1(i,j);
                ar1(i,j) = al1(i,j);
                ar2(i,j) = al1(i,j);
            }
        }
        for(i = 1; i <= m-1; i++)
        {
            t = 2*ap::pi()*ap::randomreal();
            cl(i) = cos(t);
            sl(i) = sin(t);
        }
        for(j = 1; j <= n-1; j++)
        {
            t = 2*ap::pi()*ap::randomreal();
            cr(j) = cos(t);
            sr(j) = sin(t);
        }
        
        //
        // Test left
        //
        applyrotationsfromtheleft(isforward, 1, m, 1, n, cl, sl, al1, w);
        for(j = 1; j <= n; j++)
        {
            applyrotationsfromtheleft(isforward, 1, m, j, j, cl, sl, al2, w);
        }
        err = 0;
        for(i = 1; i <= m; i++)
        {
            for(j = 1; j <= n; j++)
            {
                err = ap::maxreal(err, fabs(al1(i,j)-al2(i,j)));
            }
        }
        maxerr = ap::maxreal(err, maxerr);
        
        //
        // Test right
        //
        applyrotationsfromtheright(isforward, 1, m, 1, n, cr, sr, ar1, w);
        for(i = 1; i <= m; i++)
        {
            applyrotationsfromtheright(isforward, i, i, 1, n, cr, sr, ar2, w);
        }
        err = 0;
        for(i = 1; i <= m; i++)
        {
            for(j = 1; j <= n; j++)
            {
                err = ap::maxreal(err, fabs(ar1(i,j)-ar2(i,j)));
            }
        }
        maxerr = ap::maxreal(err, maxerr);
    }
    printf("TESTING ROTATIONS\n");
    printf("Pass count %0ld\n",
        long(passcount));
    printf("Error is %5.3le\n",
        double(maxerr));
}
Example #2
0
static void internalauxschur(bool wantt,
     bool wantz,
     int n,
     int ilo,
     int ihi,
     ap::real_2d_array& h,
     ap::real_1d_array& wr,
     ap::real_1d_array& wi,
     int iloz,
     int ihiz,
     ap::real_2d_array& z,
     ap::real_1d_array& work,
     ap::real_1d_array& workv3,
     ap::real_1d_array& workc1,
     ap::real_1d_array& works1,
     int& info)
{
    int i;
    int i1;
    int i2;
    int itn;
    int its;
    int j;
    int k;
    int l;
    int m;
    int nh;
    int nr;
    int nz;
    double ave;
    double cs;
    double disc;
    double h00;
    double h10;
    double h11;
    double h12;
    double h21;
    double h22;
    double h33;
    double h33s;
    double h43h34;
    double h44;
    double h44s;
    double ovfl;
    double s;
    double smlnum;
    double sn;
    double sum;
    double t1;
    double t2;
    double t3;
    double tst1;
    double unfl;
    double v1;
    double v2;
    double v3;
    bool failflag;
    double dat1;
    double dat2;
    int p1;
    double him1im1;
    double him1i;
    double hiim1;
    double hii;
    double wrim1;
    double wri;
    double wiim1;
    double wii;
    double ulp;

    info = 0;
    dat1 = 0.75;
    dat2 = -0.4375;
    ulp = ap::machineepsilon;
    
    //
    // Quick return if possible
    //
    if( n==0 )
    {
        return;
    }
    if( ilo==ihi )
    {
        wr(ilo) = h(ilo,ilo);
        wi(ilo) = 0;
        return;
    }
    nh = ihi-ilo+1;
    nz = ihiz-iloz+1;
    
    //
    // Set machine-dependent constants for the stopping criterion.
    // If norm(H) <= sqrt(OVFL), overflow should not occur.
    //
    unfl = ap::minrealnumber;
    ovfl = 1/unfl;
    smlnum = unfl*(nh/ulp);
    
    //
    // I1 and I2 are the indices of the first row and last column of H
    // to which transformations must be applied. If eigenvalues only are
    // being computed, I1 and I2 are set inside the main loop.
    //
    if( wantt )
    {
        i1 = 1;
        i2 = n;
    }
    
    //
    // ITN is the total number of QR iterations allowed.
    //
    itn = 30*nh;
    
    //
    // The main loop begins here. I is the loop index and decreases from
    // IHI to ILO in steps of 1 or 2. Each iteration of the loop works
    // with the active submatrix in rows and columns L to I.
    // Eigenvalues I+1 to IHI have already converged. Either L = ILO or
    // H(L,L-1) is negligible so that the matrix splits.
    //
    i = ihi;
    while(true)
    {
        l = ilo;
        if( i<ilo )
        {
            return;
        }
        
        //
        // Perform QR iterations on rows and columns ILO to I until a
        // submatrix of order 1 or 2 splits off at the bottom because a
        // subdiagonal element has become negligible.
        //
        failflag = true;
        for(its = 0; its <= itn; its++)
        {
            
            //
            // Look for a single small subdiagonal element.
            //
            for(k = i; k >= l+1; k--)
            {
                tst1 = fabs(h(k-1,k-1))+fabs(h(k,k));
                if( tst1==0 )
                {
                    tst1 = upperhessenberg1norm(h, l, i, l, i, work);
                }
                if( fabs(h(k,k-1))<=ap::maxreal(ulp*tst1, smlnum) )
                {
                    break;
                }
            }
            l = k;
            if( l>ilo )
            {
                
                //
                // H(L,L-1) is negligible
                //
                h(l,l-1) = 0;
            }
            
            //
            // Exit from loop if a submatrix of order 1 or 2 has split off.
            //
            if( l>=i-1 )
            {
                failflag = false;
                break;
            }
            
            //
            // Now the active submatrix is in rows and columns L to I. If
            // eigenvalues only are being computed, only the active submatrix
            // need be transformed.
            //
            if( !wantt )
            {
                i1 = l;
                i2 = i;
            }
            if( its==10||its==20 )
            {
                
                //
                // Exceptional shift.
                //
                s = fabs(h(i,i-1))+fabs(h(i-1,i-2));
                h44 = dat1*s+h(i,i);
                h33 = h44;
                h43h34 = dat2*s*s;
            }
            else
            {
                
                //
                // Prepare to use Francis' double shift
                // (i.e. 2nd degree generalized Rayleigh quotient)
                //
                h44 = h(i,i);
                h33 = h(i-1,i-1);
                h43h34 = h(i,i-1)*h(i-1,i);
                s = h(i-1,i-2)*h(i-1,i-2);
                disc = (h33-h44)*0.5;
                disc = disc*disc+h43h34;
                if( disc>0 )
                {
                    
                    //
                    // Real roots: use Wilkinson's shift twice
                    //
                    disc = sqrt(disc);
                    ave = 0.5*(h33+h44);
                    if( fabs(h33)-fabs(h44)>0 )
                    {
                        h33 = h33*h44-h43h34;
                        h44 = h33/(extschursign(disc, ave)+ave);
                    }
                    else
                    {
                        h44 = extschursign(disc, ave)+ave;
                    }
                    h33 = h44;
                    h43h34 = 0;
                }
            }
            
            //
            // Look for two consecutive small subdiagonal elements.
            //
            for(m = i-2; m >= l; m--)
            {
                
                //
                // Determine the effect of starting the double-shift QR
                // iteration at row M, and see if this would make H(M,M-1)
                // negligible.
                //
                h11 = h(m,m);
                h22 = h(m+1,m+1);
                h21 = h(m+1,m);
                h12 = h(m,m+1);
                h44s = h44-h11;
                h33s = h33-h11;
                v1 = (h33s*h44s-h43h34)/h21+h12;
                v2 = h22-h11-h33s-h44s;
                v3 = h(m+2,m+1);
                s = fabs(v1)+fabs(v2)+fabs(v3);
                v1 = v1/s;
                v2 = v2/s;
                v3 = v3/s;
                workv3(1) = v1;
                workv3(2) = v2;
                workv3(3) = v3;
                if( m==l )
                {
                    break;
                }
                h00 = h(m-1,m-1);
                h10 = h(m,m-1);
                tst1 = fabs(v1)*(fabs(h00)+fabs(h11)+fabs(h22));
                if( fabs(h10)*(fabs(v2)+fabs(v3))<=ulp*tst1 )
                {
                    break;
                }
            }
            
            //
            // Double-shift QR step
            //
            for(k = m; k <= i-1; k++)
            {
                
                //
                // The first iteration of this loop determines a reflection G
                // from the vector V and applies it from left and right to H,
                // thus creating a nonzero bulge below the subdiagonal.
                //
                // Each subsequent iteration determines a reflection G to
                // restore the Hessenberg form in the (K-1)th column, and thus
                // chases the bulge one step toward the bottom of the active
                // submatrix. NR is the order of G.
                //
                nr = ap::minint(3, i-k+1);
                if( k>m )
                {
                    for(p1 = 1; p1 <= nr; p1++)
                    {
                        workv3(p1) = h(k+p1-1,k-1);
                    }
                }
                generatereflection(workv3, nr, t1);
                if( k>m )
                {
                    h(k,k-1) = workv3(1);
                    h(k+1,k-1) = 0;
                    if( k<i-1 )
                    {
                        h(k+2,k-1) = 0;
                    }
                }
                else
                {
                    if( m>l )
                    {
                        h(k,k-1) = -h(k,k-1);
                    }
                }
                v2 = workv3(2);
                t2 = t1*v2;
                if( nr==3 )
                {
                    v3 = workv3(3);
                    t3 = t1*v3;
                    
                    //
                    // Apply G from the left to transform the rows of the matrix
                    // in columns K to I2.
                    //
                    for(j = k; j <= i2; j++)
                    {
                        sum = h(k,j)+v2*h(k+1,j)+v3*h(k+2,j);
                        h(k,j) = h(k,j)-sum*t1;
                        h(k+1,j) = h(k+1,j)-sum*t2;
                        h(k+2,j) = h(k+2,j)-sum*t3;
                    }
                    
                    //
                    // Apply G from the right to transform the columns of the
                    // matrix in rows I1 to min(K+3,I).
                    //
                    for(j = i1; j <= ap::minint(k+3, i); j++)
                    {
                        sum = h(j,k)+v2*h(j,k+1)+v3*h(j,k+2);
                        h(j,k) = h(j,k)-sum*t1;
                        h(j,k+1) = h(j,k+1)-sum*t2;
                        h(j,k+2) = h(j,k+2)-sum*t3;
                    }
                    if( wantz )
                    {
                        
                        //
                        // Accumulate transformations in the matrix Z
                        //
                        for(j = iloz; j <= ihiz; j++)
                        {
                            sum = z(j,k)+v2*z(j,k+1)+v3*z(j,k+2);
                            z(j,k) = z(j,k)-sum*t1;
                            z(j,k+1) = z(j,k+1)-sum*t2;
                            z(j,k+2) = z(j,k+2)-sum*t3;
                        }
                    }
                }
                else
                {
                    if( nr==2 )
                    {
                        
                        //
                        // Apply G from the left to transform the rows of the matrix
                        // in columns K to I2.
                        //
                        for(j = k; j <= i2; j++)
                        {
                            sum = h(k,j)+v2*h(k+1,j);
                            h(k,j) = h(k,j)-sum*t1;
                            h(k+1,j) = h(k+1,j)-sum*t2;
                        }
                        
                        //
                        // Apply G from the right to transform the columns of the
                        // matrix in rows I1 to min(K+3,I).
                        //
                        for(j = i1; j <= i; j++)
                        {
                            sum = h(j,k)+v2*h(j,k+1);
                            h(j,k) = h(j,k)-sum*t1;
                            h(j,k+1) = h(j,k+1)-sum*t2;
                        }
                        if( wantz )
                        {
                            
                            //
                            // Accumulate transformations in the matrix Z
                            //
                            for(j = iloz; j <= ihiz; j++)
                            {
                                sum = z(j,k)+v2*z(j,k+1);
                                z(j,k) = z(j,k)-sum*t1;
                                z(j,k+1) = z(j,k+1)-sum*t2;
                            }
                        }
                    }
                }
            }
        }
        if( failflag )
        {
            
            //
            // Failure to converge in remaining number of iterations
            //
            info = i;
            return;
        }
        if( l==i )
        {
            
            //
            // H(I,I-1) is negligible: one eigenvalue has converged.
            //
            wr(i) = h(i,i);
            wi(i) = 0;
        }
        else
        {
            if( l==i-1 )
            {
                
                //
                // H(I-1,I-2) is negligible: a pair of eigenvalues have converged.
                //
                //        Transform the 2-by-2 submatrix to standard Schur form,
                //        and compute and store the eigenvalues.
                //
                him1im1 = h(i-1,i-1);
                him1i = h(i-1,i);
                hiim1 = h(i,i-1);
                hii = h(i,i);
                aux2x2schur(him1im1, him1i, hiim1, hii, wrim1, wiim1, wri, wii, cs, sn);
                wr(i-1) = wrim1;
                wi(i-1) = wiim1;
                wr(i) = wri;
                wi(i) = wii;
                h(i-1,i-1) = him1im1;
                h(i-1,i) = him1i;
                h(i,i-1) = hiim1;
                h(i,i) = hii;
                if( wantt )
                {
                    
                    //
                    // Apply the transformation to the rest of H.
                    //
                    if( i2>i )
                    {
                        workc1(1) = cs;
                        works1(1) = sn;
                        applyrotationsfromtheleft(true, i-1, i, i+1, i2, workc1, works1, h, work);
                    }
                    workc1(1) = cs;
                    works1(1) = sn;
                    applyrotationsfromtheright(true, i1, i-2, i-1, i, workc1, works1, h, work);
                }
                if( wantz )
                {
                    
                    //
                    // Apply the transformation to Z.
                    //
                    workc1(1) = cs;
                    works1(1) = sn;
                    applyrotationsfromtheright(true, iloz, iloz+nz-1, i-1, i, workc1, works1, z, work);
                }
            }
        }
        
        //
        // Decrement number of remaining iterations, and return to start of
        // the main loop with new value of I.
        //
        itn = itn-its;
        i = l-1;
    }
}
Example #3
0
File: bdsvd.cpp Project: 0004c/VTK
/*************************************************************************
Internal working subroutine for bidiagonal decomposition
*************************************************************************/
bool bidiagonalsvddecompositioninternal(ap::real_1d_array& d,
     ap::real_1d_array e,
     int n,
     bool isupper,
     bool isfractionalaccuracyrequired,
     ap::real_2d_array& u,
     int ustart,
     int nru,
     ap::real_2d_array& c,
     int cstart,
     int ncc,
     ap::real_2d_array& vt,
     int vstart,
     int ncvt)
{
    bool result;
    int i;
    int idir;
    int isub;
    int iter;
    int j;
    int ll = 0; // Eliminate compiler warning.
    int lll;
    int m;
    int maxit;
    int oldll;
    int oldm;
    double abse;
    double abss;
    double cosl;
    double cosr;
    double cs;
    double eps;
    double f;
    double g;
    double h;
    double mu;
    double oldcs;
    double oldsn = 0.; // Eliminate compiler warning.
    double r;
    double shift;
    double sigmn;
    double sigmx;
    double sinl;
    double sinr;
    double sll;
    double smax;
    double smin;
    double sminl;
    double sminoa;
    double sn;
    double thresh;
    double tol;
    double tolmul;
    double unfl;
    ap::real_1d_array work0;
    ap::real_1d_array work1;
    ap::real_1d_array work2;
    ap::real_1d_array work3;
    int maxitr;
    bool matrixsplitflag;
    bool iterflag;
    ap::real_1d_array utemp;
    ap::real_1d_array vttemp;
    ap::real_1d_array ctemp;
    ap::real_1d_array etemp;
    bool fwddir;
    double tmp;
    int mm1;
    int mm0;
    bool bchangedir;
    int uend;
    int cend;
    int vend;

    result = true;
    if( n==0 )
    {
        return result;
    }
    if( n==1 )
    {
        if( d(1)<0 )
        {
            d(1) = -d(1);
            if( ncvt>0 )
            {
                ap::vmul(&vt(vstart, vstart), ap::vlen(vstart,vstart+ncvt-1), -1);
            }
        }
        return result;
    }
    
    //
    // init
    //
    work0.setbounds(1, n-1);
    work1.setbounds(1, n-1);
    work2.setbounds(1, n-1);
    work3.setbounds(1, n-1);
    uend = ustart+ap::maxint(nru-1, 0);
    vend = vstart+ap::maxint(ncvt-1, 0);
    cend = cstart+ap::maxint(ncc-1, 0);
    utemp.setbounds(ustart, uend);
    vttemp.setbounds(vstart, vend);
    ctemp.setbounds(cstart, cend);
    maxitr = 12;
    fwddir = true;
    
    //
    // resize E from N-1 to N
    //
    etemp.setbounds(1, n);
    for(i = 1; i <= n-1; i++)
    {
        etemp(i) = e(i);
    }
    e.setbounds(1, n);
    for(i = 1; i <= n-1; i++)
    {
        e(i) = etemp(i);
    }
    e(n) = 0;
    idir = 0;
    
    //
    // Get machine constants
    //
    eps = ap::machineepsilon;
    unfl = ap::minrealnumber;
    
    //
    // If matrix lower bidiagonal, rotate to be upper bidiagonal
    // by applying Givens rotations on the left
    //
    if( !isupper )
    {
        for(i = 1; i <= n-1; i++)
        {
            generaterotation(d(i), e(i), cs, sn, r);
            d(i) = r;
            e(i) = sn*d(i+1);
            d(i+1) = cs*d(i+1);
            work0(i) = cs;
            work1(i) = sn;
        }
        
        //
        // Update singular vectors if desired
        //
        if( nru>0 )
        {
            applyrotationsfromtheright(fwddir, ustart, uend, 1+ustart-1, n+ustart-1, work0, work1, u, utemp);
        }
        if( ncc>0 )
        {
            applyrotationsfromtheleft(fwddir, 1+cstart-1, n+cstart-1, cstart, cend, work0, work1, c, ctemp);
        }
    }
    
    //
    // Compute singular values to relative accuracy TOL
    // (By setting TOL to be negative, algorithm will compute
    // singular values to absolute accuracy ABS(TOL)*norm(input matrix))
    //
    tolmul = ap::maxreal(double(10), ap::minreal(double(100), pow(eps, -0.125)));
    tol = tolmul*eps;
    if( !isfractionalaccuracyrequired )
    {
        tol = -tol;
    }
    
    //
    // Compute approximate maximum, minimum singular values
    //
    smax = 0;
    for(i = 1; i <= n; i++)
    {
        smax = ap::maxreal(smax, fabs(d(i)));
    }
    for(i = 1; i <= n-1; i++)
    {
        smax = ap::maxreal(smax, fabs(e(i)));
    }
    sminl = 0;
    if( tol>=0 )
    {
        
        //
        // Relative accuracy desired
        //
        sminoa = fabs(d(1));
        if( sminoa!=0 )
        {
            mu = sminoa;
            for(i = 2; i <= n; i++)
            {
                mu = fabs(d(i))*(mu/(mu+fabs(e(i-1))));
                sminoa = ap::minreal(sminoa, mu);
                if( sminoa==0 )
                {
                    break;
                }
            }
        }
        sminoa = sminoa/sqrt(double(n));
        thresh = ap::maxreal(tol*sminoa, maxitr*n*n*unfl);
    }
    else
    {
        
        //
        // Absolute accuracy desired
        //
        thresh = ap::maxreal(fabs(tol)*smax, maxitr*n*n*unfl);
    }
    
    //
    // Prepare for main iteration loop for the singular values
    // (MAXIT is the maximum number of passes through the inner
    // loop permitted before nonconvergence signalled.)
    //
    maxit = maxitr*n*n;
    iter = 0;
    oldll = -1;
    oldm = -1;
    
    //
    // M points to last element of unconverged part of matrix
    //
    m = n;
    
    //
    // Begin main iteration loop
    //
    while(true)
    {
        
        //
        // Check for convergence or exceeding iteration count
        //
        if( m<=1 )
        {
            break;
        }
        if( iter>maxit )
        {
            result = false;
            return result;
        }
        
        //
        // Find diagonal block of matrix to work on
        //
        if( tol<0&&fabs(d(m))<=thresh )
        {
            d(m) = 0;
        }
        smax = fabs(d(m));
        smin = smax;
        matrixsplitflag = false;
        for(lll = 1; lll <= m-1; lll++)
        {
            ll = m-lll;
            abss = fabs(d(ll));
            abse = fabs(e(ll));
            if( tol<0&&abss<=thresh )
            {
                d(ll) = 0;
            }
            if( abse<=thresh )
            {
                matrixsplitflag = true;
                break;
            }
            smin = ap::minreal(smin, abss);
            smax = ap::maxreal(smax, ap::maxreal(abss, abse));
        }
        if( !matrixsplitflag )
        {
            ll = 0;
        }
        else
        {
            
            //
            // Matrix splits since E(LL) = 0
            //
            e(ll) = 0;
            if( ll==m-1 )
            {
                
                //
                // Convergence of bottom singular value, return to top of loop
                //
                m = m-1;
                continue;
            }
        }
        ll = ll+1;
        
        //
        // E(LL) through E(M-1) are nonzero, E(LL-1) is zero
        //
        if( ll==m-1 )
        {
            
            //
            // 2 by 2 block, handle separately
            //
            svdv2x2(d(m-1), e(m-1), d(m), sigmn, sigmx, sinr, cosr, sinl, cosl);
            d(m-1) = sigmx;
            e(m-1) = 0;
            d(m) = sigmn;
            
            //
            // Compute singular vectors, if desired
            //
            if( ncvt>0 )
            {
                mm0 = m+(vstart-1);
                mm1 = m-1+(vstart-1);
                ap::vmove(&vttemp(vstart), &vt(mm1, vstart), ap::vlen(vstart,vend), cosr);
                ap::vadd(&vttemp(vstart), &vt(mm0, vstart), ap::vlen(vstart,vend), sinr);
                ap::vmul(&vt(mm0, vstart), ap::vlen(vstart,vend), cosr);
                ap::vsub(&vt(mm0, vstart), &vt(mm1, vstart), ap::vlen(vstart,vend), sinr);
                ap::vmove(&vt(mm1, vstart), &vttemp(vstart), ap::vlen(vstart,vend));
            }
            if( nru>0 )
            {
                mm0 = m+ustart-1;
                mm1 = m-1+ustart-1;
                ap::vmove(utemp.getvector(ustart, uend), u.getcolumn(mm1, ustart, uend), cosl);
                ap::vadd(utemp.getvector(ustart, uend), u.getcolumn(mm0, ustart, uend), sinl);
                ap::vmul(u.getcolumn(mm0, ustart, uend), cosl);
                ap::vsub(u.getcolumn(mm0, ustart, uend), u.getcolumn(mm1, ustart, uend), sinl);
                ap::vmove(u.getcolumn(mm1, ustart, uend), utemp.getvector(ustart, uend));
            }
            if( ncc>0 )
            {
                mm0 = m+cstart-1;
                mm1 = m-1+cstart-1;
                ap::vmove(&ctemp(cstart), &c(mm1, cstart), ap::vlen(cstart,cend), cosl);
                ap::vadd(&ctemp(cstart), &c(mm0, cstart), ap::vlen(cstart,cend), sinl);
                ap::vmul(&c(mm0, cstart), ap::vlen(cstart,cend), cosl);
                ap::vsub(&c(mm0, cstart), &c(mm1, cstart), ap::vlen(cstart,cend), sinl);
                ap::vmove(&c(mm1, cstart), &ctemp(cstart), ap::vlen(cstart,cend));
            }
            m = m-2;
            continue;
        }
        
        //
        // If working on new submatrix, choose shift direction
        // (from larger end diagonal element towards smaller)
        //
        // Previously was
        //     "if (LL>OLDM) or (M<OLDLL) then"
        // fixed thanks to Michael Rolle < *****@*****.** >
        // Very strange that LAPACK still contains it.
        //
        bchangedir = false;
        if( idir==1&&fabs(d(ll))<1.0E-3*fabs(d(m)) )
        {
            bchangedir = true;
        }
        if( idir==2&&fabs(d(m))<1.0E-3*fabs(d(ll)) )
        {
            bchangedir = true;
        }
        if( ll!=oldll||m!=oldm||bchangedir )
        {
            if( fabs(d(ll))>=fabs(d(m)) )
            {
                
                //
                // Chase bulge from top (big end) to bottom (small end)
                //
                idir = 1;
            }
            else
            {
                
                //
                // Chase bulge from bottom (big end) to top (small end)
                //
                idir = 2;
            }
        }
        
        //
        // Apply convergence tests
        //
        if( idir==1 )
        {
            
            //
            // Run convergence test in forward direction
            // First apply standard test to bottom of matrix
            //
            if( (fabs(e(m-1))<=fabs(tol)*fabs(d(m)))||(tol<0&&fabs(e(m-1))<=thresh) )
            {
                e(m-1) = 0;
                continue;
            }
            if( tol>=0 )
            {
                
                //
                // If relative accuracy desired,
                // apply convergence criterion forward
                //
                mu = fabs(d(ll));
                sminl = mu;
                iterflag = false;
                for(lll = ll; lll <= m-1; lll++)
                {
                    if( fabs(e(lll))<=tol*mu )
                    {
                        e(lll) = 0;
                        iterflag = true;
                        break;
                    }
                    mu = fabs(d(lll+1))*(mu/(mu+fabs(e(lll))));
                    sminl = ap::minreal(sminl, mu);
                }
                if( iterflag )
                {
                    continue;
                }
            }
        }
        else
        {
            
            //
            // Run convergence test in backward direction
            // First apply standard test to top of matrix
            //
            if( (fabs(e(ll))<=fabs(tol)*fabs(d(ll)))||(tol<0&&fabs(e(ll))<=thresh) )
            {
                e(ll) = 0;
                continue;
            }
            if( tol>=0 )
            {
                
                //
                // If relative accuracy desired,
                // apply convergence criterion backward
                //
                mu = fabs(d(m));
                sminl = mu;
                iterflag = false;
                for(lll = m-1; lll >= ll; lll--)
                {
                    if( fabs(e(lll))<=tol*mu )
                    {
                        e(lll) = 0;
                        iterflag = true;
                        break;
                    }
                    mu = fabs(d(lll))*(mu/(mu+fabs(e(lll))));
                    sminl = ap::minreal(sminl, mu);
                }
                if( iterflag )
                {
                    continue;
                }
            }
        }
        oldll = ll;
        oldm = m;
        
        //
        // Compute shift.  First, test if shifting would ruin relative
        // accuracy, and if so set the shift to zero.
        //
        if( tol>=0&&n*tol*(sminl/smax)<=ap::maxreal(eps, 0.01*tol) )
        {
            
            //
            // Use a zero shift to avoid loss of relative accuracy
            //
            shift = 0;
        }
        else
        {
            
            //
            // Compute the shift from 2-by-2 block at end of matrix
            //
            if( idir==1 )
            {
                sll = fabs(d(ll));
                svd2x2(d(m-1), e(m-1), d(m), shift, r);
            }
            else
            {
                sll = fabs(d(m));
                svd2x2(d(ll), e(ll), d(ll+1), shift, r);
            }
            
            //
            // Test if shift negligible, and if so set to zero
            //
            if( sll>0 )
            {
                if( ap::sqr(shift/sll)<eps )
                {
                    shift = 0;
                }
            }
        }
        
        //
        // Increment iteration count
        //
        iter = iter+m-ll;
        
        //
        // If SHIFT = 0, do simplified QR iteration
        //
        if( shift==0 )
        {
            if( idir==1 )
            {
                
                //
                // Chase bulge from top to bottom
                // Save cosines and sines for later singular vector updates
                //
                cs = 1;
                oldcs = 1;
                for(i = ll; i <= m-1; i++)
                {
                    generaterotation(d(i)*cs, e(i), cs, sn, r);
                    if( i>ll )
                    {
                        e(i-1) = oldsn*r;
                    }
                    generaterotation(oldcs*r, d(i+1)*sn, oldcs, oldsn, tmp);
                    d(i) = tmp;
                    work0(i-ll+1) = cs;
                    work1(i-ll+1) = sn;
                    work2(i-ll+1) = oldcs;
                    work3(i-ll+1) = oldsn;
                }
                h = d(m)*cs;
                d(m) = h*oldcs;
                e(m-1) = h*oldsn;
                
                //
                // Update singular vectors
                //
                if( ncvt>0 )
                {
                    applyrotationsfromtheleft(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work0, work1, vt, vttemp);
                }
                if( nru>0 )
                {
                    applyrotationsfromtheright(fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work2, work3, u, utemp);
                }
                if( ncc>0 )
                {
                    applyrotationsfromtheleft(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work2, work3, c, ctemp);
                }
                
                //
                // Test convergence
                //
                if( fabs(e(m-1))<=thresh )
                {
                    e(m-1) = 0;
                }
            }
            else
            {
                
                //
                // Chase bulge from bottom to top
                // Save cosines and sines for later singular vector updates
                //
                cs = 1;
                oldcs = 1;
                for(i = m; i >= ll+1; i--)
                {
                    generaterotation(d(i)*cs, e(i-1), cs, sn, r);
                    if( i<m )
                    {
                        e(i) = oldsn*r;
                    }
                    generaterotation(oldcs*r, d(i-1)*sn, oldcs, oldsn, tmp);
                    d(i) = tmp;
                    work0(i-ll) = cs;
                    work1(i-ll) = -sn;
                    work2(i-ll) = oldcs;
                    work3(i-ll) = -oldsn;
                }
                h = d(ll)*cs;
                d(ll) = h*oldcs;
                e(ll) = h*oldsn;
                
                //
                // Update singular vectors
                //
                if( ncvt>0 )
                {
                    applyrotationsfromtheleft(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work2, work3, vt, vttemp);
                }
                if( nru>0 )
                {
                    applyrotationsfromtheright(!fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work0, work1, u, utemp);
                }
                if( ncc>0 )
                {
                    applyrotationsfromtheleft(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work0, work1, c, ctemp);
                }
                
                //
                // Test convergence
                //
                if( fabs(e(ll))<=thresh )
                {
                    e(ll) = 0;
                }
            }
        }
        else
        {
            
            //
            // Use nonzero shift
            //
            if( idir==1 )
            {
                
                //
                // Chase bulge from top to bottom
                // Save cosines and sines for later singular vector updates
                //
                f = (fabs(d(ll))-shift)*(extsignbdsqr(double(1), d(ll))+shift/d(ll));
                g = e(ll);
                for(i = ll; i <= m-1; i++)
                {
                    generaterotation(f, g, cosr, sinr, r);
                    if( i>ll )
                    {
                        e(i-1) = r;
                    }
                    f = cosr*d(i)+sinr*e(i);
                    e(i) = cosr*e(i)-sinr*d(i);
                    g = sinr*d(i+1);
                    d(i+1) = cosr*d(i+1);
                    generaterotation(f, g, cosl, sinl, r);
                    d(i) = r;
                    f = cosl*e(i)+sinl*d(i+1);
                    d(i+1) = cosl*d(i+1)-sinl*e(i);
                    if( i<m-1 )
                    {
                        g = sinl*e(i+1);
                        e(i+1) = cosl*e(i+1);
                    }
                    work0(i-ll+1) = cosr;
                    work1(i-ll+1) = sinr;
                    work2(i-ll+1) = cosl;
                    work3(i-ll+1) = sinl;
                }
                e(m-1) = f;
                
                //
                // Update singular vectors
                //
                if( ncvt>0 )
                {
                    applyrotationsfromtheleft(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work0, work1, vt, vttemp);
                }
                if( nru>0 )
                {
                    applyrotationsfromtheright(fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work2, work3, u, utemp);
                }
                if( ncc>0 )
                {
                    applyrotationsfromtheleft(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work2, work3, c, ctemp);
                }
                
                //
                // Test convergence
                //
                if( fabs(e(m-1))<=thresh )
                {
                    e(m-1) = 0;
                }
            }
            else
            {
                
                //
                // Chase bulge from bottom to top
                // Save cosines and sines for later singular vector updates
                //
                f = (fabs(d(m))-shift)*(extsignbdsqr(double(1), d(m))+shift/d(m));
                g = e(m-1);
                for(i = m; i >= ll+1; i--)
                {
                    generaterotation(f, g, cosr, sinr, r);
                    if( i<m )
                    {
                        e(i) = r;
                    }
                    f = cosr*d(i)+sinr*e(i-1);
                    e(i-1) = cosr*e(i-1)-sinr*d(i);
                    g = sinr*d(i-1);
                    d(i-1) = cosr*d(i-1);
                    generaterotation(f, g, cosl, sinl, r);
                    d(i) = r;
                    f = cosl*e(i-1)+sinl*d(i-1);
                    d(i-1) = cosl*d(i-1)-sinl*e(i-1);
                    if( i>ll+1 )
                    {
                        g = sinl*e(i-2);
                        e(i-2) = cosl*e(i-2);
                    }
                    work0(i-ll) = cosr;
                    work1(i-ll) = -sinr;
                    work2(i-ll) = cosl;
                    work3(i-ll) = -sinl;
                }
                e(ll) = f;
                
                //
                // Test convergence
                //
                if( fabs(e(ll))<=thresh )
                {
                    e(ll) = 0;
                }
                
                //
                // Update singular vectors if desired
                //
                if( ncvt>0 )
                {
                    applyrotationsfromtheleft(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work2, work3, vt, vttemp);
                }
                if( nru>0 )
                {
                    applyrotationsfromtheright(!fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work0, work1, u, utemp);
                }
                if( ncc>0 )
                {
                    applyrotationsfromtheleft(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work0, work1, c, ctemp);
                }
            }
        }
        
        //
        // QR iteration finished, go back and check convergence
        //
        continue;
    }
    
    //
    // All singular values converged, so make them positive
    //
    for(i = 1; i <= n; i++)
    {
        if( d(i)<0 )
        {
            d(i) = -d(i);
            
            //
            // Change sign of singular vectors, if desired
            //
            if( ncvt>0 )
            {
                ap::vmul(&vt(i+vstart-1, vstart), ap::vlen(vstart,vend), -1);
            }
        }
    }
    
    //
    // Sort the singular values into decreasing order (insertion sort on
    // singular values, but only one transposition per singular vector)
    //
    for(i = 1; i <= n-1; i++)
    {
        
        //
        // Scan for smallest D(I)
        //
        isub = 1;
        smin = d(1);
        for(j = 2; j <= n+1-i; j++)
        {
            if( d(j)<=smin )
            {
                isub = j;
                smin = d(j);
            }
        }
        if( isub!=n+1-i )
        {
            
            //
            // Swap singular values and vectors
            //
            d(isub) = d(n+1-i);
            d(n+1-i) = smin;
            if( ncvt>0 )
            {
                j = n+1-i;
                ap::vmove(&vttemp(vstart), &vt(isub+vstart-1, vstart), ap::vlen(vstart,vend));
                ap::vmove(&vt(isub+vstart-1, vstart), &vt(j+vstart-1, vstart), ap::vlen(vstart,vend));
                ap::vmove(&vt(j+vstart-1, vstart), &vttemp(vstart), ap::vlen(vstart,vend));
            }
            if( nru>0 )
            {
                j = n+1-i;
                ap::vmove(utemp.getvector(ustart, uend), u.getcolumn(isub+ustart-1, ustart, uend));
                ap::vmove(u.getcolumn(isub+ustart-1, ustart, uend), u.getcolumn(j+ustart-1, ustart, uend));
                ap::vmove(u.getcolumn(j+ustart-1, ustart, uend), utemp.getvector(ustart, uend));
            }
            if( ncc>0 )
            {
                j = n+1-i;
                ap::vmove(&ctemp(cstart), &c(isub+cstart-1, cstart), ap::vlen(cstart,cend));
                ap::vmove(&c(isub+cstart-1, cstart), &c(j+cstart-1, cstart), ap::vlen(cstart,cend));
                ap::vmove(&c(j+cstart-1, cstart), &ctemp(cstart), ap::vlen(cstart,cend));
            }
        }
    }
    return result;
}