/*************************************************************************
k-means++ clusterization

INPUT PARAMETERS:
    XY          -   dataset, array [0..NPoints-1,0..NVars-1].
    NPoints     -   dataset size, NPoints>=K
    NVars       -   number of variables, NVars>=1
    K           -   desired number of clusters, K>=1
    Restarts    -   number of restarts, Restarts>=1

OUTPUT PARAMETERS:
    Info        -   return code:
                    * -3, if taskis degenerate (number of distinct points is
                          less than K)
                    * -1, if incorrect NPoints/NFeatures/K/Restarts was passed
                    *  1, if subroutine finished successfully
    C           -   array[0..NVars-1,0..K-1].matrix whose columns store
                    cluster's centers
    XYC         -   array which contains number of clusters dataset points
                    belong to.

  -- ALGLIB --
     Copyright 21.03.2009 by Bochkanov Sergey
*************************************************************************/
void kmeansgenerate(const ap::real_2d_array& xy,
     int npoints,
     int nvars,
     int k,
     int restarts,
     int& info,
     ap::real_2d_array& c,
     ap::integer_1d_array& xyc)
{
    int i;
    int j;
    ap::real_2d_array ct;
    ap::real_2d_array ctbest;
    double e;
    double ebest;
    ap::real_1d_array x;
    ap::real_1d_array tmp;
    int cc;
    ap::real_1d_array d2;
    ap::real_1d_array p;
    ap::integer_1d_array csizes;
    ap::boolean_1d_array cbusy;
    double v;
    double s;
    int cclosest;
    double dclosest;
    ap::real_1d_array work;
    bool waschanges;
    bool zerosizeclusters;
    int pass;

    
    //
    // Test parameters
    //
    if( npoints<k||nvars<1||k<1||restarts<1 )
    {
        info = -1;
        return;
    }
    
    //
    // TODO: special case K=1
    // TODO: special case K=NPoints
    //
    info = 1;
    
    //
    // Multiple passes of k-means++ algorithm
    //
    ct.setbounds(0, k-1, 0, nvars-1);
    ctbest.setbounds(0, k-1, 0, nvars-1);
    xyc.setbounds(0, npoints-1);
    d2.setbounds(0, npoints-1);
    p.setbounds(0, npoints-1);
    tmp.setbounds(0, nvars-1);
    csizes.setbounds(0, k-1);
    cbusy.setbounds(0, k-1);
    ebest = ap::maxrealnumber;
    for(pass = 1; pass <= restarts; pass++)
    {
        
        //
        // Select initial centers  using k-means++ algorithm
        // 1. Choose first center at random
        // 2. Choose next centers using their distance from centers already chosen
        //
        // Note that for performance reasons centers are stored in ROWS of CT, not
        // in columns. We'll transpose CT in the end and store it in the C.
        //
        i = ap::randominteger(npoints);
        ap::vmove(&ct(0, 0), &xy(i, 0), ap::vlen(0,nvars-1));
        cbusy(0) = true;
        for(i = 1; i <= k-1; i++)
        {
            cbusy(i) = false;
        }
        if( !selectcenterpp(xy, npoints, nvars, ct, cbusy, k, d2, p, tmp) )
        {
            info = -3;
            return;
        }
        
        //
        // Update centers:
        // 2. update center positions
        //
        while(true)
        {
            
            //
            // fill XYC with center numbers
            //
            waschanges = false;
            for(i = 0; i <= npoints-1; i++)
            {
                cclosest = -1;
                dclosest = ap::maxrealnumber;
                for(j = 0; j <= k-1; j++)
                {
                    ap::vmove(&tmp(0), &xy(i, 0), ap::vlen(0,nvars-1));
                    ap::vsub(&tmp(0), &ct(j, 0), ap::vlen(0,nvars-1));
                    v = ap::vdotproduct(&tmp(0), &tmp(0), ap::vlen(0,nvars-1));
                    if( v<dclosest )
                    {
                        cclosest = j;
                        dclosest = v;
                    }
                }
                if( xyc(i)!=cclosest )
                {
                    waschanges = true;
                }
                xyc(i) = cclosest;
            }
            
            //
            // Update centers
            //
            for(j = 0; j <= k-1; j++)
            {
                csizes(j) = 0;
            }
            for(i = 0; i <= k-1; i++)
            {
                for(j = 0; j <= nvars-1; j++)
                {
                    ct(i,j) = 0;
                }
            }
            for(i = 0; i <= npoints-1; i++)
            {
                csizes(xyc(i)) = csizes(xyc(i))+1;
                ap::vadd(&ct(xyc(i), 0), &xy(i, 0), ap::vlen(0,nvars-1));
            }
            zerosizeclusters = false;
            for(i = 0; i <= k-1; i++)
            {
                cbusy(i) = csizes(i)!=0;
                zerosizeclusters = zerosizeclusters||csizes(i)==0;
            }
            if( zerosizeclusters )
            {
                
                //
                // Some clusters have zero size - rare, but possible.
                // We'll choose new centers for such clusters using k-means++ rule
                // and restart algorithm
                //
                if( !selectcenterpp(xy, npoints, nvars, ct, cbusy, k, d2, p, tmp) )
                {
                    info = -3;
                    return;
                }
                continue;
            }
            for(j = 0; j <= k-1; j++)
            {
                v = double(1)/double(csizes(j));
                ap::vmul(&ct(j, 0), ap::vlen(0,nvars-1), v);
            }
            
            //
            // if nothing has changed during iteration
            //
            if( !waschanges )
            {
                break;
            }
        }
        
        //
        // 3. Calculate E, compare with best centers found so far
        //
        e = 0;
        for(i = 0; i <= npoints-1; i++)
        {
            ap::vmove(&tmp(0), &xy(i, 0), ap::vlen(0,nvars-1));
            ap::vsub(&tmp(0), &ct(xyc(i), 0), ap::vlen(0,nvars-1));
            v = ap::vdotproduct(&tmp(0), &tmp(0), ap::vlen(0,nvars-1));
            e = e+v;
        }
        if( e<ebest )
        {
            
            //
            // store partition
            //
            copymatrix(ct, 0, k-1, 0, nvars-1, ctbest, 0, k-1, 0, nvars-1);
        }
    }
    
    //
    // Copy and transpose
    //
    c.setbounds(0, nvars-1, 0, k-1);
    copyandtranspose(ctbest, 0, k-1, 0, nvars-1, c, 0, nvars-1, 0, k-1);
}
示例#2
0
/*************************************************************************
Principal components analysis

Subroutine  builds  orthogonal  basis  where  first  axis  corresponds  to
direction with maximum variance, second axis maximizes variance in subspace
orthogonal to first axis and so on.

It should be noted that, unlike LDA, PCA does not use class labels.

INPUT PARAMETERS:
    X           -   dataset, array[0..NPoints-1,0..NVars-1].
                    matrix contains ONLY INDEPENDENT VARIABLES.
    NPoints     -   dataset size, NPoints>=0
    NVars       -   number of independent variables, NVars>=1

бшундмше оюпюлерпш:
    Info        -   return code:
                    * -4, if SVD subroutine haven't converged
                    * -1, if wrong parameters has been passed (NPoints<0,
                          NVars<1)
                    *  1, if task is solved
    S2          -   array[0..NVars-1]. variance values corresponding
                    to basis vectors.
    V           -   array[0..NVars-1,0..NVars-1]
                    matrix, whose columns store basis vectors.

  -- ALGLIB --
     Copyright 25.08.2008 by Bochkanov Sergey
*************************************************************************/
void pcabuildbasis(const ap::real_2d_array& x,
     int npoints,
     int nvars,
     int& info,
     ap::real_1d_array& s2,
     ap::real_2d_array& v)
{
    ap::real_2d_array a;
    ap::real_2d_array u;
    ap::real_2d_array vt;
    ap::real_1d_array m;
    ap::real_1d_array t;
    int i;
    int j;
    double mean;
    double variance;
    double skewness;
    double kurtosis;

    
    //
    // Check input data
    //
    if( npoints<0||nvars<1 )
    {
        info = -1;
        return;
    }
    info = 1;
    
    //
    // Special case: NPoints=0
    //
    if( npoints==0 )
    {
        s2.setbounds(0, nvars-1);
        v.setbounds(0, nvars-1, 0, nvars-1);
        for(i = 0; i <= nvars-1; i++)
        {
            s2(i) = 0;
        }
        for(i = 0; i <= nvars-1; i++)
        {
            for(j = 0; j <= nvars-1; j++)
            {
                if( i==j )
                {
                    v(i,j) = 1;
                }
                else
                {
                    v(i,j) = 0;
                }
            }
        }
        return;
    }
    
    //
    // Calculate means
    //
    m.setbounds(0, nvars-1);
    t.setbounds(0, npoints-1);
    for(j = 0; j <= nvars-1; j++)
    {
        ap::vmove(t.getvector(0, npoints-1), x.getcolumn(j, 0, npoints-1));
        calculatemoments(t, npoints, mean, variance, skewness, kurtosis);
        m(j) = mean;
    }
    
    //
    // Center, apply SVD, prepare output
    //
    a.setbounds(0, ap::maxint(npoints, nvars)-1, 0, nvars-1);
    for(i = 0; i <= npoints-1; i++)
    {
        ap::vmove(&a(i, 0), &x(i, 0), ap::vlen(0,nvars-1));
        ap::vsub(&a(i, 0), &m(0), ap::vlen(0,nvars-1));
    }
    for(i = npoints; i <= nvars-1; i++)
    {
        for(j = 0; j <= nvars-1; j++)
        {
            a(i,j) = 0;
        }
    }
    if( !rmatrixsvd(a, ap::maxint(npoints, nvars), nvars, 0, 1, 2, s2, u, vt) )
    {
        info = -4;
        return;
    }
    if( npoints!=1 )
    {
        for(i = 0; i <= nvars-1; i++)
        {
            s2(i) = ap::sqr(s2(i))/(npoints-1);
        }
    }
    v.setbounds(0, nvars-1, 0, nvars-1);
    copyandtranspose(vt, 0, nvars-1, 0, nvars-1, v, 0, nvars-1, 0, nvars-1);
}
bool testblas(bool silent)
{
    bool result;
    int pass;
    int passcount;
    int n;
    int i;
    int i1;
    int i2;
    int j;
    int j1;
    int j2;
    int l;
    int k;
    int r;
    int i3;
    int j3;
    int col1;
    int col2;
    int row1;
    int row2;
    ap::real_1d_array x1;
    ap::real_1d_array x2;
    ap::real_2d_array a;
    ap::real_2d_array b;
    ap::real_2d_array c1;
    ap::real_2d_array c2;
    double err;
    double e1;
    double e2;
    double e3;
    double v;
    double scl1;
    double scl2;
    double scl3;
    bool was1;
    bool was2;
    bool trans1;
    bool trans2;
    double threshold;
    bool n2errors;
    bool hsnerrors;
    bool amaxerrors;
    bool mverrors;
    bool iterrors;
    bool cterrors;
    bool mmerrors;
    bool waserrors;

    n2errors = false;
    amaxerrors = false;
    hsnerrors = false;
    mverrors = false;
    iterrors = false;
    cterrors = false;
    mmerrors = false;
    waserrors = false;
    threshold = 10000*ap::machineepsilon;
    
    //
    // Test Norm2
    //
    passcount = 1000;
    e1 = 0;
    e2 = 0;
    e3 = 0;
    scl2 = 0.5*ap::maxrealnumber;
    scl3 = 2*ap::minrealnumber;
    for(pass = 1; pass <= passcount; pass++)
    {
        n = 1+ap::randominteger(1000);
        i1 = ap::randominteger(10);
        i2 = n+i1-1;
        x1.setbounds(i1, i2);
        x2.setbounds(i1, i2);
        for(i = i1; i <= i2; i++)
        {
            x1(i) = 2*ap::randomreal()-1;
        }
        v = 0;
        for(i = i1; i <= i2; i++)
        {
            v = v+ap::sqr(x1(i));
        }
        v = sqrt(v);
        e1 = ap::maxreal(e1, fabs(v-vectornorm2(x1, i1, i2)));
        for(i = i1; i <= i2; i++)
        {
            x2(i) = scl2*x1(i);
        }
        e2 = ap::maxreal(e2, fabs(v*scl2-vectornorm2(x2, i1, i2)));
        for(i = i1; i <= i2; i++)
        {
            x2(i) = scl3*x1(i);
        }
        e3 = ap::maxreal(e3, fabs(v*scl3-vectornorm2(x2, i1, i2)));
    }
    e2 = e2/scl2;
    e3 = e3/scl3;
    n2errors = ap::fp_greater_eq(e1,threshold)||ap::fp_greater_eq(e2,threshold)||ap::fp_greater_eq(e3,threshold);
    
    //
    // Testing VectorAbsMax, Column/Row AbsMax
    //
    x1.setbounds(1, 5);
    x1(1) = 2.0;
    x1(2) = 0.2;
    x1(3) = -1.3;
    x1(4) = 0.7;
    x1(5) = -3.0;
    amaxerrors = vectoridxabsmax(x1, 1, 5)!=5||vectoridxabsmax(x1, 1, 4)!=1||vectoridxabsmax(x1, 2, 4)!=3;
    n = 30;
    x1.setbounds(1, n);
    a.setbounds(1, n, 1, n);
    for(i = 1; i <= n; i++)
    {
        for(j = 1; j <= n; j++)
        {
            a(i,j) = 2*ap::randomreal()-1;
        }
    }
    was1 = false;
    was2 = false;
    for(pass = 1; pass <= 1000; pass++)
    {
        j = 1+ap::randominteger(n);
        i1 = 1+ap::randominteger(n);
        i2 = i1+ap::randominteger(n+1-i1);
        ap::vmove(x1.getvector(i1, i2), a.getcolumn(j, i1, i2));
        if( vectoridxabsmax(x1, i1, i2)!=columnidxabsmax(a, i1, i2, j) )
        {
            was1 = true;
        }
        i = 1+ap::randominteger(n);
        j1 = 1+ap::randominteger(n);
        j2 = j1+ap::randominteger(n+1-j1);
        ap::vmove(&x1(j1), &a(i, j1), ap::vlen(j1,j2));
        if( vectoridxabsmax(x1, j1, j2)!=rowidxabsmax(a, j1, j2, i) )
        {
            was2 = true;
        }
    }
    amaxerrors = amaxerrors||was1||was2;
    
    //
    // Testing upper Hessenberg 1-norm
    //
    a.setbounds(1, 3, 1, 3);
    x1.setbounds(1, 3);
    a(1,1) = 2;
    a(1,2) = 3;
    a(1,3) = 1;
    a(2,1) = 4;
    a(2,2) = -5;
    a(2,3) = 8;
    a(3,1) = 99;
    a(3,2) = 3;
    a(3,3) = 1;
    hsnerrors = ap::fp_greater(fabs(upperhessenberg1norm(a, 1, 3, 1, 3, x1)-11),threshold);
    
    //
    // Testing MatrixVectorMultiply
    //
    a.setbounds(2, 3, 3, 5);
    x1.setbounds(1, 3);
    x2.setbounds(1, 2);
    a(2,3) = 2;
    a(2,4) = -1;
    a(2,5) = -1;
    a(3,3) = 1;
    a(3,4) = -2;
    a(3,5) = 2;
    x1(1) = 1;
    x1(2) = 2;
    x1(3) = 1;
    x2(1) = -1;
    x2(2) = -1;
    matrixvectormultiply(a, 2, 3, 3, 5, false, x1, 1, 3, 1.0, x2, 1, 2, 1.0);
    matrixvectormultiply(a, 2, 3, 3, 5, true, x2, 1, 2, 1.0, x1, 1, 3, 1.0);
    e1 = fabs(x1(1)+5)+fabs(x1(2)-8)+fabs(x1(3)+1)+fabs(x2(1)+2)+fabs(x2(2)+2);
    x1(1) = 1;
    x1(2) = 2;
    x1(3) = 1;
    x2(1) = -1;
    x2(2) = -1;
    matrixvectormultiply(a, 2, 3, 3, 5, false, x1, 1, 3, 1.0, x2, 1, 2, 0.0);
    matrixvectormultiply(a, 2, 3, 3, 5, true, x2, 1, 2, 1.0, x1, 1, 3, 0.0);
    e2 = fabs(x1(1)+3)+fabs(x1(2)-3)+fabs(x1(3)+1)+fabs(x2(1)+1)+fabs(x2(2)+1);
    mverrors = ap::fp_greater_eq(e1+e2,threshold);
    
    //
    // testing inplace transpose
    //
    n = 10;
    a.setbounds(1, n, 1, n);
    b.setbounds(1, n, 1, n);
    x1.setbounds(1, n-1);
    for(i = 1; i <= n; i++)
    {
        for(j = 1; j <= n; j++)
        {
            a(i,j) = ap::randomreal();
        }
    }
    passcount = 10000;
    was1 = false;
    for(pass = 1; pass <= passcount; pass++)
    {
        i1 = 1+ap::randominteger(n);
        i2 = i1+ap::randominteger(n-i1+1);
        j1 = 1+ap::randominteger(n-(i2-i1));
        j2 = j1+(i2-i1);
        copymatrix(a, i1, i2, j1, j2, b, i1, i2, j1, j2);
        inplacetranspose(b, i1, i2, j1, j2, x1);
        for(i = i1; i <= i2; i++)
        {
            for(j = j1; j <= j2; j++)
            {
                if( ap::fp_neq(a(i,j),b(i1+(j-j1),j1+(i-i1))) )
                {
                    was1 = true;
                }
            }
        }
    }
    iterrors = was1;
    
    //
    // testing copy and transpose
    //
    n = 10;
    a.setbounds(1, n, 1, n);
    b.setbounds(1, n, 1, n);
    for(i = 1; i <= n; i++)
    {
        for(j = 1; j <= n; j++)
        {
            a(i,j) = ap::randomreal();
        }
    }
    passcount = 10000;
    was1 = false;
    for(pass = 1; pass <= passcount; pass++)
    {
        i1 = 1+ap::randominteger(n);
        i2 = i1+ap::randominteger(n-i1+1);
        j1 = 1+ap::randominteger(n);
        j2 = j1+ap::randominteger(n-j1+1);
        copyandtranspose(a, i1, i2, j1, j2, b, j1, j2, i1, i2);
        for(i = i1; i <= i2; i++)
        {
            for(j = j1; j <= j2; j++)
            {
                if( ap::fp_neq(a(i,j),b(j,i)) )
                {
                    was1 = true;
                }
            }
        }
    }
    cterrors = was1;
    
    //
    // Testing MatrixMatrixMultiply
    //
    n = 10;
    a.setbounds(1, 2*n, 1, 2*n);
    b.setbounds(1, 2*n, 1, 2*n);
    c1.setbounds(1, 2*n, 1, 2*n);
    c2.setbounds(1, 2*n, 1, 2*n);
    x1.setbounds(1, n);
    x2.setbounds(1, n);
    for(i = 1; i <= 2*n; i++)
    {
        for(j = 1; j <= 2*n; j++)
        {
            a(i,j) = ap::randomreal();
            b(i,j) = ap::randomreal();
        }
    }
    passcount = 1000;
    was1 = false;
    for(pass = 1; pass <= passcount; pass++)
    {
        for(i = 1; i <= 2*n; i++)
        {
            for(j = 1; j <= 2*n; j++)
            {
                c1(i,j) = 2.1*i+3.1*j;
                c2(i,j) = c1(i,j);
            }
        }
        l = 1+ap::randominteger(n);
        k = 1+ap::randominteger(n);
        r = 1+ap::randominteger(n);
        i1 = 1+ap::randominteger(n);
        j1 = 1+ap::randominteger(n);
        i2 = 1+ap::randominteger(n);
        j2 = 1+ap::randominteger(n);
        i3 = 1+ap::randominteger(n);
        j3 = 1+ap::randominteger(n);
        trans1 = ap::fp_greater(ap::randomreal(),0.5);
        trans2 = ap::fp_greater(ap::randomreal(),0.5);
        if( trans1 )
        {
            col1 = l;
            row1 = k;
        }
        else
        {
            col1 = k;
            row1 = l;
        }
        if( trans2 )
        {
            col2 = k;
            row2 = r;
        }
        else
        {
            col2 = r;
            row2 = k;
        }
        scl1 = ap::randomreal();
        scl2 = ap::randomreal();
        matrixmatrixmultiply(a, i1, i1+row1-1, j1, j1+col1-1, trans1, b, i2, i2+row2-1, j2, j2+col2-1, trans2, scl1, c1, i3, i3+l-1, j3, j3+r-1, scl2, x1);
        naivematrixmatrixmultiply(a, i1, i1+row1-1, j1, j1+col1-1, trans1, b, i2, i2+row2-1, j2, j2+col2-1, trans2, scl1, c2, i3, i3+l-1, j3, j3+r-1, scl2);
        err = 0;
        for(i = 1; i <= l; i++)
        {
            for(j = 1; j <= r; j++)
            {
                err = ap::maxreal(err, fabs(c1(i3+i-1,j3+j-1)-c2(i3+i-1,j3+j-1)));
            }
        }
        if( ap::fp_greater(err,threshold) )
        {
            was1 = true;
            break;
        }
    }
    mmerrors = was1;
    
    //
    // report
    //
    waserrors = n2errors||amaxerrors||hsnerrors||mverrors||iterrors||cterrors||mmerrors;
    if( !silent )
    {
        printf("TESTING BLAS\n");
        printf("VectorNorm2:                             ");
        if( n2errors )
        {
            printf("FAILED\n");
        }
        else
        {
            printf("OK\n");
        }
        printf("AbsMax (vector/row/column):              ");
        if( amaxerrors )
        {
            printf("FAILED\n");
        }
        else
        {
            printf("OK\n");
        }
        printf("UpperHessenberg1Norm:                    ");
        if( hsnerrors )
        {
            printf("FAILED\n");
        }
        else
        {
            printf("OK\n");
        }
        printf("MatrixVectorMultiply:                    ");
        if( mverrors )
        {
            printf("FAILED\n");
        }
        else
        {
            printf("OK\n");
        }
        printf("InplaceTranspose:                        ");
        if( iterrors )
        {
            printf("FAILED\n");
        }
        else
        {
            printf("OK\n");
        }
        printf("CopyAndTranspose:                        ");
        if( cterrors )
        {
            printf("FAILED\n");
        }
        else
        {
            printf("OK\n");
        }
        printf("MatrixMatrixMultiply:                    ");
        if( mmerrors )
        {
            printf("FAILED\n");
        }
        else
        {
            printf("OK\n");
        }
        if( waserrors )
        {
            printf("TEST FAILED\n");
        }
        else
        {
            printf("TEST PASSED\n");
        }
        printf("\n\n");
    }
    result = !waserrors;
    return result;
}
示例#4
0
文件: svd.cpp 项目: gilso/Packages
/*************************************************************************
Singular value decomposition of a rectangular matrix.

The algorithm calculates the singular value decomposition of a matrix of
size MxN: A = U * S * V^T

The algorithm finds the singular values and, optionally, matrices U and V^T.
The algorithm can find both first min(M,N) columns of matrix U and rows of
matrix V^T (singular vectors), and matrices U and V^T wholly (of sizes MxM
and NxN respectively).

Take into account that the subroutine does not return matrix V but V^T.

Input parameters:
    A           -   matrix to be decomposed.
                    Array whose indexes range within [0..M-1, 0..N-1].
    M           -   number of rows in matrix A.
    N           -   number of columns in matrix A.
    UNeeded     -   0, 1 or 2. See the description of the parameter U.
    VTNeeded    -   0, 1 or 2. See the description of the parameter VT.
    AdditionalMemory -
                    If the parameter:
                     * equals 0, the algorithm doesn’t use additional
                       memory (lower requirements, lower performance).
                     * equals 1, the algorithm uses additional
                       memory of size min(M,N)*min(M,N) of real numbers.
                       It often speeds up the algorithm.
                     * equals 2, the algorithm uses additional
                       memory of size M*min(M,N) of real numbers.
                       It allows to get a maximum performance.
                    The recommended value of the parameter is 2.

Output parameters:
    W           -   contains singular values in descending order.
    U           -   if UNeeded=0, U isn't changed, the left singular vectors
                    are not calculated.
                    if Uneeded=1, U contains left singular vectors (first
                    min(M,N) columns of matrix U). Array whose indexes range
                    within [0..M-1, 0..Min(M,N)-1].
                    if UNeeded=2, U contains matrix U wholly. Array whose
                    indexes range within [0..M-1, 0..M-1].
    VT          -   if VTNeeded=0, VT isn’t changed, the right singular vectors
                    are not calculated.
                    if VTNeeded=1, VT contains right singular vectors (first
                    min(M,N) rows of matrix V^T). Array whose indexes range
                    within [0..min(M,N)-1, 0..N-1].
                    if VTNeeded=2, VT contains matrix V^T wholly. Array whose
                    indexes range within [0..N-1, 0..N-1].

  -- ALGLIB --
     Copyright 2005 by Bochkanov Sergey
*************************************************************************/
bool rmatrixsvd(ap::real_2d_array a,
     int m,
     int n,
     int uneeded,
     int vtneeded,
     int additionalmemory,
     ap::real_1d_array& w,
     ap::real_2d_array& u,
     ap::real_2d_array& vt)
{
    bool result;
    ap::real_1d_array tauq;
    ap::real_1d_array taup;
    ap::real_1d_array tau;
    ap::real_1d_array e;
    ap::real_1d_array work;
    ap::real_2d_array t2;
    bool isupper;
    int minmn;
    int ncu;
    int nrvt;
    int nru;
    int ncvt;
    int i;
    int j;

    result = true;
    if( m==0||n==0 )
    {
        return result;
    }
    ap::ap_error::make_assertion(uneeded>=0&&uneeded<=2, "SVDDecomposition: wrong parameters!");
    ap::ap_error::make_assertion(vtneeded>=0&&vtneeded<=2, "SVDDecomposition: wrong parameters!");
    ap::ap_error::make_assertion(additionalmemory>=0&&additionalmemory<=2, "SVDDecomposition: wrong parameters!");
    
    //
    // initialize
    //
    minmn = ap::minint(m, n);
    w.setbounds(1, minmn);
    ncu = 0;
    nru = 0;
    if( uneeded==1 )
    {
        nru = m;
        ncu = minmn;
        u.setbounds(0, nru-1, 0, ncu-1);
    }
    if( uneeded==2 )
    {
        nru = m;
        ncu = m;
        u.setbounds(0, nru-1, 0, ncu-1);
    }
    nrvt = 0;
    ncvt = 0;
    if( vtneeded==1 )
    {
        nrvt = minmn;
        ncvt = n;
        vt.setbounds(0, nrvt-1, 0, ncvt-1);
    }
    if( vtneeded==2 )
    {
        nrvt = n;
        ncvt = n;
        vt.setbounds(0, nrvt-1, 0, ncvt-1);
    }
    
    //
    // M much larger than N
    // Use bidiagonal reduction with QR-decomposition
    //
    if( ap::fp_greater(m,1.6*n) )
    {
        if( uneeded==0 )
        {
            
            //
            // No left singular vectors to be computed
            //
            rmatrixqr(a, m, n, tau);
            for(i = 0; i <= n-1; i++)
            {
                for(j = 0; j <= i-1; j++)
                {
                    a(i,j) = 0;
                }
            }
            rmatrixbd(a, n, n, tauq, taup);
            rmatrixbdunpackpt(a, n, n, taup, nrvt, vt);
            rmatrixbdunpackdiagonals(a, n, n, isupper, w, e);
            result = rmatrixbdsvd(w, e, n, isupper, false, u, 0, a, 0, vt, ncvt);
            return result;
        }
        else
        {
            
            //
            // Left singular vectors (may be full matrix U) to be computed
            //
            rmatrixqr(a, m, n, tau);
            rmatrixqrunpackq(a, m, n, tau, ncu, u);
            for(i = 0; i <= n-1; i++)
            {
                for(j = 0; j <= i-1; j++)
                {
                    a(i,j) = 0;
                }
            }
            rmatrixbd(a, n, n, tauq, taup);
            rmatrixbdunpackpt(a, n, n, taup, nrvt, vt);
            rmatrixbdunpackdiagonals(a, n, n, isupper, w, e);
            if( additionalmemory<1 )
            {
                
                //
                // No additional memory can be used
                //
                rmatrixbdmultiplybyq(a, n, n, tauq, u, m, n, true, false);
                result = rmatrixbdsvd(w, e, n, isupper, false, u, m, a, 0, vt, ncvt);
            }
            else
            {
                
                //
                // Large U. Transforming intermediate matrix T2
                //
                work.setbounds(1, ap::maxint(m, n));
                rmatrixbdunpackq(a, n, n, tauq, n, t2);
                copymatrix(u, 0, m-1, 0, n-1, a, 0, m-1, 0, n-1);
                inplacetranspose(t2, 0, n-1, 0, n-1, work);
                result = rmatrixbdsvd(w, e, n, isupper, false, u, 0, t2, n, vt, ncvt);
                matrixmatrixmultiply(a, 0, m-1, 0, n-1, false, t2, 0, n-1, 0, n-1, true, 1.0, u, 0, m-1, 0, n-1, 0.0, work);
            }
            return result;
        }
    }
    
    //
    // N much larger than M
    // Use bidiagonal reduction with LQ-decomposition
    //
    if( ap::fp_greater(n,1.6*m) )
    {
        if( vtneeded==0 )
        {
            
            //
            // No right singular vectors to be computed
            //
            rmatrixlq(a, m, n, tau);
            for(i = 0; i <= m-1; i++)
            {
                for(j = i+1; j <= m-1; j++)
                {
                    a(i,j) = 0;
                }
            }
            rmatrixbd(a, m, m, tauq, taup);
            rmatrixbdunpackq(a, m, m, tauq, ncu, u);
            rmatrixbdunpackdiagonals(a, m, m, isupper, w, e);
            work.setbounds(1, m);
            inplacetranspose(u, 0, nru-1, 0, ncu-1, work);
            result = rmatrixbdsvd(w, e, m, isupper, false, a, 0, u, nru, vt, 0);
            inplacetranspose(u, 0, nru-1, 0, ncu-1, work);
            return result;
        }
        else
        {
            
            //
            // Right singular vectors (may be full matrix VT) to be computed
            //
            rmatrixlq(a, m, n, tau);
            rmatrixlqunpackq(a, m, n, tau, nrvt, vt);
            for(i = 0; i <= m-1; i++)
            {
                for(j = i+1; j <= m-1; j++)
                {
                    a(i,j) = 0;
                }
            }
            rmatrixbd(a, m, m, tauq, taup);
            rmatrixbdunpackq(a, m, m, tauq, ncu, u);
            rmatrixbdunpackdiagonals(a, m, m, isupper, w, e);
            work.setbounds(1, ap::maxint(m, n));
            inplacetranspose(u, 0, nru-1, 0, ncu-1, work);
            if( additionalmemory<1 )
            {
                
                //
                // No additional memory available
                //
                rmatrixbdmultiplybyp(a, m, m, taup, vt, m, n, false, true);
                result = rmatrixbdsvd(w, e, m, isupper, false, a, 0, u, nru, vt, n);
            }
            else
            {
                
                //
                // Large VT. Transforming intermediate matrix T2
                //
                rmatrixbdunpackpt(a, m, m, taup, m, t2);
                result = rmatrixbdsvd(w, e, m, isupper, false, a, 0, u, nru, t2, m);
                copymatrix(vt, 0, m-1, 0, n-1, a, 0, m-1, 0, n-1);
                matrixmatrixmultiply(t2, 0, m-1, 0, m-1, false, a, 0, m-1, 0, n-1, false, 1.0, vt, 0, m-1, 0, n-1, 0.0, work);
            }
            inplacetranspose(u, 0, nru-1, 0, ncu-1, work);
            return result;
        }
    }
    
    //
    // M<=N
    // We can use inplace transposition of U to get rid of columnwise operations
    //
    if( m<=n )
    {
        rmatrixbd(a, m, n, tauq, taup);
        rmatrixbdunpackq(a, m, n, tauq, ncu, u);
        rmatrixbdunpackpt(a, m, n, taup, nrvt, vt);
        rmatrixbdunpackdiagonals(a, m, n, isupper, w, e);
        work.setbounds(1, m);
        inplacetranspose(u, 0, nru-1, 0, ncu-1, work);
        result = rmatrixbdsvd(w, e, minmn, isupper, false, a, 0, u, nru, vt, ncvt);
        inplacetranspose(u, 0, nru-1, 0, ncu-1, work);
        return result;
    }
    
    //
    // Simple bidiagonal reduction
    //
    rmatrixbd(a, m, n, tauq, taup);
    rmatrixbdunpackq(a, m, n, tauq, ncu, u);
    rmatrixbdunpackpt(a, m, n, taup, nrvt, vt);
    rmatrixbdunpackdiagonals(a, m, n, isupper, w, e);
    if( additionalmemory<2||uneeded==0 )
    {
        
        //
        // We cant use additional memory or there is no need in such operations
        //
        result = rmatrixbdsvd(w, e, minmn, isupper, false, u, nru, a, 0, vt, ncvt);
    }
    else
    {
        
        //
        // We can use additional memory
        //
        t2.setbounds(0, minmn-1, 0, m-1);
        copyandtranspose(u, 0, m-1, 0, minmn-1, t2, 0, minmn-1, 0, m-1);
        result = rmatrixbdsvd(w, e, minmn, isupper, false, u, 0, t2, m, vt, ncvt);
        copyandtranspose(t2, 0, minmn-1, 0, m-1, u, 0, m-1, 0, minmn-1);
    }
    return result;
}