Beispiel #1
0
/********************************************************************
complex rank-1 kernel
********************************************************************/
bool ialglib::_i_cmatrixrank1f(int m,
     int n,
     ap::complex_2d_array& a,
     int ia,
     int ja,
     ap::complex_1d_array& u,
     int uoffs,
     ap::complex_1d_array& v,
     int voffs)
{
    ap::complex *arow, *pu, *pv, *vtmp, *dst;
    int n2 = n/2;
    int stride  = a.getstride();
    int i, j;

    //
    // update pairs of rows
    //
    arow  = &a(ia,ja);
    pu    = &u(uoffs);
    vtmp  = &v(voffs);
    for(i=0; i<m; i++, arow+=stride, pu++)
    {
        //
        // update by two
        //
        for(j=0,pv=vtmp, dst=arow; j<n2; j++, dst+=2, pv+=2)
        {
            double ux  = pu[0].x;
            double uy  = pu[0].y;
            double v0x = pv[0].x;
            double v0y = pv[0].y;
            double v1x = pv[1].x;
            double v1y = pv[1].y;
            dst[0].x += ux*v0x-uy*v0y;
            dst[0].y += ux*v0y+uy*v0x;
            dst[1].x += ux*v1x-uy*v1y;
            dst[1].y += ux*v1y+uy*v1x;
            //dst[0] += pu[0]*pv[0];
            //dst[1] += pu[0]*pv[1];
        }

        //
        // final update
        //
        if( n%2!=0 )
            dst[0] += pu[0]*pv[0];
    }
    return true;
}
Beispiel #2
0
/*************************************************************************
LU inverse
*************************************************************************/
static bool cmatrixinvmatlu(ap::complex_2d_array& a,
     const ap::integer_1d_array& pivots,
     int n)
{
    bool result;
    ap::complex_1d_array work;
    int i;
    int iws;
    int j;
    int jb;
    int jj;
    int jp;
    ap::complex v;

    result = true;
    
    //
    // Quick return if possible
    //
    if( n==0 )
    {
        return result;
    }
    work.setbounds(0, n-1);
    
    //
    // Form inv(U)
    //
    if( !cmatrixinvmattr(a, n, true, false) )
    {
        result = false;
        return result;
    }
    
    //
    // Solve the equation inv(A)*L = inv(U) for inv(A).
    //
    for(j = n-1; j >= 0; j--)
    {
        
        //
        // Copy current column of L to WORK and replace with zeros.
        //
        for(i = j+1; i <= n-1; i++)
        {
            work(i) = a(i,j);
            a(i,j) = 0;
        }
        
        //
        // Compute current column of inv(A).
        //
        if( j<n-1 )
        {
            for(i = 0; i <= n-1; i++)
            {
                v = ap::vdotproduct(&a(i, j+1), 1, "N", &work(j+1), 1, "N", ap::vlen(j+1,n-1));
                a(i,j) = a(i,j)-v;
            }
        }
    }
    
    //
    // Apply column interchanges.
    //
    for(j = n-2; j >= 0; j--)
    {
        jp = pivots(j);
        if( jp!=j )
        {
            ap::vmove(&work(0), 1, &a(0, j), a.getstride(), "N", ap::vlen(0,n-1));
            ap::vmove(&a(0, j), a.getstride(), &a(0, jp), a.getstride(), "N", ap::vlen(0,n-1));
            ap::vmove(&a(0, jp), a.getstride(), &work(0), 1, "N", ap::vlen(0,n-1));
        }
    }
    return result;
}
Beispiel #3
0
/*************************************************************************
triangular inverse
*************************************************************************/
static bool cmatrixinvmattr(ap::complex_2d_array& a,
     int n,
     bool isupper,
     bool isunittriangular)
{
    bool result;
    bool nounit;
    int i;
    int j;
    ap::complex v;
    ap::complex ajj;
    ap::complex_1d_array t;

    result = true;
    t.setbounds(0, n-1);
    
    //
    // Test the input parameters.
    //
    nounit = !isunittriangular;
    if( isupper )
    {
        
        //
        // Compute inverse of upper triangular matrix.
        //
        for(j = 0; j <= n-1; j++)
        {
            if( nounit )
            {
                if( a(j,j)==0 )
                {
                    result = false;
                    return result;
                }
                a(j,j) = 1/a(j,j);
                ajj = -a(j,j);
            }
            else
            {
                ajj = -1;
            }
            
            //
            // Compute elements 1:j-1 of j-th column.
            //
            if( j>0 )
            {
                ap::vmove(&t(0), 1, &a(0, j), a.getstride(), "N", ap::vlen(0,j-1));
                for(i = 0; i <= j-1; i++)
                {
                    if( i<j-1 )
                    {
                        v = ap::vdotproduct(&a(i, i+1), 1, "N", &t(i+1), 1, "N", ap::vlen(i+1,j-1));
                    }
                    else
                    {
                        v = 0;
                    }
                    if( nounit )
                    {
                        a(i,j) = v+a(i,i)*t(i);
                    }
                    else
                    {
                        a(i,j) = v+t(i);
                    }
                }
                ap::vmul(&a(0, j), a.getstride(), ap::vlen(0,j-1), ajj);
            }
        }
    }
    else
    {
        
        //
        // Compute inverse of lower triangular matrix.
        //
        for(j = n-1; j >= 0; j--)
        {
            if( nounit )
            {
                if( a(j,j)==0 )
                {
                    result = false;
                    return result;
                }
                a(j,j) = 1/a(j,j);
                ajj = -a(j,j);
            }
            else
            {
                ajj = -1;
            }
            if( j<n-1 )
            {
                
                //
                // Compute elements j+1:n of j-th column.
                //
                ap::vmove(&t(j+1), 1, &a(j+1, j), a.getstride(), "N", ap::vlen(j+1,n-1));
                for(i = j+1; i <= n-1; i++)
                {
                    if( i>j+1 )
                    {
                        v = ap::vdotproduct(&a(i, j+1), 1, "N", &t(j+1), 1, "N", ap::vlen(j+1,i-1));
                    }
                    else
                    {
                        v = 0;
                    }
                    if( nounit )
                    {
                        a(i,j) = v+a(i,i)*t(i);
                    }
                    else
                    {
                        a(i,j) = v+t(i);
                    }
                }
                ap::vmul(&a(j+1, j), a.getstride(), ap::vlen(j+1,n-1), ajj);
            }
        }
    }
    return result;
}
Beispiel #4
0
/********************************************************************
complex TRSM kernel
********************************************************************/
bool ialglib::_i_cmatrixrighttrsmf(int m,
     int n,
     const ap::complex_2d_array& a,
     int i1,
     int j1,
     bool isupper,
     bool isunit,
     int optype,
     ap::complex_2d_array& x,
     int i2,
     int j2)
{
    if( m>alglib_c_block || n>alglib_c_block )
        return false;


    //
    // local buffers
    //
    double *pdiag;
    int i;
    double __abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
    double __xbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
    double __tmpbuf[2*alglib_c_block+alglib_simd_alignment];
    double * const abuf   = (double * const) alglib_align(__abuf,  alglib_simd_alignment);
    double * const xbuf   = (double * const) alglib_align(__xbuf,  alglib_simd_alignment);
    double * const tmpbuf = (double * const) alglib_align(__tmpbuf,alglib_simd_alignment);

    //
    // Prepare
    //
    bool uppera;
    mcopyblock_complex(n, n, &a(i1,j1), optype, a.getstride(), abuf);
    mcopyblock_complex(m, n, &x(i2,j2), 0, x.getstride(), xbuf);
    if( isunit )
        for(i=0,pdiag=abuf; i<n; i++,pdiag+=2*(alglib_c_block+1))
        {
            pdiag[0] = 1.0;
            pdiag[1] = 0.0;
        }
    if( optype==0 )
        uppera = isupper;
    else
        uppera = !isupper;

    //
    // Solve Y*A^-1=X where A is upper or lower triangular
    //
    if( uppera )
    {
        for(i=0,pdiag=abuf; i<n; i++,pdiag+=2*(alglib_c_block+1))
        {
            ap::complex beta = 1.0/ap::complex(pdiag[0],pdiag[1]);
            ap::complex alpha;
            alpha.x = -beta.x;
            alpha.y = -beta.y;
            vcopy_complex(i, abuf+2*i, alglib_c_block, tmpbuf, 1, "No conj");
            mv_complex(m, i, xbuf, tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
        }
        mcopyunblock_complex(m, n, xbuf, 0, &x(i2,j2), x.getstride());
    }
    else
    {
        for(i=n-1,pdiag=abuf+2*((n-1)*alglib_c_block+(n-1)); i>=0; i--,pdiag-=2*(alglib_c_block+1))
        {
            ap::complex beta = 1.0/ap::complex(pdiag[0],pdiag[1]);
            ap::complex alpha;
            alpha.x = -beta.x;
            alpha.y = -beta.y;
            vcopy_complex(n-1-i, pdiag+2*alglib_c_block, alglib_c_block, tmpbuf, 1, "No conj");
            mv_complex(m, n-1-i, xbuf+2*(i+1), tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
        }
        mcopyunblock_complex(m, n, xbuf, 0, &x(i2,j2), x.getstride());
    }
    return true;
}
Beispiel #5
0
/********************************************************************
complex GEMM kernel
********************************************************************/
bool ialglib::_i_cmatrixgemmf(int m,
     int n,
     int k,
     ap::complex alpha,
     const ap::complex_2d_array& _a,
     int ia,
     int ja,
     int optypea,
     const ap::complex_2d_array& _b,
     int ib,
     int jb,
     int optypeb,
     ap::complex beta,
     ap::complex_2d_array& _c,
     int ic,
     int jc)
 {
    if( m>alglib_c_block || n>alglib_c_block || k>alglib_c_block )
        return false;

    const ap::complex *arow;
    ap::complex *crow;
    int i, stride, cstride;
    double __abuf[2*alglib_c_block+alglib_simd_alignment];
    double __b[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
    double * const abuf = (double * const) alglib_align(__abuf,alglib_simd_alignment);
    double * const b    = (double * const) alglib_align(__b,   alglib_simd_alignment);

    //
    // copy b
    //
    int brows = optypeb==0 ? k : n;
    int bcols = optypeb==0 ? n : k;
    if( optypeb==0 )
        mcopyblock_complex(brows, bcols, &_b(ib,jb), 1, _b.getstride(), b);
    if( optypeb==1 )
        mcopyblock_complex(brows, bcols, &_b(ib,jb), 0, _b.getstride(), b);
    if( optypeb==2 )
        mcopyblock_complex(brows, bcols, &_b(ib,jb), 3, _b.getstride(), b);

    //
    // multiply B by A (from the right, by rows)
    // and store result in C
    //
    arow  = &_a(ia,ja);
    crow  = &_c(ic,jc);
    stride = _a.getstride();
    cstride = _c.getstride();
    for(i=0; i<m; i++)
    {
        if( optypea==0 )
        {
            vcopy_complex(k, arow, 1, abuf, 1, "No conj");
            arow += stride;
        }
        else if( optypea==1 )
        {
            vcopy_complex(k, arow, stride, abuf, 1, "No conj");
            arow++;
        }
        else
        {
            vcopy_complex(k, arow, stride, abuf, 1, "Conj");
            arow++;
        }
        if( beta==0 )
            vzero_complex(n, crow, 1);
        mv_complex(n, k, b, abuf, crow, NULL, 1, alpha, beta);
        crow += cstride;
    }
    return true;
}
Beispiel #6
0
/********************************************************************
complex SYRK kernel
********************************************************************/
bool ialglib::_i_cmatrixsyrkf(int n,
     int k,
     double alpha,
     const ap::complex_2d_array& a,
     int ia,
     int ja,
     int optypea,
     double beta,
     ap::complex_2d_array& c,
     int ic,
     int jc,
     bool isupper)
{
    if( n>alglib_c_block || k>alglib_c_block )
        return false;
    if( n==0 )
        return true;
    
    //
    // local buffers
    //
    double *arow, *crow;
    int i;
    double __abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
    double __cbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
    double __tmpbuf[2*alglib_c_block+alglib_simd_alignment];
    double * const abuf   = (double * const) alglib_align(__abuf,  alglib_simd_alignment);
    double * const cbuf   = (double * const) alglib_align(__cbuf,  alglib_simd_alignment);
    double * const tmpbuf = (double * const) alglib_align(__tmpbuf,alglib_simd_alignment);

    //
    // copy A and C, task is transformed to "A*A^H"-form.
    // if beta==0, then C is filled by zeros (and not referenced)
    //
    // alpha==0 or k==0 are correctly processed (A is not referenced)
    //
    if( alpha==0 )
        k = 0;
    if( k>0 )
    {
        if( optypea==0 )
            mcopyblock_complex(n, k, &a(ia,ja), 3, a.getstride(), abuf);
        else
            mcopyblock_complex(k, n, &a(ia,ja), 1, a.getstride(), abuf);
    }
    mcopyblock_complex(n, n, &c(ic,jc), 0, c.getstride(), cbuf);
    if( beta==0 )
    {
        for(i=0,crow=cbuf; i<n; i++,crow+=2*alglib_c_block)
            if( isupper )
                vzero(2*(n-i), crow+2*i, 1);
            else
                vzero(2*(i+1), crow, 1);
    }


    //
    // update C
    //
    if( isupper )
    {
        for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=2*alglib_c_block,crow+=2*alglib_c_block)
        {
            vcopy_complex(k, arow, 1, tmpbuf, 1, "Conj");
            mv_complex(n-i, k, arow, tmpbuf, NULL, crow+2*i, 1, alpha, beta);
        }
    }
    else
    {
        for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=2*alglib_c_block,crow+=2*alglib_c_block)
        {
            vcopy_complex(k, arow, 1, tmpbuf, 1, "Conj");
            mv_complex(i+1, k, abuf, tmpbuf, NULL, crow, 1, alpha, beta);
        }
    }

    //
    // copy back
    //
    mcopyunblock_complex(n, n, cbuf, 0, &c(ic,jc), c.getstride());

    return true;
}
Beispiel #7
0
/********************************************************************
complex TRSM kernel
********************************************************************/
bool ialglib::_i_cmatrixlefttrsmf(int m,
     int n,
     const ap::complex_2d_array& a,
     int i1,
     int j1,
     bool isupper,
     bool isunit,
     int optype,
     ap::complex_2d_array& x,
     int i2,
     int j2)
{
    if( m>alglib_c_block || n>alglib_c_block )
        return false;
    
    //
    // local buffers
    //
    double *pdiag, *arow;
    int i;
    double __abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
    double __xbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
    double __tmpbuf[2*alglib_c_block+alglib_simd_alignment];
    double * const abuf   = (double * const) alglib_align(__abuf,  alglib_simd_alignment);
    double * const xbuf   = (double * const) alglib_align(__xbuf,  alglib_simd_alignment);
    double * const tmpbuf = (double * const) alglib_align(__tmpbuf,alglib_simd_alignment);

    //
    // Prepare
    // Transpose X (so we may use mv, which calculates A*x, but not x*A)
    //
    bool uppera;
    mcopyblock_complex(m, m, &a(i1,j1), optype, a.getstride(), abuf);
    mcopyblock_complex(m, n, &x(i2,j2), 1, x.getstride(), xbuf);
    if( isunit )
        for(i=0,pdiag=abuf; i<m; i++,pdiag+=2*(alglib_c_block+1))
        {
            pdiag[0] = 1.0;
            pdiag[1] = 0.0;
        }
    if( optype==0 )
        uppera = isupper;
    else
        uppera = !isupper;

    //
    // Solve A^-1*Y^T=X^T where A is upper or lower triangular
    //
    if( uppera )
    {
        for(i=m-1,pdiag=abuf+2*((m-1)*alglib_c_block+(m-1)); i>=0; i--,pdiag-=2*(alglib_c_block+1))
        {
            ap::complex beta = 1.0/ap::complex(pdiag[0],pdiag[1]);
            ap::complex alpha;
            alpha.x = -beta.x;
            alpha.y = -beta.y;
            vcopy_complex(m-1-i, pdiag+2, 1, tmpbuf, 1, "No conj");
            mv_complex(n, m-1-i, xbuf+2*(i+1), tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
        }
        mcopyunblock_complex(m, n, xbuf, 1, &x(i2,j2), x.getstride());
    }
    else
    {   for(i=0,pdiag=abuf,arow=abuf; i<m; i++,pdiag+=2*(alglib_c_block+1),arow+=2*alglib_c_block)
        {
            ap::complex beta = 1.0/ap::complex(pdiag[0],pdiag[1]);
            ap::complex alpha;
            alpha.x = -beta.x;
            alpha.y = -beta.y;
            vcopy_complex(i, arow, 1, tmpbuf, 1, "No conj");
            mv_complex(n, i, xbuf, tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
        }
        mcopyunblock_complex(m, n, xbuf, 1, &x(i2,j2), x.getstride());
    }
    return true;
}