Example #1
0
// Hermitian matrix-vector product on sequential matrices.
//
// Validates the operand shapes in non-RELEASE builds and then forwards to
// blas::Hemv (presumably y := alpha A x + beta y, reading only the `uplo`
// triangle of A, per the usual BLAS hemv convention).
//
// x and y may each be stored as a column (unit stride) or as a row, in which
// case consecutive entries are separated by the leading dimension.
inline void
Hemv
( UpperOrLower uplo,
  T alpha, const Matrix<T>& A, const Matrix<T>& x, T beta, Matrix<T>& y )
{
#ifndef RELEASE
    PushCallStack("Hemv");
    if( A.Height() != A.Width() )
        throw std::logic_error("A must be square");
    const bool xIsVector = ( x.Height() == 1 || x.Width() == 1 );
    const bool yIsVector = ( y.Height() == 1 || y.Width() == 1 );
    if( !( xIsVector && yIsVector ) )
        throw std::logic_error("x and y must be vectors");
    const int xLen = ( x.Width()==1 ? x.Height() : x.Width() );
    const int yLen = ( y.Width()==1 ? y.Height() : y.Width() );
    if( !( A.Height() == xLen && A.Height() == yLen ) )
        throw std::logic_error("A must conform with x and y");
#endif
    // Column vectors are contiguous; row vectors step by the leading dimension
    const int xStride = ( x.Width()==1 ? 1 : x.LDim() );
    const int yStride = ( y.Width()==1 ? 1 : y.LDim() );
    const int dim = A.Height();
    const char triangle = UpperOrLowerToChar( uplo );
    blas::Hemv
    ( triangle, dim,
      alpha, A.LockedBuffer(), A.LDim(), x.LockedBuffer(), xStride,
      beta,  y.Buffer(), yStride );
#ifndef RELEASE
    PopCallStack();
#endif
}
Example #2
0
// Symmetric (or, when `conjugate` is true, Hermitian) matrix-matrix multiply.
//
// Dispatches to blas::Symm or blas::Hemm with identical arguments; the
// problem dimensions passed down are the height and width of C.
inline void
Symm
( LeftOrRight side, UpperOrLower uplo,
  T alpha, const Matrix<T>& A, const Matrix<T>& B, T beta, Matrix<T>& C,
  bool conjugate=false )
{
#ifndef RELEASE
    CallStackEntry entry("Symm");
#endif
    const char whichSide = LeftOrRightToChar( side );
    const char triangle = UpperOrLowerToChar( uplo );
    if( !conjugate )
    {
        // Plain symmetric product
        blas::Symm
        ( whichSide, triangle, C.Height(), C.Width(),
          alpha, A.LockedBuffer(), A.LDim(),
                 B.LockedBuffer(), B.LDim(),
          beta,  C.Buffer(),       C.LDim() );
    }
    else
    {
        // Conjugated variant: treat A as Hermitian
        blas::Hemm
        ( whichSide, triangle, C.Height(), C.Width(),
          alpha, A.LockedBuffer(), A.LDim(),
                 B.LockedBuffer(), B.LDim(),
          beta,  C.Buffer(),       C.LDim() );
    }
}
Example #3
0
void FusedRowPanelGemvs
( bool conjugate, T alpha, 
  const Matrix<T>& A, const Matrix<T>& q, const Matrix<T>& r, 
                            Matrix<T>& s,       Matrix<T>& t,
  Int bsize )
{
    const Int m = A.Height();
    const Int n = A.Width();
    const char transChar = ( conjugate ? 'C' : 'T' );
    const T* ABuf = A.LockedBuffer();
    const T* qBuf = q.LockedBuffer();
    const T* rBuf = r.LockedBuffer();
          T* sBuf = s.Buffer();
          T* tBuf = t.Buffer();
    const Int ALDim = A.LDim();
    for( Int k=0; k<n; k+=bsize )
    {
        const Int nb = Min(n-k,bsize);
        blas::Gemv
        ( 'N', m, nb, alpha, 
          &ABuf[k*ALDim], ALDim, &qBuf[k], 1, T(1), sBuf, 1 );
        blas::Gemv
        ( transChar, m, nb, alpha,
          &ABuf[k*ALDim], ALDim, rBuf,     1, T(1), &tBuf[k], 1 );
    }
}
Example #4
0
// Symmetric (or, when `conjugate` is true, Hermitian) rank-one update of A
// driven by the vector x.
//
// Shape checks run only in non-RELEASE builds. The update itself is delegated
// to blas::Syr or blas::Her; x may be a column (unit stride) or a row (stride
// equal to its leading dimension).
inline void
Syr
( UpperOrLower uplo, T alpha, const Matrix<T>& x, Matrix<T>& A, 
  bool conjugate=false )
{
#ifndef RELEASE
    PushCallStack("Syr");
    if( A.Height() != A.Width() )
        throw std::logic_error("A must be square");
    const bool xIsVector = ( x.Width() == 1 || x.Height() == 1 );
    if( !xIsVector )
        throw std::logic_error("x must be a vector");
    const int xLen = ( x.Width()==1 ? x.Height() : x.Width() );
    if( xLen != A.Height() )
        throw std::logic_error("x must conform with A");
#endif
    const int xStride = ( x.Width()==1 ? 1 : x.LDim() );
    const int dim = A.Height();
    const char triangle = UpperOrLowerToChar( uplo );
    if( !conjugate )
    {
        blas::Syr
        ( triangle, dim, alpha, x.LockedBuffer(), xStride, 
          A.Buffer(), A.LDim() );
    }
    else
    {
        blas::Her
        ( triangle, dim, alpha, x.LockedBuffer(), xStride, 
          A.Buffer(), A.LDim() );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Example #5
0
void TestLAPACK
( const Matrix<double>& d, const double& rho, const Matrix<double>& z )
{
    typedef double Real;
    const Int n = d.Height();
    Timer timer;
    Matrix<Real> w(n,1), dPlusShift(n,1), dMinusShift(n,1);
    BlasInt nBLAS = n;
    BlasInt info;
    timer.Start();
    for( Int i=0; i<n; ++i )
    {
        Real sigma;
        const BlasInt ip1 = i+1;
        EL_LAPACK(dlasd4)
        ( &nBLAS, &ip1, d.LockedBuffer(), z.LockedBuffer(),
          dMinusShift.Buffer(), &rho, &sigma,
          dPlusShift.Buffer(), &info );
        if( info != 0 )
            RuntimeError("LAPACK's dlasd4 did not converge");
        w(i) = sigma;
    }
    const Real lapackTime = timer.Stop(); 
    Output("LAPACK secular singular value time: ",lapackTime," seconds");
}
Example #6
0
// General rank-one update of A from the vectors x and y, delegated to
// blas::Ger.
//
// In non-RELEASE builds, x and y must be vectors whose lengths equal the
// height and width of A respectively; violations are reported through
// LogicError. Row-stored vectors are traversed with their leading dimension
// as the stride.
inline void
Ger( T alpha, const Matrix<T>& x, const Matrix<T>& y, Matrix<T>& A )
{
#ifndef RELEASE
    CallStackEntry entry("Ger");
    const bool xIsVector = ( x.Height() == 1 || x.Width() == 1 );
    const bool yIsVector = ( y.Height() == 1 || y.Width() == 1 );
    if( !( xIsVector && yIsVector ) )
        LogicError("x and y must be vectors");
    const Int xLen = ( x.Width()==1 ? x.Height() : x.Width() );
    const Int yLen = ( y.Width()==1 ? y.Height() : y.Width() );
    const bool conformal = ( xLen == A.Height() && yLen == A.Width() );
    if( !conformal )
    {
        // Report every operand's shape to make the mismatch easy to spot
        std::ostringstream msg;
        msg << "Nonconformal Ger:\n";
        msg << "  x ~ " << x.Height() << " x " << x.Width() << "\n";
        msg << "  y ~ " << y.Height() << " x " << y.Width() << "\n";
        msg << "  A ~ " << A.Height() << " x " << A.Width();
        LogicError( msg.str() );
    }
#endif
    const Int xStride = ( x.Width()==1 ? 1 : x.LDim() );
    const Int yStride = ( y.Width()==1 ? 1 : y.LDim() );
    const Int height = A.Height();
    const Int width = A.Width();
    blas::Ger
    ( height, width, alpha, 
      x.LockedBuffer(), xStride, y.LockedBuffer(), yStride,
      A.Buffer(), A.LDim() );
}
Example #7
0
void Apply
( const Matrix<Real>& x, 
  const Matrix<Real>& y,
        Matrix<Real>& z,
  const Matrix<Int>& orders, 
  const Matrix<Int>& firstInds )
{
    DEBUG_ONLY(CSE cse("soc::Apply"))
    soc::Dots( x, y, z, orders, firstInds );
    auto xRoots = x;
    auto yRoots = y;
    cone::Broadcast( xRoots, orders, firstInds );
    cone::Broadcast( yRoots, orders, firstInds );

    const Int height = x.Height();
    const Real* xBuf     = x.LockedBuffer();
    const Real* xRootBuf = xRoots.LockedBuffer();
    const Real* yBuf     = y.LockedBuffer();
    const Real* yRootBuf = yRoots.LockedBuffer();
          Real* zBuf = z.Buffer();
    const Int* firstIndBuf = firstInds.LockedBuffer();

    for( Int i=0; i<height; ++i )
        if( i != firstIndBuf[i] )
            zBuf[i] += xRootBuf[i]*yBuf[i] + yRootBuf[i]*xBuf[i];
}
Example #8
0
// Hermitian rank-2k update of C from A and B, delegated to blas::Her2k.
//
// Only the NORMAL and ADJOINT orientations are accepted (checked in
// non-RELEASE builds). The outer dimension of A and B -- their height for
// NORMAL, their width for ADJOINT -- must match both dimensions of C; the
// remaining dimension of A is passed to BLAS as k.
inline void
Her2k
( UpperOrLower uplo, Orientation orientation,
  T alpha, const Matrix<T>& A, const Matrix<T>& B, T beta, Matrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("Her2k");
    if( orientation != NORMAL && orientation != ADJOINT )
        throw std::logic_error
        ("Her2k only accepts NORMAL and ADJOINT options");
    const int outerA = ( orientation == NORMAL ? A.Height() : A.Width() );
    const int outerB = ( orientation == NORMAL ? B.Height() : B.Width() );
    const bool conformal = 
        ( outerA == C.Height() && outerA == C.Width() &&
          outerB == C.Height() && outerB == C.Width() );
    if( !conformal )
        throw std::logic_error("Nonconformal Her2k");
#endif
    const int innerDim = ( orientation == NORMAL ? A.Width() : A.Height() );
    const char triangle = UpperOrLowerToChar( uplo );
    const char orient = OrientationToChar( orientation );
    blas::Her2k
    ( triangle, orient, C.Height(), innerDim,
      alpha, A.LockedBuffer(), A.LDim(),
             B.LockedBuffer(), B.LDim(),
      beta,  C.Buffer(),       C.LDim() );
#ifndef RELEASE
    PopCallStack();
#endif
}
Example #9
0
void NesterovTodd
( const Matrix<Real>& s, 
  const Matrix<Real>& z,
        Matrix<Real>& w )
{
    DEBUG_CSE
    const Int k = s.Height();
    w.Resize( k, 1 );
    const Real* sBuf = s.LockedBuffer();
    const Real* zBuf = z.LockedBuffer();
          Real* wBuf = w.Buffer();

    for( Int i=0; i<k; ++i )
        wBuf[i] = Sqrt(sBuf[i]/zBuf[i]);
}
Example #10
0
// Triangular matrix-matrix multiply, overwriting B, delegated to blas::Trmm.
//
// In non-RELEASE builds A must be square and its size must match the height
// of B when applied from the LEFT, or the width of B otherwise.
inline void
Trmm
( LeftOrRight side, UpperOrLower uplo,
  Orientation orientation, UnitOrNonUnit diag,
  T alpha, const Matrix<T>& A, Matrix<T>& B )
{
#ifndef RELEASE
    CallStackEntry entry("Trmm");
    if( A.Height() != A.Width() )
        LogicError("Triangular matrix must be square");
    // The dimension of B that A acts upon depends on the side
    if( A.Height() != ( side == LEFT ? B.Height() : B.Width() ) )
        LogicError("Nonconformal Trmm");
#endif
    const char whichSide = LeftOrRightToChar( side );
    const char triangle = UpperOrLowerToChar( uplo );
    const char orient = OrientationToChar( orientation );
    const char diagKind = UnitOrNonUnitToChar( diag );
    blas::Trmm
    ( whichSide, triangle, orient, diagKind, B.Height(), B.Width(),
      alpha, A.LockedBuffer(), A.LDim(), B.Buffer(), B.LDim() );
}
Example #11
0
// Triangular matrix-vector multiply, overwriting x, delegated to blas::Trmv.
//
// Non-RELEASE builds verify that x is a vector, that A is square, and that
// their sizes agree. A row-stored x is traversed with its leading dimension
// as the stride.
inline void
Trmv
( UpperOrLower uplo, Orientation orientation, UnitOrNonUnit diag,
  const Matrix<T>& A, Matrix<T>& x )
{
#ifndef RELEASE
    PushCallStack("Trmv");
    const bool xIsVector = ( x.Height() == 1 || x.Width() == 1 );
    if( !xIsVector )
        throw std::logic_error("x must be a vector");
    if( A.Height() != A.Width() )
        throw std::logic_error("A must be square");
    const int xLen = ( x.Width()==1 ? x.Height() : x.Width() );
    if( xLen != A.Height() )
        throw std::logic_error("x must conform with A");
#endif
    const int xStride = ( x.Width()==1 ? 1 : x.LDim() );
    const int dim = A.Height();
    const char triangle = UpperOrLowerToChar( uplo );
    const char orient = OrientationToChar( orientation );
    const char diagKind = UnitOrNonUnitToChar( diag );
    blas::Trmv
    ( triangle, orient, diagKind, dim,
      A.LockedBuffer(), A.LDim(), x.Buffer(), xStride );
#ifndef RELEASE
    PopCallStack();
#endif
}
Example #12
0
// Writes the entries of A to "<basename>.<ext>" as raw bytes, one column
// after another, where <ext> is FileExtension(BINARY_FLAT).
//
// When the leading dimension equals the height, the buffer is written with a
// single call; otherwise each column is written separately so that the
// padding between columns is skipped. Calls RuntimeError if the file cannot
// be opened.
inline void
BinaryFlat( const Matrix<T>& A, string basename="matrix" )
{
    DEBUG_ONLY(CallStackEntry cse("write::BinaryFlat"))
    
    string filename = basename + "." + FileExtension(BINARY_FLAT);
    std::ofstream file( filename.c_str(), std::ios::binary );
    if( !file.is_open() )
        RuntimeError("Could not open ",filename);

    // Widen before multiplying: Height()*Width() evaluated in Int can
    // overflow for large matrices even though the byte count is representable.
    const std::streamsize colBytes = 
        static_cast<std::streamsize>(A.Height())*
        static_cast<std::streamsize>(sizeof(T));

    if( A.Height() == A.LDim() )
    {
        const std::streamsize totalBytes = 
            colBytes*static_cast<std::streamsize>(A.Width());
        file.write
        ( reinterpret_cast<const char*>(A.LockedBuffer()), totalBytes );
    }
    else
    {
        for( Int j=0; j<A.Width(); ++j )
            file.write
            ( reinterpret_cast<const char*>(A.LockedBuffer(0,j)), colBytes );
    }
}
Example #13
0
// Adds a scaled, transposed (optionally conjugated) copy of X into Y.
//
// alphaS is converted to the entry type T before use. The portion below
// handles the case where X is a row or column vector.
void TransposeAxpy
(       S alphaS,
  const Matrix<T>& X,
        Matrix<T>& Y,
        bool conjugate )
{
    DEBUG_CSE
    const T alpha = T(alphaS);
    const Int mX = X.Height();
    const Int nX = X.Width();
    const Int nY = Y.Width();
    const Int ldX = X.LDim();
    const Int ldY = Y.LDim();
    const T* XBuf = X.LockedBuffer();
          T* YBuf = Y.Buffer();
    // If X and Y are vectors, we can allow one to be a column and the other
    // to be a row. Otherwise we force X and Y to be the same dimension.
    if( mX == 1 || nX == 1 )
    {
        // A column vector is contiguous; a row vector is strided by the
        // leading dimension.
        const Int lengthX = ( nX==1 ? mX : nX );
        const Int incX = ( nX==1 ? 1  : ldX );
        const Int incY = ( nY==1 ? 1  : ldY );
        DEBUG_ONLY(
          const Int mY = Y.Height();
          const Int lengthY = ( nY==1 ? mY : nY );
          if( lengthX != lengthY )
              LogicError("Nonconformal TransposeAxpy");
        )
        // The conjugated update is done by hand; the plain one goes through
        // blas::Axpy.
        if( conjugate )
            for( Int j=0; j<lengthX; ++j )
                YBuf[j*incY] += alpha*Conj(XBuf[j*incX]);
        else
            blas::Axpy( lengthX, alpha, XBuf, incX, YBuf, incY );
    }
Example #14
0
// Reorders the eigenvalues in w (a column) and the matching columns of Z
// according to the ordering induced by internal::IndexValuePair<R>::Compare.
//
// The number of pairs is taken from the width of Z. Z is rebuilt through a
// temporary matrix and then assigned back.
inline void
SortEig( Matrix<R>& w, Matrix<R>& Z )
{
#ifndef RELEASE
    PushCallStack("SortEig");
#endif
    typedef internal::IndexValuePair<R> Pair;
    const int vecLength = Z.Height();
    const int numPairs = Z.Width();

    // Tag every eigenvalue with its original position
    std::vector<Pair> tagged( numPairs );
    for( int j=0; j<numPairs; ++j )
    {
        tagged[j].index = j;
        tagged[j].value = w.Get(j,0);
    }

    // Sorting the tagged values yields the permutation as a by-product
    std::sort( tagged.begin(), tagged.end(), Pair::Compare );

    // Gather the eigenvectors into their new positions and rewrite w
    Matrix<R> ZSorted( vecLength, numPairs );
    for( int dest=0; dest<numPairs; ++dest )
    {
        const int origin = tagged[dest].index;
        MemCopy( ZSorted.Buffer(0,dest), Z.LockedBuffer(0,origin), vecLength );
        w.Set(dest,0,tagged[dest].value);
    }
    Z = ZSorted;
#ifndef RELEASE
    PopCallStack();
#endif
}
Example #15
0
// Unblocked in-place update of X using the lower-triangular matrix L.
//
// Only the lower triangle of X (entries with i >= j) is scaled by alpha, and
// each sweep below touches only row i up to the diagonal and the rows beneath
// it, so X is presumably lower triangular as well -- TODO confirm with caller.
// The size n is taken from the height of L.
void LLNUnb( UnitOrNonUnit diag, F alpha, const Matrix<F>& L, Matrix<F>& X )
{
    DEBUG_CSE
    const bool isUnit = ( diag==UNIT );
    const Int n = L.Height();
    const Int LLDim = L.LDim();
    const Int XLDim = X.LDim();
    const F* LBuffer = L.LockedBuffer();
    F* XBuffer = X.Buffer();

    // X := alpha X
    // (skipped when alpha is one; only entries on or below the diagonal)
    if( alpha != F(1) )
        for( Int j=0; j<n; ++j ) 
            for( Int i=j; i<n; ++i )
                XBuffer[i+j*XLDim] *= alpha;

    for( Int i=0; i<n; ++i )
    {
        // With a non-unit diagonal, divide row i of X (columns 0..i) by the
        // diagonal entry L(i,i)
        if( !isUnit )
        {
            const F lambda11 = LBuffer[i+i*LLDim];
            for( Int j=0; j<i; ++j )
                XBuffer[i+j*XLDim] /= lambda11;
            XBuffer[i+i*XLDim] /= lambda11;
        }

        // Rank-one update of the rows below i (first i+1 columns) using the
        // subdiagonal part of column i of L and row i of X, with scale -1
        const Int l21Height = n - (i+1);
        const F* l21 = &LBuffer[(i+1)+i*LLDim];
        const F* x1L = &XBuffer[i];
        F* X2L = &XBuffer[i+1];
        blas::Geru( l21Height, i+1, F(-1), l21, 1, x1L, XLDim, X2L, XLDim );
    }
}
UnitaryCoherence( Matrix<F>& U )
{
#ifndef RELEASE
    CallStackEntry entry("UnitaryCoherence");
#endif
    // Returns n * (largest column two-norm of Z)^2 / r, where Z is formed
    // from U below, n is the height of U and r is its width.
    typedef BASE(F) R;
    const Int n = U.Height();
    const Int r = U.Width();

    // Z := U U' in n^2 r work
    Matrix<F> Z;
    Herk( UPPER, NORMAL, F(1), U, Z );

    // Now make Z explicitly Hermitian so that our job is easier
    // (the norms below read whole columns, not just the upper triangle)
    MakeHermitian( UPPER, Z );

    // Compute the maximum column two-norm
    R maxColNorm = 0;
    for( Int j=0; j<n; ++j )
    {
        const R colNorm = blas::Nrm2( n, Z.LockedBuffer(0,j), 1 );
        maxColNorm = std::max( colNorm, maxColNorm );
    }
    // NOTE(review): r == 0 would divide by zero here -- confirm callers
    // never pass an empty U.
    return (n*maxColNorm*maxColNorm)/r;
}
Example #17
0
// Adds a scaled copy of X into Y.
//
// alphaS is converted to the entry type T before use. The portion below
// handles the case where X is a row or column vector.
void Axpy( S alphaS, const Matrix<T>& X, Matrix<T>& Y )
{
    EL_DEBUG_CSE
    const T alpha = T(alphaS);
    const Int mX = X.Height();
    const Int nX = X.Width();
    const Int nY = Y.Width();
    const Int ldX = X.LDim();
    const Int ldY = Y.LDim();
    const T* XBuf = X.LockedBuffer();
          T* YBuf = Y.Buffer();
    // If X and Y are vectors, we can allow one to be a column and the other
    // to be a row. Otherwise we force X and Y to be the same dimension.
    if( mX == 1 || nX == 1 )
    {
        // A column vector is contiguous; a row vector is strided by the
        // leading dimension.
        const Int XLength = ( nX==1 ? mX : nX );
        const Int XStride = ( nX==1 ? 1  : ldX );
        const Int YStride = ( nY==1 ? 1  : ldY );
        EL_DEBUG_ONLY(
          const Int mY = Y.Height();
          const Int YLength = ( nY==1 ? mY : nY );
          if( XLength != YLength )
              LogicError("Nonconformal Axpy");
        )
        blas::Axpy( XLength, alpha, XBuf, XStride, YBuf, YStride );
    }
Example #18
0
    // Appends y as the next queued column of Y_ and calls Flush() once the
    // number of queued columns reaches the width of Y_.
    void Enqueue( const Matrix<Field>& y )
    {
        // Copy the entries of y into the first unused column
        MemCopy( Y_.Buffer(0,numQueued_), y.LockedBuffer(), y.Height() );
        numQueued_ += 1;

        const bool queueIsFull = ( numQueued_ == Y_.Width() );
        if( queueIsFull )
            Flush();
    }
Example #19
0
// Unblocked two-sided in-place update of A using the upper-triangular matrix
// U, processing one diagonal index j at a time.
//
// Within each step, a01, A02, alpha11, a12 and A22 name the pieces of A in
// column j above the diagonal, the block to its right, the diagonal entry,
// the rest of row j, and the trailing block; u12 is the rest of row j of U.
// When diag is UNIT the diagonal of U is treated as one and not read.
void UUnb( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& U )
{
    EL_DEBUG_CSE
    // Use the Variant 4 algorithm
    // (which annoyingly requires conjugations for the Her2)
    const Int n = A.Height();
    const Int lda = A.LDim();
    const Int ldu = U.LDim();
    F* ABuffer = A.Buffer();
    const F* UBuffer = U.LockedBuffer();
    // Scratch space for contiguous, conjugated copies of the strided rows
    vector<F> a12Conj( n ), u12Conj( n );
    for( Int j=0; j<n; ++j )
    {
        // Number of entries to the right of (equivalently, below) the diagonal
        const Int a21Height = n - (j+1);

        // Extract and store the diagonal value of U
        const F upsilon11 = ( diag==UNIT ? 1 : UBuffer[j+j*ldu] );

        // a01 := a01 / upsilon11
        F* a01 = &ABuffer[j*lda];
        if( diag != UNIT )
            for( Int k=0; k<j; ++k )
                a01[k] /= upsilon11;

        // A02 := A02 - a01 u12
        F* A02 = &ABuffer[(j+1)*lda];
        const F* u12 = &UBuffer[j+(j+1)*ldu];
        blas::Geru( j, a21Height, F(-1), a01, 1, u12, ldu, A02, lda );

        // alpha11 := alpha11 / |upsilon11|^2
        ABuffer[j+j*lda] /= upsilon11*Conj(upsilon11);
        const F alpha11 = ABuffer[j+j*lda];

        // a12 := a12 / conj(upsilon11)
        F* a12 = &ABuffer[j+(j+1)*lda];
        if( diag != UNIT )
            for( Int k=0; k<a21Height; ++k )
                a12[k*lda] /= Conj(upsilon11);

        // a12 := a12 - (alpha11/2)u12
        // (first half of the correction; the second half follows the Her2)
        for( Int k=0; k<a21Height; ++k )
            a12[k*lda] -= (alpha11/F(2))*u12[k*ldu];

        // A22 := A22 - (a12' u12 + u12' a12)
        F* A22 = &ABuffer[(j+1)+(j+1)*lda];
        for( Int k=0; k<a21Height; ++k )
            a12Conj[k] = Conj(a12[k*lda]);
        for( Int k=0; k<a21Height; ++k )
            u12Conj[k] = Conj(u12[k*ldu]);
        blas::Her2
        ( 'U', a21Height,
          F(-1), u12Conj.data(), 1, a12Conj.data(), 1, A22, lda );

        // a12 := a12 - (alpha11/2)u12
        // (second half of the correction)
        for( Int k=0; k<a21Height; ++k )
            a12[k*lda] -= (alpha11/F(2))*u12[k*ldu];
    }
}
Example #20
0
// Unblocked two-sided in-place update of A using the lower-triangular matrix
// L, processing one diagonal index j at a time.
//
// Within each step, a10, A00, alpha11, a21 and A20 name the pieces of A in
// row j left of the diagonal, the leading block, the diagonal entry, the rest
// of column j, and the block below-left; l10 is row j of L left of the
// diagonal. When diag is UNIT the diagonal of L is treated as one.
void LUnb( UnitOrNonUnit diag, Matrix<T>& A, const Matrix<T>& L )
{
    EL_DEBUG_CSE
    // Use the Variant 4 algorithm
    // (which annoyingly requires conjugations for the Her2)
    const Int n = A.Height();
    const Int lda = A.LDim();
    const Int ldl = L.LDim();
    T* ABuffer = A.Buffer();
    const T* LBuffer = L.LockedBuffer();
    // Scratch space for contiguous, conjugated copies of the strided rows
    vector<T> a10Conj( n ), l10Conj( n );
    for( Int j=0; j<n; ++j )
    {
        const Int a21Height = n - (j+1);

        // Extract and store the diagonal values of A and L
        // (alpha11 is captured before the diagonal entry is rescaled below)
        const T alpha11 = ABuffer[j+j*lda];
        const T lambda11 = ( diag==UNIT ? 1 : LBuffer[j+j*ldl] );

        // a10 := a10 + (alpha11/2)l10
        // (first half of the correction; the second half follows the Her2)
        T* a10 = &ABuffer[j];
        const T* l10 = &LBuffer[j];
        for( Int k=0; k<j; ++k )
            a10[k*lda] += (alpha11/T(2))*l10[k*ldl];

        // A00 := A00 + (a10' l10 + l10' a10)
        T* A00 = ABuffer;
        for( Int k=0; k<j; ++k )
            a10Conj[k] = Conj(a10[k*lda]);
        for( Int k=0; k<j; ++k )
            l10Conj[k] = Conj(l10[k*ldl]);
        blas::Her2
        ( 'L', j, T(1), a10Conj.data(), 1, l10Conj.data(), 1, A00, lda );

        // a10 := a10 + (alpha11/2)l10
        // (second half of the correction)
        for( Int k=0; k<j; ++k )
            a10[k*lda] += (alpha11/T(2))*l10[k*ldl];

        // a10 := conj(lambda11) a10
        if( diag != UNIT )
            for( Int k=0; k<j; ++k )
                a10[k*lda] *= Conj(lambda11);

        // alpha11 := alpha11 * |lambda11|^2
        ABuffer[j+j*lda] *= Conj(lambda11)*lambda11;

        // A20 := A20 + a21 l10
        T* a21 = &ABuffer[(j+1)+j*lda];
        T* A20 = &ABuffer[j+1];
        blas::Geru( a21Height, j, T(1), a21, 1, l10, ldl, A20, lda );

        // a21 := lambda11 a21
        if( diag != UNIT )
            for( Int k=0; k<a21Height; ++k )
                a21[k] *= lambda11;
    }
}
Example #21
0
void MaxEig
( const Matrix<Real>& x, 
        Matrix<Real>& maxEigs,
  const Matrix<Int>& orders, 
  const Matrix<Int>& firstInds )
{
    DEBUG_ONLY(CSE cse("soc::MaxEig"))
    soc::LowerNorms( x, maxEigs, orders, firstInds );

          Real* maxEigBuf = maxEigs.Buffer();
    const Real* xBuf = x.LockedBuffer();
    const Int* firstIndBuf = firstInds.LockedBuffer();

    const Int height = x.Height();
    for( Int i=0; i<height; ++i )
        if( i == firstIndBuf[i] ) 
            maxEigBuf[i] = xBuf[i]+maxEigBuf[i];
}
Example #22
0
inline void 
// Unblocked two-sided in-place update of A using the lower-triangular matrix
// L, processing one diagonal index j at a time.
//
// Within each step, a10, A20, alpha11, a21 and A22 name the pieces of A in
// row j left of the diagonal, the block below it, the diagonal entry, the
// rest of column j, and the trailing block; l21 is the part of column j of L
// below the diagonal. When diag is UNIT the diagonal of L is treated as one.
TwoSidedTrsmLUnb( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& L )
{
#ifndef RELEASE
    PushCallStack("internal::TwoSidedTrsmLUnb");
#endif
    // Use the Variant 4 algorithm
    const int n = A.Height();
    const int lda = A.LDim();
    const int ldl = L.LDim();
    F* ABuffer = A.Buffer();
    const F* LBuffer = L.LockedBuffer();
    for( int j=0; j<n; ++j )
    {
        const int a21Height = n - (j+1);

        // Extract and store the diagonal value of L
        const F lambda11 = ( diag==UNIT ? 1 : LBuffer[j+j*ldl] );

        // a10 := a10 / lambda11
        F* a10 = &ABuffer[j];
        if( diag != UNIT )
            for( int k=0; k<j; ++k )
                a10[k*lda] /= lambda11;

        // A20 := A20 - l21 a10
        F* A20 = &ABuffer[j+1];
        const F* l21 = &LBuffer[(j+1)+j*ldl];
        blas::Geru( a21Height, j, F(-1), l21, 1, a10, lda, A20, lda );

        // alpha11 := alpha11 / |lambda11|^2
        ABuffer[j+j*lda] /= lambda11*Conj(lambda11);
        const F alpha11 = ABuffer[j+j*lda];

        // a21 := a21 / conj(lambda11)
        F* a21 = &ABuffer[(j+1)+j*lda];
        if( diag != UNIT )
            for( int k=0; k<a21Height; ++k )
                a21[k] /= Conj(lambda11);

        // a21 := a21 - (alpha11/2)l21
        // (first half of the correction; the second half follows the Her2).
        // The divisor is written as F(2) so that the expression is valid for
        // complex F, where dividing by an int literal need not compile.
        for( int k=0; k<a21Height; ++k )
            a21[k] -= (alpha11/F(2))*l21[k];

        // A22 := A22 - (l21 a21' + a21 l21')
        F* A22 = &ABuffer[(j+1)+(j+1)*lda];
        blas::Her2( 'L', a21Height, F(-1), l21, 1, a21, 1, A22, lda );

        // a21 := a21 - (alpha11/2)l21
        // (second half of the correction)
        for( int k=0; k<a21Height; ++k )
            a21[k] -= (alpha11/F(2))*l21[k];
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Example #23
0
// Unblocked, unpivoted in-place elimination on A.
//
// For each of the first min(m,n) diagonal entries: throws
// SingularMatrixException if the entry is exactly zero, scales the entries
// below it by its reciprocal, and then applies a rank-one update with scale
// -1 to the trailing block.
inline void
Unb( Matrix<F>& A )
{
    EL_DEBUG_CSE
    const Int height = A.Height();
    const Int width = A.Width();
    const Int numPivots = Min(height,width);
    for( Int j=0; j<numPivots; ++j )
    {
        const F pivot = A(j,j);
        if( pivot == F(0) )
            throw SingularMatrixException();

        // Sizes of the column below the pivot and the row to its right
        const Int rowsBelow = height-(j+1);
        const Int colsRight = width-(j+1);

        blas::Scal( rowsBelow, F(1)/pivot, A.Buffer(j+1,j), 1 );
        blas::Geru
        ( rowsBelow, colsRight,
          F(-1), A.LockedBuffer(j+1,j), 1, A.LockedBuffer(j,j+1), A.LDim(),
                 A.Buffer(j+1,j+1), A.LDim() );
    }
}
Example #24
0
// Hermitian matrix-matrix multiply, a thin wrapper over blas::Hemm.
//
// The problem dimensions handed to BLAS are the height and width of C; no
// shape checks are performed here.
inline void
Hemm
( LeftOrRight side, UpperOrLower uplo,
  T alpha, const Matrix<T>& A, const Matrix<T>& B, T beta, Matrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("Hemm");
#endif
    const char triangle = UpperOrLowerToChar( uplo );
    const char whichSide = LeftOrRightToChar( side );
    blas::Hemm
    ( whichSide, triangle, 
      C.Height(), C.Width(),
      alpha, A.LockedBuffer(), A.LDim(), B.LockedBuffer(), B.LDim(),
      beta,  C.Buffer(), C.LDim() );
#ifndef RELEASE
    PopCallStack();
#endif
}
Example #25
0
// Returns the ratio between the largest and smallest entrywise products
// s_i * z_i, over the first s.Height() entries.
//
// The running maximum starts at zero, so it is never negative, and the
// running minimum starts from that maximum.
Real ComplementRatio
( const Matrix<Real>& s,
  const Matrix<Real>& z )
{
    DEBUG_CSE
    const Int numEntries = s.Height();
    const Real* sBuf = s.LockedBuffer();
    const Real* zBuf = z.LockedBuffer();

    // First pass: largest product (bounded below by zero)
    Real largest = 0;
    for( Int i=0; i<numEntries; ++i )
    {
        const Real prod = sBuf[i]*zBuf[i];
        largest = Max( prod, largest );
    }

    // Second pass: smallest product
    Real smallest = largest;
    for( Int i=0; i<numEntries; ++i )
    {
        const Real prod = sBuf[i]*zBuf[i];
        smallest = Min( prod, smallest );
    }

    return largest/smallest;
}
Example #26
0
                                      void TrrkMKL
                                      ( UpperOrLower uplo,
                                        Orientation orientA, Orientation orientB,
                                        T alpha, const Matrix<T>& A, const Matrix<T>& B,
                                        T beta,        Matrix<T>& C )
{
    // Thin wrapper that forwards a triangular rank-k update to mkl::Trrk.
    EL_DEBUG_CSE
    // The outer dimension is the height of C; the inner dimension is read
    // from A according to its orientation.
    const auto outerDim = C.Height();
    const auto innerDim = ( orientA == NORMAL ? A.Width() : A.Height() );
    const char triangle = UpperOrLowerToChar( uplo );
    const char opA = OrientationToChar( orientA );
    const char opB = OrientationToChar( orientB );
    mkl::Trrk
    ( triangle, opA, opB,
      outerDim, innerDim,
      alpha, A.LockedBuffer(), A.LDim(),
             B.LockedBuffer(), B.LDim(),
      beta,  C.Buffer(),       C.LDim() );
}
Example #27
0
// Two-norm of a vector stored as either a column or a row of a Matrix,
// computed by blas::Nrm2.
//
// Non-RELEASE builds throw std::logic_error when x is not a vector. A row
// vector is traversed with its leading dimension as the stride.
inline typename Base<F>::type
Nrm2( const Matrix<F>& x )
{
#ifndef RELEASE
    PushCallStack("Nrm2");
    const bool isVector = ( x.Height() == 1 || x.Width() == 1 );
    if( !isVector )
        throw std::logic_error("Expected vector input");
#endif
    typedef typename Base<F>::type R;

    R result;
    if( x.Width() != 1 )
    {
        // Row vector: entries are separated by the leading dimension
        result = blas::Nrm2( x.Width(), x.LockedBuffer(), x.LDim() );
    }
    else
    {
        // Column vector: entries are contiguous
        result = blas::Nrm2( x.Height(), x.LockedBuffer(), 1 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
    return result;
}
Example #28
0
// Unblocked, unpivoted in-place elimination on A.
//
// For each of the first min(m,n) diagonal entries: throws
// SingularMatrixException if the entry is exactly zero, scales the entries
// below it by its reciprocal, and then applies a rank-one update with scale
// -1 to the trailing block.
inline void
Unb( Matrix<F>& A )
{
#ifndef RELEASE
    CallStackEntry entry("lu::Unb");
#endif
    const Int m = A.Height();
    const Int n = A.Width();
    const Int minDim = Min(m,n);
    for( Int j=0; j<minDim; ++j )
    {
        const F alpha = A.Get(j,j);
        if( alpha == F(0) )
            throw SingularMatrixException();

        // The numerator is written as F(1) rather than the int literal 1 so
        // that the reciprocal is formed in the field F (an int divided by a
        // complex value need not compile).
        blas::Scal( m-(j+1), F(1)/alpha, A.Buffer(j+1,j), 1 );
        blas::Geru
        ( m-(j+1), n-(j+1),
          F(-1), A.LockedBuffer(j+1,j), 1, A.LockedBuffer(j,j+1), A.LDim(),
                 A.Buffer(j+1,j+1), A.LDim() );
    }
}
Example #29
0
// Unblocked two-sided in-place update of A using the upper-triangular matrix
// U, processing one diagonal index j at a time.
//
// Within each step, a01, A00, alpha11, a12 and A02 name the pieces of A in
// column j above the diagonal, the leading block, the diagonal entry, the
// rest of row j, and the block above-right; u01 is column j of U above the
// diagonal. When diag is UNIT the diagonal of U is treated as one.
void UUnb( UnitOrNonUnit diag, Matrix<T>& A, const Matrix<T>& U )
{
    EL_DEBUG_CSE
    // Use the Variant 4 algorithm
    const Int n = A.Height();
    const Int lda = A.LDim();
    const Int ldu = U.LDim();
    T* ABuffer = A.Buffer();
    const T* UBuffer = U.LockedBuffer();
    for( Int j=0; j<n; ++j )
    {
        const Int a21Height = n - (j+1);

        // Extract and store the diagonal values of A and U
        // (alpha11 is captured before the diagonal entry is rescaled below)
        const T alpha11 = ABuffer[j+j*lda];
        const T upsilon11 = ( diag==UNIT ? 1 : UBuffer[j+j*ldu] );

        // a01 := a01 + (alpha11/2)u01
        // (first half of the correction; the second half follows the Her2)
        T* a01 = &ABuffer[j*lda];
        const T* u01 = &UBuffer[j*ldu];
        for( Int k=0; k<j; ++k )
            a01[k] += (alpha11/T(2))*u01[k];

        // A00 := A00 + (u01 a01' + a01 u01')
        T* A00 = ABuffer;
        blas::Her2( 'U', j, T(1), u01, 1, a01, 1, A00, lda );

        // a01 := a01 + (alpha11/2)u01
        // (second half of the correction)
        for( Int k=0; k<j; ++k )
            a01[k] += (alpha11/T(2))*u01[k];

        // a01 := conj(upsilon11) a01
        if( diag != UNIT )
            for( Int k=0; k<j; ++k )
                a01[k] *= Conj(upsilon11);

        // A02 := A02 + u01 a12
        T* a12 = &ABuffer[j+(j+1)*lda];
        T* A02 = &ABuffer[(j+1)*lda];
        blas::Geru( j, a21Height, T(1), u01, 1, a12, lda, A02, lda );

        // alpha11 := alpha11 * |upsilon11|^2
        ABuffer[j+j*lda] *= Conj(upsilon11)*upsilon11;

        // a12 := upsilon11 a12
        if( diag != UNIT )
            for( Int k=0; k<a21Height; ++k )
                a12[k*lda] *= upsilon11;
    }
}
Example #30
0
// Returns the largest entry of A, scanning column by column.
//
// The running value starts at limits::Lowest<Real>(), which is therefore the
// result for a matrix with no entries.
Real Max( const Matrix<Real>& A )
{
    DEBUG_CSE
    const Int height = A.Height();
    const Int width = A.Width();
    const Int ldim = A.LDim();
    const Real* ABuf = A.LockedBuffer();

    Real largest = limits::Lowest<Real>();
    for( Int j=0; j<width; ++j )
    {
        // Offset of the first entry of column j within the buffer
        const Int colOff = j*ldim;
        for( Int i=0; i<height; ++i )
            largest = Max(largest,ABuf[i+colOff]);
    }
    return largest;
}