Пример #1
0
inline void 
CheckInput
( const DistMatrix<T,MC,  STAR>& A, 
  const DistMatrix<T,STAR,MR  >& B,
  const DistMatrix<T,MC,  MR  >& C )
{
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("A, B, and C must be distributed over the same grid");
    if( A.Height() != C.Height() || B.Width()  != C.Width() ||
        A.Width()  != B.Height() || A.Height() != B.Width() )
    {
        std::ostringstream msg;
        msg << "Nonconformal LocalTrrk: \n"
            << "  A[MC,* ] ~ " << A.Height() << " x "
                               << A.Width()  << "\n"
            << "  B[* ,MR] ~ " << B.Height() << " x "
                               << B.Width()  << "\n"
            << "  C[MC,MR] ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
    if( A.ColAlignment() != C.ColAlignment() ||
        B.RowAlignment() != C.RowAlignment() )
    {
        std::ostringstream msg;
        msg << "Misaligned LocalTrrk: \n"
            << "  A[MC,* ] ~ " << A.ColAlignment() << "\n"
            << "  B[* ,MR] ~ " << B.RowAlignment() << "\n"
            << "  C[MC,MR] ~ " << C.ColAlignment() << " , " <<
                                  C.RowAlignment() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
}
Пример #2
0
inline F
Reflector( DistMatrix<F>& chi, DistMatrix<F>& x )
{
#ifndef RELEASE
    CallStackEntry entry("Reflector");
    if( chi.Grid() != x.Grid() )
        LogicError("chi and x must be distributed over the same grid");
    if( chi.Height() != 1 || chi.Width() != 1 )
        LogicError("chi must be a scalar");
    if( x.Height() != 1 && x.Width() != 1 )
        LogicError("x must be a vector");
#endif
    const Grid& g = x.Grid();
    F tau;
    if( x.Width() == 1 && x.RowAlignment() == chi.RowAlignment() )
    {
        if( g.Col() == x.RowAlignment() )
            tau = reflector::Col( chi, x );
        mpi::Broadcast( tau, x.RowAlignment(), g.RowComm() );
    }
    else
    {
        if( g.Row() == x.ColAlignment() )
            tau = reflector::Row( chi, x );
        mpi::Broadcast( tau, x.ColAlignment(), g.ColComm() );
    }
    return tau;
}
Пример #3
0
const DistMatrix<T,STAR,STAR>&
DistMatrix<T,STAR,STAR>::operator=( const DistMatrix<T,VR,STAR>& A )
{
#ifndef RELEASE
    CallStackEntry entry("[* ,* ] = [VR,* ]");
    this->AssertNotLocked();
    this->AssertSameGrid( A.Grid() );
#endif
    const elem::Grid& g = this->Grid();
    this->ResizeTo( A.Height(), A.Width() );
    if( !this->Participating() )
        return *this;

    const Int p = g.Size();
    const Int height = this->Height();
    const Int width = this->Width();
    const Int localHeightOfA = A.LocalHeight();
    const Int maxLocalHeight = MaxLength(height,p);

    const Int portionSize = mpi::Pad( maxLocalHeight*width );
    T* buffer = this->auxMemory_.Require( (p+1)*portionSize );
    T* sendBuf = &buffer[0];
    T* recvBuf = &buffer[portionSize];

    // Pack
    const Int ALDim = A.LDim();
    const T* ABuf = A.LockedBuffer();
    PARALLEL_FOR
    for( Int j=0; j<width; ++j )
        MemCopy
        ( &sendBuf[j*localHeightOfA], &ABuf[j*ALDim], localHeightOfA );

    // Communicate
    mpi::AllGather
    ( sendBuf, portionSize,
      recvBuf, portionSize, g.VRComm() );

    // Unpack
    T* thisBuf = this->Buffer();
    const Int thisLDim = this->LDim();
    const Int colAlignmentOfA = A.ColAlignment();
    OUTER_PARALLEL_FOR
    for( Int k=0; k<p; ++k )
    {
        const T* data = &recvBuf[k*portionSize];
        const Int colShift = Shift_( k, colAlignmentOfA, p );
        const Int localHeight = Length_( height, colShift, p );
        INNER_PARALLEL_FOR
        for( Int j=0; j<width; ++j )
        {
            T* destCol = &thisBuf[colShift+j*thisLDim];
            const T* sourceCol = &data[j*localHeight];
            for( Int iLoc=0; iLoc<localHeight; ++iLoc )
                destCol[iLoc*p] = sourceCol[iLoc];
        }
    }
    this->auxMemory_.Release();
    return *this;
}
Пример #4
0
inline void
DiagonalScale
( LeftOrRight side, Orientation orientation,
  const DistMatrix<typename Base<T>::type,U,V>& d, DistMatrix<T,W,Z>& X )
{
#ifndef RELEASE
    PushCallStack("DiagonalScale");
#endif
    typedef typename Base<T>::type R;

    if( side == LEFT )
    {
        if( U == W && V == STAR && d.ColAlignment() == X.ColAlignment() )
        {
            DiagonalScale( LEFT, orientation, d.LockedMatrix(), X.Matrix() );
        }
        else
        {
            DistMatrix<R,W,STAR> d_W_STAR( X.Grid() );
            d_W_STAR = d;
            DiagonalScale
            ( LEFT, orientation, d_W_STAR.LockedMatrix(), X.Matrix() );
        }
    }
    else
    {
        if( U == Z && V == STAR && d.ColAlignment() == X.RowAlignment() )
        {
            DiagonalScale( RIGHT, orientation, d.LockedMatrix(), X.Matrix() );
        }
        else
        {
            DistMatrix<R,Z,STAR> d_Z_STAR( X.Grid() );
            d_Z_STAR = d;
            DiagonalScale
            ( RIGHT, orientation, d_Z_STAR.LockedMatrix(), X.Matrix() );
        }
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Пример #5
0
inline void
CheckInput
( Orientation orientationOfA,
  Orientation orientationOfB,
  const DistMatrix<T,STAR,MC  >& A,
  const DistMatrix<T,MR,  STAR>& B,
  const DistMatrix<T,MC,  MR  >& C )
{
    if( orientationOfA == NORMAL )
        throw std::logic_error("A[* ,MC] must be (Conjugate)Transpose'd");
    if( orientationOfB == NORMAL )
        throw std::logic_error("B[MR,* ] must be (Conjugate)Transpose'd");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("A, B, and C must be distributed over the same grid");
    if( A.Width() != C.Height() || B.Height() != C.Width() ||
        A.Height() != B.Width() || A.Width() != B.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal LocalTrrk: \n"
            << "  A[* ,MC] ~ " << A.Height() << " x "
                               << A.Width()  << "\n"
            << "  B[MR,* ] ~ " << B.Height() << " x "
                               << B.Width()  << "\n"
            << "  C[MC,MR] ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
    if( A.RowAlignment() != C.ColAlignment() ||
        B.ColAlignment() != C.RowAlignment() )
    {
        std::ostringstream msg;
        msg << "Misaligned LocalTrrk: \n"
            << "  A[* ,MC] ~ " << A.RowAlignment() << "\n"
            << "  B[MR,* ] ~ " << B.ColAlignment() << "\n"
            << "  C[MC,MR] ~ " << C.ColAlignment() << " , " <<
                                  C.RowAlignment() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
}
Пример #6
0
inline void
DistMatrix<T,MD,STAR,Int>::AlignWithDiagonal
( const DistMatrix<S,MR,MC,N>& A, Int offset )
{
#ifndef RELEASE
    PushCallStack("[MD,* ]::AlignWithDiagonal([MR,MC])");
    this->AssertFreeColAlignment();
    this->AssertSameGrid( A );
#endif
    const elem::Grid& g = this->Grid();
    const Int r = g.Height();
    const Int c = g.Width();
    const Int lcm = g.LCM();
    const Int colAlignment = A.ColAlignment();
    const Int rowAlignment = A.RowAlignment();

    this->Empty();
    Int owner;
    if( offset >= 0 )
    {
        const Int ownerRow = rowAlignment;
        const Int ownerCol = (colAlignment + offset) % c;
        owner = ownerRow + r*ownerCol;
    }
    else
    {
        const Int ownerRow = (rowAlignment-offset) % r;
        const Int ownerCol = colAlignment;
        owner = ownerRow + r*ownerCol;
    }
    this->diagPath_ = g.DiagPath(owner);
    this->colAlignment_ = g.DiagPathRank(owner);
    this->constrainedColAlignment_ = true;
    if( this->Participating() )
        this->colShift_ = (g.DiagPathRank()+lcm-this->colAlignment_) % lcm;
    else
        this->colShift_ = 0;
#ifndef RELEASE
    PopCallStack();
#endif
}
Пример #7
0
inline bool
DistMatrix<T,MD,STAR,Int>::AlignedWithDiagonal
( const DistMatrix<S,MR,MC,N>& A, Int offset ) const
{
#ifndef RELEASE
    PushCallStack("[MD,* ]::AlignedWithDiagonal([MR,MC])");
    this->AssertSameGrid( A );
#endif
    const elem::Grid& g = this->Grid();
    const Int r = g.Height();
    const Int c = g.Width();
    const Int colAlignment = A.ColAlignment();
    const Int rowAlignment = A.RowAlignment();

    const Int firstDiagRow = 0;
    const Int firstDiagCol = this->diagPath_;
    const Int diagRow = (firstDiagRow+this->ColAlignment()) % r;
    const Int diagCol = (firstDiagCol+this->ColAlignment()) % c;

    bool aligned;
    if( offset >= 0 )
    {
        const Int ownerCol = colAlignment;
        const Int ownerRow = (rowAlignment + offset) % r;
        aligned = ( ownerRow==diagRow && ownerCol==diagCol );
    }
    else
    {
        const Int ownerCol = (colAlignment-offset) % c;
        const Int ownerRow = rowAlignment;
        aligned = ( ownerRow==diagRow && ownerCol==diagCol );
    }
#ifndef RELEASE
    PopCallStack();
#endif
    return aligned;
}
Пример #8
0
inline void
internal::CholeskyUVar3Square( DistMatrix<F,MC,MR>& A )
{
#ifndef RELEASE
    PushCallStack("internal::CholeskyUVar3Square");
    if( A.Height() != A.Width() )
        throw std::logic_error
        ("Can only compute Cholesky factor of square matrices.");
    if( A.Grid().Height() != A.Grid().Width() )
        throw std::logic_error
        ("CholeskyUVar3Square assumes a square process grid.");
#endif
    const Grid& g = A.Grid();

    // Find the process holding our transposed data
    const int r = g.Height();
    int transposeRank;
    {
        const int colAlignment = A.ColAlignment();
        const int rowAlignment = A.RowAlignment();
        const int colShift = A.ColShift();
        const int rowShift = A.RowShift();

        const int transposeRow = (colAlignment+rowShift) % r;
        const int transposeCol = (rowAlignment+colShift) % r;
        transposeRank = transposeRow + r*transposeCol;
    }
    const bool onDiagonal = ( transposeRank == g.VCRank() );

    // Matrix views
    DistMatrix<F,MC,MR> 
        ATL(g), ATR(g),  A00(g), A01(g), A02(g),
        ABL(g), ABR(g),  A10(g), A11(g), A12(g),
                         A20(g), A21(g), A22(g);

    // Temporary matrix distributions
    DistMatrix<F,STAR,STAR> A11_STAR_STAR(g);
    DistMatrix<F,STAR,VR  > A12_STAR_VR(g);
    DistMatrix<F,STAR,MC  > A12_STAR_MC(g);
    DistMatrix<F,STAR,MR  > A12_STAR_MR(g);

    // Start the algorithm
    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 ); 
    while( ABR.Height() > 0 )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        A12_STAR_MC.AlignWith( A22 );
        A12_STAR_MR.AlignWith( A22 );
        A12_STAR_VR.AlignWith( A22 );
        //--------------------------------------------------------------------//
        A11_STAR_STAR = A11;
        internal::LocalCholesky( UPPER, A11_STAR_STAR );
        A11 = A11_STAR_STAR;

        A12_STAR_VR = A12;
        internal::LocalTrsm
        ( LEFT, UPPER, ADJOINT, NON_UNIT, (F)1, A11_STAR_STAR, A12_STAR_VR );

        A12_STAR_MR = A12_STAR_VR;
        // SendRecv to form A12[* ,MC] from A12[* ,MR]
        A12_STAR_MC.ResizeTo( A12.Height(), A12.Width() );
        {
            if( onDiagonal )
            {
                const int size = A11.Height()*A22.LocalWidth();
                MemCopy
                ( A12_STAR_MC.LocalBuffer(), 
                  A12_STAR_MR.LocalBuffer(), size );
            }
            else
            {
                const int sendSize = A11.Height()*A22.LocalWidth();
                const int recvSize = A11.Width()*A22.LocalHeight();
                // We know that the ldim is the height since we have manually
                // created both temporary matrices.
                mpi::SendRecv
                ( A12_STAR_MR.LocalBuffer(), sendSize, transposeRank, 0,
                  A12_STAR_MC.LocalBuffer(), recvSize, transposeRank, 0,
                  g.VCComm() );
            }
        }
        internal::LocalTrrk
        ( UPPER, ADJOINT, (F)-1, A12_STAR_MC, A12_STAR_MR, (F)1, A22 );
        A12 = A12_STAR_MR;
        //--------------------------------------------------------------------//
        A12_STAR_MC.FreeAlignments();
        A12_STAR_MR.FreeAlignments();
        A12_STAR_VR.FreeAlignments();

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Пример #9
0
inline void
PanelLU
( DistMatrix<F,  STAR,STAR>& A, 
  DistMatrix<F,  MC,  STAR>& B, 
  DistMatrix<int,STAR,STAR>& p, 
  int pivotOffset )
{
#ifndef RELEASE
    PushCallStack("internal::PanelLU");
    if( A.Grid() != p.Grid() || p.Grid() != B.Grid() )
        throw std::logic_error
        ("Matrices must be distributed over the same grid");
    if( A.Width() != B.Width() )
        throw std::logic_error("A and B must be the same width");
    if( A.Height() != p.Height() || p.Width() != 1 )
        throw std::logic_error("p must be a vector that conforms with A");
#endif
    const Grid& g = A.Grid();
    const int r = g.Height();
    const int colShift = B.ColShift();
    const int colAlignment = B.ColAlignment();

    // Matrix views
    DistMatrix<F,STAR,STAR> 
        ATL(g), ATR(g),  A00(g), a01(g),     A02(g),  
        ABL(g), ABR(g),  a10(g), alpha11(g), a12(g),  
                         A20(g), a21(g),     A22(g);

    DistMatrix<F,MC,STAR>
        BL(g), BR(g),
        B0(g), b1(g), B2(g);

    const int width = A.Width();
    const int numBytes = (width+1)*sizeof(F)+sizeof(int);
    std::vector<byte> sendData(numBytes);
    std::vector<byte> recvData(numBytes);

    // Extract pointers to send and recv data
    // TODO: Think of how to make this safer with respect to alignment issues
    F* sendBufFloat = (F*)&sendData[0];
    F* recvBufFloat = (F*)&recvData[0];
    int* sendBufInt = (int*)&sendData[(width+1)*sizeof(F)];
    int* recvBufInt = (int*)&recvData[(width+1)*sizeof(F)];

    // Start the algorithm
    PushBlocksizeStack( 1 );
    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    PartitionRight( B, BL, BR, 0 );
    while( ATL.Height() < A.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ a01,     A02,
         /*************/ /**********************/
               /**/       a10, /**/ alpha11, a12,
          ABL, /**/ ABR,  A20, /**/ a21,     A22 );

        RepartitionRight
        ( BL, /**/ BR,  
          B0, /**/ b1, B2 );

        //--------------------------------------------------------------------//
        const int currentRow = a01.Height();
        
        // Store the index/value of the pivot candidate in A
        F pivot = alpha11.GetLocal(0,0);
        int pivotRow = currentRow;
        for( int i=0; i<a21.Height(); ++i )
        {
            F value = a21.GetLocal(i,0);
            if( FastAbs(value) > FastAbs(pivot) )
            {
                pivot = value;
                pivotRow = currentRow + i + 1;
            }
        }

        // Update the pivot candidate to include local data from B
        for( int i=0; i<B.LocalHeight(); ++i )
        {
            F value = b1.GetLocal(i,0);
            if( FastAbs(value) > FastAbs(pivot) )
            {
                pivot = value;
                pivotRow = A.Height() + colShift + i*r;
            }
        }

        // Fill the send buffer with:
        // [ pivotValue | pivot row data | pivotRow ]
        if( pivotRow < A.Height() )
        {
            sendBufFloat[0] = A.GetLocal(pivotRow,a10.Width());

            const int ALDim = A.LocalLDim();
            const F* ABuffer = A.LocalBuffer(pivotRow,0);
            for( int j=0; j<width; ++j )
                sendBufFloat[j+1] = ABuffer[j*ALDim];
        }
        else
        {
            const int localRow = ((pivotRow-A.Height())-colShift)/r;
            sendBufFloat[0] = b1.GetLocal(localRow,0);

            const int BLDim = B.LocalLDim();
            const F* BBuffer = B.LocalBuffer(localRow,0);
            for( int j=0; j<width; ++j )
                sendBufFloat[j+1] = BBuffer[j*BLDim];
        }
        *sendBufInt = pivotRow;

        // Communicate to establish the pivot information
        mpi::AllReduce
        ( &sendData[0], &recvData[0], numBytes, PivotOp<F>(), g.ColComm() );

        // Update the pivot vector
        pivotRow = *recvBufInt;
        p.SetLocal(currentRow,0,pivotRow+pivotOffset);

        // Copy the current row into the pivot row
        if( pivotRow < A.Height() )
        {
            const int ALDim = A.LocalLDim();
            F* ASetBuffer = A.LocalBuffer(pivotRow,0);
            const F* AGetBuffer = A.LocalBuffer(currentRow,0);
            for( int j=0; j<width; ++j )
                ASetBuffer[j*ALDim] = AGetBuffer[j*ALDim];
        }
        else
        {
            const int ownerRank = (colAlignment+(pivotRow-A.Height())) % r;
            if( g.Row() == ownerRank )
            {
                const int localRow = ((pivotRow-A.Height())-colShift) / r;

                const int ALDim = A.LocalLDim();
                const int BLDim = B.LocalLDim();
                F* BBuffer = B.LocalBuffer(localRow,0);
                const F* ABuffer = A.LocalBuffer(currentRow,0);
                for( int j=0; j<width; ++j )
                    BBuffer[j*BLDim] = ABuffer[j*ALDim];
            }
        }

        // Copy the pivot row into the current row
        {
            F* ABuffer = A.LocalBuffer(currentRow,0);
            const int ALDim = A.LocalLDim();
            for( int j=0; j<width; ++j )
                ABuffer[j*ALDim] = recvBufFloat[j+1];
        }

        // Now we can perform the update of the current panel
        const F alpha = alpha11.GetLocal(0,0);
        if( alpha == F(0) )
            throw SingularMatrixException();
        const F alpha11Inv = F(1) / alpha;
        Scale( alpha11Inv, a21.LocalMatrix() );
        Scale( alpha11Inv, b1.LocalMatrix()  );
        Geru( F(-1), a21.LocalMatrix(), a12.LocalMatrix(), A22.LocalMatrix() );
        Geru( F(-1), b1.LocalMatrix(), a12.LocalMatrix(), B2.LocalMatrix() );
        //--------------------------------------------------------------------//

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, a01,     /**/ A02,
               /**/       a10, alpha11, /**/ a12,
         /*************/ /**********************/
          ABL, /**/ ABR,  A20, a21,     /**/ A22 );

        SlidePartitionRight
        ( BL,     /**/ BR,  
          B0, b1, /**/ B2 );
    }
    PopBlocksizeStack();

#ifndef RELEASE
    PopCallStack();
#endif
}
Пример #10
0
inline void
Var3( Orientation orientation, DistMatrix<F>& A, DistMatrix<F,MC,STAR>& d )
{
#ifndef RELEASE
    PushCallStack("ldl::Var3");
    if( orientation == NORMAL )
        throw std::logic_error("Can only perform LDL^T and LDL^H");
    if( A.Height() != A.Width() )
        throw std::logic_error("A must be square");
    if( A.Grid() != d.Grid() )
        throw std::logic_error("A and d must use the same grid");
    if( d.Viewing() && (d.Height() != A.Height() || d.Width() != 1) )
        throw std::logic_error
        ("d must be a column vector of the same height as A");
    if( d.Viewing() && d.ColAlignment() != A.ColAlignment() )
        throw std::logic_error("d must be aligned with A");
#endif
    const Grid& g = A.Grid();
    if( !d.Viewing() )
    {
        d.AlignWith( A );
        d.ResizeTo( A.Height(), 1 );
    }

    // Matrix views
    DistMatrix<F>
        ATL(g), ATR(g),  A00(g), A01(g), A02(g),
        ABL(g), ABR(g),  A10(g), A11(g), A12(g),
                         A20(g), A21(g), A22(g);
    DistMatrix<F,MC,STAR>
        dT(g),  d0(g),
        dB(g),  d1(g),
                d2(g);

    // Temporary matrices
    DistMatrix<F,STAR,STAR> A11_STAR_STAR(g);
    DistMatrix<F,STAR,STAR> d1_STAR_STAR(g);
    DistMatrix<F,VC,  STAR> A21_VC_STAR(g);
    DistMatrix<F,VR,  STAR> A21_VR_STAR(g);
    DistMatrix<F,STAR,MC  > S21Trans_STAR_MC(g);
    DistMatrix<F,STAR,MR  > A21AdjOrTrans_STAR_MR(g);

    const bool conjugate = ( orientation == ADJOINT );

    // Start the algorithm
    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    PartitionDown
    ( d, dT,
         dB, 0 );
    while( ABR.Height() > 0 )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        RepartitionDown
        ( dT,  d0,
         /**/ /**/
               d1,
          dB,  d2 );

        A21_VC_STAR.AlignWith( A22 );
        A21_VR_STAR.AlignWith( A22 );
        S21Trans_STAR_MC.AlignWith( A22 );
        A21AdjOrTrans_STAR_MR.AlignWith( A22 );
        //--------------------------------------------------------------------//
        A11_STAR_STAR = A11;
        LocalLDL( orientation, A11_STAR_STAR, d1_STAR_STAR );
        A11 = A11_STAR_STAR;
        d1 = d1_STAR_STAR;

        A21_VC_STAR = A21;
        LocalTrsm
        ( RIGHT, LOWER, orientation, UNIT,
          F(1), A11_STAR_STAR, A21_VC_STAR );

        S21Trans_STAR_MC.TransposeFrom( A21_VC_STAR );
        DiagonalSolve( RIGHT, NORMAL, d1_STAR_STAR, A21_VC_STAR );
        A21_VR_STAR = A21_VC_STAR;
        A21AdjOrTrans_STAR_MR.TransposeFrom( A21_VR_STAR, conjugate );
        LocalTrrk
        ( LOWER, TRANSPOSE,
          F(-1), S21Trans_STAR_MC, A21AdjOrTrans_STAR_MR, F(1), A22 );

        A21 = A21_VC_STAR;
        //--------------------------------------------------------------------//
        A21_VC_STAR.FreeAlignments();
        A21_VR_STAR.FreeAlignments();
        S21Trans_STAR_MC.FreeAlignments();
        A21AdjOrTrans_STAR_MR.FreeAlignments();

        SlidePartitionDown
        ( dT,  d0,
               d1,
         /**/ /**/
          dB,  d2 );

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Пример #11
0
inline void
LocalTrmmAccumulateRUN
( Orientation orientation, UnitOrNonUnit diag, T alpha,
  const DistMatrix<T,MC,  MR  >& U,
  const DistMatrix<T,STAR,MC  >& X_STAR_MC,
        DistMatrix<T,MR,  STAR>& ZTrans_MR_STAR )
{
#ifndef RELEASE
    CallStackEntry entry("internal::LocalTrmmAccumulateRUN");
    if( U.Grid() != X_STAR_MC.Grid() ||
        X_STAR_MC.Grid() != ZTrans_MR_STAR.Grid() )
        throw std::logic_error
        ("{U,X,Z} must be distributed over the same grid");
    if( U.Height() != U.Width() ||
        U.Height() != X_STAR_MC.Width() ||
        U.Height() != ZTrans_MR_STAR.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal LocalTrmmAccumulateRUN: \n"
            << "  U ~ " << U.Height() << " x " << U.Width() << "\n"
            << "  X[* ,MC] ~ " << X_STAR_MC.Height() << " x "
                               << X_STAR_MC.Width() << "\n"
            << "  Z^H/T[MR,* ] ~ " << ZTrans_MR_STAR.Height() << " x "
                                   << ZTrans_MR_STAR.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
    if( X_STAR_MC.RowAlignment() != U.ColAlignment() ||
        ZTrans_MR_STAR.ColAlignment() != U.RowAlignment() )
        throw std::logic_error("Partial matrix distributions are misaligned");
#endif
    const Grid& g = U.Grid();

    // Matrix views
    DistMatrix<T>
        UTL(g), UTR(g),  U00(g), U01(g), U02(g),
        UBL(g), UBR(g),  U10(g), U11(g), U12(g),
                         U20(g), U21(g), U22(g);

    DistMatrix<T> D11(g);

    DistMatrix<T,STAR,MC>
        XL_STAR_MC(g), XR_STAR_MC(g),
        X0_STAR_MC(g), X1_STAR_MC(g), X2_STAR_MC(g);

    DistMatrix<T,MR,STAR>
        ZTTrans_MR_STAR(g),  Z0Trans_MR_STAR(g),
        ZBTrans_MR_STAR(g),  Z1Trans_MR_STAR(g),
                             Z2Trans_MR_STAR(g);

    const int ratio = std::max( g.Height(), g.Width() );
    PushBlocksizeStack( ratio*Blocksize() );

    LockedPartitionDownDiagonal
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    LockedPartitionRight( X_STAR_MC,  XL_STAR_MC, XR_STAR_MC, 0 );
    PartitionDown
    ( ZTrans_MR_STAR, ZTTrans_MR_STAR,
                      ZBTrans_MR_STAR, 0 );
    while( UTL.Height() < U.Height() )
    {
        LockedRepartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, /**/ U01, U02,
         /*************/ /******************/
               /**/       U10, /**/ U11, U12,
          UBL, /**/ UBR,  U20, /**/ U21, U22 );

        LockedRepartitionRight
        ( XL_STAR_MC, /**/ XR_STAR_MC,
          X0_STAR_MC, /**/ X1_STAR_MC, X2_STAR_MC );

        RepartitionDown
        ( ZTTrans_MR_STAR,  Z0Trans_MR_STAR,
         /***************/ /***************/
                            Z1Trans_MR_STAR,
          ZBTrans_MR_STAR,  Z2Trans_MR_STAR );

        D11.AlignWith( U11 );
        //--------------------------------------------------------------------//
        D11 = U11;
        MakeTriangular( UPPER, D11 );
        if( diag == UNIT )
            SetDiagonal( D11, T(1) );
        LocalGemm
        ( orientation, orientation,
          alpha, D11, X1_STAR_MC, T(1), Z1Trans_MR_STAR );
        LocalGemm
        ( orientation, orientation,
          alpha, U01, X0_STAR_MC, T(1), Z1Trans_MR_STAR );
        //--------------------------------------------------------------------//
        D11.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, U01, /**/ U02,
               /**/       U10, U11, /**/ U12,
         /*************/ /******************/
          UBL, /**/ UBR,  U20, U21, /**/ U22 );

        SlideLockedPartitionRight
        ( XL_STAR_MC,             /**/ XR_STAR_MC,
          X0_STAR_MC, X1_STAR_MC, /**/ X2_STAR_MC );

        SlidePartitionDown
        ( ZTTrans_MR_STAR,  Z0Trans_MR_STAR,
                            Z1Trans_MR_STAR,
         /***************/ /***************/
          ZBTrans_MR_STAR,  Z2Trans_MR_STAR );
    }
    PopBlocksizeStack();
}
Пример #12
0
inline void
Cannon_NN
( T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    CallStackEntry entry("gemm::Cannon_NN");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        LogicError("{A,B,C} must have the same grid");
    if( A.Height() != C.Height() ||
        B.Width()  != C.Width()  ||
        A.Width()  != B.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal matrices: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        LogicError( msg.str() );
    }
#endif
    const Grid& g = A.Grid();
    if( g.Height() != g.Width() )
        LogicError("Process grid must be square for Cannon's");
    if( C.ColAlignment() != A.ColAlignment() || 
        C.RowAlignment() != B.RowAlignment() )
        LogicError("C is not properly aligned");

    const Int row = g.Row();
    const Int col = g.Col();
    const Int pSqrt = g.Height();
    mpi::Comm rowComm = g.RowComm();
    mpi::Comm colComm = g.ColComm(); 
    if( A.Width() % pSqrt != 0 )
        LogicError("For now, width(A) must be integer multiple of sqrt(p)");

    // Begin by scaling our local portion of C
    Scale( beta, C );

    // Load the initial A and B packages (may want to transpose B...)
    const Int localHeightA = A.LocalHeight();
    const Int localHeightB = B.LocalHeight();
    const Int localWidthA = A.LocalWidth();
    const Int localWidthB = B.LocalWidth();
    Matrix<T> pkgA(localHeightA,localWidthA,localHeightA), 
              pkgB(localHeightB,localWidthB,localHeightB);
    for( Int jLoc=0; jLoc<localWidthA; ++jLoc )
        MemCopy
        ( pkgA.Buffer(0,jLoc), A.LockedBuffer(0,jLoc), localHeightA );
    for( Int jLoc=0; jLoc<localWidthB; ++jLoc )
        MemCopy
        ( pkgB.Buffer(0,jLoc), B.LockedBuffer(0,jLoc), localHeightB );

    // Perform the initial circular shifts so that our A and B packages align
    const Int rowShiftA = A.RowShift();
    const Int colShiftB = B.ColShift();
    const Int leftInitA = (col+pSqrt-colShiftB) % pSqrt;
    const Int rightInitA = (col+colShiftB) % pSqrt;
    const Int aboveInitB = (row+pSqrt-rowShiftA) % pSqrt;
    const Int belowInitB = (row+rowShiftA) % pSqrt;
    const Int pkgSizeA = localHeightA*localWidthA;
    const Int pkgSizeB = localHeightB*localWidthB;
    mpi::SendRecv( pkgA.Buffer(), pkgSizeA, leftInitA, rightInitA, rowComm );
    mpi::SendRecv( pkgB.Buffer(), pkgSizeB, aboveInitB, belowInitB, colComm );

    // Now begin the data flow
    const Int aboveRow = (row+pSqrt-1) % pSqrt;
    const Int belowRow = (row+1) % pSqrt;
    const Int leftCol = (col+pSqrt-1) % pSqrt;
    const Int rightCol = (col+1) % pSqrt;
    for( Int q=0; q<pSqrt; ++q )
    {
        Gemm( NORMAL, NORMAL, alpha, pkgA, pkgB, T(1), C.Matrix() );
        if( q != pSqrt-1 )
        {
            mpi::SendRecv
            ( pkgA.Buffer(), pkgSizeA, leftCol, rightCol, rowComm );
            mpi::SendRecv
            ( pkgB.Buffer(), pkgSizeB, aboveRow, belowRow, colComm );
        }
    }
}
Пример #13
0
inline void
LocalSymvColAccumulateU
( T alpha, 
  const DistMatrix<T>& A,
  const DistMatrix<T,MC,STAR>& x_MC_STAR,
  const DistMatrix<T,MR,STAR>& x_MR_STAR,
        DistMatrix<T,MC,STAR>& z_MC_STAR,
        DistMatrix<T,MR,STAR>& z_MR_STAR,
  bool conjugate=false )
{
#ifndef RELEASE
    CallStackEntry entry("internal::LocalSymvColAccumulateU");
    if( A.Grid() != x_MC_STAR.Grid() ||
        x_MC_STAR.Grid() != x_MR_STAR.Grid() ||
        x_MR_STAR.Grid() != z_MC_STAR.Grid() ||
        z_MC_STAR.Grid() != z_MR_STAR.Grid() )
        LogicError
        ("{A,x,z} must be distributed over the same grid");
    if( x_MC_STAR.Width() != 1 || x_MR_STAR.Width() != 1 ||
        z_MC_STAR.Width() != 1 || z_MR_STAR.Width() != 1 )
        LogicError("Expected x and z to be column vectors");
    if( A.Height() != A.Width() || 
        A.Height() != x_MC_STAR.Height() ||
        A.Height() != x_MR_STAR.Height() ||
        A.Height() != z_MC_STAR.Height() ||
        A.Height() != z_MR_STAR.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal LocalSymvColAccumulateU: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  x[MC,* ] ~ " << x_MC_STAR.Height() << " x " 
                               << x_MC_STAR.Width() << "\n"
            << "  x[MR,* ] ~ " << x_MR_STAR.Height() << " x " 
                               << x_MR_STAR.Width() << "\n"
            << "  z[MC,* ] ~ " << z_MC_STAR.Height() << " x " 
                               << z_MC_STAR.Width() << "\n"
            << "  z[MR,* ] ~ " << z_MR_STAR.Height() << " x " 
                               << z_MR_STAR.Width() << "\n";
        LogicError( msg.str() );
    }
    if( x_MC_STAR.ColAlignment() != A.ColAlignment() ||
        x_MR_STAR.ColAlignment() != A.RowAlignment() ||
        z_MC_STAR.ColAlignment() != A.ColAlignment() ||
        z_MR_STAR.ColAlignment() != A.RowAlignment() )
        LogicError("Partial matrix distributions are misaligned");
#endif
    const Grid& g = A.Grid();
    const Orientation orientation = ( conjugate ? ADJOINT : TRANSPOSE );

    // Matrix views
    DistMatrix<T> A11(g), A12(g);
    DistMatrix<T> D11(g);

    DistMatrix<T,MC,STAR> x1_MC_STAR(g);
    DistMatrix<T,MR,STAR> 
        xT_MR_STAR(g),  x0_MR_STAR(g),
        xB_MR_STAR(g),  x1_MR_STAR(g),
                        x2_MR_STAR(g);
    DistMatrix<T,MC,STAR> z1_MC_STAR(g);
    DistMatrix<T,MR,STAR> z1_MR_STAR(g),
                          z2_MR_STAR(g);

    // We want our local gemvs to be of width blocksize, so we will 
    // temporarily change to max(r,c) times the current blocksize
    const Int ratio = Max( g.Height(), g.Width() );
    PushBlocksizeStack( ratio*LocalSymvBlocksize<T>() );
                 
    LockedPartitionDown
    ( x_MR_STAR, xT_MR_STAR,
                 xB_MR_STAR, 0 );
    while( xT_MR_STAR.Height() < x_MR_STAR.Height() )
    {
        LockedRepartitionDown
        ( xT_MR_STAR,  x0_MR_STAR,
         /**********/ /**********/
                       x1_MR_STAR,
          xB_MR_STAR,  x2_MR_STAR );

        const Int n0 = x0_MR_STAR.Height();
        const Int n1 = x1_MR_STAR.Height();
        const Int n2 = x2_MR_STAR.Height();
        LockedView( A11, A, n0, n0,    n1, n1 );
        LockedView( A12, A, n0, n0+n1, n1, n2 );
        LockedView( x1_MC_STAR, x_MC_STAR, n0, 0, n1, 1 );
        View( z1_MC_STAR, z_MC_STAR, n0,    0, n1, 1 );
        View( z1_MR_STAR, z_MR_STAR, n0,    0, n1, 1 );
        View( z2_MR_STAR, z_MR_STAR, n0+n1, 0, n2, 1 );

        D11.AlignWith( A11 );
        //--------------------------------------------------------------------//
        // TODO: These diagonal block updates can be greatly improved
        D11 = A11;
        MakeTriangular( UPPER, D11 );
        LocalGemv( NORMAL, alpha, D11, x1_MR_STAR, T(1), z1_MC_STAR );
        SetDiagonal( D11, T(0) );
        LocalGemv( orientation, alpha, D11, x1_MC_STAR, T(1), z1_MR_STAR );
        
        LocalGemv( NORMAL, alpha, A12, x2_MR_STAR, T(1), z1_MC_STAR );
        LocalGemv( orientation, alpha, A12, x1_MC_STAR, T(1), z2_MR_STAR );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDown
        ( xT_MR_STAR,  x0_MR_STAR,
                       x1_MR_STAR,
         /**********/ /**********/
          xB_MR_STAR,  x2_MR_STAR );
    }
    PopBlocksizeStack();
}
Пример #14
0
inline T
DotuHelper( const DistMatrix<T,U,V>& x, const DistMatrix<T,MC,MR>& y )
{
#ifndef RELEASE
    PushCallStack("internal::DotuHelper");
    if( x.Grid() != y.Grid() )
        throw std::logic_error("{x,y} must be distributed over the same grid");
    if( (x.Height() != 1 && x.Width() != 1) ||
        (y.Height() != 1 && y.Width() != 1) )
        throw std::logic_error("Dotu requires x and y to be vectors");
    int xLength = ( x.Width() == 1 ? x.Height() : x.Width() );
    int yLength = ( y.Width() == 1 ? y.Height() : y.Width() );
    if( xLength != yLength )
        throw std::logic_error("Dotu requires x and y to be the same length");
#endif
    const Grid& g = x.Grid();

    T globalDotu;
    if( x.Width() == 1 && y.Width() == 1 )
    {
        DistMatrix<T,MC,MR> xRedist(g);
        xRedist.AlignWith( y );
        xRedist = x;

        int ownerCol = y.RowAlignment();
        if( g.Col() == ownerCol )
        { 
            T localDotu = 
                Dotu( xRedist.LockedLocalMatrix(), y.LockedLocalMatrix() );
            mpi::AllReduce( &localDotu, &globalDotu, 1, mpi::SUM, g.ColComm() );
        }
        mpi::Broadcast( &globalDotu, 1, ownerCol, g.RowComm() );
    }
    else if( x.Width() == 1 )
    {
        DistMatrix<T,MR,MC> xRedist(g);
        xRedist.AlignWith( y );
        xRedist = x;

        int ownerRow = y.ColAlignment();
        if( g.Row() == ownerRow )
        {
            T localDotu = 
                Dotu( xRedist.LockedLocalMatrix(), y.LockedLocalMatrix() );
            mpi::AllReduce( &localDotu, &globalDotu, 1, mpi::SUM, g.RowComm() );
        }
        mpi::Broadcast( &globalDotu, 1, ownerRow, g.ColComm() );
    }
    else if( y.Width() == 1 )
    {
        DistMatrix<T,MR,MC> xRedist(g);
        xRedist.AlignWith( y );
        xRedist = x;

        int ownerCol = y.RowAlignment();
        if( g.Col() == ownerCol )
        {
            T localDotu = 
                Dotu( xRedist.LockedLocalMatrix(), y.LockedLocalMatrix() );
            mpi::AllReduce( &localDotu, &globalDotu, 1, mpi::SUM, g.ColComm() );
        }
        mpi::Broadcast( &globalDotu, 1, ownerCol, g.RowComm() );
    }
    else
    {
        DistMatrix<T,MC,MR> xRedist(g);
        xRedist.AlignWith( y );
        xRedist = x;

        int ownerRow = y.ColAlignment();
        if( g.Row() == ownerRow )
        {
            T localDotu = 
                Dotu( xRedist.LockedLocalMatrix(), y.LockedLocalMatrix() );
            mpi::AllReduce( &localDotu, &globalDotu, 1, mpi::SUM, g.RowComm() );
        }
        mpi::Broadcast( &globalDotu, 1, ownerRow, g.ColComm() );
    }
#ifndef RELEASE
    PopCallStack();
#endif
    return globalDotu;
}
Пример #15
0
inline void
LocalSymmetricAccumulateLU
( Orientation orientation, T alpha,
  const DistMatrix<T>& A,
  const DistMatrix<T,MC,  STAR>& B_MC_STAR,
  const DistMatrix<T,STAR,MR  >& BAdjOrTrans_STAR_MR,
        DistMatrix<T,MC,  STAR>& Z_MC_STAR,
        DistMatrix<T,MR,  STAR>& Z_MR_STAR )
{
#ifndef RELEASE
    PushCallStack("internal::LocalSymmetricAccumulateLU");
    if( A.Grid() != B_MC_STAR.Grid() ||
        B_MC_STAR.Grid() != BAdjOrTrans_STAR_MR.Grid() ||
        BAdjOrTrans_STAR_MR.Grid() != Z_MC_STAR.Grid() ||
        Z_MC_STAR.Grid() != Z_MR_STAR.Grid() )
        throw std::logic_error
        ("{A,B,Z} must be distributed over the same grid");
    if( A.Height() != A.Width() ||
        A.Height() != B_MC_STAR.Height() ||
        A.Height() != BAdjOrTrans_STAR_MR.Width() ||
        A.Height() != Z_MC_STAR.Height() ||
        A.Height() != Z_MR_STAR.Height() ||
        B_MC_STAR.Width() != BAdjOrTrans_STAR_MR.Height() ||
        BAdjOrTrans_STAR_MR.Height() != Z_MC_STAR.Width() ||
        Z_MC_STAR.Width() != Z_MR_STAR.Width() )
    {
        std::ostringstream msg;
        msg << "Nonconformal LocalSymmetricAccumulateLU: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B[MC,* ] ~ " << B_MC_STAR.Height() << " x "
                               << B_MC_STAR.Width() << "\n"
            << "  B^H/T[* ,MR] ~ " << BAdjOrTrans_STAR_MR.Height() << " x "
                                   << BAdjOrTrans_STAR_MR.Width() << "\n"
            << "  Z[MC,* ] ~ " << Z_MC_STAR.Height() << " x "
                               << Z_MC_STAR.Width() << "\n"
            << "  Z[MR,* ] ` " << Z_MR_STAR.Height() << " x "
                               << Z_MR_STAR.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
    if( B_MC_STAR.ColAlignment() != A.ColAlignment() ||
        BAdjOrTrans_STAR_MR.RowAlignment() != A.RowAlignment() ||
        Z_MC_STAR.ColAlignment() != A.ColAlignment() ||
        Z_MR_STAR.ColAlignment() != A.RowAlignment() )
        throw std::logic_error("Partial matrix distributions are misaligned");
#endif
    const Grid& g = A.Grid();

    DistMatrix<T>
        ATL(g), ATR(g),  A00(g), A01(g), A02(g),
        ABL(g), ABR(g),  A10(g), A11(g), A12(g),
                         A20(g), A21(g), A22(g);

    DistMatrix<T> D11(g);

    DistMatrix<T,MC,STAR>
        BT_MC_STAR(g),  B0_MC_STAR(g),
        BB_MC_STAR(g),  B1_MC_STAR(g),
                        B2_MC_STAR(g);

    DistMatrix<T,STAR,MR>
        BLAdjOrTrans_STAR_MR(g), BRAdjOrTrans_STAR_MR(g),
        B0AdjOrTrans_STAR_MR(g), B1AdjOrTrans_STAR_MR(g), 
        B2AdjOrTrans_STAR_MR(g);

    DistMatrix<T,MC,STAR>
        ZT_MC_STAR(g),  Z0_MC_STAR(g),
        ZB_MC_STAR(g),  Z1_MC_STAR(g),
                        Z2_MC_STAR(g);

    DistMatrix<T,MR,STAR>
        ZT_MR_STAR(g),  Z0_MR_STAR(g),
        ZB_MR_STAR(g),  Z1_MR_STAR(g),
                        Z2_MR_STAR(g);

    const int ratio = std::max( g.Height(), g.Width() );
    PushBlocksizeStack( ratio*Blocksize() );

    LockedPartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    LockedPartitionDown
    ( B_MC_STAR, BT_MC_STAR,
                 BB_MC_STAR, 0 );
    LockedPartitionRight
    ( BAdjOrTrans_STAR_MR, BLAdjOrTrans_STAR_MR, BRAdjOrTrans_STAR_MR, 0 );
    PartitionDown
    ( Z_MC_STAR, ZT_MC_STAR,
                 ZB_MC_STAR, 0 );
    PartitionDown
    ( Z_MR_STAR, ZT_MR_STAR,
                 ZB_MR_STAR, 0 );
    while( ATL.Height() < A.Height() )
    {
        LockedRepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
          /************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        LockedRepartitionDown
        ( BT_MC_STAR,  B0_MC_STAR,
         /**********/ /**********/
                       B1_MC_STAR,
          BB_MC_STAR,  B2_MC_STAR );

        LockedRepartitionRight
        ( BLAdjOrTrans_STAR_MR, /**/ BRAdjOrTrans_STAR_MR,
          B0AdjOrTrans_STAR_MR, /**/ B1AdjOrTrans_STAR_MR, 
                                     B2AdjOrTrans_STAR_MR );

        RepartitionDown
        ( ZT_MC_STAR,  Z0_MC_STAR,
         /**********/ /**********/
                       Z1_MC_STAR,
          ZB_MC_STAR,  Z2_MC_STAR );

        RepartitionDown
        ( ZT_MR_STAR,  Z0_MR_STAR,
         /**********/ /**********/
                       Z1_MR_STAR,
          ZB_MR_STAR,  Z2_MR_STAR );

        D11.AlignWith( A11 );
        //--------------------------------------------------------------------//
        D11 = A11;
        MakeTrapezoidal( LEFT, UPPER, 0, D11 );
        LocalGemm
        ( NORMAL, orientation, 
          alpha, D11, B1AdjOrTrans_STAR_MR, T(1), Z1_MC_STAR );
        MakeTrapezoidal( LEFT, UPPER, 1, D11 );

        LocalGemm
        ( orientation, NORMAL, alpha, D11, B1_MC_STAR, T(1), Z1_MR_STAR );

        LocalGemm
        ( NORMAL, orientation, 
          alpha, A12, B2AdjOrTrans_STAR_MR, T(1), Z1_MC_STAR );

        LocalGemm
        ( orientation, NORMAL, alpha, A12, B1_MC_STAR, T(1), Z2_MR_STAR );
        //--------------------------------------------------------------------//
        D11.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        SlideLockedPartitionDown
        ( BT_MC_STAR,  B0_MC_STAR,
                       B1_MC_STAR,
         /**********/ /**********/
          BB_MC_STAR,  B2_MC_STAR );

        SlideLockedPartitionRight
        ( BLAdjOrTrans_STAR_MR,                       /**/ BRAdjOrTrans_STAR_MR,
          B0AdjOrTrans_STAR_MR, B1AdjOrTrans_STAR_MR, /**/ B2AdjOrTrans_STAR_MR         );

        SlidePartitionDown
        ( ZT_MC_STAR,  Z0_MC_STAR,
                       Z1_MC_STAR,
         /**********/ /**********/
          ZB_MC_STAR,  Z2_MC_STAR );

        SlidePartitionDown
        ( ZT_MR_STAR,  Z0_MR_STAR,
                       Z1_MR_STAR,
         /**********/ /**********/
          ZB_MR_STAR,  Z2_MR_STAR );
    }
    PopBlocksizeStack();
#ifndef RELEASE
    PopCallStack();
#endif
}
Пример #16
0
inline void
TrsmLUNSmall
( UnitOrNonUnit diag,
  F alpha, const DistMatrix<F,VC,STAR>& U, DistMatrix<F,VC,STAR>& X,
  bool checkIfSingular )
{
#ifndef RELEASE
    PushCallStack("internal::TrsmLUNSmall");
    if( U.Grid() != X.Grid() )
        throw std::logic_error
        ("U and X must be distributed over the same grid");
    if( U.Height() != U.Width() || U.Width() != X.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal TrsmLUN: \n"
            << "  U ~ " << U.Height() << " x " << U.Width() << "\n"
            << "  X ~ " << X.Height() << " x " << X.Width() << "\n";
        throw std::logic_error( msg.str() );
    }
    if( U.ColAlignment() != X.ColAlignment() )
        throw std::logic_error("U and X are assumed to be aligned");
#endif
    const Grid& g = U.Grid();

    // Matrix views
    DistMatrix<F,VC,STAR> 
        UTL(g), UTR(g),  U00(g), U01(g), U02(g),
        UBL(g), UBR(g),  U10(g), U11(g), U12(g),
                         U20(g), U21(g), U22(g);
    DistMatrix<F,VC,STAR> XT(g),  X0(g),
                          XB(g),  X1(g),
                                  X2(g);

    // Temporary distributions
    DistMatrix<F,STAR,STAR> U11_STAR_STAR(g);
    DistMatrix<F,STAR,STAR> X1_STAR_STAR(g);

    // Start the algorithm
    Scale( alpha, X );
    LockedPartitionUpDiagonal
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    PartitionUp
    ( X, XT,
         XB, 0 );
    while( XT.Height() > 0 )
    {
        LockedRepartitionUpDiagonal
        ( UTL, /**/ UTR,   U00, U01, /**/ U02,
               /**/        U10, U11, /**/ U12,
         /*************/  /******************/
          UBL, /**/ UBR,   U20, U21, /**/ U22 );

        RepartitionUp
        ( XT,  X0,
               X1,
         /**/ /**/
          XB,  X2 );

        //--------------------------------------------------------------------//
        U11_STAR_STAR = U11; // U11[* ,* ] <- U11[VC,* ]
        X1_STAR_STAR = X1;   // X1[* ,* ] <- X1[VC,* ]
        
        // X1[* ,* ] := U11^-1[* ,* ] X1[* ,* ]
        LocalTrsm
        ( LEFT, UPPER, NORMAL, diag,
          F(1), U11_STAR_STAR, X1_STAR_STAR, checkIfSingular );
        X1 = X1_STAR_STAR;

        // X0[VC,* ] -= U01[VC,* ] X1[* ,* ]
        LocalGemm( NORMAL, NORMAL, F(-1), U01, X1_STAR_STAR, F(1), X0 );
        //--------------------------------------------------------------------//

        SlideLockedPartitionUpDiagonal
        ( UTL, /**/ UTR,  U00, /**/ U01, U02,
         /*************/ /******************/
               /**/       U10, /**/ U11, U12,
          UBL, /**/ UBR,  U20, /**/ U21, U22 );

        SlidePartitionUp
        ( XT,  X0,
         /**/ /**/
               X1,
          XB,  X2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Пример #17
0
inline void
ApplyRowPivots
( DistMatrix<F>& A, 
  const std::vector<int>& image,
  const std::vector<int>& preimage )
{
    const int b = image.size();
#ifndef RELEASE
    PushCallStack("ApplyRowPivots");
    if( A.Height() < b || b != (int)preimage.size() )
        throw std::logic_error
        ("image and preimage must be vectors of equal length that are not "
         "taller than A.");
#endif
    const int localWidth = A.LocalWidth();
    if( A.Height() == 0 || A.Width() == 0 )
    {
#ifndef RELEASE
        PopCallStack();
#endif
        return;
    }
    
    // Extract the relevant process grid information
    const Grid& g = A.Grid();
    const int r = g.Height();
    const int colAlignment = A.ColAlignment();
    const int colShift = A.ColShift();
    const int myRow = g.Row();

    // Extract the send and recv counts from the image and preimage.
    // This process's sends may be logically partitioned into two sets:
    //   (a) sends from rows [0,...,b-1]
    //   (b) sends from rows [b,...]
    // The latter is analyzed with image, the former deduced with preimage.
    std::vector<int> sendCounts(r,0), recvCounts(r,0);
    for( int i=colShift; i<b; i+=r )
    {
        const int sendRow = preimage[i];         
        const int sendTo = (colAlignment+sendRow) % r; 
        sendCounts[sendTo] += localWidth;

        const int recvRow = image[i];
        const int recvFrom = (colAlignment+recvRow) % r;
        recvCounts[recvFrom] += localWidth;
    }
    for( int i=0; i<b; ++i )
    {
        const int sendRow = preimage[i];
        if( sendRow >= b )
        {
            const int sendTo = (colAlignment+sendRow) % r;
            if( sendTo == myRow )
            {
                const int sendFrom = (colAlignment+i) % r;
                recvCounts[sendFrom] += localWidth;
            }
        }

        const int recvRow = image[i];
        if( recvRow >= b )
        {
            const int recvFrom = (colAlignment+recvRow) % r;
            if( recvFrom == myRow )
            {
                const int recvTo = (colAlignment+i) % r;
                sendCounts[recvTo] += localWidth;
            }
        }
    }

    // Construct the send and recv displacements from the counts
    std::vector<int> sendDispls(r), recvDispls(r);
    int totalSend=0, totalRecv=0;
    for( int i=0; i<r; ++i )
    {
        sendDispls[i] = totalSend;
        recvDispls[i] = totalRecv;
        totalSend += sendCounts[i];
        totalRecv += recvCounts[i];
    }
#ifndef RELEASE
    if( totalSend != totalRecv )
    {
        std::ostringstream msg;
        msg << "Send and recv counts do not match: (send,recv)=" 
             << totalSend << "," << totalRecv;
        throw std::logic_error( msg.str().c_str() );
    }
#endif

    // Fill vectors with the send data
    const int ALDim = A.LocalLDim();
    std::vector<F> sendData(std::max(1,totalSend));
    std::vector<int> offsets(r,0);
    const int localHeight = LocalLength( b, colShift, r );
    for( int iLocal=0; iLocal<localHeight; ++iLocal )
    {
        const int sendRow = preimage[colShift+iLocal*r];
        const int sendTo = (colAlignment+sendRow) % r;
        const int offset = sendDispls[sendTo]+offsets[sendTo];
        const F* ABuffer = A.LocalBuffer(iLocal,0);
        for( int jLocal=0; jLocal<localWidth; ++jLocal )
            sendData[offset+jLocal] = ABuffer[jLocal*ALDim];
        offsets[sendTo] += localWidth;
    }
    for( int i=0; i<b; ++i )
    {
        const int recvRow = image[i];
        if( recvRow >= b )
        {
            const int recvFrom = (colAlignment+recvRow) % r; 
            if( recvFrom == myRow )
            {
                const int recvTo = (colAlignment+i) % r;
                const int iLocal = (recvRow-colShift) / r;
                const int offset = sendDispls[recvTo]+offsets[recvTo];
                const F* ABuffer = A.LocalBuffer(iLocal,0);
                for( int jLocal=0; jLocal<localWidth; ++jLocal )
                    sendData[offset+jLocal] = ABuffer[jLocal*ALDim];
                offsets[recvTo] += localWidth;
            }
        }
    }

    // Communicate all pivot rows
    std::vector<F> recvData(std::max(1,totalRecv));
    mpi::AllToAll
    ( &sendData[0], &sendCounts[0], &sendDispls[0],
      &recvData[0], &recvCounts[0], &recvDispls[0], g.ColComm() );

    // Unpack the recv data
    for( int k=0; k<r; ++k )
    {
        offsets[k] = 0;
        int thisColShift = Shift( k, colAlignment, r );
        for( int i=thisColShift; i<b; i+=r )
        {
            const int sendRow = preimage[i];
            const int sendTo = (colAlignment+sendRow) % r;
            if( sendTo == myRow )
            {
                const int offset = recvDispls[k]+offsets[k];
                const int iLocal = (sendRow-colShift) / r;
                F* ABuffer = A.LocalBuffer(iLocal,0);
                for( int jLocal=0; jLocal<localWidth; ++jLocal )
                    ABuffer[jLocal*ALDim] = recvData[offset+jLocal];
                offsets[k] += localWidth;
            }
        }
    }
    for( int i=0; i<b; ++i )
    {
        const int recvRow = image[i];
        if( recvRow >= b )
        {
            const int recvTo = (colAlignment+i) % r;
            if( recvTo == myRow )
            {
                const int recvFrom = (colAlignment+recvRow) % r; 
                const int iLocal = (i-colShift) / r;
                const int offset = recvDispls[recvFrom]+offsets[recvFrom];
                F* ABuffer = A.LocalBuffer(iLocal,0);
                for( int jLocal=0; jLocal<localWidth; ++jLocal )
                    ABuffer[jLocal*ALDim] = recvData[offset+jLocal];
                offsets[recvFrom] += localWidth;
            }
        }
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Пример #18
0
inline void
internal::LocalTrmmAccumulateLUN
( Orientation orientation, UnitOrNonUnit diag, T alpha,
  const DistMatrix<T,MC,  MR  >& U,
  const DistMatrix<T,STAR,MR  >& XAdjOrTrans_STAR_MR,
        DistMatrix<T,MC,  STAR>& Z_MC_STAR )
{
#ifndef RELEASE
    PushCallStack("internal::LocalTrmmAccumulateLUN");
    if( U.Grid() != XAdjOrTrans_STAR_MR.Grid() ||
        XAdjOrTrans_STAR_MR.Grid() != Z_MC_STAR.Grid() )
        throw std::logic_error
        ("{U,X,Z} must be distributed over the same grid");
    if( U.Height() != U.Width() ||
        U.Height() != XAdjOrTrans_STAR_MR.Width() ||
        U.Height() != Z_MC_STAR.Height() ||
        XAdjOrTrans_STAR_MR.Height() != Z_MC_STAR.Width() )
    {
        std::ostringstream msg;
        msg << "Nonconformal LocalTrmmAccumulateLUN: \n"
            << "  U ~ " << U.Height() << " x " << U.Width() << "\n"
            << "  X^H/T[* ,MR] ~ " << XAdjOrTrans_STAR_MR.Height() << " x "
                                   << XAdjOrTrans_STAR_MR.Width() << "\n"
            << "  Z[MC,* ] ~ " << Z_MC_STAR.Height() << " x "
                               << Z_MC_STAR.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
    if( XAdjOrTrans_STAR_MR.RowAlignment() != U.RowAlignment() ||
        Z_MC_STAR.ColAlignment() != U.ColAlignment() )
        throw std::logic_error("Partial matrix distributions are misaligned");
#endif
    const Grid& g = U.Grid();

    // Matrix views
    DistMatrix<T,MC,MR>
        UTL(g), UTR(g),  U00(g), U01(g), U02(g),
        UBL(g), UBR(g),  U10(g), U11(g), U12(g),
                         U20(g), U21(g), U22(g);

    DistMatrix<T,MC,MR> D11(g);

    DistMatrix<T,STAR,MR>
        XLAdjOrTrans_STAR_MR(g), XRAdjOrTrans_STAR_MR(g),
        X0AdjOrTrans_STAR_MR(g), X1AdjOrTrans_STAR_MR(g), 
        X2AdjOrTrans_STAR_MR(g);

    DistMatrix<T,MC,STAR>
        ZT_MC_STAR(g),  Z0_MC_STAR(g),
        ZB_MC_STAR(g),  Z1_MC_STAR(g),
                        Z2_MC_STAR(g);

    const int ratio = std::max( g.Height(), g.Width() );
    PushBlocksizeStack( ratio*Blocksize() );

    LockedPartitionDownDiagonal
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    LockedPartitionRight
    ( XAdjOrTrans_STAR_MR, XLAdjOrTrans_STAR_MR, XRAdjOrTrans_STAR_MR, 0 );
    PartitionDown
    ( Z_MC_STAR, ZT_MC_STAR,
                 ZB_MC_STAR, 0 );
    while( UTL.Height() < U.Height() )
    {
        LockedRepartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, /**/ U01, U02,
         /*************/ /******************/
               /**/       U10, /**/ U11, U12,
          UBL, /**/ UBR,  U20, /**/ U21, U22 );

        LockedRepartitionRight
        ( XLAdjOrTrans_STAR_MR, /**/ XRAdjOrTrans_STAR_MR,
          X0AdjOrTrans_STAR_MR, /**/ X1AdjOrTrans_STAR_MR, X2AdjOrTrans_STAR_MR 
        );

        RepartitionDown
        ( ZT_MC_STAR,  Z0_MC_STAR,
         /**********/ /**********/
                       Z1_MC_STAR,
          ZB_MC_STAR,  Z2_MC_STAR );

        D11.AlignWith( U11 );
        //--------------------------------------------------------------------//
        D11 = U11;
        MakeTrapezoidal( LEFT, UPPER, 0, D11 );
        if( diag == UNIT )
            SetDiagonalToOne( D11 );
        internal::LocalGemm
        ( NORMAL, orientation, alpha, D11, X1AdjOrTrans_STAR_MR,
          (T)1, Z1_MC_STAR );

        internal::LocalGemm
        ( NORMAL, orientation, alpha, U01, X1AdjOrTrans_STAR_MR,
          (T)1, Z0_MC_STAR );
        //--------------------------------------------------------------------//
        D11.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, U01, /**/ U02,
               /**/       U10, U11, /**/ U12,
         /*************/ /******************/
          UBL, /**/ UBR,  U20, U21, /**/ U22 );

        SlideLockedPartitionRight
        ( XLAdjOrTrans_STAR_MR,                       /**/ XRAdjOrTrans_STAR_MR,
          X0AdjOrTrans_STAR_MR, X1AdjOrTrans_STAR_MR, /**/ X2AdjOrTrans_STAR_MR 
        );

        SlidePartitionDown
        ( ZT_MC_STAR,  Z0_MC_STAR,
                       Z1_MC_STAR,
         /**********/ /**********/
          ZB_MC_STAR,  Z2_MC_STAR );
    }
    PopBlocksizeStack();
#ifndef RELEASE
    PopCallStack();
#endif
}
Пример #19
0
inline void
LocalSymvRowAccumulateU
( T alpha, 
  const DistMatrix<T>& A,
  const DistMatrix<T,STAR,MC>& x_STAR_MC,
  const DistMatrix<T,STAR,MR>& x_STAR_MR,
        DistMatrix<T,STAR,MC>& z_STAR_MC,
        DistMatrix<T,STAR,MR>& z_STAR_MR )
{
#ifndef RELEASE
    PushCallStack("internal::LocalSymvRowAccumulateU");
    if( A.Grid() != x_STAR_MC.Grid() ||
        x_STAR_MC.Grid() != x_STAR_MR.Grid() ||
        x_STAR_MR.Grid() != z_STAR_MC.Grid() ||
        z_STAR_MC.Grid() != z_STAR_MR.Grid() )
        throw std::logic_error
        ("{A,x,z} must be distributed over the same grid");
    if( x_STAR_MC.Height() != 1 || x_STAR_MR.Height() != 1 ||
        z_STAR_MC.Height() != 1 || z_STAR_MR.Height() != 1 )
        throw std::logic_error("Expected x and z to be row vectors");
    if( A.Height() != A.Width() || 
        A.Height() != x_STAR_MC.Width() ||
        A.Height() != x_STAR_MR.Width() ||
        A.Height() != z_STAR_MC.Width() ||
        A.Height() != z_STAR_MR.Width() )
    {
        std::ostringstream msg;
        msg << "Nonconformal LocalSymvRowAccumulateU: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  x[* ,MC] ~ " << x_STAR_MC.Height() << " x " 
                               << x_STAR_MC.Width() << "\n"
            << "  x[* ,MR] ~ " << x_STAR_MR.Height() << " x " 
                               << x_STAR_MR.Width() << "\n"
            << "  z[* ,MC] ~ " << z_STAR_MC.Height() << " x " 
                               << z_STAR_MC.Width() << "\n"
            << "  z[* ,MR] ~ " << z_STAR_MR.Height() << " x " 
                               << z_STAR_MR.Width() << "\n";
        throw std::logic_error( msg.str() );
    }
    if( x_STAR_MC.RowAlignment() != A.ColAlignment() ||
        x_STAR_MR.RowAlignment() != A.RowAlignment() ||
        z_STAR_MC.RowAlignment() != A.ColAlignment() ||
        z_STAR_MR.RowAlignment() != A.RowAlignment() )
        throw std::logic_error("Partial matrix distributions are misaligned");
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> A11(g), A12(g);
    DistMatrix<T> D11(g);

    DistMatrix<T,STAR,MC> x1_STAR_MC(g);
    DistMatrix<T,STAR,MR> 
        xL_STAR_MR(g), xR_STAR_MR(g),
        x0_STAR_MR(g), x1_STAR_MR(g), x2_STAR_MR(g);
    DistMatrix<T,STAR,MC> z1_STAR_MC(g);
    DistMatrix<T,STAR,MR> z1_STAR_MR(g), z2_STAR_MR(g);

    // We want our local gemvs to be of width blocksize, so we will 
    // temporarily change to max(r,c) times the current blocksize
    const int ratio = std::max( g.Height(), g.Width() );
    PushBlocksizeStack( ratio*LocalSymvBlocksize<T>() );
                 
    LockedPartitionRight( x_STAR_MR,  xL_STAR_MR, xR_STAR_MR, 0 );
    while( xL_STAR_MR.Width() < x_STAR_MR.Width() )
    {
        LockedRepartitionRight
        ( xL_STAR_MR, /**/ xR_STAR_MR, 
          x0_STAR_MR, /**/ x1_STAR_MR, x2_STAR_MR );

        const int n0 = x0_STAR_MR.Width();
        const int n1 = x1_STAR_MR.Width();
        const int n2 = x2_STAR_MR.Width();
        LockedView( A11, A, n0, n0,    n1, n1 );
        LockedView( A12, A, n0, n0+n1, n1, n2 );
        LockedView( x1_STAR_MC, x_STAR_MC, 0, n0, 1, n1 );
        View( z1_STAR_MC, z_STAR_MC, 0, n0,    1, n1 );
        View( z1_STAR_MR, z_STAR_MR, 0, n0,    1, n1 );
        View( z2_STAR_MR, z_STAR_MR, 0, n0+n1, 1, n2 );

        D11.AlignWith( A11 );
        //--------------------------------------------------------------------//
        // TODO: These diagonal block updates can be greatly improved
        D11 = A11;
        MakeTrapezoidal( LEFT, UPPER, 0, D11 );
        Gemv
        ( NORMAL, 
          alpha, D11.LockedLocalMatrix(), 
                 x1_STAR_MR.LockedLocalMatrix(),
          T(1),  z1_STAR_MC.LocalMatrix() );
        MakeTrapezoidal( LEFT, UPPER, 1, D11 );
        Gemv
        ( TRANSPOSE,
          alpha, D11.LockedLocalMatrix(),
                 x1_STAR_MC.LockedLocalMatrix(),
          T(1),  z1_STAR_MR.LocalMatrix() );

        Gemv
        ( NORMAL,
          alpha, A12.LockedLocalMatrix(),
                 x2_STAR_MR.LockedLocalMatrix(),
          T(1),  z1_STAR_MC.LocalMatrix() );
        Gemv
        ( TRANSPOSE,
          alpha, A12.LockedLocalMatrix(),
                 x1_STAR_MC.LockedLocalMatrix(),
          T(1),  z2_STAR_MR.LocalMatrix() );
        //--------------------------------------------------------------------//
        D11.FreeAlignments();

        SlideLockedPartitionRight
        ( xL_STAR_MR,             /**/ xR_STAR_MR,
          x0_STAR_MR, x1_STAR_MR, /**/ x2_STAR_MR );
    }
    PopBlocksizeStack();
#ifndef RELEASE
    PopCallStack();
#endif
}
Пример #20
0
inline void
TrsmLLTSmall
( Orientation orientation, UnitOrNonUnit diag,
  F alpha, const DistMatrix<F,STAR,VR>& L, DistMatrix<F,VR,STAR>& X,
  bool checkIfSingular )
{
#ifndef RELEASE
    PushCallStack("internal::TrsmLLTSmall");
    if( L.Grid() != X.Grid() )
        throw std::logic_error
        ("L and X must be distributed over the same grid");
    if( orientation == NORMAL )
        throw std::logic_error("TrsmLLT expects a (Conjugate)Transpose option");
    if( L.Height() != L.Width() || L.Height() != X.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal TrsmLLT: \n"
            << "  L ~ " << L.Height() << " x " << L.Width() << "\n"
            << "  X ~ " << X.Height() << " x " << X.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
    if( L.RowAlignment() != X.ColAlignment() )
        throw std::logic_error("L and X must be aligned");
#endif
    const Grid& g = L.Grid();

    // Matrix views
    DistMatrix<F,STAR,VR> 
        LTL(g), LTR(g),  L00(g), L01(g), L02(g),
        LBL(g), LBR(g),  L10(g), L11(g), L12(g),
                         L20(g), L21(g), L22(g);

    DistMatrix<F,VR,STAR> XT(g),  X0(g),
                          XB(g),  X1(g),
                                  X2(g);

    // Temporary distributions
    DistMatrix<F,STAR,STAR> L11_STAR_STAR(g);
    DistMatrix<F,STAR,STAR> X1_STAR_STAR(g);

    // Start the algorithm
    Scale( alpha, X );
    LockedPartitionUpDiagonal
    ( L, LTL, LTR,
         LBL, LBR, 0 );
    PartitionUp
    ( X, XT,
         XB, 0 );
    while( XT.Height() > 0 )
    {
        LockedRepartitionUpDiagonal
        ( LTL, /**/ LTR,  L00, L01, /**/ L02,
               /**/       L10, L11, /**/ L12,
         /*************/ /******************/
          LBL, /**/ LBR,  L20, L21, /**/ L22 );

        RepartitionUp
        ( XT,  X0,
               X1,
         /**/ /**/
          XB,  X2 ); 

        //--------------------------------------------------------------------//
        L11_STAR_STAR = L11; // L11[* ,* ] <- L11[* ,VR]
        X1_STAR_STAR = X1;   // X1[* ,* ] <- X1[VR,* ]

        // X1[* ,* ] := L11^-[T/H][* ,* ] X1[* ,* ]
        LocalTrsm
        ( LEFT, LOWER, orientation, diag,
          F(1), L11_STAR_STAR, X1_STAR_STAR, checkIfSingular );

        X1 = X1_STAR_STAR;

        // X0[VR,* ] -= L10[* ,VR]^(T/H) X1[* ,* ]
        LocalGemm( orientation, NORMAL, F(-1), L10, X1_STAR_STAR, F(1), X0 );
        //--------------------------------------------------------------------//

        SlideLockedPartitionUpDiagonal
        ( LTL, /**/ LTR,  L00, /**/ L01, L02,
         /*************/ /******************/
               /**/       L10, /**/ L11, L12,
          LBL, /**/ LBR,  L20, /**/ L21, L22 );

        SlidePartitionUp
        ( XT,  X0,
         /**/ /**/
               X1,
          XB,  X2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Пример #21
0
inline R
Row( DistMatrix<R>& chi, DistMatrix<R>& x )
{
#ifndef RELEASE
    PushCallStack("reflector::Row");
    if( chi.Grid() != x.Grid() )
        throw std::logic_error
        ("chi and x must be distributed over the same grid");
    if( chi.Height() != 1 || chi.Width() != 1 )
        throw std::logic_error("chi must be a scalar");
    if( x.Height() != 1 )
        throw std::logic_error("x must be a row vector");
    if( chi.Grid().Row() != chi.ColAlignment() )
        throw std::logic_error("Reflecting with incorrect row of processes");
    if( x.Grid().Row() != x.ColAlignment() )
        throw std::logic_error("Reflecting with incorrect row of processes");
#endif
    const Grid& grid = x.Grid();
    mpi::Comm rowComm = grid.RowComm();
    const int gridCol = grid.Col();
    const int gridWidth = grid.Width();
    const int rowAlignment = chi.RowAlignment();

    std::vector<R> localNorms(gridWidth);
    R localNorm = Nrm2( x.LockedMatrix() ); 
    mpi::AllGather( &localNorm, 1, &localNorms[0], 1, rowComm );
    R norm = blas::Nrm2( gridWidth, &localNorms[0], 1 );

    if( norm == 0 )
    {
        if( gridCol == rowAlignment )
            chi.SetLocal(0,0,-chi.GetLocal(0,0));
#ifndef RELEASE
        PopCallStack();
#endif
        return R(2);
    }

    R alpha;
    if( gridCol == rowAlignment )
        alpha = chi.GetLocal(0,0);
    mpi::Broadcast( &alpha, 1, rowAlignment, rowComm );

    R beta;
    if( alpha <= 0 )
        beta = lapack::SafeNorm( alpha, norm );
    else
        beta = -lapack::SafeNorm( alpha, norm );

    const R one = 1;
    const R safeMin = lapack::MachineSafeMin<R>();
    const R epsilon = lapack::MachineEpsilon<R>();
    const R safeInv = safeMin/epsilon;
    int count = 0;
    if( Abs(beta) < safeInv )
    {
        R invOfSafeInv = one/safeInv;
        do
        {
            ++count;
            Scale( invOfSafeInv, x );
            alpha *= invOfSafeInv;
            beta *= invOfSafeInv;
        } while( Abs(beta) < safeInv );

        localNorm = Nrm2( x.LockedMatrix() );
        mpi::AllGather( &localNorm, 1, &localNorms[0], 1, rowComm );
        norm = blas::Nrm2( gridWidth, &localNorms[0], 1 );
        if( alpha <= 0 )
            beta = lapack::SafeNorm( alpha, norm );
        else
            beta = -lapack::SafeNorm( alpha, norm );
    }

    R tau = (beta-alpha)/beta;
    Scale( one/(alpha-beta), x );

    for( int j=0; j<count; ++j )
        beta *= safeInv;
    if( gridCol == rowAlignment )
        chi.SetLocal(0,0,beta);
        
#ifndef RELEASE
    PopCallStack();
#endif
    return tau;
}
Пример #22
0
inline void
LocalTrmmAccumulateLLT
( Orientation orientation, UnitOrNonUnit diag, T alpha,
  const DistMatrix<T>& L,
  const DistMatrix<T,MC,STAR>& X_MC_STAR,
        DistMatrix<T,MR,STAR>& Z_MR_STAR )
{
#ifndef RELEASE
    PushCallStack("internal::LocalTrmmAccumulateLLT");
    if( L.Grid() != X_MC_STAR.Grid() ||
        X_MC_STAR.Grid() != Z_MR_STAR.Grid() )
        throw std::logic_error
        ("{L,X,Z} must be distributed over the same grid");
    if( L.Height() != L.Width() ||
        L.Height() != X_MC_STAR.Height() ||
        L.Height() != Z_MR_STAR.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal LocalTrmmAccumulateLLT: " << "\n"
            << "  L ~ " << L.Height() << " x " << L.Width() << "\n"
            << "  X[MC,* ] ~ " << X_MC_STAR.Height() << " x "
                               << X_MC_STAR.Width() << "\n"
            << "  Z[MR,* ] ` " << Z_MR_STAR.Height() << " x "
                               << Z_MR_STAR.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
    if( X_MC_STAR.ColAlignment() != L.ColAlignment() ||
        Z_MR_STAR.ColAlignment() != L.RowAlignment() )
        throw std::logic_error("Partial matrix distributions are misaligned");
#endif
    const Grid& g = L.Grid();
    
    // Matrix views
    DistMatrix<T>
        LTL(g), LTR(g),  L00(g), L01(g), L02(g),
        LBL(g), LBR(g),  L10(g), L11(g), L12(g),
                         L20(g), L21(g), L22(g);

    DistMatrix<T> D11(g);

    DistMatrix<T,MC,STAR>
        XT_MC_STAR(g),  X0_MC_STAR(g),
        XB_MC_STAR(g),  X1_MC_STAR(g),
                        X2_MC_STAR(g);

    DistMatrix<T,MR,STAR>
        ZT_MR_STAR(g),  Z0_MR_STAR(g),
        ZB_MR_STAR(g),  Z1_MR_STAR(g),
                        Z2_MR_STAR(g);

    const int ratio = std::max( g.Height(), g.Width() );
    PushBlocksizeStack( ratio*Blocksize() );

    LockedPartitionDownDiagonal
    ( L, LTL, LTR,
         LBL, LBR, 0 );
    LockedPartitionDown
    ( X_MC_STAR, XT_MC_STAR,
                 XB_MC_STAR, 0 );
    PartitionDown
    ( Z_MR_STAR, ZT_MR_STAR,
                 ZB_MR_STAR, 0 );
    while( LTL.Height() < L.Height() )
    {
        LockedRepartitionDownDiagonal
        ( LTL, /**/ LTR,  L00, /**/ L01, L02,
         /*************/ /******************/
               /**/       L10, /**/ L11, L12,
          LBL, /**/ LBR,  L20, /**/ L21, L22 );

        LockedRepartitionDown
        ( XT_MC_STAR,  X0_MC_STAR,
         /**********/ /**********/
                       X1_MC_STAR,
          XB_MC_STAR,  X2_MC_STAR );

        RepartitionDown
        ( ZT_MR_STAR,  Z0_MR_STAR,
         /**********/ /**********/
                       Z1_MR_STAR,
          ZB_MR_STAR,  Z2_MR_STAR );

        D11.AlignWith( L11 );
        //--------------------------------------------------------------------//
        D11 = L11;
        MakeTrapezoidal( LEFT, LOWER, 0, D11 );
        if( diag == UNIT )
            SetDiagonalToOne( D11 );
        LocalGemm
        ( orientation, NORMAL, alpha, D11, X1_MC_STAR, T(1), Z1_MR_STAR );

        LocalGemm
        ( orientation, NORMAL, alpha, L21, X2_MC_STAR, T(1), Z1_MR_STAR );
        //--------------------------------------------------------------------//
        D11.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( LTL, /**/ LTR,  L00, L01, /**/ L02,
               /**/       L10, L11, /**/ L12,
         /*************/ /******************/
          LBL, /**/ LBR,  L20, L21, /**/ L22 );

        SlideLockedPartitionDown
        ( XT_MC_STAR,  X0_MC_STAR,
                       X1_MC_STAR,
         /**********/ /**********/
          XB_MC_STAR,  X2_MC_STAR );

        SlidePartitionDown
        ( ZT_MR_STAR,  Z0_MR_STAR,
                       Z1_MR_STAR,
         /**********/ /**********/
          ZB_MR_STAR,  Z2_MR_STAR );
    }
    PopBlocksizeStack();
#ifndef RELEASE
    PopCallStack();
#endif
}
Пример #23
0
inline void
LocalSymmetricAccumulateRU
( Orientation orientation, T alpha,
  const DistMatrix<T,MC,  MR  >& A,
  const DistMatrix<T,STAR,MC  >& B_STAR_MC,
  const DistMatrix<T,MR,  STAR>& BTrans_MR_STAR,
        DistMatrix<T,MC,  STAR>& ZTrans_MC_STAR,
        DistMatrix<T,MR,  STAR>& ZTrans_MR_STAR )
{
#ifndef RELEASE
    PushCallStack("internal::LocalSymmetricAccumulateRU");
    if( A.Grid() != B_STAR_MC.Grid() ||
        B_STAR_MC.Grid() != BTrans_MR_STAR.Grid() ||
        BTrans_MR_STAR.Grid() != ZTrans_MC_STAR.Grid() ||
        ZTrans_MC_STAR.Grid() != ZTrans_MR_STAR.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( A.Height() != A.Width() ||
        A.Height() != B_STAR_MC.Width() ||
        A.Height() != BTrans_MR_STAR.Height() ||
        A.Height() != ZTrans_MC_STAR.Height() ||
        A.Height() != ZTrans_MR_STAR.Height() ||
        B_STAR_MC.Height() != BTrans_MR_STAR.Width() ||
        BTrans_MR_STAR.Width() != ZTrans_MC_STAR.Width() ||
        ZTrans_MC_STAR.Width() != ZTrans_MR_STAR.Width() )
    {
        std::ostringstream msg;
        msg << "Nonconformal LocalSymmetricAccumulateRU: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B[* ,MC] ~ " << B_STAR_MC.Height() << " x "
                               << B_STAR_MC.Width() << "\n"
            << "  B^H/T[MR,* ] ~ " << BTrans_MR_STAR.Height() << " x "
                                   << BTrans_MR_STAR.Width() << "\n"
            << "  Z^H/T[MC,* ] ~ " << ZTrans_MC_STAR.Height() << " x "
                                   << ZTrans_MC_STAR.Width() << "\n"
            << "  Z^H/T[MR,* ] ~ " << ZTrans_MR_STAR.Height() << " x "
                                   << ZTrans_MR_STAR.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
    if( B_STAR_MC.RowAlignment() != A.ColAlignment() ||
        BTrans_MR_STAR.ColAlignment() != A.RowAlignment() ||
        ZTrans_MC_STAR.ColAlignment() != A.ColAlignment() ||
        ZTrans_MR_STAR.ColAlignment() != A.RowAlignment() )
        throw std::logic_error("Partial matrix distributions are misaligned");
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T>
        ATL(g), ATR(g),  A00(g), A01(g), A02(g),
        ABL(g), ABR(g),  A10(g), A11(g), A12(g),
                         A20(g), A21(g), A22(g);

    DistMatrix<T> D11(g);

    DistMatrix<T,STAR,MC>
        BL_STAR_MC(g), BR_STAR_MC(g),
        B0_STAR_MC(g), B1_STAR_MC(g), B2_STAR_MC(g);

    DistMatrix<T,MR,STAR>
        BTTrans_MR_STAR(g),  B0Trans_MR_STAR(g),
        BBTrans_MR_STAR(g),  B1Trans_MR_STAR(g),
                             B2Trans_MR_STAR(g);

    DistMatrix<T,MC,STAR>
        ZTTrans_MC_STAR(g),  Z0Trans_MC_STAR(g),
        ZBTrans_MC_STAR(g),  Z1Trans_MC_STAR(g),
                             Z2Trans_MC_STAR(g);

    DistMatrix<T,MR,STAR>
        ZBTrans_MR_STAR(g),  Z0Trans_MR_STAR(g),
        ZTTrans_MR_STAR(g),  Z1Trans_MR_STAR(g),
                             Z2Trans_MR_STAR(g);

    const int ratio = std::max( g.Height(), g.Width() );
    PushBlocksizeStack( ratio*Blocksize() );

    LockedPartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    LockedPartitionRight( B_STAR_MC,  BL_STAR_MC, BR_STAR_MC, 0 );
    LockedPartitionDown
    ( BTrans_MR_STAR, BTTrans_MR_STAR,
                      BBTrans_MR_STAR, 0 );
    PartitionDown
    ( ZTrans_MC_STAR, ZTTrans_MC_STAR,
                      ZBTrans_MC_STAR, 0 );
    PartitionDown
    ( ZTrans_MR_STAR, ZTTrans_MR_STAR,
                      ZBTrans_MR_STAR, 0 );
    while( ATL.Height() < A.Height() )
    {
        LockedRepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        LockedRepartitionRight
        ( BL_STAR_MC, /**/ BR_STAR_MC,
          B0_STAR_MC, /**/ B1_STAR_MC, B2_STAR_MC );

        LockedRepartitionDown
        ( BTTrans_MR_STAR,  B0Trans_MR_STAR,
         /***************/ /***************/
                            B1Trans_MR_STAR,
          BBTrans_MR_STAR,  B2Trans_MR_STAR );

        RepartitionDown
        ( ZTTrans_MC_STAR,  Z0Trans_MC_STAR,
         /***************/ /***************/
                            Z1Trans_MC_STAR,
          ZBTrans_MC_STAR,  Z2Trans_MC_STAR );

        RepartitionDown
        ( ZTTrans_MR_STAR,  Z0Trans_MR_STAR,
         /***************/ /***************/
                            Z1Trans_MR_STAR,
          ZBTrans_MR_STAR,  Z2Trans_MR_STAR );

        D11.AlignWith( A11 );
        //--------------------------------------------------------------------//
        D11 = A11;
        MakeTriangular( UPPER, D11 );
        LocalGemm
        ( orientation, orientation,
          alpha, D11, B1_STAR_MC, T(1), Z1Trans_MR_STAR );
        SetDiagonal( D11, T(0) );

        LocalGemm
        ( NORMAL, NORMAL, alpha, D11, B1Trans_MR_STAR, T(1), Z1Trans_MC_STAR );

        LocalGemm
        ( orientation, orientation, 
          alpha, A12, B1_STAR_MC, T(1), Z2Trans_MR_STAR );

        LocalGemm
        ( NORMAL, NORMAL, alpha, A12, B2Trans_MR_STAR, T(1), Z1Trans_MC_STAR );
        //--------------------------------------------------------------------//
        D11.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        SlideLockedPartitionRight
        ( BL_STAR_MC,             /**/ BR_STAR_MC,
          B0_STAR_MC, B1_STAR_MC, /**/ B2_STAR_MC );

        SlideLockedPartitionDown
        ( BTTrans_MR_STAR,  B0Trans_MR_STAR,
                            B1Trans_MR_STAR,
         /***************/ /***************/
          BBTrans_MR_STAR,  B2Trans_MR_STAR );

        SlidePartitionDown
        ( ZTTrans_MC_STAR,  Z0Trans_MC_STAR,
                            Z1Trans_MC_STAR,
         /***************/ /***************/
          ZBTrans_MC_STAR,  Z2Trans_MC_STAR );

        SlidePartitionDown
        ( ZTTrans_MR_STAR,  Z0Trans_MR_STAR,
                            Z1Trans_MR_STAR,
         /***************/ /***************/
          ZBTrans_MR_STAR,  Z2Trans_MR_STAR );
    }
    PopBlocksizeStack();
#ifndef RELEASE
    PopCallStack();
#endif
}