void SDC ( UpperOrLower uplo, ElementalMatrix<F>& APre, ElementalMatrix<Base<F>>& wPre, const HermitianSDCCtrl<Base<F>> ctrl ) { DEBUG_CSE typedef Base<F> Real; const Int n = APre.Height(); wPre.Resize( n, 1 ); if( APre.Grid().Size() == 1 ) { HermitianEig( uplo, APre.Matrix(), wPre.Matrix() ); return; } if( n <= ctrl.cutoff ) { HermitianEig( uplo, APre, wPre ); return; } DistMatrixReadWriteProxy<F,F,MC,MR> AProx( APre ); DistMatrixWriteProxy<Real,Real,VR,STAR> wProx( wPre ); auto& A = AProx.Get(); auto& w = wProx.Get(); // Perform this level's split const auto part = SpectralDivide( uplo, A, ctrl ); auto ind1 = IR(0,part.index); auto ind2 = IR(part.index,n); auto ATL = A( ind1, ind1 ); auto ATR = A( ind1, ind2 ); auto ABL = A( ind2, ind1 ); auto ABR = A( ind2, ind2 ); auto wT = w( ind1, ALL ); auto wB = w( ind2, ALL ); if( uplo == LOWER ) Zero( ABL ); else Zero( ATR ); // Recurse on the two subproblems DistMatrix<F> ATLSub, ABRSub; DistMatrix<Real,VR,STAR> wTSub, wBSub; PushSubproblems ( ATL, ABR, ATLSub, ABRSub, wT, wB, wTSub, wBSub, ctrl.progress ); if( ATL.Participating() ) SDC( uplo, ATLSub, wTSub, ctrl ); if( ABR.Participating() ) SDC( uplo, ABRSub, wBSub, ctrl ); PullSubproblems( ATL, ABR, ATLSub, ABRSub, wT, wB, wTSub, wBSub ); }
void Contract ( const ElementalMatrix<T>& A, ElementalMatrix<T>& B ) { DEBUG_ONLY(CSE cse("Contract")) AssertSameGrids( A, B ); const Dist U = B.ColDist(); const Dist V = B.RowDist(); // TODO: Shorten this implementation? if( A.ColDist() == U && A.RowDist() == V ) { Copy( A, B ); } else if( A.ColDist() == U && A.RowDist() == Partial(V) ) { B.AlignAndResize ( A.ColAlign(), A.RowAlign(), A.Height(), A.Width(), false, false ); Zeros( B.Matrix(), B.LocalHeight(), B.LocalWidth() ); AxpyContract( T(1), A, B ); } else if( A.ColDist() == Partial(U) && A.RowDist() == V ) { B.AlignAndResize ( A.ColAlign(), A.RowAlign(), A.Height(), A.Width(), false, false ); Zeros( B.Matrix(), B.LocalHeight(), B.LocalWidth() ); AxpyContract( T(1), A, B ); } else if( A.ColDist() == U && A.RowDist() == Collect(V) ) { B.AlignColsAndResize ( A.ColAlign(), A.Height(), A.Width(), false, false ); Zeros( B.Matrix(), B.LocalHeight(), B.LocalWidth() ); AxpyContract( T(1), A, B ); } else if( A.ColDist() == Collect(U) && A.RowDist() == V ) { B.AlignRowsAndResize ( A.RowAlign(), A.Height(), A.Width(), false, false ); Zeros( B.Matrix(), B.LocalHeight(), B.LocalWidth() ); AxpyContract( T(1), A, B ); } else if( A.ColDist() == Collect(U) && A.RowDist() == Collect(V) ) { Zeros( B, A.Height(), A.Width() ); AxpyContract( T(1), A, B ); } else LogicError("Incompatible distributions"); }
void TransposeAxpyContract ( T alpha, const ElementalMatrix<T>& A, ElementalMatrix<T>& B, bool conjugate ) { EL_DEBUG_CSE const Dist U = B.ColDist(); const Dist V = B.RowDist(); if( A.ColDist() == V && A.RowDist() == U ) { TransposeAxpy( alpha, A, B, conjugate ); } else if( (A.ColDist() == V && A.RowDist() == Partial(U)) || (A.ColDist() == V && A.RowDist() == Collect(U)) || (A.RowDist() == U && A.ColDist() == Partial(V)) || (A.RowDist() == U && A.ColDist() == Collect(V)) ) { unique_ptr<ElementalMatrix<T>> ASumFilt( B.ConstructTranspose(B.Grid(),B.Root()) ); if( B.ColConstrained() ) ASumFilt->AlignRowsWith( B, true ); if( B.RowConstrained() ) ASumFilt->AlignColsWith( B, true ); Contract( A, *ASumFilt ); if( !B.ColConstrained() ) B.AlignColsWith( *ASumFilt, false ); if( !B.RowConstrained() ) B.AlignRowsWith( *ASumFilt, false ); // We should have ensured that the alignments are compatible TransposeAxpy( alpha, ASumFilt->LockedMatrix(), B.Matrix(), conjugate ); } else LogicError("Incompatible distributions"); }
void EntrywiseMap ( const ElementalMatrix<S>& A, ElementalMatrix<T>& B, function<T(S)> func ) { if( A.DistData().colDist == B.DistData().colDist && A.DistData().rowDist == B.DistData().rowDist ) { B.AlignWith( A.DistData() ); B.Resize( A.Height(), A.Width() ); EntrywiseMap( A.LockedMatrix(), B.Matrix(), func ); } else { B.Resize( A.Height(), A.Width() ); #define GUARD(CDIST,RDIST) \ B.DistData().colDist == CDIST && B.DistData().rowDist == RDIST #define PAYLOAD(CDIST,RDIST) \ DistMatrix<S,CDIST,RDIST> AProx(B.Grid()); \ AProx.AlignWith( B.DistData() ); \ Copy( A, AProx ); \ EntrywiseMap( AProx.Matrix(), B.Matrix(), func ); #include <El/macros/GuardAndPayload.h> #undef GUARD #undef PAYLOAD } }
void TransposeContract ( const ElementalMatrix<T>& A, ElementalMatrix<T>& B, bool conjugate ) { EL_DEBUG_CSE const Dist U = B.ColDist(); const Dist V = B.RowDist(); if( A.ColDist() == V && A.RowDist() == Partial(U) ) { Transpose( A, B, conjugate ); } else { unique_ptr<ElementalMatrix<T>> ASumFilt( B.ConstructTranspose(B.Grid(),B.Root()) ); if( B.ColConstrained() ) ASumFilt->AlignRowsWith( B, true ); if( B.RowConstrained() ) ASumFilt->AlignColsWith( B, true ); Contract( A, *ASumFilt ); if( !B.ColConstrained() ) B.AlignColsWith( *ASumFilt, false ); if( !B.RowConstrained() ) B.AlignRowsWith( *ASumFilt, false ); // We should have ensured that the alignments match B.Resize( A.Width(), A.Height() ); Transpose( ASumFilt->LockedMatrix(), B.Matrix(), conjugate ); } }
void MakeExtendedKahan ( ElementalMatrix<F>& A, Base<F> phi, Base<F> mu ) { EL_DEBUG_CSE typedef Base<F> Real; if( A.Height() != A.Width() ) LogicError("Extended Kahan matrices must be square"); const Int n = A.Height(); if( n % 3 != 0 ) LogicError("Dimension must be an integer multiple of 3"); const Int l = n / 3; if( !l || (l & (l-1)) ) LogicError("n/3 is not a power of two"); Int k=0; while( Int(1u<<k) < l ) ++k; if( phi <= Real(0) || phi >= Real(1) ) LogicError("phi must be in (0,1)"); if( mu <= Real(0) || mu >= Real(1) ) LogicError("mu must be in (0,1)"); // Start by setting A to the identity, and then modify the necessary // l x l blocks of its 3 x 3 partitioning. MakeIdentity( A ); unique_ptr<ElementalMatrix<F>> ABlock( A.Construct(A.Grid(),A.Root()) ); View( *ABlock, A, IR(2*l,3*l), IR(2*l,3*l) ); *ABlock *= mu; View( *ABlock, A, IR(0,l), IR(l,2*l) ); Walsh( *ABlock, k ); *ABlock *= -phi; View( *ABlock, A, IR(l,2*l), IR(2*l,3*l) ); Walsh( *ABlock, k ); *ABlock *= phi; // Now scale A by S const Real zeta = Sqrt(Real(1)-phi*phi); auto& ALoc = A.Matrix(); for( Int iLoc=0; iLoc<A.LocalHeight(); ++iLoc ) { const Int i = A.GlobalRow(iLoc); const Real gamma = Pow(zeta,Real(i)); for( Int jLoc=0; jLoc<A.LocalWidth(); ++jLoc ) ALoc(iLoc,jLoc) *= gamma; } }
void IndexDependentMap ( const ElementalMatrix<S>& A, ElementalMatrix<T>& B, function<T(Int,Int,S)> func ) { DEBUG_CSE const Int mLoc = A.LocalHeight(); const Int nLoc = A.LocalWidth(); B.AlignWith( A.DistData() ); B.Resize( A.Height(), A.Width() ); auto& ALoc = A.LockedMatrix(); auto& BLoc = B.Matrix(); for( Int jLoc=0; jLoc<nLoc; ++jLoc ) { const Int j = A.GlobalCol(jLoc); for( Int iLoc=0; iLoc<mLoc; ++iLoc ) { const Int i = A.GlobalRow(iLoc); BLoc(iLoc,jLoc) = func(i,j,ALoc(iLoc,jLoc)); } } }
void SDC ( UpperOrLower uplo, ElementalMatrix<F>& APre, ElementalMatrix<Base<F>>& wPre, ElementalMatrix<F>& QPre, const HermitianSDCCtrl<Base<F>> ctrl ) { DEBUG_CSE typedef Base<F> Real; const Grid& g = APre.Grid(); const Int n = APre.Height(); wPre.Resize( n, 1 ); QPre.Resize( n, n ); if( APre.Grid().Size() == 1 ) { HermitianEig( uplo, APre.Matrix(), wPre.Matrix(), QPre.Matrix() ); return; } if( n <= ctrl.cutoff ) { HermitianEig( uplo, APre, wPre, QPre ); return; } DistMatrixReadWriteProxy<F,F,MC,MR> AProx( APre ); DistMatrixWriteProxy<F,F,MC,MR> QProx( QPre ); DistMatrixWriteProxy<Real,Real,VR,STAR> wProx( wPre ); auto& A = AProx.Get(); auto& Q = QProx.Get(); auto& w = wProx.Get(); // Perform this level's split const auto part = SpectralDivide( uplo, A, Q, ctrl ); auto ind1 = IR(0,part.index); auto ind2 = IR(part.index,n); auto ATL = A( ind1, ind1 ); auto ATR = A( ind1, ind2 ); auto ABL = A( ind2, ind1 ); auto ABR = A( ind2, ind2 ); auto wT = w( ind1, ALL ); auto wB = w( ind2, ALL ); auto QL = Q( ALL, ind1 ); auto QR = Q( ALL, ind2 ); if( uplo == LOWER ) Zero( ABL ); else Zero( ATR ); // Recurse on the two subproblems DistMatrix<F> ATLSub, ABRSub, ZTSub, ZBSub; DistMatrix<Real,VR,STAR> wTSub, wBSub; PushSubproblems ( ATL, ABR, ATLSub, ABRSub, wT, wB, wTSub, wBSub, ZTSub, ZBSub, ctrl.progress ); if( ATLSub.Participating() ) SDC( uplo, ATLSub, wTSub, ZTSub, ctrl ); if( ABRSub.Participating() ) SDC( uplo, ABRSub, wBSub, ZBSub, ctrl ); // Pull the results back to this grid DistMatrix<F> ZT(g), ZB(g); PullSubproblems ( ATL, ABR, ATLSub, ABRSub, wT, wB, wTSub, wBSub, ZT, ZB, ZTSub, ZBSub ); // Update the eigen vectors auto G( QL ); Gemm( NORMAL, NORMAL, F(1), G, ZT, QL ); G = QR; Gemm( NORMAL, NORMAL, F(1), G, ZB, QR ); }
void Scatter ( const DistMatrix<T,CIRC,CIRC>& A, ElementalMatrix<T>& B ) { DEBUG_CSE AssertSameGrids( A, B ); const Int m = A.Height(); const Int n = A.Width(); const Int colStride = B.ColStride(); const Int rowStride = B.RowStride(); B.Resize( m, n ); if( B.CrossSize() != 1 || B.RedundantSize() != 1 ) { // TODO: // Broadcast over the redundant communicator and use mpi::Translate // rank to determine whether a process is the root of the broadcast. GeneralPurpose( A, B ); return; } const Int pkgSize = mpi::Pad(MaxLength(m,colStride)*MaxLength(n,rowStride)); const Int recvSize = pkgSize; const Int sendSize = B.DistSize()*pkgSize; // Translate the root of A into the DistComm of B (if possible) const Int root = A.Root(); const Int target = mpi::Translate( A.CrossComm(), root, B.DistComm() ); if( target == mpi::UNDEFINED ) return; if( B.DistSize() == 1 ) { Copy( A.LockedMatrix(), B.Matrix() ); return; } vector<T> buffer; T* recvBuf=0; // some compilers (falsely) warn otherwise if( A.CrossRank() == root ) { FastResize( buffer, sendSize+recvSize ); T* sendBuf = &buffer[0]; recvBuf = &buffer[sendSize]; // Pack the send buffer copy::util::StridedPack ( m, n, B.ColAlign(), colStride, B.RowAlign(), rowStride, A.LockedBuffer(), A.LDim(), sendBuf, pkgSize ); // Scatter from the root mpi::Scatter ( sendBuf, pkgSize, recvBuf, pkgSize, target, B.DistComm() ); } else { FastResize( buffer, recvSize ); recvBuf = &buffer[0]; // Perform the receiving portion of the scatter from the non-root mpi::Scatter ( static_cast<T*>(0), pkgSize, recvBuf, pkgSize, target, B.DistComm() ); } // Unpack copy::util::InterleaveMatrix ( B.LocalHeight(), B.LocalWidth(), recvBuf, 1, B.LocalHeight(), B.Buffer(), 1, B.LDim() ); }