/*
 * Distributes A in such a way that
 *   Layer 0   <- A(:, 0:(n/h - 1))
 *   Layer 1   <- A(:, (n/h):(2n/h - 1))
 *     .
 *     .
 *     .
 *   Layer h-1 <- A(:, ((h-1)n/h):n)
 * where h is the size of depthComm.
 *
 * The split is performed on the *local* buffers: rank 0 of depthComm
 * scatters its local copy of A, and each rank of depthComm receives
 * LocalWidth()/h local columns, which are stored in B starting at local
 * column (LocalWidth()/h)*depthRank. B is aligned with A, given the full
 * dimensions of A, and zero everywhere else.
 *
 * NOTE: When the local width is not a multiple of h, the trailing
 *       LocalWidth() % h local columns are not distributed (they remain zero
 *       in B on every layer).
 *
 * Throws std::logic_error if the local buffer of A is not contiguous
 * (local height != local leading dimension).
 */
void DistributeCols
( const mpi::Comm& depthComm,
  const DistMatrix<double,MC,MR>& A,
        DistMatrix<double,MC,MR>& B )
{
    const int depthSize = mpi::CommSize( depthComm );
    const int depthRank = mpi::CommRank( depthComm );

    // The scatter treats the local buffer as one contiguous array, so the
    // leading dimension must equal the local height.
    if( A.LocalHeight() != A.LocalLDim() )
        throw std::logic_error("Local height did not match local ldim");

    // Send whole columns only: deriving the count from the total number of
    // local entries would produce chunks that straddle column boundaries
    // whenever the local width is not divisible by the depth size, which
    // would not match the column offset used for the receive buffer below.
    const int colsPerLayer = A.LocalWidth() / depthSize;
    const int recvCount = A.LocalHeight()*colsPerLayer;

    // For now, we will make B as large as A...
    // TODO: NOT DO THIS
    B.Empty();
    B.AlignWith( A );
    Zeros( A.Height(), A.Width(), B );

    // Scatter
    const int localColOffset = colsPerLayer*depthRank;
    mpi::Scatter
    ( A.LockedLocalBuffer(), recvCount,
      B.LocalBuffer(0,localColOffset), recvCount, 0, depthComm );
}
// Broadcast a matrix from the root grid to the others void DepthBroadcast ( const mpi::Comm& depthComm, const DistMatrix<double,MC,MR>& A, DistMatrix<double,MC,MR>& B ) { const int rank = mpi::CommRank(mpi::COMM_WORLD); const Grid& meshGrid = A.Grid(); const int meshSize = meshGrid.Size(); const int depthRank = rank / meshSize; const int localSize = A.LocalHeight()*A.LocalWidth(); if( A.LocalHeight() != A.LocalLDim() ) throw std::logic_error("Leading dimension did not match local height"); B.Empty(); B.AlignWith( A ); B.ResizeTo( A.Height(), A.Width() ); // Have the root pack the broadcast data if( depthRank == 0 ) MemCopy( B.LocalBuffer(), A.LockedLocalBuffer(), localSize ); // Broadcast from the root mpi::Broadcast( B.LocalBuffer(), localSize, 0, depthComm ); }
// Fills the column vector 'norms' (one entry per row of A) by applying the
// local-matrix RowMaxNorms overload to the local portion of A and then
// combining the local results with a MAX all-reduce over A.RowComm().
// 'norms' is aligned with A and resized to A.Height() x 1.
// NOTE(review): F, U and V are not declared in this block; presumably they
// are template parameters declared just above it.
void RowMaxNorms
( const DistMatrix<F,U,V>& A,
        DistMatrix<Base<F>,U,STAR>& norms )
{
    DEBUG_CSE
    norms.AlignWith( A );
    norms.Resize( A.Height(), 1 );

    // Local contribution of this process
    const auto& localA = A.LockedMatrix();
    auto& localNorms = norms.Matrix();
    RowMaxNorms( localA, localNorms );

    // Combine the contributions of the processes sharing these rows
    AllReduce( norms, A.RowComm(), mpi::MAX );
}
// Fills the column vector 'mins' (one entry per column of A) by applying the
// local-matrix ColumnMinAbs overload to the local portion of A and then
// combining the local results with a MIN all-reduce over A.ColComm().
// 'mins' is aligned with A and resized to A.Width() x 1.
// NOTE(review): F, U and V are not declared in this block; presumably they
// are template parameters declared just above it.
void ColumnMinAbs
( const DistMatrix<F,U,V>& A,
        DistMatrix<Base<F>,V,STAR>& mins )
{
    EL_DEBUG_CSE
    mins.AlignWith( A );
    mins.Resize( A.Width(), 1 );

    // Local contribution of this process
    auto& localMins = mins.Matrix();
    ColumnMinAbs( A.LockedMatrix(), localMins );

    // Combine the contributions of the processes sharing these columns
    AllReduce( localMins, A.ColComm(), mpi::MIN );
}
void StackedGeometricColumnScaling ( const DistMatrix<Field, U,V >& A, const DistMatrix<Field, U,V >& B, DistMatrix<Base<Field>,V,STAR>& geomScaling ) { EL_DEBUG_CSE // NOTE: Assuming A.ColComm() == B.ColComm() and that the row alignments // are equal typedef Base<Field> Real; DistMatrix<Real,V,STAR> maxScalingA(A.Grid()), maxScalingB(A.Grid()); ColumnMaxNorms( A, maxScalingA ); ColumnMaxNorms( B, maxScalingB ); const Int mLocalA = A.LocalHeight(); const Int mLocalB = B.LocalHeight(); const Int nLocal = A.LocalWidth(); geomScaling.AlignWith( maxScalingA ); geomScaling.Resize( A.Width(), 1 ); auto& ALoc = A.LockedMatrix(); auto& BLoc = B.LockedMatrix(); auto& geomScalingLoc = geomScaling.Matrix(); auto& maxScalingALoc = maxScalingA.Matrix(); auto& maxScalingBLoc = maxScalingB.Matrix(); for( Int jLoc=0; jLoc<nLocal; ++jLoc ) { Real minAbs = Max(maxScalingALoc(jLoc),maxScalingBLoc(jLoc)); for( Int iLoc=0; iLoc<mLocalA; ++iLoc ) { const Real absVal = Abs(ALoc(iLoc,jLoc)); if( absVal > 0 && absVal < minAbs ) minAbs = Min(minAbs,absVal); } for( Int iLoc=0; iLoc<mLocalB; ++iLoc ) { const Real absVal = Abs(BLoc(iLoc,jLoc)); if( absVal > 0 && absVal < minAbs ) minAbs = Min(minAbs,absVal); } geomScalingLoc(jLoc) = minAbs; } mpi::AllReduce( geomScaling.Buffer(), nLocal, mpi::MIN, A.ColComm() ); for( Int jLoc=0; jLoc<nLocal; ++jLoc ) { const Real maxAbsA = maxScalingALoc(jLoc); const Real maxAbsB = maxScalingBLoc(jLoc); const Real maxAbs = Max(maxAbsA,maxAbsB); const Real minAbs = geomScalingLoc(jLoc); geomScalingLoc(jLoc) = Sqrt(minAbs*maxAbs); } }
// Fills the column vector 'norms' (one entry per row of A) via
// RowTwoNormsHelper, which is handed the local portions of A and 'norms'
// together with A.RowComm(). 'norms' is aligned with A and resized to
// A.Height() x 1; when A has no columns it is simply zeroed.
// NOTE(review): F, U and V are not declared in this block; presumably they
// are template parameters declared just above it.
void RowTwoNorms
( const DistMatrix<F,U,V>& A,
        DistMatrix<Base<F>,U,STAR>& norms )
{
    DEBUG_CSE
    norms.AlignWith( A );
    norms.Resize( A.Height(), 1 );

    if( A.Width() != 0 )
    {
        RowTwoNormsHelper( A.LockedMatrix(), norms.Matrix(), A.RowComm() );
    }
    else
    {
        // No columns: every row norm is zero
        Zero( norms );
    }
}
inline void HermitianSVD ( UpperOrLower uplo, DistMatrix<F>& A, DistMatrix<BASE(F),VR,STAR>& s, DistMatrix<F>& U, DistMatrix<F>& V ) { #ifndef RELEASE CallStackEntry entry("HermitianSVD"); #endif #ifdef HAVE_PMRRR typedef BASE(F) R; // Grab an eigenvalue decomposition of A HermitianEig( uplo, A, s, V ); // Redistribute the singular values into an [MR,* ] distribution const Grid& grid = A.Grid(); DistMatrix<R,MR,STAR> s_MR_STAR( grid ); s_MR_STAR.AlignWith( V.DistData() ); s_MR_STAR = s; // Set the singular values to the absolute value of the eigenvalues const Int numLocalVals = s.LocalHeight(); for( Int iLoc=0; iLoc<numLocalVals; ++iLoc ) { const R sigma = s.GetLocal(iLoc,0); s.SetLocal(iLoc,0,Abs(sigma)); } // Copy V into U (flipping the sign as necessary) U.AlignWith( V ); U.ResizeTo( V.Height(), V.Width() ); const Int localHeight = V.LocalHeight(); const Int localWidth = V.LocalWidth(); for( Int jLoc=0; jLoc<localWidth; ++jLoc ) { const R sigma = s_MR_STAR.GetLocal( jLoc, 0 ); F* UCol = U.Buffer( 0, jLoc ); const F* VCol = V.LockedBuffer( 0, jLoc ); if( sigma >= 0 ) for( Int iLoc=0; iLoc<localHeight; ++iLoc ) UCol[iLoc] = VCol[iLoc]; else for( Int iLoc=0; iLoc<localHeight; ++iLoc ) UCol[iLoc] = -VCol[iLoc]; } #else U = A; MakeHermitian( uplo, U ); SVD( U, s, V ); #endif // ifdef HAVE_PMRRR }
inline void HermitianSVD ( UpperOrLower uplo, DistMatrix<F>& A, DistMatrix<typename Base<F>::type,VR,STAR>& s, DistMatrix<F>& U, DistMatrix<F>& V ) { #ifndef RELEASE PushCallStack("HermitianSVD"); #endif typedef typename Base<F>::type R; // Grab an eigenvalue decomposition of A HermitianEig( uplo, A, s, V ); // Redistribute the singular values into an [MR,* ] distribution const Grid& grid = A.Grid(); DistMatrix<R,MR,STAR> s_MR_STAR( grid ); s_MR_STAR.AlignWith( V ); s_MR_STAR = s; // Set the singular values to the absolute value of the eigenvalues const int numLocalVals = s.LocalHeight(); for( int iLocal=0; iLocal<numLocalVals; ++iLocal ) { const R sigma = s.GetLocal(iLocal,0); s.SetLocal(iLocal,0,Abs(sigma)); } // Copy V into U (flipping the sign as necessary) U.AlignWith( V ); U.ResizeTo( V.Height(), V.Width() ); const int localHeight = V.LocalHeight(); const int localWidth = V.LocalWidth(); for( int jLocal=0; jLocal<localWidth; ++jLocal ) { const R sigma = s_MR_STAR.GetLocal( jLocal, 0 ); F* UCol = U.LocalBuffer( 0, jLocal ); const F* VCol = V.LockedLocalBuffer( 0, jLocal ); if( sigma >= 0 ) for( int iLocal=0; iLocal<localHeight; ++iLocal ) UCol[iLocal] = VCol[iLocal]; else for( int iLocal=0; iLocal<localHeight; ++iLocal ) UCol[iLocal] = -VCol[iLocal]; } #ifndef RELEASE PopCallStack(); #endif }
// Create a new set of distributed matrices, so that, // if depthRank == 0, B = A, // otherwise, B = 0. void CopyOrReset ( const DistMatrix<double,MC,MR>& A, DistMatrix<double,MC,MR>& B ) { const int rank = mpi::CommRank( mpi::COMM_WORLD ); const Grid& meshGrid = A.Grid(); const int meshSize = meshGrid.Size(); const int depthRank = rank / meshSize; //Layer 0 if( depthRank == 0 ) B = A; else { B.AlignWith( A ); Zeros( A.Height(), A.Width(), B ); } }
void ScaLAPACKHelper ( DistMatrix<F,MC,MR,BLOCK>& A, DistMatrix<F,MR,STAR,BLOCK>& householderScalars ) { EL_DEBUG_CSE AssertScaLAPACKSupport(); #ifdef EL_HAVE_SCALAPACK const Int m = A.Height(); const Int n = A.Width(); const Int minDim = Min(m,n); householderScalars.AlignWith( A ); householderScalars.Resize( minDim, 1 ); auto descA = FillDesc( A ); scalapack::QR ( m, n, A.Buffer(), descA.data(), householderScalars.Buffer() ); #endif }
void IndexDependentMap ( const DistMatrix<S,U,V,wrap>& A, DistMatrix<T,U,V,wrap>& B, function<T(Int,Int,const S&)> func ) { EL_DEBUG_CSE const Int mLoc = A.LocalHeight(); const Int nLoc = A.LocalWidth(); B.AlignWith( A.DistData() ); B.Resize( A.Height(), A.Width() ); auto& ALoc = A.LockedMatrix(); auto& BLoc = B.Matrix(); for( Int jLoc=0; jLoc<nLoc; ++jLoc ) { const Int j = A.GlobalCol(jLoc); for( Int iLoc=0; iLoc<mLoc; ++iLoc ) { const Int i = A.GlobalRow(iLoc); BLoc(iLoc,jLoc) = func(i,j,ALoc(iLoc,jLoc)); } } }
// Reduce across depth to get end result C void SumContributions ( mpi::Comm& depthComm, const DistMatrix<double,MC,MR>& APartial, DistMatrix<double,MC,MR>& A ) { const int rank = mpi::CommRank( mpi::COMM_WORLD ); const Grid& meshGrid = APartial.Grid(); A.Empty(); A.AlignWith( APartial ); A.ResizeTo( APartial.Height(), APartial.Width() ); if( APartial.LocalHeight() != APartial.LocalLDim() ) throw std::logic_error ("APartial did not have matching local height/ldim"); if( A.LocalHeight() != A.LocalLDim() ) throw std::logic_error("A did not have matching local height/ldim"); const int dataSize = APartial.LocalHeight()*APartial.LocalWidth(); mpi::AllReduce ( APartial.LockedLocalBuffer(), A.LocalBuffer(), dataSize, mpi::SUM, depthComm ); }
void QR ( DistMatrix<F,MC,MR,BLOCK>& A, DistMatrix<F,MR,STAR,BLOCK>& phase ) { DEBUG_CSE AssertScaLAPACKSupport(); #ifdef EL_HAVE_SCALAPACK const Int m = A.Height(); const Int n = A.Width(); const Int minDim = Min(m,n); phase.AlignWith( A ); phase.Resize( minDim, 1 ); const int bHandle = blacs::Handle( A ); const int context = blacs::GridInit( bHandle, A ); auto descA = FillDesc( A, context ); scalapack::QR( m, n, A.Buffer(), descA.data(), phase.Buffer() ); blacs::FreeGrid( context ); blacs::FreeHandle( bHandle ); #endif }
inline void Var3( Orientation orientation, DistMatrix<F>& A, DistMatrix<F,MC,STAR>& d ) { #ifndef RELEASE PushCallStack("ldl::Var3"); if( orientation == NORMAL ) throw std::logic_error("Can only perform LDL^T and LDL^H"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( A.Grid() != d.Grid() ) throw std::logic_error("A and d must use the same grid"); if( d.Viewing() && (d.Height() != A.Height() || d.Width() != 1) ) throw std::logic_error ("d must be a column vector of the same height as A"); if( d.Viewing() && d.ColAlignment() != A.ColAlignment() ) throw std::logic_error("d must be aligned with A"); #endif const Grid& g = A.Grid(); if( !d.Viewing() ) { d.AlignWith( A ); d.ResizeTo( A.Height(), 1 ); } // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F,MC,STAR> dT(g), d0(g), dB(g), d1(g), d2(g); // Temporary matrices DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,STAR> d1_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,VR, STAR> A21_VR_STAR(g); DistMatrix<F,STAR,MC > S21Trans_STAR_MC(g); DistMatrix<F,STAR,MR > A21AdjOrTrans_STAR_MR(g); const bool conjugate = ( orientation == ADJOINT ); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( d, dT, dB, 0 ); while( ABR.Height() > 0 ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( dT, d0, /**/ /**/ d1, dB, d2 ); A21_VC_STAR.AlignWith( A22 ); A21_VR_STAR.AlignWith( A22 ); S21Trans_STAR_MC.AlignWith( A22 ); A21AdjOrTrans_STAR_MR.AlignWith( A22 ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; LocalLDL( orientation, A11_STAR_STAR, d1_STAR_STAR ); A11 = A11_STAR_STAR; d1 = d1_STAR_STAR; A21_VC_STAR = A21; LocalTrsm ( RIGHT, LOWER, orientation, UNIT, F(1), 
A11_STAR_STAR, A21_VC_STAR ); S21Trans_STAR_MC.TransposeFrom( A21_VC_STAR ); DiagonalSolve( RIGHT, NORMAL, d1_STAR_STAR, A21_VC_STAR ); A21_VR_STAR = A21_VC_STAR; A21AdjOrTrans_STAR_MR.TransposeFrom( A21_VR_STAR, conjugate ); LocalTrrk ( LOWER, TRANSPOSE, F(-1), S21Trans_STAR_MC, A21AdjOrTrans_STAR_MR, F(1), A22 ); A21 = A21_VC_STAR; //--------------------------------------------------------------------// A21_VC_STAR.FreeAlignments(); A21_VR_STAR.FreeAlignments(); S21Trans_STAR_MC.FreeAlignments(); A21AdjOrTrans_STAR_MR.FreeAlignments(); SlidePartitionDown ( dT, d0, d1, /**/ /**/ dB, d2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }