/* * Distributes A in such a way that * Layer 0 <- A(:, 0:(n/h - 1)) * Layer 1 <- A(:, (n/h):(2n/h - 1)) * . * . * . * Layer h-1 <- A(:, ((h-1)n/h):n) */ void DistributeCols ( const mpi::Comm& depthComm, const DistMatrix<double,MC,MR>& A, DistMatrix<double,MC,MR>& B ) { const Grid& meshGrid = A.Grid(); const int meshSize = meshGrid.Size(); const int depthSize = mpi::CommSize( depthComm ); const int depthRank = mpi::CommRank( depthComm ); const int sendCount = A.LocalHeight()*A.LocalWidth(); const int recvCount = sendCount / depthSize; // For now, we will make B as large as A... // TODO: NOT DO THIS if( A.LocalHeight() != A.LocalLDim() ) throw std::logic_error("Local height did not match local ldim"); B.Empty(); B.AlignWith( A ); Zeros( A.Height(), A.Width(), B ); // Scatter const int localColOffset = (A.LocalWidth()/depthSize)*depthRank; mpi::Scatter ( A.LockedLocalBuffer(), recvCount, B.LocalBuffer(0,localColOffset), recvCount, 0, depthComm ); }
// Broadcast a matrix from the root grid to the others void DepthBroadcast ( const mpi::Comm& depthComm, const DistMatrix<double,MC,MR>& A, DistMatrix<double,MC,MR>& B ) { const int rank = mpi::CommRank(mpi::COMM_WORLD); const Grid& meshGrid = A.Grid(); const int meshSize = meshGrid.Size(); const int depthRank = rank / meshSize; const int localSize = A.LocalHeight()*A.LocalWidth(); if( A.LocalHeight() != A.LocalLDim() ) throw std::logic_error("Leading dimension did not match local height"); B.Empty(); B.AlignWith( A ); B.ResizeTo( A.Height(), A.Width() ); // Have the root pack the broadcast data if( depthRank == 0 ) MemCopy( B.LocalBuffer(), A.LockedLocalBuffer(), localSize ); // Broadcast from the root mpi::Broadcast( B.LocalBuffer(), localSize, 0, depthComm ); }
inline void HermitianSVD ( UpperOrLower uplo, DistMatrix<F>& A, DistMatrix<typename Base<F>::type,VR,STAR>& s, DistMatrix<F>& U, DistMatrix<F>& V ) { #ifndef RELEASE PushCallStack("HermitianSVD"); #endif typedef typename Base<F>::type R; // Grab an eigenvalue decomposition of A HermitianEig( uplo, A, s, V ); // Redistribute the singular values into an [MR,* ] distribution const Grid& grid = A.Grid(); DistMatrix<R,MR,STAR> s_MR_STAR( grid ); s_MR_STAR.AlignWith( V ); s_MR_STAR = s; // Set the singular values to the absolute value of the eigenvalues const int numLocalVals = s.LocalHeight(); for( int iLocal=0; iLocal<numLocalVals; ++iLocal ) { const R sigma = s.GetLocal(iLocal,0); s.SetLocal(iLocal,0,Abs(sigma)); } // Copy V into U (flipping the sign as necessary) U.AlignWith( V ); U.ResizeTo( V.Height(), V.Width() ); const int localHeight = V.LocalHeight(); const int localWidth = V.LocalWidth(); for( int jLocal=0; jLocal<localWidth; ++jLocal ) { const R sigma = s_MR_STAR.GetLocal( jLocal, 0 ); F* UCol = U.LocalBuffer( 0, jLocal ); const F* VCol = V.LockedLocalBuffer( 0, jLocal ); if( sigma >= 0 ) for( int iLocal=0; iLocal<localHeight; ++iLocal ) UCol[iLocal] = VCol[iLocal]; else for( int iLocal=0; iLocal<localHeight; ++iLocal ) UCol[iLocal] = -VCol[iLocal]; } #ifndef RELEASE PopCallStack(); #endif }
inline const DistMatrix<T,MD,STAR,Int>& DistMatrix<T,MD,STAR,Int>::operator=( const DistMatrix<T,STAR,STAR,Int>& A ) { #ifndef RELEASE PushCallStack("[MD,* ] = [* ,* ]"); this->AssertNotLockedView(); this->AssertSameGrid( A ); if( this->Viewing() ) this->AssertSameSize( A ); #endif if( !this->Viewing() ) this->ResizeTo( A.Height(), A.Width() ); if( this->Participating() ) { const Int lcm = this->grid_->LCM(); const Int colShift = this->ColShift(); const Int width = this->Width(); const Int localHeight = this->LocalHeight(); const T* ALocalBuffer = A.LockedLocalBuffer(); const Int ALDim = A.LocalLDim(); T* thisLocalBuffer = this->LocalBuffer(); const Int thisLDim = this->LocalLDim(); #ifdef HAVE_OPENMP #pragma omp parallel for #endif for( Int j=0; j<width; ++j ) { T* destCol = &thisLocalBuffer[j*thisLDim]; const T* sourceCol = &ALocalBuffer[colShift+j*ALDim]; for( Int iLocal=0; iLocal<localHeight; ++iLocal ) destCol[iLocal] = sourceCol[iLocal*lcm]; } } #ifndef RELEASE PopCallStack(); #endif return *this; }
inline void AddInLocalData ( const DistMatrix<F,VC,STAR>& X1, DistMatrix<F,STAR,STAR>& Z ) { #ifndef RELEASE PushCallStack("internal::AddInLocalData"); #endif const int width = X1.Width(); const int localHeight = X1.LocalHeight(); const int stride = X1.Grid().Size(); const int offset = X1.ColShift(); for( int j=0; j<width; ++j ) { F* ZColBuffer = Z.LocalBuffer(0,j); const F* X1ColBuffer = X1.LockedLocalBuffer(0,j); for( int iLocal=0; iLocal<localHeight; ++iLocal ) ZColBuffer[offset+stride*iLocal] += X1ColBuffer[iLocal]; } #ifndef RELEASE PopCallStack(); #endif }
// Reduce across depth to get end result C void SumContributions ( mpi::Comm& depthComm, const DistMatrix<double,MC,MR>& APartial, DistMatrix<double,MC,MR>& A ) { const int rank = mpi::CommRank( mpi::COMM_WORLD ); const Grid& meshGrid = APartial.Grid(); A.Empty(); A.AlignWith( APartial ); A.ResizeTo( APartial.Height(), APartial.Width() ); if( APartial.LocalHeight() != APartial.LocalLDim() ) throw std::logic_error ("APartial did not have matching local height/ldim"); if( A.LocalHeight() != A.LocalLDim() ) throw std::logic_error("A did not have matching local height/ldim"); const int dataSize = APartial.LocalHeight()*APartial.LocalWidth(); mpi::AllReduce ( APartial.LockedLocalBuffer(), A.LocalBuffer(), dataSize, mpi::SUM, depthComm ); }