/*
 * Distributes A across the depth communicator in such a way that
 *   Layer 0   <- A(:, 0:(n/h - 1))
 *   Layer 1   <- A(:, (n/h):(2n/h - 1))
 *   .
 *   .
 *   .
 *   Layer h-1 <- A(:, ((h-1)n/h):(n - 1))
 * where h is the size of depthComm. The code works on local buffers, so the
 * column ranges above are applied to the local columns of A.
 *
 * depthComm: communicator over which the columns are scattered (root is 0).
 * A:         source matrix; only the root's local buffer is passed as the
 *            send buffer that matters (assuming mpi::Scatter follows
 *            MPI_Scatter semantics -- confirm against the wrapper).
 * B:         output; emptied, aligned with A, resized to A's dimensions and
 *            zero-filled via Zeros, then the received columns are written at
 *            the same local column offset they occupied in A.
 *
 * Throws std::logic_error if A's local leading dimension differs from its
 * local height, since the local buffer is then not contiguous.
 *
 * Each layer receives a whole number of local columns
 * (LocalWidth / depthSize, rounded down); any trailing local columns that do
 * not divide evenly are not sent and stay as initialized by Zeros.
 */
void DistributeCols
( const mpi::Comm& depthComm,
  const DistMatrix<double,MC,MR>& A,
        DistMatrix<double,MC,MR>& B )
{
    const int depthSize = mpi::CommSize( depthComm );
    const int depthRank = mpi::CommRank( depthComm );

    // Count whole columns per layer so that the amount of data received
    // matches the column offset used for the destination below. Dividing the
    // total entry count by depthSize would split columns whenever the local
    // width is not a multiple of depthSize.
    const int colsPerLayer = A.LocalWidth() / depthSize;
    const int recvCount = A.LocalHeight()*colsPerLayer;

    // For now, we will make B as large as A...
    // TODO: NOT DO THIS
    if( A.LocalHeight() != A.LocalLDim() )
        throw std::logic_error("Local height did not match local ldim");
    B.Empty();
    B.AlignWith( A );
    Zeros( A.Height(), A.Width(), B );

    // Scatter: the per-rank send count equals the per-rank receive count
    const int localColOffset = colsPerLayer*depthRank;
    mpi::Scatter
    ( A.LockedLocalBuffer(), recvCount,
      B.LocalBuffer(0,localColOffset), recvCount, 0, depthComm );
}
// Broadcast a matrix from the root grid to the others void DepthBroadcast ( const mpi::Comm& depthComm, const DistMatrix<double,MC,MR>& A, DistMatrix<double,MC,MR>& B ) { const int rank = mpi::CommRank(mpi::COMM_WORLD); const Grid& meshGrid = A.Grid(); const int meshSize = meshGrid.Size(); const int depthRank = rank / meshSize; const int localSize = A.LocalHeight()*A.LocalWidth(); if( A.LocalHeight() != A.LocalLDim() ) throw std::logic_error("Leading dimension did not match local height"); B.Empty(); B.AlignWith( A ); B.ResizeTo( A.Height(), A.Width() ); // Have the root pack the broadcast data if( depthRank == 0 ) MemCopy( B.LocalBuffer(), A.LockedLocalBuffer(), localSize ); // Broadcast from the root mpi::Broadcast( B.LocalBuffer(), localSize, 0, depthComm ); }
void AccumulateRHS( const DistMatrix<F,VC,STAR>& X, DistMatrix<F,STAR,STAR>& Z ) { const Int height = X.Height(); const Int width = X.Width(); Z.Empty(); Zeros( Z, height, width ); const Int localHeight = X.LocalHeight(); const Int colShift = X.ColShift(); const int commSize = X.Grid().Size(); const F* XBuffer = X.LockedBuffer(); F* ZBuffer = Z.Buffer(); const Int XLDim = X.LDim(); const Int ZLDim = Z.LDim(); for( Int iLoc=0; iLoc<localHeight; ++iLoc ) { const Int i = colShift + iLoc*commSize; for( Int j=0; j<width; ++j ) ZBuffer[i+j*ZLDim] = XBuffer[iLoc+j*XLDim]; } mpi::AllReduce( ZBuffer, ZLDim*width, mpi::SUM, X.Grid().VCComm() ); }
// Reduce across depth to get end result C void SumContributions ( mpi::Comm& depthComm, const DistMatrix<double,MC,MR>& APartial, DistMatrix<double,MC,MR>& A ) { const int rank = mpi::CommRank( mpi::COMM_WORLD ); const Grid& meshGrid = APartial.Grid(); A.Empty(); A.AlignWith( APartial ); A.ResizeTo( APartial.Height(), APartial.Width() ); if( APartial.LocalHeight() != APartial.LocalLDim() ) throw std::logic_error ("APartial did not have matching local height/ldim"); if( A.LocalHeight() != A.LocalLDim() ) throw std::logic_error("A did not have matching local height/ldim"); const int dataSize = APartial.LocalHeight()*APartial.LocalWidth(); mpi::AllReduce ( APartial.LockedLocalBuffer(), A.LocalBuffer(), dataSize, mpi::SUM, depthComm ); }