void EntrywiseMap ( const ElementalMatrix<S>& A, ElementalMatrix<T>& B, function<T(S)> func ) { if( A.DistData().colDist == B.DistData().colDist && A.DistData().rowDist == B.DistData().rowDist ) { B.AlignWith( A.DistData() ); B.Resize( A.Height(), A.Width() ); EntrywiseMap( A.LockedMatrix(), B.Matrix(), func ); } else { B.Resize( A.Height(), A.Width() ); #define GUARD(CDIST,RDIST) \ B.DistData().colDist == CDIST && B.DistData().rowDist == RDIST #define PAYLOAD(CDIST,RDIST) \ DistMatrix<S,CDIST,RDIST> AProx(B.Grid()); \ AProx.AlignWith( B.DistData() ); \ Copy( A, AProx ); \ EntrywiseMap( AProx.Matrix(), B.Matrix(), func ); #include <El/macros/GuardAndPayload.h> #undef GUARD #undef PAYLOAD } }
void IndexDependentMap ( const ElementalMatrix<S>& A, ElementalMatrix<T>& B, function<T(Int,Int,S)> func ) { DEBUG_CSE const Int mLoc = A.LocalHeight(); const Int nLoc = A.LocalWidth(); B.AlignWith( A.DistData() ); B.Resize( A.Height(), A.Width() ); auto& ALoc = A.LockedMatrix(); auto& BLoc = B.Matrix(); for( Int jLoc=0; jLoc<nLoc; ++jLoc ) { const Int j = A.GlobalCol(jLoc); for( Int iLoc=0; iLoc<mLoc; ++iLoc ) { const Int i = A.GlobalRow(iLoc); BLoc(iLoc,jLoc) = func(i,j,ALoc(iLoc,jLoc)); } } }
void Gather ( const ElementalMatrix<T>& A, DistMatrix<T,CIRC,CIRC>& B ) { DEBUG_ONLY(CSE cse("copy::Gather")) AssertSameGrids( A, B ); if( A.DistSize() == 1 && A.CrossSize() == 1 ) { B.Resize( A.Height(), A.Width() ); if( B.CrossRank() == B.Root() ) Copy( A.LockedMatrix(), B.Matrix() ); return; } const Int height = A.Height(); const Int width = A.Width(); B.SetGrid( A.Grid() ); B.Resize( height, width ); // Gather the colShifts and rowShifts // ================================== Int myShifts[2]; myShifts[0] = A.ColShift(); myShifts[1] = A.RowShift(); vector<Int> shifts; const Int crossSize = B.CrossSize(); if( B.CrossRank() == B.Root() ) shifts.resize( 2*crossSize ); mpi::Gather( myShifts, 2, shifts.data(), 2, B.Root(), B.CrossComm() ); // Gather the payload data // ======================= const bool irrelevant = ( A.RedundantRank()!=0 || A.CrossRank()!=A.Root() ); int totalSend = ( irrelevant ? 0 : A.LocalHeight()*A.LocalWidth() ); vector<int> recvCounts, recvOffsets; if( B.CrossRank() == B.Root() ) recvCounts.resize( crossSize ); mpi::Gather( &totalSend, 1, recvCounts.data(), 1, B.Root(), B.CrossComm() ); int totalRecv = Scan( recvCounts, recvOffsets ); //vector<T> sendBuf(totalSend), recvBuf(totalRecv); vector<T> sendBuf, recvBuf; sendBuf.reserve( totalSend ); recvBuf.reserve( totalRecv ); if( !irrelevant ) copy::util::InterleaveMatrix ( A.LocalHeight(), A.LocalWidth(), A.LockedBuffer(), 1, A.LDim(), sendBuf.data(), 1, A.LocalHeight() ); mpi::Gather ( sendBuf.data(), totalSend, recvBuf.data(), recvCounts.data(), recvOffsets.data(), B.Root(), B.CrossComm() ); // Unpack // ====== if( B.Root() == B.CrossRank() ) { for( Int q=0; q<crossSize; ++q ) { if( recvCounts[q] == 0 ) continue; const Int colShift = shifts[2*q+0]; const Int rowShift = shifts[2*q+1]; const Int colStride = A.ColStride(); const Int rowStride = A.RowStride(); const Int localHeight = Length( height, colShift, colStride ); const Int localWidth = Length( width, rowShift, rowStride ); copy::util::InterleaveMatrix ( localHeight, localWidth, &recvBuf[recvOffsets[q]], 1, localHeight, B.Buffer(colShift,rowShift), colStride, rowStride*B.LDim() ); } } }