void PartialColAllGather ( const DistMatrix<T, U, V>& A, DistMatrix<T,Partial<U>(),V>& B ) { EL_DEBUG_CSE AssertSameGrids( A, B ); const Int height = A.Height(); const Int width = A.Width(); #ifdef EL_VECTOR_WARNINGS if( width == 1 && A.Grid().Rank() == 0 ) { cerr << "The vector version of PartialColAllGather is not yet written but " "would only require modifying the vector version of " "PartialRowAllGather" << endl; } #endif #ifdef EL_CACHE_WARNINGS if( width && A.Grid().Rank() == 0 ) { cerr << "PartialColAllGather potentially causes a large amount of cache-" "thrashing. If possible, avoid it by performing the redistribution" "on the (conjugate-)transpose" << endl; } #endif B.AlignColsAndResize ( Mod(A.ColAlign(),B.ColStride()), height, width, false, false ); if( !A.Participating() ) return; EL_DEBUG_ONLY( if( A.LocalWidth() != A.Width() ) LogicError("This routine assumes rows are not distributed"); )
void ColAllToAllPromote ( const DistMatrix<T, U, V >& A, DistMatrix<T,Partial<U>(),PartialUnionRow<U,V>()>& B ) { DEBUG_CSE AssertSameGrids( A, B ); const Int height = A.Height(); const Int width = A.Width(); B.AlignColsAndResize ( Mod(A.ColAlign(),B.ColStride()), height, width, false, false ); if( !B.Participating() ) return; const Int colStride = A.ColStride(); const Int colStridePart = A.PartialColStride(); const Int colStrideUnion = A.PartialUnionColStride(); const Int colRankPart = A.PartialColRank(); const Int colDiff = B.ColAlign() - Mod(A.ColAlign(),colStridePart); const Int maxLocalHeight = MaxLength(height,colStride); const Int maxLocalWidth = MaxLength(width,colStrideUnion); const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth ); if( colDiff == 0 ) { if( A.PartialUnionColStride() == 1 ) { Copy( A.LockedMatrix(), B.Matrix() ); } else { vector<T> buffer; FastResize( buffer, 2*colStrideUnion*portionSize ); T* firstBuf = &buffer[0]; T* secondBuf = &buffer[colStrideUnion*portionSize]; // Pack util::RowStridedPack ( A.LocalHeight(), width, B.RowAlign(), colStrideUnion, A.LockedBuffer(), A.LDim(), firstBuf, portionSize ); // Simultaneously Gather in columns and Scatter in rows mpi::AllToAll ( firstBuf, portionSize, secondBuf, portionSize, A.PartialUnionColComm() ); // Unpack util::PartialColStridedUnpack ( height, B.LocalWidth(), A.ColAlign(), colStride, colStrideUnion, colStridePart, colRankPart, B.ColShift(), secondBuf, portionSize, B.Buffer(), B.LDim() ); } } else { #ifdef EL_UNALIGNED_WARNINGS if( A.Grid().Rank() == 0 ) cerr << "Unaligned PartialColAllToAllPromote" << endl; #endif const Int sendColRankPart = Mod( colRankPart+colDiff, colStridePart ); const Int recvColRankPart = Mod( colRankPart-colDiff, colStridePart ); vector<T> buffer; FastResize( buffer, 2*colStrideUnion*portionSize ); T* firstBuf = &buffer[0]; T* secondBuf = &buffer[colStrideUnion*portionSize]; // Pack util::RowStridedPack ( A.LocalHeight(), width, B.RowAlign(), colStrideUnion, A.LockedBuffer(), A.LDim(), secondBuf, portionSize ); // Realign the input mpi::SendRecv ( secondBuf, colStrideUnion*portionSize, sendColRankPart, firstBuf, colStrideUnion*portionSize, recvColRankPart, A.PartialColComm() ); // Simultaneously Scatter in columns and Gather in rows mpi::AllToAll ( firstBuf, portionSize, secondBuf, portionSize, A.PartialUnionColComm() ); // Unpack util::PartialColStridedUnpack ( height, B.LocalWidth(), A.ColAlign(), colStride, colStrideUnion, colStridePart, recvColRankPart, B.ColShift(), secondBuf, portionSize, B.Buffer(), B.LDim() ); } }
void ColAllToAllDemote ( const DistMatrix<T,Partial<U>(),PartialUnionRow<U,V>()>& A, DistMatrix<T, U, V >& B ) { DEBUG_ONLY(CallStackEntry cse("copy::ColAllToAllDemote")) AssertSameGrids( A, B ); const Int height = A.Height(); const Int width = A.Width(); B.AlignColsAndResize( A.ColAlign(), height, width, false, false ); if( !B.Participating() ) return; const Int colAlign = B.ColAlign(); const Int rowAlignA = A.RowAlign(); const Int colStride = B.ColStride(); const Int colStridePart = B.PartialColStride(); const Int colStrideUnion = B.PartialUnionColStride(); const Int colRankPart = B.PartialColRank(); const Int colDiff = (colAlign%colStridePart) - A.ColAlign(); const Int colShiftA = A.ColShift(); const Int localHeightB = B.LocalHeight(); const Int localWidthA = A.LocalWidth(); const Int maxLocalHeight = MaxLength(height,colStride); const Int maxLocalWidth = MaxLength(width,colStrideUnion); const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth ); std::vector<T> buffer( 2*colStrideUnion*portionSize ); T* firstBuf = &buffer[0]; T* secondBuf = &buffer[colStrideUnion*portionSize]; if( colDiff == 0 ) { // Pack util::PartialColStridedPack ( height, localWidthA, colAlign, colStride, colStrideUnion, colStridePart, colRankPart, colShiftA, A.LockedBuffer(), A.LDim(), firstBuf, portionSize ); // Simultaneously Scatter in columns and Gather in rows mpi::AllToAll ( firstBuf, portionSize, secondBuf, portionSize, B.PartialUnionColComm() ); // Unpack util::RowStridedUnpack ( localHeightB, width, rowAlignA, colStrideUnion, secondBuf, portionSize, B.Buffer(), B.LDim() ); } else { #ifdef EL_UNALIGNED_WARNINGS if( B.Grid().Rank() == 0 ) std::cerr << "Unaligned ColAllToAllDemote" << std::endl; #endif const Int sendColRankPart = Mod( colRankPart+colDiff, colStridePart ); const Int recvColRankPart = Mod( colRankPart-colDiff, colStridePart ); // Pack util::PartialColStridedPack ( height, localWidthA, colAlign, colStride, colStrideUnion, colStridePart, sendColRankPart, colShiftA, A.LockedBuffer(), A.LDim(), secondBuf, portionSize ); // Simultaneously Scatter in columns and Gather in rows mpi::AllToAll ( secondBuf, portionSize, firstBuf, portionSize, B.PartialUnionColComm() ); // Realign the result mpi::SendRecv ( firstBuf, colStrideUnion*portionSize, sendColRankPart, secondBuf, colStrideUnion*portionSize, recvColRankPart, B.PartialColComm() ); // Unpack util::RowStridedUnpack ( localHeightB, width, rowAlignA, colStrideUnion, secondBuf, portionSize, B.Buffer(), B.LDim() ); } }