// Packs the local data of A, reduce-scatters it over B's partial-union row
// communicator, and then applies the received piece to B's local buffer via
// axpy::util::InterleaveMatrixUpdate with scaling factor alpha.
// NOTE(review): presumably this realizes B += alpha * (sum of A contributions);
// confirm against InterleaveMatrixUpdate.
//
// Parameters:
//   alpha - scalar forwarded to the final update of B.
//   A     - source matrix; only read (LockedBuffer, LDim, RowShift, ...).
//   B     - destination matrix; its local buffer is updated in place.
//
// Errors:
//   Calls LogicError if A and B differ in height or width, or if the row
//   alignments do not satisfy B.RowAlign() % A.RowStride() == A.RowAlign()
//   (the unaligned case is not implemented).
//
// Processes for which B.Participating() is false return without doing work.
void PartialRowScatter
( T alpha,
  const ElementalMatrix<T>& A,
        ElementalMatrix<T>& B )
{
    DEBUG_ONLY(CSE cse("axpy_contract::PartialRowScatter"))
    AssertSameGrids( A, B );
    if( A.Height() != B.Height() || A.Width() != B.Width() )
        LogicError("Matrix sizes did not match");
    if( !B.Participating() )
        return;

    if( B.RowAlign() % A.RowStride() == A.RowAlign() )
    {
        const Int rowStride = B.RowStride();
        const Int rowStridePart = B.PartialRowStride();
        const Int rowStrideUnion = B.PartialUnionRowStride();
        const Int rowRankPart = B.PartialRowRank();

        const Int height = B.Height();
        const Int width = B.Width();
        const Int maxLocalWidth = MaxLength( width, rowStride );

        // Each member of the union communicator receives one padded portion
        // of recvSize entries, so the send buffer holds rowStrideUnion of them.
        const Int recvSize = mpi::Pad( height*maxLocalWidth );
        const Int sendSize = rowStrideUnion*recvSize;

        // The buffer must actually contain sendSize elements: reserve() alone
        // leaves size() == 0, so accessing the storage through data() would
        // touch objects that were never constructed. Value-initialization
        // also gives the padding entries a defined value before they take
        // part in the reduction.
        vector<T> buffer( sendSize );

        // Pack
        copy::util::PartialRowStridedPack
        ( height, width,
          B.RowAlign(), rowStride,
          rowStrideUnion, rowStridePart, rowRankPart,
          A.RowShift(),
          A.LockedBuffer(), A.LDim(),
          buffer.data(),    recvSize );

        // Communicate
        mpi::ReduceScatter( buffer.data(), recvSize, B.PartialUnionRowComm() );

        // Unpack our received data
        axpy::util::InterleaveMatrixUpdate
        ( alpha, height, B.LocalWidth(),
          buffer.data(), 1, height,
          B.Buffer(),    1, B.LDim() );
    }
    else
        LogicError("Unaligned PartialRowScatter not implemented");
}