void RowMaxNorms ( const DistMatrix<F,U,V>& A, DistMatrix<Base<F>,U,STAR>& norms ) { DEBUG_CSE norms.AlignWith( A ); norms.Resize( A.Height(), 1 ); RowMaxNorms( A.LockedMatrix(), norms.Matrix() ); AllReduce( norms, A.RowComm(), mpi::MAX ); }
void RowTwoNorms ( const DistMatrix<F,U,V>& A, DistMatrix<Base<F>,U,STAR>& norms ) { DEBUG_CSE norms.AlignWith( A ); norms.Resize( A.Height(), 1 ); if( A.Width() == 0 ) { Zero( norms ); return; } RowTwoNormsHelper( A.LockedMatrix(), norms.Matrix(), A.RowComm() ); }
void TransformColumns ( const Matrix<F>& Z, DistMatrix<F,MC,MR,BLOCK>& H ) { DEBUG_CSE const Int width = H.Width(); const Grid& grid = H.Grid(); const Int blockWidth = H.BlockWidth(); const Int firstBlockWidth = blockWidth - H.RowCut(); if( width <= firstBlockWidth || grid.Width() == 1 ) { if( grid.Col() == H.ColOwner(0) ) { // This process row can locally update its portion of H Matrix<F> HLocCopy( H.Matrix() ); Gemm( NORMAL, NORMAL, F(1), HLocCopy, Z, H.Matrix() ); } } else if( width <= firstBlockWidth + blockWidth ) { const bool firstCol = H.ColOwner( 0 ); const bool secondCol = H.ColOwner( firstBlockWidth ); if( grid.Col() == firstCol ) { // // Replace H with // // | HLeft, HRight | | ZLeft, ZRight |, // // where HLeft is owned by this process column and HRight by the // next. // auto ZLeft = Z( ALL, IR(0,firstBlockWidth) ); // Partition space for the combined matrix Matrix<F> HCombine( H.LocalHeight(), width ); auto HLeft = HCombine( ALL, IR(0,firstBlockWidth) ); auto HRight = HCombine( ALL, IR(firstBlockWidth,END) ); // Copy our portion into the combined matrix HLeft = H.LockedMatrix(); // Exchange the data El::SendRecv( HLeft, HRight, H.RowComm(), secondCol, secondCol ); // Form our portion of the result Gemm( NORMAL, NORMAL, F(1), HCombine, ZLeft, H.Matrix() ); } else if( grid.Col() == secondCol ) { // // Replace H with // // | HLeft, HRight | | ZLeft, ZRight |, // // where HLeft is owned by the previous process column and HRight // by this one. 
// auto ZRight = Z( ALL, IR(firstBlockWidth,END) ); // Partition space for the combined matrix Matrix<F> HCombine( H.LocalHeight(), width ); auto HLeft = HCombine( ALL, IR(0,firstBlockWidth) ); auto HRight = HCombine( ALL, IR(firstBlockWidth,END) ); // Copy our portion into the combined matrix HRight = H.LockedMatrix(); // Exchange the data El::SendRecv( HRight, HLeft, H.RowComm(), firstCol, firstCol ); // Form our portion of the result Gemm( NORMAL, NORMAL, F(1), HCombine, ZRight, H.Matrix() ); } } else { // Fall back to the entire process column interacting. // TODO(poulson): Only form the subset of the result that we need. DistMatrix<F,MC,STAR,BLOCK> H_MC_STAR( H ); Matrix<F> HLocCopy( H_MC_STAR.Matrix() ); Gemm( NORMAL, NORMAL, F(1), HLocCopy, Z, H_MC_STAR.Matrix() ); H = H_MC_STAR; } }
// copy::TransposeDist -- fills B, whose distribution pair is [V,U], from A,
// whose distribution pair is [U,V].
//
// Visible steps:
//   * AssertSameGrids( A, B ) is called, B is resized to A's height and
//     width, and processes for which B.Participating() is false return.
//   * If both A.DistSize() and B.DistSize() are 1, the local matrix of A is
//     copied straight into the local matrix of B.
//   * If A has a single column, one buffer of
//     (colStrideA+rowStrideA)*portionSize entries is split into sendBuf (at
//     the start) and recvBuf (at offset colStrideA*portionSize). Processes
//     with A.RowRank() == A.RowAlign() pack into recvBuf; the data is then
//     scattered over A.RowComm(), exchanged with SendRecv over B.DistComm(),
//     and gathered over B.RowComm() at root B.RowAlign(), where it is
//     unpacked into B's buffer.
//
// NOTE(review): the definition is cut off in this view -- the function body
// is not closed and any remaining branch is not shown. The local `g` is
// unused in the visible part; presumably it is used further down -- confirm
// against the full source.
void TransposeDist( const DistMatrix<T,U,V>& A, DistMatrix<T,V,U>& B ) { DEBUG_ONLY(CSE cse("copy::TransposeDist")) AssertSameGrids( A, B ); const Grid& g = B.Grid(); B.Resize( A.Height(), A.Width() ); if( !B.Participating() ) return; const Int colStrideA = A.ColStride(); const Int rowStrideA = A.RowStride(); const Int distSize = A.DistSize(); if( A.DistSize() == 1 && B.DistSize() == 1 ) { Copy( A.LockedMatrix(), B.Matrix() ); } else if( A.Width() == 1 ) { const Int height = A.Height(); const Int maxLocalHeight = MaxLength(height,distSize); const Int portionSize = mpi::Pad( maxLocalHeight ); const Int colDiff = Shift(A.DistRank(),A.ColAlign(),distSize) - Shift(B.DistRank(),B.ColAlign(),distSize); const Int sendRankB = Mod( B.DistRank()+colDiff, distSize ); const Int recvRankA = Mod( A.DistRank()-colDiff, distSize ); const Int recvRankB = (recvRankA/colStrideA)+rowStrideA*(recvRankA%colStrideA); vector<T> buffer; FastResize( buffer, (colStrideA+rowStrideA)*portionSize ); T* sendBuf = &buffer[0]; T* recvBuf = &buffer[colStrideA*portionSize]; if( A.RowRank() == A.RowAlign() ) { // Pack // TODO: Use kernel from copy::util const Int AColShift = A.ColShift(); const T* ABuf = A.LockedBuffer(); EL_PARALLEL_FOR for( Int k=0; k<rowStrideA; ++k ) { T* data = &recvBuf[k*portionSize]; const Int shift = Shift_(A.ColRank()+colStrideA*k,A.ColAlign(),distSize); const Int offset = (shift-AColShift) / colStrideA; const Int thisLocalHeight = Length_(height,shift,distSize); for( Int iLoc=0; iLoc<thisLocalHeight; ++iLoc ) data[iLoc] = ABuf[offset+iLoc*rowStrideA]; } } // (e.g., A[VC,STAR] <- A[MC,MR]) mpi::Scatter ( recvBuf, portionSize, sendBuf, portionSize, A.RowAlign(), A.RowComm() ); // (e.g., A[VR,STAR] <- A[VC,STAR]) mpi::SendRecv ( sendBuf, portionSize, sendRankB, recvBuf, portionSize, recvRankB, B.DistComm() ); // (e.g., A[MR,MC] <- A[VR,STAR]) mpi::Gather ( recvBuf, portionSize, sendBuf, portionSize, B.RowAlign(), B.RowComm() ); if( B.RowRank() == B.RowAlign() ) { // Unpack // 
TODO: Use kernel from copy::util T* bufB = B.Buffer(); EL_PARALLEL_FOR for( Int k=0; k<colStrideA; ++k ) { const T* data = &sendBuf[k*portionSize]; const Int shift = Shift_(B.ColRank()+rowStrideA*k,B.ColAlign(),distSize); const Int offset = (shift-B.ColShift()) / rowStrideA; const Int thisLocalHeight = Length_(height,shift,distSize); for( Int iLoc=0; iLoc<thisLocalHeight; ++iLoc ) bufB[offset+iLoc*colStrideA] = data[iLoc]; } } }
void TransformColumns ( const Matrix<F>& V, DistMatrix<F,MC,MR,BLOCK>& A ) { DEBUG_CSE const Int width = A.Width(); const Grid& grid = A.Grid(); const Int blockWidth = A.BlockWidth(); const Int firstBlockWidth = blockWidth - A.RowCut(); if( width <= firstBlockWidth || grid.Width() == 1 ) { if( grid.Col() == A.ColOwner(0) ) { // This process row can locally update its portion of A TransformColumns( V, A.Matrix() ); } } else if( width <= firstBlockWidth + blockWidth ) { const int firstCol = A.ColOwner( 0 ); const int secondCol = A.ColOwner( firstBlockWidth ); if( grid.Col() == firstCol ) { // // Replace A with // // | ALeft, ARight | | VLeft, VRight |, // // where ALeft is owned by this process column and ARight by the // next. // // Partition space for the combined matrix Matrix<F> ACombine( A.LocalHeight(), width ); auto ALeft = ACombine( ALL, IR(0,firstBlockWidth) ); auto ARight = ACombine( ALL, IR(firstBlockWidth,END) ); // Copy our portion into the combined matrix ALeft = A.LockedMatrix(); // Exchange the data El::SendRecv( ALeft, ARight, A.RowComm(), secondCol, secondCol ); // Form our portion of the result auto VLeft = V( ALL, IR(0,firstBlockWidth) ); Gemm( NORMAL, NORMAL, F(1), ACombine, VLeft, A.Matrix() ); } else if( grid.Col() == secondCol ) { // // Replace A with // // | ALeft, ARight | | VLeft, VRight |, // // where ALeft is owned by the previous process column and ARight // by this one. // // Partition space for the combined matrix Matrix<F> ACombine( A.LocalHeight(), width ); auto ALeft = ACombine( ALL, IR(0,firstBlockWidth) ); auto ARight = ACombine( ALL, IR(firstBlockWidth,END) ); // Copy our portion into the combined matrix ARight = A.LockedMatrix(); // Exchange the data El::SendRecv( ARight, ALeft, A.RowComm(), firstCol, firstCol ); // Form our portion of the result auto VRight = V( ALL, IR(firstBlockWidth,END) ); Gemm( NORMAL, NORMAL, F(1), ACombine, VRight, A.Matrix() ); } } else { // Fall back to the entire process column interacting. 
// TODO(poulson): Only form the subset of the result that we need. DistMatrix<F,MC,STAR,BLOCK> A_MC_STAR( A ); Matrix<F> ALocCopy( A_MC_STAR.Matrix() ); Gemm( NORMAL, NORMAL, F(1), ALocCopy, V, A_MC_STAR.Matrix() ); A = A_MC_STAR; } }