// Reads a matrix stored in binary form in `filename` into the distributed
// matrix `A`.
//
// File layout as consumed here: two `Int` values (height, then width)
// followed by height*width entries of type `T`; entry (i,j) is read from
// element offset i + j*height after the header.
//
// `A` is resized to height x width. Only processes with
// A.CrossRank() == A.Root() read entries, and each of them reads the entries
// addressed by its own local indices (mapped through GlobalRow/GlobalCol).
//
// RuntimeError is invoked if the file cannot be opened, if the header cannot
// be read or contains a negative dimension, or if the file size does not
// match the size implied by the header.
inline void Binary( AbstractDistMatrix<T>& A, const string filename )
{
    EL_DEBUG_CSE
    std::ifstream file( filename.c_str(), std::ios::binary );
    if( !file.is_open() )
        RuntimeError("Could not open ",filename);

    // Zero-initialize so a short read can never leave these indeterminate,
    // and verify that both header reads actually succeeded.
    Int height=0, width=0;
    file.read( reinterpret_cast<char*>(&height), sizeof(Int) );
    file.read( reinterpret_cast<char*>(&width), sizeof(Int) );
    if( !file )
        RuntimeError("Could not read matrix dimensions from ",filename);
    if( height < 0 || width < 0 )
        RuntimeError
        ("Invalid matrix dimensions ",height," x ",width," in ",filename);

    // The file must hold exactly the header plus height*width entries.
    const Int numBytes = FileSize( file );
    const Int metaBytes = 2*sizeof(Int);
    const Int dataBytes = height*width*sizeof(T);
    const Int numBytesExp = metaBytes + dataBytes;
    if( numBytes != numBytesExp )
        RuntimeError
        ("Expected file to be ",numBytesExp," bytes but found ",numBytes);

    A.Resize( height, width );
    if( A.CrossRank() != A.Root() )
        return;

    if( A.ColStride() == 1 && A.RowStride() == 1 )
    {
        // Position explicitly at the first entry instead of depending on
        // where FileSize left the stream (the other branches seek as well).
        const std::streamoff dataStart = metaBytes;
        file.seekg( dataStart );

        if( A.Height() == A.LDim() )
        {
            // Leading dimension equals the height: one read covers all
            // height*width entries.
            file.read
            ( reinterpret_cast<char*>(A.Buffer()), height*width*sizeof(T) );
        }
        else
        {
            // Otherwise read one column of `height` entries at a time.
            for( Int j=0; j<width; ++j )
                file.read
                ( reinterpret_cast<char*>(A.Buffer(0,j)), height*sizeof(T) );
        }
    }
    else if( A.ColStride() == 1 )
    {
        // Each local column is read as `height` consecutive entries starting
        // at the file offset of its global column.
        const Int localWidth = A.LocalWidth();
        for( Int jLoc=0; jLoc<localWidth; ++jLoc )
        {
            const Int j = A.GlobalCol(jLoc);
            const Int localIndex = j*height;
            const std::streamoff pos = metaBytes + localIndex*sizeof(T);
            file.seekg( pos );
            file.read
            ( reinterpret_cast<char*>(A.Buffer(0,jLoc)), height*sizeof(T) );
        }
    }
    else
    {
        // General case: seek to and read each local entry individually.
        const Int localHeight = A.LocalHeight();
        const Int localWidth = A.LocalWidth();
        for( Int jLoc=0; jLoc<localWidth; ++jLoc )
        {
            const Int j = A.GlobalCol(jLoc);
            for( Int iLoc=0; iLoc<localHeight; ++iLoc )
            {
                const Int i = A.GlobalRow(iLoc);
                const Int localIndex = i+j*height;
                const std::streamoff pos = metaBytes + localIndex*sizeof(T);
                file.seekg( pos );
                file.read
                ( reinterpret_cast<char*>(A.Buffer(iLoc,jLoc)), sizeof(T) );
            }
        }
    }
}
void Transform2x2Rows ( const Matrix<T>& G, AbstractDistMatrix<T>& A, Int i1, Int i2 ) { DEBUG_CSE const int rowOwner1 = A.RowOwner(i1); const int rowOwner2 = A.RowOwner(i2); const bool inFirstRow = ( A.ColRank() == rowOwner1 ); const bool inSecondRow = ( A.ColRank() == rowOwner2 ); if( !inFirstRow && !inSecondRow ) return; T* ABuf = A.Buffer(); const Int ALDim = A.LDim(); const Int nLoc = A.LocalWidth(); const T gamma11 = G(0,0); const T gamma12 = G(0,1); const T gamma21 = G(1,0); const T gamma22 = G(1,1); if( inFirstRow && inSecondRow ) { const Int i1Loc = A.LocalRow(i1); const Int i2Loc = A.LocalRow(i2); Transform2x2 ( nLoc, gamma11, gamma12, gamma21, gamma22, &ABuf[i1Loc], ALDim, &ABuf[i2Loc], ALDim ); } else if( inFirstRow ) { const Int i1Loc = A.LocalRow(i1); vector<T> buf(nLoc); for( Int jLoc=0; jLoc<nLoc; ++jLoc ) buf[jLoc] = ABuf[i1Loc+jLoc*ALDim]; mpi::SendRecv( buf.data(), nLoc, rowOwner2, rowOwner2, A.ColComm() ); // TODO: Generalized Axpy? blas::Scal( nLoc, gamma11, &ABuf[i1Loc], ALDim ); blas::Axpy( nLoc, gamma12, buf.data(), 1, &ABuf[i1Loc], ALDim ); } else { const Int i2Loc = A.LocalRow(i2); vector<T> buf(nLoc); for( Int jLoc=0; jLoc<nLoc; ++jLoc ) buf[jLoc] = ABuf[i2Loc+jLoc*ALDim]; mpi::SendRecv( buf.data(), nLoc, rowOwner1, rowOwner1, A.ColComm() ); // TODO: Generalized Axpy? blas::Scal( nLoc, gamma22, &ABuf[i2Loc], ALDim ); blas::Axpy( nLoc, gamma21, buf.data(), 1, &ABuf[i2Loc], ALDim ); } }
// Broadcasts the local buffer of A from process `rank` of `comm` to the
// other processes of `comm`.
//
// Nothing is done when `comm` has a single process or when
// A.Participating() is false. If the local height equals the leading
// dimension, the buffer is broadcast directly; otherwise the local entries
// are packed into a temporary array with leading dimension equal to the
// local height, broadcast, and unpacked on every process other than `rank`.
void Broadcast( AbstractDistMatrix<T>& A, mpi::Comm comm, int rank )
{
    DEBUG_CSE
    const int numProcs = mpi::Size( comm );
    const int myRank = mpi::Rank( comm );
    if( numProcs == 1 || !A.Participating() )
        return;

    const Int mLoc = A.LocalHeight();
    const Int nLoc = A.LocalWidth();
    const Int numEntries = mLoc*nLoc;

    if( mLoc == A.LDim() )
    {
        // No padding between columns: send the buffer as it is.
        mpi::Broadcast( A.Buffer(), numEntries, rank, comm );
        return;
    }

    vector<T> packed;
    FastResize( packed, numEntries );
    const bool isSource = ( myRank == rank );

    // Pack (source only)
    if( isSource )
    {
        copy::util::InterleaveMatrix
        ( mLoc, nLoc,
          A.LockedBuffer(), 1, A.LDim(),
          packed.data(),    1, mLoc );
    }

    mpi::Broadcast( packed.data(), numEntries, rank, comm );

    // Unpack (everyone except the source)
    if( !isSource )
    {
        copy::util::InterleaveMatrix
        ( mLoc, nLoc,
          packed.data(), 1, mLoc,
          A.Buffer(),    1, A.LDim() );
    }
}
// Adds `alpha` to every locally held entry (i,j) of A with i = j - offset.
//
// For each local column the matching global row index is computed; the entry
// is updated only if that row lies in [0, A.Height()) and A.IsLocalRow
// reports it as local.
void ShiftDiagonal( AbstractDistMatrix<T>& A, S alpha, Int offset )
{
    EL_DEBUG_CSE
    const Int numRows = A.Height();
    const Int numLocalCols = A.LocalWidth();
    const Int ldim = A.LDim();
    T* localBuf = A.Buffer();

    for( Int jLoc=0; jLoc<numLocalCols; ++jLoc )
    {
        // Global row of the shifted diagonal within this column.
        const Int i = A.GlobalCol(jLoc) - offset;
        if( i < 0 || i >= numRows )
            continue;
        if( !A.IsLocalRow(i) )
            continue;

        localBuf[A.LocalRow(i) + jLoc*ldim] += alpha;
    }
}
void Transform2x2Cols ( const Matrix<T>& G, AbstractDistMatrix<T>& A, Int j1, Int j2 ) { DEBUG_CSE const int colOwner1 = A.ColOwner(j1); const int colOwner2 = A.ColOwner(j2); const bool inFirstCol = ( A.RowRank() == colOwner1 ); const bool inSecondCol = ( A.RowRank() == colOwner2 ); if( !inFirstCol && !inSecondCol ) return; T* ABuf = A.Buffer(); const Int ALDim = A.LDim(); const Int mLoc = A.LocalHeight(); vector<T> buf(mLoc); const T gamma11 = G(0,0); const T gamma12 = G(0,1); const T gamma21 = G(1,0); const T gamma22 = G(1,1); if( inFirstCol && inSecondCol ) { const Int j1Loc = A.LocalCol(j1); const Int j2Loc = A.LocalCol(j2); // Since the scalar version of Transform2x2 assumes that a1 and a2 are // row vectors, we implicitly transpose G on input to it so that we can // apply [a1, a2] G via G^T [a1^T; a2^T]. Transform2x2 ( mLoc, gamma11, gamma21, gamma12, gamma22, &ABuf[j1Loc*ALDim], 1, &ABuf[j2Loc*ALDim], 1 ); } else if( inFirstCol ) { const Int j1Loc = A.LocalCol(j1); for( Int iLoc=0; iLoc<mLoc; ++iLoc ) buf[iLoc] = ABuf[iLoc+j1Loc*ALDim]; mpi::SendRecv( buf.data(), mLoc, colOwner2, colOwner2, A.RowComm() ); // TODO: Generalized Axpy? blas::Scal( mLoc, gamma11, &ABuf[j1Loc*ALDim], 1 ); blas::Axpy( mLoc, gamma21, buf.data(), 1, &ABuf[j1Loc*ALDim], 1 ); } else { const Int j2Loc = A.LocalCol(j2); for( Int iLoc=0; iLoc<mLoc; ++iLoc ) buf[iLoc] = ABuf[iLoc+j2Loc*ALDim]; mpi::SendRecv( buf.data(), mLoc, colOwner1, colOwner1, A.RowComm() ); // TODO: Generalized Axpy? blas::Scal( mLoc, gamma22, &ABuf[j2Loc*ALDim], 1 ); blas::Axpy( mLoc, gamma12, buf.data(), 1, &ABuf[j2Loc*ALDim], 1 ); } }