inline typename Base<F>::type HermitianMaxNorm( UpperOrLower uplo, const DistMatrix<F>& A ) { #ifndef RELEASE PushCallStack("internal::HermitianMaxNorm"); #endif typedef typename Base<F>::type R; if( A.Height() != A.Width() ) throw std::logic_error("Hermitian matrices must be square."); const int r = A.Grid().Height(); const int c = A.Grid().Width(); const int colShift = A.ColShift(); const int rowShift = A.RowShift(); R localMaxAbs = 0; const int localWidth = A.LocalWidth(); if( uplo == UPPER ) { for( int jLocal=0; jLocal<localWidth; ++jLocal ) { int j = rowShift + jLocal*c; int numUpperRows = LocalLength(j+1,colShift,r); for( int iLocal=0; iLocal<numUpperRows; ++iLocal ) { const R thisAbs = Abs(A.GetLocal(iLocal,jLocal)); localMaxAbs = std::max( localMaxAbs, thisAbs ); } } } else { for( int jLocal=0; jLocal<localWidth; ++jLocal ) { int j = rowShift + jLocal*c; int numStrictlyUpperRows = LocalLength(j,colShift,r); for( int iLocal=numStrictlyUpperRows; iLocal<A.LocalHeight(); ++iLocal ) { const R thisAbs = Abs(A.GetLocal(iLocal,jLocal)); localMaxAbs = std::max( localMaxAbs, thisAbs ); } } } R maxAbs; mpi::AllReduce( &localMaxAbs, &maxAbs, 1, mpi::MAX, A.Grid().VCComm() ); #ifndef RELEASE PopCallStack(); #endif return maxAbs; }
inline void
DistMatrix<T,MD,STAR,Int>::LockedAttach
( Int height, Int width, Int colAlignmentVC,
  const T* buffer, Int ldim, const elem::Grid& grid )
{
    // Turns this matrix into a locked (read-only) view of an external buffer.
    // Any existing contents are dropped via Empty(), the global dimensions
    // and the grid are recorded, and the diagonal path / column alignment are
    // derived from colAlignmentVC. Participating processes attach
    // the buffer to their local matrix; others just get a zero column shift.
#ifndef RELEASE
    PushCallStack("[MD,* ]::LockedAttach");
#endif
    this->Empty();

    this->grid_ = &grid;
    this->height_ = height;
    this->width_ = width;
    this->diagPath_ = grid.DiagPath(colAlignmentVC);
    this->colAlignment_ = grid.DiagPathRank(colAlignmentVC);
    this->viewing_ = true;
    this->lockedView_ = true;

    if( !this->Participating() )
    {
        this->colShift_ = 0;
    }
    else
    {
        const Int lcm = grid.LCM();
        this->colShift_ =
            Shift( grid.DiagPathRank(), this->colAlignment_, lcm );
        const Int numLocalRows = LocalLength(height,this->colShift_,lcm);
        this->localMatrix_.LockedAttach( numLocalRows, width, buffer, ldim );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
// Constructs a height x width [MD,* ] matrix over the grid g.
//
// All work happens in the AbstractDistMatrix base initializer, which receives
// its arguments positionally. NOTE(review): the base constructor is not
// visible here; judging by the values passed, the arguments after
// (height,width) are presumably two alignment-constraint flags, the column/row
// alignments, the column/row shifts, and the local height/width -- confirm
// against the AbstractDistMatrix declaration.
//
// The two conditional arguments are nonzero only when this process is in the
// grid and lies on diagonal path 0; every other process gets 0 for both.
// diagPath_ is initialized to 0.
inline DistMatrix<T,MD,STAR,Int>::DistMatrix
( Int height, Int width, const elem::Grid& g )
: AbstractDistMatrix<T,Int>
(height,width,false,false,0,0,
// This process's rank within the diagonal path, or 0 when off path 0
(g.InGrid() && g.DiagPath()==0 ? g.DiagPathRank() : 0),0,
// Local row count computed with alignment 0 and modulus g.LCM(), or 0
(g.InGrid() && g.DiagPath()==0 ?
LocalLength(height,g.DiagPathRank(),0,g.LCM()) : 0),width,g),
diagPath_(0)
{ }
// Constructs a height x width [MD,* ] matrix over the grid g, passing a
// caller-supplied buffer with leading dimension ldim to the base class.
//
// colAlignmentVC selects both the diagonal path (stored in diagPath_) and the
// rank within that path that is passed as the third base argument.
// NOTE(review): the AbstractDistMatrix constructor is not visible here; the
// positional arguments are presumably (height, width, colAlignment,
// rowAlignment, colShift, rowShift, localHeight, localWidth, buffer, ldim,
// grid) -- confirm against its declaration. Whether the buffer is viewed or
// copied is likewise decided by the base class.
//
// The two conditional arguments are nonzero only for processes that are in
// the grid and on the same diagonal path as colAlignmentVC.
inline DistMatrix<T,MD,STAR,Int>::DistMatrix
( Int height, Int width, Int colAlignmentVC,
T* buffer, Int ldim, const elem::Grid& g )
: AbstractDistMatrix<T,Int>
(height,width,g.DiagPathRank(colAlignmentVC),0,
// Shift of this process relative to the aligned rank, modulo g.LCM()
(g.InGrid() && g.DiagPath()==g.DiagPath(colAlignmentVC) ?
Shift(g.DiagPathRank(),g.DiagPathRank(colAlignmentVC),g.LCM()) : 0),0,
// Local row count for this process under the same alignment
(g.InGrid() && g.DiagPath()==g.DiagPath(colAlignmentVC) ?
LocalLength(height,g.DiagPathRank(),g.DiagPathRank(colAlignmentVC),g.LCM()) :
0),width,buffer,ldim,g),
diagPath_(g.DiagPath(colAlignmentVC))
{ }
inline void
DistMatrix<T,MD,STAR,Int>::ResizeTo( Int height, Int width )
{
    // Records the new global dimensions and, on participating processes,
    // resizes the local matrix to the matching local height and full width.
    // Debug builds additionally reject locked views and negative dimensions
    // (std::logic_error).
#ifndef RELEASE
    PushCallStack("[MD,* ]::ResizeTo");
    this->AssertNotLockedView();
    if( height < 0 || width < 0 )
        throw std::logic_error("Height and width must be non-negative");
#endif
    this->height_ = height;
    this->width_ = width;

    if( this->Participating() )
    {
        const Int numLocalRows =
            LocalLength(height,this->ColShift(),this->Grid().LCM());
        this->localMatrix_.ResizeTo( numLocalRows, width );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
inline typename Base<F>::type
HermitianFrobeniusNorm
( UpperOrLower uplo, const DistMatrix<F>& A )
{
    // Frobenius norm of a Hermitian matrix of which only one triangle is
    // read. Each process accumulates a scaled sum of squares
    //     sum |a_ij|^2 = localScale^2 * localScaledSquare
    // where localScale tracks the largest magnitude seen so far, which keeps
    // the squares from overflowing. Off-diagonal entries are counted twice
    // because their mirror image is not visited.
    //
    // uplo: UPPER reads the upper triangle; any other value reads the lower.
    // Throws std::logic_error when A is not square.
#ifndef RELEASE
    PushCallStack("internal::HermitianFrobeniusNorm");
#endif
    typedef typename Base<F>::type R;

    if( A.Height() != A.Width() )
        throw std::logic_error("Hermitian matrices must be square.");

    const int gridHeight = A.Grid().Height();
    const int gridWidth = A.Grid().Width();
    const int colShift = A.ColShift();
    const int rowShift = A.RowShift();

    R localScale = 0;
    R localScaledSquare = 1;
    const int localWidth = A.LocalWidth();
    const int localHeight = A.LocalHeight();
    const bool readUpper = ( uplo == UPPER );

    for( int jLoc=0; jLoc<localWidth; ++jLoc )
    {
        // Global column index
        const int j = rowShift + jLoc*gridWidth;

        // Local rows of this column that fall inside the stored triangle
        int iLocBegin, iLocEnd;
        if( readUpper )
        {
            iLocBegin = 0;
            iLocEnd = LocalLength(j+1,colShift,gridHeight);
        }
        else
        {
            iLocBegin = LocalLength(j,colShift,gridHeight);
            iLocEnd = localHeight;
        }

        for( int iLoc=iLocBegin; iLoc<iLocEnd; ++iLoc )
        {
            // Global row index
            const int i = colShift + iLoc*gridHeight;
            const R alphaAbs = Abs(A.GetLocal(iLoc,jLoc));
            if( alphaAbs == 0 )
                continue;

            const bool offDiagonal = ( i != j );
            if( alphaAbs <= localScale )
            {
                // Entry is no larger than the running scale: add its
                // relative square directly.
                const R relScale = alphaAbs/localScale;
                if( offDiagonal )
                    localScaledSquare += 2*relScale*relScale;
                else
                    localScaledSquare += relScale*relScale;
            }
            else
            {
                // New largest magnitude: rescale the running sum to it.
                const R relScale = localScale/alphaAbs;
                if( offDiagonal )
                    localScaledSquare =
                        localScaledSquare*relScale*relScale + 2;
                else
                    localScaledSquare =
                        localScaledSquare*relScale*relScale + 1;
                localScale = alphaAbs;
            }
        }
    }

    // Largest scale across all processes
    R scale;
    mpi::AllReduce( &localScale, &scale, 1, mpi::MAX, A.Grid().VCComm() );

    R norm = 0;
    if( scale != 0 )
    {
        // Re-express the local sum relative to the global scale so the
        // per-process contributions can simply be added.
        R relScale = localScale/scale;
        localScaledSquare *= relScale*relScale;

        R scaledSquare;
        mpi::AllReduce
        ( &localScaledSquare, &scaledSquare, 1, mpi::SUM,
          A.Grid().VCComm() );
        norm = scale*Sqrt(scaledSquare);
    }
#ifndef RELEASE
    PopCallStack();
#endif
    return norm;
}
inline void Her ( UpperOrLower uplo, T alpha, const DistMatrix<T>& x, DistMatrix<T>& A ) { #ifndef RELEASE PushCallStack("Her"); if( A.Grid() != x.Grid() ) throw std::logic_error("{A,x} must be distributed over the same grid"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); const int xLength = ( x.Width()==1 ? x.Height() : x.Width() ); if( A.Height() != xLength ) { std::ostringstream msg; msg << "A must conform with x: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " x ~ " << x.Height() << " x " << x.Width() << "\n"; throw std::logic_error( msg.str() ); } #endif const Grid& g = A.Grid(); const int localHeight = A.LocalHeight(); const int localWidth = A.LocalWidth(); const int r = g.Height(); const int c = g.Width(); const int colShift = A.ColShift(); const int rowShift = A.RowShift(); if( x.Width() == 1 ) { DistMatrix<T,MC,STAR> x_MC_STAR(g); DistMatrix<T,MR,STAR> x_MR_STAR(g); x_MC_STAR.AlignWith( A ); x_MR_STAR.AlignWith( A ); //--------------------------------------------------------------------// x_MC_STAR = x; x_MR_STAR = x_MC_STAR; const T* xLocal = x_MC_STAR.LockedLocalBuffer(); if( uplo == LOWER ) { for( int jLocal=0; jLocal<localWidth; ++jLocal ) { const int j = rowShift + jLocal*c; const int heightAboveDiag = LocalLength(j,colShift,r); const T gamma = alpha*Conj(x_MR_STAR.GetLocal(jLocal,0)); T* ALocalCol = A.LocalBuffer(0,jLocal); for( int iLocal=heightAboveDiag; iLocal<localHeight; ++iLocal ) ALocalCol[iLocal] += gamma*xLocal[iLocal]; } } else { for( int jLocal=0; jLocal<localWidth; ++jLocal ) { const int j = rowShift + jLocal*c; const int heightToDiag = LocalLength(j+1,colShift,r); const T gamma = alpha*Conj(x_MR_STAR.GetLocal(jLocal,0)); T* ALocalCol = A.LocalBuffer(0,jLocal); for( int iLocal=0; iLocal<heightToDiag; ++iLocal ) ALocalCol[iLocal] += gamma*xLocal[iLocal]; } } //--------------------------------------------------------------------// x_MC_STAR.FreeAlignments(); x_MR_STAR.FreeAlignments(); } else { 
DistMatrix<T,STAR,MC> x_STAR_MC(g); DistMatrix<T,STAR,MR> x_STAR_MR(g); x_STAR_MC.AlignWith( A ); x_STAR_MR.AlignWith( A ); //--------------------------------------------------------------------// x_STAR_MR = x; x_STAR_MC = x_STAR_MR; const T* xLocal = x_STAR_MC.LockedLocalBuffer(); const int incx = x_STAR_MC.LocalLDim(); if( uplo == LOWER ) { for( int jLocal=0; jLocal<localWidth; ++jLocal ) { const int j = rowShift + jLocal*c; const int heightAboveDiag = LocalLength(j,colShift,r); const T gamma = alpha*Conj(x_STAR_MR.GetLocal(0,jLocal)); T* ALocalCol = A.LocalBuffer(0,jLocal); for( int iLocal=heightAboveDiag; iLocal<localHeight; ++iLocal ) ALocalCol[iLocal] += gamma*xLocal[iLocal*incx]; } } else { for( int jLocal=0; jLocal<localWidth; ++jLocal ) { const int j = rowShift + jLocal*c; const int heightToDiag = LocalLength(j+1,colShift,r); const T gamma = alpha*Conj(x_STAR_MR.GetLocal(0,jLocal)); T* ALocalCol = A.LocalBuffer(0,jLocal); for( int iLocal=0; iLocal<heightToDiag; ++iLocal ) ALocalCol[iLocal] += gamma*xLocal[iLocal*incx]; } } //--------------------------------------------------------------------// x_STAR_MC.FreeAlignments(); x_STAR_MR.FreeAlignments(); } #ifndef RELEASE PopCallStack(); #endif }
inline void ApplyColumnPivots ( DistMatrix<F>& A, const std::vector<int>& image, const std::vector<int>& preimage ) { const int b = image.size(); #ifndef RELEASE PushCallStack("ApplyColumnPivots"); if( A.Width() < b || b != preimage.size() ) throw std::logic_error ("image and preimage must be vectors of equal length that are not " "wider than A."); #endif const int localHeight = A.LocalHeight(); if( A.Height() == 0 || A.Width() == 0 ) { #ifndef RELEASE PopCallStack(); #endif return; } // Extract the relevant process grid information const Grid& g = A.Grid(); const int c = g.Width(); const int rowAlignment = A.RowAlignment(); const int rowShift = A.RowShift(); const int myCol = g.Col(); // Extract the send and recv counts from the image and preimage. // This process's sends may be logically partitioned into two sets: // (a) sends from rows [0,...,b-1] // (b) sends from rows [b,...] // The latter is analyzed with image, the former deduced with preimage. std::vector<int> sendCounts(c,0), recvCounts(c,0); for( int j=rowShift; j<b; j+=c ) { const int sendCol = preimage[j]; const int sendTo = (rowAlignment+sendCol) % c; sendCounts[sendTo] += localHeight; const int recvCol = image[j]; const int recvFrom = (rowAlignment+recvCol) % c; recvCounts[recvFrom] += localHeight; } for( int j=0; j<b; ++j ) { const int sendCol = preimage[j]; if( sendCol >= b ) { const int sendTo = (rowAlignment+sendCol) % c; if( sendTo == myCol ) { const int sendFrom = (rowAlignment+j) % c; recvCounts[sendFrom] += localHeight; } } const int recvCol = image[j]; if( recvCol >= b ) { const int recvFrom = (rowAlignment+recvCol) % c; if( recvFrom == myCol ) { const int recvTo = (rowAlignment+j) % c; sendCounts[recvTo] += localHeight; } } } // Construct the send and recv displacements from the counts std::vector<int> sendDispls(c), recvDispls(c); int totalSend=0, totalRecv=0; for( int i=0; i<c; ++i ) { sendDispls[i] = totalSend; recvDispls[i] = totalRecv; totalSend += sendCounts[i]; totalRecv += 
recvCounts[i]; } #ifndef RELEASE if( totalSend != totalRecv ) { std::ostringstream msg; msg << "Send and recv counts do not match: (send,recv)=" << totalSend << "," << totalRecv; throw std::logic_error( msg.str().c_str() ); } #endif // Fill vectors with the send data std::vector<F> sendData(std::max(1,totalSend)); std::vector<int> offsets(c,0); const int localWidth = LocalLength( b, rowShift, c ); for( int jLocal=0; jLocal<localWidth; ++jLocal ) { const int sendCol = preimage[rowShift+jLocal*c]; const int sendTo = (rowAlignment+sendCol) % c; const int offset = sendDispls[sendTo]+offsets[sendTo]; MemCopy( &sendData[offset], A.LocalBuffer(0,jLocal), localHeight ); offsets[sendTo] += localHeight; } for( int j=0; j<b; ++j ) { const int recvCol = image[j]; if( recvCol >= b ) { const int recvFrom = (rowAlignment+recvCol) % c; if( recvFrom == myCol ) { const int recvTo = (rowAlignment+j) % c; const int jLocal = (recvCol-rowShift) / c; const int offset = sendDispls[recvTo]+offsets[recvTo]; MemCopy ( &sendData[offset], A.LocalBuffer(0,jLocal), localHeight ); offsets[recvTo] += localHeight; } } } // Communicate all pivot rows std::vector<F> recvData(std::max(1,totalRecv)); mpi::AllToAll ( &sendData[0], &sendCounts[0], &sendDispls[0], &recvData[0], &recvCounts[0], &recvDispls[0], g.RowComm() ); // Unpack the recv data for( int k=0; k<c; ++k ) { offsets[k] = 0; int thisRowShift = Shift( k, rowAlignment, c ); for( int j=thisRowShift; j<b; j+=c ) { const int sendCol = preimage[j]; const int sendTo = (rowAlignment+sendCol) % c; if( sendTo == myCol ) { const int offset = recvDispls[k]+offsets[k]; const int jLocal = (sendCol-rowShift) / c; MemCopy ( A.LocalBuffer(0,jLocal), &recvData[offset], localHeight ); offsets[k] += localHeight; } } } for( int j=0; j<b; ++j ) { const int recvCol = image[j]; if( recvCol >= b ) { const int recvTo = (rowAlignment+j) % c; if( recvTo == myCol ) { const int recvFrom = (rowAlignment+recvCol) % c; const int jLocal = (j-rowShift) / c; const int offset 
= recvDispls[recvFrom]+offsets[recvFrom]; MemCopy ( A.LocalBuffer(0,jLocal), &recvData[offset], localHeight ); offsets[recvFrom] += localHeight; } } } #ifndef RELEASE PopCallStack(); #endif }
inline void ApplyRowPivots ( DistMatrix<F>& A, const std::vector<int>& image, const std::vector<int>& preimage ) { const int b = image.size(); #ifndef RELEASE PushCallStack("ApplyRowPivots"); if( A.Height() < b || b != (int)preimage.size() ) throw std::logic_error ("image and preimage must be vectors of equal length that are not " "taller than A."); #endif const int localWidth = A.LocalWidth(); if( A.Height() == 0 || A.Width() == 0 ) { #ifndef RELEASE PopCallStack(); #endif return; } // Extract the relevant process grid information const Grid& g = A.Grid(); const int r = g.Height(); const int colAlignment = A.ColAlignment(); const int colShift = A.ColShift(); const int myRow = g.Row(); // Extract the send and recv counts from the image and preimage. // This process's sends may be logically partitioned into two sets: // (a) sends from rows [0,...,b-1] // (b) sends from rows [b,...] // The latter is analyzed with image, the former deduced with preimage. std::vector<int> sendCounts(r,0), recvCounts(r,0); for( int i=colShift; i<b; i+=r ) { const int sendRow = preimage[i]; const int sendTo = (colAlignment+sendRow) % r; sendCounts[sendTo] += localWidth; const int recvRow = image[i]; const int recvFrom = (colAlignment+recvRow) % r; recvCounts[recvFrom] += localWidth; } for( int i=0; i<b; ++i ) { const int sendRow = preimage[i]; if( sendRow >= b ) { const int sendTo = (colAlignment+sendRow) % r; if( sendTo == myRow ) { const int sendFrom = (colAlignment+i) % r; recvCounts[sendFrom] += localWidth; } } const int recvRow = image[i]; if( recvRow >= b ) { const int recvFrom = (colAlignment+recvRow) % r; if( recvFrom == myRow ) { const int recvTo = (colAlignment+i) % r; sendCounts[recvTo] += localWidth; } } } // Construct the send and recv displacements from the counts std::vector<int> sendDispls(r), recvDispls(r); int totalSend=0, totalRecv=0; for( int i=0; i<r; ++i ) { sendDispls[i] = totalSend; recvDispls[i] = totalRecv; totalSend += sendCounts[i]; totalRecv += recvCounts[i]; 
} #ifndef RELEASE if( totalSend != totalRecv ) { std::ostringstream msg; msg << "Send and recv counts do not match: (send,recv)=" << totalSend << "," << totalRecv; throw std::logic_error( msg.str().c_str() ); } #endif // Fill vectors with the send data const int ALDim = A.LocalLDim(); std::vector<F> sendData(std::max(1,totalSend)); std::vector<int> offsets(r,0); const int localHeight = LocalLength( b, colShift, r ); for( int iLocal=0; iLocal<localHeight; ++iLocal ) { const int sendRow = preimage[colShift+iLocal*r]; const int sendTo = (colAlignment+sendRow) % r; const int offset = sendDispls[sendTo]+offsets[sendTo]; const F* ABuffer = A.LocalBuffer(iLocal,0); for( int jLocal=0; jLocal<localWidth; ++jLocal ) sendData[offset+jLocal] = ABuffer[jLocal*ALDim]; offsets[sendTo] += localWidth; } for( int i=0; i<b; ++i ) { const int recvRow = image[i]; if( recvRow >= b ) { const int recvFrom = (colAlignment+recvRow) % r; if( recvFrom == myRow ) { const int recvTo = (colAlignment+i) % r; const int iLocal = (recvRow-colShift) / r; const int offset = sendDispls[recvTo]+offsets[recvTo]; const F* ABuffer = A.LocalBuffer(iLocal,0); for( int jLocal=0; jLocal<localWidth; ++jLocal ) sendData[offset+jLocal] = ABuffer[jLocal*ALDim]; offsets[recvTo] += localWidth; } } } // Communicate all pivot rows std::vector<F> recvData(std::max(1,totalRecv)); mpi::AllToAll ( &sendData[0], &sendCounts[0], &sendDispls[0], &recvData[0], &recvCounts[0], &recvDispls[0], g.ColComm() ); // Unpack the recv data for( int k=0; k<r; ++k ) { offsets[k] = 0; int thisColShift = Shift( k, colAlignment, r ); for( int i=thisColShift; i<b; i+=r ) { const int sendRow = preimage[i]; const int sendTo = (colAlignment+sendRow) % r; if( sendTo == myRow ) { const int offset = recvDispls[k]+offsets[k]; const int iLocal = (sendRow-colShift) / r; F* ABuffer = A.LocalBuffer(iLocal,0); for( int jLocal=0; jLocal<localWidth; ++jLocal ) ABuffer[jLocal*ALDim] = recvData[offset+jLocal]; offsets[k] += localWidth; } } } for( int i=0; 
i<b; ++i ) { const int recvRow = image[i]; if( recvRow >= b ) { const int recvTo = (colAlignment+i) % r; if( recvTo == myRow ) { const int recvFrom = (colAlignment+recvRow) % r; const int iLocal = (i-colShift) / r; const int offset = recvDispls[recvFrom]+offsets[recvFrom]; F* ABuffer = A.LocalBuffer(iLocal,0); for( int jLocal=0; jLocal<localWidth; ++jLocal ) ABuffer[jLocal*ALDim] = recvData[offset+jLocal]; offsets[recvFrom] += localWidth; } } } #ifndef RELEASE PopCallStack(); #endif }