// Change the graph size // --------------------- // TODO: Replace Empty/SoftEmpty in favor of this approach void DistGraph::Empty( bool freeMemory ) { numSources_ = 0; numTargets_ = 0; numLocalSources_ = 0; blocksize_ = 1; locallyConsistent_ = true; frozenSparsity_ = false; if( freeMemory ) { SwapClear( sources_ ); SwapClear( targets_ ); SwapClear( localSourceOffsets_ ); } else { sources_.resize( 0 ); targets_.resize( 0 ); } localSourceOffsets_.resize( 1 ); localSourceOffsets_[0] = 0; SwapClear( remoteSources_ ); SwapClear( remoteTargets_ ); }
// Empty the sparse matrix: the underlying graph is emptied with the same
// freeMemory flag, the local values are dropped, the multiplication metadata
// stored on the graph is cleared, and the queued remote values are discarded.
//
// freeMemory: when true the local values go through SwapClear; otherwise
//             they are only resized to zero length.
void DistSparseMatrix<T>::Empty( bool freeMemory )
{
    distGraph_.Empty( freeMemory );

    if( !freeMemory )
        vals_.resize( 0 );
    else
        SwapClear( vals_ );

    distGraph_.multMeta.Clear();

    SwapClear( remoteVals_ );
}
// Change the number of sources and targets of the graph.
//
// If both requested dimensions already match the current ones, nothing is
// modified. Otherwise the sparsity is unfrozen, the new dimensions are
// recorded, the local data is reinitialized, and any queued remote
// sources/targets are discarded.
void DistGraph::Resize( Int numSources, Int numTargets )
{
    const bool sameShape =
      ( numSources_ == numSources ) && ( numTargets == numTargets_ );
    if( sameShape )
        return;

    frozenSparsity_ = false;

    numSources_ = numSources;
    numTargets_ = numTargets;

    // Runs only after the new dimensions have been stored
    InitializeLocalData();

    SwapClear( remoteSources_ );
    SwapClear( remoteTargets_ );
}
// Resize the sparse matrix to height x width.
//
// The underlying graph is resized, the local values are truncated to zero
// length, and the queued remote values are discarded.
void DistSparseMatrix<Ring>::Resize( Int height, Int width )
{
    EL_DEBUG_CSE

    // The graph owns the dimensions
    distGraph_.Resize( height, width );

    // No local values are retained
    vals_.resize( 0 );

    // Nor are any queued remote values
    SwapClear( remoteVals_ );
}
// Give the sparse matrix the requested dimensions by resizing its graph,
// then drop the stored values: the local values are resized to zero length
// and the queued remote values are passed to SwapClear.
void DistSparseMatrix<T>::Resize( Int height, Int width )
{
    distGraph_.Resize( height, width );

    vals_.resize( 0 );
    SwapClear( remoteVals_ );
}
// Recursively populate this node (and its subtree) from the rows of X.
//
// invMap: row t of this node's matrix is read from row invMap[info.off+t]
//         of X.
// info:   supplies the number of rows (info.size), the offset into invMap
//         (info.off), and the children whose count this node mirrors.
// X:      the matrix the rows are read from; every column is copied.
void MatrixNode<T>::Pull
( const vector<Int>& invMap, const NodeInfo& info, const Matrix<T>& X )
{
    EL_DEBUG_CSE

    // Gather this node's rows of X
    const Int numCols = X.Width();
    matrix.Resize( info.size, numCols );
    for( Int row=0; row<info.size; ++row )
    {
        const Int srcRow = invMap[info.off+row];
        for( Int col=0; col<numCols; ++col )
            matrix(row,col) = X(srcRow,col);
    }

    // Clean up any pre-existing children if not the right amount
    const Int numChildren = info.children.size();
    if( children.size() != info.children.size() )
    {
        SwapClear( children );
        children.resize( numChildren );
        for( auto& child : children )
            child.reset( new MatrixNode<T>(this) );
    }

    // Descend into each child with its matching info node
    for( Int c=0; c<numChildren; ++c )
        children[c]->Pull( invMap, *info.children[c], X );
}
// Empty the local matrix (forwarding freeMemory), mark this matrix as an
// OWNER, zero the global dimensions, and discard the queued remote updates.
// This routine does not itself assign any alignment or constraint members.
void AbstractDistMatrix<T>::EmptyData( bool freeMemory )
{
    matrix_.Empty_( freeMemory );
    viewType_ = OWNER;

    height_ = 0;
    width_ = 0;

    SwapClear( remoteUpdates );
}
// Empty the sparse matrix: the graph is emptied with the same flag, the
// local values are dropped, and this matrix's multiplication metadata is
// cleared.
//
// clearMemory: when true the local values go through SwapClear; otherwise
//              they are only resized to zero length.
void DistSparseMatrix<T>::Empty( bool clearMemory )
{
    distGraph_.Empty( clearMemory );

    if( !clearMemory )
        vals_.resize( 0 );
    else
        SwapClear( vals_ );

    multMeta.Clear();
}
// Switch the matrix to a different communicator.
//
// Nothing happens when comm compares equal to the current communicator.
// Otherwise the graph is given the new communicator, the local values are
// resized to zero length, and the queued remote values are discarded.
void DistSparseMatrix<T>::SetComm( mpi::Comm comm )
{
    const bool sameComm = ( Comm() == comm );
    if( sameComm )
        return;

    distGraph_.SetComm( comm );

    vals_.resize( 0 );
    SwapClear( remoteVals_ );
}
// Reset the multi-vector to 0 x 0 with a blocksize of one, empty the local
// storage (forwarding freeMemory), and discard the queued remote updates.
void DistMultiVec<T>::Empty( bool freeMemory )
{
    // Default metadata
    height_ = 0;
    width_ = 0;
    blocksize_ = 1;

    // Local storage
    multiVec_.Empty( freeMemory );

    // Queued remote updates
    SwapClear( remoteUpdates_ );
}
// Change the dimensions of the multi-vector.
//
// A request matching the current height and width leaves everything as is.
// Otherwise the new dimensions are stored, the local data is reinitialized,
// and the queued remote updates are discarded.
void DistMultiVec<T>::Resize( Int height, Int width )
{
    const bool sameShape = ( height_ == height ) && ( width == width_ );
    if( sameShape )
        return;

    height_ = height;
    width_ = width;

    // Runs only after the new dimensions have been stored
    InitializeLocalData();

    SwapClear( remoteUpdates_ );
}
// Fully reset the distributed matrix: the local matrix is emptied
// (forwarding freeMemory), the matrix becomes an OWNER, the dimensions and
// alignments are zeroed, all three constraint flags are cleared, the shifts
// are recomputed, and the queued remote updates are discarded.
void AbstractDistMatrix<T>::Empty( bool freeMemory )
{
    // Local storage and ownership
    matrix_.Empty_( freeMemory );
    viewType_ = OWNER;

    // Global dimensions
    height_ = 0;
    width_ = 0;

    // Alignments and their constraints
    colAlign_ = 0;
    rowAlign_ = 0;
    colConstrained_ = false;
    rowConstrained_ = false;
    rootConstrained_ = false;

    // Called only after the alignments have been reset
    SetShifts();

    SwapClear( remoteUpdates );
}
// Deep-copy assignment: copies X's matrix into this node and then assigns
// each of X's children to the corresponding child of this node, recursing
// through the whole subtree.
//
// If the number of children differs from X's, the existing children are
// discarded and fresh nodes (constructed with this node as their argument)
// are allocated before the recursive assignment.
//
// Returns a const reference to this node.
const MatrixNode<T>& MatrixNode<T>::operator=( const MatrixNode<T>& X )
{
    EL_DEBUG_CSE

    // Self-assignment would otherwise copy the matrix and every descendant
    // onto itself; there is nothing to do in that case.
    if( this == &X )
        return *this;

    matrix = X.matrix;

    // Clean up any pre-existing children if not the right amount
    const Int numChildren = X.children.size();
    if( children.size() != X.children.size() )
    {
        SwapClear( children );
        children.resize( numChildren );
        for( Int c=0; c<numChildren; ++c )
            children[c].reset( new MatrixNode<T>(this) );
    }

    // Recursively assign each child
    for( Int c=0; c<numChildren; ++c )
        *children[c] = *X.children[c];

    return *this;
}
// Fill PInf with the n x n matrix whose entry (i,j) equals sigma[j], i.e.,
// every row is the same length-n vector sigma.
//
// sigma is built iteratively: starting from sigma[0] = 1, step j (for
// j = 1, ..., n-1) forms
//   sigmaTmp[k] = (k+1)*sigma[k] + (j-k+1)*sigma[k-1],  1 <= k <= j,
// and then divides every entry by (j+1).
// NOTE(review): this looks like the Eulerian-number recurrence normalized by
// a factorial -- confirm against the intended definition.
//
// PInf: output matrix; resized to n x n.
// n:    dimension of the matrix. For n == 0, PInf is resized to 0 x 0 and no
//       fill is performed.
void RiffleStationary( AbstractDistMatrix<F>& PInf, Int n )
{
    DEBUG_CSE
    typedef Base<F> Real;

    // With n == 0 the work vectors would be empty and seeding sigma[0] would
    // write out of bounds, so handle the empty matrix explicitly.
    if( n == 0 )
    {
        PInf.Resize( 0, 0 );
        return;
    }

    // NOTE: This currently requires quadratic time
    vector<Real> sigma(n,0), sigmaTmp(n,0);
    sigma[0] = sigmaTmp[0] = 1;
    for( Int j=1; j<n; ++j )
    {
        sigmaTmp[0] = sigma[0];
        for( Int k=1; k<=j; ++k )
            sigmaTmp[k] = (k+1)*sigma[k] + (j-k+1)*sigma[k-1];
        for( Int k=0; k<n; ++k )
            sigma[k] = sigmaTmp[k]/(j+1);
    }
    // The scratch buffer is no longer needed
    SwapClear( sigmaTmp );

    PInf.Resize( n, n );
    // Only the column index selects the value
    auto riffleStatFill = [&]( Int i, Int j ) { return sigma[j]; };
    IndexDependentFill( PInf, function<F(Int,Int)>(riffleStatFill) );
}
// Discard both index containers by passing each to SwapClear.
void Empty()
{
    // Per-child send counts
    SwapClear( numChildSendInds );

    // Indices received from the children
    SwapClear( childRecvInds );
}
// Discard the per-child send counts via SwapClear, then delegate the
// clearing of the child receive indices to EmptyChildRecvIndices.
void Empty()
{
    SwapClear( numChildSendInds );

    EmptyChildRecvIndices();
}
// Pass the child receive indices to SwapClear.
// NOTE(review): this is a const member, so childRecvInds is presumably
// declared mutable -- confirm against the class definition.
void EmptyChildRecvIndices() const
{
    SwapClear( childRecvInds );
}
// Assign a communicator to the matrix: the graph receives comm and the
// stored values are passed to SwapClear.
inline void DistSparseMatrix<T>::SetComm( mpi::Comm comm )
{
    // The graph holds the communicator
    distGraph_.SetComm( comm );

    // Existing values are dropped
    SwapClear( vals_ );
}
inline void NestedDissectionRecursion ( const Graph& graph, const vector<Int>& perm, Separator& sep, NodeInfo& node, Int off, const BisectCtrl& ctrl ) { DEBUG_CSE const Int numSources = graph.NumSources(); const Int* offsetBuf = graph.LockedOffsetBuffer(); const Int* sourceBuf = graph.LockedSourceBuffer(); const Int* targetBuf = graph.LockedTargetBuffer(); if( numSources <= ctrl.cutoff ) { // Filter out the graph of the diagonal block Int numValidEdges = 0; const Int numEdges = graph.NumEdges(); for( Int e=0; e<numEdges; ++e ) if( targetBuf[e] < numSources ) ++numValidEdges; vector<Int> subOffsets(numSources+1), subTargets(Max(numValidEdges,1)); Int sourceOff = 0; Int validCounter = 0; Int prevSource = -1; for( Int e=0; e<numEdges; ++e ) { const Int source = sourceBuf[e]; const Int target = targetBuf[e]; while( source != prevSource ) { subOffsets[sourceOff++] = validCounter; ++prevSource; } if( target < numSources ) subTargets[validCounter++] = target; } while( sourceOff <= numSources ) { subOffsets[sourceOff++] = validCounter; } // Technically, SuiteSparse expects column-major storage, but since // the matrix is structurally symmetric, it's okay to pass in the // row-major representation vector<Int> amdPerm; AMDOrder( subOffsets, subTargets, amdPerm ); // Compute the symbolic factorization of this leaf node using the // reordering just computed node.LOffsets.resize( numSources+1 ); node.LParents.resize( numSources ); vector<Int> LNnz( numSources ), Flag( numSources ), amdPermInv( numSources ); suite_sparse::ldl::Symbolic ( numSources, subOffsets.data(), subTargets.data(), node.LOffsets.data(), node.LParents.data(), LNnz.data(), Flag.data(), amdPerm.data(), amdPermInv.data() ); // Fill in this node of the local separator tree sep.off = off; sep.inds.resize( numSources ); for( Int i=0; i<numSources; ++i ) sep.inds[i] = perm[amdPerm[i]]; // TODO: Replace with better deletion mechanism SwapClear( sep.children ); // Fill in this node of the local elimination tree 
node.size = numSources; node.off = off; // TODO: Replace with better deletion mechanism SwapClear( node.children ); set<Int> lowerStruct; for( Int s=0; s<node.size; ++s ) { const Int edgeOff = offsetBuf[s]; const Int numConn = offsetBuf[s+1] - edgeOff; for( Int t=0; t<numConn; ++t ) { const Int target = targetBuf[edgeOff+t]; if( target >= numSources ) lowerStruct.insert( off+target ); } } CopySTL( lowerStruct, node.origLowerStruct ); } else { DEBUG_ONLY( if( !IsSymmetric(graph) ) { Print( graph, "graph" ); LogicError("Graph was not symmetric"); } ) // Partition the graph and construct the inverse map Graph leftChild, rightChild; vector<Int> map; const Int sepSize = Bisect( graph, leftChild, rightChild, map, ctrl ); vector<Int> invMap( numSources ); for( Int s=0; s<numSources; ++s ) invMap[map[s]] = s; DEBUG_ONLY( if( !IsSymmetric(leftChild) ) { Print( graph, "graph" ); Print( leftChild, "leftChild" ); LogicError("Left child was not symmetric"); } )
void Helper ( const AbstractDistMatrix<S>& A, AbstractDistMatrix<T>& B ) { EL_DEBUG_CSE // TODO: Decide whether S or T should be used as the transmission type // based upon which is smaller. Transmit S by default. const Int height = A.Height(); const Int width = A.Width(); const Grid& g = B.Grid(); B.Resize( height, width ); Zero( B ); const bool BPartic = B.Participating(); const int BRoot = B.Root(); const bool includeViewers = (A.Grid() != B.Grid()); const Int localHeight = A.LocalHeight(); const Int localWidth = A.LocalWidth(); auto& ALoc = A.LockedMatrix(); auto& BLoc = B.Matrix(); // TODO: Break into smaller pieces to avoid excessive memory usage? vector<Entry<S>> remoteEntries; vector<int> distOwners; if( A.RedundantRank() == 0 ) { const bool noRedundant = B.RedundantSize() == 1; const int colStride = B.ColStride(); const int rowRank = B.RowRank(); const int colRank = B.ColRank(); vector<Int> globalRows(localHeight), localRows(localHeight); vector<int> ownerRows(localHeight); for( Int iLoc=0; iLoc<localHeight; ++iLoc ) { const Int i = A.GlobalRow(iLoc); const int ownerRow = B.RowOwner(i); globalRows[iLoc] = i; ownerRows[iLoc] = ownerRow; localRows[iLoc] = B.LocalRow(i,ownerRow); } remoteEntries.reserve( localHeight*localWidth ); distOwners.reserve( localHeight*localWidth ); for( Int jLoc=0; jLoc<localWidth; ++jLoc ) { const Int j = A.GlobalCol(jLoc); const int ownerCol = B.ColOwner(j); const Int localCol = B.LocalCol(j,ownerCol); const bool isLocalCol = ( BPartic && ownerCol == rowRank ); for( Int iLoc=0; iLoc<localHeight; ++iLoc ) { const int ownerRow = ownerRows[iLoc]; const Int localRow = localRows[iLoc]; const bool isLocalRow = ( BPartic && ownerRow == colRank ); const S& alpha = ALoc(iLoc,jLoc); if( noRedundant && isLocalRow && isLocalCol ) { BLoc(localRow,localCol) = Caster<S,T>::Cast(alpha); } else { remoteEntries.push_back ( Entry<S>{localRow,localCol,alpha} ); distOwners.push_back( ownerRow + colStride*ownerCol ); } } } } // We will first push to 
redundant rank 0 of B const int redundantRootB = 0; // Compute the metadata // ==================== const Int totalSend = remoteEntries.size(); mpi::Comm comm; vector<int> sendCounts, owners(totalSend); if( includeViewers ) { comm = g.ViewingComm(); const int viewingSize = mpi::Size( g.ViewingComm() ); const int distBSize = mpi::Size( B.DistComm() ); vector<int> distBToViewing(distBSize); for( int distBRank=0; distBRank<distBSize; ++distBRank ) { const int vcOwner = g.CoordsToVC (B.ColDist(),B.RowDist(),distBRank,BRoot,redundantRootB); distBToViewing[distBRank] = g.VCToViewing(vcOwner); } sendCounts.resize(viewingSize,0); for( Int k=0; k<totalSend; ++k ) { owners[k] = distBToViewing[distOwners[k]]; ++sendCounts[owners[k]]; } } else { if( !g.InGrid() ) return; comm = g.VCComm(); const int distBSize = mpi::Size( B.DistComm() ); vector<int> distBToVC(distBSize); for( int distBRank=0; distBRank<distBSize; ++distBRank ) { distBToVC[distBRank] = g.CoordsToVC (B.ColDist(),B.RowDist(),distBRank,BRoot,redundantRootB); } const int vcSize = mpi::Size( g.VCComm() ); sendCounts.resize(vcSize,0); for( Int k=0; k<totalSend; ++k ) { owners[k] = distBToVC[distOwners[k]]; ++sendCounts[owners[k]]; } } SwapClear( distOwners ); // Pack the data // ============= vector<int> sendOffs; Scan( sendCounts, sendOffs ); vector<Entry<S>> sendBuf; FastResize( sendBuf, totalSend ); auto offs = sendOffs; for( Int k=0; k<totalSend; ++k ) sendBuf[offs[owners[k]]++] = remoteEntries[k]; SwapClear( remoteEntries ); SwapClear( owners ); // Exchange and unpack the data // ============================ auto recvBuf = mpi::AllToAll( sendBuf, sendCounts, sendOffs, comm ); if( BPartic ) { if( B.RedundantRank() == redundantRootB ) { Int recvBufSize = recvBuf.size(); for( Int k=0; k<recvBufSize; ++k ) { const auto& entry = recvBuf[k]; BLoc(entry.i,entry.j) = Caster<S,T>::Cast(entry.value); } } El::Broadcast( B, B.RedundantComm(), redundantRootB ); } }