void StackedGeometricColumnScaling ( const DistMatrix<Field, U,V >& A, const DistMatrix<Field, U,V >& B, DistMatrix<Base<Field>,V,STAR>& geomScaling ) { EL_DEBUG_CSE // NOTE: Assuming A.ColComm() == B.ColComm() and that the row alignments // are equal typedef Base<Field> Real; DistMatrix<Real,V,STAR> maxScalingA(A.Grid()), maxScalingB(A.Grid()); ColumnMaxNorms( A, maxScalingA ); ColumnMaxNorms( B, maxScalingB ); const Int mLocalA = A.LocalHeight(); const Int mLocalB = B.LocalHeight(); const Int nLocal = A.LocalWidth(); geomScaling.AlignWith( maxScalingA ); geomScaling.Resize( A.Width(), 1 ); auto& ALoc = A.LockedMatrix(); auto& BLoc = B.LockedMatrix(); auto& geomScalingLoc = geomScaling.Matrix(); auto& maxScalingALoc = maxScalingA.Matrix(); auto& maxScalingBLoc = maxScalingB.Matrix(); for( Int jLoc=0; jLoc<nLocal; ++jLoc ) { Real minAbs = Max(maxScalingALoc(jLoc),maxScalingBLoc(jLoc)); for( Int iLoc=0; iLoc<mLocalA; ++iLoc ) { const Real absVal = Abs(ALoc(iLoc,jLoc)); if( absVal > 0 && absVal < minAbs ) minAbs = Min(minAbs,absVal); } for( Int iLoc=0; iLoc<mLocalB; ++iLoc ) { const Real absVal = Abs(BLoc(iLoc,jLoc)); if( absVal > 0 && absVal < minAbs ) minAbs = Min(minAbs,absVal); } geomScalingLoc(jLoc) = minAbs; } mpi::AllReduce( geomScaling.Buffer(), nLocal, mpi::MIN, A.ColComm() ); for( Int jLoc=0; jLoc<nLocal; ++jLoc ) { const Real maxAbsA = maxScalingALoc(jLoc); const Real maxAbsB = maxScalingBLoc(jLoc); const Real maxAbs = Max(maxAbsA,maxAbsB); const Real minAbs = geomScalingLoc(jLoc); geomScalingLoc(jLoc) = Sqrt(minAbs*maxAbs); } }
void QP ( const DistSparseMatrix<Real>& A, const DistMultiVec<Real>& B, DistMultiVec<Real>& X, const qp::direct::Ctrl<Real>& ctrl ) { DEBUG_CSE const Int m = A.Height(); const Int n = A.Width(); const Int k = B.Width(); mpi::Comm comm = A.Comm(); DistSparseMatrix<Real> Q(comm), AHat(comm); DistMultiVec<Real> bHat(comm), c(comm); Herk( LOWER, ADJOINT, Real(1), A, Q ); MakeHermitian( LOWER, Q ); Zeros( AHat, 0, n ); Zeros( bHat, 0, 1 ); Zeros( X, n, k ); DistMultiVec<Real> q(comm), y(comm), z(comm); auto& qLoc = q.Matrix(); auto& XLoc = X.Matrix(); auto& BLoc = B.LockedMatrix(); for( Int j=0; j<k; ++j ) { auto xLoc = XLoc( ALL, IR(j) ); auto bLoc = BLoc( ALL, IR(j) ); Zeros( c, n, 1 ); Zeros( q, m, 1 ); qLoc = bLoc; Multiply( ADJOINT, Real(-1), A, q, Real(0), c ); Zeros( q, n, 1 ); qLoc = xLoc; El::QP( Q, AHat, bHat, c, q, y, z, ctrl ); xLoc = qLoc; } }
void IndexDependentMap ( const BlockMatrix<S>& A, BlockMatrix<T>& B, function<T(Int,Int,S)> func ) { DEBUG_CSE const Int mLoc = A.LocalHeight(); const Int nLoc = A.LocalWidth(); B.AlignWith( A.DistData() ); B.Resize( A.Height(), A.Width() ); auto& ALoc = A.LockedMatrix(); auto& BLoc = B.Matrix(); for( Int jLoc=0; jLoc<nLoc; ++jLoc ) { const Int j = A.GlobalCol(jLoc); for( Int iLoc=0; iLoc<mLoc; ++iLoc ) { const Int i = A.GlobalRow(iLoc); BLoc(iLoc,jLoc) = func(i,j,ALoc(iLoc,jLoc)); } } }
void Helper ( const AbstractDistMatrix<S>& A, AbstractDistMatrix<T>& B ) { EL_DEBUG_CSE // TODO: Decide whether S or T should be used as the transmission type // based upon which is smaller. Transmit S by default. const Int height = A.Height(); const Int width = A.Width(); const Grid& g = B.Grid(); B.Resize( height, width ); Zero( B ); const bool BPartic = B.Participating(); const int BRoot = B.Root(); const bool includeViewers = (A.Grid() != B.Grid()); const Int localHeight = A.LocalHeight(); const Int localWidth = A.LocalWidth(); auto& ALoc = A.LockedMatrix(); auto& BLoc = B.Matrix(); // TODO: Break into smaller pieces to avoid excessive memory usage? vector<Entry<S>> remoteEntries; vector<int> distOwners; if( A.RedundantRank() == 0 ) { const bool noRedundant = B.RedundantSize() == 1; const int colStride = B.ColStride(); const int rowRank = B.RowRank(); const int colRank = B.ColRank(); vector<Int> globalRows(localHeight), localRows(localHeight); vector<int> ownerRows(localHeight); for( Int iLoc=0; iLoc<localHeight; ++iLoc ) { const Int i = A.GlobalRow(iLoc); const int ownerRow = B.RowOwner(i); globalRows[iLoc] = i; ownerRows[iLoc] = ownerRow; localRows[iLoc] = B.LocalRow(i,ownerRow); } remoteEntries.reserve( localHeight*localWidth ); distOwners.reserve( localHeight*localWidth ); for( Int jLoc=0; jLoc<localWidth; ++jLoc ) { const Int j = A.GlobalCol(jLoc); const int ownerCol = B.ColOwner(j); const Int localCol = B.LocalCol(j,ownerCol); const bool isLocalCol = ( BPartic && ownerCol == rowRank ); for( Int iLoc=0; iLoc<localHeight; ++iLoc ) { const int ownerRow = ownerRows[iLoc]; const Int localRow = localRows[iLoc]; const bool isLocalRow = ( BPartic && ownerRow == colRank ); const S& alpha = ALoc(iLoc,jLoc); if( noRedundant && isLocalRow && isLocalCol ) { BLoc(localRow,localCol) = Caster<S,T>::Cast(alpha); } else { remoteEntries.push_back ( Entry<S>{localRow,localCol,alpha} ); distOwners.push_back( ownerRow + colStride*ownerCol ); } } } } // We will first push to redundant rank 0 of B const int redundantRootB = 0; // Compute the metadata // ==================== const Int totalSend = remoteEntries.size(); mpi::Comm comm; vector<int> sendCounts, owners(totalSend); if( includeViewers ) { comm = g.ViewingComm(); const int viewingSize = mpi::Size( g.ViewingComm() ); const int distBSize = mpi::Size( B.DistComm() ); vector<int> distBToViewing(distBSize); for( int distBRank=0; distBRank<distBSize; ++distBRank ) { const int vcOwner = g.CoordsToVC (B.ColDist(),B.RowDist(),distBRank,BRoot,redundantRootB); distBToViewing[distBRank] = g.VCToViewing(vcOwner); } sendCounts.resize(viewingSize,0); for( Int k=0; k<totalSend; ++k ) { owners[k] = distBToViewing[distOwners[k]]; ++sendCounts[owners[k]]; } } else { if( !g.InGrid() ) return; comm = g.VCComm(); const int distBSize = mpi::Size( B.DistComm() ); vector<int> distBToVC(distBSize); for( int distBRank=0; distBRank<distBSize; ++distBRank ) { distBToVC[distBRank] = g.CoordsToVC (B.ColDist(),B.RowDist(),distBRank,BRoot,redundantRootB); } const int vcSize = mpi::Size( g.VCComm() ); sendCounts.resize(vcSize,0); for( Int k=0; k<totalSend; ++k ) { owners[k] = distBToVC[distOwners[k]]; ++sendCounts[owners[k]]; } } SwapClear( distOwners ); // Pack the data // ============= vector<int> sendOffs; Scan( sendCounts, sendOffs ); vector<Entry<S>> sendBuf; FastResize( sendBuf, totalSend ); auto offs = sendOffs; for( Int k=0; k<totalSend; ++k ) sendBuf[offs[owners[k]]++] = remoteEntries[k]; SwapClear( remoteEntries ); SwapClear( owners ); // Exchange and unpack the data // ============================ auto recvBuf = mpi::AllToAll( sendBuf, sendCounts, sendOffs, comm ); if( BPartic ) { if( B.RedundantRank() == redundantRootB ) { Int recvBufSize = recvBuf.size(); for( Int k=0; k<recvBufSize; ++k ) { const auto& entry = recvBuf[k]; BLoc(entry.i,entry.j) = Caster<S,T>::Cast(entry.value); } } El::Broadcast( B, B.RedundantComm(), redundantRootB ); } }