void Dets ( const ElementalMatrix<Real>& xPre, ElementalMatrix<Real>& dPre, const ElementalMatrix<Int>& ordersPre, const ElementalMatrix<Int>& firstIndsPre, Int cutoff ) { DEBUG_CSE AssertSameGrids( xPre, dPre, ordersPre, firstIndsPre ); ElementalProxyCtrl ctrl; ctrl.colConstrain = true; ctrl.colAlign = 0; DistMatrixReadProxy<Real,Real,VC,STAR> xProx( xPre, ctrl ); DistMatrixWriteProxy<Real,Real,VC,STAR> dProx( dPre, ctrl ); DistMatrixReadProxy<Int,Int,VC,STAR> ordersProx( ordersPre, ctrl ), firstIndsProx( firstIndsPre, ctrl ); auto& x = xProx.GetLocked(); auto& d = dProx.Get(); auto& orders = ordersProx.GetLocked(); auto& firstInds = firstIndsProx.GetLocked(); auto Rx = x; soc::Reflect( Rx, orders, firstInds ); soc::Dots( x, Rx, d, orders, firstInds, cutoff ); }
void PartialColScatter ( T alpha, const ElementalMatrix<T>& A, ElementalMatrix<T>& B ) { DEBUG_ONLY(CSE cse("axpy_contract::PartialColScatter")) AssertSameGrids( A, B ); if( A.Height() != B.Height() || A.Width() != B.Width() ) LogicError("A and B must be the same size"); #ifdef EL_CACHE_WARNINGS if( A.Width() != 1 && A.Grid().Rank() == 0 ) { cerr << "axpy_contract::PartialColScatterUpdate potentially causes a large " "amount of cache-thrashing. If possible, avoid it by forming the " "(conjugate-)transpose of the [UGath,* ] matrix instead." << endl; } #endif if( B.ColAlign() % A.ColStride() == A.ColAlign() ) { const Int colStride = B.ColStride(); const Int colStridePart = B.PartialColStride(); const Int colStrideUnion = B.PartialUnionColStride(); const Int colRankPart = B.PartialColRank(); const Int colAlign = B.ColAlign(); const Int height = B.Height(); const Int width = B.Width(); const Int localHeight = B.LocalHeight(); const Int maxLocalHeight = MaxLength( height, colStride ); const Int recvSize = mpi::Pad( maxLocalHeight*width ); const Int sendSize = colStrideUnion*recvSize; //vector<T> buffer( sendSize ); vector<T> buffer; buffer.reserve( sendSize ); // Pack copy::util::PartialColStridedPack ( height, width, colAlign, colStride, colStrideUnion, colStridePart, colRankPart, A.ColShift(), A.LockedBuffer(), A.LDim(), buffer.data(), recvSize ); // Communicate mpi::ReduceScatter( buffer.data(), recvSize, B.PartialUnionColComm() ); // Unpack our received data axpy::util::InterleaveMatrixUpdate ( alpha, localHeight, width, buffer.data(), 1, localHeight, B.Buffer(), 1, B.LDim() ); } else LogicError("Unaligned PartialColScatter not implemented"); }
void UPan ( DistMatrix<F>& A, DistMatrix<F>& W, DistMatrix<F,MD,STAR>& t, DistMatrix<F,MC,STAR>& B_MC_STAR, DistMatrix<F,MR,STAR>& B_MR_STAR, DistMatrix<F,MC,STAR>& W_MC_STAR, DistMatrix<F,MR,STAR>& W_MR_STAR, const SymvCtrl<F>& ctrl ) { const Int n = A.Height(); const Int nW = W.Width(); DEBUG_ONLY( CSE cse("herm_tridiag::UPan"); AssertSameGrids( A, W, t ); if( n != A.Width() ) LogicError("A must be square."); if( n != W.Height() ) LogicError( "A and W must be the same height."); if( n <= nW ) LogicError("W must be a column panel."); if( t.Height() != nW || t.Width() != 1 ) LogicError ("t must be a column vector of the same length as W's width."); )
void Scatter ( const DistMatrix<T,CIRC,CIRC>& A, DistMatrix<T,STAR,STAR>& B ) { DEBUG_CSE AssertSameGrids( A, B ); const Int height = A.Height(); const Int width = A.Width(); B.Resize( height, width ); if( B.Participating() ) { const Int pkgSize = mpi::Pad( height*width ); vector<T> buffer; FastResize( buffer, pkgSize ); // Pack if( A.Participating() ) util::InterleaveMatrix ( height, width, A.LockedBuffer(), 1, A.LDim(), buffer.data(), 1, height ); // Broadcast from the process that packed mpi::Broadcast( buffer.data(), pkgSize, A.Root(), A.CrossComm() ); // Unpack util::InterleaveMatrix ( height, width, buffer.data(), 1, height, B.Buffer(), 1, B.LDim() ); } }
void MinEig ( const AbstractDistMatrix<Real>& xPre, AbstractDistMatrix<Real>& minEigsPre, const AbstractDistMatrix<Int>& orders, const AbstractDistMatrix<Int>& firstIndsPre, Int cutoff ) { EL_DEBUG_CSE AssertSameGrids( xPre, minEigsPre, orders, firstIndsPre ); ElementalProxyCtrl ctrl; ctrl.colConstrain = true; ctrl.colAlign = 0; DistMatrixReadProxy<Real,Real,VC,STAR> xProx( xPre, ctrl ); DistMatrixWriteProxy<Real,Real,VC,STAR> minEigsProx( minEigsPre, ctrl ); DistMatrixReadProxy<Int,Int,VC,STAR> firstIndsProx( firstIndsPre, ctrl ); auto& x = xProx.GetLocked(); auto& minEigs = minEigsProx.Get(); auto& firstInds = firstIndsProx.GetLocked(); const Int height = x.Height(); const Int localHeight = x.LocalHeight(); EL_DEBUG_ONLY( if( x.Width() != 1 || orders.Width() != 1 || firstInds.Width() != 1 ) LogicError("x, orders, and firstInds should be column vectors"); if( orders.Height() != height || firstInds.Height() != height ) LogicError("orders and firstInds should be of the same height as x"); )
void LPan ( DistMatrix<F>& A, DistMatrix<F>& W, DistMatrix<F,MD,STAR>& t, DistMatrix<F,MC,STAR>& APan_MC_STAR, DistMatrix<F,MR,STAR>& APan_MR_STAR, DistMatrix<F,MC,STAR>& W_MC_STAR, DistMatrix<F,MR,STAR>& W_MR_STAR ) { const Int n = A.Height(); const Int nW = W.Width(); DEBUG_ONLY( CallStackEntry cse("herm_tridiag::LPan"); AssertSameGrids( A, W, t ); if( n != A.Width() ) LogicError("A must be square"); if( n != W.Height() ) LogicError("A and W must be the same height"); if( n <= nW ) LogicError("W must be a column panel"); if( W.ColAlign() != A.ColAlign() || W.RowAlign() != A.RowAlign() ) LogicError("W and A must be aligned"); if( t.Height() != nW || t.Width() != 1 ) LogicError ("t must be a column vector of the same length as W's width"); if( !A.DiagonalAlignedWith(t,-1) ) LogicError("t is not aligned with A's subdiagonal."); )
// For every locally owned row whose global index equals its firstInds entry,
// compares the entry x0 with the value d produced by SOCLowerNorms; when
// x0 - d < minDist the entry is increased by minDist - (x0 - d), which makes
// the difference equal to minDist.
//
//   xPre          vector that is modified in place
//   ordersPre     forwarded to SOCLowerNorms
//   firstIndsPre  forwarded to SOCLowerNorms and used to select the rows
//   minDist       lower bound enforced on x0 - d
//   cutoff        forwarded to SOCLowerNorms
void ForceIntoSOC
(       AbstractDistMatrix<Real>& xPre,
  const AbstractDistMatrix<Int>& ordersPre,
  const AbstractDistMatrix<Int>& firstIndsPre,
  Real minDist,
  Int cutoff )
{
    DEBUG_ONLY(CSE cse("ForceIntoSOC"))
    AssertSameGrids( xPre, ordersPre, firstIndsPre );

    // All operands are accessed as [VC,STAR] with column alignment fixed to 0
    ProxyCtrl proxyCtrl;
    proxyCtrl.colConstrain = true;
    proxyCtrl.colAlign = 0;

    auto xProxy = ReadWriteProxy<Real,VC,STAR>(&xPre,proxyCtrl);
    auto ordersProxy = ReadProxy<Int,VC,STAR>(&ordersPre,proxyCtrl);
    auto firstIndsProxy = ReadProxy<Int,VC,STAR>(&firstIndsPre,proxyCtrl);
    auto& x = *xProxy;
    auto& orders = *ordersProxy;
    auto& firstInds = *firstIndsProxy;

    DistMatrix<Real,VC,STAR> d(x.Grid());
    SOCLowerNorms( x, d, orders, firstInds, cutoff );

    const Int numLocalRows = x.LocalHeight();
    for( Int iLoc=0; iLoc<numLocalRows; ++iLoc )
    {
        const Int i = x.GlobalRow(iLoc);
        const Real head = x.GetLocal(iLoc,0);
        const Real lower = d.GetLocal(iLoc,0);
        const bool isFirstInd = ( i == firstInds.GetLocal(iLoc,0) );
        if( isFirstInd && head-lower < minDist )
            x.UpdateLocal( iLoc, 0, minDist - (head-lower) );
    }
}
void PushInto ( ElementalMatrix<Real>& xPre, const ElementalMatrix<Int>& ordersPre, const ElementalMatrix<Int>& firstIndsPre, Real minDist, Int cutoff ) { DEBUG_ONLY(CSE cse("soc::PushInto")) AssertSameGrids( xPre, ordersPre, firstIndsPre ); ElementalProxyCtrl ctrl; ctrl.colConstrain = true; ctrl.colAlign = 0; DistMatrixReadWriteProxy<Real,Real,VC,STAR> xProx( xPre, ctrl ); DistMatrixReadProxy<Int,Int,VC,STAR> ordersProx( ordersPre, ctrl ), firstIndsProx( firstIndsPre, ctrl ); auto& x = xProx.Get(); auto& orders = ordersProx.GetLocked(); auto& firstInds = firstIndsProx.GetLocked(); DistMatrix<Real,VC,STAR> d(x.Grid()); soc::LowerNorms( x, d, orders, firstInds, cutoff ); const Int localHeight = x.LocalHeight(); for( Int iLoc=0; iLoc<localHeight; ++iLoc ) { const Int i = x.GlobalRow(iLoc); const Real x0 = x.GetLocal(iLoc,0); const Real lowerNorm = d.GetLocal(iLoc,0); if( i == firstInds.GetLocal(iLoc,0) && x0-lowerNorm < minDist ) x.UpdateLocal( iLoc, 0, minDist - (x0-lowerNorm) ); } }
void LowerPanel ( DistMatrix<F>& A, DistMatrix<F>& W, DistMatrix<F,MD,STAR>& t, DistMatrix<F,MC,STAR>& B_MC_STAR, DistMatrix<F,MR,STAR>& B_MR_STAR, DistMatrix<F,MC,STAR>& W_MC_STAR, DistMatrix<F,MR,STAR>& W_MR_STAR, const SymvCtrl<F>& ctrl ) { DEBUG_CSE const Int n = A.Height(); const Int nW = W.Width(); DEBUG_ONLY( AssertSameGrids( A, W, t ); if( n != A.Width() ) LogicError("A must be square"); if( n != W.Height() ) LogicError("A and W must be the same height"); if( n <= nW ) LogicError("W must be a column panel"); if( W.ColAlign() != A.ColAlign() || W.RowAlign() != A.RowAlign() ) LogicError("W and A must be aligned"); if( t.Height() != nW || t.Width() != 1 ) LogicError ("t must be a column vector of the same length as W's width"); if( !A.DiagonalAlignedWith(t,-1) ) LogicError("t is not aligned with A's subdiagonal."); )
// Placeholder for the BLOCK-wrapped column all-to-all promotion: after the
// grid check it unconditionally raises a LogicError because no implementation
// exists yet.
void ColAllToAllPromote
( const DistMatrix<T,        U,                     V   ,BLOCK>& A,
        DistMatrix<T,Partial<U>(),PartialUnionRow<U,V>(),BLOCK>& B )
{
    DEBUG_ONLY(CSE cse("copy::ColAllToAllPromote"))
    AssertSameGrids( A, B );

    // Nothing has been implemented for this distribution pair
    LogicError("This routine is not yet written");
}
// Validates a pair of operands against C: all three matrices must pass
// AssertSameGrids, and A and B are each handed to EnsureConformal together
// with C (the string argument names the operand being checked).
void CheckInput
( const DistMatrix<T,UA,VA>& A,
  const DistMatrix<T,UB,VB>& B,
  const DistMatrix<T>& C )
{
    AssertSameGrids( A, B, C );

    // Each operand is checked against C in turn
    EnsureConformal( A, C, "A" );
    EnsureConformal( B, C, "B" );
}
// Variadic grid check: raises LogicError if the first two matrices report
// different grids, then continues with the second matrix and the remaining
// arguments, so that each neighbouring pair in the argument list is compared.
inline void AssertSameGrids
( const AbstractDistMatrix<T1>& A1,
  const AbstractDistMatrix<T2>& A2,
  Args&... args )
{
    const bool gridsDiffer = ( A1.Grid() != A2.Grid() );
    if( gridsDiffer )
        LogicError("Grids did not match");

    // Drop the head of the list and check what is left
    AssertSameGrids( A2, args... );
}
// Placeholder for scattering a BLOCK_CYCLIC [CIRC,CIRC] matrix into a
// BlockCyclicMatrix: after the grid check it unconditionally raises a
// LogicError because no implementation exists yet.
void Scatter
( const DistMatrix<T,CIRC,CIRC,BLOCK_CYCLIC>& A,
        BlockCyclicMatrix<T>& B )
{
    DEBUG_ONLY(CSE cse("copy::Scatter"))
    AssertSameGrids( A, B );

    // Nothing has been implemented for this distribution pair
    LogicError("This routine is not yet written");
}
// Placeholder for the BlockDistMatrix column all-to-all promotion: after the
// grid check it unconditionally raises a LogicError because no implementation
// exists yet.
void ColAllToAllPromote
( const BlockDistMatrix<T,        U,                     V   >& A,
        BlockDistMatrix<T,Partial<U>(),PartialUnionRow<U,V>()>& B )
{
    DEBUG_ONLY(CallStackEntry cse("copy::ColAllToAllPromote"))
    AssertSameGrids( A, B );

    // Nothing has been implemented for this distribution pair
    LogicError("This routine is not yet written");
}
// Placeholder for the BLOCK_CYCLIC all-gather: after the grid check it
// unconditionally raises a LogicError because no implementation exists yet.
void AllGather
( const DistMatrix<T,        U,           V   ,BLOCK_CYCLIC>& A,
        DistMatrix<T,Collect<U>(),Collect<V>(),BLOCK_CYCLIC>& B )
{
    DEBUG_ONLY(CSE cse("copy::AllGather"))
    AssertSameGrids( A, B );

    // Nothing has been implemented for this distribution pair
    LogicError("This routine is not yet written");
}
void AllGather ( const DistMatrix<T, U, V ,BLOCK>& A, DistMatrix<T,Collect<U>(),Collect<V>(),BLOCK>& B ) { DEBUG_ONLY(CSE cse("copy::AllGather")) AssertSameGrids( A, B ); // TODO: More efficient implementation GeneralPurpose( A, B ); }
void Scatter ( const DistMatrix<T,CIRC,CIRC,BLOCK>& A, BlockMatrix<T>& B ) { DEBUG_CSE AssertSameGrids( A, B ); // TODO: More efficient implementation GeneralPurpose( A, B ); }
void AllGather ( const DistMatrix<T, U, V ,BLOCK>& A, DistMatrix<T,Collect<U>(),Collect<V>(),BLOCK>& B ) { EL_DEBUG_CSE AssertSameGrids( A, B ); // TODO(poulson): More efficient implementation GeneralPurpose( A, B ); }
void Scatter ( const DistMatrix<T,CIRC,CIRC,BLOCK>& A, DistMatrix<T,STAR,STAR,BLOCK>& B ) { DEBUG_ONLY(CSE cse("copy::Scatter")) AssertSameGrids( A, B ); // TODO: More efficient implementation GeneralPurpose( A, B ); }
void ColAllToAllPromote ( const DistMatrix<T, U, V ,BLOCK>& A, DistMatrix<T,Partial<U>(),PartialUnionRow<U,V>(),BLOCK>& B ) { DEBUG_CSE AssertSameGrids( A, B ); // TODO: More efficient implementation GeneralPurpose( A, B ); }
void RowAllToAllPromote ( const DistMatrix<T, U, V ,BLOCK>& A, DistMatrix<T,PartialUnionCol<U,V>(),Partial<V>(),BLOCK>& B ) { DEBUG_ONLY(CSE cse("copy::RowAllToAllPromote")) AssertSameGrids( A, B ); // TODO: More efficient implementation GeneralPurpose( A, B ); }
void Contract ( const BlockMatrix<T>& A, BlockMatrix<T>& B ) { DEBUG_ONLY(CSE cse("Contract")) AssertSameGrids( A, B ); const Dist U = B.ColDist(); const Dist V = B.RowDist(); // TODO: Shorten this implementation? if( A.ColDist() == U && A.RowDist() == V ) { Copy( A, B ); } else if( A.ColDist() == U && A.RowDist() == Partial(V) ) { B.AlignAndResize ( A.BlockHeight(), A.BlockWidth(), A.ColAlign(), A.RowAlign(), A.ColCut(), A.RowCut(), A.Height(), A.Width(), false, false ); Zeros( B.Matrix(), B.LocalHeight(), B.LocalWidth() ); AxpyContract( T(1), A, B ); } else if( A.ColDist() == Partial(U) && A.RowDist() == V ) { B.AlignAndResize ( A.BlockHeight(), A.BlockWidth(), A.ColAlign(), A.RowAlign(), A.ColCut(), A.RowCut(), A.Height(), A.Width(), false, false ); Zeros( B.Matrix(), B.LocalHeight(), B.LocalWidth() ); AxpyContract( T(1), A, B ); } else if( A.ColDist() == U && A.RowDist() == Collect(V) ) { B.AlignColsAndResize ( A.BlockHeight(), A.ColAlign(), A.ColCut(), A.Height(), A.Width(), false, false ); Zeros( B.Matrix(), B.LocalHeight(), B.LocalWidth() ); AxpyContract( T(1), A, B ); } else if( A.ColDist() == Collect(U) && A.RowDist() == V ) { B.AlignRowsAndResize ( A.BlockWidth(), A.RowAlign(), A.RowCut(), A.Height(), A.Width(), false, false ); Zeros( B.Matrix(), B.LocalHeight(), B.LocalWidth() ); AxpyContract( T(1), A, B ); } else if( A.ColDist() == Collect(U) && A.RowDist() == Collect(V) ) { Zeros( B, A.Height(), A.Width() ); AxpyContract( T(1), A, B ); } else LogicError("Incompatible distributions"); }
void AllGather ( const DistMatrix<T, U, V >& A, DistMatrix<T,Collect<U>(),Collect<V>()>& B ) { EL_DEBUG_CSE AssertSameGrids( A, B ); const Int height = A.Height(); const Int width = A.Width(); B.SetGrid( A.Grid() ); B.Resize( height, width ); if( A.Participating() ) { if( A.DistSize() == 1 ) { Copy( A.LockedMatrix(), B.Matrix() ); } else { const Int colStride = A.ColStride(); const Int rowStride = A.RowStride(); const Int distStride = colStride*rowStride; const Int maxLocalHeight = MaxLength(height,colStride); const Int maxLocalWidth = MaxLength(width,rowStride); const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth ); vector<T> buf; FastResize( buf, (distStride+1)*portionSize ); T* sendBuf = &buf[0]; T* recvBuf = &buf[portionSize]; // Pack util::InterleaveMatrix ( A.LocalHeight(), A.LocalWidth(), A.LockedBuffer(), 1, A.LDim(), sendBuf, 1, A.LocalHeight() ); // Communicate mpi::AllGather ( sendBuf, portionSize, recvBuf, portionSize, A.DistComm() ); // Unpack util::StridedUnpack ( height, width, A.ColAlign(), colStride, A.RowAlign(), rowStride, recvBuf, portionSize, B.Buffer(), B.LDim() ); } } if( A.Grid().InGrid() && A.CrossComm() != mpi::COMM_SELF ) El::Broadcast( B, A.CrossComm(), A.Root() ); }
void Apply ( const ElementalMatrix<Real>& xPre, const ElementalMatrix<Real>& yPre, ElementalMatrix<Real>& zPre, const ElementalMatrix<Int>& ordersPre, const ElementalMatrix<Int>& firstIndsPre, Int cutoff ) { DEBUG_ONLY(CSE cse("soc::Apply")) AssertSameGrids( xPre, yPre, zPre, ordersPre, firstIndsPre ); ElementalProxyCtrl ctrl; ctrl.colConstrain = true; ctrl.colAlign = 0; DistMatrixReadProxy<Real,Real,VC,STAR> xProx( xPre, ctrl ), yProx( yPre, ctrl ); DistMatrixWriteProxy<Real,Real,VC,STAR> zProx( zPre, ctrl ); DistMatrixReadProxy<Int,Int,VC,STAR> ordersProx( ordersPre, ctrl ), firstIndsProx( firstIndsPre, ctrl ); auto& x = xProx.GetLocked(); auto& y = yProx.GetLocked(); auto& z = zProx.Get(); auto& orders = ordersProx.GetLocked(); auto& firstInds = firstIndsProx.GetLocked(); soc::Dots( x, y, z, orders, firstInds ); auto xRoots = x; auto yRoots = y; cone::Broadcast( xRoots, orders, firstInds ); cone::Broadcast( yRoots, orders, firstInds ); const Int localHeight = x.LocalHeight(); const Real* xBuf = x.LockedBuffer(); const Real* xRootBuf = xRoots.LockedBuffer(); const Real* yBuf = y.LockedBuffer(); const Real* yRootBuf = yRoots.LockedBuffer(); Real* zBuf = z.Buffer(); const Int* firstIndBuf = firstInds.LockedBuffer(); for( Int iLoc=0; iLoc<localHeight; ++iLoc ) { const Int i = x.GlobalRow(iLoc); const Int firstInd = firstIndBuf[iLoc]; if( i != firstInd ) zBuf[iLoc] += xRootBuf[iLoc]*yBuf[iLoc] + yRootBuf[iLoc]*xBuf[iLoc]; } }
// Packs A, reduce-scatters the packed data over B.PartialUnionRowComm(), and
// applies the received portion to B's local buffer via
// axpy::util::InterleaveMatrixUpdate with scaling factor alpha.
//
//   alpha  scaling factor handed to InterleaveMatrixUpdate
//   A      source matrix; must have the same dimensions and grid as B
//   B      destination matrix, updated in place
//
// Returns without doing anything on processes that do not participate in B.
// Raises LogicError when the sizes differ or when
// B.RowAlign() % A.RowStride() != A.RowAlign() (the unaligned case is not
// implemented).
void PartialRowScatter
( T alpha,
  const ElementalMatrix<T>& A,
        ElementalMatrix<T>& B )
{
    DEBUG_ONLY(CSE cse("axpy_contract::PartialRowScatter"))
    AssertSameGrids( A, B );
    if( A.Height() != B.Height() || A.Width() != B.Width() )
        LogicError("Matrix sizes did not match");
    if( !B.Participating() )
        return;

    if( B.RowAlign() % A.RowStride() == A.RowAlign() )
    {
        const Int rowStride = B.RowStride();
        const Int rowStridePart = B.PartialRowStride();
        const Int rowStrideUnion = B.PartialUnionRowStride();
        const Int rowRankPart = B.PartialRowRank();

        const Int height = B.Height();
        const Int width = B.Width();
        const Int maxLocalWidth = MaxLength( width, rowStride );

        // One padded portion per member of the partial-union communicator
        const Int recvSize = mpi::Pad( height*maxLocalWidth );
        const Int sendSize = rowStrideUnion*recvSize;

        // The buffer must actually contain sendSize elements: a vector that
        // was only reserve()'d still has size zero, so writing to and reading
        // from data() would be undefined behaviour.
        vector<T> buffer( sendSize );

        // Pack
        copy::util::PartialRowStridedPack
        ( height, width,
          B.RowAlign(), rowStride,
          rowStrideUnion, rowStridePart, rowRankPart,
          A.RowShift(),
          A.LockedBuffer(), A.LDim(),
          buffer.data(), recvSize );

        // Communicate
        mpi::ReduceScatter( buffer.data(), recvSize, B.PartialUnionRowComm() );

        // Unpack our received data
        axpy::util::InterleaveMatrixUpdate
        ( alpha, height, B.LocalWidth(),
          buffer.data(), 1, height,
          B.Buffer(),    1, B.LDim() );
    }
    else
        LogicError("Unaligned PartialRowScatter not implemented");
}
// Packs A according to B's strides and alignments, reduce-scatters the packed
// data over B.DistComm(), and applies the received portion to B's local
// buffer via axpy::util::InterleaveMatrixUpdate with scaling factor alpha.
//
//   alpha  scaling factor handed to InterleaveMatrixUpdate
//   A      source matrix; must have the same dimensions and grid as B
//   B      destination matrix, updated in place
//
// Returns without doing anything on processes that do not participate in B.
// Raises LogicError when the sizes differ.
void Scatter
( T alpha,
  const ElementalMatrix<T>& A,
        ElementalMatrix<T>& B )
{
    DEBUG_ONLY(CSE cse("axpy_contract::Scatter"))
    AssertSameGrids( A, B );
    if( A.Height() != B.Height() || A.Width() != B.Width() )
        LogicError("Sizes of A and B must match");
    if( !B.Participating() )
        return;

    const Int colStride = B.ColStride();
    const Int rowStride = B.RowStride();
    const Int colAlign = B.ColAlign();
    const Int rowAlign = B.RowAlign();

    const Int height = B.Height();
    const Int width = B.Width();
    const Int localHeight = B.LocalHeight();
    const Int localWidth = B.LocalWidth();
    const Int maxLocalHeight = MaxLength(height,colStride);
    const Int maxLocalWidth = MaxLength(width,rowStride);

    // One padded portion per process of the distribution communicator
    const Int recvSize = mpi::Pad( maxLocalHeight*maxLocalWidth );
    const Int sendSize = colStride*rowStride*recvSize;

    // The buffer must actually contain sendSize elements: a vector that was
    // only reserve()'d still has size zero, so writing to and reading from
    // data() would be undefined behaviour.
    vector<T> buffer( sendSize );

    // Pack
    copy::util::StridedPack
    ( height, width,
      colAlign, colStride,
      rowAlign, rowStride,
      A.LockedBuffer(), A.LDim(),
      buffer.data(), recvSize );

    // Communicate
    mpi::ReduceScatter( buffer.data(), recvSize, B.DistComm() );

    // Unpack our received data
    axpy::util::InterleaveMatrixUpdate
    ( alpha, localHeight, localWidth,
      buffer.data(), 1, localHeight,
      B.Buffer(),    1, B.LDim() );
}
void PushPairInto ( ElementalMatrix<Real>& sPre, ElementalMatrix<Real>& zPre, const ElementalMatrix<Real>& wPre, const ElementalMatrix<Int>& ordersPre, const ElementalMatrix<Int>& firstIndsPre, Real wMaxNormLimit, Int cutoff ) { DEBUG_ONLY(CSE cse("soc::PushPairInto")) AssertSameGrids( sPre, zPre, wPre, ordersPre, firstIndsPre ); ElementalProxyCtrl ctrl; ctrl.colConstrain = true; ctrl.colAlign = 0; DistMatrixReadWriteProxy<Real,Real,VC,STAR> sProx( sPre, ctrl ), zProx( zPre, ctrl ); DistMatrixReadProxy<Real,Real,VC,STAR> wProx( wPre, ctrl ); DistMatrixReadProxy<Int,Int,VC,STAR> ordersProx( ordersPre, ctrl ), firstIndsProx( firstIndsPre, ctrl ); auto& s = sProx.Get(); auto& z = zProx.Get(); auto& w = wProx.GetLocked(); auto& orders = ordersProx.GetLocked(); auto& firstInds = firstIndsProx.GetLocked(); DistMatrix<Real,VC,STAR> sLower(s.Grid()), zLower(z.Grid()); soc::LowerNorms( s, sLower, orders, firstInds, cutoff ); soc::LowerNorms( z, zLower, orders, firstInds, cutoff ); const Int localHeight = s.LocalHeight(); for( Int iLoc=0; iLoc<localHeight; ++iLoc ) { const Int i = s.GlobalRow(iLoc); const Real w0 = w.GetLocal(iLoc,0); if( i == firstInds.GetLocal(iLoc,0) && w0 > wMaxNormLimit ) { // TODO: Switch to a non-adhoc modification s.UpdateLocal( iLoc, 0, Real(1)/wMaxNormLimit ); z.UpdateLocal( iLoc, 0, Real(1)/wMaxNormLimit ); } } }
void SOCSquareRoot ( const ElementalMatrix<Real>& xPre, ElementalMatrix<Real>& xRootPre, const ElementalMatrix<Int>& ordersPre, const ElementalMatrix<Int>& firstIndsPre, Int cutoff ) { DEBUG_ONLY(CSE cse("SOCSquareRoot")) AssertSameGrids( xPre, xRootPre, ordersPre, firstIndsPre ); ProxyCtrl ctrl; ctrl.colConstrain = true; ctrl.colAlign = 0; auto xPtr = ReadProxy<Real,VC,STAR>(&xPre,ctrl); auto xRootPtr = WriteProxy<Real,VC,STAR>(&xRootPre,ctrl); auto ordersPtr = ReadProxy<Int,VC,STAR>(&ordersPre,ctrl); auto firstIndsPtr = ReadProxy<Int,VC,STAR>(&firstIndsPre,ctrl); auto& x = *xPtr; auto& xRoot = *xRootPtr; auto& orders = *ordersPtr; auto& firstInds = *firstIndsPtr; DistMatrix<Real,VC,STAR> d(x.Grid()); SOCDets( x, d, orders, firstInds ); ConeBroadcast( d, orders, firstInds ); auto roots = x; ConeBroadcast( roots, orders, firstInds ); const Int localHeight = x.LocalHeight(); xRoot.SetGrid( x.Grid() ); Zeros( xRoot, x.Height(), 1 ); for( Int iLoc=0; iLoc<localHeight; ++iLoc ) { const Int i = x.GlobalRow(iLoc); const Real x0 = roots.GetLocal(iLoc,0); const Real det = d.GetLocal(iLoc,0); const Real eta0 = Sqrt(x0+Sqrt(det))/Sqrt(Real(2)); if( i == firstInds.GetLocal(iLoc,0) ) xRoot.SetLocal( iLoc, 0, eta0 ); else xRoot.SetLocal( iLoc, 0, x.GetLocal(iLoc,0)/(2*eta0) ); } }
void Filter ( const DistMatrix<T,Collect<U>(),Collect<V>()>& A, DistMatrix<T, U, V >& B ) { DEBUG_CSE AssertSameGrids( A, B ); B.Resize( A.Height(), A.Width() ); if( !B.Participating() ) return; const Int colShift = B.ColShift(); const Int rowShift = B.RowShift(); util::InterleaveMatrix ( B.LocalHeight(), B.LocalWidth(), A.LockedBuffer(colShift,rowShift), B.ColStride(), B.RowStride()*A.LDim(), B.Buffer(), 1, B.LDim() ); }
void GetMappedDiagonal ( const DistMatrix<T,U,V>& A, AbstractDistMatrix<S>& dPre, function<S(const T&)> func, Int offset ) { EL_DEBUG_CSE EL_DEBUG_ONLY(AssertSameGrids( A, dPre )) ElementalProxyCtrl ctrl; ctrl.colConstrain = true; ctrl.colAlign = A.DiagonalAlign(offset); ctrl.rootConstrain = true; ctrl.root = A.DiagonalRoot(offset); DistMatrixWriteProxy<S,S,DiagCol<U,V>(),DiagRow<U,V>()> dProx( dPre, ctrl ); auto& d = dProx.Get(); d.Resize( A.DiagonalLength(offset), 1 ); if( d.Participating() ) { const Int diagShift = d.ColShift(); const Int iStart = diagShift + Max(-offset,0); const Int jStart = diagShift + Max( offset,0); const Int colStride = A.ColStride(); const Int rowStride = A.RowStride(); const Int iLocStart = (iStart-A.ColShift()) / colStride; const Int jLocStart = (jStart-A.RowShift()) / rowStride; const Int iLocStride = d.ColStride() / colStride; const Int jLocStride = d.ColStride() / rowStride; const Int localDiagLength = d.LocalHeight(); S* dBuf = d.Buffer(); const T* ABuf = A.LockedBuffer(); const Int ldim = A.LDim(); EL_PARALLEL_FOR for( Int k=0; k<localDiagLength; ++k ) { const Int iLoc = iLocStart + k*iLocStride; const Int jLoc = jLocStart + k*jLocStride; dBuf[k] = func(ABuf[iLoc+jLoc*ldim]); } } }