void UUnb( Matrix<F>& A, Matrix<F>& householderScalars ) { DEBUG_CSE const Int n = A.Height(); const Int householderScalarsHeight = Max(n-1,0); householderScalars.Resize( householderScalarsHeight, 1 ); // Temporary products Matrix<F> x1, x12Adj; for( Int k=0; k<n-1; ++k ) { const Range<Int> ind1( k, k+1 ), ind2( k+1, n ); auto a21 = A( ind2, ind1 ); auto A22 = A( ind2, ind2 ); auto A2 = A( IR(0,n), ind2 ); auto alpha21T = A( IR(k+1,k+2), ind1 ); auto a21B = A( IR(k+2,n), ind1 ); // Find tau and v such that // / I - tau | 1 | | 1, v^H | \ | alpha21T | = | beta | // \ | v | / | a21B | | 0 | const F tau = LeftReflector( alpha21T, a21B ); householderScalars(k) = tau; // Temporarily set a21 := | 1 | // | v | const F beta = alpha21T(0); alpha21T(0) = F(1); // A2 := A2 Hous(a21,tau)^H // = A2 (I - conj(tau) a21 a21^H) // = A2 - conj(tau) (A2 a21) a21^H // ----------------------------------- // x1 := A2 a21 Zeros( x1, n, 1 ); Gemv( NORMAL, F(1), A2, a21, F(0), x1 ); // A2 := A2 - conj(tau) x1 a21^H Ger( -Conj(tau), x1, a21, A2 ); // A22 := Hous(a21,tau) A22 // = (I - tau a21 a21^H) A22 // = A22 - tau a21 (A22^H a21)^H // ---------------------------------- // x12^H := (a21^H A22)^H = A22^H a21 Zeros( x12Adj, A22.Width(), 1 ); Gemv( ADJOINT, F(1), A22, a21, F(0), x12Adj ); // A22 := A22 - tau a21 x12 Ger( -tau, a21, x12Adj, A22 ); // Put beta back alpha21T(0) = beta; } }
void QP ( const Matrix<Real>& A, const Matrix<Real>& B, Matrix<Real>& X, const qp::direct::Ctrl<Real>& ctrl ) { DEBUG_CSE const Int n = A.Width(); const Int k = B.Width(); Matrix<Real> Q, AHat, bHat, c; Herk( LOWER, ADJOINT, Real(1), A, Q ); Zeros( AHat, 0, n ); Zeros( bHat, 0, 1 ); Zeros( X, n, k ); Matrix<Real> y, z; for( Int j=0; j<k; ++j ) { auto x = X( ALL, IR(j) ); auto b = B( ALL, IR(j) ); Zeros( c, n, 1 ); Gemv( ADJOINT, Real(-1), A, b, Real(0), c ); El::QP( Q, AHat, bHat, c, x, y, z, ctrl ); } }
void PanelHouseholder ( Matrix<F>& A, Matrix<F>& householderScalars, Matrix<Base<F>>& signature ) { DEBUG_CSE typedef Base<F> Real; const Int m = A.Height(); const Int n = A.Width(); const Int minDim = Min(m,n); householderScalars.Resize( minDim, 1 ); signature.Resize( minDim, 1 ); Matrix<F> z21; for( Int k=0; k<minDim; ++k ) { const Range<Int> ind1( k ), ind2( k+1, END ), indR( k, n ); F& alpha11 = A(k,k); auto a12 = A( ind1, ind2 ); auto a1R = A( ind1, indR ); auto A2R = A( ind2, indR ); // Find tau and v such that // |alpha11 a12| /I - tau |1 | |1 conj(v)|\ = |beta 0| // \ |v^T| / const F tau = RightReflector( alpha11, a12 ); householderScalars(k) = tau; // Temporarily set a1R = | 1 v | const F alpha = alpha11; alpha11 = 1; // A2R := A2R Hous(a1R^T,tau) // = A2R (I - tau a1R^T conj(a1R)) // = A2R - tau (A2R a1R^T) conj(a1R) Zeros( z21, A2R.Height(), 1 ); Gemv( NORMAL, F(1), A2R, a1R, F(0), z21 ); Ger( -tau, z21, a1R, A2R ); // Reset alpha11's value alpha11 = alpha; } // Form d and rescale L auto L = A( ALL, IR(0,minDim) ); GetRealPartOfDiagonal(L,signature); auto sgn = []( const Real& delta ) { return delta >= Real(0) ? Real(1) : Real(-1); }; EntrywiseMap( signature, function<Real(Real)>(sgn) ); DiagonalScaleTrapezoid( RIGHT, LOWER, NORMAL, signature, L ); }
inline void PanelQR( Matrix<Real>& A ) { #ifndef RELEASE PushCallStack("internal::PanelQR"); #endif Matrix<Real> ATL, ATR, A00, a01, A02, aLeftCol, ARightPan, ABL, ABR, a10, alpha11, a12, A20, a21, A22; Matrix<Real> z; PushBlocksizeStack( 1 ); PartitionDownLeftDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ a01, A02, /*************/ /**********************/ /**/ a10, /**/ alpha11, a12, ABL, /**/ ABR, A20, /**/ a21, A22 ); aLeftCol.View2x1( alpha11, a21 ); ARightPan.View2x1( a12, A22 ); Zeros( ARightPan.Width(), 1, z ); //--------------------------------------------------------------------// const Real tau = Reflector( alpha11, a21 ); const Real alpha = alpha11.Get(0,0); alpha11.Set(0,0,1); Gemv( TRANSPOSE, Real(1), ARightPan, aLeftCol, Real(0), z ); Ger( -tau, aLeftCol, z, ARightPan ); alpha11.Set(0,0,alpha); //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, a01, /**/ A02, /**/ a10, alpha11, /**/ a12, /*************/ /**********************/ ABL, /**/ ABR, A20, a21, /**/ A22 ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void PanelLQ( Matrix<Real>& A ) { #ifndef RELEASE PushCallStack("internal::PanelLQ"); #endif Matrix<Real> ATL, ATR, A00, a01, A02, aTopRow, ABottomPan, ABL, ABR, a10, alpha11, a12, A20, a21, A22; Matrix<Real> z; PushBlocksizeStack( 1 ); PartitionDownLeftDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ a01, A02, /*************/ /**********************/ /**/ a10, /**/ alpha11, a12, ABL, /**/ ABR, A20, /**/ a21, A22 ); aTopRow.View1x2( alpha11, a12 ); ABottomPan.View1x2( a21, A22 ); Zeros( ABottomPan.Height(), 1, z ); //--------------------------------------------------------------------// const Real tau = Reflector( alpha11, a12 ); const Real alpha = alpha11.Get(0,0); alpha11.Set(0,0,1); Gemv( NORMAL, Real(1), ABottomPan, aTopRow, Real(0), z ); Ger( -tau, z, aTopRow, ABottomPan ); alpha11.Set(0,0,alpha); //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, a01, /**/ A02, /**/ a10, alpha11, /**/ a12, /*************/ /**********************/ ABL, /**/ ABR, A20, a21, /**/ A22 ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
void QP ( const ElementalMatrix<Real>& APre, const ElementalMatrix<Real>& BPre, ElementalMatrix<Real>& XPre, const qp::direct::Ctrl<Real>& ctrl ) { DEBUG_CSE DistMatrixReadProxy<Real,Real,MC,MR> AProx( APre ), BProx( BPre ); DistMatrixWriteProxy<Real,Real,MC,MR> XProx( XPre ); auto& A = AProx.GetLocked(); auto& B = BProx.GetLocked(); auto& X = XProx.Get(); const Int n = A.Width(); const Int k = B.Width(); const Grid& g = A.Grid(); DistMatrix<Real> Q(g), AHat(g), bHat(g), c(g); Herk( LOWER, ADJOINT, Real(1), A, Q ); Zeros( AHat, 0, n ); Zeros( bHat, 0, 1 ); Zeros( X, n, k ); DistMatrix<Real> y(g), z(g); for( Int j=0; j<k; ++j ) { auto x = X( ALL, IR(j) ); auto b = B( ALL, IR(j) ); Zeros( c, n, 1 ); Gemv( ADJOINT, Real(-1), A, b, Real(0), c ); El::QP( Q, AHat, bHat, c, x, y, z, ctrl ); } }
void Covariance( const Matrix<F>& D, Matrix<F>& S ) { DEBUG_CSE const Int numObs = D.Height(); const Int n = D.Width(); // Compute the average column Matrix<F> ones, xMean; Ones( ones, numObs, 1 ); Gemv( TRANSPOSE, F(1)/F(numObs), D, ones, xMean ); // Subtract the mean from each column of D Matrix<F> DDev( D ); for( Int i=0; i<numObs; ++i ) blas::Axpy ( n, F(-1), xMean.LockedBuffer(), 1, DDev.Buffer(i,0), DDev.LDim() ); // Form S := 1/(numObs-1) DDev DDev' Herk( LOWER, ADJOINT, Base<F>(1)/Base<F>(numObs-1), DDev, S ); Conjugate( S ); MakeHermitian( LOWER, S ); }
void Covariance ( const ElementalMatrix<F>& DPre, ElementalMatrix<F>& SPre ) { DEBUG_CSE DistMatrixReadProxy<F,F,MC,MR> DProx( DPre ); DistMatrixWriteProxy<F,F,MC,MR> SProx( SPre ); auto& D = DProx.GetLocked(); auto& S = SProx.Get(); const Grid& g = D.Grid(); const Int numObs = D.Height(); // Compute the average column DistMatrix<F> ones(g), xMean(g); Ones( ones, numObs, 1 ); Gemv( TRANSPOSE, F(1)/F(numObs), D, ones, xMean ); DistMatrix<F,MR,STAR> xMean_MR(g); xMean_MR.AlignWith( D ); xMean_MR = xMean; // Subtract the mean from each column of D DistMatrix<F> DDev( D ); for( Int iLoc=0; iLoc<DDev.LocalHeight(); ++iLoc ) blas::Axpy ( DDev.LocalWidth(), F(-1), xMean_MR.LockedBuffer(), 1, DDev.Buffer(iLoc,0), DDev.LDim() ); // Form S := 1/(numObs-1) DDev DDev' Herk( LOWER, ADJOINT, Base<F>(1)/Base<F>(numObs-1), DDev, S ); Conjugate( S ); MakeHermitian( LOWER, S ); }
inline void TrsvUN( UnitOrNonUnit diag, const DistMatrix<F>& U, DistMatrix<F>& x ) { #ifndef RELEASE PushCallStack("internal::TrsvUN"); if( U.Grid() != x.Grid() ) throw std::logic_error("{U,x} must be distributed over the same grid"); if( U.Height() != U.Width() ) throw std::logic_error("U must be square"); if( x.Width() != 1 && x.Height() != 1 ) throw std::logic_error("x must be a vector"); const int xLength = ( x.Width() == 1 ? x.Height() : x.Width() ); if( U.Width() != xLength ) throw std::logic_error("Nonconformal TrsvUN"); #endif const Grid& g = U.Grid(); if( x.Width() == 1 ) { // Matrix views DistMatrix<F> U01(g), U11(g); DistMatrix<F> xT(g), x0(g), xB(g), x1(g), x2(g); // Temporary distributions DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,STAR> x1_STAR_STAR(g); DistMatrix<F,MR, STAR> x1_MR_STAR(g); DistMatrix<F,MC, STAR> z_MC_STAR(g); // Views of z[MC,* ], which will store updates to x DistMatrix<F,MC,STAR> z0_MC_STAR(g), z1_MC_STAR(g); z_MC_STAR.AlignWith( U ); Zeros( x.Height(), 1, z_MC_STAR ); // Start the algorithm PartitionUp ( x, xT, xB, 0 ); while( xT.Height() > 0 ) { RepartitionUp ( xT, x0, x1, /**/ /**/ xB, x2 ); const int n0 = x0.Height(); const int n1 = x1.Height(); LockedView( U01, U, 0, n0, n0, n1 ); LockedView( U11, U, n0, n0, n1, n1 ); View( z0_MC_STAR, z_MC_STAR, 0, 0, n0, 1 ); View( z1_MC_STAR, z_MC_STAR, n0, 0, n1, 1 ); x1_MR_STAR.AlignWith( U01 ); //----------------------------------------------------------------// if( x2.Height() != 0 ) x1.SumScatterUpdate( F(1), z1_MC_STAR ); x1_STAR_STAR = x1; U11_STAR_STAR = U11; Trsv ( UPPER, NORMAL, diag, U11_STAR_STAR.LockedLocalMatrix(), x1_STAR_STAR.LocalMatrix() ); x1 = x1_STAR_STAR; x1_MR_STAR = x1_STAR_STAR; Gemv ( NORMAL, F(-1), U01.LockedLocalMatrix(), x1_MR_STAR.LockedLocalMatrix(), F(1), z0_MC_STAR.LocalMatrix() ); //----------------------------------------------------------------// x1_MR_STAR.FreeAlignments(); SlidePartitionUp ( xT, x0, /**/ /**/ x1, xB, x2 ); } } else { // Matrix views DistMatrix<F> U01(g), U11(g); DistMatrix<F> xL(g), xR(g), x0(g), x1(g), x2(g); // Temporary distributions DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,STAR> x1_STAR_STAR(g); DistMatrix<F,STAR,MR > x1_STAR_MR(g); DistMatrix<F,MC, MR > z1(g); DistMatrix<F,MR, MC > z1_MR_MC(g); DistMatrix<F,STAR,MC > z_STAR_MC(g); // Views of z[* ,MC] DistMatrix<F,STAR,MC> z0_STAR_MC(g), z1_STAR_MC(g); z_STAR_MC.AlignWith( U ); Zeros( 1, x.Width(), z_STAR_MC ); // Start the algorithm PartitionLeft( x, xL, xR, 0 ); while( xL.Width() > 0 ) { RepartitionLeft ( xL, /**/ xR, x0, x1, /**/ x2 ); const int n0 = x0.Width(); const int n1 = x1.Width(); LockedView( U01, U, 0, n0, n0, n1 ); LockedView( U11, U, n0, n0, n1, n1 ); View( z0_STAR_MC, z_STAR_MC, 0, 0, 1, n0 ); View( z1_STAR_MC, z_STAR_MC, 0, n0, 1, n1 ); x1_STAR_MR.AlignWith( U01 ); z1.AlignWith( x1 ); //----------------------------------------------------------------// if( x2.Width() != 0 ) { z1_MR_MC.SumScatterFrom( z1_STAR_MC ); z1 = z1_MR_MC; Axpy( F(1), z1, x1 ); } x1_STAR_STAR = x1; U11_STAR_STAR = U11; Trsv ( UPPER, NORMAL, diag, U11_STAR_STAR.LockedLocalMatrix(), x1_STAR_STAR.LocalMatrix() ); x1 = x1_STAR_STAR; x1_STAR_MR = x1_STAR_STAR; Gemv ( NORMAL, F(-1), U01.LockedLocalMatrix(), x1_STAR_MR.LockedLocalMatrix(), F(1), z0_STAR_MC.LocalMatrix() ); //----------------------------------------------------------------// x1_STAR_MR.FreeAlignments(); z1.FreeAlignments(); SlidePartitionLeft ( xL, /**/ xR, x0, /**/ x1, x2 ); } } #ifndef RELEASE PopCallStack(); #endif }
bool LatticeCoordinates ( const Matrix<F>& B, const Matrix<F>& y, Matrix<F>& x ) { DEBUG_CSE typedef Base<F> Real; const Int m = B.Height(); const Int n = B.Width(); if( y.Height() != m || y.Width() != 1 ) LogicError("y should have been an ",m," x 1 vector"); if( FrobeniusNorm(y) == Real(0) ) { Zeros( x, n, 1 ); return true; } Matrix<F> BRed( B ); Matrix<F> UB, RB; auto infoB = LLL( BRed, UB, RB ); auto MB = BRed( ALL, IR(0,infoB.rank) ); Matrix<F> A; Zeros( A, m, infoB.rank+1 ); { auto AL = A( ALL, IR(0,infoB.rank) ); auto aR = A( ALL, IR(infoB.rank) ); AL = MB; aR = y; } // Reduce A in-place Matrix<F> UA, RA; auto infoA = LLL( A, UA, RA ); if( infoA.nullity != 1 ) return false; // Solve for x_M such that M_B x_M = y // NOTE: The last column of U_A should hold the coordinates of the single // member of the null-space of (the original) A Matrix<F> xM; xM = UA( IR(0,infoA.rank), IR(infoB.rank) ); const F gamma = UA(infoA.rank,infoB.rank); if( Abs(gamma) != Real(1) ) LogicError("Invalid member of null space"); else xM *= -Conj(gamma); // Map xM back to the original coordinates using the portion of the // unimodular transformation of B (U_B) which produced the image of B auto UBM = UB( ALL, IR(0,infoB.rank) ); Zeros( x, n, 1 ); Gemv( NORMAL, F(1), UBM, xM, F(0), x ); /* if( infoB.nullity != 0 ) { Matrix<F> C; Zeros( C, m, infoB.nullity+1 ); auto cL = C( ALL, IR(infoB.rank-1) ); auto CR = C( ALL, IR(infoB.rank,END) ); // Reduce the kernel of B CR = UB( ALL, IR(infoB.rank,END) ); LLL( CR ); // Attempt to reduce the (reduced) kernel out of the coordinates // TODO: Which column to grab from the result?!? cL = x; LLL( C ); x = cL; } */ return true; }
inline void PanelHouseholder( Matrix<F>& A, Matrix<F>& t ) { #ifndef RELEASE CallStackEntry entry("lq::PanelHouseholder"); if( t.Height() != Min(A.Height(),A.Width()) || t.Width() != 1 ) LogicError ("t must be a vector of height equal to the minimum dimension of A"); #endif Matrix<F> ATL, ATR, A00, a01, A02, aTopRow, ABottomPan, ABL, ABR, a10, alpha11, a12, A20, a21, A22; Matrix<F> tT, t0, tB, tau1, t2; Matrix<F> z, aTopRowConj; PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( t, tT, tB, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ a01, A02, /*************/ /**********************/ /**/ a10, /**/ alpha11, a12, ABL, /**/ ABR, A20, /**/ a21, A22, 1 ); RepartitionDown ( tT, t0, /**/ /****/ tau1, tB, t2, 1 ); View1x2( aTopRow, alpha11, a12 ); View1x2( ABottomPan, a21, A22 ); //--------------------------------------------------------------------// // Compute the Householder reflector const F tau = Reflector( alpha11, a12 ); tau1.Set( 0, 0, tau ); // Apply the Householder reflector const F alpha = alpha11.Get(0,0); alpha11.Set(0,0,1); Conjugate( aTopRow, aTopRowConj ); Zeros( z, ABottomPan.Height(), 1 ); Gemv( NORMAL, F(1), ABottomPan, aTopRowConj, F(0), z ); Ger( -Conj(tau), z, aTopRowConj, ABottomPan ); alpha11.Set(0,0,alpha); //--------------------------------------------------------------------// SlidePartitionDown ( tT, t0, tau1, /**/ /****/ tB, t2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, a01, /**/ A02, /**/ a10, alpha11, /**/ a12, /*************/ /**********************/ ABL, /**/ ABR, A20, a21, /**/ A22 ); } }
inline void internal::TrsvUN ( UnitOrNonUnit diag, const DistMatrix<F,MC,MR>& U, DistMatrix<F,MC,MR>& x ) { #ifndef RELEASE PushCallStack("internal::TrsvUN"); if( U.Grid() != x.Grid() ) throw std::logic_error("{U,x} must be distributed over the same grid"); if( U.Height() != U.Width() ) throw std::logic_error("U must be square"); if( x.Width() != 1 && x.Height() != 1 ) throw std::logic_error("x must be a vector"); const int xLength = ( x.Width() == 1 ? x.Height() : x.Width() ); if( U.Width() != xLength ) throw std::logic_error("Nonconformal TrsvUN"); #endif const Grid& g = U.Grid(); if( x.Width() == 1 ) { // Matrix views DistMatrix<F,MC,MR> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<F,MC,MR> xT(g), x0(g), xB(g), x1(g), x2(g); // Temporary distributions DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,STAR> x1_STAR_STAR(g); DistMatrix<F,MR, STAR> x1_MR_STAR(g); DistMatrix<F,MC, STAR> z0_MC_STAR(g); // Start the algorithm LockedPartitionUpDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); PartitionUp ( x, xT, xB, 0 ); while( xT.Height() > 0 ) { LockedRepartitionUpDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); RepartitionUp ( xT, x0, x1, /**/ /**/ xB, x2 ); x1_MR_STAR.AlignWith( U01 ); z0_MC_STAR.AlignWith( U01 ); z0_MC_STAR.ResizeTo( x0.Height(), 1 ); //----------------------------------------------------------------// x1_STAR_STAR = x1; U11_STAR_STAR = U11; Trsv ( UPPER, NORMAL, diag, U11_STAR_STAR.LockedLocalMatrix(), x1_STAR_STAR.LocalMatrix() ); x1 = x1_STAR_STAR; x1_MR_STAR = x1_STAR_STAR; Gemv ( NORMAL, (F)-1, U01.LockedLocalMatrix(), x1_MR_STAR.LockedLocalMatrix(), (F)0, z0_MC_STAR.LocalMatrix() ); x0.SumScatterUpdate( (F)1, z0_MC_STAR ); //----------------------------------------------------------------// x1_MR_STAR.FreeAlignments(); z0_MC_STAR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); SlidePartitionUp ( xT, x0, /**/ /**/ x1, xB, x2 ); } } else { // Matrix views DistMatrix<F,MC,MR> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<F,MC,MR> xL(g), xR(g), x0(g), x1(g), x2(g); // Temporary distributions DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,STAR> x1_STAR_STAR(g); DistMatrix<F,STAR,MR > x1_STAR_MR(g); DistMatrix<F,STAR,MC > z0_STAR_MC(g); DistMatrix<F,MR, MC > z0_MR_MC(g); DistMatrix<F,MC, MR > z0(g); // Start the algorithm LockedPartitionUpDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); PartitionLeft( x, xL, xR, 0 ); while( xL.Width() > 0 ) { LockedRepartitionUpDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); RepartitionLeft ( xL, /**/ xR, x0, x1, /**/ x2 ); x1_STAR_MR.AlignWith( U01 ); z0_STAR_MC.AlignWith( U01 ); z0.AlignWith( x0 ); z0_STAR_MC.ResizeTo( 1, x0.Width() ); //----------------------------------------------------------------// x1_STAR_STAR = x1; U11_STAR_STAR = U11; Trsv ( UPPER, NORMAL, diag, U11_STAR_STAR.LockedLocalMatrix(), x1_STAR_STAR.LocalMatrix() ); x1 = x1_STAR_STAR; x1_STAR_MR = x1_STAR_STAR; Gemv ( NORMAL, (F)-1, U01.LockedLocalMatrix(), x1_STAR_MR.LockedLocalMatrix(), (F)0, z0_STAR_MC.LocalMatrix() ); z0_MR_MC.SumScatterFrom( z0_STAR_MC ); z0 = z0_MR_MC; Axpy( (F)1, z0, x0 ); //----------------------------------------------------------------// x1_STAR_MR.FreeAlignments(); z0_STAR_MC.FreeAlignments(); z0.FreeAlignments(); SlideLockedPartitionUpDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); SlidePartitionLeft ( xL, /**/ xR, x0, /**/ x1, x2 ); } } #ifndef RELEASE PopCallStack(); #endif }
inline void TrsvLT ( Orientation orientation, UnitOrNonUnit diag, const DistMatrix<F>& L, DistMatrix<F>& x ) { #ifndef RELEASE PushCallStack("internal::TrsvLT"); if( L.Grid() != x.Grid() ) throw std::logic_error("{L,x} must be distributed over the same grid"); if( orientation == NORMAL ) throw std::logic_error("TrsvLT expects a (conjugate-)transpose option"); if( L.Height() != L.Width() ) throw std::logic_error("L must be square"); if( x.Width() != 1 && x.Height() != 1 ) throw std::logic_error("x must be a vector"); const int xLength = ( x.Width() == 1 ? x.Height() : x.Width() ); if( L.Width() != xLength ) throw std::logic_error("Nonconformal TrsvLT"); #endif const Grid& g = L.Grid(); if( x.Width() == 1 ) { // Matrix views DistMatrix<F> L10(g), L11(g); DistMatrix<F> xT(g), x0(g), xB(g), x1(g), x2(g); // Temporary distributions DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,STAR> x1_STAR_STAR(g); DistMatrix<F,MC, STAR> x1_MC_STAR(g); DistMatrix<F,MC, MR > z1(g); DistMatrix<F,MR, MC > z1_MR_MC(g); DistMatrix<F,MR, STAR> z_MR_STAR(g); // Views of z[MR,* ] DistMatrix<F,MR,STAR> z0_MR_STAR(g), z1_MR_STAR(g); z_MR_STAR.AlignWith( L ); Zeros( x.Height(), 1, z_MR_STAR ); // Start the algorithm PartitionUp ( x, xT, xB, 0 ); while( xT.Height() > 0 ) { RepartitionUp ( xT, x0, x1, /**/ /**/ xB, x2 ); const int n0 = x0.Height(); const int n1 = x1.Height(); LockedView( L10, L, n0, 0, n1, n0 ); LockedView( L11, L, n0, n0, n1, n1 ); View( z0_MR_STAR, z_MR_STAR, 0, 0, n0, 1 ); View( z1_MR_STAR, z_MR_STAR, n0, 0, n1, 1 ); x1_MC_STAR.AlignWith( L10 ); z1.AlignWith( x1 ); //----------------------------------------------------------------// if( x2.Height() != 0 ) { z1_MR_MC.SumScatterFrom( z1_MR_STAR ); z1 = z1_MR_MC; Axpy( F(1), z1, x1 ); } x1_STAR_STAR = x1; L11_STAR_STAR = L11; Trsv ( LOWER, orientation, diag, L11_STAR_STAR.LockedMatrix(), x1_STAR_STAR.Matrix() ); x1 = x1_STAR_STAR; x1_MC_STAR = x1_STAR_STAR; Gemv ( orientation, F(-1), L10.LockedMatrix(), x1_MC_STAR.LockedMatrix(), F(1), z0_MR_STAR.Matrix() ); //----------------------------------------------------------------// x1_MC_STAR.FreeAlignments(); z1.FreeAlignments(); SlidePartitionUp ( xT, x0, /**/ /**/ x1, xB, x2 ); } } else { // Matrix views DistMatrix<F> L10(g), L11(g); DistMatrix<F> xL(g), xR(g), x0(g), x1(g), x2(g); // Temporary distributions DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,STAR> x1_STAR_STAR(g); DistMatrix<F,STAR,MC > x1_STAR_MC(g); DistMatrix<F,STAR,MR > z_STAR_MR(g); // Views of z[* ,MR], which will store updates to x DistMatrix<F,STAR,MR> z0_STAR_MR(g), z1_STAR_MR(g); z_STAR_MR.AlignWith( L ); Zeros( 1, x.Width(), z_STAR_MR ); // Start the algorithm PartitionLeft( x, xL, xR, 0 ); while( xL.Width() > 0 ) { RepartitionLeft ( xL, /**/ xR, x0, x1, /**/ x2 ); const int n0 = x0.Width(); const int n1 = x1.Width(); LockedView( L10, L, n0, 0, n1, n0 ); LockedView( L11, L, n0, n0, n1, n1 ); View( z0_STAR_MR, z_STAR_MR, 0, 0, 1, n0 ); View( z1_STAR_MR, z_STAR_MR, 0, n0, 1, n1 ); x1_STAR_MC.AlignWith( L10 ); //----------------------------------------------------------------// if( x2.Width() != 0 ) x1.SumScatterUpdate( F(1), z1_STAR_MR ); x1_STAR_STAR = x1; L11_STAR_STAR = L11; Trsv ( LOWER, orientation, diag, L11_STAR_STAR.LockedMatrix(), x1_STAR_STAR.Matrix() ); x1 = x1_STAR_STAR; x1_STAR_MC = x1_STAR_STAR; Gemv ( orientation, F(-1), L10.LockedMatrix(), x1_STAR_MC.LockedMatrix(), F(1), z0_STAR_MR.Matrix() ); //----------------------------------------------------------------// x1_STAR_MC.FreeAlignments(); SlidePartitionLeft ( xL, /**/ xR, x0, /**/ x1, x2 ); } } #ifndef RELEASE PopCallStack(); #endif }
inline void internal::TrsvLN ( UnitOrNonUnit diag, const DistMatrix<F,MC,MR>& L, DistMatrix<F,MC,MR>& x ) { #ifndef RELEASE PushCallStack("internal::TrsvLN"); if( L.Grid() != x.Grid() ) throw std::logic_error("{L,x} must be distributed over the same grid"); if( L.Height() != L.Width() ) throw std::logic_error("L must be square"); if( x.Width() != 1 && x.Height() != 1 ) throw std::logic_error("x must be a vector"); const int xLength = ( x.Width() == 1 ? x.Height() : x.Width() ); if( L.Width() != xLength ) throw std::logic_error("Nonconformal TrsvLN"); #endif const Grid& g = L.Grid(); if( x.Width() == 1 ) { // Matrix views DistMatrix<F,MC,MR> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F,MC,MR> xT(g), x0(g), xB(g), x1(g), x2(g); // Temporary distributions DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,STAR> x1_STAR_STAR(g); DistMatrix<F,MR, STAR> x1_MR_STAR(g); DistMatrix<F,MC, STAR> z2_MC_STAR(g); // Start the algorithm LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionDown ( x, xT, xB, 0 ); while( xB.Height() > 0 ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); RepartitionDown ( xT, x0, /**/ /**/ x1, xB, x2 ); x1_MR_STAR.AlignWith( L21 ); z2_MC_STAR.AlignWith( L21 ); z2_MC_STAR.ResizeTo( x2.Height(), 1 ); //----------------------------------------------------------------// x1_STAR_STAR = x1; L11_STAR_STAR = L11; Trsv ( LOWER, NORMAL, diag, L11_STAR_STAR.LockedLocalMatrix(), x1_STAR_STAR.LocalMatrix() ); x1 = x1_STAR_STAR; x1_MR_STAR = x1_STAR_STAR; Gemv ( NORMAL, (F)-1, L21.LockedLocalMatrix(), x1_MR_STAR.LockedLocalMatrix(), (F)0, z2_MC_STAR.LocalMatrix() ); x2.SumScatterUpdate( (F)1, z2_MC_STAR ); //----------------------------------------------------------------// x1_MR_STAR.FreeAlignments(); z2_MC_STAR.FreeAlignments(); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlidePartitionDown ( xT, x0, x1, /**/ /**/ xB, x2 ); } } else { // Matrix views DistMatrix<F,MC,MR> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F,MC,MR> xL(g), xR(g), x0(g), x1(g), x2(g); // Temporary distributions DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,STAR> x1_STAR_STAR(g); DistMatrix<F,STAR,MR > x1_STAR_MR(g); DistMatrix<F,STAR,MC > z2_STAR_MC(g); DistMatrix<F,MR, MC > z2_MR_MC(g); DistMatrix<F,MC, MR > z2(g); // Start the algorithm LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionRight( x, xL, xR, 0 ); while( xR.Width() > 0 ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); RepartitionRight ( xL, /**/ xR, x0, /**/ x1, x2 ); x1_STAR_MR.AlignWith( L21 ); z2_STAR_MC.AlignWith( L21 ); z2.AlignWith( x2 ); z2_STAR_MC.ResizeTo( 1, x2.Width() ); //----------------------------------------------------------------// x1_STAR_STAR = x1; L11_STAR_STAR = L11; Trsv ( LOWER, NORMAL, diag, L11_STAR_STAR.LockedLocalMatrix(), x1_STAR_STAR.LocalMatrix() ); x1 = x1_STAR_STAR; x1_STAR_MR = x1_STAR_STAR; Gemv ( NORMAL, (F)-1, L21.LockedLocalMatrix(), x1_STAR_MR.LockedLocalMatrix(), (F)0, z2_STAR_MC.LocalMatrix() ); z2_MR_MC.SumScatterFrom( z2_STAR_MC ); z2 = z2_MR_MC; Axpy( (F)1, z2, x2 ); //----------------------------------------------------------------// x1_STAR_MR.FreeAlignments(); z2_STAR_MC.FreeAlignments(); z2.FreeAlignments(); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlidePartitionRight ( xL, /**/ xR, x0, x1, /**/ x2 ); } } #ifndef RELEASE PopCallStack(); #endif }
inline void LocalSymvRowAccumulateU ( T alpha, const DistMatrix<T>& A, const DistMatrix<T,STAR,MC>& x_STAR_MC, const DistMatrix<T,STAR,MR>& x_STAR_MR, DistMatrix<T,STAR,MC>& z_STAR_MC, DistMatrix<T,STAR,MR>& z_STAR_MR ) { #ifndef RELEASE PushCallStack("internal::LocalSymvRowAccumulateU"); if( A.Grid() != x_STAR_MC.Grid() || x_STAR_MC.Grid() != x_STAR_MR.Grid() || x_STAR_MR.Grid() != z_STAR_MC.Grid() || z_STAR_MC.Grid() != z_STAR_MR.Grid() ) throw std::logic_error ("{A,x,z} must be distributed over the same grid"); if( x_STAR_MC.Height() != 1 || x_STAR_MR.Height() != 1 || z_STAR_MC.Height() != 1 || z_STAR_MR.Height() != 1 ) throw std::logic_error("Expected x and z to be row vectors"); if( A.Height() != A.Width() || A.Height() != x_STAR_MC.Width() || A.Height() != x_STAR_MR.Width() || A.Height() != z_STAR_MC.Width() || A.Height() != z_STAR_MR.Width() ) { std::ostringstream msg; msg << "Nonconformal LocalSymvRowAccumulateU: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " x[* ,MC] ~ " << x_STAR_MC.Height() << " x " << x_STAR_MC.Width() << "\n" << " x[* ,MR] ~ " << x_STAR_MR.Height() << " x " << x_STAR_MR.Width() << "\n" << " z[* ,MC] ~ " << z_STAR_MC.Height() << " x " << z_STAR_MC.Width() << "\n" << " z[* ,MR] ~ " << z_STAR_MR.Height() << " x " << z_STAR_MR.Width() << "\n"; throw std::logic_error( msg.str() ); } if( x_STAR_MC.RowAlignment() != A.ColAlignment() || x_STAR_MR.RowAlignment() != A.RowAlignment() || z_STAR_MC.RowAlignment() != A.ColAlignment() || z_STAR_MR.RowAlignment() != A.RowAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> A11(g), A12(g); DistMatrix<T> D11(g); DistMatrix<T,STAR,MC> x1_STAR_MC(g); DistMatrix<T,STAR,MR> xL_STAR_MR(g), xR_STAR_MR(g), x0_STAR_MR(g), x1_STAR_MR(g), x2_STAR_MR(g); DistMatrix<T,STAR,MC> z1_STAR_MC(g); DistMatrix<T,STAR,MR> z1_STAR_MR(g), z2_STAR_MR(g); // We want our local gemvs to be of width blocksize, so we will // temporarily change to max(r,c) times the current blocksize const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*LocalSymvBlocksize<T>() ); LockedPartitionRight( x_STAR_MR, xL_STAR_MR, xR_STAR_MR, 0 ); while( xL_STAR_MR.Width() < x_STAR_MR.Width() ) { LockedRepartitionRight ( xL_STAR_MR, /**/ xR_STAR_MR, x0_STAR_MR, /**/ x1_STAR_MR, x2_STAR_MR ); const int n0 = x0_STAR_MR.Width(); const int n1 = x1_STAR_MR.Width(); const int n2 = x2_STAR_MR.Width(); LockedView( A11, A, n0, n0, n1, n1 ); LockedView( A12, A, n0, n0+n1, n1, n2 ); LockedView( x1_STAR_MC, x_STAR_MC, 0, n0, 1, n1 ); View( z1_STAR_MC, z_STAR_MC, 0, n0, 1, n1 ); View( z1_STAR_MR, z_STAR_MR, 0, n0, 1, n1 ); View( z2_STAR_MR, z_STAR_MR, 0, n0+n1, 1, n2 ); D11.AlignWith( A11 ); //--------------------------------------------------------------------// // TODO: These diagonal block updates can be greatly improved D11 = A11; MakeTrapezoidal( LEFT, UPPER, 0, D11 ); Gemv ( NORMAL, alpha, D11.LockedLocalMatrix(), x1_STAR_MR.LockedLocalMatrix(), T(1), z1_STAR_MC.LocalMatrix() ); MakeTrapezoidal( LEFT, UPPER, 1, D11 ); Gemv ( TRANSPOSE, alpha, D11.LockedLocalMatrix(), x1_STAR_MC.LockedLocalMatrix(), T(1), z1_STAR_MR.LocalMatrix() ); Gemv ( NORMAL, alpha, A12.LockedLocalMatrix(), x2_STAR_MR.LockedLocalMatrix(), T(1), z1_STAR_MC.LocalMatrix() ); Gemv ( TRANSPOSE, alpha, A12.LockedLocalMatrix(), x1_STAR_MC.LockedLocalMatrix(), T(1), z2_STAR_MR.LocalMatrix() ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionRight ( xL_STAR_MR, /**/ xR_STAR_MR, x0_STAR_MR, x1_STAR_MR, /**/ x2_STAR_MR ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
void Flush() { if( numQueued_ == 0 ) return; auto YActive = Y_( ALL, IR(0,numQueued_) ); Matrix<Base<Field>> colNorms; if( useTranspose_ ) { // TODO(poulson): Add this as an option /* Timer timer; timer.Start(); BatchTransposedSparseToCoordinates ( NTrans_, YActive, VCand_, blocksize_ ); const double transformTime = timer.Stop(); const double n = YActive.Height(); const double transformGflops = double(numQueued_)*n*n/(1.e9*transformTime); Output (numQueued_," transforms: ",timer.Stop()," seconds (", transformGflops," GFlop/s"); timer.Start(); colNorms = BatchTransposedCoordinatesToNorms ( d_, NTrans_, VCand_, insertionBound_ ); const double normTime = timer.Stop(); const double normGflops = double(numQueued_)*n*n/(1.e9*normTime); Output (numQueued_," norms: ",timer.Stop()," seconds (", normGflops," GFlop/s"); */ BatchTransposedSparseToCoordinates ( NTrans_, YActive, VCand_, blocksize_ ); colNorms = BatchTransposedCoordinatesToNorms ( d_, NTrans_, VCand_, insertionBound_ ); } else { BatchSparseToCoordinates( N_, YActive, VCand_, blocksize_ ); colNorms = BatchCoordinatesToNorms( d_, N_, VCand_, insertionBound_ ); } for( Int j=0; j<numQueued_; ++j ) { for( Int k=0; k<insertionBound_; ++k ) { const Base<Field> bNorm = colNorms(j,k); if( bNorm < normUpperBounds_(k) && bNorm != Base<Field>(0) ) { const Range<Int> subInd(k,END); auto y = YActive(subInd,IR(j)); auto vCand = VCand_(subInd,IR(j)); Output ("normUpperBound=",normUpperBounds_(k), ", bNorm=",bNorm,", k=",k); Print( y, "y" ); // Check that the reverse transformation holds Matrix<Field> yCheck; CoordinatesToSparse( N_(subInd,subInd), vCand, yCheck ); yCheck -= y; if( FrobeniusNorm(yCheck) != Base<Field>(0) ) { Print( B_(ALL,subInd), "B" ); Print( d_(subInd,ALL), "d" ); Print( N_(subInd,subInd), "N" ); Print( vCand, "vCand" ); Print( yCheck, "eCheck" ); LogicError("Invalid sparse transformation"); } Copy( vCand, v_ ); Print( v_, "v" ); Matrix<Field> b; Zeros( b, B_.Height(), 1 ); Gemv( NORMAL, Field(1), B_(ALL,subInd), v_, Field(0), b ); Print( b, "b" ); normUpperBounds_(k) = bNorm; foundVector_ = true; insertionBound_ = k+1; } // TODO(poulson): Keep track of 'stock' vectors? } } numQueued_ = 0; Zero( Y_ ); }
Int ADMM ( const Matrix<Real>& A, const Matrix<Real>& b, const Matrix<Real>& c, Matrix<Real>& z, const ADMMCtrl<Real>& ctrl ) { EL_DEBUG_CSE // Cache a custom partially-pivoted LU factorization of // | rho*I A^H | = | B11 B12 | // | A 0 | | B21 B22 | // by (justifiably) avoiding pivoting in the first n steps of // the factorization, so that // [I,rho*I] = lu(rho*I). // The factorization would then proceed with // B21 := B21 U11^{-1} = A (rho*I)^{-1} = A/rho // B12 := L11^{-1} B12 = I A^H = A^H. // The Schur complement would then be // B22 := B22 - B21 B12 = 0 - (A*A^H)/rho. // We then factor said matrix with LU with partial pivoting and // swap the necessary rows of B21 in order to implicitly commute // the row pivots with the Gauss transforms in the manner standard // for GEPP. Unless A A' is singular, pivoting should not be needed, // as Cholesky factorization of the negative matrix should be valid. // // The result is the factorization // | I 0 | | rho*I A^H | = | I 0 | | rho*I U12 |, // | 0 P22 | | A 0 | | L21 L22 | | 0 U22 | // where [L22,U22] are stored within B22. Matrix<Real> U12, L21, B22, bPiv; Adjoint( A, U12 ); L21 = A; L21 *= 1/ctrl.rho; Herk( LOWER, NORMAL, -1/ctrl.rho, A, B22 ); MakeHermitian( LOWER, B22 ); // TODO: Replace with sparse-direct Cholesky version? Permutation P2; LU( B22, P2 ); P2.PermuteRows( L21 ); bPiv = b; P2.PermuteRows( bPiv ); // Possibly form the inverse of L22 U22 Matrix<Real> X22; if( ctrl.inv ) { X22 = B22; MakeTrapezoidal( LOWER, X22 ); FillDiagonal( X22, Real(1) ); TriangularInverse( LOWER, UNIT, X22 ); Trsm( LEFT, UPPER, NORMAL, NON_UNIT, Real(1), B22, X22 ); } Int numIter=0; const Int m = A.Height(); const Int n = A.Width(); Matrix<Real> g, xTmp, y, t; Zeros( g, m+n, 1 ); PartitionDown( g, xTmp, y, n ); Matrix<Real> x, u, zOld, xHat; Zeros( z, n, 1 ); Zeros( u, n, 1 ); Zeros( t, n, 1 ); while( numIter < ctrl.maxIter ) { zOld = z; // Find x from // | rho*I A^H | | x | = | rho*(z-u)-c | // | A 0 | | y | | b | // via our cached custom factorization: // // |x| = inv(U) inv(L) P' |rho*(z-u)-c| // |y| |b | // = |rho*I U12|^{-1} |I 0 | |I 0 | |rho*(z-u)-c| // = |0 U22| |L21 L22| |0 P22'| |b | // = " " |rho*(z-u)-c| // | P22' b | xTmp = z; xTmp -= u; xTmp *= ctrl.rho; xTmp -= c; y = bPiv; Gemv( NORMAL, Real(-1), L21, xTmp, Real(1), y ); if( ctrl.inv ) { Gemv( NORMAL, Real(1), X22, y, t ); y = t; } else { Trsv( LOWER, NORMAL, UNIT, B22, y ); Trsv( UPPER, NORMAL, NON_UNIT, B22, y ); } Gemv( NORMAL, Real(-1), U12, y, Real(1), xTmp ); xTmp *= 1/ctrl.rho; // xHat := alpha*x + (1-alpha)*zOld xHat = xTmp; xHat *= ctrl.alpha; Axpy( 1-ctrl.alpha, zOld, xHat ); // z := pos(xHat+u) z = xHat; z += u; LowerClip( z, Real(0) ); // u := u + (xHat-z) u += xHat; u -= z; const Real objective = Dot( c, xTmp ); // rNorm := || x - z ||_2 t = xTmp; t -= z; const Real rNorm = FrobeniusNorm( t ); // sNorm := |rho| || z - zOld ||_2 t = z; t -= zOld; const Real sNorm = Abs(ctrl.rho)*FrobeniusNorm( t ); const Real epsPri = Sqrt(Real(n))*ctrl.absTol + ctrl.relTol*Max(FrobeniusNorm(xTmp),FrobeniusNorm(z)); const Real epsDual = Sqrt(Real(n))*ctrl.absTol + ctrl.relTol*Abs(ctrl.rho)*FrobeniusNorm(u); if( ctrl.print ) { t = xTmp; LowerClip( t, Real(0) ); t -= xTmp; const Real clipDist = FrobeniusNorm( t ); cout << numIter << ": " << "||x-z||_2=" << rNorm << ", " << "epsPri=" << epsPri << ", " << "|rho| ||z-zOld||_2=" << sNorm << ", " << "epsDual=" << epsDual << ", " << "||x-Pos(x)||_2=" << clipDist << ", " << "c'x=" << objective << endl; } if( rNorm < epsPri && sNorm < epsDual ) break; ++numIter; } if( ctrl.maxIter == numIter ) cout << "ADMM failed to converge" << endl; x = xTmp; return numIter; }
inline void PanelLQ( DistMatrix<Real>& A ) { #ifndef RELEASE PushCallStack("internal::PanelLQ"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<Real> ATL(g), ATR(g), A00(g), a01(g), A02(g), aTopRow(g), ABottomPan(g), ABL(g), ABR(g), a10(g), alpha11(g), a12(g), A20(g), a21(g), A22(g); // Temporary distributions DistMatrix<Real,STAR,MR> aTopRow_STAR_MR(g); DistMatrix<Real,MC,STAR> z_MC_STAR(g); PushBlocksizeStack( 1 ); PartitionDownLeftDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ a01, A02, /*************/ /**********************/ /**/ a10, /**/ alpha11, a12, ABL, /**/ ABR, A20, /**/ a21, A22 ); aTopRow.View1x2( alpha11, a12 ); ABottomPan.View1x2( a21, A22 ); aTopRow_STAR_MR.AlignWith( ABottomPan ); z_MC_STAR.AlignWith( ABottomPan ); Zeros( ABottomPan.Height(), 1, z_MC_STAR ); //--------------------------------------------------------------------// const Real tau = Reflector( alpha11, a12 ); const bool myDiagonalEntry = ( g.Row() == alpha11.ColAlignment() && g.Col() == alpha11.RowAlignment() ); Real alpha = 0; if( myDiagonalEntry ) { alpha = alpha11.GetLocal(0,0); alpha11.SetLocal(0,0,1); } aTopRow_STAR_MR = aTopRow; Gemv ( NORMAL, Real(1), ABottomPan.LockedLocalMatrix(), aTopRow_STAR_MR.LockedLocalMatrix(), Real(0), z_MC_STAR.LocalMatrix() ); z_MC_STAR.SumOverRow(); Ger ( -tau, z_MC_STAR.LockedLocalMatrix(), aTopRow_STAR_MR.LockedLocalMatrix(), ABottomPan.LocalMatrix() ); if( myDiagonalEntry ) alpha11.SetLocal(0,0,alpha); //--------------------------------------------------------------------// aTopRow_STAR_MR.FreeAlignments(); z_MC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, a01, /**/ A02, /**/ a10, alpha11, /**/ a12, /*************/ /**********************/ ABL, /**/ ABR, A20, a21, /**/ A22 ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void PanelLQ ( DistMatrix<Complex<Real> >& A, DistMatrix<Complex<Real>,MD,STAR>& t ) { #ifndef RELEASE PushCallStack("internal::PanelLQ"); if( A.Grid() != t.Grid() ) throw std::logic_error("{A,t} must be distributed over the same grid"); if( t.Height() != std::min(A.Height(),A.Width()) || t.Width() != 1 ) throw std::logic_error ("t must be a vector of height equal to the minimum dimension of A"); if( !t.AlignedWithDiagonal( A, 0 ) ) throw std::logic_error("t must be aligned with A's main diagonal"); #endif typedef Complex<Real> C; const Grid& g = A.Grid(); // Matrix views DistMatrix<C> ATL(g), ATR(g), A00(g), a01(g), A02(g), aTopRow(g), ABottomPan(g), ABL(g), ABR(g), a10(g), alpha11(g), a12(g), A20(g), a21(g), A22(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), tau1(g), t2(g); // Temporary distributions DistMatrix<C> aTopRowConj(g); DistMatrix<C,STAR,MR > aTopRowConj_STAR_MR(g); DistMatrix<C,MC, STAR> z_MC_STAR(g); PushBlocksizeStack( 1 ); PartitionDownLeftDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( t, tT, tB, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ a01, A02, /*************/ /**********************/ /**/ a10, /**/ alpha11, a12, ABL, /**/ ABR, A20, /**/ a21, A22 ); RepartitionDown ( tT, t0, /**/ /****/ tau1, tB, t2 ); aTopRow.View1x2( alpha11, a12 ); ABottomPan.View1x2( a21, A22 ); aTopRowConj_STAR_MR.AlignWith( ABottomPan ); z_MC_STAR.AlignWith( ABottomPan ); Zeros( ABottomPan.Height(), 1, z_MC_STAR ); //--------------------------------------------------------------------// const C tau = Reflector( alpha11, a12 ); tau1.Set( 0, 0, tau ); const bool myDiagonalEntry = ( g.Row() == alpha11.ColAlignment() && g.Col() == alpha11.RowAlignment() ); C alpha = 0; if( myDiagonalEntry ) { alpha = alpha11.GetLocal(0,0); alpha11.SetLocal(0,0,1); } Conjugate( aTopRow, aTopRowConj ); aTopRowConj_STAR_MR = aTopRowConj; Gemv ( NORMAL, C(1), ABottomPan.LockedLocalMatrix(), aTopRowConj_STAR_MR.LockedLocalMatrix(), C(0), z_MC_STAR.LocalMatrix() ); z_MC_STAR.SumOverRow(); Ger ( -Conj(tau), z_MC_STAR.LockedLocalMatrix(), aTopRowConj_STAR_MR.LockedLocalMatrix(), ABottomPan.LocalMatrix() ); if( myDiagonalEntry ) alpha11.SetLocal(0,0,alpha); //--------------------------------------------------------------------// aTopRowConj_STAR_MR.FreeAlignments(); z_MC_STAR.FreeAlignments(); SlidePartitionDown ( tT, t0, tau1, /**/ /****/ tB, t2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, a01, /**/ A02, /**/ a10, alpha11, /**/ a12, /*************/ /**********************/ ABL, /**/ ABR, A20, a21, /**/ A22 ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void PanelLQ ( Matrix<Complex<Real> >& A, Matrix<Complex<Real> >& t ) { #ifndef RELEASE PushCallStack("internal::PanelLQ"); if( t.Height() != std::min(A.Height(),A.Width()) || t.Width() != 1 ) throw std::logic_error ("t must be a vector of height equal to the minimum dimension of A"); #endif typedef Complex<Real> C; Matrix<C> ATL, ATR, A00, a01, A02, aTopRow, ABottomPan, ABL, ABR, a10, alpha11, a12, A20, a21, A22; Matrix<C> tT, t0, tB, tau1, t2; Matrix<C> z, aTopRowConj; PushBlocksizeStack( 1 ); PartitionDownLeftDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( t, tT, tB, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ a01, A02, /*************/ /**********************/ /**/ a10, /**/ alpha11, a12, ABL, /**/ ABR, A20, /**/ a21, A22 ); RepartitionDown ( tT, t0, /**/ /****/ tau1, tB, t2 ); aTopRow.View1x2( alpha11, a12 ); ABottomPan.View1x2( a21, A22 ); Zeros( ABottomPan.Height(), 1, z ); //--------------------------------------------------------------------// const C tau = Reflector( alpha11, a12 ); tau1.Set( 0, 0, tau ); const C alpha = alpha11.Get(0,0); alpha11.Set(0,0,1); Conjugate( aTopRow, aTopRowConj ); Gemv( NORMAL, C(1), ABottomPan, aTopRowConj, C(0), z ); Ger( -Conj(tau), z, aTopRowConj, ABottomPan ); alpha11.Set(0,0,alpha); //--------------------------------------------------------------------// SlidePartitionDown ( tT, t0, tau1, /**/ /****/ tB, t2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, a01, /**/ A02, /**/ a10, alpha11, /**/ a12, /*************/ /**********************/ ABL, /**/ ABR, A20, a21, /**/ A22 ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }