void Sylvester ( Int m, Matrix<F>& W, Matrix<F>& X, SignCtrl<Base<F>> ctrl ) { EL_DEBUG_CSE Sign( W, ctrl ); Matrix<F> WTL, WTR, WBL, WBR; PartitionDownDiagonal ( W, WTL, WTR, WBL, WBR, m ); // WTL and WBR should be the positive and negative identity, WBL should be // zero, and WTR should be -2 X X = WTR; X *= -F(1)/F(2); // TODO: Think of how to probe for checks on other quadrants. /* typedef Base<F> Real; UpdateDiagonal( WTL, F(-1) ); const Real errorWTL = FrobeniusNorm( WTL ); const Int n = W.Height() - m; UpdateDiagonal( WBR, F(1) ); const Real errorWBR = FrobeniusNorm( WBR ); const Real errorWBL = FrobeniusNorm( WBL ); */ }
void MakeExplicitlyHermitian( UpperOrLower uplo, DistMatrix<F,MC,MR>& A ) { const Grid& g = A.Grid(); DistMatrix<F,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F,MC,MR> A11Adj(g); DistMatrix<F,MR,MC> A11_MR_MC(g); DistMatrix<F,MR,MC> A21_MR_MC(g); DistMatrix<F,MR,MC> A12_MR_MC(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A11Adj.AlignWith( A11 ); A11_MR_MC.AlignWith( A11 ); A12_MR_MC.AlignWith( A21 ); A21_MR_MC.AlignWith( A12 ); //--------------------------------------------------------------------// A11_MR_MC = A11; A11Adj.ResizeTo( A11.Height(), A11.Width() ); Adjoint( A11_MR_MC.LocalMatrix(), A11Adj.LocalMatrix() ); if( uplo == LOWER ) { MakeTrapezoidal( LEFT, UPPER, 1, A11Adj ); Axpy( (F)1, A11Adj, A11 ); A21_MR_MC = A21; Adjoint( A21_MR_MC.LocalMatrix(), A12.LocalMatrix() ); } else { MakeTrapezoidal( LEFT, LOWER, -1, A11Adj ); Axpy( (F)1, A11Adj, A11 ); A12_MR_MC = A12; Adjoint( A12_MR_MC.LocalMatrix(), A21.LocalMatrix() ); } //--------------------------------------------------------------------// A21_MR_MC.FreeAlignments(); A12_MR_MC.FreeAlignments(); A11_MR_MC.FreeAlignments(); A11Adj.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } }
void Sylvester ( Int m, ElementalMatrix<F>& WPre, ElementalMatrix<F>& X, SignCtrl<Base<F>> ctrl ) { EL_DEBUG_CSE DistMatrixReadProxy<F,F,MC,MR> WProx( WPre ); auto& W = WProx.Get(); const Grid& g = W.Grid(); Sign( W, ctrl ); DistMatrix<F> WTL(g), WTR(g), WBL(g), WBR(g); PartitionDownDiagonal ( W, WTL, WTR, WBL, WBR, m ); // WTL and WBR should be the positive and negative identity, WBL should be // zero, and WTR should be -2 X Copy( WTR, X ); X *= -F(1)/F(2); // TODO: Think of how to probe for checks on other quadrants. // Add UpdateDiagonal routine to avoid explicit identity Axpy? /* typedef Base<F> Real; UpdateDiagonal( WTL, F(-1) ); const Real errorWTL = FrobeniusNorm( WTL ); const Int n = W.Height() - m; UpdateDiagonal( WBR, F(1) ); const Real errorWBR = FrobeniusNorm( WBR ); const Real errorWBL = FrobeniusNorm( WBL ); */ }
inline int Lyapunov( const DistMatrix<F>& A, const DistMatrix<F>& C, DistMatrix<F>& X ) { #ifndef RELEASE CallStackEntry cse("Sylvester"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( C.Height() != A.Height() || C.Width() != A.Height() ) LogicError("C must conform with A"); if( A.Grid() != C.Grid() ) LogicError("A and C must have the same grid"); #endif const Grid& g = A.Grid(); const Int m = A.Height(); DistMatrix<F> W(g), WTL(g), WTR(g), WBL(g), WBR(g); Zeros( W, 2*m, 2*m ); PartitionDownDiagonal ( W, WTL, WTR, WBL, WBR, m ); WTL = A; Adjoint( A, WBR ); Scale( F(-1), WBR ); WTR = C; Scale( F(-1), WTR ); return Sylvester( m, W, X ); }
// Distributed Riccati driver built on the matrix sign function.
//
// WPre is accessed through an [MC,MR] read proxy.  The proxied matrix W is
// overwritten with sgn(W) - I, split into a left panel ML holding the first
// n = W.Height()/2 columns and a right panel MR holding the rest, and X is
// obtained from the least-squares solve of ML X = -MR.
//
// The unused 2x2 quadrant views of W (and the partitioning call that
// produced them) have been dropped: nothing below reads them.
void Riccati
( ElementalMatrix<F>& WPre,
  ElementalMatrix<F>& X,
  SignCtrl<Base<F>> ctrl )
{
    DEBUG_CSE

    DistMatrixReadProxy<F,F,MC,MR> WProx( WPre );
    auto& W = WProx.Get();
    const Grid& g = W.Grid();

    Sign( W, ctrl );
    const Int n = W.Height()/2;

    // (ML, MR) = sgn(W) - I
    ShiftDiagonal( W, F(-1) );

    // Solve for X in ML X = -MR
    DistMatrix<F> ML(g), MR(g);
    PartitionRight( W, ML, MR, n );
    MR *= -1;
    ls::Overwrite( NORMAL, ML, MR, X );
}
inline void Householder( Matrix<F>& A, Matrix<F>& t ) { #ifndef RELEASE CallStackEntry entry("lq::Householder"); #endif t.ResizeTo( Min(A.Height(),A.Width()), 1 ); // Matrix views Matrix<F> ATL, ATR, A00, A01, A02, ATopPan, ABottomPan, ABL, ABR, A10, A11, A12, A20, A21, A22; Matrix<F> tT, t0, tB, t1, t2; PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( t, tT, tB, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2 ); View1x2( ATopPan, A11, A12 ); View1x2( ABottomPan, A21, A22 ); //--------------------------------------------------------------------// PanelHouseholder( ATopPan, t1 ); ApplyQ( RIGHT, ADJOINT, ATopPan, t1, ABottomPan ); //--------------------------------------------------------------------// SlidePartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } }
void L( Matrix<F>& A, Matrix<F>& t ) { #ifndef RELEASE CallStackEntry entry("hermitian_tridiag::L"); if( A.Height() != A.Width() ) LogicError("A must be square"); #endif typedef BASE(F) R; const Int tHeight = Max(A.Height()-1,0); t.ResizeTo( tHeight, 1 ); // Matrix views Matrix<F> ATL, ATR, A00, a01, A02, alpha21T, ABL, ABR, a10, alpha11, a12, a21B, A20, a21, A22; // Temporary matrices Matrix<F> w21; PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height()+1 < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ a01, A02, /*************/ /**********************/ /**/ a10, /**/ alpha11, a12, ABL, /**/ ABR, A20, /**/ a21, A22, 1 ); PartitionDown ( a21, alpha21T, a21B, 1 ); //--------------------------------------------------------------------// const F tau = Reflector( alpha21T, a21B ); const R epsilon1 = alpha21T.GetRealPart(0,0); t.Set(A00.Height(),0,tau); alpha21T.Set(0,0,F(1)); Zeros( w21, a21.Height(), 1 ); Hemv( LOWER, tau, A22, a21, F(0), w21 ); const F alpha = -tau*Dot( w21, a21 )/F(2); Axpy( alpha, a21, w21 ); Her2( LOWER, F(-1), a21, w21, A22 ); alpha21T.Set(0,0,epsilon1); //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, a01, /**/ A02, /**/ a10, alpha11, /**/ a12, /*************/ /**********************/ ABL, /**/ ABR, A20, a21, /**/ A22 ); } }
inline void LocalTrrkKernel ( UpperOrLower uplo, Orientation orientationOfA, Orientation orientationOfB, T alpha, const DistMatrix<T,STAR,MC >& A, const DistMatrix<T,MR, STAR>& B, T beta, DistMatrix<T,MC, MR >& C ) { #ifndef RELEASE PushCallStack("LocalTrrkKernel"); CheckInput( orientationOfA, orientationOfB, A, B, C ); #endif const Grid& g = C.Grid(); DistMatrix<T,STAR,MC> AL(g), AR(g); DistMatrix<T,MR,STAR> BT(g), BB(g); DistMatrix<T,MC,MR> CTL(g), CTR(g), CBL(g), CBR(g); DistMatrix<T,MC,MR> DTL(g), DBR(g); const int half = C.Height()/2; ScaleTrapezoid( beta, LEFT, uplo, 0, C ); LockedPartitionRight( A, AL, AR, half ); LockedPartitionDown ( B, BT, BB, half ); PartitionDownDiagonal ( C, CTL, CTR, CBL, CBR, half ); DTL.AlignWith( CTL ); DBR.AlignWith( CBR ); DTL.ResizeTo( CTL.Height(), CTL.Width() ); DBR.ResizeTo( CBR.Height(), CBR.Width() ); //------------------------------------------------------------------------// if( uplo == LOWER ) internal::LocalGemm ( orientationOfA, orientationOfB, alpha, AR, BT, T(1), CBL ); else internal::LocalGemm ( orientationOfA, orientationOfB, alpha, AL, BB, T(1), CTR ); internal::LocalGemm ( orientationOfA, orientationOfB, alpha, AL, BT, T(0), DTL ); AxpyTriangle( uplo, T(1), DTL, CTL ); internal::LocalGemm ( orientationOfA, orientationOfB, alpha, AR, BB, T(0), DBR ); AxpyTriangle( uplo, T(1), DBR, CBR ); //------------------------------------------------------------------------// #ifndef RELEASE PopCallStack(); #endif }
// Unblocked lower tridiagonalization sweep for a real matrix.
//
// The blocksize stack is pushed to 1 for the duration of the loop so that
// each repartition exposes a single column.  For every column a reflector is
// generated from a21 and the trailing block A22 is updated with a Symv
// followed by a rank-two Syr2 update.
inline void HermitianTridiagL( Matrix<R>& A )
{
#ifndef RELEASE
    PushCallStack("HermitianTridiagL");
    if( A.Height() != A.Width() )
        throw std::logic_error("A must be square");
#endif
    // Views of A
    Matrix<R> ATL, ATR, A00, a01,     A02, alpha21T,
              ABL, ABR, a10, alpha11, a12, a21B,
                        A20, a21,     A22;

    // Workspace for the Symv product
    Matrix<R> w21;

    PushBlocksizeStack( 1 );
    PartitionDownDiagonal( A, ATL, ATR, ABL, ABR, 0 );
    while( ATL.Height()+1 < A.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, ATR, A00, a01,     A02,
                    a10, alpha11, a12,
          ABL, ABR, A20, a21,     A22 );

        // Split a21 into its leading entry and the remainder
        PartitionDown( a21, alpha21T, a21B, 1 );

        w21.ResizeTo( a21.Height(), 1 );

        const R tau = Reflector( alpha21T, a21B );
        // Remember the leading entry so it can be restored after the update
        const R leadingEntry = alpha21T.Get(0,0);

        // Temporarily place an explicit one at the head of a21
        alpha21T.Set( 0, 0, R(1) );

        Symv( LOWER, tau, A22, a21, R(0), w21 );
        const R alpha = -tau*Dot( w21, a21 )/R(2);
        Axpy( alpha, a21, w21 );
        Syr2( LOWER, R(-1), a21, w21, A22 );

        alpha21T.Set( 0, 0, leadingEntry );

        SlidePartitionDownDiagonal
        ( ATL, ATR, A00, a01,     A02,
                    a10, alpha11, a12,
          ABL, ABR, A20, a21,     A22 );
    }
    PopBlocksizeStack();
#ifndef RELEASE
    PopCallStack();
#endif
}
inline void RepartitionDownDiagonal ( DM& ATL, DM& ATR, DM& A00, DM& A01, DM& A02, DM& A10, DM& A11, DM& A12, DM& ABL, DM& ABR, DM& A20, DM& A21, DM& A22, Int bsize=Blocksize() ) { DEBUG_ONLY(CallStackEntry cse("RepartitionDownDiagonal")) View( A00, ATL ); PartitionDownDiagonal( ABR, A11, A12, A21, A22, bsize ); PartitionDown( ABL, A10, A20, A11.Height() ); PartitionRight( ATR, A01, A02, A11.Width() ); }
inline void TrrkNTKernel ( UpperOrLower uplo, Orientation orientationOfB, T alpha, const Matrix<T>& A, const Matrix<T>& B, T beta, Matrix<T>& C ) { #ifndef RELEASE PushCallStack("TrrkNTKernel"); CheckInputNT( orientationOfB, A, B, C ); #endif Matrix<T> AT, AB; Matrix<T> BT, BB; Matrix<T> CTL, CTR, CBL, CBR; Matrix<T> DTL, DBR; const int half = C.Height()/2; ScaleTrapezoid( beta, LEFT, uplo, 0, C ); LockedPartitionDown ( A, AT, AB, half ); LockedPartitionDown ( B, BT, BB, half ); PartitionDownDiagonal ( C, CTL, CTR, CBL, CBR, half ); DTL.ResizeTo( CTL.Height(), CTL.Width() ); DBR.ResizeTo( CBR.Height(), CBR.Width() ); //------------------------------------------------------------------------// if( uplo == LOWER ) Gemm( NORMAL, orientationOfB, alpha, AB, BT, T(1), CBL ); else Gemm( NORMAL, orientationOfB, alpha, AT, BB, T(1), CTR ); Gemm( NORMAL, orientationOfB, alpha, AT, BT, T(0), DTL ); AxpyTriangle( uplo, T(1), DTL, CTL ); Gemm( NORMAL, orientationOfB, alpha, AB, BB, T(0), DBR ); AxpyTriangle( uplo, T(1), DBR, CBR ); //------------------------------------------------------------------------// #ifndef RELEASE PopCallStack(); #endif }
// Blocked, in-place sweep over the upper triangle of A (hpd_inverse,
// upper variant 2).
//
// Each iteration factors the diagonal block A11 with Cholesky, uses it in
// triangular solves against A01 and A12, applies the Herk/Gemm updates to
// A00, A02 and A22, and finally replaces A11 by Trtrmm( ADJOINT, UPPER, . )
// of its triangular inverse.  The order of the calls matters: later steps
// consume panels overwritten by earlier ones.
inline void CholeskyUVar2( Matrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("hpd_inverse::CholeskyUVar2");
    if( A.Height() != A.Width() )
        throw std::logic_error("Nonsquare matrices cannot be triangular");
#endif
    // Views of A
    Matrix<F> ATL, ATR, A00, A01, A02,
              ABL, ABR, A10, A11, A12,
                        A20, A21, A22;

    PartitionDownDiagonal( A, ATL, ATR, ABL, ABR, 0 );
    while( ATL.Height() < A.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, ATR, A00, A01, A02,
                    A10, A11, A12,
          ABL, ABR, A20, A21, A22 );

        // Factor the diagonal block and solve against both panels
        Cholesky( UPPER, A11 );
        Trsm( RIGHT, UPPER, NORMAL, NON_UNIT, F(1), A11, A01 );
        Trsm( LEFT, UPPER, ADJOINT, NON_UNIT, F(1), A11, A12 );

        // Updates of the three surrounding blocks
        Herk( UPPER, NORMAL, F(1), A01, F(1), A00 );
        Gemm( NORMAL, NORMAL, F(-1), A01, A12, F(1), A02 );
        Herk( UPPER, ADJOINT, F(-1), A12, F(1), A22 );

        // Second round of solves against the panels
        Trsm( RIGHT, UPPER, ADJOINT, NON_UNIT, F(1), A11, A01 );
        Trsm( LEFT, UPPER, NORMAL, NON_UNIT, F(-1), A11, A12 );

        // Finish the diagonal block
        TriangularInverse( UPPER, NON_UNIT, A11 );
        Trtrmm( ADJOINT, UPPER, A11 );

        SlidePartitionDownDiagonal
        ( ATL, ATR, A00, A01, A02,
                    A10, A11, A12,
          ABL, ABR, A20, A21, A22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
inline void RepartitionDownDiagonal ( DM& ATL, DM& ATR, DM& A00, DM& A01, DM& A02, DM& A10, DM& A11, DM& A12, DM& ABL, DM& ABR, DM& A20, DM& A21, DM& A22, Int bsize ) { #ifndef RELEASE CallStackEntry cse("RepartitionDownDiagonal [DistMatrix]"); #endif View( A00, ATL ); PartitionDownDiagonal( ABR, A11, A12, A21, A22, bsize ); PartitionDown( ABL, A10, A20, A11.Height() ); PartitionRight( ATR, A01, A02, A11.Width() ); }
inline void TrdtrmmUVar1( Orientation orientation, Matrix<F>& U ) { #ifndef RELEASE PushCallStack("internal::TrtdrmmUVar1"); if( U.Height() != U.Width() ) throw std::logic_error("U must be square"); if( orientation == NORMAL ) throw std::logic_error("Orientation must be (conjugate-)transpose"); #endif Matrix<F> UTL, UTR, U00, U01, U02, UBL, UBR, U10, U11, U12, U20, U21, U22; Matrix<F> d1, S01; PartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( UTL.Height() < U.Height() && UTL.Width() < U.Height() ) { RepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); //--------------------------------------------------------------------/ U11.GetDiagonal( d1 ); S01 = U01; DiagonalSolve( LEFT, NORMAL, d1, U01, true ); Trrk( UPPER, NORMAL, orientation, F(1), U01, S01, F(1), U00 ); Trmm( RIGHT, UPPER, ADJOINT, UNIT, F(1), U11, U01 ); TrdtrmmUUnblocked( orientation, U11 ); //--------------------------------------------------------------------/ SlidePartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } #ifndef RELEASE PopCallStack(); #endif }
// Blocked lower Cholesky sweep (variant 2).
//
// Each iteration updates the diagonal block A11 with the row panel A10 to
// its left, factors it with the unblocked variant-3 kernel, and then updates
// and solves for the panel A21 beneath it.
inline void CholeskyLVar2( Matrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("internal::CholeskyLVar2");
    if( A.Height() != A.Width() )
        throw std::logic_error
        ("Can only compute Cholesky factor of square matrices");
#endif
    // Views of A
    Matrix<F> ATL, ATR, A00, A01, A02,
              ABL, ABR, A10, A11, A12,
                        A20, A21, A22;

    PartitionDownDiagonal( A, ATL, ATR, ABL, ABR, 0 );
    while( ATL.Height() < A.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, ATR, A00, A01, A02,
                    A10, A11, A12,
          ABL, ABR, A20, A21, A22 );

        // A11 := A11 - A10 A10', then factor it
        Herk( LOWER, NORMAL, F(-1), A10, F(1), A11 );
        CholeskyLVar3Unb( A11 );

        // A21 := A21 - A20 A10', then solve against the new factor
        Gemm( NORMAL, ADJOINT, F(-1), A20, A10, F(1), A21 );
        Trsm( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), A11, A21 );

        SlidePartitionDownDiagonal
        ( ATL, ATR, A00, A01, A02,
                    A10, A11, A12,
          ABL, ABR, A20, A21, A22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
inline void TrdtrmmLVar1( Orientation orientation, Matrix<F>& L ) { #ifndef RELEASE CallStackEntry entry("internal::TrdtrmmLVar1"); if( L.Height() != L.Width() ) LogicError("L must be square"); if( orientation == NORMAL ) LogicError("Orientation must be (conjugate-)transpose"); #endif Matrix<F> LTL, LTR, L00, L01, L02, LBL, LBR, L10, L11, L12, L20, L21, L22; Matrix<F> d1, S10; PartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( LTL.Height() < L.Height() && LTL.Width() < L.Height() ) { RepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); //--------------------------------------------------------------------/ L11.GetDiagonal( d1 ); S10 = L10; DiagonalSolve( LEFT, NORMAL, d1, L10, true ); Trrk( LOWER, orientation, NORMAL, F(1), S10, L10, F(1), L00 ); Trmm( LEFT, LOWER, orientation, UNIT, F(1), L11, L10 ); TrdtrmmLUnblocked( orientation, L11 ); //--------------------------------------------------------------------/ SlidePartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } }
// Unblocked LU factorization without pivoting, performed in place.
//
// The blocksize stack is pushed to 1 for the duration of the sweep so that
// each repartition exposes a single pivot alpha11.  For every pivot, the
// column below it is scaled by 1/alpha11 and the trailing block receives
// the rank-one update A22 := A22 - a21 a12.
//
// Throws SingularMatrixException when a pivot is exactly zero.  The
// blocksize stack is popped before throwing so that the temporary blocksize
// of 1 does not remain in effect for the caller.
inline void LU( Matrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("LU");
#endif
    // Views of A
    Matrix<F> ATL, ATR, A00, a01,     A02,
              ABL, ABR, a10, alpha11, a12,
                        A20, a21,     A22;

    PushBlocksizeStack( 1 );
    PartitionDownDiagonal( A, ATL, ATR, ABL, ABR, 0 );
    while( ATL.Height() < A.Height() && ATL.Width() < A.Width() )
    {
        RepartitionDownDiagonal
        ( ATL, ATR, A00, a01,     A02,
                    a10, alpha11, a12,
          ABL, ABR, A20, a21,     A22 );

        const F alpha = alpha11.Get(0,0);
        if( alpha == static_cast<F>(0) )
        {
            // Undo PushBlocksizeStack( 1 ) before unwinding
            PopBlocksizeStack();
            throw SingularMatrixException();
        }
        Scal( static_cast<F>(1)/alpha, a21 );
        Geru( static_cast<F>(-1), a21, a12, A22 );

        SlidePartitionDownDiagonal
        ( ATL, ATR, A00, a01,     A02,
                    a10, alpha11, a12,
          ABL, ABR, A20, a21,     A22 );
    }
    PopBlocksizeStack();
#ifndef RELEASE
    PopCallStack();
#endif
}
// Sequential Riccati driver built on the matrix sign function.
//
// W is overwritten with sgn(W) - I and split into a left panel ML holding
// the first n = W.Height()/2 columns and a right panel MR holding the rest.
// MR is negated in place and X is obtained from the least-squares solve of
// ML X = -MR.
//
// The unused 2x2 quadrant views of W (and the partitioning call that
// produced them) have been dropped: nothing below reads them.
void Ricatti( Matrix<F>& W, Matrix<F>& X, SignCtrl<Base<F>> ctrl )
{
    DEBUG_ONLY(CallStackEntry cse("Ricatti"))

    Sign( W, ctrl );
    const Int n = W.Height()/2;

    // (ML, MR) = sgn(W) - I
    ShiftDiagonal( W, F(-1) );

    // Solve for X in ML X = -MR
    Matrix<F> ML, MR;
    PartitionRight( W, ML, MR, n );
    Scale( F(-1), MR );
    ls::Overwrite( NORMAL, ML, MR, X );
}
// Blocked LU factorization without pivoting, performed in place.
//
// Each iteration factors the diagonal block A11 with the unblocked kernel,
// solves for the panels below (A21) and to the right (A12) of it, and then
// applies the Schur-complement update A22 := A22 - A21 A12.
inline void LU( Matrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("LU");
#endif
    // Views of A
    Matrix<F> ATL, ATR, A00, A01, A02,
              ABL, ABR, A10, A11, A12,
                        A20, A21, A22;

    PartitionDownDiagonal( A, ATL, ATR, ABL, ABR, 0 );
    while( ATL.Height() < A.Height() && ATL.Width() < A.Width() )
    {
        RepartitionDownDiagonal
        ( ATL, ATR, A00, A01, A02,
                    A10, A11, A12,
          ABL, ABR, A20, A21, A22 );

        // Diagonal block
        internal::LUUnb( A11 );

        // Panels: A21 uses the upper factor, A12 the unit lower factor
        Trsm( RIGHT, UPPER, NORMAL, NON_UNIT, F(1), A11, A21 );
        Trsm( LEFT, LOWER, NORMAL, UNIT, F(1), A11, A12 );

        // Trailing update
        Gemm( NORMAL, NORMAL, F(-1), A21, A12, F(1), A22 );

        SlidePartitionDownDiagonal
        ( ATL, ATR, A00, A01, A02,
                    A10, A11, A12,
          ABL, ABR, A20, A21, A22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
// Sequential Riccati driver built on the matrix sign function.
//
// W is overwritten with sgn(W) - I and split into a left panel ML holding
// the first n = W.Height()/2 columns and a right panel MR holding the rest.
// MR is negated in place and X is obtained from the least-squares solve of
// ML X = -MR.
//
// The unused 2x2 quadrant views of W (and the partitioning call that
// produced them) have been dropped: nothing below reads them.
void Riccati
( Matrix<F>& W,
  Matrix<F>& X,
  SignCtrl<Base<F>> ctrl )
{
    DEBUG_CSE

    Sign( W, ctrl );
    const Int n = W.Height()/2;

    // (ML, MR) = sgn(W) - I
    ShiftDiagonal( W, F(-1) );

    // Solve for X in ML X = -MR
    Matrix<F> ML, MR;
    PartitionRight( W, ML, MR, n );
    MR *= -1;
    ls::Overwrite( NORMAL, ML, MR, X );
}
// Blocked upper Cholesky sweep (variant 3), performed in place.
//
// While the bottom-right partition is non-empty: factor the diagonal block
// A11 with the unblocked kernel, solve for the row panel A12 to its right,
// and subtract A12' A12 from the trailing block A22.
inline void UVar3( Matrix<F>& A )
{
#ifndef RELEASE
    CallStackEntry entry("cholesky::UVar3");
    if( A.Height() != A.Width() )
        throw std::logic_error
        ("Can only compute Cholesky factor of square matrices");
#endif
    // Views of A
    Matrix<F> ATL, ATR, A00, A01, A02,
              ABL, ABR, A10, A11, A12,
                        A20, A21, A22;

    PartitionDownDiagonal( A, ATL, ATR, ABL, ABR, 0 );
    while( ABR.Height() > 0 )
    {
        RepartitionDownDiagonal
        ( ATL, ATR, A00, A01, A02,
                    A10, A11, A12,
          ABL, ABR, A20, A21, A22 );

        // Diagonal block
        cholesky::UVar3Unb( A11 );

        // Row panel, then trailing update
        Trsm( LEFT, UPPER, ADJOINT, NON_UNIT, F(1), A11, A12 );
        Herk( UPPER, ADJOINT, F(-1), A12, F(1), A22 );

        SlidePartitionDownDiagonal
        ( ATL, ATR, A00, A01, A02,
                    A10, A11, A12,
          ABL, ABR, A20, A21, A22 );
    }
}
inline int Lyapunov( const Matrix<F>& A, const Matrix<F>& C, Matrix<F>& X ) { #ifndef RELEASE CallStackEntry cse("Lyapunov"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( C.Height() != A.Height() || C.Width() != A.Height() ) LogicError("C must conform with A"); #endif const Int m = A.Height(); Matrix<F> W, WTL, WTR, WBL, WBR; Zeros( W, 2*m, 2*m ); PartitionDownDiagonal ( W, WTL, WTR, WBL, WBR, m ); WTL = A; Adjoint( A, WBR ); Scale( F(-1), WBR ); WTR = C; Scale( F(-1), WTR ); return Sylvester( m, W, X ); }
inline void Ricatti ( Matrix<F>& W, Matrix<F>& X, SignCtrl<Base<F>> signCtrl=SignCtrl<Base<F>>() ) { DEBUG_ONLY(CallStackEntry cse("Ricatti")) Sign( W, signCtrl ); const Int n = W.Height()/2; Matrix<F> WTL, WTR, WBL, WBR; PartitionDownDiagonal ( W, WTL, WTR, WBL, WBR, n ); // (ML, MR) = sgn(W) - I UpdateDiagonal( W, F(-1) ); // Solve for X in ML X = -MR Matrix<F> ML, MR; PartitionRight( W, ML, MR, n ); Scale( F(-1), MR ); LeastSquares( NORMAL, ML, MR, X ); }
// Blocked upper Trtrmm sweep (variant 1).
//
// Per diagonal block, with ' denoting the operation selected by
// `orientation`:
//
//     U00 := U00 + triu( U01 U01' )
//     U01 := U01 U11'
//     U11 := TrtrmmUUnblocked( orientation, U11 )
inline void TrtrmmUVar1( Orientation orientation, Matrix<T>& U )
{
#ifndef RELEASE
    PushCallStack("internal::TrtrmmUVar1");
#endif
    // Views of U
    Matrix<T> UTL, UTR, U00, U01, U02,
              UBL, UBR, U10, U11, U12,
                        U20, U21, U22;

    PartitionDownDiagonal( U, UTL, UTR, UBL, UBR, 0 );
    while( UTL.Height() < U.Height() && UTL.Width() < U.Height() )
    {
        RepartitionDownDiagonal
        ( UTL, UTR, U00, U01, U02,
                    U10, U11, U12,
          UBL, UBR, U20, U21, U22 );

        // Accumulate the panel's contribution into the finished block
        Trrk( UPPER, NORMAL, orientation, T(1), U01, U01, T(1), U00 );

        // Fold the diagonal block into the panel, then finish the block
        Trmm( RIGHT, UPPER, orientation, NON_UNIT, T(1), U11, U01 );
        TrtrmmUUnblocked( orientation, U11 );

        SlidePartitionDownDiagonal
        ( UTL, UTR, U00, U01, U02,
                    U10, U11, U12,
          UBL, UBR, U20, U21, U22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
// Unblocked LU factorization without pivoting, performed in place.
//
// Each repartition exposes a single pivot alpha11 (block size 1).  For every
// pivot, the column below it is scaled by 1/alpha11 and the trailing block
// receives the rank-one update A22 := A22 - a21 a12.
//
// Throws SingularMatrixException when a pivot is exactly zero.
inline void UnbFLAME( Matrix<F>& A )
{
#ifndef RELEASE
    CallStackEntry entry("lu::UnbFLAME");
#endif
    // Views of A
    Matrix<F> ATL, ATR, A00, a01,     A02,
              ABL, ABR, a10, alpha11, a12,
                        A20, a21,     A22;

    PartitionDownDiagonal( A, ATL, ATR, ABL, ABR, 0 );
    while( ATL.Height() < A.Height() && ATL.Width() < A.Width() )
    {
        RepartitionDownDiagonal
        ( ATL, ATR, A00, a01,     A02,
                    a10, alpha11, a12,
          ABL, ABR, A20, a21,     A22, 1 );

        const F alpha = alpha11.Get(0,0);
        if( alpha == F(0) )
            throw SingularMatrixException();

        // Form the reciprocal entirely in F: an integer numerator does not
        // combine with std::complex-based scalar types.
        Scale( F(1)/alpha, a21 );
        Geru( F(-1), a21, a12, A22 );

        SlidePartitionDownDiagonal
        ( ATL, ATR, A00, a01,     A02,
                    a10, alpha11, a12,
          ABL, ABR, A20, a21,     A22 );
    }
}
// Blocked lower Trtrmm sweep (variant 1).
//
// Per diagonal block, with ' denoting the operation selected by
// `orientation` (which must not be NORMAL):
//
//     L00 := L00 + tril( L10' L10 )
//     L10 := L11' L10
//     L11 := TrtrmmLUnblocked( orientation, L11 )
inline void TrtrmmLVar1( Orientation orientation, Matrix<T>& L )
{
#ifndef RELEASE
    CallStackEntry entry("internal::TrtrmmLVar1");
    if( orientation == NORMAL )
        LogicError("Must be (conjugate-)transposed");
#endif
    // Views of L
    Matrix<T> LTL, LTR, L00, L01, L02,
              LBL, LBR, L10, L11, L12,
                        L20, L21, L22;

    PartitionDownDiagonal( L, LTL, LTR, LBL, LBR, 0 );
    while( LTL.Height() < L.Height() && LTL.Width() < L.Height() )
    {
        RepartitionDownDiagonal
        ( LTL, LTR, L00, L01, L02,
                    L10, L11, L12,
          LBL, LBR, L20, L21, L22 );

        // Accumulate the panel's contribution into the finished block
        Trrk( LOWER, orientation, NORMAL, T(1), L10, L10, T(1), L00 );

        // Fold the diagonal block into the panel, then finish the block
        Trmm( LEFT, LOWER, orientation, NON_UNIT, T(1), L11, L10 );
        TrtrmmLUnblocked( orientation, L11 );

        SlidePartitionDownDiagonal
        ( LTL, LTR, L00, L01, L02,
                    L10, L11, L12,
          LBL, LBR, L20, L21, L22 );
    }
}
inline void LVar2( DistMatrix<F>& A ) { #ifndef RELEASE CallStackEntry entry("cholesky::LVar2"); if( A.Height() != A.Width() ) LogicError("Can only compute Cholesky factor of square matrices"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary distributions DistMatrix<F,MR, STAR> A10Adj_MR_STAR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,MC, STAR> X11_MC_STAR(g); DistMatrix<F,MC, STAR> X21_MC_STAR(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A10Adj_MR_STAR.AlignWith( A10 ); X11_MC_STAR.AlignWith( A10 ); X21_MC_STAR.AlignWith( A20 ); //--------------------------------------------------------------------// A10Adj_MR_STAR.AdjointFrom( A10 ); LocalGemm( NORMAL, NORMAL, F(1), A10, A10Adj_MR_STAR, X11_MC_STAR ); A11.SumScatterUpdate( F(-1), X11_MC_STAR ); A11_STAR_STAR = A11; LocalCholesky( LOWER, A11_STAR_STAR ); A11 = A11_STAR_STAR; LocalGemm( NORMAL, NORMAL, F(1), A20, A10Adj_MR_STAR, X21_MC_STAR ); A21.SumScatterUpdate( F(-1), X21_MC_STAR ); A21_VC_STAR = A21; LocalTrsm ( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), A11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } }
void LocalTrr2kKernel ( UpperOrLower uplo, Orientation orientA, Orientation orientB, Orientation orientC, Orientation orientD, T alpha, const ElementalMatrix<T>& A, const ElementalMatrix<T>& B, T beta, const ElementalMatrix<T>& C, const ElementalMatrix<T>& D, ElementalMatrix<T>& E ) { DEBUG_CSE const bool transA = orientA != NORMAL; const bool transB = orientB != NORMAL; const bool transC = orientC != NORMAL; const bool transD = orientD != NORMAL; // TODO: Stringent distribution and alignment checks typedef ElementalMatrix<T> ADM; auto A0 = unique_ptr<ADM>( A.Construct(A.Grid(),A.Root()) ); auto A1 = unique_ptr<ADM>( A.Construct(A.Grid(),A.Root()) ); auto B0 = unique_ptr<ADM>( B.Construct(B.Grid(),B.Root()) ); auto B1 = unique_ptr<ADM>( B.Construct(B.Grid(),B.Root()) ); auto C0 = unique_ptr<ADM>( C.Construct(C.Grid(),C.Root()) ); auto C1 = unique_ptr<ADM>( C.Construct(C.Grid(),C.Root()) ); auto D0 = unique_ptr<ADM>( D.Construct(D.Grid(),D.Root()) ); auto D1 = unique_ptr<ADM>( D.Construct(D.Grid(),D.Root()) ); auto ETL = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) ); auto ETR = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) ); auto EBL = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) ); auto EBR = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) ); auto FTL = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) ); auto FBR = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) ); const Int half = E.Height() / 2; if( transA ) LockedPartitionRight( A, *A0, *A1, half ); else LockedPartitionDown( A, *A0, *A1, half ); if( transB ) LockedPartitionDown( B, *B0, *B1, half ); else LockedPartitionRight( B, *B0, *B1, half ); if( transC ) LockedPartitionRight( C, *C0, *C1, half ); else LockedPartitionDown( C, *C0, *C1, half ); if( transD ) LockedPartitionDown( D, *D0, *D1, half ); else LockedPartitionRight( D, *D0, *D1, half ); PartitionDownDiagonal( E, *ETL, *ETR, *EBL, *EBR, half ); if( uplo == LOWER ) { Gemm ( orientA, orientB, alpha, A1->LockedMatrix(), B0->LockedMatrix(), T(1), 
EBL->Matrix() ); Gemm ( orientC, orientD, beta, C1->LockedMatrix(), D0->LockedMatrix(), T(1), EBL->Matrix() ); } else { Gemm ( orientA, orientB, alpha, A0->LockedMatrix(), B1->LockedMatrix(), T(1), ETR->Matrix() ); Gemm ( orientC, orientD, beta, C0->LockedMatrix(), D1->LockedMatrix(), T(1), ETR->Matrix() ); } FTL->AlignWith( *ETL ); FTL->Resize( ETL->Height(), ETL->Width() ); Gemm ( orientA, orientB, alpha, A0->LockedMatrix(), B0->LockedMatrix(), T(0), FTL->Matrix() ); Gemm ( orientC, orientD, beta, C0->LockedMatrix(), D0->LockedMatrix(), T(1), FTL->Matrix() ); AxpyTrapezoid( uplo, T(1), *FTL, *ETL ); FBR->AlignWith( *EBR ); FBR->Resize( EBR->Height(), EBR->Width() ); Gemm ( orientA, orientB, alpha, A1->LockedMatrix(), B1->LockedMatrix(), T(0), FBR->Matrix() ); Gemm ( orientC, orientD, beta, C1->LockedMatrix(), D1->LockedMatrix(), T(1), FBR->Matrix() ); AxpyTrapezoid( uplo, T(1), *FBR, *EBR ); }
inline void HPDInverseLVar2( DistMatrix<F>& A ) { #ifndef RELEASE PushCallStack("internal::HPDInverseLVar2"); if( A.Height() != A.Width() ) throw std::logic_error("Nonsquare matrices cannot be triangular"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,VR > A10_STAR_VR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,STAR,MC > A10_STAR_MC(g); DistMatrix<F,STAR,MR > A10_STAR_MR(g); DistMatrix<F,STAR,MC > A21Trans_STAR_MC(g); DistMatrix<F,VR, STAR> A21_VR_STAR(g); DistMatrix<F,STAR,MR > A21Adj_STAR_MR(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A10_STAR_VR.AlignWith( A00 ); A21_VC_STAR.AlignWith( A20 ); A10_STAR_MC.AlignWith( A00 ); A10_STAR_MR.AlignWith( A00 ); A21Trans_STAR_MC.AlignWith( A20 ); A21_VR_STAR.AlignWith( A22 ); A21Adj_STAR_MR.AlignWith( A22 ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; LocalCholesky( LOWER, A11_STAR_STAR ); A10_STAR_VR = A10; LocalTrsm ( LEFT, LOWER, NORMAL, NON_UNIT, F(1), A11_STAR_STAR, A10_STAR_VR ); A21_VC_STAR = A21; LocalTrsm ( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), A11_STAR_STAR, A21_VC_STAR ); A10_STAR_MC = A10_STAR_VR; A10_STAR_MR = A10_STAR_VR; LocalTrrk ( LOWER, ADJOINT, F(1), A10_STAR_MC, A10_STAR_MR, F(1), A00 ); A21Trans_STAR_MC.TransposeFrom( A21_VC_STAR ); LocalGemm ( TRANSPOSE, NORMAL, F(-1), A21Trans_STAR_MC, A10_STAR_MR, F(1), A20 ); A21_VR_STAR = A21_VC_STAR; A21Adj_STAR_MR.AdjointFrom( A21_VR_STAR ); LocalTrrk ( LOWER, TRANSPOSE, F(-1), A21Trans_STAR_MC, A21Adj_STAR_MR, F(1), A22 ); LocalTrsm ( LEFT, LOWER, ADJOINT, NON_UNIT, 
F(1), A11_STAR_STAR, A10_STAR_VR ); LocalTrsm ( RIGHT, LOWER, NORMAL, NON_UNIT, F(-1), A11_STAR_STAR, A21_VC_STAR ); LocalTriangularInverse( LOWER, NON_UNIT, A11_STAR_STAR ); LocalTrtrmm( ADJOINT, LOWER, A11_STAR_STAR ); A11 = A11_STAR_STAR; A10 = A10_STAR_VR; A21 = A21_VC_STAR; //--------------------------------------------------------------------// A10_STAR_VR.FreeAlignments(); A21_VC_STAR.FreeAlignments(); A10_STAR_MC.FreeAlignments(); A10_STAR_MR.FreeAlignments(); A21Trans_STAR_MC.FreeAlignments(); A21_VR_STAR.FreeAlignments(); A21Adj_STAR_MR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TwoSidedTrsmUVar1( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& U ) { #ifndef RELEASE CallStackEntry entry("internal::TwoSidedTrsmUVar1"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( U.Height() != U.Width() ) LogicError("Triangular matrices must be square"); if( A.Height() != U.Height() ) LogicError("A and U must be the same size"); #endif // Matrix views Matrix<F> ATL, ATR, A00, A01, A02, ABL, ABR, A10, A11, A12, A20, A21, A22; Matrix<F> UTL, UTR, U00, U01, U02, UBL, UBR, U10, U11, U12, U20, U21, U22; // Temporary products Matrix<F> Y01; PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); //--------------------------------------------------------------------// // Y01 := A00 U01 Zeros( Y01, A01.Height(), A01.Width() ); Hemm( LEFT, UPPER, F(1), A00, U01, F(0), Y01 ); // A01 := inv(U00)' A01 Trsm( LEFT, UPPER, ADJOINT, diag, F(1), U00, A01 ); // A01 := A01 - 1/2 Y01 Axpy( F(-1)/F(2), Y01, A01 ); // A11 := A11 - (U01' A01 + A01' U01) Her2k( UPPER, ADJOINT, F(-1), U01, A01, F(1), A11 ); // A11 := inv(U11)' A11 inv(U11) TwoSidedTrsmUUnb( diag, A11, U11 ); // A01 := A01 - 1/2 Y01 Axpy( F(-1)/F(2), Y01, A01 ); // A01 := A01 inv(U11) Trsm( RIGHT, UPPER, NORMAL, diag, F(1), U11, A01 ); //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, 
/*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } }