// Unblocked, in-place sweep over the upper triangle of a real square matrix.
//
// The sweep walks the diagonal from the bottom-right corner toward the
// top-left one column at a time (the blocksize is forced to 1 for the duration
// of the call). Each step builds a reflector from the column above the current
// diagonal entry and applies a symmetric rank-2 update to the leading block.
//
// A: square matrix, modified in place. In non-RELEASE builds a non-square A
//    raises std::logic_error.
//
// NOTE(review): Reflector() is presumably a Householder reflector generator
// that overwrites its arguments and returns the scalar tau - confirm against
// its definition.
inline void HermitianTridiagU( Matrix<R>& A )
{
#ifndef RELEASE
    PushCallStack("HermitianTridiagU");
    if( A.Height() != A.Width() )
        throw std::logic_error( "A must be square." );
#endif
    // Quadrant views of A
    Matrix<R> ATL, ATR,
              ABL, ABR;
    // 3x3 repartitioning around the current diagonal entry
    Matrix<R> A00, a01,     A02,
              a10, alpha11, a12,
              A20, a21,     A22;
    // Split of a01 into its top part and its last entry
    Matrix<R> a01T, alpha01B;
    // Workspace
    Matrix<R> w01;

    const int n = A.Height();

    PushBlocksizeStack( 1 );
    PartitionUpDiagonal( A, ATL, ATR, ABL, ABR, 0 );
    // Stop once only the 1x1 top-left block remains unprocessed.
    while( ABR.Height() < n-1 )
    {
        RepartitionUpDiagonal
        ( ATL, ATR,   A00, a01,     A02,
                      a10, alpha11, a12,
          ABL, ABR,   A20, a21,     A22 );

        PartitionUp( a01, a01T, alpha01B, 1 );
        w01.ResizeTo( a01.Height(), 1 );

        // Build the reflector from the column above the diagonal entry.
        const R tau = Reflector( alpha01B, a01T );

        // Remember the bottom entry of a01 and temporarily replace it by one
        // so that a01 can be used directly as the update vector.
        const R savedEntry = alpha01B.Get(0,0);
        alpha01B.Set(0,0,R(1));

        // w01 := tau A00 a01, then w01 := w01 + scale a01
        Symv( UPPER, tau, A00, a01, R(0), w01 );
        const R innerProd = Dot( w01, a01 );
        const R scale = -tau*innerProd/R(2);
        Axpy( scale, a01, w01 );

        // Rank-2 update of the upper triangle of A00
        Syr2( UPPER, R(-1), a01, w01, A00 );

        // Put the saved entry back.
        alpha01B.Set(0,0,savedEntry);

        SlidePartitionUpDiagonal
        ( ATL, ATR,   A00, a01,     A02,
                      a10, alpha11, a12,
          ABL, ABR,   A20, a21,     A22 );
    }
    PopBlocksizeStack();
#ifndef RELEASE
    PopCallStack();
#endif
}
// Blocked, in-place sweep over an upper-triangular matrix (variant 3).
//
// The diagonal is traversed from the bottom-right corner toward the top-left.
// For each diagonal block U11 the routine updates the blocks above it (U01),
// to its right (U12) and the corresponding corner block (U02), then hands U11
// to the unblocked kernel TriangularInverseUVar3Unb.
//
// diag: forwarded unchanged to the Trsm calls and to the unblocked kernel.
// U:    square matrix, modified in place. In non-RELEASE builds a non-square U
//       raises std::logic_error.
inline void TriangularInverseUVar3( UnitOrNonUnit diag, Matrix<F>& U )
{
#ifndef RELEASE
    PushCallStack("internal::TriangularInverseUVar3");
    if( U.Height() != U.Width() )
        throw std::logic_error("Nonsquare matrices cannot be triangular");
#endif
    // Quadrant views of U
    Matrix<F> UTL, UTR,
              UBL, UBR;
    // 3x3 repartitioning around the current diagonal block
    Matrix<F> U00, U01, U02,
              U10, U11, U12,
              U20, U21, U22;

    const int n = U.Height();

    PartitionUpDiagonal( U, UTL, UTR, UBL, UBR, 0 );
    while( UBR.Height() < n )
    {
        RepartitionUpDiagonal
        ( UTL, UTR,   U00, U01, U02,
                      U10, U11, U12,
          UBL, UBR,   U20, U21, U22 );

        // U01 := -U01 inv(U11)
        Trsm( RIGHT, UPPER, NORMAL, diag, F(-1), U11, U01 );
        // U02 := U02 + U01 U12   (uses the updated U01 and the old U12)
        Gemm( NORMAL, NORMAL, F(1), U01, U12, F(1), U02 );
        // U12 := inv(U11) U12
        Trsm( LEFT, UPPER, NORMAL, diag, F(1), U11, U12 );
        // Finally process the diagonal block itself.
        TriangularInverseUVar3Unb( diag, U11 );

        SlidePartitionUpDiagonal
        ( UTL, UTR,   U00, U01, U02,
                      U10, U11, U12,
          UBL, UBR,   U20, U21, U22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
inline void UVar3( UnitOrNonUnit diag, DistMatrix<F>& U ) { #ifndef RELEASE CallStackEntry entry("triangular_inverse::UVar3"); if( U.Height() != U.Width() ) LogicError("Nonsquare matrices cannot be triangular"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<F,VC, STAR> U01_VC_STAR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,VR > U12_STAR_VR(g); DistMatrix<F,STAR,MC > U01Trans_STAR_MC(g); DistMatrix<F,MR, STAR> U12Trans_MR_STAR(g); // Start the algorithm PartitionUpDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( UBR.Height() < U.Height() ) { RepartitionUpDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); U01Trans_STAR_MC.AlignWith( U02 ); U12Trans_MR_STAR.AlignWith( U02 ); //--------------------------------------------------------------------// U01_VC_STAR = U01; U11_STAR_STAR = U11; LocalTrsm ( RIGHT, UPPER, NORMAL, diag, F(-1), U11_STAR_STAR, U01_VC_STAR ); // We transpose before the communication to avoid cache-thrashing // in the unpacking stage. U12Trans_MR_STAR.TransposeFrom( U12 ); U01Trans_STAR_MC.TransposeFrom( U01_VC_STAR ); LocalGemm ( TRANSPOSE, TRANSPOSE, F(1), U01Trans_STAR_MC, U12Trans_MR_STAR, F(1), U02 ); U01.TransposeFrom( U01Trans_STAR_MC ); U12_STAR_VR.TransposeFrom( U12Trans_MR_STAR ); LocalTrsm ( LEFT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, U12_STAR_VR ); LocalTriangularInverse( UPPER, diag, U11_STAR_STAR ); U11 = U11_STAR_STAR; U12 = U12_STAR_VR; //--------------------------------------------------------------------// SlidePartitionUpDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); } }
inline void internal::HermitianTridiagU( DistMatrix<R,MC,MR>& A ) { #ifndef RELEASE PushCallStack("internal::HermitianTridiagU"); if( A.Height() != A.Width() ) throw std::logic_error( "A must be square." ); #endif const Grid& g = A.Grid(); if( g.InGrid() ) { // Matrix views DistMatrix<R,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary distributions DistMatrix<R,MC, MR > WPan(g); DistMatrix<R,STAR,STAR> A11_STAR_STAR(g); DistMatrix<R,MC, STAR> APan_MC_STAR(g), A01_MC_STAR(g), A11_MC_STAR(g); DistMatrix<R,MR, STAR> APan_MR_STAR(g), A01_MR_STAR(g), A11_MR_STAR(g); DistMatrix<R,MC, STAR> WPan_MC_STAR(g), W01_MC_STAR(g), W11_MC_STAR(g); DistMatrix<R,MR, STAR> WPan_MR_STAR(g), W01_MR_STAR(g), W11_MR_STAR(g); PartitionUpDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ABR.Height() < A.Height() ) { RepartitionUpDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); if( A00.Height() > 0 ) { WPan.AlignWith( A01 ); APan_MC_STAR.AlignWith( A00 ); WPan_MC_STAR.AlignWith( A00 ); APan_MR_STAR.AlignWith( A00 ); WPan_MR_STAR.AlignWith( A00 ); //------------------------------------------------------------// WPan.ResizeTo( ATL.Height(), A11.Width() ); APan_MC_STAR.ResizeTo( ATL.Height(), A11.Width() ); WPan_MC_STAR.ResizeTo( ATL.Height(), A11.Width() ); APan_MR_STAR.ResizeTo( ATL.Height(), A11.Width() ); WPan_MR_STAR.ResizeTo( ATL.Height(), A11.Width() ); internal::HermitianPanelTridiagU ( ATL, WPan, APan_MC_STAR, APan_MR_STAR, WPan_MC_STAR, WPan_MR_STAR ); PartitionUp ( APan_MC_STAR, A01_MC_STAR, A11_MC_STAR, A11.Height() ); PartitionUp ( APan_MR_STAR, A01_MR_STAR, A11_MR_STAR, A11.Height() ); PartitionUp ( WPan_MC_STAR, W01_MC_STAR, W11_MC_STAR, A11.Height() ); PartitionUp ( WPan_MR_STAR, W01_MR_STAR, W11_MR_STAR, A11.Height() ); internal::LocalTrr2k ( UPPER, TRANSPOSE, TRANSPOSE, (R)-1, A01_MC_STAR, W01_MR_STAR, 
W01_MC_STAR, A01_MR_STAR, (R)1, A00 ); //------------------------------------------------------------// WPan_MR_STAR.FreeAlignments(); APan_MR_STAR.FreeAlignments(); WPan_MC_STAR.FreeAlignments(); APan_MC_STAR.FreeAlignments(); WPan.FreeAlignments(); } else { A11_STAR_STAR = A11; HermitianTridiag( UPPER, A11_STAR_STAR.LocalMatrix() ); A11 = A11_STAR_STAR; } SlidePartitionUpDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); } } #ifndef RELEASE PopCallStack(); #endif }
inline void internal::HermitianTridiagU ( DistMatrix<Complex<R>,MC, MR >& A, DistMatrix<Complex<R>,STAR,STAR>& t ) { #ifndef RELEASE PushCallStack("internal::HermitianTridiagU"); if( A.Grid() != t.Grid() ) throw std::logic_error("{A,t} must be distributed over the same grid"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( t.Viewing() ) throw std::logic_error("t must not be a view"); #endif typedef Complex<R> C; const Grid& g = A.Grid(); DistMatrix<C,MD,STAR> tDiag(g); tDiag.AlignWithDiagonal( A, 1 ); tDiag.ResizeTo( A.Height()-1, 1 ); if( g.InGrid() ) { // Matrix views DistMatrix<C,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); // Temporary distributions DistMatrix<C,MC, MR > WPan(g); DistMatrix<C,STAR,STAR> t1_STAR_STAR(g); DistMatrix<C,STAR,STAR> A11_STAR_STAR(g); DistMatrix<C,MC, STAR> APan_MC_STAR(g), A01_MC_STAR(g), A11_MC_STAR(g); DistMatrix<C,MR, STAR> APan_MR_STAR(g), A01_MR_STAR(g), A11_MR_STAR(g); DistMatrix<C,MC, STAR> WPan_MC_STAR(g), W01_MC_STAR(g), W11_MC_STAR(g); DistMatrix<C,MR, STAR> WPan_MR_STAR(g), W01_MR_STAR(g), W11_MR_STAR(g); PartitionUpDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionUp ( tDiag, tT, tB, 0 ); while( ABR.Height() < A.Height() ) { RepartitionUpDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); RepartitionUp ( tT, t0, t1, /**/ /**/ tB, t2 ); if( A00.Height() > 0 ) { WPan.AlignWith( A01 ); APan_MC_STAR.AlignWith( A00 ); WPan_MC_STAR.AlignWith( A00 ); APan_MR_STAR.AlignWith( A00 ); WPan_MR_STAR.AlignWith( A00 ); //------------------------------------------------------------// WPan.ResizeTo( ATL.Height(), A11.Width() ); APan_MC_STAR.ResizeTo( ATL.Height(), A11.Width() ); WPan_MC_STAR.ResizeTo( ATL.Height(), A11.Width() ); APan_MR_STAR.ResizeTo( ATL.Height(), A11.Width() ); 
WPan_MR_STAR.ResizeTo( ATL.Height(), A11.Width() ); internal::HermitianPanelTridiagU ( ATL, WPan, t1, APan_MC_STAR, APan_MR_STAR, WPan_MC_STAR, WPan_MR_STAR ); PartitionUp ( APan_MC_STAR, A01_MC_STAR, A11_MC_STAR, A11.Height() ); PartitionUp ( APan_MR_STAR, A01_MR_STAR, A11_MR_STAR, A11.Height() ); PartitionUp ( WPan_MC_STAR, W01_MC_STAR, W11_MC_STAR, A11.Height() ); PartitionUp ( WPan_MR_STAR, W01_MR_STAR, W11_MR_STAR, A11.Height() ); internal::LocalTrr2k ( UPPER, ADJOINT, ADJOINT, (C)-1, A01_MC_STAR, W01_MR_STAR, W01_MC_STAR, A01_MR_STAR, (C)1, A00 ); //------------------------------------------------------------// WPan_MR_STAR.FreeAlignments(); APan_MR_STAR.FreeAlignments(); WPan_MC_STAR.FreeAlignments(); APan_MC_STAR.FreeAlignments(); WPan.FreeAlignments(); } else { A11_STAR_STAR = A11; t1_STAR_STAR.ResizeTo( t1.Height(), 1 ); HermitianTridiag ( UPPER, A11_STAR_STAR.LocalMatrix(), t1_STAR_STAR.LocalMatrix() ); A11 = A11_STAR_STAR; t1 = t1_STAR_STAR; } SlidePartitionUp ( tT, t0, /**/ /**/ t1, tB, t2 ); SlidePartitionUpDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); } } // Redistribute from matrix-diagonal form to fully replicated t = tDiag; #ifndef RELEASE PopCallStack(); #endif }
inline void Inverse( DistMatrix<F>& A ) { #ifndef RELEASE PushCallStack("Inverse"); if( A.Height() != A.Width() ) throw std::logic_error("Cannot invert non-square matrices"); #endif const Grid& g = A.Grid(); DistMatrix<int,VC,STAR> p( g ); LU( A, p ); TriangularInverse( UPPER, NON_UNIT, A ); // Solve inv(A) L = inv(U) for inv(A) DistMatrix<F> ATL(g), ATR(g), ABL(g), ABR(g); DistMatrix<F> A00(g), A01(g), A02(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> A1(g), A2(g); DistMatrix<F,VC, STAR> A1_VC_STAR(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,VR, STAR> L21_VR_STAR(g); DistMatrix<F,STAR,MR > L21Trans_STAR_MR(g); DistMatrix<F,MC, STAR> Z1(g); PartitionUpDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ABR.Height() < A.Height() ) { RepartitionUpDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); View( A1, A, 0, A00.Width(), A.Height(), A01.Width() ); View( A2, A, 0, A00.Width()+A01.Width(), A.Height(), A02.Width() ); L21_VR_STAR.AlignWith( A2 ); L21Trans_STAR_MR.AlignWith( A2 ); Z1.AlignWith( A01 ); Z1.ResizeTo( A.Height(), A01.Width() ); //--------------------------------------------------------------------// // Copy out L1 L11_STAR_STAR = A11; L21_VR_STAR = A21; L21Trans_STAR_MR.TransposeFrom( L21_VR_STAR ); // Zero the strictly lower triangular portion of A1 MakeTrapezoidal( LEFT, UPPER, 0, A11 ); Zero( A21 ); // Perform the lazy update of A1 internal::LocalGemm ( NORMAL, TRANSPOSE, F(-1), A2, L21Trans_STAR_MR, F(0), Z1 ); A1.SumScatterUpdate( F(1), Z1 ); // Solve against this diagonal block of L11 A1_VC_STAR = A1; internal::LocalTrsm ( RIGHT, LOWER, NORMAL, UNIT, F(1), L11_STAR_STAR, A1_VC_STAR ); A1 = A1_VC_STAR; //--------------------------------------------------------------------// Z1.FreeAlignments(); L21Trans_STAR_MR.FreeAlignments(); L21_VR_STAR.FreeAlignments(); SlidePartitionUpDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, 
/*************/ /*******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); } // inv(A) := inv(A) P ApplyInverseColumnPivots( A, p ); #ifndef RELEASE PopCallStack(); #endif }
inline void Inverse( Matrix<F>& A ) { #ifndef RELEASE PushCallStack("Inverse"); if( A.Height() != A.Width() ) throw std::logic_error("Cannot invert non-square matrices"); #endif Matrix<int> p; LU( A, p ); TriangularInverse( UPPER, NON_UNIT, A ); // Solve inv(A) L = inv(U) for inv(A) Matrix<F> ATL, ATR, ABL, ABR; Matrix<F> A00, A01, A02, A10, A11, A12, A20, A21, A22; Matrix<F> A1, A2; Matrix<F> L11, L21; PartitionUpDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ABR.Height() < A.Height() ) { RepartitionUpDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); View( A1, A, 0, A00.Width(), A.Height(), A01.Width() ); View( A2, A, 0, A00.Width()+A01.Width(), A.Height(), A02.Width() ); //--------------------------------------------------------------------// // Copy out L1 L11 = A11; L21 = A21; // Zero the strictly lower triangular portion of A1 MakeTrapezoidal( LEFT, UPPER, 0, A11 ); Zero( A21 ); // Perform the lazy update of A1 Gemm( NORMAL, NORMAL, F(-1), A2, L21, F(1), A1 ); // Solve against this diagonal block of L11 Trsm( RIGHT, LOWER, NORMAL, UNIT, F(1), L11, A1 ); //--------------------------------------------------------------------// SlidePartitionUpDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /*******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); } // inv(A) := inv(A) P ApplyInverseColumnPivots( A, p ); #ifndef RELEASE PopCallStack(); #endif }
inline void HermitianTridiagU ( Matrix<Complex<R> >& A, Matrix<Complex<R> >& t ) { #ifndef RELEASE PushCallStack("HermitianTridiagU"); #endif const int tHeight = std::max(A.Height()-1,0); #ifndef RELEASE if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( t.Viewing() && (t.Height() != tHeight || t.Width() != 1) ) throw std::logic_error("t is of the wrong size"); #endif typedef Complex<R> C; if( !t.Viewing() ) t.ResizeTo( tHeight, 1 ); // Matrix views Matrix<C> ATL, ATR, A00, a01, A02, a01T, ABL, ABR, a10, alpha11, a12, alpha01B, A20, a21, A22; // Temporary matrices Matrix<C> w01; PushBlocksizeStack( 1 ); PartitionUpDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ABR.Height()+1 < A.Height() ) { RepartitionUpDiagonal ( ATL, /**/ ATR, A00, a01, /**/ A02, /**/ a10, alpha11, /**/ a12, /*************/ /**********************/ ABL, /**/ ABR, A20, a21, /**/ A22 ); PartitionUp ( a01, a01T, alpha01B, 1 ); w01.ResizeTo( a01.Height(), 1 ); //--------------------------------------------------------------------// const C tau = Reflector( alpha01B, a01T ); const R epsilon1 = alpha01B.GetRealPart(0,0); t.Set(t.Height()-1-A22.Height(),0,tau); alpha01B.Set(0,0,C(1)); Hemv( UPPER, tau, A00, a01, C(0), w01 ); const C alpha = -tau*Dot( w01, a01 )/C(2); Axpy( alpha, a01, w01 ); Her2( UPPER, C(-1), a01, w01, A00 ); alpha01B.Set(0,0,epsilon1); //--------------------------------------------------------------------// SlidePartitionUpDiagonal ( ATL, /**/ ATR, A00, /**/ a01, A02, /*************/ /**********************/ /**/ a10, /**/ alpha11, a12, ABL, /**/ ABR, A20, /**/ a21, A22 ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }