inline void PanelHouseholder( DistMatrix<F>& A, DistMatrix<F,MD,STAR>& t ) { #ifndef RELEASE CallStackEntry entry("lq::PanelHouseholder"); if( A.Grid() != t.Grid() ) LogicError("{A,t} must be distributed over the same grid"); if( t.Height() != Min(A.Height(),A.Width()) || t.Width() != 1 ) LogicError ("t must be a vector of height equal to the minimum dimension of A"); if( !t.AlignedWithDiagonal( A, 0 ) ) LogicError("t must be aligned with A's main diagonal"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), a01(g), A02(g), aTopRow(g), ABottomPan(g), ABL(g), ABR(g), a10(g), alpha11(g), a12(g), A20(g), a21(g), A22(g); DistMatrix<F,MD,STAR> tT(g), t0(g), tB(g), tau1(g), t2(g); // Temporary distributions DistMatrix<F> aTopRowConj(g); DistMatrix<F,STAR,MR > aTopRowConj_STAR_MR(g); DistMatrix<F,MC, STAR> z_MC_STAR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( t, tT, tB, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ a01, A02, /*************/ /**********************/ /**/ a10, /**/ alpha11, a12, ABL, /**/ ABR, A20, /**/ a21, A22, 1 ); RepartitionDown ( tT, t0, /**/ /****/ tau1, tB, t2, 1 ); View1x2( aTopRow, alpha11, a12 ); View1x2( ABottomPan, a21, A22 ); aTopRowConj_STAR_MR.AlignWith( ABottomPan ); z_MC_STAR.AlignWith( ABottomPan ); //--------------------------------------------------------------------// // Compute the Householder reflector const F tau = Reflector( alpha11, a12 ); tau1.Set( 0, 0, tau ); // Apply the Householder reflector const bool myDiagonalEntry = ( g.Row() == alpha11.ColAlignment() && g.Col() == alpha11.RowAlignment() ); F alpha = 0; if( myDiagonalEntry ) { alpha = alpha11.GetLocal(0,0); alpha11.SetLocal(0,0,1); } Conjugate( aTopRow, aTopRowConj ); aTopRowConj_STAR_MR = aTopRowConj; Zeros( z_MC_STAR, ABottomPan.Height(), 1 ); LocalGemv ( NORMAL, F(1), ABottomPan, aTopRowConj_STAR_MR, F(0), z_MC_STAR ); z_MC_STAR.SumOverRow(); Ger ( -Conj(tau), z_MC_STAR.LockedMatrix(), aTopRowConj_STAR_MR.LockedMatrix(), ABottomPan.Matrix() ); if( myDiagonalEntry ) alpha11.SetLocal(0,0,alpha); //--------------------------------------------------------------------// SlidePartitionDown ( tT, t0, tau1, /**/ /****/ tB, t2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, a01, /**/ A02, /**/ a10, alpha11, /**/ a12, /*************/ /**********************/ ABL, /**/ ABR, A20, a21, /**/ A22 ); } }
inline void internal::ApplyPackedReflectorsLLVF ( Conjugation conjugation, int offset, const DistMatrix<Complex<R>,MC,MR >& H, const DistMatrix<Complex<R>,MD,STAR>& t, DistMatrix<Complex<R>,MC,MR >& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsLLVF"); if( H.Grid() != t.Grid() || t.Grid() != A.Grid() ) throw std::logic_error ("{H,t,A} must be distributed over the same grid"); if( offset > 0 ) throw std::logic_error("Transforms cannot extend above matrix"); if( offset < -H.Height() ) throw std::logic_error("Transforms cannot extend below matrix"); if( H.Height() != A.Height() ) throw std::logic_error ("Height of transforms must equal height of target matrix"); if( t.Height() != H.DiagonalLength( offset ) ) throw std::logic_error("t must be the same length as H's offset diag."); if( !t.AlignedWithDiagonal( H, offset ) ) throw std::logic_error("t must be aligned with H's 'offset' diagonal"); #endif typedef Complex<R> C; const Grid& g = H.Grid(); // Matrix views DistMatrix<C,MC,MR> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<C,MC,MR> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); DistMatrix<C,VC, STAR> HPan_VC_STAR(g); DistMatrix<C,MC, STAR> HPan_MC_STAR(g); DistMatrix<C,STAR,STAR> t1_STAR_STAR(g); DistMatrix<C,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<C,STAR,MR > Z_STAR_MR(g); DistMatrix<C,STAR,VR > Z_STAR_VR(g); LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionDown ( t, tT, tB, 0 ); PartitionDown ( A, AT, AB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); int HPanHeight = H11.Height() + H21.Height(); int HPanWidth = std::min( H11.Width(), std::max(HPanHeight+offset,0) ); HPan.LockedView( H, H00.Height(), H00.Width(), HPanHeight, HPanWidth ); LockedRepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2, HPanWidth ); RepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); HPan_MC_STAR.AlignWith( AB ); Z_STAR_MR.AlignWith( AB ); Z_STAR_VR.AlignWith( AB ); Z_STAR_MR.ResizeTo( HPan.Width(), AB.Width() ); SInv_STAR_STAR.ResizeTo( HPan.Width(), HPan.Width() ); Zero( SInv_STAR_STAR ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( LEFT, LOWER, offset, HPanCopy ); SetDiagonalToOne( LEFT, offset, HPanCopy ); HPan_VC_STAR = HPanCopy; Herk ( UPPER, ADJOINT, (C)1, HPan_VC_STAR.LockedLocalMatrix(), (C)0, SInv_STAR_STAR.LocalMatrix() ); SInv_STAR_STAR.SumOverGrid(); t1_STAR_STAR = t1; FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR ); HPan_MC_STAR = HPanCopy; internal::LocalGemm ( ADJOINT, NORMAL, (C)1, HPan_MC_STAR, AB, (C)0, Z_STAR_MR ); Z_STAR_VR.SumScatterFrom( Z_STAR_MR ); internal::LocalTrsm ( LEFT, UPPER, ADJOINT, NON_UNIT, (C)1, SInv_STAR_STAR, Z_STAR_VR ); Z_STAR_MR = Z_STAR_VR; internal::LocalGemm ( NORMAL, NORMAL, (C)-1, HPan_MC_STAR, Z_STAR_MR, (C)1, AB ); //--------------------------------------------------------------------// HPan_MC_STAR.FreeAlignments(); Z_STAR_MR.FreeAlignments(); Z_STAR_VR.FreeAlignments(); SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlideLockedPartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); SlidePartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void RUVF ( Conjugation conjugation, Int offset, const DistMatrix<F>& H, const DistMatrix<F,MD,STAR>& t, DistMatrix<F>& A ) { #ifndef RELEASE CallStackEntry cse("apply_packed_reflectors::RUVF"); if( H.Grid() != t.Grid() || t.Grid() != A.Grid() ) LogicError("{H,t,A} must be distributed over the same grid"); // TODO: Proper dimension checks if( t.Height() != H.DiagonalLength(offset) ) LogicError("t must be the same length as H's offset diag"); if( !t.AlignedWithDiagonal( H, offset ) ) LogicError("t must be aligned with H's 'offset' diagonal"); #endif const Grid& g = H.Grid(); DistMatrix<F> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<F> ALeft(g); DistMatrix<F,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); DistMatrix<F,VC, STAR> HPan_VC_STAR(g); DistMatrix<F,MR, STAR> HPan_MR_STAR(g); DistMatrix<F,STAR,STAR> t1_STAR_STAR(g); DistMatrix<F,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<F,STAR,MC > ZAdj_STAR_MC(g); DistMatrix<F,STAR,VC > ZAdj_STAR_VC(g); LockedPartitionDownOffsetDiagonal ( offset, H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionDown ( t, tT, tB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); LockedRepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2 ); LockedView2x1( HPan, H01, H11 ); View( ALeft, A, 0, 0, A.Height(), HPan.Height() ); HPan_MR_STAR.AlignWith( ALeft ); ZAdj_STAR_MC.AlignWith( ALeft ); ZAdj_STAR_VC.AlignWith( ALeft ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( UPPER, HPanCopy, 0, RIGHT ); SetDiagonal( HPanCopy, F(1), 0, RIGHT ); HPan_VC_STAR = HPanCopy; Zeros( SInv_STAR_STAR, HPan.Width(), HPan.Width() ); Herk ( UPPER, ADJOINT, F(1), HPan_VC_STAR.LockedMatrix(), F(0), SInv_STAR_STAR.Matrix() ); SInv_STAR_STAR.SumOverGrid(); t1_STAR_STAR = t1; FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR ); HPan_MR_STAR = HPan_VC_STAR; LocalGemm( ADJOINT, ADJOINT, F(1), HPan_MR_STAR, ALeft, ZAdj_STAR_MC ); ZAdj_STAR_VC.SumScatterFrom( ZAdj_STAR_MC ); LocalTrsm ( LEFT, UPPER, ADJOINT, NON_UNIT, F(1), SInv_STAR_STAR, ZAdj_STAR_VC ); ZAdj_STAR_MC = ZAdj_STAR_VC; LocalGemm ( ADJOINT, ADJOINT, F(-1), ZAdj_STAR_MC, HPan_MR_STAR, F(1), ALeft ); //--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlideLockedPartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); } }
inline void Householder( DistMatrix<F>& A, DistMatrix<F,MD,STAR>& t ) { #ifndef RELEASE CallStackEntry entry("qr::Householder"); if( A.Grid() != t.Grid() ) LogicError("{A,s} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); if( t.Viewing() ) { if( !t.AlignedWithDiagonal( A ) ) LogicError("t was not aligned with A"); } else { t.AlignWithDiagonal( A ); } t.ResizeTo( Min(A.Height(),A.Width()), 1 ); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ALeftPan(g), ARightPan(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( t, tT, tB, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2 ); View2x1 ( ALeftPan, A11, A21 ); View2x1 ( ARightPan, A12, A22 ); //--------------------------------------------------------------------// PanelHouseholder( ALeftPan, t1 ); ApplyQ( LEFT, ADJOINT, ALeftPan, t1, ARightPan ); //--------------------------------------------------------------------// SlidePartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } }
inline void LQ( DistMatrix<Complex<R>,MC,MR >& A, DistMatrix<Complex<R>,MD,STAR>& t ) { #ifndef RELEASE PushCallStack("LQ"); if( A.Grid() != t.Grid() ) throw std::logic_error("{A,t} must be distributed over the same grid"); #endif typedef Complex<R> C; const Grid& g = A.Grid(); if( t.Viewing() ) { if( !t.AlignedWithDiagonal( A ) ) throw std::logic_error("t was not aligned with A"); if( t.Height() != std::min(A.Height(),A.Width()) || t.Width() != 1 ) throw std::logic_error("t was not the appropriate shape"); } else { t.AlignWithDiagonal( A ); t.ResizeTo( std::min(A.Height(),A.Width()), 1 ); } // Matrix views DistMatrix<C,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ATopPan(g), ABottomPan(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); PartitionDownLeftDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( t, tT, tB, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2 ); ATopPan.View1x2( A11, A12 ); ABottomPan.View1x2( A21, A22 ); //--------------------------------------------------------------------// internal::PanelLQ( ATopPan, t1 ); ApplyPackedReflectors ( RIGHT, UPPER, HORIZONTAL, FORWARD, CONJUGATED, 0, ATopPan, t1, ABottomPan ); //--------------------------------------------------------------------// SlidePartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void RLHF ( Conjugation conjugation, int offset, const DistMatrix<Complex<R> >& H, const DistMatrix<Complex<R>,MD,STAR>& t, DistMatrix<Complex<R> >& A ) { #ifndef RELEASE PushCallStack("apply_packed_reflectors::RLHF"); if( H.Grid() != t.Grid() || t.Grid() != A.Grid() ) throw std::logic_error ("{H,t,A} must be distributed over the same grid"); if( offset > 0 || offset < -H.Width() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Width() ) throw std::logic_error ("Width of transforms must equal width of target matrix"); if( t.Height() != H.DiagonalLength( offset ) ) throw std::logic_error("t must be the same length as H's offset diag"); if( !t.AlignedWithDiagonal( H, offset ) ) throw std::logic_error("t must be aligned with H's 'offset' diagonal"); #endif typedef Complex<R> C; const Grid& g = H.Grid(); DistMatrix<C> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<C> ALeft(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); DistMatrix<C,STAR,VR > HPan_STAR_VR(g); DistMatrix<C,STAR,MR > HPan_STAR_MR(g); DistMatrix<C,STAR,STAR> t1_STAR_STAR(g); DistMatrix<C,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<C,STAR,MC > ZAdj_STAR_MC(g); DistMatrix<C,STAR,VC > ZAdj_STAR_VC(g); LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionDown ( t, tT, tB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); const int HPanWidth = H10.Width() + H11.Width(); const int HPanOffset = std::min( H11.Height(), std::max(-offset-H00.Height(),0) ); const int HPanHeight = H11.Height()-HPanOffset; LockedView ( HPan, H, H00.Height()+HPanOffset, 0, HPanHeight, HPanWidth ); LockedRepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2, HPanHeight ); View( ALeft, A, 0, 0, A.Height(), HPanWidth ); HPan_STAR_MR.AlignWith( ALeft ); ZAdj_STAR_MC.AlignWith( ALeft ); ZAdj_STAR_VC.AlignWith( ALeft ); Zeros( HPan.Height(), ALeft.Height(), ZAdj_STAR_MC ); Zeros( HPan.Height(), HPan.Height(), SInv_STAR_STAR ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( RIGHT, LOWER, offset, HPanCopy ); SetDiagonal( RIGHT, offset, HPanCopy, C(1) ); HPan_STAR_VR = HPanCopy; Herk ( UPPER, NORMAL, C(1), HPan_STAR_VR.LockedMatrix(), C(0), SInv_STAR_STAR.Matrix() ); SInv_STAR_STAR.SumOverGrid(); t1_STAR_STAR = t1; FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR ); HPan_STAR_MR = HPan_STAR_VR; LocalGemm ( NORMAL, ADJOINT, C(1), HPan_STAR_MR, ALeft, C(0), ZAdj_STAR_MC ); ZAdj_STAR_VC.SumScatterFrom( ZAdj_STAR_MC ); LocalTrsm ( LEFT, UPPER, ADJOINT, NON_UNIT, C(1), SInv_STAR_STAR, ZAdj_STAR_VC ); ZAdj_STAR_MC = ZAdj_STAR_VC; LocalGemm ( ADJOINT, NORMAL, C(-1), ZAdj_STAR_MC, HPan_STAR_MR, C(1), ALeft ); //--------------------------------------------------------------------// HPan_STAR_MR.FreeAlignments(); ZAdj_STAR_MC.FreeAlignments(); ZAdj_STAR_VC.FreeAlignments(); SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlideLockedPartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void LUHF ( Conjugation conjugation, Int offset, const DistMatrix<F>& H, const DistMatrix<F,MD,STAR>& t, DistMatrix<F>& A ) { #ifndef RELEASE CallStackEntry cse("apply_packed_reflectors::LUHF"); if( H.Grid() != t.Grid() || t.Grid() != A.Grid() ) LogicError("{H,t,A} must be distributed over the same grid"); // TODO: Proper dimension checks if( t.Height() != H.DiagonalLength(offset) ) LogicError("t must be the same length as H's offset diag"); if( !t.AlignedWithDiagonal( H, offset ) ) LogicError("t must be aligned with H's offset diagonal"); #endif const Grid& g = H.Grid(); DistMatrix<F> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<F> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<F,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); DistMatrix<F,STAR,VR > HPan_STAR_VR(g); DistMatrix<F,STAR,MC > HPan_STAR_MC(g); DistMatrix<F,STAR,STAR> t1_STAR_STAR(g); DistMatrix<F,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<F,STAR,MR > Z_STAR_MR(g); DistMatrix<F,STAR,VR > Z_STAR_VR(g); LockedPartitionDownOffsetDiagonal ( offset, H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionDown ( t, tT, tB, 0 ); PartitionDown ( A, AT, AB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); LockedRepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2 ); RepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2, H11.Height() ); LockedView1x2( HPan, H11, H12 ); HPan_STAR_MC.AlignWith( AB ); Z_STAR_MR.AlignWith( AB ); Z_STAR_VR.AlignWith( AB ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTriangular( UPPER, HPanCopy ); SetDiagonal( HPanCopy, F(1) ); HPan_STAR_VR = HPanCopy; Zeros( SInv_STAR_STAR, HPan.Height(), HPan.Height() ); Herk ( LOWER, NORMAL, F(1), HPan_STAR_VR.LockedMatrix(), F(0), SInv_STAR_STAR.Matrix() ); SInv_STAR_STAR.SumOverGrid(); t1_STAR_STAR = t1; FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR ); HPan_STAR_MC = HPan_STAR_VR; LocalGemm( NORMAL, NORMAL, F(1), HPan_STAR_MC, AB, Z_STAR_MR ); Z_STAR_VR.SumScatterFrom( Z_STAR_MR ); LocalTrsm ( LEFT, LOWER, NORMAL, NON_UNIT, F(1), SInv_STAR_STAR, Z_STAR_VR ); Z_STAR_MR = Z_STAR_VR; LocalGemm( ADJOINT, NORMAL, F(-1), HPan_STAR_MC, Z_STAR_MR, F(1), AB ); //--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlideLockedPartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); SlidePartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); } }
inline void ApplyPackedReflectorsLUHB ( Conjugation conjugation, int offset, const DistMatrix<Complex<R> >& H, const DistMatrix<Complex<R>,MD,STAR>& t, DistMatrix<Complex<R> >& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsLUHB"); if( H.Grid() != t.Grid() || t.Grid() != A.Grid() ) throw std::logic_error ("{H,t,A} must be distributed over the same grid"); if( offset < 0 || offset > H.Width() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Height() ) throw std::logic_error ("Width of transforms must equal height of target matrix"); if( t.Height() != H.DiagonalLength( offset ) ) throw std::logic_error("t must be the same length as H's offset diag"); if( !t.AlignedWithDiagonal( H, offset ) ) throw std::logic_error("t must be aligned with H's offset diagonal"); #endif typedef Complex<R> C; const Grid& g = H.Grid(); DistMatrix<C> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<C> ABottom(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); DistMatrix<C,STAR,VR > HPan_STAR_VR(g); DistMatrix<C,STAR,MC > HPan_STAR_MC(g); DistMatrix<C,STAR,STAR> t1_STAR_STAR(g); DistMatrix<C,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<C,STAR,MR > Z_STAR_MR(g); DistMatrix<C,STAR,VR > Z_STAR_VR(g); LockedPartitionUpDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionUp ( t, tT, tB, 0 ); while( HBR.Height() < H.Height() && HBR.Width() < H.Width() ) { LockedRepartitionUpDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); const int HPanWidth = H11.Width() + H12.Width(); const int HPanHeight = std::min( H11.Height(), std::max(HPanWidth-offset,0) ); const int leftover = A.Height()-HPanWidth; HPan.LockedView( H, H00.Height(), H00.Width(), HPanHeight, HPanWidth ); LockedRepartitionUp ( tT, t0, t1, /**/ /**/ tB, t2, HPanHeight ); ABottom.View( A, leftover, 0, HPanWidth, A.Width() ); HPan_STAR_MC.AlignWith( ABottom ); Z_STAR_MR.AlignWith( ABottom ); Z_STAR_VR.AlignWith( ABottom ); Zeros( HPanHeight, ABottom.Width(), Z_STAR_MR ); Zeros( HPanHeight, HPanHeight, SInv_STAR_STAR ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( LEFT, UPPER, offset, HPanCopy ); SetDiagonalToOne( LEFT, offset, HPanCopy ); HPan_STAR_VR = HPanCopy; Herk ( UPPER, NORMAL, C(1), HPan_STAR_VR.LockedLocalMatrix(), C(0), SInv_STAR_STAR.LocalMatrix() ); SInv_STAR_STAR.SumOverGrid(); t1_STAR_STAR = t1; FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR ); HPan_STAR_MC = HPan_STAR_VR; LocalGemm ( NORMAL, NORMAL, C(1), HPan_STAR_MC, ABottom, C(0), Z_STAR_MR ); Z_STAR_VR.SumScatterFrom( Z_STAR_MR ); LocalTrsm ( LEFT, UPPER, NORMAL, NON_UNIT, C(1), SInv_STAR_STAR, Z_STAR_VR ); Z_STAR_MR = Z_STAR_VR; LocalGemm ( ADJOINT, NORMAL, C(-1), HPan_STAR_MC, Z_STAR_MR, C(1), ABottom ); //--------------------------------------------------------------------// HPan_STAR_MC.FreeAlignments(); Z_STAR_MR.FreeAlignments(); Z_STAR_VR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); SlideLockedPartitionUp ( tT, t0, /**/ /**/ t1, tB, t2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void PanelLQ ( DistMatrix<Complex<Real> >& A, DistMatrix<Complex<Real>,MD,STAR>& t ) { #ifndef RELEASE PushCallStack("internal::PanelLQ"); if( A.Grid() != t.Grid() ) throw std::logic_error("{A,t} must be distributed over the same grid"); if( t.Height() != std::min(A.Height(),A.Width()) || t.Width() != 1 ) throw std::logic_error ("t must be a vector of height equal to the minimum dimension of A"); if( !t.AlignedWithDiagonal( A, 0 ) ) throw std::logic_error("t must be aligned with A's main diagonal"); #endif typedef Complex<Real> C; const Grid& g = A.Grid(); // Matrix views DistMatrix<C> ATL(g), ATR(g), A00(g), a01(g), A02(g), aTopRow(g), ABottomPan(g), ABL(g), ABR(g), a10(g), alpha11(g), a12(g), A20(g), a21(g), A22(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), tau1(g), t2(g); // Temporary distributions DistMatrix<C> aTopRowConj(g); DistMatrix<C,STAR,MR > aTopRowConj_STAR_MR(g); DistMatrix<C,MC, STAR> z_MC_STAR(g); PushBlocksizeStack( 1 ); PartitionDownLeftDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( t, tT, tB, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ a01, A02, /*************/ /**********************/ /**/ a10, /**/ alpha11, a12, ABL, /**/ ABR, A20, /**/ a21, A22 ); RepartitionDown ( tT, t0, /**/ /****/ tau1, tB, t2 ); aTopRow.View1x2( alpha11, a12 ); ABottomPan.View1x2( a21, A22 ); aTopRowConj_STAR_MR.AlignWith( ABottomPan ); z_MC_STAR.AlignWith( ABottomPan ); Zeros( ABottomPan.Height(), 1, z_MC_STAR ); //--------------------------------------------------------------------// const C tau = Reflector( alpha11, a12 ); tau1.Set( 0, 0, tau ); const bool myDiagonalEntry = ( g.Row() == alpha11.ColAlignment() && g.Col() == alpha11.RowAlignment() ); C alpha = 0; if( myDiagonalEntry ) { alpha = alpha11.GetLocal(0,0); alpha11.SetLocal(0,0,1); } Conjugate( aTopRow, aTopRowConj ); aTopRowConj_STAR_MR = aTopRowConj; Gemv ( NORMAL, C(1), ABottomPan.LockedLocalMatrix(), aTopRowConj_STAR_MR.LockedLocalMatrix(), C(0), z_MC_STAR.LocalMatrix() ); z_MC_STAR.SumOverRow(); Ger ( -Conj(tau), z_MC_STAR.LockedLocalMatrix(), aTopRowConj_STAR_MR.LockedLocalMatrix(), ABottomPan.LocalMatrix() ); if( myDiagonalEntry ) alpha11.SetLocal(0,0,alpha); //--------------------------------------------------------------------// aTopRowConj_STAR_MR.FreeAlignments(); z_MC_STAR.FreeAlignments(); SlidePartitionDown ( tT, t0, tau1, /**/ /****/ tB, t2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, a01, /**/ A02, /**/ a10, alpha11, /**/ a12, /*************/ /**********************/ ABL, /**/ ABR, A20, a21, /**/ A22 ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }