inline void RLVB ( Conjugation conjugation, int offset, const DistMatrix<Complex<R> >& H, const DistMatrix<Complex<R>,MD,STAR>& t, DistMatrix<Complex<R> >& A ) { #ifndef RELEASE PushCallStack("apply_packed_reflectors::RLVB"); if( H.Grid() != t.Grid() || t.Grid() != A.Grid() ) throw std::logic_error ("{H,t,A} must be distributed over the same grid"); if( offset > 0 || offset < -H.Height() ) throw std::logic_error("Transforms out of bounds"); if( H.Height() != A.Width() ) throw std::logic_error ("Height of transforms must equal width of target matrix"); if( t.Height() != H.DiagonalLength( offset ) ) throw std::logic_error("t must be the same length as H's offset diag"); if( !t.AlignedWithDiagonal( H, offset ) ) throw std::logic_error("t must be aligned with H's 'offset' diagonal"); #endif typedef Complex<R> C; const Grid& g = H.Grid(); DistMatrix<C> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<C> ARight(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); DistMatrix<C,VC, STAR> HPan_VC_STAR(g); DistMatrix<C,MR, STAR> HPan_MR_STAR(g); DistMatrix<C,STAR,STAR> t1_STAR_STAR(g); DistMatrix<C,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<C,STAR,MC > ZAdj_STAR_MC(g); DistMatrix<C,STAR,VC > ZAdj_STAR_VC(g); LockedPartitionUpDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionUp ( t, tT, tB, 0 ); while( HBR.Height() < H.Height() && HBR.Width() < H.Width() ) { LockedRepartitionUpDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); const int HPanHeight = H11.Height() + H21.Height(); const int HPanWidth = std::min( H11.Width(), std::max(HPanHeight+offset,0) ); const int leftover = A.Width()-HPanHeight; LockedView( HPan, H, H00.Height(), H00.Width(), HPanHeight, HPanWidth ); LockedRepartitionUp ( tT, t0, t1, /**/ /**/ tB, t2, HPanWidth ); View( ARight, A, 0, leftover, A.Height(), HPanHeight ); HPan_MR_STAR.AlignWith( ARight ); ZAdj_STAR_MC.AlignWith( ARight ); ZAdj_STAR_VC.AlignWith( ARight ); Zeros( HPan.Width(), ARight.Height(), ZAdj_STAR_MC ); Zeros( HPan.Width(), HPan.Width(), SInv_STAR_STAR ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( LEFT, LOWER, offset, HPanCopy ); SetDiagonal( LEFT, offset, HPanCopy, C(1) ); HPan_VC_STAR = HPanCopy; Herk ( LOWER, ADJOINT, C(1), HPan_VC_STAR.LockedMatrix(), C(0), SInv_STAR_STAR.Matrix() ); SInv_STAR_STAR.SumOverGrid(); t1_STAR_STAR = t1; FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR ); HPan_MR_STAR = HPan_VC_STAR; LocalGemm ( ADJOINT, ADJOINT, C(1), HPan_MR_STAR, ARight, C(0), ZAdj_STAR_MC ); ZAdj_STAR_VC.SumScatterFrom( ZAdj_STAR_MC ); LocalTrsm ( LEFT, LOWER, ADJOINT, NON_UNIT, C(1), SInv_STAR_STAR, ZAdj_STAR_VC ); ZAdj_STAR_MC = ZAdj_STAR_VC; LocalGemm ( ADJOINT, ADJOINT, C(-1), ZAdj_STAR_MC, HPan_MR_STAR, C(1), ARight ); //--------------------------------------------------------------------// HPan_MR_STAR.FreeAlignments(); ZAdj_STAR_MC.FreeAlignments(); ZAdj_STAR_VC.FreeAlignments(); SlideLockedPartitionUpDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); SlideLockedPartitionUp ( tT, t0, /**/ /**/ t1, tB, t2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void RUVF ( Conjugation conjugation, Int offset, const DistMatrix<F>& H, const DistMatrix<F,MD,STAR>& t, DistMatrix<F>& A ) { #ifndef RELEASE CallStackEntry cse("apply_packed_reflectors::RUVF"); if( H.Grid() != t.Grid() || t.Grid() != A.Grid() ) LogicError("{H,t,A} must be distributed over the same grid"); // TODO: Proper dimension checks if( t.Height() != H.DiagonalLength(offset) ) LogicError("t must be the same length as H's offset diag"); if( !t.AlignedWithDiagonal( H, offset ) ) LogicError("t must be aligned with H's 'offset' diagonal"); #endif const Grid& g = H.Grid(); DistMatrix<F> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<F> ALeft(g); DistMatrix<F,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); DistMatrix<F,VC, STAR> HPan_VC_STAR(g); DistMatrix<F,MR, STAR> HPan_MR_STAR(g); DistMatrix<F,STAR,STAR> t1_STAR_STAR(g); DistMatrix<F,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<F,STAR,MC > ZAdj_STAR_MC(g); DistMatrix<F,STAR,VC > ZAdj_STAR_VC(g); LockedPartitionDownOffsetDiagonal ( offset, H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionDown ( t, tT, tB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); LockedRepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2 ); LockedView2x1( HPan, H01, H11 ); View( ALeft, A, 0, 0, A.Height(), HPan.Height() ); HPan_MR_STAR.AlignWith( ALeft ); ZAdj_STAR_MC.AlignWith( ALeft ); ZAdj_STAR_VC.AlignWith( ALeft ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( UPPER, HPanCopy, 0, RIGHT ); SetDiagonal( HPanCopy, F(1), 0, RIGHT ); HPan_VC_STAR = HPanCopy; Zeros( SInv_STAR_STAR, HPan.Width(), HPan.Width() ); Herk ( UPPER, ADJOINT, F(1), HPan_VC_STAR.LockedMatrix(), F(0), SInv_STAR_STAR.Matrix() ); SInv_STAR_STAR.SumOverGrid(); t1_STAR_STAR = t1; FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR ); HPan_MR_STAR = HPan_VC_STAR; LocalGemm( ADJOINT, ADJOINT, F(1), HPan_MR_STAR, ALeft, ZAdj_STAR_MC ); ZAdj_STAR_VC.SumScatterFrom( ZAdj_STAR_MC ); LocalTrsm ( LEFT, UPPER, ADJOINT, NON_UNIT, F(1), SInv_STAR_STAR, ZAdj_STAR_VC ); ZAdj_STAR_MC = ZAdj_STAR_VC; LocalGemm ( ADJOINT, ADJOINT, F(-1), ZAdj_STAR_MC, HPan_MR_STAR, F(1), ALeft ); //--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlideLockedPartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); } }
inline void RLVB ( int offset, const DistMatrix<R>& H, DistMatrix<R>& A ) { #ifndef RELEASE PushCallStack("apply_packed_reflectors::RLVB"); if( H.Grid() != A.Grid() ) throw std::logic_error("{H,A} must be distributed over the same grid"); if( offset > 0 || offset < -H.Height() ) throw std::logic_error("Transforms out of bounds"); if( H.Height() != A.Width() ) throw std::logic_error ("Height of transforms must equal width of target matrix"); #endif const Grid& g = H.Grid(); DistMatrix<R> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<R> ARight(g); DistMatrix<R,VC, STAR> HPan_VC_STAR(g); DistMatrix<R,MR, STAR> HPan_MR_STAR(g); DistMatrix<R,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<R,STAR,MC > ZTrans_STAR_MC(g); DistMatrix<R,STAR,VC > ZTrans_STAR_VC(g); LockedPartitionUpDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); while( HBR.Height() < H.Height() && HBR.Width() < H.Width() ) { LockedRepartitionUpDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); const int HPanHeight = H11.Height() + H21.Height(); const int HPanWidth = std::min( H11.Width(), std::max(HPanHeight+offset,0) ); const int leftover = A.Width()-HPanHeight; LockedView( HPan, H, H00.Height(), H00.Width(), HPanHeight, HPanWidth ); View( ARight, A, 0, leftover, A.Height(), HPanHeight ); HPan_MR_STAR.AlignWith( ARight ); ZTrans_STAR_MC.AlignWith( ARight ); ZTrans_STAR_VC.AlignWith( ARight ); Zeros( HPanWidth, ARight.Height(), ZTrans_STAR_MC ); Zeros( HPanWidth, HPanWidth, SInv_STAR_STAR ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( LEFT, LOWER, offset, HPanCopy ); SetDiagonal( LEFT, offset, HPanCopy, R(1) ); HPan_VC_STAR = HPanCopy; Syrk ( LOWER, TRANSPOSE, R(1), HPan_VC_STAR.LockedMatrix(), R(0), SInv_STAR_STAR.Matrix() ); SInv_STAR_STAR.SumOverGrid(); HalveMainDiagonal( SInv_STAR_STAR ); HPan_MR_STAR = HPan_VC_STAR; LocalGemm ( TRANSPOSE, TRANSPOSE, R(1), HPan_MR_STAR, ARight, R(0), ZTrans_STAR_MC ); ZTrans_STAR_VC.SumScatterFrom( ZTrans_STAR_MC ); LocalTrsm ( LEFT, LOWER, TRANSPOSE, NON_UNIT, R(1), SInv_STAR_STAR, ZTrans_STAR_VC ); ZTrans_STAR_MC = ZTrans_STAR_VC; LocalGemm ( TRANSPOSE, TRANSPOSE, R(-1), ZTrans_STAR_MC, HPan_MR_STAR, R(1), ARight ); //--------------------------------------------------------------------// HPan_MR_STAR.FreeAlignments(); ZTrans_STAR_MC.FreeAlignments(); ZTrans_STAR_VC.FreeAlignments(); SlideLockedPartitionUpDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); } #ifndef RELEASE PopCallStack(); #endif }