void BatchTransposedCoordinatesToSparse ( const Matrix<F>& NTrans, const Matrix<F>& V, Matrix<F>& Y ) { DEBUG_ONLY(CSE cse("svp::BatchTransposedCoordinatesToSparse")) Y = V; Trmm( LEFT, LOWER, TRANSPOSE, UNIT, F(1), NTrans, Y ); Round( Y ); }
void BatchTransposedCoordinatesToSparse ( const Matrix<Field>& NTrans, const Matrix<Field>& V, Matrix<Field>& Y ) { EL_DEBUG_CSE Y = V; Trmm( LEFT, LOWER, TRANSPOSE, UNIT, Field(1), NTrans, Y ); Round( Y ); }
void BatchCoordinatesToSparse ( const Matrix<F>& N, const Matrix<F>& V, Matrix<F>& Y ) { DEBUG_ONLY(CSE cse("svp::BatchCoordinatesToSparse")) Y = V; Trmm( LEFT, UPPER, NORMAL, UNIT, F(1), N, Y ); Round( Y ); }
void BatchCoordinatesToSparse ( const Matrix<Field>& N, const Matrix<Field>& V, Matrix<Field>& Y ) { EL_DEBUG_CSE Y = V; Trmm( LEFT, UPPER, NORMAL, UNIT, Field(1), N, Y ); Round( Y ); }
inline void TrdtrmmUVar1( Orientation orientation, Matrix<F>& U ) { #ifndef RELEASE PushCallStack("internal::TrtdrmmUVar1"); if( U.Height() != U.Width() ) throw std::logic_error("U must be square"); if( orientation == NORMAL ) throw std::logic_error("Orientation must be (conjugate-)transpose"); #endif Matrix<F> UTL, UTR, U00, U01, U02, UBL, UBR, U10, U11, U12, U20, U21, U22; Matrix<F> d1, S01; PartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( UTL.Height() < U.Height() && UTL.Width() < U.Height() ) { RepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); //--------------------------------------------------------------------/ U11.GetDiagonal( d1 ); S01 = U01; DiagonalSolve( LEFT, NORMAL, d1, U01, true ); Trrk( UPPER, NORMAL, orientation, F(1), U01, S01, F(1), U00 ); Trmm( RIGHT, UPPER, ADJOINT, UNIT, F(1), U11, U01 ); TrdtrmmUUnblocked( orientation, U11 ); //--------------------------------------------------------------------/ SlidePartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void LocalTrmm ( LeftOrRight side, UpperOrLower uplo, Orientation orientation, UnitOrNonUnit diag, T alpha, const DistMatrix<T,STAR,STAR>& A, DistMatrix<T,BColDist,BRowDist>& B ) { #ifndef RELEASE CallStackEntry entry("LocalTrmm"); if( (side == LEFT && BColDist != STAR) || (side == RIGHT && BRowDist != STAR) ) LogicError ("Distribution of RHS must conform with that of triangle"); #endif Trmm ( side, uplo, orientation, diag, alpha, A.LockedMatrix(), B.Matrix() ); }
// Returns NestedColumnTwoNorms( diag(d) NTrans^T V, numNested ), where NTrans
// is read as a lower triangle with an implicit unit diagonal. None of the
// inputs are modified; the product is formed in a local copy of V.
Matrix<Base<Field>> BatchTransposedCoordinatesToNorms
( const Matrix<Base<Field>>& d,
  const Matrix<Field>& NTrans,
  const Matrix<Field>& V,
        Int numNested=1 )
{
    EL_DEBUG_CSE
    Matrix<Field> W( V );

    // W := NTrans^T W
    // TODO(poulson): Decide whether this branch is necessary or not...
    if( V.Width() != 1 )
    {
        Trmm( LEFT, LOWER, TRANSPOSE, UNIT, Field(1), NTrans, W );
    }
    else
    {
        // Single column: use the matrix-vector routine instead
        Trmv( LOWER, TRANSPOSE, UNIT, NTrans, W );
    }

    // W := diag(d) W
    DiagonalScale( LEFT, NORMAL, d, W );

    return NestedColumnTwoNorms( W, numNested );
}
inline void TrdtrmmLVar1( Orientation orientation, Matrix<F>& L ) { #ifndef RELEASE CallStackEntry entry("internal::TrdtrmmLVar1"); if( L.Height() != L.Width() ) LogicError("L must be square"); if( orientation == NORMAL ) LogicError("Orientation must be (conjugate-)transpose"); #endif Matrix<F> LTL, LTR, L00, L01, L02, LBL, LBR, L10, L11, L12, L20, L21, L22; Matrix<F> d1, S10; PartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( LTL.Height() < L.Height() && LTL.Width() < L.Height() ) { RepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); //--------------------------------------------------------------------/ L11.GetDiagonal( d1 ); S10 = L10; DiagonalSolve( LEFT, NORMAL, d1, L10, true ); Trrk( LOWER, orientation, NORMAL, F(1), S10, L10, F(1), L00 ); Trmm( LEFT, LOWER, orientation, UNIT, F(1), L11, L10 ); TrdtrmmLUnblocked( orientation, L11 ); //--------------------------------------------------------------------/ SlidePartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } }
// Blocked (variant 1) kernel for the upper-triangular Trtrmm update.
//
// U is overwritten in place, one diagonal block at a time.  With op() the
// orientation supplied by the caller, each step performs
//
//   U00 := U00 + U01 op(U01)
//   U01 := U01 op(U11)
//   U11 := TrtrmmUUnblocked( orientation, U11 )
inline void TrtrmmUVar1( Orientation orientation, Matrix<T>& U )
{
#ifndef RELEASE
    PushCallStack("internal::TrtrmmUVar1");
#endif
    // Quadrant views, followed by the 3x3 repartitioned views
    Matrix<T> UTL, UTR, UBL, UBR;
    Matrix<T> U00, U01, U02,
              U10, U11, U12,
              U20, U21, U22;

    PartitionDownDiagonal
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    while( U.Height() > UTL.Height() && U.Height() > UTL.Width() )
    {
        RepartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, /**/ U01, U02,
         /*************/ /******************/
               /**/       U10, /**/ U11, U12,
          UBL, /**/ UBR,  U20, /**/ U21, U22 );

        //--------------------------------------------------------------------//
        // U00 := U00 + U01 op(U01), upper triangle only
        Trrk( UPPER, NORMAL, orientation, T(1), U01, U01, T(1), U00 );

        // U01 := U01 op(U11)
        Trmm( RIGHT, UPPER, orientation, NON_UNIT, T(1), U11, U01 );

        // Finish the diagonal block with the unblocked kernel
        TrtrmmUUnblocked( orientation, U11 );
        //--------------------------------------------------------------------//

        SlidePartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, U01, /**/ U02,
               /**/       U10, U11, /**/ U12,
         /*************/ /******************/
          UBL, /**/ UBR,  U20, U21, /**/ U22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
// Blocked (variant 1) upper-triangular Trtrmm, written with index ranges.
//
// Walks down the diagonal of U in steps of Blocksize().  With op() being the
// adjoint when `conjugate` is true and the transpose otherwise, each step
// performs
//
//   U00 := U00 + U01 op(U01)
//   U01 := U01 op(U11)
//   U11 := trtrmm::UUnblocked( U11, conjugate )
void UVar1( Matrix<T>& U, bool conjugate=false )
{
    EL_DEBUG_CSE
    const Int height = U.Height();
    const Int blockSize = Blocksize();
    const Orientation orientation = ( conjugate ? ADJOINT : TRANSPOSE );

    for( Int start=0; start<height; start+=blockSize )
    {
        // The final block may be narrower than the nominal blocksize
        const Int width = Min(blockSize,height-start);

        const Range<Int> leading( 0, start );
        const Range<Int> current( start, start+width );

        auto U00 = U( leading, leading );
        auto U01 = U( leading, current );
        auto U11 = U( current, current );

        // U00 := U00 + U01 op(U01), upper triangle only
        Trrk( UPPER, NORMAL, orientation, T(1), U01, U01, T(1), U00 );

        // U01 := U01 op(U11)
        Trmm( RIGHT, UPPER, orientation, NON_UNIT, T(1), U11, U01 );

        // Finish the diagonal block with the unblocked kernel
        trtrmm::UUnblocked( U11, conjugate );
    }
}
// Blocked (variant 1) kernel for the lower-triangular Trtrmm update.
//
// L is overwritten in place, one diagonal block at a time.  With op() the
// (conjugate-)transpose chosen by `orientation`, each step performs
//
//   L00 := L00 + op(L10) L10
//   L10 := op(L11) L10
//   L11 := TrtrmmLUnblocked( orientation, L11 )
inline void TrtrmmLVar1( Orientation orientation, Matrix<T>& L )
{
#ifndef RELEASE
    CallStackEntry entry("internal::TrtrmmLVar1");
    if( orientation == NORMAL )
        LogicError("Must be (conjugate-)transposed");
#endif
    // Quadrant views, followed by the 3x3 repartitioned views
    Matrix<T> LTL, LTR, LBL, LBR;
    Matrix<T> L00, L01, L02,
              L10, L11, L12,
              L20, L21, L22;

    PartitionDownDiagonal
    ( L, LTL, LTR,
         LBL, LBR, 0 );
    while( L.Height() > LTL.Height() && L.Height() > LTL.Width() )
    {
        RepartitionDownDiagonal
        ( LTL, /**/ LTR,  L00, /**/ L01, L02,
         /*************/ /******************/
               /**/       L10, /**/ L11, L12,
          LBL, /**/ LBR,  L20, /**/ L21, L22 );

        //--------------------------------------------------------------------//
        // L00 := L00 + op(L10) L10, lower triangle only
        Trrk( LOWER, orientation, NORMAL, T(1), L10, L10, T(1), L00 );

        // L10 := op(L11) L10
        Trmm( LEFT, LOWER, orientation, NON_UNIT, T(1), L11, L10 );

        // Finish the diagonal block with the unblocked kernel
        TrtrmmLUnblocked( orientation, L11 );
        //--------------------------------------------------------------------//

        SlidePartitionDownDiagonal
        ( LTL, /**/ LTR,  L00, L01, /**/ L02,
               /**/       L10, L11, /**/ L12,
         /*************/ /******************/
          LBL, /**/ LBR,  L20, L21, /**/ L22 );
    }
}
inline void TwoSidedTrmmUVar5( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& U ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrmmUVar5"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( U.Height() != U.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != U.Height() ) throw std::logic_error("A and U must be the same size"); #endif // Matrix views Matrix<F> ATL, ATR, A00, A01, A02, ABL, ABR, A10, A11, A12, A20, A21, A22; Matrix<F> UTL, UTR, U00, U01, U02, UBL, UBR, U10, U11, U12, U20, U21, U22; // Temporary products Matrix<F> Y01; PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); //--------------------------------------------------------------------// // Y01 := U01 A11 Zeros( A01.Height(), A01.Width(), Y01 ); Hemm( RIGHT, UPPER, F(1), A11, U01, F(0), Y01 ); // A01 := U00 A01 Trmm( LEFT, UPPER, NORMAL, diag, F(1), U00, A01 ); // A01 := A01 + 1/2 Y01 Axpy( F(1)/F(2), Y01, A01 ); // A00 := A00 + (U01 A01' + A01 U01') Her2k( UPPER, NORMAL, F(1), U01, A01, F(1), A00 ); // A01 := A01 + 1/2 Y01 Axpy( F(1)/F(2), Y01, A01 ); // A01 := A01 U11' Trmm( RIGHT, UPPER, ADJOINT, diag, F(1), U11, A01 ); // A11 := U11 A11 U11' TwoSidedTrmmUUnb( diag, A11, U11 ); //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, 
/*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } #ifndef RELEASE PopCallStack(); #endif }
// Distributed blocked variant 5 of the upper two-sided triangular multiply.
//
// A is updated in place using the upper triangle U; `diag` states whether U's
// diagonal is treated as unit.  In non-RELEASE builds, A and U are required
// to be square and of equal size.  All temporaries are created on A's grid.
// The per-step updates are listed in the comments inside the loop.
inline void TwoSidedTrmmUVar5 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& U )
{
#ifndef RELEASE
    PushCallStack("internal::TwoSidedTrmmUVar5");
    if( A.Height() != A.Width() )
        throw std::logic_error("A must be square");
    if( U.Height() != U.Width() )
        throw std::logic_error("Triangular matrices must be square");
    if( A.Height() != U.Height() )
        throw std::logic_error("A and U must be the same size");
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g);
    DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g);

    // Temporary distributions
    // (each name records the distribution pair of the temporary it declares)
    DistMatrix<F,STAR,STAR> A11_STAR_STAR(g);
    DistMatrix<F,MC, STAR> A01_MC_STAR(g);
    DistMatrix<F,MR, STAR> A01_MR_STAR(g);
    DistMatrix<F,VC, STAR> A01_VC_STAR(g);
    DistMatrix<F,STAR,STAR> U11_STAR_STAR(g);
    DistMatrix<F,MC, STAR> U01_MC_STAR(g);
    DistMatrix<F,MR, STAR> U01_MR_STAR(g);
    DistMatrix<F,VC, STAR> U01_VC_STAR(g);
    DistMatrix<F,VC, STAR> Y01_VC_STAR(g);
    DistMatrix<F> Y01(g);

    PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 );
    LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 );
    while( ATL.Height() < A.Height() )
    {
        RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 );
        LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 );

        // Fix the alignments of the temporaries for this step; they are
        // released again (FreeAlignments) at the bottom of the loop body.
        A01_MC_STAR.AlignWith( A00 );
        A01_MR_STAR.AlignWith( A00 );
        A01_VC_STAR.AlignWith( A00 );
        U01_MC_STAR.AlignWith( A00 );
        U01_MR_STAR.AlignWith( A00 );
        U01_VC_STAR.AlignWith( A00 );
        Y01.AlignWith( A01 );
        Y01_VC_STAR.AlignWith( A01 );
        //--------------------------------------------------------------------//
        // Y01 := U01 A11
        // The product is formed by a sequential Hemm on the local matrices of
        // the [STAR,STAR] copy of A11 and the [VC,STAR] copy of U01, then
        // assigned back into the default-distributed Y01.
        A11_STAR_STAR = A11;
        U01_VC_STAR = U01;
        Y01_VC_STAR.ResizeTo( A01.Height(), A01.Width() );
        Hemm ( RIGHT, UPPER, F(1), A11_STAR_STAR.LocalMatrix(), U01_VC_STAR.LocalMatrix(), F(0), Y01_VC_STAR.LocalMatrix() );
        Y01 = Y01_VC_STAR;

        // A01 := U00 A01
        Trmm( LEFT, UPPER, NORMAL, diag, F(1), U00, A01 );

        // A01 := A01 + 1/2 Y01
        Axpy( F(1)/F(2), Y01, A01 );

        // A00 := A00 + (U01 A01' + A01 U01')
        // A01 and U01 are first copied into [MC,STAR] and [MR,STAR] forms
        // (A01 passing through [VC,STAR]) for the local rank-2k update.
        A01_MC_STAR = A01;
        U01_MC_STAR = U01;
        A01_VC_STAR = A01_MC_STAR;
        A01_MR_STAR = A01_VC_STAR;
        U01_MR_STAR = U01_MC_STAR;
        LocalTrr2k ( UPPER, ADJOINT, ADJOINT, F(1), U01_MC_STAR, A01_MR_STAR, A01_MC_STAR, U01_MR_STAR, F(1), A00 );

        // A01 := A01 + 1/2 Y01
        // Applied to the [VC,STAR] copies; A01 itself is only overwritten
        // after the triangular multiply below.
        Axpy( F(1)/F(2), Y01_VC_STAR, A01_VC_STAR );

        // A01 := A01 U11'
        U11_STAR_STAR = U11;
        LocalTrmm ( RIGHT, UPPER, ADJOINT, diag, F(1), U11_STAR_STAR, A01_VC_STAR );
        A01 = A01_VC_STAR;

        // A11 := U11 A11 U11'
        // Reuses the [STAR,STAR] copies of A11 and U11 made earlier in this step.
        LocalTwoSidedTrmm( UPPER, diag, A11_STAR_STAR, U11_STAR_STAR );
        A11 = A11_STAR_STAR;
        //--------------------------------------------------------------------//
        A01_MC_STAR.FreeAlignments();
        A01_MR_STAR.FreeAlignments();
        A01_VC_STAR.FreeAlignments();
        U01_MC_STAR.FreeAlignments();
        U01_MR_STAR.FreeAlignments();
        U01_VC_STAR.FreeAlignments();
        Y01.FreeAlignments();
        Y01_VC_STAR.FreeAlignments();

        SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 );
        SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
inline void TwoSidedTrmmLVar4( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& L ) { #ifndef RELEASE CallStackEntry entry("internal::TwoSidedTrmmLVar4"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( L.Height() != L.Width() ) LogicError("Triangular matrices must be square"); if( A.Height() != L.Height() ) LogicError("A and L must be the same size"); #endif // Matrix views Matrix<F> ATL, ATR, A00, A01, A02, ABL, ABR, A10, A11, A12, A20, A21, A22; Matrix<F> LTL, LTR, L00, L01, L02, LBL, LBR, L10, L11, L12, L20, L21, L22; // Temporary products Matrix<F> Y10; PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); //--------------------------------------------------------------------// // Y10 := A11 L10 Zeros( Y10, A10.Height(), A10.Width() ); Hemm( LEFT, LOWER, F(1), A11, L10, F(0), Y10 ); // A10 := A10 + 1/2 Y10 Axpy( F(1)/F(2), Y10, A10 ); // A00 := A00 + (A10' L10 + L10' A10) Her2k( LOWER, ADJOINT, F(1), A10, L10, F(1), A00 ); // A10 := A10 + 1/2 Y10 Axpy( F(1)/F(2), Y10, A10 ); // A10 := L11' A10 Trmm( LEFT, LOWER, ADJOINT, diag, F(1), L11, A10 ); // A20 := A20 + A21 L10 Gemm( NORMAL, NORMAL, F(1), A21, L10, F(1), A20 ); // A11 := L11' A11 L11 TwoSidedTrmmLUnb( diag, A11, L11 ); // A21 := A21 L11 Trmm( RIGHT, LOWER, NORMAL, diag, F(1), L11, A21 ); //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, 
L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } }
inline void TwoSidedTrmmLVar2( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& L ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrmmLVar2"); if( A.Height() != A.Width() ) throw std::logic_error( "A must be square." ); if( L.Height() != L.Width() ) throw std::logic_error( "Triangular matrices must be square." ); if( A.Height() != L.Height() ) throw std::logic_error( "A and L must be the same size." ); #endif // Matrix views Matrix<F> ATL, ATR, A00, A01, A02, ABL, ABR, A10, A11, A12, A20, A21, A22; Matrix<F> LTL, LTR, L00, L01, L02, LBL, LBR, L10, L11, L12, L20, L21, L22; // Temporary products Matrix<F> Y21; PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); //--------------------------------------------------------------------// // A10 := L11' A10 Trmm( LEFT, LOWER, ADJOINT, diag, F(1), L11, A10 ); // A10 := A10 + L21' A20 Gemm( ADJOINT, NORMAL, F(1), L21, A20, F(1), A10 ); // Y21 := A22 L21 Zeros( A21.Height(), A21.Width(), Y21 ); Hemm( LEFT, LOWER, F(1), A22, L21, F(0), Y21 ); // A21 := A21 L11 Trmm( RIGHT, LOWER, NORMAL, diag, F(1), L11, A21 ); // A21 := A21 + 1/2 Y21 Axpy( F(1)/F(2), Y21, A21 ); // A11 := L11' A11 L11 TwoSidedTrmmLUnb( diag, A11, L11 ); // A11 := A11 + (A21' L21 + L21' A21) Her2k( LOWER, ADJOINT, F(1), A21, L21, F(1), A11 ); // A21 := A21 + 1/2 Y21 Axpy( F(1)/F(2), Y21, A21 ); //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); 
SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } #ifndef RELEASE PopCallStack(); #endif }
// Distributed blocked variant 1 of the upper two-sided triangular multiply.
//
// A is updated in place using the upper triangle U; `diag` states whether U's
// diagonal is treated as unit.  In non-RELEASE builds, A and U are required
// to be square and of equal size.  All temporaries are created on A's grid.
// The per-step updates are listed in the comments inside the loop.
inline void TwoSidedTrmmUVar1 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& U )
{
#ifndef RELEASE
    PushCallStack("internal::TwoSidedTrmmUVar1");
    if( A.Height() != A.Width() )
        throw std::logic_error("A must be square");
    if( U.Height() != U.Width() )
        throw std::logic_error("Triangular matrices must be square");
    if( A.Height() != U.Height() )
        throw std::logic_error("A and U must be the same size");
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g);
    DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g);

    // Temporary distributions
    // (each name records the distribution pair of the temporary it declares;
    //  the "Adj" temporaries hold adjoints, see AdjointFrom below)
    DistMatrix<F,STAR,STAR> A11_STAR_STAR(g);
    DistMatrix<F,STAR,VR > A12_STAR_VR(g);
    DistMatrix<F,STAR,STAR> U11_STAR_STAR(g);
    DistMatrix<F,STAR,MC > U12_STAR_MC(g);
    DistMatrix<F,STAR,VR > U12_STAR_VR(g);
    DistMatrix<F,MR, STAR> U12Adj_MR_STAR(g);
    DistMatrix<F,VC, STAR> U12Adj_VC_STAR(g);
    DistMatrix<F,STAR,STAR> X11_STAR_STAR(g);
    DistMatrix<F,MR, MC > Z12Adj_MR_MC(g);
    DistMatrix<F,MC, STAR> Z12Adj_MC_STAR(g);
    DistMatrix<F,MR, STAR> Z12Adj_MR_STAR(g);
    DistMatrix<F> Z12Adj(g);
    DistMatrix<F> Y12(g);

    PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 );
    LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 );
    while( ATL.Height() < A.Height() )
    {
        RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 );
        LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 );

        // Fix the alignments of the temporaries for this step; they are
        // released again (FreeAlignments) at the bottom of the loop body.
        // X11_STAR_STAR is only sized here; it is filled by the Her2k below.
        A12_STAR_VR.AlignWith( A12 );
        U12_STAR_MC.AlignWith( A22 );
        U12_STAR_VR.AlignWith( A12 );
        U12Adj_MR_STAR.AlignWith( A22 );
        U12Adj_VC_STAR.AlignWith( A22 );
        X11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() );
        Y12.AlignWith( A12 );
        Z12Adj.AlignWith( A12 );
        Z12Adj_MR_MC.AlignWith( A12 );
        Z12Adj_MC_STAR.AlignWith( A22 );
        Z12Adj_MR_STAR.AlignWith( A22 );
        //--------------------------------------------------------------------//
        // Y12 := U12 A22
        // The adjoint of the product is accumulated into the two zeroed
        // Z12Adj_* buffers, summed into Z12Adj_MR_MC, and then adjointed
        // locally into Y12.
        U12Adj_MR_STAR.AdjointFrom( U12 );
        U12Adj_VC_STAR = U12Adj_MR_STAR;
        U12_STAR_MC.AdjointFrom( U12Adj_VC_STAR );
        Z12Adj_MC_STAR.ResizeTo( A12.Width(), A12.Height() );
        Z12Adj_MR_STAR.ResizeTo( A12.Width(), A12.Height() );
        Zero( Z12Adj_MC_STAR );
        Zero( Z12Adj_MR_STAR );
        LocalSymmetricAccumulateRU ( ADJOINT, F(1), A22, U12_STAR_MC, U12Adj_MR_STAR, Z12Adj_MC_STAR, Z12Adj_MR_STAR );
        Z12Adj.SumScatterFrom( Z12Adj_MC_STAR );
        Z12Adj_MR_MC = Z12Adj;
        Z12Adj_MR_MC.SumScatterUpdate( F(1), Z12Adj_MR_STAR );
        Y12.ResizeTo( A12.Height(), A12.Width() );
        Adjoint( Z12Adj_MR_MC.LockedLocalMatrix(), Y12.LocalMatrix() );

        // A12 := U11 A12
        A12_STAR_VR = A12;
        U11_STAR_STAR = U11;
        LocalTrmm ( LEFT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, A12_STAR_VR );
        A12 = A12_STAR_VR;

        // A12 := A12 + 1/2 Y12
        Axpy( F(1)/F(2), Y12, A12 );

        // A11 := U11 A11 U11'
        // Reuses the [STAR,STAR] copy of U11 made for the A12 update above.
        A11_STAR_STAR = A11;
        LocalTwoSidedTrmm( UPPER, diag, A11_STAR_STAR, U11_STAR_STAR );
        A11 = A11_STAR_STAR;

        // A11 := A11 + (U12 A12' + A12 U12')
        // A sequential Her2k on the local [STAR,VR] pieces writes into
        // X11_STAR_STAR (beta = 0), which is then sum-scattered into A11.
        A12_STAR_VR = A12;
        U12_STAR_VR = U12;
        Her2k ( UPPER, NORMAL, F(1), A12_STAR_VR.LocalMatrix(), U12_STAR_VR.LocalMatrix(), F(0), X11_STAR_STAR.LocalMatrix() );
        A11.SumScatterUpdate( F(1), X11_STAR_STAR );

        // A12 := A12 + 1/2 Y12
        Axpy( F(1)/F(2), Y12, A12 );

        // A12 := A12 U22'
        Trmm( RIGHT, UPPER, ADJOINT, diag, F(1), U22, A12 );
        //--------------------------------------------------------------------//
        A12_STAR_VR.FreeAlignments();
        U12_STAR_MC.FreeAlignments();
        U12_STAR_VR.FreeAlignments();
        U12Adj_MR_STAR.FreeAlignments();
        U12Adj_VC_STAR.FreeAlignments();
        Y12.FreeAlignments();
        Z12Adj.FreeAlignments();
        Z12Adj_MR_MC.FreeAlignments();
        Z12Adj_MC_STAR.FreeAlignments();
        Z12Adj_MR_STAR.FreeAlignments();

        SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 );
        SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}