inline void TrmmLLNA ( UnitOrNonUnit diag, T alpha, const DistMatrix<T>& L, DistMatrix<T>& X ) { #ifndef RELEASE PushCallStack("internal::TrmmLLNA"); if( L.Grid() != X.Grid() ) throw std::logic_error ("L and X must be distributed over the same grid"); if( L.Height() != L.Width() || L.Width() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrmmLLNA: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = L.Grid(); DistMatrix<T> XL(g), XR(g), X0(g), X1(g), X2(g); DistMatrix<T,VR, STAR> X1_VR_STAR(g); DistMatrix<T,STAR,MR > X1Trans_STAR_MR(g); DistMatrix<T,MC, STAR> Z1_MC_STAR(g); X1_VR_STAR.AlignWith( L ); X1Trans_STAR_MR.AlignWith( L ); Z1_MC_STAR.AlignWith( L ); PartitionRight( X, XL, XR, 0 ); while( XL.Width() < X.Width() ) { RepartitionRight ( XL, /**/ XR, X0, /**/ X1, X2 ); Zeros( X1.Height(), X1.Width(), Z1_MC_STAR ); //--------------------------------------------------------------------// X1_VR_STAR = X1; X1Trans_STAR_MR.TransposeFrom( X1_VR_STAR ); LocalTrmmAccumulateLLN ( TRANSPOSE, diag, alpha, L, X1Trans_STAR_MR, Z1_MC_STAR ); X1.SumScatterFrom( Z1_MC_STAR ); //--------------------------------------------------------------------// SlidePartitionRight ( XL, /**/ XR, X0, X1, /**/ X2 ); } #ifndef RELEASE PopCallStack(); #endif }
void HermitianFromEVD ( UpperOrLower uplo, AbstractDistMatrix<F>& APre, const AbstractDistMatrix<Base<F>>& wPre, const AbstractDistMatrix<F>& ZPre ) { DEBUG_CSE typedef Base<F> Real; DistMatrixWriteProxy<F,F,MC,MR> AProx( APre ); DistMatrixReadProxy<Real,Real,VR,STAR> wProx( wPre ); DistMatrixReadProxy<F,F,MC,MR> ZProx( ZPre ); auto& A = AProx.Get(); auto& w = wProx.GetLocked(); auto& Z = ZProx.GetLocked(); const Grid& g = A.Grid(); DistMatrix<F,MC, STAR> Z1_MC_STAR(g); DistMatrix<F,VR, STAR> Z1_VR_STAR(g); DistMatrix<F,STAR,MR > Z1Adj_STAR_MR(g); const Int m = Z.Height(); const Int n = Z.Width(); A.Resize( m, m ); if( uplo == LOWER ) MakeTrapezoidal( UPPER, A, 1 ); else MakeTrapezoidal( LOWER, A, -1 ); const Int bsize = Blocksize(); for( Int k=0; k<n; k+=bsize ) { const Int nb = Min(bsize,n-k); auto Z1 = Z( ALL, IR(k,k+nb) ); auto w1 = w( IR(k,k+nb), ALL ); Z1_MC_STAR.AlignWith( A ); Z1_MC_STAR = Z1; Z1_VR_STAR.AlignWith( A ); Z1_VR_STAR = Z1_MC_STAR; DiagonalScale( RIGHT, NORMAL, w1, Z1_VR_STAR ); Z1Adj_STAR_MR.AlignWith( A ); Adjoint( Z1_VR_STAR, Z1Adj_STAR_MR ); LocalTrrk( uplo, F(1), Z1_MC_STAR, Z1Adj_STAR_MR, F(1), A ); } }
inline void SymmLLA ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::SymmLLA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g); DistMatrix<T,MC,STAR> B1_MC_STAR(g); DistMatrix<T,VR,STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR> B1Trans_STAR_MR(g); DistMatrix<T> Z1(g); DistMatrix<T,MC,STAR> Z1_MC_STAR(g); DistMatrix<T,MR,STAR> Z1_MR_STAR(g); DistMatrix<T,MR,MC > Z1_MR_MC(g); B1_MC_STAR.AlignWith( A ); B1_VR_STAR.AlignWith( A ); B1Trans_STAR_MR.AlignWith( A ); Z1_MC_STAR.AlignWith( A ); Z1_MR_STAR.AlignWith( A ); Scale( beta, C ); LockedPartitionRight ( B, BL, BR, 0 ); PartitionRight ( C, CL, CR, 0 ); while( CL.Width() < C.Width() ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); Z1.AlignWith( C1 ); Zeros( C1.Height(), C1.Width(), Z1_MC_STAR ); Zeros( C1.Height(), C1.Width(), Z1_MR_STAR ); //--------------------------------------------------------------------// B1_MC_STAR = B1; B1_VR_STAR = B1_MC_STAR; B1Trans_STAR_MR.TransposeFrom( B1_VR_STAR ); LocalSymmetricAccumulateLL ( TRANSPOSE, alpha, A, B1_MC_STAR, B1Trans_STAR_MR, Z1_MC_STAR, Z1_MR_STAR ); Z1_MR_MC.SumScatterFrom( Z1_MR_STAR ); Z1 = Z1_MR_MC; Z1.SumScatterUpdate( T(1), Z1_MC_STAR ); Axpy( T(1), Z1, C1 ); //--------------------------------------------------------------------// Z1.FreeAlignments(); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void LocalTrmmAccumulateLLN ( Orientation orientation, UnitOrNonUnit diag, T alpha, const DistMatrix<T,MC, MR >& L, const DistMatrix<T,STAR,MR >& XTrans_STAR_MR, DistMatrix<T,MC, STAR>& Z_MC_STAR ) { #ifndef RELEASE CallStackEntry entry("internal::LocalTrmmAccumulateLLN"); if( L.Grid() != XTrans_STAR_MR.Grid() || XTrans_STAR_MR.Grid() != Z_MC_STAR.Grid() ) throw std::logic_error ("{L,X,Z} must be distributed over the same grid"); if( L.Height() != L.Width() || L.Height() != XTrans_STAR_MR.Width() || L.Height() != Z_MC_STAR.Height() || XTrans_STAR_MR.Height() != Z_MC_STAR.Width() ) { std::ostringstream msg; msg << "Nonconformal LocalTrmmAccumulateLLN: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X^H/T[* ,MR] ~ " << XTrans_STAR_MR.Height() << " x " << XTrans_STAR_MR.Width() << "\n" << " Z[MC,* ] ~ " << Z_MC_STAR.Height() << " x " << Z_MC_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( XTrans_STAR_MR.RowAlignment() != L.RowAlignment() || Z_MC_STAR.ColAlignment() != L.ColAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<T> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<T> D11(g); DistMatrix<T,STAR,MR> XLTrans_STAR_MR(g), XRTrans_STAR_MR(g), X0Trans_STAR_MR(g), X1Trans_STAR_MR(g), X2Trans_STAR_MR(g); DistMatrix<T,MC,STAR> ZT_MC_STAR(g), Z0_MC_STAR(g), ZB_MC_STAR(g), Z1_MC_STAR(g), Z2_MC_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); LockedPartitionRight ( XTrans_STAR_MR, XLTrans_STAR_MR, XRTrans_STAR_MR, 0 ); PartitionDown ( Z_MC_STAR, ZT_MC_STAR, ZB_MC_STAR, 0 ); while( LTL.Height() < L.Height() ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); LockedRepartitionRight ( XLTrans_STAR_MR, /**/ XRTrans_STAR_MR, X0Trans_STAR_MR, /**/ X1Trans_STAR_MR, X2Trans_STAR_MR ); RepartitionDown ( ZT_MC_STAR, Z0_MC_STAR, /**********/ /**********/ Z1_MC_STAR, ZB_MC_STAR, Z2_MC_STAR ); D11.AlignWith( L11 ); //--------------------------------------------------------------------// D11 = L11; MakeTriangular( LOWER, D11 ); if( diag == UNIT ) SetDiagonal( D11, T(1) ); LocalGemm ( NORMAL, orientation, alpha, D11, X1Trans_STAR_MR, T(1), Z1_MC_STAR ); LocalGemm ( NORMAL, orientation, alpha, L21, X1Trans_STAR_MR, T(1), Z2_MC_STAR ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlideLockedPartitionRight ( XLTrans_STAR_MR, /**/ XRTrans_STAR_MR, X0Trans_STAR_MR, X1Trans_STAR_MR, /**/ X2Trans_STAR_MR ); SlidePartitionDown ( ZT_MC_STAR, Z0_MC_STAR, Z1_MC_STAR, /**********/ /**********/ ZB_MC_STAR, Z2_MC_STAR ); } PopBlocksizeStack(); }
inline void LocalSymmetricAccumulateLU ( Orientation orientation, T alpha, const DistMatrix<T>& A, const DistMatrix<T,MC, STAR>& B_MC_STAR, const DistMatrix<T,STAR,MR >& BAdjOrTrans_STAR_MR, DistMatrix<T,MC, STAR>& Z_MC_STAR, DistMatrix<T,MR, STAR>& Z_MR_STAR ) { #ifndef RELEASE PushCallStack("internal::LocalSymmetricAccumulateLU"); if( A.Grid() != B_MC_STAR.Grid() || B_MC_STAR.Grid() != BAdjOrTrans_STAR_MR.Grid() || BAdjOrTrans_STAR_MR.Grid() != Z_MC_STAR.Grid() || Z_MC_STAR.Grid() != Z_MR_STAR.Grid() ) throw std::logic_error ("{A,B,Z} must be distributed over the same grid"); if( A.Height() != A.Width() || A.Height() != B_MC_STAR.Height() || A.Height() != BAdjOrTrans_STAR_MR.Width() || A.Height() != Z_MC_STAR.Height() || A.Height() != Z_MR_STAR.Height() || B_MC_STAR.Width() != BAdjOrTrans_STAR_MR.Height() || BAdjOrTrans_STAR_MR.Height() != Z_MC_STAR.Width() || Z_MC_STAR.Width() != Z_MR_STAR.Width() ) { std::ostringstream msg; msg << "Nonconformal LocalSymmetricAccumulateLU: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B[MC,* ] ~ " << B_MC_STAR.Height() << " x " << B_MC_STAR.Width() << "\n" << " B^H/T[* ,MR] ~ " << BAdjOrTrans_STAR_MR.Height() << " x " << BAdjOrTrans_STAR_MR.Width() << "\n" << " Z[MC,* ] ~ " << Z_MC_STAR.Height() << " x " << Z_MC_STAR.Width() << "\n" << " Z[MR,* ] ` " << Z_MR_STAR.Height() << " x " << Z_MR_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( B_MC_STAR.ColAlignment() != A.ColAlignment() || BAdjOrTrans_STAR_MR.RowAlignment() != A.RowAlignment() || Z_MC_STAR.ColAlignment() != A.ColAlignment() || Z_MR_STAR.ColAlignment() != A.RowAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = A.Grid(); DistMatrix<T> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<T> D11(g); DistMatrix<T,MC,STAR> BT_MC_STAR(g), B0_MC_STAR(g), BB_MC_STAR(g), B1_MC_STAR(g), B2_MC_STAR(g); DistMatrix<T,STAR,MR> BLAdjOrTrans_STAR_MR(g), BRAdjOrTrans_STAR_MR(g), B0AdjOrTrans_STAR_MR(g), B1AdjOrTrans_STAR_MR(g), B2AdjOrTrans_STAR_MR(g); DistMatrix<T,MC,STAR> ZT_MC_STAR(g), Z0_MC_STAR(g), ZB_MC_STAR(g), Z1_MC_STAR(g), Z2_MC_STAR(g); DistMatrix<T,MR,STAR> ZT_MR_STAR(g), Z0_MR_STAR(g), ZB_MR_STAR(g), Z1_MR_STAR(g), Z2_MR_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDown ( B_MC_STAR, BT_MC_STAR, BB_MC_STAR, 0 ); LockedPartitionRight ( BAdjOrTrans_STAR_MR, BLAdjOrTrans_STAR_MR, BRAdjOrTrans_STAR_MR, 0 ); PartitionDown ( Z_MC_STAR, ZT_MC_STAR, ZB_MC_STAR, 0 ); PartitionDown ( Z_MR_STAR, ZT_MR_STAR, ZB_MR_STAR, 0 ); while( ATL.Height() < A.Height() ) { LockedRepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDown ( BT_MC_STAR, B0_MC_STAR, /**********/ /**********/ B1_MC_STAR, BB_MC_STAR, B2_MC_STAR ); LockedRepartitionRight ( BLAdjOrTrans_STAR_MR, /**/ BRAdjOrTrans_STAR_MR, B0AdjOrTrans_STAR_MR, /**/ B1AdjOrTrans_STAR_MR, B2AdjOrTrans_STAR_MR ); RepartitionDown ( ZT_MC_STAR, Z0_MC_STAR, /**********/ /**********/ Z1_MC_STAR, ZB_MC_STAR, Z2_MC_STAR ); RepartitionDown ( ZT_MR_STAR, Z0_MR_STAR, /**********/ /**********/ Z1_MR_STAR, ZB_MR_STAR, Z2_MR_STAR ); D11.AlignWith( A11 ); //--------------------------------------------------------------------// D11 = A11; MakeTrapezoidal( LEFT, UPPER, 0, D11 ); LocalGemm ( NORMAL, orientation, alpha, D11, B1AdjOrTrans_STAR_MR, T(1), Z1_MC_STAR ); MakeTrapezoidal( LEFT, UPPER, 1, D11 ); LocalGemm ( orientation, NORMAL, alpha, D11, B1_MC_STAR, T(1), Z1_MR_STAR ); LocalGemm ( NORMAL, orientation, alpha, A12, B2AdjOrTrans_STAR_MR, T(1), Z1_MC_STAR ); LocalGemm ( orientation, NORMAL, alpha, A12, B1_MC_STAR, T(1), Z2_MR_STAR ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDown ( BT_MC_STAR, B0_MC_STAR, B1_MC_STAR, /**********/ /**********/ BB_MC_STAR, B2_MC_STAR ); SlideLockedPartitionRight ( BLAdjOrTrans_STAR_MR, /**/ BRAdjOrTrans_STAR_MR, B0AdjOrTrans_STAR_MR, B1AdjOrTrans_STAR_MR, /**/ B2AdjOrTrans_STAR_MR ); SlidePartitionDown ( ZT_MC_STAR, Z0_MC_STAR, Z1_MC_STAR, /**********/ /**********/ ZB_MC_STAR, Z2_MC_STAR ); SlidePartitionDown ( ZT_MR_STAR, Z0_MR_STAR, Z1_MR_STAR, /**********/ /**********/ ZB_MR_STAR, Z2_MR_STAR ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void internal::LocalTrmmAccumulateLUN ( Orientation orientation, UnitOrNonUnit diag, T alpha, const DistMatrix<T,MC, MR >& U, const DistMatrix<T,STAR,MR >& XAdjOrTrans_STAR_MR, DistMatrix<T,MC, STAR>& Z_MC_STAR ) { #ifndef RELEASE PushCallStack("internal::LocalTrmmAccumulateLUN"); if( U.Grid() != XAdjOrTrans_STAR_MR.Grid() || XAdjOrTrans_STAR_MR.Grid() != Z_MC_STAR.Grid() ) throw std::logic_error ("{U,X,Z} must be distributed over the same grid"); if( U.Height() != U.Width() || U.Height() != XAdjOrTrans_STAR_MR.Width() || U.Height() != Z_MC_STAR.Height() || XAdjOrTrans_STAR_MR.Height() != Z_MC_STAR.Width() ) { std::ostringstream msg; msg << "Nonconformal LocalTrmmAccumulateLUN: \n" << " U ~ " << U.Height() << " x " << U.Width() << "\n" << " X^H/T[* ,MR] ~ " << XAdjOrTrans_STAR_MR.Height() << " x " << XAdjOrTrans_STAR_MR.Width() << "\n" << " Z[MC,* ] ~ " << Z_MC_STAR.Height() << " x " << Z_MC_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( XAdjOrTrans_STAR_MR.RowAlignment() != U.RowAlignment() || Z_MC_STAR.ColAlignment() != U.ColAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<T,MC,MR> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<T,MC,MR> D11(g); DistMatrix<T,STAR,MR> XLAdjOrTrans_STAR_MR(g), XRAdjOrTrans_STAR_MR(g), X0AdjOrTrans_STAR_MR(g), X1AdjOrTrans_STAR_MR(g), X2AdjOrTrans_STAR_MR(g); DistMatrix<T,MC,STAR> ZT_MC_STAR(g), Z0_MC_STAR(g), ZB_MC_STAR(g), Z1_MC_STAR(g), Z2_MC_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); LockedPartitionRight ( XAdjOrTrans_STAR_MR, XLAdjOrTrans_STAR_MR, XRAdjOrTrans_STAR_MR, 0 ); PartitionDown ( Z_MC_STAR, ZT_MC_STAR, ZB_MC_STAR, 0 ); while( UTL.Height() < U.Height() ) { LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); LockedRepartitionRight ( XLAdjOrTrans_STAR_MR, /**/ XRAdjOrTrans_STAR_MR, X0AdjOrTrans_STAR_MR, /**/ X1AdjOrTrans_STAR_MR, X2AdjOrTrans_STAR_MR ); RepartitionDown ( ZT_MC_STAR, Z0_MC_STAR, /**********/ /**********/ Z1_MC_STAR, ZB_MC_STAR, Z2_MC_STAR ); D11.AlignWith( U11 ); //--------------------------------------------------------------------// D11 = U11; MakeTrapezoidal( LEFT, UPPER, 0, D11 ); if( diag == UNIT ) SetDiagonalToOne( D11 ); internal::LocalGemm ( NORMAL, orientation, alpha, D11, X1AdjOrTrans_STAR_MR, (T)1, Z1_MC_STAR ); internal::LocalGemm ( NORMAL, orientation, alpha, U01, X1AdjOrTrans_STAR_MR, (T)1, Z0_MC_STAR ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); SlideLockedPartitionRight ( XLAdjOrTrans_STAR_MR, /**/ XRAdjOrTrans_STAR_MR, X0AdjOrTrans_STAR_MR, X1AdjOrTrans_STAR_MR, /**/ X2AdjOrTrans_STAR_MR ); SlidePartitionDown ( ZT_MC_STAR, Z0_MC_STAR, Z1_MC_STAR, /**********/ /**********/ ZB_MC_STAR, Z2_MC_STAR ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void ReformHermitianMatrix ( UpperOrLower uplo, DistMatrix<R,MC,MR>& A, const DistMatrix<R,VR,STAR>& w, const DistMatrix<R,MC,MR>& Z, const RealFunctor& f ) { #ifndef RELEASE PushCallStack("hermitian_function::ReformHermitianMatrix"); #endif const Grid& g = A.Grid(); DistMatrix<R,MC,MR> ZL(g), ZR(g), Z0(g), Z1(g), Z2(g); DistMatrix<R,VR,STAR> wT(g), w0(g), wB(g), w1(g), w2(g); DistMatrix<R,MC, STAR> Z1_MC_STAR(g); DistMatrix<R,VR, STAR> Z1_VR_STAR(g); DistMatrix<R,STAR,MR > Z1Trans_STAR_MR(g); DistMatrix<R,STAR,STAR> w1_STAR_STAR(g); if( uplo == LOWER ) MakeTrapezoidal( LEFT, UPPER, 1, A ); else MakeTrapezoidal( LEFT, LOWER, -1, A ); LockedPartitionRight( Z, ZL, ZR, 0 ); LockedPartitionDown ( w, wT, wB, 0 ); while( ZL.Width() < Z.Width() ) { LockedRepartitionRight ( ZL, /**/ ZR, Z0, /**/ Z1, Z2 ); LockedRepartitionDown ( wT, w0, /**/ /**/ w1, wB, w2 ); Z1_MC_STAR.AlignWith( A ); Z1_VR_STAR.AlignWith( A ); Z1Trans_STAR_MR.AlignWith( A ); //--------------------------------------------------------------------// Z1_MC_STAR = Z1; Z1_VR_STAR = Z1_MC_STAR; w1_STAR_STAR = w1; // Scale Z1[VR,* ] with the modified eigenvalues const int width = Z1_VR_STAR.Width(); const int localHeight = Z1_VR_STAR.LocalHeight(); for( int j=0; j<width; ++j ) { const R omega = f(w1_STAR_STAR.GetLocalEntry(j,0)); R* buffer = Z1_VR_STAR.LocalBuffer(0,j); for( int iLocal=0; iLocal<localHeight; ++iLocal ) buffer[iLocal] *= omega; } Z1Trans_STAR_MR.TransposeFrom( Z1_VR_STAR ); internal::LocalTrrk( uplo, (R)1, Z1_MC_STAR, Z1Trans_STAR_MR, (R)1, A ); //--------------------------------------------------------------------// Z1Trans_STAR_MR.FreeAlignments(); Z1_VR_STAR.FreeAlignments(); Z1_MC_STAR.FreeAlignments(); SlideLockedPartitionDown ( wT, w0, w1, /**/ /**/ wB, w2 ); SlideLockedPartitionRight ( ZL, /**/ ZR, Z0, Z1, /**/ Z2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void ReformNormalMatrix ( DistMatrix<Complex<R>,MC,MR >& A, const DistMatrix<R, VR,STAR>& w, const DistMatrix<Complex<R>,MC,MR >& Z, const ComplexFunctor& f ) { #ifndef RELEASE PushCallStack("hermitian_function::ReformNormalMatrix"); #endif const Grid& g = A.Grid(); typedef Complex<R> C; DistMatrix<C,MC,MR> ZL(g), ZR(g), Z0(g), Z1(g), Z2(g); DistMatrix<R,VR,STAR> wT(g), w0(g), wB(g), w1(g), w2(g); DistMatrix<C,MC, STAR> Z1_MC_STAR(g); DistMatrix<C,VR, STAR> Z1_VR_STAR(g); DistMatrix<C,STAR,MR > Z1Adj_STAR_MR(g); DistMatrix<R,STAR,STAR> w1_STAR_STAR(g); Zero( A ); LockedPartitionRight( Z, ZL, ZR, 0 ); LockedPartitionDown ( w, wT, wB, 0 ); while( ZL.Width() < Z.Width() ) { LockedRepartitionRight ( ZL, /**/ ZR, Z0, /**/ Z1, Z2 ); LockedRepartitionDown ( wT, w0, /**/ /**/ w1, wB, w2 ); Z1_MC_STAR.AlignWith( A ); Z1_VR_STAR.AlignWith( A ); Z1Adj_STAR_MR.AlignWith( A ); //--------------------------------------------------------------------// Z1_MC_STAR = Z1; Z1_VR_STAR = Z1_MC_STAR; w1_STAR_STAR = w1; // Scale Z1[VR,* ] with the modified eigenvalues const int width = Z1_VR_STAR.Width(); const int localHeight = Z1_VR_STAR.LocalHeight(); for( int j=0; j<width; ++j ) { const C conjOmega = Conj(f(w1_STAR_STAR.GetLocalEntry(j,0))); C* buffer = Z1_VR_STAR.LocalBuffer(0,j); for( int iLocal=0; iLocal<localHeight; ++iLocal ) buffer[iLocal] *= conjOmega; } Z1Adj_STAR_MR.AdjointFrom( Z1_VR_STAR ); internal::LocalGemm ( NORMAL, NORMAL, (C)1, Z1_MC_STAR, Z1Adj_STAR_MR, (C)1, A ); //--------------------------------------------------------------------// Z1Adj_STAR_MR.FreeAlignments(); Z1_VR_STAR.FreeAlignments(); Z1_MC_STAR.FreeAlignments(); SlideLockedPartitionDown ( wT, w0, w1, /**/ /**/ wB, w2 ); SlideLockedPartitionRight ( ZL, /**/ ZR, Z0, Z1, /**/ Z2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void HermitianFromEVD ( UpperOrLower uplo, DistMatrix<F>& A, const DistMatrix<BASE(F),VR,STAR>& w, const DistMatrix<F>& Z ) { #ifndef RELEASE CallStackEntry entry("HermitianFromEVD"); #endif const Grid& g = A.Grid(); typedef BASE(F) R; DistMatrix<F> ZL(g), ZR(g), Z0(g), Z1(g), Z2(g); DistMatrix<R,VR,STAR> wT(g), w0(g), wB(g), w1(g), w2(g); DistMatrix<F,MC, STAR> Z1_MC_STAR(g); DistMatrix<F,VR, STAR> Z1_VR_STAR(g); DistMatrix<F,STAR,MR > Z1Adj_STAR_MR(g); DistMatrix<R,STAR,STAR> w1_STAR_STAR(g); A.ResizeTo( Z.Height(), Z.Height() ); if( uplo == LOWER ) MakeTrapezoidal( UPPER, A, 1 ); else MakeTrapezoidal( LOWER, A, -1 ); LockedPartitionRight( Z, ZL, ZR, 0 ); LockedPartitionDown ( w, wT, wB, 0 ); while( ZL.Width() < Z.Width() ) { LockedRepartitionRight ( ZL, /**/ ZR, Z0, /**/ Z1, Z2 ); LockedRepartitionDown ( wT, w0, /**/ /**/ w1, wB, w2 ); Z1_MC_STAR.AlignWith( A ); Z1_VR_STAR.AlignWith( A ); Z1Adj_STAR_MR.AlignWith( A ); //--------------------------------------------------------------------// Z1_MC_STAR = Z1; Z1_VR_STAR = Z1_MC_STAR; w1_STAR_STAR = w1; DiagonalScale( RIGHT, NORMAL, w1_STAR_STAR, Z1_VR_STAR ); Z1Adj_STAR_MR.AdjointFrom( Z1_VR_STAR ); LocalTrrk( uplo, F(1), Z1_MC_STAR, Z1Adj_STAR_MR, F(1), A ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( wT, w0, w1, /**/ /**/ wB, w2 ); SlideLockedPartitionRight ( ZL, /**/ ZR, Z0, Z1, /**/ Z2 ); } }