inline void SyrkLN ( T alpha, const DistMatrix<T>& A, T beta, DistMatrix<T>& C, bool conjugate=false ) { #ifndef RELEASE PushCallStack("internal::SyrkLN"); if( A.Grid() != C.Grid() ) throw std::logic_error ("A and C must be distributed over the same grid"); if( A.Height() != C.Height() || A.Height() != C.Width() ) { std::ostringstream msg; msg << "Nonconformal SyrkLN:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); // Temporary distributions DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,VR, STAR> A1_VR_STAR(g); DistMatrix<T,STAR,MR > A1Trans_STAR_MR(g); A1_MC_STAR.AlignWith( C ); A1_VR_STAR.AlignWith( C ); A1Trans_STAR_MR.AlignWith( C ); // Start the algorithm ScaleTrapezoid( beta, LEFT, LOWER, 0, C ); LockedPartitionRight( A, AL, AR, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); //--------------------------------------------------------------------// A1_VR_STAR = A1_MC_STAR = A1; A1Trans_STAR_MR.TransposeFrom( A1_VR_STAR, conjugate ); LocalTrrk( LOWER, alpha, A1_MC_STAR, A1Trans_STAR_MR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void HermitianFromEVD ( UpperOrLower uplo, Matrix<F>& A, const Matrix<BASE(F)>& w, const Matrix<F>& Z ) { #ifndef RELEASE CallStackEntry entry("HermitianFromEVD"); #endif typedef BASE(F) R; Matrix<F> ZL, ZR, Z0, Z1, Z2; Matrix<R> wT, w0, wB, w1, w2; Matrix<F> Z1Copy, Y1; A.ResizeTo( Z.Height(), Z.Height() ); if( uplo == LOWER ) MakeTrapezoidal( UPPER, A, 1 ); else MakeTrapezoidal( LOWER, A, -1 ); LockedPartitionRight( Z, ZL, ZR, 0 ); LockedPartitionDown ( w, wT, wB, 0 ); while( ZL.Width() < Z.Width() ) { LockedRepartitionRight ( ZL, /**/ ZR, Z0, /**/ Z1, Z2 ); LockedRepartitionDown ( wT, w0, /**/ /**/ w1, wB, w2 ); //--------------------------------------------------------------------// Y1 = Z1Copy = Z1; DiagonalScale( RIGHT, NORMAL, w1, Y1 ); Trrk( uplo, NORMAL, ADJOINT, F(1), Z1Copy, Y1, F(1), A ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( wT, w0, w1, /**/ /**/ wB, w2 ); SlideLockedPartitionRight ( ZL, /**/ ZR, Z0, Z1, /**/ Z2 ); } }
inline void LocalTrmmAccumulateRLN ( Orientation orientation, UnitOrNonUnit diag, T alpha, const DistMatrix<T,MC, MR >& L, const DistMatrix<T,STAR,MC >& X_STAR_MC, DistMatrix<T,MR, STAR>& ZAdjOrTrans_MR_STAR ) { #ifndef RELEASE PushCallStack("internal::LocalTrmmAccumulateRLN"); if( L.Grid() != X_STAR_MC.Grid() || X_STAR_MC.Grid() != ZAdjOrTrans_MR_STAR.Grid() ) throw std::logic_error ("{L,X,Z} must be distributed over the same grid"); if( L.Height() != L.Width() || L.Height() != X_STAR_MC.Width() || L.Height() != ZAdjOrTrans_MR_STAR.Height() ) { std::ostringstream msg; msg << "Nonconformal LocalTrmmAccumulateRLN: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X[* ,MC] ~ " << X_STAR_MC.Height() << " x " << X_STAR_MC.Width() << "\n" << " Z^H/T[MR,* ] ~ " << ZAdjOrTrans_MR_STAR.Height() << " x " << ZAdjOrTrans_MR_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( X_STAR_MC.RowAlignment() != L.ColAlignment() || ZAdjOrTrans_MR_STAR.ColAlignment() != L.RowAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<T> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<T> D11(g); DistMatrix<T,STAR,MC> XL_STAR_MC(g), XR_STAR_MC(g), X0_STAR_MC(g), X1_STAR_MC(g), X2_STAR_MC(g); DistMatrix<T,MR,STAR> ZTAdjOrTrans_MR_STAR(g), Z0AdjOrTrans_MR_STAR(g), ZBAdjOrTrans_MR_STAR(g), Z1AdjOrTrans_MR_STAR(g), Z2AdjOrTrans_MR_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); LockedPartitionRight( X_STAR_MC, XL_STAR_MC, XR_STAR_MC, 0 ); PartitionDown ( ZAdjOrTrans_MR_STAR, ZTAdjOrTrans_MR_STAR, ZBAdjOrTrans_MR_STAR, 0 ); while( LTL.Height() < L.Height() ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); LockedRepartitionRight ( XL_STAR_MC, /**/ XR_STAR_MC, X0_STAR_MC, /**/ X1_STAR_MC, X2_STAR_MC ); RepartitionDown ( ZTAdjOrTrans_MR_STAR, Z0AdjOrTrans_MR_STAR, /*********************/ /*********************/ Z1AdjOrTrans_MR_STAR, ZBAdjOrTrans_MR_STAR, Z2AdjOrTrans_MR_STAR ); D11.AlignWith( L11 ); //--------------------------------------------------------------------// D11 = L11; MakeTrapezoidal( LEFT, LOWER, 0, D11 ); if( diag == UNIT ) SetDiagonalToOne( D11 ); LocalGemm ( orientation, orientation, alpha, D11, X1_STAR_MC, T(1), Z1AdjOrTrans_MR_STAR ); LocalGemm ( orientation, orientation, alpha, L21, X2_STAR_MC, T(1), Z1AdjOrTrans_MR_STAR ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlideLockedPartitionRight ( XL_STAR_MC, /**/ XR_STAR_MC, X0_STAR_MC, X1_STAR_MC, /**/ X2_STAR_MC ); SlidePartitionDown ( ZTAdjOrTrans_MR_STAR, Z0AdjOrTrans_MR_STAR, Z1AdjOrTrans_MR_STAR, /********************/ /********************/ ZBAdjOrTrans_MR_STAR, Z2AdjOrTrans_MR_STAR ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void Her2kLN ( T alpha, const DistMatrix<T,MC,MR>& A, const DistMatrix<T,MC,MR>& B, T beta, DistMatrix<T,MC,MR>& C ) { #ifndef RELEASE PushCallStack("internal::Her2kLN"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || A.Height() != C.Width() || B.Height() != C.Height() || B.Height() != C.Width() || A.Width() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal Her2kLN:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T,MC,MR> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T,MC,MR> BL(g), BR(g), B0(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,MC, STAR> B1_MC_STAR(g); DistMatrix<T,VR, STAR> A1_VR_STAR(g); DistMatrix<T,VR, STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR > A1Adj_STAR_MR(g); DistMatrix<T,STAR,MR > B1Adj_STAR_MR(g); A1_MC_STAR.AlignWith( C ); B1_MC_STAR.AlignWith( C ); A1_VR_STAR.AlignWith( C ); B1_VR_STAR.AlignWith( C ); A1Adj_STAR_MR.AlignWith( C ); B1Adj_STAR_MR.AlignWith( C ); // Start the algorithm ScaleTrapezoid( beta, LEFT, LOWER, 0, C ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionRight( B, BL, BR, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); //--------------------------------------------------------------------// A1_VR_STAR = A1_MC_STAR = A1; A1Adj_STAR_MR.AdjointFrom( A1_VR_STAR ); B1_VR_STAR = B1_MC_STAR = B1; B1Adj_STAR_MR.AdjointFrom( B1_VR_STAR ); LocalTrr2k ( LOWER, alpha, A1_MC_STAR, B1Adj_STAR_MR, B1_MC_STAR, A1Adj_STAR_MR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void LocalTrmmAccumulateLLN ( Orientation orientation, UnitOrNonUnit diag, T alpha, const DistMatrix<T,MC, MR >& L, const DistMatrix<T,STAR,MR >& XTrans_STAR_MR, DistMatrix<T,MC, STAR>& Z_MC_STAR ) { #ifndef RELEASE CallStackEntry entry("internal::LocalTrmmAccumulateLLN"); if( L.Grid() != XTrans_STAR_MR.Grid() || XTrans_STAR_MR.Grid() != Z_MC_STAR.Grid() ) throw std::logic_error ("{L,X,Z} must be distributed over the same grid"); if( L.Height() != L.Width() || L.Height() != XTrans_STAR_MR.Width() || L.Height() != Z_MC_STAR.Height() || XTrans_STAR_MR.Height() != Z_MC_STAR.Width() ) { std::ostringstream msg; msg << "Nonconformal LocalTrmmAccumulateLLN: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X^H/T[* ,MR] ~ " << XTrans_STAR_MR.Height() << " x " << XTrans_STAR_MR.Width() << "\n" << " Z[MC,* ] ~ " << Z_MC_STAR.Height() << " x " << Z_MC_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( XTrans_STAR_MR.RowAlignment() != L.RowAlignment() || Z_MC_STAR.ColAlignment() != L.ColAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<T> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<T> D11(g); DistMatrix<T,STAR,MR> XLTrans_STAR_MR(g), XRTrans_STAR_MR(g), X0Trans_STAR_MR(g), X1Trans_STAR_MR(g), X2Trans_STAR_MR(g); DistMatrix<T,MC,STAR> ZT_MC_STAR(g), Z0_MC_STAR(g), ZB_MC_STAR(g), Z1_MC_STAR(g), Z2_MC_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); LockedPartitionRight ( XTrans_STAR_MR, XLTrans_STAR_MR, XRTrans_STAR_MR, 0 ); PartitionDown ( Z_MC_STAR, ZT_MC_STAR, ZB_MC_STAR, 0 ); while( LTL.Height() < L.Height() ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); LockedRepartitionRight ( XLTrans_STAR_MR, /**/ XRTrans_STAR_MR, X0Trans_STAR_MR, /**/ X1Trans_STAR_MR, X2Trans_STAR_MR ); RepartitionDown ( ZT_MC_STAR, Z0_MC_STAR, /**********/ /**********/ Z1_MC_STAR, ZB_MC_STAR, Z2_MC_STAR ); D11.AlignWith( L11 ); //--------------------------------------------------------------------// D11 = L11; MakeTriangular( LOWER, D11 ); if( diag == UNIT ) SetDiagonal( D11, T(1) ); LocalGemm ( NORMAL, orientation, alpha, D11, X1Trans_STAR_MR, T(1), Z1_MC_STAR ); LocalGemm ( NORMAL, orientation, alpha, L21, X1Trans_STAR_MR, T(1), Z2_MC_STAR ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlideLockedPartitionRight ( XLTrans_STAR_MR, /**/ XRTrans_STAR_MR, X0Trans_STAR_MR, X1Trans_STAR_MR, /**/ X2Trans_STAR_MR ); SlidePartitionDown ( ZT_MC_STAR, Z0_MC_STAR, Z1_MC_STAR, /**********/ /**********/ ZB_MC_STAR, Z2_MC_STAR ); } PopBlocksizeStack(); }
inline void GemmTTC ( Orientation orientationOfA, Orientation orientationOfB, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmTTC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( orientationOfA == NORMAL || orientationOfB == NORMAL ) throw std::logic_error ("GemmTTC expects A and B to be (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Height() != C.Width() || A.Height() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal GemmTTC: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,STAR,MC > A1_STAR_MC(g); DistMatrix<T,VR, STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR > B1AdjOrTrans_STAR_MR(g); A1_STAR_MC.AlignWith( C ); B1_VR_STAR.AlignWith( C ); B1AdjOrTrans_STAR_MR.AlignWith( C ); // Start the algorithm Scale( beta, C ); LockedPartitionDown ( A, AT, AB, 0 ); LockedPartitionRight( B, BL, BR, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); //--------------------------------------------------------------------// A1_STAR_MC = A1; B1_VR_STAR = B1; if( orientationOfB == ADJOINT ) B1AdjOrTrans_STAR_MR.AdjointFrom( B1_VR_STAR ); else B1AdjOrTrans_STAR_MR.TransposeFrom( B1_VR_STAR ); // C[MC,MR] += alpha (A1[*,MC])^[T/H] (B1[MR,*])^[T/H] // = alpha (A1^[T/H])[MC,*] (B1^[T/H])[*,MR] LocalGemm ( orientationOfA, NORMAL, alpha, A1_STAR_MC, B1AdjOrTrans_STAR_MR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void GemmNNDot ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmNNDot"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || B.Width() != C.Width() || A.Width() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmNNDot: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); if( A.Height() > B.Width() ) { // Matrix views DistMatrix<T> AT(g), AB(g), A0(g), A1(g), A2(g); DistMatrix<T> BL(g), B0(g), BR(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), C1L(g), C1R(g), CB(g), C1(g), C10(g), C11(g), C12(g), C2(g); // Temporary distributions DistMatrix<T,STAR,VC> A1_STAR_VC(g); DistMatrix<T,VC,STAR> B1_VC_STAR(g); DistMatrix<T,STAR,STAR> C11_STAR_STAR(g); // Star the algorithm Scale( beta, C ); LockedPartitionDown ( A, AT, AB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); A1_STAR_VC = A1; B1_VC_STAR.AlignWith( A1_STAR_VC ); LockedPartitionRight( B, BL, BR, 0 ); PartitionRight( C1, C1L, C1R, 0 ); while( BR.Width() > 0 ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( C1L, /**/ C1R, C10, /**/ C11, C12 ); Zeros( C11.Height(), C11.Width(), C11_STAR_STAR ); //------------------------------------------------------------// B1_VC_STAR = B1; LocalGemm ( NORMAL, NORMAL, alpha, A1_STAR_VC, B1_VC_STAR, T(0), C11_STAR_STAR ); C11.SumScatterUpdate( T(1), C11_STAR_STAR ); //------------------------------------------------------------// SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( C1L, /**/ C1R, C10, C11, /**/ C12 ); } B1_VC_STAR.FreeAlignments(); SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } } else { // Matrix views DistMatrix<T> AT(g), AB(g), A0(g), A1(g), A2(g); DistMatrix<T> BL(g), B0(g), BR(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C1T(g), C01(g), C0(g), C1(g), C2(g), C1B(g), C11(g), C21(g); // Temporary distributions DistMatrix<T,STAR,VR> A1_STAR_VR(g); DistMatrix<T,VR,STAR> B1_VR_STAR(g); DistMatrix<T,STAR,STAR> C11_STAR_STAR(g); // Star the algorithm Scale( beta, C ); LockedPartitionRight( B, BL, BR, 0 ); PartitionRight( C, CL, CR, 0 ); while( BR.Width() > 0 ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); B1_VR_STAR = B1; A1_STAR_VR.AlignWith( B1_VR_STAR ); LockedPartitionDown ( A, AT, AB, 0 ); PartitionDown ( C1, C1T, C1B, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); RepartitionDown ( C1T, C01, /***/ /***/ C11, C1B, C21 ); Zeros( C11.Height(), C11.Width(), C11_STAR_STAR ); //------------------------------------------------------------// A1_STAR_VR = A1; LocalGemm ( NORMAL, NORMAL, alpha, A1_STAR_VR, B1_VR_STAR, T(0), C11_STAR_STAR ); C11.SumScatterUpdate( T(1), C11_STAR_STAR ); //------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlidePartitionDown ( C1T, C01, C11, /***/ /***/ C1B, C21 ); } A1_STAR_VR.FreeAlignments(); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } } #ifndef RELEASE PopCallStack(); #endif }
inline void GemmNNA ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmNNA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || B.Width() != C.Width() || A.Width() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmNNA: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,VR,STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR> B1Trans_STAR_MR(g); DistMatrix<T,MC,STAR> D1_MC_STAR(g); B1_VR_STAR.AlignWith( A ); B1Trans_STAR_MR.AlignWith( A ); D1_MC_STAR.AlignWith( A ); // Start the algorithm Scale( beta, C ); LockedPartitionRight( B, BL, BR, 0 ); PartitionRight( C, CL, CR, 0 ); while( BR.Width() > 0 ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); Zeros( C1.Height(), C1.Width(), D1_MC_STAR ); //--------------------------------------------------------------------// B1_VR_STAR = B1; B1Trans_STAR_MR.TransposeFrom( B1_VR_STAR ); // D1[MC,*] := alpha A[MC,MR] B1[MR,*] LocalGemm ( NORMAL, TRANSPOSE, alpha, A, B1Trans_STAR_MR, T(0), D1_MC_STAR ); // C1[MC,MR] += scattered result of D1[MC,*] summed over grid rows C1.SumScatterUpdate( T(1), D1_MC_STAR ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void internal::LocalTrmmAccumulateLUN ( Orientation orientation, UnitOrNonUnit diag, T alpha, const DistMatrix<T,MC, MR >& U, const DistMatrix<T,STAR,MR >& XAdjOrTrans_STAR_MR, DistMatrix<T,MC, STAR>& Z_MC_STAR ) { #ifndef RELEASE PushCallStack("internal::LocalTrmmAccumulateLUN"); if( U.Grid() != XAdjOrTrans_STAR_MR.Grid() || XAdjOrTrans_STAR_MR.Grid() != Z_MC_STAR.Grid() ) throw std::logic_error ("{U,X,Z} must be distributed over the same grid"); if( U.Height() != U.Width() || U.Height() != XAdjOrTrans_STAR_MR.Width() || U.Height() != Z_MC_STAR.Height() || XAdjOrTrans_STAR_MR.Height() != Z_MC_STAR.Width() ) { std::ostringstream msg; msg << "Nonconformal LocalTrmmAccumulateLUN: \n" << " U ~ " << U.Height() << " x " << U.Width() << "\n" << " X^H/T[* ,MR] ~ " << XAdjOrTrans_STAR_MR.Height() << " x " << XAdjOrTrans_STAR_MR.Width() << "\n" << " Z[MC,* ] ~ " << Z_MC_STAR.Height() << " x " << Z_MC_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( XAdjOrTrans_STAR_MR.RowAlignment() != U.RowAlignment() || Z_MC_STAR.ColAlignment() != U.ColAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<T,MC,MR> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<T,MC,MR> D11(g); DistMatrix<T,STAR,MR> XLAdjOrTrans_STAR_MR(g), XRAdjOrTrans_STAR_MR(g), X0AdjOrTrans_STAR_MR(g), X1AdjOrTrans_STAR_MR(g), X2AdjOrTrans_STAR_MR(g); DistMatrix<T,MC,STAR> ZT_MC_STAR(g), Z0_MC_STAR(g), ZB_MC_STAR(g), Z1_MC_STAR(g), Z2_MC_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); LockedPartitionRight ( XAdjOrTrans_STAR_MR, XLAdjOrTrans_STAR_MR, XRAdjOrTrans_STAR_MR, 0 ); PartitionDown ( Z_MC_STAR, ZT_MC_STAR, ZB_MC_STAR, 0 ); while( UTL.Height() < U.Height() ) { LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); LockedRepartitionRight ( XLAdjOrTrans_STAR_MR, /**/ XRAdjOrTrans_STAR_MR, X0AdjOrTrans_STAR_MR, /**/ X1AdjOrTrans_STAR_MR, X2AdjOrTrans_STAR_MR ); RepartitionDown ( ZT_MC_STAR, Z0_MC_STAR, /**********/ /**********/ Z1_MC_STAR, ZB_MC_STAR, Z2_MC_STAR ); D11.AlignWith( U11 ); //--------------------------------------------------------------------// D11 = U11; MakeTrapezoidal( LEFT, UPPER, 0, D11 ); if( diag == UNIT ) SetDiagonalToOne( D11 ); internal::LocalGemm ( NORMAL, orientation, alpha, D11, X1AdjOrTrans_STAR_MR, (T)1, Z1_MC_STAR ); internal::LocalGemm ( NORMAL, orientation, alpha, U01, X1AdjOrTrans_STAR_MR, (T)1, Z0_MC_STAR ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); SlideLockedPartitionRight ( XLAdjOrTrans_STAR_MR, /**/ XRAdjOrTrans_STAR_MR, X0AdjOrTrans_STAR_MR, X1AdjOrTrans_STAR_MR, /**/ X2AdjOrTrans_STAR_MR ); SlidePartitionDown ( ZT_MC_STAR, Z0_MC_STAR, Z1_MC_STAR, /**********/ /**********/ ZB_MC_STAR, Z2_MC_STAR ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void internal::GemmTNA ( Orientation orientationOfA, T alpha, const DistMatrix<T,MC,MR>& A, const DistMatrix<T,MC,MR>& B, T beta, DistMatrix<T,MC,MR>& C ) { #ifndef RELEASE PushCallStack("internal::GemmTNA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( orientationOfA == NORMAL ) throw std::logic_error("GemmTNA assumes A is (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Width() != C.Width() || A.Height() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmTNA: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T,MC,MR> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T,MC,MR> CL(g), CR(g), C0(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,MC,STAR> B1_MC_STAR(g); DistMatrix<T,MR,STAR> D1_MR_STAR(g); DistMatrix<T,MR,MC > D1_MR_MC(g); DistMatrix<T,MC,MR > D1(g); // Start the algorithm Scal( beta, C ); LockedPartitionRight( B, BL, BR, 0 ); PartitionRight( C, CL, CR, 0 ); while( BR.Width() > 0 ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); B1_MC_STAR.AlignWith( A ); D1_MR_STAR.AlignWith( A ); D1_MR_STAR.ResizeTo( C1.Height(), C1.Width() ); D1.AlignWith( C1 ); //--------------------------------------------------------------------// B1_MC_STAR = B1; // B1[MC,*] <- B1[MC,MR] // D1[MR,*] := alpha (A1[MC,MR])^T B1[MC,*] // = alpha (A1^T)[MR,MC] B1[MC,*] internal::LocalGemm ( orientationOfA, NORMAL, alpha, A, B1_MC_STAR, (T)0, D1_MR_STAR ); // C1[MC,MR] += scattered & transposed D1[MR,*] summed over grid cols D1_MR_MC.SumScatterFrom( D1_MR_STAR ); D1 = D1_MR_MC; Axpy( (T)1, D1, C1 ); //--------------------------------------------------------------------// B1_MC_STAR.FreeAlignments(); D1_MR_STAR.FreeAlignments(); D1.FreeAlignments(); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void internal::GemmTNB ( Orientation orientationOfA, T alpha, const DistMatrix<T,MC,MR>& A, const DistMatrix<T,MC,MR>& B, T beta, DistMatrix<T,MC,MR>& C ) { #ifndef RELEASE PushCallStack("internal::GemmTNB"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( orientationOfA == NORMAL ) throw std::logic_error("GemmTNB assumes A is (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Width() != C.Width() || A.Height() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmTNB: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T,MC,MR> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T,MC,MR> CT(g), C0(g), CB(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,MC,STAR> A1_MC_STAR(g); DistMatrix<T,STAR,MR> D1_STAR_MR(g); // Start the algorithm Scal( beta, C ); LockedPartitionRight( A, AL, AR, 0 ); PartitionDown ( C, CT, CB, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); A1_MC_STAR.AlignWith( B ); D1_STAR_MR.AlignWith( B ); D1_STAR_MR.ResizeTo( C1.Height(), C1.Width() ); //--------------------------------------------------------------------// A1_MC_STAR = A1; // A1[MC,*] <- A1[MC,MR] // D1[*,MR] := alpha (A1[MC,*])^T B[MC,MR] // = alpha (A1^T)[*,MC] B[MC,MR] internal::LocalGemm ( orientationOfA, NORMAL, alpha, A1_MC_STAR, B, (T)0, D1_STAR_MR ); // C1[MC,MR] += scattered result of D1[*,MR] summed over grid cols C1.SumScatterUpdate( (T)1, D1_STAR_MR ); //--------------------------------------------------------------------// A1_MC_STAR.FreeAlignments(); D1_STAR_MR.FreeAlignments(); SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void ReformHermitianMatrix ( UpperOrLower uplo, DistMatrix<R,MC,MR>& A, const DistMatrix<R,VR,STAR>& w, const DistMatrix<R,MC,MR>& Z, const RealFunctor& f ) { #ifndef RELEASE PushCallStack("hermitian_function::ReformHermitianMatrix"); #endif const Grid& g = A.Grid(); DistMatrix<R,MC,MR> ZL(g), ZR(g), Z0(g), Z1(g), Z2(g); DistMatrix<R,VR,STAR> wT(g), w0(g), wB(g), w1(g), w2(g); DistMatrix<R,MC, STAR> Z1_MC_STAR(g); DistMatrix<R,VR, STAR> Z1_VR_STAR(g); DistMatrix<R,STAR,MR > Z1Trans_STAR_MR(g); DistMatrix<R,STAR,STAR> w1_STAR_STAR(g); if( uplo == LOWER ) MakeTrapezoidal( LEFT, UPPER, 1, A ); else MakeTrapezoidal( LEFT, LOWER, -1, A ); LockedPartitionRight( Z, ZL, ZR, 0 ); LockedPartitionDown ( w, wT, wB, 0 ); while( ZL.Width() < Z.Width() ) { LockedRepartitionRight ( ZL, /**/ ZR, Z0, /**/ Z1, Z2 ); LockedRepartitionDown ( wT, w0, /**/ /**/ w1, wB, w2 ); Z1_MC_STAR.AlignWith( A ); Z1_VR_STAR.AlignWith( A ); Z1Trans_STAR_MR.AlignWith( A ); //--------------------------------------------------------------------// Z1_MC_STAR = Z1; Z1_VR_STAR = Z1_MC_STAR; w1_STAR_STAR = w1; // Scale Z1[VR,* ] with the modified eigenvalues const int width = Z1_VR_STAR.Width(); const int localHeight = Z1_VR_STAR.LocalHeight(); for( int j=0; j<width; ++j ) { const R omega = f(w1_STAR_STAR.GetLocalEntry(j,0)); R* buffer = Z1_VR_STAR.LocalBuffer(0,j); for( int iLocal=0; iLocal<localHeight; ++iLocal ) buffer[iLocal] *= omega; } Z1Trans_STAR_MR.TransposeFrom( Z1_VR_STAR ); internal::LocalTrrk( uplo, (R)1, Z1_MC_STAR, Z1Trans_STAR_MR, (R)1, A ); //--------------------------------------------------------------------// Z1Trans_STAR_MR.FreeAlignments(); Z1_VR_STAR.FreeAlignments(); Z1_MC_STAR.FreeAlignments(); SlideLockedPartitionDown ( wT, w0, w1, /**/ /**/ wB, w2 ); SlideLockedPartitionRight ( ZL, /**/ ZR, Z0, Z1, /**/ Z2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void ReformNormalMatrix ( DistMatrix<Complex<R>,MC,MR >& A, const DistMatrix<R, VR,STAR>& w, const DistMatrix<Complex<R>,MC,MR >& Z, const ComplexFunctor& f ) { #ifndef RELEASE PushCallStack("hermitian_function::ReformNormalMatrix"); #endif const Grid& g = A.Grid(); typedef Complex<R> C; DistMatrix<C,MC,MR> ZL(g), ZR(g), Z0(g), Z1(g), Z2(g); DistMatrix<R,VR,STAR> wT(g), w0(g), wB(g), w1(g), w2(g); DistMatrix<C,MC, STAR> Z1_MC_STAR(g); DistMatrix<C,VR, STAR> Z1_VR_STAR(g); DistMatrix<C,STAR,MR > Z1Adj_STAR_MR(g); DistMatrix<R,STAR,STAR> w1_STAR_STAR(g); Zero( A ); LockedPartitionRight( Z, ZL, ZR, 0 ); LockedPartitionDown ( w, wT, wB, 0 ); while( ZL.Width() < Z.Width() ) { LockedRepartitionRight ( ZL, /**/ ZR, Z0, /**/ Z1, Z2 ); LockedRepartitionDown ( wT, w0, /**/ /**/ w1, wB, w2 ); Z1_MC_STAR.AlignWith( A ); Z1_VR_STAR.AlignWith( A ); Z1Adj_STAR_MR.AlignWith( A ); //--------------------------------------------------------------------// Z1_MC_STAR = Z1; Z1_VR_STAR = Z1_MC_STAR; w1_STAR_STAR = w1; // Scale Z1[VR,* ] with the modified eigenvalues const int width = Z1_VR_STAR.Width(); const int localHeight = Z1_VR_STAR.LocalHeight(); for( int j=0; j<width; ++j ) { const C conjOmega = Conj(f(w1_STAR_STAR.GetLocalEntry(j,0))); C* buffer = Z1_VR_STAR.LocalBuffer(0,j); for( int iLocal=0; iLocal<localHeight; ++iLocal ) buffer[iLocal] *= conjOmega; } Z1Adj_STAR_MR.AdjointFrom( Z1_VR_STAR ); internal::LocalGemm ( NORMAL, NORMAL, (C)1, Z1_MC_STAR, Z1Adj_STAR_MR, (C)1, A ); //--------------------------------------------------------------------// Z1Adj_STAR_MR.FreeAlignments(); Z1_VR_STAR.FreeAlignments(); Z1_MC_STAR.FreeAlignments(); SlideLockedPartitionDown ( wT, w0, w1, /**/ /**/ wB, w2 ); SlideLockedPartitionRight ( ZL, /**/ ZR, Z0, Z1, /**/ Z2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void HermitianFromEVD ( UpperOrLower uplo, DistMatrix<F>& A, const DistMatrix<BASE(F),VR,STAR>& w, const DistMatrix<F>& Z ) { #ifndef RELEASE CallStackEntry entry("HermitianFromEVD"); #endif const Grid& g = A.Grid(); typedef BASE(F) R; DistMatrix<F> ZL(g), ZR(g), Z0(g), Z1(g), Z2(g); DistMatrix<R,VR,STAR> wT(g), w0(g), wB(g), w1(g), w2(g); DistMatrix<F,MC, STAR> Z1_MC_STAR(g); DistMatrix<F,VR, STAR> Z1_VR_STAR(g); DistMatrix<F,STAR,MR > Z1Adj_STAR_MR(g); DistMatrix<R,STAR,STAR> w1_STAR_STAR(g); A.ResizeTo( Z.Height(), Z.Height() ); if( uplo == LOWER ) MakeTrapezoidal( UPPER, A, 1 ); else MakeTrapezoidal( LOWER, A, -1 ); LockedPartitionRight( Z, ZL, ZR, 0 ); LockedPartitionDown ( w, wT, wB, 0 ); while( ZL.Width() < Z.Width() ) { LockedRepartitionRight ( ZL, /**/ ZR, Z0, /**/ Z1, Z2 ); LockedRepartitionDown ( wT, w0, /**/ /**/ w1, wB, w2 ); Z1_MC_STAR.AlignWith( A ); Z1_VR_STAR.AlignWith( A ); Z1Adj_STAR_MR.AlignWith( A ); //--------------------------------------------------------------------// Z1_MC_STAR = Z1; Z1_VR_STAR = Z1_MC_STAR; w1_STAR_STAR = w1; DiagonalScale( RIGHT, NORMAL, w1_STAR_STAR, Z1_VR_STAR ); Z1Adj_STAR_MR.AdjointFrom( Z1_VR_STAR ); LocalTrrk( uplo, F(1), Z1_MC_STAR, Z1Adj_STAR_MR, F(1), A ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( wT, w0, w1, /**/ /**/ wB, w2 ); SlideLockedPartitionRight ( ZL, /**/ ZR, Z0, Z1, /**/ Z2 ); } }
inline void Trr2kTTTT ( UpperOrLower uplo, Orientation orientationOfA, Orientation orientationOfB, Orientation orientationOfC, Orientation orientationOfD, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, const DistMatrix<T>& C, const DistMatrix<T>& D, T beta, DistMatrix<T>& E ) { #ifndef RELEASE PushCallStack("internal::Trr2kTTTT"); if( E.Height() != E.Width() || A.Height() != C.Height() || A.Width() != E.Height() || C.Width() != E.Height() || B.Height() != E.Width() || D.Height() != E.Width() || A.Height() != B.Width() || C.Height() != D.Width() ) throw std::logic_error("Nonconformal Trr2kTTTT"); #endif const Grid& g = E.Grid(); DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); DistMatrix<T> DL(g), DR(g), D0(g), D1(g), D2(g); DistMatrix<T,STAR,MC > A1_STAR_MC(g); DistMatrix<T,VR, STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR > B1AdjOrTrans_STAR_MR(g); DistMatrix<T,STAR,MC > C1_STAR_MC(g); DistMatrix<T,VR, STAR> D1_VR_STAR(g); DistMatrix<T,STAR,MR > D1AdjOrTrans_STAR_MR(g); A1_STAR_MC.AlignWith( E ); B1_VR_STAR.AlignWith( E ); B1AdjOrTrans_STAR_MR.AlignWith( E ); C1_STAR_MC.AlignWith( E ); D1_VR_STAR.AlignWith( E ); D1AdjOrTrans_STAR_MR.AlignWith( E ); LockedPartitionDown ( A, AT, AB, 0 ); LockedPartitionRight( B, BL, BR, 0 ); LockedPartitionDown ( C, CT, CB, 0 ); LockedPartitionRight( D, DL, DR, 0 ); while( AT.Height() < A.Height() ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); LockedRepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); LockedRepartitionRight ( DL, /**/ DR, D0, /**/ D1, D2 ); //--------------------------------------------------------------------// A1_STAR_MC = A1; C1_STAR_MC = C1; B1_VR_STAR = B1; D1_VR_STAR = D1; if( orientationOfB == ADJOINT ) B1AdjOrTrans_STAR_MR.AdjointFrom( B1_VR_STAR ); else B1AdjOrTrans_STAR_MR.TransposeFrom( B1_VR_STAR ); if( orientationOfD == ADJOINT ) D1AdjOrTrans_STAR_MR.AdjointFrom( D1_VR_STAR ); else D1AdjOrTrans_STAR_MR.TransposeFrom( D1_VR_STAR ); LocalTrr2k ( uplo, orientationOfA, orientationOfC, alpha, A1_STAR_MC, B1AdjOrTrans_STAR_MR, C1_STAR_MC, D1AdjOrTrans_STAR_MR, beta, E ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( DL, /**/ DR, D0, D1, /**/ D2 ); SlideLockedPartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); } #ifndef RELEASE PopCallStack(); #endif }
void Trr2kNTTN ( UpperOrLower uplo, Orientation orientationOfB, Orientation orientationOfC, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, const DistMatrix<T>& C, const DistMatrix<T>& D, T beta, DistMatrix<T>& E ) { #ifndef RELEASE CallStackEntry entry("internal::Trr2kNTTN"); if( E.Height() != E.Width() || A.Width() != C.Height() || A.Height() != E.Height() || C.Width() != E.Height() || B.Height() != E.Width() || D.Width() != E.Width() || A.Width() != B.Width() || C.Height() != D.Height() ) LogicError("Nonconformal Trr2kNTTN"); #endif const Grid& g = E.Grid(); DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); DistMatrix<T> DT(g), D0(g), DB(g), D1(g), D2(g); DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,VR, STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR > B1AdjOrTrans_STAR_MR(g); DistMatrix<T,STAR,MC > C1_STAR_MC(g); DistMatrix<T,MR, STAR> D1Trans_MR_STAR(g); A1_MC_STAR.AlignWith( E ); B1_VR_STAR.AlignWith( E ); B1AdjOrTrans_STAR_MR.AlignWith( E ); C1_STAR_MC.AlignWith( E ); D1Trans_MR_STAR.AlignWith( E ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionRight( B, BL, BR, 0 ); LockedPartitionDown ( C, CT, CB, 0 ); LockedPartitionDown ( D, DT, DB, 0 ); while( AL.Width() < A.Width() ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); LockedRepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); LockedRepartitionDown ( DT, D0, /**/ /**/ D1, DB, D2 ); //--------------------------------------------------------------------// A1_MC_STAR = A1; C1_STAR_MC = C1; B1_VR_STAR = B1; if( orientationOfB == ADJOINT ) B1AdjOrTrans_STAR_MR.AdjointFrom( B1_VR_STAR ); else B1AdjOrTrans_STAR_MR.TransposeFrom( B1_VR_STAR ); D1Trans_MR_STAR.TransposeFrom( D1 ); LocalTrr2k ( uplo, orientationOfC, TRANSPOSE, alpha, A1_MC_STAR, B1AdjOrTrans_STAR_MR, C1_STAR_MC, D1Trans_MR_STAR, beta, E ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlideLockedPartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); SlideLockedPartitionDown ( DT, D0, D1, /**/ /**/ DB, D2 ); } }
inline void LocalSymvRowAccumulateU ( T alpha, const DistMatrix<T>& A, const DistMatrix<T,STAR,MC>& x_STAR_MC, const DistMatrix<T,STAR,MR>& x_STAR_MR, DistMatrix<T,STAR,MC>& z_STAR_MC, DistMatrix<T,STAR,MR>& z_STAR_MR ) { #ifndef RELEASE PushCallStack("internal::LocalSymvRowAccumulateU"); if( A.Grid() != x_STAR_MC.Grid() || x_STAR_MC.Grid() != x_STAR_MR.Grid() || x_STAR_MR.Grid() != z_STAR_MC.Grid() || z_STAR_MC.Grid() != z_STAR_MR.Grid() ) throw std::logic_error ("{A,x,z} must be distributed over the same grid"); if( x_STAR_MC.Height() != 1 || x_STAR_MR.Height() != 1 || z_STAR_MC.Height() != 1 || z_STAR_MR.Height() != 1 ) throw std::logic_error("Expected x and z to be row vectors"); if( A.Height() != A.Width() || A.Height() != x_STAR_MC.Width() || A.Height() != x_STAR_MR.Width() || A.Height() != z_STAR_MC.Width() || A.Height() != z_STAR_MR.Width() ) { std::ostringstream msg; msg << "Nonconformal LocalSymvRowAccumulateU: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " x[* ,MC] ~ " << x_STAR_MC.Height() << " x " << x_STAR_MC.Width() << "\n" << " x[* ,MR] ~ " << x_STAR_MR.Height() << " x " << x_STAR_MR.Width() << "\n" << " z[* ,MC] ~ " << z_STAR_MC.Height() << " x " << z_STAR_MC.Width() << "\n" << " z[* ,MR] ~ " << z_STAR_MR.Height() << " x " << z_STAR_MR.Width() << "\n"; throw std::logic_error( msg.str() ); } if( x_STAR_MC.RowAlignment() != A.ColAlignment() || x_STAR_MR.RowAlignment() != A.RowAlignment() || z_STAR_MC.RowAlignment() != A.ColAlignment() || z_STAR_MR.RowAlignment() != A.RowAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> A11(g), A12(g); DistMatrix<T> D11(g); DistMatrix<T,STAR,MC> x1_STAR_MC(g); DistMatrix<T,STAR,MR> xL_STAR_MR(g), xR_STAR_MR(g), x0_STAR_MR(g), x1_STAR_MR(g), x2_STAR_MR(g); DistMatrix<T,STAR,MC> z1_STAR_MC(g); DistMatrix<T,STAR,MR> z1_STAR_MR(g), z2_STAR_MR(g); // We want our local gemvs to be of width blocksize, so we will // temporarily change to max(r,c) times the current blocksize const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*LocalSymvBlocksize<T>() ); LockedPartitionRight( x_STAR_MR, xL_STAR_MR, xR_STAR_MR, 0 ); while( xL_STAR_MR.Width() < x_STAR_MR.Width() ) { LockedRepartitionRight ( xL_STAR_MR, /**/ xR_STAR_MR, x0_STAR_MR, /**/ x1_STAR_MR, x2_STAR_MR ); const int n0 = x0_STAR_MR.Width(); const int n1 = x1_STAR_MR.Width(); const int n2 = x2_STAR_MR.Width(); LockedView( A11, A, n0, n0, n1, n1 ); LockedView( A12, A, n0, n0+n1, n1, n2 ); LockedView( x1_STAR_MC, x_STAR_MC, 0, n0, 1, n1 ); View( z1_STAR_MC, z_STAR_MC, 0, n0, 1, n1 ); View( z1_STAR_MR, z_STAR_MR, 0, n0, 1, n1 ); View( z2_STAR_MR, z_STAR_MR, 0, n0+n1, 1, n2 ); D11.AlignWith( A11 ); //--------------------------------------------------------------------// // TODO: These diagonal block updates can be greatly improved D11 = A11; MakeTrapezoidal( LEFT, UPPER, 0, D11 ); Gemv ( NORMAL, alpha, D11.LockedLocalMatrix(), x1_STAR_MR.LockedLocalMatrix(), T(1), z1_STAR_MC.LocalMatrix() ); MakeTrapezoidal( LEFT, UPPER, 1, D11 ); Gemv ( TRANSPOSE, alpha, D11.LockedLocalMatrix(), x1_STAR_MC.LockedLocalMatrix(), T(1), z1_STAR_MR.LocalMatrix() ); Gemv ( NORMAL, alpha, A12.LockedLocalMatrix(), x2_STAR_MR.LockedLocalMatrix(), T(1), z1_STAR_MC.LocalMatrix() ); Gemv ( TRANSPOSE, alpha, A12.LockedLocalMatrix(), x1_STAR_MC.LockedLocalMatrix(), T(1), z2_STAR_MR.LocalMatrix() ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionRight ( xL_STAR_MR, /**/ xR_STAR_MR, x0_STAR_MR, x1_STAR_MR, /**/ x2_STAR_MR ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void GemmNNC ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmNNC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || B.Width() != C.Width() || A.Width() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmNNC: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,MC,STAR> A1_MC_STAR(g); DistMatrix<T,MR,STAR> B1Trans_MR_STAR(g); A1_MC_STAR.AlignWith( C ); B1Trans_MR_STAR.AlignWith( C ); // Start the algorithm Scale( beta, C ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionDown( BT, B0, /**/ /**/ B1, BB, B2 ); //--------------------------------------------------------------------// A1_MC_STAR = A1; B1Trans_MR_STAR.TransposeFrom( B1 ); // C[MC,MR] += alpha A1[MC,*] (B1^T[MR,*])^T // = alpha A1[MC,*] B1[*,MR] LocalGemm ( NORMAL, TRANSPOSE, alpha, A1_MC_STAR, B1Trans_MR_STAR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionRight( AL, /**/ AR, A0, A1, /**/ A2 ); SlideLockedPartitionDown( BT, B0, B1, /**/ /**/ BB, B2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void Syr2kUN ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C, bool conjugate=false ) { #ifndef RELEASE CallStackEntry entry("internal::Syr2kUN"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || A.Height() != C.Width() || B.Height() != C.Height() || B.Height() != C.Width() || A.Width() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal Syr2kUN:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = C.Grid(); // Matrix views DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,MC, STAR> B1_MC_STAR(g); DistMatrix<T,VR, STAR> A1_VR_STAR(g); DistMatrix<T,VR, STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR > A1Trans_STAR_MR(g); DistMatrix<T,STAR,MR > B1Trans_STAR_MR(g); A1_MC_STAR.AlignWith( C ); B1_MC_STAR.AlignWith( C ); A1_VR_STAR.AlignWith( C ); B1_VR_STAR.AlignWith( C ); A1Trans_STAR_MR.AlignWith( C ); B1Trans_STAR_MR.AlignWith( C ); // Start the algorithm ScaleTrapezoid( beta, LEFT, UPPER, 0, C ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionRight( B, BL, BR, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); //--------------------------------------------------------------------// A1_VR_STAR = A1_MC_STAR = A1; A1Trans_STAR_MR.TransposeFrom( A1_VR_STAR, conjugate ); B1_VR_STAR = B1_MC_STAR = B1; B1Trans_STAR_MR.TransposeFrom( B1_VR_STAR, conjugate ); LocalTrr2k ( UPPER, alpha, A1_MC_STAR, B1Trans_STAR_MR, B1_MC_STAR, A1Trans_STAR_MR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); } }
inline void GemmTTB ( Orientation orientationOfA, Orientation orientationOfB, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmTTB"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( orientationOfA == NORMAL || orientationOfB == NORMAL ) throw std::logic_error ("GemmTTB expects A and B to be (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Height() != C.Width() || A.Height() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal GemmTTB: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,VR, STAR> A1_VR_STAR(g); DistMatrix<T,STAR,MR > A1AdjOrTrans_STAR_MR(g); DistMatrix<T,STAR,MC > D1_STAR_MC(g); DistMatrix<T,MR, MC > D1_MR_MC(g); DistMatrix<T> D1(g); A1_VR_STAR.AlignWith( B ); A1AdjOrTrans_STAR_MR.AlignWith( B ); D1_STAR_MC.AlignWith( B ); // Start the algorithm Scale( beta, C ); LockedPartitionRight( A, AL, AR, 0 ); PartitionDown ( C, CT, CB, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); D1.AlignWith( C1 ); Zeros( C1.Height(), C1.Width(), D1_STAR_MC ); //--------------------------------------------------------------------// A1_VR_STAR = A1; if( orientationOfA == ADJOINT ) A1AdjOrTrans_STAR_MR.AdjointFrom( A1_VR_STAR ); else A1AdjOrTrans_STAR_MR.TransposeFrom( A1_VR_STAR ); // D1[*,MC] := alpha (A1[MR,*])^[T/H] (B[MC,MR])^[T/H] // = alpha (A1^[T/H])[*,MR] (B^[T/H])[MR,MC] LocalGemm ( NORMAL, orientationOfB, alpha, A1AdjOrTrans_STAR_MR, B, T(0), D1_STAR_MC ); // C1[MC,MR] += scattered & transposed D1[*,MC] summed over grid rows D1_MR_MC.SumScatterFrom( D1_STAR_MC ); D1 = D1_MR_MC; Axpy( T(1), D1, C1 ); //--------------------------------------------------------------------// D1.FreeAlignments(); SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void HemmRUC ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::HemmRUC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error("{A,B,C} must be distributed on the same grid"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> ATL(g), ATR(g), A00(g), A01(g), A02(g), AColPan(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), ARowPan(g), A20(g), A21(g), A22(g); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g), CLeft(g), CRight(g); // Temporary distributions DistMatrix<T,MC,STAR> B1_MC_STAR(g); DistMatrix<T,VR, STAR> AColPan_VR_STAR(g); DistMatrix<T,STAR,MR > AColPanAdj_STAR_MR(g); DistMatrix<T,MR, STAR> ARowPanAdj_MR_STAR(g); B1_MC_STAR.AlignWith( C ); // Start the algorithm Scale( beta, C ); LockedPartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionRight( B, BL, BR, 0 ); PartitionRight( C, CL, CR, 0 ); while( CR.Width() > 0 ) { LockedRepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); ARowPan.LockedView1x2( A11, A12 ); AColPan.LockedView2x1 ( A01, A11 ); CLeft.View1x2( C0, C1 ); CRight.View1x2( C1, C2 ); AColPan_VR_STAR.AlignWith( CLeft ); AColPanAdj_STAR_MR.AlignWith( CLeft ); ARowPanAdj_MR_STAR.AlignWith( CRight ); //--------------------------------------------------------------------// B1_MC_STAR = B1; AColPan_VR_STAR = AColPan; AColPanAdj_STAR_MR.AdjointFrom( AColPan_VR_STAR ); ARowPanAdj_MR_STAR.AdjointFrom( ARowPan ); MakeTrapezoidal( LEFT, LOWER, 0, ARowPanAdj_MR_STAR ); MakeTrapezoidal( RIGHT, LOWER, -1, AColPanAdj_STAR_MR ); LocalGemm ( NORMAL, ADJOINT, alpha, B1_MC_STAR, ARowPanAdj_MR_STAR, T(1), CRight ); LocalGemm ( NORMAL, NORMAL, alpha, B1_MC_STAR, AColPanAdj_STAR_MR, T(1), CLeft ); //--------------------------------------------------------------------// AColPan_VR_STAR.FreeAlignments(); AColPanAdj_STAR_MR.FreeAlignments(); ARowPanAdj_MR_STAR.FreeAlignments(); SlideLockedPartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void LocalSymmetricAccumulateLU ( Orientation orientation, T alpha, const DistMatrix<T>& A, const DistMatrix<T,MC, STAR>& B_MC_STAR, const DistMatrix<T,STAR,MR >& BAdjOrTrans_STAR_MR, DistMatrix<T,MC, STAR>& Z_MC_STAR, DistMatrix<T,MR, STAR>& Z_MR_STAR ) { #ifndef RELEASE PushCallStack("internal::LocalSymmetricAccumulateLU"); if( A.Grid() != B_MC_STAR.Grid() || B_MC_STAR.Grid() != BAdjOrTrans_STAR_MR.Grid() || BAdjOrTrans_STAR_MR.Grid() != Z_MC_STAR.Grid() || Z_MC_STAR.Grid() != Z_MR_STAR.Grid() ) throw std::logic_error ("{A,B,Z} must be distributed over the same grid"); if( A.Height() != A.Width() || A.Height() != B_MC_STAR.Height() || A.Height() != BAdjOrTrans_STAR_MR.Width() || A.Height() != Z_MC_STAR.Height() || A.Height() != Z_MR_STAR.Height() || B_MC_STAR.Width() != BAdjOrTrans_STAR_MR.Height() || BAdjOrTrans_STAR_MR.Height() != Z_MC_STAR.Width() || Z_MC_STAR.Width() != Z_MR_STAR.Width() ) { std::ostringstream msg; msg << "Nonconformal LocalSymmetricAccumulateLU: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B[MC,* ] ~ " << B_MC_STAR.Height() << " x " << B_MC_STAR.Width() << "\n" << " B^H/T[* ,MR] ~ " << BAdjOrTrans_STAR_MR.Height() << " x " << BAdjOrTrans_STAR_MR.Width() << "\n" << " Z[MC,* ] ~ " << Z_MC_STAR.Height() << " x " << Z_MC_STAR.Width() << "\n" << " Z[MR,* ] ` " << Z_MR_STAR.Height() << " x " << Z_MR_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( B_MC_STAR.ColAlignment() != A.ColAlignment() || BAdjOrTrans_STAR_MR.RowAlignment() != A.RowAlignment() || Z_MC_STAR.ColAlignment() != A.ColAlignment() || Z_MR_STAR.ColAlignment() != A.RowAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = A.Grid(); DistMatrix<T> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<T> D11(g); DistMatrix<T,MC,STAR> BT_MC_STAR(g), B0_MC_STAR(g), BB_MC_STAR(g), B1_MC_STAR(g), B2_MC_STAR(g); DistMatrix<T,STAR,MR> BLAdjOrTrans_STAR_MR(g), BRAdjOrTrans_STAR_MR(g), B0AdjOrTrans_STAR_MR(g), B1AdjOrTrans_STAR_MR(g), B2AdjOrTrans_STAR_MR(g); DistMatrix<T,MC,STAR> ZT_MC_STAR(g), Z0_MC_STAR(g), ZB_MC_STAR(g), Z1_MC_STAR(g), Z2_MC_STAR(g); DistMatrix<T,MR,STAR> ZT_MR_STAR(g), Z0_MR_STAR(g), ZB_MR_STAR(g), Z1_MR_STAR(g), Z2_MR_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDown ( B_MC_STAR, BT_MC_STAR, BB_MC_STAR, 0 ); LockedPartitionRight ( BAdjOrTrans_STAR_MR, BLAdjOrTrans_STAR_MR, BRAdjOrTrans_STAR_MR, 0 ); PartitionDown ( Z_MC_STAR, ZT_MC_STAR, ZB_MC_STAR, 0 ); PartitionDown ( Z_MR_STAR, ZT_MR_STAR, ZB_MR_STAR, 0 ); while( ATL.Height() < A.Height() ) { LockedRepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDown ( BT_MC_STAR, B0_MC_STAR, /**********/ /**********/ B1_MC_STAR, BB_MC_STAR, B2_MC_STAR ); LockedRepartitionRight ( BLAdjOrTrans_STAR_MR, /**/ BRAdjOrTrans_STAR_MR, B0AdjOrTrans_STAR_MR, /**/ B1AdjOrTrans_STAR_MR, B2AdjOrTrans_STAR_MR ); RepartitionDown ( ZT_MC_STAR, Z0_MC_STAR, /**********/ /**********/ Z1_MC_STAR, ZB_MC_STAR, Z2_MC_STAR ); RepartitionDown ( ZT_MR_STAR, Z0_MR_STAR, /**********/ /**********/ Z1_MR_STAR, ZB_MR_STAR, Z2_MR_STAR ); D11.AlignWith( A11 ); //--------------------------------------------------------------------// D11 = A11; MakeTrapezoidal( LEFT, UPPER, 0, D11 ); LocalGemm ( NORMAL, orientation, alpha, D11, B1AdjOrTrans_STAR_MR, T(1), Z1_MC_STAR ); MakeTrapezoidal( LEFT, UPPER, 1, D11 ); LocalGemm ( orientation, NORMAL, alpha, D11, B1_MC_STAR, T(1), Z1_MR_STAR ); LocalGemm ( NORMAL, orientation, alpha, A12, B2AdjOrTrans_STAR_MR, T(1), Z1_MC_STAR ); LocalGemm ( orientation, NORMAL, alpha, A12, B1_MC_STAR, T(1), Z2_MR_STAR ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDown ( BT_MC_STAR, B0_MC_STAR, B1_MC_STAR, /**********/ /**********/ BB_MC_STAR, B2_MC_STAR ); SlideLockedPartitionRight ( BLAdjOrTrans_STAR_MR, /**/ BRAdjOrTrans_STAR_MR, B0AdjOrTrans_STAR_MR, B1AdjOrTrans_STAR_MR, /**/ B2AdjOrTrans_STAR_MR ); SlidePartitionDown ( ZT_MC_STAR, Z0_MC_STAR, Z1_MC_STAR, /**********/ /**********/ ZB_MC_STAR, Z2_MC_STAR ); SlidePartitionDown ( ZT_MR_STAR, Z0_MR_STAR, Z1_MR_STAR, /**********/ /**********/ ZB_MR_STAR, Z2_MR_STAR ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void Trr2kNNTN ( UpperOrLower uplo, Orientation orientationOfC, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, const DistMatrix<T>& C, const DistMatrix<T>& D, T beta, DistMatrix<T>& E ) { #ifndef RELEASE PushCallStack("internal::Trr2kNNTN"); if( E.Height() != E.Width() || A.Width() != C.Height() || A.Height() != E.Height() || C.Width() != E.Height() || B.Width() != E.Width() || D.Width() != E.Width() || A.Width() != B.Height() || C.Height() != D.Height() ) throw std::logic_error("Nonconformal Trr2kNNTN"); #endif const Grid& g = E.Grid(); DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); DistMatrix<T> DT(g), D0(g), DB(g), D1(g), D2(g); DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); DistMatrix<T,STAR,MC > C1_STAR_MC(g); DistMatrix<T,MR, STAR> D1Trans_MR_STAR(g); A1_MC_STAR.AlignWith( E ); B1Trans_MR_STAR.AlignWith( E ); C1_STAR_MC.AlignWith( E ); D1Trans_MR_STAR.AlignWith( E ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); LockedPartitionDown ( C, CT, CB, 0 ); LockedPartitionDown ( D, DT, DB, 0 ); while( AL.Width() < A.Width() ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); LockedRepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); LockedRepartitionDown ( DT, D0, /**/ /**/ D1, DB, D2 ); //--------------------------------------------------------------------// A1_MC_STAR = A1; C1_STAR_MC = C1; B1Trans_MR_STAR.TransposeFrom( B1 ); D1Trans_MR_STAR.TransposeFrom( D1 ); LocalTrr2k ( uplo, TRANSPOSE, orientationOfC, TRANSPOSE, alpha, A1_MC_STAR, B1Trans_MR_STAR, C1_STAR_MC, D1Trans_MR_STAR, beta, E ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( DT, D0, D1, /**/ /**/ DB, D2 ); SlideLockedPartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); } #ifndef RELEASE PopCallStack(); #endif }
void Trr2kNNNT ( UpperOrLower uplo, Orientation orientationOfD, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, const DistMatrix<T>& C, const DistMatrix<T>& D, T beta, DistMatrix<T>& E ) { #ifndef RELEASE PushCallStack("internal::Trr2kNNNT"); if( E.Height() != E.Width() || A.Width() != C.Width() || A.Height() != E.Height() || C.Height() != E.Height() || B.Width() != E.Width() || D.Height() != E.Width() || A.Width() != B.Height() || C.Width() != D.Width() ) throw std::logic_error("Nonconformal Trr2kNNNT"); #endif const Grid& g = E.Grid(); DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g); DistMatrix<T> DL(g), DR(g), D0(g), D1(g), D2(g); DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); DistMatrix<T,MC, STAR> C1_MC_STAR(g); DistMatrix<T,VR, STAR> D1_VR_STAR(g); DistMatrix<T,STAR,MR > D1AdjOrTrans_STAR_MR(g); A1_MC_STAR.AlignWith( E ); B1Trans_MR_STAR.AlignWith( E ); C1_MC_STAR.AlignWith( E ); D1_VR_STAR.AlignWith( E ); D1AdjOrTrans_STAR_MR.AlignWith( E ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); LockedPartitionRight( C, CL, CR, 0 ); LockedPartitionRight( D, DL, DR, 0 ); while( AL.Width() < A.Width() ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); LockedRepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); LockedRepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); //--------------------------------------------------------------------// A1_MC_STAR = A1; C1_MC_STAR = C1; B1Trans_MR_STAR.TransposeFrom( B1 ); D1_VR_STAR = D1; if( orientationOfD == ADJOINT ) D1AdjOrTrans_STAR_MR.AdjointFrom( D1_VR_STAR ); else D1AdjOrTrans_STAR_MR.TransposeFrom( D1_VR_STAR ); LocalTrr2k ( uplo, TRANSPOSE, alpha, A1_MC_STAR, B1Trans_MR_STAR, C1_MC_STAR, D1AdjOrTrans_STAR_MR, beta, E ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( DL, /**/ DR, D0, D1, /**/ D2 ); SlideLockedPartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void LocalTrmmAccumulateRUN ( Orientation orientation, UnitOrNonUnit diag, T alpha, const DistMatrix<T,MC, MR >& U, const DistMatrix<T,STAR,MC >& X_STAR_MC, DistMatrix<T,MR, STAR>& ZTrans_MR_STAR ) { #ifndef RELEASE CallStackEntry entry("internal::LocalTrmmAccumulateRUN"); if( U.Grid() != X_STAR_MC.Grid() || X_STAR_MC.Grid() != ZTrans_MR_STAR.Grid() ) throw std::logic_error ("{U,X,Z} must be distributed over the same grid"); if( U.Height() != U.Width() || U.Height() != X_STAR_MC.Width() || U.Height() != ZTrans_MR_STAR.Height() ) { std::ostringstream msg; msg << "Nonconformal LocalTrmmAccumulateRUN: \n" << " U ~ " << U.Height() << " x " << U.Width() << "\n" << " X[* ,MC] ~ " << X_STAR_MC.Height() << " x " << X_STAR_MC.Width() << "\n" << " Z^H/T[MR,* ] ~ " << ZTrans_MR_STAR.Height() << " x " << ZTrans_MR_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( X_STAR_MC.RowAlignment() != U.ColAlignment() || ZTrans_MR_STAR.ColAlignment() != U.RowAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<T> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<T> D11(g); DistMatrix<T,STAR,MC> XL_STAR_MC(g), XR_STAR_MC(g), X0_STAR_MC(g), X1_STAR_MC(g), X2_STAR_MC(g); DistMatrix<T,MR,STAR> ZTTrans_MR_STAR(g), Z0Trans_MR_STAR(g), ZBTrans_MR_STAR(g), Z1Trans_MR_STAR(g), Z2Trans_MR_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); LockedPartitionRight( X_STAR_MC, XL_STAR_MC, XR_STAR_MC, 0 ); PartitionDown ( ZTrans_MR_STAR, ZTTrans_MR_STAR, ZBTrans_MR_STAR, 0 ); while( UTL.Height() < U.Height() ) { LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); LockedRepartitionRight ( XL_STAR_MC, /**/ XR_STAR_MC, X0_STAR_MC, /**/ X1_STAR_MC, X2_STAR_MC ); RepartitionDown ( ZTTrans_MR_STAR, Z0Trans_MR_STAR, /***************/ /***************/ Z1Trans_MR_STAR, ZBTrans_MR_STAR, Z2Trans_MR_STAR ); D11.AlignWith( U11 ); //--------------------------------------------------------------------// D11 = U11; MakeTriangular( UPPER, D11 ); if( diag == UNIT ) SetDiagonal( D11, T(1) ); LocalGemm ( orientation, orientation, alpha, D11, X1_STAR_MC, T(1), Z1Trans_MR_STAR ); LocalGemm ( orientation, orientation, alpha, U01, X0_STAR_MC, T(1), Z1Trans_MR_STAR ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); SlideLockedPartitionRight ( XL_STAR_MC, /**/ XR_STAR_MC, X0_STAR_MC, X1_STAR_MC, /**/ X2_STAR_MC ); SlidePartitionDown ( ZTTrans_MR_STAR, Z0Trans_MR_STAR, Z1Trans_MR_STAR, /***************/ /***************/ ZBTrans_MR_STAR, Z2Trans_MR_STAR ); } PopBlocksizeStack(); }
inline void SymmLLA ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::SymmLLA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g); DistMatrix<T,MC,STAR> B1_MC_STAR(g); DistMatrix<T,VR,STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR> B1Trans_STAR_MR(g); DistMatrix<T> Z1(g); DistMatrix<T,MC,STAR> Z1_MC_STAR(g); DistMatrix<T,MR,STAR> Z1_MR_STAR(g); DistMatrix<T,MR,MC > Z1_MR_MC(g); B1_MC_STAR.AlignWith( A ); B1_VR_STAR.AlignWith( A ); B1Trans_STAR_MR.AlignWith( A ); Z1_MC_STAR.AlignWith( A ); Z1_MR_STAR.AlignWith( A ); Scale( beta, C ); LockedPartitionRight ( B, BL, BR, 0 ); PartitionRight ( C, CL, CR, 0 ); while( CL.Width() < C.Width() ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); Z1.AlignWith( C1 ); Zeros( C1.Height(), C1.Width(), Z1_MC_STAR ); Zeros( C1.Height(), C1.Width(), Z1_MR_STAR ); //--------------------------------------------------------------------// B1_MC_STAR = B1; B1_VR_STAR = B1_MC_STAR; B1Trans_STAR_MR.TransposeFrom( B1_VR_STAR ); LocalSymmetricAccumulateLL ( TRANSPOSE, alpha, A, B1_MC_STAR, B1Trans_STAR_MR, Z1_MC_STAR, Z1_MR_STAR ); Z1_MR_MC.SumScatterFrom( Z1_MR_STAR ); Z1 = Z1_MR_MC; Z1.SumScatterUpdate( T(1), Z1_MC_STAR ); Axpy( T(1), Z1, C1 ); //--------------------------------------------------------------------// Z1.FreeAlignments(); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void LocalSymmetricAccumulateRU ( Orientation orientation, T alpha, const DistMatrix<T,MC, MR >& A, const DistMatrix<T,STAR,MC >& B_STAR_MC, const DistMatrix<T,MR, STAR>& BTrans_MR_STAR, DistMatrix<T,MC, STAR>& ZTrans_MC_STAR, DistMatrix<T,MR, STAR>& ZTrans_MR_STAR ) { #ifndef RELEASE PushCallStack("internal::LocalSymmetricAccumulateRU"); if( A.Grid() != B_STAR_MC.Grid() || B_STAR_MC.Grid() != BTrans_MR_STAR.Grid() || BTrans_MR_STAR.Grid() != ZTrans_MC_STAR.Grid() || ZTrans_MC_STAR.Grid() != ZTrans_MR_STAR.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != A.Width() || A.Height() != B_STAR_MC.Width() || A.Height() != BTrans_MR_STAR.Height() || A.Height() != ZTrans_MC_STAR.Height() || A.Height() != ZTrans_MR_STAR.Height() || B_STAR_MC.Height() != BTrans_MR_STAR.Width() || BTrans_MR_STAR.Width() != ZTrans_MC_STAR.Width() || ZTrans_MC_STAR.Width() != ZTrans_MR_STAR.Width() ) { std::ostringstream msg; msg << "Nonconformal LocalSymmetricAccumulateRU: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B[* ,MC] ~ " << B_STAR_MC.Height() << " x " << B_STAR_MC.Width() << "\n" << " B^H/T[MR,* ] ~ " << BTrans_MR_STAR.Height() << " x " << BTrans_MR_STAR.Width() << "\n" << " Z^H/T[MC,* ] ~ " << ZTrans_MC_STAR.Height() << " x " << ZTrans_MC_STAR.Width() << "\n" << " Z^H/T[MR,* ] ~ " << ZTrans_MR_STAR.Height() << " x " << ZTrans_MR_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( B_STAR_MC.RowAlignment() != A.ColAlignment() || BTrans_MR_STAR.ColAlignment() != A.RowAlignment() || ZTrans_MC_STAR.ColAlignment() != A.ColAlignment() || ZTrans_MR_STAR.ColAlignment() != A.RowAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<T> D11(g); DistMatrix<T,STAR,MC> BL_STAR_MC(g), BR_STAR_MC(g), B0_STAR_MC(g), B1_STAR_MC(g), B2_STAR_MC(g); DistMatrix<T,MR,STAR> BTTrans_MR_STAR(g), B0Trans_MR_STAR(g), BBTrans_MR_STAR(g), B1Trans_MR_STAR(g), B2Trans_MR_STAR(g); DistMatrix<T,MC,STAR> ZTTrans_MC_STAR(g), Z0Trans_MC_STAR(g), ZBTrans_MC_STAR(g), Z1Trans_MC_STAR(g), Z2Trans_MC_STAR(g); DistMatrix<T,MR,STAR> ZBTrans_MR_STAR(g), Z0Trans_MR_STAR(g), ZTTrans_MR_STAR(g), Z1Trans_MR_STAR(g), Z2Trans_MR_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionRight( B_STAR_MC, BL_STAR_MC, BR_STAR_MC, 0 ); LockedPartitionDown ( BTrans_MR_STAR, BTTrans_MR_STAR, BBTrans_MR_STAR, 0 ); PartitionDown ( ZTrans_MC_STAR, ZTTrans_MC_STAR, ZBTrans_MC_STAR, 0 ); PartitionDown ( ZTrans_MR_STAR, ZTTrans_MR_STAR, ZBTrans_MR_STAR, 0 ); while( ATL.Height() < A.Height() ) { LockedRepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionRight ( BL_STAR_MC, /**/ BR_STAR_MC, B0_STAR_MC, /**/ B1_STAR_MC, B2_STAR_MC ); LockedRepartitionDown ( BTTrans_MR_STAR, B0Trans_MR_STAR, /***************/ /***************/ B1Trans_MR_STAR, BBTrans_MR_STAR, B2Trans_MR_STAR ); RepartitionDown ( ZTTrans_MC_STAR, Z0Trans_MC_STAR, /***************/ /***************/ Z1Trans_MC_STAR, ZBTrans_MC_STAR, Z2Trans_MC_STAR ); RepartitionDown ( ZTTrans_MR_STAR, Z0Trans_MR_STAR, /***************/ /***************/ Z1Trans_MR_STAR, ZBTrans_MR_STAR, Z2Trans_MR_STAR ); D11.AlignWith( A11 ); //--------------------------------------------------------------------// D11 = A11; MakeTriangular( UPPER, D11 ); LocalGemm ( orientation, orientation, alpha, D11, B1_STAR_MC, T(1), Z1Trans_MR_STAR ); SetDiagonal( D11, T(0) ); LocalGemm ( NORMAL, NORMAL, alpha, D11, B1Trans_MR_STAR, T(1), Z1Trans_MC_STAR ); LocalGemm ( orientation, orientation, alpha, A12, B1_STAR_MC, T(1), Z2Trans_MR_STAR ); LocalGemm ( NORMAL, NORMAL, alpha, A12, B2Trans_MR_STAR, T(1), Z1Trans_MC_STAR ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionRight ( BL_STAR_MC, /**/ BR_STAR_MC, B0_STAR_MC, B1_STAR_MC, /**/ B2_STAR_MC ); SlideLockedPartitionDown ( BTTrans_MR_STAR, B0Trans_MR_STAR, B1Trans_MR_STAR, /***************/ /***************/ BBTrans_MR_STAR, B2Trans_MR_STAR ); SlidePartitionDown ( ZTTrans_MC_STAR, Z0Trans_MC_STAR, Z1Trans_MC_STAR, /***************/ /***************/ ZBTrans_MC_STAR, Z2Trans_MC_STAR ); SlidePartitionDown ( ZTTrans_MR_STAR, Z0Trans_MR_STAR, Z1Trans_MR_STAR, /***************/ /***************/ ZBTrans_MR_STAR, Z2Trans_MR_STAR ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }