void UN_C ( T alpha, const AbstractDistMatrix<T>& APre, AbstractDistMatrix<T>& CPre, bool conjugate=false ) { EL_DEBUG_CSE const Int r = APre.Width(); const Int bsize = Blocksize(); const Grid& g = APre.Grid(); DistMatrixReadProxy<T,T,MC,MR> AProx( APre ); DistMatrixReadWriteProxy<T,T,MC,MR> CProx( CPre ); auto& A = AProx.GetLocked(); auto& C = CProx.Get(); // Temporary distributions DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,VR, STAR> A1_VR_STAR(g); DistMatrix<T,STAR,MR > A1Trans_STAR_MR(g); A1_MC_STAR.AlignWith( C ); A1_VR_STAR.AlignWith( C ); A1Trans_STAR_MR.AlignWith( C ); for( Int k=0; k<r; k+=bsize ) { const Int nb = Min(bsize,r-k); auto A1 = A( ALL, IR(k,k+nb) ); A1_VR_STAR = A1_MC_STAR = A1; Transpose( A1_VR_STAR, A1Trans_STAR_MR, conjugate ); LocalTrrk( UPPER, alpha, A1_MC_STAR, A1Trans_STAR_MR, T(1), C ); } }
inline void SyrkLN ( T alpha, const DistMatrix<T>& A, T beta, DistMatrix<T>& C, bool conjugate=false ) { #ifndef RELEASE PushCallStack("internal::SyrkLN"); if( A.Grid() != C.Grid() ) throw std::logic_error ("A and C must be distributed over the same grid"); if( A.Height() != C.Height() || A.Height() != C.Width() ) { std::ostringstream msg; msg << "Nonconformal SyrkLN:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); // Temporary distributions DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,VR, STAR> A1_VR_STAR(g); DistMatrix<T,STAR,MR > A1Trans_STAR_MR(g); A1_MC_STAR.AlignWith( C ); A1_VR_STAR.AlignWith( C ); A1Trans_STAR_MR.AlignWith( C ); // Start the algorithm ScaleTrapezoid( beta, LEFT, LOWER, 0, C ); LockedPartitionRight( A, AL, AR, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); //--------------------------------------------------------------------// A1_VR_STAR = A1_MC_STAR = A1; A1Trans_STAR_MR.TransposeFrom( A1_VR_STAR, conjugate ); LocalTrrk( LOWER, alpha, A1_MC_STAR, A1Trans_STAR_MR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void Her2kLN ( T alpha, const DistMatrix<T,MC,MR>& A, const DistMatrix<T,MC,MR>& B, T beta, DistMatrix<T,MC,MR>& C ) { #ifndef RELEASE PushCallStack("internal::Her2kLN"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || A.Height() != C.Width() || B.Height() != C.Height() || B.Height() != C.Width() || A.Width() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal Her2kLN:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T,MC,MR> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T,MC,MR> BL(g), BR(g), B0(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,MC, STAR> B1_MC_STAR(g); DistMatrix<T,VR, STAR> A1_VR_STAR(g); DistMatrix<T,VR, STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR > A1Adj_STAR_MR(g); DistMatrix<T,STAR,MR > B1Adj_STAR_MR(g); A1_MC_STAR.AlignWith( C ); B1_MC_STAR.AlignWith( C ); A1_VR_STAR.AlignWith( C ); B1_VR_STAR.AlignWith( C ); A1Adj_STAR_MR.AlignWith( C ); B1Adj_STAR_MR.AlignWith( C ); // Start the algorithm ScaleTrapezoid( beta, LEFT, LOWER, 0, C ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionRight( B, BL, BR, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); //--------------------------------------------------------------------// A1_VR_STAR = A1_MC_STAR = A1; A1Adj_STAR_MR.AdjointFrom( A1_VR_STAR ); B1_VR_STAR = B1_MC_STAR = B1; B1Adj_STAR_MR.AdjointFrom( B1_VR_STAR ); LocalTrr2k ( LOWER, alpha, A1_MC_STAR, B1Adj_STAR_MR, B1_MC_STAR, A1Adj_STAR_MR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void GemmTTB ( Orientation orientationOfA, Orientation orientationOfB, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmTTB"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( orientationOfA == NORMAL || orientationOfB == NORMAL ) throw std::logic_error ("GemmTTB expects A and B to be (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Height() != C.Width() || A.Height() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal GemmTTB: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,VR, STAR> A1_VR_STAR(g); DistMatrix<T,STAR,MR > A1AdjOrTrans_STAR_MR(g); DistMatrix<T,STAR,MC > D1_STAR_MC(g); DistMatrix<T,MR, MC > D1_MR_MC(g); DistMatrix<T> D1(g); A1_VR_STAR.AlignWith( B ); A1AdjOrTrans_STAR_MR.AlignWith( B ); D1_STAR_MC.AlignWith( B ); // Start the algorithm Scale( beta, C ); LockedPartitionRight( A, AL, AR, 0 ); PartitionDown ( C, CT, CB, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); D1.AlignWith( C1 ); Zeros( C1.Height(), C1.Width(), D1_STAR_MC ); //--------------------------------------------------------------------// A1_VR_STAR = A1; if( orientationOfA == ADJOINT ) A1AdjOrTrans_STAR_MR.AdjointFrom( A1_VR_STAR ); else A1AdjOrTrans_STAR_MR.TransposeFrom( A1_VR_STAR ); // D1[*,MC] := alpha (A1[MR,*])^[T/H] (B[MC,MR])^[T/H] // = alpha (A1^[T/H])[*,MR] (B^[T/H])[MR,MC] LocalGemm ( NORMAL, orientationOfB, alpha, A1AdjOrTrans_STAR_MR, B, T(0), D1_STAR_MC ); // C1[MC,MR] += scattered & transposed D1[*,MC] summed over grid rows D1_MR_MC.SumScatterFrom( D1_STAR_MC ); D1 = D1_MR_MC; Axpy( T(1), D1, C1 ); //--------------------------------------------------------------------// D1.FreeAlignments(); SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void Syr2kUN ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C, bool conjugate=false ) { #ifndef RELEASE CallStackEntry entry("internal::Syr2kUN"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || A.Height() != C.Width() || B.Height() != C.Height() || B.Height() != C.Width() || A.Width() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal Syr2kUN:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = C.Grid(); // Matrix views DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,MC, STAR> B1_MC_STAR(g); DistMatrix<T,VR, STAR> A1_VR_STAR(g); DistMatrix<T,VR, STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR > A1Trans_STAR_MR(g); DistMatrix<T,STAR,MR > B1Trans_STAR_MR(g); A1_MC_STAR.AlignWith( C ); B1_MC_STAR.AlignWith( C ); A1_VR_STAR.AlignWith( C ); B1_VR_STAR.AlignWith( C ); A1Trans_STAR_MR.AlignWith( C ); B1Trans_STAR_MR.AlignWith( C ); // Start the algorithm ScaleTrapezoid( beta, LEFT, UPPER, 0, C ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionRight( B, BL, BR, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); //--------------------------------------------------------------------// A1_VR_STAR = A1_MC_STAR = A1; A1Trans_STAR_MR.TransposeFrom( A1_VR_STAR, conjugate ); B1_VR_STAR = B1_MC_STAR = B1; B1Trans_STAR_MR.TransposeFrom( B1_VR_STAR, conjugate ); LocalTrr2k ( UPPER, alpha, A1_MC_STAR, B1Trans_STAR_MR, B1_MC_STAR, A1Trans_STAR_MR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); } }