void L1DistanceMatrix(direction_t dirA, direction_t dirB, T alpha, const El::ElementalMatrix<T> &APre, const El::ElementalMatrix<T> &BPre, T beta, El::ElementalMatrix<T> &CPre) { if (dirA == base::COLUMNS && dirB == base::COLUMNS) { // Use a SUMMA-like routine, with C as stationary // Basically an adaptation of Elementals TN case for stationary C. const El::Int m = CPre.Height(); const El::Int n = CPre.Width(); const El::Int sumDim = BPre.Height(); const El::Int bsize = El::Blocksize(); const El::Grid& g = APre.Grid(); El::DistMatrixReadProxy<T, T, El::MC, El::MR> AProx(APre); El::DistMatrixReadProxy<T, T, El::MC, El::MR> BProx(BPre); El::DistMatrixReadWriteProxy<T, T, El::MC, El::MR> CProx(CPre); auto& A = AProx.GetLocked(); auto& B = BProx.GetLocked(); auto& C = CProx.Get(); // Temporary distributions El::DistMatrix<T, El::STAR, El::MC> A1_STAR_MC(g); El::DistMatrix<T, El::STAR, El::MR> B1_STAR_MR(g); A1_STAR_MC.AlignWith(C); B1_STAR_MR.AlignWith(C); El::Scale(beta, C); for(El::Int k = 0; k < sumDim; k += bsize) { const El::Int nb = std::min(bsize,sumDim-k); auto A1 = A(El::IR(k,k+nb), El::IR(0,m)); auto B1 = B(El::IR(k,k+nb), El::IR(0,n)); A1_STAR_MC = A1; B1_STAR_MR = B1; L1DistanceMatrix(base::COLUMNS, base::COLUMNS, alpha, A1_STAR_MC.LockedMatrix(), B1_STAR_MR.LockedMatrix(), T(1.0), C.Matrix()); } } // TODO the rest of the cases. }
void SUMMA_TNC ( Orientation orientA, T alpha, const AbstractDistMatrix<T>& APre, const AbstractDistMatrix<T>& BPre, AbstractDistMatrix<T>& CPre ) { DEBUG_CSE const Int sumDim = BPre.Height(); const Int bsize = Blocksize(); const Grid& g = APre.Grid(); DistMatrixReadProxy<T,T,MC,MR> AProx( APre ); DistMatrixReadProxy<T,T,MC,MR> BProx( BPre ); DistMatrixReadWriteProxy<T,T,MC,MR> CProx( CPre ); auto& A = AProx.GetLocked(); auto& B = BProx.GetLocked(); auto& C = CProx.Get(); // Temporary distributions DistMatrix<T,STAR,MC> A1_STAR_MC(g); DistMatrix<T,MR,STAR> B1Trans_MR_STAR(g); A1_STAR_MC.AlignWith( C ); B1Trans_MR_STAR.AlignWith( C ); for( Int k=0; k<sumDim; k+=bsize ) { const Int nb = Min(bsize,sumDim-k); auto A1 = A( IR(k,k+nb), ALL ); auto B1 = B( IR(k,k+nb), ALL ); // C[MC,MR] += alpha (A1[*,MC])^T B1[*,MR] // = alpha (A1^T)[MC,*] B1[*,MR] A1_STAR_MC = A1; Transpose( B1, B1Trans_MR_STAR ); LocalGemm ( orientA, TRANSPOSE, alpha, A1_STAR_MC, B1Trans_MR_STAR, T(1), C ); } }
void LT_C ( T alpha, const AbstractDistMatrix<T>& APre, AbstractDistMatrix<T>& CPre, bool conjugate=false ) { EL_DEBUG_CSE const Int r = APre.Height(); const Int bsize = Blocksize(); const Grid& g = APre.Grid(); const Orientation orientation = ( conjugate ? ADJOINT : TRANSPOSE ); DistMatrixReadProxy<T,T,MC,MR> AProx( APre ); DistMatrixReadWriteProxy<T,T,MC,MR> CProx( CPre ); auto& A = AProx.GetLocked(); auto& C = CProx.Get(); // Temporary distributions DistMatrix<T,MR, STAR> A1Trans_MR_STAR(g); DistMatrix<T,STAR,VR > A1_STAR_VR(g); DistMatrix<T,STAR,MC > A1_STAR_MC(g); A1Trans_MR_STAR.AlignWith( C ); A1_STAR_MC.AlignWith( C ); for( Int k=0; k<r; k+=bsize ) { const Int nb = Min(bsize,r-k); auto A1 = A( IR(k,k+nb), ALL ); Transpose( A1, A1Trans_MR_STAR ); Transpose( A1Trans_MR_STAR, A1_STAR_VR ); A1_STAR_MC = A1_STAR_VR; LocalTrrk ( LOWER, orientation, TRANSPOSE, alpha, A1_STAR_MC, A1Trans_MR_STAR, T(1), C ); } }
void SymmetricL1DistanceMatrix(El::UpperOrLower uplo, direction_t dir, T alpha, const El::ElementalMatrix<T> &APre, T beta, El::ElementalMatrix<T> &CPre) { if (dir == base::COLUMNS) { const El::Int r = APre.Height(); const El::Int bsize = El::Blocksize(); const El::Grid& g = APre.Grid(); El::DistMatrixReadProxy<T, T, El::MC, El::MR> AProx(APre); El::DistMatrixReadWriteProxy<T, T, El::MC, El::MR> CProx(CPre); auto& A = AProx.GetLocked(); auto& C = CProx.Get(); // Temporary distributions El::DistMatrix<T, El::STAR, El::MR> A1_STAR_MR(g); El::DistMatrix<T, El::STAR, El::MC> A1_STAR_MC(g); A1_STAR_MC.AlignWith(C); A1_STAR_MR.AlignWith(C); El::ScaleTrapezoid(beta, uplo, C); for(El::Int k = 0; k < r; k += bsize) { const El::Int nb = std::min(bsize, r - k); auto A1 = A(El::IR(k, k + nb), El::ALL); A1_STAR_MC = A1; A1_STAR_MR = A1; internal::L1DistanceMatrixTU(uplo, base::COLUMNS, base::COLUMNS, alpha, A1_STAR_MC, A1_STAR_MR, T(1.0), C); } } // TODO the rest of the cases. }
inline void Syr2kUT ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C, bool conjugate=false ) { #ifndef RELEASE CallStackEntry entry("internal::Syr2kUT"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Width() != C.Height() || A.Width() != C.Width() || B.Width() != C.Height() || B.Width() != C.Width() || A.Height() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal Syr2kUT:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); const Orientation orientation = ( conjugate ? ADJOINT : TRANSPOSE ); // Matrix views DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,MR, STAR> A1Trans_MR_STAR(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); DistMatrix<T,STAR,VR > A1_STAR_VR(g); DistMatrix<T,STAR,VR > B1_STAR_VR(g); DistMatrix<T,STAR,MC > A1_STAR_MC(g); DistMatrix<T,STAR,MC > B1_STAR_MC(g); A1Trans_MR_STAR.AlignWith( C ); B1Trans_MR_STAR.AlignWith( C ); A1_STAR_MC.AlignWith( C ); B1_STAR_MC.AlignWith( C ); // Start the algorithm ScaleTrapezoid( beta, LEFT, UPPER, 0, C ); LockedPartitionDown ( A, AT, AB, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); //--------------------------------------------------------------------// A1Trans_MR_STAR.TransposeFrom( A1 ); A1_STAR_VR.TransposeFrom( A1Trans_MR_STAR ); A1_STAR_MC = A1_STAR_VR; B1Trans_MR_STAR.TransposeFrom( B1 ); B1_STAR_VR.TransposeFrom( B1Trans_MR_STAR ); B1_STAR_MC = B1_STAR_VR; LocalTrr2k ( UPPER, orientation, TRANSPOSE, orientation, TRANSPOSE, alpha, A1_STAR_MC, B1Trans_MR_STAR, B1_STAR_MC, A1Trans_MR_STAR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); } }
inline void GemmTTC ( Orientation orientationOfA, Orientation orientationOfB, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmTTC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( orientationOfA == NORMAL || orientationOfB == NORMAL ) throw std::logic_error ("GemmTTC expects A and B to be (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Height() != C.Width() || A.Height() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal GemmTTC: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,STAR,MC > A1_STAR_MC(g); DistMatrix<T,VR, STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR > B1AdjOrTrans_STAR_MR(g); A1_STAR_MC.AlignWith( C ); B1_VR_STAR.AlignWith( C ); B1AdjOrTrans_STAR_MR.AlignWith( C ); // Start the algorithm Scale( beta, C ); LockedPartitionDown ( A, AT, AB, 0 ); LockedPartitionRight( B, BL, BR, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); //--------------------------------------------------------------------// A1_STAR_MC = A1; B1_VR_STAR = B1; if( orientationOfB == ADJOINT ) B1AdjOrTrans_STAR_MR.AdjointFrom( B1_VR_STAR ); else B1AdjOrTrans_STAR_MR.TransposeFrom( B1_VR_STAR ); // C[MC,MR] += alpha (A1[*,MC])^[T/H] (B1[MR,*])^[T/H] // = alpha (A1^[T/H])[MC,*] (B1^[T/H])[*,MR] LocalGemm ( orientationOfA, NORMAL, alpha, A1_STAR_MC, B1AdjOrTrans_STAR_MR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void GemmNNB ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmNNB"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || B.Width() != C.Width() || A.Width() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmNNB: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,STAR,MC> A1_STAR_MC(g); DistMatrix<T,MR,STAR> D1Trans_MR_STAR(g); A1_STAR_MC.AlignWith( B ); D1Trans_MR_STAR.AlignWith( B ); // Start the algorithm Scale( beta, C ); LockedPartitionDown ( A, AT, AB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); Zeros( C1.Width(), C1.Height(), D1Trans_MR_STAR ); //--------------------------------------------------------------------// A1_STAR_MC = A1; // A1[*,MC] <- A1[MC,MR] // D1^T[MR,* ] := alpha B^T[MR,MC] A1^T[MC,* ] LocalGemm ( TRANSPOSE, TRANSPOSE, alpha, B, A1_STAR_MC, T(0), D1Trans_MR_STAR ); C1.TransposeSumScatterUpdate( T(1), D1Trans_MR_STAR ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void Trr2kTTTT ( UpperOrLower uplo, Orientation orientationOfA, Orientation orientationOfB, Orientation orientationOfC, Orientation orientationOfD, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, const DistMatrix<T>& C, const DistMatrix<T>& D, T beta, DistMatrix<T>& E ) { #ifndef RELEASE PushCallStack("internal::Trr2kTTTT"); if( E.Height() != E.Width() || A.Height() != C.Height() || A.Width() != E.Height() || C.Width() != E.Height() || B.Height() != E.Width() || D.Height() != E.Width() || A.Height() != B.Width() || C.Height() != D.Width() ) throw std::logic_error("Nonconformal Trr2kTTTT"); #endif const Grid& g = E.Grid(); DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); DistMatrix<T> DL(g), DR(g), D0(g), D1(g), D2(g); DistMatrix<T,STAR,MC > A1_STAR_MC(g); DistMatrix<T,VR, STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR > B1AdjOrTrans_STAR_MR(g); DistMatrix<T,STAR,MC > C1_STAR_MC(g); DistMatrix<T,VR, STAR> D1_VR_STAR(g); DistMatrix<T,STAR,MR > D1AdjOrTrans_STAR_MR(g); A1_STAR_MC.AlignWith( E ); B1_VR_STAR.AlignWith( E ); B1AdjOrTrans_STAR_MR.AlignWith( E ); C1_STAR_MC.AlignWith( E ); D1_VR_STAR.AlignWith( E ); D1AdjOrTrans_STAR_MR.AlignWith( E ); LockedPartitionDown ( A, AT, AB, 0 ); LockedPartitionRight( B, BL, BR, 0 ); LockedPartitionDown ( C, CT, CB, 0 ); LockedPartitionRight( D, DL, DR, 0 ); while( AT.Height() < A.Height() ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); LockedRepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); LockedRepartitionRight ( DL, /**/ DR, D0, /**/ D1, D2 ); //--------------------------------------------------------------------// A1_STAR_MC = A1; C1_STAR_MC = C1; B1_VR_STAR = B1; D1_VR_STAR = D1; if( orientationOfB == ADJOINT ) B1AdjOrTrans_STAR_MR.AdjointFrom( B1_VR_STAR ); else B1AdjOrTrans_STAR_MR.TransposeFrom( B1_VR_STAR ); if( orientationOfD == ADJOINT ) D1AdjOrTrans_STAR_MR.AdjointFrom( D1_VR_STAR ); else D1AdjOrTrans_STAR_MR.TransposeFrom( D1_VR_STAR ); LocalTrr2k ( uplo, orientationOfA, orientationOfC, alpha, A1_STAR_MC, B1AdjOrTrans_STAR_MR, C1_STAR_MC, D1AdjOrTrans_STAR_MR, beta, E ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( DL, /**/ DR, D0, D1, /**/ D2 ); SlideLockedPartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void HerkLC( T alpha, const DistMatrix<T>& A, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::HerkLC"); if( A.Grid() != C.Grid() ) throw std::logic_error ("A and C must be distributed over the same grid"); if( A.Width() != C.Height() || A.Width() != C.Width() ) { std::ostringstream msg; msg << "Nonconformal HerkLC:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); // Temporary distributions DistMatrix<T,MR, STAR> A1Trans_MR_STAR(g); DistMatrix<T,STAR,VR > A1_STAR_VR(g); DistMatrix<T,STAR,MC > A1_STAR_MC(g); A1Trans_MR_STAR.AlignWith( C ); A1_STAR_MC.AlignWith( C ); // Start the algorithm ScaleTrapezoid( beta, LEFT, LOWER, 0, C ); LockedPartitionDown ( A, AT, AB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); //--------------------------------------------------------------------// A1Trans_MR_STAR.TransposeFrom( A1 ); A1_STAR_VR.TransposeFrom( A1Trans_MR_STAR ); A1_STAR_MC = A1_STAR_VR; LocalTrrk ( LOWER, ADJOINT, TRANSPOSE, alpha, A1_STAR_MC, A1Trans_MR_STAR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void internal::GemmTNC ( Orientation orientationOfA, T alpha, const DistMatrix<T,MC,MR>& A, const DistMatrix<T,MC,MR>& B, T beta, DistMatrix<T,MC,MR>& C ) { #ifndef RELEASE PushCallStack("internal::GemmTNC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( orientationOfA == NORMAL ) throw std::logic_error("GemmTNC assumes A is (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Width() != C.Width() || A.Height() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmTNC: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T,MC,MR> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T,MC,MR> BT(g), B0(g), BB(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,STAR,MC> A1_STAR_MC(g); DistMatrix<T,STAR,MR> B1_STAR_MR(g); // Start the algorithm Scal( beta, C ); LockedPartitionDown ( A, AT, AB, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); A1_STAR_MC.AlignWith( C ); B1_STAR_MR.AlignWith( C ); //--------------------------------------------------------------------// A1_STAR_MC = A1; // A1[*,MC] <- A1[MC,MR] B1_STAR_MR = B1; // B1[*,MR] <- B1[MC,MR] // C[MC,MR] += alpha (A1[*,MC])^T B1[*,MR] // = alpha (A1^T)[MC,*] B1[*,MR] internal::LocalGemm ( orientationOfA, NORMAL, alpha, A1_STAR_MC, B1_STAR_MR, (T)1, C ); //--------------------------------------------------------------------// A1_STAR_MC.FreeAlignments(); B1_STAR_MR.FreeAlignments(); SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); } #ifndef RELEASE PopCallStack(); #endif }