inline void HemmLUC ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::HemmLUC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> ATL(g), ATR(g), A00(g), A01(g), A02(g), AColPan(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), ARowPan(g), A20(g), A21(g), A22(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CAbove(g), CB(g), C1(g), CBelow(g), C2(g); // Temporary distributions DistMatrix<T,MC, STAR> AColPan_MC_STAR(g); DistMatrix<T,STAR,MC > ARowPan_STAR_MC(g); DistMatrix<T,MR, STAR> B1Adj_MR_STAR(g); B1Adj_MR_STAR.AlignWith( C ); // Start the algorithm Scale( beta, C ); LockedPartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( CB.Height() > 0 ) { LockedRepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); LockedView1x2( ARowPan, A11, A12 ); LockedView2x1 ( AColPan, A01, A11 ); View2x1 ( CAbove, C0, C1 ); View2x1 ( CBelow, C1, C2 ); AColPan_MC_STAR.AlignWith( CAbove ); ARowPan_STAR_MC.AlignWith( CBelow ); //--------------------------------------------------------------------// AColPan_MC_STAR = AColPan; ARowPan_STAR_MC = ARowPan; MakeTrapezoidal( RIGHT, UPPER, 0, AColPan_MC_STAR ); MakeTrapezoidal( LEFT, UPPER, 1, ARowPan_STAR_MC ); B1Adj_MR_STAR.AdjointFrom( B1 ); LocalGemm ( NORMAL, ADJOINT, alpha, AColPan_MC_STAR, B1Adj_MR_STAR, T(1), CAbove ); LocalGemm ( ADJOINT, ADJOINT, alpha, ARowPan_STAR_MC, B1Adj_MR_STAR, T(1), CBelow ); //--------------------------------------------------------------------// AColPan_MC_STAR.FreeAlignments(); ARowPan_STAR_MC.FreeAlignments(); SlideLockedPartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void HemmRUA ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::HemmRUA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); DistMatrix<T,MR, STAR> B1Adj_MR_STAR(g); DistMatrix<T,VC, STAR> B1Adj_VC_STAR(g); DistMatrix<T,STAR,MC > B1_STAR_MC(g); DistMatrix<T,MC, STAR> Z1Adj_MC_STAR(g); DistMatrix<T,MR, STAR> Z1Adj_MR_STAR(g); DistMatrix<T,MR, MC > Z1Adj_MR_MC(g); DistMatrix<T> Z1Adj(g); B1Adj_MR_STAR.AlignWith( A ); B1Adj_VC_STAR.AlignWith( A ); B1_STAR_MC.AlignWith( A ); Z1Adj_MC_STAR.AlignWith( A ); Z1Adj_MR_STAR.AlignWith( A ); Matrix<T> Z1Local; Scale( beta, C ); LockedPartitionDown ( B, BT, BB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( CT.Height() < C.Height() ) { LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); Z1Adj_MR_MC.AlignWith( C1 ); Zeros( C1.Width(), C1.Height(), Z1Adj_MC_STAR ); Zeros( C1.Width(), C1.Height(), Z1Adj_MR_STAR ); //--------------------------------------------------------------------// B1Adj_MR_STAR.AdjointFrom( B1 ); B1Adj_VC_STAR = B1Adj_MR_STAR; B1_STAR_MC.AdjointFrom( B1Adj_VC_STAR ); LocalSymmetricAccumulateRU ( ADJOINT, alpha, A, B1_STAR_MC, B1Adj_MR_STAR, Z1Adj_MC_STAR, Z1Adj_MR_STAR ); Z1Adj.SumScatterFrom( Z1Adj_MC_STAR ); Z1Adj_MR_MC = Z1Adj; Z1Adj_MR_MC.SumScatterUpdate( T(1), Z1Adj_MR_STAR ); Adjoint( Z1Adj_MR_MC.LockedLocalMatrix(), Z1Local ); Axpy( T(1), Z1Local, C1.LocalMatrix() ); //--------------------------------------------------------------------// Z1Adj_MR_MC.FreeAlignments(); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }