// Draws a simple house outline with `painter`: a five-sided wall-and-roof
// silhouette, a door, a small quadrilateral on the left roof slope and a
// rectangular window.
//
//   painter - painter that receives every drawing call
//   x, y    - coordinates of the roof apex
//   c       - vertical scale: the eaves sit at y + c/3 and the base at y + c
//   r       - horizontal scale: the walls span x - r/2 .. x + r/2
//
// All arithmetic is integer (truncating) except the window's top edge, which
// is computed as y + c/2.5 in floating point and then truncated to int.
void graphics::NgoiLang(QPainter& painter,int x,int y,int c,int r)
{
    const int wallLeft  = x - r/2;
    const int wallRight = x + r/2;
    const int baseY     = y + c;
    const int eavesY    = y + c/3;

    // Silhouette: apex -> left eave -> bottom-left -> bottom-right -> right eave.
    QPolygon outline;
    outline << QPoint(x, y)
            << QPoint(wallLeft, eavesY)
            << QPoint(wallLeft, baseY)
            << QPoint(wallRight, baseY)
            << QPoint(wallRight, eavesY);
    painter.drawPolygon(outline);

    // Door: an open polyline (right jamb, lintel, left jamb); the threshold
    // is not drawn because it lies on the base line of the silhouette.
    const int quarterLeft = x - r/4;
    const int doorTopY    = y + 2*c/3;
    QPolygon door;
    door << QPoint(x, baseY)
         << QPoint(x, doorTopY)
         << QPoint(quarterLeft, doorTopY)
         << QPoint(quarterLeft, baseY);
    painter.drawPolyline(door);

    // The original comment labels the rest "draw the window". This first
    // shape has its two lower corners on the left roof slope and its top at
    // apex height, i.e. it sits where a chimney would.
    const int eighthLeft = x - r/8;
    QPolygon roofShape;
    roofShape << QPoint(quarterLeft, y + c/6)
              << QPoint(quarterLeft, y)
              << QPoint(eighthLeft, y)
              << QPoint(eighthLeft, y + c/12);
    painter.drawPolygon(roofShape);

    // Window on the right half of the wall. Note the width is derived from c
    // and the height from r, exactly as in the original call.
    const int windowTop = static_cast<int>(y + c/2.5);
    painter.drawRect(x + r/4, windowTop, c/10, r/10);
}
inline void LocalTrmmAccumulateLLT ( Orientation orientation, UnitOrNonUnit diag, T alpha, const DistMatrix<T>& L, const DistMatrix<T,MC,STAR>& X_MC_STAR, DistMatrix<T,MR,STAR>& Z_MR_STAR ) { #ifndef RELEASE PushCallStack("internal::LocalTrmmAccumulateLLT"); if( L.Grid() != X_MC_STAR.Grid() || X_MC_STAR.Grid() != Z_MR_STAR.Grid() ) throw std::logic_error ("{L,X,Z} must be distributed over the same grid"); if( L.Height() != L.Width() || L.Height() != X_MC_STAR.Height() || L.Height() != Z_MR_STAR.Height() ) { std::ostringstream msg; msg << "Nonconformal LocalTrmmAccumulateLLT: " << "\n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X[MC,* ] ~ " << X_MC_STAR.Height() << " x " << X_MC_STAR.Width() << "\n" << " Z[MR,* ] ` " << Z_MR_STAR.Height() << " x " << Z_MR_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( X_MC_STAR.ColAlignment() != L.ColAlignment() || Z_MR_STAR.ColAlignment() != L.RowAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<T> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<T> D11(g); DistMatrix<T,MC,STAR> XT_MC_STAR(g), X0_MC_STAR(g), XB_MC_STAR(g), X1_MC_STAR(g), X2_MC_STAR(g); DistMatrix<T,MR,STAR> ZT_MR_STAR(g), Z0_MR_STAR(g), ZB_MR_STAR(g), Z1_MR_STAR(g), Z2_MR_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); LockedPartitionDown ( X_MC_STAR, XT_MC_STAR, XB_MC_STAR, 0 ); PartitionDown ( Z_MR_STAR, ZT_MR_STAR, ZB_MR_STAR, 0 ); while( LTL.Height() < L.Height() ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); LockedRepartitionDown ( XT_MC_STAR, X0_MC_STAR, /**********/ /**********/ X1_MC_STAR, XB_MC_STAR, X2_MC_STAR ); 
RepartitionDown ( ZT_MR_STAR, Z0_MR_STAR, /**********/ /**********/ Z1_MR_STAR, ZB_MR_STAR, Z2_MR_STAR ); D11.AlignWith( L11 ); //--------------------------------------------------------------------// D11 = L11; MakeTrapezoidal( LEFT, LOWER, 0, D11 ); if( diag == UNIT ) SetDiagonalToOne( D11 ); LocalGemm ( orientation, NORMAL, alpha, D11, X1_MC_STAR, T(1), Z1_MR_STAR ); LocalGemm ( orientation, NORMAL, alpha, L21, X2_MC_STAR, T(1), Z1_MR_STAR ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlideLockedPartitionDown ( XT_MC_STAR, X0_MC_STAR, X1_MC_STAR, /**********/ /**********/ XB_MC_STAR, X2_MC_STAR ); SlidePartitionDown ( ZT_MR_STAR, Z0_MR_STAR, Z1_MR_STAR, /**********/ /**********/ ZB_MR_STAR, Z2_MR_STAR ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void LocalSymmetricAccumulateLU ( Orientation orientation, T alpha, const DistMatrix<T>& A, const DistMatrix<T,MC, STAR>& B_MC_STAR, const DistMatrix<T,STAR,MR >& BAdjOrTrans_STAR_MR, DistMatrix<T,MC, STAR>& Z_MC_STAR, DistMatrix<T,MR, STAR>& Z_MR_STAR ) { #ifndef RELEASE PushCallStack("internal::LocalSymmetricAccumulateLU"); if( A.Grid() != B_MC_STAR.Grid() || B_MC_STAR.Grid() != BAdjOrTrans_STAR_MR.Grid() || BAdjOrTrans_STAR_MR.Grid() != Z_MC_STAR.Grid() || Z_MC_STAR.Grid() != Z_MR_STAR.Grid() ) throw std::logic_error ("{A,B,Z} must be distributed over the same grid"); if( A.Height() != A.Width() || A.Height() != B_MC_STAR.Height() || A.Height() != BAdjOrTrans_STAR_MR.Width() || A.Height() != Z_MC_STAR.Height() || A.Height() != Z_MR_STAR.Height() || B_MC_STAR.Width() != BAdjOrTrans_STAR_MR.Height() || BAdjOrTrans_STAR_MR.Height() != Z_MC_STAR.Width() || Z_MC_STAR.Width() != Z_MR_STAR.Width() ) { std::ostringstream msg; msg << "Nonconformal LocalSymmetricAccumulateLU: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B[MC,* ] ~ " << B_MC_STAR.Height() << " x " << B_MC_STAR.Width() << "\n" << " B^H/T[* ,MR] ~ " << BAdjOrTrans_STAR_MR.Height() << " x " << BAdjOrTrans_STAR_MR.Width() << "\n" << " Z[MC,* ] ~ " << Z_MC_STAR.Height() << " x " << Z_MC_STAR.Width() << "\n" << " Z[MR,* ] ` " << Z_MR_STAR.Height() << " x " << Z_MR_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( B_MC_STAR.ColAlignment() != A.ColAlignment() || BAdjOrTrans_STAR_MR.RowAlignment() != A.RowAlignment() || Z_MC_STAR.ColAlignment() != A.ColAlignment() || Z_MR_STAR.ColAlignment() != A.RowAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = A.Grid(); DistMatrix<T> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<T> D11(g); DistMatrix<T,MC,STAR> BT_MC_STAR(g), B0_MC_STAR(g), BB_MC_STAR(g), B1_MC_STAR(g), B2_MC_STAR(g); 
DistMatrix<T,STAR,MR> BLAdjOrTrans_STAR_MR(g), BRAdjOrTrans_STAR_MR(g), B0AdjOrTrans_STAR_MR(g), B1AdjOrTrans_STAR_MR(g), B2AdjOrTrans_STAR_MR(g); DistMatrix<T,MC,STAR> ZT_MC_STAR(g), Z0_MC_STAR(g), ZB_MC_STAR(g), Z1_MC_STAR(g), Z2_MC_STAR(g); DistMatrix<T,MR,STAR> ZT_MR_STAR(g), Z0_MR_STAR(g), ZB_MR_STAR(g), Z1_MR_STAR(g), Z2_MR_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDown ( B_MC_STAR, BT_MC_STAR, BB_MC_STAR, 0 ); LockedPartitionRight ( BAdjOrTrans_STAR_MR, BLAdjOrTrans_STAR_MR, BRAdjOrTrans_STAR_MR, 0 ); PartitionDown ( Z_MC_STAR, ZT_MC_STAR, ZB_MC_STAR, 0 ); PartitionDown ( Z_MR_STAR, ZT_MR_STAR, ZB_MR_STAR, 0 ); while( ATL.Height() < A.Height() ) { LockedRepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDown ( BT_MC_STAR, B0_MC_STAR, /**********/ /**********/ B1_MC_STAR, BB_MC_STAR, B2_MC_STAR ); LockedRepartitionRight ( BLAdjOrTrans_STAR_MR, /**/ BRAdjOrTrans_STAR_MR, B0AdjOrTrans_STAR_MR, /**/ B1AdjOrTrans_STAR_MR, B2AdjOrTrans_STAR_MR ); RepartitionDown ( ZT_MC_STAR, Z0_MC_STAR, /**********/ /**********/ Z1_MC_STAR, ZB_MC_STAR, Z2_MC_STAR ); RepartitionDown ( ZT_MR_STAR, Z0_MR_STAR, /**********/ /**********/ Z1_MR_STAR, ZB_MR_STAR, Z2_MR_STAR ); D11.AlignWith( A11 ); //--------------------------------------------------------------------// D11 = A11; MakeTrapezoidal( LEFT, UPPER, 0, D11 ); LocalGemm ( NORMAL, orientation, alpha, D11, B1AdjOrTrans_STAR_MR, T(1), Z1_MC_STAR ); MakeTrapezoidal( LEFT, UPPER, 1, D11 ); LocalGemm ( orientation, NORMAL, alpha, D11, B1_MC_STAR, T(1), Z1_MR_STAR ); LocalGemm ( NORMAL, orientation, alpha, A12, B2AdjOrTrans_STAR_MR, T(1), Z1_MC_STAR ); LocalGemm ( orientation, NORMAL, alpha, A12, B1_MC_STAR, T(1), Z2_MR_STAR ); 
//--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDown ( BT_MC_STAR, B0_MC_STAR, B1_MC_STAR, /**********/ /**********/ BB_MC_STAR, B2_MC_STAR ); SlideLockedPartitionRight ( BLAdjOrTrans_STAR_MR, /**/ BRAdjOrTrans_STAR_MR, B0AdjOrTrans_STAR_MR, B1AdjOrTrans_STAR_MR, /**/ B2AdjOrTrans_STAR_MR ); SlidePartitionDown ( ZT_MC_STAR, Z0_MC_STAR, Z1_MC_STAR, /**********/ /**********/ ZB_MC_STAR, Z2_MC_STAR ); SlidePartitionDown ( ZT_MR_STAR, Z0_MR_STAR, Z1_MR_STAR, /**********/ /**********/ ZB_MR_STAR, Z2_MR_STAR ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void LocalTrmmAccumulateRUN ( Orientation orientation, UnitOrNonUnit diag, T alpha, const DistMatrix<T,MC, MR >& U, const DistMatrix<T,STAR,MC >& X_STAR_MC, DistMatrix<T,MR, STAR>& ZTrans_MR_STAR ) { #ifndef RELEASE CallStackEntry entry("internal::LocalTrmmAccumulateRUN"); if( U.Grid() != X_STAR_MC.Grid() || X_STAR_MC.Grid() != ZTrans_MR_STAR.Grid() ) throw std::logic_error ("{U,X,Z} must be distributed over the same grid"); if( U.Height() != U.Width() || U.Height() != X_STAR_MC.Width() || U.Height() != ZTrans_MR_STAR.Height() ) { std::ostringstream msg; msg << "Nonconformal LocalTrmmAccumulateRUN: \n" << " U ~ " << U.Height() << " x " << U.Width() << "\n" << " X[* ,MC] ~ " << X_STAR_MC.Height() << " x " << X_STAR_MC.Width() << "\n" << " Z^H/T[MR,* ] ~ " << ZTrans_MR_STAR.Height() << " x " << ZTrans_MR_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( X_STAR_MC.RowAlignment() != U.ColAlignment() || ZTrans_MR_STAR.ColAlignment() != U.RowAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<T> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<T> D11(g); DistMatrix<T,STAR,MC> XL_STAR_MC(g), XR_STAR_MC(g), X0_STAR_MC(g), X1_STAR_MC(g), X2_STAR_MC(g); DistMatrix<T,MR,STAR> ZTTrans_MR_STAR(g), Z0Trans_MR_STAR(g), ZBTrans_MR_STAR(g), Z1Trans_MR_STAR(g), Z2Trans_MR_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); LockedPartitionRight( X_STAR_MC, XL_STAR_MC, XR_STAR_MC, 0 ); PartitionDown ( ZTrans_MR_STAR, ZTTrans_MR_STAR, ZBTrans_MR_STAR, 0 ); while( UTL.Height() < U.Height() ) { LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); LockedRepartitionRight ( XL_STAR_MC, 
/**/ XR_STAR_MC, X0_STAR_MC, /**/ X1_STAR_MC, X2_STAR_MC ); RepartitionDown ( ZTTrans_MR_STAR, Z0Trans_MR_STAR, /***************/ /***************/ Z1Trans_MR_STAR, ZBTrans_MR_STAR, Z2Trans_MR_STAR ); D11.AlignWith( U11 ); //--------------------------------------------------------------------// D11 = U11; MakeTriangular( UPPER, D11 ); if( diag == UNIT ) SetDiagonal( D11, T(1) ); LocalGemm ( orientation, orientation, alpha, D11, X1_STAR_MC, T(1), Z1Trans_MR_STAR ); LocalGemm ( orientation, orientation, alpha, U01, X0_STAR_MC, T(1), Z1Trans_MR_STAR ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); SlideLockedPartitionRight ( XL_STAR_MC, /**/ XR_STAR_MC, X0_STAR_MC, X1_STAR_MC, /**/ X2_STAR_MC ); SlidePartitionDown ( ZTTrans_MR_STAR, Z0Trans_MR_STAR, Z1Trans_MR_STAR, /***************/ /***************/ ZBTrans_MR_STAR, Z2Trans_MR_STAR ); } PopBlocksizeStack(); }
inline void LocalSymvRowAccumulateU ( T alpha, const DistMatrix<T>& A, const DistMatrix<T,STAR,MC>& x_STAR_MC, const DistMatrix<T,STAR,MR>& x_STAR_MR, DistMatrix<T,STAR,MC>& z_STAR_MC, DistMatrix<T,STAR,MR>& z_STAR_MR ) { #ifndef RELEASE PushCallStack("internal::LocalSymvRowAccumulateU"); if( A.Grid() != x_STAR_MC.Grid() || x_STAR_MC.Grid() != x_STAR_MR.Grid() || x_STAR_MR.Grid() != z_STAR_MC.Grid() || z_STAR_MC.Grid() != z_STAR_MR.Grid() ) throw std::logic_error ("{A,x,z} must be distributed over the same grid"); if( x_STAR_MC.Height() != 1 || x_STAR_MR.Height() != 1 || z_STAR_MC.Height() != 1 || z_STAR_MR.Height() != 1 ) throw std::logic_error("Expected x and z to be row vectors"); if( A.Height() != A.Width() || A.Height() != x_STAR_MC.Width() || A.Height() != x_STAR_MR.Width() || A.Height() != z_STAR_MC.Width() || A.Height() != z_STAR_MR.Width() ) { std::ostringstream msg; msg << "Nonconformal LocalSymvRowAccumulateU: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " x[* ,MC] ~ " << x_STAR_MC.Height() << " x " << x_STAR_MC.Width() << "\n" << " x[* ,MR] ~ " << x_STAR_MR.Height() << " x " << x_STAR_MR.Width() << "\n" << " z[* ,MC] ~ " << z_STAR_MC.Height() << " x " << z_STAR_MC.Width() << "\n" << " z[* ,MR] ~ " << z_STAR_MR.Height() << " x " << z_STAR_MR.Width() << "\n"; throw std::logic_error( msg.str() ); } if( x_STAR_MC.RowAlignment() != A.ColAlignment() || x_STAR_MR.RowAlignment() != A.RowAlignment() || z_STAR_MC.RowAlignment() != A.ColAlignment() || z_STAR_MR.RowAlignment() != A.RowAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> A11(g), A12(g); DistMatrix<T> D11(g); DistMatrix<T,STAR,MC> x1_STAR_MC(g); DistMatrix<T,STAR,MR> xL_STAR_MR(g), xR_STAR_MR(g), x0_STAR_MR(g), x1_STAR_MR(g), x2_STAR_MR(g); DistMatrix<T,STAR,MC> z1_STAR_MC(g); DistMatrix<T,STAR,MR> z1_STAR_MR(g), z2_STAR_MR(g); // We want our local gemvs to be of width blocksize, so we will 
// temporarily change to max(r,c) times the current blocksize const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*LocalSymvBlocksize<T>() ); LockedPartitionRight( x_STAR_MR, xL_STAR_MR, xR_STAR_MR, 0 ); while( xL_STAR_MR.Width() < x_STAR_MR.Width() ) { LockedRepartitionRight ( xL_STAR_MR, /**/ xR_STAR_MR, x0_STAR_MR, /**/ x1_STAR_MR, x2_STAR_MR ); const int n0 = x0_STAR_MR.Width(); const int n1 = x1_STAR_MR.Width(); const int n2 = x2_STAR_MR.Width(); LockedView( A11, A, n0, n0, n1, n1 ); LockedView( A12, A, n0, n0+n1, n1, n2 ); LockedView( x1_STAR_MC, x_STAR_MC, 0, n0, 1, n1 ); View( z1_STAR_MC, z_STAR_MC, 0, n0, 1, n1 ); View( z1_STAR_MR, z_STAR_MR, 0, n0, 1, n1 ); View( z2_STAR_MR, z_STAR_MR, 0, n0+n1, 1, n2 ); D11.AlignWith( A11 ); //--------------------------------------------------------------------// // TODO: These diagonal block updates can be greatly improved D11 = A11; MakeTrapezoidal( LEFT, UPPER, 0, D11 ); Gemv ( NORMAL, alpha, D11.LockedLocalMatrix(), x1_STAR_MR.LockedLocalMatrix(), T(1), z1_STAR_MC.LocalMatrix() ); MakeTrapezoidal( LEFT, UPPER, 1, D11 ); Gemv ( TRANSPOSE, alpha, D11.LockedLocalMatrix(), x1_STAR_MC.LockedLocalMatrix(), T(1), z1_STAR_MR.LocalMatrix() ); Gemv ( NORMAL, alpha, A12.LockedLocalMatrix(), x2_STAR_MR.LockedLocalMatrix(), T(1), z1_STAR_MC.LocalMatrix() ); Gemv ( TRANSPOSE, alpha, A12.LockedLocalMatrix(), x1_STAR_MC.LockedLocalMatrix(), T(1), z2_STAR_MR.LocalMatrix() ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionRight ( xL_STAR_MR, /**/ xR_STAR_MR, x0_STAR_MR, x1_STAR_MR, /**/ x2_STAR_MR ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void internal::LocalTrmmAccumulateLUN ( Orientation orientation, UnitOrNonUnit diag, T alpha, const DistMatrix<T,MC, MR >& U, const DistMatrix<T,STAR,MR >& XAdjOrTrans_STAR_MR, DistMatrix<T,MC, STAR>& Z_MC_STAR ) { #ifndef RELEASE PushCallStack("internal::LocalTrmmAccumulateLUN"); if( U.Grid() != XAdjOrTrans_STAR_MR.Grid() || XAdjOrTrans_STAR_MR.Grid() != Z_MC_STAR.Grid() ) throw std::logic_error ("{U,X,Z} must be distributed over the same grid"); if( U.Height() != U.Width() || U.Height() != XAdjOrTrans_STAR_MR.Width() || U.Height() != Z_MC_STAR.Height() || XAdjOrTrans_STAR_MR.Height() != Z_MC_STAR.Width() ) { std::ostringstream msg; msg << "Nonconformal LocalTrmmAccumulateLUN: \n" << " U ~ " << U.Height() << " x " << U.Width() << "\n" << " X^H/T[* ,MR] ~ " << XAdjOrTrans_STAR_MR.Height() << " x " << XAdjOrTrans_STAR_MR.Width() << "\n" << " Z[MC,* ] ~ " << Z_MC_STAR.Height() << " x " << Z_MC_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( XAdjOrTrans_STAR_MR.RowAlignment() != U.RowAlignment() || Z_MC_STAR.ColAlignment() != U.ColAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<T,MC,MR> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<T,MC,MR> D11(g); DistMatrix<T,STAR,MR> XLAdjOrTrans_STAR_MR(g), XRAdjOrTrans_STAR_MR(g), X0AdjOrTrans_STAR_MR(g), X1AdjOrTrans_STAR_MR(g), X2AdjOrTrans_STAR_MR(g); DistMatrix<T,MC,STAR> ZT_MC_STAR(g), Z0_MC_STAR(g), ZB_MC_STAR(g), Z1_MC_STAR(g), Z2_MC_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); LockedPartitionRight ( XAdjOrTrans_STAR_MR, XLAdjOrTrans_STAR_MR, XRAdjOrTrans_STAR_MR, 0 ); PartitionDown ( Z_MC_STAR, ZT_MC_STAR, ZB_MC_STAR, 0 ); while( UTL.Height() < U.Height() ) { LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, 
/**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); LockedRepartitionRight ( XLAdjOrTrans_STAR_MR, /**/ XRAdjOrTrans_STAR_MR, X0AdjOrTrans_STAR_MR, /**/ X1AdjOrTrans_STAR_MR, X2AdjOrTrans_STAR_MR ); RepartitionDown ( ZT_MC_STAR, Z0_MC_STAR, /**********/ /**********/ Z1_MC_STAR, ZB_MC_STAR, Z2_MC_STAR ); D11.AlignWith( U11 ); //--------------------------------------------------------------------// D11 = U11; MakeTrapezoidal( LEFT, UPPER, 0, D11 ); if( diag == UNIT ) SetDiagonalToOne( D11 ); internal::LocalGemm ( NORMAL, orientation, alpha, D11, X1AdjOrTrans_STAR_MR, (T)1, Z1_MC_STAR ); internal::LocalGemm ( NORMAL, orientation, alpha, U01, X1AdjOrTrans_STAR_MR, (T)1, Z0_MC_STAR ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); SlideLockedPartitionRight ( XLAdjOrTrans_STAR_MR, /**/ XRAdjOrTrans_STAR_MR, X0AdjOrTrans_STAR_MR, X1AdjOrTrans_STAR_MR, /**/ X2AdjOrTrans_STAR_MR ); SlidePartitionDown ( ZT_MC_STAR, Z0_MC_STAR, Z1_MC_STAR, /**********/ /**********/ ZB_MC_STAR, Z2_MC_STAR ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void LocalSymvColAccumulateU ( T alpha, const DistMatrix<T>& A, const DistMatrix<T,MC,STAR>& x_MC_STAR, const DistMatrix<T,MR,STAR>& x_MR_STAR, DistMatrix<T,MC,STAR>& z_MC_STAR, DistMatrix<T,MR,STAR>& z_MR_STAR, bool conjugate=false ) { #ifndef RELEASE CallStackEntry entry("internal::LocalSymvColAccumulateU"); if( A.Grid() != x_MC_STAR.Grid() || x_MC_STAR.Grid() != x_MR_STAR.Grid() || x_MR_STAR.Grid() != z_MC_STAR.Grid() || z_MC_STAR.Grid() != z_MR_STAR.Grid() ) LogicError ("{A,x,z} must be distributed over the same grid"); if( x_MC_STAR.Width() != 1 || x_MR_STAR.Width() != 1 || z_MC_STAR.Width() != 1 || z_MR_STAR.Width() != 1 ) LogicError("Expected x and z to be column vectors"); if( A.Height() != A.Width() || A.Height() != x_MC_STAR.Height() || A.Height() != x_MR_STAR.Height() || A.Height() != z_MC_STAR.Height() || A.Height() != z_MR_STAR.Height() ) { std::ostringstream msg; msg << "Nonconformal LocalSymvColAccumulateU: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " x[MC,* ] ~ " << x_MC_STAR.Height() << " x " << x_MC_STAR.Width() << "\n" << " x[MR,* ] ~ " << x_MR_STAR.Height() << " x " << x_MR_STAR.Width() << "\n" << " z[MC,* ] ~ " << z_MC_STAR.Height() << " x " << z_MC_STAR.Width() << "\n" << " z[MR,* ] ~ " << z_MR_STAR.Height() << " x " << z_MR_STAR.Width() << "\n"; LogicError( msg.str() ); } if( x_MC_STAR.ColAlignment() != A.ColAlignment() || x_MR_STAR.ColAlignment() != A.RowAlignment() || z_MC_STAR.ColAlignment() != A.ColAlignment() || z_MR_STAR.ColAlignment() != A.RowAlignment() ) LogicError("Partial matrix distributions are misaligned"); #endif const Grid& g = A.Grid(); const Orientation orientation = ( conjugate ? 
ADJOINT : TRANSPOSE ); // Matrix views DistMatrix<T> A11(g), A12(g); DistMatrix<T> D11(g); DistMatrix<T,MC,STAR> x1_MC_STAR(g); DistMatrix<T,MR,STAR> xT_MR_STAR(g), x0_MR_STAR(g), xB_MR_STAR(g), x1_MR_STAR(g), x2_MR_STAR(g); DistMatrix<T,MC,STAR> z1_MC_STAR(g); DistMatrix<T,MR,STAR> z1_MR_STAR(g), z2_MR_STAR(g); // We want our local gemvs to be of width blocksize, so we will // temporarily change to max(r,c) times the current blocksize const Int ratio = Max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*LocalSymvBlocksize<T>() ); LockedPartitionDown ( x_MR_STAR, xT_MR_STAR, xB_MR_STAR, 0 ); while( xT_MR_STAR.Height() < x_MR_STAR.Height() ) { LockedRepartitionDown ( xT_MR_STAR, x0_MR_STAR, /**********/ /**********/ x1_MR_STAR, xB_MR_STAR, x2_MR_STAR ); const Int n0 = x0_MR_STAR.Height(); const Int n1 = x1_MR_STAR.Height(); const Int n2 = x2_MR_STAR.Height(); LockedView( A11, A, n0, n0, n1, n1 ); LockedView( A12, A, n0, n0+n1, n1, n2 ); LockedView( x1_MC_STAR, x_MC_STAR, n0, 0, n1, 1 ); View( z1_MC_STAR, z_MC_STAR, n0, 0, n1, 1 ); View( z1_MR_STAR, z_MR_STAR, n0, 0, n1, 1 ); View( z2_MR_STAR, z_MR_STAR, n0+n1, 0, n2, 1 ); D11.AlignWith( A11 ); //--------------------------------------------------------------------// // TODO: These diagonal block updates can be greatly improved D11 = A11; MakeTriangular( UPPER, D11 ); LocalGemv( NORMAL, alpha, D11, x1_MR_STAR, T(1), z1_MC_STAR ); SetDiagonal( D11, T(0) ); LocalGemv( orientation, alpha, D11, x1_MC_STAR, T(1), z1_MR_STAR ); LocalGemv( NORMAL, alpha, A12, x2_MR_STAR, T(1), z1_MC_STAR ); LocalGemv( orientation, alpha, A12, x1_MC_STAR, T(1), z2_MR_STAR ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( xT_MR_STAR, x0_MR_STAR, x1_MR_STAR, /**********/ /**********/ xB_MR_STAR, x2_MR_STAR ); } PopBlocksizeStack(); }
inline void LocalSymmetricAccumulateRU ( Orientation orientation, T alpha, const DistMatrix<T,MC, MR >& A, const DistMatrix<T,STAR,MC >& B_STAR_MC, const DistMatrix<T,MR, STAR>& BTrans_MR_STAR, DistMatrix<T,MC, STAR>& ZTrans_MC_STAR, DistMatrix<T,MR, STAR>& ZTrans_MR_STAR ) { #ifndef RELEASE PushCallStack("internal::LocalSymmetricAccumulateRU"); if( A.Grid() != B_STAR_MC.Grid() || B_STAR_MC.Grid() != BTrans_MR_STAR.Grid() || BTrans_MR_STAR.Grid() != ZTrans_MC_STAR.Grid() || ZTrans_MC_STAR.Grid() != ZTrans_MR_STAR.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != A.Width() || A.Height() != B_STAR_MC.Width() || A.Height() != BTrans_MR_STAR.Height() || A.Height() != ZTrans_MC_STAR.Height() || A.Height() != ZTrans_MR_STAR.Height() || B_STAR_MC.Height() != BTrans_MR_STAR.Width() || BTrans_MR_STAR.Width() != ZTrans_MC_STAR.Width() || ZTrans_MC_STAR.Width() != ZTrans_MR_STAR.Width() ) { std::ostringstream msg; msg << "Nonconformal LocalSymmetricAccumulateRU: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B[* ,MC] ~ " << B_STAR_MC.Height() << " x " << B_STAR_MC.Width() << "\n" << " B^H/T[MR,* ] ~ " << BTrans_MR_STAR.Height() << " x " << BTrans_MR_STAR.Width() << "\n" << " Z^H/T[MC,* ] ~ " << ZTrans_MC_STAR.Height() << " x " << ZTrans_MC_STAR.Width() << "\n" << " Z^H/T[MR,* ] ~ " << ZTrans_MR_STAR.Height() << " x " << ZTrans_MR_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( B_STAR_MC.RowAlignment() != A.ColAlignment() || BTrans_MR_STAR.ColAlignment() != A.RowAlignment() || ZTrans_MC_STAR.ColAlignment() != A.ColAlignment() || ZTrans_MR_STAR.ColAlignment() != A.RowAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<T> D11(g); DistMatrix<T,STAR,MC> BL_STAR_MC(g), 
BR_STAR_MC(g), B0_STAR_MC(g), B1_STAR_MC(g), B2_STAR_MC(g); DistMatrix<T,MR,STAR> BTTrans_MR_STAR(g), B0Trans_MR_STAR(g), BBTrans_MR_STAR(g), B1Trans_MR_STAR(g), B2Trans_MR_STAR(g); DistMatrix<T,MC,STAR> ZTTrans_MC_STAR(g), Z0Trans_MC_STAR(g), ZBTrans_MC_STAR(g), Z1Trans_MC_STAR(g), Z2Trans_MC_STAR(g); DistMatrix<T,MR,STAR> ZBTrans_MR_STAR(g), Z0Trans_MR_STAR(g), ZTTrans_MR_STAR(g), Z1Trans_MR_STAR(g), Z2Trans_MR_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionRight( B_STAR_MC, BL_STAR_MC, BR_STAR_MC, 0 ); LockedPartitionDown ( BTrans_MR_STAR, BTTrans_MR_STAR, BBTrans_MR_STAR, 0 ); PartitionDown ( ZTrans_MC_STAR, ZTTrans_MC_STAR, ZBTrans_MC_STAR, 0 ); PartitionDown ( ZTrans_MR_STAR, ZTTrans_MR_STAR, ZBTrans_MR_STAR, 0 ); while( ATL.Height() < A.Height() ) { LockedRepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionRight ( BL_STAR_MC, /**/ BR_STAR_MC, B0_STAR_MC, /**/ B1_STAR_MC, B2_STAR_MC ); LockedRepartitionDown ( BTTrans_MR_STAR, B0Trans_MR_STAR, /***************/ /***************/ B1Trans_MR_STAR, BBTrans_MR_STAR, B2Trans_MR_STAR ); RepartitionDown ( ZTTrans_MC_STAR, Z0Trans_MC_STAR, /***************/ /***************/ Z1Trans_MC_STAR, ZBTrans_MC_STAR, Z2Trans_MC_STAR ); RepartitionDown ( ZTTrans_MR_STAR, Z0Trans_MR_STAR, /***************/ /***************/ Z1Trans_MR_STAR, ZBTrans_MR_STAR, Z2Trans_MR_STAR ); D11.AlignWith( A11 ); //--------------------------------------------------------------------// D11 = A11; MakeTriangular( UPPER, D11 ); LocalGemm ( orientation, orientation, alpha, D11, B1_STAR_MC, T(1), Z1Trans_MR_STAR ); SetDiagonal( D11, T(0) ); LocalGemm ( NORMAL, NORMAL, alpha, D11, B1Trans_MR_STAR, T(1), Z1Trans_MC_STAR ); LocalGemm ( orientation, orientation, alpha, A12, B1_STAR_MC, T(1), 
Z2Trans_MR_STAR ); LocalGemm ( NORMAL, NORMAL, alpha, A12, B2Trans_MR_STAR, T(1), Z1Trans_MC_STAR ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionRight ( BL_STAR_MC, /**/ BR_STAR_MC, B0_STAR_MC, B1_STAR_MC, /**/ B2_STAR_MC ); SlideLockedPartitionDown ( BTTrans_MR_STAR, B0Trans_MR_STAR, B1Trans_MR_STAR, /***************/ /***************/ BBTrans_MR_STAR, B2Trans_MR_STAR ); SlidePartitionDown ( ZTTrans_MC_STAR, Z0Trans_MC_STAR, Z1Trans_MC_STAR, /***************/ /***************/ ZBTrans_MC_STAR, Z2Trans_MC_STAR ); SlidePartitionDown ( ZTTrans_MR_STAR, Z0Trans_MR_STAR, Z1Trans_MR_STAR, /***************/ /***************/ ZBTrans_MR_STAR, Z2Trans_MR_STAR ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }