inline void internal::LocalTrmmAccumulateLUN ( Orientation orientation, UnitOrNonUnit diag, T alpha, const DistMatrix<T,MC, MR >& U, const DistMatrix<T,STAR,MR >& XAdjOrTrans_STAR_MR, DistMatrix<T,MC, STAR>& Z_MC_STAR ) { #ifndef RELEASE PushCallStack("internal::LocalTrmmAccumulateLUN"); if( U.Grid() != XAdjOrTrans_STAR_MR.Grid() || XAdjOrTrans_STAR_MR.Grid() != Z_MC_STAR.Grid() ) throw std::logic_error ("{U,X,Z} must be distributed over the same grid"); if( U.Height() != U.Width() || U.Height() != XAdjOrTrans_STAR_MR.Width() || U.Height() != Z_MC_STAR.Height() || XAdjOrTrans_STAR_MR.Height() != Z_MC_STAR.Width() ) { std::ostringstream msg; msg << "Nonconformal LocalTrmmAccumulateLUN: \n" << " U ~ " << U.Height() << " x " << U.Width() << "\n" << " X^H/T[* ,MR] ~ " << XAdjOrTrans_STAR_MR.Height() << " x " << XAdjOrTrans_STAR_MR.Width() << "\n" << " Z[MC,* ] ~ " << Z_MC_STAR.Height() << " x " << Z_MC_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( XAdjOrTrans_STAR_MR.RowAlignment() != U.RowAlignment() || Z_MC_STAR.ColAlignment() != U.ColAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<T,MC,MR> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<T,MC,MR> D11(g); DistMatrix<T,STAR,MR> XLAdjOrTrans_STAR_MR(g), XRAdjOrTrans_STAR_MR(g), X0AdjOrTrans_STAR_MR(g), X1AdjOrTrans_STAR_MR(g), X2AdjOrTrans_STAR_MR(g); DistMatrix<T,MC,STAR> ZT_MC_STAR(g), Z0_MC_STAR(g), ZB_MC_STAR(g), Z1_MC_STAR(g), Z2_MC_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); LockedPartitionRight ( XAdjOrTrans_STAR_MR, XLAdjOrTrans_STAR_MR, XRAdjOrTrans_STAR_MR, 0 ); PartitionDown ( Z_MC_STAR, ZT_MC_STAR, ZB_MC_STAR, 0 ); while( UTL.Height() < U.Height() ) { LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); LockedRepartitionRight ( XLAdjOrTrans_STAR_MR, /**/ XRAdjOrTrans_STAR_MR, X0AdjOrTrans_STAR_MR, /**/ X1AdjOrTrans_STAR_MR, X2AdjOrTrans_STAR_MR ); RepartitionDown ( ZT_MC_STAR, Z0_MC_STAR, /**********/ /**********/ Z1_MC_STAR, ZB_MC_STAR, Z2_MC_STAR ); D11.AlignWith( U11 ); //--------------------------------------------------------------------// D11 = U11; MakeTrapezoidal( LEFT, UPPER, 0, D11 ); if( diag == UNIT ) SetDiagonalToOne( D11 ); internal::LocalGemm ( NORMAL, orientation, alpha, D11, X1AdjOrTrans_STAR_MR, (T)1, Z1_MC_STAR ); internal::LocalGemm ( NORMAL, orientation, alpha, U01, X1AdjOrTrans_STAR_MR, (T)1, Z0_MC_STAR ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); SlideLockedPartitionRight ( XLAdjOrTrans_STAR_MR, /**/ XRAdjOrTrans_STAR_MR, X0AdjOrTrans_STAR_MR, X1AdjOrTrans_STAR_MR, /**/ X2AdjOrTrans_STAR_MR ); SlidePartitionDown ( ZT_MC_STAR, Z0_MC_STAR, Z1_MC_STAR, /**********/ /**********/ ZB_MC_STAR, Z2_MC_STAR ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void LU( DistMatrix<F>& A ) { #ifndef RELEASE PushCallStack("LU"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,MC, STAR> A21_MC_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,MR > A12_STAR_MR(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A12_STAR_VR.AlignWith( A22 ); A12_STAR_MR.AlignWith( A22 ); A21_MC_STAR.AlignWith( A22 ); A11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; internal::LocalLU( A11_STAR_STAR ); A11 = A11_STAR_STAR; A21_MC_STAR = A21; internal::LocalTrsm ( RIGHT, UPPER, NORMAL, NON_UNIT, F(1), A11_STAR_STAR, A21_MC_STAR ); A21 = A21_MC_STAR; // Perhaps we should give up perfectly distributing this operation since // it's total contribution is only O(n^2) A12_STAR_VR = A12; internal::LocalTrsm ( LEFT, LOWER, NORMAL, UNIT, F(1), A11_STAR_STAR, A12_STAR_VR ); A12_STAR_MR = A12_STAR_VR; internal::LocalGemm ( NORMAL, NORMAL, F(-1), A21_MC_STAR, A12_STAR_MR, F(1), A22 ); A12 = A12_STAR_MR; //--------------------------------------------------------------------// A12_STAR_VR.FreeAlignments(); A12_STAR_MR.FreeAlignments(); A21_MC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void internal::SymmRUA ( T alpha, const DistMatrix<T,MC,MR>& A, const DistMatrix<T,MC,MR>& B, T beta, DistMatrix<T,MC,MR>& C ) { #ifndef RELEASE PushCallStack("internal::SymmRUA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); DistMatrix<T,MC,MR> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T,MC,MR> CT(g), C0(g), CB(g), C1(g), C2(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); DistMatrix<T,VC, STAR> B1Trans_VC_STAR(g); DistMatrix<T,STAR,MC > B1_STAR_MC(g); DistMatrix<T,MC, STAR> Z1Trans_MC_STAR(g); DistMatrix<T,MR, STAR> Z1Trans_MR_STAR(g); DistMatrix<T,MC, MR > Z1Trans(g); DistMatrix<T,MR, MC > Z1Trans_MR_MC(g); Matrix<T> Z1Local; Scal( beta, C ); LockedPartitionDown ( B, BT, BB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( CT.Height() < C.Height() ) { LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); B1Trans_MR_STAR.AlignWith( A ); B1Trans_VC_STAR.AlignWith( A ); B1_STAR_MC.AlignWith( A ); Z1Trans_MC_STAR.AlignWith( A ); Z1Trans_MR_STAR.AlignWith( A ); Z1Trans_MR_MC.AlignWith( C1 ); Z1Trans_MC_STAR.ResizeTo( C1.Width(), C1.Height() ); Z1Trans_MR_STAR.ResizeTo( C1.Width(), C1.Height() ); //--------------------------------------------------------------------// B1Trans_MR_STAR.TransposeFrom( B1 ); B1Trans_VC_STAR = B1Trans_MR_STAR; B1_STAR_MC.TransposeFrom( B1Trans_VC_STAR ); Zero( Z1Trans_MC_STAR ); Zero( Z1Trans_MR_STAR ); internal::LocalSymmetricAccumulateRU ( TRANSPOSE, alpha, A, B1_STAR_MC, B1Trans_MR_STAR, Z1Trans_MC_STAR, Z1Trans_MR_STAR ); Z1Trans.SumScatterFrom( Z1Trans_MC_STAR ); Z1Trans_MR_MC = Z1Trans; Z1Trans_MR_MC.SumScatterUpdate( (T)1, Z1Trans_MR_STAR ); Transpose( Z1Trans_MR_MC.LockedLocalMatrix(), Z1Local ); Axpy( (T)1, Z1Local, C1.LocalMatrix() ); //--------------------------------------------------------------------// B1Trans_MR_STAR.FreeAlignments(); B1Trans_VC_STAR.FreeAlignments(); B1_STAR_MC.FreeAlignments(); Z1Trans_MC_STAR.FreeAlignments(); Z1Trans_MR_STAR.FreeAlignments(); Z1Trans_MR_MC.FreeAlignments(); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
int main( int argc, char* argv[] ) { Initialize( argc, argv ); mpi::Comm comm = mpi::COMM_WORLD; const int commRank = mpi::CommRank( comm ); try { const int m = Input("--height","height of matrix",20); const int n = Input("--width","width of matrix",100); const int maxSteps = Input("--maxSteps","max # of steps of QR",10); const double tol = Input("--tol","tolerance for ID",-1.); const bool print = Input("--print","print matrices?",false); ProcessInput(); PrintInputReport(); DistMatrix<C> A; Uniform( A, m, n ); const Real frobA = FrobeniusNorm( A ); if( print ) A.Print("A"); const Grid& g = A.Grid(); DistMatrix<int,VR,STAR> pR(g), pC(g); DistMatrix<C> Z(g); Skeleton( A, pR, pC, Z, maxSteps, tol ); const int numSteps = pR.Height(); if( print ) { pR.Print("pR"); pC.Print("pC"); Z.Print("Z"); } // Form the matrices of A's (hopefully) dominant rows and columns DistMatrix<C> AR( A ); ApplyRowPivots( AR, pR ); AR.ResizeTo( numSteps, A.Width() ); DistMatrix<C> AC( A ); ApplyColumnPivots( AC, pC ); AC.ResizeTo( A.Height(), numSteps ); if( print ) { AC.Print("AC"); AR.Print("AR"); } // Check || A - AC Z AR ||_F / || A ||_F DistMatrix<C> B(g); Gemm( NORMAL, NORMAL, C(1), Z, AR, B ); Gemm( NORMAL, NORMAL, C(-1), AC, B, C(1), A ); const Real frobError = FrobeniusNorm( A ); if( print ) A.Print("A - AC Z AR"); if( commRank == 0 ) { std::cout << "|| A ||_F = " << frobA << "\n\n" << "|| A - AC Z AR ||_F / || A ||_F = " << frobError/frobA << "\n" << std::endl; } } catch( ArgException& e ) { // There is nothing to do } catch( exception& e ) { ostringstream os; os << "Process " << commRank << " caught exception with message: " << e.what() << endl; cerr << os.str(); #ifndef RELEASE DumpCallStack(); #endif } Finalize(); return 0; }
inline void Inverse( DistMatrix<F>& A ) { #ifndef RELEASE PushCallStack("Inverse"); if( A.Height() != A.Width() ) throw std::logic_error("Cannot invert non-square matrices"); #endif const Grid& g = A.Grid(); DistMatrix<int,VC,STAR> p( g ); LU( A, p ); TriangularInverse( UPPER, NON_UNIT, A ); // Solve inv(A) L = inv(U) for inv(A) DistMatrix<F> ATL(g), ATR(g), ABL(g), ABR(g); DistMatrix<F> A00(g), A01(g), A02(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> A1(g), A2(g); DistMatrix<F,VC, STAR> A1_VC_STAR(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,VR, STAR> L21_VR_STAR(g); DistMatrix<F,STAR,MR > L21Trans_STAR_MR(g); DistMatrix<F,MC, STAR> Z1(g); PartitionUpDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ABR.Height() < A.Height() ) { RepartitionUpDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); View( A1, A, 0, A00.Width(), A.Height(), A01.Width() ); View( A2, A, 0, A00.Width()+A01.Width(), A.Height(), A02.Width() ); L21_VR_STAR.AlignWith( A2 ); L21Trans_STAR_MR.AlignWith( A2 ); Z1.AlignWith( A01 ); Z1.ResizeTo( A.Height(), A01.Width() ); //--------------------------------------------------------------------// // Copy out L1 L11_STAR_STAR = A11; L21_VR_STAR = A21; L21Trans_STAR_MR.TransposeFrom( L21_VR_STAR ); // Zero the strictly lower triangular portion of A1 MakeTrapezoidal( LEFT, UPPER, 0, A11 ); Zero( A21 ); // Perform the lazy update of A1 internal::LocalGemm ( NORMAL, TRANSPOSE, F(-1), A2, L21Trans_STAR_MR, F(0), Z1 ); A1.SumScatterUpdate( F(1), Z1 ); // Solve against this diagonal block of L11 A1_VC_STAR = A1; internal::LocalTrsm ( RIGHT, LOWER, NORMAL, UNIT, F(1), L11_STAR_STAR, A1_VC_STAR ); A1 = A1_VC_STAR; //--------------------------------------------------------------------// Z1.FreeAlignments(); L21Trans_STAR_MR.FreeAlignments(); L21_VR_STAR.FreeAlignments(); SlidePartitionUpDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /*******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); } // inv(A) := inv(A) P ApplyInverseColumnPivots( A, p ); #ifndef RELEASE PopCallStack(); #endif }
inline void LocalSymvRowAccumulateU ( T alpha, const DistMatrix<T>& A, const DistMatrix<T,STAR,MC>& x_STAR_MC, const DistMatrix<T,STAR,MR>& x_STAR_MR, DistMatrix<T,STAR,MC>& z_STAR_MC, DistMatrix<T,STAR,MR>& z_STAR_MR ) { #ifndef RELEASE PushCallStack("internal::LocalSymvRowAccumulateU"); if( A.Grid() != x_STAR_MC.Grid() || x_STAR_MC.Grid() != x_STAR_MR.Grid() || x_STAR_MR.Grid() != z_STAR_MC.Grid() || z_STAR_MC.Grid() != z_STAR_MR.Grid() ) throw std::logic_error ("{A,x,z} must be distributed over the same grid"); if( x_STAR_MC.Height() != 1 || x_STAR_MR.Height() != 1 || z_STAR_MC.Height() != 1 || z_STAR_MR.Height() != 1 ) throw std::logic_error("Expected x and z to be row vectors"); if( A.Height() != A.Width() || A.Height() != x_STAR_MC.Width() || A.Height() != x_STAR_MR.Width() || A.Height() != z_STAR_MC.Width() || A.Height() != z_STAR_MR.Width() ) { std::ostringstream msg; msg << "Nonconformal LocalSymvRowAccumulateU: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " x[* ,MC] ~ " << x_STAR_MC.Height() << " x " << x_STAR_MC.Width() << "\n" << " x[* ,MR] ~ " << x_STAR_MR.Height() << " x " << x_STAR_MR.Width() << "\n" << " z[* ,MC] ~ " << z_STAR_MC.Height() << " x " << z_STAR_MC.Width() << "\n" << " z[* ,MR] ~ " << z_STAR_MR.Height() << " x " << z_STAR_MR.Width() << "\n"; throw std::logic_error( msg.str() ); } if( x_STAR_MC.RowAlignment() != A.ColAlignment() || x_STAR_MR.RowAlignment() != A.RowAlignment() || z_STAR_MC.RowAlignment() != A.ColAlignment() || z_STAR_MR.RowAlignment() != A.RowAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> A11(g), A12(g); DistMatrix<T> D11(g); DistMatrix<T,STAR,MC> x1_STAR_MC(g); DistMatrix<T,STAR,MR> xL_STAR_MR(g), xR_STAR_MR(g), x0_STAR_MR(g), x1_STAR_MR(g), x2_STAR_MR(g); DistMatrix<T,STAR,MC> z1_STAR_MC(g); DistMatrix<T,STAR,MR> z1_STAR_MR(g), z2_STAR_MR(g); // We want our local gemvs to be of width blocksize, so we will // temporarily change to max(r,c) times the current blocksize const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*LocalSymvBlocksize<T>() ); LockedPartitionRight( x_STAR_MR, xL_STAR_MR, xR_STAR_MR, 0 ); while( xL_STAR_MR.Width() < x_STAR_MR.Width() ) { LockedRepartitionRight ( xL_STAR_MR, /**/ xR_STAR_MR, x0_STAR_MR, /**/ x1_STAR_MR, x2_STAR_MR ); const int n0 = x0_STAR_MR.Width(); const int n1 = x1_STAR_MR.Width(); const int n2 = x2_STAR_MR.Width(); LockedView( A11, A, n0, n0, n1, n1 ); LockedView( A12, A, n0, n0+n1, n1, n2 ); LockedView( x1_STAR_MC, x_STAR_MC, 0, n0, 1, n1 ); View( z1_STAR_MC, z_STAR_MC, 0, n0, 1, n1 ); View( z1_STAR_MR, z_STAR_MR, 0, n0, 1, n1 ); View( z2_STAR_MR, z_STAR_MR, 0, n0+n1, 1, n2 ); D11.AlignWith( A11 ); //--------------------------------------------------------------------// // TODO: These diagonal block updates can be greatly improved D11 = A11; MakeTrapezoidal( LEFT, UPPER, 0, D11 ); Gemv ( NORMAL, alpha, D11.LockedLocalMatrix(), x1_STAR_MR.LockedLocalMatrix(), T(1), z1_STAR_MC.LocalMatrix() ); MakeTrapezoidal( LEFT, UPPER, 1, D11 ); Gemv ( TRANSPOSE, alpha, D11.LockedLocalMatrix(), x1_STAR_MC.LockedLocalMatrix(), T(1), z1_STAR_MR.LocalMatrix() ); Gemv ( NORMAL, alpha, A12.LockedLocalMatrix(), x2_STAR_MR.LockedLocalMatrix(), T(1), z1_STAR_MC.LocalMatrix() ); Gemv ( TRANSPOSE, alpha, A12.LockedLocalMatrix(), x1_STAR_MC.LockedLocalMatrix(), T(1), z2_STAR_MR.LocalMatrix() ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionRight ( xL_STAR_MR, /**/ xR_STAR_MR, x0_STAR_MR, x1_STAR_MR, /**/ x2_STAR_MR ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void TrtrsmLLN ( UnitOrNonUnit diag, F alpha, const DistMatrix<F>& L, DistMatrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE CallStackEntry entry("internal::TrtrsmLLN"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F> XTL(g), XTR(g), X00(g), X01(g), X02(g), XBL(g), XBR(g), X10(g), X11(g), X12(g), X20(g), X21(g), X22(g); // Temporary distributions DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,STAR,MR > X10_STAR_MR(g); DistMatrix<F,STAR,VR > X10_STAR_VR(g); DistMatrix<F,STAR,MR > X11_STAR_MR(g); DistMatrix<F,STAR,STAR> X11_STAR_STAR(g); // Start the algorithm ScaleTrapezoid( alpha, LOWER, X ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionDownDiagonal ( X, XTL, XTR, XBL, XBR, 0 ); while( XBR.Height() > 0 ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); RepartitionDownDiagonal ( XTL, /**/ XTR, X00, /**/ X01, X02, /*************/ /******************/ /**/ X10, /**/ X11, X12, XBL, /**/ XBR, X20, /**/ X21, X22 ); L21_MC_STAR.AlignWith( X20 ); X10_STAR_MR.AlignWith( X20 ); X11_STAR_MR.AlignWith( X21 ); //--------------------------------------------------------------------// L11_STAR_STAR = L11; X11_STAR_STAR = X11; X10_STAR_VR = X10; LocalTrsm ( LEFT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, X10_STAR_VR, checkIfSingular ); LocalTrtrsm ( LEFT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, X11_STAR_STAR, checkIfSingular ); X11 = X11_STAR_STAR; X11_STAR_MR = X11_STAR_STAR; MakeTriangular( LOWER, X11_STAR_MR ); X10_STAR_MR = X10_STAR_VR; X10 = X10_STAR_MR; L21_MC_STAR = L21; LocalGemm ( NORMAL, NORMAL, F(-1), L21_MC_STAR, X10_STAR_MR, F(1), X20 ); LocalGemm ( NORMAL, NORMAL, F(-1), L21_MC_STAR, X11_STAR_MR, F(1), X21 ); //--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlidePartitionDownDiagonal ( XTL, /**/ XTR, X00, X01, /**/ X02, /**/ X10, X11, /**/ X12, /*************/ /******************/ XBL, /**/ XBR, X20, X21, /**/ X22 ); } }
void TestCorrectness ( bool print, UpperOrLower uplo, const DistMatrix<Complex<R> >& A, const DistMatrix<Complex<R>,STAR,STAR>& t, DistMatrix<Complex<R> >& AOrig ) { typedef Complex<R> C; const Grid& g = A.Grid(); const int m = AOrig.Height(); int subdiagonal = ( uplo==LOWER ? -1 : +1 ); if( g.Rank() == 0 ) cout << "Testing error..." << endl; // Grab the diagonal and subdiagonal of the symmetric tridiagonal matrix DistMatrix<R,MD,STAR> d(g); DistMatrix<R,MD,STAR> e(g); A.GetRealPartOfDiagonal( d ); A.GetRealPartOfDiagonal( e, subdiagonal ); // Grab a full copy of e so that we may fill the opposite subdiagonal DistMatrix<R,STAR,STAR> e_STAR_STAR(g); DistMatrix<R,MD,STAR> eOpposite(g); e_STAR_STAR = e; eOpposite.AlignWithDiagonal( A, -subdiagonal ); eOpposite = e_STAR_STAR; // Zero B and then fill its tridiagonal DistMatrix<C> B(g); B.AlignWith( A ); Zeros( m, m, B ); B.SetRealPartOfDiagonal( d ); B.SetRealPartOfDiagonal( e, subdiagonal ); B.SetRealPartOfDiagonal( eOpposite, -subdiagonal ); // Reverse the accumulated Householder transforms, ignoring symmetry if( uplo == LOWER ) { ApplyPackedReflectors ( LEFT, LOWER, VERTICAL, BACKWARD, UNCONJUGATED, subdiagonal, A, t, B ); ApplyPackedReflectors ( RIGHT, LOWER, VERTICAL, BACKWARD, CONJUGATED, subdiagonal, A, t, B ); } else { ApplyPackedReflectors ( LEFT, UPPER, VERTICAL, FORWARD, UNCONJUGATED, subdiagonal, A, t, B ); ApplyPackedReflectors ( RIGHT, UPPER, VERTICAL, FORWARD, CONJUGATED, subdiagonal, A, t, B ); } // Compare the appropriate triangle of AOrig and B MakeTriangular( uplo, AOrig ); MakeTriangular( uplo, B ); Axpy( C(-1), AOrig, B ); const R infNormOfAOrig = HermitianNorm( uplo, AOrig, INFINITY_NORM ); const R frobNormOfAOrig = HermitianNorm( uplo, AOrig, FROBENIUS_NORM ); const R infNormOfError = HermitianNorm( uplo, B, INFINITY_NORM ); const R frobNormOfError = HermitianNorm( uplo, B, FROBENIUS_NORM ); if( g.Rank() == 0 ) { cout << " ||AOrig||_1 = ||AOrig||_oo = " << infNormOfAOrig << "\n" << " ||AOrig||_F = " << frobNormOfAOrig << "\n" << " ||AOrig - Q^H A Q||_oo = " << infNormOfError << "\n" << " ||AOrig - Q^H A Q||_F = " << frobNormOfError << endl; } }
inline void HermitianTridiagL ( DistMatrix<Complex<R> >& A, DistMatrix<Complex<R>,STAR,STAR>& t ) { #ifndef RELEASE PushCallStack("internal::HermitianTridiagL"); if( A.Grid() != t.Grid() ) throw std::logic_error("{A,t} must be distributed over the same grid"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( t.Viewing() ) throw std::logic_error("t must not be a view"); #endif typedef Complex<R> C; const Grid& g = A.Grid(); DistMatrix<C,MD,STAR> tDiag(g); tDiag.AlignWithDiagonal( A, -1 ); tDiag.ResizeTo( A.Height()-1, 1 ); // Matrix views DistMatrix<C> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); // Temporary distributions DistMatrix<C> WPan(g); DistMatrix<C,STAR,STAR> t1_STAR_STAR(g); DistMatrix<C,STAR,STAR> A11_STAR_STAR(g); DistMatrix<C,MC, STAR> APan_MC_STAR(g), A11_MC_STAR(g), A21_MC_STAR(g); DistMatrix<C,MR, STAR> APan_MR_STAR(g), A11_MR_STAR(g), A21_MR_STAR(g); DistMatrix<C,MC, STAR> WPan_MC_STAR(g), W11_MC_STAR(g), W21_MC_STAR(g); DistMatrix<C,MR, STAR> WPan_MR_STAR(g), W11_MR_STAR(g), W21_MR_STAR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( tDiag, tT, tB, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2 ); if( A22.Height() > 0 ) { WPan.AlignWith( A11 ); APan_MC_STAR.AlignWith( A11 ); WPan_MC_STAR.AlignWith( A11 ); APan_MR_STAR.AlignWith( A11 ); WPan_MR_STAR.AlignWith( A11 ); //----------------------------------------------------------------// WPan.ResizeTo( ABR.Height(), A11.Width() ); APan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() ); WPan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() ); APan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() ); WPan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() ); HermitianPanelTridiagL ( ABR, WPan, t1, APan_MC_STAR, APan_MR_STAR, WPan_MC_STAR, WPan_MR_STAR ); PartitionDown ( APan_MC_STAR, A11_MC_STAR, A21_MC_STAR, A11.Height() ); PartitionDown ( APan_MR_STAR, A11_MR_STAR, A21_MR_STAR, A11.Height() ); PartitionDown ( WPan_MC_STAR, W11_MC_STAR, W21_MC_STAR, A11.Height() ); PartitionDown ( WPan_MR_STAR, W11_MR_STAR, W21_MR_STAR, A11.Height() ); LocalTrr2k ( LOWER, ADJOINT, ADJOINT, C(-1), A21_MC_STAR, W21_MR_STAR, W21_MC_STAR, A21_MR_STAR, C(1), A22 ); //----------------------------------------------------------------// WPan_MR_STAR.FreeAlignments(); APan_MR_STAR.FreeAlignments(); WPan_MC_STAR.FreeAlignments(); APan_MC_STAR.FreeAlignments(); WPan.FreeAlignments(); } else { A11_STAR_STAR = A11; t1_STAR_STAR.ResizeTo( t1.Height(), 1 ); HermitianTridiag ( LOWER, A11_STAR_STAR.LocalMatrix(), t1_STAR_STAR.LocalMatrix() ); A11 = A11_STAR_STAR; t1 = t1_STAR_STAR; } SlidePartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } // Redistribute from matrix-diagonal form to fully replicated t = tDiag; #ifndef RELEASE PopCallStack(); #endif }
inline void internal::HegstLUVar2( DistMatrix<F,MC,MR>& A, const DistMatrix<F,MC,MR>& U ) { #ifndef RELEASE PushCallStack("internal::HegstLUVar2"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( U.Height() != U.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != U.Height() ) throw std::logic_error("A and U must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F,MC,MR> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<F,VC, STAR> A01_VC_STAR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,MC > U12_STAR_MC(g); DistMatrix<F,STAR,VR > U12_STAR_VR(g); DistMatrix<F,MR, STAR> U12Adj_MR_STAR(g); DistMatrix<F,VC, STAR> U12Adj_VC_STAR(g); DistMatrix<F,MC, STAR> X01_MC_STAR(g); DistMatrix<F,STAR,STAR> X11_STAR_STAR(g); DistMatrix<F,MC, MR > Y12(g); DistMatrix<F,MC, MR > Z12Adj(g); DistMatrix<F,MR, MC > Z12Adj_MR_MC(g); DistMatrix<F,MC, STAR> Z12Adj_MC_STAR(g); DistMatrix<F,MR, STAR> Z12Adj_MR_STAR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); A12_STAR_VR.AlignWith( A12 ); U12_STAR_MC.AlignWith( A22 ); U12_STAR_VR.AlignWith( A12 ); U12Adj_MR_STAR.AlignWith( A22 ); U12Adj_VC_STAR.AlignWith( A22 ); X01_MC_STAR.AlignWith( A01 ); Y12.AlignWith( A12 ); Z12Adj.AlignWith( A12 ); Z12Adj_MR_MC.AlignWith( A12 ); Z12Adj_MC_STAR.AlignWith( A22 ); Z12Adj_MR_STAR.AlignWith( A22 ); //--------------------------------------------------------------------// // A01 := A01 U11' U11_STAR_STAR = U11; A01_VC_STAR = A01; internal::LocalTrmm ( RIGHT, UPPER, ADJOINT, NON_UNIT, (F)1, U11_STAR_STAR, A01_VC_STAR ); A01 = A01_VC_STAR; // A01 := A01 + A02 U12' U12Adj_MR_STAR.AdjointFrom( U12 ); X01_MC_STAR.ResizeTo( A01.Height(), A01.Width() ); internal::LocalGemm ( NORMAL, NORMAL, (F)1, A02, U12Adj_MR_STAR, (F)0, X01_MC_STAR ); A01.SumScatterUpdate( (F)1, X01_MC_STAR ); // Y12 := U12 A22 U12Adj_VC_STAR = U12Adj_MR_STAR; U12_STAR_MC.AdjointFrom( U12Adj_VC_STAR ); Z12Adj_MC_STAR.ResizeTo( A12.Width(), A12.Height() ); Z12Adj_MR_STAR.ResizeTo( A12.Width(), A12.Height() ); Zero( Z12Adj_MC_STAR ); Zero( Z12Adj_MR_STAR ); internal::LocalSymmetricAccumulateRU ( ADJOINT, (F)1, A22, U12_STAR_MC, U12Adj_MR_STAR, Z12Adj_MC_STAR, Z12Adj_MR_STAR ); Z12Adj.SumScatterFrom( Z12Adj_MC_STAR ); Z12Adj_MR_MC = Z12Adj; Z12Adj_MR_MC.SumScatterUpdate( (F)1, Z12Adj_MR_STAR ); Y12.ResizeTo( A12.Height(), A12.Width() ); Adjoint( Z12Adj_MR_MC.LockedLocalMatrix(), Y12.LocalMatrix() ); // A12 := U11 A12 A12_STAR_VR = A12; U11_STAR_STAR = U11; internal::LocalTrmm ( LEFT, UPPER, NORMAL, NON_UNIT, (F)1, U11_STAR_STAR, A12_STAR_VR ); A12 = A12_STAR_VR; // A12 := A12 + 1/2 Y12 Axpy( (F)0.5, Y12, A12 ); // A11 := U11 A11 U11' A11_STAR_STAR = A11; internal::LocalHegst( LEFT, UPPER, A11_STAR_STAR, U11_STAR_STAR ); A11 = A11_STAR_STAR; // A11 := A11 + (A12 U12' + U12 A12') A12_STAR_VR = A12; U12_STAR_VR = U12; X11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() ); Her2k ( UPPER, NORMAL, (F)1, A12_STAR_VR.LocalMatrix(), U12_STAR_VR.LocalMatrix(), (F)0, X11_STAR_STAR.LocalMatrix() ); A11.SumScatterUpdate( (F)1, X11_STAR_STAR ); // A12 := A12 + 1/2 Y12 Axpy( (F)0.5, Y12, A12 ); //--------------------------------------------------------------------// A12_STAR_VR.FreeAlignments(); U12_STAR_MC.FreeAlignments(); U12_STAR_VR.FreeAlignments(); U12Adj_MR_STAR.FreeAlignments(); U12Adj_VC_STAR.FreeAlignments(); X01_MC_STAR.FreeAlignments(); Y12.FreeAlignments(); Z12Adj.FreeAlignments(); Z12Adj_MR_MC.FreeAlignments(); Z12Adj_MC_STAR.FreeAlignments(); Z12Adj_MR_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } #ifndef RELEASE PopCallStack(); #endif }
void TestCorrectness ( const DistMatrix<F>& A, const DistMatrix<F,MD,STAR>& t, DistMatrix<F>& AOrig ) { typedef BASE(F) Real; const Grid& g = A.Grid(); const Int m = A.Height(); const Int n = A.Width(); const Int minDim = std::min(m,n); if( g.Rank() == 0 ) cout << " Testing orthogonality of Q..." << endl; // Form Z := Q Q^H as an approximation to identity DistMatrix<F> Z(g); Identity( Z, m, n ); rq::ApplyQ( RIGHT, NORMAL, A, t, Z ); rq::ApplyQ( RIGHT, ADJOINT, A, t, Z ); DistMatrix<F> ZUpper(g); View( ZUpper, Z, 0, 0, minDim, minDim ); // Form Identity DistMatrix<F> X(g); Identity( X, minDim, minDim ); // Form X := I - Q Q^H Axpy( F(-1), ZUpper, X ); Real oneNormOfError = OneNorm( X ); Real infNormOfError = InfinityNorm( X ); Real frobNormOfError = FrobeniusNorm( X ); if( g.Rank() == 0 ) { cout << " ||Q^H Q - I||_1 = " << oneNormOfError << "\n" << " ||Q^H Q - I||_oo = " << infNormOfError << "\n" << " ||Q^H Q - I||_F = " << frobNormOfError << endl; } if( g.Rank() == 0 ) cout << " Testing if A = RQ..." << endl; // Form RQ DistMatrix<F> U( A ); MakeTrapezoidal( UPPER, U, 0, RIGHT ); rq::ApplyQ( RIGHT, NORMAL, A, t, U ); // Form R Q - A Axpy( F(-1), AOrig, U ); const Real oneNormOfA = OneNorm( AOrig ); const Real infNormOfA = InfinityNorm( AOrig ); const Real frobNormOfA = FrobeniusNorm( AOrig ); oneNormOfError = OneNorm( U ); infNormOfError = InfinityNorm( U ); frobNormOfError = FrobeniusNorm( U ); if( g.Rank() == 0 ) { cout << " ||A||_1 = " << oneNormOfA << "\n" << " ||A||_oo = " << infNormOfA << "\n" << " ||A||_F = " << frobNormOfA << "\n" << " ||A - RQ||_1 = " << oneNormOfError << "\n" << " ||A - RQ||_oo = " << infNormOfError << "\n" << " ||A - RQ||_F = " << frobNormOfError << endl; } }
void Trr2kNNTN ( UpperOrLower uplo, Orientation orientationOfC, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, const DistMatrix<T>& C, const DistMatrix<T>& D, T beta, DistMatrix<T>& E ) { #ifndef RELEASE CallStackEntry entry("internal::Trr2kNNTN"); if( E.Height() != E.Width() || A.Width() != C.Height() || A.Height() != E.Height() || C.Width() != E.Height() || B.Width() != E.Width() || D.Width() != E.Width() || A.Width() != B.Height() || C.Height() != D.Height() ) throw std::logic_error("Nonconformal Trr2kNNTN"); #endif const Grid& g = E.Grid(); DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); DistMatrix<T> DT(g), D0(g), DB(g), D1(g), D2(g); DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); DistMatrix<T,STAR,MC > C1_STAR_MC(g); DistMatrix<T,MR, STAR> D1Trans_MR_STAR(g); A1_MC_STAR.AlignWith( E ); B1Trans_MR_STAR.AlignWith( E ); C1_STAR_MC.AlignWith( E ); D1Trans_MR_STAR.AlignWith( E ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); LockedPartitionDown ( C, CT, CB, 0 ); LockedPartitionDown ( D, DT, DB, 0 ); while( AL.Width() < A.Width() ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); LockedRepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); LockedRepartitionDown ( DT, D0, /**/ /**/ D1, DB, D2 ); //--------------------------------------------------------------------// A1_MC_STAR = A1; C1_STAR_MC = C1; B1Trans_MR_STAR.TransposeFrom( B1 ); D1Trans_MR_STAR.TransposeFrom( D1 ); LocalTrr2k ( uplo, TRANSPOSE, orientationOfC, TRANSPOSE, alpha, A1_MC_STAR, B1Trans_MR_STAR, C1_STAR_MC, D1Trans_MR_STAR, beta, E ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( DT, D0, D1, /**/ /**/ DB, D2 ); SlideLockedPartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); } }
inline void TwoSidedTrsmUVar1 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& U ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrsmUVar1"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( U.Height() != U.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != U.Height() ) throw std::logic_error("A and U must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A01_VC_STAR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,MC, STAR> U01_MC_STAR(g); DistMatrix<F,VC, STAR> U01_VC_STAR(g); DistMatrix<F,VR, STAR> U01_VR_STAR(g); DistMatrix<F,STAR,MR > U01Adj_STAR_MR(g); DistMatrix<F,STAR,STAR> X11_STAR_STAR(g); DistMatrix<F,MR, MC > Z01_MR_MC(g); DistMatrix<F,MC, STAR> Z01_MC_STAR(g); DistMatrix<F,MR, STAR> Z01_MR_STAR(g); DistMatrix<F> Y01(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); A01_VC_STAR.AlignWith( A01 ); U01_MC_STAR.AlignWith( A00 ); U01_VR_STAR.AlignWith( A00 ); U01_VC_STAR.AlignWith( A00 ); U01Adj_STAR_MR.AlignWith( A00 ); Y01.AlignWith( A01 ); Z01_MR_MC.AlignWith( A01 ); Z01_MC_STAR.AlignWith( A00 ); Z01_MR_STAR.AlignWith( A00 ); //--------------------------------------------------------------------// // Y01 := A00 U01 U01_MC_STAR = U01; U01_VR_STAR = U01_MC_STAR; U01Adj_STAR_MR.AdjointFrom( U01_VR_STAR ); Z01_MC_STAR.ResizeTo( A01.Height(), A01.Width() ); Z01_MR_STAR.ResizeTo( A01.Height(), A01.Width() ); Zero( Z01_MC_STAR ); Zero( Z01_MR_STAR ); LocalSymmetricAccumulateLU ( ADJOINT, F(1), A00, U01_MC_STAR, U01Adj_STAR_MR, Z01_MC_STAR, Z01_MR_STAR ); Z01_MR_MC.SumScatterFrom( Z01_MR_STAR ); Y01 = Z01_MR_MC; Y01.SumScatterUpdate( F(1), Z01_MC_STAR ); // A01 := inv(U00)' A01 // // This is the bottleneck because A01 only has blocksize columns Trsm( LEFT, UPPER, ADJOINT, diag, F(1), U00, A01 ); // A01 := A01 - 1/2 Y01 Axpy( F(-1)/F(2), Y01, A01 ); // A11 := A11 - (U01' A01 + A01' U01) A01_VC_STAR = A01; U01_VC_STAR = U01_MC_STAR; X11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() ); Her2k ( UPPER, ADJOINT, F(-1), A01_VC_STAR.LocalMatrix(), U01_VC_STAR.LocalMatrix(), F(0), X11_STAR_STAR.LocalMatrix() ); A11.SumScatterUpdate( F(1), X11_STAR_STAR ); // A11 := inv(U11)' A11 inv(U11) A11_STAR_STAR = A11; U11_STAR_STAR = U11; LocalTwoSidedTrsm( UPPER, diag, A11_STAR_STAR, U11_STAR_STAR ); A11 = A11_STAR_STAR; // A01 := A01 - 1/2 Y01 Axpy( F(-1)/F(2), Y01, A01 ); // A01 := A01 inv(U11) A01_VC_STAR = A01; LocalTrsm ( RIGHT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, A01_VC_STAR ); A01 = A01_VC_STAR; //--------------------------------------------------------------------// A01_VC_STAR.FreeAlignments(); U01_MC_STAR.FreeAlignments(); U01_VR_STAR.FreeAlignments(); U01_VC_STAR.FreeAlignments(); U01Adj_STAR_MR.FreeAlignments(); Y01.FreeAlignments(); Z01_MR_MC.FreeAlignments(); Z01_MC_STAR.FreeAlignments(); Z01_MR_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void internal::TrmmLUNA ( UnitOrNonUnit diag, T alpha, const DistMatrix<T,MC,MR>& U, DistMatrix<T,MC,MR>& X ) { #ifndef RELEASE PushCallStack("internal::TrmmULNA"); if( U.Grid() != X.Grid() ) throw std::logic_error ("U and X must be distributed over the same grid"); if( U.Height() != U.Width() || U.Width() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrmmLUNA: \n" << " U ~ " << U.Height() << " x " << U.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = U.Grid(); DistMatrix<T,MC,MR> XL(g), XR(g), X0(g), X1(g), X2(g); DistMatrix<T,VR, STAR> X1_VR_STAR(g); DistMatrix<T,STAR,MR > X1Trans_STAR_MR(g); DistMatrix<T,MC, STAR> Z1_MC_STAR(g); PartitionRight ( X, XL, XR, 0 ); while( XL.Width() < X.Width() ) { RepartitionRight ( XL, /**/ XR, X0, /**/ X1, X2 ); X1_VR_STAR.AlignWith( U ); X1Trans_STAR_MR.AlignWith( U ); Z1_MC_STAR.AlignWith( U ); Z1_MC_STAR.ResizeTo( X1.Height(), X1.Width() ); //--------------------------------------------------------------------// X1_VR_STAR = X1; X1Trans_STAR_MR.TransposeFrom( X1_VR_STAR ); Zero( Z1_MC_STAR ); internal::LocalTrmmAccumulateLUN ( TRANSPOSE, diag, alpha, U, X1Trans_STAR_MR, Z1_MC_STAR ); X1.SumScatterFrom( Z1_MC_STAR ); //--------------------------------------------------------------------// X1_VR_STAR.FreeAlignments(); X1Trans_STAR_MR.FreeAlignments(); Z1_MC_STAR.FreeAlignments(); SlidePartitionRight ( XL, /**/ XR, X0, X1, /**/ X2 ); } #ifndef RELEASE PopCallStack(); #endif }
void InPlaceRedist( DistMatrix<F>& Z, Int rowAlign, const Base<F>* readBuffer ) { typedef Base<F> Real; const Grid& g = Z.Grid(); const Int height = Z.Height(); const Int width = Z.Width(); const Int r = g.Height(); const Int c = g.Width(); const Int p = r * c; const Int row = g.Row(); const Int col = g.Col(); const Int rowShift = Z.RowShift(); const Int colAlign = Z.ColAlign(); const Int localWidth = Length(width,g.VRRank(),rowAlign,p); const Int maxHeight = MaxLength(height,r); const Int maxWidth = MaxLength(width,p); const Int portionSize = mpi::Pad( maxHeight*maxWidth ); // Allocate our send/recv buffers vector<Real> buffer(2*r*portionSize); Real* sendBuffer = &buffer[0]; Real* recvBuffer = &buffer[r*portionSize]; // Pack EL_OUTER_PARALLEL_FOR for( Int k=0; k<r; ++k ) { Real* data = &sendBuffer[k*portionSize]; const Int thisColShift = Shift(k,colAlign,r); const Int thisLocalHeight = Length(height,thisColShift,r); EL_INNER_PARALLEL_FOR_COLLAPSE2 for( Int j=0; j<localWidth; ++j ) for( Int i=0; i<thisLocalHeight; ++i ) data[i+j*thisLocalHeight] = readBuffer[thisColShift+i*r+j*height]; } // Communicate mpi::AllToAll ( sendBuffer, portionSize, recvBuffer, portionSize, g.ColComm() ); // Unpack const Int localHeight = Length(height,row,colAlign,r); EL_OUTER_PARALLEL_FOR for( Int k=0; k<r; ++k ) { const Real* data = &recvBuffer[k*portionSize]; const Int thisRank = col+k*c; const Int thisRowShift = Shift(thisRank,rowAlign,p); const Int thisRowOffset = (thisRowShift-rowShift) / c; const Int thisLocalWidth = Length(width,thisRowShift,p); EL_INNER_PARALLEL_FOR for( Int j=0; j<thisLocalWidth; ++j ) { const Real* dataCol = &(data[j*localHeight]); Real* thisCol = (Real*)Z.Buffer(0,thisRowOffset+j*r); if( IsComplex<F>::value ) { for( Int i=0; i<localHeight; ++i ) { thisCol[2*i] = dataCol[i]; thisCol[2*i+1] = 0; } } else { MemCopy( thisCol, dataCol, localHeight ); } } } }
inline void HermitianTridiagL( DistMatrix<R>& A ) { #ifndef RELEASE PushCallStack("internal::HermitianTridiagL"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square."); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<R> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary distributions DistMatrix<R> WPan(g); DistMatrix<R,STAR,STAR> A11_STAR_STAR(g); DistMatrix<R,MC, STAR> APan_MC_STAR(g), A11_MC_STAR(g), A21_MC_STAR(g); DistMatrix<R,MR, STAR> APan_MR_STAR(g), A11_MR_STAR(g), A21_MR_STAR(g); DistMatrix<R,MC, STAR> WPan_MC_STAR(g), W11_MC_STAR(g), W21_MC_STAR(g); DistMatrix<R,MR, STAR> WPan_MR_STAR(g), W11_MR_STAR(g), W21_MR_STAR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); if( A22.Height() > 0 ) { WPan.AlignWith( A11 ); APan_MC_STAR.AlignWith( A11 ); WPan_MC_STAR.AlignWith( A11 ); APan_MR_STAR.AlignWith( A11 ); WPan_MR_STAR.AlignWith( A11 ); //----------------------------------------------------------------// WPan.ResizeTo( ABR.Height(), A11.Width() ); APan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() ); WPan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() ); APan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() ); WPan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() ); HermitianPanelTridiagL ( ABR, WPan, APan_MC_STAR, APan_MR_STAR, WPan_MC_STAR, WPan_MR_STAR ); PartitionDown ( APan_MC_STAR, A11_MC_STAR, A21_MC_STAR, A11.Height() ); PartitionDown ( APan_MR_STAR, A11_MR_STAR, A21_MR_STAR, A11.Height() ); PartitionDown ( WPan_MC_STAR, W11_MC_STAR, W21_MC_STAR, A11.Height() ); PartitionDown ( WPan_MR_STAR, W11_MR_STAR, W21_MR_STAR, A11.Height() ); LocalTrr2k ( LOWER, TRANSPOSE, TRANSPOSE, R(-1), A21_MC_STAR, W21_MR_STAR, W21_MC_STAR, A21_MR_STAR, R(1), A22 ); //----------------------------------------------------------------// WPan_MR_STAR.FreeAlignments(); APan_MR_STAR.FreeAlignments(); WPan_MC_STAR.FreeAlignments(); APan_MC_STAR.FreeAlignments(); WPan.FreeAlignments(); } else { A11_STAR_STAR = A11; HermitianTridiag( LOWER, A11_STAR_STAR.LocalMatrix() ); A11 = A11_STAR_STAR; } SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void Syr2kLT ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::Syr2kLT"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Width() != C.Height() || A.Width() != C.Width() || B.Width() != C.Height() || B.Width() != C.Width() || A.Height() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal Syr2kLT:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,MR, STAR> A1Trans_MR_STAR(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); DistMatrix<T,STAR,VR > A1_STAR_VR(g); DistMatrix<T,STAR,VR > B1_STAR_VR(g); DistMatrix<T,STAR,MC > A1_STAR_MC(g); DistMatrix<T,STAR,MC > B1_STAR_MC(g); A1Trans_MR_STAR.AlignWith( C ); B1Trans_MR_STAR.AlignWith( C ); A1_STAR_MC.AlignWith( C ); B1_STAR_MC.AlignWith( C ); // Start the algorithm ScaleTrapezoid( beta, LEFT, LOWER, 0, C ); LockedPartitionDown ( A, AT, AB, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); //--------------------------------------------------------------------// A1Trans_MR_STAR.TransposeFrom( A1 ); A1_STAR_VR.TransposeFrom( A1Trans_MR_STAR ); A1_STAR_MC = A1_STAR_VR; B1Trans_MR_STAR.TransposeFrom( B1 ); B1_STAR_VR.TransposeFrom( B1Trans_MR_STAR ); B1_STAR_MC = B1_STAR_VR; LocalTrr2k ( LOWER, TRANSPOSE, TRANSPOSE, TRANSPOSE, TRANSPOSE, alpha, A1_STAR_MC, B1Trans_MR_STAR, B1_STAR_MC, A1Trans_MR_STAR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); } #ifndef RELEASE PopCallStack(); #endif }
void TestCorrectness ( UpperOrLower uplo, const DistMatrix<F>& A, const DistMatrix<F,STAR,STAR>& t, DistMatrix<F>& AOrig, bool print, bool display ) { typedef Base<F> Real; const Grid& g = A.Grid(); const Int n = AOrig.Height(); const Real infNormAOrig = InfinityNorm( AOrig ); const Real frobNormAOrig = FrobeniusNorm( AOrig ); if( g.Rank() == 0 ) cout << "Testing error..." << endl; // Set H to the appropriate Hessenberg portion of A DistMatrix<F> H( A ); if( uplo == LOWER ) MakeTrapezoidal( LOWER, H, 1 ); else MakeTrapezoidal( UPPER, H, -1 ); if( print ) Print( H, "Hessenberg" ); if( display ) Display( H, "Bidiagonal" ); if( print || display ) { DistMatrix<F> Q(g); Identity( Q, n, n ); hessenberg::ApplyQ( LEFT, uplo, NORMAL, A, t, Q ); if( print ) Print( Q, "Q" ); if( display ) Display( Q, "Q" ); } // Reverse the accumulated Householder transforms hessenberg::ApplyQ( LEFT, uplo, ADJOINT, A, t, AOrig ); hessenberg::ApplyQ( RIGHT, uplo, NORMAL, A, t, AOrig ); if( print ) Print( AOrig, "Manual Hessenberg" ); if( display ) Display( AOrig, "Manual Hessenberg" ); // Compare the appropriate portion of AOrig and B if( uplo == LOWER ) MakeTrapezoidal( LOWER, AOrig, 1 ); else MakeTrapezoidal( UPPER, AOrig, -1 ); H -= AOrig; if( print ) Print( H, "Error in rotated Hessenberg" ); if( display ) Display( H, "Error in rotated Hessenberg" ); const Real infNormError = InfinityNorm( H ); const Real frobNormError = FrobeniusNorm( H ); if( g.Rank() == 0 ) { cout << " ||A||_oo = " << infNormAOrig << "\n" << " ||A||_F = " << frobNormAOrig << "\n" << " ||H - Q^H A Q||_oo = " << infNormError << "\n" << " ||H - Q^H A Q||_F = " << frobNormError << endl; } }
void TestCorrectness ( UpperOrLower uplo, UnitOrNonUnit diag, const DistMatrix<F>& A, const DistMatrix<F>& B, const DistMatrix<F>& AOrig, bool print ) { typedef Base<F> Real; const Grid& g = A.Grid(); const Int m = AOrig.Height(); const Int k=100; DistMatrix<F> X(g), Y(g), Z(g); Uniform( X, m, k ); Y = X; Zeros( Z, m, k ); if( uplo == LOWER ) { // Test correctness by comparing the application of A against a // random set of k vectors to the application of // tril(B)^H AOrig tril(B) Trmm( LEFT, LOWER, NORMAL, diag, F(1), B, Y ); Hemm( LEFT, LOWER, F(1), AOrig, Y, F(0), Z ); Trmm( LEFT, LOWER, ADJOINT, diag, F(1), B, Z ); Hemm( LEFT, LOWER, F(-1), A, X, F(1), Z ); Real infNormAOrig = HermitianInfinityNorm( uplo, AOrig ); Real frobNormAOrig = HermitianFrobeniusNorm( uplo, AOrig ); Real infNormA = HermitianInfinityNorm( uplo, A ); Real frobNormA = HermitianFrobeniusNorm( uplo, A ); Real oneNormError = OneNorm( Z ); Real infNormError = InfinityNorm( Z ); Real frobNormError = FrobeniusNorm( Z ); OutputFromRoot (g.Comm(), "||AOrig||_1 = ||AOrig||_oo = ",infNormAOrig,"\n",Indent(), "||AOrig||_F = ",frobNormAOrig,"\n",Indent(), "||A||_1 = ||A||_oo = ",infNormA,"\n",Indent(), "||A||_F = ",frobNormA,"\n",Indent(), "||A X - L^H AOrig L X||_1 = ",oneNormError,"\n",Indent(), "||A X - L^H AOrig L X||_oo = ",infNormError,"\n",Indent(), "||A X - L^H AOrig L X||_F = ",frobNormError); } else { // Test correctness by comparing the application of A against a // random set of k vectors to the application of // triu(B) AOrig triu(B)^H Trmm( LEFT, UPPER, ADJOINT, diag, F(1), B, Y ); Hemm( LEFT, UPPER, F(1), AOrig, Y, F(0), Z ); Trmm( LEFT, UPPER, NORMAL, diag, F(1), B, Z ); Hemm( LEFT, UPPER, F(-1), A, X, F(1), Z ); Real infNormAOrig = HermitianInfinityNorm( uplo, AOrig ); Real frobNormAOrig = HermitianFrobeniusNorm( uplo, AOrig ); Real infNormA = HermitianInfinityNorm( uplo, A ); Real frobNormA = HermitianFrobeniusNorm( uplo, A ); Real oneNormError = OneNorm( Z ); Real infNormError = InfinityNorm( Z ); Real frobNormError = FrobeniusNorm( Z ); OutputFromRoot (g.Comm(), "||AOrig||_1 = ||AOrig||_oo = ",infNormAOrig,"\n",Indent(), "||AOrig||_F = ",frobNormAOrig,"\n",Indent(), "||A||_1 = ||A||_oo = ",infNormA,"\n",Indent(), "||A||_F = ",frobNormA,"\n",Indent(), "||A X - U AOrig U^H X||_1 = ",oneNormError,"\n",Indent(), "||A X - U AOrig U^H X||_oo = ",infNormError,"\n",Indent(), "||A X - U AOrig U^H X||_F = ",frobNormError); } }
inline void TrsmLUNSmall ( UnitOrNonUnit diag, F alpha, const DistMatrix<F,VC,STAR>& U, DistMatrix<F,VC,STAR>& X, bool checkIfSingular ) { #ifndef RELEASE CallStackEntry entry("internal::TrsmLUNSmall"); if( U.Grid() != X.Grid() ) LogicError("U and X must be distributed over the same grid"); if( U.Height() != U.Width() || U.Width() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrsmLUN: \n" << " U ~ " << U.Height() << " x " << U.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; LogicError( msg.str() ); } if( U.ColAlignment() != X.ColAlignment() ) LogicError("U and X are assumed to be aligned"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<F,VC,STAR> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<F,VC,STAR> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,STAR> X1_STAR_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); PartitionUp ( X, XT, XB, 0 ); while( XT.Height() > 0 ) { LockedRepartitionUpDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); RepartitionUp ( XT, X0, X1, /**/ /**/ XB, X2 ); //--------------------------------------------------------------------// U11_STAR_STAR = U11; // U11[* ,* ] <- U11[VC,* ] X1_STAR_STAR = X1; // X1[* ,* ] <- X1[VC,* ] // X1[* ,* ] := U11^-1[* ,* ] X1[* ,* ] LocalTrsm ( LEFT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, X1_STAR_STAR, checkIfSingular ); X1 = X1_STAR_STAR; // X0[VC,* ] -= U01[VC,* ] X1[* ,* ] LocalGemm( NORMAL, NORMAL, F(-1), U01, X1_STAR_STAR, F(1), X0 ); //--------------------------------------------------------------------// SlideLockedPartitionUpDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); SlidePartitionUp ( XT, X0, /**/ /**/ X1, XB, X2 ); } }
inline void CholeskyLVar2( DistMatrix<F>& A ) { #ifndef RELEASE PushCallStack("internal::CholeskyLVar2"); if( A.Height() != A.Width() ) throw std::logic_error ("Can only compute Cholesky factor of square matrices"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary distributions DistMatrix<F,MR, STAR> A10Adj_MR_STAR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,MC, STAR> X11_MC_STAR(g); DistMatrix<F,MC, STAR> X21_MC_STAR(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A10Adj_MR_STAR.AlignWith( A10 ); X11_MC_STAR.AlignWith( A10 ); X21_MC_STAR.AlignWith( A20 ); X11_MC_STAR.ResizeTo( A11.Height(), A11.Width() ); X21_MC_STAR.ResizeTo( A21.Height(), A21.Width() ); //--------------------------------------------------------------------// A10Adj_MR_STAR.AdjointFrom( A10 ); LocalGemm ( NORMAL, NORMAL, F(1), A10, A10Adj_MR_STAR, F(0), X11_MC_STAR ); A11.SumScatterUpdate( F(-1), X11_MC_STAR ); A11_STAR_STAR = A11; LocalCholesky( LOWER, A11_STAR_STAR ); A11 = A11_STAR_STAR; LocalGemm ( NORMAL, NORMAL, F(1), A20, A10Adj_MR_STAR, F(0), X21_MC_STAR ); A21.SumScatterUpdate( F(-1), X21_MC_STAR ); A21_VC_STAR = A21; LocalTrsm ( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), A11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; //--------------------------------------------------------------------// A10Adj_MR_STAR.FreeAlignments(); X11_MC_STAR.FreeAlignments(); X21_MC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrsmLUNLarge ( UnitOrNonUnit diag, F alpha, const DistMatrix<F>& U, DistMatrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE CallStackEntry entry("internal::TrsmLUNLarge"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<F> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,MC, STAR> U01_MC_STAR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,MR > X1_STAR_MR(g); DistMatrix<F,STAR,VR > X1_STAR_VR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); PartitionUp ( X, XT, XB, 0 ); while( XT.Height() > 0 ) { LockedRepartitionUpDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); RepartitionUp ( XT, X0, X1, /**/ /**/ XB, X2 ); U01_MC_STAR.AlignWith( X0 ); X1_STAR_MR.AlignWith( X0 ); //--------------------------------------------------------------------// U11_STAR_STAR = U11; // U11[* ,* ] <- U11[MC,MR] X1_STAR_VR = X1; // X1[* ,VR] <- X1[MC,MR] // X1[* ,VR] := U11^-1[* ,* ] X1[* ,VR] LocalTrsm ( LEFT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, X1_STAR_VR, checkIfSingular ); X1_STAR_MR = X1_STAR_VR; // X1[* ,MR] <- X1[* ,VR] X1 = X1_STAR_MR; // X1[MC,MR] <- X1[* ,MR] U01_MC_STAR = U01; // U01[MC,* ] <- U01[MC,MR] // X0[MC,MR] -= U01[MC,* ] X1[* ,MR] LocalGemm( NORMAL, NORMAL, F(-1), U01_MC_STAR, X1_STAR_MR, F(1), X0 ); //--------------------------------------------------------------------// SlideLockedPartitionUpDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); SlidePartitionUp ( XT, X0, /**/ /**/ X1, XB, X2 ); } }
void L( DistMatrix<F>& A, DistMatrix<F,STAR,STAR>& t ) { #ifndef RELEASE CallStackEntry entry("hermitian_tridiag::L"); if( A.Grid() != t.Grid() ) LogicError("{A,t} must be distributed over the same grid"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( t.Viewing() ) LogicError("t must not be a view"); #endif const Grid& g = A.Grid(); DistMatrix<F,MD,STAR> tDiag(g); tDiag.AlignWithDiagonal( A, -1 ); tDiag.ResizeTo( A.Height()-1, 1 ); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); // Temporary distributions DistMatrix<F> WPan(g); DistMatrix<F,STAR,STAR> t1_STAR_STAR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,MC, STAR> APan_MC_STAR(g), A11_MC_STAR(g), A21_MC_STAR(g); DistMatrix<F,MR, STAR> APan_MR_STAR(g), A11_MR_STAR(g), A21_MR_STAR(g); DistMatrix<F,MC, STAR> WPan_MC_STAR(g), W11_MC_STAR(g), W21_MC_STAR(g); DistMatrix<F,MR, STAR> WPan_MR_STAR(g), W11_MR_STAR(g), W21_MR_STAR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( tDiag, tT, tB, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2 ); if( A22.Height() > 0 ) { WPan.AlignWith( A11 ); APan_MC_STAR.AlignWith( A11 ); WPan_MC_STAR.AlignWith( A11 ); APan_MR_STAR.AlignWith( A11 ); WPan_MR_STAR.AlignWith( A11 ); //----------------------------------------------------------------// WPan.ResizeTo( ABR.Height(), A11.Width() ); APan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() ); WPan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() ); APan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() ); WPan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() ); hermitian_tridiag::PanelL ( ABR, WPan, t1, APan_MC_STAR, APan_MR_STAR, WPan_MC_STAR, WPan_MR_STAR ); PartitionDown ( APan_MC_STAR, A11_MC_STAR, A21_MC_STAR, A11.Height() ); PartitionDown ( APan_MR_STAR, A11_MR_STAR, A21_MR_STAR, A11.Height() ); PartitionDown ( WPan_MC_STAR, W11_MC_STAR, W21_MC_STAR, A11.Height() ); PartitionDown ( WPan_MR_STAR, W11_MR_STAR, W21_MR_STAR, A11.Height() ); LocalTrr2k ( LOWER, ADJOINT, ADJOINT, F(-1), A21_MC_STAR, W21_MR_STAR, W21_MC_STAR, A21_MR_STAR, F(1), A22 ); //----------------------------------------------------------------// } else { A11_STAR_STAR = A11; t1_STAR_STAR.ResizeTo( t1.Height(), 1 ); HermitianTridiag ( LOWER, A11_STAR_STAR.Matrix(), t1_STAR_STAR.Matrix() ); A11 = A11_STAR_STAR; t1 = t1_STAR_STAR; } SlidePartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } // Redistribute from matrix-diagonal form to fully replicated t = tDiag; }
inline void ApplyPackedReflectorsRLHF ( Conjugation conjugation, int offset, const DistMatrix<Complex<R> >& H, const DistMatrix<Complex<R>,MD,STAR>& t, DistMatrix<Complex<R> >& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsRLHF"); if( H.Grid() != t.Grid() || t.Grid() != A.Grid() ) throw std::logic_error ("{H,t,A} must be distributed over the same grid"); if( offset > 0 || offset < -H.Width() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Width() ) throw std::logic_error ("Width of transforms must equal width of target matrix"); if( t.Height() != H.DiagonalLength( offset ) ) throw std::logic_error("t must be the same length as H's offset diag"); if( !t.AlignedWithDiagonal( H, offset ) ) throw std::logic_error("t must be aligned with H's 'offset' diagonal"); #endif typedef Complex<R> C; const Grid& g = H.Grid(); DistMatrix<C> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<C> ALeft(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); DistMatrix<C,STAR,VR > HPan_STAR_VR(g); DistMatrix<C,STAR,MR > HPan_STAR_MR(g); DistMatrix<C,STAR,STAR> t1_STAR_STAR(g); DistMatrix<C,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<C,STAR,MC > ZAdj_STAR_MC(g); DistMatrix<C,STAR,VC > ZAdj_STAR_VC(g); LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionDown ( t, tT, tB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); const int HPanWidth = H10.Width() + H11.Width(); const int HPanOffset = std::min( H11.Height(), std::max(-offset-H00.Height(),0) ); const int HPanHeight = H11.Height()-HPanOffset; HPan.LockedView( H, H00.Height()+HPanOffset, 0, HPanHeight, HPanWidth ); LockedRepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2, HPanHeight ); ALeft.View( A, 0, 0, A.Height(), HPanWidth ); HPan_STAR_MR.AlignWith( ALeft ); ZAdj_STAR_MC.AlignWith( ALeft ); ZAdj_STAR_VC.AlignWith( ALeft ); Zeros( HPan.Height(), ALeft.Height(), ZAdj_STAR_MC ); Zeros( HPan.Height(), HPan.Height(), SInv_STAR_STAR ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( RIGHT, LOWER, offset, HPanCopy ); SetDiagonalToOne( RIGHT, offset, HPanCopy ); HPan_STAR_VR = HPanCopy; Herk ( UPPER, NORMAL, C(1), HPan_STAR_VR.LockedLocalMatrix(), C(0), SInv_STAR_STAR.LocalMatrix() ); SInv_STAR_STAR.SumOverGrid(); t1_STAR_STAR = t1; FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR ); HPan_STAR_MR = HPan_STAR_VR; LocalGemm ( NORMAL, ADJOINT, C(1), HPan_STAR_MR, ALeft, C(0), ZAdj_STAR_MC ); ZAdj_STAR_VC.SumScatterFrom( ZAdj_STAR_MC ); LocalTrsm ( LEFT, UPPER, ADJOINT, NON_UNIT, C(1), SInv_STAR_STAR, ZAdj_STAR_VC ); ZAdj_STAR_MC = ZAdj_STAR_VC; LocalGemm ( ADJOINT, NORMAL, C(-1), ZAdj_STAR_MC, HPan_STAR_MR, C(1), ALeft ); //--------------------------------------------------------------------// HPan_STAR_MR.FreeAlignments(); ZAdj_STAR_MC.FreeAlignments(); ZAdj_STAR_VC.FreeAlignments(); SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlideLockedPartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void LU( DistMatrix<F>& A, DistMatrix<int,VC,STAR>& p ) { #ifndef RELEASE PushCallStack("LU"); if( A.Grid() != p.Grid() ) throw std::logic_error("{A,p} must be distributed over the same grid"); if( p.Viewing() && (std::min(A.Height(),A.Width()) != p.Height() || p.Width() != 1) ) throw std::logic_error ("p must be a vector of the same height as the min dimension of A."); #endif const Grid& g = A.Grid(); if( !p.Viewing() ) p.ResizeTo( std::min(A.Height(),A.Width()), 1 ); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), AB(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<int,VC,STAR> pT(g), p0(g), pB(g), p1(g), p2(g); // Temporary distributions DistMatrix<F, STAR,STAR> A11_STAR_STAR(g); DistMatrix<F, MC, STAR> A21_MC_STAR(g); DistMatrix<F, STAR,VR > A12_STAR_VR(g); DistMatrix<F, STAR,MR > A12_STAR_MR(g); DistMatrix<int,STAR,STAR> p1_STAR_STAR(g); // Pivot composition std::vector<int> image, preimage; // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( p, pT, pB, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( pT, p0, /**/ /**/ p1, pB, p2 ); View1x2( AB, ABL, ABR ); const int pivotOffset = A01.Height(); A12_STAR_VR.AlignWith( A22 ); A12_STAR_MR.AlignWith( A22 ); A21_MC_STAR.AlignWith( A22 ); A11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() ); p1_STAR_STAR.ResizeTo( p1.Height(), 1 ); //--------------------------------------------------------------------// A21_MC_STAR = A21; A11_STAR_STAR = A11; internal::PanelLU ( A11_STAR_STAR, A21_MC_STAR, p1_STAR_STAR, pivotOffset ); internal::ComposePanelPivots ( p1_STAR_STAR, pivotOffset, image, preimage ); ApplyRowPivots( AB, image, preimage ); // Perhaps we should give up perfectly distributing this operation since // it's total contribution is only O(n^2) A12_STAR_VR = A12; internal::LocalTrsm ( LEFT, LOWER, NORMAL, UNIT, F(1), A11_STAR_STAR, A12_STAR_VR ); A12_STAR_MR = A12_STAR_VR; internal::LocalGemm ( NORMAL, NORMAL, F(-1), A21_MC_STAR, A12_STAR_MR, F(1), A22 ); A11 = A11_STAR_STAR; A12 = A12_STAR_MR; A21 = A21_MC_STAR; p1 = p1_STAR_STAR; //--------------------------------------------------------------------// A12_STAR_VR.FreeAlignments(); A12_STAR_MR.FreeAlignments(); A21_MC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlidePartitionDown ( pT, p0, p1, /**/ /**/ pB, p2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void ApplyPackedReflectorsRLHF ( int offset, const DistMatrix<R>& H, DistMatrix<R>& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsRLHF"); if( H.Grid() != A.Grid() ) throw std::logic_error("{H,A} must be distributed over the same grid"); if( offset > 0 || offset < -H.Width() ) throw std::logic_error("Transforms out of bounds"); if( H.Width() != A.Width() ) throw std::logic_error ("Width of transforms must equal width of target matrix"); #endif const Grid& g = H.Grid(); DistMatrix<R> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<R> ALeft(g); DistMatrix<R,STAR,VR > HPan_STAR_VR(g); DistMatrix<R,STAR,MR > HPan_STAR_MR(g); DistMatrix<R,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<R,STAR,MC > ZTrans_STAR_MC(g); DistMatrix<R,STAR,VC > ZTrans_STAR_VC(g); LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); const int HPanWidth = H10.Width() + H11.Width(); const int HPanOffset = std::min( H11.Height(), std::max(-offset-H00.Height(),0) ); const int HPanHeight = H11.Height()-HPanOffset; HPan.LockedView( H, H00.Height()+HPanOffset, 0, HPanHeight, HPanWidth ); ALeft.View( A, 0, 0, A.Height(), HPanWidth ); HPan_STAR_MR.AlignWith( ALeft ); ZTrans_STAR_MC.AlignWith( ALeft ); ZTrans_STAR_VC.AlignWith( ALeft ); Zeros( HPan.Height(), ALeft.Height(), ZTrans_STAR_MC ); Zeros( HPan.Height(), HPan.Height(), SInv_STAR_STAR ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( RIGHT, LOWER, offset, HPanCopy ); SetDiagonalToOne( RIGHT, offset, HPanCopy ); HPan_STAR_VR = HPanCopy; Syrk ( UPPER, NORMAL, R(1), HPan_STAR_VR.LockedLocalMatrix(), R(0), SInv_STAR_STAR.LocalMatrix() ); SInv_STAR_STAR.SumOverGrid(); HalveMainDiagonal( SInv_STAR_STAR ); HPan_STAR_MR = HPan_STAR_VR; LocalGemm ( NORMAL, TRANSPOSE, R(1), HPan_STAR_MR, ALeft, R(0), ZTrans_STAR_MC ); ZTrans_STAR_VC.SumScatterFrom( ZTrans_STAR_MC ); LocalTrsm ( LEFT, UPPER, TRANSPOSE, NON_UNIT, R(1), SInv_STAR_STAR, ZTrans_STAR_VC ); ZTrans_STAR_MC = ZTrans_STAR_VC; LocalGemm ( TRANSPOSE, NORMAL, R(-1), ZTrans_STAR_MC, HPan_STAR_MR, R(1), ALeft ); //--------------------------------------------------------------------// HPan_STAR_MR.FreeAlignments(); ZTrans_STAR_MC.FreeAlignments(); ZTrans_STAR_VC.FreeAlignments(); SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void internal::SymmRUC ( T alpha, const DistMatrix<T,MC,MR>& A, const DistMatrix<T,MC,MR>& B, T beta, DistMatrix<T,MC,MR>& C ) { #ifndef RELEASE PushCallStack("internal::SymmRUC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error("{A,B,C} must be distributed on the same grid"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), AColPan(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), ARowPan(g), A20(g), A21(g), A22(g); DistMatrix<T,MC,MR> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T,MC,MR> CL(g), CR(g), C0(g), C1(g), C2(g), CLeft(g), CRight(g); // Temporary distributions DistMatrix<T,MC, STAR> B1_MC_STAR(g); DistMatrix<T,VR, STAR> AColPan_VR_STAR(g); DistMatrix<T,STAR,MR > AColPanTrans_STAR_MR(g); DistMatrix<T,MR, STAR> ARowPanTrans_MR_STAR(g); // Start the algorithm Scal( beta, C ); LockedPartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionRight( B, BL, BR, 0 ); PartitionRight( C, CL, CR, 0 ); while( CR.Width() > 0 ) { LockedRepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); ARowPan.LockedView1x2( A11, A12 ); AColPan.LockedView2x1 ( A01, A11 ); CLeft.View1x2( C0, C1 ); CRight.View1x2( C1, C2 ); B1_MC_STAR.AlignWith( C ); AColPan_VR_STAR.AlignWith( CLeft ); AColPanTrans_STAR_MR.AlignWith( CLeft ); ARowPanTrans_MR_STAR.AlignWith( CRight ); //--------------------------------------------------------------------// B1_MC_STAR = B1; AColPan_VR_STAR = AColPan; AColPanTrans_STAR_MR.TransposeFrom( AColPan_VR_STAR ); ARowPanTrans_MR_STAR.TransposeFrom( ARowPan ); MakeTrapezoidal( LEFT, LOWER, 0, ARowPanTrans_MR_STAR ); MakeTrapezoidal( RIGHT, LOWER, -1, AColPanTrans_STAR_MR ); internal::LocalGemm ( NORMAL, TRANSPOSE, alpha, B1_MC_STAR, ARowPanTrans_MR_STAR, (T)1, CRight ); internal::LocalGemm ( NORMAL, NORMAL, alpha, B1_MC_STAR, AColPanTrans_STAR_MR, (T)1, CLeft ); //--------------------------------------------------------------------// B1_MC_STAR.FreeAlignments(); AColPan_VR_STAR.FreeAlignments(); AColPanTrans_STAR_MR.FreeAlignments(); ARowPanTrans_MR_STAR.FreeAlignments(); SlideLockedPartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrdtrmmUVar1( Orientation orientation, DistMatrix<F,MC,MR>& U ) { #ifndef RELEASE PushCallStack("internal::TrdtrmmUVar1"); if( U.Height() != U.Width() ) throw std::logic_error("U must be square"); if( orientation == NORMAL ) throw std::logic_error("Orientation must be (conjugate-)transpose"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<F,MC,MR> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<F,MD,STAR> d1(g); // Temporary distributions DistMatrix<F,MC, STAR> S01_MC_STAR(g); DistMatrix<F,VC, STAR> S01_VC_STAR(g); DistMatrix<F,VR, STAR> U01_VR_STAR(g); DistMatrix<F,STAR,MR > U01AdjOrTrans_STAR_MR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); S01_MC_STAR.AlignWith( U ); S01_VC_STAR.AlignWith( U ); U01_VR_STAR.AlignWith( U ); U01AdjOrTrans_STAR_MR.AlignWith( U ); PartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( UTL.Height() < U.Height() && UTL.Width() < U.Height() ) { RepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); //--------------------------------------------------------------------// U11.GetDiagonal( d1 ); S01_MC_STAR = U01; S01_VC_STAR = S01_MC_STAR; U01_VR_STAR = S01_VC_STAR; if( orientation == TRANSPOSE ) { DiagonalSolve( RIGHT, NORMAL, d1, U01_VR_STAR ); U01AdjOrTrans_STAR_MR.TransposeFrom( U01_VR_STAR ); } else { DiagonalSolve( RIGHT, ADJOINT, d1, U01_VR_STAR ); U01AdjOrTrans_STAR_MR.AdjointFrom( U01_VR_STAR ); } LocalTrrk( UPPER, F(1), S01_MC_STAR, U01AdjOrTrans_STAR_MR, F(1), U00 ); U11_STAR_STAR = U11; LocalTrmm ( RIGHT, UPPER, ADJOINT, UNIT, F(1), U11_STAR_STAR, U01_VR_STAR ); U01 = U01_VR_STAR; LocalTrdtrmm( orientation, UPPER, U11_STAR_STAR ); U11 = U11_STAR_STAR; //--------------------------------------------------------------------// d1.FreeAlignments(); SlidePartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline R Row( DistMatrix<R>& chi, DistMatrix<R>& x ) { #ifndef RELEASE PushCallStack("reflector::Row"); if( chi.Grid() != x.Grid() ) throw std::logic_error ("chi and x must be distributed over the same grid"); if( chi.Height() != 1 || chi.Width() != 1 ) throw std::logic_error("chi must be a scalar"); if( x.Height() != 1 ) throw std::logic_error("x must be a row vector"); if( chi.Grid().Row() != chi.ColAlignment() ) throw std::logic_error("Reflecting with incorrect row of processes"); if( x.Grid().Row() != x.ColAlignment() ) throw std::logic_error("Reflecting with incorrect row of processes"); #endif const Grid& grid = x.Grid(); mpi::Comm rowComm = grid.RowComm(); const int gridCol = grid.Col(); const int gridWidth = grid.Width(); const int rowAlignment = chi.RowAlignment(); std::vector<R> localNorms(gridWidth); R localNorm = Nrm2( x.LockedMatrix() ); mpi::AllGather( &localNorm, 1, &localNorms[0], 1, rowComm ); R norm = blas::Nrm2( gridWidth, &localNorms[0], 1 ); if( norm == 0 ) { if( gridCol == rowAlignment ) chi.SetLocal(0,0,-chi.GetLocal(0,0)); #ifndef RELEASE PopCallStack(); #endif return R(2); } R alpha; if( gridCol == rowAlignment ) alpha = chi.GetLocal(0,0); mpi::Broadcast( &alpha, 1, rowAlignment, rowComm ); R beta; if( alpha <= 0 ) beta = lapack::SafeNorm( alpha, norm ); else beta = -lapack::SafeNorm( alpha, norm ); const R one = 1; const R safeMin = lapack::MachineSafeMin<R>(); const R epsilon = lapack::MachineEpsilon<R>(); const R safeInv = safeMin/epsilon; int count = 0; if( Abs(beta) < safeInv ) { R invOfSafeInv = one/safeInv; do { ++count; Scale( invOfSafeInv, x ); alpha *= invOfSafeInv; beta *= invOfSafeInv; } while( Abs(beta) < safeInv ); localNorm = Nrm2( x.LockedMatrix() ); mpi::AllGather( &localNorm, 1, &localNorms[0], 1, rowComm ); norm = blas::Nrm2( gridWidth, &localNorms[0], 1 ); if( alpha <= 0 ) beta = lapack::SafeNorm( alpha, norm ); else beta = -lapack::SafeNorm( alpha, norm ); } R tau = (beta-alpha)/beta; Scale( one/(alpha-beta), x ); for( int j=0; j<count; ++j ) beta *= safeInv; if( gridCol == rowAlignment ) chi.SetLocal(0,0,beta); #ifndef RELEASE PopCallStack(); #endif return tau; }
inline void internal::TrmmLUNC ( UnitOrNonUnit diag, T alpha, const DistMatrix<T,MC,MR>& U, DistMatrix<T,MC,MR>& X ) { #ifndef RELEASE PushCallStack("internal::TrmmLUNC"); if( U.Grid() != X.Grid() ) throw std::logic_error ("U and X must be distributed over the same grid"); if( U.Height() != U.Width() || U.Width() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrmmLUN: \n" << " U ~ " << U.Height() << " x " << U.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<T,MC,MR> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<T,MC,MR> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<T,STAR,STAR> U11_STAR_STAR(g); DistMatrix<T,STAR,MC > U12_STAR_MC(g); DistMatrix<T,STAR,VR > X1_STAR_VR(g); DistMatrix<T,MR, STAR> D1Trans_MR_STAR(g); DistMatrix<T,MR, MC > D1Trans_MR_MC(g); DistMatrix<T,MC, MR > D1(g); // Start the algorithm Scal( alpha, X ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); PartitionDown ( X, XT, XB, 0 ); while( XB.Height() > 0 ) { LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); RepartitionDown ( XT, X0, /**/ /**/ X1, XB, X2 ); U12_STAR_MC.AlignWith( X2 ); D1Trans_MR_STAR.AlignWith( X1 ); D1Trans_MR_MC.AlignWith( X1 ); D1.AlignWith( X1 ); D1Trans_MR_STAR.ResizeTo( X1.Width(), X1.Height() ); D1.ResizeTo( X1.Height(), X1.Width() ); //--------------------------------------------------------------------// X1_STAR_VR = X1; U11_STAR_STAR = U11; internal::LocalTrmm ( LEFT, UPPER, NORMAL, diag, (T)1, U11_STAR_STAR, X1_STAR_VR ); X1 = X1_STAR_VR; U12_STAR_MC = U12; internal::LocalGemm ( TRANSPOSE, TRANSPOSE, (T)1, X2, U12_STAR_MC, (T)0, D1Trans_MR_STAR ); D1Trans_MR_MC.SumScatterFrom( D1Trans_MR_STAR ); Transpose( D1Trans_MR_MC.LocalMatrix(), D1.LocalMatrix() ); Axpy( (T)1, D1, X1 ); //--------------------------------------------------------------------// D1.FreeAlignments(); D1Trans_MR_MC.FreeAlignments(); D1Trans_MR_STAR.FreeAlignments(); U12_STAR_MC.FreeAlignments(); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); SlidePartitionDown ( XT, X0, X1, /**/ /**/ XB, X2 ); } #ifndef RELEASE PopCallStack(); #endif }