// Checks that the [MR,*]-distributed matrix A is conformal with C: A's height
// must equal C's width and A's column alignment must equal C's row alignment.
// On a mismatch, LogicError is invoked with `name` prepended to the message.
void EnsureConformal
( const DistMatrix<T,MR,STAR>& A, const DistMatrix<T>& C, string name )
{
    // Nothing to report when both the extent and the alignment agree
    if( A.Height() == C.Width() && A.ColAlign() == C.RowAlign() )
        return;

    LogicError(name," not conformal with C");
}
void TestCorrectness ( UpperOrLower uplo, const DistMatrix<F>& A, const DistMatrix<F,STAR,STAR>& t, DistMatrix<F>& AOrig, bool print, bool display ) { typedef Base<F> Real; const Grid& g = A.Grid(); const Int n = AOrig.Height(); const Real infNormAOrig = InfinityNorm( AOrig ); const Real frobNormAOrig = FrobeniusNorm( AOrig ); if( g.Rank() == 0 ) Output("Testing error..."); // Set H to the appropriate Hessenberg portion of A DistMatrix<F> H( A ); if( uplo == LOWER ) MakeTrapezoidal( LOWER, H, 1 ); else MakeTrapezoidal( UPPER, H, -1 ); if( print ) Print( H, "Hessenberg" ); if( display ) Display( H, "Bidiagonal" ); if( print || display ) { DistMatrix<F> Q(g); Identity( Q, n, n ); hessenberg::ApplyQ( LEFT, uplo, NORMAL, A, t, Q ); if( print ) Print( Q, "Q" ); if( display ) Display( Q, "Q" ); } // Reverse the accumulated Householder transforms hessenberg::ApplyQ( LEFT, uplo, ADJOINT, A, t, AOrig ); hessenberg::ApplyQ( RIGHT, uplo, NORMAL, A, t, AOrig ); if( print ) Print( AOrig, "Manual Hessenberg" ); if( display ) Display( AOrig, "Manual Hessenberg" ); // Compare the appropriate portion of AOrig and B if( uplo == LOWER ) MakeTrapezoidal( LOWER, AOrig, 1 ); else MakeTrapezoidal( UPPER, AOrig, -1 ); H -= AOrig; if( print ) Print( H, "Error in rotated Hessenberg" ); if( display ) Display( H, "Error in rotated Hessenberg" ); const Real infNormError = InfinityNorm( H ); const Real frobNormError = FrobeniusNorm( H ); if( g.Rank() == 0 ) Output (" ||A||_oo = ",infNormAOrig,"\n", " ||A||_F = ",frobNormAOrig,"\n", " ||H - Q^H A Q||_oo = ",infNormError,"\n", " ||H - Q^H A Q||_F = ",frobNormError); }
inline void TrsmLUNSmall ( UnitOrNonUnit diag, F alpha, const DistMatrix<F,VC,STAR>& U, DistMatrix<F,VC,STAR>& X, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("internal::TrsmLUNSmall"); if( U.Grid() != X.Grid() ) throw std::logic_error ("U and X must be distributed over the same grid"); if( U.Height() != U.Width() || U.Width() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrsmLUN: \n" << " U ~ " << U.Height() << " x " << U.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str() ); } if( U.ColAlignment() != X.ColAlignment() ) throw std::logic_error("U and X are assumed to be aligned"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<F,VC,STAR> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<F,VC,STAR> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,STAR> X1_STAR_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); PartitionUp ( X, XT, XB, 0 ); while( XT.Height() > 0 ) { LockedRepartitionUpDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); RepartitionUp ( XT, X0, X1, /**/ /**/ XB, X2 ); //--------------------------------------------------------------------// U11_STAR_STAR = U11; // U11[* ,* ] <- U11[VC,* ] X1_STAR_STAR = X1; // X1[* ,* ] <- X1[VC,* ] // X1[* ,* ] := U11^-1[* ,* ] X1[* ,* ] LocalTrsm ( LEFT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, X1_STAR_STAR, checkIfSingular ); X1 = X1_STAR_STAR; // X0[VC,* ] -= U01[VC,* ] X1[* ,* ] LocalGemm( NORMAL, NORMAL, F(-1), U01, X1_STAR_STAR, F(1), X0 ); //--------------------------------------------------------------------// SlideLockedPartitionUpDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, 
U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); SlidePartitionUp ( XT, X0, /**/ /**/ X1, XB, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TwoSidedTrsmUVar5 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& U ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrsmUVar5"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( U.Height() != U.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != U.Height() ) throw std::logic_error("A and U must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,MC > A12_STAR_MC(g); DistMatrix<F,STAR,MR > A12_STAR_MR(g); DistMatrix<F,STAR,VC > A12_STAR_VC(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,MC > U12_STAR_MC(g); DistMatrix<F,STAR,MR > U12_STAR_MR(g); DistMatrix<F,STAR,VC > U12_STAR_VC(g); DistMatrix<F,STAR,VR > U12_STAR_VR(g); DistMatrix<F,STAR,VR > Y12_STAR_VR(g); DistMatrix<F> Y12(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); A12_STAR_MC.AlignWith( A22 ); A12_STAR_MR.AlignWith( A22 ); A12_STAR_VC.AlignWith( A22 ); A12_STAR_VR.AlignWith( A22 ); U12_STAR_MC.AlignWith( A22 ); U12_STAR_MR.AlignWith( A22 ); U12_STAR_VC.AlignWith( A22 ); U12_STAR_VR.AlignWith( A22 ); Y12.AlignWith( A12 ); Y12_STAR_VR.AlignWith( A12 ); 
//--------------------------------------------------------------------// // A11 := inv(U11)' A11 inv(U11) U11_STAR_STAR = U11; A11_STAR_STAR = A11; LocalTwoSidedTrsm( UPPER, diag, A11_STAR_STAR, U11_STAR_STAR ); A11 = A11_STAR_STAR; // Y12 := A11 U12 U12_STAR_VR = U12; Y12_STAR_VR.ResizeTo( A12.Height(), A12.Width() ); Hemm ( LEFT, UPPER, F(1), A11_STAR_STAR.LocalMatrix(), U12_STAR_VR.LocalMatrix(), F(0), Y12_STAR_VR.LocalMatrix() ); Y12 = Y12_STAR_VR; // A12 := inv(U11)' A12 A12_STAR_VR = A12; LocalTrsm ( LEFT, UPPER, ADJOINT, diag, F(1), U11_STAR_STAR, A12_STAR_VR ); A12 = A12_STAR_VR; // A12 := A12 - 1/2 Y12 Axpy( F(-1)/F(2), Y12, A12 ); // A22 := A22 - (A12' U12 + U12' A12) A12_STAR_VR = A12; A12_STAR_VC = A12_STAR_VR; U12_STAR_VC = U12_STAR_VR; A12_STAR_MC = A12_STAR_VC; U12_STAR_MC = U12_STAR_VC; A12_STAR_MR = A12_STAR_VR; U12_STAR_MR = U12_STAR_VR; LocalTrr2k ( UPPER, ADJOINT, ADJOINT, F(-1), U12_STAR_MC, A12_STAR_MR, A12_STAR_MC, U12_STAR_MR, F(1), A22 ); // A12 := A12 - 1/2 Y12 Axpy( F(-1)/F(2), Y12, A12 ); // A12 := A12 inv(U22) // // This is the bottleneck because A12 only has blocksize rows Trsm( RIGHT, UPPER, NORMAL, diag, F(1), U22, A12 ); //--------------------------------------------------------------------// A12_STAR_MC.FreeAlignments(); A12_STAR_MR.FreeAlignments(); A12_STAR_VC.FreeAlignments(); A12_STAR_VR.FreeAlignments(); U12_STAR_MC.FreeAlignments(); U12_STAR_MR.FreeAlignments(); U12_STAR_VC.FreeAlignments(); U12_STAR_VR.FreeAlignments(); Y12.FreeAlignments(); Y12_STAR_VR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } #ifndef RELEASE PopCallStack(); #endif }
void LSquare( DistMatrix<R>& A ) { #ifndef RELEASE CallStackEntry entry("hermitian_tridiag::LSquare"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( A.Grid().Height() != A.Grid().Width() ) throw std::logic_error("The process grid must be square"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<R> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary distributions DistMatrix<R> WPan(g); DistMatrix<R,STAR,STAR> A11_STAR_STAR(g); DistMatrix<R,MC, STAR> APan_MC_STAR(g), A11_MC_STAR(g), A21_MC_STAR(g); DistMatrix<R,MR, STAR> APan_MR_STAR(g), A11_MR_STAR(g), A21_MR_STAR(g); DistMatrix<R,MC, STAR> WPan_MC_STAR(g), W11_MC_STAR(g), W21_MC_STAR(g); DistMatrix<R,MR, STAR> WPan_MR_STAR(g), W11_MR_STAR(g), W21_MR_STAR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); if( A22.Height() > 0 ) { WPan.AlignWith( A11 ); APan_MC_STAR.AlignWith( A11 ); WPan_MC_STAR.AlignWith( A11 ); APan_MR_STAR.AlignWith( A11 ); WPan_MR_STAR.AlignWith( A11 ); //----------------------------------------------------------------// WPan.ResizeTo( ABR.Height(), A11.Width() ); APan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() ); WPan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() ); APan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() ); WPan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() ); hermitian_tridiag::PanelLSquare ( ABR, WPan, APan_MC_STAR, APan_MR_STAR, WPan_MC_STAR, WPan_MR_STAR ); PartitionDown ( APan_MC_STAR, A11_MC_STAR, A21_MC_STAR, A11.Height() ); PartitionDown ( APan_MR_STAR, A11_MR_STAR, A21_MR_STAR, A11.Height() ); PartitionDown ( WPan_MC_STAR, W11_MC_STAR, W21_MC_STAR, A11.Height() ); PartitionDown ( WPan_MR_STAR, W11_MR_STAR, W21_MR_STAR, A11.Height() ); LocalTrr2k ( LOWER, TRANSPOSE, 
TRANSPOSE, R(-1), A21_MC_STAR, W21_MR_STAR, W21_MC_STAR, A21_MR_STAR, R(1), A22 ); //----------------------------------------------------------------// WPan_MR_STAR.FreeAlignments(); APan_MR_STAR.FreeAlignments(); WPan_MC_STAR.FreeAlignments(); APan_MC_STAR.FreeAlignments(); WPan.FreeAlignments(); } else { A11_STAR_STAR = A11; HermitianTridiag( LOWER, A11_STAR_STAR.Matrix() ); A11 = A11_STAR_STAR; } SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } }
void TestCorrectness ( LeftOrRight side, UpperOrLower uplo, ForwardOrBackward order, Conjugation conjugation, Int offset, bool printMatrices, const DistMatrix<F>& H, const DistMatrix<F,MD,STAR>& t ) { typedef Base<F> Real; const Grid& g = H.Grid(); const Int m = H.Height(); if( g.Rank() == 0 ) cout << " Testing orthogonality of transform..." << endl; // Form Z := Q^H Q or Q Q^H as an approximation to identity DistMatrix<F> Y(g); Identity( Y, m, m ); ApplyPackedReflectors ( side, uplo, VERTICAL, order, conjugation, offset, H, t, Y ); if( printMatrices ) { DistMatrix<F> W(g); Identity( W, m, m ); if( order == FORWARD ) { ApplyPackedReflectors ( side, uplo, VERTICAL, BACKWARD, conjugation, offset, H, t, W ); Print( Y, "Q" ); Print( W, "Q^H" ); } else { ApplyPackedReflectors ( side, uplo, VERTICAL, FORWARD, conjugation, offset, H, t, W ); Print( Y, "Q^H" ); Print( W, "Q" ); } } DistMatrix<F> Z(g); Zeros( Z, m, m ); Herk( uplo, NORMAL, Real(1), Y, Real(0), Z ); MakeHermitian( uplo, Z ); // Form X := -I + Q^H Q or Q Q^H UpdateDiagonal( Z, F(-1) ); if( printMatrices ) { if( order == FORWARD ) Print( Z, "Q Q^H - I" ); else Print( Z, "Q^H Q - I" ); } // Compute the maximum deviance const Real oneNormOfError = OneNorm( Z ); const Real infNormOfError = InfinityNorm( Z ); const Real frobNormOfError = FrobeniusNorm( Z ); if( g.Rank() == 0 ) { if( order == FORWARD ) { cout << " ||Q Q^H - I||_1 = " << oneNormOfError << "\n" << " ||Q Q^H - I||_oo = " << infNormOfError << "\n" << " ||Q Q^H - I||_F = " << frobNormOfError << endl; } else { cout << " ||Q^H Q - I||_1 = " << oneNormOfError << "\n" << " ||Q^H Q - I||_oo = " << infNormOfError << "\n" << " ||Q^H Q - I||_F = " << frobNormOfError << endl; } } }
inline void GolubReinschUpper ( DistMatrix<F>& A, DistMatrix<BASE(F),VR,STAR>& s, DistMatrix<F>& V ) { #ifndef RELEASE CallStackEntry entry("svd::GolubReinschUpper"); #endif typedef BASE(F) Real; const Int m = A.Height(); const Int n = A.Width(); const Int k = Min( m, n ); const Int offdiagonal = ( m>=n ? 1 : -1 ); const char uplo = ( m>=n ? 'U' : 'L' ); const Grid& g = A.Grid(); // Bidiagonalize A DistMatrix<F,STAR,STAR> tP( g ), tQ( g ); Bidiag( A, tP, tQ ); // Grab copies of the diagonal and sub/super-diagonal of A DistMatrix<Real,MD,STAR> d_MD_STAR(g), e_MD_STAR(g); A.GetRealPartOfDiagonal( d_MD_STAR ); A.GetRealPartOfDiagonal( e_MD_STAR, offdiagonal ); // NOTE: lapack::BidiagQRAlg expects e to be of length k DistMatrix<Real,STAR,STAR> d_STAR_STAR( d_MD_STAR ), eHat_STAR_STAR( k, 1, g ), e_STAR_STAR( g ); View( e_STAR_STAR, eHat_STAR_STAR, 0, 0, k-1, 1 ); e_STAR_STAR = e_MD_STAR; // Initialize U and VAdj to the appropriate identity matrices DistMatrix<F,VC,STAR> U_VC_STAR( g ); DistMatrix<F,STAR,VC> VAdj_STAR_VC( g ); U_VC_STAR.AlignWith( A ); VAdj_STAR_VC.AlignWith( V ); Identity( U_VC_STAR, m, k ); Identity( VAdj_STAR_VC, k, n ); // Compute the SVD of the bidiagonal matrix and accumulate the Givens // rotations into our local portion of U and VAdj Matrix<F>& ULoc = U_VC_STAR.Matrix(); Matrix<F>& VAdjLoc = VAdj_STAR_VC.Matrix(); lapack::BidiagQRAlg ( uplo, k, VAdjLoc.Width(), ULoc.Height(), d_STAR_STAR.Buffer(), e_STAR_STAR.Buffer(), VAdjLoc.Buffer(), VAdjLoc.LDim(), ULoc.Buffer(), ULoc.LDim() ); // Make a copy of A (for the Householder vectors) and pull the necessary // portions of U and VAdj into a standard matrix dist. 
DistMatrix<F> B( A ); if( m >= n ) { DistMatrix<F> AT(g), AB(g); DistMatrix<F,VC,STAR> UT_VC_STAR(g), UB_VC_STAR(g); PartitionDown( A, AT, AB, n ); PartitionDown( U_VC_STAR, UT_VC_STAR, UB_VC_STAR, n ); AT = UT_VC_STAR; MakeZeros( AB ); Adjoint( VAdj_STAR_VC, V ); } else { DistMatrix<F> VT(g), VB(g); DistMatrix<F,STAR,VC> VAdjL_STAR_VC(g), VAdjR_STAR_VC(g); PartitionDown( V, VT, VB, m ); PartitionRight( VAdj_STAR_VC, VAdjL_STAR_VC, VAdjR_STAR_VC, m ); Adjoint( VAdjL_STAR_VC, VT ); MakeZeros( VB ); } // Backtransform U and V bidiag::ApplyU( LEFT, NORMAL, B, tQ, A ); bidiag::ApplyV( LEFT, NORMAL, B, tP, V ); // Copy out the appropriate subset of the singular values s = d_STAR_STAR; }
inline void SUMMA_TTB ( Orientation orientationOfA, Orientation orientationOfB, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE CallStackEntry entry("gemm::SUMMA_TTB"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) LogicError("{A,B,C} must have the same grid"); if( orientationOfA == NORMAL || orientationOfB == NORMAL ) LogicError("A and B must be (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Height() != C.Width() || A.Height() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal matrices: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; LogicError( msg.str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,VR, STAR> A1_VR_STAR(g); DistMatrix<T,STAR,MR > A1AdjOrTrans_STAR_MR(g); DistMatrix<T,STAR,MC > D1_STAR_MC(g); DistMatrix<T,MR, MC > D1_MR_MC(g); DistMatrix<T> D1(g); A1_VR_STAR.AlignWith( B ); A1AdjOrTrans_STAR_MR.AlignWith( B ); D1_STAR_MC.AlignWith( B ); // Start the algorithm Scale( beta, C ); LockedPartitionRight( A, AL, AR, 0 ); PartitionDown ( C, CT, CB, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); D1.AlignWith( C1 ); //--------------------------------------------------------------------// A1_VR_STAR = A1; if( orientationOfA == ADJOINT ) A1AdjOrTrans_STAR_MR.AdjointFrom( A1_VR_STAR ); else A1AdjOrTrans_STAR_MR.TransposeFrom( A1_VR_STAR ); // D1[*,MC] := alpha (A1[MR,*])^[T/H] (B[MC,MR])^[T/H] // = alpha (A1^[T/H])[*,MR] (B^[T/H])[MR,MC] LocalGemm ( NORMAL, orientationOfB, alpha, A1AdjOrTrans_STAR_MR, B, D1_STAR_MC ); // C1[MC,MR] += scattered & transposed D1[*,MC] summed over grid rows D1_MR_MC.SumScatterFrom( D1_STAR_MC ); D1 
= D1_MR_MC; Axpy( T(1), D1, C1 ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } }
inline void SUMMA_TTA ( Orientation orientationOfA, Orientation orientationOfB, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE CallStackEntry entry("gemm::SUMMA_TTA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) LogicError("{A,B,C} must have the same grid"); if( orientationOfA == NORMAL || orientationOfB == NORMAL ) LogicError("A and B must be (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Height() != C.Width() || A.Height() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal matrices: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; LogicError( msg.str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,STAR,MC > B1_STAR_MC(g); DistMatrix<T,MR, STAR> D1_MR_STAR(g); DistMatrix<T,MR, MC > D1_MR_MC(g); DistMatrix<T> D1(g); B1_STAR_MC.AlignWith( A ); D1_MR_STAR.AlignWith( A ); // Start the algorithm Scale( beta, C ); LockedPartitionDown ( B, BT, BB, 0 ); PartitionRight( C, CL, CR, 0 ); while( BB.Height() > 0 ) { LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); D1.AlignWith( C1 ); //--------------------------------------------------------------------// B1_STAR_MC = B1; // B1[*,MC] <- B1[MC,MR] // D1[MR,*] := alpha (A[MC,MR])^T (B1[*,MC])^T // = alpha (A^T)[MR,MC] (B1^T)[MC,*] LocalGemm ( orientationOfA, orientationOfB, alpha, A, B1_STAR_MC, D1_MR_STAR ); // C1[MC,MR] += scattered & transposed D1[MR,*] summed over grid cols D1_MR_MC.SumScatterFrom( D1_MR_STAR ); D1 = D1_MR_MC; Axpy( T(1), D1, C1 ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, 
/**/ C2 ); } }
void TestCorrectness ( bool print, UpperOrLower uplo, const DistMatrix<F>& A, const DistMatrix<F,STAR,STAR>& t, DistMatrix<F>& AOrig ) { typedef BASE(F) Real; const Grid& g = A.Grid(); const Int m = AOrig.Height(); Int subdiagonal = ( uplo==LOWER ? -1 : +1 ); if( g.Rank() == 0 ) cout << "Testing error..." << endl; // Grab the diagonal and subdiagonal of the symmetric tridiagonal matrix DistMatrix<Real,MD,STAR> d(g); DistMatrix<Real,MD,STAR> e(g); A.GetRealPartOfDiagonal( d ); A.GetRealPartOfDiagonal( e, subdiagonal ); // Grab a full copy of e so that we may fill the opposite subdiagonal DistMatrix<Real,STAR,STAR> e_STAR_STAR(g); DistMatrix<Real,MD,STAR> eOpposite(g); e_STAR_STAR = e; eOpposite.AlignWithDiagonal( A.DistData(), -subdiagonal ); eOpposite = e_STAR_STAR; // Zero B and then fill its tridiagonal DistMatrix<F> B(g); B.AlignWith( A ); Zeros( B, m, m ); B.SetRealPartOfDiagonal( d ); B.SetRealPartOfDiagonal( e, subdiagonal ); B.SetRealPartOfDiagonal( eOpposite, -subdiagonal ); if( print ) Print( B, "Tridiagonal" ); // Reverse the accumulated Householder transforms, ignoring symmetry hermitian_tridiag::ApplyQ( LEFT, uplo, NORMAL, A, t, B ); hermitian_tridiag::ApplyQ( RIGHT, uplo, ADJOINT, A, t, B ); if( print ) Print( B, "Rotated tridiagonal" ); // Compare the appropriate triangle of AOrig and B MakeTriangular( uplo, AOrig ); MakeTriangular( uplo, B ); Axpy( F(-1), AOrig, B ); if( print ) Print( B, "Error in rotated tridiagonal" ); const Real infNormOfAOrig = HermitianInfinityNorm( uplo, AOrig ); const Real frobNormOfAOrig = HermitianFrobeniusNorm( uplo, AOrig ); const Real infNormOfError = HermitianInfinityNorm( uplo, B ); const Real frobNormOfError = HermitianFrobeniusNorm( uplo, B ); if( g.Rank() == 0 ) { cout << " ||AOrig||_1 = ||AOrig||_oo = " << infNormOfAOrig << "\n" << " ||AOrig||_F = " << frobNormOfAOrig << "\n" << " ||AOrig - Q^H A Q||_oo = " << infNormOfError << "\n" << " ||AOrig - Q^H A Q||_F = " << frobNormOfError << endl; } }
inline void TwoSidedTrsmUVar4 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& U ) { #ifndef RELEASE CallStackEntry entry("internal::TwoSidedTrsmUVar4"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( U.Height() != U.Width() ) LogicError("Triangular matrices must be square"); if( A.Height() != U.Height() ) LogicError("A and U must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<F,VC, STAR> A01_VC_STAR(g); DistMatrix<F,STAR,MC > A01Trans_STAR_MC(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,VC > A12_STAR_VC(g); DistMatrix<F,STAR,MC > A12_STAR_MC(g); DistMatrix<F,STAR,MR > A12_STAR_MR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,MR, STAR> U12Trans_MR_STAR(g); DistMatrix<F,VR, STAR> U12Trans_VR_STAR(g); DistMatrix<F,STAR,VR > U12_STAR_VR(g); DistMatrix<F,STAR,VC > U12_STAR_VC(g); DistMatrix<F,STAR,MC > U12_STAR_MC(g); DistMatrix<F,STAR,VR > Y12_STAR_VR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); A01_VC_STAR.AlignWith( A02 ); A01Trans_STAR_MC.AlignWith( A02 ); A12_STAR_VR.AlignWith( A22 ); A12_STAR_VC.AlignWith( A22 ); A12_STAR_MC.AlignWith( A22 ); A12_STAR_MR.AlignWith( A22 ); U12Trans_MR_STAR.AlignWith( A02 ); U12Trans_VR_STAR.AlignWith( A02 ); 
U12_STAR_VR.AlignWith( A02 ); U12_STAR_VC.AlignWith( A22 ); U12_STAR_MC.AlignWith( A22 ); Y12_STAR_VR.AlignWith( A12 ); //--------------------------------------------------------------------// // A01 := A01 inv(U11) A01_VC_STAR = A01; U11_STAR_STAR = U11; LocalTrsm ( RIGHT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, A01_VC_STAR ); A01 = A01_VC_STAR; // A11 := inv(U11)' A11 inv(U11) A11_STAR_STAR = A11; LocalTwoSidedTrsm( UPPER, diag, A11_STAR_STAR, U11_STAR_STAR ); A11 = A11_STAR_STAR; // A02 := A02 - A01 U12 A01Trans_STAR_MC.TransposeFrom( A01_VC_STAR ); U12Trans_MR_STAR.TransposeFrom( U12 ); LocalGemm ( TRANSPOSE, TRANSPOSE, F(-1), A01Trans_STAR_MC, U12Trans_MR_STAR, F(1), A02 ); // Y12 := A11 U12 U12Trans_VR_STAR = U12Trans_MR_STAR; Zeros( U12_STAR_VR, A12.Height(), A12.Width() ); Transpose( U12Trans_VR_STAR.Matrix(), U12_STAR_VR.Matrix() ); Zeros( Y12_STAR_VR, A12.Height(), A12.Width() ); Hemm ( LEFT, UPPER, F(1), A11_STAR_STAR.Matrix(), U12_STAR_VR.Matrix(), F(0), Y12_STAR_VR.Matrix() ); // A12 := inv(U11)' A12 A12_STAR_VR = A12; LocalTrsm ( LEFT, UPPER, ADJOINT, diag, F(1), U11_STAR_STAR, A12_STAR_VR ); // A12 := A12 - 1/2 Y12 Axpy( F(-1)/F(2), Y12_STAR_VR, A12_STAR_VR ); // A22 := A22 - (A12' U12 + U12' A12) A12_STAR_MR = A12_STAR_VR; A12_STAR_VC = A12_STAR_VR; U12_STAR_VC = U12_STAR_VR; A12_STAR_MC = A12_STAR_VC; U12_STAR_MC = U12_STAR_VC; LocalTrr2k ( UPPER, ADJOINT, TRANSPOSE, ADJOINT, F(-1), A12_STAR_MC, U12Trans_MR_STAR, U12_STAR_MC, A12_STAR_MR, F(1), A22 ); // A12 := A12 - 1/2 Y12 Axpy( F(-1)/F(2), Y12_STAR_VR, A12_STAR_VR ); A12 = A12_STAR_VR; //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /**********************************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } }
void TestCorrectness ( bool print, const DistMatrix<Complex<R> >& A, const DistMatrix<Complex<R>,MD,STAR>& t, DistMatrix<Complex<R> >& AOrig ) { typedef Complex<R> C; const Grid& g = A.Grid(); const int m = A.Height(); const int n = A.Width(); const int minDim = std::min(m,n); if( g.Rank() == 0 ) cout << " Testing orthogonality of Q..." << endl; // Form Z := Q^H Q as an approximation to identity DistMatrix<C> Z(g); Identity( m, n, Z ); ApplyPackedReflectors ( LEFT, LOWER, VERTICAL, BACKWARD, UNCONJUGATED, 0, A, t, Z ); ApplyPackedReflectors ( LEFT, LOWER, VERTICAL, FORWARD, CONJUGATED, 0, A, t, Z ); DistMatrix<C> ZUpper(g); View( ZUpper, Z, 0, 0, minDim, minDim ); // Form Identity DistMatrix<C> X(g); Identity( minDim, minDim, X ); // Form X := I - Q^H Q Axpy( C(-1), ZUpper, X ); R oneNormOfError = Norm( X, ONE_NORM ); R infNormOfError = Norm( X, INFINITY_NORM ); R frobNormOfError = Norm( X, FROBENIUS_NORM ); if( g.Rank() == 0 ) { cout << " ||Q^H Q - I||_1 = " << oneNormOfError << "\n" << " ||Q^H Q - I||_oo = " << infNormOfError << "\n" << " ||Q^H Q - I||_F = " << frobNormOfError << endl; } if( g.Rank() == 0 ) cout << " Testing if A = QR..." << endl; // Form Q R DistMatrix<C> U( A ); MakeTriangular( UPPER, U ); ApplyPackedReflectors ( LEFT, LOWER, VERTICAL, BACKWARD, UNCONJUGATED, 0, A, t, U ); // Form Q R - A Axpy( C(-1), AOrig, U ); const R oneNormOfA = Norm( AOrig, ONE_NORM ); const R infNormOfA = Norm( AOrig, INFINITY_NORM ); const R frobNormOfA = Norm( AOrig, FROBENIUS_NORM ); oneNormOfError = Norm( U, ONE_NORM ); infNormOfError = Norm( U, INFINITY_NORM ); frobNormOfError = Norm( U, FROBENIUS_NORM ); if( g.Rank() == 0 ) { cout << " ||A||_1 = " << oneNormOfA << "\n" << " ||A||_oo = " << infNormOfA << "\n" << " ||A||_F = " << frobNormOfA << "\n" << " ||A - QR||_1 = " << oneNormOfError << "\n" << " ||A - QR||_oo = " << infNormOfError << "\n" << " ||A - QR||_F = " << frobNormOfError << endl; } }
inline void CholeskyUVar2( DistMatrix<F>& A ) { #ifndef RELEASE PushCallStack("hpd_inverse::CholeskyUVar2"); if( A.Height() != A.Width() ) throw std::logic_error("Nonsquare matrices cannot be triangular"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A01_VC_STAR(g); DistMatrix<F,VR, STAR> A01_VR_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,MC > A01Trans_STAR_MC(g); DistMatrix<F,MR, STAR> A01_MR_STAR(g); DistMatrix<F,STAR,MR > A01Adj_STAR_MR(g); DistMatrix<F,STAR,MR > A12_STAR_MR(g); DistMatrix<F,STAR,MC > A12_STAR_MC(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A01_VC_STAR.AlignWith( A00 ); A12_STAR_VR.AlignWith( A02 ); A01Trans_STAR_MC.AlignWith( A00 ); A01_VR_STAR.AlignWith( A00 ); A01Adj_STAR_MR.AlignWith( A00 ); A12_STAR_MR.AlignWith( A02 ); A12_STAR_MC.AlignWith( A22 ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; LocalCholesky( UPPER, A11_STAR_STAR ); A01_VC_STAR = A01; LocalTrsm ( RIGHT, UPPER, NORMAL, NON_UNIT, F(1), A11_STAR_STAR, A01_VC_STAR ); A12_STAR_VR = A12; LocalTrsm ( LEFT, UPPER, ADJOINT, NON_UNIT, F(1), A11_STAR_STAR, A12_STAR_VR ); A01Trans_STAR_MC.TransposeFrom( A01_VC_STAR ); A01_VR_STAR = A01_VC_STAR; A01Adj_STAR_MR.AdjointFrom( A01_VR_STAR ); LocalTrrk ( UPPER, TRANSPOSE, F(1), A01Trans_STAR_MC, A01Adj_STAR_MR, F(1), A00 ); A12_STAR_MR = A12_STAR_VR; LocalGemm ( TRANSPOSE, NORMAL, F(-1), A01Trans_STAR_MC, A12_STAR_MR, F(1), A02 ); A12_STAR_MC = A12_STAR_VR; LocalTrrk ( UPPER, ADJOINT, F(-1), A12_STAR_MC, A12_STAR_MR, F(1), A22 ); 
LocalTrsm ( RIGHT, UPPER, ADJOINT, NON_UNIT, F(1), A11_STAR_STAR, A01_VC_STAR ); LocalTrsm ( LEFT, UPPER, NORMAL, NON_UNIT, F(-1), A11_STAR_STAR, A12_STAR_VR ); LocalTriangularInverse( UPPER, NON_UNIT, A11_STAR_STAR ); LocalTrtrmm( ADJOINT, UPPER, A11_STAR_STAR ); A11 = A11_STAR_STAR; A01 = A01_VC_STAR; A12 = A12_STAR_VR; //--------------------------------------------------------------------// A01_VC_STAR.FreeAlignments(); A12_STAR_VR.FreeAlignments(); A01Trans_STAR_MC.FreeAlignments(); A01_VR_STAR.FreeAlignments(); A01Adj_STAR_MR.FreeAlignments(); A12_STAR_MR.FreeAlignments(); A12_STAR_MC.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void UVar3( DistMatrix<F>& A ) { #ifndef RELEASE CallStackEntry entry("cholesky::UVar3"); if( A.Height() != A.Width() ) throw std::logic_error ("Can only compute Cholesky factor of square matrices"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary matrix distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,MC > A12_STAR_MC(g); DistMatrix<F,STAR,MR > A12_STAR_MR(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ABR.Height() > 0 ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A12_STAR_MC.AlignWith( A22 ); A12_STAR_MR.AlignWith( A22 ); A12_STAR_VR.AlignWith( A22 ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; LocalCholesky( UPPER, A11_STAR_STAR ); A11 = A11_STAR_STAR; A12_STAR_VR = A12; LocalTrsm ( LEFT, UPPER, ADJOINT, NON_UNIT, F(1), A11_STAR_STAR, A12_STAR_VR ); A12_STAR_MC = A12_STAR_VR; A12_STAR_MR = A12_STAR_VR; LocalTrrk ( UPPER, ADJOINT, F(-1), A12_STAR_MC, A12_STAR_MR, F(1), A22 ); A12 = A12_STAR_MR; //--------------------------------------------------------------------// A12_STAR_MC.FreeAlignments(); A12_STAR_MR.FreeAlignments(); A12_STAR_VR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } }
inline void MakeNormalUniformSpectrum ( DistMatrix<Complex<R>,U,V>& A, Complex<R> center=0, R radius=1 ) { #ifndef RELEASE CallStackEntry entry("MakeNormalUniformSpectrum"); #endif typedef Complex<R> C; if( A.Height() != A.Width() ) LogicError("Cannot make a non-square matrix normal"); const Grid& grid = A.Grid(); const bool standardDist = ( U == MC && V == MR ); // Sample the diagonal matrix D from the ball B_radius(center) // and then rotate it with a random Householder similarity transformation: // // (I-2uu^H) D (I-2uu^H)^H = D - 2(u (Conj(D) u)^H + (D u) u^H) + // (4 u^H D u) u u^H // // Form d and D const Int n = A.Height(); std::vector<C> d( n ); if( grid.Rank() == 0 ) for( Int j=0; j<n; ++j ) d[j] = SampleBall<C>( center, radius ); mpi::Broadcast( &d[0], n, 0, grid.Comm() ); DistMatrix<C> ABackup( grid ); if( standardDist ) Diagonal( A, d ); else { ABackup.AlignWith( A ); Diagonal( ABackup, d ); } // Form u DistMatrix<C> u( grid ); if( standardDist ) u.AlignWith( A ); else u.AlignWith( ABackup ); Uniform( u, n, 1 ); const R origNorm = Nrm2( u ); Scale( 1/origNorm, u ); // Form v := D u DistMatrix<C> v( grid ); if( standardDist ) v.AlignWith( A ); else v.AlignWith( ABackup ); v.ResizeTo( n, 1 ); if( v.LocalWidth() == 1 ) { const Int colShift = v.ColShift(); const Int colStride = v.ColStride(); const Int localHeight = v.LocalHeight(); for( Int iLoc=0; iLoc<localHeight; ++iLoc ) { const Int i = colShift + iLoc*colStride; v.SetLocal( iLoc, 0, d[i]*u.GetLocal(iLoc,0) ); } } // Form w := Conj(D) u DistMatrix<C> w( grid ); if( standardDist ) w.AlignWith( A ); else w.AlignWith( ABackup ); w.ResizeTo( n, 1 ); if( w.LocalWidth() == 1 ) { const Int colShift = w.ColShift(); const Int colStride = w.ColStride(); const Int localHeight = w.LocalHeight(); for( Int iLoc=0; iLoc<localHeight; ++iLoc ) { const Int i = colShift + iLoc*colStride; w.SetLocal( iLoc, 0, Conj(d[i])*u.GetLocal(iLoc,0) ); } } // Update A := A - 2(u w^H + v u^H) if( standardDist ) { Ger( C(-2), u, w, A 
); Ger( C(-2), v, u, A ); } else { Ger( C(-2), u, w, ABackup ); Ger( C(-2), v, u, ABackup ); } // Form \gamma := 4 u^H (D u) = 4 (u,Du) const C gamma = 4*Dot(u,v); // Update A := A + gamma u u^H if( standardDist ) Ger( gamma, u, u, A ); else Ger( gamma, u, u, ABackup ); // Copy the result into the correct distribution if( !standardDist ) A = ABackup; }
inline void SUMMA_TTC ( Orientation orientationOfA, Orientation orientationOfB, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE CallStackEntry entry("gemm::SUMMA_TTC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) LogicError("{A,B,C} must have the same grid"); if( orientationOfA == NORMAL || orientationOfB == NORMAL ) LogicError("A and B must be (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Height() != C.Width() || A.Height() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal matrices: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; LogicError( msg.str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,STAR,MC > A1_STAR_MC(g); DistMatrix<T,VR, STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR > B1AdjOrTrans_STAR_MR(g); A1_STAR_MC.AlignWith( C ); B1_VR_STAR.AlignWith( C ); B1AdjOrTrans_STAR_MR.AlignWith( C ); // Start the algorithm Scale( beta, C ); LockedPartitionDown ( A, AT, AB, 0 ); LockedPartitionRight( B, BL, BR, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); //--------------------------------------------------------------------// A1_STAR_MC = A1; B1_VR_STAR = B1; if( orientationOfB == ADJOINT ) B1AdjOrTrans_STAR_MR.AdjointFrom( B1_VR_STAR ); else B1AdjOrTrans_STAR_MR.TransposeFrom( B1_VR_STAR ); // C[MC,MR] += alpha (A1[*,MC])^[T/H] (B1[MR,*])^[T/H] // = alpha (A1^[T/H])[MC,*] (B1^[T/H])[*,MR] LocalGemm ( orientationOfA, NORMAL, alpha, A1_STAR_MC, B1AdjOrTrans_STAR_MR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); 
SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); } }
void RowAllToAllPromote ( const DistMatrix<T, U, V >& A, DistMatrix<T,PartialUnionCol<U,V>(),Partial<V>()>& B ) { DEBUG_ONLY(CSE cse("copy::RowAllToAllPromote")) AssertSameGrids( A, B ); const Int height = A.Height(); const Int width = A.Width(); B.AlignRowsAndResize ( Mod(A.RowAlign(),B.RowStride()), height, width, false, false ); if( !B.Participating() ) return; const Int rowAlign = A.RowAlign(); const Int rowStride = A.RowStride(); const Int rowStridePart = A.PartialRowStride(); const Int rowStrideUnion = A.PartialUnionRowStride(); const Int rowRankPart = A.PartialRowRank(); const Int rowDiff = B.RowAlign() - Mod(rowAlign,rowStridePart); const Int maxLocalWidth = MaxLength(width,rowStride); const Int maxLocalHeight = MaxLength(height,rowStrideUnion); const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth ); if( rowDiff == 0 ) { if( A.PartialUnionRowStride() == 1 ) { Copy( A.LockedMatrix(), B.Matrix() ); } else { vector<T> buffer; FastResize( buffer, 2*rowStrideUnion*portionSize ); T* firstBuf = &buffer[0]; T* secondBuf = &buffer[rowStrideUnion*portionSize]; // Pack util::ColStridedPack ( height, A.LocalWidth(), B.ColAlign(), rowStrideUnion, A.LockedBuffer(), A.LDim(), firstBuf, portionSize ); // Simultaneously Gather in rows and Scatter in columns mpi::AllToAll ( firstBuf, portionSize, secondBuf, portionSize, A.PartialUnionRowComm() ); // Unpack util::PartialRowStridedUnpack ( B.LocalHeight(), width, rowAlign, rowStride, rowStrideUnion, rowStridePart, rowRankPart, B.RowShift(), secondBuf, portionSize, B.Buffer(), B.LDim() ); } } else { #ifdef EL_UNALIGNED_WARNINGS if( A.Grid().Rank() == 0 ) cerr << "Unaligned RowAllToAllPromote" << endl; #endif const Int sendRowRankPart = Mod( rowRankPart+rowDiff, rowStridePart ); const Int recvRowRankPart = Mod( rowRankPart-rowDiff, rowStridePart ); vector<T> buffer; FastResize( buffer, 2*rowStrideUnion*portionSize ); T* firstBuf = &buffer[0]; T* secondBuf = &buffer[rowStrideUnion*portionSize]; // Pack 
util::ColStridedPack ( height, A.LocalWidth(), B.ColAlign(), rowStrideUnion, A.LockedBuffer(), A.LDim(), secondBuf, portionSize ); // Realign the input mpi::SendRecv ( secondBuf, rowStrideUnion*portionSize, sendRowRankPart, firstBuf, rowStrideUnion*portionSize, recvRowRankPart, A.PartialRowComm() ); // Simultaneously Scatter in rows and Gather in columns mpi::AllToAll ( firstBuf, portionSize, secondBuf, portionSize, A.PartialUnionRowComm() ); // Unpack util::PartialRowStridedUnpack ( B.LocalHeight(), width, rowAlign, rowStride, rowStrideUnion, rowStridePart, recvRowRankPart, B.RowShift(), secondBuf, portionSize, B.Buffer(), B.LDim() ); } }
inline void LU( DistMatrix<F>& A, DistMatrix<int,VC,STAR>& p ) { #ifndef RELEASE CallStackEntry entry("LU"); if( A.Grid() != p.Grid() ) throw std::logic_error("{A,p} must be distributed over the same grid"); if( p.Viewing() && (std::min(A.Height(),A.Width()) != p.Height() || p.Width() != 1) ) throw std::logic_error ("p must be a vector of the same height as the min dimension of A."); #endif const Grid& g = A.Grid(); if( !p.Viewing() ) p.ResizeTo( std::min(A.Height(),A.Width()), 1 ); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), AB(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<int,VC,STAR> pT(g), p0(g), pB(g), p1(g), p2(g); // Temporary distributions DistMatrix<F, STAR,STAR> A11_STAR_STAR(g); DistMatrix<F, MC, STAR> A21_MC_STAR(g); DistMatrix<F, STAR,VR > A12_STAR_VR(g); DistMatrix<F, STAR,MR > A12_STAR_MR(g); DistMatrix<int,STAR,STAR> p1_STAR_STAR(g); // Pivot composition std::vector<int> image, preimage; // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( p, pT, pB, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( pT, p0, /**/ /**/ p1, pB, p2 ); View1x2( AB, ABL, ABR ); const int pivotOffset = A01.Height(); A12_STAR_VR.AlignWith( A22 ); A12_STAR_MR.AlignWith( A22 ); A21_MC_STAR.AlignWith( A22 ); A11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() ); p1_STAR_STAR.ResizeTo( p1.Height(), 1 ); //--------------------------------------------------------------------// A21_MC_STAR = A21; A11_STAR_STAR = A11; lu::Panel( A11_STAR_STAR, A21_MC_STAR, p1_STAR_STAR, pivotOffset ); ComposePivots( p1_STAR_STAR, pivotOffset, image, preimage ); ApplyRowPivots( AB, image, preimage ); // Perhaps we should give up perfectly distributing this operation since // it's total contribution is only O(n^2) 
A12_STAR_VR = A12; LocalTrsm ( LEFT, LOWER, NORMAL, UNIT, F(1), A11_STAR_STAR, A12_STAR_VR ); A12_STAR_MR = A12_STAR_VR; LocalGemm( NORMAL, NORMAL, F(-1), A21_MC_STAR, A12_STAR_MR, F(1), A22 ); A11 = A11_STAR_STAR; A12 = A12_STAR_MR; A21 = A21_MC_STAR; p1 = p1_STAR_STAR; //--------------------------------------------------------------------// A12_STAR_VR.FreeAlignments(); A12_STAR_MR.FreeAlignments(); A21_MC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlidePartitionDown ( pT, p0, p1, /**/ /**/ pB, p2 ); } }
inline void GolubReinschUpper_FLA ( DistMatrix<F>& A, DistMatrix<BASE(F),VR,STAR>& s, DistMatrix<F>& V ) { #ifndef RELEASE CallStackEntry entry("svd::GolubReinschUpper_FLA"); #endif typedef BASE(F) Real; const Int m = A.Height(); const Int n = A.Width(); const Int k = Min( m, n ); const Int offdiagonal = ( m>=n ? 1 : -1 ); const Grid& g = A.Grid(); // Bidiagonalize A DistMatrix<F,STAR,STAR> tP(g), tQ(g); Bidiag( A, tP, tQ ); // Grab copies of the diagonal and sub/super-diagonal of A DistMatrix<Real,MD,STAR> d_MD_STAR(g), e_MD_STAR(g); A.GetRealPartOfDiagonal( d_MD_STAR ); A.GetRealPartOfDiagonal( e_MD_STAR, offdiagonal ); // In order to use serial QR kernels, we need the full bidiagonal matrix // on each process DistMatrix<Real,STAR,STAR> d_STAR_STAR( d_MD_STAR ), e_STAR_STAR( e_MD_STAR ); // Initialize U and VAdj to the appropriate identity matrices DistMatrix<F,VC,STAR> U_VC_STAR(g), V_VC_STAR(g); U_VC_STAR.AlignWith( A ); V_VC_STAR.AlignWith( V ); Identity( U_VC_STAR, m, k ); Identity( V_VC_STAR, n, k ); FlaSVD ( k, U_VC_STAR.LocalHeight(), V_VC_STAR.LocalHeight(), d_STAR_STAR.Buffer(), e_STAR_STAR.Buffer(), U_VC_STAR.Buffer(), U_VC_STAR.LDim(), V_VC_STAR.Buffer(), V_VC_STAR.LDim() ); // Make a copy of A (for the Householder vectors) and pull the necessary // portions of U and V into a standard matrix dist. DistMatrix<F> B( A ); if( m >= n ) { DistMatrix<F> AT(g), AB(g); DistMatrix<F,VC,STAR> UT_VC_STAR(g), UB_VC_STAR(g); PartitionDown( A, AT, AB, n ); PartitionDown( U_VC_STAR, UT_VC_STAR, UB_VC_STAR, n ); AT = UT_VC_STAR; MakeZeros( AB ); V = V_VC_STAR; } else { DistMatrix<F> VT(g), VB(g); DistMatrix<F,VC,STAR> VT_VC_STAR(g), VB_VC_STAR(g); PartitionDown( V, VT, VB, m ); PartitionDown( V_VC_STAR, VT_VC_STAR, VB_VC_STAR, m ); VT = VT_VC_STAR; MakeZeros( VB ); } // Backtransform U and V bidiag::ApplyU( LEFT, NORMAL, B, tQ, A ); bidiag::ApplyV( LEFT, NORMAL, B, tP, V ); // Copy out the appropriate subset of the singular values s = d_STAR_STAR; }
inline void LU( DistMatrix<F>& A ) { #ifndef RELEASE CallStackEntry entry("LU"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,MC, STAR> A21_MC_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,MR > A12_STAR_MR(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A12_STAR_VR.AlignWith( A22 ); A12_STAR_MR.AlignWith( A22 ); A21_MC_STAR.AlignWith( A22 ); A11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; LocalLU( A11_STAR_STAR ); A11 = A11_STAR_STAR; A21_MC_STAR = A21; LocalTrsm ( RIGHT, UPPER, NORMAL, NON_UNIT, F(1), A11_STAR_STAR, A21_MC_STAR ); A21 = A21_MC_STAR; // Perhaps we should give up perfectly distributing this operation since // it's total contribution is only O(n^2) A12_STAR_VR = A12; LocalTrsm ( LEFT, LOWER, NORMAL, UNIT, F(1), A11_STAR_STAR, A12_STAR_VR ); A12_STAR_MR = A12_STAR_VR; LocalGemm( NORMAL, NORMAL, F(-1), A21_MC_STAR, A12_STAR_MR, F(1), A22 ); A12 = A12_STAR_MR; //--------------------------------------------------------------------// A12_STAR_VR.FreeAlignments(); A12_STAR_MR.FreeAlignments(); A21_MC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } }
inline void Ger ( T alpha, const DistMatrix<T>& x, const DistMatrix<T>& y, DistMatrix<T>& A ) { #ifndef RELEASE CallStackEntry entry("Ger"); if( A.Grid() != x.Grid() || x.Grid() != y.Grid() ) LogicError ("{A,x,y} must be distributed over the same grid"); if( ( x.Width() != 1 && x.Height() != 1 ) || ( y.Width() != 1 && y.Height() != 1 ) ) LogicError("x and y are assumed to be vectors"); const Int xLength = ( x.Width()==1 ? x.Height() : x.Width() ); const Int yLength = ( y.Width()==1 ? y.Height() : y.Width() ); if( A.Height() != xLength || A.Width() != yLength ) { std::ostringstream msg; msg << "Nonconformal Ger: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " x ~ " << x.Height() << " x " << x.Width() << "\n" << " y ~ " << y.Height() << " x " << y.Width() << "\n"; LogicError( msg.str() ); } #endif const Grid& g = A.Grid(); if( x.Width() == 1 && y.Width() == 1 ) { // Temporary distributions DistMatrix<T,MC,STAR> x_MC_STAR(g); DistMatrix<T,MR,STAR> y_MR_STAR(g); // Begin the algoritm x_MC_STAR.AlignWith( A ); y_MR_STAR.AlignWith( A ); //--------------------------------------------------------------------// x_MC_STAR = x; y_MR_STAR = y; Ger ( alpha, x_MC_STAR.LockedMatrix(), y_MR_STAR.LockedMatrix(), A.Matrix() ); //--------------------------------------------------------------------// } else if( x.Width() == 1 ) { // Temporary distributions DistMatrix<T,MC, STAR> x_MC_STAR(g); DistMatrix<T,STAR,MR > y_STAR_MR(g); // Begin the algorithm x_MC_STAR.AlignWith( A ); y_STAR_MR.AlignWith( A ); //--------------------------------------------------------------------// x_MC_STAR = x; y_STAR_MR = y; Ger ( alpha, x_MC_STAR.LockedMatrix(), y_STAR_MR.LockedMatrix(), A.Matrix() ); //--------------------------------------------------------------------// } else if( y.Width() == 1 ) { // Temporary distributions DistMatrix<T,STAR,MC > x_STAR_MC(g); DistMatrix<T,MR, STAR> y_MR_STAR(g); // Begin the algorithm x_STAR_MC.AlignWith( A ); y_MR_STAR.AlignWith( A ); 
//--------------------------------------------------------------------// x_STAR_MC = x; y_MR_STAR = y; Ger ( alpha, x_STAR_MC.LockedMatrix(), y_MR_STAR.LockedMatrix(), A.Matrix() ); //--------------------------------------------------------------------// } else { // Temporary distributions DistMatrix<T,STAR,MC> x_STAR_MC(g); DistMatrix<T,STAR,MR> y_STAR_MR(g); // Begin the algorithm x_STAR_MC.AlignWith( A ); y_STAR_MR.AlignWith( A ); //--------------------------------------------------------------------// x_STAR_MC = x; y_STAR_MR = y; Ger ( alpha, x_STAR_MC.LockedMatrix(), y_STAR_MR.LockedMatrix(), A.Matrix() ); //--------------------------------------------------------------------// } }
int main( int argc, char* argv[] ) { Environment env( argc, argv ); try { const Int m = Input("--height","height of matrix",20); const Int n = Input("--width","width of matrix",100); const Int r = Input("--rank","rank of matrix",5); const Int maxSteps = Input("--maxSteps","max # of steps of QR",10); const Real tol = Input("--tol","tolerance for ID",Real(-1)); const bool print = Input("--print","print matrices?",false); ProcessInput(); PrintInputReport(); DistMatrix<C> U, V; Uniform( U, m, r ); Uniform( V, n, r ); DistMatrix<C> A; Gemm( NORMAL, ADJOINT, C(1), U, V, A ); const Real frobA = FrobeniusNorm( A ); if( print ) Print( A, "A" ); const Grid& g = A.Grid(); QRCtrl<Real> ctrl; ctrl.boundRank = true; ctrl.maxRank = maxSteps; if( tol != Real(-1) ) { ctrl.adaptive = true; ctrl.tol = tol; } DistPermutation PR(g), PC(g); DistMatrix<C> Z(g); Timer timer; if( mpi::Rank() == 0 ) timer.Start(); Skeleton( A, PR, PC, Z, ctrl ); if( mpi::Rank() == 0 ) timer.Stop(); const Int rank = Z.Height(); if( print ) { DistMatrix<Int> PFull(g); PR.ExplicitMatrix( PFull ); Print( PFull, "PR" ); PC.ExplicitMatrix( PFull ); Print( PFull, "PC" ); Print( Z, "Z" ); } // Form the matrices of A's (hopefully) dominant rows and columns DistMatrix<C> AR( A ); PR.PermuteRows( AR ); AR.Resize( rank, A.Width() ); DistMatrix<C> AC( A ); PC.PermuteCols( AC ); AC.Resize( A.Height(), rank ); if( print ) { Print( AC, "A_C" ); Print( AR, "A_R" ); } // Check || A - AC Z AR ||_F / || A ||_F DistMatrix<C> B(g); Gemm( NORMAL, NORMAL, C(1), Z, AR, B ); Gemm( NORMAL, NORMAL, C(-1), AC, B, C(1), A ); const Real frobError = FrobeniusNorm( A ); if( print ) Print( A, "A - A_C Z A_R" ); if( mpi::Rank() == 0 ) { Output("Skeleton time: ",timer.Total()," secs"); Output ("|| A ||_F = ",frobA,"\n", "|| A - A_C Z A_R ||_F / || A ||_F = ",frobError/frobA); } } catch( exception& e ) { ReportException(e); } return 0; }
void LSquare ( DistMatrix<Complex<R> >& A, DistMatrix<Complex<R>,STAR,STAR>& t ) { #ifndef RELEASE CallStackEntry entry("hermitian_tridiag::LSquare"); if( A.Grid() != t.Grid() ) throw std::logic_error("{A,t} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); #ifndef RELEASE if( g.Height() != g.Width() ) throw std::logic_error("The process grid must be square"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( t.Viewing() ) throw std::logic_error("t must not be a view"); #endif typedef Complex<R> C; DistMatrix<C,MD,STAR> tDiag(g); tDiag.AlignWithDiagonal( A, -1 ); tDiag.ResizeTo( A.Height()-1, 1 ); // Matrix views DistMatrix<C> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); // Temporary distributions DistMatrix<C> WPan(g); DistMatrix<C,STAR,STAR> t1_STAR_STAR(g); DistMatrix<C,STAR,STAR> A11_STAR_STAR(g); DistMatrix<C,MC, STAR> APan_MC_STAR(g), A11_MC_STAR(g), A21_MC_STAR(g); DistMatrix<C,MR, STAR> APan_MR_STAR(g), A11_MR_STAR(g), A21_MR_STAR(g); DistMatrix<C,MC, STAR> WPan_MC_STAR(g), W11_MC_STAR(g), W21_MC_STAR(g); DistMatrix<C,MR, STAR> WPan_MR_STAR(g), W11_MR_STAR(g), W21_MR_STAR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( tDiag, tT, tB, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2 ); if( A22.Height() > 0 ) { WPan.AlignWith( A11 ); APan_MC_STAR.AlignWith( A11 ); WPan_MC_STAR.AlignWith( A11 ); APan_MR_STAR.AlignWith( A11 ); WPan_MR_STAR.AlignWith( A11 ); //----------------------------------------------------------------// WPan.ResizeTo( ABR.Height(), A11.Width() ); APan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() ); WPan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() ); 
APan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() ); WPan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() ); hermitian_tridiag::PanelLSquare ( ABR, WPan, t1, APan_MC_STAR, APan_MR_STAR, WPan_MC_STAR, WPan_MR_STAR ); PartitionDown ( APan_MC_STAR, A11_MC_STAR, A21_MC_STAR, A11.Height() ); PartitionDown ( APan_MR_STAR, A11_MR_STAR, A21_MR_STAR, A11.Height() ); PartitionDown ( WPan_MC_STAR, W11_MC_STAR, W21_MC_STAR, A11.Height() ); PartitionDown ( WPan_MR_STAR, W11_MR_STAR, W21_MR_STAR, A11.Height() ); LocalTrr2k ( LOWER, ADJOINT, ADJOINT, C(-1), A21_MC_STAR, W21_MR_STAR, W21_MC_STAR, A21_MR_STAR, C(1), A22 ); //----------------------------------------------------------------// WPan_MR_STAR.FreeAlignments(); APan_MR_STAR.FreeAlignments(); WPan_MC_STAR.FreeAlignments(); APan_MC_STAR.FreeAlignments(); WPan.FreeAlignments(); } else { A11_STAR_STAR = A11; t1_STAR_STAR.ResizeTo( t1.Height(), 1 ); HermitianTridiag ( LOWER, A11_STAR_STAR.Matrix(), t1_STAR_STAR.Matrix() ); A11 = A11_STAR_STAR; t1 = t1_STAR_STAR; } SlidePartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } // Redistribute from matrix-diagonal form to fully replicated t = tDiag; }
inline void TwoSidedTrmmLVar4 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& L ) { #ifndef RELEASE CallStackEntry entry("internal::TwoSidedTrmmLVar4"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( L.Height() != L.Width() ) LogicError("Triangular matrices must be square"); if( A.Height() != L.Height() ) LogicError("A and L must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); // Temporary distributions DistMatrix<F,STAR,VR > A10_STAR_VR(g); DistMatrix<F,STAR,MR > A10_STAR_MR(g); DistMatrix<F,STAR,MC > A10_STAR_MC(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,MC, STAR> A21_MC_STAR(g); DistMatrix<F,STAR,VR > L10_STAR_VR(g); DistMatrix<F,MR, STAR> L10Adj_MR_STAR(g); DistMatrix<F,STAR,MC > L10_STAR_MC(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,VR > Y10_STAR_VR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); A10_STAR_VR.AlignWith( A00 ); A10_STAR_MR.AlignWith( A00 ); A10_STAR_MC.AlignWith( A00 ); A21_MC_STAR.AlignWith( A20 ); L10_STAR_VR.AlignWith( A00 ); L10Adj_MR_STAR.AlignWith( A00 ); L10_STAR_MC.AlignWith( A00 ); Y10_STAR_VR.AlignWith( A10 ); //--------------------------------------------------------------------// // Y10 := A11 L10 A11_STAR_STAR = A11; L10Adj_MR_STAR.AdjointFrom( L10 ); 
L10_STAR_VR.AdjointFrom( L10Adj_MR_STAR ); Zeros( Y10_STAR_VR, A10.Height(), A10.Width() ); Hemm ( LEFT, LOWER, F(1), A11_STAR_STAR.LockedMatrix(), L10_STAR_VR.LockedMatrix(), F(0), Y10_STAR_VR.Matrix() ); // A10 := A10 + 1/2 Y10 A10_STAR_VR = A10; Axpy( F(1)/F(2), Y10_STAR_VR, A10_STAR_VR ); // A00 := A00 + (A10' L10 + L10' A10) A10_STAR_MR = A10_STAR_VR; A10_STAR_MC = A10_STAR_VR; L10_STAR_MC = L10_STAR_VR; LocalTrr2k ( LOWER, ADJOINT, ADJOINT, ADJOINT, F(1), A10_STAR_MC, L10Adj_MR_STAR, L10_STAR_MC, A10_STAR_MR, F(1), A00 ); // A10 := A10 + 1/2 Y10 Axpy( F(1)/F(2), Y10_STAR_VR, A10_STAR_VR ); // A10 := L11' A10 L11_STAR_STAR = L11; LocalTrmm ( LEFT, LOWER, ADJOINT, diag, F(1), L11_STAR_STAR, A10_STAR_VR ); A10 = A10_STAR_VR; // A20 := A20 + A21 L10 A21_MC_STAR = A21; LocalGemm ( NORMAL, ADJOINT, F(1), A21_MC_STAR, L10Adj_MR_STAR, F(1), A20 ); // A11 := L11' A11 L11 LocalTwoSidedTrmm( LOWER, diag, A11_STAR_STAR, L11_STAR_STAR ); A11 = A11_STAR_STAR; // A21 := A21 L11 A21_VC_STAR = A21_MC_STAR; LocalTrmm ( RIGHT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } }
void AllGather ( const DistMatrix<T, U, V >& A, DistMatrix<T,Collect<U>(),Collect<V>()>& B ) { DEBUG_ONLY(CSE cse("copy::AllGather")) AssertSameGrids( A, B ); const Int height = A.Height(); const Int width = A.Width(); B.SetGrid( A.Grid() ); B.Resize( height, width ); if( A.Participating() ) { const Int colStride = A.ColStride(); const Int rowStride = A.RowStride(); const Int distStride = colStride*rowStride; const Int maxLocalHeight = MaxLength(height,colStride); const Int maxLocalWidth = MaxLength(width,rowStride); const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth ); vector<T> buf( (distStride+1)*portionSize ); T* sendBuf = &buf[0]; T* recvBuf = &buf[portionSize]; // Pack util::InterleaveMatrix ( A.LocalHeight(), A.LocalWidth(), A.LockedBuffer(), 1, A.LDim(), sendBuf, 1, A.LocalHeight() ); // Communicate mpi::AllGather ( sendBuf, portionSize, recvBuf, portionSize, A.DistComm() ); // Unpack util::StridedUnpack ( height, width, A.ColAlign(), colStride, A.RowAlign(), rowStride, recvBuf, portionSize, B.Buffer(), B.LDim() ); } if( A.Grid().InGrid() && A.CrossComm() != mpi::COMM_SELF ) { // Pack from the root const Int BLocalHeight = B.LocalHeight(); const Int BLocalWidth = B.LocalWidth(); vector<T> buf(BLocalHeight*BLocalWidth); if( A.CrossRank() == A.Root() ) util::InterleaveMatrix ( BLocalHeight, BLocalWidth, B.LockedBuffer(), 1, B.LDim(), buf.data(), 1, BLocalHeight ); // Broadcast from the root mpi::Broadcast ( buf.data(), BLocalHeight*BLocalWidth, A.Root(), A.CrossComm() ); // Unpack if not the root if( A.CrossRank() != A.Root() ) util::InterleaveMatrix ( BLocalHeight, BLocalWidth, buf.data(), 1, BLocalHeight, B.Buffer(), 1, B.LDim() ); } }
inline void Syr2kUT ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C, bool conjugate=false ) { #ifndef RELEASE CallStackEntry entry("internal::Syr2kUT"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) LogicError("{A,B,C} must be distributed over the same grid"); if( A.Width() != C.Height() || A.Width() != C.Width() || B.Width() != C.Height() || B.Width() != C.Width() || A.Height() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal Syr2kUT:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; LogicError( msg.str() ); } #endif const Grid& g = A.Grid(); const Orientation orientation = ( conjugate ? ADJOINT : TRANSPOSE ); // Matrix views DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,MR, STAR> A1Trans_MR_STAR(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); DistMatrix<T,STAR,VR > A1_STAR_VR(g); DistMatrix<T,STAR,VR > B1_STAR_VR(g); DistMatrix<T,STAR,MC > A1_STAR_MC(g); DistMatrix<T,STAR,MC > B1_STAR_MC(g); A1Trans_MR_STAR.AlignWith( C ); B1Trans_MR_STAR.AlignWith( C ); A1_STAR_MC.AlignWith( C ); B1_STAR_MC.AlignWith( C ); // Start the algorithm ScaleTrapezoid( beta, UPPER, C ); LockedPartitionDown ( A, AT, AB, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); //--------------------------------------------------------------------// A1Trans_MR_STAR.TransposeFrom( A1 ); A1_STAR_VR.TransposeFrom( A1Trans_MR_STAR ); A1_STAR_MC = A1_STAR_VR; B1Trans_MR_STAR.TransposeFrom( B1 ); B1_STAR_VR.TransposeFrom( B1Trans_MR_STAR ); B1_STAR_MC = B1_STAR_VR; LocalTrr2k ( UPPER, orientation, TRANSPOSE, orientation, TRANSPOSE, alpha, A1_STAR_MC, B1Trans_MR_STAR, B1_STAR_MC, A1Trans_MR_STAR, T(1), C ); 
//--------------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); } }
inline void UVar3( UnitOrNonUnit diag, DistMatrix<F>& U ) { #ifndef RELEASE CallStackEntry entry("triangular_inverse::UVar3"); if( U.Height() != U.Width() ) throw std::logic_error("Nonsquare matrices cannot be triangular"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<F,VC, STAR> U01_VC_STAR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,VR > U12_STAR_VR(g); DistMatrix<F,STAR,MC > U01Trans_STAR_MC(g); DistMatrix<F,MR, STAR> U12Trans_MR_STAR(g); // Start the algorithm PartitionUpDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( UBR.Height() < U.Height() ) { RepartitionUpDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); U01Trans_STAR_MC.AlignWith( U02 ); U12Trans_MR_STAR.AlignWith( U02 ); //--------------------------------------------------------------------// U01_VC_STAR = U01; U11_STAR_STAR = U11; LocalTrsm ( RIGHT, UPPER, NORMAL, diag, F(-1), U11_STAR_STAR, U01_VC_STAR ); // We transpose before the communication to avoid cache-thrashing // in the unpacking stage. U12Trans_MR_STAR.TransposeFrom( U12 ); U01Trans_STAR_MC.TransposeFrom( U01_VC_STAR ); LocalGemm ( TRANSPOSE, TRANSPOSE, F(1), U01Trans_STAR_MC, U12Trans_MR_STAR, F(1), U02 ); U01.TransposeFrom( U01Trans_STAR_MC ); U12_STAR_VR.TransposeFrom( U12Trans_MR_STAR ); LocalTrsm ( LEFT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, U12_STAR_VR ); LocalTriangularInverse( UPPER, diag, U11_STAR_STAR ); U11 = U11_STAR_STAR; U12 = U12_STAR_VR; //--------------------------------------------------------------------// U01Trans_STAR_MC.FreeAlignments(); U12Trans_MR_STAR.FreeAlignments(); SlidePartitionUpDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); } }
inline void Symv ( UpperOrLower uplo, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& x, T beta, DistMatrix<T>& y, bool conjugate=false ) { #ifndef RELEASE CallStackEntry entry("Symv"); if( A.Grid() != x.Grid() || x.Grid() != y.Grid() ) LogicError ("{A,x,y} must be distributed over the same grid"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( ( x.Width() != 1 && x.Height() != 1 ) || ( y.Width() != 1 && y.Height() != 1 ) ) LogicError("x and y are assumed to be vectors"); const Int xLength = ( x.Width()==1 ? x.Height() : x.Width() ); const Int yLength = ( y.Width()==1 ? y.Height() : y.Width() ); if( A.Height() != xLength || A.Height() != yLength ) { std::ostringstream msg; msg << "Nonconformal Symv: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " x ~ " << x.Height() << " x " << x.Width() << "\n" << " y ~ " << y.Height() << " x " << y.Width() << "\n"; LogicError( msg.str() ); } #endif const Grid& g = A.Grid(); if( x.Width() == 1 && y.Width() == 1 ) { // Temporary distributions DistMatrix<T,MC,STAR> x_MC_STAR(g), z_MC_STAR(g); DistMatrix<T,MR,STAR> x_MR_STAR(g), z_MR_STAR(g); DistMatrix<T,MR,MC > z_MR_MC(g); DistMatrix<T> z(g); // Begin the algoritm Scale( beta, y ); x_MC_STAR.AlignWith( A ); x_MR_STAR.AlignWith( A ); z_MC_STAR.AlignWith( A ); z_MR_STAR.AlignWith( A ); z.AlignWith( y ); Zeros( z_MC_STAR, y.Height(), 1 ); Zeros( z_MR_STAR, y.Height(), 1 ); //--------------------------------------------------------------------// x_MC_STAR = x; x_MR_STAR = x_MC_STAR; if( uplo == LOWER ) { internal::LocalSymvColAccumulateL ( alpha, A, x_MC_STAR, x_MR_STAR, z_MC_STAR, z_MR_STAR, conjugate ); } else { internal::LocalSymvColAccumulateU ( alpha, A, x_MC_STAR, x_MR_STAR, z_MC_STAR, z_MR_STAR, conjugate ); } z_MR_MC.SumScatterFrom( z_MR_STAR ); z = z_MR_MC; z.SumScatterUpdate( T(1), z_MC_STAR ); Axpy( T(1), z, y ); //--------------------------------------------------------------------// } else if( x.Width() == 1 ) { // Temporary 
distributions DistMatrix<T,MC,STAR> x_MC_STAR(g), z_MC_STAR(g); DistMatrix<T,MR,STAR> x_MR_STAR(g), z_MR_STAR(g); DistMatrix<T,MR,MC > z_MR_MC(g); DistMatrix<T> z(g), zTrans(g); // Begin the algoritm Scale( beta, y ); x_MC_STAR.AlignWith( A ); x_MR_STAR.AlignWith( A ); z_MC_STAR.AlignWith( A ); z_MR_STAR.AlignWith( A ); z.AlignWith( y ); z_MR_MC.AlignWith( y ); Zeros( z_MC_STAR, y.Width(), 1 ); Zeros( z_MR_STAR, y.Width(), 1 ); //--------------------------------------------------------------------// x_MC_STAR = x; x_MR_STAR = x_MC_STAR; if( uplo == LOWER ) { internal::LocalSymvColAccumulateL ( alpha, A, x_MC_STAR, x_MR_STAR, z_MC_STAR, z_MR_STAR, conjugate ); } else { internal::LocalSymvColAccumulateU ( alpha, A, x_MC_STAR, x_MR_STAR, z_MC_STAR, z_MR_STAR, conjugate ); } z.SumScatterFrom( z_MC_STAR ); z_MR_MC = z; z_MR_MC.SumScatterUpdate( T(1), z_MR_STAR ); Transpose( z_MR_MC, zTrans ); Axpy( T(1), zTrans, y ); //--------------------------------------------------------------------// } else if( y.Width() == 1 ) { // Temporary distributions DistMatrix<T,STAR,MC> x_STAR_MC(g), z_STAR_MC(g); DistMatrix<T,STAR,MR> x_STAR_MR(g), z_STAR_MR(g); DistMatrix<T,MR, MC> z_MR_MC(g); DistMatrix<T> z(g), zTrans(g); // Begin the algoritm Scale( beta, y ); x_STAR_MC.AlignWith( A ); x_STAR_MR.AlignWith( A ); z_STAR_MC.AlignWith( A ); z_STAR_MR.AlignWith( A ); z.AlignWith( y ); z_MR_MC.AlignWith( y ); Zeros( z_STAR_MC, 1, y.Height() ); Zeros( z_STAR_MR, 1, y.Height() ); //--------------------------------------------------------------------// x_STAR_MR = x; x_STAR_MC = x_STAR_MR; if( uplo == LOWER ) { internal::LocalSymvRowAccumulateL ( alpha, A, x_STAR_MC, x_STAR_MR, z_STAR_MC, z_STAR_MR, conjugate ); } else { internal::LocalSymvRowAccumulateU ( alpha, A, x_STAR_MC, x_STAR_MR, z_STAR_MC, z_STAR_MR, conjugate ); } z.SumScatterFrom( z_STAR_MR ); z_MR_MC = z; z_MR_MC.SumScatterUpdate( T(1), z_STAR_MC ); Transpose( z_MR_MC, zTrans ); Axpy( T(1), zTrans, y ); 
//--------------------------------------------------------------------// } else { // Temporary distributions DistMatrix<T,STAR,MC> x_STAR_MC(g), z_STAR_MC(g); DistMatrix<T,STAR,MR> x_STAR_MR(g), z_STAR_MR(g); DistMatrix<T,MR, MC> z_MR_MC(g); DistMatrix<T> z(g); // Begin the algoritm Scale( beta, y ); x_STAR_MC.AlignWith( A ); x_STAR_MR.AlignWith( A ); z_STAR_MC.AlignWith( A ); z_STAR_MR.AlignWith( A ); z.AlignWith( y ); z_MR_MC.AlignWith( y ); Zeros( z_STAR_MC, 1, y.Width() ); Zeros( z_STAR_MR, 1, y.Width() ); //--------------------------------------------------------------------// x_STAR_MR = x; x_STAR_MC = x_STAR_MR; if( uplo == LOWER ) { internal::LocalSymvRowAccumulateL ( alpha, A, x_STAR_MC, x_STAR_MR, z_STAR_MC, z_STAR_MR, conjugate ); } else { internal::LocalSymvRowAccumulateU ( alpha, A, x_STAR_MC, x_STAR_MR, z_STAR_MC, z_STAR_MR, conjugate ); } z_MR_MC.SumScatterFrom( z_STAR_MC ); z = z_MR_MC; z.SumScatterUpdate( T(1), z_STAR_MR ); Axpy( T(1), z, y ); //--------------------------------------------------------------------// } }
int main( int argc, char* argv[] ) { Initialize( argc, argv ); try { const Int m = Input("--height","height of matrix",20); const Int n = Input("--width","width of matrix",100); const Int r = Input("--rank","rank of matrix",5); const Int maxSteps = Input("--maxSteps","max # of steps of QR",10); const double tol = Input("--tol","tolerance for ID",-1.); const bool print = Input("--print","print matrices?",false); ProcessInput(); PrintInputReport(); DistMatrix<C> U, V; Uniform( U, m, r ); Uniform( V, n, r ); DistMatrix<C> A; Gemm( NORMAL, ADJOINT, C(1), U, V, A ); const Real frobA = FrobeniusNorm( A ); if( print ) Print( A, "A" ); const Grid& g = A.Grid(); DistMatrix<Int,VR,STAR> permR(g), permC(g); DistMatrix<C> Z(g); Skeleton( A, permR, permC, Z, maxSteps, tol ); const Int rank = Z.Height(); if( print ) { Print( permR, "permR" ); Print( permC, "permC" ); Print( Z, "Z" ); } // Form the matrices of A's (hopefully) dominant rows and columns DistMatrix<C> AR( A ); InversePermuteRows( AR, permR ); AR.Resize( rank, A.Width() ); DistMatrix<C> AC( A ); InversePermuteCols( AC, permC ); AC.Resize( A.Height(), rank ); if( print ) { Print( AC, "AC" ); Print( AR, "AR" ); } // Check || A - AC Z AR ||_F / || A ||_F DistMatrix<C> B(g); Gemm( NORMAL, NORMAL, C(1), Z, AR, B ); Gemm( NORMAL, NORMAL, C(-1), AC, B, C(1), A ); const Real frobError = FrobeniusNorm( A ); if( print ) Print( A, "A - AC Z AR" ); if( mpi::WorldRank() == 0 ) { std::cout << "|| A ||_F = " << frobA << "\n\n" << "|| A - AC Z AR ||_F / || A ||_F = " << frobError/frobA << "\n" << std::endl; } } catch( exception& e ) { ReportException(e); } Finalize(); return 0; }
void Trr2kNTTN ( UpperOrLower uplo, Orientation orientationOfB, Orientation orientationOfC, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, const DistMatrix<T>& C, const DistMatrix<T>& D, T beta, DistMatrix<T>& E ) { #ifndef RELEASE CallStackEntry entry("internal::Trr2kNTTN"); if( E.Height() != E.Width() || A.Width() != C.Height() || A.Height() != E.Height() || C.Width() != E.Height() || B.Height() != E.Width() || D.Width() != E.Width() || A.Width() != B.Width() || C.Height() != D.Height() ) LogicError("Nonconformal Trr2kNTTN"); #endif const Grid& g = E.Grid(); DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); DistMatrix<T> DT(g), D0(g), DB(g), D1(g), D2(g); DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,VR, STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR > B1AdjOrTrans_STAR_MR(g); DistMatrix<T,STAR,MC > C1_STAR_MC(g); DistMatrix<T,MR, STAR> D1Trans_MR_STAR(g); A1_MC_STAR.AlignWith( E ); B1_VR_STAR.AlignWith( E ); B1AdjOrTrans_STAR_MR.AlignWith( E ); C1_STAR_MC.AlignWith( E ); D1Trans_MR_STAR.AlignWith( E ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionRight( B, BL, BR, 0 ); LockedPartitionDown ( C, CT, CB, 0 ); LockedPartitionDown ( D, DT, DB, 0 ); while( AL.Width() < A.Width() ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); LockedRepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); LockedRepartitionDown ( DT, D0, /**/ /**/ D1, DB, D2 ); //--------------------------------------------------------------------// A1_MC_STAR = A1; C1_STAR_MC = C1; B1_VR_STAR = B1; if( orientationOfB == ADJOINT ) B1AdjOrTrans_STAR_MR.AdjointFrom( B1_VR_STAR ); else B1AdjOrTrans_STAR_MR.TransposeFrom( B1_VR_STAR ); D1Trans_MR_STAR.TransposeFrom( D1 ); LocalTrr2k ( uplo, orientationOfC, TRANSPOSE, alpha, A1_MC_STAR, B1AdjOrTrans_STAR_MR, C1_STAR_MC, D1Trans_MR_STAR, beta, E ); 
//--------------------------------------------------------------------// SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlideLockedPartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); SlideLockedPartitionDown ( DT, D0, D1, /**/ /**/ DB, D2 ); } }