inline void Syr2kLN ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::Syr2kLN"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || A.Height() != C.Width() || B.Height() != C.Height() || B.Height() != C.Width() || A.Width() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal Syr2kLN:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,MC, STAR> B1_MC_STAR(g); DistMatrix<T,VR, STAR> A1_VR_STAR(g); DistMatrix<T,VR, STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR > A1Trans_STAR_MR(g); DistMatrix<T,STAR,MR > B1Trans_STAR_MR(g); A1_MC_STAR.AlignWith( C ); B1_MC_STAR.AlignWith( C ); A1_VR_STAR.AlignWith( C ); B1_VR_STAR.AlignWith( C ); A1Trans_STAR_MR.AlignWith( C ); B1Trans_STAR_MR.AlignWith( C ); // Start the algorithm ScaleTrapezoid( beta, LEFT, LOWER, 0, C ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionRight( B, BL, BR, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); //--------------------------------------------------------------------// A1_VR_STAR = A1_MC_STAR = A1; A1Trans_STAR_MR.TransposeFrom( A1_VR_STAR ); B1_VR_STAR = B1_MC_STAR = B1; B1Trans_STAR_MR.TransposeFrom( B1_VR_STAR ); LocalTrr2k ( LOWER, alpha, A1_MC_STAR, B1Trans_STAR_MR, B1_MC_STAR, A1Trans_STAR_MR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrmmLLNCOld ( UnitOrNonUnit diag, T alpha, const DistMatrix<T>& L, DistMatrix<T>& X ) { #ifndef RELEASE PushCallStack("internal::TrmmLLNCOld"); if( L.Grid() != X.Grid() ) throw std::logic_error ("L and X must be distributed over the same grid"); if( L.Height() != L.Width() || L.Width() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrmmLLNC: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<T> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<T> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<T,STAR,MC > L10_STAR_MC(g); DistMatrix<T,STAR,STAR> L11_STAR_STAR(g); DistMatrix<T,STAR,VR > X1_STAR_VR(g); DistMatrix<T,MR, STAR> D1Trans_MR_STAR(g); DistMatrix<T,MR, MC > D1Trans_MR_MC(g); DistMatrix<T,MC, MR > D1(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionUp ( X, XT, XB, 0 ); while( XT.Height() > 0 ) { LockedRepartitionUpDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); RepartitionUp ( XT, X0, X1, /**/ /**/ XB, X2 ); L10_STAR_MC.AlignWith( X0 ); D1Trans_MR_STAR.AlignWith( X1 ); D1Trans_MR_MC.AlignWith( X1 ); D1.AlignWith( X1 ); Zeros( X1.Width(), X1.Height(), D1Trans_MR_STAR ); Zeros( X1.Height(), X1.Width(), D1 ); //--------------------------------------------------------------------// L11_STAR_STAR = L11; X1_STAR_VR = X1; LocalTrmm( LEFT, LOWER, NORMAL, diag, T(1), L11_STAR_STAR, X1_STAR_VR ); X1 = X1_STAR_VR; L10_STAR_MC = L10; LocalGemm ( TRANSPOSE, TRANSPOSE, T(1), X0, L10_STAR_MC, T(0), D1Trans_MR_STAR ); D1Trans_MR_MC.SumScatterFrom( D1Trans_MR_STAR ); Transpose( D1Trans_MR_MC.LocalMatrix(), D1.LocalMatrix() ); Axpy( T(1), D1, X1 ); //--------------------------------------------------------------------// D1.FreeAlignments(); D1Trans_MR_MC.FreeAlignments(); D1Trans_MR_STAR.FreeAlignments(); L10_STAR_MC.FreeAlignments(); SlideLockedPartitionUpDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); SlidePartitionUp ( XT, X0, /**/ /**/ X1, XB, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TwoSidedTrmmLVar4( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& L ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrmmLVar4"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( L.Height() != L.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != L.Height() ) throw std::logic_error("A and L must be the same size"); #endif // Matrix views Matrix<F> ATL, ATR, A00, A01, A02, ABL, ABR, A10, A11, A12, A20, A21, A22; Matrix<F> LTL, LTR, L00, L01, L02, LBL, LBR, L10, L11, L12, L20, L21, L22; // Temporary products Matrix<F> Y10; PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); //--------------------------------------------------------------------// // Y10 := A11 L10 Zeros( A10.Height(), A10.Width(), Y10 ); Hemm( LEFT, LOWER, F(1), A11, L10, F(0), Y10 ); // A10 := A10 + 1/2 Y10 Axpy( F(1)/F(2), Y10, A10 ); // A00 := A00 + (A10' L10 + L10' A10) Her2k( LOWER, ADJOINT, F(1), A10, L10, F(1), A00 ); // A10 := A10 + 1/2 Y10 Axpy( F(1)/F(2), Y10, A10 ); // A10 := L11' A10 Trmm( LEFT, LOWER, ADJOINT, diag, F(1), L11, A10 ); // A20 := A20 + A21 L10 Gemm( NORMAL, NORMAL, F(1), A21, L10, F(1), A20 ); // A11 := L11' A11 L11 TwoSidedTrmmLUnb( diag, A11, L11 ); // A21 := A21 L11 Trmm( RIGHT, LOWER, NORMAL, diag, F(1), L11, A21 ); //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void Var3( Orientation orientation, DistMatrix<F>& A, DistMatrix<F,MC,STAR>& d ) { #ifndef RELEASE PushCallStack("ldl::Var3"); if( orientation == NORMAL ) throw std::logic_error("Can only perform LDL^T and LDL^H"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( A.Grid() != d.Grid() ) throw std::logic_error("A and d must use the same grid"); if( d.Viewing() && (d.Height() != A.Height() || d.Width() != 1) ) throw std::logic_error ("d must be a column vector of the same height as A"); if( d.Viewing() && d.ColAlignment() != A.ColAlignment() ) throw std::logic_error("d must be aligned with A"); #endif const Grid& g = A.Grid(); if( !d.Viewing() ) { d.AlignWith( A ); d.ResizeTo( A.Height(), 1 ); } // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F,MC,STAR> dT(g), d0(g), dB(g), d1(g), d2(g); // Temporary matrices DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,STAR> d1_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,VR, STAR> A21_VR_STAR(g); DistMatrix<F,STAR,MC > S21Trans_STAR_MC(g); DistMatrix<F,STAR,MR > A21AdjOrTrans_STAR_MR(g); const bool conjugate = ( orientation == ADJOINT ); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( d, dT, dB, 0 ); while( ABR.Height() > 0 ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( dT, d0, /**/ /**/ d1, dB, d2 ); A21_VC_STAR.AlignWith( A22 ); A21_VR_STAR.AlignWith( A22 ); S21Trans_STAR_MC.AlignWith( A22 ); A21AdjOrTrans_STAR_MR.AlignWith( A22 ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; LocalLDL( orientation, A11_STAR_STAR, d1_STAR_STAR ); A11 = A11_STAR_STAR; d1 = d1_STAR_STAR; A21_VC_STAR = A21; LocalTrsm ( RIGHT, LOWER, orientation, UNIT, F(1), A11_STAR_STAR, A21_VC_STAR ); S21Trans_STAR_MC.TransposeFrom( A21_VC_STAR ); DiagonalSolve( RIGHT, NORMAL, d1_STAR_STAR, A21_VC_STAR ); A21_VR_STAR = A21_VC_STAR; A21AdjOrTrans_STAR_MR.TransposeFrom( A21_VR_STAR, conjugate ); LocalTrrk ( LOWER, TRANSPOSE, F(-1), S21Trans_STAR_MC, A21AdjOrTrans_STAR_MR, F(1), A22 ); A21 = A21_VC_STAR; //--------------------------------------------------------------------// A21_VC_STAR.FreeAlignments(); A21_VR_STAR.FreeAlignments(); S21Trans_STAR_MC.FreeAlignments(); A21AdjOrTrans_STAR_MR.FreeAlignments(); SlidePartitionDown ( dT, d0, d1, /**/ /**/ dB, d2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void Var3( Orientation orientation, Matrix<F>& A, Matrix<F>& d ) { #ifndef RELEASE PushCallStack("ldl::Var3"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( d.Viewing() && (d.Height() != A.Height() || d.Width() != 1) ) throw std::logic_error ("d must be a column vector the same height as A"); if( orientation == NORMAL ) throw std::logic_error("Can only perform LDL^T or LDL^H"); #endif const int n = A.Height(); if( !d.Viewing() ) d.ResizeTo( n, 1 ); Matrix<F> ATL, ATR, A00, A01, A02, ABL, ABR, A10, A11, A12, A20, A21, A22; Matrix<F> dT, d0, dB, d1, d2; Matrix<F> S21; // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( d, dT, dB, 0 ); while( ABR.Height() > 0 ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( dT, d0, /**/ /**/ d1, dB, d2 ); //--------------------------------------------------------------------// ldl::Var3Unb( orientation, A11, d1 ); Trsm( RIGHT, LOWER, orientation, UNIT, F(1), A11, A21 ); S21 = A21; DiagonalSolve( RIGHT, NORMAL, d1, A21 ); internal::TrrkNT( LOWER, orientation, F(-1), S21, A21, F(1), A22 ); //--------------------------------------------------------------------// SlidePartitionDown ( dT, d0, d1, /**/ /**/ dB, d2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void MakeTrapezoidal ( LeftOrRight side, UpperOrLower uplo, int offset, Matrix<T>& A ) { #ifndef RELEASE PushCallStack("MakeTrapezoidal"); #endif const int height = A.Height(); const int width = A.Width(); const int ldim = A.LDim(); T* buffer = A.Buffer(); if( uplo == LOWER ) { if( side == LEFT ) { #ifdef HAVE_OPENMP #pragma omp parallel for #endif for( int j=std::max(0,offset+1); j<width; ++j ) { const int lastZeroRow = j-offset-1; const int numZeroRows = std::min( lastZeroRow+1, height ); MemZero( &buffer[j*ldim], numZeroRows ); } } else { #ifdef HAVE_OPENMP #pragma omp parallel for #endif for( int j=std::max(0,offset-height+width+1); j<width; ++j ) { const int lastZeroRow = j-offset+height-width-1; const int numZeroRows = std::min( lastZeroRow+1, height ); MemZero( &buffer[j*ldim], numZeroRows ); } } } else { if( side == LEFT ) { #ifdef HAVE_OPENMP #pragma omp parallel for #endif for( int j=0; j<width; ++j ) { const int firstZeroRow = std::max(j-offset+1,0); if( firstZeroRow < height ) MemZero ( &buffer[firstZeroRow+j*ldim], height-firstZeroRow ); } } else { #ifdef HAVE_OPENMP #pragma omp parallel for #endif for( int j=0; j<width; ++j ) { const int firstZeroRow = std::max(j-offset+height-width+1,0); if( firstZeroRow < height ) MemZero ( &buffer[firstZeroRow+j*ldim], height-firstZeroRow ); } } } #ifndef RELEASE PopCallStack(); #endif }
inline void internal::HPDInverseUVar2( DistMatrix<F,MC,MR>& A ) { #ifndef RELEASE PushCallStack("internal::HPDInverseUVar2"); if( A.Height() != A.Width() ) throw std::logic_error("Nonsquare matrices cannot be triangular"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A01_VC_STAR(g); DistMatrix<F,VR, STAR> A01_VR_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,MC > A01Trans_STAR_MC(g); DistMatrix<F,MR, STAR> A01_MR_STAR(g); DistMatrix<F,STAR,MR > A01Adj_STAR_MR(g); DistMatrix<F,STAR,MR > A12_STAR_MR(g); DistMatrix<F,STAR,MC > A12_STAR_MC(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A01_VC_STAR.AlignWith( A00 ); A12_STAR_VR.AlignWith( A02 ); A01Trans_STAR_MC.AlignWith( A00 ); A01_VR_STAR.AlignWith( A00 ); A01Adj_STAR_MR.AlignWith( A00 ); A12_STAR_MR.AlignWith( A02 ); A12_STAR_MC.AlignWith( A22 ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; internal::LocalCholesky( UPPER, A11_STAR_STAR ); A01_VC_STAR = A01; internal::LocalTrsm ( RIGHT, UPPER, NORMAL, NON_UNIT, (F)1, A11_STAR_STAR, A01_VC_STAR ); A12_STAR_VR = A12; internal::LocalTrsm ( LEFT, UPPER, ADJOINT, NON_UNIT, (F)1, A11_STAR_STAR, A12_STAR_VR ); A01Trans_STAR_MC.TransposeFrom( A01_VC_STAR ); A01_VR_STAR = A01_VC_STAR; A01Adj_STAR_MR.AdjointFrom( A01_VR_STAR ); internal::LocalTrrk ( UPPER, TRANSPOSE, (F)1, A01Trans_STAR_MC, A01Adj_STAR_MR, (F)1, A00 ); A12_STAR_MR = A12_STAR_VR; internal::LocalGemm ( TRANSPOSE, NORMAL, (F)-1, A01Trans_STAR_MC, A12_STAR_MR, (F)1, A02 ); A12_STAR_MC = A12_STAR_VR; internal::LocalTrrk ( UPPER, ADJOINT, (F)-1, A12_STAR_MC, A12_STAR_MR, (F)1, A22 ); internal::LocalTrsm ( RIGHT, UPPER, ADJOINT, NON_UNIT, (F)1, A11_STAR_STAR, A01_VC_STAR ); internal::LocalTrsm ( LEFT, UPPER, NORMAL, NON_UNIT, (F)-1, A11_STAR_STAR, A12_STAR_VR ); internal::LocalTriangularInverse ( UPPER, NON_UNIT, A11_STAR_STAR ); internal::LocalHetrmm( UPPER, A11_STAR_STAR ); A11 = A11_STAR_STAR; A01 = A01_VC_STAR; A12 = A12_STAR_VR; //--------------------------------------------------------------------// A01_VC_STAR.FreeAlignments(); A12_STAR_VR.FreeAlignments(); A01Trans_STAR_MC.FreeAlignments(); A01_VR_STAR.FreeAlignments(); A01Adj_STAR_MR.FreeAlignments(); A12_STAR_MR.FreeAlignments(); A12_STAR_MC.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void Trr2kNNTT ( UpperOrLower uplo, Orientation orientationOfC, Orientation orientationOfD, T alpha, const DistMatrix<T,MC,MR>& A, const DistMatrix<T,MC,MR>& B, const DistMatrix<T,MC,MR>& C, const DistMatrix<T,MC,MR>& D, T beta, DistMatrix<T,MC,MR>& E ) { #ifndef RELEASE PushCallStack("internal::Trr2kNNTT"); if( E.Height() != E.Width() || A.Width() != C.Height() || A.Height() != E.Height() || C.Width() != E.Height() || B.Width() != E.Width() || D.Height() != E.Width() || A.Width() != B.Height() || C.Height() != D.Width() ) throw std::logic_error("Nonconformal Trr2kNNTT"); #endif const Grid& g = E.Grid(); DistMatrix<T,MC,MR> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T,MC,MR> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T,MC,MR> CT(g), C0(g), CB(g), C1(g), C2(g); DistMatrix<T,MC,MR> DL(g), DR(g), D0(g), D1(g), D2(g); DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); DistMatrix<T,STAR,MC > C1_STAR_MC(g); DistMatrix<T,VR, STAR> D1_VR_STAR(g); DistMatrix<T,STAR,MR > D1AdjOrTrans_STAR_MR(g); A1_MC_STAR.AlignWith( E ); B1Trans_MR_STAR.AlignWith( E ); C1_STAR_MC.AlignWith( E ); D1_VR_STAR.AlignWith( E ); D1AdjOrTrans_STAR_MR.AlignWith( E ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); LockedPartitionDown ( C, CT, CB, 0 ); LockedPartitionRight( D, DL, DR, 0 ); while( AL.Width() < A.Width() ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); LockedRepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); LockedRepartitionRight ( DL, /**/ DR, D0, /**/ D1, D2 ); //--------------------------------------------------------------------// A1_MC_STAR = A1; C1_STAR_MC = C1; B1Trans_MR_STAR.TransposeFrom( B1 ); D1_VR_STAR = D1; if( orientationOfD == ADJOINT ) D1AdjOrTrans_STAR_MR.AdjointFrom( D1_VR_STAR ); else D1AdjOrTrans_STAR_MR.TransposeFrom( D1_VR_STAR ); LocalTrr2k ( uplo, TRANSPOSE, orientationOfC, alpha, A1_MC_STAR, B1Trans_MR_STAR, C1_STAR_MC, D1AdjOrTrans_STAR_MR, beta, E ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( DL, /**/ DR, D0, D1, /**/ D2 ); SlideLockedPartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); } #ifndef RELEASE PopCallStack(); #endif }
CallStackEntry(const std::string& s) { if (!std::uncaught_exception()) PushCallStack(s); }
inline void TwoSidedTrmmUVar2 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& U ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrmmUVar2"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( U.Height() != U.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != U.Height() ) throw std::logic_error("A and U must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<F,VC, STAR> A01_VC_STAR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,MC > U12_STAR_MC(g); DistMatrix<F,STAR,VR > U12_STAR_VR(g); DistMatrix<F,MR, STAR> U12Adj_MR_STAR(g); DistMatrix<F,VC, STAR> U12Adj_VC_STAR(g); DistMatrix<F,MC, STAR> X01_MC_STAR(g); DistMatrix<F,STAR,STAR> X11_STAR_STAR(g); DistMatrix<F,MR, MC > Z12Adj_MR_MC(g); DistMatrix<F,MC, STAR> Z12Adj_MC_STAR(g); DistMatrix<F,MR, STAR> Z12Adj_MR_STAR(g); DistMatrix<F> Y12(g); DistMatrix<F> Z12Adj(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); A12_STAR_VR.AlignWith( A12 ); U12_STAR_MC.AlignWith( A22 ); U12_STAR_VR.AlignWith( A12 ); U12Adj_MR_STAR.AlignWith( A22 ); U12Adj_VC_STAR.AlignWith( A22 ); X01_MC_STAR.AlignWith( A01 ); Y12.AlignWith( A12 ); Z12Adj.AlignWith( A12 ); Z12Adj_MR_MC.AlignWith( A12 ); Z12Adj_MC_STAR.AlignWith( A22 ); Z12Adj_MR_STAR.AlignWith( A22 ); //--------------------------------------------------------------------// // A01 := A01 U11' U11_STAR_STAR = U11; A01_VC_STAR = A01; LocalTrmm ( RIGHT, UPPER, ADJOINT, diag, F(1), U11_STAR_STAR, A01_VC_STAR ); A01 = A01_VC_STAR; // A01 := A01 + A02 U12' U12Adj_MR_STAR.AdjointFrom( U12 ); X01_MC_STAR.ResizeTo( A01.Height(), A01.Width() ); LocalGemm ( NORMAL, NORMAL, F(1), A02, U12Adj_MR_STAR, F(0), X01_MC_STAR ); A01.SumScatterUpdate( F(1), X01_MC_STAR ); // Y12 := U12 A22 U12Adj_VC_STAR = U12Adj_MR_STAR; U12_STAR_MC.AdjointFrom( U12Adj_VC_STAR ); Z12Adj_MC_STAR.ResizeTo( A12.Width(), A12.Height() ); Z12Adj_MR_STAR.ResizeTo( A12.Width(), A12.Height() ); Zero( Z12Adj_MC_STAR ); Zero( Z12Adj_MR_STAR ); LocalSymmetricAccumulateRU ( ADJOINT, F(1), A22, U12_STAR_MC, U12Adj_MR_STAR, Z12Adj_MC_STAR, Z12Adj_MR_STAR ); Z12Adj.SumScatterFrom( Z12Adj_MC_STAR ); Z12Adj_MR_MC = Z12Adj; Z12Adj_MR_MC.SumScatterUpdate( F(1), Z12Adj_MR_STAR ); Y12.ResizeTo( A12.Height(), A12.Width() ); Adjoint( Z12Adj_MR_MC.LockedLocalMatrix(), Y12.LocalMatrix() ); // A12 := U11 A12 A12_STAR_VR = A12; U11_STAR_STAR = U11; LocalTrmm ( LEFT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, A12_STAR_VR ); A12 = A12_STAR_VR; // A12 := A12 + 1/2 Y12 Axpy( F(1)/F(2), Y12, A12 ); // A11 := U11 A11 U11' A11_STAR_STAR = A11; LocalTwoSidedTrmm( UPPER, diag, A11_STAR_STAR, U11_STAR_STAR ); A11 = A11_STAR_STAR; // A11 := A11 + (A12 U12' + U12 A12') A12_STAR_VR = A12; U12_STAR_VR = U12; X11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() ); Her2k ( UPPER, NORMAL, F(1), A12_STAR_VR.LocalMatrix(), U12_STAR_VR.LocalMatrix(), F(0), X11_STAR_STAR.LocalMatrix() ); A11.SumScatterUpdate( F(1), X11_STAR_STAR ); // A12 := A12 + 1/2 Y12 Axpy( F(1)/F(2), Y12, A12 ); //--------------------------------------------------------------------// A12_STAR_VR.FreeAlignments(); U12_STAR_MC.FreeAlignments(); U12_STAR_VR.FreeAlignments(); U12Adj_MR_STAR.FreeAlignments(); U12Adj_VC_STAR.FreeAlignments(); X01_MC_STAR.FreeAlignments(); Y12.FreeAlignments(); Z12Adj.FreeAlignments(); Z12Adj_MR_MC.FreeAlignments(); Z12Adj_MC_STAR.FreeAlignments(); Z12Adj_MR_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TwoSidedTrmmUVar2( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& U ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrmmUVar2"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( U.Height() != U.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != U.Height() ) throw std::logic_error("A and U must be the same size"); #endif // Matrix views Matrix<F> ATL, ATR, A00, A01, A02, ABL, ABR, A10, A11, A12, A20, A21, A22; Matrix<F> UTL, UTR, U00, U01, U02, UBL, UBR, U10, U11, U12, U20, U21, U22; // Temporary products Matrix<F> Y12; PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); //--------------------------------------------------------------------// // A01 := A01 U11' Trmm( RIGHT, UPPER, ADJOINT, diag, F(1), U11, A01 ); // A01 := A01 + A02 U12' Gemm( NORMAL, ADJOINT, F(1), A02, U12, F(1), A01 ); // Y12 := U12 A22 Zeros( A12.Height(), A12.Width(), Y12 ); Hemm( RIGHT, UPPER, F(1), A22, U12, F(0), Y12 ); // A12 := U11 A12 Trmm( LEFT, UPPER, NORMAL, diag, F(1), U11, A12 ); // A12 := A12 + 1/2 Y12 Axpy( F(1)/F(2), Y12, A12 ); // A11 := U11 A11 U11' TwoSidedTrmmUUnb( diag, A11, U11 ); // A11 := A11 + (A12 U12' + U12 A12') Her2k( UPPER, NORMAL, F(1), A12, U12, F(1), A11 ); // A12 := A12 + 1/2 Y12 Axpy( F(1)/F(2), Y12, A12 ); //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void internal::CholeskyLVar3( DistMatrix<F,MC,MR>& A ) { #ifndef RELEASE PushCallStack("internal::CholeskyLVar3"); if( A.Height() != A.Width() ) throw std::logic_error ("Can only compute Cholesky factor of square matrices"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary matrices DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,VR, STAR> A21_VR_STAR(g); DistMatrix<F,STAR,MC > A21Trans_STAR_MC(g); DistMatrix<F,STAR,MR > A21Adj_STAR_MR(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ABR.Height() > 0 ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A21_VR_STAR.AlignWith( A22 ); A21_VC_STAR.AlignWith( A22 ); A21Trans_STAR_MC.AlignWith( A22 ); A21Adj_STAR_MR.AlignWith( A22 ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; internal::LocalCholesky( LOWER, A11_STAR_STAR ); A11 = A11_STAR_STAR; A21_VC_STAR = A21; internal::LocalTrsm ( RIGHT, LOWER, ADJOINT, NON_UNIT, (F)1, A11_STAR_STAR, A21_VC_STAR ); A21_VR_STAR = A21_VC_STAR; A21Trans_STAR_MC.TransposeFrom( A21_VC_STAR ); A21Adj_STAR_MR.AdjointFrom( A21_VR_STAR ); // (A21^T[* ,MC])^T A21^H[* ,MR] = A21[MC,* ] A21^H[* ,MR] // = (A21 A21^H)[MC,MR] internal::LocalTrrk ( LOWER, TRANSPOSE, (F)-1, A21Trans_STAR_MC, A21Adj_STAR_MR, (F)1, A22 ); A21.TransposeFrom( A21Trans_STAR_MC ); //--------------------------------------------------------------------// A21_VR_STAR.FreeAlignments(); A21_VC_STAR.FreeAlignments(); A21Trans_STAR_MC.FreeAlignments(); A21Adj_STAR_MR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void internal::CholeskyLVar3Naive( DistMatrix<F,MC,MR>& A ) { #ifndef RELEASE PushCallStack("internal::CholeskyLVar3Naive"); if( A.Height() != A.Width() ) throw std::logic_error ("Can only compute Cholesky factor of square matrices"); if( A.Grid().VCRank() == 0 ) { std::cout << "CholeskyLVar3Naive exists solely for academic purposes. Please " "use CholeskyLVar3 in real applications." << std::endl; } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary matrices DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,MC, STAR> A21_MC_STAR(g); DistMatrix<F,MR, STAR> A21_MR_STAR(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ABR.Height() > 0 ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A21_VC_STAR.AlignWith( A22 ); A21_MC_STAR.AlignWith( A22 ); A21_MR_STAR.AlignWith( A22 ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; internal::LocalCholesky( LOWER, A11_STAR_STAR ); A11 = A11_STAR_STAR; A21_VC_STAR = A21; internal::LocalTrsm ( RIGHT, LOWER, ADJOINT, NON_UNIT, (F)1, A11_STAR_STAR, A21_VC_STAR ); A21_MC_STAR = A21_VC_STAR; A21_MR_STAR = A21_VC_STAR; // (A21^T[* ,MC])^T A21^H[* ,MR] = A21[MC,* ] A21^H[* ,MR] // = (A21 A21^H)[MC,MR] internal::LocalTrrk ( LOWER, ADJOINT, (F)-1, A21_MC_STAR, A21_MR_STAR, (F)1, A22 ); A21 = A21_MC_STAR; //--------------------------------------------------------------------// A21_VC_STAR.FreeAlignments(); A21_MC_STAR.FreeAlignments(); A21_MR_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TwoSidedTrsmUVar2 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& U ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrsmUVar2"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( U.Height() != U.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != U.Height() ) throw std::logic_error("A and U must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<F,MC, STAR> A01_MC_STAR(g); DistMatrix<F,VC, STAR> A01_VC_STAR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,MC, STAR> F01_MC_STAR(g); DistMatrix<F,MC, STAR> U01_MC_STAR(g); DistMatrix<F,VR, STAR> U01_VR_STAR(g); DistMatrix<F,STAR,MR > U01Adj_STAR_MR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,MR > X11_STAR_MR(g); DistMatrix<F,MR, STAR> X12Adj_MR_STAR(g); DistMatrix<F,MR, MC > X12Adj_MR_MC(g); DistMatrix<F,MR, MC > Y01_MR_MC(g); DistMatrix<F,MR, STAR> Y01_MR_STAR(g); DistMatrix<F> X11(g); DistMatrix<F> Y01(g); Matrix<F> X12Local; PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); A01_MC_STAR.AlignWith( U01 ); Y01.AlignWith( A01 ); Y01_MR_STAR.AlignWith( A00 ); U01_MC_STAR.AlignWith( A00 ); U01_VR_STAR.AlignWith( A00 ); U01Adj_STAR_MR.AlignWith( A00 ); X11_STAR_MR.AlignWith( U01 ); X11.AlignWith( A11 ); X12Adj_MR_STAR.AlignWith( A02 ); X12Adj_MR_MC.AlignWith( A12 ); F01_MC_STAR.AlignWith( A00 ); //--------------------------------------------------------------------// // Y01 := A00 U01 U01_MC_STAR = U01; U01_VR_STAR = U01_MC_STAR; U01Adj_STAR_MR.AdjointFrom( U01_VR_STAR ); Y01_MR_STAR.ResizeTo( A01.Height(), A01.Width() ); F01_MC_STAR.ResizeTo( A01.Height(), A01.Width() ); Zero( Y01_MR_STAR ); Zero( F01_MC_STAR ); LocalSymmetricAccumulateLU ( ADJOINT, F(1), A00, U01_MC_STAR, U01Adj_STAR_MR, F01_MC_STAR, Y01_MR_STAR ); Y01_MR_MC.SumScatterFrom( Y01_MR_STAR ); Y01 = Y01_MR_MC; Y01.SumScatterUpdate( F(1), F01_MC_STAR ); // X11 := U01' A01 X11_STAR_MR.ResizeTo( A11.Height(), A11.Width() ); LocalGemm( ADJOINT, NORMAL, F(1), U01_MC_STAR, A01, F(0), X11_STAR_MR ); // A01 := A01 - Y01 Axpy( F(-1), Y01, A01 ); A01_MC_STAR = A01; // A11 := A11 - triu(X11 + A01' U01) = A11 - (U01 A01 + A01' U01) LocalGemm( ADJOINT, NORMAL, F(1), A01_MC_STAR, U01, F(1), X11_STAR_MR ); X11.SumScatterFrom( X11_STAR_MR ); MakeTrapezoidal( LEFT, UPPER, 0, X11 ); Axpy( F(-1), X11, A11 ); // A01 := A01 inv(U11) U11_STAR_STAR = U11; A01_VC_STAR = A01_MC_STAR; LocalTrsm ( RIGHT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, A01_VC_STAR ); A01 = A01_VC_STAR; // A11 := inv(U11)' A11 inv(U11) A11_STAR_STAR = A11; LocalTwoSidedTrsm( UPPER, diag, A11_STAR_STAR, U11_STAR_STAR ); A11 = A11_STAR_STAR; // A12 := A12 - A02' U01 X12Adj_MR_STAR.ResizeTo( A12.Width(), A12.Height() ); LocalGemm ( ADJOINT, NORMAL, F(1), A02, U01_MC_STAR, F(0), X12Adj_MR_STAR ); X12Adj_MR_MC.SumScatterFrom( X12Adj_MR_STAR ); Adjoint( X12Adj_MR_MC.LockedLocalMatrix(), X12Local ); Axpy( F(-1), X12Local, A12.LocalMatrix() ); // A12 := inv(U11)' A12 A12_STAR_VR = A12; LocalTrsm ( LEFT, UPPER, ADJOINT, diag, F(1), U11_STAR_STAR, A12_STAR_VR ); A12 = A12_STAR_VR; //--------------------------------------------------------------------// A01_MC_STAR.FreeAlignments(); F01_MC_STAR.FreeAlignments(); U01_MC_STAR.FreeAlignments(); U01_VR_STAR.FreeAlignments(); U01Adj_STAR_MR.FreeAlignments(); X11.FreeAlignments(); X11_STAR_MR.FreeAlignments(); X12Adj_MR_STAR.FreeAlignments(); X12Adj_MR_MC.FreeAlignments(); Y01.FreeAlignments(); Y01_MR_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrtrsmLLN ( UnitOrNonUnit diag, F alpha, const Matrix<F>& L, Matrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("internal::TrtrsmLLN"); #endif // Matrix views Matrix<F> LTL, LTR, L00, L01, L02, LBL, LBR, L10, L11, L12, L20, L21, L22; Matrix<F> XTL, XTR, X00, X01, X02, XBL, XBR, X10, X11, X12, X20, X21, X22; Matrix<F> Z11; // Start the algorithm ScaleTrapezoid( alpha, LEFT, LOWER, 0, X ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionDownDiagonal ( X, XTL, XTR, XBL, XBR, 0 ); while( XBR.Height() > 0 ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); RepartitionDownDiagonal ( XTL, /**/ XTR, X00, /**/ X01, X02, /*************/ /******************/ /**/ X10, /**/ X11, X12, XBL, /**/ XBR, X20, /**/ X21, X22 ); //--------------------------------------------------------------------// Trsm( LEFT, LOWER, NORMAL, diag, F(1), L11, X10, checkIfSingular ); TrtrsmLLNUnb( diag, F(1), L11, X11 ); Gemm( NORMAL, NORMAL, F(-1), L21, X10, F(1), X20 ); Z11 = X11; MakeTrapezoidal( LEFT, LOWER, 0, Z11 ); Gemm( NORMAL, NORMAL, F(-1), L21, Z11, F(1), X21 ); //--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlidePartitionDownDiagonal ( XTL, /**/ XTR, X00, X01, /**/ X02, /**/ X10, X11, /**/ X12, /*************/ /******************/ XBL, /**/ XBR, X20, X21, /**/ X22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void internal::ApplyPackedReflectorsLUVF ( Conjugation conjugation, int offset, const DistMatrix<Complex<R>,MC,MR >& H, const DistMatrix<Complex<R>,MD,STAR>& t, DistMatrix<Complex<R>,MC,MR >& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsLUVF"); if( H.Grid() != t.Grid() || t.Grid() != A.Grid() ) throw std::logic_error ("{H,t,A} must be distributed over the same grid"); if( offset > H.Height() ) throw std::logic_error("Transforms cannot extend above matrix"); if( offset < 0 ) throw std::logic_error("Transforms cannot extend below matrix"); if( H.Width() != A.Height() ) throw std::logic_error ("Width of transforms must equal height of target matrix"); if( t.Height() != H.DiagonalLength( offset ) ) throw std::logic_error("t must be the same length as H's offset diag"); if( !t.AlignedWithDiagonal( H, offset ) ) throw std::logic_error("t must be aligned with H's 'offset' diagonal"); #endif typedef Complex<R> C; const Grid& g = H.Grid(); // Matrix views DistMatrix<C,MC,MR> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<C,MC,MR> AT(g), A0(g), ATop(g), AB(g), A1(g), A2(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); DistMatrix<C,MC, MR > HPanCopy(g); DistMatrix<C,VC, STAR> HPan_VC_STAR(g); DistMatrix<C,MC, STAR> HPan_MC_STAR(g); DistMatrix<C,STAR,STAR> t1_STAR_STAR(g); DistMatrix<C,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<C,STAR,MR > Z_STAR_MR(g); DistMatrix<C,STAR,VR > Z_STAR_VR(g); LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionDown ( t, tT, tB, 0 ); PartitionDown ( A, AT, AB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); const int HPanHeight = H01.Height() + H11.Height(); const int HPanOffset = std::min( H11.Width(), std::max(offset-H00.Width(),0) ); const int HPanWidth = H11.Width()-HPanOffset; HPan.LockedView( H, 0, H00.Width()+HPanOffset, HPanHeight, HPanWidth ); LockedRepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2, HPanWidth ); RepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); ATop.View2x1( A0, A1 ); HPan_MC_STAR.AlignWith( ATop ); Z_STAR_MR.AlignWith( ATop ); Z_STAR_VR.AlignWith( ATop ); Z_STAR_MR.ResizeTo( HPan.Width(), ATop.Width() ); SInv_STAR_STAR.ResizeTo( HPan.Width(), HPan.Width() ); Zero( SInv_STAR_STAR ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( RIGHT, UPPER, offset, HPanCopy ); SetDiagonalToOne( RIGHT, offset, HPanCopy ); HPan_VC_STAR = HPanCopy; Herk ( UPPER, ADJOINT, (C)1, HPan_VC_STAR.LockedLocalMatrix(), (C)0, SInv_STAR_STAR.LocalMatrix() ); SInv_STAR_STAR.SumOverGrid(); t1_STAR_STAR = t1; FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR ); HPan_MC_STAR = HPanCopy; internal::LocalGemm ( ADJOINT, NORMAL, (C)1, HPan_MC_STAR, ATop, (C)0, Z_STAR_MR ); Z_STAR_VR.SumScatterFrom( Z_STAR_MR ); internal::LocalTrsm ( LEFT, UPPER, ADJOINT, NON_UNIT, (C)1, SInv_STAR_STAR, Z_STAR_VR ); Z_STAR_MR = Z_STAR_VR; internal::LocalGemm ( NORMAL, NORMAL, (C)-1, HPan_MC_STAR, Z_STAR_MR, (C)1, ATop ); //--------------------------------------------------------------------// HPan_MC_STAR.FreeAlignments(); Z_STAR_MR.FreeAlignments(); Z_STAR_VR.FreeAlignments(); SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlideLockedPartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); SlidePartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); } #ifndef RELEASE PopCallStack(); #endif }
int QDWH ( Matrix<F>& A, typename Base<F>::type lowerBound, typename Base<F>::type upperBound ) { #ifndef RELEASE PushCallStack("QDWH"); #endif typedef typename Base<F>::type R; const int height = A.Height(); const int width = A.Width(); const R oneHalf = R(1)/R(2); const R oneThird = R(1)/R(3); if( height < width ) throw std::logic_error("Height cannot be less than width"); const R epsilon = lapack::MachineEpsilon<R>(); const R tol = 5*epsilon; const R cubeRootTol = Pow(tol,oneThird); // Form the first iterate Scale( 1/upperBound, A ); int numIts=0; R frobNormADiff; Matrix<F> ALast; Matrix<F> Q( height+width, width ); Matrix<F> QT, QB; PartitionDown( Q, QT, QB, height ); Matrix<F> C; Matrix<F> ATemp; do { ++numIts; ALast = A; R L2; Complex<R> dd, sqd; if( Abs(1-lowerBound) < tol ) { L2 = 1; dd = 0; sqd = 1; } else { L2 = lowerBound*lowerBound; dd = Pow( 4*(1-L2)/(L2*L2), oneThird ); sqd = Sqrt( 1+dd ); } const Complex<R> arg = 8 - 4*dd + 8*(2-L2)/(L2*sqd); const R a = (sqd + Sqrt( arg )/2).real; const R b = (a-1)*(a-1)/4; const R c = a+b-1; const Complex<R> alpha = a-b/c; const Complex<R> beta = b/c; lowerBound = lowerBound*(a+b*L2)/(1+c*L2); if( c > 100 ) { // // The standard QR-based algorithm // QT = A; Scale( Sqrt(c), QT ); MakeIdentity( QB ); ExplicitQR( Q ); Gemm( NORMAL, ADJOINT, alpha/Sqrt(c), QT, QB, beta, A ); } else { // // Use faster Cholesky-based algorithm since A is well-conditioned // Identity( width, width, C ); Herk( LOWER, ADJOINT, F(c), A, F(1), C ); Cholesky( LOWER, C ); ATemp = A; Trsm( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), C, ATemp ); Trsm( RIGHT, LOWER, NORMAL, NON_UNIT, F(1), C, ATemp ); Scale( beta, A ); Axpy( alpha, ATemp, A ); } Axpy( F(-1), A, ALast ); frobNormADiff = Norm( ALast, FROBENIUS_NORM ); } while( frobNormADiff > cubeRootTol || Abs(1-lowerBound) > tol ); #ifndef RELEASE PopCallStack(); #endif return numIts; }
inline void internal::ApplyPackedReflectorsLUVF ( int offset, const DistMatrix<R,MC,MR>& H, DistMatrix<R,MC,MR>& A ) { #ifndef RELEASE PushCallStack("internal::ApplyPackedReflectorsLUVF"); if( H.Grid() != A.Grid() ) throw std::logic_error("{H,A} must be distributed over the same grid"); if( offset > H.Height() ) throw std::logic_error("Transforms cannot extend above matrix"); if( offset < 0 ) throw std::logic_error("Transforms cannot extend below matrix"); if( H.Width() != A.Height() ) throw std::logic_error ("Width of transforms must equal height of target matrix"); #endif const Grid& g = H.Grid(); // Matrix views DistMatrix<R,MC,MR> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<R,MC,MR> AT(g), A0(g), ATop(g), AB(g), A1(g), A2(g); DistMatrix<R,MC, MR > HPanCopy(g); DistMatrix<R,VC, STAR> HPan_VC_STAR(g); DistMatrix<R,MC, STAR> HPan_MC_STAR(g); DistMatrix<R,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<R,STAR,MR > Z_STAR_MR(g); DistMatrix<R,STAR,VR > Z_STAR_VR(g); LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); PartitionDown ( A, AT, AB, 0 ); while( HTL.Height() < H.Height() && HTL.Width() < H.Width() ) { LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); const int HPanHeight = H01.Height() + H11.Height(); const int HPanOffset = std::min( H11.Width(), std::max(offset-H00.Width(),0) ); const int HPanWidth = H11.Width()-HPanOffset; HPan.LockedView( H, 0, H00.Width()+HPanOffset, HPanHeight, HPanWidth ); RepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); ATop.View2x1( A0, A1 ); HPan_MC_STAR.AlignWith( ATop ); Z_STAR_MR.AlignWith( ATop ); Z_STAR_VR.AlignWith( ATop ); Z_STAR_MR.ResizeTo( HPan.Width(), ATop.Width() ); SInv_STAR_STAR.ResizeTo( HPan.Width(), HPan.Width() ); Zero( SInv_STAR_STAR ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( RIGHT, UPPER, offset, HPanCopy ); SetDiagonalToOne( RIGHT, offset, HPanCopy ); HPan_VC_STAR = HPanCopy; Syrk ( UPPER, TRANSPOSE, (R)1, HPan_VC_STAR.LockedLocalMatrix(), (R)0, SInv_STAR_STAR.LocalMatrix() ); SInv_STAR_STAR.SumOverGrid(); HalveMainDiagonal( SInv_STAR_STAR ); HPan_MC_STAR = HPanCopy; internal::LocalGemm ( TRANSPOSE, NORMAL, (R)1, HPan_MC_STAR, ATop, (R)0, Z_STAR_MR ); Z_STAR_VR.SumScatterFrom( Z_STAR_MR ); internal::LocalTrsm ( LEFT, UPPER, TRANSPOSE, NON_UNIT, (R)1, SInv_STAR_STAR, Z_STAR_VR ); Z_STAR_MR = Z_STAR_VR; internal::LocalGemm ( NORMAL, NORMAL, (R)-1, HPan_MC_STAR, Z_STAR_MR, (R)1, ATop ); //--------------------------------------------------------------------// HPan_MC_STAR.FreeAlignments(); Z_STAR_MR.FreeAlignments(); Z_STAR_VR.FreeAlignments(); SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); SlidePartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void MakeTrapezoidal ( LeftOrRight side, UpperOrLower uplo, int offset, DistMatrix<T,U,V>& A ) { #ifndef RELEASE PushCallStack("MakeTrapezoidal"); #endif const int height = A.Height(); const int width = A.Width(); const int localHeight = A.LocalHeight(); const int localWidth = A.LocalWidth(); const int colShift = A.ColShift(); const int rowShift = A.RowShift(); const int colStride = A.ColStride(); const int rowStride = A.RowStride(); T* buffer = A.Buffer(); const int ldim = A.LDim(); if( uplo == LOWER ) { #ifdef HAVE_OPENMP #pragma omp parallel for #endif for( int jLocal=0; jLocal<localWidth; ++jLocal ) { const int j = rowShift + jLocal*rowStride; const int lastZeroRow = ( side==LEFT ? j-offset-1 : j-offset+height-width-1 ); if( lastZeroRow >= 0 ) { const int boundary = std::min( lastZeroRow+1, height ); const int numZeroRows = Length_( boundary, colShift, colStride ); MemZero( &buffer[jLocal*ldim], numZeroRows ); } } } else { #ifdef HAVE_OPENMP #pragma omp parallel for #endif for( int jLocal=0; jLocal<localWidth; ++jLocal ) { const int j = rowShift + jLocal*rowStride; const int firstZeroRow = ( side==LEFT ? std::max(j-offset+1,0) : std::max(j-offset+height-width+1,0) ); const int numNonzeroRows = Length_(firstZeroRow,colShift,colStride); if( numNonzeroRows < localHeight ) { T* col = &buffer[numNonzeroRows+jLocal*ldim]; MemZero( col, localHeight-numNonzeroRows ); } } } #ifndef RELEASE PopCallStack(); #endif }
CallStackEntry( string s ) { if( !uncaught_exception() ) PushCallStack(s); }
inline void Trsm ( LeftOrRight side, UpperOrLower uplo, Orientation orientation, UnitOrNonUnit diag, F alpha, const DistMatrix<F>& A, DistMatrix<F>& B, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("Trsm"); if( A.Grid() != B.Grid() ) throw std::logic_error("A and B must use the same grid"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( side == LEFT ) { if( A.Height() != B.Height() ) throw std::logic_error("Nonconformal Trsm"); } else { if( A.Height() != B.Width() ) throw std::logic_error("Nonconformal Trsm"); } #endif const int p = B.Grid().Size(); if( side == LEFT && uplo == LOWER ) { if( orientation == NORMAL ) { if( B.Width() > 5*p ) internal::TrsmLLNLarge( diag, alpha, A, B, checkIfSingular ); else internal::TrsmLLNMedium( diag, alpha, A, B, checkIfSingular ); } else { if( B.Width() > 5*p ) internal::TrsmLLTLarge ( orientation, diag, alpha, A, B, checkIfSingular ); else internal::TrsmLLTMedium ( orientation, diag, alpha, A, B, checkIfSingular ); } } else if( side == LEFT && uplo == UPPER ) { if( orientation == NORMAL ) { if( B.Width() > 5*p ) internal::TrsmLUNLarge( diag, alpha, A, B, checkIfSingular ); else internal::TrsmLUNMedium( diag, alpha, A, B, checkIfSingular ); } else { if( B.Width() > 5*p ) internal::TrsmLUTLarge ( orientation, diag, alpha, A, B, checkIfSingular ); else internal::TrsmLUTMedium ( orientation, diag, alpha, A, B, checkIfSingular ); } } else if( side == RIGHT && uplo == LOWER ) { if( orientation == NORMAL ) internal::TrsmRLN( diag, alpha, A, B, checkIfSingular ); else internal::TrsmRLT ( orientation, diag, alpha, A, B, checkIfSingular ); } else if( side == RIGHT && uplo == UPPER ) { if( orientation == NORMAL ) internal::TrsmRUN( diag, alpha, A, B, checkIfSingular ); else internal::TrsmRUT ( orientation, diag, alpha, A, B, checkIfSingular ); } #ifndef RELEASE PopCallStack(); #endif }
inline void ReformNormalMatrix ( DistMatrix<Complex<R>,MC,MR >& A, const DistMatrix<R, VR,STAR>& w, const DistMatrix<Complex<R>,MC,MR >& Z, const ComplexFunctor& f ) { #ifndef RELEASE PushCallStack("hermitian_function::ReformNormalMatrix"); #endif const Grid& g = A.Grid(); typedef Complex<R> C; DistMatrix<C,MC,MR> ZL(g), ZR(g), Z0(g), Z1(g), Z2(g); DistMatrix<R,VR,STAR> wT(g), w0(g), wB(g), w1(g), w2(g); DistMatrix<C,MC, STAR> Z1_MC_STAR(g); DistMatrix<C,VR, STAR> Z1_VR_STAR(g); DistMatrix<C,STAR,MR > Z1Adj_STAR_MR(g); DistMatrix<R,STAR,STAR> w1_STAR_STAR(g); Zero( A ); LockedPartitionRight( Z, ZL, ZR, 0 ); LockedPartitionDown ( w, wT, wB, 0 ); while( ZL.Width() < Z.Width() ) { LockedRepartitionRight ( ZL, /**/ ZR, Z0, /**/ Z1, Z2 ); LockedRepartitionDown ( wT, w0, /**/ /**/ w1, wB, w2 ); Z1_MC_STAR.AlignWith( A ); Z1_VR_STAR.AlignWith( A ); Z1Adj_STAR_MR.AlignWith( A ); //--------------------------------------------------------------------// Z1_MC_STAR = Z1; Z1_VR_STAR = Z1_MC_STAR; w1_STAR_STAR = w1; // Scale Z1[VR,* ] with the modified eigenvalues const int width = Z1_VR_STAR.Width(); const int localHeight = Z1_VR_STAR.LocalHeight(); for( int j=0; j<width; ++j ) { const C conjOmega = Conj(f(w1_STAR_STAR.GetLocalEntry(j,0))); C* buffer = Z1_VR_STAR.LocalBuffer(0,j); for( int iLocal=0; iLocal<localHeight; ++iLocal ) buffer[iLocal] *= conjOmega; } Z1Adj_STAR_MR.AdjointFrom( Z1_VR_STAR ); internal::LocalGemm ( NORMAL, NORMAL, (C)1, Z1_MC_STAR, Z1Adj_STAR_MR, (C)1, A ); //--------------------------------------------------------------------// Z1Adj_STAR_MR.FreeAlignments(); Z1_VR_STAR.FreeAlignments(); Z1_MC_STAR.FreeAlignments(); SlideLockedPartitionDown ( wT, w0, w1, /**/ /**/ wB, w2 ); SlideLockedPartitionRight ( ZL, /**/ ZR, Z0, Z1, /**/ Z2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void Var3Unb( Orientation orientation, Matrix<F>& A, Matrix<F>& d ) { #ifndef RELEASE PushCallStack("ldl::Var3Unb"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( d.Viewing() && (d.Height() != A.Height() || d.Width() != 1) ) throw std::logic_error ("d must be a column vector the same height as A"); if( orientation == NORMAL ) throw std::logic_error("Can only perform LDL^T or LDL^H"); #endif const int n = A.Height(); if( !d.Viewing() ) d.ResizeTo( n, 1 ); F* ABuffer = A.Buffer(); F* dBuffer = d.Buffer(); const int ldim = A.LDim(); for( int j=0; j<n; ++j ) { const int a21Height = n - (j+1); // Extract and store the diagonal of D const F alpha11 = ABuffer[j+j*ldim]; if( alpha11 == F(0) ) throw SingularMatrixException(); dBuffer[j] = alpha11; F* RESTRICT a21 = &ABuffer[(j+1)+j*ldim]; if( orientation == ADJOINT ) { // A22 := A22 - a21 (a21 / alpha11)^H for( int k=0; k<a21Height; ++k ) { const F beta = Conj(a21[k]/alpha11); F* RESTRICT A22Col = &ABuffer[(j+1)+(j+1+k)*ldim]; for( int i=k; i<a21Height; ++i ) A22Col[i] -= a21[i]*beta; } } else { // A22 := A22 - a21 (a21 / alpha11)^T for( int k=0; k<a21Height; ++k ) { const F beta = a21[k]/alpha11; F* RESTRICT A22Col = &ABuffer[(j+1)+(j+1+k)*ldim]; for( int i=k; i<a21Height; ++i ) A22Col[i] -= a21[i]*beta; } } // a21 := a21 / alpha11 for( int i=0; i<a21Height; ++i ) a21[i] /= alpha11; } #ifndef RELEASE PopCallStack(); #endif }
inline void ReformHermitianMatrix ( UpperOrLower uplo, DistMatrix<R,MC,MR>& A, const DistMatrix<R,VR,STAR>& w, const DistMatrix<R,MC,MR>& Z, const RealFunctor& f ) { #ifndef RELEASE PushCallStack("hermitian_function::ReformHermitianMatrix"); #endif const Grid& g = A.Grid(); DistMatrix<R,MC,MR> ZL(g), ZR(g), Z0(g), Z1(g), Z2(g); DistMatrix<R,VR,STAR> wT(g), w0(g), wB(g), w1(g), w2(g); DistMatrix<R,MC, STAR> Z1_MC_STAR(g); DistMatrix<R,VR, STAR> Z1_VR_STAR(g); DistMatrix<R,STAR,MR > Z1Trans_STAR_MR(g); DistMatrix<R,STAR,STAR> w1_STAR_STAR(g); if( uplo == LOWER ) MakeTrapezoidal( LEFT, UPPER, 1, A ); else MakeTrapezoidal( LEFT, LOWER, -1, A ); LockedPartitionRight( Z, ZL, ZR, 0 ); LockedPartitionDown ( w, wT, wB, 0 ); while( ZL.Width() < Z.Width() ) { LockedRepartitionRight ( ZL, /**/ ZR, Z0, /**/ Z1, Z2 ); LockedRepartitionDown ( wT, w0, /**/ /**/ w1, wB, w2 ); Z1_MC_STAR.AlignWith( A ); Z1_VR_STAR.AlignWith( A ); Z1Trans_STAR_MR.AlignWith( A ); //--------------------------------------------------------------------// Z1_MC_STAR = Z1; Z1_VR_STAR = Z1_MC_STAR; w1_STAR_STAR = w1; // Scale Z1[VR,* ] with the modified eigenvalues const int width = Z1_VR_STAR.Width(); const int localHeight = Z1_VR_STAR.LocalHeight(); for( int j=0; j<width; ++j ) { const R omega = f(w1_STAR_STAR.GetLocalEntry(j,0)); R* buffer = Z1_VR_STAR.LocalBuffer(0,j); for( int iLocal=0; iLocal<localHeight; ++iLocal ) buffer[iLocal] *= omega; } Z1Trans_STAR_MR.TransposeFrom( Z1_VR_STAR ); internal::LocalTrrk( uplo, (R)1, Z1_MC_STAR, Z1Trans_STAR_MR, (R)1, A ); //--------------------------------------------------------------------// Z1Trans_STAR_MR.FreeAlignments(); Z1_VR_STAR.FreeAlignments(); Z1_MC_STAR.FreeAlignments(); SlideLockedPartitionDown ( wT, w0, w1, /**/ /**/ wB, w2 ); SlideLockedPartitionRight ( ZL, /**/ ZR, Z0, Z1, /**/ Z2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void LocalTrmmAccumulateLLN ( Orientation orientation, UnitOrNonUnit diag, T alpha, const DistMatrix<T,MC, MR >& L, const DistMatrix<T,STAR,MR >& XAdjOrTrans_STAR_MR, DistMatrix<T,MC, STAR>& Z_MC_STAR ) { #ifndef RELEASE PushCallStack("internal::LocalTrmmAccumulateLLN"); if( L.Grid() != XAdjOrTrans_STAR_MR.Grid() || XAdjOrTrans_STAR_MR.Grid() != Z_MC_STAR.Grid() ) throw std::logic_error ("{L,X,Z} must be distributed over the same grid"); if( L.Height() != L.Width() || L.Height() != XAdjOrTrans_STAR_MR.Width() || L.Height() != Z_MC_STAR.Height() || XAdjOrTrans_STAR_MR.Height() != Z_MC_STAR.Width() ) { std::ostringstream msg; msg << "Nonconformal LocalTrmmAccumulateLLN: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X^H/T[* ,MR] ~ " << XAdjOrTrans_STAR_MR.Height() << " x " << XAdjOrTrans_STAR_MR.Width() << "\n" << " Z[MC,* ] ~ " << Z_MC_STAR.Height() << " x " << Z_MC_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( XAdjOrTrans_STAR_MR.RowAlignment() != L.RowAlignment() || Z_MC_STAR.ColAlignment() != L.ColAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<T> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<T> D11(g); DistMatrix<T,STAR,MR> XLAdjOrTrans_STAR_MR(g), XRAdjOrTrans_STAR_MR(g), X0AdjOrTrans_STAR_MR(g), X1AdjOrTrans_STAR_MR(g), X2AdjOrTrans_STAR_MR(g); DistMatrix<T,MC,STAR> ZT_MC_STAR(g), Z0_MC_STAR(g), ZB_MC_STAR(g), Z1_MC_STAR(g), Z2_MC_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); LockedPartitionRight ( XAdjOrTrans_STAR_MR, XLAdjOrTrans_STAR_MR, XRAdjOrTrans_STAR_MR, 0 ); PartitionDown ( Z_MC_STAR, ZT_MC_STAR, ZB_MC_STAR, 0 ); while( LTL.Height() < L.Height() ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); LockedRepartitionRight ( XLAdjOrTrans_STAR_MR, /**/ XRAdjOrTrans_STAR_MR, X0AdjOrTrans_STAR_MR, /**/ X1AdjOrTrans_STAR_MR, X2AdjOrTrans_STAR_MR ); RepartitionDown ( ZT_MC_STAR, Z0_MC_STAR, /**********/ /**********/ Z1_MC_STAR, ZB_MC_STAR, Z2_MC_STAR ); D11.AlignWith( L11 ); //--------------------------------------------------------------------// D11 = L11; MakeTrapezoidal( LEFT, LOWER, 0, D11 ); if( diag == UNIT ) SetDiagonalToOne( D11 ); LocalGemm ( NORMAL, orientation, alpha, D11, X1AdjOrTrans_STAR_MR, T(1), Z1_MC_STAR ); LocalGemm ( NORMAL, orientation, alpha, L21, X1AdjOrTrans_STAR_MR, T(1), Z2_MC_STAR ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlideLockedPartitionRight ( XLAdjOrTrans_STAR_MR, /**/ XRAdjOrTrans_STAR_MR, X0AdjOrTrans_STAR_MR, X1AdjOrTrans_STAR_MR, /**/ X2AdjOrTrans_STAR_MR ); SlidePartitionDown ( ZT_MC_STAR, Z0_MC_STAR, Z1_MC_STAR, /**********/ /**********/ ZB_MC_STAR, Z2_MC_STAR ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void TrtrmmUVar1( Orientation orientation, DistMatrix<T>& U ) { #ifndef RELEASE PushCallStack("internal::TrtrmmUVar1"); if( U.Height() != U.Width() ) throw std::logic_error("U must be square"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<T> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<T,MC, STAR> U01_MC_STAR(g); DistMatrix<T,VC, STAR> U01_VC_STAR(g); DistMatrix<T,VR, STAR> U01_VR_STAR(g); DistMatrix<T,STAR,MR > U01AdjOrTrans_STAR_MR(g); DistMatrix<T,STAR,STAR> U11_STAR_STAR(g); U01_MC_STAR.AlignWith( U ); U01_VC_STAR.AlignWith( U ); U01_VR_STAR.AlignWith( U ); U01AdjOrTrans_STAR_MR.AlignWith( U ); PartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( UTL.Height() < U.Height() && UTL.Width() < U.Height() ) { RepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); //--------------------------------------------------------------------// U01_MC_STAR = U01; U01_VC_STAR = U01_MC_STAR; U01_VR_STAR = U01_VC_STAR; if( orientation == ADJOINT ) U01AdjOrTrans_STAR_MR.AdjointFrom( U01_VR_STAR ); else U01AdjOrTrans_STAR_MR.TransposeFrom( U01_VR_STAR ); LocalTrrk( UPPER, T(1), U01_MC_STAR, U01AdjOrTrans_STAR_MR, T(1), U00 ); U11_STAR_STAR = U11; LocalTrmm ( RIGHT, UPPER, orientation, NON_UNIT, T(1), U11_STAR_STAR, U01_VC_STAR ); U01 = U01_VC_STAR; LocalTrtrmm( orientation, UPPER, U11_STAR_STAR ); U11 = U11_STAR_STAR; //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TwoSidedTrmmLVar4 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& L ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrmmLVar4"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( L.Height() != L.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != L.Height() ) throw std::logic_error("A and L must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); // Temporary distributions DistMatrix<F,STAR,VR > A10_STAR_VR(g); DistMatrix<F,STAR,MR > A10_STAR_MR(g); DistMatrix<F,STAR,MC > A10_STAR_MC(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,MC, STAR> A21_MC_STAR(g); DistMatrix<F,STAR,VR > L10_STAR_VR(g); DistMatrix<F,MR, STAR> L10Adj_MR_STAR(g); DistMatrix<F,STAR,MC > L10_STAR_MC(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,VR > Y10_STAR_VR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); A10_STAR_VR.AlignWith( A00 ); A10_STAR_MR.AlignWith( A00 ); A10_STAR_MC.AlignWith( A00 ); A21_MC_STAR.AlignWith( A20 ); L10_STAR_VR.AlignWith( A00 ); L10Adj_MR_STAR.AlignWith( A00 ); L10_STAR_MC.AlignWith( A00 ); Y10_STAR_VR.AlignWith( A10 ); //--------------------------------------------------------------------// // Y10 := A11 L10 A11_STAR_STAR = A11; L10Adj_MR_STAR.AdjointFrom( L10 ); L10_STAR_VR.AdjointFrom( L10Adj_MR_STAR ); Y10_STAR_VR.ResizeTo( A10.Height(), A10.Width() ); Zero( Y10_STAR_VR ); Hemm ( LEFT, LOWER, F(1), A11_STAR_STAR.LockedLocalMatrix(), L10_STAR_VR.LockedLocalMatrix(), F(0), Y10_STAR_VR.LocalMatrix() ); // A10 := A10 + 1/2 Y10 A10_STAR_VR = A10; Axpy( F(1)/F(2), Y10_STAR_VR, A10_STAR_VR ); // A00 := A00 + (A10' L10 + L10' A10) A10_STAR_MR = A10_STAR_VR; A10_STAR_MC = A10_STAR_VR; L10_STAR_MC = L10_STAR_VR; LocalTrr2k ( LOWER, ADJOINT, ADJOINT, ADJOINT, F(1), A10_STAR_MC, L10Adj_MR_STAR, L10_STAR_MC, A10_STAR_MR, F(1), A00 ); // A10 := A10 + 1/2 Y10 Axpy( F(1)/F(2), Y10_STAR_VR, A10_STAR_VR ); // A10 := L11' A10 L11_STAR_STAR = L11; LocalTrmm ( LEFT, LOWER, ADJOINT, diag, F(1), L11_STAR_STAR, A10_STAR_VR ); A10 = A10_STAR_VR; // A20 := A20 + A21 L10 A21_MC_STAR = A21; LocalGemm ( NORMAL, ADJOINT, F(1), A21_MC_STAR, L10Adj_MR_STAR, F(1), A20 ); // A11 := L11' A11 L11 LocalTwoSidedTrmm( LOWER, diag, A11_STAR_STAR, L11_STAR_STAR ); A11 = A11_STAR_STAR; // A21 := A21 L11 A21_VC_STAR = A21_MC_STAR; LocalTrmm ( RIGHT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; //--------------------------------------------------------------------// A10_STAR_VR.FreeAlignments(); A10_STAR_MR.FreeAlignments(); A10_STAR_MC.FreeAlignments(); A21_MC_STAR.FreeAlignments(); L10_STAR_VR.FreeAlignments(); L10Adj_MR_STAR.FreeAlignments(); L10_STAR_MC.FreeAlignments(); Y10_STAR_VR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrtrsmLLN ( UnitOrNonUnit diag, F alpha, const DistMatrix<F>& L, DistMatrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("internal::TrtrsmLLN"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F> XTL(g), XTR(g), X00(g), X01(g), X02(g), XBL(g), XBR(g), X10(g), X11(g), X12(g), X20(g), X21(g), X22(g); // Temporary distributions DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,STAR,MR > X10_STAR_MR(g); DistMatrix<F,STAR,VR > X10_STAR_VR(g); DistMatrix<F,STAR,MR > X11_STAR_MR(g); DistMatrix<F,STAR,STAR> X11_STAR_STAR(g); // Start the algorithm ScaleTrapezoid( alpha, LEFT, LOWER, 0, X ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionDownDiagonal ( X, XTL, XTR, XBL, XBR, 0 ); while( XBR.Height() > 0 ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); RepartitionDownDiagonal ( XTL, /**/ XTR, X00, /**/ X01, X02, /*************/ /******************/ /**/ X10, /**/ X11, X12, XBL, /**/ XBR, X20, /**/ X21, X22 ); L21_MC_STAR.AlignWith( X20 ); X10_STAR_MR.AlignWith( X20 ); X11_STAR_MR.AlignWith( X21 ); //--------------------------------------------------------------------// L11_STAR_STAR = L11; X11_STAR_STAR = X11; X10_STAR_VR = X10; LocalTrsm ( LEFT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, X10_STAR_VR, checkIfSingular ); LocalTrtrsm ( LEFT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, X11_STAR_STAR, checkIfSingular ); X11 = X11_STAR_STAR; X11_STAR_MR = X11_STAR_STAR; MakeTrapezoidal( LEFT, LOWER, 0, X11_STAR_MR ); X10_STAR_MR = X10_STAR_VR; X10 = X10_STAR_MR; L21_MC_STAR = L21; LocalGemm ( NORMAL, NORMAL, F(-1), L21_MC_STAR, X10_STAR_MR, F(1), X20 ); LocalGemm ( NORMAL, NORMAL, F(-1), L21_MC_STAR, X11_STAR_MR, F(1), X21 ); //--------------------------------------------------------------------// L21_MC_STAR.FreeAlignments(); X10_STAR_MR.FreeAlignments(); X11_STAR_MR.FreeAlignments(); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlidePartitionDownDiagonal ( XTL, /**/ XTR, X00, X01, /**/ X02, /**/ X10, X11, /**/ X12, /*************/ /******************/ XBL, /**/ XBR, X20, X21, /**/ X22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void SimpleSVDUpper ( DistMatrix<Real>& A, DistMatrix<Real,VR,STAR>& s, DistMatrix<Real>& V ) { #ifndef RELEASE PushCallStack("svd::SimpleSVDUpper"); #endif const int m = A.Height(); const int n = A.Width(); const int k = std::min( m, n ); const int offdiagonal = ( m>=n ? 1 : -1 ); const char uplo = ( m>=n ? 'U' : 'L' ); const Grid& g = A.Grid(); // Bidiagonalize A Bidiag( A ); // Grab copies of the diagonal and sub/super-diagonal of A DistMatrix<Real,MD,STAR> d_MD_STAR( g ), e_MD_STAR( g ); A.GetDiagonal( d_MD_STAR ); A.GetDiagonal( e_MD_STAR, offdiagonal ); // In order to use serial QR kernels, we need the full bidiagonal matrix // on each process. // // NOTE: lapack::BidiagQRAlg expects e to be of length k DistMatrix<Real,STAR,STAR> d_STAR_STAR( d_MD_STAR ); DistMatrix<Real,STAR,STAR> eHat_STAR_STAR( k, 1, g ); DistMatrix<Real,STAR,STAR> e_STAR_STAR( g ); e_STAR_STAR.View( eHat_STAR_STAR, 0, 0, k-1, 1 ); e_STAR_STAR = e_MD_STAR; // Initialize U and VTrans to the appropriate identity matrices. DistMatrix<Real,VC,STAR> U_VC_STAR( g ); DistMatrix<Real,STAR,VC> VTrans_STAR_VC( g ); U_VC_STAR.AlignWith( A ); VTrans_STAR_VC.AlignWith( V ); Identity( m, k, U_VC_STAR ); Identity( k, n, VTrans_STAR_VC ); // Compute the SVD of the bidiagonal matrix and accumulate the Givens // rotations into our local portion of U and VTrans Matrix<Real>& ULocal = U_VC_STAR.LocalMatrix(); Matrix<Real>& VTransLocal = VTrans_STAR_VC.LocalMatrix(); lapack::BidiagQRAlg ( uplo, k, VTransLocal.Width(), ULocal.Height(), d_STAR_STAR.LocalBuffer(), e_STAR_STAR.LocalBuffer(), VTransLocal.Buffer(), VTransLocal.LDim(), ULocal.Buffer(), ULocal.LDim() ); // Make a copy of A (for the Householder vectors) and pull the necessary // portions of U and VTrans into a standard matrix dist. DistMatrix<Real> B( A ); if( m >= n ) { DistMatrix<Real> AT( g ), AB( g ); DistMatrix<Real,VC,STAR> UT_VC_STAR( g ), UB_VC_STAR( g ); PartitionDown( A, AT, AB, n ); PartitionDown( U_VC_STAR, UT_VC_STAR, UB_VC_STAR, n ); AT = UT_VC_STAR; MakeZeros( AB ); Transpose( VTrans_STAR_VC, V ); } else { DistMatrix<Real> VT( g ), VB( g ); DistMatrix<Real,STAR,VC> VTransL_STAR_VC( g ), VTransR_STAR_VC( g ); PartitionDown( V, VT, VB, m ); PartitionRight( VTrans_STAR_VC, VTransL_STAR_VC, VTransR_STAR_VC, m ); Transpose( VTransL_STAR_VC, VT ); MakeZeros( VB ); } // Backtransform U and V if( m >= n ) { ApplyPackedReflectors ( LEFT, LOWER, VERTICAL, BACKWARD, 0, B, A ); ApplyPackedReflectors ( LEFT, UPPER, HORIZONTAL, BACKWARD, 1, B, V ); } else { ApplyPackedReflectors ( LEFT, LOWER, VERTICAL, BACKWARD, -1, B, A ); ApplyPackedReflectors ( LEFT, UPPER, HORIZONTAL, BACKWARD, 0, B, V ); } // Copy out the appropriate subset of the singular values s = d_STAR_STAR; #ifndef RELEASE PopCallStack(); #endif }
inline void Her2kUC ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::Her2kUC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Width() != C.Height() || A.Width() != C.Width() || B.Width() != C.Height() || B.Width() != C.Width() || A.Height() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal Her2kUC:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,MR, STAR> A1Trans_MR_STAR(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); DistMatrix<T,STAR,VR > A1_STAR_VR(g); DistMatrix<T,STAR,VR > B1_STAR_VR(g); DistMatrix<T,STAR,MC > A1_STAR_MC(g); DistMatrix<T,STAR,MC > B1_STAR_MC(g); A1Trans_MR_STAR.AlignWith( C ); B1Trans_MR_STAR.AlignWith( C ); A1_STAR_MC.AlignWith( C ); B1_STAR_MC.AlignWith( C ); // Start the algorithm ScaleTrapezoid( beta, LEFT, UPPER, 0, C ); LockedPartitionDown ( A, AT, AB, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); //--------------------------------------------------------------------// A1Trans_MR_STAR.TransposeFrom( A1 ); A1_STAR_VR.TransposeFrom( A1Trans_MR_STAR ); A1_STAR_MC = A1_STAR_VR; B1Trans_MR_STAR.TransposeFrom( B1 ); B1_STAR_VR.TransposeFrom( B1Trans_MR_STAR ); B1_STAR_MC = B1_STAR_VR; LocalTrr2k ( UPPER, ADJOINT, TRANSPOSE, ADJOINT, TRANSPOSE, alpha, A1_STAR_MC, B1Trans_MR_STAR, B1_STAR_MC, A1Trans_MR_STAR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); } #ifndef RELEASE PopCallStack(); #endif }