void Tikhonov ( Orientation orientation, const Matrix<F>& A, const Matrix<F>& B, const Matrix<F>& G, Matrix<F>& X, TikhonovAlg alg ) { DEBUG_CSE const bool normal = ( orientation==NORMAL ); const Int m = ( normal ? A.Height() : A.Width() ); const Int n = ( normal ? A.Width() : A.Height() ); if( G.Width() != n ) LogicError("Tikhonov matrix was the wrong width"); if( orientation == TRANSPOSE && IsComplex<F>::value ) LogicError("Transpose version of complex Tikhonov not yet supported"); if( m >= n ) { Matrix<F> Z; if( alg == TIKHONOV_CHOLESKY ) { if( orientation == NORMAL ) Herk( LOWER, ADJOINT, Base<F>(1), A, Z ); else Herk( LOWER, NORMAL, Base<F>(1), A, Z ); Herk( LOWER, ADJOINT, Base<F>(1), G, Base<F>(1), Z ); Cholesky( LOWER, Z ); } else { const Int mG = G.Height(); Zeros( Z, m+mG, n ); auto ZT = Z( IR(0,m), IR(0,n) ); auto ZB = Z( IR(m,m+mG), IR(0,n) ); if( orientation == NORMAL ) ZT = A; else Adjoint( A, ZT ); ZB = G; qr::ExplicitTriang( Z ); } if( orientation == NORMAL ) Gemm( ADJOINT, NORMAL, F(1), A, B, X ); else Gemm( NORMAL, NORMAL, F(1), A, B, X ); cholesky::SolveAfter( LOWER, NORMAL, Z, X ); } else { LogicError("This case not yet supported"); } }
void Tikhonov ( Orientation orientation, const SparseMatrix<F>& A, const Matrix<F>& B, const SparseMatrix<F>& G, Matrix<F>& X, const LeastSquaresCtrl<Base<F>>& ctrl ) { DEBUG_CSE // Explicitly form W := op(A) // ========================== SparseMatrix<F> W; if( orientation == NORMAL ) W = A; else if( orientation == TRANSPOSE ) Transpose( A, W ); else Adjoint( A, W ); const Int m = W.Height(); const Int n = W.Width(); const Int numRHS = B.Width(); // Embed into a higher-dimensional problem via appending regularization // ==================================================================== SparseMatrix<F> WEmb; if( m >= n ) VCat( W, G, WEmb ); else HCat( W, G, WEmb ); Matrix<F> BEmb; Zeros( BEmb, WEmb.Height(), numRHS ); if( m >= n ) { auto BEmbT = BEmb( IR(0,m), IR(0,numRHS) ); BEmbT = B; } else BEmb = B; // Solve the higher-dimensional problem // ==================================== Matrix<F> XEmb; LeastSquares( NORMAL, WEmb, BEmb, XEmb, ctrl ); // Extract the solution // ==================== if( m >= n ) X = XEmb; else X = XEmb( IR(0,n), IR(0,numRHS) ); }
void Skeleton ( const AbstractDistMatrix<F>& APre, DistPermutation& PR, DistPermutation& PC, AbstractDistMatrix<F>& Z, const QRCtrl<Base<F>>& ctrl ) { DEBUG_CSE DistMatrixReadProxy<F,F,MC,MR> AProx( APre ); auto& A = AProx.GetLocked(); const Grid& g = A.Grid(); DistMatrix<F> AAdj(g); Adjoint( A, AAdj ); // Find the row permutation DistMatrix<F> B(AAdj); DistMatrix<F,MD,STAR> householderScalars(g); DistMatrix<Base<F>,MD,STAR> signature(g); QR( B, householderScalars, signature, PR, ctrl ); const Int numSteps = householderScalars.Height(); B.Resize( B.Height(), numSteps ); // Form K' := (A pinv(AR))' = pinv(AR') A' DistMatrix<F> KAdj(g); qr::SolveAfter( NORMAL, B, householderScalars, signature, AAdj, KAdj ); // Form K := (K')' DistMatrix<F> K(g); Adjoint( KAdj, K ); // Find the column permutation (force the same number of steps) B = A; auto secondCtrl = ctrl; secondCtrl.adaptive = false; secondCtrl.boundRank = true; secondCtrl.maxRank = numSteps; QR( B, householderScalars, signature, PC, secondCtrl ); // Form Z := pinv(AC) K = pinv(AC) (A pinv(AR)) B.Resize( B.Height(), numSteps ); qr::SolveAfter( NORMAL, B, householderScalars, signature, K, Z ); }
// Writes into Result the output of Adjoint(A, Result) scaled by the
// reciprocal of GetDeterminate(A).
//
// Returns false, without touching Result, when the determinant compares
// equal to 0.0; returns true otherwise.
bool Matrix33 :: Invert ( Matrix33 & A, Matrix33 & Result )
{
    const double det = GetDeterminate ( A );

    if ( det != 0.0 )
    {
        Adjoint ( A, Result );
        Multiply ( Result, 1.0 / det );
        return true;
    }

    return false;
};
/**
 * @brief Adjoint transform
 *
 * Hands back @p m unchanged when the wavelet family is ID; otherwise
 * starts from a copy of @p m and lets Adjoint fill it in.
 *
 * @param  m  To transform
 * @return    Transform
 */
inline Matrix <T> operator->* (const Matrix <T> & m) {

    if (_wl_fam != ID) {
        Matrix <T> res (m);
        Adjoint (m, res);
        return res;
    }

    return m;

}
void Skeleton ( const Matrix<F>& A, Permutation& PR, Permutation& PC, Matrix<F>& Z, const QRCtrl<Base<F>>& ctrl ) { DEBUG_CSE Matrix<F> AAdj; Adjoint( A, AAdj ); // Find the row permutation Matrix<F> B(AAdj); Matrix<F> householderScalars; Matrix<Base<F>> signature; QR( B, householderScalars, signature, PR, ctrl ); const Int numSteps = householderScalars.Height(); B.Resize( B.Height(), numSteps ); // Form K' := (A pinv(AR))' = pinv(AR') A' Matrix<F> KAdj; qr::SolveAfter( NORMAL, B, householderScalars, signature, AAdj, KAdj ); // Form K := (K')' Matrix<F> K; Adjoint( KAdj, K ); // Find the column permutation (force the same number of steps) B = A; auto secondCtrl = ctrl; secondCtrl.adaptive = false; secondCtrl.boundRank = true; secondCtrl.maxRank = numSteps; QR( B, householderScalars, signature, PC, secondCtrl ); // Form Z := pinv(AC) K = pinv(AC) (A pinv(AR)) B.Resize( B.Height(), numSteps ); qr::SolveAfter( NORMAL, B, householderScalars, signature, K, Z ); }
// Returns Adjoint(), transposed and scaled by 1/Determinant().
// When the determinant compares equal to zero, a default-constructed Matrix
// is returned instead.
Matrix Matrix::Reversed() const
{
    Matrix result;
    const double determinant = Determinant();

    // Nothing to scale by: hand back the default-constructed matrix
    if (determinant == 0)
        return result;

    result = Adjoint();
    result.Transpose();
    result *= (1 / determinant);
    return result;
}
// Returns Adjoint(m) with every one of its 4x4 entries divided by
// Determinant(m). A zero determinant trips the assert.
matrix4x4 Inverse(const matrix4x4 &m)
{
    matrix4x4 inverse = Adjoint(m);
    scalar_t det = Determinant(m);
    assert(det);

    // Walk the sixteen entries in row-major order
    for (int idx = 0; idx < 16; ++idx)
    {
        const int row = idx / 4;
        const int col = idx % 4;
        inverse(row)[col] /= det;
    }

    return inverse;
}
// Rebuilds the `uplo` triangle of A from eigenvalues w and eigenvectors Z.
//
// A is resized to Z.Height() x Z.Height() and passed through MakeTrapezoidal
// for the opposite triangle. The columns of Z are then processed in panels
// of width Blocksize(): each panel is scaled on the right by the matching
// entries of w, adjointed, and accumulated into A through LocalTrrk.
void HermitianFromEVD
( UpperOrLower uplo,
        AbstractDistMatrix<F>& APre,
  const AbstractDistMatrix<Base<F>>& wPre,
  const AbstractDistMatrix<F>& ZPre )
{
    DEBUG_CSE
    typedef Base<F> Real;

    DistMatrixWriteProxy<F,F,MC,MR> AProx( APre );
    DistMatrixReadProxy<Real,Real,VR,STAR> wProx( wPre );
    DistMatrixReadProxy<F,F,MC,MR> ZProx( ZPre );
    auto& A = AProx.Get();
    auto& w = wProx.GetLocked();
    auto& Z = ZProx.GetLocked();

    const Grid& grid = A.Grid();
    DistMatrix<F,MC,  STAR> Z1_MC_STAR(grid);
    DistMatrix<F,VR,  STAR> Z1_VR_STAR(grid);
    DistMatrix<F,STAR,MR  > Z1Adj_STAR_MR(grid);

    const Int height = Z.Height();
    const Int numVecs = Z.Width();

    A.Resize( height, height );
    if( uplo == LOWER )
        MakeTrapezoidal( UPPER, A, 1 );
    else
        MakeTrapezoidal( LOWER, A, -1 );

    const Int blockSize = Blocksize();
    for( Int colBeg=0; colBeg<numVecs; colBeg+=blockSize )
    {
        const Int panelWidth = Min(blockSize,numVecs-colBeg);
        const Int colEnd = colBeg + panelWidth;

        auto Z1 = Z( ALL, IR(colBeg,colEnd) );
        auto w1 = w( IR(colBeg,colEnd), ALL );

        // Two copies of the panel: the [MC,STAR] one stays unscaled, the
        // [VR,STAR] one is scaled by the eigenvalues
        Z1_MC_STAR.AlignWith( A );
        Z1_MC_STAR = Z1;
        Z1_VR_STAR.AlignWith( A );
        Z1_VR_STAR = Z1_MC_STAR;

        DiagonalScale( RIGHT, NORMAL, w1, Z1_VR_STAR );

        Z1Adj_STAR_MR.AlignWith( A );
        Adjoint( Z1_VR_STAR, Z1Adj_STAR_MR );

        // A += Z1 (Z1 diag(w1))^H on the requested triangle
        LocalTrrk( uplo, F(1), Z1_MC_STAR, Z1Adj_STAR_MR, F(1), A );
    }
}
// Runs LanczosDecomp on the smaller of the two products of A with its
// adjoint, applied as an operator without forming the product.
//
// When A is at least as tall as wide, the operator is X -> A^H (A X) and has
// dimension A.Width(); otherwise it is X -> A (A^H X) with dimension
// A.Height(). The adjoint is formed once and reused, and a single scratch
// multi-vector holds the intermediate product. The value returned is the one
// LanczosDecomp returns.
Base<Field> ProductLanczosDecomp
( const DistSparseMatrix<Field>& A,
        DistMultiVec<Field>& V,
        AbstractDistMatrix<Base<Field>>& T,
        DistMultiVec<Field>& v,
        Int basisSize )
{
    EL_DEBUG_CSE
    const Int height = A.Height();
    const Int width = A.Width();
    mpi::Comm comm = A.Comm();

    // Scratch space shared by every operator application
    DistMultiVec<Field> scratch(comm);

    // Form the adjoint once, up front
    DistSparseMatrix<Field> AAdj(comm);
    Adjoint( A, AAdj );

    if( height < width )
    {
        // Y := A (A^H X)
        auto applyOuter =
          [&]( const DistMultiVec<Field>& X, DistMultiVec<Field>& Y )
          {
              Zeros( scratch, width, X.Width() );
              Multiply( NORMAL, Field(1), AAdj, X, Field(0), scratch );
              Zeros( Y, height, X.Width() );
              Multiply( NORMAL, Field(1), A, scratch, Field(0), Y );
          };
        return LanczosDecomp( height, applyOuter, V, T, v, basisSize );
    }

    // Y := A^H (A X)
    auto applyGram =
      [&]( const DistMultiVec<Field>& X, DistMultiVec<Field>& Y )
      {
          Zeros( scratch, height, X.Width() );
          Multiply( NORMAL, Field(1), A, X, Field(0), scratch );
          Zeros( Y, width, X.Width() );
          Multiply( NORMAL, Field(1), AAdj, scratch, Field(0), Y );
      };
    return LanczosDecomp( width, applyGram, V, T, v, basisSize );
}
inline void SVD ( DistMatrix<F>& A, DistMatrix<typename Base<F>::type,VR,STAR>& s, DistMatrix<F>& V, double heightRatio ) { #ifndef RELEASE PushCallStack("SVD"); if( heightRatio <= 1.0 ) throw std::logic_error("Nonsensical switchpoint for SVD"); #endif typedef typename Base<F>::type Real; // Check if we need to rescale the matrix, and do so if necessary bool needRescaling; Real scale; svd::CheckScale( A, needRescaling, scale ); if( needRescaling ) Scale( scale, A ); // TODO: Switch between different algorithms. For instance, starting // with a QR decomposition of tall-skinny matrices. if( A.Height() >= A.Width() ) { svd::SVDUpper( A, s, V, heightRatio ); } else { // Lower bidiagonalization is not yet supported, so we instead play a // trick to get the SVD of A. Adjoint( A, V ); svd::SVDUpper( V, s, A, heightRatio ); } // Rescale the singular values if necessary if( needRescaling ) Scal( 1/scale, s ); #ifndef RELEASE PopCallStack(); #endif }
void ProductLanczos ( const SparseMatrix<Field>& A, Matrix<Base<Field>>& T, Int basisSize ) { EL_DEBUG_CSE const Int m = A.Height(); const Int n = A.Width(); Matrix<Field> S; // Cache the adjoint // ----------------- SparseMatrix<Field> AAdj; Adjoint( A, AAdj ); if( m >= n ) { auto applyA = [&]( const Matrix<Field>& X, Matrix<Field>& Y ) { Zeros( S, m, X.Width() ); Multiply( NORMAL, Field(1), A, X, Field(0), S ); Zeros( Y, n, X.Width() ); Multiply( NORMAL, Field(1), AAdj, S, Field(0), Y ); }; Lanczos<Field>( n, applyA, T, basisSize ); } else { auto applyA = [&]( const Matrix<Field>& X, Matrix<Field>& Y ) { Zeros( S, n, X.Width() ); Multiply( NORMAL, Field(1), AAdj, X, Field(0), S ); Zeros( Y, m, X.Width() ); Multiply( NORMAL, Field(1), A, S, Field(0), Y ); }; Lanczos<Field>( m, applyA, T, basisSize ); } }
inline void SingularValues ( DistMatrix<F>& A, DistMatrix<typename Base<F>::type,VR,STAR>& s, double heightRatio ) { #ifndef RELEASE PushCallStack("SingularValues"); #endif typedef typename Base<F>::type R; // Check if we need to rescale the matrix, and do so if necessary bool needRescaling; R scale; svd::CheckScale( A, needRescaling, scale ); if( needRescaling ) Scale( scale, A ); // TODO: Switch between different algorithms. For instance, starting // with a QR decomposition of tall-skinny matrices. if( A.Height() >= A.Width() ) { svd::SingularValuesUpper( A, s, heightRatio ); } else { // Lower bidiagonalization is not yet supported, so we instead play a // trick to get the SVD of A. DistMatrix<F> AAdj( A.Grid() ); Adjoint( A, AAdj ); svd::SingularValuesUpper( AAdj, s, heightRatio ); } // Rescale the singular values if necessary if( needRescaling ) Scal( 1/scale, s ); #ifndef RELEASE PopCallStack(); #endif }
inline int Lyapunov( const Matrix<F>& A, const Matrix<F>& C, Matrix<F>& X ) { #ifndef RELEASE CallStackEntry cse("Lyapunov"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( C.Height() != A.Height() || C.Width() != A.Height() ) LogicError("C must conform with A"); #endif const Int m = A.Height(); Matrix<F> W, WTL, WTR, WBL, WBR; Zeros( W, 2*m, 2*m ); PartitionDownDiagonal ( W, WTL, WTR, WBL, WBR, m ); WTL = A; Adjoint( A, WBR ); Scale( F(-1), WBR ); WTR = C; Scale( F(-1), WTR ); return Sylvester( m, W, X ); }
inline void QRSVD( Matrix<F>& A, Matrix<typename Base<F>::type>& s, Matrix<F>& V ) { #ifndef RELEASE PushCallStack("svd::QRSVD"); #endif typedef typename Base<F>::type R; const int m = A.Height(); const int n = A.Width(); const int k = std::min(m,n); s.ResizeTo( k, 1 ); Matrix<F> U( m, k ); Matrix<F> VAdj( k, n ); lapack::QRSVD ( m, n, A.Buffer(), A.LDim(), s.Buffer(), U.Buffer(), U.LDim(), VAdj.Buffer(), VAdj.LDim() ); A = U; Adjoint( VAdj, V ); #ifndef RELEASE PopCallStack(); #endif }
// TrmmLLTCOld: blocked, distributed triangular multiply that updates X in
// place using the lower-triangular matrix L with a transposed or adjointed
// orientation.
//
// What the body does, in order:
//   * Debug builds (#ifndef RELEASE) verify that L and X share a grid, that
//     orientation is not NORMAL, and that L is square with
//     L.Height() == X.Height(); violations throw std::logic_error.
//   * X is scaled by alpha once, before the loop.
//   * L is traversed down its diagonal and X down its rows. For each block:
//       - X1 is multiplied on the left by op(L11) through LocalTrmm, working
//         on a [STAR,VR] copy of X1 and a [STAR,STAR] copy of L11;
//       - LocalGemm forms op(X2) L21 locally into a [MR,STAR] buffer, which is
//         sum-scattered into [MR,MC] form;
//       - the local transpose (orientation == TRANSPOSE) or adjoint
//         (otherwise) of that buffer is written into D1 and added to X1.
//
// Parameters:
//   orientation  TRANSPOSE or ADJOINT; NORMAL is rejected in debug builds.
//   diag         forwarded unchanged to LocalTrmm.
//   alpha        scalar applied to X.
//   L            triangular operand; only the L11 and L21 blocks are read.
//   X            overwritten with the result.
inline void TrmmLLTCOld ( Orientation orientation, UnitOrNonUnit diag, T alpha, const DistMatrix<T>& L, DistMatrix<T>& X ) { #ifndef RELEASE PushCallStack("internal::TrmmLLTCOld"); if( L.Grid() != X.Grid() ) throw std::logic_error ("L and X must be distributed over the same grid"); if( orientation == NORMAL ) throw std::logic_error("TrmmLLT expects a (Conjugate)Transpose option"); if( L.Height() != L.Width() || L.Height() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrmmLLTC: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<T> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<T> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<T,STAR,STAR> L11_STAR_STAR(g); DistMatrix<T,MC, STAR> L21_MC_STAR(g); DistMatrix<T,STAR,VR > X1_STAR_VR(g); DistMatrix<T,MR, STAR> D1AdjOrTrans_MR_STAR(g); DistMatrix<T,MR, MC > D1AdjOrTrans_MR_MC(g); DistMatrix<T,MC, MR > D1(g); // Start the algorithm Scale( alpha, X ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionDown ( X, XT, XB, 0 ); while( XB.Height() > 0 ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); RepartitionDown ( XT, X0, /**/ /**/ X1, XB, X2 ); L21_MC_STAR.AlignWith( X2 ); D1AdjOrTrans_MR_STAR.AlignWith( X1 ); D1AdjOrTrans_MR_MC.AlignWith( X1 ); D1.AlignWith( X1 ); Zeros( X1.Width(), X1.Height(), D1AdjOrTrans_MR_STAR ); Zeros( X1.Height(), X1.Width(), D1 ); //--------------------------------------------------------------------// X1_STAR_VR = X1; L11_STAR_STAR = L11; LocalTrmm ( LEFT, LOWER, orientation, diag, T(1), L11_STAR_STAR, X1_STAR_VR ); X1 = X1_STAR_VR; L21_MC_STAR = L21; LocalGemm ( orientation, NORMAL, 
T(1), X2, L21_MC_STAR, T(0), D1AdjOrTrans_MR_STAR ); D1AdjOrTrans_MR_MC.SumScatterFrom( D1AdjOrTrans_MR_STAR ); if( orientation == TRANSPOSE ) Transpose( D1AdjOrTrans_MR_MC.LocalMatrix(), D1.LocalMatrix() ); else Adjoint( D1AdjOrTrans_MR_MC.LocalMatrix(), D1.LocalMatrix() ); Axpy( T(1), D1, X1 ); //--------------------------------------------------------------------// D1.FreeAlignments(); D1AdjOrTrans_MR_MC.FreeAlignments(); D1AdjOrTrans_MR_STAR.FreeAlignments(); L21_MC_STAR.FreeAlignments(); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlidePartitionDown ( XT, X0, X1, /**/ /**/ XB, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
void Ridge ( Orientation orientation, const Matrix<Field>& A, const Matrix<Field>& B, Base<Field> gamma, Matrix<Field>& X, RidgeAlg alg ) { EL_DEBUG_CSE const bool normal = ( orientation==NORMAL ); const Int m = ( normal ? A.Height() : A.Width() ); const Int n = ( normal ? A.Width() : A.Height() ); if( orientation == TRANSPOSE && IsComplex<Field>::value ) LogicError("Transpose version of complex Ridge not yet supported"); if( m >= n ) { Matrix<Field> Z; if( alg == RIDGE_CHOLESKY ) { if( orientation == NORMAL ) Herk( LOWER, ADJOINT, Base<Field>(1), A, Z ); else Herk( LOWER, NORMAL, Base<Field>(1), A, Z ); ShiftDiagonal( Z, Field(gamma*gamma) ); Cholesky( LOWER, Z ); if( orientation == NORMAL ) Gemm( ADJOINT, NORMAL, Field(1), A, B, X ); else Gemm( NORMAL, NORMAL, Field(1), A, B, X ); cholesky::SolveAfter( LOWER, NORMAL, Z, X ); } else if( alg == RIDGE_QR ) { Zeros( Z, m+n, n ); auto ZT = Z( IR(0,m), IR(0,n) ); auto ZB = Z( IR(m,m+n), IR(0,n) ); if( orientation == NORMAL ) ZT = A; else Adjoint( A, ZT ); FillDiagonal( ZB, Field(gamma) ); // NOTE: This QR factorization could exploit the upper-triangular // structure of the diagonal matrix ZB qr::ExplicitTriang( Z ); if( orientation == NORMAL ) Gemm( ADJOINT, NORMAL, Field(1), A, B, X ); else Gemm( NORMAL, NORMAL, Field(1), A, B, X ); cholesky::SolveAfter( LOWER, NORMAL, Z, X ); } else { Matrix<Field> U, V; Matrix<Base<Field>> s; if( orientation == NORMAL ) { SVDCtrl<Base<Field>> ctrl; ctrl.overwrite = false; SVD( A, U, s, V, ctrl ); } else { Matrix<Field> AAdj; Adjoint( A, AAdj ); SVDCtrl<Base<Field>> ctrl; ctrl.overwrite = true; SVD( AAdj, U, s, V, ctrl ); } auto sigmaMap = [=]( const Base<Field>& sigma ) { return sigma / (sigma*sigma + gamma*gamma); }; EntrywiseMap( s, MakeFunction(sigmaMap) ); Gemm( ADJOINT, NORMAL, Field(1), U, B, X ); DiagonalScale( LEFT, NORMAL, s, X ); U = X; Gemm( NORMAL, NORMAL, Field(1), V, U, X ); } } else { LogicError("This case not yet supported"); } }
// Inverse from a caller-supplied determinant.
// A determinant equal to a value-initialized T (T()) means "not supplied",
// in which case Determinant() is evaluated here. The work itself is done by
// the two-argument Inverse overload, which also receives Adjoint().
Matrix3<T> Matrix3<T>::Inverse( T determinant ) const
{
    T det = determinant;
    if( determinant == T() )
        det = Determinant( );

    return Inverse( det, Adjoint( ) );
}
/**
 * @brief Adjoint transform
 *
 * Forwards to Adjoint and returns its result.
 *
 * @param  m  To transform
 * @return    Transform
 */
template <class T>
Matrix<T> operator->* (const Matrix<T>& m) {
    Matrix<T> transformed = Adjoint (m);
    return transformed;
}
// svd::GolubReinschUpper: distributed SVD by bidiagonalization followed by
// the bidiagonal QR algorithm.
//
// What the body does, in order:
//   * Bidiag( A, tP, tQ ) reduces A in place.
//   * The real parts of the main diagonal and of one off-diagonal are copied
//     out of A; the off-diagonal index is +1 and uplo is 'U' when
//     A.Height() >= A.Width(), and -1 / 'L' otherwise.
//   * U_VC_STAR (m x k) and VAdj_STAR_VC (k x n), with k = min(m,n), start as
//     identities and receive the rotations from lapack::BidiagQRAlg, which is
//     called on their local buffers.
//   * A copy B of the reduced A is kept; pieces of U_VC_STAR are copied into
//     A and the adjoint of pieces of VAdj_STAR_VC into V, with the remaining
//     partitions (AB, or VB) zeroed.
//   * bidiag::ApplyU and bidiag::ApplyV are applied to A and V using B, tQ
//     and tP.
//   * s is assigned from the [STAR,STAR] copy of the diagonal.
//
// Parameters:
//   A  input matrix; overwritten (the source comments call the result U).
//   s  receives the values left in d_STAR_STAR by lapack::BidiagQRAlg.
//   V  overwritten; also used to align VAdj_STAR_VC, and in the m < n branch
//      it is partitioned before being written, so it is presumably expected
//      to be sized by the caller — TODO confirm.
//
// NOTE(review): the view of eHat_STAR_STAR has height k-1, so k == 0 would
// request a negative height — confirm that callers exclude empty matrices.
inline void GolubReinschUpper ( DistMatrix<F>& A, DistMatrix<BASE(F),VR,STAR>& s, DistMatrix<F>& V ) { #ifndef RELEASE CallStackEntry entry("svd::GolubReinschUpper"); #endif typedef BASE(F) Real; const Int m = A.Height(); const Int n = A.Width(); const Int k = Min( m, n ); const Int offdiagonal = ( m>=n ? 1 : -1 ); const char uplo = ( m>=n ? 'U' : 'L' ); const Grid& g = A.Grid(); // Bidiagonalize A DistMatrix<F,STAR,STAR> tP( g ), tQ( g ); Bidiag( A, tP, tQ ); // Grab copies of the diagonal and sub/super-diagonal of A DistMatrix<Real,MD,STAR> d_MD_STAR(g), e_MD_STAR(g); A.GetRealPartOfDiagonal( d_MD_STAR ); A.GetRealPartOfDiagonal( e_MD_STAR, offdiagonal ); // NOTE: lapack::BidiagQRAlg expects e to be of length k DistMatrix<Real,STAR,STAR> d_STAR_STAR( d_MD_STAR ), eHat_STAR_STAR( k, 1, g ), e_STAR_STAR( g ); View( e_STAR_STAR, eHat_STAR_STAR, 0, 0, k-1, 1 ); e_STAR_STAR = e_MD_STAR; // Initialize U and VAdj to the appropriate identity matrices DistMatrix<F,VC,STAR> U_VC_STAR( g ); DistMatrix<F,STAR,VC> VAdj_STAR_VC( g ); U_VC_STAR.AlignWith( A ); VAdj_STAR_VC.AlignWith( V ); Identity( U_VC_STAR, m, k ); Identity( VAdj_STAR_VC, k, n ); // Compute the SVD of the bidiagonal matrix and accumulate the Givens // rotations into our local portion of U and VAdj Matrix<F>& ULoc = U_VC_STAR.Matrix(); Matrix<F>& VAdjLoc = VAdj_STAR_VC.Matrix(); lapack::BidiagQRAlg ( uplo, k, VAdjLoc.Width(), ULoc.Height(), d_STAR_STAR.Buffer(), e_STAR_STAR.Buffer(), VAdjLoc.Buffer(), VAdjLoc.LDim(), ULoc.Buffer(), ULoc.LDim() ); // Make a copy of A (for the Householder vectors) and pull the necessary // portions of U and VAdj into a standard matrix dist. 
DistMatrix<F> B( A ); if( m >= n ) { DistMatrix<F> AT(g), AB(g); DistMatrix<F,VC,STAR> UT_VC_STAR(g), UB_VC_STAR(g); PartitionDown( A, AT, AB, n ); PartitionDown( U_VC_STAR, UT_VC_STAR, UB_VC_STAR, n ); AT = UT_VC_STAR; MakeZeros( AB ); Adjoint( VAdj_STAR_VC, V ); } else { DistMatrix<F> VT(g), VB(g); DistMatrix<F,STAR,VC> VAdjL_STAR_VC(g), VAdjR_STAR_VC(g); PartitionDown( V, VT, VB, m ); PartitionRight( VAdj_STAR_VC, VAdjL_STAR_VC, VAdjR_STAR_VC, m ); Adjoint( VAdjL_STAR_VC, VT ); MakeZeros( VB ); } // Backtransform U and V bidiag::ApplyU( LEFT, NORMAL, B, tQ, A ); bidiag::ApplyV( LEFT, NORMAL, B, tP, V ); // Copy out the appropriate subset of the singular values s = d_STAR_STAR; }
// Returns Adjoint() scaled by the reciprocal of Determinant().
// No check is made for a zero determinant.
Matrix3D Matrix3D::Inverse() const
{
    const auto reciprocal = 1.0f / Determinant();
    return reciprocal * Adjoint();
}
// ADMM iteration driven by a cached block LU factorization; the in-body
// comments give the derivation of the factorization and of each update.
//
// Setup (before the loop):
//   * U12 := A^H, L21 := A / ctrl.rho, B22 := -(A A^H) / ctrl.rho, made
//     Hermitian and then LU-factored with row permutation P2; the same
//     permutation is applied to L21 and to a copy of b (bPiv).
//   * When ctrl.inv is set, X22 is built from the unit-lower and upper
//     factors stored in B22 so that the inner solve becomes a single Gemv.
//
// Each iteration:
//   * builds rho*(z-u)-c in xTmp, eliminates with L21, solves with B22 (or
//     multiplies by X22), back-substitutes with U12 and divides by rho;
//   * forms xHat := alpha*x + (1-alpha)*zOld, z := max(xHat+u, 0) via
//     LowerClip, and u := u + (xHat - z);
//   * evaluates the primal/dual residual norms and tolerances, optionally
//     prints them (ctrl.print), and stops once both residuals are below
//     their tolerances.
//
// Parameters:
//   A, b, c  problem data, read only.
//   z        reset to an n x 1 zero vector and then updated every iteration.
//   ctrl     supplies rho, alpha, maxIter, absTol, relTol, inv and print.
//
// Returns the number of completed iterations; when that equals ctrl.maxIter,
// "ADMM failed to converge" is written to cout.
//
// NOTE(review): the closing `x = xTmp;` assigns to a local that is never read
// afterwards, so the x iterate is not handed back to the caller — confirm
// whether that is intended.
Int ADMM ( const Matrix<Real>& A, const Matrix<Real>& b, const Matrix<Real>& c, Matrix<Real>& z, const ADMMCtrl<Real>& ctrl ) { EL_DEBUG_CSE // Cache a custom partially-pivoted LU factorization of // | rho*I A^H | = | B11 B12 | // | A 0 | | B21 B22 | // by (justifiably) avoiding pivoting in the first n steps of // the factorization, so that // [I,rho*I] = lu(rho*I). // The factorization would then proceed with // B21 := B21 U11^{-1} = A (rho*I)^{-1} = A/rho // B12 := L11^{-1} B12 = I A^H = A^H. // The Schur complement would then be // B22 := B22 - B21 B12 = 0 - (A*A^H)/rho. // We then factor said matrix with LU with partial pivoting and // swap the necessary rows of B21 in order to implicitly commute // the row pivots with the Gauss transforms in the manner standard // for GEPP. Unless A A' is singular, pivoting should not be needed, // as Cholesky factorization of the negative matrix should be valid. // // The result is the factorization // | I 0 | | rho*I A^H | = | I 0 | | rho*I U12 |, // | 0 P22 | | A 0 | | L21 L22 | | 0 U22 | // where [L22,U22] are stored within B22. Matrix<Real> U12, L21, B22, bPiv; Adjoint( A, U12 ); L21 = A; L21 *= 1/ctrl.rho; Herk( LOWER, NORMAL, -1/ctrl.rho, A, B22 ); MakeHermitian( LOWER, B22 ); // TODO: Replace with sparse-direct Cholesky version? 
Permutation P2; LU( B22, P2 ); P2.PermuteRows( L21 ); bPiv = b; P2.PermuteRows( bPiv ); // Possibly form the inverse of L22 U22 Matrix<Real> X22; if( ctrl.inv ) { X22 = B22; MakeTrapezoidal( LOWER, X22 ); FillDiagonal( X22, Real(1) ); TriangularInverse( LOWER, UNIT, X22 ); Trsm( LEFT, UPPER, NORMAL, NON_UNIT, Real(1), B22, X22 ); } Int numIter=0; const Int m = A.Height(); const Int n = A.Width(); Matrix<Real> g, xTmp, y, t; Zeros( g, m+n, 1 ); PartitionDown( g, xTmp, y, n ); Matrix<Real> x, u, zOld, xHat; Zeros( z, n, 1 ); Zeros( u, n, 1 ); Zeros( t, n, 1 ); while( numIter < ctrl.maxIter ) { zOld = z; // Find x from // | rho*I A^H | | x | = | rho*(z-u)-c | // | A 0 | | y | | b | // via our cached custom factorization: // // |x| = inv(U) inv(L) P' |rho*(z-u)-c| // |y| |b | // = |rho*I U12|^{-1} |I 0 | |I 0 | |rho*(z-u)-c| // = |0 U22| |L21 L22| |0 P22'| |b | // = " " |rho*(z-u)-c| // | P22' b | xTmp = z; xTmp -= u; xTmp *= ctrl.rho; xTmp -= c; y = bPiv; Gemv( NORMAL, Real(-1), L21, xTmp, Real(1), y ); if( ctrl.inv ) { Gemv( NORMAL, Real(1), X22, y, t ); y = t; } else { Trsv( LOWER, NORMAL, UNIT, B22, y ); Trsv( UPPER, NORMAL, NON_UNIT, B22, y ); } Gemv( NORMAL, Real(-1), U12, y, Real(1), xTmp ); xTmp *= 1/ctrl.rho; // xHat := alpha*x + (1-alpha)*zOld xHat = xTmp; xHat *= ctrl.alpha; Axpy( 1-ctrl.alpha, zOld, xHat ); // z := pos(xHat+u) z = xHat; z += u; LowerClip( z, Real(0) ); // u := u + (xHat-z) u += xHat; u -= z; const Real objective = Dot( c, xTmp ); // rNorm := || x - z ||_2 t = xTmp; t -= z; const Real rNorm = FrobeniusNorm( t ); // sNorm := |rho| || z - zOld ||_2 t = z; t -= zOld; const Real sNorm = Abs(ctrl.rho)*FrobeniusNorm( t ); const Real epsPri = Sqrt(Real(n))*ctrl.absTol + ctrl.relTol*Max(FrobeniusNorm(xTmp),FrobeniusNorm(z)); const Real epsDual = Sqrt(Real(n))*ctrl.absTol + ctrl.relTol*Abs(ctrl.rho)*FrobeniusNorm(u); if( ctrl.print ) { t = xTmp; LowerClip( t, Real(0) ); t -= xTmp; const Real clipDist = FrobeniusNorm( t ); cout << numIter << ": " << 
"||x-z||_2=" << rNorm << ", " << "epsPri=" << epsPri << ", " << "|rho| ||z-zOld||_2=" << sNorm << ", " << "epsDual=" << epsDual << ", " << "||x-Pos(x)||_2=" << clipDist << ", " << "c'x=" << objective << endl; } if( rNorm < epsPri && sNorm < epsDual ) break; ++numIter; } if( ctrl.maxIter == numIter ) cout << "ADMM failed to converge" << endl; x = xTmp; return numIter; }
/**
 * @brief Backward transform
 *
 * Forwards to Adjoint and returns its result.
 *
 * @param  m  To transform
 * @return    Transform
 */
virtual Matrix< std::complex<T> >
operator->* (const Matrix< std::complex<T> >& m) const {
    Matrix< std::complex<T> > transformed = Adjoint (m);
    return transformed;
}
/**
 * @brief Backward transform
 *
 * Forwards to Adjoint and returns its result.
 *
 * @param  m  To transform
 * @return    Transform
 */
inline virtual Matrix<CT>
operator->* (const Matrix<CT>& m) const {
    Matrix<CT> transformed = Adjoint (m);
    return transformed;
}
// internal::TwoSidedTrsmLVar2: blocked, distributed two-sided triangular
// solve that updates A in place using the lower-triangular matrix L.
// Presumably the overall effect is A := inv(L) A inv(L)' on the lower
// triangle — TODO confirm against the library documentation; the per-block
// updates below are what the code itself states.
//
// Debug builds (#ifndef RELEASE) require A and L to be square and of equal
// size; violations throw std::logic_error.
//
// Both matrices are traversed down their diagonals. For each diagonal block
// the in-body comments give the sequence:
//     Y10 := L10 A00                      (LocalSymmetricAccumulateRL)
//     X11 := A10 L10'
//     A10 := A10 - Y10
//     A11 := A11 - (A10 L10' + L10 A10')  (lower trapezoid only)
//     A10 := inv(L11) A10
//     A11 := inv(L11) A11 inv(L11)'       (LocalTwoSidedTrsm)
//     A21 := A21 - A20 L10'
//     A21 := A21 inv(L11)'
// The order matters: A10 is updated with Y10 before it is adjointed and used
// in the A11 update, and L11_STAR_STAR is loaded once and reused by the later
// solves in the same iteration.
//
// Parameters:
//   diag  forwarded to the LocalTrsm / LocalTwoSidedTrsm calls.
//   A     updated in place; the A00, A10, A11, A20 and A21 blocks are
//         accessed.
//   L     triangular operand; only the L10 and L11 blocks are read.
inline void TwoSidedTrsmLVar2 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& L ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrsmLVar2"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( L.Height() != L.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != L.Height() ) throw std::logic_error("A and L must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); // Temporary distributions DistMatrix<F,MR, STAR> A10Adj_MR_STAR(g); DistMatrix<F,STAR,VR > A10_STAR_VR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,MR, STAR> F10Adj_MR_STAR(g); DistMatrix<F,MR, STAR> L10Adj_MR_STAR(g); DistMatrix<F,VC, STAR> L10Adj_VC_STAR(g); DistMatrix<F,STAR,MC > L10_STAR_MC(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,MC, STAR> X11_MC_STAR(g); DistMatrix<F,MC, STAR> X21_MC_STAR(g); DistMatrix<F,MC, STAR> Y10Adj_MC_STAR(g); DistMatrix<F,MR, MC > Y10Adj_MR_MC(g); DistMatrix<F> X11(g); DistMatrix<F> Y10Adj(g); Matrix<F> Y10Local; PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); A10Adj_MR_STAR.AlignWith( L10 ); F10Adj_MR_STAR.AlignWith( A00 ); L10Adj_MR_STAR.AlignWith( A00 ); L10Adj_VC_STAR.AlignWith( A00 ); L10_STAR_MC.AlignWith( A00 ); X11.AlignWith( A11 ); X11_MC_STAR.AlignWith( 
L10 ); X21_MC_STAR.AlignWith( A20 ); Y10Adj_MC_STAR.AlignWith( A00 ); Y10Adj_MR_MC.AlignWith( A10 ); //--------------------------------------------------------------------// // Y10 := L10 A00 L10Adj_MR_STAR.AdjointFrom( L10 ); L10Adj_VC_STAR = L10Adj_MR_STAR; L10_STAR_MC.AdjointFrom( L10Adj_VC_STAR ); Y10Adj_MC_STAR.ResizeTo( A10.Width(), A10.Height() ); F10Adj_MR_STAR.ResizeTo( A10.Width(), A10.Height() ); Zero( Y10Adj_MC_STAR ); Zero( F10Adj_MR_STAR ); LocalSymmetricAccumulateRL ( ADJOINT, F(1), A00, L10_STAR_MC, L10Adj_MR_STAR, Y10Adj_MC_STAR, F10Adj_MR_STAR ); Y10Adj.SumScatterFrom( Y10Adj_MC_STAR ); Y10Adj_MR_MC = Y10Adj; Y10Adj_MR_MC.SumScatterUpdate( F(1), F10Adj_MR_STAR ); Adjoint( Y10Adj_MR_MC.LockedLocalMatrix(), Y10Local ); // X11 := A10 L10' X11_MC_STAR.ResizeTo( A11.Height(), A11.Width() ); LocalGemm ( NORMAL, NORMAL, F(1), A10, L10Adj_MR_STAR, F(0), X11_MC_STAR ); // A10 := A10 - Y10 Axpy( F(-1), Y10Local, A10.LocalMatrix() ); A10Adj_MR_STAR.AdjointFrom( A10 ); // A11 := A11 - (X11 + L10 A10') = A11 - (A10 L10' + L10 A10') LocalGemm ( NORMAL, NORMAL, F(1), L10, A10Adj_MR_STAR, F(1), X11_MC_STAR ); X11.SumScatterFrom( X11_MC_STAR ); MakeTrapezoidal( LEFT, LOWER, 0, X11 ); Axpy( F(-1), X11, A11 ); // A10 := inv(L11) A10 L11_STAR_STAR = L11; A10_STAR_VR.AdjointFrom( A10Adj_MR_STAR ); LocalTrsm ( LEFT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, A10_STAR_VR ); A10 = A10_STAR_VR; // A11 := inv(L11) A11 inv(L11)' A11_STAR_STAR = A11; LocalTwoSidedTrsm( LOWER, diag, A11_STAR_STAR, L11_STAR_STAR ); A11 = A11_STAR_STAR; // A21 := A21 - A20 L10' X21_MC_STAR.ResizeTo( A21.Height(), A21.Width() ); LocalGemm ( NORMAL, NORMAL, F(1), A20, L10Adj_MR_STAR, F(0), X21_MC_STAR ); A21.SumScatterUpdate( F(-1), X21_MC_STAR ); // A21 := A21 inv(L11)' A21_VC_STAR = A21; LocalTrsm ( RIGHT, LOWER, ADJOINT, diag, F(1), L11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; //--------------------------------------------------------------------// A10Adj_MR_STAR.FreeAlignments(); 
F10Adj_MR_STAR.FreeAlignments(); L10Adj_MR_STAR.FreeAlignments(); L10Adj_VC_STAR.FreeAlignments(); L10_STAR_MC.FreeAlignments(); X11.FreeAlignments(); X11_MC_STAR.FreeAlignments(); X21_MC_STAR.FreeAlignments(); Y10Adj_MC_STAR.FreeAlignments(); Y10Adj_MR_MC.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /**********************************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } #ifndef RELEASE PopCallStack(); #endif }
/*! compute inverse matrix: Adjoint() scaled by Rcp(Det()) */
COMPILER_FORCEINLINE const LinearSpace3 Inverse() const {
    const auto rcpDet = Rcp(Det());
    return rcpDet*Adjoint();
}
// SVD through ScaLAPACK for block-distributed matrices.
//
// Only the thin and compact approaches are handled; any other approach
// raises a LogicError. A is copied into a block distribution, scalapack::SVD
// fills U, s and a k x n matrix VH (k = min(m,n)), and V receives the
// adjoint of VH. For the compact approach the results are truncated at the
// first singular value that is not above the a-posteriori threshold.
//
// Without EL_HAVE_SCALAPACK nothing beyond the support assertion runs. The
// returned SVDInfo is default-constructed in either case.
SVDInfo ScaLAPACKHelper
( const AbstractDistMatrix<F>& APre,
        AbstractDistMatrix<F>& UPre,
        AbstractDistMatrix<Base<F>>& sPre,
        AbstractDistMatrix<F>& V,
  const SVDCtrl<Base<F>>& ctrl )
{
    DEBUG_CSE
    AssertScaLAPACKSupport();
    SVDInfo info;
#ifdef EL_HAVE_SCALAPACK
    typedef Base<F> Real;

    DistMatrix<F,MC,MR,BLOCK> A( APre );
    DistMatrixWriteProxy<Real,Real,STAR,STAR> sProx(sPre);
    DistMatrixWriteProxy<F,F,MC,MR,BLOCK> UProx(UPre);
    auto& s = sProx.Get();
    auto& U = UProx.Get();

    const int m = A.Height();
    const int n = A.Width();
    const int k = Min(m,n);

    auto approach = ctrl.bidiagSVDCtrl.approach;
    if( approach != THIN_SVD && approach != COMPACT_SVD )
    {
        LogicError
        ("Only Thin and Compact singular value options currently supported");
    }
    else
    {
        // Allocate the outputs that ScaLAPACK writes into
        Zeros( U, m, k );
        DistMatrix<F,MC,MR,BLOCK> VH( A.Grid() );
        Zeros( VH, k, n );
        s.Resize( k, 1 );

        auto descA = FillDesc( A );
        auto descU = FillDesc( U );
        auto descVH = FillDesc( VH );
        scalapack::SVD
        ( m, n,
          A.Buffer(), descA.data(),
          s.Buffer(),
          U.Buffer(), descU.data(),
          VH.Buffer(), descVH.data() );

        if( approach == COMPACT_SVD )
        {
            const Real twoNorm = ( k==0 ? Real(0) : s.Get(0,0) );
            const Real thresh =
              bidiag_svd::APosterioriThreshold
              ( m, n, twoNorm, ctrl.bidiagSVDCtrl );

            // Advance to the first singular value at or below the threshold
            Int rank = 0;
            while( rank < k && !( s.Get(rank,0) <= thresh ) )
                ++rank;

            s.Resize( rank, 1 );
            U.Resize( m, rank );
            VH.Resize( rank, n );
        }

        Adjoint( VH, V );
    }
#endif
    return info;
}
// internal::TwoSidedTrsmUVar2: blocked, distributed two-sided triangular
// solve that updates A in place using the upper-triangular matrix U.
// Presumably the overall effect is A := inv(U)' A inv(U) on the upper
// triangle — TODO confirm against the library documentation; the per-block
// updates below are what the code itself states.
//
// Debug builds (#ifndef RELEASE) require A and U to be square and of equal
// size; violations go through LogicError.
//
// Both matrices are traversed down their diagonals. For each diagonal block
// the in-body comments give the sequence:
//     Y01 := A00 U01                      (LocalSymmetricAccumulateLU)
//     X11 := U01' A01
//     A01 := A01 - Y01
//     A11 := A11 - triu(X11 + A01' U01)
//     A01 := A01 inv(U11)
//     A11 := inv(U11)' A11 inv(U11)       (LocalTwoSidedTrsm)
//     A12 := A12 - A02' U01
//     A12 := inv(U11)' A12
// The order matters: A01 is updated with Y01 before it is redistributed and
// used in the A11 update, and U11_STAR_STAR is loaded once and reused by the
// later solves in the same iteration.
//
// Parameters:
//   diag  forwarded to the LocalTrsm / LocalTwoSidedTrsm calls.
//   A     updated in place; the A00, A01, A02, A11 and A12 blocks are
//         accessed.
//   U     triangular operand; only the U01 and U11 blocks are read.
inline void TwoSidedTrsmUVar2 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& U ) { #ifndef RELEASE CallStackEntry entry("internal::TwoSidedTrsmUVar2"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( U.Height() != U.Width() ) LogicError("Triangular matrices must be square"); if( A.Height() != U.Height() ) LogicError("A and U must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<F,MC, STAR> A01_MC_STAR(g); DistMatrix<F,VC, STAR> A01_VC_STAR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,MC, STAR> F01_MC_STAR(g); DistMatrix<F,MC, STAR> U01_MC_STAR(g); DistMatrix<F,VR, STAR> U01_VR_STAR(g); DistMatrix<F,STAR,MR > U01Adj_STAR_MR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,MR > X11_STAR_MR(g); DistMatrix<F,MR, STAR> X12Adj_MR_STAR(g); DistMatrix<F,MR, MC > X12Adj_MR_MC(g); DistMatrix<F,MR, MC > Y01_MR_MC(g); DistMatrix<F,MR, STAR> Y01_MR_STAR(g); DistMatrix<F> X11(g); DistMatrix<F> Y01(g); Matrix<F> X12Local; PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); A01_MC_STAR.AlignWith( U01 ); Y01.AlignWith( A01 ); Y01_MR_STAR.AlignWith( A00 ); U01_MC_STAR.AlignWith( A00 ); U01_VR_STAR.AlignWith( A00 ); U01Adj_STAR_MR.AlignWith( A00 ); X11_STAR_MR.AlignWith( U01 ); 
X11.AlignWith( A11 ); X12Adj_MR_STAR.AlignWith( A02 ); X12Adj_MR_MC.AlignWith( A12 ); F01_MC_STAR.AlignWith( A00 ); //--------------------------------------------------------------------// // Y01 := A00 U01 U01_MC_STAR = U01; U01_VR_STAR = U01_MC_STAR; U01Adj_STAR_MR.AdjointFrom( U01_VR_STAR ); Zeros( Y01_MR_STAR, A01.Height(), A01.Width() ); Zeros( F01_MC_STAR, A01.Height(), A01.Width() ); LocalSymmetricAccumulateLU ( ADJOINT, F(1), A00, U01_MC_STAR, U01Adj_STAR_MR, F01_MC_STAR, Y01_MR_STAR ); Y01_MR_MC.SumScatterFrom( Y01_MR_STAR ); Y01 = Y01_MR_MC; Y01.SumScatterUpdate( F(1), F01_MC_STAR ); // X11 := U01' A01 LocalGemm( ADJOINT, NORMAL, F(1), U01_MC_STAR, A01, X11_STAR_MR ); // A01 := A01 - Y01 Axpy( F(-1), Y01, A01 ); A01_MC_STAR = A01; // A11 := A11 - triu(X11 + A01' U01) = A11 - (U01 A01 + A01' U01) LocalGemm( ADJOINT, NORMAL, F(1), A01_MC_STAR, U01, F(1), X11_STAR_MR ); X11.SumScatterFrom( X11_STAR_MR ); MakeTriangular( UPPER, X11 ); Axpy( F(-1), X11, A11 ); // A01 := A01 inv(U11) U11_STAR_STAR = U11; A01_VC_STAR = A01_MC_STAR; LocalTrsm ( RIGHT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, A01_VC_STAR ); A01 = A01_VC_STAR; // A11 := inv(U11)' A11 inv(U11) A11_STAR_STAR = A11; LocalTwoSidedTrsm( UPPER, diag, A11_STAR_STAR, U11_STAR_STAR ); A11 = A11_STAR_STAR; // A12 := A12 - A02' U01 LocalGemm( ADJOINT, NORMAL, F(1), A02, U01_MC_STAR, X12Adj_MR_STAR ); X12Adj_MR_MC.SumScatterFrom( X12Adj_MR_STAR ); Adjoint( X12Adj_MR_MC.LockedMatrix(), X12Local ); Axpy( F(-1), X12Local, A12.Matrix() ); // A12 := inv(U11)' A12 A12_STAR_VR = A12; LocalTrsm ( LEFT, UPPER, ADJOINT, diag, F(1), U11_STAR_STAR, A12_STAR_VR ); A12 = A12_STAR_VR; //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, 
/*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } }
inline void HemmRUA ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::HemmRUA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); DistMatrix<T,MR, STAR> B1Adj_MR_STAR(g); DistMatrix<T,VC, STAR> B1Adj_VC_STAR(g); DistMatrix<T,STAR,MC > B1_STAR_MC(g); DistMatrix<T,MC, STAR> Z1Adj_MC_STAR(g); DistMatrix<T,MR, STAR> Z1Adj_MR_STAR(g); DistMatrix<T,MR, MC > Z1Adj_MR_MC(g); DistMatrix<T> Z1Adj(g); B1Adj_MR_STAR.AlignWith( A ); B1Adj_VC_STAR.AlignWith( A ); B1_STAR_MC.AlignWith( A ); Z1Adj_MC_STAR.AlignWith( A ); Z1Adj_MR_STAR.AlignWith( A ); Matrix<T> Z1Local; Scale( beta, C ); LockedPartitionDown ( B, BT, BB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( CT.Height() < C.Height() ) { LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); Z1Adj_MR_MC.AlignWith( C1 ); Zeros( C1.Width(), C1.Height(), Z1Adj_MC_STAR ); Zeros( C1.Width(), C1.Height(), Z1Adj_MR_STAR ); //--------------------------------------------------------------------// B1Adj_MR_STAR.AdjointFrom( B1 ); B1Adj_VC_STAR = B1Adj_MR_STAR; B1_STAR_MC.AdjointFrom( B1Adj_VC_STAR ); LocalSymmetricAccumulateRU ( ADJOINT, alpha, A, B1_STAR_MC, B1Adj_MR_STAR, Z1Adj_MC_STAR, Z1Adj_MR_STAR ); Z1Adj.SumScatterFrom( Z1Adj_MC_STAR ); Z1Adj_MR_MC = Z1Adj; Z1Adj_MR_MC.SumScatterUpdate( T(1), Z1Adj_MR_STAR ); Adjoint( Z1Adj_MR_MC.LockedLocalMatrix(), Z1Local ); Axpy( T(1), Z1Local, C1.LocalMatrix() ); //--------------------------------------------------------------------// Z1Adj_MR_MC.FreeAlignments(); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void internal::HegstLUVar2( DistMatrix<F,MC,MR>& A, const DistMatrix<F,MC,MR>& U ) { #ifndef RELEASE PushCallStack("internal::HegstLUVar2"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( U.Height() != U.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != U.Height() ) throw std::logic_error("A and U must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F,MC,MR> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<F,VC, STAR> A01_VC_STAR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,MC > U12_STAR_MC(g); DistMatrix<F,STAR,VR > U12_STAR_VR(g); DistMatrix<F,MR, STAR> U12Adj_MR_STAR(g); DistMatrix<F,VC, STAR> U12Adj_VC_STAR(g); DistMatrix<F,MC, STAR> X01_MC_STAR(g); DistMatrix<F,STAR,STAR> X11_STAR_STAR(g); DistMatrix<F,MC, MR > Y12(g); DistMatrix<F,MC, MR > Z12Adj(g); DistMatrix<F,MR, MC > Z12Adj_MR_MC(g); DistMatrix<F,MC, STAR> Z12Adj_MC_STAR(g); DistMatrix<F,MR, STAR> Z12Adj_MR_STAR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); A12_STAR_VR.AlignWith( A12 ); U12_STAR_MC.AlignWith( A22 ); U12_STAR_VR.AlignWith( A12 ); U12Adj_MR_STAR.AlignWith( A22 ); U12Adj_VC_STAR.AlignWith( A22 ); X01_MC_STAR.AlignWith( A01 ); Y12.AlignWith( A12 ); 
Z12Adj.AlignWith( A12 ); Z12Adj_MR_MC.AlignWith( A12 ); Z12Adj_MC_STAR.AlignWith( A22 ); Z12Adj_MR_STAR.AlignWith( A22 ); //--------------------------------------------------------------------// // A01 := A01 U11' U11_STAR_STAR = U11; A01_VC_STAR = A01; internal::LocalTrmm ( RIGHT, UPPER, ADJOINT, NON_UNIT, (F)1, U11_STAR_STAR, A01_VC_STAR ); A01 = A01_VC_STAR; // A01 := A01 + A02 U12' U12Adj_MR_STAR.AdjointFrom( U12 ); X01_MC_STAR.ResizeTo( A01.Height(), A01.Width() ); internal::LocalGemm ( NORMAL, NORMAL, (F)1, A02, U12Adj_MR_STAR, (F)0, X01_MC_STAR ); A01.SumScatterUpdate( (F)1, X01_MC_STAR ); // Y12 := U12 A22 U12Adj_VC_STAR = U12Adj_MR_STAR; U12_STAR_MC.AdjointFrom( U12Adj_VC_STAR ); Z12Adj_MC_STAR.ResizeTo( A12.Width(), A12.Height() ); Z12Adj_MR_STAR.ResizeTo( A12.Width(), A12.Height() ); Zero( Z12Adj_MC_STAR ); Zero( Z12Adj_MR_STAR ); internal::LocalSymmetricAccumulateRU ( ADJOINT, (F)1, A22, U12_STAR_MC, U12Adj_MR_STAR, Z12Adj_MC_STAR, Z12Adj_MR_STAR ); Z12Adj.SumScatterFrom( Z12Adj_MC_STAR ); Z12Adj_MR_MC = Z12Adj; Z12Adj_MR_MC.SumScatterUpdate( (F)1, Z12Adj_MR_STAR ); Y12.ResizeTo( A12.Height(), A12.Width() ); Adjoint( Z12Adj_MR_MC.LockedLocalMatrix(), Y12.LocalMatrix() ); // A12 := U11 A12 A12_STAR_VR = A12; U11_STAR_STAR = U11; internal::LocalTrmm ( LEFT, UPPER, NORMAL, NON_UNIT, (F)1, U11_STAR_STAR, A12_STAR_VR ); A12 = A12_STAR_VR; // A12 := A12 + 1/2 Y12 Axpy( (F)0.5, Y12, A12 ); // A11 := U11 A11 U11' A11_STAR_STAR = A11; internal::LocalHegst( LEFT, UPPER, A11_STAR_STAR, U11_STAR_STAR ); A11 = A11_STAR_STAR; // A11 := A11 + (A12 U12' + U12 A12') A12_STAR_VR = A12; U12_STAR_VR = U12; X11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() ); Her2k ( UPPER, NORMAL, (F)1, A12_STAR_VR.LocalMatrix(), U12_STAR_VR.LocalMatrix(), (F)0, X11_STAR_STAR.LocalMatrix() ); A11.SumScatterUpdate( (F)1, X11_STAR_STAR ); // A12 := A12 + 1/2 Y12 Axpy( (F)0.5, Y12, A12 ); //--------------------------------------------------------------------// A12_STAR_VR.FreeAlignments(); 
U12_STAR_MC.FreeAlignments(); U12_STAR_VR.FreeAlignments(); U12Adj_MR_STAR.FreeAlignments(); U12Adj_VC_STAR.FreeAlignments(); X01_MC_STAR.FreeAlignments(); Y12.FreeAlignments(); Z12Adj.FreeAlignments(); Z12Adj_MR_MC.FreeAlignments(); Z12Adj_MC_STAR.FreeAlignments(); Z12Adj_MR_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } #ifndef RELEASE PopCallStack(); #endif }