// Low-rank form of C := alpha A B^H with A dense and B = B.U B.V^T low-rank.
//
// Since B^H = conj(B.V) B.U^H, the factors of C = C.U C.V^T are
//   C.U := alpha A conj(B.V)
//   C.V := conj(B.U)
// so C has the same rank as B.
void MultiplyAdjoint
( Scalar alpha, const Dense<Scalar>& A,
                const LowRank<Scalar>& B,
                      LowRank<Scalar>& C )
{
#ifndef RELEASE
    CallStackEntry entry("hmat_tools::MultiplyAdjoint (F := D F^H)");
    if( A.Width() != B.Width() )
        throw std::logic_error("Cannot multiply nonconformal matrices.");
#endif
    const int outHeight = A.Height();
    const int outWidth = B.Height();
    const int rank = B.Rank();

    C.U.SetType( GENERAL );
    C.U.Resize( outHeight, rank );
    C.V.SetType( GENERAL );
    C.V.Resize( outWidth, rank );

    // C.U := alpha A conj(B.V); the conjugate is formed explicitly first.
    Dense<Scalar> conjBV;
    Conjugate( B.V, conjBV );
    blas::Gemm
    ( 'N', 'N', outHeight, rank, A.Width(),
      alpha, A.LockedBuffer(),      A.LDim(),
             conjBV.LockedBuffer(), conjBV.LDim(),
      0,     C.U.Buffer(),          C.U.LDim() );

    // C.V := conj(B.U), so that C.V^T = B.U^H
    Conjugate( B.U, C.V );
}
// Returns the multiplicative inverse of this quaternion,
//   q^{-1} = conj(q) / |q|^2.
// When the squared modulus is exactly one the division is skipped and the
// conjugate is returned directly.
//
// NOTE: no guard is made against a zero quaternion; the division is then by
// zero, as before.
Quaternion Quaternion::Inverse()
{
    if (ModulusSqr() == 1.0)
    {
        return Conjugate();
    }

    // The divisor must be the *squared* modulus: q * conj(q) = |q|^2, so
    // dividing by |q| alone would only yield an inverse for unit quaternions.
    return Conjugate()/ModulusSqr();
}
void SolveAfter ( Orientation orientation, const Matrix<F>& A, const Matrix<F>& householderScalars, const Matrix<Base<F>>& signature, const Matrix<F>& B, Matrix<F>& X ) { DEBUG_CSE const Int m = A.Height(); const Int n = A.Width(); if( m > n ) LogicError("Must have full row rank"); // TODO: Add scaling auto AL = A( IR(0,m), IR(0,m) ); if( orientation == NORMAL ) { if( m != B.Height() ) LogicError("A and B do not conform"); // Copy B into X X.Resize( n, B.Width() ); auto XT = X( IR(0,m), ALL ); auto XB = X( IR(m,n), ALL ); XT = B; Zero( XB ); // Solve against L (checking for singularities) Trsm( LEFT, LOWER, NORMAL, NON_UNIT, F(1), AL, XT, true ); // Apply Q' to X lq::ApplyQ( LEFT, ADJOINT, A, householderScalars, signature, X ); } else // orientation in {TRANSPOSE,ADJOINT} { if( n != B.Height() ) LogicError("A and B do not conform"); // Copy B into X X = B; if( orientation == TRANSPOSE ) Conjugate( X ); // Apply Q to X lq::ApplyQ( LEFT, NORMAL, A, householderScalars, signature, X ); // Shrink X to its new height X.Resize( m, X.Width() ); // Solve against L' (check for singularities) Trsm( LEFT, LOWER, ADJOINT, NON_UNIT, F(1), AL, X, true ); if( orientation == TRANSPOSE ) Conjugate( X ); } }
void MultiplyAdjoint ( Scalar alpha, const LowRank<Scalar>& A, const LowRank<Scalar>& B, Scalar beta, Dense<Scalar>& C ) { #ifndef RELEASE CallStackEntry entry("hmat_tools::MultiplyAdjoint (D := F F^H + D)"); if( A.Height() != C.Height() ) throw std::logic_error("The height of A and C are nonconformal."); if( B.Height() != C.Width() ) throw std::logic_error("The width of B and C are nonconformal."); #endif Dense<Scalar> W( A.Rank(), B.Rank() ); blas::Gemm ( 'C', 'N', A.Rank(), B.Rank(), A.Width(), 1, A.V.LockedBuffer(), A.V.LDim(), B.V.LockedBuffer(), B.V.LDim(), 0, W.Buffer(), W.LDim() ); Conjugate( W ); Dense<Scalar> X( A.Height(), B.Rank() ); blas::Gemm ( 'N', 'N', A.Height(), B.Rank(), A.Rank(), 1, A.U.LockedBuffer(), A.U.LDim(), W.LockedBuffer(), W.LDim(), 0, X.Buffer(), X.LDim() ); blas::Gemm ( 'N', 'C', C.Height(), C.Width(), B.Rank(), alpha, X.LockedBuffer(), X.LDim(), B.U.LockedBuffer(), B.U.LDim(), beta, C.Buffer(), C.LDim() ); }
// Low-rank form of C := alpha A B^H with A = A.U A.V^T low-rank and B dense.
//
//   C.U C.V^T = alpha A.U A.V^T B^H = A.U (alpha conj(B) A.V)^T
//
//   C.U := A.U
//   C.V := alpha conj(B) A.V
// so C has the same rank as A.
void MultiplyAdjoint
( Scalar alpha, const LowRank<Scalar>& A,
                const Dense<Scalar>& B,
                      LowRank<Scalar>& C )
{
#ifndef RELEASE
    CallStackEntry entry("hmat_tools::MultiplyAdjoint (F := F D^H)");
    if( A.Width() != B.Width() )
        throw std::logic_error("Cannot multiply nonconformal matrices.");
#endif
    const int outHeight = A.Height();
    const int outWidth = B.Height();
    const int rank = A.Rank();

    C.U.SetType( GENERAL );
    C.U.Resize( outHeight, rank );
    C.V.SetType( GENERAL );
    C.V.Resize( outWidth, rank );

    // The left factor is carried over unchanged.
    Copy( A.U, C.U );

    // C.V := alpha conj(B) A.V, using an explicit conjugated copy of B.
    Dense<Scalar> conjB;
    Conjugate( B, conjB );
    blas::Gemm
    ( 'N', 'N', outWidth, rank, A.Width(),
      alpha, conjB.LockedBuffer(), conjB.LDim(),
             A.V.LockedBuffer(),   A.V.LDim(),
      0,     C.V.Buffer(),         C.V.LDim() );
}
// Returns the imaginary part of q * (0, v) * Conjugate(q).
// Because the conjugate is used rather than the inverse, this is a pure
// rotation of v only when q has unit length (not checked here).
static Vector<T, 3> RotateVector( const Quaternion& q, const Vector<T, 3>& v )
{
    // Embed v as a quaternion with zero real part.
    Quaternion pure( T(0), v );
    return ( q * pure * Conjugate(q) ).Imag();
}
inline void SolveAfter ( UpperOrLower uplo, Orientation orientation, const DistMatrix<F>& A, DistMatrix<F>& B ) { #ifndef RELEASE CallStackEntry entry("cholesky::SolveAfter"); if( A.Grid() != B.Grid() ) LogicError("{A,B} must be distributed over the same grid"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( A.Height() != B.Height() ) LogicError("A and B must be the same height"); #endif if( B.Width() == 1 ) { if( uplo == LOWER ) { if( orientation == TRANSPOSE ) Conjugate( B ); Trsv( LOWER, NORMAL, NON_UNIT, A, B ); Trsv( LOWER, ADJOINT, NON_UNIT, A, B ); if( orientation == TRANSPOSE ) Conjugate( B ); } else { if( orientation == TRANSPOSE ) Conjugate( B ); Trsv( UPPER, ADJOINT, NON_UNIT, A, B ); Trsv( UPPER, NORMAL, NON_UNIT, A, B ); if( orientation == TRANSPOSE ) Conjugate( B ); } } else { if( uplo == LOWER ) { if( orientation == TRANSPOSE ) Conjugate( B ); Trsm( LEFT, LOWER, NORMAL, NON_UNIT, F(1), A, B ); Trsm( LEFT, LOWER, ADJOINT, NON_UNIT, F(1), A, B ); if( orientation == TRANSPOSE ) Conjugate( B ); } else { if( orientation == TRANSPOSE ) Conjugate( B ); Trsm( LEFT, UPPER, ADJOINT, NON_UNIT, F(1), A, B ); Trsm( LEFT, UPPER, NORMAL, NON_UNIT, F(1), A, B ); if( orientation == TRANSPOSE ) Conjugate( B ); } } }
/* In-place inverse: conjugate, then scale both the vector part (cV) and the
   scalar part (fScalar) by 1/Len2().
   NOTE(review): Len2() is presumably the squared length -- confirm. It is
   read before the conjugation, and no guard is made against it being zero. */
void Inverse()
{
    const float invLen2 = 1.f / Len2();
    Conjugate();
    cV *= invLen2;
    fScalar *= invLen2;
}
// In-place conjugation of a distributed matrix: forwards to the Conjugate
// overload that accepts whatever A.Matrix() returns.
inline void Conjugate( DistMatrix<T,U,V>& A )
{
#ifndef RELEASE
    CallStackEntry entry("Conjugate (in-place)");
#endif
    Conjugate( A.Matrix() );
}
// Inverts this quaternion in place: conjugate, then divide by the squared
// length.
//
// A (near-)zero squared length, as judged by Math::IsZero, is reported via
// ErrorIf. The division is still carried out afterwards, exactly as before,
// so the outcome in that case depends on what ErrorIf does.
void Quaternion::Invert(void)
{
    Conjugate();

    const float lengthSq = LengthSq();

    // ErrorIf already tests its condition; wrapping it in an `if` on the very
    // same condition was redundant.
    ErrorIf(Math::IsZero(lengthSq), "Quaternion - Division by zero.");

    *this /= lengthSq;
}
inline void Conjugate( const DistMatrix<T,U,V>& A, DistMatrix<T,W,Z>& B ) { #ifndef RELEASE CallStackEntry entry("Conjugate"); #endif B = A; Conjugate( B ); }
// In-place conjugation of a distributed matrix: forwards to the Conjugate
// overload that accepts whatever A.LocalMatrix() returns.
inline void Conjugate( DistMatrix<T,U,V>& A )
{
#ifndef RELEASE
    PushCallStack("Conjugate (in-place)");
#endif

    Conjugate( A.LocalMatrix() );

#ifndef RELEASE
    PopCallStack();
#endif
}
// Returns the x/y/z components of (*this) * (v, 0) * Conjugate().
// The conjugate stands in for the inverse, so this is a pure rotation of v
// only if this quaternion has unit length (not checked here).
cVector3 cQuaternion::AppliedTo( const cVector3& v )
{
    // Embed v as a quaternion with zero w component.
    cQuaternion pure;
    pure.x = v.x;
    pure.y = v.y;
    pure.z = v.z;
    pure.w = 0.0f;

    // Same association as before: (*this) * (pure * conj)
    cQuaternion product;
    product = pure * Conjugate();
    product = *this * product;

    return cVector3( product.x, product.y, product.z );
}
inline void Conjugate( const DistMatrix<T,U,V>& A, DistMatrix<T,W,Z>& B ) { #ifndef RELEASE PushCallStack("Conjugate"); #endif B = A; Conjugate( B ); #ifndef RELEASE PopCallStack(); #endif }
/**
  \brief Applies this quaternion to a vector, computed as
         q * (vec, 0) * Conjugate().

  The conjugate is used in place of the inverse, so the result is a pure
  rotation of vec only when this quaternion has unit length (not checked).

  \param vec Input vector.
  \return The x/y/z components of the resulting quaternion product.
  */
Vec3 operator*( const Vec3& vec ) const
{
    // FIXME: vec was originally normalized here, but the normalization was
    // removed because it caused too fast camera movement and erroneous
    // movement at slow speeds. [2014-12-08]

    // Embed vec as a quaternion with zero w component.
    Quaternion pure;
    pure.x = vec.x;
    pure.y = vec.y;
    pure.z = vec.z;
    pure.w = 0.0f;

    // Same association as before: (*this) * (pure * conj)
    Quaternion product;
    product = pure * Conjugate();
    product = *this * product;

    return Vec3( product.x, product.y, product.z );
}
void Covariance( const Matrix<F>& D, Matrix<F>& S ) { DEBUG_CSE const Int numObs = D.Height(); const Int n = D.Width(); // Compute the average column Matrix<F> ones, xMean; Ones( ones, numObs, 1 ); Gemv( TRANSPOSE, F(1)/F(numObs), D, ones, xMean ); // Subtract the mean from each column of D Matrix<F> DDev( D ); for( Int i=0; i<numObs; ++i ) blas::Axpy ( n, F(-1), xMean.LockedBuffer(), 1, DDev.Buffer(i,0), DDev.LDim() ); // Form S := 1/(numObs-1) DDev DDev' Herk( LOWER, ADJOINT, Base<F>(1)/Base<F>(numObs-1), DDev, S ); Conjugate( S ); MakeHermitian( LOWER, S ); }
void MultiplyAdjoint ( Scalar alpha, const Dense<Scalar>& A, const LowRank<Scalar>& B, Scalar beta, Dense<Scalar>& C ) { #ifndef RELEASE CallStackEntry entry ("hmat_tools::MultiplyAdjoint (D := D F^H + D)"); if( A.Width() != B.Width() ) throw std::logic_error("Cannot multiply nonconformal matrices."); if( A.Height() != C.Height() ) throw std::logic_error("The height of A and C are nonconformal."); if( B.Height() != C.Width() ) throw std::logic_error("The width of B and C are nonconformal."); if( C.Symmetric() ) throw std::logic_error("Update will probably not be symmetric."); #endif const int m = C.Height(); const int n = C.Width(); const int r = B.Rank(); // C := alpha (A conj(B.V)) B.U^H + beta C // // W := A conj(B.V) // C := alpha W B.U^H + beta C Dense<Scalar> W( m, r ), cBV; Conjugate( B.V, cBV ); blas::Gemm ( 'N', 'N', m, r, B.Width(), 1, A.LockedBuffer(), A.LDim(), cBV.LockedBuffer(), cBV.LDim(), 0, W.Buffer(), W.LDim() ); blas::Gemm ( 'N', 'C', m, n, r, alpha, W.LockedBuffer(), W.LDim(), B.U.LockedBuffer(), B.U.LDim(), beta, C.Buffer(), C.LDim() ); }
void Covariance ( const ElementalMatrix<F>& DPre, ElementalMatrix<F>& SPre ) { DEBUG_CSE DistMatrixReadProxy<F,F,MC,MR> DProx( DPre ); DistMatrixWriteProxy<F,F,MC,MR> SProx( SPre ); auto& D = DProx.GetLocked(); auto& S = SProx.Get(); const Grid& g = D.Grid(); const Int numObs = D.Height(); // Compute the average column DistMatrix<F> ones(g), xMean(g); Ones( ones, numObs, 1 ); Gemv( TRANSPOSE, F(1)/F(numObs), D, ones, xMean ); DistMatrix<F,MR,STAR> xMean_MR(g); xMean_MR.AlignWith( D ); xMean_MR = xMean; // Subtract the mean from each column of D DistMatrix<F> DDev( D ); for( Int iLoc=0; iLoc<DDev.LocalHeight(); ++iLoc ) blas::Axpy ( DDev.LocalWidth(), F(-1), xMean_MR.LockedBuffer(), 1, DDev.Buffer(iLoc,0), DDev.LDim() ); // Form S := 1/(numObs-1) DDev DDev' Herk( LOWER, ADJOINT, Base<F>(1)/Base<F>(numObs-1), DDev, S ); Conjugate( S ); MakeHermitian( LOWER, S ); }
// In-place inverse implemented as a plain conjugation.
// Preconditions, stated through assume(): the quaternion is normalized and
// invertible.
void Quat::Inverse()
{
    assume(IsNormalized());
    assume(IsInvertible());

    Conjugate();
}
// Returns the inverse as Conjugate() / MagnitudeSquared().
// No guard is made against a zero magnitude.
Quaternion Quaternion::Inverse() const
{
    Quaternion conjugated = Conjugate();
    return conjugated / MagnitudeSquared();
}
void LLTUnb ( bool conjugate, const Matrix<F>& L, Matrix<F>& X, bool checkIfSingular ) { EL_DEBUG_CSE typedef Base<F> Real; const Int m = X.Height(); const Int n = X.Width(); const F* LBuf = L.LockedBuffer(); F* XBuf = X.Buffer(); const Int ldl = L.LDim(); const Int ldx = X.LDim(); if( conjugate ) Conjugate( X ); Int k=m-1; while( k >= 0 ) { const bool in2x2 = ( k>0 && LBuf[(k-1)+k*ldl] != F(0) ); if( in2x2 ) { --k; // Solve the 2x2 linear systems via a 2x2 LQ decomposition produced // by the Givens rotation // | L(k,k) L(k,k+1) | | c -conj(s) | = | gamma11 0 | // | s c | // and by also forming the bottom two entries of the 2x2 resulting // lower-triangular matrix, say gamma21 and gamma22 // // Extract the 2x2 diagonal block, D const F delta11 = LBuf[ k + k *ldl]; const F delta12 = LBuf[ k +(k+1)*ldl]; const F delta21 = LBuf[(k+1)+ k *ldl]; const F delta22 = LBuf[(k+1)+(k+1)*ldl]; // Decompose D = L Q Real c; F s; const F gamma11 = Givens( delta11, delta12, c, s ); const F gamma21 = c*delta21 + s*delta22; const F gamma22 = -Conj(s)*delta21 + c*delta22; if( checkIfSingular ) { // TODO: Instead check if values are too small in magnitude if( gamma11 == F(0) || gamma22 == F(0) ) LogicError("Singular diagonal block detected"); } for( Int j=0; j<n; ++j ) { F* xBuf = &XBuf[j*ldx]; // Solve against Q^T const F chi1 = xBuf[k ]; const F chi2 = xBuf[k+1]; xBuf[k ] = c*chi1 + s*chi2; xBuf[k+1] = -Conj(s)*chi1 + c*chi2; // Solve against R^T xBuf[k+1] /= gamma22; xBuf[k ] -= gamma21*xBuf[k+1]; xBuf[k ] /= gamma11; // Update x0 := x0 - L10^T x1 blas::Axpy( k, -xBuf[k ], &LBuf[k ], ldl, xBuf, 1 ); blas::Axpy( k, -xBuf[k+1], &LBuf[k+1], ldl, xBuf, 1 ); } } else { if( checkIfSingular ) if( LBuf[k+k*ldl] == F(0) ) LogicError("Singular diagonal entry detected"); for( Int j=0; j<n; ++j ) { F* xBuf = &XBuf[j*ldx]; // Solve the 1x1 linear system xBuf[k] /= LBuf[k+k*ldl]; // Update x0 := x0 - l10^T chi_1 blas::Axpy( k, -xBuf[k], &LBuf[k], ldl, xBuf, 1 ); } } --k; } if( conjugate ) 
Conjugate( X ); }
// In-place inverse implemented as a plain conjugation.
// NOTE(review): the name suggests the caller guarantees unit length; nothing
// is checked here.
void InverseUnit()
{
    Conjugate();
}
void LUTUnb ( bool conjugate, const Matrix<F>& U, const Matrix<F>& shifts, Matrix<F>& X ) { DEBUG_CSE typedef Base<F> Real; const Int m = X.Height(); const Int n = X.Width(); if( conjugate ) Conjugate( X ); const F* UBuf = U.LockedBuffer(); F* XBuf = X.Buffer(); const Int ldU = U.LDim(); const Int ldX = X.LDim(); Int k=0; while( k < m ) { const bool in2x2 = ( k+1<m && UBuf[(k+1)+k*ldU] != F(0) ); if( in2x2 ) { // Solve the 2x2 linear systems via 2x2 QR decompositions produced // by the Givens rotation // | c s | | U(k, k)-shift | = | gamma11 | // | -conj(s) c | | U(k+1,k) | | 0 | // // and by also forming the right two entries of the 2x2 resulting // upper-triangular matrix, say gamma12 and gamma22 // // Extract the constant part of the 2x2 diagonal block, D const F delta12 = UBuf[ k +(k+1)*ldU]; const F delta21 = UBuf[(k+1)+ k *ldU]; for( Int j=0; j<n; ++j ) { const F delta11 = UBuf[ k + k *ldU] - shifts.Get(j,0); const F delta22 = UBuf[(k+1)+(k+1)*ldU] - shifts.Get(j,0); // Decompose D = Q R Real c; F s; const F gamma11 = Givens( delta11, delta21, c, s ); const F gamma12 = c*delta12 + s*delta22; const F gamma22 = -Conj(s)*delta12 + c*delta22; F* xBuf = &XBuf[j*ldX]; // Solve against R^T F chi1 = xBuf[k ]; F chi2 = xBuf[k+1]; chi1 /= gamma11; chi2 -= gamma12*chi1; chi2 /= gamma22; // Solve against Q^T xBuf[k ] = c*chi1 - Conj(s)*chi2; xBuf[k+1] = s*chi1 + c*chi2; // Update x2 := x2 - U12^T x1 blas::Axpy ( m-(k+2), -xBuf[k ], &UBuf[ k +(k+2)*ldU], ldU, &xBuf[k+2], 1 ); blas::Axpy ( m-(k+2), -xBuf[k+1], &UBuf[(k+1)+(k+2)*ldU], ldU, &xBuf[k+2], 1 ); } k += 2; } else { for( Int j=0; j<n; ++j ) { F* xBuf = &XBuf[j*ldX]; xBuf[k] /= UBuf[k+k*ldU] - shifts.Get(j,0); blas::Axpy ( m-(k+1), -xBuf[k], &UBuf[k+(k+1)*ldU], ldU, &xBuf[k+1], 1 ); } k += 1; } } if( conjugate ) Conjugate( X ); }
// Conjugates this quaternion in place, then normalizes it.
// Returns whatever Normalize() returns (presumably the length before
// normalization -- confirm against Normalize()).
float Quat::InverseAndNormalize()
{
    Conjugate();

    const float normalizeResult = Normalize();
    return normalizeResult;
}
void MultiplyAdjoint ( Scalar alpha, const LowRank<Scalar>& A, const LowRank<Scalar>& B, LowRank<Scalar>& C ) { #ifndef RELEASE CallStackEntry entry("hmat_tools::MultiplyAdjoint (F := F F^H)"); if( A.Width() != B.Width() ) throw std::logic_error("Cannot multiply nonconformal matrices."); #endif const int m = A.Height(); const int n = B.Height(); const int Ar = A.Rank(); const int Br = B.Rank(); if( Ar <= Br ) { const int r = Ar; C.U.SetType( GENERAL ); C.U.Resize( m, r ); C.V.SetType( GENERAL ); C.V.Resize( n, r ); // C.U C.V^T = alpha (A.U A.V^T) (B.U B.V^T)^H // = alpha A.U A.V^T conj(B.V) B.U^H // = A.U (alpha conj(B.U) (B.V^H A.V)))^T // = A.U (alpha conj(B.U) W)^T // // C.U := A.U // W := B.V^H A.V // C.V := alpha conj(B.U) W Copy( A.U, C.U ); Dense<Scalar> W( Br, Ar ), cBU; blas::Gemm ( 'C', 'N', Br, Ar, A.Width(), 1, B.V.LockedBuffer(), B.V.LDim(), A.V.LockedBuffer(), A.V.LDim(), 0, W.Buffer(), W.LDim() ); Conjugate( B.U, cBU ); blas::Gemm ( 'N', 'N', n, Ar, Br, alpha, cBU.LockedBuffer(), cBU.LDim(), W.LockedBuffer(), W.LDim(), 0, C.V.Buffer(), C.V.LDim() ); } else // B.r < A.r { const int r = Br; C.U.SetType( GENERAL ); C.U.Resize( m, r ); C.V.SetType( GENERAL ); C.V.Resize( n, r ); // C.U C.V^T := alpha (A.U A.V^T) (B.U B.V^T)^H // = alpha A.U A.V^T conj(B.V) B.U^H // = (alpha A.U A.V^T conj(B.U)) conj(B.U)^T // = (alpha A.U W) conj(B.U)^T // // W := A.V^T conj(B.U) // C.U := alpha A.U W // C.V := conj(B.U) Dense<Scalar> W( Ar, Br ), cBU; Conjugate( B.U, cBU ); blas::Gemm ( 'T', 'N', Ar, Br, B.Height(), 1, A.V.LockedBuffer(), A.V.LDim(), cBU.LockedBuffer(), cBU.LDim(), 0, W.Buffer(), W.LDim() ); blas::Gemm ( 'N', 'N', A.Height(), Br, Ar, alpha, A.U.LockedBuffer(), A.U.LDim(), W.LockedBuffer(), W.LDim(), 0, C.U.Buffer(), C.U.LDim() ); Conjugate( B.U, C.V ); } }
// Returns (*this) * q * Conjugate(), associated as (*this) * (q * conj).
// The conjugate stands in for the inverse, so this is a true conjugation of
// q only if this quaternion has unit length (not checked here).
cQuaternion cQuaternion::AppliedTo( const cQuaternion& q )
{
    cQuaternion rhs = q * Conjugate();
    return *this * rhs;
}
inline void PanelHouseholder( DistMatrix<F>& A, DistMatrix<F,MD,STAR>& t ) { #ifndef RELEASE CallStackEntry entry("lq::PanelHouseholder"); if( A.Grid() != t.Grid() ) LogicError("{A,t} must be distributed over the same grid"); if( t.Height() != Min(A.Height(),A.Width()) || t.Width() != 1 ) LogicError ("t must be a vector of height equal to the minimum dimension of A"); if( !t.AlignedWithDiagonal( A, 0 ) ) LogicError("t must be aligned with A's main diagonal"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), a01(g), A02(g), aTopRow(g), ABottomPan(g), ABL(g), ABR(g), a10(g), alpha11(g), a12(g), A20(g), a21(g), A22(g); DistMatrix<F,MD,STAR> tT(g), t0(g), tB(g), tau1(g), t2(g); // Temporary distributions DistMatrix<F> aTopRowConj(g); DistMatrix<F,STAR,MR > aTopRowConj_STAR_MR(g); DistMatrix<F,MC, STAR> z_MC_STAR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( t, tT, tB, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ a01, A02, /*************/ /**********************/ /**/ a10, /**/ alpha11, a12, ABL, /**/ ABR, A20, /**/ a21, A22, 1 ); RepartitionDown ( tT, t0, /**/ /****/ tau1, tB, t2, 1 ); View1x2( aTopRow, alpha11, a12 ); View1x2( ABottomPan, a21, A22 ); aTopRowConj_STAR_MR.AlignWith( ABottomPan ); z_MC_STAR.AlignWith( ABottomPan ); //--------------------------------------------------------------------// // Compute the Householder reflector const F tau = Reflector( alpha11, a12 ); tau1.Set( 0, 0, tau ); // Apply the Householder reflector const bool myDiagonalEntry = ( g.Row() == alpha11.ColAlignment() && g.Col() == alpha11.RowAlignment() ); F alpha = 0; if( myDiagonalEntry ) { alpha = alpha11.GetLocal(0,0); alpha11.SetLocal(0,0,1); } Conjugate( aTopRow, aTopRowConj ); aTopRowConj_STAR_MR = aTopRowConj; Zeros( z_MC_STAR, ABottomPan.Height(), 1 ); LocalGemv ( NORMAL, F(1), ABottomPan, aTopRowConj_STAR_MR, F(0), z_MC_STAR ); 
z_MC_STAR.SumOverRow(); Ger ( -Conj(tau), z_MC_STAR.LockedMatrix(), aTopRowConj_STAR_MR.LockedMatrix(), ABottomPan.Matrix() ); if( myDiagonalEntry ) alpha11.SetLocal(0,0,alpha); //--------------------------------------------------------------------// SlidePartitionDown ( tT, t0, tau1, /**/ /****/ tB, t2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, a01, /**/ A02, /**/ a10, alpha11, /**/ a12, /*************/ /**********************/ ABL, /**/ ABR, A20, a21, /**/ A22 ); } }
inline void UnblockedBidiagU ( DistMatrix<Complex<R> >& A, DistMatrix<Complex<R>,MD,STAR>& tP, DistMatrix<Complex<R>,MD,STAR>& tQ ) { #ifndef RELEASE PushCallStack("BidiagU"); #endif const int tPHeight = std::max(A.Width()-1,0); const int tQHeight = A.Width(); #ifndef RELEASE if( A.Grid() != tP.Grid() || tP.Grid() != tQ.Grid() ) throw std::logic_error("Process grids do not match"); if( A.Height() < A.Width() ) throw std::logic_error("A must be at least as tall as it is wide"); if( tP.Viewing() && (tP.Height() != tPHeight || tP.Width() != 1) ) throw std::logic_error("tP is the wrong height"); if( tQ.Viewing() && (tQ.Height() != tQHeight || tQ.Width() != 1) ) throw std::logic_error("tQ is the wrong height"); #endif typedef Complex<R> C; const Grid& g = A.Grid(); if( !tP.Viewing() ) tP.ResizeTo( tPHeight, 1 ); if( !tQ.Viewing() ) tQ.ResizeTo( tQHeight, 1 ); // Matrix views DistMatrix<C> ATL(g), ATR(g), A00(g), a01(g), A02(g), alpha12L(g), a12R(g), ABL(g), ABR(g), a10(g), alpha11(g), a12(g), aB1(g), AB2(g), A20(g), a21(g), A22(g); // Temporary matrices DistMatrix<C,STAR,MR > a12_STAR_MR(g); DistMatrix<C,MC, STAR> aB1_MC_STAR(g); DistMatrix<C,MR, STAR> x12Adj_MR_STAR(g); DistMatrix<C,MC, STAR> w21_MC_STAR(g); PushBlocksizeStack( 1 ); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ a01, A02, /*************/ /**********************/ /**/ a10, /**/ alpha11, a12, ABL, /**/ ABR, A20, /**/ a21, A22 ); View2x1 ( aB1, alpha11, a21 ); View2x1 ( AB2, a12, A22 ); aB1_MC_STAR.AlignWith( aB1 ); a12_STAR_MR.AlignWith( a12 ); x12Adj_MR_STAR.AlignWith( AB2 ); w21_MC_STAR.AlignWith( A22 ); Zeros( a12.Width(), 1, x12Adj_MR_STAR ); Zeros( a21.Height(), 1, w21_MC_STAR ); const bool thisIsMyRow = ( g.Row() == alpha11.ColAlignment() ); const bool thisIsMyCol = ( g.Col() == alpha11.RowAlignment() ); const bool nextIsMyCol = ( g.Col() == a12.RowAlignment() ); 
//--------------------------------------------------------------------// // Find tauQ, u, and epsilonQ such that // I - conj(tauQ) | 1 | | 1, u^H | | alpha11 | = | epsilonQ | // | u | | a21 | | 0 | const C tauQ = Reflector( alpha11, a21 ); tQ.Set(A00.Height(),0,tauQ ); C epsilonQ=0; if( thisIsMyCol && thisIsMyRow ) epsilonQ = alpha11.GetLocal(0,0); // Set aB1 = | 1 | and form x12^H := (aB1^H AB2)^H = AB2^H aB1 // | u | alpha11.Set(0,0,C(1)); aB1_MC_STAR = aB1; internal::LocalGemv ( ADJOINT, C(1), AB2, aB1_MC_STAR, C(0), x12Adj_MR_STAR ); x12Adj_MR_STAR.SumOverCol(); // Update AB2 := AB2 - conj(tauQ) aB1 x12 // = AB2 - conj(tauQ) aB1 aB1^H AB2 // = (I - conj(tauQ) aB1 aB1^H) AB2 internal::LocalGer( -Conj(tauQ), aB1_MC_STAR, x12Adj_MR_STAR, AB2 ); // Put epsilonQ back instead of the temporary value, 1 if( thisIsMyCol && thisIsMyRow ) alpha11.SetLocal(0,0,epsilonQ); if( A22.Width() != 0 ) { // Due to the deficiencies in the BLAS ?gemv routines, this section // is easier if we temporarily conjugate a12 Conjugate( a12 ); // Expose the subvector we seek to zero, a12R PartitionRight( a12, alpha12L, a12R ); // Find tauP, v, and epsilonP such that // I - conj(tauP) | 1 | | 1, v^H | | alpha12L | = | epsilonP | // | v | | a12R^T | | 0 | const C tauP = Reflector( alpha12L, a12R ); tP.Set(A00.Height(),0,tauP); C epsilonP=0; if( nextIsMyCol && thisIsMyRow ) epsilonP = alpha12L.GetLocal(0,0); // Set a12^T = | 1 | and form w21 := A22 a12^T = A22 | 1 | // | v | | v | alpha12L.Set(0,0,C(1)); a12_STAR_MR = a12; internal::LocalGemv ( NORMAL, C(1), A22, a12_STAR_MR, C(0), w21_MC_STAR ); w21_MC_STAR.SumOverRow(); // A22 := A22 - tauP w21 conj(a12) // = A22 - tauP A22 a12^T conj(a12) // = A22 (I - tauP a12^T conj(a12)) // = A22 conj(I - conj(tauP) a12^H a12) // which compensates for the fact that the reflector was generated // on the conjugated a12. 
internal::LocalGer( -tauP, w21_MC_STAR, a12_STAR_MR, A22 ); // Put epsilonP back instead of the temporary value, 1 if( nextIsMyCol && thisIsMyRow ) alpha12L.SetLocal(0,0,epsilonP); // Undue the temporary conjugation Conjugate( a12 ); } //--------------------------------------------------------------------// aB1_MC_STAR.FreeAlignments(); a12_STAR_MR.FreeAlignments(); x12Adj_MR_STAR.FreeAlignments(); w21_MC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, a01, /**/ A02, /**/ a10, alpha11, /**/ a12, /*************/ /**********************/ ABL, /**/ ABR, A20, a21, /**/ A22 ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
void LUTUnb ( bool conjugate, const Matrix<F>& U, Matrix<F>& X, bool checkIfSingular ) { EL_DEBUG_CSE typedef Base<F> Real; const Int m = X.Height(); const Int n = X.Width(); if( conjugate ) Conjugate( X ); const F* UBuf = U.LockedBuffer(); F* XBuf = X.Buffer(); const Int ldu = U.LDim(); const Int ldx = X.LDim(); Int k=0; while( k < m ) { const bool in2x2 = ( k+1<m && UBuf[(k+1)+k*ldu] != F(0) ); if( in2x2 ) { // Solve the 2x2 linear systems via a 2x2 QR decomposition produced // by the Givens rotation // | c s | | U(k, k) | = | gamma11 | // | -conj(s) c | | U(k+1,k) | | 0 | // // and by also forming the right two entries of the 2x2 resulting // upper-triangular matrix, say gamma12 and gamma22 // // Extract the 2x2 diagonal block, D const F delta11 = UBuf[ k + k *ldu]; const F delta12 = UBuf[ k +(k+1)*ldu]; const F delta21 = UBuf[(k+1)+ k *ldu]; const F delta22 = UBuf[(k+1)+(k+1)*ldu]; // Decompose D = Q R Real c; F s; const F gamma11 = Givens( delta11, delta21, c, s ); const F gamma12 = c*delta12 + s*delta22; const F gamma22 = -Conj(s)*delta12 + c*delta22; if( checkIfSingular ) { // TODO: Check if sufficiently small instead if( gamma11 == F(0) || gamma22 == F(0) ) LogicError("Singular diagonal block detected"); } for( Int j=0; j<n; ++j ) { F* xBuf = &XBuf[j*ldx]; // Solve against R^T xBuf[k ] /= gamma11; xBuf[k+1] -= gamma12*xBuf[k]; xBuf[k+1] /= gamma22; // Solve against Q^T const F chi1 = xBuf[k ]; const F chi2 = xBuf[k+1]; xBuf[k ] = c*chi1 - Conj(s)*chi2; xBuf[k+1] = s*chi1 + c*chi2; // Update x2 := x2 - U12^T x1 blas::Axpy ( m-(k+2), -xBuf[k ], &UBuf[ k +(k+2)*ldu], ldu, &xBuf[k+2], 1 ); blas::Axpy ( m-(k+2), -xBuf[k+1], &UBuf[(k+1)+(k+2)*ldu], ldu, &xBuf[k+2], 1 ); } k += 2; } else { if( checkIfSingular ) { // TODO: Check if sufficiently small instead if( UBuf[k+k*ldu] == F(0) ) LogicError("Singular diagonal entry detected"); } for( Int j=0; j<n; ++j ) { F* xBuf = &XBuf[j*ldx]; xBuf[k] /= UBuf[k+k*ldu]; blas::Axpy ( m-(k+1), -xBuf[k], &UBuf[k+(k+1)*ldu], 
ldu, &xBuf[k+1], 1 ); } k += 1; } } if( conjugate ) Conjugate( X ); }
inline void PanelHouseholder( Matrix<F>& A, Matrix<F>& t ) { #ifndef RELEASE CallStackEntry entry("lq::PanelHouseholder"); if( t.Height() != Min(A.Height(),A.Width()) || t.Width() != 1 ) LogicError ("t must be a vector of height equal to the minimum dimension of A"); #endif Matrix<F> ATL, ATR, A00, a01, A02, aTopRow, ABottomPan, ABL, ABR, a10, alpha11, a12, A20, a21, A22; Matrix<F> tT, t0, tB, tau1, t2; Matrix<F> z, aTopRowConj; PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( t, tT, tB, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ a01, A02, /*************/ /**********************/ /**/ a10, /**/ alpha11, a12, ABL, /**/ ABR, A20, /**/ a21, A22, 1 ); RepartitionDown ( tT, t0, /**/ /****/ tau1, tB, t2, 1 ); View1x2( aTopRow, alpha11, a12 ); View1x2( ABottomPan, a21, A22 ); //--------------------------------------------------------------------// // Compute the Householder reflector const F tau = Reflector( alpha11, a12 ); tau1.Set( 0, 0, tau ); // Apply the Householder reflector const F alpha = alpha11.Get(0,0); alpha11.Set(0,0,1); Conjugate( aTopRow, aTopRowConj ); Zeros( z, ABottomPan.Height(), 1 ); Gemv( NORMAL, F(1), ABottomPan, aTopRowConj, F(0), z ); Ger( -Conj(tau), z, aTopRowConj, ABottomPan ); alpha11.Set(0,0,alpha); //--------------------------------------------------------------------// SlidePartitionDown ( tT, t0, tau1, /**/ /****/ tB, t2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, a01, /**/ A02, /**/ a10, alpha11, /**/ a12, /*************/ /**********************/ ABL, /**/ ABR, A20, a21, /**/ A22 ); } }