inline void Trmv ( UpperOrLower uplo, Orientation orientation, UnitOrNonUnit diag, const Matrix<T>& A, Matrix<T>& x ) { #ifndef RELEASE CallStackEntry entry("Trmv"); if( x.Height() != 1 && x.Width() != 1 ) throw std::logic_error("x must be a vector"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); const int xLength = ( x.Width()==1 ? x.Height() : x.Width() ); if( xLength != A.Height() ) throw std::logic_error("x must conform with A"); #endif const char uploChar = UpperOrLowerToChar( uplo ); const char transChar = OrientationToChar( orientation ); const char diagChar = UnitOrNonUnitToChar( diag ); const int m = A.Height(); const int incx = ( x.Width()==1 ? 1 : x.LDim() ); blas::Trmv ( uploChar, transChar, diagChar, m, A.LockedBuffer(), A.LDim(), x.Buffer(), incx ); }
inline void ApplyInverseColumnPivots( Matrix<F>& A, const Matrix<int>& p ) { #ifndef RELEASE PushCallStack("ApplyInverseColumnPivots"); if( p.Width() != 1 ) throw std::logic_error("p must be a column vector"); if( p.Height() != A.Width() ) throw std::logic_error("p must be the same length as the width of A"); #endif const int height = A.Height(); const int width = A.Width(); if( height == 0 || width == 0 ) { #ifndef RELEASE PopCallStack(); #endif return; } for( int j=width-1; j>=0; --j ) { const int k = p.Get(j,0); F* Aj = A.Buffer(0,j); F* Ak = A.Buffer(0,k); for( int i=0; i<height; ++i ) { F temp = Aj[i]; Aj[i] = Ak[i]; Ak[i] = temp; } } #ifndef RELEASE PopCallStack(); #endif }
void LUTUnb ( bool conjugate, const Matrix<F>& U, Matrix<F>& X, bool checkIfSingular ) { EL_DEBUG_CSE typedef Base<F> Real; const Int m = X.Height(); const Int n = X.Width(); if( conjugate ) Conjugate( X ); const F* UBuf = U.LockedBuffer(); F* XBuf = X.Buffer(); const Int ldu = U.LDim(); const Int ldx = X.LDim(); Int k=0; while( k < m ) { const bool in2x2 = ( k+1<m && UBuf[(k+1)+k*ldu] != F(0) ); if( in2x2 ) { // Solve the 2x2 linear systems via a 2x2 QR decomposition produced // by the Givens rotation // | c s | | U(k, k) | = | gamma11 | // | -conj(s) c | | U(k+1,k) | | 0 | // // and by also forming the right two entries of the 2x2 resulting // upper-triangular matrix, say gamma12 and gamma22 // // Extract the 2x2 diagonal block, D const F delta11 = UBuf[ k + k *ldu]; const F delta12 = UBuf[ k +(k+1)*ldu]; const F delta21 = UBuf[(k+1)+ k *ldu]; const F delta22 = UBuf[(k+1)+(k+1)*ldu]; // Decompose D = Q R Real c; F s; const F gamma11 = Givens( delta11, delta21, c, s ); const F gamma12 = c*delta12 + s*delta22; const F gamma22 = -Conj(s)*delta12 + c*delta22; if( checkIfSingular ) { // TODO: Check if sufficiently small instead if( gamma11 == F(0) || gamma22 == F(0) ) LogicError("Singular diagonal block detected"); } for( Int j=0; j<n; ++j ) { F* xBuf = &XBuf[j*ldx]; // Solve against R^T xBuf[k ] /= gamma11; xBuf[k+1] -= gamma12*xBuf[k]; xBuf[k+1] /= gamma22; // Solve against Q^T const F chi1 = xBuf[k ]; const F chi2 = xBuf[k+1]; xBuf[k ] = c*chi1 - Conj(s)*chi2; xBuf[k+1] = s*chi1 + c*chi2; // Update x2 := x2 - U12^T x1 blas::Axpy ( m-(k+2), -xBuf[k ], &UBuf[ k +(k+2)*ldu], ldu, &xBuf[k+2], 1 ); blas::Axpy ( m-(k+2), -xBuf[k+1], &UBuf[(k+1)+(k+2)*ldu], ldu, &xBuf[k+2], 1 ); } k += 2; } else { if( checkIfSingular ) { // TODO: Check if sufficiently small instead if( UBuf[k+k*ldu] == F(0) ) LogicError("Singular diagonal entry detected"); } for( Int j=0; j<n; ++j ) { F* xBuf = &XBuf[j*ldx]; xBuf[k] /= UBuf[k+k*ldu]; blas::Axpy ( m-(k+1), -xBuf[k], &UBuf[k+(k+1)*ldu], 
ldu, &xBuf[k+1], 1 ); } k += 1; } } if( conjugate ) Conjugate( X ); }
void LLTUnb ( bool conjugate, const Matrix<F>& L, Matrix<F>& X, bool checkIfSingular ) { EL_DEBUG_CSE typedef Base<F> Real; const Int m = X.Height(); const Int n = X.Width(); const F* LBuf = L.LockedBuffer(); F* XBuf = X.Buffer(); const Int ldl = L.LDim(); const Int ldx = X.LDim(); if( conjugate ) Conjugate( X ); Int k=m-1; while( k >= 0 ) { const bool in2x2 = ( k>0 && LBuf[(k-1)+k*ldl] != F(0) ); if( in2x2 ) { --k; // Solve the 2x2 linear systems via a 2x2 LQ decomposition produced // by the Givens rotation // | L(k,k) L(k,k+1) | | c -conj(s) | = | gamma11 0 | // | s c | // and by also forming the bottom two entries of the 2x2 resulting // lower-triangular matrix, say gamma21 and gamma22 // // Extract the 2x2 diagonal block, D const F delta11 = LBuf[ k + k *ldl]; const F delta12 = LBuf[ k +(k+1)*ldl]; const F delta21 = LBuf[(k+1)+ k *ldl]; const F delta22 = LBuf[(k+1)+(k+1)*ldl]; // Decompose D = L Q Real c; F s; const F gamma11 = Givens( delta11, delta12, c, s ); const F gamma21 = c*delta21 + s*delta22; const F gamma22 = -Conj(s)*delta21 + c*delta22; if( checkIfSingular ) { // TODO: Instead check if values are too small in magnitude if( gamma11 == F(0) || gamma22 == F(0) ) LogicError("Singular diagonal block detected"); } for( Int j=0; j<n; ++j ) { F* xBuf = &XBuf[j*ldx]; // Solve against Q^T const F chi1 = xBuf[k ]; const F chi2 = xBuf[k+1]; xBuf[k ] = c*chi1 + s*chi2; xBuf[k+1] = -Conj(s)*chi1 + c*chi2; // Solve against R^T xBuf[k+1] /= gamma22; xBuf[k ] -= gamma21*xBuf[k+1]; xBuf[k ] /= gamma11; // Update x0 := x0 - L10^T x1 blas::Axpy( k, -xBuf[k ], &LBuf[k ], ldl, xBuf, 1 ); blas::Axpy( k, -xBuf[k+1], &LBuf[k+1], ldl, xBuf, 1 ); } } else { if( checkIfSingular ) if( LBuf[k+k*ldl] == F(0) ) LogicError("Singular diagonal entry detected"); for( Int j=0; j<n; ++j ) { F* xBuf = &XBuf[j*ldx]; // Solve the 1x1 linear system xBuf[k] /= LBuf[k+k*ldl]; // Update x0 := x0 - l10^T chi_1 blas::Axpy( k, -xBuf[k], &LBuf[k], ldl, xBuf, 1 ); } } --k; } if( conjugate ) 
Conjugate( X ); }
inline void TwoSidedTrsmUUnb( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& U ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrsmUUnb"); #endif // Use the Variant 4 algorithm // (which annoyingly requires conjugations for the Her2) const int n = A.Height(); const int lda = A.LDim(); const int ldu = U.LDim(); F* ABuffer = A.Buffer(); const F* UBuffer = U.LockedBuffer(); std::vector<F> a12Conj( n ), u12Conj( n ); for( int j=0; j<n; ++j ) { const int a21Height = n - (j+1); // Extract and store the diagonal value of U const F upsilon11 = ( diag==UNIT ? 1 : UBuffer[j+j*ldu] ); // a01 := a01 / upsilon11 F* a01 = &ABuffer[j*lda]; if( diag != UNIT ) for( int k=0; k<j; ++k ) a01[k] /= upsilon11; // A02 := A02 - a01 u12 F* A02 = &ABuffer[(j+1)*lda]; const F* u12 = &UBuffer[j+(j+1)*ldu]; blas::Geru( j, a21Height, F(-1), a01, 1, u12, ldu, A02, lda ); // alpha11 := alpha11 / |upsilon11|^2 ABuffer[j+j*lda] /= upsilon11*Conj(upsilon11); const F alpha11 = ABuffer[j+j*lda]; // a12 := a12 / conj(upsilon11) F* a12 = &ABuffer[j+(j+1)*lda]; if( diag != UNIT ) for( int k=0; k<a21Height; ++k ) a12[k*lda] /= Conj(upsilon11); // a12 := a12 - (alpha11/2)u12 for( int k=0; k<a21Height; ++k ) a12[k*lda] -= (alpha11/2)*u12[k*ldu]; // A22 := A22 - (a12' u12 + u12' a12) F* A22 = &ABuffer[(j+1)+(j+1)*lda]; for( int k=0; k<a21Height; ++k ) a12Conj[k] = Conj(a12[k*lda]); for( int k=0; k<a21Height; ++k ) u12Conj[k] = Conj(u12[k*ldu]); blas::Her2 ( 'U', a21Height, F(-1), &u12Conj[0], 1, &a12Conj[0], 1, A22, lda ); // a12 := a12 - (alpha11/2)u12 for( int k=0; k<a21Height; ++k ) a12[k*lda] -= (alpha11/2)*u12[k*ldu]; } #ifndef RELEASE PopCallStack(); #endif }
// Makes A refer to B's storage: A is attached to B's buffer using B's
// height, width and leading dimension.
inline void View( Matrix<T>& A, Matrix<T>& B )
{
    DEBUG_ONLY(CallStackEntry cse("View"))
    const auto viewHeight = B.Height();
    const auto viewWidth = B.Width();
    T* viewBuffer = B.Buffer();
    const auto viewLDim = B.LDim();
    A.Attach( viewHeight, viewWidth, viewBuffer, viewLDim );
}
inline void Gemv ( Orientation orientation, T alpha, const Matrix<T>& A, const Matrix<T>& x, T beta, Matrix<T>& y ) { #ifndef RELEASE PushCallStack("Gemv"); if( ( x.Height() != 1 && x.Width() != 1 ) || ( y.Height() != 1 && y.Width() != 1 ) ) { std::ostringstream msg; msg << "x and y must be vectors:\n" << " x ~ " << x.Height() << " x " << x.Width() << "\n" << " y ~ " << y.Height() << " x " << y.Width(); throw std::logic_error( msg.str().c_str() ); } const int xLength = ( x.Width()==1 ? x.Height() : x.Width() ); const int yLength = ( y.Width()==1 ? y.Height() : y.Width() ); if( orientation == NORMAL ) { if( A.Height() != yLength || A.Width() != xLength ) { std::ostringstream msg; msg << "A must conform with x and y:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " x ~ " << x.Height() << " x " << x.Width() << "\n" << " y ~ " << y.Height() << " x " << y.Width(); throw std::logic_error( msg.str().c_str() ); } } else { if( A.Width() != yLength || A.Height() != xLength ) { std::ostringstream msg; msg << "A must conform with x and y:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " x ~ " << x.Height() << " x " << x.Width() << "\n" << " y ~ " << y.Height() << " x " << y.Width(); throw std::logic_error( msg.str().c_str() ); } } #endif const char transChar = OrientationToChar( orientation ); const int m = A.Height(); const int n = A.Width(); const int k = ( transChar == 'N' ? n : m ); const int incx = ( x.Width()==1 ? 1 : x.LDim() ); const int incy = ( y.Width()==1 ? 1 : y.LDim() ); if( k != 0 ) { blas::Gemv ( transChar, m, n, alpha, A.LockedBuffer(), A.LDim(), x.LockedBuffer(), incx, beta, y.Buffer(), incy ); } else { Scale( beta, y ); } #ifndef RELEASE PopCallStack(); #endif }
void LUTUnb ( bool conjugate, const Matrix<Real>& U, const Matrix<Complex<Real>>& shifts, Matrix<Real>& XReal, Matrix<Real>& XImag ) { DEBUG_CSE typedef Complex<Real> C; const Int m = XReal.Height(); const Int n = XReal.Width(); if( conjugate ) XImag *= -1; const Real* UBuf = U.LockedBuffer(); Real* XRealBuf = XReal.Buffer(); Real* XImagBuf = XImag.Buffer(); const Int ldU = U.LDim(); const Int ldXReal = XReal.LDim(); const Int ldXImag = XImag.LDim(); Int k=0; while( k < m ) { const bool in2x2 = ( k+1<m && UBuf[(k+1)+k*ldU] != Real(0) ); if( in2x2 ) { // Solve the 2x2 linear systems via 2x2 QR decompositions produced // by the Givens rotation // | c s | | U(k, k)-shift | = | gamma11 | // | -conj(s) c | | U(k+1,k) | | 0 | // // and by also forming the right two entries of the 2x2 resulting // upper-triangular matrix, say gamma12 and gamma22 // // Extract the constant part of the 2x2 diagonal block, D const Real delta12 = UBuf[ k +(k+1)*ldU]; const Real delta21 = UBuf[(k+1)+ k *ldU]; for( Int j=0; j<n; ++j ) { const C delta11 = UBuf[ k + k *ldU] - shifts.Get(j,0); const C delta22 = UBuf[(k+1)+(k+1)*ldU] - shifts.Get(j,0); // Decompose D = Q R Real c; C s; const C gamma11 = Givens( delta11, C(delta21), c, s ); const C gamma12 = c*delta12 + s*delta22; const C gamma22 = -Conj(s)*delta12 + c*delta22; Real* xRealBuf = &XRealBuf[j*ldXReal]; Real* xImagBuf = &XImagBuf[j*ldXImag]; // Solve against R^T C chi1(xRealBuf[k ],xImagBuf[k ]); C chi2(xRealBuf[k+1],xImagBuf[k+1]); chi1 /= gamma11; chi2 -= gamma12*chi1; chi2 /= gamma22; // Solve against Q^T const C eta1 = c*chi1 - Conj(s)*chi2; const C eta2 = s*chi1 + c*chi2; xRealBuf[k ] = eta1.real(); xImagBuf[k ] = eta1.imag(); xRealBuf[k+1] = eta2.real(); xImagBuf[k+1] = eta2.imag(); // Update x2 := x2 - U12^T x1 blas::Axpy ( m-(k+2), -xRealBuf[k ], &UBuf[ k +(k+2)*ldU], ldU, &xRealBuf[k+2], 1 ); blas::Axpy ( m-(k+2), -xImagBuf[k ], &UBuf[ k +(k+2)*ldU], ldU, &xImagBuf[k+2], 1 ); blas::Axpy ( m-(k+2), -xRealBuf[k+1], 
&UBuf[(k+1)+(k+2)*ldU], ldU, &xRealBuf[k+2], 1 ); blas::Axpy ( m-(k+2), -xImagBuf[k+1], &UBuf[(k+1)+(k+2)*ldU], ldU, &xImagBuf[k+2], 1 ); } k += 2; } else { for( Int j=0; j<n; ++j ) { Real* xRealBuf = &XRealBuf[j*ldXReal]; Real* xImagBuf = &XImagBuf[j*ldXImag]; C eta1( xRealBuf[k], xImagBuf[k] ); eta1 /= UBuf[k+k*ldU] - shifts.Get(j,0); xRealBuf[k] = eta1.real(); xImagBuf[k] = eta1.imag(); blas::Axpy ( m-(k+1), -xRealBuf[k], &UBuf[k+(k+1)*ldU], ldU, &xRealBuf[k+1], 1 ); blas::Axpy ( m-(k+1), -xImagBuf[k], &UBuf[k+(k+1)*ldU], ldU, &xImagBuf[k+1], 1 ); } k += 1; } } if( conjugate ) XImag *= -1; }
void LUTUnb ( bool conjugate, const Matrix<F>& U, const Matrix<F>& shifts, Matrix<F>& X ) { DEBUG_CSE typedef Base<F> Real; const Int m = X.Height(); const Int n = X.Width(); if( conjugate ) Conjugate( X ); const F* UBuf = U.LockedBuffer(); F* XBuf = X.Buffer(); const Int ldU = U.LDim(); const Int ldX = X.LDim(); Int k=0; while( k < m ) { const bool in2x2 = ( k+1<m && UBuf[(k+1)+k*ldU] != F(0) ); if( in2x2 ) { // Solve the 2x2 linear systems via 2x2 QR decompositions produced // by the Givens rotation // | c s | | U(k, k)-shift | = | gamma11 | // | -conj(s) c | | U(k+1,k) | | 0 | // // and by also forming the right two entries of the 2x2 resulting // upper-triangular matrix, say gamma12 and gamma22 // // Extract the constant part of the 2x2 diagonal block, D const F delta12 = UBuf[ k +(k+1)*ldU]; const F delta21 = UBuf[(k+1)+ k *ldU]; for( Int j=0; j<n; ++j ) { const F delta11 = UBuf[ k + k *ldU] - shifts.Get(j,0); const F delta22 = UBuf[(k+1)+(k+1)*ldU] - shifts.Get(j,0); // Decompose D = Q R Real c; F s; const F gamma11 = Givens( delta11, delta21, c, s ); const F gamma12 = c*delta12 + s*delta22; const F gamma22 = -Conj(s)*delta12 + c*delta22; F* xBuf = &XBuf[j*ldX]; // Solve against R^T F chi1 = xBuf[k ]; F chi2 = xBuf[k+1]; chi1 /= gamma11; chi2 -= gamma12*chi1; chi2 /= gamma22; // Solve against Q^T xBuf[k ] = c*chi1 - Conj(s)*chi2; xBuf[k+1] = s*chi1 + c*chi2; // Update x2 := x2 - U12^T x1 blas::Axpy ( m-(k+2), -xBuf[k ], &UBuf[ k +(k+2)*ldU], ldU, &xBuf[k+2], 1 ); blas::Axpy ( m-(k+2), -xBuf[k+1], &UBuf[(k+1)+(k+2)*ldU], ldU, &xBuf[k+2], 1 ); } k += 2; } else { for( Int j=0; j<n; ++j ) { F* xBuf = &XBuf[j*ldX]; xBuf[k] /= UBuf[k+k*ldU] - shifts.Get(j,0); blas::Axpy ( m-(k+1), -xBuf[k], &UBuf[k+(k+1)*ldU], ldU, &xBuf[k+1], 1 ); } k += 1; } } if( conjugate ) Conjugate( X ); }
auto overflowPair = OverflowParameters<Real>(); const Real smallNum = overflowPair.first; const Real bigNum = overflowPair.second; const Real oneHalf = Real(1)/Real(2); const Real oneQuarter = Real(1)/Real(4); const F* UBuf = U.LockedBuffer(); const Int ULDim = U.LDim(); // Default scale is 1 Ones( scales, numShifts, 1 ); // Compute infinity norms of columns of U (excluding diagonal) Matrix<Real> cNorm( n, 1 ); Real* cNormBuf = cNorm.Buffer(); cNormBuf[0] = Real(0); for( Int j=1; j<n; ++j ) { //cNormBuf[j] = MaxNorm( U(IR(0,j),IR(j)) ); cNormBuf[j] = 0; for( Int i=0; i<j; ++i ) cNormBuf[j] = Max( cNormBuf[j], Abs(UBuf[i+j*ULDim]) ); } // Iterate through RHS's for( Int j=1; j<numShifts; ++j ) { const Int xHeight = Min(n,j); // Initialize triangular system
void LUNUnb( const Matrix<F>& U, Matrix<F>& X, bool checkIfSingular ) { DEBUG_CSE const Int m = X.Height(); const Int n = X.Width(); typedef Base<F> Real; const F* UBuf = U.LockedBuffer(); F* XBuf = X.Buffer(); const Int ldu = U.LDim(); const Int ldx = X.LDim(); Int k=m-1; while( k >= 0 ) { const bool in2x2 = ( k>0 && UBuf[k+(k-1)*ldu] != F(0) ); if( in2x2 ) { --k; // Solve the 2x2 linear systems via a 2x2 QR decomposition produced // by the Givens rotation // | c s | | U(k, k) | = | gamma11 | // | -conj(s) c | | U(k+1,k) | | 0 | // // and by also forming the right two entries of the 2x2 resulting // upper-triangular matrix, say gamma12 and gamma22 // // Extract the 2x2 diagonal block, D const F delta11 = UBuf[ k + k *ldu]; const F delta12 = UBuf[ k +(k+1)*ldu]; const F delta21 = UBuf[(k+1)+ k *ldu]; const F delta22 = UBuf[(k+1)+(k+1)*ldu]; // Decompose D = Q R Real c; F s; const F gamma11 = Givens( delta11, delta21, c, s ); const F gamma12 = c*delta12 + s*delta22; const F gamma22 = -Conj(s)*delta12 + c*delta22; if( checkIfSingular ) { // TODO: Instead check if values are too small in magnitude if( gamma11 == F(0) || gamma22 == F(0) ) LogicError("Singular diagonal block detected"); } for( Int j=0; j<n; ++j ) { F* xBuf = &XBuf[j*ldx]; // Solve against Q const F chi1 = xBuf[k ]; const F chi2 = xBuf[k+1]; xBuf[k ] = c*chi1 + s*chi2; xBuf[k+1] = -Conj(s)*chi1 + c*chi2; // Solve against R xBuf[k+1] /= gamma22; xBuf[k ] -= gamma12*xBuf[k+1]; xBuf[k ] /= gamma11; // Update x0 := x0 - U01 x1 blas::Axpy( k, -xBuf[k ], &UBuf[ k *ldu], 1, xBuf, 1 ); blas::Axpy( k, -xBuf[k+1], &UBuf[(k+1)*ldu], 1, xBuf, 1 ); } } else { if( checkIfSingular ) if( UBuf[k+k*ldu] == F(0) ) LogicError("Singular diagonal entry detected"); for( Int j=0; j<n; ++j ) { F* xBuf = &XBuf[j*ldx]; // Solve the 1x1 linear system xBuf[k] /= UBuf[k+k*ldu]; // Update x0 := x0 - u01 chi_1 blas::Axpy( k, -xBuf[k], &UBuf[k*ldu], 1, xBuf, 1 ); } } --k; } }
void LLNUnb( const Matrix<F>& L, Matrix<F>& X, bool checkIfSingular ) { DEBUG_CSE typedef Base<F> Real; const Int m = X.Height(); const Int n = X.Width(); const F* LBuf = L.LockedBuffer(); F* XBuf = X.Buffer(); const Int ldl = L.LDim(); const Int ldx = X.LDim(); Int k=0; while( k < m ) { const bool in2x2 = ( k+1<m && LBuf[k+(k+1)*ldl] != F(0) ); if( in2x2 ) { // Solve the 2x2 linear systems via a 2x2 LQ decomposition produced // by the Givens rotation // | L(k,k) L(k,k+1) | | c -conj(s) | = | gamma11 0 | // | s c | // and by also forming the bottom two entries of the 2x2 resulting // lower-triangular matrix, say gamma21 and gamma22 // // Extract the 2x2 diagonal block, D const F delta11 = LBuf[ k + k *ldl]; const F delta12 = LBuf[ k +(k+1)*ldl]; const F delta21 = LBuf[(k+1)+ k *ldl]; const F delta22 = LBuf[(k+1)+(k+1)*ldl]; // Decompose D = L Q Real c; F s; const F gamma11 = Givens( delta11, delta12, c, s ); const F gamma21 = c*delta21 + s*delta22; const F gamma22 = -Conj(s)*delta21 + c*delta22; if( checkIfSingular ) { // TODO: Check if sufficiently small instead if( gamma11 == F(0) || gamma22 == F(0) ) LogicError("Singular diagonal block detected"); } for( Int j=0; j<n; ++j ) { F* xBuf = &XBuf[j*ldx]; // Solve against L xBuf[k ] /= gamma11; xBuf[k+1] -= gamma21*xBuf[k]; xBuf[k+1] /= gamma22; // Solve against Q const F chi1 = xBuf[k ]; const F chi2 = xBuf[k+1]; xBuf[k ] = c*chi1 - Conj(s)*chi2; xBuf[k+1] = s*chi1 + c*chi2; // Update x2 := x2 - L21 x1 blas::Axpy ( m-(k+2), -xBuf[k ], &LBuf[(k+2)+ k *ldl], 1, &xBuf[k+2], 1 ); blas::Axpy ( m-(k+2), -xBuf[k+1], &LBuf[(k+2)+(k+1)*ldl], 1, &xBuf[k+2], 1 ); } k += 2; } else { if( checkIfSingular ) { // TODO: Check if sufficiently small instead if( LBuf[k+k*ldl] == F(0) ) LogicError("Singular diagonal entry detected"); } for( Int j=0; j<n; ++j ) { F* xBuf = &XBuf[j*ldx]; xBuf[k] /= LBuf[k+k*ldl]; blas::Axpy ( m-(k+1), -xBuf[k], &LBuf[(k+1)+k*ldl], 1, &xBuf[k+1], 1 ); } k += 1; } } }
inline void MakeTrapezoidal ( LeftOrRight side, UpperOrLower uplo, int offset, Matrix<T>& A ) { #ifndef RELEASE PushCallStack("MakeTrapezoidal"); #endif const int height = A.Height(); const int width = A.Width(); const int ldim = A.LDim(); T* buffer = A.Buffer(); if( uplo == LOWER ) { if( side == LEFT ) { #ifdef HAVE_OPENMP #pragma omp parallel for #endif for( int j=std::max(0,offset+1); j<width; ++j ) { const int lastZeroRow = j-offset-1; const int numZeroRows = std::min( lastZeroRow+1, height ); MemZero( &buffer[j*ldim], numZeroRows ); } } else { #ifdef HAVE_OPENMP #pragma omp parallel for #endif for( int j=std::max(0,offset-height+width+1); j<width; ++j ) { const int lastZeroRow = j-offset+height-width-1; const int numZeroRows = std::min( lastZeroRow+1, height ); MemZero( &buffer[j*ldim], numZeroRows ); } } } else { if( side == LEFT ) { #ifdef HAVE_OPENMP #pragma omp parallel for #endif for( int j=0; j<width; ++j ) { const int firstZeroRow = std::max(j-offset+1,0); if( firstZeroRow < height ) MemZero ( &buffer[firstZeroRow+j*ldim], height-firstZeroRow ); } } else { #ifdef HAVE_OPENMP #pragma omp parallel for #endif for( int j=0; j<width; ++j ) { const int firstZeroRow = std::max(j-offset+height-width+1,0); if( firstZeroRow < height ) MemZero ( &buffer[firstZeroRow+j*ldim], height-firstZeroRow ); } } } #ifndef RELEASE PopCallStack(); #endif }
void LLTUnb ( bool conjugate, const Matrix<F>& L, const Matrix<F>& shifts, Matrix<F>& X ) { DEBUG_CSE typedef Base<F> Real; const Int m = X.Height(); const Int n = X.Width(); const F* LBuf = L.LockedBuffer(); F* XBuf = X.Buffer(); const Int ldl = L.LDim(); const Int ldx = X.LDim(); if( conjugate ) Conjugate( X ); Int k=m-1; while( k >= 0 ) { const bool in2x2 = ( k>0 && LBuf[(k-1)+k*ldl] != F(0) ); if( in2x2 ) { --k; // Solve the 2x2 linear systems via 2x2 LQ decompositions produced // by the Givens rotation // | L(k,k)-shift L(k,k+1) | | c -conj(s) | = | gamma11 0 | // | s c | // and by also forming the bottom two entries of the 2x2 resulting // lower-triangular matrix, say gamma21 and gamma22 // // Extract the constant part of the 2x2 diagonal block, D const F delta12 = LBuf[ k +(k+1)*ldl]; const F delta21 = LBuf[(k+1)+ k *ldl]; for( Int j=0; j<n; ++j ) { const F delta11 = LBuf[ k + k *ldl] - shifts.Get(j,0); const F delta22 = LBuf[(k+1)+(k+1)*ldl] - shifts.Get(j,0); // Decompose D = L Q Real c; F s; const F gamma11 = Givens( delta11, delta12, c, s ); const F gamma21 = c*delta21 + s*delta22; const F gamma22 = -Conj(s)*delta21 + c*delta22; F* xBuf = &XBuf[j*ldx]; // Solve against Q^T const F chi1 = xBuf[k ]; const F chi2 = xBuf[k+1]; xBuf[k ] = c*chi1 + s*chi2; xBuf[k+1] = -Conj(s)*chi1 + c*chi2; // Solve against R^T xBuf[k+1] /= gamma22; xBuf[k ] -= gamma21*xBuf[k+1]; xBuf[k ] /= gamma11; // Update x0 := x0 - L10^T x1 blas::Axpy( k, -xBuf[k ], &LBuf[k ], ldl, xBuf, 1 ); blas::Axpy( k, -xBuf[k+1], &LBuf[k+1], ldl, xBuf, 1 ); } } else { for( Int j=0; j<n; ++j ) { F* xBuf = &XBuf[j*ldx]; // Solve the 1x1 linear system xBuf[k] /= LBuf[k+k*ldl] - shifts.Get(j,0); // Update x0 := x0 - l10^T chi_1 blas::Axpy( k, -xBuf[k], &LBuf[k], ldl, xBuf, 1 ); } } --k; } if( conjugate ) Conjugate( X ); }
// Computes an SVD of A through lapack::DivideAndConquerSVD.
//
// Requirements (violations raise LogicError):
//   - ctrl.overwrite must be true (A's buffer is handed to LAPACK);
//   - ctrl.bidiagSVDCtrl.approach must be THIN_SVD, FULL_SVD or COMPACT_SVD.
//
// Outputs:
//   s  - min(m,n) x 1 singular values (shortened to the detected rank in the
//        compact case),
//   U  - m x min(m,n) for thin/compact, m x m for full,
//   V  - the adjoint of the LAPACK "V^H" output, written only when
//        ctrl.bidiagSVDCtrl.wantV is set.
//
// In the compact case the rank is the index of the first singular value not
// exceeding bidiag_svd::APosterioriThreshold; U and V^H are truncated to that
// rank when they are wanted.  The returned SVDInfo is default-constructed.
SVDInfo LAPACKHelper
( Matrix<F>& A,
  Matrix<F>& U, Matrix<Base<F>>& s, Matrix<F>& V,
  const SVDCtrl<Base<F>>& ctrl )
{
    DEBUG_CSE
    using Real = Base<F>;
    if( !ctrl.overwrite )
        LogicError("LAPACKHelper assumes ctrl.overwrite == true");
    auto approach = ctrl.bidiagSVDCtrl.approach;
    if( approach != THIN_SVD &&
        approach != FULL_SVD &&
        approach != COMPACT_SVD )
        LogicError("LAPACKHelper assumes THIN_SVD, FULL_SVD, or COMPACT_SVD");

    SVDInfo info;
    const Int m = A.Height();
    const Int n = A.Width();
    const Int minDim = Min(m,n);
    const bool compact = ( approach == COMPACT_SVD );
    // Both thin and compact request the economy-size factorization
    const bool economy = ( approach == THIN_SVD ) || compact;
    const bool wantU = ctrl.bidiagSVDCtrl.wantU;
    const bool wantV = ctrl.bidiagSVDCtrl.wantV;

    s.Resize( minDim, 1 );
    Matrix<F> VAdj;
    if( economy )
    {
        U.Resize( m, minDim );
        VAdj.Resize( minDim, n );
    }
    else
    {
        U.Resize( m, m );
        VAdj.Resize( n, n );
    }
    lapack::DivideAndConquerSVD
    ( m, n, A.Buffer(), A.LDim(), s.Buffer(), U.Buffer(), U.LDim(),
      VAdj.Buffer(), VAdj.LDim(), economy );

    if( compact )
    {
        const Real twoNorm = ( minDim==0 ? Real(0) : s(0) );
        const Real thresh =
          bidiag_svd::APosterioriThreshold
          ( m, n, twoNorm, ctrl.bidiagSVDCtrl );

        // Stop at the first singular value that is not above the threshold
        Int rank = 0;
        while( rank < minDim && !( s(rank) <= thresh ) )
            ++rank;

        s.Resize( rank, 1 );
        if( wantU )
            U.Resize( m, rank );
        if( wantV )
            VAdj.Resize( rank, n );
    }
    if( wantV )
        Adjoint( VAdj, V );
    return info;
}
inline void Var3Unb( Orientation orientation, Matrix<F>& A, Matrix<F>& d ) { #ifndef RELEASE PushCallStack("ldl::Var3Unb"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( d.Viewing() && (d.Height() != A.Height() || d.Width() != 1) ) throw std::logic_error ("d must be a column vector the same height as A"); if( orientation == NORMAL ) throw std::logic_error("Can only perform LDL^T or LDL^H"); #endif const int n = A.Height(); if( !d.Viewing() ) d.ResizeTo( n, 1 ); F* ABuffer = A.Buffer(); F* dBuffer = d.Buffer(); const int ldim = A.LDim(); for( int j=0; j<n; ++j ) { const int a21Height = n - (j+1); // Extract and store the diagonal of D const F alpha11 = ABuffer[j+j*ldim]; if( alpha11 == F(0) ) throw SingularMatrixException(); dBuffer[j] = alpha11; F* RESTRICT a21 = &ABuffer[(j+1)+j*ldim]; if( orientation == ADJOINT ) { // A22 := A22 - a21 (a21 / alpha11)^H for( int k=0; k<a21Height; ++k ) { const F beta = Conj(a21[k]/alpha11); F* RESTRICT A22Col = &ABuffer[(j+1)+(j+1+k)*ldim]; for( int i=k; i<a21Height; ++i ) A22Col[i] -= a21[i]*beta; } } else { // A22 := A22 - a21 (a21 / alpha11)^T for( int k=0; k<a21Height; ++k ) { const F beta = a21[k]/alpha11; F* RESTRICT A22Col = &ABuffer[(j+1)+(j+1+k)*ldim]; for( int i=k; i<a21Height; ++i ) A22Col[i] -= a21[i]*beta; } } // a21 := a21 / alpha11 for( int i=0; i<a21Height; ++i ) a21[i] /= alpha11; } #ifndef RELEASE PopCallStack(); #endif }