inline void Hemv ( UpperOrLower uplo, T alpha, const Matrix<T>& A, const Matrix<T>& x, T beta, Matrix<T>& y ) { #ifndef RELEASE PushCallStack("Hemv"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( ( x.Height() != 1 && x.Width() != 1 ) || ( y.Height() != 1 && y.Width() != 1 ) ) throw std::logic_error("x and y must be vectors"); const int xLength = ( x.Width()==1 ? x.Height() : x.Width() ); const int yLength = ( y.Width()==1 ? y.Height() : y.Width() ); if( A.Height() != xLength || A.Height() != yLength ) throw std::logic_error("A must conform with x and y"); #endif const char uploChar = UpperOrLowerToChar( uplo ); const int m = A.Height(); const int incx = ( x.Width()==1 ? 1 : x.LDim() ); const int incy = ( y.Width()==1 ? 1 : y.LDim() ); blas::Hemv ( uploChar, m, alpha, A.LockedBuffer(), A.LDim(), x.LockedBuffer(), incx, beta, y.Buffer(), incy ); #ifndef RELEASE PopCallStack(); #endif }
inline void Symm ( LeftOrRight side, UpperOrLower uplo, T alpha, const Matrix<T>& A, const Matrix<T>& B, T beta, Matrix<T>& C, bool conjugate=false ) { #ifndef RELEASE CallStackEntry entry("Symm"); #endif const char sideChar = LeftOrRightToChar( side ); const char uploChar = UpperOrLowerToChar( uplo ); if( conjugate ) { blas::Hemm ( sideChar, uploChar, C.Height(), C.Width(), alpha, A.LockedBuffer(), A.LDim(), B.LockedBuffer(), B.LDim(), beta, C.Buffer(), C.LDim() ); } else { blas::Symm ( sideChar, uploChar, C.Height(), C.Width(), alpha, A.LockedBuffer(), A.LDim(), B.LockedBuffer(), B.LDim(), beta, C.Buffer(), C.LDim() ); } }
void FusedRowPanelGemvs ( bool conjugate, T alpha, const Matrix<T>& A, const Matrix<T>& q, const Matrix<T>& r, Matrix<T>& s, Matrix<T>& t, Int bsize ) { const Int m = A.Height(); const Int n = A.Width(); const char transChar = ( conjugate ? 'C' : 'T' ); const T* ABuf = A.LockedBuffer(); const T* qBuf = q.LockedBuffer(); const T* rBuf = r.LockedBuffer(); T* sBuf = s.Buffer(); T* tBuf = t.Buffer(); const Int ALDim = A.LDim(); for( Int k=0; k<n; k+=bsize ) { const Int nb = Min(n-k,bsize); blas::Gemv ( 'N', m, nb, alpha, &ABuf[k*ALDim], ALDim, &qBuf[k], 1, T(1), sBuf, 1 ); blas::Gemv ( transChar, m, nb, alpha, &ABuf[k*ALDim], ALDim, rBuf, 1, T(1), &tBuf[k], 1 ); } }
inline void Syr ( UpperOrLower uplo, T alpha, const Matrix<T>& x, Matrix<T>& A, bool conjugate=false ) { #ifndef RELEASE PushCallStack("Syr"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( x.Width() != 1 && x.Height() != 1 ) throw std::logic_error("x must be a vector"); const int xLength = ( x.Width()==1 ? x.Height() : x.Width() ); if( xLength != A.Height() ) throw std::logic_error("x must conform with A"); #endif const char uploChar = UpperOrLowerToChar( uplo ); const int m = A.Height(); const int incx = ( x.Width()==1 ? 1 : x.LDim() ); if( conjugate ) { blas::Her ( uploChar, m, alpha, x.LockedBuffer(), incx, A.Buffer(), A.LDim() ); } else { blas::Syr ( uploChar, m, alpha, x.LockedBuffer(), incx, A.Buffer(), A.LDim() ); } #ifndef RELEASE PopCallStack(); #endif }
void TestLAPACK ( const Matrix<double>& d, const double& rho, const Matrix<double>& z ) { typedef double Real; const Int n = d.Height(); Timer timer; Matrix<Real> w(n,1), dPlusShift(n,1), dMinusShift(n,1); BlasInt nBLAS = n; BlasInt info; timer.Start(); for( Int i=0; i<n; ++i ) { Real sigma; const BlasInt ip1 = i+1; EL_LAPACK(dlasd4) ( &nBLAS, &ip1, d.LockedBuffer(), z.LockedBuffer(), dMinusShift.Buffer(), &rho, &sigma, dPlusShift.Buffer(), &info ); if( info != 0 ) RuntimeError("LAPACK's dlasd4 did not converge"); w(i) = sigma; } const Real lapackTime = timer.Stop(); Output("LAPACK secular singular value time: ",lapackTime," seconds"); }
inline void Ger( T alpha, const Matrix<T>& x, const Matrix<T>& y, Matrix<T>& A ) { #ifndef RELEASE CallStackEntry entry("Ger"); if( ( x.Height() != 1 && x.Width() != 1 ) || ( y.Height() != 1 && y.Width() != 1 ) ) LogicError("x and y must be vectors"); const Int xLength = ( x.Width()==1 ? x.Height() : x.Width() ); const Int yLength = ( y.Width()==1 ? y.Height() : y.Width() ); if( xLength != A.Height() || yLength != A.Width() ) { std::ostringstream msg; msg << "Nonconformal Ger:\n" << " x ~ " << x.Height() << " x " << x.Width() << "\n" << " y ~ " << y.Height() << " x " << y.Width() << "\n" << " A ~ " << A.Height() << " x " << A.Width(); LogicError( msg.str() ); } #endif const Int m = A.Height(); const Int n = A.Width(); const Int incx = ( x.Width()==1 ? 1 : x.LDim() ); const Int incy = ( y.Width()==1 ? 1 : y.LDim() ); blas::Ger ( m, n, alpha, x.LockedBuffer(), incx, y.LockedBuffer(), incy, A.Buffer(), A.LDim() ); }
void Apply ( const Matrix<Real>& x, const Matrix<Real>& y, Matrix<Real>& z, const Matrix<Int>& orders, const Matrix<Int>& firstInds ) { DEBUG_ONLY(CSE cse("soc::Apply")) soc::Dots( x, y, z, orders, firstInds ); auto xRoots = x; auto yRoots = y; cone::Broadcast( xRoots, orders, firstInds ); cone::Broadcast( yRoots, orders, firstInds ); const Int height = x.Height(); const Real* xBuf = x.LockedBuffer(); const Real* xRootBuf = xRoots.LockedBuffer(); const Real* yBuf = y.LockedBuffer(); const Real* yRootBuf = yRoots.LockedBuffer(); Real* zBuf = z.Buffer(); const Int* firstIndBuf = firstInds.LockedBuffer(); for( Int i=0; i<height; ++i ) if( i != firstIndBuf[i] ) zBuf[i] += xRootBuf[i]*yBuf[i] + yRootBuf[i]*xBuf[i]; }
inline void Her2k ( UpperOrLower uplo, Orientation orientation, T alpha, const Matrix<T>& A, const Matrix<T>& B, T beta, Matrix<T>& C ) { #ifndef RELEASE PushCallStack("Her2k"); if( orientation == NORMAL ) { if( A.Height() != C.Height() || A.Height() != C.Width() || B.Height() != C.Height() ||B.Height() != C.Width() ) throw std::logic_error("Nonconformal Her2k"); } else if( orientation == ADJOINT ) { if( A.Width() != C.Height() || A.Width() != C.Width() || B.Width() != C.Height() || B.Width() != C.Width() ) throw std::logic_error("Nonconformal Her2k"); } else throw std::logic_error ("Her2k only accepts NORMAL and ADJOINT options"); #endif const char uploChar = UpperOrLowerToChar( uplo ); const char transChar = OrientationToChar( orientation ); const int k = ( orientation == NORMAL ? A.Width() : A.Height() ); blas::Her2k ( uploChar, transChar, C.Height(), k, alpha, A.LockedBuffer(), A.LDim(), B.LockedBuffer(), B.LDim(), beta, C.Buffer(), C.LDim() ); #ifndef RELEASE PopCallStack(); #endif }
void NesterovTodd ( const Matrix<Real>& s, const Matrix<Real>& z, Matrix<Real>& w ) { DEBUG_CSE const Int k = s.Height(); w.Resize( k, 1 ); const Real* sBuf = s.LockedBuffer(); const Real* zBuf = z.LockedBuffer(); Real* wBuf = w.Buffer(); for( Int i=0; i<k; ++i ) wBuf[i] = Sqrt(sBuf[i]/zBuf[i]); }
inline void Trmm ( LeftOrRight side, UpperOrLower uplo, Orientation orientation, UnitOrNonUnit diag, T alpha, const Matrix<T>& A, Matrix<T>& B ) { #ifndef RELEASE CallStackEntry entry("Trmm"); if( A.Height() != A.Width() ) LogicError("Triangular matrix must be square"); if( side == LEFT ) { if( A.Height() != B.Height() ) LogicError("Nonconformal Trmm"); } else { if( A.Height() != B.Width() ) LogicError("Nonconformal Trmm"); } #endif const char sideChar = LeftOrRightToChar( side ); const char uploChar = UpperOrLowerToChar( uplo ); const char transChar = OrientationToChar( orientation ); const char diagChar = UnitOrNonUnitToChar( diag ); blas::Trmm ( sideChar, uploChar, transChar, diagChar, B.Height(), B.Width(), alpha, A.LockedBuffer(), A.LDim(), B.Buffer(), B.LDim() ); }
inline void Trmv ( UpperOrLower uplo, Orientation orientation, UnitOrNonUnit diag, const Matrix<T>& A, Matrix<T>& x ) { #ifndef RELEASE PushCallStack("Trmv"); if( x.Height() != 1 && x.Width() != 1 ) throw std::logic_error("x must be a vector"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); const int xLength = ( x.Width()==1 ? x.Height() : x.Width() ); if( xLength != A.Height() ) throw std::logic_error("x must conform with A"); #endif const char uploChar = UpperOrLowerToChar( uplo ); const char transChar = OrientationToChar( orientation ); const char diagChar = UnitOrNonUnitToChar( diag ); const int m = A.Height(); const int incx = ( x.Width()==1 ? 1 : x.LDim() ); blas::Trmv ( uploChar, transChar, diagChar, m, A.LockedBuffer(), A.LDim(), x.Buffer(), incx ); #ifndef RELEASE PopCallStack(); #endif }
inline void BinaryFlat( const Matrix<T>& A, string basename="matrix" ) { DEBUG_ONLY(CallStackEntry cse("write::BinaryFlat")) string filename = basename + "." + FileExtension(BINARY_FLAT); std::ofstream file( filename.c_str(), std::ios::binary ); if( !file.is_open() ) RuntimeError("Could not open ",filename); if( A.Height() == A.LDim() ) file.write( (char*)A.LockedBuffer(), A.Height()*A.Width()*sizeof(T) ); else for( Int j=0; j<A.Width(); ++j ) file.write( (char*)A.LockedBuffer(0,j), A.Height()*sizeof(T) ); }
// TransposeAxpy: adds alpha times X (optionally conjugated) into Y, with
// alphaS converted to T before use.
// This comment documents the vector branch only: when X is a single row or
// column, X and Y are walked with a stride of 1 (column storage) or their
// leading dimension (row storage), so one may be a row while the other is a
// column. With `conjugate` set each entry of X is passed through Conj in an
// explicit loop; otherwise the update is delegated to blas::Axpy.
// Under DEBUG_ONLY a length mismatch raises
// LogicError("Nonconformal TransposeAxpy").
void TransposeAxpy ( S alphaS, const Matrix<T>& X, Matrix<T>& Y, bool conjugate ) { DEBUG_CSE const T alpha = T(alphaS); const Int mX = X.Height(); const Int nX = X.Width(); const Int nY = Y.Width(); const Int ldX = X.LDim(); const Int ldY = Y.LDim(); const T* XBuf = X.LockedBuffer(); T* YBuf = Y.Buffer(); // If X and Y are vectors, we can allow one to be a column and the other // to be a row. Otherwise we force X and Y to be the same dimension. if( mX == 1 || nX == 1 ) { const Int lengthX = ( nX==1 ? mX : nX ); const Int incX = ( nX==1 ? 1 : ldX ); const Int incY = ( nY==1 ? 1 : ldY ); DEBUG_ONLY( const Int mY = Y.Height(); const Int lengthY = ( nY==1 ? mY : nY ); if( lengthX != lengthY ) LogicError("Nonconformal TransposeAxpy"); ) if( conjugate ) for( Int j=0; j<lengthX; ++j ) YBuf[j*incY] += alpha*Conj(XBuf[j*incX]); else blas::Axpy( lengthX, alpha, XBuf, incX, YBuf, incY ); }
// Sorts the k = Z.Width() values in the first column of w with
// internal::IndexValuePair<R>::Compare and applies the same permutation to
// the columns of Z, so that column j of Z still belongs to w(j,0).
// The reordering goes through a temporary n x k matrix that is assigned back
// to Z at the end.
inline void SortEig( Matrix<R>& w, Matrix<R>& Z )
{
#ifndef RELEASE
    PushCallStack("SortEig");
#endif
    const int height = Z.Height();
    const int numPairs = Z.Width();

    // Tag each value with the column it currently belongs to.
    std::vector<internal::IndexValuePair<R> > tagged( numPairs );
    for( int col=0; col<numPairs; ++col )
    {
        tagged[col].index = col;
        tagged[col].value = w.Get(col,0);
    }

    // Sorting the tagged values yields the permutation as a by-product.
    std::sort
    ( tagged.begin(), tagged.end(), internal::IndexValuePair<R>::Compare );

    // Gather the columns of Z in sorted order and write the values back.
    Matrix<R> ZSorted( height, numPairs );
    for( int dest=0; dest<numPairs; ++dest )
    {
        const int from = tagged[dest].index;
        MemCopy( ZSorted.Buffer(0,dest), Z.LockedBuffer(0,from), height );
        w.Set(dest,0,tagged[dest].value);
    }
    Z = ZSorted;
#ifndef RELEASE
    PopCallStack();
#endif
}
// Unblocked kernel operating on the lower triangles of L and X in place.
// X's lower triangle is first scaled by alpha (skipped when alpha == 1).
// Then, for each row i in order:
//   - unless `diag` is UNIT, entries 0..i of row i of X are divided by L(i,i);
//   - the rows of X below i (columns 0..i) receive the rank-one update
//     X2L -= l21 * x1L through blas::Geru, where l21 is the part of column i
//     of L below the diagonal and x1L is row i of X.
void LLNUnb( UnitOrNonUnit diag, F alpha, const Matrix<F>& L, Matrix<F>& X )
{
    DEBUG_CSE
    const bool nonUnit = ( diag != UNIT );
    const Int n = L.Height();
    const Int ldl = L.LDim();
    const Int ldx = X.LDim();
    const F* LBuf = L.LockedBuffer();
    F* XBuf = X.Buffer();

    // X := alpha X
    if( alpha != F(1) )
    {
        for( Int col=0; col<n; ++col )
            for( Int row=col; row<n; ++row )
                XBuf[row+col*ldx] *= alpha;
    }

    for( Int row=0; row<n; ++row )
    {
        if( nonUnit )
        {
            // Scale row `row` up to and including the diagonal entry.
            const F lambda11 = LBuf[row+row*ldl];
            for( Int col=0; col<=row; ++col )
                XBuf[row+col*ldx] /= lambda11;
        }

        const Int numBelow = n - (row+1);
        const F* l21 = &LBuf[(row+1)+row*ldl];
        const F* x1L = &XBuf[row];
        F* X2L = &XBuf[row+1];
        blas::Geru( numBelow, row+1, F(-1), l21, 1, x1L, ldx, X2L, ldx );
    }
}
UnitaryCoherence( Matrix<F>& U )
{
    // Returns n * (largest column two-norm of Z)^2 / r, where U is n x r and
    // Z is the matrix produced by Herk( UPPER, NORMAL, 1, U, Z ) after being
    // made explicitly Hermitian.
#ifndef RELEASE
    CallStackEntry entry("UnitaryCoherence");
#endif
    typedef BASE(F) R;
    const Int n = U.Height();
    const Int r = U.Width();

    // Form Z from U (the rank-k update costs n^2 r work), then fill in the
    // other triangle so whole columns can be read directly.
    Matrix<F> Z;
    Herk( UPPER, NORMAL, F(1), U, Z );
    MakeHermitian( UPPER, Z );

    // Track the largest two-norm over the n columns of Z.
    R maxColNorm = 0;
    Int col = 0;
    while( col < n )
    {
        const R colNorm = blas::Nrm2( n, Z.LockedBuffer(0,col), 1 );
        maxColNorm = std::max( colNorm, maxColNorm );
        ++col;
    }
    return (n*maxColNorm*maxColNorm)/r;
}
// Axpy: adds alpha times X into Y, with alphaS converted to T before use.
// This comment documents the vector branch only: when X is a single row or
// column, X and Y are walked with a stride of 1 (column storage) or their
// leading dimension (row storage), so one may be a row while the other is a
// column, and the update is delegated to blas::Axpy.
// Under EL_DEBUG_ONLY a length mismatch raises LogicError("Nonconformal Axpy").
void Axpy( S alphaS, const Matrix<T>& X, Matrix<T>& Y ) { EL_DEBUG_CSE const T alpha = T(alphaS); const Int mX = X.Height(); const Int nX = X.Width(); const Int nY = Y.Width(); const Int ldX = X.LDim(); const Int ldY = Y.LDim(); const T* XBuf = X.LockedBuffer(); T* YBuf = Y.Buffer(); // If X and Y are vectors, we can allow one to be a column and the other // to be a row. Otherwise we force X and Y to be the same dimension. if( mX == 1 || nX == 1 ) { const Int XLength = ( nX==1 ? mX : nX ); const Int XStride = ( nX==1 ? 1 : ldX ); const Int YStride = ( nY==1 ? 1 : ldY ); EL_DEBUG_ONLY( const Int mY = Y.Height(); const Int YLength = ( nY==1 ? mY : nY ); if( XLength != YLength ) LogicError("Nonconformal Axpy"); ) blas::Axpy( XLength, alpha, XBuf, XStride, YBuf, YStride ); }
// Copies the y.Height() entries of y into column numQueued_ of Y_, advances
// the queue counter, and calls Flush() once the counter reaches the width
// of Y_.
void Enqueue( const Matrix<Field>& y )
{
    MemCopy( Y_.Buffer(0,numQueued_), y.LockedBuffer(), y.Height() );
    if( ++numQueued_ == Y_.Width() )
        Flush();
}
void UUnb( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& U ) { EL_DEBUG_CSE // Use the Variant 4 algorithm // (which annoyingly requires conjugations for the Her2) const Int n = A.Height(); const Int lda = A.LDim(); const Int ldu = U.LDim(); F* ABuffer = A.Buffer(); const F* UBuffer = U.LockedBuffer(); vector<F> a12Conj( n ), u12Conj( n ); for( Int j=0; j<n; ++j ) { const Int a21Height = n - (j+1); // Extract and store the diagonal value of U const F upsilon11 = ( diag==UNIT ? 1 : UBuffer[j+j*ldu] ); // a01 := a01 / upsilon11 F* a01 = &ABuffer[j*lda]; if( diag != UNIT ) for( Int k=0; k<j; ++k ) a01[k] /= upsilon11; // A02 := A02 - a01 u12 F* A02 = &ABuffer[(j+1)*lda]; const F* u12 = &UBuffer[j+(j+1)*ldu]; blas::Geru( j, a21Height, F(-1), a01, 1, u12, ldu, A02, lda ); // alpha11 := alpha11 / |upsilon11|^2 ABuffer[j+j*lda] /= upsilon11*Conj(upsilon11); const F alpha11 = ABuffer[j+j*lda]; // a12 := a12 / conj(upsilon11) F* a12 = &ABuffer[j+(j+1)*lda]; if( diag != UNIT ) for( Int k=0; k<a21Height; ++k ) a12[k*lda] /= Conj(upsilon11); // a12 := a12 - (alpha11/2)u12 for( Int k=0; k<a21Height; ++k ) a12[k*lda] -= (alpha11/F(2))*u12[k*ldu]; // A22 := A22 - (a12' u12 + u12' a12) F* A22 = &ABuffer[(j+1)+(j+1)*lda]; for( Int k=0; k<a21Height; ++k ) a12Conj[k] = Conj(a12[k*lda]); for( Int k=0; k<a21Height; ++k ) u12Conj[k] = Conj(u12[k*ldu]); blas::Her2 ( 'U', a21Height, F(-1), u12Conj.data(), 1, a12Conj.data(), 1, A22, lda ); // a12 := a12 - (alpha11/2)u12 for( Int k=0; k<a21Height; ++k ) a12[k*lda] -= (alpha11/F(2))*u12[k*ldu]; } }
// Unblocked lower-triangular kernel (Variant 4) that updates A in place
// using the lower triangle of L. For UNIT `diag` the diagonal of L is never
// read and is treated as one.
// The Her2 call takes explicitly conjugated copies of the row vectors a10
// and l10, which is why two scratch vectors of length n are kept.
void LUnb( UnitOrNonUnit diag, Matrix<T>& A, const Matrix<T>& L )
{
    EL_DEBUG_CSE
    const Int n = A.Height();
    const Int lda = A.LDim();
    const Int ldl = L.LDim();
    T* ABuffer = A.Buffer();
    const T* LBuffer = L.LockedBuffer();
    const bool nonUnit = ( diag != UNIT );
    vector<T> a10Conj( n ), l10Conj( n );

    for( Int j=0; j<n; ++j )
    {
        const Int a21Height = n - (j+1);

        // Diagonal values of A and L (before A's is overwritten below)
        const T alpha11 = ABuffer[j+j*lda];
        const T lambda11 = ( diag==UNIT ? 1 : LBuffer[j+j*ldl] );
        const T halfAlpha11 = alpha11/T(2);

        // a10 := a10 + (alpha11/2)l10   (first half of the correction)
        T* a10 = &ABuffer[j];
        const T* l10 = &LBuffer[j];
        for( Int k=0; k<j; ++k )
            a10[k*lda] += halfAlpha11*l10[k*ldl];

        // A00 := A00 + (a10' l10 + l10' a10)
        T* A00 = ABuffer;
        for( Int k=0; k<j; ++k )
        {
            a10Conj[k] = Conj(a10[k*lda]);
            l10Conj[k] = Conj(l10[k*ldl]);
        }
        blas::Her2
        ( 'L', j,
          T(1), a10Conj.data(), 1, l10Conj.data(), 1, A00, lda );

        // a10 := a10 + (alpha11/2)l10   (second half of the correction)
        for( Int k=0; k<j; ++k )
            a10[k*lda] += halfAlpha11*l10[k*ldl];

        // a10 := conj(lambda11) a10
        if( nonUnit )
        {
            for( Int k=0; k<j; ++k )
                a10[k*lda] *= Conj(lambda11);
        }

        // alpha11 := alpha11 * |lambda11|^2
        ABuffer[j+j*lda] *= Conj(lambda11)*lambda11;

        // A20 := A20 + a21 l10
        T* a21 = &ABuffer[(j+1)+j*lda];
        T* A20 = &ABuffer[j+1];
        blas::Geru( a21Height, j, T(1), a21, 1, l10, ldl, A20, lda );

        // a21 := lambda11 a21
        if( nonUnit )
        {
            for( Int k=0; k<a21Height; ++k )
                a21[k] *= lambda11;
        }
    }
}
void MaxEig ( const Matrix<Real>& x, Matrix<Real>& maxEigs, const Matrix<Int>& orders, const Matrix<Int>& firstInds ) { DEBUG_ONLY(CSE cse("soc::MaxEig")) soc::LowerNorms( x, maxEigs, orders, firstInds ); Real* maxEigBuf = maxEigs.Buffer(); const Real* xBuf = x.LockedBuffer(); const Int* firstIndBuf = firstInds.LockedBuffer(); const Int height = x.Height(); for( Int i=0; i<height; ++i ) if( i == firstIndBuf[i] ) maxEigBuf[i] = xBuf[i]+maxEigBuf[i]; }
inline void TwoSidedTrsmLUnb( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& L ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrsmLUnb"); #endif // Use the Variant 4 algorithm const int n = A.Height(); const int lda = A.LDim(); const int ldl = L.LDim(); F* ABuffer = A.Buffer(); const F* LBuffer = L.LockedBuffer(); for( int j=0; j<n; ++j ) { const int a21Height = n - (j+1); // Extract and store the diagonal value of L const F lambda11 = ( diag==UNIT ? 1 : LBuffer[j+j*ldl] ); // a10 := a10 / lambda11 F* a10 = &ABuffer[j]; if( diag != UNIT ) for( int k=0; k<j; ++k ) a10[k*lda] /= lambda11; // A20 := A20 - l21 a10 F* A20 = &ABuffer[j+1]; const F* l21 = &LBuffer[(j+1)+j*ldl]; blas::Geru( a21Height, j, F(-1), l21, 1, a10, lda, A20, lda ); // alpha11 := alpha11 / |lambda11|^2 ABuffer[j+j*lda] /= lambda11*Conj(lambda11); const F alpha11 = ABuffer[j+j*lda]; // a21 := a21 / conj(lambda11) F* a21 = &ABuffer[(j+1)+j*lda]; if( diag != UNIT ) for( int k=0; k<a21Height; ++k ) a21[k] /= Conj(lambda11); // a21 := a21 - (alpha11/2)l21 for( int k=0; k<a21Height; ++k ) a21[k] -= (alpha11/2)*l21[k]; // A22 := A22 - (l21 a21' + a21 l21') F* A22 = &ABuffer[(j+1)+(j+1)*lda]; blas::Her2( 'L', a21Height, F(-1), l21, 1, a21, 1, A22, lda ); // a21 := a21 - (alpha11/2)l21 for( int k=0; k<a21Height; ++k ) a21[k] -= (alpha11/2)*l21[k]; } #ifndef RELEASE PopCallStack(); #endif }
// Unblocked in-place elimination without pivoting.
// For each of the first min(m,n) columns: the entries below the diagonal are
// scaled by the reciprocal of the diagonal entry, and the trailing submatrix
// receives the rank-one update A22 -= a21 a12 through blas::Geru.
// Throws SingularMatrixException when a diagonal entry is exactly zero.
inline void Unb( Matrix<F>& A )
{
    EL_DEBUG_CSE
    const Int m = A.Height();
    const Int n = A.Width();
    const Int numSteps = Min(m,n);
    for( Int j=0; j<numSteps; ++j )
    {
        const F pivot = A(j,j);
        if( pivot == F(0) )
            throw SingularMatrixException();

        const Int rowsBelow = m-(j+1);
        const Int colsRight = n-(j+1);

        // a21 := a21 / pivot
        blas::Scal( rowsBelow, F(1)/pivot, A.Buffer(j+1,j), 1 );

        // A22 := A22 - a21 a12
        blas::Geru
        ( rowsBelow, colsRight,
          F(-1), A.LockedBuffer(j+1,j), 1,
                 A.LockedBuffer(j,j+1), A.LDim(),
                 A.Buffer(j+1,j+1),     A.LDim() );
    }
}
inline void Hemm ( LeftOrRight side, UpperOrLower uplo, T alpha, const Matrix<T>& A, const Matrix<T>& B, T beta, Matrix<T>& C ) { #ifndef RELEASE PushCallStack("Hemm"); #endif const char sideChar = LeftOrRightToChar( side ); const char uploChar = UpperOrLowerToChar( uplo ); blas::Hemm ( sideChar, uploChar, C.Height(), C.Width(), alpha, A.LockedBuffer(), A.LDim(), B.LockedBuffer(), B.LDim(), beta, C.Buffer(), C.LDim() ); #ifndef RELEASE PopCallStack(); #endif }
// Returns the ratio between the largest and the smallest of the entrywise
// products s[i]*z[i] over the k = s.Height() entries.
// The maximum is seeded with zero and the minimum with the resulting
// maximum, so for k == 0 the result is the quotient 0/0.
Real ComplementRatio
( const Matrix<Real>& s, const Matrix<Real>& z )
{
    DEBUG_CSE
    const Int numEntries = s.Height();
    const Real* sData = s.LockedBuffer();
    const Real* zData = z.LockedBuffer();

    // First pass: largest product (never below zero because of the seed).
    Real maxProd = 0;
    for( Int i=0; i<numEntries; ++i )
    {
        const Real prod = sData[i]*zData[i];
        maxProd = Max( prod, maxProd );
    }

    // Second pass: smallest product, starting from the maximum.
    Real minProd = maxProd;
    for( Int i=0; i<numEntries; ++i )
    {
        const Real prod = sData[i]*zData[i];
        minProd = Min( prod, minProd );
    }

    return maxProd/minProd;
}
// Forwards a triangular rank-k update to mkl::Trrk.
// The output dimension is the height of C; the inner dimension is the width
// of A when orientA is NORMAL and the height of A otherwise.
void TrrkMKL
( UpperOrLower uplo,
  Orientation orientA, Orientation orientB,
  T alpha, const Matrix<T>& A, const Matrix<T>& B,
  T beta, Matrix<T>& C )
{
    EL_DEBUG_CSE
    const char triangle = UpperOrLowerToChar( uplo );
    const char transA = OrientationToChar( orientA );
    const char transB = OrientationToChar( orientB );

    const auto outerDim = C.Height();
    const auto innerDim = orientA == NORMAL ? A.Width() : A.Height();
    mkl::Trrk
    ( triangle, transA, transB,
      outerDim, innerDim,
      alpha, A.LockedBuffer(), A.LDim(),
             B.LockedBuffer(), B.LDim(),
      beta,  C.Buffer(),       C.LDim() );
}
// Two-norm of a vector stored as a column (unit stride) or as a row (stride
// LDim), computed by blas::Nrm2.
// Unless RELEASE is defined, std::logic_error is thrown for non-vector input.
inline typename Base<F>::type Nrm2( const Matrix<F>& x )
{
#ifndef RELEASE
    PushCallStack("Nrm2");
    const bool isVector = ( x.Height() == 1 || x.Width() == 1 );
    if( !isVector )
        throw std::logic_error("Expected vector input");
#endif
    typedef typename Base<F>::type R;
    const bool isColumn = ( x.Width() == 1 );
    const R norm =
        ( isColumn ? blas::Nrm2( x.Height(), x.LockedBuffer(), 1 )
                   : blas::Nrm2( x.Width(), x.LockedBuffer(), x.LDim() ) );
#ifndef RELEASE
    PopCallStack();
#endif
    return norm;
}
// Unblocked in-place elimination without pivoting.
// For each of the first min(m,n) columns: the entries below the diagonal are
// scaled by the reciprocal of the diagonal entry, and the trailing submatrix
// receives the rank-one update A22 -= a21 a12 through blas::Geru.
// Throws SingularMatrixException when a diagonal entry is exactly zero.
//
// The reciprocal is formed as F(1)/alpha rather than 1/alpha so that the
// division is well-formed for every field type F (mixed int/complex division
// is not provided by std::complex).
inline void Unb( Matrix<F>& A )
{
#ifndef RELEASE
    CallStackEntry entry("lu::Unb");
#endif
    const Int m = A.Height();
    const Int n = A.Width();
    // Loop-invariant; evaluated once instead of on every iteration.
    const Int minDim = Min(m,n);
    for( Int j=0; j<minDim; ++j )
    {
        const F alpha = A.Get(j,j);
        if( alpha == F(0) )
            throw SingularMatrixException();

        // a21 := a21 / alpha
        blas::Scal( m-(j+1), F(1)/alpha, A.Buffer(j+1,j), 1 );

        // A22 := A22 - a21 a12
        blas::Geru
        ( m-(j+1), n-(j+1),
          F(-1), A.LockedBuffer(j+1,j), 1,
                 A.LockedBuffer(j,j+1), A.LDim(),
                 A.Buffer(j+1,j+1),     A.LDim() );
    }
}
// Unblocked upper-triangular kernel (Variant 4) that updates A in place
// using the upper triangle of U. For UNIT `diag` the diagonal of U is never
// read and is treated as one.
void UUnb( UnitOrNonUnit diag, Matrix<T>& A, const Matrix<T>& U )
{
    EL_DEBUG_CSE
    const Int n = A.Height();
    const Int lda = A.LDim();
    const Int ldu = U.LDim();
    T* ABuffer = A.Buffer();
    const T* UBuffer = U.LockedBuffer();
    const bool nonUnit = ( diag != UNIT );

    for( Int j=0; j<n; ++j )
    {
        const Int a21Height = n - (j+1);

        // Diagonal values of A and U (before A's is overwritten below)
        const T alpha11 = ABuffer[j+j*lda];
        const T upsilon11 = ( diag==UNIT ? 1 : UBuffer[j+j*ldu] );
        const T halfAlpha11 = alpha11/T(2);

        // a01 := a01 + (alpha11/2)u01   (first half of the correction)
        T* a01 = &ABuffer[j*lda];
        const T* u01 = &UBuffer[j*ldu];
        for( Int k=0; k<j; ++k )
            a01[k] += halfAlpha11*u01[k];

        // A00 := A00 + (u01 a01' + a01 u01')
        T* A00 = ABuffer;
        blas::Her2( 'U', j, T(1), u01, 1, a01, 1, A00, lda );

        // a01 := a01 + (alpha11/2)u01   (second half of the correction)
        for( Int k=0; k<j; ++k )
            a01[k] += halfAlpha11*u01[k];

        // a01 := conj(upsilon11) a01
        if( nonUnit )
        {
            for( Int k=0; k<j; ++k )
                a01[k] *= Conj(upsilon11);
        }

        // A02 := A02 + u01 a12
        T* a12 = &ABuffer[j+(j+1)*lda];
        T* A02 = &ABuffer[(j+1)*lda];
        blas::Geru( j, a21Height, T(1), u01, 1, a12, lda, A02, lda );

        // alpha11 := alpha11 * |upsilon11|^2
        ABuffer[j+j*lda] *= Conj(upsilon11)*upsilon11;

        // a12 := upsilon11 a12
        if( nonUnit )
        {
            for( Int k=0; k<a21Height; ++k )
                a12[k*lda] *= upsilon11;
        }
    }
}
// Returns the largest entry of A, scanning column by column.
// The running value starts at limits::Lowest<Real>(), which is therefore the
// result for a matrix with no entries.
Real Max( const Matrix<Real>& A )
{
    DEBUG_CSE
    const Int height = A.Height();
    const Int width = A.Width();
    const Real* AData = A.LockedBuffer();
    const Int lda = A.LDim();

    Real largest = limits::Lowest<Real>();
    for( Int col=0; col<width; ++col )
    {
        const Int colOffset = col*lda;
        for( Int row=0; row<height; ++row )
            largest = Max(largest,AData[row+colOffset]);
    }
    return largest;
}