// Unblocked kernel acting on the lower triangles of L and X, both treated as
// n x n with n = L.Height().  The lower triangle of X is first scaled by alpha.
// Then, for each row i, the leading i+1 entries of row i of X are divided by
// L(i,i) (skipped when diag == UNIT), and the rows of X below it receive the
// rank-one update  X(i+1:n,0:i+1) -= L(i+1:n,i) * X(i,0:i+1)  through
// blas::Geru.
void LLNUnb( UnitOrNonUnit diag, F alpha, const Matrix<F>& L, Matrix<F>& X )
{
    DEBUG_CSE
    const Int n = L.Height();
    const Int ldL = L.LDim();
    const Int ldX = X.LDim();
    const F* LBuf = L.LockedBuffer();
    F* XBuf = X.Buffer();
    const bool divideByDiagonal = ( diag != UNIT );

    // X := alpha X (only entries on or below the diagonal are touched)
    if( alpha != F(1) )
    {
        for( Int col=0; col<n; ++col )
            for( Int row=col; row<n; ++row )
                XBuf[row+col*ldX] *= alpha;
    }

    for( Int row=0; row<n; ++row )
    {
        if( divideByDiagonal )
        {
            // Scale row 'row' of X, up to and including its diagonal entry
            const F lambda11 = LBuf[row+row*ldL];
            for( Int col=0; col<=row; ++col )
                XBuf[row+col*ldX] /= lambda11;
        }

        // Eliminate the contribution of this row from every row beneath it
        const Int numBelow = n - (row+1);
        const F* lBelow = &LBuf[(row+1)+row*ldL];
        const F* xRow = &XBuf[row];
        F* XBelow = &XBuf[row+1];
        blas::Geru
        ( numBelow, row+1, F(-1), lBelow, 1, xRow, ldX, XBelow, ldX );
    }
}
// Axpy: converts alphaS to T and, when X has a single row or a single column,
// forwards to blas::Axpy with X's length and per-operand strides (1 for a
// column, the leading dimension for a row), so a row may be combined with a
// column.  In debug builds LogicError("Nonconformal Axpy") is raised when the
// vector lengths of X and Y differ.
// NOTE(review): the definition is cut off in this excerpt -- the branch for
// non-vector X and the closing brace of the function are not visible here.
void Axpy( S alphaS, const Matrix<T>& X, Matrix<T>& Y ) { EL_DEBUG_CSE const T alpha = T(alphaS); const Int mX = X.Height(); const Int nX = X.Width(); const Int nY = Y.Width(); const Int ldX = X.LDim(); const Int ldY = Y.LDim(); const T* XBuf = X.LockedBuffer(); T* YBuf = Y.Buffer(); // If X and Y are vectors, we can allow one to be a column and the other // to be a row. Otherwise we force X and Y to be the same dimension. if( mX == 1 || nX == 1 ) { const Int XLength = ( nX==1 ? mX : nX ); const Int XStride = ( nX==1 ? 1 : ldX ); const Int YStride = ( nY==1 ? 1 : ldY ); EL_DEBUG_ONLY( const Int mY = Y.Height(); const Int YLength = ( nY==1 ? mY : nY ); if( XLength != YLength ) LogicError("Nonconformal Axpy"); ) blas::Axpy( XLength, alpha, XBuf, XStride, YBuf, YStride ); }
inline void Hemv ( UpperOrLower uplo, T alpha, const Matrix<T>& A, const Matrix<T>& x, T beta, Matrix<T>& y ) { #ifndef RELEASE PushCallStack("Hemv"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( ( x.Height() != 1 && x.Width() != 1 ) || ( y.Height() != 1 && y.Width() != 1 ) ) throw std::logic_error("x and y must be vectors"); const int xLength = ( x.Width()==1 ? x.Height() : x.Width() ); const int yLength = ( y.Width()==1 ? y.Height() : y.Width() ); if( A.Height() != xLength || A.Height() != yLength ) throw std::logic_error("A must conform with x and y"); #endif const char uploChar = UpperOrLowerToChar( uplo ); const int m = A.Height(); const int incx = ( x.Width()==1 ? 1 : x.LDim() ); const int incy = ( y.Width()==1 ? 1 : y.LDim() ); blas::Hemv ( uploChar, m, alpha, A.LockedBuffer(), A.LDim(), x.LockedBuffer(), incx, beta, y.Buffer(), incy ); #ifndef RELEASE PopCallStack(); #endif }
inline void Syr ( UpperOrLower uplo, T alpha, const Matrix<T>& x, Matrix<T>& A, bool conjugate=false ) { #ifndef RELEASE PushCallStack("Syr"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( x.Width() != 1 && x.Height() != 1 ) throw std::logic_error("x must be a vector"); const int xLength = ( x.Width()==1 ? x.Height() : x.Width() ); if( xLength != A.Height() ) throw std::logic_error("x must conform with A"); #endif const char uploChar = UpperOrLowerToChar( uplo ); const int m = A.Height(); const int incx = ( x.Width()==1 ? 1 : x.LDim() ); if( conjugate ) { blas::Her ( uploChar, m, alpha, x.LockedBuffer(), incx, A.Buffer(), A.LDim() ); } else { blas::Syr ( uploChar, m, alpha, x.LockedBuffer(), incx, A.Buffer(), A.LDim() ); } #ifndef RELEASE PopCallStack(); #endif }
inline void Trmv ( UpperOrLower uplo, Orientation orientation, UnitOrNonUnit diag, const Matrix<T>& A, Matrix<T>& x ) { #ifndef RELEASE PushCallStack("Trmv"); if( x.Height() != 1 && x.Width() != 1 ) throw std::logic_error("x must be a vector"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); const int xLength = ( x.Width()==1 ? x.Height() : x.Width() ); if( xLength != A.Height() ) throw std::logic_error("x must conform with A"); #endif const char uploChar = UpperOrLowerToChar( uplo ); const char transChar = OrientationToChar( orientation ); const char diagChar = UnitOrNonUnitToChar( diag ); const int m = A.Height(); const int incx = ( x.Width()==1 ? 1 : x.LDim() ); blas::Trmv ( uploChar, transChar, diagChar, m, A.LockedBuffer(), A.LDim(), x.Buffer(), incx ); #ifndef RELEASE PopCallStack(); #endif }
inline void Trmm ( LeftOrRight side, UpperOrLower uplo, Orientation orientation, UnitOrNonUnit diag, T alpha, const Matrix<T>& A, Matrix<T>& B ) { #ifndef RELEASE CallStackEntry entry("Trmm"); if( A.Height() != A.Width() ) LogicError("Triangular matrix must be square"); if( side == LEFT ) { if( A.Height() != B.Height() ) LogicError("Nonconformal Trmm"); } else { if( A.Height() != B.Width() ) LogicError("Nonconformal Trmm"); } #endif const char sideChar = LeftOrRightToChar( side ); const char uploChar = UpperOrLowerToChar( uplo ); const char transChar = OrientationToChar( orientation ); const char diagChar = UnitOrNonUnitToChar( diag ); blas::Trmm ( sideChar, uploChar, transChar, diagChar, B.Height(), B.Width(), alpha, A.LockedBuffer(), A.LDim(), B.Buffer(), B.LDim() ); }
// TransposeAxpy: converts alphaS to T and, when X has a single row or a single
// column, adds alpha times X (entrywise-conjugated via Conj when 'conjugate'
// is set, otherwise through blas::Axpy) into Y, using a stride of 1 for a
// column operand and the leading dimension for a row operand.  In debug builds
// LogicError("Nonconformal TransposeAxpy") is raised when the vector lengths
// of X and Y differ.
// NOTE(review): the definition is cut off in this excerpt -- the branch for
// non-vector X and the closing brace of the function are not visible here.
void TransposeAxpy ( S alphaS, const Matrix<T>& X, Matrix<T>& Y, bool conjugate ) { DEBUG_CSE const T alpha = T(alphaS); const Int mX = X.Height(); const Int nX = X.Width(); const Int nY = Y.Width(); const Int ldX = X.LDim(); const Int ldY = Y.LDim(); const T* XBuf = X.LockedBuffer(); T* YBuf = Y.Buffer(); // If X and Y are vectors, we can allow one to be a column and the other // to be a row. Otherwise we force X and Y to be the same dimension. if( mX == 1 || nX == 1 ) { const Int lengthX = ( nX==1 ? mX : nX ); const Int incX = ( nX==1 ? 1 : ldX ); const Int incY = ( nY==1 ? 1 : ldY ); DEBUG_ONLY( const Int mY = Y.Height(); const Int lengthY = ( nY==1 ? mY : nY ); if( lengthX != lengthY ) LogicError("Nonconformal TransposeAxpy"); ) if( conjugate ) for( Int j=0; j<lengthX; ++j ) YBuf[j*incY] += alpha*Conj(XBuf[j*incX]); else blas::Axpy( lengthX, alpha, XBuf, incX, YBuf, incY ); }
inline void Syrk ( UpperOrLower uplo, Orientation orientation, T alpha, const Matrix<T>& A, T beta, Matrix<T>& C ) { #ifndef RELEASE PushCallStack("Syrk"); if( orientation == NORMAL ) { if( A.Height() != C.Height() || A.Height() != C.Width() ) throw std::logic_error("Nonconformal Syrk"); } else if( orientation == TRANSPOSE ) { if( A.Width() != C.Height() || A.Width() != C.Width() ) throw std::logic_error("Nonconformal Syrk"); } else throw std::logic_error ("Syrk only accepts NORMAL and TRANSPOSE options"); #endif const char uploChar = UpperOrLowerToChar( uplo ); const char transChar = OrientationToChar( orientation ); const int k = ( orientation == NORMAL ? A.Width() : A.Height() ); blas::Syrk ( uploChar, transChar, C.Height(), k, alpha, A.LockedBuffer(), A.LDim(), beta, C.Buffer(), C.LDim() ); #ifndef RELEASE PopCallStack(); #endif }
inline void Ger( T alpha, const Matrix<T>& x, const Matrix<T>& y, Matrix<T>& A ) { #ifndef RELEASE CallStackEntry entry("Ger"); if( ( x.Height() != 1 && x.Width() != 1 ) || ( y.Height() != 1 && y.Width() != 1 ) ) LogicError("x and y must be vectors"); const Int xLength = ( x.Width()==1 ? x.Height() : x.Width() ); const Int yLength = ( y.Width()==1 ? y.Height() : y.Width() ); if( xLength != A.Height() || yLength != A.Width() ) { std::ostringstream msg; msg << "Nonconformal Ger:\n" << " x ~ " << x.Height() << " x " << x.Width() << "\n" << " y ~ " << y.Height() << " x " << y.Width() << "\n" << " A ~ " << A.Height() << " x " << A.Width(); LogicError( msg.str() ); } #endif const Int m = A.Height(); const Int n = A.Width(); const Int incx = ( x.Width()==1 ? 1 : x.LDim() ); const Int incy = ( y.Width()==1 ? 1 : y.LDim() ); blas::Ger ( m, n, alpha, x.LockedBuffer(), incx, y.LockedBuffer(), incy, A.Buffer(), A.LDim() ); }
void UUnb( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& U ) { EL_DEBUG_CSE // Use the Variant 4 algorithm // (which annoyingly requires conjugations for the Her2) const Int n = A.Height(); const Int lda = A.LDim(); const Int ldu = U.LDim(); F* ABuffer = A.Buffer(); const F* UBuffer = U.LockedBuffer(); vector<F> a12Conj( n ), u12Conj( n ); for( Int j=0; j<n; ++j ) { const Int a21Height = n - (j+1); // Extract and store the diagonal value of U const F upsilon11 = ( diag==UNIT ? 1 : UBuffer[j+j*ldu] ); // a01 := a01 / upsilon11 F* a01 = &ABuffer[j*lda]; if( diag != UNIT ) for( Int k=0; k<j; ++k ) a01[k] /= upsilon11; // A02 := A02 - a01 u12 F* A02 = &ABuffer[(j+1)*lda]; const F* u12 = &UBuffer[j+(j+1)*ldu]; blas::Geru( j, a21Height, F(-1), a01, 1, u12, ldu, A02, lda ); // alpha11 := alpha11 / |upsilon11|^2 ABuffer[j+j*lda] /= upsilon11*Conj(upsilon11); const F alpha11 = ABuffer[j+j*lda]; // a12 := a12 / conj(upsilon11) F* a12 = &ABuffer[j+(j+1)*lda]; if( diag != UNIT ) for( Int k=0; k<a21Height; ++k ) a12[k*lda] /= Conj(upsilon11); // a12 := a12 - (alpha11/2)u12 for( Int k=0; k<a21Height; ++k ) a12[k*lda] -= (alpha11/F(2))*u12[k*ldu]; // A22 := A22 - (a12' u12 + u12' a12) F* A22 = &ABuffer[(j+1)+(j+1)*lda]; for( Int k=0; k<a21Height; ++k ) a12Conj[k] = Conj(a12[k*lda]); for( Int k=0; k<a21Height; ++k ) u12Conj[k] = Conj(u12[k*ldu]); blas::Her2 ( 'U', a21Height, F(-1), u12Conj.data(), 1, a12Conj.data(), 1, A22, lda ); // a12 := a12 - (alpha11/2)u12 for( Int k=0; k<a21Height; ++k ) a12[k*lda] -= (alpha11/F(2))*u12[k*ldu]; } }
// Unblocked two-sided update of the lower triangle of A by the lower
// triangular matrix L ("Variant 4").  Column j is processed left to right:
//   a10     := a10 + (alpha11/2) l10
//   A00     := A00 + (a10' l10 + l10' a10)
//   a10     := a10 + (alpha11/2) l10
//   a10     := conj(lambda11) a10
//   alpha11 := alpha11 |lambda11|^2
//   A20     := A20 + a21 l10
//   a21     := lambda11 a21
// where alpha11 is the value of A(j,j) on entry to the iteration.  The rows
// a10 and l10 are conjugated into scratch vectors before being passed to
// blas::Her2.  With diag == UNIT the diagonal of L is taken to be one.
void LUnb( UnitOrNonUnit diag, Matrix<T>& A, const Matrix<T>& L )
{
    EL_DEBUG_CSE
    const Int n = A.Height();
    const Int ldA = A.LDim();
    const Int ldL = L.LDim();
    T* ABuf = A.Buffer();
    const T* LBuf = L.LockedBuffer();
    const bool nonUnit = ( diag != UNIT );

    // Scratch space for the conjugated rows handed to Her2
    vector<T> a10Conj( n ), l10Conj( n );

    for( Int j=0; j<n; ++j )
    {
        const Int numBelow = n - (j+1);

        // Diagonal entries of A and L (the latter implicitly one if unit)
        const T alpha11 = ABuf[j+j*ldA];
        const T lambda11 = ( diag==UNIT ? 1 : LBuf[j+j*ldL] );
        const T halfAlpha11 = alpha11/T(2);

        // a10 := a10 + (alpha11/2)l10
        T* a10 = &ABuf[j];
        const T* l10 = &LBuf[j];
        for( Int i=0; i<j; ++i )
            a10[i*ldA] += halfAlpha11*l10[i*ldL];

        // A00 := A00 + (a10' l10 + l10' a10)
        T* A00 = ABuf;
        for( Int i=0; i<j; ++i )
        {
            a10Conj[i] = Conj(a10[i*ldA]);
            l10Conj[i] = Conj(l10[i*ldL]);
        }
        blas::Her2
        ( 'L', j, T(1), a10Conj.data(), 1, l10Conj.data(), 1, A00, ldA );

        // a10 := a10 + (alpha11/2)l10
        for( Int i=0; i<j; ++i )
            a10[i*ldA] += halfAlpha11*l10[i*ldL];

        // a10 := conj(lambda11) a10
        if( nonUnit )
            for( Int i=0; i<j; ++i )
                a10[i*ldA] *= Conj(lambda11);

        // alpha11 := alpha11 * |lambda11|^2
        ABuf[j+j*ldA] *= Conj(lambda11)*lambda11;

        // A20 := A20 + a21 l10
        T* a21 = &ABuf[(j+1)+j*ldA];
        T* A20 = &ABuf[j+1];
        blas::Geru( numBelow, j, T(1), a21, 1, l10, ldL, A20, ldA );

        // a21 := lambda11 a21
        if( nonUnit )
            for( Int i=0; i<numBelow; ++i )
                a21[i] *= lambda11;
    }
}
inline void TwoSidedTrsmLUnb( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& L ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrsmLUnb"); #endif // Use the Variant 4 algorithm const int n = A.Height(); const int lda = A.LDim(); const int ldl = L.LDim(); F* ABuffer = A.Buffer(); const F* LBuffer = L.LockedBuffer(); for( int j=0; j<n; ++j ) { const int a21Height = n - (j+1); // Extract and store the diagonal value of L const F lambda11 = ( diag==UNIT ? 1 : LBuffer[j+j*ldl] ); // a10 := a10 / lambda11 F* a10 = &ABuffer[j]; if( diag != UNIT ) for( int k=0; k<j; ++k ) a10[k*lda] /= lambda11; // A20 := A20 - l21 a10 F* A20 = &ABuffer[j+1]; const F* l21 = &LBuffer[(j+1)+j*ldl]; blas::Geru( a21Height, j, F(-1), l21, 1, a10, lda, A20, lda ); // alpha11 := alpha11 / |lambda11|^2 ABuffer[j+j*lda] /= lambda11*Conj(lambda11); const F alpha11 = ABuffer[j+j*lda]; // a21 := a21 / conj(lambda11) F* a21 = &ABuffer[(j+1)+j*lda]; if( diag != UNIT ) for( int k=0; k<a21Height; ++k ) a21[k] /= Conj(lambda11); // a21 := a21 - (alpha11/2)l21 for( int k=0; k<a21Height; ++k ) a21[k] -= (alpha11/2)*l21[k]; // A22 := A22 - (l21 a21' + a21 l21') F* A22 = &ABuffer[(j+1)+(j+1)*lda]; blas::Her2( 'L', a21Height, F(-1), l21, 1, a21, 1, A22, lda ); // a21 := a21 - (alpha11/2)l21 for( int k=0; k<a21Height; ++k ) a21[k] -= (alpha11/2)*l21[k]; } #ifndef RELEASE PopCallStack(); #endif }
// Unblocked two-sided update of the upper triangle of A by the upper
// triangular matrix U ("Variant 4").  Column j is processed left to right:
//   a01     := a01 + (alpha11/2) u01
//   A00     := A00 + (u01 a01' + a01 u01')
//   a01     := a01 + (alpha11/2) u01
//   a01     := conj(upsilon11) a01
//   A02     := A02 + u01 a12
//   alpha11 := alpha11 |upsilon11|^2
//   a12     := upsilon11 a12
// where alpha11 is the value of A(j,j) on entry to the iteration.  With
// diag == UNIT the diagonal of U is taken to be one.
void UUnb( UnitOrNonUnit diag, Matrix<T>& A, const Matrix<T>& U )
{
    EL_DEBUG_CSE
    const Int n = A.Height();
    const Int ldA = A.LDim();
    const Int ldU = U.LDim();
    T* ABuf = A.Buffer();
    const T* UBuf = U.LockedBuffer();
    const bool nonUnit = ( diag != UNIT );

    for( Int j=0; j<n; ++j )
    {
        const Int numRight = n - (j+1);

        // Diagonal entries of A and U (the latter implicitly one if unit)
        const T alpha11 = ABuf[j+j*ldA];
        const T upsilon11 = ( diag==UNIT ? 1 : UBuf[j+j*ldU] );
        const T halfAlpha11 = alpha11/T(2);

        // a01 := a01 + (alpha11/2)u01
        T* a01 = &ABuf[j*ldA];
        const T* u01 = &UBuf[j*ldU];
        for( Int i=0; i<j; ++i )
            a01[i] += halfAlpha11*u01[i];

        // A00 := A00 + (u01 a01' + a01 u01')
        T* A00 = ABuf;
        blas::Her2( 'U', j, T(1), u01, 1, a01, 1, A00, ldA );

        // a01 := a01 + (alpha11/2)u01
        for( Int i=0; i<j; ++i )
            a01[i] += halfAlpha11*u01[i];

        // a01 := conj(upsilon11) a01
        if( nonUnit )
            for( Int i=0; i<j; ++i )
                a01[i] *= Conj(upsilon11);

        // A02 := A02 + u01 a12
        T* a12 = &ABuf[j+(j+1)*ldA];
        T* A02 = &ABuf[(j+1)*ldA];
        blas::Geru( j, numRight, T(1), u01, 1, a12, ldA, A02, ldA );

        // alpha11 := alpha11 * |upsilon11|^2
        ABuf[j+j*ldA] *= Conj(upsilon11)*upsilon11;

        // a12 := upsilon11 a12
        if( nonUnit )
            for( Int i=0; i<numRight; ++i )
                a12[i*ldA] *= upsilon11;
    }
}
void EntrywiseMap( const Matrix<S>& A, Matrix<T>& B, function<T(S)> func ) { DEBUG_CSE const Int m = A.Height(); const Int n = A.Width(); const S* ABuf = A.LockedBuffer(); const Int ALDim = A.LDim(); B.Resize( m, n ); T* BBuf = B.Buffer(); const Int BLDim = B.LDim(); for( Int j=0; j<n; ++j ) for( Int i=0; i<m; ++i ) BBuf[i+j*BLDim] = func(ABuf[i+j*ALDim]); }
// Unblocked, in-place factorization working on the upper triangle of A.  For
// each j: the real part of A(j,j) must be strictly positive (otherwise
// std::logic_error is thrown) and is replaced by its square root; the rest of
// row j is divided by that root; and the trailing upper triangle is updated by
//   A(i,k) -= conj(A(j,i)) A(j,k),   j < i <= k < n.
// Entries strictly below the diagonal are never read or written.
inline void CholeskyUVar3Unb( Matrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("internal::CholeskyUVar3Unb");
    if( A.Height() != A.Width() )
        throw std::logic_error
        ("Can only compute Cholesky factor of square matrices");
#endif
    typedef typename Base<F>::type R;

    const int n = A.Height();
    const int ldA = A.LDim();
    F* ABuf = A.Buffer();

    for( int j=0; j<n; ++j )
    {
        // Pivot: square root of the (real) diagonal entry
        R delta = RealPart(ABuf[j+j*ldA]);
        if( delta <= R(0) )
            throw std::logic_error("A was not numerically HPD");
        delta = Sqrt( delta );
        ABuf[j+j*ldA] = delta;

        // Scale the remainder of row j
        for( int col=j+1; col<n; ++col )
            ABuf[j+col*ldA] /= delta;

        // Update the trailing upper triangle with the scaled row.  Row j is
        // not modified below, so its entry for this column can be read once.
        for( int col=j+1; col<n; ++col )
        {
            const F rowEntry = ABuf[j+col*ldA];
            F* ACol = &ABuf[col*ldA];
            for( int row=j+1; row<=col; ++row )
                ACol[row] -= Conj(ABuf[j+row*ldA])*rowEntry;
        }
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
inline void DivideAndConquerSVD ( Matrix<F>& A, Matrix<typename Base<F>::type>& s, Matrix<F>& V ) { #ifndef RELEASE PushCallStack("svd::DivideAndConquerSVD"); #endif typedef typename Base<F>::type R; const int m = A.Height(); const int n = A.Width(); const int k = std::min(m,n); s.ResizeTo( k, 1 ); Matrix<F> U( m, k ); Matrix<F> VAdj( k, n ); lapack::DivideAndConquerSVD ( m, n, A.Buffer(), A.LDim(), s.Buffer(), U.Buffer(), U.LDim(), VAdj.Buffer(), VAdj.LDim() ); A = U; Adjoint( VAdj, V ); #ifndef RELEASE PopCallStack(); #endif }
void Transform2x2( const Matrix<T>& G, Matrix<T>& a1, Matrix<T>& a2 ) { DEBUG_CSE T* a1Buf = a1.Buffer(); T* a2Buf = a2.Buffer(); const Int inc1 = ( a1.Height() == 1 ? a1.LDim() : 1 ); const Int inc2 = ( a2.Height() == 1 ? a2.LDim() : 1 ); const Int n = ( a1.Height() == 1 ? a1.Width() : a1.Height() ); const T gamma11 = G.Get(0,0); const T gamma12 = G.Get(0,1); const T gamma21 = G.Get(1,0); const T gamma22 = G.Get(1,1); Transform2x2 ( n, gamma11, gamma12, gamma21, gamma22, a1Buf, inc1, a2Buf, inc2 ); }
// Unblocked, in-place kernel on the upper triangle of U, sweeping the columns
// from last to first.  For column j (upsilon = U(j,j), or one when the
// diagonal is implicitly unit):
//   u01       := -u01 / upsilon
//   U02       := U02 + u01 u12
//   u12       := u12 / upsilon      (NON_UNIT only)
//   upsilon11 := 1 / upsilon11      (NON_UNIT only)
inline void UVar3Unb( UnitOrNonUnit diag, Matrix<F>& U )
{
#ifndef RELEASE
    CallStackEntry entry("triangular_inverse::UVar3Unb");
    if( U.Height() != U.Width() )
        LogicError("Nonsquare matrices cannot be triangular");
#endif
    const Int n = U.Height();
    const Int ldU = U.LDim();
    F* UBuf = U.Buffer();
    const bool nonUnit = ( diag == NON_UNIT );

    for( Int j=n-1; j>=0; --j )
    {
        const Int numRight = n - (j+1);
        const F upsilon = ( nonUnit ? UBuf[j+j*ldU] : F(1) );

        // u01 := -u01 / upsilon
        F* u01 = &UBuf[j*ldU];
        for( Int i=0; i<j; ++i )
            u01[i] /= -upsilon;

        // U02 := U02 + u01 u12
        F* u12 = &UBuf[j+(j+1)*ldU];
        F* U02 = &UBuf[(j+1)*ldU];
        blas::Geru( j, numRight, F(1), u01, 1, u12, ldU, U02, ldU );

        if( nonUnit )
        {
            // u12 := u12 / upsilon
            for( Int i=0; i<numRight; ++i )
                u12[i*ldU] /= upsilon;
            // The diagonal entry is untouched up to here, so it still
            // equals upsilon
            UBuf[j+j*ldU] = F(1) / upsilon;
        }
    }
}
// Reads a matrix from a binary file into A.
//
// Layout consumed by this routine: two Int values (height, then width)
// followed by height*width entries of type T, read one column at a time into
// A's buffer.
//
// RuntimeError is raised when
//   - the file cannot be opened,
//   - the two-Int header cannot be read in full (e.g. the file is too short),
//   - either dimension is negative, or
//   - the file size reported by FileSize() differs from the size implied by
//     the header.
//
// A is resized to height x width before the entries are read.
inline void Binary( Matrix<T>& A, const string filename )
{
    EL_DEBUG_CSE
    std::ifstream file( filename.c_str(), std::ios::binary );
    if( !file.is_open() )
        RuntimeError("Could not open ",filename);

    // Read and validate the header before trusting its values; without the
    // stream check, a truncated file would leave the dimensions unset.
    Int height=0, width=0;
    file.read( reinterpret_cast<char*>(&height), sizeof(Int) );
    file.read( reinterpret_cast<char*>(&width), sizeof(Int) );
    if( !file )
        RuntimeError("Could not read matrix dimensions from ",filename);
    if( height < 0 || width < 0 )
        RuntimeError
        ("Invalid matrix dimensions ",height," x ",width," in ",filename);

    const Int numBytes = FileSize( file );
    const Int metaBytes = 2*sizeof(Int);
    const Int dataBytes = height*width*sizeof(T);
    const Int numBytesExp = metaBytes + dataBytes;
    if( numBytes != numBytesExp )
        RuntimeError
        ("Expected file to be ",numBytesExp," bytes but found ",numBytes);

    A.Resize( height, width );
    if( A.Height() == A.LDim() )
    {
        // Columns are contiguous in memory: a single read fills the matrix
        file.read( reinterpret_cast<char*>(A.Buffer()), height*width*sizeof(T) );
    }
    else
    {
        // Padded leading dimension: read each column separately
        for( Int j=0; j<width; ++j )
            file.read
            ( reinterpret_cast<char*>(A.Buffer(0,j)), height*sizeof(T) );
    }
}
inline void TriangularInverseUVar3Unb( UnitOrNonUnit diag, Matrix<F>& U ) { #ifndef RELEASE PushCallStack("internal::TriangularInverseUVar3Unb"); if( U.Height() != U.Width() ) throw std::logic_error("Nonsquare matrices cannot be triangular"); #endif const int n = U.Height(); const int ldu = U.LDim(); F* UBuffer = U.Buffer(); for( int j=n-1; j>=0; --j ) { const F upsilon = ( diag==NON_UNIT ? UBuffer[j+j*ldu] : F(1) ); for( int k=0; k<j; ++k ) UBuffer[k+j*ldu] /= -upsilon; blas::Geru ( j, n-(j+1), F(1), &UBuffer[j*ldu], 1, &UBuffer[j+(j+1)*ldu], ldu, &UBuffer[(j+1)*ldu], ldu ); if( diag == NON_UNIT ) { for( int k=j+1; k<n; ++k ) UBuffer[j+k*ldu] /= upsilon; UBuffer[j+j*ldu] = F(1) / UBuffer[j+j*ldu]; } } #ifndef RELEASE PopCallStack(); #endif }
void FusedRowPanelGemvs ( bool conjugate, T alpha, const Matrix<T>& A, const Matrix<T>& q, const Matrix<T>& r, Matrix<T>& s, Matrix<T>& t, Int bsize ) { const Int m = A.Height(); const Int n = A.Width(); const char transChar = ( conjugate ? 'C' : 'T' ); const T* ABuf = A.LockedBuffer(); const T* qBuf = q.LockedBuffer(); const T* rBuf = r.LockedBuffer(); T* sBuf = s.Buffer(); T* tBuf = t.Buffer(); const Int ALDim = A.LDim(); for( Int k=0; k<n; k+=bsize ) { const Int nb = Min(n-k,bsize); blas::Gemv ( 'N', m, nb, alpha, &ABuf[k*ALDim], ALDim, &qBuf[k], 1, T(1), sBuf, 1 ); blas::Gemv ( transChar, m, nb, alpha, &ABuf[k*ALDim], ALDim, rBuf, 1, T(1), &tBuf[k], 1 ); } }
// Unblocked, in-place elimination over the first min(m,n) columns of A.  For
// each j: a zero diagonal entry raises SingularMatrixException; otherwise the
// entries below the diagonal in column j are scaled by 1/A(j,j) and the
// trailing submatrix receives the rank-one update
//   A(j+1:m,j+1:n) -= A(j+1:m,j) A(j,j+1:n).
// No row interchanges are performed.
inline void Unb( Matrix<F>& A )
{
    EL_DEBUG_CSE
    const Int m = A.Height();
    const Int n = A.Width();
    const Int minDim = Min(m,n);
    for( Int j=0; j<minDim; ++j )
    {
        const F pivot = A(j,j);
        if( pivot == F(0) )
            throw SingularMatrixException();

        const Int numBelow = m - (j+1);
        const Int numRight = n - (j+1);

        // Form the multipliers in place
        blas::Scal( numBelow, F(1)/pivot, A.Buffer(j+1,j), 1 );

        // Update the trailing submatrix
        blas::Geru
        ( numBelow, numRight, F(-1),
          A.LockedBuffer(j+1,j), 1,
          A.LockedBuffer(j,j+1), A.LDim(),
          A.Buffer(j+1,j+1),     A.LDim() );
    }
}
inline void Hemm ( LeftOrRight side, UpperOrLower uplo, T alpha, const Matrix<T>& A, const Matrix<T>& B, T beta, Matrix<T>& C ) { #ifndef RELEASE PushCallStack("Hemm"); #endif const char sideChar = LeftOrRightToChar( side ); const char uploChar = UpperOrLowerToChar( uplo ); blas::Hemm ( sideChar, uploChar, C.Height(), C.Width(), alpha, A.LockedBuffer(), A.LDim(), B.LockedBuffer(), B.LDim(), beta, C.Buffer(), C.LDim() ); #ifndef RELEASE PopCallStack(); #endif }
// Forwards to mkl::Trrk.  The order passed down is C's height, and the inner
// dimension is A's width when orientA is NORMAL and A's height otherwise.
void TrrkMKL
( UpperOrLower uplo,
  Orientation orientA, Orientation orientB,
  T alpha, const Matrix<T>& A, const Matrix<T>& B,
  T beta, Matrix<T>& C )
{
    EL_DEBUG_CSE
    // Translate the enumerations into the characters expected by the backend
    const char uploChar = UpperOrLowerToChar( uplo );
    const char orientAChar = OrientationToChar( orientA );
    const char orientBChar = OrientationToChar( orientB );

    const auto order = C.Height();
    const auto innerDim = orientA == NORMAL ? A.Width() : A.Height();

    mkl::Trrk
    ( uploChar, orientAChar, orientBChar,
      order, innerDim,
      alpha, A.LockedBuffer(), A.LDim(),
             B.LockedBuffer(), B.LDim(),
      beta,  C.Buffer(),       C.LDim() );
}
// Returns the result of blas::Dotu applied to the vectors x and y.  Each
// vector may be stored as a column (unit stride) or as a row (stride equal to
// its leading dimension); the length is taken from x.  Non-RELEASE builds
// verify that both inputs are vectors of equal length.
inline T Dotu( const Matrix<T>& x, const Matrix<T>& y )
{
#ifndef RELEASE
    PushCallStack("Dotu");
    const bool xIsVector = ( x.Height() == 1 || x.Width() == 1 );
    const bool yIsVector = ( y.Height() == 1 || y.Width() == 1 );
    if( !xIsVector || !yIsVector )
        throw std::logic_error("Expected vector inputs");
    int xLength = ( x.Width() == 1 ? x.Height() : x.Width() );
    int yLength = ( y.Width() == 1 ? y.Height() : y.Width() );
    if( xLength != yLength )
        throw std::logic_error("x and y must be the same length");
#endif
    const bool xIsColumn = ( x.Width() == 1 );
    const bool yIsColumn = ( y.Width() == 1 );

    // Length comes from x; strides depend on each vector's orientation
    const int length = ( xIsColumn ? x.Height() : x.Width() );
    const int xStride = ( xIsColumn ? 1 : x.LDim() );
    const int yStride = ( yIsColumn ? 1 : y.LDim() );

    const T dotProduct =
        blas::Dotu
        ( length, x.LockedBuffer(), xStride, y.LockedBuffer(), yStride );
#ifndef RELEASE
    PopCallStack();
#endif
    return dotProduct;
}
inline void TrtrmmUUnblocked( Orientation orientation, Matrix<T>& U ) { #ifndef RELEASE PushCallStack("internal::TrtrmmUUnblocked"); if( U.Height() != U.Width() ) throw std::logic_error("U must be square"); if( orientation == NORMAL ) throw std::logic_error("Trtrmm requires (conjugate-)transpose"); #endif const int n = U.Height(); T* UBuffer = U.Buffer(); const int ldim = U.LDim(); for( int j=0; j<n; ++j ) { T* RESTRICT u01 = &UBuffer[j*ldim]; if( orientation == ADJOINT ) { // U00 := U00 + u01 u01^H for( int k=0; k<j; ++k ) { const T gamma = Conj(u01[k]); T* RESTRICT U00Col = &UBuffer[k*ldim]; for( int i=0; i<=k; ++i ) U00Col[i] += u01[i]*gamma; } } else { // U00 := U00 + u01 u01^T for( int k=0; k<j; ++k ) { const T gamma = u01[k]; T* RESTRICT U00Col = &UBuffer[k*ldim]; for( int i=0; i<=k; ++i ) U00Col[i] += u01[i]*gamma; } } // u01 := u01 upsilon11 const T upsilon11 = UBuffer[j+j*ldim]; for( int k=0; k<j; ++k ) u01[k] *= upsilon11; // upsilon11 := upsilon11^2 or |upsilon11|^2 if( orientation == ADJOINT ) UBuffer[j+j*ldim] = upsilon11*Conj(upsilon11); else UBuffer[j+j*ldim] = upsilon11*upsilon11; } #ifndef RELEASE PopCallStack(); #endif }
inline void TrtrmmLUnblocked( Orientation orientation, Matrix<T>& L ) { #ifndef RELEASE PushCallStack("internal::TrtrmmLUnblocked"); if( L.Height() != L.Width() ) throw std::logic_error("L must be square"); if( orientation == NORMAL ) throw std::logic_error("Trtrmm requires (conjugate-)transpose"); #endif const int n = L.Height(); T* LBuffer = L.Buffer(); const int ldim = L.LDim(); for( int j=0; j<n; ++j ) { T* RESTRICT l10 = &LBuffer[j]; if( orientation == ADJOINT ) { // L00 := L00 + l10^H l10 for( int k=0; k<j; ++k ) { const T gamma = l10[k*ldim]; T* RESTRICT L00Col = &LBuffer[k*ldim]; for( int i=k; i<j; ++i ) L00Col[i] += Conj(l10[i*ldim])*gamma; } } else { // L00 := L00 + l10^T l10 for( int k=0; k<j; ++k ) { const T gamma = l10[k*ldim]; T* RESTRICT L00Col = &LBuffer[k*ldim]; for( int i=k; i<j; ++i ) L00Col[i] += l10[i*ldim]*gamma; } } // l10 := l10 lambda11 const T lambda11 = LBuffer[j+j*ldim]; for( int k=0; k<j; ++k ) l10[k*ldim] *= lambda11; // lambda11 := lambda11^2 or |lambda11|^2 if( orientation == ADJOINT ) LBuffer[j+j*ldim] = lambda11*Conj(lambda11); else LBuffer[j+j*ldim] = lambda11*lambda11; } #ifndef RELEASE PopCallStack(); #endif }
// Replaces every entry of A with func(entry), in place, visiting the entries
// column by column.
void EntrywiseMap( Matrix<T>& A, function<T(T)> func )
{
    DEBUG_CSE
    const Int height = A.Height();
    const Int width = A.Width();
    const Int ldA = A.LDim();
    T* ABuf = A.Buffer();

    for( Int col=0; col<width; ++col )
    {
        const Int offset = col*ldA;
        for( Int row=0; row<height; ++row )
            ABuf[row+offset] = func(ABuf[row+offset]);
    }
}
// Replaces every entry of A, in place, with the result of the scalar Round
// overload applied to that entry, visiting the entries column by column.
void Round( Matrix<T>& A )
{
    EL_DEBUG_CSE
    const Int height = A.Height();
    const Int width = A.Width();
    const Int ldA = A.LDim();
    T* ABuf = A.Buffer();

    for( Int col=0; col<width; ++col )
    {
        const Int offset = col*ldA;
        for( Int row=0; row<height; ++row )
            ABuf[row+offset] = Round(ABuf[row+offset]);
    }
}
// Singular-values-only driver: resizes s to min(m,n) x 1 and calls
// lapack::SVD with A's buffer as the input matrix and s's buffer as the
// destination for the singular values.
// NOTE(review): A's buffer is passed as mutable, so its contents are
// presumably overwritten by LAPACK -- confirm against lapack::SVD.
inline void SVD( Matrix<F>& A, Matrix<BASE(F)>& s )
{
#ifndef RELEASE
    CallStackEntry entry("SVD");
#endif
    const Int height = A.Height();
    const Int width = A.Width();
    const Int numValues = Min(height,width);

    s.ResizeTo( numValues, 1 );
    lapack::SVD( height, width, A.Buffer(), A.LDim(), s.Buffer() );
}