void UUnb( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& U ) { EL_DEBUG_CSE // Use the Variant 4 algorithm // (which annoyingly requires conjugations for the Her2) const Int n = A.Height(); const Int lda = A.LDim(); const Int ldu = U.LDim(); F* ABuffer = A.Buffer(); const F* UBuffer = U.LockedBuffer(); vector<F> a12Conj( n ), u12Conj( n ); for( Int j=0; j<n; ++j ) { const Int a21Height = n - (j+1); // Extract and store the diagonal value of U const F upsilon11 = ( diag==UNIT ? 1 : UBuffer[j+j*ldu] ); // a01 := a01 / upsilon11 F* a01 = &ABuffer[j*lda]; if( diag != UNIT ) for( Int k=0; k<j; ++k ) a01[k] /= upsilon11; // A02 := A02 - a01 u12 F* A02 = &ABuffer[(j+1)*lda]; const F* u12 = &UBuffer[j+(j+1)*ldu]; blas::Geru( j, a21Height, F(-1), a01, 1, u12, ldu, A02, lda ); // alpha11 := alpha11 / |upsilon11|^2 ABuffer[j+j*lda] /= upsilon11*Conj(upsilon11); const F alpha11 = ABuffer[j+j*lda]; // a12 := a12 / conj(upsilon11) F* a12 = &ABuffer[j+(j+1)*lda]; if( diag != UNIT ) for( Int k=0; k<a21Height; ++k ) a12[k*lda] /= Conj(upsilon11); // a12 := a12 - (alpha11/2)u12 for( Int k=0; k<a21Height; ++k ) a12[k*lda] -= (alpha11/F(2))*u12[k*ldu]; // A22 := A22 - (a12' u12 + u12' a12) F* A22 = &ABuffer[(j+1)+(j+1)*lda]; for( Int k=0; k<a21Height; ++k ) a12Conj[k] = Conj(a12[k*lda]); for( Int k=0; k<a21Height; ++k ) u12Conj[k] = Conj(u12[k*ldu]); blas::Her2 ( 'U', a21Height, F(-1), u12Conj.data(), 1, a12Conj.data(), 1, A22, lda ); // a12 := a12 - (alpha11/2)u12 for( Int k=0; k<a21Height; ++k ) a12[k*lda] -= (alpha11/F(2))*u12[k*ldu]; } }
inline void TwoSidedTrsmUUnb( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& U ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrsmUUnb"); #endif // Use the Variant 4 algorithm // (which annoyingly requires conjugations for the Her2) const int n = A.Height(); const int lda = A.LDim(); const int ldu = U.LDim(); F* ABuffer = A.Buffer(); const F* UBuffer = U.LockedBuffer(); std::vector<F> a12Conj( n ), u12Conj( n ); for( int j=0; j<n; ++j ) { const int a21Height = n - (j+1); // Extract and store the diagonal value of U const F upsilon11 = ( diag==UNIT ? 1 : UBuffer[j+j*ldu] ); // a01 := a01 / upsilon11 F* a01 = &ABuffer[j*lda]; if( diag != UNIT ) for( int k=0; k<j; ++k ) a01[k] /= upsilon11; // A02 := A02 - a01 u12 F* A02 = &ABuffer[(j+1)*lda]; const F* u12 = &UBuffer[j+(j+1)*ldu]; blas::Geru( j, a21Height, F(-1), a01, 1, u12, ldu, A02, lda ); // alpha11 := alpha11 / |upsilon11|^2 ABuffer[j+j*lda] /= upsilon11*Conj(upsilon11); const F alpha11 = ABuffer[j+j*lda]; // a12 := a12 / conj(upsilon11) F* a12 = &ABuffer[j+(j+1)*lda]; if( diag != UNIT ) for( int k=0; k<a21Height; ++k ) a12[k*lda] /= Conj(upsilon11); // a12 := a12 - (alpha11/2)u12 for( int k=0; k<a21Height; ++k ) a12[k*lda] -= (alpha11/2)*u12[k*ldu]; // A22 := A22 - (a12' u12 + u12' a12) F* A22 = &ABuffer[(j+1)+(j+1)*lda]; for( int k=0; k<a21Height; ++k ) a12Conj[k] = Conj(a12[k*lda]); for( int k=0; k<a21Height; ++k ) u12Conj[k] = Conj(u12[k*ldu]); blas::Her2 ( 'U', a21Height, F(-1), &u12Conj[0], 1, &a12Conj[0], 1, A22, lda ); // a12 := a12 - (alpha11/2)u12 for( int k=0; k<a21Height; ++k ) a12[k*lda] -= (alpha11/2)*u12[k*ldu]; } #ifndef RELEASE PopCallStack(); #endif }