示例#1
0
void UUnb( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& U )
{
    EL_DEBUG_CSE
    // Use the Variant 4 algorithm
    // (which annoyingly requires conjugations for the Her2)
    const Int n = A.Height();
    const Int lda = A.LDim();
    const Int ldu = U.LDim();
    F* ABuffer = A.Buffer();
    const F* UBuffer = U.LockedBuffer();
    vector<F> a12Conj( n ), u12Conj( n );
    for( Int j=0; j<n; ++j )
    {
        const Int a21Height = n - (j+1);

        // Extract and store the diagonal value of U
        const F upsilon11 = ( diag==UNIT ? 1 : UBuffer[j+j*ldu] );

        // a01 := a01 / upsilon11
        F* a01 = &ABuffer[j*lda];
        if( diag != UNIT )
            for( Int k=0; k<j; ++k )
                a01[k] /= upsilon11;

        // A02 := A02 - a01 u12
        F* A02 = &ABuffer[(j+1)*lda];
        const F* u12 = &UBuffer[j+(j+1)*ldu];
        blas::Geru( j, a21Height, F(-1), a01, 1, u12, ldu, A02, lda );

        // alpha11 := alpha11 / |upsilon11|^2
        ABuffer[j+j*lda] /= upsilon11*Conj(upsilon11);
        const F alpha11 = ABuffer[j+j*lda];

        // a12 := a12 / conj(upsilon11)
        F* a12 = &ABuffer[j+(j+1)*lda];
        if( diag != UNIT )
            for( Int k=0; k<a21Height; ++k )
                a12[k*lda] /= Conj(upsilon11);

        // a12 := a12 - (alpha11/2)u12
        for( Int k=0; k<a21Height; ++k )
            a12[k*lda] -= (alpha11/F(2))*u12[k*ldu];

        // A22 := A22 - (a12' u12 + u12' a12)
        F* A22 = &ABuffer[(j+1)+(j+1)*lda];
        for( Int k=0; k<a21Height; ++k )
            a12Conj[k] = Conj(a12[k*lda]);
        for( Int k=0; k<a21Height; ++k )
            u12Conj[k] = Conj(u12[k*ldu]);
        blas::Her2
        ( 'U', a21Height,
          F(-1), u12Conj.data(), 1, a12Conj.data(), 1, A22, lda );

        // a12 := a12 - (alpha11/2)u12
        for( Int k=0; k<a21Height; ++k )
            a12[k*lda] -= (alpha11/F(2))*u12[k*ldu];
    }
}
示例#2
0
inline void 
TwoSidedTrsmUUnb( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& U )
{
#ifndef RELEASE
    PushCallStack("internal::TwoSidedTrsmUUnb");
#endif
    // Use the Variant 4 algorithm
    // (which annoyingly requires conjugations for the Her2)
    const int n = A.Height();
    const int lda = A.LDim();
    const int ldu = U.LDim();
    F* ABuffer = A.Buffer();
    const F* UBuffer = U.LockedBuffer();
    std::vector<F> a12Conj( n ), u12Conj( n );
    for( int j=0; j<n; ++j )
    {
        const int a21Height = n - (j+1);

        // Extract and store the diagonal value of U
        const F upsilon11 = ( diag==UNIT ? 1 : UBuffer[j+j*ldu] );

        // a01 := a01 / upsilon11
        F* a01 = &ABuffer[j*lda];
        if( diag != UNIT )
            for( int k=0; k<j; ++k )
                a01[k] /= upsilon11;

        // A02 := A02 - a01 u12
        F* A02 = &ABuffer[(j+1)*lda];
        const F* u12 = &UBuffer[j+(j+1)*ldu];
        blas::Geru( j, a21Height, F(-1), a01, 1, u12, ldu, A02, lda );

        // alpha11 := alpha11 / |upsilon11|^2
        ABuffer[j+j*lda] /= upsilon11*Conj(upsilon11);
        const F alpha11 = ABuffer[j+j*lda];

        // a12 := a12 / conj(upsilon11)
        F* a12 = &ABuffer[j+(j+1)*lda];
        if( diag != UNIT )
            for( int k=0; k<a21Height; ++k )
                a12[k*lda] /= Conj(upsilon11);

        // a12 := a12 - (alpha11/2)u12
        for( int k=0; k<a21Height; ++k )
            a12[k*lda] -= (alpha11/2)*u12[k*ldu];

        // A22 := A22 - (a12' u12 + u12' a12)
        F* A22 = &ABuffer[(j+1)+(j+1)*lda];
        for( int k=0; k<a21Height; ++k )
            a12Conj[k] = Conj(a12[k*lda]);
        for( int k=0; k<a21Height; ++k )
            u12Conj[k] = Conj(u12[k*ldu]);
        blas::Her2
        ( 'U', a21Height, F(-1), &u12Conj[0], 1, &a12Conj[0], 1, A22, lda );

        // a12 := a12 - (alpha11/2)u12
        for( int k=0; k<a21Height; ++k )
            a12[k*lda] -= (alpha11/2)*u12[k*ldu];
    }
#ifndef RELEASE
    PopCallStack();
#endif
}