Exemplo n.º 1
0
inline void
PanelHouseholder( DistMatrix<F>& A, DistMatrix<F,MD,STAR>& t )
{
#ifndef RELEASE
    CallStackEntry entry("lq::PanelHouseholder");
    if( A.Grid() != t.Grid() )
        LogicError("{A,t} must be distributed over the same grid");
    if( t.Height() != Min(A.Height(),A.Width()) || t.Width() != 1 )
        LogicError
        ("t must be a vector of height equal to the minimum dimension of A");
    if( !t.AlignedWithDiagonal( A, 0 ) )
        LogicError("t must be aligned with A's main diagonal");
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<F>
        ATL(g), ATR(g),  A00(g), a01(g),     A02(g),  aTopRow(g), ABottomPan(g),
        ABL(g), ABR(g),  a10(g), alpha11(g), a12(g),
                         A20(g), a21(g),     A22(g);
    DistMatrix<F,MD,STAR>
        tT(g),  t0(g),
        tB(g),  tau1(g),
                t2(g);

    // Temporary distributions
    DistMatrix<F> aTopRowConj(g);
    DistMatrix<F,STAR,MR  > aTopRowConj_STAR_MR(g);
    DistMatrix<F,MC,  STAR> z_MC_STAR(g);

    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    PartitionDown
    ( t, tT,
         tB, 0 );
    while( ATL.Height() < A.Height() && ATL.Width() < A.Width() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ a01,     A02,
         /*************/ /**********************/
               /**/       a10, /**/ alpha11, a12,
          ABL, /**/ ABR,  A20, /**/ a21,     A22, 1 );

        RepartitionDown
        ( tT,  t0,
         /**/ /****/
               tau1,
          tB,  t2, 1 );

        View1x2( aTopRow, alpha11, a12 );
        View1x2( ABottomPan, a21, A22 );

        aTopRowConj_STAR_MR.AlignWith( ABottomPan );
        z_MC_STAR.AlignWith( ABottomPan );
        //--------------------------------------------------------------------//
        // Compute the Householder reflector
        const F tau = Reflector( alpha11, a12 );
        tau1.Set( 0, 0, tau );

        // Apply the Householder reflector
        const bool myDiagonalEntry = ( g.Row() == alpha11.ColAlignment() &&
                                       g.Col() == alpha11.RowAlignment() );
        F alpha = 0;
        if( myDiagonalEntry )
        {
            alpha = alpha11.GetLocal(0,0);
            alpha11.SetLocal(0,0,1);
        }
        Conjugate( aTopRow, aTopRowConj );
        aTopRowConj_STAR_MR = aTopRowConj;
        Zeros( z_MC_STAR, ABottomPan.Height(), 1 );
        LocalGemv
        ( NORMAL, F(1), ABottomPan, aTopRowConj_STAR_MR, F(0), z_MC_STAR );
        z_MC_STAR.SumOverRow();
        Ger
        ( -Conj(tau),
          z_MC_STAR.LockedMatrix(),
          aTopRowConj_STAR_MR.LockedMatrix(),
          ABottomPan.Matrix() );
        if( myDiagonalEntry )
            alpha11.SetLocal(0,0,alpha);
        //--------------------------------------------------------------------//

        SlidePartitionDown
        ( tT,  t0,
               tau1,
         /**/ /****/
          tB,  t2 );

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, a01,     /**/ A02,
               /**/       a10, alpha11, /**/ a12,
         /*************/ /**********************/
          ABL, /**/ ABR,  A20, a21,     /**/ A22 );
    }
}
Exemplo n.º 2
0
void UUnb
( AbstractDistMatrix<F>& APre,
  AbstractDistMatrix<F>& householderScalarsPre )
{
    DEBUG_CSE

    DistMatrixReadWriteProxy<F,F,MC,MR> AProx( APre );
    DistMatrixWriteProxy<F,F,STAR,STAR>
      householderScalarsProx( householderScalarsPre );
    auto& A = AProx.Get();
    auto& householderScalars = householderScalarsProx.Get();

    const Grid& g = A.Grid();
    const Int n = A.Height();
    const Int householderScalarsHeight = Max(n-1,0);
    householderScalars.SetGrid( g );
    householderScalars.Resize( householderScalarsHeight, 1 );

    DistMatrix<F,MC,STAR> a21_MC(g);
    DistMatrix<F,MR,STAR> a21_MR(g);
    DistMatrix<F,MC,STAR> x1_MC(g); 
    DistMatrix<F,MR,STAR> x12Adj_MR(g);

    for( Int k=0; k<n-1; ++k )
    {
        const Range<Int> ind1( k,   k+1 ),
                         ind2( k+1, n   );

        auto a21 = A( ind2,    ind1 );
        auto A22 = A( ind2,    ind2 );
        auto A2  = A( IR(0,n), ind2 );

        auto alpha21T = A( IR(k+1,k+2), ind1 );
        auto a21B     = A( IR(k+2,n),   ind1 );

        // Find tau and v such that
        //  / I - tau | 1 | | 1, v^H | \ | alpha21T | = | beta |
        //  \         | v |            / |     a21B |   |    0 |
        const F tau = LeftReflector( alpha21T, a21B );
        householderScalars.Set(k,0,tau);

        // Temporarily set a21 := | 1 |
        //                        | v |
        F beta=0;
        if( alpha21T.IsLocal(0,0) )
            beta = alpha21T.GetLocal(0,0);
        alpha21T.Set(0,0,F(1));

        // A2 := A2 Hous(a21,tau)^H
        //     = A2 (I - conj(tau) a21 a21^H)
        //     = A2 - conj(tau) (A2 a21) a21^H
        // -----------------------------------
        // x1 := A2 a21
        a21_MR.AlignWith( A2 );
        a21_MR = a21;
        x1_MC.AlignWith( A2 );
        Zeros( x1_MC, n, 1 );
        LocalGemv( NORMAL, F(1), A2, a21_MR, F(0), x1_MC );
        El::AllReduce( x1_MC, A2.RowComm() );
        // A2 := A2 - conj(tau) x1 a21^H
        LocalGer( -Conj(tau), x1_MC, a21_MR, A2 ); 

        // A22 := Hous(a21,tau) A22
        //      = (I - tau a21 a21^H) A22
        //      = A22 - tau a21 (A22^H a21)^H
        // ----------------------------------
        // x12^H := (a21^H A22)^H = A22^H a21
        a21_MC.AlignWith( A22 );
        a21_MC = a21;
        x12Adj_MR.AlignWith( A22 );
        Zeros( x12Adj_MR, A22.Width(), 1 );
        LocalGemv( ADJOINT, F(1), A22, a21_MC, F(0), x12Adj_MR );
        El::AllReduce( x12Adj_MR, A22.ColComm() );
        // A22 := A22 - tau a21 x12
        LocalGer( -tau, a21_MC, x12Adj_MR, A22 );

        // Put beta back 
        if( alpha21T.IsLocal(0,0) )
            alpha21T.SetLocal(0,0,beta);
    }
}
Exemplo n.º 3
0
inline void
TrsvUN( UnitOrNonUnit diag, const DistMatrix<F>& U, DistMatrix<F>& x )
{
#ifndef RELEASE
    CallStackEntry entry("internal::TrsvUN");
    if( U.Grid() != x.Grid() )
        LogicError("{U,x} must be distributed over the same grid");
    if( U.Height() != U.Width() )
        LogicError("U must be square");
    if( x.Width() != 1 && x.Height() != 1 )
        LogicError("x must be a vector");
    const Int xLength = ( x.Width() == 1 ? x.Height() : x.Width() );
    if( U.Width() != xLength )
        LogicError("Nonconformal TrsvUN");
#endif
    const Grid& g = U.Grid();

    if( x.Width() == 1 )
    {
        // Matrix views 
        DistMatrix<F> U01(g),
                      U11(g);
        DistMatrix<F> 
            xT(g),  x0(g),
            xB(g),  x1(g),
                    x2(g);

        // Temporary distributions
        DistMatrix<F,STAR,STAR> U11_STAR_STAR(g);
        DistMatrix<F,STAR,STAR> x1_STAR_STAR(g);
        DistMatrix<F,MR,  STAR> x1_MR_STAR(g);
        DistMatrix<F,MC,  STAR> z_MC_STAR(g);

        // Views of z[MC,* ], which will store updates to x
        DistMatrix<F,MC,STAR> z0_MC_STAR(g),
                              z1_MC_STAR(g);

        z_MC_STAR.AlignWith( U );
        Zeros( z_MC_STAR, x.Height(), 1 );

        // Start the algorithm
        PartitionUp
        ( x, xT,
             xB, 0 );
        while( xT.Height() > 0 )
        {
            RepartitionUp
            ( xT,  x0,
                   x1,
             /**/ /**/
              xB,  x2 );

            const Int n0 = x0.Height();
            const Int n1 = x1.Height();
            LockedView( U01, U, 0,  n0, n0, n1 );
            LockedView( U11, U, n0, n0, n1, n1 );
            View( z0_MC_STAR, z_MC_STAR, 0,  0, n0, 1 );
            View( z1_MC_STAR, z_MC_STAR, n0, 0, n1, 1 );

            x1_MR_STAR.AlignWith( U01 );
            //----------------------------------------------------------------//
            if( x2.Height() != 0 )
                x1.SumScatterUpdate( F(1), z1_MC_STAR );

            x1_STAR_STAR = x1;
            U11_STAR_STAR = U11;
            Trsv
            ( UPPER, NORMAL, diag,
              U11_STAR_STAR.LockedMatrix(),
              x1_STAR_STAR.Matrix() );
            x1 = x1_STAR_STAR;

            x1_MR_STAR = x1_STAR_STAR;
            LocalGemv( NORMAL, F(-1), U01, x1_MR_STAR, F(1), z0_MC_STAR );
            //----------------------------------------------------------------//

            SlidePartitionUp
            ( xT,  x0,
             /**/ /**/
                   x1,
              xB,  x2 );
        }
    }
    else
    {
        // Matrix views 
        DistMatrix<F> U01(g),
                      U11(g);
        DistMatrix<F> 
            xL(g), xR(g),
            x0(g), x1(g), x2(g);

        // Temporary distributions
        DistMatrix<F,STAR,STAR> U11_STAR_STAR(g);
        DistMatrix<F,STAR,STAR> x1_STAR_STAR(g);
        DistMatrix<F,STAR,MR  > x1_STAR_MR(g);
        DistMatrix<F,MC,  MR  > z1(g);
        DistMatrix<F,MR,  MC  > z1_MR_MC(g);
        DistMatrix<F,STAR,MC  > z_STAR_MC(g);

        // Views of z[* ,MC]
        DistMatrix<F,STAR,MC> z0_STAR_MC(g), z1_STAR_MC(g);

        z_STAR_MC.AlignWith( U );
        Zeros( z_STAR_MC, 1, x.Width() );

        // Start the algorithm
        PartitionLeft( x,  xL, xR, 0 );
        while( xL.Width() > 0 )
        {
            RepartitionLeft
            ( xL,     /**/ xR,
              x0, x1, /**/ x2 );

            const Int n0 = x0.Width();
            const Int n1 = x1.Width();
            LockedView( U01, U, 0,  n0, n0, n1 );
            LockedView( U11, U, n0, n0, n1, n1 );
            View( z0_STAR_MC, z_STAR_MC, 0, 0,  1, n0 );
            View( z1_STAR_MC, z_STAR_MC, 0, n0, 1, n1 );

            x1_STAR_MR.AlignWith( U01 );
            z1.AlignWith( x1 );
            //----------------------------------------------------------------//
            if( x2.Width() != 0 )
            {
                z1_MR_MC.SumScatterFrom( z1_STAR_MC );
                z1 = z1_MR_MC;
                Axpy( F(1), z1, x1 );
            }

            x1_STAR_STAR = x1;
            U11_STAR_STAR = U11;
            Trsv
            ( UPPER, NORMAL, diag,
              U11_STAR_STAR.LockedMatrix(),
              x1_STAR_STAR.Matrix() );
            x1 = x1_STAR_STAR;

            x1_STAR_MR = x1_STAR_STAR;
            LocalGemv( NORMAL, F(-1), U01, x1_STAR_MR, F(1), z0_STAR_MC );
            //----------------------------------------------------------------//

            SlidePartitionLeft
            ( xL, /**/ xR,
              x0, /**/ x1, x2 );
        }
    }
}
Exemplo n.º 4
0
inline void
LocalSymvColAccumulateU
( T alpha, 
  const DistMatrix<T>& A,
  const DistMatrix<T,MC,STAR>& x_MC_STAR,
  const DistMatrix<T,MR,STAR>& x_MR_STAR,
        DistMatrix<T,MC,STAR>& z_MC_STAR,
        DistMatrix<T,MR,STAR>& z_MR_STAR,
  bool conjugate=false )
{
#ifndef RELEASE
    CallStackEntry entry("internal::LocalSymvColAccumulateU");
    if( A.Grid() != x_MC_STAR.Grid() ||
        x_MC_STAR.Grid() != x_MR_STAR.Grid() ||
        x_MR_STAR.Grid() != z_MC_STAR.Grid() ||
        z_MC_STAR.Grid() != z_MR_STAR.Grid() )
        LogicError
        ("{A,x,z} must be distributed over the same grid");
    if( x_MC_STAR.Width() != 1 || x_MR_STAR.Width() != 1 ||
        z_MC_STAR.Width() != 1 || z_MR_STAR.Width() != 1 )
        LogicError("Expected x and z to be column vectors");
    if( A.Height() != A.Width() || 
        A.Height() != x_MC_STAR.Height() ||
        A.Height() != x_MR_STAR.Height() ||
        A.Height() != z_MC_STAR.Height() ||
        A.Height() != z_MR_STAR.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal LocalSymvColAccumulateU: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  x[MC,* ] ~ " << x_MC_STAR.Height() << " x " 
                               << x_MC_STAR.Width() << "\n"
            << "  x[MR,* ] ~ " << x_MR_STAR.Height() << " x " 
                               << x_MR_STAR.Width() << "\n"
            << "  z[MC,* ] ~ " << z_MC_STAR.Height() << " x " 
                               << z_MC_STAR.Width() << "\n"
            << "  z[MR,* ] ~ " << z_MR_STAR.Height() << " x " 
                               << z_MR_STAR.Width() << "\n";
        LogicError( msg.str() );
    }
    if( x_MC_STAR.ColAlignment() != A.ColAlignment() ||
        x_MR_STAR.ColAlignment() != A.RowAlignment() ||
        z_MC_STAR.ColAlignment() != A.ColAlignment() ||
        z_MR_STAR.ColAlignment() != A.RowAlignment() )
        LogicError("Partial matrix distributions are misaligned");
#endif
    const Grid& g = A.Grid();
    const Orientation orientation = ( conjugate ? ADJOINT : TRANSPOSE );

    // Matrix views
    DistMatrix<T> A11(g), A12(g);
    DistMatrix<T> D11(g);

    DistMatrix<T,MC,STAR> x1_MC_STAR(g);
    DistMatrix<T,MR,STAR> 
        xT_MR_STAR(g),  x0_MR_STAR(g),
        xB_MR_STAR(g),  x1_MR_STAR(g),
                        x2_MR_STAR(g);
    DistMatrix<T,MC,STAR> z1_MC_STAR(g);
    DistMatrix<T,MR,STAR> z1_MR_STAR(g),
                          z2_MR_STAR(g);

    // We want our local gemvs to be of width blocksize, so we will 
    // temporarily change to max(r,c) times the current blocksize
    const Int ratio = Max( g.Height(), g.Width() );
    PushBlocksizeStack( ratio*LocalSymvBlocksize<T>() );
                 
    LockedPartitionDown
    ( x_MR_STAR, xT_MR_STAR,
                 xB_MR_STAR, 0 );
    while( xT_MR_STAR.Height() < x_MR_STAR.Height() )
    {
        LockedRepartitionDown
        ( xT_MR_STAR,  x0_MR_STAR,
         /**********/ /**********/
                       x1_MR_STAR,
          xB_MR_STAR,  x2_MR_STAR );

        const Int n0 = x0_MR_STAR.Height();
        const Int n1 = x1_MR_STAR.Height();
        const Int n2 = x2_MR_STAR.Height();
        LockedView( A11, A, n0, n0,    n1, n1 );
        LockedView( A12, A, n0, n0+n1, n1, n2 );
        LockedView( x1_MC_STAR, x_MC_STAR, n0, 0, n1, 1 );
        View( z1_MC_STAR, z_MC_STAR, n0,    0, n1, 1 );
        View( z1_MR_STAR, z_MR_STAR, n0,    0, n1, 1 );
        View( z2_MR_STAR, z_MR_STAR, n0+n1, 0, n2, 1 );

        D11.AlignWith( A11 );
        //--------------------------------------------------------------------//
        // TODO: These diagonal block updates can be greatly improved
        D11 = A11;
        MakeTriangular( UPPER, D11 );
        LocalGemv( NORMAL, alpha, D11, x1_MR_STAR, T(1), z1_MC_STAR );
        SetDiagonal( D11, T(0) );
        LocalGemv( orientation, alpha, D11, x1_MC_STAR, T(1), z1_MR_STAR );
        
        LocalGemv( NORMAL, alpha, A12, x2_MR_STAR, T(1), z1_MC_STAR );
        LocalGemv( orientation, alpha, A12, x1_MC_STAR, T(1), z2_MR_STAR );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDown
        ( xT_MR_STAR,  x0_MR_STAR,
                       x1_MR_STAR,
         /**********/ /**********/
          xB_MR_STAR,  x2_MR_STAR );
    }
    PopBlocksizeStack();
}
Exemplo n.º 5
0
inline void LUnb( DistMatrix<R>& A )
{
#ifndef RELEASE
    PushCallStack("bidiag::LUnb");
    if( A.Height() > A.Width() )
        throw std::logic_error("A must be at least as wide as it is tall");
#endif
    const Grid& g = A.Grid();

    // Matrix views 
    DistMatrix<R>
        ATL(g), ATR(g),  A00(g), a01(g),     A02(g),  alpha21T(g), a1R(g),
        ABL(g), ABR(g),  a10(g), alpha11(g), a12(g),  a21B(g),     A2R(g),
                         A20(g), a21(g),     A22(g);

    // Temporary matrices
    DistMatrix<R,MC,  STAR> a21_MC_STAR(g);
    DistMatrix<R,STAR,MR  > a1R_STAR_MR(g);
    DistMatrix<R,MR,  STAR> x12Trans_MR_STAR(g);
    DistMatrix<R,MC,  STAR> w21_MC_STAR(g);

    PushBlocksizeStack( 1 );
    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    while( ATL.Width() < A.Width() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ a01,     A02,
         /*************/ /**********************/
               /**/       a10, /**/ alpha11, a12,
          ABL, /**/ ABR,  A20, /**/ a21,     A22 );

        View1x2( a1R, alpha11, a12 );
        View1x2( A2R, a21, A22 );

        a21_MC_STAR.AlignWith( A22 );
        a1R_STAR_MR.AlignWith( A2R );
        x12Trans_MR_STAR.AlignWith( A22 );
        w21_MC_STAR.AlignWith( A2R );
        Zeros( a12.Width(), 1, x12Trans_MR_STAR );
        Zeros( a21.Height(), 1, w21_MC_STAR );
        const bool thisIsMyRow = ( g.Row() == alpha11.ColAlignment() );
        const bool thisIsMyCol = ( g.Col() == alpha11.RowAlignment() );
        const bool nextIsMyRow = ( g.Row() == a21.ColAlignment() );
        //--------------------------------------------------------------------//

        // Find tauP, u, and epsilonP such that
        //     I - tauP | 1 | | 1, v^T | | alpha11 | = | epsilonP |
        //              | v |            |   a12^T | = |    0     |
        const R tauP = Reflector( alpha11, a12 );
        R epsilonP=0;
        if( thisIsMyCol && thisIsMyRow )
            epsilonP = alpha11.GetLocal(0,0);

        // Set a1R^T = | 1 | and form w21 := A2R a1R^T = A2R | 1 |
        //             | v |                                 | v |
        alpha11.Set(0,0,R(1));
        a1R_STAR_MR = a1R;
        LocalGemv( NORMAL, R(1), A2R, a1R_STAR_MR, R(0), w21_MC_STAR );
        w21_MC_STAR.SumOverRow();

        // A2R := A2R - tauP w21 a1R
        //      = A2R - tauP A2R a1R^T a1R
        //      = A2R (I - tauP a1R^T a1R)
        LocalGer( -tauP, w21_MC_STAR, a1R_STAR_MR, A2R );

        // Put epsilonP back instead of the temporary value, 1
        if( thisIsMyCol && thisIsMyRow )
            alpha11.SetLocal(0,0,epsilonP);

        if( A22.Height() != 0 )
        {
            // Expose the subvector we seek to zero, a21B
            PartitionDown
            ( a21, alpha21T,
                   a21B );

            // Find tauQ, u, and epsilonQ such that
            //     I - tauQ | 1 | | 1, u^T | | alpha21T | = | epsilonQ |
            //              | u |            | a21B     | = |    0     |
            const R tauQ = Reflector( alpha21T, a21B );
            R epsilonQ=0;
            if( nextIsMyRow && thisIsMyCol )
                epsilonQ = alpha21T.GetLocal(0,0);

            // Set a21 = | 1 | and form x12^T = (a21^T A22)^T = A22^T a21
            //           | u |  
            alpha21T.Set(0,0,R(1));
            a21_MC_STAR = a21;
            LocalGemv
            ( TRANSPOSE, R(1), A22, a21_MC_STAR, R(0), x12Trans_MR_STAR );
            x12Trans_MR_STAR.SumOverCol();

            // A22 := A22 - tauQ a21 x12
            //      = A22 - tauQ a21 a21^T A22
            //      = (I - tauQ a21 a21^T) A22
            LocalGer( -tauQ, a21_MC_STAR, x12Trans_MR_STAR, A22 );

            // Put epsilonQ back instead of the temporary value, 1
            if( nextIsMyRow && thisIsMyCol )
                alpha21T.SetLocal(0,0,epsilonQ);
        }
        //--------------------------------------------------------------------//
        a1R_STAR_MR.FreeAlignments();
        a21_MC_STAR.FreeAlignments();
        x12Trans_MR_STAR.FreeAlignments();
        w21_MC_STAR.FreeAlignments();

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, a01,     /**/ A02,
               /**/       a10, alpha11, /**/ a12,
         /*************/ /**********************/
          ABL, /**/ ABR,  A20, a21,     /**/ A22 );
    }
    PopBlocksizeStack();
#ifndef RELEASE
    PopCallStack();
#endif
}
Exemplo n.º 6
0
inline void LUnb
( DistMatrix<Complex<R> >& A, 
  DistMatrix<Complex<R>,MD,STAR>& tP,
  DistMatrix<Complex<R>,MD,STAR>& tQ )
{
#ifndef RELEASE
    PushCallStack("bidiag::LUnb");
#endif
    const int tPHeight = A.Height();
    const int tQHeight = std::max(A.Height()-1,0);
#ifndef RELEASE
    if( A.Grid() != tP.Grid() || tP.Grid() != tQ.Grid() )
        throw std::logic_error("Process grids do not match");
    if( A.Height() > A.Width() )
        throw std::logic_error("A must be at least as wide as it is tall");
    if( tP.Viewing() && (tP.Height() != tPHeight || tP.Width() != 1) )
        throw std::logic_error("tP is the wrong height");
    if( tQ.Viewing() && (tQ.Height() != tQHeight || tQ.Width() != 1) )
        throw std::logic_error("tQ is the wrong height");
#endif
    typedef Complex<R> C;
    const Grid& g = A.Grid();

    if( !tP.Viewing() )
        tP.ResizeTo( tPHeight, 1 );
    if( !tQ.Viewing() )
        tQ.ResizeTo( tQHeight, 1 );

    // Matrix views 
    DistMatrix<C>
        ATL(g), ATR(g),  A00(g), a01(g),     A02(g),  alpha21T(g), a1R(g),
        ABL(g), ABR(g),  a10(g), alpha11(g), a12(g),  a21B(g),     A2R(g),
                         A20(g), a21(g),     A22(g);

    // Temporary matrices
    DistMatrix<C,MC,  STAR> a21_MC_STAR(g);
    DistMatrix<C,STAR,MR  > a1R_STAR_MR(g);
    DistMatrix<C,MR,  STAR> x12Adj_MR_STAR(g);
    DistMatrix<C,MC,  STAR> w21_MC_STAR(g);

    PushBlocksizeStack( 1 );
    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    while( ATL.Width() < A.Width() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ a01,     A02,
         /*************/ /**********************/
               /**/       a10, /**/ alpha11, a12,
          ABL, /**/ ABR,  A20, /**/ a21,     A22 );

        View1x2( a1R, alpha11, a12 );
        View1x2( A2R, a21, A22 );

        a21_MC_STAR.AlignWith( A22 );
        a1R_STAR_MR.AlignWith( A2R );
        x12Adj_MR_STAR.AlignWith( A22 );
        w21_MC_STAR.AlignWith( A2R );
        Zeros( a12.Width(), 1, x12Adj_MR_STAR );
        Zeros( a21.Height(), 1, w21_MC_STAR );
        const bool thisIsMyRow = ( g.Row() == alpha11.ColAlignment() );
        const bool thisIsMyCol = ( g.Col() == alpha11.RowAlignment() );
        const bool nextIsMyRow = ( g.Row() == a21.ColAlignment() );
        //--------------------------------------------------------------------//

        // Due to deficiencies in the BLAS ?gemv routines, this section is
        // easier if we temporary conjugate a1R = | alpha11, a12 |
        Conjugate( a1R );

        // Find tauP, u, and epsilonP such that
        //     I - conj(tauP) | 1 | | 1, v^H | | alpha11 | = | epsilonP |
        //                    | v |            |   a12^T | = |    0     |
        const C tauP = Reflector( alpha11, a12 );
        tP.Set(A00.Height(),0,tauP);
        C epsilonP=0;
        if( thisIsMyCol && thisIsMyRow )
            epsilonP = alpha11.GetLocal(0,0);

        // Set a1R^T = | 1 | and form w21 := A2R a1R^T = A2R | 1 |
        //             | v |                                 | v |
        alpha11.Set(0,0,C(1));
        a1R_STAR_MR = a1R;
        LocalGemv( NORMAL, C(1), A2R, a1R_STAR_MR, C(0), w21_MC_STAR );
        w21_MC_STAR.SumOverRow();

        // A2R := A2R - tauP w21 conj(a1R)
        //      = A2R - tauP A2R a1R^T conj(a1R)
        //      = A2R conj(I - conj(tauP) a1R^H a1R)
        // which compensates for the fact that the reflector was generated
        // on the conjugated a1R
        LocalGer( -tauP, w21_MC_STAR, a1R_STAR_MR, A2R );

        // Put epsilonP back instead of the temporary value, 1
        if( thisIsMyCol && thisIsMyRow )
            alpha11.SetLocal(0,0,epsilonP);

        // Undo the temporary conjugation
        Conjugate( a1R );

        if( A22.Height() != 0 )
        {
            // Expose the subvector we seek to zero, a21B
            PartitionDown
            ( a21, alpha21T,
                   a21B );

            // Find tauQ, u, and epsilonQ such that
            //     I - conj(tauQ) | 1 | | 1, u^H | | alpha21T | = | epsilonQ |
            //                    | u |            | a21B     | = |    0     |
            const C tauQ = Reflector( alpha21T, a21B );
            tQ.Set(A00.Height(),0,tauQ);
            C epsilonQ=0;
            if( nextIsMyRow && thisIsMyCol )
                epsilonQ = alpha21T.GetLocal(0,0);

            // Set a21 = | 1 | and form x12^H = (a21^H A22)^H = A22^H a21
            //           | u |  
            alpha21T.Set(0,0,C(1));
            a21_MC_STAR = a21;
            LocalGemv( ADJOINT, C(1), A22, a21_MC_STAR, C(0), x12Adj_MR_STAR );
            x12Adj_MR_STAR.SumOverCol();

            // A22 := A22 - conj(tauQ) a21 x12
            //      = A22 - conj(tauQ) a21 a21^H A22
            //      = (I - conj(tauQ) a21 a21^H) A22
            LocalGer( -Conj(tauQ), a21_MC_STAR, x12Adj_MR_STAR, A22 );

            // Put epsilonQ back instead of the temporary value, 1
            if( nextIsMyRow && thisIsMyCol )
                alpha21T.SetLocal(0,0,epsilonQ);
        }
        //--------------------------------------------------------------------//
        a21_MC_STAR.FreeAlignments();
        a1R_STAR_MR.FreeAlignments();
        x12Adj_MR_STAR.FreeAlignments();
        w21_MC_STAR.FreeAlignments();

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, a01,     /**/ A02,
               /**/       a10, alpha11, /**/ a12,
         /*************/ /**********************/
          ABL, /**/ ABR,  A20, a21,     /**/ A22 );
    }
    PopBlocksizeStack();
#ifndef RELEASE
    PopCallStack();
#endif
}