void HermitianFromEVD
( UpperOrLower uplo, 
        AbstractDistMatrix<F>& APre,
  const AbstractDistMatrix<Base<F>>& wPre,
  const AbstractDistMatrix<F>& ZPre )
{
    DEBUG_CSE
    typedef Base<F> Real;

    DistMatrixWriteProxy<F,F,MC,MR> AProx( APre );
    DistMatrixReadProxy<Real,Real,VR,STAR> wProx( wPre );
    DistMatrixReadProxy<F,F,MC,MR> ZProx( ZPre );
    auto& A = AProx.Get();
    auto& w = wProx.GetLocked();
    auto& Z = ZProx.GetLocked();

    const Grid& g = A.Grid();
    DistMatrix<F,MC,  STAR> Z1_MC_STAR(g);
    DistMatrix<F,VR,  STAR> Z1_VR_STAR(g);
    DistMatrix<F,STAR,MR  > Z1Adj_STAR_MR(g);

    const Int m = Z.Height();
    const Int n = Z.Width();
    A.Resize( m, m );
    if( uplo == LOWER )
        MakeTrapezoidal( UPPER, A, 1 );
    else
        MakeTrapezoidal( LOWER, A, -1 );
    const Int bsize = Blocksize();
    for( Int k=0; k<n; k+=bsize )
    {
        const Int nb = Min(bsize,n-k);
        auto Z1 = Z( ALL,        IR(k,k+nb) );
        auto w1 = w( IR(k,k+nb), ALL        );

        Z1_MC_STAR.AlignWith( A );
        Z1_MC_STAR = Z1;
        Z1_VR_STAR.AlignWith( A );
        Z1_VR_STAR = Z1_MC_STAR;

        DiagonalScale( RIGHT, NORMAL, w1, Z1_VR_STAR );

        Z1Adj_STAR_MR.AlignWith( A );
        Adjoint( Z1_VR_STAR, Z1Adj_STAR_MR );
        LocalTrrk( uplo, F(1), Z1_MC_STAR, Z1Adj_STAR_MR, F(1), A );
    }
}
inline void
ReformHermitianMatrix
( UpperOrLower uplo,
        DistMatrix<Complex<R>,MC,MR  >& A,
  const DistMatrix<R,         VR,STAR>& w,
  const DistMatrix<Complex<R>,MC,MR  >& Z,
  const RealFunctor& f )
{
#ifndef RELEASE
    PushCallStack("hermitian_function::ReformHermitianMatrix");
#endif
    const Grid& g = A.Grid();
    typedef Complex<R> C;

    DistMatrix<C,MC,MR> ZL(g), ZR(g),
                        Z0(g), Z1(g), Z2(g);
    DistMatrix<R,VR,STAR> wT(g),  w0(g),
                          wB(g),  w1(g),
                                  w2(g);

    DistMatrix<C,MC,  STAR> Z1_MC_STAR(g);
    DistMatrix<C,VR,  STAR> Z1_VR_STAR(g);
    DistMatrix<C,STAR,MR  > Z1Adj_STAR_MR(g);
    DistMatrix<R,STAR,STAR> w1_STAR_STAR(g);

    if( uplo == LOWER )
        MakeTrapezoidal( LEFT, UPPER, 1, A );
    else
        MakeTrapezoidal( LEFT, LOWER, -1, A );
    LockedPartitionRight( Z, ZL, ZR, 0 );
    LockedPartitionDown
    ( w, wT,
         wB, 0 );
    while( ZL.Width() < Z.Width() )
    {
        LockedRepartitionRight
        ( ZL, /**/ ZR,
          Z0, /**/ Z1, Z2 );
        LockedRepartitionDown
        ( wT,  w0,
         /**/ /**/
               w1,
          wB,  w2 );

        Z1_MC_STAR.AlignWith( A );
        Z1_VR_STAR.AlignWith( A );
        Z1Adj_STAR_MR.AlignWith( A );
        //--------------------------------------------------------------------//
        Z1_MC_STAR = Z1;
        Z1_VR_STAR = Z1_MC_STAR;
        w1_STAR_STAR = w1;

        // Scale Z1[VR,* ] with the modified eigenvalues
        const int width = Z1_VR_STAR.Width();
        const int localHeight = Z1_VR_STAR.LocalHeight();
        for( int j=0; j<width; ++j )
        {
            const R omega = f(w1_STAR_STAR.GetLocalEntry(j,0));
            C* buffer = Z1_VR_STAR.LocalBuffer(0,j);
            for( int iLocal=0; iLocal<localHeight; ++iLocal )
                buffer[iLocal] *= omega;
        }

        Z1Adj_STAR_MR.AdjointFrom( Z1_VR_STAR );
        internal::LocalTrrk( uplo, (C)1, Z1_MC_STAR, Z1Adj_STAR_MR, (C)1, A );
        //--------------------------------------------------------------------//
        Z1Adj_STAR_MR.FreeAlignments();
        Z1_VR_STAR.FreeAlignments();
        Z1_MC_STAR.FreeAlignments();

        SlideLockedPartitionDown
        ( wT,  w0,
               w1,
         /**/ /**/
          wB,  w2 );
        SlideLockedPartitionRight
        ( ZL,     /**/ ZR,
          Z0, Z1, /**/ Z2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
inline void
ReformNormalMatrix
(       DistMatrix<Complex<R>,MC,MR  >& A,
  const DistMatrix<R,         VR,STAR>& w,
  const DistMatrix<Complex<R>,MC,MR  >& Z,
  const ComplexFunctor& f )
{
#ifndef RELEASE
    PushCallStack("hermitian_function::ReformNormalMatrix");
#endif
    const Grid& g = A.Grid();
    typedef Complex<R> C;

    DistMatrix<C,MC,MR> ZL(g), ZR(g),
                        Z0(g), Z1(g), Z2(g);
    DistMatrix<R,VR,STAR> wT(g),  w0(g),
                          wB(g),  w1(g),
                                  w2(g);

    DistMatrix<C,MC,  STAR> Z1_MC_STAR(g);
    DistMatrix<C,VR,  STAR> Z1_VR_STAR(g);
    DistMatrix<C,STAR,MR  > Z1Adj_STAR_MR(g);
    DistMatrix<R,STAR,STAR> w1_STAR_STAR(g);

    Zero( A );
    LockedPartitionRight( Z, ZL, ZR, 0 );
    LockedPartitionDown
    ( w, wT,
         wB, 0 );
    while( ZL.Width() < Z.Width() )
    {
        LockedRepartitionRight
        ( ZL, /**/ ZR,
          Z0, /**/ Z1, Z2 );
        LockedRepartitionDown
        ( wT,  w0,
         /**/ /**/
               w1,
          wB,  w2 );

        Z1_MC_STAR.AlignWith( A );
        Z1_VR_STAR.AlignWith( A );
        Z1Adj_STAR_MR.AlignWith( A );
        //--------------------------------------------------------------------//
        Z1_MC_STAR = Z1;
        Z1_VR_STAR = Z1_MC_STAR;
        w1_STAR_STAR = w1;

        // Scale Z1[VR,* ] with the modified eigenvalues
        const int width = Z1_VR_STAR.Width();
        const int localHeight = Z1_VR_STAR.LocalHeight();
        for( int j=0; j<width; ++j )
        {
            const C conjOmega = Conj(f(w1_STAR_STAR.GetLocalEntry(j,0)));
            C* buffer = Z1_VR_STAR.LocalBuffer(0,j);
            for( int iLocal=0; iLocal<localHeight; ++iLocal )
                buffer[iLocal] *= conjOmega;
        }

        Z1Adj_STAR_MR.AdjointFrom( Z1_VR_STAR );
        internal::LocalGemm
        ( NORMAL, NORMAL, (C)1, Z1_MC_STAR, Z1Adj_STAR_MR, (C)1, A );
        //--------------------------------------------------------------------//
        Z1Adj_STAR_MR.FreeAlignments();
        Z1_VR_STAR.FreeAlignments();
        Z1_MC_STAR.FreeAlignments();

        SlideLockedPartitionDown
        ( wT,  w0,
               w1,
         /**/ /**/
          wB,  w2 );
        SlideLockedPartitionRight
        ( ZL,     /**/ ZR,
          Z0, Z1, /**/ Z2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
inline void
HermitianFromEVD
( UpperOrLower uplo,
        DistMatrix<F>& A,
  const DistMatrix<BASE(F),VR,STAR>& w,
  const DistMatrix<F>& Z )
{
#ifndef RELEASE
    CallStackEntry entry("HermitianFromEVD");
#endif
    const Grid& g = A.Grid();
    typedef BASE(F) R;

    DistMatrix<F> ZL(g), ZR(g),
                  Z0(g), Z1(g), Z2(g);
    DistMatrix<R,VR,STAR> wT(g),  w0(g),
                          wB(g),  w1(g),
                                  w2(g);

    DistMatrix<F,MC,  STAR> Z1_MC_STAR(g);
    DistMatrix<F,VR,  STAR> Z1_VR_STAR(g);
    DistMatrix<F,STAR,MR  > Z1Adj_STAR_MR(g);
    DistMatrix<R,STAR,STAR> w1_STAR_STAR(g);

    A.ResizeTo( Z.Height(), Z.Height() );
    if( uplo == LOWER )
        MakeTrapezoidal( UPPER, A, 1 );
    else
        MakeTrapezoidal( LOWER, A, -1 );
    LockedPartitionRight( Z, ZL, ZR, 0 );
    LockedPartitionDown
    ( w, wT,
         wB, 0 );
    while( ZL.Width() < Z.Width() )
    {
        LockedRepartitionRight
        ( ZL, /**/ ZR,
          Z0, /**/ Z1, Z2 );
        LockedRepartitionDown
        ( wT,  w0,
         /**/ /**/
               w1,
          wB,  w2 );

        Z1_MC_STAR.AlignWith( A );
        Z1_VR_STAR.AlignWith( A );
        Z1Adj_STAR_MR.AlignWith( A );
        //--------------------------------------------------------------------//
        Z1_MC_STAR = Z1;
        Z1_VR_STAR = Z1_MC_STAR;
        w1_STAR_STAR = w1;

        DiagonalScale( RIGHT, NORMAL, w1_STAR_STAR, Z1_VR_STAR );

        Z1Adj_STAR_MR.AdjointFrom( Z1_VR_STAR );
        LocalTrrk( uplo, F(1), Z1_MC_STAR, Z1Adj_STAR_MR, F(1), A );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDown
        ( wT,  w0,
               w1,
         /**/ /**/
          wB,  w2 );
        SlideLockedPartitionRight
        ( ZL,     /**/ ZR,
          Z0, Z1, /**/ Z2 );
    }
}