Beispiel #1
0
inline void
internal::LocalTrmmAccumulateLUN
( Orientation orientation, UnitOrNonUnit diag, T alpha,
  const DistMatrix<T,MC,  MR  >& U,
  const DistMatrix<T,STAR,MR  >& XAdjOrTrans_STAR_MR,
        DistMatrix<T,MC,  STAR>& Z_MC_STAR )
{
#ifndef RELEASE
    PushCallStack("internal::LocalTrmmAccumulateLUN");
    if( U.Grid() != XAdjOrTrans_STAR_MR.Grid() ||
        XAdjOrTrans_STAR_MR.Grid() != Z_MC_STAR.Grid() )
        throw std::logic_error
        ("{U,X,Z} must be distributed over the same grid");
    if( U.Height() != U.Width() ||
        U.Height() != XAdjOrTrans_STAR_MR.Width() ||
        U.Height() != Z_MC_STAR.Height() ||
        XAdjOrTrans_STAR_MR.Height() != Z_MC_STAR.Width() )
    {
        std::ostringstream msg;
        msg << "Nonconformal LocalTrmmAccumulateLUN: \n"
            << "  U ~ " << U.Height() << " x " << U.Width() << "\n"
            << "  X^H/T[* ,MR] ~ " << XAdjOrTrans_STAR_MR.Height() << " x "
                                   << XAdjOrTrans_STAR_MR.Width() << "\n"
            << "  Z[MC,* ] ~ " << Z_MC_STAR.Height() << " x "
                               << Z_MC_STAR.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
    if( XAdjOrTrans_STAR_MR.RowAlignment() != U.RowAlignment() ||
        Z_MC_STAR.ColAlignment() != U.ColAlignment() )
        throw std::logic_error("Partial matrix distributions are misaligned");
#endif
    const Grid& g = U.Grid();

    // Matrix views
    DistMatrix<T,MC,MR>
        UTL(g), UTR(g),  U00(g), U01(g), U02(g),
        UBL(g), UBR(g),  U10(g), U11(g), U12(g),
                         U20(g), U21(g), U22(g);

    DistMatrix<T,MC,MR> D11(g);

    DistMatrix<T,STAR,MR>
        XLAdjOrTrans_STAR_MR(g), XRAdjOrTrans_STAR_MR(g),
        X0AdjOrTrans_STAR_MR(g), X1AdjOrTrans_STAR_MR(g), 
        X2AdjOrTrans_STAR_MR(g);

    DistMatrix<T,MC,STAR>
        ZT_MC_STAR(g),  Z0_MC_STAR(g),
        ZB_MC_STAR(g),  Z1_MC_STAR(g),
                        Z2_MC_STAR(g);

    const int ratio = std::max( g.Height(), g.Width() );
    PushBlocksizeStack( ratio*Blocksize() );

    LockedPartitionDownDiagonal
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    LockedPartitionRight
    ( XAdjOrTrans_STAR_MR, XLAdjOrTrans_STAR_MR, XRAdjOrTrans_STAR_MR, 0 );
    PartitionDown
    ( Z_MC_STAR, ZT_MC_STAR,
                 ZB_MC_STAR, 0 );
    while( UTL.Height() < U.Height() )
    {
        LockedRepartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, /**/ U01, U02,
         /*************/ /******************/
               /**/       U10, /**/ U11, U12,
          UBL, /**/ UBR,  U20, /**/ U21, U22 );

        LockedRepartitionRight
        ( XLAdjOrTrans_STAR_MR, /**/ XRAdjOrTrans_STAR_MR,
          X0AdjOrTrans_STAR_MR, /**/ X1AdjOrTrans_STAR_MR, X2AdjOrTrans_STAR_MR 
        );

        RepartitionDown
        ( ZT_MC_STAR,  Z0_MC_STAR,
         /**********/ /**********/
                       Z1_MC_STAR,
          ZB_MC_STAR,  Z2_MC_STAR );

        D11.AlignWith( U11 );
        //--------------------------------------------------------------------//
        D11 = U11;
        MakeTrapezoidal( LEFT, UPPER, 0, D11 );
        if( diag == UNIT )
            SetDiagonalToOne( D11 );
        internal::LocalGemm
        ( NORMAL, orientation, alpha, D11, X1AdjOrTrans_STAR_MR,
          (T)1, Z1_MC_STAR );

        internal::LocalGemm
        ( NORMAL, orientation, alpha, U01, X1AdjOrTrans_STAR_MR,
          (T)1, Z0_MC_STAR );
        //--------------------------------------------------------------------//
        D11.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, U01, /**/ U02,
               /**/       U10, U11, /**/ U12,
         /*************/ /******************/
          UBL, /**/ UBR,  U20, U21, /**/ U22 );

        SlideLockedPartitionRight
        ( XLAdjOrTrans_STAR_MR,                       /**/ XRAdjOrTrans_STAR_MR,
          X0AdjOrTrans_STAR_MR, X1AdjOrTrans_STAR_MR, /**/ X2AdjOrTrans_STAR_MR 
        );

        SlidePartitionDown
        ( ZT_MC_STAR,  Z0_MC_STAR,
                       Z1_MC_STAR,
         /**********/ /**********/
          ZB_MC_STAR,  Z2_MC_STAR );
    }
    PopBlocksizeStack();
#ifndef RELEASE
    PopCallStack();
#endif
}
Beispiel #2
0
inline void
LU( DistMatrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("LU");
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<F>
        ATL(g), ATR(g),  A00(g), A01(g), A02(g), 
        ABL(g), ABR(g),  A10(g), A11(g), A12(g),  
                         A20(g), A21(g), A22(g);

    // Temporary distributions
    DistMatrix<F,STAR,STAR> A11_STAR_STAR(g);
    DistMatrix<F,MC,  STAR> A21_MC_STAR(g);
    DistMatrix<F,STAR,VR  > A12_STAR_VR(g);
    DistMatrix<F,STAR,MR  > A12_STAR_MR(g);

    // Start the algorithm
    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    while( ATL.Height() < A.Height() && ATL.Width() < A.Width() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        A12_STAR_VR.AlignWith( A22 );
        A12_STAR_MR.AlignWith( A22 );
        A21_MC_STAR.AlignWith( A22 );
        A11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() );
        //--------------------------------------------------------------------//
        A11_STAR_STAR = A11;
        internal::LocalLU( A11_STAR_STAR );
        A11 = A11_STAR_STAR;

        A21_MC_STAR = A21;
        internal::LocalTrsm
        ( RIGHT, UPPER, NORMAL, NON_UNIT, F(1), A11_STAR_STAR, A21_MC_STAR );
        A21 = A21_MC_STAR;

        // Perhaps we should give up perfectly distributing this operation since
        // it's total contribution is only O(n^2)
        A12_STAR_VR = A12;
        internal::LocalTrsm
        ( LEFT, LOWER, NORMAL, UNIT, F(1), A11_STAR_STAR, A12_STAR_VR );

        A12_STAR_MR = A12_STAR_VR;
        internal::LocalGemm
        ( NORMAL, NORMAL, F(-1), A21_MC_STAR, A12_STAR_MR, F(1), A22 );
        A12 = A12_STAR_MR;
        //--------------------------------------------------------------------//
        A12_STAR_VR.FreeAlignments();
        A12_STAR_MR.FreeAlignments();
        A21_MC_STAR.FreeAlignments();

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Beispiel #3
0
inline void
internal::SymmRUA
( T alpha, const DistMatrix<T,MC,MR>& A,
           const DistMatrix<T,MC,MR>& B,
  T beta,        DistMatrix<T,MC,MR>& C )
{
#ifndef RELEASE
    PushCallStack("internal::SymmRUA");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
#endif
    const Grid& g = A.Grid();

    DistMatrix<T,MC,MR>
        BT(g),  B0(g),
        BB(g),  B1(g),
                B2(g);

    DistMatrix<T,MC,MR>
        CT(g),  C0(g),
        CB(g),  C1(g),
                C2(g);

    DistMatrix<T,MR,  STAR> B1Trans_MR_STAR(g);
    DistMatrix<T,VC,  STAR> B1Trans_VC_STAR(g);
    DistMatrix<T,STAR,MC  > B1_STAR_MC(g);
    DistMatrix<T,MC,  STAR> Z1Trans_MC_STAR(g);
    DistMatrix<T,MR,  STAR> Z1Trans_MR_STAR(g);
    DistMatrix<T,MC,  MR  > Z1Trans(g);
    DistMatrix<T,MR,  MC  > Z1Trans_MR_MC(g);

    Matrix<T> Z1Local;

    Scal( beta, C );
    LockedPartitionDown
    ( B, BT,
         BB, 0 );
    PartitionDown
    ( C, CT,
         CB, 0 );
    while( CT.Height() < C.Height() )
    {
        LockedRepartitionDown
        ( BT,  B0, 
         /**/ /**/
               B1,
          BB,  B2 );

        RepartitionDown
        ( CT,  C0,
         /**/ /**/
               C1,
          CB,  C2 );

        B1Trans_MR_STAR.AlignWith( A );
        B1Trans_VC_STAR.AlignWith( A );
        B1_STAR_MC.AlignWith( A );
        Z1Trans_MC_STAR.AlignWith( A );
        Z1Trans_MR_STAR.AlignWith( A );
        Z1Trans_MR_MC.AlignWith( C1 );
        Z1Trans_MC_STAR.ResizeTo( C1.Width(), C1.Height() );
        Z1Trans_MR_STAR.ResizeTo( C1.Width(), C1.Height() );
        //--------------------------------------------------------------------//
        B1Trans_MR_STAR.TransposeFrom( B1 );
        B1Trans_VC_STAR = B1Trans_MR_STAR;
        B1_STAR_MC.TransposeFrom( B1Trans_VC_STAR );
        Zero( Z1Trans_MC_STAR );
        Zero( Z1Trans_MR_STAR );
        internal::LocalSymmetricAccumulateRU
        ( TRANSPOSE, alpha, A, B1_STAR_MC, B1Trans_MR_STAR, 
          Z1Trans_MC_STAR, Z1Trans_MR_STAR );

        Z1Trans.SumScatterFrom( Z1Trans_MC_STAR );
        Z1Trans_MR_MC = Z1Trans;
        Z1Trans_MR_MC.SumScatterUpdate( (T)1, Z1Trans_MR_STAR );
        Transpose( Z1Trans_MR_MC.LockedLocalMatrix(), Z1Local );
        Axpy( (T)1, Z1Local, C1.LocalMatrix() );
        //--------------------------------------------------------------------//
        B1Trans_MR_STAR.FreeAlignments();
        B1Trans_VC_STAR.FreeAlignments();
        B1_STAR_MC.FreeAlignments();
        Z1Trans_MC_STAR.FreeAlignments();
        Z1Trans_MR_STAR.FreeAlignments();
        Z1Trans_MR_MC.FreeAlignments();

        SlideLockedPartitionDown
        ( BT,  B0,
               B1,
         /**/ /**/
          BB,  B2 );

        SlidePartitionDown
        ( CT,  C0,
               C1,
         /**/ /**/
          CB,  C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Beispiel #4
0
int
main( int argc, char* argv[] )
{
    Initialize( argc, argv );

    mpi::Comm comm = mpi::COMM_WORLD;
    const int commRank = mpi::CommRank( comm );

    try 
    {
        const int m = Input("--height","height of matrix",20);
        const int n = Input("--width","width of matrix",100);
        const int maxSteps = Input("--maxSteps","max # of steps of QR",10);
        const double tol = Input("--tol","tolerance for ID",-1.);
        const bool print = Input("--print","print matrices?",false);
        ProcessInput();
        PrintInputReport();

        DistMatrix<C> A;
        Uniform( A, m, n );
        const Real frobA = FrobeniusNorm( A );
        if( print )
            A.Print("A");

        const Grid& g = A.Grid();
        DistMatrix<int,VR,STAR> pR(g), pC(g);
        DistMatrix<C> Z(g);
        Skeleton( A, pR, pC, Z, maxSteps, tol );
        const int numSteps = pR.Height();
        if( print )
        {
            pR.Print("pR");
            pC.Print("pC");
            Z.Print("Z");
        }

        // Form the matrices of A's (hopefully) dominant rows and columns
        DistMatrix<C> AR( A );
        ApplyRowPivots( AR, pR );
        AR.ResizeTo( numSteps, A.Width() );
        DistMatrix<C> AC( A );
        ApplyColumnPivots( AC, pC );
        AC.ResizeTo( A.Height(), numSteps );
        if( print )
        {
            AC.Print("AC");
            AR.Print("AR");
        }

        // Check || A - AC Z AR ||_F / || A ||_F
        DistMatrix<C> B(g);
        Gemm( NORMAL, NORMAL, C(1), Z, AR, B );
        Gemm( NORMAL, NORMAL, C(-1), AC, B, C(1), A );
        const Real frobError = FrobeniusNorm( A );
        if( print )
            A.Print("A - AC Z AR");

        if( commRank == 0 )
        {
            std::cout << "|| A ||_F = " << frobA << "\n\n"
                      << "|| A - AC Z AR ||_F / || A ||_F = " 
                      << frobError/frobA << "\n" << std::endl;
        }
    }
    catch( ArgException& e )
    {
        // There is nothing to do
    }
    catch( exception& e )
    {
        ostringstream os;
        os << "Process " << commRank << " caught exception with message: "
           << e.what() << endl;
        cerr << os.str();
#ifndef RELEASE
        DumpCallStack();
#endif
    }

    Finalize();
    return 0;
}
Beispiel #5
0
inline void
Inverse( DistMatrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("Inverse");
    if( A.Height() != A.Width() )
        throw std::logic_error("Cannot invert non-square matrices");
#endif
    const Grid& g = A.Grid();
    DistMatrix<int,VC,STAR> p( g );
    LU( A, p );
    TriangularInverse( UPPER, NON_UNIT, A );

    // Solve inv(A) L = inv(U) for inv(A)
    DistMatrix<F> ATL(g), ATR(g),
               ABL(g), ABR(g);
    DistMatrix<F> A00(g), A01(g), A02(g),
               A10(g), A11(g), A12(g),
               A20(g), A21(g), A22(g);
    DistMatrix<F> A1(g), A2(g);
    DistMatrix<F,VC,  STAR> A1_VC_STAR(g);
    DistMatrix<F,STAR,STAR> L11_STAR_STAR(g);
    DistMatrix<F,VR,  STAR> L21_VR_STAR(g);
    DistMatrix<F,STAR,MR  > L21Trans_STAR_MR(g);
    DistMatrix<F,MC,  STAR> Z1(g);
    PartitionUpDiagonal
    ( A, ATL, ATR,
      ABL, ABR, 0 );
    while( ABR.Height() < A.Height() )
    {
        RepartitionUpDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
          /**/       A10, A11, /**/ A12,
          /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        View( A1, A, 0, A00.Width(),             A.Height(), A01.Width() );
        View( A2, A, 0, A00.Width()+A01.Width(), A.Height(), A02.Width() );

        L21_VR_STAR.AlignWith( A2 );
        L21Trans_STAR_MR.AlignWith( A2 );
        Z1.AlignWith( A01 );
        Z1.ResizeTo( A.Height(), A01.Width() );
        //--------------------------------------------------------------------//
        // Copy out L1
        L11_STAR_STAR = A11;
        L21_VR_STAR = A21;
        L21Trans_STAR_MR.TransposeFrom( L21_VR_STAR );

        // Zero the strictly lower triangular portion of A1
        MakeTrapezoidal( LEFT, UPPER, 0, A11 );
        Zero( A21 );

        // Perform the lazy update of A1
        internal::LocalGemm
        ( NORMAL, TRANSPOSE,
          F(-1), A2, L21Trans_STAR_MR, F(0), Z1 );
        A1.SumScatterUpdate( F(1), Z1 );

        // Solve against this diagonal block of L11
        A1_VC_STAR = A1;
        internal::LocalTrsm
        ( RIGHT, LOWER, NORMAL, UNIT, F(1), L11_STAR_STAR, A1_VC_STAR );
        A1 = A1_VC_STAR;
        //--------------------------------------------------------------------//
        Z1.FreeAlignments();
        L21Trans_STAR_MR.FreeAlignments();
        L21_VR_STAR.FreeAlignments();

        SlidePartitionUpDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
          /*************/ /*******************/
          /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );
    }

    // inv(A) := inv(A) P
    ApplyInverseColumnPivots( A, p );
#ifndef RELEASE
    PopCallStack();
#endif
}
Beispiel #6
0
inline void
LocalSymvRowAccumulateU
( T alpha, 
  const DistMatrix<T>& A,
  const DistMatrix<T,STAR,MC>& x_STAR_MC,
  const DistMatrix<T,STAR,MR>& x_STAR_MR,
        DistMatrix<T,STAR,MC>& z_STAR_MC,
        DistMatrix<T,STAR,MR>& z_STAR_MR )
{
#ifndef RELEASE
    PushCallStack("internal::LocalSymvRowAccumulateU");
    if( A.Grid() != x_STAR_MC.Grid() ||
        x_STAR_MC.Grid() != x_STAR_MR.Grid() ||
        x_STAR_MR.Grid() != z_STAR_MC.Grid() ||
        z_STAR_MC.Grid() != z_STAR_MR.Grid() )
        throw std::logic_error
        ("{A,x,z} must be distributed over the same grid");
    if( x_STAR_MC.Height() != 1 || x_STAR_MR.Height() != 1 ||
        z_STAR_MC.Height() != 1 || z_STAR_MR.Height() != 1 )
        throw std::logic_error("Expected x and z to be row vectors");
    if( A.Height() != A.Width() || 
        A.Height() != x_STAR_MC.Width() ||
        A.Height() != x_STAR_MR.Width() ||
        A.Height() != z_STAR_MC.Width() ||
        A.Height() != z_STAR_MR.Width() )
    {
        std::ostringstream msg;
        msg << "Nonconformal LocalSymvRowAccumulateU: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  x[* ,MC] ~ " << x_STAR_MC.Height() << " x " 
                               << x_STAR_MC.Width() << "\n"
            << "  x[* ,MR] ~ " << x_STAR_MR.Height() << " x " 
                               << x_STAR_MR.Width() << "\n"
            << "  z[* ,MC] ~ " << z_STAR_MC.Height() << " x " 
                               << z_STAR_MC.Width() << "\n"
            << "  z[* ,MR] ~ " << z_STAR_MR.Height() << " x " 
                               << z_STAR_MR.Width() << "\n";
        throw std::logic_error( msg.str() );
    }
    if( x_STAR_MC.RowAlignment() != A.ColAlignment() ||
        x_STAR_MR.RowAlignment() != A.RowAlignment() ||
        z_STAR_MC.RowAlignment() != A.ColAlignment() ||
        z_STAR_MR.RowAlignment() != A.RowAlignment() )
        throw std::logic_error("Partial matrix distributions are misaligned");
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> A11(g), A12(g);
    DistMatrix<T> D11(g);

    DistMatrix<T,STAR,MC> x1_STAR_MC(g);
    DistMatrix<T,STAR,MR> 
        xL_STAR_MR(g), xR_STAR_MR(g),
        x0_STAR_MR(g), x1_STAR_MR(g), x2_STAR_MR(g);
    DistMatrix<T,STAR,MC> z1_STAR_MC(g);
    DistMatrix<T,STAR,MR> z1_STAR_MR(g), z2_STAR_MR(g);

    // We want our local gemvs to be of width blocksize, so we will 
    // temporarily change to max(r,c) times the current blocksize
    const int ratio = std::max( g.Height(), g.Width() );
    PushBlocksizeStack( ratio*LocalSymvBlocksize<T>() );
                 
    LockedPartitionRight( x_STAR_MR,  xL_STAR_MR, xR_STAR_MR, 0 );
    while( xL_STAR_MR.Width() < x_STAR_MR.Width() )
    {
        LockedRepartitionRight
        ( xL_STAR_MR, /**/ xR_STAR_MR, 
          x0_STAR_MR, /**/ x1_STAR_MR, x2_STAR_MR );

        const int n0 = x0_STAR_MR.Width();
        const int n1 = x1_STAR_MR.Width();
        const int n2 = x2_STAR_MR.Width();
        LockedView( A11, A, n0, n0,    n1, n1 );
        LockedView( A12, A, n0, n0+n1, n1, n2 );
        LockedView( x1_STAR_MC, x_STAR_MC, 0, n0, 1, n1 );
        View( z1_STAR_MC, z_STAR_MC, 0, n0,    1, n1 );
        View( z1_STAR_MR, z_STAR_MR, 0, n0,    1, n1 );
        View( z2_STAR_MR, z_STAR_MR, 0, n0+n1, 1, n2 );

        D11.AlignWith( A11 );
        //--------------------------------------------------------------------//
        // TODO: These diagonal block updates can be greatly improved
        D11 = A11;
        MakeTrapezoidal( LEFT, UPPER, 0, D11 );
        Gemv
        ( NORMAL, 
          alpha, D11.LockedLocalMatrix(), 
                 x1_STAR_MR.LockedLocalMatrix(),
          T(1),  z1_STAR_MC.LocalMatrix() );
        MakeTrapezoidal( LEFT, UPPER, 1, D11 );
        Gemv
        ( TRANSPOSE,
          alpha, D11.LockedLocalMatrix(),
                 x1_STAR_MC.LockedLocalMatrix(),
          T(1),  z1_STAR_MR.LocalMatrix() );

        Gemv
        ( NORMAL,
          alpha, A12.LockedLocalMatrix(),
                 x2_STAR_MR.LockedLocalMatrix(),
          T(1),  z1_STAR_MC.LocalMatrix() );
        Gemv
        ( TRANSPOSE,
          alpha, A12.LockedLocalMatrix(),
                 x1_STAR_MC.LockedLocalMatrix(),
          T(1),  z2_STAR_MR.LocalMatrix() );
        //--------------------------------------------------------------------//
        D11.FreeAlignments();

        SlideLockedPartitionRight
        ( xL_STAR_MR,             /**/ xR_STAR_MR,
          x0_STAR_MR, x1_STAR_MR, /**/ x2_STAR_MR );
    }
    PopBlocksizeStack();
#ifndef RELEASE
    PopCallStack();
#endif
}
Beispiel #7
0
inline void
TrtrsmLLN
( UnitOrNonUnit diag, F alpha, const DistMatrix<F>& L, DistMatrix<F>& X,
  bool checkIfSingular )
{
#ifndef RELEASE
    CallStackEntry entry("internal::TrtrsmLLN");
#endif
    const Grid& g = L.Grid();

    // Matrix views
    DistMatrix<F>
    LTL(g), LTR(g),  L00(g), L01(g), L02(g),
        LBL(g), LBR(g),  L10(g), L11(g), L12(g),
        L20(g), L21(g), L22(g);
    DistMatrix<F>
    XTL(g), XTR(g),  X00(g), X01(g), X02(g),
        XBL(g), XBR(g),  X10(g), X11(g), X12(g),
        X20(g), X21(g), X22(g);

    // Temporary distributions
    DistMatrix<F,STAR,STAR> L11_STAR_STAR(g);
    DistMatrix<F,MC,  STAR> L21_MC_STAR(g);
    DistMatrix<F,STAR,MR  > X10_STAR_MR(g);
    DistMatrix<F,STAR,VR  > X10_STAR_VR(g);
    DistMatrix<F,STAR,MR  > X11_STAR_MR(g);
    DistMatrix<F,STAR,STAR> X11_STAR_STAR(g);

    // Start the algorithm
    ScaleTrapezoid( alpha, LOWER, X );
    LockedPartitionDownDiagonal
    ( L, LTL, LTR,
      LBL, LBR, 0 );
    PartitionDownDiagonal
    ( X, XTL, XTR,
      XBL, XBR, 0 );
    while( XBR.Height() > 0 )
    {
        LockedRepartitionDownDiagonal
        ( LTL, /**/ LTR,  L00, /**/ L01, L02,
          /*************/ /******************/
          /**/       L10, /**/ L11, L12,
          LBL, /**/ LBR,  L20, /**/ L21, L22 );

        RepartitionDownDiagonal
        ( XTL, /**/ XTR,  X00, /**/ X01, X02,
          /*************/ /******************/
          /**/       X10, /**/ X11, X12,
          XBL, /**/ XBR,  X20, /**/ X21, X22 );

        L21_MC_STAR.AlignWith( X20 );
        X10_STAR_MR.AlignWith( X20 );
        X11_STAR_MR.AlignWith( X21 );
        //--------------------------------------------------------------------//
        L11_STAR_STAR = L11;
        X11_STAR_STAR = X11;
        X10_STAR_VR = X10;

        LocalTrsm
        ( LEFT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, X10_STAR_VR,
          checkIfSingular );
        LocalTrtrsm
        ( LEFT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, X11_STAR_STAR,
          checkIfSingular );
        X11 = X11_STAR_STAR;
        X11_STAR_MR = X11_STAR_STAR;
        MakeTriangular( LOWER, X11_STAR_MR );

        X10_STAR_MR = X10_STAR_VR;
        X10 = X10_STAR_MR;
        L21_MC_STAR = L21;

        LocalGemm
        ( NORMAL, NORMAL, F(-1), L21_MC_STAR, X10_STAR_MR, F(1), X20 );
        LocalGemm
        ( NORMAL, NORMAL, F(-1), L21_MC_STAR, X11_STAR_MR, F(1), X21 );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDownDiagonal
        ( LTL, /**/ LTR,  L00, L01, /**/ L02,
          /**/       L10, L11, /**/ L12,
          /*************/ /******************/
          LBL, /**/ LBR,  L20, L21, /**/ L22 );

        SlidePartitionDownDiagonal
        ( XTL, /**/ XTR,  X00, X01, /**/ X02,
          /**/       X10, X11, /**/ X12,
          /*************/ /******************/
          XBL, /**/ XBR,  X20, X21, /**/ X22 );
    }
}
Beispiel #8
0
void TestCorrectness
( bool print,
  UpperOrLower uplo, 
  const DistMatrix<Complex<R> >& A, 
  const DistMatrix<Complex<R>,STAR,STAR>& t,
        DistMatrix<Complex<R> >& AOrig )
{
    typedef Complex<R> C;
    const Grid& g = A.Grid();
    const int m = AOrig.Height();

    int subdiagonal = ( uplo==LOWER ? -1 : +1 );

    if( g.Rank() == 0 )
        cout << "Testing error..." << endl;

    // Grab the diagonal and subdiagonal of the symmetric tridiagonal matrix
    DistMatrix<R,MD,STAR> d(g);
    DistMatrix<R,MD,STAR> e(g);
    A.GetRealPartOfDiagonal( d );
    A.GetRealPartOfDiagonal( e, subdiagonal );
     
    // Grab a full copy of e so that we may fill the opposite subdiagonal 
    DistMatrix<R,STAR,STAR> e_STAR_STAR(g);
    DistMatrix<R,MD,STAR> eOpposite(g);
    e_STAR_STAR = e;
    eOpposite.AlignWithDiagonal( A, -subdiagonal );
    eOpposite = e_STAR_STAR;
    
    // Zero B and then fill its tridiagonal
    DistMatrix<C> B(g);
    B.AlignWith( A );
    Zeros( m, m, B );
    B.SetRealPartOfDiagonal( d );
    B.SetRealPartOfDiagonal( e, subdiagonal );
    B.SetRealPartOfDiagonal( eOpposite, -subdiagonal );

    // Reverse the accumulated Householder transforms, ignoring symmetry
    if( uplo == LOWER )
    {
        ApplyPackedReflectors
        ( LEFT, LOWER, VERTICAL, BACKWARD, 
          UNCONJUGATED, subdiagonal, A, t, B );
        ApplyPackedReflectors
        ( RIGHT, LOWER, VERTICAL, BACKWARD, 
          CONJUGATED, subdiagonal, A, t, B );
    }
    else
    {
        ApplyPackedReflectors
        ( LEFT, UPPER, VERTICAL, FORWARD, 
          UNCONJUGATED, subdiagonal, A, t, B );
        ApplyPackedReflectors
        ( RIGHT, UPPER, VERTICAL, FORWARD, 
          CONJUGATED, subdiagonal, A, t, B );
    }

    // Compare the appropriate triangle of AOrig and B
    MakeTriangular( uplo, AOrig );
    MakeTriangular( uplo, B );
    Axpy( C(-1), AOrig, B );

    const R infNormOfAOrig = HermitianNorm( uplo, AOrig, INFINITY_NORM );
    const R frobNormOfAOrig = HermitianNorm( uplo, AOrig, FROBENIUS_NORM );
    const R infNormOfError = HermitianNorm( uplo, B, INFINITY_NORM );
    const R frobNormOfError = HermitianNorm( uplo, B, FROBENIUS_NORM );
    if( g.Rank() == 0 )
    {
        cout << "    ||AOrig||_1 = ||AOrig||_oo = " << infNormOfAOrig << "\n"
             << "    ||AOrig||_F                = " << frobNormOfAOrig << "\n"
             << "    ||AOrig - Q^H A Q||_oo     = " << infNormOfError << "\n"
             << "    ||AOrig - Q^H A Q||_F      = " << frobNormOfError << endl;
    }
}
Beispiel #9
0
inline void
HermitianTridiagL
( DistMatrix<Complex<R> >& A,
  DistMatrix<Complex<R>,STAR,STAR>& t )
{
#ifndef RELEASE
    PushCallStack("internal::HermitianTridiagL");
    if( A.Grid() != t.Grid() )
        throw std::logic_error("{A,t} must be distributed over the same grid");
    if( A.Height() != A.Width() )
        throw std::logic_error("A must be square");
    if( t.Viewing() )
        throw std::logic_error("t must not be a view");
#endif
    typedef Complex<R> C;

    const Grid& g = A.Grid();
    DistMatrix<C,MD,STAR> tDiag(g);
    tDiag.AlignWithDiagonal( A, -1 );
    tDiag.ResizeTo( A.Height()-1, 1 );

    // Matrix views 
    DistMatrix<C> 
        ATL(g), ATR(g),  A00(g), A01(g), A02(g), 
        ABL(g), ABR(g),  A10(g), A11(g), A12(g),
                         A20(g), A21(g), A22(g);
    DistMatrix<C,MD,STAR> tT(g),  t0(g), 
                          tB(g),  t1(g),
                                  t2(g);

    // Temporary distributions
    DistMatrix<C> WPan(g);
    DistMatrix<C,STAR,STAR> t1_STAR_STAR(g);
    DistMatrix<C,STAR,STAR> A11_STAR_STAR(g);
    DistMatrix<C,MC,  STAR> APan_MC_STAR(g),  A11_MC_STAR(g),
                                              A21_MC_STAR(g);
    DistMatrix<C,MR,  STAR> APan_MR_STAR(g),  A11_MR_STAR(g),
                                              A21_MR_STAR(g);
    DistMatrix<C,MC,  STAR> WPan_MC_STAR(g),  W11_MC_STAR(g),
                                              W21_MC_STAR(g);
    DistMatrix<C,MR,  STAR> WPan_MR_STAR(g),  W11_MR_STAR(g),
                                              W21_MR_STAR(g);

    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    PartitionDown
    ( tDiag, tT,
             tB, 0 );
    while( ATL.Height() < A.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        RepartitionDown
        ( tT,  t0,
         /**/ /**/
               t1,
          tB,  t2 );
        
        if( A22.Height() > 0 )
        {
            WPan.AlignWith( A11 );
            APan_MC_STAR.AlignWith( A11 );
            WPan_MC_STAR.AlignWith( A11 );
            APan_MR_STAR.AlignWith( A11 );
            WPan_MR_STAR.AlignWith( A11 );
            //----------------------------------------------------------------//
            WPan.ResizeTo( ABR.Height(), A11.Width() );
            APan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() );
            WPan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() );
            APan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() );
            WPan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() );

            HermitianPanelTridiagL
            ( ABR, WPan, t1,
              APan_MC_STAR, APan_MR_STAR, WPan_MC_STAR, WPan_MR_STAR );

            PartitionDown
            ( APan_MC_STAR, A11_MC_STAR,
                            A21_MC_STAR, A11.Height() );
            PartitionDown
            ( APan_MR_STAR, A11_MR_STAR,
                            A21_MR_STAR, A11.Height() );
            PartitionDown
            ( WPan_MC_STAR, W11_MC_STAR,
                            W21_MC_STAR, A11.Height() );
            PartitionDown
            ( WPan_MR_STAR, W11_MR_STAR,
                            W21_MR_STAR, A11.Height() );

            LocalTrr2k
            ( LOWER, ADJOINT, ADJOINT,
              C(-1), A21_MC_STAR, W21_MR_STAR,
                     W21_MC_STAR, A21_MR_STAR,
              C(1),  A22 );
            //----------------------------------------------------------------//
            WPan_MR_STAR.FreeAlignments();
            APan_MR_STAR.FreeAlignments();
            WPan_MC_STAR.FreeAlignments();
            APan_MC_STAR.FreeAlignments();
            WPan.FreeAlignments();
        }
        else
        {
            A11_STAR_STAR = A11;
            t1_STAR_STAR.ResizeTo( t1.Height(), 1 );

            HermitianTridiag
            ( LOWER, A11_STAR_STAR.LocalMatrix(), 
              t1_STAR_STAR.LocalMatrix() );

            A11 = A11_STAR_STAR;
            t1 = t1_STAR_STAR;
        }

        SlidePartitionDown
        ( tT,  t0,
               t1,
         /**/ /**/
          tB,  t2 );

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );
    }

    // Redistribute from matrix-diagonal form to fully replicated
    t = tDiag;
#ifndef RELEASE
    PopCallStack();
#endif
}
Beispiel #10
0
inline void
internal::HegstLUVar2( DistMatrix<F,MC,MR>& A, const DistMatrix<F,MC,MR>& U )
{
#ifndef RELEASE
    PushCallStack("internal::HegstLUVar2");
    if( A.Height() != A.Width() )
        throw std::logic_error("A must be square");
    if( U.Height() != U.Width() )
        throw std::logic_error("Triangular matrices must be square");
    if( A.Height() != U.Height() )
        throw std::logic_error("A and U must be the same size");
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<F,MC,MR>
        ATL(g), ATR(g),  A00(g), A01(g), A02(g),
        ABL(g), ABR(g),  A10(g), A11(g), A12(g),
                         A20(g), A21(g), A22(g);

    DistMatrix<F,MC,MR>
        UTL(g), UTR(g),  U00(g), U01(g), U02(g),
        UBL(g), UBR(g),  U10(g), U11(g), U12(g),
                         U20(g), U21(g), U22(g);

    // Temporary distributions
    DistMatrix<F,VC,  STAR> A01_VC_STAR(g);
    DistMatrix<F,STAR,STAR> A11_STAR_STAR(g);
    DistMatrix<F,STAR,VR  > A12_STAR_VR(g);
    DistMatrix<F,STAR,STAR> U11_STAR_STAR(g);
    DistMatrix<F,STAR,MC  > U12_STAR_MC(g);
    DistMatrix<F,STAR,VR  > U12_STAR_VR(g);
    DistMatrix<F,MR,  STAR> U12Adj_MR_STAR(g);
    DistMatrix<F,VC,  STAR> U12Adj_VC_STAR(g);
    DistMatrix<F,MC,  STAR> X01_MC_STAR(g);
    DistMatrix<F,STAR,STAR> X11_STAR_STAR(g);
    DistMatrix<F,MC,  MR  > Y12(g);
    DistMatrix<F,MC,  MR  > Z12Adj(g);
    DistMatrix<F,MR,  MC  > Z12Adj_MR_MC(g);
    DistMatrix<F,MC,  STAR> Z12Adj_MC_STAR(g);
    DistMatrix<F,MR,  STAR> Z12Adj_MR_STAR(g);

    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    LockedPartitionDownDiagonal
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    while( ATL.Height() < A.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        LockedRepartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, /**/ U01, U02,
         /*************/ /******************/
               /**/       U10, /**/ U11, U12,
          UBL, /**/ UBR,  U20, /**/ U21, U22 );

        A12_STAR_VR.AlignWith( A12 );
        U12_STAR_MC.AlignWith( A22 );
        U12_STAR_VR.AlignWith( A12 );
        U12Adj_MR_STAR.AlignWith( A22 );
        U12Adj_VC_STAR.AlignWith( A22 );
        X01_MC_STAR.AlignWith( A01 );
        Y12.AlignWith( A12 );
        Z12Adj.AlignWith( A12 );
        Z12Adj_MR_MC.AlignWith( A12 );
        Z12Adj_MC_STAR.AlignWith( A22 );
        Z12Adj_MR_STAR.AlignWith( A22 );
        //--------------------------------------------------------------------//
        // A01 := A01 U11'
        U11_STAR_STAR = U11;
        A01_VC_STAR = A01;
        internal::LocalTrmm
        ( RIGHT, UPPER, ADJOINT, NON_UNIT, (F)1, U11_STAR_STAR, A01_VC_STAR );
        A01 = A01_VC_STAR;

        // A01 := A01 + A02 U12'
        U12Adj_MR_STAR.AdjointFrom( U12 );
        X01_MC_STAR.ResizeTo( A01.Height(), A01.Width() );
        internal::LocalGemm
        ( NORMAL, NORMAL,
          (F)1, A02, U12Adj_MR_STAR, (F)0, X01_MC_STAR );
        A01.SumScatterUpdate( (F)1, X01_MC_STAR );

        // Y12 := U12 A22
        U12Adj_VC_STAR = U12Adj_MR_STAR;
        U12_STAR_MC.AdjointFrom( U12Adj_VC_STAR );
        Z12Adj_MC_STAR.ResizeTo( A12.Width(), A12.Height() );
        Z12Adj_MR_STAR.ResizeTo( A12.Width(), A12.Height() );
        Zero( Z12Adj_MC_STAR );
        Zero( Z12Adj_MR_STAR );
        internal::LocalSymmetricAccumulateRU
        ( ADJOINT, 
          (F)1, A22, U12_STAR_MC, U12Adj_MR_STAR, 
          Z12Adj_MC_STAR, Z12Adj_MR_STAR );
        Z12Adj.SumScatterFrom( Z12Adj_MC_STAR );
        Z12Adj_MR_MC = Z12Adj;
        Z12Adj_MR_MC.SumScatterUpdate( (F)1, Z12Adj_MR_STAR );
        Y12.ResizeTo( A12.Height(), A12.Width() );
        Adjoint( Z12Adj_MR_MC.LockedLocalMatrix(), Y12.LocalMatrix() );

        // A12 := U11 A12
        A12_STAR_VR = A12;
        U11_STAR_STAR = U11;
        internal::LocalTrmm
        ( LEFT, UPPER, NORMAL, NON_UNIT, (F)1, U11_STAR_STAR, A12_STAR_VR );
        A12 = A12_STAR_VR;

        // A12 := A12 + 1/2 Y12
        Axpy( (F)0.5, Y12, A12 );

        // A11 := U11 A11 U11'
        A11_STAR_STAR = A11;
        internal::LocalHegst( LEFT, UPPER, A11_STAR_STAR, U11_STAR_STAR );
        A11 = A11_STAR_STAR;

        // A11 := A11 + (A12 U12' + U12 A12')
        A12_STAR_VR = A12;
        U12_STAR_VR = U12;
        X11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() );
        Her2k
        ( UPPER, NORMAL,
          (F)1, A12_STAR_VR.LocalMatrix(), U12_STAR_VR.LocalMatrix(),
          (F)0, X11_STAR_STAR.LocalMatrix() );
        A11.SumScatterUpdate( (F)1, X11_STAR_STAR );

        // A12 := A12 + 1/2 Y12
        Axpy( (F)0.5, Y12, A12 );
        //--------------------------------------------------------------------//
        A12_STAR_VR.FreeAlignments();
        U12_STAR_MC.FreeAlignments();
        U12_STAR_VR.FreeAlignments();
        U12Adj_MR_STAR.FreeAlignments();
        U12Adj_VC_STAR.FreeAlignments();
        X01_MC_STAR.FreeAlignments();
        Y12.FreeAlignments();
        Z12Adj.FreeAlignments();
        Z12Adj_MR_MC.FreeAlignments(); 
        Z12Adj_MC_STAR.FreeAlignments();
        Z12Adj_MR_STAR.FreeAlignments();

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        SlideLockedPartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, U01, /**/ U02,
               /**/       U10, U11, /**/ U12,
         /*************/ /******************/
          UBL, /**/ UBR,  U20, U21, /**/ U22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Beispiel #11
0
void TestCorrectness
( const DistMatrix<F>& A,
  const DistMatrix<F,MD,STAR>& t,
        DistMatrix<F>& AOrig )
{
    typedef BASE(F) Real;
    const Grid& g = A.Grid();
    const Int m = A.Height();
    const Int n = A.Width();
    const Int minDim = std::min(m,n);

    if( g.Rank() == 0 )
        cout << "  Testing orthogonality of Q..." << endl;

    // Form Z := Q Q^H as an approximation to identity
    DistMatrix<F> Z(g);
    Identity( Z, m, n );
    rq::ApplyQ( RIGHT, NORMAL, A, t, Z );
    rq::ApplyQ( RIGHT, ADJOINT, A, t, Z );
    
    DistMatrix<F> ZUpper(g);
    View( ZUpper, Z, 0, 0, minDim, minDim );

    // Form Identity
    DistMatrix<F> X(g);
    Identity( X, minDim, minDim );

    // Form X := I - Q Q^H
    Axpy( F(-1), ZUpper, X );

    Real oneNormOfError = OneNorm( X );
    Real infNormOfError = InfinityNorm( X );
    Real frobNormOfError = FrobeniusNorm( X );
    if( g.Rank() == 0 )
    {
        cout << "    ||Q^H Q - I||_1  = " << oneNormOfError << "\n"
             << "    ||Q^H Q - I||_oo = " << infNormOfError << "\n"
             << "    ||Q^H Q - I||_F  = " << frobNormOfError << endl;
    }

    if( g.Rank() == 0 )
        cout << "  Testing if A = RQ..." << endl;

    // Form RQ
    DistMatrix<F> U( A );
    MakeTrapezoidal( UPPER, U, 0, RIGHT );
    rq::ApplyQ( RIGHT, NORMAL, A, t, U );

    // Form R Q - A
    Axpy( F(-1), AOrig, U );
    
    const Real oneNormOfA = OneNorm( AOrig );
    const Real infNormOfA = InfinityNorm( AOrig );
    const Real frobNormOfA = FrobeniusNorm( AOrig );
    oneNormOfError = OneNorm( U );
    infNormOfError = InfinityNorm( U );
    frobNormOfError = FrobeniusNorm( U );
    if( g.Rank() == 0 )
    {
        cout << "    ||A||_1       = " << oneNormOfA << "\n"
             << "    ||A||_oo      = " << infNormOfA << "\n"
             << "    ||A||_F       = " << frobNormOfA << "\n"
             << "    ||A - RQ||_1  = " << oneNormOfError << "\n"
             << "    ||A - RQ||_oo = " << infNormOfError << "\n"
             << "    ||A - RQ||_F  = " << frobNormOfError << endl;
    }
}
Beispiel #12
0
void Trr2kNNTN
( UpperOrLower uplo,
  Orientation orientationOfC,
  T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B,
           const DistMatrix<T>& C, const DistMatrix<T>& D,
  T beta,        DistMatrix<T>& E )
{
#ifndef RELEASE
    CallStackEntry entry("internal::Trr2kNNTN");
    if( E.Height() != E.Width()  || A.Width()  != C.Height() ||
        A.Height() != E.Height() || C.Width()  != E.Height() ||
        B.Width()  != E.Width()  || D.Width()  != E.Width()  ||
        A.Width()  != B.Height() || C.Height() != D.Height() )
        throw std::logic_error("Nonconformal Trr2kNNTN");
#endif
    const Grid& g = E.Grid();

    DistMatrix<T> AL(g), AR(g),
                  A0(g), A1(g), A2(g);
    DistMatrix<T> BT(g),  B0(g),
                  BB(g),  B1(g),
                          B2(g);

    DistMatrix<T> CT(g),  C0(g),
                  CB(g),  C1(g),
                          C2(g);
    DistMatrix<T> DT(g),  D0(g),
                  DB(g),  D1(g),
                          D2(g);

    DistMatrix<T,MC,  STAR> A1_MC_STAR(g);
    DistMatrix<T,MR,  STAR> B1Trans_MR_STAR(g);
    DistMatrix<T,STAR,MC  > C1_STAR_MC(g);
    DistMatrix<T,MR,  STAR> D1Trans_MR_STAR(g);

    A1_MC_STAR.AlignWith( E );
    B1Trans_MR_STAR.AlignWith( E );
    C1_STAR_MC.AlignWith( E );
    D1Trans_MR_STAR.AlignWith( E );

    LockedPartitionRight( A, AL, AR, 0 );
    LockedPartitionDown
    ( B, BT,
         BB, 0 );
    LockedPartitionDown
    ( C, CT,
         CB, 0 );
    LockedPartitionDown
    ( D, DT,
         DB, 0 );
    while( AL.Width() < A.Width() )
    {
        LockedRepartitionRight
        ( AL, /**/ AR,
          A0, /**/ A1, A2 );
        LockedRepartitionDown
        ( BT,  B0,
         /**/ /**/
               B1,
          BB,  B2 );
        LockedRepartitionDown
        ( CT,  C0,
         /**/ /**/
               C1,
          CB,  C2 );
        LockedRepartitionDown
        ( DT,  D0,
         /**/ /**/
               D1,
          DB,  D2 );

        //--------------------------------------------------------------------//
        A1_MC_STAR = A1;
        C1_STAR_MC = C1;
        B1Trans_MR_STAR.TransposeFrom( B1 );
        D1Trans_MR_STAR.TransposeFrom( D1 );
        LocalTrr2k
        ( uplo, TRANSPOSE, orientationOfC, TRANSPOSE,
          alpha, A1_MC_STAR, B1Trans_MR_STAR, 
                 C1_STAR_MC, D1Trans_MR_STAR,
          beta,  E );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDown
        ( DT,  D0,
               D1,
         /**/ /**/
          DB,  D2 );
        SlideLockedPartitionDown
        ( CT,  C0,
               C1,
         /**/ /**/
          CB,  C2 );
        SlideLockedPartitionDown
        ( BT,  B0,
               B1,
         /**/ /**/
          BB,  B2 );
        SlideLockedPartitionRight
        ( AL,     /**/ AR,
          A0, A1, /**/ A2 );
    }
}
Beispiel #13
0
inline void
TwoSidedTrsmUVar1
( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& U )
{
#ifndef RELEASE
    PushCallStack("internal::TwoSidedTrsmUVar1");
    if( A.Height() != A.Width() )
        throw std::logic_error("A must be square");
    if( U.Height() != U.Width() )
        throw std::logic_error("Triangular matrices must be square");
    if( A.Height() != U.Height() )
        throw std::logic_error("A and U must be the same size");
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<F>
        ATL(g), ATR(g),  A00(g), A01(g), A02(g),
        ABL(g), ABR(g),  A10(g), A11(g), A12(g),
                         A20(g), A21(g), A22(g);
    DistMatrix<F>
        UTL(g), UTR(g),  U00(g), U01(g), U02(g),
        UBL(g), UBR(g),  U10(g), U11(g), U12(g),
                         U20(g), U21(g), U22(g);

    // Temporary distributions
    DistMatrix<F,STAR,STAR> A11_STAR_STAR(g);
    DistMatrix<F,VC,  STAR> A01_VC_STAR(g);
    DistMatrix<F,STAR,STAR> U11_STAR_STAR(g);
    DistMatrix<F,MC,  STAR> U01_MC_STAR(g);
    DistMatrix<F,VC,  STAR> U01_VC_STAR(g);
    DistMatrix<F,VR,  STAR> U01_VR_STAR(g);
    DistMatrix<F,STAR,MR  > U01Adj_STAR_MR(g);
    DistMatrix<F,STAR,STAR> X11_STAR_STAR(g);
    DistMatrix<F,MR,  MC  > Z01_MR_MC(g);
    DistMatrix<F,MC,  STAR> Z01_MC_STAR(g);
    DistMatrix<F,MR,  STAR> Z01_MR_STAR(g);
    DistMatrix<F> Y01(g);

    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    LockedPartitionDownDiagonal
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    while( ATL.Height() < A.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        LockedRepartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, /**/ U01, U02,
         /*************/ /******************/
               /**/       U10, /**/ U11, U12,
          UBL, /**/ UBR,  U20, /**/ U21, U22 );

        A01_VC_STAR.AlignWith( A01 );
        U01_MC_STAR.AlignWith( A00 );
        U01_VR_STAR.AlignWith( A00 );
        U01_VC_STAR.AlignWith( A00 );
        U01Adj_STAR_MR.AlignWith( A00 );
        Y01.AlignWith( A01 );
        Z01_MR_MC.AlignWith( A01 );
        Z01_MC_STAR.AlignWith( A00 );
        Z01_MR_STAR.AlignWith( A00 );
        //--------------------------------------------------------------------//
        // Y01 := A00 U01
        U01_MC_STAR = U01;
        U01_VR_STAR = U01_MC_STAR;
        U01Adj_STAR_MR.AdjointFrom( U01_VR_STAR );
        Z01_MC_STAR.ResizeTo( A01.Height(), A01.Width() );
        Z01_MR_STAR.ResizeTo( A01.Height(), A01.Width() );
        Zero( Z01_MC_STAR );
        Zero( Z01_MR_STAR );
        LocalSymmetricAccumulateLU
        ( ADJOINT, 
          F(1), A00, U01_MC_STAR, U01Adj_STAR_MR, Z01_MC_STAR, Z01_MR_STAR );
        Z01_MR_MC.SumScatterFrom( Z01_MR_STAR );
        Y01 = Z01_MR_MC;
        Y01.SumScatterUpdate( F(1), Z01_MC_STAR );

        // A01 := inv(U00)' A01
        //
        // This is the bottleneck because A01 only has blocksize columns
        Trsm( LEFT, UPPER, ADJOINT, diag, F(1), U00, A01 );

        // A01 := A01 - 1/2 Y01
        Axpy( F(-1)/F(2), Y01, A01 );

        // A11 := A11 - (U01' A01 + A01' U01)
        A01_VC_STAR = A01;
        U01_VC_STAR = U01_MC_STAR;
        X11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() );
        Her2k
        ( UPPER, ADJOINT,
          F(-1), A01_VC_STAR.LocalMatrix(), U01_VC_STAR.LocalMatrix(),
          F(0), X11_STAR_STAR.LocalMatrix() );
        A11.SumScatterUpdate( F(1), X11_STAR_STAR );

        // A11 := inv(U11)' A11 inv(U11)
        A11_STAR_STAR = A11;
        U11_STAR_STAR = U11;
        LocalTwoSidedTrsm( UPPER, diag, A11_STAR_STAR, U11_STAR_STAR );
        A11 = A11_STAR_STAR;

        // A01 := A01 - 1/2 Y01
        Axpy( F(-1)/F(2), Y01, A01 );

        // A01 := A01 inv(U11)
        A01_VC_STAR = A01;
        LocalTrsm
        ( RIGHT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, A01_VC_STAR );
        A01 = A01_VC_STAR;
        //--------------------------------------------------------------------//
        A01_VC_STAR.FreeAlignments();
        U01_MC_STAR.FreeAlignments();
        U01_VR_STAR.FreeAlignments();
        U01_VC_STAR.FreeAlignments();
        U01Adj_STAR_MR.FreeAlignments();
        Y01.FreeAlignments();
        Z01_MR_MC.FreeAlignments();
        Z01_MC_STAR.FreeAlignments();
        Z01_MR_STAR.FreeAlignments();

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        SlideLockedPartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, U01, /**/ U02,
               /**/       U10, U11, /**/ U12,
         /*************/ /******************/
          UBL, /**/ UBR,  U20, U21, /**/ U22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Beispiel #14
0
inline void
internal::TrmmLUNA
( UnitOrNonUnit diag,
  T alpha, const DistMatrix<T,MC,MR>& U,
                 DistMatrix<T,MC,MR>& X )
{
#ifndef RELEASE
    PushCallStack("internal::TrmmULNA");
    if( U.Grid() != X.Grid() )
        throw std::logic_error
        ("U and X must be distributed over the same grid");
    if( U.Height() != U.Width() || U.Width() != X.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal TrmmLUNA: \n"
            << "  U ~ " << U.Height() << " x " << U.Width() << "\n"
            << "  X ~ " << X.Height() << " x " << X.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = U.Grid();

    DistMatrix<T,MC,MR>
        XL(g), XR(g),
        X0(g), X1(g), X2(g);

    DistMatrix<T,VR,  STAR> X1_VR_STAR(g);
    DistMatrix<T,STAR,MR  > X1Trans_STAR_MR(g);
    DistMatrix<T,MC,  STAR> Z1_MC_STAR(g);

    PartitionRight
    ( X, XL, XR, 0 );
    while( XL.Width() < X.Width() )
    {
        RepartitionRight
        ( XL, /**/ XR,
          X0, /**/ X1, X2 );

        X1_VR_STAR.AlignWith( U );
        X1Trans_STAR_MR.AlignWith( U );
        Z1_MC_STAR.AlignWith( U );
        Z1_MC_STAR.ResizeTo( X1.Height(), X1.Width() );
        //--------------------------------------------------------------------//
        X1_VR_STAR = X1;
        X1Trans_STAR_MR.TransposeFrom( X1_VR_STAR );
        Zero( Z1_MC_STAR );
        internal::LocalTrmmAccumulateLUN
        ( TRANSPOSE, diag, alpha, U, X1Trans_STAR_MR, Z1_MC_STAR );

        X1.SumScatterFrom( Z1_MC_STAR );
        //--------------------------------------------------------------------//
        X1_VR_STAR.FreeAlignments();
        X1Trans_STAR_MR.FreeAlignments();
        Z1_MC_STAR.FreeAlignments();

        SlidePartitionRight
        ( XL,     /**/ XR,
          X0, X1, /**/ X2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Beispiel #15
0
void InPlaceRedist( DistMatrix<F>& Z, Int rowAlign, const Base<F>* readBuffer )
{
    typedef Base<F> Real;
    const Grid& g = Z.Grid();
    const Int height = Z.Height();
    const Int width = Z.Width();

    const Int r = g.Height();
    const Int c = g.Width();
    const Int p = r * c;
    const Int row = g.Row();
    const Int col = g.Col();
    const Int rowShift = Z.RowShift();
    const Int colAlign = Z.ColAlign();
    const Int localWidth = Length(width,g.VRRank(),rowAlign,p);

    const Int maxHeight = MaxLength(height,r);
    const Int maxWidth = MaxLength(width,p);
    const Int portionSize = mpi::Pad( maxHeight*maxWidth );
    
    // Allocate our send/recv buffers
    vector<Real> buffer(2*r*portionSize);
    Real* sendBuffer = &buffer[0];
    Real* recvBuffer = &buffer[r*portionSize];

    // Pack
    EL_OUTER_PARALLEL_FOR
    for( Int k=0; k<r; ++k )
    {
        Real* data = &sendBuffer[k*portionSize];

        const Int thisColShift = Shift(k,colAlign,r);
        const Int thisLocalHeight = Length(height,thisColShift,r);

        EL_INNER_PARALLEL_FOR_COLLAPSE2
        for( Int j=0; j<localWidth; ++j )
            for( Int i=0; i<thisLocalHeight; ++i )
                data[i+j*thisLocalHeight] = 
                    readBuffer[thisColShift+i*r+j*height];
    }

    // Communicate
    mpi::AllToAll
    ( sendBuffer, portionSize,
      recvBuffer, portionSize, g.ColComm() );

    // Unpack
    const Int localHeight = Length(height,row,colAlign,r);
    EL_OUTER_PARALLEL_FOR
    for( Int k=0; k<r; ++k )
    {
        const Real* data = &recvBuffer[k*portionSize];

        const Int thisRank = col+k*c;
        const Int thisRowShift = Shift(thisRank,rowAlign,p);
        const Int thisRowOffset = (thisRowShift-rowShift) / c;
        const Int thisLocalWidth = Length(width,thisRowShift,p);

        EL_INNER_PARALLEL_FOR
        for( Int j=0; j<thisLocalWidth; ++j )
        {
            const Real* dataCol = &(data[j*localHeight]);
            Real* thisCol = (Real*)Z.Buffer(0,thisRowOffset+j*r);
            if( IsComplex<F>::value )
            {
                for( Int i=0; i<localHeight; ++i )
                {
                    thisCol[2*i] = dataCol[i];
                    thisCol[2*i+1] = 0;
                }
            }
            else
            {
                MemCopy( thisCol, dataCol, localHeight );
            }
        }
    }
}
Beispiel #16
0
inline void
HermitianTridiagL( DistMatrix<R>& A )
{
#ifndef RELEASE
    PushCallStack("internal::HermitianTridiagL");
    if( A.Height() != A.Width() )
        throw std::logic_error("A must be square.");
#endif
    const Grid& g = A.Grid();

    // Matrix views 
    DistMatrix<R> 
        ATL(g), ATR(g),  A00(g), A01(g), A02(g), 
        ABL(g), ABR(g),  A10(g), A11(g), A12(g),
                             A20(g), A21(g), A22(g);

    // Temporary distributions
    DistMatrix<R> WPan(g);
    DistMatrix<R,STAR,STAR> A11_STAR_STAR(g);
    DistMatrix<R,MC,  STAR> APan_MC_STAR(g),  A11_MC_STAR(g),
                                              A21_MC_STAR(g);
    DistMatrix<R,MR,  STAR> APan_MR_STAR(g),  A11_MR_STAR(g),
                                              A21_MR_STAR(g);
    DistMatrix<R,MC,  STAR> WPan_MC_STAR(g),  W11_MC_STAR(g),
                                              W21_MC_STAR(g);
    DistMatrix<R,MR,  STAR> WPan_MR_STAR(g),  W11_MR_STAR(g),
                                              W21_MR_STAR(g);

    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    while( ATL.Height() < A.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        if( A22.Height() > 0 )
        {
            WPan.AlignWith( A11 );
            APan_MC_STAR.AlignWith( A11 );
            WPan_MC_STAR.AlignWith( A11 );
            APan_MR_STAR.AlignWith( A11 );
            WPan_MR_STAR.AlignWith( A11 );
            //----------------------------------------------------------------//
            WPan.ResizeTo( ABR.Height(), A11.Width() );
            APan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() );
            WPan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() );
            APan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() );
            WPan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() );

            HermitianPanelTridiagL
            ( ABR, WPan, 
              APan_MC_STAR, APan_MR_STAR, WPan_MC_STAR, WPan_MR_STAR );

            PartitionDown
            ( APan_MC_STAR, A11_MC_STAR,
                            A21_MC_STAR, A11.Height() );
            PartitionDown
            ( APan_MR_STAR, A11_MR_STAR,
                            A21_MR_STAR, A11.Height() );
            PartitionDown
            ( WPan_MC_STAR, W11_MC_STAR,
                            W21_MC_STAR, A11.Height() );
            PartitionDown
            ( WPan_MR_STAR, W11_MR_STAR,
                            W21_MR_STAR, A11.Height() );

            LocalTrr2k
            ( LOWER, TRANSPOSE, TRANSPOSE,
              R(-1), A21_MC_STAR, W21_MR_STAR,
                     W21_MC_STAR, A21_MR_STAR,
              R(1),  A22 );
            //----------------------------------------------------------------//
            WPan_MR_STAR.FreeAlignments();
            APan_MR_STAR.FreeAlignments();
            WPan_MC_STAR.FreeAlignments();
            APan_MC_STAR.FreeAlignments();
            WPan.FreeAlignments();
        }
        else
        {
            A11_STAR_STAR = A11;
            HermitianTridiag( LOWER, A11_STAR_STAR.LocalMatrix() );
            A11 = A11_STAR_STAR;
        }

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Beispiel #17
0
inline void
Syr2kLT
( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::Syr2kLT");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( A.Width() != C.Height() || 
        A.Width() != C.Width()  ||
        B.Width() != C.Height() ||
        B.Width() != C.Width()  ||
        A.Height() != B.Height()  )
    {
        std::ostringstream msg;
        msg << "Nonconformal Syr2kLT:\n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> AT(g),  A0(g),
                  AB(g),  A1(g),
                          A2(g);
    DistMatrix<T> BT(g),  B0(g),
                  BB(g),  B1(g),
                          B2(g);

    // Temporary distributions
    DistMatrix<T,MR,  STAR> A1Trans_MR_STAR(g);
    DistMatrix<T,MR,  STAR> B1Trans_MR_STAR(g);
    DistMatrix<T,STAR,VR  > A1_STAR_VR(g);
    DistMatrix<T,STAR,VR  > B1_STAR_VR(g);
    DistMatrix<T,STAR,MC  > A1_STAR_MC(g);
    DistMatrix<T,STAR,MC  > B1_STAR_MC(g);

    A1Trans_MR_STAR.AlignWith( C );
    B1Trans_MR_STAR.AlignWith( C );
    A1_STAR_MC.AlignWith( C );
    B1_STAR_MC.AlignWith( C );

    // Start the algorithm
    ScaleTrapezoid( beta, LEFT, LOWER, 0, C );
    LockedPartitionDown
    ( A, AT, 
         AB, 0 );
    LockedPartitionDown
    ( B, BT,
         BB, 0 );
    while( AB.Height() > 0 )
    {
        LockedRepartitionDown
        ( AT,  A0,
         /**/ /**/
               A1,
          AB,  A2 );

        LockedRepartitionDown
        ( BT,  B0,
         /**/ /**/
               B1,
          BB,  B2 );

        //--------------------------------------------------------------------//
        A1Trans_MR_STAR.TransposeFrom( A1 );
        A1_STAR_VR.TransposeFrom( A1Trans_MR_STAR );
        A1_STAR_MC = A1_STAR_VR;

        B1Trans_MR_STAR.TransposeFrom( B1 );
        B1_STAR_VR.TransposeFrom( B1Trans_MR_STAR );
        B1_STAR_MC = B1_STAR_VR;

        LocalTrr2k
        ( LOWER, TRANSPOSE, TRANSPOSE, TRANSPOSE, TRANSPOSE, 
          alpha, A1_STAR_MC, B1Trans_MR_STAR,
                 B1_STAR_MC, A1Trans_MR_STAR,
          T(1),  C );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDown
        ( AT,  A0,
               A1,
         /**/ /**/
          AB,  A2 );

        SlideLockedPartitionDown
        ( BT,  B0,
               B1,
         /**/ /**/
          BB,  B2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Beispiel #18
0
void TestCorrectness
( UpperOrLower uplo, 
  const DistMatrix<F>& A, 
  const DistMatrix<F,STAR,STAR>& t,
        DistMatrix<F>& AOrig,
  bool print, bool display )
{
    typedef Base<F> Real;
    const Grid& g = A.Grid();
    const Int n = AOrig.Height();
    const Real infNormAOrig = InfinityNorm( AOrig );
    const Real frobNormAOrig = FrobeniusNorm( AOrig );
    if( g.Rank() == 0 )
        cout << "Testing error..." << endl;

    // Set H to the appropriate Hessenberg portion of A
    DistMatrix<F> H( A );
    if( uplo == LOWER )
        MakeTrapezoidal( LOWER, H, 1 );
    else
        MakeTrapezoidal( UPPER, H, -1 );
    if( print )
        Print( H, "Hessenberg" );
    if( display )
        Display( H, "Bidiagonal" );

    if( print || display )
    {
        DistMatrix<F> Q(g);
        Identity( Q, n, n );
        hessenberg::ApplyQ( LEFT, uplo, NORMAL, A, t, Q );
        if( print )
            Print( Q, "Q" );
        if( display )
            Display( Q, "Q" );
    }

    // Reverse the accumulated Householder transforms
    hessenberg::ApplyQ( LEFT, uplo, ADJOINT, A, t, AOrig );
    hessenberg::ApplyQ( RIGHT, uplo, NORMAL, A, t, AOrig );
    if( print )
        Print( AOrig, "Manual Hessenberg" );
    if( display )
        Display( AOrig, "Manual Hessenberg" );

    // Compare the appropriate portion of AOrig and B
    if( uplo == LOWER )
        MakeTrapezoidal( LOWER, AOrig, 1 );
    else
        MakeTrapezoidal( UPPER, AOrig, -1 );
    H -= AOrig;
    if( print )
        Print( H, "Error in rotated Hessenberg" );
    if( display )
        Display( H, "Error in rotated Hessenberg" );
    const Real infNormError = InfinityNorm( H );
    const Real frobNormError = FrobeniusNorm( H );

    if( g.Rank() == 0 )
    {
        cout << "    ||A||_oo = " << infNormAOrig << "\n"
             << "    ||A||_F  = " << frobNormAOrig << "\n"
             << "    ||H - Q^H A Q||_oo = " << infNormError << "\n"
             << "    ||H - Q^H A Q||_F  = " << frobNormError << endl;
    }
}
Beispiel #19
0
void TestCorrectness
( UpperOrLower uplo,
  UnitOrNonUnit diag,
  const DistMatrix<F>& A,
  const DistMatrix<F>& B,
  const DistMatrix<F>& AOrig,
  bool print )
{
    typedef Base<F> Real;
    const Grid& g = A.Grid();
    const Int m = AOrig.Height();

    const Int k=100;
    DistMatrix<F> X(g), Y(g), Z(g);
    Uniform( X, m, k );
    Y = X;
    Zeros( Z, m, k );

    if( uplo == LOWER )
    {
        // Test correctness by comparing the application of A against a 
        // random set of k vectors to the application of 
        // tril(B)^H AOrig tril(B)
        Trmm( LEFT, LOWER, NORMAL, diag, F(1), B, Y );
        Hemm( LEFT, LOWER, F(1), AOrig, Y, F(0), Z );
        Trmm( LEFT, LOWER, ADJOINT, diag, F(1), B, Z );
        Hemm( LEFT, LOWER, F(-1), A, X, F(1), Z );
        Real infNormAOrig = HermitianInfinityNorm( uplo, AOrig );
        Real frobNormAOrig = HermitianFrobeniusNorm( uplo, AOrig );
        Real infNormA = HermitianInfinityNorm( uplo, A );
        Real frobNormA = HermitianFrobeniusNorm( uplo, A );
        Real oneNormError = OneNorm( Z );
        Real infNormError = InfinityNorm( Z );
        Real frobNormError = FrobeniusNorm( Z );
        OutputFromRoot
        (g.Comm(),
         "||AOrig||_1 = ||AOrig||_oo     = ",infNormAOrig,"\n",Indent(),
         "||AOrig||_F                    = ",frobNormAOrig,"\n",Indent(),
         "||A||_1 = ||A||_oo             = ",infNormA,"\n",Indent(),
         "||A||_F                        = ",frobNormA,"\n",Indent(),
         "||A X - L^H AOrig L X||_1  = ",oneNormError,"\n",Indent(),
         "||A X - L^H AOrig L X||_oo = ",infNormError,"\n",Indent(),
         "||A X - L^H AOrig L X||_F  = ",frobNormError);
    }
    else
    {
        // Test correctness by comparing the application of A against a 
        // random set of k vectors to the application of 
        // triu(B) AOrig triu(B)^H
        Trmm( LEFT, UPPER, ADJOINT, diag, F(1), B, Y );
        Hemm( LEFT, UPPER, F(1), AOrig, Y, F(0), Z );
        Trmm( LEFT, UPPER, NORMAL, diag, F(1), B, Z );
        Hemm( LEFT, UPPER, F(-1), A, X, F(1), Z );
        Real infNormAOrig = HermitianInfinityNorm( uplo, AOrig );
        Real frobNormAOrig = HermitianFrobeniusNorm( uplo, AOrig );
        Real infNormA = HermitianInfinityNorm( uplo, A );
        Real frobNormA = HermitianFrobeniusNorm( uplo, A );
        Real oneNormError = OneNorm( Z );
        Real infNormError = InfinityNorm( Z );
        Real frobNormError = FrobeniusNorm( Z );
        OutputFromRoot
        (g.Comm(),
         "||AOrig||_1 = ||AOrig||_oo     = ",infNormAOrig,"\n",Indent(),
         "||AOrig||_F                    = ",frobNormAOrig,"\n",Indent(),
         "||A||_1 = ||A||_oo             = ",infNormA,"\n",Indent(),
         "||A||_F                        = ",frobNormA,"\n",Indent(),
         "||A X - U AOrig U^H X||_1  = ",oneNormError,"\n",Indent(),
         "||A X - U AOrig U^H X||_oo = ",infNormError,"\n",Indent(),
         "||A X - U AOrig U^H X||_F  = ",frobNormError);
    }
}
Beispiel #20
0
inline void
TrsmLUNSmall
( UnitOrNonUnit diag,
  F alpha, const DistMatrix<F,VC,STAR>& U, DistMatrix<F,VC,STAR>& X,
  bool checkIfSingular )
{
#ifndef RELEASE
    CallStackEntry entry("internal::TrsmLUNSmall");
    if( U.Grid() != X.Grid() )
        LogicError("U and X must be distributed over the same grid");
    if( U.Height() != U.Width() || U.Width() != X.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal TrsmLUN: \n"
            << "  U ~ " << U.Height() << " x " << U.Width() << "\n"
            << "  X ~ " << X.Height() << " x " << X.Width() << "\n";
        LogicError( msg.str() );
    }
    if( U.ColAlignment() != X.ColAlignment() )
        LogicError("U and X are assumed to be aligned");
#endif
    const Grid& g = U.Grid();

    // Matrix views
    DistMatrix<F,VC,STAR> 
        UTL(g), UTR(g),  U00(g), U01(g), U02(g),
        UBL(g), UBR(g),  U10(g), U11(g), U12(g),
                         U20(g), U21(g), U22(g);
    DistMatrix<F,VC,STAR> XT(g),  X0(g),
                          XB(g),  X1(g),
                                  X2(g);

    // Temporary distributions
    DistMatrix<F,STAR,STAR> U11_STAR_STAR(g);
    DistMatrix<F,STAR,STAR> X1_STAR_STAR(g);

    // Start the algorithm
    Scale( alpha, X );
    LockedPartitionUpDiagonal
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    PartitionUp
    ( X, XT,
         XB, 0 );
    while( XT.Height() > 0 )
    {
        LockedRepartitionUpDiagonal
        ( UTL, /**/ UTR,   U00, U01, /**/ U02,
               /**/        U10, U11, /**/ U12,
         /*************/  /******************/
          UBL, /**/ UBR,   U20, U21, /**/ U22 );

        RepartitionUp
        ( XT,  X0,
               X1,
         /**/ /**/
          XB,  X2 );

        //--------------------------------------------------------------------//
        U11_STAR_STAR = U11; // U11[* ,* ] <- U11[VC,* ]
        X1_STAR_STAR = X1;   // X1[* ,* ] <- X1[VC,* ]
        
        // X1[* ,* ] := U11^-1[* ,* ] X1[* ,* ]
        LocalTrsm
        ( LEFT, UPPER, NORMAL, diag,
          F(1), U11_STAR_STAR, X1_STAR_STAR, checkIfSingular );
        X1 = X1_STAR_STAR;

        // X0[VC,* ] -= U01[VC,* ] X1[* ,* ]
        LocalGemm( NORMAL, NORMAL, F(-1), U01, X1_STAR_STAR, F(1), X0 );
        //--------------------------------------------------------------------//

        SlideLockedPartitionUpDiagonal
        ( UTL, /**/ UTR,  U00, /**/ U01, U02,
         /*************/ /******************/
               /**/       U10, /**/ U11, U12,
          UBL, /**/ UBR,  U20, /**/ U21, U22 );

        SlidePartitionUp
        ( XT,  X0,
         /**/ /**/
               X1,
          XB,  X2 );
    }
}
Beispiel #21
0
inline void
CholeskyLVar2( DistMatrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("internal::CholeskyLVar2");
    if( A.Height() != A.Width() )
        throw std::logic_error
        ("Can only compute Cholesky factor of square matrices");
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<F> 
        ATL(g), ATR(g),   A00(g), A01(g), A02(g),
        ABL(g), ABR(g),   A10(g), A11(g), A12(g),
                          A20(g), A21(g), A22(g);

    // Temporary distributions
    DistMatrix<F,MR,  STAR> A10Adj_MR_STAR(g);
    DistMatrix<F,STAR,STAR> A11_STAR_STAR(g);
    DistMatrix<F,VC,  STAR> A21_VC_STAR(g);
    DistMatrix<F,MC,  STAR> X11_MC_STAR(g);
    DistMatrix<F,MC,  STAR> X21_MC_STAR(g);

    // Start the algorithm
    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    while( ATL.Height() < A.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        A10Adj_MR_STAR.AlignWith( A10 );
        X11_MC_STAR.AlignWith( A10 );
        X21_MC_STAR.AlignWith( A20 );
        X11_MC_STAR.ResizeTo( A11.Height(), A11.Width() );
        X21_MC_STAR.ResizeTo( A21.Height(), A21.Width() );
        //--------------------------------------------------------------------//
        A10Adj_MR_STAR.AdjointFrom( A10 );
        LocalGemm
        ( NORMAL, NORMAL, F(1), A10, A10Adj_MR_STAR, F(0), X11_MC_STAR );
        A11.SumScatterUpdate( F(-1), X11_MC_STAR );

        A11_STAR_STAR = A11;
        LocalCholesky( LOWER, A11_STAR_STAR );
        A11 = A11_STAR_STAR;

        LocalGemm
        ( NORMAL, NORMAL, F(1), A20, A10Adj_MR_STAR, F(0), X21_MC_STAR );
        A21.SumScatterUpdate( F(-1), X21_MC_STAR );

        A21_VC_STAR = A21;
        LocalTrsm
        ( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), A11_STAR_STAR, A21_VC_STAR );
        A21 = A21_VC_STAR;
        //--------------------------------------------------------------------//
        A10Adj_MR_STAR.FreeAlignments();
        X11_MC_STAR.FreeAlignments();
        X21_MC_STAR.FreeAlignments();

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Beispiel #22
0
inline void
TrsmLUNLarge
( UnitOrNonUnit diag,
  F alpha, const DistMatrix<F>& U, DistMatrix<F>& X,
  bool checkIfSingular )
{
#ifndef RELEASE
    CallStackEntry entry("internal::TrsmLUNLarge");
#endif
    const Grid& g = U.Grid();

    // Matrix views
    DistMatrix<F> 
        UTL(g), UTR(g),  U00(g), U01(g), U02(g),
        UBL(g), UBR(g),  U10(g), U11(g), U12(g),
                         U20(g), U21(g), U22(g);
    DistMatrix<F> XT(g),  X0(g),
                  XB(g),  X1(g),
                          X2(g);

    // Temporary distributions
    DistMatrix<F,MC,  STAR> U01_MC_STAR(g);
    DistMatrix<F,STAR,STAR> U11_STAR_STAR(g);
    DistMatrix<F,STAR,MR  > X1_STAR_MR(g);
    DistMatrix<F,STAR,VR  > X1_STAR_VR(g);

    // Start the algorithm
    Scale( alpha, X );
    LockedPartitionUpDiagonal
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    PartitionUp
    ( X, XT,
         XB, 0 );
    while( XT.Height() > 0 )
    {
        LockedRepartitionUpDiagonal
        ( UTL, /**/ UTR,   U00, U01, /**/ U02,
               /**/        U10, U11, /**/ U12,
         /*************/  /******************/
          UBL, /**/ UBR,   U20, U21, /**/ U22 );

        RepartitionUp
        ( XT,  X0,
               X1,
         /**/ /**/
          XB,  X2 );

        U01_MC_STAR.AlignWith( X0 );
        X1_STAR_MR.AlignWith( X0 );
        //--------------------------------------------------------------------//
        U11_STAR_STAR = U11; // U11[* ,* ] <- U11[MC,MR]
        X1_STAR_VR    = X1;  // X1[* ,VR] <- X1[MC,MR]
        
        // X1[* ,VR] := U11^-1[* ,* ] X1[* ,VR]
        LocalTrsm
        ( LEFT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, X1_STAR_VR,
          checkIfSingular );

        X1_STAR_MR  = X1_STAR_VR; // X1[* ,MR]  <- X1[* ,VR]
        X1          = X1_STAR_MR; // X1[MC,MR] <- X1[* ,MR]
        U01_MC_STAR = U01;        // U01[MC,* ] <- U01[MC,MR]

        // X0[MC,MR] -= U01[MC,* ] X1[* ,MR]
        LocalGemm( NORMAL, NORMAL, F(-1), U01_MC_STAR, X1_STAR_MR, F(1), X0 );
        //--------------------------------------------------------------------//

        SlideLockedPartitionUpDiagonal
        ( UTL, /**/ UTR,  U00, /**/ U01, U02,
         /*************/ /******************/
               /**/       U10, /**/ U11, U12,
          UBL, /**/ UBR,  U20, /**/ U21, U22 );

        SlidePartitionUp
        ( XT,  X0,
         /**/ /**/
               X1,
          XB,  X2 );
    }
}
Beispiel #23
0
void L( DistMatrix<F>& A, DistMatrix<F,STAR,STAR>& t )
{
#ifndef RELEASE
    CallStackEntry entry("hermitian_tridiag::L");
    if( A.Grid() != t.Grid() )
        LogicError("{A,t} must be distributed over the same grid");
    if( A.Height() != A.Width() )
        LogicError("A must be square");
    if( t.Viewing() )
        LogicError("t must not be a view");
#endif
    const Grid& g = A.Grid();
    DistMatrix<F,MD,STAR> tDiag(g);
    tDiag.AlignWithDiagonal( A, -1 );
    tDiag.ResizeTo( A.Height()-1, 1 );

    // Matrix views 
    DistMatrix<F> 
        ATL(g), ATR(g),  A00(g), A01(g), A02(g), 
        ABL(g), ABR(g),  A10(g), A11(g), A12(g),
                         A20(g), A21(g), A22(g);
    DistMatrix<F,MD,STAR> tT(g),  t0(g), 
                          tB(g),  t1(g),
                                  t2(g);

    // Temporary distributions
    DistMatrix<F> WPan(g);
    DistMatrix<F,STAR,STAR> t1_STAR_STAR(g);
    DistMatrix<F,STAR,STAR> A11_STAR_STAR(g);
    DistMatrix<F,MC,  STAR> APan_MC_STAR(g),  A11_MC_STAR(g),
                                              A21_MC_STAR(g);
    DistMatrix<F,MR,  STAR> APan_MR_STAR(g),  A11_MR_STAR(g),
                                              A21_MR_STAR(g);
    DistMatrix<F,MC,  STAR> WPan_MC_STAR(g),  W11_MC_STAR(g),
                                              W21_MC_STAR(g);
    DistMatrix<F,MR,  STAR> WPan_MR_STAR(g),  W11_MR_STAR(g),
                                              W21_MR_STAR(g);

    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    PartitionDown
    ( tDiag, tT,
             tB, 0 );
    while( ATL.Height() < A.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        RepartitionDown
        ( tT,  t0,
         /**/ /**/
               t1,
          tB,  t2 );
        
        if( A22.Height() > 0 )
        {
            WPan.AlignWith( A11 );
            APan_MC_STAR.AlignWith( A11 );
            WPan_MC_STAR.AlignWith( A11 );
            APan_MR_STAR.AlignWith( A11 );
            WPan_MR_STAR.AlignWith( A11 );
            //----------------------------------------------------------------//
            WPan.ResizeTo( ABR.Height(), A11.Width() );
            APan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() );
            WPan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() );
            APan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() );
            WPan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() );

            hermitian_tridiag::PanelL
            ( ABR, WPan, t1,
              APan_MC_STAR, APan_MR_STAR, WPan_MC_STAR, WPan_MR_STAR );

            PartitionDown
            ( APan_MC_STAR, A11_MC_STAR,
                            A21_MC_STAR, A11.Height() );
            PartitionDown
            ( APan_MR_STAR, A11_MR_STAR,
                            A21_MR_STAR, A11.Height() );
            PartitionDown
            ( WPan_MC_STAR, W11_MC_STAR,
                            W21_MC_STAR, A11.Height() );
            PartitionDown
            ( WPan_MR_STAR, W11_MR_STAR,
                            W21_MR_STAR, A11.Height() );

            LocalTrr2k
            ( LOWER, ADJOINT, ADJOINT,
              F(-1), A21_MC_STAR, W21_MR_STAR,
                     W21_MC_STAR, A21_MR_STAR,
              F(1),  A22 );
            //----------------------------------------------------------------//
        }
        else
        {
            A11_STAR_STAR = A11;
            t1_STAR_STAR.ResizeTo( t1.Height(), 1 );

            HermitianTridiag
            ( LOWER, A11_STAR_STAR.Matrix(), t1_STAR_STAR.Matrix() );

            A11 = A11_STAR_STAR;
            t1 = t1_STAR_STAR;
        }

        SlidePartitionDown
        ( tT,  t0,
               t1,
         /**/ /**/
          tB,  t2 );

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );
    }

    // Redistribute from matrix-diagonal form to fully replicated
    t = tDiag;
}
Beispiel #24
0
inline void
ApplyPackedReflectorsRLHF
( Conjugation conjugation, int offset, 
  const DistMatrix<Complex<R> >& H,
  const DistMatrix<Complex<R>,MD,STAR>& t,
        DistMatrix<Complex<R> >& A )
{
#ifndef RELEASE
    PushCallStack("internal::ApplyPackedReflectorsRLHF");
    if( H.Grid() != t.Grid() || t.Grid() != A.Grid() )
        throw std::logic_error
        ("{H,t,A} must be distributed over the same grid");
    if( offset > 0 || offset < -H.Width() )
        throw std::logic_error("Transforms out of bounds");
    if( H.Width() != A.Width() )
        throw std::logic_error
        ("Width of transforms must equal width of target matrix");
    if( t.Height() != H.DiagonalLength( offset ) )
        throw std::logic_error("t must be the same length as H's offset diag");
    if( !t.AlignedWithDiagonal( H, offset ) )
        throw std::logic_error("t must be aligned with H's 'offset' diagonal");
#endif
    typedef Complex<R> C;
    const Grid& g = H.Grid();

    DistMatrix<C>
        HTL(g), HTR(g),  H00(g), H01(g), H02(g),  HPan(g), HPanCopy(g),
        HBL(g), HBR(g),  H10(g), H11(g), H12(g),
                         H20(g), H21(g), H22(g);
    DistMatrix<C> ALeft(g);
    DistMatrix<C,MD,STAR>
        tT(g),  t0(g),
        tB(g),  t1(g),
                t2(g);

    DistMatrix<C,STAR,VR  > HPan_STAR_VR(g);
    DistMatrix<C,STAR,MR  > HPan_STAR_MR(g);
    DistMatrix<C,STAR,STAR> t1_STAR_STAR(g);
    DistMatrix<C,STAR,STAR> SInv_STAR_STAR(g);
    DistMatrix<C,STAR,MC  > ZAdj_STAR_MC(g);
    DistMatrix<C,STAR,VC  > ZAdj_STAR_VC(g);

    LockedPartitionDownDiagonal
    ( H, HTL, HTR,
         HBL, HBR, 0 );
    LockedPartitionDown
    ( t, tT,
         tB, 0 );
    while( HTL.Height() < H.Height() && HTL.Width() < H.Width() )
    {
        LockedRepartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );

        const int HPanWidth = H10.Width() + H11.Width();
        const int HPanOffset = 
            std::min( H11.Height(), std::max(-offset-H00.Height(),0) );
        const int HPanHeight = H11.Height()-HPanOffset;
        HPan.LockedView( H, H00.Height()+HPanOffset, 0, HPanHeight, HPanWidth );

        LockedRepartitionDown
        ( tT,  t0,
         /**/ /**/
               t1,
          tB,  t2, HPanHeight );

        ALeft.View( A, 0, 0, A.Height(), HPanWidth );

        HPan_STAR_MR.AlignWith( ALeft );
        ZAdj_STAR_MC.AlignWith( ALeft );
        ZAdj_STAR_VC.AlignWith( ALeft );
        Zeros( HPan.Height(), ALeft.Height(), ZAdj_STAR_MC );
        Zeros( HPan.Height(), HPan.Height(), SInv_STAR_STAR );
        //--------------------------------------------------------------------//
        HPanCopy = HPan;
        MakeTrapezoidal( RIGHT, LOWER, offset, HPanCopy );
        SetDiagonalToOne( RIGHT, offset, HPanCopy );

        HPan_STAR_VR = HPanCopy;
        Herk
        ( UPPER, NORMAL,
          C(1), HPan_STAR_VR.LockedLocalMatrix(),
          C(0), SInv_STAR_STAR.LocalMatrix() );
        SInv_STAR_STAR.SumOverGrid();
        t1_STAR_STAR = t1;
        FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR );

        HPan_STAR_MR = HPan_STAR_VR;
        LocalGemm
        ( NORMAL, ADJOINT,
          C(1), HPan_STAR_MR, ALeft, C(0), ZAdj_STAR_MC );
        ZAdj_STAR_VC.SumScatterFrom( ZAdj_STAR_MC );

        LocalTrsm
        ( LEFT, UPPER, ADJOINT, NON_UNIT,
          C(1), SInv_STAR_STAR, ZAdj_STAR_VC );

        ZAdj_STAR_MC = ZAdj_STAR_VC;
        LocalGemm
        ( ADJOINT, NORMAL,
          C(-1), ZAdj_STAR_MC, HPan_STAR_MR, C(1), ALeft );
        //--------------------------------------------------------------------//
        HPan_STAR_MR.FreeAlignments();
        ZAdj_STAR_MC.FreeAlignments();
        ZAdj_STAR_VC.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );

        SlideLockedPartitionDown
        ( tT,  t0,
               t1,
         /**/ /**/
          tB,  t2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Beispiel #25
0
inline void
LU( DistMatrix<F>& A, DistMatrix<int,VC,STAR>& p )
{
#ifndef RELEASE
    PushCallStack("LU");
    if( A.Grid() != p.Grid() )
        throw std::logic_error("{A,p} must be distributed over the same grid");
    if( p.Viewing() && 
        (std::min(A.Height(),A.Width()) != p.Height() || p.Width() != 1) ) 
        throw std::logic_error
        ("p must be a vector of the same height as the min dimension of A.");
#endif
    const Grid& g = A.Grid();
    if( !p.Viewing() )
        p.ResizeTo( std::min(A.Height(),A.Width()), 1 );

    // Matrix views
    DistMatrix<F>
        ATL(g), ATR(g),  A00(g), A01(g), A02(g),  AB(g),
        ABL(g), ABR(g),  A10(g), A11(g), A12(g),  
                         A20(g), A21(g), A22(g);

    DistMatrix<int,VC,STAR>
        pT(g),  p0(g), 
        pB(g),  p1(g),
                p2(g);

    // Temporary distributions
    DistMatrix<F,  STAR,STAR> A11_STAR_STAR(g);
    DistMatrix<F,  MC,  STAR> A21_MC_STAR(g);
    DistMatrix<F,  STAR,VR  > A12_STAR_VR(g);
    DistMatrix<F,  STAR,MR  > A12_STAR_MR(g);
    DistMatrix<int,STAR,STAR> p1_STAR_STAR(g);

    // Pivot composition
    std::vector<int> image, preimage;

    // Start the algorithm
    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    PartitionDown
    ( p, pT,
         pB, 0 );
    while( ATL.Height() < A.Height() && ATL.Width() < A.Width() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        RepartitionDown
        ( pT,  p0,
         /**/ /**/
               p1,
          pB,  p2 );

        View1x2( AB, ABL, ABR );

        const int pivotOffset = A01.Height();
        A12_STAR_VR.AlignWith( A22 );
        A12_STAR_MR.AlignWith( A22 );
        A21_MC_STAR.AlignWith( A22 );
        A11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() );
        p1_STAR_STAR.ResizeTo( p1.Height(), 1 );
        //--------------------------------------------------------------------//
        A21_MC_STAR = A21;
        A11_STAR_STAR = A11;
        internal::PanelLU
        ( A11_STAR_STAR, A21_MC_STAR, p1_STAR_STAR, pivotOffset );
        internal::ComposePanelPivots
        ( p1_STAR_STAR, pivotOffset, image, preimage );
        ApplyRowPivots( AB, image, preimage );

        // Perhaps we should give up perfectly distributing this operation since
        // it's total contribution is only O(n^2)
        A12_STAR_VR = A12;
        internal::LocalTrsm
        ( LEFT, LOWER, NORMAL, UNIT, F(1), A11_STAR_STAR, A12_STAR_VR );

        A12_STAR_MR = A12_STAR_VR;
        internal::LocalGemm
        ( NORMAL, NORMAL, F(-1), A21_MC_STAR, A12_STAR_MR, F(1), A22 );

        A11 = A11_STAR_STAR;
        A12 = A12_STAR_MR;
        A21 = A21_MC_STAR;
        p1 = p1_STAR_STAR;
        //--------------------------------------------------------------------//
        A12_STAR_VR.FreeAlignments();
        A12_STAR_MR.FreeAlignments();
        A21_MC_STAR.FreeAlignments();

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        SlidePartitionDown
        ( pT,  p0,
               p1,
         /**/ /**/
          pB,  p2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Beispiel #26
0
inline void
ApplyPackedReflectorsRLHF
( int offset, 
  const DistMatrix<R>& H,
        DistMatrix<R>& A )
{
#ifndef RELEASE
    PushCallStack("internal::ApplyPackedReflectorsRLHF");
    if( H.Grid() != A.Grid() )
        throw std::logic_error("{H,A} must be distributed over the same grid");
    if( offset > 0 || offset < -H.Width() )
        throw std::logic_error("Transforms out of bounds");
    if( H.Width() != A.Width() )
        throw std::logic_error
        ("Width of transforms must equal width of target matrix");
#endif
    const Grid& g = H.Grid();

    DistMatrix<R>
        HTL(g), HTR(g),  H00(g), H01(g), H02(g),  HPan(g), HPanCopy(g),
        HBL(g), HBR(g),  H10(g), H11(g), H12(g),
                         H20(g), H21(g), H22(g);
    DistMatrix<R> ALeft(g);

    DistMatrix<R,STAR,VR  > HPan_STAR_VR(g);
    DistMatrix<R,STAR,MR  > HPan_STAR_MR(g);
    DistMatrix<R,STAR,STAR> SInv_STAR_STAR(g);
    DistMatrix<R,STAR,MC  > ZTrans_STAR_MC(g);
    DistMatrix<R,STAR,VC  > ZTrans_STAR_VC(g);

    LockedPartitionDownDiagonal
    ( H, HTL, HTR,
         HBL, HBR, 0 );
    while( HTL.Height() < H.Height() && HTL.Width() < H.Width() )
    {
        LockedRepartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );

        const int HPanWidth = H10.Width() + H11.Width();
        const int HPanOffset = 
            std::min( H11.Height(), std::max(-offset-H00.Height(),0) );
        const int HPanHeight = H11.Height()-HPanOffset;
        HPan.LockedView( H, H00.Height()+HPanOffset, 0, HPanHeight, HPanWidth );

        ALeft.View( A, 0, 0, A.Height(), HPanWidth );

        HPan_STAR_MR.AlignWith( ALeft );
        ZTrans_STAR_MC.AlignWith( ALeft );
        ZTrans_STAR_VC.AlignWith( ALeft );
        Zeros( HPan.Height(), ALeft.Height(), ZTrans_STAR_MC );
        Zeros( HPan.Height(), HPan.Height(), SInv_STAR_STAR );
        //--------------------------------------------------------------------//
        HPanCopy = HPan;
        MakeTrapezoidal( RIGHT, LOWER, offset, HPanCopy );
        SetDiagonalToOne( RIGHT, offset, HPanCopy );

        HPan_STAR_VR = HPanCopy;
        Syrk
        ( UPPER, NORMAL,
          R(1), HPan_STAR_VR.LockedLocalMatrix(),
          R(0), SInv_STAR_STAR.LocalMatrix() );
        SInv_STAR_STAR.SumOverGrid();
        HalveMainDiagonal( SInv_STAR_STAR );

        HPan_STAR_MR = HPan_STAR_VR;
        LocalGemm
        ( NORMAL, TRANSPOSE,
          R(1), HPan_STAR_MR, ALeft, R(0), ZTrans_STAR_MC );
        ZTrans_STAR_VC.SumScatterFrom( ZTrans_STAR_MC );

        LocalTrsm
        ( LEFT, UPPER, TRANSPOSE, NON_UNIT,
          R(1), SInv_STAR_STAR, ZTrans_STAR_VC );

        ZTrans_STAR_MC = ZTrans_STAR_VC;
        LocalGemm
        ( TRANSPOSE, NORMAL,
          R(-1), ZTrans_STAR_MC, HPan_STAR_MR, R(1), ALeft );
        //--------------------------------------------------------------------//
        HPan_STAR_MR.FreeAlignments();
        ZTrans_STAR_MC.FreeAlignments();
        ZTrans_STAR_VC.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Beispiel #27
0
inline void
internal::SymmRUC
( T alpha, const DistMatrix<T,MC,MR>& A,
           const DistMatrix<T,MC,MR>& B,
  T beta,        DistMatrix<T,MC,MR>& C )
{
#ifndef RELEASE
    PushCallStack("internal::SymmRUC");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error("{A,B,C} must be distributed on the same grid");
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T,MC,MR> 
        ATL(g), ATR(g),  A00(g), A01(g), A02(g),  AColPan(g),
        ABL(g), ABR(g),  A10(g), A11(g), A12(g),  ARowPan(g),
                         A20(g), A21(g), A22(g);

    DistMatrix<T,MC,MR> BL(g), BR(g),
                        B0(g), B1(g), B2(g);

    DistMatrix<T,MC,MR> CL(g), CR(g),
                        C0(g), C1(g), C2(g),
                        CLeft(g), CRight(g);

    // Temporary distributions
    DistMatrix<T,MC,  STAR> B1_MC_STAR(g);
    DistMatrix<T,VR,  STAR> AColPan_VR_STAR(g);
    DistMatrix<T,STAR,MR  > AColPanTrans_STAR_MR(g);
    DistMatrix<T,MR,  STAR> ARowPanTrans_MR_STAR(g);

    // Start the algorithm
    Scal( beta, C );
    LockedPartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    LockedPartitionRight( B, BL, BR, 0 );
    PartitionRight( C, CL, CR, 0 );
    while( CR.Width() > 0 )
    {
        LockedRepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        LockedRepartitionRight
        ( BL, /**/ BR,
          B0, /**/ B1, B2 );

        RepartitionRight
        ( CL, /**/ CR,
          C0, /**/ C1, C2 );

        ARowPan.LockedView1x2( A11, A12 );

        AColPan.LockedView2x1
        ( A01,
          A11 );

        CLeft.View1x2( C0, C1 );

        CRight.View1x2( C1, C2 );

        B1_MC_STAR.AlignWith( C );
        AColPan_VR_STAR.AlignWith( CLeft );
        AColPanTrans_STAR_MR.AlignWith( CLeft );
        ARowPanTrans_MR_STAR.AlignWith( CRight );
        //--------------------------------------------------------------------//
        B1_MC_STAR = B1;

        AColPan_VR_STAR = AColPan;
        AColPanTrans_STAR_MR.TransposeFrom( AColPan_VR_STAR );
        ARowPanTrans_MR_STAR.TransposeFrom( ARowPan );
        MakeTrapezoidal( LEFT,  LOWER,  0, ARowPanTrans_MR_STAR );
        MakeTrapezoidal( RIGHT, LOWER, -1, AColPanTrans_STAR_MR );

        internal::LocalGemm
        ( NORMAL, TRANSPOSE, 
          alpha, B1_MC_STAR, ARowPanTrans_MR_STAR, (T)1, CRight );

        internal::LocalGemm
        ( NORMAL, NORMAL,
          alpha, B1_MC_STAR, AColPanTrans_STAR_MR, (T)1, CLeft );
        //--------------------------------------------------------------------//
        B1_MC_STAR.FreeAlignments();
        AColPan_VR_STAR.FreeAlignments();
        AColPanTrans_STAR_MR.FreeAlignments();
        ARowPanTrans_MR_STAR.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        SlideLockedPartitionRight
        ( BL,     /**/ BR,
          B0, B1, /**/ B2 );

        SlidePartitionRight
        ( CL,     /**/ CR,
          C0, C1, /**/ C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Beispiel #28
0
inline void
TrdtrmmUVar1( Orientation orientation, DistMatrix<F,MC,MR>& U )
{
#ifndef RELEASE
    PushCallStack("internal::TrdtrmmUVar1");
    if( U.Height() != U.Width() )
        throw std::logic_error("U must be square");
    if( orientation == NORMAL )
        throw std::logic_error("Orientation must be (conjugate-)transpose");
#endif
    const Grid& g = U.Grid();

    // Matrix views
    DistMatrix<F,MC,MR>
        UTL(g), UTR(g),  U00(g), U01(g), U02(g),
        UBL(g), UBR(g),  U10(g), U11(g), U12(g),
                         U20(g), U21(g), U22(g);
    DistMatrix<F,MD,STAR> d1(g);

    // Temporary distributions
    DistMatrix<F,MC,  STAR> S01_MC_STAR(g);
    DistMatrix<F,VC,  STAR> S01_VC_STAR(g);
    DistMatrix<F,VR,  STAR> U01_VR_STAR(g);
    DistMatrix<F,STAR,MR  > U01AdjOrTrans_STAR_MR(g);
    DistMatrix<F,STAR,STAR> U11_STAR_STAR(g);

    S01_MC_STAR.AlignWith( U );
    S01_VC_STAR.AlignWith( U );
    U01_VR_STAR.AlignWith( U );
    U01AdjOrTrans_STAR_MR.AlignWith( U );

    PartitionDownDiagonal
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    while( UTL.Height() < U.Height() && UTL.Width() < U.Height() )
    {
        RepartitionDownDiagonal 
        ( UTL, /**/ UTR,  U00, /**/ U01, U02,
         /*************/ /******************/
               /**/       U10, /**/ U11, U12,
          UBL, /**/ UBR,  U20, /**/ U21, U22 );

        //--------------------------------------------------------------------//
        U11.GetDiagonal( d1 );
        S01_MC_STAR = U01;
        S01_VC_STAR = S01_MC_STAR;
        U01_VR_STAR = S01_VC_STAR;
        if( orientation == TRANSPOSE )
        {
            DiagonalSolve( RIGHT, NORMAL, d1, U01_VR_STAR );
            U01AdjOrTrans_STAR_MR.TransposeFrom( U01_VR_STAR );
        }
        else
        {
            DiagonalSolve( RIGHT, ADJOINT, d1, U01_VR_STAR );
            U01AdjOrTrans_STAR_MR.AdjointFrom( U01_VR_STAR );
        }
        LocalTrrk( UPPER, F(1), S01_MC_STAR, U01AdjOrTrans_STAR_MR, F(1), U00 );

        U11_STAR_STAR = U11;
        LocalTrmm
        ( RIGHT, UPPER, ADJOINT, UNIT, F(1), U11_STAR_STAR, U01_VR_STAR );
        U01 = U01_VR_STAR;

        LocalTrdtrmm( orientation, UPPER, U11_STAR_STAR );
        U11 = U11_STAR_STAR;
        //--------------------------------------------------------------------//
        d1.FreeAlignments();

        SlidePartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, U01, /**/ U02,
               /**/       U10, U11, /**/ U12,
         /*************/ /******************/
          UBL, /**/ UBR,  U20, U21, /**/ U22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Beispiel #29
0
inline R
Row( DistMatrix<R>& chi, DistMatrix<R>& x )
{
#ifndef RELEASE
    PushCallStack("reflector::Row");
    if( chi.Grid() != x.Grid() )
        throw std::logic_error
        ("chi and x must be distributed over the same grid");
    if( chi.Height() != 1 || chi.Width() != 1 )
        throw std::logic_error("chi must be a scalar");
    if( x.Height() != 1 )
        throw std::logic_error("x must be a row vector");
    if( chi.Grid().Row() != chi.ColAlignment() )
        throw std::logic_error("Reflecting with incorrect row of processes");
    if( x.Grid().Row() != x.ColAlignment() )
        throw std::logic_error("Reflecting with incorrect row of processes");
#endif
    const Grid& grid = x.Grid();
    mpi::Comm rowComm = grid.RowComm();
    const int gridCol = grid.Col();
    const int gridWidth = grid.Width();
    const int rowAlignment = chi.RowAlignment();

    std::vector<R> localNorms(gridWidth);
    R localNorm = Nrm2( x.LockedMatrix() ); 
    mpi::AllGather( &localNorm, 1, &localNorms[0], 1, rowComm );
    R norm = blas::Nrm2( gridWidth, &localNorms[0], 1 );

    if( norm == 0 )
    {
        if( gridCol == rowAlignment )
            chi.SetLocal(0,0,-chi.GetLocal(0,0));
#ifndef RELEASE
        PopCallStack();
#endif
        return R(2);
    }

    R alpha;
    if( gridCol == rowAlignment )
        alpha = chi.GetLocal(0,0);
    mpi::Broadcast( &alpha, 1, rowAlignment, rowComm );

    R beta;
    if( alpha <= 0 )
        beta = lapack::SafeNorm( alpha, norm );
    else
        beta = -lapack::SafeNorm( alpha, norm );

    const R one = 1;
    const R safeMin = lapack::MachineSafeMin<R>();
    const R epsilon = lapack::MachineEpsilon<R>();
    const R safeInv = safeMin/epsilon;
    int count = 0;
    if( Abs(beta) < safeInv )
    {
        R invOfSafeInv = one/safeInv;
        do
        {
            ++count;
            Scale( invOfSafeInv, x );
            alpha *= invOfSafeInv;
            beta *= invOfSafeInv;
        } while( Abs(beta) < safeInv );

        localNorm = Nrm2( x.LockedMatrix() );
        mpi::AllGather( &localNorm, 1, &localNorms[0], 1, rowComm );
        norm = blas::Nrm2( gridWidth, &localNorms[0], 1 );
        if( alpha <= 0 )
            beta = lapack::SafeNorm( alpha, norm );
        else
            beta = -lapack::SafeNorm( alpha, norm );
    }

    R tau = (beta-alpha)/beta;
    Scale( one/(alpha-beta), x );

    for( int j=0; j<count; ++j )
        beta *= safeInv;
    if( gridCol == rowAlignment )
        chi.SetLocal(0,0,beta);
        
#ifndef RELEASE
    PopCallStack();
#endif
    return tau;
}
Beispiel #30
0
inline void
internal::TrmmLUNC
( UnitOrNonUnit diag,
  T alpha, const DistMatrix<T,MC,MR>& U,
                 DistMatrix<T,MC,MR>& X )
{
#ifndef RELEASE
    PushCallStack("internal::TrmmLUNC");
    if( U.Grid() != X.Grid() )
        throw std::logic_error
        ("U and X must be distributed over the same grid");
    if( U.Height() != U.Width() || U.Width() != X.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal TrmmLUN: \n"
            << "  U ~ " << U.Height() << " x " << U.Width() << "\n"
            << "  X ~ " << X.Height() << " x " << X.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = U.Grid();

    // Matrix views
    DistMatrix<T,MC,MR> 
        UTL(g), UTR(g),  U00(g), U01(g), U02(g),
        UBL(g), UBR(g),  U10(g), U11(g), U12(g),
                         U20(g), U21(g), U22(g);

    DistMatrix<T,MC,MR> XT(g),  X0(g),
                        XB(g),  X1(g),
                                X2(g);

    // Temporary distributions
    DistMatrix<T,STAR,STAR> U11_STAR_STAR(g);
    DistMatrix<T,STAR,MC  > U12_STAR_MC(g);
    DistMatrix<T,STAR,VR  > X1_STAR_VR(g);
    DistMatrix<T,MR,  STAR> D1Trans_MR_STAR(g);
    DistMatrix<T,MR,  MC  > D1Trans_MR_MC(g);
    DistMatrix<T,MC,  MR  > D1(g);

    // Start the algorithm
    Scal( alpha, X );
    LockedPartitionDownDiagonal
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    PartitionDown
    ( X, XT,
         XB, 0 );
    while( XB.Height() > 0 )
    {
        LockedRepartitionDownDiagonal
        ( UTL, /**/ UTR,   U00, /**/ U01, U02,
         /*************/  /******************/
               /**/        U10, /**/ U11, U12,
          UBL, /**/ UBR,   U20, /**/ U21, U22 );

        RepartitionDown
        ( XT,  X0,
         /**/ /**/
               X1,
          XB,  X2 );

        U12_STAR_MC.AlignWith( X2 );
        D1Trans_MR_STAR.AlignWith( X1 );
        D1Trans_MR_MC.AlignWith( X1 );
        D1.AlignWith( X1 );
        D1Trans_MR_STAR.ResizeTo( X1.Width(), X1.Height() );
        D1.ResizeTo( X1.Height(), X1.Width() );
        //--------------------------------------------------------------------//
        X1_STAR_VR = X1;
        U11_STAR_STAR = U11;
        internal::LocalTrmm
        ( LEFT, UPPER, NORMAL, diag, (T)1, U11_STAR_STAR, X1_STAR_VR );
        X1 = X1_STAR_VR;
 
        U12_STAR_MC = U12;
        internal::LocalGemm
        ( TRANSPOSE, TRANSPOSE, (T)1, X2, U12_STAR_MC, (T)0, D1Trans_MR_STAR );
        D1Trans_MR_MC.SumScatterFrom( D1Trans_MR_STAR );
        Transpose( D1Trans_MR_MC.LocalMatrix(), D1.LocalMatrix() );
        Axpy( (T)1, D1, X1 );
       //--------------------------------------------------------------------//
        D1.FreeAlignments();
        D1Trans_MR_MC.FreeAlignments();
        D1Trans_MR_STAR.FreeAlignments();
        U12_STAR_MC.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, U01, /**/ U02,
               /**/       U10, U11, /**/ U12,
         /*************/ /******************/
          UBL, /**/ UBR,  U20, U21, /**/ U22 );

        SlidePartitionDown
        ( XT,  X0,
               X1,
         /**/ /**/
          XB,  X2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}