Esempio n. 1
0
// HemmLUC
//
// Blocked in-place update of the distributed matrix C from A and B.
// Judging by the name and by the fact that only blocks on or above the
// diagonal of A (A01, A11, A12) are ever read, this is presumably the
// "left, upper" Hermitian product C := alpha A B + beta C -- confirm
// against the dispatching caller.
//
// Parameters:
//   alpha  scalar handed to both local Gemm calls.
//   A      read-only operand, traversed block by block along its diagonal.
//   B      read-only operand, traversed one panel (B1) per iteration.
//   beta   scalar applied to C once, before the loop starts.
//   C      updated in place.
//
// Unless RELEASE is defined, a std::logic_error is thrown when A, B and C
// do not all share the same grid. No dimension checks are made here.
inline void
HemmLUC( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B,
         T beta, DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::HemmLUC");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
    {
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    }
#endif
    const Grid& grid = A.Grid();

    // Views of A: 2x2 quadrants, the 3x3 repartitioning, and the two panels
    // that are built around the current diagonal block A11.
    DistMatrix<T> ATL(grid), ATR(grid);
    DistMatrix<T> A00(grid), A01(grid), A02(grid);
    DistMatrix<T> AColPan(grid);
    DistMatrix<T> ABL(grid), ABR(grid);
    DistMatrix<T> A10(grid), A11(grid), A12(grid);
    DistMatrix<T> ARowPan(grid);
    DistMatrix<T> A20(grid), A21(grid), A22(grid);

    // Views of B: top/bottom split and the three-way repartitioning.
    DistMatrix<T> BT(grid), B0(grid);
    DistMatrix<T> BB(grid), B1(grid);
    DistMatrix<T> B2(grid);

    // Views of C: same splits as B, plus the two overlapping 2x1 views that
    // receive the updates.
    DistMatrix<T> CT(grid), C0(grid), CAbove(grid);
    DistMatrix<T> CB(grid), C1(grid), CBelow(grid);
    DistMatrix<T> C2(grid);

    // Redistributed temporaries
    DistMatrix<T,MC,  STAR> AColPan_MC_STAR(grid);
    DistMatrix<T,STAR,MC  > ARowPan_STAR_MC(grid);
    DistMatrix<T,MR,  STAR> B1Adj_MR_STAR(grid);

    // This alignment is set once and kept for the whole loop; the two panel
    // temporaries are instead re-aligned on every iteration.
    B1Adj_MR_STAR.AlignWith( C );

    // Apply beta up front so the loop only has to accumulate into C.
    Scale( beta, C );

    LockedPartitionDownDiagonal( A, ATL, ATR, ABL, ABR, 0 );
    LockedPartitionDown( B, BT, BB, 0 );
    PartitionDown( C, CT, CB, 0 );

    // Iterate until the bottom part of C has been consumed.
    while( CB.Height() > 0 )
    {
        // Expose the current blocks: A11 on the diagonal of A, and the
        // matching panels B1 and C1.
        LockedRepartitionDownDiagonal
        ( ATL, ATR, A00, A01, A02,
                    A10, A11, A12,
          ABL, ABR, A20, A21, A22 );
        LockedRepartitionDown( BT, B0, B1, BB, B2 );
        RepartitionDown( CT, C0, C1, CB, C2 );

        // Row panel [A11 A12] and column panel [A01; A11].
        LockedView1x2( ARowPan, A11, A12 );
        LockedView2x1( AColPan, A01, A11 );

        // Both target views contain C1: CAbove = [C0; C1], CBelow = [C1; C2].
        View2x1( CAbove, C0, C1 );
        View2x1( CBelow, C1, C2 );

        AColPan_MC_STAR.AlignWith( CAbove );
        ARowPan_STAR_MC.AlignWith( CBelow );
        //--------------------------------------------------------------------//
        AColPan_MC_STAR = AColPan;
        ARowPan_STAR_MC = ARowPan;

        // Both panels carry a copy of A11. They are trimmed with different
        // offsets (0 and 1), presumably so that the diagonal of A11 is
        // contributed exactly once across the two products below.
        MakeTrapezoidal( RIGHT, UPPER, 0, AColPan_MC_STAR );
        MakeTrapezoidal( LEFT,  UPPER, 1, ARowPan_STAR_MC );

        B1Adj_MR_STAR.AdjointFrom( B1 );

        // Accumulate into CAbove; T(1) keeps what is already stored there.
        LocalGemm
        ( NORMAL, ADJOINT,
          alpha, AColPan_MC_STAR, B1Adj_MR_STAR, T(1), CAbove );

        // Accumulate into CBelow using the adjoint of the row panel.
        LocalGemm
        ( ADJOINT, ADJOINT,
          alpha, ARowPan_STAR_MC, B1Adj_MR_STAR, T(1), CBelow );
        //--------------------------------------------------------------------//
        // Release the per-iteration alignments before the views move on.
        AColPan_MC_STAR.FreeAlignments();
        ARowPan_STAR_MC.FreeAlignments();

        // Advance all three partitionings past the blocks just processed.
        SlideLockedPartitionDownDiagonal
        ( ATL, ATR, A00, A01, A02,
                    A10, A11, A12,
          ABL, ABR, A20, A21, A22 );
        SlideLockedPartitionDown( BT, B0, B1, BB, B2 );
        SlidePartitionDown( CT, C0, C1, CB, C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 2
0
// HemmRUA
//
// Blocked in-place update of the distributed matrix C from A and B, one
// panel (B1, C1) of B and C per iteration, with the whole of A used in
// every iteration. Judging by the name and the call to
// LocalSymmetricAccumulateRU, this is presumably the "right, upper"
// Hermitian product C := alpha B A + beta C -- confirm against the
// dispatching caller.
//
// Parameters:
//   alpha  scalar handed to LocalSymmetricAccumulateRU.
//   A      read-only operand; all alignments set before the loop follow it.
//   B      read-only operand, traversed one panel (B1) per iteration.
//   beta   scalar applied to C once, before the loop starts.
//   C      updated in place through the local matrix of each panel C1.
//
// Unless RELEASE is defined, a std::logic_error is thrown when A, B and C
// do not all share the same grid. No dimension checks are made here.
inline void
HemmRUA( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B,
         T beta, DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::HemmRUA");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
    {
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    }
#endif
    const Grid& grid = A.Grid();

    // Views of B: top/bottom split and the three-way repartitioning.
    DistMatrix<T> BT(grid), B0(grid);
    DistMatrix<T> BB(grid), B1(grid);
    DistMatrix<T> B2(grid);

    // Views of C, split the same way as B.
    DistMatrix<T> CT(grid), C0(grid);
    DistMatrix<T> CB(grid), C1(grid);
    DistMatrix<T> C2(grid);

    // Redistributed copies of B1 (and its adjoint) plus the accumulators
    // for the adjoint of the update that is added to C1.
    DistMatrix<T,MR,  STAR> B1Adj_MR_STAR(grid);
    DistMatrix<T,VC,  STAR> B1Adj_VC_STAR(grid);
    DistMatrix<T,STAR,MC  > B1_STAR_MC(grid);
    DistMatrix<T,MC,  STAR> Z1Adj_MC_STAR(grid);
    DistMatrix<T,MR,  STAR> Z1Adj_MR_STAR(grid);
    DistMatrix<T,MR,  MC  > Z1Adj_MR_MC(grid);
    DistMatrix<T> Z1Adj(grid);

    // These alignments are fixed for the whole loop. Z1Adj_MR_MC is the
    // exception: it is re-aligned to C1 on every iteration.
    B1Adj_MR_STAR.AlignWith( A );
    B1Adj_VC_STAR.AlignWith( A );
    B1_STAR_MC.AlignWith( A );
    Z1Adj_MC_STAR.AlignWith( A );
    Z1Adj_MR_STAR.AlignWith( A );

    // Local scratch buffer for the un-adjointed update.
    Matrix<T> Z1Local;

    // Apply beta up front so the loop only has to accumulate into C.
    Scale( beta, C );

    LockedPartitionDown( B, BT, BB, 0 );
    PartitionDown( C, CT, CB, 0 );

    // Iterate until the top part of C has grown to cover all of C.
    while( CT.Height() < C.Height() )
    {
        // Expose the current panels B1 and C1.
        LockedRepartitionDown( BT, B0, B1, BB, B2 );
        RepartitionDown( CT, C0, C1, CB, C2 );

        Z1Adj_MR_MC.AlignWith( C1 );

        // The accumulators hold the adjoint of the update, hence the
        // swapped dimensions (C1.Width() by C1.Height()).
        Zeros( C1.Width(), C1.Height(), Z1Adj_MC_STAR );
        Zeros( C1.Width(), C1.Height(), Z1Adj_MR_STAR );
        //--------------------------------------------------------------------//
        // Build the two redistributed forms of B1 needed below; the [VC,*]
        // copy is only an intermediate on the way to B1_STAR_MC.
        B1Adj_MR_STAR.AdjointFrom( B1 );
        B1Adj_VC_STAR = B1Adj_MR_STAR;
        B1_STAR_MC.AdjointFrom( B1Adj_VC_STAR );

        // Fill both partial accumulators from A and the copies of B1.
        LocalSymmetricAccumulateRU
        ( ADJOINT, alpha, A, B1_STAR_MC, B1Adj_MR_STAR,
          Z1Adj_MC_STAR, Z1Adj_MR_STAR );

        // Combine the two partial results into Z1Adj_MR_MC.
        Z1Adj.SumScatterFrom( Z1Adj_MC_STAR );
        Z1Adj_MR_MC = Z1Adj;
        Z1Adj_MR_MC.SumScatterUpdate( T(1), Z1Adj_MR_STAR );

        // Undo the adjoint on the local data and add it into the local
        // part of C1.
        Adjoint( Z1Adj_MR_MC.LockedLocalMatrix(), Z1Local );
        Axpy( T(1), Z1Local, C1.LocalMatrix() );
        //--------------------------------------------------------------------//
        // Release the per-iteration alignment before C1 moves on.
        Z1Adj_MR_MC.FreeAlignments();

        // Advance both partitionings past the panels just processed.
        SlideLockedPartitionDown( BT, B0, B1, BB, B2 );
        SlidePartitionDown( CT, C0, C1, CB, C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}