Ejemplo n.º 1
0
PT_TREE_T ATL_Strmm
(
   const PT_LVL3_TYPE_T       * PLVL3,
   const unsigned int         NODE,
   const unsigned int         NTHREADS,
   pthread_attr_t             * ATTR,
   const int                  NB,
   const enum ATLAS_SIDE      SIDE,
   const enum ATLAS_UPLO      UPLO,
   const enum ATLAS_TRANS     TRANS,
   const enum ATLAS_DIAG      DIAG,
   const int                  M,
   const int                  N,
   const void                 * ALPHA,
   const void                 * A,
   const int                  LDA,
   void                       * B,
   const int                  LDB
)
{
/*
 * Purpose
 * =======
 *
 * ATL_Strmm recursively builds the binary task tree for the TRMM
 * operation.  Only B is ever partitioned; A, LDA, ALPHA and the SIDE /
 * UPLO / TRANS / DIAG flags are handed unchanged to both halves.
 *
 * When SIDE is AtlasLeft the N dimension is given to ATL_1dsplit and
 * the second half of B starts at offset n1*LDB;  for any other SIDE the
 * M dimension is split and the second half starts at offset m1.  The
 * offset is applied through Mvpt3 together with PLVL3->size.
 *
 * When ATL_1dsplit does not answer Atlas1dSplit, a leaf node is created
 * whose function is PLVL3->trmm0  and whose argument is a  heap-allocated
 * PT_TRMM_ARGS_T holding the parameters of this call.
 * NOTE(review): nothing here frees that argument block - presumably the
 * tree-release code owns it; confirm.
 *
 * The function returns NULL when NTHREADS is less than one,  otherwise
 * the node built by ATL_init_node for NODE.  The children of NODE  are
 * numbered 2*NODE+1 and 2*NODE+2.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   PT_TREE_T                  node = NULL, lo, hi;
   PT_TRMM_ARGS_T             * leaf = NULL;
   double                     ratio;
   DIM_1DSPLIT_T              verdict;
   size_t                     esize;
   const unsigned int         child = ( NODE << 1 );
   unsigned int               nthLo, nthHi;
   const int                  byCols = ( SIDE == AtlasLeft );
   int                        len1, len2, offB;
/* ..
 * .. Executable Statements ..
 *
 */
   if( NTHREADS < 1 ) return( node );

   esize   = PLVL3->size;
/*
 * Left-side products are split along the columns of B, all others along
 * its rows.
 */
   verdict = ATL_1dsplit( NTHREADS, ( byCols ? N : M ), NB, &nthLo, &nthHi,
                          &len1, &len2, &ratio );

   if( verdict != Atlas1dSplit )
   {
/*
 * No further split: package the whole problem into a leaf node.
 */
      leaf = malloc( sizeof( *leaf ) );
      ATL_assert( leaf != NULL );

      leaf->si = SIDE;
      leaf->up = UPLO;
      leaf->tr = TRANS;
      leaf->di = DIAG;
      leaf->m  = M;
      leaf->n  = N;
      leaf->al = ALPHA;
      leaf->a  = A;
      leaf->la = LDA;
      leaf->b  = B;
      leaf->lb = LDB;

      return( ATL_init_node( NODE, NULL, NULL, NULL, NULL, PLVL3->trmm0,
                             (void *)(leaf) ) );
   }

   if( byCols )
   {
      offB = len1 * LDB;
      lo   = ATL_Strmm( PLVL3, child+1, nthLo, ATTR, NB, SIDE, UPLO, TRANS,
                        DIAG, M, len1, ALPHA, A, LDA, B, LDB );
      hi   = ATL_Strmm( PLVL3, child+2, nthHi, ATTR, NB, SIDE, UPLO, TRANS,
                        DIAG, M, len2, ALPHA, A, LDA,
                        Mvpt3( B, offB, esize ), LDB );
   }
   else
   {
      offB = len1;
      lo   = ATL_Strmm( PLVL3, child+1, nthLo, ATTR, NB, SIDE, UPLO, TRANS,
                        DIAG, len1, N, ALPHA, A, LDA, B, LDB );
      hi   = ATL_Strmm( PLVL3, child+2, nthHi, ATTR, NB, SIDE, UPLO, TRANS,
                        DIAG, len2, N, ALPHA, A, LDA,
                        Mvpt3( B, offB, esize ), LDB );
   }

   return( ATL_init_node( NODE, lo, hi, NULL, NULL, NULL, NULL ) );
/*
 * End of ATL_Strmm
 */
}
Ejemplo n.º 2
0
PT_TREE_T ATL_Ssyr2k
(
   const PT_LVL3_TYPE_T       * PLVL3,
   const unsigned int         NODE,
   const unsigned int         NTHREADS,
   pthread_attr_t             * ATTR,
   const int                  NB,
   const enum ATLAS_UPLO      UPLO,
   const enum ATLAS_TRANS     TRANS,
   const enum ATLAS_TRANS     TGEMM,
   const int                  M,
   const int                  N,
   const int                  K,
   const int                  L,
   const void                 * ALPHA,
   const void                 * ALPHC,
   const void                 * A,
   const int                  LDA,
   const void                 * B,
   const int                  LDB,
   const void                 * BETA,
   void                       * C,
   const int                  LDC
)
{
/*
 * Purpose
 * =======
 *
 * ATL_Ssyr2k  drives the threaded  rank-2k  update.  It first tries the
 * direct kernel:  PLVL3->ptsyr2k0 when TGEMM is AtlasTrans,  otherwise
 * PLVL3->pther2k0.  When that kernel returns zero nothing is left to do
 * and NULL is returned.
 *
 * When the kernel reports failure  (non-zero),  K is handed to
 * ATL_1dsplit:
 *
 * - Atlas1dSplit:  the problem is cut into  a leading part of size mnk1
 *   and a trailing part of size mnk2.  The leading diagonal part is
 *   handled by a recursive call,  the off-diagonal block is updated at
 *   once by two PLVL3->ptgemm calls (the first scaling C by BETA,  the
 *   second accumulating with PLVL3->one),  each joined and freed before
 *   continuing,  and the trailing diagonal part is handled by a second
 *   recursive call.  Any trees returned by the two  recursive calls are
 *   attached as children of a new node for NODE.
 *
 * - otherwise:  a leaf node is created  whose function is PLVL3->syr2k0
 *   (TGEMM == AtlasTrans)  or  PLVL3->her2k0,  with a  heap-allocated
 *   PT_SYR2K_ARGS_T as argument.
 *
 * Arguments
 * =========
 *
 * M, N    Only tested for being non-negative;  the recursive calls and
 *         the leaf arguments use zero for both.
 *
 * K       Dimension that is split,  and the size passed to the direct
 *         kernel.  NOTE(review): presumably the order of C; confirm.
 *
 * L       Passed to the direct kernel  and used as the third dimension
 *         argument of every ptgemm call.
 *
 * ALPHA, ALPHC
 *         Scalars of the first and second ptgemm update  respectively.
 *         NOTE(review): presumably ALPHC is the conjugate of ALPHA for
 *         the Hermitian case; confirm against the caller.
 *
 * The function returns NULL when NTHREADS < 1,  when M or N is negative,
 * or when no deferred work remains;  otherwise the node built for NODE.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   PT_TREE_T                  left, right, tree = NULL, root;
   PT_SYR2K_ARGS_T            * a_syr2k  = NULL;
   double                     rn;
   DIM_1DSPLIT_T              splitN;
   size_t                     size;
   unsigned int               next = ( NODE << 1 ), nt1, nt2;
   int                        failed, incA, incB, incC, mnk1, mnk2;
/* ..
 * .. Executable Statements ..
 *
 */
   if( NTHREADS < 1 ) return( tree );

   size  = PLVL3->size;

   if( M < 0 || N < 0 ) return( tree ); /* temporary */

   if( TGEMM == AtlasTrans )
   {
      failed = PLVL3->ptsyr2k0( NTHREADS, ATTR, UPLO, TRANS, TGEMM, K, L,
                                ALPHA, A, LDA, B, LDB, BETA, C, LDC );
   }
   else
   {
      failed = PLVL3->pther2k0( NTHREADS, ATTR, UPLO, TRANS, TGEMM, K, L,
                                ALPHA, A, LDA, B, LDB, BETA, C, LDC );
   }

   if( failed )
   {
      splitN = ATL_1dsplit( NTHREADS, K, NB, &nt1, &nt2, &mnk1, &mnk2, &rn );

      if( splitN == Atlas1dSplit )
      {
/*
 * Leading diagonal part.  The full thread count is passed down because
 * the pieces are processed one after the other, not concurrently.
 */
         left  = ATL_Ssyr2k( PLVL3, next+1, NTHREADS, ATTR, NB, UPLO, TRANS,
                             TGEMM, 0, 0, mnk1, L, ALPHA, ALPHC, A, LDA,
                             B, LDB, BETA, C, LDC );
/*
 * Off-diagonal block: two general products, run and joined immediately.
 * On exit from each branch incA, incB and incC address the trailing
 * diagonal part.
 */
         if( UPLO == AtlasLower )
         {
            if( TRANS == AtlasNoTrans )
            {
               incA  = mnk1; incB  = mnk1; incC  = mnk1;

               root = PLVL3->ptgemm( NTHREADS, ATTR, AtlasNoTrans, TGEMM, mnk2,
                                     mnk1, L, ALPHA, Mvpt3( A, incA, size ),
                                     LDA, B, LDB, BETA, Mvpt3( C, incC, size ),
                                     LDC );
               ATL_join_tree  ( root );
               ATL_free_tree  ( root );

               root = PLVL3->ptgemm( NTHREADS, ATTR, AtlasNoTrans, TGEMM, mnk2,
                                     mnk1, L, ALPHC, Mvpt3( B, incB, size ),
                                     LDB, A, LDA, PLVL3->one, Mvpt3( C, incC,
                                     size ), LDC );
               ATL_join_tree  ( root );
               ATL_free_tree  ( root );

               incC += mnk1 * LDC;
            }
            else
            {
               incA  = mnk1 * LDA; incB  = mnk1 * LDB; incC  = mnk1;

               root = PLVL3->ptgemm( NTHREADS, ATTR, TGEMM, AtlasNoTrans, mnk2,
                                     mnk1, L, ALPHA, Mvpt3( A, incA, size ),
                                     LDA, B, LDB, BETA, Mvpt3( C, incC, size ),
                                     LDC );
               ATL_join_tree  ( root );
               ATL_free_tree  ( root );

               root = PLVL3->ptgemm( NTHREADS, ATTR, TGEMM, AtlasNoTrans, mnk2,
                                     mnk1, L, ALPHC, Mvpt3( B, incB, size ),
                                     LDB, A, LDA, PLVL3->one, Mvpt3( C, incC,
                                     size ), LDC );
               ATL_join_tree  ( root );
               ATL_free_tree  ( root );

               incC += mnk1 * LDC;
            }
         }
         else
         {
            if( TRANS == AtlasNoTrans )
            {
               incA  = mnk1; incB  = mnk1; incC  = mnk1 * LDC;

               root = PLVL3->ptgemm( NTHREADS, ATTR, AtlasNoTrans, TGEMM, mnk1,
                                     mnk2, L, ALPHA, A, LDA, Mvpt3( B, incB,
                                     size ), LDB, BETA, Mvpt3( C, incC, size ),
                                     LDC );
               ATL_join_tree  ( root );
               ATL_free_tree  ( root );

               root = PLVL3->ptgemm( NTHREADS, ATTR, AtlasNoTrans, TGEMM, mnk1,
                                     mnk2, L, ALPHC, B, LDB, Mvpt3( A, incA,
                                     size ), LDA, PLVL3->one, Mvpt3( C, incC,
                                     size ), LDC );
               ATL_join_tree  ( root );
               ATL_free_tree  ( root );

               incC += mnk1;
            }
            else
            {
               incA  = mnk1 * LDA; incB  = mnk1 * LDB; incC  = mnk1 * LDC;

               root = PLVL3->ptgemm( NTHREADS, ATTR, TGEMM, AtlasNoTrans, mnk1,
                                     mnk2, L, ALPHA, A, LDA, Mvpt3( B, incB,
                                     size ), LDB, BETA, Mvpt3( C, incC, size ),
                                     LDC );
               ATL_join_tree  ( root );
               ATL_free_tree  ( root );

               root = PLVL3->ptgemm( NTHREADS, ATTR, TGEMM, AtlasNoTrans, mnk1,
                                     mnk2, L, ALPHC, B, LDB, Mvpt3( A, incA,
                                     size ), LDA, PLVL3->one, Mvpt3( C, incC,
                                     size ), LDC );
               ATL_join_tree  ( root );
               ATL_free_tree  ( root );

               incC += mnk1;
            }
         }
/*
 * Trailing diagonal part.
 */
         right = ATL_Ssyr2k( PLVL3, next+2, NTHREADS, ATTR, NB, UPLO, TRANS,
                             TGEMM, 0, 0, mnk2, L, ALPHA, ALPHC,
                             Mvpt3( A, incA, size ), LDA,
                             Mvpt3( B, incB, size ), LDB, BETA,
                             Mvpt3( C, incC, size ), LDC );
/*
 * A recursive call returns a non-NULL tree when it had to defer its work
 * to a leaf node.  Such trees must be handed back to the caller; without
 * this the deferred update would never run and the nodes would leak.
 * When both halves completed directly there is nothing to attach and NULL
 * is returned.
 * NOTE(review): assumes the tree walkers accept an interior node with a
 * single NULL child - confirm against ATL_thread_tree / ATL_join_tree.
 */
         if( ( left != NULL ) || ( right != NULL ) )
         {
            tree = ATL_init_node( NODE, left, right, NULL, NULL, NULL,
                                  NULL );
         }
      }
      else
      {
         a_syr2k = (PT_SYR2K_ARGS_T *)malloc( sizeof( PT_SYR2K_ARGS_T ) );
         ATL_assert( a_syr2k != NULL );
         a_syr2k->up = UPLO;  a_syr2k->tr = TRANS; a_syr2k->m  = 0;
         a_syr2k->n  = 0;     a_syr2k->k  = K;     a_syr2k->l  = L;
/*
 * NOTE(review): ac receives ALPHA, not ALPHC.  Left unchanged because the
 * leaf kernels are not visible here - confirm whether they read ac.
 */
         a_syr2k->al = ALPHA; a_syr2k->ac = ALPHA; a_syr2k->a  = A;
         a_syr2k->la = LDA;   a_syr2k->b  = B;     a_syr2k->lb = LDB;
         a_syr2k->be = BETA;  a_syr2k->c  = C;     a_syr2k->lc = LDC;

         tree = ATL_init_node( NODE, NULL, NULL, NULL, NULL,
                               ( TGEMM == AtlasTrans ? PLVL3->syr2k0 :
                               PLVL3->her2k0 ), (void *)(a_syr2k) );
      }
   }

   return( tree );
/*
 * End of ATL_Ssyr2k
 */
}
Ejemplo n.º 3
0
PT_TREE_T ATL_Sgezero
(
   const PT_MISC_TYPE_T       * PTYPE,
   const unsigned int         NODE,
   const unsigned int         THREADS,
   pthread_attr_t             * ATTR,
   const int                  NB,
   const int                  M,
   const int                  N,
   void                       * A,
   const int                  LDA
)
{
/*
 * Purpose
 * =======
 *
 * ATL_Sgezero recursively  builds  the binary tasks tree to be threaded
 * for the matrix zero operation.
 *
 * Both M and N are offered to  ATL_1dsplit.  When both can be split the
 * dimension with the smaller ratio reported by ATL_1dsplit wins, N being
 * chosen when the ratios are equal.  When neither can be split  a  leaf
 * node  is created  with function  PTYPE->fun  and  a  heap-allocated
 * PT_GEZERO_ARGS_T holding M, N, A and LDA.
 *
 * Arguments
 * =========
 *
 * PTYPE   (input)                       const PT_MISC_TYPE_T *
 *         On entry, PTYPE  points  to the data structure containing the
 *         type information.  Its size and fun members are read here.
 *
 * NODE    (input)                       const unsigned int
 *         On entry, NODE specifies the current node number.  Children
 *         are numbered 2*NODE+1 and 2*NODE+2.
 *
 * THREADS (input)                       const unsigned int
 *         On entry, THREADS  specifies the number of threads to be used
 *         for the current operation.  NULL is returned when it is less
 *         than one.
 *
 * ATTR    (input)                       pthread_attr_t *
 *         On entry, ATTR  specifies  the  thread attribute object to be
 *         used for the node functions to be threaded.
 *
 * NB      (input)                       const int
 *         On entry, NB  specifies  the  blocksize  to  be  used for the
 *         problem size partitioning.
 *
 * M, N    (input)                       const int
 *         On entry, the two dimensions of the piece of A handled by this
 *         node.
 *
 * A       (input)                       void *
 *         On entry, A points to the piece handled by this node.  A split
 *         along N advances it by n1*LDA, a split along M by m1,  through
 *         Mvptm with PTYPE->size.
 *
 * LDA     (input)                       const int
 *         On entry, LDA is passed unchanged to every child and leaf.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   PT_TREE_T                  node = NULL, lo, hi;
   PT_GEZERO_ARGS_T           * leaf = NULL;
   double                     ratM, ratN;
   DIM_1DSPLIT_T              verdM, verdN;
   size_t                     esize;
   int                        how = NOSPLIT, mLo, mHi, nLo, nHi;
   const unsigned int         child = ( NODE << 1 );
   unsigned int               thM1, thM2, thN1, thN2;
/* ..
 * .. Executable Statements ..
 *
 */
   if( THREADS < 1 ) return( node );

   esize = PTYPE->size;
   verdM = ATL_1dsplit( THREADS, M, NB, &thM1, &thM2, &mLo, &mHi, &ratM );
   verdN = ATL_1dsplit( THREADS, N, NB, &thN1, &thN2, &nLo, &nHi, &ratN );
/*
 * Pick the dimension to cut.  M is kept only when it is splittable and,
 * if N is splittable too, its ratio is strictly smaller.
 */
   if( verdM == Atlas1dSplit )
   {
      how = SPLIT_M;
      if( ( verdN == Atlas1dSplit ) && !( ratM < ratN ) ) how = SPLIT_N;
   }
   else if( verdN == Atlas1dSplit )
   {
      how = SPLIT_N;
   }

   if( how == SPLIT_N )
   {
      lo = ATL_Sgezero( PTYPE, child+1, thN1, ATTR, NB, M, nLo, A, LDA );
      hi = ATL_Sgezero( PTYPE, child+2, thN2, ATTR, NB, M, nHi,
                        Mvptm( A, nLo*LDA, esize ), LDA );

      return( ATL_init_node( NODE, lo, hi, NULL, NULL, NULL, NULL ) );
   }

   if( how == SPLIT_M )
   {
      lo = ATL_Sgezero( PTYPE, child+1, thM1, ATTR, NB, mLo, N, A, LDA );
      hi = ATL_Sgezero( PTYPE, child+2, thM2, ATTR, NB, mHi, N,
                        Mvptm( A, mLo, esize ), LDA );

      return( ATL_init_node( NODE, lo, hi, NULL, NULL, NULL, NULL ) );
   }
/*
 * No split possible: this node does the work itself.
 */
   leaf = malloc( sizeof( *leaf ) );
   ATL_assert( leaf != NULL );

   leaf->m  = M;
   leaf->n  = N;
   leaf->a  = A;
   leaf->la = LDA;

   return( ATL_init_node( NODE, NULL, NULL, NULL, NULL, PTYPE->fun,
                          (void *)(leaf) ) );
/*
 * End of ATL_Sgezero
 */
}