コード例 #1
0
/*
 * Thread-pool work routine: blocked x.y reduction through xddot().
 *
 * arg  : pointer to a struct TaskXY. Reads x_beg, y_beg, number and block;
 *        writes four doubles starting at xy_sum + 4 * rank.
 * pool : handle given to TPI_Rank() to obtain this caller's rank and the
 *        participant count.
 *
 * Blocks of t->block entries are dealt out round-robin: rank r handles the
 * blocks starting at r * block, (r + size) * block, ... and the last block
 * is truncated to the entries that remain.
 *
 * NOTE(review): a zero t->block gives a zero stride and the loop would not
 * advance; callers are assumed to pass a positive block size.
 */
static void task_xddot_xy_work_blocking( void * arg , TPI_ThreadPool pool )
{
    int p_size , p_rank ;

    /* Work is performed only when TPI_Rank() returns zero. */
    if ( ! TPI_Rank( pool , & p_rank , & p_size ) ) {

        struct TaskXY * const t = (struct TaskXY *) arg ;

        const unsigned n_total      = t->number ;
        const unsigned block_size   = t->block ;
        const unsigned block_start  = block_size * p_rank ;
        const unsigned block_stride = block_size * p_size ;

        double s_local[4] = { 0 , 0 , 0 , 0 };

        unsigned i ;

        /* Index-based stepping: both operands advance together and no
         * pointer is ever formed beyond the end of the arrays.
         * The second operand is taken from y_beg (not x_beg) so that the
         * result is x.y, matching task_xddot_xy_work. */
        for ( i = block_start ; i < n_total ; i += block_stride ) {
            const unsigned n = n_total - i ; /* entries left from i */
            xddot( s_local , ( block_size < n ? block_size : n ) ,
                   t->x_beg + i , t->y_beg + i );
        }

        {
            /* Each rank owns its own four-entry slot of the result array. */
            double * const xy_sum = t->xy_sum + 4 * p_rank ;

            xy_sum[0] = s_local[0] ;
            xy_sum[1] = s_local[1] ;
            xy_sum[2] = s_local[2] ;
            xy_sum[3] = s_local[3] ;
        }
    }
}
コード例 #2
0
/*
 * Thread-pool work routine: contiguous-partition x.y reduction via xddot().
 *
 * arg  : pointer to a struct TaskXY (reads x_beg, y_beg, number; writes four
 *        doubles at xy_sum + 4 * rank).
 * pool : handle given to TPI_Rank() for this caller's rank and the
 *        participant count.
 *
 * Rank r processes entries [ number*r/size , number*(r+1)/size ).
 */
static void task_xddot_xy_work( void * arg , TPI_ThreadPool pool )
{
    int p_size , p_rank ;

    /* Nothing is done unless TPI_Rank() returns zero. */
    if ( 0 == TPI_Rank( pool , & p_rank , & p_size ) ) {

        struct TaskXY * const task = (struct TaskXY *) arg ;

        const unsigned count = task->number ;
        const unsigned first = ( count * ( p_rank     ) ) / p_size ;
        const unsigned limit = ( count * ( p_rank + 1 ) ) / p_size ;

        double   acc[4] = { 0 , 0 , 0 , 0 };
        double * const slot = task->xy_sum + 4 * p_rank ;
        int      k ;

        xddot( acc , limit - first , task->x_beg + first , task->y_beg + first );

        /* Publish the four accumulator entries into this rank's slot. */
        for ( k = 0 ; k < 4 ; ++k ) {
            slot[k] = acc[k] ;
        }
    }
}
コード例 #3
0
/*
 * Thread-pool work routine: blocked x.y reduction through ddot().
 *
 * arg  : pointer to a struct TaskXY. Reads x_beg, y_beg, number and block;
 *        writes one double to xy_sum[rank].
 * pool : handle given to TPI_Rank() to obtain this caller's rank and the
 *        participant count.
 *
 * Blocks of t->block entries are dealt out round-robin over the ranks; the
 * per-block ddot() results are summed locally and stored once at the end.
 *
 * NOTE(review): a zero t->block gives a zero stride and the loop would not
 * advance; callers are assumed to pass a positive block size.
 */
static void task_ddot_xy_work_blocking( void * arg , TPI_ThreadPool pool )
{
    int p_size , p_rank ;

    /* Work is performed only when TPI_Rank() returns zero. */
    if ( ! TPI_Rank( pool , & p_rank , & p_size ) ) {

        struct TaskXY * const t = (struct TaskXY *) arg ;

        const unsigned n_total      = t->number ;
        const unsigned block_size   = t->block ;
        const unsigned block_start  = block_size * p_rank ;
        const unsigned block_stride = block_size * p_size ;

        double local = 0.0 ;

        unsigned i ;

        /* Index-based stepping keeps both operands in lock step without
         * forming pointers beyond the arrays. The second operand comes
         * from y_beg (not x_beg) so the result is x.y, matching
         * task_ddot_xy_work. */
        for ( i = block_start ; i < n_total ; i += block_stride ) {
            const unsigned n = n_total - i ; /* entries left from i */
            local += ddot( ( block_size < n ? block_size : n ) ,
                           t->x_beg + i , t->y_beg + i );
        }

        /* Each rank owns exactly one entry of the result array. */
        t->xy_sum[ p_rank ] = local ;
    }
}
コード例 #4
0
/*
 * Thread-pool work routine: reduce this rank's contiguous slice of x with
 * dot1_unroll() and fold the two partial values into the shared t->x_sum.
 *
 * arg  : pointer to a struct TaskX (reads x_beg, number; updates x_sum).
 * pool : handle used for TPI_Rank() and for lock 0, which serialises the
 *        update of the shared sum.
 */
static void task_xddot_x_work( void * arg , TPI_ThreadPool pool )
{
    int p_size , p_rank ;

    /* Nothing is done unless TPI_Rank() returns zero. */
    if ( 0 == TPI_Rank( pool , & p_rank , & p_size ) ) {

        struct TaskX * const t = (struct TaskX *) arg ;

        const unsigned p_next   = p_rank + 1 ;
        const unsigned n_global = t->number ;
        const unsigned n_begin  = ( n_global * p_rank ) / p_size ;
        const unsigned n_local  = ( n_global * p_next ) / p_size - n_begin ;

        double * const v = t->x_sum ;
        double partial[2] = { 0 , 0 };

        /* Local phase: no shared state is touched here. */
        dot1_unroll( partial , t->x_beg + n_begin , n_local );

        /* Shared phase: both additions happen while lock 0 is held. */
        TPI_Lock(pool,0);
        SUM_ADD( v , partial[0] );
        SUM_ADD( v , partial[1] );
        TPI_Unlock(pool,0);
    }
}
コード例 #5
0
/*
 * Thread-pool work routine: run norm1() over this rank's contiguous slice of
 * x and fold the two partial values into the shared t->x_sum.
 *
 * arg  : pointer to a struct TaskX (reads x_beg, number; updates x_sum).
 * pool : handle used for TPI_Rank() and for lock 0, which serialises the
 *        update of the shared sum.
 */
static void task_norm1_work( void * arg , TPI_ThreadPool pool )
{
    int p_size , p_rank ;

    /* Nothing is done unless TPI_Rank() returns zero. */
    if ( 0 == TPI_Rank( pool , & p_rank , & p_size ) ) {

        struct TaskX * const task = (struct TaskX *) arg ;

        const unsigned p_next = p_rank + 1 ;
        const unsigned count  = task->number ;

        /* Slice boundaries for this rank: [ first , last ). */
        const double * const first = task->x_beg + ( count * p_rank ) / p_size ;
        const double * const last  = task->x_beg + ( count * p_next ) / p_size ;

        double * const total = task->x_sum ;
        double partial[2] = { 0 , 0 };

        norm1( partial , first , last );

        /* Both additions to the shared sum happen while lock 0 is held. */
        TPI_Lock( pool , 0 );
        SUM_ADD( total , partial[0] );
        SUM_ADD( total , partial[1] );
        TPI_Unlock( pool , 0 );
    }
}
コード例 #6
0
/*
 * Thread-pool work routine: run add_array() over this rank's contiguous
 * slice of x and merge the four-entry partial result into the shared
 * t->x_sum with xdsum_add_dsum().
 *
 * arg  : pointer to a struct TaskX (reads x_beg, number; updates x_sum).
 * pool : handle used for TPI_Rank() and for lock 0, which serialises the
 *        merge into the shared sum.
 */
static void task_sum_work( void * arg , TPI_ThreadPool pool )
{
    int p_size , p_rank ;

    /* Nothing is done unless TPI_Rank() returns zero. */
    if ( 0 == TPI_Rank( pool , & p_rank , & p_size ) ) {

        struct TaskX * const task = (struct TaskX *) arg ;

        const unsigned p_next = p_rank + 1 ;
        const unsigned count  = task->number ;

        /* Slice boundaries for this rank: [ first , last ). */
        const double * const first = task->x_beg + ( count * p_rank ) / p_size ;
        const double * const last  = task->x_beg + ( count * p_next ) / p_size ;

        double * const total = task->x_sum ;
        double partial[4] = { 0 , 0 , 0 , 0 };

        add_array( partial , first , last );

        /* The merge into the shared sum happens while lock 0 is held. */
        TPI_Lock( pool , 0 );
        xdsum_add_dsum( total , partial );
        TPI_Unlock( pool , 0 );
    }
}
コード例 #7
0
ファイル: txblas_axpby.c プロジェクト: 00liujj/trilinos
/*
 * Thread-pool work routine: apply daxpby_work() to x/y in chunks of BLOCK
 * entries, with work stealing between ranks.
 *
 * arg  : pointer to a struct TaskXY. Reads alpha, beta, number, x_beg and
 *        y_beg; reads and advances the per-rank block counters in t->iter.
 * pool : handle used for TPI_Rank() and for the per-rank locks; lock p
 *        guards updates of t->iter[p].
 *
 * Phase 1: the caller repeatedly claims the block named by its own counter
 *          t->iter[rank] and advances that counter by the number of ranks.
 * Phase 2: once its own counter is past the end, the caller scans every
 *          rank's counter and claims blocks from any rank whose lock it can
 *          take with TPI_Trylock().
 *
 * NOTE(review): the stride of p_size suggests t->iter[p] is initialised to p
 * by the caller - confirm at the dispatch site.
 */
static void task_axpby_work_steal( void * arg , TPI_ThreadPool pool )
{
  enum { BLOCK = UNROLL * 128 };
  int p_size ;
  int p_rank ;

  /* Work is performed only when TPI_Rank() returns zero. */
  if ( ! TPI_Rank( pool , & p_rank , & p_size ) ) {

    struct TaskXY * const t = (struct TaskXY *) arg ;

    const double   a = t->alpha ;
    const double   b = t->beta ;
    const unsigned n = t->number ;
    const double * const x = t->x_beg ;
          double * const y = t->y_beg ;

    unsigned * const all_iter = t->iter ;

    /* This rank's own counter. It must be indexed by p_rank: it is the
     * slot guarded by lock p_rank below and the slot other ranks inspect
     * as all_iter[p] when stealing. Indexing by p_size would alias every
     * rank onto one slot past the per-rank counters. */
    unsigned * const my_iter  = all_iter + p_rank ;

    {
      unsigned i ;
      for ( i = 0 ; i < n ; ) {
        /* Claim the next block under this rank's lock. */
        TPI_Lock( pool , p_rank );
        i = *my_iter * BLOCK ; *my_iter += p_size ;
        TPI_Unlock( pool , p_rank );
        if ( i < n ) {
          const unsigned len = BLOCK < n - i ? BLOCK : n - i ;
          daxpby_work( len, a, x + i, b, y + i );
        }
      }
    }

    /* Finished my work, steal work from someone else */

    {
      int working ;
      int p ;
      for ( working = 1 ; working ; ) {
        working = 0 ;
        for ( p = 0 ; p < p_size ; ++p ) {
          /* Unlocked peek; the value is re-read under the lock before a
           * block is actually claimed. */
          if ( all_iter[p] * BLOCK < n ) {
            if ( ! TPI_Trylock( pool , p ) ) {
              const unsigned i = all_iter[p] * BLOCK ;
              all_iter[p] += p_size ;
              TPI_Unlock( pool , p );
              if ( i < n ) {
                const unsigned len = BLOCK < n - i ? BLOCK : n - i ;
                daxpby_work( len, a, x + i, b, y + i );
              }
            }
            /* Some rank still had work on this sweep: sweep again. */
            working = 1 ;
          }
        }
      }
    }
  }
}
コード例 #8
0
ファイル: txblas_cr4_mxv.c プロジェクト: gitter-badger/quinoa
/*
 * Thread-pool work routine: y = A * x for this rank's contiguous range of
 * rows.
 *
 * data : pointer to a txblasTask_cr_Matrix. For row r, entries
 *        pc_begin[r] .. pc_begin[r+1]-1 of ia_begin / a_begin give the
 *        x indices and coefficients that are multiplied and summed into
 *        y_begin[r].
 * pool : handle given to TPI_Rank() for this caller's rank and the
 *        participant count.
 *
 * The index/coefficient cursors are carried from one row to the next rather
 * than being re-derived from pc_begin for every row.
 */
static void txblas_task_cr_mxv( void * data , TPI_ThreadPool pool )
{
  enum { STRIDE = 4 };

  int p_size , p_rank ;

  /* Nothing is done unless TPI_Rank() returns zero. */
  if ( 0 == TPI_Rank( pool , & p_rank , & p_size ) ) {

    txblasTask_cr_Matrix * const t = (txblasTask_cr_Matrix*) data ;

    const unsigned row_first = ( t->number_row * ( p_rank     ) ) / p_size ;
    const unsigned row_limit = ( t->number_row * ( p_rank + 1 ) ) / p_size ;

    const unsigned * const offsets = t->pc_begin ;
    const unsigned * const columns = t->ia_begin ;
    const double   * const x       = t->x_begin ;
          double   * const y       = t->y_begin ;

    /* Cursors into the index and coefficient arrays. */
    const unsigned * col = columns    + offsets[ row_first ] ;
    const double   * val = t->a_begin + offsets[ row_first ] ;

    unsigned row ;

    for ( row = row_first ; row < row_limit ; ++row ) {

      const unsigned * const col_end = columns + offsets[ row + 1 ] ;

      /* End of the part of this row that divides evenly into STRIDE. */
      const unsigned * const col_blk = col_end - ( col_end - col ) % STRIDE ;

      double ytmp = 0 ;

      /* Four products per step. */
      while ( col < col_blk ) {
        ytmp += val[0] * x[ col[0] ] +
                val[1] * x[ col[1] ] +
                val[2] * x[ col[2] ] +
                val[3] * x[ col[3] ] ;
        col += STRIDE ;
        val += STRIDE ;
      }

      /* Remaining (fewer than STRIDE) products. */
      while ( col < col_end ) {
        ytmp += *val * x[ *col ];
        ++col ;
        ++val ;
      }

      y[ row ] = ytmp ;
    }
  }
}
コード例 #9
0
ファイル: txblas_axpby.c プロジェクト: 00liujj/trilinos
/*
 * Thread-pool work routine: apply daxpby_work() to this rank's contiguous
 * share of the x/y arrays.
 *
 * arg  : pointer to a struct TaskXY (reads number, alpha, beta, x_beg, y_beg).
 * pool : handle given to TPI_Rank() for this caller's rank and the
 *        participant count.
 *
 * Every rank gets number / size entries; the first number % size ranks get
 * one extra entry each.
 */
static void task_axpby_work( void * arg , TPI_ThreadPool pool )
{
  int p_size ;
  int p_rank ;

  /* Nothing is done unless TPI_Rank() returns zero. */
  if ( 0 == TPI_Rank( pool , & p_rank , & p_size ) ) {

    struct TaskXY * const task = (struct TaskXY *) arg ;

    const int      extra = task->number % p_size ;
    const unsigned share = task->number / p_size ;

    unsigned offset = p_rank * share ;
    unsigned length = share ;

    if ( p_rank < extra ) {
      /* One extra entry here, preceded by p_rank enlarged shares. */
      offset += p_rank ;
      length += 1 ;
    }
    else {
      /* All enlarged shares lie before this rank. */
      offset += extra ;
    }

    daxpby_work( length , task->alpha , task->x_beg + offset ,
                          task->beta  , task->y_beg + offset );
  }
}
コード例 #10
0
/*
 * Thread-pool work routine: contiguous-partition x.y reduction via ddot().
 *
 * arg  : pointer to a struct TaskXY (reads x_beg, y_beg, number; writes one
 *        double to xy_sum[rank]).
 * pool : handle given to TPI_Rank() for this caller's rank and the
 *        participant count.
 *
 * Rank r processes entries [ number*r/size , number*(r+1)/size ).
 */
static void task_ddot_xy_work( void * arg , TPI_ThreadPool pool )
{
    int p_size , p_rank ;

    /* Nothing is done unless TPI_Rank() returns zero. */
    if ( 0 == TPI_Rank( pool , & p_rank , & p_size ) ) {

        struct TaskXY * const task = (struct TaskXY *) arg ;

        const unsigned count = task->number ;
        const unsigned first = ( count * ( p_rank     ) ) / p_size ;
        const unsigned limit = ( count * ( p_rank + 1 ) ) / p_size ;

        /* Each rank owns exactly one entry of the result array. */
        task->xy_sum[ p_rank ] =
            ddot( limit - first , task->x_beg + first , task->y_beg + first );
    }
}
コード例 #11
0
ファイル: txblas_axpby.c プロジェクト: 00liujj/trilinos
/*
 * Thread-pool work routine: apply daxpby_work() to the x/y arrays, using
 * round-robin blocks when there is enough work and an even contiguous split
 * otherwise.
 *
 * arg  : pointer to a struct TaskXY (reads number, alpha, beta, x_beg, y_beg).
 * pool : handle given to TPI_Rank() for this caller's rank and the
 *        participant count.
 *
 * Blocked path (more than one rank and number > 2 * BLOCK * size): rank r
 * handles the blocks starting at r * BLOCK, (r + size) * BLOCK, ...; the
 * final block is truncated to the entries that remain.
 * Even path: every rank gets number / size entries and the first
 * number % size ranks get one extra entry each.
 */
static void task_axpby_work_block( void * arg , TPI_ThreadPool pool )
{
  enum { BLOCK = UNROLL * 1024 };
  int p_size ;
  int p_rank ;

  /* Work is performed only when TPI_Rank() returns zero. */
  if ( ! TPI_Rank( pool , & p_rank , & p_size ) ) {

    struct TaskXY * const t = (struct TaskXY *) arg ;

    const unsigned inc = BLOCK * p_size ;
    const unsigned num = t->number ;

    if ( 1 < p_size && 2 * inc < num ) { /* More than two blocks of work */
      const unsigned block = BLOCK ;
      const double a = t->alpha ;
      const double b = t->beta ;
      const double * const x = t->x_beg ;
            double * const y = t->y_beg ;

      unsigned i ;

      /* The remaining count is kept unsigned and the loop stops on
       * i < num, instead of relying on a wrapped unsigned difference
       * turning negative when converted to int. */
      for ( i = BLOCK * p_rank ; i < num ; i += inc ) {
        const unsigned len = num - i ;
        daxpby_work( ( block < len ? block : len ) , a , x + i , b , y + i );
      }
    }
    else { /* Even partitioning */
      const int      n_rem = num % p_size ;
      const unsigned n_num = num / p_size ;
      const unsigned n_beg = p_rank*n_num + (p_rank < n_rem ? p_rank : n_rem);
      const unsigned n_len = n_num + ( p_rank < n_rem ? 1 : 0 );

      daxpby_work( n_len , t->alpha , t->x_beg + n_beg ,
                           t->beta ,  t->y_beg + n_beg );
    }
  }
}
コード例 #12
0
ファイル: TPI.hpp プロジェクト: gitter-badger/quinoa
/** Reference-taking C++ wrapper for the C routine TPI_Rank().
 *
 *  Forwards the addresses of @p rank and @p size to TPI_Rank() and returns
 *  that call's result unchanged.
 */
inline
int Rank( ThreadPool pool , int & rank , int & size )
{
  int * const rank_ptr = & rank ;
  int * const size_ptr = & size ;

  return TPI_Rank( pool , rank_ptr , size_ptr );
}