/* Threaded reduction over the vectors x and y (n entries each) whose
 * result is folded into s -- presumably an extended-precision dot
 * product, judging by the task names; confirm against the task code.
 *
 * A zeroed scratch buffer holding 4 doubles per unit of the size reported
 * by TPI_Size is handed to the task through 'data'.  After the parallel
 * run each 4-double group is combined into s with xdsum_add_dsum.  This
 * function never resets s itself.
 *
 * Nothing is done (s is left untouched) when TPI_Size reports an error,
 * reports a non-positive size, or the scratch buffer cannot be allocated.
 */
void txddot( double * s , unsigned n , const double * x , const double * y )
{
    int p_size ;

    /* A non-zero return from TPI_Size is treated as failure. */
    if ( TPI_Size( & p_size ) || p_size <= 0 ) {
        return ;
    }

    {
        const int ntmp = 4 * p_size ;
        double * const tmp = malloc( ntmp * sizeof(double) );

        /* Without the scratch buffer there is nowhere to accumulate. */
        if ( tmp ) {
            struct TaskXY data = { tmp , x , y , n , BLOCKING_SIZE };
            int i ;

            for ( i = 0 ; i < ntmp ; ++i ) {
                tmp[i] = 0 ;
            }

            /* A non-zero block size selects the blocking variant of the task. */
            if ( data.block ) {
                TPI_Run( & task_xddot_xy_work_blocking , & data , 0 );
            }
            else {
                TPI_Run( & task_xddot_xy_work , & data , 0 );
            }

            /* Fold every 4-double partial result into the caller's sum. */
            for ( i = 0 ; i < p_size ; ++i ) {
                xdsum_add_dsum( s , tmp + 4 * i );
            }

            free( tmp );
        }
    }
}
Exemple #2
0
/* Threaded update of y from x using the scalars a and b over n entries
 * (presumably y = a*x + b*y, judging by the name; confirm against the
 * task code).
 *
 * 'block' selects the task variant:
 *   block > 0  : task_axpby_work_block
 *   block < 0  : task_axpby_work_steal, after TPI_Set_lock_size( p_size )
 *   block == 0 : task_axpby_work
 *
 * A scratch array of p_size unsigned values, initialised to 0..p_size-1,
 * is passed to the task as the last member of 'data'.
 *
 * Nothing is run when TPI_Size reports an error, reports a non-positive
 * size, or the scratch array cannot be allocated.
 */
void tdaxpby( unsigned n ,
              double a , const double * x ,
              double b , double * y ,
              int block )
{
  int p_size ;

  /* A non-zero return from TPI_Size is treated as failure, matching the
   * other wrappers; p_size must not be read in that case. */
  if ( TPI_Size( & p_size ) || p_size <= 0 ) {
    return ;
  }

  {
    /* One unsigned per entry: the element size must be part of the
     * request, otherwise the loop below writes past the allocation. */
    unsigned * const tmp = malloc( p_size * sizeof(unsigned) );

    if ( tmp ) {
      struct TaskXY data = { a , b , x , y , n , tmp };
      int i ;

      for ( i = 0 ; i < p_size ; ++i ) { tmp[i] = i ; }

      if ( 0 < block ) {
        TPI_Run( & task_axpby_work_block , & data , 0 );
      }
      else if ( block < 0 ) {
        TPI_Set_lock_size( p_size );
        TPI_Run( & task_axpby_work_steal , & data , 0 );
      }
      else {
        TPI_Run( & task_axpby_work , & data , 0 );
      }

      free( tmp );
    }
  }
}
void txddot1( double * s , unsigned n , const double * x )
{
    struct TaskX data ;
    data.x_sum  = s ;
    data.x_beg  = x ;
    data.number = n ;
    TPI_Set_lock_size( 1 );
    TPI_Run( & task_xddot_x_work , & data , 0 );
}
Exemple #4
0
/* Runs a member function of 'worker' through TPI_Run.
 *
 * The worker and the member-function pointer are bundled into a
 * WorkerMethodHelper that lives on this stack frame for the duration of
 * the call; its 'run' entry point, cast to TPI_parallel_subprogram, is
 * what TPI_Run receives, with the helper's address as the argument.
 *
 * Returns whatever TPI_Run returns; 'n' is forwarded unchanged.
 */
inline
int Run( Worker & worker, void (Worker::*method)(ThreadPool) , int n )
{
  WorkerMethodHelper<Worker> helper( worker , method );

  const TPI_parallel_subprogram entry =
    reinterpret_cast<TPI_parallel_subprogram>(
      & WorkerMethodHelper<Worker>::run );

  return TPI_Run( entry , & helper , n );
}
Exemple #5
0
/* Runs a member function of 'worker' that takes a Work reference through
 * TPI_Run.
 *
 * The worker and the member-function pointer are bundled into a
 * WorkerMethodHelper that lives on this stack frame for the duration of
 * the call; its 'run' entry point, cast to TPI_work_subprogram, is what
 * TPI_Run receives, with the helper's address as the argument.
 *
 * work_count and lock_count are forwarded unchanged.  Returns whatever
 * TPI_Run returns.
 */
inline
int Run( Worker & worker, void (Worker::*method)(Work &) ,
         int work_count , int lock_count )
{
  WorkerMethodHelper<Worker> helper( worker , method );

  const TPI_work_subprogram entry =
    reinterpret_cast<TPI_work_subprogram>(
      & WorkerMethodHelper<Worker>::run );

  return TPI_Run( entry , & helper , work_count , lock_count );
}
/* Threaded matrix-vector product driver.
 *
 * Packs the arguments, in declaration order, into a txblasTask_cr_Matrix
 * record and runs txblas_task_cr_mxv on it through TPI_Run.
 *
 *   nr : number of rows
 *   pc , ia , a : matrix arrays (presumably a compressed-row layout of
 *                 row offsets, column indices and coefficients --
 *                 confirm against the task code)
 *   x  : input vector
 *   y  : output vector
 */
void txblas_cr_mxv(
  const unsigned nr ,
  const unsigned pc[] ,
  const unsigned ia[] ,
  const double   a[] ,
  const double   x[] ,
        double   y[] )
{
  txblasTask_cr_Matrix task = { nr , pc , ia , a , x , y };

  TPI_Run( & txblas_task_cr_mxv , & task , 0 );
}
/* Threaded reduction over the vectors x and y (n entries each) whose
 * result is stored in *s -- presumably a dot product, judging by the
 * task names; confirm against the task code.
 *
 * A zeroed scratch array with one double per unit of the size reported
 * by TPI_Size is handed to the task through 'data'.  After the parallel
 * run the entries are summed in index order and the total is written
 * to *s.
 *
 * *s is left untouched when TPI_Size reports an error, reports a
 * non-positive size, or the scratch array cannot be allocated.
 */
void tddot( double * s , unsigned n , const double * x , const double * y )
{
    int p_size ;

    /* A non-zero return from TPI_Size is treated as failure; a size of
     * zero would leave no tmp[0] to read back below. */
    if ( TPI_Size( & p_size ) || p_size <= 0 ) {
        return ;
    }

    {
        double * const tmp = malloc( p_size * sizeof(double) );

        if ( tmp ) {
            struct TaskXY data = { tmp , x , y , n , BLOCKING_SIZE };
            int i ;

            for ( i = 0 ; i < p_size ; ++i ) {
                tmp[i] = 0 ;
            }

            /* A non-zero block size selects the blocking variant of the task. */
            if ( data.block ) {
                TPI_Run( & task_ddot_xy_work_blocking , & data , 0 );
            }
            else {
                TPI_Run( & task_ddot_xy_work , & data , 0 );
            }

            /* Collapse the partial results into the first slot. */
            for ( i = 1 ; i < p_size ; ++i ) {
                tmp[0] += tmp[i] ;
            }

            *s = tmp[0] ;

            free( tmp );
        }
    }
}
Exemple #8
0
/* Timing and correctness test for TPI_Run and TPI_Run_reduce.
 *
 * For each of the 'ntest' entries of 'nthread' the pool is initialised
 * with that value, 'ntrial' trials are executed and the pool is
 * finalised.  A trial clears 'nwork' flags, times one TPI_Run call, and
 * verifies that every flag is non-zero; it then repeats the same with
 * TPI_Run_reduce and additionally verifies that the reduced value
 * equals 'nwork'.  Any verification failure aborts the process.
 *
 * When more than one trial was run, one CSV row per thread count is
 * written to stdout with the mean and sample standard deviation of the
 * two timings; the column order follows the header row.
 */
void test_tpi_work( const int ntest , const int nthread[] , const int nwork ,
                    const int ntrial )
{
  int * const flags = (int *) malloc( sizeof(int) * nwork );
  int j ;

  /* A null result is only an error when flags were actually requested. */
  if ( ! flags && 0 < nwork ) {
    fprintf(stderr, "test_tpi_work : FAILED to allocate %d flags\n", nwork );
    abort();
  }

  /* NOTE(review): the timings below are scaled by 1.0e6 while the column
   * titles say "msec" -- confirm the unit returned by TPI_Walltime. */
  fprintf( stdout , "\n\"TEST TPI_Run / TPI_Run_reduce\"\n" );
  fprintf( stdout , "\"#Thread\" , \"#Work\" , \"#Trial\" , \"TPI_Run(avg-msec)\" , \"TPI_Run(stddev-msec)\" , \"TPI_Run_reduce(avg-msec)\" , \"TPI_Run_reduce(stddev-msec)\"\n");

  for ( j = 0 ; j < ntest ; ++j ) {
    const int nth = nthread[j];

    /* Running sums of the elapsed times and of their squares. */
    double dt_work_total   = 0.0 ;
    double dt_work_total_2 = 0.0 ;
    double dt_reduce_total    = 0.0 ;
    double dt_reduce_total_2  = 0.0 ;
    int i , k ;

    int result = TPI_Init( nth );

    /* A mismatch is reported but the trials still run. */
    if ( result != nth ) {
      fprintf(stderr,"%d != TPI_Init(%d) : FAILED\n", result , nth );
    }

    for ( i = 0 ; i < ntrial ; ++i ) {
      double t , dt ;
      int value = 0 ;

      for ( k = 0 ; k < nwork ; ++k ) { flags[k] = 0 ; }

      /* NOTE(review): the address of the 'flags' pointer is passed here
       * and below, not the array itself -- confirm that the work
       * functions expect that. */
      t = TPI_Walltime();
      TPI_Run( test_work , & flags , nwork , 0 );
      dt = TPI_Walltime() - t ;
      dt_work_total += dt ;
      dt_work_total_2 += dt * dt ;

      /* Stop at the first flag that is still zero. */
      for ( k = 0 ; k < nwork && flags[k] ; ++k );

      if ( k < nwork ) {
        fprintf(stderr, "TPI_Run(...) : FAILED at trial %d\n", i );
        abort();
      }

      for ( k = 0 ; k < nwork ; ++k ) { flags[k] = 0 ; }

      t = TPI_Walltime();
      TPI_Run_reduce( test_reduce_work , & flags , nwork ,
                      test_reduce_join , test_reduce_init ,
                      sizeof(value) , & value );

      dt = TPI_Walltime() - t ;
      dt_reduce_total += dt ;
      dt_reduce_total_2 += dt * dt ;

      for ( k = 0 ; k < nwork && flags[k] ; ++k );

      if ( value != nwork || k < nwork ) {
        fprintf(stderr, "TPI_Run_reduce(...) : FAILED at trial %d\n", i );
        abort();
      }
    }

    TPI_Finalize();

    /* The sample standard deviation needs at least two trials. */
    if ( 1 < ntrial ) {
      const double work_mean = 1.0e6 * dt_work_total / ntrial ;
      const double work_sdev = 1.0e6 * sqrt( ( ntrial * dt_work_total_2 -
                                       dt_work_total * dt_work_total ) /
                                     ( ntrial * ( ntrial - 1 ) ) );

      const double reduce_mean = 1.0e6 * dt_reduce_total / ntrial ;
      const double reduce_sdev = 1.0e6 * sqrt( ( ntrial * dt_reduce_total_2 -
                                         dt_reduce_total * dt_reduce_total) /
                                       ( ntrial * ( ntrial - 1 ) ) );

      /* Same column order as the header: #Thread , #Work , #Trial. */
      fprintf(stdout,"%d , %d , %d , %10g , %10g , %10g , %10g\n",
              nth, nwork, ntrial, work_mean, work_sdev, reduce_mean, reduce_sdev);
    }
  }

  free( flags );
}
Exemple #9
0
inline
int Run( void (*func)( void * , ThreadPool ) , void * arg , int n )
{
  return TPI_Run( reinterpret_cast< TPI_parallel_subprogram >(func), arg , n );
}