Example #1
void test_tpi_block( const int ntest , const int nthread[] , const int ntrial )
{
  int i, j ;

  fprintf( stdout , "\n\"TEST TPI_Block / TPI_Unblock\"\n" );
  fprintf( stdout , "\"#Thread\" , \"#Trial\" , \"TPI_Block(avg-usec)\" , \"TPI_Block(stddev-usec)\" , \"TPI_Unblock(avg-usec)\" , \"TPI_Unblock(stddev-usec)\"\n");

  for ( j = 0 ; j < ntest ; ++j ) {
    const int nth = nthread[j];

    double dt_block_total   = 0.0 ;
    double dt_block_total_2 = 0.0 ;
    double dt_unblock_total    = 0.0 ;
    double dt_unblock_total_2  = 0.0 ;

    int result = TPI_Init( nth );

    if ( result != nth ) {
      fprintf(stderr,"%d != TPI_Init(%d) : FAILED\n", result , nth );
      abort();
    }

    for ( i = 0 ; i < ntrial ; ++i ) {
      double t , dt ;

      t = TPI_Walltime();
      TPI_Block();
      dt = TPI_Walltime() - t ;
      dt_block_total += dt ;
      dt_block_total_2 += dt * dt ;


      t = TPI_Walltime();
      TPI_Unblock();
      dt = TPI_Walltime() - t ;
      dt_unblock_total += dt ;
      dt_unblock_total_2 += dt * dt ;
    }

    TPI_Finalize();

    if ( 1 < ntrial ) {
      const double block_mean = 1.0e6 * dt_block_total / ntrial ;
      const double block_sdev = 1.0e6 * sqrt( ( ntrial * dt_block_total_2 -
                                        dt_block_total * dt_block_total ) /
                                      ( ntrial * ( ntrial - 1 ) ) );

      const double unblock_mean = 1.0e6 * dt_unblock_total / ntrial ;
      const double unblock_sdev = 1.0e6 * sqrt( ( ntrial * dt_unblock_total_2 -
                                          dt_unblock_total * dt_unblock_total) /
                                        ( ntrial * ( ntrial - 1 ) ) );
      
      fprintf(stdout,"%d , %d , %10g , %10g , %10g , %10g\n",
              nth , ntrial , block_mean , block_sdev , unblock_mean , unblock_sdev );
    }
  }
}
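The loop accumulates both the sum and the sum of squares of the per-trial times, so a single pass yields the mean and the sample standard deviation reported above:

  mean   = sum(dt) / ntrial
  stddev = sqrt( ( ntrial * sum(dt*dt) - sum(dt)*sum(dt) ) / ( ntrial * ( ntrial - 1 ) ) )

The 1.0e6 factor converts the seconds returned by TPI_Walltime into the microseconds named in the column headers.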
Example #2
 void TPINode::init(int numThreads) {
   if (curNumThreads_ >= 1) {
     TPI_Finalize();
   }
   curNumThreads_ = numThreads;
   if (curNumThreads_ >= 1) {
     TPI_Init(curNumThreads_);
   }
 }
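Note that init() finalizes any existing pool before creating a new one, so it can safely be called repeatedly with different thread counts; a count below 1 simply shuts the pool down without starting another.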
Example #3
void test_tpi_init( const int ntest , const int nthread[] , const int ntrial )
{
  int j ;

  fprintf( stdout , "\n\"TEST TPI_Init / TPI_Finalize\"\n" );
  fprintf( stdout , "\"#Thread\" , \"#Trial\" , \"TPI_Init(avg-usec)\" , \"TPI_Init(stddev-usec)\" , \"TPI_Finalize(avg-usec)\" , \"TPI_Finalize(stddev-usec)\"\n");

  for ( j = 0 ; j < ntest ; ++j ) {
    const int nth = nthread[j];
    double dt_init_total   = 0.0 ;
    double dt_init_total_2 = 0.0 ;
    double dt_fin_total    = 0.0 ;
    double dt_fin_total_2  = 0.0 ;
    int i ;
    int result ;

    for ( i = 0 ; i < ntrial ; ++i ) {
      double t , dt ;

      t = TPI_Walltime();
      result = TPI_Init( nth );
      dt = TPI_Walltime() - t ;
      dt_init_total += dt ;
      dt_init_total_2 += dt * dt ;

      if ( result != nth ) {
        fprintf(stderr,"%d != TPI_Init(%d) : FAILED at trial %d\n",
                result , nth , i );
        abort();
      }

      t = TPI_Walltime();
      TPI_Finalize();
      dt = TPI_Walltime() - t ;
      dt_fin_total += dt ;
      dt_fin_total_2 += dt * dt ;
    }

    if ( 1 < ntrial ) {
      const double init_mean = 1.0e6 * dt_init_total / ntrial ;
      const double init_sdev = 1.0e6 * sqrt( ( ntrial * dt_init_total_2 -
                                       dt_init_total * dt_init_total ) /
                                     ( ntrial * ( ntrial - 1 ) ) );

      const double fin_mean = 1.0e6 * dt_fin_total / ntrial ;
      const double fin_sdev = 1.0e6 * sqrt( ( ntrial * dt_fin_total_2 -
                                      dt_fin_total * dt_fin_total ) /
                                    ( ntrial * ( ntrial - 1 ) ) );
      
      fprintf(stdout,"%d , %d , %10g , %10g , %10g , %10g\n",
              nth , ntrial , init_mean , init_sdev , fin_mean , fin_sdev );
    }
  }
}
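Both timing tests share the same calling convention, so a small driver can sweep a set of thread counts. A hypothetical sketch (the main function and the trial count are illustrative, not part of the original source):

/* Hypothetical driver for the timing tests above. */
int main( void )
{
  const int nthread[] = { 1 , 2 , 4 , 8 };
  const int ntest = sizeof(nthread) / sizeof(nthread[0]);

  test_tpi_init(  ntest , nthread , 100 );
  test_tpi_block( ntest , nthread , 100 );

  return 0 ;
}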
Example #4
int TPI_Init( int n )
{
  ThreadPool * const pool = local_thread_pool();

  /* Refuse to initialize if the pool is unavailable or threads are already running */
  int result = ! pool || pool->m_number_threads ? TPI_ERROR_ACTIVE : 0 ;

  if ( ! result && n <= 0 ) { result = TPI_ERROR_SIZE ; }

  if ( ! result ) {
    pthread_attr_t thread_attr ;

    if ( pthread_attr_init( & thread_attr ) ) {
      result = TPI_ERROR_INTERNAL ;
    }
    else {

      pthread_attr_setscope(       & thread_attr, PTHREAD_SCOPE_SYSTEM );
      pthread_attr_setdetachstate( & thread_attr, PTHREAD_CREATE_DETACHED );

      pthread_mutex_lock( & pool->m_pool_lock );

      {
        int n_thread = 1 ; /* Count myself among the threads */

        pool->m_number_threads = n_thread ;

        for ( ; n_thread < n && ! result ; ++n_thread ) {
          pthread_t pt ;

          if ( pthread_create( & pt, & thread_attr,
                               & local_thread_pool_driver, pool ) ) {
            result = TPI_ERROR_INTERNAL ;
          }
          else {
            /* Wait for start */
            pthread_cond_wait( & pool->m_pool_cond , & pool->m_pool_lock );
          }
        }
      }

      pthread_attr_destroy( & thread_attr );

      pthread_mutex_unlock( & pool->m_pool_lock );
    }

    if ( result ) { TPI_Finalize(); }
  }

  return result ;
}
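The start-up handshake is worth noting: the creating thread holds m_pool_lock while spawning each detached worker and blocks on m_pool_cond until that worker signals it has started, so TPI_Init does not return until every thread is running; on any failure the partially built pool is torn down via TPI_Finalize. (This variant returns 0 on success, while the test snippets above expect the thread count back, so they apparently target a different revision of the API.) The worker side is not shown here; a minimal sketch of the handshake it must perform, under that assumption:

/* Sketch only -- the real local_thread_pool_driver is not shown above,
   and the m_number_threads bookkeeping is an assumption. */
static void * local_thread_pool_driver( void * arg )
{
  ThreadPool * const pool = (ThreadPool *) arg ;

  pthread_mutex_lock( & pool->m_pool_lock );
  ++( pool->m_number_threads );               /* register this worker */
  pthread_cond_signal( & pool->m_pool_cond ); /* release TPI_Init's wait */
  pthread_mutex_unlock( & pool->m_pool_lock );

  /* ... worker run loop elided ... */

  return NULL ;
}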
Example #5
void test_tpi_work_async(
  const int ntest , const int nthread[] , const int nwork , const int ntrial )
{
  int * const flags = (int *) malloc( sizeof(int) * nwork );
  int j ;

  fprintf( stdout , "\n\"TEST TPI_Start / TPI_Start_reduce\"\n" );
  fprintf( stdout , "\"#Thread\" , \"#Work\" , \"#Trial\" , \"TPI_Start(avg-usec)\" , \"TPI_Start(stddev-usec)\" , \"TPI_Start_reduce(avg-usec)\" , \"TPI_Start_reduce(stddev-usec)\"\n");

  for ( j = 0 ; j < ntest ; ++j ) {
    const int nth = nthread[j];

    double dt_work_total   = 0.0 ;
    double dt_work_total_2 = 0.0 ;
    double dt_reduce_total    = 0.0 ;
    double dt_reduce_total_2  = 0.0 ;
    int i , k ;

    int result = TPI_Init( nth );

    if ( result != nth ) {
      fprintf(stderr,"%d != TPI_Init(%d) : FAILED\n", result , nth );
      abort();
    }

    for ( i = 0 ; i < ntrial ; ++i ) {
      double t , dt ;
      int value = 0 ;

      for ( k = 0 ; k < nwork ; ++k ) { flags[k] = 0 ; }

      t = TPI_Walltime();
      TPI_Start( test_work , & flags , nwork , 0 );
      TPI_Wait();
      dt = TPI_Walltime() - t ;
      dt_work_total += dt ;
      dt_work_total_2 += dt * dt ;

      for ( k = 0 ; k < nwork && flags[k] ; ++k );

      if ( k < nwork ) {
        fprintf(stderr, "TPI_Start(...) : FAILED at trial %d\n", i );
        abort();
      }

      for ( k = 0 ; k < nwork ; ++k ) { flags[k] = 0 ; }

      t = TPI_Walltime();

      TPI_Start_reduce( test_reduce_work , & flags , nwork ,
                        test_reduce_join , test_reduce_init ,
                        sizeof(value) , & value );
      TPI_Wait();
  
      dt = TPI_Walltime() - t ;
      dt_reduce_total += dt ;
      dt_reduce_total_2 += dt * dt ;

      for ( k = 0 ; k < nwork && flags[k] ; ++k );

      if ( value != nwork || k < nwork ) {
        fprintf(stderr, "TPI_Start_reduce(...) : FAILED at trial %d\n", i );
        abort();
      }
    }

    TPI_Finalize();

    if ( 1 < ntrial ) {
      const double work_mean = 1.0e6 * dt_work_total / ntrial ;
      const double work_sdev = 1.0e6 * sqrt( ( ntrial * dt_work_total_2 -
                                       dt_work_total * dt_work_total ) /
                                     ( ntrial * ( ntrial - 1 ) ) );

      const double reduce_mean = 1.0e6 * dt_reduce_total / ntrial ;
      const double reduce_sdev = 1.0e6 * sqrt( ( ntrial * dt_reduce_total_2 -
                                         dt_reduce_total * dt_reduce_total) /
                                       ( ntrial * ( ntrial - 1 ) ) );
      
      fprintf(stdout,"%d , %d , %d , %10g , %10g , %10g , %10g\n",
              nth, nwork, ntrial, work_mean, work_sdev, reduce_mean, reduce_sdev);
    }
  }

  free( flags );
}
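TPI_Start forks the work asynchronously and TPI_Wait blocks until all nwork units complete; each unit receives &flags through its info field and is expected to mark its own entry. The work routines themselves are not shown; a minimal sketch of test_work, assuming the standard TPI_Work fields info and rank:

/* Sketch only -- test_work is not included in this snippet. */
static void test_work( TPI_Work * work )
{
  int * const * const flags_ptr = (int * const *) work->info ;
  int * const flags = *flags_ptr ;

  flags[ work->rank ] = 1 ; /* mark this unit of work as executed */
}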
Example #6
void test_tpi_reduce( const int ntest , const int nthread[] , const int ntrial )
{
  int j ;

  fprintf( stdout , "\n\"TEST TPI_Run_threads(reduce) / TPI_Run_threads_reduce\"\n" );
  fprintf( stdout , "\"#Thread\" , \"#Trial\" , \"TPI_Run_threads(avg-usec)\" , \"TPI_Run_threads(stddev-usec)\" , \"TPI_Run_threads_reduce(avg-usec)\" , \"TPI_Run_threads_reduce(stddev-usec)\"\n");

  for ( j = 0 ; j < ntest ; ++j ) {
    const int nth = nthread[j];

    double dt_lock_total   = 0.0 ;
    double dt_lock_total_2 = 0.0 ;
    double dt_reduce_total    = 0.0 ;
    double dt_reduce_total_2  = 0.0 ;
    int i ;

    int result = TPI_Init( nth );

    if ( result != nth ) {
      fprintf(stderr,"%d != TPI_Init(%d) : FAILED\n", result , nth );
      abort();
    }

    for ( i = 0 ; i < ntrial ; ++i ) {
      double t , dt ;
      int value = 0 ;
      int * const ptr = & value ;

      t = TPI_Walltime();
      TPI_Run_threads( test_reduce_via_lock , & ptr , 1 );
      dt = TPI_Walltime() - t ;
      dt_lock_total += dt ;
      dt_lock_total_2 += dt * dt ;

      if ( value != nth ) {
        fprintf(stderr,
                "TPI_Run_threads(reduce,...) : FAILED at trial %d\n",
                i );
        abort();
      }

      value = 0 ;

      t = TPI_Walltime();
      TPI_Run_threads_reduce( test_reduce_via_nolock , NULL ,
                              test_reduce_join , test_reduce_init ,
                              sizeof(value) , & value );
  
      dt = TPI_Walltime() - t ;
      dt_reduce_total += dt ;
      dt_reduce_total_2 += dt * dt ;

      if ( value != nth ) {
        fprintf(stderr,
                "TPI_Run_threads_reduce(...) : FAILED at trial %d\n",
                i );
        abort();
      }
    }

    TPI_Finalize();

    if ( 1 < ntrial ) {
      const double lock_mean = 1.0e6 * dt_lock_total / ntrial ;
      const double lock_sdev = 1.0e6 * sqrt( ( ntrial * dt_lock_total_2 -
                                       dt_lock_total * dt_lock_total ) /
                                     ( ntrial * ( ntrial - 1 ) ) );

      const double reduce_mean = 1.0e6 * dt_reduce_total / ntrial ;
      const double reduce_sdev = 1.0e6 * sqrt( ( ntrial * dt_reduce_total_2 -
                                         dt_reduce_total * dt_reduce_total) /
                                       ( ntrial * ( ntrial - 1 ) ) );
      
      fprintf(stdout,"%d , %d , %10g , %10g , %10g , %10g\n",
              nth, ntrial, lock_mean, lock_sdev, reduce_mean, reduce_sdev);
    }
  }
}
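The first timing uses a mutex-guarded increment (test_reduce_via_lock), while the second lets each thread accumulate into a private value that TPI initializes and joins. Those helpers are not shown; hypothetical shapes, assuming the TPI reduce callbacks receive the per-thread value through work->reduce:

/* Sketch only -- the reduce helpers are not included in this snippet. */
static void test_reduce_init( TPI_Work * work )
{
  *((int *) work->reduce) = 0 ;  /* per-thread partial starts at zero */
}

static void test_reduce_join( TPI_Work * work , const void * reduce )
{
  *((int *) work->reduce) += *((const int *) reduce) ; /* merge a partial */
}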
Example #7
inline
int Finalize() { return TPI_Finalize(); }
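This wrapper forwards directly to the C API and hands its integer result (0 on success, a TPI_ERROR_* code otherwise) back to the C++ caller.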
Example #8
int main( int argc , char ** argv )
{
  const int ghost = 1 ;
  const int max_cube = 20 ;
  int ncube[20] = { 0 }; /* zero-initialize all twenty entries */

  FILE * print_file = stdout ;
  int print_iter = 500 ;
  int max_iter = 50 ;

  VECTOR_SCALAR tolerance = 0.0 ; /* Force max iterations */

  int gbox[3][2] = { { 0 , 16 } , { 0 , 16 } , { 0 , 16 } };
  int nt = 0 ;
  int trials = 5 ;
  int ntest ;
  int np = 1;
  int my_p = 0 ;

#ifdef HAVE_MPI
  MPI_Init( & argc , & argv );
  MPI_Comm_size( MPI_COMM_WORLD , & np );
  MPI_Comm_rank( MPI_COMM_WORLD , & my_p );
#endif

  if ( ! my_p ) {
    const char arg_threads[] = "threads=" ;
    const char arg_cube[] = "cube=" ;
    const char arg_box[] = "box=" ;
    const char arg_max[] = "max_iter=" ;
    const char arg_trials[] = "trials=" ;
    const char arg_print[] = "print_iter=" ;
    const char arg_file[] = "print_file=" ;
    int i ;
    for ( i = 1 ; i < argc ; ++i ) {
      if ( ! strncmp(argv[i],arg_threads,strlen(arg_threads)) ) {
        sscanf(argv[i]+strlen(arg_threads),"%d",&nt);
      }
      else if ( ! strncmp(argv[i],arg_box,strlen(arg_box)) ) {
        sscanf(argv[i]+strlen(arg_box),"%d%*[x]%d%*[x]%d",
               & gbox[0][1] , & gbox[1][1] , & gbox[2][1] );
      }
      else if ( ! strncmp(argv[i],arg_cube,strlen(arg_cube)) ) {
        sscanf(argv[i]+strlen(arg_cube),
               "%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d",
               ncube+0, ncube+1, ncube+2, ncube+3, ncube+4,
               ncube+5, ncube+6, ncube+7, ncube+8, ncube+9,
               ncube+10, ncube+11, ncube+12, ncube+13, ncube+14,
               ncube+15, ncube+16, ncube+17, ncube+18, ncube+19);
      }
      else if ( ! strncmp(argv[i],arg_max,strlen(arg_max)) ) {
        sscanf(argv[i]+strlen(arg_max),"%d",&max_iter);
      }
      else if ( ! strncmp(argv[i],arg_trials,strlen(arg_trials)) ) {
        sscanf(argv[i]+strlen(arg_trials),"%d",&trials);
      }
      else if ( ! strncmp(argv[i],arg_print,strlen(arg_print)) ) {
        sscanf(argv[i]+strlen(arg_print),"%d",&print_iter);
      }
      else if ( ! strncmp(argv[i],arg_file,strlen(arg_file)) ) {
        char buffer[256] ;
        sscanf(argv[i]+strlen(arg_file),"%255s",buffer); /* bound read to buffer size */
        print_file = fopen(buffer,"a");
        if ( NULL == print_file ) { print_file = stdout ; } /* fall back on open failure */
      }
    }
  }

#ifdef HAVE_MPI
  {
    MPI_Bcast( & nt , 1 , MPI_INT , 0 , MPI_COMM_WORLD );
    MPI_Bcast( & gbox[0][0] , 6 , MPI_INT , 0 , MPI_COMM_WORLD );
    MPI_Bcast( ncube , max_cube , MPI_INT , 0 , MPI_COMM_WORLD );
    MPI_Bcast( & max_iter , 1 , MPI_INT , 0 , MPI_COMM_WORLD );
    MPI_Bcast( & print_iter , 1 , MPI_INT , 0 , MPI_COMM_WORLD );
    MPI_Bcast( & trials , 1 , MPI_INT , 0 , MPI_COMM_WORLD );
  }
#endif

  if ( nt ) {
    TPI_Init( nt );
    TPI_Block();
    TPI_Unblock();
  }

  if ( ! my_p ) {
    fprintf(print_file,"\"PROC\" , \"THREAD\" , \"EQUATION\" , \"NON-ZERO\" , \"MXV\"    , \"AXPBY\"  , \"DOT\" , \"Xerror\" , \"Iter\"\n");
    fprintf(print_file,"\"COUNT\" , \"COUNT\"  , \"COUNT\"    , \"COUNT\"    , \"Mflops\" , \"Mflops\" , \"Mflops\" , \"L2norm\" , \"COUNT\"\n");
  }

  for ( ntest = 0 ; ! ntest || ( ntest < max_cube && ncube[ntest] ) ; ++ntest ) {
    struct cgsolve_data cgdata ;

    if ( ncube[ntest] ) {
      gbox[0][1] = gbox[1][1] = gbox[2][1] = ncube[ntest] ;
    }

    hpccg_alloc_and_fill( np, my_p, (const int (*)[2]) gbox, ghost, &cgdata);

    cgdata.max_iter   = max_iter ;
    cgdata.print_iter = print_iter ;
    cgdata.tolerance  = tolerance ;

    {
      double dt_mxv[2] = { 0 , 0 };
      double dt_axpby[2] = { 0 , 0 };
      double dt_dot[2] = { 0 , 0 };
      VECTOR_SCALAR norm_resid = 0.0 ;
      int iter_count = 0 ;
      int iter_total = 0 ;
      int k ;

      VECTOR_SCALAR * const b      = (VECTOR_SCALAR *) malloc( sizeof(VECTOR_SCALAR) * cgdata.nRow );
      VECTOR_SCALAR * const x      = (VECTOR_SCALAR *) malloc( sizeof(VECTOR_SCALAR) * cgdata.nRow );
      VECTOR_SCALAR * const xexact = (VECTOR_SCALAR *) malloc( sizeof(VECTOR_SCALAR) * cgdata.nRow );

      {
        const VECTOR_SCALAR value = 1.0 /* 1.0 / 3.0 */ ;
        int i ;
        for ( i = 0 ; i < cgdata.nRow ; ++i ) xexact[i] = value ;
      }

      for ( k = 0 ; k < trials ; ++k ) {
        int i ;

        for ( i = 0 ; i < cgdata.nRow ; ++i ) { x[i] = 0.0 ; }

        cgsolve_set_lhs( & cgdata , xexact , b );

        cgsolve( & cgdata, b, x,
                 & iter_count, & norm_resid,
                 dt_mxv , dt_axpby , dt_dot );

        iter_total += iter_count ;
      }

      {
        int nnzGlobal = cgdata.A_pc[ cgdata.nRow ];
        double error[2] = { 0 , 0 };

        for ( k = 0 ; k < cgdata.nRow ; ++k ) {
          error[0] += ( x[k] - xexact[k] ) * ( x[k] - xexact[k] );
          error[1] += xexact[k] * xexact[k] ;
        }

#ifdef HAVE_MPI
        {
          double error_global[2] = { 0.0 , 0.0 };
          int nnz = nnzGlobal ;

          MPI_Allreduce( & nnz , & nnzGlobal , 1 , MPI_INT , MPI_SUM ,
                         MPI_COMM_WORLD );

          MPI_Allreduce( error , error_global , 2 , MPI_DOUBLE , MPI_SUM ,
                         MPI_COMM_WORLD );

          error[0] = error_global[0];
          error[1] = error_global[1];
        }
#endif

        error[0] = sqrt( error[0] );
        error[1] = sqrt( error[1] );

        if ( ! my_p ) {
          const int nRowGlobal = ( gbox[0][1] - gbox[0][0] ) *
                                 ( gbox[1][1] - gbox[1][0] ) *
                                 ( gbox[2][1] - gbox[2][0] );

          /* Matrix-vector product: 2 flops per non-zero, once per iteration */
          const double mflop_mxv =
             1.0e-6 * ( iter_total ) * 2 * nnzGlobal / dt_mxv[0] ;

          /* axpby: 3 calls per iteration, 3 flops per row */
          const double mflop_axpby =
             1.0e-6 * ( iter_total * 3 ) * 3 * nRowGlobal / dt_axpby[0] ;

          /* dot product: 2 calls per iteration, 2 flops per row */
          const double mflop_dot =
             1.0e-6 * ( iter_total * 2 ) * 2 * nRowGlobal / dt_dot[0] ;

          fprintf(print_file,"%8d , %8d , %8d , %8d , %10g , %10g , %10g , %g , %d\n",
                  np , nt , nRowGlobal , nnzGlobal ,
                  mflop_mxv , mflop_axpby , mflop_dot ,
                  error[0] / error[1] , iter_total );
          fflush(print_file);
        }
      }

      free( xexact );
      free( x );
      free( b );
    }
    free( cgdata.A_a );
    free( cgdata.A_ia );
    free( cgdata.A_pc );
    free( cgdata.recv_pc );
    free( cgdata.send_pc );
    free( cgdata.send_id );
  }

  if ( nt ) { TPI_Finalize(); }

#ifdef HAVE_MPI
  MPI_Finalize();
#endif

  return 0 ;
}
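Given the option parsing above, a typical invocation (the binary name here is illustrative) would be:

  ./hpccg_tpi threads=4 box=32x32x32 trials=5 print_file=results.csv

A cube=8,16,32 argument runs one solve series per listed edge length, overriding box= for each; anything not specified keeps the defaults set at the top of main. Note that only rank 0 parses the command line; the settings are then broadcast to all MPI ranks.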
Example #9
 TPINode::~TPINode()
 {
   if (curNumThreads_ >= 1) {
     TPI_Finalize();
   }
 }
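Paired with init() from Example #2, this destructor ensures an active pool is always finalized exactly once: init() finalizes before re-initializing, and the destructor finalizes whatever pool is still active when the node is destroyed.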