예제 #1
0
void QthreadExec::resize_worker_scratch( const int reduce_size , const int shared_size )
{
  const int exec_all_reduce_alloc = align_alloc( reduce_size );
  const int shepherd_scan_alloc   = align_alloc( 8 );
  const int shepherd_shared_end   = exec_all_reduce_alloc + shepherd_scan_alloc + align_alloc( shared_size );

  if ( s_worker_reduce_end < exec_all_reduce_alloc ||
       s_worker_shared_end < shepherd_shared_end ) {

    // Clear current worker memory before allocating new worker memory
    clear_workers();

    // Increase the buffers to an aligned allocation
    s_worker_reduce_end   = exec_all_reduce_alloc ;
    s_worker_shared_begin = exec_all_reduce_alloc + shepherd_scan_alloc ;
    s_worker_shared_end   = shepherd_shared_end ;

    // Need to query which shepherd this main 'process' is running...

    // Have each worker resize its memory for proper first-touch
    for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) {
    for ( int i = jshep ? 0 : 1 ; i < s_number_workers_per_shepherd ; ++i ) {

      // Unit tests hang with this call:
      //
      // qthread_fork_to_local_priority( driver_resize_workers , NULL , NULL , jshep );
      //

      qthread_fork_to( driver_resize_worker_scratch , NULL , NULL , jshep );
    }}

    driver_resize_worker_scratch( NULL );

    // Verify all workers allocated

    bool ok = true ;
    for ( int iwork = 0 ; ok && iwork < s_number_workers ; ++iwork ) { ok = 0 != s_exec[iwork] ; }

    if ( ! ok ) {
      std::ostringstream msg ;
      msg << "Kokkos::Impl::QthreadExec::resize : FAILED for workers {" ;
      for ( int iwork = 0 ; iwork < s_number_workers ; ++iwork ) {
         if ( 0 == s_exec[iwork] ) { msg << " " << ( s_number_workers - ( iwork + 1 ) ); }
      }
      msg << " }" ;
      Kokkos::Impl::throw_runtime_exception( msg.str() );
    }
  }
}
void QthreadExec::resize_worker_scratch( const int reduce_size , const int shared_size )
{
  const int exec_all_reduce_alloc = align_alloc( reduce_size );
  const int shepherd_scan_alloc   = align_alloc( 8 );
  const int shepherd_shared_end   = exec_all_reduce_alloc + shepherd_scan_alloc + align_alloc( shared_size );

  if ( s_worker_reduce_end < exec_all_reduce_alloc ||
       s_worker_shared_end < shepherd_shared_end ) {

/*
  fprintf( stdout , "QthreadExec::resize\n");
  fflush(stdout);
*/

    // Clear current worker memory before allocating new worker memory
    clear_workers();

    // Increase the buffers to an aligned allocation
    s_worker_reduce_end   = exec_all_reduce_alloc ;
    s_worker_shared_begin = exec_all_reduce_alloc + shepherd_scan_alloc ;
    s_worker_shared_end   = shepherd_shared_end ;

    // Need to query which shepherd this main 'process' is running...
 
    const int main_shep = qthread_shep();

    // Have each worker resize its memory for proper first-touch
#if 1
    for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) {
    for ( int i = jshep != main_shep ? 0 : 1 ; i < s_number_workers_per_shepherd ; ++i ) {
      qthread_fork_to( driver_resize_worker_scratch , NULL , NULL , jshep );
    }}
#else
    // If this function is used before the 'qthread.task_policy' unit test
    // the 'qthread.task_policy' unit test fails with a seg-fault within libqthread.so.
    for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) {
      const int num_clone = jshep != main_shep ? s_number_workers_per_shepherd : s_number_workers_per_shepherd - 1 ;

      if ( num_clone ) {
        const int ret = qthread_fork_clones_to_local_priority
          ( driver_resize_worker_scratch   /* function */
          , NULL                           /* function data block */
          , NULL                           /* pointer to return value feb */
          , jshep                          /* shepherd number */
          , num_clone - 1                  /* number of instances - 1 */
          );

        assert(ret == QTHREAD_SUCCESS);
      }
    }
#endif

    driver_resize_worker_scratch( NULL );

    // Verify all workers allocated

    bool ok = true ;
    for ( int iwork = 0 ; ok && iwork < s_number_workers ; ++iwork ) { ok = 0 != s_exec[iwork] ; }

    if ( ! ok ) {
      std::ostringstream msg ;
      msg << "Kokkos::Impl::QthreadExec::resize : FAILED for workers {" ;
      for ( int iwork = 0 ; iwork < s_number_workers ; ++iwork ) {
         if ( 0 == s_exec[iwork] ) { msg << " " << ( s_number_workers - ( iwork + 1 ) ); }
      }
      msg << " }" ;
      Kokkos::Impl::throw_runtime_exception( msg.str() );
    }
  }
}