コード例 #1
0
ファイル: Kokkos_ThreadsExec.cpp プロジェクト: albapa/lammps
/** \brief  Begin execution of the asynchronous functor.
 *
 *  Publishes \p func and \p arg in s_current_function / s_current_function_arg,
 *  marks every entry of s_threads_exec in the pool as Active and, when
 *  s_threads_process.m_pool_size is non-zero, also runs \p func on
 *  s_threads_process from the calling thread.
 *
 *  \param func  Function to run; called as func( thread , arg ).
 *  \param arg   Opaque argument handed to \p func unchanged.
 *
 *  Reports "already executing" through Kokkos::Impl::throw_runtime_exception
 *  when a function or argument is still published from an earlier call.
 */
void ThreadsExec::start( void (*func)( ThreadsExec & , const void * ) , const void * arg )
{
  verify_is_process( "ThreadsExec::start" , true );

  const bool already_executing = s_current_function || s_current_function_arg ;

  if ( already_executing ) {
    Kokkos::Impl::throw_runtime_exception( std::string( "ThreadsExec::start() FAILED : already executing" ) );
  }

  s_current_function     = func ;
  s_current_function_arg = arg ;

  // The function and its argument must be written before any thread is activated.
  memory_fence();

  // Activate the pool from the highest index down to index 0.
  int remaining = s_thread_pool_size[0] ;

  while ( 0 < remaining ) {
    --remaining ;
    s_threads_exec[ remaining ]->m_pool_state = ThreadsExec::Active ;
  }

  if ( s_threads_process.m_pool_size ) {
    // The master process is the root thread: run the function here and
    // mark the process as Inactive once it returns.
    func( s_threads_process , arg );
    s_threads_process.m_pool_state = ThreadsExec::Inactive ;
  }
}
コード例 #2
0
/** \brief  Grow, or completely release, the scratch space.
 *
 *  Both requested sizes are first rounded up with the mask
 *  ( MEMORY_ALIGNMENT - 1 ); this arithmetic assumes MEMORY_ALIGNMENT is a
 *  power of two (TODO confirm).  The recorded sizes are changed only when
 *  either request exceeds the current size, or when both requests are zero
 *  while something is currently recorded.  Requests that fit inside the
 *  current sizes leave everything untouched.
 *
 *  \param reduce_size  Requested size of the reduce portion.
 *  \param thread_size  Requested size of the per-thread portion.
 *  \return s_threads_process.m_scratch after any resize.
 */
void * ThreadsExec::resize_scratch( size_t reduce_size , size_t thread_size )
{
  enum { ALIGN_MASK = Kokkos::Impl::MEMORY_ALIGNMENT - 1 };

  fence();

  // Sizes currently recorded on the process-level ThreadsExec object.
  const size_t old_reduce_size = s_threads_process.m_scratch_reduce_end ;
  const size_t old_thread_size = s_threads_process.m_scratch_thread_end - s_threads_process.m_scratch_reduce_end ;

  // Round both requests up to the alignment boundary.
  reduce_size = ( reduce_size + ALIGN_MASK ) & ~ALIGN_MASK ;
  thread_size = ( thread_size + ALIGN_MASK ) & ~ALIGN_MASK ;

  const bool must_grow =
    ( old_reduce_size < reduce_size ) || ( old_thread_size < thread_size );

  const bool must_release =
    ( reduce_size == 0 && thread_size == 0 ) &&
    ( old_reduce_size != 0 || old_thread_size != 0 );

  // Increase size or deallocate completely; otherwise keep what exists.
  if ( ! ( must_grow || must_release ) ) {
    return s_threads_process.m_scratch ;
  }

  verify_is_process( "ThreadsExec::resize_scratch" , true );

  s_threads_process.m_scratch_reduce_end = reduce_size ;
  s_threads_process.m_scratch_thread_end = reduce_size + thread_size ;

  // NOTE(review): execute_resize_scratch presumably (re)allocates each
  // thread's scratch buffer from the sizes recorded above - confirm.
  execute_serial( & execute_resize_scratch );

  // The process object takes the scratch pointer held by pool entry 0.
  s_threads_process.m_scratch = s_threads_exec[0]->m_scratch ;

  return s_threads_process.m_scratch ;
}
コード例 #3
0
/** \brief  Run \p func with \p arg through driver_exec_all on the workers
 *          and on the calling process.
 *
 *  Publishes \p func and \p arg in s_active_function / s_active_function_arg,
 *  forks driver_exec_all onto every shepherd with qthread_fork_to, calls
 *  driver_exec_all once directly, and finally clears the published function
 *  and argument.
 *
 *  The first (unnamed) Qthread parameter is not used.
 *
 *  \param func  Function published in s_active_function.
 *  \param arg   Argument published in s_active_function_arg.
 */
void QthreadExec::exec_all( Qthread & , QthreadExecFunctionPointer func , const void * arg )
{
  verify_is_process("QthreadExec::exec_all(...)",true);

  s_active_function     = func ;
  s_active_function_arg = arg ;

  // Need to query which shepherd this main 'process' is running...

  const int main_shep = qthread_shep();

  for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) {

    // The caller invokes driver_exec_all itself below, so the shepherd it is
    // running on receives one fork fewer than the other shepherds.
    const int first_worker = ( jshep != main_shep ) ? 0 : 1 ;

    for ( int i = first_worker ; i < s_number_workers_per_shepherd ; ++i ) {

      // Unit tests hang with this call:
      //
      // qthread_fork_to_local_priority( driver_exec_all , NULL , NULL , jshep );
      //

      qthread_fork_to( driver_exec_all , NULL , NULL , jshep );
    }
  }

  // NOTE(review): the published function and argument are cleared right after
  // this call, so driver_exec_all presumably synchronizes with the forked
  // workers before returning - confirm.
  driver_exec_all( NULL );

  s_active_function     = 0 ;
  s_active_function_arg = 0 ;
}
コード例 #4
0
ファイル: Kokkos_ThreadsExec.cpp プロジェクト: albapa/lammps
// Shut down the thread pool and reset s_threads_process to a stand-alone
// ("solo") configuration: pool entries are asked to terminate one at a time,
// the pool-size bookkeeping is zeroed, and the process-level ThreadsExec
// fields are reset to pool size 1, rank 0, state Inactive.
void ThreadsExec::finalize()
{
  verify_is_process("ThreadsExec::finalize",false);

  // NOTE(review): fence() presumably waits for in-flight work to complete
  // before the pool is torn down - confirm.
  fence();

  // Request zero-sized scratch for both the reduce and per-thread portions.
  resize_scratch(0,0);

  // When the process object has a pool base, slot 0 is skipped by the loop
  // below and handled separately afterwards.
  const unsigned begin = s_threads_process.m_pool_base ? 1 : 0 ;

  // Walk the pool from the highest index down to 'begin' (inclusive).
  // The test uses the value of i before the post-decrement.
  for ( unsigned i = s_thread_pool_size[0] ; begin < i-- ; ) {

    if ( s_threads_exec[i] ) {

      s_threads_exec[i]->m_pool_state = ThreadsExec::Terminating ;

      // NOTE(review): presumably blocks while the process state is still
      // Inactive, i.e. until the terminating thread changes it as an
      // acknowledgement - confirm against wait_yield and the thread driver.
      wait_yield( s_threads_process.m_pool_state , ThreadsExec::Inactive );

      // Re-arm the process state for the next thread's handshake.
      s_threads_process.m_pool_state = ThreadsExec::Inactive ;
    }

    // The pid slot is cleared whether or not an exec entry was present.
    s_threads_pid[i] = 0 ;
  }

  if ( s_threads_process.m_pool_base ) {
    // Run the destructor on s_threads_process in place; the object itself is
    // reused and its fields are reassigned below.
    ( & s_threads_process )->~ThreadsExec();
    s_threads_exec[0] = 0 ;
  }

  if (Kokkos::hwloc::can_bind_threads() ) {
    Kokkos::hwloc::unbind_this_thread();
  }

  // Clear all three pool-size entries.
  s_thread_pool_size[0] = 0 ;
  s_thread_pool_size[1] = 0 ;
  s_thread_pool_size[2] = 0 ;

  // Reset master thread to run solo.
  s_threads_process.m_numa_rank       = 0 ;
  s_threads_process.m_numa_core_rank  = 0 ;
  s_threads_process.m_pool_base       = 0 ;
  s_threads_process.m_pool_rank       = 0 ;
  s_threads_process.m_pool_size       = 1 ;
  s_threads_process.m_pool_fan_size   = 0 ;
  s_threads_process.m_pool_state = ThreadsExec::Inactive ;

  // Compiled only when KOKKOS_ENABLE_PROFILING evaluates to non-zero.
  #if (KOKKOS_ENABLE_PROFILING)
    Kokkos::Profiling::finalize();
  #endif
}
コード例 #5
0
/** \brief  Leave the sleep state entered by a previous sleep request.
 *
 *  Acts only when s_current_function is execute_sleep: the global lock is
 *  released, execute_sleep is run on s_threads_process when it has a pool
 *  base, and fence() is called.
 *
 *  \return true when the pool was in the sleep state and has been woken,
 *          false when there was nothing to wake.
 */
bool ThreadsExec::wake()
{
  verify_is_process( "ThreadsExec::wake" , true );

  const bool was_sleeping = ( s_current_function == & execute_sleep );

  if ( was_sleeping ) {

    ThreadsExec::global_unlock();

    if ( s_threads_process.m_pool_base ) {
      // The process takes part in the pool: run the sleep function on it
      // from this thread and then mark it Inactive.
      execute_sleep( s_threads_process , 0 );
      s_threads_process.m_pool_state = ThreadsExec::Inactive ;
    }

    fence();
  }

  return was_sleeping ;
}
コード例 #6
0
/** \brief  Run \p func with \p arg through driver_exec_all on the workers
 *          and on the calling process.
 *
 *  Publishes \p func and \p arg in s_active_function / s_active_function_arg,
 *  forks driver_exec_all onto every shepherd, calls driver_exec_all once
 *  directly, and finally clears the published function and argument.
 *
 *  The first (unnamed) Qthread parameter is not used.
 *
 *  \param func  Function published in s_active_function.
 *  \param arg   Argument published in s_active_function_arg.
 */
void QthreadExec::exec_all( Qthread & , QthreadExecFunctionPointer func , const void * arg )
{
  verify_is_process("QthreadExec::exec_all(...)",true);

/*
  fprintf( stdout , "QthreadExec::exec_all\n");
  fflush(stdout);
*/

  s_active_function     = func ;
  s_active_function_arg = arg ;

  // Need to query which shepherd this main 'process' is running...

  const int main_shep = qthread_shep();

#if 1
  // Active variant: one qthread_fork_to call per worker.
  for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) {

    // The caller invokes driver_exec_all itself below, so the shepherd it is
    // running on receives one fork fewer than the other shepherds.
    const int first_worker = ( jshep != main_shep ) ? 0 : 1 ;

    for ( int i = first_worker ; i < s_number_workers_per_shepherd ; ++i ) {
      qthread_fork_to( driver_exec_all , NULL , NULL , jshep );
    }
  }
#else
  // Disabled variant: one clone-fork call per shepherd.
  // If this function is used before the 'qthread.task_policy' unit test
  // the 'qthread.task_policy' unit test fails with a seg-fault within libqthread.so.
  for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) {
    const int num_clone = jshep != main_shep ? s_number_workers_per_shepherd : s_number_workers_per_shepherd - 1 ;

    if ( num_clone ) {
      const int ret = qthread_fork_clones_to_local_priority
        ( driver_exec_all   /* function */
        , NULL              /* function data block */
        , NULL              /* pointer to return value feb */
        , jshep             /* shepherd number */
        , num_clone - 1     /* number of instances - 1 */
        );

      assert(ret == QTHREAD_SUCCESS);
    }
  }
#endif

  // NOTE(review): the published function and argument are cleared right after
  // this call, so driver_exec_all presumably synchronizes with the forked
  // workers before returning - confirm.
  driver_exec_all( NULL );

  s_active_function     = 0 ;
  s_active_function_arg = 0 ;
}
コード例 #7
0
/** \brief  Put the thread pool into the sleep state.
 *
 *  Does nothing when execute_sleep is already the current function.
 *  Otherwise calls fence(), takes the global lock, installs execute_sleep
 *  as the current function and marks every pool entry Active.
 *
 *  \return true when the sleep state was entered by this call,
 *          false when the pool was already in it.
 */
bool ThreadsExec::sleep()
{
  verify_is_process( "ThreadsExec::sleep" , true );

  const bool already_sleeping = ( s_current_function == & execute_sleep );

  if ( already_sleeping ) { return false ; }

  fence();

  ThreadsExec::global_lock();

  s_current_function = & execute_sleep ;

  // Activate the pool from the highest index down to index 0.
  unsigned remaining = s_thread_pool_size[0] ;

  while ( 0 < remaining ) {
    --remaining ;
    s_threads_exec[ remaining ]->m_pool_state = ThreadsExec::Active ;
  }

  return true ;
}
コード例 #8
0
void ThreadsExec::print_configuration( std::ostream & s , const bool detail )
{
  verify_is_process("ThreadsExec::print_configuration",false);

  fence();

  const unsigned numa_count       = Kokkos::hwloc::get_available_numa_count();
  const unsigned cores_per_numa   = Kokkos::hwloc::get_available_cores_per_numa();
  const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();

  // Forestall compiler warnings for unused variables.
  (void) numa_count;
  (void) cores_per_numa;
  (void) threads_per_core;

  s << "Kokkos::Threads" ;

#if defined( KOKKOS_HAVE_PTHREAD )
  s << " KOKKOS_HAVE_PTHREAD" ;
#endif
#if defined( KOKKOS_HAVE_HWLOC )
  s << " hwloc[" << numa_count << "x" << cores_per_numa << "x" << threads_per_core << "]" ;
#endif

  if ( s_thread_pool_size[0] ) {
    s << " threads[" << s_thread_pool_size[0] << "]"
      << " threads_per_numa[" << s_thread_pool_size[1] << "]"
      << " threads_per_core[" << s_thread_pool_size[2] << "]"
      ;
    if ( 0 == s_threads_process.m_pool_base ) { s << " Asynchronous" ; }
    s << " ReduceScratch[" << s_current_reduce_size << "]"
      << " SharedScratch[" << s_current_shared_size << "]" ;
    s << std::endl ;

    if ( detail ) {

      execute_serial( & execute_get_binding );

      for ( int i = 0 ; i < s_thread_pool_size[0] ; ++i ) {
        ThreadsExec * const th = s_threads_exec[i] ;
        s << "  Thread hwloc("
          << s_threads_coord[i].first << "."
          << s_threads_coord[i].second << ")" ;

        s_threads_coord[i].first  = ~0u ;
        s_threads_coord[i].second = ~0u ;

        if ( th ) {
          const int rank_rev = th->m_pool_size - ( th->m_pool_rank + 1 );

          s << " rank(" << th->m_pool_rank << ")" ;

          if ( th->m_pool_fan_size ) {
            s << " Fan{" ;
            for ( int j = 0 ; j < th->m_pool_fan_size ; ++j ) {
              s << " " << th->m_pool_base[rank_rev+(1<<j)]->m_pool_rank ;
            }
            s << " }" ;
          }

          if ( th == & s_threads_process ) {
            s << " is_process" ;
          }
        }
        s << std::endl ;
      }
    }
  }
  else {
    s << " not initialized" << std::endl ;
  }
}