// Function is called once by every concurrent thread.
  // Per-worker entry point for ParallelFor.
  //   exec : the calling worker's QthreadExec handle; supplies the rank and
  //          worker count used to build this worker's WorkRange.
  //   arg  : type-erased pointer to the ParallelFor closure.
  // The functor is run over [begin,end) of the worker's WorkRange, after
  // which the worker calls exec_all_barrier().
  static void exec( QthreadExec & exec , const void * arg )
  {
    const ParallelFor * const closure = static_cast< const ParallelFor * >( arg );

    const WorkRange my_range( closure->m_policy , exec.worker_rank() , exec.worker_size() );

    ParallelFor::template exec_range< WorkTag >( closure->m_functor
                                               , my_range.begin()
                                               , my_range.end() );

    // Every worker waits here until all of them have finished.
    exec.exec_all_barrier();
  }
  // Per-worker entry point for ParallelReduce.
  //   exec : the calling worker's QthreadExec handle; supplies the rank,
  //          worker count and the reduction value slot.
  //   arg  : type-erased pointer to the ParallelReduce closure.
  // The value obtained from ValueInit::init (applied to the selected
  // functor/reducer and exec.exec_all_reduce_value()) is passed to
  // exec_range together with the worker's [begin,end); afterwards the
  // worker takes part in exec_all_reduce.
  static void exec( QthreadExec & exec , const void * arg )
  {
    const ParallelReduce * const closure = static_cast< const ParallelReduce * >( arg );

    const WorkRange my_range( closure->m_policy , exec.worker_rank() , exec.worker_size() );

    ParallelReduce::template exec_range< WorkTag >(
      closure->m_functor ,
      my_range.begin() ,
      my_range.end() ,
      ValueInit::init( ReducerConditional::select( closure->m_functor , closure->m_reducer ) ,
                       exec.exec_all_reduce_value() ) );

    exec.template exec_all_reduce< FunctorType, ReducerType, WorkTag >( closure->m_functor ,
                                                                        closure->m_reducer );
  }
  // Function is called once by every concurrent thread.
  // Per-worker entry point for ParallelFor.
  //   exec : the calling worker's QthreadExec handle; supplies the rank and
  //          worker count used to build this worker's sub-policy.
  //   arg  : type-erased pointer to the ParallelFor closure.
  // m_func is invoked once for each index in the sub-policy's [begin,end),
  // after which the worker calls exec_all_barrier().
  static void execute( QthreadExec & exec , const void * arg )
  {
    const ParallelFor * const closure = static_cast< const ParallelFor * >( arg );

    const Policy chunk( closure->m_policy , exec.worker_rank() , exec.worker_size() );

    // Upper bound is read once, before the walk starts.
    const typename Policy::member_type last = chunk.end();
    typename Policy::member_type index = chunk.begin();

    while ( index < last ) {
      closure->m_func( index );
      ++index;
    }

    // Every worker waits here until all of them have finished.
    exec.exec_all_barrier();
  }
  // Per-worker entry point for ParallelReduce.
  //   exec : the calling worker's QthreadExec handle; supplies the rank,
  //          worker count and the reduction value slot.
  //   arg  : type-erased pointer to the ParallelReduce closure.
  // m_func is invoked with each index in the sub-policy's [begin,end) and
  // the reference returned by Reduce::init; afterwards the worker takes
  // part in exec_all_reduce.
  static void execute( QthreadExec & exec , const void * arg )
  {
    const ParallelReduce * const closure = static_cast< const ParallelReduce * >( arg );

    const Policy chunk( closure->m_policy , exec.worker_rank() , exec.worker_size() );

    // This worker's accumulator, set up by Reduce::init from exec_all_reduce_value().
    typename Reduce::reference_type partial =
      Reduce::init( closure->m_func , exec.exec_all_reduce_value() );

    // Upper bound is read once, before the walk starts.
    const typename Policy::member_type last = chunk.end();
    typename Policy::member_type index = chunk.begin();

    while ( index < last ) {
      closure->m_func( index , partial );
      ++index;
    }

    exec.exec_all_reduce( closure->m_func );
  }