inline void     OrderAccess::release_store_fence(volatile jushort* p, jushort v) { *p = v; fence(); }
inline void     OrderAccess::release_store_fence(volatile julong*  p, julong  v) { *p = v; fence(); }
inline void     OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; fence(); }
inline void     OrderAccess::store_ptr_fence(void**    p, void*    v) { *p = v; fence(); }
inline void     OrderAccess::store_fence(jfloat*  p, jfloat  v) { *p = v; fence(); }
inline void     OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; fence(); }
inline void     OrderAccess::store_fence(juint*   p, juint   v) { *p = v; fence(); }
Exemple #8
0
void * P1(void * arg) {
  x = 1;
  // Instrumentation for CPROVER
  fence();
  __unbuffered_cnt++;
}
inline void     OrderAccess::store_fence(jshort*  p, jshort  v) { *p = v; fence(); }
inline void     OrderAccess::store_fence(jubyte*  p, jubyte  v) { *p = v; fence(); }
inline void OrderAccess::storeload()  { fence(); }
inline void     OrderAccess::release_store_ptr_fence(volatile void*     p, void*    v) { *(void* volatile *)p = v; fence(); }
/*
 The Binomial Spanning Tree algorithm.
 Outlay:
 The game scales with log2(NP) and uses 1 byte of memory.
 */
static int _algorithm_binomial_tree(struct oshmem_group_t *group,
                                     int PE_root,
                                     void *target,
                                     const void *source,
                                     size_t nlong,
                                     long *pSync)
{
    int rc = OSHMEM_SUCCESS;
    long value = SHMEM_SYNC_INIT;
    int root_id = oshmem_proc_group_find_id(group, PE_root);
    int my_id = oshmem_proc_group_find_id(group, group->my_pe);
    int peer_id = 0;
    int peer_pe = 0;
    int vrank;
    int dim = opal_cube_dim(group->proc_count);
    int hibit;
    int mask;
    int i = 0;

    SCOLL_VERBOSE(12, "[#%d] Broadcast algorithm: Tree", group->my_pe);
    SCOLL_VERBOSE(15,
                  "[#%d] pSync[0] = %ld root = #%d",
                  group->my_pe, pSync[0], PE_root);

    vrank = (my_id + group->proc_count - root_id) % group->proc_count;
    hibit = opal_hibit(vrank, dim);

    SCOLL_VERBOSE(15,
                  "[#%d] dim = %d vrank = %d hibit = %d",
                  group->my_pe, dim, vrank, hibit);

    dim--;

    pSync[0] = SHMEM_SYNC_READY;
    /* Receive data from parent in the tree. */
    if (vrank > 0) {
        value = SHMEM_SYNC_READY;

        SCOLL_VERBOSE(14, "[#%d] wait", group->my_pe);
        rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_NE, (void*)&value, SHMEM_LONG));
        while ((value = pSync[0]) < 0) {
            SCOLL_VERBOSE(14,
                          "[#%d] Broadcast size is a negative value (%li)\n",
                          group->my_pe, pSync[0]);
            MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_NE, (void*)&value, SHMEM_LONG));
        }
        if (OSHMEM_SUCCESS != rc) {
            return rc;
        }
        nlong = (size_t) pSync[0];
    }

    /* Send data to the children. */
    for (i = hibit + 1, mask = 1 << i; i <= dim; ++i, mask <<= 1) {
        peer_id = vrank | mask;

        if (peer_id < group->proc_count) {
            /* Wait for the child to be ready to receive (pSync must have the initial value) */
            peer_id = (peer_id + root_id) % group->proc_count;
            peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);

            SCOLL_VERBOSE(14,
                          "[#%d] check remote pe is ready to receive #%d",
                          group->my_pe, peer_pe);
            do {
                rc = MCA_SPML_CALL(get((void*)pSync, sizeof(long), (void*)pSync, peer_pe));
            } while ((OSHMEM_SUCCESS == rc) && (pSync[0] != SHMEM_SYNC_READY));

            SCOLL_VERBOSE(14, "[#%d] send data to #%d", group->my_pe, peer_pe);
            rc = MCA_SPML_CALL(put(target, nlong, (my_id == root_id ? (void *)source : target), peer_pe));

            MCA_SPML_CALL(fence());

            SCOLL_VERBOSE(14, "[#%d] signals to #%d", group->my_pe, peer_pe);
            value = nlong;
            rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe));
            if (OSHMEM_SUCCESS != rc) {
                break;
            }
        }
    }

    return rc;
}
inline void     OrderAccess::release_store_fence(volatile jfloat*  p, jfloat  v) { *p = v; fence(); }
inline void     OrderAccess::store_fence(julong*  p, julong  v) { *p = v; fence(); }
inline void     OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { *p = v; fence(); }
void ThreadsExec::print_configuration( std::ostream & s , const bool detail )
{
  verify_is_process("ThreadsExec::print_configuration",false);

  fence();

  const unsigned numa_count       = Kokkos::hwloc::get_available_numa_count();
  const unsigned cores_per_numa   = Kokkos::hwloc::get_available_cores_per_numa();
  const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();

  // Forestall compiler warnings for unused variables.
  (void) numa_count;
  (void) cores_per_numa;
  (void) threads_per_core;

  s << "Kokkos::Threads" ;

#if defined( KOKKOS_HAVE_PTHREAD )
  s << " KOKKOS_HAVE_PTHREAD" ;
#endif
#if defined( KOKKOS_HAVE_HWLOC )
  s << " hwloc[" << numa_count << "x" << cores_per_numa << "x" << threads_per_core << "]" ;
#endif

  if ( s_thread_pool_size[0] ) {
    s << " threads[" << s_thread_pool_size[0] << "]"
      << " threads_per_numa[" << s_thread_pool_size[1] << "]"
      << " threads_per_core[" << s_thread_pool_size[2] << "]"
      ;
    if ( 0 == s_threads_process.m_pool_base ) { s << " Asynchronous" ; }
    s << " ReduceScratch[" << s_current_reduce_size << "]"
      << " SharedScratch[" << s_current_shared_size << "]" ;
    s << std::endl ;

    if ( detail ) {

      for ( int i = 0 ; i < s_thread_pool_size[0] ; ++i ) {

        ThreadsExec * const th = s_threads_exec[i] ;

        if ( th ) {

          const int rank_rev = th->m_pool_size - ( th->m_pool_rank + 1 );

          s << " Thread[ " << th->m_pool_rank << " : "
            << th->m_numa_rank << "." << th->m_numa_core_rank << " ]" ;

          s << " Fan{" ;
          for ( int j = 0 ; j < th->m_pool_fan_size ; ++j ) {
            ThreadsExec * const thfan = th->m_pool_base[rank_rev+(1<<j)] ;
            s << " [ " << thfan->m_pool_rank << " : "
              << thfan->m_numa_rank << "." << thfan->m_numa_core_rank << " ]" ;
          }
          s << " }" ;

          if ( th == & s_threads_process ) {
            s << " is_process" ;
          }
        }
        s << std::endl ;
      }
    }
  }
  else {
    s << " not initialized" << std::endl ;
  }
}