Пример #1
0
void 
darts :: hwloc :: AbstractMachine :: discoverTopologyWithLLC(void)
{
    unsigned nbSockets = hwloc_get_nbobjs_by_type(_topology,HWLOC_OBJ_SOCKET);
    hwloc_obj_t o = hwloc_get_obj_by_type(_topology,HWLOC_OBJ_SOCKET,0);

    hwloc_obj_t obj;
    for (obj = o->first_child;
            obj && obj->type != HWLOC_OBJ_CACHE;
            obj = obj->first_child)
        ;

    _nbClusters = nbSockets;
    if (obj) {
        int n = hwloc_get_nbobjs_inside_cpuset_by_type(_topology,obj->cpuset,HWLOC_OBJ_PU);
        _nbClusters = _nbTotalUnits / n; // XXX assumes homogeneous distribution of PUs
    }
    _clusterMap = new Cluster[_nbClusters];

    // TODO Refactor this code and the next function's code into a single one 
    for (o = obj; o; o = o->next_cousin)  {
        int           nUnits = hwloc_get_nbobjs_inside_cpuset_by_type(_topology,o->cpuset,HWLOC_OBJ_PU);
        Unit *units  = new Unit[nUnits];
        for (int i = 0; i < nUnits; ++i) {
            hwloc_obj_t t = hwloc_get_obj_inside_cpuset_by_type(_topology,o->cpuset,HWLOC_OBJ_PU,i);
            Unit hwu(o->logical_index,t->logical_index,t->os_index);
            units[i] = hwu; // simple shallow copy
        }
        Cluster cluster(o->logical_index,o->logical_index,nUnits,units);
        _clusterMap[o->logical_index] = cluster; // simple shallow copy
    }
}
Пример #2
0
/*
 * Fill topo[] with the logical indices of all cores, grouped by NUMA node
 * (or by socket when the machine exposes no NUMA nodes).  topo is laid out
 * as consecutive groups of cnode core indices, one group per node; this
 * assumes every node holds the same number of cores.
 * The nodes argument is accepted for interface compatibility but unused.
 */
void qrm_hwloc_topo(int *nodes, int *topo)
{
  unsigned i, j, ncores, nnodes, cnode;
  hwloc_topology_t topology;
  hwloc_obj_t obj, cobj;
  hwloc_obj_type_t otype;

  (void)nodes;                  /* unused; kept for interface compatibility */

  hwloc_topology_init(&topology);

  hwloc_topology_load(topology);

  /* get the number of cores and NUMA nodes */
  ncores = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE);
  printf("ncores: %u\n",ncores); /* %u: ncores is unsigned */

  nnodes = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NODE);
  if(nnodes == 0){
    /* no NUMA information: group cores by socket instead */
    otype = HWLOC_OBJ_SOCKET;
    printf("grouping with sockets\n");
    nnodes = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_SOCKET);
  } else {
    otype = HWLOC_OBJ_NODE;
    printf("grouping with NUMA nodes\n");
  }

  for(i=0; i<nnodes; i++){
    /* get the handle for the i-th node */
    obj = hwloc_get_obj_by_type(topology, otype, i);
    /* get the number of cores in i-th NUMA node (supposedly the same for all nodes) */
    cnode = hwloc_get_nbobjs_inside_cpuset_by_type(topology, obj->cpuset, HWLOC_OBJ_CORE);

    /* get the first core in this node */
    cobj = hwloc_get_next_obj_inside_cpuset_by_type(topology, obj->cpuset, HWLOC_OBJ_CORE, NULL);
    if (cobj == NULL || cnode == 0)
      continue;                 /* node with no cores: nothing to record */
    topo[(i*cnode)] = cobj->logical_index;
    for(j=1; j<cnode; j++){
      cobj = hwloc_get_next_obj_inside_cpuset_by_type(topology, obj->cpuset, HWLOC_OBJ_CORE, cobj);
      topo[(i*cnode)+j] = cobj->logical_index;
    }
  }

  hwloc_topology_destroy(topology);
}
Пример #3
0
void 
darts :: hwloc :: AbstractMachine :: discoverTopology(void)
{
    _nbClusters   = hwloc_get_nbobjs_by_type(_topology,HWLOC_OBJ_SOCKET);
    _clusterMap   = new Cluster[_nbClusters];
    hwloc_obj_t o = hwloc_get_obj_by_type(_topology,HWLOC_OBJ_SOCKET,0);
    // TODO Refactor this code and the previous function's code into a single one
    for (; o; o = o->next_cousin)  {
        int           nUnits = hwloc_get_nbobjs_inside_cpuset_by_type(_topology,o->cpuset,HWLOC_OBJ_PU);
        Unit *units  = new Unit[nUnits];
        for (int i = 0; i < nUnits; ++i) {
            hwloc_obj_t t = hwloc_get_obj_inside_cpuset_by_type(_topology,o->cpuset,HWLOC_OBJ_PU,i);
            Unit hwu(o->logical_index,t->logical_index,t->os_index);
            units[i] = hwu; // simple shallow copy
        }
        Cluster cluster(o->logical_index,o->logical_index,nUnits,units);
        _clusterMap[o->logical_index] = cluster; // simple shallow copy
    }
}
Пример #4
0
/* This function initializes the bind_map data structure with the binding information from all
 * the ranks that have copied their cpu-affinity information into the shared memory region.
 * */
void MPIDI_SHM_hwloc_init_bindmap(int num_ranks, int topo_depth, int *shared_region, int **bind_map)
{
    /* NOTE(review): topo_depth is unused in this body; the per-rank rows of
     * bind_map are walked via obj->parent up to the topology root instead.
     * Presumably each bind_map[i] row has at least topo_depth entries and is
     * zero-initialized by the caller -- TODO confirm. */
    int i, level;
    unsigned int num_obj, curr_obj;

    hwloc_cpuset_t hwloc_cpuset = hwloc_bitmap_alloc();
    /* STEP 3.1. Collect the binding information from hwloc for all ranks */
    for (i = 0; i < num_ranks; ++i) {
        /* shared_region is laid out as one glibc cpu_set_t per rank */
        cpu_set_t t = ((cpu_set_t *) (shared_region))[i];
        hwloc_cpuset_from_glibc_sched_affinity(MPIR_Process.hwloc_topology, hwloc_cpuset, &t,
                                               sizeof(t));
        /* HWLOC_OBJ_PU is the smallest unit of computation. We would like to get all the
         * affinity information for each rank */
        num_obj =
            hwloc_get_nbobjs_inside_cpuset_by_type(MPIR_Process.hwloc_topology, hwloc_cpuset,
                                                   HWLOC_OBJ_PU);
        /* Go over all objects, and if it is bound to more than one PU at that level, set it
         * to -1, otherwise update to the binding*/
        for (curr_obj = 0; curr_obj < num_obj; ++curr_obj) {
            hwloc_obj_t obj =
                hwloc_get_obj_inside_cpuset_by_type(MPIR_Process.hwloc_topology, hwloc_cpuset,
                                                    HWLOC_OBJ_PU, curr_obj);
            /* Walk from the PU up to the root, recording the ancestor's
             * logical index at each level. */
            level = 0;
            do {
                /* If binding was not set, or is same as previous binding, update.
                 * Note that we use logical indices from hwloc instead of physical indices
                 * because logical indices are more portable - see hwloc documentation*/
                if (bind_map[i][level] == 0 || bind_map[i][level] == obj->logical_index) {
                    bind_map[i][level] = obj->logical_index;
                } else {
                    /* If rank is bound to different PUs at that level, we set to -1 */
                    bind_map[i][level] = -1;
                }
                level++;
            } while ((obj = obj->parent));
        }
    }
    hwloc_bitmap_free(hwloc_cpuset);
}
Пример #5
0
void Hwloc::getNumSockets(unsigned int &allowedNodes, int &numSockets, unsigned int &hwThreads) {
#ifdef HWLOC
   // Reset all out-parameters before counting.
   numSockets   = 0;
   allowedNodes = 0;   // NUMA nodes visible to hwloc that own at least one PU
   hwThreads    = 0;   // total hardware threads found beneath those nodes

   const int numaDepth = hwloc_get_type_depth( _hwlocTopology, HWLOC_OBJ_NODE );
   if ( numaDepth == HWLOC_TYPE_DEPTH_UNKNOWN ) {
      // No NUMA information available: report a single node/socket.
      allowedNodes = 1;
      numSockets = 1;
   } else {
      const unsigned nodeCount = hwloc_get_nbobjs_by_depth( _hwlocTopology, numaDepth );

      // Count the hardware threads beneath each NUMA node.
      for ( unsigned idx = 0; idx < nodeCount; ++idx )
      {
         hwloc_obj_t numaNode = hwloc_get_obj_by_depth( _hwlocTopology, numaDepth, idx );
         const int threadsHere =
            hwloc_get_nbobjs_inside_cpuset_by_type( _hwlocTopology, numaNode->cpuset, HWLOC_OBJ_PU );
         hwThreads += threadsHere;
         // A node only counts as viewable if it has at least one hw thread beneath it.
         if ( threadsHere > 0 ) ++allowedNodes;
      }
      numSockets = nodeCount;
   }
#else
   // hwloc support compiled out: nothing can be detected.
   numSockets = 0;
   allowedNodes = 0;
#endif
}
Пример #6
0
void qrm_hwloc_info(int *ncores, int *nnodes, int *cnode)
{
  int depth, ret;
  unsigned i, n, j;
  int topodepth, numa;
  hwloc_topology_t topology;
  hwloc_cpuset_t cpuset;
  hwloc_obj_t obj, cobj;
  hwloc_obj_type_t otype;
  
  hwloc_topology_init(&topology);
  
  hwloc_topology_load(topology);

  /* get the number os cores and NUMA nodes */
  *ncores = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE);
  /* printf("ncores: %d\n",*ncores); */

  *nnodes = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NODE);
  if(*nnodes == 0){
    otype = HWLOC_OBJ_SOCKET;
    /* printf("grouping with sockets\n"); */
    *nnodes = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_SOCKET);
  } else {
    otype = HWLOC_OBJ_NODE;
    /* printf("grouping with NUMA nodes\n"); */
  }

  /* get the handle for the first NUMA node */
  obj = hwloc_get_obj_by_type(topology, otype, 0); 
  
  /* get the number of cores in one NUMA node (supposedly the same for all nodes) */
  *cnode = hwloc_get_nbobjs_inside_cpuset_by_type(topology, obj->cpuset, HWLOC_OBJ_CORE);
  
  hwloc_topology_destroy(topology);
  return;
}
Пример #7
0
/*
 * Prettyprint a list of all available sockets and cores.  Note that
 * this is *everything* -- not just the ones that are available to
 * this process.
 *
 * Appends to str a summary of the form
 *   "socket N has M cores, each with K hwts; ..."
 * and returns OMPI_SUCCESS.
 */
static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX])
{
    bool first = true;
    int i, num_cores, num_pus;
    char tmp[BUFSIZ];
    const int stmp = sizeof(tmp) - 1;
    hwloc_obj_t socket, core, c2;

    str[0] = '\0';
    for (socket = hwloc_get_obj_by_type(opal_hwloc_topology,
                                        HWLOC_OBJ_SOCKET, 0);
         NULL != socket; socket = socket->next_cousin) {
        /* If this isn't the first socket, add a delimiter */
        if (!first) {
            /* BUG FIX: strncat needs room for the trailing NUL, hence -1
             * (applied to every strncat below). */
            strncat(str, "; ", OMPI_AFFINITY_STRING_MAX - strlen(str) - 1);
        }
        first = false;

        /* %u: os_index is unsigned in hwloc */
        snprintf(tmp, stmp, "socket %u has ", socket->os_index);
        strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str) - 1);

        /* Find out how many cores are inside this socket, and get an
           object pointing to the first core.  Also count how many PUs
           are in the first core. */
        num_cores = hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology,
                                                           socket->cpuset,
                                                           HWLOC_OBJ_CORE);
        core = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology,
                                                   socket->cpuset,
                                                   HWLOC_OBJ_CORE, 0);
        if (NULL != core) {
            num_pus =
                hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology,
                                                       core->cpuset,
                                                       HWLOC_OBJ_PU);

            /* Only 1 core */
            if (1 == num_cores) {
                strncat(str, "1 core with ",
                        OMPI_AFFINITY_STRING_MAX - strlen(str) - 1);
                if (1 == num_pus) {
                    strncat(str, "1 hwt",
                            OMPI_AFFINITY_STRING_MAX - strlen(str) - 1);
                } else {
                    snprintf(tmp, stmp, "%d hwts", num_pus);
                    strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str) - 1);
                }
            }

            /* Multiple cores */
            else {
                bool same = true;

                snprintf(tmp, stmp, "%d cores", num_cores);
                strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str) - 1);

                /* Do all the cores have the same number of PUs?
                 * BUG FIX: query c2->cpuset, not core->cpuset -- the
                 * original always re-counted the first core's PUs, so
                 * `same` could never become false. */
                for (c2 = core; NULL != c2; c2 = c2->next_cousin) {
                    if (hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology,
                                                               c2->cpuset,
                                                               HWLOC_OBJ_PU) !=
                        num_pus) {
                        same = false;
                        break;
                    }
                }

                /* Yes, they all have the same number of PUs */
                if (same) {
                    snprintf(tmp, stmp, ", each with %d hwt", num_pus);
                    strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str) - 1);
                    if (num_pus != 1) {
                        strncat(str, "s", OMPI_AFFINITY_STRING_MAX - strlen(str) - 1);
                    }
                }

                /* No, they have differing numbers of PUs */
                else {
                    bool first_core = true;

                    strncat(str, "with (", OMPI_AFFINITY_STRING_MAX - strlen(str) - 1);
                    for (c2 = core; NULL != c2; c2 = c2->next_cousin) {
                        if (!first_core) {
                            strncat(str, ", ",
                                    OMPI_AFFINITY_STRING_MAX - strlen(str) - 1);
                        }
                        first_core = false;

                        /* BUG FIX: count c2's PUs, not the first core's */
                        i = hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology,
                                                                   c2->cpuset,
                                                                   HWLOC_OBJ_PU);
                        snprintf(tmp, stmp, "%d", i);
                        strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str) - 1);
                    }
                    strncat(str, ") hwts",
                            OMPI_AFFINITY_STRING_MAX - strlen(str) - 1);
                }
            }
        }
    }

    return OMPI_SUCCESS;
}
Пример #8
0
// Constructor: probe the machine with hwloc, pick a "root" object level
// (NUMA node, socket, or whole machine -- first one that exists), then
// build a (root x core) map of cpusets restricted to the cores this
// process is actually bound to.  Enforces a symmetric sub-topology by
// taking minima across roots/cores.
hwloc::hwloc()
{
  s_core_topology   = std::pair<unsigned,unsigned>(0,0);
  s_core_capacity   = 0 ;
  s_hwloc_topology  = 0 ;
  s_hwloc_location  = 0 ;
  s_process_binding = 0 ;

  for ( unsigned i = 0 ; i < MAX_CORE ; ++i ) s_core[i] = 0 ;

  hwloc_topology_init( & s_hwloc_topology );
  hwloc_topology_load( s_hwloc_topology );

  s_hwloc_location  = hwloc_bitmap_alloc();
  s_process_binding = hwloc_bitmap_alloc();

  // Record the cpuset this process is currently bound to (e.g. by MPI).
  hwloc_get_cpubind( s_hwloc_topology , s_process_binding ,  HWLOC_CPUBIND_PROCESS );

  // Choose a hwloc object type for the NUMA level, which may not exist.

  hwloc_obj_type_t root_type = HWLOC_OBJ_TYPE_MAX ;

  {
    // Object types to search, in order.
    static const hwloc_obj_type_t candidate_root_type[] =
      { HWLOC_OBJ_NODE     /* NUMA region     */
      , HWLOC_OBJ_SOCKET   /* hardware socket */
      , HWLOC_OBJ_MACHINE  /* local machine   */
      };

    enum { CANDIDATE_ROOT_TYPE_COUNT =
             sizeof(candidate_root_type) / sizeof(hwloc_obj_type_t) };

    // Stop at the first candidate type with at least one object.
    for ( int k = 0 ; k < CANDIDATE_ROOT_TYPE_COUNT && HWLOC_OBJ_TYPE_MAX == root_type ; ++k ) {
      if ( 0 < hwloc_get_nbobjs_by_type( s_hwloc_topology , candidate_root_type[k] ) ) {
        root_type = candidate_root_type[k] ;
      }
    }
  }

  // Determine which of these 'root' types are available to this process.
  // The process may have been bound (e.g., by MPI) to a subset of these root types.
  // Determine current location of the master (calling) process>

  hwloc_bitmap_t proc_cpuset_location = hwloc_bitmap_alloc();

  hwloc_get_last_cpu_location( s_hwloc_topology , proc_cpuset_location , HWLOC_CPUBIND_THREAD );

  const unsigned max_root = hwloc_get_nbobjs_by_type( s_hwloc_topology , root_type );

  unsigned root_base     = max_root ;
  unsigned root_count    = 0 ;
  unsigned core_per_root = 0 ;
  unsigned pu_per_core   = 0 ;
  bool     symmetric     = true ;

  // First pass: count accessible roots and derive the symmetric
  // core-per-root / pu-per-core values (minimum over all accessible objects).
  for ( unsigned i = 0 ; i < max_root ; ++i ) {

    const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , i );

    if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) {

      ++root_count ;

      // Remember which root (NUMA) object the master thread is running on.
      // This will be logical NUMA rank #0 for this process.

      if ( hwloc_bitmap_intersects( proc_cpuset_location, root->allowed_cpuset ) ) {
        root_base = i ;
      }

      // Count available cores:

      const unsigned max_core =
        hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
                                                root->allowed_cpuset ,
                                                HWLOC_OBJ_CORE );

      unsigned core_count = 0 ;

      for ( unsigned j = 0 ; j < max_core ; ++j ) {

        const hwloc_obj_t core =
          hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
                                               root->allowed_cpuset ,
                                               HWLOC_OBJ_CORE , j );

        // If process' cpuset intersects core's cpuset then process can access this core.
        // Must use intersection instead of inclusion because the Intel-Phi
        // MPI may bind the process to only one of the core's hyperthreads.
        //
        // Assumption: if the process can access any hyperthread of the core
        // then it has ownership of the entire core.
        // This assumes that it would be performance-detrimental
        // to spawn more than one MPI process per core and use nested threading.

        if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {

          ++core_count ;

          const unsigned pu_count =
            hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
                                                    core->allowed_cpuset ,
                                                    HWLOC_OBJ_PU );

          if ( pu_per_core == 0 ) pu_per_core = pu_count ;

          // Enforce symmetry by taking the minimum:

          pu_per_core = std::min( pu_per_core , pu_count );

          if ( pu_count != pu_per_core ) symmetric = false ;
        }
      }

      if ( 0 == core_per_root ) core_per_root = core_count ;

      // Enforce symmetry by taking the minimum:

      core_per_root = std::min( core_per_root , core_count );

      if ( core_count != core_per_root ) symmetric = false ;
    }
  }

  s_core_topology.first  = root_count ;
  s_core_topology.second = core_per_root ;
  s_core_capacity        = pu_per_core ;

  // Fill the 's_core' array for fast mapping from a core coordinate to the
  // hwloc cpuset object required for thread location querying and binding.

  // Second pass: roots are rotated so the master thread's root comes first
  // (logical root rank #0), and only core_per_root cores per root are kept.
  for ( unsigned i = 0 ; i < max_root ; ++i ) {

    const unsigned root_rank = ( i + root_base ) % max_root ;

    const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , root_rank );

    if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) {

      const unsigned max_core =
        hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
                                                root->allowed_cpuset ,
                                                HWLOC_OBJ_CORE );

      unsigned core_count = 0 ;

      for ( unsigned j = 0 ; j < max_core && core_count < core_per_root ; ++j ) {

        const hwloc_obj_t core =
          hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
                                               root->allowed_cpuset ,
                                               HWLOC_OBJ_CORE , j );

        if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {

          s_core[ core_count + core_per_root * i ] = core->allowed_cpuset ;

          ++core_count ;
        }
      }
    }
  }

  hwloc_bitmap_free( proc_cpuset_location );

  if ( ! symmetric ) {
    std::cout << "KokkosArray::hwloc WARNING: Using a symmetric subset of a non-symmetric core topology."
              << std::endl ;
  }
}
Пример #9
0
HYD_status HYDT_topo_hwloc_init(HYDT_topo_support_level_t * support_level)
{
    int node, sock, core, thread, idx;

    hwloc_obj_t obj_sys;
    hwloc_obj_t obj_node;
    hwloc_obj_t obj_sock;
    hwloc_obj_t obj_core;
    hwloc_obj_t obj_thread;

    struct HYDT_topo_obj *node_ptr, *sock_ptr, *core_ptr, *thread_ptr;

    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    hwloc_topology_init(&topology);
    hwloc_topology_load(topology);

    hwloc_initialized = 1;

    /* Get the max number of processing elements */
    HYDT_topo_info.total_proc_units = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);

    /* We have qualified for basic topology support level */
    *support_level = HYDT_TOPO_SUPPORT_BASIC;

    /* Setup the machine level */
    obj_sys = hwloc_get_root_obj(topology);

    /* Retained for debugging purposes */
    /* print_obj_info(obj_sys); */

    /* init Hydra structure */
    HYDT_topo_info.machine.type = HYDT_TOPO_OBJ_MACHINE;
    HYDT_topo_cpuset_zero(&HYDT_topo_info.machine.cpuset);
    HYDT_topo_info.machine.parent = NULL;

    HYDT_topo_info.machine.num_children = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NODE);
    /* If there is no real node, consider there is one */
    if (!HYDT_topo_info.machine.num_children)
        HYDT_topo_info.machine.num_children = 1;
    status = HYDT_topo_alloc_objs(HYDT_topo_info.machine.num_children,
                                  &HYDT_topo_info.machine.children);
    HYDU_ERR_POP(status, "error allocating topo objects\n");

    /* Setup the nodes levels */
    for (node = 0; node < HYDT_topo_info.machine.num_children; node++) {
        node_ptr = &HYDT_topo_info.machine.children[node];
        node_ptr->type = HYDT_TOPO_OBJ_NODE;
        node_ptr->parent = &HYDT_topo_info.machine;
        HYDT_topo_cpuset_zero(&node_ptr->cpuset);

        if (!(obj_node = hwloc_get_obj_inside_cpuset_by_type(topology, obj_sys->cpuset,
                                                             HWLOC_OBJ_NODE, node)))
            obj_node = obj_sys;

        /* copy the hwloc cpuset to hydra format */
        hwloc_to_hydra_cpuset_dup(obj_node->cpuset, &node_ptr->cpuset);

        /* memory information */
        node_ptr->mem.local_mem_size = obj_node->memory.local_memory;

        /* find the number of cache objects which match my cpuset */
        node_ptr->mem.num_caches = get_cache_nbobjs(obj_sys, obj_node->cpuset);

        /* add the actual cache objects that match my cpuset */
        if (node_ptr->mem.num_caches) {
            HYDU_MALLOC(node_ptr->mem.cache_size, size_t *,
                        node_ptr->mem.num_caches * sizeof(size_t), status);
            HYDU_MALLOC(node_ptr->mem.cache_depth, int *,
                        node_ptr->mem.num_caches * sizeof(int), status);
            idx = 0;
            load_cache_objs(obj_sys, obj_node->cpuset, node_ptr, &idx);
        }

        node_ptr->num_children =
            hwloc_get_nbobjs_inside_cpuset_by_type(topology, obj_node->cpuset,
                                                   HWLOC_OBJ_SOCKET);
        /* In case there is no socket */
        if (!node_ptr->num_children)
            node_ptr->num_children = 1;

        status = HYDT_topo_alloc_objs(node_ptr->num_children, &node_ptr->children);
        HYDU_ERR_POP(status, "error allocating topo objects\n");

        /* Setup the socket level */
        for (sock = 0; sock < node_ptr->num_children; sock++) {
            sock_ptr = &node_ptr->children[sock];
            sock_ptr->type = HYDT_TOPO_OBJ_SOCKET;
            sock_ptr->parent = node_ptr;
            HYDT_topo_cpuset_zero(&sock_ptr->cpuset);

            if (!(obj_sock = hwloc_get_obj_inside_cpuset_by_type(topology, obj_node->cpuset,
                                                                 HWLOC_OBJ_SOCKET, sock)))
                obj_sock = obj_node;

            /* copy the hwloc cpuset to hydra format */
            hwloc_to_hydra_cpuset_dup(obj_sock->cpuset, &sock_ptr->cpuset);

            /* memory information */
            sock_ptr->mem.local_mem_size = obj_sock->memory.local_memory;

            /* find the number of cache objects which match my cpuset */
            sock_ptr->mem.num_caches = get_cache_nbobjs(obj_sys, obj_sock->cpuset);

            /* add the actual cache objects that match my cpuset */
            if (sock_ptr->mem.num_caches) {
                HYDU_MALLOC(sock_ptr->mem.cache_size, size_t *,
                            sock_ptr->mem.num_caches * sizeof(size_t), status);
                HYDU_MALLOC(sock_ptr->mem.cache_depth, int *,
                            sock_ptr->mem.num_caches * sizeof(int), status);
                idx = 0;
                load_cache_objs(obj_sys, obj_sock->cpuset, sock_ptr, &idx);
            }

            sock_ptr->num_children =
                hwloc_get_nbobjs_inside_cpuset_by_type(topology, obj_sock->cpuset,
                                                       HWLOC_OBJ_CORE);

            /* In case there is no core */
            if (!sock_ptr->num_children)
                sock_ptr->num_children = 1;

            status = HYDT_topo_alloc_objs(sock_ptr->num_children, &sock_ptr->children);
            HYDU_ERR_POP(status, "error allocating topo objects\n");

            /* setup the core level */
            for (core = 0; core < sock_ptr->num_children; core++) {
                core_ptr = &sock_ptr->children[core];
                core_ptr->type = HYDT_TOPO_OBJ_CORE;
                core_ptr->parent = sock_ptr;
                HYDT_topo_cpuset_zero(&core_ptr->cpuset);

                if (!(obj_core = hwloc_get_obj_inside_cpuset_by_type(topology,
                                                                     obj_sock->cpuset,
                                                                     HWLOC_OBJ_CORE, core)))
                    obj_core = obj_sock;

                /* copy the hwloc cpuset to hydra format */
                hwloc_to_hydra_cpuset_dup(obj_core->cpuset, &core_ptr->cpuset);

                /* memory information */
                core_ptr->mem.local_mem_size = obj_core->memory.local_memory;

                /* find the number of cache objects which match my cpuset */
                core_ptr->mem.num_caches = get_cache_nbobjs(obj_sys, obj_core->cpuset);

                /* add the actual cache objects that match my cpuset */
                if (core_ptr->mem.num_caches) {
                    HYDU_MALLOC(core_ptr->mem.cache_size, size_t *,
                                core_ptr->mem.num_caches * sizeof(size_t), status);
                    HYDU_MALLOC(core_ptr->mem.cache_depth, int *,
                                core_ptr->mem.num_caches * sizeof(int), status);
                    idx = 0;
                    load_cache_objs(obj_sys, obj_core->cpuset, core_ptr, &idx);
                }

                core_ptr->num_children =
                    hwloc_get_nbobjs_inside_cpuset_by_type(topology, obj_core->cpuset,
                                                           HWLOC_OBJ_PU);

                /* In case there is no thread */
                if (!core_ptr->num_children)
                    core_ptr->num_children = 1;

                status = HYDT_topo_alloc_objs(core_ptr->num_children, &core_ptr->children);
                HYDU_ERR_POP(status, "error allocating topo objects\n");

                /* setup the thread level */
                for (thread = 0; thread < core_ptr->num_children; thread++) {
                    thread_ptr = &core_ptr->children[thread];
                    thread_ptr->type = HYDT_TOPO_OBJ_THREAD;
                    thread_ptr->parent = core_ptr;
                    thread_ptr->num_children = 0;
                    thread_ptr->children = NULL;
                    HYDT_topo_cpuset_zero(&thread_ptr->cpuset);

                    if (!(obj_thread =
                          hwloc_get_obj_inside_cpuset_by_type(topology, obj_core->cpuset,
                                                              HWLOC_OBJ_PU, thread)))
                        HYDU_ERR_POP(status, "unable to detect processing units\n");

                    /* copy the hwloc cpuset to hydra format */
                    hwloc_to_hydra_cpuset_dup(obj_thread->cpuset, &thread_ptr->cpuset);

                    /* memory information */
                    thread_ptr->mem.local_mem_size = obj_thread->memory.local_memory;

                    /* find the number of cache objects which match my cpuset */
                    thread_ptr->mem.num_caches = get_cache_nbobjs(obj_sys, obj_thread->cpuset);

                    /* add the actual cache objects that match my cpuset */
                    if (thread_ptr->mem.num_caches) {
                        HYDU_MALLOC(thread_ptr->mem.cache_size, size_t *,
                                    thread_ptr->mem.num_caches * sizeof(size_t), status);
                        HYDU_MALLOC(thread_ptr->mem.cache_depth, int *,
                                    thread_ptr->mem.num_caches * sizeof(int), status);
                        idx = 0;
                        load_cache_objs(obj_sys, obj_thread->cpuset, thread_ptr, &idx);
                    }
                }
Пример #10
0
// Determine a CPU-socket affinity hint for this pipe's render thread by
// locating the GPU (via its GL display port/device) in the hwloc I/O
// topology and finding which socket it is attached to.  Returns
// lunchbox::Thread::SOCKET + socket index on success, or
// lunchbox::Thread::NONE when placement cannot be determined.
int32_t Pipe::_getAutoAffinity() const
{
#ifdef EQ_USE_HWLOC_GL
    uint32_t port = getPort();
    uint32_t device = getDevice();

    // No display info at all: nothing to place against.
    if( port == LB_UNDEFINED_UINT32 && device == LB_UNDEFINED_UINT32 )
        return lunchbox::Thread::NONE;

    // Default any single missing coordinate to 0.
    if( port == LB_UNDEFINED_UINT32 )
        port = 0;
    if( device == LB_UNDEFINED_UINT32 )
        device = 0;

    hwloc_topology_t topology;
    hwloc_topology_init( &topology );

    // Flags used for loading the I/O devices,  bridges and their relevant info
    const unsigned long loading_flags = HWLOC_TOPOLOGY_FLAG_IO_BRIDGES |
                                        HWLOC_TOPOLOGY_FLAG_IO_DEVICES;
    // Set discovery flags
    if( hwloc_topology_set_flags( topology, loading_flags ) < 0 )
    {
        LBINFO << "Automatic pipe thread placement failed: "
               << "hwloc_topology_set_flags() failed" << std::endl;
        hwloc_topology_destroy( topology );
        return lunchbox::Thread::NONE;
    }

    if( hwloc_topology_load( topology ) < 0 )
    {
        LBINFO << "Automatic pipe thread placement failed: "
               << "hwloc_topology_load() failed" << std::endl;
        hwloc_topology_destroy( topology );
        return lunchbox::Thread::NONE;
    }

    // Look up the OS device object for the GL display (":port.device").
    const hwloc_obj_t osdev =
        hwloc_gl_get_display_osdev_by_port_device( topology,
                                                   int( port ), int( device ));
    if( !osdev )
    {
        LBINFO << "Automatic pipe thread placement failed: GPU not found"
               << std::endl;
        hwloc_topology_destroy( topology );
        return lunchbox::Thread::NONE;
    }

    // Climb from the GPU's PCI device to the first non-I/O ancestor, whose
    // cpuset tells us which socket(s) the GPU is locally attached to.
    const hwloc_obj_t pcidev = osdev->parent;
    const hwloc_obj_t parent = hwloc_get_non_io_ancestor_obj( topology, pcidev );
    const int numCpus =
        hwloc_get_nbobjs_inside_cpuset_by_type( topology, parent->cpuset,
                                                HWLOC_OBJ_SOCKET );
    // Only a GPU attached to exactly one socket yields a usable hint.
    if( numCpus != 1 )
    {
        LBINFO << "Automatic pipe thread placement failed: GPU attached to "
               << numCpus << " processors?" << std::endl;
        hwloc_topology_destroy( topology );
        return lunchbox::Thread::NONE;
    }

    const hwloc_obj_t cpuObj =
        hwloc_get_obj_inside_cpuset_by_type( topology, parent->cpuset,
                                             HWLOC_OBJ_SOCKET, 0 );
    if( cpuObj == 0 )
    {
        LBINFO << "Automatic pipe thread placement failed: "
               << "hwloc_get_obj_inside_cpuset_by_type() failed" << std::endl;
        hwloc_topology_destroy( topology );
        return lunchbox::Thread::NONE;
    }

    const int cpuIndex = cpuObj->logical_index;
    hwloc_topology_destroy( topology );
    return cpuIndex + lunchbox::Thread::SOCKET;
#else
    LBINFO << "Automatic thread placement not supported, no hwloc GL support"
           << std::endl;
#endif
    return lunchbox::Thread::NONE;
}
/* Test hwloc_get_obj_inside_cpuset_by_type() and friends against a
 * synthetic topology "nodes:2 sockets:3 caches:4 cores:5 6":
 * depth 0 = machine, 1 = node (2), 2 = socket (6), 3 = cache (24),
 * 4 = core (120), 5 = PU (720).  Index arithmetic in the asserts below
 * follows that layout. */
int
main (void)
{
  hwloc_topology_t topology;
  hwloc_obj_t obj, root;
  int err;

  err = hwloc_topology_init (&topology);
  if (err)
    return EXIT_FAILURE;

  hwloc_topology_set_synthetic (topology, "nodes:2 sockets:3 caches:4 cores:5 6");

  err = hwloc_topology_load (topology);
  if (err)
    return EXIT_FAILURE;

  /* there is no second system object */
  root = hwloc_get_root_obj (topology);
  obj = hwloc_get_obj_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_SYSTEM, 1);
  assert(!obj);

  /* first system object is the top-level object of the topology */
  obj = hwloc_get_obj_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_MACHINE, 0);
  assert(obj == hwloc_get_root_obj(topology));

  /* first next-object object is the top-level object of the topology */
  obj = hwloc_get_next_obj_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_MACHINE, NULL);
  assert(obj == hwloc_get_root_obj(topology));
  /* there is no next object after the system object */
  obj = hwloc_get_next_obj_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_SYSTEM, obj);
  assert(!obj);

  /* check last PU (2*3*4*5*6 = 720 PUs total) */
  obj = hwloc_get_obj_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_PU, 2*3*4*5*6-1);
  assert(obj == hwloc_get_obj_by_depth(topology, 5, 2*3*4*5*6-1));
  /* there is no next PU after the last one */
  obj = hwloc_get_next_obj_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_PU, obj);
  assert(!obj);


  /* check there are 20 cores inside first socket (4 caches * 5 cores) */
  root = hwloc_get_obj_by_depth(topology, 2, 0);
  assert(hwloc_get_nbobjs_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_CORE) == 20);

  /* check there are 12 caches inside last node (3 sockets * 4 caches) */
  root = hwloc_get_obj_by_depth(topology, 1, 1);
  assert(hwloc_get_nbobjs_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_CACHE) == 12);


  /* check first PU of second socket (each socket spans 4*5*6 PUs) */
  root = hwloc_get_obj_by_depth(topology, 2, 1);
  obj = hwloc_get_obj_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_PU, 0);
  assert(obj == hwloc_get_obj_by_depth(topology, 5, 4*5*6));

  /* check third core of third socket (each socket spans 4*5 cores) */
  root = hwloc_get_obj_by_depth(topology, 2, 2);
  obj = hwloc_get_obj_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_CORE, 2);
  assert(obj == hwloc_get_obj_by_depth(topology, 4, 2*4*5+2));

  /* check first socket of second node (3 sockets per node) */
  root = hwloc_get_obj_by_depth(topology, 1, 1);
  obj = hwloc_get_obj_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_SOCKET, 0);
  assert(obj == hwloc_get_obj_by_depth(topology, 2, 3));

  /* there is no node inside sockets */
  root = hwloc_get_obj_by_depth(topology, 2, 0);
  obj = hwloc_get_obj_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_NODE, 0);
  assert(!obj);

  hwloc_topology_destroy (topology);

  return EXIT_SUCCESS;
}