Example #1
void 
darts :: hwloc :: AbstractMachine :: discoverTopologyWithLLC(void)
{
    unsigned nbSockets = hwloc_get_nbobjs_by_type(_topology,HWLOC_OBJ_SOCKET);
    hwloc_obj_t o = hwloc_get_obj_by_type(_topology,HWLOC_OBJ_SOCKET,0);

    hwloc_obj_t obj;
    for (obj = o->first_child;
            obj && obj->type != HWLOC_OBJ_CACHE;
            obj = obj->first_child)
        ;

    _nbClusters = nbSockets;
    if (obj) {
        int n = hwloc_get_nbobjs_inside_cpuset_by_type(_topology,obj->cpuset,HWLOC_OBJ_PU);
        _nbClusters = _nbTotalUnits / n; // XXX assumes homogeneous distribution of PUs
    }
    _clusterMap = new Cluster[_nbClusters];

    // TODO Refactor this code and the next function's code into a single one 
    for (o = obj; o; o = o->next_cousin)  {
        int           nUnits = hwloc_get_nbobjs_inside_cpuset_by_type(_topology,o->cpuset,HWLOC_OBJ_PU);
        Unit *units  = new Unit[nUnits];
        for (int i = 0; i < nUnits; ++i) {
            hwloc_obj_t t = hwloc_get_obj_inside_cpuset_by_type(_topology,o->cpuset,HWLOC_OBJ_PU,i);
            Unit hwu(o->logical_index,t->logical_index,t->os_index);
            units[i] = hwu; // simple shallow copy
        }
        Cluster cluster(o->logical_index,o->logical_index,nUnits,units);
        _clusterMap[o->logical_index] = cluster; // simple shallow copy
    }
}
Example #2
/*
 * Make a layout string of all available sockets and cores.  Note that
 * this is *everything* -- not just the ones that are available to
 * this process.
 *
 * Example: [../..]
 * Key:  [] - signifies socket
 *        / - signifies core
 *        . - signifies PU
 */
static int get_layout_exists(char str[OMPI_AFFINITY_STRING_MAX])
{
    int core_index, pu_index;
    int len = OMPI_AFFINITY_STRING_MAX;
    hwloc_obj_t socket, core, pu;

    str[0] = '\0';

    /* Iterate over all existing sockets */
    for (socket = hwloc_get_obj_by_type(opal_hwloc_topology,
                                        HWLOC_OBJ_SOCKET, 0);
         NULL != socket;
         socket = socket->next_cousin) {
        strncat(str, "[", len - strlen(str));

        /* Iterate over all existing cores in this socket */
        core_index = 0;
        for (core = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology,
                                                        socket->cpuset,
                                                        HWLOC_OBJ_CORE, core_index);
             NULL != core;
             core = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology,
                                                        socket->cpuset,
                                                        HWLOC_OBJ_CORE, ++core_index)) {
            if (core_index > 0) {
                strncat(str, "/", len - strlen(str));
            }

            /* Iterate over all existing PUs in this core */
            pu_index = 0;
            for (pu = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology,
                                                          core->cpuset,
                                                          HWLOC_OBJ_PU, pu_index);
                 NULL != pu;
                 pu = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology,
                                                          core->cpuset,
                                                          HWLOC_OBJ_PU, ++pu_index)) {
                strncat(str, ".", len - strlen(str));
            }
        }
        strncat(str, "]", len - strlen(str));
    }

    return OMPI_SUCCESS;
}
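As a worked illustration of the format described above: on a hypothetical machine with two sockets, each holding two single-threaded cores, get_layout_exists() would produce "[./.][./.]"; with two 2-way SMT cores per socket it would produce "[../..][../..]" instead.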
Example #3
int get_max_objs_inside_cpuset_by_type(hwloc_topology_t topology, hwloc_cpuset_t cpuset, hwloc_obj_type_t type)
{
    int depth = hwloc_get_type_depth(topology, type);
    if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
        fprintf(stderr, "Cannot find the depth of type %s\n", hwloc_type_name(type));
        return -1;
    }
    if (depth == HWLOC_TYPE_DEPTH_MULTIPLE) {
        /* The type exists at several depths: start from a PU inside the cpuset and
         * walk up its ancestors until an object of the requested type is found. */
        hwloc_obj_t deepest_of_type = hwloc_get_obj_inside_cpuset_by_type(topology, cpuset, HWLOC_OBJ_PU, 0);
        while (deepest_of_type != NULL && deepest_of_type->type != type)
            deepest_of_type = deepest_of_type->parent;
        if (deepest_of_type == NULL)
            return -1;
        depth = deepest_of_type->depth;
    }
    return hwloc_get_nbobjs_inside_cpuset_by_depth(topology, cpuset, depth);
}
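A minimal usage sketch for this helper, assuming a loaded topology and the usual C headers; the variable names are illustrative:

/* Count cores and PUs below the root cpuset; the helper above tolerates
 * HWLOC_TYPE_DEPTH_MULTIPLE (as can happen for HWLOC_OBJ_CACHE). */
hwloc_obj_t root = hwloc_get_root_obj(topology);
int ncores = get_max_objs_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_CORE);
int npus   = get_max_objs_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_PU);
if (ncores > 0 && npus > 0)
    printf("%d PU(s) per core on average\n", npus / ncores);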
Example #4
void 
darts :: hwloc :: AbstractMachine :: discoverTopology(void)
{
    _nbClusters   = hwloc_get_nbobjs_by_type(_topology,HWLOC_OBJ_SOCKET);
    _clusterMap   = new Cluster[_nbClusters];
    hwloc_obj_t o = hwloc_get_obj_by_type(_topology,HWLOC_OBJ_SOCKET,0);
    // TODO Refactor this code and the previous function's code into a single one
    for (; o; o = o->next_cousin)  {
        int           nUnits = hwloc_get_nbobjs_inside_cpuset_by_type(_topology,o->cpuset,HWLOC_OBJ_PU);
        Unit *units  = new Unit[nUnits];
        for (int i = 0; i < nUnits; ++i) {
            hwloc_obj_t t = hwloc_get_obj_inside_cpuset_by_type(_topology,o->cpuset,HWLOC_OBJ_PU,i);
            Unit hwu(o->logical_index,t->logical_index,t->os_index);
            units[i] = hwu; // simple shallow copy
        }
        Cluster cluster(o->logical_index,o->logical_index,nUnits,units);
        _clusterMap[o->logical_index] = cluster; // simple shallow copy
    }
}
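The TODOs in Examples #1 and #4 ask for the duplicated cluster-population loop to be factored out. A minimal sketch of such a shared helper, assuming the Unit and Cluster constructors used above (the name populateClusters is illustrative, not part of the original class):

void
darts :: hwloc :: AbstractMachine :: populateClusters(hwloc_obj_t first)
{
    // Walk the cousin list starting from 'first' (the first socket, or the first
    // last-level cache) and build one Cluster per object, as both functions above do.
    for (hwloc_obj_t o = first; o; o = o->next_cousin) {
        int nUnits  = hwloc_get_nbobjs_inside_cpuset_by_type(_topology,o->cpuset,HWLOC_OBJ_PU);
        Unit *units = new Unit[nUnits];
        for (int i = 0; i < nUnits; ++i) {
            hwloc_obj_t t = hwloc_get_obj_inside_cpuset_by_type(_topology,o->cpuset,HWLOC_OBJ_PU,i);
            units[i] = Unit(o->logical_index,t->logical_index,t->os_index); // simple shallow copy
        }
        _clusterMap[o->logical_index] = Cluster(o->logical_index,o->logical_index,nUnits,units); // simple shallow copy
    }
}

discoverTopology() would then only compute _nbClusters, allocate _clusterMap, and call the helper with the first socket object, while discoverTopologyWithLLC() would pass the first cache object it finds.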
Example #5
/* This function initializes the bind_map data structure with the binding information from all
 * the ranks that have copied their cpu-affinity information into the shared memory region.
 * */
void MPIDI_SHM_hwloc_init_bindmap(int num_ranks, int topo_depth, int *shared_region, int **bind_map)
{
    int i, level;
    unsigned int num_obj, curr_obj;

    hwloc_cpuset_t hwloc_cpuset = hwloc_bitmap_alloc();
    /* STEP 3.1. Collect the binding information from hwloc for all ranks */
    for (i = 0; i < num_ranks; ++i) {
        cpu_set_t t = ((cpu_set_t *) (shared_region))[i];
        hwloc_cpuset_from_glibc_sched_affinity(MPIR_Process.hwloc_topology, hwloc_cpuset, &t,
                                               sizeof(t));
        /* HWLOC_OBJ_PU is the smallest unit of computation. We would like to get all the
         * affinity information for each rank */
        num_obj =
            hwloc_get_nbobjs_inside_cpuset_by_type(MPIR_Process.hwloc_topology, hwloc_cpuset,
                                                   HWLOC_OBJ_PU);
        /* Walk up from each PU object; if the rank is bound to more than one PU at a
         * given level, set that entry to -1, otherwise record the binding */
        for (curr_obj = 0; curr_obj < num_obj; ++curr_obj) {
            hwloc_obj_t obj =
                hwloc_get_obj_inside_cpuset_by_type(MPIR_Process.hwloc_topology, hwloc_cpuset,
                                                    HWLOC_OBJ_PU, curr_obj);
            level = 0;
            do {
                /* If the binding was not set yet, or is the same as the previous binding,
                 * update it. Note that we use logical indices from hwloc instead of physical
                 * indices because logical indices are more portable - see hwloc documentation */
                if (bind_map[i][level] == 0 || bind_map[i][level] == obj->logical_index) {
                    bind_map[i][level] = obj->logical_index;
                } else {
                    /* If rank is bound to different PUs at that level, we set to -1 */
                    bind_map[i][level] = -1;
                }
                level++;
            } while ((obj = obj->parent));
        }
    }
    hwloc_bitmap_free(hwloc_cpuset);
}
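A sketch of how a caller might size and zero-initialize bind_map before invoking this function, assuming MPIR_Process.hwloc_topology is already loaded and that num_ranks and shared_region are set up as in the parameters above (the allocation scheme is illustrative, not the actual MPICH caller):

int topo_depth = hwloc_topology_get_depth(MPIR_Process.hwloc_topology);
int **bind_map = (int **) malloc(num_ranks * sizeof(int *));
for (int i = 0; i < num_ranks; ++i)
    bind_map[i] = (int *) calloc(topo_depth, sizeof(int));  /* one zeroed slot per topology level */
MPIDI_SHM_hwloc_init_bindmap(num_ranks, topo_depth, shared_region, bind_map);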
Example #6
/*
 * Prettyprint a list of all available sockets and cores.  Note that
 * this is *everything* -- not just the ones that are available to
 * this process.
 */
static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX])
{
    bool first = true;
    int i, num_cores, num_pus;
    char tmp[BUFSIZ];
    const int stmp = sizeof(tmp) - 1;
    hwloc_obj_t socket, core, c2;

    str[0] = '\0';
    for (socket = hwloc_get_obj_by_type(opal_hwloc_topology,
                                        HWLOC_OBJ_SOCKET, 0);
         NULL != socket; socket = socket->next_cousin) {
        /* If this isn't the first socket, add a delimiter */
        if (!first) {
            strncat(str, "; ", OMPI_AFFINITY_STRING_MAX - strlen(str));
        }
        first = false;

        snprintf(tmp, stmp, "socket %d has ", socket->os_index);
        strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str));

        /* Find out how many cores are inside this socket, and get an
           object pointing to the first core.  Also count how many PUs
           are in the first core. */
        num_cores = hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology,
                                                           socket->cpuset,
                                                           HWLOC_OBJ_CORE);
        core = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology,
                                                   socket->cpuset,
                                                   HWLOC_OBJ_CORE, 0);
        if (NULL != core) {
            num_pus =
                hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology,
                                                       core->cpuset,
                                                       HWLOC_OBJ_PU);

            /* Only 1 core */
            if (1 == num_cores) {
                strncat(str, "1 core with ",
                        OMPI_AFFINITY_STRING_MAX - strlen(str));
                if (1 == num_pus) {
                    strncat(str, "1 hwt",
                            OMPI_AFFINITY_STRING_MAX - strlen(str));
                } else {
                    snprintf(tmp, stmp, "%d hwts", num_pus);
                    strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str));
                }
            }

            /* Multiple cores */
            else {
                bool same = true;

                snprintf(tmp, stmp, "%d cores", num_cores);
                strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str));

                /* Do all the cores have the same number of PUs? */
                for (c2 = core; NULL != c2; c2 = c2->next_cousin) {
                    if (hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology,
                                                               c2->cpuset,
                                                               HWLOC_OBJ_PU) !=
                        num_pus) {
                        same = false;
                        break;
                    }
                }

                /* Yes, they all have the same number of PUs */
                if (same) {
                    snprintf(tmp, stmp, ", each with %d hwt", num_pus);
                    strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str));
                    if (num_pus != 1) {
                        strncat(str, "s", OMPI_AFFINITY_STRING_MAX - strlen(str));
                    }
                }

                /* No, they have differing numbers of PUs */
                else {
                    bool first = true;

                    strncat(str, "with (", OMPI_AFFINITY_STRING_MAX - strlen(str));
                    for (c2 = core; NULL != c2; c2 = c2->next_cousin) {
                        if (!first) {
                            strncat(str, ", ",
                                    OMPI_AFFINITY_STRING_MAX - strlen(str));
                        }
                        first = false;

                        i = hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology,
                                                                   c2->cpuset,
                                                                   HWLOC_OBJ_PU);
                        snprintf(tmp, stmp, "%d", i);
                        strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str));
                    }
                    strncat(str, ") hwts",
                            OMPI_AFFINITY_STRING_MAX - strlen(str));
                }
            }
        }
    }

    return OMPI_SUCCESS;
}
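As a worked illustration: on a hypothetical dual-socket machine with four 2-way SMT cores per socket, get_rsrc_exists() would produce "socket 0 has 4 cores, each with 2 hwts; socket 1 has 4 cores, each with 2 hwts".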
Example #7
hwloc::hwloc()
{
  s_core_topology   = std::pair<unsigned,unsigned>(0,0);
  s_core_capacity   = 0 ;
  s_hwloc_topology  = 0 ;
  s_hwloc_location  = 0 ;
  s_process_binding = 0 ;

  for ( unsigned i = 0 ; i < MAX_CORE ; ++i ) s_core[i] = 0 ;

  hwloc_topology_init( & s_hwloc_topology );
  hwloc_topology_load( s_hwloc_topology );

  s_hwloc_location  = hwloc_bitmap_alloc();
  s_process_binding = hwloc_bitmap_alloc();

  hwloc_get_cpubind( s_hwloc_topology , s_process_binding ,  HWLOC_CPUBIND_PROCESS );

  // Choose a hwloc object type for the NUMA level, which may not exist.

  hwloc_obj_type_t root_type = HWLOC_OBJ_TYPE_MAX ;

  {
    // Object types to search, in order.
    static const hwloc_obj_type_t candidate_root_type[] =
      { HWLOC_OBJ_NODE     /* NUMA region     */
      , HWLOC_OBJ_SOCKET   /* hardware socket */
      , HWLOC_OBJ_MACHINE  /* local machine   */
      };

    enum { CANDIDATE_ROOT_TYPE_COUNT =
             sizeof(candidate_root_type) / sizeof(hwloc_obj_type_t) };

    for ( int k = 0 ; k < CANDIDATE_ROOT_TYPE_COUNT && HWLOC_OBJ_TYPE_MAX == root_type ; ++k ) {
      if ( 0 < hwloc_get_nbobjs_by_type( s_hwloc_topology , candidate_root_type[k] ) ) {
        root_type = candidate_root_type[k] ;
      }
    }
  }

  // Determine which of these 'root' objects are available to this process.
  // The process may have been bound (e.g., by MPI) to a subset of these root objects.
  // Determine the current location of the master (calling) process.

  hwloc_bitmap_t proc_cpuset_location = hwloc_bitmap_alloc();

  hwloc_get_last_cpu_location( s_hwloc_topology , proc_cpuset_location , HWLOC_CPUBIND_THREAD );

  const unsigned max_root = hwloc_get_nbobjs_by_type( s_hwloc_topology , root_type );

  unsigned root_base     = max_root ;
  unsigned root_count    = 0 ;
  unsigned core_per_root = 0 ;
  unsigned pu_per_core   = 0 ;
  bool     symmetric     = true ;

  for ( unsigned i = 0 ; i < max_root ; ++i ) {

    const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , i );

    if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) {

      ++root_count ;

      // Remember which root (NUMA) object the master thread is running on.
      // This will be logical NUMA rank #0 for this process.

      if ( hwloc_bitmap_intersects( proc_cpuset_location, root->allowed_cpuset ) ) {
        root_base = i ;
      }

      // Count available cores:

      const unsigned max_core =
        hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
                                                root->allowed_cpuset ,
                                                HWLOC_OBJ_CORE );

      unsigned core_count = 0 ;

      for ( unsigned j = 0 ; j < max_core ; ++j ) {

        const hwloc_obj_t core =
          hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
                                               root->allowed_cpuset ,
                                               HWLOC_OBJ_CORE , j );

        // If process' cpuset intersects core's cpuset then process can access this core.
        // Must use intersection instead of inclusion because the Intel-Phi
        // MPI may bind the process to only one of the core's hyperthreads.
        //
        // Assumption: if the process can access any hyperthread of the core
        // then it has ownership of the entire core.
        // This assumes that it would be performance-detrimental
        // to spawn more than one MPI process per core and use nested threading.

        if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {

          ++core_count ;

          const unsigned pu_count =
            hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
                                                    core->allowed_cpuset ,
                                                    HWLOC_OBJ_PU );

          if ( pu_per_core == 0 ) pu_per_core = pu_count ;

          // Enforce symmetry by taking the minimum:

          pu_per_core = std::min( pu_per_core , pu_count );

          if ( pu_count != pu_per_core ) symmetric = false ;
        }
      }

      if ( 0 == core_per_root ) core_per_root = core_count ;

      // Enforce symmetry by taking the minimum:

      core_per_root = std::min( core_per_root , core_count );

      if ( core_count != core_per_root ) symmetric = false ;
    }
  }

  s_core_topology.first  = root_count ;
  s_core_topology.second = core_per_root ;
  s_core_capacity        = pu_per_core ;

  // Fill the 's_core' array for fast mapping from a core coordinate to the
  // hwloc cpuset object required for thread location querying and binding.

  for ( unsigned i = 0 ; i < max_root ; ++i ) {

    const unsigned root_rank = ( i + root_base ) % max_root ;

    const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , root_rank );

    if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) {

      const unsigned max_core =
        hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
                                                root->allowed_cpuset ,
                                                HWLOC_OBJ_CORE );

      unsigned core_count = 0 ;

      for ( unsigned j = 0 ; j < max_core && core_count < core_per_root ; ++j ) {

        const hwloc_obj_t core =
          hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
                                               root->allowed_cpuset ,
                                               HWLOC_OBJ_CORE , j );

        if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {

          s_core[ core_count + core_per_root * i ] = core->allowed_cpuset ;

          ++core_count ;
        }
      }
    }
  }

  hwloc_bitmap_free( proc_cpuset_location );

  if ( ! symmetric ) {
    std::cout << "KokkosArray::hwloc WARNING: Using a symmetric subset of a non-symmetric core topology."
              << std::endl ;
  }
}
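The s_core array filled above exists so that a (root, core) coordinate can later be turned into a thread binding. A minimal sketch of that use, assuming access to the same static state (the coordinate values are illustrative):

// Bind the calling thread to core 'core_rank' of root/NUMA region 'root_rank',
// using the cpuset cached in s_core by the constructor above.
const unsigned root_rank = 0;
const unsigned core_rank = 0;
const hwloc_cpuset_t core_cpuset = s_core[ core_rank + s_core_topology.second * root_rank ];
if ( core_cpuset ) {
  hwloc_set_cpubind( s_hwloc_topology , core_cpuset , HWLOC_CPUBIND_THREAD );
}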
Example #8
static HYD_status handle_bitmap_binding(const char *binding, const char *mapping)
{
    int i, j, k, bind_count, map_count, cache_depth = 0, bind_depth = 0, map_depth = 0;
    int total_map_objs, total_bind_objs, num_pus_in_map_domain, num_pus_in_bind_domain,
        total_map_domains;
    hwloc_obj_t map_obj, bind_obj, *start_pu;
    hwloc_cpuset_t *map_domains;
    char *bind_str, *map_str;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    /* split out the count fields */
    status = split_count_field(binding, &bind_str, &bind_count);
    HYDU_ERR_POP(status, "error splitting count field\n");

    status = split_count_field(mapping, &map_str, &map_count);
    HYDU_ERR_POP(status, "error splitting count field\n");


    /* get the binding object */
    if (!strcmp(bind_str, "board"))
        bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_MACHINE);
    else if (!strcmp(bind_str, "numa"))
        bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_NODE);
    else if (!strcmp(bind_str, "socket"))
        bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_SOCKET);
    else if (!strcmp(bind_str, "core"))
        bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_CORE);
    else if (!strcmp(bind_str, "hwthread"))
        bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_PU);
    else {
        /* check if it's in the l*cache format */
        cache_depth = parse_cache_string(bind_str);
        if (!cache_depth) {
            HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                                "unrecognized binding string \"%s\"\n", binding);
        }
        bind_depth = hwloc_get_cache_type_depth(topology, cache_depth, -1);
    }

    /* get the mapping */
    if (!strcmp(map_str, "board"))
        map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_MACHINE);
    else if (!strcmp(map_str, "numa"))
        map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_NODE);
    else if (!strcmp(map_str, "socket"))
        map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_SOCKET);
    else if (!strcmp(map_str, "core"))
        map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_CORE);
    else if (!strcmp(map_str, "hwthread"))
        map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_PU);
    else {
        cache_depth = parse_cache_string(map_str);
        if (!cache_depth) {
            HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                                "unrecognized mapping string \"%s\"\n", mapping);
        }
        map_depth = hwloc_get_cache_type_depth(topology, cache_depth, -1);
    }

    /*
     * Process Affinity Algorithm:
     *
     * The code below works in 3 stages. The end result is an array of all the possible
     * binding bitmaps for a system, based on the options specified.
     *
     * 1. Define all possible mapping "domains" in a system. A mapping domain is a group
     *    of hardware elements found by traversing the topology. Each traversal skips the
     *    number of elements the user specified in the mapping string. The traversal ends
     *    when the next mapping domain == the first mapping domain. Note that if the
     *    mapping string defines a domain that is larger than the system size, we exit
     *    with an error.
     *
     * 2. Define the number of possible binding domains within a mapping domain. This
     *    process is similar to step 1, in that we traverse the mapping domain finding
     *    all possible bind combinations, stopping when a duplicate of the first binding
     *    is reached. If a binding is larger (in # of PUs) than the mapping domain,
     *    the number of possible bindings for that domain is 1. In this stage, we also
     *    locate the first PU in each mapping domain for use later during binding.
     *
     * 3. Create the binding bitmaps. We allocate an array of bitmaps and fill them in
     *    with all possible bindings. The starting PU in each mapping domain is advanced
     *    if and when we wrap around to the beginning of the mapping domains. This ensures
     *    that we do not repeat.
     *
     */

    /* calculate the number of map domains */
    total_map_objs = hwloc_get_nbobjs_by_depth(topology, map_depth);
    num_pus_in_map_domain = (HYDT_topo_hwloc_info.total_num_pus / total_map_objs) * map_count;
    HYDU_ERR_CHKANDJUMP(status, num_pus_in_map_domain > HYDT_topo_hwloc_info.total_num_pus,
                        HYD_INTERNAL_ERROR, "mapping option \"%s\" larger than total system size\n",
                        mapping);

    /* The number of total_map_domains should be large enough to
     * contain all contiguous map object collections of length
     * map_count.  For example, if the map object is "socket" and the
     * map_count is 3, on a system with 4 sockets, the following map
     * domains should be included: (0,1,2), (3,0,1), (2,3,0), (1,2,3).
     * We do this by finding how many times we need to replicate the
     * list of the map objects so that an integral number of map
     * domains can map to them.  In the above case, the list of map
     * objects is replicated 3 times. */
    for (i = 1; (i * total_map_objs) % map_count; i++);
    total_map_domains = (i * total_map_objs) / map_count;
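    /* Worked example (numbers assumed for illustration): with 4 map objects
     * (sockets) and map_count = 3, the loop above stops at i = 3 because
     * 1*4 % 3 = 1 and 2*4 % 3 = 2 but 3*4 % 3 = 0, giving
     * total_map_domains = (3 * 4) / 3 = 4 -- the four domains
     * (0,1,2), (3,0,1), (2,3,0), (1,2,3) listed in the comment above. */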

    /* initialize the map domains */
    HYDU_MALLOC_OR_JUMP(map_domains, hwloc_bitmap_t *, total_map_domains * sizeof(hwloc_bitmap_t),
                        status);
    HYDU_MALLOC_OR_JUMP(start_pu, hwloc_obj_t *, total_map_domains * sizeof(hwloc_obj_t), status);

    /* For each map domain, find the next map object (first map object
     * for the first map domain) and add the following "map_count"
     * number of contiguous map objects, wrapping to the first one if
     * needed, to the map domain.  Store the first PU in the first map
     * object of the map domain as "start_pu".  This is needed later
     * for the actual binding. */
    map_obj = NULL;
    for (i = 0; i < total_map_domains; i++) {
        map_domains[i] = hwloc_bitmap_alloc();
        hwloc_bitmap_zero(map_domains[i]);

        for (j = 0; j < map_count; j++) {
            map_obj = hwloc_get_next_obj_by_depth(topology, map_depth, map_obj);
            /* map_obj will be NULL if it reaches the end. call again to wrap around */
            if (!map_obj)
                map_obj = hwloc_get_next_obj_by_depth(topology, map_depth, map_obj);

            if (j == 0)
                start_pu[i] =
                    hwloc_get_obj_inside_cpuset_by_type(topology, map_obj->cpuset, HWLOC_OBJ_PU, 0);

            hwloc_bitmap_or(map_domains[i], map_domains[i], map_obj->cpuset);
        }
    }


    /* Find the possible binding domains is similar to that of map
     * domains.  But if a binding domain is larger (in # of PUs) than
     * the mapping domain, the number of possible bindings for that
     * domain is 1. */

    /* calculate the number of possible bindings and allocate bitmaps for them */
    total_bind_objs = hwloc_get_nbobjs_by_depth(topology, bind_depth);
    num_pus_in_bind_domain = (HYDT_topo_hwloc_info.total_num_pus / total_bind_objs) * bind_count;

    if (num_pus_in_bind_domain < num_pus_in_map_domain) {
        for (i = 1; (i * num_pus_in_map_domain) % num_pus_in_bind_domain; i++);
        HYDT_topo_hwloc_info.num_bitmaps =
            (i * num_pus_in_map_domain * total_map_domains) / num_pus_in_bind_domain;
    }
    else {
        HYDT_topo_hwloc_info.num_bitmaps = total_map_domains;
    }

    /* initialize bitmaps */
    HYDU_MALLOC_OR_JUMP(HYDT_topo_hwloc_info.bitmap, hwloc_bitmap_t *,
                        HYDT_topo_hwloc_info.num_bitmaps * sizeof(hwloc_bitmap_t), status);

    for (i = 0; i < HYDT_topo_hwloc_info.num_bitmaps; i++) {
        HYDT_topo_hwloc_info.bitmap[i] = hwloc_bitmap_alloc();
        hwloc_bitmap_zero(HYDT_topo_hwloc_info.bitmap[i]);
    }

    /* do bindings */
    i = 0;
    while (i < HYDT_topo_hwloc_info.num_bitmaps) {
        for (j = 0; j < total_map_domains; j++) {
            bind_obj = hwloc_get_ancestor_obj_by_depth(topology, bind_depth, start_pu[j]);

            for (k = 0; k < bind_count; k++) {
                hwloc_bitmap_or(HYDT_topo_hwloc_info.bitmap[i], HYDT_topo_hwloc_info.bitmap[i],
                                bind_obj->cpuset);

                /* if the binding is smaller than the mapping domain, wrap around inside that domain */
                if (num_pus_in_bind_domain < num_pus_in_map_domain) {
                    bind_obj =
                        hwloc_get_next_obj_inside_cpuset_by_depth(topology, map_domains[j],
                                                                  bind_depth, bind_obj);
                    if (!bind_obj)
                        bind_obj =
                            hwloc_get_next_obj_inside_cpuset_by_depth(topology, map_domains[j],
                                                                      bind_depth, bind_obj);
                }
                else {
                    bind_obj = hwloc_get_next_obj_by_depth(topology, bind_depth, bind_obj);
                    if (!bind_obj)
                        bind_obj = hwloc_get_next_obj_by_depth(topology, bind_depth, bind_obj);
                }

            }
            i++;

            /* advance the starting position for this map domain, if needed */
            if (num_pus_in_bind_domain < num_pus_in_map_domain) {
                for (k = 0; k < num_pus_in_bind_domain; k++) {
                    start_pu[j] = hwloc_get_next_obj_inside_cpuset_by_type(topology, map_domains[j],
                                                                           HWLOC_OBJ_PU,
                                                                           start_pu[j]);
                    if (!start_pu[j])
                        start_pu[j] =
                            hwloc_get_next_obj_inside_cpuset_by_type(topology, map_domains[j],
                                                                     HWLOC_OBJ_PU, start_pu[j]);
                }
            }
        }
    }

    /* free temporary memory */
    MPL_free(map_domains);
    MPL_free(start_pu);

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Example #9
HYD_status HYDT_topo_hwloc_init(HYDT_topo_support_level_t * support_level)
{
    int node, sock, core, thread, idx;

    hwloc_obj_t obj_sys;
    hwloc_obj_t obj_node;
    hwloc_obj_t obj_sock;
    hwloc_obj_t obj_core;
    hwloc_obj_t obj_thread;

    struct HYDT_topo_obj *node_ptr, *sock_ptr, *core_ptr, *thread_ptr;

    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    hwloc_topology_init(&topology);
    hwloc_topology_load(topology);

    hwloc_initialized = 1;

    /* Get the max number of processing elements */
    HYDT_topo_info.total_proc_units = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);

    /* We have qualified for basic topology support level */
    *support_level = HYDT_TOPO_SUPPORT_BASIC;

    /* Setup the machine level */
    obj_sys = hwloc_get_root_obj(topology);

    /* Retained for debugging purposes */
    /* print_obj_info(obj_sys); */

    /* init Hydra structure */
    HYDT_topo_info.machine.type = HYDT_TOPO_OBJ_MACHINE;
    HYDT_topo_cpuset_zero(&HYDT_topo_info.machine.cpuset);
    HYDT_topo_info.machine.parent = NULL;

    HYDT_topo_info.machine.num_children = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NODE);
    /* If there is no real node, consider there is one */
    if (!HYDT_topo_info.machine.num_children)
        HYDT_topo_info.machine.num_children = 1;
    status = HYDT_topo_alloc_objs(HYDT_topo_info.machine.num_children,
                                  &HYDT_topo_info.machine.children);
    HYDU_ERR_POP(status, "error allocating topo objects\n");

    /* Setup the nodes levels */
    for (node = 0; node < HYDT_topo_info.machine.num_children; node++) {
        node_ptr = &HYDT_topo_info.machine.children[node];
        node_ptr->type = HYDT_TOPO_OBJ_NODE;
        node_ptr->parent = &HYDT_topo_info.machine;
        HYDT_topo_cpuset_zero(&node_ptr->cpuset);

        if (!(obj_node = hwloc_get_obj_inside_cpuset_by_type(topology, obj_sys->cpuset,
                                                             HWLOC_OBJ_NODE, node)))
            obj_node = obj_sys;

        /* copy the hwloc cpuset to hydra format */
        hwloc_to_hydra_cpuset_dup(obj_node->cpuset, &node_ptr->cpuset);

        /* memory information */
        node_ptr->mem.local_mem_size = obj_node->memory.local_memory;

        /* find the number of cache objects which match my cpuset */
        node_ptr->mem.num_caches = get_cache_nbobjs(obj_sys, obj_node->cpuset);

        /* add the actual cache objects that match my cpuset */
        if (node_ptr->mem.num_caches) {
            HYDU_MALLOC(node_ptr->mem.cache_size, size_t *,
                        node_ptr->mem.num_caches * sizeof(size_t), status);
            HYDU_MALLOC(node_ptr->mem.cache_depth, int *,
                        node_ptr->mem.num_caches * sizeof(int), status);
            idx = 0;
            load_cache_objs(obj_sys, obj_node->cpuset, node_ptr, &idx);
        }

        node_ptr->num_children =
            hwloc_get_nbobjs_inside_cpuset_by_type(topology, obj_node->cpuset,
                                                   HWLOC_OBJ_SOCKET);
        /* In case there is no socket */
        if (!node_ptr->num_children)
            node_ptr->num_children = 1;

        status = HYDT_topo_alloc_objs(node_ptr->num_children, &node_ptr->children);
        HYDU_ERR_POP(status, "error allocating topo objects\n");

        /* Setup the socket level */
        for (sock = 0; sock < node_ptr->num_children; sock++) {
            sock_ptr = &node_ptr->children[sock];
            sock_ptr->type = HYDT_TOPO_OBJ_SOCKET;
            sock_ptr->parent = node_ptr;
            HYDT_topo_cpuset_zero(&sock_ptr->cpuset);

            if (!(obj_sock = hwloc_get_obj_inside_cpuset_by_type(topology, obj_node->cpuset,
                                                                 HWLOC_OBJ_SOCKET, sock)))
                obj_sock = obj_node;

            /* copy the hwloc cpuset to hydra format */
            hwloc_to_hydra_cpuset_dup(obj_sock->cpuset, &sock_ptr->cpuset);

            /* memory information */
            sock_ptr->mem.local_mem_size = obj_sock->memory.local_memory;

            /* find the number of cache objects which match my cpuset */
            sock_ptr->mem.num_caches = get_cache_nbobjs(obj_sys, obj_sock->cpuset);

            /* add the actual cache objects that match my cpuset */
            if (sock_ptr->mem.num_caches) {
                HYDU_MALLOC(sock_ptr->mem.cache_size, size_t *,
                            sock_ptr->mem.num_caches * sizeof(size_t), status);
                HYDU_MALLOC(sock_ptr->mem.cache_depth, int *,
                            sock_ptr->mem.num_caches * sizeof(int), status);
                idx = 0;
                load_cache_objs(obj_sys, obj_sock->cpuset, sock_ptr, &idx);
            }

            sock_ptr->num_children =
                hwloc_get_nbobjs_inside_cpuset_by_type(topology, obj_sock->cpuset,
                                                       HWLOC_OBJ_CORE);

            /* In case there is no core */
            if (!sock_ptr->num_children)
                sock_ptr->num_children = 1;

            status = HYDT_topo_alloc_objs(sock_ptr->num_children, &sock_ptr->children);
            HYDU_ERR_POP(status, "error allocating topo objects\n");

            /* setup the core level */
            for (core = 0; core < sock_ptr->num_children; core++) {
                core_ptr = &sock_ptr->children[core];
                core_ptr->type = HYDT_TOPO_OBJ_CORE;
                core_ptr->parent = sock_ptr;
                HYDT_topo_cpuset_zero(&core_ptr->cpuset);

                if (!(obj_core = hwloc_get_obj_inside_cpuset_by_type(topology,
                                                                     obj_sock->cpuset,
                                                                     HWLOC_OBJ_CORE, core)))
                    obj_core = obj_sock;

                /* copy the hwloc cpuset to hydra format */
                hwloc_to_hydra_cpuset_dup(obj_core->cpuset, &core_ptr->cpuset);

                /* memory information */
                core_ptr->mem.local_mem_size = obj_core->memory.local_memory;

                /* find the number of cache objects which match my cpuset */
                core_ptr->mem.num_caches = get_cache_nbobjs(obj_sys, obj_core->cpuset);

                /* add the actual cache objects that match my cpuset */
                if (core_ptr->mem.num_caches) {
                    HYDU_MALLOC(core_ptr->mem.cache_size, size_t *,
                                core_ptr->mem.num_caches * sizeof(size_t), status);
                    HYDU_MALLOC(core_ptr->mem.cache_depth, int *,
                                core_ptr->mem.num_caches * sizeof(int), status);
                    idx = 0;
                    load_cache_objs(obj_sys, obj_core->cpuset, core_ptr, &idx);
                }

                core_ptr->num_children =
                    hwloc_get_nbobjs_inside_cpuset_by_type(topology, obj_core->cpuset,
                                                           HWLOC_OBJ_PU);

                /* In case there is no thread */
                if (!core_ptr->num_children)
                    core_ptr->num_children = 1;

                status = HYDT_topo_alloc_objs(core_ptr->num_children, &core_ptr->children);
                HYDU_ERR_POP(status, "error allocating topo objects\n");

                /* setup the thread level */
                for (thread = 0; thread < core_ptr->num_children; thread++) {
                    thread_ptr = &core_ptr->children[thread];
                    thread_ptr->type = HYDT_TOPO_OBJ_THREAD;
                    thread_ptr->parent = core_ptr;
                    thread_ptr->num_children = 0;
                    thread_ptr->children = NULL;
                    HYDT_topo_cpuset_zero(&thread_ptr->cpuset);

                    if (!(obj_thread =
                          hwloc_get_obj_inside_cpuset_by_type(topology, obj_core->cpuset,
                                                              HWLOC_OBJ_PU, thread)))
                        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                                            "unable to detect processing units\n");

                    /* copy the hwloc cpuset to hydra format */
                    hwloc_to_hydra_cpuset_dup(obj_thread->cpuset, &thread_ptr->cpuset);

                    /* memory information */
                    thread_ptr->mem.local_mem_size = obj_thread->memory.local_memory;

                    /* find the number of cache objects which match my cpuset */
                    thread_ptr->mem.num_caches = get_cache_nbobjs(obj_sys, obj_thread->cpuset);

                    /* add the actual cache objects that match my cpuset */
                    if (thread_ptr->mem.num_caches) {
                        HYDU_MALLOC(thread_ptr->mem.cache_size, size_t *,
                                    thread_ptr->mem.num_caches * sizeof(size_t), status);
                        HYDU_MALLOC(thread_ptr->mem.cache_depth, int *,
                                    thread_ptr->mem.num_caches * sizeof(int), status);
                        idx = 0;
                        load_cache_objs(obj_sys, obj_thread->cpuset, thread_ptr, &idx);
                    }
                }
            }
        }
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Example #10
int32_t Pipe::_getAutoAffinity() const
{
#ifdef EQ_USE_HWLOC_GL
    uint32_t port = getPort();
    uint32_t device = getDevice();

    if( port == LB_UNDEFINED_UINT32 && device == LB_UNDEFINED_UINT32 )
        return lunchbox::Thread::NONE;

    if( port == LB_UNDEFINED_UINT32 )
        port = 0;
    if( device == LB_UNDEFINED_UINT32 )
        device = 0;

    hwloc_topology_t topology;
    hwloc_topology_init( &topology );

    // Flags used for loading the I/O devices,  bridges and their relevant info
    const unsigned long loading_flags = HWLOC_TOPOLOGY_FLAG_IO_BRIDGES |
                                        HWLOC_TOPOLOGY_FLAG_IO_DEVICES;
    // Set discovery flags
    if( hwloc_topology_set_flags( topology, loading_flags ) < 0 )
    {
        LBINFO << "Automatic pipe thread placement failed: "
               << "hwloc_topology_set_flags() failed" << std::endl;
        hwloc_topology_destroy( topology );
        return lunchbox::Thread::NONE;
    }

    if( hwloc_topology_load( topology ) < 0 )
    {
        LBINFO << "Automatic pipe thread placement failed: "
               << "hwloc_topology_load() failed" << std::endl;
        hwloc_topology_destroy( topology );
        return lunchbox::Thread::NONE;
    }

    const hwloc_obj_t osdev =
        hwloc_gl_get_display_osdev_by_port_device( topology,
                                                   int( port ), int( device ));
    if( !osdev )
    {
        LBINFO << "Automatic pipe thread placement failed: GPU not found"
               << std::endl;
        hwloc_topology_destroy( topology );
        return lunchbox::Thread::NONE;
    }

    const hwloc_obj_t pcidev = osdev->parent;
    const hwloc_obj_t parent = hwloc_get_non_io_ancestor_obj( topology, pcidev );
    const int numCpus =
        hwloc_get_nbobjs_inside_cpuset_by_type( topology, parent->cpuset,
                                                HWLOC_OBJ_SOCKET );
    if( numCpus != 1 )
    {
        LBINFO << "Automatic pipe thread placement failed: GPU attached to "
               << numCpus << " processors?" << std::endl;
        hwloc_topology_destroy( topology );
        return lunchbox::Thread::NONE;
    }

    const hwloc_obj_t cpuObj =
        hwloc_get_obj_inside_cpuset_by_type( topology, parent->cpuset,
                                             HWLOC_OBJ_SOCKET, 0 );
    if( cpuObj == 0 )
    {
        LBINFO << "Automatic pipe thread placement failed: "
               << "hwloc_get_obj_inside_cpuset_by_type() failed" << std::endl;
        hwloc_topology_destroy( topology );
        return lunchbox::Thread::NONE;
    }

    const int cpuIndex = cpuObj->logical_index;
    hwloc_topology_destroy( topology );
    return cpuIndex + lunchbox::Thread::SOCKET;
#else
    LBINFO << "Automatic thread placement not supported, no hwloc GL support"
           << std::endl;
#endif
    return lunchbox::Thread::NONE;
}
#include <hwloc.h>
#include <assert.h>
#include <stdlib.h>

int
main (void)
{
  hwloc_topology_t topology;
  hwloc_obj_t obj, root;
  int err;

  err = hwloc_topology_init (&topology);
  if (err)
    return EXIT_FAILURE;

  hwloc_topology_set_synthetic (topology, "nodes:2 sockets:3 caches:4 cores:5 6");

  err = hwloc_topology_load (topology);
  if (err)
    return EXIT_FAILURE;

  /* there is no second system object */
  root = hwloc_get_root_obj (topology);
  obj = hwloc_get_obj_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_SYSTEM, 1);
  assert(!obj);

  /* first system object is the top-level object of the topology */
  obj = hwloc_get_obj_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_MACHINE, 0);
  assert(obj == hwloc_get_root_obj(topology));

  /* first next-object object is the top-level object of the topology */
  obj = hwloc_get_next_obj_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_MACHINE, NULL);
  assert(obj == hwloc_get_root_obj(topology));
  /* there is no next object after the system object */
  obj = hwloc_get_next_obj_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_SYSTEM, obj);
  assert(!obj);

  /* check last PU */
  obj = hwloc_get_obj_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_PU, 2*3*4*5*6-1);
  assert(obj == hwloc_get_obj_by_depth(topology, 5, 2*3*4*5*6-1));
  /* there is no next PU after the last one */
  obj = hwloc_get_next_obj_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_PU, obj);
  assert(!obj);


  /* check there are 20 cores inside first socket */
  root = hwloc_get_obj_by_depth(topology, 2, 0);
  assert(hwloc_get_nbobjs_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_CORE) == 20);

  /* check there are 12 caches inside last node */
  root = hwloc_get_obj_by_depth(topology, 1, 1);
  assert(hwloc_get_nbobjs_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_CACHE) == 12);


  /* check first PU of second socket */
  root = hwloc_get_obj_by_depth(topology, 2, 1);
  obj = hwloc_get_obj_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_PU, 0);
  assert(obj == hwloc_get_obj_by_depth(topology, 5, 4*5*6));

  /* check third core of third socket */
  root = hwloc_get_obj_by_depth(topology, 2, 2);
  obj = hwloc_get_obj_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_CORE, 2);
  assert(obj == hwloc_get_obj_by_depth(topology, 4, 2*4*5+2));

  /* check first socket of second node */
  root = hwloc_get_obj_by_depth(topology, 1, 1);
  obj = hwloc_get_obj_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_SOCKET, 0);
  assert(obj == hwloc_get_obj_by_depth(topology, 2, 3));

  /* there is no node inside sockets */
  root = hwloc_get_obj_by_depth(topology, 2, 0);
  obj = hwloc_get_obj_inside_cpuset_by_type(topology, root->cpuset, HWLOC_OBJ_NODE, 0);
  assert(!obj);

  hwloc_topology_destroy (topology);

  return EXIT_SUCCESS;
}
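For reference, the synthetic description "nodes:2 sockets:3 caches:4 cores:5 6" produces a topology whose depths 0 through 5 are machine, node, socket, cache, core, and PU, with 2*3*4*5*6 = 720 PUs in total; the hard-coded depths and index arithmetic in the assertions above rely on exactly that layout.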