Пример #1
0
/**
 * Duplicate an hwloc topology.
 *
 * The source topology is exported to an in-memory XML buffer, a new topology
 * is initialized in *dest and loaded from that buffer, and finally the
 * support flags of the source are copied into the new topology.
 *
 * @param dest  receives the newly created topology; on failure any topology
 *              that was initialized here has already been destroyed and the
 *              handle must not be used
 * @param src   topology to duplicate
 * @param type  data type tag; not used by this function
 *
 * @return OPAL_SUCCESS on success, OPAL_ERROR if the export, init, XML
 *         setup or load step fails
 */
int opal_hwloc_copy(hwloc_topology_t *dest, hwloc_topology_t src, opal_data_type_t type)
{
    char *xml;
    int len;
    const struct hwloc_topology_support *support, *destsupport;

    if (0 != hwloc_topology_export_xmlbuffer(src, &xml, &len)) {
        return OPAL_ERROR;
    }
    if (0 != hwloc_topology_init(dest)) {
        free(xml);
        return OPAL_ERROR;
    }
    if (0 != hwloc_topology_set_xmlbuffer(*dest, xml, len)) {
        hwloc_topology_destroy(*dest);
        free(xml);
        return OPAL_ERROR;
    }
    if (0 != hwloc_topology_load(*dest)) {
        hwloc_topology_destroy(*dest);
        free(xml);
        return OPAL_ERROR;
    }
    free(xml);

    /* get the available support - hwloc unfortunately does
     * not include this info in its xml support!
     */
    support = hwloc_topology_get_support(src);
    destsupport = hwloc_topology_get_support(*dest);

    /* The support struct only holds pointers to the discovery, cpubind and
     * membind structures. Assigning the outer struct would make *dest point
     * at the structures that belong to src, so copy the pointed-to contents
     * instead and leave each topology with its own storage.
     */
    *destsupport->discovery = *support->discovery;
    *destsupport->cpubind = *support->cpubind;
    *destsupport->membind = *support->membind;

    return OPAL_SUCCESS;
}
Пример #2
0
/**
 * Pack an array of hwloc topologies into a buffer.
 *
 * For every topology the XML export is packed as one OPAL_STRING, followed
 * by the raw bytes of its discovery, cpubind and membind support structures
 * (the XML export does not carry the support flags).
 *
 * @param buffer    buffer to pack into
 * @param src       array of num_vals hwloc_topology_t handles
 * @param num_vals  number of topologies in src
 * @param type      data type tag; not used by this function
 *
 * @return OPAL_SUCCESS, OPAL_ERROR if the XML export fails, or the error
 *         code returned by the failing opal_dss.pack call
 */
int opal_hwloc_pack(opal_buffer_t *buffer, const void *src,
                    int32_t num_vals,
                    opal_data_type_t type)
{
    /* NOTE: hwloc defines topology_t as a pointer to a struct! */
    hwloc_topology_t *topologies = (hwloc_topology_t*)src;
    char *xml = NULL;
    int xml_len;
    int status;
    int idx;

    for (idx = 0; idx < num_vals; idx++) {
        hwloc_topology_t topo = topologies[idx];
        struct hwloc_topology_support *caps;

        /* serialize the tree into an xml buffer */
        if (0 != hwloc_topology_export_xmlbuffer(topo, &xml, &xml_len)) {
            return OPAL_ERROR;
        }

        /* pack the xml text; the buffer is released whether or not the
         * pack succeeded (free(NULL) is a no-op)
         */
        status = opal_dss.pack(buffer, &xml, 1, OPAL_STRING);
        free(xml);
        if (OPAL_SUCCESS != status) {
            return status;
        }

        /* the xml export lacks the support flags, so ship them as raw bytes */
        caps = (struct hwloc_topology_support*)hwloc_topology_get_support(topo);

        /* discovery support */
        status = opal_dss.pack(buffer, caps->discovery,
                               sizeof(struct hwloc_topology_discovery_support),
                               OPAL_BYTE);
        if (OPAL_SUCCESS != status) {
            return status;
        }

        /* cpubind support */
        status = opal_dss.pack(buffer, caps->cpubind,
                               sizeof(struct hwloc_topology_cpubind_support),
                               OPAL_BYTE);
        if (OPAL_SUCCESS != status) {
            return status;
        }

        /* membind support */
        status = opal_dss.pack(buffer, caps->membind,
                               sizeof(struct hwloc_topology_membind_support),
                               OPAL_BYTE);
        if (OPAL_SUCCESS != status) {
            return status;
        }
    }

    return OPAL_SUCCESS;
}
/****** binding_support/has_core_binding() *****************************************
*  NAME
*     has_core_binding() -- Report whether core binding is supported.
*
*  SYNOPSIS
*     bool has_core_binding(void)
*
*  FUNCTION
*     Initializes the hwloc topology on first use and then reads the
*     set_proc_cpubind flag from the cpubind support reported by hwloc.
*     A positive answer does not imply that topology information (socket
*     and core amount), which the internal binding functions need, is
*     available as well.
*
*  RESULT
*     bool - true if the set_proc_cpubind support flag is set. false if the
*            flag is clear, if no topology could be obtained, or if the
*            code was built without HAVE_HWLOC.
*
*  NOTES
*     MT-NOTE: has_core_binding() is not MT safe
*
*******************************************************************************/
bool has_core_binding(void)
{
   bool can_bind = false;

#if HAVE_HWLOC
   if (!initialized) {
      init_topology();
   }
   if (sge_hwloc_topology) {
      const struct hwloc_topology_support *caps =
         hwloc_topology_get_support(sge_hwloc_topology);

      if (caps->cpubind->set_proc_cpubind) {
         can_bind = true;
      }
   }
#endif
   return can_bind;
}
/****** binding_support/has_topology_information() *********************************
*  NAME
*     has_topology_information() -- Report whether topology data is available.
*
*  SYNOPSIS
*     bool has_topology_information(void)
*
*  FUNCTION
*     Initializes the hwloc topology on first use and then reads the pu flag
*     from the discovery support reported by hwloc, i.e. whether the
*     architecture this function runs on offers processor topology
*     information.
*
*  RESULT
*     bool - true if the pu discovery flag is set. false if the flag is
*            clear, if no topology could be obtained, or if the code was
*            built without HAVE_HWLOC.
*
*  NOTES
*     MT-NOTE: has_topology_information() is not MT safe
*
*******************************************************************************/
bool has_topology_information(void)
{
   bool has_info = false;

#if HAVE_HWLOC
   if (!initialized) {
      init_topology();
   }
   if (sge_hwloc_topology) {
      const struct hwloc_topology_support *caps =
         hwloc_topology_get_support(sge_hwloc_topology);

      if (caps->discovery->pu) {
         has_info = true;
      }
   }
#endif
   return has_info;
}
Пример #5
0
/*
 * Walk /proc and call insert_task() for every process (and, on Linux, every
 * thread) whose CPU binding is worth showing.
 *
 * A process is inserted when its binding could be read and does not cover
 * the root object's cpuset. On Linux a thread is inserted when its binding
 * could be read and differs from the binding of its process; the process
 * name is taken from the first word of /proc/<pid>/cmdline.
 *
 * Does nothing when built without HAVE_DIRENT_H, when the topology does not
 * support get_proc_cpubind, when /proc cannot be opened, or when the working
 * bitmaps cannot be allocated.
 */
static void add_process_objects(hwloc_topology_t topology)
{
#ifdef HAVE_DIRENT_H
  hwloc_obj_t root;
  hwloc_bitmap_t cpuset;
#ifdef HWLOC_LINUX_SYS
  hwloc_bitmap_t task_cpuset;
#endif /* HWLOC_LINUX_SYS */
  DIR *dir;
  struct dirent *dirent;
  const struct hwloc_topology_support *support;

  root = hwloc_get_root_obj(topology);

  support = hwloc_topology_get_support(topology);

  if (!support->cpubind->get_proc_cpubind)
    return;

  dir  = opendir("/proc");
  if (!dir)
    return;
  cpuset = hwloc_bitmap_alloc();
  if (!cpuset)
    goto out_with_dir;
#ifdef HWLOC_LINUX_SYS
  task_cpuset = hwloc_bitmap_alloc();
  if (!task_cpuset)
    goto out_with_cpuset;
#endif /* HWLOC_LINUX_SYS */

  while ((dirent = readdir(dir))) {
    long local_pid_number;
    hwloc_pid_t local_pid;
    char *end;
    char name[64];
    int proc_cpubind;

    local_pid_number = strtol(dirent->d_name, &end, 10);
    if (*end)
      /* Not a number */
      continue;

    /* Default label is the bare pid; replaced below if cmdline is readable */
    snprintf(name, sizeof(name), "%ld", local_pid_number);

    local_pid = hwloc_pid_from_number(local_pid_number, 0);

    /* Non-zero when the process binding was successfully stored in cpuset */
    proc_cpubind = hwloc_get_proc_cpubind(topology, local_pid, cpuset, 0) != -1;

#ifdef HWLOC_LINUX_SYS
    {
      /* Get the process name */
      char *path;
      /* "/proc/" + pid + "/" + "cmdline" + NUL */
      unsigned pathlen = 6 + strlen(dirent->d_name) + 1 + 7 + 1;
      char cmd[64], *c;
      int file = -1;
      ssize_t n;

      /* An allocation failure is handled like an unreadable cmdline */
      path = malloc(pathlen);
      if (path) {
        snprintf(path, pathlen, "/proc/%s/cmdline", dirent->d_name);
        file = open(path, O_RDONLY);
        free(path);
      }

      if (file >= 0) {
        n = read(file, cmd, sizeof(cmd) - 1);
        close(file);

        if (n <= 0)
          /* Ignore kernel threads and errors */
          continue;

        cmd[n] = 0;
        if ((c = strchr(cmd, ' ')))
          *c = 0;
        snprintf(name, sizeof(name), "%ld %s", local_pid_number, cmd);
      }
    }

    {
      /* Get threads */
      char *path;
      /* "/proc/" + pid + "/" + "task" + NUL */
      unsigned pathlen = 6+strlen(dirent->d_name) + 1 + 4 + 1;
      DIR *task_dir = NULL;
      struct dirent *task_dirent;

      /* An allocation failure is handled like an unreadable task directory */
      path = malloc(pathlen);
      if (path) {
        snprintf(path, pathlen, "/proc/%s/task", dirent->d_name);
        task_dir = opendir(path);
        free(path);
      }

      if (task_dir) {
        while ((task_dirent = readdir(task_dir))) {
          long local_tid;
          char *task_end;
          char task_name[64];

          local_tid = strtol(task_dirent->d_name, &task_end, 10);
          if (*task_end)
            /* Not a number, or the main task */
            continue;

          if (hwloc_linux_get_tid_cpubind(topology, local_tid, task_cpuset))
            continue;

          /* Threads bound exactly like their process add no information */
          if (proc_cpubind && hwloc_bitmap_isequal(task_cpuset, cpuset))
            continue;

          snprintf(task_name, sizeof(task_name), "%s %li", name, local_tid);

          insert_task(topology, task_cpuset, task_name);
        }
        closedir(task_dir);
      }
    }
#endif /* HWLOC_LINUX_SYS */

    if (!proc_cpubind)
      continue;

    /* Skip processes whose binding covers the whole root cpuset */
    if (hwloc_bitmap_isincluded(root->cpuset, cpuset))
      continue;

    insert_task(topology, cpuset, name);
  }

#ifdef HWLOC_LINUX_SYS
  hwloc_bitmap_free(task_cpuset);
 out_with_cpuset:
#endif /* HWLOC_LINUX_SYS */
  hwloc_bitmap_free(cpuset);
 out_with_dir:
  closedir(dir);
#endif /* HAVE_DIRENT_H */
}
Пример #6
0
/**
 * Establish and publish the processor binding of the calling process.
 *
 * If the launch environment says the process was bound at launch, that is
 * recorded (and the applied binding parsed, if provided). Otherwise the
 * current binding is compared with the cpus available on the node: a
 * differing binding, or a single available cpu, is reported as an external
 * binding; if there is none and a binding policy other than "none" is set
 * and supported, the process binds itself according to that policy (slot
 * list, hwthread, core, or the cache/socket/NUMA object above the core
 * selected by the node rank).
 *
 * Afterwards the local cpuset is refreshed, the string form of the current
 * binding is stored in orte_process_info.cpuset, optionally reported, and
 * pushed via the modex.
 *
 * @return ORTE_SUCCESS on success (including the cases where no binding is
 *         possible), ORTE_ERR_SILENT after an error has been reported
 *         through orte_show_help
 */
int orte_ess_base_proc_binding(void)
{
    hwloc_obj_t node, obj;
    hwloc_cpuset_t cpus, nodeset;
    hwloc_obj_type_t target;
    unsigned int cache_level = 0;
    struct hwloc_topology_support *support;
    char *map;
    int ret;
    char *error=NULL;
    hwloc_cpuset_t mycpus;

    /* Determine if we were pre-bound or not */
    if (NULL != getenv(OPAL_MCA_PREFIX"orte_bound_at_launch")) {
        orte_proc_is_bound = true;
        if (NULL != (map = getenv(OPAL_MCA_PREFIX"orte_base_applied_binding"))) {
            orte_proc_applied_binding = hwloc_bitmap_alloc();
            if (0 != (ret = hwloc_bitmap_list_sscanf(orte_proc_applied_binding, map))) {
                error = "applied_binding parse";
                goto error;
            }
        }
    }

    /* see if we were bound when launched */
    if (!orte_proc_is_bound) {
        OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
                             "%s Not bound at launch",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        /* we were not bound at launch */
        if (NULL == opal_hwloc_topology) {
            /* there is nothing we can do, so just return */
            return ORTE_SUCCESS;
        }
        support = (struct hwloc_topology_support*)hwloc_topology_get_support(opal_hwloc_topology);
        /* get our node object */
        node = hwloc_get_root_obj(opal_hwloc_topology);
        nodeset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, node);
        /* get our bindings */
        cpus = hwloc_bitmap_alloc();
        if (hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS) < 0) {
            /* we are NOT bound if get_cpubind fails, nor can we be bound - the
             * environment does not support it
             */
            hwloc_bitmap_free(cpus);
            OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
                                 "%s Binding not supported",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            goto MOVEON;
        }
        /* we are bound if the two cpusets are not equal,
         * or if there is only ONE cpu available to us
         */
        if (0 != hwloc_bitmap_compare(cpus, nodeset) ||
            opal_hwloc_base_single_cpu(nodeset) ||
            opal_hwloc_base_single_cpu(cpus)) {
            /* someone external set it - indicate it is set
             * so that we know
             */
            orte_proc_is_bound = true;
            hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
            hwloc_bitmap_free(cpus);
            OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
                                 "%s Process was externally bound",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        } else if (support->cpubind->set_thisproc_cpubind &&
                   OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy) &&
                   OPAL_BIND_TO_NONE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
            /* the system is capable of doing processor affinity, but it
             * has not yet been set - see if a slot_list was given
             */
            hwloc_bitmap_zero(cpus);
            if (OPAL_BIND_TO_CPUSET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                if (OPAL_SUCCESS != (ret = opal_hwloc_base_slot_list_parse(opal_hwloc_base_slot_list,
                                                                           opal_hwloc_topology,
                                                                           OPAL_HWLOC_LOGICAL, cpus))) {
                    error = "Setting processor affinity failed";
                    hwloc_bitmap_free(cpus);
                    goto error;
                }
                if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
                    /* ret still holds the success code of the parse above,
                     * so give the error report a real failure code
                     */
                    ret = ORTE_ERROR;
                    error = "Setting processor affinity failed";
                    hwloc_bitmap_free(cpus);
                    goto error;
                }
                hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
                hwloc_bitmap_free(cpus);
                orte_proc_is_bound = true;
                OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
                                     "%s Process bound according to slot_list",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            } else {
                /* cleanup */
                hwloc_bitmap_free(cpus);
                /* get the node rank */
                if (ORTE_NODE_RANK_INVALID == orte_process_info.my_node_rank) {
                    /* this is not an error - could be due to being
                     * direct launched - so just ignore and leave
                     * us unbound
                     */
                    OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
                                         "%s Process not bound - no node rank available",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                    goto MOVEON;
                }
                /* if the binding policy is hwthread, then we bind to the nrank-th
                 * hwthread on this node
                 */
                if (OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                    if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_PU,
                                                                       0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) {
                        ret = ORTE_ERR_NOT_FOUND;
                        error = "Getting hwthread object";
                        goto error;
                    }
                    cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
                    if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
                        ret = ORTE_ERROR;
                        error = "Setting processor affinity failed";
                        goto error;
                    }
                    hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
                    /* NOTE(review): this is the only branch that frees a set
                     * returned by opal_hwloc_base_get_available_cpus(); the
                     * core/cache/socket/NUMA branches and nodeset do not.
                     * Confirm who owns that bitmap.
                     */
                    hwloc_bitmap_free(cpus);
                    OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
                                         "%s Process bound to hwthread",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                } else if (OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                    /* if the binding policy is core, then we bind to the nrank-th
                     * core on this node
                     */
                    if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE,
                                                                       0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) {
                        ret = ORTE_ERR_NOT_FOUND;
                        error = "Getting core object";
                        goto error;
                    }
                    cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
                    if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
                        error = "Setting processor affinity failed";
                        ret = ORTE_ERROR;
                        goto error;
                    }
                    hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
                    OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
                                         "%s Process bound to core",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                } else {
                    /* for all higher binding policies, we bind to the specified
                     * object that the nrank-th core belongs to
                     */
                    if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE,
                                                                       0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) {
                        ret = ORTE_ERR_NOT_FOUND;
                        error = "Getting core object";
                        goto error;
                    }
                    if (OPAL_BIND_TO_L1CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                        target = HWLOC_OBJ_CACHE;
                        cache_level = 1;
                    } else if (OPAL_BIND_TO_L2CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                        target = HWLOC_OBJ_CACHE;
                        cache_level = 2;
                    } else if (OPAL_BIND_TO_L3CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                        target = HWLOC_OBJ_CACHE;
                        cache_level = 3;
                    } else if (OPAL_BIND_TO_SOCKET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                        target = HWLOC_OBJ_SOCKET;
                    } else if (OPAL_BIND_TO_NUMA == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                        target = HWLOC_OBJ_NODE;
                    } else {
                        ret = ORTE_ERR_NOT_FOUND;
                        error = "Binding policy not known";
                        goto error;
                    }
                    /* climb from the core towards the root until an ancestor
                     * of the requested type (and cache depth) is found
                     */
                    for (obj = obj->parent; NULL != obj; obj = obj->parent) {
                        if (target == obj->type) {
                            if (HWLOC_OBJ_CACHE == target && cache_level != obj->attr->cache.depth) {
                                continue;
                            }
                            /* this is the place! */
                            cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
                            if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
                                ret = ORTE_ERROR;
                                error = "Setting processor affinity failed";
                                goto error;
                            }
                            hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
                            orte_proc_is_bound = true;
                            OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
                                                 "%s Process bound to %s",
                                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                                 hwloc_obj_type_string(target)));
                            break;
                        }
                    }
                    if (!orte_proc_is_bound) {
                        ret = ORTE_ERROR;
                        error = "Setting processor affinity failed";
                        goto error;
                    }
                }
            }
        } else {
            /* not externally bound and no binding to apply - release the
             * bitmap allocated above so it does not leak
             */
            hwloc_bitmap_free(cpus);
        }
    } else {
        OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
                             "%s Process bound at launch",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    }

 MOVEON:
    /* get or update our local cpuset - it will get used multiple
     * times, so it's more efficient to keep a global copy
     */
    opal_hwloc_base_get_local_cpuset();

    /* get the cpus we are bound to */
    mycpus = hwloc_bitmap_alloc();
    if (hwloc_get_cpubind(opal_hwloc_topology,
                          mycpus,
                          HWLOC_CPUBIND_PROCESS) < 0) {
        /* binding cannot be queried - drop any stale string form */
        if (NULL != orte_process_info.cpuset) {
            free(orte_process_info.cpuset);
            orte_process_info.cpuset = NULL;
        }
        if (opal_hwloc_report_bindings || 4 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) {
            opal_output(0, "MCW rank %d is not bound",
                        ORTE_PROC_MY_NAME->vpid);
        }
    } else {
        /* store/update the string representation of our local binding */
        if (NULL != orte_process_info.cpuset) {
            free(orte_process_info.cpuset);
            orte_process_info.cpuset = NULL;
        }
        hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, mycpus);
        /* report the binding, if requested */
        if (opal_hwloc_report_bindings || 4 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) {
            char tmp1[1024], tmp2[1024];
            if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2str(tmp1, sizeof(tmp1), opal_hwloc_topology, mycpus)) {
                opal_output(0, "MCW rank %d is not bound (or bound to all available processors)", ORTE_PROC_MY_NAME->vpid);
            } else {
                opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), opal_hwloc_topology, mycpus);
                opal_output(0, "MCW rank %d bound to %s: %s",
                            ORTE_PROC_MY_NAME->vpid, tmp1, tmp2);
            }
        }
    }
    hwloc_bitmap_free(mycpus);
    /* push our cpuset so others can calculate our locality */
    if (NULL != orte_process_info.cpuset) {
        OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_CPUSET,
                              orte_process_info.cpuset, OPAL_STRING);
    }
    return ORTE_SUCCESS;

 error:
    /* report the failure once here; callers only see ORTE_ERR_SILENT */
    if (ORTE_ERR_SILENT != ret) {
        orte_show_help("help-orte-runtime",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }

    return ORTE_ERR_SILENT;
}
Пример #7
0
    }

    /* print the corresponding NUMA nodes */
    hwloc_bitmap_asprintf(&s, set);
    printf("bound to nodeset %s with contains:\n", s);
    free(s);
    hwloc_bitmap_foreach_begin(i, set) {
        obj = hwloc_get_numanode_obj_by_os_index(topology, i);
        printf("  node #%u (OS index %u) with %lld bytes of memory\n",
               obj->logical_index, i, (unsigned long long) obj->memory.local_memory);
    }
    hwloc_bitmap_foreach_end();
    hwloc_bitmap_free(set);

    /* check alloc+bind support */
    support = hwloc_topology_get_support(topology);
    if (support->membind->bind_membind) {
        printf("BIND memory binding policy is supported\n");
    } else {
        printf("BIND memory binding policy is NOT supported\n");
    }
    if (support->membind->alloc_membind) {
        printf("Allocating bound memory is supported\n");
    } else {
        printf("Allocating bound memory is NOT supported\n");
    }

    /* allocate memory of each nodes */
    printf("allocating memory on each node\n");
    obj = NULL;
    buffer = NULL;
/**
 * Bind every not-yet-bound process of a job to opal_hwloc_base_cpu_set.
 *
 * For each node in the job map the topology's cpubind and membind support
 * is checked (unless orte_do_not_launch is set), and then every process of
 * this job on that node that has no cpu_bitmap yet receives the list-string
 * form of the available cpus recorded in the topology root's userdata.
 *
 * @param jdata  job whose processes are to be bound
 *
 * @return ORTE_SUCCESS on success, ORTE_ERR_SILENT when a required binding
 *         capability is missing (after showing help), ORTE_ERR_NOT_FOUND
 *         when a topology carries no summary data or no available cpuset
 */
static int bind_to_cpuset(orte_job_t *jdata)
{
    orte_job_map_t *jmap;
    int node_idx, proc_idx;

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps: bind job %s to cpuset %s",
                        ORTE_JOBID_PRINT(jdata->jobid),
                        opal_hwloc_base_cpu_set);

    jmap = jdata->map;

    for (node_idx = 0; node_idx < jmap->nodes->size; node_idx++) {
        orte_node_t *node;
        opal_hwloc_topo_data_t *summary;
        hwloc_obj_t top;

        node = (orte_node_t*)opal_pointer_array_get_item(jmap->nodes, node_idx);
        if (NULL == node) {
            continue;
        }

        /* when only testing the system (do-not-launch), questions about
         * support capabilities are ignored
         */
        if (!orte_do_not_launch) {
            const struct hwloc_topology_support *caps =
                hwloc_topology_get_support(node->topology);

            /* cpubind: Linux doesn't currently support thread-level binding
             * and it isn't clear how hwloc reports that, so accept either
             * the process-level or the thread-level flag
             */
            if (!(caps->cpubind->set_thisproc_cpubind ||
                  caps->cpubind->set_thisthread_cpubind)) {
                if (OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy)) {
                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:cpubind-not-supported", true, node->name);
                    return ORTE_ERR_SILENT;
                }
                /* binding is optional - skip this node */
                continue;
            }

            /* membind: per hwloc, Linux memory binding is per thread, so the
             * "thisproc" flag is false on Linux and "thisthread" indicates
             * the capability - again accept either flag
             */
            if (!(caps->membind->set_thisproc_membind ||
                  caps->membind->set_thisthread_membind)) {
                if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) {
                    /* warn only once */
                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name);
                    membind_warned = true;
                } else if (OPAL_HWLOC_BASE_MBFA_ERROR == opal_hwloc_base_mbfa) {
                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported-fatal", true, node->name);
                    return ORTE_ERR_SILENT;
                }
            }
        }

        /* the summary data hangs off the root object of the topology */
        top = hwloc_get_root_obj(node->topology);
        if (NULL == top->userdata) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return ORTE_ERR_NOT_FOUND;
        }
        summary = (opal_hwloc_topo_data_t*)top->userdata;
        if (NULL == summary->available) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return ORTE_ERR_NOT_FOUND;
        }

        for (proc_idx = 0; proc_idx < node->procs->size; proc_idx++) {
            orte_proc_t *proc;

            proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, proc_idx);
            /* skip empty slots, procs of other jobs, and procs that already
             * carry a binding (should never happen, but safer)
             */
            if (NULL == proc ||
                proc->name.jobid != jdata->jobid ||
                NULL != proc->cpu_bitmap) {
                continue;
            }
            hwloc_bitmap_list_asprintf(&proc->cpu_bitmap, summary->available);
        }
    }

    return ORTE_SUCCESS;
}
Пример #9
0
int main(int argc, char *argv[])
{
    const struct hwloc_topology_support *support;
    hwloc_topology_t topology;
    hwloc_const_bitmap_t topocpuset;
    hwloc_bitmap_t cpuset;
    unsigned long flags = 0;
    DIR *dir;
    struct dirent *dirent;
    int show_all = 0;
    int show_threads = 0;
    int get_last_cpu_location = 0;
    char *callname;
    char *pidcmd = NULL;
    int err;
    int opt;

    callname = strrchr(argv[0], '/');
    if (!callname)
        callname = argv[0];
    else
        callname++;
    /* skip argv[0], handle options */
    argc--;
    argv++;

    hwloc_utils_check_api_version(callname);

    while (argc >= 1) {
        opt = 0;
        if (!strcmp(argv[0], "-a"))
            show_all = 1;
        else if (!strcmp(argv[0], "-l") || !strcmp(argv[0], "--logical")) {
            logical = 1;
        } else if (!strcmp(argv[0], "-p") || !strcmp(argv[0], "--physical")) {
            logical = 0;
        } else if (!strcmp(argv[0], "-c") || !strcmp(argv[0], "--cpuset")) {
            show_cpuset = 1;
        } else if (!strcmp(argv[0], "-e") || !strncmp(argv[0], "--get-last-cpu-location", 10)) {
            get_last_cpu_location = 1;
        } else if (!strcmp(argv[0], "-t") || !strcmp(argv[0], "--threads")) {
#ifdef HWLOC_LINUX_SYS
            show_threads = 1;
#else
            fprintf (stderr, "Listing threads is currently only supported on Linux\n");
#endif
        } else if (!strcmp (argv[0], "--whole-system")) {
            flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM;
        } else if (!strcmp (argv[0], "--pid-cmd")) {
            if (argc < 2) {
                usage(callname, stdout);
                exit(EXIT_FAILURE);
            }
            pidcmd = argv[1];
            opt = 1;
        } else {
            fprintf (stderr, "Unrecognized option: %s\n", argv[0]);
            usage (callname, stderr);
            exit(EXIT_FAILURE);
        }
        argc -= opt+1;
        argv += opt+1;
    }

    err = hwloc_topology_init(&topology);
    if (err)
        goto out;

    hwloc_topology_set_flags(topology, flags);

    err = hwloc_topology_load(topology);
    if (err)
        goto out_with_topology;

    support = hwloc_topology_get_support(topology);

    if (get_last_cpu_location) {
        if (!support->cpubind->get_proc_last_cpu_location)
            goto out_with_topology;
    } else {
        if (!support->cpubind->get_proc_cpubind)
            goto out_with_topology;
    }

    topocpuset = hwloc_topology_get_topology_cpuset(topology);

    dir  = opendir("/proc");
    if (!dir)
        goto out_with_topology;

    cpuset = hwloc_bitmap_alloc();
    if (!cpuset)
        goto out_with_dir;

    while ((dirent = readdir(dir))) {
        long pid_number;
        hwloc_pid_t pid;
        char pidoutput[1024];
        char *end;
        char name[64] = "";
        /* management of threads */
        unsigned boundthreads = 0, i;
        long *tids = NULL; /* NULL if process is not threaded */
        hwloc_bitmap_t *tidcpusets = NULL;

        pid_number = strtol(dirent->d_name, &end, 10);
        if (*end)
            /* Not a number */
            continue;

        pid = hwloc_pid_from_number(pid_number, 0);

#ifdef HWLOC_LINUX_SYS
        {
            unsigned pathlen = 6 + strlen(dirent->d_name) + 1 + 7 + 1;
            char *path;
            int file;
            ssize_t n;

            path = malloc(pathlen);
            snprintf(path, pathlen, "/proc/%s/cmdline", dirent->d_name);
            file = open(path, O_RDONLY);
            free(path);

            if (file >= 0) {
                n = read(file, name, sizeof(name) - 1);
                close(file);

                if (n <= 0)
                    /* Ignore kernel threads and errors */
                    continue;

                name[n] = 0;
            }
        }
#endif /* HWLOC_LINUX_SYS */

        if (show_threads) {
#ifdef HWLOC_LINUX_SYS
            /* check if some threads must be displayed */
            unsigned pathlen = 6 + strlen(dirent->d_name) + 1 + 4 + 1;
            char *path;
            DIR *taskdir;

            path = malloc(pathlen);
            snprintf(path, pathlen, "/proc/%s/task", dirent->d_name);
            taskdir = opendir(path);
            if (taskdir) {
                struct dirent *taskdirent;
                long tid;
                unsigned n = 0;
                /* count threads */
                while ((taskdirent = readdir(taskdir))) {
                    tid = strtol(taskdirent->d_name, &end, 10);
                    if (*end)
                        /* Not a number */
                        continue;
                    n++;
                }
                if (n > 1) {
                    /* if there's more than one thread, see if some are bound */
                    tids = malloc(n * sizeof(*tids));
                    tidcpusets = calloc(n+1, sizeof(*tidcpusets));
                    if (tids && tidcpusets) {
                        /* reread the directory but gather info now */
                        rewinddir(taskdir);
                        i = 0;
                        while ((taskdirent = readdir(taskdir))) {
                            tid = strtol(taskdirent->d_name, &end, 10);
                            if (*end)
                                /* Not a number */
                                continue;
                            if (get_last_cpu_location) {
                                if (hwloc_linux_get_tid_last_cpu_location(topology, tid, cpuset))
                                    continue;
                            } else {
                                if (hwloc_linux_get_tid_cpubind(topology, tid, cpuset))
                                    continue;
                            }
                            hwloc_bitmap_and(cpuset, cpuset, topocpuset);
                            tids[i] = tid;
                            tidcpusets[i] = hwloc_bitmap_dup(cpuset);
                            i++;
                            if (hwloc_bitmap_iszero(cpuset))
                                continue;
                            if (hwloc_bitmap_isequal(cpuset, topocpuset) && !show_all)
                                continue;
                            boundthreads++;
                        }
                    } else {
                        /* failed to alloc, behave as if there were no threads */
                        free(tids);
                        tids = NULL;
                        free(tidcpusets);
                        tidcpusets = NULL;
                    }
                }
                closedir(taskdir);
            }
#endif /* HWLOC_LINUX_SYS */
        }

        if (get_last_cpu_location) {
            if (hwloc_get_proc_last_cpu_location(topology, pid, cpuset, 0))
                continue;
        } else {
            if (hwloc_get_proc_cpubind(topology, pid, cpuset, 0))
                continue;
        }

        hwloc_bitmap_and(cpuset, cpuset, topocpuset);
        if (hwloc_bitmap_iszero(cpuset))
            continue;

        /* don't print anything if the process isn't bound and if no threads are bound and if not showing all */
        if (hwloc_bitmap_isequal(cpuset, topocpuset) && (!tids || !boundthreads) && !show_all)
            continue;

        pidoutput[0] = '\0';
        if (pidcmd) {
            char *cmd;
            FILE *file;
            cmd = malloc(strlen(pidcmd)+1+5+2+1);
            sprintf(cmd, "%s %u", pidcmd, pid);
            file = popen(cmd, "r");
            if (file) {
                if (fgets(pidoutput, sizeof(pidoutput), file)) {
                    end = strchr(pidoutput, '\n');
                    if (end)
                        *end = '\0';
                }
                pclose(file);
            }
            free(cmd);
        }

        /* print the process */
        print_task(topology, pid_number, name, cpuset, pidoutput[0] == '\0' ? NULL : pidoutput, 0);
        if (tids)
            /* print each tid we found (it's tidcpuset isn't NULL anymore) */
            for(i=0; tidcpusets[i] != NULL; i++) {
                print_task(topology, tids[i], "", tidcpusets[i], NULL, 1);
                hwloc_bitmap_free(tidcpusets[i]);
            }

        /* free threads stuff */
        free(tidcpusets);
        free(tids);
    }

    err = 0;
    hwloc_bitmap_free(cpuset);

out_with_dir:
    closedir(dir);
out_with_topology:
    hwloc_topology_destroy(topology);
out:
    return err;
}
Пример #10
0
/*
 * Append a human-readable description of a topology object, followed by
 * the descriptions of all of its children (recursively), to *output.
 *
 * output  in/out: NULL or a heap-allocated string. The text for this
 *         subtree is appended to it; the previous string is freed and
 *         *output is replaced by the newly allocated result.
 * prefix  indentation placed in front of the object's first line (NULL
 *         means none); children are printed with one extra tab.
 * topo    topology the object belongs to; it is only queried for its
 *         support flags when a MACHINE object is printed.
 * obj     object to describe.
 */
static void print_hwloc_obj(char **output, char *prefix,
                            hwloc_topology_t topo, hwloc_obj_t obj)
{
    hwloc_obj_t obj2;
    char string[1024], *tmp, *tmp2, *pfx;
    unsigned i;
    const struct hwloc_topology_support *support;

    /* print the object type */
    hwloc_obj_type_snprintf(string, sizeof(string), obj, 1);
    /* separator placed in front of every additional field:
     * newline, the caller's prefix, then a tab
     */
    asprintf(&pfx, "\n%s\t", (NULL == prefix) ? "" : prefix);
    asprintf(&tmp, "%sType: %s Number of child objects: %u%sName=%s",
             (NULL == prefix) ? "" : prefix, string, obj->arity,
             pfx, (NULL == obj->name) ? "NULL" : obj->name);
    if (0 < hwloc_obj_attr_snprintf(string, sizeof(string), obj, pfx, 1)) {
        /* print the attributes */
        asprintf(&tmp2, "%s%s%s", tmp, pfx, string);
        free(tmp);
        tmp = tmp2;
    }
    /* print the cpusets - apparently, some new HWLOC types don't
     * have cpusets, so protect ourselves here.
     *
     * The size handed to hwloc_bitmap_snprintf() must be the size of
     * the local buffer itself: a separately defined maximum is not
     * guaranteed to match it and could let the call write past the end.
     */
    if (NULL != obj->cpuset) {
        hwloc_bitmap_snprintf(string, sizeof(string), obj->cpuset);
        asprintf(&tmp2, "%s%sCpuset:  %s", tmp, pfx, string);
        free(tmp);
        tmp = tmp2;
    }
    if (NULL != obj->online_cpuset) {
        hwloc_bitmap_snprintf(string, sizeof(string), obj->online_cpuset);
        asprintf(&tmp2, "%s%sOnline:  %s", tmp, pfx, string);
        free(tmp);
        tmp = tmp2;
    }
    if (NULL != obj->allowed_cpuset) {
        hwloc_bitmap_snprintf(string, sizeof(string), obj->allowed_cpuset);
        asprintf(&tmp2, "%s%sAllowed: %s", tmp, pfx, string);
        free(tmp);
        tmp = tmp2;
    }
    if (HWLOC_OBJ_MACHINE == obj->type) {
        /* root level object - add support values */
        support = hwloc_topology_get_support(topo);
        asprintf(&tmp2, "%s%sBind CPU proc:   %s%sBind CPU thread: %s", tmp, pfx,
                 (support->cpubind->set_thisproc_cpubind) ? "TRUE" : "FALSE", pfx,
                 (support->cpubind->set_thisthread_cpubind) ? "TRUE" : "FALSE");
        free(tmp);
        tmp = tmp2;
        asprintf(&tmp2, "%s%sBind MEM proc:   %s%sBind MEM thread: %s", tmp, pfx,
                 (support->membind->set_thisproc_membind) ? "TRUE" : "FALSE", pfx,
                 (support->membind->set_thisthread_membind) ? "TRUE" : "FALSE");
        free(tmp);
        tmp = tmp2;
    }
    /* tmp2 becomes the accumulated text: whatever the caller already
     * had, followed by this object's description
     */
    asprintf(&tmp2, "%s%s\n", (NULL == *output) ? "" : *output, tmp);
    free(tmp);
    free(pfx);
    /* children are indented one tab deeper than this object */
    asprintf(&pfx, "%s\t", (NULL == prefix) ? "" : prefix);
    for (i=0; i < obj->arity; i++) {
        obj2 = obj->children[i];
        /* print the object - each call appends to tmp2 and replaces it */
        print_hwloc_obj(&tmp2, pfx, topo, obj2);
    }
    free(pfx);
    /* hand the accumulated text back, releasing the caller's old string */
    if (NULL != *output) {
        free(*output);
    }
    *output = tmp2;
}
Пример #11
0
/*
 * Work out whether this process is bound to a subset of the node's
 * processors and, when it is not and a binding policy has been set,
 * bind it according to that policy. Optionally reports the resulting
 * binding when opal_hwloc_report_bindings is set.
 *
 * Returns ORTE_SUCCESS on success (and always when built without
 * hwloc support). On failure a help message is shown - unless the
 * error code already is ORTE_ERR_SILENT - and ORTE_ERR_SILENT is
 * returned.
 */
int orte_ess_base_proc_binding(void)
{
#if OPAL_HAVE_HWLOC
    hwloc_obj_t node, obj;
    hwloc_cpuset_t cpus, nodeset;
    hwloc_obj_type_t target;
    unsigned int cache_level = 0;
    struct hwloc_topology_support *support;
    char *map;
    int ret;
    char *error;

    /* Determine if we were pre-bound or not */
    if (NULL != getenv("OMPI_MCA_orte_bound_at_launch")) {
        orte_proc_is_bound = true;
        if (NULL != (map = getenv("OMPI_MCA_orte_base_applied_binding"))) {
            orte_proc_applied_binding = hwloc_bitmap_alloc();
            if (0 != (ret = hwloc_bitmap_list_sscanf(orte_proc_applied_binding, map))) {
                error = "applied_binding parse";
                goto error;
            }
        }
    }

    /* see if we were bound when launched */
    if (!orte_proc_is_bound) {
        OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output,
                             "%s Not bound at launch",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        /* we were not bound at launch */
        if (NULL != opal_hwloc_topology) {
            support = (struct hwloc_topology_support*)hwloc_topology_get_support(opal_hwloc_topology);
            /* get our node object */
            node = hwloc_get_root_obj(opal_hwloc_topology);
            nodeset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, node);
            /* get our bindings */
            cpus = hwloc_bitmap_alloc();
            if (hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS) < 0) {
                /* we are NOT bound if get_cpubind fails, nor can we be bound - the
                 * environment does not support it
                 */
                hwloc_bitmap_free(cpus);
                OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output,
                                     "%s Binding not supported",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                goto MOVEON;
            }
            /* we are bound if the two cpusets are not equal,
             * or if there is only ONE cpu available to us
             */
            if (0 != hwloc_bitmap_compare(cpus, nodeset) ||
                opal_hwloc_base_single_cpu(nodeset) ||
                opal_hwloc_base_single_cpu(cpus)) {
                /* someone external set it - indicate it is set
                 * so that we know
                 */
                orte_proc_is_bound = true;
                hwloc_bitmap_free(cpus);
                OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output,
                                     "%s Process was externally bound",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            } else if (support->cpubind->set_thisproc_cpubind &&
                       OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy) &&
                       OPAL_BIND_TO_NONE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                /* the system is capable of doing processor affinity, but it
                 * has not yet been set - see if a slot_list was given
                 */
                hwloc_bitmap_zero(cpus);
                if (OPAL_BIND_TO_CPUSET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                    if (OPAL_SUCCESS != (ret = opal_hwloc_base_slot_list_parse(opal_hwloc_base_slot_list,
                                                                               opal_hwloc_topology, cpus))) {
                        error = "Setting processor affinity failed";
                        hwloc_bitmap_free(cpus);
                        goto error;
                    }
                    if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
                        /* ret still holds the success value from the parse
                         * above - give it a real error code so the failure
                         * is not reported with a success code
                         */
                        ret = ORTE_ERROR;
                        error = "Setting processor affinity failed";
                        hwloc_bitmap_free(cpus);
                        goto error;
                    }
                    /* try to find a level and index for this location */
                    opal_hwloc_base_get_level_and_index(cpus, &orte_process_info.bind_level, &orte_process_info.bind_idx);
                    /* cleanup */
                    hwloc_bitmap_free(cpus);
                    orte_proc_is_bound = true;
                    OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output,
                                         "%s Process bound according to slot_list",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                } else {
                    /* cleanup - from here on cpus only ever refers to sets
                     * returned by opal_hwloc_base_get_available_cpus, which
                     * this function never frees
                     */
                    hwloc_bitmap_free(cpus);
                    /* get the node rank */
                    if (ORTE_NODE_RANK_INVALID == orte_process_info.my_node_rank) {
                        /* this is not an error - could be due to being
                         * direct launched - so just ignore and leave
                         * us unbound
                         */
                        OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output,
                                             "%s Process not bound - no node rank available",
                                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                        goto MOVEON;
                    }
                    /* if the binding policy is hwthread, then we bind to the nrank-th
                     * hwthread on this node
                     */
                    if (OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                        if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_PU,
                                                                           0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) {
                            ret = ORTE_ERR_NOT_FOUND;
                            error = "Getting hwthread object";
                            goto error;
                        }
                        cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
                        if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
                            ret = ORTE_ERROR;
                            error = "Setting processor affinity failed";
                            goto error;
                        }
                        orte_process_info.bind_level = OPAL_HWLOC_HWTHREAD_LEVEL;
                        orte_process_info.bind_idx = orte_process_info.my_node_rank;
                        OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output,
                                             "%s Process bound to hwthread",
                                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                    } else if (OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                        /* if the binding policy is core, then we bind to the nrank-th
                         * core on this node
                         */
                        if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE,
                                                                           0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) {
                            ret = ORTE_ERR_NOT_FOUND;
                            error = "Getting core object";
                            goto error;
                        }
                        cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
                        if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
                            error = "Setting processor affinity failed";
                            ret = ORTE_ERROR;
                            goto error;
                        }
                        orte_process_info.bind_level = OPAL_HWLOC_CORE_LEVEL;
                        orte_process_info.bind_idx = orte_process_info.my_node_rank;
                        OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output,
                                             "%s Process bound to core",
                                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                    } else {
                        /* for all higher binding policies, we bind to the specified
                         * object that the nrank-th core belongs to
                         */
                        if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE,
                                                                           0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) {
                            ret = ORTE_ERR_NOT_FOUND;
                            error = "Getting core object";
                            goto error;
                        }
                        if (OPAL_BIND_TO_L1CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                            target = HWLOC_OBJ_CACHE;
                            cache_level = 1;
                            orte_process_info.bind_level = OPAL_HWLOC_L1CACHE_LEVEL;
                        } else if (OPAL_BIND_TO_L2CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                            target = HWLOC_OBJ_CACHE;
                            cache_level = 2;
                            orte_process_info.bind_level = OPAL_HWLOC_L2CACHE_LEVEL;
                        } else if (OPAL_BIND_TO_L3CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                            target = HWLOC_OBJ_CACHE;
                            cache_level = 3;
                            orte_process_info.bind_level = OPAL_HWLOC_L3CACHE_LEVEL;
                        } else if (OPAL_BIND_TO_SOCKET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                            target = HWLOC_OBJ_SOCKET;
                            orte_process_info.bind_level = OPAL_HWLOC_SOCKET_LEVEL;
                        } else if (OPAL_BIND_TO_NUMA == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                            target = HWLOC_OBJ_NODE;
                            orte_process_info.bind_level = OPAL_HWLOC_NUMA_LEVEL;
                        } else {
                            ret = ORTE_ERR_NOT_FOUND;
                            error = "Binding policy not known";
                            goto error;
                        }
                        /* walk up from the core until an ancestor of the
                         * requested type (and cache depth, for caches) is found
                         */
                        for (obj = obj->parent; NULL != obj; obj = obj->parent) {
                            if (target == obj->type) {
                                if (HWLOC_OBJ_CACHE == target && cache_level != obj->attr->cache.depth) {
                                    continue;
                                }
                                /* this is the place! */
                                cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
                                if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
                                    ret = ORTE_ERROR;
                                    error = "Setting processor affinity failed";
                                    goto error;
                                }
                                orte_process_info.bind_idx = opal_hwloc_base_get_obj_idx(opal_hwloc_topology,
                                                                                         obj, OPAL_HWLOC_LOGICAL);
                                orte_proc_is_bound = true;
                                OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output,
                                                     "%s Process bound to %s",
                                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                                     opal_hwloc_base_print_level(orte_process_info.bind_level)));
                                break;
                            }
                        }
                        if (!orte_proc_is_bound) {
                            ret = ORTE_ERROR;
                            error = "Setting processor affinity failed";
                            goto error;
                        }
                    }
                }
            } else {
                /* not externally bound, and either binding is unsupported
                 * or no binding policy was requested - release the scratch
                 * bitmap instead of leaking it
                 */
                hwloc_bitmap_free(cpus);
            }
        }
    } else {
        OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output,
                             "%s Process bound at launch",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    }

 MOVEON:
    /* get or update our local cpuset - it will get used multiple
     * times, so it's more efficient to keep a global copy
     */
    opal_hwloc_base_get_local_cpuset();
    /* report bindings, if requested */
    if (opal_hwloc_report_bindings) {
        char bindings[64];
        hwloc_obj_t root;
        hwloc_cpuset_t cpus;
        /* get the root object for this node */
        root = hwloc_get_root_obj(opal_hwloc_topology);
        cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, root);
        /* we are not bound if this equals our cpuset */
        if (0 == hwloc_bitmap_compare(cpus, opal_hwloc_my_cpuset)) {
            opal_output(0, "%s is not bound",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        } else {
            hwloc_bitmap_list_snprintf(bindings, 64, opal_hwloc_my_cpuset);
            opal_output(0, "%s is bound to cpus %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        bindings);
        }
    }

    return ORTE_SUCCESS;

 error:
    /* every jump here has set both ret and error */
    if (ORTE_ERR_SILENT != ret) {
        orte_show_help("help-orte-runtime",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }

    return ORTE_ERR_SILENT;

#else
    return ORTE_SUCCESS;
#endif
}
Пример #12
0
//
// Load the hwloc topology (when this configuration uses it) and cache
// the values derived from it: the support structure, whether setting
// area memory binding is allowed, the topology depth, the NUMA level
// and the number of NUMA domains.
//
void chpl_topo_init(void) {
  //
  // The topology is loaded only when the locale model is something
  // other than "flat", or when tasking is Qthreads-based (which will
  // use the topology we load).  Loading is somewhat expensive, so
  // every other configuration skips it (so far).
  //
  haveTopology = (strcmp(CHPL_LOCALE_MODEL, "flat") != 0
                  || strcmp(CHPL_TASKS, "qthreads") == 0);
  if (!haveTopology) {
    return;
  }

  //
  // hwloc API version 1.11 is the minimum (1.11.5 is needed to not
  // crash in some NUMA configurations).  The check is made both when
  // building and when running.
  //
#define REQUIRE_HWLOC_VERSION 0x00010b00

#if HWLOC_API_VERSION < REQUIRE_HWLOC_VERSION
#error hwloc version 1.11.5 or newer is required
#endif

  CHK_ERR(hwloc_get_api_version() >= REQUIRE_HWLOC_VERSION);

  // Create the topology object, then run detection on it.
  CHK_ERR_ERRNO(hwloc_topology_init(&topology) == 0);
  CHK_ERR_ERRNO(hwloc_topology_load(topology) == 0);

  // Remember what this topology supports.
  topoSupport = hwloc_topology_get_support(topology);

  //
  // TODO: update comment
  // For now, setting memory locality is not supported when comm=ugni
  // or comm=gasnet, seg!=everything.  Those two configurations use
  // hugepages and/or memory registered with the comm interface, both
  // of which may be a problem for the set-membind call.  There will be
  // other ways to achieve locality for these configs in the future.
  //
  do_set_area_membind = (strcmp(CHPL_COMM, "gasnet") != 0
                         || strcmp(CHPL_GASNET_SEGMENT, "everything") == 0);

  // Depth information is needed below.
  topoDepth = hwloc_topology_get_depth(topology);

  //
  // Locate the NUMA level.  Note: if several levels hold NUMA nodes,
  // only the uppermost one is found; numaLevel stays -1 if none is.
  //
  numaLevel = -1;
  for (int depth = 0; depth < topoDepth; depth++) {
    if (hwloc_get_depth_type(topology, depth) == HWLOC_OBJ_NUMANODE) {
      numaLevel = depth;
      break;
    }
  }

  //
  // Count the NUMA nodes, that is, the objects at numaLevel that also
  // have CPUs.  This is as opposed to things like Xeon Phi HBM, which
  // is memory-only, no CPUs.
  //
  numNumaDomains =
    hwloc_get_nbobjs_inside_cpuset_by_depth(topology,
                                            hwloc_get_root_obj(topology)->cpuset,
                                            numaLevel);
}
Пример #13
0
//
// Load the hwloc topology (unless the locale model is "flat") and
// cache the values derived from it: the support structure, whether
// setting area memory binding is allowed, the topology depth, the
// NUMA level and the number of NUMA domains.
//
void chpl_topo_init(void) {
  //
  // For now topology information is not loaded for locModel=flat: it
  // would not be used there and loading it is somewhat expensive.
  // Eventually it will probably be loaded even for locModel=flat, used
  // as the information source for what's currently in chplsys, and
  // passed to Qthreads when that is in use (so it doesn't load it
  // again), but that's work for the future.
  //
  if (strcmp(CHPL_LOCALE_MODEL, "flat") == 0) {
    haveTopology = false;
    return;
  }
  haveTopology = true;

  //
  // hwloc API version 1.11 is the minimum (1.11.5 is needed to not
  // crash in some NUMA configurations).  The check is made both when
  // building and when running.
  //
#define REQUIRE_HWLOC_VERSION 0x00010b00

#if HWLOC_API_VERSION < REQUIRE_HWLOC_VERSION
#error hwloc version 1.11.5 or newer is required
#else
  // run-time counterpart of the build-time check above
  if (hwloc_get_api_version() < REQUIRE_HWLOC_VERSION) {
    chpl_internal_error("hwloc version 1.11.5 or newer is required");
  }
#endif

  // Create the topology object.
  if (hwloc_topology_init(&topology) != 0) {
    report_error("hwloc_topology_init()", errno);
  }

  // Run topology detection.
  if (hwloc_topology_load(topology) != 0) {
    report_error("hwloc_topology_load()", errno);
  }

  // Remember what this topology supports.
  topoSupport = hwloc_topology_get_support(topology);

  //
  // TODO: update comment
  // For now, setting memory locality is not supported when comm=ugni
  // or comm=gasnet, seg!=everything.  Those two configurations use
  // hugepages and/or memory registered with the comm interface, both
  // of which may be a problem for the set-membind call.  There will be
  // other ways to achieve locality for these configs in the future.
  //
  if (strcmp(CHPL_COMM, "gasnet") == 0
      && strcmp(CHPL_GASNET_SEGMENT, "everything") != 0) {
    do_set_area_membind = false;
  } else {
    do_set_area_membind = true;
  }

  // Depth information is needed below.
  topoDepth = hwloc_topology_get_depth(topology);

  //
  // Locate the NUMA level.  Note: if several levels hold NUMA nodes,
  // only the uppermost one is found; numaLevel stays -1 if none is.
  //
  numaLevel = -1;
  {
    int depth = 0;

    while (depth < topoDepth && numaLevel == -1) {
      if (hwloc_get_depth_type(topology, depth) == HWLOC_OBJ_NUMANODE) {
        numaLevel = depth;
      }
      depth++;
    }
  }

  //
  // Count the NUMA nodes, that is, the objects at numaLevel that also
  // have CPUs.  This is as opposed to things like Xeon Phi HBM, which
  // is memory-only, no CPUs.
  //
  numNumaDomains =
    hwloc_get_nbobjs_inside_cpuset_by_depth(topology,
                                            hwloc_get_root_obj(topology)->cpuset,
                                            numaLevel);
}
Пример #14
0
/*
 * Exercise hwloc_get_area_memlocation(): allocate a buffer of LEN bytes,
 * bind it in turn to up to four NUMA nodes that have local memory,
 * touching one quarter of the buffer after each binding, and check at
 * the end that the location reported for the whole buffer covers every
 * location reported along the way.  Does nothing when the topology does
 * not support get_area_memlocation.
 */
int main(void)
{
  const struct hwloc_topology_support *support;
  char *buffer;
  hwloc_topology_t topology;
  hwloc_bitmap_t set = hwloc_bitmap_alloc();
  hwloc_bitmap_t total = hwloc_bitmap_alloc();
  hwloc_obj_t numa = NULL;
  char *str;
  int err;

  err = hwloc_topology_init(&topology);
  assert(!err);
  err = hwloc_topology_load(topology);
  assert(!err);

  support = hwloc_topology_get_support(topology);
  if (!support->membind->get_area_memlocation)
    goto out;

  buffer = hwloc_alloc(topology, LEN);
  assert(buffer);
  printf("buffer %p length %u\n", buffer, LEN);

  /* where does the untouched buffer currently live? */
  err = hwloc_get_area_memlocation(topology, buffer, LEN, set, HWLOC_MEMBIND_BYNODESET);
  if (err < 0 && errno == ENOSYS) {
    fprintf(stderr, "hwloc_get_area_memlocation() failed with ENOSYS, aborting\n");
    goto out_with_buffer;
  }
  assert(!err);
  hwloc_bitmap_asprintf(&str, set);
  printf("address %p length %u allocated in nodeset %s\n", buffer, LEN, str);
  free(str);
  hwloc_bitmap_copy(total, set);

  /* 1st NUMA node with local memory; give up if there is none at all */
  do {
    numa = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, numa);
    if (!numa)
      goto out_with_buffer;
  } while (!numa->memory.local_memory);
  printf("binding to 1st node and touching 1st quarter\n");
  err = hwloc_set_area_membind(topology, buffer, LEN, numa->nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
  if (err < 0 && errno == ENOSYS) {
    fprintf(stderr, "hwloc_set_area_membind() failed with ENOSYS, aborting\n");
    goto out_with_buffer;
  }
  assert(!err);

  memset(buffer, 0, LEN/4);
  /* only the first byte is queried here, although LEN/4 is printed */
  err = hwloc_get_area_memlocation(topology, buffer, 1, set, HWLOC_MEMBIND_BYNODESET);
  assert(!err);
  hwloc_bitmap_asprintf(&str, set);
  printf("address %p length %u allocated in nodeset %s\n", buffer, LEN/4, str);
  free(str);
  hwloc_bitmap_or(total, total, set);

  /* 2nd NUMA node with local memory, continuing after the previous one */
  do {
    numa = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, numa);
    if (!numa)
      goto out_with_nomorenodes;
  } while (!numa->memory.local_memory);
  printf("binding to 2nd node and touching 2nd quarter\n");
  err = hwloc_set_area_membind(topology, buffer, LEN, numa->nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
  assert(!err);

  memset(buffer+LEN/4, 0, LEN/4);
  err = hwloc_get_area_memlocation(topology, buffer+LEN/4, LEN/4, set, HWLOC_MEMBIND_BYNODESET);
  assert(!err);
  hwloc_bitmap_asprintf(&str, set);
  printf("address %p length %u allocated in nodeset %s\n", buffer+LEN/4, LEN/4, str);
  free(str);
  hwloc_bitmap_or(total, total, set);

  /* 3rd NUMA node with local memory */
  do {
    numa = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, numa);
    if (!numa)
      goto out_with_nomorenodes;
  } while (!numa->memory.local_memory);
  printf("binding to 3rd node and touching 3rd quarter\n");
  err = hwloc_set_area_membind(topology, buffer, LEN, numa->nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
  assert(!err);

  memset(buffer+LEN/2, 0, LEN/4);
  err = hwloc_get_area_memlocation(topology, buffer+LEN/2, LEN/4, set, HWLOC_MEMBIND_BYNODESET);
  assert(!err);
  hwloc_bitmap_asprintf(&str, set);
  printf("address %p length %u allocated in nodeset %s\n", buffer+LEN/2, LEN/4, str);
  free(str);
  hwloc_bitmap_or(total, total, set);

  /* 4th NUMA node with local memory */
  do {
    numa = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, numa);
    if (!numa)
      goto out_with_nomorenodes;
  } while (!numa->memory.local_memory);
  printf("binding to 4th node and touching 4th quarter\n");
  err = hwloc_set_area_membind(topology, buffer, LEN, numa->nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
  assert(!err);

  memset(buffer+3*LEN/4, 0, LEN/4);
  err = hwloc_get_area_memlocation(topology, buffer+3*LEN/4, LEN/4, set, HWLOC_MEMBIND_BYNODESET);
  assert(!err);
  hwloc_bitmap_asprintf(&str, set);
  printf("address %p length %u allocated in nodeset %s\n", buffer+3*LEN/4, LEN/4, str);
  free(str);
  hwloc_bitmap_or(total, total, set);

 out_with_nomorenodes:
  /* the location of the whole buffer must include everything seen so far */
  err = hwloc_get_area_memlocation(topology, buffer, LEN, set, HWLOC_MEMBIND_BYNODESET);
  assert(!err);
  hwloc_bitmap_asprintf(&str, set);
  printf("address %p length %u located on %s\n", buffer, LEN, str);
  free(str);
  assert(hwloc_bitmap_isincluded(total, set));

 out_with_buffer:
  hwloc_free(topology, buffer, LEN);

 out:
  hwloc_topology_destroy(topology);
  hwloc_bitmap_free(set);
  hwloc_bitmap_free(total);
  return 0;
}
Пример #15
0
/*
 * Order two hwloc topologies.
 *
 * The comparison proceeds in three stages, stopping at the first one
 * that tells the topologies apart:
 *   1. topology depth,
 *   2. strcmp() of the XML export of each topology,
 *   3. the cpubind/membind "set_thisproc"/"set_thisthread" support flags.
 *
 * @param topo1  first topology
 * @param topo2  second topology
 * @param type   data type tag (not referenced by this function)
 *
 * @return OPAL_VALUE1_GREATER, OPAL_VALUE2_GREATER or OPAL_EQUAL.
 *         OPAL_EQUAL is also returned when an XML export fails or when
 *         the support information of either topology is unavailable.
 *         Differing binding capabilities always yield OPAL_VALUE1_GREATER.
 */
int opal_hwloc_compare(const hwloc_topology_t topo1,
                       const hwloc_topology_t topo2,
                       opal_data_type_t type)
{
    /* shed the const qualifier up front so the hwloc calls below
     * do not trigger compiler warnings
     */
    hwloc_topology_t lhs = (hwloc_topology_t)topo1;
    hwloc_topology_t rhs = (hwloc_topology_t)topo2;
    unsigned lhs_depth, rhs_depth;
    char *lhs_xml = NULL, *rhs_xml = NULL;
    int lhs_len, rhs_len;
    int order;
    struct hwloc_topology_support *lhs_sup, *rhs_sup;

    /* stage 1: the cheap test - compare the depths */
    lhs_depth = hwloc_topology_get_depth(lhs);
    rhs_depth = hwloc_topology_get_depth(rhs);
    if (lhs_depth != rhs_depth) {
        return (lhs_depth > rhs_depth) ? OPAL_VALUE1_GREATER : OPAL_VALUE2_GREATER;
    }

    /* stage 2: export both trees to XML and compare the text. This is
     * adequate for inventory-style comparisons; it does not ignore
     * fields (e.g. MAC addresses) that a tree-wise comparison could skip
     */
    if (0 != hwloc_topology_export_xmlbuffer(lhs, &lhs_xml, &lhs_len)) {
        return OPAL_EQUAL;
    }
    if (0 != hwloc_topology_export_xmlbuffer(rhs, &rhs_xml, &rhs_len)) {
        free(lhs_xml);
        return OPAL_EQUAL;
    }

    order = strcmp(lhs_xml, rhs_xml);
    free(lhs_xml);
    free(rhs_xml);
    if (0 != order) {
        return (order > 0) ? OPAL_VALUE1_GREATER : OPAL_VALUE2_GREATER;
    }

    /* stage 3: the support flags are not part of the XML export,
     * so they have to be compared separately
     */
    lhs_sup = (struct hwloc_topology_support*)hwloc_topology_get_support(lhs);
    if (NULL == lhs_sup || NULL == lhs_sup->cpubind || NULL == lhs_sup->membind) {
        return OPAL_EQUAL;
    }
    rhs_sup = (struct hwloc_topology_support*)hwloc_topology_get_support(rhs);
    if (NULL == rhs_sup || NULL == rhs_sup->cpubind || NULL == rhs_sup->membind) {
        return OPAL_EQUAL;
    }

    /* only the four "set" capabilities matter to us */
    if (lhs_sup->cpubind->set_thisproc_cpubind == rhs_sup->cpubind->set_thisproc_cpubind &&
        lhs_sup->cpubind->set_thisthread_cpubind == rhs_sup->cpubind->set_thisthread_cpubind &&
        lhs_sup->membind->set_thisproc_membind == rhs_sup->membind->set_thisproc_membind &&
        lhs_sup->membind->set_thisthread_membind == rhs_sup->membind->set_thisthread_membind) {
        return OPAL_EQUAL;
    }

    OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output,
                         "hwloc:base:compare BINDING CAPABILITIES DIFFER"));
    return OPAL_VALUE1_GREATER;
}
/*
 * Bind every unbound proc of a job to object(s) of the requested type
 * located at or below the proc's locale.
 *
 * For each proc the least-used target object under its locale is picked,
 * and further cousins of that object are added until the accumulated cpu
 * count reaches orte_rmaps_base.cpus_per_rank. The union of the available
 * cpus of all selected objects is stored as a list string in
 * proc->cpu_bitmap.
 *
 * @param jdata        job whose procs are to be bound
 * @param target       hwloc object type to bind to
 * @param cache_level  cache level, forwarded to the target search
 *
 * @return ORTE_SUCCESS, or ORTE_ERR_SILENT on any failure
 */
static int bind_downwards(orte_job_t *jdata,
                          hwloc_obj_type_t target,
                          unsigned cache_level)
{
    int node_idx, proc_idx;
    int rc = ORTE_SUCCESS;
    int cpus_found;
    unsigned int ncpus;
    orte_job_map_t *map;
    orte_node_t *node;
    orte_proc_t *proc;
    hwloc_obj_t cursor;
    hwloc_cpuset_t avail;
    hwloc_cpuset_t bound_set;
    struct hwloc_topology_support *support;
    opal_hwloc_obj_data_t *usage;

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps: bind downward for job %s with bindings %s",
                        ORTE_JOBID_PRINT(jdata->jobid),
                        opal_hwloc_base_print_binding(jdata->map->binding));

    map = jdata->map;
    /* scratch bitmap reused for every proc; released at "done" */
    bound_set = hwloc_bitmap_alloc();

    for (node_idx = 0; node_idx < map->nodes->size; node_idx++) {
        node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, node_idx);
        if (NULL == node) {
            continue;
        }

        /* when we are not going to launch we are only testing the
         * system, so the support capabilities are irrelevant
         */
        if (!orte_do_not_launch) {
            support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology);

            /* cpu binding: look at both the process- and thread-level
             * flags, since it is unclear which one hwloc sets on Linux
             */
            if (!(support->cpubind->set_thisproc_cpubind ||
                  support->cpubind->set_thisthread_cpubind)) {
                if (!OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy)) {
                    /* binding is optional - silently skip this node */
                    continue;
                }
                orte_show_help("help-orte-rmaps-base.txt", "rmaps:cpubind-not-supported", true, node->name);
                rc = ORTE_ERR_SILENT;
                goto done;
            }

            /* memory binding: hwloc reports Linux membind capability via
             * the "thisthread" flag and leaves "thisproc" false, so both
             * must be clear before we conclude it is unsupported
             */
            if (!(support->membind->set_thisproc_membind ||
                  support->membind->set_thisthread_membind)) {
                if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) {
                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name);
                    membind_warned = true;
                } else if (OPAL_HWLOC_BASE_MBFA_ERROR == opal_hwloc_base_mbfa) {
                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported-fatal", true, node->name);
                    rc = ORTE_ERR_SILENT;
                    goto done;
                }
            }
        }

        /* wipe usage counters left in the topology by earlier passes */
        opal_hwloc_base_clear_usage(node->topology);

        for (proc_idx = 0; proc_idx < node->procs->size; proc_idx++) {
            proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, proc_idx);
            if (NULL == proc) {
                continue;
            }
            /* procs belonging to other jobs are not ours to bind */
            if (proc->name.jobid != jdata->jobid) {
                continue;
            }
            /* already bound - should not happen, but be safe */
            if (NULL != proc->cpu_bitmap) {
                continue;
            }

            /* the target may sit at any depth below the locale, so let
             * the helper search for the least-used one
             */
            cursor = opal_hwloc_base_find_min_bound_target_under_obj(node->topology,
                                                                     proc->locale,
                                                                     target, cache_level);
            if (NULL == cursor) {
                /* nothing of the requested type under this locale */
                orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name);
                rc = ORTE_ERR_SILENT;
                goto done;
            }

            /* accumulate objects until enough cpus have been gathered */
            hwloc_bitmap_zero(bound_set);
            cpus_found = 0;
            for (;;) {
                if (NULL == cursor) {
                    /* ran out of cousins before meeting the request */
                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name);
                    rc = ORTE_ERR_SILENT;
                    goto done;
                }

                /* how many cpus does this object offer? */
                ncpus = opal_hwloc_base_get_npus(node->topology, cursor);
                opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                    "%s GOT %d CPUS",
                                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ncpus);

                /* bump the bound-count, creating the record on first use */
                usage = (opal_hwloc_obj_data_t*)cursor->userdata;
                if (NULL == usage) {
                    usage = OBJ_NEW(opal_hwloc_obj_data_t);
                    cursor->userdata = usage;
                }
                usage->num_bound++;

                /* refuse to overload the object unless that is permitted */
                if (ncpus < usage->num_bound &&
                    !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) {
                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
                                   opal_hwloc_base_print_binding(map->binding), node->name,
                                   usage->num_bound, ncpus);
                    rc = ORTE_ERR_SILENT;
                    goto done;
                }

                /* fold this object's cpus into the proc's binding */
                avail = opal_hwloc_base_get_available_cpus(node->topology, cursor);
                hwloc_bitmap_or(bound_set, bound_set, avail);
                cpus_found += ncpus;

                /* advance to the neighbouring object in case more are needed */
                cursor = cursor->next_cousin;
                if (cpus_found >= orte_rmaps_base.cpus_per_rank) {
                    break;
                }
            }

            hwloc_bitmap_list_asprintf(&proc->cpu_bitmap, bound_set);
            if (opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) > 4) {
                char set_str[1024], map_str[1024];
                opal_hwloc_base_cset2str(set_str, sizeof(set_str), bound_set);
                opal_hwloc_base_cset2mapstr(map_str, sizeof(map_str), bound_set);
                opal_output(orte_rmaps_base_framework.framework_output,
                            "%s BOUND PROC %s[%s] TO %s: %s",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&proc->name), node->name,
                            set_str, map_str);
            }
        }
    }

 done:
    hwloc_bitmap_free(bound_set);
    return rc;
}
/*
 * Bind every unbound proc of a job to the nearest ancestor of its locale
 * whose type matches the requested target.
 *
 * For each node in the job map the topology's cpu/memory binding support
 * is checked first (skipped when orte_do_not_launch is set). Then, for
 * each proc of this job on the node, the parents of proc->locale are
 * walked upwards until an object of type "target" (and, for caches, of
 * depth "cache_level") is found; the available cpus of that object are
 * stored as a list string in proc->cpu_bitmap.
 *
 * @param jdata        job whose procs are to be bound
 * @param target       hwloc object type to bind to
 * @param cache_level  cache depth to match when target is HWLOC_OBJ_CACHE
 *
 * @return ORTE_SUCCESS, or ORTE_ERR_SILENT on any failure
 */
static int bind_upwards(orte_job_t *jdata,
                        hwloc_obj_type_t target,
                        unsigned cache_level)
{
    /* traverse the hwloc topology tree on each node upwards
     * until we find an object of type target - and then bind
     * the process to that target
     */
    int i, j;
    orte_job_map_t *map;
    orte_node_t *node;
    orte_proc_t *proc;
    hwloc_obj_t obj;
    hwloc_cpuset_t cpus;
    unsigned int idx, ncpus;
    struct hwloc_topology_support *support;
    opal_hwloc_obj_data_t *data;

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps: bind upwards for job %s with bindings %s",
                        ORTE_JOBID_PRINT(jdata->jobid),
                        opal_hwloc_base_print_binding(jdata->map->binding));
    /* initialize */
    map = jdata->map;

    for (i=0; i < map->nodes->size; i++) {
        if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
            continue;
        }
        if (!orte_do_not_launch) {
            /* if we don't want to launch, then we are just testing the system,
             * so ignore questions about support capabilities
             */
            support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology);
            /* check if topology supports cpubind - have to be careful here
             * as Linux doesn't currently support thread-level binding. This
             * may change in the future, though, and it isn't clear how hwloc
             * interprets the current behavior. So check both flags to be sure.
             */
            if (!support->cpubind->set_thisproc_cpubind &&
                !support->cpubind->set_thisthread_cpubind) {
                if (!OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy)) {
                    /* we are not required to bind, so ignore this */
                    continue;
                }
                orte_show_help("help-orte-rmaps-base.txt", "rmaps:cpubind-not-supported", true, node->name);
                return ORTE_ERR_SILENT;
            }
            /* check if topology supports membind - have to be careful here
             * as hwloc treats this differently than I (at least) would have
             * expected. Per hwloc, Linux memory binding is at the thread,
             * and not process, level. Thus, hwloc sets the "thisproc" flag
             * to "false" on all Linux systems, and uses the "thisthread" flag
             * to indicate binding capability
             */
            if (!support->membind->set_thisproc_membind &&
                !support->membind->set_thisthread_membind) {
                if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) {
                    /* warn only once per run */
                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name);
                    membind_warned = true;
                } else if (OPAL_HWLOC_BASE_MBFA_ERROR == opal_hwloc_base_mbfa) {
                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported-fatal", true, node->name);
                    return ORTE_ERR_SILENT;
                }
            }
        }

        /* clear the topology of any prior usage numbers */
        opal_hwloc_base_clear_usage(node->topology);

        /* cycle thru the procs */
        for (j=0; j < node->procs->size; j++) {
            if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
                continue;
            }
            /* ignore procs from other jobs */
            if (proc->name.jobid != jdata->jobid) {
                continue;
            }
            /* ignore procs that have already been bound - should
             * never happen, but safer
             */
            if (NULL != proc->cpu_bitmap) {
                continue;
            }
            /* bozo check */
            if (NULL == proc->locale) {
                opal_output(0, "BIND UPWARDS: LOCALE FOR PROC %s IS NULL", ORTE_NAME_PRINT(&proc->name));
                return ORTE_ERR_SILENT;
            }
            /* starting at the locale, move up thru the parents
             * to find the target object type
             */
            for (obj = proc->locale->parent; NULL != obj; obj = obj->parent) {
                /* per-ancestor trace: gate it at verbosity 5 like every
                 * other diagnostic in this function, instead of printing
                 * unconditionally for each ancestor of each proc
                 */
                opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                    "%s bind:upward target %s type %s",
                                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                    hwloc_obj_type_string(target),
                                    hwloc_obj_type_string(obj->type));
                if (target == obj->type) {
                    /* a cache of the wrong level is not a match - keep climbing */
                    if (HWLOC_OBJ_CACHE == target && cache_level != obj->attr->cache.depth) {
                        continue;
                    }
                    /* get its index */
                    if (UINT_MAX == (idx = opal_hwloc_base_get_obj_idx(node->topology, obj, OPAL_HWLOC_AVAILABLE))) {
                        return ORTE_ERR_SILENT;
                    }
                    /* track the number bound
                     * NOTE(review): userdata is dereferenced without a NULL
                     * check here, whereas bind_downwards allocates it on
                     * demand - presumably the index lookup above populates
                     * it; confirm
                     */
                    data = (opal_hwloc_obj_data_t*)obj->userdata;
                    data->num_bound++;
                    /* get the number of cpus under this location */
                    if (0 == (ncpus = opal_hwloc_base_get_npus(node->topology, obj))) {
                        orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name);
                        return ORTE_ERR_SILENT;
                    }
                    /* error out if adding a proc would cause overload and that wasn't allowed */
                    if (ncpus < data->num_bound &&
                        !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) {
                        orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
                                       opal_hwloc_base_print_binding(map->binding), node->name,
                                       data->num_bound, ncpus);
                        return ORTE_ERR_SILENT;
                    }
                    /* bind it here */
                    cpus = opal_hwloc_base_get_available_cpus(node->topology, obj);
                    hwloc_bitmap_list_asprintf(&proc->cpu_bitmap, cpus);
                    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                        "%s BOUND PROC %s TO %s[%s:%u] on node %s",
                                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                        ORTE_NAME_PRINT(&proc->name),
                                        proc->cpu_bitmap,
                                        hwloc_obj_type_string(target),
                                        idx, node->name);
                    break;
                }
            }
            if (NULL == proc->cpu_bitmap && OPAL_BINDING_REQUIRED(jdata->map->binding)) {
                /* didn't find anyone to bind to - this is an error
                 * unless the user specified if-supported
                 */
                orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-target-not-found", true,
                               opal_hwloc_base_print_binding(map->binding), node->name);
                return ORTE_ERR_SILENT;
            }
        }
    }

    return ORTE_SUCCESS;
}
Пример #18
0
/*
 * Unpack up to *num_vals hwloc topologies from a buffer.
 *
 * Each topology is stored in the buffer as an XML string followed by the
 * raw bytes of its discovery, cpubind and membind support structures
 * (the support flags are not carried by the XML itself).
 *
 * @param buffer    buffer to unpack from
 * @param dest      array of hwloc_topology_t receiving the loaded topologies
 * @param num_vals  in: number of topologies requested;
 *                  out: number of topologies actually stored in dest
 * @param type      data type tag (not referenced by this function)
 *
 * @return OPAL_SUCCESS, the error returned by opal_dss.unpack, or
 *         OPAL_ERROR when the XML is missing or hwloc rejects it. On
 *         failure the topology being built is destroyed; topologies
 *         already stored in dest remain valid and are counted in *num_vals.
 */
int opal_hwloc_unpack(opal_buffer_t *buffer, void *dest,
                      int32_t *num_vals,
                      opal_data_type_t type)
{
    /* NOTE: hwloc defines topology_t as a pointer to a struct! */
    hwloc_topology_t t, *tarray  = (hwloc_topology_t*)dest;
    int rc=OPAL_SUCCESS, i, cnt, j;
    char *xmlbuffer;
    struct hwloc_topology_support *support;

    for (i=0, j=0; i < *num_vals; i++) {
        /* unpack the xml string */
        cnt=1;
        xmlbuffer = NULL;
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &xmlbuffer, &cnt, OPAL_STRING))) {
            goto cleanup;
        }
        /* defensive: strlen() below must not be handed a NULL string */
        if (NULL == xmlbuffer) {
            rc = OPAL_ERROR;
            goto cleanup;
        }

        /* convert the xml */
        if (0 != hwloc_topology_init(&t)) {
            rc = OPAL_ERROR;
            free(xmlbuffer);
            goto cleanup;
        }
        /* NOTE(review): the length passed excludes the terminating NUL -
         * confirm against the hwloc version in use whether the size is
         * expected to include it
         */
        if (0 != hwloc_topology_set_xmlbuffer(t, xmlbuffer, strlen(xmlbuffer))) {
            rc = OPAL_ERROR;
            free(xmlbuffer);
            hwloc_topology_destroy(t);
            goto cleanup;
        }
        free(xmlbuffer);
        /* since we are loading this from an external source, we have to
         * explicitly set a flag so hwloc sets things up correctly
         */
        if (0 != hwloc_topology_set_flags(t, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | HWLOC_TOPOLOGY_FLAG_IO_DEVICES)) {
            rc = OPAL_ERROR;
            hwloc_topology_destroy(t);
            goto cleanup;
        }
        /* now load the topology */
        if (0 != hwloc_topology_load(t)) {
            rc = OPAL_ERROR;
            hwloc_topology_destroy(t);
            goto cleanup;
        }

        /* get the available support - hwloc unfortunately does
         * not include this info in its xml import!
         *
         * The topology has not been handed to the caller yet, so it
         * must be destroyed on every failure path below or it leaks.
         */
        support = (struct hwloc_topology_support*)hwloc_topology_get_support(t);
        cnt = sizeof(struct hwloc_topology_discovery_support);
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, support->discovery, &cnt, OPAL_BYTE))) {
            hwloc_topology_destroy(t);
            goto cleanup;
        }
        cnt = sizeof(struct hwloc_topology_cpubind_support);
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, support->cpubind, &cnt, OPAL_BYTE))) {
            hwloc_topology_destroy(t);
            goto cleanup;
        }
        cnt = sizeof(struct hwloc_topology_membind_support);
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, support->membind, &cnt, OPAL_BYTE))) {
            hwloc_topology_destroy(t);
            goto cleanup;
        }

        /* pass it back */
        tarray[i] = t;

        /* track the number added */
        j++;
    }

 cleanup:
    /* report how many topologies were fully unpacked */
    *num_vals = j;
    return rc;
}
Пример #19
0
/*
 * Bind each proc of a job to the object it was mapped to (its locale),
 * moving to a cousin at the same topology level when the locale is full.
 *
 * @param jdata        job whose procs are to be bound
 * @param target       not referenced by this function
 * @param cache_level  not referenced by this function
 *
 * @return ORTE_SUCCESS on success - including the case where the default
 *         binding policy could not be met and all procs were unbound -
 *         or ORTE_ERR_SILENT on failure
 */
static int bind_in_place(orte_job_t *jdata,
                         hwloc_obj_type_t target,
                         unsigned cache_level)
{
    /* for every proc, take the locale recorded in its attributes and
     * bind the proc to the cpus available under that object; if the
     * object is already fully subscribed, look for a cousin (same
     * level of the topology) that still has room
     */
    int i, j;
    orte_job_map_t *map;
    orte_node_t *node;
    orte_proc_t *proc;
    hwloc_cpuset_t cpus;
    unsigned int idx, ncpus;
    struct hwloc_topology_support *support;
    opal_hwloc_obj_data_t *data;
    hwloc_obj_t locale, sib;
    char *cpu_bitmap;
    bool found;

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps: bind in place for job %s with bindings %s",
                        ORTE_JOBID_PRINT(jdata->jobid),
                        opal_hwloc_base_print_binding(jdata->map->binding));
    /* initialize */
    map = jdata->map;

    for (i=0; i < map->nodes->size; i++) {
        if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
            continue;
        }
        if (!orte_do_not_launch) {
            /* if we don't want to launch, then we are just testing the system,
             * so ignore questions about support capabilities
             */
            support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology);
            /* check if topology supports cpubind - have to be careful here
             * as Linux doesn't currently support thread-level binding. This
             * may change in the future, though, and it isn't clear how hwloc
             * interprets the current behavior. So check both flags to be sure.
             */
            if (!support->cpubind->set_thisproc_cpubind &&
                !support->cpubind->set_thisthread_cpubind) {
                if (!OPAL_BINDING_REQUIRED(map->binding) ||
                    !OPAL_BINDING_POLICY_IS_SET(map->binding)) {
                    /* we are not required to bind, so ignore this */
                    continue;
                }
                orte_show_help("help-orte-rmaps-base.txt", "rmaps:cpubind-not-supported", true, node->name);
                return ORTE_ERR_SILENT;
            }
            /* check if topology supports membind - have to be careful here
             * as hwloc treats this differently than I (at least) would have
             * expected. Per hwloc, Linux memory binding is at the thread,
             * and not process, level. Thus, hwloc sets the "thisproc" flag
             * to "false" on all Linux systems, and uses the "thisthread" flag
             * to indicate binding capability - don't warn if the user didn't
             * specifically request binding
             */
            if (!support->membind->set_thisproc_membind &&
                !support->membind->set_thisthread_membind &&
                OPAL_BINDING_POLICY_IS_SET(map->binding)) {
                if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) {
                    /* warn only once per run */
                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name);
                    membind_warned = true;
                } else if (OPAL_HWLOC_BASE_MBFA_ERROR == opal_hwloc_base_mbfa) {
                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported-fatal", true, node->name);
                    return ORTE_ERR_SILENT;
                }
            }
        }

        /* some systems do not report cores, and so we can get a situation where our
         * default binding policy will fail for no necessary reason. So if we are
         * computing a binding due to our default policy, and no cores are found
         * on this node, just silently skip it - we will not bind
         */
        if (!OPAL_BINDING_POLICY_IS_SET(map->binding) &&
            HWLOC_TYPE_DEPTH_UNKNOWN == hwloc_get_type_depth(node->topology, HWLOC_OBJ_CORE)) {
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "Unable to bind-to core by default on node %s as no cores detected",
                                node->name);
            continue;
        }

        /* we share topologies in order
         * to save space, so we need to reset the usage info to reflect
         * our own current state
         */
        reset_usage(node, jdata->jobid);

        /* cycle thru the procs */
        for (j=0; j < node->procs->size; j++) {
            if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
                continue;
            }
            /* ignore procs from other jobs */
            if (proc->name.jobid != jdata->jobid) {
                continue;
            }
            /* bozo check - every proc must carry the locale it was mapped to */
            if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) {
                orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-locale", true, ORTE_NAME_PRINT(&proc->name));
                return ORTE_ERR_SILENT;
            }
            /* get the index of this location
             * NOTE(review): idx is not recomputed if the locale is shifted
             * to a cousin below, so the log messages further down report
             * the index of the original locale
             */
            if (UINT_MAX == (idx = opal_hwloc_base_get_obj_idx(node->topology, locale, OPAL_HWLOC_AVAILABLE))) {
                ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
                return ORTE_ERR_SILENT;
            }
            /* NOTE(review): userdata is dereferenced below without a NULL
             * check - presumably populated by reset_usage or the index
             * lookup above; confirm
             */
            data = (opal_hwloc_obj_data_t*)locale->userdata;
            /* get the number of cpus under this location */
            if (0 == (ncpus = opal_hwloc_base_get_npus(node->topology, locale))) {
                orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name);
                return ORTE_ERR_SILENT;
            }
            /* if we don't have enough cpus to support this additional proc, try
             * shifting the location to a cousin that can support it - the important
             * thing is that we maintain the same level in the topology */
            if (ncpus < (data->num_bound+1)) {
                opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                    "%s bind_in_place: searching right",
                                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
                sib = locale;
                found = false;
                /* data and ncpus are overwritten for each cousin examined;
                 * on success they describe the cousin that was chosen
                 */
                while (NULL != (sib = sib->next_cousin)) {
                    data = (opal_hwloc_obj_data_t*)sib->userdata;
                    ncpus = opal_hwloc_base_get_npus(node->topology, sib);
                    if (data->num_bound < ncpus) {
                        found = true;
                        locale = sib;
                        break;
                    }
                }
                if (!found) {
                    /* try the other direction */
                    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                        "%s bind_in_place: searching left",
                                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
                    /* locale is unchanged here, so the search restarts
                     * from the original object
                     */
                    sib = locale;
                    while (NULL != (sib = sib->prev_cousin)) {
                        data = (opal_hwloc_obj_data_t*)sib->userdata;
                        ncpus = opal_hwloc_base_get_npus(node->topology, sib);
                        if (data->num_bound < ncpus) {
                            found = true;
                            locale = sib;
                            break;
                        }
                    }
                }
                if (!found) {
                    /* no place to put this - see if overload is allowed;
                     * if it is, fall through and bind to the original
                     * (already full) locale
                     */
                    if (!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) {
                        if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
                            /* if the user specified a binding policy, then we cannot meet
                             * it since overload isn't allowed, so error out - have the
                             * message indicate that setting overload allowed will remove
                             * this restriction */
                            orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
                                           opal_hwloc_base_print_binding(map->binding), node->name,
                                           data->num_bound, ncpus);
                            return ORTE_ERR_SILENT;
                        } else {
                            /* if we have the default binding policy, then just don't bind:
                             * switch the whole job to bind-to-none, undo any bindings
                             * already made, and report success */
                            OPAL_SET_BINDING_POLICY(map->binding, OPAL_BIND_TO_NONE);
                            unbind_procs(jdata);
                            return ORTE_SUCCESS;
                        }
                    }
                }
            }
            /* track the number bound */
            data = (opal_hwloc_obj_data_t*)locale->userdata;  // just in case it changed
            data->num_bound++;
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "BINDING PROC %s TO %s NUMBER %u",
                                ORTE_NAME_PRINT(&proc->name),
                                hwloc_obj_type_string(locale->type), idx);
            /* bind the proc here */
            cpus = opal_hwloc_base_get_available_cpus(node->topology, locale);
            hwloc_bitmap_list_asprintf(&cpu_bitmap, cpus);
            /* record the binding as a string attribute; the local string is
             * freed below, so presumably the attribute keeps its own copy -
             * confirm */
            orte_set_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, ORTE_ATTR_GLOBAL, cpu_bitmap, OPAL_STRING);
            /* update the location, in case it changed */
            orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_BOUND, ORTE_ATTR_LOCAL, locale, OPAL_PTR);
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "%s BOUND PROC %s TO %s[%s:%u] on node %s",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                ORTE_NAME_PRINT(&proc->name),
                                cpu_bitmap, hwloc_obj_type_string(locale->type),
                                idx, node->name);
            if (NULL != cpu_bitmap) {
                free(cpu_bitmap);
            }
        }
    }

    return ORTE_SUCCESS;
}
/*
 * Bind each proc of a job to the hwloc object it was mapped to.
 *
 * Every node in the job's map is visited. Unless orte_do_not_launch is
 * set, the node's hwloc support flags are consulted first: a node without
 * cpubind support is either skipped or causes an abort (when binding is
 * both required and explicitly set), and a node without membind support
 * triggers a one-time warning or an abort depending on
 * opal_hwloc_base_mbfa. Each proc of this job that has no cpu_bitmap yet
 * is then bound to the available cpus under its own locale, and that
 * locale is stored as its bind_location.
 *
 * jdata       - job whose procs are to be bound
 * target      - not referenced by this function
 * cache_level - not referenced by this function
 *
 * Returns ORTE_SUCCESS once all nodes were processed, or ORTE_ERR_SILENT
 * on failure (a help message or error log is emitted first).
 */
static int bind_in_place(orte_job_t *jdata,
                         hwloc_obj_type_t target,
                         unsigned cache_level)
{
    orte_job_map_t *map = jdata->map;
    orte_node_t *node;
    orte_proc_t *proc;
    opal_hwloc_obj_data_t *usage;
    struct hwloc_topology_support *support;
    hwloc_cpuset_t avail;
    unsigned int obj_idx, npus;
    int n, p;

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps: bind in place for job %s with bindings %s",
                        ORTE_JOBID_PRINT(jdata->jobid),
                        opal_hwloc_base_print_binding(map->binding));

    for (n = 0; n < map->nodes->size; n++) {
        node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, n);
        if (NULL == node) {
            continue;
        }

        /* a do-not-launch run only exercises the system, so questions
         * about binding support are skipped in that case
         */
        if (!orte_do_not_launch) {
            support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology);

            /* cpubind: Linux doesn't currently offer thread-level binding
             * and it is unclear how hwloc reports that, so the node only
             * counts as unsupported when BOTH flags are clear
             */
            if (!(support->cpubind->set_thisproc_cpubind ||
                  support->cpubind->set_thisthread_cpubind)) {
                if (OPAL_BINDING_REQUIRED(map->binding) &&
                    OPAL_BINDING_POLICY_IS_SET(map->binding)) {
                    /* the user insisted on binding and we cannot deliver */
                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:cpubind-not-supported", true, node->name);
                    return ORTE_ERR_SILENT;
                }
                /* binding is optional here - leave this node unbound */
                continue;
            }

            /* membind: per hwloc, Linux binds memory per thread rather
             * than per process, so "thisproc" is always false there and
             * "thisthread" carries the capability. Stay quiet unless the
             * user explicitly asked for a binding policy.
             */
            if (!(support->membind->set_thisproc_membind ||
                  support->membind->set_thisthread_membind) &&
                OPAL_BINDING_POLICY_IS_SET(map->binding)) {
                if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) {
                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name);
                    membind_warned = true;
                } else if (OPAL_HWLOC_BASE_MBFA_ERROR == opal_hwloc_base_mbfa) {
                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported-fatal", true, node->name);
                    return ORTE_ERR_SILENT;
                }
            }
        }

        /* not every system reports cores. When we are only binding
         * because of the default policy, a node without core objects is
         * silently left unbound instead of failing for no good reason
         */
        if (!OPAL_BINDING_POLICY_IS_SET(map->binding) &&
            HWLOC_TYPE_DEPTH_UNKNOWN == hwloc_get_type_depth(node->topology, HWLOC_OBJ_CORE)) {
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "Unable to bind-to core by default on node %s as no cores detected",
                                node->name);
            continue;
        }

        /* topologies are shared to save space, so the usage counters
         * must be reset to describe this job's current state
         */
        reset_usage(node, jdata->jobid);

        for (p = 0; p < node->procs->size; p++) {
            proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, p);
            /* skip empty slots, procs belonging to other jobs, and procs
             * that already carry a binding (should not happen, but safer)
             */
            if (NULL == proc ||
                proc->name.jobid != jdata->jobid ||
                NULL != proc->cpu_bitmap) {
                continue;
            }

            /* look up the index of the object this proc was mapped to */
            obj_idx = opal_hwloc_base_get_obj_idx(node->topology, proc->locale, OPAL_HWLOC_AVAILABLE);
            if (UINT_MAX == obj_idx) {
                ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
                return ORTE_ERR_SILENT;
            }

            /* count this proc against the object before testing for overload */
            usage = (opal_hwloc_obj_data_t*)proc->locale->userdata;
            usage->num_bound++;
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "BINDING PROC %s TO %s NUMBER %u",
                                ORTE_NAME_PRINT(&proc->name),
                                hwloc_obj_type_string(proc->locale->type), obj_idx);

            /* an object without any cpus cannot host a proc */
            npus = opal_hwloc_base_get_npus(node->topology, proc->locale);
            if (0 == npus) {
                orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name);
                return ORTE_ERR_SILENT;
            }

            /* overload is only fatal when it is disallowed AND the policy
             * was explicitly requested (i.e., it is not just our default)
             */
            if (usage->num_bound > npus &&
                !OPAL_BIND_OVERLOAD_ALLOWED(map->binding) &&
                OPAL_BINDING_POLICY_IS_SET(map->binding)) {
                orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
                               opal_hwloc_base_print_binding(map->binding), node->name,
                               usage->num_bound, npus);
                return ORTE_ERR_SILENT;
            }

            /* render the available cpus of the locale as the proc's binding */
            avail = opal_hwloc_base_get_available_cpus(node->topology, proc->locale);
            hwloc_bitmap_list_asprintf(&proc->cpu_bitmap, avail);
            /* remember where the proc ended up */
            proc->bind_location = proc->locale;
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "%s BOUND PROC %s TO %s[%s:%u] on node %s",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                ORTE_NAME_PRINT(&proc->name),
                                proc->cpu_bitmap,
                                hwloc_obj_type_string(proc->locale->type),
                                obj_idx, node->name);
        }
    }

    return ORTE_SUCCESS;
}
Пример #21
0
/* Run the binding test on a cpuset twice: first relaxed, then strict. */
static void test_relaxed_then_strict(hwloc_bitmap_t cpuset)
{
  test(cpuset, 0);
  printf("now strict\n");
  test(cpuset, HWLOC_CPUBIND_STRICT);
}

/*
 * Exercise the binding tests on progressively smaller cpusets (the whole
 * system, the first object whose cpuset differs from the root's, and a
 * singlified set), then run the memory tests on the root cpuset and on
 * the cpusets of the first one or two NUMA node objects, if present.
 */
int main(void)
{
  hwloc_obj_t obj, numa0, numa1;
  hwloc_bitmap_t set;
  char *str = NULL;

  hwloc_topology_init(&topology);
  hwloc_topology_load(topology);

  support = hwloc_topology_get_support(topology);

  obj = hwloc_get_root_obj(topology);
  set = hwloc_bitmap_dup(obj->cpuset);

  /* follow first children until the cpuset shrinks or a leaf is reached */
  while (hwloc_bitmap_isequal(obj->cpuset, set) && obj->arity)
    obj = obj->children[0];

  /* whole-system cpuset */
  hwloc_bitmap_asprintf(&str, set);
  printf("system set is %s\n", str);
  free(str);
  test_relaxed_then_strict(set);
  hwloc_bitmap_free(set);

  /* cpuset of the object found by the descent above */
  set = hwloc_bitmap_dup(obj->cpuset);
  hwloc_bitmap_asprintf(&str, set);
  printf("obj set is %s\n", str);
  free(str);
  test_relaxed_then_strict(set);

  /* same set after hwloc_bitmap_singlify() */
  hwloc_bitmap_singlify(set);
  hwloc_bitmap_asprintf(&str, set);
  printf("singlified to %s\n", str);
  free(str);
  test_relaxed_then_strict(set);
  hwloc_bitmap_free(set);

  printf("\n\nmemory tests\n\n");
  printf("complete node set\n");
  set = hwloc_bitmap_dup(hwloc_get_root_obj(topology)->cpuset);
  hwloc_bitmap_asprintf(&str, set);
  printf("i.e. cpuset %s\n", str);
  free(str);
  testmem3(set);
  hwloc_bitmap_free(set);

  numa0 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, 0);
  if (numa0) {
    /* first NUMA node on its own */
    set = hwloc_bitmap_dup(numa0->cpuset);
    hwloc_bitmap_asprintf(&str, set);
    printf("cpuset set is %s\n", str);
    free(str);

    testmem3(set);

    numa1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, 1);
    if (numa1) {
      /* union of the first two NUMA nodes */
      hwloc_bitmap_or(set, set, numa1->cpuset);
      hwloc_bitmap_asprintf(&str, set);
      printf("cpuset set is %s\n", str);
      free(str);

      testmem3(set);
    }
    hwloc_bitmap_free(set);
  }

  hwloc_topology_destroy(topology);
  return 0;
}
/*
 * Compute the bindings for all procs of a job.
 *
 * Nothing is done when the job was mapped by the user (rankfile) or when
 * the binding policy is "none" or "board". A "cpuset" policy is delegated
 * to bind_to_cpuset(). For the remaining policies the binding and mapping
 * levels are translated to hwloc object types; procs are then either
 * bound in place (mapping and binding level match) or, node by node,
 * bound upwards or downwards in the hwloc tree depending on the relative
 * depth of the two levels on that node.
 *
 * jdata - job whose map carries the mapping/binding policies and nodes
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_BAD_PARAM for an unrecognized policy,
 * ORTE_ERR_SILENT when a help message was already shown, or the error
 * code returned by one of the bind_* helpers.
 */
int orte_rmaps_base_compute_bindings(orte_job_t *jdata)
{
    hwloc_obj_type_t hwb, hwm;
    unsigned clvl=0, clvm=0;
    opal_binding_policy_t bind;
    orte_mapping_policy_t map;
    orte_node_t *node;
    int i, rc;
    struct hwloc_topology_support *support;
    bool force_down = false;
    int bind_depth, map_depth;

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps: compute bindings for job %s with policy %s",
                        ORTE_JOBID_PRINT(jdata->jobid),
                        opal_hwloc_base_print_binding(jdata->map->binding));

    map = ORTE_GET_MAPPING_POLICY(jdata->map->mapping);
    bind = OPAL_GET_BINDING_POLICY(jdata->map->binding);

    if (ORTE_MAPPING_BYUSER == map) {
        /* user specified binding by rankfile - nothing for us to do */
        return ORTE_SUCCESS;
    }

    if (OPAL_BIND_TO_CPUSET == bind) {
        /* cpuset was given - setup the bindings */
        if (ORTE_SUCCESS != (rc = bind_to_cpuset(jdata))) {
            ORTE_ERROR_LOG(rc);
        }
        return rc;
    }

    if (OPAL_BIND_TO_NONE == bind) {
        /* no binding requested */
        return ORTE_SUCCESS;
    }

    if (OPAL_BIND_TO_BOARD == bind) {
        /* doesn't do anything at this time */
        return ORTE_SUCCESS;
    }

    /* binding requested - convert the binding level to the hwloc obj type
     * (plus the cache level when the target is a cache)
     */
    switch (bind) {
    case OPAL_BIND_TO_NUMA:
        hwb = HWLOC_OBJ_NODE;
        break;
    case OPAL_BIND_TO_SOCKET:
        hwb = HWLOC_OBJ_SOCKET;
        break;
    case OPAL_BIND_TO_L3CACHE:
        hwb = HWLOC_OBJ_CACHE;
        clvl = 3;
        break;
    case OPAL_BIND_TO_L2CACHE:
        hwb = HWLOC_OBJ_CACHE;
        clvl = 2;
        break;
    case OPAL_BIND_TO_L1CACHE:
        hwb = HWLOC_OBJ_CACHE;
        clvl = 1;
        break;
    case OPAL_BIND_TO_CORE:
        hwb = HWLOC_OBJ_CORE;
        break;
    case OPAL_BIND_TO_HWTHREAD:
        hwb = HWLOC_OBJ_PU;
        break;
    default:
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /* do the same for the mapping policy */
    switch (map) {
    case ORTE_MAPPING_BYNODE:
    case ORTE_MAPPING_BYSLOT:
    case ORTE_MAPPING_SEQ:
        hwm = HWLOC_OBJ_MACHINE;
        break;
    case ORTE_MAPPING_BYDIST:
    case ORTE_MAPPING_BYNUMA:
        hwm = HWLOC_OBJ_NODE;
        break;
    case ORTE_MAPPING_BYSOCKET:
        hwm = HWLOC_OBJ_SOCKET;
        break;
    case ORTE_MAPPING_BYL3CACHE:
        hwm = HWLOC_OBJ_CACHE;
        clvm = 3;
        break;
    case ORTE_MAPPING_BYL2CACHE:
        hwm = HWLOC_OBJ_CACHE;
        clvm = 2;
        break;
    case ORTE_MAPPING_BYL1CACHE:
        hwm = HWLOC_OBJ_CACHE;
        clvm = 1;
        break;
    case ORTE_MAPPING_BYCORE:
        hwm = HWLOC_OBJ_CORE;
        break;
    case ORTE_MAPPING_BYHWTHREAD:
        hwm = HWLOC_OBJ_PU;
        break;
    default:
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /* if the job was mapped by the corresponding target, then
     * we bind in place
     *
     * otherwise, we have to bind either up or down the hwloc
     * tree. If we are binding upwards (e.g., mapped to hwthread
     * but binding to core), then we just climb the tree to find
     * the first matching object.
     *
     * if we are binding downwards (e.g., mapped to node and bind
     * to core), then we have to do a round-robin assignment of
     * procs to the resources below.
     */

    if (ORTE_MAPPING_BYDIST == map) {
        rc = ORTE_SUCCESS;
        if (OPAL_BIND_TO_NUMA == bind) {
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps: bindings for job %s - dist to numa",
                                ORTE_JOBID_PRINT(jdata->jobid));
            if (ORTE_SUCCESS != (rc = bind_in_place(jdata, HWLOC_OBJ_NODE, 0))) {
                ORTE_ERROR_LOG(rc);
            }
        } else if (OPAL_BIND_TO_NUMA < bind) {
            /* bind every proc downwards */
            force_down = true;
            goto execute;
        }
        /* if the binding policy is less than numa, then we are unbound - so
         * just ignore this and return (should have been caught in prior
         * tests anyway as only options meeting that criteria are "none"
         * and "board")
         */
        return rc;
    }

    /* now deal with the remaining binding policies based on hardware.
     * NOTE(review): this compares a binding-policy value against a
     * mapping-policy value; it presumably relies on the two enums using
     * the same numeric value for matching levels - confirm against
     * their definitions
     */
    if (bind == map) {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps: bindings for job %s - bind in place",
                            ORTE_JOBID_PRINT(jdata->jobid));
        if (ORTE_SUCCESS != (rc = bind_in_place(jdata, hwb, clvl))) {
            ORTE_ERROR_LOG(rc);
        }
        return rc;
    }

    /* we need to handle the remaining binding options on a per-node
     * basis because different nodes could potentially have different
     * topologies, with different relative depths for the two levels
     */
 execute:
    for (i=0; i < jdata->map->nodes->size; i++) {
        if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) {
            continue;
        }
        if (!orte_do_not_launch) {
            /* if we don't want to launch, then we are just testing the system,
             * so ignore questions about support capabilities
             */
            support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology);
            /* check if topology supports cpubind - have to be careful here
             * as Linux doesn't currently support thread-level binding. This
             * may change in the future, though, and it isn't clear how hwloc
             * interprets the current behavior. So check both flags to be sure.
             */
            if (!support->cpubind->set_thisproc_cpubind &&
                !support->cpubind->set_thisthread_cpubind) {
                if (!OPAL_BINDING_REQUIRED(jdata->map->binding) ||
                    !OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
                    /* we are not required to bind, so ignore this */
                    continue;
                }
                orte_show_help("help-orte-rmaps-base.txt", "rmaps:cpubind-not-supported", true, node->name);
                return ORTE_ERR_SILENT;
            }
            /* check if topology supports membind - have to be careful here
             * as hwloc treats this differently than I (at least) would have
             * expected. Per hwloc, Linux memory binding is at the thread,
             * and not process, level. Thus, hwloc sets the "thisproc" flag
             * to "false" on all Linux systems, and uses the "thisthread" flag
             * to indicate binding capability - don't warn if the user didn't
             * specifically request binding
             */
            if (!support->membind->set_thisproc_membind &&
                !support->membind->set_thisthread_membind &&
                OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
                if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) {
                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name);
                    membind_warned = true;
                } else if (OPAL_HWLOC_BASE_MBFA_ERROR == opal_hwloc_base_mbfa) {
                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported-fatal", true, node->name);
                    return ORTE_ERR_SILENT;
                }
            }
        }

        /* some systems do not report cores, and so we can get a situation where our
         * default binding policy will fail for no necessary reason. So if we are
         * computing a binding due to our default policy, and no cores are found
         * on this node, just silently skip it - we will not bind
         */
        if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding) &&
            HWLOC_TYPE_DEPTH_UNKNOWN == hwloc_get_type_depth(node->topology, HWLOC_OBJ_CORE)) {
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "Unable to bind-to core by default on node %s as no cores detected",
                                node->name);
            continue;
        }

        /* we share topologies in order
         * to save space, so we need to reset the usage info to reflect
         * our own current state
         */
        reset_usage(node, jdata->jobid);

        if (force_down) {
            if (ORTE_SUCCESS != (rc = bind_downwards(jdata, node, hwb, clvl))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
        } else {
            /* determine the relative depth on this node */
            if (HWLOC_OBJ_CACHE == hwb) {
                /* must use a unique function because blasted hwloc
                 * just doesn't deal with caches very well...sigh
                 */
                bind_depth = hwloc_get_cache_type_depth(node->topology, clvl, -1);
            } else {
                bind_depth = hwloc_get_type_depth(node->topology, hwb);
            }
            if (0 > bind_depth) {
                /* didn't find such an object */
                orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-objects",
                               true, hwloc_obj_type_string(hwb), node->name);
                return ORTE_ERR_SILENT;
            }
            if (HWLOC_OBJ_CACHE == hwm) {
                /* must use a unique function because blasted hwloc
                 * just doesn't deal with caches very well...sigh
                 */
                map_depth = hwloc_get_cache_type_depth(node->topology, clvm, -1);
            } else {
                map_depth = hwloc_get_type_depth(node->topology, hwm);
            }
            if (0 > map_depth) {
                /* didn't find such an object */
                orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-objects",
                               true, hwloc_obj_type_string(hwm), node->name);
                return ORTE_ERR_SILENT;
            }
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "%s bind_depth: %d map_depth %d",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                bind_depth, map_depth);
            /* a binding target deeper than the mapping level means we
             * must distribute procs downwards; otherwise climb upwards
             */
            if (bind_depth > map_depth) {
                if (ORTE_SUCCESS != (rc = bind_downwards(jdata, node, hwb, clvl))) {
                    ORTE_ERROR_LOG(rc);
                    return rc;
                }
            } else {
                if (ORTE_SUCCESS != (rc = bind_upwards(jdata, node, hwb, clvl))) {
                    ORTE_ERROR_LOG(rc);
                    return rc;
                }
            }
        }
    }

    return ORTE_SUCCESS;
}
Пример #23
0
/*
 * Pad an argv-style list out to "target" entries by repeating its last
 * entry. A NULL or empty list, or one that is already long enough, is
 * left untouched.
 */
static void ras_sim_backfill(char ***list, int target)
{
    int cnt;
    char *last;

    if (NULL == *list) {
        return;
    }
    cnt = opal_argv_count(*list);
    if (cnt <= 0) {
        return;
    }
    /* "last" points at the string itself, so it stays valid even if the
     * append below reallocates the pointer array
     */
    last = (*list)[cnt-1];
    for (; cnt < target; cnt++) {
        opal_argv_append_nosize(list, last);
    }
}

#if OPAL_HAVE_HWLOC
/*
 * Post-process a freshly loaded simulated topology: strip the HostName
 * info entry from the root object, overwrite the cpubind/membind support
 * flags with the simulator component's settings, and add the topology to
 * orte_node_topologies.
 */
static void ras_sim_register_topology(hwloc_topology_t topo)
{
    hwloc_obj_t obj;
    unsigned j, k;
    struct hwloc_topology_support *support;
    orte_topology_t *t;

    /* remove the hostname from the topology. Unfortunately, hwloc
     * decided to add the source hostname to the "topology", thus
     * rendering it unusable as a pure topological description. So
     * we remove that information here.
     */
    obj = hwloc_get_root_obj(topo);
    for (k=0; k < obj->infos_count; k++) {
        if (NULL == obj->infos[k].name ||
            NULL == obj->infos[k].value) {
            continue;
        }
        if (0 == strncmp(obj->infos[k].name, "HostName", strlen("HostName"))) {
            free(obj->infos[k].name);
            free(obj->infos[k].value);
            /* left justify the array */
            for (j=k; j < obj->infos_count-1; j++) {
                obj->infos[j] = obj->infos[j+1];
            }
            obj->infos[obj->infos_count-1].name = NULL;
            obj->infos[obj->infos_count-1].value = NULL;
            obj->infos_count--;
            break;
        }
    }
    /* unfortunately, hwloc does not include support info in its
     * xml output :-(( To aid in debugging, we set it here
     */
    support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo);
    support->cpubind->set_thisproc_cpubind = mca_ras_simulator_component.have_cpubind;
    support->membind->set_thisproc_membind = mca_ras_simulator_component.have_membind;
    /* add it to our array */
    t = OBJ_NEW(orte_topology_t);
    t->topo = topo;
    t->sig = opal_hwloc_base_get_topo_signature(topo);
    opal_pointer_array_add(orte_node_topologies, t);
}
#endif

/*
 * Create the simulated allocation.
 *
 * The component's num_nodes parameter is a comma-separated list with one
 * entry per group of nodes. For every group a topology is selected (the
 * local one, one loaded from an XML file, or a synthetic one) and the
 * requested number of nodes, named "<prefix><index>", is appended to
 * "nodes". The slots / slots_max lists are padded with their last value
 * so that every group has an entry.
 *
 * jdata - not referenced by this function
 * nodes - list receiving the newly created orte_node_t objects
 *
 * Returns ORTE_SUCCESS, or ORTE_ERR_SILENT after a help message was
 * shown. Nodes appended before a failure remain on "nodes".
 */
static int allocate(orte_job_t *jdata, opal_list_t *nodes)
{
    int i, n, val, dig, num_nodes, num_groups;
    int rc = ORTE_ERR_SILENT;
    orte_node_t *node;
#if OPAL_HAVE_HWLOC
    hwloc_topology_t topo;
    hwloc_obj_t obj;
    char **files=NULL;
    char **topos = NULL;
    bool use_local_topology = false;
#endif
    char **node_cnt=NULL;
    char **slot_cnt=NULL;
    char **max_slot_cnt=NULL;
    char prefix[6];

    node_cnt = opal_argv_split(mca_ras_simulator_component.num_nodes, ',');
    num_groups = opal_argv_count(node_cnt);
    if (NULL != mca_ras_simulator_component.slots) {
        slot_cnt = opal_argv_split(mca_ras_simulator_component.slots, ',');
        /* backfill the slot_cnt so every topology has a cnt */
        ras_sim_backfill(&slot_cnt, num_groups);
    }
    if (NULL != mca_ras_simulator_component.slots_max) {
        max_slot_cnt = opal_argv_split(mca_ras_simulator_component.slots_max, ',');
        /* backfill the max_slot_cnt as reqd */
        ras_sim_backfill(&max_slot_cnt, num_groups);
    }

#if OPAL_HAVE_HWLOC
    if (NULL != mca_ras_simulator_component.topofiles) {
        files = opal_argv_split(mca_ras_simulator_component.topofiles, ',');
        if (opal_argv_count(files) != num_groups) {
            orte_show_help("help-ras-base.txt", "ras-sim:mismatch", true);
            goto cleanup;
        }
    } else if (NULL != mca_ras_simulator_component.topologies) {
        topos = opal_argv_split(mca_ras_simulator_component.topologies, ',');
        if (opal_argv_count(topos) != num_groups) {
            orte_show_help("help-ras-base.txt", "ras-sim:mismatch", true);
            goto cleanup;
        }
    } else {
        /* use our topology */
        use_local_topology = true;
    }
#else
    /* If we don't have hwloc and hwloc files were specified, then
       error out (because we can't deliver that functionality) */
    if (NULL != mca_ras_simulator_component.topofiles) {
        orte_show_help("help-ras-simulator.txt",
                       "no hwloc support for topofiles", true);
        goto cleanup;
    }
#endif

    /* setup the prefix to the node names */
    snprintf(prefix, sizeof(prefix), "nodeA");

    /* process the request */
    for (n=0; NULL != node_cnt[n]; n++) {
        num_nodes = strtol(node_cnt[n], NULL, 10);

        /* get number of digits */
        val = num_nodes;
        for (dig=0; 0 != val; dig++) {
            val /= 10;
        }

        /* set the prefix for this group of nodes.
         * NOTE(review): the increment accumulates across groups, so the
         * letters run A, B, D, G, ... rather than A, B, C, ... - kept
         * as-is because changing it would alter the emitted node names
         */
        prefix[4] += n;

        /* check for topology */
#if OPAL_HAVE_HWLOC
        if (use_local_topology) {
            /* use our topology */
            topo = opal_hwloc_topology;
        } else if (NULL != files) {
            if (0 != hwloc_topology_init(&topo)) {
                orte_show_help("help-ras-simulator.txt",
                               "hwloc API fail", true,
                               __FILE__, __LINE__, "hwloc_topology_init");
                goto cleanup;
            }
            if (0 != hwloc_topology_set_xml(topo, files[n])) {
                orte_show_help("help-ras-simulator.txt",
                               "hwloc failed to load xml", true, files[n]);
                hwloc_topology_destroy(topo);
                goto cleanup;
            }
            /* since we are loading this from an external source, we have to
             * explicitly set a flag so hwloc sets things up correctly
             */
            if (0 != hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM)) {
                orte_show_help("help-ras-simulator.txt",
                               "hwloc API fail", true,
                               __FILE__, __LINE__, "hwloc_topology_set_flags");
                hwloc_topology_destroy(topo);
                goto cleanup;
            }
            if (0 != hwloc_topology_load(topo)) {
                orte_show_help("help-ras-simulator.txt",
                               "hwloc API fail", true,
                               __FILE__, __LINE__, "hwloc_topology_load");
                hwloc_topology_destroy(topo);
                goto cleanup;
            }
            /* strip the hostname, set support flags, and record it */
            ras_sim_register_topology(topo);
        } else {
            if (0 != hwloc_topology_init(&topo)) {
                orte_show_help("help-ras-simulator.txt",
                               "hwloc API fail", true,
                               __FILE__, __LINE__, "hwloc_topology_init");
                goto cleanup;
            }
            if (0 != hwloc_topology_set_synthetic(topo, topos[n])) {
                orte_show_help("help-ras-simulator.txt",
                               "hwloc API fail", true,
                               __FILE__, __LINE__, "hwloc_topology_set_synthetic");
                hwloc_topology_destroy(topo);
                goto cleanup;
            }
            if (0 != hwloc_topology_load(topo)) {
                orte_show_help("help-ras-simulator.txt",
                               "hwloc API fail", true,
                               __FILE__, __LINE__, "hwloc_topology_load");
                hwloc_topology_destroy(topo);
                goto cleanup;
            }
            if (OPAL_SUCCESS != opal_hwloc_base_filter_cpus(topo)) {
                orte_show_help("help-ras-simulator.txt",
                               "hwloc API fail", true,
                               __FILE__, __LINE__, "opal_hwloc_base_filter_cpus");
                hwloc_topology_destroy(topo);
                goto cleanup;
            }
            /* strip the hostname, set support flags, and record it */
            ras_sim_register_topology(topo);
        }
#endif

        for (i=0; i < num_nodes; i++) {
            node = OBJ_NEW(orte_node_t);
            asprintf(&node->name, "%s%0*d", prefix, dig, i);
            node->state = ORTE_NODE_STATE_UP;
            node->slots_inuse = 0;
#if OPAL_HAVE_HWLOC
            /* when a count was given for this group, the value actually
             * used is the number of cpus under the topology's root
             */
            if (NULL == max_slot_cnt || NULL == max_slot_cnt[n]) {
                node->slots_max = 0;
            } else {
                obj = hwloc_get_root_obj(topo);
                node->slots_max = opal_hwloc_base_get_npus(topo, obj);
            }
            if (NULL == slot_cnt || NULL == slot_cnt[n]) {
                node->slots = 0;
            } else {
                obj = hwloc_get_root_obj(topo);
                node->slots = opal_hwloc_base_get_npus(topo, obj);
            }
            node->topology = topo;
#endif
            opal_output_verbose(1, orte_ras_base_framework.framework_output,
                                "Created Node <%10s> [%3d : %3d]",
                                node->name, node->slots, node->slots_max);
            opal_list_append(nodes, &node->super);
        }
    }

    /* record the number of allocated nodes */
    orte_num_allocated_nodes = opal_list_get_size(nodes);
    rc = ORTE_SUCCESS;

cleanup:
    /* shared exit path: release every temporary list on success and failure */
    if (NULL != max_slot_cnt) {
        opal_argv_free(max_slot_cnt);
    }
    if (NULL != slot_cnt) {
        opal_argv_free(slot_cnt);
    }
    if (NULL != node_cnt) {
        opal_argv_free(node_cnt);
    }
#if OPAL_HAVE_HWLOC
    if (NULL != files) {
        opal_argv_free(files);
    }
    if (NULL != topos) {
        opal_argv_free(topos);
    }
#endif
    return rc;

}
Пример #24
0
/*
 * Walk /proc and insert, via insert_task(), one entry per process whose CPU
 * binding could be queried and does not cover the whole root cpuset.
 *
 * When HWLOC_LINUX_SYS is defined, the process name is additionally read
 * from /proc/<pid>/comm (falling back to /proc/<pid>/stat), and each thread
 * under /proc/<pid>/task whose binding differs from the process binding is
 * inserted as well.
 *
 * Everything here is best-effort: unreadable /proc entries, failed binding
 * queries and failed memory allocations cause the affected process or
 * thread to be skipped rather than aborting the scan.
 *
 * The function is a no-op when HAVE_DIRENT_H is not defined or when the
 * topology reports no get_proc_cpubind support.
 */
static void add_process_objects(hwloc_topology_t topology)
{
#ifdef HAVE_DIRENT_H
  hwloc_obj_t root;
  hwloc_bitmap_t cpuset;
#ifdef HWLOC_LINUX_SYS
  hwloc_bitmap_t task_cpuset;
#endif /* HWLOC_LINUX_SYS */
  DIR *dir;
  struct dirent *dirent;
  const struct hwloc_topology_support *support;

  root = hwloc_get_root_obj(topology);

  support = hwloc_topology_get_support(topology);

  if (!support->cpubind->get_proc_cpubind)
    return;

  dir  = opendir("/proc");
  if (!dir)
    return;

  /* Scratch bitmaps reused for every process/thread; give up cleanly if
   * they cannot be allocated. */
  cpuset = hwloc_bitmap_alloc();
  if (!cpuset) {
    closedir(dir);
    return;
  }
#ifdef HWLOC_LINUX_SYS
  task_cpuset = hwloc_bitmap_alloc();
  if (!task_cpuset) {
    hwloc_bitmap_free(cpuset);
    closedir(dir);
    return;
  }
#endif /* HWLOC_LINUX_SYS */

  while ((dirent = readdir(dir))) {
    long local_pid_number;
    hwloc_pid_t local_pid;
    char *end;
    char name[80];
    int proc_cpubind;

    local_pid_number = strtol(dirent->d_name, &end, 10);
    if (*end)
      /* Not a number */
      continue;

    /* Default label is just the pid; refined below on Linux. */
    snprintf(name, sizeof(name), "%ld", local_pid_number);

    local_pid = hwloc_pid_from_number(local_pid_number, 0);

    /* Remember whether the process binding is known; threads may still be
     * reported even when it is not. */
    proc_cpubind = hwloc_get_proc_cpubind(topology, local_pid, cpuset, 0) != -1;

#ifdef HWLOC_LINUX_SYS
    {
      char comm[16];
      char *path;
      /* "/proc/" + pid + "/" + longest suffix ("cmdline") + NUL */
      size_t pathlen = 6 + strlen(dirent->d_name) + 1 + 7 + 1;

      path = malloc(pathlen);
      if (!path)
        /* Out of memory: skip this process like any other error */
        continue;

      {
        /* Get the process name */
        char cmd[64];
        int file;
        ssize_t n;

        snprintf(path, pathlen, "/proc/%s/cmdline", dirent->d_name);
        file = open(path, O_RDONLY);
        if (file < 0) {
          /* Ignore errors */
          free(path);
          continue;
        }
        n = read(file, cmd, sizeof(cmd));
        close(file);

        if (n <= 0) {
          /* Ignore kernel threads and errors */
          free(path);
          continue;
        }

        snprintf(path, pathlen, "/proc/%s/comm", dirent->d_name);
        file = open(path, O_RDONLY);

        if (file >= 0) {
          n = read(file, comm, sizeof(comm) - 1);
          close(file);
          if (n > 0) {
            comm[n] = 0;
            /* Strip the trailing newline */
            if (n > 1 && comm[n-1] == '\n')
              comm[n-1] = 0;
          } else {
            snprintf(comm, sizeof(comm), "(unknown)");
          }
        } else {
          /* Old kernel, have to look at old file */
          char stats[32];
          char *parenl = NULL, *parenr;

          snprintf(path, pathlen, "/proc/%s/stat", dirent->d_name);
          file = open(path, O_RDONLY);

          if (file < 0) {
            /* Ignore errors */
            free(path);
            continue;
          }

          /* "pid (comm) ..." */
          n = read(file, stats, sizeof(stats) - 1);
          close(file);
          if (n > 0) {
            stats[n] = 0;
            parenl = strchr(stats, '(');
            parenr = strchr(stats, ')');
            /* No closing parenthesis in what was read: cut at buffer end */
            if (!parenr)
              parenr = &stats[sizeof(stats)-1];
            *parenr = 0;
          }
          if (!parenl) {
            snprintf(comm, sizeof(comm), "(unknown)");
          } else {
            snprintf(comm, sizeof(comm), "%s", parenl+1);
          }
        }

        snprintf(name, sizeof(name), "%ld %s", local_pid_number, comm);
      }

      {
        /* Get threads */
        DIR *task_dir;
        struct dirent *task_dirent;

        snprintf(path, pathlen, "/proc/%s/task", dirent->d_name);
        task_dir = opendir(path);

        if (task_dir) {
          while ((task_dirent = readdir(task_dir))) {
            long local_tid;
            char *task_end;
            const size_t tid_len = sizeof(local_tid)*3+1;
            /* "/proc/" + pid + "/task/" + tid + "/comm" + NUL */
            size_t task_pathlen = 6 + strlen(dirent->d_name) + 1 + 4 + 1
                                    + strlen(task_dirent->d_name) + 1 + 4 + 1;
            char *task_path;
            int comm_file;
            char task_comm[16] = "";
            char task_name[sizeof(name) + 1 + tid_len + 1 + sizeof(task_comm) + 1];
            ssize_t n;

            local_tid = strtol(task_dirent->d_name, &task_end, 10);
            if (*task_end)
              /* Not a number */
              continue;

            task_path = malloc(task_pathlen);
            if (task_path) {
              snprintf(task_path, task_pathlen, "/proc/%s/task/%s/comm",
                       dirent->d_name, task_dirent->d_name);
              comm_file = open(task_path, O_RDONLY);
              free(task_path);
            } else {
              /* Out of memory: behave as if the comm file were unavailable */
              comm_file = -1;
            }

            /* n ends up non-zero only when the thread has its own name
             * that differs from the process name. */
            if (comm_file >= 0) {
              n = read(comm_file, task_comm, sizeof(task_comm) - 1);
              if (n < 0)
                n = 0;
              close(comm_file);
              task_comm[n] = 0;
              if (n > 1 && task_comm[n-1] == '\n')
                task_comm[n-1] = 0;
              if (!strcmp(comm, task_comm))
                /* Same as process comm, do not show it again */
                n = 0;
            } else {
              n = 0;
            }

            if (hwloc_linux_get_tid_cpubind(topology, local_tid, task_cpuset))
              continue;

            /* Threads bound exactly like their process add no information */
            if (proc_cpubind && hwloc_bitmap_isequal(task_cpuset, cpuset))
              continue;

            if (n) {
              snprintf(task_name, sizeof(task_name), "%s %li %s", name, local_tid, task_comm);
            } else {
              snprintf(task_name, sizeof(task_name), "%s %li", name, local_tid);
            }

            insert_task(topology, task_cpuset, task_name);
          }
          closedir(task_dir);
        }
      }

      free(path);
    }
#endif /* HWLOC_LINUX_SYS */

    if (!proc_cpubind)
      continue;

    /* Skip processes whose binding covers the whole root cpuset */
    if (hwloc_bitmap_isincluded(root->cpuset, cpuset))
      continue;

    insert_task(topology, cpuset, name);
  }

  hwloc_bitmap_free(cpuset);
#ifdef HWLOC_LINUX_SYS
  hwloc_bitmap_free(task_cpuset);
#endif /* HWLOC_LINUX_SYS */
  closedir(dir);
#endif /* HAVE_DIRENT_H */
}
Пример #25
0
/**
 * Load an hwloc topology from the XML file named on the command line,
 * post-process it, and check that it contains at least one core object.
 *
 * Usage: opal_hwloc <topofile>
 *
 * Operates on my_topology, which is not declared in this function
 * (presumably a file-scope variable).
 *
 * @return 0 once the topology was loaded and processed (a missing core is
 *         only reported on stderr), the opal_init() error code if OPAL
 *         could not be initialized, or OPAL_ERR_NOT_SUPPORTED if any hwloc
 *         setup call fails. A wrong argument count exits with status 1.
 */
int main(int argc, char* argv[])
{
    hwloc_obj_t obj;
    unsigned j, k;
    struct hwloc_topology_support *support;
    int rc;

    if (2 != argc) {
        fprintf(stderr, "Usage: opal_hwloc <topofile>\n");
        exit(1);
    }

    if (0 > (rc = opal_init(&argc, &argv))) {
        fprintf(stderr, "opal_hwloc: couldn't init opal - error code %d\n", rc);
        return rc;
    }

    /* every hwloc setup failure below reports the same error code; the
     * cleanup labels at the end make sure OPAL is always finalized
     */
    rc = OPAL_ERR_NOT_SUPPORTED;

    if (0 != hwloc_topology_init(&my_topology)) {
        goto finalize;
    }
    if (0 != hwloc_topology_set_xml(my_topology, argv[1])) {
        goto destroy;
    }
    /* since we are loading this from an external source, we have to
     * explicitly set a flag so hwloc sets things up correctly
     */
    if (0 != hwloc_topology_set_flags(my_topology,
                                      (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM |
                                       HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM |
                                       HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) {
        goto destroy;
    }
    if (0 != hwloc_topology_load(my_topology)) {
        goto destroy;
    }
    /* remove the hostname from the topology. Unfortunately, hwloc
     * decided to add the source hostname to the "topology", thus
     * rendering it unusable as a pure topological description. So
     * we remove that information here.
     */
    obj = hwloc_get_root_obj(my_topology);
    for (k=0; k < obj->infos_count; k++) {
        if (NULL == obj->infos[k].name ||
            NULL == obj->infos[k].value) {
            continue;
        }
        if (0 == strncmp(obj->infos[k].name, "HostName", strlen("HostName"))) {
            free(obj->infos[k].name);
            free(obj->infos[k].value);
            /* left justify the array */
            for (j=k; j < obj->infos_count-1; j++) {
                obj->infos[j] = obj->infos[j+1];
            }
            /* clear the now-duplicated last slot so the strings that were
             * shifted down are referenced only once
             */
            obj->infos[obj->infos_count-1].name = NULL;
            obj->infos[obj->infos_count-1].value = NULL;
            obj->infos_count--;
            break;
        }
    }
    /* unfortunately, hwloc does not include support info in its
     * xml output :-(( We default to assuming it is present as
     * systems that use this option are likely to provide
     * binding support
     */
    support = (struct hwloc_topology_support*)hwloc_topology_get_support(my_topology);
    support->cpubind->set_thisproc_cpubind = true;

    /* filter the cpus thru any default cpu set */
    opal_hwloc_base_filter_cpus(my_topology);

    /* fill opal_cache_line_size global with the smallest L1 cache
       line size */
    fill_cache_line_size();

    /* test it */
    if (NULL == hwloc_get_obj_by_type(my_topology, HWLOC_OBJ_CORE, 0)) {
        fprintf(stderr, "DIDN'T FIND A CORE\n");
    }

    rc = 0;

destroy:
    hwloc_topology_destroy(my_topology);

finalize:
    opal_finalize();

    return rc;
}