Example #1
static int orte_grpcomm_base_close(void)
{
    /* Close the selected component */
    if( NULL != orte_grpcomm.finalize ) {
        orte_grpcomm.finalize();
    }
    OBJ_DESTRUCT(&orte_grpcomm_base.active_colls);
    OBJ_DESTRUCT(&orte_grpcomm_base.modex_requests);

#if OPAL_HAVE_HWLOC
    if (NULL != orte_grpcomm_base.working_cpuset) {
        hwloc_bitmap_free(orte_grpcomm_base.working_cpuset);
        orte_grpcomm_base.working_cpuset = NULL;
    }
#endif
    return mca_base_framework_components_close(&orte_grpcomm_base_framework, NULL);
}
Example #2
void *
hwloc_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_cpuset_t set, hwloc_membind_policy_t policy, int flags)
{
  hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
  void *ret;

  if (hwloc_fix_membind_cpuset(topology, nodeset, set)) {
    if (flags & HWLOC_MEMBIND_STRICT)
      ret = NULL;
    else
      ret = hwloc_alloc(topology, len);
  } else
    ret = hwloc_alloc_membind_nodeset(topology, len, nodeset, policy, flags);

  hwloc_bitmap_free(nodeset);
  return ret;
}
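A minimal usage sketch for the wrapper above, not taken from the original source: it assumes the hwloc 1.x cpuset-taking signature shown here, a topology exposing at least one HWLOC_OBJ_CORE, and hwloc_free() being available for the release.

#include <hwloc.h>
#include <stdio.h>

int main(void)
{
  hwloc_topology_t topology;
  hwloc_obj_t core;
  void *buf;

  hwloc_topology_init(&topology);
  hwloc_topology_load(topology);

  /* allocate 4 KiB bound near the CPUs of the first core */
  core = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, 0);
  if (core != NULL) {
    buf = hwloc_alloc_membind(topology, 4096, core->cpuset,
                              HWLOC_MEMBIND_BIND, 0);
    if (buf != NULL) {
      printf("bound buffer at %p\n", buf);
      hwloc_free(topology, buf, 4096);
    }
  }
  hwloc_topology_destroy(topology);
  return 0;
}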
Example #3
static int _get_ldom_sched_cpuset(hwloc_topology_t topology,
		hwloc_obj_type_t hwtype, hwloc_obj_type_t req_hwtype,
		uint32_t ldom, cpu_set_t *mask)
{
	hwloc_obj_t obj;
	hwloc_bitmap_t cpuset;
	int hwdepth;

	cpuset = hwloc_bitmap_alloc();
	hwdepth = hwloc_get_type_depth(topology, hwtype);
	obj = hwloc_get_obj_by_depth(topology, hwdepth, ldom);
	_add_hwloc_cpuset(hwtype, req_hwtype, obj, 0, 0, cpuset);
	hwloc_cpuset_to_glibc_sched_affinity(topology, cpuset,
						 mask, sizeof(cpu_set_t));
	hwloc_bitmap_free(cpuset);
	return true;
}
Example #4
int opal_hwloc_base_memory_set(opal_hwloc_base_memory_segment_t *segments,
                               size_t num_segments)
{
    int rc = OPAL_SUCCESS;
    char *msg = NULL;
    size_t i;
    hwloc_cpuset_t cpuset = NULL;

    /* bozo check */
    if (OPAL_SUCCESS != opal_hwloc_base_get_topology()) {
        msg = "hwloc_set_area_membind() failure - topology not available";
        return opal_hwloc_base_report_bind_failure(__FILE__, __LINE__,
                                                   msg, rc);
    }

    /* This module won't be used unless the process is already
       processor-bound.  So find out where we're processor bound, and
       bind our memory there, too. */
    cpuset = hwloc_bitmap_alloc();
    if (NULL == cpuset) {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        msg = "hwloc_bitmap_alloc() failure";
        goto out;
    }
    hwloc_get_cpubind(opal_hwloc_topology, cpuset, 0);
    for (i = 0; i < num_segments; ++i) {
        if (0 != hwloc_set_area_membind(opal_hwloc_topology,
                                        segments[i].mbs_start_addr,
                                        segments[i].mbs_len, cpuset,
                                        HWLOC_MEMBIND_BIND,
                                        HWLOC_MEMBIND_STRICT)) {
            rc = OPAL_ERROR;
            msg = "hwloc_set_area_membind() failure";
            goto out;
        }
    }

 out:
    if (NULL != cpuset) {
        hwloc_bitmap_free(cpuset);
    }
    if (OPAL_SUCCESS != rc) {
        return opal_hwloc_base_report_bind_failure(__FILE__, __LINE__, msg, rc);
    }
    return OPAL_SUCCESS;
}
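The pattern above boils down to "find where the process is CPU-bound, then bind memory to the same place". A standalone sketch without the OPAL wrappers, assuming the same hwloc 1.x cpuset-based hwloc_set_area_membind(); bind_buffer_where_bound is a hypothetical helper name:

#include <hwloc.h>

static int bind_buffer_where_bound(hwloc_topology_t topology,
                                   void *buf, size_t len)
{
    hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
    int rc = -1;

    if (NULL == cpuset)
        return -1;
    /* query our current CPU binding ... */
    if (0 == hwloc_get_cpubind(topology, cpuset, 0))
        /* ... and bind the buffer to the matching location */
        rc = hwloc_set_area_membind(topology, buf, len, cpuset,
                                    HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_STRICT);
    hwloc_bitmap_free(cpuset);
    return rc;
}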
Example #5
static int
hwloc_aix_get_sth_membind(hwloc_topology_t topology, rstype_t what, rsid_t who, hwloc_bitmap_t nodeset, hwloc_membind_policy_t *policy, int flags __hwloc_attribute_unused)
{
  hwloc_bitmap_t hwloc_set;
  rsethandle_t rset;
  unsigned cpu, maxcpus;
  int res = -1;
  int depth, n, i;

  depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
  if (depth < 0) {
    errno = EXDEV;
    return -1;
  }
  n = hwloc_get_nbobjs_by_depth(topology, depth);

  rset = rs_alloc(RS_EMPTY);

  if (ra_getrset(what, who, 0, rset) == -1)
    goto out;

  hwloc_set = hwloc_bitmap_alloc();

  maxcpus = rs_getinfo(rset, R_MAXPROCS, 0);
  for (cpu = 0; cpu < maxcpus; cpu++)
    if (rs_op(RS_TESTRESOURCE, rset, NULL, R_PROCS, cpu) == 1)
      hwloc_bitmap_set(hwloc_set, cpu);
  hwloc_bitmap_and(hwloc_set, hwloc_set, hwloc_topology_get_complete_cpuset(topology));

  hwloc_bitmap_zero(nodeset);
  for (i = 0; i < n; i++) {
    hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
    if (hwloc_bitmap_isincluded(obj->cpuset, hwloc_set))
      hwloc_bitmap_set(nodeset, obj->os_index);
  }

  hwloc_bitmap_free(hwloc_set);

  *policy = HWLOC_MEMBIND_BIND;
  res = 0;

out:
  rs_free(rset);
  return res;
}
Example #6
static int
hwloc_win_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
{
  int ret;
  hwloc_cpuset_t cpuset;

  if ((policy != HWLOC_MEMBIND_DEFAULT && policy != HWLOC_MEMBIND_BIND)
      || flags & HWLOC_MEMBIND_NOCPUBIND) {
    errno = ENOSYS;
    return -1;
  }

  cpuset = hwloc_bitmap_alloc();
  hwloc_cpuset_from_nodeset(topology, cpuset, nodeset);
  ret = hwloc_win_set_proc_cpubind(topology, pid, cpuset, flags & HWLOC_MEMBIND_STRICT?HWLOC_CPUBIND_STRICT:0);
  hwloc_bitmap_free(cpuset);
  return ret;
}
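The same nodeset-to-cpuset conversion can emulate memory binding on any backend that only supports CPU binding. A hedged sketch, assuming a loaded topology and a non-empty nodeset; cpubind_from_nodeset is a hypothetical name:

#include <hwloc.h>

static int cpubind_from_nodeset(hwloc_topology_t topology,
                                hwloc_const_nodeset_t nodeset)
{
  hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
  int ret;

  if (NULL == cpuset)
    return -1;
  /* expand the NUMA nodes into the CPUs they contain */
  hwloc_cpuset_from_nodeset(topology, cpuset, nodeset);
  ret = hwloc_set_cpubind(topology, cpuset, 0);
  hwloc_bitmap_free(cpuset);
  return ret;
}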
Example #7
void chpl_topo_touchMemFromSubloc(void* p, size_t size, chpl_bool onlyInside,
                                  c_sublocid_t subloc) {
  size_t pgSize;
  unsigned char* pPgLo;
  size_t nPages;
  hwloc_cpuset_t cpuset;
  int flags;

  _DBG_P("chpl_topo_touchMemFromSubloc(%p, %#zx, onlyIn=%s, %d)\n",
         p, size, (onlyInside ? "T" : "F"), (int) subloc);

  if (!haveTopology
      || !topoSupport->cpubind->get_thread_cpubind
      || !topoSupport->cpubind->set_thread_cpubind) {
    return;
  }

  alignAddrSize(p, size, onlyInside, &pgSize, &pPgLo, &nPages);

  _DBG_P("    localize %p, %#zx bytes (%#zx pages)\n",
         pPgLo, nPages * pgSize, nPages);

  if (nPages == 0)
    return;

  CHK_ERR_ERRNO((cpuset = hwloc_bitmap_alloc()) != NULL);

  flags = HWLOC_CPUBIND_THREAD;
  /* save the thread's current binding so it can be restored below */
  CHK_ERR_ERRNO(hwloc_get_cpubind(topology, cpuset, flags) == 0);

  chpl_topo_setThreadLocality(subloc);

  {
    size_t pg;
    for (pg = 0; pg < nPages; pg++) {
      pPgLo[pg * pgSize] = 0;
    }
  }

  flags = HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT;
  CHK_ERR_ERRNO(hwloc_set_cpubind(topology, cpuset, flags) == 0);

  hwloc_bitmap_free(cpuset);
}
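The function above is an instance of a save/touch/restore idiom: remember the calling thread's binding, migrate elsewhere for first-touch work, then rebind. A generic sketch under the same assumptions (a loaded topology with thread-level cpubind support); run_then_restore is a hypothetical name:

#include <hwloc.h>

static int run_then_restore(hwloc_topology_t topology,
                            hwloc_const_cpuset_t where,
                            void (*fn)(void *), void *arg)
{
  hwloc_cpuset_t saved = hwloc_bitmap_alloc();
  int rc = -1;

  if (NULL == saved)
    return -1;
  if (0 == hwloc_get_cpubind(topology, saved, HWLOC_CPUBIND_THREAD)) {
    /* move, do the work (e.g. first-touch page initialization), move back */
    hwloc_set_cpubind(topology, where, HWLOC_CPUBIND_THREAD);
    fn(arg);
    rc = hwloc_set_cpubind(topology, saved, HWLOC_CPUBIND_THREAD);
  }
  hwloc_bitmap_free(saved);
  return rc;
}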
Example #8
void bindCurrentThreadToNumaNode(int node) {
  hwloc_topology_t topology = getHWTopology();
  hwloc_cpuset_t cpuset;
  hwloc_obj_t obj;

  // The actual node
  obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, node);

  // obj is nullptr on non-NUMA machines
  if (obj == nullptr) {
    fprintf(stderr, "Couldn't get hwloc object, bindCurrentThreadToNumaNode failed!\n");
    return;
  }

  cpuset = hwloc_bitmap_dup(obj->cpuset);
  // hwloc_bitmap_singlify(cpuset);

  // bind
  if (hwloc_set_cpubind(topology, cpuset, HWLOC_CPUBIND_STRICT | HWLOC_CPUBIND_NOMEMBIND | HWLOC_CPUBIND_THREAD)) {
    char* str;
    int error = errno;
    hwloc_bitmap_asprintf(&str, obj->cpuset);
    printf("Couldn't bind to cpuset %s: %s\n", str, strerror(error));
    free(str);
    throw std::runtime_error(strerror(error));
  }

  // free duplicated cpuset
  hwloc_bitmap_free(cpuset);

  // assuming single machine system
  obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_MACHINE, 0);
  // set membind policy interleave for this thread
  if (hwloc_set_membind_nodeset(
          topology, obj->nodeset, HWLOC_MEMBIND_INTERLEAVE, HWLOC_MEMBIND_STRICT | HWLOC_MEMBIND_THREAD) && errno != ENOSYS) {
    char* str;
    int error = errno;
    hwloc_bitmap_asprintf(&str, obj->nodeset);
    fprintf(stderr, "Couldn't membind to nodeset  %s: %s\n", str, strerror(error));
    fprintf(stderr, "Continuing as normal, however, no guarantees\n");
    free(str);
  }
}
Example #9
int main(void)
{
  hwloc_topology_t topology;
  char *string = NULL;
  hwloc_obj_t obj;
  hwloc_bitmap_t set;

  hwloc_topology_init(&topology);
  hwloc_topology_set_synthetic(topology, SYNTHETIC_TOPOLOGY_DESCRIPTION);
  hwloc_topology_load(topology);
  set = hwloc_bitmap_alloc();

  hwloc_bitmap_sscanf(set, GIVEN_CPUSET_STRING);
  obj = hwloc_get_obj_covering_cpuset(topology, set);

  assert(obj);
  fprintf(stderr, "found covering object type %s covering cpuset %s\n",
	  hwloc_obj_type_string(obj->type), GIVEN_CPUSET_STRING);
  assert(hwloc_bitmap_isincluded(set, obj->cpuset));

  hwloc_bitmap_asprintf(&string, obj->cpuset);
  fprintf(stderr, "covering object of %s is %s, expected %s\n",
	  GIVEN_CPUSET_STRING, string, EXPECTED_CPUSET_STRING);
  assert(!strcmp(EXPECTED_CPUSET_STRING, string));
  free(string);

  hwloc_bitmap_sscanf(set, GIVEN_LARGESPLIT_CPUSET_STRING);
  obj = hwloc_get_obj_covering_cpuset(topology, set);
  assert(obj == hwloc_get_root_obj(topology));
  fprintf(stderr, "found system as covering object of first+last cpus cpuset %s\n",
	  GIVEN_LARGESPLIT_CPUSET_STRING);

  hwloc_bitmap_sscanf(set, GIVEN_TOOLARGE_CPUSET_STRING);
  obj = hwloc_get_obj_covering_cpuset(topology, set);
  assert(!obj);
  fprintf(stderr, "found no covering object for too-large cpuset %s\n",
	  GIVEN_TOOLARGE_CPUSET_STRING);

  hwloc_bitmap_free(set);
  hwloc_topology_destroy(topology);

  return EXIT_SUCCESS;
}
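The same covering-object query works against the real topology, not just a synthetic one. A short sketch, assuming at least two PUs; error checking is abbreviated:

#include <hwloc.h>
#include <stdio.h>

int main(void)
{
  hwloc_topology_t topology;
  hwloc_bitmap_t set;
  hwloc_obj_t obj;

  hwloc_topology_init(&topology);
  hwloc_topology_load(topology);

  set = hwloc_bitmap_alloc();
  hwloc_bitmap_set_range(set, 0, 1); /* PUs 0 and 1 */
  obj = hwloc_get_obj_covering_cpuset(topology, set);
  if (obj != NULL)
    printf("smallest object covering PUs 0-1: %s\n",
           hwloc_obj_type_string(obj->type));

  hwloc_bitmap_free(set);
  hwloc_topology_destroy(topology);
  return 0;
}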
Example #10
int opal_hwloc_base_membind(opal_hwloc_base_memory_segment_t *segs,
                            size_t count, int node_id)
{
    size_t i;
    int rc = OPAL_SUCCESS;
    char *msg = NULL;
    hwloc_cpuset_t cpuset = NULL;

    /* bozo check */
    if (OPAL_SUCCESS != opal_hwloc_base_get_topology()) {
        msg = "hwloc_set_area_membind() failure - topology not available";
        return opal_hwloc_base_report_bind_failure(__FILE__, __LINE__,
                                                   msg, rc);
    }

    cpuset = hwloc_bitmap_alloc();
    if (NULL == cpuset) {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        msg = "hwloc_bitmap_alloc() failure";
        goto out;
    }
    hwloc_bitmap_set(cpuset, node_id);
    for(i = 0; i < count; i++) {
        if (0 != hwloc_set_area_membind(opal_hwloc_topology,
                                        segs[i].mbs_start_addr,
                                        segs[i].mbs_len, cpuset,
                                        HWLOC_MEMBIND_BIND,
                                        HWLOC_MEMBIND_STRICT)) {
            rc = OPAL_ERROR;
            msg = "hwloc_set_area_membind() failure";
            goto out;
        }
    }

 out:
    if (NULL != cpuset) {
        hwloc_bitmap_free(cpuset);
    }
    if (OPAL_SUCCESS != rc) {
        return opal_hwloc_base_report_bind_failure(__FILE__, __LINE__, msg, rc);
    }
    return OPAL_SUCCESS;
}
Example #11
void AbstractCoreBoundQueue::launchThread(int core) {
  //get the number of cores on system
  int NUM_PROCS = getNumberOfCoresOnSystem();

  if (core < NUM_PROCS) {
    _thread = new std::thread(&AbstractTaskQueue::executeTask, this);
    hwloc_cpuset_t cpuset;
    hwloc_obj_t obj;
    hwloc_topology_t topology = getHWTopology();

    obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, core);
    // the bitmap to modify
    cpuset = hwloc_bitmap_dup(obj->cpuset);
    // remove hyperthreads
    hwloc_bitmap_singlify(cpuset);
    // bind
    if (hwloc_set_thread_cpubind(topology, _thread->native_handle(), cpuset, HWLOC_CPUBIND_STRICT | HWLOC_CPUBIND_NOMEMBIND)) {
      char *str;
      int error = errno;
      hwloc_bitmap_asprintf(&str, obj->cpuset);
      fprintf(stderr, "Couldn't bind to cpuset %s: %s\n", str, strerror(error));
      fprintf(stderr, "Continuing as normal, however, no guarantees\n");
      //throw std::runtime_error(strerror(error));
    }
    if(hwloc_set_membind (topology, cpuset,  HWLOC_MEMBIND_FIRSTTOUCH , HWLOC_MEMBIND_THREAD )){
      char *str;
      int error = errno;
      hwloc_bitmap_asprintf(&str, obj->cpuset);
      fprintf(stderr, "Couldn't bind to memory %s: %s\n", str, strerror(error));
      fprintf(stderr, "Continuing as normal, however, no guarantees\n");
      //throw std::runtime_error(strerror(error));
    }

    hwloc_bitmap_free(cpuset);

  } else {
    // this case should never happen, as TaskQueue is only initialized from SimpleTaskScheduler, which captures this case
    throw std::logic_error("CPU to run thread on is larger than number of total cores; seems that TaskQueue was initialized outside of SimpleTaskScheduler, which should not happen");
  }
}
Example #12
/* This function initializes the bind_map data structure with the binding information from all
 * the ranks that have copied their cpu-affinity information into the shared memory region.
 * */
void MPIDI_SHM_hwloc_init_bindmap(int num_ranks, int topo_depth, int *shared_region, int **bind_map)
{
    int i, level;
    unsigned int num_obj, curr_obj;

    hwloc_cpuset_t hwloc_cpuset = hwloc_bitmap_alloc();
    /* STEP 3.1. Collect the binding information from hwloc for all ranks */
    for (i = 0; i < num_ranks; ++i) {
        cpu_set_t t = ((cpu_set_t *) (shared_region))[i];
        hwloc_cpuset_from_glibc_sched_affinity(MPIR_Process.hwloc_topology, hwloc_cpuset, &t,
                                               sizeof(t));
        /* HWLOC_OBJ_PU is the smallest unit of computation. We would like to get all the
         * affinity information for each rank */
        num_obj =
            hwloc_get_nbobjs_inside_cpuset_by_type(MPIR_Process.hwloc_topology, hwloc_cpuset,
                                                   HWLOC_OBJ_PU);
        /* Go over all objects, and if it is bound to more than one PU at that level, set it
         * to -1, otherwise update to the binding*/
        for (curr_obj = 0; curr_obj < num_obj; ++curr_obj) {
            hwloc_obj_t obj =
                hwloc_get_obj_inside_cpuset_by_type(MPIR_Process.hwloc_topology, hwloc_cpuset,
                                                    HWLOC_OBJ_PU, curr_obj);
            level = 0;
            do {
                /* If binding was not set, or is same as previous binding, update.
                 * Note that we use logical indices from hwloc instead of physical indices
                 * because logical indices are more portable - see hwloc documentation*/
                if (bind_map[i][level] == 0 || bind_map[i][level] == obj->logical_index) {
                    bind_map[i][level] = obj->logical_index;
                } else {
                    /* If rank is bound to different PUs at that level, we set to -1 */
                    bind_map[i][level] = -1;
                }
                level++;
            } while ((obj = obj->parent));
        }
    }
    hwloc_bitmap_free(hwloc_cpuset);
}
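Step 3.1 above hinges on one conversion: a glibc cpu_set_t becomes an hwloc bitmap that the inside-cpuset helpers can walk. A minimal sketch of that conversion in isolation, assuming a loaded topology (count_pus_in_mask is a hypothetical name; hwloc/glibc-sched.h is Linux-specific):

#define _GNU_SOURCE
#include <sched.h>
#include <hwloc.h>
#include <hwloc/glibc-sched.h>

static int count_pus_in_mask(hwloc_topology_t topology, const cpu_set_t *mask)
{
    hwloc_bitmap_t set = hwloc_bitmap_alloc();
    int n;

    if (NULL == set)
        return -1;
    /* translate the glibc affinity mask into an hwloc cpuset */
    hwloc_cpuset_from_glibc_sched_affinity(topology, set, mask, sizeof(*mask));
    n = hwloc_get_nbobjs_inside_cpuset_by_type(topology, set, HWLOC_OBJ_PU);
    hwloc_bitmap_free(set);
    return n;
}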
Example #13
void opal_hwloc_base_close(void)
{
    if (!opal_hwloc_base_inited) {
        return;
    }

    /* free memory */
    if (NULL != opal_hwloc_my_cpuset) {
        hwloc_bitmap_free(opal_hwloc_my_cpuset);
        opal_hwloc_my_cpuset = NULL;
    }

    /* destroy the topology */
    if (NULL != opal_hwloc_topology) {
        opal_hwloc_base_free_topology(opal_hwloc_topology);
        opal_hwloc_topology = NULL;
    }


    /* All done */
    opal_hwloc_base_inited = false;
}
Example #14
void migrate(long PageStart, long PageEnd) {
  SPMR_DEBUG(std::cout << "Runtime: migrate pages: " << PageStart << " to "
                       << PageEnd << "\n");
  SPMR_DEBUG(std::cout << "Runtime: hwloc call: " << (PageStart << PAGE_EXP)
                       << ", " << ((PageEnd - PageStart) << PAGE_EXP) << "\n");

  hwloc_bitmap_t set = hwloc_bitmap_alloc();

  hwloc_get_cpubind(__spm_topo, set, HWLOC_CPUBIND_THREAD);
  hwloc_get_last_cpu_location(__spm_topo, set, HWLOC_CPUBIND_THREAD);

  hwloc_bitmap_singlify(set);

  assert(hwloc_set_area_membind(__spm_topo, (const void *)(PageStart << PAGE_EXP),
                                (PageEnd - PageStart) << PAGE_EXP,
                                (hwloc_const_cpuset_t)set, HWLOC_MEMBIND_BIND,
                                HWLOC_MEMBIND_MIGRATE) != -1 &&
         "Unable to migrate requested pages");

  hwloc_bitmap_free(set);
}
Example #15
void chpl_topo_setThreadLocality(c_sublocid_t subloc) {
  hwloc_cpuset_t cpuset;
  int flags;

  _DBG_P("chpl_topo_setThreadLocality(%d)\n", (int) subloc);

  if (!haveTopology) {
    return;
  }

  if (!topoSupport->cpubind->set_thread_cpubind)
    return;

  CHK_ERR_ERRNO((cpuset = hwloc_bitmap_alloc()) != NULL);

  hwloc_cpuset_from_nodeset(topology, cpuset,
                            getNumaObj(subloc)->allowed_nodeset);

  flags = HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT;
  CHK_ERR_ERRNO(hwloc_set_cpubind(topology, cpuset, flags) == 0);

  hwloc_bitmap_free(cpuset);
}
Example #16
static int opal_hwloc_base_close(void)
{
    if (!opal_hwloc_base_inited) {
        return OPAL_SUCCESS;
    }

#if OPAL_HAVE_HWLOC
    {
        int ret;

        /* no need to close the component as it was statically opened */

        /* for support of tools such as ompi_info */
        ret = mca_base_framework_components_close (&opal_hwloc_base_framework, NULL);
        if (OPAL_SUCCESS != ret) {
            return ret;
        }

        /* free memory */
        if (NULL != opal_hwloc_my_cpuset) {
            hwloc_bitmap_free(opal_hwloc_my_cpuset);
            opal_hwloc_my_cpuset = NULL;
        }

        /* destroy the topology */
        if (NULL != opal_hwloc_topology) {
            opal_hwloc_base_free_topology(opal_hwloc_topology);
            opal_hwloc_topology = NULL;
        }

    }
#endif

    /* All done */
    opal_hwloc_base_inited = false;
    return OPAL_SUCCESS;
}
Example #17
static void print_task(hwloc_topology_t topology,
                       long pid_number, const char *name, hwloc_bitmap_t cpuset,
                       char *pidoutput,
                       int thread)
{
    printf("%s%ld\t", thread ? " " : "", pid_number);

    if (show_cpuset) {
        char *cpuset_str = NULL;
        hwloc_bitmap_asprintf(&cpuset_str, cpuset);
        printf("%s", cpuset_str);
        free(cpuset_str);
    } else {
        hwloc_bitmap_t remaining = hwloc_bitmap_dup(cpuset);
        int first = 1;
        while (!hwloc_bitmap_iszero(remaining)) {
            char type[64];
            unsigned idx;
            hwloc_obj_t obj = hwloc_get_first_largest_obj_inside_cpuset(topology, remaining);
            /* don't show a cache if there's something equivalent and nicer */
            while (hwloc_obj_type_is_cache(obj->type) && obj->arity == 1)
                obj = obj->first_child;
            hwloc_obj_type_snprintf(type, sizeof(type), obj, 1);
            idx = logical ? obj->logical_index : obj->os_index;
            if (idx == (unsigned) -1)
                printf("%s%s", first ? "" : " ", type);
            else
                printf("%s%s:%u", first ? "" : " ", type, idx);
            hwloc_bitmap_andnot(remaining, remaining, obj->cpuset);
            first = 0;
        }
        hwloc_bitmap_free(remaining);
    }

    printf("\t\t%s%s%s\n", name, pidoutput ? "\t" : "", pidoutput ? pidoutput : "");
}
Example #18
void
hwloc_look_synthetic(struct hwloc_topology *topology)
{
  hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
  unsigned first_cpu = 0, i;

  topology->support.discovery->pu = 1;

  /* start with id=0 for each level */
  for (i = 0; topology->backend_params.synthetic.arity[i] > 0; i++)
    topology->backend_params.synthetic.id[i] = 0;
  /* ... including the last one */
  topology->backend_params.synthetic.id[i] = 0;

  /* update first level type according to the synthetic type array */
  topology->levels[0][0]->type = topology->backend_params.synthetic.type[0];

  for (i = 0; i < topology->backend_params.synthetic.arity[0]; i++)
    first_cpu = hwloc__look_synthetic(topology, 1, first_cpu, cpuset);

  hwloc_bitmap_free(cpuset);

  hwloc_add_object_info(topology->levels[0][0], "Backend", "Synthetic");
}
Example #19
static int bind_downwards(orte_job_t *jdata,
                          orte_node_t *node,
                          hwloc_obj_type_t target,
                          unsigned cache_level)
{
    int j;
    orte_job_map_t *map;
    orte_proc_t *proc;
    hwloc_obj_t trg_obj, nxt_obj;
    hwloc_cpuset_t cpus;
    unsigned int ncpus;
    opal_hwloc_obj_data_t *data;
    int total_cpus;
    hwloc_cpuset_t totalcpuset;

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps: bind downward for job %s with bindings %s",
                        ORTE_JOBID_PRINT(jdata->jobid),
                        opal_hwloc_base_print_binding(jdata->map->binding));
    /* initialize */
    map = jdata->map;
    totalcpuset = hwloc_bitmap_alloc();

    /* cycle thru the procs */
    for (j=0; j < node->procs->size; j++) {
        if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
            continue;
        }
        /* ignore procs from other jobs */
        if (proc->name.jobid != jdata->jobid) {
            continue;
        }
        /* ignore procs that have already been bound - should
         * never happen, but safer
         */
        if (NULL != proc->cpu_bitmap) {
            continue;
        }
        /* we don't know if the target is a direct child of this locale,
         * or if it is some depth below it, so we have to conduct a bit
         * of a search. Let hwloc find the min usage one for us.
         */
        trg_obj = opal_hwloc_base_find_min_bound_target_under_obj(node->topology,
                                                                  proc->locale,
                                                                  target, cache_level);
        if (NULL == trg_obj) {
            /* there aren't any such targets under this object */
            orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name);
            hwloc_bitmap_free(totalcpuset);
            return ORTE_ERR_SILENT;
        }
        /* record the location */
        proc->bind_location = trg_obj;
        /* start with a clean slate */
        hwloc_bitmap_zero(totalcpuset);
        total_cpus = 0;
        nxt_obj = trg_obj;
        do {
            if (NULL == nxt_obj) {
                /* could not find enough cpus to meet request */
                orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name);
                hwloc_bitmap_free(totalcpuset);
                return ORTE_ERR_SILENT;
            }
            trg_obj = nxt_obj;
            /* get the number of cpus under this location */
            ncpus = opal_hwloc_base_get_npus(node->topology, trg_obj);
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "%s GOT %d CPUS",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ncpus);
            /* track the number bound */
            if (NULL == (data = (opal_hwloc_obj_data_t*)trg_obj->userdata)) {
                data = OBJ_NEW(opal_hwloc_obj_data_t);
                trg_obj->userdata = data;
            }
            data->num_bound++;
            /* error out if adding a proc would cause overload and that wasn't allowed,
             * and it wasn't a default binding policy (i.e., the user requested it)
             */
            if (ncpus < data->num_bound &&
                !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) {
                if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true,
                                   opal_hwloc_base_print_binding(map->binding), node->name,
                                   data->num_bound, ncpus);
                    hwloc_bitmap_free(totalcpuset);
                    return ORTE_ERR_SILENT;
                } else {
                    /* if this is the default binding policy, then just don't
                     * bind this proc
                     */
                    data->num_bound--;  // maintain count
                    /* show the proc as not bound */
                    proc->bind_location = NULL;
                    hwloc_bitmap_zero(totalcpuset);
                    break;
                }
            }
            /* bind the proc here */
            cpus = opal_hwloc_base_get_available_cpus(node->topology, trg_obj);
            hwloc_bitmap_or(totalcpuset, totalcpuset, cpus);
            /* track total #cpus */
            total_cpus += ncpus;
            /* move to the next location, in case we need it */
            nxt_obj = trg_obj->next_cousin;
        } while (total_cpus < orte_rmaps_base.cpus_per_rank);
        hwloc_bitmap_list_asprintf(&proc->cpu_bitmap, totalcpuset);
        if (4 < opal_output_get_verbosity(orte_rmaps_base_framework.framework_output)) {
            char tmp1[1024], tmp2[1024];
            if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2str(tmp1, sizeof(tmp1),
                                                               node->topology, totalcpuset)) {
                opal_output(orte_rmaps_base_framework.framework_output,
                            "%s PROC %s ON %s IS NOT BOUND",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&proc->name), node->name);
            } else {
                opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), node->topology, totalcpuset);
                opal_output(orte_rmaps_base_framework.framework_output,
                            "%s BOUND PROC %s[%s] TO %s: %s",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&proc->name), node->name,
                            tmp1, tmp2);
            }
        }
    }
    hwloc_bitmap_free(totalcpuset);
    
    return ORTE_SUCCESS;
}
Example #20
int main(void)
{
  hwloc_topology_t topology;
#ifdef HWLOC_HAVE_CPU_SET
  unsigned depth;
  hwloc_bitmap_t hwlocset;
  cpu_set_t schedset;
  hwloc_obj_t obj;
  int err;
#endif /* HWLOC_HAVE_CPU_SET */

  hwloc_topology_init(&topology);
  hwloc_topology_load(topology);

#ifdef HWLOC_HAVE_CPU_SET

  depth = hwloc_topology_get_depth(topology);

  hwlocset = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset(topology));
  hwloc_cpuset_to_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset));
#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
  err = sched_setaffinity(0, sizeof(schedset));
#else
  err = sched_setaffinity(0, sizeof(schedset), &schedset);
#endif
  assert(!err);
  hwloc_bitmap_free(hwlocset);

#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
  err = sched_getaffinity(0, sizeof(schedset));
#else
  err = sched_getaffinity(0, sizeof(schedset), &schedset);
#endif
  assert(!err);
  hwlocset = hwloc_bitmap_alloc();
  hwloc_cpuset_from_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset));
  assert(hwloc_bitmap_isincluded(hwlocset, hwloc_topology_get_complete_cpuset(topology)));
  hwloc_bitmap_andnot(hwlocset, hwlocset, hwloc_topology_get_online_cpuset(topology));
  hwloc_bitmap_andnot(hwlocset, hwlocset, hwloc_topology_get_allowed_cpuset(topology));
  assert(hwloc_bitmap_iszero(hwlocset));
  hwloc_bitmap_free(hwlocset);

  obj = hwloc_get_obj_by_depth(topology, depth-1, hwloc_get_nbobjs_by_depth(topology, depth-1) - 1);
  assert(obj);
  assert(obj->type == HWLOC_OBJ_PU);

  hwlocset = hwloc_bitmap_dup(obj->cpuset);
  hwloc_cpuset_to_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset));
#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
  err = sched_setaffinity(0, sizeof(schedset));
#else
  err = sched_setaffinity(0, sizeof(schedset), &schedset);
#endif
  assert(!err);
  hwloc_bitmap_free(hwlocset);

#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
  err = sched_getaffinity(0, sizeof(schedset));
#else
  err = sched_getaffinity(0, sizeof(schedset), &schedset);
#endif
  assert(!err);
  hwlocset = hwloc_bitmap_alloc();
  hwloc_cpuset_from_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset));
  assert(hwloc_bitmap_isequal(hwlocset, obj->cpuset));
  hwloc_bitmap_free(hwlocset);

#endif /* HWLOC_HAVE_CPU_SET */

  hwloc_topology_destroy(topology);
  return 0;
}
Example #21
int main(int argc, char *argv[])
{
    const struct hwloc_topology_support *support;
    hwloc_topology_t topology;
    hwloc_const_bitmap_t topocpuset;
    hwloc_bitmap_t cpuset;
    unsigned long flags = 0;
    DIR *dir;
    struct dirent *dirent;
    int show_all = 0;
    int show_threads = 0;
    int get_last_cpu_location = 0;
    char *callname;
    char *pidcmd = NULL;
    int err;
    int opt;

    callname = strrchr(argv[0], '/');
    if (!callname)
        callname = argv[0];
    else
        callname++;
    /* skip argv[0], handle options */
    argc--;
    argv++;

    hwloc_utils_check_api_version(callname);

    while (argc >= 1) {
        opt = 0;
        if (!strcmp(argv[0], "-a"))
            show_all = 1;
        else if (!strcmp(argv[0], "-l") || !strcmp(argv[0], "--logical")) {
            logical = 1;
        } else if (!strcmp(argv[0], "-p") || !strcmp(argv[0], "--physical")) {
            logical = 0;
        } else if (!strcmp(argv[0], "-c") || !strcmp(argv[0], "--cpuset")) {
            show_cpuset = 1;
        } else if (!strcmp(argv[0], "-e") || !strncmp(argv[0], "--get-last-cpu-location", 10)) {
            get_last_cpu_location = 1;
        } else if (!strcmp(argv[0], "-t") || !strcmp(argv[0], "--threads")) {
#ifdef HWLOC_LINUX_SYS
            show_threads = 1;
#else
            fprintf (stderr, "Listing threads is currently only supported on Linux\n");
#endif
        } else if (!strcmp (argv[0], "--whole-system")) {
            flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM;
        } else if (!strcmp (argv[0], "--pid-cmd")) {
            if (argc < 2) {
                usage(callname, stdout);
                exit(EXIT_FAILURE);
            }
            pidcmd = argv[1];
            opt = 1;
        } else {
            fprintf (stderr, "Unrecognized option: %s\n", argv[0]);
            usage (callname, stderr);
            exit(EXIT_FAILURE);
        }
        argc -= opt+1;
        argv += opt+1;
    }

    err = hwloc_topology_init(&topology);
    if (err)
        goto out;

    hwloc_topology_set_flags(topology, flags);

    err = hwloc_topology_load(topology);
    if (err)
        goto out_with_topology;

    support = hwloc_topology_get_support(topology);

    if (get_last_cpu_location) {
        if (!support->cpubind->get_proc_last_cpu_location)
            goto out_with_topology;
    } else {
        if (!support->cpubind->get_proc_cpubind)
            goto out_with_topology;
    }

    topocpuset = hwloc_topology_get_topology_cpuset(topology);

    dir  = opendir("/proc");
    if (!dir)
        goto out_with_topology;

    cpuset = hwloc_bitmap_alloc();
    if (!cpuset)
        goto out_with_dir;

    while ((dirent = readdir(dir))) {
        long pid_number;
        hwloc_pid_t pid;
        char pidoutput[1024];
        char *end;
        char name[64] = "";
        /* management of threads */
        unsigned boundthreads = 0, i;
        long *tids = NULL; /* NULL if process is not threaded */
        hwloc_bitmap_t *tidcpusets = NULL;

        pid_number = strtol(dirent->d_name, &end, 10);
        if (*end)
            /* Not a number */
            continue;

        pid = hwloc_pid_from_number(pid_number, 0);

#ifdef HWLOC_LINUX_SYS
        {
            unsigned pathlen = 6 + strlen(dirent->d_name) + 1 + 7 + 1;
            char *path;
            int file;
            ssize_t n;

            path = malloc(pathlen);
            snprintf(path, pathlen, "/proc/%s/cmdline", dirent->d_name);
            file = open(path, O_RDONLY);
            free(path);

            if (file >= 0) {
                n = read(file, name, sizeof(name) - 1);
                close(file);

                if (n <= 0)
                    /* Ignore kernel threads and errors */
                    continue;

                name[n] = 0;
            }
        }
#endif /* HWLOC_LINUX_SYS */

        if (show_threads) {
#ifdef HWLOC_LINUX_SYS
            /* check if some threads must be displayed */
            unsigned pathlen = 6 + strlen(dirent->d_name) + 1 + 4 + 1;
            char *path;
            DIR *taskdir;

            path = malloc(pathlen);
            snprintf(path, pathlen, "/proc/%s/task", dirent->d_name);
            taskdir = opendir(path);
            if (taskdir) {
                struct dirent *taskdirent;
                long tid;
                unsigned n = 0;
                /* count threads */
                while ((taskdirent = readdir(taskdir))) {
                    tid = strtol(taskdirent->d_name, &end, 10);
                    if (*end)
                        /* Not a number */
                        continue;
                    n++;
                }
                if (n > 1) {
                    /* if there's more than one thread, see if some are bound */
                    tids = malloc(n * sizeof(*tids));
                    tidcpusets = calloc(n+1, sizeof(*tidcpusets));
                    if (tids && tidcpusets) {
                        /* reread the directory but gather info now */
                        rewinddir(taskdir);
                        i = 0;
                        while ((taskdirent = readdir(taskdir))) {
                            tid = strtol(taskdirent->d_name, &end, 10);
                            if (*end)
                                /* Not a number */
                                continue;
                            if (get_last_cpu_location) {
                                if (hwloc_linux_get_tid_last_cpu_location(topology, tid, cpuset))
                                    continue;
                            } else {
                                if (hwloc_linux_get_tid_cpubind(topology, tid, cpuset))
                                    continue;
                            }
                            hwloc_bitmap_and(cpuset, cpuset, topocpuset);
                            tids[i] = tid;
                            tidcpusets[i] = hwloc_bitmap_dup(cpuset);
                            i++;
                            if (hwloc_bitmap_iszero(cpuset))
                                continue;
                            if (hwloc_bitmap_isequal(cpuset, topocpuset) && !show_all)
                                continue;
                            boundthreads++;
                        }
                    } else {
                        /* failed to alloc, behave as if there were no threads */
                        free(tids);
                        tids = NULL;
                        free(tidcpusets);
                        tidcpusets = NULL;
                    }
                }
                closedir(taskdir);
            }
#endif /* HWLOC_LINUX_SYS */
        }

        if (get_last_cpu_location) {
            if (hwloc_get_proc_last_cpu_location(topology, pid, cpuset, 0))
                continue;
        } else {
            if (hwloc_get_proc_cpubind(topology, pid, cpuset, 0))
                continue;
        }

        hwloc_bitmap_and(cpuset, cpuset, topocpuset);
        if (hwloc_bitmap_iszero(cpuset))
            continue;

        /* don't print anything if the process isn't bound and if no threads are bound and if not showing all */
        if (hwloc_bitmap_isequal(cpuset, topocpuset) && (!tids || !boundthreads) && !show_all)
            continue;

        pidoutput[0] = '\0';
        if (pidcmd) {
            char *cmd;
            FILE *file;
            cmd = malloc(strlen(pidcmd)+1+5+2+1);
            sprintf(cmd, "%s %u", pidcmd, pid);
            file = popen(cmd, "r");
            if (file) {
                if (fgets(pidoutput, sizeof(pidoutput), file)) {
                    end = strchr(pidoutput, '\n');
                    if (end)
                        *end = '\0';
                }
                pclose(file);
            }
            free(cmd);
        }

        /* print the process */
        print_task(topology, pid_number, name, cpuset, pidoutput[0] == '\0' ? NULL : pidoutput, 0);
        if (tids)
            /* print each tid we found (its tidcpuset isn't NULL anymore) */
            for(i=0; tidcpusets[i] != NULL; i++) {
                print_task(topology, tids[i], "", tidcpusets[i], NULL, 1);
                hwloc_bitmap_free(tidcpusets[i]);
            }

        /* free threads stuff */
        free(tidcpusets);
        free(tids);
    }

    err = 0;
    hwloc_bitmap_free(cpuset);

out_with_dir:
    closedir(dir);
out_with_topology:
    hwloc_topology_destroy(topology);
out:
    return err;
}
Example #22
/* user to have to play with the cgroup hierarchy to modify it */
extern int task_cgroup_cpuset_set_task_affinity(slurmd_job_t *job)
{
	int fstatus = SLURM_ERROR;

#ifndef HAVE_HWLOC

	error("task/cgroup: plugin not compiled with hwloc support, "
	      "skipping affinity.");
	return fstatus;

#else
	uint32_t i;
	uint32_t nldoms;
	uint32_t nsockets;
	uint32_t ncores;
	uint32_t npus;
	uint32_t nobj;

	uint32_t pfirst,plast;
	uint32_t taskid = job->envtp->localid;
	uint32_t jntasks = job->node_tasks;
	uint32_t jnpus = jntasks * job->cpus_per_task;
	pid_t    pid = job->envtp->task_pid;

	cpu_bind_type_t bind_type;
	int verbose = 0;

	hwloc_topology_t topology;
#if HWLOC_API_VERSION <= 0x00010000
	hwloc_cpuset_t cpuset,ct;
#else
	hwloc_bitmap_t cpuset,ct;
#endif
	hwloc_obj_t obj;
	struct hwloc_obj *pobj;
	hwloc_obj_type_t hwtype;
	hwloc_obj_type_t req_hwtype;
	int hwdepth;

	size_t tssize;
	cpu_set_t ts;

	bind_type = job->cpu_bind_type ;
	if (conf->task_plugin_param & CPU_BIND_VERBOSE ||
	    bind_type & CPU_BIND_VERBOSE)
		verbose = 1 ;

	if (bind_type & CPU_BIND_NONE) {
		if (verbose)
			info("task/cgroup: task[%u] is requesting no affinity",
			     taskid);
		return 0;
	} else if (bind_type & CPU_BIND_TO_THREADS) {
		if (verbose)
			info("task/cgroup: task[%u] is requesting "
			     "thread level binding",taskid);
		req_hwtype = HWLOC_OBJ_PU;
	} else if (bind_type & CPU_BIND_TO_CORES) {
		if (verbose)
			info("task/cgroup: task[%u] is requesting "
			     "core level binding",taskid);
		req_hwtype = HWLOC_OBJ_CORE;
	} else if (bind_type & CPU_BIND_TO_SOCKETS) {
		if (verbose)
			info("task/cgroup: task[%u] is requesting "
			     "socket level binding",taskid);
		req_hwtype = HWLOC_OBJ_SOCKET;
	} else if (bind_type & CPU_BIND_TO_LDOMS) {
		if (verbose)
			info("task/cgroup: task[%u] is requesting "
			     "ldom level binding",taskid);
		req_hwtype = HWLOC_OBJ_NODE;
	} else {
		if (verbose)
			info("task/cgroup: task[%u] using core level binding"
			     " by default",taskid);
		req_hwtype = HWLOC_OBJ_CORE;
	}

	/* Allocate and initialize hwloc objects */
	hwloc_topology_init(&topology);
#if HWLOC_API_VERSION <= 0x00010000
	cpuset = hwloc_cpuset_alloc() ;
#else
	cpuset = hwloc_bitmap_alloc() ;
#endif

	/*
	 * Perform the topology detection. It will only get allowed PUs.
	 * Detect in the same time the granularity to use for binding.
	 * The granularity can be relaxed from threads to cores if enough
	 * cores are available as with hyperthread support, ntasks-per-core
	 * param can let us have access to more threads per core for each
	 * task
	 * Revert back to machine granularity if no finer-grained granularity
	 * matching the request is found. This will result in no affinity
	 * applied.
	 * The detected granularity will be used to find where to best place
	 * the task, then the cpu_bind option will be used to relax the
	 * affinity constraint and use more PUs. (i.e. use a core granularity
	 * to dispatch the tasks across the sockets and then provide access
	 * to each task to the cores of its socket.)
	 */
	hwloc_topology_load(topology);
	npus = (uint32_t) hwloc_get_nbobjs_by_type(topology,
						   HWLOC_OBJ_PU);
	ncores = (uint32_t) hwloc_get_nbobjs_by_type(topology,
						     HWLOC_OBJ_CORE);
	nsockets = (uint32_t) hwloc_get_nbobjs_by_type(topology,
						       HWLOC_OBJ_SOCKET);
	nldoms = (uint32_t) hwloc_get_nbobjs_by_type(topology,
						     HWLOC_OBJ_NODE);
	hwtype = HWLOC_OBJ_MACHINE;
	nobj = 1;
	if (npus >= jnpus || bind_type & CPU_BIND_TO_THREADS) {
		hwtype = HWLOC_OBJ_PU;
		nobj = npus;
	}
	if (ncores >= jnpus || bind_type & CPU_BIND_TO_CORES) {
		hwtype = HWLOC_OBJ_CORE;
		nobj = ncores;
	}
	if (nsockets >= jntasks &&
	     bind_type & CPU_BIND_TO_SOCKETS) {
		hwtype = HWLOC_OBJ_SOCKET;
		nobj = nsockets;
	}
	/*
	 * HWLOC returns all the NUMA nodes available regardless of the
	 * number of underlying sockets available (regardless of the allowed
	 * resources). So there is no guarantee that each ldom will be populated
	 * with usable sockets. So add a simple check to at least ensure that
	 * we have as many sockets as ldoms before moving to ldoms granularity
	 */
	if (nldoms >= jntasks &&
	     nsockets >= nldoms &&
	     bind_type & CPU_BIND_TO_LDOMS) {
		hwtype = HWLOC_OBJ_NODE;
		nobj = nldoms;
	}

	/*
	 * Perform a block binding on the detected object respecting the
	 * granularity.
	 * If not enough objects to do the job, revert to no affinity mode
	 */
	if (hwloc_compare_types(hwtype,HWLOC_OBJ_MACHINE) == 0) {

		info("task/cgroup: task[%u] disabling affinity because of %s "
		     "granularity",taskid,hwloc_obj_type_string(hwtype));

	} else if (hwloc_compare_types(hwtype,HWLOC_OBJ_CORE) >= 0 &&
		    jnpus > nobj) {

		info("task/cgroup: task[%u] not enough %s objects, disabling "
		     "affinity",taskid,hwloc_obj_type_string(hwtype));

	} else {

		if (verbose) {
			info("task/cgroup: task[%u] using %s granularity",
			     taskid,hwloc_obj_type_string(hwtype));
		}
		if (hwloc_compare_types(hwtype,HWLOC_OBJ_CORE) >= 0) {
			/* cores or threads granularity */
			pfirst = taskid *  job->cpus_per_task ;
			plast = pfirst + job->cpus_per_task - 1;
		} else {
			/* sockets or ldoms granularity */
			pfirst = taskid;
			plast = pfirst;
		}

		hwdepth = hwloc_get_type_depth(topology,hwtype);
		for (i = pfirst; i <= plast && i < nobj ; i++) {
			obj = hwloc_get_obj_by_depth(topology,hwdepth,(int)i);

			/* if requested binding overlap the granularity */
			/* use the ancestor cpuset instead of the object one */
			if (hwloc_compare_types(hwtype,req_hwtype) > 0) {

				/* Get the parent object of req_hwtype or the */
				/* one just above if not found (meaning of >0)*/
				/* (useful for ldoms binding with !NUMA nodes)*/
				pobj = obj->parent;
				while (pobj != NULL &&
					hwloc_compare_types(pobj->type,
							    req_hwtype) > 0)
					pobj = pobj->parent;

				if (pobj != NULL) {
					if (verbose)
						info("task/cgroup: task[%u] "
						     "higher level %s found",
						     taskid,
						     hwloc_obj_type_string(
							     pobj->type));
#if HWLOC_API_VERSION <= 0x00010000
					ct = hwloc_cpuset_dup(pobj->
							      allowed_cpuset);
					hwloc_cpuset_or(cpuset,cpuset,ct);
					hwloc_cpuset_free(ct);
#else
					ct = hwloc_bitmap_dup(pobj->
							      allowed_cpuset);
					hwloc_bitmap_or(cpuset,cpuset,ct);
					hwloc_bitmap_free(ct);
#endif
				} else {
					/* should not be executed */
					if (verbose)
						info("task/cgroup: task[%u] "
						     "no higher level found",
						     taskid);
#if HWLOC_API_VERSION <= 0x00010000
					ct = hwloc_cpuset_dup(obj->
							      allowed_cpuset);
					hwloc_cpuset_or(cpuset,cpuset,ct);
					hwloc_cpuset_free(ct);
#else
					ct = hwloc_bitmap_dup(obj->
							      allowed_cpuset);
					hwloc_bitmap_or(cpuset,cpuset,ct);
					hwloc_bitmap_free(ct);
#endif
				}

			} else {
#if HWLOC_API_VERSION <= 0x00010000
				ct = hwloc_cpuset_dup(obj->allowed_cpuset);
				hwloc_cpuset_or(cpuset,cpuset,ct);
				hwloc_cpuset_free(ct);
#else
				ct = hwloc_bitmap_dup(obj->allowed_cpuset);
				hwloc_bitmap_or(cpuset,cpuset,ct);
				hwloc_bitmap_free(ct);
#endif
			}
		}

		char *str;
#if HWLOC_API_VERSION <= 0x00010000
		hwloc_cpuset_asprintf(&str,cpuset);
#else
		hwloc_bitmap_asprintf(&str,cpuset);
#endif
		tssize = sizeof(cpu_set_t);
		if (hwloc_cpuset_to_glibc_sched_affinity(topology,cpuset,
							  &ts,tssize) == 0) {
			fstatus = SLURM_SUCCESS;
			if (sched_setaffinity(pid,tssize,&ts)) {
				error("task/cgroup: task[%u] unable to set "
				      "taskset '%s'",taskid,str);
				fstatus = SLURM_ERROR;
			} else if (verbose) {
				info("task/cgroup: task[%u] taskset '%s' is set"
				     ,taskid,str);
			}
		} else {
			error("task/cgroup: task[%u] unable to build "
			      "taskset '%s'",taskid,str);
			fstatus = SLURM_ERROR;
		}
		free(str);

	}

	/* Destroy hwloc objects */
#if HWLOC_API_VERSION <= 0x00010000
	hwloc_cpuset_free(cpuset);
#else
	hwloc_bitmap_free(cpuset);
#endif
	hwloc_topology_destroy(topology);

	return fstatus;
#endif

}
Example #23
int main(void)
{
    hwloc_topology_t topology;
    hwloc_bitmap_t set;
    hwloc_const_bitmap_t cset;
    hwloc_membind_policy_t policy;
    const struct hwloc_topology_support *support;
    int nbnodes;
    hwloc_obj_t obj;
    char *buffer, *s;
    unsigned i;
    int err;

    /* create a topology */
    err = hwloc_topology_init(&topology);
    if (err < 0) {
        fprintf(stderr, "failed to initialize the topology\n");
        return EXIT_FAILURE;
    }
    err = hwloc_topology_load(topology);
    if (err < 0) {
        fprintf(stderr, "failed to load the topology\n");
        hwloc_topology_destroy(topology);
        return EXIT_FAILURE;
    }

    /* retrieve the entire set of NUMA nodes and count them */
    cset = hwloc_topology_get_topology_nodeset(topology);
    nbnodes = hwloc_bitmap_weight(cset);
    if (nbnodes <= 0) {
        /* nbnodes may be -1 when there's no NUMA information,
         * or 0 when the machine is known to be non-NUMA */
        printf("this machine is not NUMA, nothing to do\n");
        hwloc_topology_destroy(topology);
        return EXIT_SUCCESS;
    }
    printf("there are %d nodes in the machine\n", nbnodes);

    /* get the process memory binding as a nodeset */
    set = hwloc_bitmap_alloc();
    if (!set) {
        fprintf(stderr, "failed to allocate a bitmap\n");
        hwloc_topology_destroy(topology);
        return EXIT_FAILURE;
    }
    err = hwloc_get_membind_nodeset(topology, set, &policy, 0);
    if (err < 0) {
        fprintf(stderr, "failed to retrieve my memory binding and policy\n");
        hwloc_topology_destroy(topology);
        hwloc_bitmap_free(set);
        return EXIT_FAILURE;
    }

    /* print the corresponding NUMA nodes */
    hwloc_bitmap_asprintf(&s, set);
    printf("bound to nodeset %s with contains:\n", s);
    free(s);
    hwloc_bitmap_foreach_begin(i, set) {
        obj = hwloc_get_numanode_obj_by_os_index(topology, i);
        printf("  node #%u (OS index %u) with %lld bytes of memory\n",
               obj->logical_index, i, (unsigned long long) obj->memory.local_memory);
    } hwloc_bitmap_foreach_end();

    hwloc_bitmap_free(set);
    hwloc_topology_destroy(topology);
    return EXIT_SUCCESS;
}
Example #24
int orte_rmaps_base_compute_bindings(orte_job_t *jdata)
{
    hwloc_obj_type_t hwb, hwm;
    unsigned clvl=0, clvm=0;
    opal_binding_policy_t bind;
    orte_mapping_policy_t map;
    orte_node_t *node;
    int i, rc;
    struct hwloc_topology_support *support;
    bool force_down = false;
    hwloc_cpuset_t totalcpuset;
    int bind_depth, map_depth;

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps: compute bindings for job %s with policy %s",
                        ORTE_JOBID_PRINT(jdata->jobid),
                        opal_hwloc_base_print_binding(jdata->map->binding));

    map = ORTE_GET_MAPPING_POLICY(jdata->map->mapping);
    bind = OPAL_GET_BINDING_POLICY(jdata->map->binding);

    if (ORTE_MAPPING_BYUSER == map) {
        /* user specified binding by rankfile - nothing for us to do */
        return ORTE_SUCCESS;
    }

    if (OPAL_BIND_TO_CPUSET == bind) {
        int rc;
        /* cpuset was given - setup the bindings */
        if (ORTE_SUCCESS != (rc = bind_to_cpuset(jdata))) {
            ORTE_ERROR_LOG(rc);
        }
        return rc;
    }

    if (OPAL_BIND_TO_NONE == bind) {
        /* no binding requested */
        return ORTE_SUCCESS;
    }

    if (OPAL_BIND_TO_BOARD == bind) {
        /* doesn't do anything at this time */
        return ORTE_SUCCESS;
    }

    /* binding requested - convert the binding level to the hwloc obj type */
    switch (bind) {
    case OPAL_BIND_TO_NUMA:
        hwb = HWLOC_OBJ_NODE;
        break;
    case OPAL_BIND_TO_SOCKET:
        hwb = HWLOC_OBJ_SOCKET;
        break;
    case OPAL_BIND_TO_L3CACHE:
        hwb = HWLOC_OBJ_CACHE;
        clvl = 3;
        break;
    case OPAL_BIND_TO_L2CACHE:
        hwb = HWLOC_OBJ_CACHE;
        clvl = 2;
        break;
    case OPAL_BIND_TO_L1CACHE:
        hwb = HWLOC_OBJ_CACHE;
        clvl = 1;
        break;
    case OPAL_BIND_TO_CORE:
        hwb = HWLOC_OBJ_CORE;
        break;
    case OPAL_BIND_TO_HWTHREAD:
        hwb = HWLOC_OBJ_PU;
        break;
    default:
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /* do the same for the mapping policy */
    switch (map) {
    case ORTE_MAPPING_BYNODE:
    case ORTE_MAPPING_BYSLOT:
    case ORTE_MAPPING_SEQ:
        hwm = HWLOC_OBJ_MACHINE;
        break;
    case ORTE_MAPPING_BYDIST:
    case ORTE_MAPPING_BYNUMA:
        hwm = HWLOC_OBJ_NODE;
        break;
    case ORTE_MAPPING_BYSOCKET:
        hwm = HWLOC_OBJ_SOCKET;
        break;
    case ORTE_MAPPING_BYL3CACHE:
        hwm = HWLOC_OBJ_CACHE;
        clvm = 3;
        break;
    case ORTE_MAPPING_BYL2CACHE:
        hwm = HWLOC_OBJ_CACHE;
        clvm = 2;
        break;
    case ORTE_MAPPING_BYL1CACHE:
        hwm = HWLOC_OBJ_CACHE;
        clvm = 1;
        break;
    case ORTE_MAPPING_BYCORE:
        hwm = HWLOC_OBJ_CORE;
        break;
    case ORTE_MAPPING_BYHWTHREAD:
        hwm = HWLOC_OBJ_PU;
        break;
    default:
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /* if the job was mapped by the corresponding target, then
     * we bind in place
     *
     * otherwise, we have to bind either up or down the hwloc
     * tree. If we are binding upwards (e.g., mapped to hwthread
     * but binding to core), then we just climb the tree to find
     * the first matching object.
     *
     * if we are binding downwards (e.g., mapped to node and bind
     * to core), then we have to do a round-robin assignment of
     * procs to the resources below.
     */

    if (ORTE_MAPPING_BYDIST == map) {
        int rc = ORTE_SUCCESS;
        if (OPAL_BIND_TO_NUMA == bind) {
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps: bindings for job %s - dist to numa",
                                ORTE_JOBID_PRINT(jdata->jobid));
            if (ORTE_SUCCESS != (rc = bind_in_place(jdata, HWLOC_OBJ_NODE, 0))) {
                ORTE_ERROR_LOG(rc);
            }
        } else if (OPAL_BIND_TO_NUMA < bind) {
            /* bind every proc downwards */
            force_down = true;
            goto execute;
        }
        /* if the binding policy is less than numa, then we are unbound - so
         * just ignore this and return (should have been caught in prior
         * tests anyway as only options meeting that criteria are "none"
         * and "board")
         */
        return rc;
    }

    /* now deal with the remaining binding policies based on hardware */
    if (bind == map) {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps: bindings for job %s - bind in place",
                            ORTE_JOBID_PRINT(jdata->jobid));
        if (ORTE_SUCCESS != (rc = bind_in_place(jdata, hwb, clvl))) {
            ORTE_ERROR_LOG(rc);
        }
        return rc;
    }

    /* we need to handle the remaining binding options on a per-node
     * basis because different nodes could potentially have different
     * topologies, with different relative depths for the two levels
     */
 execute:
    /* initialize */
    totalcpuset = hwloc_bitmap_alloc();

    for (i=0; i < jdata->map->nodes->size; i++) {
        if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) {
            continue;
        }
        if (!orte_do_not_launch) {
            /* if we don't want to launch, then we are just testing the system,
             * so ignore questions about support capabilities
             */
            support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology);
            /* check if topology supports cpubind - have to be careful here
             * as Linux doesn't currently support thread-level binding. This
             * may change in the future, though, and it isn't clear how hwloc
             * interprets the current behavior. So check both flags to be sure.
             */
            if (!support->cpubind->set_thisproc_cpubind &&
                !support->cpubind->set_thisthread_cpubind) {
                if (!OPAL_BINDING_REQUIRED(jdata->map->binding) ||
                    !OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
                    /* we are not required to bind, so ignore this */
                    continue;
                }
                orte_show_help("help-orte-rmaps-base.txt", "rmaps:cpubind-not-supported", true, node->name);
                hwloc_bitmap_free(totalcpuset);
                return ORTE_ERR_SILENT;
            }
            /* check if topology supports membind - have to be careful here
             * as hwloc treats this differently than I (at least) would have
             * expected. Per hwloc, Linux memory binding is at the thread,
             * and not process, level. Thus, hwloc sets the "thisproc" flag
             * to "false" on all Linux systems, and uses the "thisthread" flag
             * to indicate binding capability - don't warn if the user didn't
             * specifically request binding
             */
            if (!support->membind->set_thisproc_membind &&
                !support->membind->set_thisthread_membind &&
                OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
                if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) {
                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name);
                    membind_warned = true;
                } else if (OPAL_HWLOC_BASE_MBFA_ERROR == opal_hwloc_base_mbfa) {
                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported-fatal", true, node->name);
                    hwloc_bitmap_free(totalcpuset);
                    return ORTE_ERR_SILENT;
                }
            }
        }

        /* some systems do not report cores, and so we can get a situation where our
         * default binding policy will fail for no necessary reason. So if we are
         * computing a binding due to our default policy, and no cores are found
         * on this node, just silently skip it - we will not bind
         */
        if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding) &&
            HWLOC_TYPE_DEPTH_UNKNOWN == hwloc_get_type_depth(node->topology, HWLOC_OBJ_CORE)) {
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "Unable to bind-to core by default on node %s as no cores detected",
                                node->name);
            continue;
        }

        /* we share topologies in order
         * to save space, so we need to reset the usage info to reflect
         * our own current state
         */
        reset_usage(node, jdata->jobid);

        if (force_down) {
            if (ORTE_SUCCESS != (rc = bind_downwards(jdata, node, hwb, clvl))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
        } else {
            /* determine the relative depth on this node */
            if (HWLOC_OBJ_CACHE == hwb) {
                /* must use a unique function because blasted hwloc
                 * just doesn't deal with caches very well...sigh
                 */
                bind_depth = hwloc_get_cache_type_depth(node->topology, clvl, -1);
            } else {
                bind_depth = hwloc_get_type_depth(node->topology, hwb);
            }
            if (0 > bind_depth) {
                /* didn't find such an object */
                orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-objects",
                               true, hwloc_obj_type_string(hwb), node->name);
                return ORTE_ERR_SILENT;
            }
            if (HWLOC_OBJ_CACHE == hwm) {
                /* must use a unique function because blasted hwloc
                 * just doesn't deal with caches very well...sigh
                 */
                map_depth = hwloc_get_cache_type_depth(node->topology, clvm, -1);
            } else {
                map_depth = hwloc_get_type_depth(node->topology, hwm);
            }
            if (0 > map_depth) {
                /* didn't find such an object */
                orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-objects",
                               true, hwloc_obj_type_string(hwm), node->name);
                return ORTE_ERR_SILENT;
            }
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "%s bind_depth: %d map_depth %d",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                bind_depth, map_depth);
            if (bind_depth > map_depth) {
                if (ORTE_SUCCESS != (rc = bind_downwards(jdata, node, hwb, clvl))) {
                    ORTE_ERROR_LOG(rc);
                    return rc;
                }
            } else {
                if (ORTE_SUCCESS != (rc = bind_upwards(jdata, node, hwb, clvl))) {
                    ORTE_ERROR_LOG(rc);
                    return rc;
                }
            }
        }
    }

    return ORTE_SUCCESS;
}
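The depth comparison just above decides the bind direction: hwloc depths increase toward smaller objects, so a bind target deeper than the map target means binding downwards. A minimal standalone sketch (hwloc 1.x API, as used throughout these examples; compile with -lhwloc) of how those depths are obtained, including the cache workaround:

#include <hwloc.h>
#include <stdio.h>

int main(void)
{
    hwloc_topology_t topo;
    int core_depth, l2_depth;

    hwloc_topology_init(&topo);
    hwloc_topology_load(topo);

    core_depth = hwloc_get_type_depth(topo, HWLOC_OBJ_CORE);
    /* caches need the dedicated helper, as in the code above;
     * -1 matches any cache type at that level */
    l2_depth = hwloc_get_cache_type_depth(topo, 2, (hwloc_obj_cache_type_t)-1);

    if (core_depth < 0 || l2_depth < 0) {
        printf("no such object level on this machine\n");
    } else {
        printf("core depth %d, L2 depth %d -> bind %s\n", core_depth, l2_depth,
               core_depth > l2_depth ? "downwards" : "upwards");
    }

    hwloc_topology_destroy(topo);
    return 0;
}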
Example #25
0
int bind_myself_to_core(hwloc_topology_t topology, int id){
  hwloc_cpuset_t cpuset;
  hwloc_obj_t obj;
  char *str;
  int binding_res;
  int depth = hwloc_topology_get_depth(topology);
  int nb_cores = hwloc_get_nbobjs_by_depth(topology, depth-1);
  int my_core;
  int nb_threads = get_nb_threads();
  /* printf("depth=%d\n",depth); */

  switch (mapping_policy){
  case SCATTER:
    my_core = id*(nb_cores/nb_threads);
    break;
  default:
    if(verbose_level>=WARNING){
      printf("Wrong scheduling policy. Using COMPACT\n");
    }
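    /* fall through to COMPACT */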
  case COMPACT:
    my_core = id%nb_cores;
  }

    if (verbose_level >= INFO) {
      printf("Mapping thread %d on core %d\n", id, my_core);
    }

    /* Get my core. */
    obj = hwloc_get_obj_by_depth(topology, depth-1, my_core);
    if (obj) {
      /* Get a copy of its cpuset that we may modify. */
      cpuset = hwloc_bitmap_dup(obj->cpuset);

      /* Get only one logical processor (in case the core is
	 SMT/hyperthreaded). */
      hwloc_bitmap_singlify(cpuset);


      /*hwloc_bitmap_asprintf(&str, cpuset);
      printf("Binding thread %d to cpuset %s\n", my_core,str);
      FREE(str);
      */

      /* And try to bind ourself there. */
      binding_res = hwloc_set_cpubind(topology, cpuset, HWLOC_CPUBIND_THREAD);
      if (binding_res == -1){
        int error = errno;
        hwloc_bitmap_asprintf(&str, obj->cpuset);
        if(verbose_level>=WARNING)
          printf("Thread %d couldn't bind to cpuset %s: %s.\nThis thread is not bound to any core...\n", my_core, str, strerror(error));
        free(str); /* str is allocated by hwloc, free it normally */
        hwloc_bitmap_free(cpuset); /* don't leak our cpuset copy on failure */
        return 0;
      }
      /* FREE our cpuset copy */
      hwloc_bitmap_free(cpuset);
      return 1;
    } else {
      if (verbose_level >= WARNING)
        printf("No valid object for core id %d!\n", my_core);
      return 0;
    }
}
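A hedged usage sketch for the function above: each worker thread binds itself on startup. The thread count here is illustrative, and the globals bind_myself_to_core() consults (mapping_policy, verbose_level, get_nb_threads()) are assumed to be set up elsewhere in the real program.

#include <hwloc.h>
#include <pthread.h>
#include <stdio.h>

#define NB_WORKERS 4

static hwloc_topology_t topo;               /* loaded once, shared read-only */
extern int bind_myself_to_core(hwloc_topology_t topology, int id);

static void *worker(void *arg)
{
    int id = (int)(long)arg;
    if (!bind_myself_to_core(topo, id))
        fprintf(stderr, "worker %d left unbound\n", id);
    /* ... thread-local work ... */
    return NULL;
}

int main(void)
{
    pthread_t th[NB_WORKERS];
    int i;

    hwloc_topology_init(&topo);
    hwloc_topology_load(topo);
    for (i = 0; i < NB_WORKERS; i++)
        pthread_create(&th[i], NULL, worker, (void *)(long)i);
    for (i = 0; i < NB_WORKERS; i++)
        pthread_join(th[i], NULL);
    hwloc_topology_destroy(topo);
    return 0;
}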
Example #26
0
static void add_process_objects(hwloc_topology_t topology)
{
#ifdef HAVE_DIRENT_H
  hwloc_obj_t root;
  hwloc_bitmap_t cpuset;
#ifdef HWLOC_LINUX_SYS
  hwloc_bitmap_t task_cpuset;
#endif /* HWLOC_LINUX_SYS */
  DIR *dir;
  struct dirent *dirent;
  const struct hwloc_topology_support *support;

  root = hwloc_get_root_obj(topology);

  support = hwloc_topology_get_support(topology);

  if (!support->cpubind->get_proc_cpubind)
    return;

  dir = opendir("/proc");
  if (!dir)
    return;
  cpuset = hwloc_bitmap_alloc();
#ifdef HWLOC_LINUX_SYS
  task_cpuset = hwloc_bitmap_alloc();
#endif /* HWLOC_LINUX_SYS */

  while ((dirent = readdir(dir))) {
    long local_pid_number;
    hwloc_pid_t local_pid;
    char *end;
    char name[64];
    int proc_cpubind;

    local_pid_number = strtol(dirent->d_name, &end, 10);
    if (*end)
      /* Not a number */
      continue;

    snprintf(name, sizeof(name), "%ld", local_pid_number);

    local_pid = hwloc_pid_from_number(local_pid_number, 0);

    proc_cpubind = hwloc_get_proc_cpubind(topology, local_pid, cpuset, 0) != -1;

#ifdef HWLOC_LINUX_SYS
    {
      /* Get the process name */
      char *path;
      unsigned pathlen = 6 + strlen(dirent->d_name) + 1 + 7 + 1;
      char cmd[64], *c;
      int file;
      ssize_t n;

      path = malloc(pathlen);
      snprintf(path, pathlen, "/proc/%s/cmdline", dirent->d_name);
      file = open(path, O_RDONLY);
      free(path);

      if (file >= 0) {
        n = read(file, cmd, sizeof(cmd) - 1);
        close(file);

        if (n <= 0)
          /* Ignore kernel threads and errors */
          continue;

        cmd[n] = 0;
        if ((c = strchr(cmd, ' ')))
          *c = 0;
        snprintf(name, sizeof(name), "%ld %s", local_pid_number, cmd);
      }
    }

    {
      /* Get threads */
      char *path;
      unsigned pathlen = 6 + strlen(dirent->d_name) + 1 + 4 + 1;
      DIR *task_dir;
      struct dirent *task_dirent;

      path = malloc(pathlen);
      snprintf(path, pathlen, "/proc/%s/task", dirent->d_name);
      task_dir = opendir(path);
      free(path);

      if (task_dir) {
        while ((task_dirent = readdir(task_dir))) {
          long local_tid;
          char *task_end;
          char task_name[64];

          local_tid = strtol(task_dirent->d_name, &task_end, 10);
          if (*task_end)
            /* Not a number, or the main task */
            continue;

          if (hwloc_linux_get_tid_cpubind(topology, local_tid, task_cpuset))
            continue;

          if (proc_cpubind && hwloc_bitmap_isequal(task_cpuset, cpuset))
            continue;

          snprintf(task_name, sizeof(task_name), "%s %li", name, local_tid);

          insert_task(topology, task_cpuset, task_name);
        }
        closedir(task_dir);
      }
    }
#endif /* HWLOC_LINUX_SYS */

    if (!proc_cpubind)
      continue;

    if (hwloc_bitmap_isincluded(root->cpuset, cpuset))
      continue;

    insert_task(topology, cpuset, name);
  }

  hwloc_bitmap_free(cpuset);
#ifdef HWLOC_LINUX_SYS
  hwloc_bitmap_free(task_cpuset);
#endif /* HWLOC_LINUX_SYS */
  closedir(dir);
#endif /* HAVE_DIRENT_H */
}
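Both versions of add_process_objects() delegate to insert_task(), which is not shown in these examples. A plausible minimal sketch against the hwloc 1.x API - attach a Misc object named after the task to the smallest object covering its binding - might look like this (illustrative only, not the verbatim lstopo implementation):

static void insert_task(hwloc_topology_t topology, hwloc_cpuset_t cpuset, const char *name)
{
  hwloc_obj_t obj;

  /* ignore tasks with an empty binding */
  if (hwloc_bitmap_iszero(cpuset))
    return;

  /* find the smallest object covering the task's cpuset,
   * falling back to the root object */
  obj = hwloc_get_obj_covering_cpuset(topology, cpuset);
  if (!obj)
    obj = hwloc_get_root_obj(topology);

  /* hwloc 1.x: attach a Misc object under that parent */
  hwloc_topology_insert_misc_object_by_parent(topology, obj, name);
}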
Example #27
0
static void add_process_objects(hwloc_topology_t topology)
{
#ifdef HAVE_DIRENT_H
  hwloc_obj_t root;
  hwloc_bitmap_t cpuset;
#ifdef HWLOC_LINUX_SYS
  hwloc_bitmap_t task_cpuset;
#endif /* HWLOC_LINUX_SYS */
  DIR *dir;
  struct dirent *dirent;
  const struct hwloc_topology_support *support;

  root = hwloc_get_root_obj(topology);

  support = hwloc_topology_get_support(topology);

  if (!support->cpubind->get_proc_cpubind)
    return;

  dir = opendir("/proc");
  if (!dir)
    return;
  cpuset = hwloc_bitmap_alloc();
#ifdef HWLOC_LINUX_SYS
  task_cpuset = hwloc_bitmap_alloc();
#endif /* HWLOC_LINUX_SYS */

  while ((dirent = readdir(dir))) {
    long local_pid_number;
    hwloc_pid_t local_pid;
    char *end;
    char name[80];
    int proc_cpubind;

    local_pid_number = strtol(dirent->d_name, &end, 10);
    if (*end)
      /* Not a number */
      continue;

    snprintf(name, sizeof(name), "%ld", local_pid_number);

    local_pid = hwloc_pid_from_number(local_pid_number, 0);

    proc_cpubind = hwloc_get_proc_cpubind(topology, local_pid, cpuset, 0) != -1;

#ifdef HWLOC_LINUX_SYS
    {
      char comm[16];
      char *path;
      size_t pathlen = 6 + strlen(dirent->d_name) + 1 + 7 + 1;

      path = malloc(pathlen);

      {
        /* Get the process name */
        char cmd[64];
        int file;
        ssize_t n;

        snprintf(path, pathlen, "/proc/%s/cmdline", dirent->d_name);
        file = open(path, O_RDONLY);
        if (file < 0) {
          /* Ignore errors */
          free(path);
          continue;
        }
        n = read(file, cmd, sizeof(cmd));
        close(file);

        if (n <= 0) {
          /* Ignore kernel threads and errors */
          free(path);
          continue;
        }

        snprintf(path, pathlen, "/proc/%s/comm", dirent->d_name);
        file = open(path, O_RDONLY);

        if (file >= 0) {
          n = read(file, comm, sizeof(comm) - 1);
          close(file);
          if (n > 0) {
            comm[n] = 0;
            if (n > 1 && comm[n-1] == '\n')
              comm[n-1] = 0;
          } else {
            snprintf(comm, sizeof(comm), "(unknown)");
          }
        } else {
          /* Old kernel, have to look at old file */
          char stats[32];
          char *parenl = NULL, *parenr;

          snprintf(path, pathlen, "/proc/%s/stat", dirent->d_name);
          file = open(path, O_RDONLY);

          if (file < 0) {
            /* Ignore errors */
            free(path);
            continue;
          }

          /* "pid (comm) ..." */
          n = read(file, stats, sizeof(stats) - 1);
          close(file);
          if (n > 0) {
            stats[n] = 0;
            parenl = strchr(stats, '(');
            parenr = strchr(stats, ')');
            if (!parenr)
              parenr = &stats[sizeof(stats)-1];
            *parenr = 0;
          }
          if (!parenl) {
            snprintf(comm, sizeof(comm), "(unknown)");
          } else {
            snprintf(comm, sizeof(comm), "%s", parenl+1);
          }
        }

        snprintf(name, sizeof(name), "%ld %s", local_pid_number, comm);
      }

      {
        /* Get threads */
        DIR *task_dir;
        struct dirent *task_dirent;

        snprintf(path, pathlen, "/proc/%s/task", dirent->d_name);
        task_dir = opendir(path);

        if (task_dir) {
          while ((task_dirent = readdir(task_dir))) {
            long local_tid;
            char *task_end;
            const size_t tid_len = sizeof(local_tid)*3+1;
            size_t task_pathlen = 6 + strlen(dirent->d_name) + 1 + 4 + 1
                                    + strlen(task_dirent->d_name) + 1 + 4 + 1;
            char *task_path;
            int comm_file;
            char task_comm[16] = "";
            char task_name[sizeof(name) + 1 + tid_len + 1 + sizeof(task_comm) + 1];
            ssize_t n;

            local_tid = strtol(task_dirent->d_name, &task_end, 10);
            if (*task_end)
              /* Not a number, or the main task */
              continue;

            task_path = malloc(task_pathlen);
            snprintf(task_path, task_pathlen, "/proc/%s/task/%s/comm",
                     dirent->d_name, task_dirent->d_name);
            comm_file = open(task_path, O_RDONLY);
            free(task_path);

            if (comm_file >= 0) {
              n = read(comm_file, task_comm, sizeof(task_comm) - 1);
              if (n < 0)
                n = 0;
              close(comm_file);
              task_comm[n] = 0;
              if (n > 1 && task_comm[n-1] == '\n')
                task_comm[n-1] = 0;
              if (!strcmp(comm, task_comm))
                /* Same as process comm, do not show it again */
                n = 0;
            } else {
              n = 0;
            }

            if (hwloc_linux_get_tid_cpubind(topology, local_tid, task_cpuset))
              continue;

            if (proc_cpubind && hwloc_bitmap_isequal(task_cpuset, cpuset))
              continue;

            if (n) {
              snprintf(task_name, sizeof(task_name), "%s %li %s", name, local_tid, task_comm);
            } else {
              snprintf(task_name, sizeof(task_name), "%s %li", name, local_tid);
            }

            insert_task(topology, task_cpuset, task_name);
          }
          closedir(task_dir);
        }
      }

      free(path);
    }
#endif /* HWLOC_LINUX_SYS */

    if (!proc_cpubind)
      continue;

    if (hwloc_bitmap_isincluded(root->cpuset, cpuset))
      continue;

    insert_task(topology, cpuset, name);
  }

  hwloc_bitmap_free(cpuset);
#ifdef HWLOC_LINUX_SYS
  hwloc_bitmap_free(task_cpuset);
#endif /* HWLOC_LINUX_SYS */
  closedir(dir);
#endif /* HAVE_DIRENT_H */
}
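One subtlety in the /proc/<pid>/stat fallback above: the process name is the text between the parentheses, and the code truncates at the first ')'. A standalone sketch that uses the last ')' instead, which is more robust for names that themselves contain a ')':

#include <stdio.h>
#include <string.h>

static int parse_stat_comm(const char *stats, char *comm, size_t len)
{
  const char *l = strchr(stats, '(');
  const char *r = l ? strrchr(l, ')') : NULL;   /* last ')', not first */
  if (!l || !r || r <= l)
    return -1;
  snprintf(comm, len, "%.*s", (int)(r - l - 1), l + 1);
  return 0;
}

int main(void)
{
  char comm[16];
  if (parse_stat_comm("1234 (my prog) S 1 ...", comm, sizeof(comm)) == 0)
    printf("comm = '%s'\n", comm);   /* prints: comm = 'my prog' */
  return 0;
}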
Example #28
0
int
main (int argc, char *argv[])
{
  int err;
  hwloc_topology_t topology;
  const char *filename = NULL;
  unsigned long flags = HWLOC_TOPOLOGY_FLAG_IO_DEVICES | HWLOC_TOPOLOGY_FLAG_IO_BRIDGES | HWLOC_TOPOLOGY_FLAG_ICACHES;
  unsigned long restrict_flags = 0;
  int merge = 0;
  int ignorecache = 0;
  char * callname;
  char * input = NULL;
  enum hwloc_utils_input_format input_format = HWLOC_UTILS_INPUT_DEFAULT;
  enum output_format output_format = LSTOPO_OUTPUT_DEFAULT;
  char *restrictstring = NULL;
  struct lstopo_output loutput;
  int opt;
  unsigned i;

  loutput.overwrite = 0;
  loutput.logical = -1;
  loutput.legend = 1;
  loutput.verbose_mode = LSTOPO_VERBOSE_MODE_DEFAULT;

  for(i=0; i<HWLOC_OBJ_TYPE_MAX; i++)
    force_orient[i] = LSTOPO_ORIENT_NONE;
  force_orient[HWLOC_OBJ_PU] = LSTOPO_ORIENT_HORIZ;
  force_orient[HWLOC_OBJ_CACHE] = LSTOPO_ORIENT_HORIZ;
  force_orient[HWLOC_OBJ_NUMANODE] = LSTOPO_ORIENT_HORIZ;

  /* enable verbose backends */
  putenv("HWLOC_XML_VERBOSE=1");
  putenv("HWLOC_SYNTHETIC_VERBOSE=1");

#ifdef HAVE_SETLOCALE
  setlocale(LC_ALL, "");
#endif

  callname = strrchr(argv[0], '/');
  if (!callname)
    callname = argv[0];
  else
    callname++;
  /* skip argv[0], handle options */
  argc--;
  argv++;

  err = hwloc_topology_init (&topology);
  if (err)
    return EXIT_FAILURE;

  while (argc >= 1)
    {
      opt = 0;
      if (!strcmp (argv[0], "-v") || !strcmp (argv[0], "--verbose")) {
	loutput.verbose_mode++;
      } else if (!strcmp (argv[0], "-s") || !strcmp (argv[0], "--silent")) {
	loutput.verbose_mode--;
      } else if (!strcmp (argv[0], "-h") || !strcmp (argv[0], "--help")) {
	usage(callname, stdout);
        exit(EXIT_SUCCESS);
      } else if (!strcmp (argv[0], "-f") || !strcmp (argv[0], "--force"))
	loutput.overwrite = 1;
      else if (!strcmp (argv[0], "-l") || !strcmp (argv[0], "--logical"))
	loutput.logical = 1;
      else if (!strcmp (argv[0], "-p") || !strcmp (argv[0], "--physical"))
	loutput.logical = 0;
      else if (!strcmp (argv[0], "-c") || !strcmp (argv[0], "--cpuset"))
	lstopo_show_cpuset = 1;
      else if (!strcmp (argv[0], "-C") || !strcmp (argv[0], "--cpuset-only"))
	lstopo_show_cpuset = 2;
      else if (!strcmp (argv[0], "--taskset")) {
	lstopo_show_taskset = 1;
	if (!lstopo_show_cpuset)
	  lstopo_show_cpuset = 1;
      } else if (!strcmp (argv[0], "--only")) {
	if (argc < 2) {
	  usage (callname, stderr);
	  exit(EXIT_FAILURE);
	}
        if (hwloc_obj_type_sscanf(argv[1], &lstopo_show_only, NULL, NULL, 0) < 0)
	  fprintf(stderr, "Unsupported type `%s' passed to --only, ignoring.\n", argv[1]);
	opt = 1;
      }
      else if (!strcmp (argv[0], "--ignore")) {
	hwloc_obj_type_t type;
	if (argc < 2) {
	  usage (callname, stderr);
	  exit(EXIT_FAILURE);
	}
	if (hwloc_obj_type_sscanf(argv[1], &type, NULL, NULL, 0) < 0)
	  fprintf(stderr, "Unsupported type `%s' passed to --ignore, ignoring.\n", argv[1]);
	else if (type == HWLOC_OBJ_PU)
	  lstopo_ignore_pus = 1;
	else
	  hwloc_topology_ignore_type(topology, type);
	opt = 1;
      }
      else if (!strcmp (argv[0], "--no-caches"))
	ignorecache = 2;
      else if (!strcmp (argv[0], "--no-useless-caches"))
	ignorecache = 1;
      else if (!strcmp (argv[0], "--no-icaches"))
	flags &= ~HWLOC_TOPOLOGY_FLAG_ICACHES;
      else if (!strcmp (argv[0], "--whole-system"))
	flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM;
      else if (!strcmp (argv[0], "--no-io"))
	flags &= ~(HWLOC_TOPOLOGY_FLAG_IO_DEVICES | HWLOC_TOPOLOGY_FLAG_IO_BRIDGES);
      else if (!strcmp (argv[0], "--no-bridges"))
	flags &= ~(HWLOC_TOPOLOGY_FLAG_IO_BRIDGES);
      else if (!strcmp (argv[0], "--whole-io"))
	flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_IO;
      else if (!strcmp (argv[0], "--merge"))
	merge = 1;
      else if (!strcmp (argv[0], "--no-collapse"))
	lstopo_collapse = 0;
      else if (!strcmp (argv[0], "--thissystem"))
	flags |= HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM;
      else if (!strcmp (argv[0], "--restrict")) {
	if (argc < 2) {
	  usage (callname, stderr);
	  exit(EXIT_FAILURE);
	}
	restrictstring = strdup(argv[1]);
	opt = 1;
      }
      else if (!strcmp (argv[0], "--restrict-flags")) {
	if (argc < 2) {
	  usage (callname, stderr);
	  exit(EXIT_FAILURE);
	}
	restrict_flags = (unsigned long) strtoull(argv[1], NULL, 0);
	opt = 1;
      }
      else if (!strcmp (argv[0], "--export-synthetic-flags")) {
	if (argc < 2) {
	  usage (callname, stderr);
	  exit(EXIT_FAILURE);
	}
	lstopo_export_synthetic_flags = (unsigned long) strtoull(argv[1], NULL, 0);
	opt = 1;
      }
      else if (!strcmp (argv[0], "--horiz"))
	for(i=0; i<HWLOC_OBJ_TYPE_MAX; i++)
	  force_orient[i] = LSTOPO_ORIENT_HORIZ;
      else if (!strcmp (argv[0], "--vert"))
	for(i=0; i<HWLOC_OBJ_TYPE_MAX; i++)
	  force_orient[i] = LSTOPO_ORIENT_VERT;
      else if (!strcmp (argv[0], "--rect"))
	for(i=0; i<HWLOC_OBJ_TYPE_MAX; i++)
	  force_orient[i] = LSTOPO_ORIENT_RECT;
      else if (!strncmp (argv[0], "--horiz=", 8)
	       || !strncmp (argv[0], "--vert=", 7)
	       || !strncmp (argv[0], "--rect=", 7)) {
	enum lstopo_orient_e orient = (argv[0][2] == 'h') ? LSTOPO_ORIENT_HORIZ : (argv[0][2] == 'v') ? LSTOPO_ORIENT_VERT : LSTOPO_ORIENT_RECT;
	char *tmp = argv[0] + ((argv[0][2] == 'h') ? 8 : 7);
	while (tmp) {
	  char *end = strchr(tmp, ',');
	  hwloc_obj_type_t type;
	  if (end)
	    *end = '\0';
	  if (hwloc_obj_type_sscanf(tmp, &type, NULL, NULL, 0) < 0)
	    fprintf(stderr, "Unsupported type `%s' passed to %s, ignoring.\n", tmp, argv[0]);
	  else
	    force_orient[type] = orient;
	  if (!end)
	    break;
	  tmp = end+1;
        }
      }

      else if (!strcmp (argv[0], "--fontsize")) {
	if (argc < 2) {
	  usage (callname, stderr);
	  exit(EXIT_FAILURE);
	}
	fontsize = atoi(argv[1]);
	opt = 1;
      }
      else if (!strcmp (argv[0], "--gridsize")) {
	if (argc < 2) {
	  usage (callname, stderr);
	  exit(EXIT_FAILURE);
	}
	gridsize = atoi(argv[1]);
	opt = 1;
      }
      else if (!strcmp (argv[0], "--no-legend")) {
	loutput.legend = 0;
      }
      else if (!strcmp (argv[0], "--append-legend")) {
	char **tmp;
	if (argc < 2) {
	  usage (callname, stderr);
	  exit(EXIT_FAILURE);
	}
	tmp = realloc(lstopo_append_legends, (lstopo_append_legends_nr+1) * sizeof(*lstopo_append_legends));
	if (!tmp) {
	  fprintf(stderr, "Failed to realloc legend append array, legend ignored.\n");
	} else {
	  lstopo_append_legends = tmp;
	  lstopo_append_legends[lstopo_append_legends_nr] = strdup(argv[1]);
	  lstopo_append_legends_nr++;
	}
	opt = 1;
      }

      else if (hwloc_utils_lookup_input_option(argv, argc, &opt,
					       &input, &input_format,
					       callname)) {
	/* nothing to do anymore */

      } else if (!strcmp (argv[0], "--pid")) {
	if (argc < 2) {
	  usage (callname, stderr);
	  exit(EXIT_FAILURE);
	}
	lstopo_pid_number = atoi(argv[1]); opt = 1;
      } else if (!strcmp (argv[0], "--ps") || !strcmp (argv[0], "--top"))
        top = 1;
      else if (!strcmp (argv[0], "--version")) {
          printf("%s %s\n", callname, HWLOC_VERSION);
          exit(EXIT_SUCCESS);
      } else if (!strcmp (argv[0], "--output-format") || !strcmp (argv[0], "--of")) {
	if (argc < 2) {
	  usage (callname, stderr);
	  exit(EXIT_FAILURE);
	}
        output_format = parse_output_format(argv[1], callname);
        opt = 1;
      } else {
	if (filename) {
	  fprintf (stderr, "Unrecognized option: %s\n", argv[0]);
	  usage (callname, stderr);
	  exit(EXIT_FAILURE);
	} else
	  filename = argv[0];
      }
      argc -= opt+1;
      argv += opt+1;
    }

  if (lstopo_show_only != (hwloc_obj_type_t)-1)
    merge = 0;

  hwloc_topology_set_flags(topology, flags);

  if (ignorecache > 1) {
    hwloc_topology_ignore_type(topology, HWLOC_OBJ_CACHE);
  } else if (ignorecache) {
    hwloc_topology_ignore_type_keep_structure(topology, HWLOC_OBJ_CACHE);
  }
  if (merge)
    hwloc_topology_ignore_all_keep_structure(topology);

  if (input) {
    err = hwloc_utils_enable_input_format(topology, input, &input_format, loutput.verbose_mode > 1, callname);
    if (err)
      return err;
  }

  if (lstopo_pid_number > 0) {
    lstopo_pid = hwloc_pid_from_number(lstopo_pid_number, 0);
    if (hwloc_topology_set_pid(topology, lstopo_pid)) {
      perror("Setting target pid");
      return EXIT_FAILURE;
    }
  }

  /* if the output format wasn't enforced, look at the filename */
  if (filename && output_format == LSTOPO_OUTPUT_DEFAULT) {
    if (!strcmp(filename, "-")
	|| !strcmp(filename, "/dev/stdout")) {
      output_format = LSTOPO_OUTPUT_CONSOLE;
    } else {
      char *dot = strrchr(filename, '.');
      if (dot)
        output_format = parse_output_format(dot+1, callname);
      else {
	fprintf(stderr, "Cannot infer output type for file `%s' without any extension, using default output.\n", filename);
	filename = NULL;
      }
    }
  }

  /* if the output format wasn't enforced, think a bit about what the user probably wants */
  if (output_format == LSTOPO_OUTPUT_DEFAULT) {
    if (lstopo_show_cpuset
        || lstopo_show_only != (hwloc_obj_type_t)-1
        || loutput.verbose_mode != LSTOPO_VERBOSE_MODE_DEFAULT)
      output_format = LSTOPO_OUTPUT_CONSOLE;
  }

  if (input_format == HWLOC_UTILS_INPUT_XML
      && output_format == LSTOPO_OUTPUT_XML) {
    /* must be after parsing output format and before loading the topology */
    putenv("HWLOC_XML_USERDATA_NOT_DECODED=1");
    hwloc_topology_set_userdata_import_callback(topology, hwloc_utils_userdata_import_cb);
    hwloc_topology_set_userdata_export_callback(topology, hwloc_utils_userdata_export_cb);
  }

  err = hwloc_topology_load (topology);
  if (err) {
    fprintf(stderr, "hwloc_topology_load() failed (%s).\n", strerror(errno));
    return EXIT_FAILURE;
  }

  if (top)
    add_process_objects(topology);

  if (restrictstring) {
    hwloc_bitmap_t restrictset = hwloc_bitmap_alloc();
    if (!strcmp (restrictstring, "binding")) {
      if (lstopo_pid_number > 0)
	hwloc_get_proc_cpubind(topology, lstopo_pid, restrictset, HWLOC_CPUBIND_PROCESS);
      else
	hwloc_get_cpubind(topology, restrictset, HWLOC_CPUBIND_PROCESS);
    } else {
      hwloc_bitmap_sscanf(restrictset, restrictstring);
    }
    err = hwloc_topology_restrict (topology, restrictset, restrict_flags);
    if (err) {
      perror("Restricting the topology");
      /* fallthrough */
    }
    hwloc_bitmap_free(restrictset);
    free(restrictstring);
  }

  if (loutput.logical == -1) {
    if (output_format == LSTOPO_OUTPUT_CONSOLE)
      loutput.logical = 1;
    else if (output_format != LSTOPO_OUTPUT_DEFAULT)
      loutput.logical = 0;
  }

  loutput.topology = topology;
  loutput.file = NULL;

  lstopo_populate_userdata(hwloc_get_root_obj(topology));

  if (output_format != LSTOPO_OUTPUT_XML && lstopo_collapse)
    lstopo_add_collapse_attributes(topology);

  switch (output_format) {
    case LSTOPO_OUTPUT_DEFAULT:
#ifdef LSTOPO_HAVE_GRAPHICS
#if CAIRO_HAS_XLIB_SURFACE && defined HWLOC_HAVE_X11_KEYSYM
      if (getenv("DISPLAY")) {
        if (loutput.logical == -1)
          loutput.logical = 0;
        output_x11(&loutput, NULL);
      } else
#endif /* CAIRO_HAS_XLIB_SURFACE && HWLOC_HAVE_X11_KEYSYM */
#ifdef HWLOC_WIN_SYS
      {
        if (loutput.logical == -1)
          loutput.logical = 0;
        output_windows(&loutput, NULL);
      }
#endif
#endif /* LSTOPO_HAVE_GRAPHICS */
#if !defined HWLOC_WIN_SYS || !defined LSTOPO_HAVE_GRAPHICS
      {
        if (loutput.logical == -1)
          loutput.logical = 1;
        output_console(&loutput, NULL);
      }
#endif
      break;

    case LSTOPO_OUTPUT_CONSOLE:
      output_console(&loutput, filename);
      break;
    case LSTOPO_OUTPUT_SYNTHETIC:
      output_synthetic(&loutput, filename);
      break;
    case LSTOPO_OUTPUT_ASCII:
      output_ascii(&loutput, filename);
      break;
    case LSTOPO_OUTPUT_FIG:
      output_fig(&loutput, filename);
      break;
#ifdef LSTOPO_HAVE_GRAPHICS
# if CAIRO_HAS_PNG_FUNCTIONS
    case LSTOPO_OUTPUT_PNG:
      output_png(&loutput, filename);
      break;
# endif /* CAIRO_HAS_PNG_FUNCTIONS */
# if CAIRO_HAS_PDF_SURFACE
    case LSTOPO_OUTPUT_PDF:
      output_pdf(&loutput, filename);
      break;
# endif /* CAIRO_HAS_PDF_SURFACE */
# if CAIRO_HAS_PS_SURFACE
    case LSTOPO_OUTPUT_PS:
      output_ps(&loutput, filename);
      break;
#endif /* CAIRO_HAS_PS_SURFACE */
#if CAIRO_HAS_SVG_SURFACE
    case LSTOPO_OUTPUT_SVG:
      output_svg(&loutput, filename);
      break;
#endif /* CAIRO_HAS_SVG_SURFACE */
#endif /* LSTOPO_HAVE_GRAPHICS */
    case LSTOPO_OUTPUT_XML:
      output_xml(&loutput, filename);
      break;
    default:
      fprintf(stderr, "file format not supported\n");
      usage(callname, stderr);
      exit(EXIT_FAILURE);
  }

  lstopo_destroy_userdata(hwloc_get_root_obj(topology));
  hwloc_utils_userdata_free_recursive(hwloc_get_root_obj(topology));
  hwloc_topology_destroy (topology);

  for(i=0; i<lstopo_append_legends_nr; i++)
    free(lstopo_append_legends[i]);
  free(lstopo_append_legends);

  return EXIT_SUCCESS;
}
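The --horiz=/--vert=/--rect= handling above parses a comma-separated list of object types with hwloc_obj_type_sscanf(). A minimal standalone sketch of just that parsing loop (hwloc 1.x; the type strings are examples):

#include <hwloc.h>
#include <stdio.h>
#include <string.h>

static void parse_type_list(char *list)
{
  char *tmp = list;
  while (tmp) {
    char *end = strchr(tmp, ',');
    hwloc_obj_type_t type;
    if (end)
      *end = '\0';
    if (hwloc_obj_type_sscanf(tmp, &type, NULL, NULL, 0) < 0)
      fprintf(stderr, "unsupported type `%s', ignoring\n", tmp);
    else
      printf("parsed `%s' -> type %d\n", tmp, (int)type);
    if (!end)
      break;
    tmp = end + 1;
  }
}

int main(void)
{
  char list[] = "core,pu,numanode";   /* modifiable copy; the parser writes '\0' into it */
  parse_type_list(list);
  return 0;
}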
Example #29
0
static void obj_data_dest(opal_hwloc_obj_data_t *ptr)
{
    if (NULL != ptr->available) {
        hwloc_bitmap_free(ptr->available);
    }
}
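Destructors like this one are normally paired with a constructor and registered through OPAL's class macro. A hedged sketch of that boilerplate as it would sit next to the destructor (the constructor body is illustrative; only the `available` field is confirmed by the code above):

static void obj_data_const(opal_hwloc_obj_data_t *ptr)
{
    ptr->available = NULL;   /* bitmap is allocated lazily elsewhere */
}

OBJ_CLASS_INSTANCE(opal_hwloc_obj_data_t,
                   opal_object_t,
                   obj_data_const,
                   obj_data_dest);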
Example #30
0
int orte_daemon(int argc, char *argv[])
{
    int ret = 0;
    opal_cmd_line_t *cmd_line = NULL;
    char *rml_uri;
    int i;
    opal_buffer_t *buffer;
    char hostname[100];
#if OPAL_ENABLE_FT_CR == 1
    char *tmp_env_var = NULL;
#endif
    
    /* initialize the globals */
    memset(&orted_globals, 0, sizeof(orted_globals));
    /* initialize the singleton died pipe to an illegal value so we can detect it was set */
    orted_globals.singleton_died_pipe = -1;
    /* init the failure orted vpid to an invalid value */
    orted_globals.fail = ORTE_VPID_INVALID;
    
    /* setup to check common command line options that just report and die */
    cmd_line = OBJ_NEW(opal_cmd_line_t);
    if (OPAL_SUCCESS != opal_cmd_line_create(cmd_line, orte_cmd_line_opts)) {
        OBJ_RELEASE(cmd_line);
        exit(1);
    }
    mca_base_cmd_line_setup(cmd_line);
    if (ORTE_SUCCESS != (ret = opal_cmd_line_parse(cmd_line, false,
                                                   argc, argv))) {
        char *args = NULL;
        args = opal_cmd_line_get_usage_msg(cmd_line);
        fprintf(stderr, "Usage: %s [OPTION]...\n%s\n", argv[0], args);
        free(args);
        OBJ_RELEASE(cmd_line);
        return ret;
    }
    
    /*
     * Since this process can now handle MCA/GMCA parameters, make sure to
     * process them.
     */
    mca_base_cmd_line_process_args(cmd_line, &environ, &environ);
    
    /* Ensure that enough of OPAL is setup for us to be able to run */
    /*
     * NOTE: (JJH)
     *  We need to allow 'mca_base_cmd_line_process_args()' to process command
     *  line arguments *before* calling opal_init_util() since the command
     *  line could contain MCA parameters that affect the way opal_init_util()
     *  functions. AMCA parameters are one such option normally received on the
     *  command line that affect the way opal_init_util() behaves.
     *  It is "safe" to call mca_base_cmd_line_process_args() before 
     *  opal_init_util() since mca_base_cmd_line_process_args() does *not*
     *  depend upon opal_init_util() functionality.
     */
    if (OPAL_SUCCESS != opal_init_util(&argc, &argv)) {
        fprintf(stderr, "OPAL failed to initialize -- orted aborting\n");
        exit(1);
    }

    /* save the environment for launch purposes. This MUST be
     * done so that we can pass it to any local procs we
     * spawn - otherwise, those local procs won't see any
     * non-MCA envars that were set in the enviro when the
     * orted was executed - e.g., by .csh
     */
    orte_launch_environ = opal_argv_copy(environ);
    
    /* purge any ess flag set in the environ when we were launched */
    opal_unsetenv(OPAL_MCA_PREFIX"ess", &orte_launch_environ);
    
    /* if orte_daemon_debug is set, let someone know we are alive right
     * away just in case we have a problem along the way
     */
    if (orted_globals.debug) {
        gethostname(hostname, sizeof(hostname));
        fprintf(stderr, "Daemon was launched on %s - beginning to initialize\n", hostname);
    }
    
    /* check for help request */
    if (orted_globals.help) {
        char *args = NULL;
        args = opal_cmd_line_get_usage_msg(cmd_line);
        orte_show_help("help-orted.txt", "orted:usage", false,
                       argv[0], args);
        free(args);
        return 1;
    }
#if defined(HAVE_SETSID)
    /* see if we were directed to separate from current session */
    if (orted_globals.set_sid) {
        setsid();
    }
#endif
    /* see if they want us to spin until they can connect a debugger to us */
    i=0;
    while (orted_spin_flag) {
        i++;
        if (1000 < i) i=0;        
    }

#if OPAL_ENABLE_FT_CR == 1
    /* Mark as a tool program */
    (void) mca_base_var_env_name ("opal_cr_is_tool", &tmp_env_var);
    opal_setenv(tmp_env_var,
                "1",
                true, &environ);
    free(tmp_env_var);
#endif

    /* if mapreduce set, flag it */
    if (orted_globals.mapreduce) {
        orte_map_reduce = true;
    }

    /* detach from controlling terminal
     * otherwise, remain attached so output can get to us
     */
    if(!orte_debug_flag &&
       !orte_debug_daemons_flag &&
       orted_globals.daemonize) {
        opal_daemon_init(NULL);
    }
    
    /* Set the flag telling OpenRTE that I am NOT a
     * singleton, but am "infrastructure" - prevents setting
     * up incorrect infrastructure that only a singleton would
     * require.
     */
    if (orted_globals.hnp) {
        if (ORTE_SUCCESS != (ret = orte_init(&argc, &argv, ORTE_PROC_HNP))) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }
    } else {
        if (ORTE_SUCCESS != (ret = orte_init(&argc, &argv, ORTE_PROC_DAEMON))) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }
    }
    /* finalize the OPAL utils. As they are opened again from orte_init->opal_init
     * we continue to have a reference count on them. So we have to finalize them twice...
     */
    opal_finalize_util();

#if OPAL_HAVE_HWLOC
    /* bind ourselves if so directed */
    if (NULL != orte_daemon_cores) {
        char **cores=NULL, tmp[128];
        hwloc_obj_t pu;
        hwloc_cpuset_t ours, pucpus, res;
        int core;

        /* could be a collection of comma-delimited ranges, so
         * use our handy utility to parse it
         */
        orte_util_parse_range_options(orte_daemon_cores, &cores);
        if (NULL != cores) {
            ours = hwloc_bitmap_alloc();
            hwloc_bitmap_zero(ours);
            pucpus = hwloc_bitmap_alloc();
            res = hwloc_bitmap_alloc();
            for (i=0; NULL != cores[i]; i++) {
                core = strtoul(cores[i], NULL, 10);
                if (NULL == (pu = opal_hwloc_base_get_pu(opal_hwloc_topology, core, OPAL_HWLOC_LOGICAL))) {
                    /* turn off the show help forwarding as we won't
                     * be able to cycle the event library to send
                     */
                    orte_show_help_finalize();
                    /* the message will now come out locally */
                    orte_show_help("help-orted.txt", "orted:cannot-bind",
                                   true, orte_process_info.nodename,
                                   orte_daemon_cores);
                    ret = ORTE_ERR_NOT_SUPPORTED;
                    goto DONE;
                }
                hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset);
                hwloc_bitmap_or(res, ours, pucpus);
                hwloc_bitmap_copy(ours, res);
            }
            /* if the result is all zeros, then don't bind */
            if (!hwloc_bitmap_iszero(ours)) {
                (void)hwloc_set_cpubind(opal_hwloc_topology, ours, 0);
                if (opal_hwloc_report_bindings) {
                    opal_hwloc_base_cset2mapstr(tmp, sizeof(tmp), opal_hwloc_topology, ours);
                    opal_output(0, "Daemon %s is bound to cores %s",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
                }
            }
            /* cleanup */
            hwloc_bitmap_free(ours);
            hwloc_bitmap_free(pucpus);
            hwloc_bitmap_free(res);
            opal_argv_free(cores);
        }
    }
#endif

    if ((int)ORTE_VPID_INVALID != orted_globals.fail) {
        orted_globals.abort=false;
        /* some vpid was ordered to fail. The value can be positive
         * or negative, depending upon the desired method for failure,
         * so we need to check both here
         */
        if (0 > orted_globals.fail) {
            orted_globals.fail = -1*orted_globals.fail;
            orted_globals.abort = true;
        }
        /* are we the specified vpid? */
        if ((int)ORTE_PROC_MY_NAME->vpid == orted_globals.fail) {
            /* if the user specified we delay, then setup a timer
             * and have it kill us
             */
            if (0 < orted_globals.fail_delay) {
                ORTE_TIMER_EVENT(orted_globals.fail_delay, 0, shutdown_callback, ORTE_SYS_PRI);
                
            } else {
                opal_output(0, "%s is executing clean %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            orted_globals.abort ? "abort" : "abnormal termination");

                /* do -not- call finalize as this will send a message to the HNP
                 * indicating clean termination! Instead, just forcibly cleanup
                 * the local session_dir tree and exit
                 */
                orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
                
                /* if we were ordered to abort, do so */
                if (orted_globals.abort) {
                    abort();
                }
                
                /* otherwise, return with non-zero status */
                ret = ORTE_ERROR_DEFAULT_EXIT_CODE;
                goto DONE;
            }
        }
    }

    /* insert our contact info into our process_info struct so we
     * have it for later use and set the local daemon field to our name
     */
    orte_process_info.my_daemon_uri = orte_rml.get_contact_info();
    ORTE_PROC_MY_DAEMON->jobid = ORTE_PROC_MY_NAME->jobid;
    ORTE_PROC_MY_DAEMON->vpid = ORTE_PROC_MY_NAME->vpid;
    
    /* if I am also the hnp, then update that contact info field too */
    if (ORTE_PROC_IS_HNP) {
        orte_process_info.my_hnp_uri = orte_rml.get_contact_info();
        ORTE_PROC_MY_HNP->jobid = ORTE_PROC_MY_NAME->jobid;
        ORTE_PROC_MY_HNP->vpid = ORTE_PROC_MY_NAME->vpid;
    }
    
    /* setup the primary daemon command receive function */
    orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON,
                            ORTE_RML_PERSISTENT, orte_daemon_recv, NULL);
    
    /* output a message indicating we are alive, our name, and our pid
     * for debugging purposes
     */
    if (orte_debug_daemons_flag) {
        fprintf(stderr, "Daemon %s checking in as pid %ld on host %s\n",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)orte_process_info.pid,
                orte_process_info.nodename);
    }

    /* We actually do *not* want the orted to voluntarily yield() the
       processor more than necessary.  The orted already blocks when
       it is doing nothing, so it doesn't use any more CPU cycles than
       it should; but when it *is* doing something, we do not want it
       to be unnecessarily delayed because it voluntarily yielded the
       processor in the middle of its work.

       For example: when a message arrives at the orted, we want the
       OS to wake up the orted in a timely fashion (which most OS's
       seem good about doing) and then we want the orted to process
       the message as fast as possible.  If the orted yields and lets
       aggressive MPI applications get the processor back, it may be a
       long time before the OS schedules the orted to run again
       (particularly if there is no IO event to wake it up).  Hence,
       routed OOB messages (for example) may be significantly delayed
       before being delivered to MPI processes, which can be
       problematic in some scenarios (e.g., COMM_SPAWN, BTL's that
       require OOB messages for wireup, etc.). */
    opal_progress_set_yield_when_idle(false);

    /* Change the default behavior of libevent such that we want to
       continually block rather than blocking for the default timeout
       and then looping around the progress engine again.  There
       should be nothing in the orted that cannot block in libevent
       until "something" happens (i.e., there's no need to keep
       cycling through progress because the only things that should
       happen will happen in libevent).  This is a minor optimization,
       but what the heck... :-) */
    opal_progress_set_event_flag(OPAL_EVLOOP_ONCE);

    /* if requested, report my uri to the indicated pipe */
    if (orted_globals.uri_pipe > 0) {
        orte_job_t *jdata;
        orte_proc_t *proc;
        orte_node_t *node;
        orte_app_context_t *app;
        char *tmp, *nptr, *sysinfo;
        int32_t ljob;

        /* setup the singleton's job */
        jdata = OBJ_NEW(orte_job_t);
        orte_plm_base_create_jobid(jdata);
        ljob = ORTE_LOCAL_JOBID(jdata->jobid);
        opal_pointer_array_set_item(orte_job_data, ljob, jdata);

        /* must create a map for it (even though it has no
         * info in it) so that the job info will be picked
         * up in subsequent pidmaps or other daemons won't
         * know how to route
         */
        jdata->map = OBJ_NEW(orte_job_map_t);

        /* setup an app_context for the singleton */
        app = OBJ_NEW(orte_app_context_t);
        app->app = strdup("singleton");
        app->num_procs = 1;
        opal_pointer_array_add(jdata->apps, app);
        
        /* setup a proc object for the singleton - since we
         * -must- be the HNP, and therefore we stored our
         * node on the global node pool, and since the singleton
         * -must- be on the same node as us, indicate that
         */
        proc = OBJ_NEW(orte_proc_t);
        proc->name.jobid = jdata->jobid;
        proc->name.vpid = 0;
        ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_ALIVE);
        proc->state = ORTE_PROC_STATE_RUNNING;
        proc->app_idx = 0;
        /* obviously, it is on my node */
        node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
        proc->node = node;
        OBJ_RETAIN(node);  /* keep accounting straight */
        opal_pointer_array_add(jdata->procs, proc);
        jdata->num_procs = 1;
        /* and it obviously is on the node */
        OBJ_RETAIN(proc);
        opal_pointer_array_add(node->procs, proc);
        node->num_procs++;
        /* and obviously it is one of my local procs */
        OBJ_RETAIN(proc);
        opal_pointer_array_add(orte_local_children, proc);
        jdata->num_local_procs = 1;
        /* set the trivial */
        proc->local_rank = 0;
        proc->node_rank = 0;
        proc->app_rank = 0;
        proc->state = ORTE_PROC_STATE_RUNNING;
        proc->app_idx = 0;
        ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_LOCAL);

        /* create a string that contains our uri + sysinfo + PMIx server URI */
        orte_util_convert_sysinfo_to_string(&sysinfo, orte_local_cpu_type, orte_local_cpu_model);
        asprintf(&tmp, "%s[%s]%s", orte_process_info.my_daemon_uri, sysinfo, pmix_server_uri);
        free(sysinfo);

        /* pass that info to the singleton */
        write(orted_globals.uri_pipe, tmp, strlen(tmp)+1); /* +1 to include the NUL terminator */

        /* cleanup */
        free(tmp);

        /* since a singleton spawned us, we need to harvest
         * any MCA params from the local environment so
         * we can pass them along to any subsequent daemons
         * we may start as the result of a comm_spawn
         */
        for (i=0; NULL != environ[i]; i++) {
            if (0 == strncmp(environ[i], OPAL_MCA_PREFIX, 9)) {
                /* make a copy to manipulate */
                tmp = strdup(environ[i]);
                /* find the equal sign */
                nptr = strchr(tmp, '=');
                *nptr = '\0';
                nptr++;
                /* add the mca param to the orted cmd line */
                opal_argv_append_nosize(&orted_cmd_line, "-"OPAL_MCA_CMD_LINE_ID);
                opal_argv_append_nosize(&orted_cmd_line, &tmp[9]);
                opal_argv_append_nosize(&orted_cmd_line, nptr);
                free(tmp);
            }
        }
    }

    /* if we were given a pipe to monitor for singleton termination, set that up */
    if (orted_globals.singleton_died_pipe > 0) {
        /* register shutdown handler */
        pipe_handler = (opal_event_t*)malloc(sizeof(opal_event_t));
        opal_event_set(orte_event_base, pipe_handler,
                       orted_globals.singleton_died_pipe,
                       OPAL_EV_READ,
                       pipe_closed,
                       pipe_handler);
        opal_event_add(pipe_handler, NULL);
    }

    /* If I have a parent, then save his contact info so
     * any messages we send can flow thru him.
     */

    orte_parent_uri = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "parent_uri",
                                  "URI for the parent if tree launch is enabled.",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0,
                                  MCA_BASE_VAR_FLAG_INTERNAL,
                                  OPAL_INFO_LVL_9,
                                  MCA_BASE_VAR_SCOPE_CONSTANT,
                                  &orte_parent_uri);
    if (NULL != orte_parent_uri) {
        orte_process_name_t parent;

        /* set the contact info into the hash table */
        orte_rml.set_contact_info(orte_parent_uri);
        ret = orte_rml_base_parse_uris(orte_parent_uri, &parent, NULL);
        if (ORTE_SUCCESS != ret) {
            ORTE_ERROR_LOG(ret);
            free (orte_parent_uri);
            orte_parent_uri = NULL;
            goto DONE;
        }

        /* don't need this value anymore */
        free(orte_parent_uri);
        orte_parent_uri = NULL;

        /* tell the routed module that we have a path
         * back to the HNP
         */
        if (ORTE_SUCCESS != (ret = orte_routed.update_route(ORTE_PROC_MY_HNP, &parent))) {
            ORTE_ERROR_LOG(ret);
            goto DONE;
        }
        /* set the lifeline to point to our parent so that we
         * can handle the situation if that lifeline goes away
         */
        if (ORTE_SUCCESS != (ret = orte_routed.set_lifeline(&parent))) {
            ORTE_ERROR_LOG(ret);
            goto DONE;
        }
    }

    /* if we are not the HNP...the only time we will be an HNP
     * is if we are launched by a singleton to provide support
     * for it
     */
    if (!ORTE_PROC_IS_HNP) {
        /* send the information to the orted report-back point - this function
         * will process the data, but also counts the number of
         * orteds that reported back so the launch procedure can continue.
         * We need to do this at the last possible second as the HNP
         * can turn right around and begin issuing orders to us
         */

        buffer = OBJ_NEW(opal_buffer_t);
        /* insert our name for rollup purposes */
        if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) {
            ORTE_ERROR_LOG(ret);
            OBJ_RELEASE(buffer);
            goto DONE;
        }
        /* for now, always include our contact info, even if we are using
         * static ports. Eventually, this will be removed
         */
        rml_uri = orte_rml.get_contact_info();
        if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &rml_uri, 1, OPAL_STRING))) {
            ORTE_ERROR_LOG(ret);
            OBJ_RELEASE(buffer);
            goto DONE;
        }

        /* include our node name */
        opal_dss.pack(buffer, &orte_process_info.nodename, 1, OPAL_STRING);

        /* if requested, include any non-loopback aliases for this node */
        if (orte_retain_aliases) {
            char **aliases=NULL;
            uint8_t naliases, ni;
            char hostname[ORTE_MAX_HOSTNAME_SIZE];

            /* if we stripped the prefix or removed the fqdn,
             * include full hostname as an alias
             */
            gethostname(hostname, ORTE_MAX_HOSTNAME_SIZE);
            if (strlen(orte_process_info.nodename) < strlen(hostname)) {
                opal_argv_append_nosize(&aliases, hostname);
            }
            opal_ifgetaliases(&aliases);
            naliases = opal_argv_count(aliases);
            if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &naliases, 1, OPAL_UINT8))) {
                ORTE_ERROR_LOG(ret);
                OBJ_RELEASE(buffer);
                goto DONE;
            }
            for (ni=0; ni < naliases; ni++) {
                if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &aliases[ni], 1, OPAL_STRING))) {
                    ORTE_ERROR_LOG(ret);
                    OBJ_RELEASE(buffer);
                    goto DONE;
                }
            }
            opal_argv_free(aliases);
        }

#if OPAL_HAVE_HWLOC
        {
            char *coprocessors;
            /* add the local topology */
            if (NULL != opal_hwloc_topology &&
                (1 == ORTE_PROC_MY_NAME->vpid || orte_hetero_nodes)) {
                if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &opal_hwloc_topology, 1, OPAL_HWLOC_TOPO))) {
                    ORTE_ERROR_LOG(ret);
                }
            }
            /* detect and add any coprocessors */
            coprocessors = opal_hwloc_base_find_coprocessors(opal_hwloc_topology);
            if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &coprocessors, 1, OPAL_STRING))) {
                ORTE_ERROR_LOG(ret);
            }
            /* see if I am on a coprocessor */
            coprocessors = opal_hwloc_base_check_on_coprocessor();
            if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &coprocessors, 1, OPAL_STRING))) {
                ORTE_ERROR_LOG(ret);
            }
        }
#endif

        /* send to the HNP's callback - will be routed if routes are available */
        if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buffer,
                                               ORTE_RML_TAG_ORTED_CALLBACK,
                                               orte_rml_send_callback, NULL))) {
            ORTE_ERROR_LOG(ret);
            OBJ_RELEASE(buffer);
            goto DONE;
        }
    }

    /* if we are tree-spawning, then we need to capture the MCA params
     * from our cmd line so we can pass them along to the daemons we spawn -
     * otherwise, only the first layer of daemons will ever see them
     */
    if (orted_globals.tree_spawn) {
        int j, k;
        bool ignore;
        char *no_keep[] = {
            "orte_hnp_uri",
            "orte_ess_jobid",
            "orte_ess_vpid",
            "orte_ess_num_procs",
            "orte_parent_uri",
            "mca_base_env_list",
            NULL
        };
        for (i=0; i < argc; i++) {
            if (0 == strcmp("-"OPAL_MCA_CMD_LINE_ID,  argv[i]) ||
                0 == strcmp("--"OPAL_MCA_CMD_LINE_ID, argv[i]) ) {
                ignore = false;
                /* see if this is something we cannot pass along */
                for (k=0; NULL != no_keep[k]; k++) {
                    if (0 == strcmp(no_keep[k], argv[i+1])) {
                        ignore = true;
                        break;
                    }
                }
                if (!ignore) {
                    /* see if this is already present so we at least can
                     * avoid growing the cmd line with duplicates
                     */
                    if (NULL != orted_cmd_line) {
                        for (j=0; NULL != orted_cmd_line[j]; j++) {
                            if (0 == strcmp(argv[i+1], orted_cmd_line[j])) {
                                /* already here - ignore it */
                                ignore = true;
                                break;
                            }
                        }
                    }
                    if (!ignore) {
                        opal_argv_append_nosize(&orted_cmd_line, argv[i]);
                        opal_argv_append_nosize(&orted_cmd_line, argv[i+1]);
                        opal_argv_append_nosize(&orted_cmd_line, argv[i+2]);
                    }
                }
                i += 2;
            }
        }
    }
            
    if (orte_debug_daemons_flag) {
        opal_output(0, "%s orted: up and running - waiting for commands!", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
    }
    ret = ORTE_SUCCESS;

    /* loop the event lib until an exit event is detected */
    while (orte_event_base_active) {
        opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE);
    }

    /* ensure all local procs are dead */
    orte_odls.kill_local_procs(NULL);

 DONE:
    /* update the exit status, in case it wasn't done */
    ORTE_UPDATE_EXIT_STATUS(ret);

    /* cleanup and leave */
    orte_finalize();

    if (orte_debug_flag) {
        fprintf(stderr, "exiting with status %d\n", orte_exit_status);
    }
    exit(orte_exit_status);
}
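The orte_daemon_cores block in the middle of this function uses a common hwloc bitmap-accumulation pattern: AND each PU's online and allowed sets, OR the result into a running set, and bind only if the final set is non-empty. A standalone sketch of just that pattern (hwloc 1.x, where objects still carry online_cpuset/allowed_cpuset; the two-PU limit is arbitrary):

#include <hwloc.h>
#include <stdio.h>

int main(void)
{
    hwloc_topology_t topo;
    hwloc_bitmap_t ours, pucpus, res;
    hwloc_obj_t pu;
    int i, npus;

    hwloc_topology_init(&topo);
    hwloc_topology_load(topo);

    ours = hwloc_bitmap_alloc();
    hwloc_bitmap_zero(ours);
    pucpus = hwloc_bitmap_alloc();
    res = hwloc_bitmap_alloc();

    npus = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU);
    for (i = 0; i < npus && i < 2; i++) {   /* accumulate the first two PUs */
        pu = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PU, i);
        hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset);
        hwloc_bitmap_or(res, ours, pucpus);
        hwloc_bitmap_copy(ours, res);
    }

    /* if the result is all zeros, don't bind */
    if (!hwloc_bitmap_iszero(ours) && 0 == hwloc_set_cpubind(topo, ours, 0))
        printf("bound to the first two PUs\n");

    hwloc_bitmap_free(ours);
    hwloc_bitmap_free(pucpus);
    hwloc_bitmap_free(res);
    hwloc_topology_destroy(topo);
    return 0;
}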