/**
 * Unpack one or more hwloc topologies from a buffer.
 *
 * Each topology is read as an XML string followed by three raw byte
 * records holding the discovery, cpubind and membind support structures.
 * The XML is loaded into a fresh hwloc topology, the support records are
 * copied over the ones hwloc created, and the result is filtered through
 * opal_hwloc_base_filter_cpus() before being stored in the caller's array.
 *
 * @param buffer    buffer to unpack from
 * @param dest      array of hwloc_topology_t with room for *num_vals entries
 * @param num_vals  IN: number of topologies requested;
 *                  OUT: number of topologies actually stored in dest
 * @param type      data type being unpacked (not examined by this function)
 *
 * @return OPAL_SUCCESS when every requested topology was unpacked,
 *         OPAL_ERROR when an hwloc call failed, otherwise the error code
 *         returned by opal_dss.unpack() or opal_hwloc_base_filter_cpus().
 *         On failure no partially built topology is left behind; the
 *         entries already stored in dest remain valid.
 */
int opal_hwloc_unpack(opal_buffer_t *buffer, void *dest,
                      int32_t *num_vals,
                      opal_data_type_t type)
{
    /* NOTE: hwloc defines topology_t as a pointer to a struct! */
    hwloc_topology_t t, *tarray = (hwloc_topology_t*)dest;
    int rc=OPAL_SUCCESS, i, cnt, j;
    char *xmlbuffer;
    struct hwloc_topology_support *support;

    for (i=0, j=0; i < *num_vals; i++) {
        /* unpack the xml string */
        cnt=1;
        xmlbuffer = NULL;
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &xmlbuffer, &cnt, OPAL_STRING))) {
            goto cleanup;
        }
        /* strlen() below must never be handed a NULL pointer */
        if (NULL == xmlbuffer) {
            rc = OPAL_ERROR;
            goto cleanup;
        }

        /* convert the xml */
        if (0 != hwloc_topology_init(&t)) {
            rc = OPAL_ERROR;
            free(xmlbuffer);
            goto cleanup;
        }
        if (0 != hwloc_topology_set_xmlbuffer(t, xmlbuffer, strlen(xmlbuffer))) {
            rc = OPAL_ERROR;
            free(xmlbuffer);
            hwloc_topology_destroy(t);
            goto cleanup;
        }
        free(xmlbuffer);

        /* since we are loading this from an external source, we have to
         * explicitly set a flag so hwloc sets things up correctly
         */
        if (0 != hwloc_topology_set_flags(t, (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM |
                                              HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM |
                                              HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) {
            rc = OPAL_ERROR;
            hwloc_topology_destroy(t);
            goto cleanup;
        }

        /* now load the topology */
        if (0 != hwloc_topology_load(t)) {
            rc = OPAL_ERROR;
            hwloc_topology_destroy(t);
            goto cleanup;
        }

        /* get the available support - hwloc unfortunately does
         * not include this info in its xml import!
         *
         * From here on the topology is fully built but not yet handed
         * to the caller, so every failure path must destroy it.
         */
        support = (struct hwloc_topology_support*)hwloc_topology_get_support(t);
        cnt = sizeof(struct hwloc_topology_discovery_support);
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, support->discovery, &cnt, OPAL_BYTE))) {
            hwloc_topology_destroy(t);
            goto cleanup;
        }
        cnt = sizeof(struct hwloc_topology_cpubind_support);
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, support->cpubind, &cnt, OPAL_BYTE))) {
            hwloc_topology_destroy(t);
            goto cleanup;
        }
        cnt = sizeof(struct hwloc_topology_membind_support);
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, support->membind, &cnt, OPAL_BYTE))) {
            hwloc_topology_destroy(t);
            goto cleanup;
        }

        /* filter the cpus thru any default cpu set */
        if (OPAL_SUCCESS != (rc = opal_hwloc_base_filter_cpus(t))) {
            hwloc_topology_destroy(t);
            goto cleanup;
        }

        /* pass it back */
        tarray[i] = t;

        /* track the number added */
        j++;
    }

 cleanup:
    /* report how many topologies were actually delivered */
    *num_vals = j;
    return rc;
}
static int rte_init(void) { int ret; char *error = NULL; char *envar, *ev1, *ev2; uint64_t unique_key[2]; char *string_key; char *rmluri; opal_value_t *kv; char *val; int u32, *u32ptr; uint16_t u16, *u16ptr; char **peers=NULL, *mycpuset, **cpusets=NULL; opal_process_name_t name; size_t i; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { error = "orte_ess_base_std_prolog"; goto error; } /* get an async event base - we use the opal_async one so * we don't startup extra threads if not needed */ orte_event_base = opal_progress_thread_init(NULL); progress_thread_running = true; /* open and setup pmix */ if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { ORTE_ERROR_LOG(ret); /* we cannot run */ error = "pmix init"; goto error; } if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) { /* we cannot run */ error = "pmix init"; goto error; } /* set the event base */ opal_pmix_base_set_evbase(orte_event_base); /* initialize the selected module */ if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) { /* we cannot run */ error = "pmix init"; goto error; } u32ptr = &u32; u16ptr = &u16; /**** THE FOLLOWING ARE REQUIRED VALUES ***/ /* pmix.init set our process name down in the OPAL layer, * so carry it forward here */ ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid; ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid; /* get our local rank from PMI */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_RANK, ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16); if (OPAL_SUCCESS != ret) { error = "getting local rank"; goto error; } orte_process_info.my_local_rank = u16; /* get our node rank from PMI */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_NODE_RANK, ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16); if (OPAL_SUCCESS != ret) { error = "getting node rank"; goto error; } orte_process_info.my_node_rank = u16; /* get max procs */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_MAX_PROCS, ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32); if 
(OPAL_SUCCESS != ret) { error = "getting max procs"; goto error; } orte_process_info.max_procs = u32; /* get job size */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_JOB_SIZE, ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32); if (OPAL_SUCCESS != ret) { error = "getting job size"; goto error; } orte_process_info.num_procs = u32; /* push into the environ for pickup in MPI layer for * MPI-3 required info key */ if (NULL == getenv(OPAL_MCA_PREFIX"orte_ess_num_procs")) { asprintf(&ev1, OPAL_MCA_PREFIX"orte_ess_num_procs=%d", orte_process_info.num_procs); putenv(ev1); added_num_procs = true; } if (NULL == getenv("OMPI_APP_CTX_NUM_PROCS")) { asprintf(&ev2, "OMPI_APP_CTX_NUM_PROCS=%d", orte_process_info.num_procs); putenv(ev2); added_app_ctx = true; } /* get our app number from PMI - ok if not found */ OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_APPNUM, ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32); if (OPAL_SUCCESS == ret) { orte_process_info.app_num = u32; } else { orte_process_info.app_num = 0; } /* get the number of local peers - required for wireup of * shared memory BTL */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_SIZE, ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32); if (OPAL_SUCCESS == ret) { orte_process_info.num_local_peers = u32 - 1; // want number besides ourselves } else { orte_process_info.num_local_peers = 0; } /* setup transport keys in case the MPI layer needs them - * we can use the jobfam and stepid as unique keys * because they are unique values assigned by the RM */ if (NULL == getenv(OPAL_MCA_PREFIX"orte_precondition_transports")) { unique_key[0] = ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid); unique_key[1] = ORTE_LOCAL_JOBID(ORTE_PROC_MY_NAME->jobid); if (NULL == (string_key = orte_pre_condition_transports_print(unique_key))) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; } opal_output_verbose(2, orte_ess_base_framework.framework_output, "%s transport key %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), string_key); asprintf(&envar, 
OPAL_MCA_PREFIX"orte_precondition_transports=%s", string_key); putenv(envar); added_transport_keys = true; /* cannot free the envar as that messes up our environ */ free(string_key); } /* retrieve our topology */ val = NULL; OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_TOPO, ORTE_PROC_MY_NAME, &val, OPAL_STRING); if (OPAL_SUCCESS == ret && NULL != val) { /* load the topology */ if (0 != hwloc_topology_init(&opal_hwloc_topology)) { ret = OPAL_ERROR; free(val); error = "setting topology"; goto error; } if (0 != hwloc_topology_set_xmlbuffer(opal_hwloc_topology, val, strlen(val))) { ret = OPAL_ERROR; free(val); hwloc_topology_destroy(opal_hwloc_topology); error = "setting topology"; goto error; } /* since we are loading this from an external source, we have to * explicitly set a flag so hwloc sets things up correctly */ if (0 != hwloc_topology_set_flags(opal_hwloc_topology, (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) { ret = OPAL_ERROR; hwloc_topology_destroy(opal_hwloc_topology); free(val); error = "setting topology"; goto error; } /* now load the topology */ if (0 != hwloc_topology_load(opal_hwloc_topology)) { ret = OPAL_ERROR; hwloc_topology_destroy(opal_hwloc_topology); free(val); error = "setting topology"; goto error; } free(val); /* filter the cpus thru any default cpu set */ if (OPAL_SUCCESS != (ret = opal_hwloc_base_filter_cpus(opal_hwloc_topology))) { error = "filtering topology"; goto error; } } else { /* it wasn't passed down to us, so go get it */ if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) { error = "topology discovery"; goto error; } /* push it into the PMIx database in case someone * tries to retrieve it so we avoid an attempt to * get it again */ kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_LOCAL_TOPO); kv->type = OPAL_STRING; if (0 != (ret = hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &kv->data.string, &u32))) { error = "topology export"; goto 
error; } if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, kv))) { error = "topology store"; goto error; } OBJ_RELEASE(kv); } /* get our local peers */ if (0 < orte_process_info.num_local_peers) { /* if my local rank if too high, then that's an error */ if (orte_process_info.num_local_peers < orte_process_info.my_local_rank) { ret = ORTE_ERR_BAD_PARAM; error = "num local peers"; goto error; } /* retrieve the local peers */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_PEERS, ORTE_PROC_MY_NAME, &val, OPAL_STRING); if (OPAL_SUCCESS == ret && NULL != val) { peers = opal_argv_split(val, ','); free(val); /* and their cpusets, if available */ OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_CPUSETS, ORTE_PROC_MY_NAME, &val, OPAL_STRING); if (OPAL_SUCCESS == ret && NULL != val) { cpusets = opal_argv_split(val, ':'); free(val); } else { cpusets = NULL; } } else { peers = NULL; cpusets = NULL; } } else { peers = NULL; cpusets = NULL; } /* set the locality */ if (NULL != peers) { /* indentify our cpuset */ if (NULL != cpusets) { mycpuset = cpusets[orte_process_info.my_local_rank]; } else { mycpuset = NULL; } name.jobid = ORTE_PROC_MY_NAME->jobid; for (i=0; NULL != peers[i]; i++) { kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_LOCALITY); kv->type = OPAL_UINT16; name.vpid = strtoul(peers[i], NULL, 10); if (name.vpid == ORTE_PROC_MY_NAME->vpid) { /* we are fully local to ourselves */ u16 = OPAL_PROC_ALL_LOCAL; } else if (NULL == mycpuset || NULL == cpusets[i] || 0 == strcmp(cpusets[i], "UNBOUND")) { /* all we can say is that it shares our node */ u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE; } else { /* we have it, so compute the locality */ u16 = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, mycpuset, cpusets[i]); } OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, "%s ess:pmi:locality: proc %s locality %x", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&name), u16)); kv->data.uint16 = u16; ret = 
opal_pmix.store_local(&name, kv); if (OPAL_SUCCESS != ret) { error = "local store of locality"; opal_argv_free(peers); opal_argv_free(cpusets); goto error; } OBJ_RELEASE(kv); } opal_argv_free(peers); opal_argv_free(cpusets); } /* now that we have all required info, complete the setup */ if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup(false))) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_app_setup"; goto error; } /* setup process binding */ if (ORTE_SUCCESS != (ret = orte_ess_base_proc_binding())) { error = "proc_binding"; goto error; } /* this needs to be set to enable debugger use when direct launched */ if (NULL == orte_process_info.my_daemon_uri) { orte_standalone_operation = true; } /* set max procs */ if (orte_process_info.max_procs < orte_process_info.num_procs) { orte_process_info.max_procs = orte_process_info.num_procs; } /*** PUSH DATA FOR OTHERS TO FIND ***/ /* push our RML URI in case others need to talk directly to us */ rmluri = orte_rml.get_contact_info(); /* push it out for others to use */ OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_PROC_URI, rmluri, OPAL_STRING); if (ORTE_SUCCESS != ret) { error = "pmix put uri"; goto error; } free(rmluri); /* push our hostname so others can find us, if they need to */ OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING); if (ORTE_SUCCESS != ret) { error = "db store hostname"; goto error; } /* if we are an ORTE app - and not an MPI app - then * we need to exchange our connection info here. * MPI_Init has its own modex, so we don't need to do * two of them. However, if we don't do a modex at all, * then processes have no way to communicate * * NOTE: only do this when the process originally launches. 
* Cannot do this on a restart as the rest of the processes * in the job won't be executing this step, so we would hang */ if (ORTE_PROC_IS_NON_MPI && !orte_do_not_barrier) { opal_pmix.fence(NULL, 0); } return ORTE_SUCCESS; error: if (!progress_thread_running) { /* can't send the help message, so ensure it * comes out locally */ orte_show_help_finalize(); } if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) { orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); } return ret; }
/*
 * Split a comma-separated list and, if it has fewer than "wanted" entries,
 * repeat its last entry until it has that many. Returns NULL when the
 * list yields no entries at all.
 */
static char **ras_sim_split_backfilled(const char *list, int wanted)
{
    char **argv;
    char *last;
    int have, n;

    argv = opal_argv_split(list, ',');
    have = opal_argv_count(argv);
    if (0 == have) {
        /* nothing to repeat - report "no list" rather than an empty one */
        if (NULL != argv) {
            opal_argv_free(argv);
        }
        return NULL;
    }
    last = argv[have-1];
    for (n=have; n < wanted; n++) {
        opal_argv_append_nosize(&argv, last);
    }
    return argv;
}

#if OPAL_HAVE_HWLOC
/*
 * Finish a freshly loaded simulated topology: strip the "HostName" info
 * entry from its root object, apply the component's cpubind/membind
 * support settings, and add it to orte_node_topologies.
 */
static void ras_sim_register_topology(hwloc_topology_t topo)
{
    hwloc_obj_t obj;
    unsigned j, k;
    struct hwloc_topology_support *support;
    orte_topology_t *t;

    /* remove the hostname from the topology. Unfortunately, hwloc
     * decided to add the source hostname to the "topology", thus
     * rendering it unusable as a pure topological description. So
     * we remove that information here.
     */
    obj = hwloc_get_root_obj(topo);
    for (k=0; k < obj->infos_count; k++) {
        if (NULL == obj->infos[k].name ||
            NULL == obj->infos[k].value) {
            continue;
        }
        if (0 == strncmp(obj->infos[k].name, "HostName", strlen("HostName"))) {
            free(obj->infos[k].name);
            free(obj->infos[k].value);
            /* left justify the array */
            for (j=k; j < obj->infos_count-1; j++) {
                obj->infos[j] = obj->infos[j+1];
            }
            obj->infos[obj->infos_count-1].name = NULL;
            obj->infos[obj->infos_count-1].value = NULL;
            obj->infos_count--;
            break;
        }
    }

    /* unfortunately, hwloc does not include support info in its
     * xml output :-(( To aid in debugging, we set it here
     */
    support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo);
    support->cpubind->set_thisproc_cpubind = mca_ras_simulator_component.have_cpubind;
    support->membind->set_thisproc_membind = mca_ras_simulator_component.have_membind;

    /* add it to our array */
    t = OBJ_NEW(orte_topology_t);
    t->topo = topo;
    t->sig = opal_hwloc_base_get_topo_signature(topo);
    opal_pointer_array_add(orte_node_topologies, t);
}
#endif

/**
 * Create the simulated node allocation.
 *
 * The component's num_nodes parameter is a comma-separated list; each entry
 * describes one group of nodes. Group n gets the name prefix "node" followed
 * by the n-th letter starting at 'A', and its nodes are numbered from zero,
 * zero-padded to the number of digits in the group's node count. With hwloc
 * support each group uses either a topology loaded from the matching XML
 * file, a synthetic topology built from the matching description, or the
 * local topology when neither list was given.
 *
 * @param jdata  job being allocated for (not examined by this function)
 * @param nodes  list to which the created orte_node_t objects are appended
 *
 * @return ORTE_SUCCESS, or ORTE_ERR_SILENT after a help message has been
 *         shown for a configuration or hwloc failure.
 */
static int allocate(orte_job_t *jdata, opal_list_t *nodes)
{
    int i, n, val, dig, num_nodes, num_groups;
    int rc = ORTE_ERR_SILENT;
    orte_node_t *node;
#if OPAL_HAVE_HWLOC
    hwloc_topology_t topo;
    hwloc_obj_t obj;
    char **files=NULL;
    char **topos = NULL;
    bool use_local_topology = false;
#endif
    char **node_cnt=NULL;
    char **slot_cnt=NULL;
    char **max_slot_cnt=NULL;
    char prefix[6];

    node_cnt = opal_argv_split(mca_ras_simulator_component.num_nodes, ',');
    num_groups = opal_argv_count(node_cnt);

    /* backfill the slot lists so every group of nodes has an entry */
    if (NULL != mca_ras_simulator_component.slots) {
        slot_cnt = ras_sim_split_backfilled(mca_ras_simulator_component.slots, num_groups);
    }
    if (NULL != mca_ras_simulator_component.slots_max) {
        max_slot_cnt = ras_sim_split_backfilled(mca_ras_simulator_component.slots_max, num_groups);
    }

#if OPAL_HAVE_HWLOC
    if (NULL != mca_ras_simulator_component.topofiles) {
        files = opal_argv_split(mca_ras_simulator_component.topofiles, ',');
        if (opal_argv_count(files) != num_groups) {
            orte_show_help("help-ras-base.txt", "ras-sim:mismatch", true);
            goto cleanup;
        }
    } else if (NULL != mca_ras_simulator_component.topologies) {
        topos = opal_argv_split(mca_ras_simulator_component.topologies, ',');
        if (opal_argv_count(topos) != num_groups) {
            orte_show_help("help-ras-base.txt", "ras-sim:mismatch", true);
            goto cleanup;
        }
    } else {
        /* use our topology */
        use_local_topology = true;
    }
#else
    /* If we don't have hwloc and hwloc files were specified, then
       error out (because we can't deliver that functionality) */
    if (NULL != mca_ras_simulator_component.topofiles) {
        orte_show_help("help-ras-simulator.txt",
                       "no hwloc support for topofiles", true);
        goto cleanup;
    }
#endif

    /* setup the prefix to the node names */
    snprintf(prefix, sizeof(prefix), "nodeA");

    /* process the request */
    for (n=0; n < num_groups; n++) {
        num_nodes = strtol(node_cnt[n], NULL, 10);

        /* get number of digits */
        val = num_nodes;
        for (dig=0; 0 != val; dig++) {
            val /= 10;
        }

        /* set the prefix for this group of nodes - computed from 'A'
         * each time so the letters are consecutive across groups */
        prefix[4] = 'A' + n;

        /* check for topology */
#if OPAL_HAVE_HWLOC
        if (use_local_topology) {
            /* use our topology */
            topo = opal_hwloc_topology;
        } else if (NULL != files) {
            if (0 != hwloc_topology_init(&topo)) {
                orte_show_help("help-ras-simulator.txt",
                               "hwloc API fail", true,
                               __FILE__, __LINE__, "hwloc_topology_init");
                goto cleanup;
            }
            if (0 != hwloc_topology_set_xml(topo, files[n])) {
                orte_show_help("help-ras-simulator.txt",
                               "hwloc failed to load xml", true, files[n]);
                hwloc_topology_destroy(topo);
                goto cleanup;
            }
            /* since we are loading this from an external source, we have to
             * explicitly set a flag so hwloc sets things up correctly
             */
            if (0 != hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM)) {
                orte_show_help("help-ras-simulator.txt",
                               "hwloc API fail", true,
                               __FILE__, __LINE__, "hwloc_topology_set_flags");
                hwloc_topology_destroy(topo);
                goto cleanup;
            }
            if (0 != hwloc_topology_load(topo)) {
                orte_show_help("help-ras-simulator.txt",
                               "hwloc API fail", true,
                               __FILE__, __LINE__, "hwloc_topology_load");
                hwloc_topology_destroy(topo);
                goto cleanup;
            }
            /* strip the hostname, set support flags, and record it */
            ras_sim_register_topology(topo);
        } else {
            if (0 != hwloc_topology_init(&topo)) {
                orte_show_help("help-ras-simulator.txt",
                               "hwloc API fail", true,
                               __FILE__, __LINE__, "hwloc_topology_init");
                goto cleanup;
            }
            if (0 != hwloc_topology_set_synthetic(topo, topos[n])) {
                orte_show_help("help-ras-simulator.txt",
                               "hwloc API fail", true,
                               __FILE__, __LINE__, "hwloc_topology_set_synthetic");
                hwloc_topology_destroy(topo);
                goto cleanup;
            }
            if (0 != hwloc_topology_load(topo)) {
                orte_show_help("help-ras-simulator.txt",
                               "hwloc API fail", true,
                               __FILE__, __LINE__, "hwloc_topology_load");
                hwloc_topology_destroy(topo);
                goto cleanup;
            }
            if (OPAL_SUCCESS != opal_hwloc_base_filter_cpus(topo)) {
                orte_show_help("help-ras-simulator.txt",
                               "hwloc API fail", true,
                               __FILE__, __LINE__, "opal_hwloc_base_filter_cpus");
                hwloc_topology_destroy(topo);
                goto cleanup;
            }
            /* strip the hostname, set support flags, and record it */
            ras_sim_register_topology(topo);
        }
#endif

        for (i=0; i < num_nodes; i++) {
            node = OBJ_NEW(orte_node_t);
            asprintf(&node->name, "%s%0*d", prefix, dig, i);
            node->state = ORTE_NODE_STATE_UP;
            node->slots_inuse = 0;
#if OPAL_HAVE_HWLOC
            if (NULL == max_slot_cnt || NULL == max_slot_cnt[n]) {
                node->slots_max = 0;
            } else {
                obj = hwloc_get_root_obj(topo);
                node->slots_max = opal_hwloc_base_get_npus(topo, obj);
            }
            if (NULL == slot_cnt || NULL == slot_cnt[n]) {
                node->slots = 0;
            } else {
                obj = hwloc_get_root_obj(topo);
                node->slots = opal_hwloc_base_get_npus(topo, obj);
            }
            node->topology = topo;
#endif
            opal_output_verbose(1, orte_ras_base_framework.framework_output,
                                "Created Node <%10s> [%3d : %3d]",
                                node->name, node->slots, node->slots_max);
            opal_list_append(nodes, &node->super);
        }
    }

    /* record the number of allocated nodes */
    orte_num_allocated_nodes = opal_list_get_size(nodes);
    rc = ORTE_SUCCESS;

 cleanup:
    /* the parsed parameter lists are only needed while building the nodes */
#if OPAL_HAVE_HWLOC
    if (NULL != files) {
        opal_argv_free(files);
    }
    if (NULL != topos) {
        opal_argv_free(topos);
    }
#endif
    if (NULL != max_slot_cnt) {
        opal_argv_free(max_slot_cnt);
    }
    if (NULL != slot_cnt) {
        opal_argv_free(slot_cnt);
    }
    if (NULL != node_cnt) {
        opal_argv_free(node_cnt);
    }
    return rc;
}
int main(int argc, char* argv[]) { hwloc_obj_t obj; unsigned j, k; struct hwloc_topology_support *support; int rc; if (2 != argc) { fprintf(stderr, "Usage: opal_hwloc <topofile>\n"); exit(1); } if (0 > (rc = opal_init(&argc, &argv))) { fprintf(stderr, "opal_hwloc: couldn't init opal - error code %d\n", rc); return rc; } if (0 != hwloc_topology_init(&my_topology)) { return OPAL_ERR_NOT_SUPPORTED; } if (0 != hwloc_topology_set_xml(my_topology, argv[1])) { hwloc_topology_destroy(my_topology); return OPAL_ERR_NOT_SUPPORTED; } /* since we are loading this from an external source, we have to * explicitly set a flag so hwloc sets things up correctly */ if (0 != hwloc_topology_set_flags(my_topology, (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) { hwloc_topology_destroy(my_topology); return OPAL_ERR_NOT_SUPPORTED; } if (0 != hwloc_topology_load(my_topology)) { hwloc_topology_destroy(my_topology); return OPAL_ERR_NOT_SUPPORTED; } /* remove the hostname from the topology. Unfortunately, hwloc * decided to add the source hostname to the "topology", thus * rendering it unusable as a pure topological description. So * we remove that information here. 
*/ obj = hwloc_get_root_obj(my_topology); for (k=0; k < obj->infos_count; k++) { if (NULL == obj->infos[k].name || NULL == obj->infos[k].value) { continue; } if (0 == strncmp(obj->infos[k].name, "HostName", strlen("HostName"))) { free(obj->infos[k].name); free(obj->infos[k].value); /* left justify the array */ for (j=k; j < obj->infos_count-1; j++) { obj->infos[j] = obj->infos[j+1]; } obj->infos[obj->infos_count-1].name = NULL; obj->infos[obj->infos_count-1].value = NULL; obj->infos_count--; break; } } /* unfortunately, hwloc does not include support info in its * xml output :-(( We default to assuming it is present as * systems that use this option are likely to provide * binding support */ support = (struct hwloc_topology_support*)hwloc_topology_get_support(my_topology); support->cpubind->set_thisproc_cpubind = true; /* filter the cpus thru any default cpu set */ opal_hwloc_base_filter_cpus(my_topology); /* fill opal_cache_line_size global with the smallest L1 cache line size */ fill_cache_line_size(); /* test it */ if (NULL == hwloc_get_obj_by_type(my_topology, HWLOC_OBJ_CORE, 0)) { fprintf(stderr, "DIDN'T FIND A CORE\n"); } hwloc_topology_destroy(my_topology); opal_finalize(); return 0; }