/*
 * Component open: if the user supplied a rankfile (or, when hwloc support
 * is compiled in, a slot list), flag that the mapping policy was explicitly
 * given by the user, force cpuset binding, and claim top selection priority
 * so this mapper wins the query.  Errors out if a different non-default
 * mapping policy was already specified.
 */
static int orte_rmaps_rank_file_open(void)
{
    /* ensure we flag mapping by user */
#if OPAL_HAVE_HWLOC
    if (NULL != opal_hwloc_base_slot_list || NULL != orte_rankfile) {
#else
    if (NULL != orte_rankfile) {
#endif
        if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
            /* if a non-default mapping is already specified, then we
             * have an error - report it, mark the conflict so the base
             * open can silently abort the chain, and bail
             */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "RANK_FILE", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_CONFLICTED);
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYUSER);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* we are going to bind to cpuset since the user is specifying the cpus */
        OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CPUSET);
        /* make us first */
        my_priority = 10000;
    }

    return ORTE_SUCCESS;
}

/*
 * Selection query: report this component's module and its current
 * priority (boosted to 10000 by open when a rankfile/slot list is in use).
 */
static int orte_rmaps_rank_file_query(mca_base_module_t **module, int *priority)
{
    *priority = my_priority;
    *module = (mca_base_module_t *)&orte_rmaps_rank_file_module;
    return ORTE_SUCCESS;
}
/*
 * Map a job according to a "procs-per-resource" (ppr) pattern such as
 * "2:socket" or "1:node,4:core".  For each app_context, places the given
 * number of procs on every instance of the named resource on each target
 * node; when more than one level is specified, the proc count is pruned
 * upward through the levels after the initial placement.
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_TAKE_NEXT_OPTION when this mapper does
 * not apply, or an error code (ORTE_ERR_SILENT when help was already shown).
 */
static int ppr_mapper(orte_job_t *jdata)
{
    int rc = ORTE_SUCCESS, j, n;
    mca_base_component_t *c=&mca_rmaps_ppr_component.base_version;
    orte_node_t *node;
    orte_proc_t *proc;
    orte_app_context_t *app;
    orte_vpid_t total_procs, nprocs_mapped;
    opal_hwloc_level_t start=OPAL_HWLOC_NODE_LEVEL;
#if OPAL_HAVE_HWLOC
    hwloc_obj_t obj;
    hwloc_obj_type_t lowest;
    unsigned cache_level=0;
    unsigned int nobjs, i;
    bool pruning_reqd = false;
    opal_hwloc_level_t level;
#endif
    opal_list_t node_list;
    opal_list_item_t *item;
    orte_std_cntr_t num_slots;
    orte_app_idx_t idx;
    char **ppr_req, **ck;
    size_t len;
    bool initial_map=true;

    /* only handle initial launch of loadbalanced
     * or NPERxxx jobs - allow restarting of failed apps
     */
    if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:ppr: job %s being restarted - ppr cannot map",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }
    if (NULL != jdata->map->req_mapper &&
        0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) {
        /* a mapper has been specified, and it isn't me */
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:ppr: job %s not using ppr mapper",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }
    if (NULL == jdata->map->ppr ||
        ORTE_MAPPING_PPR != ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
        /* not for us - no ppr pattern or policy isn't ppr */
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:ppr: job %s not using ppr mapper",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:ppr: mapping job %s with ppr %s",
                        ORTE_JOBID_PRINT(jdata->jobid), jdata->map->ppr);

    /* flag that I did the mapping */
    if (NULL != jdata->map->last_mapper) {
        free(jdata->map->last_mapper);
    }
    jdata->map->last_mapper = strdup(c->mca_component_name);

    /* initialize the per-level proc-count table */
    memset(ppr, 0, OPAL_HWLOC_HWTHREAD_LEVEL * sizeof(opal_hwloc_level_t));

    /* parse option - a comma-separated list of "<count>:<resource>" specs.
     * "start" tracks the lowest (deepest) level requested; "n" counts how
     * many levels were given so we know if pruning is required
     */
    n=0;
    ppr_req = opal_argv_split(jdata->map->ppr, ',');
    for (j=0; NULL != ppr_req[j]; j++) {
        /* split on the colon */
        ck = opal_argv_split(ppr_req[j], ':');
        if (2 != opal_argv_count(ck)) {
            /* must provide a specification */
            orte_show_help("help-orte-rmaps-ppr.txt", "invalid-ppr",
                           true, jdata->map->ppr);
            opal_argv_free(ppr_req);
            opal_argv_free(ck);
            return ORTE_ERR_SILENT;
        }
        len = strlen(ck[1]);
        if (0 == strncasecmp(ck[1], "node", len)) {
            ppr[OPAL_HWLOC_NODE_LEVEL] = strtol(ck[0], NULL, 10);
            ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYNODE);
            start = OPAL_HWLOC_NODE_LEVEL;
            n++;
#if OPAL_HAVE_HWLOC
        } else if (0 == strncasecmp(ck[1], "hwthread", len) ||
                   0 == strncasecmp(ck[1], "thread", len)) {
            ppr[OPAL_HWLOC_HWTHREAD_LEVEL] = strtol(ck[0], NULL, 10);
            start = OPAL_HWLOC_HWTHREAD_LEVEL;
            ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYHWTHREAD);
            n++;
        } else if (0 == strncasecmp(ck[1], "core", len)) {
            ppr[OPAL_HWLOC_CORE_LEVEL] = strtol(ck[0], NULL, 10);
            if (start < OPAL_HWLOC_CORE_LEVEL) {
                start = OPAL_HWLOC_CORE_LEVEL;
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYCORE);
            }
            n++;
        } else if (0 == strncasecmp(ck[1], "socket", len) ||
                   0 == strncasecmp(ck[1], "skt", len)) {
            ppr[OPAL_HWLOC_SOCKET_LEVEL] = strtol(ck[0], NULL, 10);
            if (start < OPAL_HWLOC_SOCKET_LEVEL) {
                start = OPAL_HWLOC_SOCKET_LEVEL;
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSOCKET);
            }
            n++;
        } else if (0 == strncasecmp(ck[1], "l1cache", len)) {
            ppr[OPAL_HWLOC_L1CACHE_LEVEL] = strtol(ck[0], NULL, 10);
            if (start < OPAL_HWLOC_L1CACHE_LEVEL) {
                start = OPAL_HWLOC_L1CACHE_LEVEL;
                cache_level = 1;
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYL1CACHE);
            }
            n++;
        } else if (0 == strncasecmp(ck[1], "l2cache", len)) {
            ppr[OPAL_HWLOC_L2CACHE_LEVEL] = strtol(ck[0], NULL, 10);
            if (start < OPAL_HWLOC_L2CACHE_LEVEL) {
                start = OPAL_HWLOC_L2CACHE_LEVEL;
                cache_level = 2;
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYL2CACHE);
            }
            n++;
        } else if (0 == strncasecmp(ck[1], "l3cache", len)) {
            ppr[OPAL_HWLOC_L3CACHE_LEVEL] = strtol(ck[0], NULL, 10);
            if (start < OPAL_HWLOC_L3CACHE_LEVEL) {
                start = OPAL_HWLOC_L3CACHE_LEVEL;
                cache_level = 3;
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYL3CACHE);
            }
            n++;
        } else if (0 == strncasecmp(ck[1], "numa", len)) {
            ppr[OPAL_HWLOC_NUMA_LEVEL] = strtol(ck[0], NULL, 10);
            if (start < OPAL_HWLOC_NUMA_LEVEL) {
                start = OPAL_HWLOC_NUMA_LEVEL;
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYNUMA);
            }
            n++;
#endif
        } else {
            /* unknown spec */
            orte_show_help("help-orte-rmaps-ppr.txt", "unrecognized-ppr-option",
                           true, ck[1], jdata->map->ppr);
            opal_argv_free(ppr_req);
            opal_argv_free(ck);
            return ORTE_ERR_SILENT;
        }
        opal_argv_free(ck);
    }
    opal_argv_free(ppr_req);

    /* if nothing was given, that's an error */
    if (0 == n) {
        opal_output(0, "NOTHING GIVEN");
        return ORTE_ERR_SILENT;
    }
#if OPAL_HAVE_HWLOC
    /* if more than one level was specified, then pruning will be reqd */
    if (1 < n) {
        pruning_reqd = true;
    }
#endif

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:ppr: job %s assigned policy %s",
                        ORTE_JOBID_PRINT(jdata->jobid),
                        orte_rmaps_base_print_mapping(jdata->map->mapping));

#if OPAL_HAVE_HWLOC
    /* convenience - hwloc object type for the deepest level requested */
    level = start;
    lowest = opal_hwloc_levels[start];
#endif

    for (idx=0; idx < (orte_app_idx_t)jdata->apps->size; idx++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, idx))) {
            continue;
        }

        /* if the number of total procs was given, set that
         * limit - otherwise, set to max so we simply fill
         * all the nodes with the pattern
         */
        if (0 < app->num_procs) {
            total_procs = app->num_procs;
        } else {
            total_procs = ORTE_VPID_MAX;
        }

        /* get the available nodes */
        OBJ_CONSTRUCT(&node_list, opal_list_t);
        if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
                                                                  jdata->map->mapping, initial_map, false))) {
            ORTE_ERROR_LOG(rc);
            goto error;
        }
        /* flag that all subsequent requests should not reset the node->mapped flag */
        initial_map = false;

        /* if a bookmark exists from some prior mapping, set us to start there */
        jdata->bookmark = orte_rmaps_base_get_starting_point(&node_list, jdata);

        /* cycle across the nodes */
        nprocs_mapped = 0;
        for (item = opal_list_get_first(&node_list);
             item != opal_list_get_end(&node_list);
             item = opal_list_get_next(item)) {
            node = (orte_node_t*)item;
#if OPAL_HAVE_HWLOC
            /* bozo check - cannot place by hw resource without a topology */
            if (NULL == node->topology) {
                orte_show_help("help-orte-rmaps-ppr.txt", "ppr-topo-missing",
                               true, node->name);
                rc = ORTE_ERR_SILENT;
                goto error;
            }
#endif
            /* add the node to the map, if needed */
            if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
                if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
                    ORTE_ERROR_LOG(rc);
                    goto error;
                }
                ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
                OBJ_RETAIN(node);  /* maintain accounting on object */
                jdata->map->num_nodes++;
            }
            /* if we are mapping solely at the node level, just put
             * that many procs on this node
             */
            if (OPAL_HWLOC_NODE_LEVEL == start) {
#if OPAL_HAVE_HWLOC
                obj = hwloc_get_root_obj(node->topology);
#endif
                for (j=0; j < ppr[start] && nprocs_mapped < total_procs; j++) {
                    if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, idx))) {
                        rc = ORTE_ERR_OUT_OF_RESOURCE;
                        goto error;
                    }
                    nprocs_mapped++;
#if OPAL_HAVE_HWLOC
                    orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR);
#endif
                }
#if OPAL_HAVE_HWLOC
            } else {
                /* get the number of lowest resources on this node */
                nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology,
                                                           lowest, cache_level,
                                                           OPAL_HWLOC_AVAILABLE);
                /* map the specified number of procs to each such resource on this node,
                 * recording the locale of each proc so we know its cpuset
                 */
                for (i=0; i < nobjs; i++) {
                    obj = opal_hwloc_base_get_obj_by_type(node->topology,
                                                          lowest, cache_level,
                                                          i, OPAL_HWLOC_AVAILABLE);
                    for (j=0; j < ppr[start] && nprocs_mapped < total_procs; j++) {
                        if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, idx))) {
                            rc = ORTE_ERR_OUT_OF_RESOURCE;
                            goto error;
                        }
                        nprocs_mapped++;
                        orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR);
                    }
                }
                if (pruning_reqd) {
                    /* go up the ladder and prune the procs according to
                     * the specification, adjusting the count of procs on the
                     * node as we go
                     */
                    level--;
                    prune(jdata->jobid, idx, node, &level, &nprocs_mapped);
                }
#endif
            }
            /* set the total slots used */
            if ((int)node->num_procs <= node->slots) {
                node->slots_inuse = (int)node->num_procs;
            } else {
                node->slots_inuse = node->slots;
            }
            /* if no-oversubscribe was specified, check to see if
             * we have violated the total slot specification - regardless,
             * if slots_max was given, we are not allowed to violate it!
             */
            if ((node->slots < (int)node->num_procs) ||
                (0 < node->slots_max && node->slots_max < (int)node->num_procs)) {
                if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
                    orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
                                   true, node->num_procs, app->app);
                    rc = ORTE_ERR_SILENT;
                    goto error;
                }
                /* flag the node as oversubscribed so that sched-yield gets
                 * properly set
                 */
                ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
            }
            /* once all the procs have been mapped, we are done with this
             * app - otherwise, continue on to the next node
             */
            if (total_procs == nprocs_mapped) {
                break;
            }
        }
        if (0 == app->num_procs) {
            app->num_procs = nprocs_mapped;
        }
        if (ORTE_VPID_MAX != total_procs && nprocs_mapped < total_procs) {
            /* couldn't map them all */
            orte_show_help("help-orte-rmaps-ppr.txt", "ppr-too-many-procs",
                           true, app->app, app->num_procs, jdata->map->ppr);
            rc = ORTE_ERR_SILENT;
            goto error;
        }
        /* compute vpids and add proc objects to the job */
        if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata, app, &node_list))) {
            ORTE_ERROR_LOG(rc);
            goto error;
        }
        /* track the total number of processes we mapped - must update
         * this AFTER we compute vpids so that computation is done
         * correctly
         */
        jdata->num_procs += app->num_procs;

        /* release the node list before moving on to the next app_context */
        while (NULL != (item = opal_list_remove_first(&node_list))) {
            OBJ_RELEASE(item);
        }
        OBJ_DESTRUCT(&node_list);
    }
    return ORTE_SUCCESS;

 error:
    while (NULL != (item = opal_list_remove_first(&node_list))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&node_list);
    return rc;
}
/**
 * Function for finding and opening either all MCA components, or the one
 * that was specifically requested via a MCA parameter.
 *
 * Also translates the deprecated command-line/MCA options (--ppr, --bycore,
 * --byslot, --bynode, --pernode, --npernode, --npersocket, cpus-per-rank,
 * oversubscribe controls) into the current mapping/ranking/binding policy
 * settings, erroring out (ORTE_ERR_SILENT after showing help) whenever two
 * options would redefine an already-given policy.
 */
static int orte_rmaps_base_open(mca_base_open_flag_t flags)
{
    int rc;

    /* init the globals */
    OBJ_CONSTRUCT(&orte_rmaps_base.selected_modules, opal_list_t);
    orte_rmaps_base.slot_list = NULL;
    orte_rmaps_base.mapping = 0;
    orte_rmaps_base.ranking = 0;
    orte_rmaps_base.device = NULL;

    /* if a topology file was given, then set our topology
     * from it. Even though our actual topology may differ,
     * mpirun only needs to see the compute node topology
     * for mapping purposes
     */
    if (NULL != rmaps_base_topo_file) {
        if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology(rmaps_base_topo_file))) {
            orte_show_help("help-orte-rmaps-base.txt", "topo-file", true, rmaps_base_topo_file);
            return ORTE_ERR_SILENT;
        }
    }

    /* check for violations that has to be detected before we parse the mapping option */
    if (NULL != orte_rmaps_base.ppr) {
        orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
                       "--ppr, -ppr", "--map-by ppr:<pattern>",
                       "rmaps_base_pattern, rmaps_ppr_pattern",
                       "rmaps_base_mapping_policy=ppr:<pattern>");
        /* if the mapping policy is NULL, then we can proceed by converting
         * the deprecated ppr option into the new form; otherwise the two
         * conflict and we must abort
         */
        if (NULL == rmaps_base_mapping_policy) {
            asprintf(&rmaps_base_mapping_policy, "ppr:%s", orte_rmaps_base.ppr);
        } else {
            return ORTE_ERR_SILENT;
        }
    }
    if (1 < orte_rmaps_base.cpus_per_rank) {
        /* deprecation warning only - the actual binding/mapping adjustment
         * happens further below, after the policies are parsed
         */
        orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
                       "--cpus-per-proc, -cpus-per-proc, --cpus-per-rank, -cpus-per-rank",
                       "--map-by <obj>:PE=N, default <obj>=NUMA",
                       "rmaps_base_cpus_per_proc",
                       "rmaps_base_mapping_policy=<obj>:PE=N, default <obj>=NUMA");
    }

    /* parse the user-specified mapping and ranking policy strings */
    if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&orte_rmaps_base.mapping,
                                                                 &orte_rmaps_base.device,
                                                                 rmaps_base_mapping_policy))) {
        return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_ranking_policy(&orte_rmaps_base.ranking,
                                                                 orte_rmaps_base.mapping,
                                                                 rmaps_base_ranking_policy))) {
        return rc;
    }

    if (rmaps_base_bycore) {
        orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
                       "--bycore, -bycore", "--map-by core",
                       "rmaps_base_bycore", "rmaps_base_mapping_policy=core");
        /* set mapping policy to bycore - error if something else already set */
        if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) != ORTE_MAPPING_BYCORE) {
            /* error - cannot redefine the default mapping policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "bycore", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYCORE);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* set ranking policy to bycore - error if something else already set */
        if ((ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(orte_rmaps_base.ranking)) &&
            ORTE_GET_RANKING_POLICY(orte_rmaps_base.ranking) != ORTE_RANK_BY_CORE) {
            /* error - cannot redefine the default ranking policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "ranking",
                           "bycore", orte_rmaps_base_print_ranking(orte_rmaps_base.ranking));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_CORE);
        ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
    }

    if (rmaps_base_byslot) {
        orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
                       "--byslot, -byslot", "--map-by slot",
                       "rmaps_base_byslot", "rmaps_base_mapping_policy=slot");
        /* set mapping policy to byslot - error if something else already set */
        if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) != ORTE_MAPPING_BYSLOT) {
            /* error - cannot redefine the default mapping policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "byslot", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYSLOT);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* set ranking policy to byslot - error if something else already set */
        if ((ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(orte_rmaps_base.ranking)) &&
            ORTE_GET_RANKING_POLICY(orte_rmaps_base.ranking) != ORTE_RANK_BY_SLOT) {
            /* error - cannot redefine the default ranking policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "ranking",
                           "byslot", orte_rmaps_base_print_ranking(orte_rmaps_base.ranking));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_SLOT);
        ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
    }

    if (rmaps_base_bynode) {
        orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
                       "--bynode, -bynode", "--map-by node",
                       "rmaps_base_bynode", "rmaps_base_mapping_policy=node");
        /* set mapping policy to bynode - error if something else already set */
        if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) != ORTE_MAPPING_BYNODE) {
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "bynode", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYNODE);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* set ranking policy to bynode - error if something else already set */
        if ((ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(orte_rmaps_base.ranking)) &&
            ORTE_GET_RANKING_POLICY(orte_rmaps_base.ranking) != ORTE_RANK_BY_NODE) {
            /* error - cannot redefine the default ranking policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "ranking",
                           "bynode", orte_rmaps_base_print_ranking(orte_rmaps_base.ranking));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_NODE);
        ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
    }

    if (1 < orte_rmaps_base.cpus_per_rank) {
        /* if we were asked for multiple cpus/proc, then we have to
         * bind to those cpus - any other binding policy is an
         * error
         */
        if (OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) {
            if (opal_hwloc_use_hwthreads_as_cpus) {
                if (OPAL_BIND_TO_HWTHREAD != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) &&
                    OPAL_BIND_TO_NONE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                    orte_show_help("help-orte-rmaps-base.txt", "mismatch-binding", true,
                                   orte_rmaps_base.cpus_per_rank, "use-hwthreads-as-cpus",
                                   opal_hwloc_base_print_binding(opal_hwloc_binding_policy),
                                   "bind-to hwthread");
                    return ORTE_ERR_SILENT;
                }
            } else if (OPAL_BIND_TO_CORE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) &&
                       OPAL_BIND_TO_NONE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                orte_show_help("help-orte-rmaps-base.txt", "mismatch-binding", true,
                               orte_rmaps_base.cpus_per_rank, "cores as cpus",
                               opal_hwloc_base_print_binding(opal_hwloc_binding_policy),
                               "bind-to core");
                return ORTE_ERR_SILENT;
            }
        } else {
            /* no binding policy given - default to the cpu type in use */
            if (opal_hwloc_use_hwthreads_as_cpus) {
                OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_HWTHREAD);
            } else {
                OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CORE);
            }
        }
        /* we also need to ensure we are mapping to a high-enough level to have
         * multiple cpus beneath it - by default, we'll go to the NUMA level
         */
        if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
            if (ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) == ORTE_MAPPING_BYHWTHREAD ||
                (ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) == ORTE_MAPPING_BYCORE &&
                 !opal_hwloc_use_hwthreads_as_cpus)) {
                orte_show_help("help-orte-rmaps-base.txt", "mapping-too-low-init", true);
                return ORTE_ERR_SILENT;
            }
        } else {
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "%s rmaps:base pe/rank set - setting mapping to BYNUMA",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYNUMA);
            ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        }
    }

    if (orte_rmaps_base_pernode) {
        /* there is no way to resolve this conflict, so if something else was
         * given, we have no choice but to error out
         */
        if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "bynode", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        /* ensure we set the mapping policy to ppr */
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_PPR);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* define the ppr */
        orte_rmaps_base.ppr = strdup("1:node");
    }
    if (0 < orte_rmaps_base_n_pernode) {
        /* there is no way to resolve this conflict, so if something else was
         * given, we have no choice but to error out
         */
        if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "bynode", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        /* ensure we set the mapping policy to ppr */
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_PPR);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* define the ppr */
        asprintf(&orte_rmaps_base.ppr, "%d:node", orte_rmaps_base_n_pernode);
    }
    if (0 < orte_rmaps_base_n_persocket) {
        /* there is no way to resolve this conflict, so if something else was
         * given, we have no choice but to error out
         */
        if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "bynode", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        /* ensure we set the mapping policy to ppr */
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_PPR);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* define the ppr */
        asprintf(&orte_rmaps_base.ppr, "%d:socket", orte_rmaps_base_n_persocket);
    }

    /* Should we schedule on the local node or not? */
    if (rmaps_base_no_schedule_local) {
        orte_rmaps_base.mapping |= ORTE_MAPPING_NO_USE_LOCAL;
    }

    /* Should we oversubscribe or not? */
    if (rmaps_base_no_oversubscribe) {
        if ((ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            !(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
            /* error - cannot redefine the default mapping policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "no-oversubscribe", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_SUBSCRIBE_GIVEN);
    }

    /** force oversubscription permission */
    if (rmaps_base_oversubscribe) {
        if ((ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
            /* error - cannot redefine the default mapping policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "oversubscribe", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_UNSET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_SUBSCRIBE_GIVEN);
        /* also set the overload allowed flag */
        opal_hwloc_binding_policy |= OPAL_BIND_ALLOW_OVERLOAD;
    }

    /* should we display a detailed (developer-quality) version of the map after determining it? */
    if (rmaps_base_display_devel_map) {
        orte_rmaps_base.display_map = true;
        orte_devel_level_output = true;
    }

    /* should we display a diffable report of proc locations after determining it? */
    if (rmaps_base_display_diffable_map) {
        orte_rmaps_base.display_map = true;
        orte_display_diffable_output = true;
    }

    /* Open up all available components */
    rc = mca_base_framework_components_open(&orte_rmaps_base_framework, flags);

    /* check to see if any component indicated a problem */
    if (ORTE_MAPPING_CONFLICTED & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
        /* the component would have already reported the error, so
         * tell the rest of the chain to shut up
         */
        return ORTE_ERR_SILENT;
    }

    /* All done */
    return rc;
}
/*
 * Create a round-robin mapping for the job.
 *
 * Handles the BYNODE/BYSLOT policies directly and, when hwloc support is
 * available, delegates per-object policies (hwthread/core/cache/socket/numa)
 * to orte_rmaps_rr_byobj, falling back to byslot if the requested object
 * type is not present in the topology.
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_TAKE_NEXT_OPTION when this mapper does not
 * apply, or an error code (ORTE_ERR_SILENT when help was already shown).
 *
 * FIX: the compute_vpids failure path previously returned directly,
 * leaking the constructed node_list (and the retained node objects);
 * it now goes through the common error cleanup like every other failure,
 * consistent with ppr_mapper.
 */
static int orte_rmaps_rr_map(orte_job_t *jdata)
{
    orte_app_context_t *app;
    int i;
    opal_list_t node_list;
    opal_list_item_t *item;
    orte_std_cntr_t num_slots;
    int rc;
    mca_base_component_t *c = &mca_rmaps_round_robin_component.base_version;
    bool initial_map=true;

    /* this mapper can only handle initial launch
     * when rr mapping is desired - allow
     * restarting of failed apps
     */
    if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:rr: job %s is being restarted - rr cannot map",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }
    if (NULL != jdata->map->req_mapper &&
        0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) {
        /* a mapper has been specified, and it isn't me */
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:rr: job %s not using rr mapper",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }
    if (ORTE_MAPPING_RR < ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
        /* I don't know how to do these - defer */
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:rr: job %s not using rr mapper",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:rr: mapping job %s",
                        ORTE_JOBID_PRINT(jdata->jobid));

    /* flag that I did the mapping */
    if (NULL != jdata->map->last_mapper) {
        free(jdata->map->last_mapper);
    }
    jdata->map->last_mapper = strdup(c->mca_component_name);

    /* start at the beginning... */
    jdata->num_procs = 0;

    /* cycle through the app_contexts, mapping them sequentially */
    for(i=0; i < jdata->apps->size; i++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
            continue;
        }

        /* setup the nodelist here in case we jump to error */
        OBJ_CONSTRUCT(&node_list, opal_list_t);

        /* if the number of processes wasn't specified, then we know there can be only
         * one app_context allowed in the launch, and that we are to launch it across
         * all available slots. We'll double-check the single app_context rule first
         */
        if (0 == app->num_procs && 1 < jdata->num_apps) {
            orte_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:multi-apps-and-zero-np",
                           true, jdata->num_apps, NULL);
            rc = ORTE_ERR_SILENT;
            goto error;
        }

        /* for each app_context, we have to get the list of nodes that it can
         * use since that can now be modified with a hostfile and/or -host
         * option
         */
        if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
                                                                  jdata->map->mapping, initial_map, false))) {
            ORTE_ERROR_LOG(rc);
            goto error;
        }
        /* flag that all subsequent requests should not reset the node->mapped flag */
        initial_map = false;

        /* if a bookmark exists from some prior mapping, set us to start there */
        jdata->bookmark = orte_rmaps_base_get_starting_point(&node_list, jdata);

        if (0 == app->num_procs) {
            /* set the num_procs to equal the number of slots on these
             * mapped nodes, taking into account the number of cpus/rank
             */
            app->num_procs = num_slots / orte_rmaps_base.cpus_per_rank;
            /* sometimes, we have only one "slot" assigned, but may
             * want more than one cpu/rank - so ensure we always wind
             * up with at least one proc
             */
            if (0 == app->num_procs) {
                app->num_procs = 1;
            }
        }

        /* Make assignments */
        if (ORTE_MAPPING_BYNODE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
            rc = orte_rmaps_rr_bynode(jdata, app, &node_list, num_slots, app->num_procs);
        } else if (ORTE_MAPPING_BYSLOT == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
            rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots, app->num_procs);
#if OPAL_HAVE_HWLOC
        } else if (ORTE_MAPPING_BYHWTHREAD == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
            rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs, HWLOC_OBJ_PU, 0);
            if (ORTE_ERR_NOT_FOUND == rc) {
                /* if the mapper couldn't map by this object because
                 * it isn't available, but the error allows us to try
                 * byslot, then do so
                 */
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
                rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots, app->num_procs);
            }
        } else if (ORTE_MAPPING_BYCORE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
            rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs, HWLOC_OBJ_CORE, 0);
            if (ORTE_ERR_NOT_FOUND == rc) {
                /* if the mapper couldn't map by this object because
                 * it isn't available, but the error allows us to try
                 * byslot, then do so
                 */
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
                rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots, app->num_procs);
            }
        } else if (ORTE_MAPPING_BYL1CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
            rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs, HWLOC_OBJ_CACHE, 1);
            if (ORTE_ERR_NOT_FOUND == rc) {
                /* if the mapper couldn't map by this object because
                 * it isn't available, but the error allows us to try
                 * byslot, then do so
                 */
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
                rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots, app->num_procs);
            }
        } else if (ORTE_MAPPING_BYL2CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
            rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs, HWLOC_OBJ_CACHE, 2);
            if (ORTE_ERR_NOT_FOUND == rc) {
                /* if the mapper couldn't map by this object because
                 * it isn't available, but the error allows us to try
                 * byslot, then do so
                 */
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
                rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots, app->num_procs);
            }
        } else if (ORTE_MAPPING_BYL3CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
            rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs, HWLOC_OBJ_CACHE, 3);
            if (ORTE_ERR_NOT_FOUND == rc) {
                /* if the mapper couldn't map by this object because
                 * it isn't available, but the error allows us to try
                 * byslot, then do so
                 */
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
                rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots, app->num_procs);
            }
        } else if (ORTE_MAPPING_BYSOCKET == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
            rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs, HWLOC_OBJ_SOCKET, 0);
            if (ORTE_ERR_NOT_FOUND == rc) {
                /* if the mapper couldn't map by this object because
                 * it isn't available, but the error allows us to try
                 * byslot, then do so
                 */
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
                rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots, app->num_procs);
            }
        } else if (ORTE_MAPPING_BYNUMA == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
            rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs, HWLOC_OBJ_NODE, 0);
            if (ORTE_ERR_NOT_FOUND == rc) {
                /* if the mapper couldn't map by this object because
                 * it isn't available, but the error allows us to try
                 * byslot, then do so
                 */
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
                rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots, app->num_procs);
            }
#endif
        } else {
            /* unrecognized mapping directive */
            orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy",
                           true, "mapping", orte_rmaps_base_print_mapping(jdata->map->mapping));
            rc = ORTE_ERR_SILENT;
            goto error;
        }
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            goto error;
        }

        /* compute vpids and add proc objects to the job - do this after
         * each app_context so that the ranks within each context are
         * contiguous
         */
        if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata, app, &node_list))) {
            ORTE_ERROR_LOG(rc);
            /* was "return rc;" - must go through cleanup or node_list leaks */
            goto error;
        }

        /* track the total number of processes we mapped - must update
         * this value AFTER we compute vpids so that computation
         * is done correctly
         */
        jdata->num_procs += app->num_procs;

        /* cleanup the node list - it can differ from one app_context
         * to another, so we have to get it every time
         */
        while (NULL != (item = opal_list_remove_first(&node_list))) {
            OBJ_RELEASE(item);
        }
        OBJ_DESTRUCT(&node_list);
    }

    return ORTE_SUCCESS;

 error:
    while(NULL != (item = opal_list_remove_first(&node_list))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&node_list);
    return rc;
}
/*
 * JOB_MAP
 */
/**
 * Render a job map into a newly-allocated, NUL-terminated string.
 *
 * @param output  [OUT] receives the malloc'd string on success (caller frees);
 *                set to NULL on entry as the default result
 * @param prefix  per-line indentation prefix; NULL is replaced by a single space
 * @param src     the job map to print
 * @param type    dss data type tag (unused here beyond the dss contract)
 *
 * @return ORTE_SUCCESS, or the error returned by opal_dss.print on a node.
 *
 * Two output modes: XML (orte_xml_output) emits a <map> document; otherwise
 * a human-readable listing, with extra detail when orte_devel_level_output
 * is set. Strings are built by repeated asprintf-and-replace on "tmp".
 */
int orte_dt_print_map(char **output, char *prefix, orte_job_map_t *src, opal_data_type_t type)
{
    char *tmp=NULL, *tmp2, *tmp3, *pfx, *pfx2;
    int32_t i, j;
    int rc;
    orte_node_t *node;
    orte_proc_t *proc;

    /* set default result */
    *output = NULL;

    /* protect against NULL prefix */
    if (NULL == prefix) {
        asprintf(&pfx2, " ");
    } else {
        asprintf(&pfx2, "%s", prefix);
    }

    if (orte_xml_output) {
        /* need to create the output in XML format */
        asprintf(&tmp, "<map>\n");
        /* loop through nodes */
        for (i=0; i < src->nodes->size; i++) {
            if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(src->nodes, i))) {
                continue;
            }
            orte_dt_print_node(&tmp2, "\t", node, ORTE_NODE);
            asprintf(&tmp3, "%s%s", tmp, tmp2);
            free(tmp2);
            free(tmp);
            tmp = tmp3;
            /* for each node, loop through procs and print their rank */
            for (j=0; j < node->procs->size; j++) {
                if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
                    continue;
                }
                orte_dt_print_proc(&tmp2, "\t\t", proc, ORTE_PROC);
                asprintf(&tmp3, "%s%s", tmp, tmp2);
                free(tmp2);
                free(tmp);
                tmp = tmp3;
            }
            /* close the element opened by orte_dt_print_node for this node */
            asprintf(&tmp3, "%s\t</host>\n", tmp);
            free(tmp);
            tmp = tmp3;
        }
        asprintf(&tmp2, "%s</map>\n", tmp);
        free(tmp);
        free(pfx2);
        *output = tmp2;
        return ORTE_SUCCESS;
    }

    /* indented prefix for the detail lines */
    asprintf(&pfx, "%s\t", pfx2);

    if (orte_devel_level_output) {
#if OPAL_HAVE_HWLOC
        asprintf(&tmp, "\n%sMapper requested: %s Last mapper: %s Mapping policy: %s Ranking policy: %s\n%sBinding policy: %s Cpu set: %s PPR: %s Cpus-per-rank: %d",
                 pfx2, (NULL == src->req_mapper) ? "NULL" : src->req_mapper,
                 (NULL == src->last_mapper) ? "NULL" : src->last_mapper,
                 orte_rmaps_base_print_mapping(src->mapping),
                 orte_rmaps_base_print_ranking(src->ranking),
                 pfx2, opal_hwloc_base_print_binding(src->binding),
                 (NULL == opal_hwloc_base_cpu_set) ? "NULL" : opal_hwloc_base_cpu_set,
                 (NULL == src->ppr) ? "NULL" : src->ppr,
                 (int)src->cpus_per_rank);
#else
        asprintf(&tmp, "\n%sMapper requested: %s Last mapper: %s Mapping policy: %s Ranking policy: %s PPR: %s Cpus-per-rank: %d",
                 pfx2, (NULL == src->req_mapper) ? "NULL" : src->req_mapper,
                 (NULL == src->last_mapper) ? "NULL" : src->last_mapper,
                 orte_rmaps_base_print_mapping(src->mapping),
                 orte_rmaps_base_print_ranking(src->ranking),
                 (NULL == src->ppr) ? "NULL" : src->ppr,
                 (int)src->cpus_per_rank);
#endif

        if (ORTE_VPID_INVALID == src->daemon_vpid_start) {
            asprintf(&tmp2, "%s\n%sNum new daemons: %ld\tNew daemon starting vpid INVALID\n%sNum nodes: %ld",
                     tmp, pfx, (long)src->num_new_daemons, pfx, (long)src->num_nodes);
        } else {
            asprintf(&tmp2, "%s\n%sNum new daemons: %ld\tNew daemon starting vpid %ld\n%sNum nodes: %ld",
                     tmp, pfx, (long)src->num_new_daemons, (long)src->daemon_vpid_start,
                     pfx, (long)src->num_nodes);
        }
        free(tmp);
        tmp = tmp2;
    } else {
        /* this is being printed for a user, so let's make it easier to see */
        asprintf(&tmp, "\n%s======================== JOB MAP ========================", pfx2);
    }

    for (i=0; i < src->nodes->size; i++) {
        if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(src->nodes, i))) {
            continue;
        }
        if (ORTE_SUCCESS != (rc = opal_dss.print(&tmp2, pfx2, node, ORTE_NODE))) {
            ORTE_ERROR_LOG(rc);
            free(pfx);
            free(pfx2);  /* fix: pfx2 was leaked on this error path */
            free(tmp);
            return rc;
        }
        asprintf(&tmp3, "%s\n%s", tmp, tmp2);
        free(tmp);
        free(tmp2);
        tmp = tmp3;
    }

    if (!orte_devel_level_output) {
        /* this is being printed for a user, so let's make it easier to see */
        asprintf(&tmp2, "%s\n\n%s=============================================================\n", tmp, pfx2);
        free(tmp);
        tmp = tmp2;
    }
    free(pfx2);

    /* set the return */
    *output = tmp;
    free(pfx);

    return ORTE_SUCCESS;
}
/**
 * Function for finding and opening either all MCA components, or the one
 * that was specifically requested via a MCA parameter.
 *
 * Also initializes the rmaps base globals and translates the various
 * rmaps MCA parameters (byslot/bynode, oversubscribe, topo file, display
 * options) into mapping/ranking policy flags. Conflicting user-specified
 * policies are reported via orte_show_help and cause ORTE_ERR_SILENT.
 * NOTE: the checks below are order-dependent - each one looks at the
 * directives set by the ones before it.
 */
static int orte_rmaps_base_open(mca_base_open_flag_t flags)
{
    int rc;

    /* init the globals */
    OBJ_CONSTRUCT(&orte_rmaps_base.selected_modules, opal_list_t);
    orte_rmaps_base.ppr = NULL;
    orte_rmaps_base.slot_list = NULL;
    orte_rmaps_base.mapping = 0;
    orte_rmaps_base.ranking = 0;

#if OPAL_HAVE_HWLOC
    /* if a topology file was given, then set our topology
     * from it. Even though our actual topology may differ,
     * mpirun only needs to see the compute node topology
     * for mapping purposes
     */
    if (NULL != rmaps_base_topo_file) {
        if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology(rmaps_base_topo_file))) {
            orte_show_help("help-orte-rmaps-base.txt", "topo-file", true, rmaps_base_topo_file);
            return ORTE_ERR_SILENT;
        }
    }
#endif

    /* parse the user-requested mapping policy string (if any) into flags */
    if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&orte_rmaps_base.mapping,
                                                                 &orte_rmaps_base.device,
                                                                 rmaps_base_mapping_policy))) {
        return rc;
    }

    /* parse the ranking policy - may depend on the mapping policy just set */
    if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_ranking_policy(&orte_rmaps_base.ranking,
                                                                 orte_rmaps_base.mapping,
                                                                 rmaps_base_ranking_policy))) {
        return rc;
    }

    if (rmaps_base_byslot) {
        /* set mapping policy to byslot - error if something else already set */
        if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) != ORTE_MAPPING_BYSLOT) {
            /* error - cannot redefine the default mapping policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "byslot", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYSLOT);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* set ranking policy to byslot - error if something else already set */
        if ((ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(orte_rmaps_base.ranking)) &&
            ORTE_GET_RANKING_POLICY(orte_rmaps_base.ranking) != ORTE_RANK_BY_SLOT) {
            /* error - cannot redefine the default ranking policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "ranking",
                           "byslot", orte_rmaps_base_print_ranking(orte_rmaps_base.ranking));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_SLOT);
        ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
    }

    if (rmaps_base_bynode) {
        /* set mapping policy to bynode - error if something else already set */
        if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) != ORTE_MAPPING_BYNODE) {
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "bynode", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYNODE);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* set ranking policy to bynode - error if something else already set */
        if ((ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(orte_rmaps_base.ranking)) &&
            ORTE_GET_RANKING_POLICY(orte_rmaps_base.ranking) != ORTE_RANK_BY_NODE) {
            /* error - cannot redefine the default ranking policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "ranking",
                           "bynode", orte_rmaps_base_print_ranking(orte_rmaps_base.ranking));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_NODE);
        ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
    }

    /* Should we schedule on the local node or not? */
    if (rmaps_base_no_schedule_local) {
        orte_rmaps_base.mapping |= ORTE_MAPPING_NO_USE_LOCAL;
    }

    /* Should we oversubscribe or not? */
    if (rmaps_base_no_oversubscribe) {
        /* conflict if oversubscription was already explicitly allowed */
        if ((ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            !(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
            /* error - cannot redefine the default mapping policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "no-oversubscribe", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_SUBSCRIBE_GIVEN);
    }

    /** force oversubscription permission */
    if (rmaps_base_oversubscribe) {
        /* conflict if oversubscription was already explicitly forbidden */
        if ((ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
            /* error - cannot redefine the default mapping policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "oversubscribe", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_UNSET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_SUBSCRIBE_GIVEN);
    }

    /* should we display a detailed (developer-quality) version of the map after determining it? */
    if (rmaps_base_display_devel_map) {
        orte_rmaps_base.display_map = true;
        orte_devel_level_output = true;
    }

    /* should we display a diffable report of proc locations after determining it? */
    if (rmaps_base_display_diffable_map) {
        orte_rmaps_base.display_map = true;
        orte_display_diffable_output = true;
    }

    /* Open up all available components */
    rc = mca_base_framework_components_open(&orte_rmaps_base_framework, flags);

    /* check to see if any component indicated a problem */
    if (ORTE_MAPPING_CONFLICTED & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
        /* the component would have already reported the error, so
         * tell the rest of the chain to shut up
         */
        return ORTE_ERR_SILENT;
    }

    /* All done */
    return rc;
}
/**
 * Assign an hwloc locale (ORTE_PROC_HWLOC_LOCALE attribute) to every proc
 * of this job, distributing "ppr" procs per object at the mapping level
 * encoded in jdata->map->mapping (node, hwthread, core, socket, caches, numa).
 *
 * Only runs if this (ppr) component was the last mapper used for the job;
 * otherwise returns ORTE_ERR_TAKE_NEXT_OPTION so another component can
 * handle it. Returns ORTE_ERR_SILENT if a node has no topology.
 */
static int assign_locations(orte_job_t *jdata)
{
    int i, j, m, n;
    mca_base_component_t *c=&mca_rmaps_ppr_component.base_version;
    orte_node_t *node;
    orte_proc_t *proc;
    orte_app_context_t *app;
    hwloc_obj_type_t level;
    hwloc_obj_t obj;
    unsigned int cache_level=0;
    int ppr, cnt, nobjs, nprocs_mapped;
    char **ppr_req, **ck;

    if (NULL == jdata->map->last_mapper ||
        0 != strcasecmp(jdata->map->last_mapper, c->mca_component_name)) {
        /* a mapper has been specified, and it isn't me */
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:ppr: job %s not using ppr assign: %s",
                            ORTE_JOBID_PRINT(jdata->jobid),
                            (NULL == jdata->map->last_mapper) ? "NULL" : jdata->map->last_mapper);
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:ppr: assigning locations for job %s with ppr %s policy %s",
                        ORTE_JOBID_PRINT(jdata->jobid), jdata->map->ppr,
                        orte_rmaps_base_print_mapping(jdata->map->mapping));

    /* pickup the object level - translate the mapping policy into the
     * corresponding hwloc object type (plus cache depth for cache levels) */
    if (ORTE_MAPPING_BYNODE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
        level = HWLOC_OBJ_MACHINE;
    } else if (ORTE_MAPPING_BYHWTHREAD == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
        level = HWLOC_OBJ_PU;
    } else if (ORTE_MAPPING_BYCORE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
        level = HWLOC_OBJ_CORE;
    } else if (ORTE_MAPPING_BYSOCKET == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
        level = HWLOC_OBJ_SOCKET;
    } else if (ORTE_MAPPING_BYL1CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
        level = HWLOC_OBJ_L1CACHE;
        cache_level = 1;
    } else if (ORTE_MAPPING_BYL2CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
        level = HWLOC_OBJ_L2CACHE;
        cache_level = 2;
    } else if (ORTE_MAPPING_BYL3CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
        level = HWLOC_OBJ_L3CACHE;
        cache_level = 3;
    } else if (ORTE_MAPPING_BYNUMA == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
        level = HWLOC_OBJ_NUMANODE;
    } else {
        /* unsupported policy for this component - let another mapper try */
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

    /* get the ppr value - the spec looks like "N:resource[,...]"; only the
     * leading count of the first element is used here */
    ppr_req = opal_argv_split(jdata->map->ppr, ',');
    ck = opal_argv_split(ppr_req[0], ':');
    ppr = strtol(ck[0], NULL, 10);
    opal_argv_free(ck);
    opal_argv_free(ppr_req);

    /* start assigning procs to objects, filling each object as we go until
     * all procs are assigned. */
    for (n=0; n < jdata->apps->size; n++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
            continue;
        }
        nprocs_mapped = 0;
        for (m=0; m < jdata->map->nodes->size; m++) {
            if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) {
                continue;
            }
            if (NULL == node->topology || NULL == node->topology->topo) {
                /* cannot place procs without knowing the node's topology */
                orte_show_help("help-orte-rmaps-ppr.txt", "ppr-topo-missing",
                               true, node->name);
                return ORTE_ERR_SILENT;
            }
            if (HWLOC_OBJ_MACHINE == level) {
                /* whole-node level: every proc of this job on the node gets
                 * the root object as its locale */
                obj = hwloc_get_root_obj(node->topology->topo);
                for (j=0; j < node->procs->size; j++) {
                    if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
                        continue;
                    }
                    if (proc->name.jobid != jdata->jobid) {
                        continue;
                    }
                    orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR);
                }
            } else {
                /* get the number of resources on this node at this level */
                nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo,
                                                           level, cache_level,
                                                           OPAL_HWLOC_AVAILABLE);

                /* map the specified number of procs to each such resource on this node,
                 * recording the locale of each proc so we know its cpuset */
                for (i=0; i < nobjs; i++) {
                    cnt = 0;  /* procs assigned to this particular object so far */
                    obj = opal_hwloc_base_get_obj_by_type(node->topology->topo,
                                                          level, cache_level,
                                                          i, OPAL_HWLOC_AVAILABLE);
                    for (j=0; j < node->procs->size && cnt < ppr && nprocs_mapped < app->num_procs; j++) {
                        if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
                            continue;
                        }
                        if (proc->name.jobid != jdata->jobid) {
                            continue;
                        }
                        /* if we already assigned it, then skip */
                        if (orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, NULL, OPAL_PTR)) {
                            continue;
                        }
                        nprocs_mapped++;
                        cnt++;
                        orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR);
                    }
                }
            }
        }
    }
    return ORTE_SUCCESS;
}
/* stuff proc attributes for sending back to a proc */
/**
 * Register a job's namespace with the local PMIx server.
 *
 * Builds the full job-level info list (nspace, jobid, sizes, node/proc map
 * regexes, policies, topology data) plus one per-proc data list per proc in
 * the job, registers any local clients, and hands everything to
 * opal_pmix.server_register_nspace. If an external data server is in use,
 * additionally publishes the packed job object so cross-mpirun "connect"
 * operations can retrieve it.
 *
 * @param jdata  the job being registered
 * @param force  currently unused by this implementation
 * @return ORTE_SUCCESS or an ORTE error code
 */
int orte_pmix_server_register_nspace(orte_job_t *jdata, bool force)
{
    int rc;
    orte_proc_t *pptr;
    int i, k, n;
    opal_list_t *info, *pmap;
    opal_value_t *kv;
    orte_node_t *node, *mynode;
    opal_vpid_t vpid;
    char **list, **procs, **micro, *tmp, *regex;
    orte_job_t *dmns;
    orte_job_map_t *map;
    orte_app_context_t *app;
    uid_t uid;
    gid_t gid;
    opal_list_t *cache;
    hwloc_obj_t machine;

    opal_output_verbose(2, orte_pmix_server_globals.output,
                        "%s register nspace for %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_JOBID_PRINT(jdata->jobid));

    /* setup the info list */
    info = OBJ_NEW(opal_list_t);
    uid = geteuid();
    gid = getegid();

    /* pass our nspace/rank */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_SERVER_NSPACE);
    kv->data.string = strdup(ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid));
    kv->type = OPAL_STRING;
    opal_list_append(info, &kv->super);

    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_SERVER_RANK);
    kv->data.uint32 = ORTE_PROC_MY_NAME->vpid;
    kv->type = OPAL_UINT32;
    opal_list_append(info, &kv->super);

    /* jobid */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_JOBID);
    kv->data.string = strdup(ORTE_JOBID_PRINT(jdata->jobid));
    kv->type = OPAL_STRING;
    opal_list_append(info, &kv->super);

    /* offset */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_NPROC_OFFSET);
    kv->data.uint32 = jdata->offset;
    kv->type = OPAL_UINT32;
    opal_list_append(info, &kv->super);

    /* check for cached values to add to the job info */
    cache = NULL;
    if (orte_get_attribute(&jdata->attributes, ORTE_JOB_INFO_CACHE, (void**)&cache, OPAL_PTR) &&
        NULL != cache) {
        while (NULL != (kv = (opal_value_t*)opal_list_remove_first(cache))) {
            opal_list_append(info, &kv->super);
        }
        orte_remove_attribute(&jdata->attributes, ORTE_JOB_INFO_CACHE);
        OBJ_RELEASE(cache);
    }

    /* assemble the node and proc map info */
    list = NULL;
    procs = NULL;
    map = jdata->map;
    for (i=0; i < map->nodes->size; i++) {
        micro = NULL;
        if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
            opal_argv_append_nosize(&list, node->name);
            /* assemble all the ranks for this job that are on this node */
            for (k=0; k < node->procs->size; k++) {
                if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, k))) {
                    if (jdata->jobid == pptr->name.jobid) {
                        opal_argv_append_nosize(&micro, ORTE_VPID_PRINT(pptr->name.vpid));
                    }
                }
            }
            /* assemble the rank/node map */
            if (NULL != micro) {
                tmp = opal_argv_join(micro, ',');
                opal_argv_free(micro);
                opal_argv_append_nosize(&procs, tmp);
                free(tmp);
            }
        }
    }

    /* let the PMIx server generate the nodemap regex */
    if (NULL != list) {
        tmp = opal_argv_join(list, ',');
        opal_argv_free(list);
        list = NULL;
        if (OPAL_SUCCESS != (rc = opal_pmix.generate_regex(tmp, &regex))) {
            ORTE_ERROR_LOG(rc);
            free(tmp);
            OPAL_LIST_RELEASE(info);
            return rc;
        }
        free(tmp);
        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup(OPAL_PMIX_NODE_MAP);
        kv->type = OPAL_STRING;
        kv->data.string = regex;
        opal_list_append(info, &kv->super);
    }

    /* let the PMIx server generate the procmap regex */
    if (NULL != procs) {
        tmp = opal_argv_join(procs, ';');
        opal_argv_free(procs);
        procs = NULL;
        if (OPAL_SUCCESS != (rc = opal_pmix.generate_ppn(tmp, &regex))) {
            ORTE_ERROR_LOG(rc);
            free(tmp);
            OPAL_LIST_RELEASE(info);
            return rc;
        }
        free(tmp);
        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup(OPAL_PMIX_PROC_MAP);
        kv->type = OPAL_STRING;
        kv->data.string = regex;
        opal_list_append(info, &kv->super);
    }

    /* get our local node */
    if (NULL == (dmns = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        OPAL_LIST_RELEASE(info);
        return ORTE_ERR_NOT_FOUND;
    }
    if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(dmns->procs, ORTE_PROC_MY_NAME->vpid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        OPAL_LIST_RELEASE(info);
        return ORTE_ERR_NOT_FOUND;
    }
    mynode = pptr->node;
    if (NULL == mynode) {
        /* cannot happen */
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        OPAL_LIST_RELEASE(info);
        return ORTE_ERR_NOT_FOUND;
    }

    /* pass our node ID */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_NODEID);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = mynode->index;
    opal_list_append(info, &kv->super);

    /* pass our node size */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_NODE_SIZE);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = mynode->num_procs;
    opal_list_append(info, &kv->super);

    /* pass the number of nodes in the job */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_NUM_NODES);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = map->num_nodes;
    opal_list_append(info, &kv->super);

    /* univ size */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_UNIV_SIZE);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = jdata->total_slots_alloc;
    opal_list_append(info, &kv->super);

    /* job size */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_JOB_SIZE);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = jdata->num_procs;
    opal_list_append(info, &kv->super);

    /* number of apps in this job */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_JOB_NUM_APPS);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = jdata->num_apps;
    opal_list_append(info, &kv->super);

    /* local size */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_LOCAL_SIZE);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = jdata->num_local_procs;
    opal_list_append(info, &kv->super);

    /* max procs */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_MAX_PROCS);
    kv->type = OPAL_UINT32;
    kv->data.uint32 = jdata->total_slots_alloc;
    opal_list_append(info, &kv->super);

    /* topology signature */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_TOPOLOGY_SIGNATURE);
    kv->type = OPAL_STRING;
    kv->data.string = strdup(orte_topo_signature);
    opal_list_append(info, &kv->super);

    /* total available physical memory */
    machine = hwloc_get_next_obj_by_type (opal_hwloc_topology, HWLOC_OBJ_MACHINE, NULL);
    if (NULL != machine) {
        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup(OPAL_PMIX_AVAIL_PHYS_MEMORY);
        kv->type = OPAL_UINT64;
#if HWLOC_API_VERSION < 0x20000
        kv->data.uint64 = machine->memory.total_memory;
#else
        /* hwloc 2.x moved total_memory out of the memory sub-struct */
        kv->data.uint64 = machine->total_memory;
#endif
        opal_list_append(info, &kv->super);
    }

    /* pass the mapping policy used for this job */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_MAPBY);
    kv->type = OPAL_STRING;
    kv->data.string = strdup(orte_rmaps_base_print_mapping(jdata->map->mapping));
    opal_list_append(info, &kv->super);

    /* pass the ranking policy used for this job */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_RANKBY);
    kv->type = OPAL_STRING;
    kv->data.string = strdup(orte_rmaps_base_print_ranking(jdata->map->ranking));
    opal_list_append(info, &kv->super);

    /* pass the binding policy used for this job */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_BINDTO);
    kv->type = OPAL_STRING;
    kv->data.string = strdup(opal_hwloc_base_print_binding(jdata->map->binding));
    opal_list_append(info, &kv->super);

    /* register any local clients */
    vpid = ORTE_VPID_MAX;
    micro = NULL;
    for (i=0; i < mynode->procs->size; i++) {
        if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(mynode->procs, i))) {
            continue;
        }
        if (pptr->name.jobid == jdata->jobid) {
            opal_argv_append_nosize(&micro, ORTE_VPID_PRINT(pptr->name.vpid));
            if (pptr->name.vpid < vpid) {
                vpid = pptr->name.vpid;  /* track lowest local vpid -> local leader */
            }
            /* go ahead and register this client */
            if (OPAL_SUCCESS != (rc = opal_pmix.server_register_client(&pptr->name, uid, gid,
                                                                       (void*)pptr, NULL, NULL))) {
                ORTE_ERROR_LOG(rc);
            }
        }
    }
    if (NULL != micro) {
        /* pass the local peers */
        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup(OPAL_PMIX_LOCAL_PEERS);
        kv->type = OPAL_STRING;
        kv->data.string = opal_argv_join(micro, ',');
        opal_argv_free(micro);
        opal_list_append(info, &kv->super);
    }

    /* pass the local ldr */
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_LOCALLDR);
    kv->type = OPAL_VPID;
    kv->data.name.vpid = vpid;
    opal_list_append(info, &kv->super);

    /* for each proc in this job, create an object that
     * includes the info describing the proc so the recipient has a complete
     * picture. This allows procs to connect to each other without
     * any further info exchange, assuming the underlying transports
     * support it. We also pass all the proc-specific data here so
     * that each proc can lookup info about every other proc in the job */
    for (n=0; n < map->nodes->size; n++) {
        if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, n))) {
            continue;
        }
        /* cycle across each proc on this node, passing all data that
         * varies by proc */
        for (i=0; i < node->procs->size; i++) {
            if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
                continue;
            }
            /* only consider procs from this job */
            if (pptr->name.jobid != jdata->jobid) {
                continue;
            }
            /* setup the proc map object */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_PROC_DATA);
            kv->type = OPAL_PTR;
            kv->data.ptr = OBJ_NEW(opal_list_t);
            opal_list_append(info, &kv->super);
            pmap = kv->data.ptr;

            /* must start with rank */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_RANK);
            kv->type = OPAL_VPID;
            kv->data.name.vpid = pptr->name.vpid;
            opal_list_append(pmap, &kv->super);

            /* location, for local procs */
            if (node == mynode) {
                tmp = NULL;
                if (orte_get_attribute(&pptr->attributes, ORTE_PROC_CPU_BITMAP,
                                       (void**)&tmp, OPAL_STRING) &&
                    NULL != tmp) {
                    kv = OBJ_NEW(opal_value_t);
                    kv->key = strdup(OPAL_PMIX_LOCALITY_STRING);
                    kv->type = OPAL_STRING;
                    kv->data.string = opal_hwloc_base_get_locality_string(opal_hwloc_topology, tmp);
                    opal_list_append(pmap, &kv->super);
                    free(tmp);
                } else {
                    /* the proc is not bound */
                    kv = OBJ_NEW(opal_value_t);
                    kv->key = strdup(OPAL_PMIX_LOCALITY_STRING);
                    kv->type = OPAL_STRING;
                    kv->data.string = NULL;
                    opal_list_append(pmap, &kv->super);
                }
            }

            /* global/univ rank */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_GLOBAL_RANK);
            kv->type = OPAL_VPID;
            kv->data.name.vpid = pptr->name.vpid + jdata->offset;
            opal_list_append(pmap, &kv->super);

            if (1 < jdata->num_apps) {
                /* appnum */
                kv = OBJ_NEW(opal_value_t);
                kv->key = strdup(OPAL_PMIX_APPNUM);
                kv->type = OPAL_UINT32;
                kv->data.uint32 = pptr->app_idx;
                opal_list_append(pmap, &kv->super);

                /* app ldr */
                app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, pptr->app_idx);
                kv = OBJ_NEW(opal_value_t);
                kv->key = strdup(OPAL_PMIX_APPLDR);
                kv->type = OPAL_VPID;
                kv->data.name.vpid = app->first_rank;
                opal_list_append(pmap, &kv->super);

                /* app rank */
                kv = OBJ_NEW(opal_value_t);
                kv->key = strdup(OPAL_PMIX_APP_RANK);
                kv->type = OPAL_VPID;
                kv->data.name.vpid = pptr->app_rank;
                opal_list_append(pmap, &kv->super);

                /* app size - this is per-proc data, so it belongs on the
                 * proc's pmap list like the other app entries above.
                 * fix: was appended to the job-level "info" list, which
                 * duplicated the key once for every proc in the job */
                kv = OBJ_NEW(opal_value_t);
                kv->key = strdup(OPAL_PMIX_APP_SIZE);
                kv->type = OPAL_UINT32;
                kv->data.uint32 = app->num_procs;
                opal_list_append(pmap, &kv->super);
            }

            /* local rank */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_LOCAL_RANK);
            kv->type = OPAL_UINT16;
            kv->data.uint16 = pptr->local_rank;
            opal_list_append(pmap, &kv->super);

            /* node rank - fix: the value must be stored in the uint16
             * union member to match the declared OPAL_UINT16 type
             * (was written into data.uint32) */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_NODE_RANK);
            kv->type = OPAL_UINT16;
            kv->data.uint16 = pptr->node_rank;
            opal_list_append(pmap, &kv->super);

            /* node ID */
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_NODEID);
            kv->type = OPAL_UINT32;
            kv->data.uint32 = pptr->node->index;
            opal_list_append(pmap, &kv->super);

            if (map->num_nodes < orte_hostname_cutoff) {
                kv = OBJ_NEW(opal_value_t);
                kv->key = strdup(OPAL_PMIX_HOSTNAME);
                kv->type = OPAL_STRING;
                kv->data.string = strdup(pptr->node->name);
                opal_list_append(pmap, &kv->super);
            }
        }
    }

    /* mark the job as registered */
    orte_set_attribute(&jdata->attributes, ORTE_JOB_NSPACE_REGISTERED, ORTE_ATTR_LOCAL, NULL, OPAL_BOOL);

    /* pass it down */
    /* we are in an event, so no need to callback */
    rc = opal_pmix.server_register_nspace(jdata->jobid,
                                          jdata->num_local_procs,
                                          info, NULL, NULL);
    OPAL_LIST_RELEASE(info);

    /* if the user has connected us to an external server, then we must
     * assume there is going to be some cross-mpirun exchange, and so
     * we protect against that situation by publishing the job info
     * for this job - this allows any subsequent "connect" to retrieve
     * the job info */
    if (NULL != orte_data_server_uri) {
        opal_buffer_t buf;

        OBJ_CONSTRUCT(&buf, opal_buffer_t);
        if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &jdata, 1, ORTE_JOB))) {
            ORTE_ERROR_LOG(rc);
            OBJ_DESTRUCT(&buf);
            return rc;
        }
        info = OBJ_NEW(opal_list_t);
        /* create a key-value with the key being the string jobid
         * and the value being the byte object */
        kv = OBJ_NEW(opal_value_t);
        orte_util_convert_jobid_to_string(&kv->key, jdata->jobid);
        kv->type = OPAL_BYTE_OBJECT;
        opal_dss.unload(&buf, (void**)&kv->data.bo.bytes, &kv->data.bo.size);
        OBJ_DESTRUCT(&buf);
        opal_list_append(info, &kv->super);

        /* set the range to be session */
        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup(OPAL_PMIX_RANGE);
        kv->type = OPAL_UINT;
        kv->data.uint = OPAL_PMIX_RANGE_SESSION;
        opal_list_append(info, &kv->super);

        /* set the persistence to be app */
        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup(OPAL_PMIX_PERSISTENCE);
        kv->type = OPAL_INT;
        kv->data.integer = OPAL_PMIX_PERSIST_APP;
        opal_list_append(info, &kv->super);

        /* add our effective userid to the directives */
        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup(OPAL_PMIX_USERID);
        kv->type = OPAL_UINT32;
        kv->data.uint32 = geteuid();
        opal_list_append(info, &kv->super);

        /* now publish it */
        if (ORTE_SUCCESS != (rc = pmix_server_publish_fn(ORTE_PROC_MY_NAME, info, mycbfunc, info))) {
            ORTE_ERROR_LOG(rc);
        }
    }

    return rc;
}