/*
 * Sequentially map the ranks according to the placement in the
 * specified hostfile.
 *
 * Returns ORTE_SUCCESS on success, ORTE_ERR_TAKE_NEXT_OPTION when this
 * mapper does not apply to the job, ORTE_ERR_SILENT after a help message
 * has already been shown, or a specific error code otherwise.
 *
 * Fixes applied (review):
 *  - removed stray double semicolon in the declaration of "seq"
 *  - free "hosts" when the per-app hostfile cannot be opened (was leaked)
 *  - early "no resources" / oversubscribe failures now go through the
 *    error: label so default_seq_list (and the per-app list, where known)
 *    is cleaned up instead of leaking on a bare return
 *  - check orte_rmaps_base_setup_proc() for NULL, as ppr_mapper does
 */
static int orte_rmaps_seq_map(orte_job_t *jdata)
{
    orte_job_map_t *map;
    orte_app_context_t *app;
    int i, n;
    orte_std_cntr_t j;
    opal_list_item_t *item;
    orte_node_t *node, *nd;
    seq_node_t *sq, *save=NULL, *seq;   /* was "*seq;;" - stray semicolon removed */
    orte_vpid_t vpid;
    orte_std_cntr_t num_nodes;
    int rc;
    opal_list_t default_seq_list;
    opal_list_t node_list, *seq_list, sq_list;
    orte_proc_t *proc;
    mca_base_component_t *c = &mca_rmaps_seq_component.base_version;
    char *hosts = NULL, *sep, *eptr;
    FILE *fp;
    opal_hwloc_resource_type_t rtype;

    OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base_framework.framework_output,
                         "%s rmaps:seq called on job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jdata->jobid)));

    /* this mapper can only handle initial launch
     * when seq mapping is desired - allow
     * restarting of failed apps
     */
    if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:seq: job %s is being restarted - seq cannot map",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }
    if (NULL != jdata->map->req_mapper) {
        if (0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) {
            /* a mapper has been specified, and it isn't me */
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:seq: job %s not using sequential mapper",
                                ORTE_JOBID_PRINT(jdata->jobid));
            return ORTE_ERR_TAKE_NEXT_OPTION;
        }
        /* we need to process it */
        goto process;
    }
    if (ORTE_MAPPING_SEQ != ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
        /* I don't know how to do these - defer */
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:seq: job %s not using seq mapper",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

 process:
    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:seq: mapping job %s",
                        ORTE_JOBID_PRINT(jdata->jobid));

    /* flag that I did the mapping */
    if (NULL != jdata->map->last_mapper) {
        free(jdata->map->last_mapper);
    }
    jdata->map->last_mapper = strdup(c->mca_component_name);

    /* convenience def */
    map = jdata->map;

    /* if there is a default hostfile, go and get its ordered list of nodes */
    OBJ_CONSTRUCT(&default_seq_list, opal_list_t);
    if (NULL != orte_default_hostfile) {
        char *hstname = NULL;
        /* open the file */
        fp = fopen(orte_default_hostfile, "r");
        if (NULL == fp) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            rc = ORTE_ERR_NOT_FOUND;
            goto error;
        }
        while (NULL != (hstname = orte_getline(fp))) {
            if (0 == strlen(hstname)) {
                free(hstname);
                /* blank line - ignore */
                continue;
            }
            if ('#' == hstname[0]) {
                free(hstname);
                /* comment line - ignore */
                continue;
            }
            sq = OBJ_NEW(seq_node_t);
            if (NULL != (sep = strchr(hstname, ' '))) {
                /* the hostname is optionally followed by a cpuset spec */
                *sep = '\0';
                sep++;
                /* remove any trailing space */
                eptr = sep + strlen(sep) - 1;
                while (eptr > sep && isspace(*eptr)) {
                    eptr--;
                }
                *(eptr+1) = 0;
                sq->cpuset = strdup(sep);
            }
            // Strip off the FQDN if present, ignore IP addresses
            if (!orte_keep_fqdn_hostnames && !opal_net_isaddr(hstname)) {
                char *ptr;
                if (NULL != (ptr = strchr(hstname, '.'))) {
                    *ptr = '\0';
                }
            }
            sq->hostname = hstname;
            opal_list_append(&default_seq_list, &sq->super);
        }
        fclose(fp);
    }

    /* start at the beginning... */
    vpid = 0;
    jdata->num_procs = 0;
    if (0 < opal_list_get_size(&default_seq_list)) {
        save = (seq_node_t*)opal_list_get_first(&default_seq_list);
    }

    /* default to LOGICAL processors */
    if (orte_get_attribute(&jdata->attributes, ORTE_JOB_PHYSICAL_CPUIDS, NULL, OPAL_BOOL)) {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:seq: using PHYSICAL processors");
        rtype = OPAL_HWLOC_PHYSICAL;
    } else {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:seq: using LOGICAL processors");
        rtype = OPAL_HWLOC_LOGICAL;
    }

    /* initialize all the nodes as not included in this job map */
    for (j=0; j < orte_node_pool->size; j++) {
        if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, j))) {
            ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
        }
    }

    /* cycle through the app_contexts, mapping them sequentially */
    for (i=0; i < jdata->apps->size; i++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
            continue;
        }

        /* dash-host trumps hostfile */
        if (orte_get_attribute(&app->attributes, ORTE_APP_DASH_HOST, (void**)&hosts, OPAL_STRING)) {
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:seq: using dash-host nodes on app %s", app->app);
            OBJ_CONSTRUCT(&node_list, opal_list_t);
            /* dash host entries cannot specify cpusets, so use the std
             * function to retrieve the list */
            if (ORTE_SUCCESS != (rc = orte_util_get_ordered_dash_host_list(&node_list, hosts))) {
                ORTE_ERROR_LOG(rc);
                free(hosts);
                goto error;
            }
            free(hosts);
            /* transfer the list to a seq_node_t list */
            OBJ_CONSTRUCT(&sq_list, opal_list_t);
            while (NULL != (nd = (orte_node_t*)opal_list_remove_first(&node_list))) {
                sq = OBJ_NEW(seq_node_t);
                sq->hostname = strdup(nd->name);
                opal_list_append(&sq_list, &sq->super);
                OBJ_RELEASE(nd);
            }
            OBJ_DESTRUCT(&node_list);
            seq_list = &sq_list;
        } else if (orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, (void**)&hosts, OPAL_STRING)) {
            char *hstname;
            if (NULL == hosts) {
                rc = ORTE_ERR_NOT_FOUND;
                goto error;
            }
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:seq: using hostfile %s nodes on app %s", hosts, app->app);
            OBJ_CONSTRUCT(&sq_list, opal_list_t);
            /* open the file */
            fp = fopen(hosts, "r");
            if (NULL == fp) {
                ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                rc = ORTE_ERR_NOT_FOUND;
                OBJ_DESTRUCT(&sq_list);
                free(hosts);            /* was leaked on this path */
                goto error;
            }
            while (NULL != (hstname = orte_getline(fp))) {
                if (0 == strlen(hstname)) {
                    free(hstname);
                    /* blank line - ignore */
                    continue;
                }
                if ('#' == hstname[0]) {
                    free(hstname);
                    /* comment line - ignore */
                    continue;
                }
                sq = OBJ_NEW(seq_node_t);
                if (NULL != (sep = strchr(hstname, ' '))) {
                    *sep = '\0';
                    sep++;
                    /* remove any trailing space */
                    eptr = sep + strlen(sep) - 1;
                    while (eptr > sep && isspace(*eptr)) {
                        eptr--;
                    }
                    *(eptr+1) = 0;
                    sq->cpuset = strdup(sep);
                }
                // Strip off the FQDN if present, ignore IP addresses
                if (!orte_keep_fqdn_hostnames && !opal_net_isaddr(hstname)) {
                    char *ptr;
                    if (NULL != (ptr = strchr(hstname, '.'))) {
                        (*ptr) = '\0';
                    }
                }
                sq->hostname = hstname;
                opal_list_append(&sq_list, &sq->super);
            }
            fclose(fp);
            free(hosts);
            seq_list = &sq_list;
        } else if (0 < opal_list_get_size(&default_seq_list)) {
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:seq: using default hostfile nodes on app %s", app->app);
            seq_list = &default_seq_list;
        } else {
            /* can't do anything - no nodes available! */
            orte_show_help("help-orte-rmaps-base.txt",
                           "orte-rmaps-base:no-available-resources",
                           true);
            rc = ORTE_ERR_SILENT;   /* was a bare return - leaked default_seq_list */
            goto error;
        }

        /* check for nolocal and remove the head node, if required */
        if (map->mapping & ORTE_MAPPING_NO_USE_LOCAL) {
            for (item = opal_list_get_first(seq_list);
                 item != opal_list_get_end(seq_list);
                 item = opal_list_get_next(item)) {
                seq = (seq_node_t*)item;
                /* need to check ifislocal because the name in the
                 * hostfile may not have been FQDN, while name returned
                 * by gethostname may have been (or vice versa)
                 */
                if (orte_ifislocal(seq->hostname)) {
                    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                        "mca:rmaps:seq: removing head node %s", seq->hostname);
                    opal_list_remove_item(seq_list, item);
                    OBJ_RELEASE(item);  /* "un-retain" it */
                }
            }
        }

        if (NULL == seq_list || 0 == (num_nodes = (orte_std_cntr_t)opal_list_get_size(seq_list))) {
            orte_show_help("help-orte-rmaps-base.txt",
                           "orte-rmaps-base:no-available-resources",
                           true);
            /* clean up a per-app list before taking the error exit */
            if (NULL != seq_list && seq_list != &default_seq_list) {
                OPAL_LIST_DESTRUCT(seq_list);
            }
            rc = ORTE_ERR_SILENT;
            goto error;
        }

        /* if num_procs wasn't specified, set it now */
        if (0 == app->num_procs) {
            app->num_procs = num_nodes;
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:seq: setting num procs to %s for app %s",
                                ORTE_VPID_PRINT(app->num_procs), app->app);
        } else if (num_nodes < app->num_procs) {
            orte_show_help("help-orte-rmaps-seq.txt", "seq:not-enough-resources", true,
                           app->num_procs, num_nodes);
            if (seq_list != &default_seq_list) {
                OPAL_LIST_DESTRUCT(seq_list);
            }
            rc = ORTE_ERR_SILENT;
            goto error;
        }

        if (seq_list == &default_seq_list) {
            sq = save;
        } else {
            sq = (seq_node_t*)opal_list_get_first(seq_list);
        }
        for (n=0; n < app->num_procs; n++) {
            /* find this node on the global array - this is necessary so
             * that our mapping gets saved on that array as the objects
             * returned by the hostfile function are -not- on the array
             */
            node = NULL;
            for (j=0; j < orte_node_pool->size; j++) {
                if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, j))) {
                    continue;
                }
                if (0 == strcmp(sq->hostname, node->name)) {
                    break;
                }
            }
            if (NULL == node) {
                /* wasn't found - that is an error */
                orte_show_help("help-orte-rmaps-seq.txt",
                               "orte-rmaps-seq:resource-not-found",
                               true, sq->hostname);
                rc = ORTE_ERR_SILENT;
                goto error;
            }
            /* ensure the node is in the map */
            if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
                OBJ_RETAIN(node);
                opal_pointer_array_add(map->nodes, node);
                jdata->map->num_nodes++;
                ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
            }
            proc = orte_rmaps_base_setup_proc(jdata, node, i);
            if (NULL == proc) {
                /* was unchecked - sibling mappers treat this as OOM */
                rc = ORTE_ERR_OUT_OF_RESOURCE;
                goto error;
            }
            if ((node->slots < (int)node->num_procs) ||
                (0 < node->slots_max && node->slots_max < (int)node->num_procs)) {
                if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
                    orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
                                   true, node->num_procs, app->app);
                    ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
                    rc = ORTE_ERR_SILENT;
                    goto error;
                }
                /* flag the node as oversubscribed so that sched-yield gets
                 * properly set
                 */
                ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
                ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
                /* check for permission */
                if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
                    /* if we weren't given a directive either way, then we will error out
                     * as the #slots were specifically given, either by the host RM or
                     * via hostfile/dash-host */
                    if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
                        orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
                                       true, app->num_procs, app->app);
                        ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
                        rc = ORTE_ERR_SILENT;   /* was a bare return - leaked lists */
                        goto error;
                    } else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
                        /* if we were explicitly told not to oversubscribe, then don't */
                        orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
                                       true, app->num_procs, app->app);
                        ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
                        rc = ORTE_ERR_SILENT;   /* was a bare return - leaked lists */
                        goto error;
                    }
                }
            }
            /* assign the vpid */
            proc->name.vpid = vpid++;
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:seq: assign proc %s to node %s for app %s",
                                ORTE_VPID_PRINT(proc->name.vpid), sq->hostname, app->app);

            /* record the cpuset, if given */
            if (NULL != sq->cpuset) {
                hwloc_cpuset_t bitmap;
                char *cpu_bitmap;
                if (NULL == node->topology) {
                    /* not allowed - for sequential cpusets, we must have
                     * the topology info
                     */
                    orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-topology", true, node->name);
                    rc = ORTE_ERR_SILENT;
                    goto error;
                }
                /* if we are using hwthreads as cpus and binding to hwthreads, then
                 * we can just copy the cpuset across as it already specifies things
                 * at that level */
                if (opal_hwloc_use_hwthreads_as_cpus &&
                    OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                    cpu_bitmap = strdup(sq->cpuset);
                } else {
                    /* setup the bitmap */
                    bitmap = hwloc_bitmap_alloc();
                    /* parse the slot_list to find the socket and core */
                    if (ORTE_SUCCESS != (rc = opal_hwloc_base_slot_list_parse(sq->cpuset, node->topology, rtype, bitmap))) {
                        ORTE_ERROR_LOG(rc);
                        hwloc_bitmap_free(bitmap);
                        goto error;
                    }
                    /* note that we cannot set the proc locale to any specific object
                     * as the slot list may have assigned it to more than one - so
                     * leave that field NULL
                     */
                    /* set the proc to the specified map */
                    hwloc_bitmap_list_asprintf(&cpu_bitmap, bitmap);
                    hwloc_bitmap_free(bitmap);
                }
                orte_set_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, ORTE_ATTR_GLOBAL, cpu_bitmap, OPAL_STRING);
                opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                    "mca:rmaps:seq: binding proc %s to cpuset %s bitmap %s",
                                    ORTE_VPID_PRINT(proc->name.vpid), sq->cpuset, cpu_bitmap);
                /* we are going to bind to cpuset since the user is specifying the cpus */
                OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_CPUSET);
                /* note that the user specified the mapping */
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYUSER);
                ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_GIVEN);
                /* cleanup */
                free(cpu_bitmap);
            } else {
                hwloc_obj_t locale;
                /* assign the locale - okay for the topo to be null as
                 * it just means it wasn't returned
                 */
                if (NULL != node->topology) {
                    locale = hwloc_get_root_obj(node->topology);
                    orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE,
                                       ORTE_ATTR_LOCAL, locale, OPAL_PTR);
                }
            }

            /* add to the jdata proc array */
            if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
                ORTE_ERROR_LOG(rc);
                goto error;
            }
            /* move to next node */
            sq = (seq_node_t*)opal_list_get_next(&sq->super);
        }

        /** track the total number of processes we mapped */
        jdata->num_procs += app->num_procs;

        /* cleanup the node list if it came from this app_context */
        if (seq_list != &default_seq_list) {
            OPAL_LIST_DESTRUCT(seq_list);
        } else {
            save = sq;
        }
    }

    return ORTE_SUCCESS;

 error:
    OPAL_LIST_DESTRUCT(&default_seq_list);
    return rc;
}
/*
 * Sequentially map the ranks according to the placement in the
 * specified hostfile.
 *
 * NOTE(review): this is an older revision of the same mapper that also
 * appears earlier in this file; both cannot live in one translation unit
 * (duplicate definition) - confirm which revision is intended to remain.
 *
 * Fixes applied (review):
 *  - the error: path could release node_list twice when it aliased
 *    default_node_list (e.g. goto error from resource-not-found while
 *    mapping against the default hostfile) - now guarded
 *  - the two "no available resources" paths now go through error: so the
 *    already-built node lists are released instead of leaking
 *  - check orte_rmaps_base_setup_proc() for NULL
 */
static int orte_rmaps_seq_map(orte_job_t *jdata)
{
    orte_job_map_t *map;
    orte_app_context_t *app;
    int i, n;
    orte_std_cntr_t j;
    opal_list_item_t *item;
    orte_node_t *node, *nd, *save=NULL;
    orte_vpid_t vpid;
    orte_std_cntr_t num_nodes;
    int rc;
    opal_list_t *default_node_list=NULL;
    opal_list_t *node_list=NULL;
    orte_proc_t *proc;
    mca_base_component_t *c = &mca_rmaps_seq_component.base_version;

    OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output,
                         "%s rmaps:seq mapping job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jdata->jobid)));

    /* this mapper can only handle initial launch
     * when seq mapping is desired - allow
     * restarting of failed apps
     */
    if (ORTE_JOB_CONTROL_RESTART & jdata->controls) {
        opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                            "mca:rmaps:seq: job %s is being restarted - seq cannot map",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }
    if (NULL != jdata->map->req_mapper &&
        0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) {
        /* a mapper has been specified, and it isn't me */
        opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                            "mca:rmaps:seq: job %s not using sequential mapper",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }
    if (ORTE_MAPPING_SEQ != ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
        /* I don't know how to do these - defer */
        opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                            "mca:rmaps:seq: job %s not using seq mapper",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

    opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                        "mca:rmaps:seq: mapping job %s",
                        ORTE_JOBID_PRINT(jdata->jobid));

    /* flag that I did the mapping */
    if (NULL != jdata->map->last_mapper) {
        free(jdata->map->last_mapper);
    }
    jdata->map->last_mapper = strdup(c->mca_component_name);

    /* convenience def */
    map = jdata->map;

    /* if there is a default hostfile, go and get its ordered list of nodes */
    if (NULL != orte_default_hostfile) {
        default_node_list = OBJ_NEW(opal_list_t);
        if (ORTE_SUCCESS != (rc = orte_util_get_ordered_host_list(default_node_list, orte_default_hostfile))) {
            ORTE_ERROR_LOG(rc);
            goto error;
        }
    }

    /* start at the beginning... */
    vpid = 0;
    jdata->num_procs = 0;
    if (NULL != default_node_list) {
        save = (orte_node_t*)opal_list_get_first(default_node_list);
    }

    /* cycle through the app_contexts, mapping them sequentially */
    for (i=0; i < jdata->apps->size; i++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
            continue;
        }

        /* dash-host trumps hostfile */
        if (NULL != app->dash_host) {
            node_list = OBJ_NEW(opal_list_t);
            if (ORTE_SUCCESS != (rc = orte_util_get_ordered_dash_host_list(node_list, app->dash_host))) {
                ORTE_ERROR_LOG(rc);
                goto error;
            }
            nd = (orte_node_t*)opal_list_get_first(node_list);
        } else if (NULL != app->hostfile) {
            node_list = OBJ_NEW(opal_list_t);
            if (ORTE_SUCCESS != (rc = orte_util_get_ordered_host_list(node_list, app->hostfile))) {
                ORTE_ERROR_LOG(rc);
                goto error;
            }
            nd = (orte_node_t*)opal_list_get_first(node_list);
        } else if (NULL != default_node_list) {
            node_list = default_node_list;
            nd = save;
        } else {
            /* can't do anything - no nodes available! */
            orte_show_help("help-orte-rmaps-base.txt",
                           "orte-rmaps-base:no-available-resources",
                           true);
            rc = ORTE_ERR_SILENT;   /* was a bare return - leaked default_node_list */
            goto error;
        }

        /* check for nolocal and remove the head node, if required */
        if (map->mapping & ORTE_MAPPING_NO_USE_LOCAL) {
            for (item = opal_list_get_first(node_list);
                 item != opal_list_get_end(node_list);
                 item = opal_list_get_next(item)) {
                node = (orte_node_t*)item;
                /* need to check ifislocal because the name in the
                 * hostfile may not have been FQDN, while name returned
                 * by gethostname may have been (or vice versa)
                 */
                if (opal_ifislocal(node->name)) {
                    opal_list_remove_item(node_list, item);
                    OBJ_RELEASE(item);  /* "un-retain" it */
                }
            }
        }

        if (NULL == node_list ||
            0 == (num_nodes = (orte_std_cntr_t)opal_list_get_size(node_list))) {
            orte_show_help("help-orte-rmaps-base.txt",
                           "orte-rmaps-base:no-available-resources",
                           true);
            rc = ORTE_ERR_SILENT;   /* was a bare return - leaked node lists */
            goto error;
        }

        /* if num_procs wasn't specified, set it now */
        if (0 == app->num_procs) {
            app->num_procs = num_nodes;
        }

        for (n=0; n < app->num_procs; n++) {
            /* find this node on the global array - this is necessary so
             * that our mapping gets saved on that array as the objects
             * returned by the hostfile function are -not- on the array
             */
            node = NULL;
            for (j=0; j < orte_node_pool->size; j++) {
                if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, j))) {
                    continue;
                }
                if (0 == strcmp(nd->name, node->name)) {
                    break;
                }
            }
            if (NULL == node) {
                /* wasn't found - that is an error */
                orte_show_help("help-orte-rmaps-seq.txt",
                               "orte-rmaps-seq:resource-not-found",
                               true, nd->name);
                rc = ORTE_ERR_SILENT;
                goto error;
            }
            /* ensure the node is in the map */
            if (!node->mapped) {
                OBJ_RETAIN(node);
                opal_pointer_array_add(map->nodes, node);
                node->mapped = true;
            }
            proc = orte_rmaps_base_setup_proc(jdata, node, i);
            if (NULL == proc) {
                /* was unchecked - treat as out of resources */
                rc = ORTE_ERR_OUT_OF_RESOURCE;
                goto error;
            }
            if ((node->slots < (int)node->num_procs) ||
                (0 < node->slots_max && node->slots_max < (int)node->num_procs)) {
                if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
                    orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
                                   true, node->num_procs, app->app);
                    rc = ORTE_ERR_SILENT;
                    goto error;
                }
                /* flag the node as oversubscribed so that sched-yield gets
                 * properly set
                 */
                node->oversubscribed = true;
            }
            /* assign the vpid */
            proc->name.vpid = vpid++;
#if OPAL_HAVE_HWLOC
            /* assign the locale - okay for the topo to be null as
             * it just means it wasn't returned
             */
            if (NULL != node->topology) {
                proc->locale = hwloc_get_root_obj(node->topology);
            }
#endif
            /* add to the jdata proc array */
            if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
                ORTE_ERROR_LOG(rc);
                goto error;
            }
            /* move to next node */
            nd = (orte_node_t*)opal_list_get_next((opal_list_item_t*)nd);
        }

        /** track the total number of processes we mapped */
        jdata->num_procs += app->num_procs;

        /* cleanup the node list if it came from this app_context */
        if (node_list != default_node_list) {
            while (NULL != (item = opal_list_remove_first(node_list))) {
                OBJ_RELEASE(item);
            }
            OBJ_RELEASE(node_list);
            node_list = NULL;   /* avoid dangling pointer on later error exits */
        } else {
            save = nd;
        }
    }

    return ORTE_SUCCESS;

 error:
    /* release the per-app list first, but only if it is not simply an
     * alias for the default list - otherwise we would double-free */
    if (NULL != node_list && node_list != default_node_list) {
        while (NULL != (item = opal_list_remove_first(node_list))) {
            OBJ_RELEASE(item);
        }
        OBJ_RELEASE(node_list);
    }
    if (NULL != default_node_list) {
        while (NULL != (item = opal_list_remove_first(default_node_list))) {
            OBJ_RELEASE(item);
        }
        OBJ_RELEASE(default_node_list);
    }
    return rc;
}
/*
 * Map procs according to a "processes-per-resource" (ppr) specification,
 * e.g. "2:socket" - place N procs on every instance of the given resource
 * type (node, socket, core, cache level, numa, hwthread) on each allocated
 * node. When multiple levels are specified, the placement is pruned
 * upward level-by-level via prune().
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_TAKE_NEXT_OPTION when this mapper does
 * not apply, or ORTE_ERR_SILENT after a help message has been shown.
 */
static int ppr_mapper(orte_job_t *jdata)
{
    int rc = ORTE_SUCCESS, j, n;
    mca_base_component_t *c=&mca_rmaps_ppr_component.base_version;
    orte_node_t *node;
    orte_proc_t *proc;
    orte_app_context_t *app;
    orte_vpid_t total_procs, nprocs_mapped;
    opal_hwloc_level_t start=OPAL_HWLOC_NODE_LEVEL;
#if OPAL_HAVE_HWLOC
    hwloc_obj_t obj;
    hwloc_obj_type_t lowest;
    unsigned cache_level=0;
    unsigned int nobjs, i;
    bool pruning_reqd = false;
    opal_hwloc_level_t level;
#endif
    opal_list_t node_list;
    opal_list_item_t *item;
    orte_std_cntr_t num_slots;
    orte_app_idx_t idx;
    char **ppr_req, **ck;
    size_t len;
    bool initial_map=true;

    /* only handle initial launch of loadbalanced
     * or NPERxxx jobs - allow restarting of failed apps
     */
    if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:ppr: job %s being restarted - ppr cannot map",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }
    if (NULL != jdata->map->req_mapper &&
        0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) {
        /* a mapper has been specified, and it isn't me */
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:ppr: job %s not using ppr mapper",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }
    if (NULL == jdata->map->ppr ||
        ORTE_MAPPING_PPR != ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
        /* not for us */
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:ppr: job %s not using ppr mapper",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:ppr: mapping job %s with ppr %s",
                        ORTE_JOBID_PRINT(jdata->jobid), jdata->map->ppr);

    /* flag that I did the mapping */
    if (NULL != jdata->map->last_mapper) {
        free(jdata->map->last_mapper);
    }
    jdata->map->last_mapper = strdup(c->mca_component_name);

    /* initialize - ppr is presumably a file-scope per-level count array
     * indexed by opal_hwloc_level_t; note the size here covers entries
     * [0, OPAL_HWLOC_HWTHREAD_LEVEL) - confirm the array bound matches */
    memset(ppr, 0, OPAL_HWLOC_HWTHREAD_LEVEL * sizeof(opal_hwloc_level_t));

    /* parse option - comma-separated list of "<count>:<resource>" specs;
     * "start" tracks the deepest (finest-grained) level requested, and
     * n counts how many valid specs were seen */
    n=0;
    ppr_req = opal_argv_split(jdata->map->ppr, ',');
    for (j=0; NULL != ppr_req[j]; j++) {
        /* split on the colon */
        ck = opal_argv_split(ppr_req[j], ':');
        if (2 != opal_argv_count(ck)) {
            /* must provide a specification */
            orte_show_help("help-orte-rmaps-ppr.txt", "invalid-ppr",
                           true, jdata->map->ppr);
            opal_argv_free(ppr_req);
            opal_argv_free(ck);
            return ORTE_ERR_SILENT;
        }
        /* prefix match on the resource name, so abbreviations work */
        len = strlen(ck[1]);
        if (0 == strncasecmp(ck[1], "node", len)) {
            ppr[OPAL_HWLOC_NODE_LEVEL] = strtol(ck[0], NULL, 10);
            ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYNODE);
            start = OPAL_HWLOC_NODE_LEVEL;
            n++;
#if OPAL_HAVE_HWLOC
        } else if (0 == strncasecmp(ck[1], "hwthread", len) ||
                   0 == strncasecmp(ck[1], "thread", len)) {
            ppr[OPAL_HWLOC_HWTHREAD_LEVEL] = strtol(ck[0], NULL, 10);
            start = OPAL_HWLOC_HWTHREAD_LEVEL;
            ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYHWTHREAD);
            n++;
        } else if (0 == strncasecmp(ck[1], "core", len)) {
            ppr[OPAL_HWLOC_CORE_LEVEL] = strtol(ck[0], NULL, 10);
            if (start < OPAL_HWLOC_CORE_LEVEL) {
                start = OPAL_HWLOC_CORE_LEVEL;
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYCORE);
            }
            n++;
        } else if (0 == strncasecmp(ck[1], "socket", len) ||
                   0 == strncasecmp(ck[1], "skt", len)) {
            ppr[OPAL_HWLOC_SOCKET_LEVEL] = strtol(ck[0], NULL, 10);
            if (start < OPAL_HWLOC_SOCKET_LEVEL) {
                start = OPAL_HWLOC_SOCKET_LEVEL;
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSOCKET);
            }
            n++;
        } else if (0 == strncasecmp(ck[1], "l1cache", len)) {
            ppr[OPAL_HWLOC_L1CACHE_LEVEL] = strtol(ck[0], NULL, 10);
            if (start < OPAL_HWLOC_L1CACHE_LEVEL) {
                start = OPAL_HWLOC_L1CACHE_LEVEL;
                cache_level = 1;
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYL1CACHE);
            }
            n++;
        } else if (0 == strncasecmp(ck[1], "l2cache", len)) {
            ppr[OPAL_HWLOC_L2CACHE_LEVEL] = strtol(ck[0], NULL, 10);
            if (start < OPAL_HWLOC_L2CACHE_LEVEL) {
                start = OPAL_HWLOC_L2CACHE_LEVEL;
                cache_level = 2;
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYL2CACHE);
            }
            n++;
        } else if (0 == strncasecmp(ck[1], "l3cache", len)) {
            ppr[OPAL_HWLOC_L3CACHE_LEVEL] = strtol(ck[0], NULL, 10);
            if (start < OPAL_HWLOC_L3CACHE_LEVEL) {
                start = OPAL_HWLOC_L3CACHE_LEVEL;
                cache_level = 3;
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYL3CACHE);
            }
            n++;
        } else if (0 == strncasecmp(ck[1], "numa", len)) {
            ppr[OPAL_HWLOC_NUMA_LEVEL] = strtol(ck[0], NULL, 10);
            if (start < OPAL_HWLOC_NUMA_LEVEL) {
                start = OPAL_HWLOC_NUMA_LEVEL;
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYNUMA);
            }
            n++;
#endif
        } else {
            /* unknown spec */
            orte_show_help("help-orte-rmaps-ppr.txt", "unrecognized-ppr-option",
                           true, ck[1], jdata->map->ppr);
            opal_argv_free(ppr_req);
            opal_argv_free(ck);
            return ORTE_ERR_SILENT;
        }
        opal_argv_free(ck);
    }
    opal_argv_free(ppr_req);
    /* if nothing was given, that's an error */
    if (0 == n) {
        /* NOTE(review): raw debug output rather than an orte_show_help
         * message - looks like a leftover; confirm against upstream */
        opal_output(0, "NOTHING GIVEN");
        return ORTE_ERR_SILENT;
    }
#if OPAL_HAVE_HWLOC
    /* if more than one level was specified, then pruning will be reqd */
    if (1 < n) {
        pruning_reqd = true;
    }
#endif

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:ppr: job %s assigned policy %s",
                        ORTE_JOBID_PRINT(jdata->jobid),
                        orte_rmaps_base_print_mapping(jdata->map->mapping));

#if OPAL_HAVE_HWLOC
    /* convenience */
    level = start;
    lowest = opal_hwloc_levels[start];
#endif

    for (idx=0; idx < (orte_app_idx_t)jdata->apps->size; idx++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, idx))) {
            continue;
        }

        /* if the number of total procs was given, set that
         * limit - otherwise, set to max so we simply fill
         * all the nodes with the pattern
         */
        if (0 < app->num_procs) {
            total_procs = app->num_procs;
        } else {
            total_procs = ORTE_VPID_MAX;
        }

        /* get the available nodes */
        OBJ_CONSTRUCT(&node_list, opal_list_t);
        if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
                                                                  jdata->map->mapping, initial_map, false))) {
            ORTE_ERROR_LOG(rc);
            goto error;
        }
        /* flag that all subsequent requests should not reset the node->mapped flag */
        initial_map = false;

        /* if a bookmark exists from some prior mapping, set us to start there */
        jdata->bookmark = orte_rmaps_base_get_starting_point(&node_list, jdata);

        /* cycle across the nodes */
        nprocs_mapped = 0;
        for (item = opal_list_get_first(&node_list);
             item != opal_list_get_end(&node_list);
             item = opal_list_get_next(item)) {
            node = (orte_node_t*)item;
#if OPAL_HAVE_HWLOC
            /* bozo check - sub-node placement requires topology info */
            if (NULL == node->topology) {
                orte_show_help("help-orte-rmaps-ppr.txt", "ppr-topo-missing",
                               true, node->name);
                rc = ORTE_ERR_SILENT;
                goto error;
            }
#endif
            /* add the node to the map, if needed */
            if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
                if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
                    ORTE_ERROR_LOG(rc);
                    goto error;
                }
                ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
                OBJ_RETAIN(node);  /* maintain accounting on object */
                jdata->map->num_nodes++;
            }
            /* if we are mapping solely at the node level, just put
             * that many procs on this node
             */
            if (OPAL_HWLOC_NODE_LEVEL == start) {
#if OPAL_HAVE_HWLOC
                obj = hwloc_get_root_obj(node->topology);
#endif
                for (j=0; j < ppr[start] && nprocs_mapped < total_procs; j++) {
                    if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, idx))) {
                        rc = ORTE_ERR_OUT_OF_RESOURCE;
                        goto error;
                    }
                    nprocs_mapped++;
#if OPAL_HAVE_HWLOC
                    orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR);
#endif
                }
#if OPAL_HAVE_HWLOC
            } else {
                /* get the number of lowest resources on this node */
                nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology,
                                                           lowest, cache_level,
                                                           OPAL_HWLOC_AVAILABLE);

                /* map the specified number of procs to each such resource on this node,
                 * recording the locale of each proc so we know its cpuset
                 */
                for (i=0; i < nobjs; i++) {
                    obj = opal_hwloc_base_get_obj_by_type(node->topology,
                                                          lowest, cache_level,
                                                          i, OPAL_HWLOC_AVAILABLE);
                    for (j=0; j < ppr[start] && nprocs_mapped < total_procs; j++) {
                        if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, idx))) {
                            rc = ORTE_ERR_OUT_OF_RESOURCE;
                            goto error;
                        }
                        nprocs_mapped++;
                        orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR);
                    }
                }

                if (pruning_reqd) {
                    /* go up the ladder and prune the procs according to
                     * the specification, adjusting the count of procs on the
                     * node as we go
                     */
                    level--;
                    prune(jdata->jobid, idx, node, &level, &nprocs_mapped);
                }
#endif
            }

            /* set the total slots used */
            if ((int)node->num_procs <= node->slots) {
                node->slots_inuse = (int)node->num_procs;
            } else {
                node->slots_inuse = node->slots;
            }

            /* if no-oversubscribe was specified, check to see if
             * we have violated the total slot specification - regardless,
             * if slots_max was given, we are not allowed to violate it!
             */
            if ((node->slots < (int)node->num_procs) ||
                (0 < node->slots_max && node->slots_max < (int)node->num_procs)) {
                if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
                    orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
                                   true, node->num_procs, app->app);
                    rc = ORTE_ERR_SILENT;
                    goto error;
                }
                /* flag the node as oversubscribed so that sched-yield gets
                 * properly set
                 */
                ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
            }

            /* if we haven't mapped all the procs, continue on to the
             * next node
             */
            if (total_procs == nprocs_mapped) {
                break;
            }
        }
        if (0 == app->num_procs) {
            app->num_procs = nprocs_mapped;
        }
        if (ORTE_VPID_MAX != total_procs && nprocs_mapped < total_procs) {
            /* couldn't map them all */
            orte_show_help("help-orte-rmaps-ppr.txt", "ppr-too-many-procs",
                           true, app->app, app->num_procs, jdata->map->ppr);
            rc = ORTE_ERR_SILENT;
            goto error;
        }
        /* compute vpids and add proc objects to the job */
        if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata, app, &node_list))) {
            ORTE_ERROR_LOG(rc);
            goto error;
        }

        /* track the total number of processes we mapped - must update
         * this AFTER we compute vpids so that computation is done
         * correctly
         */
        jdata->num_procs += app->num_procs;

        while (NULL != (item = opal_list_remove_first(&node_list))) {
            OBJ_RELEASE(item);
        }
        OBJ_DESTRUCT(&node_list);
    }
    return ORTE_SUCCESS;

 error:
    while (NULL != (item = opal_list_remove_first(&node_list))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&node_list);
    return rc;
}
/*
 * Round-robin "byslot" mapper: walk the candidate node list in order and
 * fill each node's available slots before moving on to the next node.
 *
 * NOTE(review): this file contains several near-identical copies of this
 * function (apparently different historical versions concatenated together).
 * This copy stops after the first mapping pass and no closing function brace
 * or return statement is visible - it appears to be a truncated duplicate.
 */
int orte_rmaps_rr_byslot(orte_job_t *jdata,
                         orte_app_context_t *app,
                         opal_list_t *node_list,
                         orte_std_cntr_t num_slots,
                         orte_vpid_t num_procs)
{
    int rc, i, nprocs_mapped;
    orte_node_t *node;
    orte_proc_t *proc;
    int num_procs_to_assign, extra_procs_to_assign=0, nxtra_nodes=0;
#if OPAL_HAVE_HWLOC
    hwloc_obj_t obj=NULL;   /* node-level locale assigned to each proc */
#endif
    float balance;
    bool add_one=false;

    opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:rr: mapping by slot for job %s slots %d num_procs %lu",
                        ORTE_JOBID_PRINT(jdata->jobid), (int)num_slots,
                        (unsigned long)num_procs);

    /* check to see if we can map all the procs - if the request
     * oversubscribes the allocation and oversubscription is forbidden,
     * error out now */
    if (num_slots < ((int)app->num_procs * orte_rmaps_base.cpus_per_rank)) {
        if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
            orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
                           true, app->num_procs, app->app);
            return ORTE_ERR_SILENT;
        }
    }

    /* first pass: map the number of procs to each node until we
     * map all specified procs or use all allocated slots */
    nprocs_mapped = 0;
    OPAL_LIST_FOREACH(node, node_list, orte_node_t) {
        opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:rr:slot working node %s",
                            node->name);
#if OPAL_HAVE_HWLOC
        /* get the root object as we are not assigning
         * locale here except at the node level */
        if (NULL != node->topology) {
            obj = hwloc_get_root_obj(node->topology);
        }
#endif
        /* skip nodes whose slots are already consumed */
        if (node->slots <= node->slots_inuse) {
            opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:rr:slot node %s is full - skipping",
                                node->name);
            continue;
        }
        /* assign a number of procs equal to the number of available
         * slots divided by the number of cpus/rank the user
         * requested */
        num_procs_to_assign = (node->slots - node->slots_inuse) / orte_rmaps_base.cpus_per_rank;
        opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:rr:slot assigning %d procs to node %s",
                            (int)num_procs_to_assign, node->name);
        for (i=0; i < num_procs_to_assign && nprocs_mapped < app->num_procs; i++) {
            /* add this node to the map - do it only once */
            if (!node->mapped) {
                if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
                    ORTE_ERROR_LOG(rc);
                    return rc;
                }
                node->mapped = true;
                OBJ_RETAIN(node);  /* maintain accounting on object */
                ++(jdata->map->num_nodes);
            }
            if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) {
                return ORTE_ERR_OUT_OF_RESOURCE;
            }
            nprocs_mapped++;
#if OPAL_HAVE_HWLOC
            proc->locale = obj;
#endif
        }
    }
/*
 * Round-robin "byslot" mapper (another duplicate copy): identical to the
 * previous copy except that it also adjusts the binding policy when the
 * job will oversubscribe the allocation.
 *
 * NOTE(review): like the preceding copy, this version ends after the first
 * mapping pass with no closing function brace visible - apparently a
 * truncated historical duplicate.
 */
int orte_rmaps_rr_byslot(orte_job_t *jdata,
                         orte_app_context_t *app,
                         opal_list_t *node_list,
                         orte_std_cntr_t num_slots,
                         orte_vpid_t num_procs)
{
    int rc, i, nprocs_mapped;
    orte_node_t *node;
    orte_proc_t *proc;
    int num_procs_to_assign, extra_procs_to_assign=0, nxtra_nodes=0;
#if OPAL_HAVE_HWLOC
    hwloc_obj_t obj=NULL;   /* node-level locale assigned to each proc */
#endif
    float balance;
    bool add_one=false;

    opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:rr: mapping by slot for job %s slots %d num_procs %lu",
                        ORTE_JOBID_PRINT(jdata->jobid), (int)num_slots,
                        (unsigned long)num_procs);

    /* check to see if we can map all the procs */
    if (num_slots < ((int)app->num_procs * orte_rmaps_base.cpus_per_rank)) {
        if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
            orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
                           true, app->num_procs, app->app);
            return ORTE_ERR_SILENT;
        }
#if OPAL_HAVE_HWLOC
        /* if we will and are allowed to oversubscribe, and binding was given, then
         * we really should warn the user that we cannot bind */
        if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
            if ((OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(jdata->map->binding) ||
                 OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(jdata->map->binding)) &&
                !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) {
                /* RHC: don't emit this warning at this time while we try to
                 * determine the best path forward. See
                 * https://svn.open-mpi.org/trac/ompi/ticket/4345
                 * for an explanation
                orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:oversubscribed",
                               true, num_slots, app->num_procs * orte_rmaps_base.cpus_per_rank);
                OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
                */
            }
        } else {
            /* don't default to bound */
            OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);
        }
#endif
    }

    /* first pass: map the number of procs to each node until we
     * map all specified procs or use all allocated slots */
    nprocs_mapped = 0;
    OPAL_LIST_FOREACH(node, node_list, orte_node_t) {
        opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:rr:slot working node %s",
                            node->name);
#if OPAL_HAVE_HWLOC
        /* get the root object as we are not assigning
         * locale here except at the node level */
        if (NULL != node->topology) {
            obj = hwloc_get_root_obj(node->topology);
        }
#endif
        /* skip nodes whose slots are already consumed */
        if (node->slots <= node->slots_inuse) {
            opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:rr:slot node %s is full - skipping",
                                node->name);
            continue;
        }
        /* assign a number of procs equal to the number of available
         * slots divided by the number of cpus/rank the user
         * requested */
        num_procs_to_assign = (node->slots - node->slots_inuse) / orte_rmaps_base.cpus_per_rank;
        opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:rr:slot assigning %d procs to node %s",
                            (int)num_procs_to_assign, node->name);
        for (i=0; i < num_procs_to_assign && nprocs_mapped < app->num_procs; i++) {
            /* add this node to the map - do it only once */
            if (!node->mapped) {
                if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
                    ORTE_ERROR_LOG(rc);
                    return rc;
                }
                node->mapped = true;
                OBJ_RETAIN(node);  /* maintain accounting on object */
                ++(jdata->map->num_nodes);
            }
            if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) {
                return ORTE_ERR_OUT_OF_RESOURCE;
            }
            nprocs_mapped++;
#if OPAL_HAVE_HWLOC
            proc->locale = obj;
#endif
        }
    }
/*
 * Round-robin "byslot" mapper (newer-API duplicate copy): uses the
 * ORTE_FLAG_* node-flag macros and the orte_set_attribute locale mechanism
 * instead of the direct node->mapped / proc->locale fields, and maps one
 * proc per available slot (no cpus_per_rank division in this version).
 *
 * NOTE(review): this copy also ends after the first mapping pass with no
 * closing function brace visible - apparently a truncated duplicate from a
 * different version of this file.
 */
int orte_rmaps_rr_byslot(orte_job_t *jdata,
                         orte_app_context_t *app,
                         opal_list_t *node_list,
                         orte_std_cntr_t num_slots,
                         orte_vpid_t num_procs)
{
    int i, nprocs_mapped;
    orte_node_t *node;
    orte_proc_t *proc;
    int num_procs_to_assign, extra_procs_to_assign=0, nxtra_nodes=0;
    hwloc_obj_t obj=NULL;   /* node-level locale recorded on each proc */
    float balance;
    bool add_one=false;

    opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:rr: mapping by slot for job %s slots %d num_procs %lu",
                        ORTE_JOBID_PRINT(jdata->jobid), (int)num_slots,
                        (unsigned long)num_procs);

    /* check to see if we can map all the procs */
    if (num_slots < (int)app->num_procs) {
        if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
            orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
                           true, app->num_procs, app->app, orte_process_info.nodename);
            ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
            return ORTE_ERR_SILENT;
        }
    }

    /* first pass: map the number of procs to each node until we
     * map all specified procs or use all allocated slots */
    nprocs_mapped = 0;
    OPAL_LIST_FOREACH(node, node_list, orte_node_t) {
        opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:rr:slot working node %s",
                            node->name);
        /* get the root object as we are not assigning
         * locale here except at the node level */
        if (NULL != node->topology && NULL != node->topology->topo) {
            obj = hwloc_get_root_obj(node->topology->topo);
        }
        /* skip nodes whose slots are already consumed */
        if (node->slots <= node->slots_inuse) {
            opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:rr:slot node %s is full - skipping",
                                node->name);
            continue;
        }
        /* assign a number of procs equal to the number of available slots */
        num_procs_to_assign = node->slots - node->slots_inuse;
        opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:rr:slot assigning %d procs to node %s",
                            (int)num_procs_to_assign, node->name);
        for (i=0; i < num_procs_to_assign && nprocs_mapped < app->num_procs; i++) {
            /* add this node to the map - do it only once */
            if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
                ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
                ++(jdata->map->num_nodes);
            }
            if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) {
                return ORTE_ERR_OUT_OF_RESOURCE;
            }
            nprocs_mapped++;
            /* record the node-level locale on the proc */
            orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE,
                               ORTE_ATTR_LOCAL, obj, OPAL_PTR);
        }
    }
static int byobj_span(orte_job_t *jdata, orte_app_context_t *app, opal_list_t *node_list, orte_std_cntr_t num_slots, orte_vpid_t num_procs, hwloc_obj_type_t target, unsigned cache_level) { int i, j, nprocs_mapped, lag, delta, navg; orte_node_t *node; orte_proc_t *proc; opal_list_item_t *item; int num_procs_to_assign, nperobj, nprocs, nxtra_objs=0; int extra_procs_to_assign=0, nxtra_nodes=0, idx; hwloc_obj_t obj=NULL; unsigned int nobjs; float balance; bool add_one=false; bool oversubscribed=false; opal_output_verbose(2, orte_rmaps_base_framework.framework_output, "mca:rmaps:rr: mapping span by %s for job %s slots %d num_procs %lu", hwloc_obj_type_string(target), ORTE_JOBID_PRINT(jdata->jobid), (int)num_slots, (unsigned long)num_procs); /* quick check to see if we can map all the procs - can't * do more because we don't know how many total objects exist * across all the nodes */ if (num_slots < (int)app->num_procs) { if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) { orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error", true, app->num_procs, app->app); return ORTE_ERR_SILENT; } oversubscribed = true; } /* divide the procs evenly across all nodes - this is the * average we have to maintain as we go, but we adjust * the number on each node to reflect its available slots. * Obviously, if all nodes have the same number of slots, * then the avg is what we get on each node - this is * the most common situation. 
*/ navg = app->num_procs / opal_list_get_size(node_list); if (0 == navg) { /* if there are less procs than nodes, we have to * place at least one/node */ navg = 1; } /* compute how many extra procs to put on each node */ balance = (float)((jdata->num_procs + app->num_procs) - (navg * opal_list_get_size(node_list))) / (float)opal_list_get_size(node_list); extra_procs_to_assign = (int)balance; if (0 < (balance - (float)extra_procs_to_assign)) { /* compute how many nodes need an extra proc */ nxtra_nodes = (jdata->num_procs + app->num_procs) - ((navg + extra_procs_to_assign) * opal_list_get_size(node_list)); /* add one so that we add an extra proc to the first nodes * until all procs are mapped */ extra_procs_to_assign++; /* flag that we added one */ add_one = true; } opal_output_verbose(2, orte_rmaps_base_framework.framework_output, "mca:rmaps:rr: mapping by %s navg %d extra_procs %d extra_nodes %d", hwloc_obj_type_string(target), navg, extra_procs_to_assign, nxtra_nodes); nprocs_mapped = 0; lag = 0; for (item = opal_list_get_first(node_list); item != opal_list_get_end(node_list); item = opal_list_get_next(item)) { node = (orte_node_t*)item; /* bozo check */ if (NULL == node->topology) { orte_show_help("help-orte-rmaps-ppr.txt", "ppr-topo-missing", true, node->name); return ORTE_ERR_SILENT; } /* add this node to the map, if reqd */ if (!node->mapped) { if (ORTE_SUCCESS > (idx = opal_pointer_array_add(jdata->map->nodes, (void*)node))) { ORTE_ERROR_LOG(idx); return idx; } node->mapped = true; OBJ_RETAIN(node); /* maintain accounting on object */ ++(jdata->map->num_nodes); } /* compute the number of procs to go on this node */ if (add_one) { if (0 == nxtra_nodes) { --extra_procs_to_assign; add_one = false; } else { --nxtra_nodes; } } if (oversubscribed) { /* everybody just takes their share */ num_procs_to_assign = navg + extra_procs_to_assign; } else { /* if we are not oversubscribed, then there are enough * slots to handle all the procs. 
However, not every * node will have the same number of slots, so we * have to track how many procs to "shift" elsewhere * to make up the difference */ if (node->slots <= node->slots_inuse) { /* if there are no extras to take, then we can * safely remove this node as we don't need it */ if (0 == extra_procs_to_assign) { opal_pointer_array_set_item(jdata->map->nodes, node->index, NULL); OBJ_RELEASE(node); --(jdata->map->num_nodes); /* update how many we are lagging behind */ lag += navg; continue; } /* everybody has to take at least the extras */ num_procs_to_assign = extra_procs_to_assign; /* update how many we are lagging behind */ lag += navg; } else { /* if slots < avg, then take all */ if ((node->slots - node->slots_inuse) < navg) { num_procs_to_assign = (node->slots - node->slots_inuse) + extra_procs_to_assign; /* update how many we are lagging behind */ lag += navg - (node->slots - node->slots_inuse); } else { /* take the avg plus as much of the "lag" as we can */ delta = 0; if (0 < lag) { delta = (node->slots - node->slots_inuse) - navg; if (lag < delta) { delta = lag; } lag -= delta; } num_procs_to_assign = navg + delta + extra_procs_to_assign; } } } /* get the number of objects of this type on this node */ nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology, target, cache_level, OPAL_HWLOC_AVAILABLE); opal_output_verbose(2, orte_rmaps_base_framework.framework_output, "mca:rmaps:rr:byobj: found %d objs on node %s", nobjs, node->name); /* compute the number of procs to go on each object */ nperobj = num_procs_to_assign / nobjs; opal_output_verbose(2, orte_rmaps_base_framework.framework_output, "mca:rmaps:rr:byobj: placing %d procs on each object", nperobj); if ((int)(nperobj * nobjs) < num_procs_to_assign) { /* compute how many objs need an extra proc */ nxtra_objs = num_procs_to_assign - nperobj * nobjs; opal_output_verbose(2, orte_rmaps_base_framework.framework_output, "mca:rmaps:rr:byobj: adding 1 extra proc to the first %d objects, if needed", 
nxtra_objs); } /* loop through the number of objects */ for (i=0; i < (int)nobjs && nprocs_mapped < (int)app->num_procs; i++) { /* get the hwloc object */ if (NULL == (obj = opal_hwloc_base_get_obj_by_type(node->topology, target, cache_level, i, OPAL_HWLOC_AVAILABLE))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } /* map the reqd number of procs */ if (0 < nxtra_objs) { nprocs = nperobj + 1; --nxtra_objs; } else { nprocs = nperobj; } for (j=0; j < nprocs && nprocs_mapped < app->num_procs; j++) { if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) { return ORTE_ERR_OUT_OF_RESOURCE; } nprocs_mapped++; proc->locale = obj; } /* keep track of the node we last used */ jdata->bookmark = node; } /* not all nodes are equal, so only set oversubscribed for * this node if it is in that state */ if (node->slots < (int)node->num_procs) { /* flag the node as oversubscribed so that sched-yield gets * properly set */ node->oversubscribed = true; } if (nprocs_mapped == app->num_procs) { /* we are done */ break; } } return ORTE_SUCCESS; }
/* mapping by hwloc object looks a lot like mapping by node,
 * but has the added complication of possibly having different
 * numbers of objects on each node */
/*
 * Map procs to hwloc objects of the given type. Delegates to byobj_span()
 * when span mode is requested; otherwise fills each node in list order
 * (byslot-like), distributing each node's share evenly across its objects.
 * Returns ORTE_ERR_NOT_SUPPORTED (after undoing its partial map) when the
 * default-selected object type does not exist on a node, so the caller can
 * fall back to map-by-slot.
 */
int orte_rmaps_rr_byobj(orte_job_t *jdata,
                        orte_app_context_t *app,
                        opal_list_t *node_list,
                        orte_std_cntr_t num_slots,
                        orte_vpid_t num_procs,
                        hwloc_obj_type_t target, unsigned cache_level)
{
    int i, j, nprocs_mapped;
    orte_node_t *node;
    orte_proc_t *proc;
    opal_list_item_t *item;
    int num_procs_to_assign, nperobj, nprocs, nxtra_objs=0;
    int extra_procs_to_assign=0, nxtra_nodes=0, idx;
    hwloc_obj_t obj=NULL;
    unsigned int nobjs;
    float balance;
    bool add_one=false;

    /* there are two modes for mapping by object: span and not-span. The
     * span mode essentially operates as if there was just a single
     * "super-node" in the system - i.e., it balances the load across
     * all objects of the indicated type regardless of their location.
     * In essence, it acts as if we placed one proc on each object, cycling
     * across all objects on all nodes, and then wrapped around to place
     * another proc on each object, doing so until all procs were placed.
     *
     * In contrast, the non-span mode operates similar to byslot mapping.
     * All slots on each node are filled, assigning each proc to an object
     * on that node in a balanced fashion, and then the mapper moves on
     * to the next node. Thus, procs tend to be "front loaded" onto the
     * list of nodes, as opposed to being "load balanced" in the span mode */
    if (ORTE_MAPPING_SPAN & jdata->map->mapping) {
        return byobj_span(jdata, app, node_list, num_slots, num_procs, target, cache_level);
    }

    opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:rr: mapping no-span by %s for job %s slots %d num_procs %lu",
                        hwloc_obj_type_string(target),
                        ORTE_JOBID_PRINT(jdata->jobid),
                        (int)num_slots, (unsigned long)num_procs);

    /* quick check to see if we can map all the procs - can't
     * do more because we don't know how many total objects exist
     * across all the nodes */
    if (num_slots < (app->num_procs * orte_rmaps_base.cpus_per_rank)) {
        if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
            orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
                           true, app->num_procs, app->app);
            return ORTE_ERR_SILENT;
        }
        /* compute how many extra procs to put on each node */
        if (1 == opal_list_get_size(node_list)) {
            /* if there is only one node, then they all have to go on it */
            extra_procs_to_assign = app->num_procs;
        } else {
            balance = (float)(((jdata->num_procs + app->num_procs)*orte_rmaps_base.cpus_per_rank) - num_slots) /
                      (float)opal_list_get_size(node_list);
            extra_procs_to_assign = (int)balance;
            if (0 < (balance - (float)extra_procs_to_assign)) {
                /* compute how many nodes need an extra proc */
                nxtra_nodes = ((jdata->num_procs + app->num_procs)*orte_rmaps_base.cpus_per_rank) - num_slots -
                              (extra_procs_to_assign * opal_list_get_size(node_list));
                /* add one so that we add an extra proc to the first nodes
                 * until all procs are mapped */
                extra_procs_to_assign++;
                /* flag that we added one */
                add_one = true;
            }
        }
    }

    opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:rr: mapping no-span by %s extra_procs %d extra_nodes %d",
                        hwloc_obj_type_string(target),
                        extra_procs_to_assign, nxtra_nodes);

    nprocs_mapped = 0;
    for (item = opal_list_get_first(node_list);
         item != opal_list_get_end(node_list);
         item = opal_list_get_next(item)) {
        node = (orte_node_t*)item;
        /* bozo check */
        if (NULL == node->topology) {
            orte_show_help("help-orte-rmaps-ppr.txt", "ppr-topo-missing",
                           true, node->name);
            return ORTE_ERR_SILENT;
        }
        /* add this node to the map, if reqd */
        if (!node->mapped) {
            if (ORTE_SUCCESS > (idx = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
                ORTE_ERROR_LOG(idx);
                return idx;
            }
            node->mapped = true;
            OBJ_RETAIN(node);  /* maintain accounting on object */
            ++(jdata->map->num_nodes);
        }
        /* compute the number of procs to go on this node */
        if (add_one) {
            if (0 == nxtra_nodes) {
                --extra_procs_to_assign;
                add_one = false;
            } else {
                --nxtra_nodes;
            }
        }
        if (node->slots <= node->slots_inuse) {
            /* everybody takes at least the extras */
            num_procs_to_assign = extra_procs_to_assign;
        } else {
            num_procs_to_assign = (node->slots - node->slots_inuse)/orte_rmaps_base.cpus_per_rank + extra_procs_to_assign;
            if (app->num_procs < num_procs_to_assign) {
                /* might have more slots than procs */
                num_procs_to_assign = app->num_procs;
            }
        }
        /* get the number of objects of this type on this node */
        nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology, target, cache_level, OPAL_HWLOC_AVAILABLE);
        opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:rr:byobj: nprocs-to-assign %d for %d objs on node %s",
                            num_procs_to_assign, nobjs, node->name);
        /* if there are no objects of this type, then report the error
         * and abort - this can happen, for example, on systems that
         * don't report "sockets" as an independent object. However, IF
         * this object is the default one - i.e., not specified by the
         * user - then we can fall back to mapping by slot */
        if (0 == nobjs) {
            if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
                orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-objects",
                               true, hwloc_obj_type_string(target), node->name);
                return ORTE_ERR_SILENT;
            } else {
                /* this was the default mapping policy, so clear the map
                 * of any prior work and indicate that map-by slot is reqd */
                for (i=0; i < jdata->map->nodes->size; i++) {
                    if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) {
                        continue;
                    }
                    /* release only the procs belonging to this job */
                    for (idx=0; idx < node->procs->size; idx++) {
                        if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, idx))) {
                            continue;
                        }
                        if (proc->name.jobid != jdata->jobid) {
                            continue;
                        }
                        --node->num_procs;
                        OBJ_RELEASE(proc);
                        opal_pointer_array_set_item(node->procs, idx, NULL);
                    }
                    /* drop the node from the map if it is now empty */
                    if (0 == node->num_procs) {
                        node->mapped = false;
                        OBJ_RELEASE(node);
                        opal_pointer_array_set_item(jdata->map->nodes, i, NULL);
                    }
                }
                return ORTE_ERR_NOT_SUPPORTED;
            }
        }
        /* compute the number of procs to go on each object */
        nperobj = num_procs_to_assign / nobjs;
        opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:rr:byobj: placing %d procs on each object",
                            nperobj);
        if ((int)(nperobj * nobjs) < num_procs_to_assign) {
            /* compute how many objs need an extra proc */
            nxtra_objs = num_procs_to_assign - nperobj * nobjs;
            opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:rr:byobj: adding 1 extra proc to the first %d objects, if needed",
                                nxtra_objs);
        }
        /* loop through the number of objects */
        for (i=0; i < (int)nobjs && nprocs_mapped < (int)app->num_procs; i++) {
            /* get the hwloc object */
            if (NULL == (obj = opal_hwloc_base_get_obj_by_type(node->topology, target, cache_level, i, OPAL_HWLOC_AVAILABLE))) {
                ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                return ORTE_ERR_NOT_FOUND;
            }
            /* map the reqd number of procs */
            if (0 < nxtra_objs) {
                nprocs = nperobj + 1;
                --nxtra_objs;
            } else {
                nprocs = nperobj;
            }
            for (j=0; j < nprocs && nprocs_mapped < app->num_procs; j++) {
                if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) {
                    return ORTE_ERR_OUT_OF_RESOURCE;
                }
                nprocs_mapped++;
                proc->locale = obj;
            }
        }
        /* not all nodes are equal, so only set oversubscribed for
         * this node if it is in that state */
        if (node->slots < (int)node->num_procs) {
            /* flag the node as oversubscribed so that sched-yield gets
             * properly set */
            node->oversubscribed = true;
        }
        if (nprocs_mapped == app->num_procs) {
            /* we are done */
            break;
        }
    }
    return ORTE_SUCCESS;
}
/*
 * Round-robin "byslot" mapper (complete two-pass version). First pass fills
 * each node's available slots in list order; if procs remain after all
 * slots are consumed, a second pass spreads the overflow evenly across the
 * nodes (oversubscribing them).
 *
 * NOTE(review): this is yet another copy of orte_rmaps_rr_byslot in this
 * file; only one definition can be compiled into a translation unit.
 */
int orte_rmaps_rr_byslot(orte_job_t *jdata,
                         orte_app_context_t *app,
                         opal_list_t *node_list,
                         orte_std_cntr_t num_slots,
                         orte_vpid_t num_procs)
{
    int rc, i, nprocs_mapped;
    orte_node_t *node;
    orte_proc_t *proc;
    opal_list_item_t *item;
    int num_procs_to_assign, extra_procs_to_assign=0, nxtra_nodes=0;
#if OPAL_HAVE_HWLOC
    hwloc_obj_t obj=NULL;   /* node-level locale assigned to each proc */
#endif
    float balance;
    bool add_one=false;

    opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:rr: mapping by slot for job %s slots %d num_procs %lu",
                        ORTE_JOBID_PRINT(jdata->jobid), (int)num_slots,
                        (unsigned long)num_procs);

    /* check to see if we can map all the procs */
    if (num_slots < ((int)app->num_procs * orte_rmaps_base.cpus_per_rank)) {
        if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
            orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
                           true, app->num_procs, app->app);
            return ORTE_ERR_SILENT;
        }
    }

    /* first pass: map the number of procs to each node until we
     * map all specified procs or use all allocated slots */
    nprocs_mapped = 0;
    for (item = opal_list_get_first(node_list);
         item != opal_list_get_end(node_list);
         item = opal_list_get_next(item)) {
        node = (orte_node_t*)item;
        opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:rr:slot working node %s",
                            node->name);
#if OPAL_HAVE_HWLOC
        /* get the root object as we are not assigning
         * locale here except at the node level */
        if (NULL != node->topology) {
            obj = hwloc_get_root_obj(node->topology);
        }
#endif
        /* skip nodes whose slots are already consumed */
        if (node->slots <= node->slots_inuse) {
            opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:rr:slot node %s is full - skipping",
                                node->name);
            continue;
        }
        /* assign a number of procs equal to the number of available
         * slots divided by the number of cpus/rank the user
         * requested */
        num_procs_to_assign = (node->slots - node->slots_inuse) / orte_rmaps_base.cpus_per_rank;
        opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:rr:slot assigning %d procs to node %s",
                            (int)num_procs_to_assign, node->name);
        for (i=0; i < num_procs_to_assign && nprocs_mapped < app->num_procs; i++) {
            /* add this node to the map - do it only once */
            if (!node->mapped) {
                if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
                    ORTE_ERROR_LOG(rc);
                    return rc;
                }
                node->mapped = true;
                OBJ_RETAIN(node);  /* maintain accounting on object */
                ++(jdata->map->num_nodes);
            }
            if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) {
                return ORTE_ERR_OUT_OF_RESOURCE;
            }
            nprocs_mapped++;
#if OPAL_HAVE_HWLOC
            proc->locale = obj;
#endif
        }
    }

    if (nprocs_mapped == app->num_procs) {
        /* we are done */
        return ORTE_SUCCESS;
    }

    opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:rr:slot job %s is oversubscribed - performing second pass",
                        ORTE_JOBID_PRINT(jdata->jobid));

    /* second pass: if we haven't mapped everyone yet, it is
     * because we are oversubscribed. Figure out how many procs
     * to add */
    balance = (float)(app->num_procs - nprocs_mapped) / (float)opal_list_get_size(node_list);
    extra_procs_to_assign = (int)balance;
    if (0 < (balance - (float)extra_procs_to_assign)) {
        /* compute how many nodes need an extra proc */
        nxtra_nodes = app->num_procs - nprocs_mapped -
                      (extra_procs_to_assign * opal_list_get_size(node_list));
        /* add one so that we add an extra proc to the first nodes
         * until all procs are mapped */
        extra_procs_to_assign++;
        /* flag that we added one */
        add_one = true;
    }

    for (item = opal_list_get_first(node_list);
         item != opal_list_get_end(node_list);
         item = opal_list_get_next(item)) {
        node = (orte_node_t*)item;
        opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:rr:slot working node %s",
                            node->name);
#if OPAL_HAVE_HWLOC
        /* get the root object as we are not assigning
         * locale except at the node level */
        if (NULL != node->topology) {
            obj = hwloc_get_root_obj(node->topology);
        }
#endif
        /* add this node to the map - do it only once */
        if (!node->mapped) {
            if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            node->mapped = true;
            OBJ_RETAIN(node);  /* maintain accounting on object */
            ++(jdata->map->num_nodes);
        }
        if (add_one) {
            if (0 == nxtra_nodes) {
                --extra_procs_to_assign;
                add_one = false;
            } else {
                --nxtra_nodes;
            }
        }
        num_procs_to_assign = ((node->slots - node->slots_inuse)/orte_rmaps_base.cpus_per_rank) + extra_procs_to_assign;
        opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:rr:slot adding up to %d procs to node %s",
                            num_procs_to_assign, node->name);
        for (i=0; i < num_procs_to_assign && nprocs_mapped < app->num_procs; i++) {
            if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) {
                return ORTE_ERR_OUT_OF_RESOURCE;
            }
            nprocs_mapped++;
#if OPAL_HAVE_HWLOC
            proc->locale = obj;
#endif
        }
        /* not all nodes are equal, so only set oversubscribed for
         * this node if it is in that state */
        if (node->slots < (int)node->num_procs) {
            /* flag the node as oversubscribed so that sched-yield gets
             * properly set */
            node->oversubscribed = true;
        }
    }
    return ORTE_SUCCESS;
}
/*
 * Map the procs of an app_context round-robin by NODE: compute an average
 * number of procs per node (navg) and walk the node list, placing roughly
 * navg procs on each node while tracking a "lag" of procs that could not be
 * placed on full nodes so they can be made up on nodes with spare slots.
 *
 * @param jdata      job being mapped (map/nodes arrays are updated in place)
 * @param app        app_context whose procs are being placed
 * @param node_list  candidate nodes (list of orte_node_t)
 * @param num_slots  total slots available across node_list
 * @param num_procs  number of procs requested (used for reporting only;
 *                   placement is driven by app->num_procs)
 *
 * @return ORTE_SUCCESS on success; ORTE_ERR_SILENT if oversubscription is
 *         required but forbidden by the mapping directive;
 *         ORTE_ERR_OUT_OF_RESOURCE if a proc object cannot be created;
 *         a negative error code if a node cannot be added to the map.
 */
int orte_rmaps_rr_bynode(orte_job_t *jdata,
                         orte_app_context_t *app,
                         opal_list_t *node_list,
                         orte_std_cntr_t num_slots,
                         orte_vpid_t num_procs)
{
    int j, nprocs_mapped, lag, delta;
    orte_node_t *node;
    orte_proc_t *proc;
    opal_list_item_t *item;
    int num_procs_to_assign, navg, idx;
    int extra_procs_to_assign=0, nxtra_nodes=0;
#if OPAL_HAVE_HWLOC
    hwloc_obj_t obj=NULL;
#endif
    float balance;
    bool add_one=false;
    bool oversubscribed=false;

    opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:rr: mapping by node for job %s app %d slots %d num_procs %lu",
                        ORTE_JOBID_PRINT(jdata->jobid), (int)app->idx,
                        (int)num_slots, (unsigned long)num_procs);

    /* quick check to see if we can map all the procs */
    if (num_slots < ((int)app->num_procs * orte_rmaps_base.cpus_per_rank)) {
        if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
            orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
                           true, app->num_procs, app->app);
            return ORTE_ERR_SILENT;
        }
        oversubscribed = true;
    }

    /* divide the procs evenly across all nodes - this is the
     * average we have to maintain as we go, but we adjust
     * the number on each node to reflect its available slots.
     * Obviously, if all nodes have the same number of slots,
     * then the avg is what we get on each node - this is
     * the most common situation.
     */
    navg = app->num_procs / opal_list_get_size(node_list);
    if (0 == navg) {
        /* if there are less procs than nodes, we have to
         * place at least one/node
         */
        navg = 1;
    }
    /* compute how many extra procs to put on each node */
    balance = (float)(app->num_procs - (navg * (float)opal_list_get_size(node_list))) / (float)opal_list_get_size(node_list);
    extra_procs_to_assign = (int)balance;
    if (0 < (balance - (float)extra_procs_to_assign)) {
        /* compute how many nodes need an extra proc */
        nxtra_nodes = app->num_procs - ((navg + extra_procs_to_assign) * opal_list_get_size(node_list));
        /* add one so that we add an extra proc to the first nodes
         * until all procs are mapped
         */
        extra_procs_to_assign++;
        /* flag that we added one */
        add_one = true;
    }

    opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:rr: mapping by node navg %d extra_procs %d extra_nodes %d",
                        navg, extra_procs_to_assign, nxtra_nodes);

    nprocs_mapped = 0;
    lag = 0;
    for (item = opal_list_get_first(node_list);
         item != opal_list_get_end(node_list);
         item = opal_list_get_next(item)) {
        node = (orte_node_t*)item;
#if OPAL_HAVE_HWLOC
        /* get the root object as we are not assigning
         * locale except at the node level
         */
        if (NULL != node->topology) {
            obj = hwloc_get_root_obj(node->topology);
        }
#endif
        /* add this node to the map, but only do so once */
        if (!node->mapped) {
            if (ORTE_SUCCESS > (idx = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
                ORTE_ERROR_LOG(idx);
                return idx;
            }
            node->mapped = true;
            OBJ_RETAIN(node);  /* maintain accounting on object */
            ++(jdata->map->num_nodes);
        }
        /* compute the number of procs to go on this node */
        if (add_one) {
            if (0 == nxtra_nodes) {
                --extra_procs_to_assign;
                add_one = false;
            } else {
                --nxtra_nodes;
            }
        }
        if (oversubscribed) {
            /* everybody just takes their share */
            num_procs_to_assign = navg + extra_procs_to_assign;
        } else {
            /* if we are not oversubscribed, then there are enough
             * slots to handle all the procs. However, not every
             * node will have the same number of slots, so we
             * have to track how many procs to "shift" elsewhere
             * to make up the difference
             */
            if (node->slots <= node->slots_inuse) {
                /* if there are no extras to take, then we can
                 * ignore this node
                 */
                if (0 == extra_procs_to_assign) {
                    /* update how many we are lagging behind */
                    lag += navg;
                    continue;
                }
                /* everybody has to take at least the extras */
                num_procs_to_assign = extra_procs_to_assign;
                /* update how many we are lagging behind */
                lag += navg;
            } else {
                /* if slots < avg (adjusted for cpus/proc), then take all */
                if ((node->slots - node->slots_inuse) < (navg * orte_rmaps_base.cpus_per_rank)) {
                    num_procs_to_assign = (node->slots - node->slots_inuse)/orte_rmaps_base.cpus_per_rank;
                    /* update how many we are lagging behind */
                    lag += navg - num_procs_to_assign;
                } else {
                    /* take the avg plus as much of the "lag" as we can */
                    delta = 0;
                    if (0 < lag) {
                        delta = ((node->slots - node->slots_inuse)/orte_rmaps_base.cpus_per_rank) - navg;
                        if (lag < delta) {
                            delta = lag;
                        }
                        lag -= delta;
                    }
                    /* BUGFIX: the original assigned only navg here, so any
                     * "lag" consumed via delta was erased without the
                     * corresponding procs being placed - procs could be
                     * silently lost. Assign navg + delta per the comment
                     * above so the consumed lag is actually mapped.
                     */
                    num_procs_to_assign = navg + delta;
                }
                /* add in the extras */
                num_procs_to_assign += extra_procs_to_assign;
            }
        }
        for (j=0; j < num_procs_to_assign && nprocs_mapped < app->num_procs; j++) {
            if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) {
                return ORTE_ERR_OUT_OF_RESOURCE;
            }
            nprocs_mapped++;
#if OPAL_HAVE_HWLOC
            proc->locale = obj;
#endif
        }
        /* not all nodes are equal, so only set oversubscribed for
         * this node if it is in that state
         */
        if (node->slots < ((int)node->num_procs * orte_rmaps_base.cpus_per_rank)) {
            /* flag the node as oversubscribed so that sched-yield gets
             * properly set
             */
            node->oversubscribed = true;
        }
        if (nprocs_mapped == app->num_procs) {
            /* we are done */
            break;
        }
    }

    /* if we have some remaining lag, then put one/node until
     * all are assigned
     */
    for (item = opal_list_get_first(node_list);
         0 < lag && item != opal_list_get_end(node_list);
         item = opal_list_get_next(item)) {
        node = (orte_node_t*)item;
#if OPAL_HAVE_HWLOC
        /* BUGFIX: refresh the locale for THIS node - the original reused
         * the stale obj left over from the last node of the prior loop,
         * assigning lag procs a locale from the wrong node's topology.
         */
        if (NULL != node->topology) {
            obj = hwloc_get_root_obj(node->topology);
        }
#endif
        if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) {
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        nprocs_mapped++;
        lag--;
#if OPAL_HAVE_HWLOC
        proc->locale = obj;
#endif
        /* not all nodes are equal, so only set oversubscribed for
         * this node if it is in that state
         */
        if (node->slots < ((int)node->num_procs * orte_rmaps_base.cpus_per_rank)) {
            /* flag the node as oversubscribed so that sched-yield gets
             * properly set
             */
            node->oversubscribed = true;
        }
        if (nprocs_mapped == app->num_procs) {
            /* we are done */
            break;
        }
    }
    return ORTE_SUCCESS;
}