static int orte_rmaps_rank_file_open(void) { /* ensure we flag mapping by user */ #if OPAL_HAVE_HWLOC if (NULL != opal_hwloc_base_slot_list || NULL != orte_rankfile) { #else if (NULL != orte_rankfile) { #endif if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) { /* if a non-default mapping is already specified, then we * have an error */ orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping", "RANK_FILE", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping)); ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_CONFLICTED); return ORTE_ERR_SILENT; } ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYUSER); ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN); /* we are going to bind to cpuset since the user is specifying the cpus */ OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CPUSET); /* make us first */ my_priority = 10000; } return ORTE_SUCCESS; } static int orte_rmaps_rank_file_query(mca_base_module_t **module, int *priority) { *priority = my_priority; *module = (mca_base_module_t *)&orte_rmaps_rank_file_module; return ORTE_SUCCESS; }
/* * Sequentially map the ranks according to the placement in the * specified hostfile */ static int orte_rmaps_seq_map(orte_job_t *jdata) { orte_job_map_t *map; orte_app_context_t *app; int i, n; orte_std_cntr_t j; opal_list_item_t *item; orte_node_t *node, *nd; seq_node_t *sq, *save=NULL, *seq;; orte_vpid_t vpid; orte_std_cntr_t num_nodes; int rc; opal_list_t default_seq_list; opal_list_t node_list, *seq_list, sq_list; orte_proc_t *proc; mca_base_component_t *c = &mca_rmaps_seq_component.base_version; char *hosts = NULL, *sep, *eptr; FILE *fp; opal_hwloc_resource_type_t rtype; OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base_framework.framework_output, "%s rmaps:seq called on job %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid))); /* this mapper can only handle initial launch * when seq mapping is desired - allow * restarting of failed apps */ if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: job %s is being restarted - seq cannot map", ORTE_JOBID_PRINT(jdata->jobid)); return ORTE_ERR_TAKE_NEXT_OPTION; } if (NULL != jdata->map->req_mapper) { if (0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) { /* a mapper has been specified, and it isn't me */ opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: job %s not using sequential mapper", ORTE_JOBID_PRINT(jdata->jobid)); return ORTE_ERR_TAKE_NEXT_OPTION; } /* we need to process it */ goto process; } if (ORTE_MAPPING_SEQ != ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { /* I don't know how to do these - defer */ opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: job %s not using seq mapper", ORTE_JOBID_PRINT(jdata->jobid)); return ORTE_ERR_TAKE_NEXT_OPTION; } process: opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: mapping job %s", ORTE_JOBID_PRINT(jdata->jobid)); /* flag that I did the mapping */ if (NULL != 
jdata->map->last_mapper) { free(jdata->map->last_mapper); } jdata->map->last_mapper = strdup(c->mca_component_name); /* convenience def */ map = jdata->map; /* if there is a default hostfile, go and get its ordered list of nodes */ OBJ_CONSTRUCT(&default_seq_list, opal_list_t); if (NULL != orte_default_hostfile) { char *hstname = NULL; /* open the file */ fp = fopen(orte_default_hostfile, "r"); if (NULL == fp) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); rc = ORTE_ERR_NOT_FOUND; goto error; } while (NULL != (hstname = orte_getline(fp))) { if (0 == strlen(hstname)) { free(hstname); /* blank line - ignore */ continue; } if( '#' == hstname[0] ) { free(hstname); /* Comment line - ignore */ continue; } sq = OBJ_NEW(seq_node_t); if (NULL != (sep = strchr(hstname, ' '))) { *sep = '\0'; sep++; /* remove any trailing space */ eptr = sep + strlen(sep) - 1; while (eptr > sep && isspace(*eptr)) { eptr--; } *(eptr+1) = 0; sq->cpuset = strdup(sep); } // Strip off the FQDN if present, ignore IP addresses if( !orte_keep_fqdn_hostnames && !opal_net_isaddr(hstname) ) { char *ptr; if (NULL != (ptr = strchr(hstname, '.'))) { *ptr = '\0'; } } sq->hostname = hstname; opal_list_append(&default_seq_list, &sq->super); } fclose(fp); } /* start at the beginning... 
*/ vpid = 0; jdata->num_procs = 0; if (0 < opal_list_get_size(&default_seq_list)) { save = (seq_node_t*)opal_list_get_first(&default_seq_list); } /* default to LOGICAL processors */ if (orte_get_attribute(&jdata->attributes, ORTE_JOB_PHYSICAL_CPUIDS, NULL, OPAL_BOOL)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: using PHYSICAL processors"); rtype = OPAL_HWLOC_PHYSICAL; } else { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: using LOGICAL processors"); rtype = OPAL_HWLOC_LOGICAL; } /* initialize all the nodes as not included in this job map */ for (j=0; j < orte_node_pool->size; j++) { if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, j))) { ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); } } /* cycle through the app_contexts, mapping them sequentially */ for(i=0; i < jdata->apps->size; i++) { if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) { continue; } /* dash-host trumps hostfile */ if (orte_get_attribute(&app->attributes, ORTE_APP_DASH_HOST, (void**)&hosts, OPAL_STRING)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: using dash-host nodes on app %s", app->app); OBJ_CONSTRUCT(&node_list, opal_list_t); /* dash host entries cannot specify cpusets, so used the std function to retrieve the list */ if (ORTE_SUCCESS != (rc = orte_util_get_ordered_dash_host_list(&node_list, hosts))) { ORTE_ERROR_LOG(rc); free(hosts); goto error; } free(hosts); /* transfer the list to a seq_node_t list */ OBJ_CONSTRUCT(&sq_list, opal_list_t); while (NULL != (nd = (orte_node_t*)opal_list_remove_first(&node_list))) { sq = OBJ_NEW(seq_node_t); sq->hostname = strdup(nd->name); opal_list_append(&sq_list, &sq->super); OBJ_RELEASE(nd); } OBJ_DESTRUCT(&node_list); seq_list = &sq_list; } else if (orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, (void**)&hosts, OPAL_STRING)) { char *hstname; if (NULL == hosts) { rc 
= ORTE_ERR_NOT_FOUND; goto error; } opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: using hostfile %s nodes on app %s", hosts, app->app); OBJ_CONSTRUCT(&sq_list, opal_list_t); /* open the file */ fp = fopen(hosts, "r"); if (NULL == fp) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); rc = ORTE_ERR_NOT_FOUND; OBJ_DESTRUCT(&sq_list); goto error; } while (NULL != (hstname = orte_getline(fp))) { if (0 == strlen(hstname)) { free(hstname); /* blank line - ignore */ continue; } if( '#' == hstname[0] ) { free(hstname); /* Comment line - ignore */ continue; } sq = OBJ_NEW(seq_node_t); if (NULL != (sep = strchr(hstname, ' '))) { *sep = '\0'; sep++; /* remove any trailing space */ eptr = sep + strlen(sep) - 1; while (eptr > sep && isspace(*eptr)) { eptr--; } *(eptr+1) = 0; sq->cpuset = strdup(sep); } // Strip off the FQDN if present, ignore IP addresses if( !orte_keep_fqdn_hostnames && !opal_net_isaddr(hstname) ) { char *ptr; if (NULL != (ptr = strchr(hstname, '.'))) { (*ptr) = '\0'; } } sq->hostname = hstname; opal_list_append(&sq_list, &sq->super); } fclose(fp); free(hosts); seq_list = &sq_list; } else if (0 < opal_list_get_size(&default_seq_list)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: using default hostfile nodes on app %s", app->app); seq_list = &default_seq_list; } else { /* can't do anything - no nodes available! 
*/ orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-available-resources", true); return ORTE_ERR_SILENT; } /* check for nolocal and remove the head node, if required */ if (map->mapping & ORTE_MAPPING_NO_USE_LOCAL) { for (item = opal_list_get_first(seq_list); item != opal_list_get_end(seq_list); item = opal_list_get_next(item) ) { seq = (seq_node_t*)item; /* need to check ifislocal because the name in the * hostfile may not have been FQDN, while name returned * by gethostname may have been (or vice versa) */ if (orte_ifislocal(seq->hostname)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: removing head node %s", seq->hostname); opal_list_remove_item(seq_list, item); OBJ_RELEASE(item); /* "un-retain" it */ } } } if (NULL == seq_list || 0 == (num_nodes = (orte_std_cntr_t)opal_list_get_size(seq_list))) { orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-available-resources", true); return ORTE_ERR_SILENT; } /* if num_procs wasn't specified, set it now */ if (0 == app->num_procs) { app->num_procs = num_nodes; opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: setting num procs to %s for app %s", ORTE_VPID_PRINT(app->num_procs), app->app); } else if (num_nodes < app->num_procs) { orte_show_help("help-orte-rmaps-base.txt", "seq:not-enough-resources", true, app->num_procs, num_nodes); return ORTE_ERR_SILENT; } if (seq_list == &default_seq_list) { sq = save; } else { sq = (seq_node_t*)opal_list_get_first(seq_list); } for (n=0; n < app->num_procs; n++) { /* find this node on the global array - this is necessary so * that our mapping gets saved on that array as the objects * returned by the hostfile function are -not- on the array */ node = NULL; for (j=0; j < orte_node_pool->size; j++) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, j))) { continue; } if (0 == strcmp(sq->hostname, node->name)) { break; } } if (NULL == node) { /* wasn't found - 
that is an error */ orte_show_help("help-orte-rmaps-seq.txt", "orte-rmaps-seq:resource-not-found", true, sq->hostname); rc = ORTE_ERR_SILENT; goto error; } /* ensure the node is in the map */ if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { OBJ_RETAIN(node); opal_pointer_array_add(map->nodes, node); jdata->map->num_nodes++; ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); } proc = orte_rmaps_base_setup_proc(jdata, node, i); if ((node->slots < (int)node->num_procs) || (0 < node->slots_max && node->slots_max < (int)node->num_procs)) { if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) { orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error", true, node->num_procs, app->app); ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); rc = ORTE_ERR_SILENT; goto error; } /* flag the node as oversubscribed so that sched-yield gets * properly set */ ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED); ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED); /* check for permission */ if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) { /* if we weren't given a directive either way, then we will error out * as the #slots were specifically given, either by the host RM or * via hostfile/dash-host */ if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) { orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error", true, app->num_procs, app->app); ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); return ORTE_ERR_SILENT; } else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) { /* if we were explicitly told not to oversubscribe, then don't */ orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error", true, app->num_procs, app->app); ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); return ORTE_ERR_SILENT; } } } /* assign the vpid */ proc->name.vpid = vpid++; opal_output_verbose(5, orte_rmaps_base_framework.framework_output, 
"mca:rmaps:seq: assign proc %s to node %s for app %s", ORTE_VPID_PRINT(proc->name.vpid), sq->hostname, app->app); /* record the cpuset, if given */ if (NULL != sq->cpuset) { hwloc_cpuset_t bitmap; char *cpu_bitmap; if (NULL == node->topology) { /* not allowed - for sequential cpusets, we must have * the topology info */ orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-topology", true, node->name); rc = ORTE_ERR_SILENT; goto error; } /* if we are using hwthreads as cpus and binding to hwthreads, then * we can just copy the cpuset across as it already specifies things * at that level */ if (opal_hwloc_use_hwthreads_as_cpus && OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { cpu_bitmap = strdup(sq->cpuset); } else { /* setup the bitmap */ bitmap = hwloc_bitmap_alloc(); /* parse the slot_list to find the socket and core */ if (ORTE_SUCCESS != (rc = opal_hwloc_base_slot_list_parse(sq->cpuset, node->topology, rtype, bitmap))) { ORTE_ERROR_LOG(rc); hwloc_bitmap_free(bitmap); goto error; } /* note that we cannot set the proc locale to any specific object * as the slot list may have assigned it to more than one - so * leave that field NULL */ /* set the proc to the specified map */ hwloc_bitmap_list_asprintf(&cpu_bitmap, bitmap); hwloc_bitmap_free(bitmap); } orte_set_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, ORTE_ATTR_GLOBAL, cpu_bitmap, OPAL_STRING); opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: binding proc %s to cpuset %s bitmap %s", ORTE_VPID_PRINT(proc->name.vpid), sq->cpuset, cpu_bitmap); /* we are going to bind to cpuset since the user is specifying the cpus */ OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_CPUSET); /* note that the user specified the mapping */ ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYUSER); ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_GIVEN); /* cleanup */ free(cpu_bitmap); } else { hwloc_obj_t locale; /* assign the locale - 
okay for the topo to be null as * it just means it wasn't returned */ if (NULL != node->topology) { locale = hwloc_get_root_obj(node->topology); orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, locale, OPAL_PTR); } } /* add to the jdata proc array */ if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) { ORTE_ERROR_LOG(rc); goto error; } /* move to next node */ sq = (seq_node_t*)opal_list_get_next(&sq->super); } /** track the total number of processes we mapped */ jdata->num_procs += app->num_procs; /* cleanup the node list if it came from this app_context */ if (seq_list != &default_seq_list) { OPAL_LIST_DESTRUCT(seq_list); } else { save = sq; } } return ORTE_SUCCESS; error: OPAL_LIST_DESTRUCT(&default_seq_list); return rc; }
/*
 * Map a job according to a "procs per resource" pattern.
 *
 * The pattern in jdata->map->ppr is a comma-separated list of
 * "<count>:<object>" pairs (e.g. "2:socket").  Each pair stores <count>
 * in the ppr[] table at the index of the named object level, and the
 * highest level seen becomes the level at which procs are placed.  When
 * more than one pair is given, the procs placed on each node are then
 * pruned through prune().
 *
 * NOTE(review): ppr[] and prune() are defined outside this block; ppr[]
 * is assumed to hold at least OPAL_HWLOC_HWTHREAD_LEVEL + 1 entries,
 * which is what the indexing below requires.
 *
 * @param jdata  job to map
 * @return ORTE_SUCCESS on success; ORTE_ERR_TAKE_NEXT_OPTION when this
 *         mapper does not apply to the job; otherwise an error code
 *         (ORTE_ERR_SILENT when a help message has already been shown).
 */
static int ppr_mapper(orte_job_t *jdata)
{
    int rc = ORTE_SUCCESS, j, n;
    mca_base_component_t *c=&mca_rmaps_ppr_component.base_version;
    orte_node_t *node;
    orte_proc_t *proc;
    orte_app_context_t *app;
    orte_vpid_t total_procs, nprocs_mapped;
    opal_hwloc_level_t start=OPAL_HWLOC_NODE_LEVEL;
#if OPAL_HAVE_HWLOC
    hwloc_obj_t obj;
    hwloc_obj_type_t lowest;
    unsigned cache_level=0;
    unsigned int nobjs, i;
    bool pruning_reqd = false;
    opal_hwloc_level_t level;
#endif
    opal_list_t node_list;
    opal_list_item_t *item;
    orte_std_cntr_t num_slots;
    orte_app_idx_t idx;
    char **ppr_req, **ck;
    size_t len;
    bool initial_map=true;

    /* only handle initial launch of loadbalanced
     * or NPERxxx jobs - allow restarting of failed apps
     */
    if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:ppr: job %s being restarted - ppr cannot map",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }
    if (NULL != jdata->map->req_mapper &&
        0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) {
        /* a mapper has been specified, and it isn't me */
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:ppr: job %s not using ppr mapper",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }
    if (NULL == jdata->map->ppr ||
        ORTE_MAPPING_PPR != ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
        /* not for us */
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:ppr: job %s not using ppr mapper",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:ppr: mapping job %s with ppr %s",
                        ORTE_JOBID_PRINT(jdata->jobid), jdata->map->ppr);

    /* flag that I did the mapping */
    if (NULL != jdata->map->last_mapper) {
        free(jdata->map->last_mapper);
    }
    jdata->map->last_mapper = strdup(c->mca_component_name);

    /* initialize - clear every level up to and including the hwthread
     * level so that no count from an earlier job survives */
    memset(ppr, 0, (OPAL_HWLOC_HWTHREAD_LEVEL + 1) * sizeof(ppr[0]));

    /* parse option */
    n=0;
    ppr_req = opal_argv_split(jdata->map->ppr, ',');
    /* a NULL result is treated like an empty specification */
    for (j=0; NULL != ppr_req && NULL != ppr_req[j]; j++) {
        /* split on the colon */
        ck = opal_argv_split(ppr_req[j], ':');
        if (2 != opal_argv_count(ck)) {
            /* must provide a specification */
            orte_show_help("help-orte-rmaps-ppr.txt", "invalid-ppr", true, jdata->map->ppr);
            opal_argv_free(ppr_req);
            opal_argv_free(ck);
            return ORTE_ERR_SILENT;
        }
        /* comparing only len characters lets abbreviations match */
        len = strlen(ck[1]);
        if (0 == strncasecmp(ck[1], "node", len)) {
            ppr[OPAL_HWLOC_NODE_LEVEL] = strtol(ck[0], NULL, 10);
            ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYNODE);
            start = OPAL_HWLOC_NODE_LEVEL;
            n++;
#if OPAL_HAVE_HWLOC
        } else if (0 == strncasecmp(ck[1], "hwthread", len) ||
                   0 == strncasecmp(ck[1], "thread", len)) {
            ppr[OPAL_HWLOC_HWTHREAD_LEVEL] = strtol(ck[0], NULL, 10);
            start = OPAL_HWLOC_HWTHREAD_LEVEL;
            ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYHWTHREAD);
            n++;
        } else if (0 == strncasecmp(ck[1], "core", len)) {
            ppr[OPAL_HWLOC_CORE_LEVEL] = strtol(ck[0], NULL, 10);
            if (start < OPAL_HWLOC_CORE_LEVEL) {
                start = OPAL_HWLOC_CORE_LEVEL;
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYCORE);
            }
            n++;
        } else if (0 == strncasecmp(ck[1], "socket", len) ||
                   0 == strncasecmp(ck[1], "skt", len)) {
            ppr[OPAL_HWLOC_SOCKET_LEVEL] = strtol(ck[0], NULL, 10);
            if (start < OPAL_HWLOC_SOCKET_LEVEL) {
                start = OPAL_HWLOC_SOCKET_LEVEL;
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSOCKET);
            }
            n++;
        } else if (0 == strncasecmp(ck[1], "l1cache", len)) {
            ppr[OPAL_HWLOC_L1CACHE_LEVEL] = strtol(ck[0], NULL, 10);
            if (start < OPAL_HWLOC_L1CACHE_LEVEL) {
                start = OPAL_HWLOC_L1CACHE_LEVEL;
                cache_level = 1;
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYL1CACHE);
            }
            n++;
        } else if (0 == strncasecmp(ck[1], "l2cache", len)) {
            ppr[OPAL_HWLOC_L2CACHE_LEVEL] = strtol(ck[0], NULL, 10);
            if (start < OPAL_HWLOC_L2CACHE_LEVEL) {
                start = OPAL_HWLOC_L2CACHE_LEVEL;
                cache_level = 2;
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYL2CACHE);
            }
            n++;
        } else if (0 == strncasecmp(ck[1], "l3cache", len)) {
            ppr[OPAL_HWLOC_L3CACHE_LEVEL] = strtol(ck[0], NULL, 10);
            if (start < OPAL_HWLOC_L3CACHE_LEVEL) {
                start = OPAL_HWLOC_L3CACHE_LEVEL;
                cache_level = 3;
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYL3CACHE);
            }
            n++;
        } else if (0 == strncasecmp(ck[1], "numa", len)) {
            ppr[OPAL_HWLOC_NUMA_LEVEL] = strtol(ck[0], NULL, 10);
            if (start < OPAL_HWLOC_NUMA_LEVEL) {
                start = OPAL_HWLOC_NUMA_LEVEL;
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYNUMA);
            }
            n++;
#endif
        } else {
            /* unknown spec */
            orte_show_help("help-orte-rmaps-ppr.txt", "unrecognized-ppr-option", true, ck[1], jdata->map->ppr);
            opal_argv_free(ppr_req);
            opal_argv_free(ck);
            return ORTE_ERR_SILENT;
        }
        opal_argv_free(ck);
    }
    opal_argv_free(ppr_req);
    /* if nothing was given, that's an error */
    if (0 == n) {
        opal_output(0, "NOTHING GIVEN");
        return ORTE_ERR_SILENT;
    }
#if OPAL_HAVE_HWLOC
    /* if more than one level was specified, then pruning will be reqd */
    if (1 < n) {
        pruning_reqd = true;
    }
#endif

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:ppr: job %s assigned policy %s",
                        ORTE_JOBID_PRINT(jdata->jobid),
                        orte_rmaps_base_print_mapping(jdata->map->mapping));

#if OPAL_HAVE_HWLOC
    /* convenience */
    lowest = opal_hwloc_levels[start];
#endif

    for (idx=0; idx < (orte_app_idx_t)jdata->apps->size; idx++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, idx))) {
            continue;
        }

        /* if the number of total procs was given, set that
         * limit - otherwise, set to max so we simply fill
         * all the nodes with the pattern
         */
        if (0 < app->num_procs) {
            total_procs = app->num_procs;
        } else {
            total_procs = ORTE_VPID_MAX;
        }

        /* get the available nodes */
        OBJ_CONSTRUCT(&node_list, opal_list_t);
        if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
                                                                  jdata->map->mapping, initial_map, false))) {
            ORTE_ERROR_LOG(rc);
            goto error;
        }
        /* flag that all subsequent requests should not reset the node->mapped flag */
        initial_map = false;

        /* if a bookmark exists from some prior mapping, set us to start there */
        jdata->bookmark = orte_rmaps_base_get_starting_point(&node_list, jdata);

        /* cycle across the nodes */
        nprocs_mapped = 0;
        for (item = opal_list_get_first(&node_list);
             item != opal_list_get_end(&node_list);
             item = opal_list_get_next(item)) {
            node = (orte_node_t*)item;
#if OPAL_HAVE_HWLOC
            /* bozo check */
            if (NULL == node->topology) {
                orte_show_help("help-orte-rmaps-ppr.txt", "ppr-topo-missing",
                               true, node->name);
                rc = ORTE_ERR_SILENT;
                goto error;
            }
#endif
            /* add the node to the map, if needed */
            if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
                if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
                    ORTE_ERROR_LOG(rc);
                    goto error;
                }
                ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
                OBJ_RETAIN(node);  /* maintain accounting on object */
                jdata->map->num_nodes++;
            }
            /* if we are mapping solely at the node level, just put
             * that many procs on this node
             */
            if (OPAL_HWLOC_NODE_LEVEL == start) {
#if OPAL_HAVE_HWLOC
                obj = hwloc_get_root_obj(node->topology);
#endif
                for (j=0; j < ppr[start] && nprocs_mapped < total_procs; j++) {
                    if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, idx))) {
                        rc = ORTE_ERR_OUT_OF_RESOURCE;
                        goto error;
                    }
                    nprocs_mapped++;
#if OPAL_HAVE_HWLOC
                    orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR);
#endif
                }
#if OPAL_HAVE_HWLOC
            } else {
                /* get the number of lowest resources on this node */
                nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology,
                                                           lowest, cache_level,
                                                           OPAL_HWLOC_AVAILABLE);

                /* map the specified number of procs to each such resource on this node,
                 * recording the locale of each proc so we know its cpuset
                 */
                for (i=0; i < nobjs; i++) {
                    obj = opal_hwloc_base_get_obj_by_type(node->topology,
                                                          lowest, cache_level,
                                                          i, OPAL_HWLOC_AVAILABLE);
                    for (j=0; j < ppr[start] && nprocs_mapped < total_procs; j++) {
                        if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, idx))) {
                            rc = ORTE_ERR_OUT_OF_RESOURCE;
                            goto error;
                        }
                        nprocs_mapped++;
                        orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR);
                    }
                }

                if (pruning_reqd) {
                    /* go up the ladder and prune the procs according to
                     * the specification, adjusting the count of procs on the
                     * node as we go.  The ladder is restarted one step
                     * above the placement level for every node; carrying
                     * the value over from the previous node would walk
                     * the level below the valid range.
                     */
                    level = start;
                    level--;
                    prune(jdata->jobid, idx, node, &level, &nprocs_mapped);
                }
#endif
            }

            /* set the total slots used */
            if ((int)node->num_procs <= node->slots) {
                node->slots_inuse = (int)node->num_procs;
            } else {
                node->slots_inuse = node->slots;
            }

            /* if no-oversubscribe was specified, check to see if
             * we have violated the total slot specification - regardless,
             * if slots_max was given, we are not allowed to violate it!
             */
            if ((node->slots < (int)node->num_procs) ||
                (0 < node->slots_max && node->slots_max < (int)node->num_procs)) {
                if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
                    orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
                                   true, node->num_procs, app->app);
                    rc = ORTE_ERR_SILENT;
                    goto error;
                }
                /* flag the node as oversubscribed so that sched-yield gets
                 * properly set
                 */
                ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
            }

            /* stop once every requested proc has been placed; otherwise
             * continue on to the next node
             */
            if (total_procs == nprocs_mapped) {
                break;
            }
        }
        if (0 == app->num_procs) {
            app->num_procs = nprocs_mapped;
        }
        if (ORTE_VPID_MAX != total_procs && nprocs_mapped < total_procs) {
            /* couldn't map them all */
            orte_show_help("help-orte-rmaps-ppr.txt", "ppr-too-many-procs",
                           true, app->app, app->num_procs, jdata->map->ppr);
            rc = ORTE_ERR_SILENT;
            goto error;
        }
        /* compute vpids and add proc objects to the job */
        if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata, app, &node_list))) {
            ORTE_ERROR_LOG(rc);
            goto error;
        }

        /* track the total number of processes we mapped - must update
         * this AFTER we compute vpids so that computation is done
         * correctly
         */
        jdata->num_procs += app->num_procs;

        while (NULL != (item = opal_list_remove_first(&node_list))) {
            OBJ_RELEASE(item);
        }
        OBJ_DESTRUCT(&node_list);
    }
    return ORTE_SUCCESS;

 error:
    /* release the node list of the app that was being processed */
    while (NULL != (item = opal_list_remove_first(&node_list))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&node_list);
    return rc;
}
/*
 * Translate a mapping specification string into a mapping policy value.
 *
 * Accepted forms of inspec:
 *   NULL                      - socket policy, not flagged as "given"
 *   ":<modifiers>"            - socket policy plus modifiers, not flagged
 *                               as "given"
 *   "ppr:<pattern>[:<mods>]"  - ppr policy; the pattern is stored in
 *                               orte_rmaps_base.ppr
 *   "<policy>[:<modifiers>]"  - named policy, flagged as "given"
 *
 * Policy names are compared over the length of the supplied text only, so
 * abbreviations match; the first keyword that matches wins.
 *
 * @param policy  receives the resulting policy on success
 * @param device  if non-NULL, reset to NULL and then set to a copy of
 *                rmaps_dist_device (up to its first ':') for "dist"
 * @param inspec  specification string; it is copied, never modified
 * @return ORTE_SUCCESS, ORTE_ERR_SILENT after a help message was shown
 *         or a modifier check reported it, or the code returned by the
 *         modifier check.
 */
int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy,
                                       char **device, char *inspec)
{
    /* keywords that translate directly into a policy value; the order
     * fixes which keyword an ambiguous abbreviation resolves to */
    const struct {
        const char *keyword;
        orte_mapping_policy_t value;
    } plain[] = {
        { "slot",    ORTE_MAPPING_BYSLOT },
        { "node",    ORTE_MAPPING_BYNODE },
        { "seq",     ORTE_MAPPING_SEQ },
        { "core",    ORTE_MAPPING_BYCORE },
        { "l1cache", ORTE_MAPPING_BYL1CACHE },
        { "l2cache", ORTE_MAPPING_BYL2CACHE },
        { "l3cache", ORTE_MAPPING_BYL3CACHE },
        { "socket",  ORTE_MAPPING_BYSOCKET },
        { "numa",    ORTE_MAPPING_BYNUMA },
        { "board",   ORTE_MAPPING_BYBOARD }
    };
    orte_mapping_policy_t result = 0;
    char *work, *mods, *tail, *colon;
    size_t nchars, k;
    int status;
    bool matched = false;

    /* set defaults */
    if (NULL != device) {
        *device = NULL;
    }

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "%s rmaps:base set policy with %s device %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        (NULL == inspec) ? "NULL" : inspec,
                        (NULL == device) ? "NULL" : "NONNULL");

    /* no specification at all: fall back to socket */
    if (NULL == inspec) {
        ORTE_SET_MAPPING_POLICY(result, ORTE_MAPPING_BYSOCKET);
        *policy = result;
        return ORTE_SUCCESS;
    }

    /* work on a private copy so the caller's string stays intact */
    work = strdup(inspec);

    /* a colon separates the policy from its modifiers */
    mods = strchr(work, ':');
    if (NULL != mods) {
        if (mods == work) {
            /* leading colon: only modifiers were supplied, and they
             * apply to the socket policy */
            mods++;
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "%s rmaps:base only modifiers %s provided - assuming bysocket mapping",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), mods);
            ORTE_SET_MAPPING_POLICY(result, ORTE_MAPPING_BYSOCKET);
            status = check_modifiers(mods, &result);
            free(work);
            if (ORTE_ERR_SILENT == status) {
                return ORTE_ERR_SILENT;
            }
            *policy = result;
            return ORTE_SUCCESS;
        }

        /* cut the copy in two: policy name in "work", remainder in "mods" */
        *mods = '\0';
        mods++;
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "%s rmaps:base policy %s modifiers %s provided",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), work, mods);

        /* "ppr" carries a pattern, optionally followed by modifiers */
        if (0 == strncasecmp(work, "ppr", strlen(work))) {
            tail = strrchr(mods, ':');
            if (NULL == tail) {
                /* the pattern needs at least one colon between the
                 * count and the object type */
                orte_show_help("help-orte-rmaps-base.txt",
                               "invalid-pattern", true, inspec);
                free(work);
                return ORTE_ERR_SILENT;
            }
            tail++;  /* step over the colon */
            /* the final field may or may not be a modifier, so an
             * unrecognized one is not reported here */
            status = check_modifiers(tail, &result);
            if (ORTE_ERR_SILENT == status) {
                free(work);
                return ORTE_ERR_SILENT;
            }
            if (ORTE_SUCCESS == status) {
                /* it was a modifier - remove it from the pattern */
                tail--;
                *tail = '\0';
            }
            /* what is left is the pattern itself */
            orte_rmaps_base.ppr = strdup(mods);
            ORTE_SET_MAPPING_POLICY(result, ORTE_MAPPING_PPR);
            ORTE_SET_MAPPING_DIRECTIVE(result, ORTE_MAPPING_GIVEN);
            free(work);
            *policy = result;
            return ORTE_SUCCESS;
        }

        /* any other policy: the remainder must be modifiers */
        status = check_modifiers(mods, &result);
        if (ORTE_SUCCESS != status && ORTE_ERR_TAKE_NEXT_OPTION != status) {
            if (ORTE_ERR_BAD_PARAM == status) {
                orte_show_help("help-orte-rmaps-base.txt", "unrecognized-modifier", true, inspec);
            }
            free(work);
            return status;
        }
    }

    /* resolve the policy name */
    nchars = strlen(work);
    for (k = 0; k < sizeof(plain) / sizeof(plain[0]); k++) {
        if (0 == strncasecmp(work, plain[k].keyword, nchars)) {
            ORTE_SET_MAPPING_POLICY(result, plain[k].value);
            matched = true;
            break;
        }
    }

    if (!matched) {
        if (0 == strncasecmp(work, "hwthread", nchars)) {
            ORTE_SET_MAPPING_POLICY(result, ORTE_MAPPING_BYHWTHREAD);
            /* if we are mapping processes to individual hwthreads, then
             * we need to treat those hwthreads as separate cpus
             */
            opal_hwloc_use_hwthreads_as_cpus = true;
        } else if (0 == strncasecmp(work, "dist", nchars)) {
            if (NULL == rmaps_dist_device) {
                orte_show_help("help-orte-rmaps-base.txt", "device-not-specified", true);
                free(work);
                return ORTE_ERR_SILENT;
            }
            /* keep only the device name that precedes any colon */
            if (NULL != (colon = strchr(rmaps_dist_device, ':'))) {
                *colon = '\0';
            }
            if (NULL != device) {
                *device = strdup(rmaps_dist_device);
            }
            ORTE_SET_MAPPING_POLICY(result, ORTE_MAPPING_BYDIST);
        } else {
            orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "mapping", work);
            free(work);
            return ORTE_ERR_SILENT;
        }
    }

    free(work);
    /* an explicit policy name was supplied */
    ORTE_SET_MAPPING_DIRECTIVE(result, ORTE_MAPPING_GIVEN);

    *policy = result;
    return ORTE_SUCCESS;
}
/**
 * Function for finding and opening either all MCA components, or the one
 * that was specifically requested via a MCA parameter.
 *
 * Before the components are opened, the framework-level globals are
 * initialized and the (partly deprecated) command-line/MCA options are
 * folded into orte_rmaps_base.mapping / .ranking and the hwloc binding
 * policy.  Every conflict between two options is reported through
 * orte_show_help() and aborts the open.
 *
 * @param flags  passed unchanged to mca_base_framework_components_open()
 *
 * @return ORTE_ERR_SILENT when a problem was already reported to the user
 *         (bad topology file, conflicting or redefined policy, or a
 *         component flagged ORTE_MAPPING_CONFLICTED); the error returned by
 *         the mapping/ranking policy parsers; otherwise the result of
 *         mca_base_framework_components_open().
 */
static int orte_rmaps_base_open(mca_base_open_flag_t flags)
{
    int rc;

    /* init the globals */
    OBJ_CONSTRUCT(&orte_rmaps_base.selected_modules, opal_list_t);
    orte_rmaps_base.slot_list = NULL;
    orte_rmaps_base.mapping = 0;
    orte_rmaps_base.ranking = 0;
    orte_rmaps_base.device = NULL;

    /* if a topology file was given, then set our topology
     * from it. Even though our actual topology may differ,
     * mpirun only needs to see the compute node topology
     * for mapping purposes
     */
    if (NULL != rmaps_base_topo_file) {
        if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology(rmaps_base_topo_file))) {
            orte_show_help("help-orte-rmaps-base.txt", "topo-file", true, rmaps_base_topo_file);
            return ORTE_ERR_SILENT;
        }
    }

    /* check for violations that have to be detected before we parse the mapping option */
    if (NULL != orte_rmaps_base.ppr) {
        orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
                       "--ppr, -ppr", "--map-by ppr:<pattern>",
                       "rmaps_base_pattern, rmaps_ppr_pattern", "rmaps_base_mapping_policy=ppr:<pattern>");
        /* if the mapping policy is NULL, then we can proceed */
        if (NULL == rmaps_base_mapping_policy) {
            /* NOTE(review): the asprintf result is not checked here (nor
             * further below); on allocation failure the target pointer is
             * left unspecified */
            asprintf(&rmaps_base_mapping_policy, "ppr:%s", orte_rmaps_base.ppr);
        } else {
            return ORTE_ERR_SILENT;
        }
    }

    /* only a deprecation notice here - the value itself is acted upon
     * further down, once the mapping policy has been parsed */
    if (1 < orte_rmaps_base.cpus_per_rank) {
        orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
                       "--cpus-per-proc, -cpus-per-proc, --cpus-per-rank, -cpus-per-rank",
                       "--map-by <obj>:PE=N, default <obj>=NUMA",
                       "rmaps_base_cpus_per_proc", "rmaps_base_mapping_policy=<obj>:PE=N, default <obj>=NUMA");
    }

    if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&orte_rmaps_base.mapping,
                                                                 &orte_rmaps_base.device,
                                                                 rmaps_base_mapping_policy))) {
        return rc;
    }

    if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_ranking_policy(&orte_rmaps_base.ranking,
                                                                 orte_rmaps_base.mapping,
                                                                 rmaps_base_ranking_policy))) {
        return rc;
    }

    if (rmaps_base_bycore) {
        orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
                       "--bycore, -bycore", "--map-by core",
                       "rmaps_base_bycore", "rmaps_base_mapping_policy=core");
        /* set mapping policy to bycore - error if something else already set */
        if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) != ORTE_MAPPING_BYCORE) {
            /* error - cannot redefine the default mapping policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "bycore", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYCORE);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* set ranking policy to bycore - error if something else already set */
        if ((ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(orte_rmaps_base.ranking)) &&
            ORTE_GET_RANKING_POLICY(orte_rmaps_base.ranking) != ORTE_RANK_BY_CORE) {
            /* error - cannot redefine the default ranking policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "ranking",
                           "bycore", orte_rmaps_base_print_ranking(orte_rmaps_base.ranking));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_CORE);
        ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
    }

    if (rmaps_base_byslot) {
        orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
                       "--byslot, -byslot", "--map-by slot",
                       "rmaps_base_byslot", "rmaps_base_mapping_policy=slot");
        /* set mapping policy to byslot - error if something else already set */
        if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) != ORTE_MAPPING_BYSLOT) {
            /* error - cannot redefine the default mapping policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "byslot", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYSLOT);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* set ranking policy to byslot - error if something else already set */
        if ((ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(orte_rmaps_base.ranking)) &&
            ORTE_GET_RANKING_POLICY(orte_rmaps_base.ranking) != ORTE_RANK_BY_SLOT) {
            /* error - cannot redefine the default ranking policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "ranking",
                           "byslot", orte_rmaps_base_print_ranking(orte_rmaps_base.ranking));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_SLOT);
        ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
    }

    if (rmaps_base_bynode) {
        orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
                       "--bynode, -bynode", "--map-by node",
                       "rmaps_base_bynode", "rmaps_base_mapping_policy=node");
        /* set mapping policy to bynode - error if something else already set */
        if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) != ORTE_MAPPING_BYNODE) {
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "bynode", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYNODE);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* set ranking policy to bynode - error if something else already set */
        if ((ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(orte_rmaps_base.ranking)) &&
            ORTE_GET_RANKING_POLICY(orte_rmaps_base.ranking) != ORTE_RANK_BY_NODE) {
            /* error - cannot redefine the default ranking policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "ranking",
                           "bynode", orte_rmaps_base_print_ranking(orte_rmaps_base.ranking));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_NODE);
        ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
    }

    if (1 < orte_rmaps_base.cpus_per_rank) {
        /* if we were asked for multiple cpus/proc, then we have to
         * bind to those cpus - any other binding policy is an
         * error
         */
        if (OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) {
            if (opal_hwloc_use_hwthreads_as_cpus) {
                if (OPAL_BIND_TO_HWTHREAD != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) &&
                    OPAL_BIND_TO_NONE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                    orte_show_help("help-orte-rmaps-base.txt", "mismatch-binding", true,
                                   orte_rmaps_base.cpus_per_rank, "use-hwthreads-as-cpus",
                                   opal_hwloc_base_print_binding(opal_hwloc_binding_policy),
                                   "bind-to hwthread");
                    return ORTE_ERR_SILENT;
                }
            } else if (OPAL_BIND_TO_CORE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) &&
                       OPAL_BIND_TO_NONE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                orte_show_help("help-orte-rmaps-base.txt", "mismatch-binding", true,
                               orte_rmaps_base.cpus_per_rank, "cores as cpus",
                               opal_hwloc_base_print_binding(opal_hwloc_binding_policy),
                               "bind-to core");
                return ORTE_ERR_SILENT;
            }
        } else {
            /* no binding policy was given - pick the one that matches
             * what we are treating as a cpu */
            if (opal_hwloc_use_hwthreads_as_cpus) {
                OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_HWTHREAD);
            } else {
                OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CORE);
            }
        }
        /* we also need to ensure we are mapping to a high-enough level to have
         * multiple cpus beneath it - by default, we'll go to the NUMA level */
        if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
            if (ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) == ORTE_MAPPING_BYHWTHREAD ||
                (ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) == ORTE_MAPPING_BYCORE &&
                 !opal_hwloc_use_hwthreads_as_cpus)) {
                orte_show_help("help-orte-rmaps-base.txt", "mapping-too-low-init", true);
                return ORTE_ERR_SILENT;
            }
        } else {
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "%s rmaps:base pe/rank set - setting mapping to BYNUMA",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYNUMA);
            ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        }
    }

    if (orte_rmaps_base_pernode) {
        /* there is no way to resolve this conflict, so if something else was
         * given, we have no choice but to error out
         */
        if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
            /* report the option that actually caused the conflict */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "pernode", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        /* ensure we set the mapping policy to ppr */
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_PPR);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* define the ppr */
        orte_rmaps_base.ppr = strdup("1:node");
    }

    if (0 < orte_rmaps_base_n_pernode) {
        /* there is no way to resolve this conflict, so if something else was
         * given, we have no choice but to error out
         */
        if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
            /* report the option that actually caused the conflict */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "npernode", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        /* ensure we set the mapping policy to ppr */
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_PPR);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* define the ppr */
        asprintf(&orte_rmaps_base.ppr, "%d:node", orte_rmaps_base_n_pernode);
    }

    if (0 < orte_rmaps_base_n_persocket) {
        /* there is no way to resolve this conflict, so if something else was
         * given, we have no choice but to error out
         */
        if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
            /* report the option that actually caused the conflict */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "npersocket", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        /* ensure we set the mapping policy to ppr */
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_PPR);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* define the ppr */
        asprintf(&orte_rmaps_base.ppr, "%d:socket", orte_rmaps_base_n_persocket);
    }

    /* Should we schedule on the local node or not? */
    if (rmaps_base_no_schedule_local) {
        orte_rmaps_base.mapping |= ORTE_MAPPING_NO_USE_LOCAL;
    }

    /* Should we oversubscribe or not? */
    if (rmaps_base_no_oversubscribe) {
        if ((ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            !(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
            /* error - cannot redefine the default mapping policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "no-oversubscribe", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_SUBSCRIBE_GIVEN);
    }

    /** force oversubscription permission */
    if (rmaps_base_oversubscribe) {
        if ((ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
            /* error - cannot redefine the default mapping policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "oversubscribe", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_UNSET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_SUBSCRIBE_GIVEN);
        /* also set the overload allowed flag */
        opal_hwloc_binding_policy |= OPAL_BIND_ALLOW_OVERLOAD;
    }

    /* should we display a detailed (developer-quality) version of the map after determining it? */
    if (rmaps_base_display_devel_map) {
        orte_rmaps_base.display_map = true;
        orte_devel_level_output = true;
    }

    /* should we display a diffable report of proc locations after determining it? */
    if (rmaps_base_display_diffable_map) {
        orte_rmaps_base.display_map = true;
        orte_display_diffable_output = true;
    }

    /* Open up all available components */
    rc = mca_base_framework_components_open(&orte_rmaps_base_framework, flags);

    /* check to see if any component indicated a problem */
    if (ORTE_MAPPING_CONFLICTED & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
        /* the component would have already reported the error, so
         * tell the rest of the chain to shut up
         */
        return ORTE_ERR_SILENT;
    }

    /* All done */
    return rc;
}
/*
 * Create a round-robin mapping for the job.
 *
 * Each app_context of the job is mapped in turn: the usable nodes are
 * collected, the procs are assigned by node, by slot or (when hwloc is
 * available) by hardware object, and the vpids are then computed so the
 * ranks of one app_context stay contiguous.
 *
 * Returns ORTE_ERR_TAKE_NEXT_OPTION when this mapper declines the job
 * (restart, another mapper requested, or a policy beyond ORTE_MAPPING_RR),
 * ORTE_SUCCESS when every app_context was mapped, and the failing error
 * code otherwise.  The per-app node list is released on every exit path
 * taken after it has been constructed.
 */
static int orte_rmaps_rr_map(orte_job_t *jdata)
{
    orte_app_context_t *app;
    int i;
    opal_list_t node_list;
    opal_list_item_t *item;
    orte_std_cntr_t num_slots;
    int rc;
    mca_base_component_t *c = &mca_rmaps_round_robin_component.base_version;
    bool initial_map=true;

    /* this mapper can only handle initial launch
     * when rr mapping is desired - allow
     * restarting of failed apps
     */
    if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:rr: job %s is being restarted - rr cannot map",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }
    if (NULL != jdata->map->req_mapper &&
        0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) {
        /* a mapper has been specified, and it isn't me */
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:rr: job %s not using rr mapper",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }
    if (ORTE_MAPPING_RR < ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
        /* I don't know how to do these - defer */
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:rr: job %s not using rr mapper",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:rr: mapping job %s",
                        ORTE_JOBID_PRINT(jdata->jobid));

    /* flag that I did the mapping */
    if (NULL != jdata->map->last_mapper) {
        free(jdata->map->last_mapper);
    }
    jdata->map->last_mapper = strdup(c->mca_component_name);

    /* start at the beginning... */
    jdata->num_procs = 0;

    /* cycle through the app_contexts, mapping them sequentially */
    for(i=0; i < jdata->apps->size; i++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
            continue;
        }

        /* setup the nodelist here in case we jump to error */
        OBJ_CONSTRUCT(&node_list, opal_list_t);

        /* if the number of processes wasn't specified, then we know there can be only
         * one app_context allowed in the launch, and that we are to launch it across
         * all available slots. We'll double-check the single app_context rule first
         */
        if (0 == app->num_procs && 1 < jdata->num_apps) {
            orte_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:multi-apps-and-zero-np",
                           true, jdata->num_apps, NULL);
            rc = ORTE_ERR_SILENT;
            goto error;
        }

        /* for each app_context, we have to get the list of nodes that it can
         * use since that can now be modified with a hostfile and/or -host
         * option
         */
        if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
                                                                  jdata->map->mapping, initial_map, false))) {
            ORTE_ERROR_LOG(rc);
            goto error;
        }
        /* flag that all subsequent requests should not reset the node->mapped flag */
        initial_map = false;

        /* if a bookmark exists from some prior mapping, set us to start there */
        jdata->bookmark = orte_rmaps_base_get_starting_point(&node_list, jdata);

        if (0 == app->num_procs) {
            /* set the num_procs to equal the number of slots on these
             * mapped nodes, taking into account the number of cpus/rank
             */
            app->num_procs = num_slots / orte_rmaps_base.cpus_per_rank;
            /* sometimes, we have only one "slot" assigned, but may
             * want more than one cpu/rank - so ensure we always wind
             * up with at least one proc
             */
            if (0 == app->num_procs) {
                app->num_procs = 1;
            }
        }

        /* Make assignments */
        if (ORTE_MAPPING_BYNODE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
            rc = orte_rmaps_rr_bynode(jdata, app, &node_list, num_slots,
                                      app->num_procs);
        } else if (ORTE_MAPPING_BYSLOT == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
            rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots,
                                      app->num_procs);
#if OPAL_HAVE_HWLOC
        } else if (ORTE_MAPPING_BYHWTHREAD == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
            rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots,
                                     app->num_procs, HWLOC_OBJ_PU, 0);
            if (ORTE_ERR_NOT_FOUND == rc) {
                /* if the mapper couldn't map by this object because
                 * it isn't available, but the error allows us to try
                 * byslot, then do so
                 */
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
                rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots,
                                          app->num_procs);
            }
        } else if (ORTE_MAPPING_BYCORE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
            rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots,
                                     app->num_procs, HWLOC_OBJ_CORE, 0);
            if (ORTE_ERR_NOT_FOUND == rc) {
                /* object type not available - fall back to byslot */
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
                rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots,
                                          app->num_procs);
            }
        } else if (ORTE_MAPPING_BYL1CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
            rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots,
                                     app->num_procs, HWLOC_OBJ_CACHE, 1);
            if (ORTE_ERR_NOT_FOUND == rc) {
                /* object type not available - fall back to byslot */
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
                rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots,
                                          app->num_procs);
            }
        } else if (ORTE_MAPPING_BYL2CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
            rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots,
                                     app->num_procs, HWLOC_OBJ_CACHE, 2);
            if (ORTE_ERR_NOT_FOUND == rc) {
                /* object type not available - fall back to byslot */
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
                rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots,
                                          app->num_procs);
            }
        } else if (ORTE_MAPPING_BYL3CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
            rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots,
                                     app->num_procs, HWLOC_OBJ_CACHE, 3);
            if (ORTE_ERR_NOT_FOUND == rc) {
                /* object type not available - fall back to byslot */
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
                rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots,
                                          app->num_procs);
            }
        } else if (ORTE_MAPPING_BYSOCKET == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
            rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots,
                                     app->num_procs, HWLOC_OBJ_SOCKET, 0);
            if (ORTE_ERR_NOT_FOUND == rc) {
                /* object type not available - fall back to byslot */
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
                rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots,
                                          app->num_procs);
            }
        } else if (ORTE_MAPPING_BYNUMA == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
            rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots,
                                     app->num_procs, HWLOC_OBJ_NODE, 0);
            if (ORTE_ERR_NOT_FOUND == rc) {
                /* object type not available - fall back to byslot */
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
                rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots,
                                          app->num_procs);
            }
#endif
        } else {
            /* unrecognized mapping directive */
            orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy",
                           true, "mapping",
                           orte_rmaps_base_print_mapping(jdata->map->mapping));
            rc = ORTE_ERR_SILENT;
            goto error;
        }
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            goto error;
        }

        /* compute vpids and add proc objects to the job - do this after
         * each app_context so that the ranks within each context are
         * contiguous
         */
        if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata, app, &node_list))) {
            ORTE_ERROR_LOG(rc);
            /* leave through the cleanup path so the node list and the
             * items still on it are released */
            goto error;
        }

        /* track the total number of processes we mapped - must update
         * this value AFTER we compute vpids so that computation
         * is done correctly
         */
        jdata->num_procs += app->num_procs;

        /* cleanup the node list - it can differ from one app_context
         * to another, so we have to get it every time
         */
        while (NULL != (item = opal_list_remove_first(&node_list))) {
            OBJ_RELEASE(item);
        }
        OBJ_DESTRUCT(&node_list);
    }

    return ORTE_SUCCESS;

 error:
    while(NULL != (item = opal_list_remove_first(&node_list))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&node_list);

    return rc;
}
/* * Function for selecting one component from all those that are * available. */ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) { orte_job_t *jdata; orte_job_map_t *map; int rc; bool did_map; opal_list_item_t *item; orte_rmaps_base_selected_module_t *mod; orte_job_t *parent; orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; /* convenience */ jdata = caddy->jdata; jdata->state = ORTE_JOB_STATE_MAP; /* NOTE: NO PROXY COMPONENT REQUIRED - REMOTE PROCS ARE NOT * ALLOWED TO CALL RMAPS INDEPENDENTLY. ONLY THE PLM CAN * DO SO, AND ALL PLM COMMANDS ARE RELAYED TO HNP */ opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: mapping job %s", ORTE_JOBID_PRINT(jdata->jobid)); /* NOTE: CHECK FOR JDATA->MAP == NULL. IF IT IS, THEN USE * THE VALUES THAT WERE READ BY THE LOCAL MCA PARAMS. THE * PLM PROXY WILL SEND A JOB-OBJECT THAT WILL INCLUDE ANY * MAPPING DIRECTIVES - OTHERWISE, THAT OBJECT WILL HAVE A * NULL MAP FIELD * LONE EXCEPTION - WE COPY DISPLAY MAP ACROSS IF THEY * DIDN'T SET IT */ if (NULL == jdata->map) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: creating new map for job %s", ORTE_JOBID_PRINT(jdata->jobid)); /* create a map object where we will store the results */ map = OBJ_NEW(orte_job_map_t); if (NULL == map) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return; } /* load it with the system defaults */ map->mapping = orte_rmaps_base.mapping; map->ranking = orte_rmaps_base.ranking; #if OPAL_HAVE_HWLOC map->binding = opal_hwloc_binding_policy; #endif if (NULL != orte_rmaps_base.ppr) { map->ppr = strdup(orte_rmaps_base.ppr); } map->cpus_per_rank = orte_rmaps_base.cpus_per_rank; map->display_map = orte_rmaps_base.display_map; /* assign the map object to this job */ jdata->map = map; } else { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: setting mapping policies for job %s", 
ORTE_JOBID_PRINT(jdata->jobid)); if (!jdata->map->display_map) { jdata->map->display_map = orte_rmaps_base.display_map; } /* set the default mapping policy IFF it wasn't provided */ if (!ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) { ORTE_SET_MAPPING_POLICY(jdata->map->mapping, orte_rmaps_base.mapping); } if (!ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) { ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)); } /* ditto for rank and bind policies */ if (!ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) { ORTE_SET_RANKING_POLICY(jdata->map->ranking, orte_rmaps_base.ranking); } #if OPAL_HAVE_HWLOC if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) { jdata->map->binding = opal_hwloc_binding_policy; } #endif } #if OPAL_HAVE_HWLOC /* if we are not going to launch, then we need to set any * undefined topologies to match our own so the mapper * can operate */ if (orte_do_not_launch) { orte_node_t *node; hwloc_topology_t t0; int i; node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0); t0 = node->topology; for (i=1; i < orte_node_pool->size; i++) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { continue; } if (NULL == node->topology) { node->topology = t0; } } } #endif /* cycle thru the available mappers until one agrees to map * the job */ did_map = false; if (1 == opal_list_get_size(&orte_rmaps_base.selected_modules)) { /* forced selection */ mod = (orte_rmaps_base_selected_module_t*)opal_list_get_first(&orte_rmaps_base.selected_modules); jdata->map->req_mapper = strdup(mod->component->mca_component_name); } for (item = opal_list_get_first(&orte_rmaps_base.selected_modules); item != opal_list_get_end(&orte_rmaps_base.selected_modules); item = opal_list_get_next(item)) { mod = (orte_rmaps_base_selected_module_t*)item; if (ORTE_SUCCESS == (rc = mod->module->map_job(jdata)) || ORTE_ERR_RESOURCE_BUSY == rc) { did_map = true; break; } /* mappers return "next option" 
if they didn't attempt to * map the job. anything else is a true error. */ if (ORTE_ERR_TAKE_NEXT_OPTION != rc) { ORTE_ERROR_LOG(rc); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return; } } if (did_map && ORTE_ERR_RESOURCE_BUSY == rc) { /* the map was done but nothing could be mapped * for launch as all the resources were busy */ OBJ_RELEASE(caddy); return; } /* if we get here without doing the map, or with zero procs in * the map, then that's an error */ if (!did_map || 0 == jdata->num_procs) { orte_show_help("help-orte-rmaps-base.txt", "failed-map", true); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return; } /* compute and save local ranks */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) { ORTE_ERROR_LOG(rc); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return; } #if OPAL_HAVE_HWLOC /* compute and save bindings */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_bindings(jdata))) { ORTE_ERROR_LOG(rc); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); return; } #endif /* set the offset so shared memory components can potentially * connect to any spawned jobs */ jdata->offset = orte_total_procs; /* track the total number of procs launched by us */ orte_total_procs += jdata->num_procs; /* if it is a dynamic spawn, save the bookmark on the parent's job too */ if (ORTE_JOBID_INVALID != jdata->originator.jobid) { if (NULL != (parent = orte_get_job_data_object(jdata->originator.jobid))) { parent->bookmark = jdata->bookmark; } } /* if we wanted to display the map, now is the time to do it - ignore * daemon job */ if (jdata->map->display_map) { char *output=NULL; int i, j; orte_node_t *node; orte_proc_t *proc; if (orte_display_diffable_output) { /* intended solely to test mapping methods, this output * can become quite long when testing at scale. 
Rather * than enduring all the malloc/free's required to * create an arbitrary-length string, custom-generate * the output a line at a time here */ /* display just the procs in a diffable format */ opal_output(orte_clean_output, "<map>\n\t<jobid=%s>\n\t<offset=%s>", ORTE_JOBID_PRINT(jdata->jobid), ORTE_VPID_PRINT(jdata->offset)); fflush(stderr); /* loop through nodes */ for (i=0; i < jdata->map->nodes->size; i++) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) { continue; } opal_output(orte_clean_output, "\t<host name=%s>", (NULL == node->name) ? "UNKNOWN" : node->name); fflush(stderr); for (j=0; j < node->procs->size; j++) { if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { continue; } #if OPAL_HAVE_HWLOC { char locale[64]; if (NULL != proc->locale) { hwloc_bitmap_list_snprintf(locale, 64, proc->locale->cpuset); } opal_output(orte_clean_output, "\t\t<process rank=%s app_idx=%ld local_rank=%lu node_rank=%lu locale=%s binding=%s>", ORTE_VPID_PRINT(proc->name.vpid), (long)proc->app_idx, (unsigned long)proc->local_rank, (unsigned long)proc->node_rank, locale, (NULL == proc->cpu_bitmap) ? 
"NULL" : proc->cpu_bitmap); } #else opal_output(orte_clean_output, "\t\t<process rank=%s app_idx=%ld local_rank=%lu node_rank=%lu>", ORTE_VPID_PRINT(proc->name.vpid), (long)proc->app_idx, (unsigned long)proc->local_rank, (unsigned long)proc->node_rank); #endif fflush(stderr); } opal_output(orte_clean_output, "\t</host>"); fflush(stderr); } #if OPAL_HAVE_HWLOC { opal_hwloc_locality_t locality; orte_proc_t *p0; /* test locality - for the first node, print the locality of each proc relative to the first one */ node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, 0); p0 = (orte_proc_t*)opal_pointer_array_get_item(node->procs, 0); opal_output(orte_clean_output, "\t<locality>"); for (j=1; j < node->procs->size; j++) { if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { continue; } locality = opal_hwloc_base_get_relative_locality(node->topology, p0->cpu_bitmap, proc->cpu_bitmap); opal_output(orte_clean_output, "\t\t<rank=%s rank=%s locality=%s>", ORTE_VPID_PRINT(p0->name.vpid), ORTE_VPID_PRINT(proc->name.vpid), opal_hwloc_base_print_locality(locality)); } opal_output(orte_clean_output, "\t</locality>\n</map>"); fflush(stderr); } #else opal_output(orte_clean_output, "\n</map>"); fflush(stderr); #endif } else { opal_output(orte_clean_output, " Data for JOB %s offset %s", ORTE_JOBID_PRINT(jdata->jobid), ORTE_VPID_PRINT(jdata->offset)); opal_dss.print(&output, NULL, jdata->map, ORTE_JOB_MAP); if (orte_xml_output) { fprintf(orte_xml_fp, "%s\n", output); fflush(orte_xml_fp); } else { opal_output(orte_clean_output, "%s", output); } free(output); } } /* set the job state to the next position */ ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_COMPLETE); /* cleanup */ OBJ_RELEASE(caddy); }
/*
 * Build the node list from the LSF allocation and, when LSF supplies a
 * non-empty affinity hostfile, set the job up for sequential mapping.
 *
 * jdata - job whose map is switched to the "seq" mapper when an affinity
 *         file is in use (a map object is created if the job has none)
 * nodes - list that receives one orte_node_t per distinct consecutive host
 *         returned by lsb_getalloc(); repeated hosts bump the slot count
 *
 * Returns ORTE_ERR_NOT_AVAILABLE if LSF cannot provide the node list,
 * ORTE_ERR_SILENT if LSB_AFFINITY_HOSTFILE names a file that cannot be
 * stat'ed (help message already shown), ORTE_SUCCESS otherwise.
 */
static int allocate(orte_job_t *jdata, opal_list_t *nodes)
{
    char **nodelist;
    orte_node_t *node;
    int i, num_nodes;
    char *affinity_file;
    struct stat buf;
    char *ptr;

    /* get the list of allocated nodes */
    if ((num_nodes = lsb_getalloc(&nodelist)) < 0) {
        orte_show_help("help-ras-lsf.txt", "nodelist-failed", true);
        return ORTE_ERR_NOT_AVAILABLE;
    }

    node = NULL;

    /* step through the list */
    for (i = 0; i < num_nodes; i++) {
        /* unless FQDNs are to be kept (or this is an address), cut the
         * name at the first dot */
        if( !orte_keep_fqdn_hostnames && !opal_net_isaddr(nodelist[i]) ) {
            if (NULL != (ptr = strchr(nodelist[i], '.'))) {
                *ptr = '\0';
            }
        }

        /* is this a repeat of the current node? */
        if (NULL != node && 0 == strcmp(nodelist[i], node->name)) {
            /* it is a repeat - just bump the slot count */
            ++node->slots;
            opal_output_verbose(10, orte_ras_base_framework.framework_output,
                                "ras/lsf: +++ Node (%s) [slots=%d]", node->name, node->slots);
            continue;
        }

        /* not a repeat - create a node entry for it */
        node = OBJ_NEW(orte_node_t);
        node->name = strdup(nodelist[i]);
        node->slots_inuse = 0;
        node->slots_max = 0;
        node->slots = 1;
        node->state = ORTE_NODE_STATE_UP;
        opal_list_append(nodes, &node->super);

        opal_output_verbose(10, orte_ras_base_framework.framework_output,
                            "ras/lsf: New Node (%s) [slots=%d]", node->name, node->slots);
    }

    /* release the nodelist from lsf */
    opal_argv_free(nodelist);

    /* check for an affinity file */
    affinity_file = getenv("LSB_AFFINITY_HOSTFILE");
    if (NULL == affinity_file) {
        /* nothing more to do */
        return ORTE_SUCCESS;
    }

    /* check to see if the file is empty - if it is,
     * then affinity wasn't actually set for this job */
    if (0 != stat(affinity_file, &buf)) {
        orte_show_help("help-ras-lsf.txt", "affinity-file-not-found", true, affinity_file);
        return ORTE_ERR_SILENT;
    }
    if (0 == buf.st_size) {
        /* no affinity, so just return */
        return ORTE_SUCCESS;
    }

    /* the affinity file sequentially lists rank locations, with
     * cpusets given as physical cpu-ids. Setup the job object
     * so it knows to process this accordingly */
    if (NULL == jdata->map) {
        jdata->map = OBJ_NEW(orte_job_map_t);
    }
    ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_SEQ);
    /* need sequential mapper - release any mapper name that was requested
     * earlier so the old string isn't leaked */
    free(jdata->map->req_mapper);
    jdata->map->req_mapper = strdup("seq");
    /* tell the sequential mapper that all cpusets are to be treated as "physical" */
    orte_set_attribute(&jdata->attributes, ORTE_JOB_PHYSICAL_CPUIDS, true, NULL, OPAL_BOOL);
    /* LSF provides its info as hwthreads, so set the hwthread-as-cpus flag */
    opal_hwloc_use_hwthreads_as_cpus = true;
    /* don't override something provided by the user, but default to bind-to hwthread */
    if (!OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) {
        OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_HWTHREAD);
    }

    /*
     * Do not set the hostfile attribute on each app_context since that
     * would confuse the sequential mapper when it tries to assign bindings
     * when running an MPMD job.
     * Instead just overwrite the orte_default_hostfile so it will be
     * general for all of the app_contexts.
     */
    free(orte_default_hostfile);
    orte_default_hostfile = strdup(affinity_file);
    opal_output_verbose(10, orte_ras_base_framework.framework_output,
                        "ras/lsf: Set default_hostfile to %s",orte_default_hostfile);

    return ORTE_SUCCESS;
}
/*
 * Translate a "--map-by"-style specification into a mapping policy.
 *
 * policy - receives the resulting policy and directives on success
 * device - always reset to NULL first; receives a newly allocated copy of
 *          the device name for a "dist:<device>[,modifiers]" specification
 * inspec - "<policy>[:<modifiers>]", or NULL to get the by-socket default
 *          (in which case ORTE_MAPPING_GIVEN is not set)
 *
 * The policy keyword is matched as a case-insensitive prefix, in table
 * order.  With hwloc, "dist" and "ppr" are handled separately; "ppr" also
 * stores its pattern in orte_rmaps_base.ppr.
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_SILENT after a help message was shown or
 * check_modifiers() reported one, or the check_modifiers() error code.
 */
int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy,
                                       char **device, char *inspec)
{
    /* recognized policy keywords - order matters because abbreviations
     * are resolved to the first entry they are a prefix of */
    static const struct {
        const char *keyword;
        orte_mapping_policy_t value;
    } keywords[] = {
        { "slot", ORTE_MAPPING_BYSLOT },
        { "node", ORTE_MAPPING_BYNODE },
        { "seq", ORTE_MAPPING_SEQ },
#if OPAL_HAVE_HWLOC
        { "core", ORTE_MAPPING_BYCORE },
        { "l1cache", ORTE_MAPPING_BYL1CACHE },
        { "l2cache", ORTE_MAPPING_BYL2CACHE },
        { "l3cache", ORTE_MAPPING_BYL3CACHE },
        { "socket", ORTE_MAPPING_BYSOCKET },
        { "numa", ORTE_MAPPING_BYNUMA },
        { "board", ORTE_MAPPING_BYBOARD },
        { "hwthread", ORTE_MAPPING_BYHWTHREAD },
#endif
    };
    const size_t num_keywords = sizeof(keywords) / sizeof(keywords[0]);
    orte_mapping_policy_t result = 0;
    char *work;
    char *mods;
#if OPAL_HAVE_HWLOC
    char *cursor;
#endif
    int status;
    size_t speclen;
    size_t k;

    /* set defaults */
    *device = NULL;

    /* nothing specified - default policy, not flagged as user-given */
    if (NULL == inspec) {
        ORTE_SET_MAPPING_POLICY(result, ORTE_MAPPING_BYSOCKET);
        *policy = result;
        return ORTE_SUCCESS;
    }

    /* work on a private copy so the caller's string stays intact */
    work = strdup(inspec);

    /* a colon separates the policy from its modifiers */
    mods = strchr(work, ':');
    if (NULL != mods) {
        *mods = '\0';
        mods++;

#if OPAL_HAVE_HWLOC
        if (0 == strncasecmp(work, "dist", strlen(work))) {
            ORTE_SET_MAPPING_POLICY(result, ORTE_MAPPING_BYDIST);
            /* the text up to the first comma is the device; whatever
             * follows is an optional modifier list, and an unrecognized
             * modifier is tolerated here */
            cursor = strchr(mods, ',');
            if (NULL != cursor) {
                *cursor = '\0';
                cursor++;
                if (ORTE_ERR_SILENT == check_modifiers(cursor, &result)) {
                    free(work);
                    return ORTE_ERR_SILENT;
                }
            }
            *device = strdup(mods);
            ORTE_SET_MAPPING_DIRECTIVE(result, ORTE_MAPPING_GIVEN);
            free(work);
            *policy = result;
            return ORTE_SUCCESS;
        }

        if (0 == strncasecmp(work, "ppr", strlen(work))) {
            /* the pattern needs at least one colon between the count and
             * the object type; anything after the last colon may be a
             * modifier list */
            cursor = strrchr(mods, ':');
            if (NULL == cursor) {
                orte_show_help("help-orte-rmaps-base.txt", "invalid-pattern", true, inspec);
                free(work);
                return ORTE_ERR_SILENT;
            }
            status = check_modifiers(cursor + 1, &result);
            if (ORTE_ERR_SILENT == status) {
                free(work);
                return ORTE_ERR_SILENT;
            }
            if (ORTE_SUCCESS == status) {
                /* modifiers were consumed - cut them off the pattern */
                *cursor = '\0';
            }
            orte_rmaps_base.ppr = strdup(mods);
            ORTE_SET_MAPPING_POLICY(result, ORTE_MAPPING_PPR);
            ORTE_SET_MAPPING_DIRECTIVE(result, ORTE_MAPPING_GIVEN);
            free(work);
            *policy = result;
            return ORTE_SUCCESS;
        }
#endif

        /* every other policy requires valid modifiers after the colon */
        status = check_modifiers(mods, &result);
        if (ORTE_SUCCESS != status && ORTE_ERR_TAKE_NEXT_OPTION != status) {
            if (ORTE_ERR_BAD_PARAM == status) {
                orte_show_help("help-orte-rmaps-base.txt", "unrecognized-modifier", true, inspec);
            }
            free(work);
            return status;
        }
    }

    /* find the first keyword the given text is a prefix of */
    speclen = strlen(work);
    for (k = 0; k < num_keywords; k++) {
        if (0 == strncasecmp(work, keywords[k].keyword, speclen)) {
            break;
        }
    }
    if (num_keywords == k) {
        orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "mapping", work);
        free(work);
        return ORTE_ERR_SILENT;
    }

    ORTE_SET_MAPPING_POLICY(result, keywords[k].value);
#if OPAL_HAVE_HWLOC
    if (ORTE_MAPPING_BYHWTHREAD == keywords[k].value) {
        /* if we are mapping processes to individual hwthreads, then
         * we need to treat those hwthreads as separate cpus
         */
        opal_hwloc_use_hwthreads_as_cpus = true;
    }
#endif

    free(work);
    ORTE_SET_MAPPING_DIRECTIVE(result, ORTE_MAPPING_GIVEN);
    *policy = result;
    return ORTE_SUCCESS;
}
/**
 * Parse a mapping-policy specification of the form
 * "policy[:modifier,modifier,...]" into a policy value.
 *
 * Policy and modifier names are compared case-insensitively over the
 * length of the user's token only, so abbreviations are accepted and the
 * first name in each chain that the token prefixes wins.
 *
 * @param policy  [out] receives the assembled policy/directive value; it is
 *                written only when ORTE_SUCCESS is returned.
 * @param device  [out] reset to NULL on entry; for "dist:<device>[,span]"
 *                (OPAL_HAVE_HWLOC builds only) it receives a strdup'd copy
 *                of the device name.
 * @param spec    the specification to parse, or NULL to select the default
 *                (by-socket policy with the SPAN directive, GIVEN cleared).
 *
 * @return ORTE_SUCCESS, or ORTE_ERR_SILENT after the problem has been
 *         reported through orte_show_help.
 */
int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy,
                                       char **device, char *spec)
{
    char **ck = NULL, **ck2 = NULL;
    orte_mapping_policy_t tmp;
    int i;
    int rc = ORTE_ERR_SILENT;
    size_t len;

    /* set defaults */
    tmp = 0;
    *device = NULL;

    if (NULL == spec) {
        ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
        ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_SPAN);
        ORTE_UNSET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN);
        rc = ORTE_SUCCESS;
        goto cleanup;
    }

    ck = opal_argv_split(spec, ':');
    /* NOTE(review): the NULL checks guard against a split that yields no
     * tokens (presumably an empty spec) - the policy name ck[0] is
     * dereferenced below, so such input is reported as a bad policy */
    if (NULL == ck || NULL == ck[0] || 2 < opal_argv_count(ck)) {
        /* incorrect format - report the spec we were actually given */
        orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true,
                       "mapping", spec);
        goto cleanup;
    }

    if (2 == opal_argv_count(ck)) {
        ck2 = opal_argv_split(ck[1], ',');

#if OPAL_HAVE_HWLOC
        /* if the policy is "dist", then we set the policy to that value
         * and save the first modifier as the device */
        if (0 == strncasecmp(ck[0], "dist", strlen(ck[0]))) {
            ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYDIST);
            if (NULL != ck2 && NULL != ck2[0]) {
                *device = strdup(ck2[0]);
                for (i = 1; NULL != ck2[i]; i++) {
                    if (0 == strncasecmp(ck2[i], "span", strlen(ck2[i]))) {
                        ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_SPAN);
                    }
                }
            }
            /* NOTE(review): as in the original, this path does not set
             * ORTE_MAPPING_GIVEN - confirm that is intended */
            rc = ORTE_SUCCESS;
            goto cleanup;
        }
#endif

        /* the three modifiers are siblings: each token must match exactly
         * one of them or it is rejected */
        for (i = 0; NULL != ck2 && NULL != ck2[i]; i++) {
            if (0 == strncasecmp(ck2[i], "span", strlen(ck2[i]))) {
                ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_SPAN);
            } else if (0 == strncasecmp(ck2[i], "oversubscribe", strlen(ck2[i]))) {
                if ((ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(tmp)) &&
                    (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(tmp))) {
                    /* error - an earlier modifier already forbade it */
                    orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true,
                                   "mapping", "oversubscribe", "nooversubscribe");
                    goto cleanup;
                }
                ORTE_UNSET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_NO_OVERSUBSCRIBE);
                ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_SUBSCRIBE_GIVEN);
            } else if (0 == strncasecmp(ck2[i], "nooversubscribe", strlen(ck2[i]))) {
                if ((ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(tmp)) &&
                    !(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(tmp))) {
                    /* error - an earlier modifier already allowed it */
                    orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true,
                                   "mapping", "nooversubscribe", "oversubscribe");
                    goto cleanup;
                }
                ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_NO_OVERSUBSCRIBE);
                ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_SUBSCRIBE_GIVEN);
            } else {
                /* unrecognized modifier */
                orte_show_help("help-orte-rmaps-base.txt", "unrecognized-modifier", true,
                               "mapping", ck2[i]);
                goto cleanup;
            }
        }
    }

    len = strlen(ck[0]);
    if (0 == strncasecmp(ck[0], "slot", len)) {
        ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSLOT);
    } else if (0 == strncasecmp(ck[0], "node", len)) {
        ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNODE);
#if OPAL_HAVE_HWLOC
    } else if (0 == strncasecmp(ck[0], "core", len)) {
        ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYCORE);
    } else if (0 == strncasecmp(ck[0], "l1cache", len)) {
        ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL1CACHE);
    } else if (0 == strncasecmp(ck[0], "l2cache", len)) {
        ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL2CACHE);
    } else if (0 == strncasecmp(ck[0], "l3cache", len)) {
        ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL3CACHE);
    } else if (0 == strncasecmp(ck[0], "socket", len)) {
        ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
    } else if (0 == strncasecmp(ck[0], "numa", len)) {
        ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNUMA);
    } else if (0 == strncasecmp(ck[0], "board", len)) {
        ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYBOARD);
    } else if (0 == strncasecmp(ck[0], "hwthread", len)) {
        ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYHWTHREAD);
        /* if we are mapping processes to individual hwthreads, then
         * we need to treat those hwthreads as separate cpus */
        opal_hwloc_use_hwthreads_as_cpus = true;
#endif
    } else {
        orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true,
                       "mapping", spec);
        goto cleanup;
    }

    /* the user explicitly chose this policy */
    ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN);
    rc = ORTE_SUCCESS;

cleanup:
    /* single exit: release both token arrays on every path */
    if (NULL != ck2) {
        opal_argv_free(ck2);
    }
    if (NULL != ck) {
        opal_argv_free(ck);
    }
    if (ORTE_SUCCESS == rc) {
        *policy = tmp;
    }
    return rc;
}
/**
 * Framework open function: resets the rmaps base globals, derives the
 * mapping and ranking policies from the registered parameters, applies the
 * individual convenience flags on top of them, and finally opens the
 * available components.
 *
 * Returns ORTE_ERR_SILENT whenever a problem has already been reported via
 * orte_show_help (or flagged by a component); otherwise the status of the
 * failing policy parser or of the component open call.
 */
static int orte_rmaps_base_open(mca_base_open_flag_t flags)
{
    int status;

    /* start from a clean set of globals */
    OBJ_CONSTRUCT(&orte_rmaps_base.selected_modules, opal_list_t);
    orte_rmaps_base.ppr = NULL;
    orte_rmaps_base.slot_list = NULL;
    orte_rmaps_base.mapping = 0;
    orte_rmaps_base.ranking = 0;

#if OPAL_HAVE_HWLOC
    /* a user-supplied topology file replaces our own topology: mpirun
     * only needs to see the compute node topology for mapping purposes,
     * even though our actual topology may differ */
    if (NULL != rmaps_base_topo_file) {
        status = opal_hwloc_base_set_topology(rmaps_base_topo_file);
        if (OPAL_SUCCESS != status) {
            orte_show_help("help-orte-rmaps-base.txt", "topo-file", true,
                           rmaps_base_topo_file);
            return ORTE_ERR_SILENT;
        }
    }
#endif

    /* mapping first - the ranking parser is handed the resulting mapping */
    status = orte_rmaps_base_set_mapping_policy(&orte_rmaps_base.mapping,
                                                &orte_rmaps_base.device,
                                                rmaps_base_mapping_policy);
    if (ORTE_SUCCESS != status) {
        return status;
    }

    status = orte_rmaps_base_set_ranking_policy(&orte_rmaps_base.ranking,
                                                orte_rmaps_base.mapping,
                                                rmaps_base_ranking_policy);
    if (ORTE_SUCCESS != status) {
        return status;
    }

    if (rmaps_base_byslot) {
        /* mapping becomes byslot unless a different policy was given */
        if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
            if (ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) != ORTE_MAPPING_BYSLOT) {
                /* an explicit policy may not be redefined */
                orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true,
                               "mapping", "byslot",
                               orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
                return ORTE_ERR_SILENT;
            }
        }
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYSLOT);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);

        /* ranking follows suit, under the same rule */
        if (ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(orte_rmaps_base.ranking)) {
            if (ORTE_GET_RANKING_POLICY(orte_rmaps_base.ranking) != ORTE_RANK_BY_SLOT) {
                orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true,
                               "ranking", "byslot",
                               orte_rmaps_base_print_ranking(orte_rmaps_base.ranking));
                return ORTE_ERR_SILENT;
            }
        }
        ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_SLOT);
        ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
    }

    if (rmaps_base_bynode) {
        /* mapping becomes bynode unless a different policy was given */
        if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
            if (ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) != ORTE_MAPPING_BYNODE) {
                orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true,
                               "mapping", "bynode",
                               orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
                return ORTE_ERR_SILENT;
            }
        }
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYNODE);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);

        /* ranking follows suit, under the same rule */
        if (ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(orte_rmaps_base.ranking)) {
            if (ORTE_GET_RANKING_POLICY(orte_rmaps_base.ranking) != ORTE_RANK_BY_NODE) {
                orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true,
                               "ranking", "bynode",
                               orte_rmaps_base_print_ranking(orte_rmaps_base.ranking));
                return ORTE_ERR_SILENT;
            }
        }
        ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_NODE);
        ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
    }

    /* keep procs off the local node when asked to */
    if (rmaps_base_no_schedule_local) {
        orte_rmaps_base.mapping |= ORTE_MAPPING_NO_USE_LOCAL;
    }

    /* forbid oversubscription - conflicts with an earlier explicit "allow" */
    if (rmaps_base_no_oversubscribe) {
        if (ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
            if (!(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
                orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true,
                               "mapping", "no-oversubscribe",
                               orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
                return ORTE_ERR_SILENT;
            }
        }
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_SUBSCRIBE_GIVEN);
    }

    /* allow oversubscription - conflicts with an earlier explicit "forbid" */
    if (rmaps_base_oversubscribe) {
        if (ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
            if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
                orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true,
                               "mapping", "oversubscribe",
                               orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
                return ORTE_ERR_SILENT;
            }
        }
        ORTE_UNSET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_SUBSCRIBE_GIVEN);
    }

    /* developer-level map display requested */
    if (rmaps_base_display_devel_map) {
        orte_rmaps_base.display_map = true;
        orte_devel_level_output = true;
    }

    /* diffable map display requested */
    if (rmaps_base_display_diffable_map) {
        orte_rmaps_base.display_map = true;
        orte_display_diffable_output = true;
    }

    /* now open every available component */
    status = mca_base_framework_components_open(&orte_rmaps_base_framework, flags);

    /* a component that hit a conflict has marked the mapping and already
     * reported the error, so tell the rest of the chain to stay quiet */
    if (ORTE_MAPPING_CONFLICTED & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
        return ORTE_ERR_SILENT;
    }

    return status;
}
static void setup_job_complete(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; orte_job_t *jdata = caddy->jdata; int i, j; orte_app_context_t *app; orte_proc_t *proc; orte_vpid_t vpid; opal_buffer_t *buf; /* check that the job meets our requirements */ vpid = 0; for (i=0; i < jdata->apps->size; i++) { if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) { continue; } if (app->num_procs <= 0) { /* must specify -np for staged_hnp execution */ orte_show_help("help-state-staged-hnp.txt", "no-np", true); ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_SILENT_ABORT); OBJ_RELEASE(caddy); return; } /* build the proc arrays - we'll need them later */ for (j=0; j < app->num_procs; j++) { proc = OBJ_NEW(orte_proc_t); proc->name.jobid = jdata->jobid; proc->name.vpid = vpid; proc->app_idx = i; proc->app_rank = j; /* flag that the proc is NOT to be included * in a pidmap message so we don't do it until * the proc is actually scheduled for launch */ ORTE_FLAG_UNSET(proc, ORTE_PROC_FLAG_UPDATED); /* procs must not barrier when executing in stages */ orte_set_attribute(&proc->attributes, ORTE_PROC_NOBARRIER, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); /* add it to the job */ opal_pointer_array_set_item(jdata->procs, vpid, proc); jdata->num_procs++; vpid++; /* add it to the app */ OBJ_RETAIN(proc); opal_pointer_array_set_item(&app->procs, j, proc); } } /* set the job map to use the staged_hnp mapper */ if (NULL == jdata->map) { jdata->map = OBJ_NEW(orte_job_map_t); jdata->map->req_mapper = strdup("staged"); ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_STAGED); ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE); jdata->map->display_map = orte_rmaps_base.display_map; } /* if there are any file_maps attached to this job, load them */ buf = NULL; if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FILE_MAPS, (void**)&buf, OPAL_BUFFER)) { orte_dfs.load_file_maps(jdata->jobid, buf, 
NULL, NULL); OBJ_RELEASE(buf); } orte_plm_base_setup_job_complete(0, 0, (void*)caddy); }