static int rte_init(void) { int ret; char *error = NULL; char **nodes = NULL, **ppnlist = NULL; char *envar; int32_t jobfam; int i, j, *ppn; orte_nid_t *node; orte_jmap_t *jmap; orte_pmap_t *pmap; orte_vpid_t vpid; bool byslot; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { error = "orte_ess_base_std_prolog"; goto error; } /* Only application procs can use this module. Since we * were directly launched by someone, we need to bootstrap * our own global info so we can startup. */ /* ensure that static ports were assigned - otherwise, we cant * work since we won't know how to talk to anyone else */ if (NULL == getenv("OMPI_MCA_oob_tcp_static_ports") && NULL == getenv("OMPI_MCA_oob_tcp_static_ports_v6")) { error = "static ports were not assigned"; goto error; } /* declare ourselves to be standalone - i.e., not launched by orted */ orte_standalone_operation = true; /* extract a jobid from the environment - can be totally * arbitrary. if one isn't provided, just fake it */ if (NULL != (envar = getenv("OMPI_MCA_orte_jobid"))) { jobfam = strtol(envar, NULL, 10); } else { jobfam = 1; } ORTE_PROC_MY_NAME->jobid = ORTE_CONSTRUCT_LOCAL_JOBID(0, jobfam); /* extract a rank from the environment */ if (NULL == (envar = getenv("OMPI_MCA_orte_rank"))) { error = "could not get process rank"; goto error; } ORTE_PROC_MY_NAME->vpid = strtol(envar, NULL, 10); ORTE_EPOCH_SET(ORTE_PROC_MY_NAME->epoch,ORTE_EPOCH_MIN); OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, "%s completed name definition", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* get the number of procs in this job */ if (NULL == (envar = getenv("OMPI_MCA_orte_num_procs"))) { error = "could not get number of processes in job"; goto error; } orte_process_info.num_procs = strtol(envar, NULL, 10); if (orte_process_info.max_procs < orte_process_info.num_procs) { orte_process_info.max_procs = orte_process_info.num_procs; } /* set the app_num so that MPI attributes get set correctly */ 
orte_process_info.app_num = 1; /* get the list of nodes */ if (NULL == (envar = getenv("OMPI_MCA_orte_nodes"))) { error = "could not get list of nodes"; goto error; } /* break this down */ nodes = opal_argv_split(envar, ','); orte_process_info.num_nodes = opal_argv_count(nodes); /* get the ppn */ if (NULL == (envar = getenv("OMPI_MCA_orte_ppn"))) { error = "could not get ppn"; goto error; } ppnlist = opal_argv_split(envar, ','); ppn = (int*)malloc(orte_process_info.num_nodes * sizeof(int)); if (1 == opal_argv_count(ppnlist)) { /* constant ppn */ j = strtol(ppnlist[0], NULL, 10); for (i=0; i < orte_process_info.num_nodes; i++) { ppn[i] = j; } } else { for (i=0; i < orte_process_info.num_nodes; i++) { ppn[i] = strtol(ppnlist[i], NULL, 10); } } opal_argv_free(ppnlist); /* get the mapping mode - default to byslot */ byslot = true; if (NULL != (envar = getenv("OMPI_MCA_mapping")) && 0 == strcmp(envar, "bynode")) { byslot = false; } /* setup the nidmap arrays */ if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(NULL))) { ORTE_ERROR_LOG(ret); error = "orte_util_nidmap_init"; goto error; } /* set the size of the nidmap storage so we minimize realloc's */ if (ORTE_SUCCESS != (ret = opal_pointer_array_set_size(&orte_nidmap, orte_process_info.num_nodes))) { error = "could not set pointer array size for nidmap"; goto error; } /* construct the nidmap */ for (i=0; i < orte_process_info.num_nodes; i++) { node = OBJ_NEW(orte_nid_t); if (0 == strcmp(nodes[i], orte_process_info.nodename) || opal_ifislocal(nodes[i])) { node->name = strdup(orte_process_info.nodename); } else { node->name = strdup(nodes[i]); } node->daemon = i; node->index = i; opal_pointer_array_set_item(&orte_nidmap, i, node); } opal_argv_free(nodes); /* create a job map for this job */ jmap = OBJ_NEW(orte_jmap_t); jmap->job = ORTE_PROC_MY_NAME->jobid; opal_pointer_array_add(&orte_jobmap, jmap); /* update the num procs */ jmap->num_procs = orte_process_info.num_procs; /* set the size of the pidmap storage so we 
minimize realloc's */ if (ORTE_SUCCESS != (ret = opal_pointer_array_set_size(&jmap->pmap, jmap->num_procs))) { ORTE_ERROR_LOG(ret); error = "could not set pointer array size for pidmap"; goto error; } /* construct the pidmap */ if (byslot) { vpid = 0; for (i=0; i < orte_process_info.num_nodes; i++) { node = (orte_nid_t*)opal_pointer_array_get_item(&orte_nidmap, i); /* for each node, cycle through the ppn */ for (j=0; j < ppn[i]; j++) { pmap = OBJ_NEW(orte_pmap_t); pmap->node = i; pmap->local_rank = j; pmap->node_rank = j; if (ORTE_SUCCESS != (ret = opal_pointer_array_set_item(&jmap->pmap, vpid, pmap))) { ORTE_ERROR_LOG(ret); error = "could not set pmap values"; goto error; } /* if this is me, then define the daemon's vpid to * be the node number */ if (vpid == ORTE_PROC_MY_NAME->vpid) { ORTE_PROC_MY_DAEMON->jobid = 0; ORTE_PROC_MY_DAEMON->vpid = i; ORTE_EPOCH_SET(ORTE_PROC_MY_DAEMON->epoch,ORTE_PROC_MY_NAME->epoch); } OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, "%s node %d name %s rank %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int) node->index, node->name, ORTE_VPID_PRINT(vpid))); vpid++; } } } else { /* cycle across the nodes */ vpid = 0; while (vpid < orte_process_info.num_procs) { for (i=0; i < orte_process_info.num_nodes && vpid < orte_process_info.num_procs; i++) { node = (orte_nid_t*)opal_pointer_array_get_item(&orte_nidmap, i); if (0 < ppn[i]) { pmap = OBJ_NEW(orte_pmap_t); pmap->node = i; pmap->local_rank = ppn[i]-1; pmap->node_rank = ppn[i]-1; if (ORTE_SUCCESS != (ret = opal_pointer_array_set_item(&jmap->pmap, vpid, pmap))) { ORTE_ERROR_LOG(ret); error = "could not set pmap values"; goto error; } /* if this is me, then define the daemon's vpid to * be the node number */ if (vpid == ORTE_PROC_MY_NAME->vpid) { ORTE_PROC_MY_DAEMON->jobid = 0; ORTE_PROC_MY_DAEMON->vpid = i; ORTE_EPOCH_SET(ORTE_PROC_MY_DAEMON->epoch,ORTE_PROC_MY_NAME->epoch); } OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, "%s node %d name %s rank %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), 
(int) node->index, node->name, (int)vpid)); vpid++; --ppn[i]; } } } } free(ppn); /* ensure we pick the correct critical components */ putenv("OMPI_MCA_grpcomm=hier"); putenv("OMPI_MCA_routed=direct"); /* use the default procedure to finish my setup */ if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_app_setup"; goto error; } if (0 < opal_output_get_verbosity(orte_ess_base_output)) { orte_nidmap_dump(); orte_jobmap_dump(); } return ORTE_SUCCESS; error: orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); return ret; }
/*
 * Add the specified node definitions to the global data store
 * NOTE: this removes all items from the list!
 *
 * Each node pulled off the list is either (a) merged into the HNP's
 * existing entry (slot 0 of orte_node_pool) when it names the local
 * host, or (b) appended to orte_node_pool.  Slot totals for the job
 * are accumulated into orte_ras_base.total_slots_alloc as we go.
 * Returns ORTE_SUCCESS, or an ORTE error code if the pool cannot be
 * resized or an insert fails.
 */
int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
{
    opal_list_item_t* item;
    orte_std_cntr_t num_nodes;
    int rc, i;
    orte_node_t *node, *hnp_node;
    char *ptr;
    /* assume the allocation contains only the HNP's node until we
     * actually insert some other node below */
    bool hnp_alone = true;

    /* get the number of nodes */
    num_nodes = (orte_std_cntr_t)opal_list_get_size(nodes);
    if (0 == num_nodes) {
        return ORTE_SUCCESS;  /* nothing to do */
    }

    OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
                         "%s ras:base:node_insert inserting %ld nodes",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (long)num_nodes));

    /* set the size of the global array - this helps minimize time
     * spent doing realloc's */
    if (ORTE_SUCCESS != (rc = opal_pointer_array_set_size(orte_node_pool, num_nodes))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* get the hnp node's info */
    hnp_node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);

    /* cycle through the list */
    while (NULL != (item = opal_list_remove_first(nodes))) {
        node = (orte_node_t*)item;
        /* the HNP had to already enter its node on the array - that entry is in the
         * first position since it is the first one entered. We need to check to see
         * if this node is the same as the HNP's node so we don't double-enter it */
        if (NULL != hnp_node &&
            (0 == strcmp(node->name, hnp_node->name) ||
             0 == strcmp(node->name, "localhost") ||
             opal_ifislocal(node->name))) {
            OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
                                 "%s ras:base:node_insert updating HNP [%s] info to %ld slots",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 node->name,
                                 (long)node->slots));
            /* flag that hnp has been allocated */
            orte_hnp_is_allocated = true;
            /* update the total slots in the job */
            orte_ras_base.total_slots_alloc += node->slots;
            /* copy the allocation data to that node's info - note that
             * slots are added to (not replacing) any the HNP already has */
            hnp_node->slots += node->slots;
            hnp_node->slots_max = node->slots_max;
            hnp_node->launch_id = node->launch_id;
            if (orte_managed_allocation) {
                /* the slots are always treated as sacred
                 * in managed allocations */
                hnp_node->slots_given = true;
            } else {
                /* in unmanaged allocations, take whatever
                 * was determined by the hostfile or dash-host
                 * parsers */
                hnp_node->slots_given = node->slots_given;
            }
            /* use the local name for our node - don't trust what
             * we got from an RM. If requested, store the resolved
             * nodename info */
            if (orte_show_resolved_nodenames) {
                /* if the node name is different, store it as an alias */
                if (0 != strcmp(node->name, hnp_node->name)) {
                    /* add to list of aliases for this node - only add if unique */
                    opal_argv_append_unique_nosize(&hnp_node->alias, node->name, false);
                }
                if (NULL != node->alias) {
                    /* now copy over any aliases that are unique */
                    for (i=0; NULL != node->alias[i]; i++) {
                        opal_argv_append_unique_nosize(&hnp_node->alias, node->alias[i], false);
                    }
                }
            }
            /* don't keep duplicate copy */
            OBJ_RELEASE(node);
        } else {
            /* insert the object onto the orte_nodes global array */
            OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
                                 "%s ras:base:node_insert node %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 (NULL == node->name) ? "NULL" : node->name));
            if (orte_managed_allocation) {
                /* the slots are always treated as sacred
                 * in managed allocations */
                node->slots_given = true;
            }
            /* insert it into the array */
            node->index = opal_pointer_array_add(orte_node_pool, (void*)node);
            if (ORTE_SUCCESS > (rc = node->index)) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            /* update the total slots in the job */
            orte_ras_base.total_slots_alloc += node->slots;
            /* check if we have fqdn names in the allocation */
            if (NULL != strchr(node->name, '.')) {
                orte_have_fqdn_allocation = true;
            }
            /* indicate the HNP is not alone */
            hnp_alone = false;
        }
    }

    /* if we didn't find any fqdn names in the allocation, then
     * ensure we don't have any domain info in the node record
     * for the hnp */
    if (!orte_have_fqdn_allocation && !hnp_alone) {
        if (NULL != (ptr = strchr(hnp_node->name, '.'))) {
            *ptr = '\0';
        }
    }

    return ORTE_SUCCESS;
}
/*
 * Add the specified node definitions to the global data store
 * NOTE: this removes all items from the list!
 *
 * Nodes naming the local host are folded into the HNP's entry (slot 0
 * of orte_node_pool); all others are appended to the pool.  The job's
 * total_slots_alloc is updated accordingly.
 *
 * Fixes vs. prior version: the HNP's entry is now fetched via the
 * opal_pointer_array accessor (instead of reaching into the array's
 * internals) and is NULL-checked before being dereferenced in strcmp.
 */
int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
{
    opal_list_item_t* item;
    orte_std_cntr_t num_nodes;
    int rc, i;
    orte_node_t *node, *hnp_node;

    /* get the number of nodes */
    num_nodes = (orte_std_cntr_t)opal_list_get_size(nodes);
    if (0 == num_nodes) {
        return ORTE_SUCCESS;  /* nothing to do */
    }

    OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
                         "%s ras:base:node_insert inserting %ld nodes",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (long)num_nodes));

    /* set the size of the global array - this helps minimize time
     * spent doing realloc's */
    if (ORTE_SUCCESS != (rc = opal_pointer_array_set_size(orte_node_pool, num_nodes))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* get the hnp node's info - use the accessor rather than poking at
     * the array internals; the result may be NULL if the HNP has not
     * yet entered its node, so guard every use below */
    hnp_node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);

    /* cycle through the list */
    while (NULL != (item = opal_list_remove_first(nodes))) {
        node = (orte_node_t*)item;
        /* the HNP had to already enter its node on the array - that entry is in the
         * first position since it is the first one entered. We need to check to see
         * if this node is the same as the HNP's node so we don't double-enter it */
        if (NULL != hnp_node &&
            (0 == strcmp(node->name, hnp_node->name) || opal_ifislocal(node->name))) {
            OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
                                 "%s ras:base:node_insert updating HNP info to %ld slots",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 (long)node->slots));
            /* flag that hnp has been allocated */
            orte_hnp_is_allocated = true;
            /* adjust the total slots in the job - remove the HNP's old
             * count before replacing it below */
            jdata->total_slots_alloc -= hnp_node->slots;
            /* copy the allocation data to that node's info */
            hnp_node->slots = node->slots;
            hnp_node->slots_max = node->slots_max;
            hnp_node->launch_id = node->launch_id;
            /* default allocate all the slots - may be modified later
             * as a result of filtering actions in mapper */
            hnp_node->slots_alloc = node->slots;
            /* use the local name for our node - don't trust what
             * we got from an RM. If requested, store the resolved
             * nodename info */
            if (orte_show_resolved_nodenames) {
                /* if the node name is different, store it as an alias */
                if (0 != strcmp(node->name, hnp_node->name)) {
                    /* add to list of aliases for this node - only add if unique */
                    opal_argv_append_unique_nosize(&hnp_node->alias, node->name);
                }
                if (NULL != node->alias) {
                    /* now copy over any aliases that are unique */
                    for (i=0; NULL != node->alias[i]; i++) {
                        opal_argv_append_unique_nosize(&hnp_node->alias, node->alias[i]);
                    }
                }
            }
            /* update the total slots in the job */
            jdata->total_slots_alloc += hnp_node->slots;
            /* don't keep duplicate copy */
            OBJ_RELEASE(node);
        } else {
            /* insert the object onto the orte_nodes global array */
            OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
                                 "%s ras:base:node_insert node %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 (NULL == node->name) ? "NULL" : node->name));
            /* default allocate all the slots - may be modified later
             * as a result of filtering actions in mapper */
            node->slots_alloc = node->slots;
            /* insert it into the array */
            node->index = opal_pointer_array_add(orte_node_pool, (void*)node);
            if (ORTE_SUCCESS > (rc = node->index)) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            /* update the total slots in the job */
            jdata->total_slots_alloc += node->slots;
        }
    }

    return ORTE_SUCCESS;
}
/*
 * Decode a nodemap byte object on a daemon: populates orte_node_pool,
 * creates/links the daemon proc objects in the daemon job, and updates
 * orte_process_info.num_procs/num_daemons.
 *
 * Fixes vs. prior version:
 *  - oversub was mistakenly declared as uint8_t* while being unpacked
 *    as a single OPAL_UINT8 into its address and then compared against
 *    0 as a pointer - i.e. one byte was written into uninitialized
 *    pointer storage and garbage was read back.  It is now a scalar.
 *  - buf was leaked (no OBJ_DESTRUCT) on every early error return;
 *    errors now funnel through a cleanup label.
 */
int orte_util_decode_daemon_nodemap(opal_byte_object_t *bo)
{
    int n;
    int32_t num_nodes, i;
    orte_vpid_t vpid;
    orte_node_t *node;
    opal_buffer_t buf;
    int rc;
    uint8_t oversub;        /* scalar flag - was erroneously a pointer */
    char *name;
    orte_job_t *daemons;
    orte_proc_t *dptr;

    OPAL_OUTPUT_VERBOSE((1, orte_nidmap_output,
                         "%s decode:nidmap decoding daemon nodemap",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* xfer the byte object to a buffer for unpacking */
    OBJ_CONSTRUCT(&buf, opal_buffer_t);
    opal_dss.load(&buf, bo->bytes, bo->size);

    /* unpack number of nodes */
    n=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &num_nodes, &n, OPAL_INT32))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_nidmap_output,
                         "%s decode:nidmap decoding %d nodes",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_nodes));

    /* set the size of the node pool storage so we minimize realloc's */
    if (ORTE_SUCCESS != (rc = opal_pointer_array_set_size(orte_node_pool, num_nodes))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* transfer the data to the nodes, counting the number of
     * daemons in the system */
    daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
    for (i=0; i < num_nodes; i++) {
        /* unpack the daemon vpid assigned to this node */
        n=1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &vpid, &n, ORTE_VPID))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        /* unpack and store the node's name */
        n=1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &name, &n, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        /* do we already have this node? */
        if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, vpid))) {
            node = OBJ_NEW(orte_node_t);
            node->name = name;      /* node takes ownership of the string */
            opal_pointer_array_set_item(orte_node_pool, vpid, node);
        } else {
            free(name);             /* already have a name - drop the dup */
        }
        /* if requested, unpack any aliases */
        if (orte_retain_aliases) {
            char *alias;
            uint8_t naliases, ni;
            n=1;
            if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &naliases, &n, OPAL_UINT8))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            for (ni=0; ni < naliases; ni++) {
                n=1;
                if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &alias, &n, OPAL_STRING))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                opal_argv_append_nosize(&node->alias, alias);
                free(alias);
            }
        }
        /* unpack the oversubscribed flag */
        n=1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &oversub, &n, OPAL_UINT8))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        if (ORTE_VPID_INVALID == vpid) {
            /* no daemon on this node */
            continue;
        }
        /* ensure a proc object exists for this daemon */
        if (NULL == (dptr = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, vpid))) {
            dptr = OBJ_NEW(orte_proc_t);
            dptr->name.jobid = ORTE_PROC_MY_NAME->jobid;
            dptr->name.vpid = vpid;
            opal_pointer_array_set_item(daemons->procs, vpid, dptr);
            daemons->num_procs++;
        }
        /* cross-link node <-> daemon, releasing any prior links */
        if (NULL != node->daemon) {
            OBJ_RELEASE(node->daemon);
        }
        OBJ_RETAIN(dptr);
        node->daemon = dptr;
        if (NULL != dptr->node) {
            OBJ_RELEASE(dptr->node);
        }
        OBJ_RETAIN(node);
        dptr->node = node;
        node->oversubscribed = (0 != oversub);
    }

    orte_process_info.num_procs = daemons->num_procs;
    if (orte_process_info.max_procs < orte_process_info.num_procs) {
        orte_process_info.max_procs = orte_process_info.num_procs;
    }

    /* update num_daemons */
    orte_process_info.num_daemons = daemons->num_procs;

    if (0 < opal_output_get_verbosity(orte_nidmap_output)) {
        for (i=0; i < num_nodes; i++) {
            if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
                continue;
            }
            opal_output(0, "%s node[%d].name %s daemon %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i,
                        (NULL == node->name) ? "NULL" : node->name,
                        (NULL == node->daemon) ? "NONE" : ORTE_VPID_PRINT(node->daemon->name.vpid));
        }
    }

    rc = ORTE_SUCCESS;

 cleanup:
    OBJ_DESTRUCT(&buf);
    return rc;
}
/*
 * Decode a pidmap byte object: for each job in the buffer, unpack the
 * per-proc node index, local rank, and node rank arrays, and either
 * refresh an existing jmap entry in orte_jobmap or append a new one.
 *
 * Fixes vs. prior version:
 *  - the node array was allocated with a hard-coded "* 4" instead of
 *    sizeof(int32_t)
 *  - one failure path used "return rc" instead of "goto cleanup",
 *    leaking buf and the three unpacked arrays
 *  - nodes/local_rank/node_rank leaked on every mid-loop error; they
 *    are now NULL-initialized and released in cleanup
 */
int orte_util_decode_pidmap(opal_byte_object_t *bo)
{
    orte_jobid_t jobid;
    orte_vpid_t i, num_procs;
    orte_pmap_t *pmap;
    int32_t *nodes = NULL;
    orte_local_rank_t *local_rank = NULL;
    orte_node_rank_t *node_rank = NULL;
    orte_std_cntr_t n;
    opal_buffer_t buf;
    orte_jmap_t *jmap;
    bool already_present;
    int j;
    int rc;

    /* xfer the byte object to a buffer for unpacking */
    OBJ_CONSTRUCT(&buf, opal_buffer_t);
    if (ORTE_SUCCESS != (rc = opal_dss.load(&buf, bo->bytes, bo->size))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    n = 1;
    /* cycle through the buffer - one jobid per iteration */
    while (ORTE_SUCCESS == (rc = opal_dss.unpack(&buf, &jobid, &n, ORTE_JOBID))) {
        /* unfortunately, job objects cannot be stored
         * by index number as the jobid is a constructed
         * value. So we have no choice but to cycle through
         * the jobmap pointer array and look for this entry. Since
         * jobs are cleaned up as they complete, check the
         * entire array */
        jmap = NULL;
        already_present = false;
        for (j=0; j < orte_jobmap.size; j++) {
            if (NULL == (jmap = (orte_jmap_t*)opal_pointer_array_get_item(&orte_jobmap, j))) {
                continue;
            }
            if (jobid == jmap->job) {
                already_present = true;
                break;
            }
        }

        /* unpack the number of procs */
        n=1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &num_procs, &n, ORTE_VPID))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* allocate memory for the node info */
        nodes = (int32_t*)malloc(num_procs * sizeof(int32_t));
        /* unpack it in one shot */
        n=num_procs;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, nodes, &n, OPAL_INT32))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* allocate memory for local ranks */
        local_rank = (orte_local_rank_t*)malloc(num_procs*sizeof(orte_local_rank_t));
        /* unpack them in one shot */
        n=num_procs;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, local_rank, &n, ORTE_LOCAL_RANK))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* allocate memory for node ranks */
        node_rank = (orte_node_rank_t*)malloc(num_procs*sizeof(orte_node_rank_t));
        /* unpack node ranks in one shot */
        n=num_procs;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, node_rank, &n, ORTE_NODE_RANK))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* if we already know about this job, we need to check the data to see
         * if something has changed - e.g., a proc that is being restarted somewhere
         * other than where it previously was */
        if (already_present) {
            /* we already have the jmap object, so let's refresh its pidmap
             * using the new data - start by cleaning out the old array */
            for (j=0; j < jmap->pmap.size; j++) {
                if (NULL == (pmap = (orte_pmap_t*)opal_pointer_array_get_item(&jmap->pmap, j))) {
                    continue;
                }
                OBJ_RELEASE(pmap);
            }
            /* now use the opal function to reset the internal pointers */
            opal_pointer_array_remove_all(&jmap->pmap);
            /* set the size of the storage so we minimize realloc's */
            if (ORTE_SUCCESS != (rc = opal_pointer_array_set_size(&jmap->pmap, num_procs))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;   /* was "return rc" - leaked buf and arrays */
            }
            /* add in the updated array */
            for (i=0; i < num_procs; i++) {
                pmap = OBJ_NEW(orte_pmap_t);
                /* add the pidmap entry at the specific site corresponding
                 * to the proc's vpid */
                if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(&jmap->pmap, i, pmap))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                /* add/update the data */
                pmap->node = nodes[i];
                pmap->local_rank = local_rank[i];
                pmap->node_rank = node_rank[i];
            }
            /* update the #procs */
            jmap->num_procs = num_procs;
        } else {
            /* if we don't already have this data, store it
             * unfortunately, job objects cannot be stored
             * by index number as the jobid is a constructed
             * value. So we have to just add it to the end
             * of the array */
            jmap = OBJ_NEW(orte_jmap_t);
            jmap->job = jobid;
            jmap->num_procs = num_procs;
            if (0 > (j = opal_pointer_array_add(&orte_jobmap, jmap))) {
                ORTE_ERROR_LOG(j);
                rc = j;
                goto cleanup;
            }
            /* allocate memory for the procs array */
            opal_pointer_array_set_size(&jmap->pmap, num_procs);
            /* xfer the data */
            for (i=0; i < num_procs; i++) {
                pmap = OBJ_NEW(orte_pmap_t);
                pmap->node = nodes[i];
                pmap->local_rank = local_rank[i];
                pmap->node_rank = node_rank[i];
                /* add the pidmap entry at the specific site corresponding
                 * to the proc's vpid */
                if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(&jmap->pmap, i, pmap))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
            }
        }

        /* release this iteration's data - NULL so cleanup won't double-free */
        free(nodes);
        nodes = NULL;
        free(local_rank);
        local_rank = NULL;
        free(node_rank);
        node_rank = NULL;

        /* setup for next cycle */
        n = 1;
    }
    /* running off the end of the buffer is the normal loop exit */
    if (ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER == OPAL_SOS_GET_ERROR_CODE(rc)) {
        rc = ORTE_SUCCESS;
    }

 cleanup:
    free(nodes);
    free(local_rank);
    free(node_rank);
    OBJ_DESTRUCT(&buf);
    return rc;
}
/*
 * Decode a nodemap byte object into the global orte_nidmap array,
 * replacing any existing contents, and update the daemon counts in
 * orte_process_info.
 *
 * Fixes vs. prior version: vpids/oversub leaked when a later unpack
 * failed, and buf was leaked (no OBJ_DESTRUCT) on every early error
 * return.  All exits now funnel through a cleanup label that releases
 * everything.
 */
int orte_util_decode_nodemap(opal_byte_object_t *bo)
{
    int n;
    int32_t num_nodes, i, num_daemons;
    orte_nid_t *node;
    orte_vpid_t *vpids = NULL;
    orte_nid_t *nd, *ndptr;
    opal_buffer_t buf;
    int rc;
    uint8_t *oversub = NULL;

    OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
                         "%s decode:nidmap decoding nodemap",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* if there are any entries already in the node array, clear it out */
    if (0 < orte_nidmap.size) {
        /* unfortunately, the opal function "remove_all" doesn't release
         * the memory pointed to by the elements in the array, so we need
         * to release those first */
        for (i=0; i < orte_nidmap.size; i++) {
            if (NULL != (ndptr = (orte_nid_t*)opal_pointer_array_get_item(&orte_nidmap, i))) {
                OBJ_RELEASE(ndptr);
            }
        }
        /* now use the opal function to reset the internal pointers */
        opal_pointer_array_remove_all(&orte_nidmap);
    }

    /* xfer the byte object to a buffer for unpacking */
    OBJ_CONSTRUCT(&buf, opal_buffer_t);
    opal_dss.load(&buf, bo->bytes, bo->size);

    /* unpack number of nodes */
    n=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &num_nodes, &n, OPAL_INT32))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
                         "%s decode:nidmap decoding %d nodes",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_nodes));

    /* set the size of the nidmap storage so we minimize realloc's */
    if (ORTE_SUCCESS != (rc = opal_pointer_array_set_size(&orte_nidmap, num_nodes))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* loop over nodes and unpack the raw nodename */
    for (i=0; i < num_nodes; i++) {
        node = OBJ_NEW(orte_nid_t);
        /* the arch defaults to our arch so that non-hetero
         * case will yield correct behavior */
        opal_pointer_array_set_item(&orte_nidmap, i, node);

        /* unpack the node's name */
        n=1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &(node->name), &n, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
    }

    /* unpack the daemon vpids */
    vpids = (orte_vpid_t*)malloc(num_nodes * sizeof(orte_vpid_t));
    n=num_nodes;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, vpids, &n, ORTE_VPID))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* unpack the oversubscribed flags */
    oversub = (uint8_t*)malloc(num_nodes * sizeof(uint8_t));
    n=num_nodes;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, oversub, &n, OPAL_UINT8))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* transfer the data to the nidmap, counting the number of
     * daemons in the system */
    num_daemons = 0;
    for (i=0; i < num_nodes; i++) {
        if (NULL != (ndptr = (orte_nid_t*)opal_pointer_array_get_item(&orte_nidmap, i))) {
            ndptr->daemon = vpids[i];
            ndptr->oversubscribed = (0 != oversub[i]);
            if (ORTE_VPID_INVALID != vpids[i]) {
                ++num_daemons;
            }
        }
    }

    /* if we are a daemon or the HNP, update our num_procs */
    if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) {
        orte_process_info.num_procs = num_daemons;
        if (orte_process_info.max_procs < orte_process_info.num_procs) {
            orte_process_info.max_procs = orte_process_info.num_procs;
        }
    }
    /* update num_daemons */
    orte_process_info.num_daemons = num_daemons;

    if (0 < opal_output_get_verbosity(orte_debug_output)) {
        for (i=0; i < num_nodes; i++) {
            if (NULL == (nd = (orte_nid_t*)opal_pointer_array_get_item(&orte_nidmap, i))) {
                continue;
            }
            opal_output(0, "%s node[%d].name %s daemon %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i,
                        (NULL == nd->name) ? "NULL" : nd->name,
                        ORTE_VPID_PRINT(nd->daemon));
        }
    }

    rc = ORTE_SUCCESS;

 cleanup:
    free(vpids);
    free(oversub);
    OBJ_DESTRUCT(&buf);
    return rc;
}
int orte_util_build_daemon_nidmap(char **nodes) { orte_nid_t *node; int i, num_nodes; int rc; struct hostent *h; opal_buffer_t buf; orte_process_name_t proc; char *uri, *addr; char *proc_name; num_nodes = opal_argv_count(nodes); OPAL_OUTPUT_VERBOSE((2, orte_debug_output, "%s orte:util:build:daemon:nidmap found %d nodes", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_nodes)); if (0 == num_nodes) { /* nothing to do */ return ORTE_SUCCESS; } /* set the size of the nidmap storage so we minimize realloc's */ if (ORTE_SUCCESS != (rc = opal_pointer_array_set_size(&orte_nidmap, num_nodes+1))) { ORTE_ERROR_LOG(rc); return rc; } /* install the entry for the HNP */ node = OBJ_NEW(orte_nid_t); node->name = strdup("HNP"); node->daemon = 0; /* the arch defaults to our arch so that non-hetero * case will yield correct behavior */ opal_pointer_array_set_item(&orte_nidmap, 0, node); /* the daemon vpids will be assigned in order, * starting with vpid=1 for the first node in * the list */ OBJ_CONSTRUCT(&buf, opal_buffer_t); proc.jobid = ORTE_PROC_MY_NAME->jobid; for (i=0; i < num_nodes; i++) { node = OBJ_NEW(orte_nid_t); node->name = strdup(nodes[i]); node->daemon = i+1; /* the arch defaults to our arch so that non-hetero * case will yield correct behavior */ opal_pointer_array_set_item(&orte_nidmap, node->daemon, node); /* lookup the address of this node */ if (NULL == (h = gethostbyname(node->name))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } addr = inet_ntoa(*(struct in_addr*)h->h_addr_list[0]); /* since we are using static ports, all my fellow daemons will be on my * port. Setup the contact info for each daemon in my hash tables. 
Note * that this will -not- open a port to those daemons, but will only * define the info necessary for opening such a port if/when I communicate * to them */ /* construct the URI */ proc.vpid = node->daemon; ORTE_EPOCH_SET(proc.epoch,ORTE_EPOCH_MIN); orte_util_convert_process_name_to_string(&proc_name, &proc); asprintf(&uri, "%s;tcp://%s:%d", proc_name, addr, (int)orte_process_info.my_port); OPAL_OUTPUT_VERBOSE((2, orte_debug_output, "%s orte:util:build:daemon:nidmap node %s daemon %d addr %s uri %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, (int)node->daemon, addr, uri)); opal_dss.pack(&buf, &uri, 1, OPAL_STRING); free(proc_name); free(uri); } /* load the hash tables */ if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(&buf))) { ORTE_ERROR_LOG(rc); } OBJ_DESTRUCT(&buf); return rc; }