static int rte_init(void) { int rc; orte_vpid_t vpid; char *vpid_string; char *nidmap_string; vpid_string = getenv("PTL_MY_RID"); nidmap_string = getenv("PTL_NIDMAP"); if (NULL == vpid_string || NULL == nidmap_string || NULL == getenv("PTL_PIDMAP") || NULL == getenv("PTL_IFACE")) { return ORTE_ERR_NOT_FOUND; } /* Get our process information */ /* Procs in this environment are directly launched. Hence, there * was no mpirun to create a jobid for us, and each app proc is * going to have to fend for itself. For now, we assume that the * jobid is some arbitrary number (say, 1). */ ORTE_PROC_MY_NAME->jobid = 1; /* not 0, since it has special meaning */ /* find our vpid assuming range starts at 0 */ if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_vpid(&vpid, vpid_string))) { ORTE_ERROR_LOG(rc); return(rc); } ORTE_PROC_MY_NAME->vpid = vpid; /* * Get the number of procs in the job. We assume vpids start at 0. We * assume that there are <num : + 1> procs, since the nidmap is a * : seperated list of nids, and the utcp reference implementation * assumes all will be present */ /* split the nidmap string */ nidmap = opal_argv_split(nidmap_string, ':'); orte_process_info.num_procs = (orte_std_cntr_t) opal_argv_count(nidmap); /* MPI_Init needs the grpcomm framework, so we have to init it */ if (ORTE_SUCCESS != (rc = orte_grpcomm_base_open())) { ORTE_ERROR_LOG(rc); return rc; } if (ORTE_SUCCESS != (rc = orte_grpcomm_base_select())) { ORTE_ERROR_LOG(rc); return rc; } /* that's all we need here */ return ORTE_SUCCESS; }
static int lsf_set_name(void) { int rc; int lsf_nodeid; orte_jobid_t jobid; orte_vpid_t vpid; char* tmp; mca_base_param_reg_string_name("orte", "ess_jobid", "Process jobid", true, false, NULL, &tmp); if (NULL == tmp) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_jobid(&jobid, tmp))) { ORTE_ERROR_LOG(rc); return(rc); } free(tmp); ORTE_PROC_MY_NAME->jobid = jobid; /* get the vpid from the nodeid */ mca_base_param_reg_string_name("orte", "ess_vpid", "Process vpid", true, false, NULL, &tmp); if (NULL == tmp) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_vpid(&vpid, tmp))) { ORTE_ERROR_LOG(rc); return(rc); } free(tmp); lsf_nodeid = atoi(getenv("LSF_PM_TASKID")); opal_output_verbose(1, orte_ess_base_output, "ess:lsf found LSF_PM_TASKID set to %d", lsf_nodeid); ORTE_PROC_MY_NAME->vpid = vpid + lsf_nodeid - 1; /* get the non-name common environmental variables */ if (ORTE_SUCCESS != (rc = orte_ess_env_get())) { ORTE_ERROR_LOG(rc); return rc; } return ORTE_SUCCESS; }
static int slave_set_name(void) { char *jobid_str, *procid_str; int id, rc; orte_jobid_t jobid; orte_vpid_t vpid; id = mca_base_param_register_string("orte", "ess", "jobid", NULL, NULL); mca_base_param_lookup_string(id, &jobid_str); if (NULL == jobid_str) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_jobid(&jobid, jobid_str))) { ORTE_ERROR_LOG(rc); return(rc); } free(jobid_str); id = mca_base_param_register_string("orte", "ess", "vpid", NULL, NULL); mca_base_param_lookup_string(id, &procid_str); if (NULL == procid_str) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_vpid(&vpid, procid_str))) { ORTE_ERROR_LOG(rc); return(rc); } free(procid_str); ORTE_PROC_MY_NAME->jobid = jobid; ORTE_PROC_MY_NAME->vpid = vpid; ORTE_EPOCH_SET(ORTE_PROC_MY_NAME->epoch,orte_ess.proc_get_epoch(ORTE_PROC_MY_NAME)); OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, "ess:slave set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* get the non-name common environmental variables */ if (ORTE_SUCCESS != (rc = orte_ess_env_get())) { ORTE_ERROR_LOG(rc); return rc; } return ORTE_SUCCESS; }
static int env_set_name(void) { char *tmp; int rc; orte_jobid_t jobid; orte_vpid_t vpid; mca_base_param_reg_string_name("orte", "ess_jobid", "Process jobid", true, false, NULL, &tmp); if (NULL == tmp) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_jobid(&jobid, tmp))) { ORTE_ERROR_LOG(rc); return(rc); } free(tmp); mca_base_param_reg_string_name("orte", "ess_vpid", "Process vpid", true, false, NULL, &tmp); if (NULL == tmp) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_vpid(&vpid, tmp))) { ORTE_ERROR_LOG(rc); return(rc); } free(tmp); ORTE_PROC_MY_NAME->jobid = jobid; ORTE_PROC_MY_NAME->vpid = vpid; OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, "ess:env set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* get the non-name common environmental variables */ if (ORTE_SUCCESS != (rc = orte_ess_env_get())) { ORTE_ERROR_LOG(rc); return rc; } return ORTE_SUCCESS; }
static int alps_set_name(void) { int rc; int rank; orte_jobid_t jobid; if (NULL == orte_ess_base_jobid) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_jobid(&jobid, orte_ess_base_jobid))) { ORTE_ERROR_LOG(rc); return rc; } if (NULL == orte_ess_base_vpid) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_vpid(&starting_vpid, orte_ess_base_vpid))) { ORTE_ERROR_LOG(rc); return(rc); } ORTE_PROC_MY_NAME->jobid = jobid; if (ORTE_SUCCESS != (rc = orte_ess_alps_get_first_rank_on_node(&rank))) { ORTE_ERROR_LOG(rc); return(rc); } ORTE_PROC_MY_NAME->vpid = (orte_vpid_t)rank + starting_vpid; /* get the num procs as provided in the cmd line param */ if (ORTE_SUCCESS != (rc = orte_ess_env_get())) { ORTE_ERROR_LOG(rc); return rc; } return ORTE_SUCCESS; }
static int tm_set_name(void) { int rc; orte_jobid_t jobid; orte_vpid_t vpid; OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, "ess:tm setting name")); if (NULL == orte_ess_base_jobid) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_jobid(&jobid, orte_ess_base_jobid))) { ORTE_ERROR_LOG(rc); return(rc); } if (NULL == orte_ess_base_vpid) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_vpid(&vpid, orte_ess_base_vpid))) { ORTE_ERROR_LOG(rc); return(rc); } ORTE_PROC_MY_NAME->jobid = jobid; ORTE_PROC_MY_NAME->vpid = vpid; OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, "ess:tm set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* get the non-name common environmental variables */ if (ORTE_SUCCESS != (rc = orte_ess_env_get())) { ORTE_ERROR_LOG(rc); return rc; } return ORTE_SUCCESS; }
static int slurm_set_name(void) { int slurm_nodeid; int rc; orte_jobid_t jobid; orte_vpid_t vpid; char* tmp; OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, "ess:slurm setting name")); mca_base_param_reg_string_name("orte", "ess_jobid", "Process jobid", true, false, NULL, &tmp); if (NULL == tmp) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_jobid(&jobid, tmp))) { ORTE_ERROR_LOG(rc); return(rc); } free(tmp); mca_base_param_reg_string_name("orte", "ess_vpid", "Process vpid", true, false, NULL, &tmp); if (NULL == tmp) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_vpid(&vpid, tmp))) { ORTE_ERROR_LOG(rc); return(rc); } free(tmp); ORTE_PROC_MY_NAME->jobid = jobid; /* fix up the vpid and make it the "real" vpid */ slurm_nodeid = atoi(getenv("SLURM_NODEID")); ORTE_PROC_MY_NAME->vpid = vpid + slurm_nodeid; OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, "ess:slurm set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* get my node rank in case we are using static ports - this won't * be present for daemons, so don't error out if we don't have it */ mca_base_param_reg_string_name("orte", "ess_node_rank", "Process node rank", true, false, NULL, &tmp); if (NULL != tmp) { my_node_rank = strtol(tmp, NULL, 10); } /* fix up the system info nodename to match exactly what slurm returned */ if (NULL != orte_process_info.nodename) { free(orte_process_info.nodename); } orte_process_info.nodename = get_slurm_nodename(slurm_nodeid); OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, "ess:slurm set nodename to %s", orte_process_info.nodename)); /* get the non-name common environmental variables */ if (ORTE_SUCCESS != (rc = orte_ess_env_get())) { ORTE_ERROR_LOG(rc); return rc; } return ORTE_SUCCESS; }
static int rte_init(void) { int ret; char *error = NULL; char *tmp=NULL, *tailpiece; orte_jobid_t jobid=ORTE_JOBID_INVALID; orte_vpid_t vpid=ORTE_VPID_INVALID; int32_t jfam; OBJ_CONSTRUCT(&ctl, orte_thread_ctl_t); my_uid = (uint32_t)getuid(); /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { error = "orte_ess_base_std_prolog"; goto error; } /* if we were given a jobid, use it */ mca_base_param_reg_string_name("orte", "ess_jobid", "Process jobid", true, false, NULL, &tmp); if (NULL != tmp) { if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_jobid(&jobid, tmp))) { ORTE_ERROR_LOG(ret); error = "convert_jobid"; goto error; } free(tmp); ORTE_PROC_MY_NAME->jobid = jobid; } else { /* if we were given a job family, use it */ mca_base_param_reg_string_name("orte", "ess_job_family", "Job family", true, false, NULL, &tmp); if (NULL != tmp) { jfam = strtoul(tmp, &tailpiece, 10); if (UINT16_MAX < jfam || NULL != tailpiece) { /* use a string hash to restructure this to fit */ OPAL_HASH_STR(tmp, jfam); } ORTE_PROC_MY_NAME->jobid = ORTE_CONSTRUCT_LOCAL_JOBID(jfam << 16, 0); } } /* if we were given a vpid, use it */ mca_base_param_reg_string_name("orte", "ess_vpid", "Process vpid", true, false, NULL, &tmp); if (NULL != tmp) { if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_vpid(&vpid, tmp))) { ORTE_ERROR_LOG(ret); error = "convert_vpid"; goto error; } free(tmp); ORTE_PROC_MY_NAME->vpid = vpid; if (vpid < 2) { /* NOT ALLOWED - POTENTIAL CONFLICT WITH ORCM AND ORCM-SCHED */ error = "disallowed_vpid"; ret = ORTE_ERR_BAD_PARAM; goto error; } } /* if both were given, then we are done */ if (ORTE_JOBID_INVALID != ORTE_PROC_MY_NAME->jobid && ORTE_VPID_INVALID != ORTE_PROC_MY_NAME->vpid) { goto complete; } #if HAVE_QINFO_H /* if we have qlib, then we can ask it for info by which we determine our * name based on provided rack location info */ { qinfo_t *qinfo; if (NULL != (qinfo = get_qinfo())) { /* if we were given a jobid, then leave it alone */ if (ORTE_JOBID_INVALID == ORTE_PROC_MY_NAME->jobid) { /* not given - assign it to 0 */ ORTE_PROC_MY_NAME->jobid = 0; } /* must ensure that no daemon gets vpid 0 or 1 */ ORTE_PROC_MY_NAME->vpid = (qinfo->rack * QLIB_MAX_SLOTS_PER_RACK) + qinfo->slot + 2; /* ensure that the HNP uri is NULL */ if (NULL != orte_process_info.my_hnp_uri) { opal_output(0, "%s CONFLICTING NAME RESOLUTION - NO NAME GIVEN, BUT HNP SPECIFIED", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); error = "name conflict"; ret = ORTE_ERR_FATAL; goto error; } OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output, "GOT NAME %s FROM QINFO rack %d slot %d ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), qinfo->rack, qinfo->slot)); goto complete; } } #endif /* we must have been given a vpid - we can get the jobid * in other ways */ if (ORTE_VPID_INVALID == ORTE_PROC_MY_NAME->vpid) { /* we have an error */ error = "missing vpid assignment"; ret = ORTE_ERR_FATAL; goto error; } /* if we were given an HNP, we can get the jobid from * the HNP's name - this is decoded in proc_info.c during * the prolog */ if (ORTE_JOBID_INVALID != ORTE_PROC_MY_HNP->jobid) { ORTE_PROC_MY_NAME->jobid = orte_process_info.my_hnp.jobid; } else { /* just fake it */ ORTE_PROC_MY_NAME->jobid = 0; } complete: if (ORTE_SUCCESS != (ret = local_setup())) { ORTE_ERROR_LOG(ret); error = "local_setup"; goto error; } OBJ_DESTRUCT(&ctl); return ORTE_SUCCESS; error: orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); OBJ_DESTRUCT(&ctl); return ret; }
static int rte_init(void) { int ret; char *error = NULL; char *tmp=NULL; orte_jobid_t jobid=ORTE_JOBID_INVALID; orte_vpid_t vpid=ORTE_VPID_INVALID; int32_t jfam; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { error = "orte_ess_base_std_prolog"; goto error; } /* if we were given a jobid, use it */ mca_base_param_reg_string_name("orte", "ess_jobid", "Process jobid", true, false, NULL, &tmp); if (NULL != tmp) { if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_jobid(&jobid, tmp))) { ORTE_ERROR_LOG(ret); error = "convert_jobid"; goto error; } free(tmp); ORTE_PROC_MY_NAME->jobid = jobid; } /* if we were given a vpid, use it */ mca_base_param_reg_string_name("orte", "ess_vpid", "Process vpid", true, false, NULL, &tmp); if (NULL != tmp) { if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_vpid(&vpid, tmp))) { ORTE_ERROR_LOG(ret); error = "convert_vpid"; goto error; } free(tmp); ORTE_PROC_MY_NAME->vpid = vpid; } /* if both were given, then we are done */ if (ORTE_JOBID_INVALID == jobid || ORTE_VPID_INVALID == vpid) { /* create our own name */ if (ORTE_SUCCESS != (ret = orte_plm_base_open())) { ORTE_ERROR_LOG(ret); error = "orte_plm_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_plm_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_plm_base_select"; goto error; } if (ORTE_SUCCESS != (ret = orte_plm.set_hnp_name())) { ORTE_ERROR_LOG(ret); error = "orte_plm_set_hnp_name"; goto error; } /* close the plm since we opened it to set our * name, but have no further use for it */ orte_plm_base_close(); } /* do the rest of the standard tool init */ if (ORTE_SUCCESS != (ret = local_init())) { ORTE_ERROR_LOG(ret); error = "orte_ess_tool_init"; goto error; } return ORTE_SUCCESS; error: orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); return ret; }
static int rte_init(void) { int ret; char *error = NULL; orte_jobid_t jobid; orte_vpid_t vpid; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { error = "orte_ess_base_std_prolog"; goto error; } if (NULL != orte_ess_base_jobid && NULL != orte_ess_base_vpid) { opal_output_verbose(2, orte_ess_base_framework.framework_output, "ess:tool:obtaining name from environment"); if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_jobid(&jobid, orte_ess_base_jobid))) { return(ret); } ORTE_PROC_MY_NAME->jobid = jobid; if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_vpid(&vpid, orte_ess_base_vpid))) { return(ret); } ORTE_PROC_MY_NAME->vpid = vpid; } else { /* If we are a tool with no name, then define it here */ uint16_t jobfam; uint32_t hash32; uint32_t bias; opal_output_verbose(2, orte_ess_base_framework.framework_output, "ess:tool:computing name"); /* hash the nodename */ OPAL_HASH_STR(orte_process_info.nodename, hash32); bias = (uint32_t)orte_process_info.pid; /* fold in the bias */ hash32 = hash32 ^ bias; /* now compress to 16-bits */ jobfam = (uint16_t)(((0x0000ffff & (0xffff0000 & hash32) >> 16)) ^ (0x0000ffff & hash32)); /* set the name */ ORTE_PROC_MY_NAME->jobid = 0xffff0000 & ((uint32_t)jobfam << 16); ORTE_PROC_MY_NAME->vpid = 0; } /* do the rest of the standard tool init */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_tool_setup"; goto error; } return ORTE_SUCCESS; error: if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) { orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); } return ret; }
static int slurm_set_name(void) { int slurm_nodeid; int rc; int id; orte_jobid_t jobid; orte_vpid_t vpid; char* jobid_string; char* vpid_string; char *nodeid; OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, "ess:slurm setting name")); id = mca_base_param_register_string("orte", "ess", "jobid", NULL, NULL); mca_base_param_lookup_string(id, &jobid_string); if (NULL == jobid_string) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_jobid(&jobid, jobid_string))) { ORTE_ERROR_LOG(rc); return(rc); } id = mca_base_param_register_string("orte", "ess", "vpid", NULL, NULL); mca_base_param_lookup_string(id, &vpid_string); if (NULL == vpid_string) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_vpid(&vpid, vpid_string))) { ORTE_ERROR_LOG(rc); return(rc); } ORTE_PROC_MY_NAME->jobid = jobid; /* fix up the vpid and make it the "real" vpid */ if (NULL == (nodeid = getenv("SLURM_NODEID"))) { opal_output(0, "SLURM_NODEID not found - cannot define name"); return ORTE_ERR_NOT_FOUND; } slurm_nodeid = atoi(nodeid); ORTE_PROC_MY_NAME->vpid = vpid + slurm_nodeid; OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, "ess:slurm set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* fix up the system info nodename to match exactly what slurm returned */ if (NULL != orte_process_info.nodename) { free(orte_process_info.nodename); } orte_process_info.nodename = get_slurm_nodename(slurm_nodeid); OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, "ess:slurm set nodename to %s", orte_process_info.nodename)); /* get the non-name common environmental variables */ if (ORTE_SUCCESS != (rc = orte_ess_env_get())) { ORTE_ERROR_LOG(rc); return rc; } return ORTE_SUCCESS; }
static int slurm_set_name(void) { int slurm_nodeid; int rc; orte_jobid_t jobid; orte_vpid_t vpid; char *tmp; OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, "ess:slurm setting name")); if (NULL == orte_ess_base_jobid) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_jobid(&jobid, orte_ess_base_jobid))) { ORTE_ERROR_LOG(rc); return(rc); } if (NULL == orte_ess_base_vpid) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_vpid(&vpid, orte_ess_base_vpid))) { ORTE_ERROR_LOG(rc); return(rc); } ORTE_PROC_MY_NAME->jobid = jobid; /* fix up the vpid and make it the "real" vpid */ slurm_nodeid = atoi(getenv("SLURM_NODEID")); ORTE_PROC_MY_NAME->vpid = vpid + slurm_nodeid; OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, "ess:slurm set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* fix up the system info nodename to match exactly what slurm returned */ if (NULL != orte_process_info.nodename) { free(orte_process_info.nodename); } if (NULL == (tmp = getenv("SLURMD_NODENAME"))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } orte_process_info.nodename = strdup(tmp); OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, "ess:slurm set nodename to %s", (NULL == orte_process_info.nodename) ? "NULL" : orte_process_info.nodename)); /* get the non-name common environmental variables */ if (ORTE_SUCCESS != (rc = orte_ess_env_get())) { ORTE_ERROR_LOG(rc); return rc; } return ORTE_SUCCESS; }
int orte_ess_base_tool_setup(void) { int ret; char *error = NULL; opal_list_t transports; orte_jobid_t jobid; orte_vpid_t vpid; /* setup the PMIx framework - ensure it skips all non-PMIx components, * but do not override anything we were given */ opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ); if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_pmix_base_open"; goto error; } if (ORTE_SUCCESS != (ret = opal_pmix_base_select())) { ORTE_ERROR_LOG(ret); error = "opal_pmix_base_select"; goto error; } /* set the event base */ opal_pmix_base_set_evbase(orte_event_base); /* we have to define our name here */ if (NULL != orte_ess_base_jobid && NULL != orte_ess_base_vpid) { opal_output_verbose(2, orte_ess_base_framework.framework_output, "ess:tool:obtaining name from environment"); if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_jobid(&jobid, orte_ess_base_jobid))) { return(ret); } ORTE_PROC_MY_NAME->jobid = jobid; if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_vpid(&vpid, orte_ess_base_vpid))) { return(ret); } ORTE_PROC_MY_NAME->vpid = vpid; } else { /* If we are a tool with no name, then define it here */ uint16_t jobfam; uint32_t hash32; uint32_t bias; opal_output_verbose(2, orte_ess_base_framework.framework_output, "ess:tool:computing name"); /* hash the nodename */ OPAL_HASH_STR(orte_process_info.nodename, hash32); bias = (uint32_t)orte_process_info.pid; /* fold in the bias */ hash32 = hash32 ^ bias; /* now compress to 16-bits */ jobfam = (uint16_t)(((0x0000ffff & (0xffff0000 & hash32) >> 16)) ^ (0x0000ffff & hash32)); /* set the name */ ORTE_PROC_MY_NAME->jobid = 0xffff0000 & ((uint32_t)jobfam << 16); ORTE_PROC_MY_NAME->vpid = 0; } /* my name is set, xfer it to the OPAL layer */ orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME; /* initialize - PMIx may set our name here if we attach to * a PMIx server */ if (NULL != opal_pmix.tool_init) { opal_list_t info; opal_value_t *kv; OBJ_CONSTRUCT(&info, opal_list_t); /* pass our name so the PMIx layer can use it */ kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_TOOL_NSPACE); orte_util_convert_jobid_to_string(&kv->data.string, ORTE_PROC_MY_NAME->jobid); kv->type = OPAL_STRING; opal_list_append(&info, &kv->super); /* ditto for our rank */ kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_TOOL_RANK); kv->data.name.vpid = ORTE_PROC_MY_NAME->vpid; kv->type = OPAL_VPID; opal_list_append(&info, &kv->super); /* ORTE tools don't need to connect to a PMIx server as * they will connect via the OOB */ kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_TOOL_DO_NOT_CONNECT); kv->data.flag = true; kv->type = OPAL_BOOL; opal_list_append(&info, &kv->super); if (OPAL_SUCCESS != (ret = opal_pmix.tool_init(&info))) { ORTE_ERROR_LOG(ret); error = "opal_pmix.init"; OPAL_LIST_DESTRUCT(&info); goto error; } OPAL_LIST_DESTRUCT(&info); ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid; ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid; } orte_process_info.super.proc_hostname = strdup(orte_process_info.nodename); orte_process_info.super.proc_flags = OPAL_PROC_ALL_LOCAL; orte_process_info.super.proc_arch = opal_local_arch; opal_proc_local_set(&orte_process_info.super); /* open and setup the state machine */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_state_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_state_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_state_base_select"; goto error; } /* open and setup the error manager */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_errmgr_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_errmgr_base_select"; goto error; } /* Setup the communication infrastructure */ /* Routed system */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_rml_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_routed_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_routed_base_select"; goto error; } if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_oob_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_oob_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_oob_base_select"; goto error; } /* Runtime Messaging Layer */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_rml_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_rml_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_rml_base_select"; goto error; } /* get a conduit for our use - we never route IO over fabric */ OBJ_CONSTRUCT(&transports, opal_list_t); orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING); orte_mgmt_conduit = orte_rml.open_conduit(&transports); OPAL_LIST_DESTRUCT(&transports); /* since I am a tool, then all I really want to do is communicate. * So setup communications and be done - finding the HNP * to which I want to communicate and setting up a route for * that link is my responsibility */ /* we -may- need to know the name of the head * of our session directory tree, particularly the * tmp base where any other session directories on * this node might be located */ ret = orte_session_setup_base(ORTE_PROC_MY_NAME); if (ORTE_SUCCESS != ret ) { ORTE_ERROR_LOG(ret); error = "define session dir names"; goto error; } /* setup I/O forwarding system - must come after we init routes */ if (NULL != orte_process_info.my_hnp_uri) { /* only do this if we were given an HNP */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_iof_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_iof_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_iof_base_select"; goto error; } /* if we were given an HNP, then also setup the PLM in case this * tool wants to request that we spawn something for it */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_plm_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_plm_base_open"; goto error; } /* we don't select the plm framework as we only want the * base proxy functions */ } #if OPAL_ENABLE_FT_CR == 1 /* * Setup the SnapC */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_snapc_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_snapc_base_open"; goto error; } if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_sstore_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_sstore_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_snapc_base_select(ORTE_PROC_IS_HNP, ORTE_PROC_IS_APP))) { ORTE_ERROR_LOG(ret); error = "orte_snapc_base_select"; goto error; } if (ORTE_SUCCESS != (ret = orte_sstore_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_sstore_base_select"; goto error; } /* Tools do not need all the OPAL CR stuff */ opal_cr_set_enabled(false); #endif return ORTE_SUCCESS; error: orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); return ret; }
static int alps_set_name(void) { int rc; orte_jobid_t jobid; char *tmp; orte_vpid_t vpid; OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, "ess:alps setting name")); mca_base_param_reg_string_name("orte", "ess_jobid", "Process jobid", true, false, NULL, &tmp); if (NULL == tmp) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_jobid(&jobid, tmp))) { ORTE_ERROR_LOG(rc); return rc; } free(tmp); mca_base_param_reg_string_name("orte", "ess_vpid", "Process vpid", true, false, NULL, &tmp); if (NULL == tmp) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_vpid(&starting_vpid, tmp))) { ORTE_ERROR_LOG(rc); return(rc); } free(tmp); if (ORTE_SUCCESS != (rc = get_vpid(&vpid, starting_vpid))) { ORTE_ERROR_LOG(rc); return rc; } ORTE_PROC_MY_NAME->jobid = jobid; ORTE_PROC_MY_NAME->vpid = vpid; ORTE_EPOCH_SET(ORTE_PROC_MY_NAME->epoch,ORTE_EPOCH_INVALID); ORTE_EPOCH_SET(ORTE_PROC_MY_NAME->epoch, orte_ess.proc_get_epoch(ORTE_PROC_MY_NAME)); OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, "ess:alps set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* get the num procs as provided in the cmd line param */ if (ORTE_SUCCESS != (rc = orte_ess_env_get())) { ORTE_ERROR_LOG(rc); return rc; } if (orte_process_info.max_procs < orte_process_info.num_procs) { orte_process_info.max_procs = orte_process_info.num_procs; } return ORTE_SUCCESS; }