static int rte_init(void) { int ret; char *error = NULL; char **hosts = NULL; char *nodelist; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { error = "orte_ess_base_std_prolog"; goto error; } /* Start by getting a unique name */ tm_set_name(); /* if I am a daemon, complete my setup using the * default procedure */ if (ORTE_PROC_IS_DAEMON) { /* get the list of nodes used for this job */ nodelist = getenv("OMPI_MCA_orte_nodelist"); if (NULL != nodelist) { /* split the node list into an argv array */ hosts = opal_argv_split(nodelist, ','); } if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto error; } opal_argv_free(hosts); return ORTE_SUCCESS; } if (ORTE_PROC_IS_TOOL) { /* otherwise, if I am a tool proc, use that procedure */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_tool_setup"; goto error; } return ORTE_SUCCESS; } /* no other options are supported! */ error = "ess_error"; ret = ORTE_ERROR; error: if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) { orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); } return ret; }
static int rte_init(void) { int ret; char *error = NULL; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { error = "orte_ess_base_std_prolog"; goto error; } /* Start by getting a unique name */ slurm_set_name(); /* if I am a daemon, complete my setup using the * default procedure */ if (ORTE_PROC_IS_DAEMON) { if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto error; } return ORTE_SUCCESS; } if (ORTE_PROC_IS_TOOL) { /* otherwise, if I am a tool proc, use that procedure */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup(NULL))) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_tool_setup"; goto error; } return ORTE_SUCCESS; } /* no other options are supported! */ error = "ess_error"; ret = ORTE_ERROR; error: if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) { orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); } return ret; }
static int rte_init(void) { int ret; char *error = NULL; char **hosts = NULL; char *slurm_nodelist; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { error = "orte_ess_base_std_prolog"; goto error; } /* Start by getting a unique name */ slurm_set_name(); /* if I am a daemon, complete my setup using the * default procedure */ if (ORTE_PROC_IS_DAEMON) { /* get the list of nodes used for this job */ mca_base_param_reg_string_name("orte", "nodelist", "List of nodes in job", true, false, NULL, &slurm_nodelist); if (NULL != slurm_nodelist) { /* split the node list into an argv array */ hosts = opal_argv_split(slurm_nodelist, ','); } if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto error; } opal_argv_free(hosts); return ORTE_SUCCESS; } if (ORTE_PROC_IS_TOOL) { /* otherwise, if I am a tool proc, use that procedure */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_tool_setup"; goto error; } /* as a tool, I don't need a nidmap - so just return now */ return ORTE_SUCCESS; } /* otherwise, I must be an application process - use * the default procedure to finish my setup */ if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_app_setup"; goto error; } /* setup the nidmap arrays */ if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_process_info.sync_buf))) { ORTE_ERROR_LOG(ret); error = "orte_util_nidmap_init"; goto error; } return ORTE_SUCCESS; error: orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); return ret; }
static int rte_init(void) { int ret; char *error = NULL; char **hosts = NULL; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { error = "orte_ess_base_std_prolog"; goto error; } /* Start by getting a unique name from the enviro */ env_set_name(); /* if I am a daemon, complete my setup using the * default procedure */ if (ORTE_PROC_IS_DAEMON) { if (NULL != orte_node_regex) { /* extract the nodes */ if (ORTE_SUCCESS != (ret = orte_regex_extract_node_names(orte_node_regex, &hosts))) { error = "orte_regex_extract_node_names"; goto error; } } if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto error; } opal_argv_free(hosts); return ORTE_SUCCESS; } if (ORTE_PROC_IS_TOOL) { /* otherwise, if I am a tool proc, use that procedure */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_tool_setup"; goto error; } /* as a tool, I don't need a nidmap - so just return now */ return ORTE_SUCCESS; } /* otherwise, I must be an application process - use * the default procedure to finish my setup */ if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_app_setup"; goto error; } /* if data was provided, update the database */ if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_process_info.sync_buf))) { ORTE_ERROR_LOG(ret); error = "orte_util_nidmap_init"; goto error; } /* setup process binding */ if (ORTE_SUCCESS != (ret = orte_ess_base_proc_binding())) { error = "proc_binding"; goto error; } return ORTE_SUCCESS; error: if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) { orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); } return ret; }
static int rte_init(void) { int ret; char *error = NULL; orte_jobid_t jobid; orte_vpid_t vpid; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { error = "orte_ess_base_std_prolog"; goto error; } if (NULL != orte_ess_base_jobid && NULL != orte_ess_base_vpid) { opal_output_verbose(2, orte_ess_base_framework.framework_output, "ess:tool:obtaining name from environment"); if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_jobid(&jobid, orte_ess_base_jobid))) { return(ret); } ORTE_PROC_MY_NAME->jobid = jobid; if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_vpid(&vpid, orte_ess_base_vpid))) { return(ret); } ORTE_PROC_MY_NAME->vpid = vpid; } else { /* If we are a tool with no name, then define it here */ uint16_t jobfam; uint32_t hash32; uint32_t bias; opal_output_verbose(2, orte_ess_base_framework.framework_output, "ess:tool:computing name"); /* hash the nodename */ OPAL_HASH_STR(orte_process_info.nodename, hash32); bias = (uint32_t)orte_process_info.pid; /* fold in the bias */ hash32 = hash32 ^ bias; /* now compress to 16-bits */ jobfam = (uint16_t)(((0x0000ffff & (0xffff0000 & hash32) >> 16)) ^ (0x0000ffff & hash32)); /* set the name */ ORTE_PROC_MY_NAME->jobid = 0xffff0000 & ((uint32_t)jobfam << 16); ORTE_PROC_MY_NAME->vpid = 0; } /* do the rest of the standard tool init */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_tool_setup"; goto error; } return ORTE_SUCCESS; error: if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) { orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); } return ret; }
static int rte_init(char flags) { int ret; char *error = NULL; orte_jmap_t *jmap; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { error = "orte_ess_base_std_prolog"; goto error; } /* Start by getting a unique name */ slurm_set_name(); /* if I am a daemon, complete my setup using the * default procedure */ if (orte_process_info.daemon) { if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto error; } } else if (orte_process_info.tool) { /* otherwise, if I am a tool proc, use that procedure */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_tool_setup"; goto error; } } else { /* otherwise, I must be an application process - use * the default procedure to finish my setup */ if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_app_setup"; goto error; } /* setup the nidmap arrays */ OBJ_CONSTRUCT(&nidmap, opal_pointer_array_t); opal_pointer_array_init(&nidmap, 8, INT32_MAX, 8); /* setup array of jmaps */ OBJ_CONSTRUCT(&jobmap, opal_pointer_array_t); opal_pointer_array_init(&jobmap, 1, INT32_MAX, 1); jmap = OBJ_NEW(orte_jmap_t); jmap->job = ORTE_PROC_MY_NAME->jobid; opal_pointer_array_add(&jobmap, jmap); /* if one was provided, build my nidmap */ if (ORTE_SUCCESS != (ret = orte_ess_base_build_nidmap(orte_process_info.sync_buf, &nidmap, &jmap->pmap, &nprocs))) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_build_nidmap"; goto error; } } return ORTE_SUCCESS; error: orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); return ret; }
static int rte_init(void) { int ret; char *error = NULL; char **hosts = NULL; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { error = "orte_ess_base_std_prolog"; goto error; } /* Start by getting a unique name from the enviro */ env_set_name(); /* if I am a daemon, complete my setup using the * default procedure */ if (ORTE_PROC_IS_DAEMON) { if (NULL != orte_node_regex) { /* extract the nodes */ if (ORTE_SUCCESS != (ret = orte_regex_extract_node_names(orte_node_regex, &hosts))) { error = "orte_regex_extract_node_names"; goto error; } } if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto error; } opal_argv_free(hosts); return ORTE_SUCCESS; } if (ORTE_PROC_IS_TOOL) { /* otherwise, if I am a tool proc, use that procedure */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_tool_setup"; goto error; } /* as a tool, I don't need a nidmap - so just return now */ return ORTE_SUCCESS; } /* use the default procedure to finish my setup */ if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup(true))) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_app_setup"; goto error; } /* if data was provided, update the database */ if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_process_info.sync_buf))) { ORTE_ERROR_LOG(ret); error = "orte_util_nidmap_init"; goto error; } /* setup process binding */ if (ORTE_SUCCESS != (ret = orte_ess_base_proc_binding())) { error = "proc_binding"; goto error; } /* if we are an ORTE app - and not an MPI app - then * we need to exchange our connection info here. * MPI_Init has its own modex, so we don't need to do * two of them. However, if we don't do a modex at all, * then processes have no way to communicate * * NOTE: only do this when the process originally launches. * Cannot do this on a restart as the rest of the processes * in the job won't be executing this step, so we would hang */ if (ORTE_PROC_IS_NON_MPI && !orte_do_not_barrier) { orte_grpcomm_collective_t coll; OBJ_CONSTRUCT(&coll, orte_grpcomm_collective_t); coll.id = orte_process_info.peer_modex; coll.active = true; if (ORTE_SUCCESS != (ret = orte_grpcomm.modex(&coll))) { ORTE_ERROR_LOG(ret); error = "orte modex"; goto error; } ORTE_WAIT_FOR_COMPLETION(coll.active); OBJ_DESTRUCT(&coll); } return ORTE_SUCCESS; error: if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) { orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); } return ret; }
static int rte_init(void) { int ret; char *error = NULL; char **hosts = NULL; OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, "ess:alps in rte_init")); /* * shouldn't have been able to open this ess component if * process is app proc */ if (ORTE_PROC_IS_APP) { error = "mpi rank invoking alps rte_init"; ret = ORTE_ERR_NOT_SUPPORTED; goto fn_fail; } /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { error = "orte_ess_base_std_prolog"; goto fn_fail; } if (ORTE_SUCCESS != (ret = alps_set_name())) { error = "alps_set_name"; goto fn_fail; } /* * if I am a daemon, complete my setup using the * default procedure */ if (ORTE_PROC_IS_DAEMON) { if (NULL != orte_node_regex) { /* extract the nodes */ if (ORTE_SUCCESS != (ret = orte_regex_extract_node_names(orte_node_regex, &hosts)) || NULL == hosts) { error = "orte_regex_extract_node_names"; goto fn_fail; } } if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto fn_fail; } if (NULL != hosts) { opal_argv_free(hosts); } /* * now synchronize with aprun. */ if (ORTE_SUCCESS != (ret = orte_ess_alps_sync_start())) { error = "orte_ess_alps_sync"; goto fn_fail; } ret = ORTE_SUCCESS; goto fn_exit; } if (ORTE_PROC_IS_TOOL) { /* otherwise, if I am a tool proc, use that procedure */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_tool_setup"; goto fn_fail; } /* as a tool, I don't need a nidmap - so just return now */ ret = ORTE_SUCCESS; goto fn_exit; } fn_exit: return ret; fn_fail: if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) { orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); } goto fn_exit; }
static int rte_init(void) { int ret, i; char *error = NULL; char **hosts = NULL; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { error = "orte_ess_base_std_prolog"; goto error; } /* Start by getting a unique name */ alps_set_name(); /* if I am a daemon, complete my setup using the * default procedure */ if (ORTE_PROC_IS_DAEMON) { if (NULL != orte_node_regex) { /* extract the nodes */ if (ORTE_SUCCESS != (ret = orte_regex_extract_node_names(orte_node_regex, &hosts)) || NULL == hosts) { error = "orte_regex_extract_node_names"; goto error; } /* find our host in the list */ for (i=0; NULL != hosts[i]; i++) { if (0 == strncmp(hosts[i], orte_process_info.nodename, strlen(hosts[i]))) { /* correct our vpid */ ORTE_PROC_MY_NAME->vpid = starting_vpid + i; OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, "ess:alps reset name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); break; } } } if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto error; } opal_argv_free(hosts); return ORTE_SUCCESS; } if (ORTE_PROC_IS_TOOL) { /* otherwise, if I am a tool proc, use that procedure */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_tool_setup"; goto error; } /* as a tool, I don't need a nidmap - so just return now */ return ORTE_SUCCESS; } /* otherwise, I must be an application process - use * the default procedure to finish my setup */ if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_app_setup"; goto error; } /* setup the nidmap arrays */ if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_process_info.sync_buf))) { ORTE_ERROR_LOG(ret); error = "orte_util_nidmap_init"; goto error; } return ORTE_SUCCESS; error: orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); return ret; }