/**
 * Initialize the ORTE runtime for this process.
 *
 * The first call performs the full startup sequence (OPAL, locks, MCA
 * params, show_help, process info, ESS open/select, event base, ESS init).
 * Any later call only bumps the orte_initialized counter and returns
 * ORTE_SUCCESS.
 *
 * @param pargc  Pointer to argc, forwarded to opal_init().
 * @param pargv  Pointer to argv, forwarded to opal_init().
 * @param flags  Process type; stored in orte_process_info.proc_type.
 *
 * @return ORTE_SUCCESS, or the error code of the step that failed. Unless
 *         that code is ORTE_ERR_SILENT, a help message naming the failed
 *         step is emitted before returning.
 */
int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
{
    int ret;
    char *error = NULL;

    if (0 < orte_initialized) {
        /* track number of times we have been called */
        orte_initialized++;
        return ORTE_SUCCESS;
    }
    orte_initialized++;

    /* initialize the opal layer */
    if (ORTE_SUCCESS != (ret = opal_init(pargc, pargv))) {
        error = "opal_init";
        goto error;
    }

    /* ensure we know the type of proc for when we finalize */
    orte_process_info.proc_type = flags;

    /* setup the locks */
    if (ORTE_SUCCESS != (ret = orte_locks_init())) {
        error = "orte_locks_init";
        goto error;
    }

    /* Register all MCA Params */
    if (ORTE_SUCCESS != (ret = orte_register_params())) {
        error = "orte_register_params";
        goto error;
    }

    /* setup the orte_show_help system */
    if (ORTE_SUCCESS != (ret = orte_show_help_init())) {
        /* report the function that actually failed */
        error = "orte_show_help_init";
        goto error;
    }

    /* register handler for errnum -> string conversion */
    opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);

    /* Ensure the rest of the process info structure is initialized */
    if (ORTE_SUCCESS != (ret = orte_proc_info())) {
        error = "orte_proc_info";
        goto error;
    }

    /* open the ESS and select the correct module for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess_base_open())) {
        error = "orte_ess_base_open";
        goto error;
    }

    if (ORTE_SUCCESS != (ret = orte_ess_base_select())) {
        error = "orte_ess_base_select";
        goto error;
    }

    if (ORTE_PROC_IS_APP) {
#if !ORTE_DISABLE_FULL_SUPPORT && ORTE_ENABLE_PROGRESS_THREADS
#if OPAL_EVENT_HAVE_THREAD_SUPPORT
        /* get a separate orte event base */
        orte_event_base = opal_event_base_create();
        /* setup the finalize event - we'll need it
         * to break the thread out of the event lib
         * when we want to stop it
         */
        opal_event_set(orte_event_base, &orte_finalize_event, -1, OPAL_EV_WRITE, ignore_callback, NULL);
        opal_event_set_priority(&orte_finalize_event, ORTE_ERROR_PRI);
#if 0
        {
            /* seems strange, but wake us up once a second just so we can check for new events */
            opal_event_t *ev;
            struct timeval tv = {1,0};
            ev = opal_event_alloc();
            opal_event_evtimer_set(orte_event_base, ev, ignore_callback, ev);
            opal_event_set_priority(ev, ORTE_INFO_PRI);
            opal_event_evtimer_add(ev, &tv);
        }
#endif
        /* construct the thread object */
        OBJ_CONSTRUCT(&orte_progress_thread, opal_thread_t);
        /* fork off a thread to progress it */
        orte_progress_thread.t_run = orte_progress_thread_engine;
        if (OPAL_SUCCESS != (ret = opal_thread_start(&orte_progress_thread))) {
            error = "orte progress thread start";
            goto error;
        }
#else
        error = "event thread support is not configured";
        ret = ORTE_ERROR;
        goto error;
#endif
#else
        /* set the event base to the opal one */
        orte_event_base = opal_event_base;
#endif
    } else {
        /* set the event base to the opal one */
        orte_event_base = opal_event_base;
    }

    /* initialize the RTE for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess.init())) {
        error = "orte_ess_init";
        goto error;
    }

    /* All done */
    return ORTE_SUCCESS;

error:
    /* a "silent" error means the failing step already reported it */
    if (ORTE_ERR_SILENT != ret) {
        orte_show_help("help-orte-runtime",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }

    return ret;
}
int main(int argc, char *argv[]) { int rc, i, j; opal_cmd_line_t cmd_line; char *param, *value; orte_job_t *jdata=NULL; orte_app_context_t *app; char *uri, *ptr; /* Setup and parse the command line */ memset(&myglobals, 0, sizeof(myglobals)); /* find our basename (the name of the executable) so that we can use it in pretty-print error messages */ myglobals.basename = opal_basename(argv[0]); opal_cmd_line_create(&cmd_line, cmd_line_init); mca_base_cmd_line_setup(&cmd_line); if (OPAL_SUCCESS != (rc = opal_cmd_line_parse(&cmd_line, true, argc, argv)) ) { if (OPAL_ERR_SILENT != rc) { fprintf(stderr, "%s: command line error (%s)\n", argv[0], opal_strerror(rc)); } return rc; } /* print version if requested. Do this before check for help so that --version --help works as one might expect. */ if (myglobals.version) { char *str; str = opal_info_make_version_str("all", OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, OPAL_RELEASE_VERSION, OPAL_GREEK_VERSION, OPAL_REPO_REV); if (NULL != str) { fprintf(stdout, "%s %s\n\nReport bugs to %s\n", myglobals.basename, str, PACKAGE_BUGREPORT); free(str); } exit(0); } /* check if we are running as root - if we are, then only allow * us to proceed if the allow-run-as-root flag was given. 
Otherwise, * exit with a giant warning flag */ if (0 == geteuid() && !myglobals.run_as_root) { fprintf(stderr, "--------------------------------------------------------------------------\n"); if (myglobals.help) { fprintf(stderr, "%s cannot provide the help message when run as root\n", myglobals.basename); } else { /* show_help is not yet available, so print an error manually */ fprintf(stderr, "%s has detected an attempt to run as root.\n", myglobals.basename); } fprintf(stderr, " This is *strongly* discouraged as any mistake (e.g., in defining TMPDIR) or bug can\n"); fprintf(stderr, "result in catastrophic damage to the OS file system, leaving\n"); fprintf(stderr, "your system in an unusable state.\n\n"); fprintf(stderr, "You can override this protection by adding the --allow-run-as-root\n"); fprintf(stderr, "option to your cmd line. However, we reiterate our strong advice\n"); fprintf(stderr, "against doing so - please do so at your own risk.\n"); fprintf(stderr, "--------------------------------------------------------------------------\n"); exit(1); } /* * Since this process can now handle MCA/GMCA parameters, make sure to * process them. * NOTE: It is "safe" to call mca_base_cmd_line_process_args() before * opal_init_util() since mca_base_cmd_line_process_args() does *not* * depend upon opal_init_util() functionality. 
*/ if (OPAL_SUCCESS != mca_base_cmd_line_process_args(&cmd_line, &environ, &environ)) { exit(1); } /* Need to initialize OPAL so that install_dirs are filled in */ if (OPAL_SUCCESS != opal_init(&argc, &argv)) { exit(1); } /* Check for help request */ if (myglobals.help) { char *str, *args = NULL; char *project_name = NULL; if (0 == strcmp(myglobals.basename, "mpirun")) { project_name = "Open MPI"; } else { project_name = "OpenRTE"; } args = opal_cmd_line_get_usage_msg(&cmd_line); str = opal_show_help_string("help-orterun.txt", "orterun:usage", false, myglobals.basename, project_name, OPAL_VERSION, myglobals.basename, args, PACKAGE_BUGREPORT); if (NULL != str) { printf("%s", str); free(str); } free(args); /* If someone asks for help, that should be all we do */ exit(0); } /* flag that I am the HNP */ orte_process_info.proc_type = ORTE_PROC_HNP; /* Setup MCA params */ orte_register_params(); /* specify the DVM state machine */ opal_setenv("OMPI_MCA_state", "dvm", true, &environ); /* Intialize our Open RTE environment */ if (ORTE_SUCCESS != (rc = orte_init(&argc, &argv, ORTE_PROC_HNP))) { /* cannot call ORTE_ERROR_LOG as it could be the errmgr * never got loaded! */ return rc; } /* finalize OPAL. As it was opened again from orte_init->opal_init * we continue to have a reference count on it. So we have to finalize it twice... 
*/ opal_finalize(); /* check for request to report uri */ uri = orte_rml.get_contact_info(); if (NULL != myglobals.report_uri) { FILE *fp; if (0 == strcmp(myglobals.report_uri, "-")) { /* if '-', then output to stdout */ printf("VMURI: %s\n", uri); } else if (0 == strcmp(myglobals.report_uri, "+")) { /* if '+', output to stderr */ fprintf(stderr, "VMURI: %s\n", uri); } else if (0 == strncasecmp(myglobals.report_uri, "file:", strlen("file:"))) { ptr = strchr(myglobals.report_uri, ':'); ++ptr; fp = fopen(ptr, "w"); if (NULL == fp) { orte_show_help("help-orterun.txt", "orterun:write_file", false, myglobals.basename, "pid", ptr); exit(0); } fprintf(fp, "%s\n", uri); fclose(fp); } else { fp = fopen(myglobals.report_uri, "w"); if (NULL == fp) { orte_show_help("help-orterun.txt", "orterun:write_file", false, myglobals.basename, "pid", myglobals.report_uri); exit(0); } fprintf(fp, "%s\n", uri); fclose(fp); } free(uri); } else { printf("VMURI: %s\n", uri); } /* get the daemon job object - was created by ess/hnp component */ if (NULL == (jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) { orte_show_help("help-orterun.txt", "bad-job-object", true, myglobals.basename); exit(0); } /* also should have created a daemon "app" */ if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0))) { orte_show_help("help-orterun.txt", "bad-app-object", true, myglobals.basename); exit(0); } /* Did the user specify a prefix, or want prefix by default? 
*/ if (opal_cmd_line_is_taken(&cmd_line, "prefix") || want_prefix_by_default) { size_t param_len; /* if both the prefix was given and we have a prefix * given above, check to see if they match */ if (opal_cmd_line_is_taken(&cmd_line, "prefix") && NULL != myglobals.prefix) { /* if they don't match, then that merits a warning */ param = strdup(opal_cmd_line_get_param(&cmd_line, "prefix", 0, 0)); /* ensure we strip any trailing '/' */ if (0 == strcmp(OPAL_PATH_SEP, &(param[strlen(param)-1]))) { param[strlen(param)-1] = '\0'; } value = strdup(myglobals.prefix); if (0 == strcmp(OPAL_PATH_SEP, &(value[strlen(value)-1]))) { value[strlen(value)-1] = '\0'; } if (0 != strcmp(param, value)) { orte_show_help("help-orterun.txt", "orterun:app-prefix-conflict", true, myglobals.basename, value, param); /* let the global-level prefix take precedence since we * know that one is being used */ free(param); param = strdup(myglobals.prefix); } free(value); } else if (NULL != myglobals.prefix) { param = myglobals.prefix; } else if (opal_cmd_line_is_taken(&cmd_line, "prefix")){ /* must be --prefix alone */ param = strdup(opal_cmd_line_get_param(&cmd_line, "prefix", 0, 0)); } else { /* --enable-orterun-prefix-default was given to orterun */ param = strdup(opal_install_dirs.prefix); } if (NULL != param) { /* "Parse" the param, aka remove superfluous path_sep. */ param_len = strlen(param); while (0 == strcmp (OPAL_PATH_SEP, &(param[param_len-1]))) { param[param_len-1] = '\0'; param_len--; if (0 == param_len) { orte_show_help("help-orterun.txt", "orterun:empty-prefix", true, myglobals.basename, myglobals.basename); return ORTE_ERR_FATAL; } } orte_set_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, ORTE_ATTR_GLOBAL, param, OPAL_STRING); free(param); } } /* Did the user specify a hostfile. Need to check for both * hostfile and machine file. * We can only deal with one hostfile per app context, otherwise give an error. 
*/ if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "hostfile"))) { if(1 < j) { orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles", true, myglobals.basename, NULL); return ORTE_ERR_FATAL; } else { value = opal_cmd_line_get_param(&cmd_line, "hostfile", 0, 0); orte_set_attribute(&app->attributes, ORTE_APP_HOSTFILE, ORTE_ATTR_LOCAL, value, OPAL_STRING); } } if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "machinefile"))) { if(1 < j || orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, NULL, OPAL_STRING)) { orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles", true, myglobals.basename, NULL); return ORTE_ERR_FATAL; } else { value = opal_cmd_line_get_param(&cmd_line, "machinefile", 0, 0); orte_set_attribute(&app->attributes, ORTE_APP_HOSTFILE, ORTE_ATTR_LOCAL, value, OPAL_STRING); } } /* Did the user specify any hosts? */ if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "host"))) { char **targ=NULL, *tval; for (i = 0; i < j; ++i) { value = opal_cmd_line_get_param(&cmd_line, "host", i, 0); opal_argv_append_nosize(&targ, value); } tval = opal_argv_join(targ, ','); orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_LOCAL, tval, OPAL_STRING); opal_argv_free(targ); free(tval); } OBJ_DESTRUCT(&cmd_line); /* setup to listen for commands sent specifically to me, even though I would probably * be the one sending them! 
Unfortunately, since I am a participating daemon, * there are times I need to send a command to "all daemons", and that means *I* have * to receive it too */ orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON, ORTE_RML_PERSISTENT, orte_daemon_recv, NULL); /* override the notify_completed state so we can send a message * back to anyone who submits a job to us telling them the job * completed */ if (ORTE_SUCCESS != (rc = orte_state.set_job_state_callback(ORTE_JOB_STATE_NOTIFY_COMPLETED, notify_requestor))) { ORTE_ERROR_LOG(rc); ORTE_UPDATE_EXIT_STATUS(rc); exit(orte_exit_status); } /* spawn the DVM - we skip the initial steps as this * isn't a user-level application */ ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ALLOCATE); /* loop the event lib until an exit event is detected */ while (orte_event_base_active) { opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE); } /* cleanup and leave */ orte_finalize(); if (orte_debug_flag) { fprintf(stderr, "exiting with status %d\n", orte_exit_status); } exit(orte_exit_status); }
/**
 * Open every ORTE framework and record its component list for the info tool.
 *
 * Any OMPI_MCA_<type> environment variable for the types in mca_types is
 * saved and blanked before the frameworks are opened, then restored on the
 * normal/"bad parameter" exit path. For each framework that opens, an
 * opal_info_component_map_t naming the framework and pointing at its
 * component list is appended to component_map.
 *
 * @param mca_types      Pointer array of framework type names (char*).
 * @param component_map  Pointer array receiving one map entry per framework.
 *
 * @return The result of the last framework open (ORTE_SUCCESS or
 *         ORTE_ERR_BAD_PARAM, the latter after printing a notice), or
 *         ORTE_ERROR if an open failed with any other code.
 */
int orte_info_register_components(opal_pointer_array_t *mca_types,
                                  opal_pointer_array_t *component_map)
{
    opal_info_component_map_t *map;
    char *env, *str;
    int i, rc;
    char *target, *save, *type;
    char **env_save=NULL;

    /* Clear out the environment.  Use strdup() to orphan the resulting
     * strings because items are placed in the environment by reference,
     * not by value.
     */
    for (i = 0; i < mca_types->size; ++i) {
        if (NULL == (type = (char*)opal_pointer_array_get_item(mca_types, i))) {
            continue;
        }
        asprintf(&env, "OMPI_MCA_%s", type);
        if (NULL != (save = getenv(env))) {
            /* save this param so it can later be restored */
            asprintf(&str, "%s=%s", env, save);
            opal_argv_append_nosize(&env_save, str);
            free(str);
            /* can't manipulate it directly, so make a copy first */
            asprintf(&target, "%s=", env);
            /* putenv() installs this exact buffer in the environment, so
             * it must NOT be freed here - it is intentionally orphaned */
            putenv(target);
        }
        free(env);
    }

    /* Set orte_process_info.proc_type to HNP to force all frameworks to
     * open components
     */
    orte_process_info.proc_type = ORTE_PROC_HNP;

    /* set the event base to be the opal event base as we
     * aren't attempting to do anything with progress threads here
     */
    orte_event_base = opal_event_base;

    /* Register the ORTE layer's MCA parameters */
    if (ORTE_SUCCESS != (rc = orte_register_params()) &&
        ORTE_ERR_BAD_PARAM != rc) {
        str = "orte_register_params";
        goto error;
    }

    if (ORTE_SUCCESS != (rc = orte_db_base_open()) &&
        ORTE_ERR_BAD_PARAM != rc) {
        str = "db_base_open";
        goto error;
    }
    map = OBJ_NEW(opal_info_component_map_t);
    map->type = strdup("db");
    map->components = &orte_db_base.available_components;
    opal_pointer_array_add(component_map, map);
    if (ORTE_ERR_BAD_PARAM == rc)  {
        str = "db";
        goto breakout;
    }

    if (ORTE_SUCCESS != (rc = orte_errmgr_base_open()) &&
        ORTE_ERR_BAD_PARAM != rc) {
        str = "errmgr_base_open";
        goto error;
    }
    map = OBJ_NEW(opal_info_component_map_t);
    map->type = strdup("errmgr");
    map->components = &orte_errmgr_base_components_available;
    opal_pointer_array_add(component_map, map);
    if (ORTE_ERR_BAD_PARAM == rc)  {
        str = "errmgr";
        goto breakout;
    }

    if (ORTE_SUCCESS != (rc = orte_ess_base_open()) &&
        ORTE_ERR_BAD_PARAM != rc) {
        str = "ess_base_open";
        goto error;
    }
    map = OBJ_NEW(opal_info_component_map_t);
    map->type = strdup("ess");
    map->components = &orte_ess_base_components_available;
    opal_pointer_array_add(component_map, map);
    if (ORTE_ERR_BAD_PARAM == rc)  {
        str = "ess";
        goto breakout;
    }

    if (ORTE_SUCCESS != (rc = orte_filem_base_open()) &&
        ORTE_ERR_BAD_PARAM != rc) {
        str = "filem_base_open";
        goto error;
    }
    map = OBJ_NEW(opal_info_component_map_t);
    map->type = strdup("filem");
    map->components = &orte_filem_base_components_available;
    opal_pointer_array_add(component_map, map);
    if (ORTE_ERR_BAD_PARAM == rc)  {
        str = "filem";
        goto breakout;
    }

    if (ORTE_SUCCESS != (rc = orte_grpcomm_base_open()) &&
        ORTE_ERR_BAD_PARAM != rc) {
        str = "grpcomm_base_open";
        goto error;
    }
    map = OBJ_NEW(opal_info_component_map_t);
    map->type = strdup("grpcomm");
    map->components = &orte_grpcomm_base.components_available;
    opal_pointer_array_add(component_map, map);
    if (ORTE_ERR_BAD_PARAM == rc)  {
        str = "grpcomm";
        goto breakout;
    }

    if (ORTE_SUCCESS != (rc = orte_iof_base_open()) &&
        ORTE_ERR_BAD_PARAM != rc) {
        str = "iof_base_open";
        goto error;
    }
    map = OBJ_NEW(opal_info_component_map_t);
    map->type = strdup("iof");
    map->components = &orte_iof_base.iof_components_opened;
    opal_pointer_array_add(component_map, map);
    if (ORTE_ERR_BAD_PARAM == rc)  {
        str = "iof";
        goto breakout;
    }

    if (ORTE_SUCCESS != (rc = orte_odls_base_open()) &&
        ORTE_ERR_BAD_PARAM != rc) {
        str = "odls_base_open";
        goto error;
    }
    map = OBJ_NEW(opal_info_component_map_t);
    map->type = strdup("odls");
    map->components = &orte_odls_base.available_components;
    opal_pointer_array_add(component_map, map);
    if (ORTE_ERR_BAD_PARAM == rc)  {
        str = "odls";
        goto breakout;
    }

    if (ORTE_SUCCESS != (rc = mca_oob_base_open()) &&
        ORTE_ERR_BAD_PARAM != rc) {
        str = "oob_base_open";
        goto error;
    }
    map = OBJ_NEW(opal_info_component_map_t);
    map->type = strdup("oob");
    map->components = &mca_oob_base_components;
    opal_pointer_array_add(component_map, map);
    if (ORTE_ERR_BAD_PARAM == rc)  {
        str = "oob";
        goto breakout;
    }

    if (ORTE_SUCCESS != (rc = orte_plm_base_open()) &&
        ORTE_ERR_BAD_PARAM != rc) {
        str = "plm_base_open";
        goto error;
    }
    map = OBJ_NEW(opal_info_component_map_t);
    map->type = strdup("plm");
    map->components = &orte_plm_base.available_components;
    opal_pointer_array_add(component_map, map);
    if (ORTE_ERR_BAD_PARAM == rc)  {
        str = "plm";
        goto breakout;
    }

    if (ORTE_SUCCESS != (rc = orte_ras_base_open()) &&
        ORTE_ERR_BAD_PARAM != rc) {
        str = "ras_base_open";
        goto error;
    }
    map = OBJ_NEW(opal_info_component_map_t);
    map->type = strdup("ras");
    map->components = &orte_ras_base.ras_opened;
    opal_pointer_array_add(component_map, map);
    if (ORTE_ERR_BAD_PARAM == rc)  {
        str = "ras";
        goto breakout;
    }

    if (ORTE_SUCCESS != (rc = orte_rmaps_base_open()) &&
        ORTE_ERR_BAD_PARAM != rc) {
        str = "rmaps_base_open";
        goto error;
    }
    map = OBJ_NEW(opal_info_component_map_t);
    map->type = strdup("rmaps");
    map->components = &orte_rmaps_base.available_components;
    opal_pointer_array_add(component_map, map);
    if (ORTE_ERR_BAD_PARAM == rc)  {
        str = "rmaps";
        goto breakout;
    }

    if (ORTE_SUCCESS != (rc = orte_routed_base_open()) &&
        ORTE_ERR_BAD_PARAM != rc) {
        str = "routed_base_open";
        goto error;
    }
    map = OBJ_NEW(opal_info_component_map_t);
    map->type = strdup("routed");
    map->components = &orte_routed_base_components;
    opal_pointer_array_add(component_map, map);
    if (ORTE_ERR_BAD_PARAM == rc)  {
        str = "routed";
        goto breakout;
    }

    if (ORTE_SUCCESS != (rc = orte_rml_base_open()) &&
        ORTE_ERR_BAD_PARAM != rc) {
        str = "rml_base_open";
        goto error;
    }
    map = OBJ_NEW(opal_info_component_map_t);
    map->type = strdup("rml");
    map->components = &orte_rml_base_components;
    opal_pointer_array_add(component_map, map);
    if (ORTE_ERR_BAD_PARAM == rc)  {
        str = "rml";
        goto breakout;
    }

#if ORTE_ENABLE_SENSORS
    if (ORTE_SUCCESS != (rc = orte_sensor_base_open()) &&
        ORTE_ERR_BAD_PARAM != rc) {
        str = "sensor_base_open";
        goto error;
    }
    map = OBJ_NEW(opal_info_component_map_t);
    map->type = strdup("sensor");
    map->components = &mca_sensor_base_components_available;
    opal_pointer_array_add(component_map, map);
    if (ORTE_ERR_BAD_PARAM == rc)  {
        str = "sensor";
        goto breakout;
    }
#endif

#if OPAL_ENABLE_FT_CR == 1
    if (ORTE_SUCCESS != (rc = orte_snapc_base_open()) &&
        ORTE_ERR_BAD_PARAM != rc) {
        str = "snapc_base_open";
        goto error;
    }
    map = OBJ_NEW(opal_info_component_map_t);
    map->type = strdup("snapc");
    map->components = &orte_snapc_base_components_available;
    opal_pointer_array_add(component_map, map);
    if (ORTE_ERR_BAD_PARAM == rc)  {
        str = "snapc";
        goto breakout;
    }

    if (ORTE_SUCCESS != (rc = orte_sstore_base_open()) &&
        ORTE_ERR_BAD_PARAM != rc) {
        str = "sstore_base_open";
        goto error;
    }
    map = OBJ_NEW(opal_info_component_map_t);
    map->type = strdup("sstore");
    map->components = &orte_sstore_base_components_available;
    opal_pointer_array_add(component_map, map);
    if (ORTE_ERR_BAD_PARAM == rc)  {
        str = "sstore";
        goto breakout;
    }
#endif

    if (ORTE_SUCCESS != (rc = orte_state_base_open()) &&
        ORTE_ERR_BAD_PARAM != rc) {
        str = "state_base_open";
        goto error;
    }
    map = OBJ_NEW(opal_info_component_map_t);
    map->type = strdup("state");
    map->components = &orte_state_base_components_available;
    opal_pointer_array_add(component_map, map);
    if (ORTE_ERR_BAD_PARAM == rc)  {
        str = "state";
        goto breakout;
    }

 breakout:
    /* Restore the environment to what it was before we started so that
     * if users setenv OMPI_MCA_<framework name> to some value, they'll
     * see that value when it is shown via --param output.
     */
    if (NULL != env_save) {
        /* the saved strings are handed to the environment by reference,
         * so env_save is deliberately not released */
        for (i = 0; i < opal_argv_count(env_save); ++i) {
            putenv(env_save[i]);
        }
    }

    if (ORTE_ERR_BAD_PARAM == rc) {
        fprintf(stderr, "\nA \"bad parameter\" error was encountered when opening the ORTE %s framework\n", str);
        fprintf(stderr, "The output received from that framework includes the following parameters:\n\n");
    }

    return rc;

 error:
    fprintf(stderr, "orte_info_register: %s failed\n", str);
    return ORTE_ERROR;
}
/**
 * Initialize the ORTE runtime for this process.
 *
 * Returns immediately with ORTE_SUCCESS if orte_initialized is already set;
 * otherwise runs the startup sequence and sets orte_initialized only once
 * every step has succeeded.
 *
 * @param pargc  Pointer to argc, forwarded to opal_init().
 * @param pargv  Pointer to argv, forwarded to opal_init().
 * @param flags  Process type; stored in orte_process_info.proc_type.
 *
 * @return ORTE_SUCCESS, or the error code of the step that failed. An
 *         opal_init() failure is logged and returned directly; later
 *         failures emit a help message naming the failed step unless the
 *         code is ORTE_ERR_SILENT.
 */
int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
{
    int ret;
    char *error = NULL;

    if (orte_initialized) {
        return ORTE_SUCCESS;
    }

    /* initialize the opal layer */
    if (ORTE_SUCCESS != (ret = opal_init(pargc, pargv))) {
        ORTE_ERROR_LOG(ret);
        return ret;
    }

    /* ensure we know the type of proc for when we finalize */
    orte_process_info.proc_type = flags;

    /* setup the locks */
    if (ORTE_SUCCESS != (ret = orte_locks_init())) {
        error = "orte_locks_init";
        goto error;
    }

    /* Register all MCA Params */
    if (ORTE_SUCCESS != (ret = orte_register_params())) {
        error = "orte_register_params";
        goto error;
    }

    /* setup the orte_show_help system */
    if (ORTE_SUCCESS != (ret = orte_show_help_init())) {
        ORTE_ERROR_LOG(ret);
        /* report the function that actually failed */
        error = "orte_show_help_init";
        goto error;
    }

    /* register handler for errnum -> string conversion */
    opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);

    /* Ensure the rest of the process info structure is initialized */
    if (ORTE_SUCCESS != (ret = orte_proc_info())) {
        error = "orte_proc_info";
        goto error;
    }

    /* open the ESS and select the correct module for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess_base_open())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_ess_base_open";
        goto error;
    }

    if (ORTE_SUCCESS != (ret = orte_ess_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_ess_base_select";
        goto error;
    }

    /* initialize the RTE for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess.init())) {
        ORTE_ERROR_LOG(ret);
        /* the failing call is the ESS module's init entry point */
        error = "orte_ess_init";
        goto error;
    }

    /* All done */
    orte_initialized = true;
    return ORTE_SUCCESS;

error:
    /* a "silent" error means the failing step already reported it */
    if (ORTE_ERR_SILENT != OPAL_SOS_GET_ERROR_CODE(ret)) {
        orte_show_help("help-orte-runtime",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }

    return ret;
}
/**
 * Initialize the ORTE runtime for this process.
 *
 * The first call performs the full startup sequence: OPAL init, installing
 * ORTE's naming callbacks into OPAL, locks, MCA params, show_help, process
 * info, the dstore framework and handles, ESS open/select/init, and finally
 * mirroring selected orte_process_info fields into opal_process_info.
 * Any later call only bumps the orte_initialized counter and returns
 * ORTE_SUCCESS.
 *
 * @param pargc  Pointer to argc, forwarded to opal_init().
 * @param pargv  Pointer to argv, forwarded to opal_init().
 * @param flags  Process type; stored in orte_process_info.proc_type.
 *
 * @return ORTE_SUCCESS, or the error code of the step that failed. Unless
 *         that code is ORTE_ERR_SILENT, a help message naming the failed
 *         step is emitted before returning.
 */
int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
{
    int ret;
    char *error = NULL;

    if (0 < orte_initialized) {
        /* track number of times we have been called */
        orte_initialized++;
        return ORTE_SUCCESS;
    }
    orte_initialized++;

    /* initialize the opal layer */
    if (ORTE_SUCCESS != (ret = opal_init(pargc, pargv))) {
        error = "opal_init";
        goto error;
    }

    /* Convince OPAL to use our naming scheme */
    opal_process_name_print = _process_name_print_for_opal;
    opal_vpid_print = _vpid_print_for_opal;
    opal_jobid_print = _jobid_print_for_opal;
    opal_compare_proc = _process_name_compare;
    opal_convert_string_to_process_name = _convert_string_to_process_name;

    /* ensure we know the type of proc for when we finalize */
    orte_process_info.proc_type = flags;

    /* setup the locks */
    if (ORTE_SUCCESS != (ret = orte_locks_init())) {
        error = "orte_locks_init";
        goto error;
    }

    /* Register all MCA Params */
    if (ORTE_SUCCESS != (ret = orte_register_params())) {
        error = "orte_register_params";
        goto error;
    }

    /* setup the orte_show_help system */
    if (ORTE_SUCCESS != (ret = orte_show_help_init())) {
        /* report the function that actually failed */
        error = "orte_show_help_init";
        goto error;
    }

    /* register handler for errnum -> string conversion */
    opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);

    /* Ensure the rest of the process info structure is initialized */
    if (ORTE_SUCCESS != (ret = orte_proc_info())) {
        error = "orte_proc_info";
        goto error;
    }

    /* we may have modified the local nodename according to
     * request to retain/strip the FQDN and prefix, so update
     * it here. The OPAL layer will strdup the hostname, so
     * we have to free it first to avoid a memory leak
     * (free(NULL) is a no-op, so no guard is needed) */
    free(opal_process_info.nodename);
    /* opal_finalize_util will call free on this pointer so set from strdup */
    opal_process_info.nodename = strdup (orte_process_info.nodename);

    /* setup the dstore framework */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&opal_dstore_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "opal_dstore_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = opal_dstore_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "opal_dstore_base_select";
        goto error;
    }
    /* create the handle */
    if (0 > (opal_dstore_internal = opal_dstore.open("INTERNAL", "hash", NULL))) {
        error = "opal dstore internal";
        ret = ORTE_ERR_FATAL;
        goto error;
    }

    if (ORTE_PROC_IS_APP) {
        if (0 > (opal_dstore_modex = opal_dstore.open("MODEX", "sm,hash", NULL))) {
            error = "opal dstore modex";
            ret = ORTE_ERR_FATAL;
            goto error;
        }
    }

    if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
        /* let the pmix server register params */
        pmix_server_register();
    }

    /* open the ESS and select the correct module for this environment */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_ess_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_ess_base_open";
        goto error;
    }

    if (ORTE_SUCCESS != (ret = orte_ess_base_select())) {
        error = "orte_ess_base_select";
        goto error;
    }

    if (!ORTE_PROC_IS_APP) {
        /* ORTE tools "block" in their own loop over the event
         * base, so no progress thread is required - apps will
         * start their progress thread in ess_base_std_app.c
         * at the appropriate point
         */
        orte_event_base = opal_event_base;
    }

    /* initialize the RTE for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess.init())) {
        error = "orte_ess_init";
        goto error;
    }

    /* set the remaining opal_process_info fields. Note that
     * the OPAL layer will have initialized these to NULL, and
     * anyone between us would not have strdup'd the string, so
     * we cannot free it here */
    opal_process_info.job_session_dir  = orte_process_info.job_session_dir;
    opal_process_info.proc_session_dir = orte_process_info.proc_session_dir;
    opal_process_info.num_local_peers  = (int32_t)orte_process_info.num_local_peers;
    opal_process_info.my_local_rank    = (int32_t)orte_process_info.my_local_rank;
#if OPAL_HAVE_HWLOC
    opal_process_info.cpuset           = orte_process_info.cpuset;
#endif /* OPAL_HAVE_HWLOC */

#if OPAL_ENABLE_TIMING
    opal_timing_set_jobid(ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
#endif

    /* All done */
    return ORTE_SUCCESS;

error:
    /* a "silent" error means the failing step already reported it */
    if (ORTE_ERR_SILENT != ret) {
        orte_show_help("help-orte-runtime",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }

    return ret;
}
/**
 * Initialize the ORTE runtime for this process.
 *
 * The first call performs the full startup sequence: OPAL init, installing
 * ORTE's naming callbacks into OPAL, locks, MCA params, show_help, process
 * info, and ESS open/select/init. Any later call only bumps the
 * orte_initialized counter and returns ORTE_SUCCESS.
 *
 * @param pargc  Pointer to argc, forwarded to opal_init().
 * @param pargv  Pointer to argv, forwarded to opal_init().
 * @param flags  Process type; stored in orte_process_info.proc_type.
 *
 * @return ORTE_SUCCESS, or the error code of the step that failed. Unless
 *         that code is ORTE_ERR_SILENT, a help message naming the failed
 *         step is emitted before returning.
 */
int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
{
    int ret;
    char *error = NULL;

    if (0 < orte_initialized) {
        /* track number of times we have been called */
        orte_initialized++;
        return ORTE_SUCCESS;
    }
    orte_initialized++;

    /* initialize the opal layer */
    if (ORTE_SUCCESS != (ret = opal_init(pargc, pargv))) {
        error = "opal_init";
        goto error;
    }

    /* Convince OPAL to use our naming scheme */
    opal_process_name_print = _process_name_print_for_opal;
    opal_process_name_vpid = _process_name_vpid_for_opal;
    opal_process_name_jobid = _process_name_jobid_for_opal;
    opal_compare_proc = _process_name_compare;

    /* ensure we know the type of proc for when we finalize */
    orte_process_info.proc_type = flags;

    /* setup the locks */
    if (ORTE_SUCCESS != (ret = orte_locks_init())) {
        error = "orte_locks_init";
        goto error;
    }

    /* Register all MCA Params */
    if (ORTE_SUCCESS != (ret = orte_register_params())) {
        error = "orte_register_params";
        goto error;
    }

    /* setup the orte_show_help system */
    if (ORTE_SUCCESS != (ret = orte_show_help_init())) {
        /* report the function that actually failed */
        error = "orte_show_help_init";
        goto error;
    }

    /* register handler for errnum -> string conversion */
    opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);

    /* Ensure the rest of the process info structure is initialized */
    if (ORTE_SUCCESS != (ret = orte_proc_info())) {
        error = "orte_proc_info";
        goto error;
    }

    /* open the ESS and select the correct module for this environment */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_ess_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_ess_base_open";
        goto error;
    }

    if (ORTE_SUCCESS != (ret = orte_ess_base_select())) {
        error = "orte_ess_base_select";
        goto error;
    }

    if (!ORTE_PROC_IS_APP) {
        /* ORTE tools "block" in their own loop over the event
         * base, so no progress thread is required - apps will
         * start their progress thread in ess_base_std_app.c
         * at the appropriate point
         */
        orte_event_base = opal_event_base;
    }

    /* initialize the RTE for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess.init())) {
        error = "orte_ess_init";
        goto error;
    }

    /* All done */
    return ORTE_SUCCESS;

error:
    /* a "silent" error means the failing step already reported it */
    if (ORTE_ERR_SILENT != ret) {
        orte_show_help("help-orte-runtime",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }

    return ret;
}
static int orcmd_init(void) { int ret = ORTE_ERROR; char *error = NULL; opal_buffer_t buf, *clusterbuf, *uribuf; orte_job_t *jdata; orte_node_t *node; orte_proc_t *proc; opal_list_t config; orcm_scheduler_t *scheduler; orcm_node_t *mynode=NULL; int32_t n; if (initialized) { return ORCM_SUCCESS; } initialized = true; /* Initialize the ORTE data type support */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { error = "orte_std_prolog"; goto error; } /* setup the global job and node arrays */ orte_job_data = OBJ_NEW(opal_pointer_array_t); if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_job_data, 1, ORTE_GLOBAL_ARRAY_MAX_SIZE, 1))) { ORTE_ERROR_LOG(ret); error = "setup job array"; goto error; } orte_node_pool = OBJ_NEW(opal_pointer_array_t); if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_pool, ORTE_GLOBAL_ARRAY_BLOCK_SIZE, ORTE_GLOBAL_ARRAY_MAX_SIZE, ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) { ORTE_ERROR_LOG(ret); error = "setup node array"; goto error; } orte_node_topologies = OBJ_NEW(opal_pointer_array_t); if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_topologies, ORTE_GLOBAL_ARRAY_BLOCK_SIZE, ORTE_GLOBAL_ARRAY_MAX_SIZE, ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) { ORTE_ERROR_LOG(ret); error = "setup node topologies array"; goto error; } /* create a job tracker for the daemons */ jdata = OBJ_NEW(orte_job_t); jdata->jobid = 0; ORTE_PROC_MY_NAME->jobid = 0; opal_pointer_array_set_item(orte_job_data, 0, jdata); /* read the site configuration */ OBJ_CONSTRUCT(&config, opal_list_t); if (ORCM_SUCCESS != (ret = orcm_cfgi.read_config(&config))) { error = "getting config"; goto error; } /* define the cluster and collect contact info for all * aggregators - we'll need to know how to talk to any * of them in case of failures */ OBJ_CONSTRUCT(&buf, opal_buffer_t); if (ORCM_SUCCESS != (ret = orcm_cfgi.define_system(&config, &mynode, &orte_process_info.num_procs, &buf))) { OBJ_DESTRUCT(&buf); error = "define system"; goto error; } /* if my name didn't get 
set, then we didn't find our node * in the config - report it and die */ if (NULL == mynode) { orte_show_help("help-ess-orcm.txt", "node-not-found", true, orcm_cfgi_base.config_file, orte_process_info.nodename); OBJ_DESTRUCT(&buf); return ORTE_ERR_SILENT; } /* define a node and proc object for ourselves as some parts * of ORTE and ORCM require it */ if (NULL == (node = OBJ_NEW(orte_node_t))) { ret = ORTE_ERR_OUT_OF_RESOURCE; error = "out of memory"; goto error; } node->name = strdup(orte_process_info.nodename); opal_pointer_array_set_item(orte_node_pool, ORTE_PROC_MY_NAME->vpid, node); if (NULL == (proc = OBJ_NEW(orte_proc_t))) { ret = ORTE_ERR_OUT_OF_RESOURCE; error = "out of memory"; goto error; } proc->name.jobid = ORTE_PROC_MY_NAME->jobid; proc->name.vpid = ORTE_PROC_MY_NAME->vpid; OBJ_RETAIN(proc); node->daemon = proc; OBJ_RETAIN(node); proc->node = node; opal_pointer_array_set_item(jdata->procs, ORTE_PROC_MY_NAME->vpid, proc); /* For now, we only support a single scheduler daemon in the system. * This *may* change someday in the future */ scheduler = (orcm_scheduler_t*)opal_list_get_first(orcm_schedulers); /* If we are in test mode, then we don't *require* that a scheduler * be defined in the system - otherwise, we do */ if (NULL == scheduler) { if (mca_sst_orcmd_component.scheduler_reqd) { error = "no scheduler found"; ret = ORTE_ERR_NOT_FOUND; goto error; } } else { ORTE_PROC_MY_SCHEDULER->jobid = scheduler->controller.daemon.jobid; ORTE_PROC_MY_SCHEDULER->vpid = scheduler->controller.daemon.vpid; } /* register the ORTE-level params at this time now that the * config has had a chance to push things into the environ */ if (ORTE_SUCCESS != (ret = orte_register_params())) { OBJ_DESTRUCT(&buf); error = "orte_register_params"; goto error; } /* setup callback for SIGPIPE */ setup_sighandler(SIGPIPE, &epipe_handler, epipe_signal_callback); /* Set signal handlers to catch kill signals so we can properly clean up * after ourselves. 
*/ setup_sighandler(SIGTERM, &term_handler, shutdown_signal); setup_sighandler(SIGINT, &int_handler, shutdown_signal); /** setup callbacks for signals we should ignore */ setup_sighandler(SIGUSR1, &sigusr1_handler, signal_callback); setup_sighandler(SIGUSR2, &sigusr2_handler, signal_callback); signals_set = true; #if OPAL_HAVE_HWLOC { hwloc_obj_t obj; unsigned i, j; /* get the local topology */ if (NULL == opal_hwloc_topology) { if (OPAL_SUCCESS != opal_hwloc_base_get_topology()) { OBJ_DESTRUCT(&buf); error = "topology discovery"; goto error; } } /* remove the hostname from the topology. Unfortunately, hwloc * decided to add the source hostname to the "topology", thus * rendering it unusable as a pure topological description. So * we remove that information here. */ obj = hwloc_get_root_obj(opal_hwloc_topology); for (i=0; i < obj->infos_count; i++) { if (NULL == obj->infos[i].name || NULL == obj->infos[i].value) { continue; } if (0 == strncmp(obj->infos[i].name, "HostName", strlen("HostName"))) { free(obj->infos[i].name); free(obj->infos[i].value); /* left justify the array */ for (j=i; j < obj->infos_count-1; j++) { obj->infos[j] = obj->infos[j+1]; } obj->infos[obj->infos_count-1].name = NULL; obj->infos[obj->infos_count-1].value = NULL; obj->infos_count--; break; } } if (15 < opal_output_get_verbosity(orcm_sst_base_framework.framework_output)) { opal_output(0, "%s Topology Info:", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO); } /* if we were asked to bind to specific core(s), do so now */ if (NULL != orte_daemon_cores) { char **cores=NULL, tmp[128]; hwloc_obj_t pu; hwloc_cpuset_t ours, pucpus, res; int core; /* could be a collection of comma-delimited ranges, so * use our handy utility to parse it */ orte_util_parse_range_options(orte_daemon_cores, &cores); if (NULL != cores) { ours = hwloc_bitmap_alloc(); hwloc_bitmap_zero(ours); pucpus = hwloc_bitmap_alloc(); res = hwloc_bitmap_alloc(); for (i=0; NULL != cores[i]; 
i++) { core = strtoul(cores[i], NULL, 10); if (NULL == (pu = opal_hwloc_base_get_pu(opal_hwloc_topology, core, OPAL_HWLOC_LOGICAL))) { orte_show_help("help-orted.txt", "orted:cannot-bind", true, orte_process_info.nodename, orte_daemon_cores); ret = ORTE_ERR_NOT_SUPPORTED; OBJ_DESTRUCT(&buf); error = "cannot bind"; goto error; } hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset); hwloc_bitmap_or(res, ours, pucpus); hwloc_bitmap_copy(ours, res); } /* if the result is all zeros, then don't bind */ if (!hwloc_bitmap_iszero(ours)) { (void)hwloc_set_cpubind(opal_hwloc_topology, ours, 0); if (opal_hwloc_report_bindings) { opal_hwloc_base_cset2mapstr(tmp, sizeof(tmp), opal_hwloc_topology, ours); opal_output(0, "Daemon %s is bound to cores %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp); } } /* cleanup */ hwloc_bitmap_free(ours); hwloc_bitmap_free(pucpus); hwloc_bitmap_free(res); opal_argv_free(cores); } } } #endif /* open and select the pstat framework */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&opal_pstat_base_framework, 0))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "opal_pstat_base_open"; goto error; } if (ORTE_SUCCESS != (ret = opal_pstat_base_select())) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "opal_pstat_base_select"; goto error; } /* open and setup the state machine */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orte_state_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_state_base_select())) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orte_state_base_select"; goto error; } /* open the notifier */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_notifier_base_framework, 0))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orte_notifier_base_open"; goto error; } /* open the errmgr */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) { ORTE_ERROR_LOG(ret); 
OBJ_DESTRUCT(&buf); error = "orte_errmgr_base_open"; goto error; } /* Setup the communication infrastructure */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orte_oob_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_oob_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_oob_base_select"; goto error; } if (!opal_list_get_size(&orte_oob_base.actives)) { ret = ORTE_ERROR; error = "orte_oob: Found 0 active transports"; goto error; } /* Runtime Messaging Layer */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orte_rml_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_rml_base_select())) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orte_rml_base_select"; goto error; } /* select the notifier*/ if (ORTE_SUCCESS != (ret = orte_notifier_base_select())) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orte_notifier_base_select"; goto error; } /* select the errmgr */ if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orte_errmgr_base_select"; goto error; } /* Routed system */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orte_rml_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_routed_base_select())) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orte_routed_base_select"; goto error; } /* database */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orcm_db_base_framework, 0))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orcm_db_base_open"; goto error; } /* always restrict daemons to local database components */ if (ORTE_SUCCESS != (ret = orcm_db_base_select())) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orcm_db_base_select"; goto error; } /* datastore - ensure we don't pickup the pmi component, but 
* don't override anything set by user */ if (NULL == getenv(OPAL_MCA_PREFIX"dstore")) { putenv(OPAL_MCA_PREFIX"dstore=^pmi"); } if (ORTE_SUCCESS != (ret = mca_base_framework_open(&opal_dstore_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "opal_dstore_base_open"; goto error; } if (ORTE_SUCCESS != (ret = opal_dstore_base_select())) { ORTE_ERROR_LOG(ret); error = "opal_dstore_base_select"; goto error; } /* create the handle */ if (0 > (opal_dstore_internal = opal_dstore.open("INTERNAL", NULL, NULL))) { error = "opal dstore internal"; ret = ORTE_ERR_FATAL; goto error; } /* extract the cluster description and setup the routed info - the orcm routed component * will know what to do. */ n = 1; if (OPAL_SUCCESS != (ret = opal_dss.unpack(&buf, &clusterbuf, &n, OPAL_BUFFER))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "extract cluster buf"; goto error; } if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, clusterbuf))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); OBJ_RELEASE(clusterbuf); error = "orte_routed.init_routes"; goto error; } OBJ_RELEASE(clusterbuf); /* extract the uri buffer and load the hash tables */ n = 1; if (OPAL_SUCCESS != (ret = opal_dss.unpack(&buf, &uribuf, &n, OPAL_BUFFER))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "extract uri buffer"; goto error; } if (ORTE_SUCCESS != (ret = orte_rml_base_update_contact_info(uribuf))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); OBJ_RELEASE(uribuf); error = "load hash tables"; goto error; } OBJ_DESTRUCT(&buf); OBJ_RELEASE(uribuf); /* * Group communications */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_grpcomm_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_grpcomm_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_grpcomm_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_grpcomm_base_select"; goto error; } /* Open/select the odls */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_odls_base_framework, 0))) { ORTE_ERROR_LOG(ret); 
error = "orte_odls_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_odls_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_odls_base_select"; goto error; } /* enable communication with the rml */ if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { ORTE_ERROR_LOG(ret); error = "orte_rml.enable_comm"; goto error; } /* setup the FileM */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_filem_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_filem_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_filem_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_filem_base_select"; goto error; } /* * Initalize the CR setup * Note: Always do this, even in non-FT builds. * If we don't some user level tools may hang. */ opal_cr_set_enabled(false); if (ORTE_SUCCESS != (ret = orte_cr_init())) { ORTE_ERROR_LOG(ret); error = "orte_cr_init"; goto error; } /* setup the ANALYTICS framework */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orcm_analytics_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orcm_analytics_base_open"; goto error; } /* setup the EVGEN framework */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orcm_evgen_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orcm_evgen_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orcm_evgen_base_select())) { ORTE_ERROR_LOG(ret); error = "orcm_evgen_select"; goto error; } /* setup the SENSOR framework */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orcm_sensor_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orcm_sensor_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orcm_sensor_base_select())) { ORTE_ERROR_LOG(ret); error = "orcm_sensor_select"; goto error; } /* start the local sensors */ orcm_sensor.start(ORTE_PROC_MY_NAME->jobid); /* setup the PWRMGMT framework */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orcm_pwrmgmt_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orcm_pwrmgmt_base_open"; goto error; } if (ORTE_SUCCESS != (ret = 
orcm_pwrmgmt_base_select())) { ORTE_ERROR_LOG(ret); error = "orcm_pwrmgmt_select"; goto error; } /* setup the DFS framework */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_dfs_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_dfs_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_dfs_select"; goto error; } /* open and setup the DIAG framework */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orcm_diag_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orcm_diag_base_open"; goto error; } if (ORCM_SUCCESS != (ret = orcm_diag_base_select())) { ORTE_ERROR_LOG(ret); error = "orcm_diag_select"; goto error; } return ORTE_SUCCESS; error: orte_show_help("help-orcm-runtime.txt", "orcm_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); return ORTE_ERR_SILENT; }
static int tool_init(void) { int ret = ORTE_ERROR; char *error = NULL; opal_buffer_t buf, *clusterbuf, *uribuf; orte_job_t *jdata; orte_node_t *node; orte_proc_t *proc; opal_list_t config; orcm_scheduler_t *scheduler; orcm_node_t *mynode=NULL; int32_t n; if (initialized) { return ORCM_SUCCESS; } initialized = true; /* Initialize the ORTE data type support */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { error = "orte_std_prolog"; goto error; } /* setup the global job and node arrays */ orte_job_data = OBJ_NEW(opal_pointer_array_t); if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_job_data, 1, ORTE_GLOBAL_ARRAY_MAX_SIZE, 1))) { ORTE_ERROR_LOG(ret); error = "setup job array"; goto error; } orte_node_pool = OBJ_NEW(opal_pointer_array_t); if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_pool, ORTE_GLOBAL_ARRAY_BLOCK_SIZE, ORTE_GLOBAL_ARRAY_MAX_SIZE, ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) { ORTE_ERROR_LOG(ret); error = "setup node array"; goto error; } orte_node_topologies = OBJ_NEW(opal_pointer_array_t); if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_topologies, ORTE_GLOBAL_ARRAY_BLOCK_SIZE, ORTE_GLOBAL_ARRAY_MAX_SIZE, ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) { ORTE_ERROR_LOG(ret); error = "setup node topologies array"; goto error; } /* create a job tracker for the daemons */ jdata = OBJ_NEW(orte_job_t); jdata->jobid = 0; ORTE_PROC_MY_NAME->jobid = 0; opal_pointer_array_set_item(orte_job_data, 0, jdata); /* read the site configuration */ OBJ_CONSTRUCT(&config, opal_list_t); if (ORCM_SUCCESS != (ret = orcm_cfgi.read_config(&config))) { error = "getting config"; goto error; } /* define the cluster and collect contact info for all * aggregators - we'll need to know how to talk to any * of them in case of failures */ OBJ_CONSTRUCT(&buf, opal_buffer_t); if (ORCM_SUCCESS != (ret = orcm_cfgi.define_system(&config, &mynode, &orte_process_info.num_procs, &buf))) { OBJ_DESTRUCT(&buf); error = "define system"; goto error; } /* define a name for myself 
*/ if (ORTE_SUCCESS != (ret = orte_plm_base_set_hnp_name())) { ORTE_ERROR_LOG(ret); error = "orte_plm_base_set_hnp_name"; goto error; } /* define a node and proc object for ourselves as some parts * of ORTE and ORCM require it */ if (NULL == (node = OBJ_NEW(orte_node_t))) { ret = ORTE_ERR_OUT_OF_RESOURCE; error = "out of memory"; goto error; } node->name = strdup(orte_process_info.nodename); opal_pointer_array_set_item(orte_node_pool, ORTE_PROC_MY_NAME->vpid, node); if (NULL == (proc = OBJ_NEW(orte_proc_t))) { ret = ORTE_ERR_OUT_OF_RESOURCE; error = "out of memory"; goto error; } proc->name.jobid = ORTE_PROC_MY_NAME->jobid; proc->name.vpid = ORTE_PROC_MY_NAME->vpid; OBJ_RETAIN(proc); node->daemon = proc; OBJ_RETAIN(node); proc->node = node; opal_pointer_array_set_item(jdata->procs, ORTE_PROC_MY_NAME->vpid, proc); /* For now, we only support a single scheduler daemon in the system. * This *may* change someday in the future */ scheduler = (orcm_scheduler_t*)opal_list_get_first(orcm_schedulers); ORTE_PROC_MY_SCHEDULER->jobid = scheduler->controller.daemon.jobid; ORTE_PROC_MY_SCHEDULER->vpid = scheduler->controller.daemon.vpid; /* register the ORTE-level params at this time now that the * config has had a chance to push things into the environ */ if (ORTE_SUCCESS != (ret = orte_register_params())) { OBJ_DESTRUCT(&buf); error = "orte_register_params"; goto error; } /* setup callback for SIGPIPE */ setup_sighandler(SIGPIPE, &epipe_handler, epipe_signal_callback); /* Set signal handlers to catch kill signals so we can properly clean up * after ourselves. 
*/ setup_sighandler(SIGTERM, &term_handler, shutdown_signal); setup_sighandler(SIGINT, &int_handler, shutdown_signal); /** setup callbacks for signals we should ignore */ setup_sighandler(SIGUSR1, &sigusr1_handler, signal_callback); setup_sighandler(SIGUSR2, &sigusr2_handler, signal_callback); signals_set = true; /* open and select the pstat framework */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&opal_pstat_base_framework, 0))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "opal_pstat_base_open"; goto error; } if (ORTE_SUCCESS != (ret = opal_pstat_base_select())) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "opal_pstat_base_select"; goto error; } /* open and setup the state machine */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orte_state_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_state_base_select())) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orte_state_base_select"; goto error; } /* open the errmgr */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orte_errmgr_base_open"; goto error; } /* Setup the communication infrastructure */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orte_oob_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_oob_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_oob_base_select"; goto error; } /* Runtime Messaging Layer */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orte_rml_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_rml_base_select())) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orte_rml_base_select"; goto error; } /* select the errmgr */ if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) { 
ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orte_errmgr_base_select"; goto error; } /* Routed system */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orte_rml_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_routed_base_select())) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orte_routed_base_select"; goto error; } /* database */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orcm_db_base_framework, 0))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orcm_db_base_open"; goto error; } /* always restrict daemons to local database components */ if (ORTE_SUCCESS != (ret = orcm_db_base_select())) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orcm_db_base_select"; goto error; } /* datastore - ensure we don't pickup the pmi component, but * don't override anything set by user */ if (NULL == getenv("OMPI_MCA_dstore")) { putenv("OMPI_MCA_dstore=^pmi"); } if (ORTE_SUCCESS != (ret = mca_base_framework_open(&opal_dstore_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "opal_dstore_base_open"; goto error; } if (ORTE_SUCCESS != (ret = opal_dstore_base_select())) { ORTE_ERROR_LOG(ret); error = "opal_dstore_base_select"; goto error; } /* create the handles */ if (0 > (opal_dstore_peer = opal_dstore.open("PEER"))) { error = "opal dstore global"; ret = ORTE_ERR_FATAL; goto error; } if (0 > (opal_dstore_internal = opal_dstore.open("INTERNAL"))) { error = "opal dstore internal"; ret = ORTE_ERR_FATAL; goto error; } if (0 > (opal_dstore_nonpeer = opal_dstore.open("NONPEER"))) { error = "opal dstore nonpeer"; ret = ORTE_ERR_FATAL; goto error; } /* initialize the nidmaps */ if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(NULL))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "orte_util_nidmap_init"; goto error; } /* extract the cluster description and setup the routed info - the orcm routed component * will know what to do. 
*/ n = 1; if (OPAL_SUCCESS != (ret = opal_dss.unpack(&buf, &clusterbuf, &n, OPAL_BUFFER))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "extract cluster buf"; goto error; } if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, clusterbuf))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); OBJ_RELEASE(clusterbuf); error = "orte_routed.init_routes"; goto error; } OBJ_RELEASE(clusterbuf); /* extract the uri buffer and load the hash tables */ n = 1; if (OPAL_SUCCESS != (ret = opal_dss.unpack(&buf, &uribuf, &n, OPAL_BUFFER))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); error = "extract uri buffer"; goto error; } if (ORTE_SUCCESS != (ret = orte_rml_base_update_contact_info(uribuf))) { ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&buf); OBJ_RELEASE(uribuf); error = "load hash tables"; goto error; } OBJ_DESTRUCT(&buf); OBJ_RELEASE(uribuf); /* construct the thread object */ OBJ_CONSTRUCT(&progress_thread, opal_thread_t); /* fork off a thread to progress it */ progress_thread.t_run = progress_thread_engine; progress_thread_running = true; if (OPAL_SUCCESS != (ret = opal_thread_start(&progress_thread))) { error = "progress thread start"; progress_thread_running = false; goto error; } /* * Group communications */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_grpcomm_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_grpcomm_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_grpcomm_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_grpcomm_base_select"; goto error; } /* Open/select the odls */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_odls_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_odls_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_odls_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_odls_base_select"; goto error; } /* enable communication with the rml */ if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { ORTE_ERROR_LOG(ret); error = "orte_rml.enable_comm"; goto error; } /* setup the FileM 
*/ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_filem_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_filem_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_filem_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_filem_base_select"; goto error; } /* * Initalize the CR setup * Note: Always do this, even in non-FT builds. * If we don't some user level tools may hang. */ opal_cr_set_enabled(false); if (ORTE_SUCCESS != (ret = orte_cr_init())) { ORTE_ERROR_LOG(ret); error = "orte_cr_init"; goto error; } /* setup the DFS framework */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_dfs_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_dfs_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_dfs_select"; goto error; } return ORTE_SUCCESS; error: orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); return ORTE_ERR_SILENT; }
/* * Open all MCA components so that they can register their MCA * parameters. Take a shotgun approach here and indiscriminately open * all components -- don't be selective. To this end, we need to clear * out the environment of all OMPI_MCA_<type> variables to ensure * that the open algorithms don't try to only open one component. */ void orte_info_open_components(void) { int i; char *env, *str; char *target, *save, *type; char **env_save=NULL; bool need_close_components = false; orte_info_component_map_t *map; if (opened_components) { return; } /* init the map */ OBJ_CONSTRUCT(&component_map, opal_pointer_array_t); opal_pointer_array_init(&component_map, 256, INT_MAX, 128); /* Clear out the environment. Use strdup() to orphan the resulting * strings because items are placed in the environment by reference, * not by value. */ for (i = 0; i < mca_types.size; ++i) { if (NULL == (type = (char*)opal_pointer_array_get_item(&mca_types, i))) { continue; } asprintf(&env, "OMPI_MCA_%s", type); if (NULL != (save = getenv(env))) { /* save this param so it can later be restored */ asprintf(&str, "%s=%s", env, save); opal_argv_append_nosize(&env_save, str); free(str); /* can't manipulate it directly, so make a copy first */ asprintf(&target, "%s=", env); putenv(target); free(target); } } /* some components require the event library be active, so activate it */ if (OPAL_SUCCESS != opal_event_base_open()) { str = "opal_event_base_open"; goto error; } /* Open the DSS */ if (ORTE_SUCCESS != opal_dss_open()) { str = "Unable to initialize the DSS"; goto error; } /* Open up the MCA */ if (OPAL_SUCCESS != mca_base_open()) { str = "mca_base_open failed"; goto error; } /* Register the OPAL layer's MCA parameters */ if (OPAL_SUCCESS != opal_register_params()) { str = "opal_register_params failed"; goto error; } /* Register the ORTE layer's MCA parameters */ if (ORTE_SUCCESS != orte_register_params()) { str = "orte_register_params failed"; goto error; } /* Initialize the opal_output 
system */ if (!opal_output_init()) { str = "opal_output_init failed"; goto error; } /* Find / open all components */ map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("base"); opal_pointer_array_add(&component_map, map); /* set default error message from here forward */ str = "A component framework failed to open properly."; /* OPAL frameworks */ if (OPAL_SUCCESS != opal_backtrace_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("backtrace"); map->components = &opal_backtrace_base_components_opened; opal_pointer_array_add(&component_map, map); if (OPAL_SUCCESS != opal_memory_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("memory"); map->components = &opal_memory_base_components_opened; opal_pointer_array_add(&component_map, map); /* the event framework is already open - just get its components */ map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("event"); map->components = &opal_event_components; opal_pointer_array_add(&component_map, map); if (OPAL_SUCCESS != opal_memchecker_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("memchecker"); map->components = &opal_memchecker_base_components_opened; opal_pointer_array_add(&component_map, map); if (OPAL_SUCCESS != opal_shmem_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("shmem"); map->components = &opal_shmem_base_components_opened; opal_pointer_array_add(&component_map, map); #if OPAL_HAVE_HWLOC if (OPAL_SUCCESS != opal_hwloc_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("hwloc"); map->components = &opal_hwloc_base_components; opal_pointer_array_add(&component_map, map); #endif if (OPAL_SUCCESS != opal_timer_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("timer"); map->components = &opal_timer_base_components_opened; 
opal_pointer_array_add(&component_map, map); #if OPAL_ENABLE_FT_CR == 1 if (OPAL_SUCCESS != opal_crs_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("crs"); map->components = &opal_crs_base_components_available; opal_pointer_array_add(&component_map, map); #endif if (OPAL_SUCCESS != opal_if_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("if"); map->components = &opal_if_components; opal_pointer_array_add(&component_map, map); /* OPAL's installdirs base open has already been called as part of * opal_init_util() back in main(). */ map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("installdirs"); map->components = &opal_installdirs_components; opal_pointer_array_add(&component_map, map); /* ORTE frameworks * Set orte_process_info.proc_type to HNP to force all frameworks to * open components */ orte_process_info.proc_type = ORTE_PROC_HNP; if (ORTE_SUCCESS != orte_state_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("state"); map->components = &orte_state_base_components_available; opal_pointer_array_add(&component_map, map); if (ORTE_SUCCESS != orte_errmgr_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("errmgr"); map->components = &orte_errmgr_base_components_available; opal_pointer_array_add(&component_map, map); if (ORTE_SUCCESS != orte_grpcomm_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("grpcomm"); map->components = &orte_grpcomm_base.components_available; opal_pointer_array_add(&component_map, map); if (ORTE_SUCCESS != orte_db_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("db"); map->components = &orte_db_base.available_components; opal_pointer_array_add(&component_map, map); if (ORTE_SUCCESS != orte_ess_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = 
strdup("ess"); map->components = &orte_ess_base_components_available; opal_pointer_array_add(&component_map, map); #if !ORTE_DISABLE_FULL_SUPPORT if (ORTE_SUCCESS != mca_oob_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("oob"); map->components = &mca_oob_base_components; opal_pointer_array_add(&component_map, map); if (ORTE_SUCCESS != orte_odls_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("odls"); map->components = &orte_odls_base.available_components; opal_pointer_array_add(&component_map, map); if (ORTE_SUCCESS != orte_iof_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("iof"); map->components = &orte_iof_base.iof_components_opened; opal_pointer_array_add(&component_map, map); if (ORTE_SUCCESS != orte_ras_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("ras"); map->components = &orte_ras_base.ras_opened; opal_pointer_array_add(&component_map, map); if (ORTE_SUCCESS != orte_rmaps_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("rmaps"); map->components = &orte_rmaps_base.available_components; opal_pointer_array_add(&component_map, map); if (ORTE_SUCCESS != orte_rml_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("rml"); map->components = &orte_rml_base_components; opal_pointer_array_add(&component_map, map); if (ORTE_SUCCESS != orte_routed_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("routed"); map->components = &orte_routed_base_components; opal_pointer_array_add(&component_map, map); if (ORTE_SUCCESS != orte_plm_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("plm"); map->components = &orte_plm_base.available_components; opal_pointer_array_add(&component_map, map); #if OPAL_ENABLE_FT_CR == 1 if (ORTE_SUCCESS != 
orte_snapc_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("snapc"); map->components = &orte_snapc_base_components_available; opal_pointer_array_add(&component_map, map); #endif if (ORTE_SUCCESS != orte_sensor_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("sensor"); map->components = &mca_sensor_base_components_available; opal_pointer_array_add(&component_map, map); if (ORTE_SUCCESS != orte_filem_base_open()) { goto error; } map = OBJ_NEW(orte_info_component_map_t); map->type = strdup("filem"); map->components = &orte_filem_base_components_available; opal_pointer_array_add(&component_map, map); #endif /* flag that we need to close components */ need_close_components = true; /* Restore the environment to what it was before we started so that * if users setenv OMPI_MCA_<framework name> to some value, they'll * see that value when it is shown via --param output. */ if (NULL != env_save) { for (i = 0; i < opal_argv_count(env_save); ++i) { putenv(env_save[i]); } } /* All done */ opened_components = true; return; error: fprintf(stderr, "%s\n", str); fprintf(stderr, "orte-info will likely not display all configuration information\n"); if (need_close_components) { opened_components = true; orte_info_close_components(); } }
/**
 * Initialize the ORTE runtime for this process.
 *
 * The first call performs the full start-up sequence: OPAL is initialized,
 * the process type is recorded, locks / MCA parameters / show_help are set
 * up, the ESS framework is opened and a module selected, the ORTE event
 * base is established, and finally the selected ESS module's init() runs.
 * Any later call while orte_initialized is positive only bumps that counter
 * and returns ORTE_SUCCESS.
 *
 * @param pargc  Pointer to the argument count; handed to opal_init().
 * @param pargv  Pointer to the argument vector; handed to opal_init().
 * @param flags  Process type, stored in orte_process_info.proc_type.
 *
 * @return ORTE_SUCCESS on success; otherwise the error code returned by the
 *         step that failed.  Unless that code is ORTE_ERR_SILENT, a
 *         "help-orte-runtime" / "orte_init:startup:internal-failure"
 *         message naming the failed step is shown before returning.
 *
 * NOTE(review): orte_initialized is not decremented on the failure path, so
 * a subsequent call after a failed init returns ORTE_SUCCESS without
 * retrying -- confirm this is intended.
 */
int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
{
    int ret;
    /* name of the step that failed; only ever points at string literals */
    const char *error = NULL;

    if (0 < orte_initialized) {
        /* track number of times we have been called */
        orte_initialized++;
        return ORTE_SUCCESS;
    }
    orte_initialized++;

    /* initialize the opal layer */
    if (ORTE_SUCCESS != (ret = opal_init(pargc, pargv))) {
        error = "opal_init";
        goto error;
    }

    /* ensure we know the type of proc for when we finalize */
    orte_process_info.proc_type = flags;

    /* setup the locks */
    if (ORTE_SUCCESS != (ret = orte_locks_init())) {
        error = "orte_locks_init";
        goto error;
    }

    /* Register all MCA Params */
    if (ORTE_SUCCESS != (ret = orte_register_params())) {
        error = "orte_register_params";
        goto error;
    }

    /* setup the orte_show_help system */
    if (ORTE_SUCCESS != (ret = orte_show_help_init())) {
        /* report the call that actually failed, like every other step */
        error = "orte_show_help_init";
        goto error;
    }

    /* register handler for errnum -> string conversion */
    opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);

    /* Ensure the rest of the process info structure is initialized */
    if (ORTE_SUCCESS != (ret = orte_proc_info())) {
        error = "orte_proc_info";
        goto error;
    }

    /* open the ESS and select the correct module for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess_base_open())) {
        error = "orte_ess_base_open";
        goto error;
    }

    if (ORTE_SUCCESS != (ret = orte_ess_base_select())) {
        error = "orte_ess_base_select";
        goto error;
    }

#if ORTE_ENABLE_PROGRESS_THREADS
#if OPAL_EVENT_HAVE_THREAD_SUPPORT
    /* get a separate orte event base */
    orte_event_base = opal_event_base_create();
    /* construct the thread object */
    OBJ_CONSTRUCT(&orte_progress_thread, opal_thread_t);
    /* fork off a thread to progress it */
    orte_progress_thread.t_run = orte_progress_thread_engine;
    if (OPAL_SUCCESS != (ret = opal_thread_start(&orte_progress_thread))) {
        error = "orte progress thread start";
        goto error;
    }
#else
    /* progress threads were requested but the event library cannot support them */
    error = "event thread support is not configured";
    ret = ORTE_ERROR;
    goto error;
#endif
#else
    /* set the event base to the opal one */
    orte_event_base = opal_event_base;
#endif

    /* initialize the RTE for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess.init())) {
        error = "orte_ess_init";
        goto error;
    }

    /* All done */
    return ORTE_SUCCESS;

error:
    /* ORTE_ERR_SILENT suppresses the help message; the code is still returned */
    if (ORTE_ERR_SILENT != ret) {
        orte_show_help("help-orte-runtime",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }

    return ret;
}