static int setupOrcm(void) { int ret; /* Call enough of orcm/orte so that the configuration file is * read and we know if we are an aggregator or a compute node daemon. */ if (ORTE_SUCCESS != (ret = opal_init(NULL, NULL))) { printf("FAIL Error in opal_init()\n"); return 1; } orte_process_info.proc_type = ORCM_DAEMON; if (ORTE_SUCCESS != (ret = orte_proc_info())) { printf("FAIL Error in orte_proc_info()\n"); return 1; } orte_event_base = opal_sync_event_base; orcm_clusters = OBJ_NEW(opal_list_t); orcm_schedulers = OBJ_NEW(opal_list_t); if (ORCM_SUCCESS != (ret = mca_base_framework_open(&orcm_cfgi_base_framework, 0))) { printf("FAIL orcm_cfgi_base_open\n"); return 1; } if (ORCM_SUCCESS != (ret = orcm_cfgi_base_select())) { printf("FAIL orcm_cfgi_select\n"); /* bad configuration file */ return 99; } if (ORCM_SUCCESS != (ret = mca_base_framework_open(&orcm_sst_base_framework, 0))) { printf("FAIL orcm_sst_base_framework\n"); return 1; } if (ORCM_SUCCESS != (ret = orcm_sst_base_select())) { printf("FAIL orcm_sst_base_select\n"); return 1; } /* We need to set up the ESS framework because when ft_tester kills * itself it calls the abort function. */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_ess_base_framework, 0))) { printf("FAIL orte_ess_base_open\n"); return 1; } if (ORTE_SUCCESS != (ret = orte_ess_base_select())) { printf("FAIL orte_ess_base_select\n"); return 1; } if (ORTE_SUCCESS != (ret = orte_ess.init())) { printf("FAIL orte_ess_init\n"); return 1; } return 0; }
static int orte_cr_coord_post_restart(void) { int ret, exit_status = ORTE_SUCCESS; orte_proc_type_t prev_type = ORTE_PROC_TYPE_NONE; char * tmp_dir = NULL; opal_output_verbose(10, orte_cr_output, "orte_cr: coord_post_restart: orte_cr_coord_post_restart()"); /* * Add the previous session directory for cleanup */ opal_crs_base_cleanup_append(orte_process_info.job_session_dir, true); tmp_dir = opal_dirname(orte_process_info.job_session_dir); if( NULL != tmp_dir ) { opal_crs_base_cleanup_append(tmp_dir, true); free(tmp_dir); tmp_dir = NULL; } /* * Refresh System information */ prev_type = orte_process_info.proc_type; if( ORTE_SUCCESS != (ret = orte_proc_info_finalize()) ) { exit_status = ret; } if( NULL != orte_process_info.my_hnp_uri ) { free(orte_process_info.my_hnp_uri); orte_process_info.my_hnp_uri = NULL; } if( NULL != orte_process_info.my_daemon_uri ) { free(orte_process_info.my_daemon_uri); orte_process_info.my_daemon_uri = NULL; } if( ORTE_SUCCESS != (ret = orte_proc_info()) ) { exit_status = ret; } orte_process_info.proc_type = prev_type; orte_process_info.my_name = *ORTE_NAME_INVALID; /* * Notify the ESS */ if( NULL != orte_ess.ft_event ) { if( ORTE_SUCCESS != (ret = orte_ess.ft_event(OPAL_CRS_RESTART))) { exit_status = ret; goto cleanup; } } cleanup: return exit_status; }
/*
 * Set up the job and proc session directory names for the given process and
 * verify that the configured tmpdir base is not in a prohibited location.
 *
 * @param proc  Process name handed to the job/proc session directory setup
 *              helpers.
 *
 * @return ORTE_SUCCESS on success, the error code returned by
 *         _setup_job_session_dir() / _setup_proc_session_dir() if either
 *         fails, or ORTE_ERR_FATAL (after emitting a show_help message) when
 *         orte_process_info.tmpdir_base starts with one of the
 *         comma-separated prefixes in orte_prohibited_session_dirs.
 */
int orte_session_setup_base(orte_process_name_t *proc)
{
    int rc;

    /* Ensure that system info is set */
    orte_proc_info();

    /* setup job and proc session directories */
    if (ORTE_SUCCESS != (rc = _setup_job_session_dir(proc))) {
        return rc;
    }

    if (ORTE_SUCCESS != (rc = _setup_proc_session_dir(proc))) {
        return rc;
    }

    /* Check whether this prefix is allowed by the system.  The check needs
     * BOTH a list of prohibited locations and a tmpdir base to compare
     * against it: with only one of them there is nothing to compare, and
     * proceeding would hand a NULL pointer to opal_argv_split() or
     * strncmp().
     */
    if (NULL != orte_prohibited_session_dirs &&
        NULL != orte_process_info.tmpdir_base) {
        char **list;
        int i, len;

        /* break the string into tokens - it should be
         * separated by ','
         */
        list = opal_argv_split(orte_prohibited_session_dirs, ',');
        len = opal_argv_count(list);

        /* cycle through the list */
        for (i = 0; i < len; i++) {
            /* prefix match: the tmpdir base begins with this entry */
            if (0 == strncmp(orte_process_info.tmpdir_base, list[i], strlen(list[i]))) {
                /* this is a prohibited location */
                orte_show_help("help-orte-runtime.txt",
                               "orte:session:dir:prohibited",
                               true, orte_process_info.tmpdir_base,
                               orte_prohibited_session_dirs);
                opal_argv_free(list);
                return ORTE_ERR_FATAL;
            }
        }
        opal_argv_free(list);  /* done with this */
    }

    return ORTE_SUCCESS;
}
static int orte_cr_coord_post_restart(void) { int ret, exit_status = ORTE_SUCCESS; orte_proc_type_t prev_type = ORTE_PROC_TYPE_NONE; opal_output_verbose(10, orte_cr_output, "orte_cr: coord_post_restart: orte_cr_coord_post_restart()"); /* * Refresh System information */ prev_type = orte_process_info.proc_type; if( ORTE_SUCCESS != (ret = orte_proc_info_finalize()) ) { exit_status = ret; } if( NULL != orte_process_info.my_hnp_uri ) { free(orte_process_info.my_hnp_uri); orte_process_info.my_hnp_uri = NULL; } if( NULL != orte_process_info.my_daemon_uri ) { free(orte_process_info.my_daemon_uri); orte_process_info.my_daemon_uri = NULL; } if( ORTE_SUCCESS != (ret = orte_proc_info()) ) { exit_status = ret; } orte_process_info.proc_type = prev_type; orte_process_info.my_name = *ORTE_NAME_INVALID; /* * Notify the ESS */ if( NULL != orte_ess.ft_event ) { if( ORTE_SUCCESS != (ret = orte_ess.ft_event(OPAL_CRS_RESTART))) { exit_status = ret; goto cleanup; } } cleanup: return exit_status; }
/*
 * Initialize the ORTE layer.
 *
 * The function is reference counted through orte_initialized: only the first
 * call performs the initialization, later calls just bump the counter and
 * return ORTE_SUCCESS.
 *
 * @param pargc  Forwarded to opal_init().
 * @param pargv  Forwarded to opal_init().
 * @param flags  Process type, stored in orte_process_info.proc_type.
 *
 * @return ORTE_SUCCESS, or the error code of the step that failed.  Unless
 *         that code is ORTE_ERR_SILENT, a show_help message naming the
 *         failed step is emitted first.
 */
int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
{
    int rc;
    char *failed_step = NULL;

    /* track number of times we have been called */
    if (0 < orte_initialized) {
        orte_initialized++;
        return ORTE_SUCCESS;
    }
    orte_initialized++;

    /* initialize the opal layer */
    rc = opal_init(pargc, pargv);
    if (ORTE_SUCCESS != rc) {
        failed_step = "opal_init";
        goto fail;
    }

    /* ensure we know the type of proc for when we finalize */
    orte_process_info.proc_type = flags;

    /* setup the locks */
    rc = orte_locks_init();
    if (ORTE_SUCCESS != rc) {
        failed_step = "orte_locks_init";
        goto fail;
    }

    /* Register all MCA Params */
    rc = orte_register_params();
    if (ORTE_SUCCESS != rc) {
        failed_step = "orte_register_params";
        goto fail;
    }

    /* setup the orte_show_help system */
    rc = orte_show_help_init();
    if (ORTE_SUCCESS != rc) {
        failed_step = "opal_output_init";
        goto fail;
    }

    /* register handler for errnum -> string conversion */
    opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);

    /* Ensure the rest of the process info structure is initialized */
    rc = orte_proc_info();
    if (ORTE_SUCCESS != rc) {
        failed_step = "orte_proc_info";
        goto fail;
    }

    /* open the ESS and select the correct module for this environment */
    rc = orte_ess_base_open();
    if (ORTE_SUCCESS != rc) {
        failed_step = "orte_ess_base_open";
        goto fail;
    }
    rc = orte_ess_base_select();
    if (ORTE_SUCCESS != rc) {
        failed_step = "orte_ess_base_select";
        goto fail;
    }

    /* choose the event base: applications may get their own base driven by
     * a progress thread, everything else shares the opal one
     */
    if (ORTE_PROC_IS_APP) {
#if !ORTE_DISABLE_FULL_SUPPORT && ORTE_ENABLE_PROGRESS_THREADS
#if OPAL_EVENT_HAVE_THREAD_SUPPORT
        /* get a separate orte event base */
        orte_event_base = opal_event_base_create();

        /* setup the finalize event - we'll need it
         * to break the thread out of the event lib
         * when we want to stop it
         */
        opal_event_set(orte_event_base, &orte_finalize_event, -1,
                       OPAL_EV_WRITE, ignore_callback, NULL);
        opal_event_set_priority(&orte_finalize_event, ORTE_ERROR_PRI);
#if 0
        {
            /* seems strange, but wake us up once a second just so we can check for new events */
            opal_event_t *ev;
            struct timeval tv = {1,0};
            ev = opal_event_alloc();
            opal_event_evtimer_set(orte_event_base, ev, ignore_callback, ev);
            opal_event_set_priority(ev, ORTE_INFO_PRI);
            opal_event_evtimer_add(ev, &tv);
        }
#endif
        /* construct the thread object */
        OBJ_CONSTRUCT(&orte_progress_thread, opal_thread_t);

        /* fork off a thread to progress it */
        orte_progress_thread.t_run = orte_progress_thread_engine;
        rc = opal_thread_start(&orte_progress_thread);
        if (OPAL_SUCCESS != rc) {
            failed_step = "orte progress thread start";
            goto fail;
        }
#else
        failed_step = "event thread support is not configured";
        rc = ORTE_ERROR;
        goto fail;
#endif
#else
        /* set the event base to the opal one */
        orte_event_base = opal_event_base;
#endif
    } else {
        /* set the event base to the opal one */
        orte_event_base = opal_event_base;
    }

    /* initialize the RTE for this environment */
    rc = orte_ess.init();
    if (ORTE_SUCCESS != rc) {
        failed_step = "orte_ess_init";
        goto fail;
    }

    /* All done */
    return ORTE_SUCCESS;

 fail:
    /* report which step failed unless the error is flagged as silent */
    if (ORTE_ERR_SILENT != rc) {
        orte_show_help("help-orte-runtime",
                       "orte_init:startup:internal-failure",
                       true, failed_step, ORTE_ERROR_NAME(rc), rc);
    }

    return rc;
}
/*
 * Restart this process under a new name.
 *
 * Restarts the event library, closes the iof/smr/gpr/ns/rml frameworks and
 * the wait subsystem, installs the new process name, and then re-opens,
 * re-selects and re-initializes those frameworks.
 *
 * @param name  New process name; a copy is installed as
 *              orte_process_info.my_name.
 * @param uri   Contact URI recorded for the ns/gpr replica when the
 *              corresponding replica is not already set.
 *
 * @return ORTE_SUCCESS, or the error code of the first step that failed
 *         (the error is also reported through ORTE_ERROR_LOG).
 *
 * Ownership: the copies of the old and new names made here are either handed
 * over to orte_process_info or released before returning, on every path.
 */
int orte_restart(orte_process_name_t *name, const char* uri)
{
    int rc;
    bool old_name_adopted = false;
    orte_process_name_t *old_name = NULL;
    orte_process_name_t *new_name = NULL;

    if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&old_name, orte_process_info.my_name, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&new_name, name, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /*
     * Restart event library
     */
    if (ORTE_SUCCESS != (rc = opal_event_restart())) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /*
     * Close selected components.
     */
    orte_iof_base.iof_flush = false;
    if (ORTE_SUCCESS != (rc = orte_iof_base_close())) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    if (ORTE_SUCCESS != (rc = orte_smr_base_close())) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    if (ORTE_SUCCESS != (rc = orte_gpr_base_close())) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    if (ORTE_SUCCESS != (rc = orte_ns_base_close())) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    if (ORTE_SUCCESS != (rc = orte_rml_base_close())) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    if (ORTE_SUCCESS != (rc = orte_wait_finalize())) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /*
     * setup new global state
     */
    orte_process_info.seed = false;

    /* if NULL, set ns_replica to old_name and set the corresponding uri parameter */
    if (NULL == orte_process_info.ns_replica) {
        orte_process_info.ns_replica = old_name;
        orte_process_info.ns_replica_uri = strdup(uri);
        old_name_adopted = true;
    }

    /* if NULL, set gpr_replica to old_name and set the corresponding uri parameter.
     * NOTE(review): when both replicas were NULL they end up pointing at the
     * same name object - confirm that whoever releases them expects that.
     */
    if (NULL == orte_process_info.gpr_replica) {
        orte_process_info.gpr_replica = old_name;
        orte_process_info.gpr_replica_uri = strdup(uri);
        old_name_adopted = true;
    }

    /* the copy of the old name is only needed if a replica took it over */
    if (!old_name_adopted) {
        free(old_name);
    }
    old_name = NULL;

    /* ensure my_name is set to the new_name; orte_process_info owns it from here on */
    free(orte_process_info.my_name);
    orte_process_info.my_name = new_name;
    new_name = NULL;

    /* The proc_info / sys_info structures and the session directory are not
     * re-initialized as part of a restart.
     */

    /*
     * Re-open components.
     */
    if (ORTE_SUCCESS != (rc = orte_wait_init())) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    if (ORTE_SUCCESS != (rc = orte_ns_base_open())) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    if (ORTE_SUCCESS != (rc = orte_rml_base_open())) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    if (ORTE_SUCCESS != (rc = orte_gpr_base_open())) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    if (ORTE_SUCCESS != (rc = orte_smr_base_open())) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /*
     * Select new modules.
     */
    if (ORTE_SUCCESS != (rc = orte_rml_base_select())) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    if (ORTE_SUCCESS != (rc = orte_ns_base_select())) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    if (ORTE_SUCCESS != (rc = orte_gpr_base_select())) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    if (ORTE_SUCCESS != (rc = orte_smr_base_select())) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /*
     * Set contact info for the replicas
     */
    if (ORTE_SUCCESS != (rc = orte_rml.set_uri(orte_process_info.ns_replica_uri))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    if (ORTE_SUCCESS != (rc = orte_rml.set_uri(orte_process_info.gpr_replica_uri))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /*
     * Re-init selected modules.
     */
    if (ORTE_SUCCESS != (rc = orte_rml.init())) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    if (ORTE_SUCCESS != (rc = orte_ns.init())) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    if (ORTE_SUCCESS != (rc = orte_gpr.init())) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /*
     * Complete restart
     */
    if (ORTE_SUCCESS != (rc = orte_iof_base_open())) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    if (ORTE_SUCCESS != (rc = orte_iof_base_select())) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    rc = ORTE_SUCCESS;

 cleanup:
    /* Both pointers are NULL once ownership has been handed over, so these
     * only release copies that were never installed.
     */
    free(old_name);
    free(new_name);
    return rc;
}
int main(int argc, char* argv[]) { orte_proc_info(); /* initialize proc info structure */ orte_process_info.my_name = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); orte_process_info.my_name->cellid = 0; orte_process_info.my_name->jobid = 0; orte_process_info.my_name->vpid = 0; test_init("orte_session_dir_t"); test_out = fopen( "test_session_dir_out", "w+" ); if( test_out == NULL ) { test_failure("test_session_dir couldn't open test file failed"); test_finalize(); exit(1); } fprintf(test_out, "running test1\n"); if (test1()) { test_success(); } else { test_failure("orte_session_dir_t test1 failed"); } fprintf(test_out, "running test2\n"); if (test2()) { test_success(); } else { test_failure("orte_session_dir_t test2 failed"); } fprintf(test_out, "running test3\n"); if (test3()) { test_success(); } else { test_failure("orte_session_dir_t test3 failed"); } fprintf(test_out, "running test4\n"); if (test4()) { test_success(); } else { test_failure("orte_session_dir_t test4 failed"); } fprintf(test_out, "running test5\n"); if (test5()) { test_success(); } else { test_failure("orte_session_dir_t test5 failed"); } fprintf(test_out, "running test6\n"); if (test6()) { test_success(); } else { test_failure("orte_session_dir_t test6 failed"); } fprintf(test_out, "running test7\n"); if (test7()) { test_success(); } else { test_failure("orte_session_dir_t test7 failed"); } fprintf(test_out, "running test8\n"); if (test8()) { test_success(); } else { test_failure("orte_session_dir_t test8 failed"); } fprintf(test_out, "completed all tests\n"); fclose(test_out); /* clean up */ orte_proc_info_finalize(); test_finalize(); return 0; }
/*
 * Initialize the ORCM layer.
 *
 * Reference counted through orcm_initialized: only the first call performs
 * the initialization, later calls just bump the counter and return
 * ORCM_SUCCESS.
 *
 * @param flags  Process type, stored in orte_process_info.proc_type.
 *
 * @return ORCM_SUCCESS, or the error code of the step that failed.  Unless
 *         that code is ORCM_ERR_SILENT, a show_help message naming the
 *         failed step is emitted first.
 */
int orcm_init(orcm_proc_type_t flags)
{
    int rc;
    char *failed_step;
    int spin;
    opal_output_stream_t lds;

    /* track number of times we have been called */
    if (0 < orcm_initialized) {
        orcm_initialized++;
        return ORCM_SUCCESS;
    }
    orcm_initialized++;

    if (NULL != getenv("ORCM_MCA_spin")) {
        /* spin until a debugger can attach (and clear "spin") */
        spin = 1;
        while (0 != spin) {
            for (rc = 0; rc < 10000; rc++) {
                /* busy wait */
            }
        }
    }

    /* initialize the opal layer */
    rc = opal_init(NULL, NULL);
    if (ORTE_SUCCESS != rc) {
        failed_step = "opal_init";
        goto fail;
    }

    /* register the debug verbosity parameter; a negative value leaves the
     * debug output channel closed
     */
    orcm_debug_verbosity = -1;
    (void) mca_base_var_register("orcm", "orcm", NULL, "debug_verbose",
                                 "Verbosity level for ORCM debug messages (default: 1)",
                                 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_ALL,
                                 &orcm_debug_verbosity);
    if (0 <= orcm_debug_verbosity) {
        /* get a debug output channel */
        OBJ_CONSTRUCT(&lds, opal_output_stream_t);
        lds.lds_want_stdout = true;
        orcm_debug_output = opal_output_open(&lds);
        OBJ_DESTRUCT(&lds);
        /* set the verbosity */
        opal_output_set_verbosity(orcm_debug_output, orcm_debug_verbosity);
    }

    /* ensure we know the type of proc for when we finalize */
    orte_process_info.proc_type = flags;

    /* setup the locks */
    rc = orte_locks_init();
    if (ORTE_SUCCESS != rc) {
        failed_step = "orte_locks_init";
        goto fail;
    }

    /* register handler for errnum -> string conversion */
    opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);

    /* Ensure the rest of the process info structure is initialized */
    rc = orte_proc_info();
    if (ORTE_SUCCESS != rc) {
        failed_step = "orte_proc_info";
        goto fail;
    }

    /* register handler for errnum -> string conversion */
    opal_error_register("ORCM", ORCM_ERR_BASE, ORCM_ERR_MAX, orcm_err2str);

    /* register handler for attr key -> string conversion */
    rc = orte_attr_register("orcm", ORCM_ATTR_KEY_BASE,
                            ORCM_ATTR_KEY_MAX, orcm_attr_key_print);
    if (ORTE_SUCCESS != rc) {
        failed_step = "register attr print";
        goto fail;
    }

    /* we don't need a progress thread as all our tools loop inside themselves,
     * so define orte_event_base to be the base opal_event_base
     */
    orte_event_base = opal_sync_event_base;

    /* setup the globals */
    orcm_clusters = OBJ_NEW(opal_list_t);
    orcm_schedulers = OBJ_NEW(opal_list_t);

    /* everyone must open the cfgi framework */
    rc = mca_base_framework_open(&orcm_cfgi_base_framework, 0);
    if (ORCM_SUCCESS != rc) {
        failed_step = "orcm_cfgi_base_open";
        goto fail;
    }
    rc = orcm_cfgi_base_select();
    if (ORCM_SUCCESS != rc) {
        failed_step = "orcm_cfgi_select";
        goto fail;
    }

    /* everyone must open the sst framework */
    rc = mca_base_framework_open(&orcm_sst_base_framework, 0);
    if (ORCM_SUCCESS != rc) {
        failed_step = "orcm_sst_base_open";
        goto fail;
    }
    rc = orcm_sst_base_select();
    if (ORCM_SUCCESS != rc) {
        failed_step = "orcm_sst_select";
        goto fail;
    }

    /* open the ESS and select the correct module for this environment - the
     * orcm module is basically a no-op, but we need the framework defined
     * as other parts of ORTE will want to call it
     */
    rc = mca_base_framework_open(&orte_ess_base_framework, 0);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        failed_step = "orte_ess_base_open";
        goto fail;
    }
    rc = orte_ess_base_select();
    if (ORTE_SUCCESS != rc) {
        failed_step = "orte_ess_base_select";
        goto fail;
    }
    rc = orte_ess.init();
    if (ORTE_SUCCESS != rc) {
        failed_step = "orte_ess_init";
        goto fail;
    }

    /* initialize us - we will register the ORTE-level MCA params in there */
    rc = orcm_sst.init();
    if (ORTE_SUCCESS != rc) {
        failed_step = "orte_init";
        goto fail;
    }

    /* setup the orte_show_help system - don't do this until the
     * end as otherwise show_help messages won't appear
     */
    rc = orte_show_help_init();
    if (ORTE_SUCCESS != rc) {
        failed_step = "opal_output_init";
        goto fail;
    }

    /* initialize orcm datatype support */
    rc = orcm_dt_init();
    if (ORCM_SUCCESS != rc) {
        failed_step = "orcm_dt_init";
        goto fail;
    }

    /* flag that orte is initialized so things can work */
    orte_initialized = true;

    return ORCM_SUCCESS;

 fail:
    /* report which step failed unless the error is flagged as silent */
    if (ORCM_ERR_SILENT != rc) {
        opal_show_help("help-orcm-runtime.txt",
                       "orcm_init:startup:internal-failure",
                       true, failed_step, ORTE_ERROR_NAME(rc), rc);
    }

    return rc;
}
/*
 * Initialize the ORTE layer.
 *
 * The function is reference counted through orte_initialized: only the first
 * call performs the initialization, later calls just bump the counter and
 * return ORTE_SUCCESS.
 *
 * @param pargc  Forwarded to opal_init().
 * @param pargv  Forwarded to opal_init().
 * @param flags  Process type, stored in orte_process_info.proc_type.
 *
 * @return ORTE_SUCCESS, or the error code of the step that failed.  Unless
 *         that code is ORTE_ERR_SILENT, a show_help message naming the
 *         failed step is emitted first.
 */
int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
{
    int rc;
    char *failed_step = NULL;

    /* track number of times we have been called */
    if (0 < orte_initialized) {
        orte_initialized++;
        return ORTE_SUCCESS;
    }
    orte_initialized++;

    /* initialize the opal layer */
    rc = opal_init(pargc, pargv);
    if (ORTE_SUCCESS != rc) {
        failed_step = "opal_init";
        goto fail;
    }

    /* Convince OPAL to use our naming scheme */
    opal_process_name_print = _process_name_print_for_opal;
    opal_vpid_print = _vpid_print_for_opal;
    opal_jobid_print = _jobid_print_for_opal;
    opal_compare_proc = _process_name_compare;
    opal_convert_string_to_process_name = _convert_string_to_process_name;

    /* ensure we know the type of proc for when we finalize */
    orte_process_info.proc_type = flags;

    /* setup the locks */
    rc = orte_locks_init();
    if (ORTE_SUCCESS != rc) {
        failed_step = "orte_locks_init";
        goto fail;
    }

    /* Register all MCA Params */
    rc = orte_register_params();
    if (ORTE_SUCCESS != rc) {
        failed_step = "orte_register_params";
        goto fail;
    }

    /* setup the orte_show_help system */
    rc = orte_show_help_init();
    if (ORTE_SUCCESS != rc) {
        failed_step = "opal_output_init";
        goto fail;
    }

    /* register handler for errnum -> string conversion */
    opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);

    /* Ensure the rest of the process info structure is initialized */
    rc = orte_proc_info();
    if (ORTE_SUCCESS != rc) {
        failed_step = "orte_proc_info";
        goto fail;
    }

    /* we may have modified the local nodename according to
     * request to retain/strip the FQDN and prefix, so update
     * it here. The OPAL layer will strdup the hostname, so
     * we have to free it first to avoid a memory leak
     * (free(NULL) is a no-op, so no guard is needed)
     */
    free(opal_process_info.nodename);
    /* opal_finalize_util will call free on this pointer so set from strdup */
    opal_process_info.nodename = strdup(orte_process_info.nodename);

    /* setup the dstore framework */
    rc = mca_base_framework_open(&opal_dstore_base_framework, 0);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        failed_step = "opal_dstore_base_open";
        goto fail;
    }
    rc = opal_dstore_base_select();
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        failed_step = "opal_dstore_base_select";
        goto fail;
    }

    /* create the handle; a negative handle means the open failed */
    opal_dstore_internal = opal_dstore.open("INTERNAL", "hash", NULL);
    if (opal_dstore_internal < 0) {
        failed_step = "opal dstore internal";
        rc = ORTE_ERR_FATAL;
        goto fail;
    }

    /* applications additionally get a modex store */
    if (ORTE_PROC_IS_APP) {
        opal_dstore_modex = opal_dstore.open("MODEX", "sm,hash", NULL);
        if (opal_dstore_modex < 0) {
            failed_step = "opal dstore modex";
            rc = ORTE_ERR_FATAL;
            goto fail;
        }
    }

    if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
        /* let the pmix server register params */
        pmix_server_register();
    }

    /* open the ESS and select the correct module for this environment */
    rc = mca_base_framework_open(&orte_ess_base_framework, 0);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        failed_step = "orte_ess_base_open";
        goto fail;
    }
    rc = orte_ess_base_select();
    if (ORTE_SUCCESS != rc) {
        failed_step = "orte_ess_base_select";
        goto fail;
    }

    if (!ORTE_PROC_IS_APP) {
        /* ORTE tools "block" in their own loop over the event
         * base, so no progress thread is required - apps will
         * start their progress thread in ess_base_std_app.c
         * at the appropriate point
         */
        orte_event_base = opal_event_base;
    }

    /* initialize the RTE for this environment */
    rc = orte_ess.init();
    if (ORTE_SUCCESS != rc) {
        failed_step = "orte_ess_init";
        goto fail;
    }

    /* set the remaining opal_process_info fields. Note that
     * the OPAL layer will have initialized these to NULL, and
     * anyone between us would not have strdup'd the string, so
     * we cannot free it here
     */
    opal_process_info.job_session_dir  = orte_process_info.job_session_dir;
    opal_process_info.proc_session_dir = orte_process_info.proc_session_dir;
    opal_process_info.num_local_peers  = (int32_t)orte_process_info.num_local_peers;
    opal_process_info.my_local_rank    = (int32_t)orte_process_info.my_local_rank;
#if OPAL_HAVE_HWLOC
    opal_process_info.cpuset = orte_process_info.cpuset;
#endif /* OPAL_HAVE_HWLOC */

#if OPAL_ENABLE_TIMING
    opal_timing_set_jobid(ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
#endif

    /* All done */
    return ORTE_SUCCESS;

 fail:
    /* report which step failed unless the error is flagged as silent */
    if (ORTE_ERR_SILENT != rc) {
        orte_show_help("help-orte-runtime",
                       "orte_init:startup:internal-failure",
                       true, failed_step, ORTE_ERROR_NAME(rc), rc);
    }

    return rc;
}
/*
 * Initialize the ORTE layer.
 *
 * Does nothing and returns ORTE_SUCCESS when orte_initialized is already
 * set; the flag is set only once every step below has succeeded.
 *
 * @param pargc  Forwarded to opal_init().
 * @param pargv  Forwarded to opal_init().
 * @param flags  Process type, stored in orte_process_info.proc_type.
 *
 * @return ORTE_SUCCESS, or the error code of the step that failed.  A failure
 *         of opal_init() is only logged; for every later step a show_help
 *         message naming the step is emitted unless the error code is
 *         ORTE_ERR_SILENT.
 */
int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
{
    int rc;
    char *failed_step = NULL;

    if (orte_initialized) {
        return ORTE_SUCCESS;
    }

    /* initialize the opal layer - on failure there is no show_help yet,
     * so just log and leave
     */
    rc = opal_init(pargc, pargv);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* ensure we know the type of proc for when we finalize */
    orte_process_info.proc_type = flags;

    /* setup the locks */
    rc = orte_locks_init();
    if (ORTE_SUCCESS != rc) {
        failed_step = "orte_locks_init";
        goto fail;
    }

    /* Register all MCA Params */
    rc = orte_register_params();
    if (ORTE_SUCCESS != rc) {
        failed_step = "orte_register_params";
        goto fail;
    }

    /* setup the orte_show_help system */
    rc = orte_show_help_init();
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        failed_step = "opal_output_init";
        goto fail;
    }

    /* register handler for errnum -> string conversion */
    opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);

    /* Ensure the rest of the process info structure is initialized */
    rc = orte_proc_info();
    if (ORTE_SUCCESS != rc) {
        failed_step = "orte_proc_info";
        goto fail;
    }

    /* open the ESS and select the correct module for this environment */
    rc = orte_ess_base_open();
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        failed_step = "orte_ess_base_open";
        goto fail;
    }
    rc = orte_ess_base_select();
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        failed_step = "orte_ess_base_select";
        goto fail;
    }

    /* initialize the RTE for this environment */
    rc = orte_ess.init();
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        failed_step = "orte_ess_set_name";
        goto fail;
    }

    /* All done */
    orte_initialized = true;
    return ORTE_SUCCESS;

 fail:
    /* report which step failed unless the error is flagged as silent */
    if (ORTE_ERR_SILENT != OPAL_SOS_GET_ERROR_CODE(rc)) {
        orte_show_help("help-orte-runtime",
                       "orte_init:startup:internal-failure",
                       true, failed_step, ORTE_ERROR_NAME(rc), rc);
    }

    return rc;
}
/*
 * Initialize the ORCM layer.
 *
 * Reference counted through orcm_initialized: only the first call performs
 * the initialization, later calls just bump the counter and return
 * ORCM_SUCCESS.
 *
 * @param flags  Process type, stored in orte_process_info.proc_type.  When it
 *               is ORCM_SCHED, orcm_event_exec_path is also set, from the
 *               ORCM_MCA_event_exec_path environment variable or, if that is
 *               unset, to "<install prefix>/bin".
 *
 * @return ORCM_SUCCESS, or the error code of the step that failed.  Unless
 *         that code is ORCM_ERR_SILENT, a show_help message naming the
 *         failed step is emitted first.
 */
int orcm_init(orcm_proc_type_t flags)
{
    int ret;
    char *error, *envar;
    int spin;
    opal_output_stream_t lds;

    if (0 < orcm_initialized) {
        /* track number of times we have been called */
        orcm_initialized++;
        return ORCM_SUCCESS;
    }
    orcm_initialized++;

    if (NULL != getenv("ORCM_MCA_spin")) {
        spin = 1;
        /* spin until a debugger can attach */
        while (0 != spin) {
            ret = 0;
            while (ret < 10000) {
                ret++;
            };
        }
    }

    /* prior to initializing the OPAL layer, check to see
     * if the OPAL (and friends) install location has been
     * moved. In order to avoid conflicts with any other
     * OPAL-using software, the relocation point will have
     * been expressed as a set of "ORCM_foo" envars. We
     * therefore check for the ORCM_foo values, and name-shift
     * any we find to OPAL_foo so that OPAL will find them.
     * Since all ORCM tools will have already copied their
     * local environment, these name-shifted vars will not
     * appear in the environment of any launched processes
     */
    if (NULL != (envar = getenv("ORCM_PREFIX"))) {
        opal_unsetenv("ORCM_PREFIX", &environ);
        opal_setenv("OPAL_PREFIX", envar, true, &environ);
    }
    if (NULL != (envar = getenv("ORCM_LIBDIR"))) {
        opal_unsetenv("ORCM_LIBDIR", &environ);
        opal_setenv("OPAL_LIBDIR", envar, true, &environ);
    }
    if (NULL != (envar = getenv("ORCM_DATADIR"))) {
        opal_unsetenv("ORCM_DATADIR", &environ);
        opal_setenv("OPAL_DATADIR", envar, true, &environ);
    }

    /* initialize the opal layer */
    if (ORTE_SUCCESS != (ret = opal_init(NULL, NULL))) {
        error = "opal_init";
        goto error;
    }

    /* register the debug verbosity parameter; a negative value leaves the
     * debug output channel closed
     */
    orcm_debug_verbosity = -1;
    (void) mca_base_var_register ("orcm", "orcm", NULL, "debug_verbose",
                                  "Verbosity level for ORCM debug messages (default: 1)",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9,
                                  MCA_BASE_VAR_SCOPE_ALL,
                                  &orcm_debug_verbosity);
    if (0 <= orcm_debug_verbosity) {
        /* get a debug output channel */
        OBJ_CONSTRUCT(&lds, opal_output_stream_t);
        lds.lds_want_stdout = true;
        orcm_debug_output = opal_output_open(&lds);
        OBJ_DESTRUCT(&lds);
        /* set the verbosity */
        opal_output_set_verbosity(orcm_debug_output, orcm_debug_verbosity);
    }

    /* ensure we know the type of proc for when we finalize */
    orte_process_info.proc_type = flags;

    /* setup the locks */
    if (ORTE_SUCCESS != (ret = orte_locks_init())) {
        error = "orte_locks_init";
        goto error;
    }

    /* register handler for errnum -> string conversion */
    opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);

    /* Ensure the rest of the process info structure is initialized */
    if (ORTE_SUCCESS != (ret = orte_proc_info())) {
        error = "orte_proc_info";
        goto error;
    }

    /* register handler for errnum -> string conversion */
    opal_error_register("ORCM", ORCM_ERR_BASE, ORCM_ERR_MAX, orcm_err2str);

    /* register handler for attr key -> string conversion */
    if (ORTE_SUCCESS != (ret = orte_attr_register("orcm", ORCM_ATTR_KEY_BASE,
                                                  ORCM_ATTR_KEY_MAX, orcm_attr_key_print))) {
        error = "register attr print";
        goto error;
    }

    /* we don't need a progress thread as all our tools loop inside themselves,
     * so define orte_event_base to be the base opal_event_base
     */
    orte_event_base = opal_sync_event_base;

    /* setup the globals */
    orcm_clusters = OBJ_NEW(opal_list_t);
    orcm_schedulers = OBJ_NEW(opal_list_t);

    /* open the parser framework and select its component */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orcm_parser_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orcm_parser_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orcm_parser_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orcm_parser_select";
        goto error;
    }

    /* everyone must open the cfgi framework */
    if (ORCM_SUCCESS != (ret = mca_base_framework_open(&orcm_cfgi_base_framework, 0))) {
        error = "orcm_cfgi_base_open";
        goto error;
    }
    if (ORCM_SUCCESS != (ret = orcm_cfgi_base_select())) {
        error = "orcm_cfgi_select";
        goto error;
    }

    /* load the logical group definitions; envar may be NULL when the
     * variable is not set and is passed through as-is
     */
    envar = getenv("ORCM_MCA_logical_group_config_file");
    if (ORCM_SUCCESS != (ret = orcm_logical_group_load_to_memory(envar))) {
        error = "orcm_logical_group_load_to_memory";
        goto error;
    }

    if (ORCM_SCHED == flags) {
        envar = getenv("ORCM_MCA_event_exec_path");
        if (NULL == envar) {
            /* asprintf leaves the target pointer undefined on failure, so
             * normalize it to NULL for the check below
             */
            if (0 > asprintf(&orcm_event_exec_path, "%s/bin", opal_install_dirs.prefix)) {
                orcm_event_exec_path = NULL;
            }
        } else {
            orcm_event_exec_path = strdup(envar);
        }
        if (NULL == orcm_event_exec_path) {
            /* ret still holds the success code of the previous step, so an
             * explicit error code is required for the failure to be
             * reported to the caller
             */
            ret = ORTE_ERR_OUT_OF_RESOURCE;
            error = "orcm_event_exec_path";
            goto error;
        }
    }

    /* everyone must open the sst framework */
    if (ORCM_SUCCESS != (ret = mca_base_framework_open(&orcm_sst_base_framework, 0))) {
        error = "orcm_sst_base_open";
        goto error;
    }
    if (ORCM_SUCCESS != (ret = orcm_sst_base_select())) {
        error = "orcm_sst_select";
        goto error;
    }

    /* open the ESS and select the correct module for this environment - the
     * orcm module is basically a no-op, but we need the framework defined
     * as other parts of ORTE will want to call it
     */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_ess_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_ess_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_ess_base_select())) {
        error = "orte_ess_base_select";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_ess.init())) {
        error = "orte_ess_init";
        goto error;
    }

    /* initialize us - we will register the ORTE-level MCA params in there */
    if (ORTE_SUCCESS != (ret = orcm_sst.init())) {
        error = "orte_init";
        goto error;
    }

    /* setup the orte_show_help system - don't do this until the
     * end as otherwise show_help messages won't appear
     */
    if (ORTE_SUCCESS != (ret = orte_show_help_init())) {
        error = "opal_output_init";
        goto error;
    }

    /* initialize orcm datatype support */
    if (ORCM_SUCCESS != (ret = orcm_dt_init())) {
        error = "orcm_dt_init";
        goto error;
    }

    /* flag that orte is initialized so things can work */
    orte_initialized = true;
    orte_help_want_aggregate = false;

    return ORCM_SUCCESS;

 error:
    /* report which step failed unless the error is flagged as silent */
    if (ORCM_ERR_SILENT != ret) {
        opal_show_help("help-orcm-runtime.txt",
                       "orcm_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }

    return ret;
}
/*
 * Construct the fullpath to the session directory
 *
 * The path is assembled as <prefix>/<frontend>[/<job-family>/<jobid>[/<vpid>]].
 *
 * fulldirpath     - if non-NULL, any string already stored in *fulldirpath is
 *                   free()d and replaced by the newly built absolute path.
 * return_prefix   - if non-NULL and *return_prefix is non-NULL, that value is
 *                   used as the prefix. Otherwise the prefix is taken from
 *                   orte_process_info.tmpdir_base or opal_tmp_directory(), and
 *                   (when return_prefix is non-NULL) a strdup()'d copy of it is
 *                   stored in *return_prefix.
 * return_frontend - if non-NULL, receives a strdup()'d copy of the frontend
 *                   component ("openmpi-sessions-USER@HOST_BATCH" unless
 *                   orte_process_info.top_session_dir is set).
 * hostid          - host name to use; when NULL, orte_process_info.nodename
 *                   is used instead (and must be set).
 * batchid         - batch id to use; when NULL, "0" is used.
 * proc            - optional process name; a valid vpid adds the job-family,
 *                   jobid and vpid components, a valid jobid alone adds the
 *                   job-family and jobid components.
 *
 * Returns ORTE_SUCCESS, or ORTE_ERR_OUT_OF_RESOURCE, ORTE_ERR_BAD_PARAM,
 * ORTE_ERROR or ORTE_ERR_FATAL (prefix is in the prohibited list) on failure.
 * Every failure after the user-name lookup releases all temporaries through
 * the cleanup label.
 */
int
orte_session_dir_get_name(char **fulldirpath,
                          char **return_prefix,  /* This will come back as the valid tmp dir */
                          char **return_frontend,
                          char *hostid,
                          char *batchid,
                          orte_process_name_t *proc)
{
    char *hostname  = NULL,
         *batchname = NULL,
         *sessions  = NULL,
         *user      = NULL,
         *prefix    = NULL,
         *frontend  = NULL,
         *jobfam    = NULL,
         *job       = NULL,
         *vpidstr   = NULL;
    bool prefix_provided = false;
    int exit_status = ORTE_SUCCESS;
    size_t len;
    int uid;
    struct passwd *pwdent;

    /* Ensure that system info is set */
    orte_proc_info();

    /* get the name of the user */
    uid = getuid();
#ifdef HAVE_GETPWUID
    pwdent = getpwuid(uid);
#else
    pwdent = NULL;
#endif
    if (NULL != pwdent) {
        user = strdup(pwdent->pw_name);
    } else {
        /* nothing has been allocated yet, so a direct return is safe here */
        orte_show_help("help-orte-runtime.txt",
                       "orte:session:dir:nopwname", true);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /*
     * set the 'hostname'
     */
    if (NULL != hostid) { /* User specified version */
        hostname = strdup(hostid);
    } else {              /* check if it is set elsewhere */
        if (NULL != orte_process_info.nodename) {
            hostname = strdup(orte_process_info.nodename);
        } else {
            /* Couldn't find it, so fail */
            ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
            exit_status = ORTE_ERR_BAD_PARAM;
            goto cleanup;
        }
    }

    /*
     * set the 'batchid'
     */
    if (NULL != batchid) {
        batchname = strdup(batchid);
    } else {
        batchname = strdup("0");
    }

    /*
     * get the front part of the session directory
     * Will look something like:
     *    openmpi-sessions-USERNAME@HOSTNAME_BATCHID
     */
    if (NULL != orte_process_info.top_session_dir) {
        frontend = strdup(orte_process_info.top_session_dir);
    } else { /* If not set then construct it */
        if (0 > asprintf(&frontend, "openmpi-sessions-%s@%s_%s", user, hostname, batchname)) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            exit_status = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
    }

    /*
     * Construct the session directory
     */
    /* If we were given a valid vpid then we can construct it fully into:
     *   openmpi-sessions-USERNAME@HOSTNAME_BATCHID/JOB-FAMILY/JOBID/VPID
     */
    if (NULL != proc) {
        if (ORTE_VPID_INVALID != proc->vpid) {
            if (0 > asprintf(&jobfam, "%d", ORTE_JOB_FAMILY(proc->jobid))) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                exit_status = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            if (0 > asprintf(&job, "%d", ORTE_LOCAL_JOBID(proc->jobid))) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                exit_status = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            if (ORTE_SUCCESS != orte_util_convert_vpid_to_string(&vpidstr, proc->vpid)) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                exit_status = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            sessions = opal_os_path( false, frontend, jobfam, job, vpidstr, NULL );
            if (NULL == sessions) {
                ORTE_ERROR_LOG(ORTE_ERROR);
                exit_status = ORTE_ERROR;
                goto cleanup;
            }
        }
        /* If we were given a valid jobid then we can construct it partially into:
         *   openmpi-sessions-USERNAME@HOSTNAME_BATCHID/JOB-FAMILY/JOBID
         */
        else if (ORTE_JOBID_INVALID != proc->jobid) {
            if (0 > asprintf(&jobfam, "%d", ORTE_JOB_FAMILY(proc->jobid))) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                exit_status = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            if (0 > asprintf(&job, "%d", ORTE_LOCAL_JOBID(proc->jobid))) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                exit_status = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            sessions = opal_os_path( false, frontend, jobfam, job, NULL );
            if (NULL == sessions) {
                ORTE_ERROR_LOG(ORTE_ERROR);
                exit_status = ORTE_ERROR;
                goto cleanup;
            }
        } /* if both are invalid */
        else {
            sessions = strdup(frontend); /* must dup this to avoid double-free later */
        }
    } /* If we were not given a proc at all, then we just set it to frontend */
    else {
        sessions = strdup(frontend); /* must dup this to avoid double-free later */
    }

    /*
     * If the user specified an invalid prefix, or no prefix at all
     * we need to keep looking
     */
    if (NULL != fulldirpath && NULL != *fulldirpath) {
        free(*fulldirpath);
        *fulldirpath = NULL;
    }

    if (NULL != return_prefix && NULL != *return_prefix) {
        /* use the user specified one, if available */
        prefix = strdup(*return_prefix);
        prefix_provided = true;
    }
    /* Try to find a proper alternative prefix */
    else if (NULL != orte_process_info.tmpdir_base) { /* stored value */
        prefix = strdup(orte_process_info.tmpdir_base);
    } else { /* General Environment var */
        prefix = strdup(opal_tmp_directory());
    }

    /* the prefix is dereferenced below, so a failed copy must stop here */
    if (NULL == prefix) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        exit_status = ORTE_ERR_OUT_OF_RESOURCE;
        goto cleanup;
    }

    len = strlen(prefix);
    /* check for a trailing path separator - guard against an empty
     * prefix so we never index before the start of the buffer */
    if (0 < len && OPAL_PATH_SEP[0] == prefix[len-1]) {
        prefix[len-1] = '\0';
    }

    /* BEFORE doing anything else, check to see if this prefix is
     * allowed by the system
     */
    if (NULL != orte_prohibited_session_dirs) {
        char **list;
        int i, count;
        bool prohibited = false;

        /* break the string into tokens - it should be
         * separated by ','
         */
        list = opal_argv_split(orte_prohibited_session_dirs, ',');
        count = opal_argv_count(list);
        /* cycle through the list */
        for (i = 0; i < count; i++) {
            /* check if prefix matches */
            if (0 == strncmp(prefix, list[i], strlen(list[i]))) {
                /* this is a prohibited location */
                orte_show_help("help-orte-runtime.txt",
                               "orte:session:dir:prohibited",
                               true, prefix, orte_prohibited_session_dirs);
                prohibited = true;
                break;
            }
        }
        opal_argv_free(list);  /* done with this */

        if (prohibited) {
            /* leave through cleanup so none of the temporaries leak */
            exit_status = ORTE_ERR_FATAL;
            goto cleanup;
        }
    }

    /*
     * Construct the absolute final path, if requested
     */
    if (NULL != fulldirpath) {
        *fulldirpath = opal_os_path(false, prefix, sessions, NULL);
    }

    /*
     * Return the frontend and prefix, if user requested we do so
     */
    if (NULL != return_frontend) {
        *return_frontend = strdup(frontend);
    }
    if (!prefix_provided && NULL != return_prefix) {
        *return_prefix = strdup(prefix);
    }

 cleanup:
    /* free(NULL) is a no-op, so no guards are needed */
    free(hostname);
    free(batchname);
    free(sessions);
    free(user);
    free(prefix);
    free(frontend);
    free(jobfam);
    free(job);
    free(vpidstr);

    return exit_status;
}
/*
 * Initialize the ORTE layer.
 *
 * pargc / pargv - forwarded unchanged to opal_init().
 * flags         - process type, stored in orte_process_info.proc_type.
 *
 * The first call performs the full start-up sequence; later calls only
 * bump the orte_initialized counter and return ORTE_SUCCESS.
 *
 * Returns ORTE_SUCCESS, or the error code of the first step that failed.
 * Unless that code is ORTE_ERR_SILENT, a help message naming the failing
 * step is shown.
 *
 * NOTE(review): the counter is incremented before any step runs and is not
 * rolled back on failure, so a call made after a failed init takes the
 * "already initialized" path - confirm this is intended.
 */
int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
{
    int ret;
    const char *error = NULL;  /* name of the step that failed; only ever a literal */

    if (0 < orte_initialized) {
        /* track number of times we have been called */
        orte_initialized++;
        return ORTE_SUCCESS;
    }
    orte_initialized++;

    /* initialize the opal layer */
    if (ORTE_SUCCESS != (ret = opal_init(pargc, pargv))) {
        error = "opal_init";
        goto error;
    }

    /* Convince OPAL to use our naming scheme */
    opal_process_name_print = _process_name_print_for_opal;
    opal_process_name_vpid = _process_name_vpid_for_opal;
    opal_process_name_jobid = _process_name_jobid_for_opal;
    opal_compare_proc = _process_name_compare;

    /* ensure we know the type of proc for when we finalize */
    orte_process_info.proc_type = flags;

    /* setup the locks */
    if (ORTE_SUCCESS != (ret = orte_locks_init())) {
        error = "orte_locks_init";
        goto error;
    }

    /* Register all MCA Params */
    if (ORTE_SUCCESS != (ret = orte_register_params())) {
        error = "orte_register_params";
        goto error;
    }

    /* setup the orte_show_help system */
    if (ORTE_SUCCESS != (ret = orte_show_help_init())) {
        /* report the step that actually failed */
        error = "orte_show_help_init";
        goto error;
    }

    /* register handler for errnum -> string conversion */
    opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);

    /* Ensure the rest of the process info structure is initialized */
    if (ORTE_SUCCESS != (ret = orte_proc_info())) {
        error = "orte_proc_info";
        goto error;
    }

    /* open the ESS and select the correct module for this environment */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_ess_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_ess_base_open";
        goto error;
    }

    if (ORTE_SUCCESS != (ret = orte_ess_base_select())) {
        error = "orte_ess_base_select";
        goto error;
    }

    if (!ORTE_PROC_IS_APP) {
        /* ORTE tools "block" in their own loop over the event
         * base, so no progress thread is required - apps will
         * start their progress thread in ess_base_std_app.c
         * at the appropriate point
         */
        orte_event_base = opal_event_base;
    }

    /* initialize the RTE for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess.init())) {
        error = "orte_ess_init";
        goto error;
    }

    /* All done */
    return ORTE_SUCCESS;

 error:
    if (ORTE_ERR_SILENT != ret) {
        orte_show_help("help-orte-runtime",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }

    return ret;
}
/*
 * Initialize the ORTE layer.
 *
 * pargc / pargv - forwarded unchanged to opal_init().
 * flags         - process type, stored in orte_process_info.proc_type.
 *
 * The first call performs the full start-up sequence; later calls only
 * bump the orte_initialized counter and return ORTE_SUCCESS.
 *
 * When ORTE_ENABLE_PROGRESS_THREADS is set, a separate event base is created
 * and a progress thread is started for it (this requires
 * OPAL_EVENT_HAVE_THREAD_SUPPORT, otherwise init fails with ORTE_ERROR).
 * Without progress threads the OPAL event base is used directly.
 *
 * Returns ORTE_SUCCESS, or the error code of the first step that failed.
 * Unless that code is ORTE_ERR_SILENT, a help message naming the failing
 * step is shown.
 *
 * NOTE(review): the counter is incremented before any step runs and is not
 * rolled back on failure, so a call made after a failed init takes the
 * "already initialized" path - confirm this is intended.
 */
int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
{
    int ret;
    const char *error = NULL;  /* name of the step that failed; only ever a literal */

    if (0 < orte_initialized) {
        /* track number of times we have been called */
        orte_initialized++;
        return ORTE_SUCCESS;
    }
    orte_initialized++;

    /* initialize the opal layer */
    if (ORTE_SUCCESS != (ret = opal_init(pargc, pargv))) {
        error = "opal_init";
        goto error;
    }

    /* ensure we know the type of proc for when we finalize */
    orte_process_info.proc_type = flags;

    /* setup the locks */
    if (ORTE_SUCCESS != (ret = orte_locks_init())) {
        error = "orte_locks_init";
        goto error;
    }

    /* Register all MCA Params */
    if (ORTE_SUCCESS != (ret = orte_register_params())) {
        error = "orte_register_params";
        goto error;
    }

    /* setup the orte_show_help system */
    if (ORTE_SUCCESS != (ret = orte_show_help_init())) {
        /* report the step that actually failed */
        error = "orte_show_help_init";
        goto error;
    }

    /* register handler for errnum -> string conversion */
    opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);

    /* Ensure the rest of the process info structure is initialized */
    if (ORTE_SUCCESS != (ret = orte_proc_info())) {
        error = "orte_proc_info";
        goto error;
    }

    /* open the ESS and select the correct module for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess_base_open())) {
        error = "orte_ess_base_open";
        goto error;
    }

    if (ORTE_SUCCESS != (ret = orte_ess_base_select())) {
        error = "orte_ess_base_select";
        goto error;
    }

#if ORTE_ENABLE_PROGRESS_THREADS
#if OPAL_EVENT_HAVE_THREAD_SUPPORT
    /* get a separate orte event base */
    orte_event_base = opal_event_base_create();
    /* construct the thread object */
    OBJ_CONSTRUCT(&orte_progress_thread, opal_thread_t);
    /* fork off a thread to progress it */
    orte_progress_thread.t_run = orte_progress_thread_engine;
    if (OPAL_SUCCESS != (ret = opal_thread_start(&orte_progress_thread))) {
        error = "orte progress thread start";
        goto error;
    }
#else
    error = "event thread support is not configured";
    ret = ORTE_ERROR;
    goto error;
#endif
#else
    /* set the event base to the opal one */
    orte_event_base = opal_event_base;
#endif

    /* initialize the RTE for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess.init())) {
        error = "orte_ess_init";
        goto error;
    }

    /* All done */
    return ORTE_SUCCESS;

 error:
    if (ORTE_ERR_SILENT != ret) {
        orte_show_help("help-orte-runtime",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }

    return ret;
}