/* * Construct the session directory and create it if necessary */ int orte_session_dir(bool create, orte_process_name_t *proc) { int rc = ORTE_SUCCESS; /* * Get the session directory full name */ if (ORTE_SUCCESS != (rc = orte_session_setup_base(proc))) { if (ORTE_ERR_FATAL == rc) { /* this indicates we should abort quietly */ rc = ORTE_ERR_SILENT; } goto cleanup; } /* * Now that we have the full path, go ahead and create it if necessary */ if( create ) { if( ORTE_SUCCESS != (rc = orte_create_dir(orte_process_info.proc_session_dir)) ) { ORTE_ERROR_LOG(rc); goto cleanup; } } if (orte_debug_flag) { opal_output(0, "procdir: %s", OMPI_PRINTF_FIX_STRING(orte_process_info.proc_session_dir)); opal_output(0, "jobdir: %s", OMPI_PRINTF_FIX_STRING(orte_process_info.job_session_dir)); opal_output(0, "top: %s", OMPI_PRINTF_FIX_STRING(orte_process_info.jobfam_session_dir)); opal_output(0, "top: %s", OMPI_PRINTF_FIX_STRING(orte_process_info.top_session_dir)); opal_output(0, "tmp: %s", OMPI_PRINTF_FIX_STRING(orte_process_info.tmpdir_base)); } cleanup: return rc; }
int orte_ess_base_tool_setup(void) { int ret; char *error = NULL; opal_list_t transports; orte_jobid_t jobid; orte_vpid_t vpid; /* setup the PMIx framework - ensure it skips all non-PMIx components, * but do not override anything we were given */ opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ); if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_pmix_base_open"; goto error; } if (ORTE_SUCCESS != (ret = opal_pmix_base_select())) { ORTE_ERROR_LOG(ret); error = "opal_pmix_base_select"; goto error; } /* set the event base */ opal_pmix_base_set_evbase(orte_event_base); /* we have to define our name here */ if (NULL != orte_ess_base_jobid && NULL != orte_ess_base_vpid) { opal_output_verbose(2, orte_ess_base_framework.framework_output, "ess:tool:obtaining name from environment"); if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_jobid(&jobid, orte_ess_base_jobid))) { return(ret); } ORTE_PROC_MY_NAME->jobid = jobid; if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_vpid(&vpid, orte_ess_base_vpid))) { return(ret); } ORTE_PROC_MY_NAME->vpid = vpid; } else { /* If we are a tool with no name, then define it here */ uint16_t jobfam; uint32_t hash32; uint32_t bias; opal_output_verbose(2, orte_ess_base_framework.framework_output, "ess:tool:computing name"); /* hash the nodename */ OPAL_HASH_STR(orte_process_info.nodename, hash32); bias = (uint32_t)orte_process_info.pid; /* fold in the bias */ hash32 = hash32 ^ bias; /* now compress to 16-bits */ jobfam = (uint16_t)(((0x0000ffff & (0xffff0000 & hash32) >> 16)) ^ (0x0000ffff & hash32)); /* set the name */ ORTE_PROC_MY_NAME->jobid = 0xffff0000 & ((uint32_t)jobfam << 16); ORTE_PROC_MY_NAME->vpid = 0; } /* my name is set, xfer it to the OPAL layer */ orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME; /* initialize - PMIx may set our name here if we attach to * a PMIx server */ if (NULL != opal_pmix.tool_init) { opal_list_t info; opal_value_t *kv; OBJ_CONSTRUCT(&info, opal_list_t); /* pass our name so the PMIx layer can use it */ kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_TOOL_NSPACE); orte_util_convert_jobid_to_string(&kv->data.string, ORTE_PROC_MY_NAME->jobid); kv->type = OPAL_STRING; opal_list_append(&info, &kv->super); /* ditto for our rank */ kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_TOOL_RANK); kv->data.name.vpid = ORTE_PROC_MY_NAME->vpid; kv->type = OPAL_VPID; opal_list_append(&info, &kv->super); /* ORTE tools don't need to connect to a PMIx server as * they will connect via the OOB */ kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_TOOL_DO_NOT_CONNECT); kv->data.flag = true; kv->type = OPAL_BOOL; opal_list_append(&info, &kv->super); if (OPAL_SUCCESS != (ret = opal_pmix.tool_init(&info))) { ORTE_ERROR_LOG(ret); error = "opal_pmix.init"; OPAL_LIST_DESTRUCT(&info); goto error; } OPAL_LIST_DESTRUCT(&info); ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid; ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid; } orte_process_info.super.proc_hostname = strdup(orte_process_info.nodename); orte_process_info.super.proc_flags = OPAL_PROC_ALL_LOCAL; orte_process_info.super.proc_arch = opal_local_arch; opal_proc_local_set(&orte_process_info.super); /* open and setup the state machine */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_state_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_state_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_state_base_select"; goto error; } /* open and setup the error manager */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_errmgr_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_errmgr_base_select"; goto error; } /* Setup the communication infrastructure */ /* Routed system */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_rml_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_routed_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_routed_base_select"; goto error; } if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_oob_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_oob_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_oob_base_select"; goto error; } /* Runtime Messaging Layer */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_rml_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_rml_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_rml_base_select"; goto error; } /* get a conduit for our use - we never route IO over fabric */ OBJ_CONSTRUCT(&transports, opal_list_t); orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING); orte_mgmt_conduit = orte_rml.open_conduit(&transports); OPAL_LIST_DESTRUCT(&transports); /* since I am a tool, then all I really want to do is communicate. * So setup communications and be done - finding the HNP * to which I want to communicate and setting up a route for * that link is my responsibility */ /* we -may- need to know the name of the head * of our session directory tree, particularly the * tmp base where any other session directories on * this node might be located */ ret = orte_session_setup_base(ORTE_PROC_MY_NAME); if (ORTE_SUCCESS != ret ) { ORTE_ERROR_LOG(ret); error = "define session dir names"; goto error; } /* setup I/O forwarding system - must come after we init routes */ if (NULL != orte_process_info.my_hnp_uri) { /* only do this if we were given an HNP */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_iof_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_iof_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_iof_base_select"; goto error; } /* if we were given an HNP, then also setup the PLM in case this * tool wants to request that we spawn something for it */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_plm_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_plm_base_open"; goto error; } /* we don't select the plm framework as we only want the * base proxy functions */ } #if OPAL_ENABLE_FT_CR == 1 /* * Setup the SnapC */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_snapc_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_snapc_base_open"; goto error; } if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_sstore_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_sstore_base_open"; goto error; } if (ORTE_SUCCESS != (ret = orte_snapc_base_select(ORTE_PROC_IS_HNP, ORTE_PROC_IS_APP))) { ORTE_ERROR_LOG(ret); error = "orte_snapc_base_select"; goto error; } if (ORTE_SUCCESS != (ret = orte_sstore_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_sstore_base_select"; goto error; } /* Tools do not need all the OPAL CR stuff */ opal_cr_set_enabled(false); #endif return ORTE_SUCCESS; error: orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); return ret; }