Example #1
0
/*
 * Bring up the pieces of OPAL/ORTE/ORCM this program relies on, in order:
 * the OPAL layer, the process info structure, the cfgi and sst frameworks
 * and finally the ESS framework.
 *
 * Returns 0 on success, 99 when the cfgi selection fails (bad configuration
 * file) and 1 for every other failure.  A "FAIL ..." line is printed to
 * stdout before each non-zero return.
 */
static int setupOrcm(void)
{
  int status;

  /* Run just enough of orcm/orte for the configuration file to be read,
   * which tells us whether we are an aggregator or a compute node daemon.
   */
  status = opal_init(NULL, NULL);
  if (status != ORTE_SUCCESS) {
    printf("FAIL Error in opal_init()\n");
    return 1;
  }

  /* the process type has to be in place before the process info is built */
  orte_process_info.proc_type = ORCM_DAEMON;
  status = orte_proc_info();
  if (status != ORTE_SUCCESS) {
    printf("FAIL Error in orte_proc_info()\n");
    return 1;
  }

  /* globals */
  orte_event_base = opal_sync_event_base;
  orcm_clusters = OBJ_NEW(opal_list_t);
  orcm_schedulers = OBJ_NEW(opal_list_t);

  /* cfgi framework: open, then select */
  status = mca_base_framework_open(&orcm_cfgi_base_framework, 0);
  if (status != ORCM_SUCCESS) {
    printf("FAIL orcm_cfgi_base_open\n");
    return 1;
  }
  status = orcm_cfgi_base_select();
  if (status != ORCM_SUCCESS) {
    /* bad configuration file - reported with its own exit code */
    printf("FAIL orcm_cfgi_select\n");
    return 99;
  }

  /* sst framework: open, then select */
  status = mca_base_framework_open(&orcm_sst_base_framework, 0);
  if (status != ORCM_SUCCESS) {
    printf("FAIL orcm_sst_base_framework\n");
    return 1;
  }
  status = orcm_sst_base_select();
  if (status != ORCM_SUCCESS) {
    printf("FAIL orcm_sst_base_select\n");
    return 1;
  }

  /* The ESS framework is required as well: when ft_tester kills itself
   * it goes through the abort function.
   */
  status = mca_base_framework_open(&orte_ess_base_framework, 0);
  if (status != ORTE_SUCCESS) {
    printf("FAIL orte_ess_base_open\n");
    return 1;
  }
  status = orte_ess_base_select();
  if (status != ORTE_SUCCESS) {
    printf("FAIL orte_ess_base_select\n");
    return 1;
  }
  status = orte_ess.init();
  if (status != ORTE_SUCCESS) {
    printf("FAIL orte_ess_init\n");
    return 1;
  }

  return 0;
}
Example #2
0
/*
 * Coordinator hook executed after a restart.
 *
 * Queues the previous job session directory and its parent for cleanup,
 * rebuilds orte_process_info while keeping the process type, invalidates
 * our process name and finally forwards OPAL_CRS_RESTART to the ESS
 * module when it provides an ft_event entry point.
 *
 * Returns ORTE_SUCCESS, or the status of the last step that failed.
 */
static int orte_cr_coord_post_restart(void) {
    int exit_status = ORTE_SUCCESS;
    int rc;
    orte_proc_type_t saved_type = ORTE_PROC_TYPE_NONE;
    char *parent_dir = NULL;

    opal_output_verbose(10, orte_cr_output,
                        "orte_cr: coord_post_restart: orte_cr_coord_post_restart()");

    /* Register the old session directory, and the directory holding it,
     * for cleanup. */
    opal_crs_base_cleanup_append(orte_process_info.job_session_dir, true);
    parent_dir = opal_dirname(orte_process_info.job_session_dir);
    if (parent_dir != NULL) {
        opal_crs_base_cleanup_append(parent_dir, true);
        free(parent_dir);
        parent_dir = NULL;
    }

    /* Refresh the system information.  The process type is remembered
     * first so it can be put back once the structure has been rebuilt. */
    saved_type = orte_process_info.proc_type;
    rc = orte_proc_info_finalize();
    if (rc != ORTE_SUCCESS) {
        exit_status = rc;
    }

    /* free(NULL) is a no-op, so the URIs can be released unconditionally */
    free(orte_process_info.my_hnp_uri);
    orte_process_info.my_hnp_uri = NULL;

    free(orte_process_info.my_daemon_uri);
    orte_process_info.my_daemon_uri = NULL;

    rc = orte_proc_info();
    if (rc != ORTE_SUCCESS) {
        exit_status = rc;
    }

    orte_process_info.proc_type = saved_type;
    orte_process_info.my_name = *ORTE_NAME_INVALID;

    /* Let the ESS know about the restart, if it wants to be told. */
    if (orte_ess.ft_event != NULL) {
        rc = orte_ess.ft_event(OPAL_CRS_RESTART);
        if (rc != ORTE_SUCCESS) {
            exit_status = rc;
        }
    }

    return exit_status;
}
Example #3
0
/**
 * Set up the job- and proc-level session directories for @p proc and
 * verify that the configured tmpdir base is not a prohibited location.
 *
 * @param proc  process name handed to _setup_job_session_dir() and
 *              _setup_proc_session_dir()
 *
 * @return ORTE_SUCCESS on success, the error returned by either setup
 *         helper, or ORTE_ERR_FATAL when orte_process_info.tmpdir_base
 *         starts with one of the comma-separated entries of
 *         orte_prohibited_session_dirs (a help message is shown first).
 */
int orte_session_setup_base(orte_process_name_t *proc)
{
    int rc;

    /* Ensure that system info is set */
    orte_proc_info();

    /* setup job and proc session directories */
    if( ORTE_SUCCESS != (rc = _setup_job_session_dir(proc)) ){
        return rc;
    }

    if( ORTE_SUCCESS != (rc = _setup_proc_session_dir(proc)) ){
        return rc;
    }

    /* Check whether the tmpdir prefix is allowed by the system.  The
     * comparison needs BOTH a prohibited list and a tmpdir base: with
     * only one of them there is nothing to compare, and strncmp() must
     * never be handed a NULL tmpdir_base.
     */
    if (NULL != orte_prohibited_session_dirs &&
        NULL != orte_process_info.tmpdir_base) {
        char **list;
        int i, len;
        /* break the string into tokens - it should be
         * separated by ','
         */
        list = opal_argv_split(orte_prohibited_session_dirs, ',');
        len = opal_argv_count(list);
        /* cycle through the list */
        for (i=0; i < len; i++) {
            /* check if prefix matches */
            if (0 == strncmp(orte_process_info.tmpdir_base, list[i], strlen(list[i]))) {
                /* this is a prohibited location */
                orte_show_help("help-orte-runtime.txt",
                               "orte:session:dir:prohibited",
                               true, orte_process_info.tmpdir_base,
                               orte_prohibited_session_dirs);
                opal_argv_free(list);
                return ORTE_ERR_FATAL;
            }
        }
        opal_argv_free(list);  /* done with this */
    }
    return ORTE_SUCCESS;
}
Example #4
0
/*
 * Coordinator hook executed after a restart.
 *
 * Rebuilds orte_process_info while preserving the process type, marks our
 * process name as invalid, and then hands OPAL_CRS_RESTART to the ESS
 * module if it exposes an ft_event entry point.
 *
 * Returns ORTE_SUCCESS, or the status of the last step that failed.
 */
static int orte_cr_coord_post_restart(void) {
    int status;
    int exit_status = ORTE_SUCCESS;
    orte_proc_type_t type_before = ORTE_PROC_TYPE_NONE;

    opal_output_verbose(10, orte_cr_output,
                        "orte_cr: coord_post_restart: orte_cr_coord_post_restart()");

    /* Tear down the process info, remembering the type it carried. */
    type_before = orte_process_info.proc_type;
    status = orte_proc_info_finalize();
    if (ORTE_SUCCESS != status) {
        exit_status = status;
    }

    /* Drop the contact URIs; free() accepts NULL, so no guard is needed. */
    free(orte_process_info.my_hnp_uri);
    orte_process_info.my_hnp_uri = NULL;
    free(orte_process_info.my_daemon_uri);
    orte_process_info.my_daemon_uri = NULL;

    /* Rebuild the process info and restore what must survive the rebuild. */
    status = orte_proc_info();
    if (ORTE_SUCCESS != status) {
        exit_status = status;
    }
    orte_process_info.proc_type = type_before;
    orte_process_info.my_name = *ORTE_NAME_INVALID;

    /* Notify the ESS - only when it registered a handler. */
    if (NULL == orte_ess.ft_event) {
        return exit_status;
    }
    status = orte_ess.ft_event(OPAL_CRS_RESTART);
    if (ORTE_SUCCESS != status) {
        exit_status = status;
    }

    return exit_status;
}
Example #5
0
/**
 * Initialize the ORTE runtime for this process.
 *
 * Calls are counted in orte_initialized; every call after the first one
 * just bumps the counter and returns ORTE_SUCCESS.  The first call brings
 * up OPAL, the locks, the MCA parameters, the show_help system, the
 * process info structure and the ESS framework, picks the event base and
 * finally runs the selected ESS module's init().
 *
 * @param pargc  forwarded to opal_init()
 * @param pargv  forwarded to opal_init()
 * @param flags  process type, stored in orte_process_info.proc_type
 *
 * @return ORTE_SUCCESS, or the status of the first step that failed.
 *         Unless that status is ORTE_ERR_SILENT, a help message naming
 *         the failed step is shown before returning.
 */
int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
{
    int ret;
    char *error = NULL;

    if (0 < orte_initialized) {
        /* track number of times we have been called */
        orte_initialized++;
        return ORTE_SUCCESS;
    }
    /* NOTE(review): the counter is not rolled back on the error path
     * below, so a retry after a failed init takes the early return
     * above - confirm this is intended.
     */
    orte_initialized++;

    /* initialize the opal layer */
    if (ORTE_SUCCESS != (ret = opal_init(pargc, pargv))) {
        error = "opal_init";
        goto error;
    }

    /* ensure we know the type of proc for when we finalize */
    orte_process_info.proc_type = flags;

    /* setup the locks */
    if (ORTE_SUCCESS != (ret = orte_locks_init())) {
        error = "orte_locks_init";
        goto error;
    }

    /* Register all MCA Params */
    if (ORTE_SUCCESS != (ret = orte_register_params())) {
        error = "orte_register_params";
        goto error;
    }

    /* setup the orte_show_help system */
    if (ORTE_SUCCESS != (ret = orte_show_help_init())) {
        /* label names the call that actually failed */
        error = "orte_show_help_init";
        goto error;
    }

    /* register handler for errnum -> string conversion */
    opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);

    /* Ensure the rest of the process info structure is initialized */
    if (ORTE_SUCCESS != (ret = orte_proc_info())) {
        error = "orte_proc_info";
        goto error;
    }

    /* open the ESS and select the correct module for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess_base_open())) {
        error = "orte_ess_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_ess_base_select())) {
        error = "orte_ess_base_select";
        goto error;
    }

    /* Applications get their own event base and progress thread when
     * full support, progress threads and event thread support are all
     * configured; everybody else shares the OPAL event base.
     */
    if (ORTE_PROC_IS_APP) {
#if !ORTE_DISABLE_FULL_SUPPORT && ORTE_ENABLE_PROGRESS_THREADS
#if OPAL_EVENT_HAVE_THREAD_SUPPORT
        /* get a separate orte event base */
        orte_event_base = opal_event_base_create();
        /* setup the finalize event - we'll need it
         * to break the thread out of the event lib
         * when we want to stop it
         */
        opal_event_set(orte_event_base, &orte_finalize_event, -1, OPAL_EV_WRITE, ignore_callback, NULL);
        opal_event_set_priority(&orte_finalize_event, ORTE_ERROR_PRI);
#if 0
        {
            /* seems strange, but wake us up once a second just so we can check for new events */
            opal_event_t *ev;
            struct timeval tv = {1,0};
            ev = opal_event_alloc();
            opal_event_evtimer_set(orte_event_base,
                               ev, ignore_callback, ev);
            opal_event_set_priority(ev, ORTE_INFO_PRI);
            opal_event_evtimer_add(ev, &tv);
        }
#endif
        /* construct the thread object */
        OBJ_CONSTRUCT(&orte_progress_thread, opal_thread_t);
        /* fork off a thread to progress it */
        orte_progress_thread.t_run = orte_progress_thread_engine;
        if (OPAL_SUCCESS != (ret = opal_thread_start(&orte_progress_thread))) {
            error = "orte progress thread start";
            goto error;
        }
#else
        /* progress threads were requested but the event library cannot
         * support them - refuse to start */
        error = "event thread support is not configured";
        ret = ORTE_ERROR;
        goto error;
#endif
#else
        /* set the event base to the opal one */
        orte_event_base = opal_event_base;
#endif
    } else {
        /* set the event base to the opal one */
        orte_event_base = opal_event_base;
    }

    /* initialize the RTE for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess.init())) {
        error = "orte_ess_init";
        goto error;
    }

    /* All done */
    return ORTE_SUCCESS;

 error:
    /* a silent error has already been reported by whoever raised it */
    if (ORTE_ERR_SILENT != ret) {
        orte_show_help("help-orte-runtime",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }

    return ret;
}
Example #6
0
/*
 * Restart the ORTE runtime in this process under a new process name.
 *
 * The sequence is: copy the current and the new name, restart the event
 * library, close the iof/smr/gpr/ns/rml frameworks and the wait
 * subsystem, update orte_process_info (seed flag, replicas, my_name),
 * then re-open, re-select and re-init the frameworks and finally bring
 * the iof framework back.
 *
 * name - new process name; a copy of it becomes orte_process_info.my_name
 * uri  - contact uri recorded for the ns/gpr replica when the respective
 *        replica is not set yet
 *
 * Returns ORTE_SUCCESS, or the status of the first step that failed
 * (logged with ORTE_ERROR_LOG).  No step is rolled back on failure.
 */
int orte_restart(orte_process_name_t *name, const char* uri)
{
    int rc;
    orte_process_name_t* old_name;
    orte_process_name_t* new_name;

    /* NOTE(review): old_name is not released when the second copy or any
     * later step fails, nor when both replicas are already set below -
     * presumably a leak; confirm who owns the result of orte_dss.copy().
     */
    if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&old_name, orte_process_info.my_name, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&new_name, name, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /*
     * Restart event library
     */

    if (ORTE_SUCCESS != (rc = opal_event_restart())) {
        ORTE_ERROR_LOG(rc);
	return rc;
    }

    /*
     * Close selected components.
     */

    /* clear the iof flush flag before the iof framework is closed */
    orte_iof_base.iof_flush = false;
    if (ORTE_SUCCESS != (rc = orte_iof_base_close())) {
        ORTE_ERROR_LOG(rc);
	return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_smr_base_close())) {
        ORTE_ERROR_LOG(rc);
	return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_gpr_base_close())) {
        ORTE_ERROR_LOG(rc);
	return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_ns_base_close())) {
        ORTE_ERROR_LOG(rc);
	return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_rml_base_close())) {
        ORTE_ERROR_LOG(rc);
	return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_wait_finalize())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /*
     * setup new global state
     */
    orte_process_info.seed = false;

    /* NOTE(review): when both replicas are NULL, the two branches below
     * store the SAME old_name pointer in ns_replica and gpr_replica -
     * confirm that nothing later frees both.  uri is also handed to
     * strdup() without a NULL check and the strdup() result is not
     * checked.
     */

    /* if NULL, set ns_replica to old_name and set the corresponding uri parameter */
    if (NULL == orte_process_info.ns_replica) {
        orte_process_info.ns_replica = old_name;
        orte_process_info.ns_replica_uri = strdup(uri);
    }
    
    /* if NULL, set gpr_replica to old_name and set the corresponding uri parameter */
    if (NULL == orte_process_info.gpr_replica) {
        orte_process_info.gpr_replica = old_name;
        orte_process_info.gpr_replica_uri = strdup(uri);
    }

    /* ensure my_name is set to the new_name */
    if (NULL != orte_process_info.my_name) {
        free(orte_process_info.my_name);
    }
    orte_process_info.my_name = new_name;

    /* The region below is compiled out.  It refers to id, jobid_str and
     * procid_str, none of which is declared in this function, so it would
     * need those declarations before it could be re-enabled.
     */
#if 0
    /* close the proc_info structure so it can be reinitialized */
    if (ORTE_SUCCESS != (rc = orte_proc_info_finalize())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    
    /* set seed flag to false */
    id = mca_base_param_register_int("seed", NULL, NULL, NULL, (int)false);
    if (ORTE_SUCCESS != (rc = mca_base_param_set_int(id, (int)false))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* call proc_info to reset the structure */
    if (ORTE_SUCCESS != (rc = orte_proc_info())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    
    /* finalize the sys_info structure so it can be reinitialized */
    if (ORTE_SUCCESS != (rc = orte_sys_info_finalize())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    
    /* call the sys_info function to load structure with any new info */
    orte_system_info.init = false;
    if (ORTE_SUCCESS != (rc = orte_sys_info())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
   
    /* establish the session directory structure for this process */
    if (ORTE_SUCCESS != (rc = orte_ns.get_jobid_string(&jobid_str, orte_process_info.my_name))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_ns.get_vpid_string(&procid_str, orte_process_info.my_name))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
 
    if (orte_debug_flag) {
        opal_output(0, "[%lu,%lu,%lu] setting up session dir with",
                    ORTE_NAME_ARGS(orte_process_info.my_name));
        if (NULL != orte_process_info.tmpdir_base) {
            opal_output(0, "\ttmpdir %s", orte_process_info.tmpdir_base);
        }
        opal_output(0, "\tuniverse %s", orte_universe_info.name);
        opal_output(0, "\tuser %s", orte_system_info.user);
        opal_output(0, "\thost %s", orte_system_info.nodename);
        opal_output(0, "\tjobid %s", jobid_str);
        opal_output(0, "\tprocid %s", procid_str);
    }
    if (ORTE_SUCCESS != (rc = orte_session_dir(true,
                                orte_process_info.tmpdir_base,
                                orte_system_info.user,
                                orte_system_info.nodename, NULL,
                                orte_universe_info.name,
                                jobid_str, procid_str))) {
        ORTE_ERROR_LOG(rc);
        if (jobid_str != NULL) free(jobid_str);
        if (procid_str != NULL) free(procid_str);
        return rc;
    }
    if (NULL != jobid_str) {
        free(jobid_str);
    }
    if (NULL != procid_str) {
        free(procid_str);
    }
#endif

    /*
     * Re-open components.
     */

    /* the wait subsystem comes back first; iof is re-opened last, at the
     * very end of this function */
    if (ORTE_SUCCESS != (rc = orte_wait_init())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_ns_base_open())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_rml_base_open())) {
        ORTE_ERROR_LOG(rc);
            return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_gpr_base_open())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_smr_base_open())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /*
     * Select new modules.
     */

    if (ORTE_SUCCESS != (rc = orte_rml_base_select())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_ns_base_select())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_gpr_base_select())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_smr_base_select())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }


    /*
     * Set contact info for the replicas
     */

    if (ORTE_SUCCESS != (rc = orte_rml.set_uri(orte_process_info.ns_replica_uri))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_rml.set_uri(orte_process_info.gpr_replica_uri))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /*
     * Re-init selected modules.
     */
    if (ORTE_SUCCESS != (rc = orte_rml.init())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_ns.init())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_gpr.init())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /*
     * Complete restart
     */
    if (ORTE_SUCCESS != (rc = orte_iof_base_open())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_iof_base_select())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    return ORTE_SUCCESS;
}
Example #7
0
/*
 * Driver for the orte_session_dir_t tests.
 *
 * Initializes the process info structure, gives this process the name
 * [0,0,0], opens the log file "test_session_dir_out" and runs test1()
 * through test8(), reporting each result through test_success() or
 * test_failure().
 *
 * Returns 1 when the process name cannot be allocated, exits with 1
 * when the log file cannot be opened, and returns 0 otherwise.
 */
int main(int argc, char* argv[])
{
    orte_proc_info(); /* initialize proc info structure */

    /* allocate our name; never dereference a failed allocation */
    orte_process_info.my_name = malloc(sizeof *orte_process_info.my_name);
    if (NULL == orte_process_info.my_name) {
        fprintf(stderr, "test_session_dir: unable to allocate process name\n");
        return 1;
    }
    orte_process_info.my_name->cellid = 0;
    orte_process_info.my_name->jobid = 0;
    orte_process_info.my_name->vpid = 0;

    test_init("orte_session_dir_t");
    test_out = fopen( "test_session_dir_out", "w+" );
    if( test_out == NULL ) {
      test_failure("test_session_dir couldn't open test file failed");
      test_finalize();
      exit(1);
    }


    fprintf(test_out, "running test1\n");
    if (test1()) {
        test_success();
    }
    else {
      test_failure("orte_session_dir_t test1 failed");
    }

    fprintf(test_out, "running test2\n");
    if (test2()) {
        test_success();
    }
    else {
      test_failure("orte_session_dir_t test2 failed");
    }

    fprintf(test_out, "running test3\n");
    if (test3()) {
        test_success();
    }
    else {
      test_failure("orte_session_dir_t test3 failed");
    }

    fprintf(test_out, "running test4\n");
    if (test4()) {
        test_success();
    }
    else {
      test_failure("orte_session_dir_t test4 failed");
    }

    fprintf(test_out, "running test5\n");
    if (test5()) {
        test_success();
    }
    else {
      test_failure("orte_session_dir_t test5 failed");
    }

    fprintf(test_out, "running test6\n");
    if (test6()) {
        test_success();
    }
    else {
      test_failure("orte_session_dir_t test6 failed");
    }

    fprintf(test_out, "running test7\n");
    if (test7()) {
        test_success();
    }
    else {
      test_failure("orte_session_dir_t test7 failed");
    }

    fprintf(test_out, "running test8\n");
    if (test8()) {
        test_success();
    }
    else {
      test_failure("orte_session_dir_t test8 failed");
    }

    fprintf(test_out, "completed all tests\n");

    fclose(test_out);

    /* clean up */
    orte_proc_info_finalize();

    test_finalize();
    return 0;
}
Example #8
0
/**
 * Initialize the ORCM runtime for this process.
 *
 * Calls are counted in orcm_initialized; every call after the first one
 * just bumps the counter and returns ORCM_SUCCESS.  The first call brings
 * up OPAL, the debug output channel, the locks, the error/attribute
 * string converters, the process info structure, the cfgi, sst and ESS
 * frameworks, the show_help system and the ORCM datatype support.
 *
 * When the environment variable ORCM_MCA_spin is set, the function spins
 * before doing anything else until a debugger clears the local "spin".
 *
 * @param flags  process type, stored in orte_process_info.proc_type
 *
 * @return ORCM_SUCCESS, or the status of the first step that failed.
 *         Unless that status is ORCM_ERR_SILENT, a help message naming
 *         the failed step is shown before returning.
 */
int orcm_init(orcm_proc_type_t flags)
{
    int ret;
    char *error = NULL;
    /* volatile: the flag is only ever cleared from a debugger, so the
     * compiler must re-read it on every pass instead of folding the
     * loop into an unconditional one */
    volatile int spin;
    opal_output_stream_t lds;

    if (0 < orcm_initialized) {
        /* track number of times we have been called */
        orcm_initialized++;
        return ORCM_SUCCESS;
    }
    orcm_initialized++;

    if (NULL != getenv("ORCM_MCA_spin")) {
        spin = 1;
        /* spin until a debugger can attach */
        while (0 != spin) {
            ret = 0;
            while (ret < 10000) {
                ret++;
            };
        }
    }

    /* initialize the opal layer */
    if (ORTE_SUCCESS != (ret = opal_init(NULL, NULL))) {
        error = "opal_init";
        goto error;
    }

    /* a negative verbosity means "no debug channel" */
    orcm_debug_verbosity = -1;
    (void) mca_base_var_register ("orcm", "orcm", NULL, "debug_verbose",
                                  "Verbosity level for ORCM debug messages (default: 1)",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                  &orcm_debug_verbosity);
    if (0 <= orcm_debug_verbosity) {
        /* get a debug output channel */
        OBJ_CONSTRUCT(&lds, opal_output_stream_t);
        lds.lds_want_stdout = true;
        orcm_debug_output = opal_output_open(&lds);
        OBJ_DESTRUCT(&lds);
        /* set the verbosity */
        opal_output_set_verbosity(orcm_debug_output, orcm_debug_verbosity);
    }

    /* ensure we know the type of proc for when we finalize */
    orte_process_info.proc_type = flags;

    /* setup the locks */
    if (ORTE_SUCCESS != (ret = orte_locks_init())) {
        error = "orte_locks_init";
        goto error;
    }

    /* register handler for errnum -> string conversion */
    opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);

    /* Ensure the rest of the process info structure is initialized */
    if (ORTE_SUCCESS != (ret = orte_proc_info())) {
        error = "orte_proc_info";
        goto error;
    }

    /* register handler for errnum -> string conversion */
    opal_error_register("ORCM", ORCM_ERR_BASE, ORCM_ERR_MAX, orcm_err2str);

    /* register handler for attr key -> string conversion */
    if (ORTE_SUCCESS != (ret = orte_attr_register("orcm", ORCM_ATTR_KEY_BASE, ORCM_ATTR_KEY_MAX,
                                                  orcm_attr_key_print))) {
        error = "register attr print";
        goto error;
    }

    /* we don't need a progress thread as all our tools loop inside themselves,
     * so define orte_event_base to be the base opal_event_base
     */
    orte_event_base = opal_sync_event_base;

    /* setup the globals */
    orcm_clusters = OBJ_NEW(opal_list_t);
    orcm_schedulers = OBJ_NEW(opal_list_t);

    /* everyone must open the cfgi framework */
    if (ORCM_SUCCESS != (ret = mca_base_framework_open(&orcm_cfgi_base_framework, 0))) {
        error = "orcm_cfgi_base_open";
        goto error;
    }
    if (ORCM_SUCCESS != (ret = orcm_cfgi_base_select())) {
        error = "orcm_cfgi_select";
        goto error;
    }

    /* everyone must open the sst framework */
    if (ORCM_SUCCESS != (ret = mca_base_framework_open(&orcm_sst_base_framework, 0))) {
        error = "orcm_sst_base_open";
        goto error;
    }
    if (ORCM_SUCCESS != (ret = orcm_sst_base_select())) {
        error = "orcm_sst_select";
        goto error;
    }

    /* open the ESS and select the correct module for this environment - the
     * orcm module is basically a no-op, but we need the framework defined
     * as other parts of ORTE will want to call it
     */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_ess_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_ess_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_ess_base_select())) {
        error = "orte_ess_base_select";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_ess.init())) {
        error = "orte_ess_init";
        goto error;
    }

    /* initialize us - we will register the ORTE-level MCA params in there */
    if (ORTE_SUCCESS != (ret = orcm_sst.init())) {
        /* label names the call that actually failed */
        error = "orcm_sst_init";
        goto error;
    }

    /* setup the orte_show_help system - don't do this until the
     * end as otherwise show_help messages won't appear
     */
    if (ORTE_SUCCESS != (ret = orte_show_help_init())) {
        /* label names the call that actually failed */
        error = "orte_show_help_init";
        goto error;
    }

    /* initialize orcm datatype support */
    if (ORCM_SUCCESS != (ret = orcm_dt_init())) {
        error = "orcm_dt_init";
        goto error;
    }

    /* flag that orte is initialized so things can work */
    orte_initialized = true;

    return ORCM_SUCCESS;

 error:
    /* a silent error has already been reported by whoever raised it */
    if (ORCM_ERR_SILENT != ret) {
        opal_show_help("help-orcm-runtime.txt",
                       "orcm_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }

    return ret;
}
Example #9
0
/**
 * Initialize the ORTE runtime for this process.
 *
 * Calls are counted in orte_initialized; every call after the first one
 * just bumps the counter and returns ORTE_SUCCESS.  The first call brings
 * up OPAL, installs the ORTE naming callbacks, sets up the locks, the MCA
 * parameters, the show_help system, the process info structure, the
 * dstore and ESS frameworks, runs the selected ESS module's init() and
 * finally mirrors the session/locality fields into opal_process_info.
 *
 * @param pargc  forwarded to opal_init()
 * @param pargv  forwarded to opal_init()
 * @param flags  process type, stored in orte_process_info.proc_type
 *
 * @return ORTE_SUCCESS, or the status of the first step that failed
 *         (ORTE_ERR_FATAL when a dstore handle cannot be opened).
 *         Unless that status is ORTE_ERR_SILENT, a help message naming
 *         the failed step is shown before returning.
 */
int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
{
    int ret;
    char *error = NULL;

    if (0 < orte_initialized) {
        /* track number of times we have been called */
        orte_initialized++;
        return ORTE_SUCCESS;
    }
    orte_initialized++;

    /* initialize the opal layer */
    if (ORTE_SUCCESS != (ret = opal_init(pargc, pargv))) {
        error = "opal_init";
        goto error;
    }

    /* Convince OPAL to use our naming scheme */
    opal_process_name_print = _process_name_print_for_opal;
    opal_vpid_print = _vpid_print_for_opal;
    opal_jobid_print = _jobid_print_for_opal;
    opal_compare_proc = _process_name_compare;
    opal_convert_string_to_process_name = _convert_string_to_process_name;

    /* ensure we know the type of proc for when we finalize */
    orte_process_info.proc_type = flags;

    /* setup the locks */
    if (ORTE_SUCCESS != (ret = orte_locks_init())) {
        error = "orte_locks_init";
        goto error;
    }

    /* Register all MCA Params */
    if (ORTE_SUCCESS != (ret = orte_register_params())) {
        error = "orte_register_params";
        goto error;
    }

    /* setup the orte_show_help system */
    if (ORTE_SUCCESS != (ret = orte_show_help_init())) {
        /* label names the call that actually failed */
        error = "orte_show_help_init";
        goto error;
    }

    /* register handler for errnum -> string conversion */
    opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);

    /* Ensure the rest of the process info structure is initialized */
    if (ORTE_SUCCESS != (ret = orte_proc_info())) {
        error = "orte_proc_info";
        goto error;
    }

    /* we may have modified the local nodename according to
     * request to retain/strip the FQDN and prefix, so update
     * it here. The OPAL layer will strdup the hostname, so
     * we have to free it first to avoid a memory leak
     * (free(NULL) is a no-op, so no guard is needed) */
    free(opal_process_info.nodename);
    /* opal_finalize_util will call free on this pointer so set from strdup */
    opal_process_info.nodename = strdup (orte_process_info.nodename);

    /* setup the dstore framework */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&opal_dstore_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "opal_dstore_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = opal_dstore_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "opal_dstore_base_select";
        goto error;
    }
    /* create the handle */
    if (0 > (opal_dstore_internal = opal_dstore.open("INTERNAL", "hash", NULL))) {
        error = "opal dstore internal";
        ret = ORTE_ERR_FATAL;
        goto error;
    }

    /* only applications get a modex store */
    if (ORTE_PROC_IS_APP) {
        if (0 > (opal_dstore_modex = opal_dstore.open("MODEX", "sm,hash", NULL))) {
            error = "opal dstore modex";
            ret = ORTE_ERR_FATAL;
            goto error;
        }
    }

    if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
        /* let the pmix server register params */
        pmix_server_register();
    }

    /* open the ESS and select the correct module for this environment */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_ess_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_ess_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_ess_base_select())) {
        error = "orte_ess_base_select";
        goto error;
    }

    if (!ORTE_PROC_IS_APP) {
        /* ORTE tools "block" in their own loop over the event
         * base, so no progress thread is required - apps will
         * start their progress thread in ess_base_std_app.c
         * at the appropriate point
         */
        orte_event_base = opal_event_base;
    }

    /* initialize the RTE for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess.init())) {
        error = "orte_ess_init";
        goto error;
    }

    /* set the remaining opal_process_info fields. Note that
     * the OPAL layer will have initialized these to NULL, and
     * anyone between us would not have strdup'd the string, so
     * we cannot free it here */
    opal_process_info.job_session_dir  = orte_process_info.job_session_dir;
    opal_process_info.proc_session_dir = orte_process_info.proc_session_dir;
    opal_process_info.num_local_peers  = (int32_t)orte_process_info.num_local_peers;
    opal_process_info.my_local_rank    = (int32_t)orte_process_info.my_local_rank;
#if OPAL_HAVE_HWLOC
    opal_process_info.cpuset           = orte_process_info.cpuset;
#endif  /* OPAL_HAVE_HWLOC */

#if OPAL_ENABLE_TIMING
    opal_timing_set_jobid(ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
#endif

    /* All done */
    return ORTE_SUCCESS;

 error:
    /* a silent error has already been reported by whoever raised it */
    if (ORTE_ERR_SILENT != ret) {
        orte_show_help("help-orte-runtime",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }

    return ret;
}
Example #10
0
/**
 * Initialize the ORTE runtime for this process.
 *
 * Returns ORTE_SUCCESS right away when orte_initialized is already set.
 * Otherwise brings up OPAL, the locks, the MCA parameters, the show_help
 * system, the process info structure and the ESS framework, runs the
 * selected ESS module's init() and only then sets orte_initialized.
 *
 * @param pargc  forwarded to opal_init()
 * @param pargv  forwarded to opal_init()
 * @param flags  process type, stored in orte_process_info.proc_type
 *
 * @return ORTE_SUCCESS, or the status of the first step that failed.
 *         An opal_init() failure is logged and returned directly; any
 *         later failure shows a help message naming the failed step
 *         unless its error code is ORTE_ERR_SILENT.
 */
int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
{
    int ret;
    char *error = NULL;

    if (orte_initialized) {
        return ORTE_SUCCESS;
    }

    /* initialize the opal layer */
    if (ORTE_SUCCESS != (ret = opal_init(pargc, pargv))) {
        ORTE_ERROR_LOG(ret);
        return ret;
    }

    /* ensure we know the type of proc for when we finalize */
    orte_process_info.proc_type = flags;

    /* setup the locks */
    if (ORTE_SUCCESS != (ret = orte_locks_init())) {
        error = "orte_locks_init";
        goto error;
    }

    /* Register all MCA Params */
    if (ORTE_SUCCESS != (ret = orte_register_params())) {
        error = "orte_register_params";
        goto error;
    }

    /* setup the orte_show_help system */
    if (ORTE_SUCCESS != (ret = orte_show_help_init())) {
        ORTE_ERROR_LOG(ret);
        /* label names the call that actually failed */
        error = "orte_show_help_init";
        goto error;
    }

    /* register handler for errnum -> string conversion */
    opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);

    /* Ensure the rest of the process info structure is initialized */
    if (ORTE_SUCCESS != (ret = orte_proc_info())) {
        error = "orte_proc_info";
        goto error;
    }

    /* open the ESS and select the correct module for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess_base_open())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_ess_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_ess_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_ess_base_select";
        goto error;
    }

    /* initialize the RTE for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess.init())) {
        ORTE_ERROR_LOG(ret);
        /* label names the call that actually failed */
        error = "orte_ess_init";
        goto error;
    }

    /* All done */
    orte_initialized = true;
    return ORTE_SUCCESS;

error:
    /* a silent error has already been reported by whoever raised it */
    if (ORTE_ERR_SILENT != OPAL_SOS_GET_ERROR_CODE(ret)) {
        orte_show_help("help-orte-runtime",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }

    return ret;
}
Example #11
0
/**
 * Initialize the ORCM runtime for the calling process.
 *
 * Calls are counted in orcm_initialized: only the first call performs the
 * initialization sequence, every later call just increments the counter
 * and returns ORCM_SUCCESS.
 *
 * The sequence is: optional debugger spin, ORCM_* -> OPAL_* environment
 * name-shifting, opal_init(), debug output channel, locks, error/attribute
 * string handlers, parser/cfgi frameworks, logical group load, scheduler
 * event path, sst/ess frameworks, orcm_sst.init(), show_help and finally
 * the ORCM datatype support.
 *
 * @param flags  process type; stored in orte_process_info.proc_type
 *
 * @return ORCM_SUCCESS on success, otherwise the error code of the step
 *         that failed.  Unless that code is ORCM_ERR_SILENT a help message
 *         naming the failed step is shown.
 *
 * NOTE(review): orcm_initialized is not decremented on the error path, so
 * a second call after a failed one returns ORCM_SUCCESS - confirm whether
 * callers rely on this.
 */
int orcm_init(orcm_proc_type_t flags)
{
    int ret;
    char *error = NULL, *envar;
    /* volatile: the loop below has no side effects, so without it the
     * compiler may assume the loop terminates and drop it; it also lets a
     * debugger clear the flag in memory to release the process */
    volatile int spin;
    opal_output_stream_t lds;

    if (0 < orcm_initialized) {
        /* track number of times we have been called */
        orcm_initialized++;
        return ORCM_SUCCESS;
    }
    orcm_initialized++;

    if (NULL != getenv("ORCM_MCA_spin")) {
        spin = 1;
        /* spin until a debugger can attach */
        while (0 != spin) {
            ret = 0;
            while (ret < 10000) {
                ret++;
            }
        }
    }

    /* prior to initializing the OPAL layer, check to see
     * if the OPAL (and friends) install location has been
     * moved. In order to avoid conflicts with any other
     * OPAL-using software, the relocation point will have
     * been expressed as a set of "ORCM_foo" envars. We
     * therefore check for the ORCM_foo values, and name-shift
     * any we find to OPAL_foo so that OPAL will find them.
     * Since all ORCM tools will have already copied their
     * local environment, these name-shifted vars will not
     * appear in the environment of any launched processes */
    if (NULL != (envar = getenv("ORCM_PREFIX"))) {
        opal_unsetenv("ORCM_PREFIX", &environ);
        opal_setenv("OPAL_PREFIX", envar, true, &environ);
    }
    if (NULL != (envar = getenv("ORCM_LIBDIR"))) {
        opal_unsetenv("ORCM_LIBDIR", &environ);
        opal_setenv("OPAL_LIBDIR", envar, true, &environ);
    }
    if (NULL != (envar = getenv("ORCM_DATADIR"))) {
        opal_unsetenv("ORCM_DATADIR", &environ);
        opal_setenv("OPAL_DATADIR", envar, true, &environ);
    }
    /* initialize the opal layer */
    if (ORTE_SUCCESS != (ret = opal_init(NULL, NULL))) {
        error = "opal_init";
        goto error;
    }

    /* a negative verbosity means "no debug channel requested" */
    orcm_debug_verbosity = -1;
    (void) mca_base_var_register ("orcm", "orcm", NULL, "debug_verbose",
                                  "Verbosity level for ORCM debug messages (default: 1)",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                  &orcm_debug_verbosity);
    if (0 <= orcm_debug_verbosity) {
        /* get a debug output channel */
        OBJ_CONSTRUCT(&lds, opal_output_stream_t);
        lds.lds_want_stdout = true;
        orcm_debug_output = opal_output_open(&lds);
        OBJ_DESTRUCT(&lds);
        /* set the verbosity */
        opal_output_set_verbosity(orcm_debug_output, orcm_debug_verbosity);
    }

    /* ensure we know the type of proc for when we finalize */
    orte_process_info.proc_type = flags;

    /* setup the locks */
    if (ORTE_SUCCESS != (ret = orte_locks_init())) {
        error = "orte_locks_init";
        goto error;
    }

    /* register handler for ORTE errnum -> string conversion */
    opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);

    /* Ensure the rest of the process info structure is initialized */
    if (ORTE_SUCCESS != (ret = orte_proc_info())) {
        error = "orte_proc_info";
        goto error;
    }

    /* register handler for ORCM errnum -> string conversion */
    opal_error_register("ORCM", ORCM_ERR_BASE, ORCM_ERR_MAX, orcm_err2str);

    /* register handler for attr key -> string conversion */
    if (ORTE_SUCCESS != (ret = orte_attr_register("orcm", ORCM_ATTR_KEY_BASE, ORCM_ATTR_KEY_MAX,
                                                  orcm_attr_key_print))) {
        error = "register attr print";
        goto error;
    }

    /* we don't need a progress thread as all our tools loop inside themselves,
     * so define orte_event_base to be the base opal_event_base
     */
    orte_event_base = opal_sync_event_base;

    /* setup the globals */
    orcm_clusters = OBJ_NEW(opal_list_t);
    orcm_schedulers = OBJ_NEW(opal_list_t);

    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orcm_parser_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orcm_parser_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orcm_parser_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orcm_parser_select";
        goto error;
    }

    /* everyone must open the cfgi framework */
    if (ORCM_SUCCESS != (ret = mca_base_framework_open(&orcm_cfgi_base_framework, 0))) {
        error = "orcm_cfgi_base_open";
        goto error;
    }
    if (ORCM_SUCCESS != (ret = orcm_cfgi_base_select())) {
        error = "orcm_cfgi_select";
        goto error;
    }

    /* envar may be NULL here; it is handed to the loader unchanged */
    envar = getenv("ORCM_MCA_logical_group_config_file");
    if (ORCM_SUCCESS != (ret = orcm_logical_group_load_to_memory(envar))) {
        error = "orcm_logical_group_load_to_memory";
        goto error;
    }

    if (ORCM_SCHED == flags) {
        /* schedulers need the event executable path: taken from the
         * environment if given, otherwise <install prefix>/bin */
        if (NULL == (envar = getenv("ORCM_MCA_event_exec_path"))) {
            if (0 > asprintf(&orcm_event_exec_path, "%s/bin", opal_install_dirs.prefix)) {
                /* the pointer is indeterminate after a failed asprintf */
                orcm_event_exec_path = NULL;
            }
        } else {
            orcm_event_exec_path = strdup(envar);
        }
        if (NULL == orcm_event_exec_path) {
            /* ret still holds the success of the previous step - set a
             * real error code so we do not report success to the caller */
            ret = ORTE_ERR_OUT_OF_RESOURCE;
            error = "orcm_event_exec_path";
            goto error;
        }
    }

    /* everyone must open the sst framework */
    if (ORCM_SUCCESS != (ret = mca_base_framework_open(&orcm_sst_base_framework, 0))) {
        error = "orcm_sst_base_open";
        goto error;
    }
    if (ORCM_SUCCESS != (ret = orcm_sst_base_select())) {
        error = "orcm_sst_select";
        goto error;
    }

    /* open the ESS and select the correct module for this environment - the
     * orcm module is basically a no-op, but we need the framework defined
     * as other parts of ORTE will want to call it
     */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_ess_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_ess_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_ess_base_select())) {
        error = "orte_ess_base_select";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_ess.init())) {
        error = "orte_ess_init";
        goto error;
    }

    /* initialize us - we will register the ORTE-level MCA params in there */
    if (ORTE_SUCCESS != (ret = orcm_sst.init())) {
        error = "orte_init";
        goto error;
    }

    /* setup the orte_show_help system - don't do this until the
     * end as otherwise show_help messages won't appear
     */
    if (ORTE_SUCCESS != (ret = orte_show_help_init())) {
        error = "orte_show_help_init";
        goto error;
    }

    /* initialize orcm datatype support */
    if (ORCM_SUCCESS != (ret = orcm_dt_init())) {
        error = "orcm_dt_init";
        goto error;
    }

    /* flag that orte is initialized so things can work */
    orte_initialized = true;
    orte_help_want_aggregate = false;

    return ORCM_SUCCESS;

 error:
    /* report which step failed unless the failure asked to stay silent */
    if (ORCM_ERR_SILENT != ret) {
        opal_show_help("help-orcm-runtime.txt",
                       "orcm_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }

    return ret;
}
Example #12
0
/*
 * Construct the fullpath to the session directory
 *
 * The result has the form PREFIX/FRONTEND[/JOB-FAMILY/JOBID[/VPID]]:
 *   - FRONTEND is orte_process_info.top_session_dir when set, otherwise
 *     "openmpi-sessions-USERNAME@HOSTNAME_BATCHID";
 *   - JOB-FAMILY/JOBID/VPID is appended when proc carries a valid vpid,
 *     JOB-FAMILY/JOBID when only its jobid is valid, nothing otherwise;
 *   - PREFIX is *return_prefix when supplied, else
 *     orte_process_info.tmpdir_base, else opal_tmp_directory(); one
 *     trailing path separator is stripped.
 *
 * fulldirpath      if non-NULL, a string already stored in *fulldirpath is
 *                  freed and replaced by the newly allocated full path
 * return_prefix    if non-NULL and *return_prefix is set, it is used as
 *                  the prefix; if non-NULL and unset, it receives a copy
 *                  of the prefix that was chosen
 * return_frontend  if non-NULL, receives a copy of the frontend
 * hostid           host name to use, or NULL for orte_process_info.nodename
 * batchid          batch id to use, or NULL for "0"
 * proc             process name supplying jobid/vpid, may be NULL
 *
 * Returns ORTE_SUCCESS, or ORTE_ERR_OUT_OF_RESOURCE (no passwd entry or
 * allocation failure), ORTE_ERR_BAD_PARAM (no host name available),
 * ORTE_ERR_FATAL (prefix lies in orte_prohibited_session_dirs) or
 * ORTE_ERROR (path construction failed).  All strings handed back are
 * allocated here and left for the caller to free.
 */
int
orte_session_dir_get_name(char **fulldirpath,
                          char **return_prefix,  /* This will come back as the valid tmp dir */
                          char **return_frontend,
                          char *hostid,
                          char *batchid, 
                          orte_process_name_t *proc) {
    char *hostname  = NULL, 
        *batchname = NULL,
        *sessions  = NULL, 
        *user      = NULL, 
        *prefix = NULL,
        *frontend = NULL,
        *jobfam = NULL,
        *job = NULL,
        *vpidstr = NULL;
    bool prefix_provided = false;
    int exit_status = ORTE_SUCCESS;
    size_t len;
    struct passwd *pwdent;
    
    /* Ensure that system info is set */
    orte_proc_info();

    /* get the name of the user */
#ifdef HAVE_GETPWUID
    pwdent = getpwuid(getuid());
#else
    pwdent = NULL;
#endif
    if (NULL == pwdent) {
        orte_show_help("help-orte-runtime.txt",
                       "orte:session:dir:nopwname", true);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    if (NULL == (user = strdup(pwdent->pw_name))) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        exit_status = ORTE_ERR_OUT_OF_RESOURCE;
        goto cleanup;
    }
    
    /*
     * set the 'hostname'
     */
    if (NULL != hostid) { /* User specified version */
        hostname = strdup(hostid);
    } else if (NULL != orte_process_info.nodename) { /* set elsewhere */
        hostname = strdup(orte_process_info.nodename);
    } else {
        /* Couldn't find it, so fail */
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        exit_status = ORTE_ERR_BAD_PARAM;
        goto cleanup;
    }
    if (NULL == hostname) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        exit_status = ORTE_ERR_OUT_OF_RESOURCE;
        goto cleanup;
    }
    
    /*
     * set the 'batchid'
     */
    batchname = strdup((NULL != batchid) ? batchid : "0");
    if (NULL == batchname) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        exit_status = ORTE_ERR_OUT_OF_RESOURCE;
        goto cleanup;
    }

    /*
     * get the front part of the session directory
     * Will look something like:
     *    openmpi-sessions-USERNAME@HOSTNAME_BATCHID
     */
    if (NULL != orte_process_info.top_session_dir) {
        frontend = strdup(orte_process_info.top_session_dir);
    }
    else { /* If not set then construct it */
        if (0 > asprintf(&frontend, "openmpi-sessions-%s@%s_%s", user, hostname, batchname)) {
            /* pointer is indeterminate after a failed asprintf */
            frontend = NULL;
        }
    }
    if (NULL == frontend) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        exit_status = ORTE_ERR_OUT_OF_RESOURCE;
        goto cleanup;
    }

    /*
     * Construct the session directory
     */
    if (NULL != proc &&
        (ORTE_VPID_INVALID != proc->vpid || ORTE_JOBID_INVALID != proc->jobid)) {
        /* both the full and the partial form start with JOB-FAMILY/JOBID */
        if (0 > asprintf(&jobfam, "%d", ORTE_JOB_FAMILY(proc->jobid))) {
            jobfam = NULL;   /* keep cleanup from freeing garbage */
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            exit_status = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
        
        if (0 > asprintf(&job, "%d", ORTE_LOCAL_JOBID(proc->jobid))) {
            job = NULL;      /* keep cleanup from freeing garbage */
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            exit_status = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
        
        if (ORTE_VPID_INVALID != proc->vpid) {
            /* If we were given a valid vpid then we can construct it fully into:
             *   openmpi-sessions-USERNAME@HOSTNAME_BATCHID/JOB-FAMILY/JOBID/VPID
             */
            if (ORTE_SUCCESS != orte_util_convert_vpid_to_string(&vpidstr, proc->vpid)) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                exit_status = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            sessions = opal_os_path( false, frontend, jobfam, job, vpidstr, NULL );
        } else {
            /* If we were given a valid jobid then we can construct it partially into:
             *   openmpi-sessions-USERNAME@HOSTNAME_BATCHID/JOB-FAMILY/JOBID
             */
            sessions = opal_os_path( false, frontend, jobfam, job, NULL );
        }
        if (NULL == sessions) {
            ORTE_ERROR_LOG(ORTE_ERROR);
            exit_status = ORTE_ERROR;
            goto cleanup;
        }
    }
    else {
        /* No proc at all, or neither vpid nor jobid valid: the session
         * directory is just the frontend
         */
        sessions = strdup(frontend); /* must dup this to avoid double-free later */
        if (NULL == sessions) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            exit_status = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
    }
    
    /* Discard whatever path the caller passed in; it is rebuilt below */
    if (NULL != fulldirpath && NULL != *fulldirpath) {
        free(*fulldirpath);
        *fulldirpath = NULL;
    }

    /*
     * Select the prefix: if the user did not specify one we need to
     * keep looking
     */
    if (NULL != return_prefix && NULL != *return_prefix) { /* use the user specified one, if available */ 
        prefix = strdup(*return_prefix); 
        prefix_provided = true;
    }
    /* Try to find a proper alternative prefix */
    else if (NULL != orte_process_info.tmpdir_base) { /* stored value */
        prefix = strdup(orte_process_info.tmpdir_base);
    }
    else { /* General Environment var */
        prefix = strdup(opal_tmp_directory());
    }
    if (NULL == prefix) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        exit_status = ORTE_ERR_OUT_OF_RESOURCE;
        goto cleanup;
    }
    len = strlen(prefix);
    /* check for a trailing path separator - guard against an empty
     * prefix so that len-1 cannot wrap around */
    if (0 < len && OPAL_PATH_SEP[0] == prefix[len-1]) {
        prefix[len-1] = '\0';
    }
    
    /* BEFORE doing anything else, check to see if this prefix is
     * allowed by the system
     */
    if (NULL != orte_prohibited_session_dirs) {
        char **list;
        int i, count;
        /* break the string into tokens - it should be
         * separated by ','
         */
        list = opal_argv_split(orte_prohibited_session_dirs, ',');
        count = opal_argv_count(list);
        /* cycle through the list */
        for (i=0; i < count; i++) {
            /* check if prefix starts with this entry */
            if (0 == strncmp(prefix, list[i], strlen(list[i]))) {
                /* this is a prohibited location */
                orte_show_help("help-orte-runtime.txt",
                               "orte:session:dir:prohibited",
                               true, prefix, orte_prohibited_session_dirs);
                /* leave through cleanup so nothing allocated above leaks */
                opal_argv_free(list);
                exit_status = ORTE_ERR_FATAL;
                goto cleanup;
            }
        }
        opal_argv_free(list);  /* done with this */
    }
    /*
     * Construct the absolute final path, if requested
     */
    if (NULL != fulldirpath) {
        *fulldirpath = opal_os_path(false, prefix, sessions, NULL);
        if (NULL == *fulldirpath) {
            ORTE_ERROR_LOG(ORTE_ERROR);
            exit_status = ORTE_ERROR;
            goto cleanup;
        }
    }

    /* 
     * Return the frontend and prefix, if user requested we do so 
     */ 
    if (NULL != return_frontend) { 
        *return_frontend = strdup(frontend); 
    } 
    if (!prefix_provided && NULL != return_prefix) { 
        *return_prefix = strdup(prefix); 
    } 

 cleanup:
    /* free(NULL) is a no-op, so no guards are needed */
    free(hostname);
    free(batchname);
    free(sessions);
    free(user);
    free(prefix);
    free(frontend);
    free(jobfam);
    free(job);
    free(vpidstr);

    return exit_status;
}
Example #13
0
/**
 * Initialize the ORTE runtime for the calling process.
 *
 * Calls are counted in orte_initialized: only the first call performs the
 * initialization sequence, every later call just increments the counter
 * and returns ORTE_SUCCESS.
 *
 * @param pargc  forwarded unchanged to opal_init()
 * @param pargv  forwarded unchanged to opal_init()
 * @param flags  process type; stored in orte_process_info.proc_type
 *
 * @return ORTE_SUCCESS on success, otherwise the error code of the step
 *         that failed.  Unless that code is ORTE_ERR_SILENT a help message
 *         naming the failed step is shown.
 */
int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
{
    int ret;
    char *error = NULL;

    if (0 < orte_initialized) {
        /* track number of times we have been called */
        orte_initialized++;
        return ORTE_SUCCESS;
    }
    orte_initialized++;

    /* initialize the opal layer */
    if (ORTE_SUCCESS != (ret = opal_init(pargc, pargv))) {
        error = "opal_init";
        goto error;
    }
    
    /* Convince OPAL to use our naming scheme */
    opal_process_name_print = _process_name_print_for_opal;
    opal_process_name_vpid = _process_name_vpid_for_opal;
    opal_process_name_jobid = _process_name_jobid_for_opal;
    opal_compare_proc = _process_name_compare;

    /* ensure we know the type of proc for when we finalize */
    orte_process_info.proc_type = flags;

    /* setup the locks */
    if (ORTE_SUCCESS != (ret = orte_locks_init())) {
        error = "orte_locks_init";
        goto error;
    }
    
    /* Register all MCA Params */
    if (ORTE_SUCCESS != (ret = orte_register_params())) {
        error = "orte_register_params";
        goto error;
    }
    
    /* setup the orte_show_help system */
    if (ORTE_SUCCESS != (ret = orte_show_help_init())) {
        /* name the call that actually failed */
        error = "orte_show_help_init";
        goto error;
    }
    
    /* register handler for errnum -> string conversion */
    opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);

    /* Ensure the rest of the process info structure is initialized */
    if (ORTE_SUCCESS != (ret = orte_proc_info())) {
        error = "orte_proc_info";
        goto error;
    }

    /* open the ESS and select the correct module for this environment */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_ess_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_ess_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_ess_base_select())) {
        error = "orte_ess_base_select";
        goto error;
    }

    if (!ORTE_PROC_IS_APP) {
        /* ORTE tools "block" in their own loop over the event
         * base, so no progress thread is required - apps will
         * start their progress thread in ess_base_std_app.c
         * at the appropriate point
         */
        orte_event_base = opal_event_base;
    }

    /* initialize the RTE for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess.init())) {
        error = "orte_ess_init";
        goto error;
    }
    
    /* All done */
    return ORTE_SUCCESS;
    
 error:
    /* report which step failed unless the failure asked to stay silent */
    if (ORTE_ERR_SILENT != ret) {
        orte_show_help("help-orte-runtime",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }

    return ret;
}
Example #14
0
/**
 * Initialize the ORTE runtime for the calling process.
 *
 * Calls are counted in orte_initialized: only the first call performs the
 * initialization sequence, every later call just increments the counter
 * and returns ORTE_SUCCESS.
 *
 * When built with ORTE_ENABLE_PROGRESS_THREADS a separate event base is
 * created and a progress thread is started on it (this requires
 * OPAL_EVENT_HAVE_THREAD_SUPPORT, otherwise initialization always fails);
 * without it orte_event_base is simply the OPAL event base.
 *
 * @param pargc  forwarded unchanged to opal_init()
 * @param pargv  forwarded unchanged to opal_init()
 * @param flags  process type; stored in orte_process_info.proc_type
 *
 * @return ORTE_SUCCESS on success, otherwise the error code of the step
 *         that failed.  Unless that code is ORTE_ERR_SILENT a help message
 *         naming the failed step is shown.
 */
int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
{
    int ret;
    char *error = NULL;

    if (0 < orte_initialized) {
        /* track number of times we have been called */
        orte_initialized++;
        return ORTE_SUCCESS;
    }
    orte_initialized++;

    /* initialize the opal layer */
    if (ORTE_SUCCESS != (ret = opal_init(pargc, pargv))) {
        error = "opal_init";
        goto error;
    }

    /* ensure we know the type of proc for when we finalize */
    orte_process_info.proc_type = flags;

    /* setup the locks */
    if (ORTE_SUCCESS != (ret = orte_locks_init())) {
        error = "orte_locks_init";
        goto error;
    }

    /* Register all MCA Params */
    if (ORTE_SUCCESS != (ret = orte_register_params())) {
        error = "orte_register_params";
        goto error;
    }

    /* setup the orte_show_help system */
    if (ORTE_SUCCESS != (ret = orte_show_help_init())) {
        /* name the call that actually failed */
        error = "orte_show_help_init";
        goto error;
    }

    /* register handler for errnum -> string conversion */
    opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);

    /* Ensure the rest of the process info structure is initialized */
    if (ORTE_SUCCESS != (ret = orte_proc_info())) {
        error = "orte_proc_info";
        goto error;
    }

    /* open the ESS and select the correct module for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess_base_open())) {
        error = "orte_ess_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_ess_base_select())) {
        error = "orte_ess_base_select";
        goto error;
    }

#if ORTE_ENABLE_PROGRESS_THREADS
#if OPAL_EVENT_HAVE_THREAD_SUPPORT
    /* get a separate orte event base */
    orte_event_base = opal_event_base_create();
    if (NULL == orte_event_base) {
        /* do not start a progress thread on a base that was never created */
        error = "opal_event_base_create";
        ret = ORTE_ERROR;
        goto error;
    }
    /* construct the thread object */
    OBJ_CONSTRUCT(&orte_progress_thread, opal_thread_t);
    /* fork off a thread to progress it */
    orte_progress_thread.t_run = orte_progress_thread_engine;
    if (OPAL_SUCCESS != (ret = opal_thread_start(&orte_progress_thread))) {
        error = "orte progress thread start";
        goto error;
    }
#else
    /* progress threads were requested but the event library cannot
     * support them - fail at runtime */
    error = "event thread support is not configured";
    ret = ORTE_ERROR;
    goto error;
#endif
#else
    /* set the event base to the opal one */
    orte_event_base = opal_event_base;
#endif

    /* initialize the RTE for this environment */
    if (ORTE_SUCCESS != (ret = orte_ess.init())) {
        error = "orte_ess_init";
        goto error;
    }

    /* All done */
    return ORTE_SUCCESS;

error:
    /* report which step failed unless the failure asked to stay silent */
    if (ORTE_ERR_SILENT != ret) {
        orte_show_help("help-orte-runtime",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }

    return ret;
}