Example #1
0
/**
 * Open function for the file sensor component.
 *
 * Registers the component's MCA parameters and caches their values in
 * mca_sensor_file_component: the default file to watch, the three
 * check_* switches and the "no motion" limit.
 *
 * @return ORTE_SUCCESS (the registration results are not inspected).
 */
static int orte_sensor_file_open(void)
{
    mca_base_component_t *version = &mca_sensor_file_component.super.base_version;
    int flag;

    /* default file to watch; the registered default is NULL */
    mca_base_param_reg_string(version, "filename",
                              "File to be monitored",
                              false, false, NULL,
                              &mca_sensor_file_component.file);

    /* each check_* switch is registered as an int whose default is
     * false (0) and is then folded into the component's boolean field
     */
    mca_base_param_reg_int(version, "check_size",
                           "Check the file size",
                           false, false, false,
                           &flag);
    mca_sensor_file_component.check_size = OPAL_INT_TO_BOOL(flag);

    mca_base_param_reg_int(version, "check_access",
                           "Check access time",
                           false, false, false,
                           &flag);
    mca_sensor_file_component.check_access = OPAL_INT_TO_BOOL(flag);

    mca_base_param_reg_int(version, "check_mod",
                           "Check modification time",
                           false, false, false,
                           &flag);
    mca_sensor_file_component.check_mod = OPAL_INT_TO_BOOL(flag);

    /* this one is stored directly as an int; default is 3 */
    mca_base_param_reg_int(version, "limit",
                           "Number of times the sensor can detect no motion before declaring error (default=3)",
                           false, false, 3,
                           &mca_sensor_file_component.limit);

    return ORTE_SUCCESS;
}
/*
 * Register the MCA parameters that belong to the paffinity framework
 * as a whole.
 *
 * Only the first call does any work; every later call returns
 * OPAL_SUCCESS immediately.
 */
int opal_paffinity_base_register_params(void)
{
    static int registered = 0;
    int setting, alone_id;

    /* A repeated invocation is legal and must be harmless. */
    if (0 != registered) {
        return OPAL_SUCCESS;
    }
    registered = 1;

    /* Verbosity: open an output stream only when it is nonzero */
    mca_base_param_reg_int_name("paffinity", "base_verbose",
                                "Verbosity level of the paffinity framework",
                                false, false,
                                0, &setting);
    opal_paffinity_base_output = (0 != setting) ? opal_output_open(NULL) : -1;

    /* No storage is handed in here: the value is looked up by index
     * after the synonym has been attached.
     */
    alone_id = mca_base_param_reg_int_name("opal", "paffinity_alone",
                                           "If nonzero, assume that this job is the only (set of) process(es) running on each node and bind processes to processors, starting with processor ID 0",
                                           false, false,
                                           0, NULL);

    /* Keep the historical mpi_paffinity_alone name working as a
     * synonym.  It is deliberately not flagged as deprecated so that
     * users are not alarmed.  This does reach across the abstraction
     * barrier; that was accepted on the developer list.
     */
    mca_base_param_reg_syn_name(alone_id, "mpi", "paffinity_alone", false);
    mca_base_param_lookup_int(alone_id, &setting);
    opal_paffinity_alone = OPAL_INT_TO_BOOL(setting);

    mca_base_param_reg_int_name("paffinity", "base_bound",
                                "Process affinity was set by an external entity",
                                true, false,
                                false, &setting);
    opal_paffinity_base_bound = OPAL_INT_TO_BOOL(setting);

    /* NOTE(review): the help text below is identical to the one used
     * for base_bound above -- possibly a copy/paste; confirm the
     * intended description.
     */
    mca_base_param_reg_string_name("paffinity", "base_applied_binding",
                                   "Process affinity was set by an external entity",
                                   true, false,
                                   NULL, &opal_paffinity_base_applied_binding);

    return OPAL_SUCCESS;
}
/*
 * Register the MCA parameters of the mmap shmem component.
 *
 * Always returns OPAL_SUCCESS.
 */
static int
mmap_register(void)
{
    int warn_enabled;

    /*
     * Should the "warning: your mmap file is on NFS!" message be
     * shown?  According to a thread on the OMPI devel list
     * (http://www.open-mpi.org/community/lists/devel/2011/12/10054.php)
     * the location makes no difference on some systems, while older
     * threads report that it clearly does on others.  Either way, give
     * users a switch to silence the message where it is irrelevant.
     * The warning is enabled by default.
     */
    mca_base_param_reg_int(&mca_shmem_mmap_component.super.base_version,
                           "enable_nfs_warning",
                           "Enable the warning emitted when Open MPI detects "
                           "that its shared memory backing file is located on "
                           "a network filesystem (1 = enabled, 0 = disabled).",
                           false, false,
                           (int)true,
                           &warn_enabled);
    opal_shmem_mmap_nfs_warning = OPAL_INT_TO_BOOL(warn_enabled);

    return OPAL_SUCCESS;
}
/*
 * Open function for the SNAPC "full" component: registers its MCA
 * parameters, chooses the output stream to log on and reports the
 * resulting settings.
 *
 * Always returns ORTE_SUCCESS.
 */
static int snapc_full_open(void) 
{
    int skip_flag;

    /* priority: the default is whatever the component structure
     * currently holds, and the result is written back to it
     */
    mca_base_param_reg_int(&mca_snapc_full_component.super.base_version,
                           "priority",
                           "Priority of the SNAPC full component",
                           false, false,
                           mca_snapc_full_component.super.priority,
                           &mca_snapc_full_component.super.priority);

    /* verbosity: same in-place default/result scheme */
    mca_base_param_reg_int(&mca_snapc_full_component.super.base_version,
                           "verbose",
                           "Verbose level for the SNAPC full component",
                           false, false,
                           mca_snapc_full_component.super.verbose,
                           &mca_snapc_full_component.super.verbose);

    /* Without a component-specific verbosity, share the framework's
     * output channel; otherwise open a private one at that level.
     */
    if (0 == mca_snapc_full_component.super.verbose) {
        mca_snapc_full_component.super.output_handle = orte_snapc_base_output;
    } else {
        mca_snapc_full_component.super.output_handle = opal_output_open(NULL);
        opal_output_set_verbosity(mca_snapc_full_component.super.output_handle,
                                  mca_snapc_full_component.super.verbose);
    }

    /* debugging aid, disabled by default */
    mca_base_param_reg_int(&mca_snapc_full_component.super.base_version,
                           "skip_filem",
                           "Not for general use! For debugging only! Pretend to move files. [Default = disabled]",
                           false, false,
                           0,
                           &skip_flag);
    orte_snapc_full_skip_filem = OPAL_INT_TO_BOOL(skip_flag);

    /* Report what was configured */
    opal_output_verbose(10, mca_snapc_full_component.super.output_handle,
                        "snapc:full: open()");
    opal_output_verbose(20, mca_snapc_full_component.super.output_handle,
                        "snapc:full: open: priority    = %d",
                        mca_snapc_full_component.super.priority);
    opal_output_verbose(20, mca_snapc_full_component.super.output_handle,
                        "snapc:full: open: verbosity   = %d",
                        mca_snapc_full_component.super.verbose);
    opal_output_verbose(20, mca_snapc_full_component.super.output_handle,
                        "snapc:full: open: skip_filem  = %s",
                        (true == orte_snapc_full_skip_filem) ? "True" : "False");

    return ORTE_SUCCESS;
}
/*
 * Select and initialize the CRS component to use.
 *
 * Returns OPAL_SUCCESS when a component was selected and initialized,
 * or when selection was deliberately skipped (see below).  Returns
 * OPAL_ERROR when no usable component/module came out of the
 * selection, or the error code of the winner's crs_init() when that
 * call fails.
 */
int opal_crs_base_select(void)
{
    int ret;
    opal_crs_base_component_t *best_component = NULL;
    opal_crs_base_module_t *best_module = NULL;
    int int_value = 0;

    /*
     * Note: If we are a tool, then we will manually run the selection routine 
     *       for the checkpointer.  The tool will set the MCA parameter 
     *       'crs_base_do_not_select' before opal_init and then reset it after to 
     *       disable the selection logic.
     *       This is useful for opal_restart because it reads the metadata file
     *       that indicates the checkpointer to be used after calling opal_init.
     *       Therefore it would need to select a specific module, but it doesn't
     *       know which one until later. It will set the MCA parameter 'crs' 
     *       before calling this function.
     */
    mca_base_param_reg_int_name("crs", 
                                "base_do_not_select",
                                "Do not do the selection of the CRS component",
                                true, false,
                                false, 
                                &int_value);
    if( OPAL_INT_TO_BOOL(int_value) ) {
        opal_output_verbose(10, opal_crs_base_output,
                            "crs:select: Not selecting at this time!");
        return OPAL_SUCCESS;
    }

    /*
     * Select the best component
     */
    if( OPAL_SUCCESS != mca_base_select("crs", opal_crs_base_output,
                                        &opal_crs_base_components_available,
                                        (mca_base_module_t **) &best_module,
                                        (mca_base_component_t **) &best_component) ) {
        /* This will only happen if no component was selected */
        return OPAL_ERROR;
    }

    /* Both results are copied by value below, so they must be checked
     * before they are dereferenced rather than afterwards.
     */
    if( NULL == best_component || NULL == best_module ) {
        return OPAL_ERROR;
    }

    /* Save the winner */
    opal_crs_base_selected_component = *best_component;
    opal_crs = *best_module;

    /* Initialize the winner */
    if( OPAL_SUCCESS != (ret = opal_crs.crs_init()) ) {
        return ret;
    }

    return OPAL_SUCCESS;
}
/*
 * Open function for the "novm" state component.
 *
 * Registers the "select" parameter (default: false) and records the
 * result in select_me.  Always returns ORTE_SUCCESS.
 */
static int state_novm_open(void) 
{
    mca_base_component_t *version = &mca_state_novm_component.base_version;
    int requested;

    mca_base_param_reg_int(version,
                           "select",
                           "Use this component",
                           false, false,
                           (int)false,
                           &requested);
    select_me = OPAL_INT_TO_BOOL(requested);

    return ORTE_SUCCESS;
}
/*
 * Open function for the CRS blcr component: registers its MCA
 * parameters, chooses the output stream to log on and reports the
 * resulting settings.
 *
 * Always returns OPAL_SUCCESS.
 */
static int crs_blcr_open(void)
{
    int dev_null_flag;

    /* priority: default comes from, and result goes back into, the
     * component structure
     */
    mca_base_param_reg_int(&mca_crs_blcr_component.super.base_version,
                           "priority",
                           "Priority of the CRS blcr component",
                           false, false,
                           mca_crs_blcr_component.super.priority,
                           &mca_crs_blcr_component.super.priority);

    /* verbosity: same in-place scheme */
    mca_base_param_reg_int(&mca_crs_blcr_component.super.base_version,
                           "verbose",
                           "Verbose level for the CRS blcr component",
                           false, false,
                           mca_crs_blcr_component.super.verbose,
                           &mca_crs_blcr_component.super.verbose);

    /* Without a component-specific verbosity, share the framework's
     * output channel; otherwise open a private one at that level.
     */
    if (0 == mca_crs_blcr_component.super.verbose) {
        mca_crs_blcr_component.super.output_handle = opal_crs_base_output;
    } else {
        mca_crs_blcr_component.super.output_handle = opal_output_open(NULL);
        opal_output_set_verbosity(mca_crs_blcr_component.super.output_handle,
                                  mca_crs_blcr_component.super.verbose);
    }

    /* debugging aid, disabled by default */
    mca_base_param_reg_int(&mca_crs_blcr_component.super.base_version,
                           "dev_null",
                           "Not for general use! For debugging only! Save checkpoint to /dev/null. [Default = disabled]",
                           false, false,
                           0,
                           &dev_null_flag);
    opal_crs_blcr_dev_null = OPAL_INT_TO_BOOL(dev_null_flag);

    /* Report what was configured */
    opal_output_verbose(10, mca_crs_blcr_component.super.output_handle,
                        "crs:blcr: open()");
    opal_output_verbose(20, mca_crs_blcr_component.super.output_handle,
                        "crs:blcr: open: priority = %d",
                        mca_crs_blcr_component.super.priority);
    opal_output_verbose(20, mca_crs_blcr_component.super.output_handle,
                        "crs:blcr: open: verbosity = %d",
                        mca_crs_blcr_component.super.verbose);
    opal_output_verbose(10, mca_crs_blcr_component.super.output_handle,
                        "crs:blcr: open: dev_null = %s",
                        (true == opal_crs_blcr_dev_null) ? "True" : "False");

    return OPAL_SUCCESS;
}
/**
 * Open the BTL framework: register its base parameters and open
 * either all MCA components, or the one that was specifically
 * requested via a MCA parameter.
 *
 * Every call bumps mca_btl_base_already_opened; only the first call
 * does the actual work, later calls return OMPI_SUCCESS right away.
 *
 * @return OMPI_SUCCESS, or OMPI_ERROR when the components could not
 *         be opened.
 */
int mca_btl_base_open(void)
{
    int override_flag;
    int include_id, exclude_id;

    mca_btl_base_already_opened++;
    if (mca_btl_base_already_opened > 1) {
        return OMPI_SUCCESS;
    }

    /* Framework verbosity, applied to a freshly opened output stream */
    mca_base_param_reg_int_name("btl",
                                "base_verbose",
                                "Verbosity level of the BTL framework",
                                false, false,
                                0,
                                &mca_btl_base_verbose);
    mca_btl_base_output = opal_output_open(NULL);
    opal_output_set_verbosity(mca_btl_base_output, mca_btl_base_verbose);

    /* Allow the per-BTL "don't run if THREAD_MULTIPLE selected"
       embargo to be overridden? */
    mca_base_param_reg_int_name("btl",
                                "base_thread_multiple_override",
                                "Enable BTLs that are not normally enabled when MPI_THREAD_MULTIPLE is enabled (THIS IS FOR DEVELOPERS ONLY!  SHOULD NOT BE USED BY END USERS!)",
                                true, false,
                                0,
                                &override_flag);
    mca_btl_base_thread_multiple_override = OPAL_INT_TO_BOOL(override_flag);

    /* Open up all available components */
    if (OMPI_SUCCESS !=
        mca_base_components_open("btl", mca_btl_base_output,
                                 mca_btl_base_static_components,
                                 &mca_btl_base_components_opened, true)) {
        return OMPI_ERROR;
    }

    /* Construct the list now so that mca_btl_base_close() can walk it
       even when it stays empty (as happens for ompi_info) */
    OBJ_CONSTRUCT(&mca_btl_base_modules_initialized, opal_list_t);

    /* include / exclude lists: register, then read back by index */
    include_id = mca_base_param_register_string("btl","base","include",NULL,NULL);
    mca_base_param_lookup_string(include_id, &mca_btl_base_include);

    exclude_id = mca_base_param_register_string("btl","base","exclude",NULL,NULL);
    mca_base_param_lookup_string(exclude_id, &mca_btl_base_exclude);

    mca_base_param_reg_int_name("btl", "base_warn_component_unused",
                                "This parameter is used to turn on warning messages when certain NICs are not used",
                                false, false, 1,
                                &mca_btl_base_warn_component_unused);

    /* All done */
    return OMPI_SUCCESS;
}
Example #9
0
/**
 * Open the debugger framework: register its parameters and open
 * either all MCA components, or the one that was specifically
 * requested via a MCA parameter.
 *
 * @return ORTE_SUCCESS, or ORTE_ERROR when the components could not
 *         be opened.
 */
int orte_debugger_base_open(void)
{
    int setting;
    int rc;

    /* The output stream is always opened here; its verbosity is
       set by the mca open system... */
    orte_debugger_base.output = opal_output_open(NULL);

    /* dump the proctable after launch? (off by default) */
    mca_base_param_reg_int_name("orte",
                                "output_debugger_proctable",
                                "Whether or not to output the debugger proctable after launch (default: false)",
                                true, false,
                                0,
                                &setting);
    orte_debugger_base.dump_proctable = OPAL_INT_TO_BOOL(setting);

    /* executable used to simulate a debugger colaunch */
    mca_base_param_reg_string_name("orte",
                                   "debugger_test_daemon",
                                   "Name of the executable to be used to simulate a debugger colaunch (relative or absolute path)",
                                   false, false,
                                   NULL,
                                   &orte_debugger_base.test_daemon);

    /* test colaunch after attach? (off by default) */
    mca_base_param_reg_int_name("orte",
                                "debugger_test_attach",
                                "Test debugger colaunch after debugger attachment",
                                false, false,
                                0,
                                &setting);
    orte_debugger_base.test_attach = OPAL_INT_TO_BOOL(setting);

    /* Open up all available components */
    rc = mca_base_components_open("debugger", orte_debugger_base.output,
                                  mca_debugger_base_static_components,
                                  &orte_debugger_base_components_available,
                                  true);
    if (ORTE_SUCCESS != rc) {
        return ORTE_ERROR;
    }

    /* All done */
    return ORTE_SUCCESS;
}
/*
 * Open function for the r2 BML component.
 *
 * Registers "show_unreach_errors" (default: 1) and stores it as a
 * boolean in mca_bml_r2.  Always returns OMPI_SUCCESS.
 */
int mca_bml_r2_component_open(void)
{
    int show_errors_flag;

    mca_base_param_reg_int(&mca_bml_r2_component.bml_version,
                           "show_unreach_errors",
                           "Show error message when procs are unreachable",
                           false, false,
                           1,
                           &show_errors_flag);
    mca_bml_r2.show_unreach_errors = OPAL_INT_TO_BOOL(show_errors_flag);

    return OMPI_SUCCESS;
}
Example #11
0
/*
 * Re-read the environment file written for process prev_pid and apply
 * it to the current process.
 *
 * prev_pid: pid used to build the file name; when it is <= 0 the pid
 *           of the calling process is used instead.
 *
 * Returns OPAL_SUCCESS, including when the file does not exist (there
 * is then nothing to refresh).
 */
int opal_cr_refresh_environ(int prev_pid) {
    int val;
    char *file_name = NULL;
    struct stat file_status;

    if( 0 >= prev_pid ) {
        prev_pid = getpid();
    }

    /*
     * Build the name of the environment file.  The buffer contents are
     * unspecified when asprintf fails, so it must not be used (or
     * freed) in that case.
     * NOTE(review): the failure is reported as "nothing to refresh" to
     * keep the historical return contract; consider returning an
     * out-of-resource error code instead.
     */
    if( 0 > asprintf(&file_name, "%s/%s-%d", opal_tmp_directory(), OPAL_CR_BASE_ENV_NAME, prev_pid) ) {
        return OPAL_SUCCESS;
    }

    /*
     * Make sure the file exists. If it doesn't then this means 2 things:
     *  1) We have already executed this function, and
     *  2) The file has been deleted on the previous round.
     */
    if(0 != stat(file_name, &file_status) ){
        /* the name was heap allocated above; do not leak it */
        free(file_name);
        return OPAL_SUCCESS;
    }

#if OPAL_ENABLE_CRDEBUG == 1
    opal_unsetenv(mca_base_param_env_var("opal_cr_enable_crdebug"), &environ);
#endif

    extract_env_vars(prev_pid, file_name);

#if OPAL_ENABLE_CRDEBUG == 1
    mca_base_param_reg_int_name("opal_cr", "enable_crdebug",
                                "Enable checkpoint/restart debugging",
                                false, false,
                                0,
                                &val);
    MPIR_debug_with_checkpoint = OPAL_INT_TO_BOOL(val);

    opal_output_verbose(10, opal_cr_output,
                        "opal_cr: init: C/R Debugging Enabled [%s] (refresh)\n",
                        (MPIR_debug_with_checkpoint ? "True": "False"));
#else
    val = 0; /* Silence Compiler warning */
#endif

    free(file_name);
    file_name = NULL;

    return OPAL_SUCCESS;
}
/*
 * Open function for the paffinity "test" component.
 *
 * Registers the "bound", "num_sockets" and "num_cores" parameters and
 * stores them in mca_paffinity_test_component.  Always returns
 * OPAL_SUCCESS.
 */
static int test_open(void)
{
    int bound_flag;

    /* registered as an int (default: false), stored as a boolean */
    mca_base_param_reg_int(&mca_paffinity_test_component.super.base_version,
                           "bound",
                           "Whether or not to test as if externally bound (default=0: no)",
                           false, false,
                           (int)false,
                           &bound_flag);
    mca_paffinity_test_component.bound = OPAL_INT_TO_BOOL(bound_flag);

    /* both dimensions default to 4 and are stored directly */
    mca_base_param_reg_int(&mca_paffinity_test_component.super.base_version,
                           "num_sockets",
                           "Number of sockets on each node (default=4)",
                           false, false,
                           4,
                           &mca_paffinity_test_component.num_sockets);

    mca_base_param_reg_int(&mca_paffinity_test_component.super.base_version,
                           "num_cores",
                           "Number of cores in each socket (default=4)",
                           false, false,
                           4,
                           &mca_paffinity_test_component.num_cores);

    return OPAL_SUCCESS;
}
Example #13
0
/*
 * Fill in the fields of orte_process_info that are driven by MCA
 * parameters (seed flag, app number, replica URIs, tmpdir base) and
 * record the pid of the calling process.
 *
 * Always returns ORTE_SUCCESS.
 */
int orte_proc_info(void)
{
    int param, value;

    /* all other params are set elsewhere */

    /* seed flag: the current field value serves as the default */
    param = mca_base_param_register_int("seed", NULL, NULL, NULL, orte_process_info.seed);
    mca_base_param_lookup_int(param, &value);
    orte_process_info.seed = OPAL_INT_TO_BOOL(value);

    /* a seed must never carry the daemon flag, otherwise framework
     * components would not be selected properly
     */
    if (orte_process_info.seed) {
        orte_process_info.daemon = false;
    }

    /* application number; the registered default is -1 */
    param = mca_base_param_register_int("orte", "app", "num", NULL, -1);
    mca_base_param_lookup_int(param, &value);
    orte_process_info.app_num = value;

    /* GPR replica URI -- flagged as an internal parameter */
    param = mca_base_param_register_string("gpr", "replica", "uri", NULL,
                                           orte_process_info.gpr_replica_uri);
    mca_base_param_lookup_string(param, &(orte_process_info.gpr_replica_uri));
    mca_base_param_set_internal(param, true);

    /* NS replica URI -- flagged as an internal parameter */
    param = mca_base_param_register_string("ns", "replica", "uri", NULL,
                                           orte_process_info.ns_replica_uri);
    mca_base_param_lookup_string(param, &(orte_process_info.ns_replica_uri));
    mca_base_param_set_internal(param, true);

    /* base of the temporary directory tree */
    param = mca_base_param_register_string("tmpdir", "base", NULL, NULL,
                                           orte_process_info.tmpdir_base);
    mca_base_param_lookup_string(param, &(orte_process_info.tmpdir_base));

    /* get the process id */
    orte_process_info.pid = getpid();

    return ORTE_SUCCESS;
}
Example #14
0
/*
 * Start monitoring of local processes
 *
 * Walks orte_local_jobdata and, for every entry whose jobid matches
 * the given jobid (or for every entry when jobid is
 * ORTE_JOBID_WILDCARD), creates a file_tracker_t describing which
 * file to watch and which attributes to check, and appends it to the
 * "jobs" list.  Settings found in the first app_context's environment
 * take precedence over the component-level defaults.  If at least one
 * tracker exists and no sampling timer is running yet, the timer is
 * created and armed.
 *
 * Nothing is done when jobid is the caller's own job.
 */
static void start(orte_jobid_t jobid)
{
    mca_base_component_t *c = &mca_sensor_file_component.super.base_version;
    opal_list_item_t *item;
    orte_odls_job_t *jobdat;
    orte_app_context_t *app, *aptr;
    int rc, tmp;
    char *filename;
    file_tracker_t *ft;

    /* cannot monitor my own job */
    if (jobid == ORTE_PROC_MY_NAME->jobid && ORTE_JOBID_WILDCARD != jobid) {
        return;
    }
    
    OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                         "%s starting file monitoring for job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jobid)));
    
    /* get the local jobdat for this job.  The step expression must
     * advance to the next element: stepping straight to the end
     * sentinel would stop after the first entry, so a job that is not
     * first in the list (or any but the first job of a wildcard
     * request) would never be examined.
     */
    for (item = opal_list_get_first(&orte_local_jobdata);
         item != opal_list_get_end(&orte_local_jobdata);
         item = opal_list_get_next(item)) {
        jobdat = (orte_odls_job_t*)item;
        if (jobid == jobdat->jobid || ORTE_JOBID_WILDCARD == jobid) {
            /* must be at least one app_context, so use the first one found */
            app = NULL;
            for (tmp=0; tmp < jobdat->apps.size; tmp++) {
                if (NULL != (aptr = (orte_app_context_t*)opal_pointer_array_get_item(&jobdat->apps, tmp))) {
                    app = aptr;
                    break;
                }
            }
            if (NULL == app) {
                /* got a problem */
                ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                continue;
            }
            
            /* search the environ to get the filename */
            if (ORTE_SUCCESS != (rc = mca_base_param_find_string(c, "filename", app->env, &filename))) {
                /* was a default file given */
                if (NULL == mca_sensor_file_component.file) {
                    /* can't do anything without a file */
                    OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                                         "%s sensor:file no file for job %s",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                         ORTE_JOBID_PRINT(jobid)));
                    continue;
                }
                filename = mca_sensor_file_component.file;
            }
            
            /* create the tracking object; it keeps its own copy of
             * the file name
             * NOTE(review): the tracker records the requested jobid,
             * which is the wildcard value for wildcard requests --
             * confirm that this is what the sampler expects.
             */
            ft = OBJ_NEW(file_tracker_t);
            ft->jobid = jobid;
            ft->file = strdup(filename);
            
            /* search the environ to see what we are checking; when the
             * environment has no entry, the component default is
             * applied only if it is set
             */
            tmp = 0;
            if (ORTE_SUCCESS != (rc = mca_base_param_find_int(c, "check_size", app->env, &tmp))) {
                /* was a default value given */
                if (0 < mca_sensor_file_component.check_size) {
                    ft->check_size = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_size);
                }
            } else {
                ft->check_size = OPAL_INT_TO_BOOL(tmp);
            }
            tmp = 0;
            if (ORTE_SUCCESS != (rc = mca_base_param_find_int(c, "check_access", app->env, &tmp))) {
                /* was a default value given */
                if (0 < mca_sensor_file_component.check_access) {
                    ft->check_access = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_access);
                }
            } else {
                ft->check_access = OPAL_INT_TO_BOOL(tmp);
            }
            tmp = 0;
            if (ORTE_SUCCESS != (rc = mca_base_param_find_int(c, "check_mod", app->env, &tmp))) {
                /* was a default value given */
                if (0 < mca_sensor_file_component.check_mod) {
                    ft->check_mod = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_mod);
                }
            } else {
                ft->check_mod = OPAL_INT_TO_BOOL(tmp);
            }
            /* the limit always has a value: environment first, then
             * the component default
             */
            tmp = 0;
            if (ORTE_SUCCESS != (rc = mca_base_param_find_int(c, "limit", app->env, &tmp))) {
                ft->limit = mca_sensor_file_component.limit;
            } else {
                ft->limit = tmp;
            }
            opal_list_append(&jobs, &ft->super);
            OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                                 "%s file %s monitored for %s%s%s with limit %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ft->file, ft->check_size ? "SIZE:" : " ",
                                 ft->check_access ? "ACCESS TIME:" : " ",
                                 ft->check_mod ? "MOD TIME" : " ", ft->limit));
        }
    }
    
    /* start sampling */
    if (NULL == sample_ev && !opal_list_is_empty(&jobs)) {
        /* startup a timer to wake us up periodically
         * for a data sample
         */
        sample_ev =  (opal_event_t *) malloc(sizeof(opal_event_t));
        opal_event_evtimer_set(opal_event_base, sample_ev, sample, sample_ev);
        sample_time.tv_sec = mca_sensor_file_component.sample_rate;
        sample_time.tv_usec = 0;
        opal_event_evtimer_add(sample_ev, &sample_time);
    }
    return;
}
Example #15
0
#include "opal/runtime/opal_params.h"
#include "opal/dss/dss.h"
#include "opal/util/show_help.h"
#include "opal/util/timings.h"

/*
 * String-valued globals; all start out as NULL.
 * NOTE(review): presumably filled in when the OPAL parameters are
 * registered -- confirm against the registration code.
 */
char *opal_signal_string = NULL;
char *opal_net_private_ipv4 = NULL;
char *opal_set_max_sys_limits = NULL;

/* Timing-related globals; only defined when OPAL_ENABLE_TIMING is set */
#if OPAL_ENABLE_TIMING
char *opal_timing_sync_file = NULL;
char *opal_timing_output = NULL;
bool opal_timing_overhead = true;
#endif

/* Derived from the build-time OPAL_CUDA_SUPPORT setting */
bool opal_built_with_cuda_support = OPAL_INT_TO_BOOL(OPAL_CUDA_SUPPORT);
/* Defaults to false */
bool opal_cuda_support = false;
/* Only defined when OPAL_ENABLE_FT_CR is 1; defaults to false */
#if OPAL_ENABLE_FT_CR == 1
bool opal_base_distill_checkpoint_ready = false;
#endif

/**
 * Globals imported from the OMPI layer.
 */
int opal_leave_pinned = -1;
bool opal_leave_pinned_pipeline = false;
bool opal_abort_print_stack = false;
int opal_abort_delay = 0;

/*
 * File-local flag, initially false.
 * NOTE(review): presumably guards against registering the parameters
 * more than once -- confirm against the register function.
 */
static bool opal_register_done = false;
Example #16
0
 * included in all MPI function implementation files
 *
 * The values below are the default values.
 */
/* Default values of the OMPI-level run-time globals */
bool ompi_mpi_param_check = true;
bool ompi_debug_show_handle_leaks = false;
int ompi_debug_show_mpi_alloc_mem_leaks = 0;
bool ompi_debug_no_free_handles = false;
bool ompi_mpi_show_mca_params = false;
char *ompi_mpi_show_mca_params_file = NULL;
bool ompi_mpi_abort_print_stack = false;
int ompi_mpi_abort_delay = 0;
bool ompi_mpi_keep_fqdn_hostnames = false;
int ompi_mpi_leave_pinned = -1;
bool ompi_mpi_leave_pinned_pipeline = false;
/* Both sparse-group flags start from the build-time OMPI_GROUP_SPARSE setting */
bool ompi_have_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE);
bool ompi_use_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE);
/* Derived from the build-time OMPI_CUDA_SUPPORT setting */
bool ompi_mpi_built_with_cuda_support = OPAL_INT_TO_BOOL(OMPI_CUDA_SUPPORT);
/* No explicit initializer: zero-initialized (false) as a file-scope object */
bool ompi_mpi_cuda_support;

uint32_t ompi_hostname_cutoff = UINT32_MAX;
bool ompi_mpi_yield_when_idle = true;
int ompi_mpi_event_tick_rate = -1;
char *ompi_mpi_show_mca_params_string = NULL;
/* NOTE(review): derived from OMPI_GROUP_SPARSE just like
 * ompi_have_sparse_group_storage above -- confirm both are needed.
 */
bool ompi_mpi_have_sparse_group_storage = !!(OMPI_GROUP_SPARSE);
bool ompi_mpi_preconnect_mpi = false;

/* File-local flags, all false by default.
 * NOTE(review): presumably select which categories of MCA parameters
 * get shown -- confirm against the code that sets them.
 */
static bool show_default_mca_params = false;
static bool show_file_mca_params = false;
static bool show_enviro_mca_params = false;
static bool show_override_mca_params = false;
/**
 * Function for finding and opening either all MCA components, or the
 * one that was specifically requested via a MCA parameter.
 */
int mca_base_components_open(const char *type_name, int output_id,
                             const mca_base_component_t **static_components,
                             opal_list_t *components_available,
                             bool open_dso_components)
{
    int ret, param;
    opal_list_item_t *item;
    opal_list_t components_found;
    char **requested_component_names;
    int param_verbose = -1;
    int param_type = -1;
    int verbose_level;
    char *str;
    bool include_mode;
#if (OPAL_ENABLE_FT == 1) && (OPAL_ENABLE_FT_CR == 1)
    opal_list_item_t *next;
    uint32_t open_only_flags = MCA_BASE_METADATA_PARAM_NONE;
    const mca_base_component_t *component;
#endif

    /* Register MCA parameters */
    /* Check to see if it exists first */
    if( 0 > (param_type = mca_base_param_find(type_name, NULL, NULL) ) ) {
        asprintf(&str, "Default selection set of components for the %s framework (<none>"
                 " means use all components that can be found)", type_name);
        param_type =
            mca_base_param_reg_string_name(type_name, NULL, str,
                                           false, false, NULL, NULL);
        free(str);
    }

    param = mca_base_param_find("mca", NULL, "component_show_load_errors");
    mca_base_param_lookup_int(param, &ret);
    show_errors = OPAL_INT_TO_BOOL(ret);

    /* Setup verbosity for this MCA type */
    asprintf(&str, "Verbosity level for the %s framework (0 = no verbosity)", type_name);
    param_verbose =
        mca_base_param_reg_int_name(type_name, "base_verbose",
                                    str, false, false, 0, NULL);
    free(str);
    mca_base_param_lookup_int(param_verbose, &verbose_level);
    if (output_id != 0) {
        opal_output_set_verbosity(output_id, verbose_level);
    }
    opal_output_verbose(10, output_id,
                        "mca: base: components_open: Looking for %s components",
                        type_name);

    ret = parse_requested(param_type, &include_mode, &requested_component_names);
    if( OPAL_SUCCESS != ret ) {
        return ret;
    }

    /* Find and load requested components */
    if (OPAL_SUCCESS != (ret =
                             mca_base_component_find(NULL, type_name, static_components,
                                     requested_component_names, include_mode,
                                     &components_found, open_dso_components)) ) {
        return ret;
    }

#if (OPAL_ENABLE_FT == 1) && (OPAL_ENABLE_FT_CR == 1)
    {
        int param_id = -1;
        int param_val = 0;
        /*
         * Extract supported mca parameters for selection contraints
         * Supported Options:
         *   - mca_base_component_distill_checkpoint_ready = Checkpoint Ready
         */
        param_id = mca_base_param_reg_int_name("mca", "base_component_distill_checkpoint_ready",
                                               "Distill only those components that are Checkpoint Ready",
                                               false, false,
                                               0, &param_val);
        if( 0 != param_val ) { /* Select Checkpoint Ready */
            open_only_flags |= MCA_BASE_METADATA_PARAM_CHECKPOINT;
        }
    }
#endif  /* (OPAL_ENABLE_FT == 1) && (OPAL_ENABLE_FT_CR == 1) */

    /*
     * Pre-process the list with parameter constraints
     * e.g., If requested to select only CR enabled components
     *       then only make available those components.
     *
     * JJH Note: Currently checkpoint/restart is the only user of this
     *           functionality. If other component constraint options are
     *           added, then this logic can be used for all contraint
     *           options.
     */
#if (OPAL_ENABLE_FT == 1) && (OPAL_ENABLE_FT_CR == 1)
    if( !(MCA_BASE_METADATA_PARAM_NONE & open_only_flags) ) {
        if( MCA_BASE_METADATA_PARAM_CHECKPOINT & open_only_flags) {
            opal_output_verbose(10, output_id,
                                "mca: base: components_open: "
                                "including only %s components that are checkpoint enabled", type_name);
        }

        /*
         * Check all the components to make sure they adhere to the user
         * expressed requirements.
         */
        for(item  = opal_list_get_first(&components_found);
                item != opal_list_get_end(&components_found);
                item  = next ) {
            mca_base_open_only_dummy_component_t *dummy;
            mca_base_component_list_item_t *cli = (mca_base_component_list_item_t *) item;
            dummy = (mca_base_open_only_dummy_component_t*) cli->cli_component;
            component = cli->cli_component;

            next = opal_list_get_next(item);

            /*
             * If the user asked for a checkpoint enabled run
             * then only load checkpoint enabled components.
             */
            if( MCA_BASE_METADATA_PARAM_CHECKPOINT & open_only_flags) {
                if( MCA_BASE_METADATA_PARAM_CHECKPOINT & dummy->data.param_field) {
                    opal_output_verbose(10, output_id,
                                        "mca: base: components_open: "
                                        "(%s) Component %s is Checkpointable",
                                        type_name,
                                        dummy->version.mca_component_name);
                }
                else {
                    opal_output_verbose(10, output_id,
                                        "mca: base: components_open: "
                                        "(%s) Component %s is *NOT* Checkpointable - Disabled",
                                        type_name,
                                        dummy->version.mca_component_name);
                    opal_list_remove_item(&components_found, item);
                    /* Make sure to release the component since we are not
                     * opening it */
                    mca_base_component_repository_release(component);
                }
            }
        }
    }
#endif  /* (OPAL_ENABLE_FT == 1) && (OPAL_ENABLE_FT_CR == 1) */

    /* Open all remaining components */
    ret = open_components(type_name, output_id,
                          &components_found, components_available);

    /* Free resources */
    for (item = opal_list_remove_first(&components_found); NULL != item;
            item = opal_list_remove_first(&components_found)) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&components_found);

    if (NULL != requested_component_names) {
        opal_argv_free(requested_component_names);
    }

    /* All done */
    return ret;
}
/**
 * Register the MPI-layer ("mpi_*") MCA parameters and cache their
 * current values in the corresponding ompi_* globals.
 *
 * Parameters handled here: param_check, yield_when_idle,
 * event_tick_rate, show_handle_leaks, no_free_handles,
 * show_mpi_alloc_mem_leaks, show_mca_params, show_mca_params_file,
 * keep_peer_hostnames, abort_delay, abort_print_stack, preconnect_mpi
 * (with synonym preconnect_all), leave_pinned, leave_pinned_pipeline,
 * warn_on_fork, have_sparse_group_storage and use_sparse_group_storage.
 *
 * @return The return value of ompi_ddt_register_params(), which is
 *         called last to register the datatype engine's parameters.
 */
int ompi_mpi_register_params(void)
{
    int value;
    /* Start at NULL so a failed registration cannot leave us reading
       (or freeing) an indeterminate pointer below */
    char *param = NULL;

    /* Whether we want MPI API function parameter checking or not */

    mca_base_param_reg_int_name("mpi", "param_check", 
                                "Whether you want MPI API parameters checked at run-time or not.  Possible values are 0 (no checking) and 1 (perform checking at run-time)",
                                false, false, MPI_PARAM_CHECK, &value);
    ompi_mpi_param_check = OPAL_INT_TO_BOOL(value);
    if (ompi_mpi_param_check) {
        /* The user asked for checking; verify that it was compiled in */
        value = 0;
        if (MPI_PARAM_CHECK) {
            value = 1;
        }
        if (0 == value) {
            orte_show_help("help-mpi-runtime.txt", 
                           "mpi-param-check-enabled-but-compiled-out",
                           true);
            ompi_mpi_param_check = false;
        }
    }
    
    /*
     * opal_progress: decide whether to yield and the event library
     * tick rate
     */
    /* JMS: Need ORTE data here -- set this to 0 when
       exactly/under-subscribed, or 1 when oversubscribed */
    mca_base_param_reg_int_name("mpi", "yield_when_idle", 
                                "Yield the processor when waiting for MPI communication (for MPI processes, will default to 1 when oversubscribing nodes)",
                                false, false, -1, NULL);
    mca_base_param_reg_int_name("mpi", "event_tick_rate", 
                                "How often to progress TCP communications (0 = never, otherwise specified in microseconds)",
                                false, false, -1, NULL);

    /* Whether or not to show MPI handle leaks */
    
    mca_base_param_reg_int_name("mpi", "show_handle_leaks",
                                "Whether MPI_FINALIZE shows all MPI handles that were not freed or not",
                                false, false, 
                                (int) ompi_debug_show_handle_leaks, &value);
    ompi_debug_show_handle_leaks = OPAL_INT_TO_BOOL(value);
    
    /* Whether or not to free MPI handles.  Useless without run-time
       param checking, so implicitly set that to true if we don't want
       to free the handles. */
    
    mca_base_param_reg_int_name("mpi", "no_free_handles", 
                                "Whether to actually free MPI objects when their handles are freed",
                                false, false, 
                                (int) ompi_debug_no_free_handles, &value);
    ompi_debug_no_free_handles = OPAL_INT_TO_BOOL(value);
    if (ompi_debug_no_free_handles) {
        ompi_mpi_param_check = true;
        value = 0;
        if (MPI_PARAM_CHECK) {
            value = 1;
        }
        if (0 == value) {
            opal_output(0, "WARNING: MCA parameter mpi_no_free_handles set to true, but MPI");
            opal_output(0, "WARNING: parameter checking has been compiled out of Open MPI.");
            opal_output(0, "WARNING: mpi_no_free_handles is therefore only partially effective!");
        }
    }

    /* Whether or not to show MPI_ALLOC_MEM leaks */

    mca_base_param_reg_int_name("mpi", "show_mpi_alloc_mem_leaks",
                                "If >0, MPI_FINALIZE will show up to this many instances of memory allocated by MPI_ALLOC_MEM that was not freed by MPI_FREE_MEM",
                                false, false, 
                                ompi_debug_show_mpi_alloc_mem_leaks,
                                &ompi_debug_show_mpi_alloc_mem_leaks);

    /* Whether or not to print all MCA parameters in MPI_INIT */
    mca_base_param_reg_string_name("mpi", "show_mca_params",
                                   "Whether to show all MCA parameter values during MPI_INIT or not (good for reproducability of MPI jobs "
                                   "for debug purposes). Accepted values are all, default, file, api, and enviro - or a comma "
                                   "delimited combination of them",
                                   false, false, NULL,  &param);
    if (NULL != param) {
        char **args;
        int i;
        
        ompi_mpi_show_mca_params = true;
        args = opal_argv_split(param, ',');
        if (NULL == args) {
            opal_output(0, "WARNING: could not parse mpi_show_mca_params request - defaulting to show \"all\"");
            show_default_mca_params = true;
            show_file_mca_params = true;
            show_enviro_mca_params = true;
            show_override_mca_params = true;
        } else {
            /* Each comma-separated token switches on one (or all)
               of the parameter sources; unknown tokens are ignored */
            for (i=0; NULL != args[i]; i++) {
                if (0 == strcasecmp(args[i], "all")  || 0 == strcmp(args[i], "1")) {
                    show_default_mca_params = true;
                    show_file_mca_params = true;
                    show_enviro_mca_params = true;
                    show_override_mca_params = true;
                } else if (0 == strcasecmp(args[i], "default")) {
                    show_default_mca_params = true;
                } else if (0 == strcasecmp(args[i], "file")) {
                    show_file_mca_params = true;
                } else if (0 == strcasecmp(args[i], "enviro") || 
                           0 == strcasecmp(args[i], "env")) {
                    show_enviro_mca_params = true;
                } else if (0 == strcasecmp(args[i], "api")) {
                    show_override_mca_params = true;
                }
            }
            opal_argv_free(args);
        }

        /* The value handed back by the registration call is a private
           copy owned by the caller; it is no longer needed once the
           flags above have been derived from it */
        free(param);
        param = NULL;
    }

    /* File to use when dumping the parameters */
    mca_base_param_reg_string_name("mpi", "show_mca_params_file",
                                   "If mpi_show_mca_params is true, setting this string to a valid filename tells Open MPI to dump all the MCA parameter values into a file suitable for reading via the mca_param_files parameter (good for reproducability of MPI jobs)",
                                   false, false,
                                   "", &ompi_mpi_show_mca_params_file);
    
    /* Do we want to save hostnames for debugging messages?  This can
       eat quite a bit of memory... */

    mca_base_param_reg_int_name("mpi", "keep_peer_hostnames",
                                "If nonzero, save the string hostnames of all MPI peer processes (mostly for error / debugging output messages).  This can add quite a bit of memory usage to each MPI process.",
                                false, false, 1, &value);
    ompi_mpi_keep_peer_hostnames = OPAL_INT_TO_BOOL(value);

    /* MPI_ABORT controls */

    mca_base_param_reg_int_name("mpi", "abort_delay",
                                "If nonzero, print out an identifying message when MPI_ABORT is invoked (hostname, PID of the process that called MPI_ABORT) and delay for that many seconds before exiting (a negative delay value means to never abort).  This allows attaching of a debugger before quitting the job.",
                                false, false, 
                                ompi_mpi_abort_delay,
                                &ompi_mpi_abort_delay);
    
    mca_base_param_reg_int_name("mpi", "abort_print_stack",
                                "If nonzero, print out a stack trace when MPI_ABORT is invoked",
                                false, 
                                /* If we do not have stack trace
                                   capability, make this a read-only
                                   MCA param */
#if OMPI_WANT_PRETTY_PRINT_STACKTRACE && ! defined(__WINDOWS__) && defined(HAVE_BACKTRACE)
                                false, 
#else
                                true,
#endif
                                (int) ompi_mpi_abort_print_stack,
                                &value);
#if OMPI_WANT_PRETTY_PRINT_STACKTRACE && ! defined(__WINDOWS__) && defined(HAVE_BACKTRACE)
    /* Only take the value if we have stack trace capability */
    ompi_mpi_abort_print_stack = OPAL_INT_TO_BOOL(value);
#else
    /* If we do not have stack trace capability, ensure that this is
       hard-coded to false */
    ompi_mpi_abort_print_stack = false;
#endif

    /* Here "value" receives the registration call's return value,
       which is then passed to the synonym registration so that
       "mpi_preconnect_all" refers to the same parameter */
    value = mca_base_param_reg_int_name("mpi", "preconnect_mpi",
                                        "Whether to force MPI processes to fully "
                                        "wire-up the MPI connections between MPI "
                                        "processes during "
                                        "MPI_INIT (vs. making connections lazily -- "
                                        "upon the first MPI traffic between each "
                                        "process peer pair)",
                                        false, false, 0, NULL);
    mca_base_param_reg_syn_name(value, "mpi", "preconnect_all", true);
    
    /* Leave pinned parameter */

    mca_base_param_reg_int_name("mpi", "leave_pinned",
                                "Whether to use the \"leave pinned\" protocol or not.  Enabling this setting can help bandwidth performance when repeatedly sending and receiving large messages with the same buffers over RDMA-based networks (0 = do not use \"leave pinned\" protocol, 1 = use \"leave pinned\" protocol, -1 = allow network to choose at runtime).",
                                false, false,
                                ompi_mpi_leave_pinned, &value);
    /* Only values >= 1 enable the protocol here; 0 and -1 both leave
       it disabled at this level */
    ompi_mpi_leave_pinned = (value >= 1) ? true: false;

    mca_base_param_reg_int_name("mpi", "leave_pinned_pipeline",
                                "Whether to use the \"leave pinned pipeline\" protocol or not.",
                                false, false,
                                (int) ompi_mpi_leave_pinned_pipeline, &value);
    ompi_mpi_leave_pinned_pipeline = OPAL_INT_TO_BOOL(value);
    
    /* The two protocols are mutually exclusive; leave_pinned wins */
    if (ompi_mpi_leave_pinned && ompi_mpi_leave_pinned_pipeline) {
        ompi_mpi_leave_pinned_pipeline = false;
        orte_show_help("help-mpi-runtime.txt", 
                       "mpi-params:leave-pinned-and-pipeline-selected",
                       true);
    }

    mca_base_param_reg_int_name("mpi", "warn_on_fork",
                                "If nonzero, issue a warning if program forks under conditions that could cause system errors",
                                false, false, 
                                (int) true, &value);
    ompi_warn_on_fork = OPAL_INT_TO_BOOL(value);
    
    /* Sparse group storage support */

    mca_base_param_reg_int_name("mpi", "have_sparse_group_storage", 
                                "Whether this Open MPI installation supports storing of data in MPI groups in \"sparse\" formats (good for extremely large process count MPI jobs that create many communicators/groups)",
                                false, true, (int) OMPI_GROUP_SPARSE, NULL);
    mca_base_param_reg_int_name("mpi", "use_sparse_group_storage", 
                                "Whether to use \"sparse\" storage formats for MPI groups (only relevant if mpi_have_sparse_group_storage is 1)",
                                false, false, OMPI_GROUP_SPARSE, &value);
    ompi_use_sparse_group_storage = OPAL_INT_TO_BOOL(value);
    if (ompi_use_sparse_group_storage) {
        /* The user asked for sparse groups; verify that support was
           compiled in */
        value = 0;
        if (OMPI_GROUP_SPARSE) {
            value = 1;
        }
        if (0 == value) {
            orte_show_help("help-mpi-runtime.txt", 
                           "sparse groups enabled but compiled out",
                           true);
            ompi_use_sparse_group_storage = false;
        }
    }

    /* The ddt engine has a few parameters */
    return ompi_ddt_register_params();
}
Example #19
0
/**
 * Initialize the OPAL checkpoint/restart (C/R) runtime.
 *
 * The function is guarded by the opal_cr_initalized counter: only the
 * call that moves the counter to 1 performs the initialization.  Any
 * later call returns OPAL_SUCCESS right away (or OPAL_ERROR if the
 * counter is below 1 after the increment).
 *
 * The first call registers the "opal_cr_*" and "ft_cr_enabled" MCA
 * parameters, caches their values in the opal_cr_* globals, optionally
 * installs the SIGPIPE debug handler, registers the OPAL coordination
 * callback (unless running as a tool), opens and selects the CRS
 * framework when OPAL_ENABLE_FT is 1, and starts the C/R thread when
 * OPAL_ENABLE_FT_THREAD is 1 and the thread was requested.
 *
 * @return OPAL_SUCCESS on success, OPAL_ERROR if the init counter is
 *         inconsistent, or the error code returned by
 *         opal_crs_base_open() / opal_crs_base_select().
 */
int opal_cr_init(void )
{
    int ret, exit_status = OPAL_SUCCESS;
    opal_cr_coord_callback_fn_t prev_coord_func;
    /* Start at 0 so a failed registration reads as "disabled" */
    int val = 0;

    if( ++opal_cr_initalized != 1 ) {
        if( opal_cr_initalized < 1 ) {
            exit_status = OPAL_ERROR;
            goto cleanup;
        }
        /* Already initialized by an earlier call */
        exit_status = OPAL_SUCCESS;
        goto cleanup;
    }

    /*
     * Some startup MCA parameters
     */
    ret = mca_base_param_reg_int_name("opal_cr", "verbose",
                                      "Verbose output level for the runtime OPAL Checkpoint/Restart functionality",
                                      false, false,
                                      0,
                                      &val);
    if(0 != val) {
        opal_cr_output = opal_output_open(NULL);
    } else {
        opal_cr_output = -1;
    }
    opal_output_set_verbosity(opal_cr_output, val);

    opal_output_verbose(10, opal_cr_output,
                        "opal_cr: init: Verbose Level: %d",
                        val);

    mca_base_param_reg_int_name("ft", "cr_enabled",
                                "Enable fault tolerance for this program",
                                false, false,
                                0, &val);
    opal_cr_set_enabled(OPAL_INT_TO_BOOL(val));

    opal_output_verbose(10, opal_cr_output,
                        "opal_cr: init: FT Enabled: %d",
                        val);

    mca_base_param_reg_int_name("opal_cr", "enable_timer",
                                "Enable Checkpoint timer (Default: Disabled)",
                                false, false,
                                0, &val);
    opal_cr_timing_enabled = OPAL_INT_TO_BOOL(val);

    mca_base_param_reg_int_name("opal_cr", "enable_timer_barrier",
                                "Enable Checkpoint timer Barrier (Default: Disabled)",
                                false, false,
                                0, &val);
    /* The barrier is only meaningful when the timer itself is on */
    if( opal_cr_timing_enabled ) {
        opal_cr_timing_barrier_enabled = OPAL_INT_TO_BOOL(val);
    } else {
        opal_cr_timing_barrier_enabled = false;
    }

    mca_base_param_reg_int_name("opal_cr", "timer_target_rank",
                                "Target Rank for the timer (Default: 0)",
                                false, false,
                                0, &val);
    opal_cr_timing_target_rank = val;

#if OPAL_ENABLE_FT_THREAD == 1
    mca_base_param_reg_int_name("opal_cr", "use_thread",
                                "Use an async thread to checkpoint this program (Default: Disabled)",
                                false, false,
                                0, &val);
    opal_cr_thread_use_if_avail = OPAL_INT_TO_BOOL(val);

    opal_output_verbose(10, opal_cr_output,
                        "opal_cr: init: FT Use thread: %d",
                        val);

    mca_base_param_reg_int_name("opal_cr", "thread_sleep_check",
                                "Time to sleep between checking for a checkpoint (Default: 0)",
                                false, false,
                                0, &val);
    opal_cr_thread_sleep_check = val;

    mca_base_param_reg_int_name("opal_cr", "thread_sleep_wait",
                                "Time to sleep waiting for process to exit MPI library (Default: 0)",
                                false, false,
                                0, &val);
    opal_cr_thread_sleep_wait = val;

    opal_output_verbose(10, opal_cr_output,
                        "opal_cr: init: FT thread sleep: check = %d, wait = %d",
                        opal_cr_thread_sleep_check, opal_cr_thread_sleep_wait);
#endif

    mca_base_param_reg_int_name("opal_cr", "is_tool",
                                "Is this a tool program, meaning does it require a fully operational OPAL or just enough to exec.",
                                false, false,
                                0,
                                &val);
    opal_cr_is_tool = OPAL_INT_TO_BOOL(val);

    opal_output_verbose(10, opal_cr_output,
                        "opal_cr: init: Is a tool program: %d",
                        val);
#ifndef __WINDOWS__
    mca_base_param_reg_int_name("opal_cr", "signal",
                                "Checkpoint/Restart signal used to initialize an OPAL Only checkpoint of a program",
                                false, false,
                                SIGUSR1,
                                &opal_cr_entry_point_signal);

    opal_output_verbose(10, opal_cr_output,
                        "opal_cr: init: Checkpoint Signal: %d",
                        opal_cr_entry_point_signal);

    mca_base_param_reg_int_name("opal_cr", "debug_sigpipe",
                                "Activate a signal handler for debugging SIGPIPE Errors that can happen on restart. (Default: Disabled)",
                                false, false,
                                0, &val);
    opal_cr_debug_sigpipe = OPAL_INT_TO_BOOL(val);

    opal_output_verbose(10, opal_cr_output,
                        "opal_cr: init: Debug SIGPIPE: %d (%s)",
                        val, (opal_cr_debug_sigpipe ? "True" : "False"));

#if OPAL_ENABLE_FT_THREAD == 1
    /* If we have a thread, then attach the SIGPIPE signal handler there since
     * it is most likely to be the one that needs it.
     */
    if( opal_cr_debug_sigpipe && !opal_cr_thread_use_if_avail ) {
        if( SIG_ERR == signal(SIGPIPE, opal_cr_sigpipe_debug_signal_handler) ) {
            /* Debug aid only: report the failure but keep initializing */
            opal_output(opal_cr_output,
                        "opal_cr: init: Failed to install the SIGPIPE debug signal handler\n");
        }
    }
#else
    if( opal_cr_debug_sigpipe ) {
        if( SIG_ERR == signal(SIGPIPE, opal_cr_sigpipe_debug_signal_handler) ) {
            /* Debug aid only: report the failure but keep initializing */
            opal_output(opal_cr_output,
                        "opal_cr: init: Failed to install the SIGPIPE debug signal handler\n");
        }
    }
#endif

#else
    opal_cr_is_tool = true;  /* no support for CR on Windows yet */ 
#endif  /* __WINDOWS__ */

    mca_base_param_reg_string_name("opal_cr", "tmp_dir",
                                   "Temporary directory to place rendezvous files for a checkpoint",
                                   false, false,
                                   "/tmp",
                                   &opal_cr_pipe_dir);

    opal_output_verbose(10, opal_cr_output,
                        "opal_cr: init: Temp Directory: %s",
                        opal_cr_pipe_dir);

    if( !opal_cr_is_tool ) {
        /* Register the OPAL interlevel coordination callback */
        opal_cr_reg_coord_callback(opal_cr_coord, &prev_coord_func);

        opal_cr_stall_check = false;
        opal_cr_currently_stalled = false;

    } /* End !opal_cr_is_tool */

    /* 
     * If fault tolerance was not compiled in then
     * we need to make sure that the listener thread is active to tell
     * the tools that this is not a checkpointable job.
     * We don't need the CRS framework to be initalized.
     */
#if OPAL_ENABLE_FT    == 1
    /*
     * Open the checkpoint / restart service components
     */
    if (OPAL_SUCCESS != (ret = opal_crs_base_open())) {
        opal_output(opal_cr_output,
                    "opal_cr: init: opal_crs_base_open Failed to open. (%d)\n", ret);
        exit_status = ret;
        goto cleanup;
    }
    
    if (OPAL_SUCCESS != (ret = opal_crs_base_select())) {
        opal_output(opal_cr_output,
                    "opal_cr: init: opal_crs_base_select Failed. (%d)\n", ret);
        exit_status = ret;
        goto cleanup;
    }
#endif

#if OPAL_ENABLE_FT_THREAD == 1
    if( !opal_cr_is_tool && opal_cr_thread_use_if_avail) {
        opal_output_verbose(10, opal_cr_output,
                            "opal_cr: init: starting the thread\n");

        opal_set_using_threads(true);
        /*
         * Start the thread
         */
        OBJ_CONSTRUCT(&opal_cr_thread,     opal_thread_t);
        OBJ_CONSTRUCT(&opal_cr_thread_lock, opal_mutex_t);

        /* Reset the thread bookkeeping before the thread can run */
        opal_cr_thread_is_done    = false;
        opal_cr_thread_is_active  = false;
        opal_cr_thread_in_library = false;
        opal_cr_thread_num_in_library = 0;

        opal_cr_thread.t_run = opal_cr_thread_fn;
        opal_cr_thread.t_arg = NULL;
        opal_thread_start(&opal_cr_thread);

    } /* End !opal_cr_is_tool && opal_cr_thread_use_if_avail */
    else {
        opal_output_verbose(10, opal_cr_output,
                            "opal_cr: init: *Not* Using C/R thread\n");
    }
#endif /* OPAL_ENABLE_FT_THREAD == 1 */

 cleanup:
    return exit_status;
}
Example #20
0
/**
 * Register the OPAL-level MCA parameters.
 *
 * Registers "opal_signal" (default: comma-separated list of whichever
 * of SIGABRT, SIGBUS, SIGFPE and SIGSEGV exist on this platform),
 * "opal_profile" and "opal_profile_file", and, when OPAL_ENABLE_DEBUG
 * is set, "opal_progress_debug" and "opal_debug_locks".  It then
 * registers the datatype engine and paffinity base parameters.
 *
 * @return The error code from opal_datatype_register_params() if that
 *         fails, otherwise the return value of
 *         opal_paffinity_base_register_params().
 */
int opal_register_params(void)
{
    int ret;

    /*
     * This string is going to be used in opal/util/stacktrace.c
     */
    {
        /* At most four signal numbers are listed below, so 64 bytes is
           ample for four decimal ints plus separators.  Building the
           list in a fixed buffer removes the allocation-failure path
           of a heap-formatted string. */
        char sigbuf[64];
        size_t used = 0;
        int j;
        int signals[] = {
#ifdef SIGABRT
            SIGABRT,
#endif
#ifdef SIGBUS
            SIGBUS,
#endif
#ifdef SIGFPE
            SIGFPE,
#endif
#ifdef SIGSEGV
            SIGSEGV,
#endif
            -1
        };

        sigbuf[0] = '\0';
        for (j = 0 ; signals[j] != -1 ; ++j) {
            const char *sep = (0 == j) ? "" : ",";
            int n = snprintf(sigbuf + used, sizeof(sigbuf) - used,
                             "%s%d", sep, signals[j]);
            if (n < 0 || (size_t) n >= sizeof(sigbuf) - used) {
                /* Not expected given the buffer size; drop the partial
                   entry rather than register a mangled list */
                sigbuf[used] = '\0';
                break;
            }
            used += (size_t) n;
        }

        /* With no signals available the default stays NULL.
           NOTE(review): relies on the registration call copying the
           default string, since sigbuf goes out of scope at the end of
           this block -- confirm against the MCA param API. */
        mca_base_param_reg_string_name("opal", "signal", 
                                       "Comma-delimited list of integer signal numbers to Open MPI to attempt to intercept.  Upon receipt of the intercepted signal, Open MPI will display a stack trace and abort.  Open MPI will *not* replace signals if handlers are already installed by the time MPI_INIT is invoked.  Optionally append \":complain\" to any signal number in the comma-delimited list to make Open MPI complain if it detects another signal handler (and therefore does not insert its own).",
                                       false, false,
                                       (used > 0) ? sigbuf : NULL, NULL);
    }

    {
        int j;

        mca_base_param_reg_int_name("opal", "profile", 
                                    "Set to non-zero to profile component selections",
                                    false, false, (int)false, &j);
        opal_profile = OPAL_INT_TO_BOOL(j);

        mca_base_param_reg_string_name("opal", "profile_file", 
                                       "Name of the file containing the cluster configuration information",
                                       false, false, NULL, &opal_profile_file);
    }
    
#if OPAL_ENABLE_DEBUG


    mca_base_param_reg_int_name("opal", "progress_debug", 
                                "Set to non-zero to debug progress engine features",
                                false, false, 0, NULL);

    {
        int value;
        mca_base_param_reg_int_name("opal", "debug_locks",
                                    "Debug mutex usage within Open MPI.  On a "
                                    "non-threaded build, this enables integer counters and "
                                    "warning messages when double-locks are detected.",
                                    false, false, 0, &value);
        /* Only ever switches lock checking on; never resets it */
        if (value) {
            opal_mutex_check_locks = true;
        }
    }
#endif
    /* The ddt engine has a few parameters */
    ret = opal_datatype_register_params();
    if (OPAL_SUCCESS != ret) {
        return ret;
    }

    /* Paffinity base also has some parameters */
    return opal_paffinity_base_register_params();
}
Example #21
0
/**
 * Register the OPAL-level MCA parameters.
 *
 * Registers "opal_signal" (default: comma-separated list of whichever
 * of SIGABRT, SIGBUS, SIGFPE and SIGSEGV exist on this platform),
 * the debug-only "opal_progress_debug" and "opal_debug_locks"
 * parameters when OMPI_ENABLE_DEBUG is set, and
 * "shmem_mmap_enable_nfs_warning".  It then registers the paffinity
 * base parameters.
 *
 * @return The return value of opal_paffinity_base_register_params().
 */
int opal_register_params(void)
{
    int value;

    /*
     * This string is going to be used in opal/util/stacktrace.c
     */
    {
        /* At most four signal numbers are listed below, so 64 bytes is
           ample for four decimal ints plus separators.  Building the
           list in a fixed buffer removes the allocation-failure path
           of a heap-formatted string. */
        char sigbuf[64];
        size_t used = 0;
        int j;
        int signals[] = {
#ifdef SIGABRT
            SIGABRT,
#endif
#ifdef SIGBUS
            SIGBUS,
#endif
#ifdef SIGFPE
            SIGFPE,
#endif
#ifdef SIGSEGV
            SIGSEGV,
#endif
            -1
        };

        sigbuf[0] = '\0';
        for (j = 0 ; signals[j] != -1 ; ++j) {
            const char *sep = (0 == j) ? "" : ",";
            int n = snprintf(sigbuf + used, sizeof(sigbuf) - used,
                             "%s%d", sep, signals[j]);
            if (n < 0 || (size_t) n >= sizeof(sigbuf) - used) {
                /* Not expected given the buffer size; drop the partial
                   entry rather than register a mangled list */
                sigbuf[used] = '\0';
                break;
            }
            used += (size_t) n;
        }

        /* With no signals available the default stays NULL.
           NOTE(review): relies on the registration call copying the
           default string, since sigbuf goes out of scope at the end of
           this block -- confirm against the MCA param API. */
        mca_base_param_reg_string_name("opal", "signal", 
                                       "Comma-delimited list of integer signal numbers to Open MPI to attempt to intercept.  Upon receipt of the intercepted signal, Open MPI will display a stack trace and abort.  Open MPI will *not* replace signals if handlers are already installed by the time MPI_INIT is invoked.  Optionally append \":complain\" to any signal number in the comma-delimited list to make Open MPI complain if it detects another signal handler (and therefore does not insert its own).",
                                       false, false,
                                       (used > 0) ? sigbuf : NULL, NULL);
    }

#if OMPI_ENABLE_DEBUG


    mca_base_param_reg_int_name("opal", "progress_debug", 
                                "Set to non-zero to debug progress engine features",
                                false, false, 0, NULL);

    {
        mca_base_param_reg_int_name("opal", "debug_locks",
                                    "Debug mutex usage within Open MPI.  On a "
                                    "non-threaded build, this enables integer counters and "
                                    "warning messages when double-locks are detected.",
                                    false, false, 0, &value);
        /* Only ever switches lock checking on; never resets it */
        if (value) {
            opal_mutex_check_locks = true;
        }
    }
#endif

    /*
     * Do we want the "warning: your mmap file is on NFS!" message?  Per a
     * thread on the OMPI devel list
     * (http://www.open-mpi.org/community/lists/devel/2011/12/10054.php),
     * on some systems, it doesn't seem to matter.  But per older threads,
     * it definitely does matter on some systems.  Perhaps newer kernels
     * are smarter about this kind of stuff...?  Regardless, we should
     * provide the ability to turn off this message for systems where the
     * effect doesn't matter.
     *
     * v1.4.x-specific note: the MCA param name is "shmem_mmap_...",
     * where "shmem" is not a framework that exists in the v1.4
     * series.  This parameter was added right before 1.4.5, and at a
     * similar time as 1.5.5 (where the "shmem" framework *does*
     * exist).  The idea was to have a consistent MCA param name
     * starting with v1.4.5.  Hence, we put a slightly nonsensical
     * name here in v1.4.x so that we'd have a correct/good name
     * moving forward.
     */
    mca_base_param_reg_int_name("shmem",
                                "mmap_enable_nfs_warning", 
                                "Enable the warning emitted when Open MPI detects that its shared memory backing file is located on a network filesystem (1 = enabled, 0 = disabled).",
                                false, false,
                                (int)true, &value);
    opal_mmap_on_nfs_warning = OPAL_INT_TO_BOOL(value);

    /* Paffinity base also has some parameters */
    return opal_paffinity_base_register_params();
}
Example #22
0
int opal_cr_init(void )
{
    int ret, exit_status = OPAL_SUCCESS;
    opal_cr_coord_callback_fn_t prev_coord_func;
    int val, t;

    if( ++opal_cr_initalized != 1 ) {
        if( opal_cr_initalized < 1 ) {
            exit_status = OPAL_ERROR;
            goto cleanup;
        }
        exit_status = OPAL_SUCCESS;
        goto cleanup;
    }

    /*
     * Some startup MCA parameters
     */
    ret = mca_base_param_reg_int_name("opal_cr", "verbose",
                                      "Verbose output level for the runtime OPAL Checkpoint/Restart functionality",
                                      false, false,
                                      0,
                                      &val);
    if(0 != val) {
        opal_cr_output = opal_output_open(NULL);
    } else {
        opal_cr_output = -1;
    }
    opal_output_set_verbosity(opal_cr_output, val);

    opal_output_verbose(10, opal_cr_output,
                        "opal_cr: init: Verbose Level: %d",
                        val);

    mca_base_param_reg_int_name("ft", "cr_enabled",
                                "Enable fault tolerance for this program",
                                false, false,
                                0, &val);
    opal_cr_set_enabled(OPAL_INT_TO_BOOL(val));

    opal_output_verbose(10, opal_cr_output,
                        "opal_cr: init: FT Enabled: %d",
                        val);

    mca_base_param_reg_int_name("opal_cr", "enable_timer",
                                "Enable Checkpoint timer (Default: Disabled)",
                                false, false,
                                0, &val);
    opal_cr_timing_enabled = OPAL_INT_TO_BOOL(val);

    mca_base_param_reg_int_name("opal_cr", "enable_timer_barrier",
                                "Enable Checkpoint timer Barrier (Default: Disabled)",
                                false, false,
                                0, &val);
    if( opal_cr_timing_enabled ) {
        opal_cr_timing_barrier_enabled = OPAL_INT_TO_BOOL(val);
    } else {
        opal_cr_timing_barrier_enabled = false;
    }

    mca_base_param_reg_int_name("opal_cr", "timer_target_rank",
                                "Target Rank for the timer (Default: 0)",
                                false, false,
                                0, &val);
    opal_cr_timing_target_rank = val;

#if OPAL_ENABLE_FT_THREAD == 1
    mca_base_param_reg_int_name("opal_cr", "use_thread",
                                "Use an async thread to checkpoint this program (Default: Disabled)",
                                false, false,
                                0, &val);
    opal_cr_thread_use_if_avail = OPAL_INT_TO_BOOL(val);

    opal_output_verbose(10, opal_cr_output,
                        "opal_cr: init: FT Use thread: %d",
                        val);

    mca_base_param_reg_int_name("opal_cr", "thread_sleep_check",
                                "Time to sleep between checking for a checkpoint (Default: 0)",
                                false, false,
                                0, &val);
    opal_cr_thread_sleep_check = val;

    mca_base_param_reg_int_name("opal_cr", "thread_sleep_wait",
                                "Time to sleep waiting for process to exit MPI library (Default: 1000)",
                                false, false,
                                1000, &val);
    opal_cr_thread_sleep_wait = val;

    opal_output_verbose(10, opal_cr_output,
                        "opal_cr: init: FT thread sleep: check = %d, wait = %d",
                        opal_cr_thread_sleep_check, opal_cr_thread_sleep_wait);
#endif

    mca_base_param_reg_int_name("opal_cr", "is_tool",
                                "Is this a tool program, meaning does it require a fully operational OPAL or just enough to exec.",
                                false, false,
                                0,
                                &val);
    opal_cr_is_tool = OPAL_INT_TO_BOOL(val);

    opal_output_verbose(10, opal_cr_output,
                        "opal_cr: init: Is a tool program: %d",
                        val);
#if OPAL_ENABLE_CRDEBUG == 1
    mca_base_param_reg_int_name("opal_cr", "enable_crdebug",
                                "Enable checkpoint/restart debugging",
                                false, false,
                                0,
                                &val);
    MPIR_debug_with_checkpoint = OPAL_INT_TO_BOOL(val);

    opal_output_verbose(10, opal_cr_output,
                        "opal_cr: init: C/R Debugging Enabled [%s]\n",
                        (MPIR_debug_with_checkpoint ? "True": "False"));
#endif

#ifndef __WINDOWS__
    mca_base_param_reg_int_name("opal_cr", "signal",
                                "Checkpoint/Restart signal used to initialize an OPAL Only checkpoint of a program",
                                false, false,
                                SIGUSR1,
                                &opal_cr_entry_point_signal);

    opal_output_verbose(10, opal_cr_output,
                        "opal_cr: init: Checkpoint Signal: %d",
                        opal_cr_entry_point_signal);

    mca_base_param_reg_int_name("opal_cr", "debug_sigpipe",
                                "Activate a signal handler for debugging SIGPIPE Errors that can happen on restart. (Default: Disabled)",
                                false, false,
                                0, &val);
    opal_cr_debug_sigpipe = OPAL_INT_TO_BOOL(val);

    opal_output_verbose(10, opal_cr_output,
                        "opal_cr: init: Debug SIGPIPE: %d (%s)",
                        val, (opal_cr_debug_sigpipe ? "True" : "False"));

#if OPAL_ENABLE_FT_THREAD == 1
    /* If we have a thread, then attach the SIGPIPE signal handler there since
     * it is most likely to be the one that needs it.
     */
    if( opal_cr_debug_sigpipe && !opal_cr_thread_use_if_avail ) {
        if( SIG_ERR == signal(SIGPIPE, opal_cr_sigpipe_debug_signal_handler) ) {
            ;
        }
    }
#else
    if( opal_cr_debug_sigpipe ) {
        if( SIG_ERR == signal(SIGPIPE, opal_cr_sigpipe_debug_signal_handler) ) {
            ;
        }
    }
#endif

#else
    opal_cr_is_tool = true;  /* no support for CR on Windows yet */ 
#endif  /* __WINDOWS__ */

#if OPAL_ENABLE_CRDEBUG == 1
    opal_cr_debug_num_free_threads = 3;
    opal_cr_debug_free_threads = (opal_thread_t **)malloc(sizeof(opal_thread_t *) * opal_cr_debug_num_free_threads );
    for(t = 0; t < opal_cr_debug_num_free_threads; ++t ) {
        opal_cr_debug_free_threads[t] = NULL;
    }
 
    mca_base_param_reg_int_name("opal_cr", "crdebug_signal",
                                "Checkpoint/Restart signal used to hold threads when debugging",
                                false, false,
                                SIGTSTP,
                                &opal_cr_debug_signal);

    opal_output_verbose(10, opal_cr_output,
                        "opal_cr: init: Checkpoint Signal (Debug): %d",
                        opal_cr_debug_signal);
    if( SIG_ERR == signal(opal_cr_debug_signal, MPIR_checkpoint_debugger_signal_handler) ) {
        opal_output(opal_cr_output,
                    "opal_cr: init: Failed to register C/R debug signal (%d)",
                    opal_cr_debug_signal);
    }
#else
    /* Silence a compiler warning */
    t = 0;
#endif

    mca_base_param_reg_string_name("opal_cr", "tmp_dir",
                                   "Temporary directory to place rendezvous files for a checkpoint",
                                   false, false,
                                   opal_tmp_directory(),
                                   &opal_cr_pipe_dir);

    opal_output_verbose(10, opal_cr_output,
                        "opal_cr: init: Temp Directory: %s",
                        opal_cr_pipe_dir);

    if( !opal_cr_is_tool ) {
        /* Register the OPAL interlevel coordination callback */
        opal_cr_reg_coord_callback(opal_cr_coord, &prev_coord_func);

        opal_cr_stall_check = false;
        opal_cr_currently_stalled = false;

    } /* End opal_cr_is_tool = true */

    /* 
     * If fault tolerance was not compiled in then
     * we need to make sure that the listener thread is active to tell
     * the tools that this is not a checkpointable job.
     * We don't need the CRS framework to be initialized.
     */
#if OPAL_ENABLE_FT_CR    == 1
    /*
     * Open the checkpoint / restart service components
     */
    if (OPAL_SUCCESS != (ret = opal_crs_base_open())) {
        opal_show_help( "help-opal-runtime.txt",
                        "opal_cr_init:no-crs", true,
                        "opal_crs_base_open", ret );
        exit_status = ret;
        goto cleanup;
    }
    
    if (OPAL_SUCCESS != (ret = opal_crs_base_select())) {
        opal_show_help( "help-opal-runtime.txt",
                        "opal_cr_init:no-crs", true,
                        "opal_crs_base_select", ret );
        exit_status = ret;
        goto cleanup;
    }
#endif

#if OPAL_ENABLE_FT_THREAD == 1
    if( !opal_cr_is_tool && opal_cr_thread_use_if_avail) {
        opal_output_verbose(10, opal_cr_output,
                            "opal_cr: init: starting the thread\n");

        /* JJH: We really do need this line below since it enables
         *      actual locks for threads. However currently the
         *      upper layers will deadlock if it is enabled.
         *      So hack around the problem for now, while working
         *      on a complete solution. See ticket #2741 for more
         *      details.
         * opal_set_using_threads(true);
         */

        /*
         * Start the thread
         */
        OBJ_CONSTRUCT(&opal_cr_thread,     opal_thread_t);
        OBJ_CONSTRUCT(&opal_cr_thread_lock, opal_mutex_t);

        opal_cr_thread_is_done    = false;
        opal_cr_thread_is_active  = false;
        opal_cr_thread_in_library = false;
        opal_cr_thread_num_in_library = 0;

        opal_cr_thread.t_run = opal_cr_thread_fn;
        opal_cr_thread.t_arg = NULL;
        opal_thread_start(&opal_cr_thread);

    } /* End opal_cr_is_tool = true */
    else {
        opal_output_verbose(10, opal_cr_output,
                            "opal_cr: init: *Not* Using C/R thread\n");
    }
#endif /* OPAL_ENABLE_FT_THREAD == 1 */

 cleanup:
    return exit_status;
}
Example #23
0
/**
 * Initialize the ORCM layer for this process.
 *
 * Steps, in order:
 *   - if ORCM_MCA_spin is set in the environment, spin until a debugger
 *     attaches and clears the local `spin` flag;
 *   - make sure the ORCM utilities are initialized (orcm_init_util());
 *   - set the default leader policy to the wildcard jobid/vpid;
 *   - register the "orcm" MCA parameters max_buffered_msgs and sched_kill_dvm;
 *   - create the global triplets array;
 *   - when built with HAVE_QSYSTEM_H and Q_SYSTEM_INTFCS_TO_PROBE_FOR_IP_ADDRESS,
 *     export the interface list via OMPI_MCA_rmcast_base_if_include;
 *   - call orte_init() and, on success, enable threads for non-tools and
 *     trap signals for non-apps.
 *
 * @param flags  Process type; passed through to orte_init().
 *
 * @return ORCM_SUCCESS on success, otherwise the code returned by
 *         orte_init(). A help message is shown for the failure unless the
 *         code is ORCM_ERR_SILENT.
 */
int orcm_init(orcm_proc_type_t flags)
{
    int ret;
    char *error;
    /* volatile: nothing in this function ever clears the flag - only an
     * attached debugger does - so the compiler must re-read it on every
     * iteration and must not assume the loop terminates on its own.
     */
    volatile int spin;

    if (NULL != getenv("ORCM_MCA_spin")) {
        spin = 1;
        /* spin until a debugger can attach */
        while (0 != spin) {
            ret = 0;
            while (ret < 10000) {
                ret++;
            }
        }
    }
    
    if (!orcm_util_initialized) {
        orcm_init_util();
    }
    
    /* set the default leader policy */
    orcm_default_leader_policy.jobid = ORTE_JOBID_WILDCARD;
    orcm_default_leader_policy.vpid = ORTE_VPID_WILDCARD;

    /* get the number of max msgs */
    mca_base_param_reg_int_name("orcm", "max_buffered_msgs",
                                "Number of recvd messages to hold in storage from each source",
                                false, false, ORCM_MAX_MSG_RING_SIZE, &orcm_max_msg_ring_size);

    /* independent mode or not */
    mca_base_param_reg_int_name("orcm", "sched_kill_dvm",
                                "Whether or not scheduler kills associated daemons upon termination (default: no)",
                                false, false, (int)false, &ret);
    orcm_sched_kill_dvm = OPAL_INT_TO_BOOL(ret);

    /* setup the globals that require initialization */
    orcm_triplets = OBJ_NEW(orcm_triplets_array_t);

#ifdef HAVE_QSYSTEM_H
#ifdef Q_SYSTEM_INTFCS_TO_PROBE_FOR_IP_ADDRESS
{
    char *eth_ifs[] = Q_SYSTEM_INTFCS_TO_PROBE_FOR_IP_ADDRESS;
    char **adds=NULL, *ifs, *envar;
    int i, num_ifs;

    num_ifs = sizeof(eth_ifs) / sizeof(eth_ifs[0]);
    for (i=0; i < num_ifs; i++) {
        opal_argv_append_nosize(&adds, eth_ifs[i]);
    }
    ifs = opal_argv_join(adds, ',');
    opal_argv_free(adds);
    /* push it into the environ so that the rmcast framework can get it.
     * On asprintf failure the contents of envar are undefined, so only
     * hand it to putenv when the formatting succeeded.
     */
    if (0 <= asprintf(&envar, "OMPI_MCA_rmcast_base_if_include=%s", ifs)) {
        putenv(envar);
        /* cannot release envar as the environ doesn't keep its own copy */
    }
    free(ifs);
}
#endif
#endif

    /* initialize us */
    if (ORTE_SUCCESS != (ret = orte_init(NULL, NULL, flags))) {
        error = "orte_init";
        goto error;
    }

    if (!ORCM_PROC_IS_TOOL) {
        opal_set_using_threads(true);
    }

    if (!ORCM_PROC_IS_APP) {
        trap_signals();
    }

    orcm_initialized = true;
    
    return ORCM_SUCCESS;

error:
    /* stay quiet only when the failure was explicitly marked silent */
    if (ORCM_ERR_SILENT != ret) {
        orte_show_help("help-openrcm-runtime.txt",
                       "orcm_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }
    
    return ret;
}
Example #24
0
/*
 * Start monitoring of local processes
 *
 * Builds a file_tracker_t for the given job and appends it to the
 * "jobs" list. The file to watch and the individual checks (size,
 * access time, modification time, limit) are taken from the
 * environment of the job's first app_context when present there,
 * otherwise from the component-level values.
 *
 * Returns without creating a tracker when the job is this process's
 * own (non-wildcard) job, when the job data or an app_context cannot
 * be found, or when no file name is available from either source.
 */
static void start(orte_jobid_t jobid)
{
    orte_job_t *jobdat;
    orte_app_context_t *app, *aptr;
    int i;
    char *filename;
    file_tracker_t *ft;
    char *ptr;

    /* cannot monitor my own job */
    if (jobid == ORTE_PROC_MY_NAME->jobid && ORTE_JOBID_WILDCARD != jobid) {
        return;
    }
    
    OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                         "%s starting file monitoring for job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jobid)));
    
    /* get the local jobdat for this job */
    if (NULL == (jobdat = orte_get_job_data_object(jobid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return;
    }

    /* must be at least one app_context, so use the first one found */
    app = NULL;
    for (i=0; i < jobdat->apps->size; i++) {
        if (NULL != (aptr = (orte_app_context_t*)opal_pointer_array_get_item(jobdat->apps, i))) {
            app = aptr;
            break;
        }
    }
    if (NULL == app) {
        /* got a problem */
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return;
    }
            
    /* search the environ to get the filename.
     * NOTE(review): a value handed back by find_value is treated as a
     * heap string owned by the caller, consistent with the free(ptr)
     * calls below - confirm against find_value.
     */
    if (!find_value(app, "OMPI_MCA_sensor_file_filename", &filename)) {
        /* was a default file given */
        if (NULL == mca_sensor_file_component.file) {
            /* can't do anything without a file */
            OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                                 "%s sensor:file no file for job %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_JOBID_PRINT(jobid)));
            return;
        }
        /* copy the default so that filename is an owned string in
         * both cases
         */
        filename = strdup(mca_sensor_file_component.file);
    }
            
    /* create the tracking object; it takes over the owned filename
     * rather than duplicating it a second time, which used to leak the
     * string obtained from the environ
     */
    ft = OBJ_NEW(file_tracker_t);
    ft->jobid = jobid;
    ft->file = filename;
    
    /* search the environ to see what we are checking */
    if (!find_value(app, "OMPI_MCA_sensor_file_check_size", &ptr)) {
        /* was a default value given */
        if (0 < mca_sensor_file_component.check_size) {
            ft->check_size = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_size);
        }
    } else {
        ft->check_size = OPAL_INT_TO_BOOL(strtol(ptr, NULL, 10));
        free(ptr);
    }

    if (!find_value(app, "OMPI_MCA_sensor_file_check_access", &ptr)) {
        /* was a default value given */
        if (0 < mca_sensor_file_component.check_access) {
            ft->check_access = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_access);
        }
    } else {
        ft->check_access = OPAL_INT_TO_BOOL(strtol(ptr, NULL, 10));
        free(ptr);
    }

    if (!find_value(app, "OMPI_MCA_sensor_file_check_mod", &ptr)) {
        /* was a default value given */
        if (0 < mca_sensor_file_component.check_mod) {
            ft->check_mod = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_mod);
        }
    } else {
        ft->check_mod = OPAL_INT_TO_BOOL(strtol(ptr, NULL, 10));
        free(ptr);
    }

    /* the limit always has a value: environ first, component otherwise */
    if (!find_value(app, "OMPI_MCA_sensor_file_limit", &ptr)) {
        ft->limit = mca_sensor_file_component.limit;
    } else {
        ft->limit = strtol(ptr, NULL, 10);
        free(ptr);
    }
    opal_list_append(&jobs, &ft->super);
    OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                         "%s file %s monitored for %s%s%s with limit %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ft->file, ft->check_size ? "SIZE:" : " ",
                         ft->check_access ? "ACCESS TIME:" : " ",
                         ft->check_mod ? "MOD TIME" : " ", ft->limit));
    return;
}
Example #25
0
/**
 * One-time initialization of the common CUDA support code.
 *
 * Registers the "mpi" MCA parameters common_cuda_verbose,
 * common_cuda_register_memory and common_cuda_warning, opens the
 * output stream used by this code, and then asks the CUDA driver for
 * the current context. CUDA support is marked enabled only when that
 * query succeeds and yields a non-NULL context; in every other case
 * both CUDA support and memory registration are switched off.
 *
 * Repeat calls return immediately.
 */
void mca_common_cuda_init(void)
{
    int param;
    CUresult rc;
    CUcontext ctx;

    /* Already done - nothing more to do. */
    if (initialized) {
        return;
    }

    /* Verbosity of the cuda related code.  The stream is opened
     * regardless of the level and then given that verbosity. */
    (void) mca_base_param_reg_int_name("mpi", "common_cuda_verbose",
                                       "Set level of common cuda verbosity",
                                       false, false, 0, &mca_common_cuda_verbose);
    mca_common_cuda_output = opal_output_open(NULL);
    opal_output_set_verbosity(mca_common_cuda_output, mca_common_cuda_verbose);

    /* Should system buffers get CUDA pinned?  Being able to turn this
     * off allows for performance analysis.  The current setting is
     * the default of the parameter. */
    (void) mca_base_param_reg_int_name("mpi", "common_cuda_register_memory",
                                       "Whether to cuMemHostRegister preallocated BTL buffers",
                                       false, false,
                                       (int) mca_common_cuda_register_memory, &param);
    mca_common_cuda_register_memory = OPAL_INT_TO_BOOL(param);

    /* Should a failed CUDA memory registration produce a warning?
     * Silencing it helps when CUDA support is configured in but a
     * regular MPI application without CUDA is being run. */
    (void) mca_base_param_reg_int_name("mpi", "common_cuda_warning",
                                       "Whether to print warnings when CUDA registration fails",
                                       false, false,
                                       (int) mca_common_cuda_warning, &param);
    mca_common_cuda_warning = OPAL_INT_TO_BOOL(param);

    /* Is this process running in a CUDA context?  ctx is only looked
     * at once the query itself has reported success. */
    rc = cuCtxGetCurrent(&ctx);
    if (CUDA_SUCCESS == rc && NULL != ctx) {
        /* All is good.  mca_common_cuda_register_memory keeps the
         * value obtained from its parameter above, so registration of
         * the internal buffers can still have been disabled there. */
        mca_common_cuda_enabled = true;
        opal_output_verbose(20, mca_common_cuda_output,
                            "CUDA: cuCtxGetCurrent succeeded");
    } else {
        if (mca_common_cuda_warning) {
            if (CUDA_SUCCESS == rc) {
                /* The call worked, but there is no context. */
                orte_show_help("help-mpi-common-cuda.txt", "cuCtxGetCurrent returned NULL",
                               true);
            } else if (CUDA_ERROR_NOT_INITIALIZED == rc) {
                /* Separate message for this error since we can make
                 * suggestions to the user about it. */
                orte_show_help("help-mpi-common-cuda.txt", "cuCtxGetCurrent failed not initialized",
                               true);
            } else {
                orte_show_help("help-mpi-common-cuda.txt", "cuCtxGetCurrent failed",
                               true, rc);
            }
        }
        /* No usable context: disable CUDA and memory registration. */
        mca_common_cuda_enabled = false;
        mca_common_cuda_register_memory = false;
    }

    opal_output_verbose(30, mca_common_cuda_output,
                        "CUDA: initialized");
    initialized = true;
}
int orte_register_params(void)
{
    int id;
    opal_output_stream_t lds;

    /* only go thru this once - mpirun calls it twice, which causes
     * any error messages to show up twice
     */
    if (passed_thru) {
        return ORTE_SUCCESS;
    }
    passed_thru = true;

    /* get a clean output channel too - need to do this here because
     * we use it below, and orterun and some other tools call this
     * function prior to calling orte_init
     */
    OBJ_CONSTRUCT(&lds, opal_output_stream_t);
    lds.lds_want_stdout = true;
    orte_clean_output = opal_output_open(&lds);
    OBJ_DESTRUCT(&lds);

    orte_help_want_aggregate = true;
    (void) mca_base_var_register ("orte", "orte", "base", "help_aggregate",
                                  "If orte_base_help_aggregate is true, duplicate help messages will be aggregated rather than displayed individually.  This can be helpful for parallel jobs that experience multiple identical failures; rather than print out the same help/failure message N times, display it once with a count of how many processes sent the same message.",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
                                  &orte_help_want_aggregate);

    /* LOOK FOR A TMP DIRECTORY BASE */
    /* Several options are provided to cover a range of possibilities:
     *
     * (a) all processes need to use a specified location as the base
     *     for tmp directories
     * (b) daemons on remote nodes need to use a specified location, but
     *     one different from that used by mpirun
     * (c) mpirun needs to use a specified location, but one different
     *     from that used on remote nodes
     */
    orte_tmpdir_base = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "tmpdir_base",
                                  "Base of the session directory tree to be used by all processes",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
                                  &orte_tmpdir_base);

    orte_local_tmpdir_base = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "local_tmpdir_base",
                                  "Base of the session directory tree to be used by orterun/mpirun",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
                                  &orte_local_tmpdir_base);

    orte_remote_tmpdir_base = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "remote_tmpdir_base",
                                  "Base of the session directory tree on remote nodes, if required to be different from head node",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
                                  &orte_remote_tmpdir_base);

    /* if a global tmpdir was specified, then we do not allow specification
     * of the local or remote values to avoid confusion
     */
    if (NULL != orte_tmpdir_base &&
        (NULL != orte_local_tmpdir_base || NULL != orte_remote_tmpdir_base)) {
        opal_output(orte_clean_output,
                    "------------------------------------------------------------------\n"
                    "The MCA param orte_tmpdir_base was specified, which sets the base\n"
                    "of the temporary directory tree for all procs. However, values for\n"
                    "the local and/or remote tmpdir base were also given. This can lead\n"
                    "to confusion and is therefore not allowed. Please specify either a\n"
                    "global tmpdir base OR a local/remote tmpdir base value\n"
                    "------------------------------------------------------------------");
        exit(1);
    }

    if (NULL != orte_tmpdir_base) {
        if (NULL != orte_process_info.tmpdir_base) {
            free(orte_process_info.tmpdir_base);
        }
        orte_process_info.tmpdir_base = strdup (orte_tmpdir_base);
    } else if (ORTE_PROC_IS_HNP && NULL != orte_local_tmpdir_base) {
        /* orterun will pickup the value for its own use */
        if (NULL != orte_process_info.tmpdir_base) {
            free(orte_process_info.tmpdir_base);
        }
        orte_process_info.tmpdir_base = strdup (orte_local_tmpdir_base);
    } else if (ORTE_PROC_IS_DAEMON && NULL != orte_remote_tmpdir_base) {
        /* orterun will pickup the value and forward it along, but must not
         * use it in its own work. So only a daemon needs to get it, and the
         * daemon will pass it down to its application procs. Note that orterun
         * will pass -its- value to any procs local to it
         */
        if (NULL != orte_process_info.tmpdir_base) {
            free(orte_process_info.tmpdir_base);
        }
        orte_process_info.tmpdir_base = strdup (orte_remote_tmpdir_base);
    }

    orte_top_session_dir = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "top_session_dir",
                                  "Top of the session directory tree for applications",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
                                  &orte_top_session_dir);

    if (NULL != orte_top_session_dir) {
         if (NULL != orte_process_info.top_session_dir) {
            free(orte_process_info.top_session_dir);
        }
        orte_process_info.top_session_dir = strdup(orte_top_session_dir);
    }

    orte_jobfam_session_dir = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "jobfam_session_dir",
                                  "The jobfamily session directory for applications",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
                                  &orte_jobfam_session_dir);

    if (NULL != orte_jobfam_session_dir) {
        if (NULL != orte_process_info.jobfam_session_dir) {
            free(orte_process_info.jobfam_session_dir);
        }
        orte_process_info.jobfam_session_dir = strdup(orte_jobfam_session_dir);
    }

    orte_prohibited_session_dirs = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "no_session_dirs",
                                  "Prohibited locations for session directories (multiple locations separated by ',', default=NULL)",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                  &orte_prohibited_session_dirs);

    orte_create_session_dirs = true;
    (void) mca_base_var_register ("orte", "orte", NULL, "create_session_dirs",
                                  "Create session directories",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                  &orte_create_session_dirs);

    orte_execute_quiet = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "execute_quiet",
                                  "Do not output error and help messages",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                  &orte_execute_quiet);

    orte_report_silent_errors = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "report_silent_errors",
                                  "Report all errors, including silent ones",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                  &orte_report_silent_errors);

    orte_debug_flag = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "debug",
                                  "Top-level ORTE debug switch (default: false)",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                  &orte_debug_flag);

    orte_debug_verbosity = -1;
    (void) mca_base_var_register ("orte", "orte", NULL, "debug_verbose",
                                  "Verbosity level for ORTE debug messages (default: 1)",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                  &orte_debug_verbosity);

    orte_debug_daemons_file_flag = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "debug_daemons_file",
                                  "Whether want stdout/stderr of daemons to go to a file or not",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                  &orte_debug_daemons_file_flag);
    /* If --debug-daemons-file was specified, that also implies
       --debug-daemons */
    if (orte_debug_daemons_file_flag) {
        orte_debug_daemons_flag = true;

        /* value can't change */
        (void) mca_base_var_register ("orte", "orte", NULL, "debug_daemons",
                                      "Whether to debug the ORTE daemons or not",
                                      MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                      OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_CONSTANT,
                                      &orte_debug_daemons_flag);
    } else {
        orte_debug_daemons_flag = false;

        (void) mca_base_var_register ("orte", "orte", NULL, "debug_daemons",
                                      "Whether to debug the ORTE daemons or not",
                                      MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                      OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                      &orte_debug_daemons_flag);
    }

    orte_progress_thread_debug_level = -1;
    (void) mca_base_var_register ("orte", "orte", NULL, "progress_thread_debug",
                                  "Debug level for ORTE progress threads",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                  &orte_progress_thread_debug_level);

    if (0 <= orte_progress_thread_debug_level) {
        orte_progress_thread_debug = opal_output_open(NULL);
        opal_output_set_verbosity(orte_progress_thread_debug,
                                  orte_progress_thread_debug_level);
    }

    /* do we want session output left open? */
    orte_leave_session_attached = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "leave_session_attached",
                                  "Whether applications and/or daemons should leave their sessions "
                                  "attached so that any output can be received - this allows X forwarding "
                                  "without all the attendant debugging output",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                  &orte_leave_session_attached);

    /* if any debug level is set, ensure we output debug level dumps */
    if (orte_debug_flag || orte_debug_daemons_flag || orte_leave_session_attached) {
        orte_devel_level_output = true;
    }

    /* See comment in orte/tools/orterun/orterun.c about this MCA
       param (this param is internal) */
    orte_in_parallel_debugger = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "in_parallel_debugger",
                                  "Whether the application is being debugged "
                                  "in a parallel debugger (default: false)",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_INTERNAL,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_in_parallel_debugger);

    orte_debugger_dump_proctable = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "output_debugger_proctable",
                                  "Whether or not to output the debugger proctable after launch (default: false)",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                  &orte_debugger_dump_proctable);

    orte_debugger_test_daemon = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "debugger_test_daemon",
                                  "Name of the executable to be used to simulate a debugger colaunch (relative or absolute path)",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_debugger_test_daemon);

    orte_debugger_test_attach = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "debugger_test_attach",
                                  "Test debugger colaunch after debugger attachment",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_debugger_test_daemon);

    orte_debugger_check_rate = 0;
    (void) mca_base_var_register ("orte", "orte", NULL, "debugger_check_rate",
                                  "Set rate (in secs) for auto-detect of debugger attachment (0 => do not check)",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_debugger_check_rate);

    orte_do_not_launch = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "do_not_launch",
                                  "Perform all necessary operations to prepare to launch the application, but do not actually launch it",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_do_not_launch);

    orted_spin_flag = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "daemon_spin",
                                  "Have any orteds spin until we can connect a debugger to them",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orted_spin_flag);

    orted_debug_failure = ORTE_VPID_INVALID;
    (void) mca_base_var_register ("orte", "orte", NULL, "daemon_fail",
                                  "Have the specified orted fail after init for debugging purposes",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orted_debug_failure);

    orted_debug_failure_delay = 0;
    (void) mca_base_var_register ("orte", "orte", NULL, "daemon_fail_delay",
                                  "Have the specified orted fail after specified number of seconds (default: 0 => no delay)",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orted_debug_failure_delay);

    orte_startup_timeout = 0;
    (void) mca_base_var_register ("orte", "orte", NULL, "startup_timeout",
                                  "Seconds to wait for startup or job launch before declaring failed_to_start (default: 0 => do not check)",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_startup_timeout);

    /* User-level debugger info string */
    orte_base_user_debugger = "totalview @mpirun@ -a @mpirun_args@ : ddt -n @np@ -start @executable@ @executable_argv@ @single_app@ : fxp @mpirun@ -a @mpirun_args@";
    (void) mca_base_var_register ("orte", "orte", NULL, "base_user_debugger",
                                  "Sequence of user-level debuggers to search for in orterun",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_base_user_debugger);

#if 0
    mca_base_param_reg_int_name("orte", "abort_timeout",
                                "Max time to wait [in secs] before aborting an ORTE operation (default: 1sec)",
                                false, false, 1, &value);
    orte_max_timeout = 1000000.0 * value;  /* convert to usec */

    mca_base_param_reg_int_name("orte", "timeout_step",
                                "Time to wait [in usecs/proc] before aborting an ORTE operation (default: 1000 usec/proc)",
                                false, false, 1000, &orte_timeout_usec_per_proc);
#endif

    /* default hostfile */
    orte_default_hostfile = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "default_hostfile",
                                  "Name of the default hostfile (relative or absolute path, \"none\" to ignore environmental or default MCA param setting)",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_default_hostfile);

    if (NULL == orte_default_hostfile) {
        /* nothing was given, so define the default */
        asprintf(&orte_default_hostfile, "%s/openmpi-default-hostfile", opal_install_dirs.sysconfdir);
        /* flag that nothing was given */
        orte_default_hostfile_given = false;
    } else if (0 == strcmp(orte_default_hostfile, "none")) {
        free (orte_default_hostfile);
        orte_default_hostfile = NULL;
        /* flag that it was given */
        orte_default_hostfile_given = true;
    } else {
        /* flag that it was given */
        orte_default_hostfile_given = true;
    }

    /* default dash-host */
    orte_default_dash_host = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "default_dash_host",
                                  "Default -host setting (specify \"none\" to ignore environmental or default MCA param setting)",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_default_dash_host);
    if (NULL != orte_default_dash_host &&
        0 == strcmp(orte_default_dash_host, "none")) {
        free(orte_default_dash_host);
        orte_default_dash_host = NULL;
    }

    /* regex of nodes in system */
    orte_node_regex = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "node_regex",
                                  "Regular expression defining nodes in the system",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_node_regex);

    /* whether or not to keep FQDN hostnames */
    orte_keep_fqdn_hostnames = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "keep_fqdn_hostnames",
                                  "Whether or not to keep FQDN hostnames [default: no]",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_keep_fqdn_hostnames);

    /* whether or not to retain aliases of hostnames */
    orte_retain_aliases = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "retain_aliases",
                                  "Whether or not to keep aliases for host names [default: no]",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_retain_aliases);

    /* which alias to use in MPIR_proctab */
    orte_use_hostname_alias = 1;
    (void) mca_base_var_register ("orte", "orte", NULL, "hostname_alias_index",
                                  "If hostname aliases are being retained, which one to use for the debugger proc table [default: 1st alias]",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_use_hostname_alias);

    orte_xml_output = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "xml_output",
                                  "Display all output in XML format (default: false)",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_xml_output);

    /* whether to tag output */
    /* if we requested xml output, be sure to tag the output as well */
    orte_tag_output = orte_xml_output;
    (void) mca_base_var_register ("orte", "orte", NULL, "tag_output",
                                  "Tag all output with [job,rank] (default: false)",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_tag_output);
    if (orte_xml_output) {
        orte_tag_output = true;
    }


    orte_xml_file = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "xml_file",
                                  "Provide all output in XML format to the specified file",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_xml_file);
    if (NULL != orte_xml_file) {
        if (ORTE_PROC_IS_HNP && NULL == orte_xml_fp) {
            /* only the HNP opens this file! Make sure it only happens once */
            orte_xml_fp = fopen(orte_xml_file, "w");
            if (NULL == orte_xml_fp) {
                opal_output(0, "Could not open specified xml output file: %s", orte_xml_file);
                return ORTE_ERROR;
            }
        }
        /* ensure we set the flags to tag output */
        orte_xml_output = true;
        orte_tag_output = true;
    } else {
        /* default to stdout */
        orte_xml_fp = stdout;
    }

    /* whether to timestamp output */
    orte_timestamp_output = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "timestamp_output",
                                  "Timestamp all application process output (default: false)",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_timestamp_output);

    /* redirect output into files */
    orte_output_filename = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "output_filename",
                                  "Redirect output from application processes into filename.rank [default: NULL]",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_output_filename);

    orte_show_resolved_nodenames = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "show_resolved_nodenames",
                                  "Display any node names that are resolved to a different name (default: false)",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_show_resolved_nodenames);

#if 0
    /* XXX -- option doesn't appear to do anything */
    mca_base_param_reg_int_name("orte", "hetero_apps",
                                "Indicates that multiple app_contexts are being provided that are a mix of 32/64 bit binaries (default: false)",
                                false, false, (int) false, &value);
    orte_hetero_apps = OPAL_INT_TO_BOOL(value);
#endif

    orte_hetero_nodes = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "hetero_nodes",
                                  "Nodes in cluster may differ in topology, so send the topology back from each node [Default = false]",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_hetero_nodes);

    /* allow specification of the launch agent */
    orte_launch_agent = "orted";
    (void) mca_base_var_register ("orte", "orte", NULL, "launch_agent",
                                  "Command used to start processes on remote nodes (default: orted)",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_launch_agent);

    orte_fork_agent_string = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "fork_agent",
                                  "Command used to fork processes on remote nodes (default: NULL)",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_fork_agent_string);

    if (NULL != orte_fork_agent_string) {
        orte_fork_agent = opal_argv_split(orte_fork_agent_string, ' ');
    }

    /* whether or not to require RM allocation */
    orte_allocation_required = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "allocation_required",
                                  "Whether or not an allocation by a resource manager is required [default: no]",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_allocation_required);

    /* whether or not to map stddiag to stderr */
    orte_map_stddiag_to_stderr = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "map_stddiag_to_stderr",
                                  "Map output from opal_output to stderr of the local process [default: no]",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_map_stddiag_to_stderr);

    /* generate new terminal windows to display output from specified ranks */
    orte_xterm = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "xterm",
                                  "Create a new xterm window and display output from the specified ranks there [default: none]",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_xterm);
    if (NULL != orte_xterm) {
        /* if an xterm request is given, we have to leave any ssh
         * sessions attached so the xterm window manager can get
         * back to the controlling terminal
         */
        orte_leave_session_attached = true;
        /* also want to redirect stddiag output from opal_output
         * to stderr from the process so those messages show
         * up in the xterm window instead of being forwarded to mpirun
         */
        orte_map_stddiag_to_stderr = true;
    }

    /* whether or not to report launch progress */
    orte_report_launch_progress = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "report_launch_progress",
                                  "Output a brief periodic report on launch progress [default: no]",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_report_launch_progress);

    /* cluster hardware info detected by orte only */
    orte_local_cpu_type = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "cpu_type",
                                  "cpu type detected in node",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_INTERNAL,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_local_cpu_type);

    orte_local_cpu_model = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "cpu_model",
                                  "cpu model detected in node",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_INTERNAL,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_local_cpu_model);

    /* tool communication controls */
    orte_report_events_uri = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "report_events",
                                  "URI to which events are to be reported (default: NULL)",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_report_events_uri);
    if (NULL != orte_report_events_uri) {
        orte_report_events = true;
    }

    /* barrier control */
    orte_do_not_barrier = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "do_not_barrier",
                                  "Do not barrier in orte_init",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_INTERNAL,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_do_not_barrier);

    orte_enable_recovery = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "enable_recovery",
                                  "Enable recovery from process failure [Default = disabled]",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_enable_recovery);

    orte_max_restarts = 0;
    (void) mca_base_var_register ("orte", "orte", NULL, "max_restarts",
                                  "Max number of times to restart a failed process",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_max_restarts);

    if (!orte_enable_recovery && orte_max_restarts != 0) {
        if (ORTE_PROC_IS_HNP) {
            opal_output(orte_clean_output,
                        "------------------------------------------------------------------\n"
                        "The MCA param orte_enable_recovery was not set to true, but\n"
                        "a value was provided for the number of restarts:\n\n"
                        "Max restarts: %d\n"
                        "We are enabling process recovery and continuing execution. To avoid\n"
                        "this warning in the future, please set the orte_enable_recovery\n"
                        "param to non-zero.\n"
                        "------------------------------------------------------------------",
                        orte_max_restarts);
        }
        orte_enable_recovery = true;
    }

    orte_abort_non_zero_exit = true;
    (void) mca_base_var_register ("orte", "orte", NULL, "abort_on_non_zero_status",
                                  "Abort the job if any process returns a non-zero exit status - no restart in such cases",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_abort_non_zero_exit);

    orte_allowed_exit_without_sync = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "allowed_exit_without_sync",
                                  "Process exiting without calling finalize will not trigger job termination",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_allowed_exit_without_sync);

    orte_staged_execution = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "staged_execution",
                                  "Staged execution is being used",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_staged_execution);

    orte_report_child_jobs_separately = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "report_child_jobs_separately",
                                  "Return the exit status of the primary job only",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_report_child_jobs_separately);


#if 0
    /* XXX -- unused parameter */
    mca_base_param_reg_int_name("orte", "child_time_to_exit",
                                "Max time a spawned child job is allowed to run after the primary job has terminated (seconds)",
                                false, false,
                                INT_MAX, &value);
    orte_child_time_to_exit.tv_sec = value;
    orte_child_time_to_exit.tv_usec = 0;
#endif

    orte_stat_history_size = 1;
    (void) mca_base_var_register ("orte", "orte", NULL, "stat_history_size",
                                  "Number of stat samples to keep",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_stat_history_size);

    orte_max_vm_size = -1;
    (void) mca_base_var_register ("orte", "orte", NULL, "max_vm_size",
                                  "Maximum size of virtual machine - used to subdivide allocation",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_max_vm_size);

    if (opal_hwloc_use_hwthreads_as_cpus) {
        orte_set_slots = "hwthreads";
    } else {
        orte_set_slots = "cores";
    }
    (void) mca_base_var_register ("orte", "orte", NULL, "set_default_slots",
                                  "Set the number of slots on nodes that lack such info to the"
                                  " number of specified objects [a number, \"cores\" (default),"
                                  " \"numas\", \"sockets\", \"hwthreads\" (default if hwthreads_as_cpus is set),"
                                  " or \"none\" to skip this option]",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_set_slots);

    /* should we display the allocation after determining it? */
    orte_display_allocation = false;
    id = mca_base_var_register ("orte", "orte", NULL, "display_alloc",
                                "Whether to display the allocation after it is determined",
                                MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                &orte_display_allocation);
    /* register a synonym for old name -- should we remove this now? */
    mca_base_var_register_synonym (id, "orte", "ras", "base", "display_alloc", MCA_BASE_VAR_SYN_FLAG_DEPRECATED);

    /* should we display a detailed (developer-quality) version of the allocation after determining it? */
    orte_devel_level_output = false;
    id = mca_base_var_register ("orte", "orte", NULL, "display_devel_alloc",
                                "Whether to display a developer-detail allocation after it is determined",
                                MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                &orte_devel_level_output);
    /* register a synonym for old name -- should we remove this now? */
    mca_base_var_register_synonym (id, "orte", "ras", "base", "display_devel_alloc", MCA_BASE_VAR_SYN_FLAG_DEPRECATED);

    if (orte_devel_level_output) {
        orte_display_allocation = true;
    }

    /* should we treat any -host directives as "soft" - i.e., desired
     * but not required
     */
    orte_soft_locations = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "soft_locations",
                                  "Treat -host directives as desired, but not required",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_soft_locations);

    /* allow specification of the cores to be used by daemons */
    orte_daemon_cores = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "daemon_cores",
                                  "Restrict the ORTE daemons (including mpirun) to operate on the specified cores (comma-separated list of ranges)",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_daemon_cores);

    /* cutoff for full modex */
    orte_direct_modex_cutoff = UINT32_MAX;
    id = mca_base_var_register ("orte", "orte", NULL, "direct_modex_cutoff",
                                "If the number of processes in the application exceeds the provided value,"
                                "modex will be done upon demand [default: UINT32_MAX]",
                                MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0,
                                OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                &orte_direct_modex_cutoff);
    /* register a synonym for old name */
    mca_base_var_register_synonym (id, "ompi", "ompi", "hostname", "cutoff", MCA_BASE_VAR_SYN_FLAG_DEPRECATED);

    return ORTE_SUCCESS;
}
/*
 * The values below are the default values.
 */
/* MPI-layer settings with external linkage, each initialised here to its
 * default value.
 * NOTE(review): presumably these are overwritten when the matching MCA
 * parameters are registered in ompi_mpi_register_params() -- confirm. */
bool ompi_mpi_param_check = true;
bool ompi_debug_show_handle_leaks = false;
/* Declared as an int, unlike the neighbouring bool debug switches. */
int ompi_debug_show_mpi_alloc_mem_leaks = 0;
bool ompi_debug_no_free_handles = false;
bool ompi_mpi_show_mca_params = false;
/* No file name by default. */
char *ompi_mpi_show_mca_params_file = NULL;
bool ompi_mpi_abort_print_stack = false;
/* NOTE(review): units of the delay are not visible here -- confirm. */
int ompi_mpi_abort_delay = 0;
bool ompi_mpi_keep_peer_hostnames = true;
bool ompi_mpi_keep_fqdn_hostnames = false;
/* Defaults to -1 rather than 0 or 1.
 * NOTE(review): -1 presumably means "not explicitly set" -- confirm. */
int ompi_mpi_leave_pinned = -1;
bool ompi_mpi_leave_pinned_pipeline = false;
/* Both sparse-group flags start from the same compile-time constant,
 * OMPI_GROUP_SPARSE, passed through OPAL_INT_TO_BOOL. */
bool ompi_have_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE);
bool ompi_use_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE);

/* File-local flags (internal linkage), all off by default.
 * NOTE(review): judging by the names, these select which sources of MCA
 * parameter values are shown -- confirm against the code that reads them. */
static bool show_default_mca_params = false;
static bool show_file_mca_params = false;
static bool show_enviro_mca_params = false;
static bool show_override_mca_params = false;

int ompi_mpi_register_params(void)
{
    int value;
    char *param;

    /* Whether we want MPI API function parameter checking or not */

    mca_base_param_reg_int_name("mpi", "param_check",