Ejemplo n.º 1
0
/**
 * Open the BTL framework: register the framework-level MCA parameters,
 * open every available BTL component, and construct the list of
 * initialized modules.
 *
 * Only the first call does any work; later calls just bump the open
 * counter and report success.
 *
 * @retval OMPI_SUCCESS  The framework is open (or was already open).
 * @retval OMPI_ERROR    The components could not be opened.
 */
int mca_btl_base_open(void)
{
    int override_flag;
    int include_id;
    int exclude_id;

    /* Count the open calls; everything below runs on the first one only */
    ++mca_btl_base_already_opened;
    if (mca_btl_base_already_opened > 1) {
        return OMPI_SUCCESS;
    }

    /* Framework verbosity, applied to a freshly opened output stream */
    mca_base_param_reg_int_name("btl", "base_verbose",
                                "Verbosity level of the BTL framework",
                                false, false, 0,
                                &mca_btl_base_verbose);
    mca_btl_base_output = opal_output_open(NULL);
    opal_output_set_verbosity(mca_btl_base_output, mca_btl_base_verbose);

    /* Should the per-BTL "don't run if THREAD_MULTIPLE selected"
       embargo be overridden? */
    mca_base_param_reg_int_name("btl", "base_thread_multiple_override",
                                "Enable BTLs that are not normally enabled when MPI_THREAD_MULTIPLE is enabled (THIS IS FOR DEVELOPERS ONLY!  SHOULD NOT BE USED BY END USERS!)",
                                true, false, 0,
                                &override_flag);
    mca_btl_base_thread_multiple_override = OPAL_INT_TO_BOOL(override_flag);

    /* Open up all available components */
    if (OMPI_SUCCESS != mca_base_components_open("btl",
                                                 mca_btl_base_output,
                                                 mca_btl_base_static_components,
                                                 &mca_btl_base_components_opened,
                                                 true)) {
        return OMPI_ERROR;
    }

    /* Construct the module list now so that mca_btl_base_close() can
       always iterate over it, even when it stays empty (as in the case
       of ompi_info) */
    OBJ_CONSTRUCT(&mca_btl_base_modules_initialized, opal_list_t);

    /* Include / exclude lists and the "unused component" warning switch */
    include_id = mca_base_param_register_string("btl","base","include",NULL,NULL);
    mca_base_param_lookup_string(include_id, &mca_btl_base_include);

    exclude_id = mca_base_param_register_string("btl","base","exclude",NULL,NULL);
    mca_base_param_lookup_string(exclude_id, &mca_btl_base_exclude);

    mca_base_param_reg_int_name("btl", "base_warn_component_unused",
                                "This parameter is used to turn on warning messages when certain NICs are not used",
                                false, false, 1,
                                &mca_btl_base_warn_component_unused);

    return OMPI_SUCCESS;
}
Ejemplo n.º 2
0
/*
 * Main MCA initialization.
 *
 * Registers the top-level "mca" parameters (component path, verbosity,
 * load-error reporting, dlopen control), re-opens output stream 0 with
 * the requested verbosity and a "[hostname:pid] " prefix, and then
 * initializes the component repository.
 *
 * Returns OPAL_SUCCESS immediately when called again after the first
 * call; otherwise returns the result of
 * mca_base_component_repository_init().
 */
int mca_base_open(void)
{
  int param_index;
  int rc;
  char *value = NULL;
  opal_output_stream_t lds;
  char hostname[64];

  if (mca_base_opened) {
    return OPAL_SUCCESS;
  }
  mca_base_opened = true;

  /* Build the default component search path */
#if OMPI_WANT_HOME_CONFIG_FILES
  rc = asprintf(&value, "%s%c%s"OPAL_PATH_SEP".openmpi"OPAL_PATH_SEP"components", opal_install_dirs.pkglibdir, OPAL_ENV_SEP, opal_home_directory() );
#else
# if defined(__WINDOWS__) && defined(_DEBUG) 
  rc = asprintf(&value, "%s/debug", opal_install_dirs.pkglibdir);  
# else 
  rc = asprintf(&value, "%s", opal_install_dirs.pkglibdir);  
# endif 
#endif
  if (rc < 0) {
    /* The pointer is not reliable after a failed asprintf(); register
       the parameter without a default rather than with garbage */
    value = NULL;
  }

  /* Register some params */
  mca_base_param_component_path = 
    mca_base_param_reg_string_name("mca", "component_path",
                                   "Path where to look for Open MPI and ORTE components", 
                                   false, false, value, NULL);
  free(value);
  /* Do not leave a dangling pointer behind: it is tested again after
     the verbosity lookup below */
  value = NULL;

  param_index = mca_base_param_reg_string_name("mca", "verbose", 
                                               "Top-level verbosity parameter",
                                               false, false, NULL, NULL);

  mca_base_param_reg_int_name("mca", "component_show_load_errors", 
                              "Whether to show errors for components that failed to load or not", 
                              false, false, 1, NULL);

  mca_base_param_reg_int_name("mca", "component_disable_dlopen",
                              "Whether to attempt to disable opening dynamic components or not",
                              false, false, 0, NULL);

  /* What verbosity level do we want? */

  mca_base_param_lookup_string(param_index, &value);
  memset(&lds, 0, sizeof(lds));
  if (NULL != value) {
    parse_verbose(value, &lds);
    free(value);
  } else {
    set_defaults(&lds);
  }

  /* gethostname() need not NUL-terminate a truncated name, and leaves
     the buffer unspecified on failure */
  if (0 != gethostname(hostname, sizeof(hostname))) {
    hostname[0] = '\0';
  }
  hostname[sizeof(hostname) - 1] = '\0';

  if (asprintf(&lds.lds_prefix, "[%s:%05d] ", hostname, (int) getpid()) < 0) {
    lds.lds_prefix = NULL;
  }
  opal_output_reopen(0, &lds);
  opal_output_verbose(5, 0, "mca: base: opening components");
  free(lds.lds_prefix);

  /* Open up the component repository */

  return mca_base_component_repository_init();
}
Ejemplo n.º 3
0
/**
 * Removes the bproc directory
 * @code /tmp/openmpi-bproc-<user>/ @endcode and all of its contents.
 *
 * The user name is taken from the "pls" / "bproc" / "username" MCA
 * parameter, whose default is orte_system_info.user.
 *
 * @retval ORTE_SUCCESS  The removal was attempted (errors while deleting
 *                       the tree itself are ignored).
 * @retval ORTE_ERROR    No user name was available, or the directory
 *                       path could not be built.
 */
static int
odls_bproc_remove_dir(void)
{
    char *frontend = NULL, *user = NULL, *filename = NULL;
    int id, rc;

    /* get the username set by the bproc pls. We need to get it from here
     * because on many bproc systems the method we use to get the username
     * from the system on the backend fails and we only get the uid. */
    id = mca_base_param_register_string("pls", "bproc", "username", NULL,
                                        orte_system_info.user);
    mca_base_param_lookup_string(id, &user);
    if (NULL == user) {
        /* Without a user name the directory cannot be identified, and
         * passing NULL to "%s" is undefined */
        ORTE_ERROR_LOG(ORTE_ERROR);
        return ORTE_ERROR;
    }

    rc = asprintf(&filename, "openmpi-bproc-%s", user);
    free(user);  /* the looked-up string belongs to us */
    if (rc < 0) {
        /* the output pointer is not reliable after a failed asprintf(),
         * so test the return value instead */
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERROR;
    }

    frontend = opal_os_path(false, "tmp", filename, NULL );
    free(filename);  /* Always free the filename */
    if (NULL == frontend) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERROR;
    }

    /* we do our best to clean up the directory tree, but we ignore errors*/
    odls_bproc_delete_dir_tree(frontend);
    free(frontend);
    return ORTE_SUCCESS;
}
Ejemplo n.º 4
0
/*
 * Utility routine for string parameter registration.
 *
 * Registers param_name on the openib BTL component with the given
 * description and default, optionally registers deprecated_param_name
 * as a synonym for it, and hands the parameter's current value back
 * through out_value.
 *
 * Returns OMPI_SUCCESS with *out_value set, or OMPI_ERR_BAD_PARAM
 * (with *out_value untouched) when the value is rejected.
 */
static int reg_string(const char* param_name, 
                      const char* deprecated_param_name,
                      const char* param_desc,
                      const char* default_value, char **out_value,
                      int flags)
{
    int index;
    char *value = NULL;

    /* No storage is passed here: the value is fetched exactly once,
       below, after the synonym has been registered.  Requesting it here
       as well would produce a string that the later lookup overwrites
       and thereby leaks. */
    index = mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
                                      param_name, param_desc, false, false,
                                      default_value, NULL);
    if (NULL != deprecated_param_name) {
        mca_base_param_reg_syn(index, 
                               &mca_btl_openib_component.super.btl_version, 
                               deprecated_param_name, true);
    }
    mca_base_param_lookup_string(index, &value);

    /* NOTE(review): this rejects an empty value when REGSTR_EMPTY_OK
       *is* set, which looks inverted relative to the flag's name.  The
       test is kept as-is because callers may depend on the current
       behaviour -- confirm before changing.  A NULL value is treated
       like an empty one so that strlen() is never handed NULL. */
    if (0 != (flags & REGSTR_EMPTY_OK) &&
        (NULL == value || 0 == strlen(value))) {
        opal_output(0, "Bad parameter value for parameter \"%s\"",
                param_name);
        free(value);  /* not handed to the caller on this path */
        return OMPI_ERR_BAD_PARAM;
    }
    *out_value = value;
    return OMPI_SUCCESS;
}
Ejemplo n.º 5
0
/*
 * Set this process's name (ORTE_PROC_MY_NAME) from the "orte" / "ess" /
 * "jobid" and "vpid" MCA parameters, then pull in the remaining common
 * environment settings via orte_ess_env_get().
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_NOT_FOUND when either parameter has no
 * value, or the error code of the failing conversion / env lookup.
 */
static int slave_set_name(void)
{
    char *jobid_str = NULL, *procid_str = NULL;
    int id, rc;
    orte_jobid_t jobid;
    orte_vpid_t vpid;
    
    id = mca_base_param_register_string("orte", "ess", "jobid", NULL, NULL);
    mca_base_param_lookup_string(id, &jobid_str);
    if (NULL == jobid_str) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }
    rc = orte_util_convert_string_to_jobid(&jobid, jobid_str);
    /* release the string before looking at rc so that the error path
       does not leak it */
    free(jobid_str);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return(rc);
    }
    
    id = mca_base_param_register_string("orte", "ess", "vpid", NULL, NULL);
    mca_base_param_lookup_string(id, &procid_str);
    if (NULL == procid_str) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }
    rc = orte_util_convert_string_to_vpid(&vpid, procid_str);
    free(procid_str);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return(rc);
    }
    
    ORTE_PROC_MY_NAME->jobid = jobid;
    ORTE_PROC_MY_NAME->vpid = vpid;
    ORTE_EPOCH_SET(ORTE_PROC_MY_NAME->epoch,orte_ess.proc_get_epoch(ORTE_PROC_MY_NAME));
    
    OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
                         "ess:slave set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    /* get the non-name common environmental variables */
    if (ORTE_SUCCESS != (rc = orte_ess_env_get())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    return ORTE_SUCCESS;
}
Ejemplo n.º 6
0
/*
 * Register the string MCA parameter "pml" / "v" / param_name with
 * default_value as its default, look it up, and return the result.
 * The result starts out as default_value, so that is what comes back
 * if the lookup leaves it untouched.
 */
static inline char *mca_pml_v_param_register_string( const char* param_name,
                                                  char *default_value )
{
    char *result = default_value;
    int index;

    index = mca_base_param_register_string("pml", "v", param_name, NULL, default_value);
    mca_base_param_lookup_string(index, &result);

    return result;
}
Ejemplo n.º 7
0
/*
 * Register the string MCA parameter "btl" / "template" / param_name
 * with default_value as its default, look it up, and return the result.
 * Returns NULL if the lookup does not store a value.
 */
static inline char* mca_btl_template_param_register_string(
                                                     const char* param_name, 
                                                     const char* default_value)
{
    /* start from NULL so that a lookup which stores nothing cannot make
       us return an indeterminate pointer */
    char *param_value = NULL;
    int id = mca_base_param_register_string("btl","template",param_name,NULL,default_value);
    mca_base_param_lookup_string(id, &param_value);
    return param_value;
}
Ejemplo n.º 8
0
/**
 * Run a user-level debugger.
 *
 * Reads the colon-separated list held in the "orte" "base_user_debugger"
 * MCA parameter, hands each entry to process() until one succeeds, and
 * exec's the command line that process() produced.
 *
 * This function does not return: it either replaces the process image
 * via execvp() or prints a help message and exits with status 1.
 *
 * @param basename  Passed through to process() and to the exec-failure
 *                  help message.
 * @param cmd_line  Passed through to process().
 * @param argc      Passed through to process().
 * @param argv      Passed through to process().
 */
void orte_run_debugger(char *basename, opal_cmd_line_t *cmd_line,
                       int argc, char *argv[])
{
    int i, id;
    char **new_argv = NULL;
    char *value = NULL, **lines;

    /* Get the orte_base_debug MCA parameter and search for a debugger
       that can run */
    
    id = mca_base_param_find("orte", NULL, "base_user_debugger");
    if (id < 0) {
        opal_show_help("help-orterun.txt", "debugger-mca-param-not-found", 
                       true);
        exit(1);
    }
    mca_base_param_lookup_string(id, &value);
    if (NULL == value) {
        opal_show_help("help-orterun.txt", "debugger-orte_base_user_debugger-empty",
                       true);
        exit(1);
    }

    /* Look through all the values in the MCA param */

    lines = opal_argv_split(value, ':');
    free(value);

    /* A split that yields no array at all (nothing to split, or an
       allocation failure) means there is no debugger to try; do not
       index through a NULL pointer */
    if (NULL == lines) {
        opal_show_help("help-orterun.txt", "debugger-not-found", true);
        exit(1);
    }

    for (i = 0; NULL != lines[i]; ++i) {
        if (ORTE_SUCCESS == process(lines[i], basename, cmd_line, argc, argv, 
                                    &new_argv)) {
            break;
        }
    }

    /* If we didn't find one, abort */

    if (NULL == lines[i]) {
        opal_show_help("help-orterun.txt", "debugger-not-found", true);
        exit(1);
    }
    opal_argv_free(lines);

    /* We found one */

    execvp(new_argv[0], new_argv);

    /* Only reached when execvp() failed */
    value = opal_argv_join(new_argv, ' ');
    opal_show_help("help-orterun.txt", "debugger-exec-failed",
                   true, basename, value, new_argv[0]);
    free(value);
    opal_argv_free(new_argv);
    exit(1);
}
/*
 * Parse the value of the string MCA parameter mca_param into a list of
 * component names.
 *
 * On return *include_mode is false if the value started with one or
 * more negate characters (negate is defined outside this function) and
 * true otherwise; *requested_component_names is the comma-split list of
 * names, or NULL when nothing was requested.
 *
 * Returns OPAL_SUCCESS, or OPAL_ERROR when the lookup fails or a negate
 * character appears anywhere after the leading run.
 */
static int parse_requested(int mca_param, bool *include_mode,
                           char ***requested_component_names)
{
    int i;
    char *requested = NULL, *requested_orig;

    *requested_component_names = NULL;
    *include_mode = true;

    /* See if the user requested anything */

    if (OPAL_ERROR == mca_base_param_lookup_string(mca_param, &requested)) {
        return OPAL_ERROR;
    }
    if (NULL == requested || 0 == strlen(requested)) {
        /* an empty (but non-NULL) string still belongs to us; free(NULL)
           is a no-op, so one call covers both cases */
        free(requested);
        return OPAL_SUCCESS;
    }
    requested_orig = requested;

    /* Are we including or excluding?  We only allow the negate
       character to be the *first* character of the value (but be nice
       and allow any number of negate characters in the beginning). */

    while (negate == requested[0] && '\0' != requested[0]) {
        *include_mode = false;
        ++requested;
    }

    /* Double check to ensure that the user did not specify the negate
       character anywhere else in the value. */

    i = 0;
    while ('\0' != requested[i]) {
        if (negate == requested[i]) {
            opal_show_help("help-mca-base.txt",
                           "framework-param:too-many-negates",
                           true, requested_orig);
            free(requested_orig);
            return OPAL_ERROR;
        }
        ++i;
    }

    /* Split up the value into individual component names */

    *requested_component_names = opal_argv_split(requested, ',');

    /* All done; free through the original pointer, since "requested"
       may have been advanced past the negate characters */

    free(requested_orig);
    return OPAL_SUCCESS;
}
Ejemplo n.º 10
0
/*
 * Load the process-level settings that are held as MCA parameters into
 * orte_process_info (seed flag, application number, GPR and NS replica
 * URIs, temporary-directory base) and record the process id.
 *
 * Always returns ORTE_SUCCESS.
 */
int orte_proc_info(void)
{
    int param_id;
    int int_value;

    /* all other params are set elsewhere */

    /* seed flag */
    param_id = mca_base_param_register_int("seed", NULL, NULL, NULL, orte_process_info.seed);
    mca_base_param_lookup_int(param_id, &int_value);
    orte_process_info.seed = OPAL_INT_TO_BOOL(int_value);
    if (orte_process_info.seed) {
        /* a seed must NOT also carry the daemon flag, so that framework
         * components are properly selected
         */
        orte_process_info.daemon = false;
    }

    /* application number */
    param_id = mca_base_param_register_int("orte", "app", "num", NULL, -1);
    mca_base_param_lookup_int(param_id, &int_value);
    orte_process_info.app_num = int_value;

    /* GPR replica URI (marked internal) */
    param_id = mca_base_param_register_string("gpr", "replica", "uri", NULL, orte_process_info.gpr_replica_uri);
    mca_base_param_lookup_string(param_id, &(orte_process_info.gpr_replica_uri));
    mca_base_param_set_internal(param_id, true);

    /* NS replica URI (marked internal) */
    param_id = mca_base_param_register_string("ns", "replica", "uri", NULL, orte_process_info.ns_replica_uri);
    mca_base_param_lookup_string(param_id, &(orte_process_info.ns_replica_uri));
    mca_base_param_set_internal(param_id, true);

    /* base of the temporary directory */
    param_id = mca_base_param_register_string("tmpdir", "base", NULL, NULL, orte_process_info.tmpdir_base);
    mca_base_param_lookup_string(param_id, &(orte_process_info.tmpdir_base));

    /* get the process id */
    orte_process_info.pid = getpid();

    return ORTE_SUCCESS;
}
Ejemplo n.º 11
0
/**
 * Returns a path of the form:
 * @code
 * /tmp/openmpi-bproc-<user>/<universe>/<jobid>-<app_context>/<proc_rank>/
 * @endcode
 * which is used to put links to the pty/pipes in.
 *
 * The user name is taken from the "pls" / "bproc" / "username" MCA
 * parameter, whose default is orte_system_info.user.
 *
 * @param proc_rank   the process's rank on the node
 * @param jobid       the jobid the proc belongs to
 * @param app_context the application context number within the job
 * @retval path       string built with asprintf(), or NULL if the
 *                    universe name is not set, the jobid cannot be
 *                    converted to a string, or the path cannot be built
 */
static char *
odls_bproc_get_base_dir_name(int proc_rank, orte_jobid_t jobid,
                             orte_std_cntr_t app_context)
{
    char *path = NULL, *user = NULL, *job = NULL;
    int rc, id;

    /* ensure that system info is set */
    orte_sys_info();

    if (NULL == orte_universe_info.name) {  /* error condition */
        ORTE_ERROR_LOG(ORTE_ERROR);
        return NULL;
    }

    rc = orte_ns.convert_jobid_to_string(&job, jobid);
    if(ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return NULL;
    }

    /* get the username set by the bproc pls. We need to get it from here
     * because on many bproc systems the method we use to get the username
     * from the system on the backend fails and we only get the uid. */
    id = mca_base_param_register_string("pls", "bproc", "username", NULL,
                                        orte_system_info.user);
    mca_base_param_lookup_string(id, &user);

    if (0 > asprintf(&path, OPAL_PATH_SEP"tmp"OPAL_PATH_SEP"openmpi-bproc-%s"OPAL_PATH_SEP"%s"OPAL_PATH_SEP"%s-%d"OPAL_PATH_SEP"%d",
                     user, orte_universe_info.name,
                     job, (int) app_context, proc_rank)) {
        ORTE_ERROR_LOG(ORTE_ERROR);
        path = NULL;
    }

    /* only print the path when there is one: handing NULL to "%s" is
     * undefined */
    if (NULL != path && 0 < mca_odls_bproc_component.debug) {
        opal_output(0, "odls bproc io setup. Path: %s\n", path);
    }
    free(user);
    free(job);
    return path;
}
Ejemplo n.º 12
0
/*
 * Set this process's name and node name when running under SLURM.
 *
 * The jobid and the starting vpid come from the "orte" / "ess" /
 * "jobid" and "vpid" MCA parameters; the final vpid is that starting
 * vpid plus the integer value of the SLURM_NODEID environment variable.
 * orte_process_info.nodename is replaced with the result of
 * get_slurm_nodename(), and the remaining common environment settings
 * are loaded via orte_ess_env_get().
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_NOT_FOUND when a parameter or
 * SLURM_NODEID is missing, or the error code of the failing
 * conversion / env lookup.
 */
static int slurm_set_name(void)
{
    int slurm_nodeid;
    int rc;
    int id;
    orte_jobid_t jobid;
    orte_vpid_t vpid;
    char* jobid_string = NULL;
    char* vpid_string = NULL;
    char *nodeid;
    
    OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
                         "ess:slurm setting name"));
    
    id = mca_base_param_register_string("orte", "ess", "jobid", NULL, NULL);
    mca_base_param_lookup_string(id, &jobid_string);
    if (NULL == jobid_string) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }
    rc = orte_util_convert_string_to_jobid(&jobid, jobid_string);
    /* the looked-up string is ours; release it on success and on
       failure alike */
    free(jobid_string);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return(rc);
    }
    
    id = mca_base_param_register_string("orte", "ess", "vpid", NULL, NULL);
    mca_base_param_lookup_string(id, &vpid_string);
    if (NULL == vpid_string) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }
    rc = orte_util_convert_string_to_vpid(&vpid, vpid_string);
    free(vpid_string);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return(rc);
    }
    
    ORTE_PROC_MY_NAME->jobid = jobid;
    
    /* fix up the vpid and make it the "real" vpid */
    if (NULL == (nodeid = getenv("SLURM_NODEID"))) {
        opal_output(0, "SLURM_NODEID not found - cannot define name");
        return ORTE_ERR_NOT_FOUND;
    }
    slurm_nodeid = atoi(nodeid);
    ORTE_PROC_MY_NAME->vpid = vpid + slurm_nodeid;

    OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
                         "ess:slurm set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    /* fix up the system info nodename to match exactly what slurm returned */
    free(orte_process_info.nodename);
    orte_process_info.nodename = get_slurm_nodename(slurm_nodeid);
    
    OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
                         "ess:slurm set nodename to %s",
                         orte_process_info.nodename));
    
    /* get the non-name common environmental variables */
    if (ORTE_SUCCESS != (rc = orte_ess_env_get())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    
    return ORTE_SUCCESS;
}
Ejemplo n.º 13
0
/**
 * Function for finding and opening either all MCA components, or the one
 * that was specifically requested via a MCA parameter.
 *
 * Sets up the RAS output stream according to "ras_base_verbose",
 * registers ORTE_RAS_NODE with the DSS, and then either
 *  - installs the no-op module when the "ras" parameter is "null",
 *  - or opens all components and, when this process is not the seed,
 *    installs and initializes the proxy module.
 *
 * @retval ORTE_SUCCESS  The framework is ready.
 * @retval ORTE_ERROR    The "ras" parameter lookup or the component
 *                       open failed.
 * @retval other         Error code from the DSS type registration.
 */
int orte_ras_base_open(void)
{
    int value = 0, rc, param;
    orte_data_type_t tmp;
    char *requested = NULL;

    /* Debugging / verbose output.  The stream is opened only once the
       verbosity is known; opening one up front and then overwriting the
       handle would leak it. */

    mca_base_param_reg_int_name("ras", "base_verbose", 
                                "Enable debugging for the RAS framework (nonzero = enabled)",
                                false, false, 0, &value);
    if (value != 0) {
        orte_ras_base.ras_output = opal_output_open(NULL);
    } else {
        orte_ras_base.ras_output = -1;
    }

    /* Defaults */

    orte_ras_base.ras_opened_valid = false;
    orte_ras_base.ras_using_proxy = false;
    orte_ras_base.ras_available_valid = false;

    /** register the base system types with the DSS */
    tmp = ORTE_RAS_NODE;
    if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_ras_base_pack_node,
                                                     orte_ras_base_unpack_node,
                                                     (orte_dss_copy_fn_t)orte_ras_base_copy_node,
                                                     (orte_dss_compare_fn_t)orte_ras_base_compare_node,
                                                     (orte_dss_size_fn_t)orte_ras_base_size_node,
                                                     (orte_dss_print_fn_t)orte_ras_base_print_node,
                                                     (orte_dss_release_fn_t)orte_ras_base_std_obj_release,
                                                     ORTE_DSS_STRUCTURED,
                                                     "ORTE_RAS_NODE", &tmp))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* Some systems do not want any RAS support. In those cases,
     * memory consumption is also an issue. For those systems, we
     * avoid opening the RAS components by checking for a directive
     * to use the "null" component.
     */
    param = mca_base_param_reg_string_name("ras", NULL, NULL,
                                           false, false, NULL, NULL);
    if (ORTE_ERROR == mca_base_param_lookup_string(param, &requested)) {
        return ORTE_ERROR;
    }
    if (NULL != requested && 0 == strcmp(requested, "null")) {
        /* the user has specifically requested that we use the "null"
         * component. In this case, that means we do NOT open any
         * components, and we simply use the default module we have
         * already defined above
         */
        free(requested);
        orte_ras_base.ras_opened_valid = false;
        orte_ras = orte_ras_no_op; /* use the no_op module */
        return ORTE_SUCCESS;
    }
    /* the looked-up string is not needed past this point */
    free(requested);

    /* check for timing tests */
    value = 0;
    mca_base_param_reg_int_name("orte", "timing",
                                "Request that critical timing loops be measured",
                                false, false, 0, &value);
    if (value != 0) {
        orte_ras_base.timing = true;
    } else {
        orte_ras_base.timing = false;
    }
    
    /* Open up all available components */
    if (ORTE_SUCCESS != 
        mca_base_components_open("ras", orte_ras_base.ras_output,
                                 mca_ras_base_static_components, 
                                 &orte_ras_base.ras_opened, true)) {
        return ORTE_ERROR;
    }

    /* if we are not on a HNP, select the proxy 'module' */
    if (!orte_process_info.seed) {
        orte_ras = orte_ras_base_proxy_module;
        /* initialize the module; the value it stores in rc is not
           examined here */
        orte_ras_base_proxy_init(&rc);
        orte_ras_base.ras_using_proxy = true;
        return ORTE_SUCCESS;
    }

    /* All done */

    orte_ras_base.ras_opened_valid = true;
    return ORTE_SUCCESS;
}
Ejemplo n.º 14
0
/*
 * Read the hostfile named by the "rds" / "hostfile" / "path" MCA
 * parameter and publish the nodes it adds.
 *
 * The nodes already known to the RAS are queried first; the hostfile is
 * then parsed against them, and every resulting update is converted
 * into an RDS cell description (carrying a name and a cellid attribute),
 * stored in the RDS, inserted into the RAS, and finally the
 * oversubscribe override is set for the job.
 *
 * Only the first call does any work; later calls return ORTE_SUCCESS
 * immediately.
 *
 * Every exit taken after the lists have been constructed goes through
 * the cleanup label, so the three lists, the hostfile path and any
 * partially built RDS items are released on error paths as well.
 *
 * Returns ORTE_SUCCESS or the error code of the first step that failed.
 */
static int orte_rds_hostfile_query(orte_jobid_t job)
{
    opal_list_t existing;
    opal_list_t updates, rds_updates;
    opal_list_item_t *item;
    orte_rds_cell_desc_t *rds_item;
    orte_rds_cell_attr_t *new_attr;
    orte_ras_node_t *ras_item;
    int rc;

    if (orte_rds_hostfile_queried) {
        /* if we have already been queried, then
         * our info is on the registry, so just
         * return. Note that this restriction
         * may eventually be lifted - ideally, 
         * we might check to see if this is a
         * new file name and go ahead with the
         * query if so.
         */
        return ORTE_SUCCESS;
    }
    orte_rds_hostfile_queried = true;
    
    OBJ_CONSTRUCT(&existing, opal_list_t);
    OBJ_CONSTRUCT(&updates, opal_list_t);
    OBJ_CONSTRUCT(&rds_updates, opal_list_t);
    rc = orte_ras_base_node_query(&existing);
    if(ORTE_SUCCESS != rc) {
        goto cleanup;
    }

    rc = mca_base_param_find("rds", "hostfile", "path");
    mca_base_param_lookup_string(rc, &mca_rds_hostfile_component.path);

    rc = orte_rds_hostfile_parse(mca_rds_hostfile_component.path, &existing, &updates);
    if (ORTE_ERR_NOT_FOUND == rc) {
        if(mca_rds_hostfile_component.default_hostfile) {
            rc = ORTE_SUCCESS;
        } else {
            opal_show_help("help-rds-hostfile.txt", "rds:no-hostfile",
                           true,
                           mca_rds_hostfile_component.path);
        }
        goto cleanup;
    } else if (ORTE_SUCCESS != rc) {
        goto cleanup;
    }

    if ( !opal_list_is_empty(&updates) ) {

        /* Convert RAS update list to RDS update list */
        for ( ras_item  = (orte_ras_node_t*)opal_list_get_first(&updates);
              ras_item != (orte_ras_node_t*)opal_list_get_end(&updates);
              ras_item  = (orte_ras_node_t*)opal_list_get_next(ras_item)) {

            rds_item = OBJ_NEW(orte_rds_cell_desc_t);
            if (NULL == rds_item) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                rc = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            /* Queue the descriptor right away: from here on the cleanup
             * code owns it, so a later failure only has to jump there.
             * The list is not read until the loop has finished. */
            opal_list_append(&rds_updates, &rds_item->super);

            rds_item->site  = strdup("Hostfile");
            rds_item->name  = strdup(ras_item->node_name);
            if (need_cellid) {
#if 0 /* JJH Repair when cellid's are fixed */
                /* Create a new cellid for this hostfile */
                rc = orte_ns.create_cellid(&local_cellid, rds_item->site, rds_item->name);
                if (ORTE_SUCCESS != rc) {
                    ORTE_ERROR_LOG(rc);
                    return rc;
                }
#endif
                local_cellid = 0;
                need_cellid = false;
            }

            rds_item->cellid      = local_cellid;
            ras_item->node_cellid = local_cellid;

            /* Attribute 1: the node name */
            new_attr = OBJ_NEW(orte_rds_cell_attr_t);
            if (NULL == new_attr) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                rc = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            /* attach first, fill in afterwards, for the same reason as
             * the descriptor above */
            opal_list_append(&(rds_item->attributes), &new_attr->super);
            new_attr->keyval.key          = strdup(ORTE_RDS_NAME);
            new_attr->keyval.value = OBJ_NEW(orte_data_value_t);
            if (NULL == new_attr->keyval.value) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                rc = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            new_attr->keyval.value->type   = ORTE_STRING;
            new_attr->keyval.value->data   = strdup(ras_item->node_name);

            /* Attribute 2: the cellid */
            new_attr = OBJ_NEW(orte_rds_cell_attr_t);
            if (NULL == new_attr) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                rc = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            opal_list_append(&(rds_item->attributes), &new_attr->super);
            new_attr->keyval.key          = strdup(ORTE_CELLID_KEY);
            new_attr->keyval.value = OBJ_NEW(orte_data_value_t);
            if (NULL == new_attr->keyval.value) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                rc = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            new_attr->keyval.value->type   = ORTE_CELLID;
            if (ORTE_SUCCESS != (rc = orte_dss.copy(&(new_attr->keyval.value->data), &(rds_item->cellid), ORTE_CELLID))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
        }

        /* Insert the new node into the RDS */
        rc = orte_rds.store_resource(&rds_updates);
        if (ORTE_SUCCESS != rc) {
            goto cleanup;
        }

        /* Then the RAS, since we can assume that any
         * resources listed in the hostfile have been
         * already allocated for our use.
         */
        rc = orte_ras_base_node_insert(&updates);
        if (ORTE_SUCCESS != rc) {
            goto cleanup;
        }
        
        /* and now, indicate that ORTE should override any oversubscribed conditions
         * based on local hardware limits since the user (a) might not have
         * provided us any info on the #slots for a node, and (b) the user
         * might have been wrong! If we don't check the number of local physical
         * processors, then we could be too aggressive on our sched_yield setting
         * and cause performance problems.
         */
        rc = orte_ras_base_set_oversubscribe_override(job);
        if (ORTE_SUCCESS != rc) {
            goto cleanup;
        }
    }

cleanup:
    /* free(NULL) is a no-op, so no guard is needed */
    free(mca_rds_hostfile_component.path);
    mca_rds_hostfile_component.path = NULL;

    while(NULL != (item = opal_list_remove_first(&existing))) {
        OBJ_RELEASE(item);
    }

    while(NULL != (item = opal_list_remove_first(&updates))) {
        OBJ_RELEASE(item);
    }

    while (NULL != (rds_item = (orte_rds_cell_desc_t*)opal_list_remove_first(&rds_updates))) {
        while (NULL != (new_attr = (orte_rds_cell_attr_t*)opal_list_remove_first(&(rds_item->attributes)))) {
            OBJ_RELEASE(new_attr);
        }
        OBJ_RELEASE(rds_item);
    }

    OBJ_DESTRUCT(&existing);
    OBJ_DESTRUCT(&updates);
    OBJ_DESTRUCT(&rds_updates);

    return rc;
}
Ejemplo n.º 15
0
/**
 * Run a user-level debugger.
 *
 * Reads the colon-separated list held in the "orte" "base_user_debugger"
 * MCA parameter, hands each entry to process() until one succeeds,
 * clears the MPIR path/argument buffers, exports the
 * "orte" "in_parallel_debugger" MCA parameter into the environment with
 * the value "1", and exec's the command line that process() produced.
 *
 * This function does not return: it either replaces the process image
 * via execvp() or prints a help message and exits with status 1.
 *
 * @param basename   Passed through to process() and to the exec-failure
 *                   help message.
 * @param cmd_line   Passed through to process().
 * @param argc       Passed through to process().
 * @param argv       Passed through to process().
 * @param num_procs  Passed through to process().
 */
void orte_run_debugger(char *basename, opal_cmd_line_t *cmd_line,
                       int argc, char *argv[], int num_procs)
{
    int i, id;
    char **new_argv = NULL;
    char *value = NULL, **lines, *env_name;

    /* Get the orte_base_debug MCA parameter and search for a debugger
       that can run */
    
    id = mca_base_param_find("orte", NULL, "base_user_debugger");
    if (id < 0) {
        orte_show_help("help-orterun.txt", "debugger-mca-param-not-found", 
                       true);
        exit(1);
    }
    mca_base_param_lookup_string(id, &value);
    if (NULL == value) {
        orte_show_help("help-orterun.txt", "debugger-orte_base_user_debugger-empty",
                       true);
        exit(1);
    }

    /* Look through all the values in the MCA param */

    lines = opal_argv_split(value, ':');
    free(value);

    /* A split that yields no array at all (nothing to split, or an
       allocation failure) means there is no debugger to try; do not
       index through a NULL pointer */
    if (NULL == lines) {
        orte_show_help("help-orterun.txt", "debugger-not-found", true);
        exit(1);
    }

    for (i = 0; NULL != lines[i]; ++i) {
        if (ORTE_SUCCESS == process(lines[i], basename, cmd_line, argc, argv, 
                                    &new_argv, num_procs)) {
            break;
        }
    }

    /* If we didn't find one, abort */

    if (NULL == lines[i]) {
        orte_show_help("help-orterun.txt", "debugger-not-found", true);
        exit(1);
    }
    opal_argv_free(lines);

    /* We found one */
    
    /* cleanup the MPIR arrays in case the debugger doesn't set them */
    memset((char*)MPIR_executable_path, 0, MPIR_MAX_PATH_LENGTH);
    memset((char*)MPIR_server_arguments, 0, MPIR_MAX_ARG_LENGTH);
    
    /* Set an MCA param so that everyone knows that they are being
       launched under a debugger; not all debuggers are consistent
       about setting MPIR_being_debugged in both the launcher and the
       MPI processes */
    env_name = mca_base_param_environ_variable("orte", 
                                               "in_parallel_debugger", NULL);
    if (NULL != env_name) {
        opal_setenv(env_name, "1", true, &environ);
        free(env_name);
    }

    /* Launch the debugger */
    execvp(new_argv[0], new_argv);

    /* Only reached when execvp() failed */
    value = opal_argv_join(new_argv, ' ');
    orte_show_help("help-orterun.txt", "debugger-exec-failed",
                   true, basename, value, new_argv[0]);
    free(value);
    opal_argv_free(new_argv);
    exit(1);
}
Ejemplo n.º 16
0
/*
 * Utility-level ORCM initialization.
 *
 * Initializes OPAL, registers the ORCM error-code-to-string converter,
 * adds the ORCM help-file directory to the show_help search list, and
 * prepends the ORCM component directory to the "mca" "component_path"
 * MCA parameter.  Both directories are prefixed with the value of the
 * ORCM_DESTDIR environment variable when it is set.
 *
 * Returns ORCM_SUCCESS, or the failing step's error code after printing
 * a help message (no message is printed for ORCM_ERR_SILENT).
 */
int orcm_init_util(void)
{
    int ret, i;
    const char *error;
    char *destdir, *tmp, *mcp = NULL, *new_mcp;

    /* Setup OPAL */
    if( ORTE_SUCCESS != (ret = opal_init(NULL, NULL)) ) {
        error = "opal_init_util";
        goto error;
    }
    /* register handler for errnum -> string conversion */
    opal_error_register("OPENRCM", ORCM_ERR_BASE, ORCM_ERR_MAX, orcm_err2str);
    /* register where the OPENRCM show_help files are located */
    if (NULL != (destdir = getenv("ORCM_DESTDIR"))) {
        asprintf(&tmp, "%s%s", destdir, ORCM_PKGHELPDIR);
    } else {
        tmp = strdup(ORCM_PKGHELPDIR);
    }
    ret = opal_show_help_add_dir(tmp);
    /* release the directory string before examining ret so that the
       error path does not leak it */
    free(tmp);
    if (ORTE_SUCCESS != ret) {
        error = "register show_help_dir";
        goto error;
    }
    
    /* Add ORCM's component directory into the
       mca_base_param_component_path */
    i = mca_base_param_find("mca", NULL, "component_path");
    if (i < 0) {
        ret = ORCM_ERR_NOT_FOUND;
        error = "Could not find mca_component_path";
        goto error;
    }
    /* mcp starts out NULL, so a lookup that stores nothing is caught by
       the test below instead of reading an indeterminate pointer */
    mca_base_param_lookup_string(i, &mcp);
    if (NULL == mcp) {
        ret = ORCM_ERR_NOT_FOUND;
        error = "Could not find mca_component_path";
        goto error;
    }
    if (NULL != destdir) {
        asprintf(&new_mcp, "%s%s:%s", destdir, ORCM_PKGLIBDIR, mcp);
    } else {
        asprintf(&new_mcp, "%s:%s", ORCM_PKGLIBDIR, mcp);
    }
    mca_base_param_set_string(i, new_mcp);
    free(new_mcp);
    free(mcp);

    orcm_util_initialized = true;
    
    return ORCM_SUCCESS;
    
error:
    if (ORCM_ERR_SILENT != ret) {
        orte_show_help("help-openrcm-runtime.txt",
                       "orcm_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }
    
    return ret;
}
/*
 * Walk a list of directories, collect the candidate component files
 * found there, and try to open every one that passes the
 * include/exclude filter.
 *
 * path             - OPAL_ENV_SEP-separated directory list, or NULL to
 *                    use the string stored in the MCA parameter
 *                    mca_base_param_component_path.
 * type_name        - framework type to search for; at most
 *                    MCA_BASE_MAX_TYPE_NAME_LEN characters are kept.
 * name             - handed unchanged to use_component().
 * include_mode     - handed unchanged to use_component().
 * found_components - handed to open_component() for each accepted file.
 *
 * Note that we use our own path iteration functionality (vs. ltdl's
 * lt_dladdsearchdir() functionality) because we need to look at
 * companion .ompi_info files in the same directory as the library to
 * generate dependencies, etc.  If we use the plain lt_dlopen()
 * functionality, we would not get the directory name of the file
 * finally opened in recursive dependency traversals.
 */
static void find_dyn_components(const char *path, const char *type_name, 
                                const char **name, bool include_mode,
                                opal_list_t *found_components)
{
    ltfn_data_holder_t params;
    char *path_to_use = NULL;   /* stays NULL if the lookup stores nothing */
    char *dir, *end;
    component_file_item_t *file;
    opal_list_item_t *cur;

    /* Bounded copy; the explicit terminator covers the truncation case. */
    strncpy(params.type, type_name, MCA_BASE_MAX_TYPE_NAME_LEN);
    params.type[MCA_BASE_MAX_TYPE_NAME_LEN] = '\0';

    /* No specific component name is requested at this level. */
    params.name[0] = '\0';

    /* Pick the search path: the caller's, or the MCA parameter's. */
    if (NULL == path) {
        mca_base_param_lookup_string(mca_base_param_component_path, &path_to_use);
        if (NULL == path_to_use) {
            /* No path means nothing to search. */
            return;
        }
    } else {
        path_to_use = strdup(path);
    }

    /* found_files is not declared in this function.
       NOTE(review): presumably a file-scope list that save_filename()
       appends to -- confirm. */
    OBJ_CONSTRUCT(&found_files, opal_list_t);

    /* Split the path in place on OPAL_ENV_SEP and scan each directory.
       A non-zero return from lt_dlforeachfile() stops the scan.  The
       cursor only advances when a separator was actually found, so no
       pointer arithmetic is ever done on a NULL "end". */
    dir = path_to_use;
    while (NULL != dir) {
        end = strchr(dir, OPAL_ENV_SEP);
        if (NULL != end) {
            *end = '\0';
        }
        if (0 != lt_dlforeachfile(dir, save_filename, &params)) {
            break;
        }
        dir = (NULL != end) ? end + 1 : NULL;
    }

    /* Give every still-UNVISITED file one chance to load.  Opening one
       component may cause others to be visited, hence the status check
       on each iteration.  Results of open_component() are ignored. */
    for (cur = opal_list_get_first(&found_files); 
         opal_list_get_end(&found_files) != cur;
         cur = opal_list_get_next(cur)) {
        file = (component_file_item_t *) cur;

        if (UNVISITED != file->status) {
            continue;
        }
        file->status = CHECKING_CYCLE;

        if (use_component(include_mode, name, file->name)) {
            open_component(file, found_components);
        }
    }

    /* The per-file bookkeeping is no longer needed: drain and release. */
    while (NULL != (cur = opal_list_remove_first(&found_files))) {
        OBJ_RELEASE(cur);
    }

    /* All done, now let's cleanup */
    free(path_to_use);

    OBJ_DESTRUCT(&found_files);
}
Ejemplo n.º 18
0
/*
 * Open up all directories in a given path and search for components of
 * the specified type (and possibly of a given name).
 *
 * path             - OPAL_ENV_SEP-separated directory list, or NULL to
 *                    use the string stored in the MCA parameter
 *                    mca_base_param_component_path.
 * type_name        - framework type to search for.
 * name             - specific component name, or NULL for "all".
 * found_components - handed to open_component() for each candidate file.
 *
 * type_name and name are copied into fixed-size fields of the data
 * passed to save_filename(); over-long values are truncated rather
 * than written past the end of those fields.
 *
 * Note that we use our own path iteration functionality (vs. ltdl's
 * lt_dladdsearchdir() functionality) because we need to look at
 * companion .ompi_info files in the same directory as the library to
 * generate dependencies, etc.  If we use the plain lt_dlopen()
 * functionality, we would not get the directory name of the file
 * finally opened in recursive dependency traversals.
 */
static void find_dyn_components(const char *path, const char *type_name, 
                                const char *name,
                                opal_list_t *found_components)
{
  ltfn_data_holder_t params;
  char *path_to_use = NULL, *dir, *end;
  component_file_item_t *file;
  opal_list_item_t *cur;

  /* NOTE(review): sizeof is used on the assumption that params.type and
     params.name are char arrays (the original strcpy() into them implies
     that) -- confirm against the ltfn_data_holder_t definition. */
  strncpy(params.type, type_name, sizeof(params.type) - 1);
  params.type[sizeof(params.type) - 1] = '\0';

  if (NULL == name) {
    params.name[0] = '\0';
    opal_output_verbose(40, 0, "mca: base: component_find: looking for all dynamic %s MCA components", 
                       type_name);
  } else {
    strncpy(params.name, name, sizeof(params.name) - 1);
    params.name[sizeof(params.name) - 1] = '\0';
    opal_output_verbose(40, 0,
                       "mca: base: component_find: looking for dynamic %s MCA component named \"%s\"",
                       type_name, name);
  }

  /* If path is NULL, iterate over the set of directories specified by
     the MCA param mca_base_component_path.  If path is not NULL, then
     use that as the path.  Either way path_to_use ends up as a private
     heap string that is split in place below and freed at the end. */

  if (NULL == path) {
    mca_base_param_lookup_string(mca_base_param_component_path, &path_to_use);
    if (NULL == path_to_use) {
      /* If there's no path, then there's nothing to search -- we're
         done */
      return;
    }
  } else {
    path_to_use = strdup(path);
  }

  /* Iterate over all the files in the directories in the path and
     make a master array of all the matching filenames that we
     find.  found_files is not declared in this function.
     NOTE(review): presumably a file-scope list that save_filename()
     appends to -- confirm. */

  OBJ_CONSTRUCT(&found_files, opal_list_t);
  dir = path_to_use;
  while (NULL != dir) {
    end = strchr(dir, OPAL_ENV_SEP);
    if (NULL != end) {
      *end = '\0';
    }
    /* A non-zero return stops the directory scan. */
    if (0 != lt_dlforeachfile(dir, save_filename, &params)) {
      break;
    }
    /* Advance only past a real separator; never compute NULL + 1. */
    dir = (NULL != end) ? end + 1 : NULL;
  }

  /* Iterate through all the filenames that we found.  Since one
     component may [try to] call another to be loaded, only try to load
     the UNVISITED files.  Also, ignore the return code -- basically,
     give every file one chance to try to load.  If they load, great.
     If not, great. */

  for (cur = opal_list_get_first(&found_files); 
       opal_list_get_end(&found_files) != cur;
       cur = opal_list_get_next(cur)) {
    file = (component_file_item_t *) cur;
    if (UNVISITED == file->status) {
      open_component(file, found_components);
    }
  }

  /* So now we have a final list of loaded components.  We can free all
     the file information. */

  while (NULL != (cur = opal_list_remove_first(&found_files))) {
    OBJ_RELEASE(cur);
  }

  /* All done */

  free(path_to_use);
  OBJ_DESTRUCT(&found_files);
}
Ejemplo n.º 19
0
/*
 * Establish this process's name when running under SLURM.
 *
 * The base name comes either from the "ns_nds_name" string parameter
 * or, when that is unset, from the separate "cellid", "jobid" and
 * "vpid" parameters.  The SLURM_NODEID environment value is then added
 * to the vpid, the system nodename is replaced with the result of
 * get_slurm_nodename(), and vpid_start / num_procs are copied into
 * orte_process_info.
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_NOT_FOUND when a required parameter or
 * SLURM_NODEID is missing, or the error code of a failed orte_ns call.
 *
 * Every string obtained from mca_base_param_lookup_string() is freed
 * here once it has been converted.
 */
int
orte_sds_slurm_set_name(void)
{
    int rc;
    int id;
    int vpid_start = -1;   /* -1 == "not provided", checked below */
    int num_procs = -1;
    char* name_string = NULL;
    const char *nodeid_env;
    int slurm_nodeid;

    /* start by getting our cellid, jobid, and vpid (which is the
       starting vpid for the list of daemons) */
    id = mca_base_param_register_string("ns", "nds", "name", NULL, NULL);
    mca_base_param_lookup_string(id, &name_string);

    if (NULL != name_string) {
        /* A complete name was supplied: convert it in one step. */
        rc = orte_ns.convert_string_to_process_name(&(orte_process_info.my_name),
                                                    name_string);
        free(name_string);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    } else {
        /* Build the name from its three individual components. */
        orte_cellid_t cellid;
        orte_jobid_t jobid;
        orte_vpid_t vpid;
        char* cellid_string = NULL;
        char* jobid_string = NULL;
        char* vpid_string = NULL;

        id = mca_base_param_register_string("ns", "nds", "cellid", NULL, NULL);
        mca_base_param_lookup_string(id, &cellid_string);
        if (NULL == cellid_string) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return ORTE_ERR_NOT_FOUND;
        }
        rc = orte_ns.convert_string_to_cellid(&cellid, cellid_string);
        free(cellid_string);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }

        id = mca_base_param_register_string("ns", "nds", "jobid", NULL, NULL);
        mca_base_param_lookup_string(id, &jobid_string);
        if (NULL == jobid_string) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return ORTE_ERR_NOT_FOUND;
        }
        rc = orte_ns.convert_string_to_jobid(&jobid, jobid_string);
        free(jobid_string);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }

        id = mca_base_param_register_string("ns", "nds", "vpid", NULL, NULL);
        mca_base_param_lookup_string(id, &vpid_string);
        if (NULL == vpid_string) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return ORTE_ERR_NOT_FOUND;
        }
        rc = orte_ns.convert_string_to_vpid(&vpid, vpid_string);
        free(vpid_string);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }

        if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&(orte_process_info.my_name),
                                                              cellid,
                                                              jobid,
                                                              vpid))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    }

    /* fix up the base name and make it the "real" name.  getenv() returns
       NULL when SLURM_NODEID is unset, which must not reach atoi(). */
    nodeid_env = getenv("SLURM_NODEID");
    if (NULL == nodeid_env) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }
    slurm_nodeid = atoi(nodeid_env);
    orte_process_info.my_name->vpid += slurm_nodeid;

    /* fix up the system info nodename to match exactly what slurm returned */
    if (NULL != orte_system_info.nodename) {
        free(orte_system_info.nodename);
    }
    orte_system_info.nodename = get_slurm_nodename(slurm_nodeid);

    /* Both integer parameters default to -1; a negative value after the
       lookup therefore means the launcher did not provide it. */
    id = mca_base_param_register_int("ns", "nds", "vpid_start", NULL, -1);
    mca_base_param_lookup_int(id, &vpid_start);
    if (vpid_start < 0) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }

    id = mca_base_param_register_int("ns", "nds", "num_procs", NULL, -1);
    mca_base_param_lookup_int(id, &num_procs);
    if (num_procs < 0) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }

    orte_process_info.vpid_start = (orte_vpid_t)vpid_start;
    orte_process_info.num_procs = (size_t)num_procs;
    return ORTE_SUCCESS;
}
Ejemplo n.º 20
0
/*
 * Print every non-internal MCA parameter together with where its value
 * came from.
 *
 * Only rank 0 produces output; all other ranks return OMPI_SUCCESS
 * immediately.  When ompi_mpi_show_mca_params_file is a non-empty
 * string the listing is written to that file (preceded by a short
 * header naming the time, rank, "requested" count and nodename);
 * otherwise each line goes through opal_output(0, ...).
 *
 * Parameters whose source is default / file / environment / override
 * are skipped when the matching show_*_mca_params flag is false.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERR_FILE_OPEN_FAILURE when the output
 * file cannot be opened.
 */
int ompi_show_all_mca_params(int32_t rank, int requested, char *nodename) {
    opal_list_t *info;
    opal_list_item_t *i;
    mca_base_param_info_t *item;
    char *value_string;
    int value_int;
    FILE *fp = NULL;          /* non-NULL exactly when writing to a file */
    time_t timestamp;
    mca_base_param_source_t source;
    char *src_file;
    const char *src_string;

    if (rank != 0) {
        return OMPI_SUCCESS;
    }

    timestamp = time(NULL);

    /* Open the file if one is specified */
    if (0 != strlen(ompi_mpi_show_mca_params_file)) {
        if ( NULL == (fp = fopen(ompi_mpi_show_mca_params_file, "w")) ) {
            opal_output(0, "Unable to open file <%s> to write MCA parameters", ompi_mpi_show_mca_params_file);
            return OMPI_ERR_FILE_OPEN_FAILURE;
        }
        fprintf(fp, "#\n");
        /* ctime() supplies the trailing newline for this line. */
        fprintf(fp, "# This file was automatically generated on %s", ctime(&timestamp));
        fprintf(fp, "# by MPI_COMM_WORLD rank %d (out of a total of %d) on %s\n", rank, requested, nodename );
        fprintf(fp, "#\n");
    }

    mca_base_param_dump(&info, false);

    /* Iterate up to the list's end sentinel.  Stopping at
       opal_list_get_last() would skip the final parameter (and print
       nothing at all for a one-element list). */
    for (i =  opal_list_get_first(info); 
         i != opal_list_get_end(info);
         i =  opal_list_get_next(i)) {
        item = (mca_base_param_info_t*) i;

        /* If this is an internal param, don't print it */
        if (item->mbpp_internal) {
            continue;
        }

        /* get the source - where the param was last set */
        src_file = NULL;
        if (OPAL_SUCCESS != 
            mca_base_param_lookup_source(item->mbpp_index, &source, &src_file)) {
            continue;
        }

        /* Honour the per-source display switches. */
        if (MCA_BASE_PARAM_SOURCE_DEFAULT == source && !show_default_mca_params) {
            continue;
        }
        if (MCA_BASE_PARAM_SOURCE_FILE == source && !show_file_mca_params) {
            continue;
        }
        if (MCA_BASE_PARAM_SOURCE_ENV == source && !show_enviro_mca_params) {
            continue;
        }
        if (MCA_BASE_PARAM_SOURCE_OVERRIDE == source && !show_override_mca_params) {
            continue;
        }

        /* Render the value as a heap string so both types share the
           printing and free() below. */
        if (MCA_BASE_PARAM_TYPE_STRING == item->mbpp_type) {
            value_string = NULL;
            mca_base_param_lookup_string(item->mbpp_index, &value_string);
            if (NULL == value_string) {
                value_string = strdup("");
            }
        } else {
            mca_base_param_lookup_int(item->mbpp_index, &value_int);
            if (asprintf(&value_string, "%d", value_int) < 0) {
                /* value_string is undefined after a failed asprintf() */
                value_string = NULL;
            }
        }
        if (NULL == value_string) {
            /* Out of memory: nothing sensible to print for this entry. */
            continue;
        }

        switch(source) {
            case MCA_BASE_PARAM_SOURCE_DEFAULT:
                src_string = "default value";
                break;
            case MCA_BASE_PARAM_SOURCE_ENV:
                src_string = "environment";
                break;
            case MCA_BASE_PARAM_SOURCE_FILE:
                src_string = "file";
                break;
            case MCA_BASE_PARAM_SOURCE_OVERRIDE:
                src_string = "API override";
                break;
            default:
                src_string = "unknown";
                break;
        }

        /* Print the parameter */
        if (NULL != fp) {
            if (NULL == src_file) {
                fprintf(fp, "%s=%s (%s)\n", item->mbpp_full_name, value_string,
                        src_string);
            } else {
                fprintf(fp, "%s=%s (%s:%s)\n", item->mbpp_full_name, value_string,
                        src_string, src_file);
            }
        } else {
            if (NULL == src_file) {
                opal_output(0, "%s=%s (%s)\n", item->mbpp_full_name, value_string,
                            src_string);
            } else {
                opal_output(0, "%s=%s (%s:%s)\n", item->mbpp_full_name, value_string,
                            src_string, src_file);
            }
        }

        free(value_string);
    }

    /* Close file, cleanup allocated memory*/
    if (NULL != fp) {
        fclose(fp);
    }
    mca_base_param_dump_release(info);

    return OMPI_SUCCESS;
}
/*
 * Choose an fcoll module for a file and initialise it.
 *
 * Selection order:
 *   1. If "preferred" is given and its query returns a module with an
 *      init function, that module is used.
 *   2. Otherwise, if the MCA parameter mca_fcoll_base_param holds a
 *      non-empty comma-separated list of names, the first available
 *      component whose name is listed and whose query succeeds is used;
 *      if none qualifies the call fails with OMPI_ERROR.
 *   3. Otherwise every available component is queried, the one with the
 *      highest priority is initialised, and all others are unqueried.
 *
 * On success file->f_fcoll and file->f_fcoll_component are set and the
 * return value is whatever the chosen module's fcoll_module_init()
 * returned.  Other returns: OMPI_ERROR (nothing usable) and
 * OMPI_ERR_OUT_OF_RESOURCE.
 */
int mca_fcoll_base_file_select (struct mca_io_ompio_file_t *file,
                                mca_base_component_t *preferred) 
{
    int priority;
    int best_priority = -1;
    int err = MPI_SUCCESS;
    int i;
    int num_names;
    char *names = NULL;
    char **name_array;
    opal_list_item_t *item;
    mca_base_component_priority_list_item_t *cpli;
    mca_fcoll_base_component_t *component;
    mca_fcoll_base_component_t *best_component = NULL;
    mca_fcoll_base_module_t *module;
    opal_list_t queried;
    queried_module_t *om;

    /* --- 1. A preferred component wins if it is able to run. --- */
    if (NULL != preferred) {
        opal_output_verbose(10, mca_fcoll_base_output,
                            "fcoll:base:file_select: Checking preferred component: %s",
                            &(preferred->mca_component_name[0]));

        /* Query the component for its priority and module structure. */
        component = (mca_fcoll_base_component_t *)preferred;
        module = component->fcollm_file_query (file, &priority);
        if (NULL != module && 
            NULL != module->fcoll_module_init) {
            file->f_fcoll = module;
            file->f_fcoll_component = preferred;

            return module->fcoll_module_init(file);
        }
        /* The preferred component is present but cannot run; fall
         * through and pick from the list of available components. */
    }

    /* --- 2. Components requested by name through the MCA parameter. --- */
    mca_base_param_lookup_string (mca_fcoll_base_param, &names);

    if (NULL != names && 0 < strlen(names)) {
        bool selected = false;

        name_array = opal_argv_split (names, ',');
        num_names = opal_argv_count (name_array);

        opal_output_verbose(10, mca_fcoll_base_output,
                            "fcoll:base:file_Select: Checking all available module");

        /* Walk the available components and take the first one whose
         * name is in the requested list and whose query succeeds. */
        for (item = opal_list_get_first(&mca_fcoll_base_components_available);
             !selected &&
             item != opal_list_get_end(&mca_fcoll_base_components_available);
             item = opal_list_get_next(item)) {
            cpli = (mca_base_component_priority_list_item_t *) item;
            component = (mca_fcoll_base_component_t *) cpli->super.cli_component;
            opal_output_verbose(10, mca_fcoll_base_output,
                                "select: initialising %s component %s",
                                component->fcollm_version.mca_type_name,
                                component->fcollm_version.mca_component_name);

            for (i = 0; !selected && i < num_names; i++) {
                if (0 != strcmp(name_array[i],
                                component->fcollm_version.mca_component_name)) {
                    continue;
                }
                module = component->fcollm_file_query (file, &priority);
                if (NULL != module && 
                    NULL != module->fcoll_module_init) {
                    file->f_fcoll = module;
                    file->f_fcoll_component = (mca_base_component_t *)component;
                    err = module->fcoll_module_init(file);
                    selected = true;
                }
            }
        }

        /* The name list is no longer needed on either outcome.
         * NOTE(review): other callers of mca_base_param_lookup_string()
         * in this code base free() the returned string; this assumes
         * the same ownership applies here. */
        opal_argv_free (name_array);
        free (names);

        if (!selected) {
            opal_output_verbose (10, mca_fcoll_base_output,
                                 "fcoll:base:file_select: preferred modules were not available");
            return OMPI_ERROR;
        }
        return err;
    }
    free (names);   /* NULL or an empty string; free(NULL) is a no-op */

    /* --- 3. No names requested: query everything that is available. --- */
    OBJ_CONSTRUCT(&queried, opal_list_t);

    for (item = opal_list_get_first(&mca_fcoll_base_components_available);
         item != opal_list_get_end(&mca_fcoll_base_components_available);
         item = opal_list_get_next(item)) {
        cpli = (mca_base_component_priority_list_item_t *) item;
        component = (mca_fcoll_base_component_t *) cpli->super.cli_component;
        opal_output_verbose(10, mca_fcoll_base_output,
                            "select: initialising %s component %s",
                            component->fcollm_version.mca_type_name,
                            component->fcollm_version.mca_component_name);

        /* The query can only be called if the component provides one. */
        if (NULL == component->fcollm_file_query) {
            opal_output_verbose(10, mca_fcoll_base_output,
                                "select: no query, ignoring the component");
            continue;
        }

        module = component->fcollm_file_query (file, &priority);
        if (NULL == module ||
            NULL == module->fcoll_module_init) {
            /* The query did not return anything usable. */
            opal_output_verbose(10, mca_fcoll_base_output,
                                "select: query returned failure");
            continue;
        }

        opal_output_verbose(10, mca_fcoll_base_output,
                            "select: query returned priority %d",
                            priority);

        /* Is this the best component found so far? */
        if (priority > best_priority) {
            best_priority = priority;
            best_component = component;
        }

        /* Remember every successfully queried component so that the
         * losers can be unqueried afterwards. */
        om = OBJ_NEW(queried_module_t);
        if (NULL == om) {
            /* Out of memory: drop what was collected so far. */
            while (NULL != (item = opal_list_remove_first(&queried))) {
                OBJ_RELEASE(item);
            }
            OBJ_DESTRUCT(&queried);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        om->om_component = component;
        om->om_module = module; 
        opal_list_append(&queried, (opal_list_item_t *)om); 
    }

    if (NULL == best_component) {
        /* No component was able to run; release any bookkeeping items
         * (possible when every priority was below zero) and give up. */
        while (NULL != (item = opal_list_remove_first(&queried))) {
            OBJ_RELEASE(item);
        }
        OBJ_DESTRUCT(&queried);
        return OMPI_ERROR;
    }

    /* Initialise the winner and unquery everyone else.  We do not
     * return from inside the loop because the remaining bookkeeping
     * items still have to be released. */
    for (item = opal_list_remove_first(&queried);
         NULL != item;
         item = opal_list_remove_first(&queried)) {
        om = (queried_module_t *) item;
        if (om->om_component == best_component) {
            /* NOTE: a component might not define every function; NULL
             * entries in the module are not filled in with base
             * implementations here (fill_null_pointers is not called). */
            file->f_fcoll = om->om_module;
            err = om->om_module->fcoll_module_init(file);
            file->f_fcoll_component = (mca_base_component_t *)best_component;
        } else if (NULL != om->om_component->fcollm_file_unquery) {
            /* Only components that have clean-up work provide an
             * unquery function, hence the NULL check. */
            (void) om->om_component->fcollm_file_unquery(file);
            opal_output_verbose(10, mca_fcoll_base_output,
                                "select: component %s is not selected",
                                om->om_component->fcollm_version.mca_component_name);
        }
        OBJ_RELEASE(om);
    }

    opal_output_verbose(10, mca_fcoll_base_output,
                        "select: component %s selected",
                        best_component->fcollm_version.mca_component_name);

    OBJ_DESTRUCT(&queried);

    return err;
}
Ejemplo n.º 22
0
/*
 * Delete a file using one of the available io components.
 *
 * filename - name of the file to delete.
 * info     - info object forwarded to the components.
 *
 * If the MCA parameter mca_io_base_param holds a non-empty
 * comma-separated list, only those components are checked; otherwise
 * all available components are.  The last entry of the list returned
 * by check_components() is used for the delete; every other entry is
 * unqueried.
 *
 * Returns OMPI_SUCCESS, OMPI_ERROR when no component is selectable, or
 * the error returned by delete_file().
 */
int mca_io_base_delete(char *filename, struct ompi_info_t *info)
{
    int err, num_names;
    char *names = NULL, **name_array;
    opal_list_t *selectable;
    opal_list_item_t *item;
    avail_io_t *avail, selected;

    /* Announce */

    opal_output_verbose(10, mca_io_base_output,
                        "io:base:delete: deleting file: %s", 
                        filename);

    /* See if a set of component was requested by the MCA parameter.
       Don't check for error. */

    mca_base_param_lookup_string(mca_io_base_param, &names);

    /* See if there were any listed in the MCA parameter; parse them
       and check them all */

    if (NULL != names && 0 < strlen(names)) {
        name_array = opal_argv_split(names, ',');
        num_names = opal_argv_count(name_array);

        opal_output_verbose(10, mca_io_base_output, 
                            "io:base:delete: Checking specific modules: %s",
                            names);
        selectable = check_components(&mca_io_base_components_available, 
                                      filename, info, name_array, num_names);
        opal_argv_free(name_array);
    }

    /* Nope -- a specific [set of] component[s] was not requested.  Go
       check them all. */

    else {
        opal_output_verbose(10, mca_io_base_output, 
                            "io:base:delete: Checking all available modules");
        selectable = check_components(&mca_io_base_components_available, 
                                      filename, info, NULL, 0);
    }

    /* The parameter string is not needed past this point.
       NOTE(review): other callers of mca_base_param_lookup_string() in
       this code base free() the returned string; this assumes the same
       ownership applies here. */
    free(names);

    /* Upon return from the above, the modules list will contain the
       list of modules that returned (priority >= 0).  If we have no
       io modules available, it's an error */

    if (NULL == selectable) {
        /* There's no modules available.  Doh! */
        /* show_help */
        return OMPI_ERROR;
    }
    /* Do some kind of collective operation to find a module that
       everyone has available */
#if 1
    /* For the moment, just take the last module off the list.
       opal_list_remove_last() yields NULL for an empty list, which
       must not be dereferenced. */
    item = opal_list_remove_last(selectable);
    if (NULL == item) {
        OBJ_RELEASE(selectable);
        return OMPI_ERROR;
    }
    avail = (avail_io_t *) item;
    selected = *avail;          /* keep a copy; the list item goes away */
    OBJ_RELEASE(avail);
#else
    /* JMS CONTINUE HERE */
#endif

    /* Everything left in the selectable list is therefore unwanted,
       and we call their unquery() method (because they all had
       query() invoked, but will never have init() invoked in this
       scope). */

    for (item = opal_list_remove_first(selectable); item != NULL;
         item = opal_list_remove_first(selectable)) {
        avail = (avail_io_t *) item;
        unquery(avail, filename, info);
        OBJ_RELEASE(item);
    }
    OBJ_RELEASE(selectable);

    /* Finally -- delete the file with the selected component */

    if (OMPI_SUCCESS != (err = delete_file(&selected, filename, info))) {
        return err;
    }

    /* Announce the winner */

    opal_output_verbose(10, mca_io_base_output,
                        "io:base:delete: Selected io component %s", 
                        selected.ai_component.v2_0_0.io_version.mca_component_name);

    return OMPI_SUCCESS;
}
Ejemplo n.º 23
0
/**
 * Print the MCA parameters of a parameter list that match a framework
 * type and a component.
 *
 * Items of the list are treated as mca_base_param_info_t.  An entry is
 * printed when its type name equals "type" and either "component" is
 * ompi_info_component_all, the entry has no component name, or the
 * entry's component name equals "component".  Entries whose value source
 * cannot be looked up are skipped.
 *
 * When ompi_info_pretty is set, one human-readable line is emitted per
 * parameter (followed by its help text, if any).  Otherwise a series of
 * "mca:<type>:<component>:param:<full name>:<field>" records is emitted.
 *
 * @param info           List of parameter descriptions to walk.
 * @param type           Framework type name to match.
 * @param component      Component name to match, or ompi_info_component_all.
 * @param want_internal  Not consulted by this implementation.
 */
void ompi_info_show_mca_params(opal_list_t *info,
                               const char *type, const char *component, 
                               bool want_internal)
{
    opal_list_item_t *i;
    mca_base_param_info_t *p;
    char *value_string;
    char *message, *content, *tmp, *prefix;
    int value_int, j;
    mca_base_param_source_t source;
    char *src_file;

    /* NOTE(review): the loop stops when it reaches opal_list_get_last(),
     * so the final list item itself is never processed.  Confirm whether
     * that is intended or whether the list's end marker was meant. */
    for (i = opal_list_get_first(info); i != opal_list_get_last(info);
         i = opal_list_get_next(i)) {
        p = (mca_base_param_info_t*) i;

        /* Filter on the framework type */
        if (NULL == p->mbpp_type_name || 0 != strcmp(type, p->mbpp_type_name)) {
            continue;
        }

        /* Filter on the component: "all" and component-less (base)
         * parameters always pass */
        if (0 != strcmp(component, ompi_info_component_all) &&
            NULL != p->mbpp_component_name &&
            0 != strcmp(component, p->mbpp_component_name)) {
            continue;
        }

        /* Find the source of the value */
        if (OPAL_SUCCESS !=
            mca_base_param_lookup_source(p->mbpp_index, &source, &src_file)) {
            continue;
        }

        /* Make a char * for the current value.  Invoke a lookup
         * because it may transform the char * ("~/" -> "<home dir>/")
         * or get the value from the environment, a file, etc.
         */
        value_string = NULL;
        if (MCA_BASE_PARAM_TYPE_STRING == p->mbpp_type) {
            mca_base_param_lookup_string(p->mbpp_index, &value_string);

            /* Can't let the char * be NULL because it is handed to
             * strlen() and to the output routine below
             */
            if (NULL == value_string) {
                value_string = strdup("");
            }
        } else {
            value_int = 0;
            mca_base_param_lookup_int(p->mbpp_index, &value_int);
            if (asprintf(&value_string, "%d", value_int) < 0) {
                value_string = NULL;
            }
        }
        if (NULL == value_string) {
            /* Out of memory: nothing sensible to print for this one */
            continue;
        }

        /* Build up the strings to ompi_info_output. */

        if (ompi_info_pretty) {
            asprintf(&message, "MCA %s", p->mbpp_type_name);

            /* Put in the real, full name (which may be different
             * than the categorization).
             */
            asprintf(&content, "%s \"%s\" (%s: <%s>, data source: ",
                     p->mbpp_read_only ? "information" : "parameter",
                     p->mbpp_full_name,
                     p->mbpp_read_only ? "value" : "current value",
                     (0 == strlen(value_string)) ? "none" : value_string);

            /* Indicate where the param was set from.  Each step
             * builds the longer string in tmp, then swaps it in. */
            switch(source) {
                case MCA_BASE_PARAM_SOURCE_DEFAULT:
                    asprintf(&tmp, "%sdefault value", content);
                    free(content);
                    content = tmp;
                    break;
                case MCA_BASE_PARAM_SOURCE_ENV:
                    asprintf(&tmp, "%senvironment or cmdline", content);
                    free(content);
                    content = tmp;
                    break;
                case MCA_BASE_PARAM_SOURCE_FILE:
                    asprintf(&tmp, "%sfile [%s]", content, src_file);
                    free(content);
                    content = tmp;
                    break;
                case MCA_BASE_PARAM_SOURCE_OVERRIDE:
                    asprintf(&tmp, "%sAPI override", content);
                    free(content);
                    content = tmp;
                    break;
                default:
                    break;
            }

            /* Is this parameter deprecated? */
            if (p->mbpp_deprecated) {
                asprintf(&tmp, "%s, deprecated", content);
                free(content);
                content = tmp;
            }

            /* Does this parameter have any synonyms? */
            if (p->mbpp_synonyms_len > 0) {
                asprintf(&tmp, "%s, synonyms: ", content);
                free(content);
                content = tmp;
                for (j = 0; j < p->mbpp_synonyms_len; ++j) {
                    /* Comma-separate every name after the first */
                    asprintf(&tmp, "%s%s%s", content,
                             (j > 0) ? ", " : "",
                             p->mbpp_synonyms[j]->mbpp_full_name);
                    free(content);
                    content = tmp;
                }
            }

            /* Is this parameter a synonym of something else? */
            else if (NULL != p->mbpp_synonym_parent) {
                asprintf(&tmp, "%s, synonym of: %s", content,
                         p->mbpp_synonym_parent->mbpp_full_name);
                free(content);
                content = tmp;
            }
            asprintf(&tmp, "%s)", content);
            free(content);
            content = tmp;
            ompi_info_out(message, message, content);
            free(message);
            free(content);

            /* If we have a help message, ompi_info_output it */
            if (NULL != p->mbpp_help_msg) {
                ompi_info_out("", "", p->mbpp_help_msg);
            }
        } else {
            /* Build the common record prefix; it is freed at the end
             * of this branch */
            asprintf(&prefix, "mca:%s:%s:param:%s:", p->mbpp_type_name,
                     (NULL == p->mbpp_component_name) ? "base" : p->mbpp_component_name,
                     p->mbpp_full_name);

            /* Output the value */
            asprintf(&message, "%svalue", prefix);
            ompi_info_out(message, message, value_string);
            free(message);

            /* Indicate where the param was set from.  content stays
             * NULL for an unrecognized source so that no stale or
             * unset pointer is printed or freed. */
            asprintf(&message, "%sdata_source", prefix);
            content = NULL;
            switch(source) {
                case MCA_BASE_PARAM_SOURCE_DEFAULT:
                    content = strdup("default value");
                    break;
                case MCA_BASE_PARAM_SOURCE_ENV:
                    content = strdup("environment-cmdline");
                    break;
                case MCA_BASE_PARAM_SOURCE_FILE:
                    asprintf(&content, "file: %s", src_file);
                    break;
                case MCA_BASE_PARAM_SOURCE_OVERRIDE:
                    content = strdup("API override");
                    break;
                default:
                    break;
            }
            if (NULL != content) {
                ompi_info_out(message, message, content);
                free(content);
            }
            free(message);

            /* Output whether it's read only or writable.  From here
             * on content only points at strings we do not own. */
            asprintf(&message, "%sstatus", prefix);
            content = p->mbpp_read_only ? "read-only" : "writable";
            ompi_info_out(message, message, content);
            free(message);

            /* If it has a help message, ompi_info_output that */
            if (NULL != p->mbpp_help_msg) {
                asprintf(&message, "%shelp", prefix);
                content = p->mbpp_help_msg;
                ompi_info_out(message, message, content);
                free(message);
            }

            /* Is this parameter deprecated? */
            asprintf(&message, "%sdeprecated", prefix);
            content = p->mbpp_deprecated ? "yes" : "no";
            ompi_info_out(message, message, content);
            free(message);

            /* Does this parameter have any synonyms? */
            if (p->mbpp_synonyms_len > 0) {
                for (j = 0; j < p->mbpp_synonyms_len; ++j) {
                    asprintf(&message, "%ssynonym:name", prefix);
                    content = p->mbpp_synonyms[j]->mbpp_full_name;
                    ompi_info_out(message, message, content);
                    free(message);
                }
            }

            /* Is this parameter a synonym of something else? */
            else if (NULL != p->mbpp_synonym_parent) {
                asprintf(&message, "%ssynonym_of:name", prefix);
                content = p->mbpp_synonym_parent->mbpp_full_name;
                ompi_info_out(message, message, content);
                free(message);
            }

            free(prefix);
        }

        /* The value string is always heap-allocated at this point */
        free(value_string);
    }
}
Ejemplo n.º 24
0
/*
 * Undo ompi_proc_all(): OBJ_RELEASE every entry of the array and free
 * the array itself.  When reset_bml is true, each entry's proc_bml
 * object is released and cleared first.  A NULL array is a no-op.
 */
static void bml_r2_ft_release_procs(ompi_proc_t **procs, size_t num_procs,
                                    bool reset_bml)
{
    size_t p;

    if (NULL == procs) {
        return;
    }

    for (p = 0; p < num_procs; ++p) {
        if (reset_bml && NULL != procs[p]->proc_bml) {
            OBJ_RELEASE(procs[p]->proc_bml);
            procs[p]->proc_bml = NULL;
        }

        OBJ_RELEASE(procs[p]);
    }

    free(procs);
}

/**
 * Checkpoint/restart event handler for the r2 BML.
 *
 * The work is done in three steps:
 *  1. For RESTART, and for every second CONTINUE call when
 *     ompi_cr_continue_like_restart is set, fetch the process list.
 *  2. Except on RESTART and on every second CONTINUE call, forward the
 *     event to the mpool and BTL ft_event hooks of each BTL module
 *     (RESTART_PRE is presented to them as RESTART).
 *  3. State-specific work: finalize the BML (RESTART_PRE, or the first
 *     like-restart CONTINUE pass), or re-open and re-select the BTL
 *     framework and drop the per-process proc_bml objects (RESTART, or
 *     the second like-restart CONTINUE pass).
 *
 * @param state  One of the OPAL_CRS_* states.
 *
 * @return OMPI_SUCCESS on success, OMPI_ERR_OUT_OF_RESOURCE if the
 *         process list could not be obtained, otherwise the error code
 *         of the step that failed.  The process list is released on
 *         every exit path.
 */
int mca_bml_r2_ft_event(int state)
{
    /* Toggled on every CONTINUE call: true on the 1st, 3rd, ... call */
    static bool first_continue_pass = false;
    ompi_proc_t** procs = NULL;
    size_t num_procs = 0;
    size_t btl_idx;
    int ret;
    int exit_status = OMPI_SUCCESS;
    int loc_state;
    int param_type = -1;
    char *param_list = NULL;

    /*
     * Step 1: grab the process list where step 3 will need it
     */
    if(OPAL_CRS_CHECKPOINT == state) {
        /* Do nothing for now */
    }
    else if(OPAL_CRS_CONTINUE == state) {
        first_continue_pass = !first_continue_pass;

        /* Since nothing in Checkpoint, we are fine here (unless required by BTL) */
        if( ompi_cr_continue_like_restart && !first_continue_pass) {
            procs = ompi_proc_all(&num_procs);
            if(NULL == procs) {
                return OMPI_ERR_OUT_OF_RESOURCE;
            }
        }
    }
    else if(OPAL_CRS_RESTART_PRE == state ) {
        /* Nothing here */
    }
    else if(OPAL_CRS_RESTART == state ) {
        procs = ompi_proc_all(&num_procs);
        if(NULL == procs) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
    }

    /*
     * Step 2: notify the BTLs and their MPools.
     *
     * Never call the ft_event functions attached to the BTLs on the second
     * pass of RESTART since on the first pass they were unloaded and therefore
     * no longer exist.  The second CONTINUE pass is skipped as well.
     */
    if( OPAL_CRS_RESTART != state &&
        !(OPAL_CRS_CONTINUE == state && !first_continue_pass) ) {
        /* Since we only ever call into the BTLs once during the first restart
         * pass, just lie to them on this pass for a bit of local clarity.
         */
        loc_state = (OPAL_CRS_RESTART_PRE == state) ? OPAL_CRS_RESTART : state;

        /*
         * Call ft_event in:
         * - BTL modules
         * - MPool modules
         *
         * These should be cleaning out stale state, and memory references in 
         * preparation for being shut down.
         */
        for(btl_idx = 0; btl_idx < mca_bml_r2.num_btl_modules; btl_idx++) {
            /*
             * Notify Mpool.  If it reports a failure, the BTL of this
             * module is not notified and we move on to the next module.
             */
            if( NULL != (mca_bml_r2.btl_modules[btl_idx])->btl_mpool &&
                NULL != (mca_bml_r2.btl_modules[btl_idx])->btl_mpool->mpool_ft_event ) {
                opal_output_verbose(10, ompi_cr_output,
                                    "bml:r2: ft_event: Notify the %s MPool.\n",
                                    (mca_bml_r2.btl_modules[btl_idx])->btl_mpool->mpool_component->mpool_version.mca_component_name);
                if(OMPI_SUCCESS != (ret = (mca_bml_r2.btl_modules[btl_idx])->btl_mpool->mpool_ft_event(loc_state) ) ) {
                    continue;
                }
            }

            /*
             * Notify BTL.  Its result is not propagated.
             */
            if( NULL != (mca_bml_r2.btl_modules[btl_idx])->btl_ft_event) {
                opal_output_verbose(10, ompi_cr_output,
                                    "bml:r2: ft_event: Notify the %s BTL.\n",
                                    (mca_bml_r2.btl_modules[btl_idx])->btl_component->btl_version.mca_component_name);
                (void) (mca_bml_r2.btl_modules[btl_idx])->btl_ft_event(loc_state);
            }
        }
    }

    /*
     * Step 3: state-specific work
     */
    if(OPAL_CRS_CHECKPOINT == state) {
        ;
    }
    else if(OPAL_CRS_CONTINUE == state) {
        /* Matches OPAL_CRS_RESTART_PRE */
        if( ompi_cr_continue_like_restart && first_continue_pass) {
            if( OMPI_SUCCESS != (ret = mca_bml_r2_finalize()) ) {
                opal_output(0, "bml:r2: ft_event(Restart): Failed to finalize BML framework\n");
                exit_status = ret;
                goto cleanup;
            }
        }
        /* Matches OPAL_CRS_RESTART */
        else if( ompi_cr_continue_like_restart && !first_continue_pass ) {
            /*
             * Barrier to make all processes have been successfully restarted before
             * we try to remove some restart only files.
             */
            if (OMPI_SUCCESS != (ret = orte_grpcomm.barrier())) {
                opal_output(0, "bml:r2: ft_event(Restart): Failed in orte_grpcomm.barrier (%d)", ret);
                exit_status = ret;
                goto cleanup;
            }

            opal_output_verbose(10, ompi_cr_output,
                                "bml:r2: ft_event(Restart): Cleanup restart files\n");
            opal_crs_base_cleanup_flush();

            /*
             * Re-open the BTL framework to get the full list of components.
             */
            if( OMPI_SUCCESS != (ret = mca_btl_base_open()) ) {
                opal_output(0, "bml:r2: ft_event(Restart): Failed to open BTL framework\n");
                exit_status = ret;
                goto cleanup;
            }

            /*
             * Re-select the BTL components/modules
             * This will cause the BTL components to discover the available
             * network options on this machine, and post proper modex information.
             */
            if( OMPI_SUCCESS != (ret = mca_btl_base_select(OMPI_ENABLE_PROGRESS_THREADS,
                                                           OMPI_ENABLE_MPI_THREADS) ) ) {
                opal_output(0, "bml:r2: ft_event(Restart): Failed to select in BTL framework\n");
                exit_status = ret;
                goto cleanup;
            }

            /*
             * Clear some structures so we can properly repopulate them
             */
            mca_bml_r2.btls_added = false;

            bml_r2_ft_release_procs(procs, num_procs, true);
            procs = NULL;
        }
    }
    else if(OPAL_CRS_RESTART_PRE == state ) {
        opal_output_verbose(10, ompi_cr_output,
                            "bml:r2: ft_event(Restart): Finalize BML\n");

        /*
         * Finalize the BML
         * - Flush progress functions
         * - Flush module references
         * - mca_btl_base_close()
         *   Need to do this because we may have BTL components that were
         *   unloaded in the first selection that may be available now.
         *   Conversely we may have BTL components loaded now that
         *   are not available now.
         */
        if( OMPI_SUCCESS != (ret = mca_bml_r2_finalize()) ) {
            opal_output(0, "bml:r2: ft_event(Restart): Failed to finalize BML framework\n");
            exit_status = ret;
            goto cleanup;
        }
    }
    else if(OPAL_CRS_RESTART == state  ) {

        /*
         * Barrier to make all processes have been successfully restarted before
         * we try to remove some restart only files.
         */
        if (OMPI_SUCCESS != (ret = orte_grpcomm.barrier())) {
            opal_output(0, "bml:r2: ft_event(Restart): Failed in orte_grpcomm.barrier (%d)", ret);
            exit_status = ret;
            goto cleanup;
        }

        opal_output_verbose(10, ompi_cr_output,
                            "bml:r2: ft_event(Restart): Cleanup restart files\n");
        opal_crs_base_cleanup_flush();

        /*
         * Re-open the BTL framework to get the full list of components.
         * - but first clear the MCA value that was there
         */
        param_type = mca_base_param_find("btl", NULL, NULL);
        mca_base_param_lookup_string(param_type, &param_list);
        opal_output_verbose(11, ompi_cr_output,
                            "Restart (Previous BTL MCA): <%s>\n", param_list);
        free(param_list);
        param_list = NULL;

        /* Deregister the old value, and refresh the file cache to grab any updates */
        mca_base_param_deregister(param_type);
        mca_base_param_recache_files(false);

        if( OMPI_SUCCESS != (ret = mca_btl_base_open()) ) {
            opal_output(0, "bml:r2: ft_event(Restart): Failed to open BTL framework\n");
            exit_status = ret;
            goto cleanup;
        }

        param_type = mca_base_param_find("btl", NULL, NULL);
        mca_base_param_lookup_string(param_type, &param_list);
        opal_output_verbose(11, ompi_cr_output,
                            "Restart (New BTL MCA): <%s>\n", param_list);
        free(param_list);
        param_list = NULL;

        /*
         * Re-select the BTL components/modules
         * This will cause the BTL components to discover the available
         * network options on this machine, and post proper modex information.
         */
        if( OMPI_SUCCESS != (ret = mca_btl_base_select(OMPI_ENABLE_PROGRESS_THREADS,
                                                       OMPI_ENABLE_MPI_THREADS) ) ) {
            opal_output(0, "bml:r2: ft_event(Restart): Failed to select in BTL framework\n");
            exit_status = ret;
            goto cleanup;
        }

        /*
         * Clear some structures so we can properly repopulate them
         */
        mca_bml_r2.btls_added = false;

        bml_r2_ft_release_procs(procs, num_procs, true);
        procs = NULL;
    }

 cleanup:
    /* procs is still set only when a step above failed after the list
     * was obtained; drop it without touching proc_bml in that case. */
    bml_r2_ft_release_procs(procs, num_procs, false);

    return exit_status;
}