/** * Function for finding and opening either all MCA components, or the one * that was specifically requested via a MCA parameter. */ int mca_btl_base_open(void) { int i; if( ++mca_btl_base_already_opened > 1 ) return OMPI_SUCCESS; /* Verbose output */ mca_base_param_reg_int_name("btl", "base_verbose", "Verbosity level of the BTL framework", false, false, 0, &mca_btl_base_verbose); mca_btl_base_output = opal_output_open(NULL); opal_output_set_verbosity(mca_btl_base_output, mca_btl_base_verbose); /* Override the per-BTL "don't run if THREAD_MULTIPLE selected" embargo? */ mca_base_param_reg_int_name("btl", "base_thread_multiple_override", "Enable BTLs that are not normally enabled when MPI_THREAD_MULTIPLE is enabled (THIS IS FOR DEVELOPERS ONLY! SHOULD NOT BE USED BY END USERS!)", true, false, 0, &i); mca_btl_base_thread_multiple_override = OPAL_INT_TO_BOOL(i); /* Open up all available components */ if (OMPI_SUCCESS != mca_base_components_open("btl", mca_btl_base_output, mca_btl_base_static_components, &mca_btl_base_components_opened, true)) { return OMPI_ERROR; } /* Initialize the list so that in mca_btl_base_close(), we can iterate over it (even if it's empty, as in the case of ompi_info) */ OBJ_CONSTRUCT(&mca_btl_base_modules_initialized, opal_list_t); /* register parameters */ mca_base_param_lookup_string( mca_base_param_register_string("btl","base","include",NULL,NULL), &mca_btl_base_include); mca_base_param_lookup_string( mca_base_param_register_string("btl","base","exclude",NULL,NULL), &mca_btl_base_exclude); mca_base_param_reg_int_name("btl", "base_warn_component_unused", "This parameter is used to turn on warning messages when certain NICs are not used", false, false, 1, &mca_btl_base_warn_component_unused); /* All done */ return OMPI_SUCCESS; }
/* * Main MCA initialization. */ int mca_base_open(void) { int param_index; char *value; opal_output_stream_t lds; char hostname[64]; if (!mca_base_opened) { mca_base_opened = true; } else { return OPAL_SUCCESS; } /* Register some params */ #if OMPI_WANT_HOME_CONFIG_FILES asprintf(&value, "%s%c%s"OPAL_PATH_SEP".openmpi"OPAL_PATH_SEP"components", opal_install_dirs.pkglibdir, OPAL_ENV_SEP, opal_home_directory() ); #else # if defined(__WINDOWS__) && defined(_DEBUG) asprintf(&value, "%s/debug", opal_install_dirs.pkglibdir); # else asprintf(&value, "%s", opal_install_dirs.pkglibdir); # endif #endif mca_base_param_component_path = mca_base_param_reg_string_name("mca", "component_path", "Path where to look for Open MPI and ORTE components", false, false, value, NULL); free(value); param_index = mca_base_param_reg_string_name("mca", "verbose", "Top-level verbosity parameter", false, false, NULL, NULL); mca_base_param_reg_int_name("mca", "component_show_load_errors", "Whether to show errors for components that failed to load or not", false, false, 1, NULL); mca_base_param_reg_int_name("mca", "component_disable_dlopen", "Whether to attempt to disable opening dynamic components or not", false, false, 0, NULL); /* What verbosity level do we want? */ mca_base_param_lookup_string(param_index, &value); memset(&lds, 0, sizeof(lds)); if (NULL != value) { parse_verbose(value, &lds); free(value); } else { set_defaults(&lds); } gethostname(hostname, 64); asprintf(&lds.lds_prefix, "[%s:%05d] ", hostname, getpid()); opal_output_reopen(0, &lds); opal_output_verbose(5, 0, "mca: base: opening components"); free(lds.lds_prefix); /* Open up the component repository */ return mca_base_component_repository_init(); }
/** * Removes the bproc directory * @code /tmp/openmpi-bproc-<user>/ @endcode and all of its contents * @retval ORTE_SUCCESS * @retval error */ static int odls_bproc_remove_dir() { char *frontend = NULL, *user = NULL, *filename = NULL; int id; /* get the username set by the bproc pls. We need to get it from here * because on many bproc systems the method we use to get the username * from the system on the backend fails and we only get the uid. */ id = mca_base_param_register_string("pls", "bproc", "username", NULL, orte_system_info.user); mca_base_param_lookup_string(id,&user); asprintf(&filename, "openmpi-bproc-%s", user ); if( NULL == filename ) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERROR; } frontend = opal_os_path(false, "tmp", filename, NULL ); free(filename); /* Always free the filename */ if (NULL == frontend) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERROR; } /* we do our best to clean up the directory tree, but we ignore errors*/ odls_bproc_delete_dir_tree(frontend); free(frontend); return ORTE_SUCCESS; }
/* * utility routine for string parameter registration */ static int reg_string(const char* param_name, const char* deprecated_param_name, const char* param_desc, const char* default_value, char **out_value, int flags) { int index; char *value; index = mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version, param_name, param_desc, false, false, default_value, &value); if (NULL != deprecated_param_name) { mca_base_param_reg_syn(index, &mca_btl_openib_component.super.btl_version, deprecated_param_name, true); } mca_base_param_lookup_string(index, &value); if (0 != (flags & REGSTR_EMPTY_OK) && 0 == strlen(value)) { opal_output(0, "Bad parameter value for parameter \"%s\"", param_name); return OMPI_ERR_BAD_PARAM; } *out_value = value; return OMPI_SUCCESS; }
static int slave_set_name(void) { char *jobid_str, *procid_str; int id, rc; orte_jobid_t jobid; orte_vpid_t vpid; id = mca_base_param_register_string("orte", "ess", "jobid", NULL, NULL); mca_base_param_lookup_string(id, &jobid_str); if (NULL == jobid_str) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_jobid(&jobid, jobid_str))) { ORTE_ERROR_LOG(rc); return(rc); } free(jobid_str); id = mca_base_param_register_string("orte", "ess", "vpid", NULL, NULL); mca_base_param_lookup_string(id, &procid_str); if (NULL == procid_str) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_vpid(&vpid, procid_str))) { ORTE_ERROR_LOG(rc); return(rc); } free(procid_str); ORTE_PROC_MY_NAME->jobid = jobid; ORTE_PROC_MY_NAME->vpid = vpid; ORTE_EPOCH_SET(ORTE_PROC_MY_NAME->epoch,orte_ess.proc_get_epoch(ORTE_PROC_MY_NAME)); OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, "ess:slave set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* get the non-name common environmental variables */ if (ORTE_SUCCESS != (rc = orte_ess_env_get())) { ORTE_ERROR_LOG(rc); return rc; } return ORTE_SUCCESS; }
/*
 * Register the string MCA parameter "pml_v_<param_name>" with the given
 * default and return its current value.
 *
 * The result starts out as default_value, so that is what the caller gets
 * back whenever the lookup does not store anything.
 * NOTE(review): the returned pointer may therefore be either the caller's
 * own default_value or a string produced by the lookup -- confirm the
 * ownership rules before freeing it.
 */
static inline char *mca_pml_v_param_register_string( const char* param_name,
                                                     char *default_value )
{
    int id = mca_base_param_register_string("pml", "v", param_name, NULL,
                                            default_value);
    char *param_value = default_value;

    /* pass the ADDRESS of param_value (the token had been garbled) */
    mca_base_param_lookup_string(id, &param_value);
    return param_value;
}
/*
 * Register the string MCA parameter "btl_template_<param_name>" with the
 * given default and return the value produced by the lookup.
 *
 * Returns NULL when the lookup does not store a value.
 */
static inline char* mca_btl_template_param_register_string(
    const char* param_name,
    const char* default_value)
{
    /* start from NULL so a lookup that writes nothing cannot make us
       return an indeterminate pointer */
    char *param_value = NULL;
    int id = mca_base_param_register_string("btl","template",param_name,NULL,default_value);

    /* pass the ADDRESS of param_value (the token had been garbled) */
    mca_base_param_lookup_string(id, &param_value);
    return param_value;
}
/**
 * Run a user-level debugger
 *
 * Reads the colon-separated "orte_base_user_debugger" MCA parameter, hands
 * each entry to process() until one of them yields a command line, and
 * then replaces the current process with that debugger via execvp().
 *
 * This function does not return: it either exec's the debugger or prints
 * a help message and calls exit(1).
 *
 * @param basename  forwarded to process() and used in the failure message
 * @param cmd_line  forwarded to process()
 * @param argc      forwarded to process()
 * @param argv      forwarded to process()
 */
void orte_run_debugger(char *basename, opal_cmd_line_t *cmd_line,
                       int argc, char *argv[])
{
    int i, id;
    char **new_argv = NULL;
    char *value, **lines;

    /* Get the orte_base_debug MCA parameter and search for a debugger
       that can run */
    id = mca_base_param_find("orte", NULL, "base_user_debugger");
    if (id < 0) {
        opal_show_help("help-orterun.txt", "debugger-mca-param-not-found",
                       true);
        exit(1);
    }
    value = NULL;
    mca_base_param_lookup_string(id, &value);
    if (NULL == value) {
        opal_show_help("help-orterun.txt",
                       "debugger-orte_base_user_debugger-empty", true);
        exit(1);
    }

    /* Look through all the values in the MCA param */
    lines = opal_argv_split(value, ':');
    free(value);
    if (NULL == lines) {
        /* Nothing came out of the split, so there is nothing to index
           below; report it the same way as an empty parameter */
        opal_show_help("help-orterun.txt",
                       "debugger-orte_base_user_debugger-empty", true);
        exit(1);
    }
    for (i = 0; NULL != lines[i]; ++i) {
        if (ORTE_SUCCESS == process(lines[i], basename, cmd_line, argc, argv,
                                    &new_argv)) {
            break;
        }
    }

    /* If we didn't find one, abort */
    if (NULL == lines[i]) {
        opal_show_help("help-orterun.txt", "debugger-not-found", true);
        exit(1);
    }
    opal_argv_free(lines);

    /* We found one */
    execvp(new_argv[0], new_argv);

    /* execvp() only returns on failure */
    value = opal_argv_join(new_argv, ' ');
    opal_show_help("help-orterun.txt", "debugger-exec-failed", true,
                   basename, value, new_argv[0]);
    free(value);
    opal_argv_free(new_argv);
    exit(1);
}
/*
 * Parse the component-selection string held by MCA parameter mca_param.
 *
 * mca_param                  index of the string parameter to read
 * include_mode               set to true by default, and to false when the
 *                            value starts with the negate character
 * requested_component_names  set to NULL by default; receives the
 *                            comma-split list of names when the parameter
 *                            holds a non-empty value
 *
 * Returns OPAL_SUCCESS (also when nothing was requested), or OPAL_ERROR
 * when the lookup fails or a negate character appears anywhere but at the
 * beginning of the value.
 */
static int parse_requested(int mca_param, bool *include_mode,
                           char ***requested_component_names)
{
    int i;
    char *requested = NULL, *requested_orig;

    *requested_component_names = NULL;
    *include_mode = true;

    /* See if the user requested anything */
    if (OPAL_ERROR == mca_base_param_lookup_string(mca_param, &requested)) {
        return OPAL_ERROR;
    }
    if (NULL == requested) {
        return OPAL_SUCCESS;
    }
    if (0 == strlen(requested)) {
        /* an empty string is still a string we were handed; release it
           (this path used to leak) */
        free(requested);
        return OPAL_SUCCESS;
    }
    requested_orig = requested;

    /* Are we including or excluding?  We only allow the negate
       character to be the *first* character of the value (but be nice
       and allow any number of negate characters in the beginning). */
    while (negate == requested[0] && '\0' != requested[0]) {
        *include_mode = false;
        ++requested;
    }

    /* Double check to ensure that the user did not specify the negate
       character anywhere else in the value. */
    for (i = 0; '\0' != requested[i]; ++i) {
        if (negate == requested[i]) {
            opal_show_help("help-mca-base.txt",
                           "framework-param:too-many-negates",
                           true, requested_orig);
            free(requested_orig);
            return OPAL_ERROR;
        }
    }

    /* Split up the value into individual component names */
    *requested_component_names = opal_argv_split(requested, ',');

    /* All done */
    free(requested_orig);
    return OPAL_SUCCESS;
}
int orte_proc_info(void) { int id, tmp; /* all other params are set elsewhere */ id = mca_base_param_register_int("seed", NULL, NULL, NULL, orte_process_info.seed); mca_base_param_lookup_int(id, &tmp); orte_process_info.seed = OPAL_INT_TO_BOOL(tmp); /* if we are a seed, then make sure the daemon flag is NOT set so that * framework components are properly selected */ if (orte_process_info.seed) { orte_process_info.daemon = false; } id = mca_base_param_register_int("orte", "app", "num", NULL, -1); mca_base_param_lookup_int(id, &tmp); orte_process_info.app_num = tmp; id = mca_base_param_register_string("gpr", "replica", "uri", NULL, orte_process_info.gpr_replica_uri); mca_base_param_lookup_string(id, &(orte_process_info.gpr_replica_uri)); mca_base_param_set_internal(id, true); id = mca_base_param_register_string("ns", "replica", "uri", NULL, orte_process_info.ns_replica_uri); mca_base_param_lookup_string(id, &(orte_process_info.ns_replica_uri)); mca_base_param_set_internal(id, true); id = mca_base_param_register_string("tmpdir", "base", NULL, NULL, orte_process_info.tmpdir_base); mca_base_param_lookup_string(id, &(orte_process_info.tmpdir_base)); /* get the process id */ orte_process_info.pid = getpid(); return ORTE_SUCCESS; }
/**
 * Returns a path of the form:
 * @code
 * /tmp/openmpi-bproc-<user>/<universe>/<jobid>-<app_context>/<proc_rank>/
 * @endcode
 * which is used to put links to the pty/pipes in
 * @param proc_rank   the process's rank on the node
 * @param jobid       the jobid the proc belongs to
 * @param app_context the application context number within the job
 * @retval path  newly allocated string that the caller must free, or NULL
 *               when the universe name, job string, username or the path
 *               itself could not be obtained
 */
static char * odls_bproc_get_base_dir_name(int proc_rank, orte_jobid_t jobid,
                                           orte_std_cntr_t app_context)
{
    char *path = NULL, *user = NULL, *job = NULL;
    int rc, id;

    /* ensure that system info is set */
    orte_sys_info();

    if (NULL == orte_universe_info.name) {  /* error condition */
        ORTE_ERROR_LOG(ORTE_ERROR);
        return NULL;
    }

    rc = orte_ns.convert_jobid_to_string(&job, jobid);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return NULL;
    }

    /* get the username set by the bproc pls. We need to get it from here
     * because on many bproc systems the method we use to get the username
     * from the system on the backend fails and we only get the uid. */
    id = mca_base_param_register_string("pls", "bproc", "username", NULL,
                                        orte_system_info.user);
    mca_base_param_lookup_string(id, &user);
    if (NULL == user) {
        /* passing NULL to "%s" is undefined; no username, no path */
        ORTE_ERROR_LOG(ORTE_ERROR);
        free(job);
        return NULL;
    }

    if (0 > asprintf(&path,
                     OPAL_PATH_SEP"tmp"OPAL_PATH_SEP"openmpi-bproc-%s"OPAL_PATH_SEP"%s"OPAL_PATH_SEP"%s-%d"OPAL_PATH_SEP"%d",
                     user, orte_universe_info.name,
                     job, (int) app_context, proc_rank)) {
        ORTE_ERROR_LOG(ORTE_ERROR);
        path = NULL;
    }

    /* only report a path we actually have */
    if (NULL != path && 0 < mca_odls_bproc_component.debug) {
        opal_output(0, "odls bproc io setup. Path: %s\n", path);
    }

    free(user);
    free(job);
    return path;
}
static int slurm_set_name(void) { int slurm_nodeid; int rc; int id; orte_jobid_t jobid; orte_vpid_t vpid; char* jobid_string; char* vpid_string; char *nodeid; OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, "ess:slurm setting name")); id = mca_base_param_register_string("orte", "ess", "jobid", NULL, NULL); mca_base_param_lookup_string(id, &jobid_string); if (NULL == jobid_string) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_jobid(&jobid, jobid_string))) { ORTE_ERROR_LOG(rc); return(rc); } id = mca_base_param_register_string("orte", "ess", "vpid", NULL, NULL); mca_base_param_lookup_string(id, &vpid_string); if (NULL == vpid_string) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_vpid(&vpid, vpid_string))) { ORTE_ERROR_LOG(rc); return(rc); } ORTE_PROC_MY_NAME->jobid = jobid; /* fix up the vpid and make it the "real" vpid */ if (NULL == (nodeid = getenv("SLURM_NODEID"))) { opal_output(0, "SLURM_NODEID not found - cannot define name"); return ORTE_ERR_NOT_FOUND; } slurm_nodeid = atoi(nodeid); ORTE_PROC_MY_NAME->vpid = vpid + slurm_nodeid; OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, "ess:slurm set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* fix up the system info nodename to match exactly what slurm returned */ if (NULL != orte_process_info.nodename) { free(orte_process_info.nodename); } orte_process_info.nodename = get_slurm_nodename(slurm_nodeid); OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, "ess:slurm set nodename to %s", orte_process_info.nodename)); /* get the non-name common environmental variables */ if (ORTE_SUCCESS != (rc = orte_ess_env_get())) { ORTE_ERROR_LOG(rc); return rc; } return ORTE_SUCCESS; }
/** * Function for finding and opening either all MCA components, or the one * that was specifically requested via a MCA parameter. */ int orte_ras_base_open(void) { int value, rc, param; orte_data_type_t tmp; char *requested; /* Debugging / verbose output */ orte_ras_base.ras_output = opal_output_open(NULL); mca_base_param_reg_int_name("ras", "base_verbose", "Enable debugging for the RAS framework (nonzero = enabled)", false, false, 0, &value); if (value != 0) { orte_ras_base.ras_output = opal_output_open(NULL); } else { orte_ras_base.ras_output = -1; } /* Defaults */ orte_ras_base.ras_opened_valid = false; orte_ras_base.ras_using_proxy = false; orte_ras_base.ras_available_valid = false; /** register the base system types with the DSS */ tmp = ORTE_RAS_NODE; if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_ras_base_pack_node, orte_ras_base_unpack_node, (orte_dss_copy_fn_t)orte_ras_base_copy_node, (orte_dss_compare_fn_t)orte_ras_base_compare_node, (orte_dss_size_fn_t)orte_ras_base_size_node, (orte_dss_print_fn_t)orte_ras_base_print_node, (orte_dss_release_fn_t)orte_ras_base_std_obj_release, ORTE_DSS_STRUCTURED, "ORTE_RAS_NODE", &tmp))) { ORTE_ERROR_LOG(rc); return rc; } /* Some systems do not want any RAS support. In those cases, * memory consumption is also an issue. For those systems, we * avoid opening the RAS components by checking for a directive * to use the "null" component. */ param = mca_base_param_reg_string_name("ras", NULL, NULL, false, false, NULL, NULL); if (ORTE_ERROR == mca_base_param_lookup_string(param, &requested)) { return ORTE_ERROR; } if (NULL != requested && 0 == strcmp(requested, "null")) { /* the user has specifically requested that we use the "null" * component. 
In this case, that means we do NOT open any * components, and we simply use the default module we have * already defined above */ orte_ras_base.ras_opened_valid = false; orte_ras = orte_ras_no_op; /* use the no_op module */ return ORTE_SUCCESS; } /* check for timing tests */ param = mca_base_param_reg_int_name("orte", "timing", "Request that critical timing loops be measured", false, false, 0, &value); if (value != 0) { orte_ras_base.timing = true; } else { orte_ras_base.timing = false; } /* Open up all available components */ if (ORTE_SUCCESS != mca_base_components_open("ras", orte_ras_base.ras_output, mca_ras_base_static_components, &orte_ras_base.ras_opened, true)) { return ORTE_ERROR; } /* if we are not on a HNP, select the proxy 'module' */ if (!orte_process_info.seed) { orte_ras = orte_ras_base_proxy_module; /* initialize the module */ orte_ras_base_proxy_init(&rc); orte_ras_base.ras_using_proxy = true; return ORTE_SUCCESS; } /* All done */ orte_ras_base.ras_opened_valid = true; return ORTE_SUCCESS; }
/*
 * Read the hostfile named by the "rds_hostfile_path" MCA parameter and
 * publish the nodes it describes.
 *
 * The existing RAS nodes are queried first, the hostfile is parsed
 * against them, and every resulting update is converted into an RDS cell
 * description (with a name attribute and a cellid attribute).  The
 * descriptions are stored through orte_rds.store_resource(), the RAS
 * updates through orte_ras_base_node_insert(), and finally the
 * oversubscribe override is set for the given job.
 *
 * Only the first call does any work; later calls return ORTE_SUCCESS.
 *
 * Every failure now leaves through the "cleanup" label so that the three
 * work lists and any partially built objects are released (several error
 * paths used to return directly and leaked them).
 *
 * job   job for which the oversubscribe override is set
 *
 * Returns ORTE_SUCCESS or the error code of the step that failed.  A
 * missing hostfile counts as success when the component is using its
 * default hostfile.
 */
static int orte_rds_hostfile_query(orte_jobid_t job)
{
    opal_list_t existing;
    opal_list_t updates, rds_updates;
    opal_list_item_t *item;
    orte_rds_cell_desc_t *rds_item;
    orte_rds_cell_attr_t *new_attr;
    orte_ras_node_t *ras_item;
    int rc;

    if (orte_rds_hostfile_queried) {
        /* if we have already been queried, then
         * our info is on the registry, so just
         * return. Note that this restriction
         * may eventually be lifted - ideally,
         * we might check to see if this is a
         * new file name and go ahead with the
         * query if so.
         */
        return ORTE_SUCCESS;
    }
    orte_rds_hostfile_queried = true;

    OBJ_CONSTRUCT(&existing, opal_list_t);
    OBJ_CONSTRUCT(&updates, opal_list_t);
    OBJ_CONSTRUCT(&rds_updates, opal_list_t);

    rc = orte_ras_base_node_query(&existing);
    if (ORTE_SUCCESS != rc) {
        goto cleanup;
    }

    rc = mca_base_param_find("rds", "hostfile", "path");
    mca_base_param_lookup_string(rc, &mca_rds_hostfile_component.path);

    rc = orte_rds_hostfile_parse(mca_rds_hostfile_component.path, &existing, &updates);
    if (ORTE_ERR_NOT_FOUND == rc) {
        if (mca_rds_hostfile_component.default_hostfile) {
            rc = ORTE_SUCCESS;
        } else {
            opal_show_help("help-rds-hostfile.txt", "rds:no-hostfile",
                           true, mca_rds_hostfile_component.path);
        }
        goto cleanup;
    } else if (ORTE_SUCCESS != rc) {
        goto cleanup;
    }

    if (!opal_list_is_empty(&updates)) {

        /* Convert RAS update list to RDS update list */
        for (ras_item  = (orte_ras_node_t*)opal_list_get_first(&updates);
             ras_item != (orte_ras_node_t*)opal_list_get_end(&updates);
             ras_item  = (orte_ras_node_t*)opal_list_get_next(ras_item)) {

            rds_item = OBJ_NEW(orte_rds_cell_desc_t);
            if (NULL == rds_item) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                rc = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            /* Put the description on rds_updates right away so that the
               cleanup code releases it (and the attributes already
               attached to it) if anything below fails.  The list is not
               read until the loop is done, so its final order is the same
               as before. */
            opal_list_append(&rds_updates, &rds_item->super);

            rds_item->site = strdup("Hostfile");
            rds_item->name = strdup(ras_item->node_name);

            if (need_cellid) {
#if 0 /* JJH Repair when cellid's are fixed */
                /* Create a new cellid for this hostfile */
                rc = orte_ns.create_cellid(&local_cellid, rds_item->site, rds_item->name);
                if (ORTE_SUCCESS != rc) {
                    ORTE_ERROR_LOG(rc);
                    return rc;
                }
#endif
                local_cellid = 0;
                need_cellid = false;
            }

            rds_item->cellid      = local_cellid;
            ras_item->node_cellid = local_cellid;

            /* Attribute 1: the node name.
               NOTE(review): the failure paths below release a partially
               filled attribute with OBJ_RELEASE; this assumes the
               attribute destructor copes with fields still in their
               constructed state -- confirm. */
            new_attr = OBJ_NEW(orte_rds_cell_attr_t);
            if (NULL == new_attr) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                rc = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            new_attr->keyval.key   = strdup(ORTE_RDS_NAME);
            new_attr->keyval.value = OBJ_NEW(orte_data_value_t);
            if (NULL == new_attr->keyval.value) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                OBJ_RELEASE(new_attr);
                rc = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            new_attr->keyval.value->type = ORTE_STRING;
            new_attr->keyval.value->data = strdup(ras_item->node_name);
            opal_list_append(&(rds_item->attributes), &new_attr->super);

            /* Attribute 2: the cellid */
            new_attr = OBJ_NEW(orte_rds_cell_attr_t);
            if (NULL == new_attr) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                rc = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            new_attr->keyval.key   = strdup(ORTE_CELLID_KEY);
            new_attr->keyval.value = OBJ_NEW(orte_data_value_t);
            if (NULL == new_attr->keyval.value) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                OBJ_RELEASE(new_attr);
                rc = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            new_attr->keyval.value->type = ORTE_CELLID;
            if (ORTE_SUCCESS != (rc = orte_dss.copy(&(new_attr->keyval.value->data), &(rds_item->cellid), ORTE_CELLID))) {
                ORTE_ERROR_LOG(rc);
                OBJ_RELEASE(new_attr);
                goto cleanup;
            }
            opal_list_append(&(rds_item->attributes), &new_attr->super);
        }

        /* Insert the new node into the RDS */
        rc = orte_rds.store_resource(&rds_updates);
        if (ORTE_SUCCESS != rc) {
            goto cleanup;
        }

        /* Then the RAS, since we can assume that any
         * resources listed in the hostfile have been
         * already allocated for our use.
         */
        rc = orte_ras_base_node_insert(&updates);
        if (ORTE_SUCCESS != rc) {
            goto cleanup;
        }

        /* and now, indicate that ORTE should override any oversubscribed conditions
         * based on local hardware limits since the user (a) might not have
         * provided us any info on the #slots for a node, and (b) the user
         * might have been wrong! If we don't check the number of local physical
         * processors, then we could be too aggressive on our sched_yield setting
         * and cause performance problems.
         */
        rc = orte_ras_base_set_oversubscribe_override(job);
        if (ORTE_SUCCESS != rc) {
            goto cleanup;
        }
    }

cleanup:
    if (NULL != mca_rds_hostfile_component.path) {
        free(mca_rds_hostfile_component.path);
        mca_rds_hostfile_component.path = NULL;
    }

    while (NULL != (item = opal_list_remove_first(&existing))) {
        OBJ_RELEASE(item);
    }

    while (NULL != (item = opal_list_remove_first(&updates))) {
        OBJ_RELEASE(item);
    }

    while (NULL != (rds_item = (orte_rds_cell_desc_t*)opal_list_remove_first(&rds_updates))) {
        while (NULL != (new_attr = (orte_rds_cell_attr_t*)opal_list_remove_first(&(rds_item->attributes)))) {
            OBJ_RELEASE(new_attr);
        }
        OBJ_RELEASE(rds_item);
    }

    OBJ_DESTRUCT(&existing);
    OBJ_DESTRUCT(&updates);
    OBJ_DESTRUCT(&rds_updates);

    return rc;
}
/**
 * Run a user-level debugger
 *
 * Reads the colon-separated "orte_base_user_debugger" MCA parameter, hands
 * each entry to process() until one of them yields a command line, clears
 * the MPIR path / argument arrays, exports the "in_parallel_debugger" MCA
 * parameter through the environment, and then replaces the current
 * process with the debugger via execvp().
 *
 * This function does not return: it either exec's the debugger or prints
 * a help message and calls exit(1).
 *
 * @param basename  forwarded to process() and used in the failure message
 * @param cmd_line  forwarded to process()
 * @param argc      forwarded to process()
 * @param argv      forwarded to process()
 * @param num_procs forwarded to process()
 */
void orte_run_debugger(char *basename, opal_cmd_line_t *cmd_line,
                       int argc, char *argv[], int num_procs)
{
    int i, id;
    char **new_argv = NULL;
    char *value, **lines, *env_name;

    /* Get the orte_base_debug MCA parameter and search for a debugger
       that can run */
    id = mca_base_param_find("orte", NULL, "base_user_debugger");
    if (id < 0) {
        orte_show_help("help-orterun.txt", "debugger-mca-param-not-found",
                       true);
        exit(1);
    }
    value = NULL;
    mca_base_param_lookup_string(id, &value);
    if (NULL == value) {
        orte_show_help("help-orterun.txt",
                       "debugger-orte_base_user_debugger-empty", true);
        exit(1);
    }

    /* Look through all the values in the MCA param */
    lines = opal_argv_split(value, ':');
    free(value);
    if (NULL == lines) {
        /* Nothing came out of the split, so there is nothing to index
           below; report it the same way as an empty parameter */
        orte_show_help("help-orterun.txt",
                       "debugger-orte_base_user_debugger-empty", true);
        exit(1);
    }
    for (i = 0; NULL != lines[i]; ++i) {
        if (ORTE_SUCCESS == process(lines[i], basename, cmd_line, argc, argv,
                                    &new_argv, num_procs)) {
            break;
        }
    }

    /* If we didn't find one, abort */
    if (NULL == lines[i]) {
        orte_show_help("help-orterun.txt", "debugger-not-found", true);
        exit(1);
    }
    opal_argv_free(lines);

    /* We found one */

    /* cleanup the MPIR arrays in case the debugger doesn't set them */
    memset((char*)MPIR_executable_path, 0, MPIR_MAX_PATH_LENGTH);
    memset((char*)MPIR_server_arguments, 0, MPIR_MAX_ARG_LENGTH);

    /* Set an MCA param so that everyone knows that they are being
       launched under a debugger; not all debuggers are consistent
       about setting MPIR_being_debugged in both the launcher and the
       MPI processes */
    env_name = mca_base_param_environ_variable("orte",
                                               "in_parallel_debugger", NULL);
    if (NULL != env_name) {
        opal_setenv(env_name, "1", true, &environ);
        free(env_name);
    }

    /* Launch the debugger */
    execvp(new_argv[0], new_argv);

    /* execvp() only returns on failure */
    value = opal_argv_join(new_argv, ' ');
    orte_show_help("help-orterun.txt", "debugger-exec-failed",
                   true, basename, value, new_argv[0]);
    free(value);
    opal_argv_free(new_argv);
    exit(1);
}
int orcm_init_util(void) { int ret, i; char *error; char *destdir, *tmp, *mcp, *new_mcp; /* Setup OPAL */ if( ORTE_SUCCESS != (ret = opal_init(NULL, NULL)) ) { error = "opal_init_util"; goto error; } /* register handler for errnum -> string conversion */ opal_error_register("OPENRCM", ORCM_ERR_BASE, ORCM_ERR_MAX, orcm_err2str); /* register where the OPENRCM show_help files are located */ if (NULL != (destdir = getenv("ORCM_DESTDIR"))) { asprintf(&tmp, "%s%s", destdir, ORCM_PKGHELPDIR); } else { tmp = strdup(ORCM_PKGHELPDIR); } if (ORTE_SUCCESS != (ret = opal_show_help_add_dir(tmp))) { error = "register show_help_dir"; goto error; } free(tmp); /* Add ORCM's component directory into the mca_base_param_component_path */ i = mca_base_param_find("mca", NULL, "component_path"); if (i < 0) { ret = ORCM_ERR_NOT_FOUND; error = "Could not find mca_component_path"; goto error; } mca_base_param_lookup_string(i, &mcp); if (NULL == mcp) { ret = ORCM_ERR_NOT_FOUND; error = "Could not find mca_component_path"; goto error; } if (NULL != destdir) { asprintf(&new_mcp, "%s%s:%s", destdir, ORCM_PKGLIBDIR, mcp); } else { asprintf(&new_mcp, "%s:%s", ORCM_PKGLIBDIR, mcp); } mca_base_param_set_string(i, new_mcp); free(new_mcp); free(mcp); orcm_util_initialized = true; return ORCM_SUCCESS; error: if (ORCM_ERR_SILENT != ret) { orte_show_help("help-openrcm-runtime.txt", "orcm_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); } return ret; }
/*
 * Open up all directories in a given path and search for components of
 * the specified type (and possibly of a given name).
 *
 * Note that we use our own path iteration functionality (vs. ltdl's
 * lt_dladdsearchdir() functionality) because we need to look at
 * companion .ompi_info files in the same directory as the library to
 * generate dependencies, etc.  If we use the plain lt_dlopen()
 * functionality, we would not get the directory name of the file
 * finally opened in recursive dependency traversals.
 *
 * path              OPAL_ENV_SEP-separated list of directories to scan,
 *                   or NULL to use the mca_base_component_path parameter
 * type_name         framework type to look for
 * name              component names handed to use_component()
 * include_mode      handed to use_component() together with name
 * found_components  list handed to open_component() for every file that
 *                   use_component() accepts
 */
static void find_dyn_components(const char *path, const char *type_name,
                                const char **name, bool include_mode,
                                opal_list_t *found_components)
{
    ltfn_data_holder_t params;
    char *path_to_use = NULL, *dir, *end;
    component_file_item_t *file;
    opal_list_item_t *cur;

    strncpy(params.type, type_name, MCA_BASE_MAX_TYPE_NAME_LEN);
    params.type[MCA_BASE_MAX_TYPE_NAME_LEN] = '\0';

    params.name[0] = '\0';

    /* If path is NULL, iterate over the set of directories specified by
       the MCA param mca_base_component_path.  If path is not NULL, then
       use that as the path. */
    if (NULL == path) {
        mca_base_param_lookup_string(mca_base_param_component_path,
                                     &path_to_use);
        if (NULL == path_to_use) {
            /* If there's no path, then there's nothing to search -- we're
               done */
            return;
        }
    } else {
        path_to_use = strdup(path);
    }

    /* Iterate over all the files in the directories in the path and
       make a master array of all the matching filenames that we
       find.  The private copy of the path is cut into directories in
       place.  The walk stops after the last directory without ever
       forming "NULL + 1". */
    OBJ_CONSTRUCT(&found_files, opal_list_t);
    dir = path_to_use;
    while (NULL != dir) {
        end = strchr(dir, OPAL_ENV_SEP);
        if (NULL != end) {
            *end = '\0';
        }
        /* pass the ADDRESS of params (the token had been garbled) */
        if (0 != lt_dlforeachfile(dir, save_filename, &params)) {
            break;
        }
        dir = (NULL != end) ? end + 1 : NULL;
    }

    /* Iterate through all the filenames that we found.  Since one
       component may [try to] call another to be loaded, only try to load
       the UNVISITED files.  Also, ignore the return code -- basically,
       give every file one chance to try to load.  If they load, great.
       If not, great. */
    for (cur = opal_list_get_first(&found_files);
         opal_list_get_end(&found_files) != cur;
         cur = opal_list_get_next(cur)) {
        file = (component_file_item_t *) cur;

        if (UNVISITED == file->status) {
            bool op;

            file->status = CHECKING_CYCLE;

            op = use_component(include_mode, name, file->name);
            if (true == op) {
                open_component(file, found_components);
            }
        }
    }

    /* So now we have a final list of loaded components.  We can free all
       the file information. */
    for (cur = opal_list_remove_first(&found_files);
         NULL != cur;
         cur = opal_list_remove_first(&found_files)) {
        OBJ_RELEASE(cur);
    }

    /* All done, now let's cleanup */
    free(path_to_use);

    OBJ_DESTRUCT(&found_files);
}
/*
 * Open up all directories in a given path and search for components of
 * the specified type (and possibly of a given name).
 *
 * Note that we use our own path iteration functionality (vs. ltdl's
 * lt_dladdsearchdir() functionality) because we need to look at
 * companion .ompi_info files in the same directory as the library to
 * generate dependencies, etc.  If we use the plain lt_dlopen()
 * functionality, we would not get the directory name of the file
 * finally opened in recursive dependency traversals.
 *
 * path              OPAL_ENV_SEP-separated list of directories to scan,
 *                   or NULL to use the mca_base_component_path parameter
 * type_name         framework type to look for
 * name              component name to look for, or NULL for all of them
 * found_components  list handed to open_component() for every file found
 *
 * type_name and name are copied into fixed-size members of a local
 * holder structure; names that do not fit are truncated instead of
 * overrunning those members.
 */
static void find_dyn_components(const char *path, const char *type_name,
                                const char *name,
                                opal_list_t *found_components)
{
    ltfn_data_holder_t params;
    char *path_to_use = NULL, *dir, *end;
    component_file_item_t *file;
    opal_list_item_t *cur;

    /* params.type / params.name are array members of the local holder
       (they are written through without being pointed anywhere first),
       so sizeof gives their capacity */
    strncpy(params.type, type_name, sizeof(params.type) - 1);
    params.type[sizeof(params.type) - 1] = '\0';

    if (NULL == name) {
        params.name[0] = '\0';
        opal_output_verbose(40, 0, "mca: base: component_find: looking for all dynamic %s MCA components",
                            type_name, NULL);
    } else {
        strncpy(params.name, name, sizeof(params.name) - 1);
        params.name[sizeof(params.name) - 1] = '\0';
        opal_output_verbose(40, 0,
                            "mca: base: component_find: looking for dynamic %s MCA component named \"%s\"",
                            type_name, name, NULL);
    }

    /* If path is NULL, iterate over the set of directories specified by
       the MCA param mca_base_component_path.  If path is not NULL, then
       use that as the path.  Either way path_to_use is a private string
       that is cut up below and freed at the end; the looked-up string is
       used directly rather than duplicated a second time. */
    if (NULL == path) {
        /* pass the ADDRESS of the pointer (the token had been garbled) */
        mca_base_param_lookup_string(mca_base_param_component_path,
                                     &path_to_use);
        if (NULL == path_to_use) {
            /* If there's no path, then there's nothing to search -- we're
               done */
            return;
        }
    } else {
        path_to_use = strdup(path);
    }

    /* Iterate over all the files in the directories in the path and
       make a master array of all the matching filenames that we
       find.  The walk stops after the last directory without ever
       forming "NULL + 1". */
    OBJ_CONSTRUCT(&found_files, opal_list_t);
    dir = path_to_use;
    while (NULL != dir) {
        end = strchr(dir, OPAL_ENV_SEP);
        if (NULL != end) {
            *end = '\0';
        }
        if (0 != lt_dlforeachfile(dir, save_filename, &params)) {
            break;
        }
        dir = (NULL != end) ? end + 1 : NULL;
    }

    /* Iterate through all the filenames that we found.  Since one
       component may [try to] call another to be loaded, only try to load
       the UNVISITED files.  Also, ignore the return code -- basically,
       give every file one chance to try to load.  If they load, great.
       If not, great. */
    for (cur = opal_list_get_first(&found_files);
         opal_list_get_end(&found_files) != cur;
         cur = opal_list_get_next(cur)) {
        file = (component_file_item_t *) cur;
        if (UNVISITED == file->status) {
            open_component(file, found_components);
        }
    }

    /* So now we have a final list of loaded components.  We can free all
       the file information. */
    for (cur = opal_list_remove_first(&found_files);
         NULL != cur;
         cur = opal_list_remove_first(&found_files)) {
        OBJ_RELEASE(cur);
    }

    /* All done */
    free(path_to_use);
    OBJ_DESTRUCT(&found_files);
}
int orte_sds_slurm_set_name(void) { int rc; int id; int vpid_start; int num_procs; char* name_string = NULL; int slurm_nodeid; /* start by getting our cellid, jobid, and vpid (which is the starting vpid for the list of daemons) */ id = mca_base_param_register_string("ns", "nds", "name", NULL, NULL); mca_base_param_lookup_string(id, &name_string); if(name_string != NULL) { if (ORTE_SUCCESS != (rc = orte_ns.convert_string_to_process_name(&(orte_process_info.my_name), name_string))) { ORTE_ERROR_LOG(rc); free(name_string); return rc; } free(name_string); } else { orte_cellid_t cellid; orte_jobid_t jobid; orte_vpid_t vpid; char* cellid_string; char* jobid_string; char* vpid_string; id = mca_base_param_register_string("ns", "nds", "cellid", NULL, NULL); mca_base_param_lookup_string(id, &cellid_string); if (NULL == cellid_string) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_ns.convert_string_to_cellid(&cellid, cellid_string))) { ORTE_ERROR_LOG(rc); return(rc); } id = mca_base_param_register_string("ns", "nds", "jobid", NULL, NULL); mca_base_param_lookup_string(id, &jobid_string); if (NULL == jobid_string) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_ns.convert_string_to_jobid(&jobid, jobid_string))) { ORTE_ERROR_LOG(rc); return(rc); } id = mca_base_param_register_string("ns", "nds", "vpid", NULL, NULL); mca_base_param_lookup_string(id, &vpid_string); if (NULL == vpid_string) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } if (ORTE_SUCCESS != (rc = orte_ns.convert_string_to_vpid(&vpid, vpid_string))) { ORTE_ERROR_LOG(rc); return(rc); } if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&(orte_process_info.my_name), cellid, jobid, vpid))) { ORTE_ERROR_LOG(rc); return rc; } } /* fix up the base name and make it the "real" name */ slurm_nodeid = atoi(getenv("SLURM_NODEID")); orte_process_info.my_name->vpid += slurm_nodeid; /* fix up the system info 
nodename to match exactly what slurm returned */ if (NULL != orte_system_info.nodename) { free(orte_system_info.nodename); } orte_system_info.nodename = get_slurm_nodename(slurm_nodeid); id = mca_base_param_register_int("ns", "nds", "vpid_start", NULL, -1); mca_base_param_lookup_int(id, &vpid_start); if (vpid_start < 0) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } id = mca_base_param_register_int("ns", "nds", "num_procs", NULL, -1); mca_base_param_lookup_int(id, &num_procs); if (num_procs < 0) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } orte_process_info.vpid_start = (orte_vpid_t)vpid_start; orte_process_info.num_procs = (size_t)num_procs; return ORTE_SUCCESS; }
int ompi_show_all_mca_params(int32_t rank, int requested, char *nodename) { opal_list_t *info; opal_list_item_t *i; mca_base_param_info_t *item; char *value_string; int value_int; FILE *fp = NULL; time_t timestamp; mca_base_param_source_t source; char *src_file; char *src_string; if (rank != 0) { return OMPI_SUCCESS; } timestamp = time(NULL); /* Open the file if one is specified */ if (0 != strlen(ompi_mpi_show_mca_params_file)) { if ( NULL == (fp = fopen(ompi_mpi_show_mca_params_file, "w")) ) { opal_output(0, "Unable to open file <%s> to write MCA parameters", ompi_mpi_show_mca_params_file); return OMPI_ERR_FILE_OPEN_FAILURE; } fprintf(fp, "#\n"); fprintf(fp, "# This file was automatically generated on %s", ctime(×tamp)); fprintf(fp, "# by MPI_COMM_WORLD rank %d (out of a total of %d) on %s\n", rank, requested, nodename ); fprintf(fp, "#\n"); } mca_base_param_dump(&info, false); for (i = opal_list_get_first(info); i != opal_list_get_last(info); i = opal_list_get_next(i)) { item = (mca_base_param_info_t*) i; /* If this is an internal param, don't print it */ if (item->mbpp_internal) { continue; } /* get the source - where the param was last set */ if (OPAL_SUCCESS != mca_base_param_lookup_source(item->mbpp_index, &source, &src_file)) { continue; } /* is this a default value and we are not displaying * defaults, ignore this one */ if (MCA_BASE_PARAM_SOURCE_DEFAULT == source && !show_default_mca_params) { continue; } /* is this a file value and we are not displaying files, * ignore it */ if (MCA_BASE_PARAM_SOURCE_FILE == source && !show_file_mca_params) { continue; } /* is this an enviro value and we are not displaying enviros, * ignore it */ if (MCA_BASE_PARAM_SOURCE_ENV == source && !show_enviro_mca_params) { continue; } /* is this an API value and we are not displaying APIs, * ignore it */ if (MCA_BASE_PARAM_SOURCE_OVERRIDE == source && !show_override_mca_params) { continue; } /* Get the parameter name, and convert it to a printable string */ if 
(MCA_BASE_PARAM_TYPE_STRING == item->mbpp_type) { mca_base_param_lookup_string(item->mbpp_index, &value_string); if (NULL == value_string) { value_string = strdup(""); } } else { mca_base_param_lookup_int(item->mbpp_index, &value_int); asprintf(&value_string, "%d", value_int); } switch(source) { case MCA_BASE_PARAM_SOURCE_DEFAULT: src_string = "default value"; break; case MCA_BASE_PARAM_SOURCE_ENV: src_string = "environment"; break; case MCA_BASE_PARAM_SOURCE_FILE: src_string = "file"; break; case MCA_BASE_PARAM_SOURCE_OVERRIDE: src_string = "API override"; break; default: src_string = NULL; break; } /* Print the parameter */ if (0 != strlen(ompi_mpi_show_mca_params_file)) { if (NULL == src_file) { fprintf(fp, "%s=%s (%s)\n", item->mbpp_full_name, value_string, (NULL != src_string ? src_string : "unknown")); } else { fprintf(fp, "%s=%s (%s:%s)\n", item->mbpp_full_name, value_string, (NULL != src_string ? src_string : "unknown"), src_file); } } else { if (NULL == src_file) { opal_output(0, "%s=%s (%s)\n", item->mbpp_full_name, value_string, (NULL != src_string ? src_string : "unknown")); } else { opal_output(0, "%s=%s (%s:%s)\n", item->mbpp_full_name, value_string, (NULL != src_string ? src_string : "unknown"), src_file); } } free(value_string); } /* Close file, cleanup allocated memory*/ if (0 != strlen(ompi_mpi_show_mca_params_file)) { fclose(fp); } mca_base_param_dump_release(info); return OMPI_SUCCESS; }
/*
 * Select and initialize the fcoll module to be used for a file.
 *
 * Selection order:
 *   1. If a preferred component is given and its query returns a module
 *      with an init function, that module is installed and initialized.
 *   2. Otherwise, if the mca_fcoll_base_param MCA parameter holds a
 *      non-empty comma-separated list of names, the first available
 *      component whose name is in that list and whose query succeeds is
 *      installed and initialized.  If none qualifies, OMPI_ERROR is
 *      returned.
 *   3. Otherwise every available component is queried; the one reporting
 *      the highest priority is initialized and all others are unqueried.
 *
 * file:      file being opened; f_fcoll and f_fcoll_component are set on
 *            success.
 * preferred: optional component to try first (may be NULL).
 *
 * Returns the result of the chosen module's fcoll_module_init(), OMPI_ERROR
 * when nothing could be selected, or OMPI_ERR_OUT_OF_RESOURCE on allocation
 * failure.
 */
int mca_fcoll_base_file_select (struct mca_io_ompio_file_t *file,
                                mca_base_component_t *preferred)
{
    int priority;
    int best_priority;
    opal_list_item_t *item;
    char *names, **name_array;
    int num_names;
    mca_base_component_priority_list_item_t *cpli;
    mca_fcoll_base_component_t *component;
    mca_fcoll_base_component_t *best_component;
    mca_fcoll_base_module_t *module;
    opal_list_t queried;
    queried_module_t *om;
    opal_list_t *selectable;
    char *str;
    int err = MPI_SUCCESS;
    int i;
    bool was_selectable_constructed = false;

    /* Check and see if a preferred component was provided. If it was
       provided then it should be used (if possible) */
    if (NULL != preferred) {
        /* We have a preferred component. Check if it is available
           and if so, whether it wants to run */
        str = &(preferred->mca_component_name[0]);

        opal_output_verbose(10, mca_fcoll_base_output,
                            "fcoll:base:file_select: Checking preferred component: %s",
                            str);

        /* query the component for its priority and get its module
           structure. This is necessary to proceed */
        component = (mca_fcoll_base_component_t *) preferred;
        module = (NULL != component->fcollm_file_query)
                     ? component->fcollm_file_query (file, &priority)
                     : NULL;
        if (NULL != module && NULL != module->fcoll_module_init) {
            /* this query seems to have returned something legitimate,
             * so initialize the file with it */
            file->f_fcoll = module;
            file->f_fcoll_component = preferred;
            return module->fcoll_module_init(file);
        }
        /* The preferred component is present but unable to run.  Fall
         * through and select from the list of available components. */
    }

    /*
     * We get here if either no preferred component was provided, or the
     * preferred component could not be selected.
     */

    /* Check if anything was requested by means of the name parameter */
    names = NULL;
    mca_base_param_lookup_string (mca_fcoll_base_param, &names);

    if (NULL != names && 0 < strlen(names)) {
        name_array = opal_argv_split (names, ',');
        num_names = opal_argv_count (name_array);

        opal_output_verbose(10, mca_fcoll_base_output,
                            "fcoll:base:file_Select: Checking all available module");

        selectable = OBJ_NEW(opal_list_t);
        if (NULL == selectable) {
            opal_argv_free (name_array);
            free (names);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        was_selectable_constructed = true;

        /* go through the components_available list and check each
         * component against the requested names */
        for (item = opal_list_get_first(&mca_fcoll_base_components_available);
             item != opal_list_get_end(&mca_fcoll_base_components_available);
             item = opal_list_get_next(item)) {
            /* convert the opal_list_item_t returned into the proper type */
            cpli = (mca_base_component_priority_list_item_t *) item;
            component = (mca_fcoll_base_component_t *) cpli->super.cli_component;
            opal_output_verbose(10, mca_fcoll_base_output,
                                "select: initialising %s component %s",
                                component->fcollm_version.mca_type_name,
                                component->fcollm_version.mca_component_name);

            for (i = 0; i < num_names; i++) {
                if (0 != strcmp(name_array[i],
                                component->fcollm_version.mca_component_name)) {
                    continue;
                }
                /* The name was requested: the first such component whose
                 * query succeeds wins outright. */
                module = (NULL != component->fcollm_file_query)
                             ? component->fcollm_file_query (file, &priority)
                             : NULL;
                if (NULL != module && NULL != module->fcoll_module_init) {
                    file->f_fcoll = module;
                    file->f_fcoll_component = (mca_base_component_t *)component;
                    /* release the scratch state before handing over */
                    opal_argv_free (name_array);
                    free (names);
                    OBJ_RELEASE (selectable);
                    return module->fcoll_module_init(file);
                }
            }
        }

        opal_argv_free (name_array);
        free (names);

        /* Nothing is ever appended to "selectable" above, so reaching this
         * point means none of the requested components could be used. */
        if (0 == opal_list_get_size(selectable)) {
            OBJ_RELEASE (selectable);
            opal_output_verbose (10, mca_fcoll_base_output,
                                 "fcoll:base:file_select: preferred modules were not available");
            return OMPI_ERROR;
        }
    } else {
        /* no names requested: consider every available component */
        free (names);
        selectable = &mca_fcoll_base_components_available;
    }

    best_component = NULL;
    best_priority = -1;
    OBJ_CONSTRUCT(&queried, opal_list_t);

    for (item = opal_list_get_first(selectable);
         item != opal_list_get_end(selectable);
         item = opal_list_get_next(item)) {
        /* convert the opal_list_item_t returned into the proper type */
        cpli = (mca_base_component_priority_list_item_t *) item;
        component = (mca_fcoll_base_component_t *) cpli->super.cli_component;
        opal_output_verbose(10, mca_fcoll_base_output,
                            "select: initialising %s component %s",
                            component->fcollm_version.mca_type_name,
                            component->fcollm_version.mca_component_name);

        /* we can call the query function only if there is one */
        if (NULL == component->fcollm_file_query) {
            opal_output_verbose(10, mca_fcoll_base_output,
                                "select: no query, ignoring the component");
            continue;
        }

        /* call the query function and see what it returns */
        module = component->fcollm_file_query (file, &priority);
        if (NULL == module || NULL == module->fcoll_module_init) {
            /* query did not return anything which can be used */
            opal_output_verbose(10, mca_fcoll_base_output,
                                "select: query returned failure");
            continue;
        }

        opal_output_verbose(10, mca_fcoll_base_output,
                            "select: query returned priority %d",
                            priority);

        /* is this the best component we have found till now? */
        if (priority > best_priority) {
            best_priority = priority;
            best_component = component;
        }

        om = OBJ_NEW(queried_module_t);
        if (NULL == om) {
            /* we have run out of space */
            OBJ_DESTRUCT(&queried);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        om->om_component = component;
        om->om_module = module;
        opal_list_append(&queried, (opal_list_item_t *)om);
    }

    /* If "selectable" was allocated here (rather than pointing at the
     * global available-components list), empty it and release it.  Only
     * the private list is touched; the global list keeps its items. */
    if (was_selectable_constructed) {
        while (NULL != (item = opal_list_remove_first(selectable))) {
            OBJ_RELEASE (item);
        }
        OBJ_RELEASE (selectable);
        was_selectable_constructed = false;
    }

    /*
     * Now we have a list of components which successfully returned
     * their module struct.  One of these components has the best
     * priority.  The rest have to be unqueried to counter the effects
     * of file_query'ing them.
     */
    if (NULL == best_component) {
        /* No component was able to run this time, so give up */
        OBJ_DESTRUCT(&queried);
        return OMPI_ERROR;
    }

    /*
     * unquery() the components which have not been selected and init()
     * the component which was selected.  We do not return from inside
     * the loop because every queued entry still has to be released.
     */
    for (item = opal_list_remove_first(&queried);
         NULL != item;
         item = opal_list_remove_first(&queried)) {
        om = (queried_module_t *) item;
        if (om->om_component == best_component) {
            /*
             * this is the chosen component: initialise its module.
             *
             * ANJU: a component might not have all the functions
             * defined.  Wherever a function pointer is null in the
             * module structure it needs to be filled in with the base
             * structure function pointers.  This is yet to be done.
             */
            file->f_fcoll = om->om_module;
            err = om->om_module->fcoll_module_init(file);
            file->f_fcoll_component = (mca_base_component_t *)best_component;
        } else if (NULL != om->om_component->fcollm_file_unquery) {
            /* Not the chosen one.  Unquery the component only if it has
             * some clean up to do; components without an unquery hook
             * are simply dropped. */
            (void) om->om_component->fcollm_file_unquery(file);
            opal_output_verbose(10, mca_fcoll_base_output,
                                "select: component %s is not selected",
                                om->om_component->fcollm_version.mca_component_name);
        }
        OBJ_RELEASE(om);
    }

    opal_output_verbose(10, mca_fcoll_base_output,
                        "select: component %s selected",
                        best_component->fcollm_version.mca_component_name);

    OBJ_DESTRUCT(&queried);

    return err;
}
int mca_io_base_delete(char *filename, struct ompi_info_t *info) { int err, num_names; char *names, **name_array; opal_list_t *selectable; opal_list_item_t *item; avail_io_t *avail, selected; /* Announce */ opal_output_verbose(10, mca_io_base_output, "io:base:delete: deleting file: %s", filename); /* See if a set of component was requested by the MCA parameter. Don't check for error. */ names = NULL; mca_base_param_lookup_string(mca_io_base_param, &names); /* Compute the intersection of all of my available components with the components from all the other processes in this file */ /* JMS CONTINUE HERE */ /* See if there were any listed in the MCA parameter; parse them and check them all */ err = OMPI_ERROR; if (NULL != names && 0 < strlen(names)) { name_array = opal_argv_split(names, ','); num_names = opal_argv_count(name_array); opal_output_verbose(10, mca_io_base_output, "io:base:delete: Checking specific modules: %s", names); selectable = check_components(&mca_io_base_components_available, filename, info, name_array, num_names); opal_argv_free(name_array); } /* Nope -- a specific [set of] component[s] was not requested. Go check them all. */ else { opal_output_verbose(10, mca_io_base_output, "io:base:delete: Checking all available modules"); selectable = check_components(&mca_io_base_components_available, filename, info, NULL, 0); } /* Upon return from the above, the modules list will contain the list of modules that returned (priority >= 0). If we have no io modules available, it's an error */ if (NULL == selectable) { /* There's no modules available. Doh! 
*/ /* show_help */ return OMPI_ERROR; } /* Do some kind of collective operation to find a module that everyone has available */ #if 1 /* For the moment, just take the top module off the list */ /* MSC actually take the buttom */ item = opal_list_remove_last(selectable); avail = (avail_io_t *) item; selected = *avail; OBJ_RELEASE(avail); #else /* JMS CONTINUE HERE */ #endif /* Everything left in the selectable list is therefore unwanted, and we call their unquery() method (because they all had query() invoked, but will never have init() invoked in this scope). */ for (item = opal_list_remove_first(selectable); item != NULL; item = opal_list_remove_first(selectable)) { avail = (avail_io_t *) item; unquery(avail, filename, info); OBJ_RELEASE(item); } OBJ_RELEASE(selectable); /* Finally -- delete the file with the selected component */ if (OMPI_SUCCESS != (err = delete_file(&selected, filename, info))) { return err; } /* Announce the winner */ opal_output_verbose(10, mca_io_base_output, "io:base:delete: Selected io component %s", selected.ai_component.v2_0_0.io_version.mca_component_name); return OMPI_SUCCESS; }
/*
 * Emit, through ompi_info_out(), every MCA parameter in "info" that belongs
 * to the given framework type and component.
 *
 * info:          list of mca_base_param_info_t items to scan.
 * type:          framework type name a parameter must match.
 * component:     component name to match; ompi_info_component_all matches
 *                every component.  Parameters without a component name
 *                always match.
 * want_internal: not consulted by this function.
 *
 * Depending on ompi_info_pretty, each parameter is written either as one
 * human-readable line (plus its help message) or as a set of
 * "mca:<type>:<component>:param:<name>:<field>" records.
 */
void ompi_info_show_mca_params(opal_list_t *info,
                               const char *type, const char *component,
                               bool want_internal)
{
    opal_list_item_t *i;
    mca_base_param_info_t *p;
    char *value_string, *empty = "";
    char *message, *content, *tmp;
    int value_int, j;
    mca_base_param_source_t source;
    char *src_file;

    /* Iterate up to the end sentinel so the last parameter is included
       (stopping at opal_list_get_last() would skip it). */
    for (i = opal_list_get_first(info);
         i != opal_list_get_end(info);
         i = opal_list_get_next(i)) {
        p = (mca_base_param_info_t *) i;

        /* Wrong (or missing) framework type: not ours */
        if (NULL == p->mbpp_type_name || 0 != strcmp(type, p->mbpp_type_name)) {
            continue;
        }
        /* A named component that is neither "all" nor the requested one */
        if (0 != strcmp(component, ompi_info_component_all) &&
            NULL != p->mbpp_component_name &&
            0 != strcmp(component, p->mbpp_component_name)) {
            continue;
        }

        /* Find the source of the value */
        if (OPAL_SUCCESS !=
            mca_base_param_lookup_source(p->mbpp_index, &source, &src_file)) {
            continue;
        }

        /* Make a char * for the current value.  Invoke a lookup because
         * it may transform the char * ("~/" -> "<home dir>/") or get the
         * value from the environment, a file, etc. */
        value_string = NULL;
        if (MCA_BASE_PARAM_TYPE_STRING == p->mbpp_type) {
            mca_base_param_lookup_string(p->mbpp_index, &value_string);
            /* Never leave it NULL: it is passed to strlen() and printed
             * below */
            if (NULL == value_string) {
                value_string = strdup(empty);
            }
        } else {
            mca_base_param_lookup_int(p->mbpp_index, &value_int);
            asprintf(&value_string, "%d", value_int);
        }

        /* Build up the strings to ompi_info_output. */
        if (ompi_info_pretty) {
            asprintf(&message, "MCA %s", p->mbpp_type_name);

            /* Put in the real, full name (which may be different than
             * the categorization). */
            asprintf(&content, "%s \"%s\" (%s: <%s>, data source: ",
                     p->mbpp_read_only ? "information" : "parameter",
                     p->mbpp_full_name,
                     p->mbpp_read_only ? "value" : "current value",
                     (0 == strlen(value_string)) ? "none" : value_string);

            /* Indicate where the param was set from */
            switch (source) {
            case MCA_BASE_PARAM_SOURCE_DEFAULT:
                asprintf(&tmp, "%sdefault value", content);
                free(content);
                content = tmp;
                break;
            case MCA_BASE_PARAM_SOURCE_ENV:
                asprintf(&tmp, "%senvironment or cmdline", content);
                free(content);
                content = tmp;
                break;
            case MCA_BASE_PARAM_SOURCE_FILE:
                asprintf(&tmp, "%sfile [%s]", content, src_file);
                free(content);
                content = tmp;
                break;
            case MCA_BASE_PARAM_SOURCE_OVERRIDE:
                asprintf(&tmp, "%sAPI override", content);
                free(content);
                content = tmp;
                break;
            default:
                break;
            }

            /* Is this parameter deprecated? */
            if (p->mbpp_deprecated) {
                asprintf(&tmp, "%s, deprecated", content);
                free(content);
                content = tmp;
            }

            /* Does this parameter have any synonyms? */
            if (p->mbpp_synonyms_len > 0) {
                asprintf(&tmp, "%s, synonyms: ", content);
                free(content);
                content = tmp;
                for (j = 0; j < p->mbpp_synonyms_len; ++j) {
                    if (j > 0) {
                        asprintf(&tmp, "%s, %s", content,
                                 p->mbpp_synonyms[j]->mbpp_full_name);
                    } else {
                        asprintf(&tmp, "%s%s", content,
                                 p->mbpp_synonyms[j]->mbpp_full_name);
                    }
                    free(content);
                    content = tmp;
                }
            }
            /* Is this parameter a synonym of something else? */
            else if (NULL != p->mbpp_synonym_parent) {
                asprintf(&tmp, "%s, synonym of: %s", content,
                         p->mbpp_synonym_parent->mbpp_full_name);
                free(content);
                content = tmp;
            }

            asprintf(&tmp, "%s)", content);
            free(content);
            content = tmp;
            ompi_info_out(message, message, content);
            free(message);
            free(content);

            /* If we have a help message, ompi_info_output it */
            if (NULL != p->mbpp_help_msg) {
                ompi_info_out("", "", p->mbpp_help_msg);
            }
        } else {
            /* Common prefix of every record for this parameter; freed at
             * the end of this branch */
            asprintf(&tmp, "mca:%s:%s:param:%s:", p->mbpp_type_name,
                     (NULL == p->mbpp_component_name) ? "base" : p->mbpp_component_name,
                     p->mbpp_full_name);

            /* Output the value */
            asprintf(&message, "%svalue", tmp);
            ompi_info_out(message, message, value_string);
            free(message);

            /* Indicate where the param was set from */
            asprintf(&message, "%sdata_source", tmp);
            switch (source) {
            case MCA_BASE_PARAM_SOURCE_DEFAULT:
                content = strdup("default value");
                break;
            case MCA_BASE_PARAM_SOURCE_ENV:
                content = strdup("environment-cmdline");
                break;
            case MCA_BASE_PARAM_SOURCE_FILE:
                asprintf(&content, "file: %s", src_file);
                break;
            case MCA_BASE_PARAM_SOURCE_OVERRIDE:
                content = strdup("API override");
                break;
            default:
                /* Unrecognized source: still hand a valid, heap-allocated
                 * string to the output and free() calls below */
                content = strdup("unknown");
                break;
            }
            ompi_info_out(message, message, content);
            free(message);
            free(content);

            /* Output whether it's read only or writable */
            asprintf(&message, "%sstatus", tmp);
            content = p->mbpp_read_only ? "read-only" : "writable";
            ompi_info_out(message, message, content);
            free(message);

            /* If it has a help message, ompi_info_output that */
            if (NULL != p->mbpp_help_msg) {
                asprintf(&message, "%shelp", tmp);
                content = p->mbpp_help_msg;
                ompi_info_out(message, message, content);
                free(message);
            }

            /* Is this parameter deprecated? */
            asprintf(&message, "%sdeprecated", tmp);
            content = p->mbpp_deprecated ? "yes" : "no";
            ompi_info_out(message, message, content);
            free(message);

            /* Does this parameter have any synonyms? */
            if (p->mbpp_synonyms_len > 0) {
                for (j = 0; j < p->mbpp_synonyms_len; ++j) {
                    asprintf(&message, "%ssynonym:name", tmp);
                    content = p->mbpp_synonyms[j]->mbpp_full_name;
                    ompi_info_out(message, message, content);
                    free(message);
                }
            }
            /* Is this parameter a synonym of something else? */
            else if (NULL != p->mbpp_synonym_parent) {
                asprintf(&message, "%ssynonym_of:name", tmp);
                content = p->mbpp_synonym_parent->mbpp_full_name;
                ompi_info_out(message, message, content);
                free(message);
            }

            free(tmp);
        }

        /* The value string was allocated above in every case */
        free(value_string);
    }
}
int mca_bml_r2_ft_event(int state) { static bool first_continue_pass = false; ompi_proc_t** procs = NULL; size_t num_procs; size_t btl_idx; int ret, p; int loc_state; int param_type = -1; char *param_list = NULL; if(OPAL_CRS_CHECKPOINT == state) { /* Do nothing for now */ } else if(OPAL_CRS_CONTINUE == state) { first_continue_pass = !first_continue_pass; /* Since nothing in Checkpoint, we are fine here (unless required by BTL) */ if( ompi_cr_continue_like_restart && !first_continue_pass) { procs = ompi_proc_all(&num_procs); if(NULL == procs) { return OMPI_ERR_OUT_OF_RESOURCE; } } } else if(OPAL_CRS_RESTART_PRE == state ) { /* Nothing here */ } else if(OPAL_CRS_RESTART == state ) { procs = ompi_proc_all(&num_procs); if(NULL == procs) { return OMPI_ERR_OUT_OF_RESOURCE; } } else if(OPAL_CRS_TERM == state ) { ; } else { ; } /* Never call the ft_event functions attached to the BTLs on the second * pass of RESTART since on the first pass they were unloaded and therefore * no longer exist. */ if( OPAL_CRS_RESTART != state ) { if( OPAL_CRS_CONTINUE == state && !first_continue_pass ) { ; } else { /* Since we only ever call into the BTLs once during the first restart * pass, just lie to them on this pass for a bit of local clarity. */ if( OPAL_CRS_RESTART_PRE == state ) { loc_state = OPAL_CRS_RESTART; } else { loc_state = state; } /* * Call ft_event in: * - BTL modules * - MPool modules * * These should be cleaning out stale state, and memory references in * preparation for being shut down. 
*/ for(btl_idx = 0; btl_idx < mca_bml_r2.num_btl_modules; btl_idx++) { /* * Notify Mpool */ if( NULL != (mca_bml_r2.btl_modules[btl_idx])->btl_mpool && NULL != (mca_bml_r2.btl_modules[btl_idx])->btl_mpool->mpool_ft_event ) { opal_output_verbose(10, ompi_cr_output, "bml:r2: ft_event: Notify the %s MPool.\n", (mca_bml_r2.btl_modules[btl_idx])->btl_mpool->mpool_component->mpool_version.mca_component_name); if(OMPI_SUCCESS != (ret = (mca_bml_r2.btl_modules[btl_idx])->btl_mpool->mpool_ft_event(loc_state) ) ) { continue; } } /* * Notify BTL */ if( NULL != (mca_bml_r2.btl_modules[btl_idx])->btl_ft_event) { opal_output_verbose(10, ompi_cr_output, "bml:r2: ft_event: Notify the %s BTL.\n", (mca_bml_r2.btl_modules[btl_idx])->btl_component->btl_version.mca_component_name); if(OMPI_SUCCESS != (ret = (mca_bml_r2.btl_modules[btl_idx])->btl_ft_event(loc_state) ) ) { continue; } } } } /* OPAL_CRS_CONTINUE == state && !first_continue_pass */ } if(OPAL_CRS_CHECKPOINT == state) { ; } else if(OPAL_CRS_CONTINUE == state) { /* Matches OPAL_CRS_RESTART_PRE */ if( ompi_cr_continue_like_restart && first_continue_pass) { if( OMPI_SUCCESS != (ret = mca_bml_r2_finalize()) ) { opal_output(0, "bml:r2: ft_event(Restart): Failed to finalize BML framework\n"); return ret; } } /* Matches OPAL_CRS_RESTART */ else if( ompi_cr_continue_like_restart && !first_continue_pass ) { /* * Barrier to make all processes have been successfully restarted before * we try to remove some restart only files. */ if (OMPI_SUCCESS != (ret = orte_grpcomm.barrier())) { opal_output(0, "bml:r2: ft_event(Restart): Failed in orte_grpcomm.barrier (%d)", ret); return ret; } opal_output_verbose(10, ompi_cr_output, "bml:r2: ft_event(Restart): Cleanup restart files\n"); opal_crs_base_cleanup_flush(); /* * Re-open the BTL framework to get the full list of components. 
*/ if( OMPI_SUCCESS != (ret = mca_btl_base_open()) ) { opal_output(0, "bml:r2: ft_event(Restart): Failed to open BTL framework\n"); return ret; } /* * Re-select the BTL components/modules * This will cause the BTL components to discover the available * network options on this machine, and post proper modex informaiton. */ if( OMPI_SUCCESS != (ret = mca_btl_base_select(OMPI_ENABLE_PROGRESS_THREADS, OMPI_ENABLE_MPI_THREADS) ) ) { opal_output(0, "bml:r2: ft_event(Restart): Failed to select in BTL framework\n"); return ret; } /* * Clear some structures so we can properly repopulate them */ mca_bml_r2.btls_added = false; for(p = 0; p < (int)num_procs; ++p) { if( NULL != procs[p]->proc_bml) { OBJ_RELEASE(procs[p]->proc_bml); procs[p]->proc_bml = NULL; } OBJ_RELEASE(procs[p]); } if( NULL != procs ) { free(procs); procs = NULL; } } } else if(OPAL_CRS_RESTART_PRE == state ) { opal_output_verbose(10, ompi_cr_output, "bml:r2: ft_event(Restart): Finalize BML\n"); /* * Finalize the BML * - Flush progress functions * - Flush module references * - mca_btl_base_close() * Need to do this because we may have BTL components that were * unloaded in the first selection that may be available now. * Conversely we may have BTL components loaded now that * are not available now. */ if( OMPI_SUCCESS != (ret = mca_bml_r2_finalize()) ) { opal_output(0, "bml:r2: ft_event(Restart): Failed to finalize BML framework\n"); return ret; } } else if(OPAL_CRS_RESTART == state ) { /* * Barrier to make all processes have been successfully restarted before * we try to remove some restart only files. */ if (OMPI_SUCCESS != (ret = orte_grpcomm.barrier())) { opal_output(0, "bml:r2: ft_event(Restart): Failed in orte_grpcomm.barrier (%d)", ret); return ret; } opal_output_verbose(10, ompi_cr_output, "bml:r2: ft_event(Restart): Cleanup restart files\n"); opal_crs_base_cleanup_flush(); /* * Re-open the BTL framework to get the full list of components. 
* - but first clear the MCA value that was there */ param_type = mca_base_param_find("btl", NULL, NULL); mca_base_param_lookup_string(param_type, ¶m_list); opal_output_verbose(11, ompi_cr_output, "Restart (Previous BTL MCA): <%s>\n", param_list); if( NULL != param_list ) { free(param_list); param_list = NULL; } /* Deregister the old value, and refresh the file cache to grab any updates */ mca_base_param_deregister(param_type); mca_base_param_recache_files(false); if( OMPI_SUCCESS != (ret = mca_btl_base_open()) ) { opal_output(0, "bml:r2: ft_event(Restart): Failed to open BTL framework\n"); return ret; } param_type = mca_base_param_find("btl", NULL, NULL); mca_base_param_lookup_string(param_type, ¶m_list); opal_output_verbose(11, ompi_cr_output, "Restart (New BTL MCA): <%s>\n", param_list); if( NULL != param_list ) { free(param_list); param_list = NULL; } /* * Re-select the BTL components/modules * This will cause the BTL components to discover the available * network options on this machine, and post proper modex informaiton. */ if( OMPI_SUCCESS != (ret = mca_btl_base_select(OMPI_ENABLE_PROGRESS_THREADS, OMPI_ENABLE_MPI_THREADS) ) ) { opal_output(0, "bml:r2: ft_event(Restart): Failed to select in BTL framework\n"); return ret; } /* * Clear some structures so we can properly repopulate them */ mca_bml_r2.btls_added = false; for(p = 0; p < (int)num_procs; ++p) { if( NULL != procs[p]->proc_bml) { OBJ_RELEASE(procs[p]->proc_bml); procs[p]->proc_bml = NULL; } OBJ_RELEASE(procs[p]); } if( NULL != procs ) { free(procs); procs = NULL; } } else if(OPAL_CRS_TERM == state ) { ; } else { ; } return OMPI_SUCCESS; }