Esempio n. 1
0
/**
 *  Forms a complete pathname and checks it for existance and
 *  permissions
 *            
 *  Accepts:
 *      -fname File name
 *      -path  Path prefix 
 *      -mode  Target permissions which must be satisfied
 *
 *  Returns:
 *      -Full pathname of located file Success
 *      -NULL Failure
 */
char *opal_path_access(char *fname, char *path, int mode)
{
    char *fullpath = NULL;
    struct stat buf;
    
    /* Allocate space for the full pathname. */
    if (NULL == path) {
        fullpath = opal_os_path(false, fname, NULL);
    } else {
        fullpath = opal_os_path(false, path, fname, NULL);
    }
    if (NULL == fullpath)
        return NULL;
    
    /* first check to see - is this a file or a directory? We
     * only want files
     */
    if (0 != stat(fullpath, &buf)) {
        /* couldn't stat the path - obviously, this also meets the
         * existence check, if that was requested
         */
        free(fullpath);
        return NULL;
    }
    
    if (!(S_IFREG & buf.st_mode) &&
        !(S_IFLNK & buf.st_mode)) {
        /* this isn't a regular file or a symbolic link, so
         * ignore it 
         */
        free(fullpath);
        return NULL;
    }
    
    /* check the permissions */
    if ((X_OK & mode) && !(S_IXUSR & buf.st_mode)) {
        /* if they asked us to check executable permission,
         * and that isn't set, then return NULL
         */
        free(fullpath);
        return NULL;
    }
    if ((R_OK & mode) && !(S_IRUSR & buf.st_mode)) {
        /* if they asked us to check read permission,
         * and that isn't set, then return NULL
         */
        free(fullpath);
        return NULL;
    }
    if ((W_OK & mode) && !(S_IWUSR & buf.st_mode)) {
        /* if they asked us to check write permission,
         * and that isn't set, then return NULL
         */
        free(fullpath);
        return NULL;
    }
    
    /* must have met all criteria! */
    return fullpath;
}
static bool test3(void)
{
    char *out;
    struct stat buf;
    char *a[] = { "aaa", "bbb", "ccc", NULL };

    if (NULL == path_sep) {
        printf("test3 cannot be run\n");
        return(false);
    }

    out = opal_os_path(true, a[0], a[1], a[2], NULL);
    if (OPAL_SUCCESS != opal_os_dirpath_create(out, S_IRWXU)) {
        out = opal_os_path(true, a[0], a[1], a[2], NULL);
        if (0 == stat(out, &buf))
            rmdir(out);
        out = opal_os_path(true, a[0], a[1], NULL);
        if (0 == stat(out, &buf))
            rmdir(out);
        out = opal_os_path(true, a[0], NULL);
        if (0 == stat(out, &buf))
            rmdir(out);
        return(false);
    }

    free(out);

    return(true);
}
Esempio n. 3
0
/**
 * deletes the passed directory tree recursively
 * @param path the path to the base directory to delete
 */
static void
odls_bproc_delete_dir_tree(char * path)
{
    DIR *dp;
    struct dirent *ep;
    char *filenm;
    int ret;
    struct stat buf;
    dp = opendir(path);
    if (NULL == dp) {
        return;
    }

    while (NULL != (ep = readdir(dp)) ) {
        /* skip: . and ..  */
        if ((0 != strcmp(ep->d_name, ".")) && (0 != strcmp(ep->d_name, ".."))) {
            filenm = opal_os_path(false, path, ep->d_name, NULL);
            ret = stat(filenm, &buf);
            if (ret < 0 || S_ISDIR(buf.st_mode)) {
                odls_bproc_delete_dir_tree(filenm);
                free(filenm);
                continue;
            }
            unlink(filenm);
            free(filenm);
        }
    }
    closedir(dp);
    rmdir(path);
}
Esempio n. 4
0
/**
 * Try to figure out the absolute path based on the application name
 * (usually argv[0]). If the path is already absolute return a copy, if
 * it start with . look into the current directory, if not dig into
 * the $PATH.
 * In case of error or if executable was not found (as an example if
 * the application did a cwd between the start and this call), the
 * function will return NULL. Otherwise, an newly allocated string
 * will be returned.
 */
char* opal_find_absolute_path( char* app_name )
{
    char* abs_app_name;
    char cwd[OPAL_PATH_MAX], *pcwd;

    if( opal_path_is_absolute(app_name) ) { /* already absolute path */
        abs_app_name = app_name;
    } else if ( '.' == app_name[0] ||
               NULL != strchr(app_name, OPAL_PATH_SEP[0])) {
        /* the app is in the current directory or below it */
        pcwd = getcwd( cwd, OPAL_PATH_MAX );
        if( NULL == pcwd ) {
            /* too bad there is no way we can get the app absolute name */
            return NULL;
        }
        abs_app_name = opal_os_path( false, pcwd, app_name, NULL );
    } else {
        /* Otherwise try to search for the application in the PATH ... */
        abs_app_name = opal_path_findv( app_name, X_OK, NULL, NULL );
    }
    
    if( NULL != abs_app_name ) {
        char* resolved_path = (char*)malloc(OPAL_PATH_MAX);
#if !defined(__WINDOWS__)
        realpath( abs_app_name, resolved_path );
#else
#ifdef HAVE_SHLWAPI_H
		PathCanonicalize(resolved_path, abs_app_name);
#endif
#endif  /* !defined(__WINDOWS__) */
        if( abs_app_name != app_name ) free(abs_app_name);
        return resolved_path;
    }
    return NULL;
}
Esempio n. 5
0
static bool test1(void)
{
    /* Test proper action when given a prefix */

    char *prefix;

    /* see if we can create a specified path */

    clear_proc_info();

    prefix = opal_os_path(false, "tmp", NULL);
    if (ORTE_SUCCESS != orte_session_dir(true, prefix, NULL, NULL, "test-universe", NULL, NULL)) {
	fprintf(test_out, "test1 - couldn't create specified path\n");
        free(prefix);
        return(false);
    }
    /* see if it can access an existing path */

    if (ORTE_SUCCESS != orte_session_dir(false, prefix, NULL, NULL, "test-universe", NULL, NULL)) {
	fprintf(test_out, "test1 - couldn't access existing path\n");
        free(prefix);
        return(false);
    }

    orte_session_dir_finalize(orte_process_info.my_name);
    free(orte_process_info.universe_session_dir);
    free(prefix);

    return true;
}
Esempio n. 6
0
/**
 * Removes the bproc directory
 * @code /tmp/openmpi-bproc-<user>/ @endcode and all of its contents
 * @retval ORTE_SUCCESS
 * @retval error
 */
static int
odls_bproc_remove_dir()
{
    char *frontend = NULL, *user = NULL, *filename = NULL;
    int id;

    /* get the username set by the bproc pls. We need to get it from here
     * because on many bproc systems the method we use to get the username
     * from the system on the backend fails and we only get the uid. */
    id = mca_base_param_register_string("pls", "bproc", "username", NULL,
                                        orte_system_info.user);
    mca_base_param_lookup_string(id,&user);
    asprintf(&filename, "openmpi-bproc-%s", user );
    if( NULL == filename ) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERROR;
    }
    frontend = opal_os_path(false, "tmp", filename, NULL );
    free(filename);  /* Always free the filename */
    if (NULL == frontend) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERROR;
    }
    /* we do our best to clean up the directory tree, but we ignore errors*/
    odls_bproc_delete_dir_tree(frontend);
    free(frontend);
    return ORTE_SUCCESS;
}
Esempio n. 7
0
static bool test8(void)
{
    char *filenm[5];
    FILE *fp;
    int i;

    clear_proc_info();

    /* create test proc session directory tree */
    if (ORTE_SUCCESS != orte_session_dir(true, NULL, "localhost", NULL, "test-universe2", "test-job2", "test-proc2")) {
	return(false);
    }

    fprintf(test_out, "removing directories: %s\n\t%s\n\t%s\n",
	    orte_process_info.proc_session_dir,
	    orte_process_info.job_session_dir,
	    orte_process_info.universe_session_dir);

    /* create some files */

    filenm[0] = opal_os_path(false, orte_process_info.proc_session_dir, "dum1", NULL);
    fp = fopen(filenm[0], "w");
    fprintf(fp, "ss");
    fclose(fp);

    filenm[1] = opal_os_path(false, orte_process_info.job_session_dir, "dum2", NULL);
    fp = fopen(filenm[1], "w");
    fprintf(fp, "ss");
    fclose(fp);

    filenm[2] = opal_os_path(false, orte_process_info.universe_session_dir, "dum3", NULL);
    fp = fopen(filenm[2], "w");
    fprintf(fp, "ss");
    fclose(fp);


    if (ORTE_SUCCESS != orte_session_dir_finalize(orte_process_info.my_name)) {
	   return(false);
    }

    for (i=0; i < 3; i++) unlink(filenm[i]);
    orte_session_dir_finalize(orte_process_info.my_name);

    return true;
}
Esempio n. 8
0
/**
 * Leave ORTE.
 *
 * @retval ORTE_SUCCESS Upon success.
 * @retval ORTE_ERROR Upon failure.
 *
 * This function performs 
 */
int orte_system_finalize(void)
{
    char *contact_path;
    
    /* if I'm the seed, remove the universe contact info file */
    if (orte_process_info.seed) {
        contact_path = opal_os_path(false, orte_process_info.universe_session_dir,
                    "universe-setup.txt", NULL);
        unlink(contact_path);
        free(contact_path);
    }
    
    /* rmgr and odls close depend on wait/iof */
    orte_rmgr_base_close();
    orte_odls_base_close();
    orte_wait_finalize();
    orte_iof_base_close();

    orte_ns_base_close();
    orte_gpr_base_close();
    orte_schema_base_close();
    
    /* finalize selected modules so they can de-register
     * their receives
     */
    orte_rds_base_close();
    orte_ras_base_close();
    orte_rmaps_base_close();
    orte_pls_base_close();
    /* the errmgr close function retains the base
     * module so that error logging can continue
     */
    orte_errmgr_base_close();
    
    /* now can close the rml */
    orte_rml_base_close();
    orte_dss_close();
    
    opal_progress_finalize();

    opal_event_fini();

    orte_session_dir_finalize(orte_process_info.my_name);

    /* clean out the global structures */
    orte_sys_info_finalize();
    orte_proc_info_finalize();
    orte_univ_info_finalize();
    
    return ORTE_SUCCESS;
}
Esempio n. 9
0
static char *orte_build_job_session_dir(char *top_dir,
                                        orte_process_name_t *proc,
                                        orte_jobid_t jobid)
{
    char *jobfam = NULL;
    char *job_session_dir;

    if (0 > asprintf(&jobfam, "%d", ORTE_JOB_FAMILY(proc->jobid))) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return NULL;
    }

    if (ORTE_JOBID_WILDCARD != jobid) {
        char *job = NULL;

        if (0 > asprintf(&job, "%d", ORTE_LOCAL_JOBID(jobid))) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            job_session_dir = NULL;
            goto out;
        }
        job_session_dir = opal_os_path(false, top_dir, jobfam, job, NULL);
        free(job);
        if (NULL == job_session_dir) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        }
    } else {
        job_session_dir = opal_os_path(false, top_dir, jobfam, NULL);
        if( NULL == job_session_dir) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        }
    }

out:
    free(jobfam);
    return job_session_dir;
}
/*
 * We do NOT call the regular C-library "abort" function, even
 * though that would have alerted us to the fact that this is
 * an abnormal termination, because it would automatically cause
 * a core file to be generated. On large systems, that can be
 * overwhelming (imagine a few thousand Gbyte-sized files hitting
                 * a shared file system simultaneously...ouch!).
 *
 * However, this causes a problem for OpenRTE as the system truly
 * needs to know that this actually IS an abnormal termination.
 * To get around the problem, we create a file in the session
 * directory - we don't need to put anything in it, though, as its
 * very existence simply alerts us that this was an abnormal
 * termination.
 *
 * The session directory finalize system will clean this file up
 * for us automagically. However, it needs to stick around long
 * enough for our local daemon to find it! So, we do NOT call
 * session_dir_finalize here!!! Someone will clean up for us.
 *
 * In some cases, however, we DON'T want to create that alert. For
 * example, if an orted detects that the HNP has died, then there
 * is truly nobody to alert! In these cases, we pass report=false
 * to prevent the abort file from being created. This allows the
 * session directory tree to cleanly be eliminated.
 */
void orte_ess_base_app_abort(int status, bool report)
{
    char *abort_file;
    int fd;
    
    /* Exit - do NOT do a normal finalize as this will very likely
     * hang the process. We are aborting due to an abnormal condition
     * that precludes normal cleanup 
     *
     * We do need to do the following bits to make sure we leave a 
     * clean environment. Taken from orte_finalize():
     * - Assume errmgr cleans up child processes before we exit.
     */
    
    /* CRS cleanup since it may have a named pipe and thread active */
    orte_cr_finalize();
    
    /* If we were asked to report this termination,
     * write an "abort" file into our session directory
     */
    if (report) {
        abort_file = opal_os_path(false, orte_process_info.proc_session_dir, "abort", NULL);
        if (NULL == abort_file) {
            /* got a problem */
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            goto CLEANUP;
        }
        OPAL_OUTPUT_VERBOSE((5, orte_debug_output,
                             "%s orte_ess_app_abort: dropping abort file %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), abort_file));
        fd = open(abort_file, O_CREAT, 0600);
        if (0 < fd) close(fd);        
    }
    
CLEANUP:
    /* - Clean out the global structures 
     * (not really necessary, but good practice) */
    orte_proc_info_finalize();
    
    /* Now Exit */
    exit(status);
}
Esempio n. 11
0
/*
 * We do NOT call the regular C-library "abort" function, even
 * though that would have alerted us to the fact that this is
 * an abnormal termination, because it would automatically cause
 * a core file to be generated. On large systems, that can be
 * overwhelming (imagine a few thousand Gbyte-sized files hitting
                 * a shared file system simultaneously...ouch!).
 *
 * However, this causes a problem for OpenRTE as the system truly
 * needs to know that this actually IS an abnormal termination.
 * To get around the problem, we drop a marker in the proc-level
 * session dir. If session dir's were not allowed, then we just
 * ignore this question.
 *
 * In some cases, however, we DON'T want to create that alert. For
 * example, if an orted detects that the HNP has died, then there
 * is truly nobody to alert! In these cases, we pass report=false
 * to indicate that we don't want the marker dropped.
 */
void orte_ess_base_app_abort(int status, bool report)
{
    int fd;
    char *myfile;
    struct timespec tp = {0, 100000};

    /* Exit - do NOT do a normal finalize as this will very likely
     * hang the process. We are aborting due to an abnormal condition
     * that precludes normal cleanup 
     *
     * We do need to do the following bits to make sure we leave a 
     * clean environment. Taken from orte_finalize():
     * - Assume errmgr cleans up child processes before we exit.
     */
    
    /* CRS cleanup since it may have a named pipe and thread active */
    orte_cr_finalize();
    
    /* If we were asked to report this termination, do so.
     * Since singletons don't start an HNP unless necessary, and
     * direct-launched procs don't have daemons at all, only send
     * the message if routing is enabled as this indicates we
     * have someone to send to
     */
    if (report && orte_routing_is_enabled && orte_create_session_dirs) {
        myfile = opal_os_path(false, orte_process_info.proc_session_dir, "aborted", NULL);
        fd = open(myfile, O_CREAT, S_IRUSR);
        close(fd);
        /* now introduce a short delay to allow any pending
         * messages (e.g., from a call to "show_help") to
         * have a chance to be sent */
        nanosleep(&tp, NULL);
    }
    
    /* - Clean out the global structures 
     * (not really necessary, but good practice) */
    orte_proc_info_finalize();
    
    /* Now Exit */
    _exit(status);
}
Esempio n. 12
0
static void
add_extra_includes(const char *includes, const char* includedir)
{
    int i;
    char **values = opal_argv_split(includes, ' ');

    for (i = 0 ; i < opal_argv_count(values) ; ++i) {
        char *line, *include_directory;

        include_directory = opal_os_path(false, includedir, values[i], NULL);

#if defined(__WINDOWS__)
        asprintf(&line, OPAL_INCLUDE_FLAG"\"%s\"", include_directory);
#else
        asprintf(&line, OPAL_INCLUDE_FLAG"%s", include_directory);
#endif  /* defined(__WINDOWS__) */

        opal_argv_append_nosize(&options_data[parse_options_idx].preproc_flags, line);
        free(include_directory);
        free(line);
    }
}
Esempio n. 13
0
static bool
orte_dir_check_file(const char *root, const char *path)
{
    struct stat st;
    char *fullpath;

    /*
     * Keep:
     *  - non-zero files starting with "output-"
     */
    if (0 == strncmp(path, "output-", strlen("output-"))) {
        fullpath = opal_os_path(false, &fullpath, root, path, NULL);
        stat(fullpath, &st);
        free(fullpath);
        if (0 == st.st_size) {
            return true;
        }
        return false;
    }

    return true;
}
static bool test2(void)
{
    char *tmp;
    struct stat buf;
 
    if (NULL == path_sep) {
        printf("test2 cannot be run\n");
        return(false);
    }
    tmp = opal_os_path(true, "tmp", NULL);
    if (0 != mkdir(tmp, S_IRWXU)) {
        printf("test2 could not be run - directory could not be made\n");
        return(false);
    }

    if (OPAL_SUCCESS != opal_os_dirpath_create(tmp, S_IRWXU)) {
        rmdir(tmp);
        return(false);
    }

    chmod(tmp, S_IRUSR);

    if (OPAL_SUCCESS != opal_os_dirpath_create(tmp, S_IRWXU)) {
        rmdir(tmp);
        return(false);
    }

    stat(tmp, &buf);
    if (S_IRWXU != (S_IRWXU & buf.st_mode)) {
        rmdir(tmp);
        return(false);
    }

    rmdir(tmp);

    free(tmp);
    return true;
}
Esempio n. 15
0
File: orted.c Progetto: aosm/openmpi
int main(int argc, char *argv[])
{
    int ret = 0;
    int fd;
    opal_cmd_line_t *cmd_line = NULL;
    char *log_path = NULL;
    char log_file[PATH_MAX];
    char *jobidstring;
    orte_gpr_value_t *value;
    char *segment;
    int i;
    orte_buffer_t answer;
    char *umask_str;

    /* Allow the PLS starters to pass us a umask to use, if required.
       Most starters by default can do something sane with the umask,
       but some (like TM) do not pass on the umask but instead inherit
       it form the root level process starter.  This has to happen
       before opal_init and everything else so that the couple of
       places that stash a umask end up with the correct value.  Only
       do it here (and not in orte_daemon) mainly to make it clear
       that this should only happen when starting an orted for the
       first time.  All startes I'm aware of that don't require an
       orted are smart enough to pass on a reasonable umask, so they
       wouldn't need this functionality anyway. */
    umask_str = getenv("ORTE_DAEMON_UMASK_VALUE");
    if (NULL != umask_str) {
        char *endptr;
        long mask = strtol(umask_str, &endptr, 8);
        if ((! (0 == mask && (EINVAL == errno || ERANGE == errno))) &&
            (*endptr == '\0')) {
            umask(mask);
        }
    }

    /* initialize the globals */
    memset(&orted_globals, 0, sizeof(orted_globals_t));

    /* Ensure that enough of OPAL is setup for us to be able to run */
    if (OPAL_SUCCESS != opal_init_util()) {
        fprintf(stderr, "OPAL failed to initialize -- orted aborting\n");
        exit(1);
    }

    /* save the environment for use when launching application processes */
    orted_globals.saved_environ = opal_argv_copy(environ);

    /* setup mca param system */
    mca_base_param_init();
    
    /* setup to check common command line options that just report and die */
    cmd_line = OBJ_NEW(opal_cmd_line_t);
    opal_cmd_line_create(cmd_line, orte_cmd_line_opts);
    if (ORTE_SUCCESS != (ret = opal_cmd_line_parse(cmd_line, false,
                                                   argc, argv))) {
        char *args = NULL;
        args = opal_cmd_line_get_usage_msg(cmd_line);
        opal_show_help("help-orted.txt", "orted:usage", false,
                       argv[0], args);
        free(args);
        return ret;
    }

    /* check for help request */
    if (orted_globals.help) {
        char *args = NULL;
        args = opal_cmd_line_get_usage_msg(cmd_line);
        opal_show_help("help-orted.txt", "orted:usage", false,
                       argv[0], args);
        free(args);
        return 1;
    }

    /* see if we were directed to separate from current session */
    if (orted_globals.set_sid) {
        setsid();
    }
    
    /* see if they want us to spin until they can connect a debugger to us */
    i=0;
    while (orted_globals.spin) {
        i++;
        if (1000 < i) i=0;        
    }
    
    /* Okay, now on to serious business! */
    
    /* Ensure the process info structure in instantiated and initialized
     * and set the daemon flag to true
     */
    orte_process_info.daemon = true;

    /*
     * If the daemon was given a name on the command line, need to set the
     * proper indicators in the environment so the name discovery service
     * can find it
     */
    if (orted_globals.name) {
        if (ORTE_SUCCESS != (ret = opal_setenv("OMPI_MCA_ns_nds",
                                              "env", true, &environ))) {
            opal_show_help("help-orted.txt", "orted:environ", false,
                           "OMPI_MCA_ns_nds", "env", ret);
            return ret;
        }
        if (ORTE_SUCCESS != (ret = opal_setenv("OMPI_MCA_ns_nds_name",
                                  orted_globals.name, true, &environ))) {
            opal_show_help("help-orted.txt", "orted:environ", false,
                           "OMPI_MCA_ns_nds_name", orted_globals.name, ret);
            return ret;
        }
        /* the following values are meaningless to the daemon, but may have
         * been passed in anyway. we set them here because the nds_env component
         * requires that they be set
         */
        if (ORTE_SUCCESS != (ret = opal_setenv("OMPI_MCA_ns_nds_vpid_start",
                                  orted_globals.vpid_start, true, &environ))) {
            opal_show_help("help-orted.txt", "orted:environ", false,
                           "OMPI_MCA_ns_nds_vpid_start", orted_globals.vpid_start, ret);
            return ret;
        }
        if (ORTE_SUCCESS != (ret = opal_setenv("OMPI_MCA_ns_nds_num_procs",
                                  orted_globals.num_procs, true, &environ))) {
            opal_show_help("help-orted.txt", "orted:environ", false,
                           "OMPI_MCA_ns_nds_num_procs", orted_globals.num_procs, ret);
            return ret;
        }
    }
    if (orted_globals.ns_nds) {
        if (ORTE_SUCCESS != (ret = opal_setenv("OMPI_MCA_ns_nds",
                                               orted_globals.ns_nds, true, &environ))) {
            opal_show_help("help-orted.txt", "orted:environ", false,
                           "OMPI_MCA_ns_nds", "env", ret);
            return ret;
        }
    }

    /* turn on debug if debug_file is requested so output will be generated */
    if (orted_globals.debug_daemons_file) {
        orted_globals.debug_daemons = true;
    }

    /* detach from controlling terminal
     * otherwise, remain attached so output can get to us
     */
    if(orted_globals.debug == false &&
       orted_globals.debug_daemons == false &&
       orted_globals.no_daemonize == false) {
        opal_daemon_init(NULL);
    }

    /* Intialize the Open RTE */
    /* Set the flag telling orte_init that I am NOT a
     * singleton, but am "infrastructure" - prevents setting
     * up incorrect infrastructure that only a singleton would
     * require
     */
    if (ORTE_SUCCESS != (ret = orte_init(true))) {
        opal_show_help("help-orted.txt", "orted:init-failure", false,
                       "orte_init()", ret);
        return ret;
    }

    /* Set signal handlers to catch kill signals so we can properly clean up
     * after ourselves. 
     */
    opal_event_set(&term_handler, SIGTERM, OPAL_EV_SIGNAL,
                   signal_callback, NULL);
    opal_event_add(&term_handler, NULL);
    opal_event_set(&int_handler, SIGINT, OPAL_EV_SIGNAL,
                   signal_callback, NULL);
    opal_event_add(&int_handler, NULL);

    /* if requested, report my uri to the indicated pipe */
    if (orted_globals.uri_pipe > 0) {
        write(orted_globals.uri_pipe, orte_universe_info.seed_uri,
                    strlen(orte_universe_info.seed_uri)+1); /* need to add 1 to get the NULL */
        close(orted_globals.uri_pipe);
    }

    /* setup stdout/stderr */
    if (orted_globals.debug_daemons_file) {
        /* if we are debugging to a file, then send stdout/stderr to
         * the orted log file
         */

        /* get my jobid */
        if (ORTE_SUCCESS != (ret = orte_ns.get_jobid_string(&jobidstring,
                                        orte_process_info.my_name))) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }

        /* define a log file name in the session directory */
        sprintf(log_file, "output-orted-%s-%s.log",
                jobidstring, orte_system_info.nodename);
        log_path = opal_os_path(false,
                                orte_process_info.tmpdir_base,
                                orte_process_info.top_session_dir,
                                log_file,
                                NULL);

        fd = open(log_path, O_RDWR|O_CREAT|O_TRUNC, 0640);
        if (fd < 0) {
            /* couldn't open the file for some reason, so
             * just connect everything to /dev/null
             */
             fd = open("/dev/null", O_RDWR|O_CREAT|O_TRUNC, 0666);
        } else {
            dup2(fd, STDOUT_FILENO);
            dup2(fd, STDERR_FILENO);
            if(fd != STDOUT_FILENO && fd != STDERR_FILENO) {
               close(fd);
            }
        }
    }

    /* output a message indicating we are alive, our name, and our pid
     * for debugging purposes
     */
    if (orted_globals.debug_daemons) {
        fprintf(stderr, "Daemon [%ld,%ld,%ld] checking in as pid %ld on host %s\n",
                ORTE_NAME_ARGS(orte_process_info.my_name), (long)orte_process_info.pid,
                orte_system_info.nodename);
    }

    /* setup the thread lock and condition variables */
    OBJ_CONSTRUCT(&orted_globals.mutex, opal_mutex_t);
    OBJ_CONSTRUCT(&orted_globals.condition, opal_condition_t);

    /* register the daemon main receive functions */
    ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_PLS_ORTED, ORTE_RML_NON_PERSISTENT, orte_daemon_recv_pls, NULL);
    if (ret != ORTE_SUCCESS && ret != ORTE_ERR_NOT_IMPLEMENTED) {
        ORTE_ERROR_LOG(ret);
        return ret;
    }
    ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON, ORTE_RML_NON_PERSISTENT, orte_daemon_recv, NULL);
    if (ret != ORTE_SUCCESS && ret != ORTE_ERR_NOT_IMPLEMENTED) {
        ORTE_ERROR_LOG(ret);
        return ret;
    }

    /* check to see if I'm a bootproxy */
    if (orted_globals.bootproxy) { /* perform bootproxy-specific things */
        if (orted_globals.mpi_call_yield > 0) {
            char *var;
            var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
            opal_setenv(var, "1", true, &environ);
        }

        /* attach a subscription to the orted standard trigger so I can get
         * information on the processes I am to locally launch as soon as all
         * the orteds for this job are started.
         *
         * Once the registry gets to 2.0, we will be able to setup the
         * subscription so we only get our own launch info back. In the interim,
         * we setup the subscription so that ALL launch info for this job
         * is returned. We will then have to parse that message to get our
         * own local launch info.
         *
         * Since we have chosen this approach, we can take advantage of the
         * fact that the callback function will directly receive this data.
         * By setting up that callback function to actually perform the launch
         * based on the received data, all we have to do here is go into our
         * conditioned wait until the job completes!
         *
         * Sometimes, life can be good! :-)
         */

        /** put all this registry stuff in a compound command to limit communications */
        if (ORTE_SUCCESS != (ret = orte_gpr.begin_compound_cmd())) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }

        /* let the local launcher setup a subscription for its required data. We
         * pass the local_cb_launcher function so that this gets called back - this
         * allows us to wakeup the orted so it can exit cleanly if the callback
         * generates an error
         */
        if (ORTE_SUCCESS != (ret = orte_odls.subscribe_launch_data(orted_globals.bootproxy, orted_local_cb_launcher))) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }

        /* get the job segment name */
        if (ORTE_SUCCESS != (ret = orte_schema.get_job_segment_name(&segment, orted_globals.bootproxy))) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }

       /** increment the orted stage gate counter */
        if (ORTE_SUCCESS != (ret = orte_gpr.create_value(&value, ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_AND,
                                                         segment, 1, 1))) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }
        free(segment); /* done with this now */

        value->tokens[0] = strdup(ORTE_JOB_GLOBALS);
        if (ORTE_SUCCESS != (ret = orte_gpr.create_keyval(&(value->keyvals[0]), ORTED_LAUNCH_STAGE_GATE_CNTR, ORTE_UNDEF, NULL))) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }

        /* do the increment */
        if (ORTE_SUCCESS != (ret = orte_gpr.increment_value(value))) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }
        OBJ_RELEASE(value);  /* done with this now */

        /** send the compound command */
        if (ORTE_SUCCESS != (ret = orte_gpr.exec_compound_cmd())) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }

        /* setup and enter the event monitor to wait for a wakeup call */
        OPAL_THREAD_LOCK(&orted_globals.mutex);
        while (false == orted_globals.exit_condition) {
            opal_condition_wait(&orted_globals.condition, &orted_globals.mutex);
        }
        OPAL_THREAD_UNLOCK(&orted_globals.mutex);

        /* make sure our local procs are dead - but don't update their state
         * on the HNP as this may be redundant
         */
        orte_odls.kill_local_procs(ORTE_JOBID_WILDCARD, false);

        /* cleanup their session directory */
        orte_session_dir_cleanup(orted_globals.bootproxy);

        /* send an ack - we are as close to done as we can be while
         * still able to communicate
         */
        OBJ_CONSTRUCT(&answer, orte_buffer_t);
        if (0 > orte_rml.send_buffer(ORTE_PROC_MY_HNP, &answer, ORTE_RML_TAG_PLS_ORTED_ACK, 0)) {
            ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
        }
        OBJ_DESTRUCT(&answer);


        /* Finalize and clean up ourselves */
        if (ORTE_SUCCESS != (ret = orte_finalize())) {
            ORTE_ERROR_LOG(ret);
        }
        exit(ret);
    }

    /*
     *  Set my process status to "running". Note that this must be done
     *  after the rte init is completed.
     */
    if (ORTE_SUCCESS != (ret = orte_smr.set_proc_state(orte_process_info.my_name,
                                                     ORTE_PROC_STATE_RUNNING, 0))) {
        ORTE_ERROR_LOG(ret);
        return ret;
    }

    if (orted_globals.debug_daemons) {
        opal_output(0, "[%lu,%lu,%lu] ompid: issuing callback", ORTE_NAME_ARGS(orte_process_info.my_name));
    }

   /* go through the universe fields and see what else I need to do
     * - could be setup a virtual machine, spawn a console, etc.
     */

    if (orted_globals.debug_daemons) {
        opal_output(0, "[%lu,%lu,%lu] ompid: setting up event monitor", ORTE_NAME_ARGS(orte_process_info.my_name));
    }

     /* setup and enter the event monitor */
    OPAL_THREAD_LOCK(&orted_globals.mutex);

    while (false == orted_globals.exit_condition) {
        opal_condition_wait(&orted_globals.condition, &orted_globals.mutex);
    }

    OPAL_THREAD_UNLOCK(&orted_globals.mutex);

    if (orted_globals.debug_daemons) {
       opal_output(0, "[%lu,%lu,%lu] orted: mutex cleared - finalizing", ORTE_NAME_ARGS(orte_process_info.my_name));
    }

    /* cleanup */
    if (NULL != log_path) {
        unlink(log_path);
    }

    /* finalize the system */
    orte_finalize();

    if (orted_globals.debug_daemons) {
       opal_output(0, "[%lu,%lu,%lu] orted: done - exiting", ORTE_NAME_ARGS(orte_process_info.my_name));
    }

    exit(0);
}
Esempio n. 16
0
/**
 * Initialization of data structures for running under a debugger
 * using the MPICH/TotalView parallel debugger interface. This stage
 * of initialization must occur after spawn
 * 
 * NOTE: We -always- perform this step to ensure that any debugger
 * that attaches to us post-launch of the application can get a
 * completed proctable
 */
void orte_debugger_init_after_spawn(orte_job_t *jdata)
{
    orte_proc_t *proc;
    orte_app_context_t *appctx;
    orte_vpid_t i, j;
    opal_buffer_t buf;
    orte_process_name_t rank0;
    int rc;

    if (MPIR_proctable) {
        /* already initialized */
        return;
    }

    /* fill in the proc table for the application processes */
    
    if (orte_debug_flag) {
        opal_output(0, "Info: Setting up debugger process table for applications\n");
    }
    
    MPIR_debug_state = 1;
    
    /* set the total number of processes in the job */
    MPIR_proctable_size = jdata->num_procs;
    
    /* allocate MPIR_proctable */
    MPIR_proctable = (struct MPIR_PROCDESC *) malloc(sizeof(struct MPIR_PROCDESC) *
                                                     MPIR_proctable_size);
    if (MPIR_proctable == NULL) {
        opal_output(0, "Error: Out of memory\n");
        return;
    }
    
    /* initialize MPIR_proctable */
    for (j=0; j < jdata->num_procs; j++) {
        if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, j))) {
            continue;
        }
        /* store this data in the location whose index
         * corresponds to the proc's rank
         */
        i = proc->name.vpid;
        if (NULL == (appctx = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, proc->app_idx))) {
            continue;
        }
        
        MPIR_proctable[i].host_name = strdup(proc->node->name);
        if ( 0 == strncmp(appctx->app, OPAL_PATH_SEP, 1 )) { 
            MPIR_proctable[i].executable_name = 
            opal_os_path( false, appctx->app, NULL ); 
        } else {
            MPIR_proctable[i].executable_name =
            opal_os_path( false, appctx->cwd, appctx->app, NULL ); 
        } 
        MPIR_proctable[i].pid = proc->pid;
    }

    if (orte_debug_flag) {
        dump();
    }

    /* if we are being launched under a debugger, then we must wait
     * for it to be ready to go and do some things to start the job
     */
    if (MPIR_being_debugged) {
        /* wait for all procs to have reported their contact info - this
         * ensures that (a) they are all into mpi_init, and (b) the system
         * has the contact info to successfully send a message to rank=0
         */
        ORTE_PROGRESSED_WAIT(false, jdata->num_reported, jdata->num_procs);
        
        (void) MPIR_Breakpoint();
        
        /* send a message to rank=0 to release it */
        OBJ_CONSTRUCT(&buf, opal_buffer_t); /* don't need anything in this */
        rank0.jobid = jdata->jobid;
        rank0.vpid = 0;
        if (0 > (rc = orte_rml.send_buffer(&rank0, &buf, ORTE_RML_TAG_DEBUGGER_RELEASE, 0))) {
            opal_output(0, "Error: could not send debugger release to MPI procs - error %s", ORTE_ERROR_NAME(rc));
        }
        OBJ_DESTRUCT(&buf);
    }
}
Esempio n. 17
0
static int discover(opal_list_t* nodelist, char *pbs_jobid)
{
    int32_t nodeid;
    orte_node_t *node;
    opal_list_item_t* item;
    FILE *fp;
    char *hostname;
    struct timeval start, stop;

    /* check for timing request - get start time if so */
    if (orte_timing) {
        gettimeofday(&start, NULL);
    }
    
    /* Ignore anything that the user already specified -- we're
       getting nodes only from TM. */

    /* TM "nodes" may actually correspond to PBS "VCPUs", which means
       there may be multiple "TM nodes" that correspond to the same
       physical node.  This doesn't really affect what we're doing
       here (we actually ignore the fact that they're duplicates --
       slightly inefficient, but no big deal); just mentioned for
       completeness... */

    /* setup the full path to the PBS file */
    filename = opal_os_path(false, mca_ras_tm_component.nodefile_dir,
                            pbs_jobid, NULL);
    fp = fopen(filename, "r");
    if (NULL == fp) {
        ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
        free(filename);
        return ORTE_ERR_FILE_OPEN_FAILURE;
    }

    /* Iterate through all the nodes and make an entry for each.  TM
       node ID's will never be duplicated, but they may end up
       resolving to the same hostname (i.e., vcpu's on a single
       host). */

    nodeid=0;
    while (NULL != (hostname = tm_getline(fp))) {

        OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                             "%s ras:tm:allocate:discover: got hostname %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostname));

        /* Remember that TM may list the same node more than once.  So
           we have to check for duplicates. */

        for (item = opal_list_get_first(nodelist);
             opal_list_get_end(nodelist) != item;
             item = opal_list_get_next(item)) {
            node = (orte_node_t*) item;
            if (0 == strcmp(node->name, hostname)) {
                ++node->slots;

                OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                                     "%s ras:tm:allocate:discover: found -- bumped slots to %d",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->slots));
                
                break;
            }
        }

        /* Did we find it? */

        if (opal_list_get_end(nodelist) == item) {

            /* Nope -- didn't find it, so add a new item to the list */
            
            OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                                 "%s ras:tm:allocate:discover: not found -- added to list",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            
            node = OBJ_NEW(orte_node_t);
            node->name = hostname;
            node->launch_id = nodeid;
            node->slots_inuse = 0;
            node->slots_max = 0;
            node->slots = 1;
            opal_list_append(nodelist, &node->super);
        } else {

            /* Yes, so we need to free the hostname that came back */
            free(hostname);
        }

        /* up the nodeid */
        nodeid++;
    }

    /* check for timing request - get stop time and report elapsed time if so */
    if (orte_timing) {
        gettimeofday(&stop, NULL);
        opal_output(0, "ras_tm: time to allocate is %ld usec",
                    (long int)((stop.tv_sec - start.tv_sec)*1000000 +
                               (stop.tv_usec - start.tv_usec)));
        gettimeofday(&start, NULL);
    }
    
    return ORTE_SUCCESS;
}
Esempio n. 18
0
/*
 * A job has aborted - so force cleanup of the session directory
 */
int
orte_session_dir_cleanup(orte_jobid_t jobid)
{
    int rc = ORTE_SUCCESS;
    char *tmp;
    char *job_session_dir=NULL;

    if (!orte_create_session_dirs) {
        /* didn't create them */
        return ORTE_SUCCESS;
    }
    
    /* need to setup the top_session_dir with the prefix */
    tmp = opal_os_path(false,
                       orte_process_info.tmpdir_base,
                       orte_process_info.top_session_dir, NULL);

    /* we can only blow away session directories for our job family */
    job_session_dir = orte_build_job_session_dir(tmp, ORTE_PROC_MY_NAME, jobid);
    if (NULL == job_session_dir) {
        rc = ORTE_ERR_OUT_OF_RESOURCE;
        goto CLEANUP;
    }
    
    if (ORTE_JOBID_WILDCARD != jobid) {
        opal_os_dirpath_destroy(job_session_dir, true, orte_dir_check_file);
    } else {
        /* if we want the session_dir removed for ALL jobids, then
         * just recursively blow the whole session away for our job family,
         * saving only output files
         */
        opal_os_dirpath_destroy(job_session_dir, true, orte_dir_check_file_output);
    }
    
    /* now attempt to eliminate the top level directory itself - this
     * will fail if anything is present, but ensures we cleanup if
     * we are the last one out
     */
    opal_os_dirpath_destroy(tmp, false, orte_dir_check_file);

    if (NULL != job_session_dir && opal_os_dirpath_is_empty(job_session_dir)) {
    	if (orte_debug_flag) {
    	    opal_output(0, "sess_dir_finalize: found job session dir empty - deleting");
    	}
    	rmdir(job_session_dir);
    } else {
    	if (orte_debug_flag) {
    	    opal_output(0, "sess_dir_finalize: job session dir not empty - leaving");
    	}
        goto CLEANUP;
    }

    if (opal_os_dirpath_is_empty(tmp)) {
    	if (orte_debug_flag) {
    	    opal_output(0, "sess_dir_finalize: found top session dir empty - deleting");
    	}
    	rmdir(tmp);
    } else {
    	if (orte_debug_flag) {
    	    opal_output(0, "sess_dir_finalize: top session dir not empty - leaving");
    	}
    }

CLEANUP:
    free(tmp);
    if (NULL != job_session_dir) free(job_session_dir);
    return rc;
}
Esempio n. 19
0
int
orte_session_dir_finalize(orte_process_name_t *proc)
{
    int rc;
    char *tmp;
    char *job_session_dir, *vpid, *proc_session_dir;

    if (!orte_create_session_dirs) {
        /* didn't create them */
        return ORTE_SUCCESS;
    }
    
    /* need to setup the top_session_dir with the prefix */
    tmp = opal_os_path(false,
                       orte_process_info.tmpdir_base,
                       orte_process_info.top_session_dir, NULL);
    
    /* define the proc and job session directories for this process */
    if (ORTE_SUCCESS != (rc = orte_util_convert_vpid_to_string(&vpid, proc->vpid))) {
        ORTE_ERROR_LOG(rc);
        free(tmp);
        return rc;
    }
    job_session_dir = orte_build_job_session_dir(tmp, proc, proc->jobid);
    if( NULL == job_session_dir) {
        free(tmp);
        free(vpid);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    proc_session_dir = opal_os_path( false, job_session_dir, vpid, NULL );
    if( NULL == proc_session_dir ) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        free(tmp);
        free(vpid);
        free(job_session_dir);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    
    opal_os_dirpath_destroy(proc_session_dir,
                            false, orte_dir_check_file);
    opal_os_dirpath_destroy(job_session_dir,
                            false, orte_dir_check_file);
    opal_os_dirpath_destroy(tmp,
                            false, orte_dir_check_file);

    if (opal_os_dirpath_is_empty(proc_session_dir)) {
    	if (orte_debug_flag) {
    	    opal_output(0, "sess_dir_finalize: found proc session dir empty - deleting");
    	}
    	rmdir(proc_session_dir);
    } else {
    	if (orte_debug_flag) {
    	    opal_output(0, "sess_dir_finalize: proc session dir not empty - leaving");
    	}
        goto CLEANUP;
    }

    if (opal_os_dirpath_is_empty(job_session_dir)) {
    	if (orte_debug_flag) {
    	    opal_output(0, "sess_dir_finalize: found job session dir empty - deleting");
    	}
    	rmdir(job_session_dir);
    } else {
    	if (orte_debug_flag) {
    	    opal_output(0, "sess_dir_finalize: job session dir not empty - leaving");
    	}
        goto CLEANUP;
    }

    if (opal_os_dirpath_is_empty(tmp)) {
    	if (orte_debug_flag) {
    	    opal_output(0, "sess_dir_finalize: found top session dir empty - deleting");
    	}
    	rmdir(tmp);
    } else {
    	if (orte_debug_flag) {
    	    opal_output(0, "sess_dir_finalize: top session dir not empty - leaving");
    	}
    }

CLEANUP:
    free(tmp);
    free(vpid);
    free(job_session_dir);
    free(proc_session_dir);
    return ORTE_SUCCESS;
}
Esempio n. 20
0
int
main(int argc, char *argv[])
{
    int exit_status = 0, ret, flags = 0, i;
    int exec_argc = 0, user_argc = 0;
    char **exec_argv = NULL, **user_argv = NULL;
    char *exec_command, *base_argv0 = NULL;
    bool disable_flags = true;
    bool real_flag = false;

    if (OPAL_SUCCESS != (ret = opal_init_util())) {
        return ret;
    }

    /****************************************************
     *
     * Setup compiler information
     *
     ****************************************************/

    base_argv0 = opal_basename(argv[0]);
#if defined(EXEEXT)
    if( 0 != strlen(EXEEXT) ) {
        char extension[] = EXEEXT;
        char* temp = strstr( base_argv0, extension );
        char* old_match = temp;
        while( NULL != temp ) {
            old_match = temp;
            temp = strstr( temp + 1, extension );
        }
        /* Only if there was a match of .exe, erase the last occurence of .exe */
        if ( NULL != old_match ) {
            *old_match = '\0';
        }
    }
#endif  /* defined(EXEEXT) */

    if (OPAL_SUCCESS != (ret = data_init(base_argv0))) {
        fprintf(stderr, "Error parsing data file %s: %s\n", base_argv0, opal_strerror(ret));
        return ret;
    }

    for (i = 1 ; i < argc && user_data_idx < 0 ; ++i) {
        user_data_idx = find_options_index(argv[i]);
    }
    /* if we didn't find a match, look for the NULL (base case) options */
    if (user_data_idx < 0) {
        user_data_idx = default_data_idx;
    }
    /* if we still didn't find a match, abort */
    if (user_data_idx < 0) {
        char *flat = opal_argv_join(argv, ' ');
        opal_show_help("help-opal-wrapper.txt", "no-options-support", true,
                       base_argv0, flat, NULL);
        free(flat);
        exit(1);
    }

    /* compiler */
    load_env_data(options_data[user_data_idx].project_short, options_data[user_data_idx].compiler_env, &options_data[user_data_idx].compiler);

    /* preprocessor flags */
    load_env_data_argv(options_data[user_data_idx].project_short, "CPPFLAGS", &options_data[user_data_idx].preproc_flags);

    /* compiler flags */
    load_env_data_argv(options_data[user_data_idx].project_short, options_data[user_data_idx].compiler_flags_env,
                       &options_data[user_data_idx].comp_flags);

    /* linker flags */
    load_env_data_argv(options_data[user_data_idx].project_short, "LDFLAGS", &options_data[user_data_idx].link_flags);

    /* libs */
    load_env_data_argv(options_data[user_data_idx].project_short, "LIBS", &options_data[user_data_idx].libs);


    /****************************************************
     *
     * Sanity Checks
     *
     ****************************************************/
    
    if (NULL != options_data[user_data_idx].req_file) {
        /* make sure the language is supported */
        if (0 == strcmp(options_data[user_data_idx].req_file, "not supported")) {
            opal_show_help("help-opal-wrapper.txt", "no-language-support", true,
                           options_data[user_data_idx].language, base_argv0, NULL);
            exit_status = 1;
            goto cleanup;
        }

        if (options_data[user_data_idx].req_file[0] != '\0') {
            char *filename;
            struct stat buf;
            filename = opal_os_path( false, options_data[user_data_idx].path_libdir, options_data[user_data_idx].req_file, NULL );
            if (0 != stat(filename, &buf)) {
                opal_show_help("help-opal-wrapper.txt", "file-not-found", true,
                               base_argv0, options_data[user_data_idx].req_file, options_data[user_data_idx].language, NULL);
            }
        }
    }

    /****************************************************
     *
     * Parse user flags
     *
     ****************************************************/
    flags = COMP_WANT_COMMAND|COMP_WANT_PREPROC|
        COMP_WANT_COMPILE|COMP_WANT_LINK;

    user_argv = opal_argv_copy(argv + 1);
    user_argc = opal_argv_count(user_argv);

    for (i = 0 ; i < user_argc ; ++i) {
        if (0 == strncmp(user_argv[i], "-showme", strlen("-showme")) ||
            0 == strncmp(user_argv[i], "--showme", strlen("--showme")) ||
            0 == strncmp(user_argv[i], "-show", strlen("-show")) ||
            0 == strncmp(user_argv[i], "--show", strlen("--show"))) {
            bool done_now = false;

            /* check for specific things we want to see.  First three
               still invoke all the building routines.  Last set want
               to parse out certain flags, so we don't go through the
               normal build routine - skip to cleanup. */
            if (0 == strncmp(user_argv[i], "-showme:command", strlen("-showme:command")) ||
                0 == strncmp(user_argv[i], "--showme:command", strlen("--showme:command"))) {
                flags = COMP_WANT_COMMAND;
                /* we know what we want, so don't process any more args */
                done_now = true;
            } else if (0 == strncmp(user_argv[i], "-showme:compile", strlen("-showme:compile")) ||
                0 == strncmp(user_argv[i], "--showme:compile", strlen("--showme:compile"))) {
                flags = COMP_WANT_PREPROC|COMP_WANT_COMPILE;
                /* we know what we want, so don't process any more args */
                done_now = true;
            } else if (0 == strncmp(user_argv[i], "-showme:link", strlen("-showme:link")) ||
                       0 == strncmp(user_argv[i], "--showme:link", strlen("--showme:link"))) {
                flags = COMP_WANT_COMPILE|COMP_WANT_LINK;
                /* we know what we want, so don't process any more args */
                done_now = true;
            } else if (0 == strncmp(user_argv[i], "-showme:incdirs", strlen("-showme:incdirs")) ||
                       0 == strncmp(user_argv[i], "--showme:incdirs", strlen("--showme:incdirs"))) {
                print_flags(options_data[user_data_idx].preproc_flags, OPAL_INCLUDE_FLAG);
                goto cleanup;
            } else if (0 == strncmp(user_argv[i], "-showme:libdirs", strlen("-showme:libdirs")) ||
                       0 == strncmp(user_argv[i], "--showme:libdirs", strlen("--showme:libdirs"))) {
                print_flags(options_data[user_data_idx].link_flags, OPAL_LIBDIR_FLAG);
                goto cleanup;
            } else if (0 == strncmp(user_argv[i], "-showme:libs", strlen("-showme:libs")) ||
                       0 == strncmp(user_argv[i], "--showme:libs", strlen("--showme:libs"))) {
                print_flags(options_data[user_data_idx].libs, "-l");
                goto cleanup;
            } else if (0 == strncmp(user_argv[i], "-showme:version", strlen("-showme:version")) ||
                       0 == strncmp(user_argv[i], "--showme:version", strlen("--showme:version"))) {
                opal_show_help("help-opal-wrapper.txt", "version", false,
                               argv[0], options_data[user_data_idx].project, options_data[user_data_idx].version, options_data[user_data_idx].language, NULL);
                goto cleanup;
            } else if (0 == strncmp(user_argv[i], "-showme:", strlen("-showme:")) ||
                       0 == strncmp(user_argv[i], "--showme:", strlen("--showme:"))) {
                opal_show_help("help-opal-wrapper.txt", "usage", true,
                               argv[0], options_data[user_data_idx].project, NULL);
                goto cleanup;
            }

            flags |= (COMP_DRY_RUN|COMP_SHOW_ERROR);
            /* remove element from user_argv */
            opal_argv_delete(&user_argc, &user_argv, i, 1);
            --i;

            if (done_now) {
                disable_flags = false;
                break;
            }

        } else if (0 == strcmp(user_argv[i], "-c")) {
            flags &= ~COMP_WANT_LINK;
            real_flag = true;
        } else if (0 == strcmp(user_argv[i], "-E") || 
                   0 == strcmp(user_argv[i], "-M")) {
            flags &= ~(COMP_WANT_COMPILE | COMP_WANT_LINK);
            real_flag = true;
        } else if (0 == strcmp(user_argv[i], "-S")) {
            flags &= ~COMP_WANT_LINK;
            real_flag = true;
        } else if (0 == strcmp(user_argv[i], "-lpmpi")) {
            flags |= COMP_WANT_PMPI;

            /* remove element from user_argv */
            opal_argv_delete(&user_argc, &user_argv, i, 1);
            --i;
        } else if ('-' != user_argv[i][0]) {
            disable_flags = false;
            flags |= COMP_SHOW_ERROR;
            real_flag = true;
        } else { 
            /* if the option flag is one that we use to determine
               which set of compiler data to use, don't count it as a
               real option */
            if (find_options_index(user_argv[i]) < 0) {
                real_flag = true;
            }
        }
    }

    /* clear out the want_flags if we got no arguments not starting
       with a - (dash) and -showme wasn't given OR -showme was given
       and we had at least one more non-showme argument that started
       with a - (dash) and no other non-dash arguments.  Some examples:

       opal_wrapper                : clear our flags
       opal_wrapper -v             : clear our flags
       opal_wrapper -E a.c         : don't clear our flags
       opal_wrapper a.c            : don't clear our flags
       opal_wrapper -showme        : don't clear our flags
       opal_wrapper -showme -v     : clear our flags
       opal_wrapper -showme -E a.c : don't clear our flags
       opal_wrapper -showme a.c    : don't clear our flags
    */
    if (disable_flags && !((flags & COMP_DRY_RUN) && !real_flag)) {
        flags &= ~(COMP_WANT_PREPROC|COMP_WANT_COMPILE|COMP_WANT_LINK);
    }

#if !OMPI_ENABLE_MPI_PROFILING
    /* sanity check */
    if (flags & COMP_WANT_PMPI) {
	    opal_show_help("help-opal-wrapper.txt", "no-profiling-support", true,
		               argv[0], NULL);
    }
#endif


    /****************************************************
     *
     * Assemble the command line
     *
     ****************************************************/

    /* compiler (may be multiple arguments, so split) */
    if (flags & COMP_WANT_COMMAND) {
        exec_argv = opal_argv_split(options_data[user_data_idx].compiler, ' ');
        exec_argc = opal_argv_count(exec_argv);
    } else {
        exec_argv = (char **) malloc(sizeof(char*));
        exec_argv[0] = NULL;
        exec_argc = 0;
    }

    /* Per https://svn.open-mpi.org/trac/ompi/ticket/2201, add all the
       user arguments before anything else. */
    opal_argv_insert(&exec_argv, exec_argc, user_argv);
    exec_argc = opal_argv_count(exec_argv);

    /* preproc flags */
    if (flags & COMP_WANT_PREPROC) {
        opal_argv_insert(&exec_argv, exec_argc, options_data[user_data_idx].preproc_flags);
        exec_argc = opal_argv_count(exec_argv);
    }

    /* compiler flags */
    if (flags & COMP_WANT_COMPILE) {
        opal_argv_insert(&exec_argv, exec_argc, options_data[user_data_idx].comp_flags);
        /* Deal with languages like Fortran 90 that have special
           places and flags for modules or whatever */
        if (options_data[user_data_idx].module_option != NULL) {
            char *line;
            asprintf(&line, "%s%s", options_data[user_data_idx].module_option, options_data[user_data_idx].path_libdir);
            opal_argv_append_nosize(&exec_argv, line);
            free(line);
        }
        exec_argc = opal_argv_count(exec_argv);
    }

    /* link flags and libs */
    if (flags & COMP_WANT_LINK) {
        opal_argv_insert(&exec_argv, exec_argc, options_data[user_data_idx].link_flags);
        exec_argc = opal_argv_count(exec_argv);

        opal_argv_insert(&exec_argv, exec_argc, options_data[user_data_idx].libs);
        exec_argc = opal_argv_count(exec_argv);
    }


    /****************************************************
     *
     * Execute the command
     *
     ****************************************************/

    if (flags & COMP_DRY_RUN) {
        exec_command = opal_argv_join(exec_argv, ' ');
        printf("%s\n", exec_command);
    } else {
        char *tmp;

#if 0
        exec_command = opal_argv_join(exec_argv, ' ');
        printf("command: %s\n", exec_command);
#endif

        tmp = opal_path_findv(exec_argv[0], 0, environ, NULL);
        if (NULL == tmp) {
            opal_show_help("help-opal-wrapper.txt", "no-compiler-found", true,
                           exec_argv[0], NULL);
            errno = 0;
            exit_status = 1;
        }  else {
            int status;

            free(exec_argv[0]);
            exec_argv[0] = tmp;
            ret = opal_few(exec_argv, &status);
            exit_status = WIFEXITED(status) ? WEXITSTATUS(status) :
                              (WIFSIGNALED(status) ? WTERMSIG(status) :
                                  (WIFSTOPPED(status) ? WSTOPSIG(status) : 255));
            if( (OPAL_SUCCESS != ret) || ((0 != exit_status) && (flags & COMP_SHOW_ERROR)) ) {
                char* exec_command = opal_argv_join(exec_argv, ' ');
                if( OPAL_SUCCESS != ret ) {
                    opal_show_help("help-opal-wrapper.txt", "spawn-failed", true,
                                   exec_argv[0], strerror(status), exec_command, NULL);
                } else {
#if 0
                    opal_show_help("help-opal-wrapper.txt", "compiler-failed", true,
                                   exec_argv[0], exit_status, exec_command, NULL);
#endif
                }
                free(exec_command);
            }
        }
    }

    /****************************************************
     *
     * Cleanup
     *
     ****************************************************/
 cleanup:

    opal_argv_free(exec_argv);
    opal_argv_free(user_argv);
    if (NULL != base_argv0) free(base_argv0);

    if (OPAL_SUCCESS != (ret = data_finalize())) {
        return ret;
    }

    if (OPAL_SUCCESS != (ret = opal_finalize_util())) {
        return ret;
    }

    return exit_status;
}
Esempio n. 21
0
/** 
 * This function attempts to remove a directory along with all the
 * files in it.  If the recursive variable is non-zero, then it will
 * try to recursively remove all directories.  If provided, the
 * callback function is executed prior to the directory or file being
 * removed.  If the callback returns non-zero, then no removal is
 * done.
 */
int opal_os_dirpath_destroy(const char *path,
                            bool recursive,
                            opal_os_dirpath_destroy_callback_fn_t cbfunc)
{
    int rc, exit_status = OPAL_SUCCESS;
    bool is_dir = false;

    if (NULL == path) {  /* protect against error */
        return OPAL_ERROR;
    }

    /*
     * Make sure we have access to the the base directory
     */
    if( OPAL_SUCCESS != (rc = opal_os_dirpath_access(path, 0) ) ) {
        exit_status = rc;
        goto cleanup;
    }

#ifndef __WINDOWS__
    {
        DIR *dp;
        struct dirent *ep;
        char *filenm;
#ifndef HAVE_STRUCT_DIRENT_D_TYPE 
        struct stat buf;
#endif

        /* Open up the directory */
        dp = opendir(path);
        if (NULL == dp) {
            return OPAL_ERROR;
        }

        while (NULL != (ep = readdir(dp)) ) {
            /* skip:
            *  - . and ..
            */
            if ((0 == strcmp(ep->d_name, ".")) ||
                (0 == strcmp(ep->d_name, "..")) ) {
                    continue;
            }
        
            /* Check to see if it is a directory */
            is_dir = false;

            /* Create a pathname.  This is not always needed, but it makes
             * for cleaner code just to create it here.  Note that we are
             * allocating memory here, so we need to free it later on. 
             */
            filenm = opal_os_path(false, path, ep->d_name, NULL);
#ifdef HAVE_STRUCT_DIRENT_D_TYPE
            if (DT_DIR == ep->d_type) {
                is_dir = true;
            }
#else /* have dirent.d_type */
            rc = stat(filenm, &buf);
            if (rc < 0 || S_ISDIR(buf.st_mode)) {
                is_dir = true;
            }
#endif /* have dirent.d_type */        

            /*
              * If not recursively decending, then if we find a directory then fail
              * since we were not told to remove it.
              */
            if( is_dir && !recursive) {
                /* Set the error indicating that we found a directory,
                 * but continue removing files
                 */
                exit_status = OPAL_ERROR;
                free(filenm);
                continue;
            }

            /* Will the caller allow us to remove this file/directory? */
            if(NULL != cbfunc) {
                /*
                 * Caller does not wish to remove this file/directory,
                 * continue with the rest of the entries
                 */
                if( ! (cbfunc(path, ep->d_name)) ) {
                    free(filenm);
                    continue;
                }
            }
            /* Directories are recursively destroyed */
            if(is_dir) {
                rc = opal_os_dirpath_destroy(filenm, recursive, cbfunc);
                free(filenm);
                if (OPAL_SUCCESS != rc) {
                    exit_status = rc;
                    closedir(dp);
                    goto cleanup;
                }
            }
            /* Files are removed right here */
            else {
                if( 0 != (rc = unlink(filenm) ) ) {
                    exit_status = OPAL_ERROR;
                }
                free(filenm);
            }
        }
    
        /* Done with this directory */
        closedir(dp);
    }
#else
    {
        char search_path[MAX_PATH];
        HANDLE file;
        WIN32_FIND_DATA file_data;
        TCHAR *file_name;
    
        strncpy(search_path, path, strlen(path)+1);
        strncat (search_path, OPAL_PATH_SEP"*", 3);
        file = FindFirstFile(search_path, &file_data);
    
        if (INVALID_HANDLE_VALUE == file) {
            FindClose(file);
            return OPAL_ERROR;
        } 
    
        do {
            /* Skip . and .. */
            if ((0 == strcmp(file_data.cFileName, ".")) ||
                (0 == strcmp(file_data.cFileName, "..")) ) {
                    continue;
            }
        
            is_dir = false;
            if(file_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
                is_dir = true;
            } 
            /*
             * If not recursively decending, then if we find a directory then fail
             * since we were not told to remove it.
             */
            if( is_dir && !recursive) {
                /* Set the error indicating that we found a directory,
                 * but continue removing files
                 */
                exit_status = OPAL_ERROR;
                continue;
            }
        
            /* Will the caller allow us to remove this file/directory */
            if( NULL != cbfunc) {
                /*
                 * Caller does not wish to remove this file/directory,
                 * continue with the rest of the entries
                 */
                if( !cbfunc(path, file_data.cFileName) ) {
                    continue;
                }
            }

            file_name = opal_os_path(false, path, file_data.cFileName, NULL);
            if( is_dir ) {
                if( OPAL_SUCCESS != (rc = opal_os_dirpath_destroy(file_name,
                                                                  recursive,
                                                                  cbfunc) ) ) {
                    exit_status = rc;
                    free(file_name);
                    FindClose(file);
                    goto cleanup;
                }
            } else {
                DeleteFile(file_name);
            }
            free(file_name);
        } while( 0 != FindNextFile(file, &file_data) );
    
        FindClose(file);
    }
#endif
    
 cleanup:
    
    /*
     * If the directory is empty, them remove it
     */
    if(opal_os_dirpath_is_empty(path)) {
        rmdir(path);
    }

    return exit_status;
}
Esempio n. 22
0
static int rte_init(void)
{
    int ret;
    char *error = NULL;
    char *contact_path, *jobfam_dir;
    orte_job_t *jdata;
    orte_node_t *node;
    orte_proc_t *proc;
    orte_app_context_t *app;
    char **aliases, *aptr;

    /* run the prolog */
    if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
        error = "orte_ess_base_std_prolog";
        goto error;
    }

    /* setup callback for SIGPIPE */
    setup_sighandler(SIGPIPE, &epipe_handler, epipe_signal_callback);
    /** setup callbacks for abort signals - from this point
     * forward, we need to abort in a manner that allows us
     * to cleanup. However, we cannot directly use libevent
     * to trap these signals as otherwise we cannot respond
     * to them if we are stuck in an event! So instead use
     * the basic POSIX trap functions to handle the signal,
     * and then let that signal handler do some magic to
     * avoid the hang
     *
     * NOTE: posix traps don't allow us to do anything major
     * in them, so use a pipe tied to a libevent event to
     * reach a "safe" place where the termination event can
     * be created
     */
    pipe(term_pipe);
    /* setup an event to attempt normal termination on signal */
    opal_event_set(orte_event_base, &term_handler, term_pipe[0], OPAL_EV_READ, clean_abort, NULL);
    opal_event_set_priority(&term_handler, ORTE_ERROR_PRI);
    opal_event_add(&term_handler, NULL);

    /* Set both ends of this pipe to be close-on-exec so that no
       children inherit it */
    if (opal_fd_set_cloexec(term_pipe[0]) != OPAL_SUCCESS ||
        opal_fd_set_cloexec(term_pipe[1]) != OPAL_SUCCESS) {
        error = "unable to set the pipe to CLOEXEC";
        goto error;
    }

    /* point the signal trap to a function that will activate that event */
    signal(SIGTERM, abort_signal_callback);
    signal(SIGINT, abort_signal_callback);
    signal(SIGHUP, abort_signal_callback);

    /** setup callbacks for signals we should foward */
    setup_sighandler(SIGUSR1, &sigusr1_handler, signal_forward_callback);
    setup_sighandler(SIGUSR2, &sigusr2_handler, signal_forward_callback);
    setup_sighandler(SIGTSTP, &sigtstp_handler, signal_forward_callback);
    setup_sighandler(SIGCONT, &sigcont_handler, signal_forward_callback);
    signals_set = true;

#if OPAL_HAVE_HWLOC
    {
        hwloc_obj_t obj;
        unsigned i, j;

        /* get the local topology */
        if (NULL == opal_hwloc_topology) {
            if (OPAL_SUCCESS != opal_hwloc_base_get_topology()) {
                error = "topology discovery";
                goto error;
            }
        }

        /* remove the hostname from the topology. Unfortunately, hwloc
         * decided to add the source hostname to the "topology", thus
         * rendering it unusable as a pure topological description. So
         * we remove that information here.
         */
        obj = hwloc_get_root_obj(opal_hwloc_topology);
        for (i=0; i < obj->infos_count; i++) {
            if (NULL == obj->infos[i].name ||
                NULL == obj->infos[i].value) {
                continue;
            }
            if (0 == strncmp(obj->infos[i].name, "HostName", strlen("HostName"))) {
                free(obj->infos[i].name);
                free(obj->infos[i].value);
                /* left justify the array */
                for (j=i; j < obj->infos_count-1; j++) {
                    obj->infos[j] = obj->infos[j+1];
                }
                obj->infos[obj->infos_count-1].name = NULL;
                obj->infos[obj->infos_count-1].value = NULL;
                obj->infos_count--;
                break;
            }
        }

        if (4 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) {
            opal_output(0, "%s Topology Info:", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO);
        }
    }
#endif

    /* if we are using xml for output, put an mpirun start tag */
    if (orte_xml_output) {
        fprintf(orte_xml_fp, "<mpirun>\n");
        fflush(orte_xml_fp);
    }

    /* open and setup the opal_pstat framework so we can provide
     * process stats if requested
     */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&opal_pstat_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "opal_pstat_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = opal_pstat_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "opal_pstat_base_select";
        goto error;
    }
  
    /* open and setup the state machine */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_state_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_state_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_state_base_select";
        goto error;
    }

    /* open the errmgr */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_errmgr_base_open";
        goto error;
    }

    /* Since we are the HNP, then responsibility for
     * defining the name falls to the PLM component for our
     * respective environment - hence, we have to open the PLM
     * first and select that component.
     */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_plm_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_plm_base_open";
        goto error;
    }
    
    if (ORTE_SUCCESS != (ret = orte_plm_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_plm_base_select";
        goto error;
    }
    /* if we were spawned by a singleton, our jobid was given to us */
    if (NULL != orte_ess_base_jobid) {
        if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_jobid(&ORTE_PROC_MY_NAME->jobid, orte_ess_base_jobid))) {
            ORTE_ERROR_LOG(ret);
            error = "convert_string_to_jobid";
            goto error;
        }
        ORTE_PROC_MY_NAME->vpid = 0;
    } else {
        if (ORTE_SUCCESS != (ret = orte_plm.set_hnp_name())) {
            ORTE_ERROR_LOG(ret);
            error = "orte_plm_set_hnp_name";
            goto error;
        }
    }
    /* Setup the communication infrastructure */
    
    /*
     * OOB Layer
     */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_oob_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_oob_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_oob_base_select";
        goto error;
    }

    /*
     * Runtime Messaging Layer
     */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_rml_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_rml_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_rml_base_select";
        goto error;
    }

    if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_errmgr_base_select";
        goto error;
    }
    
    /* setup the global job and node arrays */
    orte_job_data = OBJ_NEW(opal_pointer_array_t);
    if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_job_data,
                                                       1,
                                                       ORTE_GLOBAL_ARRAY_MAX_SIZE,
                                                       1))) {
        ORTE_ERROR_LOG(ret);
        error = "setup job array";
        goto error;
    }
    
    orte_node_pool = OBJ_NEW(opal_pointer_array_t);
    if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_pool,
                                                       ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
                                                       ORTE_GLOBAL_ARRAY_MAX_SIZE,
                                                       ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) {
        ORTE_ERROR_LOG(ret);
        error = "setup node array";
        goto error;
    }
    orte_node_topologies = OBJ_NEW(opal_pointer_array_t);
    if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_topologies,
                                                       ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
                                                       ORTE_GLOBAL_ARRAY_MAX_SIZE,
                                                       ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) {
        ORTE_ERROR_LOG(ret);
        error = "setup node topologies array";
        goto error;
    }

    /* init the nidmap - just so we register that verbosity */
    orte_util_nidmap_init(NULL);

    /* Setup the job data object for the daemons */        
    /* create and store the job data object */
    jdata = OBJ_NEW(orte_job_t);
    jdata->jobid = ORTE_PROC_MY_NAME->jobid;
    opal_pointer_array_set_item(orte_job_data, 0, jdata);
    /* mark that the daemons have reported as we are the
     * only ones in the system right now, and we definitely
     * are running!
     */
    jdata->state = ORTE_JOB_STATE_DAEMONS_REPORTED;
   
    /* every job requires at least one app */
    app = OBJ_NEW(orte_app_context_t);
    opal_pointer_array_set_item(jdata->apps, 0, app);
    jdata->num_apps++;

    /* create and store a node object where we are */
    node = OBJ_NEW(orte_node_t);
    node->name = strdup(orte_process_info.nodename);
    node->index = opal_pointer_array_set_item(orte_node_pool, 0, node);
#if OPAL_HAVE_HWLOC
    /* add it to the array of known topologies */
    opal_pointer_array_add(orte_node_topologies, opal_hwloc_topology);
#endif

    /* create and store a proc object for us */
    proc = OBJ_NEW(orte_proc_t);
    proc->name.jobid = ORTE_PROC_MY_NAME->jobid;
    proc->name.vpid = ORTE_PROC_MY_NAME->vpid;
    
    proc->pid = orte_process_info.pid;
    proc->rml_uri = orte_rml.get_contact_info();
    proc->state = ORTE_PROC_STATE_RUNNING;
    OBJ_RETAIN(node);  /* keep accounting straight */
    proc->node = node;
    opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc);

    /* record that the daemon (i.e., us) is on this node 
     * NOTE: we do not add the proc object to the node's
     * proc array because we are not an application proc.
     * Instead, we record it in the daemon field of the
     * node object
     */
    OBJ_RETAIN(proc);   /* keep accounting straight */
    node->daemon = proc;
    ORTE_FLAG_SET(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED);
    node->state = ORTE_NODE_STATE_UP;
    
    /* if we are to retain aliases, get ours */
    if (orte_retain_aliases) {
        aliases = NULL;
        opal_ifgetaliases(&aliases);
        /* add our own local name to it */
        opal_argv_append_nosize(&aliases, orte_process_info.nodename);
        aptr = opal_argv_join(aliases, ',');
        opal_argv_free(aliases);
        orte_set_attribute(&node->attributes, ORTE_NODE_ALIAS, ORTE_ATTR_LOCAL, aptr, OPAL_STRING);
        free(aptr);
    }

    /* record that the daemon job is running */
    jdata->num_procs = 1;
    jdata->state = ORTE_JOB_STATE_RUNNING;
    /* obviously, we have "reported" */
    jdata->num_reported = 1;

    /*
     * Routed system
     */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_rml_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_routed_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_routed_base_select";
        goto error;
    }
    
    /* datastore - ensure we don't pickup the pmi component, but
     * don't override anything set by user
     */
    if (NULL == getenv("OMPI_MCA_dstore")) {
        putenv("OMPI_MCA_dstore=^pmi");
    }
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&opal_dstore_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "opal_dstore_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = opal_dstore_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "opal_dstore_base_select";
        goto error;
    }
    /* create the handles */
    if (0 > (opal_dstore_peer = opal_dstore.open("PEER"))) {
        error = "opal dstore global";
        ret = ORTE_ERR_FATAL;
        goto error;
    }
    if (0 > (opal_dstore_internal = opal_dstore.open("INTERNAL"))) {
        error = "opal dstore internal";
        ret = ORTE_ERR_FATAL;
        goto error;
    }
    if (0 > (opal_dstore_nonpeer = opal_dstore.open("NONPEER"))) {
        error = "opal dstore nonpeer";
        ret = ORTE_ERR_FATAL;
        goto error;
    }

    /*
     * Group communications
     */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_grpcomm_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_grpcomm_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_grpcomm_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_grpcomm_base_select";
        goto error;
    }

    /* Now provide a chance for the PLM
     * to perform any module-specific init functions. This
     * needs to occur AFTER the communications are setup
     * as it may involve starting a non-blocking recv
     */
    if (ORTE_SUCCESS != (ret = orte_plm.init())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_plm_init";
        goto error;
    }

    /*
     * Setup the remaining resource
     * management and errmgr frameworks - application procs
     * and daemons do not open these frameworks as they only use
     * the hnp proxy support in the PLM framework.
     */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_ras_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_ras_base_open";
        goto error;
    }    
    if (ORTE_SUCCESS != (ret = orte_ras_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_ras_base_find_available";
        goto error;
    }
    
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rmaps_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_rmaps_base_open";
        goto error;
    }    
    if (ORTE_SUCCESS != (ret = orte_rmaps_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_rmaps_base_find_available";
        goto error;
    }
#if OPAL_HAVE_HWLOC
    {
        char *coprocessors, **sns;
        uint32_t h;
        int idx;

        /* if a topology file was given, then the rmaps framework open
         * will have reset our topology. Ensure we always get the right
         * one by setting our node topology afterwards
         */
        node->topology = opal_hwloc_topology;

        /* init the hash table, if necessary */
        if (NULL == orte_coprocessors) {
            orte_coprocessors = OBJ_NEW(opal_hash_table_t);
            opal_hash_table_init(orte_coprocessors, orte_process_info.num_procs);
        }
        /* detect and add any coprocessors */
        coprocessors = opal_hwloc_base_find_coprocessors(opal_hwloc_topology);
        if (NULL != coprocessors) {
            /* separate the serial numbers of the coprocessors
             * on this host
             */
            sns = opal_argv_split(coprocessors, ',');
            for (idx=0; NULL != sns[idx]; idx++) {
                /* compute the hash */
                OPAL_HASH_STR(sns[idx], h);
                /* mark that this coprocessor is hosted by this node */
                opal_hash_table_set_value_uint32(orte_coprocessors, h, (void*)&(ORTE_PROC_MY_NAME->vpid));
            }
            opal_argv_free(sns);
            free(coprocessors);
            orte_coprocessors_detected = true;
        }
        /* see if I am on a coprocessor */
        coprocessors = opal_hwloc_base_check_on_coprocessor();
        if (NULL != coprocessors) {
            orte_set_attribute(&node->attributes, ORTE_NODE_SERIAL_NUMBER, ORTE_ATTR_LOCAL, coprocessors, OPAL_STRING);
            free(coprocessors);
            orte_coprocessors_detected = true;
        }
    }
#endif

    /* Open/select the odls */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_odls_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_odls_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_odls_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_odls_base_select";
        goto error;
    }
    
    /* Open/select the rtc */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rtc_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_rtc_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_rtc_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_rtc_base_select";
        goto error;
    }
    
    /* enable communication with the rml */
    if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_rml.enable_comm";
        goto error;
    }

    /* we are an hnp, so update the contact info field for later use */
    orte_process_info.my_hnp_uri = orte_rml.get_contact_info();
    proc->rml_uri = strdup(orte_process_info.my_hnp_uri);

    /* we are also officially a daemon, so better update that field too */
    orte_process_info.my_daemon_uri = strdup(orte_process_info.my_hnp_uri);
    
    /* setup the orte_show_help system to recv remote output */
    orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_SHOW_HELP,
                            ORTE_RML_PERSISTENT, orte_show_help_recv, NULL);

    /* setup my session directory */
    if (orte_create_session_dirs) {
        OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
                             "%s setting up session dir with\n\ttmpdir: %s\n\thost %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base,
                             orte_process_info.nodename));
        
        /* take a pass thru the session directory code to fillin the
         * tmpdir names - don't create anything yet
         */
        if (ORTE_SUCCESS != (ret = orte_session_dir(false,
                                                    orte_process_info.tmpdir_base,
                                                    orte_process_info.nodename, NULL,
                                                    ORTE_PROC_MY_NAME))) {
            ORTE_ERROR_LOG(ret);
            error = "orte_session_dir define";
            goto error;
        }
        /* clear the session directory just in case there are
         * stale directories laying around
         */
        orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);

        /* now actually create the directory tree */
        if (ORTE_SUCCESS != (ret = orte_session_dir(true,
                                                    orte_process_info.tmpdir_base,
                                                    orte_process_info.nodename, NULL,
                                                    ORTE_PROC_MY_NAME))) {
            ORTE_ERROR_LOG(ret);
            error = "orte_session_dir";
            goto error;
        }
        
        /* Once the session directory location has been established, set
           the opal_output hnp file location to be in the
           proc-specific session directory. */
        opal_output_set_output_file_info(orte_process_info.proc_session_dir,
                                         "output-", NULL, NULL);
        
        /* save my contact info in a file for others to find */
        jobfam_dir = opal_dirname(orte_process_info.job_session_dir);
        contact_path = opal_os_path(false, jobfam_dir, "contact.txt", NULL);
        free(jobfam_dir);
        
        OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
                             "%s writing contact file %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             contact_path));
        
        if (ORTE_SUCCESS != (ret = orte_write_hnp_contact_file(contact_path))) {
            OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
                                 "%s writing contact file failed with error %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_ERROR_NAME(ret)));
        } else {
            OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
                                 "%s wrote contact file",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        }
        free(contact_path);
    }

    /* setup the routed info - the selected routed component
     * will know what to do. 
     */
    if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_routed.init_routes";
        goto error;
    }
    
    /* setup I/O forwarding system - must come after we init routes */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_iof_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_iof_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_iof_base_select";
        goto error;
    }
    
    /* setup the FileM */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_filem_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_filem_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_filem_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_filem_base_select";
        goto error;
    }

#if OPAL_ENABLE_FT_CR == 1
    /*
     * Setup the SnapC
     */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_snapc_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_snapc_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_sstore_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_sstore_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_snapc_base_select(ORTE_PROC_IS_HNP, ORTE_PROC_IS_APP))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_snapc_base_select";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_sstore_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_sstore_base_select";
        goto error;
    }

    /* For HNP, ORTE doesn't need the OPAL CR stuff */
    opal_cr_set_enabled(false);
#else
    opal_cr_set_enabled(false);
#endif

    /*
     * Initalize the CR setup
     * Note: Always do this, even in non-FT builds.
     * If we don't some user level tools may hang.
     */
    if (ORTE_SUCCESS != (ret = orte_cr_init())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_cr_init";
        goto error;
    }
    
    /* setup the dfs framework */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_dfs_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_dfs_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_dfs_select";
        goto error;
    }

    /* if a tool has launched us and is requesting event reports,
     * then set its contact info into the comm system
     */
    if (orte_report_events) {
        if (ORTE_SUCCESS != (ret = orte_util_comm_connect_tool(orte_report_events_uri))) {
            error = "could not connect to tool";
            goto error;
        }
    }

    /* We actually do *not* want an HNP to voluntarily yield() the
       processor more than necessary.  Orterun already blocks when
       it is doing nothing, so it doesn't use any more CPU cycles than
       it should; but when it *is* doing something, we do not want it
       to be unnecessarily delayed because it voluntarily yielded the
       processor in the middle of its work.
     
       For example: when a message arrives at orterun, we want the
       OS to wake us up in a timely fashion (which most OS's
       seem good about doing) and then we want orterun to process
       the message as fast as possible.  If orterun yields and lets
       aggressive MPI applications get the processor back, it may be a
       long time before the OS schedules orterun to run again
       (particularly if there is no IO event to wake it up).  Hence,
       routed OOB messages (for example) may be significantly delayed
       before being delivered to MPI processes, which can be
       problematic in some scenarios (e.g., COMM_SPAWN, BTL's that
       require OOB messages for wireup, etc.). */
    opal_progress_set_yield_when_idle(false);

    return ORTE_SUCCESS;

 error:
    if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {
        orte_show_help("help-orte-runtime.txt",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }
    
    return ORTE_ERR_SILENT;
}
Esempio n. 23
0
int
main(int argc, char *argv[])
{
    int exit_status = 0, ret, flags = 0, i;
    int exec_argc = 0, user_argc = 0;
    char **exec_argv = NULL, **user_argv = NULL;
    char *exec_command, *base_argv0 = NULL;
    bool disable_flags = true;
    bool real_flag = false;

    if (OPAL_SUCCESS != (ret = opal_init_util(&argc, &argv))) {
        return ret;
    }

    /****************************************************
     *
     * Setup compiler information
     *
     ****************************************************/

    base_argv0 = opal_basename(argv[0]);
#if defined(EXEEXT)
    if( 0 != strlen(EXEEXT) ) {
        char extension[] = EXEEXT;
        char* temp = strstr( base_argv0, extension );
        char* old_match = temp;
        while( NULL != temp ) {
            old_match = temp;
            temp = strstr( temp + 1, extension );
        }
        /* Only if there was a match of .exe, erase the last occurence of .exe */
        if ( NULL != old_match ) {
            *old_match = '\0';
        }
    }
#endif  /* defined(EXEEXT) */

    if (OPAL_SUCCESS != (ret = data_init(base_argv0))) {
        fprintf(stderr, "Error parsing data file %s: %s\n", base_argv0, opal_strerror(ret));
        return ret;
    }

    for (i = 1 ; i < argc && user_data_idx < 0 ; ++i) {
        user_data_idx = find_options_index(argv[i]);
    }
    /* if we didn't find a match, look for the NULL (base case) options */
    if (user_data_idx < 0) {
        user_data_idx = default_data_idx;
    }
    /* if we still didn't find a match, abort */
    if (user_data_idx < 0) {
        char *flat = opal_argv_join(argv, ' ');
        opal_show_help("help-opal-wrapper.txt", "no-options-support", true,
                       base_argv0, flat, NULL);
        free(flat);
        exit(1);
    }

    /* compiler */
    load_env_data(options_data[user_data_idx].project_short, options_data[user_data_idx].compiler_env, &options_data[user_data_idx].compiler);

    /* preprocessor flags */
    load_env_data_argv(options_data[user_data_idx].project_short, "CPPFLAGS", &options_data[user_data_idx].preproc_flags);

    /* compiler flags */
    load_env_data_argv(options_data[user_data_idx].project_short, options_data[user_data_idx].compiler_flags_env,
                       &options_data[user_data_idx].comp_flags);

    /* linker flags */
    load_env_data_argv(options_data[user_data_idx].project_short, "LDFLAGS", &options_data[user_data_idx].link_flags);

    /* libs */
    load_env_data_argv(options_data[user_data_idx].project_short, "LIBS", &options_data[user_data_idx].libs);


    /****************************************************
     *
     * Sanity Checks
     *
     ****************************************************/
    
    if (NULL != options_data[user_data_idx].req_file) {
        /* make sure the language is supported */
        if (0 == strcmp(options_data[user_data_idx].req_file, "not supported")) {
            opal_show_help("help-opal-wrapper.txt", "no-language-support", true,
                           options_data[user_data_idx].language, base_argv0, NULL);
            exit_status = 1;
            goto cleanup;
        }

        if (options_data[user_data_idx].req_file[0] != '\0') {
            char *filename;
            struct stat buf;
            filename = opal_os_path( false, options_data[user_data_idx].path_libdir, options_data[user_data_idx].req_file, NULL );
            if (0 != stat(filename, &buf)) {
                opal_show_help("help-opal-wrapper.txt", "file-not-found", true,
                               base_argv0, options_data[user_data_idx].req_file, options_data[user_data_idx].language, NULL);
            }
        }
    }

    /****************************************************
     *
     * Parse user flags
     *
     ****************************************************/
    flags = COMP_WANT_COMMAND|COMP_WANT_PREPROC|
        COMP_WANT_COMPILE|COMP_WANT_LINK;

    user_argv = opal_argv_copy(argv + 1);
    user_argc = opal_argv_count(user_argv);

    for (i = 0 ; i < user_argc ; ++i) {
        if (0 == strncmp(user_argv[i], "-showme", strlen("-showme")) ||
            0 == strncmp(user_argv[i], "--showme", strlen("--showme")) ||
            0 == strncmp(user_argv[i], "-show", strlen("-show")) ||
            0 == strncmp(user_argv[i], "--show", strlen("--show"))) {
            bool done_now = false;

            /* check for specific things we want to see.  First three
               still invoke all the building routines.  Last set want
               to parse out certain flags, so we don't go through the
               normal build routine - skip to cleanup. */
            if (0 == strncmp(user_argv[i], "-showme:command", strlen("-showme:command")) ||
                0 == strncmp(user_argv[i], "--showme:command", strlen("--showme:command"))) {
                flags = COMP_WANT_COMMAND;
                /* we know what we want, so don't process any more args */
                done_now = true;
            } else if (0 == strncmp(user_argv[i], "-showme:compile", strlen("-showme:compile")) ||
                0 == strncmp(user_argv[i], "--showme:compile", strlen("--showme:compile"))) {
                flags = COMP_WANT_PREPROC|COMP_WANT_COMPILE;
                /* we know what we want, so don't process any more args */
                done_now = true;
            } else if (0 == strncmp(user_argv[i], "-showme:link", strlen("-showme:link")) ||
                       0 == strncmp(user_argv[i], "--showme:link", strlen("--showme:link"))) {
                flags = COMP_WANT_COMPILE|COMP_WANT_LINK;
                /* we know what we want, so don't process any more args */
                done_now = true;
            } else if (0 == strncmp(user_argv[i], "-showme:incdirs", strlen("-showme:incdirs")) ||
                       0 == strncmp(user_argv[i], "--showme:incdirs", strlen("--showme:incdirs"))) {
                print_flags(options_data[user_data_idx].preproc_flags, OPAL_INCLUDE_FLAG);
                goto cleanup;
            } else if (0 == strncmp(user_argv[i], "-showme:libdirs", strlen("-showme:libdirs")) ||
                       0 == strncmp(user_argv[i], "--showme:libdirs", strlen("--showme:libdirs"))) {
                print_flags(options_data[user_data_idx].link_flags, OPAL_LIBDIR_FLAG);
                goto cleanup;
            } else if (0 == strncmp(user_argv[i], "-showme:libs", strlen("-showme:libs")) ||
                       0 == strncmp(user_argv[i], "--showme:libs", strlen("--showme:libs"))) {
                print_flags(options_data[user_data_idx].libs, "-l");
                goto cleanup;
            } else if (0 == strncmp(user_argv[i], "-showme:version", strlen("-showme:version")) ||
                       0 == strncmp(user_argv[i], "--showme:version", strlen("--showme:version"))) {
                char * str;
                str = opal_show_help_string("help-opal-wrapper.txt",
                                            "version", false,
                                            argv[0], options_data[user_data_idx].project, options_data[user_data_idx].version, options_data[user_data_idx].language, NULL);
                if (NULL != str) {
                    printf("%s", str);
                    free(str);
                }
                goto cleanup;
            } else if (0 == strncmp(user_argv[i], "-showme:help", strlen("-showme:help")) ||
                       0 == strncmp(user_argv[i], "--showme:help", strlen("--showme:help"))) {
                char *str;
                str = opal_show_help_string("help-opal-wrapper.txt", "usage", 
                                            false, argv[0],
                                            options_data[user_data_idx].project, 
                                            NULL);
                if (NULL != str) {
                    printf("%s", str);
                    free(str);
                }

                exit_status = 0;
                goto cleanup;
            } else if (0 == strncmp(user_argv[i], "-showme:", strlen("-showme:")) ||
                       0 == strncmp(user_argv[i], "--showme:", strlen("--showme:"))) {
                fprintf(stderr, "%s: unrecognized option: %s\n", argv[0],
                        user_argv[i]);
                fprintf(stderr, "Type '%s --showme:help' for usage.\n",
                        argv[0]);
                exit_status = 1;
                goto cleanup;
            }

            flags |= (COMP_DRY_RUN|COMP_SHOW_ERROR);
            /* remove element from user_argv */
            opal_argv_delete(&user_argc, &user_argv, i, 1);
            --i;

            if (done_now) {
                disable_flags = false;
                break;
            }

        } else if (0 == strcmp(user_argv[i], "-c")) {
            flags &= ~COMP_WANT_LINK;
            real_flag = true;
        } else if (0 == strcmp(user_argv[i], "-E") || 
                   0 == strcmp(user_argv[i], "-M")) {
            flags &= ~(COMP_WANT_COMPILE | COMP_WANT_LINK);
            real_flag = true;
        } else if (0 == strcmp(user_argv[i], "-S")) {
            flags &= ~COMP_WANT_LINK;
            real_flag = true;
        } else if (0 == strcmp(user_argv[i], "-lpmpi")) {
            flags |= COMP_WANT_PMPI;

            /* remove element from user_argv */
            opal_argv_delete(&user_argc, &user_argv, i, 1);
            --i;
        } else if (0 == strcmp(user_argv[i], "-static") ||
                   0 == strcmp(user_argv[i], "--static") ||
                   0 == strcmp(user_argv[i], "-Bstatic") ||
                   0 == strcmp(user_argv[i], "-Wl,-static") ||
                   0 == strcmp(user_argv[i], "-Wl,--static") ||
                   0 == strcmp(user_argv[i], "-Wl,-Bstatic")) {
            flags |= COMP_WANT_STATIC;
        } else if (0 == strcmp(user_argv[i], "-dynamic") ||
                   0 == strcmp(user_argv[i], "--dynamic") ||
                   0 == strcmp(user_argv[i], "-Bdynamic") ||
                   0 == strcmp(user_argv[i], "-Wl,-dynamic") ||
                   0 == strcmp(user_argv[i], "-Wl,--dynamic") ||
                   0 == strcmp(user_argv[i], "-Wl,-Bdynamic")) {
            flags &= ~COMP_WANT_STATIC;
        } else if (0 == strcmp(user_argv[i], "--openmpi:linkall")) {
            /* This is an intentionally undocummented wrapper compiler
               switch.  It should only be used by Open MPI developers
               -- not end users.  It will cause mpicc to use the
               static library list, even if we're compiling
               dynamically (i.e., it'll specifically -lopen-rte and
               -lopen-pal (and all their dependent libs)).  We provide
               this flag for test MPI applications that also invoke
               ORTE and/or OPAL function calls.

               On some systems (e.g., OS X), if the top-level
               application calls ORTE/OPAL functions and you don't -l
               ORTE and OPAL, then the functions won't be resolved at
               link time (i.e., the implicit library dependencies of
               libmpi won't be pulled in at link time), and therefore
               the link will fail.  This flag will cause the wrapper
               to explicitly list the ORTE and OPAL libs on the
               underlying compiler command line, so the application
               will therefore link properly. */
            flags |= COMP_WANT_LINKALL;

            /* remove element from user_argv */
            opal_argv_delete(&user_argc, &user_argv, i, 1);
        } else if ('-' != user_argv[i][0]) {
            disable_flags = false;
            flags |= COMP_SHOW_ERROR;
            real_flag = true;
        } else { 
            /* if the option flag is one that we use to determine
               which set of compiler data to use, don't count it as a
               real option */
            if (find_options_index(user_argv[i]) < 0) {
                real_flag = true;
            }
        }
    }

    /* clear out the want_flags if we got no arguments not starting
       with a - (dash) and -showme wasn't given OR -showme was given
       and we had at least one more non-showme argument that started
       with a - (dash) and no other non-dash arguments.  Some examples:

       opal_wrapper                : clear our flags
       opal_wrapper -v             : clear our flags
       opal_wrapper -E a.c         : don't clear our flags
       opal_wrapper a.c            : don't clear our flags
       opal_wrapper -showme        : don't clear our flags
       opal_wrapper -showme -v     : clear our flags
       opal_wrapper -showme -E a.c : don't clear our flags
       opal_wrapper -showme a.c    : don't clear our flags
    */
    if (disable_flags && !((flags & COMP_DRY_RUN) && !real_flag)) {
        flags &= ~(COMP_WANT_PREPROC|COMP_WANT_COMPILE|COMP_WANT_LINK);
    }

    /****************************************************
     *
     * Assemble the command line
     *
     ****************************************************/

    /* compiler (may be multiple arguments, so split) */
    if (flags & COMP_WANT_COMMAND) {
        exec_argv = opal_argv_split(options_data[user_data_idx].compiler, ' ');
        exec_argc = opal_argv_count(exec_argv);
    } else {
        exec_argv = (char **) malloc(sizeof(char*));
        exec_argv[0] = NULL;
        exec_argc = 0;
    }

    /* This error would normally not happen unless the user edits the 
       wrapper data files manually */
    if (NULL == exec_argv) {
        opal_show_help("help-opal-wrapper.txt", "no-compiler-specified", true);
        return 1;
    }

    if (flags & COMP_WANT_COMPILE) {
        opal_argv_insert(&exec_argv, exec_argc,
                         options_data[user_data_idx].comp_flags_prefix);
        exec_argc = opal_argv_count(exec_argv);
    }

    /* Per https://svn.open-mpi.org/trac/ompi/ticket/2201, add all the
       user arguments before anything else. */
    opal_argv_insert(&exec_argv, exec_argc, user_argv);
    exec_argc = opal_argv_count(exec_argv);

    /* preproc flags */
    if (flags & COMP_WANT_PREPROC) {
        opal_argv_insert(&exec_argv, exec_argc, options_data[user_data_idx].preproc_flags);
        exec_argc = opal_argv_count(exec_argv);
    }

    /* compiler flags */
    if (flags & COMP_WANT_COMPILE) {
        opal_argv_insert(&exec_argv, exec_argc, options_data[user_data_idx].comp_flags);
        exec_argc = opal_argv_count(exec_argv);
    }

    /* link flags and libs */
    if (flags & COMP_WANT_LINK) {
        bool have_static_lib;
        bool have_dyn_lib;
        bool use_static_libs;
        char *filename1, *filename2;
        struct stat buf;

        opal_argv_insert(&exec_argv, exec_argc, options_data[user_data_idx].link_flags);
        exec_argc = opal_argv_count(exec_argv);

        /* Are we linking statically?  If so, decide what libraries to
           list.  It depends on two factors:

           1. Was --static (etc.) specified?
           2. Does OMPI have static, dynamic, or both libraries installed?

           Here's a matrix showing what we'll do in all 6 cases:

           What's installed    --static    no --static
           ----------------    ----------  -----------
           ompi .so libs       -lmpi       -lmpi
           ompi .a libs        all         all
           ompi both libs      all         -lmpi

        */

        filename1 = opal_os_path( false, options_data[user_data_idx].path_libdir, options_data[user_data_idx].static_lib_file, NULL );
        if (0 == stat(filename1, &buf)) {
            have_static_lib = true;
        } else {
            have_static_lib = false;
        }

        filename2 = opal_os_path( false, options_data[user_data_idx].path_libdir, options_data[user_data_idx].dyn_lib_file, NULL );
        if (0 == stat(filename2, &buf)) {
            have_dyn_lib = true;
        } else {
            have_dyn_lib = false;
        }

        /* Determine which set of libs to use: dynamic or static.  Be
           pedantic to make the code easy to read. */
        if (flags & COMP_WANT_LINKALL) {
            /* If --openmpi:linkall was specified, list all the libs
               (i.e., the static libs) if they're available, either in
               static or dynamic form. */
            if (have_static_lib || have_dyn_lib) {
                use_static_libs = true;
            } else {
                fprintf(stderr, "The linkall option has failed as we were unable to find either static or dynamic libs\n"
                        "Files looked for:\n  Static: %s\n  Dynamic: %s\n",
                        filename1, filename2);
                free(filename1);
                free(filename2);
                exit(1);
            }
        } else if (flags & COMP_WANT_STATIC) {
            /* If --static (or something like it) was specified, if we
               have the static libs, then use them.  Otherwise, use
               the dynamic libs. */
            if (have_static_lib) {
                use_static_libs = true;
            } else {
                use_static_libs = false;
            }
        } else {
            /* If --static (or something like it) was NOT specified
               (or if --dyanic, or something like it, was specified),
               if we have the dynamic libs, then use them.  Otherwise,
               use the static libs. */
            if (have_dyn_lib) {
                use_static_libs = false;
            } else {
                use_static_libs = true;
            }
        }
        free(filename1);
        free(filename2);

        if (use_static_libs) {
            opal_argv_insert(&exec_argv, exec_argc, options_data[user_data_idx].libs_static);
        } else {
            opal_argv_insert(&exec_argv, exec_argc, options_data[user_data_idx].libs);
        }
        exec_argc = opal_argv_count(exec_argv);
    }


    /****************************************************
     *
     * Execute the command
     *
     ****************************************************/

    if (flags & COMP_DRY_RUN) {
        exec_command = opal_argv_join(exec_argv, ' ');
        printf("%s\n", exec_command);
    } else {
        char *tmp;

#if 0
        exec_command = opal_argv_join(exec_argv, ' ');
        printf("command: %s\n", exec_command);
#endif

        tmp = opal_path_findv(exec_argv[0], 0, environ, NULL);
        if (NULL == tmp) {
            opal_show_help("help-opal-wrapper.txt", "no-compiler-found", true,
                           exec_argv[0], NULL);
            errno = 0;
            exit_status = 1;
        }  else {
            int status;

            free(exec_argv[0]);
            exec_argv[0] = tmp;
            ret = opal_few(exec_argv, &status);
            exit_status = WIFEXITED(status) ? WEXITSTATUS(status) :
                              (WIFSIGNALED(status) ? WTERMSIG(status) :
                                  (WIFSTOPPED(status) ? WSTOPSIG(status) : 255));
            if( (OPAL_SUCCESS != ret) || ((0 != exit_status) && (flags & COMP_SHOW_ERROR)) ) {
                char* exec_command = opal_argv_join(exec_argv, ' ');
                if( OPAL_SUCCESS != ret ) {
                    opal_show_help("help-opal-wrapper.txt", "spawn-failed", true,
                                   exec_argv[0], strerror(status), exec_command, NULL);
                } else {
#if 0
                    opal_show_help("help-opal-wrapper.txt", "compiler-failed", true,
                                   exec_argv[0], exit_status, exec_command, NULL);
#endif
                }
                free(exec_command);
            }
        }
    }

    /****************************************************
     *
     * Cleanup
     *
     ****************************************************/
 cleanup:

    opal_argv_free(exec_argv);
    opal_argv_free(user_argv);
    if (NULL != base_argv0) free(base_argv0);

    if (OPAL_SUCCESS != (ret = data_finalize())) {
        return ret;
    }

    if (OPAL_SUCCESS != (ret = opal_finalize_util())) {
        return ret;
    }

    return exit_status;
}
Esempio n. 24
0
static void check_config(int fd, short args, void *cbdata)
{
    DIR *dirp = NULL;
    struct dirent * dir_entry;
    struct stat buf;
    int i, rc, n, j, k, m;
    char *fullpath;
    orcm_cfgi_app_t *app, *app2, *aptr;
    orcm_cfgi_run_t *run;
    orcm_cfgi_exec_t *exec, *exec2, *eptr;
    orcm_cfgi_version_t *vers, *vers2, *vptr;
    orcm_cfgi_bin_t *bin;
    orte_job_t *jdat, *jptr;
    orte_app_context_t *ax;
    opal_pointer_array_t found_apps;
    bool found, dir_found;
    orcm_cfgi_caddy_t *caddy;

    /* take control */
    ORTE_ACQUIRE_THREAD(&orcm_cfgi_base.ctl);

    OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                         "%s CHECKING CONFIG DIRECTORY %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         mca_orcm_cfgi_file_component.dir));

    /* Open the directory so we can get a listing */
    if (NULL == (dirp = opendir(mca_orcm_cfgi_file_component.dir))) {
        if (0 < opal_output_get_verbosity(orcm_cfgi_base.output)) {
            orte_show_help("help-cfgi-file.txt", "no-dir",
                           true, mca_orcm_cfgi_file_component.dir);
        }
        dir_found = false;
        goto restart;
    }
    dir_found = true;

    /* setup the array of apps */
    OBJ_CONSTRUCT(&found_apps, opal_pointer_array_t);
    opal_pointer_array_init(&found_apps, 16, INT_MAX, 16);

    /* cycle thru the directory */
    while (NULL != (dir_entry = readdir(dirp))) {
        /* Skip the obvious */
        if (0 == strncmp(dir_entry->d_name, ".", strlen(".")) ||
            0 == strncmp(dir_entry->d_name, "..", strlen(".."))) {
            continue;
        }
        /* Skip editor-related files */
        if (NULL != strstr(dir_entry->d_name, ".swp") ||
            NULL != strstr(dir_entry->d_name, ".swx") ||
            NULL != strchr(dir_entry->d_name, '~')) {
            continue;
        }
        if ('#' == dir_entry->d_name[0]) {
            continue;
        }

        /* parse the file, adding all found apps to the array */
        fullpath = opal_os_path(false, mca_orcm_cfgi_file_component.dir, dir_entry->d_name, NULL);
        if (ORCM_SUCCESS != (rc = parse_file(fullpath, &found_apps))) {
            OPAL_OUTPUT_VERBOSE((1, orcm_cfgi_base.output,
                                 "%s CANNOT PARSE FILE %s: %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 dir_entry->d_name, ORTE_ERROR_NAME(rc)));
        }
        free(fullpath);
    }
    closedir(dirp);

    /* cycle thru the installed apps */
    for (i=0; i < orcm_cfgi_base.installed_apps.size; i++) {
        if (NULL == (app = (orcm_cfgi_app_t*)opal_pointer_array_get_item(&orcm_cfgi_base.installed_apps, i))) {
            continue;
        }
        app->modified = false;
        /* is this app present in the found apps? */
        app2 = NULL;
        for (j=0; j < found_apps.size; j++) {
            if (NULL == (aptr = (orcm_cfgi_app_t*)opal_pointer_array_get_item(&found_apps, j))) {
                continue;
            }
            if (0 == strcmp(app->application, aptr->application)) {
                app2 = aptr;
                /* remove it from the found_apps array as we will now process it */
                opal_pointer_array_set_item(&found_apps, j, NULL);
                break;
            }
        }
        if (NULL == app2) {
            OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                 "%s APP %s IS NO LONGER INSTALLED",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 app->application));
            /* no longer present - remove this object from the installed array */
            opal_pointer_array_set_item(&orcm_cfgi_base.installed_apps, app->idx, NULL);
            /* find all instances */
            for (j=0; j < app->instances.size; j++) {
                if (NULL == (run = (orcm_cfgi_run_t*)opal_pointer_array_get_item(&app->instances, j))) {
                    continue;
                }
                OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                     "%s APP %s IS NO LONGER INSTALLED - KILLING INSTANCE %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     app->application, run->instance));
                /* remove it from the array */
                opal_pointer_array_set_item(&app->instances, j, NULL);
                run->app = NULL;
                run->app_idx = -1;
                /* delink all the binaries */
                for (k=0; k < run->binaries.size; k++) {
                    if (NULL == (bin = (orcm_cfgi_bin_t*)opal_pointer_array_get_item(&run->binaries, k))) {
                        continue;
                    }
                    bin->vers = NULL;
                    bin->exec = NULL;
                }
                /* kill the associated executing job, if any */
                caddy = OBJ_NEW(orcm_cfgi_caddy_t);
                caddy->cmd = ORCM_CFGI_KILL_JOB;
                /* retain the run object as it has -not- been removed from
                 * the running config
                 */
                OBJ_RETAIN(run);
                caddy->run = run;
                /* send it off to be processed */
                opal_fd_write(orcm_cfgi_base.launch_pipe[1], sizeof(orcm_cfgi_caddy_t*), &caddy);
            }
            /* release it */
            OBJ_RELEASE(app);
            continue;
        }
        /* app was present - did we modify it */
        if (app->max_instances != app2->max_instances) {
            app->max_instances = app2->max_instances;
            app->modified = true;
        }
        /* did we remove any executables? */
        for (j=0; j < app->executables.size; j++) {
            if (NULL == (exec = (orcm_cfgi_exec_t*)opal_pointer_array_get_item(&app->executables, j))) {
                continue;
            }
            /* is it present in the found apps */
            exec2 = NULL;
            for (k=0; k < app2->executables.size; k++) {
                if (NULL == (eptr = (orcm_cfgi_exec_t*)opal_pointer_array_get_item(&app2->executables, k))) {
                    continue;
                }
                if (0 == strcmp(exec->appname, eptr->appname)) {
                    exec2 = eptr;
                    break;
                }
            }
            if (NULL == exec2) {
                OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                     "%s APP %s EXECUTABLE %s IS NO LONGER INSTALLED",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     app->application, exec->appname));
                /* this executable has been removed */
                opal_pointer_array_set_item(&app->executables, j, NULL);
                /* find all instances
                 * that use this executable and kill associated binaries
                 */
                for (k=0; k < app->instances.size; k++) {
                    if (NULL == (run = (orcm_cfgi_run_t*)opal_pointer_array_get_item(&app->instances, k))) {
                        continue;
                    }
                    /* search the binaries to see if they include this executable */
                    for (n=0; n < run->binaries.size; n++) {
                        if (NULL == (bin = (orcm_cfgi_bin_t*)opal_pointer_array_get_item(&run->binaries, n))) {
                            continue;
                        }
                        if (0 == strcmp(bin->appname, exec->appname)) {
                            OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                                 "%s APP %s EXECUTABLE %s IS NO LONGER INSTALLED - KILLING BINARY %s",
                                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                                 app->application, exec->appname, bin->binary));
                            exec->total_procs -= bin->num_procs;
                            /* ensure we know it is no longer pointing to an installed exec/version */
                            bin->vers = NULL;
                            bin->exec = NULL;
                            /* kill the associated executing exec, if any */
                            caddy = OBJ_NEW(orcm_cfgi_caddy_t);
                            caddy->cmd = ORCM_CFGI_KILL_EXE;
                            /* retain the run object as it has -not- been removed from
                             * the running config
                             */
                            OBJ_RETAIN(run);
                            caddy->run = run;
                            /* send it off to be processed */
                            opal_fd_write(orcm_cfgi_base.launch_pipe[1], sizeof(orcm_cfgi_caddy_t*), &caddy);
                            break;
                        }
                    }
                }
                OBJ_RELEASE(exec);
                continue;
            }
            /* kept the exec, but was it modified */
            if (exec->process_limit != exec2->process_limit) {
                exec->process_limit = exec2->process_limit;
                app->modified = true;
            }
            /* did we remove any versions */
            for (k=0; k < exec->versions.size; k++) {
                if (NULL == (vers = (orcm_cfgi_version_t*)opal_pointer_array_get_item(&exec->versions, k))) {
                    continue;
                }
                /* is it present in the found app/exec */
                vers2 = NULL;
                for (n=0; n < exec2->versions.size; n++) {
                    if (NULL == (vptr = (orcm_cfgi_version_t*)opal_pointer_array_get_item(&exec2->versions, n))) {
                        continue;
                    }
                    if (0 == strcmp(vptr->version, vers->version)) {
                        vers2 = vptr;
                        /* since we have this version, we can remove it from
                         * the found app
                         */
                        opal_pointer_array_set_item(&exec2->versions, n, NULL);
                        break;
                    }
                }
                if (NULL != vers2) {
                    OBJ_RELEASE(vers2);
                    continue;
                }
                OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                     "%s APP %s EXEC %s VERSION %s IS NO LONGER INSTALLED",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     app->application, exec->appname, vers->version));
                /* nope - been removed, so take it out of the array */
                opal_pointer_array_set_item(&exec->versions, k, NULL);
                /* find all instances and kill this version */
                for (m=0; m < app->instances.size; m++) {
                    if (NULL == (run = (orcm_cfgi_run_t*)opal_pointer_array_get_item(&app->instances, m))) {
                        continue;
                    }
                    /* search the binaries to see if they include this version */
                    for (n=0; n < run->binaries.size; n++) {
                        if (NULL == (bin = (orcm_cfgi_bin_t*)opal_pointer_array_get_item(&run->binaries, n))) {
                            continue;
                        }
                        if (0 == strcmp(bin->appname, exec->appname) &&
                            0 == strcmp(bin->version, vers->version)) {
                            OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                                 "%s APP %s EXECUTABLE %s VERSION %s IS NO LONGER INSTALLED - KILLING BINARY %s",
                                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                                 app->application, exec->appname, vers->version, bin->binary));
                            exec->total_procs -= bin->num_procs;
                            /* ensure we know it is no longer pointing to an installed version */
                            bin->vers = NULL;
                            /* kill the associated executing exec, if any */
                            caddy = OBJ_NEW(orcm_cfgi_caddy_t);
                            caddy->cmd = ORCM_CFGI_KILL_EXE;
                            /* retain the run object as it has -not- been removed from
                             * the running config
                             */
                            OBJ_RETAIN(run);
                            caddy->run = run;
                            /* send it off to be processed */
                            opal_fd_write(orcm_cfgi_base.launch_pipe[1], sizeof(orcm_cfgi_caddy_t*), &caddy);
                            break;
                        }
                    }
                }
                /* cleanup */
                OBJ_RELEASE(vers);
            }
        }
        /* did we add any executables or versions */
        for (k=0; k < app2->executables.size; k++) {
            if (NULL == (exec2 = (orcm_cfgi_exec_t*)opal_pointer_array_get_item(&app2->executables, k))) {
                continue;
            }
            exec = NULL;
            for (j=0; j < app->executables.size; j++) {
                if (NULL == (eptr = (orcm_cfgi_exec_t*)opal_pointer_array_get_item(&app->executables, j))) {
                    continue;
                }
                if (0 == strcmp(eptr->appname, exec2->appname)) {
                    exec = eptr;
                    break;
                }
            }
            if (NULL == exec) {
                OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                     "%s APP %s ADDING EXECUTABLE %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     app->application, exec2->appname));
                /* added this exec - just move it across */
                opal_pointer_array_set_item(&app2->executables, k, NULL);
                exec2->idx = opal_pointer_array_add(&app->executables, exec2);
                app->modified = true;
                continue;
            }
            /* exec already present, and we dealt with mods above - so
             * see if any versions were added.
             */
            for (j=0; j < exec2->versions.size; j++) {
                if (NULL == (vers = (orcm_cfgi_version_t*)opal_pointer_array_get_item(&exec2->versions, j))) {
                    continue;
                }
                /* if already present, ignore */
                vers2 = NULL;
                for (n=0; n < exec->versions.size; n++) {
                    if (NULL == (vptr = (orcm_cfgi_version_t*)opal_pointer_array_get_item(&exec->versions, n))) {
                        continue;
                    }
                    if (0 == strcmp(vptr->version, vers->version)) {
                        vers2 = vptr;
                        break;
                    }
                }
                if (NULL == vers2) {
                    OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                         "%s APP %s ADDING EXECUTABLE %s VERSION %s",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                         app->application, exec2->appname, vers->version));
                    opal_pointer_array_set_item(&exec2->versions, j, NULL);
                    vers->exec = exec;
                    vers->idx = opal_pointer_array_add(&exec->versions, vers);
                    app->modified = true;
                } else {
                    OBJ_RELEASE(vers2);
                }
            }
        }
        /* done with this entry */
        OBJ_RELEASE(app2);
    }

    /* any added applications get handled now - anything still in found_apps
     * would have been added
     */
    for (j=0; j < found_apps.size; j++) {
        if (NULL == (aptr = (orcm_cfgi_app_t*)opal_pointer_array_get_item(&found_apps, j))) {
            continue;
        }
        OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                             "%s ADDING APP %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             aptr->application));
        /* just shift the entry to the installed_apps array */
        aptr->idx = opal_pointer_array_add(&orcm_cfgi_base.installed_apps, aptr);
        /* mark it as modified so it will be handled below */
        aptr->modified = true;
    }
    OBJ_DESTRUCT(&found_apps);

    /* check installed vs configd for anything needing starting,
     * but only check modified apps
     */
    check_installed(false);

 restart:
#ifdef HAVE_SYS_INOTIFY_H
    if (dir_found) {
        if (timer_in_use) {
            /* redefine the event to use inotify now
             * that the dir has been found
             */
            if (0 > (watch = inotify_add_watch(notifier, mca_orcm_cfgi_file_component.dir,
                                               IN_DELETE | IN_MODIFY | IN_MOVE))) {
                close(notifier);
                opal_event_evtimer_add(probe_ev, &probe_time);
            } else {
                opal_event_del(probe_ev);
                opal_event_set(opal_event_base, probe_ev, notifier,
                               OPAL_EV_READ|OPAL_EV_PERSIST, inotify_handler, NULL);
                opal_event_add(probe_ev, 0);
                timer_in_use = false;
            }
        } else {
            /* reset the event */
            opal_event_add(probe_ev, 0);
        }
    } else {
        /* restart the timer so we keep looking for it */
        opal_event_evtimer_add(probe_ev, &probe_time);
    }
#else
    /* restart the timer */
    opal_event_evtimer_add(probe_ev, &probe_time);
#endif
    /* release control */
    ORTE_RELEASE_THREAD(&orcm_cfgi_base.ctl);
}
Esempio n. 25
0
/**
 * Initialization of data structures for running under a debugger
 * using the MPICH/TotalView parallel debugger interface. This stage
 * of initialization must occur after stage2 of spawn and is invoked
 * via a callback.
 * 
 * @param jobid  The jobid returned by spawn.
 */
void orte_totalview_init_after_spawn(orte_jobid_t jobid)
{
    orte_job_map_t *map;
    opal_list_item_t *item, *item2;
    orte_mapped_node_t *node;
    orte_mapped_proc_t *proc;
    orte_app_context_t *appctx;
    orte_std_cntr_t i;
    int rc;

    if (MPIR_proctable) {
        /* already initialized */
        return;
    }

    if (0) { /* debugging daemons <<-- needs work */

        if (orte_debug_flag) {
            opal_output(0, "Info: Setting up debugger process table for daemons\n");
        }

    } else {

        /*
         * Debugging applications or not being debugged.
         *
         * Either way, fill in the proc table for the application
         * processes in case someone attaches later.
         */

        if (orte_debug_flag) {
            opal_output(0, "Info: Setting up debugger process table for applications\n");
        }

        MPIR_debug_state = 1;

        /* Get the resource map for this job */

        rc = orte_rmaps.get_job_map(&map, jobid);
        if (ORTE_SUCCESS != rc) {
            opal_output(0, "Error: Can't get resource map\n");
            ORTE_ERROR_LOG(rc);
        }

        /* find the total number of processes in the job */

        for (i=0; i < map->num_apps; i++) {
            MPIR_proctable_size += map->apps[i]->num_procs;
        }

        /* allocate MPIR_proctable */

        MPIR_proctable = (struct MPIR_PROCDESC *) malloc(sizeof(struct MPIR_PROCDESC) *
                                                         MPIR_proctable_size);
        if (MPIR_proctable == NULL) {
            opal_output(0, "Error: Out of memory\n");
            OBJ_RELEASE(map);
        }

        /* initialize MPIR_proctable */

        for (item =  opal_list_get_first(&map->nodes);
             item != opal_list_get_end(&map->nodes);
             item =  opal_list_get_next(item)) {
            node = (orte_mapped_node_t*)item;
            
            for (item2 = opal_list_get_first(&node->procs);
                 item2 != opal_list_get_end(&node->procs);
                 item2 = opal_list_get_next(item2)) {
                proc = (orte_mapped_proc_t*)item2;
                appctx = map->apps[proc->app_idx];
                /* store this data in the location whose index
                 * corresponds to the proc's rank
                 */
                i = proc->rank;
                
                MPIR_proctable[i].host_name = strdup(node->nodename);
                if ( 0 == strncmp(appctx->app, OPAL_PATH_SEP, 1 )) {
                   MPIR_proctable[i].executable_name =
                     opal_os_path( false, appctx->app, NULL );
                } else {
                   MPIR_proctable[i].executable_name =
                     opal_os_path( false, appctx->cwd, appctx->app, NULL );
                }
                MPIR_proctable[i].pid = proc->pid;
            }
        }
        OBJ_RELEASE(map);
    }

    if (orte_debug_flag) {
        dump();
    }

    (void) MPIR_Breakpoint();
}
Esempio n. 26
0
/*
 * Construct the fullpath to the session directory
 */
int
orte_session_dir_get_name(char **fulldirpath,
                          char **return_prefix,  /* This will come back as the valid tmp dir */
                          char **return_frontend,
                          char *hostid,
                          char *batchid, 
                          orte_process_name_t *proc) {
    char *hostname  = NULL, 
        *batchname = NULL,
        *sessions  = NULL, 
        *user      = NULL, 
        *prefix = NULL,
        *frontend = NULL,
        *jobfam = NULL,
        *job = NULL,
        *vpidstr = NULL;
    bool prefix_provided = false;
    int exit_status = ORTE_SUCCESS;
    size_t len;
    int uid;
    struct passwd *pwdent;
    
    /* Ensure that system info is set */
    orte_proc_info();

     /* get the name of the user */
    uid = getuid();
#ifdef HAVE_GETPWUID
    pwdent = getpwuid(uid);
#else
    pwdent = NULL;
#endif
    if (NULL != pwdent) {
        user = strdup(pwdent->pw_name);
    } else {
        orte_show_help("help-orte-runtime.txt",
                       "orte:session:dir:nopwname", true);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    
    /*
     * set the 'hostname'
     */
    if( NULL != hostid) { /* User specified version */
        hostname = strdup(hostid);
    }
    else {            /* check if it is set elsewhere */
        if( NULL != orte_process_info.nodename)
            hostname = strdup(orte_process_info.nodename);
        else {
            /* Couldn't find it, so fail */
            ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
            exit_status = ORTE_ERR_BAD_PARAM;
            goto cleanup;
        }
    }
    
    /*
     * set the 'batchid'
     */
    if (NULL != batchid)
        batchname = strdup(batchid);
    else 
        batchname = strdup("0");

    /*
     * get the front part of the session directory
     * Will look something like:
     *    openmpi-sessions-USERNAME@HOSTNAME_BATCHID
     */
    if (NULL != orte_process_info.top_session_dir) {
        frontend = strdup(orte_process_info.top_session_dir);
    }
    else { /* If not set then construct it */
        if (0 > asprintf(&frontend, "openmpi-sessions-%s@%s_%s", user, hostname, batchname)) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            exit_status = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
    }

    /*
     * Construct the session directory
     */
    /* If we were given a valid vpid then we can construct it fully into:
     *   openmpi-sessions-USERNAME@HOSTNAME_BATCHID/JOB-FAMILY/JOBID/VPID
     */
    if( NULL != proc) {
        if (ORTE_VPID_INVALID != proc->vpid) {
            
            if (0 > asprintf(&jobfam, "%d", ORTE_JOB_FAMILY(proc->jobid))) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                exit_status = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            
            if (0 > asprintf(&job, "%d", ORTE_LOCAL_JOBID(proc->jobid))) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                exit_status = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            
            if (ORTE_SUCCESS != orte_util_convert_vpid_to_string(&vpidstr, proc->vpid)) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                exit_status = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            
            sessions = opal_os_path( false, frontend, jobfam, job, vpidstr, NULL );
            if( NULL == sessions ) {
                ORTE_ERROR_LOG(ORTE_ERROR);
                exit_status = ORTE_ERROR;
                goto cleanup;
            }
        }
        /* If we were given a valid jobid then we can construct it partially into:
         *   openmpi-sessions-USERNAME@HOSTNAME_BATCHID/JOB-FAMILY/JOBID
         */
        else if (ORTE_JOBID_INVALID != proc->jobid) {
            if (0 > asprintf(&jobfam, "%d", ORTE_JOB_FAMILY(proc->jobid))) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                exit_status = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            
            if (0 > asprintf(&job, "%d", ORTE_LOCAL_JOBID(proc->jobid))) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                exit_status = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            
            sessions = opal_os_path( false, frontend, jobfam, job, NULL );
            if( NULL == sessions ) {
                ORTE_ERROR_LOG(ORTE_ERROR);
                exit_status = ORTE_ERROR;
                goto cleanup;
            }
        } /* if both are invalid */
        else {
            sessions = strdup(frontend); /* must dup this to avoid double-free later */
        }
        
    }    /* If we were not given a proc at all, then we just set it to frontend
     */
    else {
        sessions = strdup(frontend); /* must dup this to avoid double-free later */
    }
    
    /*
     * If the user specified an invalid prefix, or no prefix at all
     * we need to keep looking
     */
    if( NULL != fulldirpath && NULL != *fulldirpath) {
        free(*fulldirpath);
        *fulldirpath = NULL;
    }

    if( NULL != return_prefix && NULL != *return_prefix) { /* use the user specified one, if available */ 
        prefix = strdup(*return_prefix); 
        prefix_provided = true;
    }
    /* Try to find a proper alternative prefix */
    else if (NULL != orte_process_info.tmpdir_base) { /* stored value */
        prefix = strdup(orte_process_info.tmpdir_base);
    }
    else { /* General Environment var */
        prefix = strdup(opal_tmp_directory());
    }
    len = strlen(prefix);
    /* check for a trailing path separator */
    if (OPAL_PATH_SEP[0] == prefix[len-1]) {
        prefix[len-1] = '\0';
    }
    
    /* BEFORE doing anything else, check to see if this prefix is
     * allowed by the system
     */
    if (NULL != orte_prohibited_session_dirs) {
        char **list;
        int i, len;
        /* break the string into tokens - it should be
         * separated by ','
         */
        list = opal_argv_split(orte_prohibited_session_dirs, ',');
        len = opal_argv_count(list);
        /* cycle through the list */
        for (i=0; i < len; i++) {
            /* check if prefix matches */
            if (0 == strncmp(prefix, list[i], strlen(list[i]))) {
                /* this is a prohibited location */
                orte_show_help("help-orte-runtime.txt",
                               "orte:session:dir:prohibited",
                               true, prefix, orte_prohibited_session_dirs);
                return ORTE_ERR_FATAL;
            }
        }
        opal_argv_free(list);  /* done with this */
    }
    /*
     * Construct the absolute final path, if requested
     */
    if (NULL != fulldirpath) {
        *fulldirpath = opal_os_path(false, prefix, sessions, NULL);
    }

    /* 
     * Return the frontend and prefix, if user requested we do so 
     */ 
    if (NULL != return_frontend) { 
        *return_frontend = strdup(frontend); 
    } 
    if (!prefix_provided && NULL != return_prefix) { 
        *return_prefix = strdup(prefix); 
    } 

 cleanup:
    if(NULL != hostname)
        free(hostname);
    if(NULL != batchname)
        free(batchname);
    if(NULL != sessions)
        free(sessions);
    if(NULL != user)
        free(user);
    if (NULL != prefix) free(prefix);
    if (NULL != frontend) free(frontend);
    if (NULL != jobfam) free(jobfam);
    if (NULL != job) free(job);
    if (NULL != vpidstr) free(vpidstr);

    return exit_status;
}
Esempio n. 27
0
/**
 * This function attempts to remove a directory along with all the
 * files in it.  If the recursive variable is non-zero, then it will
 * try to recursively remove all directories.  If provided, the
 * callback function is executed prior to the directory or file being
 * removed.  If the callback returns non-zero, then no removal is
 * done.
 */
int opal_os_dirpath_destroy(const char *path,
                            bool recursive,
                            opal_os_dirpath_destroy_callback_fn_t cbfunc)
{
    int rc, exit_status = OPAL_SUCCESS;
    bool is_dir = false;
    DIR *dp;
    struct dirent *ep;
    char *filenm;
#ifndef HAVE_STRUCT_DIRENT_D_TYPE
    struct stat buf;
#endif

    if (NULL == path) {  /* protect against error */
        return OPAL_ERROR;
    }

    /*
     * Make sure we have access to the the base directory
     */
    if( OPAL_SUCCESS != (rc = opal_os_dirpath_access(path, 0) ) ) {
        exit_status = rc;
        goto cleanup;
    }

    /* Open up the directory */
    dp = opendir(path);
    if (NULL == dp) {
        return OPAL_ERROR;
    }

    while (NULL != (ep = readdir(dp)) ) {
        /* skip:
         *  - . and ..
         */
        if ((0 == strcmp(ep->d_name, ".")) ||
            (0 == strcmp(ep->d_name, "..")) ) {
            continue;
        }

        /* Check to see if it is a directory */
        is_dir = false;

        /* Create a pathname.  This is not always needed, but it makes
         * for cleaner code just to create it here.  Note that we are
         * allocating memory here, so we need to free it later on.
         */
        filenm = opal_os_path(false, path, ep->d_name, NULL);
#ifdef HAVE_STRUCT_DIRENT_D_TYPE
        if (DT_DIR == ep->d_type) {
            is_dir = true;
        }
#else /* have dirent.d_type */
        rc = stat(filenm, &buf);
        if (rc < 0 || S_ISDIR(buf.st_mode)) {
            is_dir = true;
        }
#endif /* have dirent.d_type */

        /*
         * If not recursively decending, then if we find a directory then fail
         * since we were not told to remove it.
         */
        if( is_dir && !recursive) {
            /* Set the error indicating that we found a directory,
             * but continue removing files
             */
            exit_status = OPAL_ERROR;
            free(filenm);
            continue;
        }

        /* Will the caller allow us to remove this file/directory? */
        if(NULL != cbfunc) {
            /*
             * Caller does not wish to remove this file/directory,
             * continue with the rest of the entries
             */
            if( ! (cbfunc(path, ep->d_name)) ) {
                free(filenm);
                continue;
            }
        }
        /* Directories are recursively destroyed */
        if(is_dir) {
            rc = opal_os_dirpath_destroy(filenm, recursive, cbfunc);
            free(filenm);
            if (OPAL_SUCCESS != rc) {
                exit_status = rc;
                closedir(dp);
                goto cleanup;
            }
        }
        /* Files are removed right here */
        else {
            if( 0 != (rc = unlink(filenm) ) ) {
                exit_status = OPAL_ERROR;
            }
            free(filenm);
        }
    }

    /* Done with this directory */
    closedir(dp);

 cleanup:

    /*
     * If the directory is empty, them remove it
     */
    if(opal_os_dirpath_is_empty(path)) {
        rmdir(path);
    }

    return exit_status;
}
Esempio n. 28
0
static int rte_finalize(void)
{
    char *contact_path;
    char *jobfam_dir;

    if (signals_set) {
        /* Remove the epipe handler */
        opal_event_signal_del(&epipe_handler);
        /* remove the term handler */
        opal_event_del(&term_handler);
        /** Remove the USR signal handlers */
        opal_event_signal_del(&sigusr1_handler);
        opal_event_signal_del(&sigusr2_handler);
        if (orte_forward_job_control) {
            opal_event_signal_del(&sigtstp_handler);
            opal_event_signal_del(&sigcont_handler);
        }
        signals_set = false;
    }

    /* close the dfs */
    (void) mca_base_framework_close(&orte_dfs_base_framework);
    (void) mca_base_framework_close(&orte_filem_base_framework);
    /* output any lingering stdout/err data */
    fflush(stdout);
    fflush(stderr);
    (void) mca_base_framework_close(&orte_iof_base_framework);
    (void) mca_base_framework_close(&orte_rtc_base_framework);
    (void) mca_base_framework_close(&orte_odls_base_framework);
    (void) mca_base_framework_close(&orte_rmaps_base_framework);
    (void) mca_base_framework_close(&orte_ras_base_framework);
    (void) mca_base_framework_close(&orte_grpcomm_base_framework);
    (void) mca_base_framework_close(&opal_dstore_base_framework);
    (void) mca_base_framework_close(&orte_routed_base_framework);
    (void) mca_base_framework_close(&orte_plm_base_framework);
    (void) mca_base_framework_close(&orte_errmgr_base_framework);
    (void) mca_base_framework_close(&orte_state_base_framework);

    /* cleanup the pstat stuff */
    (void) mca_base_framework_close(&opal_pstat_base_framework);

    /* remove my contact info file, if we have session directories */
    if (NULL != orte_process_info.job_session_dir) {
        jobfam_dir = opal_dirname(orte_process_info.job_session_dir);
        contact_path = opal_os_path(false, jobfam_dir, "contact.txt", NULL);
        free(jobfam_dir);
        unlink(contact_path);
        free(contact_path);
    }

    /* shutdown the messaging frameworks */
    (void) mca_base_framework_close(&orte_rml_base_framework);
    (void) mca_base_framework_close(&orte_oob_base_framework);

    /* ensure we scrub the session directory tree */
    orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
    
    /* close the xml output file, if open */
    if (orte_xml_output) {
        fprintf(orte_xml_fp, "</mpirun>\n");
        fflush(orte_xml_fp);
        if (stdout != orte_xml_fp) {
            fclose(orte_xml_fp);
        }
    }

    return ORTE_SUCCESS;
}
Esempio n. 29
0
/**
 * Create Standard I/O symlinks in the filesystem for a given proc
 *
 * Create Standard I/O symlinks in the filesystem for a given proc.
 * The symlinks will be placed in:
 * @code
 * /tmp/openmpi-bproc-<user>/<universe>/<jobid>-<app_context>/<proc_rank>/
 * @endcode
 *
 * The symlinks will be to FIFOs for stdin and stderr.  stdout will either
 * be to a FIFO or pty, depending on the configuration of Open MPI.
 *
 * @param proc_rank   the process's rank on the node
 * @param jobid       the jobid the proc belongs to
 * @param app_context the application context number within the job
 * @param connect_stdin if true, stdin will be connected, otherwise it will be
 *                      set to /dev/null
 *
 * @retval ORTE_SUCCESS
 * @retval error
 */
static int
odls_bproc_setup_stdio(orte_process_name_t *proc_name, int proc_rank,
                            orte_jobid_t jobid,
                            orte_std_cntr_t app_context, bool connect_stdin)
{
    char *path_prefix, *fd_link_path = NULL;
    int rc = ORTE_SUCCESS, fd;
#if defined(HAVE_OPENPTY) && (OMPI_ENABLE_PTY_SUPPORT != 0)
    int amaster, aslave;
    char pty_name[256];
    struct termios term_attrs;
#endif
    
    path_prefix = odls_bproc_get_base_dir_name(proc_rank, jobid, (size_t)app_context);
    if (NULL == path_prefix) {
        rc = ORTE_ERROR;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* check for existence and access, or create it */
    if (ORTE_SUCCESS != (rc = odls_bproc_make_dir(path_prefix))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* setup the stdin FIFO.  Always use a fifo for the same reason we
       always use a pipe in the iof_setup code -- don't want to flush
       onto the floor during close */
    fd_link_path = opal_os_path( false, path_prefix, "0", NULL );
   if (NULL == fd_link_path) {
        rc = ORTE_ERROR;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    if (connect_stdin) {
        if (0 != mkfifo(fd_link_path, S_IRWXU)) {
             perror("odls_bproc mkfifo failed");
             rc = ORTE_ERROR;
             ORTE_ERROR_LOG(rc);
             goto cleanup;
        }

        fd = open(fd_link_path, O_RDWR);
        if (-1 == fd) {
            perror("odls_bproc open failed");
            rc = ORTE_ERROR;
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        orte_iof.iof_publish(proc_name, ORTE_IOF_SINK,
                             ORTE_IOF_STDIN, fd);
    } else {
        if(0 != symlink("/dev/null", fd_link_path)) {
            perror("odls_bproc could not create symlink");
            rc = ORTE_ERROR;
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
    }

    free(fd_link_path);
    fd_link_path = NULL;

    /* setup the stdout PTY / FIFO */
    fd_link_path = opal_os_path( false, path_prefix, "1", NULL );
    if (NULL == fd_link_path) {
        rc = ORTE_ERROR;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

#if defined(HAVE_OPENPTY) && (OMPI_ENABLE_PTY_SUPPORT != 0)
    if (0 != openpty(&amaster, &aslave, pty_name, NULL, NULL)) {
         opal_output(0, "odls_bproc: openpty failed, using pipes instead");
         goto stdout_fifo_setup;
    } 

    if (0 != symlink(pty_name, fd_link_path)) {
        rc = ORTE_ERROR;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    if (tcgetattr(aslave, &term_attrs) < 0) {
        rc = ORTE_ERROR;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    term_attrs.c_lflag &= ~ (ECHO | ECHOE | ECHOK |
                             ECHOCTL | ECHOKE | ECHONL);
    term_attrs.c_iflag &= ~ (ICRNL | INLCR | ISTRIP | INPCK | IXON);
    term_attrs.c_oflag &= ~ (OCRNL | ONLCR);
    if (tcsetattr(aslave, TCSANOW, &term_attrs) == -1) {
        rc = ORTE_ERROR;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
 
    orte_iof.iof_publish(proc_name, ORTE_IOF_SOURCE,
                         ORTE_IOF_STDOUT, amaster);

    goto stderr_fifo_setup;

stdout_fifo_setup:
#endif

    if (0 != mkfifo(fd_link_path, S_IRWXU)) {
         perror("odls_bproc mkfifo failed");
         rc = ORTE_ERROR;
         goto cleanup;
    }

    fd = open(fd_link_path, O_RDWR);
    if (-1 == fd) {
        perror("odls_bproc open failed");
        rc = ORTE_ERROR;
        goto cleanup;
    }

    orte_iof.iof_publish(proc_name, ORTE_IOF_SOURCE,
                         ORTE_IOF_STDOUT, fd);

#if defined(HAVE_OPENPTY) && (OMPI_ENABLE_PTY_SUPPORT != 0)
stderr_fifo_setup:
#endif

    free(fd_link_path);
    fd_link_path = NULL;

    /* setup the stderr FIFO.  Always a fifo */
    fd_link_path = opal_os_path( false, path_prefix, "2", NULL );
    if (NULL == fd_link_path) {
        rc = ORTE_ERROR;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    if (0 != mkfifo(fd_link_path, S_IRWXU)) {
         perror("odls_bproc mkfifo failed");
         rc = ORTE_ERROR;
         goto cleanup;
    }

    fd = open(fd_link_path, O_RDWR);
    if (-1 == fd) {
        perror("odls_bproc open failed");
        rc = ORTE_ERROR;
        goto cleanup;
    }

    orte_iof.iof_publish(proc_name, ORTE_IOF_SOURCE,
                         ORTE_IOF_STDERR, fd);

cleanup:
    if (NULL != path_prefix) {
       free(path_prefix);
    }
    if (NULL != fd_link_path) {
        free(fd_link_path);
    }
    return rc;
}
Esempio n. 30
0
static int bootstrap_comm(ompi_communicator_t *comm,
                          mca_coll_sm_module_t *module)
{
    int i;
    char *shortpath, *fullpath;
    mca_coll_sm_component_t *c = &mca_coll_sm_component;
    mca_coll_sm_comm_t *data = module->sm_comm_data;
    int comm_size = ompi_comm_size(comm);
    int num_segments = c->sm_comm_num_segments;
    int num_in_use = c->sm_comm_num_in_use_flags;
    int frag_size = c->sm_fragment_size;
    int control_size = c->sm_control_size;
    ompi_process_name_t *lowest_name = NULL;
    size_t size;
    ompi_proc_t *proc;

    /* Make the rendezvous filename for this communicators shmem data
       segment.  The CID is not guaranteed to be unique among all
       procs on this node, so also pair it with the PID of the proc
       with the lowest ORTE name to form a unique filename. */
    proc = ompi_group_peer_lookup(comm->c_local_group, 0);
    lowest_name = &(proc->proc_name);
    for (i = 1; i < comm_size; ++i) {
        proc = ompi_group_peer_lookup(comm->c_local_group, i);
        if (ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, 
                                          &(proc->proc_name),
                                          lowest_name) < 0) {
            lowest_name = &(proc->proc_name);
        }
    }
    asprintf(&shortpath, "coll-sm-cid-%d-name-%s.mmap", comm->c_contextid,
             OMPI_NAME_PRINT(lowest_name));
    if (NULL == shortpath) {
        opal_output_verbose(10, ompi_coll_base_framework.framework_output,
                            "coll:sm:enable:bootstrap comm (%d/%s): asprintf failed", 
                            comm->c_contextid, comm->c_name);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    fullpath = opal_os_path(false, ompi_process_info.job_session_dir,
                            shortpath, NULL);
    free(shortpath);
    if (NULL == fullpath) {
        opal_output_verbose(10, ompi_coll_base_framework.framework_output,
                            "coll:sm:enable:bootstrap comm (%d/%s): opal_os_path failed", 
                            comm->c_contextid, comm->c_name);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* Calculate how much space we need in the per-communicator shmem
       data segment.  There are several values to add:

       - size of the barrier data (2 of these):
           - fan-in data (num_procs * control_size)
           - fan-out data (num_procs * control_size)
       - size of the "in use" buffers:
           - num_in_use_buffers * control_size
       - size of the message fragment area (one for each segment):
           - control (num_procs * control_size)
           - fragment data (num_procs * (frag_size))

       So it's:

           barrier: 2 * control_size + 2 * control_size
           in use:  num_in_use * control_size
           control: num_segments * (num_procs * control_size * 2 +
                                    num_procs * control_size)
           message: num_segments * (num_procs * frag_size)
     */

    size = 4 * control_size +
        (num_in_use * control_size) +
        (num_segments * (comm_size * control_size * 2)) +
        (num_segments * (comm_size * frag_size));
    opal_output_verbose(10, ompi_coll_base_framework.framework_output,
                        "coll:sm:enable:bootstrap comm (%d/%s): attaching to %" PRIsize_t " byte mmap: %s",
                        comm->c_contextid, comm->c_name, size, fullpath);
    data->sm_bootstrap_meta =
        mca_common_sm_init_group(comm->c_local_group, size, fullpath,
                                 sizeof(mca_common_sm_seg_header_t),
                                 getpagesize());
    if (NULL == data->sm_bootstrap_meta) {
        opal_output_verbose(10, ompi_coll_base_framework.framework_output,
                            "coll:sm:enable:bootstrap comm (%d/%s): mca_common_sm_init_group failed", 
                            comm->c_contextid, comm->c_name);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* All done */
    return OMPI_SUCCESS;
}