Пример #1
0
/**
 * Enable the scoll framework: prepare the barrier synchronization array
 * and select collective modules for the predefined groups.
 *
 * If mca_scoll_sync_array has not been set up yet, it is allocated through
 * MCA_MEMHEAP_CALL(private_alloc(...)) with _SHMEM_BARRIER_SYNC_SIZE
 * elements, and every element is set to _SHMEM_SYNC_VALUE. Afterwards
 * mca_scoll_base_select() is run for oshmem_group_all and then for
 * oshmem_group_self.
 *
 * @return OSHMEM_SUCCESS on success; otherwise the status reported by the
 *         failed allocation or by the failed module selection.
 */
int mca_scoll_enable(void)
{
    int ret = OSHMEM_SUCCESS;

    OPAL_TIMING_ENV_INIT(mca_scoll_enable);

    if (!mca_scoll_sync_array) {
        void* ptr = NULL;
        int i = 0;

        /* NOTE(review): assumes private_alloc returns an OSHMEM status code
         * (OSHMEM_SUCCESS on success) - confirm against the memheap module
         * interface. */
        ret = MCA_MEMHEAP_CALL(private_alloc((_SHMEM_BARRIER_SYNC_SIZE * sizeof(*mca_scoll_sync_array)), &ptr));
        if (OSHMEM_SUCCESS != ret) {
            /* Do not touch the array: without this check a failed
             * allocation led to a NULL dereference in the loop below.
             * mca_scoll_sync_array stays unset so a later call can retry. */
            return ret;
        }
        mca_scoll_sync_array = ptr;

        for (i = 0; i < _SHMEM_BARRIER_SYNC_SIZE; i++) {
            mca_scoll_sync_array[i] = _SHMEM_SYNC_VALUE;
        }
    }

    OPAL_TIMING_ENV_NEXT(mca_scoll_enable, "memheap");

    /* Note: this is done to support FCA only; we need to consider finding
     * a way to do without this ugly hack.
     */
    if (OSHMEM_SUCCESS != (ret = mca_scoll_base_select(oshmem_group_all))) {
        return ret;
    }

    OPAL_TIMING_ENV_NEXT(mca_scoll_enable, "group_all");

    if (OSHMEM_SUCCESS != (ret = mca_scoll_base_select(oshmem_group_self))) {
        return ret;
    }

    OPAL_TIMING_ENV_NEXT(mca_scoll_enable, "group_self");

    return OSHMEM_SUCCESS;
}
Пример #2
0
/**
 * Common ESS setup for an application process.
 *
 * In order, this routine: applies the requested stdout/stderr buffering
 * mode, fills in the local opal_proc_t when ORTE_PROC_NON_MPI is set,
 * opens/selects the state framework, opens the errmgr framework, creates
 * the session directories (when orte_create_session_dirs is set) and
 * registers them for cleanup, opens/selects the routed, oob and rml
 * frameworks, parses the HNP and daemon URIs when present, selects the
 * errmgr, opens the management and collective RML conduits, and finally
 * opens/selects the grpcomm and dfs frameworks.
 *
 * @param db_restrict_local  Not referenced anywhere in this function body.
 *
 * @return ORTE_SUCCESS on success. On failure the error code of the failed
 *         step is returned after it has been reported through
 *         orte_show_help() together with the name of that step.
 */
int orte_ess_base_app_setup(bool db_restrict_local)
{
    int ret;
    char *error = NULL;
    opal_list_t transports;

    OPAL_TIMING_ENV_INIT(ess_base_setup);
    /*
     * stdout/stderr buffering
     * If the user requested to override the default setting then do
     * as they wish: 0 = unbuffered, 1 = line buffered, 2 = fully buffered.
     */
    if( orte_ess_base_std_buffering > -1 ) {
        if( 0 == orte_ess_base_std_buffering ) {
            setvbuf(stdout, NULL, _IONBF, 0);
            setvbuf(stderr, NULL, _IONBF, 0);
        }
        else if( 1 == orte_ess_base_std_buffering ) {
            setvbuf(stdout, NULL, _IOLBF, 0);
            setvbuf(stderr, NULL, _IOLBF, 0);
        }
        else if( 2 == orte_ess_base_std_buffering ) {
            setvbuf(stdout, NULL, _IOFBF, 0);
            setvbuf(stderr, NULL, _IOFBF, 0);
        }
    }

    /* if I am an MPI app, we will let the MPI layer define and
     * control the opal_proc_t structure. Otherwise, we need to
     * do so here */
    if (ORTE_PROC_NON_MPI) {
        orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME;
        orte_process_info.super.proc_hostname = orte_process_info.nodename;
        orte_process_info.super.proc_flags = OPAL_PROC_ALL_LOCAL;
        orte_process_info.super.proc_arch = opal_local_arch;
        opal_proc_local_set(&orte_process_info.super);
    }

    /* open and setup the state machine */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_state_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_state_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_state_base_select";
        goto error;
    }
    OPAL_TIMING_ENV_NEXT(ess_base_setup, "state_framework_open");

    /* open the errmgr (it is selected later, after the RML is available) */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_errmgr_base_open";
        goto error;
    }
    OPAL_TIMING_ENV_NEXT(ess_base_setup, "errmgr_framework_open");

    /* setup my session directory */
    if (orte_create_session_dirs) {
        OPAL_OUTPUT_VERBOSE((2, orte_ess_base_framework.framework_output,
                             "%s setting up session dir with\n\ttmpdir: %s\n\thost %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base,
                             orte_process_info.nodename));
        if (ORTE_SUCCESS != (ret = orte_session_dir(true, ORTE_PROC_MY_NAME))) {
            ORTE_ERROR_LOG(ret);
            error = "orte_session_dir";
            goto error;
        }
        /* Once the session directory location has been established, set
           the opal_output env file location to be in the
           proc-specific session directory. */
        opal_output_set_output_file_info(orte_process_info.proc_session_dir,
                                         "output-", NULL, NULL);
        /* register the directory for cleanup: the top-level session dir
         * when running standalone, otherwise only the job-level one */
        if (NULL != opal_pmix.register_cleanup) {
            if (orte_standalone_operation) {
                if (OPAL_SUCCESS != (ret = opal_pmix.register_cleanup(orte_process_info.top_session_dir, true, false, true))) {
                    ORTE_ERROR_LOG(ret);
                    error = "register cleanup";
                    goto error;
                }
            } else {
                if (OPAL_SUCCESS != (ret = opal_pmix.register_cleanup(orte_process_info.job_session_dir, true, false, false))) {
                    ORTE_ERROR_LOG(ret);
                    error = "register cleanup";
                    goto error;
                }
            }
        }
    }
    OPAL_TIMING_ENV_NEXT(ess_base_setup, "create_session_dirs");

    /* Setup the communication infrastructure */
    /* Routed system */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_routed_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_routed_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_routed_base_select";
        goto error;
    }
    OPAL_TIMING_ENV_NEXT(ess_base_setup, "routed_framework_open");

    /*
     * OOB Layer
     */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_oob_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_oob_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_oob_base_select";
        goto error;
    }
    OPAL_TIMING_ENV_NEXT(ess_base_setup, "oob_framework_open");

    /* Runtime Messaging Layer */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_rml_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_rml_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_rml_base_select";
        goto error;
    }
    OPAL_TIMING_ENV_NEXT(ess_base_setup, "rml_framework_open");

    /* if we have info on the HNP and local daemon, process it */
    if (NULL != orte_process_info.my_hnp_uri) {
        /* we have to set the HNP's name, even though we won't route messages directly
         * to it. This is required to ensure that we -do- send messages to the correct
         * HNP name
         */
        if (ORTE_SUCCESS != (ret = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
                                                            ORTE_PROC_MY_HNP, NULL))) {
            ORTE_ERROR_LOG(ret);
            error = "orte_rml_parse_HNP";
            goto error;
        }
    }
    if (NULL != orte_process_info.my_daemon_uri) {
        opal_value_t val;

        /* extract the daemon's name so we can update the routing table */
        if (ORTE_SUCCESS != (ret = orte_rml_base_parse_uris(orte_process_info.my_daemon_uri,
                                                            ORTE_PROC_MY_DAEMON, NULL))) {
            ORTE_ERROR_LOG(ret);
            error = "orte_rml_parse_daemon";
            goto error;
        }
        /* Set the contact info in the database - this won't actually establish
         * the connection, but just tells us how to reach the daemon
         * if/when we attempt to send to it
         */
        OBJ_CONSTRUCT(&val, opal_value_t);
        val.key = OPAL_PMIX_PROC_URI;
        val.type = OPAL_STRING;
        val.data.string = orte_process_info.my_daemon_uri;
        if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_DAEMON, &val))) {
            ORTE_ERROR_LOG(ret);
            /* key and string are borrowed, not owned by val: detach them
             * before destructing the value */
            val.key = NULL;
            val.data.string = NULL;
            OBJ_DESTRUCT(&val);
            error = "store DAEMON URI";
            goto error;
        }
        /* same detach-then-destruct as on the failure path above */
        val.key = NULL;
        val.data.string = NULL;
        OBJ_DESTRUCT(&val);
    }

    /* setup the errmgr */
    if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_errmgr_base_select";
        goto error;
    }
    OPAL_TIMING_ENV_NEXT(ess_base_setup, "errmgr_select");

    /* get a conduit for our use - we never route IO over fabric */
    OBJ_CONSTRUCT(&transports, opal_list_t);
    orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
                       ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING);
    if (ORTE_RML_CONDUIT_INVALID == (orte_mgmt_conduit = orte_rml.open_conduit(&transports))) {
        /* release the attribute list before bailing out so that it is
         * not leaked on the failure path */
        OPAL_LIST_DESTRUCT(&transports);
        ret = ORTE_ERR_OPEN_CONDUIT_FAIL;
        ORTE_ERROR_LOG(ret);
        error = "orte_rml_open_mgmt_conduit";
        goto error;
    }
    OPAL_LIST_DESTRUCT(&transports);

    /* second conduit, requested with the collective transport attribute */
    OBJ_CONSTRUCT(&transports, opal_list_t);
    orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
                       ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING);
    if (ORTE_RML_CONDUIT_INVALID == (orte_coll_conduit = orte_rml.open_conduit(&transports))) {
        /* release the attribute list before bailing out so that it is
         * not leaked on the failure path */
        OPAL_LIST_DESTRUCT(&transports);
        ret = ORTE_ERR_OPEN_CONDUIT_FAIL;
        ORTE_ERROR_LOG(ret);
        error = "orte_rml_open_coll_conduit";
        goto error;
    }
    OPAL_LIST_DESTRUCT(&transports);
    OPAL_TIMING_ENV_NEXT(ess_base_setup, "rml_open_conduit");

    /*
     * Group communications
     */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_grpcomm_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_grpcomm_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_grpcomm_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_grpcomm_base_select";
        goto error;
    }
    OPAL_TIMING_ENV_NEXT(ess_base_setup, "grpcomm_framework_open");

    /* open the distributed file system */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_dfs_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_dfs_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_dfs_base_select";
        goto error;
    }
    OPAL_TIMING_ENV_NEXT(ess_base_setup, "dfs_framework_open");

    return ORTE_SUCCESS;
 error:
    /* common failure exit: report which step failed and with what code */
    orte_show_help("help-orte-runtime.txt",
                   "orte_init:startup:internal-failure",
                   true, error, ORTE_ERROR_NAME(ret), ret);
    return ret;
}
Пример #3
0
/**
 * Initialize the OPAL utility layer.
 *
 * The call is reference counted through opal_util_initialized: only the
 * first call performs the initialization; later calls return immediately.
 * The first call sets up the finalize domain, records the local host name
 * in opal_process_info.nodename, and then initializes, in order: the memory
 * allocator, the output system, the installdirs framework, the help system,
 * error-string registration, the keyval parser, PSM handling, the MCA
 * variable system and its parameter files, OPAL parameters, networking
 * helpers, stack handlers, system limits, the arch string, the datatype
 * engine, the DSS, the MCA base and the "if" framework.
 *
 * @param pargc  Not referenced anywhere in this function body.
 * @param pargv  Not referenced anywhere in this function body.
 *
 * @return OPAL_SUCCESS when initialization succeeded or had already been
 *         started by an earlier call; OPAL_ERROR when the reference count is
 *         below 1 after the increment or the host name could not be
 *         duplicated; OPAL_ERR_SILENT when setting the system limits failed;
 *         otherwise the error code of the step that failed.
 */
int
opal_init_util(int* pargc, char*** pargv)
{
    int ret;
    char *error = NULL;
    char hostname[OPAL_MAXHOSTNAMELEN];
    OPAL_TIMING_ENV_INIT(otmng);

    if( ++opal_util_initialized != 1 ) {
        if( opal_util_initialized < 1 ) {
            return OPAL_ERROR;
        }
        return OPAL_SUCCESS;
    }


    OBJ_CONSTRUCT(&opal_init_util_domain, opal_finalize_domain_t);
    (void) opal_finalize_domain_init (&opal_init_util_domain, "opal_init_util");
    opal_finalize_set_domain (&opal_init_util_domain);

    opal_thread_set_main();

    opal_init_called = true;

    /* register the framework list to be closed during cleanup */
    opal_finalize_register_cleanup_arg (mca_base_framework_close_list, opal_init_util_frameworks);

    /* set the nodename right away so anyone who needs it has it. Note
     * that we don't bother with fqdn and prefix issues here - we let
     * the RTE later replace this with a modified name if the user
     * requests it.
     *
     * gethostname() is not required to NUL-terminate the buffer when the
     * name is truncated, and leaves the buffer unspecified on failure.
     * Zero the buffer first and force a terminator afterwards so that
     * strdup() always sees a valid (possibly truncated or empty) string. */
    memset(hostname, 0, sizeof(hostname));
    (void) gethostname(hostname, sizeof(hostname));
    hostname[sizeof(hostname) - 1] = '\0';
    opal_process_info.nodename = strdup(hostname);
    if (NULL == opal_process_info.nodename) {
        /* the help/output systems are not up yet, so report on stderr */
        fprintf(stderr, "opal_init_util: strdup() of the host name failed -- process will likely abort (%s:%d)\n",
                __FILE__, __LINE__);
        return OPAL_ERROR;
    }

    /* initialize the memory allocator */
    opal_malloc_init();

    OPAL_TIMING_ENV_NEXT(otmng, "opal_malloc_init");

    /* initialize the output system */
    opal_output_init();

    /* initialize install dirs code */
    if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_installdirs_base_framework, 0))) {
        fprintf(stderr, "opal_installdirs_base_open() failed -- process will likely abort (%s:%d, returned %d instead of OPAL_SUCCESS)\n",
                __FILE__, __LINE__, ret);
        return ret;
    }

    /* initialize the help system */
    opal_show_help_init();

    OPAL_TIMING_ENV_NEXT(otmng, "opal_show_help_init");

    /* register handler for errnum -> string conversion */
    if (OPAL_SUCCESS !=
        (ret = opal_error_register("OPAL",
                                   OPAL_ERR_BASE, OPAL_ERR_MAX, opal_err2str))) {
        return opal_init_error ("opal_error_register", ret);
    }

    /* keyval lex-based parser */
    if (OPAL_SUCCESS != (ret = opal_util_keyval_parse_init())) {
        return opal_init_error ("opal_util_keyval_parse_init", ret);
    }

    // Disable PSM signal hijacking (see comment in function for more
    // details)
    opal_init_psm();

    OPAL_TIMING_ENV_NEXT(otmng, "opal_init_psm");

    /* Setup the parameter system */
    if (OPAL_SUCCESS != (ret = mca_base_var_init())) {
        return opal_init_error ("mca_base_var_init", ret);
    }
    OPAL_TIMING_ENV_NEXT(otmng, "opal_var_init");

    /* read any param files that were provided */
    if (OPAL_SUCCESS != (ret = mca_base_var_cache_files(false))) {
        return opal_init_error ("failed to cache files", ret);
    }

    OPAL_TIMING_ENV_NEXT(otmng, "opal_var_cache");


    /* register params for opal */
    if (OPAL_SUCCESS != (ret = opal_register_params())) {
        return opal_init_error ("opal_register_params", ret);
    }

    if (OPAL_SUCCESS != (ret = opal_net_init())) {
        return opal_init_error ("opal_net_init", ret);
    }

    OPAL_TIMING_ENV_NEXT(otmng, "opal_net_init");

    /* pretty-print stack handlers */
    if (OPAL_SUCCESS != (ret = opal_util_register_stackhandlers())) {
        return opal_init_error ("opal_util_register_stackhandlers", ret);
    }

    /* set system resource limits - internally protected against
     * doing so twice in cases where the launch agent did it for us
     */
    if (OPAL_SUCCESS != (ret = opal_util_init_sys_limits(&error))) {
        /* the message was already shown here, so return the silent code */
        opal_show_help("help-opal-runtime.txt",
                        "opal_init:syslimit", false,
                        error);
        return OPAL_ERR_SILENT;
    }

    /* initialize the arch string */
    if (OPAL_SUCCESS != (ret = opal_arch_init ())) {
        return opal_init_error ("opal_arch_init", ret);
    }

    OPAL_TIMING_ENV_NEXT(otmng, "opal_arch_init");

    /* initialize the datatype engine */
    if (OPAL_SUCCESS != (ret = opal_datatype_init ())) {
        return opal_init_error ("opal_datatype_init", ret);
    }

    OPAL_TIMING_ENV_NEXT(otmng, "opal_datatype_init");

    /* Initialize the data storage service. */
    if (OPAL_SUCCESS != (ret = opal_dss_open())) {
        return opal_init_error ("opal_dss_open", ret);
    }

    OPAL_TIMING_ENV_NEXT(otmng, "opal_dss_open");

    /* initialize the mca */
    if (OPAL_SUCCESS != (ret = mca_base_open())) {
        return opal_init_error ("mca_base_open", ret);
    }

    OPAL_TIMING_ENV_NEXT(otmng, "mca_base_open");

    /* initialize if framework */
    if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_if_base_framework, 0))) {
        fprintf(stderr, "opal_if_base_open() failed -- process will likely abort (%s:%d, returned %d instead of OPAL_SUCCESS)\n",
                __FILE__, __LINE__, ret);
        return ret;
    }

    OPAL_TIMING_ENV_NEXT(otmng, "opal_if_init");

    return OPAL_SUCCESS;
}