Exemplo n.º 1
0
int
opal_finalize(void)
{
    if( --opal_initialized != 0 ) {
        if( opal_initialized < 0 ) {
            return OPAL_ERROR;
        }
        return OPAL_SUCCESS;
    }

    opal_progress_finalize();

    opal_event_base_close();

    /* close high resolution timers */
    (void) mca_base_framework_close(&opal_timer_base_framework);

    (void) mca_base_framework_close(&opal_backtrace_base_framework);

    /* close hwloc */
    opal_hwloc_base_close();

    /* cleanup the main thread specific stuff */
    opal_tsd_keys_destruct();

    /* finalize util code */
    opal_finalize_util();

    return OPAL_SUCCESS;
}
Exemplo n.º 2
0
int orte_ess_base_app_finalize(void)
{
    orte_cr_finalize();

#if OPAL_ENABLE_FT_CR == 1
    (void) mca_base_framework_close(&orte_snapc_base_framework);
    (void) mca_base_framework_close(&orte_sstore_base_framework);
#endif

    /* close frameworks */
    (void) mca_base_framework_close(&orte_filem_base_framework);
    (void) mca_base_framework_close(&orte_errmgr_base_framework);

    /* now can close the rml and its friendly group comm */
    (void) mca_base_framework_close(&orte_grpcomm_base_framework);
    (void) mca_base_framework_close(&orte_dfs_base_framework);
    (void) mca_base_framework_close(&orte_routed_base_framework);

    (void) mca_base_framework_close(&orte_rml_base_framework);
    (void) mca_base_framework_close(&orte_oob_base_framework);
    (void) mca_base_framework_close(&orte_state_base_framework);

    orte_session_dir_finalize(ORTE_PROC_MY_NAME);

    /* release the event base */
    if (progress_thread_running) {
        opal_progress_thread_finalize(NULL);
        progress_thread_running = false;
    }

    return ORTE_SUCCESS;
}
Exemplo n.º 3
0
int orte_ess_base_app_finalize(void)
{
    /* release the conduits */
    orte_rml.close_conduit(orte_mgmt_conduit);
    orte_rml.close_conduit(orte_coll_conduit);

    /* close frameworks */
    (void) mca_base_framework_close(&orte_filem_base_framework);
    (void) mca_base_framework_close(&orte_errmgr_base_framework);

    /* now can close the rml and its friendly group comm */
    (void) mca_base_framework_close(&orte_grpcomm_base_framework);
    (void) mca_base_framework_close(&orte_dfs_base_framework);
    (void) mca_base_framework_close(&orte_routed_base_framework);

    (void) mca_base_framework_close(&orte_rml_base_framework);
    if (NULL != opal_pmix.finalize) {
        opal_pmix.finalize();
        (void) mca_base_framework_close(&opal_pmix_base_framework);
    }
    (void) mca_base_framework_close(&orte_oob_base_framework);
    (void) mca_base_framework_close(&orte_state_base_framework);

    if (NULL == opal_pmix.register_cleanup) {
        orte_session_dir_finalize(ORTE_PROC_MY_NAME);
    }
    /* cleanup the process info */
    orte_proc_info_finalize();

    return ORTE_SUCCESS;
}
Exemplo n.º 4
0
int
opal_finalize_util(void)
{
    if( --opal_util_initialized != 0 ) {
        if( opal_util_initialized < 0 ) {
            return OPAL_ERROR;
        }
        return OPAL_SUCCESS;
    }

    /* close interfaces code. */
    (void) mca_base_framework_close(&opal_if_base_framework);

    (void) mca_base_framework_close(&opal_event_base_framework);

    /* Clear out all the registered MCA params */
    opal_deregister_params();
    mca_base_var_finalize();

    opal_net_finalize();

    /* keyval lex-based parser */
    opal_util_keyval_parse_finalize();

    (void) mca_base_framework_close(&opal_installdirs_base_framework);

    mca_base_close();

    /* finalize the memory allocator */
    opal_malloc_finalize();

    /* finalize the show_help system */
    opal_show_help_finalize();

    /* finalize the output system.  This has to come *after* the
       malloc code, as the malloc code needs to call into this, but
       the malloc code turning off doesn't affect opal_output that
       much */
    opal_output_finalize();

    /* close the dss */
    opal_dss_close();

    opal_datatype_finalize();

    /* finalize the class/object system */
    opal_class_finalize();

    free (opal_process_info.nodename);
    opal_process_info.nodename = NULL;

    return OPAL_SUCCESS;
}
Exemplo n.º 5
0
void opal_finalize_test(void)
{
    /* Clear out all the registered MCA params */
    mca_base_var_finalize();

    (void) mca_base_framework_close(&opal_installdirs_base_framework);

    /* finalize the mca */
    mca_base_close();

    /* finalize the show_help system */
    opal_show_help_finalize();

    /* finalize the output system.  This has to come *after* the
       malloc code, as the malloc code needs to call into this, but
       the malloc code turning off doesn't affect opal_output that
       much */
    opal_output_finalize();

    /* close the dss */
    opal_dss_close();

    /* finalize the class/object system */
    opal_class_finalize();
}
Exemplo n.º 6
0
static int rte_finalize(void)
{
    int ret;

    /* remove the envars that we pushed into environ
     * so we leave that structure intact
     */
    if (added_transport_keys) {
        unsetenv(OPAL_MCA_PREFIX"orte_precondition_transports");
    }
    if (added_num_procs) {
        unsetenv(OPAL_MCA_PREFIX"orte_ess_num_procs");
    }
    if (added_app_ctx) {
        unsetenv("OMPI_APP_CTX_NUM_PROCS");
    }
    /* use the default app procedure to finish */
    if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) {
        ORTE_ERROR_LOG(ret);
        return ret;
    }
    
    /* mark us as finalized */
    if (NULL != opal_pmix.finalize) {
        opal_pmix.finalize();
        (void) mca_base_framework_close(&opal_pmix_base_framework);
    }
        
    return ORTE_SUCCESS;
}
Exemplo n.º 7
0
static int mca_spml_yoda_component_close(void)
{
    int rc;
    if (OMPI_SUCCESS != (rc = mca_base_framework_close(&ompi_bml_base_framework))) {
        return rc;
    }
    return OSHMEM_SUCCESS;
}
Exemplo n.º 8
0
static int patcher_close(void)
{
    mca_base_framework_close (&opal_patcher_base_framework);

    /* Note that we don't need to unpatch any symbols here; the
       patcher framework will take care of all of that for us. */
    return OPAL_SUCCESS;
}
Exemplo n.º 9
0
int orcm_finalize(void)
{
    --orcm_initialized;
    if (0 != orcm_initialized) {
        /* check for mismatched calls */
        if (0 > orcm_initialized) {
            opal_output(0, "%s MISMATCHED CALLS TO ORCM FINALIZE",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        }
        return ORCM_ERROR;
    }
    /* mark that orte is finalizing so that system will work correctly */
    orte_finalizing = true;

    /* everyone must finalize and close the cfgi framework */
    (void) mca_base_framework_close(&orcm_cfgi_base_framework);

    /* cleanup any globals */
    if (NULL != orcm_clusters) {
        OPAL_LIST_RELEASE(orcm_clusters);
    }
    if (NULL != orcm_schedulers) {
        OPAL_LIST_RELEASE(orcm_schedulers);
    }

    (void)orte_ess.finalize();

    /* close the ess itself */
    (void) mca_base_framework_close(&orte_ess_base_framework);

    /* close the sst itself */
    (void) mca_base_framework_close(&orcm_sst_base_framework);

    /* cleanup the process info */
    orte_proc_info_finalize();

    orte_initialized = false;

    /* Close the general debug stream */
    opal_output_close(orte_debug_output);

    /* finalize the opal utilities */
    opal_finalize();

    return ORCM_SUCCESS;
}
Exemplo n.º 10
0
int orte_ess_base_tool_finalize(void)
{
    orte_wait_finalize();

#if OPAL_ENABLE_FT_CR == 1
    mca_base_framework_close(&orte_snapc_base_framework);
    mca_base_framework_close(&orte_sstore_base_framework);
#endif

    orte_rml.close_conduit(orte_mgmt_conduit);

    /* if I am a tool, then all I will have done is
     * a very small subset of orte_init - ensure that
     * I only back those elements out
     */
    if (NULL != orte_process_info.my_hnp_uri) {
        (void) mca_base_framework_close(&orte_iof_base_framework);
    }
    (void) mca_base_framework_close(&orte_routed_base_framework);
    (void) mca_base_framework_close(&orte_rml_base_framework);
    (void) mca_base_framework_close(&orte_errmgr_base_framework);

    (void) mca_base_framework_close(&opal_pmix_base_framework);

    return ORTE_SUCCESS;
}
Exemplo n.º 11
0
int orte_ess_base_tool_finalize(void)
{
    orte_wait_finalize();

#if OPAL_ENABLE_FT_CR == 1
    mca_base_framework_close(&orte_snapc_base_framework);
    mca_base_framework_close(&orte_sstore_base_framework);
#endif

    /* if I am a tool, then all I will have done is
     * a very small subset of orte_init - ensure that
     * I only back those elements out
     */
    if (NULL != orte_process_info.my_hnp_uri) {
        (void) mca_base_framework_close(&orte_iof_base_framework);
    }
    (void) mca_base_framework_close(&orte_routed_base_framework);
    (void) mca_base_framework_close(&orte_rml_base_framework);
    (void) mca_base_framework_close(&orte_schizo_base_framework);
    (void) mca_base_framework_close(&orte_errmgr_base_framework);

    /* release the event base */
    if (progress_thread_running) {
        opal_stop_progress_thread("orte", true);
        progress_thread_running = false;
    }
    return ORTE_SUCCESS;
}
Exemplo n.º 12
0
int orte_ess_base_tool_finalize(void)
{
    orte_wait_finalize();

    /* if I am a tool, then all I will have done is
     * a very small subset of orte_init - ensure that
     * I only back those elements out
     */
    if (NULL != orte_process_info.my_hnp_uri) {
        (void) mca_base_framework_close(&orte_iof_base_framework);
    }
    (void) mca_base_framework_close(&orte_routed_base_framework);
    (void) mca_base_framework_close(&orte_rml_base_framework);
    (void) mca_base_framework_close(&orte_schizo_base_framework);
    (void) mca_base_framework_close(&orte_errmgr_base_framework);

    return ORTE_SUCCESS;
}
Exemplo n.º 13
0
int opal_cr_finalize(void)
{
    int exit_status = OPAL_SUCCESS;

    if( --opal_cr_initalized != 0 ) {
        if( opal_cr_initalized < 0 ) {
            return OPAL_ERROR;
        }
        return OPAL_SUCCESS;
    }

    if( !opal_cr_is_tool ) {
#if OPAL_ENABLE_FT_THREAD == 1
        if( opal_cr_thread_use_if_avail ) {
            void *data;
            /*
             * Stop the thread
             */
            opal_cr_thread_is_done    = true;
            opal_cr_thread_is_active  = false;
            opal_cr_thread_in_library = true;

            opal_thread_join(&opal_cr_thread, &data);
            OBJ_DESTRUCT(&opal_cr_thread);
            OBJ_DESTRUCT(&opal_cr_thread_lock);
        }
#endif /* OPAL_ENABLE_FT_THREAD == 1 */

        /* Nothing to do for just process notifications */
        opal_cr_checkpointing_state = OPAL_CR_STATUS_TERM;
        opal_cr_checkpoint_request  = OPAL_CR_STATUS_TERM;
    }

#if OPAL_ENABLE_CRDEBUG == 1
    if( NULL != opal_cr_debug_free_threads ) {
        free( opal_cr_debug_free_threads );
        opal_cr_debug_free_threads = NULL;
    }
    opal_cr_debug_num_free_threads = 0;
#endif

    if (NULL != opal_cr_pipe_dir) {
        free(opal_cr_pipe_dir);
        opal_cr_pipe_dir = NULL;
    }

#if OPAL_ENABLE_FT_CR    == 1
    /*
     * Close the checkpoint / restart service components
     */
    (void) mca_base_framework_close(&opal_crs_base_framework);
#endif

    return exit_status;
}
Exemplo n.º 14
0
static int mca_pml_ob1_component_close(void)
{
    int rc;

    if (OMPI_SUCCESS != (rc = mca_base_framework_close(&ompi_bml_base_framework))) {
         return rc;
    }
    opal_output_close(mca_pml_ob1_output);

    return OMPI_SUCCESS;
}
Exemplo n.º 15
0
static int mca_bml_base_close( void )
{
    int ret;

    /* close any open components (including the selected one) */
    ret = mca_base_framework_components_close(&ompi_bml_base_framework, NULL);
    if (OMPI_SUCCESS != ret) {
        return ret;
    }

    return mca_base_framework_close(&opal_btl_base_framework);
}
int
orte_rml_oob_ft_event(int state) {
    int exit_status = ORTE_SUCCESS;
    int ret;

    if(OPAL_CRS_CHECKPOINT == state) {
        ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_FT_CHECKPOINT);
    }
    else if(OPAL_CRS_CONTINUE == state) {
        ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_FT_CONTINUE);
    }
    else if(OPAL_CRS_RESTART == state) {
        ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_FT_RESTART);
    }
    else if(OPAL_CRS_TERM == state ) {
        ;
    }
    else {
        ;
    }


    if(OPAL_CRS_CHECKPOINT == state) {
        ;
    }
    else if(OPAL_CRS_CONTINUE == state) {
        ;
    }
    else if(OPAL_CRS_RESTART == state) {
        (void) mca_base_framework_close(&orte_oob_base_framework);

        if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }

        if( ORTE_SUCCESS != (ret = orte_oob_base_select())) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }
    }
    else if(OPAL_CRS_TERM == state ) {
        ;
    }
    else {
        ;
    }

 cleanup:
    return exit_status;
}
Exemplo n.º 17
0
static int mca_pml_v_component_parasite_close(void)
{
    V_OUTPUT_VERBOSE(500, "parasite_close: Ok, I accept to die and let %s component finish",
                          mca_pml_v.host_pml_component.pmlm_version.mca_component_name);
    mca_pml_base_selected_component = mca_pml_v.host_pml_component;

    (void) mca_base_framework_close(&ompi_vprotocol_base_framework);
    pml_v_output_close();

    mca_pml.pml_enable = mca_pml_v.host_pml.pml_enable;
    /* don't need to call the host component's close: pml_base will do it */
    return OMPI_SUCCESS; /* ignore any errors as we are leaving anyway */
}
Exemplo n.º 18
0
int ww_finalize(void)
{
    if( --ww_initialized != 0 ) {
        if( ww_initialized < 0 ) {
            return WW_ERROR;
        }
        return WW_SUCCESS;
    }

    /* close the security framework */
    (void) mca_base_framework_close(&ww_sec_base_framework);

    /* Clear out all the registered MCA params */
    ww_deregister_params();
    mca_base_var_finalize();

    /* keyval lex-based parser */
    ww_util_keyval_parse_finalize();

    (void) mca_base_framework_close(&ww_installdirs_base_framework);

    /* finalize the show_help system */
    ww_show_help_finalize();

    /* finalize the output system.  This has to come *after* the
       malloc code, as the malloc code needs to call into this, but
       the malloc code turning off doesn't affect ww_output that
       much */
    ww_output_finalize();

    /* close the dstore */
    (void) mca_base_framework_close(&ww_dstore_base_framework);

    #if WW_NO_LIB_DESTRUCTOR
        ww_cleanup();
    #endif

    return WW_SUCCESS;
}
Exemplo n.º 19
0
int orte_finalize(void)
{
    int rc;

    --orte_initialized;
    if (0 != orte_initialized) {
        /* check for mismatched calls */
        if (0 > orte_initialized) {
            opal_output(0, "%s MISMATCHED CALLS TO ORTE FINALIZE",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        }
        return ORTE_ERROR;
    }

    /* protect against multiple calls */
    if (opal_atomic_trylock(&orte_finalize_lock)) {
        return ORTE_SUCCESS;
    }
    
    /* flag that we are finalizing */
    orte_finalizing = true;

    if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) {
        /* stop listening for connections - will
         * be ignored if no listeners were registered */
        orte_stop_listening();
    }
    
    /* flush the show_help system */
    orte_show_help_finalize();

    /* call the finalize function for this environment */
    if (ORTE_SUCCESS != (rc = orte_ess.finalize())) {
        return rc;
    }

    /* close the ess itself */
    (void) mca_base_framework_close(&orte_ess_base_framework);

    /* cleanup the process info */
    orte_proc_info_finalize();

    /* Close the general debug stream */
    opal_output_close(orte_debug_output);
    
    /* finalize the opal utilities */
    rc = opal_finalize();

    return rc;
}
Exemplo n.º 20
0
int
opal_finalize(void)
{
    if( --opal_initialized != 0 ) {
        if( opal_initialized < 0 ) {
            return OPAL_ERROR;
        }
        return OPAL_SUCCESS;
    }

    opal_progress_finalize();

    /* close the checkpoint and restart service */
    opal_cr_finalize();

#if OPAL_ENABLE_FT_CR    == 1
    (void) mca_base_framework_close(&opal_compress_base_framework);
#endif

    (void) mca_base_framework_close(&opal_event_base_framework);

    /* close high resolution timers */
    (void) mca_base_framework_close(&opal_timer_base_framework);

    (void) mca_base_framework_close(&opal_backtrace_base_framework);
    (void) mca_base_framework_close(&opal_memchecker_base_framework);

    /* close the memcpy framework */
    (void) mca_base_framework_close(&opal_memcpy_base_framework);

    /* finalize the memory manager / tracker */
    opal_mem_hooks_finalize();

    /* close the hwloc framework */
    (void) mca_base_framework_close(&opal_hwloc_base_framework);

    /* close the shmem framework */
    (void) mca_base_framework_close(&opal_shmem_base_framework);

    /* cleanup the main thread specific stuff */
    opal_tsd_keys_destruct();

    /* finalize util code */
    opal_finalize_util();

    return OPAL_SUCCESS;
}
Exemplo n.º 21
0
/**
 * Current Coordination callback routines
 */
int opal_cr_coord(int state)
{
    if(OPAL_CRS_CHECKPOINT == state) {
        /* Do Checkpoint Phase work */
    }
    else if (OPAL_CRS_CONTINUE == state ) {
        /* Do Continue Phase work */
    }
    else if (OPAL_CRS_RESTART == state ) {
        /* Do Restart Phase work */

        /*
         * Re-initialize the event engine
         * Otherwise it may/will use stale file descriptors which will disrupt
         * the intended users of the soon-to-be newly assigned file descriptors.
         */
        opal_event_reinit(opal_sync_event_base);

        /*
         * Flush if() functionality, since it caches system specific info.
         */
        (void) mca_base_framework_close(&opal_if_base_framework);
        /* Since opal_ifinit() is not exposed, the necessary
         * functions will call it when needed. Just make sure we
         * finalized this code so we don't get old socket addrs.
         */
        opal_output_reopen_all();
    }
    else if (OPAL_CRS_TERM == state ) {
        /* Do Continue Phase work in prep to terminate the application */
    }
    else {
        /* We must have been in an error state from the checkpoint
         * recreate everything, as in the Continue Phase
         */
    }

    /*
     * Here we are returning to either:
     *  - [orte | ompi]_notify()
     */
    opal_cr_checkpointing_state  = OPAL_CR_STATUS_RESTART_POST;

    return OPAL_SUCCESS;
}
Exemplo n.º 22
0
static int rte_finalize(void)
{
    int ret;

    /* remove the envars that we pushed into environ
     * so we leave that structure intact
     */
    if (added_transport_keys) {
        unsetenv(OPAL_MCA_PREFIX"orte_precondition_transports");
    }
    if (added_num_procs) {
        unsetenv(OPAL_MCA_PREFIX"orte_ess_num_procs");
    }
    if (added_app_ctx) {
        unsetenv("OMPI_APP_CTX_NUM_PROCS");
    }
    if (added_pmix_envs) {
        unsetenv("PMIX_NAMESPACE");
        unsetenv("PMIX_RANK");
        unsetenv("PMIX_SERVER_URI");
        unsetenv("PMIX_SECURITY_MODE");
    }

    /* use the default procedure to finish */
    if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) {
        ORTE_ERROR_LOG(ret);
    }

    /* mark us as finalized */
    if (NULL != opal_pmix.finalize) {
        opal_pmix.finalize();
        (void) mca_base_framework_close(&opal_pmix_base_framework);
    }

    /* release the event base */
    if (progress_thread_running) {
        opal_progress_thread_finalize(NULL);
        progress_thread_running = false;
    }

    return ret;
}
Exemplo n.º 23
0
static int mca_pml_v_component_open(void)
{
    int rc;
    ompi_pml_v_output_open(ompi_pml_v_output, ompi_pml_v_verbose);

    V_OUTPUT_VERBOSE(500, "loaded");

    mca_vprotocol_base_set_include_list(ompi_pml_vprotocol_include_list);

    if (OMPI_SUCCESS != (rc = mca_base_framework_open(&ompi_vprotocol_base_framework, 0))) {
        return rc;
    }

    if( NULL == mca_vprotocol_base_include_list ) {
        ompi_pml_v_output_close();
        return mca_base_framework_close(&ompi_vprotocol_base_framework);
    }

    return rc;
}
Exemplo n.º 24
0
static int pmi_component_query(mca_base_module_t **module, int *priority)
{
    int ret;

    /* all APPS must use pmix */
    if (ORTE_PROC_IS_APP) {
        /* open and setup pmix */
        if (NULL == opal_pmix.initialized) {
            if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
                ORTE_ERROR_LOG(ret);
                *priority = -1;
                *module = NULL;
                return ret;
            }
            if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) {
                /* don't error log this as it might not be an error at all */
                *priority = -1;
                *module = NULL;
                (void) mca_base_framework_close(&opal_pmix_base_framework);
                return ret;
            }
        }
        if (!opal_pmix.initialized()) {
            /* we may have everything setup, but we are not
             * in a PMIx environment and so we need to disqualify
             * ourselves - we are likely a singleton and will
             * pick things up from there */
            *priority = -1;
            *module = NULL;
            return ORTE_ERROR;
        }
        *priority = 35;
        *module = (mca_base_module_t *)&orte_ess_pmi_module;
        return ORTE_SUCCESS;
    }

    /* we can't run */
    *priority = -1;
    *module = NULL;
    return ORTE_ERROR;
}
Exemplo n.º 25
0
static int pmi_component_query(mca_base_module_t **module, int *priority)
{
    int ret;

    /* all APPS must use pmix */
    if (ORTE_PROC_IS_APP) {
        if (NULL == opal_pmix.initialized) {
            /* open and setup pmix */
            if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
                ORTE_ERROR_LOG(ret);
                *priority = -1;
                *module = NULL;
                return ret;
            }
            if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) {
                /* don't error log this as it might not be an error at all */
                *priority = -1;
                *module = NULL;
                (void) mca_base_framework_close(&opal_pmix_base_framework);
                return ret;
            }
        }
        if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) {
            /* we cannot be in a PMI environment */
            *priority = -1;
            *module = NULL;
            return ORTE_ERROR;
        }
        *priority = 35;
        *module = (mca_base_module_t *)&orte_ess_pmi_module;
        return ORTE_SUCCESS;
    }

    /* we can't run */
    *priority = -1;
    *module = NULL;
    return ORTE_ERROR;
}
Exemplo n.º 26
0
static void tool_finalize(void)
{
    if (signals_set) {
        /* Release all local signal handlers */
        opal_event_del(&epipe_handler);
        opal_event_del(&term_handler);
        opal_event_del(&int_handler);
        opal_event_signal_del(&sigusr1_handler);
        opal_event_signal_del(&sigusr2_handler);
    }
    
    (void) mca_base_framework_close(&orte_errmgr_base_framework);
    (void) mca_base_framework_close(&orte_routed_base_framework);

    orte_wait_finalize();
    if (progress_thread_running) {
        /* we had to leave the progress thread running until
         * we closed the routed framework as that closure
         * sends a "sync" message to the local daemon. it
         * is now safe to stop the progress thread
         */
        orte_event_base_active = false;
        /* break the event loop */
        opal_event_base_loopbreak(orte_event_base);
        /* wait for thread to exit */
        opal_thread_join(&progress_thread, NULL);
        OBJ_DESTRUCT(&progress_thread);
        progress_thread_running = false;
    }

    (void) mca_base_framework_close(&orte_rml_base_framework);
    (void) mca_base_framework_close(&orte_oob_base_framework);
    (void) mca_base_framework_close(&orte_state_base_framework);

    (void) mca_base_framework_close(&orcm_db_base_framework);
    (void) mca_base_framework_close(&opal_dstore_base_framework);

}
Exemplo n.º 27
0
int ompi_mpi_finalize(void)
{
    int ret;
    static int32_t finalize_has_already_started = 0;
    opal_list_item_t *item;
    struct timeval ompistart, ompistop;
    ompi_rte_collective_t *coll;
    ompi_proc_t** procs;
    size_t nprocs;

    /* Be a bit social if an erroneous program calls MPI_FINALIZE in
       two different threads, otherwise we may deadlock in
       ompi_comm_free() (or run into other nasty lions, tigers, or
       bears) */

    if (! opal_atomic_cmpset_32(&finalize_has_already_started, 0, 1)) {
        /* Note that if we're already finalized, we cannot raise an
           MPI exception.  The best that we can do is write something
           to stderr. */
        char hostname[MAXHOSTNAMELEN];
        pid_t pid = getpid();
        gethostname(hostname, sizeof(hostname));

        opal_show_help("help-mpi-runtime.txt",
                       "mpi_finalize:invoked_multiple_times",
                       true, hostname, pid);
        return MPI_ERR_OTHER;
    }

    ompi_mpiext_fini();

    /* Per MPI-2:4.8, we have to free MPI_COMM_SELF before doing
       anything else in MPI_FINALIZE (to include setting up such that
       MPI_FINALIZED will return true). */

    if (NULL != ompi_mpi_comm_self.comm.c_keyhash) {
        ompi_attr_delete_all(COMM_ATTR, &ompi_mpi_comm_self,
                             ompi_mpi_comm_self.comm.c_keyhash);
        OBJ_RELEASE(ompi_mpi_comm_self.comm.c_keyhash);
        ompi_mpi_comm_self.comm.c_keyhash = NULL;
    }

    /* Proceed with MPI_FINALIZE */

    ompi_mpi_finalized = true;

    /* As finalize is the last legal MPI call, we are allowed to force the release
     * of the user buffer used for bsend, before going anywhere further.
     */
    (void)mca_pml_base_bsend_detach(NULL, NULL);

    nprocs = 0;
    procs = ompi_proc_all(&nprocs);
    MCA_PML_CALL(del_procs(procs, nprocs));
    free(procs);

#if OMPI_ENABLE_PROGRESS_THREADS == 0
    opal_progress_set_event_flag(OPAL_EVLOOP_ONCE | OPAL_EVLOOP_NONBLOCK);
#endif

    /* Redo ORTE calling opal_progress_event_users_increment() during
       MPI lifetime, to get better latency when not using TCP */
    opal_progress_event_users_increment();

    /* check to see if we want timing information */
    if (ompi_enable_timing != 0 && 0 == OMPI_PROC_MY_NAME->vpid) {
        gettimeofday(&ompistart, NULL);
    }

    /* NOTE: MPI-2.1 requires that MPI_FINALIZE is "collective" across
       *all* connected processes.  This only means that all processes
       have to call it.  It does *not* mean that all connected
       processes need to synchronize (either directly or indirectly).  

       For example, it is quite easy to construct complicated
       scenarios where one job is "connected" to another job via
       transitivity, but have no direct knowledge of each other.
       Consider the following case: job A spawns job B, and job B
       later spawns job C.  A "connectedness" graph looks something
       like this:

           A <--> B <--> C

       So what are we *supposed* to do in this case?  If job A is
       still connected to B when it calls FINALIZE, should it block
       until jobs B and C also call FINALIZE?

       After lengthy discussions many times over the course of this
       project, the issue was finally decided at the Louisville Feb
       2009 meeting: no.

       Rationale:

       - "Collective" does not mean synchronizing.  It only means that
         every process call it.  Hence, in this scenario, every
         process in A, B, and C must call FINALIZE.

       - KEY POINT: if A calls FINALIZE, then it is erroneous for B or
         C to try to communicate with A again.

       - Hence, OMPI is *correct* to only effect a barrier across each
         jobs' MPI_COMM_WORLD before exiting.  Specifically, if A
         calls FINALIZE long before B or C, it's *correct* if A exits
         at any time (and doesn't notify B or C that it is exiting).

       - Arguably, if B or C do try to communicate with the now-gone
         A, OMPI should try to print a nice error ("you tried to
         communicate with a job that is already gone...") instead of
         segv or other Badness.  However, that is an *extremely*
         difficult problem -- sure, it's easy for A to tell B that it
         is finalizing, but how can A tell C?  A doesn't even know
         about C.  You'd need to construct a "connected" graph in a
         distributed fashion, which is fraught with race conditions,
         etc.

      Hence, our conclusion is: OMPI is *correct* in its current
      behavior (of only doing a barrier across its own COMM_WORLD)
      before exiting.  Any problems that occur are as a result of
      erroneous MPI applications.  We *could* tighten up the erroneous
      cases and ensure that we print nice error messages / don't
      crash, but that is such a difficult problem that we decided we
      have many other, much higher priority issues to handle that deal
      with non-erroneous cases. */

    /* wait for everyone to reach this point
       This is a grpcomm barrier instead of an MPI barrier because an
       MPI barrier doesn't ensure that all messages have been transmitted
       before exiting, so the possibility of a stranded message exists.
    */
    coll = OBJ_NEW(ompi_rte_collective_t);
    coll->id = ompi_process_info.peer_fini_barrier;
    coll->active = true;
    if (OMPI_SUCCESS != (ret = ompi_rte_barrier(coll))) {
        OMPI_ERROR_LOG(ret);
        return ret;
    }

    /* wait for barrier to complete */
    OMPI_LAZY_WAIT_FOR_COMPLETION(coll->active);
    OBJ_RELEASE(coll);

    /* check for timing request - get stop time and report elapsed
     time if so */
    if (ompi_enable_timing && 0 == OMPI_PROC_MY_NAME->vpid) {
        gettimeofday(&ompistop, NULL);
        opal_output(0, "ompi_mpi_finalize[%ld]: time to execute barrier %ld usec",
                    (long)OMPI_PROC_MY_NAME->vpid,
                    (long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
                               (ompistop.tv_usec - ompistart.tv_usec)));
    }

    /*
     * Shutdown the Checkpoint/Restart Mech.
     */
    if (OMPI_SUCCESS != (ret = ompi_cr_finalize())) {
        OMPI_ERROR_LOG(ret);
    }

    /* Shut down any bindings-specific issues: C++, F77, F90 */

    /* Remove all memory associated by MPI_REGISTER_DATAREP (per
       MPI-2:9.5.3, there is no way for an MPI application to
       *un*register datareps, but we don't want the OMPI layer causing
       memory leaks). */
    while (NULL != (item = opal_list_remove_first(&ompi_registered_datareps))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&ompi_registered_datareps);

    /* Remove all F90 types from the hash tables. As the OBJ_DESTRUCT will
     * call a special destructor able to release predefined types, we can
     * simply call the OBJ_DESTRUCT on the hash table and all memory will
     * be correctly released.
     */
    OBJ_DESTRUCT( &ompi_mpi_f90_integer_hashtable );
    OBJ_DESTRUCT( &ompi_mpi_f90_real_hashtable );
    OBJ_DESTRUCT( &ompi_mpi_f90_complex_hashtable );

    /* Free communication objects */

    /* free file resources */
    if (OMPI_SUCCESS != (ret = ompi_file_finalize())) {
        return ret;
    }

    /* free window resources */
    if (OMPI_SUCCESS != (ret = ompi_win_finalize())) {
        return ret;
    }
    if (OMPI_SUCCESS != (ret = ompi_osc_base_finalize())) {
        return ret;
    }

    /* free pml resource */ 
    if(OMPI_SUCCESS != (ret = mca_pml_base_finalize())) { 
      return ret;
    }
    /* free communicator resources */
    if (OMPI_SUCCESS != (ret = ompi_comm_finalize())) {
        return ret;
    }

    /* free requests */
    if (OMPI_SUCCESS != (ret = ompi_request_finalize())) {
        return ret;
    }

    if (OMPI_SUCCESS != (ret = ompi_message_finalize())) {
        return ret;
    }

    /* If requested, print out a list of memory allocated by ALLOC_MEM
       but not freed by FREE_MEM */
    if (0 != ompi_debug_show_mpi_alloc_mem_leaks) {
        mca_mpool_base_tree_print();
    }

    /* Now that all MPI objects dealing with communications are gone,
       shut down MCA types having to do with communications */
    if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_pml_base_framework) ) ) {
        OMPI_ERROR_LOG(ret);
        return ret;
    }

    /* shut down buffered send code */
    mca_pml_base_bsend_fini();

#if OPAL_ENABLE_FT_CR == 1
    /*
     * Shutdown the CRCP Framework, must happen after PML shutdown
     */
    if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_crcp_base_framework) ) ) {
        OMPI_ERROR_LOG(ret);
        return ret;
    }
#endif

    /* Free secondary resources */

    /* free attr resources */
    if (OMPI_SUCCESS != (ret = ompi_attr_finalize())) {
        return ret;
    }

    /* free group resources */
    if (OMPI_SUCCESS != (ret = ompi_group_finalize())) {
        return ret;
    }

    /* free proc resources */
    if ( OMPI_SUCCESS != (ret = ompi_proc_finalize())) {
        return ret;
    }
    
    /* finalize the pubsub functions */
    if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_pubsub_base_framework) ) ) {
        return ret;
    }
    
    /* finalize the DPM framework */
    if ( OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_dpm_base_framework))) {
        return ret;
    }
    
    /* free internal error resources */
    if (OMPI_SUCCESS != (ret = ompi_errcode_intern_finalize())) {
        return ret;
    }
     
    /* free error code resources */
    if (OMPI_SUCCESS != (ret = ompi_mpi_errcode_finalize())) {
        return ret;
    }

    /* free errhandler resources */
    if (OMPI_SUCCESS != (ret = ompi_errhandler_finalize())) {
        return ret;
    }

    /* Free all other resources */

    /* free op resources */
    if (OMPI_SUCCESS != (ret = ompi_op_finalize())) {
        return ret;
    }

    /* free ddt resources */
    if (OMPI_SUCCESS != (ret = ompi_datatype_finalize())) {
        return ret;
    }

    /* free info resources */
    if (OMPI_SUCCESS != (ret = ompi_info_finalize())) {
        return ret;
    }

    /* Close down MCA modules */

    /* io is opened lazily, so it's only necessary to close it if it
       was actually opened */
    if (0 < ompi_io_base_framework.framework_refcnt) {
        /* May have been "opened" multiple times. We want it closed now */
        ompi_io_base_framework.framework_refcnt = 1;

        if (OMPI_SUCCESS != mca_base_framework_close(&ompi_io_base_framework)) {
            return ret;
        }
    }
    (void) mca_base_framework_close(&ompi_topo_base_framework);
    if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_osc_base_framework))) {
        return ret;
    }
    if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_coll_base_framework))) {
        return ret;
    }
    if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_bml_base_framework))) {
        return ret;
    }
    if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_mpool_base_framework))) {
        return ret;
    }
    if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_rcache_base_framework))) {
        return ret;
    }
    if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_allocator_base_framework))) {
        return ret;
    }

    if (NULL != ompi_mpi_main_thread) {
        OBJ_RELEASE(ompi_mpi_main_thread);
        ompi_mpi_main_thread = NULL;
    }

    /* Leave the RTE */

    if (OMPI_SUCCESS != (ret = ompi_rte_finalize())) {
        return ret;
    }

    /* now close the rte framework */
    if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_rte_base_framework) ) ) {
        OMPI_ERROR_LOG(ret);
        return ret;
    }

    if (OPAL_SUCCESS != (ret = opal_finalize_util())) {
        return ret;
    }

    /* All done */

    return MPI_SUCCESS;
}
Exemplo n.º 28
0
static int
mca_pml_cm_component_close(void)
{
    return mca_base_framework_close(&ompi_mtl_base_framework);
}
Exemplo n.º 29
0
int orte_ess_base_app_finalize(void)
{
    orte_cr_finalize();

    /* release the event base so we stop all potential
     * race conditions in the messaging teardown */
    if (progress_thread_running) {
        opal_stop_progress_thread("orte", false);
        progress_thread_running = false;
    }

#if OPAL_ENABLE_FT_CR == 1
    (void) mca_base_framework_close(&orte_snapc_base_framework);
    (void) mca_base_framework_close(&orte_sstore_base_framework);
#endif

    /* close frameworks */
    (void) mca_base_framework_close(&orte_filem_base_framework);
    (void) mca_base_framework_close(&orte_errmgr_base_framework);

    /* now can close the rml and its friendly group comm */
    (void) mca_base_framework_close(&orte_grpcomm_base_framework);
    (void) mca_base_framework_close(&opal_dstore_base_framework);
    (void) mca_base_framework_close(&orte_dfs_base_framework);
    (void) mca_base_framework_close(&orte_routed_base_framework);

    (void) mca_base_framework_close(&orte_rml_base_framework);
    (void) mca_base_framework_close(&orte_oob_base_framework);
    (void) mca_base_framework_close(&orte_state_base_framework);

    orte_session_dir_finalize(ORTE_PROC_MY_NAME);

    /* free the event base to cleanup memory */
    opal_stop_progress_thread("orte", true);
    return ORTE_SUCCESS;    
}
Exemplo n.º 30
0
static int rte_finalize(void)
{
    char *contact_path;
    char *jobfam_dir;

    if (signals_set) {
        /* Remove the epipe handler */
        opal_event_signal_del(&epipe_handler);
        /* remove the term handler */
        opal_event_del(&term_handler);
        /** Remove the USR signal handlers */
        opal_event_signal_del(&sigusr1_handler);
        opal_event_signal_del(&sigusr2_handler);
        if (orte_forward_job_control) {
            opal_event_signal_del(&sigtstp_handler);
            opal_event_signal_del(&sigcont_handler);
        }
        signals_set = false;
    }

    /* close the dfs */
    (void) mca_base_framework_close(&orte_dfs_base_framework);
    (void) mca_base_framework_close(&orte_filem_base_framework);
    /* output any lingering stdout/err data */
    fflush(stdout);
    fflush(stderr);
    (void) mca_base_framework_close(&orte_iof_base_framework);
    (void) mca_base_framework_close(&orte_rtc_base_framework);
    (void) mca_base_framework_close(&orte_odls_base_framework);
    (void) mca_base_framework_close(&orte_rmaps_base_framework);
    (void) mca_base_framework_close(&orte_ras_base_framework);
    (void) mca_base_framework_close(&orte_grpcomm_base_framework);
    (void) mca_base_framework_close(&opal_dstore_base_framework);
    (void) mca_base_framework_close(&orte_routed_base_framework);
    (void) mca_base_framework_close(&orte_plm_base_framework);
    (void) mca_base_framework_close(&orte_errmgr_base_framework);
    (void) mca_base_framework_close(&orte_state_base_framework);

    /* cleanup the pstat stuff */
    (void) mca_base_framework_close(&opal_pstat_base_framework);

    /* remove my contact info file, if we have session directories */
    if (NULL != orte_process_info.job_session_dir) {
        jobfam_dir = opal_dirname(orte_process_info.job_session_dir);
        contact_path = opal_os_path(false, jobfam_dir, "contact.txt", NULL);
        free(jobfam_dir);
        unlink(contact_path);
        free(contact_path);
    }

    /* shutdown the messaging frameworks */
    (void) mca_base_framework_close(&orte_rml_base_framework);
    (void) mca_base_framework_close(&orte_oob_base_framework);

    /* ensure we scrub the session directory tree */
    orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
    
    /* close the xml output file, if open */
    if (orte_xml_output) {
        fprintf(orte_xml_fp, "</mpirun>\n");
        fflush(orte_xml_fp);
        if (stdout != orte_xml_fp) {
            fclose(orte_xml_fp);
        }
    }

    return ORTE_SUCCESS;
}