Example #1
0
/**
 * Leave ORTE.
 *
 * @retval ORTE_SUCCESS Always, in the current implementation: the results
 *                      of the individual close/finalize calls below are
 *                      not inspected.
 * @retval ORTE_ERROR   Documented for callers, but not currently returned
 *                      by this function.
 *
 * This function performs the ORTE teardown sequence: when this process is
 * the seed it removes the universe contact file, then it closes the
 * frameworks, finalizes the progress and event engines, finalizes the
 * session directory for this process, and cleans out the global info
 * structures.  The order of the calls is significant (see the inline
 * comments) and must not be rearranged casually.
 */
int orte_system_finalize(void)
{
    /* if I'm the seed, remove the universe contact info file */
    if (orte_process_info.seed) {
        char *contact_path = opal_os_path(false,
                                          orte_process_info.universe_session_dir,
                                          "universe-setup.txt", NULL);

        /* Never hand a NULL path to unlink(): skip the removal if the
         * path could not be built.
         */
        if (NULL != contact_path) {
            /* best effort: a failure to remove the file is not fatal */
            (void) unlink(contact_path);
            free(contact_path);
        }
    }

    /* rmgr and odls close depend on wait/iof */
    orte_rmgr_base_close();
    orte_odls_base_close();
    orte_wait_finalize();
    orte_iof_base_close();

    orte_ns_base_close();
    orte_gpr_base_close();
    orte_schema_base_close();

    /* finalize selected modules so they can de-register
     * their receives
     */
    orte_rds_base_close();
    orte_ras_base_close();
    orte_rmaps_base_close();
    orte_pls_base_close();
    /* the errmgr close function retains the base
     * module so that error logging can continue
     */
    orte_errmgr_base_close();

    /* now can close the rml */
    orte_rml_base_close();
    orte_dss_close();

    opal_progress_finalize();

    opal_event_fini();

    orte_session_dir_finalize(orte_process_info.my_name);

    /* clean out the global structures */
    orte_sys_info_finalize();
    orte_proc_info_finalize();
    orte_univ_info_finalize();

    return ORTE_SUCCESS;
}
/**
 * Finalize the OPAL layer.
 *
 * Each call decrements the opal_initialized counter.  The actual teardown
 * runs only on the call that brings the counter to exactly zero.
 *
 * @retval OPAL_SUCCESS The counter is still positive after the decrement
 *                      (nothing is torn down), or the teardown ran.
 * @retval OPAL_ERROR   The counter became negative.
 */
int
opal_finalize(void)
{
    opal_initialized--;

    /* counter went below zero: report an error */
    if( opal_initialized < 0 ) {
        return OPAL_ERROR;
    }
    /* counter still positive: nothing to tear down yet */
    if( opal_initialized > 0 ) {
        return OPAL_SUCCESS;
    }

    /* checkpoint and restart service */
    opal_cr_finalize();

    /* progress and event engines */
    opal_progress_finalize();
    opal_event_fini();

    /* high resolution timers */
    opal_timer_base_close();

    opal_backtrace_base_close();

    /* Memory manager components.  Registered hooks may still fire
       between this point and the call to opal_mem_free_finalize(), and
       callbacks from the memory manager hooks into the mem_free code
       may still occur at any time until the application ends (even
       after main() has returned). */
    opal_memory_base_close();

    /* memory manager / tracker */
    opal_mem_hooks_finalize();

    /* carto framework */
    opal_carto_base_close();

    /* processor affinity base */
    opal_paffinity_base_close();

    /* memcpy base */
    opal_memcpy_base_close();

    /* the mca itself */
    mca_base_close();

    /* util code goes last */
    opal_finalize_util();

    return OPAL_SUCCESS;
}
int opal_crs_blcr_restart(opal_crs_base_snapshot_t *base_snapshot, bool spawn_child, pid_t *child_pid)
{
    opal_crs_blcr_snapshot_t *snapshot = OBJ_NEW(opal_crs_blcr_snapshot_t);
    char **cr_argv = NULL;
    char *cr_cmd = NULL;
    int ret;
    int exit_status = OPAL_SUCCESS;
    int status;

    snapshot->super = *base_snapshot;

    opal_output_verbose(10, mca_crs_blcr_component.super.output_handle,
                        "crs:blcr: restart(%s, %d)", snapshot->super.reference_name, spawn_child);

    /*
     * If we need to reconstruct the snapshot,
     */
    if(snapshot->super.cold_start) {
        if( OPAL_SUCCESS != (ret = blcr_cold_start(snapshot)) ) {
            exit_status = OPAL_ERROR;
            opal_output(mca_crs_blcr_component.super.output_handle,
                        "crs:blcr: blcr_restart: Unable to reconstruct the snapshot.");
            goto cleanup;
        }
    }
    

    /*
     * Get the restart command
     */
    if ( OPAL_SUCCESS != (ret = opal_crs_blcr_restart_cmd(snapshot->context_filename, &cr_cmd)) ) {
        exit_status = ret;
        goto cleanup;
    }
    if ( NULL == (cr_argv = opal_argv_split(cr_cmd, ' ')) ) {
        exit_status = OPAL_ERROR;
        goto cleanup;
    }


    /*
     * Restart by replacing this process
     */
    /* Need to shutdown the event engine before this.
     * for some reason the BLCR checkpointer and our event engine don't get
     * along very well.
     */
    opal_progress_finalize();
    opal_event_fini();

    if (!spawn_child) {
        opal_output_verbose(10, mca_crs_blcr_component.super.output_handle,
                            "crs:blcr: blcr_restart: SELF: exec :(%s, %s):", 
                            strdup(blcr_restart_cmd),
                            opal_argv_join(cr_argv, ' '));

        status = execvp(strdup(blcr_restart_cmd), cr_argv);

        if(status < 0) {
            opal_output(mca_crs_blcr_component.super.output_handle,
                        "crs:blcr: blcr_restart: SELF: Child failed to execute :(%d):", status);
        }
        opal_output(mca_crs_blcr_component.super.output_handle,
                    "crs:blcr: blcr_restart: SELF: execvp returned %d", status);

        exit_status = status;
        goto cleanup;
    }
    /*
     * Restart by starting a new process
     */
    else {
        *child_pid = fork();

        if( 0 == *child_pid) {
            /* Child Process */
            opal_output_verbose(10, mca_crs_blcr_component.super.output_handle,
                                "crs:blcr: blcr_restart: CHILD: exec :(%s, %s):", 
                                strdup(blcr_restart_cmd),
                                opal_argv_join(cr_argv, ' '));
            
            status = execvp(strdup(blcr_restart_cmd), cr_argv);

            if(status < 0) {
                opal_output(mca_crs_blcr_component.super.output_handle,
                            "crs:blcr: blcr_restart: CHILD: Child failed to execute :(%d):", status);
            }
            opal_output(mca_crs_blcr_component.super.output_handle,
                        "crs:blcr: blcr_restart: CHILD: execvp returned %d", status);

            exit_status = status;
            goto cleanup;
        }
        else if(*child_pid > 0) {
            /* Parent is done once it is started. */
            ;
        }
        else {
            opal_output(mca_crs_blcr_component.super.output_handle,
                        "crs:blcr: blcr_restart: CHILD: fork failed :(%d):", *child_pid);
        }
    }

 cleanup:
    if(NULL != cr_cmd)
        free(cr_cmd);
    if(NULL != cr_argv)
        opal_argv_free(cr_argv);
    
    return exit_status;
}