/** * Leave ORTE. * * @retval ORTE_SUCCESS Upon success. * @retval ORTE_ERROR Upon failure. * * This function performs */ int orte_system_finalize(void) { char *contact_path; /* if I'm the seed, remove the universe contact info file */ if (orte_process_info.seed) { contact_path = opal_os_path(false, orte_process_info.universe_session_dir, "universe-setup.txt", NULL); unlink(contact_path); free(contact_path); } /* rmgr and odls close depend on wait/iof */ orte_rmgr_base_close(); orte_odls_base_close(); orte_wait_finalize(); orte_iof_base_close(); orte_ns_base_close(); orte_gpr_base_close(); orte_schema_base_close(); /* finalize selected modules so they can de-register * their receives */ orte_rds_base_close(); orte_ras_base_close(); orte_rmaps_base_close(); orte_pls_base_close(); /* the errmgr close function retains the base * module so that error logging can continue */ orte_errmgr_base_close(); /* now can close the rml */ orte_rml_base_close(); orte_dss_close(); opal_progress_finalize(); opal_event_fini(); orte_session_dir_finalize(orte_process_info.my_name); /* clean out the global structures */ orte_sys_info_finalize(); orte_proc_info_finalize(); orte_univ_info_finalize(); return ORTE_SUCCESS; }
/**
 * Finalize OPAL.
 *
 * Each call drops one reference from opal_initialized.  The actual
 * teardown runs only on the call that brings the counter to zero.
 *
 * @retval OPAL_SUCCESS The counter is still positive after the decrement
 *                      (nothing torn down), or the teardown was run.
 * @retval OPAL_ERROR   The counter became negative after the decrement.
 */
int opal_finalize(void)
{
    /* drop our reference; only the last one performs the teardown */
    --opal_initialized;
    if (opal_initialized < 0) {
        return OPAL_ERROR;
    }
    if (0 != opal_initialized) {
        return OPAL_SUCCESS;
    }

    /* shut down the checkpoint and restart service */
    opal_cr_finalize();

    opal_progress_finalize();
    opal_event_fini();

    /* shut down the high resolution timers */
    opal_timer_base_close();

    opal_backtrace_base_close();

    /* Shut down the memory manager components.  Hooks that were
     * registered may still fire from here until the memory hooks are
     * finalized, and callbacks from the memory manager hooks into the
     * mem_free code may still occur at any point up to the end of the
     * application (even after main() has returned).
     */
    opal_memory_base_close();

    /* finalize the memory manager / tracker */
    opal_mem_hooks_finalize();

    /* shut down the carto framework */
    opal_carto_base_close();

    /* shut down the processor affinity base */
    opal_paffinity_base_close();

    /* shut down the memcpy base */
    opal_memcpy_base_close();

    /* finalize the mca */
    mca_base_close();

    /* finalize util code */
    opal_finalize_util();

    return OPAL_SUCCESS;
}
/**
 * Restart a process from a BLCR snapshot.
 *
 * @param base_snapshot  Snapshot description; it is copied by value into
 *                       the 'super' member of a newly created BLCR
 *                       snapshot object.
 * @param spawn_child    When false, the calling process is replaced with
 *                       the restart command via execvp().  When true, a
 *                       child is forked and the child performs the
 *                       execvp().
 * @param child_pid      Written only when spawn_child is true; receives
 *                       the return value of fork().
 *
 * @return OPAL_SUCCESS in the parent once the child has been forked;
 *         OPAL_ERROR if the snapshot could not be reconstructed, the
 *         restart command could not be split into arguments, or fork()
 *         failed; the error code of opal_crs_blcr_restart_cmd() if that
 *         call failed; the return value of execvp() if execvp() returned.
 */
int opal_crs_blcr_restart(opal_crs_base_snapshot_t *base_snapshot, bool spawn_child, pid_t *child_pid)
{
    /* NOTE(review): 'snapshot' is never released in this function.  It
     * holds a shallow copy of *base_snapshot, so releasing it here may
     * not be safe -- confirm ownership before adding a release. */
    opal_crs_blcr_snapshot_t *snapshot = OBJ_NEW(opal_crs_blcr_snapshot_t);
    char **cr_argv = NULL;
    char *cr_cmd = NULL;
    char *cr_args = NULL;   /* cr_argv joined with spaces, for logging only */
    int ret;
    int exit_status = OPAL_SUCCESS;
    int status;

    snapshot->super = *base_snapshot;

    opal_output_verbose(10, mca_crs_blcr_component.super.output_handle,
                        "crs:blcr: restart(%s, %d)",
                        snapshot->super.reference_name, spawn_child);

    /*
     * If we need to reconstruct the snapshot,
     */
    if (snapshot->super.cold_start) {
        if (OPAL_SUCCESS != (ret = blcr_cold_start(snapshot))) {
            exit_status = OPAL_ERROR;
            opal_output(mca_crs_blcr_component.super.output_handle,
                        "crs:blcr: blcr_restart: Unable to reconstruct the snapshot.");
            goto cleanup;
        }
    }

    /*
     * Get the restart command
     */
    if (OPAL_SUCCESS != (ret = opal_crs_blcr_restart_cmd(snapshot->context_filename, &cr_cmd))) {
        exit_status = ret;
        goto cleanup;
    }
    if (NULL == (cr_argv = opal_argv_split(cr_cmd, ' '))) {
        exit_status = OPAL_ERROR;
        goto cleanup;
    }

    /* Build the printable argument string once and keep ownership of it,
     * so it can be freed in cleanup instead of leaking at each log call. */
    cr_args = opal_argv_join(cr_argv, ' ');

    /*
     * Restart by replacing this process
     */
    /* Need to shutdown the event engine before this.
     * for some reason the BLCR checkpointer and our event engine don't get
     * along very well.
     */
    opal_progress_finalize();
    opal_event_fini();

    if (!spawn_child) {
        opal_output_verbose(10, mca_crs_blcr_component.super.output_handle,
                            "crs:blcr: blcr_restart: SELF: exec :(%s, %s):",
                            blcr_restart_cmd,
                            (NULL != cr_args) ? cr_args : "(null)");

        /* execvp() does not take ownership of its arguments, so no copy
         * of the command name is needed */
        status = execvp(blcr_restart_cmd, cr_argv);

        /* only reached when execvp() failed */
        if (status < 0) {
            opal_output(mca_crs_blcr_component.super.output_handle,
                        "crs:blcr: blcr_restart: SELF: Child failed to execute :(%d):", status);
        }
        opal_output(mca_crs_blcr_component.super.output_handle,
                    "crs:blcr: blcr_restart: SELF: execvp returned %d", status);
        exit_status = status;
        goto cleanup;
    }
    /*
     * Restart by starting a new process
     */
    else {
        *child_pid = fork();
        if (0 == *child_pid) {
            /* Child Process */
            opal_output_verbose(10, mca_crs_blcr_component.super.output_handle,
                                "crs:blcr: blcr_restart: CHILD: exec :(%s, %s):",
                                blcr_restart_cmd,
                                (NULL != cr_args) ? cr_args : "(null)");

            status = execvp(blcr_restart_cmd, cr_argv);

            /* only reached when execvp() failed */
            if (status < 0) {
                opal_output(mca_crs_blcr_component.super.output_handle,
                            "crs:blcr: blcr_restart: CHILD: Child failed to execute :(%d):", status);
            }
            opal_output(mca_crs_blcr_component.super.output_handle,
                        "crs:blcr: blcr_restart: CHILD: execvp returned %d", status);
            exit_status = status;
            /* NOTE(review): the child returns into the caller's code here
             * instead of exiting, so two processes continue after a
             * failed exec -- confirm whether callers expect this. */
            goto cleanup;
        }
        else if (*child_pid > 0) {
            /* Parent is done once it is started. */
            ;
        }
        else {
            opal_output(mca_crs_blcr_component.super.output_handle,
                        "crs:blcr: blcr_restart: CHILD: fork failed :(%d):", *child_pid);
            /* no child was started: do not report success */
            exit_status = OPAL_ERROR;
        }
    }

 cleanup:
    free(cr_cmd);
    free(cr_args);
    if (NULL != cr_argv) {
        opal_argv_free(cr_argv);
    }

    return exit_status;
}