int mca_pml_ob1_ft_event( int state ) { static bool first_continue_pass = false; ompi_proc_t** procs = NULL; size_t num_procs; int ret, p; ompi_rte_collective_t *coll, *modex; coll = OBJ_NEW(ompi_rte_collective_t); coll->id = ompi_process_info.peer_init_barrier; if(OPAL_CRS_CHECKPOINT == state) { if( opal_cr_timing_barrier_enabled ) { OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCPBR1); ompi_rte_barrier(coll); OMPI_WAIT_FOR_COMPLETION(coll->active); } OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P0); } else if(OPAL_CRS_CONTINUE == state) { first_continue_pass = !first_continue_pass; if( !first_continue_pass ) { if( opal_cr_timing_barrier_enabled ) { OPAL_CR_SET_TIMER(OPAL_CR_TIMER_COREBR0); ompi_rte_barrier(coll); OMPI_WAIT_FOR_COMPLETION(coll->active); } OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P2); } if( orte_cr_continue_like_restart && !first_continue_pass ) { /* * Get a list of processes */ procs = ompi_proc_all(&num_procs); if(NULL == procs) { ret = OMPI_ERR_OUT_OF_RESOURCE; goto clean; } /* * Refresh the proc structure, and publish our proc info in the modex. * NOTE: Do *not* call ompi_proc_finalize as there are many places in * the code that point to indv. procs in this strucutre. For our * needs here we only need to fix up the modex, bml and pml * references. */ if (OMPI_SUCCESS != (ret = ompi_proc_refresh())) { opal_output(0, "pml:ob1: ft_event(Restart): proc_refresh Failed %d", ret); for(p = 0; p < (int)num_procs; ++p) { OBJ_RELEASE(procs[p]); } free (procs); goto clean; } } } else if(OPAL_CRS_RESTART_PRE == state ) { /* Nothing here */ } else if(OPAL_CRS_RESTART == state ) { /* * Get a list of processes */ procs = ompi_proc_all(&num_procs); if(NULL == procs) { ret = OMPI_ERR_OUT_OF_RESOURCE; goto clean; } /* * Clean out the modex information since it is invalid now. * ompi_rte_purge_proc_attrs(); * This happens at the ORTE level, so doing it again here will cause * some issues with socket caching. */ /* * Refresh the proc structure, and publish our proc info in the modex. 
* NOTE: Do *not* call ompi_proc_finalize as there are many places in * the code that point to indv. procs in this strucutre. For our * needs here we only need to fix up the modex, bml and pml * references. */ if (OMPI_SUCCESS != (ret = ompi_proc_refresh())) { opal_output(0, "pml:ob1: ft_event(Restart): proc_refresh Failed %d", ret); for(p = 0; p < (int)num_procs; ++p) { OBJ_RELEASE(procs[p]); } free (procs); goto clean; } } else if(OPAL_CRS_TERM == state ) { ; } else { ; } /* Call the BML * BML is expected to call ft_event in * - BTL(s) * - MPool(s) */ if( OMPI_SUCCESS != (ret = mca_bml.bml_ft_event(state))) { opal_output(0, "pml:base: ft_event: BML ft_event function failed: %d\n", ret); } if(OPAL_CRS_CHECKPOINT == state) { OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P1); if( opal_cr_timing_barrier_enabled ) { OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR0); /* JJH Cannot barrier here due to progress engine -- ompi_rte_barrier();*/ } } else if(OPAL_CRS_CONTINUE == state) { if( !first_continue_pass ) { if( opal_cr_timing_barrier_enabled ) { OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR1); ompi_rte_barrier(coll); OMPI_WAIT_FOR_COMPLETION(coll->active); } OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P3); } if( orte_cr_continue_like_restart && !first_continue_pass ) { /* * Exchange the modex information once again. * BTLs will have republished their modex information. */ modex = OBJ_NEW(ompi_rte_collective_t); modex->id = ompi_process_info.peer_modex; if (OMPI_SUCCESS != (ret = orte_grpcomm.modex(modex))) { opal_output(0, "pml:ob1: ft_event(Restart): Failed orte_grpcomm.modex() = %d", ret); OBJ_RELEASE(modex); goto clean; } OMPI_WAIT_FOR_COMPLETION(modex->active); OBJ_RELEASE(modex); /* * Startup the PML stack now that the modex is running again * Add the new procs (BTLs redo modex recv's) */ if( OMPI_SUCCESS != (ret = mca_pml_ob1_add_procs(procs, num_procs) ) ) { opal_output(0, "pml:ob1: ft_event(Restart): Failed in add_procs (%d)", ret); goto clean; } /* Is this barrier necessary ? 
JJH */ if (OMPI_SUCCESS != (ret = ompi_rte_barrier(coll))) { opal_output(0, "pml:ob1: ft_event(Restart): Failed in ompi_rte_barrier (%d)", ret); goto clean; } OMPI_WAIT_FOR_COMPLETION(coll->active); if( NULL != procs ) { for(p = 0; p < (int)num_procs; ++p) { OBJ_RELEASE(procs[p]); } free(procs); procs = NULL; } } if( !first_continue_pass ) { if( opal_cr_timing_barrier_enabled ) { OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR2); ompi_rte_barrier(coll); OMPI_WAIT_FOR_COMPLETION(coll->active); } OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCP1); } } else if(OPAL_CRS_RESTART_PRE == state ) { /* Nothing here */ } else if(OPAL_CRS_RESTART == state ) { /* * Exchange the modex information once again. * BTLs will have republished their modex information. */ modex = OBJ_NEW(ompi_rte_collective_t); modex->id = ompi_process_info.peer_modex; if (OMPI_SUCCESS != (ret = orte_grpcomm.modex(modex))) { opal_output(0, "pml:ob1: ft_event(Restart): Failed orte_grpcomm.modex() = %d", ret); OBJ_RELEASE(modex); goto clean; } OMPI_WAIT_FOR_COMPLETION(modex->active); OBJ_RELEASE(modex); /* * Startup the PML stack now that the modex is running again * Add the new procs (BTLs redo modex recv's) */ if( OMPI_SUCCESS != (ret = mca_pml_ob1_add_procs(procs, num_procs) ) ) { opal_output(0, "pml:ob1: ft_event(Restart): Failed in add_procs (%d)", ret); goto clean; } /* Is this barrier necessary ? JJH */ if (OMPI_SUCCESS != (ret = ompi_rte_barrier(coll))) { opal_output(0, "pml:ob1: ft_event(Restart): Failed in ompi_rte_barrier (%d)", ret); goto clean; } OMPI_WAIT_FOR_COMPLETION(coll->active); if( NULL != procs ) { for(p = 0; p < (int)num_procs; ++p) { OBJ_RELEASE(procs[p]); } free(procs); procs = NULL; } } else if(OPAL_CRS_TERM == state ) { ; } else { ; } ret = OMPI_SUCCESS; clean: OBJ_RELEASE(coll); return ret; }
/*
 * Component init for the PSM MTL: count the procs sharing this node,
 * configure and initialize the PSM library, and return the module.
 *
 * Returns &ompi_mtl_psm.super on success, or NULL if the proc query,
 * error-handler registration, or psm_init() fails.
 *
 * NOTE(review): enable_progress_threads / enable_mpi_threads are not
 * consulted here — presumably thread-support checks happen elsewhere;
 * confirm against the component open/query path.
 */
static mca_mtl_base_module_t* ompi_mtl_psm_component_init(bool enable_progress_threads,
                                                          bool enable_mpi_threads)
{
    psm_error_t err;
    int rc;
    /* Pass the compile-time PSM version to psm_init(); the library may
     * update these to the version it actually provides. */
    int verno_major = PSM_VERNO_MAJOR;
    int verno_minor = PSM_VERNO_MINOR;
    ompi_proc_t *my_proc, **procs;
    size_t num_total_procs, proc;
    int local_rank = -1, num_local_procs = 0;

    /* Compute the total number of processes on this host and our local rank
     * on that node. We need to provide PSM with these values so it can
     * allocate hardware contexts appropriately across processes.
     */
    if ((rc = ompi_proc_refresh()) != OMPI_SUCCESS) {
        return NULL;
    }

    my_proc = ompi_proc_local();
    if (NULL == (procs = ompi_proc_world(&num_total_procs))) {
        return NULL;
    }

    /* Our local rank is the count of local procs seen before ourselves
     * in world order; we also count ourselves as a local proc. */
    for (proc = 0; proc < num_total_procs; proc++) {
        if (my_proc == procs[proc]) {
            local_rank = num_local_procs++;
            continue;
        }
        if (OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags)) {
            num_local_procs++;
        }
    }

    /* We must have found ourselves in the world list. */
    assert(local_rank >= 0 && num_local_procs > 0);
    free(procs);

    /* Register before psm_init() so PSM does not install its default
     * (aborting) error handler. */
    err = psm_error_register_handler(NULL /* no ep */, PSM_ERRHANDLER_NOP);
    if (err) {
        opal_output(0, "Error in psm_error_register_handler (error %s)\n",
                    psm_error_get_string(err));
        return NULL;
    }

#if PSM_VERNO >= 0x010c
    /* Set infinipath debug level */
    err = psm_setopt(PSM_COMPONENT_CORE, 0, PSM_CORE_OPT_DEBUG,
                     (const void*) &ompi_mtl_psm.debug_level,
                     sizeof(unsigned));
    if (err) {
        /* Non fatal error. Can continue */
        orte_show_help("help-mtl-psm.txt", "psm init", false,
                       psm_error_get_string(err));
    }
#endif

    /* Only allow for shm and ipath devices in 2.0 and earlier releases
     * (unless the user overrides the setting).
     * Last setenv arg is 0: never clobber a user-provided PSM_DEVICES.
     * Must be set before psm_init() reads the environment.
     */
    if (PSM_VERNO >= 0x0104) {
        setenv("PSM_DEVICES", "self,shm,ipath", 0);
    } else {
        setenv("PSM_DEVICES", "shm,ipath", 0);
    }

    err = psm_init(&verno_major, &verno_minor);
    if (err) {
        orte_show_help("help-mtl-psm.txt", "psm init", true,
                       psm_error_get_string(err));
        return NULL;
    }

    /* Complete PSM initialization */
    ompi_mtl_psm_module_init(local_rank, num_local_procs);

    /* Size of the PSM-specific tail of the request structure. */
    ompi_mtl_psm.super.mtl_request_size =
        sizeof(mca_mtl_psm_request_t) -
        sizeof(struct mca_mtl_request_t);

    return &ompi_mtl_psm.super;
}