void *MPIR_checkpoint_debugger_waitpoint(void) { int t; opal_thread_t *thr = NULL; thr = opal_thread_get_self(); /* * Sanity check, if the debugger is not going to attach, then do not wait * Make sure to open the debug gate, so that threads can get out */ if( !MPIR_debug_with_checkpoint ) { opal_output_verbose(1, opal_cr_output, "crs: MPIR_checkpoint_debugger_waitpoint(): Debugger is not attaching... (%d)", (int)thr->t_handle); MPIR_checkpoint_debug_gate = 1; return NULL; } else { opal_output_verbose(1, opal_cr_output, "crs: MPIR_checkpoint_debugger_waitpoint(): Waiting for the Debugger to attach... (%d)", (int)thr->t_handle); MPIR_checkpoint_debug_gate = 0; } /* * Let special threads escape without waiting, they will wait later */ for(t = 0; t < opal_cr_debug_num_free_threads; ++t) { if( opal_cr_debug_free_threads[t] != NULL && opal_thread_self_compare(opal_cr_debug_free_threads[t]) ) { opal_output_verbose(1, opal_cr_output, "crs: MPIR_checkpoint_debugger_waitpoint(): Checkpointing thread does not wait here... (%d)", (int)thr->t_handle); return NULL; } } /* * Force all other threads into the waiting function, * unless they are already in there, then just return so we do not nest * calls into this wait function and potentially confuse the debugger. */ if( opal_cr_debug_threads_already_waiting ) { opal_output_verbose(1, opal_cr_output, "crs: MPIR_checkpoint_debugger_waitpoint(): Threads are already waiting from debugger detach, do not wait here... (%d)", (int)thr->t_handle); return NULL; } else { opal_output_verbose(1, opal_cr_output, "crs: MPIR_checkpoint_debugger_waitpoint(): Wait... (%d)", (int)thr->t_handle); return MPIR_checkpoint_debugger_breakpoint(); } }
/* * Interlayer coordination callback */ int ompi_cr_coord(int state) { int ret, exit_status = OMPI_SUCCESS; opal_output_verbose(10, ompi_cr_output, "ompi_cr: coord: ompi_cr_coord(%s)\n", opal_crs_base_state_str((opal_crs_state_type_t)state)); /* * Before calling the previous callback, we have the opportunity to * take action given the state. */ if(OPAL_CRS_CHECKPOINT == state) { /* Do Checkpoint Phase work */ ret = ompi_cr_coord_pre_ckpt(); if( ret == OMPI_EXISTS) { return ret; } else if( ret != OMPI_SUCCESS) { return ret; } } else if (OPAL_CRS_CONTINUE == state ) { /* Do Continue Phase work */ ompi_cr_coord_pre_continue(); } else if (OPAL_CRS_RESTART == state ) { /* Do Restart Phase work */ ompi_cr_coord_pre_restart(); } else if (OPAL_CRS_TERM == state ) { /* Do Continue Phase work in prep to terminate the application */ } else { /* We must have been in an error state from the checkpoint * recreate everything, as in the Continue Phase */ } /* * Call the previous callback, which should be ORTE [which will handle OPAL] */ if(OMPI_SUCCESS != (ret = prev_coord_callback(state)) ) { exit_status = ret; goto cleanup; } /* * After calling the previous callback, we have the opportunity to * take action given the state to tidy up. */ if(OPAL_CRS_CHECKPOINT == state) { /* Do Checkpoint Phase work */ ompi_cr_coord_post_ckpt(); } else if (OPAL_CRS_CONTINUE == state ) { /* Do Continue Phase work */ ompi_cr_coord_post_continue(); #if OPAL_ENABLE_CRDEBUG == 1 /* * If C/R enabled debugging, * wait here for debugger to attach */ if( MPIR_debug_with_checkpoint ) { MPIR_checkpoint_debugger_breakpoint(); } #endif } else if (OPAL_CRS_RESTART == state ) { /* Do Restart Phase work */ ompi_cr_coord_post_restart(); #if OPAL_ENABLE_CRDEBUG == 1 /* * If C/R enabled debugging, * wait here for debugger to attach */ if( MPIR_debug_with_checkpoint ) { MPIR_checkpoint_debugger_breakpoint(); } #endif } else if (OPAL_CRS_TERM == state ) { /* Do Continue Phase work in prep to terminate the application */ } else { /* We must have been in an error state from the checkpoint * recreate everything, as in the Continue Phase */ } cleanup: return exit_status; }