/** @details -# Get the sim_mode -# Reset the real-time clock reference -# If sim_mode is Run -# Call start_realtime to start the real time clock -# Else if sim_mode is Freeze -# Call freeze init to set the sleep timer to freeze mode. */ int Trick::RealtimeSync::restart(long long ref_time) { SIM_MODE sim_mode = exec_get_mode() ; rt_clock->clock_reset(ref_time) ; if ( sim_mode == Run ) { start_realtime(exec_get_software_frame() , ref_time) ; } else if ( sim_mode == Freeze ) { freeze_init(exec_get_freeze_frame()) ; } return 0 ; }
int Trick::SlaveInfo::read_slave_status() { MS_SIM_COMMAND slave_command ; MS_SIM_COMMAND exec_command ; /** @par Detailed Design: */ /** @li If the slave is an active synchronization partner (activated == true) */ if (activated == true) { /** @li read the current slave exec_command */ slave_command = connection->read_command() ; //printf("DEBUG master read %d command from slave\n", slave_command);fflush(stdout); exec_command = (MS_SIM_COMMAND)exec_get_exec_command() ; // fixup: is it possible we won't get slave's Exit command over socket when it terminates?, set it here if that happens if (dynamic_cast<MSSocket*>(connection)) { if ((slave_command == MS_ErrorCmd) && (reconnect_wait_limit > 0.0) && (reconnect_count == 0)) { slave_command = MS_ExitCmd; } } /** @li If the master is not currently exiting, change modes if the slave is freezing/exiting or has an error */ if ( exec_command != MS_ExitCmd ) { switch ( slave_command ) { case (MS_ErrorCmd): /** @li if the user has set a reconnect_wait_limit, continue on if we are still under that limit, otherwise if the current slave mode cannot be read, exit the master if sync_error_terminate == true, otherwise set the activated flag to false */ if ( (reconnect_count * exec_get_freeze_frame()) < reconnect_wait_limit) { reconnect_count++; } else if (sync_error_terminate == true) { message_publish(MSG_ERROR, "Master lost sync with slave, so master is terminating.\n") ; exec_terminate_with_return(-1, __FILE__, __LINE__ , "Master lost sync with slave.") ; } else { message_publish(MSG_ERROR, "Master lost sync with slave, so slave is being deactivated.\n") ; activated = false ; return(0) ; } break ; case (MS_ExitCmd): /** @li if the current slave mode is exiting, exit the master if sync_error_terminate == true. otherwise wait for slave to reconnect. when wait limit is 0, set the activated flag to false */ if (sync_error_terminate == true){ message_publish(MSG_WARNING, "sync_error_terminate is true: Slave is exiting, so master is terminating.\n") ; exec_terminate_with_return(-1, __FILE__, __LINE__ , "Slave is exiting, so is the master.") ; } else { message_publish(MSG_WARNING, "Slave is exiting.\n") ; // if reconnect_wait_limit is set, master waits for slave to reconnect (e.g. dmtcp restarting) if (reconnect_wait_limit > 0.0) { message_publish(MSG_WARNING, "Master will wait %f seconds for slave to reconnect.\n", reconnect_wait_limit) ; // make reads (shared mem connection) return quickly so we don't overrun waiting for reconnect // TODO: for socket connection we will overrun in the accept call (see restart_dmtcp_slave) connection->set_sync_wait_limit(exec_get_freeze_frame()); if (chkpnt_binary) { restart_dmtcp_slave(); // restart the slave dmtcp executable } } else { message_publish(MSG_WARNING, "reconnect_wait_limit: 0.0 - Master will stop communicating with slave.\n") ; activated = false ; } return(0) ; } break ; case (MS_ChkpntLoadBinCmd): // slave has received our load command and is now sending us his dmtcp port and checkpoint file name dmtcp_port = connection->read_port() ; connection->read_name(chkpnt_name, sizeof(chkpnt_name)); // dir/filename message_publish(MSG_WARNING , "Master received DMTCP Port and Checkpoint Filename from slave.\n"); connection->write_command((MS_SIM_COMMAND)exec_get_exec_command()) ; // send this as an ack so slove can shut down break ; case (MS_FreezeCmd): /** @li if the current slave is freezing, freeze the master too */ message_publish(MSG_INFO, "Slave is freezing.\n") ; exec_set_exec_command(FreezeCmd) ; reconnect_count = 0; break ; case (MS_ReconnectCmd): // set the sync wait limit back to its default connection->set_sync_wait_limit(sync_wait_limit); message_publish(MSG_INFO, "Master has reconnected to slave.\n") ; reconnect_count = 0; break ; default: break ; } } } return(0) ; }