Example #1
0
/**
@details
-# Get the sim_mode
-# Reset the real-time clock reference
-# If sim_mode is Run
   -# Call start_realtime to start the real time clock
-# Else if sim_mode is Freeze
   -# Call freeze init to set the sleep timer to freeze mode.
*/
int Trick::RealtimeSync::restart(long long ref_time) {

    SIM_MODE sim_mode = exec_get_mode() ;

    rt_clock->clock_reset(ref_time) ;
    if ( sim_mode == Run ) {
        start_realtime(exec_get_software_frame() , ref_time) ;
    } else if ( sim_mode == Freeze ) {
        freeze_init(exec_get_freeze_frame()) ;
    }

    return 0 ;
}
Example #2
0
int Trick::SlaveInfo::read_slave_status() {

    MS_SIM_COMMAND slave_command ;
    MS_SIM_COMMAND exec_command ;

    /** @par Detailed Design: */
    /** @li If the slave is an active synchronization partner (activated == true) */
    if (activated == true) {

        /** @li read the current slave exec_command */
        slave_command = connection->read_command() ;
        //printf("DEBUG master read %d command from slave\n", slave_command);fflush(stdout);

        exec_command = (MS_SIM_COMMAND)exec_get_exec_command() ;
        // fixup: is it possible we won't get slave's Exit command over socket when it terminates?, set it here if that happens
        if (dynamic_cast<MSSocket*>(connection)) {
            if ((slave_command == MS_ErrorCmd) && (reconnect_wait_limit > 0.0) && (reconnect_count == 0)) {
                slave_command = MS_ExitCmd;
            }
        }

        /** @li If the master is not currently exiting, change modes if the slave is freezing/exiting or has an error */
        if ( exec_command != MS_ExitCmd ) {
            switch ( slave_command ) {
                case (MS_ErrorCmd):
                    /** @li if the user has set a reconnect_wait_limit, continue on if we are still under that limit, otherwise
                            if the current slave mode cannot be read, exit the master if sync_error_terminate == true,
                            otherwise set the activated flag to false */
                    if ( (reconnect_count * exec_get_freeze_frame()) < reconnect_wait_limit) {
                        reconnect_count++;
                    } else if (sync_error_terminate == true) {
                        message_publish(MSG_ERROR, "Master lost sync with slave, so master is terminating.\n") ;
                        exec_terminate_with_return(-1, __FILE__, __LINE__ , "Master lost sync with slave.") ;
                    }
                    else {
                        message_publish(MSG_ERROR, "Master lost sync with slave, so slave is being deactivated.\n") ;
                        activated = false ;
                        return(0) ;
                    }
                    break ;
                case (MS_ExitCmd):
                    /** @li if the current slave mode is exiting, exit the master if sync_error_terminate == true.
                            otherwise wait for slave to reconnect. when wait limit is 0, set the activated flag to false */
                    if  (sync_error_terminate == true){
                        message_publish(MSG_WARNING, "sync_error_terminate is true: Slave is exiting, so master is terminating.\n") ;
                        exec_terminate_with_return(-1, __FILE__, __LINE__ , "Slave is exiting, so is the master.") ;
                    }
                    else {
                        message_publish(MSG_WARNING, "Slave is exiting.\n") ;
                        // if reconnect_wait_limit is set, master waits for slave to reconnect (e.g. dmtcp restarting)
                        if (reconnect_wait_limit > 0.0) {
                            message_publish(MSG_WARNING, "Master will wait %f seconds for slave to reconnect.\n", reconnect_wait_limit) ;
                            // make reads (shared mem connection) return quickly so we don't overrun waiting for reconnect
                            // TODO: for socket connection we will overrun in the accept call (see restart_dmtcp_slave)
                            connection->set_sync_wait_limit(exec_get_freeze_frame());
                            if (chkpnt_binary) {
                                restart_dmtcp_slave(); // restart the slave dmtcp executable
                            }
                        }
                        else {
                            message_publish(MSG_WARNING, "reconnect_wait_limit: 0.0 - Master will stop communicating with slave.\n") ;
                            activated = false ;
                        }
                        return(0) ;
                    }
                    break ;
                case (MS_ChkpntLoadBinCmd):
                    // slave has received our load command and is now sending us his dmtcp port and checkpoint file name
                    dmtcp_port = connection->read_port() ;
                    connection->read_name(chkpnt_name, sizeof(chkpnt_name)); // dir/filename
                    message_publish(MSG_WARNING , "Master received DMTCP Port and Checkpoint Filename from slave.\n");
                    connection->write_command((MS_SIM_COMMAND)exec_get_exec_command()) ; // send this as an ack so slove can shut down
                    break ;
                case (MS_FreezeCmd):
                    /** @li if the current slave is freezing, freeze the master too */
                    message_publish(MSG_INFO, "Slave is freezing.\n") ;
                    exec_set_exec_command(FreezeCmd) ;
                    reconnect_count = 0;
                    break ;
                case (MS_ReconnectCmd):
                    // set the sync wait limit back to its default
                    connection->set_sync_wait_limit(sync_wait_limit);
                    message_publish(MSG_INFO, "Master has reconnected to slave.\n") ;
                    reconnect_count = 0;
                    break ;
                default:
                    break ;
            }
        }
    }
    return(0) ;
}