Пример #1
0
// Exercises the executive's mode requests (freeze, run, stop, stop-at-time) and
// checks what exec_get_exec_command() / exec_get_terminate_time() report afterwards.
TEST_F(ExecutiveTest, Moding) {
    //req.add_requirement("411993330 2253353824 2874482372");

    const double requested_stop_time = 123.0;
    const double time_tolerance = 0.000001;

    // After freeze() followed by async_freeze_to_exec_command(), FreezeCmd is expected.
    exec.freeze();
    exec.async_freeze_to_exec_command();
    EXPECT_EQ(exec_get_exec_command(), FreezeCmd);

    // run() is expected to leave RunCmd as the current command.
    exec.run();
    EXPECT_EQ(exec_get_exec_command(), RunCmd);

    // stop() with no argument is expected to leave ExitCmd as the current command.
    exec.stop();
    EXPECT_EQ(exec_get_exec_command(), ExitCmd);

    // stop(time) is expected to be reflected by exec_get_terminate_time().
    exec.stop(requested_stop_time);
    EXPECT_NEAR(exec_get_terminate_time(), requested_stop_time, time_tolerance);
}
Пример #2
0
int Trick::SlaveInfo::write_master_status() {
    /** @par Detailed Design: */
    /** @li If the slave is an active synchronization partner (activated == true) */
    /** @li and we are not currently waiting for slave to reconnect, */
    if (( activated == true ) && (reconnect_count == 0)) {
        /** @li write the current time according to the master to the slave */
        connection->write_time(exec_get_time_tics()) ;
        /** @li write the current exec_command according to the master to the slave */
        connection->write_command((MS_SIM_COMMAND)exec_get_exec_command()) ;
    }
    if ((MS_SIM_COMMAND)exec_get_exec_command() == MS_ChkpntLoadBinCmd) {
        // dmtcp slave will exit, so stop writing status to slave until it reconnects
        // reconnect_count prevents us from writing status to slave, & is incremented every freeze cycle until we have reconnected
        reconnect_count = 1;
    }

    return(0) ;
}
Пример #3
0
int Trick::Master::checkpoint() {
    /** @par Detailed Design: */
    /** @li If chkpnt_dump_auto, tell slave to dump a checkpoint */
    unsigned int ii ;
    // do not tell slave to dump if this is a pre_init, post_init, or end checkpoint
    // those are handled with flags sent to slave in init()
    if ((exec_get_mode() == Initialization) || (exec_get_mode() == ExitMode)) {
        return(0);
    }
    if (enabled) {
        // Use 2 loops to read all slave status before writing any status out.
        for ( ii = 0 ; ii < slaves.size() ; ii++ ) {
            slaves[ii]->read_slave_status() ;
        }
        SIM_COMMAND save_command = exec_get_exec_command() ;
        std::string full_path_name = checkpoint_get_output_file();
        for ( ii = 0 ; ii < slaves.size() ; ii++ ) {
            if (slaves[ii]->chkpnt_dump_auto) {
                if (slaves[ii]->chkpnt_binary) {
                    if (slaves[ii]->slave_type == "dmtcp") {
                        exec_set_exec_command((SIM_COMMAND)MS_ChkpntDumpBinCmd) ;
                        slaves[ii]->write_master_status() ;
                        slaves[ii]->write_master_chkpnt_name(full_path_name) ;
                        exec_set_exec_command(save_command) ;
                    } else {
                        message_publish(MSG_ERROR, "Slave is not running under dmtcp control so it cannot dump binary checkpoint.\n") ;
                        slaves[ii]->write_master_status() ;
                    }
                } else { // ascii
                    exec_set_exec_command((SIM_COMMAND)MS_ChkpntDumpAsciiCmd) ;
                    slaves[ii]->write_master_status() ;
                    slaves[ii]->write_master_chkpnt_name(full_path_name) ;
                    exec_set_exec_command(save_command) ;
                }
            } else { // no auto dump
                slaves[ii]->write_master_status() ;
            }
        }
    }
    return(0) ;
}
Пример #4
0
int Trick::Master::preload_checkpoint() {
    /** @par Detailed Design: */
    /** @li If chkpnt_load_auto, tell slave to load a checkpoint */
    unsigned int ii ;
    if (enabled) {
        // Use 2 loops to read all slave status before writing any status out.
        for ( ii = 0 ; ii < slaves.size() ; ii++ ) {
            slaves[ii]->read_slave_status() ;
        }
        SIM_COMMAND save_command = exec_get_exec_command() ;
        std::string full_path_name = checkpoint_get_load_file();
        for ( ii = 0 ; ii < slaves.size() ; ii++ ) {
            if (slaves[ii]->chkpnt_load_auto) {
                if (slaves[ii]->chkpnt_binary) {
                    if (slaves[ii]->slave_type == "dmtcp") {
                        exec_set_exec_command((SIM_COMMAND)MS_ChkpntLoadBinCmd) ;
                        slaves[ii]->write_master_status() ;
                        slaves[ii]->write_master_chkpnt_name(full_path_name) ;
                        exec_set_exec_command(save_command) ;
                    } else {
                        message_publish(MSG_ERROR, "Slave is not running under dmtcp control so it cannot load binary checkpoint.\n") ;
                        slaves[ii]->write_master_status() ;
                    }
                } else { // ascii
                    exec_set_exec_command((SIM_COMMAND)MS_ChkpntLoadAsciiCmd) ;
                    slaves[ii]->write_master_status() ;
                    slaves[ii]->write_master_chkpnt_name(full_path_name) ;
                    exec_set_exec_command(save_command) ;
                }
            } else { // no auto load
                slaves[ii]->write_master_status() ;
            }
        }
    }
    return(0) ;
}
Пример #5
0
/**
 * @brief Read this slave's current command from the connection and react to it on the master side.
 *
 * Nothing happens unless the slave is activated. The command read from the
 * connection is only acted upon while the master's own exec command is not
 * MS_ExitCmd. Depending on the slave command this may bump or reset
 * reconnect_count, deactivate the slave, request a master freeze, adjust the
 * connection's sync wait limit, restart a dmtcp slave, or call
 * exec_terminate_with_return().
 *
 * @return 0 on every path that returns from this function.
 */
int Trick::SlaveInfo::read_slave_status() {

    MS_SIM_COMMAND slave_command ;  // command most recently read from the slave
    MS_SIM_COMMAND exec_command ;   // master's own exec command at the time of the read

    /** @par Detailed Design: */
    /** @li If the slave is an active synchronization partner (activated == true) */
    if (activated == true) {

        /** @li read the current slave exec_command */
        slave_command = connection->read_command() ;
        //printf("DEBUG master read %d command from slave\n", slave_command);fflush(stdout);

        exec_command = (MS_SIM_COMMAND)exec_get_exec_command() ;
        // Fix-up for socket connections: the slave's Exit command may never arrive over the socket when it
        // terminates. If the read came back as an error, a reconnect wait limit is configured and we are not
        // already waiting for a reconnect, treat the error as an Exit command.
        if (dynamic_cast<MSSocket*>(connection)) {
            if ((slave_command == MS_ErrorCmd) && (reconnect_wait_limit > 0.0) && (reconnect_count == 0)) {
                slave_command = MS_ExitCmd;
            }
        }

        /** @li If the master is not currently exiting, change modes if the slave is freezing/exiting or has an error */
        if ( exec_command != MS_ExitCmd ) {
            switch ( slave_command ) {
                case (MS_ErrorCmd):
                    /** @li if the user has set a reconnect_wait_limit, continue on if we are still under that limit, otherwise
                            if the current slave mode cannot be read, exit the master if sync_error_terminate == true,
                            otherwise set the activated flag to false */
                    // Time waited so far is approximated as reconnect_count freeze frames.
                    if ( (reconnect_count * exec_get_freeze_frame()) < reconnect_wait_limit) {
                        reconnect_count++;
                    } else if (sync_error_terminate == true) {
                        message_publish(MSG_ERROR, "Master lost sync with slave, so master is terminating.\n") ;
                        exec_terminate_with_return(-1, __FILE__, __LINE__ , "Master lost sync with slave.") ;
                    }
                    else {
                        message_publish(MSG_ERROR, "Master lost sync with slave, so slave is being deactivated.\n") ;
                        activated = false ;
                        return(0) ;
                    }
                    break ;
                case (MS_ExitCmd):
                    /** @li if the current slave mode is exiting, exit the master if sync_error_terminate == true.
                            otherwise wait for slave to reconnect. when wait limit is 0, set the activated flag to false */
                    if  (sync_error_terminate == true){
                        message_publish(MSG_WARNING, "sync_error_terminate is true: Slave is exiting, so master is terminating.\n") ;
                        exec_terminate_with_return(-1, __FILE__, __LINE__ , "Slave is exiting, so is the master.") ;
                    }
                    else {
                        message_publish(MSG_WARNING, "Slave is exiting.\n") ;
                        // if reconnect_wait_limit is set, master waits for slave to reconnect (e.g. dmtcp restarting)
                        if (reconnect_wait_limit > 0.0) {
                            message_publish(MSG_WARNING, "Master will wait %f seconds for slave to reconnect.\n", reconnect_wait_limit) ;
                            // make reads (shared mem connection) return quickly so we don't overrun waiting for reconnect
                            // TODO: for socket connection we will overrun in the accept call (see restart_dmtcp_slave)
                            connection->set_sync_wait_limit(exec_get_freeze_frame());
                            if (chkpnt_binary) {
                                restart_dmtcp_slave(); // restart the slave dmtcp executable
                            }
                        }
                        else {
                            // No wait limit configured: stop treating this slave as a synchronization partner.
                            message_publish(MSG_WARNING, "reconnect_wait_limit: 0.0 - Master will stop communicating with slave.\n") ;
                            activated = false ;
                        }
                        return(0) ;
                    }
                    break ;
                case (MS_ChkpntLoadBinCmd):
                    // slave has received our load command and is now sending us its dmtcp port and checkpoint file name
                    dmtcp_port = connection->read_port() ;
                    connection->read_name(chkpnt_name, sizeof(chkpnt_name)); // dir/filename
                    message_publish(MSG_WARNING , "Master received DMTCP Port and Checkpoint Filename from slave.\n");
                    connection->write_command((MS_SIM_COMMAND)exec_get_exec_command()) ; // send this as an ack so slave can shut down
                    break ;
                case (MS_FreezeCmd):
                    /** @li if the current slave is freezing, freeze the master too */
                    message_publish(MSG_INFO, "Slave is freezing.\n") ;
                    exec_set_exec_command(FreezeCmd) ;
                    // A freeze command was read from the slave, so we are no longer waiting for a reconnect.
                    reconnect_count = 0;
                    break ;
                case (MS_ReconnectCmd):
                    // set the sync wait limit back to its default
                    connection->set_sync_wait_limit(sync_wait_limit);
                    message_publish(MSG_INFO, "Master has reconnected to slave.\n") ;
                    // Reconnected: clear the wait counter so write_master_status() resumes writing.
                    reconnect_count = 0;
                    break ;
                default:
                    // Any other slave command requires no action from the master here.
                    break ;
            }
        }
    }
    return(0) ;
}