void statusUpdate(const StatusUpdate& update, const UPID& pid) { const TaskStatus& status = update.status(); VLOG(1) << "Status update: task " << status.task_id() << " of framework " << update.framework_id() << " is now in state " << status.state(); CHECK(frameworkId == update.framework_id()); // TODO(benh): Note that this maybe a duplicate status update! // Once we get support to try and have a more consistent view // of what's running in the cluster, we'll just let this one // slide. The alternative is possibly dealing with a scheduler // failover and not correctly giving the scheduler it's status // update, which seems worse than giving a status update // multiple times (of course, if a scheduler re-uses a TaskID, // that could be bad. invoke(bind(&Scheduler::statusUpdate, sched, driver, cref(status))); if (pid) { // Acknowledge the message (we do this last, after we invoked // the scheduler, if we did at all, in case it causes a crash, // since this way the message might get resent/routed after the // scheduler comes back online). StatusUpdateAcknowledgementMessage message; message.mutable_framework_id()->MergeFrom(frameworkId); message.mutable_slave_id()->MergeFrom(update.slave_id()); message.mutable_task_id()->MergeFrom(status.task_id()); message.set_uuid(update.uuid()); send(pid, message); } }
// Builds a StatusUpdate describing 'taskId' being in 'state'.
//
// The update is stamped with the current process::Clock time and a
// freshly generated random UUID. The slave id is stored both on the
// update and on the embedded TaskStatus, and the TaskStatus carries
// the same timestamp as the update. The executor id is only set when
// 'executorId' holds a value.
inline StatusUpdate createStatusUpdate(
    const FrameworkID& frameworkId,
    const SlaveID& slaveId,
    const TaskID& taskId,
    const TaskState& state,
    const std::string& message = "",
    const Option<ExecutorID>& executorId = None())
{
  StatusUpdate result;

  // Envelope fields.
  result.set_timestamp(process::Clock::now().secs());
  result.set_uuid(UUID::random().toBytes());
  result.mutable_framework_id()->MergeFrom(frameworkId);
  result.mutable_slave_id()->MergeFrom(slaveId);

  if (executorId.isSome()) {
    result.mutable_executor_id()->MergeFrom(executorId.get());
  }

  // Embedded task status.
  TaskStatus* taskStatus = result.mutable_status();
  taskStatus->set_state(state);
  taskStatus->set_message(message);
  taskStatus->set_timestamp(result.timestamp());
  taskStatus->mutable_task_id()->MergeFrom(taskId);
  taskStatus->mutable_slave_id()->MergeFrom(slaveId);

  return result;
}
void statusUpdateAcknowledgement(const StatusUpdate& update, const UPID& pid) { if (aborted) { VLOG(1) << "Not sending status update acknowledgment message because " << "the driver is aborted!"; return; } VLOG(2) << "Sending ACK for status update " << update << " to " << pid; StatusUpdateAcknowledgementMessage message; message.mutable_framework_id()->MergeFrom(framework.id()); message.mutable_slave_id()->MergeFrom(update.slave_id()); message.mutable_task_id()->MergeFrom(update.status().task_id()); message.set_uuid(update.uuid()); send(pid, message); }
void statusUpdate(const StatusUpdate& update, const UPID& pid) { const TaskStatus& status = update.status(); if (aborted) { VLOG(1) << "Ignoring task status update message because " << "the driver is aborted!"; return; } VLOG(2) << "Received status update " << update << " from " << pid; CHECK(framework.id() == update.framework_id()); // TODO(benh): Note that this maybe a duplicate status update! // Once we get support to try and have a more consistent view // of what's running in the cluster, we'll just let this one // slide. The alternative is possibly dealing with a scheduler // failover and not correctly giving the scheduler it's status // update, which seems worse than giving a status update // multiple times (of course, if a scheduler re-uses a TaskID, // that could be bad. Stopwatch stopwatch; if (FLAGS_v >= 1) { stopwatch.start(); } scheduler->statusUpdate(driver, status); VLOG(1) << "Scheduler::statusUpdate took " << stopwatch.elapsed(); // Acknowledge the status update. // NOTE: We do a dispatch here instead of directly sending the ACK because, // we want to avoid sending the ACK if the driver was aborted when we // made the statusUpdate call. This works because, the 'abort' message will // be enqueued before the ACK message is processed. if (pid > 0) { dispatch(self(), &Self::statusUpdateAcknowledgement, update, pid); } }
// Applies one status update record to the in-memory state.
//
// update: the status update the record refers to.
// type:   StatusUpdateRecord::UPDATE records the update's UUID as
//         received and queues the update as pending. Any other type
//         is treated as an acknowledgement: the UUID is recorded as
//         acknowledged, one entry is popped from 'pending', and
//         'terminated' is set once an acknowledged update carries a
//         terminal task state.
//
// Must not be called after 'error' has been set (enforced with CHECK).
void _handle(const StatusUpdate& update, const StatusUpdateRecord::Type& type)
{
  CHECK(error.isNone());

  if (type == StatusUpdateRecord::UPDATE) {
    // Record this update.
    received.insert(UUID::fromBytes(update.uuid()));

    // Add it to the pending updates queue.
    pending.push(update);
  } else {
    // An acknowledgement is only meaningful while an update is
    // outstanding. Popping an empty queue is undefined behaviour, so
    // fail loudly instead of silently corrupting state.
    // NOTE(review): assumes 'pending' is a std::queue-like container
    // that provides empty() -- confirm against its declaration.
    CHECK(!pending.empty())
      << "Unexpected status update acknowledgement: no pending updates";

    // Record this ACK.
    acknowledged.insert(UUID::fromBytes(update.uuid()));

    // Remove the corresponding update from the pending queue.
    pending.pop();

    // Once terminated, stay terminated.
    if (!terminated) {
      terminated = protobuf::isTerminalState(update.status().state());
    }
  }
}
// TODO(vinod): Make SlaveID optional because 'StatusUpdate.SlaveID' // is optional. StatusUpdate createStatusUpdate( const FrameworkID& frameworkId, const Option<SlaveID>& slaveId, const TaskID& taskId, const TaskState& state, const TaskStatus::Source& source, const string& message = "", const Option<TaskStatus::Reason>& reason = None(), const Option<ExecutorID>& executorId = None(), const Option<bool>& healthy = None()) { StatusUpdate update; update.set_timestamp(process::Clock::now().secs()); update.set_uuid(UUID::random().toBytes()); update.mutable_framework_id()->MergeFrom(frameworkId); if (slaveId.isSome()) { update.mutable_slave_id()->MergeFrom(slaveId.get()); } if (executorId.isSome()) { update.mutable_executor_id()->MergeFrom(executorId.get()); } TaskStatus* status = update.mutable_status(); status->mutable_task_id()->MergeFrom(taskId); if (slaveId.isSome()) { status->mutable_slave_id()->MergeFrom(slaveId.get()); } status->set_state(state); status->set_source(source); status->set_message(message); status->set_timestamp(update.timestamp()); if (reason.isSome()) { status->set_reason(reason.get()); } if (healthy.isSome()) { status->set_healthy(healthy.get()); } return update; }
void sendStatusUpdate(const TaskStatus& status) { if (status.state() == TASK_STAGING) { VLOG(1) << "Executor is not allowed to send " << "TASK_STAGING status update. Aborting!"; driver->abort(); Stopwatch stopwatch; if (FLAGS_v >= 1) { stopwatch.start(); } executor->error(driver, "Attempted to send TASK_STAGING status update"); VLOG(1) << "Executor::error took " << stopwatch.elapsed(); return; } StatusUpdateMessage message; StatusUpdate* update = message.mutable_update(); update->mutable_framework_id()->MergeFrom(frameworkId); update->mutable_executor_id()->MergeFrom(executorId); update->mutable_slave_id()->MergeFrom(slaveId); update->mutable_status()->MergeFrom(status); update->set_timestamp(Clock::now().secs()); update->set_uuid(UUID::random().toBytes()); message.set_pid(self()); VLOG(1) << "Executor sending status update " << *update; // Capture the status update. updates[UUID::fromBytes(update->uuid())] = *update; send(slave, message); }
int main( int argc, const char* argv[] ) { daemonize( "account_serverd" ); U32 beginTime = GetCurrentMilliseconds(); Sleep( 1000 ); U32 endTime = GetCurrentMilliseconds(); cout<< "Time for 1000 ms sleep was : " << endTime - beginTime << endl; CommandLineParser parser( argc, argv ); string serverName = "Account server"; string chatPortString = "9602"; string chatIpAddressString = "localhost"; string agricolaPortString = "23996"; string agricolaIpAddressString = "localhost"; string enableUserProducts = "false"; string userUuidFixOnly = "false"; //--------------------------------------- parser.FindValue( "server.name", serverName ); parser.FindValue( "chat.port", chatPortString ); parser.FindValue( "chat.address", chatIpAddressString ); parser.FindValue( "agricola.address", agricolaPortString ); parser.FindValue( "agricola.port", agricolaIpAddressString ); parser.FindValue( "user.products_update", enableUserProducts ); parser.FindValue( "user.uuid_fix_only", userUuidFixOnly ); string dbPortString = "16384"; string dbIpAddress = "localhost"; string dbUsername = "******"; string dbPassword = "******"; string dbSchema = "playdek"; parser.FindValue( "db.address", dbIpAddress ); parser.FindValue( "db.port", dbPortString ); parser.FindValue( "db.username", dbUsername ); parser.FindValue( "db.password", dbPassword ); parser.FindValue( "db.schema", dbSchema ); bool enableAddingUserProducts = false; bool onlyUpdatesUuid = false; int chatPort = 9602, dbPortAddress = 3306, agricolaPort = 23996; try { //listenPort = boost::lexical_cast<int>( listenPortString ); chatPort = boost::lexical_cast<int>( chatPortString ); agricolaPort = boost::lexical_cast<int>( agricolaPortString ); dbPortAddress = boost::lexical_cast<int>( dbPortString ); if( enableUserProducts.size() ) { enableAddingUserProducts = ( enableUserProducts == "true" || enableUserProducts == "1" ); } if( userUuidFixOnly.size() ) { onlyUpdatesUuid = ( userUuidFixOnly == "true" || userUuidFixOnly == "1" ); } } catch( 
boost::bad_lexical_cast const& ) { std::cout << "Error: input string was not valid" << std::endl; LogMessage(LOG_PRIO_ERR, "Error: input string was not valid\n"); } //-------------------------------------------------------------- U64 serverUniqueHashValue = GenerateUniqueHash( serverName ); U32 serverId = (U32)serverUniqueHashValue; cout << serverName << endl; cout << "Server stack version " << ServerStackVersion << endl; cout << "ServerId " << serverId << endl; cout << "Network protocol version: " << (int)NetworkVersionMajor << ":" << (int)NetworkVersionMinor << endl; cout << "------------------------------------------------------------------" << endl << endl << endl; LogMessage(LOG_PRIO_ERR, "serverName\n"); LogMessage(LOG_PRIO_ERR, "Server stack version %s\n", ServerStackVersion ); LogMessage(LOG_PRIO_ERR, "ServerId %d\n", serverId); LogMessage(LOG_PRIO_ERR, "------------------------------------------------------------------\n\n\n"); StatusUpdate* server = new StatusUpdate( serverName, serverId ); //---------------------------------------------------------------- if( Database::ConnectToMultipleDatabases< StatusUpdate > ( parser, server ) == false ) { Database::Deltadromeus* delta = new Database::Deltadromeus; delta->SetConnectionInfo( dbIpAddress, dbPortAddress, dbUsername, dbPassword, dbSchema ); delta->SetConnectionType( Database::Deltadromeus::DbConnectionType_All ); if( delta->IsConnected() == false ) { cout << "Error: Database connection is invalid." << endl; getch(); return 1; } server->AddOutputChain( delta ); } //---------------------------------------------------------------- server->EnableAddingUserProducts( enableAddingUserProducts ); server->SetAsServicingUuidOnly( onlyUpdatesUuid ); server->Init(); server->Resume(); //getch(); while( 1 ) // infinite loop { Sleep( 1000 ); } return 0; }
void sendStatusUpdate(const TaskStatus& status) { StatusUpdateMessage message; StatusUpdate* update = message.mutable_update(); update->mutable_framework_id()->MergeFrom(frameworkId); update->mutable_executor_id()->MergeFrom(executorId); update->mutable_slave_id()->MergeFrom(slaveId); update->mutable_status()->MergeFrom(status); update->set_timestamp(Clock::now().secs()); update->mutable_status()->set_timestamp(update->timestamp()); message.set_pid(self()); // We overwrite the UUID for this status update, however with // the HTTP API, the executor will have to generate a UUID // (which needs to be validated to be RFC-4122 compliant). UUID uuid = UUID::random(); update->set_uuid(uuid.toBytes()); update->mutable_status()->set_uuid(uuid.toBytes()); // We overwrite the SlaveID for this status update, however with // the HTTP API, this can be overwritten by the slave instead. update->mutable_status()->mutable_slave_id()->CopyFrom(slaveId); VLOG(1) << "Executor sending status update " << *update; // Capture the status update. updates[uuid] = *update; send(slave, message); }
// Verifies that the status update manager ignores an unexpected ACK
// while it is still waiting for the ACK of an outstanding update.
// Every real ACK is dropped so that the original update stays
// outstanding, and an ACK carrying a random UUID is dispatched to the
// slave.
TEST_F(StatusUpdateManagerTest, IgnoreUnexpectedStatusUpdateAck)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  slave::Flags slaveFlags = CreateSlaveFlags();
  slaveFlags.checkpoint = true;

  Try<PID<Slave> > slave = StartSlave(&exec, slaveFlags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo; // Bug in gcc 4.1.*, must assign on next line.
  frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true); // Enable checkpointing.

  MockScheduler scheduler;
  MesosSchedulerDriver driver(
      &scheduler, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  FrameworkID frameworkId;
  EXPECT_CALL(scheduler, registered(_, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(scheduler, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  Future<TaskStatus> status;
  EXPECT_CALL(scheduler, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  ExecutorDriver* execDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<StatusUpdateMessage> updateMessage =
    FUTURE_PROTOBUF(StatusUpdateMessage(), master.get(), _);

  // Drop every ACK so that the status update manager keeps retrying
  // the update.
  DROP_PROTOBUFS(StatusUpdateAcknowledgementMessage(), _, _);

  const Offer& offer = offers.get()[0];
  driver.launchTasks(offer.id(), createTasks(offer));

  AWAIT_READY(updateMessage);
  StatusUpdate update = updateMessage.get().update();

  AWAIT_READY(status);

  EXPECT_EQ(TASK_RUNNING, status.get().state());

  Future<Nothing> randomAck =
    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  // Send an ACK for the right task but with a random UUID.
  process::dispatch(
      slave.get(),
      &Slave::statusUpdateAcknowledgement,
      update.slave_id(),
      frameworkId,
      update.status().task_id(),
      UUID::random().toBytes());

  AWAIT_READY(randomAck);

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
// Verifies that the status update manager ignores a duplicate ACK
// for an earlier update while it is waiting for the ACK of a later
// update. This can happen when the duplicate ACK belongs to an update
// that was retried.
TEST_F(StatusUpdateManagerTest, IgnoreDuplicateStatusUpdateAck)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  slave::Flags slaveFlags = CreateSlaveFlags();
  slaveFlags.checkpoint = true;

  Try<PID<Slave> > slave = StartSlave(&exec, slaveFlags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo; // Bug in gcc 4.1.*, must assign on next line.
  frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true); // Enable checkpointing.

  MockScheduler scheduler;
  MesosSchedulerDriver driver(
      &scheduler, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  FrameworkID frameworkId;
  EXPECT_CALL(scheduler, registered(_, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(scheduler, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  ExecutorDriver* execDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  // Drop the first update so that the status update manager has to
  // resend it.
  Future<StatusUpdateMessage> droppedUpdate =
    DROP_PROTOBUF(StatusUpdateMessage(), master.get(), _);

  Clock::pause();

  const Offer& offer = offers.get()[0];
  driver.launchTasks(offer.id(), createTasks(offer));

  AWAIT_READY(droppedUpdate);
  StatusUpdate update = droppedUpdate.get().update();

  Future<TaskStatus> status;
  EXPECT_CALL(scheduler, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  // This is the ACK for the retried update.
  Future<Nothing> retriedAck =
    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  Clock::advance(slave::STATUS_UPDATE_RETRY_INTERVAL);

  AWAIT_READY(status);

  EXPECT_EQ(TASK_RUNNING, status.get().state());

  AWAIT_READY(retriedAck);

  // Now send a TASK_FINISHED update. Because the update itself is
  // dropped, the status update manager ends up waiting for an ACK
  // that never arrives.
  DROP_PROTOBUFS(StatusUpdateMessage(), master.get(), _);

  Future<Nothing> finishedUpdate = FUTURE_DISPATCH(_, &Slave::_statusUpdate);

  TaskStatus finishedStatus = status.get();
  finishedStatus.set_state(TASK_FINISHED);

  execDriver->sendStatusUpdate(finishedStatus);

  AWAIT_READY(finishedUpdate);

  // This is to catch the duplicate ACK for TASK_RUNNING.
  Future<Nothing> duplicateAck =
    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  // Now send a duplicate ACK for the TASK_RUNNING update.
  process::dispatch(
      slave.get(),
      &Slave::statusUpdateAcknowledgement,
      update.slave_id(),
      frameworkId,
      update.status().task_id(),
      update.uuid());

  AWAIT_READY(duplicateAck);

  Clock::resume();

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
// Builds a StatusUpdate describing 'taskId' being in 'state', as
// reported by 'source'.
//
// The update is stamped with the current process::Clock time, which
// the embedded TaskStatus shares. When 'uuid' holds a value it is
// written to both the update and the TaskStatus; otherwise the
// update's uuid is set to the empty string and the TaskStatus uuid is
// left unset. 'slaveId', 'executorId', 'reason' and 'healthy' are
// only written when they hold a value.
StatusUpdate createStatusUpdate(
    const FrameworkID& frameworkId,
    const Option<SlaveID>& slaveId,
    const TaskID& taskId,
    const TaskState& state,
    const TaskStatus::Source& source,
    const Option<UUID>& uuid,
    const string& message = "",
    const Option<TaskStatus::Reason>& reason = None(),
    const Option<ExecutorID>& executorId = None(),
    const Option<bool>& healthy = None())
{
  StatusUpdate result;

  // Mandatory envelope fields.
  result.set_timestamp(process::Clock::now().secs());
  result.mutable_framework_id()->MergeFrom(frameworkId);

  // Mandatory task status fields.
  TaskStatus* taskStatus = result.mutable_status();
  taskStatus->mutable_task_id()->MergeFrom(taskId);
  taskStatus->set_state(state);
  taskStatus->set_source(source);
  taskStatus->set_message(message);
  taskStatus->set_timestamp(result.timestamp());

  // Optional identifiers.
  if (slaveId.isSome()) {
    result.mutable_slave_id()->MergeFrom(slaveId.get());
    taskStatus->mutable_slave_id()->MergeFrom(slaveId.get());
  }

  if (executorId.isSome()) {
    result.mutable_executor_id()->MergeFrom(executorId.get());
  }

  if (uuid.isNone()) {
    // In 0.22.x StatusUpdate.uuid was required, even though the
    // scheduler driver ignores it for updates generated by the master
    // and the scheduler driver. It is therefore still "set" here so
    // that updates coming from a 0.23.x master can be parsed by a
    // 0.22.x scheduler driver.
    //
    // TODO(bmahler): In 0.24.x, leave the uuid unset.
    result.set_uuid("");
  } else {
    result.set_uuid(uuid.get().toBytes());
    taskStatus->set_uuid(uuid.get().toBytes());
  }

  // Optional task status details.
  if (reason.isSome()) {
    taskStatus->set_reason(reason.get());
  }

  if (healthy.isSome()) {
    taskStatus->set_healthy(healthy.get());
  }

  return result;
}
void sendStatusUpdate(const TaskStatus& status) { StatusUpdateMessage message; StatusUpdate* update = message.mutable_update(); update->mutable_framework_id()->MergeFrom(frameworkId); update->mutable_executor_id()->MergeFrom(executorId); update->mutable_slave_id()->MergeFrom(slaveId); update->mutable_status()->MergeFrom(status); update->set_timestamp(Clock::now().secs()); update->mutable_status()->set_timestamp(update->timestamp()); update->set_uuid(UUID::random().toBytes()); message.set_pid(self()); // Incoming status update might come from an executor which has not set // slave id in TaskStatus. Set/overwrite slave id. update->mutable_status()->mutable_slave_id()->CopyFrom(slaveId); VLOG(1) << "Executor sending status update " << *update; // Capture the status update. updates[UUID::fromBytes(update->uuid())] = *update; send(slave, message); }