void sendStatusUpdate(const TaskStatus& status) { StatusUpdateMessage message; StatusUpdate* update = message.mutable_update(); update->mutable_framework_id()->MergeFrom(frameworkId); update->mutable_executor_id()->MergeFrom(executorId); update->mutable_slave_id()->MergeFrom(slaveId); update->mutable_status()->MergeFrom(status); update->set_timestamp(Clock::now().secs()); update->mutable_status()->set_timestamp(update->timestamp()); message.set_pid(self()); // We overwrite the UUID for this status update, however with // the HTTP API, the executor will have to generate a UUID // (which needs to be validated to be RFC-4122 compliant). UUID uuid = UUID::random(); update->set_uuid(uuid.toBytes()); update->mutable_status()->set_uuid(uuid.toBytes()); // We overwrite the SlaveID for this status update, however with // the HTTP API, this can be overwritten by the slave instead. update->mutable_status()->mutable_slave_id()->CopyFrom(slaveId); VLOG(1) << "Executor sending status update " << *update; // Capture the status update. updates[uuid] = *update; send(slave, message); }
// Converts an internal StatusUpdateMessage into a v1 scheduler UPDATE
// event. The evolved task status is augmented with the agent id and
// executor id from the enclosing update (when present), the update's
// timestamp, and, if the update needs acknowledging, its uuid.
v1::scheduler::Event evolve(const StatusUpdateMessage& message)
{
  const auto& source = message.update();

  v1::scheduler::Event event;
  event.set_type(v1::scheduler::Event::UPDATE);

  auto* status = event.mutable_update()->mutable_status();
  status->CopyFrom(evolve(source.status()));

  if (source.has_slave_id()) {
    status->mutable_agent_id()->CopyFrom(evolve(source.slave_id()));
  }

  if (source.has_executor_id()) {
    status->mutable_executor_id()->CopyFrom(evolve(source.executor_id()));
  }

  status->set_timestamp(source.timestamp());

  // An update without a 'uuid' does not need acknowledging. Prior to
  // 0.23.0 the update uuid was required and always set, so until the
  // uuid check alone can be relied upon (0.24.0) an unset sender pid
  // (pid == UPID()) is also treated as "no acknowledgement needed".
  // TODO(vinod): Get rid of this logic in 0.25.0 because master
  // and slave correctly set task status in 0.24.0.
  const bool needsAcknowledgement =
    source.has_uuid() &&
    !source.uuid().empty() &&
    !(UPID(message.pid()) == UPID());

  if (needsAcknowledgement) {
    status->set_uuid(source.uuid());
  } else {
    status->clear_uuid();
  }

  return event;
}
void sendStatusUpdate(const TaskStatus& status) { if (status.state() == TASK_STAGING) { VLOG(1) << "Executor is not allowed to send " << "TASK_STAGING status update. Aborting!"; driver->abort(); Stopwatch stopwatch; if (FLAGS_v >= 1) { stopwatch.start(); } executor->error(driver, "Attempted to send TASK_STAGING status update"); VLOG(1) << "Executor::error took " << stopwatch.elapsed(); return; } StatusUpdateMessage message; StatusUpdate* update = message.mutable_update(); update->mutable_framework_id()->MergeFrom(frameworkId); update->mutable_executor_id()->MergeFrom(executorId); update->mutable_slave_id()->MergeFrom(slaveId); update->mutable_status()->MergeFrom(status); update->set_timestamp(Clock::now().secs()); update->set_uuid(UUID::random().toBytes()); message.set_pid(self()); VLOG(1) << "Executor sending status update " << *update; // Capture the status update. updates[UUID::fromBytes(update->uuid())] = *update; send(slave, message); }
void sendStatusUpdate(const TaskStatus& status) { StatusUpdateMessage message; StatusUpdate* update = message.mutable_update(); update->mutable_framework_id()->MergeFrom(frameworkId); update->mutable_executor_id()->MergeFrom(executorId); update->mutable_slave_id()->MergeFrom(slaveId); update->mutable_status()->MergeFrom(status); update->set_timestamp(Clock::now().secs()); update->mutable_status()->set_timestamp(update->timestamp()); update->set_uuid(UUID::random().toBytes()); message.set_pid(self()); // Incoming status update might come from an executor which has not set // slave id in TaskStatus. Set/overwrite slave id. update->mutable_status()->mutable_slave_id()->CopyFrom(slaveId); VLOG(1) << "Executor sending status update " << *update; // Capture the status update. updates[UUID::fromBytes(update->uuid())] = *update; send(slave, message); }
// Verifies that a slave which has been removed from the master receives
// a ShutdownMessage when it subsequently sends a status update.
//
// Rationale: during a network partition the master removes the
// partitioned slave and transitions its tasks to LOST. Once the
// partition heals, the slave (unaware that it was removed) may still
// send updates. Frameworks have already been told the tasks were LOST,
// so the slave has to be shut down.
TEST_F(PartitionTest, PartitionedSlaveStatusUpdates)
{
  master::Flags masterFlags = CreateMasterFlags();

  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
  ASSERT_SOME(master);

  // Let the master's PINGs through but drop every PONG from the slave.
  // The master / slave PIDs are not matched on because the pings are
  // actually sent by the SlaveObserver process.
  Future<Message> ping = FUTURE_MESSAGE(
      Eq(PingSlaveMessage().GetTypeName()), _, _);

  DROP_PROTOBUFS(PongSlaveMessage(), _, _);

  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), &containerizer);
  ASSERT_SOME(slave);

  AWAIT_READY(slaveRegisteredMessage);
  SlaveID slaveId = slaveRegisteredMessage.get().slave_id();

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched,
      DEFAULT_FRAMEWORK_INFO,
      master.get()->pid,
      DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(frameworkId);

  // The first shutdown message from the master is dropped to simulate
  // the partition; the second one (triggered by the slave's update)
  // is allowed through later on.
  Future<ShutdownMessage> shutdownMessage =
    DROP_PROTOBUF(ShutdownMessage(), _, slave.get()->pid);

  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .WillRepeatedly(Return());

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  Clock::pause();

  // Induce the partition: keep advancing the clock until the master
  // has sent the maximum number of unanswered pings.
  for (size_t pingCount = 1; ; ++pingCount) {
    AWAIT_READY(ping);

    if (pingCount == masterFlags.max_slave_ping_timeouts) {
      break;
    }

    ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
    Clock::advance(masterFlags.slave_ping_timeout);
    Clock::settle();
  }

  // One more timeout interval after the final ping.
  Clock::advance(masterFlags.slave_ping_timeout);
  Clock::settle();

  // The master attempts to shut down the slave (this message is dropped).
  AWAIT_READY(shutdownMessage);

  // The framework is told that the slave was lost.
  AWAIT_READY(slaveLost);

  shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()->pid);

  // The slave still believes it is registered, so simulate a status
  // update arriving from it.
  TaskID taskId;
  taskId.set_value("task_id");

  const StatusUpdate update = protobuf::createStatusUpdate(
      frameworkId.get(),
      slaveId,
      taskId,
      TASK_RUNNING,
      TaskStatus::SOURCE_SLAVE,
      UUID::random());

  StatusUpdateMessage updateMessage;
  updateMessage.mutable_update()->CopyFrom(update);
  updateMessage.set_pid(stringify(slave.get()->pid));

  process::post(master.get()->pid, updateMessage);

  // Receiving the update should make the master shut the slave down.
  AWAIT_READY(shutdownMessage);

  Clock::resume();

  driver.stop();
  driver.join();
}