// Runs the K3 task: spawns the K3 executable via popen() using the
// command line prepared in 'k3_cmd', streams its stdout, and reports a
// terminal task status (TASK_FINISHED / TASK_FAILED) to the driver.
void operator()()
{
  TaskStatus status;
  status.mutable_task_id()->MergeFrom(task.task_id());

  // Currently, just call the K3 executable with the generated command
  // line from task.data()
  try {
    FILE* pipe = popen(k3_cmd.c_str(), "r");
    if (!pipe) {
      status.set_state(TASK_FAILED);
      driver->sendStatusUpdate(status);
      cout << "Failed to open subprocess" << endl;
      // BUG FIX: the original fell through here and dereferenced the
      // null 'pipe' in the read loop below.
      return;
    }

    // Stream subprocess output line-by-line. Looping on fgets()'s
    // return value (instead of while(!feof(pipe))) avoids processing
    // the last buffer twice and stops on read errors as well as EOF.
    char buffer[256];
    while (fgets(buffer, sizeof(buffer), pipe) != NULL) {
      std::string s = std::string(buffer);
      if (this->isMaster) {
        // The master instance also forwards its output to the framework.
        driver->sendFrameworkMessage(s);
      }
      cout << s << endl;
    }

    // pclose() returns the subprocess exit status; 0 means success.
    int k3 = pclose(pipe);
    if (k3 == 0) {
      status.set_state(TASK_FINISHED);
      cout << "Task " << task.task_id().value() << " Completed!" << endl;
    } else {
      status.set_state(TASK_FAILED);
      cout << "K3 Task " << task.task_id().value()
           << " returned error code: " << k3 << endl;
    }
    driver->sendStatusUpdate(status);
  } catch (...) {
    // Any unexpected exception is reported as a task failure.
    status.set_state(TASK_FAILED);
    driver->sendStatusUpdate(status);
  }
  //------------- END OF TASK -------------------
}
// This test verifies that the slave task status label decorator can
// add and remove labels from a TaskStatus during the status update
// sequence. A TaskStatus with two labels ("foo":"bar" and
// "bar":"baz") is sent from the executor. The labels get modified by
// the slave hook to strip the "foo":"bar" pair and add a new
// "bar":"qux" pair.
TEST_F(HookTest, VerifySlaveTaskStatusDecorator)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_EQ(1u, offers.get().size());

  // Start a task.
  TaskInfo task = createTask(offers.get()[0], "", DEFAULT_EXECUTOR_ID);

  ExecutorDriver* execDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  Future<TaskInfo> execTask;
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(FutureArg<1>(&execTask));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(execTask);

  // Now send TASK_RUNNING update with two labels. The first label
  // ("foo":"bar") will be removed by the task status hook to ensure
  // that it can remove labels. The second label will be preserved
  // and forwarded to Master (and eventually to the framework).
  // The hook also adds a new label with the same key but a different
  // value ("bar":"qux").
  TaskStatus runningStatus;
  runningStatus.mutable_task_id()->MergeFrom(execTask.get().task_id());
  runningStatus.set_state(TASK_RUNNING);

  // Add two labels to the TaskStatus.
  Labels* labels = runningStatus.mutable_labels();

  labels->add_labels()->CopyFrom(createLabel("foo", "bar"));
  labels->add_labels()->CopyFrom(createLabel("bar", "baz"));

  execDriver->sendStatusUpdate(runningStatus);

  AWAIT_READY(status);

  // The hook strips one pair and adds one, so the count stays at two.
  const Labels& labels_ = status.get().labels();
  EXPECT_EQ(2, labels_.labels_size());

  // The test hook will prepend a new "bar":"qux" label.
  EXPECT_EQ("bar", labels_.labels(0).key());
  EXPECT_EQ("qux", labels_.labels(0).value());

  // And lastly, we only expect the "foo":"bar" pair to be stripped by
  // the module. The last pair should be the original "bar":"baz"
  // pair set by the test.
  EXPECT_EQ("bar", labels_.labels(1).key());
  EXPECT_EQ("baz", labels_.labels(1).value());

  // Now validate TaskInfo.container_status. We must have received a
  // container_status with one network_info set by the test hook module.
  EXPECT_TRUE(status.get().has_container_status());
  EXPECT_EQ(1, status.get().container_status().network_infos().size());

  const NetworkInfo networkInfo =
    status.get().container_status().network_infos(0);

  // The hook module sets up '4.3.2.1' as the IP address and 'public' as the
  // network isolation group. The `ip_address` field is deprecated, but the
  // hook module should continue to set it as well as the new `ip_addresses`
  // field for now.
  EXPECT_TRUE(networkInfo.has_ip_address());
  EXPECT_EQ("4.3.2.1", networkInfo.ip_address());

  EXPECT_EQ(1, networkInfo.ip_addresses().size());
  EXPECT_TRUE(networkInfo.ip_addresses(0).has_ip_address());
  EXPECT_EQ("4.3.2.1", networkInfo.ip_addresses(0).ip_address());

  EXPECT_EQ(1, networkInfo.groups().size());
  EXPECT_EQ("public", networkInfo.groups(0));

  EXPECT_TRUE(networkInfo.has_labels());
  EXPECT_EQ(1, networkInfo.labels().labels().size());

  const Label networkInfoLabel = networkInfo.labels().labels(0);

  // Finally, the labels set inside NetworkInfo by the hook module.
  EXPECT_EQ("net_foo", networkInfoLabel.key());
  EXPECT_EQ("net_bar", networkInfoLabel.value());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();
}
// This (old-style) test verifies bidirectional framework messaging:
// the scheduler sends "hello" to the executor, and the executor sends
// "reply" back to the scheduler, with both payloads delivered intact.
TEST(MasterTest, FrameworkMessage)
{
  ASSERT_TRUE(GTEST_IS_THREADSAFE);

  // Spin up an in-process master with a simple allocator.
  SimpleAllocator a;
  Master m(&a);
  PID<Master> master = process::spawn(&m);

  MockExecutor exec;
  ExecutorDriver* execDriver;
  string execData;
  trigger execFrameworkMessageCall, shutdownCall;

  EXPECT_CALL(exec, registered(_, _, _, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdate(TASK_RUNNING));

  // Capture the message payload delivered to the executor.
  EXPECT_CALL(exec, frameworkMessage(_, _))
    .WillOnce(DoAll(SaveArg<1>(&execData),
                    Trigger(&execFrameworkMessageCall)));

  EXPECT_CALL(exec, shutdown(_))
    .WillOnce(Trigger(&shutdownCall));

  map<ExecutorID, Executor*> execs;
  execs[DEFAULT_EXECUTOR_ID] = &exec;

  TestingIsolationModule isolationModule(execs);

  Resources resources = Resources::parse("cpus:2;mem:1024");

  Slave s(resources, true, &isolationModule);
  PID<Slave> slave = process::spawn(&s);

  BasicMasterDetector detector(master, slave, true);

  // Launch a scheduler and wait for the resource offer, the
  // TASK_RUNNING status update, and the executor's reply message.
  MockScheduler sched;
  MesosSchedulerDriver schedDriver(&sched, "", DEFAULT_EXECUTOR_INFO, master);

  vector<Offer> offers;
  TaskStatus status;
  string schedData;

  trigger resourceOffersCall, statusUpdateCall, schedFrameworkMessageCall;

  EXPECT_CALL(sched, registered(&schedDriver, _))
    .Times(1);

  EXPECT_CALL(sched, resourceOffers(&schedDriver, _))
    .WillOnce(DoAll(SaveArg<1>(&offers),
                    Trigger(&resourceOffersCall)))
    .WillRepeatedly(Return());

  EXPECT_CALL(sched, statusUpdate(&schedDriver, _))
    .WillOnce(DoAll(SaveArg<1>(&status),
                    Trigger(&statusUpdateCall)));

  // Capture the reply payload delivered back to the scheduler.
  EXPECT_CALL(sched, frameworkMessage(&schedDriver, _, _, _))
    .WillOnce(DoAll(SaveArg<3>(&schedData),
                    Trigger(&schedFrameworkMessageCall)));

  schedDriver.start();

  WAIT_UNTIL(resourceOffersCall);

  EXPECT_NE(0, offers.size());

  // Launch a single trivial task on the offered slave.
  TaskDescription task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers[0].slave_id());
  task.mutable_resources()->MergeFrom(offers[0].resources());

  vector<TaskDescription> tasks;
  tasks.push_back(task);

  schedDriver.launchTasks(offers[0].id(), tasks);

  WAIT_UNTIL(statusUpdateCall);

  EXPECT_EQ(TASK_RUNNING, status.state());

  // Scheduler -> executor direction.
  string hello = "hello";
  schedDriver.sendFrameworkMessage(offers[0].slave_id(),
                                   DEFAULT_EXECUTOR_ID,
                                   hello);

  WAIT_UNTIL(execFrameworkMessageCall);

  EXPECT_EQ(hello, execData);

  // Executor -> scheduler direction.
  string reply = "reply";
  execDriver->sendFrameworkMessage(reply);

  WAIT_UNTIL(schedFrameworkMessageCall);

  EXPECT_EQ(reply, schedData);

  schedDriver.stop();
  schedDriver.join();

  WAIT_UNTIL(shutdownCall); // To ensure can deallocate MockExecutor.

  process::terminate(slave);
  process::wait(slave);

  process::terminate(master);
  process::wait(master);
}
// This test verifies that the slave and status update manager
// properly handle duplicate terminal status updates, when the
// second update is received after the ACK for the first update.
// The proper behavior here is for the status update manager to
// forward the duplicate update to the scheduler.
TEST_F(StatusUpdateManagerTest, DuplicateTerminalUpdateAfterAck)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  // Checkpointing must be enabled on both the slave and the
  // framework for the status update manager to persist updates.
  slave::Flags flags = CreateSlaveFlags();
  flags.checkpoint = true;

  Try<PID<Slave> > slave = StartSlave(&exec, flags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo; // Bug in gcc 4.1.*, must assign on next line.
  frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true); // Enable checkpointing.

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  FrameworkID frameworkId;
  EXPECT_CALL(sched, registered(_, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  ExecutorDriver* execDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  // Send a terminal update right away.
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_FINISHED));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  // Intercept the slave-side processing of the first ACK so we know
  // when the TASK_FINISHED stream has been acknowledged.
  Future<Nothing> _statusUpdateAcknowledgement =
    FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement);

  driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0]));

  AWAIT_READY(status);
  EXPECT_EQ(TASK_FINISHED, status.get().state());

  AWAIT_READY(_statusUpdateAcknowledgement);

  Future<TaskStatus> update;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&update));

  Future<Nothing> _statusUpdateAcknowledgement2 =
    FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement);

  // Pause the clock so no update retries interfere with the test.
  Clock::pause();

  // Now send a TASK_KILLED update for the same task.
  TaskStatus status2 = status.get();
  status2.set_state(TASK_KILLED);
  execDriver->sendStatusUpdate(status2);

  // Ensure the scheduler receives TASK_KILLED.
  AWAIT_READY(update);
  EXPECT_EQ(TASK_KILLED, update.get().state());

  // Ensure the slave properly handles the ACK.
  // Clock::settle() ensures that the slave successfully
  // executes Slave::_statusUpdateAcknowledgement().
  AWAIT_READY(_statusUpdateAcknowledgement2);
  Clock::settle();

  Clock::resume();

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
// This test verifies that the slave and status update manager
// properly handle duplicate terminal status updates, when the
// second update is received before the ACK for the first update.
// The proper behavior here is for the status update manager to
// drop the duplicate update.
TEST_F(StatusUpdateManagerTest, DuplicateTerminalUpdateBeforeAck)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  // Checkpointing must be enabled on both the slave and the
  // framework for the status update manager to persist updates.
  slave::Flags flags = CreateSlaveFlags();
  flags.checkpoint = true;

  Try<PID<Slave> > slave = StartSlave(&exec, flags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo; // Bug in gcc 4.1.*, must assign on next line.
  frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true); // Enable checkpointing.

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  FrameworkID frameworkId;
  EXPECT_CALL(sched, registered(_, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  ExecutorDriver* execDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  // Send a terminal update right away.
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_FINISHED));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  // Drop the first ACK from the scheduler to the slave.
  Future<StatusUpdateAcknowledgementMessage> statusUpdateAcknowledgementMessage =
    DROP_PROTOBUF(StatusUpdateAcknowledgementMessage(), _, slave.get());

  // Pause the clock so retries only happen when we advance it.
  Clock::pause();

  driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0]));

  AWAIT_READY(status);
  EXPECT_EQ(TASK_FINISHED, status.get().state());

  AWAIT_READY(statusUpdateAcknowledgementMessage);

  Future<Nothing> _statusUpdate =
    FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdate);

  // Now send a TASK_KILLED update for the same task.
  TaskStatus status2 = status.get();
  status2.set_state(TASK_KILLED);
  execDriver->sendStatusUpdate(status2);

  // At this point the status update manager has enqueued
  // TASK_FINISHED and TASK_KILLED updates.
  AWAIT_READY(_statusUpdate);

  // After we advance the clock, the scheduler should receive
  // the retried TASK_FINISHED update and acknowledge it. The
  // TASK_KILLED update should be dropped by the status update
  // manager, as the stream is already terminated.
  Future<TaskStatus> update;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&update));

  Clock::advance(slave::STATUS_UPDATE_RETRY_INTERVAL);
  Clock::settle();

  // Ensure the scheduler receives TASK_FINISHED.
  AWAIT_READY(update);
  EXPECT_EQ(TASK_FINISHED, update.get().state());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  Clock::resume();

  driver.stop();
  driver.join();

  Shutdown();
}
// This test verifies that status update manager ignores
// duplicate ACK for an earlier update when it is waiting
// for an ACK for a later update. This could happen when the
// duplicate ACK is for a retried update.
TEST_F(StatusUpdateManagerTest, IgnoreDuplicateStatusUpdateAck)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  // Checkpointing must be enabled on both the slave and the
  // framework for the status update manager to persist updates.
  slave::Flags flags = CreateSlaveFlags();
  flags.checkpoint = true;

  Try<PID<Slave> > slave = StartSlave(&exec, flags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo; // Bug in gcc 4.1.*, must assign on next line.
  frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true); // Enable checkpointing.

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  FrameworkID frameworkId;
  EXPECT_CALL(sched, registered(_, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  ExecutorDriver* execDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  // Drop the first update, so that status update manager
  // resends the update.
  Future<StatusUpdateMessage> statusUpdateMessage =
    DROP_PROTOBUF(StatusUpdateMessage(), master.get(), _);

  // Pause the clock so the retry only happens when we advance it.
  Clock::pause();

  driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0]));

  AWAIT_READY(statusUpdateMessage);

  // Keep the dropped update; its slave id / task id / uuid are used
  // below to spoof the duplicate ACK.
  StatusUpdate update = statusUpdateMessage.get().update();

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  // This is the ACK for the retried update.
  Future<Nothing> ack =
    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  Clock::advance(slave::STATUS_UPDATE_RETRY_INTERVAL);

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  AWAIT_READY(ack);

  // Now send TASK_FINISHED update so that the status update manager
  // is waiting for its ACK, which it never gets because we drop the
  // update.
  DROP_PROTOBUFS(StatusUpdateMessage(), master.get(), _);

  Future<Nothing> update2 = FUTURE_DISPATCH(_, &Slave::_statusUpdate);

  TaskStatus status2 = status.get();
  status2.set_state(TASK_FINISHED);

  execDriver->sendStatusUpdate(status2);

  AWAIT_READY(update2);

  // This is to catch the duplicate ack for TASK_RUNNING.
  Future<Nothing> duplicateAck =
    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  // Now send a duplicate ACK for the TASK_RUNNING update.
  process::dispatch(
      slave.get(),
      &Slave::statusUpdateAcknowledgement,
      update.slave_id(),
      frameworkId,
      update.status().task_id(),
      update.uuid());

  AWAIT_READY(duplicateAck);

  Clock::resume();

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
// This test verifies that the master reconciles tasks that are
// missing from a reregistering slave. In this case, we trigger
// a race between the slave re-registration message and the launch
// message. There should be no TASK_LOST / TASK_DROPPED.
// This was motivated by MESOS-1696.
TEST_F(MasterSlaveReconciliationTest, ReconcileRace)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  StandaloneMasterDetector detector(master.get()->pid);

  // Wait for the initial registration before racing re-registration.
  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
    FUTURE_PROTOBUF(SlaveRegisteredMessage(), master.get()->pid, _);

  Try<Owned<cluster::Slave>> slave = StartSlave(&detector, &containerizer);
  ASSERT_SOME(slave);

  AWAIT_READY(slaveRegisteredMessage);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  // Since the agent may have retried registration, we want to
  // ensure that any duplicate registrations are flushed before
  // we appoint the master again. Otherwise, the agent may
  // receive a stale registration message.
  Clock::pause();
  Clock::settle();
  Clock::resume();

  // Trigger a re-registration of the slave and capture the message
  // so that we can spoof a race with a launch task message.
  DROP_PROTOBUFS(ReregisterSlaveMessage(), slave.get()->pid, master.get()->pid);

  Future<ReregisterSlaveMessage> reregisterSlaveMessage =
    DROP_PROTOBUF(
        ReregisterSlaveMessage(), slave.get()->pid, master.get()->pid);

  detector.appoint(master.get()->pid);

  AWAIT_READY(reregisterSlaveMessage);

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  TaskInfo task;
  task.set_name("test task");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  ExecutorDriver* executorDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&executorDriver));

  // Leave the task in TASK_STAGING.
  Future<Nothing> launchTask;
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(FutureSatisfy(&launchTask));

  // No TASK_LOST / TASK_DROPPED may reach the scheduler.
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .Times(0);

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(launchTask);

  // Send the stale re-registration message, which does not contain
  // the task we just launched. This will trigger a reconciliation
  // by the master.
  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);

  // Prevent this from being dropped per the DROP_PROTOBUFS above.
  FUTURE_PROTOBUF(
      ReregisterSlaveMessage(), slave.get()->pid, master.get()->pid);

  process::post(
      slave.get()->pid, master.get()->pid, reregisterSlaveMessage.get());

  AWAIT_READY(slaveReregisteredMessage);

  // Neither the master nor the slave should send a TASK_LOST
  // as part of the reconciliation. We check this by calling
  // Clock::settle() to flush all pending events.
  Clock::pause();
  Clock::settle();
  Clock::resume();

  // Now send TASK_FINISHED and make sure it's the only message
  // received by the scheduler.
  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  TaskStatus taskStatus;
  taskStatus.mutable_task_id()->CopyFrom(task.task_id());
  taskStatus.set_state(TASK_FINISHED);

  executorDriver->sendStatusUpdate(taskStatus);

  AWAIT_READY(status);
  ASSERT_EQ(TASK_FINISHED, status->state());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();
}
// This test verifies that after a scheduler failover (a second driver
// re-registers with the same FrameworkID), a framework message sent by
// the executor is delivered to the failed-over scheduler, not the old
// one.
TEST_F(FaultToleranceTest, SchedulerFailoverFrameworkMessage)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave> > slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  MockScheduler sched1;
  MesosSchedulerDriver driver1(&sched1, DEFAULT_FRAMEWORK_INFO, master.get());

  FrameworkID frameworkId;
  EXPECT_CALL(sched1, registered(&driver1, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched1, resourceOffers(&driver1, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver1.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  // Launch a task so the executor registers and can message back.
  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  Future<TaskStatus> status;
  EXPECT_CALL(sched1, statusUpdate(&driver1, _))
    .WillOnce(FutureArg<1>(&status));

  ExecutorDriver* execDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  driver1.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  // Fail over: a second scheduler re-registers with the same id.
  MockScheduler sched2;

  FrameworkInfo framework2; // Bug in gcc 4.1.*, must assign on next line.
  framework2 = DEFAULT_FRAMEWORK_INFO;
  framework2.mutable_id()->MergeFrom(frameworkId);

  MesosSchedulerDriver driver2(&sched2, framework2, master.get());

  Future<Nothing> registered;
  // BUG FIX: the source had the mis-encoded token 'FutureSatisfy(&reg;istered)'
  // (an HTML-entity mangling of '&registered'), which does not compile.
  EXPECT_CALL(sched2, registered(&driver2, frameworkId, _))
    .WillOnce(FutureSatisfy(&registered));

  Future<Nothing> frameworkMessage;
  EXPECT_CALL(sched2, frameworkMessage(&driver2, _, _, _))
    .WillOnce(FutureSatisfy(&frameworkMessage));

  // The original scheduler is told it has been failed over.
  EXPECT_CALL(sched1, error(&driver1, "Framework failed over"));

  driver2.start();

  AWAIT_READY(registered);

  // The executor's message must reach sched2 (per the expectations
  // above, sched1 has no frameworkMessage expectation).
  execDriver->sendFrameworkMessage("Executor to Framework message");

  AWAIT_READY(frameworkMessage);

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver1.stop();
  driver2.stop();

  driver1.join();
  driver2.join();

  Shutdown();
}
// This test verifies that a re-registering slave sends the terminal
// unacknowledged tasks for a terminal executor. This is required
// for the master to correctly reconcile its view with the slave's
// view of tasks. This test drops a terminal update to the master
// and then forces the slave to re-register.
TEST_F(MasterSlaveReconciliationTest, SlaveReregisterTerminatedExecutor)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  StandaloneMasterDetector detector(master.get()->pid);

  Try<Owned<cluster::Slave>> slave = StartSlave(&detector, &containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(LaunchTasks(DEFAULT_EXECUTOR_INFO, 1, 1, 512, "*"))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  ExecutorDriver* execDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  // Observe the ACK so we know the TASK_RUNNING round-trip finished
  // before we start dropping messages.
  Future<StatusUpdateAcknowledgementMessage> statusUpdateAcknowledgementMessage =
    FUTURE_PROTOBUF(
        StatusUpdateAcknowledgementMessage(),
        master.get()->pid,
        slave.get()->pid);

  driver.start();

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  // Make sure the acknowledgement reaches the slave.
  AWAIT_READY(statusUpdateAcknowledgementMessage);

  // Drop the TASK_FINISHED status update sent to the master.
  Future<StatusUpdateMessage> statusUpdateMessage =
    DROP_PROTOBUF(StatusUpdateMessage(), _, master.get()->pid);

  Future<ExitedExecutorMessage> executorExitedMessage =
    FUTURE_PROTOBUF(ExitedExecutorMessage(), _, _);

  TaskStatus finishedStatus;
  finishedStatus = status.get();
  finishedStatus.set_state(TASK_FINISHED);
  execDriver->sendStatusUpdate(finishedStatus);

  // Ensure the update was sent.
  AWAIT_READY(statusUpdateMessage);

  EXPECT_CALL(sched, executorLost(&driver, DEFAULT_EXECUTOR_ID, _, _));

  // Now kill the executor.
  containerizer.destroy(frameworkId.get(), DEFAULT_EXECUTOR_ID);

  Future<TaskStatus> status2;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status2));

  // We drop the 'UpdateFrameworkMessage' from the master to slave to
  // stop the status update manager from retrying the update that was
  // already sent due to the new master detection.
  DROP_PROTOBUFS(UpdateFrameworkMessage(), _, _);

  // Force re-registration; the slave must resend the unacknowledged
  // terminal update for the now-terminated executor.
  detector.appoint(master.get()->pid);

  AWAIT_READY(status2);
  EXPECT_EQ(TASK_FINISHED, status2.get().state());

  driver.stop();
  driver.join();
}
void* RunProcess (void* args) { StartInfo* info = static_cast<StartInfo*>(args); ExecutorDriver* driver = info->driver; const TaskInfo& task = info->task; ExternalInfo external = StartExternalProcess(task); { TaskStatus status; status.mutable_task_id()->CopyFrom(task.task_id()); if (external.failed) { status.set_state(TASK_FAILED); driver->sendStatusUpdate(status); delete info; return nullptr; } cout << "PID " << external.pid << "\n"; status.set_state(TASK_RUNNING); driver->sendStatusUpdate(status); } { lock_guard<mutex> lock(TaskId2PidLock); const string& taskId = task.task_id().value(); TaskId2Pid[taskId] = external.pid; } TaskStatus status; status.mutable_task_id()->CopyFrom(task.task_id()); int s; waitpid(external.pid, &s, WUNTRACED); cout << "WAIT for pid " << external.pid << " returned\n"; if (WIFEXITED(s)) { int es = WEXITSTATUS(s); if (es == 0) { cout << "EXIT " << external.pid << ", status == 0\n"; status.set_state(TASK_FINISHED); } else { cout << "EXIT " << external.pid << ", status " << es << "\n"; status.set_state(TASK_FAILED); } } else if (WIFSIGNALED(s)) { cout << "EXIT " << external.pid << " signalled with " << WTERMSIG(s) << "\n"; status.set_state(TASK_FAILED); } else if (WIFSTOPPED(s)) { cout << "EXIT " << external.pid << " stopped\n"; // TODO(fc) deal with stopped, but how? kill(external.pid, 9); status.set_state(TASK_FAILED); } driver->sendStatusUpdate(status); return nullptr; }
// This (old-style) test verifies that after a scheduler failover, a
// framework message sent by the executor is delivered to the
// failed-over scheduler (sched2) rather than the original (sched1).
TEST(FaultToleranceTest, SchedulerFailoverFrameworkMessage)
{
  ASSERT_TRUE(GTEST_IS_THREADSAFE);

  // Spin up an in-process master with a simple allocator.
  SimpleAllocator a;
  Master m(&a);
  PID<Master> master = process::spawn(&m);

  MockExecutor exec;
  ExecutorDriver* execDriver;

  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdate(TASK_RUNNING));

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  map<ExecutorID, Executor*> execs;
  execs[DEFAULT_EXECUTOR_ID] = &exec;

  TestingIsolationModule isolationModule(execs);

  Resources resources = Resources::parse("cpus:2;mem:1024");

  Slave s(resources, true, &isolationModule);
  PID<Slave> slave = process::spawn(&s);

  BasicMasterDetector detector(master, slave, true);

  // First scheduler: launches the task and receives TASK_RUNNING.
  MockScheduler sched1;
  MesosSchedulerDriver driver1(&sched1, DEFAULT_FRAMEWORK_INFO, master);

  FrameworkID frameworkId;
  vector<Offer> offers;
  TaskStatus status;

  trigger sched1ResourceOfferCall, sched1StatusUpdateCall;

  EXPECT_CALL(sched1, registered(&driver1, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));

  EXPECT_CALL(sched1, statusUpdate(&driver1, _))
    .WillOnce(DoAll(SaveArg<1>(&status),
                    Trigger(&sched1StatusUpdateCall)));

  EXPECT_CALL(sched1, resourceOffers(&driver1, _))
    .WillOnce(DoAll(SaveArg<1>(&offers),
                    Trigger(&sched1ResourceOfferCall)))
    .WillRepeatedly(Return());

  // The first scheduler is told it has been failed over.
  EXPECT_CALL(sched1, error(&driver1, "Framework failed over"))
    .Times(1);

  driver1.start();

  WAIT_UNTIL(sched1ResourceOfferCall);

  EXPECT_NE(0, offers.size());

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers[0].slave_id());
  task.mutable_resources()->MergeFrom(offers[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  driver1.launchTasks(offers[0].id(), tasks);

  WAIT_UNTIL(sched1StatusUpdateCall);

  EXPECT_EQ(TASK_RUNNING, status.state());

  // Second scheduler: re-registers with the same FrameworkID and must
  // receive the executor's framework message after the failover.
  MockScheduler sched2;

  FrameworkInfo framework2; // Bug in gcc 4.1.*, must assign on next line.
  framework2 = DEFAULT_FRAMEWORK_INFO;
  framework2.mutable_id()->MergeFrom(frameworkId);

  MesosSchedulerDriver driver2(&sched2, framework2, master);

  trigger sched2RegisteredCall, sched2FrameworkMessageCall;

  EXPECT_CALL(sched2, registered(&driver2, frameworkId, _))
    .WillOnce(Trigger(&sched2RegisteredCall));

  EXPECT_CALL(sched2, frameworkMessage(&driver2, _, _, _))
    .WillOnce(Trigger(&sched2FrameworkMessageCall));

  driver2.start();

  WAIT_UNTIL(sched2RegisteredCall);

  execDriver->sendFrameworkMessage("");

  WAIT_UNTIL(sched2FrameworkMessageCall);

  driver1.stop();
  driver2.stop();

  driver1.join();
  driver2.join();

  process::terminate(slave);
  process::wait(slave);

  process::terminate(master);
  process::wait(master);
}
// This test verifies that if master receives a status update
// for an already terminated task it forwards it without
// changing the state of the task.
TEST_F(StatusUpdateManagerTest, DuplicatedTerminalStatusUpdate)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave>> slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true); // Enable checkpointing.

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  FrameworkID frameworkId;
  EXPECT_CALL(sched, registered(_, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  ExecutorDriver* execDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  // Send a terminal update right away.
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_FINISHED));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  // Intercept the slave-side processing of the first ACK.
  Future<Nothing> _statusUpdateAcknowledgement =
    FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement);

  driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0]));

  AWAIT_READY(status);
  EXPECT_EQ(TASK_FINISHED, status.get().state());

  AWAIT_READY(_statusUpdateAcknowledgement);

  Future<TaskStatus> update;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&update));

  Future<Nothing> _statusUpdateAcknowledgement2 =
    FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement);

  // Pause the clock so retries do not interfere with the test.
  Clock::pause();

  // Now send a TASK_KILLED update for the same task.
  TaskStatus status2 = status.get();
  status2.set_state(TASK_KILLED);
  execDriver->sendStatusUpdate(status2);

  // Ensure the scheduler receives TASK_KILLED.
  AWAIT_READY(update);
  EXPECT_EQ(TASK_KILLED, update.get().state());

  // Ensure the slave properly handles the ACK.
  // Clock::settle() ensures that the slave successfully
  // executes Slave::_statusUpdateAcknowledgement().
  AWAIT_READY(_statusUpdateAcknowledgement2);

  // Verify the latest task status via the master's /tasks endpoint:
  // the task must still be recorded as TASK_FINISHED, not TASK_KILLED.
  Future<process::http::Response> tasks =
    process::http::get(master.get(), "tasks");

  AWAIT_EXPECT_RESPONSE_STATUS_EQ(process::http::OK().status, tasks);
  AWAIT_EXPECT_RESPONSE_HEADER_EQ(APPLICATION_JSON, "Content-Type", tasks);

  Try<JSON::Object> parse = JSON::parse<JSON::Object>(tasks.get().body);
  ASSERT_SOME(parse);

  Result<JSON::String> state = parse.get().find<JSON::String>("tasks[0].state");

  ASSERT_SOME_EQ(JSON::String("TASK_FINISHED"), state);

  Clock::resume();

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
// This test verifies that the status update manager correctly includes
// the latest state of the task in status update: when a TASK_RUNNING
// update is retried after a TASK_FINISHED was received, the retried
// update still reports TASK_RUNNING as its status but carries
// TASK_FINISHED in 'latest_state'.
TEST_F(StatusUpdateManagerTest, LatestTaskState)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave> > slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(LaunchTasks(DEFAULT_EXECUTOR_INFO, 1, 1, 512, "*"))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  ExecutorDriver* executorDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&executorDriver));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  // Capture (and drop) the first update on its way to the master.
  Future<StatusUpdateMessage> droppedRunningUpdate =
    DROP_PROTOBUF(StatusUpdateMessage(), _, master.get());

  Future<Nothing> firstUpdateHandled =
    FUTURE_DISPATCH(_, &Slave::__statusUpdate);

  driver.start();

  // Wait until TASK_RUNNING is sent to the master.
  AWAIT_READY(droppedRunningUpdate);

  // Ensure the status update manager handles the TASK_RUNNING update.
  AWAIT_READY(firstUpdateHandled);

  // Pause the clock to avoid status update manager from retrying.
  Clock::pause();

  Future<Nothing> secondUpdateHandled =
    FUTURE_DISPATCH(_, &Slave::__statusUpdate);

  // Now send TASK_FINISHED update for the very same task.
  TaskStatus finished = droppedRunningUpdate.get().update().status();
  finished.set_state(TASK_FINISHED);
  executorDriver->sendStatusUpdate(finished);

  // Ensure the status update manager handles the TASK_FINISHED update.
  AWAIT_READY(secondUpdateHandled);

  // Capture (and drop) the retried update as well.
  Future<StatusUpdateMessage> retriedUpdate =
    DROP_PROTOBUF(StatusUpdateMessage(), _, master.get());

  // Advance the clock for the status update manager to send a retry.
  Clock::advance(slave::STATUS_UPDATE_RETRY_INTERVAL_MIN);

  AWAIT_READY(retriedUpdate);

  // The update should correspond to TASK_RUNNING.
  ASSERT_EQ(TASK_RUNNING, retriedUpdate.get().update().status().state());

  // The update should include TASK_FINISHED as the latest state.
  ASSERT_EQ(TASK_FINISHED, retriedUpdate.get().update().latest_state());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
// Checks that if a task is launched and then finishes normally, its
// resources are recovered and reoffered correctly.
TYPED_TEST(AllocatorTest, TaskFinished)
{
  EXPECT_CALL(this->allocator, initialize(_, _, _));

  // Use a short allocation interval so the reoffer happens quickly.
  master::Flags masterFlags = this->CreateMasterFlags();
  masterFlags.allocation_interval = Milliseconds(50);
  Try<PID<Master> > master =
    this->StartMaster(&this->allocator, masterFlags);
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  // Fixed slave resources so the offer sizes below are predictable.
  slave::Flags flags = this->CreateSlaveFlags();
  flags.resources = Option<string>("cpus:3;mem:1024");

  EXPECT_CALL(this->allocator, slaveAdded(_, _, _));

  Try<PID<Slave> > slave = this->StartSlave(&exec, flags);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());

  EXPECT_CALL(this->allocator, frameworkAdded(_, _, _));

  EXPECT_CALL(sched, registered(_, _, _));

  // We decline offers that we aren't expecting so that the resources
  // get aggregated. Note that we need to do this _first_ and
  // _separate_ from the expectation below so that this expectation is
  // checked last and matches all possible offers.
  // (gMock matches the most recently declared expectation first.)
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillRepeatedly(DeclineOffers());

  // Initially, all of the slave's resources.
  EXPECT_CALL(sched, resourceOffers(_, OfferEq(3, 1024)))
    .WillOnce(LaunchTasks(2, 1, 256));

  // Some resources will be unused and we need to make sure that we
  // don't send the TASK_FINISHED status update below until after the
  // allocator knows about the unused resources so that it can
  // aggregate them with the resources from the finished task.
  Future<Nothing> resourcesUnused;
  EXPECT_CALL(this->allocator, resourcesUnused(_, _, _, _))
    .WillRepeatedly(DoAll(InvokeResourcesUnused(&this->allocator),
                          FutureSatisfy(&resourcesUnused)));

  EXPECT_CALL(exec, registered(_, _, _, _));

  // Capture the driver and task info from the first launch so we can
  // send the terminal status update for that task below.
  ExecutorDriver* execDriver;
  TaskInfo taskInfo;
  Future<Nothing> launchTask;
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(DoAll(SaveArg<0>(&execDriver),
                    SaveArg<1>(&taskInfo),
                    SendStatusUpdateFromTask(TASK_RUNNING),
                    FutureSatisfy(&launchTask)))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillRepeatedly(DoDefault());

  driver.start();

  AWAIT_READY(launchTask);

  // Wait until the allocator has seen the unused resources before
  // finishing the task (see the comment above).
  AWAIT_READY(resourcesUnused);

  // Build the terminal update for the captured task.
  TaskStatus status;
  status.mutable_task_id()->MergeFrom(taskInfo.task_id());
  status.set_state(TASK_FINISHED);

  EXPECT_CALL(this->allocator, resourcesRecovered(_, _, _));

  // After the first task gets killed.
  // Expect an aggregated reoffer: 2 cpus / 768 mem = unused resources
  // plus the finished task's resources.
  Future<Nothing> resourceOffers;
  EXPECT_CALL(sched, resourceOffers(_, OfferEq(2, 768)))
    .WillOnce(FutureSatisfy(&resourceOffers));

  execDriver->sendStatusUpdate(status);

  AWAIT_READY(resourceOffers);

  // Shut everything down.
  EXPECT_CALL(this->allocator, resourcesRecovered(_, _, _))
    .WillRepeatedly(DoDefault());

  EXPECT_CALL(this->allocator, frameworkDeactivated(_))
    .Times(AtMost(1));

  EXPECT_CALL(this->allocator, frameworkRemoved(_))
    .Times(AtMost(1));

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  EXPECT_CALL(this->allocator, slaveRemoved(_))
    .Times(AtMost(1));

  this->Shutdown();
}