BasicMasterDetector::BasicMasterDetector(
    const UPID& _master,
    const vector<UPID>& pids,
    bool elect)
  : master(_master)
{
  if (elect) {
    // Send a master token.
    {
      GotMasterTokenMessage message;
      message.set_token("0");
      process::post(master, message);
    }

    // Elect the master.
    {
      NewMasterDetectedMessage message;
      message.set_pid(master);
      process::post(master, message);
    }
  }

  // Tell each pid about the master.
  foreach (const UPID& pid, pids) {
    NewMasterDetectedMessage message;
    message.set_pid(master);
    process::post(pid, message);
  }
}
TEST(FaultToleranceTest, FrameworkReregister)
{
  ASSERT_TRUE(GTEST_IS_THREADSAFE);

  MockFilter filter;
  process::filter(&filter);

  EXPECT_MESSAGE(filter, _, _, _)
    .WillRepeatedly(Return(false));

  PID<Master> master = local::launch(1, 2, 1 * Gigabyte, false);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master);

  trigger schedRegisteredCall, schedReregisteredCall;

  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(Trigger(&schedRegisteredCall));

  EXPECT_CALL(sched, reregistered(&driver, _))
    .WillOnce(Trigger(&schedReregisteredCall));

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillRepeatedly(Return());

  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .Times(AtMost(1));

  process::Message message;

  EXPECT_MESSAGE(filter, Eq(FrameworkRegisteredMessage().GetTypeName()), _, _)
    .WillOnce(DoAll(SaveArgField<0>(&process::MessageEvent::message, &message),
                    Return(false)));

  driver.start();

  WAIT_UNTIL(schedRegisteredCall); // Ensures registered message is received.

  // Simulate a spurious newMasterDetected event (e.g., due to ZooKeeper
  // expiration) at the scheduler.
  NewMasterDetectedMessage newMasterDetectedMsg;
  newMasterDetectedMsg.set_pid(master);

  process::post(message.to, newMasterDetectedMsg);

  WAIT_UNTIL(schedReregisteredCall);

  driver.stop();
  driver.join();

  local::shutdown();

  process::filter(NULL);
}
TEST_F(FaultToleranceTest, FrameworkReregister)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  Try<PID<Slave> > slave = StartSlave();
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());

  Future<Nothing> registered;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureSatisfy(&registered));

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillRepeatedly(Return());

  Future<process::Message> message =
    FUTURE_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);

  driver.start();

  AWAIT_READY(message); // Framework registered message, to get the pid.
  AWAIT_READY(registered); // Framework registered call.

  Future<Nothing> disconnected;
  EXPECT_CALL(sched, disconnected(&driver))
    .WillOnce(FutureSatisfy(&disconnected));

  Future<Nothing> reregistered;
  EXPECT_CALL(sched, reregistered(&driver, _))
    .WillOnce(FutureSatisfy(&reregistered));

  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .Times(AtMost(1));

  // Simulate a spurious newMasterDetected event (e.g., due to ZooKeeper
  // expiration) at the scheduler.
  NewMasterDetectedMessage newMasterDetectedMsg;
  newMasterDetectedMsg.set_pid(master.get());

  process::post(message.get().to, newMasterDetectedMsg);

  AWAIT_READY(disconnected);
  AWAIT_READY(reregistered);

  driver.stop();
  driver.join();

  Shutdown();
}
BasicMasterDetector::BasicMasterDetector(const UPID& _master)
  : master(_master)
{
  // Send a master token.
  {
    GotMasterTokenMessage message;
    message.set_token("0");
    process::post(master, message);
  }

  // Elect the master.
  {
    NewMasterDetectedMessage message;
    message.set_pid(master);
    process::post(master, message);
  }
}
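// A minimal usage sketch of the three-argument overload above. This is
// illustrative only; 'master', 'slave1', and 'slave2' are assumed to be
// pids obtained via process::spawn, as in the tests below.
vector<UPID> pids;
pids.push_back(slave1);
pids.push_back(slave2);

// Elects 'master' (sends it the token and a NewMasterDetectedMessage)
// and then tells each pid in 'pids' about the master.
BasicMasterDetector detector(master, pids, true);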
// This test ensures that a framework connecting with a
// failed over master gets a registered callback.
// Note that this behavior might change in the future and
// the scheduler might receive a re-registered callback instead.
TEST_F(FaultToleranceTest, MasterFailover)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());

  Future<process::Message> frameworkRegisteredMessage =
    FUTURE_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);

  EXPECT_CALL(sched, registered(&driver, _, _));

  driver.start();

  AWAIT_READY(frameworkRegisteredMessage);

  // Simulate a failed over master by restarting the master.
  Stop(master.get());
  master = StartMaster();
  ASSERT_SOME(master);

  EXPECT_CALL(sched, disconnected(&driver));

  Future<Nothing> registered;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureSatisfy(&registered));

  // Simulate a new master detected message to the scheduler.
  NewMasterDetectedMessage newMasterDetectedMsg;
  newMasterDetectedMsg.set_pid(master.get());

  process::post(frameworkRegisteredMessage.get().to, newMasterDetectedMsg);

  // The framework should get a registered callback.
  AWAIT_READY(registered);

  driver.stop();
  driver.join();

  Shutdown();
}
TEST_F(FaultToleranceTest, SlaveReregisterOnZKExpiration)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  Try<PID<Slave> > slave = StartSlave();
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<Nothing> resourceOffers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureSatisfy(&resourceOffers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(resourceOffers);

  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);

  // Simulate a spurious newMasterDetected event (e.g., due to ZooKeeper
  // expiration) at the slave.
  NewMasterDetectedMessage message;
  message.set_pid(master.get());

  process::post(slave.get(), message);

  AWAIT_READY(slaveReregisteredMessage);

  driver.stop();
  driver.join();

  Shutdown();
}
// The purpose of this test is to ensure that when slaves are removed
// from the master, and then attempt to re-register, we deny the
// re-registration by sending a ShutdownMessage to the slave.
// Why? Because during a network partition, the master will remove a
// partitioned slave, thus transitioning its tasks to LOST. At this
// point, when the partition heals, the slave will attempt to
// re-register with its running tasks. We've already notified
// frameworks that these tasks were LOST, so we have to shut the
// slave down.
TEST_F(FaultToleranceTest, PartitionedSlaveReregistration)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  // Allow the master to PING the slave, but drop all PONG messages
  // from the slave. Note that we don't match on the master / slave
  // PIDs because it's actually the SlaveObserver Process that sends
  // the pings.
  Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _);
  DROP_MESSAGES(Eq("PONG"), _, _);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave> > slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(offers);
  ASSERT_NE(0u, offers.get().size());

  // Launch a task. This is to ensure the task is killed by the slave
  // during shutdown.
  TaskID taskId;
  taskId.set_value("1");

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->MergeFrom(taskId);
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);
  task.mutable_executor()->mutable_command()->set_value("sleep 60");

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  // Set up the expectations for launching the task.
  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> runningStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&runningStatus));

  Future<Nothing> statusUpdateAck = FUTURE_DISPATCH(
      slave.get(), &Slave::_statusUpdateAcknowledgement);

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(runningStatus);
  EXPECT_EQ(TASK_RUNNING, runningStatus.get().state());

  // Wait for the slave to have handled the acknowledgment prior
  // to pausing the clock.
  AWAIT_READY(statusUpdateAck);

  // Drop the first shutdown message from the master (simulated
  // partition), but allow the second shutdown message to pass when
  // the slave re-registers.
  Future<ShutdownMessage> shutdownMessage =
    DROP_PROTOBUF(ShutdownMessage(), _, slave.get());

  Future<TaskStatus> lostStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&lostStatus));

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  Clock::pause();

  // Now, induce a partition of the slave by having the master
  // timeout the slave.
  uint32_t pings = 0;
  while (true) {
    AWAIT_READY(ping);
    pings++;
    if (pings == master::MAX_SLAVE_PING_TIMEOUTS) {
      break;
    }
    ping = FUTURE_MESSAGE(Eq("PING"), _, _);
    Clock::advance(master::SLAVE_PING_TIMEOUT);
    Clock::settle();
  }

  Clock::advance(master::SLAVE_PING_TIMEOUT);
  Clock::settle();

  // The master will have notified the framework of the lost task.
  AWAIT_READY(lostStatus);
  EXPECT_EQ(TASK_LOST, lostStatus.get().state());

  // Wait for the master to attempt to shut down the slave.
  AWAIT_READY(shutdownMessage);

  // The master will notify the framework that the slave was lost.
  AWAIT_READY(slaveLost);

  // We now complete the partition on the slave side as well. This
  // is done by simulating a NoMasterDetectedMessage which would
  // normally occur during a network partition.
  process::post(slave.get(), NoMasterDetectedMessage());

  Future<Nothing> shutdown;
  EXPECT_CALL(exec, shutdown(_))
    .WillOnce(FutureSatisfy(&shutdown));

  shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get());

  // Have the slave re-register with the master.
  NewMasterDetectedMessage newMasterDetectedMessage;
  newMasterDetectedMessage.set_pid(master.get());

  process::post(slave.get(), newMasterDetectedMessage);

  // Upon re-registration, the master will shut down the slave.
  // The slave will then shut down the executor.
  AWAIT_READY(shutdownMessage);
  AWAIT_READY(shutdown);

  Clock::resume();

  driver.stop();
  driver.join();

  Shutdown();
}
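// A back-of-the-envelope sketch of the timeout being simulated by the
// ping loop above (illustrative only; the real constants live in the
// master's sources): the slave is deemed partitioned after
// MAX_SLAVE_PING_TIMEOUTS unanswered pings, each SLAVE_PING_TIMEOUT
// apart, so the Clock::advance() calls together skip past roughly this
// much simulated time without any real waiting.
Duration partitionTimeout = Seconds(0);
for (uint32_t i = 0; i < master::MAX_SLAVE_PING_TIMEOUTS; i++) {
  partitionTimeout += master::SLAVE_PING_TIMEOUT;
}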
// This test verifies that when the slave re-registers, the master
// does not send a TASK_LOST update for a task that has reached a
// terminal state but is still waiting for an acknowledgement.
TEST_F(FaultToleranceTest, ReconcileIncompleteTasks)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave> > slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  TaskInfo task;
  task.set_name("test task");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  EXPECT_CALL(exec, registered(_, _, _, _));

  // Send a terminal update right away.
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_FINISHED));

  // Drop the status update from the slave to the master, so that
  // the slave has a pending terminal update when it re-registers.
  DROP_PROTOBUF(StatusUpdateMessage(), _, master.get());

  Future<Nothing> _statusUpdate = FUTURE_DISPATCH(_, &Slave::_statusUpdate);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  Clock::pause();

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(_statusUpdate);

  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);

  // Simulate a spurious newMasterDetected event (e.g., due to ZooKeeper
  // expiration) at the slave to force re-registration.
  NewMasterDetectedMessage message;
  message.set_pid(master.get());
  process::post(slave.get(), message);

  AWAIT_READY(slaveReregisteredMessage);

  // The master should not send a TASK_LOST after the slave
  // re-registers. We check this by calling Clock::settle() so that
  // the only update the scheduler receives is the retried
  // TASK_FINISHED update.
  // NOTE: The status update manager resends the status update when
  // it receives a NewMasterDetected message.
  Clock::settle();

  AWAIT_READY(status);
  ASSERT_EQ(TASK_FINISHED, status.get().state());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
// This test verifies that the master sends TASK_LOST updates for
// tasks the master knows about that are absent from the
// re-registered slave. We arrange this by dropping the
// RunTaskMessage from the master to the slave.
TEST_F(FaultToleranceTest, ReconcileLostTasks)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  Try<PID<Slave> > slave = StartSlave();
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  TaskInfo task;
  task.set_name("test task");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  // We now launch a task and drop the corresponding RunTaskMessage on
  // the slave, to ensure that only the master knows about this task.
  Future<RunTaskMessage> runTaskMessage =
    DROP_PROTOBUF(RunTaskMessage(), _, _);

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(runTaskMessage);

  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  // Simulate a spurious newMasterDetected event (e.g., due to ZooKeeper
  // expiration) at the slave to force re-registration.
  NewMasterDetectedMessage message;
  message.set_pid(master.get());
  process::post(slave.get(), message);

  AWAIT_READY(slaveReregisteredMessage);

  AWAIT_READY(status);
  ASSERT_EQ(task.task_id(), status.get().task_id());
  ASSERT_EQ(TASK_LOST, status.get().state());

  driver.stop();
  driver.join();

  Shutdown();
}
// This test verifies that a re-registering slave does not inform
// the master about a terminated executor (and its tasks) when the
// executor has pending updates. We check this by ensuring that the
// master sends a TASK_LOST update for the task belonging to the
// terminated executor.
TEST_F(FaultToleranceTest, SlaveReregisterTerminatedExecutor)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  TestingIsolator isolator(&exec);

  Try<PID<Slave> > slave = StartSlave(&isolator);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(LaunchTasks(1, 1, 512, "*"))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  driver.start();

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  // Drop the TASK_LOST status update(s) sent to the master.
  // This ensures that the TASK_LOST received by the scheduler
  // is generated by the master.
  DROP_PROTOBUFS(StatusUpdateMessage(), _, master.get());

  Future<ExitedExecutorMessage> executorExitedMessage =
    FUTURE_PROTOBUF(ExitedExecutorMessage(), _, _);

  // Now kill the executor.
  dispatch(isolator, &Isolator::killExecutor,
           frameworkId.get(), DEFAULT_EXECUTOR_ID);

  AWAIT_READY(executorExitedMessage);

  // Simulate a spurious newMasterDetected event (e.g., due to ZooKeeper
  // expiration) at the slave to force re-registration.
  Future<TaskStatus> status2;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status2));

  NewMasterDetectedMessage message;
  message.set_pid(master.get());
  process::post(slave.get(), message);

  AWAIT_READY(status2);
  EXPECT_EQ(TASK_LOST, status2.get().state());

  driver.stop();
  driver.join();

  Shutdown();
}
TEST(FaultToleranceTest, SlaveReregister)
{
  ASSERT_TRUE(GTEST_IS_THREADSAFE);

  MockFilter filter;
  process::filter(&filter);

  EXPECT_MESSAGE(filter, _, _, _)
    .WillRepeatedly(Return(false));

  SimpleAllocator a;
  Master m(&a);
  PID<Master> master = process::spawn(&m);

  ProcessBasedIsolationModule isolationModule;
  Resources resources = Resources::parse("cpus:2;mem:1024");
  Slave s(resources, true, &isolationModule);
  PID<Slave> slave = process::spawn(&s);

  BasicMasterDetector detector(master, slave, true);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master);

  trigger resourceOffersCall;

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(Trigger(&resourceOffersCall))
    .WillRepeatedly(Return());

  trigger slaveReRegisterMsg;

  EXPECT_MESSAGE(filter, Eq(SlaveReregisteredMessage().GetTypeName()), _, _)
    .WillOnce(DoAll(Trigger(&slaveReRegisterMsg),
                    Return(false)));

  driver.start();

  WAIT_UNTIL(resourceOffersCall);

  // Simulate a spurious newMasterDetected event (e.g., due to ZooKeeper
  // expiration) at the slave.
  NewMasterDetectedMessage message;
  message.set_pid(master);
  process::post(slave, message);

  WAIT_UNTIL(slaveReRegisterMsg);

  driver.stop();
  driver.join();

  process::terminate(slave);
  process::wait(slave);

  process::terminate(master);
  process::wait(master);

  process::filter(NULL);
}
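// A small helper sketch (hypothetical, not part of the test suite)
// factoring out the "simulate spurious master detection" pattern that
// the tests above repeat: post a NewMasterDetectedMessage carrying the
// master's pid to the target process (a scheduler or a slave).
void simulateNewMasterDetected(
    const process::UPID& target,
    const process::UPID& master)
{
  NewMasterDetectedMessage message;
  message.set_pid(master);
  process::post(target, message);
}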