// This test verifies that if master --> slave socket closes and the // slave is not aware of it (i.e., one way network partition), slave // will re-register with the master. TEST_F(PartitionTest, OneWayPartitionMasterToSlave) { // Start a master. master::Flags masterFlags = CreateMasterFlags(); Try<Owned<cluster::Master>> master = StartMaster(masterFlags); ASSERT_SOME(master); Future<Message> slaveRegisteredMessage = FUTURE_MESSAGE(Eq(SlaveRegisteredMessage().GetTypeName()), _, _); // Ensure a ping reaches the slave. Future<Message> ping = FUTURE_MESSAGE( Eq(PingSlaveMessage().GetTypeName()), _, _); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get()); ASSERT_SOME(slave); AWAIT_READY(slaveRegisteredMessage); AWAIT_READY(ping); Future<Nothing> deactivateSlave = FUTURE_DISPATCH(_, &MesosAllocatorProcess::deactivateSlave); // Inject a slave exited event at the master causing the master // to mark the slave as disconnected. The slave should not notice // it until the next ping is received. process::inject::exited(slaveRegisteredMessage.get().to, master.get()->pid); // Wait until master deactivates the slave. AWAIT_READY(deactivateSlave); Future<SlaveReregisteredMessage> slaveReregisteredMessage = FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _); // Ensure the slave observer marked the slave as deactivated. Clock::pause(); Clock::settle(); // Let the slave observer send the next ping. Clock::advance(masterFlags.slave_ping_timeout); // Slave should re-register. AWAIT_READY(slaveReregisteredMessage); }
// This test verifies that if master --> slave socket closes and the // slave is not aware of it (i.e., one way network partition), slave // will re-register with the master. TEST_F(PartitionTest, OneWayPartitionMasterToSlave) { // Start a master. Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); Future<Message> slaveRegisteredMessage = FUTURE_MESSAGE(Eq(SlaveRegisteredMessage().GetTypeName()), _, _); // Ensure a ping reaches the slave. Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _); Try<PID<Slave>> slave = StartSlave(); ASSERT_SOME(slave); AWAIT_READY(slaveRegisteredMessage); AWAIT_READY(ping); Future<Nothing> deactivateSlave = FUTURE_DISPATCH(_, &MesosAllocatorProcess::deactivateSlave); // Inject a slave exited event at the master causing the master // to mark the slave as disconnected. The slave should not notice // it until the next ping is received. process::inject::exited(slaveRegisteredMessage.get().to, master.get()); // Wait until master deactivates the slave. AWAIT_READY(deactivateSlave); Future<SlaveReregisteredMessage> slaveReregisteredMessage = FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _); // Ensure the slave observer marked the slave as deactivated. Clock::pause(); Clock::settle(); // Let the slave observer send the next ping. Clock::advance(slave::MASTER_PING_TIMEOUT()); // Slave should re-register. AWAIT_READY(slaveReregisteredMessage); }
// Test that memory pressure listening is restarted after recovery. TEST_F(MemoryPressureMesosTest, CGROUPS_ROOT_SlaveRecovery) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); slave::Flags flags = CreateSlaveFlags(); // We only care about memory cgroup for this test. flags.isolation = "cgroups/mem"; flags.agent_subsystems = None(); Fetcher fetcher; Try<MesosContainerizer*> _containerizer = MesosContainerizer::create(flags, true, &fetcher); ASSERT_SOME(_containerizer); Owned<MesosContainerizer> containerizer(_containerizer.get()); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), containerizer.get(), flags); ASSERT_SOME(slave); MockScheduler sched; // Enable checkpointing for the framework. FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO; frameworkInfo.set_checkpoint(true); MesosSchedulerDriver driver( &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(_, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(_, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); Offer offer = offers.get()[0]; // Run a task that triggers memory pressure event. We request 1G // disk because we are going to write a 512 MB file repeatedly. TaskInfo task = createTask( offer.slave_id(), Resources::parse("cpus:1;mem:256;disk:1024").get(), "while true; do dd count=512 bs=1M if=/dev/zero of=./temp; done"); Future<TaskStatus> running; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&running)); Future<Nothing> _statusUpdateAcknowledgement = FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(running); EXPECT_EQ(task.task_id(), running.get().task_id()); EXPECT_EQ(TASK_RUNNING, running.get().state()); // Wait for the ACK to be checkpointed. AWAIT_READY_FOR(_statusUpdateAcknowledgement, Seconds(120)); // We restart the slave to let it recover. slave.get()->terminate(); // Set up so we can wait until the new slave updates the container's // resources (this occurs after the executor has re-registered). Future<Nothing> update = FUTURE_DISPATCH(_, &MesosContainerizerProcess::update); // Use the same flags. _containerizer = MesosContainerizer::create(flags, true, &fetcher); ASSERT_SOME(_containerizer); containerizer.reset(_containerizer.get()); Future<SlaveReregisteredMessage> reregistered = FUTURE_PROTOBUF(SlaveReregisteredMessage(), master.get()->pid, _); slave = StartSlave(detector.get(), containerizer.get(), flags); ASSERT_SOME(slave); AWAIT_READY(reregistered); // Wait until the containerizer is updated. AWAIT_READY(update); Future<hashset<ContainerID>> containers = containerizer->containers(); AWAIT_READY(containers); ASSERT_EQ(1u, containers.get().size()); ContainerID containerId = *(containers.get().begin()); // Wait a while for some memory pressure events to occur. Duration waited = Duration::zero(); do { Future<ResourceStatistics> usage = containerizer->usage(containerId); AWAIT_READY(usage); if (usage.get().mem_low_pressure_counter() > 0) { // We will check the correctness of the memory pressure counters // later, because the memory-hammering task is still active // and potentially incrementing these counters. break; } os::sleep(Milliseconds(100)); waited += Milliseconds(100); } while (waited < Seconds(5)); EXPECT_LE(waited, Seconds(5)); // Pause the clock to ensure that the reaper doesn't reap the exited // command executor and inform the containerizer/slave. Clock::pause(); Clock::settle(); Future<TaskStatus> killed; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&killed)); // Stop the memory-hammering task. driver.killTask(task.task_id()); AWAIT_READY_FOR(killed, Seconds(120)); EXPECT_EQ(task.task_id(), killed->task_id()); EXPECT_EQ(TASK_KILLED, killed->state()); // Now check the correctness of the memory pressure counters. Future<ResourceStatistics> usage = containerizer->usage(containerId); AWAIT_READY(usage); EXPECT_GE(usage.get().mem_low_pressure_counter(), usage.get().mem_medium_pressure_counter()); EXPECT_GE(usage.get().mem_medium_pressure_counter(), usage.get().mem_critical_pressure_counter()); Clock::resume(); driver.stop(); driver.join(); }
// This test verifies that when the slave re-registers, the master // does not send TASK_LOST update for a task that has reached terminal // state but is waiting for an acknowledgement. TEST_F(MasterSlaveReconciliationTest, SlaveReregisterTerminalTask) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); StandaloneMasterDetector detector(master.get()->pid); Try<Owned<cluster::Slave>> slave = StartSlave(&detector, &containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); TaskInfo task; task.set_name("test task"); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); EXPECT_CALL(exec, registered(_, _, _, _)); // Send a terminal update right away. EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_FINISHED)); // Drop the status update from slave to the master, so that // the slave has a pending terminal update when it re-registers. DROP_PROTOBUF(StatusUpdateMessage(), _, master.get()->pid); Future<Nothing> _statusUpdate = FUTURE_DISPATCH(_, &Slave::_statusUpdate); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)) .WillRepeatedly(Return()); // Ignore retried update due to update framework. driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(_statusUpdate); Future<SlaveReregisteredMessage> slaveReregisteredMessage = FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _); // Simulate a spurious master change event (e.g., due to ZooKeeper // expiration) at the slave to force re-registration. detector.appoint(master.get()->pid); AWAIT_READY(slaveReregisteredMessage); // The master should not send a TASK_LOST after the slave // re-registers. We check this by calling Clock::settle() so that // the only update the scheduler receives is the retried // TASK_FINISHED update. // NOTE: The status update manager resends the status update when // it detects a new master. Clock::pause(); Clock::settle(); AWAIT_READY(status); ASSERT_EQ(TASK_FINISHED, status.get().state()); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); }
// This test verifies that the slave reports pending tasks when // re-registering, otherwise the master will report them as being // lost. TEST_F(MasterSlaveReconciliationTest, SlaveReregisterPendingTask) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); StandaloneMasterDetector detector(master.get()->pid); Try<Owned<cluster::Slave>> slave = StartSlave(&detector); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); // No TASK_LOST updates should occur! EXPECT_CALL(sched, statusUpdate(&driver, _)) .Times(0); // We drop the _runTask dispatch to ensure the task remains // pending in the slave. Future<Nothing> _runTask = DROP_DISPATCH(slave.get()->pid, &Slave::_runTask); TaskInfo task1; task1.set_name("test task"); task1.mutable_task_id()->set_value("1"); task1.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task1.mutable_resources()->MergeFrom(offers.get()[0].resources()); task1.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); driver.launchTasks(offers.get()[0].id(), {task1}); AWAIT_READY(_runTask); Future<SlaveReregisteredMessage> slaveReregisteredMessage = FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _); // Simulate a spurious master change event (e.g., due to ZooKeeper // expiration) at the slave to force re-registration. detector.appoint(master.get()->pid); AWAIT_READY(slaveReregisteredMessage); Clock::pause(); Clock::settle(); Clock::resume(); driver.stop(); driver.join(); }
// This test verifies that the master reconciles tasks that are // missing from a re-registering slave. In this case, we trigger // a race between the slave re-registration message and the launch // message. There should be no TASK_LOST. // This was motivated by MESOS-1696. TEST_F(MasterSlaveReconciliationTest, ReconcileRace) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); StandaloneMasterDetector detector(master.get()->pid); Future<SlaveRegisteredMessage> slaveRegisteredMessage = FUTURE_PROTOBUF(SlaveRegisteredMessage(), master.get()->pid, _); Try<Owned<cluster::Slave>> slave = StartSlave(&detector, &containerizer); ASSERT_SOME(slave); AWAIT_READY(slaveRegisteredMessage); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); // Since the agent may have retried registration, we want to // ensure that any duplicate registrations are flushed before // we appoint the master again. Otherwise, the agent may // receive a stale registration message. Clock::pause(); Clock::settle(); Clock::resume(); // Trigger a re-registration of the slave and capture the message // so that we can spoof a race with a launch task message. DROP_PROTOBUFS(ReregisterSlaveMessage(), slave.get()->pid, master.get()->pid); Future<ReregisterSlaveMessage> reregisterSlaveMessage = DROP_PROTOBUF( ReregisterSlaveMessage(), slave.get()->pid, master.get()->pid); detector.appoint(master.get()->pid); AWAIT_READY(reregisterSlaveMessage); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); TaskInfo task; task.set_name("test task"); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); ExecutorDriver* executorDriver; EXPECT_CALL(exec, registered(_, _, _, _)) .WillOnce(SaveArg<0>(&executorDriver)); // Leave the task in TASK_STAGING. Future<Nothing> launchTask; EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(FutureSatisfy(&launchTask)); EXPECT_CALL(sched, statusUpdate(&driver, _)) .Times(0); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(launchTask); // Send the stale re-registration message, which does not contain // the task we just launched. This will trigger a reconciliation // by the master. Future<SlaveReregisteredMessage> slaveReregisteredMessage = FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _); // Prevent this from being dropped per the DROP_PROTOBUFS above. FUTURE_PROTOBUF( ReregisterSlaveMessage(), slave.get()->pid, master.get()->pid); process::post( slave.get()->pid, master.get()->pid, reregisterSlaveMessage.get()); AWAIT_READY(slaveReregisteredMessage); // Neither the master nor the slave should send a TASK_LOST // as part of the reconciliation. We check this by calling // Clock::settle() to flush all pending events. Clock::pause(); Clock::settle(); Clock::resume(); // Now send TASK_FINISHED and make sure it's the only message // received by the scheduler. Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); TaskStatus taskStatus; taskStatus.mutable_task_id()->CopyFrom(task.task_id()); taskStatus.set_state(TASK_FINISHED); executorDriver->sendStatusUpdate(taskStatus); AWAIT_READY(status); ASSERT_EQ(TASK_FINISHED, status.get().state()); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); }
// This test verifies that the master reconciles tasks that are // missing from a re-registering slave. In this case, we drop the // RunTaskMessage so the slave should send TASK_LOST. TEST_F(MasterSlaveReconciliationTest, ReconcileLostTask) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); StandaloneMasterDetector detector(master.get()->pid); Try<Owned<cluster::Slave>> slave = StartSlave(&detector); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); TaskInfo task; task.set_name("test task"); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); // We now launch a task and drop the corresponding RunTaskMessage on // the slave, to ensure that only the master knows about this task. Future<RunTaskMessage> runTaskMessage = DROP_PROTOBUF(RunTaskMessage(), _, _); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(runTaskMessage); Future<SlaveReregisteredMessage> slaveReregisteredMessage = FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _); Future<StatusUpdateMessage> statusUpdateMessage = FUTURE_PROTOBUF(StatusUpdateMessage(), _, master.get()->pid); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); // Simulate a spurious master change event (e.g., due to ZooKeeper // expiration) at the slave to force re-registration. detector.appoint(master.get()->pid); AWAIT_READY(slaveReregisteredMessage); // Make sure the slave generated the TASK_LOST. AWAIT_READY(statusUpdateMessage); AWAIT_READY(status); ASSERT_EQ(task.task_id(), status.get().task_id()); ASSERT_EQ(TASK_LOST, status.get().state()); // Before we obtain the metrics, ensure that the master has finished // processing the status update so metrics have been updated. Clock::pause(); Clock::settle(); Clock::resume(); // Check metrics. JSON::Object stats = Metrics(); EXPECT_EQ(1u, stats.values.count("master/tasks_lost")); EXPECT_EQ(1u, stats.values["master/tasks_lost"]); EXPECT_EQ( 1u, stats.values.count( "master/task_lost/source_slave/reason_reconciliation")); EXPECT_EQ( 1u, stats.values["master/task_lost/source_slave/reason_reconciliation"]); driver.stop(); driver.join(); }