// This test confirms that setting no values for the soft and hard // limits implies an unlimited resource. TEST_F(PosixRLimitsIsolatorTest, UnsetLimits) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); slave::Flags flags = CreateSlaveFlags(); flags.isolation = "posix/rlimits"; Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(_, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(_, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); ASSERT_NE(0u, offers->size()); TaskInfo task = createTask( offers.get()[0].slave_id(), offers.get()[0].resources(), "exit `ulimit -c | grep -q unlimited`"); // Force usage of C locale as we interpret a potentially translated // string in the task's command. mesos::Environment::Variable* locale = task.mutable_command()->mutable_environment()->add_variables(); locale->set_name("LC_ALL"); locale->set_value("C"); ContainerInfo* container = task.mutable_container(); container->set_type(ContainerInfo::MESOS); // Setting rlimit for core without soft or hard limit signifies // unlimited range. RLimitInfo rlimitInfo; RLimitInfo::RLimit* rlimit = rlimitInfo.add_rlimits(); rlimit->set_type(RLimitInfo::RLimit::RLMT_CORE); container->mutable_rlimit_info()->CopyFrom(rlimitInfo); Future<TaskStatus> statusRunning; Future<TaskStatus> statusFinal; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&statusRunning)) .WillOnce(FutureArg<1>(&statusFinal)); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(statusRunning); EXPECT_EQ(task.task_id(), statusRunning->task_id()); EXPECT_EQ(TASK_RUNNING, statusRunning->state()); AWAIT_READY(statusFinal); EXPECT_EQ(task.task_id(), statusFinal->task_id()); EXPECT_EQ(TASK_FINISHED, statusFinal->state()); driver.stop(); driver.join(); }
virtual void SetUp() override { TemporaryDirectoryTest::SetUp(); ASSERT_SOME(plugin.Startup(GetPluginAddress())); }
// This test ensures that when explicit acknowledgements are enabled, // acknowledgements for master-generated updates are dropped by the // driver. We test this by creating an invalid task that uses no // resources. TEST_F(MesosSchedulerDriverTest, ExplicitAcknowledgementsMasterGeneratedUpdate) { Try<PID<Master>> master = StartMaster(); ASSERT_SOME(master); Try<PID<Slave>> slave = StartSlave(); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), false, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. // Ensure no status update acknowledgements are sent to the master. EXPECT_NO_FUTURE_CALLS( mesos::scheduler::Call(), mesos::scheduler::Call::ACKNOWLEDGE, _ , master.get()); driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); // Launch a task using no resources. TaskInfo task; task.set_name(""); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); vector<TaskInfo> tasks; tasks.push_back(task); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); driver.launchTasks(offers.get()[0].id(), tasks); AWAIT_READY(status); ASSERT_EQ(TASK_ERROR, status.get().state()); ASSERT_EQ(TaskStatus::SOURCE_MASTER, status.get().source()); ASSERT_EQ(TaskStatus::REASON_TASK_INVALID, status.get().reason()); // Now send the acknowledgement. driver.acknowledgeStatusUpdate(status.get()); // Settle the clock to ensure driver processes the acknowledgement, // which should get dropped due to having come from the master. Clock::pause(); Clock::settle(); driver.stop(); driver.join(); Shutdown(); }
// The purpose of this test is to ensure that when slaves are removed // from the master, and then attempt to send status updates, we send // a ShutdownMessage to the slave. Why? Because during a network // partition, the master will remove a partitioned slave, thus sending // its tasks to LOST. At this point, when the partition is removed, // the slave may attempt to send updates if it was unaware that the // master removed it. We've already notified frameworks that these // tasks were LOST, so we have to have the slave shut down. TEST_F(PartitionTest, PartitionedSlaveStatusUpdates) { master::Flags masterFlags = CreateMasterFlags(); Try<PID<Master>> master = StartMaster(masterFlags); ASSERT_SOME(master); // Allow the master to PING the slave, but drop all PONG messages // from the slave. Note that we don't match on the master / slave // PIDs because it's actually the SlaveObserver Process that sends // the pings. Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _); DROP_MESSAGES(Eq("PONG"), _, _); Future<SlaveRegisteredMessage> slaveRegisteredMessage = FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _); MockExecutor exec(DEFAULT_EXECUTOR_ID); Try<PID<Slave>> slave = StartSlave(&exec); ASSERT_SOME(slave); AWAIT_READY(slaveRegisteredMessage); SlaveID slaveId = slaveRegisteredMessage.get().slave_id(); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); Future<FrameworkID> frameworkId; EXPECT_CALL(sched, registered(&driver, _, _)) .WillOnce(FutureArg<1>(&frameworkId)); EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillRepeatedly(Return()); driver.start(); AWAIT_READY(frameworkId); // Drop the first shutdown message from the master (simulated // partition), allow the second shutdown message to pass when // the slave sends an update. Future<ShutdownMessage> shutdownMessage = DROP_PROTOBUF(ShutdownMessage(), _, slave.get()); EXPECT_CALL(sched, offerRescinded(&driver, _)) .WillRepeatedly(Return()); Future<Nothing> slaveLost; EXPECT_CALL(sched, slaveLost(&driver, _)) .WillOnce(FutureSatisfy(&slaveLost)); Clock::pause(); // Now, induce a partition of the slave by having the master // timeout the slave. size_t pings = 0; while (true) { AWAIT_READY(ping); pings++; if (pings == masterFlags.max_slave_ping_timeouts) { break; } ping = FUTURE_MESSAGE(Eq("PING"), _, _); Clock::advance(masterFlags.slave_ping_timeout); Clock::settle(); } Clock::advance(masterFlags.slave_ping_timeout); Clock::settle(); // Wait for the master to attempt to shut down the slave. AWAIT_READY(shutdownMessage); // The master will notify the framework that the slave was lost. AWAIT_READY(slaveLost); shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()); // At this point, the slave still thinks it's registered, so we // simulate a status update coming from the slave. TaskID taskId; taskId.set_value("task_id"); const StatusUpdate& update = protobuf::createStatusUpdate( frameworkId.get(), slaveId, taskId, TASK_RUNNING, TaskStatus::SOURCE_SLAVE, UUID::random()); StatusUpdateMessage message; message.mutable_update()->CopyFrom(update); message.set_pid(stringify(slave.get())); process::post(master.get(), message); // The master should shutdown the slave upon receiving the update. AWAIT_READY(shutdownMessage); Clock::resume(); driver.stop(); driver.join(); Shutdown(); }
// This test checks that a scheduler gets a slave lost // message for a partitioned slave. TEST_F(PartitionTest, PartitionedSlave) { master::Flags masterFlags = CreateMasterFlags(); Try<PID<Master>> master = StartMaster(masterFlags); ASSERT_SOME(master); // Set these expectations up before we spawn the slave so that we // don't miss the first PING. Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _); // Drop all the PONGs to simulate slave partition. DROP_MESSAGES(Eq("PONG"), _, _); Try<PID<Slave>> slave = StartSlave(); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<Nothing> resourceOffers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureSatisfy(&resourceOffers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); // Need to make sure the framework AND slave have registered with // master. Waiting for resource offers should accomplish both. AWAIT_READY(resourceOffers); Clock::pause(); EXPECT_CALL(sched, offerRescinded(&driver, _)) .Times(AtMost(1)); Future<Nothing> slaveLost; EXPECT_CALL(sched, slaveLost(&driver, _)) .WillOnce(FutureSatisfy(&slaveLost)); // Now advance through the PINGs. size_t pings = 0; while (true) { AWAIT_READY(ping); pings++; if (pings == masterFlags.max_slave_ping_timeouts) { break; } ping = FUTURE_MESSAGE(Eq("PING"), _, _); Clock::advance(masterFlags.slave_ping_timeout); } Clock::advance(masterFlags.slave_ping_timeout); AWAIT_READY(slaveLost); this->Stop(slave.get()); JSON::Object stats = Metrics(); EXPECT_EQ(1, stats.values["master/slave_removals"]); EXPECT_EQ(1, stats.values["master/slave_removals/reason_unhealthy"]); driver.stop(); driver.join(); Shutdown(); Clock::resume(); }
// This test verifies that when the slave re-registers, the master // does not send TASK_LOST update for a task that has reached terminal // state but is waiting for an acknowledgement. TEST_F(MasterSlaveReconciliationTest, SlaveReregisterTerminalTask) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); StandaloneMasterDetector detector(master.get()->pid); Try<Owned<cluster::Slave>> slave = StartSlave(&detector, &containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); TaskInfo task; task.set_name("test task"); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); EXPECT_CALL(exec, registered(_, _, _, _)); // Send a terminal update right away. EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_FINISHED)); // Drop the status update from slave to the master, so that // the slave has a pending terminal update when it re-registers. DROP_PROTOBUF(StatusUpdateMessage(), _, master.get()->pid); Future<Nothing> _statusUpdate = FUTURE_DISPATCH(_, &Slave::_statusUpdate); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)) .WillRepeatedly(Return()); // Ignore retried update due to update framework. driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(_statusUpdate); Future<SlaveReregisteredMessage> slaveReregisteredMessage = FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _); // Simulate a spurious master change event (e.g., due to ZooKeeper // expiration) at the slave to force re-registration. detector.appoint(master.get()->pid); AWAIT_READY(slaveReregisteredMessage); // The master should not send a TASK_LOST after the slave // re-registers. We check this by calling Clock::settle() so that // the only update the scheduler receives is the retried // TASK_FINISHED update. // NOTE: The status update manager resends the status update when // it detects a new master. Clock::pause(); Clock::settle(); AWAIT_READY(status); ASSERT_EQ(TASK_FINISHED, status.get().state()); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); }
// This test verifies that a re-registering slave sends the terminal // unacknowledged tasks for a terminal executor. This is required // for the master to correctly reconcile its view with the slave's // view of tasks. This test drops a terminal update to the master // and then forces the slave to re-register. TEST_F(MasterSlaveReconciliationTest, SlaveReregisterTerminatedExecutor) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); StandaloneMasterDetector detector(master.get()->pid); Try<Owned<cluster::Slave>> slave = StartSlave(&detector, &containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); Future<FrameworkID> frameworkId; EXPECT_CALL(sched, registered(&driver, _, _)) .WillOnce(FutureArg<1>(&frameworkId)); EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(LaunchTasks(DEFAULT_EXECUTOR_INFO, 1, 1, 512, "*")) .WillRepeatedly(Return()); // Ignore subsequent offers. ExecutorDriver* execDriver; EXPECT_CALL(exec, registered(_, _, _, _)) .WillOnce(SaveArg<0>(&execDriver)); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); Future<StatusUpdateAcknowledgementMessage> statusUpdateAcknowledgementMessage = FUTURE_PROTOBUF( StatusUpdateAcknowledgementMessage(), master.get()->pid, slave.get()->pid); driver.start(); AWAIT_READY(status); EXPECT_EQ(TASK_RUNNING, status.get().state()); // Make sure the acknowledgement reaches the slave. AWAIT_READY(statusUpdateAcknowledgementMessage); // Drop the TASK_FINISHED status update sent to the master. Future<StatusUpdateMessage> statusUpdateMessage = DROP_PROTOBUF(StatusUpdateMessage(), _, master.get()->pid); Future<ExitedExecutorMessage> executorExitedMessage = FUTURE_PROTOBUF(ExitedExecutorMessage(), _, _); TaskStatus finishedStatus; finishedStatus = status.get(); finishedStatus.set_state(TASK_FINISHED); execDriver->sendStatusUpdate(finishedStatus); // Ensure the update was sent. AWAIT_READY(statusUpdateMessage); EXPECT_CALL(sched, executorLost(&driver, DEFAULT_EXECUTOR_ID, _, _)); // Now kill the executor. containerizer.destroy(frameworkId.get(), DEFAULT_EXECUTOR_ID); Future<TaskStatus> status2; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status2)); // We drop the 'UpdateFrameworkMessage' from the master to slave to // stop the status update manager from retrying the update that was // already sent due to the new master detection. DROP_PROTOBUFS(UpdateFrameworkMessage(), _, _); detector.appoint(master.get()->pid); AWAIT_READY(status2); EXPECT_EQ(TASK_FINISHED, status2.get().state()); driver.stop(); driver.join(); }
// This test verifies that status update manager ignores // unexpected ACK for an earlier update when it is waiting // for an ACK for another update. We do this by dropping ACKs // for the original update and sending a random ACK to the slave. TEST_F(StatusUpdateManagerTest, IgnoreUnexpectedStatusUpdateAck) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); Try<PID<Slave> > slave = StartSlave(&exec); ASSERT_SOME(slave); FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO; frameworkInfo.set_checkpoint(true); // Enable checkpointing. MockScheduler sched; MesosSchedulerDriver driver( &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL); FrameworkID frameworkId; EXPECT_CALL(sched, registered(_, _, _)) .WillOnce(SaveArg<1>(&frameworkId)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(_, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&status)); driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); ExecutorDriver* execDriver; EXPECT_CALL(exec, registered(_, _, _, _)) .WillOnce(SaveArg<0>(&execDriver)); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); Future<StatusUpdateMessage> statusUpdateMessage = FUTURE_PROTOBUF(StatusUpdateMessage(), master.get(), _); // Drop the ACKs, so that status update manager // retries the update. DROP_CALLS(mesos::scheduler::Call(), mesos::scheduler::Call::ACKNOWLEDGE, _, master.get()); driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0])); AWAIT_READY(statusUpdateMessage); StatusUpdate update = statusUpdateMessage.get().update(); AWAIT_READY(status); EXPECT_EQ(TASK_RUNNING, status.get().state()); Future<Nothing> unexpectedAck = FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement); // Now send an ACK with a random UUID. process::dispatch( slave.get(), &Slave::statusUpdateAcknowledgement, master.get(), update.slave_id(), frameworkId, update.status().task_id(), UUID::random().toBytes()); AWAIT_READY(unexpectedAck); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); Shutdown(); }
// This test verifies that the slave and status update manager // properly handle duplicate terminal status updates, when the // second update is received after the ACK for the first update. // The proper behavior here is for the status update manager to // forward the duplicate update to the scheduler. TEST_F(StatusUpdateManagerTest, DuplicateTerminalUpdateAfterAck) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); slave::Flags flags = CreateSlaveFlags(); Try<PID<Slave> > slave = StartSlave(&exec, flags); ASSERT_SOME(slave); FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO; frameworkInfo.set_checkpoint(true); // Enable checkpointing. MockScheduler sched; MesosSchedulerDriver driver( &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL); FrameworkID frameworkId; EXPECT_CALL(sched, registered(_, _, _)) .WillOnce(SaveArg<1>(&frameworkId)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(_, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); ExecutorDriver* execDriver; EXPECT_CALL(exec, registered(_, _, _, _)) .WillOnce(SaveArg<0>(&execDriver)); // Send a terminal update right away. EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_FINISHED)); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&status)); Future<Nothing> _statusUpdateAcknowledgement = FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement); driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0])); AWAIT_READY(status); EXPECT_EQ(TASK_FINISHED, status.get().state()); AWAIT_READY(_statusUpdateAcknowledgement); Future<TaskStatus> update; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&update)); Future<Nothing> _statusUpdateAcknowledgement2 = FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement); Clock::pause(); // Now send a TASK_KILLED update for the same task. TaskStatus status2 = status.get(); status2.set_state(TASK_KILLED); execDriver->sendStatusUpdate(status2); // Ensure the scheduler receives TASK_KILLED. AWAIT_READY(update); EXPECT_EQ(TASK_KILLED, update.get().state()); // Ensure the slave properly handles the ACK. // Clock::settle() ensures that the slave successfully // executes Slave::_statusUpdateAcknowledgement(). AWAIT_READY(_statusUpdateAcknowledgement2); Clock::settle(); Clock::resume(); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); Shutdown(); }
TEST_F(StatusUpdateManagerTest, RetryStatusUpdate) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); slave::Flags flags = CreateSlaveFlags(); Try<PID<Slave> > slave = StartSlave(&exec, flags); ASSERT_SOME(slave); FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO; frameworkInfo.set_checkpoint(true); // Enable checkpointing. MockScheduler sched; MesosSchedulerDriver driver( &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(_, _, _)) .Times(1); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(_, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); EXPECT_CALL(exec, registered(_, _, _, _)) .Times(1); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); Future<StatusUpdateMessage> statusUpdateMessage = DROP_PROTOBUF(StatusUpdateMessage(), master.get(), _); Clock::pause(); driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0])); AWAIT_READY(statusUpdateMessage); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&status)); Clock::advance(slave::STATUS_UPDATE_RETRY_INTERVAL_MIN); AWAIT_READY(status); EXPECT_EQ(TASK_RUNNING, status.get().state()); Clock::resume(); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); Shutdown(); }
// This test verifies that status update manager ignores // duplicate ACK for an earlier update when it is waiting // for an ACK for a later update. This could happen when the // duplicate ACK is for a retried update. TEST_F(StatusUpdateManagerTest, IgnoreDuplicateStatusUpdateAck) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); Try<PID<Slave> > slave = StartSlave(&exec); ASSERT_SOME(slave); FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO; frameworkInfo.set_checkpoint(true); // Enable checkpointing. MockScheduler sched; MesosSchedulerDriver driver( &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL); FrameworkID frameworkId; EXPECT_CALL(sched, registered(_, _, _)) .WillOnce(SaveArg<1>(&frameworkId)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(_, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); ExecutorDriver* execDriver; EXPECT_CALL(exec, registered(_, _, _, _)) .WillOnce(SaveArg<0>(&execDriver)); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); // Drop the first update, so that status update manager // resends the update. Future<StatusUpdateMessage> statusUpdateMessage = DROP_PROTOBUF(StatusUpdateMessage(), master.get(), _); Clock::pause(); driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0])); AWAIT_READY(statusUpdateMessage); StatusUpdate update = statusUpdateMessage.get().update(); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&status)); // This is the ACK for the retried update. Future<Nothing> ack = FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement); Clock::advance(slave::STATUS_UPDATE_RETRY_INTERVAL_MIN); AWAIT_READY(status); EXPECT_EQ(TASK_RUNNING, status.get().state()); AWAIT_READY(ack); // Now send TASK_FINISHED update so that the status update manager // is waiting for its ACK, which it never gets because we drop the // update. DROP_PROTOBUFS(StatusUpdateMessage(), master.get(), _); Future<Nothing> update2 = FUTURE_DISPATCH(_, &Slave::_statusUpdate); TaskStatus status2 = status.get(); status2.set_state(TASK_FINISHED); execDriver->sendStatusUpdate(status2); AWAIT_READY(update2); // This is to catch the duplicate ack for TASK_RUNNING. Future<Nothing> duplicateAck = FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement); // Now send a duplicate ACK for the TASK_RUNNING update. process::dispatch( slave.get(), &Slave::statusUpdateAcknowledgement, master.get(), update.slave_id(), frameworkId, update.status().task_id(), update.uuid()); AWAIT_READY(duplicateAck); Clock::resume(); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); Shutdown(); }
TEST_F(MemoryPressureMesosTest, CGROUPS_ROOT_Statistics) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); slave::Flags flags = CreateSlaveFlags(); // We only care about memory cgroup for this test. flags.isolation = "cgroups/mem"; flags.agent_subsystems = None(); Fetcher fetcher; Try<MesosContainerizer*> _containerizer = MesosContainerizer::create(flags, true, &fetcher); ASSERT_SOME(_containerizer); Owned<MesosContainerizer> containerizer(_containerizer.get()); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), containerizer.get(), flags); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(_, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(_, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); Offer offer = offers.get()[0]; // Run a task that triggers memory pressure event. We request 1G // disk because we are going to write a 512 MB file repeatedly. TaskInfo task = createTask( offer.slave_id(), Resources::parse("cpus:1;mem:256;disk:1024").get(), "while true; do dd count=512 bs=1M if=/dev/zero of=./temp; done"); Future<TaskStatus> running; Future<TaskStatus> killed; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&running)) .WillOnce(FutureArg<1>(&killed)) .WillRepeatedly(Return()); // Ignore subsequent updates. driver.launchTasks(offer.id(), {task}); AWAIT_READY(running); EXPECT_EQ(task.task_id(), running.get().task_id()); EXPECT_EQ(TASK_RUNNING, running.get().state()); Future<hashset<ContainerID>> containers = containerizer->containers(); AWAIT_READY(containers); ASSERT_EQ(1u, containers.get().size()); ContainerID containerId = *(containers.get().begin()); // Wait a while for some memory pressure events to occur. Duration waited = Duration::zero(); do { Future<ResourceStatistics> usage = containerizer->usage(containerId); AWAIT_READY(usage); if (usage.get().mem_low_pressure_counter() > 0) { // We will check the correctness of the memory pressure counters // later, because the memory-hammering task is still active // and potentially incrementing these counters. break; } os::sleep(Milliseconds(100)); waited += Milliseconds(100); } while (waited < Seconds(5)); EXPECT_LE(waited, Seconds(5)); // Pause the clock to ensure that the reaper doesn't reap the exited // command executor and inform the containerizer/slave. Clock::pause(); Clock::settle(); // Stop the memory-hammering task. driver.killTask(task.task_id()); AWAIT_READY(killed); EXPECT_EQ(task.task_id(), killed->task_id()); EXPECT_EQ(TASK_KILLED, killed->state()); // Now check the correctness of the memory pressure counters. Future<ResourceStatistics> usage = containerizer->usage(containerId); AWAIT_READY(usage); EXPECT_GE(usage.get().mem_low_pressure_counter(), usage.get().mem_medium_pressure_counter()); EXPECT_GE(usage.get().mem_medium_pressure_counter(), usage.get().mem_critical_pressure_counter()); Clock::resume(); driver.stop(); driver.join(); }
// This test checks the behavior of passed invalid limits. TEST_F(PosixRLimitsIsolatorTest, InvalidLimits) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); slave::Flags flags = CreateSlaveFlags(); flags.isolation = "posix/rlimits"; Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(_, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(_, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); ASSERT_NE(0u, offers->size()); TaskInfo task = createTask( offers.get()[0].slave_id(), offers.get()[0].resources(), "true"); ContainerInfo* container = task.mutable_container(); container->set_type(ContainerInfo::MESOS); // Set impossible limit soft > hard. RLimitInfo rlimitInfo; RLimitInfo::RLimit* rlimit = rlimitInfo.add_rlimits(); rlimit->set_type(RLimitInfo::RLimit::RLMT_CPU); rlimit->set_soft(100); rlimit->set_hard(1); container->mutable_rlimit_info()->CopyFrom(rlimitInfo); Future<TaskStatus> taskStatus; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&taskStatus)); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(taskStatus); EXPECT_EQ(task.task_id(), taskStatus->task_id()); EXPECT_EQ(TASK_FAILED, taskStatus->state()); EXPECT_EQ(TaskStatus::REASON_EXECUTOR_TERMINATED, taskStatus->reason()); driver.stop(); driver.join(); }
// This test confirms that if a task exceeds configured resource // limits it is forcibly terminated. TEST_F(PosixRLimitsIsolatorTest, TaskExceedingLimit) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); slave::Flags flags = CreateSlaveFlags(); flags.isolation = "posix/rlimits"; Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(_, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(_, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); ASSERT_NE(0u, offers->size()); // The task attempts to use an infinite amount of CPU time. TaskInfo task = createTask( offers.get()[0].slave_id(), offers.get()[0].resources(), "while true; do true; done"); ContainerInfo* container = task.mutable_container(); container->set_type(ContainerInfo::MESOS); // Limit the process to use maximally 1 second of CPU time. RLimitInfo rlimitInfo; RLimitInfo::RLimit* cpuLimit = rlimitInfo.add_rlimits(); cpuLimit->set_type(RLimitInfo::RLimit::RLMT_CPU); cpuLimit->set_soft(1); cpuLimit->set_hard(1); container->mutable_rlimit_info()->CopyFrom(rlimitInfo); Future<TaskStatus> statusRunning; Future<TaskStatus> statusFailed; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&statusRunning)) .WillOnce(FutureArg<1>(&statusFailed)); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(statusRunning); EXPECT_EQ(task.task_id(), statusRunning->task_id()); EXPECT_EQ(TASK_RUNNING, statusRunning->state()); AWAIT_READY(statusFailed); EXPECT_EQ(task.task_id(), statusFailed->task_id()); EXPECT_EQ(TASK_FAILED, statusFailed->state()); driver.stop(); driver.join(); }
// This test verifies that the master reconciles tasks that are // missing from a re-registering slave. In this case, we trigger // a race between the slave re-registration message and the launch // message. There should be no TASK_LOST. // This was motivated by MESOS-1696. TEST_F(MasterSlaveReconciliationTest, ReconcileRace) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); StandaloneMasterDetector detector(master.get()->pid); Future<SlaveRegisteredMessage> slaveRegisteredMessage = FUTURE_PROTOBUF(SlaveRegisteredMessage(), master.get()->pid, _); Try<Owned<cluster::Slave>> slave = StartSlave(&detector, &containerizer); ASSERT_SOME(slave); AWAIT_READY(slaveRegisteredMessage); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); // Since the agent may have retried registration, we want to // ensure that any duplicate registrations are flushed before // we appoint the master again. Otherwise, the agent may // receive a stale registration message. Clock::pause(); Clock::settle(); Clock::resume(); // Trigger a re-registration of the slave and capture the message // so that we can spoof a race with a launch task message. DROP_PROTOBUFS(ReregisterSlaveMessage(), slave.get()->pid, master.get()->pid); Future<ReregisterSlaveMessage> reregisterSlaveMessage = DROP_PROTOBUF( ReregisterSlaveMessage(), slave.get()->pid, master.get()->pid); detector.appoint(master.get()->pid); AWAIT_READY(reregisterSlaveMessage); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); TaskInfo task; task.set_name("test task"); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); ExecutorDriver* executorDriver; EXPECT_CALL(exec, registered(_, _, _, _)) .WillOnce(SaveArg<0>(&executorDriver)); // Leave the task in TASK_STAGING. Future<Nothing> launchTask; EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(FutureSatisfy(&launchTask)); EXPECT_CALL(sched, statusUpdate(&driver, _)) .Times(0); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(launchTask); // Send the stale re-registration message, which does not contain // the task we just launched. This will trigger a reconciliation // by the master. Future<SlaveReregisteredMessage> slaveReregisteredMessage = FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _); // Prevent this from being dropped per the DROP_PROTOBUFS above. FUTURE_PROTOBUF( ReregisterSlaveMessage(), slave.get()->pid, master.get()->pid); process::post( slave.get()->pid, master.get()->pid, reregisterSlaveMessage.get()); AWAIT_READY(slaveReregisteredMessage); // Neither the master nor the slave should send a TASK_LOST // as part of the reconciliation. We check this by calling // Clock::settle() to flush all pending events. Clock::pause(); Clock::settle(); Clock::resume(); // Now send TASK_FINISHED and make sure it's the only message // received by the scheduler. Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); TaskStatus taskStatus; taskStatus.mutable_task_id()->CopyFrom(task.task_id()); taskStatus.set_state(TASK_FINISHED); executorDriver->sendStatusUpdate(taskStatus); AWAIT_READY(status); ASSERT_EQ(TASK_FINISHED, status.get().state()); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); }
// This test verifies that the slave and status update manager // properly handle duplicate status updates, when the second // update with the same UUID is received before the ACK for the // first update. The proper behavior here is for the status update // manager to drop the duplicate update. TEST_F(StatusUpdateManagerTest, DuplicateUpdateBeforeAck) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); Try<PID<Slave> > slave = StartSlave(&exec); ASSERT_SOME(slave); FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO; frameworkInfo.set_checkpoint(true); // Enable checkpointing. MockScheduler sched; MesosSchedulerDriver driver( &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL); FrameworkID frameworkId; EXPECT_CALL(sched, registered(_, _, _)) .WillOnce(SaveArg<1>(&frameworkId)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(_, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); ExecutorDriver* execDriver; EXPECT_CALL(exec, registered(_, _, _, _)) .WillOnce(SaveArg<0>(&execDriver)); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); // Capture the first status update message. Future<StatusUpdateMessage> statusUpdateMessage = FUTURE_PROTOBUF(StatusUpdateMessage(), _, _); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&status)); // Drop the first ACK from the scheduler to the slave. Future<StatusUpdateAcknowledgementMessage> statusUpdateAckMessage = DROP_PROTOBUF(StatusUpdateAcknowledgementMessage(), _, slave.get()); Clock::pause(); driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0])); AWAIT_READY(statusUpdateMessage); AWAIT_READY(status); EXPECT_EQ(TASK_RUNNING, status.get().state()); AWAIT_READY(statusUpdateAckMessage); Future<Nothing> __statusUpdate = FUTURE_DISPATCH(slave.get(), &Slave::__statusUpdate); // Now resend the TASK_RUNNING update. process::post(slave.get(), statusUpdateMessage.get()); // At this point the status update manager has handled // the duplicate status update. AWAIT_READY(__statusUpdate); // After we advance the clock, the status update manager should // retry the TASK_RUNNING update and the scheduler should receive // and acknowledge it. Future<TaskStatus> update; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&update)); Clock::advance(slave::STATUS_UPDATE_RETRY_INTERVAL_MIN); Clock::settle(); // Ensure the scheduler receives TASK_FINISHED. AWAIT_READY(update); EXPECT_EQ(TASK_RUNNING, update.get().state()); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); Clock::resume(); driver.stop(); driver.join(); Shutdown(); }
// This test verifies that the slave reports pending tasks when // re-registering, otherwise the master will report them as being // lost. TEST_F(MasterSlaveReconciliationTest, SlaveReregisterPendingTask) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); StandaloneMasterDetector detector(master.get()->pid); Try<Owned<cluster::Slave>> slave = StartSlave(&detector); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); // No TASK_LOST updates should occur! EXPECT_CALL(sched, statusUpdate(&driver, _)) .Times(0); // We drop the _runTask dispatch to ensure the task remains // pending in the slave. Future<Nothing> _runTask = DROP_DISPATCH(slave.get()->pid, &Slave::_runTask); TaskInfo task1; task1.set_name("test task"); task1.mutable_task_id()->set_value("1"); task1.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task1.mutable_resources()->MergeFrom(offers.get()[0].resources()); task1.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); driver.launchTasks(offers.get()[0].id(), {task1}); AWAIT_READY(_runTask); Future<SlaveReregisteredMessage> slaveReregisteredMessage = FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _); // Simulate a spurious master change event (e.g., due to ZooKeeper // expiration) at the slave to force re-registration. detector.appoint(master.get()->pid); AWAIT_READY(slaveReregisteredMessage); Clock::pause(); Clock::settle(); Clock::resume(); driver.stop(); driver.join(); }
// This test verifies that the status update manager correctly includes // the latest state of the task in status update. TEST_F(StatusUpdateManagerTest, LatestTaskState) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); Try<PID<Slave> > slave = StartSlave(&exec); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(_, _, _)); EXPECT_CALL(sched, resourceOffers(_, _)) .WillOnce(LaunchTasks(DEFAULT_EXECUTOR_INFO, 1, 1, 512, "*")) .WillRepeatedly(Return()); // Ignore subsequent offers. ExecutorDriver* execDriver; EXPECT_CALL(exec, registered(_, _, _, _)) .WillOnce(SaveArg<0>(&execDriver)); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); // Signal when the first update is dropped. Future<StatusUpdateMessage> statusUpdateMessage = DROP_PROTOBUF(StatusUpdateMessage(), _, master.get()); Future<Nothing> __statusUpdate = FUTURE_DISPATCH(_, &Slave::__statusUpdate); driver.start(); // Wait until TASK_RUNNING is sent to the master. AWAIT_READY(statusUpdateMessage); // Ensure the status update manager handles the TASK_RUNNING update. AWAIT_READY(__statusUpdate); // Pause the clock to avoid status update manager from retrying. Clock::pause(); Future<Nothing> __statusUpdate2 = FUTURE_DISPATCH(_, &Slave::__statusUpdate); // Now send TASK_FINISHED update. TaskStatus finishedStatus; finishedStatus = statusUpdateMessage.get().update().status(); finishedStatus.set_state(TASK_FINISHED); execDriver->sendStatusUpdate(finishedStatus); // Ensure the status update manager handles the TASK_FINISHED update. AWAIT_READY(__statusUpdate2); // Signal when the second update is dropped. Future<StatusUpdateMessage> statusUpdateMessage2 = DROP_PROTOBUF(StatusUpdateMessage(), _, master.get()); // Advance the clock for the status update manager to send a retry. Clock::advance(slave::STATUS_UPDATE_RETRY_INTERVAL_MIN); AWAIT_READY(statusUpdateMessage2); // The update should correspond to TASK_RUNNING. ASSERT_EQ(TASK_RUNNING, statusUpdateMessage2.get().update().status().state()); // The update should include TASK_FINISHED as the latest state. ASSERT_EQ(TASK_FINISHED, statusUpdateMessage2.get().update().latest_state()); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); Shutdown(); }
// This test verifies that when the slave re-registers, we correctly // send the information about actively running frameworks. TEST_F(MasterSlaveReconciliationTest, SlaveReregisterFrameworks) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); StandaloneMasterDetector detector(master.get()->pid); Try<Owned<cluster::Slave>> slave = StartSlave(&detector, &containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); TaskInfo task; task.set_name("test task"); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); EXPECT_CALL(exec, registered(_, _, _, _)); // Send an update right away. EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); Future<Nothing> _statusUpdate = FUTURE_DISPATCH(_, &Slave::_statusUpdate); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)) .WillRepeatedly(Return()); // Ignore retried update due to update framework. driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(_statusUpdate); Future<ReregisterSlaveMessage> reregisterSlave = FUTURE_PROTOBUF(ReregisterSlaveMessage(), _, _); // Simulate a spurious master change event (e.g., due to ZooKeeper // expiration) at the slave to force re-registration. detector.appoint(master.get()->pid); // Expect to receive the 'ReregisterSlaveMessage' containing the // active frameworks. AWAIT_READY(reregisterSlave); EXPECT_EQ(1u, reregisterSlave.get().frameworks().size()); Clock::pause(); Clock::settle(); AWAIT_READY(status); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); }
// This test verifies that if master receives a status update // for an already terminated task it forwards it without // changing the state of the task. TEST_F(StatusUpdateManagerTest, DuplicatedTerminalStatusUpdate) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); Try<PID<Slave>> slave = StartSlave(&exec); ASSERT_SOME(slave); FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO; frameworkInfo.set_checkpoint(true); // Enable checkpointing. MockScheduler sched; MesosSchedulerDriver driver( &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL); FrameworkID frameworkId; EXPECT_CALL(sched, registered(_, _, _)) .WillOnce(SaveArg<1>(&frameworkId)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(_, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); ExecutorDriver* execDriver; EXPECT_CALL(exec, registered(_, _, _, _)) .WillOnce(SaveArg<0>(&execDriver)); // Send a terminal update right away. EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_FINISHED)); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&status)); Future<Nothing> _statusUpdateAcknowledgement = FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement); driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0])); AWAIT_READY(status); EXPECT_EQ(TASK_FINISHED, status.get().state()); AWAIT_READY(_statusUpdateAcknowledgement); Future<TaskStatus> update; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&update)); Future<Nothing> _statusUpdateAcknowledgement2 = FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement); Clock::pause(); // Now send a TASK_KILLED update for the same task. TaskStatus status2 = status.get(); status2.set_state(TASK_KILLED); execDriver->sendStatusUpdate(status2); // Ensure the scheduler receives TASK_KILLED. AWAIT_READY(update); EXPECT_EQ(TASK_KILLED, update.get().state()); // Ensure the slave properly handles the ACK. // Clock::settle() ensures that the slave successfully // executes Slave::_statusUpdateAcknowledgement(). AWAIT_READY(_statusUpdateAcknowledgement2); // Verify the latest task status. Future<process::http::Response> tasks = process::http::get(master.get(), "tasks"); AWAIT_EXPECT_RESPONSE_STATUS_EQ(process::http::OK().status, tasks); AWAIT_EXPECT_RESPONSE_HEADER_EQ(APPLICATION_JSON, "Content-Type", tasks); Try<JSON::Object> parse = JSON::parse<JSON::Object>(tasks.get().body); ASSERT_SOME(parse); Result<JSON::String> state = parse.get().find<JSON::String>("tasks[0].state"); ASSERT_SOME_EQ(JSON::String("TASK_FINISHED"), state); Clock::resume(); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); Shutdown(); }
// The purpose of this test is to ensure that when slaves are removed // from the master, and then attempt to re-register, we deny the // re-registration by sending a ShutdownMessage to the slave. // Why? Because during a network partition, the master will remove a // partitioned slave, thus sending its tasks to LOST. At this point, // when the partition is removed, the slave will attempt to // re-register with its running tasks. We've already notified // frameworks that these tasks were LOST, so we have to have the slave // slave shut down. TEST_F(PartitionTest, PartitionedSlaveReregistration) { master::Flags masterFlags = CreateMasterFlags(); Try<PID<Master>> master = StartMaster(masterFlags); ASSERT_SOME(master); // Allow the master to PING the slave, but drop all PONG messages // from the slave. Note that we don't match on the master / slave // PIDs because it's actually the SlaveObserver Process that sends // the pings. Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _); DROP_MESSAGES(Eq("PONG"), _, _); MockExecutor exec(DEFAULT_EXECUTOR_ID); StandaloneMasterDetector detector(master.get()); Try<PID<Slave>> slave = StartSlave(&exec, &detector); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); driver.start(); AWAIT_READY(offers); ASSERT_NE(0u, offers.get().size()); // Launch a task. This is to ensure the task is killed by the slave, // during shutdown. TaskID taskId; taskId.set_value("1"); TaskInfo task; task.set_name(""); task.mutable_task_id()->MergeFrom(taskId); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); task.mutable_executor()->mutable_command()->set_value("sleep 60"); // Set up the expectations for launching the task. EXPECT_CALL(exec, registered(_, _, _, _)); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); Future<TaskStatus> runningStatus; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&runningStatus)); Future<Nothing> statusUpdateAck = FUTURE_DISPATCH( slave.get(), &Slave::_statusUpdateAcknowledgement); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(runningStatus); EXPECT_EQ(TASK_RUNNING, runningStatus.get().state()); // Wait for the slave to have handled the acknowledgment prior // to pausing the clock. AWAIT_READY(statusUpdateAck); // Drop the first shutdown message from the master (simulated // partition), allow the second shutdown message to pass when // the slave re-registers. Future<ShutdownMessage> shutdownMessage = DROP_PROTOBUF(ShutdownMessage(), _, slave.get()); Future<TaskStatus> lostStatus; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&lostStatus)); Future<Nothing> slaveLost; EXPECT_CALL(sched, slaveLost(&driver, _)) .WillOnce(FutureSatisfy(&slaveLost)); Clock::pause(); // Now, induce a partition of the slave by having the master // timeout the slave. size_t pings = 0; while (true) { AWAIT_READY(ping); pings++; if (pings == masterFlags.max_slave_ping_timeouts) { break; } ping = FUTURE_MESSAGE(Eq("PING"), _, _); Clock::advance(masterFlags.slave_ping_timeout); Clock::settle(); } Clock::advance(masterFlags.slave_ping_timeout); Clock::settle(); // The master will have notified the framework of the lost task. AWAIT_READY(lostStatus); EXPECT_EQ(TASK_LOST, lostStatus.get().state()); // Wait for the master to attempt to shut down the slave. AWAIT_READY(shutdownMessage); // The master will notify the framework that the slave was lost. AWAIT_READY(slaveLost); Clock::resume(); // We now complete the partition on the slave side as well. This // is done by simulating a master loss event which would normally // occur during a network partition. detector.appoint(None()); Future<Nothing> shutdown; EXPECT_CALL(exec, shutdown(_)) .WillOnce(FutureSatisfy(&shutdown)); shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()); // Have the slave re-register with the master. detector.appoint(master.get()); // Upon re-registration, the master will shutdown the slave. // The slave will then shut down the executor. AWAIT_READY(shutdownMessage); AWAIT_READY(shutdown); driver.stop(); driver.join(); Shutdown(); }
TEST_F(StatusUpdateManagerTest, CheckpointStatusUpdate) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); // Require flags to retrieve work_dir when recovering // the checkpointed data. slave::Flags flags = CreateSlaveFlags(); Try<PID<Slave> > slave = StartSlave(&exec, flags); ASSERT_SOME(slave); FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO; frameworkInfo.set_checkpoint(true); // Enable checkpointing. MockScheduler sched; MesosSchedulerDriver driver( &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL); Future<FrameworkID> frameworkId; EXPECT_CALL(sched, registered(_, _, _)) .WillOnce(FutureArg<1>(&frameworkId)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(_, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(frameworkId); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); EXPECT_CALL(exec, registered(_, _, _, _)) .Times(1); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&status)); Future<Nothing> _statusUpdateAcknowledgement = FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement); driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0])); AWAIT_READY(status); EXPECT_EQ(TASK_RUNNING, status.get().state()); AWAIT_READY(_statusUpdateAcknowledgement); // Ensure that both the status update and its acknowledgement are // correctly checkpointed. Result<slave::state::State> state = slave::state::recover(slave::paths::getMetaRootDir(flags.work_dir), true); ASSERT_SOME(state); ASSERT_SOME(state.get().slave); ASSERT_TRUE(state.get().slave.get().frameworks.contains(frameworkId.get())); slave::state::FrameworkState frameworkState = state.get().slave.get().frameworks.get(frameworkId.get()).get(); ASSERT_EQ(1u, frameworkState.executors.size()); slave::state::ExecutorState executorState = frameworkState.executors.begin()->second; ASSERT_EQ(1u, executorState.runs.size()); slave::state::RunState runState = executorState.runs.begin()->second; ASSERT_EQ(1u, runState.tasks.size()); slave::state::TaskState taskState = runState.tasks.begin()->second; EXPECT_EQ(1u, taskState.updates.size()); EXPECT_EQ(1u, taskState.acks.size()); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); Shutdown(); }
// The purpose of this test is to ensure that when slaves are removed // from the master, and then attempt to send exited executor messages, // we send a ShutdownMessage to the slave. Why? Because during a // network partition, the master will remove a partitioned slave, thus // sending its tasks to LOST. At this point, when the partition is // removed, the slave may attempt to send exited executor messages if // it was unaware that the master removed it. We've already // notified frameworks that the tasks under the executors were LOST, // so we have to have the slave shut down. TEST_F(PartitionTest, PartitionedSlaveExitedExecutor) { master::Flags masterFlags = CreateMasterFlags(); Try<PID<Master>> master = StartMaster(masterFlags); ASSERT_SOME(master); // Allow the master to PING the slave, but drop all PONG messages // from the slave. Note that we don't match on the master / slave // PIDs because it's actually the SlaveObserver Process that sends // the pings. Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _); DROP_MESSAGES(Eq("PONG"), _, _); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); Try<PID<Slave>> slave = StartSlave(&containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); Future<FrameworkID> frameworkId; EXPECT_CALL(sched, registered(&driver, _, _)) .WillOnce(FutureArg<1>(&frameworkId));\ Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); driver.start(); AWAIT_READY(frameworkId); AWAIT_READY(offers); ASSERT_NE(0u, offers.get().size()); // Launch a task. This allows us to have the slave send an // ExitedExecutorMessage. TaskID taskId; taskId.set_value("1"); TaskInfo task; task.set_name(""); task.mutable_task_id()->MergeFrom(taskId); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); task.mutable_executor()->mutable_command()->set_value("sleep 60"); // Set up the expectations for launching the task. EXPECT_CALL(exec, registered(_, _, _, _)); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); // Drop all the status updates from the slave, so that we can // ensure the ExitedExecutorMessage is what triggers the slave // shutdown. DROP_PROTOBUFS(StatusUpdateMessage(), _, master.get()); driver.launchTasks(offers.get()[0].id(), {task}); // Drop the first shutdown message from the master (simulated // partition) and allow the second shutdown message to pass when // triggered by the ExitedExecutorMessage. Future<ShutdownMessage> shutdownMessage = DROP_PROTOBUF(ShutdownMessage(), _, slave.get()); Future<TaskStatus> lostStatus; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&lostStatus)); Future<Nothing> slaveLost; EXPECT_CALL(sched, slaveLost(&driver, _)) .WillOnce(FutureSatisfy(&slaveLost)); Clock::pause(); // Now, induce a partition of the slave by having the master // timeout the slave. size_t pings = 0; while (true) { AWAIT_READY(ping); pings++; if (pings == masterFlags.max_slave_ping_timeouts) { break; } ping = FUTURE_MESSAGE(Eq("PING"), _, _); Clock::advance(masterFlags.slave_ping_timeout); Clock::settle(); } Clock::advance(masterFlags.slave_ping_timeout); Clock::settle(); // The master will have notified the framework of the lost task. AWAIT_READY(lostStatus); EXPECT_EQ(TASK_LOST, lostStatus.get().state()); // Wait for the master to attempt to shut down the slave. AWAIT_READY(shutdownMessage); // The master will notify the framework that the slave was lost. AWAIT_READY(slaveLost); shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()); // Induce an ExitedExecutorMessage from the slave. containerizer.destroy( frameworkId.get(), DEFAULT_EXECUTOR_INFO.executor_id()); // Upon receiving the message, the master will shutdown the slave. AWAIT_READY(shutdownMessage); Clock::resume(); driver.stop(); driver.join(); Shutdown(); }
// This test verifies that the environment secrets are resolved when launching a // task. TEST_F(EnvironmentSecretIsolatorTest, ResolveSecret) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); mesos::internal::slave::Flags flags = CreateSlaveFlags(); Fetcher fetcher(flags); Try<SecretResolver*> secretResolver = SecretResolver::create(); EXPECT_SOME(secretResolver); Try<MesosContainerizer*> containerizer = MesosContainerizer::create(flags, false, &fetcher, secretResolver.get()); EXPECT_SOME(containerizer); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), containerizer.get()); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<std::vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_FALSE(offers->empty()); const string commandString = strings::format( "env; test \"$%s\" = \"%s\"", SECRET_ENV_NAME, SECRET_VALUE).get(); CommandInfo command; command.set_value(commandString); // Request a secret. // TODO(kapil): Update createEnvironment() to support secrets. mesos::Environment::Variable *env = command.mutable_environment()->add_variables(); env->set_name(SECRET_ENV_NAME); env->set_type(mesos::Environment::Variable::SECRET); mesos::Secret* secret = env->mutable_secret(); secret->set_type(Secret::VALUE); secret->mutable_value()->set_data(SECRET_VALUE); TaskInfo task = createTask( offers.get()[0].slave_id(), Resources::parse("cpus:0.1;mem:32").get(), command); // NOTE: Successful tasks will output two status updates. Future<TaskStatus> statusRunning; Future<TaskStatus> statusFinished; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&statusRunning)) .WillOnce(FutureArg<1>(&statusFinished)); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(statusRunning); EXPECT_EQ(TASK_RUNNING, statusRunning.get().state()); AWAIT_READY(statusFinished); EXPECT_EQ(TASK_FINISHED, statusFinished.get().state()); driver.stop(); driver.join(); }
// This test verifies that the image specified in the volume will be // properly provisioned and mounted into the container if container // root filesystem is not specified. TEST_P(VolumeImageIsolatorTest, ROOT_ImageInVolumeWithoutRootFilesystem) { string registry = path::join(sandbox.get(), "registry"); AWAIT_READY(DockerArchive::create(registry, "test_image")); slave::Flags flags = CreateSlaveFlags(); flags.isolation = "filesystem/linux,volume/image,docker/runtime"; flags.docker_registry = registry; flags.docker_store_dir = path::join(sandbox.get(), "store"); flags.image_providers = "docker"; Try<MesosContainerizer*> create = MesosContainerizer::create(flags, true, &fetcher); ASSERT_SOME(create); Owned<Containerizer> containerizer(create.get()); ContainerID containerId; containerId.set_value(UUID::random().toString()); ContainerInfo container = createContainerInfo( None(), {createVolumeFromDockerImage("rootfs", "test_image", Volume::RW)}); CommandInfo command = createCommandInfo("test -d rootfs/bin"); ExecutorInfo executor = createExecutorInfo( "test_executor", nesting ? createCommandInfo("sleep 1000") : command); if (!nesting) { executor.mutable_container()->CopyFrom(container); } string directory = path::join(flags.work_dir, "sandbox"); ASSERT_SOME(os::mkdir(directory)); Future<bool> launch = containerizer->launch( containerId, None(), executor, directory, None(), SlaveID(), map<string, string>(), false); AWAIT_ASSERT_TRUE(launch); Future<Option<ContainerTermination>> wait = containerizer->wait(containerId); if (nesting) { ContainerID nestedContainerId; nestedContainerId.mutable_parent()->CopyFrom(containerId); nestedContainerId.set_value(UUID::random().toString()); launch = containerizer->launch( nestedContainerId, command, container, None(), SlaveID()); AWAIT_ASSERT_TRUE(launch); wait = containerizer->wait(nestedContainerId); } AWAIT_READY(wait); ASSERT_SOME(wait.get()); ASSERT_TRUE(wait->get().has_status()); EXPECT_WEXITSTATUS_EQ(0, wait->get().status()); if (nesting) { wait = containerizer->wait(containerId); containerizer->destroy(containerId); AWAIT_READY(wait); ASSERT_SOME(wait.get()); ASSERT_TRUE(wait->get().has_status()); EXPECT_WTERMSIG_EQ(SIGKILL, wait.get()->status()); } }
TEST_P(MemoryIsolatorTest, ROOT_MemUsage) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); slave::Flags flags = CreateSlaveFlags(); flags.isolation = GetParam(); Fetcher fetcher(flags); Try<MesosContainerizer*> _containerizer = MesosContainerizer::create(flags, true, &fetcher); ASSERT_SOME(_containerizer); Owned<MesosContainerizer> containerizer(_containerizer.get()); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave( detector.get(), containerizer.get()); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); ASSERT_FALSE(offers->empty()); TaskInfo task = createTask(offers.get()[0], "sleep 120"); Future<TaskStatus> statusRunning; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&statusRunning)); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(statusRunning); EXPECT_EQ(TASK_RUNNING, statusRunning->state()); Future<hashset<ContainerID>> containers = containerizer->containers(); AWAIT_READY(containers); ASSERT_EQ(1u, containers->size()); ContainerID containerId = *(containers->begin()); Future<ResourceStatistics> usage = containerizer->usage(containerId); AWAIT_READY(usage); // TODO(jieyu): Consider using a program that predictably increases // RSS so that we can set more meaningful expectation here. EXPECT_LT(0u, usage->mem_rss_bytes()); driver.stop(); driver.join(); }
// Ensures that when a scheduler enables explicit acknowledgements // on the driver, there are no implicit acknowledgements sent, and // the call to 'acknowledgeStatusUpdate' sends the ack to the master. TEST_F(MesosSchedulerDriverTest, ExplicitAcknowledgements) { Try<PID<Master>> master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); Try<PID<Slave>> slave = StartSlave(&containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), false, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(LaunchTasks(DEFAULT_EXECUTOR_INFO, 1, 1, 16, "*")) .WillRepeatedly(Return()); // Ignore subsequent offers. Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); // Ensure no status update acknowledgements are sent from the driver // to the master until the explicit acknowledgement is sent. EXPECT_NO_FUTURE_CALLS( mesos::scheduler::Call(), mesos::scheduler::Call::ACKNOWLEDGE, _ , master.get()); EXPECT_CALL(exec, registered(_, _, _, _)); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.start(); AWAIT_READY(status); // Settle the clock to ensure driver finishes processing the status // update, we want to ensure that no implicit acknowledgement gets // sent. Clock::pause(); Clock::settle(); // Now send the acknowledgement. Future<mesos::scheduler::Call> acknowledgement = FUTURE_CALL( mesos::scheduler::Call(), mesos::scheduler::Call::ACKNOWLEDGE, _, master.get()); driver.acknowledgeStatusUpdate(status.get()); AWAIT_READY(acknowledgement); driver.stop(); driver.join(); Shutdown(); }
// This test verifies that the master reconciles tasks that are // missing from a re-registering slave. In this case, we drop the // RunTaskMessage so the slave should send TASK_LOST. TEST_F(MasterSlaveReconciliationTest, ReconcileLostTask) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); StandaloneMasterDetector detector(master.get()->pid); Try<Owned<cluster::Slave>> slave = StartSlave(&detector); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); TaskInfo task; task.set_name("test task"); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); // We now launch a task and drop the corresponding RunTaskMessage on // the slave, to ensure that only the master knows about this task. Future<RunTaskMessage> runTaskMessage = DROP_PROTOBUF(RunTaskMessage(), _, _); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(runTaskMessage); Future<SlaveReregisteredMessage> slaveReregisteredMessage = FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _); Future<StatusUpdateMessage> statusUpdateMessage = FUTURE_PROTOBUF(StatusUpdateMessage(), _, master.get()->pid); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); // Simulate a spurious master change event (e.g., due to ZooKeeper // expiration) at the slave to force re-registration. detector.appoint(master.get()->pid); AWAIT_READY(slaveReregisteredMessage); // Make sure the slave generated the TASK_LOST. AWAIT_READY(statusUpdateMessage); AWAIT_READY(status); ASSERT_EQ(task.task_id(), status.get().task_id()); ASSERT_EQ(TASK_LOST, status.get().state()); // Before we obtain the metrics, ensure that the master has finished // processing the status update so metrics have been updated. Clock::pause(); Clock::settle(); Clock::resume(); // Check metrics. JSON::Object stats = Metrics(); EXPECT_EQ(1u, stats.values.count("master/tasks_lost")); EXPECT_EQ(1u, stats.values["master/tasks_lost"]); EXPECT_EQ( 1u, stats.values.count( "master/task_lost/source_slave/reason_reconciliation")); EXPECT_EQ( 1u, stats.values["master/task_lost/source_slave/reason_reconciliation"]); driver.stop(); driver.join(); }
// The IPC namespace has its own copy of the svipc(7) tunables. We verify // that we are correctly entering the IPC namespace by verifying that we // can set shmmax some different value than that of the host namespace. TEST_F(NamespacesIsolatorTest, ROOT_IPCNamespace) { Try<Owned<MesosContainerizer>> containerizer = createContainerizer("namespaces/ipc"); ASSERT_SOME(containerizer); // Value we will set the child namespace shmmax to. uint64_t shmmaxValue = static_cast<uint64_t>(::getpid()); Try<uint64_t> hostShmmax = readValue("/proc/sys/kernel/shmmax"); ASSERT_SOME(hostShmmax); // Verify that the host namespace shmmax is different. ASSERT_NE(hostShmmax.get(), shmmaxValue); const string command = "stat -c %i /proc/self/ns/ipc > ns;" "echo " + stringify(shmmaxValue) + " > /proc/sys/kernel/shmmax;" "cp /proc/sys/kernel/shmmax shmmax"; process::Future<bool> launch = containerizer.get()->launch( containerId, None(), createExecutorInfo("executor", command), directory, None(), SlaveID(), std::map<string, string>(), false); AWAIT_READY(launch); ASSERT_TRUE(launch.get()); // Wait on the container. Future<Option<ContainerTermination>> wait = containerizer.get()->wait(containerId); AWAIT_READY(wait); ASSERT_SOME(wait.get()); // Check the executor exited correctly. EXPECT_TRUE(wait->get().has_status()); EXPECT_EQ(0, wait->get().status()); // Check that the command was run in a different IPC namespace. Try<ino_t> testIPCNamespace = ns::getns(::getpid(), "ipc"); ASSERT_SOME(testIPCNamespace); Try<string> containerIPCNamespace = os::read(path::join(directory, "ns")); ASSERT_SOME(containerIPCNamespace); EXPECT_NE(stringify(testIPCNamespace.get()), strings::trim(containerIPCNamespace.get())); // Check that we modified the IPC shmmax of the namespace, not the host. Try<uint64_t> childShmmax = readValue("shmmax"); ASSERT_SOME(childShmmax); // Verify that we didn't modify shmmax in the host namespace. ASSERT_EQ(hostShmmax.get(), readValue("/proc/sys/kernel/shmmax").get()); EXPECT_NE(hostShmmax.get(), childShmmax.get()); EXPECT_EQ(shmmaxValue, childShmmax.get()); }
TEST_F(FilesTest, ResolveTest) { Files files; process::UPID upid("files", process::address()); // Test the directory / file resolution. ASSERT_SOME(os::mkdir("1/2")); ASSERT_SOME(os::write("1/two", "two")); ASSERT_SOME(os::write("1/2/three", "three")); // Attach some paths. AWAIT_EXPECT_READY(files.attach("1", "one")); AWAIT_EXPECT_READY(files.attach("1", "/one/")); AWAIT_EXPECT_READY(files.attach("1/2", "two")); AWAIT_EXPECT_READY(files.attach("1/2", "one/two")); // Resolve 1/2/3 via each attached path. JSON::Object expected; expected.values["offset"] = 0; expected.values["data"] = "three"; Future<Response> response = process::http::get(upid, "read.json", "path=one/2/three&offset=0"); AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response); AWAIT_EXPECT_RESPONSE_BODY_EQ(stringify(expected), response); response = process::http::get(upid, "read.json", "path=/one/2/three&offset=0"); AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response); AWAIT_EXPECT_RESPONSE_BODY_EQ(stringify(expected), response); response = process::http::get(upid, "read.json", "path=two/three&offset=0"); AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response); AWAIT_EXPECT_RESPONSE_BODY_EQ(stringify(expected), response); response = process::http::get(upid, "read.json", "path=one/two/three&offset=0"); AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response); AWAIT_EXPECT_RESPONSE_BODY_EQ(stringify(expected), response); // Percent encoded '/' urls. response = process::http::get(upid, "read.json", "path=%2Fone%2F2%2Fthree&offset=0"); AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response); AWAIT_EXPECT_RESPONSE_BODY_EQ(stringify(expected), response); response = process::http::get(upid, "read.json", "path=one%2Ftwo%2Fthree&offset=0"); AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response); AWAIT_EXPECT_RESPONSE_BODY_EQ(stringify(expected), response); // Reading dirs not allowed. AWAIT_EXPECT_RESPONSE_STATUS_EQ( BadRequest().status, process::http::get(upid, "read.json", "path=one/2")); AWAIT_EXPECT_RESPONSE_STATUS_EQ( BadRequest().status, process::http::get(upid, "read.json", "path=one")); AWAIT_EXPECT_RESPONSE_STATUS_EQ( BadRequest().status, process::http::get(upid, "read.json", "path=one/")); // Breaking out of sandbox. AWAIT_EXPECT_RESPONSE_STATUS_EQ( BadRequest().status, process::http::get(upid, "read.json", "path=two/../two")); }