TEST_F(ResourceOffersTest, Request)
{
  MockAllocatorProcess<HierarchicalDRFAllocatorProcess> allocator;

  EXPECT_CALL(allocator, initialize(_, _, _))
    .Times(1);

  Try<PID<Master> > master = StartMaster(&allocator);
  ASSERT_SOME(master);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());

  EXPECT_CALL(allocator, frameworkAdded(_, _, _))
    .Times(1);

  Future<Nothing> registered;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureSatisfy(&registered));

  driver.start();

  AWAIT_READY(registered);

  vector<Request> sent;
  Request request;
  request.mutable_slave_id()->set_value("test");
  sent.push_back(request);

  Future<vector<Request> > received;
  EXPECT_CALL(allocator, resourcesRequested(_, _))
    .WillOnce(FutureArg<1>(&received));

  driver.requestResources(sent);

  AWAIT_READY(received);
  EXPECT_EQ(sent.size(), received.get().size());
  EXPECT_NE(0u, received.get().size());
  EXPECT_EQ(request.slave_id(), received.get()[0].slave_id());

  EXPECT_CALL(allocator, frameworkDeactivated(_))
    .Times(AtMost(1)); // Races with shutting down the cluster.

  EXPECT_CALL(allocator, frameworkRemoved(_))
    .Times(AtMost(1)); // Races with shutting down the cluster.

  driver.stop();
  driver.join();

  Shutdown();
}
// Checks that in a cluster with one slave and one framework, all of
// the slave's resources are offered to the framework.
TYPED_TEST(AllocatorTest, MockAllocator)
{
  EXPECT_CALL(this->allocator, initialize(_, _, _));

  Try<PID<Master> > master = this->StartMaster(&this->allocator);
  ASSERT_SOME(master);

  slave::Flags flags = this->CreateSlaveFlags();
  flags.resources = Option<string>("cpus:2;mem:1024;disk:0");

  EXPECT_CALL(this->allocator, slaveAdded(_, _, _));

  Try<PID<Slave> > slave = this->StartSlave(flags);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());

  EXPECT_CALL(this->allocator, frameworkAdded(_, _, _));

  EXPECT_CALL(sched, registered(_, _, _));

  // The framework should be offered all of the resources on the slave
  // since it is the only framework in the cluster.
  Future<Nothing> resourceOffers;
  EXPECT_CALL(sched, resourceOffers(_, OfferEq(2, 1024)))
    .WillOnce(FutureSatisfy(&resourceOffers));

  driver.start();

  AWAIT_READY(resourceOffers);

  // Shut everything down.
  EXPECT_CALL(this->allocator, resourcesRecovered(_, _, _))
    .WillRepeatedly(DoDefault());

  EXPECT_CALL(this->allocator, frameworkDeactivated(_))
    .Times(AtMost(1));

  EXPECT_CALL(this->allocator, frameworkRemoved(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  EXPECT_CALL(this->allocator, slaveRemoved(_))
    .Times(AtMost(1));

  this->Shutdown();
}
TEST_F(ExceptionTest, DisallowSchedulerActionsOnAbort)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  Try<PID<Slave> > slave = StartSlave();
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  Future<Nothing> registered;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureSatisfy(&registered));

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(registered);

  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .Times(AtMost(1));

  ASSERT_EQ(DRIVER_ABORTED, driver.abort());

  ASSERT_EQ(DRIVER_ABORTED, driver.reviveOffers());

  driver.stop();
  Shutdown();
}
// This test checks that a scheduler exit shuts down the executor.
TEST_F(FaultToleranceTest, SchedulerExit)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave> > slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
TEST(ResourceOffersTest, ResourcesGetReofferedWhenUnused)
{
  ASSERT_TRUE(GTEST_IS_THREADSAFE);

  PID<Master> master = local::launch(1, 2, 1 * Gigabyte, false);

  MockScheduler sched1;
  MesosSchedulerDriver driver1(&sched1, "", DEFAULT_EXECUTOR_INFO, master);

  vector<Offer> offers;
  trigger sched1ResourceOfferCall;

  EXPECT_CALL(sched1, registered(&driver1, _))
    .Times(1);

  EXPECT_CALL(sched1, resourceOffers(&driver1, _))
    .WillOnce(DoAll(SaveArg<1>(&offers),
                    Trigger(&sched1ResourceOfferCall)))
    .WillRepeatedly(Return());

  driver1.start();

  WAIT_UNTIL(sched1ResourceOfferCall);

  EXPECT_NE(0, offers.size());

  vector<TaskDescription> tasks; // Use nothing!

  driver1.launchTasks(offers[0].id(), tasks);

  driver1.stop();
  driver1.join();

  MockScheduler sched2;
  MesosSchedulerDriver driver2(&sched2, "", DEFAULT_EXECUTOR_INFO, master);

  trigger sched2ResourceOfferCall;

  EXPECT_CALL(sched2, registered(&driver2, _))
    .Times(1);

  EXPECT_CALL(sched2, resourceOffers(&driver2, _))
    .WillOnce(Trigger(&sched2ResourceOfferCall))
    .WillRepeatedly(Return());

  EXPECT_CALL(sched2, offerRescinded(&driver2, _))
    .Times(AtMost(1));

  driver2.start();

  WAIT_UNTIL(sched2ResourceOfferCall);

  driver2.stop();
  driver2.join();

  local::shutdown();
}
TEST(FaultToleranceTest, FrameworkReregister)
{
  ASSERT_TRUE(GTEST_IS_THREADSAFE);

  MockFilter filter;
  process::filter(&filter);

  EXPECT_MESSAGE(filter, _, _, _)
    .WillRepeatedly(Return(false));

  PID<Master> master = local::launch(1, 2, 1 * Gigabyte, false);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master);

  trigger schedRegisteredCall, schedReregisteredCall;

  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(Trigger(&schedRegisteredCall));

  EXPECT_CALL(sched, reregistered(&driver, _))
    .WillOnce(Trigger(&schedReregisteredCall));

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillRepeatedly(Return());

  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .Times(AtMost(1));

  process::Message message;

  EXPECT_MESSAGE(filter, Eq(FrameworkRegisteredMessage().GetTypeName()), _, _)
    .WillOnce(DoAll(SaveArgField<0>(&process::MessageEvent::message, &message),
                    Return(false)));

  driver.start();

  WAIT_UNTIL(schedRegisteredCall); // Ensures registered message is received.

  // Simulate a spurious newMasterDetected event (e.g., due to ZooKeeper
  // expiration) at the scheduler.
  NewMasterDetectedMessage newMasterDetectedMsg;
  newMasterDetectedMsg.set_pid(master);

  process::post(message.to, newMasterDetectedMsg);

  WAIT_UNTIL(schedReregisteredCall);

  driver.stop();
  driver.join();

  local::shutdown();

  process::filter(NULL);
}
TEST(FaultToleranceTest, FrameworkReliableRegistration)
{
  ASSERT_TRUE(GTEST_IS_THREADSAFE);

  Clock::pause();

  MockFilter filter;
  process::filter(&filter);

  EXPECT_MESSAGE(filter, _, _, _)
    .WillRepeatedly(Return(false));

  PID<Master> master = local::launch(1, 2, 1 * Gigabyte, false);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master);

  trigger schedRegisteredCall;

  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(Trigger(&schedRegisteredCall));

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillRepeatedly(Return());

  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .Times(AtMost(1));

  trigger frameworkRegisteredMsg;

  // Drop the first framework registered message, allow subsequent messages.
  EXPECT_MESSAGE(filter, Eq(FrameworkRegisteredMessage().GetTypeName()), _, _)
    .WillOnce(DoAll(Trigger(&frameworkRegisteredMsg),
                    Return(true)))
    .WillRepeatedly(Return(false));

  driver.start();

  WAIT_UNTIL(frameworkRegisteredMsg);

  Clock::advance(1.0); // TODO(benh): Pull out constant from SchedulerProcess.

  WAIT_UNTIL(schedRegisteredCall); // Ensures registered message is received.

  driver.stop();
  driver.join();

  local::shutdown();

  process::filter(NULL);

  Clock::resume();
}
TEST_F(FaultToleranceTest, FrameworkReregister)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  Try<PID<Slave> > slave = StartSlave();
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());

  Future<Nothing> registered;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureSatisfy(&registered));

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillRepeatedly(Return());

  Future<process::Message> message =
    FUTURE_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);

  driver.start();

  AWAIT_READY(message); // Framework registered message, to get the pid.
  AWAIT_READY(registered); // Framework registered call.

  Future<Nothing> disconnected;
  EXPECT_CALL(sched, disconnected(&driver))
    .WillOnce(FutureSatisfy(&disconnected));

  Future<Nothing> reregistered;
  EXPECT_CALL(sched, reregistered(&driver, _))
    .WillOnce(FutureSatisfy(&reregistered));

  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .Times(AtMost(1));

  // Simulate a spurious newMasterDetected event (e.g., due to ZooKeeper
  // expiration) at the scheduler.
  NewMasterDetectedMessage newMasterDetectedMsg;
  newMasterDetectedMsg.set_pid(master.get());

  process::post(message.get().to, newMasterDetectedMsg);

  AWAIT_READY(disconnected);
  AWAIT_READY(reregistered);

  driver.stop();
  driver.join();

  Shutdown();
}
TEST(MasterTest, ResourceOfferWithMultipleSlaves)
{
  ASSERT_TRUE(GTEST_IS_THREADSAFE);

  PID<Master> master = local::launch(10, 2, 1 * Gigabyte, false, false);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, master);

  vector<SlaveOffer> offers;
  trigger resourceOfferCall;

  EXPECT_CALL(sched, getFrameworkName(&driver))
    .WillOnce(Return(""));

  EXPECT_CALL(sched, getExecutorInfo(&driver))
    .WillOnce(Return(DEFAULT_EXECUTOR_INFO));

  EXPECT_CALL(sched, registered(&driver, _))
    .Times(1);

  EXPECT_CALL(sched, resourceOffer(&driver, _, _))
    .WillOnce(DoAll(SaveArg<2>(&offers),
                    Trigger(&resourceOfferCall)))
    .WillRepeatedly(Return());

  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .Times(AtMost(1));

  driver.start();

  WAIT_UNTIL(resourceOfferCall);

  EXPECT_NE(0, offers.size());
  EXPECT_GE(10, offers.size());

  Resources resources(offers[0].resources());
  EXPECT_EQ(2, resources.get("cpus", Resource::Scalar()).value());
  EXPECT_EQ(1024, resources.get("mem", Resource::Scalar()).value());

  driver.stop();
  driver.join();

  local::shutdown();
}
TEST_F(FaultToleranceTest, FrameworkReliableRegistration)
{
  Clock::pause();

  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  Try<PID<Slave> > slave = StartSlave();
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());

  Future<Nothing> registered;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureSatisfy(&registered));

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillRepeatedly(Return());

  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .Times(AtMost(1));

  // Drop the first framework registered message, allow subsequent messages.
  Future<FrameworkRegisteredMessage> frameworkRegisteredMessage =
    DROP_PROTOBUF(FrameworkRegisteredMessage(), _, _);

  driver.start();

  AWAIT_READY(frameworkRegisteredMessage);

  // TODO(benh): Pull out constant from SchedulerProcess.
  Clock::advance(Seconds(1));

  AWAIT_READY(registered); // Ensures registered message is received.

  driver.stop();
  driver.join();

  Shutdown();

  Clock::resume();
}
// This test verifies that an authorized task launch is successful.
TEST_F(MasterAuthorizationTest, AuthorizedTask)
{
  // Setup ACLs so that the framework can launch tasks as "foo".
  ACLs acls;
  mesos::ACL::RunTasks* acl = acls.add_run_tasks();
  acl->mutable_principals()->add_values(DEFAULT_FRAMEWORK_INFO.principal());
  acl->mutable_users()->add_values("foo");

  master::Flags flags = CreateMasterFlags();
  flags.acls = acls;

  Try<PID<Master> > master = StartMaster(flags);
  ASSERT_SOME(master);

  // Create an authorized executor.
  ExecutorInfo executor; // Bug in gcc 4.1.*, must assign on next line.
  executor = CREATE_EXECUTOR_INFO("test-executor", "exit 1");
  executor.mutable_command()->set_user("foo");

  MockExecutor exec(executor.executor_id());

  Try<PID<Slave> > slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  // Create an authorized task.
  TaskInfo task;
  task.set_name("test");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(executor);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  EXPECT_CALL(exec, registered(_, _, _, _))
    .Times(1);

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
// This test verifies that two tasks each launched on a different
// slave with same executor id but different executor info are
// allowed even when the first task is pending due to authorization.
TEST_F(MasterAuthorizationTest, PendingExecutorInfoDiffersOnDifferentSlaves)
{
  MockAuthorizer authorizer;
  Try<PID<Master> > master = StartMaster(&authorizer);
  ASSERT_SOME(master);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  Future<Nothing> registered;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureSatisfy(&registered));

  driver.start();

  AWAIT_READY(registered);

  Future<vector<Offer> > offers1;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers1));

  // Start the first slave.
  MockExecutor exec1(DEFAULT_EXECUTOR_ID);
  Try<PID<Slave> > slave1 = StartSlave(&exec1);
  ASSERT_SOME(slave1);

  AWAIT_READY(offers1);
  EXPECT_NE(0u, offers1.get().size());

  // Launch the first task with the default executor id.
  ExecutorInfo executor1;
  executor1 = DEFAULT_EXECUTOR_INFO;
  executor1.mutable_command()->set_value("exit 1");

  TaskInfo task1 = createTask(
      offers1.get()[0], executor1.command().value(), executor1.executor_id());

  vector<TaskInfo> tasks1;
  tasks1.push_back(task1);

  // Return a pending future from authorizer.
  Future<Nothing> future;
  Promise<bool> promise;
  EXPECT_CALL(authorizer, authorize(An<const mesos::ACL::RunTasks&>()))
    .WillOnce(DoAll(FutureSatisfy(&future),
                    Return(promise.future())));

  driver.launchTasks(offers1.get()[0].id(), tasks1);

  // Wait until authorization is in progress.
  AWAIT_READY(future);

  Future<vector<Offer> > offers2;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers2))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  // Now start the second slave.
  MockExecutor exec2(DEFAULT_EXECUTOR_ID);
  Try<PID<Slave> > slave2 = StartSlave(&exec2);
  ASSERT_SOME(slave2);

  AWAIT_READY(offers2);
  EXPECT_NE(0u, offers2.get().size());

  // Now launch the second task with the same executor id but
  // a different executor command.
  ExecutorInfo executor2;
  executor2 = executor1;
  executor2.mutable_command()->set_value("exit 2");

  TaskInfo task2 = createTask(
      offers2.get()[0], executor2.command().value(), executor2.executor_id());

  vector<TaskInfo> tasks2;
  tasks2.push_back(task2);

  EXPECT_CALL(exec2, registered(_, _, _, _))
    .Times(1);

  EXPECT_CALL(exec2, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> status2;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status2));

  EXPECT_CALL(authorizer, authorize(An<const mesos::ACL::RunTasks&>()))
    .WillOnce(Return(true));

  driver.launchTasks(offers2.get()[0].id(), tasks2);

  AWAIT_READY(status2);
  ASSERT_EQ(TASK_RUNNING, status2.get().state());

  EXPECT_CALL(exec1, registered(_, _, _, _))
    .Times(1);

  EXPECT_CALL(exec1, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> status1;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status1));

  // Complete authorization of 'task1'.
  promise.set(true);

  AWAIT_READY(status1);
  ASSERT_EQ(TASK_RUNNING, status1.get().state());

  EXPECT_CALL(exec1, shutdown(_))
    .Times(AtMost(1));

  EXPECT_CALL(exec2, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
// This test verifies that when an executor terminates before
// registering with slave, it is properly cleaned up.
TEST_F(SlaveTest, RemoveUnregisteredTerminatedExecutor)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Try<PID<Slave> > slave = StartSlave(&containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  // Drop the registration message from the executor to the slave.
  Future<process::Message> registerExecutorMessage =
    DROP_MESSAGE(Eq(RegisterExecutorMessage().GetTypeName()), _, _);

  driver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(registerExecutorMessage);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  Future<Nothing> schedule =
    FUTURE_DISPATCH(_, &GarbageCollectorProcess::schedule);

  // Now kill the executor.
  containerizer.destroy(offers.get()[0].framework_id(), DEFAULT_EXECUTOR_ID);

  AWAIT_READY(status);
  EXPECT_EQ(TASK_LOST, status.get().state());

  // We use 'gc.schedule' as a signal for the executor being cleaned
  // up by the slave.
  AWAIT_READY(schedule);

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
// This test checks that a scheduler gets a slave lost
// message for a partitioned slave.
TEST_F(PartitionTest, PartitionedSlave)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  // Set these expectations up before we spawn the slave so that we
  // don't miss the first PING.
  Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _);

  // Drop all the PONGs to simulate slave partition.
  DROP_MESSAGES(Eq("PONG"), _, _);

  Try<PID<Slave> > slave = StartSlave();
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<Nothing> resourceOffers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureSatisfy(&resourceOffers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  // Need to make sure the framework AND slave have registered with
  // master. Waiting for resource offers should accomplish both.
  AWAIT_READY(resourceOffers);

  Clock::pause();

  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .Times(AtMost(1));

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  // Now advance through the PINGs.
  uint32_t pings = 0;
  while (true) {
    AWAIT_READY(ping);
    pings++;
    if (pings == master::MAX_SLAVE_PING_TIMEOUTS) {
      break;
    }
    ping = FUTURE_MESSAGE(Eq("PING"), _, _);
    Clock::advance(master::SLAVE_PING_TIMEOUT);
  }

  Clock::advance(master::SLAVE_PING_TIMEOUT);

  AWAIT_READY(slaveLost);

  driver.stop();
  driver.join();

  Shutdown();

  Clock::resume();
}
TEST_F(GarbageCollectorIntegrationTest, DiskUsage)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);

  slave::Flags flags = CreateSlaveFlags();

  Try<PID<Slave> > slave = StartSlave(&containerizer, flags);
  ASSERT_SOME(slave);

  AWAIT_READY(slaveRegisteredMessage);
  SlaveID slaveId = slaveRegisteredMessage.get().slave_id();

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(_, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Resources resources = Resources::parse(flags.resources.get()).get();
  double cpus = resources.get<Value::Scalar>("cpus").get().value();
  double mem = resources.get<Value::Scalar>("mem").get().value();

  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(LaunchTasks(DEFAULT_EXECUTOR_INFO, 1, cpus, mem, "*"))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  EXPECT_CALL(exec, registered(_, _, _, _))
    .Times(1);

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  driver.start();

  AWAIT_READY(frameworkId);

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  const std::string& executorDir = slave::paths::getExecutorPath(
      flags.work_dir, slaveId, frameworkId.get(), DEFAULT_EXECUTOR_ID);

  ASSERT_TRUE(os::exists(executorDir));

  Clock::pause();

  // Killing the executor will cause the slave to schedule its
  // directory to get garbage collected.
  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  Future<Nothing> schedule =
    FUTURE_DISPATCH(_, &GarbageCollectorProcess::schedule);

  EXPECT_CALL(sched, statusUpdate(_, _))
    .Times(AtMost(1)); // Ignore TASK_LOST from killed executor.

  // Kill the executor and inform the slave.
  containerizer.destroy(frameworkId.get(), DEFAULT_EXECUTOR_ID);

  AWAIT_READY(schedule);

  Clock::settle(); // Wait for GarbageCollectorProcess::schedule to complete.

  // We advance the clock here so that the 'removalTime' of the
  // executor directory is definitely less than 'flags.gc_delay' in
  // the GarbageCollectorProcess when 'GarbageCollector::prune()'
  // gets called (below). Otherwise, due to double comparison
  // precision in 'prune()' the directory might not be deleted.
  Clock::advance(Seconds(1));

  Future<Nothing> _checkDiskUsage =
    FUTURE_DISPATCH(_, &Slave::_checkDiskUsage);

  // Simulate a disk full message to the slave.
  process::dispatch(
      slave.get(),
      &Slave::_checkDiskUsage,
      Try<double>(1.0 - slave::GC_DISK_HEADROOM));

  AWAIT_READY(_checkDiskUsage);

  Clock::settle(); // Wait for Slave::_checkDiskUsage to complete.

  // Executor's directory should be gc'ed by now.
  ASSERT_FALSE(os::exists(executorDir));

  process::UPID files("files", process::node());
  AWAIT_EXPECT_RESPONSE_STATUS_EQ(
      process::http::NotFound().status,
      process::http::get(files, "browse.json", "path=" + executorDir));

  Clock::resume();

  driver.stop();
  driver.join();

  Shutdown(); // Must shutdown before 'isolator' gets deallocated.
}
// This test launches a command task with checkpointing enabled,
// terminates the agent while the task is running, restarts the
// agent, kills the task, and then verifies that TASK_KILLED is
// received for the task.
TEST_F(CniIsolatorTest, ROOT_SlaveRecovery)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();
  flags.isolation = "network/cni";

  flags.network_cni_plugins_dir = cniPluginDir;
  flags.network_cni_config_dir = cniConfigDir;

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;

  // Enable checkpointing for the framework.
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true);

  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_EQ(1u, offers->size());

  const Offer& offer = offers.get()[0];

  CommandInfo command;
  command.set_value("sleep 1000");

  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:128").get(),
      command);

  ContainerInfo* container = task.mutable_container();
  container->set_type(ContainerInfo::MESOS);

  // Make sure the container joins the mock CNI network.
  container->add_network_infos()->set_name("__MESOS_TEST__");

  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusKilled;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusKilled));

  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .Times(AtMost(1));

  Future<Nothing> ack =
    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(statusRunning);
  EXPECT_EQ(task.task_id(), statusRunning->task_id());
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  // Wait for the ACK to be checkpointed.
  AWAIT_READY(ack);

  // Stop the slave after TASK_RUNNING is received.
  slave.get()->terminate();

  // Restart the slave.
  slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  // Kill the task.
  driver.killTask(task.task_id());

  AWAIT_READY(statusKilled);
  EXPECT_EQ(task.task_id(), statusKilled->task_id());
  EXPECT_EQ(TASK_KILLED, statusKilled->state());

  driver.stop();
  driver.join();
}
TEST_F(StatusUpdateManagerTest, CheckpointStatusUpdate)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  slave::Flags flags = CreateSlaveFlags();
  flags.checkpoint = true;

  Try<PID<Slave> > slave = StartSlave(&exec, flags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo; // Bug in gcc 4.1.*, must assign on next line.
  frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true); // Enable checkpointing.

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  EXPECT_CALL(exec, registered(_, _, _, _))
    .Times(1);

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  Future<Nothing> _statusUpdateAcknowledgement =
    FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement);

  driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0]));

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  AWAIT_READY(_statusUpdateAcknowledgement);

  // Ensure that both the status update and its acknowledgement are
  // correctly checkpointed.
  Try<list<string> > found = os::find(flags.work_dir, TASK_UPDATES_FILE);
  ASSERT_SOME(found);
  ASSERT_EQ(1u, found.get().size());

  Try<int> fd = os::open(found.get().front(), O_RDONLY);
  ASSERT_SOME(fd);

  int updates = 0;
  int acks = 0;
  string uuid;

  Result<StatusUpdateRecord> record = None();
  while (true) {
    record = ::protobuf::read<StatusUpdateRecord>(fd.get());
    ASSERT_FALSE(record.isError());
    if (record.isNone()) { // Reached EOF.
      break;
    }

    if (record.get().type() == StatusUpdateRecord::UPDATE) {
      EXPECT_EQ(TASK_RUNNING, record.get().update().status().state());
      uuid = record.get().update().uuid();
      updates++;
    } else {
      EXPECT_EQ(uuid, record.get().uuid());
      acks++;
    }
  }

  ASSERT_EQ(1, updates);
  ASSERT_EQ(1, acks);

  close(fd.get());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
TEST_F(GarbageCollectorIntegrationTest, Restart)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  // Need to create our own flags because we want to reuse them when
  // we (re)start the slave below.
  slave::Flags flags = CreateSlaveFlags();

  Try<PID<Slave> > slave = StartSlave(&exec, flags);
  ASSERT_SOME(slave);

  AWAIT_READY(slaveRegisteredMessage);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _))
    .Times(1);

  Resources resources = Resources::parse(flags.resources.get()).get();
  double cpus = resources.get<Value::Scalar>("cpus").get().value();
  double mem = resources.get<Value::Scalar>("mem").get().value();

  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(LaunchTasks(DEFAULT_EXECUTOR_INFO, 1, cpus, mem, "*"))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  // Ignore offerRescinded calls. The scheduler might receive it
  // because the slave might re-register due to ping timeout.
  EXPECT_CALL(sched, offerRescinded(_, _))
    .WillRepeatedly(Return());

  EXPECT_CALL(exec, registered(_, _, _, _))
    .Times(1);

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  driver.start();

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  // Make sure directory exists. Need to do this AFTER getting a
  // status update for a task because the directory won't get created
  // until the task is launched. We get the slave ID from the
  // SlaveRegisteredMessage.
  const std::string& slaveDir = slave::paths::getSlavePath(
      flags.work_dir, slaveRegisteredMessage.get().slave_id());

  ASSERT_TRUE(os::exists(slaveDir));

  Clock::pause();

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  EXPECT_CALL(sched, statusUpdate(_, _))
    .Times(AtMost(1)); // Ignore TASK_LOST from killed executor.

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(_, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  Stop(slave.get());

  AWAIT_READY(slaveLost);

  Future<Nothing> schedule =
    FUTURE_DISPATCH(_, &GarbageCollectorProcess::schedule);

  slave = StartSlave(flags);
  ASSERT_SOME(slave);

  AWAIT_READY(schedule);

  Clock::settle(); // Wait for GarbageCollectorProcess::schedule to complete.

  Clock::advance(flags.gc_delay);

  Clock::settle();

  // By this time the old slave directory should be cleaned up.
  ASSERT_FALSE(os::exists(slaveDir));

  Clock::resume();

  driver.stop();
  driver.join();

  Shutdown();
}
TEST_F(GarbageCollectorIntegrationTest, ExitedExecutor)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);

  slave::Flags flags = CreateSlaveFlags();

  Try<PID<Slave> > slave = StartSlave(&containerizer, flags);
  ASSERT_SOME(slave);

  AWAIT_READY(slaveRegisteredMessage);
  SlaveID slaveId = slaveRegisteredMessage.get().slave_id();

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(_, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Resources resources = Resources::parse(flags.resources.get()).get();
  double cpus = resources.get<Value::Scalar>("cpus").get().value();
  double mem = resources.get<Value::Scalar>("mem").get().value();

  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(LaunchTasks(DEFAULT_EXECUTOR_INFO, 1, cpus, mem, "*"))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  // Ignore offerRescinded calls. The scheduler might receive it
  // because the slave might re-register due to ping timeout.
  EXPECT_CALL(sched, offerRescinded(_, _))
    .WillRepeatedly(Return());

  EXPECT_CALL(exec, registered(_, _, _, _))
    .Times(1);

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  driver.start();

  AWAIT_READY(frameworkId);

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  const std::string& executorDir = slave::paths::getExecutorPath(
      flags.work_dir, slaveId, frameworkId.get(), DEFAULT_EXECUTOR_ID);

  ASSERT_TRUE(os::exists(executorDir));

  Clock::pause();

  // Killing the executor will cause the slave to schedule its
  // directory to get garbage collected.
  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  Future<Nothing> schedule =
    FUTURE_DISPATCH(_, &GarbageCollectorProcess::schedule);

  EXPECT_CALL(sched, statusUpdate(_, _))
    .Times(AtMost(1)); // Ignore TASK_LOST from killed executor.

  // Kill the executor and inform the slave.
  containerizer.destroy(frameworkId.get(), DEFAULT_EXECUTOR_ID);

  AWAIT_READY(schedule);

  Clock::settle(); // Wait for GarbageCollectorProcess::schedule to complete.

  Clock::advance(flags.gc_delay);

  Clock::settle();

  // Executor's directory should be gc'ed by now.
  ASSERT_FALSE(os::exists(executorDir));

  process::UPID files("files", process::node());
  AWAIT_EXPECT_RESPONSE_STATUS_EQ(
      process::http::NotFound().status,
      process::http::get(files, "browse.json", "path=" + executorDir));

  Clock::resume();

  driver.stop();
  driver.join();

  Shutdown(); // Must shutdown before 'isolator' gets deallocated.
}
// This test ensures that an implicit reconciliation request results
// in updates for all non-terminal tasks known to the master.
TEST_F(ReconciliationTest, ImplicitNonTerminalTask)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Try<PID<Slave> > slave = StartSlave(&containerizer);
  ASSERT_SOME(slave);

  // Launch a framework and get a task running.
  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(LaunchTasks(DEFAULT_EXECUTOR_INFO, 1, 1, 512, "*"))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> update;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&update));

  driver.start();

  // Wait until the framework is registered.
  AWAIT_READY(frameworkId);

  AWAIT_READY(update);
  EXPECT_EQ(TASK_RUNNING, update.get().state());
  EXPECT_TRUE(update.get().has_slave_id());

  // When making an implicit reconciliation request, the non-terminal
  // task should be sent back.
  Future<TaskStatus> update2;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&update2));

  vector<TaskStatus> statuses;
  driver.reconcileTasks(statuses);

  AWAIT_READY(update2);
  EXPECT_EQ(TASK_RUNNING, update2.get().state());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
// This test verifies that the launch of new executor will result in
// an unschedule of the framework work directory created by an old
// executor.
TEST_F(GarbageCollectorIntegrationTest, Unschedule)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  Future<SlaveRegisteredMessage> slaveRegistered =
    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);

  ExecutorInfo executor1; // Bug in gcc 4.1.*, must assign on next line.
  executor1 = CREATE_EXECUTOR_INFO("executor-1", "exit 1");

  ExecutorInfo executor2; // Bug in gcc 4.1.*, must assign on next line.
  executor2 = CREATE_EXECUTOR_INFO("executor-2", "exit 1");

  MockExecutor exec1(executor1.executor_id());
  MockExecutor exec2(executor2.executor_id());

  hashmap<ExecutorID, Executor*> execs;
  execs[executor1.executor_id()] = &exec1;
  execs[executor2.executor_id()] = &exec2;

  TestContainerizer containerizer(execs);

  slave::Flags flags = CreateSlaveFlags();

  Try<PID<Slave> > slave = StartSlave(&containerizer, flags);
  ASSERT_SOME(slave);

  AWAIT_READY(slaveRegistered);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(_, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Resources resources = Resources::parse(flags.resources.get()).get();
  double cpus = resources.get<Value::Scalar>("cpus").get().value();
  double mem = resources.get<Value::Scalar>("mem").get().value();

  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(LaunchTasks(executor1, 1, cpus, mem, "*"));

  EXPECT_CALL(exec1, registered(_, _, _, _));

  EXPECT_CALL(exec1, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  driver.start();

  AWAIT_READY(frameworkId);

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  // TODO(benh/vinod): Would've been great to match the dispatch
  // against arguments here.
  // NOTE: Since Google Mock selects the last matching expectation
  // that is still active, the order of (un)schedule expectations
  // below are the reverse of the actual (un)schedule call order.

  // Schedule framework work directory.
  Future<Nothing> scheduleFrameworkWork =
    FUTURE_DISPATCH(_, &GarbageCollectorProcess::schedule);

  // Schedule top level executor work directory.
  Future<Nothing> scheduleExecutorWork =
    FUTURE_DISPATCH(_, &GarbageCollectorProcess::schedule);

  // Schedule executor run work directory.
  Future<Nothing> scheduleExecutorRunWork =
    FUTURE_DISPATCH(_, &GarbageCollectorProcess::schedule);

  // Unschedule framework work directory.
  Future<Nothing> unscheduleFrameworkWork =
    FUTURE_DISPATCH(_, &GarbageCollectorProcess::unschedule);

  // We ask the isolator to kill the first executor below.
  EXPECT_CALL(exec1, shutdown(_))
    .Times(AtMost(1));

  EXPECT_CALL(sched, statusUpdate(_, _))
    .Times(AtMost(2)); // Once for a TASK_LOST then once for TASK_RUNNING.

  // We use the killed executor/tasks resources to run another task.
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(LaunchTasks(executor2, 1, cpus, mem, "*"));

  EXPECT_CALL(exec2, registered(_, _, _, _));

  EXPECT_CALL(exec2, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Clock::pause();

  // Kill the first executor.
  containerizer.destroy(frameworkId.get(), exec1.id);

  AWAIT_READY(scheduleExecutorRunWork);
  AWAIT_READY(scheduleExecutorWork);
  AWAIT_READY(scheduleFrameworkWork);

  // Speed up the allocator.
  while (unscheduleFrameworkWork.isPending()) {
    Clock::advance(Seconds(1));
    Clock::settle();
  }

  AWAIT_READY(unscheduleFrameworkWork);

  Clock::resume();

  EXPECT_CALL(exec2, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown(); // Must shutdown before 'isolator' gets deallocated.
}
// Checks that in the event of a master failure and the election of a
// new master, if a slave reregisters before a framework that has
// resources on it reregisters, all used and unused resources are
// accounted for correctly.
TYPED_TEST(AllocatorZooKeeperTest, SlaveReregistersFirst)
{
  TypeParam allocator1;

  Try<PID<Master> > master = this->StartMaster(&allocator1);
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  slave::Flags flags = this->CreateSlaveFlags();
  flags.resources = Option<string>("cpus:2;mem:1024");

  Try<PID<Slave> > slave = this->StartSlave(&exec, flags);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, stringify(this->url.get()));

  EXPECT_CALL(sched, registered(&driver, _, _));

  // The framework should be offered all of the resources on the slave
  // since it is the only framework running.
  EXPECT_CALL(sched, resourceOffers(&driver, OfferEq(2, 1024)))
    .WillOnce(LaunchTasks(1, 1, 500, "*"))
    .WillRepeatedly(DeclineOffers());

  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  driver.start();

  AWAIT_READY(status);

  EXPECT_EQ(TASK_RUNNING, status.get().state());

  // Stop the failing master from telling the slave to shut down when
  // it is killed.
  Future<ShutdownMessage> shutdownMessage =
    DROP_PROTOBUF(ShutdownMessage(), _, _);

  // Stop the framework from reregistering with the new master until the
  // slave has reregistered.
  DROP_PROTOBUFS(ReregisterFrameworkMessage(), _, _);

  // Shutting down the masters will cause the scheduler to get
  // disconnected.
  EXPECT_CALL(sched, disconnected(_));

  // Shutting down the masters will also cause the slave to shutdown
  // frameworks that are not checkpointing, thus causing the executor
  // to get shutdown.
  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  this->ShutdownMasters();

  AWAIT_READY(shutdownMessage);

  MockAllocatorProcess<TypeParam> allocator2;
  EXPECT_CALL(allocator2, initialize(_, _, _));

  Try<PID<Master> > master2 = this->StartMaster(&allocator2);
  ASSERT_SOME(master2);

  Future<Nothing> slaveAdded;
  EXPECT_CALL(allocator2, slaveAdded(_, _, _))
    .WillOnce(DoAll(InvokeSlaveAdded(&allocator2),
                    FutureSatisfy(&slaveAdded)));

  EXPECT_CALL(sched, reregistered(&driver, _));

  AWAIT_READY(slaveAdded);

  EXPECT_CALL(allocator2, frameworkAdded(_, _, _));

  Future<vector<Offer> > resourceOffers2;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&resourceOffers2));

  // We kill the filter so that ReregisterFrameworkMessages can get
  // to the master now that the framework has been added, ensuring
  // that the framework reregisters after the slave.
  process::filter(NULL);

  AWAIT_READY(resourceOffers2);

  // Since the task is still running on the slave, the framework
  // should only be offered the resources not being used by the task.
  EXPECT_THAT(resourceOffers2.get(), OfferEq(1, 524));

  // Shut everything down.
  EXPECT_CALL(allocator2, resourcesRecovered(_, _, _))
    .WillRepeatedly(DoDefault());

  EXPECT_CALL(allocator2, frameworkDeactivated(_))
    .Times(AtMost(1));

  EXPECT_CALL(allocator2, frameworkRemoved(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  EXPECT_CALL(allocator2, slaveRemoved(_))
    .Times(AtMost(1));

  this->Shutdown();
}
// This test ensures that the master does not send updates for
// terminal tasks during an implicit reconciliation request.
// TODO(bmahler): Soon the master will keep non-acknowledged
// tasks, and this test may break.
TEST_F(ReconciliationTest, ImplicitTerminalTask)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Try<PID<Slave> > slave = StartSlave(&containerizer);
  ASSERT_SOME(slave);

  // Launch a framework and get a task terminal.
  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(LaunchTasks(DEFAULT_EXECUTOR_INFO, 1, 1, 512, "*"))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_FINISHED));

  Future<TaskStatus> update;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&update));

  driver.start();

  // Wait until the framework is registered.
  AWAIT_READY(frameworkId);

  AWAIT_READY(update);
  EXPECT_EQ(TASK_FINISHED, update.get().state());
  EXPECT_TRUE(update.get().has_slave_id());

  // Framework should not receive any further updates.
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .Times(0);

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  Future<ReconcileTasksMessage> reconcileTasksMessage =
    FUTURE_PROTOBUF(ReconcileTasksMessage(), _ , _);

  Clock::pause();

  // When making an implicit reconciliation request, the master
  // should not send back terminal tasks.
  vector<TaskStatus> statuses;
  driver.reconcileTasks(statuses);

  // Make sure the master received the reconcile tasks message.
  AWAIT_READY(reconcileTasksMessage);

  // The Clock::settle() will ensure that framework would receive
  // a status update if it is sent by the master. In this test it
  // shouldn't receive any.
  Clock::settle();

  driver.stop();
  driver.join();

  Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
// This test verifies that the slave run task label decorator can add
// and remove labels from a task during the launch sequence. A task
// with two labels ("foo":"bar" and "bar":"baz") is launched and will
// get modified by the slave hook to strip the "foo":"bar" pair and
// add a new "baz":"qux" pair.
TEST_F(HookTest, VerifySlaveRunTaskHook)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), &containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_EQ(1u, offers.get().size());

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->CopyFrom(offers.get()[0].slave_id());
  task.mutable_resources()->CopyFrom(offers.get()[0].resources());
  task.mutable_executor()->CopyFrom(DEFAULT_EXECUTOR_INFO);

  // Add two labels: (1) will be removed by the hook to ensure that
  // runTaskHook can remove labels (2) will be preserved to ensure
  // that the framework can add labels to the task and have those be
  // available by the end of the launch task sequence when hooks are
  // used (to protect against hooks removing labels completely).
  Labels* labels = task.mutable_labels();
  labels->add_labels()->CopyFrom(createLabel("foo", "bar"));
  labels->add_labels()->CopyFrom(createLabel("bar", "baz"));

  EXPECT_CALL(exec, registered(_, _, _, _));

  Future<TaskInfo> taskInfo;
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(DoAll(
        FutureArg<1>(&taskInfo),
        SendStatusUpdateFromTask(TASK_RUNNING)));

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(taskInfo);

  // The master hook will hang an extra label off.
  const Labels& labels_ = taskInfo.get().labels();

  ASSERT_EQ(3, labels_.labels_size());

  // The slave run task hook will prepend a new "baz":"qux" label.
  EXPECT_EQ("baz", labels_.labels(0).key());
  EXPECT_EQ("qux", labels_.labels(0).value());

  // Master launch task hook will still hang off test label.
  EXPECT_EQ(testLabelKey, labels_.labels(1).key());
  EXPECT_EQ(testLabelValue, labels_.labels(1).value());

  // And lastly, we only expect the "foo":"bar" pair to be stripped by
  // the module. The last pair should be the original "bar":"baz"
  // pair set by the test.
  EXPECT_EQ("bar", labels_.labels(2).key());
  EXPECT_EQ("baz", labels_.labels(2).value());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();
}
// Tests whether a slave correctly detects the new master when its
// ZooKeeper session is expired and a new master is elected before the
// slave reconnects with ZooKeeper.
TEST_F(ZooKeeperMasterDetectorTest,
       MasterDetectorExpireSlaveZKSessionNewMaster)
{
  // Simulate a leading master.
  MockMasterDetectorListenerProcess master1;

  Future<Nothing> newMasterDetected1;
  EXPECT_CALL(master1, newMasterDetected(_))
    .WillOnce(FutureSatisfy(&newMasterDetected1))
    .WillRepeatedly(Return());

  EXPECT_CALL(master1, noMasterDetected())
    .Times(0);

  process::spawn(master1);

  std::string znode = "zk://" + server->connectString() + "/mesos";

  Try<zookeeper::URL> url = zookeeper::URL::parse(znode);
  ASSERT_SOME(url);

  // Leading master's detector.
  ZooKeeperMasterDetector masterDetector1(
      url.get(), master1.self(), true, true);

  AWAIT_READY(newMasterDetected1);

  // Simulate a non-leading master.
  MockMasterDetectorListenerProcess master2;

  Future<Nothing> newMasterDetected2;
  EXPECT_CALL(master2, newMasterDetected(_))
    .WillOnce(FutureSatisfy(&newMasterDetected2))
    .WillRepeatedly(Return());

  EXPECT_CALL(master2, noMasterDetected())
    .Times(0);

  process::spawn(master2);

  // Non-leading master's detector.
  ZooKeeperMasterDetector masterDetector2(
      url.get(), master2.self(), true, true);

  AWAIT_READY(newMasterDetected2);

  // Simulate a slave.
  MockMasterDetectorListenerProcess slave;

  Future<Nothing> newMasterDetected3, newMasterDetected4;
  EXPECT_CALL(slave, newMasterDetected(_))
    .WillOnce(FutureSatisfy(&newMasterDetected3))
    .WillOnce(FutureSatisfy(&newMasterDetected4));

  EXPECT_CALL(slave, noMasterDetected())
    .Times(AtMost(1));

  process::spawn(slave);

  // Slave's master detector.
  ZooKeeperMasterDetector slaveDetector(
      url.get(), slave.self(), false, true);

  AWAIT_READY(newMasterDetected3);

  // Now expire the slave's and leading master's zk sessions.
  // NOTE: Here we assume that slave stays disconnected from the ZK when the
  // leading master loses its session.
  Future<int64_t> slaveSession = slaveDetector.session();
  AWAIT_READY(slaveSession);

  server->expireSession(slaveSession.get());

  Future<int64_t> masterSession = masterDetector1.session();
  AWAIT_READY(masterSession);

  server->expireSession(masterSession.get());

  // Wait for session expiration and ensure we receive a
  // NewMasterDetected message.
  AWAIT_READY_FOR(newMasterDetected4, Seconds(10));

  process::terminate(slave);
  process::wait(slave);

  process::terminate(master2);
  process::wait(master2);

  process::terminate(master1);
  process::wait(master1);
}
// This test ensures that the master responds with the latest state
// for tasks that are terminal at the master, but have not been
// acknowledged by the framework. See MESOS-1389.
TEST_F(ReconciliationTest, UnacknowledgedTerminalTask)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Try<PID<Slave> > slave = StartSlave(&containerizer);
  ASSERT_SOME(slave);

  // Launch a framework and get a task into a terminal state.
  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(LaunchTasks(DEFAULT_EXECUTOR_INFO, 1, 1, 512, "*"))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_FINISHED));

  Future<TaskStatus> update1;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&update1));

  // Prevent the slave from retrying the status update by
  // only allowing a single update through to the master.
  DROP_PROTOBUFS(StatusUpdateMessage(), _, master.get());
  FUTURE_PROTOBUF(StatusUpdateMessage(), _, master.get());

  // Drop the status update acknowledgements to ensure that the
  // task remains terminal and unacknowledged in the master.
  DROP_PROTOBUFS(StatusUpdateAcknowledgementMessage(), _, _);

  driver.start();

  // Wait until the framework is registered.
  AWAIT_READY(frameworkId);

  AWAIT_READY(update1);
  EXPECT_EQ(TASK_FINISHED, update1.get().state());
  EXPECT_TRUE(update1.get().has_slave_id());

  // Framework should receive a TASK_FINISHED update, since the
  // master did not receive the acknowledgement.
  Future<TaskStatus> update2;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&update2));

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  vector<TaskStatus> statuses;
  driver.reconcileTasks(statuses);

  AWAIT_READY(update2);
  EXPECT_EQ(TASK_FINISHED, update2.get().state());
  EXPECT_TRUE(update2.get().has_slave_id());

  driver.stop();
  driver.join();

  Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
// This test verifies that the slave task status label decorator can
// add and remove labels from a TaskStatus during the status update
// sequence. A TaskStatus with two labels ("foo":"bar" and
// "bar":"baz") is sent from the executor. The labels get modified by
// the slave hook to strip the "foo":"bar" pair and add a new
// "baz":"qux" pair.
TEST_F(HookTest, VerifySlaveTaskStatusDecorator)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), &containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_EQ(1u, offers.get().size());

  // Start a task.
  TaskInfo task = createTask(offers.get()[0], "", DEFAULT_EXECUTOR_ID);

  ExecutorDriver* execDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  Future<TaskInfo> execTask;
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(FutureArg<1>(&execTask));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(execTask);

  // Now send TASK_RUNNING update with two labels. The first label
  // ("foo:bar") will be removed by the task status hook to ensure
  // that it can remove labels. The second label will be preserved
  // and forwarded to Master (and eventually to the framework).
  // The hook also adds a new label with the same key but a different
  // value ("bar:quz").
  TaskStatus runningStatus;
  runningStatus.mutable_task_id()->MergeFrom(execTask.get().task_id());
  runningStatus.set_state(TASK_RUNNING);

  // Add two labels to the TaskStatus.
  Labels* labels = runningStatus.mutable_labels();

  labels->add_labels()->CopyFrom(createLabel("foo", "bar"));
  labels->add_labels()->CopyFrom(createLabel("bar", "baz"));

  execDriver->sendStatusUpdate(runningStatus);

  AWAIT_READY(status);

  // The hook will hang an extra label off.
  const Labels& labels_ = status.get().labels();

  EXPECT_EQ(2, labels_.labels_size());

  // The test hook will prepend a new "baz":"qux" label.
  EXPECT_EQ("bar", labels_.labels(0).key());
  EXPECT_EQ("qux", labels_.labels(0).value());

  // And lastly, we only expect the "foo":"bar" pair to be stripped by
  // the module. The last pair should be the original "bar":"baz"
  // pair set by the test.
  EXPECT_EQ("bar", labels_.labels(1).key());
  EXPECT_EQ("baz", labels_.labels(1).value());

  // Now validate TaskInfo.container_status. We must have received a
  // container_status with one network_info set by the test hook module.
  EXPECT_TRUE(status.get().has_container_status());
  EXPECT_EQ(1, status.get().container_status().network_infos().size());

  const NetworkInfo networkInfo =
    status.get().container_status().network_infos(0);

  // The hook module sets up '4.3.2.1' as the IP address and 'public' as the
  // network isolation group. The `ip_address` field is deprecated, but the
  // hook module should continue to set it as well as the new `ip_addresses`
  // field for now.
  EXPECT_TRUE(networkInfo.has_ip_address());
  EXPECT_EQ("4.3.2.1", networkInfo.ip_address());

  EXPECT_EQ(1, networkInfo.ip_addresses().size());
  EXPECT_TRUE(networkInfo.ip_addresses(0).has_ip_address());
  EXPECT_EQ("4.3.2.1", networkInfo.ip_addresses(0).ip_address());

  EXPECT_EQ(1, networkInfo.groups().size());
  EXPECT_EQ("public", networkInfo.groups(0));

  EXPECT_TRUE(networkInfo.has_labels());
  EXPECT_EQ(1, networkInfo.labels().labels().size());

  const Label networkInfoLabel = networkInfo.labels().labels(0);

  // Finally, the labels set inside NetworkInfo by the hook module.
  EXPECT_EQ("net_foo", networkInfoLabel.key());
  EXPECT_EQ("net_bar", networkInfoLabel.value());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();
}
// This test verifies that reconciliation sends the latest task
// status when the task state does not match between the framework
// and the master.
TEST_F(ReconciliationTest, TaskStateMismatch)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Try<PID<Slave> > slave = StartSlave(&containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(LaunchTasks(DEFAULT_EXECUTOR_INFO, 1, 1, 512, "*"))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> update;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&update));

  driver.start();

  // Wait until the framework is registered.
  AWAIT_READY(frameworkId);

  AWAIT_READY(update);
  EXPECT_EQ(TASK_RUNNING, update.get().state());
  EXPECT_TRUE(update.get().has_slave_id());

  const TaskID taskId = update.get().task_id();
  const SlaveID slaveId = update.get().slave_id();

  // If the framework believes the task is in a different state, the
  // master should report the task's current state.
  Future<TaskStatus> update2;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&update2));

  vector<TaskStatus> statuses;

  TaskStatus status;
  status.mutable_task_id()->CopyFrom(taskId);
  status.mutable_slave_id()->CopyFrom(slaveId);
  status.set_state(TASK_KILLED);
  statuses.push_back(status);

  driver.reconcileTasks(statuses);

  AWAIT_READY(update2);
  EXPECT_EQ(TASK_RUNNING, update2.get().state());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
}
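// For context, a hedged sketch of how a framework might drive the
// reconciliation exercised above. The helper name is hypothetical; the only
// API used is MesosSchedulerDriver::reconcileTasks, which performs explicit
// reconciliation for the supplied statuses and implicit reconciliation of
// all of the framework's known tasks when the vector is empty.

#include <vector>

#include <mesos/scheduler.hpp>

#include <stout/none.hpp>
#include <stout/option.hpp>

// Ask the master for the latest state of one task, or of every task it
// knows about when no TaskID is given (implicit reconciliation).
void reconcile(
    mesos::MesosSchedulerDriver* driver,
    const Option<mesos::TaskID>& taskId = None())
{
  std::vector<mesos::TaskStatus> statuses;

  if (taskId.isSome()) {
    mesos::TaskStatus status;
    status.mutable_task_id()->CopyFrom(taskId.get());

    // The state is required by the protobuf, but the master replies with
    // whatever state it currently believes is the latest.
    status.set_state(mesos::TASK_STAGING);

    statuses.push_back(status);
  }

  driver->reconcileTasks(statuses);
}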
// This test ensures that a status update acknowledgement from a
// non-leading master is ignored.
TEST_F(SlaveTest, IgnoreNonLeaderStatusUpdateAcknowledgement)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave> > slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver schedDriver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&schedDriver, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&schedDriver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  // We need to grab this message to get the scheduler's pid.
  Future<process::Message> frameworkRegisteredMessage = FUTURE_MESSAGE(
      Eq(FrameworkRegisteredMessage().GetTypeName()), master.get(), _);

  schedDriver.start();

  AWAIT_READY(frameworkRegisteredMessage);
  const process::UPID schedulerPid = frameworkRegisteredMessage.get().to;

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  TaskInfo task = createTask(offers.get()[0], "", DEFAULT_EXECUTOR_ID);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  Future<ExecutorDriver*> execDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(FutureArg<0>(&execDriver));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> update;
  EXPECT_CALL(sched, statusUpdate(&schedDriver, _))
    .WillOnce(FutureArg<1>(&update));

  // Pause the clock to prevent status update retries on the slave.
  Clock::pause();

  // Intercept the acknowledgement sent to the slave so that we can
  // spoof the master's pid.
  Future<StatusUpdateAcknowledgementMessage> acknowledgementMessage =
    DROP_PROTOBUF(StatusUpdateAcknowledgementMessage(),
                  master.get(),
                  slave.get());

  Future<Nothing> _statusUpdateAcknowledgement =
    FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement);

  schedDriver.launchTasks(offers.get()[0].id(), tasks);

  AWAIT_READY(update);
  EXPECT_EQ(TASK_RUNNING, update.get().state());

  AWAIT_READY(acknowledgementMessage);

  // Send the acknowledgement to the slave with a non-leading master.
  process::post(
      process::UPID("master@localhost:1"),
      slave.get(),
      acknowledgementMessage.get());

  // Make sure the acknowledgement was ignored.
  Clock::settle();
  ASSERT_TRUE(_statusUpdateAcknowledgement.isPending());

  // Make sure the status update gets retried because the slave
  // ignored the acknowledgement.
  Future<TaskStatus> retriedUpdate;
  EXPECT_CALL(sched, statusUpdate(&schedDriver, _))
    .WillOnce(FutureArg<1>(&retriedUpdate));

  Clock::advance(slave::STATUS_UPDATE_RETRY_INTERVAL_MIN);

  AWAIT_READY(retriedUpdate);

  // Ensure the slave receives and properly handles the ACK.
  // Clock::settle() ensures that the slave successfully
  // executes Slave::_statusUpdateAcknowledgement().
  AWAIT_READY(_statusUpdateAcknowledgement);
  Clock::settle();

  Clock::resume();

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  schedDriver.stop();
  schedDriver.join();

  Shutdown();
}
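// Conceptually, the protection exercised above amounts to the slave only
// acting on acknowledgements sent by the master it is currently registered
// with. The function below is only a sketch of that check under assumed
// names ('leadingMaster' stands in for the slave's view of its master); it
// is not the actual Slave::statusUpdateAcknowledgement implementation.

#include <glog/logging.h>

#include <process/pid.hpp>

bool shouldAcceptAcknowledgement(
    const process::UPID& from,
    const process::UPID& leadingMaster)
{
  if (from != leadingMaster) {
    LOG(WARNING) << "Ignoring status update acknowledgement from " << from
                 << " because it is not the expected master "
                 << leadingMaster;
    return false;
  }

  return true;
}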
// This test verifies that a slave disconnection that comes before
// '_launchTasks()' is called results in TASK_LOST.
TEST_F(MasterAuthorizationTest, SlaveDisconnected)
{
  MockAuthorizer authorizer;
  Try<PID<Master> > master = StartMaster(&authorizer);
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  // Create a checkpointing slave so that a disconnected slave is not
  // immediately removed.
  slave::Flags flags = CreateSlaveFlags();
  flags.checkpoint = true;

  Try<PID<Slave> > slave = StartSlave(&exec, flags);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _))
    .Times(1);

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  TaskInfo task = createTask(offers.get()[0], "", DEFAULT_EXECUTOR_ID);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  // Return a pending future from the authorizer.
  Future<Nothing> future;
  Promise<bool> promise;
  EXPECT_CALL(authorizer, authorize(An<const mesos::ACL::RunTasks&>()))
    .WillOnce(DoAll(FutureSatisfy(&future),
                    Return(promise.future())));

  driver.launchTasks(offers.get()[0].id(), tasks);

  // Wait until authorization is in progress.
  AWAIT_READY(future);

  EXPECT_CALL(sched, slaveLost(&driver, _))
    .Times(AtMost(1));

  Future<Nothing> slaveDisconnected =
    FUTURE_DISPATCH(_, &AllocatorProcess::slaveDisconnected);

  // Now stop the slave.
  Stop(slave.get());

  AWAIT_READY(slaveDisconnected);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  Future<Nothing> resourcesRecovered =
    FUTURE_DISPATCH(_, &AllocatorProcess::resourcesRecovered);

  // Now complete authorization.
  promise.set(true);

  // The framework should get a TASK_LOST.
  AWAIT_READY(status);
  EXPECT_EQ(TASK_LOST, status.get().state());

  // No task launch should happen, resulting in all resources being
  // returned to the allocator.
  AWAIT_READY(resourcesRecovered);

  driver.stop();
  driver.join();

  Shutdown(); // Must shutdown before 'exec' gets deallocated.
}
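// Framework-side view of the contract this test pins down: because the
// launch raced with the slave disconnection, the task never started, so the
// scheduler receives TASK_LOST and can safely queue the task for relaunch
// on a later offer. A hedged sketch with hypothetical names:

#include <queue>

#include <mesos/mesos.hpp>

void handleStatusUpdate(
    const mesos::TaskStatus& status,
    std::queue<mesos::TaskID>* relaunchQueue)
{
  if (status.state() == mesos::TASK_LOST) {
    // The task did not run; remember it so it can be launched again when a
    // suitable offer arrives.
    relaunchQueue->push(status.task_id());
  }
}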