TEST_F(ResourceOffersTest, Request) { TestAllocator<master::allocator::HierarchicalDRFAllocator> allocator; EXPECT_CALL(allocator, initialize(_, _, _, _)) .Times(1); Try<PID<Master>> master = StartMaster(&allocator); ASSERT_SOME(master); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(allocator, addFramework(_, _, _)) .Times(1); Future<Nothing> registered; EXPECT_CALL(sched, registered(&driver, _, _)) .WillOnce(FutureSatisfy(®istered)); driver.start(); AWAIT_READY(registered); vector<Request> sent; Request request; request.mutable_slave_id()->set_value("test"); sent.push_back(request); Future<vector<Request>> received; EXPECT_CALL(allocator, requestResources(_, _)) .WillOnce(FutureArg<1>(&received)); driver.requestResources(sent); AWAIT_READY(received); EXPECT_EQ(sent.size(), received.get().size()); EXPECT_NE(0u, received.get().size()); EXPECT_EQ(request.slave_id(), received.get()[0].slave_id()); driver.stop(); driver.join(); Shutdown(); }
// Ensures that the driver can handle the SUBSCRIBED event. TEST_F(SchedulerDriverEventTest, Subscribed) { Try<PID<Master>> master = StartMaster(); ASSERT_SOME(master); FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO; frameworkInfo.set_failover_timeout(Weeks(2).secs()); // Make sure the initial registration calls 'registered'. MockScheduler sched; MesosSchedulerDriver driver( &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL); // Intercept the registration message, send a SUBSCRIBED instead. Future<Message> frameworkRegisteredMessage = DROP_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _); // Ensure that there will be no (re-)registration retries // from the scheduler driver. Clock::pause(); driver.start(); AWAIT_READY(frameworkRegisteredMessage); UPID frameworkPid = frameworkRegisteredMessage.get().to; FrameworkRegisteredMessage message; ASSERT_TRUE(message.ParseFromString(frameworkRegisteredMessage.get().body)); FrameworkID frameworkId = message.framework_id(); frameworkInfo.mutable_id()->CopyFrom(frameworkId); Event event; event.set_type(Event::SUBSCRIBED); event.mutable_subscribed()->mutable_framework_id()->CopyFrom(frameworkId); Future<Nothing> registered; EXPECT_CALL(sched, registered(&driver, frameworkId, _)) .WillOnce(FutureSatisfy(®istered)); process::post(master.get(), frameworkPid, event); AWAIT_READY(registered); }
// Ensures that the driver can handle the FAILURE event. TEST_F(SchedulerDriverEventTest, Failure) { Try<PID<Master>> master = StartMaster(); ASSERT_SOME(master); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<Message> frameworkRegisteredMessage = FUTURE_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _); driver.start(); AWAIT_READY(frameworkRegisteredMessage); UPID frameworkPid = frameworkRegisteredMessage.get().to; // Send a failure for an executor, this should be dropped // to match the existing behavior of the scheduler driver. SlaveID slaveId; slaveId.set_value("S"); Event event; event.set_type(Event::FAILURE); event.mutable_failure()->mutable_slave_id()->CopyFrom(slaveId); event.mutable_failure()->mutable_executor_id()->set_value("E"); process::post(master.get(), frameworkPid, event); // Now, post a failure for a slave and expect a 'slaveLost'. event.mutable_failure()->clear_executor_id(); Future<Nothing> slaveLost; EXPECT_CALL(sched, slaveLost(&driver, slaveId)) .WillOnce(FutureSatisfy(&slaveLost)); process::post(master.get(), frameworkPid, event); AWAIT_READY(slaveLost); }
// Ensures that the driver can handle the MESSAGE event. TEST_F(SchedulerDriverEventTest, Message) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<Message> frameworkRegisteredMessage = FUTURE_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _); driver.start(); AWAIT_READY(frameworkRegisteredMessage); UPID frameworkPid = frameworkRegisteredMessage.get().to; Event event; event.set_type(Event::MESSAGE); event.mutable_message()->mutable_slave_id()->set_value("S"); event.mutable_message()->mutable_executor_id()->set_value("E"); event.mutable_message()->set_data("data"); Future<Nothing> frameworkMessage; EXPECT_CALL(sched, frameworkMessage( &driver, event.message().executor_id(), event.message().slave_id(), event.message().data())) .WillOnce(FutureSatisfy(&frameworkMessage)); process::post(master.get()->pid, frameworkPid, event); AWAIT_READY(frameworkMessage); driver.stop(); driver.join(); }
TEST_F(MesosSchedulerDriverTest, MetricsEndpoint) { Try<PID<Master>> master = StartMaster(); ASSERT_SOME(master); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); Future<Nothing> registered; EXPECT_CALL(sched, registered(&driver, _, _)) .WillOnce(FutureSatisfy(®istered)); ASSERT_EQ(DRIVER_RUNNING, driver.start()); AWAIT_READY(registered); Future<process::http::Response> response = process::http::get(MetricsProcess::instance()->self(), "/snapshot"); AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response); AWAIT_EXPECT_RESPONSE_HEADER_EQ(APPLICATION_JSON, "Content-Type", response); Try<JSON::Object> parse = JSON::parse<JSON::Object>(response.get().body); ASSERT_SOME(parse); JSON::Object metrics = parse.get(); EXPECT_EQ(1u, metrics.values.count("scheduler/event_queue_messages")); EXPECT_EQ(1u, metrics.values.count("scheduler/event_queue_dispatches")); driver.stop(); driver.join(); Shutdown(); }
// Ensures the scheduler driver can handle the UPDATE event. TEST_F(SchedulerDriverEventTest, Update) { Try<PID<Master>> master = StartMaster(); ASSERT_SOME(master); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<Message> frameworkRegisteredMessage = FUTURE_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _); driver.start(); AWAIT_READY(frameworkRegisteredMessage); UPID frameworkPid = frameworkRegisteredMessage.get().to; FrameworkRegisteredMessage message; ASSERT_TRUE(message.ParseFromString(frameworkRegisteredMessage.get().body)); FrameworkID frameworkId = message.framework_id(); SlaveID slaveId; slaveId.set_value("S"); TaskID taskId; taskId.set_value("T"); ExecutorID executorId; executorId.set_value("E"); // Generate an update that needs no acknowledgement. Event event; event.set_type(Event::UPDATE); event.mutable_update()->mutable_status()->CopyFrom( protobuf::createStatusUpdate( frameworkId, slaveId, taskId, TASK_RUNNING, TaskStatus::SOURCE_MASTER, None(), "message", None(), executorId).status()); Future<Nothing> statusUpdate; Future<Nothing> statusUpdate2; EXPECT_CALL(sched, statusUpdate(&driver, event.update().status())) .WillOnce(FutureSatisfy(&statusUpdate)) .WillOnce(FutureSatisfy(&statusUpdate2)); process::post(master.get(), frameworkPid, event); AWAIT_READY(statusUpdate); // Generate an update that requires acknowledgement. event.mutable_update()->mutable_status()->set_uuid(UUID::random().toBytes()); Future<mesos::scheduler::Call> acknowledgement = DROP_CALL( mesos::scheduler::Call(), mesos::scheduler::Call::ACKNOWLEDGE, _, _); process::post(master.get(), frameworkPid, event); AWAIT_READY(statusUpdate2); AWAIT_READY(acknowledgement); }
// Ensures that the driver can handle an OFFERS event. // Note that this includes the ability to bypass the // master when sending framework messages. TEST_F(SchedulerDriverEventTest, Offers) { Try<PID<Master>> master = StartMaster(); ASSERT_SOME(master); MockScheduler sched; MesosSchedulerDriver schedDriver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&schedDriver, _, _)); Future<Message> frameworkRegisteredMessage = FUTURE_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _); schedDriver.start(); AWAIT_READY(frameworkRegisteredMessage); UPID frameworkPid = frameworkRegisteredMessage.get().to; // Start a slave and capture the offers. Future<ResourceOffersMessage> resourceOffersMessage = DROP_PROTOBUF(ResourceOffersMessage(), _, _); MockExecutor exec(DEFAULT_EXECUTOR_ID); Try<PID<Slave>> slave = StartSlave(&exec); ASSERT_SOME(slave); AWAIT_READY(resourceOffersMessage); google::protobuf::RepeatedPtrField<Offer> offers = resourceOffersMessage.get().offers(); ASSERT_EQ(1, offers.size()); // Ignore future offer messages. DROP_PROTOBUFS(ResourceOffersMessage(), _, _); // Send the offers event and expect a 'resourceOffers' call. Event event; event.set_type(Event::OFFERS); event.mutable_offers()->mutable_offers()->CopyFrom(offers); Future<Nothing> resourceOffers; EXPECT_CALL(sched, resourceOffers(&schedDriver, _)) .WillOnce(FutureSatisfy(&resourceOffers)); process::post(master.get(), frameworkPid, event); AWAIT_READY(resourceOffers); // To test that the framework -> executor messages are // sent directly to the slave, launch a task and send // the executor a message. EXPECT_CALL(exec, registered(_, _, _, _)); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&schedDriver, _)) .WillOnce(FutureArg<1>(&status)); TaskInfo task = createTask(offers.Get(0), "", DEFAULT_EXECUTOR_ID); schedDriver.launchTasks(offers.Get(0).id(), {task}); AWAIT_READY(status); EXPECT_EQ(TASK_RUNNING, status.get().state()); // This message should skip the master! Future<FrameworkToExecutorMessage> frameworkToExecutorMessage = FUTURE_PROTOBUF(FrameworkToExecutorMessage(), frameworkPid, slave.get()); Future<string> data; EXPECT_CALL(exec, frameworkMessage(_, _)) .WillOnce(FutureArg<1>(&data)); schedDriver.sendFrameworkMessage( DEFAULT_EXECUTOR_ID, offers.Get(0).slave_id(), "hello"); AWAIT_READY(frameworkToExecutorMessage); AWAIT_EXPECT_EQ("hello", data); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); schedDriver.stop(); schedDriver.join(); Shutdown(); }
// This test verifies that a framework attempting to subscribe // after its failover timeout has elapsed is disallowed. TEST_F(HttpFaultToleranceTest, SchedulerSubscribeAfterFailoverTimeout) { master::Flags flags = CreateMasterFlags(); flags.authenticate_frameworks = false; v1::FrameworkInfo frameworkInfo = v1::DEFAULT_FRAMEWORK_INFO; frameworkInfo.set_failover_timeout(Weeks(2).secs()); Try<Owned<cluster::Master>> master = StartMaster(flags); ASSERT_SOME(master); Future<Nothing> deactivateFramework = FUTURE_DISPATCH( _, &master::allocator::MesosAllocatorProcess::deactivateFramework); v1::FrameworkID frameworkId; ContentType contentType = ContentType::PROTOBUF; // Launch the first (i.e., failing) scheduler and wait until it receives // a `SUBSCRIBED` event to launch the second (i.e., failover) scheduler. { auto scheduler = std::make_shared<v1::MockHTTPScheduler>(); Future<Nothing> connected; EXPECT_CALL(*scheduler, connected(_)) .WillOnce(FutureSatisfy(&connected)); v1::scheduler::TestMesos schedulerLibrary( master.get()->pid, contentType, scheduler); AWAIT_READY(connected); Future<Event::Subscribed> subscribed; EXPECT_CALL(*scheduler, subscribed(_, _)) .WillOnce(FutureArg<1>(&subscribed)); EXPECT_CALL(*scheduler, heartbeat(_)) .WillRepeatedly(Return()); // Ignore heartbeats. { Call call; call.set_type(Call::SUBSCRIBE); Call::Subscribe* subscribe = call.mutable_subscribe(); subscribe->mutable_framework_info()->CopyFrom(frameworkInfo); schedulerLibrary.send(call); } AWAIT_READY(subscribed); frameworkId = subscribed->framework_id(); } // Wait until master schedules the framework for removal. AWAIT_READY(deactivateFramework); // Simulate framework failover timeout. Clock::pause(); Clock::settle(); Try<Duration> failoverTimeout = Duration::create(frameworkInfo.failover_timeout()); ASSERT_SOME(failoverTimeout); Future<Nothing> frameworkFailoverTimeout = FUTURE_DISPATCH(_, &Master::frameworkFailoverTimeout); Clock::advance(failoverTimeout.get()); Clock::resume(); // Wait until master actually marks the framework as completed. AWAIT_READY(frameworkFailoverTimeout); // Now launch the second (i.e., failover) scheduler using the // framework id recorded from the first scheduler. { auto scheduler = std::make_shared<v1::MockHTTPScheduler>(); Future<Nothing> connected; EXPECT_CALL(*scheduler, connected(_)) .WillOnce(FutureSatisfy(&connected)) .WillRepeatedly(Return()); // Ignore future invocations. v1::scheduler::TestMesos schedulerLibrary( master.get()->pid, contentType, scheduler); AWAIT_READY(connected); // Framework should get `Error` event because the framework with this id // is marked as completed. Future<Nothing> error; EXPECT_CALL(*scheduler, error(_, _)) .WillOnce(FutureSatisfy(&error)); EXPECT_CALL(*scheduler, disconnected(_)) .Times(AtMost(1)); { Call call; call.mutable_framework_id()->CopyFrom(frameworkId); call.set_type(Call::SUBSCRIBE); Call::Subscribe* subscribe = call.mutable_subscribe(); subscribe->mutable_framework_info()->CopyFrom(v1::DEFAULT_FRAMEWORK_INFO); subscribe->mutable_framework_info()->mutable_id()->CopyFrom(frameworkId); schedulerLibrary.send(call); } AWAIT_READY(error); } }
// This test verifies that the master reconciles tasks that are // missing from a re-registering slave. In this case, we trigger // a race between the slave re-registration message and the launch // message. There should be no TASK_LOST. // This was motivated by MESOS-1696. TEST_F(MasterSlaveReconciliationTest, ReconcileRace) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); StandaloneMasterDetector detector(master.get()->pid); Future<SlaveRegisteredMessage> slaveRegisteredMessage = FUTURE_PROTOBUF(SlaveRegisteredMessage(), master.get()->pid, _); Try<Owned<cluster::Slave>> slave = StartSlave(&detector, &containerizer); ASSERT_SOME(slave); AWAIT_READY(slaveRegisteredMessage); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); // Since the agent may have retried registration, we want to // ensure that any duplicate registrations are flushed before // we appoint the master again. Otherwise, the agent may // receive a stale registration message. Clock::pause(); Clock::settle(); Clock::resume(); // Trigger a re-registration of the slave and capture the message // so that we can spoof a race with a launch task message. DROP_PROTOBUFS(ReregisterSlaveMessage(), slave.get()->pid, master.get()->pid); Future<ReregisterSlaveMessage> reregisterSlaveMessage = DROP_PROTOBUF( ReregisterSlaveMessage(), slave.get()->pid, master.get()->pid); detector.appoint(master.get()->pid); AWAIT_READY(reregisterSlaveMessage); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); TaskInfo task; task.set_name("test task"); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); ExecutorDriver* executorDriver; EXPECT_CALL(exec, registered(_, _, _, _)) .WillOnce(SaveArg<0>(&executorDriver)); // Leave the task in TASK_STAGING. Future<Nothing> launchTask; EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(FutureSatisfy(&launchTask)); EXPECT_CALL(sched, statusUpdate(&driver, _)) .Times(0); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(launchTask); // Send the stale re-registration message, which does not contain // the task we just launched. This will trigger a reconciliation // by the master. Future<SlaveReregisteredMessage> slaveReregisteredMessage = FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _); // Prevent this from being dropped per the DROP_PROTOBUFS above. FUTURE_PROTOBUF( ReregisterSlaveMessage(), slave.get()->pid, master.get()->pid); process::post( slave.get()->pid, master.get()->pid, reregisterSlaveMessage.get()); AWAIT_READY(slaveReregisteredMessage); // Neither the master nor the slave should send a TASK_LOST // as part of the reconciliation. We check this by calling // Clock::settle() to flush all pending events. Clock::pause(); Clock::settle(); Clock::resume(); // Now send TASK_FINISHED and make sure it's the only message // received by the scheduler. Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); TaskStatus taskStatus; taskStatus.mutable_task_id()->CopyFrom(task.task_id()); taskStatus.set_state(TASK_FINISHED); executorDriver->sendStatusUpdate(taskStatus); AWAIT_READY(status); ASSERT_EQ(TASK_FINISHED, status.get().state()); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); }
// This test checks that a scheduler gets a slave lost // message for a partitioned slave. TEST_F(PartitionTest, PartitionedSlave) { master::Flags masterFlags = CreateMasterFlags(); Try<Owned<cluster::Master>> master = StartMaster(masterFlags); ASSERT_SOME(master); // Set these expectations up before we spawn the slave so that we // don't miss the first PING. Future<Message> ping = FUTURE_MESSAGE( Eq(PingSlaveMessage().GetTypeName()), _, _); // Drop all the PONGs to simulate slave partition. DROP_PROTOBUFS(PongSlaveMessage(), _, _); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get()); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<Nothing> resourceOffers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureSatisfy(&resourceOffers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); // Need to make sure the framework AND slave have registered with // master. Waiting for resource offers should accomplish both. AWAIT_READY(resourceOffers); Clock::pause(); EXPECT_CALL(sched, offerRescinded(&driver, _)) .Times(AtMost(1)); Future<Nothing> slaveLost; EXPECT_CALL(sched, slaveLost(&driver, _)) .WillOnce(FutureSatisfy(&slaveLost)); // Now advance through the PINGs. size_t pings = 0; while (true) { AWAIT_READY(ping); pings++; if (pings == masterFlags.max_slave_ping_timeouts) { break; } ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _); Clock::advance(masterFlags.slave_ping_timeout); } Clock::advance(masterFlags.slave_ping_timeout); AWAIT_READY(slaveLost); slave.get()->terminate(); slave->reset(); JSON::Object stats = Metrics(); EXPECT_EQ(1, stats.values["master/slave_removals"]); EXPECT_EQ(1, stats.values["master/slave_removals/reason_unhealthy"]); driver.stop(); driver.join(); Clock::resume(); }
// The purpose of this test is to ensure that when slaves are removed // from the master, and then attempt to send status updates, we send // a ShutdownMessage to the slave. Why? Because during a network // partition, the master will remove a partitioned slave, thus sending // its tasks to LOST. At this point, when the partition is removed, // the slave may attempt to send updates if it was unaware that the // master removed it. We've already notified frameworks that these // tasks were LOST, so we have to have the slave shut down. TEST_F(PartitionTest, PartitionedSlaveStatusUpdates) { master::Flags masterFlags = CreateMasterFlags(); Try<Owned<cluster::Master>> master = StartMaster(masterFlags); ASSERT_SOME(master); // Allow the master to PING the slave, but drop all PONG messages // from the slave. Note that we don't match on the master / slave // PIDs because it's actually the SlaveObserver Process that sends // the pings. Future<Message> ping = FUTURE_MESSAGE( Eq(PingSlaveMessage().GetTypeName()), _, _); DROP_PROTOBUFS(PongSlaveMessage(), _, _); Future<SlaveRegisteredMessage> slaveRegisteredMessage = FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer); ASSERT_SOME(slave); AWAIT_READY(slaveRegisteredMessage); SlaveID slaveId = slaveRegisteredMessage.get().slave_id(); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); Future<FrameworkID> frameworkId; EXPECT_CALL(sched, registered(&driver, _, _)) .WillOnce(FutureArg<1>(&frameworkId)); EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillRepeatedly(Return()); driver.start(); AWAIT_READY(frameworkId); // Drop the first shutdown message from the master (simulated // partition), allow the second shutdown message to pass when // the slave sends an update. Future<ShutdownMessage> shutdownMessage = DROP_PROTOBUF(ShutdownMessage(), _, slave.get()->pid); EXPECT_CALL(sched, offerRescinded(&driver, _)) .WillRepeatedly(Return()); Future<Nothing> slaveLost; EXPECT_CALL(sched, slaveLost(&driver, _)) .WillOnce(FutureSatisfy(&slaveLost)); Clock::pause(); // Now, induce a partition of the slave by having the master // timeout the slave. size_t pings = 0; while (true) { AWAIT_READY(ping); pings++; if (pings == masterFlags.max_slave_ping_timeouts) { break; } ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _); Clock::advance(masterFlags.slave_ping_timeout); Clock::settle(); } Clock::advance(masterFlags.slave_ping_timeout); Clock::settle(); // Wait for the master to attempt to shut down the slave. AWAIT_READY(shutdownMessage); // The master will notify the framework that the slave was lost. AWAIT_READY(slaveLost); shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()->pid); // At this point, the slave still thinks it's registered, so we // simulate a status update coming from the slave. TaskID taskId; taskId.set_value("task_id"); const StatusUpdate& update = protobuf::createStatusUpdate( frameworkId.get(), slaveId, taskId, TASK_RUNNING, TaskStatus::SOURCE_SLAVE, UUID::random()); StatusUpdateMessage message; message.mutable_update()->CopyFrom(update); message.set_pid(stringify(slave.get()->pid)); process::post(master.get()->pid, message); // The master should shutdown the slave upon receiving the update. AWAIT_READY(shutdownMessage); Clock::resume(); driver.stop(); driver.join(); }
// Ensures that the driver can handle the SUBSCRIBED event // after a master failover. TEST_F(SchedulerDriverEventTest, SubscribedMasterFailover) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO; frameworkInfo.set_failover_timeout(Weeks(2).secs()); // Make sure the initial registration calls 'registered'. MockScheduler sched; StandaloneMasterDetector detector(master.get()->pid); TestingMesosSchedulerDriver driver(&sched, &detector, frameworkInfo); // Intercept the registration message, send a SUBSCRIBED instead. Future<Message> frameworkRegisteredMessage = DROP_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _); // Ensure that there will be no (re-)registration retries // from the scheduler driver. Clock::pause(); driver.start(); AWAIT_READY(frameworkRegisteredMessage); UPID frameworkPid = frameworkRegisteredMessage.get().to; FrameworkRegisteredMessage message; ASSERT_TRUE(message.ParseFromString(frameworkRegisteredMessage.get().body)); FrameworkID frameworkId = message.framework_id(); frameworkInfo.mutable_id()->CopyFrom(frameworkId); Event event; event.set_type(Event::SUBSCRIBED); event.mutable_subscribed()->mutable_framework_id()->CopyFrom(frameworkId); Future<Nothing> registered; EXPECT_CALL(sched, registered(&driver, frameworkId, _)) .WillOnce(FutureSatisfy(®istered)); process::post(master.get()->pid, frameworkPid, event); AWAIT_READY(registered); EXPECT_CALL(sched, disconnected(&driver)); // Fail over the master and expect a 'reregistered' call. // Note that the master sends a registered message for // this case (see MESOS-786). master->reset(); master = StartMaster(); ASSERT_SOME(master); frameworkRegisteredMessage = DROP_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _); detector.appoint(master.get()->pid); AWAIT_READY(frameworkRegisteredMessage); Future<Nothing> reregistered; EXPECT_CALL(sched, reregistered(&driver, _)) .WillOnce(FutureSatisfy(&reregistered)); process::post(master.get()->pid, frameworkPid, event); AWAIT_READY(reregistered); driver.stop(); driver.join(); }
// This is a simple end to end test that makes sure a master using log // storage with ZooKeeper can successfully launch a task. TEST_F(RegistrarZooKeeperTest, TaskRunning) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); Try<PID<Slave> > slave = StartSlave(&containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)) .Times(1); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); TaskInfo task = createTask(offers.get()[0], "dummy", DEFAULT_EXECUTOR_ID); EXPECT_CALL(exec, registered(_, _, _, _)) .Times(1); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); Future<Nothing> resourcesUpdated; EXPECT_CALL(containerizer, update(_, Resources(offers.get()[0].resources()))) .WillOnce(DoAll(FutureSatisfy(&resourcesUpdated), Return(Nothing()))); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(status); EXPECT_EQ(TASK_RUNNING, status.get().state()); AWAIT_READY(resourcesUpdated); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); Shutdown(); // Must shutdown before 'containerizer' gets deallocated. }
// This test ensures that destroy can be called while in the // launch loop. The composing containerizer still calls the // underlying containerizer's destroy (because it's not sure // if the containerizer can handle the type of container being // launched). If the launch is not supported by the 1st containerizer, // the composing containerizer should stop the launch loop and // set the value of destroy future to true. TEST_F(ComposingContainerizerTest, DestroyDuringUnsupportedLaunchLoop) { vector<Containerizer*> containerizers; MockContainerizer* mockContainerizer1 = new MockContainerizer(); MockContainerizer* mockContainerizer2 = new MockContainerizer(); containerizers.push_back(mockContainerizer1); containerizers.push_back(mockContainerizer2); ComposingContainerizer containerizer(containerizers); ContainerID containerId; containerId.set_value("container"); TaskInfo taskInfo; ExecutorInfo executorInfo; SlaveID slaveId; std::map<std::string, std::string> environment; Promise<bool> launchPromise; EXPECT_CALL(*mockContainerizer1, launch(_, _, _, _, _, _, _, _)) .WillOnce(Return(launchPromise.future())); Future<Nothing> destroy; Promise<bool> destroyPromise; EXPECT_CALL(*mockContainerizer1, destroy(_)) .WillOnce(DoAll(FutureSatisfy(&destroy), Return(destroyPromise.future()))); Future<bool> launched = containerizer.launch( containerId, taskInfo, executorInfo, "dir", "user", slaveId, environment, false); Resources resources = Resources::parse("cpus:1;mem:256").get(); EXPECT_TRUE(launched.isPending()); Future<bool> destroyed = containerizer.destroy(containerId); EXPECT_CALL(*mockContainerizer2, launch(_, _, _, _, _, _, _, _)) .Times(0); // We make sure the destroy is being called on the first containerizer. // The second containerizer shouldn't be called as well since the // container is already destroyed. AWAIT_READY(destroy); launchPromise.set(false); destroyPromise.set(false); // `launched` should be a failure and `destroyed` should be true // because the launch was stopped from being tried on the 2nd // containerizer because of the destroy. AWAIT_FAILED(launched); AWAIT_EXPECT_EQ(true, destroyed); }
// This test checks that a failed over scheduler gets the retried status update // when the original instance dies without acknowledging the update. TEST_F(HttpFaultToleranceTest, SchedulerFailoverStatusUpdate) { master::Flags flags = CreateMasterFlags(); flags.authenticate_frameworks = false; Try<Owned<cluster::Master>> master = StartMaster(flags); ASSERT_SOME(master); auto scheduler = std::make_shared<v1::MockHTTPScheduler>(); auto executor = std::make_shared<v1::MockHTTPExecutor>(); ExecutorID executorId = DEFAULT_EXECUTOR_ID; TestContainerizer containerizer(executorId, executor); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer); ASSERT_SOME(slave); Future<Nothing> connected; EXPECT_CALL(*scheduler, connected(_)) .WillOnce(FutureSatisfy(&connected)) .WillRepeatedly(Return()); // Ignore future invocations. ContentType contentType = ContentType::PROTOBUF; v1::scheduler::TestMesos schedulerLibrary( master.get()->pid, contentType, scheduler); AWAIT_READY(connected); Future<Event::Subscribed> subscribed; EXPECT_CALL(*scheduler, subscribed(_, _)) .WillOnce(FutureArg<1>(&subscribed)); EXPECT_CALL(*scheduler, heartbeat(_)) .WillRepeatedly(Return()); // Ignore heartbeats. Future<Event::Offers> offers; EXPECT_CALL(*scheduler, offers(_, _)) .WillOnce(FutureArg<1>(&offers)); { Call call; call.set_type(Call::SUBSCRIBE); Call::Subscribe* subscribe = call.mutable_subscribe(); subscribe->mutable_framework_info()->CopyFrom(v1::DEFAULT_FRAMEWORK_INFO); schedulerLibrary.send(call); } AWAIT_READY(subscribed); v1::FrameworkID frameworkId(subscribed->framework_id()); AWAIT_READY(offers); EXPECT_NE(0, offers->offers().size()); EXPECT_CALL(*executor, connected(_)) .WillOnce(v1::executor::SendSubscribe(frameworkId, evolve(executorId))); EXPECT_CALL(*executor, subscribed(_, _)); EXPECT_CALL(*executor, launch(_, _)) .WillOnce(v1::executor::SendUpdateFromTask( frameworkId, evolve(executorId), v1::TASK_RUNNING)); Future<Nothing> acknowledged; EXPECT_CALL(*executor, acknowledged(_, _)) .WillOnce(FutureSatisfy(&acknowledged)); Future<Event::Update> update; EXPECT_CALL(*scheduler, update(_, _)) .WillOnce(FutureArg<1>(&update)); const v1::Offer& offer = offers->offers(0); v1::TaskInfo taskInfo = evolve(createTask(devolve(offer), "", executorId)); { Call call; call.mutable_framework_id()->CopyFrom(frameworkId); call.set_type(Call::ACCEPT); Call::Accept* accept = call.mutable_accept(); accept->add_offer_ids()->CopyFrom(offer.id()); v1::Offer::Operation* operation = accept->add_operations(); operation->set_type(v1::Offer::Operation::LAUNCH); operation->mutable_launch()->add_task_infos()->CopyFrom(taskInfo); schedulerLibrary.send(call); } AWAIT_READY(acknowledged); AWAIT_READY(update); EXPECT_EQ(v1::TASK_RUNNING, update->status().state()); EXPECT_EQ(executorId, devolve(update->status().executor_id())); EXPECT_TRUE(update->status().has_executor_id()); EXPECT_TRUE(update->status().has_uuid()); // Failover the scheduler without acknowledging the status update. auto scheduler2 = std::make_shared<v1::MockHTTPScheduler>(); Future<Nothing> connected2; EXPECT_CALL(*scheduler2, connected(_)) .WillOnce(FutureSatisfy(&connected2)); // Failover to another scheduler instance. v1::scheduler::TestMesos schedulerLibrary2( master.get()->pid, contentType, scheduler2); AWAIT_READY(connected2); // The previously connected scheduler instance should receive an // error/disconnected event. Future<Nothing> error; EXPECT_CALL(*scheduler, error(_, _)) .WillOnce(FutureSatisfy(&error)); Future<Nothing> disconnected; EXPECT_CALL(*scheduler, disconnected(_)) .WillOnce(FutureSatisfy(&disconnected)); EXPECT_CALL(*scheduler2, subscribed(_, _)) .WillOnce(FutureArg<1>(&subscribed)); EXPECT_CALL(*scheduler2, heartbeat(_)) .WillRepeatedly(Return()); // Ignore heartbeats. // Scheduler2 should receive the retried status update. Future<Nothing> update2; EXPECT_CALL(*scheduler2, update(_, _)) .WillOnce(FutureSatisfy(&update2)) .WillRepeatedly(Return()); // Ignore subsequent updates. { Call call; call.mutable_framework_id()->CopyFrom(frameworkId); call.set_type(Call::SUBSCRIBE); Call::Subscribe* subscribe = call.mutable_subscribe(); subscribe->mutable_framework_info()->CopyFrom(v1::DEFAULT_FRAMEWORK_INFO); subscribe->mutable_framework_info()->mutable_id()->CopyFrom(frameworkId); schedulerLibrary2.send(call); } AWAIT_READY(error); AWAIT_READY(disconnected); AWAIT_READY(subscribed); EXPECT_EQ(frameworkId, subscribed->framework_id()); Clock::pause(); // Now advance time enough for the reliable timeout to kick in and // another status update to be sent. Clock::advance(slave::STATUS_UPDATE_RETRY_INTERVAL_MIN); AWAIT_READY(update2); EXPECT_CALL(*executor, shutdown(_)) .Times(AtMost(1)); EXPECT_CALL(*executor, disconnected(_)) .Times(AtMost(1)); }
// The purpose of this test is to ensure that when slaves are removed // from the master, and then attempt to re-register, we deny the // re-registration by sending a ShutdownMessage to the slave. // Why? Because during a network partition, the master will remove a // partitioned slave, thus sending its tasks to LOST. At this point, // when the partition is removed, the slave will attempt to // re-register with its running tasks. We've already notified // frameworks that these tasks were LOST, so we have to have the slave // slave shut down. TEST_F(PartitionTest, PartitionedSlaveReregistration) { master::Flags masterFlags = CreateMasterFlags(); Try<Owned<cluster::Master>> master = StartMaster(masterFlags); ASSERT_SOME(master); // Allow the master to PING the slave, but drop all PONG messages // from the slave. Note that we don't match on the master / slave // PIDs because it's actually the SlaveObserver Process that sends // the pings. Future<Message> ping = FUTURE_MESSAGE( Eq(PingSlaveMessage().GetTypeName()), _, _); DROP_PROTOBUFS(PongSlaveMessage(), _, _); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); StandaloneMasterDetector detector(master.get()->pid); Try<Owned<cluster::Slave>> slave = StartSlave(&detector, &containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); driver.start(); AWAIT_READY(offers); ASSERT_NE(0u, offers.get().size()); // Launch a task. This is to ensure the task is killed by the slave, // during shutdown. TaskID taskId; taskId.set_value("1"); TaskInfo task; task.set_name(""); task.mutable_task_id()->MergeFrom(taskId); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); task.mutable_executor()->mutable_command()->set_value("sleep 60"); // Set up the expectations for launching the task. EXPECT_CALL(exec, registered(_, _, _, _)); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); Future<TaskStatus> runningStatus; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&runningStatus)); Future<Nothing> statusUpdateAck = FUTURE_DISPATCH( slave.get()->pid, &Slave::_statusUpdateAcknowledgement); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(runningStatus); EXPECT_EQ(TASK_RUNNING, runningStatus.get().state()); // Wait for the slave to have handled the acknowledgment prior // to pausing the clock. AWAIT_READY(statusUpdateAck); // Drop the first shutdown message from the master (simulated // partition), allow the second shutdown message to pass when // the slave re-registers. Future<ShutdownMessage> shutdownMessage = DROP_PROTOBUF(ShutdownMessage(), _, slave.get()->pid); Future<TaskStatus> lostStatus; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&lostStatus)); Future<Nothing> slaveLost; EXPECT_CALL(sched, slaveLost(&driver, _)) .WillOnce(FutureSatisfy(&slaveLost)); Clock::pause(); // Now, induce a partition of the slave by having the master // timeout the slave. size_t pings = 0; while (true) { AWAIT_READY(ping); pings++; if (pings == masterFlags.max_slave_ping_timeouts) { break; } ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _); Clock::advance(masterFlags.slave_ping_timeout); Clock::settle(); } Clock::advance(masterFlags.slave_ping_timeout); Clock::settle(); // The master will have notified the framework of the lost task. AWAIT_READY(lostStatus); EXPECT_EQ(TASK_LOST, lostStatus.get().state()); // Wait for the master to attempt to shut down the slave. AWAIT_READY(shutdownMessage); // The master will notify the framework that the slave was lost. AWAIT_READY(slaveLost); Clock::resume(); // We now complete the partition on the slave side as well. This // is done by simulating a master loss event which would normally // occur during a network partition. detector.appoint(None()); Future<Nothing> shutdown; EXPECT_CALL(exec, shutdown(_)) .WillOnce(FutureSatisfy(&shutdown)); shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()->pid); // Have the slave re-register with the master. detector.appoint(master.get()->pid); // Upon re-registration, the master will shutdown the slave. // The slave will then shut down the executor. AWAIT_READY(shutdownMessage); AWAIT_READY(shutdown); driver.stop(); driver.join(); }
// This test ensures that the failed over scheduler is able to send a message // to the executor. TEST_F(HttpFaultToleranceTest, SchedulerFailoverFrameworkToExecutorMessage) { master::Flags flags = CreateMasterFlags(); flags.authenticate_frameworks = false; Try<Owned<cluster::Master>> master = StartMaster(flags); ASSERT_SOME(master); auto scheduler = std::make_shared<v1::MockHTTPScheduler>(); auto executor = std::make_shared<v1::MockHTTPExecutor>(); ExecutorID executorId = DEFAULT_EXECUTOR_ID; TestContainerizer containerizer(executorId, executor); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer); ASSERT_SOME(slave); Future<Nothing> connected; EXPECT_CALL(*scheduler, connected(_)) .WillOnce(FutureSatisfy(&connected)) .WillRepeatedly(Return()); // Ignore future invocations. ContentType contentType = ContentType::PROTOBUF; v1::scheduler::TestMesos schedulerLibrary( master.get()->pid, contentType, scheduler); AWAIT_READY(connected); Future<Event::Subscribed> subscribed; EXPECT_CALL(*scheduler, subscribed(_, _)) .WillOnce(FutureArg<1>(&subscribed)); EXPECT_CALL(*scheduler, heartbeat(_)) .WillRepeatedly(Return()); // Ignore heartbeats. Future<Event::Offers> offers; EXPECT_CALL(*scheduler, offers(_, _)) .WillOnce(FutureArg<1>(&offers)); { Call call; call.set_type(Call::SUBSCRIBE); Call::Subscribe* subscribe = call.mutable_subscribe(); subscribe->mutable_framework_info()->CopyFrom(v1::DEFAULT_FRAMEWORK_INFO); schedulerLibrary.send(call); } AWAIT_READY(subscribed); v1::FrameworkID frameworkId(subscribed->framework_id()); AWAIT_READY(offers); EXPECT_NE(0, offers->offers().size()); EXPECT_CALL(*executor, connected(_)) .WillOnce(v1::executor::SendSubscribe(frameworkId, evolve(executorId))); EXPECT_CALL(*executor, subscribed(_, _)); Future<Nothing> launch; EXPECT_CALL(*executor, launch(_, _)) .WillOnce(FutureSatisfy(&launch)); const v1::Offer& offer = offers->offers(0); v1::TaskInfo taskInfo = evolve(createTask(devolve(offer), "", executorId)); { Call call; call.mutable_framework_id()->CopyFrom(frameworkId); call.set_type(Call::ACCEPT); Call::Accept* accept = call.mutable_accept(); accept->add_offer_ids()->CopyFrom(offer.id()); v1::Offer::Operation* operation = accept->add_operations(); operation->set_type(v1::Offer::Operation::LAUNCH); operation->mutable_launch()->add_task_infos()->CopyFrom(taskInfo); schedulerLibrary.send(call); } AWAIT_READY(launch); auto scheduler2 = std::make_shared<v1::MockHTTPScheduler>(); Future<Nothing> connected2; EXPECT_CALL(*scheduler2, connected(_)) .WillOnce(FutureSatisfy(&connected2)); // Failover to another scheduler instance. v1::scheduler::TestMesos schedulerLibrary2( master.get()->pid, contentType, scheduler2); AWAIT_READY(connected2); // The previously connected scheduler instance should receive an // error/disconnected event. Future<Nothing> error; EXPECT_CALL(*scheduler, error(_, _)) .WillOnce(FutureSatisfy(&error)); Future<Nothing> disconnected; EXPECT_CALL(*scheduler, disconnected(_)) .WillOnce(FutureSatisfy(&disconnected)); EXPECT_CALL(*scheduler2, subscribed(_, _)) .WillOnce(FutureArg<1>(&subscribed)); EXPECT_CALL(*scheduler2, heartbeat(_)) .WillRepeatedly(Return()); // Ignore heartbeats. { Call call; call.mutable_framework_id()->CopyFrom(frameworkId); call.set_type(Call::SUBSCRIBE); Call::Subscribe* subscribe = call.mutable_subscribe(); subscribe->mutable_framework_info()->CopyFrom(v1::DEFAULT_FRAMEWORK_INFO); subscribe->mutable_framework_info()->mutable_id()->CopyFrom(frameworkId); schedulerLibrary2.send(call); } AWAIT_READY(error); AWAIT_READY(disconnected); AWAIT_READY(subscribed); EXPECT_EQ(frameworkId, subscribed->framework_id()); Future<v1::executor::Event::Message> message; EXPECT_CALL(*executor, message(_, _)) .WillOnce(FutureArg<1>(&message)); { Call call; call.mutable_framework_id()->CopyFrom(frameworkId); call.set_type(Call::MESSAGE); Call::Message* message = call.mutable_message(); message->mutable_agent_id()->CopyFrom(offer.agent_id()); message->mutable_executor_id()->CopyFrom(v1::DEFAULT_EXECUTOR_ID); message->set_data("hello world"); schedulerLibrary2.send(call); } AWAIT_READY(message); ASSERT_EQ("hello world", message->data()); EXPECT_CALL(*executor, shutdown(_)) .Times(AtMost(1)); EXPECT_CALL(*executor, disconnected(_)) .Times(AtMost(1)); }
// The purpose of this test is to ensure that when slaves are removed // from the master, and then attempt to send exited executor messages, // we send a ShutdownMessage to the slave. Why? Because during a // network partition, the master will remove a partitioned slave, thus // sending its tasks to LOST. At this point, when the partition is // removed, the slave may attempt to send exited executor messages if // it was unaware that the master removed it. We've already // notified frameworks that the tasks under the executors were LOST, // so we have to have the slave shut down. TEST_F(PartitionTest, PartitionedSlaveExitedExecutor) { master::Flags masterFlags = CreateMasterFlags(); Try<Owned<cluster::Master>> master = StartMaster(masterFlags); ASSERT_SOME(master); // Allow the master to PING the slave, but drop all PONG messages // from the slave. Note that we don't match on the master / slave // PIDs because it's actually the SlaveObserver Process that sends // the pings. Future<Message> ping = FUTURE_MESSAGE( Eq(PingSlaveMessage().GetTypeName()), _, _); DROP_PROTOBUFS(PongSlaveMessage(), _, _); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); Future<FrameworkID> frameworkId; EXPECT_CALL(sched, registered(&driver, _, _)) .WillOnce(FutureArg<1>(&frameworkId));\ Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); driver.start(); AWAIT_READY(frameworkId); AWAIT_READY(offers); ASSERT_NE(0u, offers.get().size()); // Launch a task. This allows us to have the slave send an // ExitedExecutorMessage. TaskID taskId; taskId.set_value("1"); TaskInfo task; task.set_name(""); task.mutable_task_id()->MergeFrom(taskId); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); task.mutable_executor()->mutable_command()->set_value("sleep 60"); // Set up the expectations for launching the task. EXPECT_CALL(exec, registered(_, _, _, _)); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); // Drop all the status updates from the slave, so that we can // ensure the ExitedExecutorMessage is what triggers the slave // shutdown. DROP_PROTOBUFS(StatusUpdateMessage(), _, master.get()->pid); driver.launchTasks(offers.get()[0].id(), {task}); // Drop the first shutdown message from the master (simulated // partition) and allow the second shutdown message to pass when // triggered by the ExitedExecutorMessage. Future<ShutdownMessage> shutdownMessage = DROP_PROTOBUF(ShutdownMessage(), _, slave.get()->pid); Future<TaskStatus> lostStatus; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&lostStatus)); Future<Nothing> slaveLost; EXPECT_CALL(sched, slaveLost(&driver, _)) .WillOnce(FutureSatisfy(&slaveLost)); Clock::pause(); // Now, induce a partition of the slave by having the master // timeout the slave. size_t pings = 0; while (true) { AWAIT_READY(ping); pings++; if (pings == masterFlags.max_slave_ping_timeouts) { break; } ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _); Clock::advance(masterFlags.slave_ping_timeout); Clock::settle(); } Clock::advance(masterFlags.slave_ping_timeout); Clock::settle(); // The master will have notified the framework of the lost task. AWAIT_READY(lostStatus); EXPECT_EQ(TASK_LOST, lostStatus.get().state()); // Wait for the master to attempt to shut down the slave. AWAIT_READY(shutdownMessage); // The master will notify the framework that the slave was lost. AWAIT_READY(slaveLost); shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()->pid); // Induce an ExitedExecutorMessage from the slave. containerizer.destroy( frameworkId.get(), DEFAULT_EXECUTOR_INFO.executor_id()); // Upon receiving the message, the master will shutdown the slave. AWAIT_READY(shutdownMessage); Clock::resume(); driver.stop(); driver.join(); }
// This test checks that a scheduler exit shuts down the executor. TEST_F(HttpFaultToleranceTest, SchedulerExit) { master::Flags flags = CreateMasterFlags(); flags.authenticate_frameworks = false; Try<Owned<cluster::Master>> master = StartMaster(flags); ASSERT_SOME(master); auto scheduler = std::make_shared<v1::MockHTTPScheduler>(); auto executor = std::make_shared<v1::MockHTTPExecutor>(); ExecutorID executorId = DEFAULT_EXECUTOR_ID; TestContainerizer containerizer(executorId, executor); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer); ASSERT_SOME(slave); Future<Nothing> connected; EXPECT_CALL(*scheduler, connected(_)) .WillOnce(FutureSatisfy(&connected)) .WillRepeatedly(Return()); // Ignore future invocations. ContentType contentType = ContentType::PROTOBUF; v1::scheduler::TestMesos schedulerLibrary( master.get()->pid, contentType, scheduler); AWAIT_READY(connected); Future<Event::Subscribed> subscribed; EXPECT_CALL(*scheduler, subscribed(_, _)) .WillOnce(FutureArg<1>(&subscribed)); EXPECT_CALL(*scheduler, heartbeat(_)) .WillRepeatedly(Return()); // Ignore heartbeats. Future<Event::Offers> offers; EXPECT_CALL(*scheduler, offers(_, _)) .WillOnce(FutureArg<1>(&offers)); { Call call; call.set_type(Call::SUBSCRIBE); Call::Subscribe* subscribe = call.mutable_subscribe(); subscribe->mutable_framework_info()->CopyFrom(v1::DEFAULT_FRAMEWORK_INFO); schedulerLibrary.send(call); } AWAIT_READY(subscribed); v1::FrameworkID frameworkId(subscribed->framework_id()); AWAIT_READY(offers); EXPECT_NE(0, offers->offers().size()); EXPECT_CALL(*executor, connected(_)) .WillOnce(v1::executor::SendSubscribe(frameworkId, evolve(executorId))); EXPECT_CALL(*executor, subscribed(_, _)); Future<Nothing> launch; EXPECT_CALL(*executor, launch(_, _)) .WillOnce(FutureSatisfy(&launch)); const v1::Offer& offer = offers->offers(0); v1::TaskInfo taskInfo = evolve(createTask(devolve(offer), "", DEFAULT_EXECUTOR_ID)); { Call call; call.mutable_framework_id()->CopyFrom(frameworkId); call.set_type(Call::ACCEPT); Call::Accept* accept = call.mutable_accept(); accept->add_offer_ids()->CopyFrom(offer.id()); v1::Offer::Operation* operation = accept->add_operations(); operation->set_type(v1::Offer::Operation::LAUNCH); operation->mutable_launch()->add_task_infos()->CopyFrom(taskInfo); schedulerLibrary.send(call); } AWAIT_READY(launch); EXPECT_CALL(*scheduler, disconnected(_)) .Times(AtMost(1)); Future<Nothing> shutdown; EXPECT_CALL(*executor, shutdown(_)) .WillOnce(FutureSatisfy(&shutdown)); { Call call; call.mutable_framework_id()->CopyFrom(frameworkId); call.set_type(Call::TEARDOWN); schedulerLibrary.send(call); } // Ensure that the executor receives a `Event::Shutdown` after the // scheduler exit. AWAIT_READY(shutdown); }
// This test does not set any Accept header for the subscribe call. // The default response media type should be "application/json" in // this case. TEST_P(ExecutorHttpApiTest, NoAcceptHeader) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); ExecutorID executorId = DEFAULT_EXECUTOR_ID; MockExecutor exec(executorId); TestContainerizer containerizer(&exec); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); Future<FrameworkID> frameworkId; EXPECT_CALL(sched, registered(&driver, _, _)) .WillOnce(FutureArg<1>(&frameworkId)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)); Future<Nothing> statusUpdate; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureSatisfy(&statusUpdate)); driver.start(); AWAIT_READY(frameworkId); AWAIT_READY(offers); ASSERT_EQ(1u, offers.get().size()); EXPECT_CALL(exec, registered(_, _, _, _)) .Times(1); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); TaskInfo taskInfo = createTask(offers.get()[0], "", executorId); driver.launchTasks(offers.get()[0].id(), {taskInfo}); // Wait until status update is received on the scheduler before sending // an executor subscribe request. AWAIT_READY(statusUpdate); // Only subscribe needs to 'Accept' JSON or protobuf. Call call; call.mutable_framework_id()->CopyFrom(evolve(frameworkId.get())); call.mutable_executor_id()->CopyFrom(evolve(executorId)); call.set_type(Call::SUBSCRIBE); call.mutable_subscribe(); // Retrieve the parameter passed as content type to this test. const ContentType contentType = GetParam(); // No 'Accept' header leads to all media types considered // acceptable. JSON will be chosen by default. process::http::Headers headers; Future<Response> response = process::http::streaming::post( slave.get()->pid, "api/v1/executor", headers, serialize(contentType, call), stringify(contentType)); AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response); AWAIT_EXPECT_RESPONSE_HEADER_EQ(APPLICATION_JSON, "Content-Type", response); driver.stop(); driver.join(); }
// This test checks that a scheduler gets a slave lost // message for a partitioned slave. TEST_F(PartitionTest, PartitionedSlave) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); // Set these expectations up before we spawn the slave so that we // don't miss the first PING. Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _); // Drop all the PONGs to simulate slave partition. DROP_MESSAGES(Eq("PONG"), _, _); Try<PID<Slave> > slave = StartSlave(); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<Nothing> resourceOffers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureSatisfy(&resourceOffers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); // Need to make sure the framework AND slave have registered with // master. Waiting for resource offers should accomplish both. AWAIT_READY(resourceOffers); Clock::pause(); EXPECT_CALL(sched, offerRescinded(&driver, _)) .Times(AtMost(1)); Future<Nothing> slaveLost; EXPECT_CALL(sched, slaveLost(&driver, _)) .WillOnce(FutureSatisfy(&slaveLost)); // Now advance through the PINGs. uint32_t pings = 0; while (true) { AWAIT_READY(ping); pings++; if (pings == master::MAX_SLAVE_PING_TIMEOUTS) { break; } ping = FUTURE_MESSAGE(Eq("PING"), _, _); Clock::advance(master::SLAVE_PING_TIMEOUT); } Clock::advance(master::SLAVE_PING_TIMEOUT); AWAIT_READY(slaveLost); this->Stop(slave.get()); JSON::Object stats = Metrics(); EXPECT_EQ(1, stats.values["master/slave_removals"]); EXPECT_EQ(1, stats.values["master/slave_removals/reason_unhealthy"]); driver.stop(); driver.join(); Shutdown(); Clock::resume(); }