// Ensures that the driver can handle the RESCIND event.
TEST_F(SchedulerDriverEventTest, Rescind)
{
  Try<PID<Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<Message> frameworkRegisteredMessage =
    FUTURE_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);

  driver.start();

  AWAIT_READY(frameworkRegisteredMessage);
  UPID frameworkPid = frameworkRegisteredMessage.get().to;

  Event event;
  event.set_type(Event::RESCIND);
  event.mutable_rescind()->mutable_offer_id()->set_value("O");

  Future<Nothing> offerRescinded;
  EXPECT_CALL(sched, offerRescinded(&driver, event.rescind().offer_id()))
    .WillOnce(FutureSatisfy(&offerRescinded));

  process::post(master.get(), frameworkPid, event);

  AWAIT_READY(offerRescinded);
}
// Bad password should return an authentication failure.
TEST(SASL, failed1)
{
  // Set up secrets.
  map<string, string> secrets;
  secrets["benh"] = "secret1";
  sasl::secrets::load(secrets);

  // Launch a dummy process (somebody to send the AuthenticateMessage).
  UPID pid = spawn(new ProcessBase(), true);

  Credential credential;
  credential.set_principal("benh");
  credential.set_secret("secret");

  Authenticatee authenticatee(credential, UPID());

  Future<Message> message =
    FUTURE_MESSAGE(Eq(AuthenticateMessage().GetTypeName()), _, _);

  Future<bool> client = authenticatee.authenticate(pid);

  AWAIT_READY(message);

  Authenticator authenticator(message.get().from);

  Future<bool> server = authenticator.authenticate();

  AWAIT_EQ(false, client);
  AWAIT_EQ(false, server);

  terminate(pid);
}
// Ensures that the driver can handle the ERROR event.
TEST_F(SchedulerDriverEventTest, Error)
{
  Try<PID<Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<Message> frameworkRegisteredMessage =
    FUTURE_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);

  driver.start();

  AWAIT_READY(frameworkRegisteredMessage);
  UPID frameworkPid = frameworkRegisteredMessage.get().to;

  Event event;
  event.set_type(Event::ERROR);
  event.mutable_error()->set_message("error message");

  Future<Nothing> error;
  EXPECT_CALL(sched, error(&driver, event.error().message()))
    .WillOnce(FutureSatisfy(&error));

  process::post(master.get(), frameworkPid, event);

  AWAIT_READY(error);
}
// This test verifies that if master --> slave socket closes and the
// slave is not aware of it (i.e., one way network partition), slave
// will re-register with the master.
TEST_F(PartitionTest, OneWayPartitionMasterToSlave)
{
  // Start a master.
  master::Flags masterFlags = CreateMasterFlags();
  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
  ASSERT_SOME(master);

  Future<Message> slaveRegisteredMessage =
    FUTURE_MESSAGE(Eq(SlaveRegisteredMessage().GetTypeName()), _, _);

  // Ensure a ping reaches the slave.
  Future<Message> ping = FUTURE_MESSAGE(
      Eq(PingSlaveMessage().GetTypeName()), _, _);

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get());
  ASSERT_SOME(slave);

  AWAIT_READY(slaveRegisteredMessage);

  AWAIT_READY(ping);

  Future<Nothing> deactivateSlave =
    FUTURE_DISPATCH(_, &MesosAllocatorProcess::deactivateSlave);

  // Inject a slave exited event at the master causing the master
  // to mark the slave as disconnected. The slave should not notice
  // it until the next ping is received.
  process::inject::exited(
      slaveRegisteredMessage.get().to, master.get()->pid);

  // Wait until master deactivates the slave.
  AWAIT_READY(deactivateSlave);

  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);

  // Ensure the slave observer marked the slave as deactivated.
  Clock::pause();
  Clock::settle();

  // Let the slave observer send the next ping.
  Clock::advance(masterFlags.slave_ping_timeout);

  // Slave should re-register.
  AWAIT_READY(slaveReregisteredMessage);
}
// This test verifies that the pending future returned by
// 'Authenticator::authenticate()' is properly failed when the Authenticator is
// destructed in the middle of authentication.
TYPED_TEST(CRAMMD5Authentication, AuthenticatorDestructionRace)
{
  // Launch a dummy process (somebody to send the AuthenticateMessage).
  UPID pid = spawn(new ProcessBase(), true);

  Credential credential1;
  credential1.set_principal("benh");
  credential1.set_secret("secret");

  Credentials credentials;
  Credential* credential2 = credentials.add_credentials();
  credential2->set_principal(credential1.principal());
  credential2->set_secret(credential1.secret());

  secrets::load(credentials);

  Future<Message> message =
    FUTURE_MESSAGE(Eq(AuthenticateMessage().GetTypeName()), _, _);

  Try<Authenticatee*> authenticatee = TypeParam::TypeAuthenticatee::create();
  CHECK_SOME(authenticatee);

  Future<bool> client =
    authenticatee.get()->authenticate(pid, UPID(), credential1);

  AWAIT_READY(message);

  Try<Authenticator*> authenticator = TypeParam::TypeAuthenticator::create();
  CHECK_SOME(authenticator);

  authenticator.get()->initialize(message.get().from);

  // Drop the AuthenticationStepMessage from authenticator to keep
  // the authentication from getting completed.
  Future<AuthenticationStepMessage> authenticationStepMessage =
    DROP_PROTOBUF(AuthenticationStepMessage(), _, _);

  Future<Option<string>> principal = authenticator.get()->authenticate();

  AWAIT_READY(authenticationStepMessage);

  // At this point 'AuthenticatorProcess::authenticate()' has been
  // executed and its promise associated with the promise returned
  // by 'Authenticator::authenticate()'.
  // Authentication should be pending.
  ASSERT_TRUE(principal.isPending());

  // Now delete the authenticator.
  delete authenticator.get();

  // The future should be failed at this point.
  AWAIT_FAILED(principal);

  terminate(pid);
  delete authenticatee.get();
}
// This test verifies that if master --> slave socket closes and the
// slave is not aware of it (i.e., one way network partition), slave
// will re-register with the master.
TEST_F(PartitionTest, OneWayPartitionMasterToSlave)
{
  // Start a master.
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  Future<Message> slaveRegisteredMessage =
    FUTURE_MESSAGE(Eq(SlaveRegisteredMessage().GetTypeName()), _, _);

  // Ensure a ping reaches the slave.
  Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _);

  Try<PID<Slave>> slave = StartSlave();
  ASSERT_SOME(slave);

  AWAIT_READY(slaveRegisteredMessage);

  AWAIT_READY(ping);

  Future<Nothing> deactivateSlave =
    FUTURE_DISPATCH(_, &MesosAllocatorProcess::deactivateSlave);

  // Inject a slave exited event at the master causing the master
  // to mark the slave as disconnected. The slave should not notice
  // it until the next ping is received.
  process::inject::exited(slaveRegisteredMessage.get().to, master.get());

  // Wait until master deactivates the slave.
  AWAIT_READY(deactivateSlave);

  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);

  // Ensure the slave observer marked the slave as deactivated.
  Clock::pause();
  Clock::settle();

  // Let the slave observer send the next ping.
  Clock::advance(slave::MASTER_PING_TIMEOUT());

  // Slave should re-register.
  AWAIT_READY(slaveReregisteredMessage);
}
// Ensures that the driver can handle the FAILURE event.
TEST_F(SchedulerDriverEventTest, Failure)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<Message> frameworkRegisteredMessage =
    FUTURE_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);

  driver.start();

  AWAIT_READY(frameworkRegisteredMessage);
  UPID frameworkPid = frameworkRegisteredMessage.get().to;

  // Send a failure for an executor, which should trigger the
  // 'executorLost' callback.
  SlaveID slaveId;
  slaveId.set_value("S");

  ExecutorID executorId = DEFAULT_EXECUTOR_ID;

  const int32_t status = 255;

  Event event;
  event.set_type(Event::FAILURE);
  event.mutable_failure()->mutable_slave_id()->CopyFrom(slaveId);
  event.mutable_failure()->mutable_executor_id()->CopyFrom(executorId);
  event.mutable_failure()->set_status(status);

  Future<Nothing> executorLost;
  EXPECT_CALL(sched, executorLost(&driver, executorId, slaveId, status))
    .WillOnce(FutureSatisfy(&executorLost));

  process::post(master.get()->pid, frameworkPid, event);

  AWAIT_READY(executorLost);

  // Now, post a failure for a slave and expect a 'slaveLost'.
  event.mutable_failure()->clear_executor_id();

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, slaveId))
    .WillOnce(FutureSatisfy(&slaveLost));

  process::post(master.get()->pid, frameworkPid, event);

  AWAIT_READY(slaveLost);

  driver.stop();
  driver.join();
}
// Ensures that the driver can handle the FAILURE event.
TEST_F(SchedulerDriverEventTest, Failure)
{
  Try<PID<Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<Message> frameworkRegisteredMessage =
    FUTURE_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);

  driver.start();

  AWAIT_READY(frameworkRegisteredMessage);
  UPID frameworkPid = frameworkRegisteredMessage.get().to;

  // Send a failure for an executor; this should be dropped
  // to match the existing behavior of the scheduler driver.
  SlaveID slaveId;
  slaveId.set_value("S");

  Event event;
  event.set_type(Event::FAILURE);
  event.mutable_failure()->mutable_slave_id()->CopyFrom(slaveId);
  event.mutable_failure()->mutable_executor_id()->set_value("E");

  process::post(master.get(), frameworkPid, event);

  // Now, post a failure for a slave and expect a 'slaveLost'.
  event.mutable_failure()->clear_executor_id();

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, slaveId))
    .WillOnce(FutureSatisfy(&slaveLost));

  process::post(master.get(), frameworkPid, event);

  AWAIT_READY(slaveLost);
}
TYPED_TEST(CRAMMD5Authentication, Success)
{
  // Launch a dummy process (somebody to send the AuthenticateMessage).
  UPID pid = spawn(new ProcessBase(), true);

  Credential credential1;
  credential1.set_principal("benh");
  credential1.set_secret("secret");

  Credentials credentials;
  Credential* credential2 = credentials.add_credentials();
  credential2->set_principal(credential1.principal());
  credential2->set_secret(credential1.secret());

  Future<Message> message =
    FUTURE_MESSAGE(Eq(AuthenticateMessage().GetTypeName()), _, _);

  Try<Authenticatee*> authenticatee = TypeParam::TypeAuthenticatee::create();
  CHECK_SOME(authenticatee);

  Future<bool> client =
    authenticatee.get()->authenticate(pid, UPID(), credential1);

  AWAIT_READY(message);

  Try<Authenticator*> authenticator = TypeParam::TypeAuthenticator::create();
  CHECK_SOME(authenticator);

  EXPECT_SOME(authenticator.get()->initialize(credentials));

  Future<Option<string>> principal =
    authenticator.get()->authenticate(message.get().from);

  AWAIT_EQ(true, client);
  AWAIT_READY(principal);
  EXPECT_SOME_EQ("benh", principal.get());

  terminate(pid);
  delete authenticator.get();
  delete authenticatee.get();
}
// Ensures that the driver can handle the MESSAGE event.
TEST_F(SchedulerDriverEventTest, Message)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<Message> frameworkRegisteredMessage =
    FUTURE_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);

  driver.start();

  AWAIT_READY(frameworkRegisteredMessage);
  UPID frameworkPid = frameworkRegisteredMessage.get().to;

  Event event;
  event.set_type(Event::MESSAGE);
  event.mutable_message()->mutable_slave_id()->set_value("S");
  event.mutable_message()->mutable_executor_id()->set_value("E");
  event.mutable_message()->set_data("data");

  Future<Nothing> frameworkMessage;
  EXPECT_CALL(sched, frameworkMessage(
      &driver,
      event.message().executor_id(),
      event.message().slave_id(),
      event.message().data()))
    .WillOnce(FutureSatisfy(&frameworkMessage));

  process::post(master.get()->pid, frameworkPid, event);

  AWAIT_READY(frameworkMessage);

  driver.stop();
  driver.join();
}
// This test verifies that if master --> framework socket closes and the
// framework is not aware of it (i.e., one way network partition), all
// subsequent calls from the framework after the master has marked it as
// disconnected would result in an error message causing the framework
// to abort.
TEST_F(PartitionTest, OneWayPartitionMasterToScheduler)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_failover_timeout(Weeks(2).secs());

  MockScheduler sched;
  StandaloneMasterDetector detector(master.get()->pid);
  TestingMesosSchedulerDriver driver(&sched, &detector, frameworkInfo);

  Future<process::Message> frameworkRegisteredMessage =
    FUTURE_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);

  Future<Nothing> registered;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureSatisfy(&registered));

  driver.start();

  AWAIT_READY(frameworkRegisteredMessage);
  AWAIT_READY(registered);

  Future<Nothing> error;
  EXPECT_CALL(sched, error(&driver, _))
    .WillOnce(FutureSatisfy(&error));

  // Simulate framework disconnection. This should result in an error message.
  ASSERT_TRUE(process::inject::exited(
      frameworkRegisteredMessage.get().to, master.get()->pid));

  AWAIT_READY(error);

  driver.stop();
  driver.join();
}
// This test checks that a scheduler gets a slave lost
// message for a partitioned slave.
TEST_F(PartitionTest, PartitionedSlave)
{
  master::Flags masterFlags = CreateMasterFlags();
  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
  ASSERT_SOME(master);

  // Set these expectations up before we spawn the slave so that we
  // don't miss the first PING.
  Future<Message> ping = FUTURE_MESSAGE(
      Eq(PingSlaveMessage().GetTypeName()), _, _);

  // Drop all the PONGs to simulate slave partition.
  DROP_PROTOBUFS(PongSlaveMessage(), _, _);

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get());
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<Nothing> resourceOffers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureSatisfy(&resourceOffers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  // Need to make sure the framework AND slave have registered with
  // master. Waiting for resource offers should accomplish both.
  AWAIT_READY(resourceOffers);

  Clock::pause();

  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .Times(AtMost(1));

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  // Now advance through the PINGs.
  size_t pings = 0;
  while (true) {
    AWAIT_READY(ping);
    pings++;
    if (pings == masterFlags.max_slave_ping_timeouts) {
      break;
    }
    ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
    Clock::advance(masterFlags.slave_ping_timeout);
  }

  Clock::advance(masterFlags.slave_ping_timeout);

  AWAIT_READY(slaveLost);

  slave.get()->terminate();
  slave->reset();

  JSON::Object stats = Metrics();
  EXPECT_EQ(1, stats.values["master/slave_removals"]);
  EXPECT_EQ(1, stats.values["master/slave_removals/reason_unhealthy"]);

  driver.stop();
  driver.join();

  Clock::resume();
}
// The purpose of this test is to ensure that when slaves are removed
// from the master, and then attempt to send exited executor messages,
// we send a ShutdownMessage to the slave. Why? Because during a
// network partition, the master will remove a partitioned slave, thus
// sending its tasks to LOST. At this point, when the partition is
// removed, the slave may attempt to send exited executor messages if
// it was unaware that the master removed it. We've already
// notified frameworks that the tasks under the executors were LOST,
// so we have to have the slave shut down.
TEST_F(PartitionTest, PartitionedSlaveExitedExecutor)
{
  master::Flags masterFlags = CreateMasterFlags();
  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
  ASSERT_SOME(master);

  // Allow the master to PING the slave, but drop all PONG messages
  // from the slave. Note that we don't match on the master / slave
  // PIDs because it's actually the SlaveObserver Process that sends
  // the pings.
  Future<Message> ping = FUTURE_MESSAGE(
      Eq(PingSlaveMessage().GetTypeName()), _, _);

  DROP_PROTOBUFS(PongSlaveMessage(), _, _);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), &containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(frameworkId);

  AWAIT_READY(offers);
  ASSERT_NE(0u, offers.get().size());

  // Launch a task. This allows us to have the slave send an
  // ExitedExecutorMessage.
  TaskID taskId;
  taskId.set_value("1");

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->MergeFrom(taskId);
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);
  task.mutable_executor()->mutable_command()->set_value("sleep 60");

  // Set up the expectations for launching the task.
  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  // Drop all the status updates from the slave, so that we can
  // ensure the ExitedExecutorMessage is what triggers the slave
  // shutdown.
  DROP_PROTOBUFS(StatusUpdateMessage(), _, master.get()->pid);

  driver.launchTasks(offers.get()[0].id(), {task});

  // Drop the first shutdown message from the master (simulated
  // partition) and allow the second shutdown message to pass when
  // triggered by the ExitedExecutorMessage.
  Future<ShutdownMessage> shutdownMessage =
    DROP_PROTOBUF(ShutdownMessage(), _, slave.get()->pid);

  Future<TaskStatus> lostStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&lostStatus));

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  Clock::pause();

  // Now, induce a partition of the slave by having the master
  // timeout the slave.
  size_t pings = 0;
  while (true) {
    AWAIT_READY(ping);
    pings++;
    if (pings == masterFlags.max_slave_ping_timeouts) {
      break;
    }
    ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
    Clock::advance(masterFlags.slave_ping_timeout);
    Clock::settle();
  }

  Clock::advance(masterFlags.slave_ping_timeout);
  Clock::settle();

  // The master will have notified the framework of the lost task.
  AWAIT_READY(lostStatus);
  EXPECT_EQ(TASK_LOST, lostStatus.get().state());

  // Wait for the master to attempt to shut down the slave.
  AWAIT_READY(shutdownMessage);

  // The master will notify the framework that the slave was lost.
  AWAIT_READY(slaveLost);

  shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()->pid);

  // Induce an ExitedExecutorMessage from the slave.
  containerizer.destroy(
      frameworkId.get(), DEFAULT_EXECUTOR_INFO.executor_id());

  // Upon receiving the message, the master will shutdown the slave.
  AWAIT_READY(shutdownMessage);

  Clock::resume();

  driver.stop();
  driver.join();
}
// The purpose of this test is to ensure that when slaves are removed
// from the master, and then attempt to send status updates, we send
// a ShutdownMessage to the slave. Why? Because during a network
// partition, the master will remove a partitioned slave, thus sending
// its tasks to LOST. At this point, when the partition is removed,
// the slave may attempt to send updates if it was unaware that the
// master removed it. We've already notified frameworks that these
// tasks were LOST, so we have to have the slave shut down.
TEST_F(PartitionTest, PartitionedSlaveStatusUpdates)
{
  master::Flags masterFlags = CreateMasterFlags();
  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
  ASSERT_SOME(master);

  // Allow the master to PING the slave, but drop all PONG messages
  // from the slave. Note that we don't match on the master / slave
  // PIDs because it's actually the SlaveObserver Process that sends
  // the pings.
  Future<Message> ping = FUTURE_MESSAGE(
      Eq(PingSlaveMessage().GetTypeName()), _, _);

  DROP_PROTOBUFS(PongSlaveMessage(), _, _);

  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), &containerizer);
  ASSERT_SOME(slave);

  AWAIT_READY(slaveRegisteredMessage);
  SlaveID slaveId = slaveRegisteredMessage.get().slave_id();

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(frameworkId);

  // Drop the first shutdown message from the master (simulated
  // partition), allow the second shutdown message to pass when
  // the slave sends an update.
  Future<ShutdownMessage> shutdownMessage =
    DROP_PROTOBUF(ShutdownMessage(), _, slave.get()->pid);

  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .WillRepeatedly(Return());

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  Clock::pause();

  // Now, induce a partition of the slave by having the master
  // timeout the slave.
  size_t pings = 0;
  while (true) {
    AWAIT_READY(ping);
    pings++;
    if (pings == masterFlags.max_slave_ping_timeouts) {
      break;
    }
    ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
    Clock::advance(masterFlags.slave_ping_timeout);
    Clock::settle();
  }

  Clock::advance(masterFlags.slave_ping_timeout);
  Clock::settle();

  // Wait for the master to attempt to shut down the slave.
  AWAIT_READY(shutdownMessage);

  // The master will notify the framework that the slave was lost.
  AWAIT_READY(slaveLost);

  shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()->pid);

  // At this point, the slave still thinks it's registered, so we
  // simulate a status update coming from the slave.
  TaskID taskId;
  taskId.set_value("task_id");

  const StatusUpdate& update = protobuf::createStatusUpdate(
      frameworkId.get(),
      slaveId,
      taskId,
      TASK_RUNNING,
      TaskStatus::SOURCE_SLAVE,
      UUID::random());

  StatusUpdateMessage message;
  message.mutable_update()->CopyFrom(update);
  message.set_pid(stringify(slave.get()->pid));

  process::post(master.get()->pid, message);

  // The master should shutdown the slave upon receiving the update.
  AWAIT_READY(shutdownMessage);

  Clock::resume();

  driver.stop();
  driver.join();
}
// The purpose of this test is to ensure that when slaves are removed
// from the master, and then attempt to re-register, we deny the
// re-registration by sending a ShutdownMessage to the slave.
// Why? Because during a network partition, the master will remove a
// partitioned slave, thus sending its tasks to LOST. At this point,
// when the partition is removed, the slave will attempt to
// re-register with its running tasks. We've already notified
// frameworks that these tasks were LOST, so we have to have the
// slave shut down.
TEST_F(PartitionTest, PartitionedSlaveReregistration)
{
  master::Flags masterFlags = CreateMasterFlags();
  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
  ASSERT_SOME(master);

  // Allow the master to PING the slave, but drop all PONG messages
  // from the slave. Note that we don't match on the master / slave
  // PIDs because it's actually the SlaveObserver Process that sends
  // the pings.
  Future<Message> ping = FUTURE_MESSAGE(
      Eq(PingSlaveMessage().GetTypeName()), _, _);

  DROP_PROTOBUFS(PongSlaveMessage(), _, _);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  StandaloneMasterDetector detector(master.get()->pid);

  Try<Owned<cluster::Slave>> slave = StartSlave(&detector, &containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(offers);
  ASSERT_NE(0u, offers.get().size());

  // Launch a task. This is to ensure the task is killed by the slave,
  // during shutdown.
  TaskID taskId;
  taskId.set_value("1");

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->MergeFrom(taskId);
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);
  task.mutable_executor()->mutable_command()->set_value("sleep 60");

  // Set up the expectations for launching the task.
  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> runningStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&runningStatus));

  Future<Nothing> statusUpdateAck = FUTURE_DISPATCH(
      slave.get()->pid, &Slave::_statusUpdateAcknowledgement);

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(runningStatus);
  EXPECT_EQ(TASK_RUNNING, runningStatus.get().state());

  // Wait for the slave to have handled the acknowledgment prior
  // to pausing the clock.
  AWAIT_READY(statusUpdateAck);

  // Drop the first shutdown message from the master (simulated
  // partition), allow the second shutdown message to pass when
  // the slave re-registers.
  Future<ShutdownMessage> shutdownMessage =
    DROP_PROTOBUF(ShutdownMessage(), _, slave.get()->pid);

  Future<TaskStatus> lostStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&lostStatus));

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  Clock::pause();

  // Now, induce a partition of the slave by having the master
  // timeout the slave.
  size_t pings = 0;
  while (true) {
    AWAIT_READY(ping);
    pings++;
    if (pings == masterFlags.max_slave_ping_timeouts) {
      break;
    }
    ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
    Clock::advance(masterFlags.slave_ping_timeout);
    Clock::settle();
  }

  Clock::advance(masterFlags.slave_ping_timeout);
  Clock::settle();

  // The master will have notified the framework of the lost task.
  AWAIT_READY(lostStatus);
  EXPECT_EQ(TASK_LOST, lostStatus.get().state());

  // Wait for the master to attempt to shut down the slave.
  AWAIT_READY(shutdownMessage);

  // The master will notify the framework that the slave was lost.
  AWAIT_READY(slaveLost);

  Clock::resume();

  // We now complete the partition on the slave side as well. This
  // is done by simulating a master loss event which would normally
  // occur during a network partition.
  detector.appoint(None());

  Future<Nothing> shutdown;
  EXPECT_CALL(exec, shutdown(_))
    .WillOnce(FutureSatisfy(&shutdown));

  shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()->pid);

  // Have the slave re-register with the master.
  detector.appoint(master.get()->pid);

  // Upon re-registration, the master will shutdown the slave.
  // The slave will then shut down the executor.
  AWAIT_READY(shutdownMessage);
  AWAIT_READY(shutdown);

  driver.stop();
  driver.join();
}
// Ensures the scheduler driver can handle the UPDATE event.
TEST_F(SchedulerDriverEventTest, Update)
{
  Try<PID<Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<Message> frameworkRegisteredMessage =
    FUTURE_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);

  driver.start();

  AWAIT_READY(frameworkRegisteredMessage);
  UPID frameworkPid = frameworkRegisteredMessage.get().to;

  FrameworkRegisteredMessage message;
  ASSERT_TRUE(message.ParseFromString(frameworkRegisteredMessage.get().body));

  FrameworkID frameworkId = message.framework_id();

  SlaveID slaveId;
  slaveId.set_value("S");

  TaskID taskId;
  taskId.set_value("T");

  ExecutorID executorId;
  executorId.set_value("E");

  // Generate an update that needs no acknowledgement.
  Event event;
  event.set_type(Event::UPDATE);
  event.mutable_update()->mutable_status()->CopyFrom(
      protobuf::createStatusUpdate(
          frameworkId,
          slaveId,
          taskId,
          TASK_RUNNING,
          TaskStatus::SOURCE_MASTER,
          None(),
          "message",
          None(),
          executorId).status());

  Future<Nothing> statusUpdate;
  Future<Nothing> statusUpdate2;
  EXPECT_CALL(sched, statusUpdate(&driver, event.update().status()))
    .WillOnce(FutureSatisfy(&statusUpdate))
    .WillOnce(FutureSatisfy(&statusUpdate2));

  process::post(master.get(), frameworkPid, event);

  AWAIT_READY(statusUpdate);

  // Generate an update that requires acknowledgement.
  event.mutable_update()->mutable_status()->set_uuid(UUID::random().toBytes());

  Future<mesos::scheduler::Call> acknowledgement = DROP_CALL(
      mesos::scheduler::Call(),
      mesos::scheduler::Call::ACKNOWLEDGE,
      _,
      _);

  process::post(master.get(), frameworkPid, event);

  AWAIT_READY(statusUpdate2);
  AWAIT_READY(acknowledgement);
}
// Ensures that the driver can handle an OFFERS event.
// Note that this includes the ability to bypass the
// master when sending framework messages.
TEST_F(SchedulerDriverEventTest, Offers)
{
  Try<PID<Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockScheduler sched;
  MesosSchedulerDriver schedDriver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&schedDriver, _, _));

  Future<Message> frameworkRegisteredMessage =
    FUTURE_MESSAGE(Eq(FrameworkRegisteredMessage().GetTypeName()), _, _);

  schedDriver.start();

  AWAIT_READY(frameworkRegisteredMessage);
  UPID frameworkPid = frameworkRegisteredMessage.get().to;

  // Start a slave and capture the offers.
  Future<ResourceOffersMessage> resourceOffersMessage =
    DROP_PROTOBUF(ResourceOffersMessage(), _, _);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave>> slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  AWAIT_READY(resourceOffersMessage);

  google::protobuf::RepeatedPtrField<Offer> offers =
    resourceOffersMessage.get().offers();

  ASSERT_EQ(1, offers.size());

  // Ignore future offer messages.
  DROP_PROTOBUFS(ResourceOffersMessage(), _, _);

  // Send the offers event and expect a 'resourceOffers' call.
  Event event;
  event.set_type(Event::OFFERS);
  event.mutable_offers()->mutable_offers()->CopyFrom(offers);

  Future<Nothing> resourceOffers;
  EXPECT_CALL(sched, resourceOffers(&schedDriver, _))
    .WillOnce(FutureSatisfy(&resourceOffers));

  process::post(master.get(), frameworkPid, event);

  AWAIT_READY(resourceOffers);

  // To test that the framework -> executor messages are
  // sent directly to the slave, launch a task and send
  // the executor a message.
  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&schedDriver, _))
    .WillOnce(FutureArg<1>(&status));

  TaskInfo task = createTask(offers.Get(0), "", DEFAULT_EXECUTOR_ID);

  schedDriver.launchTasks(offers.Get(0).id(), {task});

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  // This message should skip the master!
  Future<FrameworkToExecutorMessage> frameworkToExecutorMessage =
    FUTURE_PROTOBUF(FrameworkToExecutorMessage(), frameworkPid, slave.get());

  Future<string> data;
  EXPECT_CALL(exec, frameworkMessage(_, _))
    .WillOnce(FutureArg<1>(&data));

  schedDriver.sendFrameworkMessage(
      DEFAULT_EXECUTOR_ID, offers.Get(0).slave_id(), "hello");

  AWAIT_READY(frameworkToExecutorMessage);
  AWAIT_EXPECT_EQ("hello", data);

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  schedDriver.stop();
  schedDriver.join();

  Shutdown();
}
// This test checks that a scheduler gets a slave lost
// message for a partitioned slave.
TEST_F(PartitionTest, PartitionedSlave)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  // Set these expectations up before we spawn the slave so that we
  // don't miss the first PING.
  Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _);

  // Drop all the PONGs to simulate slave partition.
  DROP_MESSAGES(Eq("PONG"), _, _);

  Try<PID<Slave> > slave = StartSlave();
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<Nothing> resourceOffers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureSatisfy(&resourceOffers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  // Need to make sure the framework AND slave have registered with
  // master. Waiting for resource offers should accomplish both.
  AWAIT_READY(resourceOffers);

  Clock::pause();

  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .Times(AtMost(1));

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  // Now advance through the PINGs.
  uint32_t pings = 0;
  while (true) {
    AWAIT_READY(ping);
    pings++;
    if (pings == master::MAX_SLAVE_PING_TIMEOUTS) {
      break;
    }
    ping = FUTURE_MESSAGE(Eq("PING"), _, _);
    Clock::advance(master::SLAVE_PING_TIMEOUT);
  }

  Clock::advance(master::SLAVE_PING_TIMEOUT);

  AWAIT_READY(slaveLost);

  this->Stop(slave.get());

  JSON::Object stats = Metrics();
  EXPECT_EQ(1, stats.values["master/slave_removals"]);
  EXPECT_EQ(1, stats.values["master/slave_removals/reason_unhealthy"]);

  driver.stop();
  driver.join();

  Shutdown();

  Clock::resume();
}