// This test sets an unsupported media type as Content-Type. This // should result in a 415 (UnsupportedMediaType) response. TEST_P(ExecutorHttpApiTest, UnsupportedContentMediaType) { Try<PID<Master>> master = StartMaster(); ASSERT_SOME(master); Try<PID<Slave>> slave = StartSlave(); ASSERT_SOME(slave); Future<Nothing> __recover = FUTURE_DISPATCH(_, &Slave::__recover); AWAIT_READY(__recover); ContentType contentType = GetParam(); hashmap<string, string> headers; headers["Accept"] = stringify(contentType); Call call; call.set_type(Call::SUBSCRIBE); call.mutable_framework_id()->set_value("dummy_framework_id"); call.mutable_executor_id()->set_value("dummy_executor_id"); const string unknownMediaType = "application/unknown-media-type"; Future<Response> response = process::http::post( slave.get(), "api/v1/executor", headers, serialize(contentType, call), unknownMediaType); AWAIT_EXPECT_RESPONSE_STATUS_EQ(UnsupportedMediaType().status, response); Shutdown(); }
// This test expects a BadRequest when 'Content-Type' is omitted. TEST_F(ExecutorHttpApiTest, NoContentType) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); Future<Nothing> __recover = FUTURE_DISPATCH(_, &Slave::__recover); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get()); ASSERT_SOME(slave); AWAIT_READY(__recover); // Wait for recovery to be complete. Clock::pause(); Clock::settle(); Call call; call.mutable_framework_id()->set_value("dummy_framework_id"); call.mutable_executor_id()->set_value("dummy_executor_id"); call.set_type(Call::MESSAGE); call.mutable_message()->set_data("hello world"); Future<Response> response = process::http::post( slave.get()->pid, "api/v1/executor", None(), serialize(ContentType::JSON, call), None()); AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, response); }
// This test sends a malformed body that cannot be deserialized // into a valid protobuf resulting in a BadRequest. TEST_P(ExecutorHttpApiTest, MalformedContent) { Try<PID<Master>> master = StartMaster(); ASSERT_SOME(master); Try<PID<Slave>> slave = StartSlave(); ASSERT_SOME(slave); Future<Nothing> __recover = FUTURE_DISPATCH(_, &Slave::__recover); AWAIT_READY(__recover); const string body = "MALFORMED_CONTENT"; const ContentType contentType = GetParam(); hashmap<string, string> headers; headers["Accept"] = stringify(contentType); Future<Response> response = process::http::post( slave.get(), "api/v1/executor", headers, body, stringify(contentType)); AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, response); Shutdown(); }
// This test sends a Call from an unknown FrameworkID. The call // should return a BadRequest. TEST_P(ExecutorHttpApiTest, MessageFromUnknownFramework) { Try<PID<Master>> master = StartMaster(); ASSERT_SOME(master); Try<PID<Slave>> slave = StartSlave(); ASSERT_SOME(slave); Future<Nothing> __recover = FUTURE_DISPATCH(_, &Slave::__recover); AWAIT_READY(__recover); ContentType contentType = GetParam(); hashmap<string, string> headers; headers["Accept"] = stringify(contentType); Call call; call.set_type(Call::MESSAGE); call.mutable_message()->set_data("hello world"); call.mutable_framework_id()->set_value("dummy_framework_id"); call.mutable_executor_id()->set_value("dummy_executor_id"); Future<Response> response = process::http::post( slave.get(), "api/v1/executor", headers, serialize(contentType, call), stringify(contentType)); AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, response); Shutdown(); }
// This test sends a GET request to the executor HTTP endpoint instead // of a POST. The call should return a MethodNotAllowed response. TEST_F(ExecutorHttpApiTest, GetRequest) { Try<PID<Master>> master = StartMaster(); ASSERT_SOME(master); Future<Nothing> __recover = FUTURE_DISPATCH(_, &Slave::__recover); Try<PID<Slave>> slave = StartSlave(); ASSERT_SOME(slave); AWAIT_READY(__recover); // Wait for recovery to be complete. Clock::pause(); Clock::settle(); Future<Response> response = process::http::get( slave.get(), "api/v1/executor"); AWAIT_READY(response); AWAIT_EXPECT_RESPONSE_STATUS_EQ(MethodNotAllowed().status, response); Shutdown(); }
// This test sends a unsupported Accept media type for the Accept // header. The response should be NotAcceptable in this case. TEST_P(ExecutorHttpApiTest, NotAcceptable) { Try<PID<Master>> master = StartMaster(); ASSERT_SOME(master); Try<PID<Slave>> slave = StartSlave(); ASSERT_SOME(slave); Future<Nothing> __recover = FUTURE_DISPATCH(_, &Slave::__recover); AWAIT_READY(__recover); // Retrieve the parameter passed as content type to this test. const ContentType contentType = GetParam(); hashmap<string, string> headers; headers["Accept"] = "foo"; // Only subscribe needs to 'Accept' JSON or protobuf. Call call; call.set_type(Call::SUBSCRIBE); call.mutable_framework_id()->set_value("dummy_framework_id"); call.mutable_executor_id()->set_value("dummy_executor_id"); Future<Response> response = process::http::streaming::post( slave.get(), "api/v1/executor", headers, serialize(contentType, call), stringify(contentType)); AWAIT_EXPECT_RESPONSE_STATUS_EQ(NotAcceptable().status, response); Shutdown(); }
// Checks that a pending detect() resolves to None once the group's
// session timeout elapses after the network has been shut down.
TEST_F(ZooKeeperTest, LeaderDetectorTimeoutHandling)
{
  const Duration sessionTimeout = Seconds(10);

  Group group(server->connectString(), sessionTimeout, "/test/");
  LeaderDetector detector(&group);

  AWAIT_READY(group.join("member 1"));

  // The single member that just joined is detected as the leader.
  Future<Option<Group::Membership> > elected = detector.detect();

  AWAIT_READY(elected);
  EXPECT_SOME(elected.get());

  // Ask for the next change relative to the current leader.
  Future<Option<Group::Membership> > changed = detector.detect(elected.get());

  Future<Nothing> reconnecting =
    FUTURE_DISPATCH(group.process->self(), &GroupProcess::reconnecting);

  server->shutdownNetwork();

  AWAIT_READY(reconnecting);

  Clock::pause();

  // 'reconnecting' has to schedule its timeout before the clock is
  // advanced, hence the settle.
  Clock::settle();
  Clock::advance(sessionTimeout);

  // The outstanding detect operation times out.
  AWAIT_READY(changed);
  EXPECT_NONE(changed.get());
}
// This test sends a valid JSON blob that cannot be deserialized // into a valid protobuf resulting in a BadRequest. TEST_F(ExecutorHttpApiTest, ValidJsonButInvalidProtobuf) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); Future<Nothing> __recover = FUTURE_DISPATCH(_, &Slave::__recover); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get()); ASSERT_SOME(slave); AWAIT_READY(__recover); // Wait for recovery to be complete. Clock::pause(); Clock::settle(); JSON::Object object; object.values["string"] = "valid_json"; process::http::Headers headers; headers["Accept"] = APPLICATION_JSON; Future<Response> response = process::http::post( slave.get()->pid, "api/v1/executor", headers, stringify(object), APPLICATION_JSON); AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, response); }
// This test sends a valid JSON blob that cannot be deserialized // into a valid protobuf resulting in a BadRequest. TEST_F(ExecutorHttpApiTest, ValidJsonButInvalidProtobuf) { Try<PID<Master>> master = StartMaster(); ASSERT_SOME(master); Try<PID<Slave>> slave = StartSlave(); ASSERT_SOME(slave); Future<Nothing> __recover = FUTURE_DISPATCH(_, &Slave::__recover); AWAIT_READY(__recover); JSON::Object object; object.values["string"] = "valid_json"; hashmap<string, string> headers; headers["Accept"] = APPLICATION_JSON; Future<Response> response = process::http::post( slave.get(), "api/v1/executor", headers, stringify(object), APPLICATION_JSON); AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, response); Shutdown(); }
// This test sends a malformed body that cannot be deserialized // into a valid protobuf resulting in a BadRequest. TEST_P(ExecutorHttpApiTest, MalformedContent) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); Future<Nothing> __recover = FUTURE_DISPATCH(_, &Slave::__recover); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get()); ASSERT_SOME(slave); AWAIT_READY(__recover); // Wait for recovery to be complete. Clock::pause(); Clock::settle(); const string body = "MALFORMED_CONTENT"; const ContentType contentType = GetParam(); process::http::Headers headers; headers["Accept"] = stringify(contentType); Future<Response> response = process::http::post( slave.get()->pid, "api/v1/executor", headers, body, stringify(contentType)); AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, response); }
// This test verifies that if master --> slave socket closes and the // slave is not aware of it (i.e., one way network partition), slave // will re-register with the master. TEST_F(PartitionTest, OneWayPartitionMasterToSlave) { // Start a master. master::Flags masterFlags = CreateMasterFlags(); Try<Owned<cluster::Master>> master = StartMaster(masterFlags); ASSERT_SOME(master); Future<Message> slaveRegisteredMessage = FUTURE_MESSAGE(Eq(SlaveRegisteredMessage().GetTypeName()), _, _); // Ensure a ping reaches the slave. Future<Message> ping = FUTURE_MESSAGE( Eq(PingSlaveMessage().GetTypeName()), _, _); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get()); ASSERT_SOME(slave); AWAIT_READY(slaveRegisteredMessage); AWAIT_READY(ping); Future<Nothing> deactivateSlave = FUTURE_DISPATCH(_, &MesosAllocatorProcess::deactivateSlave); // Inject a slave exited event at the master causing the master // to mark the slave as disconnected. The slave should not notice // it until the next ping is received. process::inject::exited(slaveRegisteredMessage.get().to, master.get()->pid); // Wait until master deactivates the slave. AWAIT_READY(deactivateSlave); Future<SlaveReregisteredMessage> slaveReregisteredMessage = FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _); // Ensure the slave observer marked the slave as deactivated. Clock::pause(); Clock::settle(); // Let the slave observer send the next ping. Clock::advance(masterFlags.slave_ping_timeout); // Slave should re-register. AWAIT_READY(slaveReregisteredMessage); }
// This test verifies that if master --> slave socket closes and the // slave is not aware of it (i.e., one way network partition), slave // will re-register with the master. TEST_F(PartitionTest, OneWayPartitionMasterToSlave) { // Start a master. Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); Future<Message> slaveRegisteredMessage = FUTURE_MESSAGE(Eq(SlaveRegisteredMessage().GetTypeName()), _, _); // Ensure a ping reaches the slave. Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _); Try<PID<Slave>> slave = StartSlave(); ASSERT_SOME(slave); AWAIT_READY(slaveRegisteredMessage); AWAIT_READY(ping); Future<Nothing> deactivateSlave = FUTURE_DISPATCH(_, &MesosAllocatorProcess::deactivateSlave); // Inject a slave exited event at the master causing the master // to mark the slave as disconnected. The slave should not notice // it until the next ping is received. process::inject::exited(slaveRegisteredMessage.get().to, master.get()); // Wait until master deactivates the slave. AWAIT_READY(deactivateSlave); Future<SlaveReregisteredMessage> slaveReregisteredMessage = FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _); // Ensure the slave observer marked the slave as deactivated. Clock::pause(); Clock::settle(); // Let the slave observer send the next ping. Clock::advance(slave::MASTER_PING_TIMEOUT()); // Slave should re-register. AWAIT_READY(slaveReregisteredMessage); }
// This test does not set any Accept header for the subscribe call. // The default response media type should be "application/json" in // this case. TEST_P(ExecutorHttpApiTest, NoAcceptHeader) { Try<PID<Master>> master = StartMaster(); ASSERT_SOME(master); Future<Nothing> __recover = FUTURE_DISPATCH(_, &Slave::__recover); Try<PID<Slave>> slave = StartSlave(); ASSERT_SOME(slave); AWAIT_READY(__recover); // Wait for recovery to be complete. Clock::pause(); Clock::settle(); // Retrieve the parameter passed as content type to this test. const ContentType contentType = GetParam(); // No 'Accept' header leads to all media types considered // acceptable. JSON will be chosen by default. process::http::Headers headers; // Only subscribe needs to 'Accept' JSON or protobuf. Call call; call.set_type(Call::SUBSCRIBE); call.mutable_framework_id()->set_value("dummy_framework_id"); call.mutable_executor_id()->set_value("dummy_executor_id"); Future<Response> response = process::http::streaming::post( slave.get(), "api/v1/executor", headers, serialize(contentType, call), stringify(contentType)); AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response); EXPECT_SOME_EQ(APPLICATION_JSON, response.get().headers.get("Content-Type")); Shutdown(); }
// This test sends a unsupported Accept media type for the Accept // header. The response should be NotAcceptable in this case. TEST_P(ExecutorHttpApiTest, NotAcceptable) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); Future<Nothing> __recover = FUTURE_DISPATCH(_, &Slave::__recover); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get()); ASSERT_SOME(slave); AWAIT_READY(__recover); // Wait for recovery to be complete. Clock::pause(); Clock::settle(); // Retrieve the parameter passed as content type to this test. const ContentType contentType = GetParam(); process::http::Headers headers; headers["Accept"] = "foo"; // Only subscribe needs to 'Accept' JSON or protobuf. Call call; call.mutable_framework_id()->set_value("dummy_framework_id"); call.mutable_executor_id()->set_value("dummy_executor_id"); call.set_type(Call::SUBSCRIBE); call.mutable_subscribe(); Future<Response> response = process::http::streaming::post( slave.get()->pid, "api/v1/executor", headers, serialize(contentType, call), stringify(contentType)); AWAIT_EXPECT_RESPONSE_STATUS_EQ(NotAcceptable().status, response); }
// This test sets an unsupported media type as Content-Type. This // should result in a 415 (UnsupportedMediaType) response. TEST_P(ExecutorHttpApiTest, UnsupportedContentMediaType) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); Future<Nothing> __recover = FUTURE_DISPATCH(_, &Slave::__recover); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get()); ASSERT_SOME(slave); AWAIT_READY(__recover); // Wait for recovery to be complete. Clock::pause(); Clock::settle(); ContentType contentType = GetParam(); process::http::Headers headers; headers["Accept"] = stringify(contentType); Call call; call.mutable_framework_id()->set_value("dummy_framework_id"); call.mutable_executor_id()->set_value("dummy_executor_id"); call.set_type(Call::SUBSCRIBE); call.mutable_subscribe(); const string unknownMediaType = "application/unknown-media-type"; Future<Response> response = process::http::post( slave.get()->pid, "api/v1/executor", headers, serialize(contentType, call), unknownMediaType); AWAIT_EXPECT_RESPONSE_STATUS_EQ(UnsupportedMediaType().status, response); }
// This test sends a GET request to the executor HTTP endpoint instead // of a POST. The call should return a MethodNotAllowed response. TEST_F(ExecutorHttpApiTest, GetRequest) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); Future<Nothing> __recover = FUTURE_DISPATCH(_, &Slave::__recover); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get()); ASSERT_SOME(slave); AWAIT_READY(__recover); // Wait for recovery to be complete. Clock::pause(); Clock::settle(); Future<Response> response = process::http::get( slave.get()->pid, "api/v1/executor"); AWAIT_READY(response); AWAIT_EXPECT_RESPONSE_STATUS_EQ(MethodNotAllowed({"POST"}).status, response); }
// This test verifies that when the slave re-registers, the master
// does not send TASK_LOST update for a task that has reached terminal
// state but is waiting for an acknowledgement.
TEST_F(MasterSlaveReconciliationTest, SlaveReregisterTerminalTask)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  StandaloneMasterDetector detector(master.get()->pid);

  Try<Owned<cluster::Slave>> slave = StartSlave(&detector, &containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);

  // This has to be fatal: the first offer is indexed right below, so
  // continuing with an empty offer list would read out of bounds.
  ASSERT_NE(0u, offers.get().size());

  TaskInfo task;
  task.set_name("test task");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  EXPECT_CALL(exec, registered(_, _, _, _));

  // Send a terminal update right away.
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_FINISHED));

  // Drop the status update from slave to the master, so that
  // the slave has a pending terminal update when it re-registers.
  DROP_PROTOBUF(StatusUpdateMessage(), _, master.get()->pid);

  Future<Nothing> _statusUpdate = FUTURE_DISPATCH(_, &Slave::_statusUpdate);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status))
    .WillRepeatedly(Return()); // Ignore retried update due to update framework.

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(_statusUpdate);

  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);

  // Simulate a spurious master change event (e.g., due to ZooKeeper
  // expiration) at the slave to force re-registration.
  detector.appoint(master.get()->pid);

  AWAIT_READY(slaveReregisteredMessage);

  // The master should not send a TASK_LOST after the slave
  // re-registers. We check this by calling Clock::settle() so that
  // the only update the scheduler receives is the retried
  // TASK_FINISHED update.
  // NOTE: The status update manager resends the status update when
  // it detects a new master.
  Clock::pause();
  Clock::settle();

  AWAIT_READY(status);
  ASSERT_EQ(TASK_FINISHED, status.get().state());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();
}
// This test verifies that when the slave re-registers, we correctly // send the information about actively running frameworks. TEST_F(MasterSlaveReconciliationTest, SlaveReregisterFrameworks) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); StandaloneMasterDetector detector(master.get()->pid); Try<Owned<cluster::Slave>> slave = StartSlave(&detector, &containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); TaskInfo task; task.set_name("test task"); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); EXPECT_CALL(exec, registered(_, _, _, _)); // Send an update right away. EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); Future<Nothing> _statusUpdate = FUTURE_DISPATCH(_, &Slave::_statusUpdate); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)) .WillRepeatedly(Return()); // Ignore retried update due to update framework. driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(_statusUpdate); Future<ReregisterSlaveMessage> reregisterSlave = FUTURE_PROTOBUF(ReregisterSlaveMessage(), _, _); // Simulate a spurious master change event (e.g., due to ZooKeeper // expiration) at the slave to force re-registration. 
detector.appoint(master.get()->pid); // Expect to receive the 'ReregisterSlaveMessage' containing the // active frameworks. AWAIT_READY(reregisterSlave); EXPECT_EQ(1u, reregisterSlave.get().frameworks().size()); Clock::pause(); Clock::settle(); AWAIT_READY(status); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); }
// Exercises LeaderContender through several rounds: withdrawing
// before and right after contending, losing the candidacy to a
// session expiration, surviving an expiration through a retry and
// losing the candidacy to a network shutdown.
TEST_F(ZooKeeperTest, LeaderContender)
{
  const Seconds sessionTimeout(10);

  Group group(server->connectString(), sessionTimeout, "/test/");

  Owned<LeaderContender> contender(
      new LeaderContender(&group, "candidate 1", master::MASTER_INFO_LABEL));

  // Nothing has been contended for yet, so there is nothing to
  // withdraw and 'false' is returned.
  Future<bool> withdrawn = contender->withdraw();
  AWAIT_READY(withdrawn);
  EXPECT_FALSE(withdrawn.get());

  contender->contend();

  // A withdrawal issued immediately after contending leads to a
  // delayed cancellation.
  withdrawn = contender->withdraw();
  AWAIT_READY(withdrawn);
  EXPECT_TRUE(withdrawn.get());

  // Normal workflow.
  contender = Owned<LeaderContender>(
      new LeaderContender(&group, "candidate 1", master::MASTER_INFO_LABEL));

  Future<Future<Nothing> > candidacy = contender->contend();
  AWAIT_READY(candidacy);

  Future<Nothing> lost = candidacy.get();
  EXPECT_TRUE(lost.isPending());

  // While we are watching for updates from the contender, expire the
  // Group session; the candidacy will be lost as a result.
  Future<Option<int64_t> > sessionId = group.session();
  AWAIT_READY(sessionId);
  ASSERT_SOME(sessionId.get());

  Future<Nothing> reconnected =
    FUTURE_DISPATCH(group.process->self(), &GroupProcess::connected);

  server->expireSession(sessionId.get().get());

  AWAIT_READY(lost);

  // With the candidacy already lost there is nothing to cancel, so
  // the withdrawal returns directly.
  withdrawn = contender->withdraw();
  AWAIT_READY(withdrawn);
  EXPECT_FALSE(withdrawn.get());

  // Contend again.
  contender = Owned<LeaderContender>(
      new LeaderContender(&group, "candidate 1", master::MASTER_INFO_LABEL));

  candidacy = contender->contend();

  AWAIT_READY(reconnected);

  sessionId = group.session();
  AWAIT_READY(sessionId);
  ASSERT_SOME(sessionId.get());

  server->expireSession(sessionId.get().get());

  Clock::pause();

  // The retry timeout.
  Clock::advance(GroupProcess::RETRY_INTERVAL);
  Clock::settle();
  Clock::resume();

  // The expiration was weathered: the contender succeeded in a retry.
  AWAIT_READY(candidacy);

  withdrawn = contender->withdraw();
  AWAIT_READY(withdrawn);

  // Contend (3) and shutdown the network this time.
  contender = Owned<LeaderContender>(
      new LeaderContender(&group, "candidate 1", master::MASTER_INFO_LABEL));

  candidacy = contender->contend();
  AWAIT_READY(candidacy);

  lost = candidacy.get();

  Future<Nothing> reconnecting =
    FUTURE_DISPATCH(group.process->self(), &GroupProcess::reconnecting);

  server->shutdownNetwork();

  AWAIT_READY(reconnecting);

  Clock::pause();

  // 'reconnecting()' has to schedule its timeout before the clock is
  // advanced, hence the settle.
  Clock::settle();
  Clock::advance(sessionTimeout);

  // Server failure results in candidacy loss.
  AWAIT_READY(lost);

  Clock::resume();

  server->startNetwork();

  // Contend again (4).
  contender = Owned<LeaderContender>(
      new LeaderContender(&group, "candidate 1", master::MASTER_INFO_LABEL));

  candidacy = contender->contend();
  AWAIT_READY(candidacy);
}
// The purpose of this test is to ensure that when slaves are removed // from the master, and then attempt to re-register, we deny the // re-registration by sending a ShutdownMessage to the slave. // Why? Because during a network partition, the master will remove a // partitioned slave, thus sending its tasks to LOST. At this point, // when the partition is removed, the slave will attempt to // re-register with its running tasks. We've already notified // frameworks that these tasks were LOST, so we have to have the slave // slave shut down. TEST_F(PartitionTest, PartitionedSlaveReregistration) { master::Flags masterFlags = CreateMasterFlags(); Try<Owned<cluster::Master>> master = StartMaster(masterFlags); ASSERT_SOME(master); // Allow the master to PING the slave, but drop all PONG messages // from the slave. Note that we don't match on the master / slave // PIDs because it's actually the SlaveObserver Process that sends // the pings. Future<Message> ping = FUTURE_MESSAGE( Eq(PingSlaveMessage().GetTypeName()), _, _); DROP_PROTOBUFS(PongSlaveMessage(), _, _); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); StandaloneMasterDetector detector(master.get()->pid); Try<Owned<cluster::Slave>> slave = StartSlave(&detector, &containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); driver.start(); AWAIT_READY(offers); ASSERT_NE(0u, offers.get().size()); // Launch a task. This is to ensure the task is killed by the slave, // during shutdown. 
TaskID taskId; taskId.set_value("1"); TaskInfo task; task.set_name(""); task.mutable_task_id()->MergeFrom(taskId); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_resources()->MergeFrom(offers.get()[0].resources()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); task.mutable_executor()->mutable_command()->set_value("sleep 60"); // Set up the expectations for launching the task. EXPECT_CALL(exec, registered(_, _, _, _)); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); Future<TaskStatus> runningStatus; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&runningStatus)); Future<Nothing> statusUpdateAck = FUTURE_DISPATCH( slave.get()->pid, &Slave::_statusUpdateAcknowledgement); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(runningStatus); EXPECT_EQ(TASK_RUNNING, runningStatus.get().state()); // Wait for the slave to have handled the acknowledgment prior // to pausing the clock. AWAIT_READY(statusUpdateAck); // Drop the first shutdown message from the master (simulated // partition), allow the second shutdown message to pass when // the slave re-registers. Future<ShutdownMessage> shutdownMessage = DROP_PROTOBUF(ShutdownMessage(), _, slave.get()->pid); Future<TaskStatus> lostStatus; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&lostStatus)); Future<Nothing> slaveLost; EXPECT_CALL(sched, slaveLost(&driver, _)) .WillOnce(FutureSatisfy(&slaveLost)); Clock::pause(); // Now, induce a partition of the slave by having the master // timeout the slave. size_t pings = 0; while (true) { AWAIT_READY(ping); pings++; if (pings == masterFlags.max_slave_ping_timeouts) { break; } ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _); Clock::advance(masterFlags.slave_ping_timeout); Clock::settle(); } Clock::advance(masterFlags.slave_ping_timeout); Clock::settle(); // The master will have notified the framework of the lost task. 
AWAIT_READY(lostStatus); EXPECT_EQ(TASK_LOST, lostStatus.get().state()); // Wait for the master to attempt to shut down the slave. AWAIT_READY(shutdownMessage); // The master will notify the framework that the slave was lost. AWAIT_READY(slaveLost); Clock::resume(); // We now complete the partition on the slave side as well. This // is done by simulating a master loss event which would normally // occur during a network partition. detector.appoint(None()); Future<Nothing> shutdown; EXPECT_CALL(exec, shutdown(_)) .WillOnce(FutureSatisfy(&shutdown)); shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()->pid); // Have the slave re-register with the master. detector.appoint(master.get()->pid); // Upon re-registration, the master will shutdown the slave. // The slave will then shut down the executor. AWAIT_READY(shutdownMessage); AWAIT_READY(shutdown); driver.stop(); driver.join(); }
// Verifies that a framework is not allowed to subscribe once its
// failover timeout has elapsed.
TEST_F(HttpFaultToleranceTest, SchedulerSubscribeAfterFailoverTimeout)
{
  master::Flags masterFlags = CreateMasterFlags();
  masterFlags.authenticate_frameworks = false;

  v1::FrameworkInfo info = v1::DEFAULT_FRAMEWORK_INFO;
  info.set_failover_timeout(Weeks(2).secs());

  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
  ASSERT_SOME(master);

  Future<Nothing> deactivated = FUTURE_DISPATCH(
      _, &master::allocator::MesosAllocatorProcess::deactivateFramework);

  v1::FrameworkID frameworkId;

  ContentType contentType = ContentType::PROTOBUF;

  // First (i.e., failing) scheduler: it subscribes, we record the
  // framework id from its `SUBSCRIBED` event and then let it go out
  // of scope.
  {
    auto first = std::make_shared<v1::MockHTTPScheduler>();

    Future<Nothing> connected;
    EXPECT_CALL(*first, connected(_))
      .WillOnce(FutureSatisfy(&connected));

    v1::scheduler::TestMesos mesos(master.get()->pid, contentType, first);

    AWAIT_READY(connected);

    Future<Event::Subscribed> subscribed;
    EXPECT_CALL(*first, subscribed(_, _))
      .WillOnce(FutureArg<1>(&subscribed));

    EXPECT_CALL(*first, heartbeat(_))
      .WillRepeatedly(Return()); // Ignore heartbeats.

    {
      Call request;
      request.set_type(Call::SUBSCRIBE);

      Call::Subscribe* payload = request.mutable_subscribe();
      payload->mutable_framework_info()->CopyFrom(info);

      mesos.send(request);
    }

    AWAIT_READY(subscribed);

    frameworkId = subscribed->framework_id();
  }

  // The master schedules the framework for removal.
  AWAIT_READY(deactivated);

  // Simulate framework failover timeout.
  Clock::pause();
  Clock::settle();

  Try<Duration> timeout = Duration::create(info.failover_timeout());
  ASSERT_SOME(timeout);

  Future<Nothing> timedOut =
    FUTURE_DISPATCH(_, &Master::frameworkFailoverTimeout);

  Clock::advance(timeout.get());
  Clock::resume();

  // The master actually marks the framework as completed.
  AWAIT_READY(timedOut);

  // Second (i.e., failover) scheduler: it subscribes with the
  // framework id recorded from the first scheduler.
  {
    auto second = std::make_shared<v1::MockHTTPScheduler>();

    Future<Nothing> connected;
    EXPECT_CALL(*second, connected(_))
      .WillOnce(FutureSatisfy(&connected))
      .WillRepeatedly(Return()); // Ignore future invocations.

    v1::scheduler::TestMesos mesos(master.get()->pid, contentType, second);

    AWAIT_READY(connected);

    // The framework with this id is marked as completed, so an
    // `Error` event is expected.
    Future<Nothing> failed;
    EXPECT_CALL(*second, error(_, _))
      .WillOnce(FutureSatisfy(&failed));

    EXPECT_CALL(*second, disconnected(_))
      .Times(AtMost(1));

    {
      Call request;
      request.mutable_framework_id()->CopyFrom(frameworkId);
      request.set_type(Call::SUBSCRIBE);

      Call::Subscribe* payload = request.mutable_subscribe();
      payload->mutable_framework_info()->CopyFrom(v1::DEFAULT_FRAMEWORK_INFO);
      payload->mutable_framework_info()->mutable_id()->CopyFrom(frameworkId);

      mesos.send(request);
    }

    AWAIT_READY(failed);
  }
}
// Test that memory pressure listening is restarted after recovery.
//
// The test launches a memory-hammering task under the `cgroups/mem`
// isolator, restarts the slave with a fresh containerizer, and then
// checks that the low memory pressure counter of the recovered
// container becomes non-zero.
TEST_F(MemoryPressureMesosTest, CGROUPS_ROOT_SlaveRecovery)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();

  // We only care about memory cgroup for this test.
  flags.isolation = "cgroups/mem";
  flags.agent_subsystems = None();

  Fetcher fetcher;

  Try<MesosContainerizer*> _containerizer =
    MesosContainerizer::create(flags, true, &fetcher);

  ASSERT_SOME(_containerizer);
  Owned<MesosContainerizer> containerizer(_containerizer.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;

  // Enable checkpointing for the framework.
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true);

  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);

  // This must be fatal: the first offer is indexed right below, so
  // continuing with an empty offer list would read out of bounds.
  ASSERT_NE(0u, offers.get().size());

  Offer offer = offers.get()[0];

  // Run a task that triggers memory pressure event. We request 1G
  // disk because we are going to write a 512 MB file repeatedly.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:256;disk:1024").get(),
      "while true; do dd count=512 bs=1M if=/dev/zero of=./temp; done");

  Future<TaskStatus> running;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&running));

  Future<Nothing> _statusUpdateAcknowledgement =
    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(running);
  EXPECT_EQ(task.task_id(), running.get().task_id());
  EXPECT_EQ(TASK_RUNNING, running.get().state());

  // Wait for the ACK to be checkpointed.
  AWAIT_READY_FOR(_statusUpdateAcknowledgement, Seconds(120));

  // We restart the slave to let it recover.
  slave.get()->terminate();

  // Set up so we can wait until the new slave updates the container's
  // resources (this occurs after the executor has re-registered).
  Future<Nothing> update =
    FUTURE_DISPATCH(_, &MesosContainerizerProcess::update);

  // Use the same flags.
  _containerizer = MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(_containerizer);
  containerizer.reset(_containerizer.get());

  Future<SlaveReregisteredMessage> reregistered =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), master.get()->pid, _);

  slave = StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  AWAIT_READY(reregistered);

  // Wait until the containerizer is updated.
  AWAIT_READY(update);

  Future<hashset<ContainerID>> containers = containerizer->containers();
  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers.get().size());

  ContainerID containerId = *(containers.get().begin());

  // Wait a while for some memory pressure events to occur.
  Duration waited = Duration::zero();
  do {
    Future<ResourceStatistics> usage = containerizer->usage(containerId);
    AWAIT_READY(usage);

    if (usage.get().mem_low_pressure_counter() > 0) {
      // We will check the correctness of the memory pressure counters
      // later, because the memory-hammering task is still active
      // and potentially incrementing these counters.
      break;
    }

    os::sleep(Milliseconds(100));
    waited += Milliseconds(100);
  } while (waited < Seconds(5));

  // The loop only leaves with `waited == Seconds(5)` when it timed out
  // without observing any pressure event, so the comparison has to be
  // strict for a timeout to be reported.
  EXPECT_LT(waited, Seconds(5));

  // Pause the clock to ensure that the reaper doesn't reap the exited
  // command executor and inform the containerizer/slave.
  Clock::pause();
  Clock::settle();

  Future<TaskStatus> killed;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&killed));

  // Stop the memory-hammering task.
  driver.killTask(task.task_id());

  AWAIT_READY_FOR(killed, Seconds(120));
  EXPECT_EQ(task.task_id(), killed->task_id());
  EXPECT_EQ(TASK_KILLED, killed->state());

  // Now check the correctness of the memory pressure counters.
  Future<ResourceStatistics> usage = containerizer->usage(containerId);
  AWAIT_READY(usage);

  EXPECT_GE(usage.get().mem_low_pressure_counter(),
            usage.get().mem_medium_pressure_counter());
  EXPECT_GE(usage.get().mem_medium_pressure_counter(),
            usage.get().mem_critical_pressure_counter());

  Clock::resume();

  driver.stop();
  driver.join();
}
// Checks that a retried status update carries the most recent task
// state in its `latest_state` field: the retried TASK_RUNNING update
// must report TASK_FINISHED as the latest state.
TEST_F(StatusUpdateManagerTest, LatestTaskState)
{
  Try<PID<Master>> master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor executor(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave>> slave = StartSlave(&executor);
  ASSERT_SOME(slave);

  MockScheduler scheduler;
  MesosSchedulerDriver driver(
      &scheduler, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  // Executor side: remember the driver and report TASK_RUNNING on launch.
  ExecutorDriver* executorDriver;
  EXPECT_CALL(executor, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&executorDriver));

  EXPECT_CALL(executor, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  // Scheduler side: launch one task on the first offer only.
  EXPECT_CALL(scheduler, registered(_, _, _));

  EXPECT_CALL(scheduler, resourceOffers(_, _))
    .WillOnce(LaunchTasks(DEFAULT_EXECUTOR_INFO, 1, 1, 512, "*"))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  // The first update on its way to the master gets dropped.
  Future<StatusUpdateMessage> droppedRunning =
    DROP_PROTOBUF(StatusUpdateMessage(), _, master.get());

  Future<Nothing> runningHandled =
    FUTURE_DISPATCH(_, &Slave::__statusUpdate);

  driver.start();

  // TASK_RUNNING has been sent towards the master ...
  AWAIT_READY(droppedRunning);

  // ... and the status update manager has handled it.
  AWAIT_READY(runningHandled);

  // With the clock paused the status update manager does not retry.
  Clock::pause();

  Future<Nothing> finishedHandled =
    FUTURE_DISPATCH(_, &Slave::__statusUpdate);

  // Send a TASK_FINISHED update derived from the dropped one.
  TaskStatus finishedStatus = droppedRunning.get().update().status();
  finishedStatus.set_state(TASK_FINISHED);
  executorDriver->sendStatusUpdate(finishedStatus);

  // The status update manager has handled TASK_FINISHED as well.
  AWAIT_READY(finishedHandled);

  // The retried update is dropped too, which lets us inspect it.
  Future<StatusUpdateMessage> retriedUpdate =
    DROP_PROTOBUF(StatusUpdateMessage(), _, master.get());

  // Trigger the retry of the status update manager.
  Clock::advance(slave::STATUS_UPDATE_RETRY_INTERVAL_MIN);

  AWAIT_READY(retriedUpdate);

  const StatusUpdate& retried = retriedUpdate.get().update();

  // The retry is for TASK_RUNNING ...
  ASSERT_EQ(TASK_RUNNING, retried.status().state());

  // ... but advertises TASK_FINISHED as the latest state.
  ASSERT_EQ(TASK_FINISHED, retried.latest_state());

  EXPECT_CALL(executor, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
// This test verifies that, for a framework with checkpointing
// enabled, both the status update and its acknowledgement show up
// in the state recovered from the slave's meta directory.
TEST_F(StatusUpdateManagerTest, CheckpointStatusUpdate)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  // Require flags to retrieve work_dir when recovering
  // the checkpointed data.
  slave::Flags flags = CreateSlaveFlags();

  Try<PID<Slave> > slave = StartSlave(&exec, flags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true); // Enable checkpointing.

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(_, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(frameworkId);
  AWAIT_READY(offers);

  // This must be fatal: the first offer is indexed below, so
  // continuing with an empty offer list would read out of bounds.
  ASSERT_NE(0u, offers.get().size());

  EXPECT_CALL(exec, registered(_, _, _, _))
    .Times(1);

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  Future<Nothing> _statusUpdateAcknowledgement =
    FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement);

  driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0]));

  AWAIT_READY(status);
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  AWAIT_READY(_statusUpdateAcknowledgement);

  // Ensure that both the status update and its acknowledgement are
  // correctly checkpointed.
  Result<slave::state::State> state =
    slave::state::recover(slave::paths::getMetaRootDir(flags.work_dir), true);

  ASSERT_SOME(state);
  ASSERT_SOME(state.get().slave);
  ASSERT_TRUE(state.get().slave.get().frameworks.contains(frameworkId.get()));

  // Walk down framework -> executor -> run -> task; each level is
  // expected to hold exactly one entry.
  slave::state::FrameworkState frameworkState =
    state.get().slave.get().frameworks.get(frameworkId.get()).get();

  ASSERT_EQ(1u, frameworkState.executors.size());

  slave::state::ExecutorState executorState =
    frameworkState.executors.begin()->second;

  ASSERT_EQ(1u, executorState.runs.size());

  slave::state::RunState runState = executorState.runs.begin()->second;

  ASSERT_EQ(1u, runState.tasks.size());

  slave::state::TaskState taskState = runState.tasks.begin()->second;

  EXPECT_EQ(1u, taskState.updates.size());
  EXPECT_EQ(1u, taskState.acks.size());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
// Sends three `Call::UPDATE` requests that are each expected to be
// rejected with a BadRequest: mismatching executor ids, a
// TASK_STAGING state, and a source other than SOURCE_EXECUTOR.
TEST_P(ExecutorHttpApiTest, StatusUpdateCallFailedValidation)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  Future<Nothing> __recover = FUTURE_DISPATCH(_, &Slave::__recover);

  Owned<MasterDetector> detector = master.get()->createDetector();
  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get());
  ASSERT_SOME(slave);

  AWAIT_READY(__recover);

  // Wait for recovery to be complete.
  Clock::pause();
  Clock::settle();

  // Posts `call` as protobuf to the executor endpoint of the agent,
  // asking for a JSON response.
  auto postUpdate = [&slave](const Call& call) -> Future<Response> {
    process::http::Headers headers;
    headers["Accept"] = APPLICATION_JSON;

    return process::http::post(
        slave.get()->pid,
        "api/v1/executor",
        headers,
        serialize(ContentType::PROTOBUF, call),
        APPLICATION_PROTOBUF);
  };

  // Fields shared by every call below.
  Call base;
  base.set_type(Call::UPDATE);
  base.mutable_framework_id()->set_value("dummy_framework_id");
  base.mutable_executor_id()->set_value("call_level_executor_id");
  base.mutable_update()->mutable_status()->mutable_task_id()->set_value(
      "dummy_task_id");

  // Executor id differs between Call::executor_id and the executor id
  // in the status of Call::Update. This should result in failed
  // validation.
  {
    Call call = base;

    v1::TaskStatus* status = call.mutable_update()->mutable_status();
    status->mutable_executor_id()->set_value("update_level_executor_id");
    status->set_state(mesos::v1::TaskState::TASK_STARTING);

    Future<Response> response = postUpdate(call);

    AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, response);
  }

  // A TASK_STAGING status update. This should fail validation.
  {
    Call call = base;

    v1::TaskStatus* status = call.mutable_update()->mutable_status();
    status->mutable_executor_id()->set_value("call_level_executor_id");
    status->set_state(mesos::v1::TaskState::TASK_STAGING);

    Future<Response> responseStatusUpdate = postUpdate(call);

    AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, responseStatusUpdate);
  }

  // A status update whose source is not SOURCE_EXECUTOR. This should
  // fail validation.
  {
    Call call = base;

    v1::TaskStatus* status = call.mutable_update()->mutable_status();
    status->mutable_executor_id()->set_value("call_level_executor_id");
    status->set_state(mesos::v1::TaskState::TASK_STARTING);
    status->set_source(mesos::v1::TaskStatus::SOURCE_MASTER);

    Future<Response> responseStatusUpdate = postUpdate(call);

    AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, responseStatusUpdate);
  }
}
// This test verifies that if master receives a status update // for an already terminated task it forwards it without // changing the state of the task. TEST_F(StatusUpdateManagerTest, DuplicatedTerminalStatusUpdate) { Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); Try<PID<Slave>> slave = StartSlave(&exec); ASSERT_SOME(slave); FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO; frameworkInfo.set_checkpoint(true); // Enable checkpointing. MockScheduler sched; MesosSchedulerDriver driver( &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL); FrameworkID frameworkId; EXPECT_CALL(sched, registered(_, _, _)) .WillOnce(SaveArg<1>(&frameworkId)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(_, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); ExecutorDriver* execDriver; EXPECT_CALL(exec, registered(_, _, _, _)) .WillOnce(SaveArg<0>(&execDriver)); // Send a terminal update right away. EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_FINISHED)); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&status)); Future<Nothing> _statusUpdateAcknowledgement = FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement); driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0])); AWAIT_READY(status); EXPECT_EQ(TASK_FINISHED, status.get().state()); AWAIT_READY(_statusUpdateAcknowledgement); Future<TaskStatus> update; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&update)); Future<Nothing> _statusUpdateAcknowledgement2 = FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement); Clock::pause(); // Now send a TASK_KILLED update for the same task. 
TaskStatus status2 = status.get(); status2.set_state(TASK_KILLED); execDriver->sendStatusUpdate(status2); // Ensure the scheduler receives TASK_KILLED. AWAIT_READY(update); EXPECT_EQ(TASK_KILLED, update.get().state()); // Ensure the slave properly handles the ACK. // Clock::settle() ensures that the slave successfully // executes Slave::_statusUpdateAcknowledgement(). AWAIT_READY(_statusUpdateAcknowledgement2); // Verify the latest task status. Future<process::http::Response> tasks = process::http::get(master.get(), "tasks"); AWAIT_EXPECT_RESPONSE_STATUS_EQ(process::http::OK().status, tasks); AWAIT_EXPECT_RESPONSE_HEADER_EQ(APPLICATION_JSON, "Content-Type", tasks); Try<JSON::Object> parse = JSON::parse<JSON::Object>(tasks.get().body); ASSERT_SOME(parse); Result<JSON::String> state = parse.get().find<JSON::String>("tasks[0].state"); ASSERT_SOME_EQ(JSON::String("TASK_FINISHED"), state); Clock::resume(); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); Shutdown(); }
// Posts well-formed `Call` protobufs that lack the per-type message
// (SUBSCRIBE, UPDATE and MESSAGE in turn) and expects a BadRequest
// for each of them.
TEST_P(ExecutorHttpApiTest, ValidProtobufInvalidCall)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  Future<Nothing> __recover = FUTURE_DISPATCH(_, &Slave::__recover);

  Owned<MasterDetector> detector = master.get()->createDetector();
  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get());
  ASSERT_SOME(slave);

  AWAIT_READY(__recover);

  // Wait for recovery to be complete.
  Clock::pause();
  Clock::settle();

  // Builds a call of the given type that only carries the framework
  // and executor ids, and posts it as protobuf to the agent.
  auto postBareCall = [&slave](Call::Type type) -> Future<Response> {
    Call call;
    call.set_type(type);
    call.mutable_framework_id()->set_value("dummy_framework_id");
    call.mutable_executor_id()->set_value("dummy_executor_id");

    process::http::Headers headers;
    headers["Accept"] = APPLICATION_JSON;

    return process::http::post(
        slave.get()->pid,
        "api/v1/executor",
        headers,
        serialize(ContentType::PROTOBUF, call),
        APPLICATION_PROTOBUF);
  };

  // Each call below is missing the message required for its type.
  {
    Future<Response> response = postBareCall(Call::SUBSCRIBE);

    AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, response);
  }

  {
    Future<Response> response = postBareCall(Call::UPDATE);

    AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, response);
  }

  {
    Future<Response> response = postBareCall(Call::MESSAGE);

    AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, response);
  }
}
// This test verifies that the slave and status update manager
// properly handle duplicate terminal status updates, when the
// second update is received after the ACK for the first update.
// The proper behavior here is for the status update manager to
// forward the duplicate update to the scheduler.
TEST_F(StatusUpdateManagerTest, DuplicateTerminalUpdateAfterAck)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  slave::Flags flags = CreateSlaveFlags();

  Try<PID<Slave> > slave = StartSlave(&exec, flags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true); // Enable checkpointing.

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  FrameworkID frameworkId;
  EXPECT_CALL(sched, registered(_, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);

  // This must be fatal: the first offer is indexed below, so
  // continuing with an empty offer list would read out of bounds.
  ASSERT_NE(0u, offers.get().size());

  ExecutorDriver* execDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  // Send a terminal update right away.
  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_FINISHED));

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  Future<Nothing> _statusUpdateAcknowledgement =
    FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement);

  driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0]));

  AWAIT_READY(status);

  EXPECT_EQ(TASK_FINISHED, status.get().state());

  // The first terminal update has been acknowledged at this point.
  AWAIT_READY(_statusUpdateAcknowledgement);

  Future<TaskStatus> update;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&update));

  Future<Nothing> _statusUpdateAcknowledgement2 =
    FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement);

  Clock::pause();

  // Now send a TASK_KILLED update for the same task.
  TaskStatus status2 = status.get();
  status2.set_state(TASK_KILLED);
  execDriver->sendStatusUpdate(status2);

  // Ensure the scheduler receives TASK_KILLED.
  AWAIT_READY(update);
  EXPECT_EQ(TASK_KILLED, update.get().state());

  // Ensure the slave properly handles the ACK.
  // Clock::settle() ensures that the slave successfully
  // executes Slave::_statusUpdateAcknowledgement().
  AWAIT_READY(_statusUpdateAcknowledgement2);
  Clock::settle();

  Clock::resume();

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
// This test verifies that status update manager ignores
// unexpected ACK for an earlier update when it is waiting
// for an ACK for another update. We do this by dropping ACKs
// for the original update and sending a random ACK to the slave.
TEST_F(StatusUpdateManagerTest, IgnoreUnexpectedStatusUpdateAck)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave> > slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true); // Enable checkpointing.

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  FrameworkID frameworkId;
  EXPECT_CALL(sched, registered(_, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  driver.start();

  AWAIT_READY(offers);

  // This must be fatal: the first offer is indexed below, so
  // continuing with an empty offer list would read out of bounds.
  ASSERT_NE(0u, offers.get().size());

  ExecutorDriver* execDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<StatusUpdateMessage> statusUpdateMessage =
    FUTURE_PROTOBUF(StatusUpdateMessage(), master.get(), _);

  // Drop the ACKs, so that status update manager
  // retries the update.
  DROP_CALLS(mesos::scheduler::Call(),
             mesos::scheduler::Call::ACKNOWLEDGE,
             _,
             master.get());

  driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0]));

  AWAIT_READY(statusUpdateMessage);
  StatusUpdate update = statusUpdateMessage.get().update();

  AWAIT_READY(status);

  EXPECT_EQ(TASK_RUNNING, status.get().state());

  Future<Nothing> unexpectedAck =
    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  // Now send an ACK with a random UUID. It reuses the slave id and
  // task id of the captured update, so only the UUID is unexpected.
  process::dispatch(
      slave.get(),
      &Slave::statusUpdateAcknowledgement,
      master.get(),
      update.slave_id(),
      frameworkId,
      update.status().task_id(),
      UUID::random().toBytes());

  AWAIT_READY(unexpectedAck);

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}
// This test verifies that the slave and status update manager
// properly handle duplicate status updates, when the second
// update with the same UUID is received before the ACK for the
// first update. The proper behavior here is for the status update
// manager to drop the duplicate update.
TEST_F(StatusUpdateManagerTest, DuplicateUpdateBeforeAck)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave> > slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true); // Enable checkpointing.

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  FrameworkID frameworkId;
  EXPECT_CALL(sched, registered(_, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);

  // This must be fatal: the first offer is indexed below, so
  // continuing with an empty offer list would read out of bounds.
  ASSERT_NE(0u, offers.get().size());

  ExecutorDriver* execDriver;
  EXPECT_CALL(exec, registered(_, _, _, _))
    .WillOnce(SaveArg<0>(&execDriver));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  // Capture the first status update message.
  Future<StatusUpdateMessage> statusUpdateMessage =
    FUTURE_PROTOBUF(StatusUpdateMessage(), _, _);

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&status));

  // Drop the first ACK from the scheduler to the slave.
  Future<StatusUpdateAcknowledgementMessage> statusUpdateAckMessage =
    DROP_PROTOBUF(StatusUpdateAcknowledgementMessage(), _, slave.get());

  Clock::pause();

  driver.launchTasks(offers.get()[0].id(), createTasks(offers.get()[0]));

  AWAIT_READY(statusUpdateMessage);

  AWAIT_READY(status);

  EXPECT_EQ(TASK_RUNNING, status.get().state());

  AWAIT_READY(statusUpdateAckMessage);

  Future<Nothing> __statusUpdate =
    FUTURE_DISPATCH(slave.get(), &Slave::__statusUpdate);

  // Now resend the TASK_RUNNING update.
  process::post(slave.get(), statusUpdateMessage.get());

  // At this point the status update manager has handled
  // the duplicate status update.
  AWAIT_READY(__statusUpdate);

  // After we advance the clock, the status update manager should
  // retry the TASK_RUNNING update and the scheduler should receive
  // and acknowledge it.
  Future<TaskStatus> update;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&update));

  Clock::advance(slave::STATUS_UPDATE_RETRY_INTERVAL_MIN);
  Clock::settle();

  // Ensure the scheduler receives the retried TASK_RUNNING update.
  AWAIT_READY(update);
  EXPECT_EQ(TASK_RUNNING, update.get().state());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  Clock::resume();

  driver.stop();
  driver.join();

  Shutdown();
}