TEST_F(ResourceOffersTest, ResourcesGetReofferedWhenUnused) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get()); ASSERT_SOME(slave); MockScheduler sched1; MesosSchedulerDriver driver1( &sched1, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched1, registered(&driver1, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched1, resourceOffers(&driver1, _)) .WillOnce(FutureArg<1>(&offers)); driver1.start(); AWAIT_READY(offers); ASSERT_FALSE(offers->empty()); vector<TaskInfo> tasks; // Use nothing! driver1.launchTasks(offers.get()[0].id(), tasks); MockScheduler sched2; MesosSchedulerDriver driver2( &sched2, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched2, registered(&driver2, _, _)); EXPECT_CALL(sched2, resourceOffers(&driver2, _)) .WillOnce(FutureArg<1>(&offers)); driver2.start(); AWAIT_READY(offers); // Stop first framework before second so no offers are sent. driver1.stop(); driver1.join(); driver2.stop(); driver2.join(); }
TEST_F(ResourceOffersTest, ResourcesGetReofferedAfterFrameworkStops) { Try<PID<Master>> master = StartMaster(); ASSERT_SOME(master); Try<PID<Slave>> slave = StartSlave(); ASSERT_SOME(slave); MockScheduler sched1; MesosSchedulerDriver driver1( &sched1, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched1, registered(&driver1, _, _)) .Times(1); Future<vector<Offer>> offers; EXPECT_CALL(sched1, resourceOffers(&driver1, _)) .WillOnce(FutureArg<1>(&offers)); driver1.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); driver1.stop(); driver1.join(); MockScheduler sched2; MesosSchedulerDriver driver2( &sched2, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched2, registered(&driver2, _, _)) .Times(1); EXPECT_CALL(sched2, resourceOffers(&driver2, _)) .WillOnce(FutureArg<1>(&offers)); driver2.start(); AWAIT_READY(offers); driver2.stop(); driver2.join(); Shutdown(); }
// This is a simple end to end test that makes sure a master using log // storage with ZooKeeper can successfully launch a task. TEST_F(RegistrarZooKeeperTest, TaskRunning) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers->size()); TaskInfo task = createTask(offers.get()[0], "dummy", DEFAULT_EXECUTOR_ID); EXPECT_CALL(exec, registered(_, _, _, _)); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); Future<Nothing> resourcesUpdated; EXPECT_CALL(containerizer, update(_, Resources(offers.get()[0].resources()))) .WillOnce(DoAll(FutureSatisfy(&resourcesUpdated), Return(Nothing()))); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(status); EXPECT_EQ(TASK_RUNNING, status->state()); AWAIT_READY(resourcesUpdated); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.stop(); driver.join(); }
// This test ensures that the command executor does not send // TASK_KILLING to frameworks that do not support the capability. TEST_P_TEMP_DISABLED_ON_WINDOWS(CommandExecutorTest, NoTaskKillingCapability) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); Owned<MasterDetector> detector = master.get()->createDetector(); slave::Flags flags = CreateSlaveFlags(); flags.http_command_executor = GetParam(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags); ASSERT_SOME(slave); // Start the framework without the task killing capability. MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_EQ(1u, offers->size()); // Launch a task with the command executor. TaskInfo task = createTask( offers->front().slave_id(), offers->front().resources(), "sleep 1000"); Future<TaskStatus> statusRunning; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&statusRunning)); driver.launchTasks(offers->front().id(), {task}); AWAIT_READY(statusRunning); EXPECT_EQ(TASK_RUNNING, statusRunning->state()); // There should only be a TASK_KILLED update. Future<TaskStatus> statusKilled; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&statusKilled)); driver.killTask(task.task_id()); AWAIT_READY(statusKilled); EXPECT_EQ(TASK_KILLED, statusKilled->state()); driver.stop(); driver.join(); }
// This test verifies that when the scheduler calls stop() before // abort(), no pending acknowledgements are sent. TEST_F(MesosSchedulerDriverTest, DropAckIfStopCalledBeforeAbort) { Try<PID<Master>> master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); Try<PID<Slave>> slave = StartSlave(&containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(LaunchTasks(DEFAULT_EXECUTOR_INFO, 1, 1, 16, "*")) .WillRepeatedly(Return()); // Ignore subsequent offers. // When an update is received, stop the driver and then abort it. Future<Nothing> statusUpdate; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(DoAll(StopAndAbort(), FutureSatisfy(&statusUpdate))); // Ensure no status update acknowledgements are sent from the driver // to the master. EXPECT_NO_FUTURE_CALLS( mesos::scheduler::Call(), mesos::scheduler::Call::ACKNOWLEDGE, _ , master.get()); EXPECT_CALL(exec, registered(_, _, _, _)); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.start(); AWAIT_READY(statusUpdate); // Settle the clock to ensure driver finishes processing the status // update and sends acknowledgement if necessary. In this test it // shouldn't send an acknowledgement. Clock::pause(); Clock::settle(); driver.stop(); driver.join(); Shutdown(); }
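// Note: the StopAndAbort() action used in the test above comes from the
// Mesos test helpers and is not defined in this snippet. A minimal,
// hypothetical sketch of such a gmock action (assuming the first argument
// of the mocked statusUpdate call is the SchedulerDriver*, as in the
// expectation above) could look like this:
ACTION(StopAndAbort)
{
  // arg0 is the driver pointer passed to MockScheduler::statusUpdate.
  arg0->stop();
  arg0->abort();
}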
TEST_F(ResourceOffersTest, ResourcesGetReofferedAfterFrameworkStops) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get()); ASSERT_SOME(slave); MockScheduler sched1; MesosSchedulerDriver driver1( &sched1, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched1, registered(&driver1, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched1, resourceOffers(&driver1, _)) .WillOnce(FutureArg<1>(&offers)); driver1.start(); AWAIT_READY(offers); EXPECT_FALSE(offers->empty()); driver1.stop(); driver1.join(); MockScheduler sched2; MesosSchedulerDriver driver2( &sched2, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched2, registered(&driver2, _, _)); EXPECT_CALL(sched2, resourceOffers(&driver2, _)) .WillOnce(FutureArg<1>(&offers)); driver2.start(); AWAIT_READY(offers); driver2.stop(); driver2.join(); }
TEST_F_TEMP_DISABLED_ON_WINDOWS( ResourceOffersTest, ResourceOfferWithMultipleSlaves) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); Owned<MasterDetector> detector = master.get()->createDetector(); vector<Owned<cluster::Slave>> slaves; // Start 10 slaves. for (int i = 0; i < 10; i++) { slave::Flags flags = CreateSlaveFlags(); flags.launcher = "posix"; flags.resources = Option<std::string>("cpus:2;mem:1024"); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags); ASSERT_SOME(slave); slaves.push_back(slave.get()); } MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // All 10 slaves might not be in first offer. driver.start(); AWAIT_READY(offers); ASSERT_FALSE(offers->empty()); EXPECT_GE(10u, offers->size()); Resources resources(offers.get()[0].resources()); EXPECT_EQ(2, resources.get<Value::Scalar>("cpus")->value()); EXPECT_EQ(1024, resources.get<Value::Scalar>("mem")->value()); driver.stop(); driver.join(); }
TEST_F(ResourceOffersTest, ResourceOfferWithMultipleSlaves) { Try<PID<Master>> master = StartMaster(); ASSERT_SOME(master); // Start 10 slaves. for (int i = 0; i < 10; i++) { slave::Flags flags = CreateSlaveFlags(); flags.resources = Option<std::string>("cpus:2;mem:1024"); Try<PID<Slave>> slave = StartSlave(flags); ASSERT_SOME(slave); } MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)) .Times(1); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // All 10 slaves might not be in first offer. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); EXPECT_GE(10u, offers.get().size()); Resources resources(offers.get()[0].resources()); EXPECT_EQ(2, resources.get<Value::Scalar>("cpus").get().value()); EXPECT_EQ(1024, resources.get<Value::Scalar>("mem").get().value()); driver.stop(); driver.join(); Shutdown(); }
// The purpose of this test is to ensure that when slaves are removed
// from the master, and then attempt to send status updates, we send
// a ShutdownMessage to the slave. Why? Because during a network
// partition, the master will remove a partitioned slave, thus sending
// its tasks to LOST. At this point, when the partition is removed,
// the slave may attempt to send updates if it was unaware that the
// master removed it. We've already notified frameworks that these
// tasks were LOST, so we have to have the slave shut down.
TEST_F(PartitionTest, PartitionedSlaveStatusUpdates)
{
  master::Flags masterFlags = CreateMasterFlags();
  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
  ASSERT_SOME(master);

  // Allow the master to PING the slave, but drop all PONG messages
  // from the slave. Note that we don't match on the master / slave
  // PIDs because it's actually the SlaveObserver Process that sends
  // the pings.
  Future<Message> ping = FUTURE_MESSAGE(
      Eq(PingSlaveMessage().GetTypeName()), _, _);

  DROP_PROTOBUFS(PongSlaveMessage(), _, _);

  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer);
  ASSERT_SOME(slave);

  AWAIT_READY(slaveRegisteredMessage);
  SlaveID slaveId = slaveRegisteredMessage.get().slave_id();

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(frameworkId);

  // Drop the first shutdown message from the master (simulated
  // partition), allow the second shutdown message to pass when
  // the slave sends an update.
  Future<ShutdownMessage> shutdownMessage =
    DROP_PROTOBUF(ShutdownMessage(), _, slave.get()->pid);

  EXPECT_CALL(sched, offerRescinded(&driver, _))
    .WillRepeatedly(Return());

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  Clock::pause();

  // Now, induce a partition of the slave by having the master
  // timeout the slave.
  size_t pings = 0;
  while (true) {
    AWAIT_READY(ping);
    pings++;
    if (pings == masterFlags.max_slave_ping_timeouts) {
      break;
    }
    ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
    Clock::advance(masterFlags.slave_ping_timeout);
    Clock::settle();
  }

  Clock::advance(masterFlags.slave_ping_timeout);
  Clock::settle();

  // Wait for the master to attempt to shut down the slave.
  AWAIT_READY(shutdownMessage);

  // The master will notify the framework that the slave was lost.
  AWAIT_READY(slaveLost);

  shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()->pid);

  // At this point, the slave still thinks it's registered, so we
  // simulate a status update coming from the slave.
  TaskID taskId;
  taskId.set_value("task_id");

  const StatusUpdate& update = protobuf::createStatusUpdate(
      frameworkId.get(),
      slaveId,
      taskId,
      TASK_RUNNING,
      TaskStatus::SOURCE_SLAVE,
      UUID::random());

  StatusUpdateMessage message;
  message.mutable_update()->CopyFrom(update);
  message.set_pid(stringify(slave.get()->pid));

  process::post(master.get()->pid, message);

  // The master should shutdown the slave upon receiving the update.
  AWAIT_READY(shutdownMessage);

  Clock::resume();

  driver.stop();
  driver.join();
}
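// The ping/advance loop above (and its repetitions in the partition tests
// below) could be factored into a helper. The following is only a
// hypothetical sketch, not part of the Mesos test harness; it assumes the
// caller has already paused the clock and installed the first
// FUTURE_MESSAGE expectation for PingSlaveMessage.
void induceSlavePingTimeouts(
    const master::Flags& masterFlags,
    Future<Message>& ping)
{
  size_t pings = 0;
  while (true) {
    AWAIT_READY(ping);
    pings++;
    if (pings == masterFlags.max_slave_ping_timeouts) {
      break;
    }
    ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
    Clock::advance(masterFlags.slave_ping_timeout);
    Clock::settle();
  }

  // One final timeout pushes the slave past the master's health check limit.
  Clock::advance(masterFlags.slave_ping_timeout);
  Clock::settle();
}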
// The purpose of this test is to ensure that when slaves are removed
// from the master, and then attempt to re-register, we deny the
// re-registration by sending a ShutdownMessage to the slave.
// Why? Because during a network partition, the master will remove a
// partitioned slave, thus sending its tasks to LOST. At this point,
// when the partition is removed, the slave will attempt to
// re-register with its running tasks. We've already notified
// frameworks that these tasks were LOST, so we have to have the
// slave shut down.
TEST_F(PartitionTest, PartitionedSlaveReregistration)
{
  master::Flags masterFlags = CreateMasterFlags();
  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
  ASSERT_SOME(master);

  // Allow the master to PING the slave, but drop all PONG messages
  // from the slave. Note that we don't match on the master / slave
  // PIDs because it's actually the SlaveObserver Process that sends
  // the pings.
  Future<Message> ping = FUTURE_MESSAGE(
      Eq(PingSlaveMessage().GetTypeName()), _, _);

  DROP_PROTOBUFS(PongSlaveMessage(), _, _);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  StandaloneMasterDetector detector(master.get()->pid);

  Try<Owned<cluster::Slave>> slave = StartSlave(&detector, &containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(offers);
  ASSERT_NE(0u, offers.get().size());

  // Launch a task. This is to ensure the task is killed by the slave
  // during shutdown.
  TaskID taskId;
  taskId.set_value("1");

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->MergeFrom(taskId);
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);
  task.mutable_executor()->mutable_command()->set_value("sleep 60");

  // Set up the expectations for launching the task.
  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> runningStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&runningStatus));

  Future<Nothing> statusUpdateAck = FUTURE_DISPATCH(
      slave.get()->pid, &Slave::_statusUpdateAcknowledgement);

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(runningStatus);
  EXPECT_EQ(TASK_RUNNING, runningStatus.get().state());

  // Wait for the slave to have handled the acknowledgment prior
  // to pausing the clock.
  AWAIT_READY(statusUpdateAck);

  // Drop the first shutdown message from the master (simulated
  // partition), allow the second shutdown message to pass when
  // the slave re-registers.
  Future<ShutdownMessage> shutdownMessage =
    DROP_PROTOBUF(ShutdownMessage(), _, slave.get()->pid);

  Future<TaskStatus> lostStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&lostStatus));

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  Clock::pause();

  // Now, induce a partition of the slave by having the master
  // timeout the slave.
  size_t pings = 0;
  while (true) {
    AWAIT_READY(ping);
    pings++;
    if (pings == masterFlags.max_slave_ping_timeouts) {
      break;
    }
    ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
    Clock::advance(masterFlags.slave_ping_timeout);
    Clock::settle();
  }

  Clock::advance(masterFlags.slave_ping_timeout);
  Clock::settle();

  // The master will have notified the framework of the lost task.
  AWAIT_READY(lostStatus);
  EXPECT_EQ(TASK_LOST, lostStatus.get().state());

  // Wait for the master to attempt to shut down the slave.
  AWAIT_READY(shutdownMessage);

  // The master will notify the framework that the slave was lost.
  AWAIT_READY(slaveLost);

  Clock::resume();

  // We now complete the partition on the slave side as well. This
  // is done by simulating a master loss event which would normally
  // occur during a network partition.
  detector.appoint(None());

  Future<Nothing> shutdown;
  EXPECT_CALL(exec, shutdown(_))
    .WillOnce(FutureSatisfy(&shutdown));

  shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()->pid);

  // Have the slave re-register with the master.
  detector.appoint(master.get()->pid);

  // Upon re-registration, the master will shutdown the slave.
  // The slave will then shut down the executor.
  AWAIT_READY(shutdownMessage);
  AWAIT_READY(shutdown);

  driver.stop();
  driver.join();
}
// This is an end-to-end test that verifies that the slave returns the
// correct ResourceUsage based on the currently running executors, and
// that the values obtained from the statistics endpoint are as expected.
TEST_F(MonitorIntegrationTest, RunningExecutor)
{
  Try<PID<Master>> master = StartMaster();
  ASSERT_SOME(master);

  Try<PID<Slave>> slave = StartSlave();
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_FALSE(offers.get().empty());

  const Offer& offer = offers.get()[0];

  // Launch a task and wait until it is in RUNNING status.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:32").get(),
      "sleep 1000");

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(status);
  EXPECT_EQ(task.task_id(), status.get().task_id());
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  // Hit the statistics endpoint and expect the response contains the
  // resource statistics for the running container.
  UPID upid("monitor", process::address());

  Future<http::Response> response = http::get(upid, "statistics");
  AWAIT_READY(response);

  AWAIT_EXPECT_RESPONSE_STATUS_EQ(http::OK().status, response);
  AWAIT_EXPECT_RESPONSE_HEADER_EQ(
      "application/json", "Content-Type", response);

  // Verify that the statistics in the response contains the proper
  // resource limits for the container.
  Try<JSON::Value> value = JSON::parse(response.get().body);
  ASSERT_SOME(value);

  Try<JSON::Value> expected = JSON::parse(strings::format(
      "[{"
          "\"statistics\":{"
              "\"cpus_limit\":%g,"
              "\"mem_limit_bytes\":%lu"
          "}"
      "}]",
      1 + slave::DEFAULT_EXECUTOR_CPUS,
      (Megabytes(32) + slave::DEFAULT_EXECUTOR_MEM).bytes()).get());

  ASSERT_SOME(expected);
  EXPECT_TRUE(value.get().contains(expected.get()));

  driver.stop();
  driver.join();

  Shutdown();
}
TEST_F(MemoryPressureMesosTest, CGROUPS_ROOT_Statistics) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); slave::Flags flags = CreateSlaveFlags(); // We only care about memory cgroup for this test. flags.isolation = "cgroups/mem"; flags.agent_subsystems = None(); Fetcher fetcher; Try<MesosContainerizer*> _containerizer = MesosContainerizer::create(flags, true, &fetcher); ASSERT_SOME(_containerizer); Owned<MesosContainerizer> containerizer(_containerizer.get()); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), containerizer.get(), flags); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(_, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(_, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); Offer offer = offers.get()[0]; // Run a task that triggers memory pressure event. We request 1G // disk because we are going to write a 512 MB file repeatedly. TaskInfo task = createTask( offer.slave_id(), Resources::parse("cpus:1;mem:256;disk:1024").get(), "while true; do dd count=512 bs=1M if=/dev/zero of=./temp; done"); Future<TaskStatus> running; Future<TaskStatus> killed; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&running)) .WillOnce(FutureArg<1>(&killed)) .WillRepeatedly(Return()); // Ignore subsequent updates. driver.launchTasks(offer.id(), {task}); AWAIT_READY(running); EXPECT_EQ(task.task_id(), running.get().task_id()); EXPECT_EQ(TASK_RUNNING, running.get().state()); Future<hashset<ContainerID>> containers = containerizer->containers(); AWAIT_READY(containers); ASSERT_EQ(1u, containers.get().size()); ContainerID containerId = *(containers.get().begin()); // Wait a while for some memory pressure events to occur. Duration waited = Duration::zero(); do { Future<ResourceStatistics> usage = containerizer->usage(containerId); AWAIT_READY(usage); if (usage.get().mem_low_pressure_counter() > 0) { // We will check the correctness of the memory pressure counters // later, because the memory-hammering task is still active // and potentially incrementing these counters. break; } os::sleep(Milliseconds(100)); waited += Milliseconds(100); } while (waited < Seconds(5)); EXPECT_LE(waited, Seconds(5)); // Pause the clock to ensure that the reaper doesn't reap the exited // command executor and inform the containerizer/slave. Clock::pause(); Clock::settle(); // Stop the memory-hammering task. driver.killTask(task.task_id()); AWAIT_READY_FOR(killed, Seconds(120)); EXPECT_EQ(task.task_id(), killed->task_id()); EXPECT_EQ(TASK_KILLED, killed->state()); // Now check the correctness of the memory pressure counters. Future<ResourceStatistics> usage = containerizer->usage(containerId); AWAIT_READY(usage); EXPECT_GE(usage.get().mem_low_pressure_counter(), usage.get().mem_medium_pressure_counter()); EXPECT_GE(usage.get().mem_medium_pressure_counter(), usage.get().mem_critical_pressure_counter()); Clock::resume(); driver.stop(); driver.join(); }
// This test ensures that the command executor sends TASK_KILLING // to frameworks that support the capability. TEST_F(CommandExecutorTest, TaskKillingCapability) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get()); ASSERT_SOME(slave); // Start the framework with the task killing capability. FrameworkInfo::Capability capability; capability.set_type(FrameworkInfo::Capability::TASK_KILLING_STATE); FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO; frameworkInfo.add_capabilities()->CopyFrom(capability); MockScheduler sched; MesosSchedulerDriver driver( &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_EQ(1u, offers->size()); // Launch a task with the command executor. TaskInfo task = createTask( offers->front().slave_id(), offers->front().resources(), "sleep 1000"); Future<TaskStatus> statusRunning; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&statusRunning)); driver.launchTasks(offers->front().id(), {task}); AWAIT_READY(statusRunning); EXPECT_EQ(TASK_RUNNING, statusRunning->state()); Future<TaskStatus> statusKilling, statusKilled; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&statusKilling)) .WillOnce(FutureArg<1>(&statusKilled)); driver.killTask(task.task_id()); AWAIT_READY(statusKilling); EXPECT_EQ(TASK_KILLING, statusKilling->state()); AWAIT_READY(statusKilled); EXPECT_EQ(TASK_KILLED, statusKilled->state()); driver.stop(); driver.join(); }
// This test verifies if the executor is able to receive a Subscribed // event in response to a Subscribe call request. TEST_P(ExecutorHttpApiTest, Subscribe) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); ExecutorID executorId = DEFAULT_EXECUTOR_ID; MockExecutor exec(executorId); TestContainerizer containerizer(&exec); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); Future<FrameworkID> frameworkId; EXPECT_CALL(sched, registered(&driver, _, _)) .WillOnce(FutureArg<1>(&frameworkId)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)); driver.start(); AWAIT_READY(frameworkId); AWAIT_READY(offers); ASSERT_EQ(1u, offers.get().size()); Future<Message> registerExecutorMessage = DROP_MESSAGE(Eq(RegisterExecutorMessage().GetTypeName()), _, _); TaskInfo taskInfo = createTask(offers.get()[0], "", executorId); driver.launchTasks(offers.get()[0].id(), {taskInfo}); // Drop the `RegisterExecutorMessage` and then send a `Subscribe` request // from the HTTP based executor. AWAIT_READY(registerExecutorMessage); Call call; call.mutable_framework_id()->CopyFrom(evolve(frameworkId.get())); call.mutable_executor_id()->CopyFrom(evolve(executorId)); call.set_type(Call::SUBSCRIBE); call.mutable_subscribe(); // Retrieve the parameter passed as content type to this test. const ContentType contentType = GetParam(); const string contentTypeString = stringify(contentType); process::http::Headers headers; headers["Accept"] = contentTypeString; Future<Response> response = process::http::streaming::post( slave.get()->pid, "api/v1/executor", headers, serialize(contentType, call), contentTypeString); AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response); AWAIT_EXPECT_RESPONSE_HEADER_EQ("chunked", "Transfer-Encoding", response); AWAIT_EXPECT_RESPONSE_HEADER_EQ(contentTypeString, "Content-Type", response); ASSERT_EQ(Response::PIPE, response.get().type); Option<Pipe::Reader> reader = response.get().reader; ASSERT_SOME(reader); auto deserializer = lambda::bind(deserialize<Event>, contentType, lambda::_1); Reader<Event> responseDecoder( Decoder<Event>(deserializer), reader.get()); Future<Result<Event>> event = responseDecoder.read(); AWAIT_READY(event); ASSERT_SOME(event.get()); // Check event type is subscribed and if the ExecutorID matches. ASSERT_EQ(Event::SUBSCRIBED, event.get().get().type()); ASSERT_EQ(event.get().get().subscribed().executor_info().executor_id(), call.executor_id()); reader.get().close(); driver.stop(); driver.join(); }
// This test ensures that when explicit acknowledgements are enabled, // acknowledgements for master-generated updates are dropped by the // driver. We test this by creating an invalid task that uses no // resources. TEST_F(MesosSchedulerDriverTest, ExplicitAcknowledgementsMasterGeneratedUpdate) { Try<PID<Master>> master = StartMaster(); ASSERT_SOME(master); Try<PID<Slave>> slave = StartSlave(); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), false, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. // Ensure no status update acknowledgements are sent to the master. EXPECT_NO_FUTURE_CALLS( mesos::scheduler::Call(), mesos::scheduler::Call::ACKNOWLEDGE, _ , master.get()); driver.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); // Launch a task using no resources. TaskInfo task; task.set_name(""); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); vector<TaskInfo> tasks; tasks.push_back(task); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); driver.launchTasks(offers.get()[0].id(), tasks); AWAIT_READY(status); ASSERT_EQ(TASK_ERROR, status.get().state()); ASSERT_EQ(TaskStatus::SOURCE_MASTER, status.get().source()); ASSERT_EQ(TaskStatus::REASON_TASK_INVALID, status.get().reason()); // Now send the acknowledgement. driver.acknowledgeStatusUpdate(status.get()); // Settle the clock to ensure driver processes the acknowledgement, // which should get dropped due to having come from the master. Clock::pause(); Clock::settle(); driver.stop(); driver.join(); Shutdown(); }
// This test ensures that the HTTP command executor can self terminate // after it gets the ACK for the terminal status update from agent. TEST_F_TEMP_DISABLED_ON_WINDOWS(HTTPCommandExecutorTest, TerminateWithACK) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); slave::Flags flags = CreateSlaveFlags(); flags.http_command_executor = true; Fetcher fetcher; Try<MesosContainerizer*> _containerizer = MesosContainerizer::create(flags, false, &fetcher); CHECK_SOME(_containerizer); Owned<MesosContainerizer> containerizer(_containerizer.get()); StandaloneMasterDetector detector(master.get()->pid); MockSlave slave(flags, &detector, containerizer.get()); spawn(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_EQ(1u, offers->size()); // Launch a short lived task. TaskInfo task = createTask( offers->front().slave_id(), offers->front().resources(), "sleep 1"); Future<TaskStatus> statusRunning; Future<TaskStatus> statusFinished; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&statusRunning)) .WillOnce(FutureArg<1>(&statusFinished)); Future<Future<Option<ContainerTermination>>> termination; EXPECT_CALL(slave, executorTerminated(_, _, _)) .WillOnce(FutureArg<2>(&termination)); driver.launchTasks(offers->front().id(), {task}); // Scheduler should first receive TASK_RUNNING followed by TASK_FINISHED. AWAIT_READY(statusRunning); EXPECT_EQ(TASK_RUNNING, statusRunning->state()); AWAIT_READY(statusFinished); EXPECT_EQ(TASK_FINISHED, statusFinished->state()); // The executor should self terminate with 0 as exit status once // it gets the ACK for the terminal status update from agent. AWAIT_READY(termination); ASSERT_TRUE(termination.get().isReady()); EXPECT_EQ(0, termination.get().get().get().status()); driver.stop(); driver.join(); terminate(slave); wait(slave); }
TEST_P(CpuIsolatorTest, ROOT_UserCpuUsage) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); slave::Flags flags = CreateSlaveFlags(); flags.isolation = GetParam(); Fetcher fetcher(flags); Try<MesosContainerizer*> _containerizer = MesosContainerizer::create(flags, true, &fetcher); ASSERT_SOME(_containerizer); Owned<MesosContainerizer> containerizer(_containerizer.get()); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave( detector.get(), containerizer.get()); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); ASSERT_FALSE(offers->empty()); // Max out a single core in userspace. This will run for at most one // second. TaskInfo task = createTask( offers.get()[0], "while true ; do true ; done & sleep 60"); Future<TaskStatus> statusRunning; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&statusRunning)); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(statusRunning); EXPECT_EQ(TASK_RUNNING, statusRunning->state()); Future<hashset<ContainerID>> containers = containerizer->containers(); AWAIT_READY(containers); ASSERT_EQ(1u, containers->size()); ContainerID containerId = *(containers->begin()); // Wait up to 1 second for the child process to induce 1/8 of a // second of user cpu time. ResourceStatistics statistics; Duration waited = Duration::zero(); do { Future<ResourceStatistics> usage = containerizer->usage(containerId); AWAIT_READY(usage); statistics = usage.get(); // If we meet our usage expectations, we're done! if (statistics.cpus_user_time_secs() >= 0.125) { break; } os::sleep(Milliseconds(200)); waited += Milliseconds(200); } while (waited < Seconds(1)); EXPECT_LE(0.125, statistics.cpus_user_time_secs()); driver.stop(); driver.join(); }
// This test verifies that docker image default cmd is executed correctly.
// This corresponds to the case in runtime isolator logic table: sh=0,
// value=0, argv=1, entrypoint=0, cmd=1.
TEST_F(DockerRuntimeIsolatorTest, ROOT_DockerDefaultCmdLocalPuller)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  const string directory = path::join(os::getcwd(), "archives");

  Future<Nothing> testImage =
    DockerArchive::create(directory, "alpine", "null", "[\"sh\"]");

  AWAIT_READY(testImage);
  ASSERT_TRUE(os::exists(path::join(directory, "alpine.tar")));

  slave::Flags flags = CreateSlaveFlags();
  flags.isolation = "docker/runtime,filesystem/linux";
  flags.image_providers = "docker";
  flags.docker_registry = directory;

  // Use a temporary directory as the docker store directory. Because the
  // manifest of the test image is changeable, the image cached by
  // previous tests should never be reused.
  flags.docker_store_dir = path::join(os::getcwd(), "store");

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_EQ(1u, offers->size());

  const Offer& offer = offers.get()[0];

  TaskInfo task;
  task.set_name("test-task");
  task.mutable_task_id()->set_value(UUID::random().toString());
  task.mutable_slave_id()->CopyFrom(offer.slave_id());
  task.mutable_resources()->CopyFrom(Resources::parse("cpus:1;mem:128").get());
  task.mutable_command()->set_shell(false);
  task.mutable_command()->add_arguments("-c");
  task.mutable_command()->add_arguments("echo 'hello world'");

  Image image;
  image.set_type(Image::DOCKER);
  image.mutable_docker()->set_name("alpine");

  ContainerInfo* container = task.mutable_container();
  container->set_type(ContainerInfo::MESOS);
  container->mutable_mesos()->mutable_image()->CopyFrom(image);

  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusFinished;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusFinished));

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY_FOR(statusRunning, Seconds(60));
  EXPECT_EQ(task.task_id(), statusRunning->task_id());
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  AWAIT_READY(statusFinished);
  EXPECT_EQ(task.task_id(), statusFinished->task_id());
  EXPECT_EQ(TASK_FINISHED, statusFinished->state());

  driver.stop();
  driver.join();
}
// This test verifies that docker image default entrypoint is executed // correctly using registry puller. This corresponds to the case in runtime // isolator logic table: sh=0, value=0, argv=1, entrypoint=1, cmd=0. TEST_F(DockerRuntimeIsolatorTest, ROOT_CURL_INTERNET_DockerDefaultEntryptRegistryPuller) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); slave::Flags flags = CreateSlaveFlags(); flags.isolation = "docker/runtime,filesystem/linux"; flags.image_providers = "docker"; flags.docker_store_dir = path::join(os::getcwd(), "store"); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); ASSERT_EQ(1u, offers->size()); const Offer& offer = offers.get()[0]; TaskInfo task; task.set_name("test-task"); task.mutable_task_id()->set_value(UUID::random().toString()); task.mutable_slave_id()->CopyFrom(offer.slave_id()); task.mutable_resources()->CopyFrom(Resources::parse("cpus:1;mem:128").get()); task.mutable_command()->set_shell(false); task.mutable_command()->add_arguments("hello world"); Image image; image.set_type(Image::DOCKER); // 'mesosphere/inky' image is used in docker containerizer test, which // contains entrypoint as 'echo' and cmd as null. image.mutable_docker()->set_name("mesosphere/inky"); ContainerInfo* container = task.mutable_container(); container->set_type(ContainerInfo::MESOS); container->mutable_mesos()->mutable_image()->CopyFrom(image); Future<TaskStatus> statusRunning; Future<TaskStatus> statusFinished; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&statusRunning)) .WillOnce(FutureArg<1>(&statusFinished)); driver.launchTasks(offer.id(), {task}); AWAIT_READY_FOR(statusRunning, Seconds(60)); EXPECT_EQ(task.task_id(), statusRunning->task_id()); EXPECT_EQ(TASK_RUNNING, statusRunning->state()); AWAIT_READY(statusFinished); EXPECT_EQ(task.task_id(), statusFinished->task_id()); EXPECT_EQ(TASK_FINISHED, statusFinished->state()); driver.stop(); driver.join(); }
// Test that memory pressure listening is restarted after recovery.
TEST_F(MemoryPressureMesosTest, CGROUPS_ROOT_SlaveRecovery)
{
  Try<PID<Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();

  // We only care about memory cgroup for this test.
  flags.isolation = "cgroups/mem";
  flags.slave_subsystems = None();

  Fetcher fetcher;

  Try<MesosContainerizer*> containerizer1 =
    MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(containerizer1);

  Try<PID<Slave>> slave = StartSlave(containerizer1.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;

  // Enable checkpointing for the framework.
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true);

  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  Offer offer = offers.get()[0];

  // Run a task that triggers memory pressure event. We request 1G
  // disk because we are going to write a 512 MB file repeatedly.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:256;disk:1024").get(),
      "while true; do dd count=512 bs=1M if=/dev/zero of=./temp; done");

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status))
    .WillRepeatedly(Return()); // Ignore subsequent updates.

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(status);
  EXPECT_EQ(task.task_id(), status.get().task_id());
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  // We restart the slave to let it recover.
  Stop(slave.get());
  delete containerizer1.get();

  // Set up so we can wait until the new slave updates the container's
  // resources (this occurs after the executor has re-registered).
  Future<Nothing> update =
    FUTURE_DISPATCH(_, &MesosContainerizerProcess::update);

  // Use the same flags.
  Try<MesosContainerizer*> containerizer2 =
    MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(containerizer2);

  slave = StartSlave(containerizer2.get(), flags);
  ASSERT_SOME(slave);

  // Wait until the containerizer is updated.
  AWAIT_READY(update);

  Future<hashset<ContainerID>> containers = containerizer2.get()->containers();
  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers.get().size());

  ContainerID containerId = *(containers.get().begin());

  // Wait a while for some memory pressure events to occur.
  Duration waited = Duration::zero();
  do {
    Future<ResourceStatistics> usage = containerizer2.get()->usage(containerId);
    AWAIT_READY(usage);

    if (usage.get().mem_low_pressure_counter() > 0) {
      // We will check the correctness of the memory pressure counters
      // later, because the memory-hammering task is still active
      // and potentially incrementing these counters.
      break;
    }

    os::sleep(Milliseconds(100));
    waited += Milliseconds(100);
  } while (waited < Seconds(5));

  EXPECT_LE(waited, Seconds(5));

  // Stop the memory-hammering task.
  driver.killTask(task.task_id());

  // Process any queued up events through before proceeding.
  process::Clock::pause();
  process::Clock::settle();
  process::Clock::resume();

  // Now check the correctness of the memory pressure counters.
  Future<ResourceStatistics> usage = containerizer2.get()->usage(containerId);
  AWAIT_READY(usage);

  EXPECT_GE(usage.get().mem_low_pressure_counter(),
            usage.get().mem_medium_pressure_counter());
  EXPECT_GE(usage.get().mem_medium_pressure_counter(),
            usage.get().mem_critical_pressure_counter());

  driver.stop();
  driver.join();

  Shutdown();

  delete containerizer2.get();
}
// Test that memory pressure listening is restarted after recovery.
TEST_F(MemoryPressureMesosTest, CGROUPS_ROOT_SlaveRecovery)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();

  // We only care about memory cgroup for this test.
  flags.isolation = "cgroups/mem";

  Fetcher fetcher(flags);

  Try<MesosContainerizer*> _containerizer =
    MesosContainerizer::create(flags, true, &fetcher);

  ASSERT_SOME(_containerizer);
  Owned<MesosContainerizer> containerizer(_containerizer.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;

  // Enable checkpointing for the framework.
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true);

  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  // Run a task that triggers memory pressure event. We request 1G
  // disk because we are going to write a 512 MB file repeatedly.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:256;disk:1024").get(),
      "while true; do dd count=512 bs=1M if=/dev/zero of=./temp; done");

  Future<TaskStatus> starting;
  Future<TaskStatus> running;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&starting))
    .WillOnce(FutureArg<1>(&running))
    .WillRepeatedly(Return()); // Ignore subsequent updates.

  Future<Nothing> runningAck =
    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  Future<Nothing> startingAck =
    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  driver.launchTasks(offers.get()[0].id(), {task});

  AWAIT_READY(starting);
  EXPECT_EQ(task.task_id(), starting->task_id());
  EXPECT_EQ(TASK_STARTING, starting->state());

  AWAIT_READY(startingAck);

  AWAIT_READY(running);
  EXPECT_EQ(task.task_id(), running->task_id());
  EXPECT_EQ(TASK_RUNNING, running->state());

  // Wait for the ACK to be checkpointed.
  AWAIT_READY_FOR(runningAck, Seconds(120));

  // We restart the slave to let it recover.
  slave.get()->terminate();

  // Set up so we can wait until the new slave updates the container's
  // resources (this occurs after the executor has re-registered).
  Future<Nothing> update =
    FUTURE_DISPATCH(_, &MesosContainerizerProcess::update);

  // Use the same flags.
  _containerizer = MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(_containerizer);
  containerizer.reset(_containerizer.get());

  Future<SlaveReregisteredMessage> reregistered =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), master.get()->pid, _);

  slave = StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  AWAIT_READY(reregistered);

  // Wait until the containerizer is updated.
  AWAIT_READY(update);

  Future<hashset<ContainerID>> containers = containerizer->containers();
  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers->size());

  ContainerID containerId = *(containers->begin());

  // Wait a while for some memory pressure events to occur.
  Duration waited = Duration::zero();
  do {
    Future<ResourceStatistics> usage = containerizer->usage(containerId);
    AWAIT_READY(usage);

    if (usage->mem_low_pressure_counter() > 0) {
      // We will check the correctness of the memory pressure counters
      // later, because the memory-hammering task is still active
      // and potentially incrementing these counters.
      break;
    }

    os::sleep(Milliseconds(100));
    waited += Milliseconds(100);
  } while (waited < Seconds(5));

  EXPECT_LE(waited, Seconds(5));

  // Pause the clock to ensure that the reaper doesn't reap the exited
  // command executor and inform the containerizer/slave.
  Clock::pause();
  Clock::settle();

  Future<TaskStatus> killed;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&killed));

  // Stop the memory-hammering task.
  driver.killTask(task.task_id());

  AWAIT_READY_FOR(killed, Seconds(120));
  EXPECT_EQ(task.task_id(), killed->task_id());
  EXPECT_EQ(TASK_KILLED, killed->state());

  // Now check the correctness of the memory pressure counters.
  Future<ResourceStatistics> usage = containerizer->usage(containerId);
  AWAIT_READY(usage);

  EXPECT_GE(usage->mem_low_pressure_counter(),
            usage->mem_medium_pressure_counter());
  EXPECT_GE(usage->mem_medium_pressure_counter(),
            usage->mem_critical_pressure_counter());

  Clock::resume();

  driver.stop();
  driver.join();
}
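// The polling loop above is repeated in several memory pressure tests in
// this file. A hypothetical helper (not part of the Mesos test harness)
// that waits for the first low-pressure event could look like this; it
// assumes the MesosContainerizer API used above.
void waitForLowMemoryPressure(
    MesosContainerizer* containerizer,
    const ContainerID& containerId)
{
  Duration waited = Duration::zero();
  do {
    Future<ResourceStatistics> usage = containerizer->usage(containerId);
    AWAIT_READY(usage);

    // Stop polling as soon as at least one low-pressure event is counted;
    // the counters themselves are validated after the task is killed.
    if (usage->mem_low_pressure_counter() > 0) {
      break;
    }

    os::sleep(Milliseconds(100));
    waited += Milliseconds(100);
  } while (waited < Seconds(5));

  EXPECT_LE(waited, Seconds(5));
}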
// The purpose of this test is to ensure that when slaves are removed
// from the master, and then attempt to send exited executor messages,
// we send a ShutdownMessage to the slave. Why? Because during a
// network partition, the master will remove a partitioned slave, thus
// sending its tasks to LOST. At this point, when the partition is
// removed, the slave may attempt to send exited executor messages if
// it was unaware that the master removed it. We've already
// notified frameworks that the tasks under the executors were LOST,
// so we have to have the slave shut down.
TEST_F(PartitionTest, PartitionedSlaveExitedExecutor)
{
  master::Flags masterFlags = CreateMasterFlags();
  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
  ASSERT_SOME(master);

  // Allow the master to PING the slave, but drop all PONG messages
  // from the slave. Note that we don't match on the master / slave
  // PIDs because it's actually the SlaveObserver Process that sends
  // the pings.
  Future<Message> ping = FUTURE_MESSAGE(
      Eq(PingSlaveMessage().GetTypeName()), _, _);

  DROP_PROTOBUFS(PongSlaveMessage(), _, _);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);
  TestContainerizer containerizer(&exec);

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return());

  driver.start();

  AWAIT_READY(frameworkId);
  AWAIT_READY(offers);
  ASSERT_NE(0u, offers.get().size());

  // Launch a task. This allows us to have the slave send an
  // ExitedExecutorMessage.
  TaskID taskId;
  taskId.set_value("1");

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->MergeFrom(taskId);
  task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id());
  task.mutable_resources()->MergeFrom(offers.get()[0].resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);
  task.mutable_executor()->mutable_command()->set_value("sleep 60");

  // Set up the expectations for launching the task.
  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  // Drop all the status updates from the slave, so that we can
  // ensure the ExitedExecutorMessage is what triggers the slave
  // shutdown.
  DROP_PROTOBUFS(StatusUpdateMessage(), _, master.get()->pid);

  driver.launchTasks(offers.get()[0].id(), {task});

  // Drop the first shutdown message from the master (simulated
  // partition) and allow the second shutdown message to pass when
  // triggered by the ExitedExecutorMessage.
  Future<ShutdownMessage> shutdownMessage =
    DROP_PROTOBUF(ShutdownMessage(), _, slave.get()->pid);

  Future<TaskStatus> lostStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&lostStatus));

  Future<Nothing> slaveLost;
  EXPECT_CALL(sched, slaveLost(&driver, _))
    .WillOnce(FutureSatisfy(&slaveLost));

  Clock::pause();

  // Now, induce a partition of the slave by having the master
  // timeout the slave.
  size_t pings = 0;
  while (true) {
    AWAIT_READY(ping);
    pings++;
    if (pings == masterFlags.max_slave_ping_timeouts) {
      break;
    }
    ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
    Clock::advance(masterFlags.slave_ping_timeout);
    Clock::settle();
  }

  Clock::advance(masterFlags.slave_ping_timeout);
  Clock::settle();

  // The master will have notified the framework of the lost task.
  AWAIT_READY(lostStatus);
  EXPECT_EQ(TASK_LOST, lostStatus.get().state());

  // Wait for the master to attempt to shut down the slave.
  AWAIT_READY(shutdownMessage);

  // The master will notify the framework that the slave was lost.
  AWAIT_READY(slaveLost);

  shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get()->pid);

  // Induce an ExitedExecutorMessage from the slave.
  containerizer.destroy(
      frameworkId.get(), DEFAULT_EXECUTOR_INFO.executor_id());

  // Upon receiving the message, the master will shutdown the slave.
  AWAIT_READY(shutdownMessage);

  Clock::resume();

  driver.stop();
  driver.join();
}
// This test does not set any Accept header for the subscribe call. // The default response media type should be "application/json" in // this case. TEST_P(ExecutorHttpApiTest, NoAcceptHeader) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); ExecutorID executorId = DEFAULT_EXECUTOR_ID; MockExecutor exec(executorId); TestContainerizer containerizer(&exec); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); Future<FrameworkID> frameworkId; EXPECT_CALL(sched, registered(&driver, _, _)) .WillOnce(FutureArg<1>(&frameworkId)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)); Future<Nothing> statusUpdate; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureSatisfy(&statusUpdate)); driver.start(); AWAIT_READY(frameworkId); AWAIT_READY(offers); ASSERT_EQ(1u, offers.get().size()); EXPECT_CALL(exec, registered(_, _, _, _)) .Times(1); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); TaskInfo taskInfo = createTask(offers.get()[0], "", executorId); driver.launchTasks(offers.get()[0].id(), {taskInfo}); // Wait until status update is received on the scheduler before sending // an executor subscribe request. AWAIT_READY(statusUpdate); // Only subscribe needs to 'Accept' JSON or protobuf. Call call; call.mutable_framework_id()->CopyFrom(evolve(frameworkId.get())); call.mutable_executor_id()->CopyFrom(evolve(executorId)); call.set_type(Call::SUBSCRIBE); call.mutable_subscribe(); // Retrieve the parameter passed as content type to this test. const ContentType contentType = GetParam(); // No 'Accept' header leads to all media types considered // acceptable. JSON will be chosen by default. process::http::Headers headers; Future<Response> response = process::http::streaming::post( slave.get()->pid, "api/v1/executor", headers, serialize(contentType, call), stringify(contentType)); AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response); AWAIT_EXPECT_RESPONSE_HEADER_EQ(APPLICATION_JSON, "Content-Type", response); driver.stop(); driver.join(); }
// Ensures that when a scheduler enables explicit acknowledgements // on the driver, there are no implicit acknowledgements sent, and // the call to 'acknowledgeStatusUpdate' sends the ack to the master. TEST_F(MesosSchedulerDriverTest, ExplicitAcknowledgements) { Try<PID<Master>> master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); Try<PID<Slave>> slave = StartSlave(&containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), false, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(LaunchTasks(DEFAULT_EXECUTOR_INFO, 1, 1, 16, "*")) .WillRepeatedly(Return()); // Ignore subsequent offers. Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); // Ensure no status update acknowledgements are sent from the driver // to the master until the explicit acknowledgement is sent. EXPECT_NO_FUTURE_CALLS( mesos::scheduler::Call(), mesos::scheduler::Call::ACKNOWLEDGE, _ , master.get()); EXPECT_CALL(exec, registered(_, _, _, _)); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); driver.start(); AWAIT_READY(status); // Settle the clock to ensure driver finishes processing the status // update, we want to ensure that no implicit acknowledgement gets // sent. Clock::pause(); Clock::settle(); // Now send the acknowledgement. Future<mesos::scheduler::Call> acknowledgement = FUTURE_CALL( mesos::scheduler::Call(), mesos::scheduler::Call::ACKNOWLEDGE, _, master.get()); driver.acknowledgeStatusUpdate(status.get()); AWAIT_READY(acknowledgement); driver.stop(); driver.join(); Shutdown(); }
// This test ensures that a task will transition straight from `TASK_KILLING` to // `TASK_KILLED`, even if the health check begins to fail during the kill policy // grace period. // // TODO(gkleiman): this test takes about 7 seconds to run, consider using mock // tasks and health checkers to speed it up. TEST_P(CommandExecutorTest, NoTransitionFromKillingToRunning) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); Owned<MasterDetector> detector = master.get()->createDetector(); slave::Flags flags = CreateSlaveFlags(); flags.http_command_executor = GetParam(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags); ASSERT_SOME(slave); // Start the framework with the task killing capability. FrameworkInfo::Capability capability; capability.set_type(FrameworkInfo::Capability::TASK_KILLING_STATE); FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO; frameworkInfo.add_capabilities()->CopyFrom(capability); MockScheduler sched; MesosSchedulerDriver driver( &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_EQ(1u, offers->size()); const string command = strings::format( "%s %s --sleep_duration=15", getTestHelperPath("test-helper"), KillPolicyTestHelper::NAME).get(); TaskInfo task = createTask(offers->front(), command); // Create a health check that succeeds until a temporary file is removed. Try<string> temporaryPath = os::mktemp(path::join(os::getcwd(), "XXXXXX")); ASSERT_SOME(temporaryPath); const string tmpPath = temporaryPath.get(); HealthCheck healthCheck; healthCheck.set_type(HealthCheck::COMMAND); healthCheck.mutable_command()->set_value("ls " + tmpPath + " >/dev/null"); healthCheck.set_delay_seconds(0); healthCheck.set_grace_period_seconds(0); healthCheck.set_interval_seconds(0); task.mutable_health_check()->CopyFrom(healthCheck); // Set the kill policy grace period to 5 seconds. KillPolicy killPolicy; killPolicy.mutable_grace_period()->set_nanoseconds(Seconds(5).ns()); task.mutable_kill_policy()->CopyFrom(killPolicy); vector<TaskInfo> tasks; tasks.push_back(task); Future<TaskStatus> statusRunning; Future<TaskStatus> statusHealthy; Future<TaskStatus> statusKilling; Future<TaskStatus> statusKilled; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&statusRunning)) .WillOnce(FutureArg<1>(&statusHealthy)) .WillOnce(FutureArg<1>(&statusKilling)) .WillOnce(FutureArg<1>(&statusKilled)); driver.launchTasks(offers->front().id(), tasks); AWAIT_READY(statusRunning); EXPECT_EQ(TASK_RUNNING, statusRunning.get().state()); AWAIT_READY(statusHealthy); EXPECT_EQ(TASK_RUNNING, statusHealthy.get().state()); EXPECT_TRUE(statusHealthy.get().has_healthy()); EXPECT_TRUE(statusHealthy.get().healthy()); driver.killTask(task.task_id()); AWAIT_READY(statusKilling); EXPECT_EQ(TASK_KILLING, statusKilling->state()); EXPECT_FALSE(statusKilling.get().has_healthy()); // Remove the temporary file, so that the health check fails. os::rm(tmpPath); AWAIT_READY(statusKilled); EXPECT_EQ(TASK_KILLED, statusKilled->state()); EXPECT_FALSE(statusKilled.get().has_healthy()); driver.stop(); driver.join(); }
TEST_F(ResourceOffersTest, ResourcesGetReofferedAfterTaskInfoError) { Try<PID<Master>> master = StartMaster(); ASSERT_SOME(master); Try<PID<Slave>> slave = StartSlave(); ASSERT_SOME(slave); MockScheduler sched1; MesosSchedulerDriver driver1( &sched1, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched1, registered(&driver1, _, _)) .Times(1); Future<vector<Offer>> offers; EXPECT_CALL(sched1, resourceOffers(&driver1, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver1.start(); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); TaskInfo task; task.set_name(""); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); Resource* cpus = task.add_resources(); cpus->set_name("cpus"); cpus->set_type(Value::SCALAR); cpus->mutable_scalar()->set_value(-1); Resource* mem = task.add_resources(); mem->set_name("mem"); mem->set_type(Value::SCALAR); mem->mutable_scalar()->set_value(Gigabytes(1).bytes()); vector<TaskInfo> tasks; tasks.push_back(task); Future<TaskStatus> status; EXPECT_CALL(sched1, statusUpdate(&driver1, _)) .WillOnce(FutureArg<1>(&status)); driver1.launchTasks(offers.get()[0].id(), tasks); AWAIT_READY(status); EXPECT_EQ(task.task_id(), status.get().task_id()); EXPECT_EQ(TASK_ERROR, status.get().state()); EXPECT_EQ(TaskStatus::REASON_TASK_INVALID, status.get().reason()); EXPECT_TRUE(status.get().has_message()); EXPECT_TRUE(strings::startsWith( status.get().message(), "Task uses invalid resources")); MockScheduler sched2; MesosSchedulerDriver driver2( &sched2, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); EXPECT_CALL(sched2, registered(&driver2, _, _)) .Times(1); EXPECT_CALL(sched2, resourceOffers(&driver2, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver2.start(); AWAIT_READY(offers); driver1.stop(); driver1.join(); driver2.stop(); driver2.join(); Shutdown(); }
// This test ensures that driver based schedulers using explicit // acknowledgements can acknowledge status updates sent from // HTTP based executors. TEST_F_TEMP_DISABLED_ON_WINDOWS( HTTPCommandExecutorTest, ExplicitAcknowledgements) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); Owned<MasterDetector> detector = master.get()->createDetector(); slave::Flags flags = CreateSlaveFlags(); flags.http_command_executor = true; Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, false, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_EQ(1u, offers->size()); // Launch a task with the command executor. TaskInfo task = createTask( offers->front().slave_id(), offers->front().resources(), "sleep 1000"); Future<TaskStatus> statusRunning; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&statusRunning)); // Ensure no status update acknowledgements are sent from the driver // to the master until the explicit acknowledgement is sent. EXPECT_NO_FUTURE_CALLS( mesos::scheduler::Call(), mesos::scheduler::Call::ACKNOWLEDGE, _ , master.get()->pid); driver.launchTasks(offers->front().id(), {task}); AWAIT_READY(statusRunning); EXPECT_TRUE(statusRunning->has_slave_id()); EXPECT_EQ(TASK_RUNNING, statusRunning->state()); // Now send the acknowledgement. Future<mesos::scheduler::Call> acknowledgement = FUTURE_CALL( mesos::scheduler::Call(), mesos::scheduler::Call::ACKNOWLEDGE, _, master.get()->pid); driver.acknowledgeStatusUpdate(statusRunning.get()); AWAIT_READY(acknowledgement); driver.stop(); driver.join(); }
TEST_F(ResourceOffersTest, ResourcesGetReofferedAfterTaskInfoError) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get()); ASSERT_SOME(slave); MockScheduler sched1; MesosSchedulerDriver driver1( &sched1, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched1, registered(&driver1, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched1, resourceOffers(&driver1, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver1.start(); AWAIT_READY(offers); ASSERT_FALSE(offers->empty()); TaskInfo task; task.set_name(""); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->MergeFrom(offers.get()[0].slave_id()); task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO); Resource* cpus = task.add_resources(); cpus->set_name("cpus"); cpus->set_type(Value::SCALAR); cpus->mutable_scalar()->set_value(-1); Resource* mem = task.add_resources(); mem->set_name("mem"); mem->set_type(Value::SCALAR); mem->mutable_scalar()->set_value(static_cast<double>(Gigabytes(1).bytes())); vector<TaskInfo> tasks; tasks.push_back(task); Future<TaskStatus> status; EXPECT_CALL(sched1, statusUpdate(&driver1, _)) .WillOnce(FutureArg<1>(&status)); driver1.launchTasks(offers.get()[0].id(), tasks); AWAIT_READY(status); EXPECT_EQ(task.task_id(), status->task_id()); EXPECT_EQ(TASK_ERROR, status->state()); EXPECT_EQ(TaskStatus::REASON_TASK_INVALID, status->reason()); EXPECT_TRUE(status->has_message()); EXPECT_TRUE(strings::contains(status->message(), "Invalid scalar resource")) << status->message(); MockScheduler sched2; MesosSchedulerDriver driver2( &sched2, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched2, registered(&driver2, _, _)); EXPECT_CALL(sched2, resourceOffers(&driver2, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver2.start(); AWAIT_READY(offers); driver1.stop(); driver1.join(); driver2.stop(); driver2.join(); }
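// A minimal sketch (hypothetical helper, not used by the test) of how a
// scheduler can recognize the TASK_ERROR update produced above:
// REASON_TASK_INVALID means the master rejected the TaskInfo up front, so
// the task never ran, no resources were consumed, and the offer's
// resources are reoffered, as the test verifies.
#include <mesos/mesos.hpp>

bool rejectedAsInvalid(const mesos::TaskStatus& status)
{
  return status.state() == mesos::TASK_ERROR &&
    status.reason() == mesos::TaskStatus::REASON_TASK_INVALID;
}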
// This test has been temporarily disabled due to MESOS-1257. TEST_F(ExternalContainerizerTest, DISABLED_Launch) { Try<PID<Master> > master = this->StartMaster(); ASSERT_SOME(master); Flags testFlags; slave::Flags flags = this->CreateSlaveFlags(); flags.isolation = "external"; flags.containerizer_path = testFlags.build_dir + "/src/examples/python/test-containerizer"; MockExternalContainerizer containerizer(flags); Try<PID<Slave> > slave = this->StartSlave(&containerizer, flags); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); Future<FrameworkID> frameworkId; EXPECT_CALL(sched, registered(&driver, _, _)) .WillOnce(FutureArg<1>(&frameworkId)); Future<vector<Offer> > offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(frameworkId); AWAIT_READY(offers); EXPECT_NE(0u, offers.get().size()); TaskInfo task; task.set_name("isolator_test"); task.mutable_task_id()->set_value("1"); task.mutable_slave_id()->CopyFrom(offers.get()[0].slave_id()); task.mutable_resources()->CopyFrom(offers.get()[0].resources()); Resources resources(offers.get()[0].resources()); Option<Bytes> mem = resources.mem(); ASSERT_SOME(mem); Option<double> cpus = resources.cpus(); ASSERT_SOME(cpus); const std::string& file = path::join(flags.work_dir, "ready"); // This task induces user/system load by running 'top' in a child process. task.mutable_command()->set_value( #ifdef __APPLE__ // Logging mode, with 30,000 samples and no interval. "top -l 30000 -s 0 2>&1 > /dev/null & " #else // Batch mode, with 30,000 samples and no interval. "top -b -d 0 -n 30000 2>&1 > /dev/null & " #endif "touch " + file + "; " // Signals that the top command is running. "sleep 60"); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)) .WillRepeatedly(Return()); // Ignore rest for now. Future<ContainerID> containerId; EXPECT_CALL(containerizer, launch(_, _, _, _, _, _, _, _)) .WillOnce(DoAll(FutureArg<0>(&containerId), Invoke(&containerizer, &MockExternalContainerizer::_launch))); driver.launchTasks(offers.get()[0].id(), {task}); AWAIT_READY(containerId); AWAIT_READY(status); EXPECT_EQ(TASK_RUNNING, status.get().state()); // Wait for the task to begin inducing cpu time. while (!os::exists(file)); ExecutorID executorId; executorId.set_value(task.task_id().value()); // We'll wait up to 10 seconds for the child process to induce // at least 0.12 seconds of user and 0.05 seconds of system cpu time. // TODO(bmahler): Also induce rss memory consumption, by re-using // the balloon framework. ResourceStatistics statistics; Duration waited = Duration::zero(); do { Future<ResourceStatistics> usage = containerizer.usage(containerId.get()); AWAIT_READY(usage); statistics = usage.get(); // If we meet our usage expectations, we're done! // NOTE: We are currently getting dummy data from the test- // containerizer python script matching these expectations. // TODO(tillt): Consider working with real data.
if (statistics.cpus_user_time_secs() >= 0.120 && statistics.cpus_system_time_secs() >= 0.05 && statistics.mem_rss_bytes() >= 1024u) { break; } os::sleep(Milliseconds(100)); waited += Milliseconds(100); } while (waited < Seconds(10)); EXPECT_GE(statistics.cpus_user_time_secs(), 0.120); EXPECT_GE(statistics.cpus_system_time_secs(), 0.05); EXPECT_EQ(statistics.cpus_limit(), cpus.get()); EXPECT_GE(statistics.mem_rss_bytes(), 1024u); EXPECT_EQ(statistics.mem_limit_bytes(), mem.get().bytes()); EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); driver.killTask(task.task_id()); AWAIT_READY(status); EXPECT_EQ(TASK_KILLED, status.get().state()); driver.stop(); driver.join(); this->Shutdown(); }
// This test verifies that a re-registering slave sends the terminal // unacknowledged tasks for a terminal executor. This is required // for the master to correctly reconcile its view with the slave's // view of tasks. This test drops a terminal update to the master // and then forces the slave to re-register. TEST_F(MasterSlaveReconciliationTest, SlaveReregisterTerminatedExecutor) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); TestContainerizer containerizer(&exec); StandaloneMasterDetector detector(master.get()->pid); Try<Owned<cluster::Slave>> slave = StartSlave(&detector, &containerizer); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); Future<FrameworkID> frameworkId; EXPECT_CALL(sched, registered(&driver, _, _)) .WillOnce(FutureArg<1>(&frameworkId)); EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(LaunchTasks(DEFAULT_EXECUTOR_INFO, 1, 1, 512, "*")) .WillRepeatedly(Return()); // Ignore subsequent offers. ExecutorDriver* execDriver; EXPECT_CALL(exec, registered(_, _, _, _)) .WillOnce(SaveArg<0>(&execDriver)); EXPECT_CALL(exec, launchTask(_, _)) .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING)); Future<TaskStatus> status; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status)); Future<StatusUpdateAcknowledgementMessage> statusUpdateAcknowledgementMessage = FUTURE_PROTOBUF( StatusUpdateAcknowledgementMessage(), master.get()->pid, slave.get()->pid); driver.start(); AWAIT_READY(status); EXPECT_EQ(TASK_RUNNING, status.get().state()); // Make sure the acknowledgement reaches the slave. AWAIT_READY(statusUpdateAcknowledgementMessage); // Drop the TASK_FINISHED status update sent to the master. Future<StatusUpdateMessage> statusUpdateMessage = DROP_PROTOBUF(StatusUpdateMessage(), _, master.get()->pid); Future<ExitedExecutorMessage> executorExitedMessage = FUTURE_PROTOBUF(ExitedExecutorMessage(), _, _); TaskStatus finishedStatus; finishedStatus = status.get(); finishedStatus.set_state(TASK_FINISHED); execDriver->sendStatusUpdate(finishedStatus); // Ensure the update was sent. AWAIT_READY(statusUpdateMessage); EXPECT_CALL(sched, executorLost(&driver, DEFAULT_EXECUTOR_ID, _, _)); // Now kill the executor. containerizer.destroy(frameworkId.get(), DEFAULT_EXECUTOR_ID); Future<TaskStatus> status2; EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&status2)); // We drop the 'UpdateFrameworkMessage' from the master to slave to // stop the status update manager from retrying the update that was // already sent due to the new master detection. DROP_PROTOBUFS(UpdateFrameworkMessage(), _, _); detector.appoint(master.get()->pid); AWAIT_READY(status2); EXPECT_EQ(TASK_FINISHED, status2.get().state()); driver.stop(); driver.join(); }
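// A minimal sketch (the helper and its arguments are assumptions, not part
// of the test) of the complementary, scheduler-driven side of
// reconciliation: in addition to the master/agent reconciliation exercised
// above, a framework can ask the master for the latest state of its tasks.
#include <vector>

#include <mesos/mesos.hpp>
#include <mesos/scheduler.hpp>

void reconcile(
    mesos::SchedulerDriver* driver,
    const std::vector<mesos::TaskStatus>& tasks)
{
  // With a non-empty list the master answers with the latest state of each
  // listed task ("explicit" reconciliation); with an empty list it answers
  // for all tasks it knows for this framework ("implicit" reconciliation).
  driver->reconcileTasks(tasks);
}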