// This test ensures that the command executor does not send // TASK_KILLING to frameworks that do not support the capability. TEST_P_TEMP_DISABLED_ON_WINDOWS(CommandExecutorTest, NoTaskKillingCapability) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); Owned<MasterDetector> detector = master.get()->createDetector(); slave::Flags flags = CreateSlaveFlags(); flags.http_command_executor = GetParam(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags); ASSERT_SOME(slave); // Start the framework without the task killing capability. MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_EQ(1u, offers->size()); // Launch a task with the command executor. TaskInfo task = createTask( offers->front().slave_id(), offers->front().resources(), "sleep 1000"); Future<TaskStatus> statusRunning; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&statusRunning)); driver.launchTasks(offers->front().id(), {task}); AWAIT_READY(statusRunning); EXPECT_EQ(TASK_RUNNING, statusRunning->state()); // There should only be a TASK_KILLED update. Future<TaskStatus> statusKilled; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&statusKilled)); driver.killTask(task.task_id()); AWAIT_READY(statusKilled); EXPECT_EQ(TASK_KILLED, statusKilled->state()); driver.stop(); driver.join(); }
// This test ensures that driver based schedulers using explicit // acknowledgements can acknowledge status updates sent from // HTTP based executors. TEST_F_TEMP_DISABLED_ON_WINDOWS( HTTPCommandExecutorTest, ExplicitAcknowledgements) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); Owned<MasterDetector> detector = master.get()->createDetector(); slave::Flags flags = CreateSlaveFlags(); flags.http_command_executor = true; Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, false, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_EQ(1u, offers->size()); // Launch a task with the command executor. TaskInfo task = createTask( offers->front().slave_id(), offers->front().resources(), "sleep 1000"); Future<TaskStatus> statusRunning; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&statusRunning)); // Ensure no status update acknowledgements are sent from the driver // to the master until the explicit acknowledgement is sent. EXPECT_NO_FUTURE_CALLS( mesos::scheduler::Call(), mesos::scheduler::Call::ACKNOWLEDGE, _ , master.get()->pid); driver.launchTasks(offers->front().id(), {task}); AWAIT_READY(statusRunning); EXPECT_TRUE(statusRunning->has_slave_id()); EXPECT_EQ(TASK_RUNNING, statusRunning->state()); // Now send the acknowledgement. Future<mesos::scheduler::Call> acknowledgement = FUTURE_CALL( mesos::scheduler::Call(), mesos::scheduler::Call::ACKNOWLEDGE, _, master.get()->pid); driver.acknowledgeStatusUpdate(statusRunning.get()); AWAIT_READY(acknowledgement); driver.stop(); driver.join(); }
// This test ensures that the HTTP command executor can self terminate
// after it gets the ACK for the terminal status update from agent.
//
// A `MockSlave` is spawned directly (instead of going through
// `StartSlave`) so that `executorTerminated` can be intercepted and
// the executor's termination status inspected.
TEST_F_TEMP_DISABLED_ON_WINDOWS(HTTPCommandExecutorTest, TerminateWithACK)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();
  flags.http_command_executor = true;

  Fetcher fetcher;

  Try<MesosContainerizer*> _containerizer =
    MesosContainerizer::create(flags, false, &fetcher);

  // Use a gtest assertion like the other checks in this test, so a
  // containerizer creation error is reported as a test failure.
  ASSERT_SOME(_containerizer);
  Owned<MesosContainerizer> containerizer(_containerizer.get());

  StandaloneMasterDetector detector(master.get()->pid);

  MockSlave slave(flags, &detector, containerizer.get());
  spawn(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);

  // This must be a fatal assertion: `offers->front()` is dereferenced
  // below, which is undefined behavior on an empty vector.
  ASSERT_EQ(1u, offers->size());

  // Launch a short lived task.
  TaskInfo task = createTask(
      offers->front().slave_id(),
      offers->front().resources(),
      "sleep 1");

  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusFinished;
  EXPECT_CALL(sched, statusUpdate(_, _))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusFinished));

  // Capture the termination future handed to the agent when the
  // executor goes away.
  Future<Future<Option<ContainerTermination>>> termination;
  EXPECT_CALL(slave, executorTerminated(_, _, _))
    .WillOnce(FutureArg<2>(&termination));

  driver.launchTasks(offers->front().id(), {task});

  // Scheduler should first receive TASK_RUNNING followed by TASK_FINISHED.
  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  AWAIT_READY(statusFinished);
  EXPECT_EQ(TASK_FINISHED, statusFinished->state());

  // The executor should self terminate with 0 as exit status once
  // it gets the ACK for the terminal status update from agent.
  AWAIT_READY(termination);
  ASSERT_TRUE(termination.get().isReady());

  // Guard the `Option` before unwrapping it below.
  ASSERT_SOME(termination.get().get());
  EXPECT_EQ(0, termination.get().get().get().status());

  driver.stop();
  driver.join();

  terminate(slave);
  wait(slave);
}
// This test ensures that a task will transition straight from `TASK_KILLING` to
// `TASK_KILLED`, even if the health check begins to fail during the kill policy
// grace period.
//
// The test is parameterized: `GetParam()` is assigned to the agent's
// `http_command_executor` flag.
//
// TODO(gkleiman): this test takes about 7 seconds to run, consider using mock
// tasks and health checkers to speed it up.
TEST_P(CommandExecutorTest, NoTransitionFromKillingToRunning)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  Owned<MasterDetector> detector = master.get()->createDetector();

  slave::Flags flags = CreateSlaveFlags();
  flags.http_command_executor = GetParam();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  // Start the framework with the task killing capability.
  FrameworkInfo::Capability capability;
  capability.set_type(FrameworkInfo::Capability::TASK_KILLING_STATE);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.add_capabilities()->CopyFrom(capability);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);

  // This must be a fatal assertion: `offers->front()` is dereferenced
  // below, which is undefined behavior on an empty vector.
  ASSERT_EQ(1u, offers->size());

  const string command = strings::format(
      "%s %s --sleep_duration=15",
      getTestHelperPath("test-helper"),
      KillPolicyTestHelper::NAME).get();

  TaskInfo task = createTask(offers->front(), command);

  // Create a health check that succeeds until a temporary file is removed.
  Try<string> temporaryPath = os::mktemp(path::join(os::getcwd(), "XXXXXX"));
  ASSERT_SOME(temporaryPath);
  const string tmpPath = temporaryPath.get();

  // `ls` on the temporary file exits successfully only while the file
  // exists. Delay, grace period and interval are all set to zero.
  HealthCheck healthCheck;
  healthCheck.set_type(HealthCheck::COMMAND);
  healthCheck.mutable_command()->set_value("ls " + tmpPath + " >/dev/null");
  healthCheck.set_delay_seconds(0);
  healthCheck.set_grace_period_seconds(0);
  healthCheck.set_interval_seconds(0);

  task.mutable_health_check()->CopyFrom(healthCheck);

  // Set the kill policy grace period to 5 seconds.
  KillPolicy killPolicy;
  killPolicy.mutable_grace_period()->set_nanoseconds(Seconds(5).ns());

  task.mutable_kill_policy()->CopyFrom(killPolicy);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  // Exactly four updates are expected, in this order. Any extra update
  // (e.g. a health transition back to TASK_RUNNING during the grace
  // period) would violate this expectation.
  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusHealthy;
  Future<TaskStatus> statusKilling;
  Future<TaskStatus> statusKilled;

  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusHealthy))
    .WillOnce(FutureArg<1>(&statusKilling))
    .WillOnce(FutureArg<1>(&statusKilled));

  driver.launchTasks(offers->front().id(), tasks);

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning.get().state());

  AWAIT_READY(statusHealthy);
  EXPECT_EQ(TASK_RUNNING, statusHealthy.get().state());
  EXPECT_TRUE(statusHealthy.get().has_healthy());
  EXPECT_TRUE(statusHealthy.get().healthy());

  driver.killTask(task.task_id());

  AWAIT_READY(statusKilling);
  EXPECT_EQ(TASK_KILLING, statusKilling->state());
  EXPECT_FALSE(statusKilling.get().has_healthy());

  // Remove the temporary file, so that the health check fails. The
  // removal must succeed: otherwise the health check would keep
  // passing and the scenario under test would never be exercised.
  ASSERT_SOME(os::rm(tmpPath));

  AWAIT_READY(statusKilled);
  EXPECT_EQ(TASK_KILLED, statusKilled->state());
  EXPECT_FALSE(statusKilled.get().has_healthy());

  driver.stop();
  driver.join();
}
// This test ensures that the command executor sends TASK_KILLING // to frameworks that support the capability. TEST_F(CommandExecutorTest, TaskKillingCapability) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); Owned<MasterDetector> detector = master.get()->createDetector(); Try<Owned<cluster::Slave>> slave = StartSlave(detector.get()); ASSERT_SOME(slave); // Start the framework with the task killing capability. FrameworkInfo::Capability capability; capability.set_type(FrameworkInfo::Capability::TASK_KILLING_STATE); FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO; frameworkInfo.add_capabilities()->CopyFrom(capability); MockScheduler sched; MesosSchedulerDriver driver( &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL); EXPECT_CALL(sched, registered(&driver, _, _)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(offers); EXPECT_EQ(1u, offers->size()); // Launch a task with the command executor. TaskInfo task = createTask( offers->front().slave_id(), offers->front().resources(), "sleep 1000"); Future<TaskStatus> statusRunning; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&statusRunning)); driver.launchTasks(offers->front().id(), {task}); AWAIT_READY(statusRunning); EXPECT_EQ(TASK_RUNNING, statusRunning->state()); Future<TaskStatus> statusKilling, statusKilled; EXPECT_CALL(sched, statusUpdate(_, _)) .WillOnce(FutureArg<1>(&statusKilling)) .WillOnce(FutureArg<1>(&statusKilled)); driver.killTask(task.task_id()); AWAIT_READY(statusKilling); EXPECT_EQ(TASK_KILLING, statusKilling->state()); AWAIT_READY(statusKilled); EXPECT_EQ(TASK_KILLED, statusKilled->state()); driver.stop(); driver.join(); }
// This test verifies that when reregistering, the slave sends the // executor ID of a non-command executor task, but not the one of a // command executor task. We then check that the master's API has // task IDs absent only for the command executor case. // // This was motivated by MESOS-8135. TEST_F(MasterSlaveReconciliationTest, SlaveReregisterTaskExecutorIds) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); slave::Flags flags = CreateSlaveFlags(); StandaloneMasterDetector detector(master.get()->pid); Try<Owned<cluster::Slave>> slave = StartSlave(&detector, flags); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); Future<FrameworkID> frameworkId; EXPECT_CALL(sched, registered(&driver, _, _)) .WillOnce(FutureArg<1>(&frameworkId)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(frameworkId); AWAIT_READY(offers); EXPECT_NE(0u, offers->size()); const Offer& offer = offers->front(); const SlaveID& slaveId = offer.slave_id(); Resources resources = Resources::parse(defaultTaskResourcesString).get(); TaskInfo commandExecutorTask = createTask(slaveId, resources, SLEEP_COMMAND(1000)); TaskInfo defaultExecutorTask = createTask(slaveId, resources, SLEEP_COMMAND(1000)); ExecutorInfo defaultExecutorInfo; defaultExecutorInfo.set_type(ExecutorInfo::DEFAULT); defaultExecutorInfo.mutable_executor_id()->CopyFrom(DEFAULT_EXECUTOR_ID); defaultExecutorInfo.mutable_framework_id()->CopyFrom(frameworkId.get()); defaultExecutorInfo.mutable_resources()->CopyFrom(resources); // We expect two TASK_STARTING and two TASK_RUNNING updates. vector<Future<TaskStatus>> taskStatuses(4); { // This variable doesn't have to be used explicitly. 
testing::InSequence inSequence; foreach (Future<TaskStatus>& taskStatus, taskStatuses) { EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&taskStatus)); } EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillRepeatedly(Return()); // Ignore subsequent updates. }