// This test verifies the slave will destroy a container if, when
// receiving a terminal status task update, updating the container's
// resources fails.
TEST_F(SlaveTest, TerminalTaskContainerizerUpdateFails)
{
  // Start a master.
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  EXPECT_CALL(exec, registered(_, _, _, _));

  TestContainerizer containerizer(&exec);

  // Start a slave.
  Try<PID<Slave> > slave = StartSlave(&containerizer);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  Offer offer = offers.get()[0];

  // Start two tasks.
  vector<TaskInfo> tasks;

  tasks.push_back(createTask(
      offer.slave_id(),
      Resources::parse("cpus:0.1;mem:32").get(),
      "sleep 1000",
      exec.id));

  tasks.push_back(createTask(
      offer.slave_id(),
      Resources::parse("cpus:0.1;mem:32").get(),
      "sleep 1000",
      exec.id));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  Future<TaskStatus> status1, status2, status3, status4;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status1))
    .WillOnce(FutureArg<1>(&status2))
    .WillOnce(FutureArg<1>(&status3))
    .WillOnce(FutureArg<1>(&status4));

  driver.launchTasks(offer.id(), tasks);

  AWAIT_READY(status1);
  EXPECT_EQ(TASK_RUNNING, status1.get().state());

  AWAIT_READY(status2);
  EXPECT_EQ(TASK_RUNNING, status2.get().state());

  // Set up the containerizer so the next update() will fail.
  EXPECT_CALL(containerizer, update(_, _))
    .WillOnce(Return(process::Failure("update() failed")))
    .WillRepeatedly(Return(Nothing()));

  EXPECT_CALL(exec, killTask(_, _))
    .WillOnce(SendStatusUpdateFromTaskID(TASK_KILLED));

  // Kill one of the tasks. The failed update should result in the
  // second task going lost when the container is destroyed.
  driver.killTask(tasks[0].task_id());

  AWAIT_READY(status3);
  EXPECT_EQ(TASK_KILLED, status3.get().state());

  AWAIT_READY(status4);
  EXPECT_EQ(TASK_LOST, status4.get().state());

  driver.stop();
  driver.join();

  Shutdown();
}
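// Aside: a minimal, self-contained sketch (not part of this suite) of the
// fail-once-then-succeed GoogleMock pattern used on `containerizer.update()`
// above: the first matching call consumes the `WillOnce` action, and every
// later call falls through to `WillRepeatedly`. The `Updater` interface and
// its `bool` return type are hypothetical stand-ins for the libprocess
// `Future<Nothing>` used by the real containerizer.

#include <gmock/gmock.h>
#include <gtest/gtest.h>

using ::testing::_;
using ::testing::Return;

class Updater
{
public:
  virtual ~Updater() {}
  virtual bool update(int containerId, int resources) = 0;
};

class MockUpdater : public Updater
{
public:
  MOCK_METHOD2(update, bool(int, int));
};

TEST(FailOnceSketch, FirstUpdateFailsRestSucceed)
{
  MockUpdater updater;

  // Mirrors the expectation in the test above: fail once, then succeed.
  EXPECT_CALL(updater, update(_, _))
    .WillOnce(Return(false))
    .WillRepeatedly(Return(true));

  EXPECT_FALSE(updater.update(1, 32)); // Consumes the WillOnce action.
  EXPECT_TRUE(updater.update(1, 32));  // Falls through to WillRepeatedly.
  EXPECT_TRUE(updater.update(2, 64));
}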
// In this test, the agent initially doesn't enable disk isolation
// but then restarts with XFS disk isolation enabled. We verify that
// the old container launched before the agent restart is
// successfully recovered.
TEST_F(ROOT_XFS_QuotaTest, RecoverOldContainers)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  Owned<MasterDetector> detector = master.get()->createDetector();

  slave::Flags flags = CreateSlaveFlags();

  // `CreateSlaveFlags()` enables `disk/xfs` so here we reset
  // `isolation` to empty.
  flags.isolation.clear();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:128;disk:1").get(),
      "dd if=/dev/zero of=file bs=1024 count=1; sleep 1000");

  Future<TaskStatus> startingStatus;
  Future<TaskStatus> runningStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&startingStatus))
    .WillOnce(FutureArg<1>(&runningStatus));

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(startingStatus);
  EXPECT_EQ(task.task_id(), startingStatus->task_id());
  EXPECT_EQ(TASK_STARTING, startingStatus->state());

  AWAIT_READY(runningStatus);
  EXPECT_EQ(task.task_id(), runningStatus->task_id());
  EXPECT_EQ(TASK_RUNNING, runningStatus->state());

  {
    Future<ResourceUsage> usage =
      process::dispatch(slave.get()->pid, &Slave::usage);
    AWAIT_READY(usage);

    // We should have 1 executor using resources, but it doesn't have
    // a disk limit enabled.
    ASSERT_EQ(1, usage->executors().size());

    const ResourceUsage_Executor& executor = usage->executors().Get(0);
    ASSERT_TRUE(executor.has_statistics());
    ASSERT_FALSE(executor.statistics().has_disk_limit_bytes());
  }

  // Restart the slave.
  slave.get()->terminate();

  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);

  // This time use the agent flags that include XFS disk isolation.
  slave = StartSlave(detector.get(), CreateSlaveFlags());
  ASSERT_SOME(slave);

  // Wait for the slave to re-register.
  AWAIT_READY(slaveReregisteredMessage);

  {
    Future<ResourceUsage> usage =
      process::dispatch(slave.get()->pid, &Slave::usage);
    AWAIT_READY(usage);

    // We should still have 1 executor using resources, but it still
    // doesn't have a disk limit enabled.
    ASSERT_EQ(1, usage->executors().size());

    const ResourceUsage_Executor& executor = usage->executors().Get(0);
    ASSERT_TRUE(executor.has_statistics());
    ASSERT_FALSE(executor.statistics().has_disk_limit_bytes());
  }

  driver.stop();
  driver.join();
}
TEST_F(MemoryPressureMesosTest, CGROUPS_ROOT_Statistics)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();

  // We only care about the memory cgroup for this test.
  flags.isolation = "cgroups/mem";
  flags.agent_subsystems = None();

  Fetcher fetcher;

  Try<MesosContainerizer*> _containerizer =
    MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(_containerizer);

  Owned<MesosContainerizer> containerizer(_containerizer.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave =
    StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  Offer offer = offers.get()[0];

  // Run a task that triggers memory pressure events. We request 1GB
  // of disk because we are going to write a 512 MB file repeatedly.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:256;disk:1024").get(),
      "while true; do dd count=512 bs=1M if=/dev/zero of=./temp; done");

  Future<TaskStatus> running;
  Future<TaskStatus> killed;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&running))
    .WillOnce(FutureArg<1>(&killed))
    .WillRepeatedly(Return()); // Ignore subsequent updates.

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(running);
  EXPECT_EQ(task.task_id(), running.get().task_id());
  EXPECT_EQ(TASK_RUNNING, running.get().state());

  Future<hashset<ContainerID>> containers = containerizer->containers();
  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers.get().size());

  ContainerID containerId = *(containers.get().begin());

  // Wait a while for some memory pressure events to occur.
  Duration waited = Duration::zero();
  do {
    Future<ResourceStatistics> usage = containerizer->usage(containerId);
    AWAIT_READY(usage);

    if (usage.get().mem_low_pressure_counter() > 0) {
      // We will check the correctness of the memory pressure counters
      // later, because the memory-hammering task is still active
      // and potentially incrementing these counters.
      break;
    }

    os::sleep(Milliseconds(100));
    waited += Milliseconds(100);
  } while (waited < Seconds(5));

  EXPECT_LE(waited, Seconds(5));

  // Pause the clock to ensure that the reaper doesn't reap the exited
  // command executor and inform the containerizer/slave.
  Clock::pause();
  Clock::settle();

  // Stop the memory-hammering task.
  driver.killTask(task.task_id());

  AWAIT_READY_FOR(killed, Seconds(120));
  EXPECT_EQ(task.task_id(), killed->task_id());
  EXPECT_EQ(TASK_KILLED, killed->state());

  // Now check the correctness of the memory pressure counters.
  Future<ResourceStatistics> usage = containerizer->usage(containerId);
  AWAIT_READY(usage);

  EXPECT_GE(usage.get().mem_low_pressure_counter(),
            usage.get().mem_medium_pressure_counter());
  EXPECT_GE(usage.get().mem_medium_pressure_counter(),
            usage.get().mem_critical_pressure_counter());

  Clock::resume();

  driver.stop();
  driver.join();
}
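// Aside: the pressure counters asserted above are fed by the cgroups-v1
// memory notification API. Below is a minimal sketch (not the Mesos
// implementation) of that mechanism: register an eventfd against
// memory.pressure_level via cgroup.event_control, then count how often it
// fires. The cgroup path and "memory" hierarchy mount point are assumptions;
// error handling is omitted and root privileges are required.

#include <sys/eventfd.h>
#include <fcntl.h>
#include <unistd.h>

#include <cstdint>
#include <cstdio>
#include <string>

int main()
{
  const std::string cgroup = "/sys/fs/cgroup/memory/mesos/test"; // Assumed.

  int efd = eventfd(0, 0);
  int pfd = open((cgroup + "/memory.pressure_level").c_str(), O_RDONLY);
  int cfd = open((cgroup + "/cgroup.event_control").c_str(), O_WRONLY);

  // Register for "low" pressure events with the line
  // "<event_fd> <pressure_fd> low"; "medium" and "critical" levels are
  // registered the same way and back the other two counters.
  std::string control =
    std::to_string(efd) + " " + std::to_string(pfd) + " low";
  write(cfd, control.c_str(), control.size());

  // Each read() returns the number of events since the previous read();
  // an isolator would accumulate these into mem_low_pressure_counter.
  uint64_t count = 0;
  read(efd, &count, sizeof(count));
  printf("low pressure events: %llu\n", (unsigned long long)count);

  close(cfd);
  close(pfd);
  close(efd);
  return 0;
}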
// In this test, the framework is not checkpointed. This ensures that when we
// stop the slave, the executor is killed and we will need to recover the
// working directories without getting any checkpointed recovery state.
TEST_F(ROOT_XFS_QuotaTest, NoCheckpointRecovery)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();

  Fetcher fetcher(flags);

  Try<MesosContainerizer*> _containerizer =
    MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(_containerizer);

  Owned<MesosContainerizer> containerizer(_containerizer.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(
      detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:128;disk:1").get(),
      "dd if=/dev/zero of=file bs=1048576 count=1; sleep 1000");

  Future<TaskStatus> startingStatus;
  Future<TaskStatus> runningStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&startingStatus))
    .WillOnce(FutureArg<1>(&runningStatus))
    .WillOnce(Return());

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(startingStatus);
  EXPECT_EQ(task.task_id(), startingStatus->task_id());
  EXPECT_EQ(TASK_STARTING, startingStatus->state());

  AWAIT_READY(runningStatus);
  EXPECT_EQ(task.task_id(), runningStatus->task_id());
  EXPECT_EQ(TASK_RUNNING, runningStatus->state());

  Future<ResourceUsage> usage1 =
    process::dispatch(slave.get()->pid, &Slave::usage);
  AWAIT_READY(usage1);

  // We should have 1 executor using resources.
  ASSERT_EQ(1, usage1->executors().size());

  Future<hashset<ContainerID>> containers = containerizer->containers();
  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers->size());

  ContainerID containerId = *containers->begin();

  // Restart the slave.
  slave.get()->terminate();

  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);

  _containerizer = MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(_containerizer);
  containerizer.reset(_containerizer.get());

  slave = StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  // Wait until slave recovery is complete.
  Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);
  AWAIT_READY_FOR(_recover, Seconds(60));

  // Wait until the orphan containers are cleaned up.
  AWAIT_READY_FOR(containerizer.get()->wait(containerId), Seconds(60));
  AWAIT_READY(slaveReregisteredMessage);

  Future<ResourceUsage> usage2 =
    process::dispatch(slave.get()->pid, &Slave::usage);
  AWAIT_READY(usage2);

  // We should have no executors left because we didn't checkpoint.
  ASSERT_TRUE(usage2->executors().empty());

  Try<std::list<string>> sandboxes = os::glob(path::join(
      slave::paths::getSandboxRootDir(mountPoint.get()),
      "*",
      "frameworks",
      "*",
      "executors",
      "*",
      "runs",
      "*"));

  ASSERT_SOME(sandboxes);

  // One sandbox and one symlink.
  ASSERT_EQ(2u, sandboxes->size());

  // Scan the remaining sandboxes and make sure that no project IDs
  // are assigned.
  foreach (const string& sandbox, sandboxes.get()) {
    // Skip the "latest" symlink.
    if (os::stat::islink(sandbox)) {
      continue;
    }

    EXPECT_NONE(xfs::getProjectId(sandbox));
  }

  driver.stop();
  driver.join();
}
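// Aside: a minimal sketch (not the Mesos implementation) of what a check
// like `xfs::getProjectId()` boils down to: reading a directory's
// `fsx_projid` attribute through the FS_IOC_FSGETXATTR ioctl (available in
// <linux/fs.h> on reasonably recent kernels). The path below is a
// placeholder for a sandbox directory on an XFS mount.

#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include <linux/fs.h>

#include <cstdio>

int main()
{
  // Hypothetical sandbox path; substitute a real directory on XFS.
  int fd = open("/path/to/sandbox", O_RDONLY | O_DIRECTORY);
  if (fd < 0) {
    perror("open");
    return 1;
  }

  struct fsxattr attr;
  if (ioctl(fd, FS_IOC_FSGETXATTR, &attr) < 0) {
    perror("ioctl");
    close(fd);
    return 1;
  }

  // A project ID of 0 means no project is assigned, which is what the
  // NoCheckpointRecovery test above expects after cleanup.
  printf("project id: %u\n", attr.fsx_projid);

  close(fd);
  return 0;
}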
// In this test, the framework is checkpointed so we expect the executor to
// persist across the slave restart and to have the same resource usage before
// and after.
TEST_F(ROOT_XFS_QuotaTest, CheckpointRecovery)
{
  slave::Flags flags = CreateSlaveFlags();

  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:128;disk:1").get(),
      "dd if=/dev/zero of=file bs=1048576 count=1; sleep 1000");

  Future<TaskStatus> startingStatus;
  Future<TaskStatus> runningStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&startingStatus))
    .WillOnce(FutureArg<1>(&runningStatus));

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(startingStatus);
  EXPECT_EQ(task.task_id(), startingStatus->task_id());
  EXPECT_EQ(TASK_STARTING, startingStatus->state());

  AWAIT_READY(runningStatus);
  EXPECT_EQ(task.task_id(), runningStatus->task_id());
  EXPECT_EQ(TASK_RUNNING, runningStatus->state());

  Future<ResourceUsage> usage1 =
    process::dispatch(slave.get()->pid, &Slave::usage);
  AWAIT_READY(usage1);

  // We should have 1 executor using resources.
  ASSERT_EQ(1, usage1->executors().size());

  // Restart the slave.
  slave.get()->terminate();

  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);

  slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  // Wait for the slave to re-register.
  AWAIT_READY(slaveReregisteredMessage);

  Future<ResourceUsage> usage2 =
    process::dispatch(slave.get()->pid, &Slave::usage);
  AWAIT_READY(usage2);

  // We should still have 1 executor using resources.
  ASSERT_EQ(1, usage2->executors().size());

  Try<std::list<string>> sandboxes = os::glob(path::join(
      slave::paths::getSandboxRootDir(mountPoint.get()),
      "*",
      "frameworks",
      "*",
      "executors",
      "*",
      "runs",
      "*"));

  ASSERT_SOME(sandboxes);

  // One sandbox and one symlink.
  ASSERT_EQ(2u, sandboxes->size());

  // Scan the remaining sandboxes. We ought to still have project IDs
  // assigned to them all.
  foreach (const string& sandbox, sandboxes.get()) {
    // Skip the "latest" symlink.
    if (os::stat::islink(sandbox)) {
      continue;
    }

    EXPECT_SOME(xfs::getProjectId(sandbox));
  }

  driver.stop();
  driver.join();
}
// This test verifies that persistent volumes are unmounted properly
// after a checkpointed framework disappears and the slave restarts.
//
// TODO(jieyu): Even though the command task specifies a new
// filesystem root, the executor (command executor) itself does not
// change filesystem root (uses the host filesystem). We need to add a
// test to test the scenario that the executor itself changes rootfs.
TEST_F(LinuxFilesystemIsolatorMesosTest,
       ROOT_RecoverOrphanedPersistentVolume)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  string registry = path::join(sandbox.get(), "registry");
  AWAIT_READY(DockerArchive::create(registry, "test_image"));

  slave::Flags flags = CreateSlaveFlags();
  flags.resources = "cpus:2;mem:1024;disk(role1):1024";
  flags.isolation = "filesystem/linux,docker/runtime";
  flags.docker_registry = registry;
  flags.docker_store_dir = path::join(sandbox.get(), "store");
  flags.image_providers = "docker";

  Fetcher fetcher(flags);

  Try<MesosContainerizer*> create =
    MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(create);

  Owned<Containerizer> containerizer(create.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(
      detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_roles(0, "role1");
  frameworkInfo.set_checkpoint(true);

  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  string dir1 = path::join(sandbox.get(), "dir1");
  ASSERT_SOME(os::mkdir(dir1));

  Resource persistentVolume = createPersistentVolume(
      Megabytes(64),
      "role1",
      "id1",
      "path1",
      None(),
      None(),
      frameworkInfo.principal());

  // Create a task that does nothing for a long time.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:512").get() + persistentVolume,
      "sleep 1000");

  task.mutable_container()->CopyFrom(createContainerInfo(
      "test_image",
      {createVolumeHostPath("/tmp", dir1, Volume::RW)}));

  Future<TaskStatus> statusStarting;
  Future<TaskStatus> statusRunning;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusStarting))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillRepeatedly(DoDefault());

  Future<Nothing> ack =
    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  // Create the persistent volumes and launch task via `acceptOffers`.
  driver.acceptOffers(
      {offer.id()},
      {CREATE(persistentVolume), LAUNCH({task})});

  AWAIT_READY(statusStarting);
  EXPECT_EQ(TASK_STARTING, statusStarting->state());

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  // Wait for the ACK to be checkpointed.
  AWAIT_READY(ack);

  Future<hashset<ContainerID>> containers = containerizer->containers();
  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers->size());

  ContainerID containerId = *containers->begin();

  // Restart the slave.
  slave.get()->terminate();

  // Wipe the slave meta directory so that the slave will treat the
  // above running task as an orphan.
  ASSERT_SOME(os::rmdir(slave::paths::getMetaRootDir(flags.work_dir)));

  Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);

  // Recreate the containerizer using the same helper as above.
  containerizer.reset();

  create = MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(create);

  containerizer.reset(create.get());

  slave = StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  // Wait until slave recovery is complete.
  AWAIT_READY(_recover);

  // Wait until the orphan containers are cleaned up.
  AWAIT_READY(containerizer->wait(containerId));

  Try<fs::MountInfoTable> table = fs::MountInfoTable::read();
  ASSERT_SOME(table);

  // All mount targets should be under this directory.
  string directory = slave::paths::getSandboxRootDir(flags.work_dir);

  // Verify that the orphaned container's persistent volume and
  // the rootfs are unmounted.
  foreach (const fs::MountInfoTable::Entry& entry, table->entries) {
    EXPECT_FALSE(strings::contains(entry.target, directory))
      << "Target was not unmounted: " << entry.target;
  }

  driver.stop();
  driver.join();
}
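// Aside: a minimal sketch (not the Mesos implementation) of the mount-table
// check performed above. /proc/self/mountinfo lists one mount per line with
// the layout "ID parent major:minor root mount-point ...", so the fifth
// field is the mount point; any mount point still under the sandbox root
// indicates an unmount was missed. The directory below is a placeholder.

#include <fstream>
#include <iostream>
#include <sstream>
#include <string>

int main()
{
  const std::string directory = "/var/lib/mesos/slaves"; // Assumed.

  std::ifstream mounts("/proc/self/mountinfo");
  std::string line;

  while (std::getline(mounts, line)) {
    std::istringstream fields(line);
    std::string id, parent, device, root, target;

    if (fields >> id >> parent >> device >> root >> target) {
      // Flag any mount whose target lives under `directory`.
      if (target.compare(0, directory.size(), directory) == 0) {
        std::cout << "Target was not unmounted: " << target << "\n";
      }
    }
  }

  return 0;
}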
// This test verifies that the framework can launch a command task
// that specifies both container image and persistent volumes.
TEST_F(LinuxFilesystemIsolatorMesosTest,
       ROOT_ChangeRootFilesystemCommandExecutorPersistentVolume)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  string registry = path::join(sandbox.get(), "registry");
  AWAIT_READY(DockerArchive::create(registry, "test_image"));

  slave::Flags flags = CreateSlaveFlags();
  flags.resources = "cpus:2;mem:1024;disk(role1):1024";
  flags.isolation = "filesystem/linux,docker/runtime";
  flags.docker_registry = registry;
  flags.docker_store_dir = path::join(sandbox.get(), "store");
  flags.image_providers = "docker";

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_roles(0, "role1");

  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(frameworkId);

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  string dir1 = path::join(sandbox.get(), "dir1");
  ASSERT_SOME(os::mkdir(dir1));

  Resource persistentVolume = createPersistentVolume(
      Megabytes(64),
      "role1",
      "id1",
      "path1",
      None(),
      None(),
      frameworkInfo.principal());

  // We use the filter explicitly here so that the resources will not
  // be filtered for 5 seconds (the default).
  Filters filters;
  filters.set_refuse_seconds(0);

  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:512").get() + persistentVolume,
      "echo abc > path1/file");

  task.mutable_container()->CopyFrom(createContainerInfo(
      "test_image",
      {createVolumeHostPath("/tmp", dir1, Volume::RW)}));

  // Set the expectations before triggering the launch so that no
  // status update can race past them.
  Future<TaskStatus> statusStarting;
  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusFinished;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusStarting))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusFinished));

  // Create the persistent volumes and launch task via `acceptOffers`.
  driver.acceptOffers(
      {offer.id()},
      {CREATE(persistentVolume), LAUNCH({task})},
      filters);

  AWAIT_READY(statusStarting);
  EXPECT_EQ(TASK_STARTING, statusStarting->state());

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  AWAIT_READY(statusFinished);
  EXPECT_EQ(TASK_FINISHED, statusFinished->state());

  // NOTE: The command executor's id is the same as the task id.
  ExecutorID executorId;
  executorId.set_value(task.task_id().value());

  string directory = slave::paths::getExecutorLatestRunPath(
      flags.work_dir,
      offer.slave_id(),
      frameworkId.get(),
      executorId);

  EXPECT_FALSE(os::exists(path::join(directory, "path1")));

  string volumePath = slave::paths::getPersistentVolumePath(
      flags.work_dir,
      "role1",
      "id1");

  EXPECT_SOME_EQ("abc\n", os::read(path::join(volumePath, "file")));

  driver.stop();
  driver.join();
}
// This test verifies that a task group is launched on the agent if the executor
// provides a valid authentication token specifying its own ContainerID.
TEST_F(ExecutorAuthorizationTest, RunTaskGroup)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  // Start an agent with permissive ACLs so that a task can be launched.
  ACLs acls;
  acls.set_permissive(true);

  slave::Flags flags = CreateSlaveFlags();
  flags.acls = acls;

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(frameworkId);

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:0.5;mem:32").get(),
      "sleep 1000");

  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status));

  Resources executorResources = allocatedResources(
      Resources::parse("cpus:0.1;mem:32;disk:32").get(), "*");

  ExecutorInfo executor;
  executor.mutable_executor_id()->set_value("default");
  executor.set_type(ExecutorInfo::DEFAULT);
  executor.mutable_framework_id()->CopyFrom(frameworkId.get());
  executor.mutable_resources()->CopyFrom(executorResources);

  TaskGroupInfo taskGroup;
  taskGroup.add_tasks()->CopyFrom(task);

  driver.acceptOffers({offer.id()}, {LAUNCH_GROUP(executor, taskGroup)});

  AWAIT_READY(status);
  ASSERT_EQ(task.task_id(), status->task_id());
  EXPECT_EQ(TASK_STARTING, status->state());

  driver.stop();
  driver.join();
}
// This test verifies that a task is launched on the agent if the task
// user is authorized based on the `run_tasks` ACL configured on the agent
// to only allow whitelisted users to run tasks on the agent.
TYPED_TEST(SlaveAuthorizerTest, AuthorizeRunTaskOnAgent)
{
  // Get the current user.
  Result<string> user = os::user();
  ASSERT_SOME(user) << "Failed to get the current user name"
                    << (user.isError() ? ": " + user.error() : "");

  Try<Owned<cluster::Master>> master = this->StartMaster();
  ASSERT_SOME(master);

  // Start a slave with `bar` and the current user being the only
  // authorized users to launch tasks on the agent.
  ACLs acls;
  acls.set_permissive(false); // Restrictive.

  mesos::ACL::RunTask* acl = acls.add_run_tasks();
  acl->mutable_principals()->set_type(ACL::Entity::ANY);
  acl->mutable_users()->add_values("bar");
  acl->mutable_users()->add_values(user.get());

  slave::Flags slaveFlags = this->CreateSlaveFlags();
  slaveFlags.acls = acls;

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = this->StartSlave(
      detector.get(), slaveFlags);
  ASSERT_SOME(slave);

  // Create a framework with user `foo`.
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_user("foo");

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  // The framework is registered since the master admits frameworks
  // of any user.
  AWAIT_READY(frameworkId);

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  // Launch the first task with no user, so it defaults to the
  // framework user `foo`.
  TaskInfo task1 = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:32").get(),
      "sleep 1000");

  // Launch the second task as the current user.
  TaskInfo task2 = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:32").get(),
      "sleep 1000");
  task2.mutable_command()->set_user(user.get());

  // The first task should fail since the task user `foo` is not an
  // authorized user that can launch a task. However, the second task
  // should succeed.
  Future<TaskStatus> status0;
  Future<TaskStatus> status1;
  Future<TaskStatus> status2;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status0))
    .WillOnce(FutureArg<1>(&status1))
    .WillOnce(FutureArg<1>(&status2));

  driver.acceptOffers({offer.id()}, {LAUNCH({task1, task2})});

  // Wait for TASK_ERROR for the 1st task, and TASK_STARTING followed
  // by TASK_RUNNING for the 2nd task.
  AWAIT_READY(status0);
  AWAIT_READY(status1);
  AWAIT_READY(status2);

  // Validate both statuses. Note that the order in which the status
  // updates for the 2 tasks are received is not deterministic, but we
  // know that task2's TASK_RUNNING arrives after its TASK_STARTING.
  hashmap<TaskID, TaskStatus> statuses;
  statuses[status0->task_id()] = status0.get();
  statuses[status1->task_id()] = status1.get();
  statuses[status2->task_id()] = status2.get();

  ASSERT_TRUE(statuses.contains(task1.task_id()));
  EXPECT_EQ(TASK_ERROR, statuses.at(task1.task_id()).state());
  EXPECT_EQ(TaskStatus::SOURCE_SLAVE, statuses.at(task1.task_id()).source());
  EXPECT_EQ(
      TaskStatus::REASON_TASK_UNAUTHORIZED,
      statuses.at(task1.task_id()).reason());

  ASSERT_TRUE(statuses.contains(task2.task_id()));
  EXPECT_EQ(TASK_RUNNING, statuses.at(task2.task_id()).state());

  driver.stop();
  driver.join();
}
TEST_F(FaultToleranceTest, ForwardStatusUpdateUnknownExecutor)
{
  Try<PID<Master> > master = StartMaster();
  ASSERT_SOME(master);

  MockExecutor exec(DEFAULT_EXECUTOR_ID);

  Try<PID<Slave> > slave = StartSlave(&exec);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());

  FrameworkID frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(SaveArg<1>(&frameworkId));

  Future<vector<Offer> > offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers));

  driver.start();

  AWAIT_READY(offers);
  EXPECT_NE(0u, offers.get().size());

  Offer offer = offers.get()[0];

  TaskInfo task;
  task.set_name("");
  task.mutable_task_id()->set_value("1");
  task.mutable_slave_id()->MergeFrom(offer.slave_id());
  task.mutable_resources()->MergeFrom(offer.resources());
  task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);

  vector<TaskInfo> tasks;
  tasks.push_back(task);

  Future<Nothing> statusUpdate;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureSatisfy(&statusUpdate)); // TASK_RUNNING of task1.

  EXPECT_CALL(exec, registered(_, _, _, _));

  EXPECT_CALL(exec, launchTask(_, _))
    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));

  driver.launchTasks(offer.id(), tasks);

  // Wait until TASK_RUNNING of task1 is received.
  AWAIT_READY(statusUpdate);

  // Simulate the slave receiving a status update from an unknown
  // (e.g. exited) executor of the given framework.
  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status)); // TASK_RUNNING of task2.

  TaskID taskId;
  taskId.set_value("task2");

  StatusUpdate statusUpdate2 = createStatusUpdate(
      frameworkId, offer.slave_id(), taskId, TASK_RUNNING, "Dummy update");

  process::dispatch(slave.get(), &Slave::statusUpdate, statusUpdate2);

  // Ensure that the scheduler receives task2's update.
  AWAIT_READY(status);
  EXPECT_EQ(taskId, status.get().task_id());
  EXPECT_EQ(TASK_RUNNING, status.get().state());

  EXPECT_CALL(exec, shutdown(_))
    .Times(AtMost(1));

  driver.stop();
  driver.join();

  Shutdown();
}