// This test verifies that the container will be killed if the volume
// usage exceeds its quota.
TEST_F(DiskQuotaTest, VolumeUsageExceedsQuota)
{
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_role("role1");

  master::Flags masterFlags = CreateMasterFlags();

  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
  ASSERT_SOME(master);

  slave::Flags slaveFlags = CreateSlaveFlags();
  slaveFlags.isolation = "posix/cpu,posix/mem,disk/du";

  // NOTE: We can't pause the clock because we need the reaper to reap
  // the 'du' subprocess.
  slaveFlags.container_disk_watch_interval = Milliseconds(1);
  slaveFlags.enforce_container_disk_quota = true;
  slaveFlags.resources = "cpus:2;mem:128;disk(role1):128";

  Try<Resources> initialResources =
    Resources::parse(slaveFlags.resources.get());
  ASSERT_SOME(initialResources);

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), slaveFlags);
  ASSERT_SOME(slave);

  MockScheduler sched;

  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(frameworkId);

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  const Offer& offer = offers.get()[0];

  // Create a task that requests a 1MB persistent volume but attempts
  // to use 2MB.
  Resources volume = createPersistentVolume(
      Megabytes(1),
      "role1",
      "id1",
      "volume_path",
      None(),
      None(),
      frameworkInfo.principal());

  // We intentionally request a sandbox that is much bigger (16MB) than
  // the file the task writes (2MB) to the persistent volume (1MB). This
  // makes sure that the quota is indeed enforced on the persistent
  // volume.
  Resources taskResources =
    Resources::parse("cpus:1;mem:64;disk(role1):16").get() + volume;

  TaskInfo task = createTask(
      offer.slave_id(),
      taskResources,
      "dd if=/dev/zero of=volume_path/file bs=1048576 count=2 && sleep 1000");

  Future<TaskStatus> status1;
  Future<TaskStatus> status2;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status1))
    .WillOnce(FutureArg<1>(&status2));

  // Create the volume and launch the task.
  driver.acceptOffers(
      {offer.id()},
      {CREATE(volume), LAUNCH({task})});

  AWAIT_READY(status1);
  EXPECT_EQ(task.task_id(), status1->task_id());
  EXPECT_EQ(TASK_RUNNING, status1->state());

  AWAIT_READY(status2);
  EXPECT_EQ(task.task_id(), status2->task_id());
  EXPECT_EQ(TASK_FAILED, status2->state());

  driver.stop();
  driver.join();
}
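
// For context: the `disk/du` isolator used above polls each container's
// disk usage by periodically running the `du` command (at
// `container_disk_watch_interval`) and, when
// `enforce_container_disk_quota` is set, destroys a container whose
// usage exceeds its allocation. The helper below is a minimal sketch of
// such a `du`-based usage probe, not the isolator's actual code; the
// name `diskUsage` and the direct use of `popen` are illustrative
// assumptions.

#include <stdio.h>

#include <string>

// Returns the disk usage of `path` in bytes as reported by `du -sk`
// (kilobyte granularity, rounded up by `du`), or -1 on failure.
static long long diskUsage(const std::string& path)
{
  const std::string command = "du -sk " + path;

  FILE* pipe = popen(command.c_str(), "r");
  if (pipe == nullptr) {
    return -1;
  }

  // `du -sk` prints "<kilobytes>\t<path>"; read the leading number.
  long long kilobytes = -1;
  if (fscanf(pipe, "%lld", &kilobytes) != 1) {
    kilobytes = -1;
  }

  pclose(pipe);

  return kilobytes < 0 ? -1 : kilobytes * 1024;
}
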
// This test verifies that persistent volumes are unmounted properly
// after a checkpointed framework disappears and the slave restarts.
//
// TODO(jieyu): Even though the command task specifies a new
// filesystem root, the executor (command executor) itself does not
// change filesystem root (uses the host filesystem). We need to add a
// test to test the scenario that the executor itself changes rootfs.
TEST_F(LinuxFilesystemIsolatorMesosTest,
       ROOT_RecoverOrphanedPersistentVolume)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  string registry = path::join(sandbox.get(), "registry");
  AWAIT_READY(DockerArchive::create(registry, "test_image"));

  slave::Flags flags = CreateSlaveFlags();
  flags.resources = "cpus:2;mem:1024;disk(role1):1024";
  flags.isolation = "filesystem/linux,docker/runtime";
  flags.docker_registry = registry;
  flags.docker_store_dir = path::join(sandbox.get(), "store");
  flags.image_providers = "docker";

  Fetcher fetcher(flags);

  Try<MesosContainerizer*> create =
    MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(create);

  Owned<Containerizer> containerizer(create.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(
      detector.get(),
      containerizer.get(),
      flags);
  ASSERT_SOME(slave);

  MockScheduler sched;

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_roles(0, "role1");
  frameworkInfo.set_checkpoint(true);

  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  string dir1 = path::join(sandbox.get(), "dir1");
  ASSERT_SOME(os::mkdir(dir1));

  Resource persistentVolume = createPersistentVolume(
      Megabytes(64),
      "role1",
      "id1",
      "path1",
      None(),
      None(),
      frameworkInfo.principal());

  // Create a task that does nothing for a long time.
  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:512").get() + persistentVolume,
      "sleep 1000");

  task.mutable_container()->CopyFrom(createContainerInfo(
      "test_image",
      {createVolumeHostPath("/tmp", dir1, Volume::RW)}));

  Future<TaskStatus> statusStarting;
  Future<TaskStatus> statusRunning;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusStarting))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillRepeatedly(DoDefault());

  Future<Nothing> ack =
    FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement);

  // Create the persistent volumes and launch task via `acceptOffers`.
  driver.acceptOffers(
      {offer.id()},
      {CREATE(persistentVolume), LAUNCH({task})});

  AWAIT_READY(statusStarting);
  EXPECT_EQ(TASK_STARTING, statusStarting->state());

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  // Wait for the ACK to be checkpointed.
  AWAIT_READY(ack);

  Future<hashset<ContainerID>> containers = containerizer->containers();

  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers->size());

  ContainerID containerId = *containers->begin();

  // Restart the slave.
  slave.get()->terminate();

  // Wipe the slave meta directory so that the slave will treat the
  // above running task as an orphan.
  ASSERT_SOME(os::rmdir(slave::paths::getMetaRootDir(flags.work_dir)));

  Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);

  // Recreate the containerizer using the same helper as above.
  containerizer.reset();

  create = MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(create);

  containerizer.reset(create.get());

  slave = StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  // Wait until slave recovery is complete.
  AWAIT_READY(_recover);

  // Wait until the orphan containers are cleaned up.
  AWAIT_READY(containerizer->wait(containerId));

  Try<fs::MountInfoTable> table = fs::MountInfoTable::read();
  ASSERT_SOME(table);

  // All mount targets should be under this directory.
  string directory = slave::paths::getSandboxRootDir(flags.work_dir);

  // Verify that the orphaned container's persistent volume and
  // the rootfs are unmounted.
  foreach (const fs::MountInfoTable::Entry& entry, table->entries) {
    EXPECT_FALSE(strings::contains(entry.target, directory))
      << "Target was not unmounted: " << entry.target;
  }

  driver.stop();
  driver.join();
}
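
// The mount verification above goes through `fs::MountInfoTable::read()`,
// which parses `/proc/self/mountinfo`. For reference, the same check can
// be sketched directly against the mountinfo format, in which the fifth
// whitespace-separated field of each line is the mount target. This
// standalone helper is an illustrative assumption, not what the test
// (or `fs::MountInfoTable`) actually executes.

#include <fstream>
#include <sstream>
#include <string>

// Returns true if any mount target in /proc/self/mountinfo lies under
// `directory`.
static bool hasMountUnder(const std::string& directory)
{
  std::ifstream mountinfo("/proc/self/mountinfo");

  std::string line;
  while (std::getline(mountinfo, line)) {
    std::istringstream fields(line);

    // Fields: mount ID, parent ID, major:minor, root, mount point, ...
    std::string id, parent, device, root, target;
    fields >> id >> parent >> device >> root >> target;

    if (target.compare(0, directory.size(), directory) == 0) {
      return true;
    }
  }

  return false;
}
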
// This test verifies that the framework can launch a command task
// that specifies both container image and persistent volumes.
TEST_F(LinuxFilesystemIsolatorMesosTest,
       ROOT_ChangeRootFilesystemCommandExecutorPersistentVolume)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  string registry = path::join(sandbox.get(), "registry");
  AWAIT_READY(DockerArchive::create(registry, "test_image"));

  slave::Flags flags = CreateSlaveFlags();
  flags.resources = "cpus:2;mem:1024;disk(role1):1024";
  flags.isolation = "filesystem/linux,docker/runtime";
  flags.docker_registry = registry;
  flags.docker_store_dir = path::join(sandbox.get(), "store");
  flags.image_providers = "docker";

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_roles(0, "role1");

  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(frameworkId);

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  string dir1 = path::join(sandbox.get(), "dir1");
  ASSERT_SOME(os::mkdir(dir1));

  Resource persistentVolume = createPersistentVolume(
      Megabytes(64),
      "role1",
      "id1",
      "path1",
      None(),
      None(),
      frameworkInfo.principal());

  // We use the filter explicitly here so that the resources will not
  // be filtered for 5 seconds (the default).
  Filters filters;
  filters.set_refuse_seconds(0);

  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:512").get() + persistentVolume,
      "echo abc > path1/file");

  task.mutable_container()->CopyFrom(createContainerInfo(
      "test_image",
      {createVolumeHostPath("/tmp", dir1, Volume::RW)}));

  // Create the persistent volumes and launch task via `acceptOffers`.
  driver.acceptOffers(
      {offer.id()},
      {CREATE(persistentVolume), LAUNCH({task})},
      filters);

  Future<TaskStatus> statusStarting;
  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusFinished;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusStarting))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusFinished));

  AWAIT_READY(statusStarting);
  EXPECT_EQ(TASK_STARTING, statusStarting->state());

  AWAIT_READY(statusRunning);
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  AWAIT_READY(statusFinished);
  EXPECT_EQ(TASK_FINISHED, statusFinished->state());

  // NOTE: The command executor's id is the same as the task id.
  ExecutorID executorId;
  executorId.set_value(task.task_id().value());

  string directory = slave::paths::getExecutorLatestRunPath(
      flags.work_dir,
      offer.slave_id(),
      frameworkId.get(),
      executorId);

  EXPECT_FALSE(os::exists(path::join(directory, "path1")));

  string volumePath = slave::paths::getPersistentVolumePath(
      flags.work_dir,
      "role1",
      "id1");

  EXPECT_SOME_EQ("abc\n", os::read(path::join(volumePath, "file")));

  driver.stop();
  driver.join();
}
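
// NOTE: `getPersistentVolumePath(flags.work_dir, "role1", "id1")` above
// resolves to the agent-managed volume directory (of the form
// `<work_dir>/volumes/roles/<role>/<id>`), which the isolator bind
// mounts at `path1` inside the container. That is why `path1` does not
// appear in the executor's sandbox after the task finishes while the
// file written through it survives at the volume path, as the two
// expectations above assert.
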
// Tests that the task fails when it attempts to write to a persistent
// volume mounted as read-only. Note that although we use a shared
// persistent volume, the behavior is the same for non-shared
// persistent volumes.
TEST_F(LinuxFilesystemIsolatorMesosTest,
       ROOT_WriteAccessSharedPersistentVolumeReadOnlyMode)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  string registry = path::join(sandbox.get(), "registry");
  AWAIT_READY(DockerArchive::create(registry, "test_image"));

  slave::Flags flags = CreateSlaveFlags();
  flags.resources = "cpus:2;mem:128;disk(role1):128";
  flags.isolation = "filesystem/linux,docker/runtime";
  flags.docker_registry = registry;
  flags.docker_store_dir = path::join(sandbox.get(), "store");
  flags.image_providers = "docker";

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_roles(0, "role1");
  frameworkInfo.add_capabilities()->set_type(
      FrameworkInfo::Capability::SHARED_RESOURCES);

  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  // Create a shared volume that the task will attempt to write to.
  Resource volume = createPersistentVolume(
      Megabytes(4),
      "role1",
      "id1",
      "volume_path",
      None(),
      None(),
      frameworkInfo.principal(),
      true); // Shared volume.

  // The task uses the shared volume as read-only.
  Resource roVolume = volume;
  roVolume.mutable_disk()->mutable_volume()->set_mode(Volume::RO);

  Resources taskResources =
    Resources::parse("cpus:1;mem:64;disk(role1):1").get() + roVolume;

  TaskInfo task = createTask(
      offers.get()[0].slave_id(),
      taskResources,
      "echo hello > volume_path/file");

  // The task fails to write to the volume since its resources intend
  // to use the volume as read-only.
  Future<TaskStatus> statusStarting;
  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusFailed;

  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusStarting))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusFailed));

  driver.acceptOffers(
      {offers.get()[0].id()},
      {CREATE(volume), LAUNCH({task})});

  AWAIT_READY(statusStarting);
  EXPECT_EQ(task.task_id(), statusStarting->task_id());
  EXPECT_EQ(TASK_STARTING, statusStarting->state());

  AWAIT_READY(statusRunning);
  EXPECT_EQ(task.task_id(), statusRunning->task_id());
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  AWAIT_READY(statusFailed);
  EXPECT_EQ(task.task_id(), statusFailed->task_id());
  EXPECT_EQ(TASK_FAILED, statusFailed->state());

  driver.stop();
  driver.join();
}
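
// For context: on Linux, a bind mount cannot be made read-only in a
// single mount(2) call; it takes a bind mount followed by a read-only
// remount. The helper below is a minimal sketch of mounting a volume
// read-only into a container path, assuming root privileges. It mirrors
// the effect the test relies on when `filesystem/linux` honors
// `Volume::RO`, but it is not the isolator's actual implementation.

#include <sys/mount.h>

// Bind mounts `source` at `target` and remounts it read-only.
// Returns 0 on success, -1 on failure (with errno set by mount(2)).
static int bindMountReadOnly(const char* source, const char* target)
{
  if (mount(source, target, nullptr, MS_BIND, nullptr) != 0) {
    return -1;
  }

  // MS_RDONLY is ignored on the initial bind; a remount pass is
  // required for the read-only flag to take effect.
  return mount(
      nullptr, target, nullptr, MS_BIND | MS_REMOUNT | MS_RDONLY, nullptr);
}
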
// This test verifies that the volume usage accounting for sandboxes
// with bind-mounted volumes (while the linux filesystem isolator is
// used) works correctly by creating a file within the volume whose
// size exceeds the sandbox quota.
TEST_F(LinuxFilesystemIsolatorMesosTest,
       ROOT_VolumeUsageExceedsSandboxQuota)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  string registry = path::join(sandbox.get(), "registry");
  AWAIT_READY(DockerArchive::create(registry, "test_image"));

  slave::Flags flags = CreateSlaveFlags();
  flags.resources = "cpus:2;mem:128;disk(role1):128";
  flags.isolation = "disk/du,filesystem/linux,docker/runtime";
  flags.docker_registry = registry;
  flags.docker_store_dir = path::join(sandbox.get(), "store");
  flags.image_providers = "docker";

  // NOTE: We can't pause the clock because we need the reaper to reap
  // the 'du' subprocess.
  flags.container_disk_watch_interval = Milliseconds(1);
  flags.enforce_container_disk_quota = true;

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_roles(0, "role1");

  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  // We request a sandbox (1MB) that is smaller than the persistent
  // volume (4MB) and attempt to create a file in that volume that is
  // twice the size of the sandbox (2MB).
  Resources volume = createPersistentVolume(
      Megabytes(4),
      "role1",
      "id1",
      "volume_path",
      None(),
      None(),
      frameworkInfo.principal());

  Resources taskResources =
    Resources::parse("cpus:1;mem:64;disk(role1):1").get() + volume;

  // We sleep to give quota enforcement (du) a chance to kick in.
  TaskInfo task = createTask(
      offers.get()[0].slave_id(),
      taskResources,
      "dd if=/dev/zero of=volume_path/file bs=1048576 count=2 && sleep 1");

  Future<TaskStatus> statusStarting;
  Future<TaskStatus> statusRunning;
  Future<TaskStatus> statusFinished;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&statusStarting))
    .WillOnce(FutureArg<1>(&statusRunning))
    .WillOnce(FutureArg<1>(&statusFinished));

  driver.acceptOffers(
      {offers.get()[0].id()},
      {CREATE(volume), LAUNCH({task})});

  AWAIT_READY(statusStarting);
  EXPECT_EQ(task.task_id(), statusStarting->task_id());
  EXPECT_EQ(TASK_STARTING, statusStarting->state());

  AWAIT_READY(statusRunning);
  EXPECT_EQ(task.task_id(), statusRunning->task_id());
  EXPECT_EQ(TASK_RUNNING, statusRunning->state());

  AWAIT_READY(statusFinished);
  EXPECT_EQ(task.task_id(), statusFinished->task_id());
  EXPECT_EQ(TASK_FINISHED, statusFinished->state());

  driver.stop();
  driver.join();
}
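
// NOTE on the arithmetic above: `dd ... bs=1048576 count=2` writes
// 2 * 1048576 bytes = 2MB into the volume. That fits within the 4MB
// volume quota but is twice the 1MB sandbox quota, so the task can
// only finish if bytes written to the bind-mounted volume are excluded
// from the sandbox's `du` accounting. A TASK_FAILED here would instead
// indicate that the volume's usage was incorrectly charged against the
// sandbox.
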
// This test verifies that the master reconciles operations that are
// missing from a reregistering slave. In this case, we drop the
// ApplyOperationMessage and expect the master to send a
// ReconcileOperationsMessage after the slave reregisters.
TEST_F(MasterSlaveReconciliationTest, ReconcileDroppedOperation)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  StandaloneMasterDetector detector(master.get()->pid);

  Future<UpdateSlaveMessage> updateSlaveMessage =
    FUTURE_PROTOBUF(UpdateSlaveMessage(), _, _);

  Try<Owned<cluster::Slave>> slave = StartSlave(&detector);
  ASSERT_SOME(slave);

  // Since any out-of-sync operation state in `UpdateSlaveMessage`
  // triggers a reconciliation, await the message from the initial agent
  // registration sequence before continuing. Otherwise we risk the
  // master reconciling with the agent before we fail over the master.
  AWAIT_READY(updateSlaveMessage);

  // Register the framework in a non-`*` role so it can reserve
  // resources.
  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_roles(0, DEFAULT_TEST_ROLE);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(&driver, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);

  // We prevent the operation from reaching the agent.
  Future<ApplyOperationMessage> applyOperationMessage =
    DROP_PROTOBUF(ApplyOperationMessage(), _, _);

  // Perform a reserve operation on the offered resources.
  // This will trigger an `ApplyOperationMessage`.
  ASSERT_FALSE(offers->empty());
  const Offer& offer = offers->at(0);

  Resources reservedResources = offer.resources();
  reservedResources =
    reservedResources.pushReservation(createDynamicReservationInfo(
        frameworkInfo.roles(0), frameworkInfo.principal()));

  driver.acceptOffers({offer.id()}, {RESERVE(reservedResources)});

  AWAIT_READY(applyOperationMessage);

  // We expect the master to detect the missing operation when the
  // slave reregisters and to reconcile the operations on that slave.
  Future<ReconcileOperationsMessage> reconcileOperationsMessage =
    FUTURE_PROTOBUF(ReconcileOperationsMessage(), _, _);

  // Simulate a master failover to trigger slave reregistration.
  detector.appoint(master.get()->pid);

  AWAIT_READY(reconcileOperationsMessage);
  ASSERT_EQ(1, reconcileOperationsMessage->operations_size());
  EXPECT_EQ(
      applyOperationMessage->operation_uuid(),
      reconcileOperationsMessage->operations(0).operation_uuid());
}
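
// For reference, the message flow exercised above (the protobuf names
// are the ones used in the test; the ordering summary is an assumption
// drawn from the test's expectations, not a normative protocol spec):
//
//   scheduler                master                       agent
//      |  acceptOffers          |                           |
//      |----------------------->|  ApplyOperationMessage    |
//      |                        |------------X (dropped)    |
//      |                        |                           |
//      |                        |      (simulated failover) |
//      |                        |<--------------------------|  reregisters
//      |                        |  ReconcileOperationsMessage
//      |                        |-------------------------->|
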