TEST_F(ProvisionerPathTest, ListProvisionerContainers)
{
  ContainerID child1;  // parent1/child1
  ContainerID child2;  // parent1/child2
  ContainerID child3;  // parent2/child3
  ContainerID parent1; // parent1
  ContainerID parent2; // parent2

  child1.set_value("child1");
  child1.mutable_parent()->set_value("parent1");
  child2.set_value("child2");
  child2.mutable_parent()->set_value("parent1");
  child3.set_value("child3");
  child3.mutable_parent()->set_value("parent2");

  parent1.set_value("parent1");
  parent2.set_value("parent2");

  const string provisionerDir = os::getcwd();

  const string containerDir1 = paths::getContainerDir(provisionerDir, child1);
  const string containerDir2 = paths::getContainerDir(provisionerDir, child2);
  const string containerDir3 = paths::getContainerDir(provisionerDir, child3);

  ASSERT_SOME(os::mkdir(containerDir1));
  ASSERT_SOME(os::mkdir(containerDir2));
  ASSERT_SOME(os::mkdir(containerDir3));

  Try<hashset<ContainerID>> containerIds =
    paths::listContainers(provisionerDir);

  ASSERT_SOME(containerIds);
  EXPECT_TRUE(containerIds->contains(parent1));
  EXPECT_TRUE(containerIds->contains(parent2));
  EXPECT_TRUE(containerIds->contains(child1));
  EXPECT_TRUE(containerIds->contains(child2));
  EXPECT_TRUE(containerIds->contains(child3));
  EXPECT_EQ(5u, containerIds->size());
}
Future<string> StoreProcess::_fetchImage(const Image::Appc& appc)
{
  VLOG(1) << "Fetching image '" << appc.name() << "'";

  Try<string> _tmpFetchDir = os::mkdtemp(
      path::join(paths::getStagingDir(rootDir), "XXXXXX"));

  if (_tmpFetchDir.isError()) {
    return Failure(
        "Failed to create temporary fetch directory for image '" +
        appc.name() + "': " + _tmpFetchDir.error());
  }

  const string tmpFetchDir = _tmpFetchDir.get();

  return fetcher->fetch(appc, Path(tmpFetchDir))
    .then(defer(self(), [=]() -> Future<string> {
      Try<list<string>> imageIds = os::ls(tmpFetchDir);
      if (imageIds.isError()) {
        return Failure(
            "Failed to list images under '" + tmpFetchDir + "': " +
            imageIds.error());
      }

      if (imageIds->size() != 1) {
        return Failure(
            "Unexpected number of images under '" + tmpFetchDir + "': " +
            stringify(imageIds->size()));
      }

      const string& imageId = imageIds->front();
      const string source = path::join(tmpFetchDir, imageId);
      const string target = paths::getImagePath(rootDir, imageId);

      if (os::exists(target)) {
        LOG(WARNING) << "Image id '" << imageId
                     << "' already exists in the store";
      } else {
        Try<Nothing> rename = os::rename(source, target);
        if (rename.isError()) {
          return Failure(
              "Failed to rename directory '" + source + "' to '" + target +
              "': " + rename.error());
        }
      }

      Try<Nothing> addCache = cache->add(imageId);
      if (addCache.isError()) {
        return Failure(
            "Failed to add image '" + appc.name() + "' with image id '" +
            imageId + "' to the cache: " + addCache.error());
      }

      Try<Nothing> rmdir = os::rmdir(tmpFetchDir);
      if (rmdir.isError()) {
        return Failure(
            "Failed to remove temporary fetch directory '" + tmpFetchDir +
            "' for image '" + appc.name() + "': " + rmdir.error());
      }

      return imageId;
    }));
}
// In this test, the framework is checkpointed so we expect the executor to
// persist across the slave restart and to have the same resource usage before
// and after.
TEST_F(ROOT_XFS_QuotaTest, CheckpointRecovery)
{
  slave::Flags flags = CreateSlaveFlags();

  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
  frameworkInfo.set_checkpoint(true);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:128;disk:1").get(),
      "dd if=/dev/zero of=file bs=1048576 count=1; sleep 1000");

  Future<TaskStatus> startingStatus;
  Future<TaskStatus> runningStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&startingStatus))
    .WillOnce(FutureArg<1>(&runningStatus));

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(startingStatus);
  EXPECT_EQ(task.task_id(), startingStatus->task_id());
  EXPECT_EQ(TASK_STARTING, startingStatus->state());

  AWAIT_READY(runningStatus);
  EXPECT_EQ(task.task_id(), runningStatus->task_id());
  EXPECT_EQ(TASK_RUNNING, runningStatus->state());

  Future<ResourceUsage> usage1 =
    process::dispatch(slave.get()->pid, &Slave::usage);
  AWAIT_READY(usage1);

  // We should have 1 executor using resources.
  ASSERT_EQ(1, usage1->executors().size());

  // Restart the slave.
  slave.get()->terminate();

  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);

  slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  // Wait for the slave to re-register.
  AWAIT_READY(slaveReregisteredMessage);

  Future<ResourceUsage> usage2 =
    process::dispatch(slave.get()->pid, &Slave::usage);
  AWAIT_READY(usage2);

  // We should still have 1 executor using resources.
  ASSERT_EQ(1, usage2->executors().size());

  Try<std::list<string>> sandboxes = os::glob(path::join(
      slave::paths::getSandboxRootDir(mountPoint.get()),
      "*",
      "frameworks",
      "*",
      "executors",
      "*",
      "runs",
      "*"));

  ASSERT_SOME(sandboxes);

  // One sandbox and one symlink.
  ASSERT_EQ(2u, sandboxes->size());

  // Scan the remaining sandboxes. We ought to still have project IDs
  // assigned to them all.
  foreach (const string& sandbox, sandboxes.get()) {
    // Skip the "latest" symlink.
    if (os::stat::islink(sandbox)) {
      continue;
    }

    EXPECT_SOME(xfs::getProjectId(sandbox));
  }

  driver.stop();
  driver.join();
}
// In this test, the framework is not checkpointed. This ensures that when we
// stop the slave, the executor is killed and we will need to recover the
// working directories without getting any checkpointed recovery state.
TEST_F(ROOT_XFS_QuotaTest, NoCheckpointRecovery)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  slave::Flags flags = CreateSlaveFlags();

  Fetcher fetcher(flags);

  Try<MesosContainerizer*> _containerizer =
    MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(_containerizer);

  Owned<MesosContainerizer> containerizer(_containerizer.get());

  Owned<MasterDetector> detector = master.get()->createDetector();

  Try<Owned<cluster::Slave>> slave = StartSlave(
      detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

  EXPECT_CALL(sched, registered(_, _, _));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(_, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:1;mem:128;disk:1").get(),
      "dd if=/dev/zero of=file bs=1048576 count=1; sleep 1000");

  Future<TaskStatus> startingStatus;
  Future<TaskStatus> runningStatus;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&startingStatus))
    .WillOnce(FutureArg<1>(&runningStatus))
    .WillOnce(Return());

  driver.launchTasks(offer.id(), {task});

  AWAIT_READY(startingStatus);
  EXPECT_EQ(task.task_id(), startingStatus->task_id());
  EXPECT_EQ(TASK_STARTING, startingStatus->state());

  AWAIT_READY(runningStatus);
  EXPECT_EQ(task.task_id(), runningStatus->task_id());
  EXPECT_EQ(TASK_RUNNING, runningStatus->state());

  Future<ResourceUsage> usage1 =
    process::dispatch(slave.get()->pid, &Slave::usage);
  AWAIT_READY(usage1);

  // We should have 1 executor using resources.
  ASSERT_EQ(1, usage1->executors().size());

  Future<hashset<ContainerID>> containers = containerizer->containers();
  AWAIT_READY(containers);
  ASSERT_EQ(1u, containers->size());

  ContainerID containerId = *containers->begin();

  // Restart the slave.
  slave.get()->terminate();

  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);

  _containerizer = MesosContainerizer::create(flags, true, &fetcher);
  ASSERT_SOME(_containerizer);
  containerizer.reset(_containerizer.get());

  slave = StartSlave(detector.get(), containerizer.get(), flags);
  ASSERT_SOME(slave);

  // Wait until slave recovery is complete.
  Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);
  AWAIT_READY_FOR(_recover, Seconds(60));

  // Wait until the orphan containers are cleaned up.
  AWAIT_READY_FOR(containerizer.get()->wait(containerId), Seconds(60));
  AWAIT_READY(slaveReregisteredMessage);

  Future<ResourceUsage> usage2 =
    process::dispatch(slave.get()->pid, &Slave::usage);
  AWAIT_READY(usage2);

  // We should have no executors left because we didn't checkpoint.
  ASSERT_TRUE(usage2->executors().empty());

  Try<std::list<string>> sandboxes = os::glob(path::join(
      slave::paths::getSandboxRootDir(mountPoint.get()),
      "*",
      "frameworks",
      "*",
      "executors",
      "*",
      "runs",
      "*"));

  ASSERT_SOME(sandboxes);

  // One sandbox and one symlink.
  ASSERT_EQ(2u, sandboxes->size());

  // Scan the remaining sandboxes and make sure that no projects are assigned.
  foreach (const string& sandbox, sandboxes.get()) {
    // Skip the "latest" symlink.
    if (os::stat::islink(sandbox)) {
      continue;
    }

    EXPECT_NONE(xfs::getProjectId(sandbox));
  }

  driver.stop();
  driver.join();
}