// Prepares the POSIX filesystem isolator for a new container.
//
// Rejects containers that request a root filesystem image or volumes
// in ContainerInfo (this isolator works via symlinks in the host
// filesystem, which would break under a changed root), records the
// container's sandbox directory, then delegates to update() to set up
// links for the executor's resources.
//
// Returns None() on success (no launch commands are needed), or a
// Failure if the container was already prepared or is unsupported.
Future<Option<ContainerLaunchInfo>> PosixFilesystemIsolatorProcess::prepare(
    const ContainerID& containerId,
    const ContainerConfig& containerConfig)
{
  if (infos.contains(containerId)) {
    return Failure("Container has already been prepared");
  }

  const ExecutorInfo& executorInfo = containerConfig.executor_info();

  if (executorInfo.has_container()) {
    CHECK_EQ(executorInfo.container().type(), ContainerInfo::MESOS);

    // Return failure if the container change the filesystem root
    // because the symlinks will become invalid in the new root.
    if (executorInfo.container().mesos().has_image()) {
      return Failure("Container root filesystems not supported");
    }

    if (executorInfo.container().volumes().size() > 0) {
      return Failure("Volumes in ContainerInfo is not supported");
    }
  }

  infos.put(containerId, Owned<Info>(new Info(containerConfig.directory())));

  // update() performs the actual symlink work; on success resolve the
  // returned future with None() since there is no launch info.
  return update(containerId, executorInfo.resources())
    .then([]() -> Future<Option<ContainerLaunchInfo>> {
      return None();
    });
}
// static int DbWrapper::copySecondary(DB_ENV *env, const std::string &oldcname, const std::string &newcname, const std::string &prefix, const std::string &dbname, bool duplicates) { int err; DbWrapper olddb(env, oldcname, prefix, dbname, DEFAULT_CONFIG); ContainerConfig config; config.setReadOnly(true); err = olddb.open(0, DB_BTREE, config); if (err) { // ignore ENOENT -- it's OK sometimes if (err == ENOENT) err = 0; return err; } config.setReadOnly(false); config.setPageSize(olddb.getPageSize()); DbWrapper newdb(env, newcname, prefix, dbname, config); if (duplicates) { err = newdb.getDb()->set_flags(newdb.getDb(), DB_DUP|DB_DUPSORT); if (err) throw XmlException(err); } err = newdb.open(0, DB_BTREE, DB_CREATE|DB_EXCL); if (err == 0) err = newdb.copy(olddb, DB_KEYFIRST); return err; }
// Verifies that the isolator reports a container's resident memory:
// prepare a container, isolate a helper subprocess, grow its RSS by
// 128MB, and expect usage() to report at least that much.
TYPED_TEST(MemIsolatorTest, MemUsage)
{
  slave::Flags flags;

  Try<Isolator*> isolator = TypeParam::create(flags);
  CHECK_SOME(isolator);

  ExecutorInfo executorInfo;
  executorInfo.mutable_resources()->CopyFrom(
      Resources::parse("mem:1024").get());

  ContainerID containerId;
  containerId.set_value(UUID::random().toString());

  // Use a relative temporary directory so it gets cleaned up
  // automatically with the test.
  Try<string> dir = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
  ASSERT_SOME(dir);

  ContainerConfig containerConfig;
  containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
  containerConfig.set_directory(dir.get());

  AWAIT_READY(isolator.get()->prepare(
      containerId,
      containerConfig));

  MemoryTestHelper helper;
  ASSERT_SOME(helper.spawn());
  ASSERT_SOME(helper.pid());

  // Set up the reaper to wait on the subprocess.
  Future<Option<int>> status = process::reap(helper.pid().get());

  // Isolate the subprocess.
  AWAIT_READY(isolator.get()->isolate(containerId, helper.pid().get()));

  const Bytes allocation = Megabytes(128);
  EXPECT_SOME(helper.increaseRSS(allocation));

  Future<ResourceStatistics> usage = isolator.get()->usage(containerId);
  AWAIT_READY(usage);

  // RSS may exceed the requested allocation (other pages of the
  // helper), so only assert a lower bound.
  EXPECT_GE(usage.get().mem_rss_bytes(), allocation.bytes());

  // Ensure the process is killed.
  helper.cleanup();

  // Make sure the subprocess was reaped.
  AWAIT_READY(status);

  // Let the isolator clean up.
  AWAIT_READY(isolator.get()->cleanup(containerId));

  delete isolator.get();
}
// Default constructor: creates a purely in-memory statistics database
// (no environment, no names) backed by an anonymous btree. Throws
// XmlException if the underlying database cannot be opened.
StructuralStatsDatabase::StructuralStatsDatabase()
	: db_(0, "", "", "", DEFAULT_CONFIG),
	  inMemory_(true)
{
	ContainerConfig openConfig;
	openConfig.setAllowCreate(true);
	const int openErr = db_.open(0, DB_BTREE, openConfig);
	if (openErr != 0)
		throw XmlException(openErr);
}
// Copy constructor. Copies all configuration settings from 'o' but
// deliberately leaves the new object detached: no manager association
// (mgr_ == 0) and no container ownership (containerOwned_ == false).
ContainerConfig::ContainerConfig(const ContainerConfig &o)
	: mode_(o.getMode()),
	  dbOpenFlags_(o.getDbOpenFlags()),
	  dbSetFlags_(o.getDbSetFlags()),
	  seqFlags_(o.getSeqFlags()),
	  xmlFlags_(o.getXmlFlags()),
	  type_(o.getContainerType()),
	  compressionName_(o.getCompressionName()),
	  mgr_(0),
	  pageSize_(o.getPageSize()),
	  sequenceIncr_(o.getSequenceIncrement()),
	  containerOwned_(false)
{}
// Prepares the net_cls cgroup for a new container: creates the cgroup
// under flags.cgroups_root, optionally chowns it (non-recursively) to
// the container user so the executor can create nested cgroups while
// the control files stay slave-owned, records the Info, and delegates
// resource setup to update().
//
// Returns None() on success (no launch commands), or a Failure if the
// container was already prepared or any cgroup operation fails.
Future<Option<ContainerLaunchInfo>> CgroupsNetClsIsolatorProcess::prepare(
    const ContainerID& containerId,
    const ContainerConfig& containerConfig)
{
  if (infos.contains(containerId)) {
    return Failure("Container has already been prepared");
  }

  // Use this info to create the cgroup, but do not insert it into
  // infos till the cgroup has been created successfully.
  Info info(path::join(flags.cgroups_root, containerId.value()));

  // Create a cgroup for this container.
  Try<bool> exists = cgroups::exists(hierarchy, info.cgroup);
  if (exists.isError()) {
    return Failure("Failed to check if the cgroup already exists: " +
                   exists.error());
  } else if (exists.get()) {
    return Failure("The cgroup already exists");
  }

  Try<Nothing> create = cgroups::create(hierarchy, info.cgroup);
  if (create.isError()) {
    return Failure("Failed to create the cgroup: " + create.error());
  }

  // 'chown' the cgroup so the executor can create nested cgroups. Do
  // not recurse so the control files are still owned by the slave
  // user and thus cannot be changed by the executor.
  if (containerConfig.has_user()) {
    Try<Nothing> chown = os::chown(
        containerConfig.user(),
        path::join(hierarchy, info.cgroup),
        false);
    if (chown.isError()) {
      return Failure("Failed to change ownership of cgroup hierarchy: " +
                     chown.error());
    }
  }

  infos.emplace(containerId, info);

  // FIX: use the ContainerConfig accessor 'executor_info()' as in the
  // other prepare() implementations in this file ('executorinfo()' does
  // not match the protobuf-generated accessor for the executor_info
  // field).
  return update(containerId, containerConfig.executor_info().resources())
    .then([]() -> Future<Option<ContainerLaunchInfo>> {
      return None();
    });
}
// Copies the flag-related settings from 'flags' into this
// configuration: the boolean feature toggles (transactional, index
// nodes, encryption, statistics, validation, checksum) followed by the
// raw Berkeley DB open/set/sequence flag words. Non-flag settings
// (page size, compression, etc.) are left untouched.
void ContainerConfig::setFlags(const ContainerConfig &flags)
{
	setTransactional(flags.getTransactional());
	setIndexNodes(flags.getIndexNodes());
	setEncrypted(flags.getEncrypted());
	setStatistics(flags.getStatistics());
	setAllowValidation(flags.getAllowValidation());
	setChecksum(flags.getChecksum());
	// Raw DB flag words are copied verbatim.
	setDbOpenFlags(flags.getDbOpenFlags());
	setDbSetFlags(flags.getDbSetFlags());
	setSeqFlags(flags.getSeqFlags());
}
// Prepares the Linux filesystem isolator for a new container: records
// per-container state, requests a fresh mount namespace for the
// executor, and generates a shell script (run inside that namespace
// right after fork) that performs the volume mounts so they do not
// leak into the host mount namespace.
//
// Returns the ContainerLaunchInfo carrying the namespace flag and
// mount script, or a Failure on duplicate preparation / script errors.
Future<Option<ContainerLaunchInfo>> LinuxFilesystemIsolatorProcess::prepare(
    const ContainerID& containerId,
    const ContainerConfig& containerConfig)
{
  const string& directory = containerConfig.directory();

  // NOTE: Removed an unused local 'user' (copied from
  // containerConfig.user() but never read in this function).

  if (infos.contains(containerId)) {
    return Failure("Container has already been prepared");
  }

  Owned<Info> info(new Info(
      directory,
      containerConfig.executor_info()));

  infos.put(containerId, info);

  ContainerLaunchInfo launchInfo;
  // Give the executor its own mount namespace so container mounts stay
  // private.
  launchInfo.set_namespaces(CLONE_NEWNS);

  // Prepare the commands that will be run in the container's mount
  // namespace right after forking the executor process. We use these
  // commands to mount those volumes specified in the container info
  // so that they don't pollute the host mount namespace.
  Try<string> _script = script(containerId, containerConfig);
  if (_script.isError()) {
    return Failure("Failed to generate isolation script: " + _script.error());
  }

  CommandInfo* command = launchInfo.add_commands();
  command->set_value(_script.get());

  return update(containerId, containerConfig.executor_info().resources())
    .then([launchInfo]() -> Future<Option<ContainerLaunchInfo>> {
      return launchInfo;
    });
}
// Constructs the document database for container 'name': a primary
// content database (prefix "content_") plus a metadata secondary,
// using the caller-supplied compression (may be 0). The databases are
// actually opened -- and possibly created -- by the open() call in the
// body, within the given transaction.
DocumentDatabase::DocumentDatabase(DB_ENV *env, Transaction *txn,
				   const std::string &name,
				   const ContainerConfig &config,
				   XmlCompression *compression)
	: environment_(env),
	  name_(name),
	  type_(config.getContainerType()),
	  content_(env, name, "content_", document_name, DEFAULT_CONFIG),
	  secondary_(env, name, document_name, DEFAULT_CONFIG),
	  compressor_(compression)
{
	open(txn, config);
}
// Constructs a wrapper around a Berkeley DB handle. The DB handle is
// created immediately via db_create() -- throwing XmlException on
// failure -- and, when an environment is supplied, flags are inherited
// from it via setFlags(). The database itself is opened later through
// open(); no cursors exist yet (readCursor_/writeCursor_ are null).
DbWrapper::DbWrapper(DB_ENV *environment, const std::string &name,
		     const std::string &prefixName,
		     const std::string &databaseName,
		     const ContainerConfig &config)
	: flags_(0),
	  name_(name),
	  prefixName_(prefixName),
	  databaseName_(databaseName),
	  pageSize_(config.getPageSize()),
	  db_(0),
	  environment_(environment),
	  readCursor_(0),
	  writeCursor_(0)
{
	int ret = db_create(&db_, environment, 0);
	if (ret != 0)
		throw XmlException(ret);
	if (environment_)
		setFlags(environment_);
}
// Constructs the name dictionary for a container: a primary database
// plus a secondary (opened with duplicates) that together map names to
// IDs. On any open failure or XmlException both databases are cleaned
// up and the transaction (if any) aborted; EEXIST and ENOENT are
// translated into the container-specific XmlException codes.
DictionaryDatabase::DictionaryDatabase(DB_ENV *env, Transaction *txn,
				       const std::string &name,
				       const ContainerConfig &config,
				       bool useMutex)
	: stringCache_(true),
	  environment_(env),
	  name_(name),
	  primary_(new PrimaryDatabase(env, name, dictionary_name,
				       DEFAULT_CONFIG)),
	  secondary_(new SecondaryDatabase(env, name, dictionary_name,
					   DEFAULT_CONFIG)),
	  nidName_(0),
	  nidRoot_(0),
	  usePreloads_(false),
	  isTransacted_(txn ? true : false),
	  mutex_(useMutex ? MutexLock::createMutex() : 0)
{
	cache_.setDictionaryDatabase(this);
	// CDS "all databases" mode implies transacted-like behavior even
	// without an explicit transaction.
	if (!isTransacted_ && env) {
		u_int32_t envFlags;
		env->get_flags(env, &envFlags);
		if (envFlags & DB_CDB_ALLDB)
			isTransacted_ = true;
	}
	int err = 0;
	try {
		// set cache sizes low if no DB_ENV -- this is the in-memory
		// dictionary for transient docs
		if (!env) {
			primary_->getDb()->set_cachesize(primary_->getDb(), 0, dictCacheBytes, 1);
			secondary_->getDb()->set_cachesize(secondary_->getDb(), 0, dictCacheBytes, 1);
		}
		// Open the Db objects
		err = primary_->open(txn, config);
		if (err == 0)
			err = secondary_->open(txn, /*duplicates*/true, config);
		if (err == 0) {
			// Lookup/Define the dbxml namespace names (but don't define
			// if this is a read-only container)
			bool rdonly = config.getReadOnly();
			preloadDictionary(txn, rdonly);
		}
	} catch (XmlException &) {
		// Roll back partial state before propagating.
		secondary_->cleanup();
		primary_->cleanup();
		if (txn)
			txn->abort();
		throw;
	}
	if (err != 0) {
		secondary_->cleanup();
		primary_->cleanup();
		if (txn)
			txn->abort();
		string msg = name;
		if (err == EEXIST) {
			msg += ": container exists";
			throw XmlException(
				XmlException::CONTAINER_EXISTS, msg);
		} else if (err == ENOENT) {
			msg += ": container file not found, or not a container";
			throw XmlException(XmlException::CONTAINER_NOT_FOUND, msg);
		}
		throw XmlException(err);
	}
}
// A test to verify the number of processes and threads in a
// container.
TEST_F(LimitedCpuIsolatorTest, ROOT_CGROUPS_Pids_and_Tids)
{
  slave::Flags flags;
  flags.cgroups_cpu_enable_pids_and_tids_count = true;

  Try<Isolator*> isolator = CgroupsCpushareIsolatorProcess::create(flags);
  CHECK_SOME(isolator);

  Try<Launcher*> launcher = LinuxLauncher::create(flags);
  CHECK_SOME(launcher);

  ExecutorInfo executorInfo;
  executorInfo.mutable_resources()->CopyFrom(
      Resources::parse("cpus:0.5;mem:512").get());

  ContainerID containerId;
  containerId.set_value(UUID::random().toString());

  // Use a relative temporary directory so it gets cleaned up
  // automatically with the test.
  Try<string> dir = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
  ASSERT_SOME(dir);

  ContainerConfig containerConfig;
  containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
  containerConfig.set_directory(dir.get());

  Future<Option<ContainerLaunchInfo>> prepare = isolator.get()->prepare(
      containerId,
      containerConfig);
  AWAIT_READY(prepare);

  // Right after the creation of the cgroup, which happens in
  // 'prepare', we check that it is empty.
  Future<ResourceStatistics> usage = isolator.get()->usage(containerId);
  AWAIT_READY(usage);
  EXPECT_EQ(0U, usage.get().processes());
  EXPECT_EQ(0U, usage.get().threads());

  // The pipe is used by childSetup to block the child until we signal
  // it below.
  int pipes[2];
  ASSERT_NE(-1, ::pipe(pipes));

  vector<string> argv(1);
  argv[0] = "cat";

  Try<pid_t> pid = launcher.get()->fork(
      containerId,
      "cat",
      argv,
      Subprocess::FD(STDIN_FILENO),
      Subprocess::FD(STDOUT_FILENO),
      Subprocess::FD(STDERR_FILENO),
      None(),
      None(),
      lambda::bind(&childSetup, pipes),
      prepare.get().isSome() ? prepare.get().get().namespaces() : 0);

  ASSERT_SOME(pid);

  // Reap the forked child.
  Future<Option<int>> status = process::reap(pid.get());

  // Continue in the parent.
  ASSERT_SOME(os::close(pipes[0]));

  // Before isolation, the cgroup is empty.
  usage = isolator.get()->usage(containerId);
  AWAIT_READY(usage);
  EXPECT_EQ(0U, usage.get().processes());
  EXPECT_EQ(0U, usage.get().threads());

  // Isolate the forked child.
  AWAIT_READY(isolator.get()->isolate(containerId, pid.get()));

  // After the isolation, the cgroup is not empty, even though the
  // process hasn't exec'd yet.
  usage = isolator.get()->usage(containerId);
  AWAIT_READY(usage);
  EXPECT_EQ(1U, usage.get().processes());
  EXPECT_EQ(1U, usage.get().threads());

  // Now signal the child to continue.
  char dummy;
  ASSERT_LT(0, ::write(pipes[1], &dummy, sizeof(dummy)));
  ASSERT_SOME(os::close(pipes[1]));

  // Process count should be 1 while the forked 'cat' is still running.
  usage = isolator.get()->usage(containerId);
  AWAIT_READY(usage);
  EXPECT_EQ(1U, usage.get().processes());
  EXPECT_EQ(1U, usage.get().threads());

  // Ensure all processes are killed.
  AWAIT_READY(launcher.get()->destroy(containerId));

  // Wait for the command to complete.
  AWAIT_READY(status);

  // After the process is killed, the cgroup should be empty again.
  usage = isolator.get()->usage(containerId);
  AWAIT_READY(usage);
  EXPECT_EQ(0U, usage.get().processes());
  EXPECT_EQ(0U, usage.get().threads());

  // Let the isolator clean up.
  AWAIT_READY(isolator.get()->cleanup(containerId));

  delete isolator.get();
  delete launcher.get();
}
// This test verifies that we can successfully launch a container with
// a big (>= 10 cpus) cpu quota. This is to catch the regression
// observed in MESOS-1049.
// TODO(vinod): Revisit this if/when the isolator restricts the number
// of cpus that an executor can use based on the slave cpus.
TEST_F(LimitedCpuIsolatorTest, ROOT_CGROUPS_CFS_Big_Quota)
{
  slave::Flags flags;

  // Enable CFS to cap CPU utilization.
  flags.cgroups_enable_cfs = true;

  Try<Isolator*> isolator = CgroupsCpushareIsolatorProcess::create(flags);
  CHECK_SOME(isolator);

  Try<Launcher*> launcher = LinuxLauncher::create(flags);
  CHECK_SOME(launcher);

  // Set the executor's resources to 100.5 cpu.
  ExecutorInfo executorInfo;
  executorInfo.mutable_resources()->CopyFrom(
      Resources::parse("cpus:100.5").get());

  ContainerID containerId;
  containerId.set_value(UUID::random().toString());

  // Use a relative temporary directory so it gets cleaned up
  // automatically with the test.
  Try<string> dir = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
  ASSERT_SOME(dir);

  ContainerConfig containerConfig;
  containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
  containerConfig.set_directory(dir.get());

  Future<Option<ContainerLaunchInfo>> prepare = isolator.get()->prepare(
      containerId,
      containerConfig);
  AWAIT_READY(prepare);

  // The pipe lets childSetup hold the child until we signal it.
  int pipes[2];
  ASSERT_NE(-1, ::pipe(pipes));

  vector<string> argv(3);
  argv[0] = "sh";
  argv[1] = "-c";
  argv[2] = "exit 0";

  Try<pid_t> pid = launcher.get()->fork(
      containerId,
      "sh",
      argv,
      Subprocess::FD(STDIN_FILENO),
      Subprocess::FD(STDOUT_FILENO),
      Subprocess::FD(STDERR_FILENO),
      None(),
      None(),
      lambda::bind(&childSetup, pipes),
      prepare.get().isSome() ? prepare.get().get().namespaces() : 0);

  ASSERT_SOME(pid);

  // Reap the forked child.
  Future<Option<int> > status = process::reap(pid.get());

  // Continue in the parent.
  ASSERT_SOME(os::close(pipes[0]));

  // Isolate the forked child.
  AWAIT_READY(isolator.get()->isolate(containerId, pid.get()));

  // Now signal the child to continue.
  char dummy;
  ASSERT_LT(0, ::write(pipes[1], &dummy, sizeof(dummy)));
  ASSERT_SOME(os::close(pipes[1]));

  // Wait for the command to complete successfully.
  AWAIT_READY(status);
  ASSERT_SOME_EQ(0, status.get());

  // Ensure all processes are killed.
  AWAIT_READY(launcher.get()->destroy(containerId));

  // Let the isolator clean up.
  AWAIT_READY(isolator.get()->cleanup(containerId));

  delete isolator.get();
  delete launcher.get();
}
// Verifies that with CFS enabled a 0.5-cpu container is actually
// throttled: a cpu-bound child run for 0.5s of wall time should use
// roughly 0.25s of cpu time.
TEST_F(LimitedCpuIsolatorTest, ROOT_CGROUPS_CFS_Enable_Cfs)
{
  slave::Flags flags;

  // Enable CFS to cap CPU utilization.
  flags.cgroups_enable_cfs = true;

  Try<Isolator*> isolator = CgroupsCpushareIsolatorProcess::create(flags);
  CHECK_SOME(isolator);

  Try<Launcher*> launcher = LinuxLauncher::create(flags);
  CHECK_SOME(launcher);

  // Set the executor's resources to 0.5 cpu.
  ExecutorInfo executorInfo;
  executorInfo.mutable_resources()->CopyFrom(
      Resources::parse("cpus:0.5").get());

  ContainerID containerId;
  containerId.set_value(UUID::random().toString());

  // Use a relative temporary directory so it gets cleaned up
  // automatically with the test.
  Try<string> dir = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
  ASSERT_SOME(dir);

  ContainerConfig containerConfig;
  containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
  containerConfig.set_directory(dir.get());

  Future<Option<ContainerLaunchInfo>> prepare = isolator.get()->prepare(
      containerId,
      containerConfig);
  AWAIT_READY(prepare);

  // Generate random numbers to max out a single core. We'll run this for 0.5
  // seconds of wall time so it should consume approximately 250 ms of total
  // cpu time when limited to 0.5 cpu. We use /dev/urandom to prevent blocking
  // on Linux when there's insufficient entropy.
  string command = "cat /dev/urandom > /dev/null & "
    "export MESOS_TEST_PID=$! && "
    "sleep 0.5 && "
    "kill $MESOS_TEST_PID";

  // The pipe lets childSetup hold the child until we signal it.
  int pipes[2];
  ASSERT_NE(-1, ::pipe(pipes));

  vector<string> argv(3);
  argv[0] = "sh";
  argv[1] = "-c";
  argv[2] = command;

  Try<pid_t> pid = launcher.get()->fork(
      containerId,
      "sh",
      argv,
      Subprocess::FD(STDIN_FILENO),
      Subprocess::FD(STDOUT_FILENO),
      Subprocess::FD(STDERR_FILENO),
      None(),
      None(),
      lambda::bind(&childSetup, pipes),
      prepare.get().isSome() ? prepare.get().get().namespaces() : 0);

  ASSERT_SOME(pid);

  // Reap the forked child.
  Future<Option<int> > status = process::reap(pid.get());

  // Continue in the parent.
  ASSERT_SOME(os::close(pipes[0]));

  // Isolate the forked child.
  AWAIT_READY(isolator.get()->isolate(containerId, pid.get()));

  // Now signal the child to continue.
  char dummy;
  ASSERT_LT(0, ::write(pipes[1], &dummy, sizeof(dummy)));
  ASSERT_SOME(os::close(pipes[1]));

  // Wait for the command to complete.
  AWAIT_READY(status);

  Future<ResourceStatistics> usage = isolator.get()->usage(containerId);
  AWAIT_READY(usage);

  // Expect that no more than 300 ms of cpu time has been consumed. We also
  // check that at least 50 ms of cpu time has been consumed so this test will
  // fail if the host system is very heavily loaded. This behavior is correct
  // because under such conditions we aren't actually testing the CFS cpu
  // limiter.
  double cpuTime = usage.get().cpus_system_time_secs() +
                   usage.get().cpus_user_time_secs();
  EXPECT_GE(0.30, cpuTime);
  EXPECT_LE(0.05, cpuTime);

  // Ensure all processes are killed.
  AWAIT_READY(launcher.get()->destroy(containerId));

  // Let the isolator clean up.
  AWAIT_READY(isolator.get()->cleanup(containerId));

  delete isolator.get();
  delete launcher.get();
}
// Verifies that a container whose cpu resource is marked revocable is
// placed in a cgroup with the reduced CPU_SHARES_PER_CPU_REVOCABLE
// cpu.shares value.
TEST_F(RevocableCpuIsolatorTest, ROOT_CGROUPS_RevocableCpu)
{
  slave::Flags flags;

  Try<Isolator*> isolator = CgroupsCpushareIsolatorProcess::create(flags);
  CHECK_SOME(isolator);

  Try<Launcher*> launcher = PosixLauncher::create(flags);

  // Include revocable CPU in the executor's resources.
  Resource cpu = Resources::parse("cpus", "1", "*").get();
  // NOTE: the return value is deliberately discarded --
  // mutable_revocable() is called only to mark the 'revocable' field
  // present on the resource.
  cpu.mutable_revocable();

  ExecutorInfo executorInfo;
  executorInfo.add_resources()->CopyFrom(cpu);

  ContainerID containerId;
  containerId.set_value(UUID::random().toString());

  ContainerConfig containerConfig;
  containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
  containerConfig.set_directory(os::getcwd());

  AWAIT_READY(isolator.get()->prepare(
      containerId,
      containerConfig));

  vector<string> argv{"sleep", "100"};

  Try<pid_t> pid = launcher.get()->fork(
      containerId,
      "/bin/sleep",
      argv,
      Subprocess::PATH("/dev/null"),
      Subprocess::PATH("/dev/null"),
      Subprocess::PATH("/dev/null"),
      None(),
      None(),
      None(),
      None());

  ASSERT_SOME(pid);

  AWAIT_READY(isolator.get()->isolate(containerId, pid.get()));

  // Executor should have proper cpu.shares for revocable containers.
  Result<string> cpuHierarchy = cgroups::hierarchy("cpu");
  ASSERT_SOME(cpuHierarchy);

  Result<string> cpuCgroup = cgroups::cpu::cgroup(pid.get());
  ASSERT_SOME(cpuCgroup);

  EXPECT_SOME_EQ(
      CPU_SHARES_PER_CPU_REVOCABLE,
      cgroups::cpu::shares(cpuHierarchy.get(), cpuCgroup.get()));

  // Kill the container and clean up.
  Future<Option<int>> status = process::reap(pid.get());

  AWAIT_READY(launcher.get()->destroy(containerId));

  AWAIT_READY(status);

  AWAIT_READY(isolator.get()->cleanup(containerId));

  delete isolator.get();
  delete launcher.get();
}
Future<bool> launch( const ContainerID& containerId, const ContainerConfig& containerConfig, const map<string, string>& environment, const Option<string>& pidCheckpointPath) { CHECK(!terminatedContainers.contains(containerId)) << "Failed to launch nested container " << containerId << " for executor '" << containerConfig.executor_info().executor_id() << "' of framework " << containerConfig.executor_info().framework_id() << " because this ContainerID is being re-used with" << " a previously terminated container"; CHECK(!containers_.contains(containerId)) << "Failed to launch container " << containerId << " for executor '" << containerConfig.executor_info().executor_id() << "' of framework " << containerConfig.executor_info().framework_id() << " because it is already launched"; containers_[containerId] = Owned<ContainerData>(new ContainerData()); if (containerId.has_parent()) { // Launching a nested container via the test containerizer is a // no-op for now. return true; } CHECK(executors.contains(containerConfig.executor_info().executor_id())) << "Failed to launch executor '" << containerConfig.executor_info().executor_id() << "' of framework " << containerConfig.executor_info().framework_id() << " because it is unknown to the containerizer"; containers_.at(containerId)->executorId = containerConfig.executor_info().executor_id(); containers_.at(containerId)->frameworkId = containerConfig.executor_info().framework_id(); // We need to synchronize all reads and writes to the environment // as this is global state. // // TODO(jmlvanre): Even this is not sufficient, as other aspects // of the code may read an environment variable while we are // manipulating it. The better solution is to pass the environment // variables into the fork, or to set them on the command line. // See MESOS-3475. 
static std::mutex mutex; synchronized(mutex) { // Since the constructor for `MesosExecutorDriver` reads // environment variables to load flags, even it needs to // be within this synchronization section. // // Prepare additional environment variables for the executor. // TODO(benh): Need to get flags passed into the TestContainerizer // in order to properly use here. slave::Flags flags; flags.recovery_timeout = Duration::zero(); // We need to save the original set of environment variables so we // can reset the environment after calling 'driver->start()' below. hashmap<string, string> original = os::environment(); foreachpair (const string& name, const string& variable, environment) { os::setenv(name, variable); } // TODO(benh): Can this be removed and done exclusively in the // 'executorEnvironment()' function? There are other places in the // code where we do this as well and it's likely we can do this once // in 'executorEnvironment()'. foreach (const Environment::Variable& variable, containerConfig.executor_info() .command().environment().variables()) { os::setenv(variable.name(), variable.value()); } os::setenv("MESOS_LOCAL", "1"); const Owned<ExecutorData>& executorData = executors.at(containerConfig.executor_info().executor_id()); if (executorData->executor != nullptr) { executorData->driver = Owned<MesosExecutorDriver>( new MesosExecutorDriver(executorData->executor)); executorData->driver->start(); } else { shared_ptr<v1::MockHTTPExecutor> executor = executorData->v1ExecutorMock; executorData->v1Library = Owned<v1::executor::TestMesos>( new v1::executor::TestMesos(ContentType::PROTOBUF, executor)); } os::unsetenv("MESOS_LOCAL"); // Unset the environment variables we set by resetting them to their // original values and also removing any that were not part of the // original environment. foreachpair (const string& name, const string& value, original) { os::setenv(name, value); }
// upgrade from 2.1 (version 4) to 2.2 (version 5)
// Modify keys for both document secondary and content (if wholedoc storage)
// secondary key: docId, nameId, type
// content key: docId
// for both:
// 1. open old DB and create a new database
// 2. for each record:
//    o unmarshal old format; marshal in new, and put into new DB
static int upgrade22(const std::string &name, const std::string &tname,
		     Manager &mgr)
{
	int err = 0;
	SecondaryDatabase secondary(mgr.getDB_ENV(), name,
				    document_name, DEFAULT_CONFIG);
	err = secondary.open(0, /*duplicates*/false, DEFAULT_CONFIG);
	ContainerConfig config;
	config.setPageSize(secondary.getPageSize());
	SecondaryDatabase newSec(mgr.getDB_ENV(), tname,
				 document_name, config);
	// NOTE(review): the results of these two open() calls are assigned
	// to 'err' but not checked before the cursor loop starts; the loop
	// condition (err == 0) only catches the second one -- confirm this
	// is intentional.
	err = newSec.open(0, /*duplicates*/false, CREATE_CONFIG);
	// the key is: docId,nodeId,type
	DbXml::DbXmlDbt key;
	DbXml::DbXmlDbt data;
	Cursor curs(secondary, (Transaction*)0, DbXml::CURSOR_READ);
	int ret = 0;
	while ((err == 0) && (ret = curs.get(key, data, DB_NEXT)) == 0) {
		// decode key using pre-2.2 code
		DocID did;
		NameID nid;
		XmlValue::Type type;
		u_int32_t *p = (u_int32_t *)key.data;
		u_int32_t id1, id2;
		id1 = *p++;
		id2 = *p++;
		// Old keys were stored little-endian; swap on big-endian hosts.
		if (Manager::isBigendian()) {
			M_32_SWAP(id1);
			M_32_SWAP(id2);
		}
		did = id1;
		nid = id2;
		type = (XmlValue::Type) * ((unsigned char*)p);
		// encode using 2.2 code
		DbtOut newKey;
		MetaDatum::setKeyDbt(did, nid, type, newKey);
		err = newSec.put(0, &newKey, &data, 0);
	}
	curs.close();
	secondary.close(0);
	newSec.close(0);
	if (err == 0) {
		// Whole-document storage also keeps a content database keyed
		// by docId; rewrite those keys the same way. A missing content
		// database (ENOENT) means node storage -- not an error.
		DbWrapper content(mgr.getDB_ENV(), name, "content_",
				  document_name, DEFAULT_CONFIG);
		err = content.open(0, DB_BTREE, DEFAULT_CONFIG);
		if (err != 0) {
			// ignore ENOENT
			if (err == ENOENT)
				err = 0;
			return err;
		}
		config.setPageSize(content.getPageSize());
		DbWrapper newContent(mgr.getDB_ENV(), tname, "content_",
				     document_name, config);
		err = newContent.open(0, DB_BTREE, CREATE_CONFIG);
		if (err != 0)
			return err;
		// the key is: docId
		DbXml::DbXmlDbt key;
		DbXml::DbXmlDbt data;
		Cursor curs1(content, (Transaction*)0, DbXml::CURSOR_READ);
		int ret = 0;
		while ((err == 0) && (ret = curs1.get(key, data, DB_NEXT)) == 0) {
			// decode/encode key, rewrite
			u_int32_t *p = (u_int32_t *)key.data;
			u_int32_t id1;
			id1 = *p;
			if (Manager::isBigendian())
				M_32_SWAP(id1);
			DocID id(id1);
			DbtOut newKey;
			id.setDbtFromThis(newKey);
			err = newContent.put(0, &newKey, &data, 0);
		}
		curs1.close();
		content.close(0);
		newContent.close(0);
	}
	return err;
}
// Verifies that the isolator reports system (kernel) cpu time: run a
// kernel-heavy workload (/dev/urandom reads) and poll usage() until at
// least 1/8 second of system time is accounted.
TYPED_TEST(CpuIsolatorTest, SystemCpuUsage)
{
  slave::Flags flags;

  Try<Isolator*> isolator = TypeParam::create(flags);
  CHECK_SOME(isolator);

  // A PosixLauncher is sufficient even when testing a cgroups isolator.
  Try<Launcher*> launcher = PosixLauncher::create(flags);

  ExecutorInfo executorInfo;
  executorInfo.mutable_resources()->CopyFrom(
      Resources::parse("cpus:1.0").get());

  ContainerID containerId;
  containerId.set_value(UUID::random().toString());

  // Use a relative temporary directory so it gets cleaned up
  // automatically with the test.
  Try<string> dir = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
  ASSERT_SOME(dir);

  ContainerConfig containerConfig;
  containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
  containerConfig.set_directory(dir.get());

  AWAIT_READY(isolator.get()->prepare(
      containerId,
      containerConfig));

  const string& file = path::join(dir.get(), "mesos_isolator_test_ready");

  // Generating random numbers is done by the kernel and will max out a single
  // core and run almost exclusively in the kernel, i.e., system time.
  string command = "cat /dev/urandom > /dev/null & "
    "touch " + file + "; " // Signals the command is running.
    "sleep 60";

  // The pipe lets childSetup hold the child until we signal it.
  int pipes[2];
  ASSERT_NE(-1, ::pipe(pipes));

  vector<string> argv(3);
  argv[0] = "sh";
  argv[1] = "-c";
  argv[2] = command;

  Try<pid_t> pid = launcher.get()->fork(
      containerId,
      "sh",
      argv,
      Subprocess::FD(STDIN_FILENO),
      Subprocess::FD(STDOUT_FILENO),
      Subprocess::FD(STDERR_FILENO),
      None(),
      None(),
      lambda::bind(&childSetup, pipes),
      None());

  ASSERT_SOME(pid);

  // Reap the forked child.
  Future<Option<int> > status = process::reap(pid.get());

  // Continue in the parent.
  ASSERT_SOME(os::close(pipes[0]));

  // Isolate the forked child.
  AWAIT_READY(isolator.get()->isolate(containerId, pid.get()));

  // Now signal the child to continue.
  char dummy;
  ASSERT_LT(0, ::write(pipes[1], &dummy, sizeof(dummy)));
  ASSERT_SOME(os::close(pipes[1]));

  // Wait for the command to start.
  // NOTE(review): this is a busy-wait with no timeout; the test hangs
  // if the child never creates the file.
  while (!os::exists(file));

  // Wait up to 1 second for the child process to induce 1/8 of a second of
  // system cpu time.
  ResourceStatistics statistics;
  Duration waited = Duration::zero();
  do {
    Future<ResourceStatistics> usage = isolator.get()->usage(containerId);
    AWAIT_READY(usage);

    statistics = usage.get();

    // If we meet our usage expectations, we're done!
    if (statistics.cpus_system_time_secs() >= 0.125) {
      break;
    }

    os::sleep(Milliseconds(200));
    waited += Milliseconds(200);
  } while (waited < Seconds(1));

  EXPECT_LE(0.125, statistics.cpus_system_time_secs());

  // Ensure all processes are killed.
  AWAIT_READY(launcher.get()->destroy(containerId));

  // Make sure the child was reaped.
  AWAIT_READY(status);

  // Let the isolator clean up.
  AWAIT_READY(isolator.get()->cleanup(containerId));

  delete isolator.get();
  delete launcher.get();
}
// Generates the shell script that is executed inside the container's
// mount namespace right after fork. The script (a) makes '/' rslave so
// container mounts never propagate to the host, (b) bind-mounts the
// sandbox under the rootfs when one is specified, and (c) bind-mounts
// each host-path volume from the executor's ContainerInfo. Returns the
// script text, or an Error for invalid volumes / mkdir failures.
Try<string> LinuxFilesystemIsolatorProcess::script(
    const ContainerID& containerId,
    const ContainerConfig& containerConfig)
{
  ostringstream out;
  out << "#!/bin/sh\n";
  out << "set -x -e\n";

  // Make sure mounts in the container mount namespace do not
  // propagate back to the host mount namespace.
  // NOTE: We cannot simply run `mount --make-rslave /`, for more info
  // please refer to comments in mount.hpp.
  MesosContainerizerMount::Flags mountFlags;
  mountFlags.operation = MesosContainerizerMount::MAKE_RSLAVE;
  mountFlags.path = "/";
  out << path::join(flags.launcher_dir, "mesos-containerizer") << " "
      << MesosContainerizerMount::NAME << " "
      << stringify(mountFlags) << "\n";

  // No ContainerInfo means no volumes to mount.
  if (!containerConfig.executor_info().has_container()) {
    return out.str();
  }

  // Bind mount the sandbox if the container specifies a rootfs.
  if (containerConfig.has_rootfs()) {
    string sandbox = path::join(
        containerConfig.rootfs(),
        flags.sandbox_directory);

    Try<Nothing> mkdir = os::mkdir(sandbox);
    if (mkdir.isError()) {
      return Error(
          "Failed to create sandbox mount point at '" +
          sandbox + "': " + mkdir.error());
    }

    out << "mount -n --rbind '"
        << containerConfig.directory()
        << "' '" << sandbox << "'\n";
  }

  foreach (const Volume& volume,
           containerConfig.executor_info().container().volumes()) {
    // NOTE: Volumes with source will be handled by the corresponding
    // isolators (e.g., docker/volume).
    if (volume.has_source()) {
      VLOG(1) << "Ignored a volume with source for container '"
              << containerId << "'";
      continue;
    }

    if (!volume.has_host_path()) {
      return Error("A volume misses 'host_path'");
    }

    // If both 'host_path' and 'container_path' are relative paths,
    // return an error because the user can just directly access the
    // volume in the work directory.
    if (!strings::startsWith(volume.host_path(), "/") &&
        !strings::startsWith(volume.container_path(), "/")) {
      return Error(
          "Both 'host_path' and 'container_path' of a volume are relative");
    }

    // Determine the source of the mount.
    string source;

    if (strings::startsWith(volume.host_path(), "/")) {
      source = volume.host_path();

      // An absolute path must already exist.
      if (!os::exists(source)) {
        return Error("Absolute host path does not exist");
      }
    } else {
      // Path is interpreted as relative to the work directory.
      source = path::join(containerConfig.directory(), volume.host_path());

      // TODO(jieyu): We need to check that source resolves under the
      // work directory because a user can potentially use a container
      // path like '../../abc'.
      Try<Nothing> mkdir = os::mkdir(source);
      if (mkdir.isError()) {
        return Error(
            "Failed to create the source of the mount at '" +
            source + "': " + mkdir.error());
      }

      // TODO(idownes): Consider setting ownership and mode.
    }

    // Determine the target of the mount.
    string target;

    if (strings::startsWith(volume.container_path(), "/")) {
      if (containerConfig.has_rootfs()) {
        target = path::join(
            containerConfig.rootfs(),
            volume.container_path());

        Try<Nothing> mkdir = os::mkdir(target);
        if (mkdir.isError()) {
          return Error(
              "Failed to create the target of the mount at '" +
              target + "': " + mkdir.error());
        }
      } else {
        target = volume.container_path();

        // An absolute path must already exist. This is because we
        // want to avoid creating mount points outside the work
        // directory in the host filesystem.
        if (!os::exists(target)) {
          return Error("Absolute container path does not exist");
        }
      }

      // TODO(jieyu): We need to check that target resolves under
      // 'rootfs' because a user can potentially use a container path
      // like '/../../abc'.
    } else {
      if (containerConfig.has_rootfs()) {
        target = path::join(containerConfig.rootfs(),
                            flags.sandbox_directory,
                            volume.container_path());
      } else {
        target = path::join(containerConfig.directory(),
                            volume.container_path());
      }

      // TODO(jieyu): We need to check that target resolves under the
      // sandbox because a user can potentially use a container path
      // like '../../abc'.

      // NOTE: We cannot create the mount point at 'target' if
      // container has rootfs defined. The bind mount of the sandbox
      // will hide what's inside 'target'. So we should always create
      // the mount point in 'directory'.
      string mountPoint = path::join(
          containerConfig.directory(),
          volume.container_path());

      Try<Nothing> mkdir = os::mkdir(mountPoint);
      if (mkdir.isError()) {
        return Error(
            "Failed to create the target of the mount at '" +
            mountPoint + "': " + mkdir.error());
      }
    }

    // TODO(jieyu): Consider the mode in the volume.
    out << "mount -n --rbind '" << source << "' '" << target << "'\n";
  }

  return out.str();
}
process::Future<Option<ContainerLaunchInfo>> NetworkIsolatorProcess::prepare( const ContainerID& containerId, const ContainerConfig& containerConfig) { LOG(INFO) << "NetworkIsolator::prepare for container: " << containerId; const ExecutorInfo executorInfo = containerConfig.executorinfo(); if (!executorInfo.has_container()) { LOG(INFO) << "NetworkIsolator::prepare Ignoring request as " << "executorInfo.container is missing for container: " << containerId; return None(); } if (executorInfo.container().network_infos().size() == 0) { LOG(INFO) << "NetworkIsolator::prepare Ignoring request as " << "executorInfo.container.network_infos is missing for " << "container: " << containerId; return None(); } if (executorInfo.container().network_infos().size() > 1) { return Failure( "NetworkIsolator:: multiple NetworkInfos are not supported."); } NetworkInfo networkInfo = executorInfo.container().network_infos(0); if (networkInfo.has_protocol()) { return Failure( "NetworkIsolator: NetworkInfo.protocol is deprecated and unsupported."); } if (networkInfo.has_ip_address()) { return Failure( "NetworkIsolator: NetworkInfo.ip_address is deprecated and" " unsupported."); } string uid = UUID::random().toString(); // Two IPAM commands: // 1) reserve for IPs the user has specifically asked for. // 2) auto-assign IPs. // Spin through all IPAddress messages once to get info for each command. // Then we'll issue each command if needed. IPAMReserveIPMessage reserveMessage; IPAMReserveIPMessage::Args* reserveArgs = reserveMessage.mutable_args(); // Counter of IPs to auto assign. int numIPv4 = 0; foreach (const NetworkInfo::IPAddress& ipAddress, networkInfo.ip_addresses()) { if (ipAddress.has_ip_address() && ipAddress.has_protocol()) { return Failure("NetworkIsolator: Cannot include both ip_address and " "protocol in a request."); } if (ipAddress.has_ip_address()) { // Store IP to attempt to reserve. 
reserveArgs->add_ipv4_addrs(ipAddress.ip_address()); } else if (ipAddress.has_protocol() && ipAddress.protocol() == NetworkInfo::IPv6){ return Failure("NetworkIsolator: IPv6 is not supported at this time."); } else { // Either protocol is IPv4, or not included (in which case we default to // IPv4 anyway). numIPv4++; } } if (!(reserveArgs->ipv4_addrs_size() + numIPv4)) { return Failure( "NetworkIsolator: Container requires at least one IP address."); } // All the IP addresses, both reserved and allocated. vector<string> allAddresses; // Reserve provided IPs first. if (reserveArgs->ipv4_addrs_size()) { reserveArgs->set_hostname(slaveInfo.hostname()); reserveArgs->set_uid(uid); reserveArgs->mutable_netgroups()->CopyFrom(networkInfo.groups()); LOG(INFO) << "Sending IP reserve command to IPAM"; Try<IPAMResponse> response = runCommand<IPAMReserveIPMessage, IPAMResponse>( ipamClientPath, reserveMessage); if (response.isError()) { return Failure("Error reserving IPs with IPAM: " + response.error()); } string addresses = ""; foreach (const string& addr, reserveArgs->ipv4_addrs()) { addresses = addresses + addr + " "; allAddresses.push_back(addr); } LOG(INFO) << "IP(s) " << addresses << "reserved with IPAM"; }