Exemplo n.º 1
0
Future<Option<ContainerLaunchInfo>> PosixFilesystemIsolatorProcess::prepare(
    const ContainerID& containerId,
    const ContainerConfig& containerConfig)
{
  if (infos.contains(containerId)) {
    return Failure("Container has already been prepared");
  }

  const ExecutorInfo& executorInfo = containerConfig.executor_info();

  if (executorInfo.has_container()) {
    CHECK_EQ(executorInfo.container().type(), ContainerInfo::MESOS);

    // Return failure if the container change the filesystem root
    // because the symlinks will become invalid in the new root.
    if (executorInfo.container().mesos().has_image()) {
      return Failure("Container root filesystems not supported");
    }

    if (executorInfo.container().volumes().size() > 0) {
      return Failure("Volumes in ContainerInfo is not supported");
    }
  }

  infos.put(containerId, Owned<Info>(new Info(containerConfig.directory())));

  return update(containerId, executorInfo.resources())
      .then([]() -> Future<Option<ContainerLaunchInfo>> { return None(); });
}
Exemplo n.º 2
0
// static
int DbWrapper::copySecondary(DB_ENV *env,
			     const std::string &oldcname,
			     const std::string &newcname,
			     const std::string &prefix,
			     const std::string &dbname,
			     bool duplicates)
{
	int err;
	DbWrapper olddb(env, oldcname, prefix, dbname, DEFAULT_CONFIG);
	ContainerConfig config;
	config.setReadOnly(true);
	err = olddb.open(0, DB_BTREE, config);
	if (err) {
		// ignore ENOENT -- it's OK sometimes
		if (err == ENOENT)
			err = 0;
		return err;
	}
	config.setReadOnly(false);
	config.setPageSize(olddb.getPageSize());
	DbWrapper newdb(env, newcname, prefix, dbname,
			config);
	if (duplicates) {
		err = newdb.getDb()->set_flags(newdb.getDb(), DB_DUP|DB_DUPSORT);
		if (err)
			throw XmlException(err);
	}
	err = newdb.open(0, DB_BTREE, DB_CREATE|DB_EXCL);
	if (err == 0)
		err = newdb.copy(olddb, DB_KEYFIRST);
	return err;
}
Exemplo n.º 3
0
TYPED_TEST(MemIsolatorTest, MemUsage)
{
  slave::Flags flags;

  Try<Isolator*> isolator = TypeParam::create(flags);
  CHECK_SOME(isolator);

  ExecutorInfo executorInfo;
  executorInfo.mutable_resources()->CopyFrom(
      Resources::parse("mem:1024").get());

  ContainerID containerId;
  containerId.set_value(UUID::random().toString());

  // Use a relative temporary directory so it gets cleaned up
  // automatically with the test.
  Try<string> dir = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
  ASSERT_SOME(dir);

  ContainerConfig containerConfig;
  containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
  containerConfig.set_directory(dir.get());

  AWAIT_READY(isolator.get()->prepare(
      containerId,
      containerConfig));

  MemoryTestHelper helper;
  ASSERT_SOME(helper.spawn());
  ASSERT_SOME(helper.pid());

  // Set up the reaper to wait on the subprocess.
  Future<Option<int>> status = process::reap(helper.pid().get());

  // Isolate the subprocess.
  AWAIT_READY(isolator.get()->isolate(containerId, helper.pid().get()));

  const Bytes allocation = Megabytes(128);
  EXPECT_SOME(helper.increaseRSS(allocation));

  Future<ResourceStatistics> usage = isolator.get()->usage(containerId);
  AWAIT_READY(usage);

  EXPECT_GE(usage.get().mem_rss_bytes(), allocation.bytes());

  // Ensure the process is killed.
  helper.cleanup();

  // Make sure the subprocess was reaped.
  AWAIT_READY(status);

  // Let the isolator clean up.
  AWAIT_READY(isolator.get()->cleanup(containerId));

  delete isolator.get();
}
Exemplo n.º 4
0
StructuralStatsDatabase::StructuralStatsDatabase()
	: db_(0, "", "", "", DEFAULT_CONFIG),
	  inMemory_(true)
{
	ContainerConfig config;
	config.setAllowCreate(true);
	int err = db_.open(0, DB_BTREE, config);

	if (err != 0) throw XmlException(err);
}
Exemplo n.º 5
0
ContainerConfig::ContainerConfig(const ContainerConfig &o)
: mode_(o.getMode()), dbOpenFlags_(o.getDbOpenFlags()), dbSetFlags_(o.getDbSetFlags()), 
	seqFlags_(o.getSeqFlags()), xmlFlags_(o.getXmlFlags()), 
	type_(o.getContainerType()), compressionName_(o.getCompressionName()), mgr_(0),
	  pageSize_(o.getPageSize()), sequenceIncr_(o.getSequenceIncrement()),
	  containerOwned_(false)
{}
Exemplo n.º 6
0
Future<Option<ContainerLaunchInfo>> CgroupsNetClsIsolatorProcess::prepare(
    const ContainerID& containerId,
    const ContainerConfig& containerConfig)
{
  if (infos.contains(containerId)) {
    return Failure("Container has already been prepared");
  }

  // Use this info to create the cgroup, but do not insert it into
  // infos till the cgroup has been created successfully.
  Info info(path::join(flags.cgroups_root, containerId.value()));

  // Create a cgroup for this container.
  Try<bool> exists = cgroups::exists(hierarchy, info.cgroup);
  if (exists.isError()) {
    return Failure("Failed to check if the cgroup already exists: " +
                   exists.error());
  } else if (exists.get()) {
    return Failure("The cgroup already exists");
  }

  Try<Nothing> create = cgroups::create(hierarchy, info.cgroup);
  if (create.isError()) {
    return Failure("Failed to create the cgroup: " + create.error());
  }

  // 'chown' the cgroup so the executor can create nested cgroups. Do
  // not recurse so the control files are still owned by the slave
  // user and thus cannot be changed by the executor.
  if (containerConfig.has_user()) {
    Try<Nothing> chown = os::chown(
        containerConfig.user(),
        path::join(hierarchy, info.cgroup),
        false);

    if (chown.isError()) {
      return Failure("Failed to change ownership of cgroup hierarchy: " +
                     chown.error());
    }
  }

  infos.emplace(containerId, info);

  return update(containerId, containerConfig.executorinfo().resources())
    .then([]() -> Future<Option<ContainerLaunchInfo>> {
      return None();
    });
}
Exemplo n.º 7
0
void ContainerConfig::setFlags(const ContainerConfig &flags)
{
	setTransactional(flags.getTransactional());
	setIndexNodes(flags.getIndexNodes());
	setEncrypted(flags.getEncrypted());
	setStatistics(flags.getStatistics());
	setAllowValidation(flags.getAllowValidation());
	setChecksum(flags.getChecksum());
	setDbOpenFlags(flags.getDbOpenFlags());
	setDbSetFlags(flags.getDbSetFlags());
	setSeqFlags(flags.getSeqFlags());
}
Exemplo n.º 8
0
Future<Option<ContainerLaunchInfo>> LinuxFilesystemIsolatorProcess::prepare(
    const ContainerID& containerId,
    const ContainerConfig& containerConfig)
{
  const string& directory = containerConfig.directory();

  Option<string> user;
  if (containerConfig.has_user()) {
    user = containerConfig.user();
  }

  if (infos.contains(containerId)) {
    return Failure("Container has already been prepared");
  }

  Owned<Info> info(new Info(
      directory,
      containerConfig.executor_info()));

  infos.put(containerId, info);

  ContainerLaunchInfo launchInfo;
  launchInfo.set_namespaces(CLONE_NEWNS);

  // Prepare the commands that will be run in the container's mount
  // namespace right after forking the executor process. We use these
  // commands to mount those volumes specified in the container info
  // so that they don't pollute the host mount namespace.
  Try<string> _script = script(containerId, containerConfig);
  if (_script.isError()) {
    return Failure("Failed to generate isolation script: " + _script.error());
  }

  CommandInfo* command = launchInfo.add_commands();
  command->set_value(_script.get());

  return update(containerId, containerConfig.executor_info().resources())
    .then([launchInfo]() -> Future<Option<ContainerLaunchInfo>> {
      return launchInfo;
    });
}
Exemplo n.º 9
0
DocumentDatabase::DocumentDatabase(DB_ENV *env, Transaction *txn,
				   const std::string &name,
				   const ContainerConfig &config,
				   XmlCompression *compression)
	: environment_(env),
	  name_(name),
	  type_(config.getContainerType()),
	  content_(env, name, "content_", document_name,
		   DEFAULT_CONFIG),
	  secondary_(env, name, document_name, DEFAULT_CONFIG),
	  compressor_(compression)
{
	open(txn, config);
}
Exemplo n.º 10
0
DbWrapper::DbWrapper(DB_ENV *environment, const std::string &name,
		     const std::string &prefixName,
		     const std::string &databaseName,
		     const ContainerConfig &config)
	: flags_(0),
	  name_(name),
	  prefixName_(prefixName),
	  databaseName_(databaseName),
	  pageSize_(config.getPageSize()),
	  db_(0),
	  environment_(environment),
	  readCursor_(0),
	  writeCursor_(0)
{
	int ret = db_create(&db_, environment, 0);
	if (ret != 0)
		throw XmlException(ret);
	if (environment_)
		setFlags(environment_);
}
Exemplo n.º 11
0
DictionaryDatabase::DictionaryDatabase(DB_ENV *env, Transaction *txn,
				       const std::string &name,
				       const ContainerConfig &config,
				       bool useMutex)
	: stringCache_(true),
	  environment_(env),
	  name_(name),
	  primary_(new PrimaryDatabase(env, name,
				       dictionary_name,
				       DEFAULT_CONFIG)),
	  secondary_(new SecondaryDatabase(env, name,
					   dictionary_name,
					   DEFAULT_CONFIG)),
	  nidName_(0),
	  nidRoot_(0),
	  usePreloads_(false),
	  isTransacted_(txn ? true : false),
	  mutex_(useMutex ? MutexLock::createMutex() : 0)
{
	cache_.setDictionaryDatabase(this);
	if (!isTransacted_ && env) {
		u_int32_t envFlags;
		env->get_flags(env, &envFlags);
		if (envFlags & DB_CDB_ALLDB)
			isTransacted_ = true;
	}
	int err = 0;
	try {
		// set cache sizes low if no DB_ENV -- this is the in-memory
		// dictionary for transient docs
		if (!env) {
			primary_->getDb()->set_cachesize(primary_->getDb(), 0, dictCacheBytes, 1);
			secondary_->getDb()->set_cachesize(secondary_->getDb(), 0, dictCacheBytes, 1);
		}

		// Open the Db objects
		err = primary_->open(txn, config);
		if (err == 0)
			err = secondary_->open(txn, /*duplicates*/true, config);

		if (err == 0) {
			// Lookup/Define the dbxml namespace names (but don't define
			// if this is a read-only container)
			bool rdonly = config.getReadOnly();
			preloadDictionary(txn, rdonly);
		}

	} catch (XmlException &) {
		secondary_->cleanup();
		primary_->cleanup();
		if (txn)
			txn->abort();
		throw;
	}
	if (err != 0) {
		secondary_->cleanup();
		primary_->cleanup();
		if (txn)
			txn->abort();
		string msg = name;
		if (err == EEXIST) {
			msg += ": container exists";
			throw XmlException(
				XmlException::CONTAINER_EXISTS, msg);
		} else if (err == ENOENT) {
			msg += ": container file not found, or not a container";
			throw XmlException(XmlException::CONTAINER_NOT_FOUND,
					   msg);
		}
		throw XmlException(err);
	}
}
Exemplo n.º 12
0
// A test to verify the number of processes and threads in a
// container.
TEST_F(LimitedCpuIsolatorTest, ROOT_CGROUPS_Pids_and_Tids)
{
  slave::Flags flags;
  flags.cgroups_cpu_enable_pids_and_tids_count = true;

  Try<Isolator*> isolator = CgroupsCpushareIsolatorProcess::create(flags);
  CHECK_SOME(isolator);

  Try<Launcher*> launcher = LinuxLauncher::create(flags);
  CHECK_SOME(launcher);

  ExecutorInfo executorInfo;
  executorInfo.mutable_resources()->CopyFrom(
      Resources::parse("cpus:0.5;mem:512").get());

  ContainerID containerId;
  containerId.set_value(UUID::random().toString());

  // Use a relative temporary directory so it gets cleaned up
  // automatically with the test.
  Try<string> dir = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
  ASSERT_SOME(dir);

  ContainerConfig containerConfig;
  containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
  containerConfig.set_directory(dir.get());

  Future<Option<ContainerLaunchInfo>> prepare =
    isolator.get()->prepare(
        containerId,
        containerConfig);

  AWAIT_READY(prepare);

  // Right after the creation of the cgroup, which happens in
  // 'prepare', we check that it is empty.
  Future<ResourceStatistics> usage = isolator.get()->usage(containerId);
  AWAIT_READY(usage);
  EXPECT_EQ(0U, usage.get().processes());
  EXPECT_EQ(0U, usage.get().threads());

  int pipes[2];
  ASSERT_NE(-1, ::pipe(pipes));

  vector<string> argv(1);
  argv[0] = "cat";

  Try<pid_t> pid = launcher.get()->fork(
      containerId,
      "cat",
      argv,
      Subprocess::FD(STDIN_FILENO),
      Subprocess::FD(STDOUT_FILENO),
      Subprocess::FD(STDERR_FILENO),
      None(),
      None(),
      lambda::bind(&childSetup, pipes),
      prepare.get().isSome() ? prepare.get().get().namespaces() : 0);

  ASSERT_SOME(pid);

  // Reap the forked child.
  Future<Option<int>> status = process::reap(pid.get());

  // Continue in the parent.
  ASSERT_SOME(os::close(pipes[0]));

  // Before isolation, the cgroup is empty.
  usage = isolator.get()->usage(containerId);
  AWAIT_READY(usage);
  EXPECT_EQ(0U, usage.get().processes());
  EXPECT_EQ(0U, usage.get().threads());

  // Isolate the forked child.
  AWAIT_READY(isolator.get()->isolate(containerId, pid.get()));

  // After the isolation, the cgroup is not empty, even though the
  // process hasn't exec'd yet.
  usage = isolator.get()->usage(containerId);
  AWAIT_READY(usage);
  EXPECT_EQ(1U, usage.get().processes());
  EXPECT_EQ(1U, usage.get().threads());

  // Now signal the child to continue.
  char dummy;
  ASSERT_LT(0, ::write(pipes[1], &dummy, sizeof(dummy)));

  ASSERT_SOME(os::close(pipes[1]));

  // Process count should be 1 since 'sleep' is still sleeping.
  usage = isolator.get()->usage(containerId);
  AWAIT_READY(usage);
  EXPECT_EQ(1U, usage.get().processes());
  EXPECT_EQ(1U, usage.get().threads());

  // Ensure all processes are killed.
  AWAIT_READY(launcher.get()->destroy(containerId));

  // Wait for the command to complete.
  AWAIT_READY(status);

  // After the process is killed, the cgroup should be empty again.
  usage = isolator.get()->usage(containerId);
  AWAIT_READY(usage);
  EXPECT_EQ(0U, usage.get().processes());
  EXPECT_EQ(0U, usage.get().threads());

  // Let the isolator clean up.
  AWAIT_READY(isolator.get()->cleanup(containerId));

  delete isolator.get();
  delete launcher.get();
}
Exemplo n.º 13
0
// This test verifies that we can successfully launch a container with
// a big (>= 10 cpus) cpu quota. This is to catch the regression
// observed in MESOS-1049.
// TODO(vinod): Revisit this if/when the isolator restricts the number
// of cpus that an executor can use based on the slave cpus.
TEST_F(LimitedCpuIsolatorTest, ROOT_CGROUPS_CFS_Big_Quota)
{
  slave::Flags flags;

  // Enable CFS to cap CPU utilization.
  flags.cgroups_enable_cfs = true;

  Try<Isolator*> isolator = CgroupsCpushareIsolatorProcess::create(flags);
  CHECK_SOME(isolator);

  Try<Launcher*> launcher = LinuxLauncher::create(flags);
  CHECK_SOME(launcher);

  // Set the executor's resources to 100.5 cpu.
  ExecutorInfo executorInfo;
  executorInfo.mutable_resources()->CopyFrom(
      Resources::parse("cpus:100.5").get());

  ContainerID containerId;
  containerId.set_value(UUID::random().toString());

  // Use a relative temporary directory so it gets cleaned up
  // automatically with the test.
  Try<string> dir = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
  ASSERT_SOME(dir);

  ContainerConfig containerConfig;
  containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
  containerConfig.set_directory(dir.get());

  Future<Option<ContainerLaunchInfo>> prepare =
    isolator.get()->prepare(
        containerId,
        containerConfig);

  AWAIT_READY(prepare);

  int pipes[2];
  ASSERT_NE(-1, ::pipe(pipes));

  vector<string> argv(3);
  argv[0] = "sh";
  argv[1] = "-c";
  argv[2] = "exit 0";

  Try<pid_t> pid = launcher.get()->fork(
      containerId,
      "sh",
      argv,
      Subprocess::FD(STDIN_FILENO),
      Subprocess::FD(STDOUT_FILENO),
      Subprocess::FD(STDERR_FILENO),
      None(),
      None(),
      lambda::bind(&childSetup, pipes),
      prepare.get().isSome() ? prepare.get().get().namespaces() : 0);

  ASSERT_SOME(pid);

  // Reap the forked child.
  Future<Option<int> > status = process::reap(pid.get());

  // Continue in the parent.
  ASSERT_SOME(os::close(pipes[0]));

  // Isolate the forked child.
  AWAIT_READY(isolator.get()->isolate(containerId, pid.get()));

  // Now signal the child to continue.
  char dummy;
  ASSERT_LT(0, ::write(pipes[1], &dummy, sizeof(dummy)));

  ASSERT_SOME(os::close(pipes[1]));

  // Wait for the command to complete successfully.
  AWAIT_READY(status);
  ASSERT_SOME_EQ(0, status.get());

  // Ensure all processes are killed.
  AWAIT_READY(launcher.get()->destroy(containerId));

  // Let the isolator clean up.
  AWAIT_READY(isolator.get()->cleanup(containerId));

  delete isolator.get();
  delete launcher.get();
}
Exemplo n.º 14
0
TEST_F(LimitedCpuIsolatorTest, ROOT_CGROUPS_CFS_Enable_Cfs)
{
  slave::Flags flags;

  // Enable CFS to cap CPU utilization.
  flags.cgroups_enable_cfs = true;

  Try<Isolator*> isolator = CgroupsCpushareIsolatorProcess::create(flags);
  CHECK_SOME(isolator);

  Try<Launcher*> launcher = LinuxLauncher::create(flags);
  CHECK_SOME(launcher);

  // Set the executor's resources to 0.5 cpu.
  ExecutorInfo executorInfo;
  executorInfo.mutable_resources()->CopyFrom(
      Resources::parse("cpus:0.5").get());

  ContainerID containerId;
  containerId.set_value(UUID::random().toString());

  // Use a relative temporary directory so it gets cleaned up
  // automatically with the test.
  Try<string> dir = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
  ASSERT_SOME(dir);

  ContainerConfig containerConfig;
  containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
  containerConfig.set_directory(dir.get());

  Future<Option<ContainerLaunchInfo>> prepare =
    isolator.get()->prepare(
        containerId,
        containerConfig);

  AWAIT_READY(prepare);

  // Generate random numbers to max out a single core. We'll run this for 0.5
  // seconds of wall time so it should consume approximately 250 ms of total
  // cpu time when limited to 0.5 cpu. We use /dev/urandom to prevent blocking
  // on Linux when there's insufficient entropy.
  string command = "cat /dev/urandom > /dev/null & "
    "export MESOS_TEST_PID=$! && "
    "sleep 0.5 && "
    "kill $MESOS_TEST_PID";

  int pipes[2];
  ASSERT_NE(-1, ::pipe(pipes));

  vector<string> argv(3);
  argv[0] = "sh";
  argv[1] = "-c";
  argv[2] = command;

  Try<pid_t> pid = launcher.get()->fork(
      containerId,
      "sh",
      argv,
      Subprocess::FD(STDIN_FILENO),
      Subprocess::FD(STDOUT_FILENO),
      Subprocess::FD(STDERR_FILENO),
      None(),
      None(),
      lambda::bind(&childSetup, pipes),
      prepare.get().isSome() ? prepare.get().get().namespaces() : 0);

  ASSERT_SOME(pid);

  // Reap the forked child.
  Future<Option<int> > status = process::reap(pid.get());

  // Continue in the parent.
  ASSERT_SOME(os::close(pipes[0]));

  // Isolate the forked child.
  AWAIT_READY(isolator.get()->isolate(containerId, pid.get()));

  // Now signal the child to continue.
  char dummy;
  ASSERT_LT(0, ::write(pipes[1], &dummy, sizeof(dummy)));

  ASSERT_SOME(os::close(pipes[1]));

  // Wait for the command to complete.
  AWAIT_READY(status);

  Future<ResourceStatistics> usage = isolator.get()->usage(containerId);
  AWAIT_READY(usage);

  // Expect that no more than 300 ms of cpu time has been consumed. We also
  // check that at least 50 ms of cpu time has been consumed so this test will
  // fail if the host system is very heavily loaded. This behavior is correct
  // because under such conditions we aren't actually testing the CFS cpu
  // limiter.
  double cpuTime = usage.get().cpus_system_time_secs() +
                   usage.get().cpus_user_time_secs();

  EXPECT_GE(0.30, cpuTime);
  EXPECT_LE(0.05, cpuTime);

  // Ensure all processes are killed.
  AWAIT_READY(launcher.get()->destroy(containerId));

  // Let the isolator clean up.
  AWAIT_READY(isolator.get()->cleanup(containerId));

  delete isolator.get();
  delete launcher.get();
}
Exemplo n.º 15
0
TEST_F(RevocableCpuIsolatorTest, ROOT_CGROUPS_RevocableCpu)
{
  slave::Flags flags;

  Try<Isolator*> isolator = CgroupsCpushareIsolatorProcess::create(flags);
  CHECK_SOME(isolator);

  Try<Launcher*> launcher = PosixLauncher::create(flags);

  // Include revocable CPU in the executor's resources.
  Resource cpu = Resources::parse("cpus", "1", "*").get();
  cpu.mutable_revocable();

  ExecutorInfo executorInfo;
  executorInfo.add_resources()->CopyFrom(cpu);

  ContainerID containerId;
  containerId.set_value(UUID::random().toString());

  ContainerConfig containerConfig;
  containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
  containerConfig.set_directory(os::getcwd());

  AWAIT_READY(isolator.get()->prepare(
        containerId,
        containerConfig));

  vector<string> argv{"sleep", "100"};

  Try<pid_t> pid = launcher.get()->fork(
      containerId,
      "/bin/sleep",
      argv,
      Subprocess::PATH("/dev/null"),
      Subprocess::PATH("/dev/null"),
      Subprocess::PATH("/dev/null"),
      None(),
      None(),
      None(),
      None());

  ASSERT_SOME(pid);

  AWAIT_READY(isolator.get()->isolate(containerId, pid.get()));

  // Executor should have proper cpu.shares for revocable containers.
  Result<string> cpuHierarchy = cgroups::hierarchy("cpu");
  ASSERT_SOME(cpuHierarchy);

  Result<string> cpuCgroup = cgroups::cpu::cgroup(pid.get());
  ASSERT_SOME(cpuCgroup);

  EXPECT_SOME_EQ(
      CPU_SHARES_PER_CPU_REVOCABLE,
      cgroups::cpu::shares(cpuHierarchy.get(), cpuCgroup.get()));

  // Kill the container and clean up.
  Future<Option<int>> status = process::reap(pid.get());

  AWAIT_READY(launcher.get()->destroy(containerId));

  AWAIT_READY(status);

  AWAIT_READY(isolator.get()->cleanup(containerId));

  delete isolator.get();
  delete launcher.get();
}
Exemplo n.º 16
0
  Future<bool> launch(
      const ContainerID& containerId,
      const ContainerConfig& containerConfig,
      const map<string, string>& environment,
      const Option<string>& pidCheckpointPath)
  {
    CHECK(!terminatedContainers.contains(containerId))
      << "Failed to launch nested container " << containerId
      << " for executor '" << containerConfig.executor_info().executor_id()
      << "' of framework " << containerConfig.executor_info().framework_id()
      << " because this ContainerID is being re-used with"
      << " a previously terminated container";

    CHECK(!containers_.contains(containerId))
      << "Failed to launch container " << containerId
      << " for executor '" << containerConfig.executor_info().executor_id()
      << "' of framework " << containerConfig.executor_info().framework_id()
      << " because it is already launched";

    containers_[containerId] = Owned<ContainerData>(new ContainerData());

    if (containerId.has_parent()) {
      // Launching a nested container via the test containerizer is a
      // no-op for now.
      return true;
    }

    CHECK(executors.contains(containerConfig.executor_info().executor_id()))
      << "Failed to launch executor '"
      << containerConfig.executor_info().executor_id()
      << "' of framework " << containerConfig.executor_info().framework_id()
      << " because it is unknown to the containerizer";

    containers_.at(containerId)->executorId =
      containerConfig.executor_info().executor_id();

    containers_.at(containerId)->frameworkId =
      containerConfig.executor_info().framework_id();

    // We need to synchronize all reads and writes to the environment
    // as this is global state.
    //
    // TODO(jmlvanre): Even this is not sufficient, as other aspects
    // of the code may read an environment variable while we are
    // manipulating it. The better solution is to pass the environment
    // variables into the fork, or to set them on the command line.
    // See MESOS-3475.
    static std::mutex mutex;

    synchronized(mutex) {
      // Since the constructor for `MesosExecutorDriver` reads
      // environment variables to load flags, even it needs to
      // be within this synchronization section.
      //
      // Prepare additional environment variables for the executor.
      // TODO(benh): Need to get flags passed into the TestContainerizer
      // in order to properly use here.
      slave::Flags flags;
      flags.recovery_timeout = Duration::zero();

      // We need to save the original set of environment variables so we
      // can reset the environment after calling 'driver->start()' below.
      hashmap<string, string> original = os::environment();

      foreachpair (const string& name, const string& variable, environment) {
        os::setenv(name, variable);
      }

      // TODO(benh): Can this be removed and done exclusively in the
      // 'executorEnvironment()' function? There are other places in the
      // code where we do this as well and it's likely we can do this once
      // in 'executorEnvironment()'.
      foreach (const Environment::Variable& variable,
               containerConfig.executor_info()
                 .command().environment().variables()) {
        os::setenv(variable.name(), variable.value());
      }

      os::setenv("MESOS_LOCAL", "1");

      const Owned<ExecutorData>& executorData =
        executors.at(containerConfig.executor_info().executor_id());

      if (executorData->executor != nullptr) {
        executorData->driver = Owned<MesosExecutorDriver>(
            new MesosExecutorDriver(executorData->executor));
        executorData->driver->start();
      } else {
        shared_ptr<v1::MockHTTPExecutor> executor =
          executorData->v1ExecutorMock;
        executorData->v1Library = Owned<v1::executor::TestMesos>(
          new v1::executor::TestMesos(ContentType::PROTOBUF, executor));
      }

      os::unsetenv("MESOS_LOCAL");

      // Unset the environment variables we set by resetting them to their
      // original values and also removing any that were not part of the
      // original environment.
      foreachpair (const string& name, const string& value, original) {
        os::setenv(name, value);
      }
Exemplo n.º 17
0
// upgrade from 2.1 (version 4) to 2.2 (version 5)
// Modify keys for both document secondary and content (if wholedoc storage)
// secondary key: docId, nameId, type
// content key: docId
// for both:
// 1.  open old DB and create a new database
// 2.  for each record:
//    o unmarshal old format; marshal in new, and put into new DB
static int upgrade22(const std::string &name,
		     const std::string &tname, Manager &mgr)
{
	int err = 0;
	SecondaryDatabase secondary(mgr.getDB_ENV(), name,
				    document_name, DEFAULT_CONFIG);
	err = secondary.open(0, /*duplicates*/false, DEFAULT_CONFIG);
	ContainerConfig config;
	config.setPageSize(secondary.getPageSize());
	SecondaryDatabase newSec(mgr.getDB_ENV(), tname,
				 document_name, config);
	err = newSec.open(0, /*duplicates*/false, CREATE_CONFIG);
	// the key is: docId,nodeId,type
	DbXml::DbXmlDbt key;
	DbXml::DbXmlDbt data;
	Cursor curs(secondary, (Transaction*)0, DbXml::CURSOR_READ);
	int ret = 0;
	while ((err == 0) &&
	       (ret = curs.get(key, data, DB_NEXT)) == 0) {
		// decode key using pre-2.2 code
		DocID did;
		NameID nid;
		XmlValue::Type type;
		u_int32_t *p = (u_int32_t *)key.data;
		u_int32_t id1, id2;
		id1 = *p++;
		id2 = *p++;
		if (Manager::isBigendian()) {
			M_32_SWAP(id1);
			M_32_SWAP(id2);  
		}
		did = id1;            
		nid = id2;
		type = (XmlValue::Type) * ((unsigned char*)p);
		// encode using 2.2. code
		DbtOut newKey;
		MetaDatum::setKeyDbt(did, nid, type, newKey);
		err = newSec.put(0, &newKey, &data, 0);
	}
	curs.close();
	secondary.close(0);
	newSec.close(0);
	if (err == 0) {
		DbWrapper content(mgr.getDB_ENV(), name,
				  "content_",
				  document_name, DEFAULT_CONFIG);
		err = content.open(0, DB_BTREE, DEFAULT_CONFIG);
		if (err != 0) {
			// ignore ENOENT
			if (err == ENOENT)
				err = 0;
			return err;
		}
		config.setPageSize(content.getPageSize());
		DbWrapper newContent(mgr.getDB_ENV(), tname,
				     "content_", document_name,
				     config);
		err = newContent.open(0, DB_BTREE,
				      CREATE_CONFIG);
		if (err != 0)
			return err;
		// the key is: docId
		DbXml::DbXmlDbt key;
		DbXml::DbXmlDbt data;
		Cursor curs1(content, (Transaction*)0, DbXml::CURSOR_READ);
		int ret = 0;
		while ((err == 0) &&
		       (ret = curs1.get(key, data, DB_NEXT)) == 0) {
			// decode/encode key, rewrite
			u_int32_t *p = (u_int32_t *)key.data;
			u_int32_t id1;
			id1 = *p;
			if (Manager::isBigendian())
				M_32_SWAP(id1);
			DocID id(id1);
			DbtOut newKey;
			id.setDbtFromThis(newKey);
			err = newContent.put(0, &newKey, &data, 0);
		}
		curs1.close();
		content.close(0);
		newContent.close(0);
	}
	return err;
}
Exemplo n.º 18
0
TYPED_TEST(CpuIsolatorTest, SystemCpuUsage)
{
  slave::Flags flags;

  Try<Isolator*> isolator = TypeParam::create(flags);
  CHECK_SOME(isolator);

  // A PosixLauncher is sufficient even when testing a cgroups isolator.
  Try<Launcher*> launcher = PosixLauncher::create(flags);

  ExecutorInfo executorInfo;
  executorInfo.mutable_resources()->CopyFrom(
      Resources::parse("cpus:1.0").get());

  ContainerID containerId;
  containerId.set_value(UUID::random().toString());

  // Use a relative temporary directory so it gets cleaned up
  // automatically with the test.
  Try<string> dir = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
  ASSERT_SOME(dir);

  ContainerConfig containerConfig;
  containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
  containerConfig.set_directory(dir.get());

  AWAIT_READY(isolator.get()->prepare(
      containerId,
      containerConfig));

  const string& file = path::join(dir.get(), "mesos_isolator_test_ready");

  // Generating random numbers is done by the kernel and will max out a single
  // core and run almost exclusively in the kernel, i.e., system time.
  string command = "cat /dev/urandom > /dev/null & "
    "touch " + file + "; " // Signals the command is running.
    "sleep 60";

  int pipes[2];
  ASSERT_NE(-1, ::pipe(pipes));

  vector<string> argv(3);
  argv[0] = "sh";
  argv[1] = "-c";
  argv[2] = command;

  Try<pid_t> pid = launcher.get()->fork(
      containerId,
      "sh",
      argv,
      Subprocess::FD(STDIN_FILENO),
      Subprocess::FD(STDOUT_FILENO),
      Subprocess::FD(STDERR_FILENO),
      None(),
      None(),
      lambda::bind(&childSetup, pipes),
      None());

  ASSERT_SOME(pid);

  // Reap the forked child.
  Future<Option<int> > status = process::reap(pid.get());

  // Continue in the parent.
  ASSERT_SOME(os::close(pipes[0]));

  // Isolate the forked child.
  AWAIT_READY(isolator.get()->isolate(containerId, pid.get()));

  // Now signal the child to continue.
  char dummy;
  ASSERT_LT(0, ::write(pipes[1], &dummy, sizeof(dummy)));

  ASSERT_SOME(os::close(pipes[1]));

  // Wait for the command to start.
  while (!os::exists(file));

  // Wait up to 1 second for the child process to induce 1/8 of a second of
  // system cpu time.
  ResourceStatistics statistics;
  Duration waited = Duration::zero();
  do {
    Future<ResourceStatistics> usage = isolator.get()->usage(containerId);
    AWAIT_READY(usage);

    statistics = usage.get();

    // If we meet our usage expectations, we're done!
    if (statistics.cpus_system_time_secs() >= 0.125) {
      break;
    }

    os::sleep(Milliseconds(200));
    waited += Milliseconds(200);
  } while (waited < Seconds(1));

  EXPECT_LE(0.125, statistics.cpus_system_time_secs());

  // Ensure all processes are killed.
  AWAIT_READY(launcher.get()->destroy(containerId));

  // Make sure the child was reaped.
  AWAIT_READY(status);

  // Let the isolator clean up.
  AWAIT_READY(isolator.get()->cleanup(containerId));

  delete isolator.get();
  delete launcher.get();
}
Exemplo n.º 19
0
Try<string> LinuxFilesystemIsolatorProcess::script(
    const ContainerID& containerId,
    const ContainerConfig& containerConfig)
{
  ostringstream out;
  out << "#!/bin/sh\n";
  out << "set -x -e\n";

  // Make sure mounts in the container mount namespace do not
  // propagate back to the host mount namespace.
  // NOTE: We cannot simply run `mount --make-rslave /`, for more info
  // please refer to comments in mount.hpp.
  MesosContainerizerMount::Flags mountFlags;
  mountFlags.operation = MesosContainerizerMount::MAKE_RSLAVE;
  mountFlags.path = "/";
  out << path::join(flags.launcher_dir, "mesos-containerizer") << " "
      << MesosContainerizerMount::NAME << " "
      << stringify(mountFlags) << "\n";

  if (!containerConfig.executor_info().has_container()) {
    return out.str();
  }

  // Bind mount the sandbox if the container specifies a rootfs.
  if (containerConfig.has_rootfs()) {
    string sandbox = path::join(
        containerConfig.rootfs(),
        flags.sandbox_directory);

    Try<Nothing> mkdir = os::mkdir(sandbox);
    if (mkdir.isError()) {
      return Error(
          "Failed to create sandbox mount point at '" +
          sandbox + "': " + mkdir.error());
    }

    out << "mount -n --rbind '" << containerConfig.directory()
        << "' '" << sandbox << "'\n";
  }

  foreach (const Volume& volume,
           containerConfig.executor_info().container().volumes()) {
    // NOTE: Volumes with source will be handled by the corresponding
    // isolators (e.g., docker/volume).
    if (volume.has_source()) {
      VLOG(1) << "Ignored a volume with source for container '"
              << containerId << "'";
      continue;
    }

    if (!volume.has_host_path()) {
      return Error("A volume misses 'host_path'");
    }

    // If both 'host_path' and 'container_path' are relative paths,
    // return an error because the user can just directly access the
    // volume in the work directory.
    if (!strings::startsWith(volume.host_path(), "/") &&
        !strings::startsWith(volume.container_path(), "/")) {
      return Error(
          "Both 'host_path' and 'container_path' of a volume are relative");
    }

    // Determine the source of the mount.
    string source;

    if (strings::startsWith(volume.host_path(), "/")) {
      source = volume.host_path();

      // An absolute path must already exist.
      if (!os::exists(source)) {
        return Error("Absolute host path does not exist");
      }
    } else {
      // Path is interpreted as relative to the work directory.
      source = path::join(containerConfig.directory(), volume.host_path());

      // TODO(jieyu): We need to check that source resolves under the
      // work directory because a user can potentially use a container
      // path like '../../abc'.

      Try<Nothing> mkdir = os::mkdir(source);
      if (mkdir.isError()) {
        return Error(
            "Failed to create the source of the mount at '" +
            source + "': " + mkdir.error());
      }

      // TODO(idownes): Consider setting ownership and mode.
    }

    // Determine the target of the mount.
    string target;

    if (strings::startsWith(volume.container_path(), "/")) {
      if (containerConfig.has_rootfs()) {
        target = path::join(
            containerConfig.rootfs(),
            volume.container_path());

        Try<Nothing> mkdir = os::mkdir(target);
        if (mkdir.isError()) {
          return Error(
              "Failed to create the target of the mount at '" +
              target + "': " + mkdir.error());
        }
      } else {
        target = volume.container_path();

        // An absolute path must already exist. This is because we
        // want to avoid creating mount points outside the work
        // directory in the host filesystem.
        if (!os::exists(target)) {
          return Error("Absolute container path does not exist");
        }
      }

      // TODO(jieyu): We need to check that target resolves under
      // 'rootfs' because a user can potentially use a container path
      // like '/../../abc'.
    } else {
      if (containerConfig.has_rootfs()) {
        target = path::join(containerConfig.rootfs(),
                            flags.sandbox_directory,
                            volume.container_path());
      } else {
        target = path::join(containerConfig.directory(),
                            volume.container_path());
      }

      // TODO(jieyu): We need to check that target resolves under the
      // sandbox because a user can potentially use a container path
      // like '../../abc'.

      // NOTE: We cannot create the mount point at 'target' if
      // container has rootfs defined. The bind mount of the sandbox
      // will hide what's inside 'target'. So we should always create
      // the mount point in 'directory'.
      string mountPoint = path::join(
          containerConfig.directory(),
          volume.container_path());

      Try<Nothing> mkdir = os::mkdir(mountPoint);
      if (mkdir.isError()) {
        return Error(
            "Failed to create the target of the mount at '" +
            mountPoint + "': " + mkdir.error());
      }
    }

    // TODO(jieyu): Consider the mode in the volume.
    out << "mount -n --rbind '" << source << "' '" << target << "'\n";
  }

  return out.str();
}
Exemplo n.º 20
0
process::Future<Option<ContainerLaunchInfo>> NetworkIsolatorProcess::prepare(
    const ContainerID& containerId,
    const ContainerConfig& containerConfig)
{
  LOG(INFO) << "NetworkIsolator::prepare for container: " << containerId;

  const ExecutorInfo executorInfo = containerConfig.executorinfo();
  if (!executorInfo.has_container()) {
    LOG(INFO) << "NetworkIsolator::prepare Ignoring request as "
              << "executorInfo.container is missing for container: "
              << containerId;
    return None();
  }

  if (executorInfo.container().network_infos().size() == 0) {
    LOG(INFO) << "NetworkIsolator::prepare Ignoring request as "
              << "executorInfo.container.network_infos is missing for "
              << "container: " << containerId;
    return None();
  }

  if (executorInfo.container().network_infos().size() > 1) {
    return Failure(
        "NetworkIsolator:: multiple NetworkInfos are not supported.");
  }

  NetworkInfo networkInfo = executorInfo.container().network_infos(0);

  if (networkInfo.has_protocol()) {
    return Failure(
      "NetworkIsolator: NetworkInfo.protocol is deprecated and unsupported.");
  }
  if (networkInfo.has_ip_address()) {
    return Failure(
      "NetworkIsolator: NetworkInfo.ip_address is deprecated and"
      " unsupported.");
  }

  string uid = UUID::random().toString();

  // Two IPAM commands:
  // 1) reserve for IPs the user has specifically asked for.
  // 2) auto-assign IPs.
  // Spin through all IPAddress messages once to get info for each command.
  // Then we'll issue each command if needed.
  IPAMReserveIPMessage reserveMessage;
  IPAMReserveIPMessage::Args* reserveArgs = reserveMessage.mutable_args();

  // Counter of IPs to auto assign.
  int numIPv4 = 0;
  foreach (const NetworkInfo::IPAddress& ipAddress, networkInfo.ip_addresses()) {
    if (ipAddress.has_ip_address() && ipAddress.has_protocol()) {
      return Failure("NetworkIsolator: Cannot include both ip_address and "
                     "protocol in a request.");
    }
    if (ipAddress.has_ip_address()) {
      // Store IP to attempt to reserve.
      reserveArgs->add_ipv4_addrs(ipAddress.ip_address());
    } else if (ipAddress.has_protocol() &&
               ipAddress.protocol() == NetworkInfo::IPv6){
      return Failure("NetworkIsolator: IPv6 is not supported at this time.");
    } else {
      // Either protocol is IPv4, or not included (in which case we default to
      // IPv4 anyway).
      numIPv4++;
    }
  }

  if (!(reserveArgs->ipv4_addrs_size() + numIPv4)) {
    return Failure(
      "NetworkIsolator: Container requires at least one IP address.");
  }

  // All the IP addresses, both reserved and allocated.
  vector<string> allAddresses;

  // Reserve provided IPs first.
  if (reserveArgs->ipv4_addrs_size()) {
    reserveArgs->set_hostname(slaveInfo.hostname());
    reserveArgs->set_uid(uid);
    reserveArgs->mutable_netgroups()->CopyFrom(networkInfo.groups());

    LOG(INFO) << "Sending IP reserve command to IPAM";
    Try<IPAMResponse> response =
      runCommand<IPAMReserveIPMessage, IPAMResponse>(
          ipamClientPath, reserveMessage);
    if (response.isError()) {
      return Failure("Error reserving IPs with IPAM: " + response.error());
    }

    string addresses = "";
    foreach (const string& addr, reserveArgs->ipv4_addrs()) {
      addresses = addresses + addr + " ";
      allAddresses.push_back(addr);
    }
    LOG(INFO) << "IP(s) " << addresses << "reserved with IPAM";
  }