// This test verifies that we can successfully launch a container with // a big (>= 10 cpus) cpu quota. This is to catch the regression // observed in MESOS-1049. // TODO(vinod): Revisit this if/when the isolator restricts the number // of cpus that an executor can use based on the slave cpus. TEST_F(LimitedCpuIsolatorTest, ROOT_CGROUPS_Cfs_Big_Quota) { Flags flags; // Enable CFS to cap CPU utilization. flags.cgroups_enable_cfs = true; Try<Isolator*> isolator = CgroupsCpushareIsolatorProcess::create(flags); CHECK_SOME(isolator); Try<Launcher*> launcher = LinuxLauncher::create(flags); CHECK_SOME(launcher); // Set the executor's resources to 100.5 cpu. ExecutorInfo executorInfo; executorInfo.mutable_resources()->CopyFrom( Resources::parse("cpus:100.5").get()); ContainerID containerId; containerId.set_value("mesos_test_cfs_big_cpu_limit"); AWAIT_READY(isolator.get()->prepare(containerId, executorInfo)); int pipes[2]; ASSERT_NE(-1, ::pipe(pipes)); lambda::function<int()> inChild = lambda::bind(&execute, "exit 0", pipes); Try<pid_t> pid = launcher.get()->fork(containerId, inChild); ASSERT_SOME(pid); // Reap the forked child. Future<Option<int> > status = process::reap(pid.get()); // Continue in the parent. ::close(pipes[0]); // Isolate the forked child. AWAIT_READY(isolator.get()->isolate(containerId, pid.get())); // Now signal the child to continue. int buf; ASSERT_LT(0, ::write(pipes[1], &buf, sizeof(buf))); ::close(pipes[1]); // Wait for the command to complete successfully. AWAIT_READY(status); ASSERT_SOME_EQ(0, status.get()); // Ensure all processes are killed. AWAIT_READY(launcher.get()->destroy(containerId)); // Let the isolator clean up. AWAIT_READY(isolator.get()->cleanup(containerId)); delete isolator.get(); delete launcher.get(); }
// Checks that the memory isolator under test reports at least as much
// RSS as a helper subprocess has been asked to allocate.
TYPED_TEST(MemIsolatorTest, MemUsage)
{
  slave::Flags flags;

  Try<Isolator*> isolator = TypeParam::create(flags);
  CHECK_SOME(isolator);

  ExecutorInfo executorInfo;
  executorInfo.mutable_resources()->CopyFrom(
      Resources::parse("mem:1024").get());

  ContainerID containerId;
  containerId.set_value(UUID::random().toString());

  // The sandbox is created below the current working directory so
  // that it gets cleaned up automatically with the test.
  Try<string> sandbox = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
  ASSERT_SOME(sandbox);

  ContainerConfig containerConfig;
  containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
  containerConfig.set_directory(sandbox.get());

  AWAIT_READY(isolator.get()->prepare(containerId, containerConfig));

  MemoryTestHelper memoryHelper;
  ASSERT_SOME(memoryHelper.spawn());
  ASSERT_SOME(memoryHelper.pid());

  // Have the reaper wait on the helper subprocess.
  Future<Option<int>> reaped = process::reap(memoryHelper.pid().get());

  // Put the helper subprocess under isolation.
  AWAIT_READY(
      isolator.get()->isolate(containerId, memoryHelper.pid().get()));

  const Bytes rssIncrease = Megabytes(128);
  EXPECT_SOME(memoryHelper.increaseRSS(rssIncrease));

  Future<ResourceStatistics> statistics =
    isolator.get()->usage(containerId);

  AWAIT_READY(statistics);

  EXPECT_GE(statistics.get().mem_rss_bytes(), rssIncrease.bytes());

  // Terminate the helper subprocess.
  memoryHelper.cleanup();

  // The subprocess must have been reaped.
  AWAIT_READY(reaped);

  // Give the isolator a chance to clean up.
  AWAIT_READY(isolator.get()->cleanup(containerId));

  delete isolator.get();
}
// Checks that the memory isolator under test reports the RSS of a
// forked child that is asked to consume 256 MB of memory.
TYPED_TEST(MemIsolatorTest, MemUsage)
{
  Flags flags;

  Try<Isolator*> isolator = TypeParam::create(flags);
  CHECK_SOME(isolator);

  // A PosixLauncher is sufficient even when testing a cgroups isolator.
  Try<Launcher*> launcher = PosixLauncher::create(flags);

  // The launcher is dereferenced below, so fail loudly here if it
  // could not be created.
  CHECK_SOME(launcher);

  ExecutorInfo executorInfo;
  executorInfo.mutable_resources()->CopyFrom(
      Resources::parse("mem:1024").get());

  ContainerID containerId;
  containerId.set_value("memory_usage");

  AWAIT_READY(isolator.get()->prepare(containerId, executorInfo));

  // Pipe handed to the child; the parent writes to it once the child
  // has been isolated.
  int pipes[2];
  ASSERT_NE(-1, ::pipe(pipes));

  lambda::function<int()> inChild = lambda::bind(
      &consumeMemory,
      Megabytes(256),
      Seconds(10),
      pipes);

  Try<pid_t> pid = launcher.get()->fork(containerId, inChild);
  ASSERT_SOME(pid);

  // Set up the reaper to wait on the forked child.
  Future<Option<int> > status = process::reap(pid.get());

  // Continue in the parent.
  ::close(pipes[0]);

  // Isolate the forked child.
  AWAIT_READY(isolator.get()->isolate(containerId, pid.get()));

  // Now signal the child to continue.
  int buf;
  ASSERT_LT(0, ::write(pipes[1], &buf, sizeof(buf)));
  ::close(pipes[1]);

  // Wait up to 5 seconds for the child process to consume 256 MB of
  // memory, polling the isolator once per second.
  ResourceStatistics statistics;
  Bytes threshold = Megabytes(256);
  Duration waited = Duration::zero();
  do {
    Future<ResourceStatistics> usage = isolator.get()->usage(containerId);
    AWAIT_READY(usage);

    statistics = usage.get();

    // If we meet our usage expectations, we're done!
    if (statistics.mem_rss_bytes() >= threshold.bytes()) {
      break;
    }

    os::sleep(Seconds(1));
    waited += Seconds(1);
  } while (waited < Seconds(5));

  EXPECT_LE(threshold.bytes(), statistics.mem_rss_bytes());

  // Ensure all processes are killed.
  AWAIT_READY(launcher.get()->destroy(containerId));

  // Make sure the child was reaped.
  AWAIT_READY(status);

  // Let the isolator clean up.
  AWAIT_READY(isolator.get()->cleanup(containerId));

  delete isolator.get();
  delete launcher.get();
}
// Checks that, with CFS enabled and the executor given 0.5 cpu, a
// cpu-bound command run for 0.5 seconds of wall time accumulates
// between 50 ms and 300 ms of cpu time.
TEST_F(LimitedCpuIsolatorTest, ROOT_CGROUPS_Cfs)
{
  Flags flags;

  // Cap CPU utilization through CFS.
  flags.cgroups_enable_cfs = true;

  Try<Isolator*> isolator = CgroupsCpushareIsolatorProcess::create(flags);
  CHECK_SOME(isolator);

  Try<Launcher*> launcher = LinuxLauncher::create(flags);
  CHECK_SOME(launcher);

  // Request 0.5 cpu for the executor.
  ExecutorInfo executorInfo;
  executorInfo.mutable_resources()->CopyFrom(
      Resources::parse("cpus:0.5").get());

  ContainerID containerId;
  containerId.set_value("mesos_test_cfs_cpu_limit");

  AWAIT_READY(isolator.get()->prepare(containerId, executorInfo));

  // Max out a single core by generating random numbers for 0.5
  // seconds of wall time; limited to 0.5 cpu this should add up to
  // roughly 250 ms of total cpu time. /dev/urandom is used to prevent
  // blocking on Linux when there's insufficient entropy.
  string command = "cat /dev/urandom > /dev/null & "
    "export MESOS_TEST_PID=$! && "
    "sleep 0.5 && "
    "kill $MESOS_TEST_PID";

  // Pipe handed to the child; the parent writes to it once the child
  // has been isolated.
  int syncPipe[2];
  ASSERT_NE(-1, ::pipe(syncPipe));

  lambda::function<int()> childMain =
    lambda::bind(&execute, command, syncPipe);

  Try<pid_t> child = launcher.get()->fork(containerId, childMain);
  ASSERT_SOME(child);

  // Register with the reaper so we learn when the child exits.
  Future<Option<int> > exited = process::reap(child.get());

  // Parent side: the read end is not needed here.
  ::close(syncPipe[0]);

  // Put the child under isolation.
  AWAIT_READY(isolator.get()->isolate(containerId, child.get()));

  // Signal the child to continue.
  int token;
  ASSERT_LT(0, ::write(syncPipe[1], &token, sizeof(token)));
  ::close(syncPipe[1]);

  // Block until the command has finished.
  AWAIT_READY(exited);

  Future<ResourceStatistics> statistics =
    isolator.get()->usage(containerId);

  AWAIT_READY(statistics);

  // Upper bound: no more than 300 ms of cpu time. Lower bound: at
  // least 50 ms, so that the test fails on a very heavily loaded
  // host. That is the correct outcome because under such conditions
  // the CFS cpu limiter is not actually being exercised.
  double cpuSeconds =
    statistics.get().cpus_system_time_secs() +
    statistics.get().cpus_user_time_secs();

  EXPECT_GE(0.30, cpuSeconds);
  EXPECT_LE(0.05, cpuSeconds);

  // Make sure nothing is left running in the container.
  AWAIT_READY(launcher.get()->destroy(containerId));

  // Give the isolator a chance to clean up.
  AWAIT_READY(isolator.get()->cleanup(containerId));

  delete isolator.get();
  delete launcher.get();
}
// Checks that the cpu isolator under test reports system cpu time for
// a child that spends its time reading /dev/urandom.
TYPED_TEST(CpuIsolatorTest, SystemCpuUsage)
{
  Flags flags;

  Try<Isolator*> isolator = TypeParam::create(flags);
  CHECK_SOME(isolator);

  // A PosixLauncher is sufficient even when testing a cgroups isolator.
  Try<Launcher*> launcher = PosixLauncher::create(flags);

  // The launcher is dereferenced below, so fail loudly here if it
  // could not be created.
  CHECK_SOME(launcher);

  ExecutorInfo executorInfo;
  executorInfo.mutable_resources()->CopyFrom(
      Resources::parse("cpus:1.0").get());

  ContainerID containerId;
  containerId.set_value("system_cpu_usage");

  AWAIT_READY(isolator.get()->prepare(containerId, executorInfo));

  Try<string> dir = os::mkdtemp();
  ASSERT_SOME(dir);
  const string& file = path::join(dir.get(), "mesos_isolator_test_ready");

  // Generating random numbers is done by the kernel and will max out a single
  // core and run almost exclusively in the kernel, i.e., system time.
  string command = "cat /dev/urandom > /dev/null & "
    "touch " + file + "; " // Signals the command is running.
    "sleep 60";

  // Pipe handed to the child; the parent writes to it once the child
  // has been isolated.
  int pipes[2];
  ASSERT_NE(-1, ::pipe(pipes));

  lambda::function<int()> inChild = lambda::bind(&execute, command, pipes);

  Try<pid_t> pid = launcher.get()->fork(containerId, inChild);
  ASSERT_SOME(pid);

  // Reap the forked child.
  Future<Option<int> > status = process::reap(pid.get());

  // Continue in the parent.
  ::close(pipes[0]);

  // Isolate the forked child.
  AWAIT_READY(isolator.get()->isolate(containerId, pid.get()));

  // Now signal the child to continue.
  int buf;
  ASSERT_LT(0, ::write(pipes[1], &buf, sizeof(buf)));
  ::close(pipes[1]);

  // Wait for the command to start. The wait sleeps between polls and
  // is bounded so that the test neither spins a core while cpu time is
  // being measured nor hangs forever if the command never starts.
  Duration startup = Duration::zero();
  while (!os::exists(file) && startup < Seconds(10)) {
    os::sleep(Milliseconds(10));
    startup += Milliseconds(10);
  }

  // Non-fatal on purpose: the cleanup below must still run.
  EXPECT_TRUE(os::exists(file));

  // Wait up to 1 second for the child process to induce 1/8 of a second of
  // system cpu time.
  ResourceStatistics statistics;
  Duration waited = Duration::zero();
  do {
    Future<ResourceStatistics> usage = isolator.get()->usage(containerId);
    AWAIT_READY(usage);

    statistics = usage.get();

    // If we meet our usage expectations, we're done!
    if (statistics.cpus_system_time_secs() >= 0.125) {
      break;
    }

    os::sleep(Milliseconds(200));
    waited += Milliseconds(200);
  } while (waited < Seconds(1));

  EXPECT_LE(0.125, statistics.cpus_system_time_secs());

  // Ensure all processes are killed.
  AWAIT_READY(launcher.get()->destroy(containerId));

  // Make sure the child was reaped.
  AWAIT_READY(status);

  // Let the isolator clean up.
  AWAIT_READY(isolator.get()->cleanup(containerId));

  delete isolator.get();
  delete launcher.get();

  CHECK_SOME(os::rmdir(dir.get()));
}
// Checks that the memory isolator under test reports the RSS of a
// forked child that is asked to consume 256 MB of memory.
TYPED_TEST(MemIsolatorTest, MemUsage)
{
  slave::Flags flags;

  Try<Isolator*> isolator = TypeParam::create(flags);
  CHECK_SOME(isolator);

  // A PosixLauncher is sufficient even when testing a cgroups isolator.
  Try<Launcher*> launcher = PosixLauncher::create(flags);

  // The launcher is dereferenced below, so fail loudly here if it
  // could not be created.
  CHECK_SOME(launcher);

  ExecutorInfo executorInfo;
  executorInfo.mutable_resources()->CopyFrom(
      Resources::parse("mem:1024").get());

  ContainerID containerId;
  containerId.set_value("memory_usage");

  // Use a relative temporary directory so it gets cleaned up
  // automatically with the test.
  Try<string> dir = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
  ASSERT_SOME(dir);

  AWAIT_READY(
      isolator.get()->prepare(containerId, executorInfo, dir.get(), None()));

  // Pipe handed to the child setup function; the parent writes to it
  // once the child has been isolated.
  int pipes[2];
  ASSERT_NE(-1, ::pipe(pipes));

  Try<pid_t> pid = launcher.get()->fork(
      containerId,
      "/bin/sh",
      vector<string>(),
      Subprocess::FD(STDIN_FILENO),
      Subprocess::FD(STDOUT_FILENO),
      Subprocess::FD(STDERR_FILENO),
      None(),
      None(),
      lambda::bind(&consumeMemory, Megabytes(256), Seconds(10), pipes));

  ASSERT_SOME(pid);

  // Set up the reaper to wait on the forked child.
  Future<Option<int> > status = process::reap(pid.get());

  // Continue in the parent.
  ASSERT_SOME(os::close(pipes[0]));

  // Isolate the forked child.
  AWAIT_READY(isolator.get()->isolate(containerId, pid.get()));

  // Now signal the child to continue.
  char dummy;
  ASSERT_LT(0, ::write(pipes[1], &dummy, sizeof(dummy)));

  ASSERT_SOME(os::close(pipes[1]));

  // Wait up to 5 seconds for the child process to consume 256 MB of
  // memory, polling the isolator once per second.
  ResourceStatistics statistics;
  Bytes threshold = Megabytes(256);
  Duration waited = Duration::zero();
  do {
    Future<ResourceStatistics> usage = isolator.get()->usage(containerId);
    AWAIT_READY(usage);

    statistics = usage.get();

    // If we meet our usage expectations, we're done!
    if (statistics.mem_rss_bytes() >= threshold.bytes()) {
      break;
    }

    os::sleep(Seconds(1));
    waited += Seconds(1);
  } while (waited < Seconds(5));

  EXPECT_LE(threshold.bytes(), statistics.mem_rss_bytes());

  // Ensure all processes are killed.
  AWAIT_READY(launcher.get()->destroy(containerId));

  // Make sure the child was reaped.
  AWAIT_READY(status);

  // Let the isolator clean up.
  AWAIT_READY(isolator.get()->cleanup(containerId));

  delete isolator.get();
  delete launcher.get();
}
// Regression test for MESOS-1049: a container that is given a large
// (>= 10 cpus) cpu quota must still launch and run to completion when
// CFS is enabled.
// TODO(vinod): Revisit this if/when the isolator restricts the number
// of cpus that an executor can use based on the slave cpus.
TEST_F(LimitedCpuIsolatorTest, ROOT_CGROUPS_Cfs_Big_Quota)
{
  slave::Flags flags;

  // Cap CPU utilization through CFS.
  flags.cgroups_enable_cfs = true;

  Try<Isolator*> isolator = CgroupsCpushareIsolatorProcess::create(flags);
  CHECK_SOME(isolator);

  Try<Launcher*> launcher = LinuxLauncher::create(flags);
  CHECK_SOME(launcher);

  // Request 100.5 cpus for the executor.
  ExecutorInfo executorInfo;
  executorInfo.mutable_resources()->CopyFrom(
      Resources::parse("cpus:100.5").get());

  ContainerID containerId;
  containerId.set_value("mesos_test_cfs_big_cpu_limit");

  // The sandbox is created below the current working directory so
  // that it gets cleaned up automatically with the test.
  Try<string> sandbox = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
  ASSERT_SOME(sandbox);

  AWAIT_READY(isolator.get()->prepare(
      containerId,
      executorInfo,
      sandbox.get(),
      None()));

  // Pipe handed to the child setup function; the parent writes to it
  // once the child has been isolated.
  int syncPipe[2];
  ASSERT_NE(-1, ::pipe(syncPipe));

  vector<string> argv;
  argv.push_back("sh");
  argv.push_back("-c");
  argv.push_back("exit 0");

  Try<pid_t> child = launcher.get()->fork(
      containerId,
      "/bin/sh",
      argv,
      Subprocess::FD(STDIN_FILENO),
      Subprocess::FD(STDOUT_FILENO),
      Subprocess::FD(STDERR_FILENO),
      None(),
      None(),
      lambda::bind(&childSetup, syncPipe));

  ASSERT_SOME(child);

  // Register with the reaper so we learn the child's exit status.
  Future<Option<int> > exited = process::reap(child.get());

  // Parent side: the read end is not needed here.
  ASSERT_SOME(os::close(syncPipe[0]));

  // Put the child under isolation.
  AWAIT_READY(isolator.get()->isolate(containerId, child.get()));

  // Signal the child to continue.
  char token;
  ASSERT_LT(0, ::write(syncPipe[1], &token, sizeof(token)));

  ASSERT_SOME(os::close(syncPipe[1]));

  // The command must finish with exit status 0.
  AWAIT_READY(exited);
  ASSERT_SOME_EQ(0, exited.get());

  // Make sure nothing is left running in the container.
  AWAIT_READY(launcher.get()->destroy(containerId));

  // Give the isolator a chance to clean up.
  AWAIT_READY(isolator.get()->cleanup(containerId));

  delete isolator.get();
  delete launcher.get();
}
// Checks that the cpu isolator under test reports user cpu time for a
// child that spins in a shell loop.
TYPED_TEST(CpuIsolatorTest, UserCpuUsage)
{
  slave::Flags flags;

  Try<Isolator*> isolator = TypeParam::create(flags);
  CHECK_SOME(isolator);

  // A PosixLauncher is sufficient even when testing a cgroups isolator.
  Try<Launcher*> launcher = PosixLauncher::create(flags);

  // The launcher is dereferenced below, so fail loudly here if it
  // could not be created.
  CHECK_SOME(launcher);

  ExecutorInfo executorInfo;
  executorInfo.mutable_resources()->CopyFrom(
      Resources::parse("cpus:1.0").get());

  ContainerID containerId;
  containerId.set_value("user_cpu_usage");

  // Use a relative temporary directory so it gets cleaned up
  // automatically with the test.
  Try<string> dir = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
  ASSERT_SOME(dir);

  AWAIT_READY(
      isolator.get()->prepare(containerId, executorInfo, dir.get(), None()));

  const string& file = path::join(dir.get(), "mesos_isolator_test_ready");

  // Max out a single core in userspace. The spinning loop is killed
  // when the container is destroyed at the end of the test.
  string command = "while true ; do true ; done &"
    "touch " + file + "; " // Signals the command is running.
    "sleep 60";

  // Pipe handed to the child setup function; the parent writes to it
  // once the child has been isolated.
  int pipes[2];
  ASSERT_NE(-1, ::pipe(pipes));

  vector<string> argv(3);
  argv[0] = "sh";
  argv[1] = "-c";
  argv[2] = command;

  Try<pid_t> pid = launcher.get()->fork(
      containerId,
      "/bin/sh",
      argv,
      Subprocess::FD(STDIN_FILENO),
      Subprocess::FD(STDOUT_FILENO),
      Subprocess::FD(STDERR_FILENO),
      None(),
      None(),
      lambda::bind(&childSetup, pipes));

  ASSERT_SOME(pid);

  // Reap the forked child.
  Future<Option<int> > status = process::reap(pid.get());

  // Continue in the parent.
  ASSERT_SOME(os::close(pipes[0]));

  // Isolate the forked child.
  AWAIT_READY(isolator.get()->isolate(containerId, pid.get()));

  // Now signal the child to continue.
  char dummy;
  ASSERT_LT(0, ::write(pipes[1], &dummy, sizeof(dummy)));

  ASSERT_SOME(os::close(pipes[1]));

  // Wait for the command to start. The wait sleeps between polls and
  // is bounded so that the test neither spins a core while cpu time is
  // being measured nor hangs forever if the command never starts.
  Duration startup = Duration::zero();
  while (!os::exists(file) && startup < Seconds(10)) {
    os::sleep(Milliseconds(10));
    startup += Milliseconds(10);
  }

  // Non-fatal on purpose: the cleanup below must still run.
  EXPECT_TRUE(os::exists(file));

  // Wait up to 1 second for the child process to induce 1/8 of a second of
  // user cpu time.
  ResourceStatistics statistics;
  Duration waited = Duration::zero();
  do {
    Future<ResourceStatistics> usage = isolator.get()->usage(containerId);
    AWAIT_READY(usage);

    statistics = usage.get();

    // If we meet our usage expectations, we're done!
    if (statistics.cpus_user_time_secs() >= 0.125) {
      break;
    }

    os::sleep(Milliseconds(200));
    waited += Milliseconds(200);
  } while (waited < Seconds(1));

  EXPECT_LE(0.125, statistics.cpus_user_time_secs());

  // Ensure all processes are killed.
  AWAIT_READY(launcher.get()->destroy(containerId));

  // Make sure the child was reaped.
  AWAIT_READY(status);

  // Let the isolator clean up.
  AWAIT_READY(isolator.get()->cleanup(containerId));

  delete isolator.get();
  delete launcher.get();
}
TYPED_TEST(UserCgroupIsolatorTest, ROOT_CGROUPS_UserCgroup) { slave::Flags flags; flags.perf_events = "cpu-cycles"; // Needed for CgroupsPerfEventIsolator. Try<Isolator*> isolator = TypeParam::create(flags); CHECK_SOME(isolator); ExecutorInfo executorInfo; executorInfo.mutable_resources()->CopyFrom( Resources::parse("mem:1024;cpus:1").get()); // For cpu/mem isolators. ContainerID containerId; containerId.set_value("container"); AWAIT_READY(isolator.get()->prepare( containerId, executorInfo, os::getcwd(), UNPRIVILEGED_USERNAME)); // Isolators don't provide a way to determine the cgroups they use // so we'll inspect the cgroups for an isolated dummy process. pid_t pid = fork(); if (pid == 0) { // Child just sleeps. ::sleep(100); ABORT("Child process should not reach here"); } ASSERT_GT(pid, 0); AWAIT_READY(isolator.get()->isolate(containerId, pid)); // Get the container's cgroups from /proc/$PID/cgroup. We're only // interested in the non-root cgroups, i.e., we exclude those with // paths "/", e.g., only cpu and cpuacct from this example: // 6:blkio:/ // 5:perf_event:/ // 4:memory:/ // 3:freezer:/ // 2:cpuacct:/mesos // 1:cpu:/mesos // awk will then output "cpuacct/mesos\ncpu/mesos" as the cgroup(s). ostringstream output; Try<int> status = os::shell( &output, "grep -v '/$' /proc/" + stringify(pid) + "/cgroup | awk -F ':' '{print $2$3}'"); ASSERT_SOME(status); // Kill the dummy child process. ::kill(pid, SIGKILL); int exitStatus; EXPECT_NE(-1, ::waitpid(pid, &exitStatus, 0)); vector<string> cgroups = strings::tokenize(output.str(), "\n"); ASSERT_FALSE(cgroups.empty()); foreach (const string& cgroup, cgroups) { // Check the user cannot manipulate the container's cgroup control // files. EXPECT_NE(0, os::system( "su - " + UNPRIVILEGED_USERNAME + " -c 'echo $$ >" + path::join(flags.cgroups_hierarchy, cgroup, "cgroup.procs") + "'")); // Check the user can create a cgroup under the container's // cgroup. 
string userCgroup = path::join(cgroup, "user"); EXPECT_EQ(0, os::system( "su - " + UNPRIVILEGED_USERNAME + " -c 'mkdir " + path::join(flags.cgroups_hierarchy, userCgroup) + "'")); // Check the user can manipulate control files in the created // cgroup. EXPECT_EQ(0, os::system( "su - " + UNPRIVILEGED_USERNAME + " -c 'echo $$ >" + path::join(flags.cgroups_hierarchy, userCgroup, "cgroup.procs") + "'")); }
// Verifies the process and thread counts that the cpu isolator
// reports for a container: zero before a process is isolated, one
// while the isolated child is alive, and zero again once it has been
// destroyed.
TEST_F(LimitedCpuIsolatorTest, ROOT_CGROUPS_Pids_and_Tids)
{
  slave::Flags flags;
  flags.cgroups_cpu_enable_pids_and_tids_count = true;

  Try<Isolator*> isolator = CgroupsCpushareIsolatorProcess::create(flags);
  CHECK_SOME(isolator);

  Try<Launcher*> launcher = LinuxLauncher::create(flags);
  CHECK_SOME(launcher);

  ExecutorInfo executorInfo;
  executorInfo.mutable_resources()->CopyFrom(
      Resources::parse("cpus:0.5;mem:512").get());

  ContainerID containerId;
  containerId.set_value(UUID::random().toString());

  // The sandbox is created below the current working directory so
  // that it gets cleaned up automatically with the test.
  Try<string> sandbox = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
  ASSERT_SOME(sandbox);

  ContainerConfig containerConfig;
  containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
  containerConfig.set_directory(sandbox.get());

  Future<Option<ContainerLaunchInfo>> prepare =
    isolator.get()->prepare(containerId, containerConfig);

  AWAIT_READY(prepare);

  // The cgroup is created in 'prepare'; right afterwards it must
  // still be empty.
  Future<ResourceStatistics> statistics =
    isolator.get()->usage(containerId);

  AWAIT_READY(statistics);
  EXPECT_EQ(0U, statistics.get().processes());
  EXPECT_EQ(0U, statistics.get().threads());

  // Pipe handed to the child setup function; the parent writes to it
  // once the child has been isolated.
  int syncPipe[2];
  ASSERT_NE(-1, ::pipe(syncPipe));

  vector<string> argv;
  argv.push_back("cat");

  Try<pid_t> child = launcher.get()->fork(
      containerId,
      "cat",
      argv,
      Subprocess::FD(STDIN_FILENO),
      Subprocess::FD(STDOUT_FILENO),
      Subprocess::FD(STDERR_FILENO),
      None(),
      None(),
      lambda::bind(&childSetup, syncPipe),
      prepare.get().isSome() ? prepare.get().get().namespaces() : 0);

  ASSERT_SOME(child);

  // Register with the reaper so we learn when the child exits.
  Future<Option<int>> exited = process::reap(child.get());

  // Parent side: the read end is not needed here.
  ASSERT_SOME(os::close(syncPipe[0]));

  // The child has been forked but not isolated yet, so the cgroup is
  // still empty.
  statistics = isolator.get()->usage(containerId);
  AWAIT_READY(statistics);
  EXPECT_EQ(0U, statistics.get().processes());
  EXPECT_EQ(0U, statistics.get().threads());

  // Put the child under isolation.
  AWAIT_READY(isolator.get()->isolate(containerId, child.get()));

  // Once isolated, the child is counted even though it hasn't exec'd
  // yet.
  statistics = isolator.get()->usage(containerId);
  AWAIT_READY(statistics);
  EXPECT_EQ(1U, statistics.get().processes());
  EXPECT_EQ(1U, statistics.get().threads());

  // Signal the child to continue.
  char token;
  ASSERT_LT(0, ::write(syncPipe[1], &token, sizeof(token)));

  ASSERT_SOME(os::close(syncPipe[1]));

  // The process count should still be 1: the launched 'cat' command
  // is expected to still be running at this point.
  statistics = isolator.get()->usage(containerId);
  AWAIT_READY(statistics);
  EXPECT_EQ(1U, statistics.get().processes());
  EXPECT_EQ(1U, statistics.get().threads());

  // Make sure nothing is left running in the container.
  AWAIT_READY(launcher.get()->destroy(containerId));

  // Block until the command has been reaped.
  AWAIT_READY(exited);

  // With the process gone the cgroup should be empty again.
  statistics = isolator.get()->usage(containerId);
  AWAIT_READY(statistics);
  EXPECT_EQ(0U, statistics.get().processes());
  EXPECT_EQ(0U, statistics.get().threads());

  // Give the isolator a chance to clean up.
  AWAIT_READY(isolator.get()->cleanup(containerId));

  delete isolator.get();
  delete launcher.get();
}
// Checks that, with CFS enabled and the executor given 0.5 cpu, a
// cpu-bound command run for 0.5 seconds of wall time accumulates
// between 50 ms and 300 ms of cpu time.
TEST_F(LimitedCpuIsolatorTest, ROOT_CGROUPS_CFS_Enable_Cfs)
{
  slave::Flags flags;

  // Cap CPU utilization through CFS.
  flags.cgroups_enable_cfs = true;

  Try<Isolator*> isolator = CgroupsCpushareIsolatorProcess::create(flags);
  CHECK_SOME(isolator);

  Try<Launcher*> launcher = LinuxLauncher::create(flags);
  CHECK_SOME(launcher);

  // Request 0.5 cpu for the executor.
  ExecutorInfo executorInfo;
  executorInfo.mutable_resources()->CopyFrom(
      Resources::parse("cpus:0.5").get());

  ContainerID containerId;
  containerId.set_value(UUID::random().toString());

  // The sandbox is created below the current working directory so
  // that it gets cleaned up automatically with the test.
  Try<string> sandbox = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
  ASSERT_SOME(sandbox);

  ContainerConfig containerConfig;
  containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
  containerConfig.set_directory(sandbox.get());

  Future<Option<ContainerLaunchInfo>> prepare =
    isolator.get()->prepare(containerId, containerConfig);

  AWAIT_READY(prepare);

  // Max out a single core by generating random numbers for 0.5
  // seconds of wall time; limited to 0.5 cpu this should add up to
  // roughly 250 ms of total cpu time. /dev/urandom is used to prevent
  // blocking on Linux when there's insufficient entropy.
  string command = "cat /dev/urandom > /dev/null & "
    "export MESOS_TEST_PID=$! && "
    "sleep 0.5 && "
    "kill $MESOS_TEST_PID";

  // Pipe handed to the child setup function; the parent writes to it
  // once the child has been isolated.
  int syncPipe[2];
  ASSERT_NE(-1, ::pipe(syncPipe));

  vector<string> argv;
  argv.push_back("sh");
  argv.push_back("-c");
  argv.push_back(command);

  Try<pid_t> child = launcher.get()->fork(
      containerId,
      "sh",
      argv,
      Subprocess::FD(STDIN_FILENO),
      Subprocess::FD(STDOUT_FILENO),
      Subprocess::FD(STDERR_FILENO),
      None(),
      None(),
      lambda::bind(&childSetup, syncPipe),
      prepare.get().isSome() ? prepare.get().get().namespaces() : 0);

  ASSERT_SOME(child);

  // Register with the reaper so we learn when the child exits.
  Future<Option<int> > exited = process::reap(child.get());

  // Parent side: the read end is not needed here.
  ASSERT_SOME(os::close(syncPipe[0]));

  // Put the child under isolation.
  AWAIT_READY(isolator.get()->isolate(containerId, child.get()));

  // Signal the child to continue.
  char token;
  ASSERT_LT(0, ::write(syncPipe[1], &token, sizeof(token)));

  ASSERT_SOME(os::close(syncPipe[1]));

  // Block until the command has finished.
  AWAIT_READY(exited);

  Future<ResourceStatistics> statistics =
    isolator.get()->usage(containerId);

  AWAIT_READY(statistics);

  // Upper bound: no more than 300 ms of cpu time. Lower bound: at
  // least 50 ms, so that the test fails on a very heavily loaded
  // host. That is the correct outcome because under such conditions
  // the CFS cpu limiter is not actually being exercised.
  double cpuSeconds =
    statistics.get().cpus_system_time_secs() +
    statistics.get().cpus_user_time_secs();

  EXPECT_GE(0.30, cpuSeconds);
  EXPECT_LE(0.05, cpuSeconds);

  // Make sure nothing is left running in the container.
  AWAIT_READY(launcher.get()->destroy(containerId));

  // Give the isolator a chance to clean up.
  AWAIT_READY(isolator.get()->cleanup(containerId));

  delete isolator.get();
  delete launcher.get();
}
// Launches a container for the given task by invoking the external
// containerizer's "launch" command.
//
// The task (with a default container image filled in from the slave
// flags when the task's command has none) is serialized as an
// ExternalTask and handed to the invoked command together with the
// executor environment. The returned future is chained onto
// '_launch', which runs once both the command's output has been read
// and its exit status is available.
//
// Returns a Failure if the container is already known, if the task
// cannot be serialized, or if the external command cannot be invoked.
Future<ExecutorInfo> ExternalContainerizerProcess::launch(
    const ContainerID& containerId,
    const TaskInfo& taskInfo,
    const FrameworkID& frameworkId,
    const std::string& directory,
    const Option<std::string>& user,
    const SlaveID& slaveId,
    const PID<Slave>& slavePid,
    bool checkpoint)
{
  LOG(INFO) << "Launching container '" << containerId << "'";

  // Get the executor from our task. If no executor is associated with
  // the given task, this function renders an ExecutorInfo using the
  // mesos-executor as its command.
  ExecutorInfo executor = containerExecutorInfo(flags, taskInfo, frameworkId);
  executor.mutable_resources()->MergeFrom(taskInfo.resources());

  if (containers.contains(containerId)) {
    return Failure("Cannot start already running container '" +
                   containerId.value() + "'");
  }

  sandboxes.put(containerId, Owned<Sandbox>(new Sandbox(directory, user)));

  map<string, string> environment = executorEnvironment(
      executor,
      directory,
      slaveId,
      slavePid,
      checkpoint,
      flags.recovery_timeout);

  if (!flags.hadoop_home.empty()) {
    environment["HADOOP_HOME"] = flags.hadoop_home;
  }

  // Work on a copy so a default container image can be filled in
  // without touching the caller's TaskInfo.
  TaskInfo task;
  task.CopyFrom(taskInfo);

  CommandInfo* command = task.has_executor()
    ? task.mutable_executor()->mutable_command()
    : task.mutable_command();

  // When the selected command has no container attached, use the
  // default from the slave startup flags, if available.
  if (!command->has_container()) {
    if (flags.default_container_image.isSome()) {
      command->mutable_container()->set_image(
          flags.default_container_image.get());
    } else {
      LOG(INFO) << "No container specified in task and no default given. "
                << "The external containerizer will have to fill in "
                << "defaults.";
    }
  }

  ExternalTask external;
  external.mutable_task()->CopyFrom(task);
  external.set_mesos_executor_path(
      path::join(flags.launcher_dir, "mesos-executor"));

  // Do not hand truncated input to the external containerizer if the
  // message could not be serialized. As on the invoke failure path
  // below, the sandbox registered above is left in place.
  stringstream output;
  if (!external.SerializeToOstream(&output)) {
    return Failure("Launch of container '" + containerId.value() +
                   "' failed (error: could not serialize the task)");
  }

  Try<Subprocess> invoked = invoke(
      "launch",
      containerId,
      output.str(),
      environment);

  if (invoked.isError()) {
    return Failure("Launch of container '" + containerId.value() +
                   "' failed (error: " + invoked.error() + ")");
  }

  // Record the process.
  containers.put(
      containerId,
      Owned<Container>(new Container(invoked.get().pid())));

  VLOG(2) << "Now awaiting data from pipe...";

  // Read from the result-pipe and invoke callbacks when reaching EOF.
  return await(read(invoked.get().out()), invoked.get().status())
    .then(defer(
        PID<ExternalContainerizerProcess>(this),
        &ExternalContainerizerProcess::_launch,
        containerId,
        frameworkId,
        executor,
        slaveId,
        checkpoint,
        lambda::_1));
}
// This test verifies that a task group is launched on the agent if the executor
// provides a valid authentication token specifying its own ContainerID.
TEST_F(ExecutorAuthorizationTest, RunTaskGroup)
{
  Try<Owned<cluster::Master>> master = StartMaster();
  ASSERT_SOME(master);

  // Start an agent with permissive ACLs so that a task can be launched.
  ACLs acls;
  acls.set_permissive(true);

  slave::Flags flags = CreateSlaveFlags();
  flags.acls = acls;

  Owned<MasterDetector> detector = master.get()->createDetector();
  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
  ASSERT_SOME(slave);

  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;

  MockScheduler sched;
  MesosSchedulerDriver driver(
      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);

  Future<FrameworkID> frameworkId;
  EXPECT_CALL(sched, registered(&driver, _, _))
    .WillOnce(FutureArg<1>(&frameworkId));

  Future<vector<Offer>> offers;
  EXPECT_CALL(sched, resourceOffers(&driver, _))
    .WillOnce(FutureArg<1>(&offers))
    .WillRepeatedly(Return()); // Ignore subsequent offers.

  driver.start();

  AWAIT_READY(frameworkId);

  AWAIT_READY(offers);
  ASSERT_FALSE(offers->empty());

  Offer offer = offers.get()[0];

  TaskInfo task = createTask(
      offer.slave_id(),
      Resources::parse("cpus:0.5;mem:32").get(),
      "sleep 1000");

  // Only the first status update is inspected. Later updates for the
  // long-running task may arrive before the driver is stopped and
  // must not over-saturate the expectation.
  Future<TaskStatus> status;
  EXPECT_CALL(sched, statusUpdate(&driver, _))
    .WillOnce(FutureArg<1>(&status))
    .WillRepeatedly(Return()); // Ignore subsequent updates.

  Resources executorResources =
    allocatedResources(Resources::parse("cpus:0.1;mem:32;disk:32").get(), "*");

  ExecutorInfo executor;
  executor.mutable_executor_id()->set_value("default");
  executor.set_type(ExecutorInfo::DEFAULT);
  executor.mutable_framework_id()->CopyFrom(frameworkId.get());
  executor.mutable_resources()->CopyFrom(executorResources);

  TaskGroupInfo taskGroup;
  taskGroup.add_tasks()->CopyFrom(task);

  driver.acceptOffers({offer.id()}, {LAUNCH_GROUP(executor, taskGroup)});

  // The first update must be TASK_STARTING for the launched task.
  AWAIT_READY(status);

  ASSERT_EQ(task.task_id(), status->task_id());
  EXPECT_EQ(TASK_STARTING, status->state());

  driver.stop();
  driver.join();
}
// This test verifies that when reregistering, the slave sends the // executor ID of a non-command executor task, but not the one of a // command executor task. We then check that the master's API has // task IDs absent only for the command executor case. // // This was motivated by MESOS-8135. TEST_F(MasterSlaveReconciliationTest, SlaveReregisterTaskExecutorIds) { Try<Owned<cluster::Master>> master = StartMaster(); ASSERT_SOME(master); slave::Flags flags = CreateSlaveFlags(); StandaloneMasterDetector detector(master.get()->pid); Try<Owned<cluster::Slave>> slave = StartSlave(&detector, flags); ASSERT_SOME(slave); MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); Future<FrameworkID> frameworkId; EXPECT_CALL(sched, registered(&driver, _, _)) .WillOnce(FutureArg<1>(&frameworkId)); Future<vector<Offer>> offers; EXPECT_CALL(sched, resourceOffers(&driver, _)) .WillOnce(FutureArg<1>(&offers)) .WillRepeatedly(Return()); // Ignore subsequent offers. driver.start(); AWAIT_READY(frameworkId); AWAIT_READY(offers); EXPECT_NE(0u, offers->size()); const Offer& offer = offers->front(); const SlaveID& slaveId = offer.slave_id(); Resources resources = Resources::parse(defaultTaskResourcesString).get(); TaskInfo commandExecutorTask = createTask(slaveId, resources, SLEEP_COMMAND(1000)); TaskInfo defaultExecutorTask = createTask(slaveId, resources, SLEEP_COMMAND(1000)); ExecutorInfo defaultExecutorInfo; defaultExecutorInfo.set_type(ExecutorInfo::DEFAULT); defaultExecutorInfo.mutable_executor_id()->CopyFrom(DEFAULT_EXECUTOR_ID); defaultExecutorInfo.mutable_framework_id()->CopyFrom(frameworkId.get()); defaultExecutorInfo.mutable_resources()->CopyFrom(resources); // We expect two TASK_STARTING and two TASK_RUNNING updates. vector<Future<TaskStatus>> taskStatuses(4); { // This variable doesn't have to be used explicitly. 
testing::InSequence inSequence; foreach (Future<TaskStatus>& taskStatus, taskStatuses) { EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillOnce(FutureArg<1>(&taskStatus)); } EXPECT_CALL(sched, statusUpdate(&driver, _)) .WillRepeatedly(Return()); // Ignore subsequent updates. }