// TODO(bplotka): Parametrize that.
static ResourceStatistics createStatistics()
{
  ResourceStatistics statistics;
  statistics.set_cpus_nr_periods(100);
  statistics.set_cpus_nr_throttled(2);
  statistics.set_cpus_user_time_secs(4);
  statistics.set_cpus_system_time_secs(1);
  statistics.set_cpus_throttled_time_secs(0.5);
  statistics.set_cpus_limit(1.0);
  statistics.set_mem_file_bytes(0);
  statistics.set_mem_anon_bytes(0);
  statistics.set_mem_mapped_file_bytes(0);
  statistics.set_mem_rss_bytes(1024);
  statistics.set_mem_limit_bytes(2048);
  statistics.set_timestamp(0);

  return statistics;
}
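// Sketch only, not part of the original code: one possible way to address the
// TODO above. The chosen parameters are hypothetical; a real change would
// parameterize whichever values the tests actually need to vary.
static ResourceStatistics createStatistics(
    double cpusLimit,
    uint64_t memRssBytes,
    double timestamp)
{
  ResourceStatistics statistics;
  statistics.set_cpus_nr_periods(100);
  statistics.set_cpus_nr_throttled(2);
  statistics.set_cpus_user_time_secs(4);
  statistics.set_cpus_system_time_secs(1);
  statistics.set_cpus_throttled_time_secs(0.5);
  statistics.set_cpus_limit(cpusLimit);
  statistics.set_mem_file_bytes(0);
  statistics.set_mem_anon_bytes(0);
  statistics.set_mem_mapped_file_bytes(0);
  statistics.set_mem_rss_bytes(memRssBytes);
  statistics.set_mem_limit_bytes(2048);
  statistics.set_timestamp(timestamp);

  return statistics;
}

// Hypothetical usage: createStatistics(1.0, 1024, 0) reproduces the fixture
// above, while createStatistics(2.5, 1024, Clock::now().secs()) matches the
// values used in MonitorTest.WatchUnwatch below.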
TEST(MonitorTest, Statistics)
{
  FrameworkID frameworkId;
  frameworkId.set_value("framework");

  ExecutorID executorId;
  executorId.set_value("executor");

  ExecutorInfo executorInfo;
  executorInfo.mutable_executor_id()->CopyFrom(executorId);
  executorInfo.mutable_framework_id()->CopyFrom(frameworkId);
  executorInfo.set_name("name");
  executorInfo.set_source("source");

  ResourceStatistics statistics;
  statistics.set_cpus_nr_periods(100);
  statistics.set_cpus_nr_throttled(2);
  statistics.set_cpus_user_time_secs(4);
  statistics.set_cpus_system_time_secs(1);
  statistics.set_cpus_throttled_time_secs(0.5);
  statistics.set_cpus_limit(1.0);
  statistics.set_mem_file_bytes(0);
  statistics.set_mem_anon_bytes(0);
  statistics.set_mem_mapped_file_bytes(0);
  statistics.set_mem_rss_bytes(1024);
  statistics.set_mem_limit_bytes(2048);
  statistics.set_timestamp(0);

  ResourceMonitor monitor([=]() -> Future<ResourceUsage> {
    Resources resources = Resources::parse("cpus:1;mem:2").get();

    ResourceUsage usage;
    ResourceUsage::Executor* executor = usage.add_executors();
    executor->mutable_executor_info()->CopyFrom(executorInfo);
    executor->mutable_allocated()->CopyFrom(resources);
    executor->mutable_statistics()->CopyFrom(statistics);

    return usage;
  });

  UPID upid("monitor", process::address());

  Future<http::Response> response = http::get(upid, "statistics");
  AWAIT_READY(response);

  AWAIT_EXPECT_RESPONSE_STATUS_EQ(http::OK().status, response);
  AWAIT_EXPECT_RESPONSE_HEADER_EQ(
      "application/json",
      "Content-Type",
      response);

  JSON::Array expected;
  JSON::Object usage;
  usage.values["executor_id"] = "executor";
  usage.values["executor_name"] = "name";
  usage.values["framework_id"] = "framework";
  usage.values["source"] = "source";
  usage.values["statistics"] = JSON::Protobuf(statistics);
  expected.values.push_back(usage);

  Try<JSON::Array> result = JSON::parse<JSON::Array>(response.get().body);
  ASSERT_SOME(result);
  ASSERT_EQ(expected, result.get());
}
Future<ResourceStatistics> CpuacctSubsystem::usage(
    const ContainerID& containerId)
{
  ResourceStatistics result;

  // TODO(chzhcn): Getting the number of processes and threads is
  // available as long as any cgroup subsystem is used so this best
  // not be tied to a specific cgroup subsystem. A better place is
  // probably Linux Launcher, which uses the cgroup freezer subsystem.
  // That requires some change for it to adopt the new semantics of
  // reporting subsystem-independent cgroup usage.
  //
  // NOTE: The complexity of this operation is linear to the number of
  // processes and threads in a container: the kernel has to allocate
  // memory to contain the list of pids or tids; the userspace has to
  // parse the cgroup files to get the size. If this proves to be a
  // performance bottleneck, some kind of rate limiting mechanism
  // needs to be employed.
  if (flags.cgroups_cpu_enable_pids_and_tids_count) {
    Try<set<pid_t>> pids = cgroups::processes(
        hierarchy,
        path::join(flags.cgroups_root, containerId.value()));

    if (pids.isError()) {
      return Failure("Failed to get number of processes: " + pids.error());
    }

    result.set_processes(pids.get().size());

    Try<set<pid_t>> tids = cgroups::threads(
        hierarchy,
        path::join(flags.cgroups_root, containerId.value()));

    if (tids.isError()) {
      return Failure("Failed to get number of threads: " + tids.error());
    }

    result.set_threads(tids.get().size());
  }

  // Get the number of clock ticks, used for cpu accounting.
  static long ticks = sysconf(_SC_CLK_TCK);

  PCHECK(ticks > 0) << "Failed to get sysconf(_SC_CLK_TCK)";

  // Add the cpuacct.stat information.
  Try<hashmap<string, uint64_t>> stat = cgroups::stat(
      hierarchy,
      path::join(flags.cgroups_root, containerId.value()),
      "cpuacct.stat");

  if (stat.isError()) {
    return Failure("Failed to read 'cpuacct.stat': " + stat.error());
  }

  // TODO(bmahler): Add namespacing to cgroups to enforce the expected
  // structure, e.g., cgroups::cpuacct::stat.
  Option<uint64_t> user = stat.get().get("user");
  Option<uint64_t> system = stat.get().get("system");

  if (user.isSome() && system.isSome()) {
    result.set_cpus_user_time_secs((double) user.get() / (double) ticks);
    result.set_cpus_system_time_secs((double) system.get() / (double) ticks);
  }

  return result;
}
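// Standalone sketch, not part of the subsystem code: 'cpuacct.stat' reports
// cumulative "user" and "system" CPU time in clock ticks (USER_HZ), which the
// code above divides by sysconf(_SC_CLK_TCK) to get seconds. The cgroup path
// below is a typical cgroup v1 mount point and is only illustrative.
#include <unistd.h>

#include <cstdint>
#include <fstream>
#include <iostream>
#include <string>

int main()
{
  // The unit in which cpuacct.stat reports CPU time.
  const long ticks = sysconf(_SC_CLK_TCK);

  // Hypothetical path; adjust for the hierarchy and container in question.
  std::ifstream stat("/sys/fs/cgroup/cpuacct/mesos/cpuacct.stat");

  // Lines look like "user 4800" / "system 1200"; with ticks == 100 these
  // convert to 48.0 and 12.0 seconds respectively.
  std::string key;
  uint64_t value;
  while (stat >> key >> value) {
    std::cout << key << ": "
              << static_cast<double>(value) / static_cast<double>(ticks)
              << " secs" << std::endl;
  }

  return 0;
}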
// TODO(bmahler): Add additional tests:
//   1. Check that the data has been published to statistics.
//   2. Check that metering is occurring on subsequent resource data.
TEST(MonitorTest, WatchUnwatch)
{
  FrameworkID frameworkId;
  frameworkId.set_value("framework");

  ExecutorID executorId;
  executorId.set_value("executor");

  ExecutorInfo executorInfo;
  executorInfo.mutable_executor_id()->CopyFrom(executorId);
  executorInfo.mutable_framework_id()->CopyFrom(frameworkId);
  executorInfo.set_name("name");
  executorInfo.set_source("source");

  ResourceStatistics initialStatistics;
  initialStatistics.set_cpus_user_time_secs(0);
  initialStatistics.set_cpus_system_time_secs(0);
  initialStatistics.set_cpus_limit(2.5);
  initialStatistics.set_mem_rss_bytes(0);
  initialStatistics.set_mem_limit_bytes(2048);
  initialStatistics.set_timestamp(Clock::now().secs());

  ResourceStatistics statistics;
  statistics.set_cpus_nr_periods(100);
  statistics.set_cpus_nr_throttled(2);
  statistics.set_cpus_user_time_secs(4);
  statistics.set_cpus_system_time_secs(1);
  statistics.set_cpus_throttled_time_secs(0.5);
  statistics.set_cpus_limit(2.5);
  statistics.set_mem_rss_bytes(1024);
  statistics.set_mem_limit_bytes(2048);
  statistics.set_timestamp(
      initialStatistics.timestamp() +
      slave::RESOURCE_MONITORING_INTERVAL.secs());

  TestingIsolator isolator;

  process::spawn(isolator);

  Future<Nothing> usage1, usage2;
  EXPECT_CALL(isolator, usage(frameworkId, executorId))
    .WillOnce(DoAll(FutureSatisfy(&usage1),
                    Return(initialStatistics)))
    .WillOnce(DoAll(FutureSatisfy(&usage2),
                    Return(statistics)));

  slave::ResourceMonitor monitor(&isolator);

  // We pause the clock first in order to make sure that we can
  // advance time below to force the 'delay' in
  // ResourceMonitorProcess::watch to execute.
  process::Clock::pause();

  monitor.watch(
      frameworkId,
      executorId,
      executorInfo,
      slave::RESOURCE_MONITORING_INTERVAL);

  // Now wait for ResourceMonitorProcess::watch to finish so we can
  // advance time to cause collection to begin.
  process::Clock::settle();

  process::Clock::advance(slave::RESOURCE_MONITORING_INTERVAL);
  process::Clock::settle();

  AWAIT_READY(usage1);

  // Wait until the isolator has finished returning the statistics.
  process::Clock::settle();

  // The second collection will populate the cpus_usage.
  process::Clock::advance(slave::RESOURCE_MONITORING_INTERVAL);
  process::Clock::settle();

  AWAIT_READY(usage2);

  // Wait until the isolator has finished returning the statistics.
  process::Clock::settle();

  process::UPID upid("monitor", process::ip(), process::port());

  Future<Response> response = process::http::get(upid, "usage.json");

  AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
  AWAIT_EXPECT_RESPONSE_HEADER_EQ(
      "application/json",
      "Content-Type",
      response);

  // TODO(bmahler): Verify metering directly through statistics.
  AWAIT_EXPECT_RESPONSE_BODY_EQ(
      strings::format(
          "[{"
              "\"executor_id\":\"executor\","
              "\"executor_name\":\"name\","
              "\"framework_id\":\"framework\","
              "\"resource_usage\":{"
                  "\"cpu_time\":%g,"
                  "\"cpu_usage\":%g,"
                  "\"memory_rss\":%lu"
              "},"
              "\"source\":\"source\""
          "}]",
          statistics.cpus_system_time_secs() +
          statistics.cpus_user_time_secs(),
          (statistics.cpus_system_time_secs() +
           statistics.cpus_user_time_secs()) /
              slave::RESOURCE_MONITORING_INTERVAL.secs(),
          statistics.mem_rss_bytes()).get(),
      response);

  response = process::http::get(upid, "statistics.json");

  AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
  AWAIT_EXPECT_RESPONSE_HEADER_EQ(
      "application/json",
      "Content-Type",
      response);

  // TODO(bmahler): Verify metering directly through statistics.
  AWAIT_EXPECT_RESPONSE_BODY_EQ(
      strings::format(
          "[{"
              "\"executor_id\":\"executor\","
              "\"executor_name\":\"name\","
              "\"framework_id\":\"framework\","
              "\"source\":\"source\","
              "\"statistics\":{"
                  "\"cpus_limit\":%g,"
                  "\"cpus_nr_periods\":%d,"
                  "\"cpus_nr_throttled\":%d,"
                  "\"cpus_system_time_secs\":%g,"
                  "\"cpus_throttled_time_secs\":%g,"
                  "\"cpus_user_time_secs\":%g,"
                  "\"mem_limit_bytes\":%lu,"
                  "\"mem_rss_bytes\":%lu"
              "}"
          "}]",
          statistics.cpus_limit(),
          statistics.cpus_nr_periods(),
          statistics.cpus_nr_throttled(),
          statistics.cpus_system_time_secs(),
          statistics.cpus_throttled_time_secs(),
          statistics.cpus_user_time_secs(),
          statistics.mem_limit_bytes(),
          statistics.mem_rss_bytes()).get(),
      response);

  // Ensure the monitor stops polling the isolator.
  monitor.unwatch(frameworkId, executorId);

  // Wait until ResourceMonitorProcess::unwatch has completed.
  process::Clock::settle();

  // This time, Isolator::usage should not get called.
  EXPECT_CALL(isolator, usage(frameworkId, executorId))
    .Times(0);

  process::Clock::advance(slave::RESOURCE_MONITORING_INTERVAL);
  process::Clock::settle();

  response = process::http::get(upid, "usage.json");

  AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
  AWAIT_EXPECT_RESPONSE_HEADER_EQ(
      "application/json",
      "Content-Type",
      response);
  AWAIT_EXPECT_RESPONSE_BODY_EQ("[]", response);
}