/**
 * Passes through only PR (production, non-revocable) executors.
 *
 * Executors without executor_info or allocated resources, and executors
 * holding revocable (best-effort) resources, are dropped. The agent's
 * total capacity is always copied into the product.
 *
 * @param in Incoming ResourceUsage snapshot from the agent.
 * @return Nothing() on success; errors are logged per-executor and the
 *         offending executors are filtered out instead of failing.
 */
Try<Nothing> PrExecutorPassFilter::consume(const ResourceUsage& in) {
  ResourceUsage product;
  // Preserve the agent's total capacity in the filtered product.
  product.mutable_total()->CopyFrom(in.total());

  // FIX: iterate by const reference; the original copied every
  // ResourceUsage_Executor protobuf message per iteration.
  for (const ResourceUsage_Executor& inExec : in.executors()) {
    if (!inExec.has_executor_info()) {
      LOG(ERROR) << name << "Executor <unknown>"
                 << " does not include executor_info";
      // Filter out these executors.
      continue;
    }

    if (inExec.allocated().size() == 0) {
      LOG(ERROR) << name << "Executor "
                 << inExec.executor_info().executor_id().value()
                 << " does not include allocated resources.";
      // Filter out these executors.
      continue;
    }

    Resources allocated(inExec.allocated());
    // Check if task uses revocable resources; revocable => best-effort
    // executor, which this filter drops.
    if (!allocated.revocable().empty()) {
      continue;
    }

    // Add a PR executor to the output.
    ResourceUsage_Executor* outExec = product.mutable_executors()->Add();
    outExec->CopyFrom(inExec);
  }

  // Continue the pipeline with the filtered usage.
  produce(product);
  return Nothing();
}
/**
 * In this test we generate stable load with drop and
 * test the RollingChangePointDetector. We expect one
 * contention.
 *
 * Scenario: constant IPC of 10 for 100 iterations, then the load is
 * lowered by 1 per iteration for 10 iterations; the detector should
 * trip exactly once (checked at iteration 106 via `dropped`).
 */
TEST(DropFilterRollingDetectorTest, StableLoadWithDrop) {
  const uint64_t WINDOWS_SIZE = 10;
  const uint64_t CONTENTION_COOLDOWN = 10;
  const double_t RELATIVE_THRESHOLD = 5;
  const uint64_t LOAD_ITERATIONS = 200;

  // End of pipeline.
  MockSink<Contentions> mockSink;
  EXPECT_CALL(mockSink, consume(_))
    .Times(LOAD_ITERATIONS);

  DropFilter<RollingChangePointDetector> dropFilter(
      &mockSink, usage::getIpc,
      ChangePointDetectionState::createForRollingDetector(
          WINDOWS_SIZE, CONTENTION_COOLDOWN, RELATIVE_THRESHOLD));

  // Fake slave ResourceUsage source.
  MockSource<ResourceUsage> usageSource(&dropFilter);

  Try<mesos::FixtureResourceUsage> usages =
      JsonUsage::ReadJson("tests/fixtures/start_json_test.json");
  if (usages.isError()) {
    LOG(ERROR) << "JsonSource failed: " << usages.error() << std::endl;
    // NOTE(review): execution continues and usages.get() below is still
    // invoked on an errored Try — consider failing the test here.
  }

  ResourceUsage usage;
  usage.CopyFrom(usages.get().resource_usage(0));

  // Per-iteration decrement applied during the drop window.
  const double_t DROP_PROGRES = 1;

  LoadGenerator loadGen(
      [](double_t iter) { return 10; },  // Constant base signal of 10 IPC.
      new ZeroNoise(),
      LOAD_ITERATIONS);

  bool dropped = false;
  for (; loadGen.end(); loadGen++) {
    usage.mutable_executors(0)->CopyFrom(
        generateIPC(usage.executors(0), (*loadGen)(), (*loadGen).timestamp));

    // Run pipeline iteration.
    usageSource.produce(usage);

    if (dropped) {
      // The iteration right after the detector tripped must carry the
      // contention naming the fixture's second executor as victim.
      dropped = false;
      mockSink.expectContentionWithVictim("serenity2");
    } else {
      mockSink.expectContentions(0);
    }

    if (loadGen.iteration >= 100 && loadGen.iteration < 110) {
      // After 6 iterations of 1 drop progress value should be below
      // threshold (4).
      if (loadGen.iteration == 105) dropped = true;
      loadGen.modifier -= DROP_PROGRES;
    }
  }
}
/**
 * In this test we generate load with noise and
 * test the CpuUsageEMAfilter output in every iteration.
 *
 * A constant signal (10) with symmetric noise (+/-5) is smoothed by an
 * EMA with alpha 0.2; every smoothed sample must stay within THRESHOLD
 * of the constant value.
 */
TEST(EMATest, CpuUsageEMATestNoisyConstSample) {
  // End of pipeline.
  MockSink<ResourceUsage> mockSink;

  // Third component in pipeline.
  EMAFilter cpuUsageEMAFilter(
      &mockSink, usage::getCpuUsage, usage::setEmaCpuUsage, 0.2);

  // Second component in pipeline.
  // We need that for cumulative metrics.
  CumulativeFilter cumulativeFilter(
      &cpuUsageEMAFilter);

  // First component in pipeline.
  MockSource<ResourceUsage> source(&cumulativeFilter);

  Try<mesos::FixtureResourceUsage> usages =
      JsonUsage::ReadJson("tests/fixtures/start_json_test.json");
  if (usages.isError()) {
    LOG(ERROR) << "JsonSource failed: " << usages.error() << std::endl;
    // NOTE(review): usages.get() below still runs on an errored Try.
  }

  ResourceUsage usage;
  usage.CopyFrom(usages.get().resource_usage(0));

  const double_t CPU_USAGE_VALUE = 10;  // Expected smoothed usage level.
  const double_t THRESHOLD = 1.2;       // Allowed deviation after smoothing.
  const double_t MAX_NOISE = 5;         // Symmetric noise amplitude on input.
  const int32_t ITERATIONS = 100;

  SignalScenario signalGen =
    SignalScenario(ITERATIONS)
      .use(math::const10Function)
      .use(new SymetricNoiseGenerator(MAX_NOISE));

  ITERATE_SIGNAL(signalGen) {
    usage.mutable_executors(0)->CopyFrom(
        generateCpuUsage(usage.executors(0),
                         (uint64_t)(*signalGen).cumulative(),
                         signalGen->timestamp));

    // Run pipeline iteration
    source.produce(usage);

    // The first sample only seeds the EMA state (the filter forwards
    // nothing on it), so checks start from the second iteration.
    if (signalGen.iteration > 0)
      mockSink.expectCpuUsage(0, CPU_USAGE_VALUE, THRESHOLD);
  }

  // 100 iterations minus the seeding iteration = 99 forwarded messages.
  EXPECT_EQ(99, mockSink.numberOfMessagesConsumed);
}
/**
 * Records the first-seen (start) time of every executor in the snapshot.
 *
 * Executors already present in `started` keep their original timestamp;
 * newly seen executors are stamped with the current wall-clock time.
 * The input is forwarded downstream unchanged.
 *
 * @param in Incoming ResourceUsage snapshot.
 * @return Nothing() always.
 */
Try<Nothing> ExecutorAgeFilter::consume(const ResourceUsage& in) {
  // One timestamp per batch: all executors discovered in this snapshot
  // share the same start time.
  double_t now = time(nullptr);  // FIX: nullptr instead of NULL.

  // FIX: iterate by const reference; the original copied each
  // ResourceUsage_Executor protobuf message per iteration.
  for (const ResourceUsage_Executor& executor : in.executors()) {
    auto startedTime = this->started->find(executor.executor_info());
    if (startedTime == this->started->end()) {
      // If executor is missing, create start entry for executor.
      this->started->insert(pair<ExecutorInfo, double_t>(
          executor.executor_info(), now));
      this->age(executor.executor_info());  // For test!
    }
  }

  // TODO(nnielsen): Clean up finished frameworks and executors.

  // Forward the snapshot unmodified.
  this->produce(in);
  return Nothing();
}
/**
 * Feeds a perfectly stable IPC signal through the DropFilter with a
 * RollingChangePointDetector and verifies that no contention is ever
 * generated.
 */
TEST(DropFilterRollingDetectorTest, StableLoad) {
  const uint64_t kWindowSize = 10;
  const uint64_t kContentionCooldown = 10;
  const double_t kRelativeThreshold = 0.5;
  const uint64_t kIterations = 100;

  // Sink terminating the pipeline; every iteration must reach it.
  MockSink<Contentions> contentionSink;
  EXPECT_CALL(contentionSink, consume(_)).Times(kIterations);

  DropFilter<RollingChangePointDetector> dropFilter(
      &contentionSink,
      usage::getIpc,
      ChangePointDetectionState::createForRollingDetector(
          kWindowSize, kContentionCooldown, kRelativeThreshold));

  // Fake slave ResourceUsage source.
  MockSource<ResourceUsage> usageProducer(&dropFilter);

  Try<mesos::FixtureResourceUsage> fixture =
      JsonUsage::ReadJson("tests/fixtures/start_json_test.json");
  if (fixture.isError()) {
    LOG(ERROR) << "JsonSource failed: " << fixture.error() << std::endl;
  }

  ResourceUsage usage;
  usage.CopyFrom(fixture.get().resource_usage(0));

  // Constant signal of 10 IPC with zero noise.
  LoadGenerator signal(
      [](double_t iteration) { return 10; },
      new ZeroNoise(),
      kIterations);

  for (; signal.end(); signal++) {
    usage.mutable_executors(0)->CopyFrom(
        generateIPC(usage.executors(0), (*signal)(), (*signal).timestamp));

    // Push one sample through the whole pipeline.
    usageProducer.produce(usage);

    // No contention may ever appear for a flat signal.
    if (signal.iteration > 0) contentionSink.expectContentions(0);
  }
}
/**
 * Drives the config-based CpuQoSPipeline (assurance detector + EMA):
 * IPC for the 4-CPU PR executor halves at iteration 11; KILL corrections
 * are expected exactly at iterations 11 and 16 (second one after the
 * 4-iteration contention cooldown), never targeting PR executors.
 */
TEST(QoSIpcPipelineTest, AssuranceDetectorTwoDropCorrectionsWithEma) {
  uint64_t WINDOWS_SIZE = 10;
  uint64_t CONTENTION_COOLDOWN = 4;
  double_t FRATIONAL_THRESHOLD = 0.3;
  double_t SEVERITY_LEVEL = 1;
  double_t NEAR_LEVEL = 0.1;

  MockSlaveUsage mockSlaveUsage(QOS_PIPELINE_FIXTURE2);

  SerenityConfig conf;
  conf["Detector"] = createAssuranceDetectorCfg(
      WINDOWS_SIZE, CONTENTION_COOLDOWN, FRATIONAL_THRESHOLD,
      SEVERITY_LEVEL, NEAR_LEVEL);
  conf.set(ema::ALPHA, 0.9);
  conf.set(ENABLED_VISUALISATION, false);
  conf.set(VALVE_OPENED, true);  // Pipeline enabled from the start.

  QoSControllerPipeline* pipeline = new CpuQoSPipeline(conf);

  // First iteration.
  Result<QoSCorrections> corrections =
      pipeline->run(mockSlaveUsage.usage().get());
  EXPECT_NONE(corrections);

  ResourceUsage usage = mockSlaveUsage.usage().get();
  const int32_t LOAD_ITERATIONS = 17;
  LoadGenerator loadGen(
      [](double_t iter) { return 1; },
      new ZeroNoise(),
      LOAD_ITERATIONS);

  for (; loadGen.end(); loadGen++) {
    // Test scenario: After 10 iterations create drop in IPC
    // for executor num 3.
    double_t ipcFor3Executor = (*loadGen)();
    if (loadGen.iteration >= 11) {
      ipcFor3Executor /= 2.0;
    }

    usage.mutable_executors(PR_4CPUS)->CopyFrom(
        generateIPC(usage.executors(PR_4CPUS),
                    ipcFor3Executor,
                    (*loadGen).timestamp));

    usage.mutable_executors(PR_2CPUS)->CopyFrom(
        generateIPC(usage.executors(PR_2CPUS),
                    (*loadGen)(),
                    (*loadGen).timestamp));

    // Third iteration (repeated).
    corrections = pipeline->run(usage);

    // Assurance Detector will wait for signal to be returned to the
    // established state.
    if (loadGen.iteration == 11 || loadGen.iteration == 16) {
      EXPECT_SOME(corrections);
      ASSERT_EQ(slave::QoSCorrection_Type_KILL,
                corrections.get().front().type());
      // Make sure that we do not kill PR tasks!
      EXPECT_NE("serenityPR",
                corrections.get().front().kill().executor_id().value());
      EXPECT_NE("serenityPR2",
                corrections.get().front().kill().executor_id().value());
    } else {
      EXPECT_SOME(corrections);
      EXPECT_TRUE(corrections.get().empty());
    }
  }

  // NOTE(review): leaks if an ASSERT above aborts the test early.
  delete pipeline;
}
/**
 * Drives the IPS pipeline with a RollingFractionalDetector + EMA: IPS
 * for the 4-CPU PR executor is cut to one third at iteration 11; a KILL
 * correction is expected from iteration 13 onward (EMA with alpha 0.4
 * needs a couple of samples to cross the 0.5 fractional threshold),
 * never targeting PR executors.
 */
TEST(QoSIpsPipelineTest, RollingFractionalDetectorOneDropCorrectionsWithEma) {
  QoSPipelineConf conf;
  ChangePointDetectionState cpdState;
  // Detector configuration:
  // How far we look back in samples.
  cpdState.windowSize = 10;
  // How many iterations detector will wait with creating another
  // contention.
  cpdState.contentionCooldown = 10;
  // Defines how much (relatively to base point) value must drop to trigger
  // contention.
  // Most detectors will use that.
  cpdState.fractionalThreshold = 0.5;
  // Defines how many instructions can be done per one CPU in one second.
  // This option helps RollingFractionalDetector to estimate severity of
  // drop.
  cpdState.severityLevel = 1000000000;  // 1 Billion.

  conf.cpdState = cpdState;
  conf.emaAlpha = 0.4;
  conf.visualisation = false;
  // NOTE(review): the original comment said "pipeline disabled", but the
  // valve is opened here, i.e. the QoS pipeline is ENABLED from the start.
  conf.valveOpened = true;

  MockSlaveUsage mockSlaveUsage(QOS_PIPELINE_FIXTURE3);

  QoSControllerPipeline* pipeline =
      new IpsQoSPipeline<RollingFractionalDetector>(conf);

  // First iteration.
  Result<QoSCorrections> corrections =
      pipeline->run(mockSlaveUsage.usage().get());
  EXPECT_NONE(corrections);

  // Second iteration is used for manually configured load.
  ResourceUsage usage = mockSlaveUsage.usage().get();
  const int32_t LOAD_ITERATIONS = 14;
  LoadGenerator loadGen(
      [](double_t iter) { return 3000000000; },  // 3 billion IPS base load.
      new ZeroNoise(),
      LOAD_ITERATIONS);

  for (; loadGen.end(); loadGen++) {
    // Test scenario: After 10 iterations create drop in IPS for executor num 3.
    double ipsFor3Executor = (*loadGen)();
    if (loadGen.iteration >= 11) {
      ipsFor3Executor /= 3.0;
    }

    usage.mutable_executors(PR_4CPUS)->CopyFrom(
        generateIPS(usage.executors(PR_4CPUS),
                    ipsFor3Executor,
                    (*loadGen).timestamp));

    usage.mutable_executors(PR_2CPUS)->CopyFrom(
        generateIPS(usage.executors(PR_2CPUS),
                    (*loadGen)(),
                    (*loadGen).timestamp));

    // Third iteration (repeated).
    corrections = pipeline->run(usage);

    if (loadGen.iteration >= 13) {
      EXPECT_SOME(corrections);
      ASSERT_EQ(slave::QoSCorrection_Type_KILL,
                corrections.get().front().type());
      // Make sure that we do not kill PR tasks!
      EXPECT_NE("serenityPR",
                corrections.get().front().kill().executor_id().value());
      EXPECT_NE("serenityPR2",
                corrections.get().front().kill().executor_id().value());
    } else {
      EXPECT_SOME(corrections);
      EXPECT_TRUE(corrections.get().empty());
    }
  }

  // NOTE(review): leaks if an ASSERT above aborts the test early.
  delete pipeline;
}
/**
 * Same scenario as the config-based assurance test, but built through
 * QoSPipelineConf: IPC for the 4-CPU PR executor halves at iteration 11;
 * KILL corrections are expected exactly at iterations 11 and 16 (the
 * second after the 4-iteration cooldown), never targeting PR executors.
 */
TEST(QoSIpcPipelineTest, AssuranceFractionalDetectorTwoDropCorrectionsWithEma) {
  QoSPipelineConf conf;
  ChangePointDetectionState cpdState;
  // Detector configuration:
  // How far we look back in samples.
  cpdState.windowSize = 10;
  // How many iterations detector will wait with creating another
  // contention.
  cpdState.contentionCooldown = 4;
  // Defines how much (relatively to base point) value must drop to trigger
  // contention.
  // Most detectors will use that.
  cpdState.fractionalThreshold = 0.3;
  // Defines how to convert difference in values to CPU.
  // This option helps RollingFractionalDetector to estimate severity of
  // drop.
  cpdState.severityLevel = 1;
  cpdState.nearFraction = 0.1;

  conf.cpdState = cpdState;
  conf.emaAlpha = 0.9;
  conf.visualisation = false;
  // NOTE(review): the original comment said "pipeline disabled", but the
  // valve is opened here, i.e. the QoS pipeline is ENABLED from the start.
  conf.valveOpened = true;

  MockSlaveUsage mockSlaveUsage(QOS_PIPELINE_FIXTURE2);

  QoSControllerPipeline* pipeline =
      new CpuQoSPipeline<AssuranceFractionalDetector>(conf);

  // First iteration.
  Result<QoSCorrections> corrections =
      pipeline->run(mockSlaveUsage.usage().get());
  EXPECT_NONE(corrections);

  ResourceUsage usage = mockSlaveUsage.usage().get();
  const int32_t LOAD_ITERATIONS = 17;
  LoadGenerator loadGen(
      [](double_t iter) { return 1; },
      new ZeroNoise(),
      LOAD_ITERATIONS);

  for (; loadGen.end(); loadGen++) {
    // Test scenario: After 10 iterations create drop in IPC
    // for executor num 3.
    double_t ipcFor3Executor = (*loadGen)();
    if (loadGen.iteration >= 11) {
      ipcFor3Executor /= 2.0;
    }

    usage.mutable_executors(PR_4CPUS)->CopyFrom(
        generateIPC(usage.executors(PR_4CPUS),
                    ipcFor3Executor,
                    (*loadGen).timestamp));

    usage.mutable_executors(PR_2CPUS)->CopyFrom(
        generateIPC(usage.executors(PR_2CPUS),
                    (*loadGen)(),
                    (*loadGen).timestamp));

    // Third iteration (repeated).
    corrections = pipeline->run(usage);

    // Assurance Detector will wait for signal to be returned to the
    // established state.
    if (loadGen.iteration == 11 || loadGen.iteration == 16) {
      EXPECT_SOME(corrections);
      ASSERT_EQ(slave::QoSCorrection_Type_KILL,
                corrections.get().front().type());
      // Make sure that we do not kill PR tasks!
      EXPECT_NE("serenityPR",
                corrections.get().front().kill().executor_id().value());
      EXPECT_NE("serenityPR2",
                corrections.get().front().kill().executor_id().value());
    } else {
      EXPECT_SOME(corrections);
      EXPECT_TRUE(corrections.get().empty());
    }
  }

  // NOTE(review): leaks if an ASSERT above aborts the test early.
  delete pipeline;
}
/**
 * Drives the CpuQoSPipeline with a RollingChangePointDetector + heavy
 * EMA smoothing (alpha 0.2): IPC for the 4-CPU PR executor halves at
 * iteration 11; the smoothed signal crosses the threshold only at
 * iteration 15, where a KILL correction (never a PR executor) is
 * expected.
 */
TEST(QoSIpcPipelineTest, RollingDetectorOneDropCorrectionsWithEma) {
  uint64_t WINDOWS_SIZE = 10;
  uint64_t CONTENTION_COOLDOWN = 10;
  double_t RELATIVE_THRESHOLD = 0.3;

  MockSlaveUsage mockSlaveUsage(QOS_PIPELINE_FIXTURE2);

  QoSControllerPipeline* pipeline =
      new CpuQoSPipeline<RollingChangePointDetector>(
          QoSPipelineConf(
              ChangePointDetectionState::createForRollingDetector(
                  WINDOWS_SIZE,
                  CONTENTION_COOLDOWN,
                  RELATIVE_THRESHOLD),
              0.2,  // Alpha = 1 means no smoothing. 0.2 means high smoothing.
              false,   // Visualisation disabled.
              true));  // Valve opened: pipeline enabled.

  // First iteration.
  Result<QoSCorrections> corrections =
      pipeline->run(mockSlaveUsage.usage().get());
  EXPECT_NONE(corrections);

  ResourceUsage usage = mockSlaveUsage.usage().get();
  const int32_t LOAD_ITERATIONS = 16;
  LoadGenerator loadGen(
      [](double_t iter) { return 1; },
      new ZeroNoise(),
      LOAD_ITERATIONS);

  for (; loadGen.end(); loadGen++) {
    // Test scenario: After 10 iterations create drop in
    // IPC for executor num 3.
    double_t ipcFor3Executor = (*loadGen)();
    if (loadGen.iteration >= 11) {
      ipcFor3Executor /= 2.0;
    }

    usage.mutable_executors(PR_4CPUS)->CopyFrom(
        generateIPC(usage.executors(PR_4CPUS),
                    ipcFor3Executor,
                    (*loadGen).timestamp));

    usage.mutable_executors(PR_2CPUS)->CopyFrom(
        generateIPC(usage.executors(PR_2CPUS),
                    (*loadGen)(),
                    (*loadGen).timestamp));

    // Third iteration (repeated).
    corrections = pipeline->run(usage);

    if (loadGen.iteration >= 15) {
      EXPECT_SOME(corrections);
      ASSERT_EQ(slave::QoSCorrection_Type_KILL,
                corrections.get().front().type());
      // Make sure that we do not kill PR tasks!
      EXPECT_NE("serenityPR",
                corrections.get().front().kill().executor_id().value());
      EXPECT_NE("serenityPR2",
                corrections.get().front().kill().executor_id().value());
    } else {
      EXPECT_SOME(corrections);
      EXPECT_TRUE(corrections.get().empty());
    }
  }

  // NOTE(review): leaks if an ASSERT above aborts the test early.
  delete pipeline;
}
/**
 * Emits a CPU contention when the agent-wide CPU usage (summed across
 * all executors with statistics) exceeds the configured utilization
 * threshold AND at least one best-effort (revocable) executor is
 * running. Otherwise produces an empty Contentions list.
 *
 * Returns Error when the incoming ResourceUsage carries no total or no
 * total cpus; per-executor problems are logged and skipped instead.
 */
Try<Nothing> TooHighCpuUsageDetector::consume(const ResourceUsage& in) {
  Contentions product;

  if (in.total_size() == 0) {
    return Error(std::string(NAME) + " No total in ResourceUsage");
  }

  Resources totalAgentResources(in.total());
  Option<double_t> totalAgentCpus = totalAgentResources.cpus();

  if (totalAgentCpus.isNone()) {
    return Error(std::string(NAME) + " No total cpus in ResourceUsage");
  }

  double_t agentSumValue = 0;   // Sum of per-executor CPU usage.
  uint64_t beExecutors = 0;     // Count of best-effort (revocable) executors.

  for (const ResourceUsage_Executor& inExec : in.executors()) {
    if (!inExec.has_executor_info()) {
      SERENITY_LOG(ERROR) << "Executor <unknown>"
                          << " does not include executor_info";
      // Filter out these executors.
      continue;
    }
    if (!inExec.has_statistics()) {
      SERENITY_LOG(ERROR) << "Executor "
                          << inExec.executor_info().executor_id().value()
                          << " does not include statistics.";
      // Filter out these executors.
      continue;
    }

    Try<double_t> value = this->cpuUsageGetFunction(inExec);
    if (value.isError()) {
      SERENITY_LOG(ERROR) << value.error();
      continue;
    }

    agentSumValue += value.get();

    // Revocable allocation marks a best-effort executor.
    if (!Resources(inExec.allocated()).revocable().empty()) {
      beExecutors++;
    }
  }

  // Debug only
  SERENITY_LOG(INFO) << "Sum = " << agentSumValue << " vs total = "
                     << totalAgentCpus.get();

  // Utilization as a fraction of the agent's total CPUs.
  double_t lvl = agentSumValue / totalAgentCpus.get();
  if (lvl > this->cfgUtilizationThreshold) {
    if (beExecutors == 0) {
      SERENITY_LOG(INFO) << "No BE tasks - only high host utilization";
    } else {
      SERENITY_LOG(INFO) << "Creating CPU contention, because of the value"
                         << " above the threshold. " << agentSumValue << "/"
                         << totalAgentCpus.get();
      // NOTE(review): in this branch sum > threshold * total, so
      // (total - sum) can be negative — confirm a negative severity is
      // the intended encoding for createContention here.
      product.push_back(createContention(totalAgentCpus.get() - agentSumValue,
                                         Contention_Type_CPU));
    }
  }

  // Continue pipeline.
  this->produce(product);
  return Nothing();
}
/**
 * Smooths a per-executor metric with an exponential moving average.
 *
 * For each executor: on its first appearance an EMA state is seeded and
 * the executor is NOT forwarded; on subsequent samples the metric read
 * by valueGetFunction is EMA-filtered and written back into a copy of
 * the executor via valueSetFunction. The product (with the agent total
 * copied over) is produced only when at least one executor survived.
 *
 * @param in Incoming ResourceUsage snapshot.
 * @return Nothing() always; per-executor errors are logged and skipped.
 */
Try<Nothing> EMAFilter::consume(const ResourceUsage& in) {
  ResourceUsage product;

  // FIX: iterate by const reference; the original copied every
  // ResourceUsage_Executor protobuf message per iteration.
  for (const ResourceUsage_Executor& inExec : in.executors()) {
    if (!inExec.has_executor_info()) {
      SERENITY_LOG(ERROR) << "Executor <unknown>"
                          << " does not include executor_info";
      // Filter out these executors.
      continue;
    }
    if (!inExec.has_statistics()) {
      SERENITY_LOG(ERROR) << "Executor "
                          << inExec.executor_info().executor_id().value()
                          << " does not include statistics.";
      // Filter out these executors.
      continue;
    }

    // Check if EMA for given executor exists.
    auto emaSample = this->emaSamples->find(inExec.executor_info());
    if (emaSample == this->emaSamples->end()) {
      SERENITY_LOG(ERROR) << "First EMA iteration for: "
                          << WID(inExec.executor_info()).toString();

      // If not - insert new one. The executor is intentionally not
      // forwarded on its seeding sample.
      ExponentialMovingAverage ema(EMA_REGULAR_SERIES, this->alpha);
      emaSamples->insert(std::pair<ExecutorInfo, ExponentialMovingAverage>(
          inExec.executor_info(), ema));
    } else {
      // Get proper value.
      Try<double_t> value = this->valueGetFunction(inExec);
      if (value.isError()) {
        SERENITY_LOG(ERROR) << value.error();
        continue;
      }

      // Perform EMA filtering (mutates the stored EMA state).
      double_t emaValue = (emaSample->second).calculateEMA(
          value.get(), inExec.statistics().perf().timestamp());

      // Store EMA value in a copy of the executor message.
      ResourceUsage_Executor* outExec = new ResourceUsage_Executor(inExec);
      Try<Nothing> result = this->valueSetFunction(emaValue, outExec);
      if (result.isError()) {
        SERENITY_LOG(ERROR) << result.error();
        delete outExec;
        continue;
      }

      // Add an executor only when there was no error; AddAllocated
      // transfers ownership of outExec to the product message.
      product.mutable_executors()->AddAllocated(outExec);
    }
  }

  if (0 != product.executors_size()) {
    SERENITY_LOG(INFO) << "Continuing with " << product.executors_size()
                       << " executor(s).";
    // Continue pipeline.
    // Copy total agent's capacity.
    product.mutable_total()->CopyFrom(in.total());
    produce(product);
  }

  return Nothing();
}