// Returns True when value is duplicated in list. // TODO(bplotka): Move to QoSCorrections std::set in future. bool checkForDuplicates( slave::QoSCorrection value, QoSCorrections corrections) { for (slave::QoSCorrection correction : corrections) { if (value.type() == slave::QoSCorrection_Type_KILL && value.has_kill() && value.kill().has_executor_id() && value.kill().has_framework_id() && correction.type() == slave::QoSCorrection_Type_KILL && correction.has_kill() && correction.kill().has_executor_id() && correction.kill().has_framework_id()) { if (correction.kill().executor_id().value() == value.kill().executor_id().value() && correction.kill().framework_id().value() == value.kill().framework_id().value()) { // Found duplicate. return true; } } else { SERENITY_LOG(WARNING) << "Received correction without all required data."; } } return false; }
// Puts the detector back into its normal state by forgetting the value
// remembered before a drop (if any). Always returns Nothing().
Try<Nothing> AssuranceDetector::reset() {
  SERENITY_LOG(INFO) << "Resetting any drop tracking if exists.";

  // Clearing this option stops any ongoing drop tracking.
  valueBeforeDrop = None();

  return Nothing();
}
// Performs a single QoS decision step and always produces a result for
// the next stage of the pipeline:
//   - no contentions or no revocable executors: empty results;
//   - cooldown counter set: empty results;
//   - newContentionsReceived() failed: empty results, error is returned;
//   - no corrections chosen: the current contentions are passed on;
//   - corrections chosen: they are produced and the cooldown is armed.
Try<Nothing> QoSCorrectionObserver::doQosDecision() {
  // Keep the short-circuit: usage is inspected only when there are
  // contentions to act upon.
  const bool nothingToCorrect =
      contentions.get().empty() ||
      ResourceUsageHelper::getRevocableExecutors(usage.get()).empty();

  if (nothingToCorrect) {
    SERENITY_LOG(INFO) << "Empty contentions received.";
    emptyContentionsReceived();

    // Produce empty corrections and contentions.
    produceResultsAndClearConsumedData();
    return Nothing();
  }

  if (iterationCooldownCounter.isSome()) {
    SERENITY_LOG(INFO) << "QoS Correction observer is in cooldown phase";
    cooldownPhase();

    // Produce empty corrections and contentions.
    produceResultsAndClearConsumedData();
    return Nothing();
  }

  Try<QoSCorrections> strategyResult = newContentionsReceived();
  if (strategyResult.isError()) {
    SERENITY_LOG(INFO) << "corrections returned error: "
                       << strategyResult.error();

    // Produce empty corrections and contentions.
    produceResultsAndClearConsumedData();
    return Error(strategyResult.error());
  }

  if (!strategyResult.get().empty()) {
    // Aggressors were pointed out, so the current contentions are not
    // forwarded to the next QoS Controller and the cooldown starts.
    iterationCooldownCounter = this->cooldownIterations;
    produceResultsAndClearConsumedData(strategyResult.get(), Contentions());
    return Nothing();
  }

  // No aggressors were found: hand the contentions over to the next
  // QoS Controller untouched.
  SERENITY_LOG(INFO) << "Strategy didn't found aggressors";
  produceResultsAndClearConsumedData(QoSCorrections(),
                                     this->contentions.get());
  return Nothing();
}
/**
 * Contention factory.
 *
 * Builds a Detection and assigns its severity only when the given
 * severity is strictly positive; otherwise the severity field is left
 * as default-constructed. The resulting severity is logged.
 */
Detection createContention(double_t severity) {
  Detection detection;

  if (severity > 0) {
    detection.severity = severity;
  }

  // Text shown in the log: the numeric severity or a placeholder.
  std::string severityText;
  if (detection.severity.isSome()) {
    severityText = std::to_string(detection.severity.get());
  } else {
    severityText = "<none>";
  }

  SERENITY_LOG(INFO) << " Created contention with severity = "
                     << severityText;

  return detection;
}
virtual void allProductsReady() { std::vector<QoSCorrections> qosCorrectionsVector = Consumer<QoSCorrections>::getConsumables(); QoSCorrections corrections; uint64_t receivedCententionNum = 0; for (QoSCorrections product : qosCorrectionsVector) { receivedCententionNum += product.size(); for (slave::QoSCorrection correction : product) { if (checkForDuplicates(correction, corrections)) { // Filter out duplicated value. continue; } corrections.push_back(correction); } } SERENITY_LOG(INFO) << "Received " << corrections.size() << " corrections"; produce(corrections); return; }
// Merges the given QoSCorrections products into a single list, dropping
// entries that checkForDuplicates() reports as already present, logs how
// many corrections were received and how many remain, and produces the
// merged list. Always returns Nothing().
virtual Try<Nothing> syncConsume(
    const std::vector<QoSCorrections> products) {
  QoSCorrections corrections;
  uint64_t receivedCorrectionNum = 0;

  // Iterate by const reference to avoid copying lists and messages.
  for (const QoSCorrections& product : products) {
    receivedCorrectionNum += product.size();

    for (const slave::QoSCorrection& correction : product) {
      if (checkForDuplicates(correction, corrections)) {
        // Filter out duplicated value.
        continue;
      }

      corrections.push_back(correction);
    }
  }

  SERENITY_LOG(INFO) << "Received " << receivedCorrectionNum << " corrections"
                     << " and merged to " << corrections.size()
                     << " corrections.";

  produce(corrections);

  return Nothing();
}
// Sums the CPU usage reported by cpuUsageGetFunction for every executor
// that has both executor_info and statistics, and compares the ratio of
// that sum to the agent's total cpus against cfgUtilizationThreshold.
// When the ratio is above the threshold and at least one executor has
// revocable resources allocated, a single CPU contention is produced;
// otherwise an empty product is produced.
//
// Returns an Error when `in` carries no total resources or no total cpus.
Try<Nothing> TooHighCpuUsageDetector::consume(const ResourceUsage& in) {
  if (in.total_size() == 0) {
    return Error(std::string(NAME) + " No total in ResourceUsage");
  }

  Resources agentTotal(in.total());
  Option<double_t> totalCpus = agentTotal.cpus();
  if (totalCpus.isNone()) {
    return Error(std::string(NAME) + " No total cpus in ResourceUsage");
  }

  double_t usedCpus = 0;
  uint64_t revocableExecutorCount = 0;

  for (const ResourceUsage_Executor& executor : in.executors()) {
    // Executors with incomplete data are filtered out.
    if (!executor.has_executor_info()) {
      SERENITY_LOG(ERROR) << "Executor <unknown>"
                          << " does not include executor_info";
      continue;
    }

    if (!executor.has_statistics()) {
      SERENITY_LOG(ERROR) << "Executor "
                          << executor.executor_info().executor_id().value()
                          << " does not include statistics.";
      continue;
    }

    Try<double_t> executorUsage = this->cpuUsageGetFunction(executor);
    if (executorUsage.isError()) {
      SERENITY_LOG(ERROR) << executorUsage.error();
      continue;
    }

    usedCpus += executorUsage.get();

    const bool hasRevocable =
        !Resources(executor.allocated()).revocable().empty();
    if (hasRevocable) {
      ++revocableExecutorCount;
    }
  }

  // Debug only.
  SERENITY_LOG(INFO) << "Sum = " << usedCpus << " vs total = "
                     << totalCpus.get();

  Contentions product;

  const double_t utilization = usedCpus / totalCpus.get();
  const bool aboveThreshold = utilization > this->cfgUtilizationThreshold;

  if (aboveThreshold && revocableExecutorCount == 0) {
    SERENITY_LOG(INFO) << "No BE tasks - only high host utilization";
  } else if (aboveThreshold) {
    SERENITY_LOG(INFO) << "Creating CPU contention, because of the value"
                       << " above the threshold. " << usedCpus << "/"
                       << totalCpus.get();

    product.push_back(createContention(totalCpus.get() - usedCpus,
                                       Contention_Type_CPU));
  }

  // Continue pipeline.
  this->produce(product);

  return Nothing();
}
// Applies exponential-moving-average filtering to every executor in `in`.
//
// For each executor that has executor_info and statistics:
//   - the first time it is seen, a new ExponentialMovingAverage is stored
//     for it and the executor is not forwarded in this iteration;
//   - otherwise the value read by valueGetFunction is run through its EMA
//     and written, via valueSetFunction, into a copy of the executor that
//     is appended to the product.
// Executors for which the getter or setter fails are skipped.
//
// The product (with the agent's total copied from `in`) is produced only
// when it holds at least one executor. Always returns Nothing().
Try<Nothing> EMAFilter::consume(const ResourceUsage& in) {
  ResourceUsage product;

  for (ResourceUsage_Executor inExec : in.executors()) {
    if (!inExec.has_executor_info()) {
      SERENITY_LOG(ERROR) << "Executor <unknown>"
                          << " does not include executor_info";
      // Filter out these executors.
      continue;
    }

    if (!inExec.has_statistics()) {
      SERENITY_LOG(ERROR) << "Executor "
                          << inExec.executor_info().executor_id().value()
                          << " does not include statistics.";
      // Filter out these executors.
      continue;
    }

    // Check if EMA for given executor exists.
    auto emaSample = this->emaSamples->find(inExec.executor_info());
    if (emaSample == this->emaSamples->end()) {
      SERENITY_LOG(ERROR) << "First EMA iteration for: "
                          << WID(inExec.executor_info()).toString();

      // If not - insert new one.
      ExponentialMovingAverage ema(EMA_REGULAR_SERIES, this->alpha);
      emaSamples->insert(std::pair<ExecutorInfo, ExponentialMovingAverage>(
          inExec.executor_info(), ema));
      continue;
    }

    // Get proper value.
    Try<double_t> value = this->valueGetFunction(inExec);
    if (value.isError()) {
      SERENITY_LOG(ERROR) << value.error();
      continue;
    }

    // Perform EMA filtering.
    double_t emaValue = (emaSample->second).calculateEMA(
        value.get(),
        inExec.statistics().perf().timestamp());

    // Store EMA value. The output executor is allocated inside the
    // product's repeated field, so it is owned by `product` from the
    // start and no manual new/delete is needed.
    ResourceUsage_Executor* outExec = product.mutable_executors()->Add();
    outExec->CopyFrom(inExec);

    Try<Nothing> result = this->valueSetFunction(emaValue, outExec);
    if (result.isError()) {
      SERENITY_LOG(ERROR) << result.error();
      // Keep an executor only when there was no error.
      product.mutable_executors()->RemoveLast();
      continue;
    }
  }

  if (0 != product.executors_size()) {
    SERENITY_LOG(INFO) << "Continuing with "
                       << product.executors_size() << " executor(s).";

    // Continue pipeline.
    // Copy total agent's capacity.
    product.mutable_total()->CopyFrom(in.total());
    produce(product);
  }

  return Nothing();
}
// Processes one input sample.
//
// While a drop is being tracked (valueBeforeDrop is set), the sample is
// compared with the remembered value lowered by cfgNearFraction of it:
// a sample at or above that level resets the tracking and falls through
// to the voting below, any other sample yields a contention whose
// severity is the remaining distance scaled by cfgSeverityFraction.
//
// Otherwise every base point votes: a vote is cast when the fractional
// drop of the sample relative to that base point reaches
// cfgFractionalThreshold. When the votes reach quorumNum, the mean of the
// voting base points is remembered in valueBeforeDrop and a contention
// with severity (mean drop fraction * cfgSeverityFraction) is returned.
// In all remaining cases None() is returned.
Result<Detection> AssuranceDetector::_processSample(
    double_t in) {
  // Check if we track some contention.
  if (this->valueBeforeDrop.isSome()) {
    // Level the signal has to reach to be considered back to normal.
    const double_t nearValue =
        this->cfgNearFraction * this->valueBeforeDrop.get();
    const double_t recoveryLevel = this->valueBeforeDrop.get() - nearValue;

    SERENITY_LOG(INFO) << "Waiting for signal: " << in
                       << " to return to: " << recoveryLevel
                       << " after corrections. ";

    // Written as a negated ">=" on purpose so that a non-comparable
    // sample takes the contention path, as before.
    const bool recovered = in >= recoveryLevel;
    if (!recovered) {
      // Create contention.
      return this->createContention(
          (recoveryLevel - in) * this->cfgSeverityFraction);
    }

    SERENITY_LOG(INFO) << "Signal returned to established state.";
    this->reset();
  }

  double_t dropFractionSum = 0;
  double_t votingBaseSum = 0;
  this->dropVotes = 0;
  std::stringstream baseValuesText;

  // Make a voting within all basePoints (checkpoints). Drop will be
  // detected when dropVotes will be >= Quorum number.
  for (std::list<double_t>::iterator basePoint : this->basePoints) {
    const double_t baseValue = *basePoint;
    baseValuesText << " " << baseValue;

    // Check if drop happened for this basePoint.
    const double_t dropFraction = 1.0 - (in / baseValue);

    const char* voteMark = "[+] ";
    if (dropFraction >= this->cfgFractionalThreshold) {
      // Vote on drop.
      ++this->dropVotes;
      dropFractionSum += dropFraction;
      votingBaseSum += baseValue;
      voteMark = "[-] ";
    } else if (baseValue >= in) {
      voteMark = "[~] ";
    }

    baseValuesText << voteMark;
  }

  // Turn the sums into means; with no votes both stay equal to 0.
  if (this->dropVotes > 0) {
    dropFractionSum /= this->dropVotes;
    votingBaseSum /= this->dropVotes;
  }
  const double_t currentDropFraction = dropFractionSum;
  const double_t meanValueBeforeDrop = votingBaseSum;

  SERENITY_LOG(INFO)
    << "{inValue: " << in
    << " |baseValues:" << baseValuesText.str()
    << " |currentDrop %: " << currentDropFraction * 100
    << " |threshold %: " << this->cfgFractionalThreshold * 100
    << " |dropVotes/quorum: " << this->dropVotes
    << "/" << this->quorumNum << "}";

  // Check if drop obtained minimum number of votes.
  if (this->dropVotes >= this->quorumNum) {
    // Create contention.
    this->valueBeforeDrop = meanValueBeforeDrop;

    // TODO(bplotka): Ensure proper severity.
    return this->createContention(
        currentDropFraction * this->cfgSeverityFraction);
  }

  return None();
}