double Checkable::GetLastCheck(void) const { CheckResult::Ptr cr = GetLastCheckResult(); double schedule_end = -1; if (cr) schedule_end = cr->GetScheduleEnd(); return schedule_end; }
/**
 * Computes the latency of a check result: the scheduled duration
 * (schedule end minus schedule start) minus the execution time
 * reported by CalculateExecutionTime().
 *
 * @param cr The check result; may be null.
 * @returns The latency, clamped so it is never negative; 0 when cr is null.
 */
double Checkable::CalculateLatency(const CheckResult::Ptr& cr)
{
	if (!cr)
		return 0;

	const double scheduledDuration = cr->GetScheduleEnd() - cr->GetScheduleStart();
	const double latency = scheduledDuration - CalculateExecutionTime(cr);

	/* Negative latencies are reported as zero. */
	return (latency < 0) ? 0 : latency;
}
/**
 * Records one check in the CIB statistics bucket matching the checkable
 * type (host or service) and the check kind (active or passive).
 *
 * @param cr   The check result; its schedule end is used as the timestamp.
 * @param type The kind of checkable the result belongs to. Any value other
 *             than CheckableHost / CheckableService only logs a warning.
 */
void Checkable::UpdateStatistics(const CheckResult::Ptr& cr, CheckableType type)
{
	time_t ts = cr->GetScheduleEnd();

	switch (type) {
		case CheckableHost:
			if (cr->GetActive())
				CIB::UpdateActiveHostChecksStatistics(ts, 1);
			else
				CIB::UpdatePassiveHostChecksStatistics(ts, 1);
			break;

		case CheckableService:
			if (cr->GetActive())
				CIB::UpdateActiveServiceChecksStatistics(ts, 1);
			else
				CIB::UpdatePassiveServiceChecksStatistics(ts, 1);
			break;

		default:
			Log(LogWarning, "Checkable", "Unknown checkable type for statistic update.");
			break;
	}
}
void Checkable::ProcessCheckResult(const CheckResult::Ptr& cr, const MessageOrigin::Ptr& origin) { { ObjectLock olock(this); m_CheckRunning = false; } double now = Utility::GetTime(); if (cr->GetScheduleStart() == 0) cr->SetScheduleStart(now); if (cr->GetScheduleEnd() == 0) cr->SetScheduleEnd(now); if (cr->GetExecutionStart() == 0) cr->SetExecutionStart(now); if (cr->GetExecutionEnd() == 0) cr->SetExecutionEnd(now); if (!origin || origin->IsLocal()) { Log(LogDebug, "Checkable") << "No origin or local origin for object '" << GetName() << "', setting " << IcingaApplication::GetInstance()->GetNodeName() << " as check_source."; cr->SetCheckSource(IcingaApplication::GetInstance()->GetNodeName()); } Endpoint::Ptr command_endpoint = GetCommandEndpoint(); /* override check source if command_endpoint was defined */ if (command_endpoint && !GetExtension("agent_check")) { Log(LogDebug, "Checkable") << "command_endpoint found for object '" << GetName() << "', setting " << command_endpoint->GetName() << " as check_source."; cr->SetCheckSource(command_endpoint->GetName()); } /* agent checks go through the api */ if (command_endpoint && GetExtension("agent_check")) { ApiListener::Ptr listener = ApiListener::GetInstance(); if (listener) { /* send message back to its origin */ Dictionary::Ptr message = ClusterEvents::MakeCheckResultMessage(this, cr); listener->SyncSendMessage(command_endpoint, message); } return; } bool reachable = IsReachable(); bool notification_reachable = IsReachable(DependencyNotification); ASSERT(!OwnsLock()); ObjectLock olock(this); CheckResult::Ptr old_cr = GetLastCheckResult(); ServiceState old_state = GetStateRaw(); StateType old_stateType = GetStateType(); long old_attempt = GetCheckAttempt(); bool recovery = false; if (old_cr && cr->GetExecutionStart() < old_cr->GetExecutionStart()) return; /* The ExecuteCheck function already sets the old state, but we need to do it again * in case this was a passive check result. 
*/ SetLastStateRaw(old_state); SetLastStateType(old_stateType); SetLastReachable(reachable); long attempt = 1; std::set<Checkable::Ptr> children = GetChildren(); if (!old_cr) { SetStateType(StateTypeHard); } else if (cr->GetState() == ServiceOK) { if (old_state == ServiceOK && old_stateType == StateTypeSoft) { SetStateType(StateTypeHard); // SOFT OK -> HARD OK recovery = true; } if (old_state != ServiceOK) recovery = true; // NOT OK -> SOFT/HARD OK ResetNotificationNumbers(); SetLastStateOK(Utility::GetTime()); /* update reachability for child objects in OK state */ if (!children.empty()) OnReachabilityChanged(this, cr, children, origin); } else { if (old_attempt >= GetMaxCheckAttempts()) { SetStateType(StateTypeHard); } else if (old_stateType == StateTypeSoft && old_state != ServiceOK) { SetStateType(StateTypeSoft); attempt = old_attempt + 1; //NOT-OK -> NOT-OK counter } else if (old_state == ServiceOK) { SetStateType(StateTypeSoft); attempt = 1; //OK -> NOT-OK transition, reset the counter } else { attempt = old_attempt; } switch (cr->GetState()) { case ServiceOK: /* Nothing to do here. 
*/ break; case ServiceWarning: SetLastStateWarning(Utility::GetTime()); break; case ServiceCritical: SetLastStateCritical(Utility::GetTime()); break; case ServiceUnknown: SetLastStateUnknown(Utility::GetTime()); break; } /* update reachability for child objects in NOT-OK state */ if (!children.empty()) OnReachabilityChanged(this, cr, children, origin); } if (!reachable) SetLastStateUnreachable(Utility::GetTime()); SetCheckAttempt(attempt); ServiceState new_state = cr->GetState(); SetStateRaw(new_state); bool stateChange = (old_state != new_state); if (stateChange) { SetLastStateChange(now); /* remove acknowledgements */ if (GetAcknowledgement() == AcknowledgementNormal || (GetAcknowledgement() == AcknowledgementSticky && new_state == ServiceOK)) { ClearAcknowledgement(); } /* reschedule direct parents */ BOOST_FOREACH(const Checkable::Ptr& parent, GetParents()) { if (parent.get() == this) continue; ObjectLock olock(parent); parent->SetNextCheck(Utility::GetTime()); } } bool remove_acknowledgement_comments = false; if (GetAcknowledgement() == AcknowledgementNone) remove_acknowledgement_comments = true; bool hardChange = (GetStateType() == StateTypeHard && old_stateType == StateTypeSoft); if (stateChange && old_stateType == StateTypeHard && GetStateType() == StateTypeHard) hardChange = true; bool is_volatile = GetVolatile(); if (hardChange || is_volatile) { SetLastHardStateRaw(new_state); SetLastHardStateChange(now); } if (new_state != ServiceOK) TriggerDowntimes(); Host::Ptr host; Service::Ptr service; tie(host, service) = GetHostService(this); CheckableType checkable_type = CheckableHost; if (service) checkable_type = CheckableService; /* statistics for external tools */ Checkable::UpdateStatistics(cr, checkable_type); bool in_downtime = IsInDowntime(); bool send_notification = hardChange && notification_reachable && !in_downtime && !IsAcknowledged(); if (!old_cr) send_notification = false; /* Don't send notifications for the initial state change */ if (old_state 
== ServiceOK && old_stateType == StateTypeSoft) send_notification = false; /* Don't send notifications for SOFT-OK -> HARD-OK. */ if (is_volatile && old_state == ServiceOK && new_state == ServiceOK) send_notification = false; /* Don't send notifications for volatile OK -> OK changes. */ bool send_downtime_notification = (GetLastInDowntime() != in_downtime); SetLastInDowntime(in_downtime); olock.Unlock(); if (remove_acknowledgement_comments) RemoveCommentsByType(CommentAcknowledgement); Dictionary::Ptr vars_after = new Dictionary(); vars_after->Set("state", new_state); vars_after->Set("state_type", GetStateType()); vars_after->Set("attempt", GetCheckAttempt()); vars_after->Set("reachable", reachable); if (old_cr) cr->SetVarsBefore(old_cr->GetVarsAfter()); cr->SetVarsAfter(vars_after); olock.Lock(); SetLastCheckResult(cr); bool was_flapping, is_flapping; was_flapping = IsFlapping(); if (GetStateType() == StateTypeHard) UpdateFlappingStatus(stateChange); is_flapping = IsFlapping(); olock.Unlock(); // Log(LogDebug, "Checkable") // << "Flapping: Checkable " << GetName() // << " was: " << (was_flapping) // << " is: " << is_flapping) // << " threshold: " << GetFlappingThreshold() // << "% current: " + GetFlappingCurrent()) << "%."; OnNewCheckResult(this, cr, origin); /* signal status updates to for example db_ido */ OnStateChanged(this); String old_state_str = (service ? Service::StateToString(old_state) : Host::StateToString(Host::CalculateState(old_state))); String new_state_str = (service ? Service::StateToString(new_state) : Host::StateToString(Host::CalculateState(new_state))); if (hardChange || is_volatile) { OnStateChange(this, cr, StateTypeHard, origin); Log(LogNotice, "Checkable") << "State Change: Checkable " << GetName() << " hard state change from " << old_state_str << " to " << new_state_str << " detected." << (is_volatile ? " Checkable is volatile." 
: ""); } else if (stateChange) { OnStateChange(this, cr, StateTypeSoft, origin); Log(LogNotice, "Checkable") << "State Change: Checkable " << GetName() << " soft state change from " << old_state_str << " to " << new_state_str << " detected."; } if (GetStateType() == StateTypeSoft || hardChange || recovery || is_volatile) ExecuteEventHandler(); if (send_downtime_notification) OnNotificationsRequested(this, in_downtime ? NotificationDowntimeStart : NotificationDowntimeEnd, cr, "", ""); if (!was_flapping && is_flapping) { OnNotificationsRequested(this, NotificationFlappingStart, cr, "", ""); Log(LogNotice, "Checkable") << "Flapping: Checkable " << GetName() << " started flapping (" << GetFlappingThreshold() << "% < " << GetFlappingCurrent() << "%)."; NotifyFlapping(origin); } else if (was_flapping && !is_flapping) { OnNotificationsRequested(this, NotificationFlappingEnd, cr, "", ""); Log(LogNotice, "Checkable") << "Flapping: Checkable " << GetName() << " stopped flapping (" << GetFlappingThreshold() << "% >= " << GetFlappingCurrent() << "%)."; NotifyFlapping(origin); } else if (send_notification) OnNotificationsRequested(this, recovery ? NotificationRecovery : NotificationProblem, cr, "", ""); }
/**
 * Collects the status fields of the service wrapped by this DB object.
 *
 * Fields derived from the last check result (output, perfdata, check
 * source, last_check, latency, execution_time) are only set when the
 * service has a check result.
 *
 * @returns A newly created dictionary holding the status fields.
 */
Dictionary::Ptr ServiceDbObject::GetStatusFields(void) const
{
	Dictionary::Ptr status = make_shared<Dictionary>();
	Service::Ptr svc = static_pointer_cast<Service>(GetObject());
	CheckResult::Ptr lastResult = svc->GetLastCheckResult();

	/* Plugin output of the last check, if there was one. */
	if (lastResult) {
		status->Set("output", CompatUtility::GetCheckResultOutput(lastResult));
		status->Set("long_output", CompatUtility::GetCheckResultLongOutput(lastResult));
		status->Set("perfdata", CompatUtility::GetCheckResultPerfdata(lastResult));
		status->Set("check_source", lastResult->GetCheckSource());
	}

	/* Current state and check scheduling. */
	status->Set("current_state", svc->GetState());
	status->Set("has_been_checked", CompatUtility::GetCheckableHasBeenChecked(svc));
	status->Set("should_be_scheduled", svc->GetEnableActiveChecks());
	status->Set("current_check_attempt", svc->GetCheckAttempt());
	status->Set("max_check_attempts", svc->GetMaxCheckAttempts());

	if (lastResult)
		status->Set("last_check", DbValue::FromTimestamp(lastResult->GetScheduleEnd()));

	status->Set("next_check", DbValue::FromTimestamp(svc->GetNextCheck()));
	status->Set("check_type", CompatUtility::GetCheckableCheckType(svc));

	/* State history timestamps. */
	status->Set("last_state_change", DbValue::FromTimestamp(svc->GetLastStateChange()));
	status->Set("last_hard_state_change", DbValue::FromTimestamp(svc->GetLastHardStateChange()));
	status->Set("last_time_ok", DbValue::FromTimestamp(static_cast<int>(svc->GetLastStateOK())));
	status->Set("last_time_warning", DbValue::FromTimestamp(static_cast<int>(svc->GetLastStateWarning())));
	status->Set("last_time_critical", DbValue::FromTimestamp(static_cast<int>(svc->GetLastStateCritical())));
	status->Set("last_time_unknown", DbValue::FromTimestamp(static_cast<int>(svc->GetLastStateUnknown())));
	status->Set("state_type", svc->GetStateType());

	/* Notifications and acknowledgements. */
	status->Set("last_notification", DbValue::FromTimestamp(CompatUtility::GetCheckableNotificationLastNotification(svc)));
	status->Set("next_notification", DbValue::FromTimestamp(CompatUtility::GetCheckableNotificationNextNotification(svc)));
	status->Set("no_more_notifications", Empty);
	status->Set("notifications_enabled", CompatUtility::GetCheckableNotificationsEnabled(svc));
	status->Set("problem_has_been_acknowledged", CompatUtility::GetCheckableProblemHasBeenAcknowledged(svc));
	status->Set("acknowledgement_type", CompatUtility::GetCheckableAcknowledgementType(svc));
	status->Set("current_notification_number", CompatUtility::GetCheckableNotificationNotificationNumber(svc));

	/* Feature switches and flapping. */
	status->Set("passive_checks_enabled", CompatUtility::GetCheckablePassiveChecksEnabled(svc));
	status->Set("active_checks_enabled", CompatUtility::GetCheckableActiveChecksEnabled(svc));
	status->Set("event_handler_enabled", CompatUtility::GetCheckableEventHandlerEnabled(svc));
	status->Set("flap_detection_enabled", CompatUtility::GetCheckableFlapDetectionEnabled(svc));
	status->Set("is_flapping", CompatUtility::GetCheckableIsFlapping(svc));
	status->Set("percent_state_change", CompatUtility::GetCheckablePercentStateChange(svc));

	/* Timing figures are stored as strings. */
	if (lastResult) {
		status->Set("latency", Convert::ToString(Service::CalculateLatency(lastResult)));
		status->Set("execution_time", Convert::ToString(Service::CalculateExecutionTime(lastResult)));
	}

	/* Remaining configuration-derived values. */
	status->Set("scheduled_downtime_depth", svc->GetDowntimeDepth());
	status->Set("process_performance_data", CompatUtility::GetCheckableProcessPerformanceData(svc));
	status->Set("event_handler", CompatUtility::GetCheckableEventHandler(svc));
	status->Set("check_command", CompatUtility::GetCheckableCheckCommand(svc));
	status->Set("normal_check_interval", CompatUtility::GetCheckableCheckInterval(svc));
	status->Set("retry_check_interval", CompatUtility::GetCheckableRetryInterval(svc));
	status->Set("check_timeperiod_object_id", svc->GetCheckPeriod());
	status->Set("modified_service_attributes", svc->GetModifiedAttributes());

	return status;
}
/**
 * Collects the status fields of the host wrapped by this DB object.
 *
 * Fields derived from the last check result (output, perfdata, check
 * source, last_check, latency, execution_time) are only set when the
 * host has a check result. An unreachable host is reported with
 * current_state 2 instead of its own state.
 *
 * @returns A newly created dictionary holding the status fields.
 */
Dictionary::Ptr HostDbObject::GetStatusFields(void) const
{
	Dictionary::Ptr status = new Dictionary();
	Host::Ptr hostObj = static_pointer_cast<Host>(GetObject());
	CheckResult::Ptr lastResult = hostObj->GetLastCheckResult();

	/* Plugin output of the last check, if there was one. */
	if (lastResult) {
		status->Set("output", CompatUtility::GetCheckResultOutput(lastResult));
		status->Set("long_output", CompatUtility::GetCheckResultLongOutput(lastResult));
		status->Set("perfdata", CompatUtility::GetCheckResultPerfdata(lastResult));
		status->Set("check_source", lastResult->GetCheckSource());
	}

	/* Current state and check scheduling. */
	status->Set("current_state", hostObj->IsReachable() ? hostObj->GetState() : 2);
	status->Set("has_been_checked", CompatUtility::GetCheckableHasBeenChecked(hostObj));
	status->Set("should_be_scheduled", hostObj->GetEnableActiveChecks());
	status->Set("current_check_attempt", hostObj->GetCheckAttempt());
	status->Set("max_check_attempts", hostObj->GetMaxCheckAttempts());

	if (lastResult)
		status->Set("last_check", DbValue::FromTimestamp(lastResult->GetScheduleEnd()));

	status->Set("next_check", DbValue::FromTimestamp(hostObj->GetNextCheck()));
	status->Set("check_type", CompatUtility::GetCheckableCheckType(hostObj));

	/* State history timestamps. */
	status->Set("last_state_change", DbValue::FromTimestamp(hostObj->GetLastStateChange()));
	status->Set("last_hard_state_change", DbValue::FromTimestamp(hostObj->GetLastHardStateChange()));
	status->Set("last_time_up", DbValue::FromTimestamp(static_cast<int>(hostObj->GetLastStateUp())));
	status->Set("last_time_down", DbValue::FromTimestamp(static_cast<int>(hostObj->GetLastStateDown())));
	status->Set("last_time_unreachable", DbValue::FromTimestamp(static_cast<int>(hostObj->GetLastStateUnreachable())));
	status->Set("state_type", hostObj->GetStateType());

	/* Notifications. */
	status->Set("last_notification", DbValue::FromTimestamp(CompatUtility::GetCheckableNotificationLastNotification(hostObj)));
	status->Set("next_notification", DbValue::FromTimestamp(CompatUtility::GetCheckableNotificationNextNotification(hostObj)));
	status->Set("no_more_notifications", Empty);
	status->Set("notifications_enabled", CompatUtility::GetCheckableNotificationsEnabled(hostObj));

	/* Both acknowledgement fields are read under one lock on the host. */
	{
		ObjectLock hostLock(hostObj);
		status->Set("problem_has_been_acknowledged", CompatUtility::GetCheckableProblemHasBeenAcknowledged(hostObj));
		status->Set("acknowledgement_type", CompatUtility::GetCheckableAcknowledgementType(hostObj));
	}

	status->Set("current_notification_number", CompatUtility::GetCheckableNotificationNotificationNumber(hostObj));

	/* Feature switches and flapping. */
	status->Set("passive_checks_enabled", CompatUtility::GetCheckablePassiveChecksEnabled(hostObj));
	status->Set("active_checks_enabled", CompatUtility::GetCheckableActiveChecksEnabled(hostObj));
	status->Set("event_handler_enabled", CompatUtility::GetCheckableEventHandlerEnabled(hostObj));
	status->Set("flap_detection_enabled", CompatUtility::GetCheckableFlapDetectionEnabled(hostObj));
	status->Set("is_flapping", CompatUtility::GetCheckableIsFlapping(hostObj));
	status->Set("percent_state_change", CompatUtility::GetCheckablePercentStateChange(hostObj));

	/* Timing figures are stored as strings. */
	if (lastResult) {
		status->Set("latency", Convert::ToString(Service::CalculateLatency(lastResult)));
		status->Set("execution_time", Convert::ToString(Service::CalculateExecutionTime(lastResult)));
	}

	/* Remaining values; several columns are written as Empty or a fixed 0. */
	status->Set("scheduled_downtime_depth", hostObj->GetDowntimeDepth());
	status->Set("failure_prediction_enabled", Empty);
	status->Set("process_performance_data", 0); /* this is a host which does not process any perf data */
	status->Set("obsess_over_host", Empty);
	status->Set("modified_host_attributes", hostObj->GetModifiedAttributes());
	status->Set("event_handler", CompatUtility::GetCheckableEventHandler(hostObj));
	status->Set("check_command", CompatUtility::GetCheckableCheckCommand(hostObj));
	status->Set("normal_check_interval", CompatUtility::GetCheckableCheckInterval(hostObj));
	status->Set("retry_check_interval", CompatUtility::GetCheckableRetryInterval(hostObj));
	status->Set("check_timeperiod_object_id", hostObj->GetCheckPeriod());
	status->Set("is_reachable", CompatUtility::GetCheckableIsReachable(hostObj));

	return status;
}
void ElasticsearchWriter::AddCheckResult(const Dictionary::Ptr& fields, const Checkable::Ptr& checkable, const CheckResult::Ptr& cr) { String prefix = "check_result."; fields->Set(prefix + "output", cr->GetOutput()); fields->Set(prefix + "check_source", cr->GetCheckSource()); fields->Set(prefix + "exit_status", cr->GetExitStatus()); fields->Set(prefix + "command", cr->GetCommand()); fields->Set(prefix + "state", cr->GetState()); fields->Set(prefix + "vars_before", cr->GetVarsBefore()); fields->Set(prefix + "vars_after", cr->GetVarsAfter()); fields->Set(prefix + "execution_start", FormatTimestamp(cr->GetExecutionStart())); fields->Set(prefix + "execution_end", FormatTimestamp(cr->GetExecutionEnd())); fields->Set(prefix + "schedule_start", FormatTimestamp(cr->GetScheduleStart())); fields->Set(prefix + "schedule_end", FormatTimestamp(cr->GetScheduleEnd())); /* Add extra calculated field. */ fields->Set(prefix + "latency", cr->CalculateLatency()); fields->Set(prefix + "execution_time", cr->CalculateExecutionTime()); if (!GetEnableSendPerfdata()) return; Array::Ptr perfdata = cr->GetPerformanceData(); CheckCommand::Ptr checkCommand = checkable->GetCheckCommand(); if (perfdata) { ObjectLock olock(perfdata); for (const Value& val : perfdata) { PerfdataValue::Ptr pdv; if (val.IsObjectType<PerfdataValue>()) pdv = val; else { try { pdv = PerfdataValue::Parse(val); } catch (const std::exception&) { Log(LogWarning, "ElasticsearchWriter") << "Ignoring invalid perfdata for checkable '" << checkable->GetName() << "' and command '" << checkCommand->GetName() << "' with value: " << val; continue; } } String escapedKey = pdv->GetLabel(); boost::replace_all(escapedKey, " ", "_"); boost::replace_all(escapedKey, ".", "_"); boost::replace_all(escapedKey, "\\", "_"); boost::algorithm::replace_all(escapedKey, "::", "."); String perfdataPrefix = prefix + "perfdata." 
+ escapedKey; fields->Set(perfdataPrefix + ".value", pdv->GetValue()); if (pdv->GetMin()) fields->Set(perfdataPrefix + ".min", pdv->GetMin()); if (pdv->GetMax()) fields->Set(perfdataPrefix + ".max", pdv->GetMax()); if (pdv->GetWarn()) fields->Set(perfdataPrefix + ".warn", pdv->GetWarn()); if (pdv->GetCrit()) fields->Set(perfdataPrefix + ".crit", pdv->GetCrit()); if (!pdv->GetUnit().IsEmpty()) fields->Set(perfdataPrefix + ".unit", pdv->GetUnit()); } } }