Beispiel #1
0
/**
 * Records one processed check in the CIB statistics counter that matches
 * the checkable type (host or service) and the kind of result (active or
 * passive). An unrecognized type only produces a warning log entry.
 *
 * @param cr   The check result; its schedule end is used as the timestamp.
 * @param type The type of checkable the result belongs to.
 */
void Checkable::UpdateStatistics(const CheckResult::Ptr& cr, CheckableType type)
{
    time_t timestamp = cr->GetScheduleEnd();

    /* Host checks. */
    if (type == CheckableHost) {
        if (!cr->GetActive())
            CIB::UpdatePassiveHostChecksStatistics(timestamp, 1);
        else
            CIB::UpdateActiveHostChecksStatistics(timestamp, 1);

        return;
    }

    /* Service checks. */
    if (type == CheckableService) {
        if (!cr->GetActive())
            CIB::UpdatePassiveServiceChecksStatistics(timestamp, 1);
        else
            CIB::UpdateActiveServiceChecksStatistics(timestamp, 1);

        return;
    }

    /* Neither host nor service: nothing is counted. */
    Log(LogWarning, "Checkable", "Unknown checkable type for statistic update.");
}
Beispiel #2
0
/**
 * Processes a new check result for this checkable.
 *
 * Fills in missing timestamps and the check source on the result. If a
 * command endpoint is set and the "agent_check" extension is present, the
 * result is only sent to that endpoint through the API listener and nothing
 * else happens. Otherwise the state, state type, check attempt,
 * acknowledgement, downtime, flapping and next-check bookkeeping is updated
 * and the matching signals (new check result, state change, notification
 * requests) are emitted.
 *
 * @param cr     The check result to process. It is modified in place
 *               (timestamps, check source, vars_before/vars_after).
 * @param origin The message origin; passed on to the emitted signals. When
 *               it is null or local, the check source is set to this node's
 *               name.
 */
void Checkable::ProcessCheckResult(const CheckResult::Ptr& cr, const MessageOrigin::Ptr& origin)
{
	/* Mark the check as finished; the lock is only held for this flag. */
	{
		ObjectLock olock(this);
		m_CheckRunning = false;
	}

	/* Single timestamp used for defaulting and for the state change times below. */
	double now = Utility::GetTime();

	/* Default every timestamp that is still unset (== 0) to the current time. */
	if (cr->GetScheduleStart() == 0)
		cr->SetScheduleStart(now);

	if (cr->GetScheduleEnd() == 0)
		cr->SetScheduleEnd(now);

	if (cr->GetExecutionStart() == 0)
		cr->SetExecutionStart(now);

	if (cr->GetExecutionEnd() == 0)
		cr->SetExecutionEnd(now);

	/* Results without an origin or with a local origin are attributed to this node. */
	if (!origin || origin->IsLocal())
		cr->SetCheckSource(IcingaApplication::GetInstance()->GetNodeName());

	Endpoint::Ptr command_endpoint = GetCommandEndpoint();

	/* override check source if command_endpoint was defined */
	if (command_endpoint && !GetExtension("agent_check"))
		cr->SetCheckSource(command_endpoint->GetName());

	/* agent checks go through the api */
	if (command_endpoint && GetExtension("agent_check")) {
		ApiListener::Ptr listener = ApiListener::GetInstance();

		if (listener) {
			/* send message back to its origin */
			Dictionary::Ptr message = ClusterEvents::MakeCheckResultMessage(this, cr);
			listener->SyncSendMessage(command_endpoint, message);
		}

		/* No local state processing for agent checks, even without a listener. */
		return;

	}

	/* Reachability is evaluated before the object lock is taken. */
	bool reachable = IsReachable();
	bool notification_reachable = IsReachable(DependencyNotification);

	ObjectLock olock(this);

	/* Snapshot of the state before this result is applied. */
	CheckResult::Ptr old_cr = GetLastCheckResult();
	ServiceState old_state = GetStateRaw();
	StateType old_stateType = GetStateType();
	long old_attempt = GetCheckAttempt();
	bool recovery = false;

	/* Ignore check results older than the current one. */
	if (old_cr && cr->GetExecutionStart() < old_cr->GetExecutionStart())
		return;

	/* The ExecuteCheck function already sets the old state, but we need to do it again
	 * in case this was a passive check result. */
	SetLastStateRaw(old_state);
	SetLastStateType(old_stateType);
	SetLastReachable(reachable);

	Host::Ptr host;
	Service::Ptr service;
	tie(host, service) = GetHostService(this);

	/* Treated as a host unless GetHostService() yields a service. */
	CheckableType checkableType = CheckableHost;
	if (service)
		checkableType = CheckableService;

	/* Check attempt to store; stays 1 unless a NOT-OK branch below changes it. */
	long attempt = 1;

	std::set<Checkable::Ptr> children = GetChildren();

	if (!old_cr) {
		/* First result ever: go straight to a HARD state. */
		SetStateType(StateTypeHard);
	} else if (IsStateOK(cr->GetState())) {
		SetStateType(StateTypeHard); // NOT-OK -> HARD OK

		/* An OK result following a NOT-OK state counts as a recovery. */
		if (!IsStateOK(old_state))
			recovery = true;

		ResetNotificationNumbers();
		SaveLastState(ServiceOK, Utility::GetTime());

		/* update reachability for child objects in OK state */
		if (!children.empty())
			OnReachabilityChanged(this, cr, children, origin);
	} else {
		/* NOT-OK result: pick the state type and attempt counter from the previous state. */
		if (old_attempt >= GetMaxCheckAttempts()) {
			/* Attempts exhausted; attempt keeps its initial value of 1. */
			SetStateType(StateTypeHard);
		} else if (old_stateType == StateTypeSoft && !IsStateOK(old_state)) {
			SetStateType(StateTypeSoft);
			attempt = old_attempt + 1; // NOT-OK -> NOT-OK counter
		} else if (IsStateOK(old_state)) {
			SetStateType(StateTypeSoft);
			attempt = 1; // OK -> NOT-OK transition, reset the counter
		} else {
			/* Remaining case: state type is left untouched and the attempt is carried over. */
			attempt = old_attempt;
		}

		/* NOTE(review): this condition looks always true here, since the OK case
		 * is handled by the branch above - confirm before simplifying. */
		if (!IsStateOK(cr->GetState())) {
			SaveLastState(cr->GetState(), Utility::GetTime());
		}

		/* update reachability for child objects in NOT-OK state */
		if (!children.empty())
			OnReachabilityChanged(this, cr, children, origin);
	}

	if (!reachable)
		SetLastStateUnreachable(Utility::GetTime());

	SetCheckAttempt(attempt);

	ServiceState new_state = cr->GetState();
	SetStateRaw(new_state);

	bool stateChange;

	/* Exception on state change calculation for hosts. */
	if (checkableType == CheckableService)
		stateChange = (old_state != new_state);
	else
		stateChange = (Host::CalculateState(old_state) != Host::CalculateState(new_state));

	if (stateChange) {
		SetLastStateChange(now);

		/* remove acknowledgements */
		/* Normal acknowledgements are cleared on any state change, sticky ones only on an OK state. */
		if (GetAcknowledgement() == AcknowledgementNormal ||
		    (GetAcknowledgement() == AcknowledgementSticky && IsStateOK(new_state))) {
			ClearAcknowledgement();
		}

		/* reschedule direct parents */
		BOOST_FOREACH(const Checkable::Ptr& parent, GetParents()) {
			if (parent.get() == this)
				continue;

			/* NOTE(review): this olock shadows the function-level olock, which
			 * is still held on this object while the parent is locked. */
			ObjectLock olock(parent);
			parent->SetNextCheck(Utility::GetTime());
		}
	}

	/* Decided here while locked; the comments are removed after the lock is released. */
	bool remove_acknowledgement_comments = false;

	if (GetAcknowledgement() == AcknowledgementNone)
		remove_acknowledgement_comments = true;

	/* A hard change is either SOFT -> HARD, or a state change between two HARD states. */
	bool hardChange = (GetStateType() == StateTypeHard && old_stateType == StateTypeSoft);

	if (stateChange && old_stateType == StateTypeHard && GetStateType() == StateTypeHard)
		hardChange = true;

	bool is_volatile = GetVolatile();

	/* Volatile checkables update the last hard state on every result. */
	if (hardChange || is_volatile) {
		SetLastHardStateRaw(new_state);
		SetLastHardStateChange(now);
	}

	if (!IsStateOK(new_state))
		TriggerDowntimes();

	/* statistics for external tools */
	Checkable::UpdateStatistics(cr, checkableType);

	/* Evaluated after TriggerDowntimes() above. */
	bool in_downtime = IsInDowntime();

	bool send_notification = false;

	if (notification_reachable && !in_downtime && !IsAcknowledged()) {
		/* Send notifications when a hard state change occurred. */
		if (hardChange)
			send_notification = true;
		/* Or if the checkable is volatile and in a HARD state. */
		else if (is_volatile && GetStateType() == StateTypeHard)
			send_notification = true;
	}

	/* The following checks can only suppress the notification again. */
	if (!old_cr)
		send_notification = false; /* Don't send notifications for the initial state change */

	if (IsStateOK(old_state) && old_stateType == StateTypeSoft)
		send_notification = false; /* Don't send notifications for SOFT-OK -> HARD-OK. */

	if (is_volatile && IsStateOK(old_state) && IsStateOK(new_state))
		send_notification = false; /* Don't send notifications for volatile OK -> OK changes. */

	/* A downtime start/end notification is due when the downtime status differs from the stored one. */
	bool send_downtime_notification = (GetLastInDowntime() != in_downtime);
	SetLastInDowntime(in_downtime);

	/* The lock is released while comments are removed and vars_after is built. */
	olock.Unlock();

	if (remove_acknowledgement_comments)
		RemoveCommentsByType(CommentAcknowledgement);

	/* Record the resulting state on the check result itself. */
	Dictionary::Ptr vars_after = new Dictionary();
	vars_after->Set("state", new_state);
	vars_after->Set("state_type", GetStateType());
	vars_after->Set("attempt", GetCheckAttempt());
	vars_after->Set("reachable", reachable);

	/* The previous result's vars_after become this result's vars_before. */
	if (old_cr)
		cr->SetVarsBefore(old_cr->GetVarsAfter());

	cr->SetVarsAfter(vars_after);

	/* Re-acquire the lock to store the result and update flapping and scheduling. */
	olock.Lock();
	SetLastCheckResult(cr);

	bool was_flapping, is_flapping;

	was_flapping = IsFlapping();

	/* The flapping status is only updated while in a HARD state. */
	if (GetStateType() == StateTypeHard)
		UpdateFlappingStatus(stateChange);

	is_flapping = IsFlapping();

	if (cr->GetActive()) {
		UpdateNextCheck(origin);
	} else {
		/* Reschedule the next check for passive check results. The side effect of
		 * this is that for as long as we receive passive results for a service we
		 * won't execute any active checks. */
		SetNextCheck(Utility::GetTime() + GetCheckInterval(), false, origin);
	}

	/* Everything from here on (signals, logging, notifications) runs without the object lock. */
	olock.Unlock();

//	Log(LogDebug, "Checkable")
//	    << "Flapping: Checkable " << GetName()
//	    << " was: " << (was_flapping)
//	    << " is: " << is_flapping)
//	    << " threshold: " << GetFlappingThreshold()
//	    << "% current: " + GetFlappingCurrent()) << "%.";

	OnNewCheckResult(this, cr, origin);

	/* signal status updates to, for example, db_ido */
	OnStateChanged(this);

	/* Hosts log the host state derived via Host::CalculateState(), services the raw service state. */
	String old_state_str = (service ? Service::StateToString(old_state) : Host::StateToString(Host::CalculateState(old_state)));
	String new_state_str = (service ? Service::StateToString(new_state) : Host::StateToString(Host::CalculateState(new_state)));

	/* Whether a hard state change or a volatile state change except OK -> OK happened. */
	if (hardChange || (is_volatile && !(IsStateOK(old_state) && IsStateOK(new_state)))) {
		OnStateChange(this, cr, StateTypeHard, origin);
		Log(LogNotice, "Checkable")
		    << "State Change: Checkable " << GetName() << " hard state change from " << old_state_str << " to " << new_state_str << " detected." << (is_volatile ? " Checkable is volatile." : "");
	}
	/* Whether a state change happened or the state type is SOFT (must be logged too). */
	else if (stateChange || GetStateType() == StateTypeSoft) {
		OnStateChange(this, cr, StateTypeSoft, origin);
		Log(LogNotice, "Checkable")
		    << "State Change: Checkable " << GetName() << " soft state change from " << old_state_str << " to " << new_state_str << " detected.";
	}

	/* Event handlers run for SOFT states, hard changes, recoveries and volatile non-OK->OK changes. */
	if (GetStateType() == StateTypeSoft || hardChange || recovery ||
	    (is_volatile && !(IsStateOK(old_state) && IsStateOK(new_state))))
		ExecuteEventHandler();

	/* Downtime start/end notifications */
	if (send_downtime_notification && IsActive())
		OnNotificationsRequested(this, in_downtime ? NotificationDowntimeStart : NotificationDowntimeEnd, cr, "", "", MessageOrigin::Ptr());

	/* Flapping start/end notifications */
	if (!was_flapping && is_flapping) {
		/* The notification is skipped while paused; logging and NotifyFlapping() still happen. */
		if (!IsPaused())
			OnNotificationsRequested(this, NotificationFlappingStart, cr, "", "", MessageOrigin::Ptr());

		Log(LogNotice, "Checkable")
			<< "Flapping: Checkable " << GetName() << " started flapping (" << GetFlappingThreshold() << "% < " << GetFlappingCurrent() << "%).";

		NotifyFlapping(origin);
	} else if (was_flapping && !is_flapping) {
		if (!IsPaused())
			OnNotificationsRequested(this, NotificationFlappingEnd, cr, "", "", MessageOrigin::Ptr());

		Log(LogNotice, "Checkable")
			<< "Flapping: Checkable " << GetName() << " stopped flapping (" << GetFlappingThreshold() << "% >= " << GetFlappingCurrent() << "%).";

		NotifyFlapping(origin);
	}

	/* Problem notifications */
	/* Suppressed while flapping or paused; a recovery sends NotificationRecovery instead. */
	if (send_notification && !is_flapping) {
		if (!IsPaused())
			OnNotificationsRequested(this, recovery ? NotificationRecovery : NotificationProblem, cr, "", "", MessageOrigin::Ptr());
	}
}