/**
 * Computes the latency of a check result: the scheduled time span
 * (schedule end minus schedule start) minus the execution time reported
 * by CalculateExecutionTime().
 *
 * @param cr The check result; may be null.
 * @returns The latency, never negative. 0 when no check result is given.
 */
double Checkable::CalculateLatency(const CheckResult::Ptr& cr)
{
	if (!cr)
		return 0;

	const double scheduled_span = cr->GetScheduleEnd() - cr->GetScheduleStart();
	const double delay = scheduled_span - CalculateExecutionTime(cr);

	/* A negative difference is reported as "no latency". */
	return (delay < 0) ? 0 : delay;
}
Example #2
0
/**
 * Processes a new check result for this checkable.
 *
 * In order, this function:
 *  - clears the "check running" flag,
 *  - fills in any schedule/execution timestamps that are still 0 with the current time,
 *  - determines the check source (local node name or the command endpoint's name),
 *  - for agent checks, forwards the result to the command endpoint and stops,
 *  - discards results whose execution start is older than the last stored result,
 *  - updates state, state type, check attempt, acknowledgement and last-state timestamps,
 *  - stores the result, updates the flapping status and
 *  - fires the check result / state change signals, the event handler and notification requests.
 *
 * @param cr The check result. A null pointer is ignored (only the running flag is cleared).
 * @param origin The origin of the check result; may be null. It is passed on to the
 *               signals that are emitted here.
 */
void Checkable::ProcessCheckResult(const CheckResult::Ptr& cr, const MessageOrigin::Ptr& origin)
{
    {
        ObjectLock olock(this);
        m_CheckRunning = false;
    }

    /* Nothing to process without a result; bail out before cr is dereferenced below. */
    if (!cr)
        return;

    double now = Utility::GetTime();

    /* Timestamps which were not provided (still 0) default to the current time. */
    if (cr->GetScheduleStart() == 0)
        cr->SetScheduleStart(now);

    if (cr->GetScheduleEnd() == 0)
        cr->SetScheduleEnd(now);

    if (cr->GetExecutionStart() == 0)
        cr->SetExecutionStart(now);

    if (cr->GetExecutionEnd() == 0)
        cr->SetExecutionEnd(now);

    if (!origin || origin->IsLocal()) {
        Log(LogDebug, "Checkable")
                << "No origin or local origin for object '" << GetName()
                << "', setting " << IcingaApplication::GetInstance()->GetNodeName()
                << " as check_source.";
        cr->SetCheckSource(IcingaApplication::GetInstance()->GetNodeName());
    }

    Endpoint::Ptr command_endpoint = GetCommandEndpoint();

    /* override check source if command_endpoint was defined */
    if (command_endpoint && !GetExtension("agent_check")) {
        Log(LogDebug, "Checkable")
                << "command_endpoint found for object '" << GetName()
                << "', setting " << command_endpoint->GetName()
                << " as check_source.";
        cr->SetCheckSource(command_endpoint->GetName());
    }

    /* agent checks go through the api */
    if (command_endpoint && GetExtension("agent_check")) {
        ApiListener::Ptr listener = ApiListener::GetInstance();

        if (listener) {
            /* send message back to its origin */
            Dictionary::Ptr message = ClusterEvents::MakeCheckResultMessage(this, cr);
            listener->SyncSendMessage(command_endpoint, message);
        }

        /* The result is not processed locally for agent checks. */
        return;

    }

    /* Reachability is evaluated before the object lock is taken below. */
    bool reachable = IsReachable();
    bool notification_reachable = IsReachable(DependencyNotification);

    ASSERT(!OwnsLock());
    ObjectLock olock(this);

    /* Snapshot of the previous state, used to detect transitions further down. */
    CheckResult::Ptr old_cr = GetLastCheckResult();
    ServiceState old_state = GetStateRaw();
    StateType old_stateType = GetStateType();
    long old_attempt = GetCheckAttempt();
    bool recovery = false;

    /* Ignore results which started executing before the one we already have. */
    if (old_cr && cr->GetExecutionStart() < old_cr->GetExecutionStart())
        return;

    /* The ExecuteCheck function already sets the old state, but we need to do it again
     * in case this was a passive check result. */
    SetLastStateRaw(old_state);
    SetLastStateType(old_stateType);
    SetLastReachable(reachable);

    long attempt = 1;

    std::set<Checkable::Ptr> children = GetChildren();

    if (!old_cr) {
        /* The very first check result always yields a hard state. */
        SetStateType(StateTypeHard);
    } else if (cr->GetState() == ServiceOK) {
        if (old_state == ServiceOK && old_stateType == StateTypeSoft) {
            SetStateType(StateTypeHard); // SOFT OK -> HARD OK
            recovery = true;
        }

        if (old_state != ServiceOK)
            recovery = true; // NOT OK -> SOFT/HARD OK

        ResetNotificationNumbers();
        SetLastStateOK(Utility::GetTime());

        /* update reachability for child objects in OK state */
        if (!children.empty())
            OnReachabilityChanged(this, cr, children, origin);
    } else {
        if (old_attempt >= GetMaxCheckAttempts()) {
            /* Maximum number of attempts reached; attempt stays at its initial value of 1. */
            SetStateType(StateTypeHard);
        } else if (old_stateType == StateTypeSoft && old_state != ServiceOK) {
            SetStateType(StateTypeSoft);
            attempt = old_attempt + 1; //NOT-OK -> NOT-OK counter
        } else if (old_state == ServiceOK) {
            SetStateType(StateTypeSoft);
            attempt = 1; //OK -> NOT-OK transition, reset the counter
        } else {
            attempt = old_attempt;
        }

        /* Record when the checkable was last seen in the reported state. */
        switch (cr->GetState()) {
        case ServiceOK:
            /* Nothing to do here. */
            break;
        case ServiceWarning:
            SetLastStateWarning(Utility::GetTime());
            break;
        case ServiceCritical:
            SetLastStateCritical(Utility::GetTime());
            break;
        case ServiceUnknown:
            SetLastStateUnknown(Utility::GetTime());
            break;
        }

        /* update reachability for child objects in NOT-OK state */
        if (!children.empty())
            OnReachabilityChanged(this, cr, children, origin);
    }

    if (!reachable)
        SetLastStateUnreachable(Utility::GetTime());

    SetCheckAttempt(attempt);

    ServiceState new_state = cr->GetState();
    SetStateRaw(new_state);

    bool stateChange = (old_state != new_state);
    if (stateChange) {
        SetLastStateChange(now);

        /* remove acknowledgements */
        if (GetAcknowledgement() == AcknowledgementNormal ||
                (GetAcknowledgement() == AcknowledgementSticky && new_state == ServiceOK)) {
            ClearAcknowledgement();
        }

        /* reschedule direct parents */
        BOOST_FOREACH(const Checkable::Ptr& parent, GetParents()) {
            if (parent.get() == this)
                continue;

            ObjectLock olock(parent);
            parent->SetNextCheck(Utility::GetTime());
        }
    }

    bool remove_acknowledgement_comments = false;

    if (GetAcknowledgement() == AcknowledgementNone)
        remove_acknowledgement_comments = true;

    /* A hard change is either SOFT -> HARD or a state change between two hard states. */
    bool hardChange = (GetStateType() == StateTypeHard && old_stateType == StateTypeSoft);

    if (stateChange && old_stateType == StateTypeHard && GetStateType() == StateTypeHard)
        hardChange = true;

    bool is_volatile = GetVolatile();

    if (hardChange || is_volatile) {
        SetLastHardStateRaw(new_state);
        SetLastHardStateChange(now);
    }

    if (new_state != ServiceOK)
        TriggerDowntimes();

    Host::Ptr host;
    Service::Ptr service;
    tie(host, service) = GetHostService(this);

    CheckableType checkable_type = CheckableHost;
    if (service)
        checkable_type = CheckableService;

    /* statistics for external tools */
    Checkable::UpdateStatistics(cr, checkable_type);

    bool in_downtime = IsInDowntime();
    bool send_notification = hardChange && notification_reachable && !in_downtime && !IsAcknowledged();

    if (!old_cr)
        send_notification = false; /* Don't send notifications for the initial state change */

    if (old_state == ServiceOK && old_stateType == StateTypeSoft)
        send_notification = false; /* Don't send notifications for SOFT-OK -> HARD-OK. */

    if (is_volatile && old_state == ServiceOK && new_state == ServiceOK)
        send_notification = false; /* Don't send notifications for volatile OK -> OK changes. */

    bool send_downtime_notification = (GetLastInDowntime() != in_downtime);
    SetLastInDowntime(in_downtime);

    /* The lock is released while comments are removed and vars_after is assembled. */
    olock.Unlock();

    if (remove_acknowledgement_comments)
        RemoveCommentsByType(CommentAcknowledgement);

    Dictionary::Ptr vars_after = new Dictionary();
    vars_after->Set("state", new_state);
    vars_after->Set("state_type", GetStateType());
    vars_after->Set("attempt", GetCheckAttempt());
    vars_after->Set("reachable", reachable);

    /* The previous result's "after" snapshot becomes this result's "before" snapshot. */
    if (old_cr)
        cr->SetVarsBefore(old_cr->GetVarsAfter());

    cr->SetVarsAfter(vars_after);

    olock.Lock();
    SetLastCheckResult(cr);

    bool was_flapping, is_flapping;

    /* The flapping status is only updated while in a hard state. */
    was_flapping = IsFlapping();
    if (GetStateType() == StateTypeHard)
        UpdateFlappingStatus(stateChange);
    is_flapping = IsFlapping();

    /* The signals below are emitted without holding the object lock. */
    olock.Unlock();

//	Log(LogDebug, "Checkable")
//	    << "Flapping: Checkable " << GetName()
//	    << " was: " << (was_flapping)
//	    << " is: " << is_flapping)
//	    << " threshold: " << GetFlappingThreshold()
//	    << "% current: " + GetFlappingCurrent()) << "%.";

    OnNewCheckResult(this, cr, origin);

    /* signal status updates to for example db_ido */
    OnStateChanged(this);

    /* Hosts use their own state names, derived from the raw service state. */
    String old_state_str = (service ? Service::StateToString(old_state) : Host::StateToString(Host::CalculateState(old_state)));
    String new_state_str = (service ? Service::StateToString(new_state) : Host::StateToString(Host::CalculateState(new_state)));

    if (hardChange || is_volatile) {
        OnStateChange(this, cr, StateTypeHard, origin);
        Log(LogNotice, "Checkable")
                << "State Change: Checkable " << GetName() << " hard state change from " << old_state_str << " to " << new_state_str << " detected." << (is_volatile ? " Checkable is volatile." : "");
    } else if (stateChange) {
        OnStateChange(this, cr, StateTypeSoft, origin);
        Log(LogNotice, "Checkable")
                << "State Change: Checkable " << GetName() << " soft state change from " << old_state_str << " to " << new_state_str << " detected.";
    }

    if (GetStateType() == StateTypeSoft || hardChange || recovery || is_volatile)
        ExecuteEventHandler();

    if (send_downtime_notification)
        OnNotificationsRequested(this, in_downtime ? NotificationDowntimeStart : NotificationDowntimeEnd, cr, "", "");

    /* Flapping start/end notifications take precedence over problem/recovery notifications. */
    if (!was_flapping && is_flapping) {
        OnNotificationsRequested(this, NotificationFlappingStart, cr, "", "");

        Log(LogNotice, "Checkable")
                << "Flapping: Checkable " << GetName() << " started flapping (" << GetFlappingThreshold() << "% < " << GetFlappingCurrent() << "%).";

        NotifyFlapping(origin);
    } else if (was_flapping && !is_flapping) {
        OnNotificationsRequested(this, NotificationFlappingEnd, cr, "", "");

        Log(LogNotice, "Checkable")
                << "Flapping: Checkable " << GetName() << " stopped flapping (" << GetFlappingThreshold() << "% >= " << GetFlappingCurrent() << "%).";

        NotifyFlapping(origin);
    } else if (send_notification)
        OnNotificationsRequested(this, recovery ? NotificationRecovery : NotificationProblem, cr, "", "");
}
/**
 * Adds the attributes of a check result to an Elasticsearch document.
 *
 * All keys are prefixed with "check_result.". Besides the plain check result
 * attributes and formatted timestamps, the calculated latency and execution
 * time are added. If sending performance data is enabled, every perfdata
 * entry is added below "check_result.perfdata.<label>".
 *
 * @param fields The dictionary the fields are written to.
 * @param checkable The checkable the result belongs to (used for its check command and for log messages).
 * @param cr The check result to serialize.
 */
void ElasticsearchWriter::AddCheckResult(const Dictionary::Ptr& fields, const Checkable::Ptr& checkable, const CheckResult::Ptr& cr)
{
	String prefix = "check_result.";

	fields->Set(prefix + "output", cr->GetOutput());
	fields->Set(prefix + "check_source", cr->GetCheckSource());
	fields->Set(prefix + "exit_status", cr->GetExitStatus());
	fields->Set(prefix + "command", cr->GetCommand());
	fields->Set(prefix + "state", cr->GetState());
	fields->Set(prefix + "vars_before", cr->GetVarsBefore());
	fields->Set(prefix + "vars_after", cr->GetVarsAfter());

	fields->Set(prefix + "execution_start", FormatTimestamp(cr->GetExecutionStart()));
	fields->Set(prefix + "execution_end", FormatTimestamp(cr->GetExecutionEnd()));
	fields->Set(prefix + "schedule_start", FormatTimestamp(cr->GetScheduleStart()));
	fields->Set(prefix + "schedule_end", FormatTimestamp(cr->GetScheduleEnd()));

	/* Add extra calculated field. */
	fields->Set(prefix + "latency", cr->CalculateLatency());
	fields->Set(prefix + "execution_time", cr->CalculateExecutionTime());

	/* Performance data is optional. */
	if (!GetEnableSendPerfdata())
		return;

	Array::Ptr perfdata = cr->GetPerformanceData();

	CheckCommand::Ptr checkCommand = checkable->GetCheckCommand();

	if (perfdata) {
		ObjectLock olock(perfdata);
		for (const Value& val : perfdata) {
			PerfdataValue::Ptr pdv;

			/* Entries are either PerfdataValue objects already or still need to be parsed. */
			if (val.IsObjectType<PerfdataValue>())
				pdv = val;
			else {
				try {
					pdv = PerfdataValue::Parse(val);
				} catch (const std::exception&) {
					Log(LogWarning, "ElasticsearchWriter")
						<< "Ignoring invalid perfdata for checkable '"
						<< checkable->GetName() << "' and command '"
						<< checkCommand->GetName() << "' with value: " << val;
					continue;
				}
			}

			/* Build the field name from the label: spaces, dots and backslashes
			 * become underscores; afterwards "::" is turned into a dot. */
			String escapedKey = pdv->GetLabel();
			boost::replace_all(escapedKey, " ", "_");
			boost::replace_all(escapedKey, ".", "_");
			boost::replace_all(escapedKey, "\\", "_");
			boost::algorithm::replace_all(escapedKey, "::", ".");

			String perfdataPrefix = prefix + "perfdata." + escapedKey;

			fields->Set(perfdataPrefix + ".value", pdv->GetValue());

			/* Test for "not set" rather than truthiness: 0 is a legitimate
			 * boundary/threshold (e.g. min=0) and must not be dropped. */
			if (!pdv->GetMin().IsEmpty())
				fields->Set(perfdataPrefix + ".min", pdv->GetMin());
			if (!pdv->GetMax().IsEmpty())
				fields->Set(perfdataPrefix + ".max", pdv->GetMax());
			if (!pdv->GetWarn().IsEmpty())
				fields->Set(perfdataPrefix + ".warn", pdv->GetWarn());
			if (!pdv->GetCrit().IsEmpty())
				fields->Set(perfdataPrefix + ".crit", pdv->GetCrit());

			if (!pdv->GetUnit().IsEmpty())
				fields->Set(perfdataPrefix + ".unit", pdv->GetUnit());
		}
	}
}