Exemplo n.º 1
0
/**
 * Computes how long a check took to execute, based on the execution
 * timestamps stored in the check result.
 *
 * @param cr The check result to inspect; may be null.
 * @returns Execution end minus execution start, or 0 when no result is given.
 */
double Checkable::CalculateExecutionTime(const CheckResult::Ptr& cr)
{
    if (cr) {
        const double finished = cr->GetExecutionEnd();
        const double started = cr->GetExecutionStart();

        return finished - started;
    }

    /* Without a check result there is nothing to measure. */
    return 0;
}
Exemplo n.º 2
0
/**
 * Applies a new check result to this checkable.
 *
 * In order, the function:
 *  - clears the "check running" flag,
 *  - fills in schedule/execution timestamps that are still zero,
 *  - determines the check source (local node name or command endpoint name),
 *  - for agent checks, forwards the result through the ApiListener and returns,
 *  - discards results that started executing before the stored result,
 *  - computes the new state, state type and check attempt,
 *  - updates acknowledgement, downtime, statistics and flapping bookkeeping,
 *  - stores the result and emits the check result, state change and
 *    notification signals.
 *
 * @param cr The check result to process. It is dereferenced without a null
 *           check, so callers must pass a valid result.
 * @param origin Origin of the result. A null or local origin makes the local
 *               node name the check source.
 */
void Checkable::ProcessCheckResult(const CheckResult::Ptr& cr, const MessageOrigin::Ptr& origin)
{
    /* Clear the running flag under the object lock; the lock is released
     * again at the end of this scope. */
    {
        ObjectLock olock(this);
        m_CheckRunning = false;
    }

    double now = Utility::GetTime();

    /* Any timestamp the result left at zero is replaced by the current time. */
    if (cr->GetScheduleStart() == 0)
        cr->SetScheduleStart(now);

    if (cr->GetScheduleEnd() == 0)
        cr->SetScheduleEnd(now);

    if (cr->GetExecutionStart() == 0)
        cr->SetExecutionStart(now);

    if (cr->GetExecutionEnd() == 0)
        cr->SetExecutionEnd(now);

    if (!origin || origin->IsLocal()) {
        Log(LogDebug, "Checkable")
                << "No origin or local origin for object '" << GetName()
                << "', setting " << IcingaApplication::GetInstance()->GetNodeName()
                << " as check_source.";
        cr->SetCheckSource(IcingaApplication::GetInstance()->GetNodeName());
    }

    Endpoint::Ptr command_endpoint = GetCommandEndpoint();

    /* override check source if command_endpoint was defined */
    if (command_endpoint && !GetExtension("agent_check")) {
        Log(LogDebug, "Checkable")
                << "command_endpoint found for object '" << GetName()
                << "', setting " << command_endpoint->GetName()
                << " as check_source.";
        cr->SetCheckSource(command_endpoint->GetName());
    }

    /* agent checks go through the api */
    if (command_endpoint && GetExtension("agent_check")) {
        ApiListener::Ptr listener = ApiListener::GetInstance();

        if (listener) {
            /* send message back to its origin */
            Dictionary::Ptr message = ClusterEvents::MakeCheckResultMessage(this, cr);
            listener->SyncSendMessage(command_endpoint, message);
        }

        /* Agent check results are not processed any further here, whether
         * or not a listener was available to forward them. */
        return;

    }

    /* Reachability is evaluated before the object lock is taken below. */
    bool reachable = IsReachable();
    bool notification_reachable = IsReachable(DependencyNotification);

    ASSERT(!OwnsLock());
    ObjectLock olock(this);

    /* Snapshot the previous result and state so transitions can be detected. */
    CheckResult::Ptr old_cr = GetLastCheckResult();
    ServiceState old_state = GetStateRaw();
    StateType old_stateType = GetStateType();
    long old_attempt = GetCheckAttempt();
    bool recovery = false;

    /* Ignore results whose execution started before the stored result's. */
    if (old_cr && cr->GetExecutionStart() < old_cr->GetExecutionStart())
        return;

    /* The ExecuteCheck function already sets the old state, but we need to do it again
     * in case this was a passive check result. */
    SetLastStateRaw(old_state);
    SetLastStateType(old_stateType);
    SetLastReachable(reachable);

    long attempt = 1;

    std::set<Checkable::Ptr> children = GetChildren();

    if (!old_cr) {
        /* No previous result stored: the state type is set to hard directly. */
        SetStateType(StateTypeHard);
    } else if (cr->GetState() == ServiceOK) {
        if (old_state == ServiceOK && old_stateType == StateTypeSoft) {
            SetStateType(StateTypeHard); // SOFT OK -> HARD OK
            recovery = true;
        }

        if (old_state != ServiceOK)
            recovery = true; // NOT OK -> SOFT/HARD OK

        ResetNotificationNumbers();
        SetLastStateOK(Utility::GetTime());

        /* update reachability for child objects in OK state */
        if (!children.empty())
            OnReachabilityChanged(this, cr, children, origin);
    } else {
        /* Non-OK result: decide between soft and hard state and work out
         * the attempt counter from the previous attempt and state. */
        if (old_attempt >= GetMaxCheckAttempts()) {
            SetStateType(StateTypeHard);
        } else if (old_stateType == StateTypeSoft && old_state != ServiceOK) {
            SetStateType(StateTypeSoft);
            attempt = old_attempt + 1; //NOT-OK -> NOT-OK counter
        } else if (old_state == ServiceOK) {
            SetStateType(StateTypeSoft);
            attempt = 1; //OK -> NOT-OK transition, reset the counter
        } else {
            attempt = old_attempt;
        }

        /* Record the time this particular non-OK state was last seen. */
        switch (cr->GetState()) {
        case ServiceOK:
            /* Nothing to do here. */
            break;
        case ServiceWarning:
            SetLastStateWarning(Utility::GetTime());
            break;
        case ServiceCritical:
            SetLastStateCritical(Utility::GetTime());
            break;
        case ServiceUnknown:
            SetLastStateUnknown(Utility::GetTime());
            break;
        }

        /* update reachability for child objects in NOT-OK state */
        if (!children.empty())
            OnReachabilityChanged(this, cr, children, origin);
    }

    if (!reachable)
        SetLastStateUnreachable(Utility::GetTime());

    SetCheckAttempt(attempt);

    ServiceState new_state = cr->GetState();
    SetStateRaw(new_state);

    bool stateChange = (old_state != new_state);
    if (stateChange) {
        SetLastStateChange(now);

        /* remove acknowledgements */
        /* Normal acknowledgements are cleared on any state change, sticky
         * ones only when the new state is OK. */
        if (GetAcknowledgement() == AcknowledgementNormal ||
                (GetAcknowledgement() == AcknowledgementSticky && new_state == ServiceOK)) {
            ClearAcknowledgement();
        }

        /* reschedule direct parents */
        BOOST_FOREACH(const Checkable::Ptr& parent, GetParents()) {
            /* Skip this object if it is listed as its own parent. */
            if (parent.get() == this)
                continue;

            /* This lock is on the parent and shadows the outer olock
             * inside the loop body. */
            ObjectLock olock(parent);
            parent->SetNextCheck(Utility::GetTime());
        }
    }

    bool remove_acknowledgement_comments = false;

    if (GetAcknowledgement() == AcknowledgementNone)
        remove_acknowledgement_comments = true;

    /* A hard change is either a soft -> hard transition or a state change
     * between two hard states. */
    bool hardChange = (GetStateType() == StateTypeHard && old_stateType == StateTypeSoft);

    if (stateChange && old_stateType == StateTypeHard && GetStateType() == StateTypeHard)
        hardChange = true;

    bool is_volatile = GetVolatile();

    /* For volatile checkables the last hard state is updated on every result. */
    if (hardChange || is_volatile) {
        SetLastHardStateRaw(new_state);
        SetLastHardStateChange(now);
    }

    if (new_state != ServiceOK)
        TriggerDowntimes();

    Host::Ptr host;
    Service::Ptr service;
    tie(host, service) = GetHostService(this);

    /* Treated as a service when GetHostService yields one, otherwise as a host. */
    CheckableType checkable_type = CheckableHost;
    if (service)
        checkable_type = CheckableService;

    /* statistics for external tools */
    Checkable::UpdateStatistics(cr, checkable_type);

    bool in_downtime = IsInDowntime();
    bool send_notification = hardChange && notification_reachable && !in_downtime && !IsAcknowledged();

    if (!old_cr)
        send_notification = false; /* Don't send notifications for the initial state change */

    if (old_state == ServiceOK && old_stateType == StateTypeSoft)
        send_notification = false; /* Don't send notifications for SOFT-OK -> HARD-OK. */

    if (is_volatile && old_state == ServiceOK && new_state == ServiceOK)
        send_notification = false; /* Don't send notifications for volatile OK -> OK changes. */

    /* A downtime notification is due when the downtime status differs from
     * the one recorded for the previous result. */
    bool send_downtime_notification = (GetLastInDowntime() != in_downtime);
    SetLastInDowntime(in_downtime);

    /* The object lock is released while comments are removed and the
     * before/after vars are attached to the result. */
    olock.Unlock();

    if (remove_acknowledgement_comments)
        RemoveCommentsByType(CommentAcknowledgement);

    Dictionary::Ptr vars_after = new Dictionary();
    vars_after->Set("state", new_state);
    vars_after->Set("state_type", GetStateType());
    vars_after->Set("attempt", GetCheckAttempt());
    vars_after->Set("reachable", reachable);

    /* The previous result's "after" vars become this result's "before" vars. */
    if (old_cr)
        cr->SetVarsBefore(old_cr->GetVarsAfter());

    cr->SetVarsAfter(vars_after);

    /* Re-acquire the lock to store the result and update the flapping status. */
    olock.Lock();
    SetLastCheckResult(cr);

    bool was_flapping, is_flapping;

    was_flapping = IsFlapping();
    /* The flapping status is only updated while the state type is hard. */
    if (GetStateType() == StateTypeHard)
        UpdateFlappingStatus(stateChange);
    is_flapping = IsFlapping();

    /* Signals below are emitted without holding the object lock. */
    olock.Unlock();

//	Log(LogDebug, "Checkable")
//	    << "Flapping: Checkable " << GetName()
//	    << " was: " << (was_flapping)
//	    << " is: " << is_flapping)
//	    << " threshold: " << GetFlappingThreshold()
//	    << "% current: " + GetFlappingCurrent()) << "%.";

    OnNewCheckResult(this, cr, origin);

    /* signal status updates to for example db_ido */
    OnStateChanged(this);

    /* Hosts use their own state names, derived via Host::CalculateState. */
    String old_state_str = (service ? Service::StateToString(old_state) : Host::StateToString(Host::CalculateState(old_state)));
    String new_state_str = (service ? Service::StateToString(new_state) : Host::StateToString(Host::CalculateState(new_state)));

    /* Hard changes (and every result of a volatile checkable) are signalled
     * as hard state changes; any other state change as a soft one. */
    if (hardChange || is_volatile) {
        OnStateChange(this, cr, StateTypeHard, origin);
        Log(LogNotice, "Checkable")
                << "State Change: Checkable " << GetName() << " hard state change from " << old_state_str << " to " << new_state_str << " detected." << (is_volatile ? " Checkable is volatile." : "");
    } else if (stateChange) {
        OnStateChange(this, cr, StateTypeSoft, origin);
        Log(LogNotice, "Checkable")
                << "State Change: Checkable " << GetName() << " soft state change from " << old_state_str << " to " << new_state_str << " detected.";
    }

    /* The event handler runs for soft states, hard changes, recoveries and
     * volatile checkables. */
    if (GetStateType() == StateTypeSoft || hardChange || recovery || is_volatile)
        ExecuteEventHandler();

    if (send_downtime_notification)
        OnNotificationsRequested(this, in_downtime ? NotificationDowntimeStart : NotificationDowntimeEnd, cr, "", "");

    /* Flapping start/end notifications take precedence: a problem or
     * recovery notification is only requested when the flapping status did
     * not change. */
    if (!was_flapping && is_flapping) {
        OnNotificationsRequested(this, NotificationFlappingStart, cr, "", "");

        Log(LogNotice, "Checkable")
                << "Flapping: Checkable " << GetName() << " started flapping (" << GetFlappingThreshold() << "% < " << GetFlappingCurrent() << "%).";

        NotifyFlapping(origin);
    } else if (was_flapping && !is_flapping) {
        OnNotificationsRequested(this, NotificationFlappingEnd, cr, "", "");

        Log(LogNotice, "Checkable")
                << "Flapping: Checkable " << GetName() << " stopped flapping (" << GetFlappingThreshold() << "% >= " << GetFlappingCurrent() << "%).";

        NotifyFlapping(origin);
    } else if (send_notification)
        OnNotificationsRequested(this, recovery ? NotificationRecovery : NotificationProblem, cr, "", "");
}
Exemplo n.º 3
0
void CheckResultReader::ProcessCheckResultFile(const String& path) const
{
	CONTEXT("Processing check result file '" + path + "'");

	String crfile = String(path.Begin(), path.End() - 3); /* Remove the ".ok" extension. */

	std::ifstream fp;
	fp.exceptions(std::ifstream::badbit);
	fp.open(crfile.CStr());

	std::map<String, String> attrs;

	while (fp.good()) {
		std::string line;
		std::getline(fp, line);

		if (line.empty() || line[0] == '#')
			continue; /* Ignore comments and empty lines. */

		size_t pos = line.find_first_of('=');

		if (pos == std::string::npos)
			continue; /* Ignore invalid lines. */

		String key = line.substr(0, pos);
		String value = line.substr(pos + 1);

		attrs[key] = value;
	}

	/* Remove the checkresult files. */
	if (unlink(path.CStr()) < 0)
		BOOST_THROW_EXCEPTION(posix_error()
			<< boost::errinfo_api_function("unlink")
			<< boost::errinfo_errno(errno)
			<< boost::errinfo_file_name(path));

	if (unlink(crfile.CStr()) < 0)
		BOOST_THROW_EXCEPTION(posix_error()
			<< boost::errinfo_api_function("unlink")
			<< boost::errinfo_errno(errno)
			<< boost::errinfo_file_name(crfile));

	Checkable::Ptr checkable;

	Host::Ptr host = Host::GetByName(attrs["host_name"]);

	if (!host) {
		Log(LogWarning, "CheckResultReader")
			<< "Ignoring checkresult file for host '" << attrs["host_name"] << "': Host does not exist.";

		return;
	}

	if (attrs.find("service_description") != attrs.end()) {
		Service::Ptr service = host->GetServiceByShortName(attrs["service_description"]);

		if (!service) {
			Log(LogWarning, "CheckResultReader")
				<< "Ignoring checkresult file for host '" << attrs["host_name"]
				<< "', service '" << attrs["service_description"] << "': Service does not exist.";

			return;
		}

		checkable = service;
	} else
		checkable = host;

	CheckResult::Ptr result = new CheckResult();
	String output = CompatUtility::UnEscapeString(attrs["output"]);
	std::pair<String, Value> co = PluginUtility::ParseCheckOutput(output);
	result->SetOutput(co.first);
	result->SetPerformanceData(PluginUtility::SplitPerfdata(co.second));
	result->SetState(PluginUtility::ExitStatusToState(Convert::ToLong(attrs["return_code"])));

	if (attrs.find("start_time") != attrs.end())
		result->SetExecutionStart(Convert::ToDouble(attrs["start_time"]));
	else
		result->SetExecutionStart(Utility::GetTime());

	if (attrs.find("finish_time") != attrs.end())
		result->SetExecutionEnd(Convert::ToDouble(attrs["finish_time"]));
	else
		result->SetExecutionEnd(result->GetExecutionStart());

	checkable->ProcessCheckResult(result);

	Log(LogDebug, "CheckResultReader")
		<< "Processed checkresult file for object '" << checkable->GetName() << "'";

	/* Reschedule the next check. The side effect of this is that for as long
	 * as we receive check result files for a host/service we won't execute any
	 * active checks. */
	checkable->SetNextCheck(Utility::GetTime() + checkable->GetCheckInterval());
}
Exemplo n.º 4
0
/**
 * Copies the attributes of a check result into the given field dictionary,
 * using the "check_result." key prefix.
 *
 * Besides the plain attributes and formatted timestamps, the calculated
 * latency and execution time are added. When sending performance data is
 * enabled, every performance data entry is added below
 * "check_result.perfdata.<label>", with spaces, dots and backslashes in the
 * label replaced by underscores and "::" replaced by a dot.
 *
 * @param fields Dictionary receiving the fields.
 * @param checkable Checkable the result belongs to; used for its check
 *                  command and for log messages.
 * @param cr Check result to export. It is dereferenced without a null check.
 */
void ElasticsearchWriter::AddCheckResult(const Dictionary::Ptr& fields, const Checkable::Ptr& checkable, const CheckResult::Ptr& cr)
{
	String prefix = "check_result.";

	fields->Set(prefix + "output", cr->GetOutput());
	fields->Set(prefix + "check_source", cr->GetCheckSource());
	fields->Set(prefix + "exit_status", cr->GetExitStatus());
	fields->Set(prefix + "command", cr->GetCommand());
	fields->Set(prefix + "state", cr->GetState());
	fields->Set(prefix + "vars_before", cr->GetVarsBefore());
	fields->Set(prefix + "vars_after", cr->GetVarsAfter());

	fields->Set(prefix + "execution_start", FormatTimestamp(cr->GetExecutionStart()));
	fields->Set(prefix + "execution_end", FormatTimestamp(cr->GetExecutionEnd()));
	fields->Set(prefix + "schedule_start", FormatTimestamp(cr->GetScheduleStart()));
	fields->Set(prefix + "schedule_end", FormatTimestamp(cr->GetScheduleEnd()));

	/* Add extra calculated field. */
	fields->Set(prefix + "latency", cr->CalculateLatency());
	fields->Set(prefix + "execution_time", cr->CalculateExecutionTime());

	if (!GetEnableSendPerfdata())
		return;

	Array::Ptr perfdata = cr->GetPerformanceData();

	CheckCommand::Ptr checkCommand = checkable->GetCheckCommand();

	if (perfdata) {
		ObjectLock olock(perfdata);
		for (const Value& val : perfdata) {
			PerfdataValue::Ptr pdv;

			/* Entries are either parsed objects already or still need parsing. */
			if (val.IsObjectType<PerfdataValue>())
				pdv = val;
			else {
				try {
					pdv = PerfdataValue::Parse(val);
				} catch (const std::exception&) {
					Log(LogWarning, "ElasticsearchWriter")
						<< "Ignoring invalid perfdata for checkable '"
						<< checkable->GetName() << "' and command '"
						<< checkCommand->GetName() << "' with value: " << val;
					continue;
				}
			}

			/* Single dots are escaped before "::" is turned into a dot, so
			 * only "::" ends up as a separator in the key. */
			String escapedKey = pdv->GetLabel();
			boost::replace_all(escapedKey, " ", "_");
			boost::replace_all(escapedKey, ".", "_");
			boost::replace_all(escapedKey, "\\", "_");
			boost::algorithm::replace_all(escapedKey, "::", ".");

			String perfdataPrefix = prefix + "perfdata." + escapedKey;

			fields->Set(perfdataPrefix + ".value", pdv->GetValue());

			/* Test for "not set" rather than for truthiness: a threshold of 0
			 * (a very common minimum) is valid and must still be exported.
			 * NOTE(review): assumes these getters return a Value that offers
			 * IsEmpty() - confirm against PerfdataValue. */
			if (!pdv->GetMin().IsEmpty())
				fields->Set(perfdataPrefix + ".min", pdv->GetMin());
			if (!pdv->GetMax().IsEmpty())
				fields->Set(perfdataPrefix + ".max", pdv->GetMax());
			if (!pdv->GetWarn().IsEmpty())
				fields->Set(perfdataPrefix + ".warn", pdv->GetWarn());
			if (!pdv->GetCrit().IsEmpty())
				fields->Set(perfdataPrefix + ".crit", pdv->GetCrit());

			if (!pdv->GetUnit().IsEmpty())
				fields->Set(perfdataPrefix + ".unit", pdv->GetUnit());
		}
	}
}