Beispiel #1
0
/*
 * Host flap detection.
 *
 * Stores the host's current state in its circular state history, refreshes
 * hst->percent_state_change through flapping_pct(), and switches the host's
 * flapping status on or off once that percentage reaches the high or low
 * threshold.
 *
 * hst                          - host to examine; NULL is ignored
 * update                       - non-zero when the caller wants the state recorded
 * actual_check                 - TRUE forces the state to be recorded
 * allow_flapstart_notification - handed unchanged to set_host_flap()
 */
void check_for_host_flapping(host *hst, int update, int actual_check, int allow_flapstart_notification)
{
	time_t now;
	unsigned long max_wait;
	int history_is_stale;
	int now_flapping;
	double low_bound;
	double high_bound;
	double pct_change;

	log_debug_info(DEBUGL_FUNCTIONS, 0, "check_for_host_flapping()\n");

	if (hst == NULL || !should_flap_detect(hst))
		return;

	log_debug_info(DEBUGL_FLAPPING, 1, "Checking host '%s' for flapping...\n", hst->name);

	now = time(NULL);

	/*
	 * How long the history may go without a new entry: the average
	 * service check interval when the host has services, otherwise the
	 * host's notification interval.
	 */
	if (hst->total_services != 0)
		max_wait = (hst->total_service_check_interval * interval_length) / hst->total_services;
	else
		max_wait = hst->notification_interval * interval_length;

	history_is_stale = (now - hst->last_state_history_update > (time_t)max_wait);

	/*
	 * Nothing to record unless the caller asked for it, this was an
	 * actual check, or the history has gone stale. Without a new entry
	 * the flapping state cannot change, so stop here.
	 */
	if (!update && actual_check != TRUE && !history_is_stale)
		return;

	/* host-specific thresholds win; non-positive values fall back to the globals */
	low_bound = hst->low_flap_threshold;
	if (low_bound <= 0.0)
		low_bound = low_host_flap_threshold;

	high_bound = hst->high_flap_threshold;
	if (high_bound <= 0.0)
		high_bound = high_host_flap_threshold;

	/* remember when the history was last touched */
	hst->last_state_history_update = now;

	/* store the current state and advance the circular index */
	hst->state_history[hst->state_history_index++] = hst->current_state;
	if (hst->state_history_index >= MAX_STATE_HISTORY_ENTRIES)
		hst->state_history_index = 0;

	hst->percent_state_change = flapping_pct(hst->state_history, hst->state_history_index,
	                                         MAX_STATE_HISTORY_ENTRIES);
	pct_change = hst->percent_state_change;

	log_debug_info(DEBUGL_FLAPPING, 2, "LFT=%.2f, HFT=%.2f, CPC=%.2f, PSC=%.2f%%\n", low_bound, high_bound, pct_change, pct_change);

	/* flap detection switched off globally or for this host */
	if (enable_flap_detection == FALSE || hst->flap_detection_enabled == FALSE)
		return;

	/* strictly between the bounds: undecided, keep the current flap state */
	if (pct_change > low_bound && pct_change < high_bound)
		return;

	/* at or below the low bound means not flapping; otherwise at or above the high bound means flapping */
	now_flapping = (pct_change <= low_bound) ? FALSE : ((pct_change >= high_bound) ? TRUE : FALSE);

	log_debug_info(DEBUGL_FLAPPING, 1, "Host %s flapping (%.2f%% state change).\n", (now_flapping == TRUE) ? "is" : "is not", pct_change);

	if (now_flapping == TRUE) {
		/* flapping just started */
		if (hst->is_flapping == FALSE)
			set_host_flap(hst, pct_change, high_bound, low_bound, allow_flapstart_notification);
	} else {
		/* flapping just stopped */
		if (hst->is_flapping == TRUE)
			clear_host_flap(hst, pct_change, high_bound, low_bound);
	}
}
Beispiel #2
0
/*
 * Service flap detection.
 *
 * Stores the service's current state in its circular state history,
 * refreshes svc->percent_state_change through flapping_pct(), and switches
 * the service's flapping status on or off once that percentage reaches the
 * high or low threshold.
 *
 * svc                          - service to examine; NULL is ignored
 * update                       - zero means "do not record this state"
 * allow_flapstart_notification - handed unchanged to set_service_flap()
 */
void check_for_service_flapping(service *svc, int update, int allow_flapstart_notification)
{
	int now_flapping;
	double low_bound;
	double high_bound;
	double pct_change;

	log_debug_info(DEBUGL_FUNCTIONS, 0, "check_for_service_flapping()\n");

	if (svc == NULL || !should_flap_detect(svc))
		return;

	log_debug_info(DEBUGL_FLAPPING, 1, "Checking service '%s' on host '%s' for flapping...\n", svc->description, svc->host_name);

	/*
	 * Only hard states and soft recoveries enter the history, and only
	 * when the caller asked for an update. Without a new history entry
	 * the flapping state cannot change, so there is nothing more to do.
	 */
	if ((svc->state_type == SOFT_STATE && svc->current_state != STATE_OK) || !update)
		return;

	/* service-specific thresholds win; non-positive values fall back to the globals */
	low_bound = svc->low_flap_threshold;
	if (low_bound <= 0.0)
		low_bound = low_service_flap_threshold;

	high_bound = svc->high_flap_threshold;
	if (high_bound <= 0.0)
		high_bound = high_service_flap_threshold;

	/* store the current state and advance the circular index */
	svc->state_history[svc->state_history_index++] = svc->current_state;
	if (svc->state_history_index >= MAX_STATE_HISTORY_ENTRIES)
		svc->state_history_index = 0;

	svc->percent_state_change = flapping_pct(svc->state_history, svc->state_history_index,
		                                     MAX_STATE_HISTORY_ENTRIES);
	pct_change = svc->percent_state_change;

	log_debug_info(DEBUGL_FLAPPING, 2, "LFT=%.2f, HFT=%.2f, CPC=%.2f, PSC=%.2f%%\n", low_bound, high_bound, pct_change, pct_change);

	/* flap detection switched off globally or for this service */
	if (enable_flap_detection == FALSE || svc->flap_detection_enabled == FALSE)
		return;

	/* strictly between the bounds: undecided, keep the current flap state */
	if (pct_change > low_bound && pct_change < high_bound)
		return;

	/* at or below the low bound means not flapping; otherwise at or above the high bound means flapping */
	now_flapping = (pct_change <= low_bound) ? FALSE : ((pct_change >= high_bound) ? TRUE : FALSE);

	log_debug_info(DEBUGL_FLAPPING, 1, "Service %s flapping (%.2f%% state change).\n", (now_flapping == TRUE) ? "is" : "is not", pct_change);

	if (now_flapping == TRUE) {
		/* flapping just started */
		if (svc->is_flapping == FALSE)
			set_service_flap(svc, pct_change, high_bound, low_bound, allow_flapstart_notification);
	} else {
		/* flapping just stopped */
		if (svc->is_flapping == TRUE)
			clear_service_flap(svc, pct_change, high_bound, low_bound);
	}
}
Beispiel #3
0
/*
 * Service flap detection with an inline, weighted state-change calculation.
 *
 * Optionally stores the service's current state in its circular state
 * history, then walks the whole history and adds a weight for every pair of
 * neighbouring entries that differ. The walk starts at the slot that will be
 * overwritten next; weights grow linearly from low_curve_value for the first
 * pair to high_curve_value for the last. The weighted sum, as a percentage,
 * is stored in svc->percent_state_change and compared against the low and
 * high thresholds to start or stop flapping.
 *
 * svc                          - service to examine; NULL is ignored
 * update                       - TRUE asks for the current state to be recorded
 * allow_flapstart_notification - handed unchanged to set_service_flap()
 */
void check_for_service_flapping(service *svc, int update, int allow_flapstart_notification)
{
	const double low_curve_value = 0.75;
	const double high_curve_value = 1.25;
	double weighted_changes = 0.0;
	double pct_change;
	double low_bound;
	double high_bound;
	int previous_state;
	int record_state;
	int now_flapping;
	int slot;
	int step;

	log_debug_info(DEBUGL_FUNCTIONS, 0, "check_for_service_flapping()\n");

	if (svc == NULL)
		return;

	log_debug_info(DEBUGL_FLAPPING, 1, "Checking service '%s' on host '%s' for flapping...\n", svc->description, svc->host_name);

	/* soft non-OK states are ignored; only hard states and soft recoveries count */
	if (svc->state_type == SOFT_STATE && svc->current_state != STATE_OK)
		return;

	/* service-specific thresholds win; non-positive values fall back to the globals */
	low_bound = svc->low_flap_threshold;
	if (low_bound <= 0.0)
		low_bound = low_service_flap_threshold;

	high_bound = svc->high_flap_threshold;
	if (high_bound <= 0.0)
		high_bound = high_service_flap_threshold;

	/* record the state only if the caller asked for it and should_flap_detect() agrees */
	record_state = (update == TRUE);
	if (record_state && !should_flap_detect(svc))
		record_state = FALSE;

	if (record_state) {
		/* store the current state and advance the circular index */
		svc->state_history[svc->state_history_index++] = svc->current_state;
		if (svc->state_history_index >= MAX_STATE_HISTORY_ENTRIES)
			svc->state_history_index = 0;
	}

	/*
	 * Walk the circular history once, beginning at the current index.
	 * The first entry only seeds the comparison; each later entry that
	 * differs from its predecessor contributes a position-dependent weight.
	 */
	slot = svc->state_history_index;
	previous_state = svc->state_history[slot];

	for (step = 1; step < MAX_STATE_HISTORY_ENTRIES; step++) {

		slot = (slot + 1 >= MAX_STATE_HISTORY_ENTRIES) ? 0 : slot + 1;

		if (svc->state_history[slot] != previous_state)
			weighted_changes += (((double)(step - 1) * (high_curve_value - low_curve_value)) / ((double)(MAX_STATE_HISTORY_ENTRIES - 2))) + low_curve_value;

		previous_state = svc->state_history[slot];
	}

	/* express the weighted change count as a percentage of possible transitions */
	pct_change = (weighted_changes * 100.0) / (double)(MAX_STATE_HISTORY_ENTRIES - 1);

	svc->percent_state_change = pct_change;

	log_debug_info(DEBUGL_FLAPPING, 2, "LFT=%.2f, HFT=%.2f, CPC=%.2f, PSC=%.2f%%\n", low_bound, high_bound, pct_change, pct_change);

	/*
	 * Flap detection switched off globally or for this service: clear a
	 * lingering flapping state and stop. The trailing argument is 1 here
	 * and 0 in the normal stop path below; its meaning is defined by
	 * clear_service_flap(), which is not visible in this block.
	 */
	if (enable_flap_detection == FALSE || svc->flap_detection_enabled == FALSE) {
		if (svc->is_flapping == TRUE)
			clear_service_flap(svc, pct_change, high_bound, low_bound, 1);
		return;
	}

	/* strictly between the bounds: undecided, keep the current flap state */
	if (pct_change > low_bound && pct_change < high_bound)
		return;

	/* at or below the low bound means not flapping; otherwise at or above the high bound means flapping */
	now_flapping = (pct_change <= low_bound) ? FALSE : ((pct_change >= high_bound) ? TRUE : FALSE);

	log_debug_info(DEBUGL_FLAPPING, 1, "Service %s flapping (%.2f%% state change).\n", (now_flapping == TRUE) ? "is" : "is not", pct_change);

	if (now_flapping == TRUE) {
		/* flapping just started */
		if (svc->is_flapping == FALSE)
			set_service_flap(svc, pct_change, high_bound, low_bound, allow_flapstart_notification);
	} else {
		/* flapping just stopped */
		if (svc->is_flapping == TRUE)
			clear_service_flap(svc, pct_change, high_bound, low_bound, 0);
	}
}