示例#1
0
/*
 * Detects host flapping.
 *
 * Optionally records the host's current state in its circular state
 * history, recomputes the weighted percent state change across that
 * history and stores it in hst->percent_state_change.  When flap detection
 * is enabled both program-wide and for the host, the result is compared
 * with the low/high thresholds and set_host_flap() or clear_host_flap() is
 * called if the flapping condition has just started or stopped.
 *
 * hst                          - host to examine; NULL returns immediately
 * update                       - TRUE to request that the current state be
 *                                recorded in the history
 * actual_check                 - when FALSE, recording is skipped until the
 *                                wait threshold has elapsed since the last
 *                                recorded entry
 * allow_flapstart_notification - handed unchanged to set_host_flap()
 */
void check_for_host_flapping(host *hst, int update, int actual_check, int allow_flapstart_notification) {
	/* weight given to the first and to the last comparison of the walk */
	const double curve_floor = 0.75;
	const double curve_ceiling = 1.25;
	const double curve_span = curve_ceiling - curve_floor;
	int record_state = FALSE;
	int now_flapping = FALSE;
	int step = 0;
	int slot = 0;
	int previous_state = HOST_UP;
	unsigned long wait_threshold = 0L;
	double weighted_changes = 0.0;
	double pct_change = 0.0;
	double low_threshold = 0.0;
	double high_threshold = 0.0;
	time_t current_time = 0L;

	log_debug_info(DEBUGL_FUNCTIONS, 0, "check_for_host_flapping()\n");

	if (hst == NULL)
		return;

	log_debug_info(DEBUGL_FLAPPING, 1, "Checking host '%s' for flapping...\n", hst->name);

	time(&current_time);

	/* how long to hold off recording a state when no real check was run */
	if (hst->total_services != 0)
		wait_threshold = (hst->total_service_check_interval * interval_length) / hst->total_services;
	else
		wait_threshold = hst->notification_interval * interval_length;

	/* only an explicit TRUE from the caller can lead to a history update */
	record_state = (update == TRUE) ? TRUE : FALSE;

	/* states the host has opted out of are never recorded */
	if (record_state == TRUE) {
		if ((hst->current_state == HOST_UP && hst->flap_detection_on_up == FALSE)
		        || (hst->current_state == HOST_DOWN && hst->flap_detection_on_down == FALSE)
		        || (hst->current_state == HOST_UNREACHABLE && hst->flap_detection_on_unreachable == FALSE))
			record_state = FALSE;
	}

	/*
	 * without an actual check, record only once the wait threshold has passed
	 * NOTE(review): the elapsed time is compared directly against an unsigned
	 * long - confirm the intended outcome if the clock has moved backwards
	 */
	if (record_state == TRUE && actual_check == FALSE
	        && (current_time - hst->last_state_history_update) < wait_threshold)
		record_state = FALSE;

	/* a host-specific threshold wins unless it is zero or negative */
	low_threshold = hst->low_flap_threshold;
	if (low_threshold <= 0.0)
		low_threshold = low_host_flap_threshold;

	high_threshold = hst->high_flap_threshold;
	if (high_threshold <= 0.0)
		high_threshold = high_host_flap_threshold;

	/* append the current state to the circular history */
	if (record_state == TRUE) {

		hst->last_state_history_update = current_time;
		hst->state_history[hst->state_history_index] = hst->current_state;

		/* advance to the next slot, wrapping at the end of the buffer */
		if (++hst->state_history_index >= MAX_STATE_HISTORY_ENTRIES)
			hst->state_history_index = 0;
	}

	/*
	 * Walk the history once, starting at the slot the index points to, and
	 * compare every entry with the one visited before it.  The weight of a
	 * change grows linearly from curve_floor for the first comparison to
	 * curve_ceiling for the last one.
	 */
	slot = hst->state_history_index;
	previous_state = hst->state_history[slot];

	for (step = 1; step < MAX_STATE_HISTORY_ENTRIES; step++) {

		if (++slot >= MAX_STATE_HISTORY_ENTRIES)
			slot = 0;

		if (hst->state_history[slot] != previous_state)
			weighted_changes += (((double)(step - 1) * curve_span) / ((double)(MAX_STATE_HISTORY_ENTRIES - 2))) + curve_floor;

		previous_state = hst->state_history[slot];
	}

	/* express the weighted change count as a percentage of the comparisons made */
	pct_change = (weighted_changes * 100.0) / (double)(MAX_STATE_HISTORY_ENTRIES - 1);

	hst->percent_state_change = pct_change;

	log_debug_info(DEBUGL_FLAPPING, 2, "LFT (low_threshold): %.2f, HFT (high_threshold): %.2f, CPC (curved_percent_change): %.2f, PSC (curved_percent_change): %.2f%%\n",
			low_threshold, high_threshold, pct_change, pct_change);

	/* flap detection must be enabled both program-wide and for this host */
	if (enable_flap_detection == FALSE || hst->flap_detection_enabled == FALSE)
		return;

	/* strictly between the thresholds: undecided, keep the current flap state */
	if (pct_change > low_threshold && pct_change < high_threshold)
		return;

	/* at or below the low threshold means not flapping, at or above the high one means flapping */
	if (pct_change <= low_threshold)
		now_flapping = FALSE;
	else
		now_flapping = (pct_change >= high_threshold) ? TRUE : FALSE;

	log_debug_info(DEBUGL_FLAPPING, 1, "Host %s flapping (%.2f%% state change).\n", (now_flapping == TRUE) ? "is" : "is not", pct_change);

	if (now_flapping == TRUE) {
		/* the host just started flapping */
		if (hst->is_flapping == FALSE)
			set_host_flap(hst, pct_change, high_threshold, low_threshold, allow_flapstart_notification);
	}
	else if (hst->is_flapping == TRUE) {
		/* the host just stopped flapping */
		clear_host_flap(hst, pct_change, high_threshold, low_threshold);
	}
}
示例#2
0
/*
 * Detects host flapping.
 *
 * Returns without doing anything when hst is NULL, when
 * should_flap_detect() rejects the host, or when no history update is due.
 * Otherwise the current state is appended to the host's circular state
 * history, hst->percent_state_change is refreshed from flapping_pct() and,
 * if flap detection is enabled both program-wide and for the host, the
 * flapping condition is started or cleared when the percentage is at or
 * beyond the high or low threshold.
 *
 * hst                          - host to examine
 * update                       - non-zero to force a history update
 * actual_check                 - TRUE also forces a history update
 * allow_flapstart_notification - handed unchanged to set_host_flap()
 */
void check_for_host_flapping(host *hst, int update, int actual_check, int allow_flapstart_notification)
{
	time_t now = 0L;
	unsigned long wait_threshold = 0L;
	double low_threshold = 0.0;
	double high_threshold = 0.0;
	int flapping_now = FALSE;

	log_debug_info(DEBUGL_FUNCTIONS, 0, "check_for_host_flapping()\n");

	if (hst == NULL)
		return;
	if (!should_flap_detect(hst))
		return;

	log_debug_info(DEBUGL_FLAPPING, 1, "Checking host '%s' for flapping...\n", hst->name);

	time(&now);

	/* how long to wait before archiving a state when nothing forces an update */
	if (hst->total_services != 0)
		wait_threshold = (hst->total_service_check_interval * interval_length) / hst->total_services;
	else
		wait_threshold = hst->notification_interval * interval_length;

	/*
	 * An update is due when the caller asked for one, when this was an
	 * actual check, or when more than the wait threshold has elapsed since
	 * the last recorded entry.  If none of these hold, the flapping state
	 * cannot change, so there is nothing left to do.
	 */
	if (!update && actual_check != TRUE
	        && !(now - hst->last_state_history_update > (time_t)wait_threshold))
		return;

	/* a host-specific threshold wins unless it is zero or negative */
	low_threshold = hst->low_flap_threshold;
	if (low_threshold <= 0.0)
		low_threshold = low_host_flap_threshold;

	high_threshold = hst->high_flap_threshold;
	if (high_threshold <= 0.0)
		high_threshold = high_host_flap_threshold;

	/* append the current state to the circular history */
	hst->last_state_history_update = now;
	hst->state_history[hst->state_history_index] = hst->current_state;

	/* advance to the next slot, wrapping at the end of the buffer */
	if (++hst->state_history_index >= MAX_STATE_HISTORY_ENTRIES)
		hst->state_history_index = 0;

	hst->percent_state_change = flapping_pct(hst->state_history,
	                                         hst->state_history_index,
	                                         MAX_STATE_HISTORY_ENTRIES);

	log_debug_info(DEBUGL_FLAPPING, 2, "LFT=%.2f, HFT=%.2f, CPC=%.2f, PSC=%.2f%%\n", low_threshold, high_threshold, hst->percent_state_change, hst->percent_state_change);

	/* flap detection must be enabled both program-wide and for this host */
	if (enable_flap_detection == FALSE || hst->flap_detection_enabled == FALSE)
		return;

	/* strictly between the thresholds: undecided, keep the current flap state */
	if (hst->percent_state_change > low_threshold && hst->percent_state_change < high_threshold)
		return;

	/* at or below the low threshold means not flapping, at or above the high one means flapping */
	if (hst->percent_state_change <= low_threshold)
		flapping_now = FALSE;
	else
		flapping_now = (hst->percent_state_change >= high_threshold) ? TRUE : FALSE;

	log_debug_info(DEBUGL_FLAPPING, 1, "Host %s flapping (%.2f%% state change).\n", (flapping_now == TRUE) ? "is" : "is not", hst->percent_state_change);

	if (flapping_now == TRUE) {
		/* the host just started flapping */
		if (hst->is_flapping == FALSE)
			set_host_flap(hst, hst->percent_state_change, high_threshold, low_threshold, allow_flapstart_notification);
	}
	else if (hst->is_flapping == TRUE) {
		/* the host just stopped flapping */
		clear_host_flap(hst, hst->percent_state_change, high_threshold, low_threshold);
	}
}