/*
 * Detects host flapping.
 *
 * Records the host's current state in its circular state history,
 * recomputes the percent state change via flapping_pct(), and starts or
 * stops the host's flapping condition when that percentage crosses the
 * high or low flap threshold.
 *
 * hst                          - host to evaluate; a NULL host, or one for
 *                                which should_flap_detect() is false, is ignored
 * update                       - non-zero requests a state history update; it is
 *                                also forced on when actual_check is TRUE or when
 *                                more than wait_threshold seconds have elapsed
 *                                since the last history update
 * actual_check                 - TRUE forces a state history update
 * allow_flapstart_notification - passed through to set_host_flap()
 */
void check_for_host_flapping(host *hst, int update, int actual_check, int allow_flapstart_notification)
{
	int is_flapping = FALSE;
	unsigned long wait_threshold = 0L;
	time_t current_time = 0L;
	double low_threshold = 0.0;
	double high_threshold = 0.0;

	log_debug_info(DEBUGL_FUNCTIONS, 0, "check_for_host_flapping()\n");

	if (hst == NULL || !should_flap_detect(hst))
		return;

	log_debug_info(DEBUGL_FLAPPING, 1, "Checking host '%s' for flapping...\n", hst->name);

	/* fetch the current time (the address-of operator is required here) */
	time(&current_time);

	/* period to wait for updating archived state info if we have no state change */
	if (hst->total_services == 0)
		wait_threshold = hst->notification_interval * interval_length;
	else
		wait_threshold = (hst->total_service_check_interval * interval_length) / hst->total_services;

	/* update history on actual checks and when enough time has passed */
	if (current_time - hst->last_state_history_update > (time_t)wait_threshold)
		update = TRUE;
	if (actual_check == TRUE)
		update = TRUE;

	/*
	 * return early if we shouldn't update state history, as flapping
	 * state won't change and we won't send notifications regardless
	 */
	if (!update)
		return;

	/* what thresholds should we use (global or host-specific)? */
	low_threshold = (hst->low_flap_threshold <= 0.0) ? low_host_flap_threshold : hst->low_flap_threshold;
	high_threshold = (hst->high_flap_threshold <= 0.0) ? high_host_flap_threshold : hst->high_flap_threshold;

	/* update the last record time */
	hst->last_state_history_update = current_time;

	/* record the current state in the state history */
	hst->state_history[hst->state_history_index] = hst->current_state;

	/* advance to the next slot, wrapping around at the end of the buffer */
	hst->state_history_index++;
	if (hst->state_history_index >= MAX_STATE_HISTORY_ENTRIES)
		hst->state_history_index = 0;

	hst->percent_state_change = flapping_pct(hst->state_history, hst->state_history_index, MAX_STATE_HISTORY_ENTRIES);

	log_debug_info(DEBUGL_FLAPPING, 2, "LFT=%.2f, HFT=%.2f, CPC=%.2f, PSC=%.2f%%\n", low_threshold, high_threshold, hst->percent_state_change, hst->percent_state_change);

	/* bail early if flap detection is disabled */
	if (enable_flap_detection == FALSE)
		return;
	if (hst->flap_detection_enabled == FALSE)
		return;

	/* we're undecided, so don't change the current flap state */
	if (hst->percent_state_change > low_threshold && hst->percent_state_change < high_threshold)
		return;

	/* we're below the lower bound, so we're not flapping */
	if (hst->percent_state_change <= low_threshold)
		is_flapping = FALSE;
	/* else we're above the upper bound, so we are flapping */
	else if (hst->percent_state_change >= high_threshold)
		is_flapping = TRUE;

	log_debug_info(DEBUGL_FLAPPING, 1, "Host %s flapping (%.2f%% state change).\n", (is_flapping == TRUE) ? "is" : "is not", hst->percent_state_change);

	/* did the host just start flapping? */
	if (is_flapping == TRUE && hst->is_flapping == FALSE)
		set_host_flap(hst, hst->percent_state_change, high_threshold, low_threshold, allow_flapstart_notification);
	/* did the host just stop flapping? */
	else if (is_flapping == FALSE && hst->is_flapping == TRUE)
		clear_host_flap(hst, hst->percent_state_change, high_threshold, low_threshold);
}
/*
 * Detects service flapping.
 *
 * When `update` is non-zero and the service is eligible, the current state
 * is appended to the service's circular state history, the percent state
 * change is recomputed with flapping_pct(), and the flapping condition is
 * started or cleared if that percentage lies at or beyond a threshold.
 *
 * svc                          - service to evaluate; NULL, or a service for
 *                                which should_flap_detect() is false, is ignored
 * update                       - zero means leave the history (and therefore the
 *                                flap state) untouched
 * allow_flapstart_notification - passed through to set_service_flap()
 */
void check_for_service_flapping(service *svc, int update, int allow_flapstart_notification)
{
	int now_flapping = FALSE;
	double lower_bound = 0.0;
	double upper_bound = 0.0;

	log_debug_info(DEBUGL_FUNCTIONS, 0, "check_for_service_flapping()\n");

	if (svc == NULL || !should_flap_detect(svc)) {
		return;
	}

	log_debug_info(DEBUGL_FLAPPING, 1, "Checking service '%s' on host '%s' for flapping...\n", svc->description, svc->host_name);

	/* soft problem states are not recorded; hard states and soft recoveries are */
	if (svc->state_type == SOFT_STATE && svc->current_state != STATE_OK) {
		return;
	}

	/* without a history update the flap state cannot change, so stop here */
	if (!update) {
		return;
	}

	/* a non-positive service-specific threshold falls back to the global one */
	if (svc->low_flap_threshold <= 0.0) {
		lower_bound = low_service_flap_threshold;
	} else {
		lower_bound = svc->low_flap_threshold;
	}
	if (svc->high_flap_threshold <= 0.0) {
		upper_bound = high_service_flap_threshold;
	} else {
		upper_bound = svc->high_flap_threshold;
	}

	/* store the current state and step to the next slot, wrapping at the end */
	svc->state_history[svc->state_history_index] = svc->current_state;
	if (++svc->state_history_index >= MAX_STATE_HISTORY_ENTRIES) {
		svc->state_history_index = 0;
	}

	svc->percent_state_change = flapping_pct(svc->state_history, svc->state_history_index, MAX_STATE_HISTORY_ENTRIES);

	log_debug_info(DEBUGL_FLAPPING, 2, "LFT=%.2f, HFT=%.2f, CPC=%.2f, PSC=%.2f%%\n", lower_bound, upper_bound, svc->percent_state_change, svc->percent_state_change);

	/* nothing more to do when flap detection is off globally or for this service */
	if (enable_flap_detection == FALSE || svc->flap_detection_enabled == FALSE) {
		return;
	}

	/* strictly between the bounds: undecided, keep the current flap state */
	if (svc->percent_state_change > lower_bound && svc->percent_state_change < upper_bound) {
		return;
	}

	/* at or below the lower bound wins; otherwise at or above the upper bound means flapping */
	if (!(svc->percent_state_change <= lower_bound) && svc->percent_state_change >= upper_bound) {
		now_flapping = TRUE;
	}

	log_debug_info(DEBUGL_FLAPPING, 1, "Service %s flapping (%.2f%% state change).\n", (now_flapping == TRUE) ? "is" : "is not", svc->percent_state_change);

	if (now_flapping == TRUE) {
		/* flapping just started */
		if (svc->is_flapping == FALSE) {
			set_service_flap(svc, svc->percent_state_change, upper_bound, lower_bound, allow_flapstart_notification);
		}
	} else {
		/* flapping just stopped */
		if (svc->is_flapping == TRUE) {
			clear_service_flap(svc, svc->percent_state_change, upper_bound, lower_bound);
		}
	}
}
/*
 * Detects service flapping.
 *
 * Optionally records the service's current state in its circular state
 * history, then walks the whole history from the oldest slot and computes a
 * weighted ("curved") percent state change in which later transitions count
 * more than earlier ones. The result is stored in svc->percent_state_change
 * and compared against the low/high thresholds to start or clear flapping.
 *
 * svc                          - service to evaluate; NULL is ignored
 * update                       - TRUE requests that the current state be
 *                                recorded (only honoured when
 *                                should_flap_detect() agrees)
 * allow_flapstart_notification - passed through to set_service_flap()
 */
void check_for_service_flapping(service *svc, int update, int allow_flapstart_notification)
{
	const double curve_low = 0.75;
	const double curve_high = 1.25;
	double lower_bound = 0.0;
	double upper_bound = 0.0;
	double weighted_changes = 0.0;
	double weighted_pct = 0.0;
	int previous_state = STATE_OK;
	int now_flapping = FALSE;
	int step = 0;
	int slot = 0;

	log_debug_info(DEBUGL_FUNCTIONS, 0, "check_for_service_flapping()\n");

	if (svc == NULL) {
		return;
	}

	log_debug_info(DEBUGL_FLAPPING, 1, "Checking service '%s' on host '%s' for flapping...\n", svc->description, svc->host_name);

	/* soft problem states are not recorded; hard states and soft recoveries are */
	if (svc->state_type == SOFT_STATE && svc->current_state != STATE_OK) {
		return;
	}

	/* a non-positive service-specific threshold falls back to the global one */
	if (svc->low_flap_threshold <= 0.0) {
		lower_bound = low_service_flap_threshold;
	} else {
		lower_bound = svc->low_flap_threshold;
	}
	if (svc->high_flap_threshold <= 0.0) {
		upper_bound = high_service_flap_threshold;
	} else {
		upper_bound = svc->high_flap_threshold;
	}

	/* record the current state only when asked to and when the service qualifies */
	if (update == TRUE && should_flap_detect(svc)) {
		svc->state_history[svc->state_history_index] = svc->current_state;

		/* step to the next slot, wrapping at the end of the buffer */
		if (++svc->state_history_index >= MAX_STATE_HISTORY_ENTRIES) {
			svc->state_history_index = 0;
		}
	}

	/*
	 * Walk the full history starting at the next-write slot (the oldest
	 * entry). The first entry only seeds previous_state; every later entry
	 * that differs from its predecessor adds a weight that grows linearly
	 * from curve_low to curve_high with its position.
	 */
	slot = svc->state_history_index;
	for (step = 0; step < MAX_STATE_HISTORY_ENTRIES; step++) {
		if (step > 0 && previous_state != svc->state_history[slot]) {
			weighted_changes += (((double)(step - 1) * (curve_high - curve_low)) / ((double)(MAX_STATE_HISTORY_ENTRIES - 2))) + curve_low;
		}

		previous_state = svc->state_history[slot];

		slot++;
		if (slot >= MAX_STATE_HISTORY_ENTRIES) {
			slot = 0;
		}
	}

	/* express the weighted change count as a percentage of possible transitions */
	weighted_pct = (double)(((double)weighted_changes * 100.0) / (double)(MAX_STATE_HISTORY_ENTRIES - 1));
	svc->percent_state_change = weighted_pct;

	log_debug_info(DEBUGL_FLAPPING, 2, "LFT=%.2f, HFT=%.2f, CPC=%.2f, PSC=%.2f%%\n", lower_bound, upper_bound, weighted_pct, weighted_pct);

	/* flap detection off (globally or for this service): clear any flapping state and stop */
	if (enable_flap_detection == FALSE || svc->flap_detection_enabled == FALSE) {
		if (svc->is_flapping == TRUE) {
			clear_service_flap(svc, weighted_pct, upper_bound, lower_bound, 1);
		}
		return;
	}

	/* strictly between the bounds: undecided, keep the current flap state */
	if (weighted_pct > lower_bound && weighted_pct < upper_bound) {
		return;
	}

	/* at or below the lower bound wins; otherwise at or above the upper bound means flapping */
	if (!(weighted_pct <= lower_bound) && weighted_pct >= upper_bound) {
		now_flapping = TRUE;
	}

	log_debug_info(DEBUGL_FLAPPING, 1, "Service %s flapping (%.2f%% state change).\n", (now_flapping == TRUE) ? "is" : "is not", weighted_pct);

	if (now_flapping == TRUE) {
		/* flapping just started */
		if (svc->is_flapping == FALSE) {
			set_service_flap(svc, weighted_pct, upper_bound, lower_bound, allow_flapstart_notification);
		}
	} else {
		/* flapping just stopped */
		if (svc->is_flapping == TRUE) {
			clear_service_flap(svc, weighted_pct, upper_bound, lower_bound, 0);
		}
	}

	return;
}