/*
 * Wait for worker results: keep polling the I/O broker until roughly
 * 'jobs' inputs have been seen or the 'msecs' millisecond budget has
 * been used up, whichever happens first. A polling error ends the
 * wait immediately.
 */
void wproc_reap(int jobs, int msecs)
{
	struct timeval before, after;

	gettimeofday(&before, NULL);

	for (;;) {
		int ready;

		/* nothing left to wait for, or out of time */
		if (jobs <= 0 || msecs <= 0)
			break;

		ready = iobroker_poll(nagios_iobs, msecs);
		if (ready < 0)
			break;

		/* one input is counted as approximately one finished job */
		jobs -= ready;

		/* charge the time this round took against the remaining budget */
		gettimeofday(&after, NULL);
		msecs -= tv_delta_msec(&before, &after);
		before = after;
	}
}
/* this is the main event handler loop */ int event_execution_loop(void) { timed_event *temp_event, *last_event = NULL; time_t last_time = 0L; time_t current_time = 0L; time_t last_status_update = 0L; int poll_time_ms; log_debug_info(DEBUGL_FUNCTIONS, 0, "event_execution_loop() start\n"); time(&last_time); while (1) { struct timeval now; const struct timeval *event_runtime; int inputs; /* super-priority (hardcoded) events come first */ /* see if we should exit or restart (a signal was encountered) */ if (sigshutdown == TRUE || sigrestart == TRUE) break; /* get the current time */ time(¤t_time); if (sigrotate == TRUE) { rotate_log_file(current_time); update_program_status(FALSE); } /* hey, wait a second... we traveled back in time! */ if (current_time < last_time) compensate_for_system_time_change((unsigned long)last_time, (unsigned long)current_time); /* else if the time advanced over the specified threshold, try and compensate... */ else if ((current_time - last_time) >= time_change_threshold) compensate_for_system_time_change((unsigned long)last_time, (unsigned long)current_time); /* get next scheduled event */ current_event = temp_event = (timed_event *)squeue_peek(nagios_squeue); /* if we don't have any events to handle, exit */ if (!temp_event) { log_debug_info(DEBUGL_EVENTS, 0, "There aren't any events that need to be handled! 
Exiting...\n"); break; } /* keep track of the last time */ last_time = current_time; /* update status information occassionally - NagVis watches the NDOUtils DB to see if Nagios is alive */ if ((unsigned long)(current_time - last_status_update) > 5) { last_status_update = current_time; update_program_status(FALSE); } event_runtime = squeue_event_runtime(temp_event->sq_event); if (temp_event != last_event) { log_debug_info(DEBUGL_EVENTS, 1, "** Event Check Loop\n"); log_debug_info(DEBUGL_EVENTS, 1, "Next Event Time: %s", ctime(&temp_event->run_time)); log_debug_info(DEBUGL_EVENTS, 1, "Current/Max Service Checks: %d/%d (%.3lf%% saturation)\n", currently_running_service_checks, max_parallel_service_checks, ((float)currently_running_service_checks / (float)max_parallel_service_checks) * 100); } last_event = temp_event; gettimeofday(&now, NULL); poll_time_ms = tv_delta_msec(&now, event_runtime); if (poll_time_ms < 0) poll_time_ms = 0; else if (poll_time_ms >= 1500) poll_time_ms = 1500; log_debug_info(DEBUGL_SCHEDULING, 2, "## Polling %dms; sockets=%d; events=%u; iobs=%p\n", poll_time_ms, iobroker_get_num_fds(nagios_iobs), squeue_size(nagios_squeue), nagios_iobs); inputs = iobroker_poll(nagios_iobs, poll_time_ms); if (inputs < 0 && errno != EINTR) { logit(NSLOG_RUNTIME_ERROR, TRUE, "Error: Polling for input on %p failed: %s", nagios_iobs, iobroker_strerror(inputs)); break; } log_debug_info(DEBUGL_IPC, 2, "## %d descriptors had input\n", inputs); /* * if the event we peaked was removed from the queue from * one of the I/O operations, we must take care not to * try to run at, as we're (almost) sure to access free'd * or invalid memory if we do. 
*/ if (!current_event) { log_debug_info(DEBUGL_EVENTS, 0, "Event was cancelled by iobroker input\n"); continue; } gettimeofday(&now, NULL); if (tv_delta_msec(&now, event_runtime) >= 0) continue; /* move on if we shouldn't run this event */ if (should_run_event(temp_event) == FALSE) continue; /* handle the event */ handle_timed_event(temp_event); /* * we must remove the entry we've peeked, or * we'll keep getting the same one over and over. * This also maintains sync with broker modules. */ remove_event(nagios_squeue, temp_event); /* reschedule the event if necessary */ if (temp_event->recurring == TRUE) reschedule_event(nagios_squeue, temp_event); /* else free memory associated with the event */ else my_free(temp_event); } log_debug_info(DEBUGL_FUNCTIONS, 0, "event_execution_loop() end\n"); return OK; }
static void enter_worker(int sd) { /* created with socketpair(), usually */ master_sd = sd; parent_pid = getppid(); (void)chdir("/tmp"); (void)chdir("nagios-workers"); if (setpgid(0, 0)) { /* XXX: handle error somehow, or maybe just ignore it */ } /* we need to catch child signals the default way */ signal(SIGCHLD, SIG_DFL); fcntl(fileno(stdout), F_SETFD, FD_CLOEXEC); fcntl(fileno(stderr), F_SETFD, FD_CLOEXEC); fcntl(master_sd, F_SETFD, FD_CLOEXEC); iobs = iobroker_create(); if (!iobs) { /* XXX: handle this a bit better */ worker_die("Worker failed to create io broker socket set"); } /* * Create a modest scheduling queue that will be * more than enough for our needs */ sq = squeue_create(1024); set_socket_options(master_sd, 256 * 1024); iobroker_register(iobs, master_sd, NULL, receive_command); while (iobroker_get_num_fds(iobs) > 0) { int poll_time = -1; /* check for timed out jobs */ for (;;) { child_process *cp; struct timeval now, tmo; /* stop when scheduling queue is empty */ cp = (child_process *)squeue_peek(sq); if (!cp) break; tmo.tv_usec = cp->start.tv_usec; tmo.tv_sec = cp->start.tv_sec + cp->timeout; gettimeofday(&now, NULL); poll_time = tv_delta_msec(&now, &tmo); /* * A little extra takes care of rounding errors and * ensures we never kill a job before it times out. * 5 milliseconds is enough to take care of that. */ poll_time += 5; if (poll_time > 0) break; /* this job timed out, so kill it */ wlog("job with pid %d timed out. Killing it", cp->pid); kill_job(cp, ETIME); } iobroker_poll(iobs, poll_time); /* * if our parent goes away we can't really do anything * sensible at all, so let's just break out and exit */ if (kill(parent_pid, 0) < 0 && errno == ESRCH) { break; } } /* we exit when the master shuts us down */ exit(EXIT_SUCCESS); }