Exemple #1
0
/*
 * Poll the nagios I/O broker set until about 'jobs' inputs have been
 * handled or 'msecs' milliseconds have been used up, whichever happens
 * first. A negative result from iobroker_poll() ends the attempt
 * immediately.
 */
void wproc_reap(int jobs, int msecs)
{
	struct timeval prev, cur;

	gettimeofday(&prev, NULL);

	for (;;) {
		int handled;

		/* nothing left to wait for, or the time budget is spent */
		if (jobs <= 0 || msecs <= 0)
			break;

		handled = iobroker_poll(nagios_iobs, msecs);
		if (handled < 0)
			break;

		/* each input is counted as roughly one finished job */
		jobs -= handled;

		/* charge the time spent in this round against the budget */
		gettimeofday(&cur, NULL);
		msecs -= tv_delta_msec(&prev, &cur);
		prev = cur;
	}
}
Exemple #2
0
/* this is the main event handler loop */
int event_execution_loop(void)
{
	timed_event *temp_event, *last_event = NULL;
	time_t last_time = 0L;
	time_t current_time = 0L;
	time_t last_status_update = 0L;
	int poll_time_ms;

	log_debug_info(DEBUGL_FUNCTIONS, 0, "event_execution_loop() start\n");

	time(&last_time);

	while (1) {
		struct timeval now;
		const struct timeval *event_runtime;
		int inputs;

		/* super-priority (hardcoded) events come first */

		/* see if we should exit or restart (a signal was encountered) */
		if (sigshutdown == TRUE || sigrestart == TRUE)
			break;

		/* get the current time */
		time(&current_time);

		if (sigrotate == TRUE) {
			rotate_log_file(current_time);
			update_program_status(FALSE);
		}

		/* hey, wait a second...  we traveled back in time! */
		if (current_time < last_time)
			compensate_for_system_time_change((unsigned long)last_time, (unsigned long)current_time);

		/* else if the time advanced over the specified threshold, try and compensate... */
		else if ((current_time - last_time) >= time_change_threshold)
			compensate_for_system_time_change((unsigned long)last_time, (unsigned long)current_time);

		/* get next scheduled event */
		current_event = temp_event = (timed_event *)squeue_peek(nagios_squeue);

		/* if we don't have any events to handle, exit */
		if (!temp_event) {
			log_debug_info(DEBUGL_EVENTS, 0, "There aren't any events that need to be handled! Exiting...\n");
			break;
		}

		/* keep track of the last time */
		last_time = current_time;

		/* update status information occassionally - NagVis watches the NDOUtils DB to see if Nagios is alive */
		if ((unsigned long)(current_time - last_status_update) > 5) {
			last_status_update = current_time;
			update_program_status(FALSE);
		}

		event_runtime = squeue_event_runtime(temp_event->sq_event);
		if (temp_event != last_event) {
			log_debug_info(DEBUGL_EVENTS, 1, "** Event Check Loop\n");
			log_debug_info(DEBUGL_EVENTS, 1, "Next Event Time: %s", ctime(&temp_event->run_time));
			log_debug_info(DEBUGL_EVENTS, 1, "Current/Max Service Checks: %d/%d (%.3lf%% saturation)\n",
			               currently_running_service_checks, max_parallel_service_checks,
			               ((float)currently_running_service_checks / (float)max_parallel_service_checks) * 100);
		}

		last_event = temp_event;

		gettimeofday(&now, NULL);
		poll_time_ms = tv_delta_msec(&now, event_runtime);
		if (poll_time_ms < 0)
			poll_time_ms = 0;
		else if (poll_time_ms >= 1500)
			poll_time_ms = 1500;

		log_debug_info(DEBUGL_SCHEDULING, 2, "## Polling %dms; sockets=%d; events=%u; iobs=%p\n",
		               poll_time_ms, iobroker_get_num_fds(nagios_iobs),
		               squeue_size(nagios_squeue), nagios_iobs);
		inputs = iobroker_poll(nagios_iobs, poll_time_ms);
		if (inputs < 0 && errno != EINTR) {
			logit(NSLOG_RUNTIME_ERROR, TRUE, "Error: Polling for input on %p failed: %s", nagios_iobs, iobroker_strerror(inputs));
			break;
		}

		log_debug_info(DEBUGL_IPC, 2, "## %d descriptors had input\n", inputs);

		/*
		 * if the event we peaked was removed from the queue from
		 * one of the I/O operations, we must take care not to
		 * try to run at, as we're (almost) sure to access free'd
		 * or invalid memory if we do.
		 */
		if (!current_event) {
			log_debug_info(DEBUGL_EVENTS, 0, "Event was cancelled by iobroker input\n");
			continue;
		}

		gettimeofday(&now, NULL);
		if (tv_delta_msec(&now, event_runtime) >= 0)
			continue;

		/* move on if we shouldn't run this event */
		if (should_run_event(temp_event) == FALSE)
			continue;

		/* handle the event */
		handle_timed_event(temp_event);

		/*
		 * we must remove the entry we've peeked, or
		 * we'll keep getting the same one over and over.
		 * This also maintains sync with broker modules.
		 */
		remove_event(nagios_squeue, temp_event);

		/* reschedule the event if necessary */
		if (temp_event->recurring == TRUE)
			reschedule_event(nagios_squeue, temp_event);

		/* else free memory associated with the event */
		else
			my_free(temp_event);
	}

	log_debug_info(DEBUGL_FUNCTIONS, 0, "event_execution_loop() end\n");

	return OK;
}
Exemple #3
0
/*
 * Entry point of a worker process.
 *
 * Records the master socket and parent pid, sets up the process
 * (working directory, process group, SIGCHLD disposition, close-on-exec
 * flags), creates the I/O broker set and the scheduling queue, and then
 * loops: killing jobs whose timeout has passed, polling for I/O, and
 * checking that the parent still exists. Does not return; the process
 * exits with EXIT_SUCCESS when the loop ends.
 */
static void enter_worker(int sd)
{
	/* 'sd' is usually one end of a socketpair() */
	master_sd = sd;
	parent_pid = getppid();

	/* best effort: the results of both chdir() calls are ignored */
	(void)chdir("/tmp");
	(void)chdir("nagios-workers");

	/* XXX: a setpgid() failure is not handled; it is simply ignored */
	(void)setpgid(0, 0);

	/* child signals must get their default handling in the worker */
	signal(SIGCHLD, SIG_DFL);

	/* mark stdout, stderr and the master socket close-on-exec */
	fcntl(fileno(stdout), F_SETFD, FD_CLOEXEC);
	fcntl(fileno(stderr), F_SETFD, FD_CLOEXEC);
	fcntl(master_sd, F_SETFD, FD_CLOEXEC);

	iobs = iobroker_create();
	if (iobs == NULL) {
		/* XXX: handle this a bit better */
		worker_die("Worker failed to create io broker socket set");
	}

	/* a modest scheduling queue; more than enough for our needs */
	sq = squeue_create(1024);
	set_socket_options(master_sd, 256 * 1024);

	iobroker_register(iobs, master_sd, NULL, receive_command);

	/* keep going for as long as the broker set has descriptors */
	while (iobroker_get_num_fds(iobs) > 0) {
		/*
		 * Poll timeout in milliseconds; stays -1 when the queue is
		 * empty on the first peek. If jobs were killed and the queue
		 * then runs dry, the last computed (non-positive) value is
		 * what gets passed on to iobroker_poll().
		 */
		int wait_ms = -1;
		child_process *job;

		/* kill every job at the head of the queue that has timed out */
		while ((job = (child_process *)squeue_peek(sq)) != NULL) {
			struct timeval current, deadline;

			deadline.tv_sec = job->start.tv_sec + job->timeout;
			deadline.tv_usec = job->start.tv_usec;
			gettimeofday(&current, NULL);
			wait_ms = tv_delta_msec(&current, &deadline);

			/*
			 * 5 extra milliseconds absorb rounding errors, so a
			 * job is never killed before its timeout has passed.
			 */
			wait_ms += 5;
			if (wait_ms > 0)
				break;

			/* deadline reached: this job has timed out */
			wlog("job with pid %d timed out. Killing it", job->pid);
			kill_job(job, ETIME);
		}

		iobroker_poll(iobs, wait_ms);

		/*
		 * without a parent there is nothing sensible left to do,
		 * so leave the loop and exit
		 */
		if (kill(parent_pid, 0) < 0 && errno == ESRCH)
			break;
	}

	/* we exit when the master shuts us down */
	exit(EXIT_SUCCESS);
}