예제 #1
0
/* remove an event from the queue */
void remove_event(squeue_t *sq, timed_event *event)
{
#ifdef USE_EVENT_BROKER
	/* send event data to broker */
	broker_timed_event(NEBTYPE_TIMEDEVENT_REMOVE, NEBFLAG_NONE, NEBATTR_NONE, event, NULL);
#endif
	if (!event || !event->sq_event)
		return;

	if (sq)
		squeue_remove(sq, event->sq_event);
	else
		logit(NSLOG_RUNTIME_ERROR, TRUE,
		      "Error: remove_event() called for %s event with NULL sq parameter\n",
		      EVENT_TYPE_STR(event->event_type));

	if (sq == nagios_squeue)
		track_events(event->event_type, -1);

	event->sq_event = NULL; /* mark this event as unscheduled */

	/*
	 * if we catch an event from the queue which gets removed when
	 * we go polling for input (as might happen with f.e. downtime
	 * events that we get "cancel" commands for just as they are
	 * about to start or expire), we must make sure we mark the
	 * current event as no longer scheduled, or we'll run into
	 * segfaults and memory corruptions for sure.
	 */
	if (event == current_event) {
		current_event = NULL;
	}
}
예제 #2
0
파일: worker.c 프로젝트: ageric/nagios
static void destroy_job(child_process *cp)
{
	/*
	 * we must remove the job's timeout ticker,
	 * or we'll end up accessing an already free()'d
	 * pointer, or the pointer to a different child.
	 */
	squeue_remove(sq, cp->ei->sq_event);
	running_jobs--;
	fanout_remove(ptab, cp->ei->pid);

	if (cp->outstd.buf) {
		free(cp->outstd.buf);
		cp->outstd.buf = NULL;
	}
	if (cp->outerr.buf) {
		free(cp->outerr.buf);
		cp->outerr.buf = NULL;
	}

	kvvec_destroy(cp->request, KVVEC_FREE_ALL);
	free(cp->cmd);

	free(cp->ei);
	free(cp);
}
예제 #3
0
파일: worker.c 프로젝트: formorer/nagios
static int finish_job(child_process *cp, int reason)
{
	static struct kvvec resp = KVVEC_INITIALIZER;
	struct rusage *ru = &cp->rusage;
	int i, ret;

	/* how many key/value pairs do we need? */
	if (kvvec_init(&resp, 12 + cp->request->kv_pairs) == NULL) {
		/* what the hell do we do now? */
		exit_worker();
	}

	gettimeofday(&cp->stop, NULL);

	if (running_jobs != squeue_size(sq)) {
		wlog("running_jobs(%d) != squeue_size(sq) (%d)\n",
			 running_jobs, squeue_size(sq));
		wlog("started: %d; running: %d; finished: %d\n",
			 started, running_jobs, started - running_jobs);
	}

	/*
	 * we must remove the job's timeout ticker,
	 * or we'll end up accessing an already free()'d
	 * pointer, or the pointer to a different child.
	 */
	squeue_remove(sq, cp->sq_event);

	/* get rid of still open filedescriptors */
	if (cp->outstd.fd != -1)
		iobroker_close(iobs, cp->outstd.fd);
	if (cp->outerr.fd != -1)
		iobroker_close(iobs, cp->outerr.fd);

	cp->runtime = tv_delta_f(&cp->start, &cp->stop);

	/*
	 * Now build the return message.
	 * First comes the request, minus environment variables
	 */
	for (i = 0; i < cp->request->kv_pairs; i++) {
		struct key_value *kv = &cp->request->kv[i];
		/* skip environment macros */
		if (kv->key_len == 3 && !strcmp(kv->key, "env")) {
			continue;
		}
		kvvec_addkv_wlen(&resp, kv->key, kv->key_len, kv->value, kv->value_len);
	}
	kvvec_addkv(&resp, "wait_status", (char *)mkstr("%d", cp->ret));
	kvvec_addkv_wlen(&resp, "outstd", 6, cp->outstd.buf, cp->outstd.len);
	kvvec_addkv_wlen(&resp, "outerr", 6, cp->outerr.buf, cp->outerr.len);
	kvvec_add_tv(&resp, "start", cp->start);
	kvvec_add_tv(&resp, "stop", cp->stop);
	kvvec_addkv(&resp, "runtime", (char *)mkstr("%f", cp->runtime));
	if (!reason) {
		/* child exited nicely */
		kvvec_addkv(&resp, "exited_ok", "1");
		kvvec_add_tv(&resp, "ru_utime", ru->ru_utime);
		kvvec_add_tv(&resp, "ru_stime", ru->ru_stime);
		kvvec_add_long(&resp, "ru_minflt", ru->ru_minflt);
		kvvec_add_long(&resp, "ru_majflt", ru->ru_majflt);
		kvvec_add_long(&resp, "ru_nswap", ru->ru_nswap);
		kvvec_add_long(&resp, "ru_inblock", ru->ru_inblock);
		kvvec_add_long(&resp, "ru_oublock", ru->ru_oublock);
		kvvec_add_long(&resp, "ru_nsignals", ru->ru_nsignals);
	} else {
		/* some error happened */
		kvvec_addkv(&resp, "exited_ok", "0");
		kvvec_addkv(&resp, "error_code", (char *)mkstr("%d", reason));
	}
	ret = send_kvvec(master_sd, &resp);
	if (ret < 0 && errno == EPIPE)
		exit_worker();

	running_jobs--;
	if (cp->outstd.buf) {
		free(cp->outstd.buf);
		cp->outstd.buf = NULL;
	}
	if (cp->outerr.buf) {
		free(cp->outerr.buf);
		cp->outerr.buf = NULL;
	}

	kvvec_destroy(cp->request, KVVEC_FREE_ALL);
	free(cp->cmd);
	free(cp);

	return 0;
}
예제 #4
0
파일: worker.c 프로젝트: ageric/nagios
/*
 * "What can the harvest hope for, if not for the care
 * of the Reaper Man?"
 *   -- Terry Pratchett, Reaper Man
 *
 * We end up here no matter if the job is stale (ie, the child is
 * stuck in uninterruptable sleep) or if it's the first time we try
 * to kill it.
 * A job is considered reaped once we reap our direct child, in
 * which case init will become parent of our grandchildren.
 * It's also considered fully reaped if kill() results in ESRCH or
 * EPERM, or if wait()ing for the process group results in ECHILD.
 */
static void kill_job(child_process *cp, int reason)
{
	int ret, status, reaped = 0;
	int pid = cp ? cp->ei->pid : 0;

	/*
	 * first attempt at reaping, so see if we just failed to
	 * notice that things were going wrong her
	 */
	if (reason == ETIME && !check_completion(cp, WNOHANG)) {
		timeouts++;
		wlog("job %d with pid %d reaped at timeout. timeouts=%u; started=%u", cp->id, pid, timeouts, started);
		return;
	}

	/* brutal but efficient */
	if (kill(-cp->ei->pid, SIGKILL) < 0) {
		if (errno == ESRCH) {
			reaped = 1;
		} else {
			wlog("kill(-%d, SIGKILL) failed: %s\n", cp->ei->pid, strerror(errno));
		}
	}

	/*
	 * we must iterate at least once, in case kill() returns
	 * ESRCH when there's zombies
	 */
	do {
		ret = waitpid(cp->ei->pid, &status, WNOHANG);
		if (ret < 0 && errno == EINTR)
			continue;

		if (ret == cp->ei->pid || (ret < 0 && errno == ECHILD)) {
			reaped = 1;
			break;
		}
		if (!ret) {
			struct timeval tv;

			gettimeofday(&tv, NULL);
			/*
			 * stale process (signal may not have been delivered, or
			 * the child can be stuck in uninterruptible sleep). We
			 * can't hang around forever, so just reschedule a new
			 * reap attempt later.
			 */
			if (reason == ESTALE) {
				tv.tv_sec += 5;
				wlog("Failed to reap child with pid %d. Next attempt @ %lu.%lu", cp->ei->pid, tv.tv_sec, tv.tv_usec);
			} else {
				tv.tv_usec = 250000;
				if (tv.tv_usec > 1000000) {
					tv.tv_usec -= 1000000;
					tv.tv_sec += 1;
				}
				cp->ei->state = ESTALE;
				finish_job(cp, reason);
			}
			squeue_remove(sq, cp->ei->sq_event);
			cp->ei->sq_event = squeue_add_tv(sq, &tv, cp);
			return;
		}
	} while (!reaped);

	if (cp->ei->state != ESTALE)
		finish_job(cp, reason);
	else
		wlog("job %d (pid=%d): Dormant child reaped", cp->id, cp->ei->pid);
	destroy_job(cp);
}
예제 #5
0
int main(int argc, char **argv)
{
	squeue_t *sq;
	struct timeval tv;
	sq_test_event a, b, c, d, *x;

	t_set_colors(0);
	t_start("squeue tests");

	a.id = 1;
	b.id = 2;
	c.id = 3;
	d.id = 4;

	gettimeofday(&tv, NULL);
	/* Order in is a, b, c, d, but we should get b, c, d, a out. */
	srand(tv.tv_usec ^ tv.tv_sec);
	t((sq = squeue_create(1024)) != NULL);
	t(squeue_size(sq) == 0);

	/* we fill and empty the squeue completely once before testing */
	sq_test_random(sq);
	t(squeue_size(sq) == 0, "Size should be 0 after first sq_test_random");

	t((a.evt = squeue_add(sq, time(NULL) + 9, &a)) != NULL);
	t(squeue_size(sq) == 1);
	t((b.evt = squeue_add(sq, time(NULL) + 3, &b)) != NULL);
	t(squeue_size(sq) == 2);
	t((c.evt = squeue_add_msec(sq, time(NULL) + 5, 0, &c)) != NULL);
	t(squeue_size(sq) == 3);
	t((d.evt = squeue_add_usec(sq, time(NULL) + 5, 1, &d)) != NULL);
	t(squeue_size(sq) == 4);

	/* add and remove lots. remainder should be what we have above */
	sq_test_random(sq);

	/* testing squeue_peek() */
	t((x = (sq_test_event *)squeue_peek(sq)) != NULL);
	t(x == &b, "x: %p; a: %p; b: %p; c: %p; d: %p\n", x, &a, &b, &c, &d);
	t(x->id == b.id);
	t(squeue_size(sq) == 4);

	/* testing squeue_remove() and re-add */
	t(squeue_remove(sq, b.evt) == 0);
	t(squeue_size(sq) == 3);
	t((x = squeue_peek(sq)) != NULL);
	t(x == &c);
	t((b.evt = squeue_add(sq, time(NULL) + 3, &b)) != NULL);
	t(squeue_size(sq) == 4);

	/* peek should now give us the &b event (again) */
	t((x = squeue_peek(sq)) != NULL);
	if (x != &b) {
		printf("about to fail pretty f*****g hard...\n");
		printf("ea: %p; &b: %p; &c: %p; ed: %p; x: %p\n",
		       &a, &b, &c, &d, x);
	}
	t(x == &b);
	t(x->id == b.id);
	t(squeue_size(sq) == 4);

	/* testing squeue_pop(), lifo manner */
	t((x = squeue_pop(sq)) != NULL);
	t(squeue_size(sq) == 3,
	  "squeue_size(sq) = %d\n", squeue_size(sq));
	t(x == &b, "x: %p; &b: %p\n", x, &b);
	t(x->id == b.id, "x->id: %lu; d.id: %lu\n", x->id, d.id);

	/* Test squeue_pop() */
	t((x = squeue_pop(sq)) != NULL);
	t(squeue_size(sq) == 2);
	t(x == &c, "x->id: %lu; c.id: %lu\n", x->id, c.id);
	t(x->id == c.id, "x->id: %lu; c.id: %lu\n", x->id, c.id);

	/* this should fail gracefully (-1 return from squeue_remove()) */
	t(squeue_remove(NULL, NULL) == -1);
	t(squeue_remove(NULL, a.evt) == -1);

	squeue_foreach(sq, sq_walker, NULL);

	/* clean up to prevent false valgrind positives */
	squeue_destroy(sq, 0);

	return t_end();
}