Beispiel #1
0
/*
 * Test driver for the fanout table.
 *
 * Runs run_tests() with several entry-count / table-size combinations,
 * then checks that get/remove on an empty table yield NULL, fills a table
 * created with fanout_create(512) with heap-allocated entries keyed
 * 0..16384, and finally verifies that fanout_destroy() bumps the
 * file-level `destroyed` counter once per inserted entry.
 *
 * argc/argv are unused.
 * Returns the value of t_end().
 */
int main(int argc, char **argv)
{
	unsigned long k;
	struct test_data *td;

	t_set_colors(0);
	t_start("fanout tests");

	run_tests(10, 64);
	run_tests(512, 64);
	run_tests(64, 64);
	run_tests(511, 17);

	destroyed = 0;
	fot = fanout_create(512);
	ok_int(fanout_remove(fot, 12398) == NULL, 1,
	       "remove on empty table must yield NULL");
	ok_int(fanout_get(fot, 123887987) == NULL, 1,
	       "get on empty table must yield NULL");
	for (k = 0; k < 16385; k++) {
		struct test_data *tdata = calloc(1, sizeof(*tdata));
		/*
		 * asprintf() leaves its output pointer undefined on failure,
		 * so an entry whose name could not be built must never be
		 * handed to the table (and later to the destructor).
		 */
		int allocated = tdata != NULL
		                && asprintf(&tdata->name, "%lu", k) >= 0;

		if (!allocated) {
			ok_int(allocated, 1, "allocating a test entry must succeed");
			free(tdata);
			/* k now equals the number of entries actually added */
			break;
		}
		tdata->key = k;
		fanout_add(fot, k, tdata);
	}

	/* after the loop k is one past the highest key that was inserted */
	td = fanout_get(fot, k - 1);
	ok_int(td != NULL, 1, "get must get what add inserts");
	ok_int(fanout_remove(fot, k + 1) == NULL, 1,
	       "remove on non-inserted key must yield NULL");
	ok_int(fanout_get(fot, k + 1) == NULL, 1,
	       "get on non-inserted must yield NULL");
	fanout_destroy(fot, pdest);
	ok_int((int)destroyed, (int)k, "destroy counter while free()'ing");

	return t_end();
}
Beispiel #2
0
/*
 * Exercise one fanout table configuration.
 *
 * ntests:  number of entries to insert
 * fo_size: size argument handed to fanout_create()
 *
 * The test cases come from calloc() and their keys are never assigned, so
 * every entry is added under key 0. Phase one adds all entries and drains
 * them again with fanout_remove(fo, 0), expecting each removed value to be
 * exactly one less than the previous one (i.e. reverse insertion order),
 * and expects the destructor not to fire on the emptied table. Phase two
 * re-adds everything and expects fanout_destroy() to account for ntests
 * entries in the file-level `destroyed` counter, which is reset afterwards.
 */
static void run_tests(int ntests, int fo_size)
{
	fanout_table *table;
	struct tcase *cases;
	unsigned long prev_value = ntests;
	unsigned long *value_ptr;
	int n_added = 0, n_removed = 0;
	int idx;

	table = fanout_create(fo_size);
	cases = calloc(ntests, sizeof(*cases));

	/* phase one: insert everything ... */
	for (idx = 0; idx < ntests; idx++) {
		cases[idx].value = idx;
		if (fanout_add(table, cases[idx].key, &cases[idx].value) == 0)
			n_added++;
	}
	ok_int(n_added, ntests, "Adding stuff must work");

	/* ... then pull entries back out until key 0 has none left */
	for (;;) {
		value_ptr = (unsigned long *)fanout_remove(table, 0);
		if (value_ptr == NULL)
			break;
		ok_int((int)*value_ptr, (int)prev_value - 1, "Removing a bunch of them");
		n_removed++;
		prev_value = *value_ptr;
	}
	ok_int(n_added, n_removed, "Removing should work as well as adding");

	fanout_destroy(table, destructor);
	ok_int(destroyed, 0, "Expected no entries in destructor");

	/* phase two: refill a fresh table and let destroy see every entry */
	table = fanout_create(fo_size);
	for (idx = 0; idx < ntests; idx++) {
		cases[idx].value = idx;
		if (fanout_add(table, cases[idx].key, &cases[idx].value) == 0)
			n_added++;
	}
	fanout_destroy(table, destructor);
	ok_int(destroyed, ntests, "Expected ntest entries in destructor");

	destroyed = 0;
	free(cases);
}
Beispiel #3
0
/*
 * Tear down a worker process object.
 *
 * wp:    worker to destroy; NULL is accepted and ignored
 * flags: WPROC_FORCE makes the master process destroy the worker as well
 *
 * When the caller is the master (getpid() == nagios_pid) and WPROC_FORCE
 * is not set, nothing happens. Otherwise the worker's iocache, name and
 * job table are released. Only the master goes on to kill the worker
 * process, close its socket, reap exited children and free wp itself.
 *
 * Always returns 0.
 */
static int wproc_destroy(struct wproc_worker *wp, int flags)
{
	int wait_status = 0;
	int forced, caller_pid;

	if (wp == NULL)
		return 0;

	forced = (flags & WPROC_FORCE) != 0;
	caller_pid = getpid();

	/* the master keeps its workers across restarts unless forced */
	if (!forced && caller_pid == nagios_pid)
		return 0;

	/* forced, or called from inside a worker: release owned memory */
	iocache_destroy(wp->ioc);
	wp->ioc = NULL;
	my_free(wp->name);
	fanout_destroy(wp->jobs, fo_destroy_job);
	wp->jobs = NULL;

	/* a worker must never control other workers, so it stops here */
	if (caller_pid != nagios_pid)
		return 0;

	/* pid 0 would make kill() signal ourselves, so skip it */
	if (wp->pid != 0)
		kill(wp->pid, SIGKILL);

	iobroker_close(nagios_iobs, wp->sd);

	/* collect any children that have already exited, without blocking */
	for (;;) {
		if (waitpid(-1, &wait_status, WNOHANG) <= 0)
			break;
	}

	free(wp);

	return 0;
}
Beispiel #4
0
/*
 * Frees memory allocated for the scheduled downtime data.
 *
 * Destroys the dt_fanout table, then walks scheduled_downtime_list and
 * frees every entry together with its host name, service description,
 * author and comment strings. Both dt_fanout and scheduled_downtime_list
 * are NULL afterwards.
 */
void free_downtime_data(void) {
	scheduled_downtime *this_downtime = NULL;
	scheduled_downtime *next_downtime = NULL;

	/*
	 * No destructor is passed: the entries themselves are released by
	 * the list walk below.
	 * NOTE(review): assumes the table only references list entries -- confirm.
	 */
	fanout_destroy(dt_fanout, NULL);
	/* don't leave a dangling pointer to the destroyed table behind */
	dt_fanout = NULL;

	/* free memory for the scheduled_downtime list */
	for(this_downtime = scheduled_downtime_list; this_downtime != NULL; this_downtime = next_downtime) {
		/* grab the successor first; this_downtime is freed below */
		next_downtime = this_downtime->next;
		my_free(this_downtime->host_name);
		my_free(this_downtime->service_description);
		my_free(this_downtime->author);
		my_free(this_downtime->comment);
		my_free(this_downtime);
		}

	/* reset list pointer */
	scheduled_downtime_list = NULL;

	return;
	}
Beispiel #5
0
/*
 * Read and process whatever a worker process has sent us.
 *
 * sd:     the worker's socket (only used to unregister it from the I/O
 *         broker when the connection turns out to be broken)
 * events: unused
 * arg:    the struct wproc_worker the socket belongs to
 *
 * Data is read into the worker's iocache. A read error is logged and
 * ignored. A read of 0 bytes is treated as a broken socket: the worker is
 * removed and its jobs are passed to fo_reassign_wproc_job. Otherwise each
 * complete message in the cache is either logged ("log=" messages) or
 * parsed into a wproc_result, matched against the job it belongs to,
 * reported if it failed, dispatched by job type, and the job destroyed.
 *
 * Always returns 0.
 */
static int handle_worker_result(int sd, int events, void *arg)
{
	wproc_object_job *oj = NULL;
	char *buf, *error_reason = NULL;
	unsigned long size;
	int ret;
	/* static: the key/value vector is reused across messages and calls */
	static struct kvvec kvv = KVVEC_INITIALIZER;
	struct wproc_worker *wp = (struct wproc_worker *)arg;

	if(iocache_capacity(wp->ioc) == 0) {
		logit(NSLOG_RUNTIME_WARNING, TRUE, "wproc: iocache_capacity() is 0 for worker %s.\n", wp->name);
	}

	ret = iocache_read(wp->ioc, wp->sd);

	if (ret < 0) {
		logit(NSLOG_RUNTIME_WARNING, TRUE, "wproc: iocache_read() from %s returned %d: %s\n",
			  wp->name, ret, strerror(errno));
		return 0;
	} else if (ret == 0) {
		logit(NSLOG_INFO_MESSAGE, TRUE, "wproc: Socket to worker %s broken, removing", wp->name);
		wproc_num_workers_online--;
		iobroker_unregister(nagios_iobs, sd);
		/*
		 * NOTE(review): this check runs before remove_worker(wp); if
		 * remove_worker() is what shrinks workers.len, the message
		 * cannot be triggered by the last worker dying -- confirm.
		 */
		if (workers.len <= 0) {
			/* there aren't global workers left, we can't run any more checks
			 * we should try respawning a few of the standard ones
			 */
			logit(NSLOG_RUNTIME_ERROR, TRUE, "wproc: All our workers are dead, we can't do anything!");
		}
		remove_worker(wp);
		/* hand the dead worker's pending jobs to fo_reassign_wproc_job */
		fanout_destroy(wp->jobs, fo_reassign_wproc_job);
		wp->jobs = NULL;
		/*
		 * NOTE(review): without WPROC_FORCE, wproc_destroy() returns
		 * early when called from the master process, so wp itself
		 * appears to stay allocated here -- confirm this is intended.
		 */
		wproc_destroy(wp, 0);
		return 0;
	}
	while ((buf = worker_ioc2msg(wp->ioc, &size, 0))) {
		struct wproc_job *job;
		wproc_result wpres;

		/* log messages are handled first */
		if (size > 5 && !memcmp(buf, "log=", 4)) {
			logit(NSLOG_INFO_MESSAGE, TRUE, "wproc: %s: %s\n", wp->name, buf + 4);
			continue;
		}

		/* for everything else we need to actually parse */
		if (buf2kvvec_prealloc(&kvv, buf, size, '=', '\0', KVVEC_ASSIGN) <= 0) {
			logit(NSLOG_RUNTIME_ERROR, TRUE,
				  "wproc: Failed to parse key/value vector from worker response with len %lu. First kv=%s",
				  size, buf ? buf : "(NULL)");
			continue;
		}

		/* -1 marks job id and type as "not supplied by the worker" */
		memset(&wpres, 0, sizeof(wpres));
		wpres.job_id = -1;
		wpres.type = -1;
		wpres.response = &kvv;
		parse_worker_result(&wpres, &kvv);

		job = get_job(wp, wpres.job_id);
		if (!job) {
			logit(NSLOG_RUNTIME_WARNING, TRUE, "wproc: Job with id '%d' doesn't exist on %s.\n",
				  wpres.job_id, wp->name);
			continue;
		}
		if (wpres.type != job->type) {
			logit(NSLOG_RUNTIME_WARNING, TRUE, "wproc: %s claims job %d is type %d, but we think it's type %d\n",
				  wp->name, job->id, wpres.type, job->type);
			/*
			 * Leaves the loop: messages still buffered are not
			 * processed in this call and the job is not destroyed.
			 */
			break;
		}
		oj = (wproc_object_job *)job->arg;

		/*
		 * ETIME ("Timer expired") doesn't really happen
		 * on any modern systems, so we reuse it to mean
		 * "program timed out"
		 */
		if (wpres.error_code == ETIME) {
			wpres.early_timeout = TRUE;
		}

		/*
		 * Build a human-readable failure reason, if there is one.
		 * asprintf() leaves its output pointer undefined when it
		 * fails, so reset it to NULL in that case; otherwise the
		 * check and my_free() below would act on a garbage pointer.
		 */
		if (wpres.early_timeout) {
			if (asprintf(&error_reason, "timed out after %.2fs", tv_delta_f(&wpres.start, &wpres.stop)) < 0)
				error_reason = NULL;
		}
		else if (WIFSIGNALED(wpres.wait_status)) {
			if (asprintf(&error_reason, "died by signal %d%s after %.2f seconds",
			             WTERMSIG(wpres.wait_status),
			             WCOREDUMP(wpres.wait_status) ? " (core dumped)" : "",
			             tv_delta_f(&wpres.start, &wpres.stop)) < 0)
				error_reason = NULL;
		}
		else if (job->type != WPJOB_CHECK && WEXITSTATUS(wpres.wait_status) != 0) {
			if (asprintf(&error_reason, "is a non-check helper but exited with return code %d",
			             WEXITSTATUS(wpres.wait_status)) < 0)
				error_reason = NULL;
		}
		if (error_reason) {
			logit(NSLOG_RUNTIME_ERROR, TRUE, "wproc: %s job %d from worker %s %s",
			      wpjob_type_name(job->type), job->id, wp->name, error_reason);
			logit(NSLOG_RUNTIME_ERROR, TRUE, "wproc:   command: %s\n", job->command);
			if (job->type != WPJOB_CHECK && oj) {
				logit(NSLOG_RUNTIME_ERROR, TRUE, "wproc:   host=%s; service=%s; contact=%s\n",
				      oj->host_name ? oj->host_name : "(none)",
				      oj->service_description ? oj->service_description : "(none)",
				      oj->contact_name ? oj->contact_name : "(none)");
			} else if (oj) {
				/* for check jobs the job argument is read as a check_result */
				struct check_result *cr = (struct check_result *)job->arg;
				logit(NSLOG_RUNTIME_ERROR, TRUE, "wproc:   host=%s; service=%s;\n",
				      cr->host_name, cr->service_description);
			}
			logit(NSLOG_RUNTIME_ERROR, TRUE, "wproc:   early_timeout=%d; exited_ok=%d; wait_status=%d; error_code=%d;\n",
			      wpres.early_timeout, wpres.exited_ok, wpres.wait_status, wpres.error_code);
			wproc_logdump_buffer(NSLOG_RUNTIME_ERROR, TRUE, "wproc:   stderr", wpres.outerr);
			wproc_logdump_buffer(NSLOG_RUNTIME_ERROR, TRUE, "wproc:   stdout", wpres.outstd);
		}
		my_free(error_reason);

		/*
		 * Per-type handling. Apart from checks and callbacks, all that
		 * happens here is a warning when the command timed out.
		 * NOTE(review): the NOTIFY/OCSP/OCHP branches dereference oj
		 * without the NULL check used in the error logging above --
		 * confirm job->arg is always set for those job types.
		 */
		switch (job->type) {
		case WPJOB_CHECK:
			/* ret is not looked at after this assignment */
			ret = handle_worker_check(&wpres, wp, job);
			break;
		case WPJOB_NOTIFY:
			if (wpres.early_timeout) {
				if (oj->service_description) {
					logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Notifying contact '%s' of service '%s' on host '%s' by command '%s' timed out after %.2f seconds\n",
						  oj->contact_name, oj->service_description,
						  oj->host_name, job->command,
						  tv2float(&wpres.runtime));
				} else {
					logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Notifying contact '%s' of host '%s' by command '%s' timed out after %.2f seconds\n",
						  oj->contact_name, oj->host_name,
						  job->command, tv2float(&wpres.runtime));
				}
			}
			break;
		case WPJOB_OCSP:
			if (wpres.early_timeout) {
				logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: OCSP command '%s' for service '%s' on host '%s' timed out after %.2f seconds\n",
					  job->command, oj->service_description, oj->host_name,
					  tv2float(&wpres.runtime));
			}
			break;
		case WPJOB_OCHP:
			if (wpres.early_timeout) {
				logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: OCHP command '%s' for host '%s' timed out after %.2f seconds\n",
					  job->command, oj->host_name, tv2float(&wpres.runtime));
			}
			break;
		case WPJOB_GLOBAL_SVC_EVTHANDLER:
			if (wpres.early_timeout) {
				logit(NSLOG_EVENT_HANDLER | NSLOG_RUNTIME_WARNING, TRUE,
					  "Warning: Global service event handler command '%s' timed out after %.2f seconds\n",
					  job->command, tv2float(&wpres.runtime));
			}
			break;
		case WPJOB_SVC_EVTHANDLER:
			if (wpres.early_timeout) {
				logit(NSLOG_EVENT_HANDLER | NSLOG_RUNTIME_WARNING, TRUE,
					  "Warning: Service event handler command '%s' timed out after %.2f seconds\n",
					  job->command, tv2float(&wpres.runtime));
			}
			break;
		case WPJOB_GLOBAL_HOST_EVTHANDLER:
			if (wpres.early_timeout) {
				logit(NSLOG_EVENT_HANDLER | NSLOG_RUNTIME_WARNING, TRUE,
					  "Warning: Global host event handler command '%s' timed out after %.2f seconds\n",
					  job->command, tv2float(&wpres.runtime));
			}
			break;
		case WPJOB_HOST_EVTHANDLER:
			if (wpres.early_timeout) {
				logit(NSLOG_EVENT_HANDLER | NSLOG_RUNTIME_WARNING, TRUE,
					  "Warning: Host event handler command '%s' timed out after %.2f seconds\n",
					  job->command, tv2float(&wpres.runtime));
			}
			break;

		case WPJOB_CALLBACK:
			run_job_callback(job, &wpres, 0);
			break;

		default:
			logit(NSLOG_RUNTIME_WARNING, TRUE, "Worker %d: Unknown jobtype: %d\n", wp->pid, job->type);
			break;
		}
		destroy_job(job);
	}

	return 0;
}