示例#1
0
static void *
collector_writer(void *unused)
{
	int i = 0;
	size_t queued;
	size_t queuesize;
	double queueusage;
	size_t totdropped;
	size_t numaggregators = aggregator_numaggregators();
	server **srvs = NULL;

	while (keep_running) {
		if (cluster_refresh_pending) {
			server **newservers = router_getservers(pending_clusters);
			if (srvs != NULL)
				free(srvs);
			srvs = newservers;
			cluster_refresh_pending = 0;
		}
		assert(srvs != NULL);
		sleep(1);
		i++;
		if (i < collector_interval)
			continue;
		totdropped = 0;
		for (i = 0; srvs[i] != NULL; i++) {
			queued = server_get_queue_len(srvs[i]);
			queuesize = server_get_queue_size(srvs[i]);
			totdropped += server_get_dropped(srvs[i]);
			queueusage = (double)queued / (double)queuesize;

			if (queueusage >= 0.75)
				logout("warning: metrics queuing up "
						"for %s:%u: %zd metrics (%d%% of queue size)\n",
						server_ip(srvs[i]), server_port(srvs[i]), queued,
						(int)(queueusage * 100));
		}
		if (totdropped - lastdropped > 0)
			logout("warning: dropped %zd metrics\n", totdropped - lastdropped);
		lastdropped = totdropped;
		if (numaggregators > 0) {
			totdropped = aggregator_get_dropped();
			if (totdropped - lastaggrdropped > 0)
				logout("warning: aggregator dropped %zd metrics\n",
						totdropped - lastaggrdropped);
			lastaggrdropped = totdropped;
		}

		i = 0;
	}
	return NULL;
}
示例#2
0
/**
 * Collects metrics from dispatchers and servers and emits them.
 */
static void *
collector_runner(void *s)
{
	int i;
	size_t totticks;
	size_t totmetrics;
	size_t totblackholes;
	size_t totqueued;
	size_t totstalls;
	size_t totdropped;
	size_t totsleeps;
	size_t ticks;
	size_t metrics;
	size_t blackholes;
	size_t queued;
	size_t stalls;
	size_t dropped;
	size_t sleeps;
	time_t now;
	time_t nextcycle;
	char destbuf[1024];  /* sort of POSIX_MAX_PATH */
	char *p;
	size_t numaggregators = 0;
	aggregator *aggrs = NULL;
	server *submission = (server *)s;
	server **srvs = NULL;
	char metric[METRIC_BUFSIZ];
	char *m = NULL;
	size_t sizem = 0;
	size_t (*s_ticks)(server *);
	size_t (*s_metrics)(server *);
	size_t (*s_stalls)(server *);
	size_t (*s_dropped)(server *);
	size_t (*d_ticks)(dispatcher *);
	size_t (*d_metrics)(dispatcher *);
	size_t (*d_blackholes)(dispatcher *);
	size_t (*d_sleeps)(dispatcher *);
	size_t (*a_received)(aggregator *);
	size_t (*a_sent)(aggregator *);
	size_t (*a_dropped)(aggregator *);

	/* setup functions to target what the user wants */
	if (debug & 2) {
		s_ticks = server_get_ticks_sub;
		s_metrics = server_get_metrics_sub;
		s_stalls = server_get_stalls_sub;
		s_dropped = server_get_dropped_sub;
		d_ticks = dispatch_get_ticks_sub;
		d_metrics = dispatch_get_metrics_sub;
		d_blackholes = dispatch_get_blackholes_sub;
		d_sleeps = dispatch_get_sleeps_sub;
		a_received = aggregator_get_received_sub;
		a_sent = aggregator_get_sent_sub;
		a_dropped = aggregator_get_dropped_sub;
	} else {
		s_ticks = server_get_ticks;
		s_metrics = server_get_metrics;
		s_stalls = server_get_stalls;
		s_dropped = server_get_dropped;
		d_ticks = dispatch_get_ticks;
		d_metrics = dispatch_get_metrics;
		d_blackholes = dispatch_get_blackholes;
		d_sleeps = dispatch_get_sleeps;
		a_received = aggregator_get_received;
		a_sent = aggregator_get_sent;
		a_dropped = aggregator_get_dropped;
	}

#define send(metric) \
	if (debug & 1) \
		logout("%s", metric); \
	else \
		server_send(submission, strdup(metric), 1);

	nextcycle = time(NULL) + collector_interval;
	while (keep_running) {
		if (cluster_refresh_pending) {
			char *stub = router_getcollectorstub(pending_router);
			server **newservers = router_getservers(pending_router);
			if (srvs != NULL)
				free(srvs);
			srvs = newservers;
			aggrs = router_getaggregators(pending_router);
			numaggregators = aggregator_numaggregators(aggrs);

			/* prepare hostname for graphite metrics */
			snprintf(metric, sizeof(metric), "%scarbon.relays.%s",
					stub == NULL ? "" : stub, relay_hostname);
			for (m = metric + strlen("carbon.relays."); *m != '\0'; m++)
				if (*m == '.')
					*m = '_';
			*m++ = '.';
			*m = '\0';
			sizem = sizeof(metric) - (m - metric);

			cluster_refresh_pending = 0;
		}
		assert(srvs != NULL);
		sleep(1);
		now = time(NULL);
		if (nextcycle > now)
			continue;
		nextcycle += collector_interval;
		totticks = 0;
		totmetrics = 0;
		totblackholes = 0;
		totsleeps = 0;
		for (i = 0; dispatchers[i] != NULL; i++) {
			totsleeps += sleeps = d_sleeps(dispatchers[i]);
			totticks += ticks = d_ticks(dispatchers[i]);
			totmetrics += metrics = d_metrics(dispatchers[i]);
			totblackholes += blackholes = d_blackholes(dispatchers[i]);
			snprintf(m, sizem, "dispatcher%d.metricsReceived %zu %zu\n",
					i + 1, metrics, (size_t)now);
			send(metric);
			snprintf(m, sizem, "dispatcher%d.metricsBlackholed %zu %zu\n",
					i + 1, blackholes, (size_t)now);
			send(metric);
			snprintf(m, sizem, "dispatcher%d.wallTime_us %zu %zu\n",
					i + 1, ticks, (size_t)now);
			send(metric);
			snprintf(m, sizem, "dispatcher%d.sleepTime_us %zu %zu\n",
					i + 1, sleeps, (size_t)now);
			send(metric);
		}
		snprintf(m, sizem, "metricsReceived %zu %zu\n",
				totmetrics, (size_t)now);
		send(metric);
		snprintf(m, sizem, "metricsBlackholed %zu %zu\n",
				totblackholes, (size_t)now);
		send(metric);
		snprintf(m, sizem, "dispatch_wallTime_us %zu %zu\n",
				totticks, (size_t)now);
		send(metric);
		snprintf(m, sizem, "dispatch_sleepTime_us %zu %zu\n",
				totsleeps, (size_t)now);
		send(metric);

#define send_server_metrics(ipbuf, ticks, metrics, queued, stalls, dropped) \
			snprintf(m, sizem, "destinations.%s.sent %zu %zu\n", \
					ipbuf, metrics, (size_t)now); \
			send(metric); \
			snprintf(m, sizem, "destinations.%s.queued %zu %zu\n", \
					ipbuf, queued, (size_t)now); \
			send(metric); \
			snprintf(m, sizem, "destinations.%s.stalls %zu %zu\n", \
					ipbuf, stalls, (size_t)now); \
			send(metric); \
			snprintf(m, sizem, "destinations.%s.dropped %zu %zu\n", \
					ipbuf, dropped, (size_t)now); \
			send(metric); \
			snprintf(m, sizem, "destinations.%s.wallTime_us %zu %zu\n", \
					ipbuf, ticks, (size_t)now); \
			send(metric);

		totticks = 0;
		totmetrics = 0;
		totqueued = 0;
		totstalls = 0;
		totdropped = 0;

		/* exclude internal_submission metrics from the totals to avoid
		 * artificial doubles due to internal routing details */
		ticks = s_ticks(submission);
		metrics = s_metrics(submission);
		queued = server_get_queue_len(submission);
		stalls = s_stalls(submission);
		dropped = s_dropped(submission);
		send_server_metrics(server_ip(submission),
				ticks, metrics, queued, stalls, dropped);

		for (i = 0; srvs[i] != NULL; i++) {
			switch (server_ctype(srvs[i])) {
				case CON_FILE:
				case CON_PIPE:
					snprintf(destbuf, sizeof(destbuf), "%s",
							server_ip(srvs[i]));
					break;
				case CON_TCP:
					snprintf(destbuf, sizeof(destbuf), "%s:%u",
							server_ip(srvs[i]), server_port(srvs[i]));
					break;
				case CON_UDP:
					snprintf(destbuf, sizeof(destbuf), "%s:%u-udp",
							server_ip(srvs[i]), server_port(srvs[i]));
					break;
			}
			for (p = destbuf; *p != '\0'; p++)
				if (*p == '.')
					*p = '_';

			totticks += ticks = s_ticks(srvs[i]);
			totmetrics += metrics = s_metrics(srvs[i]);
			totqueued += queued = server_get_queue_len(srvs[i]);
			totstalls += stalls = s_stalls(srvs[i]);
			totdropped += dropped = s_dropped(srvs[i]);
			send_server_metrics(destbuf,
					ticks, metrics, queued, stalls, dropped);
		}

		snprintf(m, sizem, "metricsSent %zu %zu\n",
				totmetrics, (size_t)now);
		send(metric);
		snprintf(m, sizem, "metricsQueued %zu %zu\n",
				totqueued, (size_t)now);
		send(metric);
		snprintf(m, sizem, "metricStalls %zu %zu\n",
				totstalls, (size_t)now);
		send(metric);
		snprintf(m, sizem, "metricsDropped %zu %zu\n",
				totdropped, (size_t)now);
		send(metric);
		snprintf(m, sizem, "server_wallTime_us %zu %zu\n",
				totticks, (size_t)now);
		send(metric);
		snprintf(m, sizem, "connections %zu %zu\n",
				dispatch_get_accepted_connections(), (size_t)now);
		send(metric);
		snprintf(m, sizem, "disconnects %zu %zu\n",
				dispatch_get_closed_connections(), (size_t)now);
		send(metric);

		if (numaggregators > 0) {
			snprintf(m, sizem, "aggregators.metricsReceived %zu %zu\n",
					a_received(aggrs), (size_t)now);
			send(metric);
			snprintf(m, sizem, "aggregators.metricsSent %zu %zu\n",
					a_sent(aggrs), (size_t)now);
			send(metric);
			snprintf(m, sizem, "aggregators.metricsDropped %zu %zu\n",
					a_dropped(aggrs), (size_t)now);
			send(metric);
		}

		if (debug & 1)
			fflush(stdout);
	}

	if (srvs != NULL)
		free(srvs);

	return NULL;
}
示例#3
0
static void *
collector_writer(void *unused)
{
	int i = 0;
	size_t queued;
	size_t queuesize;
	double queueusage;
	size_t totdropped;
	size_t lastconn = 0;
	size_t lastdisc = 0;
	size_t numaggregators = 0;
	server **srvs = NULL;
	aggregator *aggrs = NULL;

	while (keep_running) {
		if (cluster_refresh_pending) {
			server **newservers = router_getservers(pending_router);
			if (srvs != NULL)
				free(srvs);
			srvs = newservers;
			aggrs = router_getaggregators(pending_router);
			numaggregators = aggregator_numaggregators(aggrs);
			cluster_refresh_pending = 0;
		}
		assert(srvs != NULL);
		sleep(1);
		if (debug & 1) {
			size_t mpsout;
			size_t totout;
			size_t mpsdrop;
			size_t totdrop;
			size_t totqueue;
			size_t mpsin;
			size_t totin;
			size_t totconn;
			size_t totdisc;
			size_t dticks;
			size_t dsleeps;
			int j;
			/* Solaris iostat like output:
 metrics in     metrics out    metrics drop  queue    conns     disconn   workr
  mps     tot    mps     tot    dps     tot    cur  cps   tot  dps   tot  act
99999 9999999  99999 9999999  99999 9999999  99999  999 99999  999 99999  99%
			 */
			if (i % 24 == 0)
				printf(" metrics in     metrics out    metrics drop  queue    conns     disconn   workr\n"
						"  mps     tot    mps     tot    dps     tot    cur  cps   tot  dps   tot  act\n");

			mpsout = totout = 0;
			mpsdrop = totdrop = 0;
			totqueue = 0;
			for (j = 0; srvs[j] != NULL; j++) {
				mpsout += server_get_metrics_sub(srvs[j]);
				totout += server_get_metrics(srvs[j]);

				mpsdrop += server_get_dropped_sub(srvs[j]);
				totdrop += server_get_dropped(srvs[j]);

				totqueue += server_get_queue_len(srvs[j]);
			}
			mpsin = totin = 0;
			dticks = dsleeps = 0;
			for (j = 0; dispatchers[j] != NULL; j++) {
				mpsin += dispatch_get_metrics_sub(dispatchers[j]);
				totin += dispatch_get_metrics(dispatchers[j]);

				dticks += dispatch_get_ticks_sub(dispatchers[j]);
				dsleeps += dispatch_get_sleeps_sub(dispatchers[j]);
			}
			totconn = dispatch_get_accepted_connections();
			totdisc = dispatch_get_closed_connections();
			printf("%5zu %7zu  "   /* metrics in */
					"%5zu %7zu  "  /* metrics out */
					"%5zu %7zu  "  /* metrics dropped */
					"%5zu  "       /* queue */
					"%3zu %5zu  "  /* conns */
					"%3zu %5zu  "  /* disconns */
					"%2d%%\n",     /* workers */
					mpsin, totin,
					mpsout, totout,
					mpsdrop, totdrop,
					totqueue,
					totconn - lastconn, totconn,
					totdisc - lastdisc, totdisc,
					(int)(((double)dticks * 100.0) / (double)(dticks + dsleeps))
				  );
			lastconn = totconn;
			lastdisc = totdisc;
		}
		i++;
		if (i < collector_interval)
			continue;
		totdropped = 0;
		for (i = 0; srvs[i] != NULL; i++) {
			queued = server_get_queue_len(srvs[i]);
			queuesize = server_get_queue_size(srvs[i]);
			totdropped += server_get_dropped(srvs[i]);
			queueusage = (double)queued / (double)queuesize;

			if (queueusage >= 0.75)
				logout("warning: metrics queuing up "
						"for %s:%u: %zu metrics (%d%% of queue size)\n",
						server_ip(srvs[i]), server_port(srvs[i]), queued,
						(int)(queueusage * 100));
		}
		if (totdropped - lastdropped > 0)
			logout("warning: dropped %zu metrics\n", totdropped - lastdropped);
		lastdropped = totdropped;
		if (numaggregators > 0) {
			totdropped = aggregator_get_dropped(aggrs);
			if (totdropped - lastaggrdropped > 0)
				logout("warning: aggregator dropped %zu metrics\n",
						totdropped - lastaggrdropped);
			lastaggrdropped = totdropped;
		}

		i = 0;
	}
	return NULL;
}
示例#4
0
int
main(int argc, char * const argv[])
{
	int stream_sock[] = {0, 0, 0};  /* tcp4, tcp6, UNIX */
	int stream_socklen = sizeof(stream_sock) / sizeof(stream_sock[0]);
	int dgram_sock[] = {0, 0};  /* udp4, udp6 */
	int dgram_socklen = sizeof(dgram_sock) / sizeof(dgram_sock[0]);
	char id;
	unsigned short listenport = 2003;
	int ch;
	size_t numaggregators;
	size_t numcomputes;
	server *internal_submission;
	char *listeninterface = NULL;
	server **servers;
	char *allowed_chars = NULL;
	int i;
	enum { SUB, CUM } smode = CUM;

	if (gethostname(relay_hostname, sizeof(relay_hostname)) < 0)
		snprintf(relay_hostname, sizeof(relay_hostname), "127.0.0.1");

	while ((ch = getopt(argc, argv, ":hvdmstf:i:l:p:w:b:q:S:c:H:")) != -1) {
		switch (ch) {
			case 'v':
				do_version();
				break;
			case 'd':
				if (mode == TEST) {
					mode = DEBUGTEST;
				} else {
					mode = DEBUG;
				}
				break;
			case 'm':
				smode = SUB;
				break;
			case 's':
				mode = SUBMISSION;
				break;
			case 't':
				if (mode == DEBUG) {
					mode = DEBUGTEST;
				} else {
					mode = TEST;
				}
				break;
			case 'f':
				config = optarg;
				break;
			case 'i':
				listeninterface = optarg;
				break;
			case 'l':
				relay_logfile = optarg;
				break;
			case 'p':
				listenport = (unsigned short)atoi(optarg);
				if (listenport == 0) {
					fprintf(stderr, "error: port needs to be a number >0\n");
					do_usage(1);
				}
				break;
			case 'w':
				workercnt = (char)atoi(optarg);
				if (workercnt <= 0) {
					fprintf(stderr, "error: workers needs to be a number >0\n");
					do_usage(1);
				}
				break;
			case 'b':
				batchsize = atoi(optarg);
				if (batchsize <= 0) {
					fprintf(stderr, "error: batch size needs to be a number >0\n");
					do_usage(1);
				}
				break;
			case 'q':
				queuesize = atoi(optarg);
				if (queuesize <= 0) {
					fprintf(stderr, "error: queue size needs to be a number >0\n");
					do_usage(1);
				}
				break;
			case 'S':
				collector_interval = atoi(optarg);
				if (collector_interval <= 0) {
					fprintf(stderr, "error: sending interval needs to be "
							"a number >0\n");
					do_usage(1);
				}
				break;
			case 'c':
				allowed_chars = optarg;
				break;
			case 'H':
				snprintf(relay_hostname, sizeof(relay_hostname), "%s", optarg);
				break;
			case '?':
			case ':':
				do_usage(1);
				break;
			case 'h':
			default:
				do_usage(0);
				break;
		}
	}
	if (optind == 1 || config == NULL)
		do_usage(1);


	/* seed randomiser for dispatcher and aggregator "splay" */
	srand(time(NULL));

	if (workercnt == 0)
		workercnt = mode == SUBMISSION ? 2 : get_cores();

	/* any_of failover maths need batchsize to be smaller than queuesize */
	if (batchsize > queuesize) {
		fprintf(stderr, "error: batchsize must be smaller than queuesize\n");
		exit(-1);
	}

	if (relay_logfile != NULL && mode != TEST && mode != DEBUGTEST) {
		FILE *f = fopen(relay_logfile, "a");
		if (f == NULL) {
			fprintf(stderr, "error: failed to open logfile '%s': %s\n",
					relay_logfile, strerror(errno));
			exit(-1);
		}
		relay_stdout = f;
		relay_stderr = f;
	} else {
		relay_stdout = stdout;
		relay_stderr = stderr;
	}
	relay_can_log = 1;

	logout("starting carbon-c-relay v%s (%s), pid=%d\n",
			VERSION, GIT_VERSION, getpid());
	fprintf(relay_stdout, "configuration:\n");
	fprintf(relay_stdout, "    relay hostname = %s\n", relay_hostname);
	fprintf(relay_stdout, "    listen port = %u\n", listenport);
	if (listeninterface != NULL)
		fprintf(relay_stdout, "    listen interface = %s\n", listeninterface);
	fprintf(relay_stdout, "    workers = %d\n", workercnt);
	fprintf(relay_stdout, "    send batch size = %d\n", batchsize);
	fprintf(relay_stdout, "    server queue size = %d\n", queuesize);
	fprintf(relay_stdout, "    statistics submission interval = %ds\n",
			collector_interval);
	if (allowed_chars != NULL)
		fprintf(relay_stdout, "    extra allowed characters = %s\n",
				allowed_chars);
	if (mode == DEBUG || mode == DEBUGTEST)
		fprintf(relay_stdout, "    debug = true\n");
	else if (mode == SUBMISSION)
		fprintf(relay_stdout, "    submission = true\n");
	fprintf(relay_stdout, "    routes configuration = %s\n", config);
	fprintf(relay_stdout, "\n");

	if (router_readconfig(&clusters, &routes,
				config, queuesize, batchsize) == 0)
	{
		logerr("failed to read configuration '%s'\n", config);
		return 1;
	}
	router_optimise(&routes);

	numaggregators = aggregator_numaggregators();
	numcomputes = aggregator_numcomputes();
#define dbg (mode == DEBUG || mode == DEBUGTEST ? 2 : 0)
	if (numaggregators > 10 && !dbg) {
		fprintf(relay_stdout, "parsed configuration follows:\n"
				"(%zd aggregations with %zd computations omitted "
				"for brevity)\n", numaggregators, numcomputes);
		router_printconfig(relay_stdout, 0, clusters, routes);
	} else {
		fprintf(relay_stdout, "parsed configuration follows:\n");
		router_printconfig(relay_stdout, 1 + dbg, clusters, routes);
	}
	fprintf(relay_stdout, "\n");

	/* shortcut for rule testing mode */
	if (mode == TEST || mode == DEBUGTEST) {
		char metricbuf[METRIC_BUFSIZ];
		char *p;

		fflush(relay_stdout);
		while (fgets(metricbuf, sizeof(metricbuf), stdin) != NULL) {
			if ((p = strchr(metricbuf, '\n')) != NULL)
				*p = '\0';
			router_test(metricbuf, routes);
		}

		exit(0);
	}

	if (signal(SIGINT, exit_handler) == SIG_ERR) {
		logerr("failed to create SIGINT handler: %s\n", strerror(errno));
		return 1;
	}
	if (signal(SIGTERM, exit_handler) == SIG_ERR) {
		logerr("failed to create SIGTERM handler: %s\n", strerror(errno));
		return 1;
	}
	if (signal(SIGQUIT, exit_handler) == SIG_ERR) {
		logerr("failed to create SIGQUIT handler: %s\n", strerror(errno));
		return 1;
	}
	if (signal(SIGHUP, hup_handler) == SIG_ERR) {
		logerr("failed to create SIGHUP handler: %s\n", strerror(errno));
		return 1;
	}
	if (signal(SIGPIPE, SIG_IGN) == SIG_ERR) {
		logerr("failed to ignore SIGPIPE: %s\n", strerror(errno));
		return 1;
	}

	workers = malloc(sizeof(dispatcher *) * (1 + workercnt + 1));
	if (workers == NULL) {
		logerr("failed to allocate memory for workers\n");
		return 1;
	}

	if (bindlisten(stream_sock, &stream_socklen,
				dgram_sock, &dgram_socklen,
				listeninterface, listenport) < 0) {
		logerr("failed to bind on port %s:%d: %s\n",
				listeninterface == NULL ? "" : listeninterface,
				listenport, strerror(errno));
		return -1;
	}
	for (ch = 0; ch < stream_socklen; ch++) {
		if (dispatch_addlistener(stream_sock[ch]) != 0) {
			logerr("failed to add listener\n");
			return -1;
		}
	}
	for (ch = 0; ch < dgram_socklen; ch++) {
		if (dispatch_addlistener_udp(dgram_sock[ch]) != 0) {
			logerr("failed to listen to datagram socket\n");
			return -1;
		}
	}
	if ((workers[0] = dispatch_new_listener()) == NULL)
		logerr("failed to add listener\n");

	if (allowed_chars == NULL)
		allowed_chars = "-_:#";
	logout("starting %d workers\n", workercnt);
	for (id = 1; id < 1 + workercnt; id++) {
		workers[id + 0] = dispatch_new_connection(routes, allowed_chars);
		if (workers[id + 0] == NULL) {
			logerr("failed to add worker %d\n", id);
			break;
		}
	}
	workers[id + 0] = NULL;
	if (id < 1 + workercnt) {
		logerr("shutting down due to errors\n");
		keep_running = 0;
	}

	/* server used for delivering metrics produced inside the relay,
	 * that is collector (statistics) and aggregator (aggregations) */
	if ((internal_submission = server_new("internal", listenport, CON_PIPE,
					NULL, 3000 + (numcomputes * 3), batchsize)) == NULL)
	{
		logerr("failed to create internal submission queue, shutting down\n");
		keep_running = 0;
	}

	if (numaggregators > 0) {
		logout("starting aggregator\n");
		if (!aggregator_start(internal_submission)) {
			logerr("shutting down due to failure to start aggregator\n");
			keep_running = 0;
		}
	}

	logout("starting statistics collector\n");
	collector_start(&workers[1], clusters, internal_submission, smode == CUM);

	logout("startup sequence complete\n");

	/* workers do the work, just wait */
	while (keep_running)
		sleep(1);

	logout("shutting down...\n");
	/* make sure we don't accept anything new anymore */
	for (ch = 0; ch < stream_socklen; ch++)
		dispatch_removelistener(stream_sock[ch]);
	destroy_usock(listenport);
	logout("listeners for port %u closed\n", listenport);
	/* since workers will be freed, stop querying the structures */
	collector_stop();
	logout("collector stopped\n");
	if (numaggregators > 0) {
		aggregator_stop();
		logout("aggregator stopped\n");
	}
	server_shutdown(internal_submission);
	/* give a little time for whatever the collector/aggregator wrote,
	 * to be delivered by the dispatchers */
	usleep(500 * 1000);  /* 500ms */
	/* make sure we don't write to our servers any more */
	logout("stopped worker");
	for (id = 0; id < 1 + workercnt; id++)
		dispatch_stop(workers[id + 0]);
	for (id = 0; id < 1 + workercnt; id++) {
		dispatch_shutdown(workers[id + 0]);
		fprintf(relay_stdout, " %d", id + 1);
		fflush(relay_stdout);
	}
	fprintf(relay_stdout, "\n");
	router_shutdown();
	servers = router_getservers(clusters);
	logout("stopped server");
	for (i = 0; servers[i] != NULL; i++)
		server_stop(servers[i]);
	for (i = 0; servers[i] != NULL; i++) {
		server_shutdown(servers[i]);
		fprintf(relay_stdout, " %d", i + 1);
		fflush(relay_stdout);
	}
	fprintf(relay_stdout, "\n");
	logout("routing stopped\n");

	router_free(clusters, routes);
	free(workers);
	return 0;
}
示例#5
0
/**
 * Collects metrics from dispatchers and servers and emits them.
 */
static void *
collector_runner(void *s)
{
	int i;
	size_t totticks;
	size_t totmetrics;
	size_t totqueued;
	size_t totstalls;
	size_t totdropped;
	size_t ticks;
	size_t metrics;
	size_t queued;
	size_t stalls;
	size_t dropped;
	size_t dispatchers_idle;
	size_t dispatchers_busy;
	time_t now;
	time_t nextcycle;
	char ipbuf[32];
	char *p;
	size_t numaggregators = aggregator_numaggregators();
	server *submission = (server *)s;
	server **srvs = NULL;
	char metric[METRIC_BUFSIZ];
	char *m;
	size_t sizem = 0;

	/* prepare hostname for graphite metrics */
	snprintf(metric, sizeof(metric), "carbon.relays.%s", relay_hostname);
	for (m = metric + strlen("carbon.relays."); *m != '\0'; m++)
		if (*m == '.')
			*m = '_';
	*m++ = '.';
	*m = '\0';
	sizem = sizeof(metric) - (m - metric);

#define send(metric) \
	if (debug) \
		logout("%s", metric); \
	else \
		server_send(submission, strdup(metric), 1);

	nextcycle = time(NULL) + collector_interval;
	while (keep_running) {
		if (cluster_refresh_pending) {
			server **newservers = router_getservers(pending_clusters);
			if (srvs != NULL)
				free(srvs);
			srvs = newservers;
			cluster_refresh_pending = 0;
		}
		assert(srvs != NULL);
		sleep(1);
		now = time(NULL);
		if (nextcycle > now)
			continue;
		nextcycle += collector_interval;
		totticks = 0;
		totmetrics = 0;
		dispatchers_idle = 0;
		dispatchers_busy = 0;
		for (i = 0; dispatchers[i] != NULL; i++) {
			if (dispatch_busy(dispatchers[i])) {
				dispatchers_busy++;
			} else {
				dispatchers_idle++;
			}
			totticks += ticks = dispatch_get_ticks(dispatchers[i]);
			totmetrics += metrics = dispatch_get_metrics(dispatchers[i]);
			snprintf(m, sizem, "dispatcher%d.metricsReceived %zd %zd\n",
					i + 1, metrics, (size_t)now);
			send(metric);
			snprintf(m, sizem, "dispatcher%d.wallTime_us %zd %zd\n",
					i + 1, ticks, (size_t)now);
			send(metric);
		}
		snprintf(m, sizem, "metricsReceived %zd %zd\n",
				totmetrics, (size_t)now);
		send(metric);
		snprintf(m, sizem, "dispatch_wallTime_us %zd %zd\n",
				totticks, (size_t)now);
		send(metric);
		snprintf(m, sizem, "dispatch_busy %zd %zd\n",
				dispatchers_busy, (size_t)now);
		send(metric);
		snprintf(m, sizem, "dispatch_idle %zd %zd\n",
				dispatchers_idle, (size_t)now);
		send(metric);

		totticks = 0;
		totmetrics = 0;
		totqueued = 0;
		totstalls = 0;
		totdropped = 0;
		for (i = 0; srvs[i] != NULL; i++) {
			if (server_ctype(srvs[i]) == CON_PIPE) {
				strncpy(ipbuf, "internal", sizeof(ipbuf));

				ticks = server_get_ticks(srvs[i]);
				metrics = server_get_metrics(srvs[i]);
				queued = server_get_queue_len(srvs[i]);
				stalls = server_get_stalls(srvs[i]);
				dropped = server_get_dropped(srvs[i]);
			} else {
				snprintf(ipbuf, sizeof(ipbuf), "%s:%u",
						server_ip(srvs[i]), server_port(srvs[i]));
				for (p = ipbuf; *p != '\0'; p++)
					if (*p == '.')
						*p = '_';

				totticks += ticks = server_get_ticks(srvs[i]);
				totmetrics += metrics = server_get_metrics(srvs[i]);
				totqueued += queued = server_get_queue_len(srvs[i]);
				totstalls += stalls = server_get_stalls(srvs[i]);
				totdropped += dropped = server_get_dropped(srvs[i]);
			}
			snprintf(m, sizem, "destinations.%s.sent %zd %zd\n",
					ipbuf, metrics, (size_t)now);
			send(metric);
			snprintf(m, sizem, "destinations.%s.queued %zd %zd\n",
					ipbuf, queued, (size_t)now);
			send(metric);
			snprintf(m, sizem, "destinations.%s.stalls %zd %zd\n",
					ipbuf, stalls, (size_t)now);
			send(metric);
			snprintf(m, sizem, "destinations.%s.dropped %zd %zd\n",
					ipbuf, dropped, (size_t)now);
			send(metric);
			snprintf(m, sizem, "destinations.%s.wallTime_us %zd %zd\n",
					ipbuf, ticks, (size_t)now);
			send(metric);
		}

		snprintf(m, sizem, "metricsSent %zd %zd\n",
				totmetrics, (size_t)now);
		send(metric);
		snprintf(m, sizem, "metricsQueued %zd %zd\n",
				totqueued, (size_t)now);
		send(metric);
		snprintf(m, sizem, "metricStalls %zd %zd\n",
				totstalls, (size_t)now);
		send(metric);
		snprintf(m, sizem, "metricsDropped %zd %zd\n",
				totdropped, (size_t)now);
		send(metric);
		snprintf(m, sizem, "server_wallTime_us %zd %zd\n",
				totticks, (size_t)now);
		send(metric);
		snprintf(m, sizem, "connections %zd %zd\n",
				dispatch_get_accepted_connections(), (size_t)now);
		send(metric);
		snprintf(m, sizem, "disconnects %zd %zd\n",
				dispatch_get_closed_connections(), (size_t)now);
		send(metric);

		if (numaggregators > 0) {
			snprintf(m, sizem, "aggregators.metricsReceived %zd %zd\n",
					aggregator_get_received(), (size_t)now);
			send(metric);
			snprintf(m, sizem, "aggregators.metricsSent %zd %zd\n",
					aggregator_get_sent(), (size_t)now);
			send(metric);
			snprintf(m, sizem, "aggregators.metricsDropped %zd %zd\n",
					aggregator_get_dropped(), (size_t)now);
			send(metric);
		}

		if (debug)
			fflush(stdout);
	}

	return NULL;
}