void handle_request(char *buf)
	const char *delims = "|\r\n\t ";

	switch (buf[0]) {
	  case 'S':
		/* Register server|type|weight|sticky|extras */
			char *tok, *servername = NULL;
			enum locator_servicetype_t servicetype = ST_MAX;
			int serverweight = 0;
			enum locator_sticky_t sticky = LOC_ROAMING;
			char *serverextras = NULL;

			tok = strtok(buf, delims); if (tok) { tok = strtok(NULL, delims); }
			if (tok) { servername = tok; tok = strtok(NULL, delims); }
			if (tok) { servicetype = get_servicetype(tok); tok = strtok(NULL, delims); }
			if (tok) { serverweight = atoi(tok); tok = strtok(NULL, delims); }
			if (tok) { sticky = ((atoi(tok) == 1) ? LOC_STICKY : LOC_ROAMING); tok = strtok(NULL, delims); }
			if (tok) { serverextras = tok; tok = strtok(NULL, delims); }

			if (servername && (servicetype != ST_MAX)) {
				dbgprintf("Registering server '%s' handling %s (weight %d, %s)\n",
					servername, servicetype_names[servicetype], serverweight,
					(sticky == LOC_STICKY ? "sticky" : "not sticky"));
				register_server(servername, servicetype, serverweight, sticky, serverextras);
				strcpy(buf, "OK");
			else strcpy(buf, "BADSYNTAX");

	  case 'D': case 'U': case 'F':
		/* Down/Up/Forget server|type */
			char *tok, *servername = NULL;
			enum locator_servicetype_t servicetype = ST_MAX;

			tok = strtok(buf, delims); if (tok) { tok = strtok(NULL, delims); }
			if (tok) { servername = tok; tok = strtok(NULL, delims); }
			if (tok) { servicetype = get_servicetype(tok); tok = strtok(NULL, delims); }

			if (servername && (servicetype != ST_MAX)) {
				downup_server(servername, servicetype, buf[0]);
				strcpy(buf, "OK");
			else strcpy(buf, "BADSYNTAX");

	  case 'H':
		/* Register host|type|server */
			char *tok, *hostname = NULL, *servername = NULL;
			enum locator_servicetype_t servicetype = ST_MAX;

			tok = strtok(buf, delims); if (tok) { tok = strtok(NULL, delims); }
			if (tok) { hostname = tok; tok = strtok(NULL, delims); }
			if (tok) { servicetype = get_servicetype(tok); tok = strtok(NULL, delims); }
			if (tok) { servername = tok; tok = strtok(NULL, delims); }

			if (hostname && (servicetype != ST_MAX) && servername) {
				dbgprintf("Registering type/host %s/%s handled by server %s\n",
					  servicetype_names[servicetype], hostname, servername);
				register_host(hostname, servicetype, servername);
				strcpy(buf, "OK");
			else strcpy(buf, "BADSYNTAX");

	  case 'M':
		/* Rename host|type|newhostname */
			char *tok, *oldhostname = NULL, *newhostname = NULL;
			enum locator_servicetype_t servicetype = ST_MAX;

			tok = strtok(buf, delims); if (tok) { tok = strtok(NULL, delims); }
			if (tok) { oldhostname = tok; tok = strtok(NULL, delims); }
			if (tok) { servicetype = get_servicetype(tok); tok = strtok(NULL, delims); }
			if (tok) { newhostname = tok; tok = strtok(NULL, delims); }

			if (oldhostname && (servicetype != ST_MAX) && newhostname) {
				dbgprintf("Renaming type/host %s/%s to %s\n",
					  servicetype_names[servicetype], oldhostname, newhostname);
				if (rename_host(oldhostname, servicetype, newhostname)) {
					strcpy(buf, "OK");
				else {
					strcpy(buf, "FAILED");
			else strcpy(buf, "BADSYNTAX");

	  case 'X':
	  case 'Q':
		/* Query type|host */
			char *tok, *hostname = NULL;
			enum locator_servicetype_t servicetype = ST_MAX;
			int extquery = (buf[0] == 'X');
			serverinfo_t *res = NULL;

			tok = strtok(buf, delims); if (tok) { tok = strtok(NULL, delims); }
			if (tok) { servicetype = get_servicetype(tok); tok = strtok(NULL, delims); }
			if (tok) { hostname = tok; tok = strtok(NULL, delims); }

			if ((servicetype != ST_MAX) && hostname) {
				res = find_server_by_host(servicetype, hostname);

				if (res) {
					/* This host is fixed on a specific server ... */
					if (res->serveractualweight > 0) {
						/* ... and that server is UP */
						sprintf(buf, "!|%s", res->servername);
					else {
						/* ... and the server is DOWN, so we cannot service the request */
						strcpy(buf, "?");
				else {
					/* Roaming or un-registered host */
					res = find_server_by_type(servicetype);
					if (res) {
						if (res->sticky == LOC_STICKY) {
							dbgprintf("Host %s/%s now fixed on server %s\n", 
								  hostname, servicetype_names[servicetype], res->servername);
							register_host(hostname, servicetype, res->servername);
						sprintf(buf, "*|%s", res->servername);
					else {
						strcpy(buf, "?");

				if (res && extquery) {
					int blen = strlen(buf);

					snprintf(buf+blen, sizeof(buf)-blen-1, "|%s", res->serverextras);
			else strcpy(buf, "BADSYNTAX");

	  case 'p':
		/* Locator ping */
		sprintf(buf, "PONG|%s", VERSION);

	  case '@':
		/* Save state */
		strcpy(buf, "OK");

		strcpy(buf, "BADREQUEST");
int main(int argc, char *argv[])
	char buf[1024];
	int done = 0;
	char *res;

	if (argc < 2) {
		printf("Usage: %s IP:PORT\n", argv[0]);
		return 1;

	if (locator_init(argv[1]) == -1) {
		printf("Locator ping failed\n");
		return 1;
	else {
		printf("Locator is available\n");

	while (!done) {
		char *p, *p1, *p2, *p3, *p4, *p5, *p6, *p7;
		char *extras;

		printf("  r(egister) s servername type weight sticky\n");
		printf("  r(egister) h servername type hostname\n");
		printf("  d(own)       servername type\n");
		printf("  u(p)         servername type\n");
		printf("  f(orget)     servername type\n");
		printf("  q(uery)      hostname type\n");
		printf("  x(query)     hostname type\n");
		printf("  p(ing)\n");
		printf("  s(ave state)\n");
		printf(">"); fflush(stdout);
		done = (fgets(buf, sizeof(buf), stdin) == NULL); if (done) continue;

		p = strchr(buf, '\n'); if (p) *p = '\0';
		p1 = p2 = p3 = p4 = p5 = p6 = p7 = NULL;

		p1 = strtok(buf, " ");
		if (p1) p2 = strtok(NULL, " ");
		if (p2) p3 = strtok(NULL, " ");
		if (p3) p4 = strtok(NULL, " ");
		if (p4) p5 = strtok(NULL, " ");
		if (p5) p6 = strtok(NULL, " ");
		if (p6) p7 = strtok(NULL, "\r\n");

		switch (*p1) {
		  case 'R': case 'r':
			if (*p2 == 's') {
				enum locator_servicetype_t svc;
				enum locator_sticky_t sticky;
				int weight;

				svc = get_servicetype(p4);
				weight = (p5 ? atoi(p5) : 1);
				sticky = ((p6 && (atoi(p6) == 1)) ? LOC_STICKY : LOC_ROAMING);

				printf("%s\n", locator_register_server(p3, svc, weight, sticky, p7) ? "Failed" : "OK");
			else if (*p2 == 'h') {
				printf("%s\n", locator_register_host(p5, get_servicetype(p4), p3) ? "Failed" : "OK");

		  case 'D': case 'd':
			printf("%s\n", locator_serverdown(p2, get_servicetype(p3)) ? "Failed" : "OK");

		  case 'U': case 'u':
			printf("%s\n", locator_serverup(p2, get_servicetype(p3)) ? "Failed" : "OK");

		  case 'F': case 'f':
			printf("%s\n", locator_serverforget(p2, get_servicetype(p3)) ? "Failed" : "OK");

		  case 'Q': case 'q':
		  case 'X': case 'x':
			extras = NULL;
			res = locator_query(p2, get_servicetype(p3), (*p1 == 'x') ? &extras : NULL);
			if (res) {
				printf("Result: %s\n", res); 
				if (extras) printf("  Extras gave: %s\n", extras);
			else {

		  case 'P': case 'p':
			p = locator_cmd("p");
			if (p == NULL) printf("Failed\n"); else printf("OK: %s\n", p);

		  case 'S': case 's':
			p = locator_cmd("@");
			if (p == NULL) printf("Failed\n"); else printf("OK: %s\n", p);

	return 0;
void load_state(void)
	char *tmpdir;
	char *fn;
	FILE *fd;
	char buf[4096];
	char *tname, *sname, *sconfweight, *sactweight, *ssticky, *sextra, *hname;
	enum locator_servicetype_t stype;

	tmpdir = xgetenv("XYMONTMP"); if (!tmpdir) tmpdir = "/tmp";
	fn = (char *)malloc(strlen(tmpdir) + 100);

	sprintf(fn, "%s/locator.servers.chk", tmpdir);
	fd = fopen(fn, "r");
	if (fd) {
		while (fgets(buf, sizeof(buf), fd)) {
			serverinfo_t *srv;

			tname = sname = sconfweight = sactweight = ssticky = sextra = NULL;

			tname = strtok(buf, "|\n");
			if (tname) sname = strtok(NULL, "|\n");
			if (sname) sconfweight = strtok(NULL, "|\n");
			if (sconfweight) sactweight = strtok(NULL, "|\n");
			if (sactweight) ssticky = strtok(NULL, "|\n");
			if (ssticky) sextra = strtok(NULL, "\n");

			if (tname && sname && sconfweight && sactweight && ssticky) {
				enum locator_sticky_t sticky = (atoi(ssticky) == 1) ? LOC_STICKY : LOC_ROAMING;

				stype = get_servicetype(tname);
				srv = register_server(sname, stype, atoi(sconfweight), sticky, sextra);
				srv->serveractualweight = atoi(sactweight);
				dbgprintf("Loaded server %s/%s (cweight %d, aweight %d, %s)\n",
					srv->servername, tname, srv->serverconfweight, srv->serveractualweight,
					(srv->sticky ? "sticky" : "not sticky"));

	for (stype = 0; (stype < ST_MAX); stype++) recalc_current(stype);

	sprintf(fn, "%s/locator.hosts.chk", tmpdir);
	fd = fopen(fn, "r");
	if (fd) {
		while (fgets(buf, sizeof(buf), fd)) {
			tname = hname = sname = NULL;

			tname = strtok(buf, "|\n");
			if (tname) hname = strtok(NULL, "|\n");
			if (hname) sname = strtok(NULL, "|\n");

			if (tname && hname && sname) {
				enum locator_servicetype_t stype = get_servicetype(tname);

				register_host(hname, stype, sname);
				dbgprintf("Loaded host %s/%s for server %s\n", hname, tname, sname);
/*
 * Entry point of the channel feeder: it picks messages up from a shared
 * channel and pushes them to the registered peers.
 *
 * Flow, as far as the visible statements show:
 *   1. Parse the command line (options are listed inline below). The first
 *      argument that is not a recognised option starts the local worker
 *      command; it and all remaining arguments go to addlocalpeer().
 *   2. Sanity checks: a known channel is required, --service is required
 *      in locator mode, and a worker command is required otherwise.
 *      Each failed check returns 1.
 *   3. With --daemon: fork(); the parent writes the child pid to the
 *      pidfile, if one was given and could be opened.
 *   4. Install sig_handler for SIGINT, SIGTERM and SIGCHLD; ignore SIGALRM.
 *   5. Reopen stdin from /dev/null and, if a log file is set, reopen
 *      stdout and stderr in append mode on it.
 *   6. Attach to the channel; if that fails, "running" is cleared so the
 *      main loop is skipped.
 *   7. Main loop: wait on the GOCLIENT semaphore (non-blocking while
 *      pendingcount > 0), copy the channel buffer if it passes the
 *      filters, wait for GOCLIENT to reach zero under a 2-second alarm,
 *      down BOARDBUSY, reopen the logs on "@@logrotate", queue the message
 *      with addmessage(), then walk the peers and write queued data to
 *      each peer socket that select() reports writable within 2000 usec.
 *   8. Detach from the channel, walk the peers, remove the pidfile and
 *      return 0.
 *
 * NOTE(review): this block, as it appears here, has lost its brace-only
 * lines and its comment delimiters, and several bodies look truncated
 * (the stale-message loop is empty and two errprintf() calls are cut
 * short). Restore it from the upstream source before building.
 * NOTE(review): the text ahead of the alarm() call speaks of 1 second,
 * while the call itself arms a 2-second alarm.
 * NOTE(review): the "@@logrotate" branch calls freopen() with logfn and
 * no visible NULL check - confirm logfn is always set when it is reached.
 */
int main(int argc, char *argv[])
	int daemonize = 0;
	char *pidfile = NULL;
	char *envarea = NULL;
	int cnid = -1;
	pcre *msgfilter = NULL;
	pcre *stdfilter = NULL;

	int argi;
	struct sigaction sa;
	RbtIterator handle;

	/* Dont save the error buffer */
	save_errbuf = 0;

	/* Create the peer container */
	peers = rbtNew(name_compare);

	for (argi=1; (argi < argc); argi++) {
		if (argnmatch(argv[argi], "--debug")) {
			debug = 1;
		else if (argnmatch(argv[argi], "--channel=")) {
			char *cn = strchr(argv[argi], '=') + 1;

			/* Look the name up in channelnames, starting at C_STATUS; -1 means not found */
			for (cnid = C_STATUS; (channelnames[cnid] && strcmp(channelnames[cnid], cn)); cnid++) ;
			if (channelnames[cnid] == NULL) cnid = -1;
		else if (argnmatch(argv[argi], "--daemon")) {
			daemonize = 1;
		else if (argnmatch(argv[argi], "--no-daemon")) {
			daemonize = 0;
		else if (argnmatch(argv[argi], "--pidfile=")) {
			char *p = strchr(argv[argi], '=');
			pidfile = strdup(p+1);
		else if (argnmatch(argv[argi], "--log=")) {
			char *p = strchr(argv[argi], '=');
			logfn = strdup(p+1);
		else if (argnmatch(argv[argi], "--env=")) {
			char *p = strchr(argv[argi], '=');
			/* Uses envarea as set so far, so an --area= option only counts if it came earlier */
			loadenv(p+1, envarea);
		else if (argnmatch(argv[argi], "--area=")) {
			char *p = strchr(argv[argi], '=');
			envarea = strdup(p+1);
		else if (argnmatch(argv[argi], "--locator=")) {
			/* NOTE(review): the option value (p) is not used by any visible statement */
			char *p = strchr(argv[argi], '=');
			locatorbased = 1;
		else if (argnmatch(argv[argi], "--service=")) {
			char *p = strchr(argv[argi], '=');
			locatorservice = get_servicetype(p+1);
		else if (argnmatch(argv[argi], "--filter=")) {
			char *p = strchr(argv[argi], '=');
			msgfilter = compileregex(p+1);
			if (!msgfilter) {
				errprintf("Invalid filter (bad expression): %s\n", p+1);
			else {
				/* Control messages matching this pattern are let through next to the user filter */
				stdfilter = compileregex("^@@(logrotate|shutdown|drophost|droptest|renamehost|renametest)");
		else {
			char *childcmd;
			char **childargs;
			int i = 0;

			/* Everything from here to the end of argv is the worker command line */
			childcmd = argv[argi];
			/* calloc leaves one zeroed slot after the copied arguments */
			childargs = (char **) calloc((1 + argc - argi), sizeof(char *));
			/* Consumes the remaining arguments, which also ends the option loop */
			while (argi < argc) { childargs[i++] = argv[argi++]; }
			addlocalpeer(childcmd, childargs);

	/* Sanity checks */
	if (cnid == -1) {
		errprintf("No channel/unknown channel specified\n");
		return 1;
	if (locatorbased && (locatorservice == ST_MAX)) {
		errprintf("Must specify --service when using locator\n");
		return 1;
	if (!locatorbased && (rbtBegin(peers) == rbtEnd(peers))) {
		errprintf("Must specify command for local worker\n");
		return 1;

	/* Do cache responses to avoid doing too many lookups */
	if (locatorbased) locator_prepcache(locatorservice, 0);

	/* Go daemon */
	if (daemonize) {
		/* Become a daemon */
		pid_t daemonpid = fork();
		if (daemonpid < 0) {
			/* Fork failed */
			errprintf("Could not fork child\n");
		else if (daemonpid > 0) {
			/* Parent creates PID file and exits */
			FILE *fd = NULL;
			if (pidfile) fd = fopen(pidfile, "w");
			if (fd) {
				fprintf(fd, "%d\n", (int)daemonpid);
		/* Child (daemon) continues here */

	/* Catch signals */
	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = sig_handler;
	sigaction(SIGINT, &sa, NULL);
	sigaction(SIGTERM, &sa, NULL);
	sigaction(SIGCHLD, &sa, NULL);
	signal(SIGALRM, SIG_IGN);

	/* Switch stdout/stderr to the logfile, if one was specified */
	freopen("/dev/null", "r", stdin);	/* xymond_channel's stdin is not used */
	if (logfn) {
		freopen(logfn, "a", stdout);
		freopen(logfn, "a", stderr);

	/* Attach to the channel */
	channel = setup_channel(cnid, CHAN_CLIENT);
	if (channel == NULL) {
		errprintf("Channel not available\n");
		running = 0;

	while (running) {
		 * Wait for GOCLIENT to go up.
		 * Note that we use IPC_NOWAIT if there are messages in the
		 * queue, because then we just want to pick up a message if
		 * there is one, and if not we want to continue pushing the
		 * queued data to the worker.
		struct sembuf s;
		int n;

		s.sem_num = GOCLIENT; s.sem_op  = -1; s.sem_flg = ((pendingcount > 0) ? IPC_NOWAIT : 0);
		n = semop(channel->semid, &s, 1);

		if (n == 0) {
			 * GOCLIENT went high, and so we got alerted about a new
			 * message arriving. Copy the message to our own buffer queue.
			char *inbuf = NULL;

			if (!msgfilter || matchregex(channel->channelbuf, msgfilter) || matchregex(channel->channelbuf, stdfilter)) {
				inbuf = strdup(channel->channelbuf);

			 * Now we have safely stored the new message in our buffer.
			 * Wait until any other clients on the same channel have picked up 
			 * this message (GOCLIENT reaches 0).
			 * We wrap this into an alarm handler, because it can occasionally
			 * fail, causing the whole system to lock up. We dont want that....
			 * We'll set the alarm to trigger after 1 second. Experience shows
			 * that we'll either succeed in a few milliseconds, or fail completely
			 * and wait the full alarm-timer duration.
			gotalarm = 0; signal(SIGALRM, sig_handler); alarm(2); 
			do {
				s.sem_num = GOCLIENT; s.sem_op  = 0; s.sem_flg = 0;
				n = semop(channel->semid, &s, 1);
			} while ((n == -1) && (errno == EAGAIN) && running && (!gotalarm));
			signal(SIGALRM, SIG_IGN);

			if (gotalarm) {
				errprintf("Gave up waiting for GOCLIENT to go low.\n");

			 * Let master know we got it by downing BOARDBUSY.
			 * This should not block, since BOARDBUSY is upped
			 * by the master just before he ups GOCLIENT.
			do {
				s.sem_num = BOARDBUSY; s.sem_op  = -1; s.sem_flg = IPC_NOWAIT;
				n = semop(channel->semid, &s, 1);
			} while ((n == -1) && (errno == EINTR));
			if (n == -1) {
				errprintf("Tried to down BOARDBUSY: %s\n", strerror(errno));

			if (inbuf) {
				 * See if they want us to rotate logs. We pass this on to
				 * the worker module as well, but must handle our own logfile.
				if (strncmp(inbuf, "@@logrotate", 11) == 0) {
					freopen(logfn, "a", stdout);
					freopen(logfn, "a", stderr);

				 * Put the new message on our outbound queue.
				if (addmessage(inbuf) != 0) {
					/* Failed to queue message, free the buffer */
		else {
			if (errno != EAGAIN) {
				dbgprintf("Semaphore wait aborted: %s\n", strerror(errno));

		 * We've picked up messages from the master. Now we 
		 * must push them to the worker process. Since there 
		 * is no way to hang off both a semaphore and select(),
		 * we'll just push as much data as possible into the 
		 * pipe. If we get to a point where we would block,
		 * then wait a teeny bit of time and restart the 
		 * whole loop with checking for new messages from the
		 * master etc.
		 * In theory, this could become an almost busy-wait loop.
		 * In practice, however, the queue will be empty most
		 * of the time because we'll just shove the data to the
		 * worker child.
		for (handle = rbtBegin(peers); (handle != rbtEnd(peers)); handle = rbtNext(peers, handle)) {
			int canwrite = 1, hasfailed = 0;
			xymon_peer_t *pwalk;
			time_t msgtimeout = gettimer() - MSGTIMEOUT;
			int flushcount = 0;

			pwalk = (xymon_peer_t *) gettreeitem(peers, handle);
			if (pwalk->msghead == NULL) continue; /* Ignore peers with nothing queued */

			switch (pwalk->peerstatus) {
			  case P_UP:
				canwrite = 1;

			  case P_DOWN:
				canwrite = (pwalk->peerstatus == P_UP);

			  case P_FAILED:
				canwrite = 0;

			/* See if we have stale messages queued */
			while (pwalk->msghead && (pwalk->msghead->tstamp < msgtimeout)) {

			if (flushcount) {
				errprintf("Flushed %d stale messages for %s:%d\n",

			while (pwalk->msghead && canwrite) {
				fd_set fdwrite;
				struct timeval tmo;

				/* Check that this peer is ready for writing. */
				FD_ZERO(&fdwrite); FD_SET(pwalk->peersocket, &fdwrite);
				tmo.tv_sec = 0; tmo.tv_usec = 2000;
				n = select(pwalk->peersocket+1, NULL, &fdwrite, NULL, &tmo);
				if (n == -1) {
					errprintf("select() failed: %s\n", strerror(errno));
					canwrite = 0; 
					hasfailed = 1;
				else if ((n == 0) || (!FD_ISSET(pwalk->peersocket, &fdwrite))) {
					canwrite = 0;

				n = write(pwalk->peersocket, pwalk->msghead->bufp, pwalk->msghead->buflen);
				if (n >= 0) {
					pwalk->msghead->bufp += n;
					pwalk->msghead->buflen -= n;
					if (pwalk->msghead->buflen == 0) flushmessage(pwalk);
				else if (errno == EAGAIN) {
					 * Write would block ... stop for now. 
					canwrite = 0;
				else {
					hasfailed = 1;

				if (hasfailed) {
					/* Write failed, or message grew stale */
					errprintf("Peer at %s:%d failed: %s\n",
						  inet_ntoa(pwalk->peeraddr.sin_addr), ntohs(pwalk->peeraddr.sin_port),
					canwrite = 0;
					if (pwalk->peertype == P_NET) locator_serverdown(pwalk->peername, locatorservice);
					pwalk->peerstatus = P_FAILED;

	/* Detach from channels */
	close_channel(channel, CHAN_CLIENT);

	/* Close peer connections */
	for (handle = rbtBegin(peers); (handle != rbtEnd(peers)); handle = rbtNext(peers, handle)) {
		xymon_peer_t *pwalk = (xymon_peer_t *) gettreeitem(peers, handle);

	/* Remove the PID file */
	if (pidfile) unlink(pidfile);

	return 0;