Exemplo n.º 1
0
static void print_host(hostlist_t *host, htnames_t *testnames[], int testcount)
{
    int testi, rowcount, netcount;
    void *hinfo = hostinfo(host->hostname);
    char *dispname = NULL, *clientalias = NULL, *comment = NULL, *description = NULL, *pagepathtitle = NULL;
    char *net = NULL, *nkalerts = NULL;
    char *nktime = NULL, *downtime = NULL, *reporttime = NULL;
    char *itm;
    tag_t *taghead = NULL;
    int contidx = 0, haveping = 0;
    char contcol[1024];
    activealerts_t *alert;
    strbuffer_t *buf = newstrbuffer(0);

    fprintf(stdout, "<p style=\"page-break-before: always\">\n");
    fprintf(stdout, "<table width=\"100%%\" border=1 summary=\"%s configuration\">\n", host->hostname);

    pagepathtitle = bbh_item(hinfo, BBH_PAGEPATHTITLE);
    if (!pagepathtitle || (strlen(pagepathtitle) == 0)) pagepathtitle = "Top page";
    dispname = bbh_item(hinfo, BBH_DISPLAYNAME);
    if (dispname && (strcmp(dispname, host->hostname) == 0)) dispname = NULL;
    clientalias = bbh_item(hinfo, BBH_CLIENTALIAS);
    if (clientalias && (strcmp(clientalias, host->hostname) == 0)) clientalias = NULL;
    comment = bbh_item(hinfo, BBH_COMMENT);
    description = bbh_item(hinfo, BBH_DESCRIPTION);
    net = bbh_item(hinfo, BBH_NET);
    nkalerts = bbh_item(hinfo, BBH_NK);
    nktime = bbh_item(hinfo, BBH_NKTIME);
    if (!nktime) nktime = "24x7";
    else nktime = strdup(timespec_text(nktime));
    downtime = bbh_item(hinfo, BBH_DOWNTIME);
    if (downtime) downtime = strdup(timespec_text(downtime));
    reporttime = bbh_item(hinfo, BBH_REPORTTIME);
    if (!reporttime) reporttime = "24x7";
    else reporttime = strdup(timespec_text(reporttime));

    rowcount = 1;
    if (pagepathtitle) rowcount++;
    if (dispname || clientalias) rowcount++;
    if (comment) rowcount++;
    if (description) rowcount++;
    if (!newnkconfig && nktime) rowcount++;
    if (downtime) rowcount++;
    if (reporttime) rowcount++;

    fprintf(stdout, "<tr>\n");
    fprintf(stdout, "<th rowspan=%d align=left width=\"25%%\" valign=top>Basics</th>\n", rowcount);
    fprintf(stdout, "<th align=center>%s (%s)</th>\n",
            (dispname ? dispname : host->hostname), bbh_item(hinfo, BBH_IP));
    fprintf(stdout, "</tr>\n");

    if (dispname || clientalias) {
        fprintf(stdout, "<tr><td>Aliases:");
        if (dispname) fprintf(stdout, " %s", dispname);
        if (clientalias) fprintf(stdout, " %s", clientalias);
        fprintf(stdout, "</td></tr>\n");
    }
    if (pagepathtitle) fprintf(stdout, "<tr><td>Monitoring location: %s</td></tr>\n", pagepathtitle);
    if (comment) fprintf(stdout, "<tr><td>Comment: %s</td></tr>\n", comment);
    if (description) fprintf(stdout, "<tr><td>Description: %s</td></tr>\n", description);
    if (!newnkconfig && nktime) fprintf(stdout, "<tr><td>NK monitoring period: %s</td></tr>\n", nktime);
    if (downtime) fprintf(stdout, "<tr><td>Planned downtime: %s</td></tr>\n", downtime);
    if (reporttime) fprintf(stdout, "<tr><td>SLA Reporting Period: %s</td></tr>\n", reporttime);


    /* Build a list of the network tests */
    itm = bbh_item_walk(hinfo);
    while (itm) {
        char *visdata = NULL, *colname = NULL, *expdata = NULL;
        bburl_t bu;
        int dialuptest = 0, reversetest = 0, alwaystruetest = 0, httpextra = 0;

        if (*itm == '?') {
            dialuptest=1;
            itm++;
        }
        if (*itm == '!') {
            reversetest=1;
            itm++;
        }
        if (*itm == '~') {
            alwaystruetest=1;
            itm++;
        }

        if ( argnmatch(itm, "http")         ||
                argnmatch(itm, "content=http") ||
                argnmatch(itm, "cont;http")    ||
                argnmatch(itm, "cont=")        ||
                argnmatch(itm, "nocont;http")  ||
                argnmatch(itm, "nocont=")      ||
                argnmatch(itm, "post;http")    ||
                argnmatch(itm, "post=")        ||
                argnmatch(itm, "nopost;http")  ||
                argnmatch(itm, "nopost=")      ||
                argnmatch(itm, "type;http")    ||
                argnmatch(itm, "type=")        ) {
            visdata = decode_url(itm, &bu);
            colname = bu.columnname;
            if (!colname) {
                if (bu.expdata) {
                    httpextra = 1;
                    if (contidx == 0) {
                        colname = "content";
                        contidx++;
                    }
                    else {
                        sprintf(contcol, "content%d", contidx);
                        colname = contcol;
                        contidx++;
                    }
                }
                else {
                    colname = "http";
                }
            }
            expdata = bu.expdata;
        }
        else if (strncmp(itm, "rpc=", 4) == 0) {
            colname = "rpc";
            visdata = strdup(itm+4);
        }
        else if (strncmp(itm, "dns=", 4) == 0) {
            colname = "dns";
            visdata = strdup(itm+4);
        }
        else if (strncmp(itm, "dig=", 4) == 0) {
            colname = "dns";
            visdata = strdup(itm+4);
        }
        else if (strncmp(itm, pingplus, strlen(pingplus)) == 0) {
            haveping = 1;
            colname = pingcolumn;
            visdata = strdup(itm+strlen(pingplus));
        }
        else if (is_net_test(itm)) {
            colname = strdup(itm);
        }


        if (colname) {
            tag_t *newitem;

addtolist:
            for (newitem = taghead; (newitem && strcmp(newitem->columnname, colname)); newitem = newitem->next);

            if (!newitem) {
                newitem = (tag_t *)calloc(1, sizeof(tag_t));
                newitem->columnname = strdup(colname);
                newitem->visualdata = (visdata ? strdup(visdata) : NULL);
                newitem->expdata = (expdata ? strdup(expdata) : NULL);
                newitem->next = taghead;
                taghead = newitem;
            }
            else {
                /* Multiple tags for one column - must be http */
                newitem->visualdata = (char *)realloc(newitem->visualdata, strlen(newitem->visualdata) + strlen(visdata) + 5);
                strcat(newitem->visualdata, "<br>");
                strcat(newitem->visualdata, visdata);
            }

            if (httpextra) {
                httpextra = 0;
                colname = "http";
                expdata = NULL;
                goto addtolist;
            }
        }

        itm = bbh_item_walk(NULL);
    }

    if (!haveping && !bbh_item(hinfo, BBH_FLAG_NOCONN)) {
        for (testi = 0; (testi < testcount); testi++) {
            if (strcmp(testnames[testi]->name, pingcolumn) == 0) {
                tag_t *newitem = (tag_t *)calloc(1, sizeof(tag_t));
                newitem->columnname = strdup(pingcolumn);
                newitem->next = taghead;
                taghead = newitem;
            }
        }
    }

    /* Add the "badFOO" settings */
    itm = bbh_item_walk(hinfo);
    while (itm) {
        if (strncmp(itm, "bad", 3) == 0) {
            char *tname, *p;
            int b1, b2, b3, n = -1;
            tag_t *tag = NULL;

            tname = itm+3;
            p = strchr(tname, ':');
            if (p) {
                *p = '\0';
                n = sscanf(p+1, "%d:%d:%d", &b1, &b2, &b3);
                for (tag = taghead; (tag && strcmp(tag->columnname, tname)); tag = tag->next);
                *p = ':';
            }

            if (tag && (n == 3)) {
                tag->b1 = b1;
                tag->b2 = b2;
                tag->b3 = b3;
            }
        }

        itm = bbh_item_walk(NULL);
    }

    if (taghead) {
        fprintf(stdout, "<tr>\n");
        fprintf(stdout, "<th align=left valign=top>Network tests");
        if (net) fprintf(stdout, "<br>(from %s)", net);
        fprintf(stdout, "</th>\n");

        fprintf(stdout, "<td><table border=0 cellpadding=\"3\" cellspacing=\"5\" summary=\"%s network tests\">\n", host->hostname);
        fprintf(stdout, "<tr><th align=left valign=top>Service</th><th align=left valign=top>NK</th><th align=left valign=top>C/Y/R limits</th><th align=left valign=top>Specifics</th></tr>\n");
    }
    for (testi = 0, netcount = 0; (testi < testcount); testi++) {
        tag_t *twalk;

        for (twalk = taghead; (twalk && strcasecmp(twalk->columnname, testnames[testi]->name)); twalk = twalk->next);
        if (!twalk) continue;

        use_columndoc(testnames[testi]->name);
        fprintf(stdout, "<tr>");
        fprintf(stdout, "<td valign=top>%s</td>", testnames[testi]->name);
        fprintf(stdout, "<td valign=top>%s</td>", nkval(host->hostname, testnames[testi]->name, nkalerts));

        fprintf(stdout, "<td valign=top>");
        if (twalk->b1 || twalk->b2 || twalk->b3) {
            fprintf(stdout, "%d/%d/%d", twalk->b1, twalk->b2, twalk->b3);
        }
        else {
            fprintf(stdout, "-/-/-");
        }
        fprintf(stdout, "</td>");

        fprintf(stdout, "<td valign=top>");
        fprintf(stdout, "<i>%s</i>", (twalk->visualdata ? twalk->visualdata : "&nbsp;"));
        if (twalk->expdata) fprintf(stdout, " must return <i>'%s'</i>", twalk->expdata);
        fprintf(stdout, "</td>");

        fprintf(stdout, "</tr>");
        netcount++;
    }
    if (taghead) {
        fprintf(stdout, "</table></td>\n");
        fprintf(stdout, "</tr>\n");
    }


    if (netcount != testcount) {
        fprintf(stdout, "<tr>\n");
        fprintf(stdout, "<th align=left valign=top>Local tests</th>\n");
        fprintf(stdout, "<td><table border=0 cellpadding=\"3\" cellspacing=\"5\" summary=\"%s local tests\">\n", host->hostname);
        fprintf(stdout, "<tr><th align=left valign=top>Service</th><th align=left valign=top>NK</th><th align=left valign=top>C/Y/R limits</th><th align=left valign=top>Configuration <i>(NB: Thresholds on client may differ)</i></th></tr>\n");
    }
    for (testi = 0; (testi < testcount); testi++) {
        tag_t *twalk;

        for (twalk = taghead; (twalk && strcasecmp(twalk->columnname, testnames[testi]->name)); twalk = twalk->next);
        if (twalk) continue;

        use_columndoc(testnames[testi]->name);
        fprintf(stdout, "<tr>");
        fprintf(stdout, "<td valign=top>%s</td>", testnames[testi]->name);
        fprintf(stdout, "<td valign=top>%s</td>", nkval(host->hostname, testnames[testi]->name, nkalerts));
        fprintf(stdout, "<td valign=top>-/-/-</td>");

        /* Make up some default configuration data */
        fprintf(stdout, "<td valign=top>");
        if (strcmp(testnames[testi]->name, "cpu") == 0) {
            fprintf(stdout, "UNIX - Yellow: Load average > 1.5, Red: Load average > 3.0<br>");
            fprintf(stdout, "Windows - Yellow: CPU utilisation > 80%%, Red: CPU utilisation > 95%%");
        }
        else if (strcmp(testnames[testi]->name, "disk") == 0) {
            fprintf(stdout, "Default limits: Yellow 90%% full, Red 95%% full<br>\n");
            print_disklist(host->hostname);
        }
        else if (strcmp(testnames[testi]->name, "memory") == 0) {
            fprintf(stdout, "Yellow: swap/pagefile use > 80%%, Red: swap/pagefile use > 90%%");
        }
        else if (strcmp(testnames[testi]->name, "procs") == 0) {
            htnames_t *walk;

            if (!host->procs) fprintf(stdout, "No processes monitored<br>\n");

            for (walk = host->procs; (walk); walk = walk->next) {
                fprintf(stdout, "%s<br>\n", walk->name);
            }
        }
        else if (strcmp(testnames[testi]->name, "svcs") == 0) {
            htnames_t *walk;

            if (!host->svcs) fprintf(stdout, "No services monitored<br>\n");

            for (walk = host->svcs; (walk); walk = walk->next) {
                fprintf(stdout, "%s<br>\n", walk->name);
            }
        }
        else {
            fprintf(stdout, "&nbsp;");
        }
        fprintf(stdout, "</td>");

        fprintf(stdout, "</tr>");
    }
    if (netcount != testcount) {
        fprintf(stdout, "</table></td>\n");
        fprintf(stdout, "</tr>\n");
    }

    /* Do the alerts */
    alert = (activealerts_t *)calloc(1, sizeof(activealerts_t));
    alert->hostname = host->hostname;
    alert->location = bbh_item(hinfo, BBH_ALLPAGEPATHS);
    strcpy(alert->ip, "127.0.0.1");
    alert->color = COL_RED;
    alert->pagemessage = "";
    alert->state = A_PAGING;
    alert->cookie = 12345;
    alert_printmode(2);
    for (testi = 0; (testi < testcount); testi++) {
        alert->testname = testnames[testi]->name;
        if (have_recipient(alert, NULL)) print_alert_recipients(alert, buf);
    }
    xfree(alert);

    if (STRBUFLEN(buf) > 0) {
        fprintf(stdout, "<tr>\n");
        fprintf(stdout, "<th align=left valign=top>Alerts</th>\n");
        fprintf(stdout, "<td><table border=0 cellpadding=\"3\" cellspacing=\"5\" summary=\"%s alerts\">\n", host->hostname);
        fprintf(stdout, "<tr><th>Service</th><th>Recipient</th><th>1st Delay</th><th>Stop after</th><th>Repeat</th><th>Time of Day</th><th>Colors</th></tr>\n");

        fprintf(stdout, "%s", STRBUF(buf));

        fprintf(stdout, "</table></td>\n");
        fprintf(stdout, "</tr>\n");
    }

    /* Finish off this host */
    fprintf(stdout, "</table>\n");

    freestrbuffer(buf);
}
Exemplo n.º 2
0
int main(int argc, char *argv[])
{
	char *msg;
	int seq;
	int argi;
	int alertcolors, alertinterval;
	char *configfn = NULL;
	char *checkfn = NULL;
	int checkpointinterval = 900;
	char acklogfn[PATH_MAX];
	FILE *acklogfd = NULL;
	char notiflogfn[PATH_MAX];
	FILE *notiflogfd = NULL;
	char *tracefn = NULL;
	struct sigaction sa;
	int configchanged;
	time_t lastxmit = 0;

	MEMDEFINE(acklogfn);
	MEMDEFINE(notiflogfn);

	libxymon_init(argv[0]);

	/* Dont save the error buffer */
	save_errbuf = 0;

	/* Load alert config */
	alertcolors = colorset(xgetenv("ALERTCOLORS"), ((1 << COL_GREEN) | (1 << COL_BLUE)));
	alertinterval = 60*atoi(xgetenv("ALERTREPEAT"));

	/* Create our loookup-trees */
	hostnames = xtreeNew(strcasecmp);
	testnames = xtreeNew(strcasecmp);
	locations = xtreeNew(strcasecmp);

	for (argi=1; (argi < argc); argi++) {
		if (argnmatch(argv[argi], "--config=")) {
			configfn = strdup(strchr(argv[argi], '=')+1);
		}
		else if (argnmatch(argv[argi], "--checkpoint-file=")) {
			checkfn = strdup(strchr(argv[argi], '=')+1);
		}
		else if (argnmatch(argv[argi], "--checkpoint-interval=")) {
			char *p = strchr(argv[argi], '=') + 1;
			checkpointinterval = atoi(p);
		}
		else if (argnmatch(argv[argi], "--dump-config")) {
			load_alertconfig(configfn, alertcolors, alertinterval);
			dump_alertconfig(1);
			return 0;
		}
		else if (argnmatch(argv[argi], "--cfid")) {
			include_configid = 1;
		}
		else if (argnmatch(argv[argi], "--test")) {
			char *testhost = NULL, *testservice = NULL, *testpage = NULL, 
			     *testcolor = "red", *testgroups = NULL;
			void *hinfo;
			int testdur = 0;
			FILE *logfd = NULL;
			activealerts_t *awalk = NULL;
			int paramno = 0;

			argi++; if (argi < argc) testhost = argv[argi];
			argi++; if (argi < argc) testservice = argv[argi];
			argi++; 
			while (argi < argc) {
				if (strncasecmp(argv[argi], "--duration=", 11) == 0) {
					testdur = durationvalue(strchr(argv[argi], '=')+1);
				}
				else if (strncasecmp(argv[argi], "--color=", 8) == 0) {
					testcolor = strchr(argv[argi], '=')+1;
				}
				else if (strncasecmp(argv[argi], "--group=", 8) == 0) {
					testgroups = strchr(argv[argi], '=')+1;
				}
				else if (strncasecmp(argv[argi], "--time=", 7) == 0) {
					fakestarttime = (time_t)atoi(strchr(argv[argi], '=')+1);
				}
				else {
					paramno++;
					if (paramno == 1) testdur = atoi(argv[argi]);
					else if (paramno == 2) testcolor = argv[argi];
					else if (paramno == 3) fakestarttime = (time_t) atoi(argv[argi]);
				}

				argi++;
			}

			if ((testhost == NULL) || (testservice == NULL)) {
				printf("Usage: xymond_alert --test HOST SERVICE [options]\n");
				printf("Possible options:\n\t[--duration=MINUTES]\n\t[--color=COLOR]\n\t[--group=GROUPNAME]\n\t[--time=TIMESPEC]\n");

				return 1;
			}

			load_hostnames(xgetenv("HOSTSCFG"), NULL, get_fqdn());
			hinfo = hostinfo(testhost);
			if (hinfo) {
				testpage = strdup(xmh_item(hinfo, XMH_ALLPAGEPATHS));
			}
			else {
				errprintf("Host not found in hosts.cfg - assuming it is on the top page\n");
				testpage = "";
			}

			awalk = (activealerts_t *)calloc(1, sizeof(activealerts_t));
			awalk->hostname = find_name(hostnames, testhost);
			awalk->testname = find_name(testnames, testservice);
			awalk->location = find_name(locations, testpage);
			awalk->ip = strdup("127.0.0.1");
			awalk->color = awalk->maxcolor = parse_color(testcolor);
			awalk->pagemessage = "Test of the alert configuration";
			awalk->eventstart = getcurrenttime(NULL) - testdur*60;
			awalk->groups = (testgroups ? strdup(testgroups) : NULL);
			awalk->state = A_PAGING;
			awalk->cookie = 12345;
			awalk->next = NULL;

			logfd = fopen("/dev/null", "w");
			starttrace(NULL);
			testonly = 1;

			load_alertconfig(configfn, alertcolors, alertinterval);
			load_holidays(0);
			send_alert(awalk, logfd);
			return 0;
		}
		else if (argnmatch(argv[argi], "--trace=")) {
			tracefn = strdup(strchr(argv[argi], '=')+1);
			starttrace(tracefn);
		}
		else if (net_worker_option(argv[argi])) {
			/* Handled in the subroutine */
		}
		else if (standardoption(argv[argi])) {
			if (showhelp) return 0;
		}
		else {
			errprintf("Unknown option '%s'\n", argv[argi]);
		}
	}

	/* Do the network stuff if needed */
	net_worker_run(ST_ALERT, LOC_SINGLESERVER, NULL);

	if (checkfn) {
		load_checkpoint(checkfn);
		nextcheckpoint = gettimer() + checkpointinterval;
		dbgprintf("Next checkpoint at %d, interval %d\n", (int) nextcheckpoint, checkpointinterval);
	}

	setup_signalhandler("xymond_alert");
	/* Need to handle these ourselves, so we can shutdown and save state-info */
	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = sig_handler;
	sigaction(SIGPIPE, &sa, NULL);
	sigaction(SIGTERM, &sa, NULL);
	sigaction(SIGINT,  &sa, NULL);
	sigaction(SIGCHLD, &sa, NULL);
	sigaction(SIGUSR1, &sa, NULL);
	sigaction(SIGHUP,  &sa, NULL);

	if (xgetenv("XYMONSERVERLOGS")) {
		sprintf(acklogfn, "%s/acknowledge.log", xgetenv("XYMONSERVERLOGS"));
		acklogfd = fopen(acklogfn, "a");
		sprintf(notiflogfn, "%s/notifications.log", xgetenv("XYMONSERVERLOGS"));
		notiflogfd = fopen(notiflogfn, "a");
	}

	/*
	 * The general idea here is that this loop handles receiving of alert-
	 * and ack-messages from the master daemon, and maintains a list of 
	 * host+test combinations that may have alerts going out.
	 *
	 * This module does not deal with any specific alert-configuration, 
	 * it just picks up the alert messages, maintains the list of 
	 * known tests that are in some sort of critical condition, and
	 * periodically pushes alerts to the do_alert.c module for handling.
	 *
	 * The only modification of alerts that happen here is the handling
	 * of when the next alert is due. It calls into the next_alert() 
	 * routine to learn when an alert should be repeated, and also 
	 * deals with Acknowledgments that stop alerts from going out for
	 * a period of time.
	 */
	while (running) {
		char *eoln, *restofmsg;
		char *metadata[20];
		char *p;
		int metacount;
		char *hostname = NULL, *testname = NULL;
		struct timespec timeout;
		time_t now, nowtimer;
		int anytogo;
		activealerts_t *awalk;
		int childstat;

		nowtimer = gettimer();
		if (checkfn && (nowtimer > nextcheckpoint)) {
			dbgprintf("Saving checkpoint\n");
			nextcheckpoint = nowtimer + checkpointinterval;
			save_checkpoint(checkfn);

			if (acklogfd) acklogfd = freopen(acklogfn, "a", acklogfd);
			if (notiflogfd) notiflogfd = freopen(notiflogfn, "a", notiflogfd);
		}

		timeout.tv_sec = 60; timeout.tv_nsec = 0;
		msg = get_xymond_message(C_PAGE, "xymond_alert", &seq, &timeout);
		if (msg == NULL) {
			running = 0;
			continue;
		}

		/* See what time it is - must happen AFTER the timeout */
		now = getcurrenttime(NULL);

		/* Split the message in the first line (with meta-data), and the rest */
 		eoln = strchr(msg, '\n');
		if (eoln) {
			*eoln = '\0';
			restofmsg = eoln+1;
		}
		else {
			restofmsg = "";
		}

		/* 
		 * Now parse the meta-data into elements.
		 * We use our own "gettok()" routine which works
		 * like strtok(), but can handle empty elements.
		 */
		metacount = 0; 
		memset(&metadata, 0, sizeof(metadata));
		p = gettok(msg, "|");
		while (p && (metacount < 19)) {
			metadata[metacount] = p;
			metacount++;
			p = gettok(NULL, "|");
		}
		metadata[metacount] = NULL;

		if (metacount > 3) hostname = metadata[3];
		if (metacount > 4) testname = metadata[4];

		if ((metacount > 10) && (strncmp(metadata[0], "@@page", 6) == 0)) {
			/* @@page|timestamp|sender|hostname|testname|hostip|expiretime|color|prevcolor|changetime|location|cookie|osname|classname|grouplist|modifiers */

			int newcolor, newalertstatus, oldalertstatus;

			dbgprintf("Got page message from %s:%s\n", hostname, testname);
			traceprintf("@@page %s:%s:%s=%s\n", hostname, testname, metadata[10], metadata[7]);

			awalk = find_active(hostname, testname);
			if (awalk == NULL) {
				char *hwalk = find_name(hostnames, hostname);
				char *twalk = find_name(testnames, testname);
				char *pwalk = find_name(locations, metadata[10]);

				awalk = (activealerts_t *)calloc(1, sizeof(activealerts_t));
				awalk->hostname = hwalk;
				awalk->testname = twalk;
				awalk->location = pwalk;
				awalk->cookie = -1;
				awalk->state = A_DEAD;
				/*
				 * Use changetime here, if we restart the alert module then
				 * this gets the duration values more right than using "now".
				 * Also, define this only when a new alert arrives - we should
				 * NOT clear this when a status goes yellow->red, or if it
				 * flaps between yellow and red.
				 */
				awalk->eventstart = atoi(metadata[9]);
				add_active(awalk->hostname, awalk);
				traceprintf("New record\n");
			}

			newcolor = parse_color(metadata[7]);
			oldalertstatus = ((alertcolors & (1 << awalk->color)) != 0);
			newalertstatus = ((alertcolors & (1 << newcolor)) != 0);

			traceprintf("state %d->%d\n", oldalertstatus, newalertstatus);

			if (newalertstatus) {
				/* It's in an alert state. */
				awalk->color = newcolor;
				awalk->state = A_PAGING;

				if (newcolor > awalk->maxcolor) {
					if (awalk->maxcolor != 0) {
						/*
						 * Severity has increased (yellow -> red).
						 * Clear the repeat-interval, and set maxcolor to
						 * the new color. If it drops to yellow again,
						 * maxcolor stays at red, so a test that flaps
						 * between yellow and red will only alert on red
						 * the first time, and then follow the repeat
						 * interval.
						 */
						dbgprintf("Severity increased, cleared repeat interval: %s/%s %s->%s\n",
							awalk->hostname, awalk->testname,
							colorname(awalk->maxcolor), colorname(newcolor));
						clear_interval(awalk);
					}

					awalk->maxcolor = newcolor;
				}
			}
			else {
				/* 
				 * Send one "recovered" message out now, then go to A_DEAD.
				 * Dont update the color here - we want recoveries to go out 
				 * only if the alert color triggered an alert
				 */
				awalk->state = (newcolor == COL_BLUE) ? A_DISABLED : A_RECOVERED;
			}

			if (oldalertstatus != newalertstatus) {
				dbgprintf("Alert status changed from %d to %d\n", oldalertstatus, newalertstatus);
				clear_interval(awalk);
			}

			if (awalk->ip) xfree(awalk->ip);
			awalk->ip = strdup(metadata[5]);
			awalk->cookie = atoi(metadata[11]);
			if (awalk->osname) xfree(awalk->osname);
			awalk->osname    = (metadata[12] ? strdup(metadata[12]) : NULL);
			if (awalk->classname) xfree(awalk->classname);
			awalk->classname = (metadata[13] ? strdup(metadata[13]) : NULL);
			if (awalk->groups) xfree(awalk->groups);
			awalk->groups    = (metadata[14] ? strdup(metadata[14]) : NULL);
			if (awalk->pagemessage) xfree(awalk->pagemessage);
			if (metadata[15]) {
				/* Modifiers are more interesting than the message itself */
				awalk->pagemessage = (char *)malloc(strlen(awalk->hostname) + strlen(awalk->testname) + strlen(colorname(awalk->color)) + strlen(metadata[15]) + strlen(restofmsg) + 10);
				sprintf(awalk->pagemessage, "%s:%s %s\n%s\n%s",
					awalk->hostname, awalk->testname, colorname(awalk->color), metadata[15], restofmsg);
			}
			else {
				awalk->pagemessage = strdup(restofmsg);
			}
		}
		else if ((metacount > 5) && (strncmp(metadata[0], "@@ack", 5) == 0)) {
 			/* @@ack|timestamp|sender|hostname|testname|hostip|expiretime */

			/*
			 * An ack is handled simply by setting the next
			 * alert-time to when the ack expires.
			 */
			time_t nextalert = atoi(metadata[6]);

			dbgprintf("Got ack message from %s:%s\n", hostname, testname);
			traceprintf("@@ack: %s:%s now=%d, ackeduntil %d\n",
				     hostname, testname, (int)now, (int)nextalert);

			awalk = find_active(hostname, testname);

			if (acklogfd) {
				int cookie = (awalk ? awalk->cookie : -1);
				int color  = (awalk ? awalk->color : 0);

				fprintf(acklogfd, "%d\t%d\t%d\t%d\t%s\t%s.%s\t%s\t%s\n",
					(int)now, cookie, 
					(int)((nextalert - now) / 60), cookie,
					"np_filename_not_used", 
					hostname, testname, 
					colorname(color),
					nlencode(restofmsg));
				fflush(acklogfd);
			}

			if (awalk && (awalk->state == A_PAGING)) {
				traceprintf("Record updated\n");
				awalk->state = A_ACKED;
				awalk->nextalerttime = nextalert;
				if (awalk->ackmessage) xfree(awalk->ackmessage);
				awalk->ackmessage = strdup(restofmsg);
			}
			else {
				traceprintf("No record\n");
			}
		}
		else if ((metacount > 4) && (strncmp(metadata[0], "@@notify", 5) == 0)) {
			/* @@notify|timestamp|sender|hostname|testname|pagepath */

			char *hwalk = find_name(hostnames, hostname);
			char *twalk = find_name(testnames, testname);
			char *pwalk = find_name(locations, (metadata[5] ? metadata[5] : ""));

			awalk = (activealerts_t *)calloc(1, sizeof(activealerts_t));
			awalk->hostname = hwalk;
			awalk->testname = twalk;
			awalk->location = pwalk;
			awalk->cookie = -1;
			awalk->pagemessage = strdup(restofmsg);
			awalk->eventstart = getcurrenttime(NULL);
			awalk->state = A_NOTIFY;
			add_active(awalk->hostname, awalk);
		}
		else if ((metacount > 3) && 
			 ((strncmp(metadata[0], "@@drophost", 10) == 0) || (strncmp(metadata[0], "@@dropstate", 11) == 0))) {
			/* @@drophost|timestamp|sender|hostname */
			/* @@dropstate|timestamp|sender|hostname */
			xtreePos_t handle;

			handle = xtreeFind(hostnames, hostname);
			if (handle != xtreeEnd(hostnames)) {
				alertanchor_t *anchor = (alertanchor_t *)xtreeData(hostnames, handle);
				for (awalk = anchor->head; (awalk); awalk = awalk->next) awalk->state = A_DEAD;
			}
		}
		else if ((metacount > 4) && (strncmp(metadata[0], "@@droptest", 10) == 0)) {
			/* @@droptest|timestamp|sender|hostname|testname */

			awalk = find_active(hostname, testname);
			if (awalk) awalk->state = A_DEAD;
		}
		else if ((metacount > 4) && (strncmp(metadata[0], "@@renamehost", 12) == 0)) {
			/* @@renamehost|timestamp|sender|hostname|newhostname */

			/* 
			 * We handle rename's simply by dropping the alert. If there is still an
			 * active alert for the host, it will have to be dealt with when the next
			 * status update arrives.
			 */
			xtreePos_t handle;

			handle = xtreeFind(hostnames, hostname);
			if (handle != xtreeEnd(hostnames)) {
				alertanchor_t *anchor = (alertanchor_t *)xtreeData(hostnames, handle);
				for (awalk = anchor->head; (awalk); awalk = awalk->next) awalk->state = A_DEAD;
			}
		}
		else if ((metacount > 5) && (strncmp(metadata[0], "@@renametest", 12) == 0)) {
			/* @@renametest|timestamp|sender|hostname|oldtestname|newtestname */

			/* 
			 * We handle rename's simply by dropping the alert. If there is still an
			 * active alert for the host, it will have to be dealt with when the next
			 * status update arrives.
			 */
			awalk = find_active(hostname, testname);
			if (awalk) awalk->state = A_DEAD;
		}
		else if (strncmp(metadata[0], "@@shutdown", 10) == 0) {
			running = 0;
			errprintf("Got a shutdown message\n");
			continue;
		}
		else if (strncmp(metadata[0], "@@logrotate", 11) == 0) {
			char *fn = xgetenv("XYMONCHANNEL_LOGFILENAME");
			if (fn && strlen(fn)) {
				reopen_file(fn, "a", stdout);
				reopen_file(fn, "a", stderr);

				if (tracefn) {
					stoptrace();
					starttrace(tracefn);
				}
			}
			continue;
		}
		else if (strncmp(metadata[0], "@@reload", 8) == 0) {
			/* Nothing ... right now */
		}
		else if (strncmp(metadata[0], "@@idle", 6) == 0) {
			/* Timeout */
		}

		/*
		 * When a burst of alerts happen, we get lots of alert messages
		 * coming in quickly. So lets handle them in bunches and only 
		 * do the full alert handling once every 10 secs - that lets us
		 * combine a bunch of alerts into one transmission process.
		 */
		if (nowtimer < (lastxmit+10)) continue;
		lastxmit = nowtimer;

		/* 
		 * Loop through the activealerts list and see if anything is pending.
		 * This is an optimization, we could just as well just fork off the
		 * notification child and let it handle all of it. But there is no
		 * reason to fork a child process unless it is going to do something.
		 */
		configchanged = load_alertconfig(configfn, alertcolors, alertinterval);
		configchanged += load_holidays(0);
		anytogo = 0;
		for (awalk = alistBegin(); (awalk); awalk = alistNext()) {
			int anymatch = 0;

			switch (awalk->state) {
			  case A_NORECIP:
				if (!configchanged) break;

				/* The configuration has changed - switch NORECIP -> PAGING */
				awalk->state = A_PAGING;
				clear_interval(awalk);
				/* Fall through */

			  case A_PAGING:
				if (have_recipient(awalk, &anymatch)) {
					if (awalk->nextalerttime <= now) anytogo++;
				}
				else {
					if (!anymatch) {
						awalk->state = A_NORECIP;
						cleanup_alert(awalk);
					}
				}
				break;

			  case A_ACKED:
				if (awalk->nextalerttime <= now) {
					/* An ack has expired, so drop the ack message and switch to A_PAGING */
					anytogo++;
					if (awalk->ackmessage) xfree(awalk->ackmessage);
					awalk->state = A_PAGING;
				}
				break;

			  case A_RECOVERED:
			  case A_DISABLED:
			  case A_NOTIFY:
				anytogo++;
				break;

			  case A_DEAD:
				break;
			}
		}
		dbgprintf("%d alerts to go\n", anytogo);

		if (anytogo) {
			pid_t childpid;

			childpid = fork();
			if (childpid == 0) {
				/* The child */
				start_alerts();
				for (awalk = alistBegin(); (awalk); awalk = alistNext()) {
					switch (awalk->state) {
					  case A_PAGING:
						if (awalk->nextalerttime <= now) {
							send_alert(awalk, notiflogfd);
						}
						break;

					  case A_ACKED:
						/* Cannot be A_ACKED unless the ack is still valid, so no alert. */
						break;

					  case A_RECOVERED:
					  case A_DISABLED:
					  case A_NOTIFY:
						send_alert(awalk, notiflogfd);
						break;

					  case A_NORECIP:
					  case A_DEAD:
						break;
					}
				}
				finish_alerts();

				/* Child does not continue */
				exit(0);
			}
			else if (childpid < 0) {
				errprintf("Fork failed, cannot send alerts: %s\n", strerror(errno));
			}
		}

		/* Update the state flag and the next-alert timestamp */
		for (awalk = alistBegin(); (awalk); awalk = alistNext()) {
			switch (awalk->state) {
			  case A_PAGING:
				if (awalk->nextalerttime <= now) awalk->nextalerttime = next_alert(awalk);
				break;

			  case A_NORECIP:
				break;

			  case A_ACKED:
				/* Still cannot get here except if ack is still valid */
				break;

			  case A_RECOVERED:
			  case A_DISABLED:
			  case A_NOTIFY:
				awalk->state = A_DEAD;
				/* Fall through */

			  case A_DEAD:
				cleanup_alert(awalk); 
				break;
			}
		}

		clean_all_active();

		/* Pickup any finished child processes to avoid zombies */
		while (wait3(&childstat, WNOHANG, NULL) > 0) ;
	}

	if (checkfn) save_checkpoint(checkfn);
	if (acklogfd) fclose(acklogfd);
	if (notiflogfd) fclose(notiflogfd);
	stoptrace();

	MEMUNDEFINE(notiflogfn);
	MEMUNDEFINE(acklogfn);

	if (termsig >= 0) {
		errprintf("Terminated by signal %d\n", termsig);
	}

	return 0;
}