int main(int argc, char *argv[]) { char *msg; int seq; int argi; int alertcolors, alertinterval; char *configfn = NULL; char *checkfn = NULL; int checkpointinterval = 900; char acklogfn[PATH_MAX]; FILE *acklogfd = NULL; char notiflogfn[PATH_MAX]; FILE *notiflogfd = NULL; char *tracefn = NULL; struct sigaction sa; int configchanged; time_t lastxmit = 0; MEMDEFINE(acklogfn); MEMDEFINE(notiflogfn); libxymon_init(argv[0]); /* Dont save the error buffer */ save_errbuf = 0; /* Load alert config */ alertcolors = colorset(xgetenv("ALERTCOLORS"), ((1 << COL_GREEN) | (1 << COL_BLUE))); alertinterval = 60*atoi(xgetenv("ALERTREPEAT")); /* Create our loookup-trees */ hostnames = xtreeNew(strcasecmp); testnames = xtreeNew(strcasecmp); locations = xtreeNew(strcasecmp); for (argi=1; (argi < argc); argi++) { if (argnmatch(argv[argi], "--config=")) { configfn = strdup(strchr(argv[argi], '=')+1); } else if (argnmatch(argv[argi], "--checkpoint-file=")) { checkfn = strdup(strchr(argv[argi], '=')+1); } else if (argnmatch(argv[argi], "--checkpoint-interval=")) { char *p = strchr(argv[argi], '=') + 1; checkpointinterval = atoi(p); } else if (argnmatch(argv[argi], "--dump-config")) { load_alertconfig(configfn, alertcolors, alertinterval); dump_alertconfig(1); return 0; } else if (argnmatch(argv[argi], "--cfid")) { include_configid = 1; } else if (argnmatch(argv[argi], "--test")) { char *testhost = NULL, *testservice = NULL, *testpage = NULL, *testcolor = "red", *testgroups = NULL; void *hinfo; int testdur = 0; FILE *logfd = NULL; activealerts_t *awalk = NULL; int paramno = 0; argi++; if (argi < argc) testhost = argv[argi]; argi++; if (argi < argc) testservice = argv[argi]; argi++; while (argi < argc) { if (strncasecmp(argv[argi], "--duration=", 11) == 0) { testdur = durationvalue(strchr(argv[argi], '=')+1); } else if (strncasecmp(argv[argi], "--color=", 8) == 0) { testcolor = strchr(argv[argi], '=')+1; } else if (strncasecmp(argv[argi], "--group=", 8) == 0) { testgroups = strchr(argv[argi], '=')+1; } else if (strncasecmp(argv[argi], "--time=", 7) == 0) { fakestarttime = (time_t)atoi(strchr(argv[argi], '=')+1); } else { paramno++; if (paramno == 1) testdur = atoi(argv[argi]); else if (paramno == 2) testcolor = argv[argi]; else if (paramno == 3) fakestarttime = (time_t) atoi(argv[argi]); } argi++; } if ((testhost == NULL) || (testservice == NULL)) { printf("Usage: xymond_alert --test HOST SERVICE [options]\n"); printf("Possible options:\n\t[--duration=MINUTES]\n\t[--color=COLOR]\n\t[--group=GROUPNAME]\n\t[--time=TIMESPEC]\n"); return 1; } load_hostnames(xgetenv("HOSTSCFG"), NULL, get_fqdn()); hinfo = hostinfo(testhost); if (hinfo) { testpage = strdup(xmh_item(hinfo, XMH_ALLPAGEPATHS)); } else { errprintf("Host not found in hosts.cfg - assuming it is on the top page\n"); testpage = ""; } awalk = (activealerts_t *)calloc(1, sizeof(activealerts_t)); awalk->hostname = find_name(hostnames, testhost); awalk->testname = find_name(testnames, testservice); awalk->location = find_name(locations, testpage); awalk->ip = strdup("127.0.0.1"); awalk->color = awalk->maxcolor = parse_color(testcolor); awalk->pagemessage = "Test of the alert configuration"; awalk->eventstart = getcurrenttime(NULL) - testdur*60; awalk->groups = (testgroups ? strdup(testgroups) : NULL); awalk->state = A_PAGING; awalk->cookie = 12345; awalk->next = NULL; logfd = fopen("/dev/null", "w"); starttrace(NULL); testonly = 1; load_alertconfig(configfn, alertcolors, alertinterval); load_holidays(0); send_alert(awalk, logfd); return 0; } else if (argnmatch(argv[argi], "--trace=")) { tracefn = strdup(strchr(argv[argi], '=')+1); starttrace(tracefn); } else if (net_worker_option(argv[argi])) { /* Handled in the subroutine */ } else if (standardoption(argv[argi])) { if (showhelp) return 0; } else { errprintf("Unknown option '%s'\n", argv[argi]); } } /* Do the network stuff if needed */ net_worker_run(ST_ALERT, LOC_SINGLESERVER, NULL); if (checkfn) { load_checkpoint(checkfn); nextcheckpoint = gettimer() + checkpointinterval; dbgprintf("Next checkpoint at %d, interval %d\n", (int) nextcheckpoint, checkpointinterval); } setup_signalhandler("xymond_alert"); /* Need to handle these ourselves, so we can shutdown and save state-info */ memset(&sa, 0, sizeof(sa)); sa.sa_handler = sig_handler; sigaction(SIGPIPE, &sa, NULL); sigaction(SIGTERM, &sa, NULL); sigaction(SIGINT, &sa, NULL); sigaction(SIGCHLD, &sa, NULL); sigaction(SIGUSR1, &sa, NULL); sigaction(SIGHUP, &sa, NULL); if (xgetenv("XYMONSERVERLOGS")) { sprintf(acklogfn, "%s/acknowledge.log", xgetenv("XYMONSERVERLOGS")); acklogfd = fopen(acklogfn, "a"); sprintf(notiflogfn, "%s/notifications.log", xgetenv("XYMONSERVERLOGS")); notiflogfd = fopen(notiflogfn, "a"); } /* * The general idea here is that this loop handles receiving of alert- * and ack-messages from the master daemon, and maintains a list of * host+test combinations that may have alerts going out. * * This module does not deal with any specific alert-configuration, * it just picks up the alert messages, maintains the list of * known tests that are in some sort of critical condition, and * periodically pushes alerts to the do_alert.c module for handling. * * The only modification of alerts that happen here is the handling * of when the next alert is due. It calls into the next_alert() * routine to learn when an alert should be repeated, and also * deals with Acknowledgments that stop alerts from going out for * a period of time. */ while (running) { char *eoln, *restofmsg; char *metadata[20]; char *p; int metacount; char *hostname = NULL, *testname = NULL; struct timespec timeout; time_t now, nowtimer; int anytogo; activealerts_t *awalk; int childstat; nowtimer = gettimer(); if (checkfn && (nowtimer > nextcheckpoint)) { dbgprintf("Saving checkpoint\n"); nextcheckpoint = nowtimer + checkpointinterval; save_checkpoint(checkfn); if (acklogfd) acklogfd = freopen(acklogfn, "a", acklogfd); if (notiflogfd) notiflogfd = freopen(notiflogfn, "a", notiflogfd); } timeout.tv_sec = 60; timeout.tv_nsec = 0; msg = get_xymond_message(C_PAGE, "xymond_alert", &seq, &timeout); if (msg == NULL) { running = 0; continue; } /* See what time it is - must happen AFTER the timeout */ now = getcurrenttime(NULL); /* Split the message in the first line (with meta-data), and the rest */ eoln = strchr(msg, '\n'); if (eoln) { *eoln = '\0'; restofmsg = eoln+1; } else { restofmsg = ""; } /* * Now parse the meta-data into elements. * We use our own "gettok()" routine which works * like strtok(), but can handle empty elements. */ metacount = 0; memset(&metadata, 0, sizeof(metadata)); p = gettok(msg, "|"); while (p && (metacount < 19)) { metadata[metacount] = p; metacount++; p = gettok(NULL, "|"); } metadata[metacount] = NULL; if (metacount > 3) hostname = metadata[3]; if (metacount > 4) testname = metadata[4]; if ((metacount > 10) && (strncmp(metadata[0], "@@page", 6) == 0)) { /* @@page|timestamp|sender|hostname|testname|hostip|expiretime|color|prevcolor|changetime|location|cookie|osname|classname|grouplist|modifiers */ int newcolor, newalertstatus, oldalertstatus; dbgprintf("Got page message from %s:%s\n", hostname, testname); traceprintf("@@page %s:%s:%s=%s\n", hostname, testname, metadata[10], metadata[7]); awalk = find_active(hostname, testname); if (awalk == NULL) { char *hwalk = find_name(hostnames, hostname); char *twalk = find_name(testnames, testname); char *pwalk = find_name(locations, metadata[10]); awalk = (activealerts_t *)calloc(1, sizeof(activealerts_t)); awalk->hostname = hwalk; awalk->testname = twalk; awalk->location = pwalk; awalk->cookie = -1; awalk->state = A_DEAD; /* * Use changetime here, if we restart the alert module then * this gets the duration values more right than using "now". * Also, define this only when a new alert arrives - we should * NOT clear this when a status goes yellow->red, or if it * flaps between yellow and red. */ awalk->eventstart = atoi(metadata[9]); add_active(awalk->hostname, awalk); traceprintf("New record\n"); } newcolor = parse_color(metadata[7]); oldalertstatus = ((alertcolors & (1 << awalk->color)) != 0); newalertstatus = ((alertcolors & (1 << newcolor)) != 0); traceprintf("state %d->%d\n", oldalertstatus, newalertstatus); if (newalertstatus) { /* It's in an alert state. */ awalk->color = newcolor; awalk->state = A_PAGING; if (newcolor > awalk->maxcolor) { if (awalk->maxcolor != 0) { /* * Severity has increased (yellow -> red). * Clear the repeat-interval, and set maxcolor to * the new color. If it drops to yellow again, * maxcolor stays at red, so a test that flaps * between yellow and red will only alert on red * the first time, and then follow the repeat * interval. */ dbgprintf("Severity increased, cleared repeat interval: %s/%s %s->%s\n", awalk->hostname, awalk->testname, colorname(awalk->maxcolor), colorname(newcolor)); clear_interval(awalk); } awalk->maxcolor = newcolor; } } else { /* * Send one "recovered" message out now, then go to A_DEAD. * Dont update the color here - we want recoveries to go out * only if the alert color triggered an alert */ awalk->state = (newcolor == COL_BLUE) ? A_DISABLED : A_RECOVERED; } if (oldalertstatus != newalertstatus) { dbgprintf("Alert status changed from %d to %d\n", oldalertstatus, newalertstatus); clear_interval(awalk); } if (awalk->ip) xfree(awalk->ip); awalk->ip = strdup(metadata[5]); awalk->cookie = atoi(metadata[11]); if (awalk->osname) xfree(awalk->osname); awalk->osname = (metadata[12] ? strdup(metadata[12]) : NULL); if (awalk->classname) xfree(awalk->classname); awalk->classname = (metadata[13] ? strdup(metadata[13]) : NULL); if (awalk->groups) xfree(awalk->groups); awalk->groups = (metadata[14] ? strdup(metadata[14]) : NULL); if (awalk->pagemessage) xfree(awalk->pagemessage); if (metadata[15]) { /* Modifiers are more interesting than the message itself */ awalk->pagemessage = (char *)malloc(strlen(awalk->hostname) + strlen(awalk->testname) + strlen(colorname(awalk->color)) + strlen(metadata[15]) + strlen(restofmsg) + 10); sprintf(awalk->pagemessage, "%s:%s %s\n%s\n%s", awalk->hostname, awalk->testname, colorname(awalk->color), metadata[15], restofmsg); } else { awalk->pagemessage = strdup(restofmsg); } } else if ((metacount > 5) && (strncmp(metadata[0], "@@ack", 5) == 0)) { /* @@ack|timestamp|sender|hostname|testname|hostip|expiretime */ /* * An ack is handled simply by setting the next * alert-time to when the ack expires. */ time_t nextalert = atoi(metadata[6]); dbgprintf("Got ack message from %s:%s\n", hostname, testname); traceprintf("@@ack: %s:%s now=%d, ackeduntil %d\n", hostname, testname, (int)now, (int)nextalert); awalk = find_active(hostname, testname); if (acklogfd) { int cookie = (awalk ? awalk->cookie : -1); int color = (awalk ? awalk->color : 0); fprintf(acklogfd, "%d\t%d\t%d\t%d\t%s\t%s.%s\t%s\t%s\n", (int)now, cookie, (int)((nextalert - now) / 60), cookie, "np_filename_not_used", hostname, testname, colorname(color), nlencode(restofmsg)); fflush(acklogfd); } if (awalk && (awalk->state == A_PAGING)) { traceprintf("Record updated\n"); awalk->state = A_ACKED; awalk->nextalerttime = nextalert; if (awalk->ackmessage) xfree(awalk->ackmessage); awalk->ackmessage = strdup(restofmsg); } else { traceprintf("No record\n"); } } else if ((metacount > 4) && (strncmp(metadata[0], "@@notify", 5) == 0)) { /* @@notify|timestamp|sender|hostname|testname|pagepath */ char *hwalk = find_name(hostnames, hostname); char *twalk = find_name(testnames, testname); char *pwalk = find_name(locations, (metadata[5] ? metadata[5] : "")); awalk = (activealerts_t *)calloc(1, sizeof(activealerts_t)); awalk->hostname = hwalk; awalk->testname = twalk; awalk->location = pwalk; awalk->cookie = -1; awalk->pagemessage = strdup(restofmsg); awalk->eventstart = getcurrenttime(NULL); awalk->state = A_NOTIFY; add_active(awalk->hostname, awalk); } else if ((metacount > 3) && ((strncmp(metadata[0], "@@drophost", 10) == 0) || (strncmp(metadata[0], "@@dropstate", 11) == 0))) { /* @@drophost|timestamp|sender|hostname */ /* @@dropstate|timestamp|sender|hostname */ xtreePos_t handle; handle = xtreeFind(hostnames, hostname); if (handle != xtreeEnd(hostnames)) { alertanchor_t *anchor = (alertanchor_t *)xtreeData(hostnames, handle); for (awalk = anchor->head; (awalk); awalk = awalk->next) awalk->state = A_DEAD; } } else if ((metacount > 4) && (strncmp(metadata[0], "@@droptest", 10) == 0)) { /* @@droptest|timestamp|sender|hostname|testname */ awalk = find_active(hostname, testname); if (awalk) awalk->state = A_DEAD; } else if ((metacount > 4) && (strncmp(metadata[0], "@@renamehost", 12) == 0)) { /* @@renamehost|timestamp|sender|hostname|newhostname */ /* * We handle rename's simply by dropping the alert. If there is still an * active alert for the host, it will have to be dealt with when the next * status update arrives. */ xtreePos_t handle; handle = xtreeFind(hostnames, hostname); if (handle != xtreeEnd(hostnames)) { alertanchor_t *anchor = (alertanchor_t *)xtreeData(hostnames, handle); for (awalk = anchor->head; (awalk); awalk = awalk->next) awalk->state = A_DEAD; } } else if ((metacount > 5) && (strncmp(metadata[0], "@@renametest", 12) == 0)) { /* @@renametest|timestamp|sender|hostname|oldtestname|newtestname */ /* * We handle rename's simply by dropping the alert. If there is still an * active alert for the host, it will have to be dealt with when the next * status update arrives. */ awalk = find_active(hostname, testname); if (awalk) awalk->state = A_DEAD; } else if (strncmp(metadata[0], "@@shutdown", 10) == 0) { running = 0; errprintf("Got a shutdown message\n"); continue; } else if (strncmp(metadata[0], "@@logrotate", 11) == 0) { char *fn = xgetenv("XYMONCHANNEL_LOGFILENAME"); if (fn && strlen(fn)) { reopen_file(fn, "a", stdout); reopen_file(fn, "a", stderr); if (tracefn) { stoptrace(); starttrace(tracefn); } } continue; } else if (strncmp(metadata[0], "@@reload", 8) == 0) { /* Nothing ... right now */ } else if (strncmp(metadata[0], "@@idle", 6) == 0) { /* Timeout */ } /* * When a burst of alerts happen, we get lots of alert messages * coming in quickly. So lets handle them in bunches and only * do the full alert handling once every 10 secs - that lets us * combine a bunch of alerts into one transmission process. */ if (nowtimer < (lastxmit+10)) continue; lastxmit = nowtimer; /* * Loop through the activealerts list and see if anything is pending. * This is an optimization, we could just as well just fork off the * notification child and let it handle all of it. But there is no * reason to fork a child process unless it is going to do something. */ configchanged = load_alertconfig(configfn, alertcolors, alertinterval); configchanged += load_holidays(0); anytogo = 0; for (awalk = alistBegin(); (awalk); awalk = alistNext()) { int anymatch = 0; switch (awalk->state) { case A_NORECIP: if (!configchanged) break; /* The configuration has changed - switch NORECIP -> PAGING */ awalk->state = A_PAGING; clear_interval(awalk); /* Fall through */ case A_PAGING: if (have_recipient(awalk, &anymatch)) { if (awalk->nextalerttime <= now) anytogo++; } else { if (!anymatch) { awalk->state = A_NORECIP; cleanup_alert(awalk); } } break; case A_ACKED: if (awalk->nextalerttime <= now) { /* An ack has expired, so drop the ack message and switch to A_PAGING */ anytogo++; if (awalk->ackmessage) xfree(awalk->ackmessage); awalk->state = A_PAGING; } break; case A_RECOVERED: case A_DISABLED: case A_NOTIFY: anytogo++; break; case A_DEAD: break; } } dbgprintf("%d alerts to go\n", anytogo); if (anytogo) { pid_t childpid; childpid = fork(); if (childpid == 0) { /* The child */ start_alerts(); for (awalk = alistBegin(); (awalk); awalk = alistNext()) { switch (awalk->state) { case A_PAGING: if (awalk->nextalerttime <= now) { send_alert(awalk, notiflogfd); } break; case A_ACKED: /* Cannot be A_ACKED unless the ack is still valid, so no alert. */ break; case A_RECOVERED: case A_DISABLED: case A_NOTIFY: send_alert(awalk, notiflogfd); break; case A_NORECIP: case A_DEAD: break; } } finish_alerts(); /* Child does not continue */ exit(0); } else if (childpid < 0) { errprintf("Fork failed, cannot send alerts: %s\n", strerror(errno)); } } /* Update the state flag and the next-alert timestamp */ for (awalk = alistBegin(); (awalk); awalk = alistNext()) { switch (awalk->state) { case A_PAGING: if (awalk->nextalerttime <= now) awalk->nextalerttime = next_alert(awalk); break; case A_NORECIP: break; case A_ACKED: /* Still cannot get here except if ack is still valid */ break; case A_RECOVERED: case A_DISABLED: case A_NOTIFY: awalk->state = A_DEAD; /* Fall through */ case A_DEAD: cleanup_alert(awalk); break; } } clean_all_active(); /* Pickup any finished child processes to avoid zombies */ while (wait3(&childstat, WNOHANG, NULL) > 0) ; } if (checkfn) save_checkpoint(checkfn); if (acklogfd) fclose(acklogfd); if (notiflogfd) fclose(notiflogfd); stoptrace(); MEMUNDEFINE(notiflogfn); MEMUNDEFINE(acklogfn); if (termsig >= 0) { errprintf("Terminated by signal %d\n", termsig); } return 0; }
int main(int argc, char *argv[]) { char *msg; int argi; struct sigaction sa; char *exthandler = NULL; char *extids = NULL; char *processor = NULL; struct sockaddr_un ctlsockaddr; int ctlsocket; /* Handle program options. */ for (argi = 1; (argi < argc); argi++) { if (strcmp(argv[argi], "--debug") == 0) { debug = 1; } else if (argnmatch(argv[argi], "--rrddir=")) { char *p = strchr(argv[argi], '='); rrddir = strdup(p+1); } else if (argnmatch(argv[argi], "--extra-script=")) { char *p = strchr(argv[argi], '='); exthandler = strdup(p+1); } else if (argnmatch(argv[argi], "--extra-tests=")) { char *p = strchr(argv[argi], '='); extids = strdup(p+1); } else if (argnmatch(argv[argi], "--processor=")) { char *p = strchr(argv[argi], '='); processor = strdup(p+1); } else if (strcmp(argv[argi], "--no-cache") == 0) { use_rrd_cache = 0; } else if (net_worker_option(argv[argi])) { /* Handled in the subroutine */ } } save_errbuf = 0; if ((rrddir == NULL) && xgetenv("XYMONRRDS")) { rrddir = strdup(xgetenv("XYMONRRDS")); } if (exthandler && extids) setup_exthandler(exthandler, extids); /* Do the network stuff if needed */ net_worker_run(ST_RRD, LOC_STICKY, update_locator_hostdata); setup_signalhandler("xymond_rrd"); memset(&sa, 0, sizeof(sa)); sa.sa_handler = sig_handler; sigaction(SIGHUP, &sa, NULL); sigaction(SIGCHLD, &sa, NULL); sigaction(SIGTERM, &sa, NULL); sigaction(SIGINT, &sa, NULL); signal(SIGPIPE, SIG_DFL); /* Setup the control socket that receives cache-flush commands */ memset(&ctlsockaddr, 0, sizeof(ctlsockaddr)); sprintf(ctlsockaddr.sun_path, "%s/rrdctl.%d", xgetenv("XYMONTMP"), getpid()); unlink(ctlsockaddr.sun_path); /* In case it was accidentally left behind */ ctlsockaddr.sun_family = AF_UNIX; ctlsocket = socket(AF_UNIX, SOCK_DGRAM, 0); if (ctlsocket == -1) { errprintf("Cannot create cache-control socket (%s)\n", strerror(errno)); return 1; } fcntl(ctlsocket, F_SETFL, O_NONBLOCK); if (bind(ctlsocket, (struct sockaddr *)&ctlsockaddr, sizeof(ctlsockaddr)) == -1) { errprintf("Cannot bind to cache-control socket (%s)\n", strerror(errno)); return 1; } /* Linux obeys filesystem permissions on the socket file, so make it world-accessible */ if (chmod(ctlsockaddr.sun_path, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH) == -1) { errprintf("Setting permissions on cache-control socket failed: %s\n", strerror(errno)); } /* Load the RRD definitions */ load_rrddefs(); /* If we are passing data to an external processor, create the pipe to it */ setup_extprocessor(processor); running = 1; while (running) { char *eoln, *restofmsg = NULL; char *metadata[MAX_META+1]; int metacount; char *p; char *hostname = NULL, *testname = NULL, *sender = NULL, *classname = NULL, *pagepaths = NULL; xymonrrd_t *ldef = NULL; time_t tstamp; int childstat; ssize_t n; char ctlbuf[PATH_MAX]; int gotcachectlmessage; time_t now; /* See if we have any cache-control messages pending */ do { n = recv(ctlsocket, ctlbuf, sizeof(ctlbuf), 0); gotcachectlmessage = (n > 0); if (gotcachectlmessage) { /* We have a control message */ char *bol, *eol; ctlbuf[n] = '\0'; bol = ctlbuf; do { eol = strchr(bol, '\n'); if (eol) *eol = '\0'; rrdcacheflushhost(bol); if (eol) { bol = eol+1; } else bol = NULL; } while (bol && *bol); } } while (gotcachectlmessage); /* Get next message */ msg = get_xymond_message(C_LAST, argv[0], &seq, NULL); if (msg == NULL) { running = 0; continue; } now = gettimer(); if (reloadtime < now) { /* Reload configuration files */ load_hostnames(xgetenv("HOSTSCFG"), NULL, get_fqdn()); load_client_config(NULL); reloadtime = now + 600; } /* Split the message in the first line (with meta-data), and the rest */ eoln = strchr(msg, '\n'); if (eoln) { *eoln = '\0'; restofmsg = eoln+1; } /* Parse the meta-data */ metacount = 0; memset(&metadata, 0, sizeof(metadata)); p = gettok(msg, "|"); while (p && (metacount < MAX_META)) { metadata[metacount++] = p; p = gettok(NULL, "|"); } metadata[metacount] = NULL; if ((metacount >= 14) && (strncmp(metadata[0], "@@status", 8) == 0)) { /* * @@status|timestamp|sender|origin|hostname|testname|expiretime|color|testflags|\ * prevcolor|changetime|ackexpiretime|ackmessage|disableexpiretime|disablemessage|\ * clienttstamp|flapping|classname|pagepaths */ int color = parse_color(metadata[7]); switch (color) { case COL_GREEN: case COL_YELLOW: case COL_RED: case COL_BLUE: /* Blue is OK, because it only arrives here when an update is sent */ tstamp = atoi(metadata[1]); sender = metadata[2]; hostname = metadata[4]; testname = metadata[5]; classname = (metadata[17] ? metadata[17] : ""); pagepaths = (metadata[18] ? metadata[18] : ""); ldef = find_xymon_rrd(testname, metadata[8]); update_rrd(hostname, testname, restofmsg, tstamp, sender, ldef, classname, pagepaths); break; default: /* Ignore reports with purple, blue or clear - they have no data we want. */ break; } } else if ((metacount > 5) && (strncmp(metadata[0], "@@data", 6) == 0)) { /* @@data|timestamp|sender|origin|hostname|testname|classname|pagepaths */ tstamp = atoi(metadata[1]); sender = metadata[2]; hostname = metadata[4]; testname = metadata[5]; classname = (metadata[6] ? metadata[6] : ""); pagepaths = (metadata[7] ? metadata[7] : ""); ldef = find_xymon_rrd(testname, ""); update_rrd(hostname, testname, restofmsg, tstamp, sender, ldef, classname, pagepaths); } else if (strncmp(metadata[0], "@@shutdown", 10) == 0) { running = 0; continue; } else if (strncmp(metadata[0], "@@idle", 6) == 0) { /* Ignored */ continue; } else if (strncmp(metadata[0], "@@logrotate", 11) == 0) { char *fn = xgetenv("XYMONCHANNEL_LOGFILENAME"); if (fn && strlen(fn)) { freopen(fn, "a", stdout); freopen(fn, "a", stderr); } continue; } else if (strncmp(metadata[0], "@@reload", 8) == 0) { reloadtime = 0; } else if ((metacount > 3) && (strncmp(metadata[0], "@@drophost", 10) == 0)) { char hostdir[PATH_MAX]; hostname = metadata[3]; MEMDEFINE(hostdir); sprintf(hostdir, "%s/%s", rrddir, hostname); dropdirectory(hostdir, 1); MEMUNDEFINE(hostdir); } else if ((metacount > 4) && (strncmp(metadata[0], "@@droptest", 10) == 0)) { /* * Not implemented. Mappings of testnames -> rrd files is * too complex, so on the rare occasion that a single test * is deleted, they will have to delete the rrd files themselves. */ } else if ((metacount > 4) && (strncmp(metadata[0], "@@renamehost", 12) == 0)) { char oldhostdir[PATH_MAX]; char newhostdir[PATH_MAX]; char *newhostname; MEMDEFINE(oldhostdir); MEMDEFINE(newhostdir); hostname = metadata[3]; newhostname = metadata[4]; sprintf(oldhostdir, "%s/%s", rrddir, hostname); sprintf(newhostdir, "%s/%s", rrddir, newhostname); rename(oldhostdir, newhostdir); if (net_worker_locatorbased()) locator_rename_host(hostname, newhostname, ST_RRD); MEMUNDEFINE(newhostdir); MEMUNDEFINE(oldhostdir); } else if ((metacount > 5) && (strncmp(metadata[0], "@@renametest", 12) == 0)) { /* Not implemented. See "droptest". */ } /* * We fork a subprocess when processing drophost requests. * Pickup any finished child processes to avoid zombies */ while (wait3(&childstat, WNOHANG, NULL) > 0) ; } /* Flush all cached updates to disk */ errprintf("Shutting down, flushing cached updates to disk\n"); rrdcacheflushall(); errprintf("Cache flush completed\n"); /* Close the external processor */ shutdown_extprocessor(); /* Close the control socket */ close(ctlsocket); unlink(ctlsockaddr.sun_path); return 0; }
int main(int argc, char *argv[]) { char *msg; int running; int argi, seq; int recentperiod = 3600; int maxrecentcount = 5; int logdirfull = 0; int minlogspace = 5; struct sigaction sa; /* Handle program options. */ for (argi = 1; (argi < argc); argi++) { if (argnmatch(argv[argi], "--logdir=")) { clientlogdir = strchr(argv[argi], '=')+1; } else if (argnmatch(argv[argi], "--recent-period=")) { char *p = strchr(argv[argi], '='); recentperiod = 60*atoi(p+1); } else if (argnmatch(argv[argi], "--recent-count=")) { char *p = strchr(argv[argi], '='); maxrecentcount = atoi(p+1); } else if (argnmatch(argv[argi], "--minimum-free=")) { minlogspace = atoi(strchr(argv[argi], '=')+1); } else if (strcmp(argv[argi], "--debug") == 0) { /* * A global "debug" variable is available. If * it is set, then "dbgprintf()" outputs debug messages. */ debug = 1; } else if (net_worker_option(argv[argi])) { /* Handled in the subroutine */ } } if (clientlogdir == NULL) clientlogdir = xgetenv("CLIENTLOGS"); if (clientlogdir == NULL) { clientlogdir = (char *)malloc(strlen(xgetenv("XYMONVAR")) + 10); sprintf(clientlogdir, "%s/hostdata", xgetenv("XYMONVAR")); } save_errbuf = 0; /* Do the network stuff if needed */ net_worker_run(ST_HOSTDATA, LOC_STICKY, update_locator_hostdata); setup_signalhandler("xymond_hostdata"); memset(&sa, 0, sizeof(sa)); sa.sa_handler = sig_handler; signal(SIGCHLD, SIG_IGN); sigaction(SIGHUP, &sa, NULL); signal(SIGPIPE, SIG_DFL); savetimes = xtreeNew(strcasecmp); running = 1; while (running) { char *eoln, *restofmsg, *p; char *metadata[MAX_META+1]; int metacount; msg = get_xymond_message(C_CLICHG, "xymond_hostdata", &seq, NULL); if (msg == NULL) { /* * get_xymond_message will return NULL if xymond_channel closes * the input pipe. We should shutdown when that happens. */ running = 0; continue; } if (nextfscheck < gettimer()) { logdirfull = (chkfreespace(clientlogdir, minlogspace, minlogspace) != 0); if (logdirfull) errprintf("Hostdata directory %s has less than %d%% free space - disabling save of data for 5 minutes\n", clientlogdir, minlogspace); nextfscheck = gettimer() + 300; } /* Split the message in the first line (with meta-data), and the rest */ eoln = strchr(msg, '\n'); if (eoln) { *eoln = '\0'; restofmsg = eoln+1; } else { restofmsg = ""; } metacount = 0; memset(&metadata, 0, sizeof(metadata)); p = gettok(msg, "|"); while (p && (metacount < MAX_META)) { metadata[metacount++] = p; p = gettok(NULL, "|"); } metadata[metacount] = NULL; if (strncmp(metadata[0], "@@clichg", 8) == 0) { xtreePos_t handle; savetimes_t *itm; int i, recentcount; time_t now = gettimer(); char hostdir[PATH_MAX]; char fn[PATH_MAX]; FILE *fd; /* metadata[3] is the hostname */ handle = xtreeFind(savetimes, metadata[3]); if (handle != xtreeEnd(savetimes)) { itm = (savetimes_t *)xtreeData(savetimes, handle); } else { itm = (savetimes_t *)calloc(1, sizeof(savetimes_t)); itm->hostname = strdup(metadata[3]); xtreeAdd(savetimes, itm->hostname, itm); } /* See how many times we've saved the hostdata recently (within the past 'recentperiod' seconds) */ for (i=0, recentcount=0; ((i < 12) && (itm->tstamp[i] > (now - recentperiod))); i++) recentcount++; /* If it's been saved less than 'maxrecentcount' times, then save it. Otherwise just drop it */ if (!logdirfull && (recentcount < maxrecentcount)) { int written, closestatus, ok = 1; for (i = 10; (i > 0); i--) itm->tstamp[i+1] = itm->tstamp[i]; itm->tstamp[0] = now; sprintf(hostdir, "%s/%s", clientlogdir, metadata[3]); mkdir(hostdir, S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH); sprintf(fn, "%s/%s", hostdir, metadata[4]); fd = fopen(fn, "w"); if (fd == NULL) { errprintf("Cannot create file %s: %s\n", fn, strerror(errno)); continue; } written = fwrite(restofmsg, 1, strlen(restofmsg), fd); if (written != strlen(restofmsg)) { errprintf("Cannot write hostdata file %s: %s\n", fn, strerror(errno)); closestatus = fclose(fd); /* Ignore any close errors */ ok = 0; } else { closestatus = fclose(fd); if (closestatus != 0) { errprintf("Cannot write hostdata file %s: %s\n", fn, strerror(errno)); ok = 0; } } if (!ok) remove(fn); } } /* * A "shutdown" message is sent when the master daemon * terminates. The child workers should shutdown also. */ else if (strncmp(metadata[0], "@@shutdown", 10) == 0) { running = 0; continue; } else if (strncmp(metadata[0], "@@idle", 6) == 0) { /* Ignored */ continue; } /* * A "logrotate" message is sent when the Xymon logs are * rotated. The child workers must re-open their logfiles, * typically stdin and stderr - the filename is always * provided in the XYMONCHANNEL_LOGFILENAME environment. */ else if (strncmp(metadata[0], "@@logrotate", 11) == 0) { char *fn = xgetenv("XYMONCHANNEL_LOGFILENAME"); if (fn && strlen(fn)) { reopen_file(fn, "a", stdout); reopen_file(fn, "a", stderr); } continue; } else if ((metacount > 3) && (strncmp(metadata[0], "@@drophost", 10) == 0)) { /* @@drophost|timestamp|sender|hostname */ char hostdir[PATH_MAX]; snprintf(hostdir, sizeof(hostdir), "%s/%s", clientlogdir, basename(metadata[3])); dropdirectory(hostdir, 1); } else if ((metacount > 4) && (strncmp(metadata[0], "@@renamehost", 12) == 0)) { /* @@renamehost|timestamp|sender|hostname|newhostname */ char oldhostdir[PATH_MAX], newhostdir[PATH_MAX]; snprintf(oldhostdir, sizeof(oldhostdir), "%s/%s", clientlogdir, basename(metadata[3])); snprintf(newhostdir, sizeof(newhostdir), "%s/%s", clientlogdir, basename(metadata[4])); rename(oldhostdir, newhostdir); if (net_worker_locatorbased()) locator_rename_host(metadata[3], metadata[4], ST_HOSTDATA); } else if (strncmp(metadata[0], "@@reload", 8) == 0) { /* Do nothing */ } } return 0; }