/*
 * Start this process as a locator-registered network worker.
 *
 * svc     - service type announced to the locator (also stored in locatorsvc).
 * sticky  - stickiness setting passed on to locator_register_server().
 * updfunc - optional callback, invoked with the locator ID after registration.
 *
 * When both a listen address and a locator location are configured, the
 * function registers with the locator and runs the network listener. It only
 * returns to the caller when net_worker_listener() reports 0, or when none of
 * the listen/locator settings were given at all. Every other outcome ends
 * the process via exit().
 */
void net_worker_run(enum locator_servicetype_t svc, enum locator_sticky_t sticky, update_fn_t *updfunc)
{
	int backoff;
	int status;

	locatorsvc = svc;

	if (listenipport != NULL) {
		struct in_addr parsed;
		char *colon;

		/* The locator ID defaults to the listener address */
		if (locatorid == NULL) locatorid = strdup(listenipport);

		/*
		 * The ID must look like IP:PORT. Cut the string at the colon
		 * so that only the IP part is handed to inet_aton().
		 */
		colon = strchr(locatorid, ':');
		if (colon != NULL) *colon = '\0';
		if ((colon == NULL) || (inet_aton(locatorid, &parsed) == 0)) {
			errprintf("Locator ID must be IP:PORT matching the listener address\n");
			exit(1);
		}
		*colon = ':';
	}

	if (!(listenipport && locatorlocation)) {
		/* A partial configuration is an error; no configuration is a no-op */
		if (listenipport || locatorlocation || locatorid) {
			errprintf("Must specify all of --listen, --locator and --locatorid\n");
			exit(1);
		}
		return;
	}

	/* Announce ourselves: retry with a doubling delay until the locator answers */
	for (backoff = 10; (locator_init(locatorlocation) != 0); ) {
		errprintf("Locator unavailable, waiting for it to be ready\n");
		sleep(backoff);
		if (backoff < 240) backoff *= 2;
	}

	locator_register_server(locatorid, svc, locatorweight, sticky, locatorextra);
	if (updfunc) (*updfunc)(locatorid);

	/*
	 * Run the network listener and wait for incoming connections.
	 * The result is:
	 *   -1 : Setup error. Abort.
	 *    0 : A connection arrived and we are now a forked worker. Continue.
	 *    1 : The listener terminated. Exit normally.
	 */
	status = net_worker_listener(listenipport);
	switch (status) {
	  case -1:
		errprintf("Listener setup failed, aborting\n");
		locator_serverdown(locatorid, svc);
		exit(1);

	  case 1:
		errprintf("xymond_listener listener terminated\n");
		locator_serverdown(locatorid, svc);
		exit(0);

	  default:
		/* Worker process: returning from here lets the worker start. */
		break;
	}
}
int main(int argc, char *argv[]) { char buf[1024]; int done = 0; char *res; if (argc < 2) { printf("Usage: %s IP:PORT\n", argv[0]); return 1; } if (locator_init(argv[1]) == -1) { printf("Locator ping failed\n"); return 1; } else { printf("Locator is available\n"); } while (!done) { char *p, *p1, *p2, *p3, *p4, *p5, *p6, *p7; char *extras; printf("Commands:\n"); printf(" r(egister) s servername type weight sticky\n"); printf(" r(egister) h servername type hostname\n"); printf(" d(own) servername type\n"); printf(" u(p) servername type\n"); printf(" f(orget) servername type\n"); printf(" q(uery) hostname type\n"); printf(" x(query) hostname type\n"); printf(" p(ing)\n"); printf(" s(ave state)\n"); printf(">"); fflush(stdout); done = (fgets(buf, sizeof(buf), stdin) == NULL); if (done) continue; p = strchr(buf, '\n'); if (p) *p = '\0'; p1 = p2 = p3 = p4 = p5 = p6 = p7 = NULL; p1 = strtok(buf, " "); if (p1) p2 = strtok(NULL, " "); if (p2) p3 = strtok(NULL, " "); if (p3) p4 = strtok(NULL, " "); if (p4) p5 = strtok(NULL, " "); if (p5) p6 = strtok(NULL, " "); if (p6) p7 = strtok(NULL, "\r\n"); switch (*p1) { case 'R': case 'r': if (*p2 == 's') { enum locator_servicetype_t svc; enum locator_sticky_t sticky; int weight; svc = get_servicetype(p4); weight = (p5 ? atoi(p5) : 1); sticky = ((p6 && (atoi(p6) == 1)) ? LOC_STICKY : LOC_ROAMING); printf("%s\n", locator_register_server(p3, svc, weight, sticky, p7) ? "Failed" : "OK"); } else if (*p2 == 'h') { printf("%s\n", locator_register_host(p5, get_servicetype(p4), p3) ? "Failed" : "OK"); } break; case 'D': case 'd': printf("%s\n", locator_serverdown(p2, get_servicetype(p3)) ? "Failed" : "OK"); break; case 'U': case 'u': printf("%s\n", locator_serverup(p2, get_servicetype(p3)) ? "Failed" : "OK"); break; case 'F': case 'f': printf("%s\n", locator_serverforget(p2, get_servicetype(p3)) ? "Failed" : "OK"); break; case 'Q': case 'q': case 'X': case 'x': extras = NULL; res = locator_query(p2, get_servicetype(p3), (*p1 == 'x') ? 
&extras : NULL); if (res) { printf("Result: %s\n", res); if (extras) printf(" Extras gave: %s\n", extras); } else { printf("Failed\n"); } break; case 'P': case 'p': p = locator_cmd("p"); if (p == NULL) printf("Failed\n"); else printf("OK: %s\n", p); break; case 'S': case 's': p = locator_cmd("@"); if (p == NULL) printf("Failed\n"); else printf("OK: %s\n", p); break; } } return 0; }
int main(int argc, char *argv[]) { int daemonize = 0; char *pidfile = NULL; char *envarea = NULL; int cnid = -1; pcre *msgfilter = NULL; pcre *stdfilter = NULL; int argi; struct sigaction sa; RbtIterator handle; /* Dont save the error buffer */ save_errbuf = 0; /* Create the peer container */ peers = rbtNew(name_compare); for (argi=1; (argi < argc); argi++) { if (argnmatch(argv[argi], "--debug")) { debug = 1; } else if (argnmatch(argv[argi], "--channel=")) { char *cn = strchr(argv[argi], '=') + 1; for (cnid = C_STATUS; (channelnames[cnid] && strcmp(channelnames[cnid], cn)); cnid++) ; if (channelnames[cnid] == NULL) cnid = -1; } else if (argnmatch(argv[argi], "--daemon")) { daemonize = 1; } else if (argnmatch(argv[argi], "--no-daemon")) { daemonize = 0; } else if (argnmatch(argv[argi], "--pidfile=")) { char *p = strchr(argv[argi], '='); pidfile = strdup(p+1); } else if (argnmatch(argv[argi], "--log=")) { char *p = strchr(argv[argi], '='); logfn = strdup(p+1); } else if (argnmatch(argv[argi], "--env=")) { char *p = strchr(argv[argi], '='); loadenv(p+1, envarea); } else if (argnmatch(argv[argi], "--area=")) { char *p = strchr(argv[argi], '='); envarea = strdup(p+1); } else if (argnmatch(argv[argi], "--locator=")) { char *p = strchr(argv[argi], '='); locator_init(p+1); locatorbased = 1; } else if (argnmatch(argv[argi], "--service=")) { char *p = strchr(argv[argi], '='); locatorservice = get_servicetype(p+1); } else if (argnmatch(argv[argi], "--filter=")) { char *p = strchr(argv[argi], '='); msgfilter = compileregex(p+1); if (!msgfilter) { errprintf("Invalid filter (bad expression): %s\n", p+1); } else { stdfilter = compileregex("^@@(logrotate|shutdown|drophost|droptest|renamehost|renametest)"); } } else { char *childcmd; char **childargs; int i = 0; childcmd = argv[argi]; childargs = (char **) calloc((1 + argc - argi), sizeof(char *)); while (argi < argc) { childargs[i++] = argv[argi++]; } addlocalpeer(childcmd, childargs); } } /* Sanity checks */ if (cnid == -1) { 
errprintf("No channel/unknown channel specified\n"); return 1; } if (locatorbased && (locatorservice == ST_MAX)) { errprintf("Must specify --service when using locator\n"); return 1; } if (!locatorbased && (rbtBegin(peers) == rbtEnd(peers))) { errprintf("Must specify command for local worker\n"); return 1; } /* Do cache responses to avoid doing too many lookups */ if (locatorbased) locator_prepcache(locatorservice, 0); /* Go daemon */ if (daemonize) { /* Become a daemon */ pid_t daemonpid = fork(); if (daemonpid < 0) { /* Fork failed */ errprintf("Could not fork child\n"); exit(1); } else if (daemonpid > 0) { /* Parent creates PID file and exits */ FILE *fd = NULL; if (pidfile) fd = fopen(pidfile, "w"); if (fd) { fprintf(fd, "%d\n", (int)daemonpid); fclose(fd); } exit(0); } /* Child (daemon) continues here */ setsid(); } /* Catch signals */ setup_signalhandler("xymond_channel"); memset(&sa, 0, sizeof(sa)); sa.sa_handler = sig_handler; sigaction(SIGINT, &sa, NULL); sigaction(SIGTERM, &sa, NULL); sigaction(SIGCHLD, &sa, NULL); signal(SIGALRM, SIG_IGN); /* Switch stdout/stderr to the logfile, if one was specified */ freopen("/dev/null", "r", stdin); /* xymond_channel's stdin is not used */ if (logfn) { freopen(logfn, "a", stdout); freopen(logfn, "a", stderr); } /* Attach to the channel */ channel = setup_channel(cnid, CHAN_CLIENT); if (channel == NULL) { errprintf("Channel not available\n"); running = 0; } while (running) { /* * Wait for GOCLIENT to go up. * * Note that we use IPC_NOWAIT if there are messages in the * queue, because then we just want to pick up a message if * there is one, and if not we want to continue pushing the * queued data to the worker. */ struct sembuf s; int n; s.sem_num = GOCLIENT; s.sem_op = -1; s.sem_flg = ((pendingcount > 0) ? IPC_NOWAIT : 0); n = semop(channel->semid, &s, 1); if (n == 0) { /* * GOCLIENT went high, and so we got alerted about a new * message arriving. Copy the message to our own buffer queue. 
*/ char *inbuf = NULL; if (!msgfilter || matchregex(channel->channelbuf, msgfilter) || matchregex(channel->channelbuf, stdfilter)) { inbuf = strdup(channel->channelbuf); } /* * Now we have safely stored the new message in our buffer. * Wait until any other clients on the same channel have picked up * this message (GOCLIENT reaches 0). * * We wrap this into an alarm handler, because it can occasionally * fail, causing the whole system to lock up. We dont want that.... * We'll set the alarm to trigger after 1 second. Experience shows * that we'll either succeed in a few milliseconds, or fail completely * and wait the full alarm-timer duration. */ gotalarm = 0; signal(SIGALRM, sig_handler); alarm(2); do { s.sem_num = GOCLIENT; s.sem_op = 0; s.sem_flg = 0; n = semop(channel->semid, &s, 1); } while ((n == -1) && (errno == EAGAIN) && running && (!gotalarm)); signal(SIGALRM, SIG_IGN); if (gotalarm) { errprintf("Gave up waiting for GOCLIENT to go low.\n"); } /* * Let master know we got it by downing BOARDBUSY. * This should not block, since BOARDBUSY is upped * by the master just before he ups GOCLIENT. */ do { s.sem_num = BOARDBUSY; s.sem_op = -1; s.sem_flg = IPC_NOWAIT; n = semop(channel->semid, &s, 1); } while ((n == -1) && (errno == EINTR)); if (n == -1) { errprintf("Tried to down BOARDBUSY: %s\n", strerror(errno)); } if (inbuf) { /* * See if they want us to rotate logs. We pass this on to * the worker module as well, but must handle our own logfile. */ if (strncmp(inbuf, "@@logrotate", 11) == 0) { freopen(logfn, "a", stdout); freopen(logfn, "a", stderr); } /* * Put the new message on our outbound queue. */ if (addmessage(inbuf) != 0) { /* Failed to queue message, free the buffer */ xfree(inbuf); } } } else { if (errno != EAGAIN) { dbgprintf("Semaphore wait aborted: %s\n", strerror(errno)); continue; } } /* * We've picked up messages from the master. Now we * must push them to the worker process. 
Since there * is no way to hang off both a semaphore and select(), * we'll just push as much data as possible into the * pipe. If we get to a point where we would block, * then wait a teeny bit of time and restart the * whole loop with checking for new messages from the * master etc. * * In theory, this could become an almost busy-wait loop. * In practice, however, the queue will be empty most * of the time because we'll just shove the data to the * worker child. */ for (handle = rbtBegin(peers); (handle != rbtEnd(peers)); handle = rbtNext(peers, handle)) { int canwrite = 1, hasfailed = 0; xymon_peer_t *pwalk; time_t msgtimeout = gettimer() - MSGTIMEOUT; int flushcount = 0; pwalk = (xymon_peer_t *) gettreeitem(peers, handle); if (pwalk->msghead == NULL) continue; /* Ignore peers with nothing queued */ switch (pwalk->peerstatus) { case P_UP: canwrite = 1; break; case P_DOWN: openconnection(pwalk); canwrite = (pwalk->peerstatus == P_UP); break; case P_FAILED: canwrite = 0; break; } /* See if we have stale messages queued */ while (pwalk->msghead && (pwalk->msghead->tstamp < msgtimeout)) { flushmessage(pwalk); flushcount++; } if (flushcount) { errprintf("Flushed %d stale messages for %s:%d\n", flushcount, inet_ntoa(pwalk->peeraddr.sin_addr), ntohs(pwalk->peeraddr.sin_port)); } while (pwalk->msghead && canwrite) { fd_set fdwrite; struct timeval tmo; /* Check that this peer is ready for writing. 
*/ FD_ZERO(&fdwrite); FD_SET(pwalk->peersocket, &fdwrite); tmo.tv_sec = 0; tmo.tv_usec = 2000; n = select(pwalk->peersocket+1, NULL, &fdwrite, NULL, &tmo); if (n == -1) { errprintf("select() failed: %s\n", strerror(errno)); canwrite = 0; hasfailed = 1; continue; } else if ((n == 0) || (!FD_ISSET(pwalk->peersocket, &fdwrite))) { canwrite = 0; continue; } n = write(pwalk->peersocket, pwalk->msghead->bufp, pwalk->msghead->buflen); if (n >= 0) { pwalk->msghead->bufp += n; pwalk->msghead->buflen -= n; if (pwalk->msghead->buflen == 0) flushmessage(pwalk); } else if (errno == EAGAIN) { /* * Write would block ... stop for now. */ canwrite = 0; } else { hasfailed = 1; } if (hasfailed) { /* Write failed, or message grew stale */ errprintf("Peer at %s:%d failed: %s\n", inet_ntoa(pwalk->peeraddr.sin_addr), ntohs(pwalk->peeraddr.sin_port), strerror(errno)); canwrite = 0; shutdownconnection(pwalk); if (pwalk->peertype == P_NET) locator_serverdown(pwalk->peername, locatorservice); pwalk->peerstatus = P_FAILED; } } } } /* Detach from channels */ close_channel(channel, CHAN_CLIENT); /* Close peer connections */ for (handle = rbtBegin(peers); (handle != rbtEnd(peers)); handle = rbtNext(peers, handle)) { xymon_peer_t *pwalk = (xymon_peer_t *) gettreeitem(peers, handle); shutdownconnection(pwalk); } /* Remove the PID file */ if (pidfile) unlink(pidfile); return 0; }