int main(int argc,char **argv) { struct stat st; dir = argv[1]; if (!dir || argv[2]) strerr_die1x(100,"supervise: usage: supervise dir"); if (pipe(selfpipe) == -1) strerr_die4sys(111,FATAL,"unable to create pipe for ",dir,": "); coe(selfpipe[0]); coe(selfpipe[1]); ndelay_on(selfpipe[0]); ndelay_on(selfpipe[1]); sig_block(sig_child); sig_catch(sig_child,trigger); if (chdir(dir) == -1) strerr_die4sys(111,FATAL,"unable to chdir to ",dir,": "); if (stat("down",&st) != -1) flagwantup = 0; else if (errno != error_noent) strerr_die4sys(111,FATAL,"unable to stat ",dir,"/down: "); mkdir("supervise",0700); fdlock = open_append("supervise/lock"); if ((fdlock == -1) || (lock_exnb(fdlock) == -1)) strerr_die4sys(111,FATAL,"unable to acquire ",dir,"/supervise/lock: "); coe(fdlock); fifo_make("supervise/control",0600); fdcontrol = open_read("supervise/control"); if (fdcontrol == -1) strerr_die4sys(111,FATAL,"unable to read ",dir,"/supervise/control: "); coe(fdcontrol); ndelay_on(fdcontrol); /* shouldn't be necessary */ fdcontrolwrite = open_write("supervise/control"); if (fdcontrolwrite == -1) strerr_die4sys(111,FATAL,"unable to write ",dir,"/supervise/control: "); coe(fdcontrolwrite); pidchange(); announce(); fifo_make("supervise/ok",0600); fdok = open_read("supervise/ok"); if (fdok == -1) strerr_die4sys(111,FATAL,"unable to read ",dir,"/supervise/ok: "); coe(fdok); if (!flagwant || flagwantup) trystart(); doit(); announce(); _exit(0); }
void trystart(void) { int f; switch(f = fork()) { case -1: strerr_warn4(WARNING,"unable to fork for ",dir,", sleeping 60 seconds: ",&strerr_sys); deepsleep(60); trigger(); return; case 0: sig_uncatch(sig_child); sig_unblock(sig_child); execve(*run,run,environ); strerr_die4sys(111,FATAL,"unable to start ",dir,"/run: "); } flagpaused = 0; pid = f; pidchange(); announce(); deepsleep(1); }
static void trystart() { int f; switch(f = fork()) { case -1: write_log(fdlogwf, WARNING, "unable to fork for ", service, ", sleeping 60 seconds\n"); deepsleep(60); trigger(); return; case 0: sig_uncatch(sig_child); sig_unblock(sig_child); execvp(cmd[0], cmdp); write_log(fdlogwf, FATAL, "unable to start ", cmd[0], "\n"); _exit(1); } flagpaused = 0; pid = f; pidchange(); announce(); deepsleep(1); }
void doit(void) { iopause_fd x[2]; struct taia deadline; struct taia stamp; int wstat; int r; char ch; announce(); for (;;) { if (flagexit && !pid) return; sig_unblock(sig_child); x[0].fd = selfpipe[0]; x[0].events = IOPAUSE_READ; x[1].fd = fdcontrol; x[1].events = IOPAUSE_READ; taia_now(&stamp); taia_uint(&deadline,3600); taia_add(&deadline,&stamp,&deadline); iopause(x,2,&deadline,&stamp); sig_block(sig_child); while (read(selfpipe[0],&ch,1) == 1) ; for (;;) { r = wait_nohang(&wstat); if (!r) break; if ((r == -1) && (errno != error_intr)) break; if (r == pid) { pid = 0; pidchange(); announce(); if (flagexit) return; if (flagwant && flagwantup) trystart(); break; } } if (read(fdcontrol,&ch,1) == 1) switch(ch) { case 'd': flagwant = 1; flagwantup = 0; if (pid) { kill(pid,SIGTERM); kill(pid,SIGCONT); flagpaused = 0; } announce(); break; case 'u': flagwant = 1; flagwantup = 1; announce(); if (!pid) trystart(); break; case 'o': flagwant = 0; announce(); if (!pid) trystart(); break; case 'a': if (pid) kill(pid,SIGALRM); break; case 'h': if (pid) kill(pid,SIGHUP); break; case 'k': if (pid) kill(pid,SIGKILL); break; case 't': if (pid) kill(pid,SIGTERM); break; case 'i': if (pid) kill(pid,SIGINT); break; case 'p': flagpaused = 1; announce(); if (pid) kill(pid,SIGSTOP); break; case 'c': flagpaused = 0; announce(); if (pid) kill(pid,SIGCONT); break; case 'x': flagexit = 1; announce(); break; } } }
int main(int argc, char **argv) { struct sigaction sa; if (parse_argv(argc, argv) < 0) _exit(100); snprintf(status_files[0], 1024, "%s/lock", status_dir); snprintf(status_files[1], 1024, "%s/control", status_dir); snprintf(status_files[2], 1024, "%s/ok", status_dir); snprintf(status_files[3], 1024, "%s/status", status_dir); snprintf(status_files[4], 1024, "%s/status.new", status_dir); snprintf(status_files[5], 1024, "%s/supervise.log", status_dir); snprintf(status_files[6], 1024, "%s/supervise.log.wf", status_dir); sa.sa_handler = SIG_IGN; sigemptyset(&sa.sa_mask); if (sigaction(SIGHUP, &sa, NULL) < 0) { printf("unable to ignore SIGHUP for %s\n", service); _exit(110); } if (mkdir(status_dir, 0700) < 0 && errno != EEXIST) { printf("unable to create dir: %s\n", status_dir); _exit(110); } fdlog = open_append(status_files[5]); if (fdlog == -1) { printf("unable to open %s%s", status_dir, "/supervise.log"); _exit(111); } coe(fdlog); fdlogwf = open_append(status_files[6]); if (fdlogwf == -1) { printf("unable to open %s%s", status_dir, "/supervise.log.wf"); _exit(1); } coe(fdlogwf); if (daemon(1, 0) < 0) { printf("failed to daemonize supervise!\n"); _exit(111); } if (pipe(selfpipe) == -1) { write_log(fdlogwf, FATAL, "unable to create pipe for ", service, "\n"); _exit(111); } coe(selfpipe[0]); coe(selfpipe[1]); ndelay_on(selfpipe[0]); ndelay_on(selfpipe[1]); sig_block(sig_child); sig_catch(sig_child, trigger); sig_block(sig_alarm); sig_catch(sig_alarm, timer_handler); sig_unblock(sig_alarm); fdlock = open_append(status_files[0]); if ((fdlock == -1) || (lock_exnb(fdlock) == -1)) { write_log(fdlogwf, FATAL, "Unable to acquier ", status_dir, "/lock\n"); _exit(111); } coe(fdlock); fifo_make(status_files[1], 0600); fdcontrol = open_read(status_files[1]); if (fdcontrol == -1) { write_log(fdlogwf, FATAL, "unable to read ", status_dir, "/control\n"); _exit(1); } coe(fdcontrol); ndelay_on(fdcontrol); fdcontrolwrite = open_write(status_files[1]); if (fdcontrolwrite == -1) { write_log(fdlogwf, FATAL, "unable to write ", status_dir, "/control\n"); _exit(1); } coe(fdcontrolwrite); fifo_make(status_files[2], 0600); fdok = open_read(status_files[2]); if (fdok == -1) { write_log(fdlogwf, FATAL, "unable to read ", status_dir, "/ok\n"); _exit(1); } coe(fdok); if (!restart_sh[0]) { parse_conf(); } pidchange(); announce(); if (!flagwant || flagwantup) trystart(); doit(); announce(); _exit(0); }
static void doit() { iopause_fd x[2]; struct taia deadline; struct taia stamp; int wstat; long int time_cmp = 0; int r; char ch; char warn_message[2048]; int coredumped = 0; char restart_cmd[2048]; announce(); x[0].fd = selfpipe[0]; x[0].events = IOPAUSE_READ; x[1].fd = fdcontrol; x[1].events = IOPAUSE_READ; while (1) { if (flagexit && !pid) break; taia_now(&stamp); taia_uint(&deadline, 3600); taia_add(&deadline, &stamp, &deadline); sig_unblock(sig_child); iopause(x, 2, &deadline, &stamp); sig_block(sig_child); while (read(selfpipe[0], &ch, 1) == 1); while (1) { //waitpid(-1,&wstat,WNOHANG);WNOHANG : return immediately if no child has exited r = wait_nohang(&wstat); //r==0 if one or more child(ren) exit(s) but have not yet changed state if (!r) break; //r == -1 && errno == error_intr means waitpid is interrupted by a signal, we should call waitpid again. //here is not necessary cause we call waitpid with a WNOHANG argument. if (r == -1 && errno != error_intr) break; if (r == pid) { pid = 0; pidchange(); announce(); time_now = time((time_t *)0); if(time_old) { time_cmp = time_now - time_old; if(time_cmp >= time_alarm) //cmp { num = 0; } time_old = time_now; } else { time_cmp = 0; time_old = time_now; } if (0 != restart_sh[0]) { if (num == INT_MAX) num = 0; num++; if (snprintf(restart_cmd, sizeof(restart_cmd), "%s %d", restart_sh, num) > 1) { system(restart_cmd); write_log(fdlog, NOTICE, "restart_cmd: ", restart_cmd, " is called.\n"); } } else { if (WCOREDUMP(wstat)) { have_coredumped++; coredumped = 1; } bzero(warn_message, 2048); create_warn_message(warn_message, coredumped); write_log(fdlog, NOTICE, "service exited", coredumped ? " with coredump" : "", "!\n"); coredumped = 0; if (!closealarm && alarm_interval > 0 && have_tried++ == 0) { alarm(alarm_interval); do_alarm(warn_message); } if (flagexit || (alarm_interval > 0 && have_tried > max_tries && max_tries > 0) || (alarm_interval > 0 && have_coredumped > max_tries_if_coredumped && max_tries_if_coredumped > 0)) { write_log(fdlog, NOTICE, "supervise refused to restart ", service, " any more and exited itself!\n"); alarm(0); return; } } if (flagwant && flagwantup) { write_log(fdlog, NOTICE, "supervise is trying to restart ", service, "...\n"); trystart(); } break; } } if (read(fdcontrol, &ch, 1) != 1) continue; switch(ch) { // -s: Silent. Do not alarm any more. case 's': closealarm = 1; announce(); break; case 'n': closealarm = 0; announce(); break; case 'r': parse_conf(); break; // -d: Down. If the service is running, send it a TERM signal and then a CONT signal. // After it stops, do not restart it. case 'd': flagwant = 1; flagwantup = 0; if (pid) { kill(pid, SIGTERM); kill(pid, SIGCONT); flagpaused = 0; } announce(); break; // -u: Up. If the service is not running, start it. If the service stops, restart it. case 'u': flagwant = 1; flagwantup = 1; announce(); if (!pid) trystart(); break; // -o: Once. If the service is not running, start it. Do not restart it if it stops. case 'o': flagwant = 0; announce(); if (!pid) trystart(); break; // -a: Alarm. Send the service an ALRM signal. case 'a': if (pid) kill(pid, SIGALRM); break; // -h: Hangup. Send the service a HUP signal. case 'h': if (pid) kill(pid, SIGHUP); break; // -k: Kill. Send the service a KILL signal. case 'k': if (pid) kill(pid, SIGKILL); break; //* -t: Terminate. Send the service a TERM signal. case 't': if (pid) kill(pid, SIGTERM); break; // -i: Interrupt. Send the service an INT signal. case 'i': if (pid) kill(pid, SIGINT); break; // -p: Pause. Send the service a STOP signal. case 'p': flagpaused = 1; announce(); if (pid) kill(pid, SIGSTOP); break; // -c: Continue. Send the service a CONT signal. case 'c': flagpaused = 0; announce(); if (pid) kill(pid, SIGCONT); break; // -x: Exit. supervise will exit as soon as the service is down. If you use this option on a //stable system, you're doing something wrong; supervise is designed to run forever. case 'x': flagexit = 1; announce(); break; } //end switch } //end while }