static void child_exec(int which) { pid_t pid = 0; tain_t now; tain_t when_ok; char **argv = deux[which].argv; tain_now(&now); tain_load(&when_ok, 1, 0); tain_plus(&when_ok, &deux[which].when, &when_ok); if(tain_less(&now, &when_ok)){ warn("pausing for respawn of ", argv[0], " ..."); sleep(1); } /* if option -x: ** not supervising, don't fork here, just exec into deux[1] */ if(opt_super ? 1 : (which == 0)){ while((pid = fork()) == -1){ warn("failure on fork() while starting ", argv[0]); sleep(2); } } /* child (or, if not opt_super, execing into deux[1]): */ if(pid == 0){ /* setup logpipe: */ fd_dupe(which, my_logpipe[which]); close(my_logpipe[0]); close(my_logpipe[1]); #if 0 if(which == 1){ /* redirect stderr to stdout for program: */ fd_dupe(2, 1); } #endif /* reset default signal handlers, unblock: */ sig_default(SIGTERM); sig_default(SIGCHLD); sig_default(SIGALRM); sig_default(SIGCONT); sig_default(SIGHUP); sig_default(SIGINT); sig_default(SIGQUIT); sig_default(SIGTSTP); sig_default(SIGUSR1); sig_default(SIGUSR2); sig_default(SIGPIPE); sigset_unblock(&my_sigset); /* do it: */ execvx(argv[0], argv, environ, NULL); /* uh oh: */ fatal_syserr("failure on exec of ", argv[0]); } /* parent: */ deux[which].pid = pid; tain_now(&deux[which].when); return; }
/* perpd_svdef_run() ** exec() a service: ** "which" is SUBSV_MAIN or SUBSV_LOG ** "target" is SVRUN_START or SVRUN_RESET ** ** side effects: ** global flag_failing set on fail of fork() ** child uses (but does not alter) global poll_sigset */ int perpd_svdef_run(struct svdef *svdef, int which, int target) { struct subsv *subsv = &svdef->svpair[which]; char *prog[7]; tain_t now, when_ok; tain_t towait = tain_INIT(0,0); pid_t pid; int wstat; char nbuf_reset[NFMT_SIZE]; int i; /* insanity checks: */ if((which == SUBSV_LOG) && !(svdef->bitflags & SVDEF_FLAG_HASLOG)){ log_debug("logging service requested but not enabled"); return 0; } if(subsv->pid > 0){ log_debug("perpd_svrun() requested for service still running"); return 0; } /* initialize (attempted) target, etc: */ switch(target){ case SVRUN_RESET: subsv->bitflags |= SUBSV_FLAG_ISRESET; break; default: subsv->bitflags &= ~SUBSV_FLAG_ISRESET; break; } wstat = subsv->wstat; subsv->pid = 0; subsv->wstat = 0; subsv->bitflags &= ~SUBSV_FLAG_FAILING; /* setup argv: */ prog[0] = (which == SUBSV_LOG) ? "./rc.log" : "./rc.main"; prog[1] = (target == SVRUN_START) ? "start" : "reset"; prog[2] = svdef->name; prog[3] = NULL; /* additional args if running "reset": */ if(target == SVRUN_RESET){ if(WIFEXITED(wstat)){ prog[3] = "exit"; prog[4] = nfmt_uint32(nbuf_reset, (uint32_t)WEXITSTATUS(wstat)); prog[5] = NULL; } else { int n = (WIFSIGNALED(wstat) ? WTERMSIG(wstat) : WSTOPSIG(wstat)); char *s = (char *)sysstr_signal(n); prog[3] = (WIFSIGNALED(wstat) ? "signal" : "stopped"); prog[4] = nfmt_uint32(nbuf_reset, (uint32_t)n); prog[5] = ((s != NULL) ? 
s : "SIGUNKNOWN"); prog[6] = NULL; } } /* timestamps and respawn governor: */ tain_now(&now); tain_assign(&when_ok, &subsv->when_ok); if((target == SVRUN_START) && tain_less(&now, &when_ok)){ log_warning("setting respawn governor on 'start' target of service ", svdef->name, " for ", prog[0]); tain_minus(&towait, &when_ok, &now); } /* fork/exec: */ if((pid = fork()) == -1){ subsv->pid = 0; subsv->bitflags |= SUBSV_FLAG_FAILING; perpd_trigger_fail(); warn_syserr("failure fork() for service ", svdef->name); return -1; } /* XXX, TODO: ** if child error before exec(), die() with a distinctive error code */ /* child: */ if(pid == 0){ /* nfmt buffer for environmental variables (reusable with newenv_set()): */ char nbuf_env[NFMT_SIZE]; /* run child in new process group: */ setsid(); /* cwd for runscripts is svdir: */ if(fchdir(svdef->fd_dir) == -1){ fatal_syserr("(in child for service ", svdef->name, "): failure fchdir() to service directory"); } /* setup logpipe: */ if(svdef->bitflags & SVDEF_FLAG_HASLOG){ if(which == SUBSV_MAIN){ /* set stdout to logpipe: */ close(1); if(dup2(svdef->logpipe[1], 1) != 1){ fatal_syserr("(in child for service ", svdef->name, "): failure dup2() on logpipe[1] to logging service"); } } if((which == SUBSV_LOG) && (target == SVRUN_START)){ /* set stdin to logpipe: ** (but not if this is a resetting log service) */ close(0); if(dup2(svdef->logpipe[0], 0) != 0){ fatal_syserr("(in child for service ", svdef->name, "): failure dup2() on logpipe[0] for logging service"); } } close(svdef->logpipe[0]); close(svdef->logpipe[1]); } /* close extraneous descriptors (shouldn't be any!): */ for(i = 3; i < 1024; ++i) close(i); /* set PERP_BASE in the environment: */ if(newenv_set("PERP_BASE", basedir) == -1){ fatal_syserr("(in child for service ", svdef->name, "): failure setting PERP_BASE environment for ", prog[0], " ", prog[1]); } /* set PERP_SVPID in the environment: */ if(target == SVRUN_RESET){ nfmt_uint64(nbuf_env, (uint64_t)subsv->pid_prev); }else{ 
nfmt_uint64(nbuf_env, (uint64_t)getpid()); } if(newenv_set("PERP_SVPID", nbuf_env) == -1){ fatal_syserr("(in child for service ", svdef->name, "): failure setting PERP_SVPID environment for ", prog[0], " ", prog[1]); } /* set PERP_SVSECS in the environment (reset target only): */ if(target == SVRUN_RESET){ nfmt_uint64(nbuf_env, tain_uptime(&now, &subsv->when)); if(newenv_set("PERP_SVSECS", nbuf_env) == -1){ fatal_syserr("(in child for service ", svdef->name, "): failure setting PERP_SVSECS environment for ", prog[0], " ", prog[1]); } } /* respawn governor: */ if((target == SVRUN_START) && !(tain_iszero(&towait))){ tain_pause(&towait, NULL); } /* clear signal handlers from child process: */ sig_uncatch(SIGCHLD); sig_uncatch(SIGHUP); sig_uncatch(SIGINT); sig_uncatch(SIGTERM); sig_uncatch(SIGPIPE); sigset_unblock(&poll_sigset); /* go forth my child: */ newenv_run(prog, environ); /* nuts, exec failed: */ fatal_syserr("(in child for service ", svdef->name, "): failure execve()"); } /* parent: */ subsv->pid = pid; /* set timestamps and respawn governor: */ tain_assign(&subsv->when, &now); if(target == SVRUN_START){ /* when_ok = now + 1sec + wait: */ tain_LOAD(&when_ok, 1, 0); tain_plus(&when_ok, &now, &when_ok); tain_plus(&when_ok, &when_ok, &towait); tain_assign(&subsv->when_ok, &when_ok); } return 0; }
/*
 * deepsleepuntil()
 *   Keep calling iopause_stamp() with an empty descriptor list (count 0)
 *   and the given deadline for as long as *stamp compares earlier than
 *   *deadline.
 *
 *   NOTE(review): termination relies on iopause_stamp() advancing *stamp;
 *   presumably it refreshes the stamp on return -- confirm.
 */
void deepsleepuntil(tain_t const *deadline, tain_t *stamp)
{
    iopause_fd x;   /* placeholder only: the count passed below is 0 */

    for (;;) {
        if (!tain_less(stamp, deadline)) {
            break;
        }
        iopause_stamp(&x, 0, deadline, stamp);
    }
}
static void check (char const *name) { struct stat st ; unsigned int namelen ; unsigned int i = 0 ; if (name[0] == '.') return ; if (stat(name, &st) == -1) { strerr_warnwu2sys("stat ", name) ; retrydirlater() ; return ; } if (!S_ISDIR(st.st_mode)) return ; namelen = str_len(name) ; for (; i < n ; i++) if ((services[i].ino == st.st_ino) && (services[i].dev == st.st_dev)) break ; if (i < n) { if (services[i].flaglog && (services[i].p[0] < 0)) { /* See BLACK MAGIC above. */ services[i].p[0] = -2 ; return ; } } else { if (n >= max) { strerr_warnwu3x("start supervisor for ", name, ": too many services") ; return ; } else { struct stat su ; char tmp[namelen + 5] ; byte_copy(tmp, namelen, name) ; byte_copy(tmp + namelen, 5, "/log") ; if (stat(tmp, &su) < 0) if (errno == ENOENT) services[i].flaglog = 0 ; else { strerr_warnwu2sys("stat ", tmp) ; retrydirlater() ; return ; } else if (!S_ISDIR(su.st_mode)) services[i].flaglog = 0 ; else { if (pipecoe(services[i].p) < 0) { strerr_warnwu1sys("pipecoe") ; retrydirlater() ; return ; } services[i].flaglog = 1 ; } services[i].ino = st.st_ino ; services[i].dev = st.st_dev ; tain_copynow(&services[i].restartafter[0]) ; tain_copynow(&services[i].restartafter[1]) ; services[i].pid[0] = 0 ; services[i].pid[1] = 0 ; n++ ; } } services[i].flagactive = 1 ; if (services[i].flaglog && !services[i].pid[1]) { if (!tain_future(&services[i].restartafter[1])) { char tmp[namelen + 5] ; byte_copy(tmp, namelen, name) ; byte_copy(tmp + namelen, 5, "/log") ; trystart(i, tmp, 1) ; } else if (tain_less(&services[i].restartafter[1], &deadline)) deadline = services[i].restartafter[1] ; } if (!services[i].pid[0]) { if (!tain_future(&services[i].restartafter[0])) trystart(i, name, 0) ; else if (tain_less(&services[i].restartafter[0], &deadline)) deadline = services[i].restartafter[0] ; } }
/*
 * retrydirlater()
 *   Compute a stamp with tain_addsec_g() from DIR_RETRY_TIMEOUT and, if
 *   that stamp is earlier than the global deadline, make it the new
 *   deadline.
 */
static void retrydirlater(void)
{
    tain_t a;

    tain_addsec_g(&a, DIR_RETRY_TIMEOUT);
    if (!tain_less(&a, &deadline)) {
        return;     /* current deadline is already at least as early */
    }
    deadline = a;
}
static void reap (void) { tain_t nextscan ; if (!wantreap) return ; wantreap = 0 ; tain_addsec_g(&nextscan, 1) ; for (;;) { int wstat ; int r = wait_nohang(&wstat) ; if (r < 0) if (errno != ECHILD) panic("wait_nohang") ; else break ; else if (!r) break ; else { register unsigned int i = 0 ; for (; i < n ; i++) { if (services[i].pid[0] == r) { services[i].pid[0] = 0 ; services[i].restartafter[0] = nextscan ; break ; } else if (services[i].pid[1] == r) { services[i].pid[1] = 0 ; services[i].restartafter[1] = nextscan ; break ; } } if (i == n) continue ; if (services[i].flagactive) { if (tain_less(&nextscan, &deadline)) deadline = nextscan ; } else { if (services[i].flaglog) { /* BLACK MAGIC: - we need to close the pipe early: * as soon as the writer exits so the logger can exit on EOF * or as soon as the logger exits so the writer can crash on EPIPE - but if the same service gets reactivated before the second supervise process exits, ouch: we've lost the pipe - so we can't reuse the same service even if it gets reactivated - so we're marking a dying service with a closed pipe - if the scanner sees a service with p[0] = -1 it won't flag it as active (and won't restart the dead supervise) - but if the service gets reactivated we want it to restart as soon as the 2nd supervise process dies - so the scanner marks such a process with p[0] = -2 - and the reaper triggers a scan when it finds a -2. */ if (services[i].p[0] >= 0) { fd_close(services[i].p[1]) ; services[i].p[1] = -1 ; fd_close(services[i].p[0]) ; services[i].p[0] = -1 ; } else if (services[i].p[0] == -2) wantscan = 1 ; } if (!services[i].pid[0] && (!services[i].flaglog || !services[i].pid[1])) services[i] = services[--n] ; } } } }
static void child_exec(int which) { pid_t pid = 0; tain_t now; tain_t when_ok; char **argv = deux[which].argv; tain_now(&now); tain_load(&when_ok, 1, 0); tain_plus(&when_ok, &deux[which].when, &when_ok); if(tain_less(&now, &when_ok)){ warn("pausing for restart of ", argv[0], " ..."); sleep(1); } /* if option -x: ** not supervising, don't fork here, just exec into deux[1] */ if(opt_super ? 1 : (which == 0)){ while((pid = fork()) == -1){ warn("failure on fork() while starting ", argv[0]); sleep(2); } } /* child (or, if not opt_super, execing into deux[1]): */ if(pid == 0){ int fd, fd_max; struct rlimit rlim; int i; /* setup PERP_BASE in environment: */ if(newenv_set("PERP_BASE", perp_base) == -1){ fatal_syserr("failure setting environment in child for ", argv[0]); } /* prepare for closing unused file descriptors: */ if(getrlimit(RLIMIT_NOFILE, &rlim) == -1){ fatal_syserr("failure getting file rlimit in child for ", argv[0]); } fd_max = (rlim.rlim_max == RLIM_INFINITY) ? 1024 : rlim.rlim_max; /* start fd 0,1,2 on /dev/null: */ if((fd = open("/dev/null", O_RDWR)) == -1){ fatal_syserr("failure opening /dev/null in child for ", argv[0]); } fd_dupe(0, fd); fd_dupe(1, fd); fd_dupe(2, fd); close(fd); /* setup logpipe: */ fd_dupe(which, my_logpipe[which]); if(which == 1){ /* perpd gets stderr redirected to stdout for logger: */ fd_dupe(2, 1); } /* close all other descriptors: */ for(i = 3; i < fd_max; ++i) close(i); /* set default umask: */ umask(0); /* reset default signal handlers, unblock: */ sig_default(SIGTERM); sig_default(SIGCHLD); sig_default(SIGALRM); sig_default(SIGCONT); sig_default(SIGHUP); sig_default(SIGINT); sig_default(SIGQUIT); sig_default(SIGTSTP); sig_default(SIGUSR1); sig_default(SIGUSR2); sig_default(SIGPIPE); sigset_unblock(&my_sigset); /* do it: */ newenv_run(argv, environ); /* uh oh: */ fatal_syserr("failure on exec of ", argv[0]); } /* parent: */ deux[which].pid = pid; tain_now(&deux[which].when); return; }