Beispiel #1
0
/*
 * Entry point of supervise.
 *
 * Expects exactly one argument, the service directory. Any other argument
 * count is reported through strerr_die1x() with code 100.
 *
 * Start-up sequence, in order:
 *   1. create the self-pipe and register trigger() for sig_child
 *      (sig_child stays blocked from here on);
 *   2. chdir into the service directory;
 *   3. if a file named "down" exists, clear flagwantup;
 *   4. create ./supervise and take an exclusive, non-blocking lock on
 *      supervise/lock;
 *   5. open the supervise/control FIFO for reading and for writing;
 *   6. publish the initial state with pidchange()/announce();
 *   7. open the supervise/ok FIFO;
 *   8. start the service when (!flagwant || flagwantup), then run doit().
 *
 * Every fatal failure goes through strerr_die4sys() with code 111
 * (presumably terminating the process -- the helper is defined elsewhere).
 */
int main(int argc,char **argv)
{
  struct stat downinfo;

  dir = argv[1];
  if ((dir == 0) || (argv[2] != 0)) {
    strerr_die1x(100,"supervise: usage: supervise dir");
  }

  /* Self-pipe: read in doit() to notice child-status changes. */
  if (pipe(selfpipe) == -1) {
    strerr_die4sys(111,FATAL,"unable to create pipe for ",dir,": ");
  }
  coe(selfpipe[0]);
  coe(selfpipe[1]);
  ndelay_on(selfpipe[0]);
  ndelay_on(selfpipe[1]);

  /* Keep sig_child blocked; doit() unblocks it only while waiting. */
  sig_block(sig_child);
  sig_catch(sig_child,trigger);

  if (chdir(dir) == -1) {
    strerr_die4sys(111,FATAL,"unable to chdir to ",dir,": ");
  }

  /* A missing "down" file is the normal case; any other stat error is fatal. */
  if (stat("down",&downinfo) == -1) {
    if (errno != error_noent) {
      strerr_die4sys(111,FATAL,"unable to stat ",dir,"/down: ");
    }
  }
  else {
    flagwantup = 0;
  }

  /* Result deliberately ignored: a real failure shows up at the open below. */
  mkdir("supervise",0700);
  fdlock = open_append("supervise/lock");
  if ((fdlock == -1) || (lock_exnb(fdlock) == -1)) {
    strerr_die4sys(111,FATAL,"unable to acquire ",dir,"/supervise/lock: ");
  }
  coe(fdlock);

  fifo_make("supervise/control",0600);
  fdcontrol = open_read("supervise/control");
  if (fdcontrol == -1) {
    strerr_die4sys(111,FATAL,"unable to read ",dir,"/supervise/control: ");
  }
  coe(fdcontrol);
  ndelay_on(fdcontrol); /* shouldn't be necessary */

  /* NOTE(review): the write end is presumably held open so that reads on
     fdcontrol never see end-of-file -- confirm against open_read(). */
  fdcontrolwrite = open_write("supervise/control");
  if (fdcontrolwrite == -1) {
    strerr_die4sys(111,FATAL,"unable to write ",dir,"/supervise/control: ");
  }
  coe(fdcontrolwrite);

  pidchange();
  announce();

  fifo_make("supervise/ok",0600);
  fdok = open_read("supervise/ok");
  if (fdok == -1) {
    strerr_die4sys(111,FATAL,"unable to read ",dir,"/supervise/ok: ");
  }
  coe(fdok);

  if (!flagwant || flagwantup) {
    trystart();
  }

  doit();
  announce();
  _exit(0);
}
Beispiel #2
0
/*
 * Try to launch the supervised program.
 *
 * fork() failure: emit a warning, call deepsleep(60), call trigger() and
 * return without touching pid.
 * Child: restore default sig_child handling, then execve() the program named
 * by run[0]; if execve() returns, report the failure via strerr_die4sys()
 * with code 111.
 * Parent: clear flagpaused, record the child's pid, publish the new state
 * with pidchange()/announce(), then call deepsleep(1).
 */
void trystart(void)
{
  int child;

  child = fork();

  if (child == -1) {
    strerr_warn4(WARNING,"unable to fork for ",dir,", sleeping 60 seconds: ",&strerr_sys);
    deepsleep(60);
    /* NOTE(review): presumably pokes the main loop so it re-examines state. */
    trigger();
    return;
  }

  if (child == 0) {
    /* The service must not inherit supervise's sig_child setup. */
    sig_uncatch(sig_child);
    sig_unblock(sig_child);
    execve(run[0],run,environ);
    /* Only reached when execve() failed. */
    strerr_die4sys(111,FATAL,"unable to start ",dir,"/run: ");
  }

  flagpaused = 0;
  pid = child;
  pidchange();
  announce();
  deepsleep(1);
}
Beispiel #3
0
/*
 * Try to launch the supervised command.
 *
 * fork() failure: log a warning to fdlogwf, call deepsleep(60), call
 * trigger() and return without touching pid.
 * Child: restore default sig_child handling and execvp() cmd[0] with the
 * argument vector cmdp; if execvp() returns, log the failure to fdlogwf and
 * _exit(1).
 * Parent: clear flagpaused, record the child's pid, publish the new state
 * with pidchange()/announce(), then call deepsleep(1).
 */
static void trystart()
{
	int child = fork();

	if (child == -1)
	{
		write_log(fdlogwf, WARNING, "unable to fork for ", service, ", sleeping 60 seconds\n");
		deepsleep(60);
		/* NOTE(review): presumably pokes the main loop so it re-examines state. */
		trigger();
		return;
	}

	if (child == 0)
	{
		/* The service must not inherit supervise's sig_child setup. */
		sig_uncatch(sig_child);
		sig_unblock(sig_child);
		execvp(cmd[0], cmdp);
		/* Only reached when execvp() failed. */
		write_log(fdlogwf, FATAL, "unable to start ", cmd[0], "\n");
		_exit(1);
	}

	flagpaused = 0;
	pid = child;
	pidchange();
	announce();
	deepsleep(1);
}
Beispiel #4
0
void doit(void)
{
  iopause_fd x[2];
  struct taia deadline;
  struct taia stamp;
  int wstat;
  int r;
  char ch;

  announce();

  for (;;) {
    if (flagexit && !pid) return;

    sig_unblock(sig_child);

    x[0].fd = selfpipe[0];
    x[0].events = IOPAUSE_READ;
    x[1].fd = fdcontrol;
    x[1].events = IOPAUSE_READ;
    taia_now(&stamp);
    taia_uint(&deadline,3600);
    taia_add(&deadline,&stamp,&deadline);
    iopause(x,2,&deadline,&stamp);

    sig_block(sig_child);

    while (read(selfpipe[0],&ch,1) == 1)
      ;

    for (;;) {
      r = wait_nohang(&wstat);
      if (!r) break;
      if ((r == -1) && (errno != error_intr)) break;
      if (r == pid) {
	pid = 0;
	pidchange();
	announce();
	if (flagexit) return;
	if (flagwant && flagwantup) trystart();
	break;
      }
    }

    if (read(fdcontrol,&ch,1) == 1)
      switch(ch) {
	case 'd':
	  flagwant = 1;
	  flagwantup = 0;
	  if (pid) { kill(pid,SIGTERM); kill(pid,SIGCONT); flagpaused = 0; }
	  announce();
	  break;
	case 'u':
	  flagwant = 1;
	  flagwantup = 1;
	  announce();
	  if (!pid) trystart();
	  break;
	case 'o':
	  flagwant = 0;
	  announce();
	  if (!pid) trystart();
	  break;
	case 'a':
	  if (pid) kill(pid,SIGALRM);
	  break;
	case 'h':
	  if (pid) kill(pid,SIGHUP);
	  break;
	case 'k':
	  if (pid) kill(pid,SIGKILL);
	  break;
	case 't':
	  if (pid) kill(pid,SIGTERM);
	  break;
	case 'i':
	  if (pid) kill(pid,SIGINT);
	  break;
	case 'p':
	  flagpaused = 1;
	  announce();
	  if (pid) kill(pid,SIGSTOP);
	  break;
	case 'c':
	  flagpaused = 0;
	  announce();
	  if (pid) kill(pid,SIGCONT);
	  break;
	case 'x':
	  flagexit = 1;
	  announce();
	  break;
      }
  }
}
Beispiel #5
0
/*
 * Entry point of the supervise variant.
 *
 * Sequence:
 *   1. parse_argv(); on failure exit with 100;
 *   2. build the seven status-file paths under status_dir;
 *   3. ignore SIGHUP, create status_dir, open the two log files;
 *   4. daemon(1, 0);
 *   5. create the self-pipe, install the sig_child and sig_alarm handlers;
 *   6. lock <status_dir>/lock, open the control FIFO (read and write ends)
 *      and the ok FIFO;
 *   7. parse_conf() when no restart_sh is set, publish state with
 *      pidchange()/announce();
 *   8. start the service when (!flagwant || flagwantup), then run doit().
 *
 * Before daemon() errors are printed to stdout; afterwards they go to the
 * fdlogwf log through write_log().
 *
 * NOTE(review): exit codes for comparable failures are inconsistent
 * (1, 110 and 111); left unchanged because callers may depend on them.
 */
int main(int argc, char **argv)
{
	struct sigaction sa;

	if (parse_argv(argc, argv) < 0)
		_exit(100);

	/* assumes every status_files[] entry holds at least 1024 bytes -- TODO confirm */
	snprintf(status_files[0], 1024, "%s/lock", status_dir);
	snprintf(status_files[1], 1024, "%s/control", status_dir);
	snprintf(status_files[2], 1024, "%s/ok", status_dir);
	snprintf(status_files[3], 1024, "%s/status", status_dir);
	snprintf(status_files[4], 1024, "%s/status.new", status_dir);
	snprintf(status_files[5], 1024, "%s/supervise.log", status_dir);
	snprintf(status_files[6], 1024, "%s/supervise.log.wf", status_dir);

	/*
	 * sigaction() reads sa_flags as well as the handler and mask, so it
	 * must not be left holding whatever happened to be on the stack.
	 */
	sa.sa_handler = SIG_IGN;
	sa.sa_flags = 0;
	sigemptyset(&sa.sa_mask);
	if (sigaction(SIGHUP, &sa, NULL) < 0)
	{
		printf("unable to ignore SIGHUP for %s\n", service);
		/* _exit() does not flush stdio; flush by hand or the message is lost. */
		fflush(stdout);
		_exit(110);
	}

	if (mkdir(status_dir, 0700) < 0 && errno !=  EEXIST)
	{
		printf("unable to create dir: %s\n", status_dir);
		fflush(stdout);
		_exit(110);
	}

	fdlog = open_append(status_files[5]);
	if (fdlog == -1)
	{
		printf("unable to open %s%s\n", status_dir, "/supervise.log");
		fflush(stdout);
		_exit(111);
	}
	coe(fdlog);

	fdlogwf = open_append(status_files[6]);
	if (fdlogwf == -1)
	{
		printf("unable to open %s%s\n", status_dir, "/supervise.log.wf");
		fflush(stdout);
		_exit(1);
	}
	coe(fdlogwf);

	if (daemon(1, 0) < 0)
	{
		printf("failed to daemonize supervise!\n");
		fflush(stdout);
		_exit(111);
	}

	/* From here on stdout is no longer used; report through write_log(). */
	if (pipe(selfpipe) == -1)
	{
		write_log(fdlogwf, FATAL, "unable to create pipe for ", service, "\n");
		_exit(111);
	}
	coe(selfpipe[0]);
	coe(selfpipe[1]);
	ndelay_on(selfpipe[0]);
	ndelay_on(selfpipe[1]);

	/* sig_child stays blocked; doit() unblocks it only while waiting. */
	sig_block(sig_child);
	sig_catch(sig_child, trigger);

	sig_block(sig_alarm);
	sig_catch(sig_alarm, timer_handler);
	sig_unblock(sig_alarm);

	fdlock = open_append(status_files[0]);
	if ((fdlock == -1) || (lock_exnb(fdlock) == -1))
	{
		write_log(fdlogwf, FATAL, "Unable to acquier ", status_dir, "/lock\n");
		_exit(111);
	}
	coe(fdlock);

	fifo_make(status_files[1], 0600);
	fdcontrol = open_read(status_files[1]);
	if (fdcontrol == -1)
	{
		write_log(fdlogwf, FATAL, "unable to read ", status_dir, "/control\n");
		_exit(1);
	}
	coe(fdcontrol);
	ndelay_on(fdcontrol);

	/* NOTE(review): the write end is presumably held open so that reads on
	   fdcontrol never see end-of-file -- confirm against open_read(). */
	fdcontrolwrite = open_write(status_files[1]);
	if (fdcontrolwrite == -1)
	{
		write_log(fdlogwf, FATAL, "unable to write ", status_dir, "/control\n");
		_exit(1);
	}
	coe(fdcontrolwrite);

	fifo_make(status_files[2], 0600);
	fdok = open_read(status_files[2]);
	if (fdok == -1)
	{
		write_log(fdlogwf, FATAL, "unable to read ", status_dir, "/ok\n");
		_exit(1);
	}
	coe(fdok);

	if (!restart_sh[0])
	{
		parse_conf();
	}
	pidchange();
	announce();

	if (!flagwant || flagwantup)
		trystart();
	doit();
	announce();

	_exit(0);
}
Beispiel #6
0
/*
 * Main supervision loop of the supervise variant.
 *
 * Each iteration:
 *   - waits (up to 3600 seconds) for the self-pipe or the control FIFO to
 *     become readable, with sig_child unblocked only for that wait;
 *   - drains the self-pipe;
 *   - reaps children with wait_nohang(); when the supervised pid is reaped
 *     it either runs the configured restart_sh command or does the
 *     alarm / retry-limit bookkeeping, then restarts the service if
 *     flagwant && flagwantup;
 *   - reads at most one command byte from the control FIFO and acts on it.
 *
 * Returns when flagexit is set and no service process remains, or when one
 * of the retry limits is exceeded.
 */
static void doit()
{
	iopause_fd x[2];
	struct taia deadline;
	struct taia stamp;
	int wstat;
	long int time_cmp = 0;
	int r;
	int cmdlen;
	char ch;
	char warn_message[2048];
	int coredumped = 0;
	char restart_cmd[2048];

	announce();
	x[0].fd = selfpipe[0];
	x[0].events = IOPAUSE_READ;
	x[1].fd = fdcontrol;
	x[1].events = IOPAUSE_READ;

	while (1)
	{
		if (flagexit && !pid)
			break;

		taia_now(&stamp);
		taia_uint(&deadline, 3600);
		taia_add(&deadline, &stamp, &deadline);
		sig_unblock(sig_child);
		iopause(x, 2, &deadline, &stamp);
		sig_block(sig_child);

		// Drain the self-pipe; the bytes themselves carry no information.
		while (read(selfpipe[0], &ch, 1) == 1);

		while (1)
		{
			// presumably waitpid(-1, &wstat, WNOHANG): returns immediately
			r = wait_nohang(&wstat);
			// r == 0: children exist but none has changed state yet.
			if (!r)
				break;
			// r == -1 with errno == error_intr means the wait was interrupted
			// and should be retried; any other error ends the reap loop.
			if (r == -1 && errno != error_intr)
				break;
			if (r == pid)
			{
				pid = 0;
				pidchange();
				announce();

				// Reset the restart counter once the previous exit is at
				// least time_alarm seconds in the past.
				time_now = time((time_t *)0);
				if (time_old)
				{
					time_cmp = time_now - time_old;
					if (time_cmp >= time_alarm)
					{
						num = 0;
					}
				}
				time_old = time_now;

				if (0 != restart_sh[0])
				{
					// NOTE(review): flagexit is not checked on this path, so
					// with restart_sh set an 'x' command does not stop the
					// restart below -- confirm whether that is intended.
					if (num == INT_MAX)
						num = 0;
					num++;

					cmdlen = snprintf(restart_cmd, sizeof(restart_cmd), "%s %d", restart_sh, num);
					if (cmdlen > 1 && cmdlen < (int)sizeof(restart_cmd))
					{
						system(restart_cmd);
						write_log(fdlog, NOTICE, "restart_cmd: ", restart_cmd, " is called.\n");
					}
					else if (cmdlen >= (int)sizeof(restart_cmd))
					{
						// Never hand a truncated command line to the shell.
						write_log(fdlog, NOTICE, "restart_cmd: ", restart_cmd, " is too long, not called.\n");
					}
				}
				else
				{
					// WCOREDUMP is only meaningful when the child was
					// terminated by a signal.
					if (WIFSIGNALED(wstat) && WCOREDUMP(wstat))
					{
						have_coredumped++;
						coredumped = 1;
					}

					bzero(warn_message, sizeof(warn_message));
					create_warn_message(warn_message, coredumped);
					write_log(fdlog, NOTICE, "service exited", coredumped ? " with coredump" : "", "!\n");
					coredumped = 0;

					// Raise the alarm only for the first failure of a series.
					if (!closealarm && alarm_interval > 0 && have_tried++ == 0)
					{
						alarm(alarm_interval);
						do_alarm(warn_message);
					}

					if (flagexit || (alarm_interval > 0 && have_tried > max_tries && max_tries > 0) ||
					(alarm_interval > 0 && have_coredumped > max_tries_if_coredumped && max_tries_if_coredumped > 0))
					{
						write_log(fdlog, NOTICE, "supervise refused to restart ", service,
								" any more and exited itself!\n");
						alarm(0);
						return;
					}
				}

				if (flagwant && flagwantup)
				{
					write_log(fdlog, NOTICE, "supervise is trying to restart ", service, "...\n");
					trystart();
				}
				break;
			}
		}

		if (read(fdcontrol, &ch, 1) != 1)
			continue;

		switch(ch)
		{
			// -s: Silent. Do not alarm any more.
			case 's':
				closealarm = 1;
				announce();
				break;
			// -n: Re-enable alarms (clears closealarm).
			case 'n':
				closealarm = 0;
				announce();
				break;
			// -r: Call parse_conf() again.
			case 'r':
				parse_conf();
				break;
			// -d: Down. If the service is running, send it a TERM signal and then a CONT signal.
			// After it stops, do not restart it.
			case 'd':
				flagwant = 1;
				flagwantup = 0;
				if (pid) { kill(pid, SIGTERM); kill(pid, SIGCONT); flagpaused = 0; }
				announce();
				break;
			// -u: Up. If the service is not running, start it. If the service stops, restart it.
			case 'u':
				flagwant = 1;
				flagwantup = 1;
				announce();
				if (!pid) trystart();
				break;
			// -o: Once. If the service is not running, start it. Do not restart it if it stops.
			case 'o':
				flagwant = 0;
				announce();
				if (!pid) trystart();
				break;
			// -a: Alarm. Send the service an ALRM signal.
			case 'a':
				if (pid) kill(pid, SIGALRM);
				break;
			// -h: Hangup. Send the service a HUP signal.
			case 'h':
				if (pid) kill(pid, SIGHUP);
				break;
			// -k: Kill. Send the service a KILL signal.
			case 'k':
				if (pid) kill(pid, SIGKILL);
				break;
			// -t: Terminate. Send the service a TERM signal.
			case 't':
				if (pid) kill(pid, SIGTERM);
				break;
			// -i: Interrupt. Send the service an INT signal.
			case 'i':
				if (pid) kill(pid, SIGINT);
				break;
			// -p: Pause. Send the service a STOP signal.
			case 'p':
				flagpaused = 1;
				announce();
				if (pid) kill(pid, SIGSTOP);
				break;
			// -c: Continue. Send the service a CONT signal.
			case 'c':
				flagpaused = 0;
				announce();
				if (pid) kill(pid, SIGCONT);
				break;
			// -x: Exit. supervise will exit as soon as the service is down. If you use this option on a
			// stable system, you're doing something wrong; supervise is designed to run forever.
			case 'x':
				flagexit = 1;
				announce();
				break;
			// Unknown command bytes are ignored.
			default:
				break;
		} //end switch
	} //end while
}