Example #1
0
/*
 * Handler that marks the service as wanted and wanted-up, then calls
 * announce() followed by trystart(), in that order.
 * NOTE(review): the name suggests this handles the 'u' command while the
 * service is down -- confirm against the dispatch table.
 */
static void down_u (void)
{
  /* Both flags are set before anything else observes the status. */
  status.flagwantup = 1 ;
  status.flagwant = 1 ;

  announce() ;
  trystart() ;
}
Example #2
0
/*
 * supervise entry point.  Usage: supervise dir
 *
 * Steps, in order:
 *   1. validate the single "dir" argument (exit 100 on misuse);
 *   2. create the non-blocking, close-on-exec self-pipe and install
 *      trigger() as the (blocked) SIGCHLD handler;
 *   3. chdir into dir; a "down" file there clears flagwantup;
 *   4. take the exclusive non-blocking lock on supervise/lock;
 *   5. create and open the supervise/control FIFO for reading and writing,
 *      and the supervise/ok FIFO for reading;
 *   6. optionally start the service, run doit(), and _exit(0).
 *
 * Every fatal setup error terminates through strerr_die4sys with code 111.
 */
int main(int argc,char **argv)
{
  struct stat st;

  dir = argv[1];
  if (!dir || argv[2]) {
    strerr_die1x(100,"supervise: usage: supervise dir");
  }

  /* Self-pipe: both ends close-on-exec and non-blocking. */
  if (pipe(selfpipe) == -1) {
    strerr_die4sys(111,FATAL,"unable to create pipe for ",dir,": ");
  }
  coe(selfpipe[0]);
  coe(selfpipe[1]);
  ndelay_on(selfpipe[0]);
  ndelay_on(selfpipe[1]);

  /* SIGCHLD stays blocked from here on; the handler is installed now. */
  sig_block(sig_child);
  sig_catch(sig_child,trigger);

  if (chdir(dir) == -1) {
    strerr_die4sys(111,FATAL,"unable to chdir to ",dir,": ");
  }

  /* A "down" file clears flagwantup; its absence is not an error. */
  if (stat("down",&st) == -1) {
    if (errno != error_noent) {
      strerr_die4sys(111,FATAL,"unable to stat ",dir,"/down: ");
    }
  }
  else {
    flagwantup = 0;
  }

  /* mkdir's result is not checked; a failure surfaces when opening the lock. */
  mkdir("supervise",0700);
  fdlock = open_append("supervise/lock");
  if ((fdlock == -1) || (lock_exnb(fdlock) == -1)) {
    strerr_die4sys(111,FATAL,"unable to acquire ",dir,"/supervise/lock: ");
  }
  coe(fdlock);

  /* Control FIFO: opened for reading first, then for writing. */
  fifo_make("supervise/control",0600);
  fdcontrol = open_read("supervise/control");
  if (fdcontrol == -1) {
    strerr_die4sys(111,FATAL,"unable to read ",dir,"/supervise/control: ");
  }
  coe(fdcontrol);
  ndelay_on(fdcontrol); /* shouldn't be necessary */
  fdcontrolwrite = open_write("supervise/control");
  if (fdcontrolwrite == -1) {
    strerr_die4sys(111,FATAL,"unable to write ",dir,"/supervise/control: ");
  }
  coe(fdcontrolwrite);

  pidchange();
  announce();

  /* The "ok" FIFO is created only after the first announce(). */
  fifo_make("supervise/ok",0600);
  fdok = open_read("supervise/ok");
  if (fdok == -1) {
    strerr_die4sys(111,FATAL,"unable to read ",dir,"/supervise/ok: ");
  }
  coe(fdok);

  /* Skip the initial start only when flagwant is set and flagwantup is clear. */
  if (!(flagwant && !flagwantup)) {
    trystart();
  }

  doit();
  announce();
  _exit(0);
}
Example #3
0
/*
 * Supervisor main loop.
 *
 * Resets the global state flags, installs sigchild_handler for SIGCHLD and
 * then alternates between
 *   - reaping children with waitpid(-1, ..., WNOHANG), restarting the
 *     service when g_flagwant and g_flagwantup are both set, and
 *   - reading one command byte from g_ctl_rfd and acting on it.
 *
 * SIGCHLD is blocked everywhere except around the read() call, including on
 * the EAGAIN/EINTR retry path.
 *
 * Returns 0 once exit has been requested ('x') and no child is running.
 * Returns -1 when read() reports end-of-file or fails with anything other
 * than EAGAIN/EINTR.
 *
 * The printf() calls are trace output and are kept verbatim.
 */
int supervise_run(void)
{
	g_pid = 0;
	g_flagexit = 0;
	g_flagwant = 1;
	g_flagwantup = opt_auto_start;
	g_flagpaused = 0;

	sig_block(SIGCHLD);
	sig_catch(SIGCHLD, sigchild_handler);

	stat_pidchange();
	stat_update();

	if(g_flagwant && g_flagwantup) {
		trystart();
	}

	while(1) {
		char c;
		ssize_t rl;

		if(g_flagexit && !g_pid) { return 0; }

		printf("waiting pid pid=%d\n",g_pid);
		/* Reap every child that has already changed state. */
		while(1) {
			int wstatus;  /* not named "stat": that would shadow stat(2) */
			int r = waitpid(-1, &wstatus, WNOHANG);
			if(r == 0) { break; }
			if(r < 0 && errno != EAGAIN && errno != EINTR) { break; }
			if(r == g_pid) {
				g_pid = 0;
				stat_pidchange();
				stat_update();
				if(g_flagexit) { return 0; }
				if(g_flagwant && g_flagwantup) {
					trystart();
					break;
				}
			}
		}

		printf("reading... pid=%d\n",g_pid);
		sig_unblock(SIGCHLD);
		rl = read(g_ctl_rfd, &c, 1);
		if(rl < 0 && (errno == EAGAIN || errno == EINTR)) {
			/*
			 * Transient failure: restore the SIGCHLD mask before looping
			 * so the reap loop and trystart() run with SIGCHLD blocked,
			 * exactly as on every other path.
			 */
			sig_block(SIGCHLD);
			continue;
		}
		if(rl <= 0) {
			/*
			 * Hard error, or end-of-file (rl == 0).  errno is meaningless
			 * for EOF, so it must not be consulted in that case.
			 */
			return -1;
		}
		sig_block(SIGCHLD);

		switch(c) {
		case 'd':  /* down */
			printf("down %d\n",g_pid);
			g_flagwant = 1;
			g_flagwantup = 0;
			if(g_pid) {
				/* TERM first, then CONT so a stopped child can act on it. */
				kill(g_pid, SIGTERM);
				kill(g_pid, SIGCONT);
				g_flagpaused = 0;
			}
			stat_update();
			break;

		case 'u':  /* up */
			printf("up %d\n",g_pid);
			g_flagwant = 1;
			g_flagwantup = 1;
			if(!g_pid) { trystart(); }
			stat_update();
			break;

		case 'o':  /* once */
			printf("once %d\n",g_pid);
			g_flagwant = 0;
			if(!g_pid) { trystart(); }
			stat_update();
			break;

		case 'x':  /* exit */
			printf("exit %d\n",g_pid);
			g_flagexit = 1;
			stat_update();
			break;

		case 'p':  /* pause */
			printf("pause %d\n",g_pid);
			g_flagpaused = 1;
			if(g_pid) { kill(g_pid, SIGSTOP); }
			stat_update();
			break;

		case 'c':  /* continue */
			printf("continue %d\n",g_pid);
			g_flagpaused = 0;
			if(g_pid) { kill(g_pid, SIGCONT); }
			stat_update();
			break;

		case 'h':  /* hup */
			printf("hup %d\n",g_pid);
			if(g_pid) { kill(g_pid, SIGHUP); }
			break;

		case 'a':  /* alarm */
			printf("alarm %d\n",g_pid);
			if(g_pid) { kill(g_pid, SIGALRM); }
			break;

		case 'i':  /* interrupt */
			printf("interrupt %d\n",g_pid);
			if(g_pid) { kill(g_pid, SIGINT); }
			break;

		case 't':  /* term */
			printf("term %d\n",g_pid);
			if(g_pid) { kill(g_pid, SIGTERM); }
			break;

		case 'k':  /* kill */
			printf("kill %d\n",g_pid);
			if(g_pid) { kill(g_pid, SIGKILL); }
			break;

		case '1':  /* usr1 */
			printf("usr1 %d\n",g_pid);
			if(g_pid) { kill(g_pid, SIGUSR1); }
			break;

		case '2':  /* usr2 */
			printf("usr2 %d\n",g_pid);
			if(g_pid) { kill(g_pid, SIGUSR2); }
			break;

		case 's':  /* sigchld */
			printf("sigchld %d\n",g_pid);
			if(!opt_auto_restart) {
				g_flagwant = 0;
				stat_update();
			}
			break;

		case ' ': /* ping */
			// FIXME touch status file
			break;

		default:  /* unknown command bytes are ignored */
			break;
		}
	}
}
Example #4
0
/*
 * Examine one directory entry by name.
 *
 * Entries whose name starts with '.' and entries that are not directories
 * are ignored.  A directory is matched against services[] by (inode, device);
 * when it is not yet known, a new slot is filled in at index n (a "log"
 * subdirectory decides flaglog, and a pipe is created for it via pipecoe()).
 * The slot is then flagged active and, for each of the logger (index 1) and
 * the service itself (index 0), either trystart() is called or the global
 * deadline is pulled in to the slot's restartafter time.
 *
 * Failures of stat()/pipecoe() are reported and followed by retrydirlater().
 */
static void check (char const *name)
{
  struct stat st ;
  unsigned int namelen ;
  unsigned int slot = 0 ;

  if (name[0] == '.') return ;
  if (stat(name, &st) == -1)
  {
    strerr_warnwu2sys("stat ", name) ;
    retrydirlater() ;
    return ;
  }
  if (!S_ISDIR(st.st_mode)) return ;
  namelen = str_len(name) ;

  /* Look for an existing slot with the same inode/device pair. */
  while (slot < n)
  {
    if ((services[slot].ino == st.st_ino) && (services[slot].dev == st.st_dev)) break ;
    slot++ ;
  }

  if (slot < n)
  {
    /* Already known. */
    if (services[slot].flaglog && (services[slot].p[0] < 0))
    {
     /* See BLACK MAGIC above. */
      services[slot].p[0] = -2 ;
      return ;
    }
  }
  else if (n >= max)
  {
    strerr_warnwu3x("start supervisor for ", name, ": too many services") ;
    return ;
  }
  else
  {
    /* New service: slot == n here.  Build "<name>/log" including the NUL. */
    struct stat logst ;
    char logdir[namelen + 5] ;
    byte_copy(logdir, namelen, name) ;
    byte_copy(logdir + namelen, 5, "/log") ;

    if (stat(logdir, &logst) >= 0)
    {
      if (S_ISDIR(logst.st_mode))
      {
        if (pipecoe(services[slot].p) < 0)
        {
          strerr_warnwu1sys("pipecoe") ;
          retrydirlater() ;
          return ;
        }
        services[slot].flaglog = 1 ;
      }
      else
      {
        services[slot].flaglog = 0 ;
      }
    }
    else if (errno == ENOENT)
    {
      services[slot].flaglog = 0 ;
    }
    else
    {
      strerr_warnwu2sys("stat ", logdir) ;
      retrydirlater() ;
      return ;
    }

    services[slot].ino = st.st_ino ;
    services[slot].dev = st.st_dev ;
    tain_copynow(&services[slot].restartafter[0]) ;
    tain_copynow(&services[slot].restartafter[1]) ;
    services[slot].pid[0] = 0 ;
    services[slot].pid[1] = 0 ;
    n++ ;
  }

  services[slot].flagactive = 1 ;

  /* Logger (index 1) is handled before the service itself (index 0). */
  if (services[slot].flaglog && !services[slot].pid[1])
  {
    if (tain_future(&services[slot].restartafter[1]))
    {
      if (tain_less(&services[slot].restartafter[1], &deadline))
        deadline = services[slot].restartafter[1] ;
    }
    else
    {
      char logdir[namelen + 5] ;
      byte_copy(logdir, namelen, name) ;
      byte_copy(logdir + namelen, 5, "/log") ;
      trystart(slot, logdir, 1) ;
    }
  }

  if (!services[slot].pid[0])
  {
    if (tain_future(&services[slot].restartafter[0]))
    {
      if (tain_less(&services[slot].restartafter[0], &deadline))
        deadline = services[slot].restartafter[0] ;
    }
    else
    {
      trystart(slot, name, 0) ;
    }
  }
}
Example #5
0
/*
 * Event loop of supervise.
 *
 * Each iteration waits (up to 3600 seconds, SIGCHLD unblocked) for activity
 * on the self-pipe or the control FIFO, drains the self-pipe, reaps children
 * with wait_nohang(), and then processes at most one command byte read from
 * fdcontrol.
 *
 * Returns when flagexit is set and no child is running (pid == 0).
 */
void doit(void)
{
  iopause_fd x[2];
  struct taia deadline;
  struct taia stamp;
  int wstat;
  int r;
  char ch;

  announce();

  /* Keep looping unless an exit was requested and no child is left. */
  while (!flagexit || pid) {
    sig_unblock(sig_child);

    x[0].fd = selfpipe[0];
    x[0].events = IOPAUSE_READ;
    x[1].fd = fdcontrol;
    x[1].events = IOPAUSE_READ;
    taia_now(&stamp);
    taia_uint(&deadline,3600);
    taia_add(&deadline,&stamp,&deadline);
    iopause(x,2,&deadline,&stamp);

    sig_block(sig_child);

    /* Drain the self-pipe; the bytes themselves carry no information. */
    while (read(selfpipe[0],&ch,1) == 1) {
      /* discard */
    }

    /* Reap children until ours is found or nothing is left to collect. */
    for (;;) {
      r = wait_nohang(&wstat);
      if (r == 0) break;
      if ((r == -1) && (errno != error_intr)) break;
      if (r != pid) continue;

      pid = 0;
      pidchange();
      announce();
      if (flagexit) return;
      if (flagwant && flagwantup) trystart();
      break;
    }

    /* At most one control command per iteration. */
    if (read(fdcontrol,&ch,1) != 1) continue;

    switch(ch) {
      /* --- commands that change the wanted state --- */
      case 'u':
        flagwant = 1;
        flagwantup = 1;
        announce();
        if (!pid) trystart();
        break;
      case 'd':
        flagwant = 1;
        flagwantup = 0;
        if (pid) {
          kill(pid,SIGTERM);
          kill(pid,SIGCONT);
          flagpaused = 0;
        }
        announce();
        break;
      case 'o':
        flagwant = 0;
        announce();
        if (!pid) trystart();
        break;
      case 'x':
        flagexit = 1;
        announce();
        break;

      /* --- pause / continue --- */
      case 'p':
        flagpaused = 1;
        announce();
        if (pid) kill(pid,SIGSTOP);
        break;
      case 'c':
        flagpaused = 0;
        announce();
        if (pid) kill(pid,SIGCONT);
        break;

      /* --- plain signal forwarding --- */
      case 'a':
        if (pid) kill(pid,SIGALRM);
        break;
      case 'h':
        if (pid) kill(pid,SIGHUP);
        break;
      case 'i':
        if (pid) kill(pid,SIGINT);
        break;
      case 't':
        if (pid) kill(pid,SIGTERM);
        break;
      case 'k':
        if (pid) kill(pid,SIGKILL);
        break;
    }
  }
}
Example #6
0
/*
 * Entry point of the extended supervise.
 *
 * Steps, in order:
 *   1. parse_argv(); exit 100 on failure;
 *   2. build the seven paths under status_dir into status_files[];
 *   3. ignore SIGHUP, create status_dir, open both log files;
 *   4. daemon(1, 0) -- from here on errors go to the log files only;
 *   5. create the self-pipe, install the SIGCHLD and SIGALRM handlers;
 *   6. take the lock, open the control and ok FIFOs;
 *   7. parse_conf() when no restart_sh is set, then start the service if
 *      wanted, run doit(), and _exit(0).
 *
 * Errors before step 4 are printed to stdout.  Because _exit() does not
 * flush stdio buffers, each such message is newline-terminated and flushed
 * explicitly before exiting.
 */
int main(int argc, char **argv)
{
	struct sigaction sa;

	if (parse_argv(argc, argv) < 0)
		_exit(100);

	snprintf(status_files[0], 1024, "%s/lock", status_dir);
	snprintf(status_files[1], 1024, "%s/control", status_dir);
	snprintf(status_files[2], 1024, "%s/ok", status_dir);
	snprintf(status_files[3], 1024, "%s/status", status_dir);
	snprintf(status_files[4], 1024, "%s/status.new", status_dir);
	snprintf(status_files[5], 1024, "%s/supervise.log", status_dir);
	snprintf(status_files[6], 1024, "%s/supervise.log.wf", status_dir);

	/* Every member sigaction() reads must be set; sa_flags was left indeterminate. */
	sa.sa_handler = SIG_IGN;
	sa.sa_flags = 0;
	sigemptyset(&sa.sa_mask);
	if (sigaction(SIGHUP, &sa, NULL) < 0)
	{
		printf("unable to ignore SIGHUP for %s\n", service);
		fflush(stdout);
		_exit(110);
	}

	/* An already existing status directory is fine. */
	if (mkdir(status_dir, 0700) < 0 && errno !=  EEXIST)
	{
		printf("unable to create dir: %s\n", status_dir);
		fflush(stdout);
		_exit(110);
	}

	fdlog = open_append(status_files[5]);
	if (fdlog == -1)
	{
		printf("unable to open %s%s\n", status_dir, "/supervise.log");
		fflush(stdout);
		_exit(111);
	}
	coe(fdlog);

	fdlogwf = open_append(status_files[6]);
	if (fdlogwf == -1)
	{
		printf("unable to open %s%s\n", status_dir, "/supervise.log.wf");
		fflush(stdout);
		_exit(1);
	}
	coe(fdlogwf);

	if (daemon(1, 0) < 0)
	{
		printf("failed to daemonize supervise!\n");
		fflush(stdout);
		_exit(111);
	}

	/* Self-pipe: both ends close-on-exec and non-blocking. */
	if (pipe(selfpipe) == -1)
	{
		write_log(fdlogwf, FATAL, "unable to create pipe for ", service, "\n");
		_exit(111);
	}
	coe(selfpipe[0]);
	coe(selfpipe[1]);
	ndelay_on(selfpipe[0]);
	ndelay_on(selfpipe[1]);

	/* SIGCHLD stays blocked after its handler is installed. */
	sig_block(sig_child);
	sig_catch(sig_child, trigger);

	/* SIGALRM is blocked only while its handler is being installed. */
	sig_block(sig_alarm);
	sig_catch(sig_alarm, timer_handler);
	sig_unblock(sig_alarm);

	fdlock = open_append(status_files[0]);
	if ((fdlock == -1) || (lock_exnb(fdlock) == -1))
	{
		write_log(fdlogwf, FATAL, "Unable to acquier ", status_dir, "/lock\n");
		_exit(111);
	}
	coe(fdlock);

	/* Control FIFO: opened for reading first, then for writing. */
	fifo_make(status_files[1], 0600);
	fdcontrol = open_read(status_files[1]);
	if (fdcontrol == -1)
	{
		write_log(fdlogwf, FATAL, "unable to read ", status_dir, "/control\n");
		_exit(1);
	}
	coe(fdcontrol);
	ndelay_on(fdcontrol);

	fdcontrolwrite = open_write(status_files[1]);
	if (fdcontrolwrite == -1)
	{
		write_log(fdlogwf, FATAL, "unable to write ", status_dir, "/control\n");
		_exit(1);
	}
	coe(fdcontrolwrite);

	fifo_make(status_files[2], 0600);
	fdok = open_read(status_files[2]);
	if (fdok == -1)
	{
		write_log(fdlogwf, FATAL, "unable to read ", status_dir, "/ok\n");
		_exit(1);
	}
	coe(fdok);

	/* The configuration is parsed only when restart_sh is empty. */
	if (!restart_sh[0])
	{
		parse_conf();
	}
	pidchange();
	announce();

	if (!flagwant || flagwantup)
		trystart();
	doit();
	announce();

	_exit(0);
}
Example #7
0
static void doit()
{
	iopause_fd x[2];
	struct taia deadline;
	struct taia stamp;
	int wstat;
	long int time_cmp = 0;
	int r;
	char ch;
	char warn_message[2048];
	int coredumped = 0;
	char restart_cmd[2048];

	announce();
	x[0].fd = selfpipe[0];
	x[0].events = IOPAUSE_READ;
	x[1].fd = fdcontrol;
	x[1].events = IOPAUSE_READ;

	while (1)
	{
		if (flagexit && !pid)
			break;

		taia_now(&stamp);
		taia_uint(&deadline, 3600);
		taia_add(&deadline, &stamp, &deadline);
		sig_unblock(sig_child);
		iopause(x, 2, &deadline, &stamp);
		sig_block(sig_child);

		while (read(selfpipe[0], &ch, 1) == 1);

		while (1)
		{
			//waitpid(-1,&wstat,WNOHANG);WNOHANG : return immediately if no child has exited
			r = wait_nohang(&wstat);
			//r==0 if one or more child(ren) exit(s) but have not yet changed state
			if (!r)
				break;
			//r == -1 && errno == error_intr means waitpid is interrupted by a signal, we should call waitpid again.
			//here is not necessary cause we call waitpid with a WNOHANG argument.
			if (r == -1 && errno != error_intr)
				break;
			if (r == pid)
			{
				pid = 0;
				pidchange();
				announce();
				time_now = time((time_t *)0);
				if(time_old)
				{
					time_cmp = time_now - time_old;
					if(time_cmp >= time_alarm) //cmp
					{
						num = 0;
					}
					time_old = time_now;
				}
				else
				{
					time_cmp = 0;
					time_old = time_now;
				}
				if (0 != restart_sh[0])
				{
					if (num == INT_MAX)
						num = 0;
					num++;
					
					if (snprintf(restart_cmd, sizeof(restart_cmd), "%s %d", restart_sh, num) > 1)
					{
						system(restart_cmd);
						write_log(fdlog, NOTICE, "restart_cmd: ", restart_cmd, " is called.\n");
					}
				}
				else
				{

					if (WCOREDUMP(wstat))
					{
						have_coredumped++;
						coredumped = 1;
					}

					bzero(warn_message, 2048);
					create_warn_message(warn_message, coredumped);      
					write_log(fdlog, NOTICE, "service exited", coredumped ? " with coredump" : "", "!\n");
					coredumped = 0;

					if (!closealarm && alarm_interval > 0 && have_tried++ == 0)
					{
						alarm(alarm_interval);
						do_alarm(warn_message);
					}

					if (flagexit || (alarm_interval > 0 && have_tried > max_tries && max_tries > 0) ||
					(alarm_interval > 0 && have_coredumped > max_tries_if_coredumped && max_tries_if_coredumped > 0))
					{
						write_log(fdlog, NOTICE, "supervise refused to restart ", service, 
								" any more and exited itself!\n");
						alarm(0);
						return;
					}
				}

				if (flagwant && flagwantup)
				{
					write_log(fdlog, NOTICE, "supervise is trying to restart ", service, "...\n");
					trystart();
				}
				break;
			}
		}

		if (read(fdcontrol, &ch, 1) != 1)
			continue;

		switch(ch)
		{
			// -s: Silent. Do not alarm any more.
			case 's':
				closealarm = 1;
				announce();
				break;
			case 'n':
				closealarm = 0;
				announce();
				break;
			case 'r':
				parse_conf();
				break;
			// -d: Down. If the service is running, send it a TERM signal and then a CONT signal. 
			// After it stops, do not restart it.
			case 'd':
				flagwant = 1;
				flagwantup = 0;
				if (pid) { kill(pid, SIGTERM); kill(pid, SIGCONT); flagpaused = 0; }
				announce();
				break;
			// -u: Up. If the service is not running, start it. If the service stops, restart it.
			case 'u':
				flagwant = 1;
				flagwantup = 1;
				announce();
				if (!pid) trystart();
				break;
			// -o: Once. If the service is not running, start it. Do not restart it if it stops.
			case 'o':
				flagwant = 0;
				announce();
				if (!pid) trystart();
				break;
			// -a: Alarm. Send the service an ALRM signal.
			case 'a':
				if (pid) kill(pid, SIGALRM);
				break;
			// -h: Hangup. Send the service a HUP signal.
			case 'h':
				if (pid) kill(pid, SIGHUP);
				break;
			// -k: Kill. Send the service a KILL signal.
			case 'k':
				if (pid) kill(pid, SIGKILL);
				break;
			//* -t: Terminate. Send the service a TERM signal.
			case 't':
				if (pid) kill(pid, SIGTERM);
				break;
			// -i: Interrupt. Send the service an INT signal.
			case 'i':
				if (pid) kill(pid, SIGINT);
				break;
			// -p: Pause. Send the service a STOP signal.
			case 'p':
				flagpaused = 1;
				announce();
				if (pid) kill(pid, SIGSTOP);
				break;
			// -c: Continue. Send the service a CONT signal.
			case 'c':
				flagpaused = 0;
				announce();
				if (pid) kill(pid, SIGCONT);
				break;
			// -x: Exit. supervise will exit as soon as the service is down. If you use this option on a 
			//stable system, you're doing something wrong; supervise is designed to run forever. 
			case 'x':
				flagexit = 1;
				announce();
				break;
		} //end switch
	} //end while
}
Example #8
0
/*
 * Handler that first applies down_O() and then calls trystart().
 * NOTE(review): presumably the "once" command received while the service is
 * down, with down_O() setting the flags -- confirm against down_O() and the
 * dispatch table, neither of which is visible here.
 */
static void down_o (void)
{
  down_O() ;
  trystart() ;
}
Example #9
0
/*
 * Timeout handler: calls trystart() when the service is both wanted
 * (status.flagwant) and wanted up (status.flagwantup); otherwise calls
 * settimeout_infinite().
 * NOTE(review): the name suggests this runs when a timeout expires while
 * the service is down -- confirm with the caller.
 */
static void downtimeout (void)
{
  if (!status.flagwant || !status.flagwantup)
  {
    settimeout_infinite() ;
    return ;
  }
  trystart() ;
}