Пример #1
0
/*
 * Forks a background watchdog process that monitors the processes in pids.
 *
 * In the parent this returns the pid of the forked child (the process exits
 * if fork() fails). The child never returns: it starts a new session, changes
 * directory to "/", closes stdin/stdout/stderr and then loops forever, logging
 * through syslog.
 *
 * Each loop iteration waits up to 500 ms on epoll for a signal or a new
 * client connection and then, unless paused, rebuilds a statistics string
 * from the stat file of every watched process and writes "OK" to the pipe of
 * every client thread.
 *
 * Signals handled by the child:
 *   SIGINT  - pause statistics collection.
 *   SIGHUP  - resume statistics collection.
 *   SIGQUIT - send SIGTERM to every watched process, write "EX" to every
 *             client thread pipe and exit.
 *
 * pids          - process ids to watch; entries equal to 0 are skipped. The
 *                 child zeroes the entry of a process whose stat file can no
 *                 longer be opened.
 * num_pids      - number of entries in pids.
 * port          - port for the optional TCP statistics server; NULL disables
 *                 the server.
 * email_address - used as the sender and recipient arguments of the alert
 *                 mail; an alert is sent only when both email_address and
 *                 mail_server are non-NULL.
 * mail_server   - mail server handed to send_email().
 *
 * Side effect: SIGPIPE is set to SIG_IGN before fork(), so it is ignored in
 * the calling process as well as in the watchdog.
 */
int run_watchdog (int pids[], int num_pids, char* port, char* email_address, char* mail_server) {
	signal (SIGPIPE, SIG_IGN);

	pid_t pid = fork();
	if (pid < 0) {
		PERR ("fork");
		exit (EXIT_FAILURE);
	}

	if (pid == 0) {

		umask(0);

		openlog ("SPROCKETS_WATCHDOG", LOG_CONS, LOG_DAEMON);

		pid_t sid = setsid();
		if (sid < 0) {
			PMSG ("Sprockets watchdog failed to setsid. Exiting now.\n");
			exit (EXIT_FAILURE);
		}

		if ((chdir("/")) < 0) {
			PMSG ("Sprockets watchdog failed to chdir. Exiting now.\n");
			exit (EXIT_FAILURE);
		}

		/*
		 * NOTE(review): only the first three entries are passed on, so
		 * SIGCHLD is listed here but never handed to setup_sighandlers().
		 * Confirm whether that is intended before changing the count.
		 */
		int sigs[] = { SIGINT, SIGQUIT, SIGHUP, SIGCHLD };
		int sfd = setup_sighandlers (sigs, 3);
		if (sfd == 0) {
			PMSG ("Sprockets watchdog failed to set sighandlers. Exiting now.\n");
			exit (EXIT_FAILURE);
		}

		int epfd = init_epoll (sfd);
		if (epfd == 0) {
			PMSG ("Sprockets watchdog failed to initialize epoll. Exiting now.\n");
			exit (EXIT_FAILURE);
		}

		/* -1 can never compare equal to a descriptor reported by epoll. */
		int server_fd = -1;
		if (port != NULL) {
			server_fd = sprocket_tcp_server (port, NULL);
			if (server_fd == 0) {
				PMSG ("Couldn't start watchdog server\n");
				exit (EXIT_FAILURE);
			}

			if (add_fd_to_epoll (epfd, server_fd) == 0) {
				PERR ("Couldn't add watchdog server to epoll\n");
				exit (EXIT_FAILURE);
			}
		}

		/* From here on the only diagnostic channel is syslog. */
		close(STDIN_FILENO);
		close(STDOUT_FILENO);
		close(STDERR_FILENO);

		syslog (LOG_NOTICE, "Sprockets watchdog successfully started\n");

		int paused_for_signal = 0;
		char* stats = NULL;
		auto_string* stat_buffer = NULL;
		pthread_mutex_t stats_mutex;
		pthread_mutex_init(&stats_mutex, NULL);

		auto_array* thread_array = auto_array_create (10);
		if (thread_array == NULL) {
			syslog (LOG_CRIT, "Unable to create thread array - out of memory");
			exit (EXIT_FAILURE);
		}

		while (1) {
			struct epoll_event events[MAX_EPOLL_EVENTS];
			int num_fd = epoll_wait(epfd, events, MAX_EPOLL_EVENTS, 500);

			if (num_fd == -1) {
				if (errno == EINTR) {
					syslog (LOG_NOTICE, "epoll_wait interrupted. Continuing\n");
					continue;
				}

				syslog (LOG_CRIT, "epoll_wait error: %s\n", strerror (errno));
				exit (EXIT_FAILURE);
			}

			/* num_fd == 0 means the timeout expired with nothing ready. */
			for (int i = 0; i < num_fd; ++i) {
				if (events[i].data.fd == sfd) { // caught signal
					struct signalfd_siginfo fdsi;

					ssize_t s = read (sfd, &fdsi, sizeof fdsi);
					if (s != (ssize_t) sizeof fdsi) {
						syslog (LOG_CRIT, "Read signal error: %s\n", strerror (errno));
						continue;
					}

					switch (fdsi.ssi_signo) {
						case SIGINT:
							syslog (LOG_NOTICE, "Caught SIGINT - pausing\n");
							paused_for_signal = 1;
							break;
						case SIGQUIT:
							syslog (LOG_NOTICE, "Caught SIGQUIT - exiting\n");
							for (int ii = 0; ii < num_pids; ++ii) {
								if (pids[ii] != 0) {
									kill (pids[ii], SIGTERM);
								}
							}

							for (int ii = 0; ii < thread_array->count; ++ii) {
								watchdog_thread* wt = auto_array_get (thread_array, ii);
								char ex[2] = "EX";
								write (wt->pipe_write, ex, 2);
							}

							/*
							 * Release the array only after every thread has
							 * been notified; deleting it inside the loop left
							 * the loop reading a deleted array.
							 */
							auto_array_delete (thread_array, free);

							exit (EXIT_SUCCESS);
						case SIGHUP:
							syslog (LOG_NOTICE, "Caught SIGHUP\n");
							paused_for_signal = 0;
							break;
						case SIGCHLD:
							syslog (LOG_NOTICE, "Caught SIGCHLD\n");
							break;
						default:
							syslog (LOG_NOTICE, "Caught unknown signal\n");
							break;
					}
				} else if (events[i].data.fd == server_fd) {
					struct sockaddr_in addr;
					/* accept() expects the buffer size in addr_sz on entry. */
					socklen_t addr_sz = sizeof addr;
					int client_fd = accept (server_fd, (struct sockaddr*) &addr, &addr_sz);
					if (client_fd < 0) {
						syslog (LOG_CRIT, "watchdog accept error: %s", strerror (errno));
						continue;
					}

					/* sizeof *ptr: allocate the structure, not a pointer to it. */
					watchdog_thread_args* thread_args = malloc (sizeof *thread_args);
					if (thread_args == NULL) {
						syslog (LOG_CRIT, "Malloc returned NULL: %s", strerror (errno));
						exit (EXIT_FAILURE);
					}

					watchdog_thread* dog_thread = malloc (sizeof *dog_thread);
					if (dog_thread == NULL) {
						syslog (LOG_CRIT, "Malloc returned NULL: %s", strerror (errno));
						exit (EXIT_FAILURE);
					}

					int pipefd[2];
					if (pipe (pipefd) < 0) {
						syslog (LOG_CRIT, "watchdog pipe error: %s", strerror (errno));
						free (dog_thread);
						free (thread_args);
						close (client_fd);
						continue;
					}

					dog_thread->pipe_write = pipefd[1];

					thread_args->pipe_read = pipefd[0];
					thread_args->client_fd = client_fd;
					thread_args->stats = &stats;
					thread_args->stats_mutex = &stats_mutex;

					pthread_attr_t attr;
					int trv = pthread_attr_init (&attr);
					int attr_ready = (trv == 0);
					if (trv != 0) {
						syslog (LOG_CRIT, "pthread_attr_init: %s\n", strerror (trv));
					} else if ((trv = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) != 0) {
						syslog (LOG_CRIT, "pthread_attr_setdetachstate: %s\n", strerror (trv));
					} else if ((trv = pthread_create(&dog_thread->th_id, &attr, client_thread, thread_args)) != 0) {
						syslog (LOG_CRIT, "pthread_create: %s\n", strerror (trv));
					}

					if (attr_ready) {
						pthread_attr_destroy(&attr);
					}

					if (trv != 0) {
						/* No thread took ownership, so release everything here. */
						close (pipefd[0]);
						close (pipefd[1]);
						close (client_fd);
						free (dog_thread);
						free (thread_args);
						continue;
					}

					/*
					 * NOTE(review): thread_args, client_fd and the pipe read
					 * end are presumably released by client_thread - confirm.
					 */
					auto_array_add (thread_array, dog_thread);
				}
			}

			if (paused_for_signal) {
				syslog (LOG_NOTICE, "Paused for signal\n");
			} else {
				auto_array* stat_files = auto_array_create (num_pids);
				if (stat_files == NULL) {
					syslog (LOG_CRIT, "Unable to create stat file array - out of memory");
					exit (EXIT_FAILURE);
				}

				for (int i = 0; i < num_pids; ++i) {
					if (pids[i] == 0) {
						continue;
					}

					char pid_str[32];
					snprintf (pid_str, sizeof pid_str, "%d", pids[i]);

					FILE* p = get_stat_filep (pid_str);
					if (p == NULL) {
						/* A NULL stat file is treated as "process has halted". */
						syslog (LOG_CRIT, "Process with pid %s has halted", pid_str);
						if (email_address != NULL && mail_server != NULL) {
							char msg[2048];
							snprintf (msg, sizeof msg, email_alert_msg_format, email_address, email_address, pid_str);
							send_email (mail_server, email_address, email_address, msg);
						}

						/*
						 * Stop watching this pid whether or not an email was
						 * sent, so it is reported once and is not signalled
						 * later by the SIGQUIT handler.
						 */
						pids[i] = 0;
						continue;
					}

					auto_array_add (stat_files, p);
				}

				/* Client threads read stats through the shared pointer under this mutex. */
				pthread_mutex_lock(&stats_mutex);

				if (stat_buffer != NULL) {
					auto_string_delete (stat_buffer);
				}

				stat_buffer = auto_string_create (1024);
				if (stat_buffer == NULL) {
					syslog (LOG_CRIT, "Unable to create stat buffer - out of memory");
					exit (EXIT_FAILURE);
				}

				for (int i = 0; i < stat_files->count; ++i) {
					FILE* f = auto_array_get (stat_files, i);

					char* tmp = get_proc_string (f);

					if (tmp != NULL) {
						auto_string_append (stat_buffer, tmp);
						free (tmp);
					}
				}

				stats = stat_buffer->buf;

				pthread_mutex_unlock (&stats_mutex);

				int t = 0;
				while (t < thread_array->count) {
					watchdog_thread* wt = auto_array_get (thread_array, t);
					char ok[2] = "OK";
					if (write (wt->pipe_write, ok, 2) <= 0) {
						syslog (LOG_CRIT, "watchdog write client thread: %s", strerror (errno));
						auto_array_remove (thread_array, t);
						close (wt->pipe_write);
						free (wt);
						/*
						 * Presumably auto_array_remove() compacts the array
						 * (TODO confirm), so index t now holds the next
						 * element and must be visited again.
						 */
						continue;
					}
					++t;
				}

				auto_array_delete (stat_files, close_file);
			}
		}

	}

	return pid;
}
Пример #2
0
/*
 * Runs the epoll loop that relays reports read from hidfd to TCP clients.
 *
 * sockfd - listening socket; new connections are accepted from it.
 * hidfd  - descriptor that 8-byte reports are read from and that the
 *          reset/heartbeat messages are sent to.
 *
 * Behaviour per iteration (epoll timeout 2000 ms):
 *   - while at least one client is connected, send_hmr_reset() is called when
 *     more than RESET_TIMEOUT seconds passed since the last read from hidfd,
 *     and send_hmr_heartbeat() every HEARTBEAT_INTERVAL seconds;
 *   - an 8-byte report whose first byte is a length below 8 has that many
 *     following bytes forwarded to every client; clients for which
 *     write_exact() fails are removed;
 *   - data sent by clients is read and discarded.
 *
 * The function returns when epoll_wait() fails (including EINTR, which is
 * not reported) or when an error/hangup is signalled on sockfd or hidfd.
 * Before returning it closes every tracked client descriptor and the epoll
 * descriptor; sockfd and hidfd are left open.
 */
static void
event_loop(int sockfd, int hidfd)
{
    int efd, i, s;
    struct epoll_event events[MAXEVENTS];
    struct timeval last_recv, last_heartbeat;
    int clientfds[MAXCLIENTS];
    int client_count = 0;

    efd = epoll_create1(0);
    if (efd == -1) {
        perror("epoll_create");
        goto out;
    }

    if (add_fd_to_epoll(efd, hidfd) < 0 || add_fd_to_epoll(efd, sockfd) < 0) {
        goto out;
    }

    memset(events, 0, sizeof(events));
    memset(&last_recv, 0, sizeof(last_recv));
    memset(&last_heartbeat, 0, sizeof(last_heartbeat));

    /* The event loop */
    while (1) {
        int n, item;

        n = epoll_wait (efd, events, MAXEVENTS, 2000);
        if (n < 0) {
            /* An interrupted wait ends the loop quietly; other errors are reported. */
            if (errno != EINTR) {
                perror("epoll_wait");
            }
            goto out;
        }

        if (client_count > 0) {
            struct timeval now;

            gettimeofday(&now, NULL);
            if (now.tv_sec - last_recv.tv_sec > RESET_TIMEOUT) {
                send_hmr_reset(hidfd);
                last_recv = now;
            }
            if (now.tv_sec - last_heartbeat.tv_sec > HEARTBEAT_INTERVAL) {
                send_hmr_heartbeat(hidfd);
                last_heartbeat = now;
            }
        }

        for (item = 0; item < n; item++) {
            struct epoll_event *ev = &events[item];

            if (ev->events & (EPOLLERR | EPOLLHUP)) {
                fprintf (stderr, "epoll error\n");
                if (ev->data.fd == sockfd || ev->data.fd == hidfd) {
                    goto out;
                } else {
                    remove_client(ev->data.fd, clientfds, &client_count);
                    continue;
                }
            } else if (sockfd == ev->data.fd) {
                /* We have a notification on the listening socket, which
                 * means one or more incoming connections. */
                while (1) {
                    struct sockaddr in_addr;
                    socklen_t in_len;
                    int infd;
                    char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV];

                    in_len = sizeof(in_addr);
                    infd = accept(sockfd, &in_addr, &in_len);
                    if (infd == -1) {
                        if (errno == EAGAIN || errno == EWOULDBLOCK) {
                            /* We have processed all incoming
                             * connections. */
                            break;
                        } else {
                            perror("accept");
                            break;
                        }
                    }

                    s = getnameinfo(&in_addr, in_len, hbuf, sizeof(hbuf),
                                    sbuf, sizeof(sbuf), NI_NUMERICHOST | NI_NUMERICSERV);
                    if (s == 0) {
                        printf("Accepted connection on descriptor %d "
                               "(host=%s, port=%s)\n", infd, hbuf, sbuf);
                    }

                    /* Refuse the connection when the client table is full.
                     * Registering it with epoll without tracking it would
                     * leave a descriptor that never receives data and is
                     * not closed on exit. */
                    if (client_count >= MAXCLIENTS) {
                        fprintf(stderr, "Too many clients, rejecting descriptor %d\n", infd);
                        close(infd);
                        continue;
                    }

                    /* Make the incoming socket non-blocking and add it to the
                     * list of fds to monitor. */
                    s = make_fd_non_blocking(infd);
                    if (s == -1) {
                        close(infd);
                        continue;
                    }

                    if (add_fd_to_epoll(efd, infd) < 0) {
                        close(infd);
                        continue;
                    }

                    if (client_count == 0) {
                        /* First client: reset the device and restart both timers. */
                        send_hmr_reset(hidfd);
                        send_hmr_heartbeat(hidfd);
                        gettimeofday(&last_recv, NULL);
                        last_heartbeat = last_recv;
                    }
                    clientfds[client_count++] = infd;
                }
            } else if (hidfd == ev->data.fd) {
                char buf[8];
                ssize_t count = read(hidfd, buf, sizeof(buf));

                gettimeofday(&last_recv, NULL);
                if (count == 8) {
                    /* Read the length byte as unsigned: with a signed char a
                     * value of 0x80 or above would become negative, pass the
                     * "< 8" check and be handed to write_exact(). */
                    int cl, length = (unsigned char) buf[0];
                    if (length < 8) {
                        for (cl = 0; cl < client_count; ) {
                            if (write_exact(clientfds[cl], buf + 1, length) < 0) {
                                remove_client(clientfds[cl], clientfds, &client_count);
                                /* keep cl, as remove_client shifted the array */
                            } else {
                                cl++;
                            }
                        }
                    }
                }
            } else {
                /* A client sent data. Read and discard all of it; the
                 * original author notes the descriptors are monitored
                 * edge-triggered, so everything available has to be
                 * drained now (registration is done by add_fd_to_epoll,
                 * not visible here). */
                int done = 0;

                while (1) {
                    ssize_t count;
                    char buf[512];

                    count = read (ev->data.fd, buf, sizeof buf);
                    if (count == -1) {
                        /* If errno == EAGAIN, that means we have read all
                         * data. So go back to the main loop. */
                        if (errno != EAGAIN && errno != EWOULDBLOCK) {
                            perror ("read");
                            done = 1;
                        }
                        break;
                    } else if (count == 0) {
                        /* End of file. The remote has closed the connection */
                        done = 1;
                        break;
                    }
                }

                if (done) {
                    printf ("Closed connection on descriptor %d\n", ev->data.fd);
                    /* remove_client presumably closes the descriptor, which
                     * also drops it from the epoll set. */
                    remove_client(ev->data.fd, clientfds, &client_count);
                }
            }
        }
    }

out:
    for (i = 0; i < client_count; i++) {
        close(clientfds[i]);
    }
    if (efd >= 0) {
        close(efd);
    }
}
Пример #3
0
/*
 * Handles an epoll notification for the worker's message queue.
 *
 * If the event carries EPOLLERR the function only logs and returns.
 * Otherwise it receives messages until mq_receive() fails. EAGAIN ends the
 * loop silently, any other error is logged first. Each message must carry
 * the code kITTMessageAddClient; a client object is created for the
 * descriptor in the message, registered with the worker's epoll descriptor
 * and appended to the worker's client list.
 */
static
void
worker_thread_handle_msgqueue_event(
    struct worker_thread* state,
    uint32_t event_flags
    )
{
  if ((event_flags & EPOLLERR) != 0) {
    D_FMTSTRING("Error on message queue!");
    return;
  }

  while (1) {
    /*
     * Keep receiving until the queue descriptor has nothing left.
     */
    struct message msg;
    ssize_t received = HANDLE_EINTR_ON_SYSCALL(
        mq_receive(state->wk_messagequeue.mq_queuefds,
                   (char*) &msg,
                   sizeof(msg),
                   NULL /* message priority is not needed */));
    if (received == -1) {
      if (errno != EAGAIN) {
        D_FUNCFAIL_ERRNO(mq_receive);
      }
      return;
    }
    BUGSTOP_IF((msg.msg_code != kITTMessageAddClient), 
               "Unknown message code");

    /*
     * On success the client object owns the socket descriptor taken from
     * the message.
     */
    struct client* new_client = client_create(msg.msg_data.msg_fd,
                                              state->wk_allocator);
    if (new_client == NULL) {
      /*
       * No client object was created: close the socket and move on to the
       * next message.
       */
      HANDLE_EINTR_ON_SYSCALL(close(msg.msg_data.msg_fd));
    } else if (add_fd_to_epoll(state->wk_epoll_fds, new_client->cl_sockfd,
                               EPOLLIN | EPOLLRDHUP, kDataTypePTR,
                               new_client) == -1) {
      /*
       * The socket could not be registered with epoll: destroy the client
       * and move on to the next message.
       */
      client_destroy(new_client, state->wk_allocator);
    } else {
      /*
       * Fully set up, so track the client in the worker's list.
       */
      dlist_push_tail(state->wk_clients, new_client);
    }
  }
}
Пример #4
0
/*
 * Initializes a server object: zeroes it, then creates the accept socket,
 * the epoll descriptor, a signalfd for SIGINT, the thread-ready eventfd and
 * one worker thread per online processor.
 *
 * p_srv - server object to initialize; must not be NULL (asserted).
 *
 * Returns 0 when at least one worker thread was started, -1 on any failure.
 * Nothing is released here on failure. sv_rollback is incremented after each
 * acquired resource (levels 1-5) - presumably so that a teardown routine
 * elsewhere can undo exactly what was set up; confirm against the caller.
 *
 * Side effect: SIGINT is blocked for the calling thread via sigprocmask().
 */
static 
int 
server_init(
    struct server* p_srv
    )
{
  assert(p_srv);
  memset(p_srv, 0, sizeof(*p_srv));

  p_srv->sv_allocator = allocator_handle;
  p_srv->sv_quitflag = 0;
  p_srv->sv_acceptfd = create_server_socket();
  if (-1 == p_srv->sv_acceptfd) {
    return -1;
  }
  /*
   * Level 1 - accept socket created.
   */
  ++p_srv->sv_rollback;

  p_srv->sv_epollfd = epoll_create(kMaxEpollCompletionEntries);
  if (-1 == p_srv->sv_epollfd) {
    return -1;
  }
  /*
   * Level 2 - epoll descriptor allocated.
   */
  ++p_srv->sv_rollback;

  /*
   * Block SIGINT and create a signal descriptor to receive it via epoll.
   */
  sigset_t sig_mask;
  sigemptyset(&sig_mask);
  if (-1 == sigaddset(&sig_mask, SIGINT)) {
    return -1;
  }
  if (-1 == sigprocmask(SIG_BLOCK, &sig_mask, NULL)) {
    return -1;
  }
  p_srv->sv_sigfds = signalfd(-1, &sig_mask, SFD_NONBLOCK);
  if (-1 == p_srv->sv_sigfds) {
    return -1;
  }
  /*
   * Level 3 - signal descriptor for SIGINT allocated.
   */
  ++p_srv->sv_rollback;

  /*
   * Add termination signal and accept socket to epoll interface.
   */
  if (-1 == add_fd_to_epoll(p_srv->sv_epollfd,
                            p_srv->sv_sigfds,
                            EPOLLIN | EPOLLET,
                            kDataTypeFD,
                            p_srv->sv_sigfds)) {
    return -1;
  }

  if (-1 == add_fd_to_epoll(p_srv->sv_epollfd,
                            p_srv->sv_acceptfd,
                            EPOLLIN | EPOLLET | EPOLLRDHUP,
                            kDataTypeFD,
                            p_srv->sv_acceptfd)) {
    return -1;
  }

  p_srv->sv_threadrdy_eventfds = eventfd(0, 0);
  if (p_srv->sv_threadrdy_eventfds == -1) {
    D_FUNCFAIL_ERRNO(eventfd);
    return -1;
  }
  /*
   * Level 4 - thread notification event created.
   */
  ++p_srv->sv_rollback;

  /*
   * Get number of available processors. One worker thread is spawned per
   * online processor.
   */
  long nr_procs = sysconf(_SC_NPROCESSORS_ONLN);
  if (nr_procs == -1) {
    D_FUNCFAIL_ERRNO(sysconf);
    return -1;
  }
  /*
   * nr_procs is a long, so it has to be printed with %ld.
   */
  D_FMTSTRING("Online processors %ld, will spawn %ld threads.",
              nr_procs, nr_procs);

  p_srv->sv_workers = p_srv->sv_allocator->al_mem_alloc(
      p_srv->sv_allocator, (sizeof(struct worker_thread*) * nr_procs));
  if (!p_srv->sv_workers) {
    D_FMTSTRING("Out of memory!");
    return -1;
  }
  /*
   * Level 5 - memory for worker thread data allocated.
   */
  ++p_srv->sv_rollback;
  memset(p_srv->sv_workers, 0, sizeof(struct worker_thread*) * nr_procs);

  /*
   * Initialize data and start worker threads. A worker that cannot be
   * created or started is skipped; the server only fails if none start.
   */
  for (long l = 0; l < nr_procs; ++l) {
    char thread_msgqueue[NAME_MAX];
    snprintf(thread_msgqueue, sizeof(thread_msgqueue) - 1, 
             "/__msgqueue_thread_%d__", (int) l);
    struct worker_thread* current = worker_thread_create(thread_msgqueue, 
                                                         p_srv->sv_allocator);
    if (current) {
      if (worker_thread_start(p_srv, current, NULL) == 0) {
        /*
         * Thread successfully initialized, add it to list.
         */
        p_srv->sv_workers[p_srv->sv_worker_count++] = current;
      } else {
        /*
         * Cleanup thread data since the thread could not be started.
         */
        worker_thread_destroy(current);
      }
    }
  }
  if (!p_srv->sv_worker_count) {
    D_FMTSTRING("Fatal : failed to initialize at least one worker thread!");
    return -1;
  }

  D_FMTSTRING("Started a total of %d worker threads", p_srv->sv_worker_count);
  /*
   * Server is up and running.
   */
  return 0;
}
Пример #5
0
/*
 * Entry point of a worker thread.
 *
 * args - pointer to the thread's struct worker_thread state; must not be
 *        NULL.
 *
 * The thread registers its termination-signal descriptor and its message
 * queue descriptor with its own epoll descriptor, then reports the outcome
 * (kThreadInitOk or kThreadInitFail) by writing a 64-bit value to
 * wk_readyevent. If either registration failed it returns right away.
 * Otherwise it waits on epoll without a timeout and passes every received
 * event to worker_thread_handle_event() until wk_quitflag becomes non-zero
 * or epoll_wait() fails.
 *
 * Always returns NULL.
 */
static
void*
worker_thread_proc(
    void* args
    ) {
  struct worker_thread* state = (struct worker_thread*) args;
  BUGSTOP_IF((!state), "Invalid thread state specified!");
  /*
   * syscall() returns a long; convert it explicitly so the argument matches
   * the %u conversion.
   */
  D_FMTSTRING("Client thread (%u) starting\n",
              (unsigned int) syscall(SYS_gettid));
  
  /*
   * Add the message queue and the termination event to epoll.
   * result counts the registrations that succeeded.
   */
  int result = 0;
  if (add_fd_to_epoll(state->wk_epoll_fds, 
                      state->wk_termsig.so_sigfds,
                      EPOLLIN | EPOLLET,
                      kDataTypePTR,
                      (void*) &state->wk_termsig) == 0) {
    ++result;
  } 

  if (add_fd_to_epoll(state->wk_epoll_fds,
                      state->wk_messagequeue.mq_queuefds,
                      EPOLLIN | EPOLLET,
                      kDataTypePTR,
                      (void*) &state->wk_messagequeue) == 0) {
    ++result;
  } 
  
  /*
   * Notify waiter with our initialize status.
   */
  uint64_t init_status = (result == 2 ? kThreadInitOk : kThreadInitFail);
  HANDLE_EINTR_ON_SYSCALL(write(state->wk_readyevent, &init_status,
                                sizeof(init_status)));
  if (result != 2) {
    /*
     * Failed to init so return.
     */
    return NULL;
  }

  /*
   * Loop waiting for events until asked to quit.
   */
  for (; !state->wk_quitflag;) {
    struct epoll_event rec_events[kMaxEpollCompletionEntries];
    int ev_count = HANDLE_EINTR_ON_SYSCALL(epoll_wait(state->wk_epoll_fds,
                                                      rec_events,
                                                      kMaxEpollCompletionEntries,
                                                      -1 /* don't timeout */));
    if (ev_count == -1) {
      D_FUNCFAIL_ERRNO(epoll_wait);
      break;
    }
    /*
     * Stop dispatching as soon as a handler sets the quit flag.
     */
    for (int i = 0; i < ev_count && !state->wk_quitflag; ++i) {
      worker_thread_handle_event(state, rec_events + i);
    }
  }

  return NULL;
}