/**
 * pthread compatible routine that handles connections and processes
 * whatever comes in on those.
 */
static void *
dispatch_runner(void *arg)
{
    dispatcher *self = (dispatcher *)arg;
    connection *conn;
    int c;

    self->metrics = 0;
    self->blackholes = 0;
    self->ticks = 0;
    self->sleeps = 0;
    self->prevmetrics = 0;
    self->prevblackholes = 0;
    self->prevticks = 0;
    self->prevsleeps = 0;

    if (self->type == LISTENER) {
        struct pollfd ufds[sizeof(listeners) / sizeof(connection *)];
        while (self->keep_running) {
            for (c = 0; c < sizeof(listeners) / sizeof(connection *); c++) {
                if (listeners[c] == NULL)
                    break;
                ufds[c].fd = listeners[c]->sock;
                ufds[c].events = POLLIN;
            }
            if (poll(ufds, c, 1000) > 0) {
                for (--c; c >= 0; c--) {
                    if (ufds[c].revents & POLLIN) {
                        int client;
                        struct sockaddr addr;
                        socklen_t addrlen = sizeof(addr);

                        if ((client = accept(ufds[c].fd, &addr, &addrlen)) < 0)
                        {
                            logerr("dispatch: failed to "
                                   "accept() new connection: %s\n",
                                   strerror(errno));
                            dispatch_check_rlimit_and_warn();
                            continue;
                        }
                        if (dispatch_addconnection(client) == -1) {
                            close(client);
                            continue;
                        }
                    }
                }
            }
        }
    } else if (self->type == CONNECTION) {
        int work;
        struct timeval start, stop;

        while (self->keep_running) {
            work = 0;

            if (self->route_refresh_pending) {
                self->rtr = self->pending_rtr;
                self->pending_rtr = NULL;
                self->route_refresh_pending = 0;
                self->hold = 0;
            }

            gettimeofday(&start, NULL);
            pthread_rwlock_rdlock(&connectionslock);
            for (c = 0; c < connectionslen; c++) {
                conn = &(connections[c]);
                /* atomically try to "claim" this connection */
                if (!__sync_bool_compare_and_swap(&(conn->takenby), 0, self->id))
                    continue;
                if (self->hold && !conn->isaggr) {
                    conn->takenby = 0;
                    continue;
                }
                work += dispatch_connection(conn, self, start);
            }
            pthread_rwlock_unlock(&connectionslock);
            gettimeofday(&stop, NULL);
            self->ticks += timediff(start, stop);

            /* nothing done, avoid spinlocking */
            if (self->keep_running && work == 0) {
                gettimeofday(&start, NULL);
                usleep((100 + (rand() % 200)) * 1000);  /* 100ms - 300ms */
                gettimeofday(&stop, NULL);
                self->sleeps += timediff(start, stop);
            }
        }
    } else {
        logerr("huh? unknown self type!\n");
    }

    return NULL;
}
Exemple #2
0
/**
 * Reads from the queue and sends items to the remote server.  This
 * function is designed to be a thread.  Data sending is attempted to be
 * batched, but sent one by one to reduce loss on sending failure.
 * A connection with the server is maintained for as long as there is
 * data to be written.  As soon as there is none, a TCP connection is
 * dropped once more than DISCONNECT_WAIT_TIME idle rounds have passed.
 *
 * While the server is failing (or its queue runs critically full) and
 * secondaries are configured, queued metrics are moved over to the
 * queues of those secondaries instead.
 *
 * The loop only terminates when keep_running is cleared and the queue
 * is empty, or nothing can be delivered at that time.
 *
 * @param d  the server to service, passed as void *
 * @return always NULL
 */
static void *
server_queuereader(void *d)
{
	server *self = (server *)d;
	size_t len;
	ssize_t slen;
	const char **metric = self->batch;
	struct timeval start, stop;
	struct timeval timeout;
	queue *squeue;
	char idle = 0;
	/* visiting order of the secondaries, allocated on first use and
	 * released when the thread finishes */
	size_t *secpos = NULL;

	*metric = NULL;
	self->metrics = 0;
	self->ticks = 0;

#define FAIL_WAIT_TIME   6  /* 6 * 250ms = 1.5s */
#define DISCONNECT_WAIT_TIME   12  /* 12 * 250ms = 3s */
#define LEN_CRITICAL(Q)  (queue_free(Q) < self->bsize)
	self->running = 1;
	while (1) {
		if (queue_len(self->queue) == 0) {
			/* if we're idling, close the TCP connection, this allows us
			 * to reduce connections, while keeping the connection alive
			 * if we're writing a lot */
			gettimeofday(&start, NULL);
			if (self->ctype == CON_TCP && self->fd >= 0 &&
					idle++ > DISCONNECT_WAIT_TIME)
			{
				close(self->fd);
				self->fd = -1;
			}
			gettimeofday(&stop, NULL);
			self->ticks += timediff(start, stop);
			if (!self->keep_running)
				break;
			/* nothing to do, so slow down for a bit */
			usleep((200 + (rand() % 100)) * 1000);  /* 200ms - 300ms */
			/* if we are in failure mode, keep checking if we can
			 * connect, this avoids unnecessary queue moves */
			if (!self->failure)
				/* it makes no sense to try and do something, so skip */
				continue;
		} else if (self->secondariescnt > 0 &&
				(self->failure >= FAIL_WAIT_TIME ||
				 (!self->failover && LEN_CRITICAL(self->queue))))
		{
			size_t i;

			gettimeofday(&start, NULL);
			if (self->secondariescnt > 0) {
				if (secpos == NULL) {
					secpos = malloc(sizeof(*secpos) * self->secondariescnt);
					if (secpos == NULL) {
						logerr("server: failed to allocate memory "
								"for secpos\n");
						gettimeofday(&stop, NULL);
						self->ticks += timediff(start, stop);
						continue;
					}
					for (i = 0; i < self->secondariescnt; i++)
						secpos[i] = i;
				}
				if (!self->failover) {
					/* randomise the failover list such that in the
					 * grand scheme of things we don't punish the first
					 * working server in the list to deal with all
					 * traffic meant for a now failing server */
					for (i = 0; i < self->secondariescnt; i++) {
						/* Fisher-Yates: pick the element for slot i
						 * from the not yet settled tail [i, cnt), such
						 * that every ordering is equally likely */
						size_t n = i + rand() % (self->secondariescnt - i);
						if (n != i) {
							size_t t = secpos[n];
							secpos[n] = secpos[i];
							secpos[i] = t;
						}
					}
				}
			}

			/* offload data from our queue to our secondaries
			 * when doing so, observe the following:
			 * - avoid nodes that are in failure mode
			 * - avoid nodes which queues are >= critical_len
			 * when no nodes remain given the above
			 * - send to nodes which queue size < critical_len
			 * where there are no such nodes
			 * - do nothing (we will overflow, since we can't send
			 *   anywhere) */
			*metric = NULL;
			squeue = NULL;
			for (i = 0; i < self->secondariescnt; i++) {
				/* both conditions below make sure we skip ourself */
				if (self->secondaries[secpos[i]]->failure)
					continue;
				squeue = self->secondaries[secpos[i]]->queue;
				if (!self->failover && LEN_CRITICAL(squeue)) {
					squeue = NULL;
					continue;
				}
				if (*metric == NULL) {
					/* send up to batch size of our queue to this queue */
					len = queue_dequeue_vector(
							self->batch, self->queue, self->bsize);
					self->batch[len] = NULL;
					metric = self->batch;
				}

				for (; *metric != NULL; metric++)
					if (!queue_putback(squeue, *metric))
						break;
				/* try to put back stuff that didn't fit */
				for (; *metric != NULL; metric++)
					if (!queue_putback(self->queue, *metric))
						break;
			}
			/* whatever is left in the batch now fits nowhere */
			for (; *metric != NULL; metric++) {
				if (mode & MODE_DEBUG)
					logerr("dropping metric: %s", *metric);
				free((char *)*metric);
				self->dropped++;
			}
			gettimeofday(&stop, NULL);
			self->ticks += timediff(start, stop);
			if (squeue == NULL) {
				/* we couldn't do anything, take it easy for a bit */
				if (self->failure)
					self->failure = 1;
				if (!self->keep_running)
					break;
				usleep((200 + (rand() % 100)) * 1000);  /* 200ms - 300ms */
			}
		} else if (self->failure) {
			if (!self->keep_running)
				break;
			usleep((200 + (rand() % 100)) * 1000);  /* 200ms - 300ms */
		}

		/* at this point we've got work to do, if we're instructed to
		 * shut down, however, try to get everything out of the door
		 * (until we fail, see top of this loop) */

		gettimeofday(&start, NULL);

		/* try to connect */
		if (self->fd < 0) {
			if (self->ctype == CON_PIPE) {
				int intconn[2];
				if (pipe(intconn) < 0) {
					if (!self->failure)
						logerr("failed to create pipe: %s\n", strerror(errno));
					self->failure += self->failure >= FAIL_WAIT_TIME ? 0 : 1;
					continue;
				}
				if (dispatch_addconnection(intconn[0]) == -1) {
					/* the read end was not taken, so nobody would ever
					 * consume what we write: release both ends and
					 * treat this like any other connect failure */
					if (!self->failure)
						logerr("failed to hand pipe over to dispatcher\n");
					close(intconn[0]);
					close(intconn[1]);
					self->failure += self->failure >= FAIL_WAIT_TIME ? 0 : 1;
					continue;
				}
				self->fd = intconn[1];
			} else if (self->ctype == CON_UDP) {
				if ((self->fd = socket(self->saddr->ai_family,
								self->saddr->ai_socktype,
								self->saddr->ai_protocol)) < 0)
				{
					if (!self->failure)
						logerr("failed to create udp socket: %s\n",
								strerror(errno));
					self->failure += self->failure >= FAIL_WAIT_TIME ? 0 : 1;
					continue;
				}
				if (connect(self->fd,
						self->saddr->ai_addr, self->saddr->ai_addrlen) < 0)
				{
					if (!self->failure)
						logerr("failed to connect udp socket: %s\n",
								strerror(errno));
					close(self->fd);
					self->fd = -1;
					self->failure += self->failure >= FAIL_WAIT_TIME ? 0 : 1;
					continue;
				}
			} else if (self->ctype == CON_FILE) {
				/* for this type, ip holds the path of the file */
				if ((self->fd = open(self->ip,
								O_WRONLY | O_APPEND | O_CREAT,
								S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0)
				{
					if (!self->failure)
						logerr("failed to open file '%s': %s\n",
								self->ip, strerror(errno));
					self->failure += self->failure >= FAIL_WAIT_TIME ? 0 : 1;
					continue;
				}
			} else {
				/* all remaining types (presumably just CON_TCP) */
				int ret;
				int args;

				if ((self->fd = socket(self->saddr->ai_family,
								self->saddr->ai_socktype,
								self->saddr->ai_protocol)) < 0)
				{
					if (!self->failure)
						logerr("failed to create socket: %s\n",
								strerror(errno));
					self->failure += self->failure >= FAIL_WAIT_TIME ? 0 : 1;
					continue;
				}

				/* put socket in non-blocking mode such that we can
				 * poll() (time-out) on the connect() call */
				args = fcntl(self->fd, F_GETFL, NULL);
				(void) fcntl(self->fd, F_SETFL, args | O_NONBLOCK);
				ret = connect(self->fd,
						self->saddr->ai_addr, self->saddr->ai_addrlen);

				if (ret < 0 && errno == EINPROGRESS) {
					/* wait for connection to succeed if the OS thinks
					 * it can succeed */
					struct pollfd ufds[1];
					ufds[0].fd = self->fd;
					ufds[0].events = POLLIN | POLLOUT;
					ret = poll(ufds, 1, self->iotimeout + (rand() % 100));
					if (ret == 0) {
						/* time limit expired */
						if (!self->failure)
							logerr("failed to connect() to "
									"%s:%u: Operation timed out\n",
									self->ip, self->port);
						close(self->fd);
						self->fd = -1;
						self->failure += self->failure >= FAIL_WAIT_TIME ? 0 : 1;
						continue;
					} else if (ret < 0) {
						/* some poll error occurred */
						if (!self->failure)
							logerr("failed to poll() for %s:%u: %s\n",
									self->ip, self->port, strerror(errno));
						close(self->fd);
						self->fd = -1;
						self->failure += self->failure >= FAIL_WAIT_TIME ? 0 : 1;
						continue;
					} else {
						if (ufds[0].revents & POLLHUP) {
							if (!self->failure)
								logerr("failed to connect() for %s:%u: "
										"Hangup\n", self->ip, self->port);
							close(self->fd);
							self->fd = -1;
							self->failure += self->failure >= FAIL_WAIT_TIME ? 0 : 1;
							continue;
						}
					}
				} else if (ret < 0) {
					if (!self->failure) {
						logerr("failed to connect() to %s:%u: %s\n",
								self->ip, self->port, strerror(errno));
						dispatch_check_rlimit_and_warn();
					}
					close(self->fd);
					self->fd = -1;
					self->failure += self->failure >= FAIL_WAIT_TIME ? 0 : 1;
					continue;
				}

				/* make socket blocking again */
				(void) fcntl(self->fd, F_SETFL, args);
			}

			/* ensure we will break out of connections being stuck more
			 * quickly than the kernel would give up; this is applied to
			 * whatever descriptor we ended up with, the result is
			 * deliberately ignored */
			timeout.tv_sec = 10;
			timeout.tv_usec = (rand() % 300) * 1000;
			setsockopt(self->fd, SOL_SOCKET, SO_SNDTIMEO,
					&timeout, sizeof(timeout));
#ifdef SO_NOSIGPIPE
			{
				/* the option is an int flag, it has to be handed a
				 * non-zero value in order to get switched on */
				int nosigpipe = 1;
				(void) setsockopt(self->fd, SOL_SOCKET, SO_NOSIGPIPE,
						&nosigpipe, sizeof(nosigpipe));
			}
#endif
		}

		/* send up to batch size */
		len = queue_dequeue_vector(self->batch, self->queue, self->bsize);
		self->batch[len] = NULL;
		metric = self->batch;

		if (len != 0 && !self->keep_running) {
			/* be noisy during shutdown so we can track any slowing down
			 * servers, possibly preventing us to shut down */
			logerr("shutting down %s:%u: waiting for %zu metrics\n",
					self->ip, self->port, len + queue_len(self->queue));
		}

		if (len == 0 && self->failure) {
			/* if we don't have anything to send, we have at least a
			 * connection succeed, so assume the server is up again,
			 * this is in particular important for recovering this
			 * node by probes, to avoid starvation of this server since
			 * its queue is possibly being offloaded to secondaries */
			if (self->ctype != CON_UDP)
				logerr("server %s:%u: OK after probe\n", self->ip, self->port);
			self->failure = 0;
		}

		for (; *metric != NULL; metric++) {
			len = strlen(*metric);
			slen = write(self->fd, *metric, len);
			if (slen < 0 || (size_t)slen != len) {
				/* not fully sent, or failure, close connection
				 * regardless so we don't get synchronisation problems,
				 * partially sent data is an error for us, since we use
				 * blocking sockets, and hence partial sent is
				 * indication of a failure */
				if (self->ctype != CON_UDP && !self->failure)
					logerr("failed to write() to %s:%u: %s\n",
							self->ip, self->port,
							(slen < 0 ? strerror(errno) : "uncomplete write"));
				close(self->fd);
				self->fd = -1;
				self->failure += self->failure >= FAIL_WAIT_TIME ? 0 : 1;
				/* put back stuff we couldn't process, starting with the
				 * metric that just failed */
				for (; *metric != NULL; metric++) {
					if (!queue_putback(self->queue, *metric)) {
						if (mode & MODE_DEBUG)
							logerr("server %s:%u: dropping metric: %s",
									self->ip, self->port, *metric);
						free((char *)*metric);
						self->dropped++;
					}
				}
				break;
			} else if (self->failure) {
				if (self->ctype != CON_UDP)
					logerr("server %s:%u: OK\n", self->ip, self->port);
				self->failure = 0;
			}
			free((char *)*metric);
			self->metrics++;
		}

		gettimeofday(&stop, NULL);
		self->ticks += timediff(start, stop);

		idle = 0;
	}
	self->running = 0;

	/* release what this thread acquired along the way */
	free(secpos);
	if (self->fd >= 0)
		close(self->fd);
	return NULL;
}
Exemple #3
0
/**
 * pthread compatible routine that handles connections and processes
 * whatever comes in on those.
 */
static void *
dispatch_runner(void *arg)
{
	dispatcher *self = (dispatcher *)arg;
	connection *conn;
	int work;
	int c;

	self->metrics = 0;
	self->ticks = 0;
	self->state = SLEEPING;

	if (self->type == LISTENER) {
		fd_set fds;
		int maxfd = -1;
		struct timeval tv;
		while (self->keep_running) {
			FD_ZERO(&fds);
			tv.tv_sec = 0;
			tv.tv_usec = 250 * 1000;  /* 250 ms */
			for (c = 0; c < sizeof(listeners) / sizeof(connection *); c++) {
				conn = listeners[c];
				if (conn == NULL)
					break;
				FD_SET(conn->sock, &fds);
				if (conn->sock > maxfd)
					maxfd = conn->sock;
			}
			if (select(maxfd + 1, &fds, NULL, NULL, &tv) > 0) {
				for (c = 0; c < sizeof(listeners) / sizeof(connection *); c++) {
					conn = listeners[c];
					if (conn == NULL)
						break;
					if (FD_ISSET(conn->sock, &fds)) {
						int client;
						struct sockaddr addr;
						socklen_t addrlen = sizeof(addr);

						if ((client = accept(conn->sock, &addr, &addrlen)) < 0)
						{
							logerr("dispatch: failed to "
									"accept() new connection: %s\n",
									strerror(errno));
							dispatch_check_rlimit_and_warn();
							continue;
						}
						if (dispatch_addconnection(client) == -1) {
							close(client);
							continue;
						}
					}
				}
			}
		}
	} else if (self->type == CONNECTION) {
		while (self->keep_running) {
			work = 0;
			if (self->route_refresh_pending) {
				self->routes = self->pending_routes;
				self->pending_routes = NULL;
				self->route_refresh_pending = 0;
			}

			pthread_rwlock_rdlock(&connectionslock);
			for (c = 0; c < connectionslen; c++) {
				conn = &(connections[c]);
				/* atomically try to "claim" this connection */
				if (!__sync_bool_compare_and_swap(&(conn->takenby), 0, self->id))
					continue;
				self->state = RUNNING;
				work += dispatch_connection(conn, self);
			}
			pthread_rwlock_unlock(&connectionslock);

			self->state = SLEEPING;
			/* nothing done, avoid spinlocking */
			if (self->keep_running && work == 0)
				usleep((100 + (rand() % 200)) * 1000);  /* 100ms - 300ms */
		}
	} else {
		logerr("huh? unknown self type!\n");
	}

	return NULL;
}