Example #1
0
/* destroy a worker */
void socket_worker_destroy(socket_worker_t * worker)
{
    uint32_t was_stopping = RELAY_ATOMIC_OR(worker->base.stopping, WORKER_STOPPING);

    /* Avoid race between worker_pool_reload_static and worker_pool_destroy_static().
     *
     * TODO: Another possible solution for this race could be a destructor thread
     * that waits on a semaphore and then destroys all.  Possible flaw: what is
     * a thread doesn't decrement the semaphore?
     *
     * Note that similar solution is used also by the graphite worker. */
    if (was_stopping & WORKER_STOPPING)
        return;

    pthread_join(worker->base.tid, NULL);

    LOCK_DESTROY(&worker->lock);

    free(worker->base.arg);
    free(worker);
}
Example #2
0
void set_abort_bits(uint32_t v) {
    RELAY_ATOMIC_OR(ABORT, v);
}
Example #3
0
/* initialize a worker safely */
socket_worker_t *socket_worker_create(const char *arg, const config_t * config)
{
    socket_worker_t *worker = calloc_or_fatal(sizeof(*worker));
    disk_writer_t *disk_writer = calloc_or_fatal(sizeof(disk_writer_t));

    if (worker == NULL || disk_writer == NULL)
        return NULL;

    int create_err;

    worker->base.config = config;
    worker->base.arg = strdup(arg);

    worker->exists = 1;

    if (!socketize(arg, &worker->base.output_socket, IPPROTO_TCP, RELAY_CONN_IS_OUTBOUND, "worker")) {
        FATAL("Failed to socketize worker");
        return NULL;
    }

    worker->disk_writer = disk_writer;

    disk_writer->base.config = config;
    disk_writer->counters = &worker->counters;
    disk_writer->recents = &worker->recents;
    disk_writer->totals = &worker->totals;

#define DECAY_1MIN 60
#define DECAY_5MIN (5 * DECAY_1MIN)
#define DECAY_15MIN (15 * DECAY_1MIN)

    rates_init(&worker->rates[0], DECAY_1MIN);
    rates_init(&worker->rates[1], DECAY_5MIN);
    rates_init(&worker->rates[2], DECAY_15MIN);

    LOCK_INIT(&worker->lock);

    /* setup spill_path */
    int wrote = snprintf(disk_writer->spill_path, PATH_MAX, "%s/event_relay.%s", config->spill_root,
                         worker->base.output_socket.arg_clean);
    if (wrote < 0 || wrote >= PATH_MAX) {
        FATAL("Failed to construct spill_path %s", disk_writer->spill_path);
        return NULL;
    }

    /* Create the disk_writer before we create the main worker.
     * We do this because the disk_writer only consumes things
     * that have been handled by the main worker, and vice versa
     * when the main worker fails to send then it might want to give
     * the item to the disk worker. If we did it the other way round
     * we might have something to assign to the disk worker but no
     * disk worker to assign it to.
     */
    create_err = pthread_create(&disk_writer->base.tid, NULL, disk_writer_thread, disk_writer);
    if (create_err) {
        FATAL("Failed to create disk worker, pthread error: %d", create_err);
        return NULL;
    }

    /* and finally create the thread */
    create_err = pthread_create(&worker->base.tid, NULL, socket_worker_thread, worker);
    if (create_err) {
        int join_err;

        /* we died, so shut down our "pet" disk worker, and then exit with a message */
        RELAY_ATOMIC_OR(disk_writer->base.stopping, WORKER_STOPPING);

        /* have to handle failure of the shutdown too */
        join_err = pthread_join(disk_writer->base.tid, NULL);

        if (join_err) {
            FATAL
                ("Failed to create socket worker, pthread error: %d, and also failed to join disk worker, pthread error: %d",
                 create_err, join_err);
        } else {
            FATAL("Failed to create socket worker, pthread error: %d, disk worker shut down ok", create_err);
        }
        return NULL;
    }

    /* return the worker */
    return worker;
}
Example #4
0
/* the main loop for the socket worker process */
void *socket_worker_thread(void *arg)
{
    socket_worker_t *self = (socket_worker_t *) arg;

    queue_t *main_queue = &self->queue;
    relay_socket_t *sck = NULL;

    queue_t private_queue;
    queue_t spill_queue;

    memset(&private_queue, 0, sizeof(queue_t));
    memset(&spill_queue, 0, sizeof(queue_t));

    const config_t *config = self->base.config;

    int join_err;

#define RATE_UPDATE_PERIOD 15
    time_t last_rate_update = 0;

    while (!RELAY_ATOMIC_READ(self->base.stopping)) {
        time_t now = time(NULL);

        if (!sck) {
            SAY("Opening forwarding socket");
            sck = open_output_socket_eventually(&self->base);
            if (sck == NULL || !(sck->type == SOCK_DGRAM || sck->type == SOCK_STREAM)) {
                FATAL_ERRNO("Failed to open forwarding socket");
                break;
            }
            connected_inc();
        }

        long since_rate_update = now - last_rate_update;
        if (since_rate_update >= RATE_UPDATE_PERIOD) {
            last_rate_update = now;
            update_rates(&self->rates[0], &self->totals, since_rate_update);
            update_rates(&self->rates[1], &self->totals, since_rate_update);
            update_rates(&self->rates[2], &self->totals, since_rate_update);
        }

        /* if we dont have anything in our local queue we need to hijack the main one */
        if (private_queue.head == NULL) {
            /* hijack the queue - copy the queue state into our private copy
             * and then reset the queue state to empty. So the formerly
             * shared queue is now private. We only do this if necessary.
             */
            if (!queue_hijack(main_queue, &private_queue, &GLOBAL.pool.lock)) {
                /* nothing to do, so sleep a while and redo the loop */
                worker_wait_millisec(config->polling_interval_millisec);
                continue;
            }
        }

        RELAY_ATOMIC_INCREMENT(self->counters.received_count, private_queue.count);

        /* ok, so we should have something in our queue to process */
        if (private_queue.head == NULL) {
            WARN("Empty private queue");
            break;
        }

        ssize_t wrote = 0;
        if (!process_queue(self, sck, &private_queue, &spill_queue, &wrote)) {
            if (!RELAY_ATOMIC_READ(self->base.stopping)) {
                WARN("Closing forwarding socket");
                close(sck->socket);
                sck = NULL;
                connected_dec();
            }
        }

        accumulate_and_clear_stats(&self->counters, &self->recents, &self->totals);
    }

    if (control_is(RELAY_STOPPING)) {
        SAY("Socket worker stopping, trying forwarding flush");
        stats_count_t old_sent = self->totals.sent_count;
        stats_count_t old_spilled = self->totals.spilled_count;
        stats_count_t old_dropped = self->totals.dropped_count;
        if (sck) {
            ssize_t wrote = 0;
            if (!process_queue(self, sck, &private_queue, &spill_queue, &wrote)) {
                WARN_ERRNO("Forwarding flush failed");
            }
            accumulate_and_clear_stats(&self->counters, &self->recents, &self->totals);
            SAY("Forwarding flush forwarded %zd bytes in %llu events, spilled %llu events, dropped %llu events ",
                wrote, (unsigned long long) (self->totals.sent_count - old_sent),
                (unsigned long long) (self->totals.spilled_count - old_spilled),
                (unsigned long long) (self->totals.dropped_count - old_dropped));
        } else {
            WARN("No forwarding socket to flush to");
        }
        SAY("Socket worker spilling any remaining events to disk");
        stats_count_t spilled = spill_all(self, &private_queue, &spill_queue);
        SAY("Socket worker spilled %llu events to disk", (unsigned long long) spilled);
    } else {
        accumulate_and_clear_stats(&self->counters, &self->recents, &self->totals);
    }

    SAY("worker[%s] in its lifetime received %lu sent %lu spilled %lu dropped %lu",
        (sck ? sck->to_string : self->base.arg),
        (unsigned long) RELAY_ATOMIC_READ(self->totals.received_count),
        (unsigned long) RELAY_ATOMIC_READ(self->totals.sent_count),
        (unsigned long) RELAY_ATOMIC_READ(self->totals.spilled_count),
        (unsigned long) RELAY_ATOMIC_READ(self->totals.dropped_count));

    if (sck) {
        close(sck->socket);
        connected_dec();
    }

    /* we are done so shut down our "pet" disk worker, and then exit with a message */
    RELAY_ATOMIC_OR(self->disk_writer->base.stopping, WORKER_STOPPING);

    join_err = pthread_join(self->disk_writer->base.tid, NULL);

    if (join_err)
        FATAL("shutting down disk_writer thread error: pthread error %d", join_err);

    free(self->disk_writer);

    return NULL;
}