Exemplo n.º 1
0
/*
 * Handle an ACTION_PING message.
 *
 * The last sizeof(uint64_t) bytes of the message buffer are read as the
 * checksum expected by the sender; the bytes before them are hashed with
 * core_hash_data_uint64_t() and SEED, and both values are compared.
 * The "passed" or "failed" counter of the concrete actor is incremented
 * accordingly, and an empty ACTION_PING_REPLY is always sent back.
 *
 * A message that is too short to contain a checksum (or that has no buffer)
 * is counted as failed instead of being read out of bounds.
 */
void process_ping(struct thorium_actor *self, struct thorium_message *message)
{
    int count;
    int checksum_size;
    char *buffer;
    int buffer_size;
    uint64_t *bucket;
    uint64_t expected_checksum;
    uint64_t actual_checksum;
    struct process *concrete_self;

    concrete_self = (struct process *)thorium_actor_concrete_actor(self);
    buffer = thorium_message_buffer(message);
    count = thorium_message_count(message);
    checksum_size = (int)sizeof(expected_checksum);

    /*
     * Without this guard, count - sizeof(...) becomes negative for short
     * messages and the checksum would be read from before the buffer.
     */
    if (buffer == NULL || count < checksum_size) {
        printf("TRANSPORT FAILED source: %d (%d) destination: %d (%d) tag: ACTION_PING count: %d"
                        " (message is too short to contain a checksum)\n",
                        thorium_message_source(message),
                        thorium_message_source_node(message),
                        thorium_message_destination(message),
                        thorium_message_destination_node(message),
                        count);

        ++concrete_self->failed;

        thorium_actor_send_reply_empty(self, ACTION_PING_REPLY);
        return;
    }

    /*
     * The payload comes first, the checksum is stored right after it.
     *
     * NOTE(review): the checksum is read through a uint64_t pointer at an
     * offset that is not necessarily a multiple of 8 -- confirm that the
     * target platforms tolerate unaligned 64-bit loads.
     */
    buffer_size = count - checksum_size;
    bucket = (uint64_t *)(buffer + buffer_size);
    expected_checksum = *bucket;
    actual_checksum = core_hash_data_uint64_t(buffer, buffer_size, SEED);

    if (expected_checksum != actual_checksum) {
        printf("TRANSPORT FAILED source: %d (%d) destination: %d (%d) tag: ACTION_PING count: %d"
                        " expected_checksum: %" PRIu64 " actual_checksum: %" PRIu64 "\n",
                        thorium_message_source(message),
                        thorium_message_source_node(message),
                        thorium_message_destination(message),
                        thorium_message_destination_node(message),
                        count,
                        expected_checksum, actual_checksum);

        ++concrete_self->failed;
    } else {
        ++concrete_self->passed;
    }

    thorium_actor_send_reply_empty(self, ACTION_PING_REPLY);
}
Exemplo n.º 2
0
/*
 * Initialize "self" from "old_message".
 *
 * The action, count and buffer of old_message are handed to
 * thorium_message_init(); the source and destination are then copied
 * with the corresponding setters.
 */
void thorium_message_init_copy(struct thorium_message *self, struct thorium_message *old_message)
{
    int action;
    int count;
    void *buffer;

    action = thorium_message_action(old_message);
    count = thorium_message_count(old_message);
    buffer = thorium_message_buffer(old_message);

    thorium_message_init(self, action, count, buffer);

    /* Routing information is not part of thorium_message_init(). */
    thorium_message_set_source(self, thorium_message_source(old_message));
    thorium_message_set_destination(self, thorium_message_destination(old_message));
}
Exemplo n.º 3
0
/*
 * Deliver a message to its destination actor and make sure that the actor
 * is scheduled on a worker.
 *
 * Returns 0 when the destination actor can not be found or is dead; the
 * message is then queued in pool->messages_for_triage.
 * Returns 1 otherwise, including when the mailbox refused the message and
 * the message was buffered in pool->inbound_message_queue_buffer.
 */
int thorium_worker_pool_give_message_to_actor(struct thorium_worker_pool *pool, struct thorium_message *message)
{
    struct thorium_actor *actor;
    struct thorium_worker *target_worker;
    int actor_name;
    int assigned_worker;

    actor = thorium_node_get_actor_from_name(pool->node,
                    thorium_message_destination(message));

    /* Unknown destination: the message goes to triage. */
    if (actor == NULL) {
#ifdef THORIUM_WORKER_POOL_DEBUG_DEAD_CHANNEL
        printf("DEAD LETTER CHANNEL...\n");
#endif

        core_fast_queue_enqueue(&pool->messages_for_triage, message);
        return 0;
    }

    /* A dead actor must not receive anything: triage too. */
    if (thorium_actor_dead(actor)) {
        core_fast_queue_enqueue(&pool->messages_for_triage, message);
        return 0;
    }

    actor_name = thorium_actor_name(actor);

    /* Try the mailbox; on failure keep the message for a later attempt. */
    if (!thorium_actor_enqueue_mailbox_message(actor, message)) {
#ifdef THORIUM_WORKER_POOL_DEBUG_MESSAGE_BUFFERING
        printf("DEBUG897 could not enqueue message, buffering...\n");
#endif

        core_fast_queue_enqueue(&pool->inbound_message_queue_buffer, message);
        return 1;
    }

    /*
     * The message is now in the mailbox of the actor.
     * Find the worker assigned to the actor, asking for an assignment
     * first if the balancer does not know one yet.
     */
    assigned_worker = thorium_balancer_get_actor_worker(&pool->balancer, actor_name);

    if (assigned_worker < 0) {
        thorium_worker_pool_assign_worker_to_actor(pool, actor_name);
        assigned_worker = thorium_balancer_get_actor_worker(&pool->balancer, actor_name);
    }

    target_worker = thorium_worker_pool_get_worker(pool, assigned_worker);

    /*
     * Put the actor on the scheduling queue of its worker; if the worker
     * refuses it, park the actor in the pool's own buffer.
     */
    if (!thorium_worker_enqueue_actor(target_worker, actor)) {
        core_fast_queue_enqueue(&pool->scheduled_actor_queue_buffer, &actor);
    }

    return 1;
}
Exemplo n.º 4
0
/*
 * Try to append a small message to the multiplexed buffer of its
 * destination node instead of sending it on its own.
 *
 * Returns 1 if the message was multiplexed (appended to a buffer).
 * Returns 0 if the caller must send the message itself, which happens when:
 *   - the multiplexer has FLAG_DISABLED set;
 *   - the policy says the action must be skipped (only when compiled with
 *     THORIUM_MULTIPLEXER_USE_ACTIONS_TO_SKIP);
 *   - the destination actor is THORIUM_ACTOR_NOBODY;
 *   - the message needs more space than the maximum size of the buffer.
 *
 * This is O(1) in regard to the number of thorium nodes.
 */
int thorium_message_multiplexer_multiplex(struct thorium_message_multiplexer *self,
                struct thorium_message *message)
{
    /*
     * If buffer is full, use thorium_node_send_with_transport
     *
     * get count
     *
     * if count is below or equal to the threshold
     *      multiplex the message.
     *      return 1
     *
     * return 0
     */

    int count;
    int current_size;
    int maximum_size;
    int action;
    struct core_memory_pool *pool;
    void *new_buffer;
    int new_count;
    void *buffer;
    int destination_node;
    int destination_actor;
    int new_size;
    int required_size;
    struct thorium_multiplexed_buffer *real_multiplexed_buffer;
    uint64_t time;
    int next_node_in_route;
    int source_node;
    int current_node;

#ifdef DEBUG_MULTIPLEXER
    thorium_printf("multiplex\n");
    thorium_message_print(message);
#endif

    /*
     * A disabled multiplexer never takes ownership of a message.
     */
    if (CORE_BITMAP_GET_FLAG(self->flags, FLAG_DISABLED)) {
        return 0;
    }

    action = thorium_message_action(message);

    CORE_DEBUGGER_ASSERT(action != ACTION_INVALID);

#ifdef THORIUM_MULTIPLEXER_USE_ACTIONS_TO_SKIP
    /*
     * Don't multiplex already-multiplexed messages.
     */
    if (thorium_multiplexer_policy_is_action_to_skip(self->policy, action)) {
        return 0;
    }
#endif

#ifdef CONFIG_MULTIPLEXER_USE_DECISION_MAKER
    thorium_message_multiplexer_update_timeout(self);
#endif

    /*
     * Note that this counter is also incremented for messages that are
     * rejected further below (nobody destination, too large).
     */
    ++self->original_message_count;

    count = thorium_message_count(message);

    destination_node = thorium_message_destination_node(message);

    /*
     * Ask the router for the next hop between this node and the
     * destination node.
     */
    source_node = message->routing_source;
    current_node = self->node->name;
    next_node_in_route = thorium_router_get_next_rank_in_route(&self->router,
                    source_node, current_node, destination_node);

    /*
    thorium_message_print(message);
    thorium_printf("router: source_node %d current_node %d next_node_in_route %d"
                    " destination_node %d\n",
                    source_node, current_node,
                    next_node_in_route, destination_node);
                    */

#ifdef CONFIG_USE_TOPOLOGY_AWARE_AGGREGATION
    /*
     * The next node in the route for this message is
     * next_node_in_route.
     * From here on, the buffer that is used is the one of the next hop,
     * not the one of the final destination.
     */
    destination_node = next_node_in_route;
#endif

    CORE_DEBUGGER_ASSERT(source_node >= 0);

    /*
     * There is one multiplexed buffer per node, indexed by node.
     */
    real_multiplexed_buffer = core_vector_at(&self->buffers, destination_node);

    CORE_DEBUGGER_ASSERT(real_multiplexed_buffer != NULL);

    required_size = thorium_multiplexed_buffer_required_size(real_multiplexed_buffer, count);

    buffer = thorium_message_buffer(message);
    destination_actor = thorium_message_destination(message);

#ifdef DEBUG_MULTIPLEXER
    thorium_printf("DEBUG multiplex count %d required_size %d action %x\n",
                    count, required_size, action);
#endif

    /*
     * Don't multiplex non-actor messages.
     */
    if (destination_actor == THORIUM_ACTOR_NOBODY) {
        return 0;
    }

    /*
     * NOTE(review): this diagnostic is compiled whenever the macro
     * CORE_DEBUGGER_ASSERT is defined, and it runs after
     * real_multiplexed_buffer was already passed to
     * thorium_multiplexed_buffer_required_size() above -- confirm whether
     * it should be moved before the first use.
     */
#ifdef CORE_DEBUGGER_ASSERT
    if (real_multiplexed_buffer == NULL) {
        thorium_printf("Error action %d destination_node %d destination_actor %d\n", action, destination_node,
                        destination_actor);
    }
#endif

    current_size = thorium_multiplexed_buffer_current_size(real_multiplexed_buffer);
    maximum_size = thorium_multiplexed_buffer_maximum_size(real_multiplexed_buffer);

    /*
     * Don't multiplex large messages.
     */
    if (required_size > maximum_size) {

#ifdef DEBUG_MULTIPLEXER
        thorium_printf("too large required_size %d maximum_size %d\n", required_size, maximum_size);
#endif
        return 0;
    }

    /*
    thorium_printf("MULTIPLEX_MESSAGE\n");
    */

    new_size = current_size + required_size;

    /*
     * Flush now if there is no space left for the <required_size> bytes
     */
    if (new_size > maximum_size) {

#ifdef DEBUG_MULTIPLEXER
        thorium_printf("thorium_message_multiplexer: must FLUSH thorium_message_multiplexer_multiplex required_size %d new_size %d maximum_size %d\n",
                    required_size, new_size, maximum_size);
#endif

        thorium_message_multiplexer_flush(self, destination_node, FORCE_YES_SIZE);
        /*
         * Read the size again: the code below relies on current_size
         * to detect an empty buffer.
         */
        current_size = thorium_multiplexed_buffer_current_size(real_multiplexed_buffer);

        CORE_DEBUGGER_ASSERT(current_size == 0);
    }

    time = core_timer_get_nanoseconds(&self->timer);

    /*
     * If the buffer is empty before adding the data, it means that it is not
     * in the list of buffers with content and it must be added.
     */
    if (current_size == 0) {

        thorium_multiplexed_buffer_set_time(real_multiplexed_buffer, time);

#ifdef THORIUM_MULTIPLEXER_TRACK_BUFFERS_WITH_CONTENT
        core_set_add(&self->buffers_with_content, &destination_node);
#endif

        /*
         * Add it to the timeline.
         * The container used for the timeline is selected at compile time.
         */

#ifdef THORIUM_MULTIPLEXER_USE_TREE
        core_red_black_tree_add_key_and_value(&self->timeline, &time, &destination_node);

#elif defined(THORIUM_MULTIPLEXER_USE_HEAP)
        core_binary_heap_insert(&self->timeline, &time, &destination_node);
#elif defined(THORIUM_MULTIPLEXER_USE_QUEUE)
        core_queue_enqueue(&self->timeline, &destination_node);
#endif

    }

    /*
     * The allocation of buffer is lazy.
     * The current worker is an exporter of small message for the destination
     * "destination_node".
     * The memory comes from the outbound message memory pool of the worker
     * and has room for the payload plus THORIUM_MESSAGE_METADATA_SIZE bytes.
     */
    if (thorium_multiplexed_buffer_buffer(real_multiplexed_buffer) == NULL) {
        pool = thorium_worker_get_outbound_message_memory_pool(self->worker);

        new_count = self->buffer_size_in_bytes + THORIUM_MESSAGE_METADATA_SIZE;
        new_buffer = core_memory_pool_allocate(pool, new_count);

        thorium_multiplexed_buffer_set_buffer(real_multiplexed_buffer, new_buffer);
    }

    /*
    thorium_printf("DEBUG worker_latency %d ns\n",
                    thorium_worker_latency(self->worker));
                    */

    thorium_multiplexed_buffer_append(real_multiplexed_buffer, count, buffer, time);

    /*
     * Try to flush. Nothing is flushed unless
     * thorium_message_multiplexer_buffer_is_ready() reports that the
     * buffer is ready.
     */

    if (thorium_message_multiplexer_buffer_is_ready(self, real_multiplexed_buffer)) {

        /*
         * Try to flush here too. This is required in order to satisfy the
         * technical requirement of a DOA limit.
         *
         * Obviously, don't flush if there is some outbound traffic congestion.
         * Otherwise, there will be too many messages on the network.
         */
        if (!thorium_worker_has_outbound_traffic_congestion(self->worker)) {
            thorium_message_multiplexer_flush(self, destination_node, FORCE_YES_SIZE);
        }
    }

    /*
     * Verify invariant.
     */
    CORE_DEBUGGER_ASSERT(thorium_multiplexed_buffer_current_size(real_multiplexed_buffer)<= maximum_size);

    /*
     * The message content was appended to the multiplexed buffer.
     */
    return 1;
}
Exemplo n.º 5
0
/*
 * Prepare an outbound message and enqueue it with
 * thorium_worker_enqueue_message().
 *
 * If the buffer of the message is not the zero-copy buffer of the worker
 * (or is NULL), a buffer is obtained from thorium_worker_allocate(), the
 * payload is copied into it and the message is updated to point to it.
 * Metadata is always written, and worker->zero_copy_buffer is reset to
 * NULL at the end.
 */
void thorium_worker_send(struct thorium_worker *worker, struct thorium_message *message)
{
    void *buffer;
    void *old_buffer;
    int count;

    old_buffer = thorium_message_buffer(message);
    count = thorium_message_count(message);

    /*
     * On the zero-copy path the buffer is used as-is. Initializing it here
     * also keeps the debug output below from reading an uninitialized
     * variable.
     */
    buffer = old_buffer;

    /*
     * Allocate a buffer if the actor provided a NULL buffer or if it
     * provided its own buffer.
     */
    if (old_buffer == NULL
                    || old_buffer != worker->zero_copy_buffer) {

        /* use slab allocator */
        buffer = thorium_worker_allocate(worker, count);

        /*
         * A copy with a count of 0 still needs valid addresses, so the
         * copy is skipped entirely when there is no payload.
         */
        if (count > 0) {

#ifdef DISPLAY_COPY_WARNING
            printf("thorium_worker: Warning, not using zero-copy path, action %x count %d source %d destination %d\n",
                            thorium_message_action(message), count, thorium_message_source(message),
                            thorium_message_destination(message));
#endif
            core_memory_copy(buffer, old_buffer, count);
        }

        thorium_message_set_buffer(message, buffer);
    }

    /*
     * Always write metadata.
     */
    thorium_message_write_metadata(message);

#ifdef THORIUM_WORKER_DEBUG_INJECTION
    ++worker->counter_allocated_outbound_buffers;
#endif

#ifdef THORIUM_WORKER_DEBUG_MEMORY
    printf("ALLOCATE %p\n", buffer);
#endif

#ifdef THORIUM_WORKER_DEBUG
    /*
     * Only values that exist in this function are printed; the previous
     * version referenced undeclared variables.
     */
    printf("[thorium_worker_send] using %i bytes for buffer %p\n",
                    count, buffer);

    printf("thorium_worker_send old buffer: %p\n",
                    old_buffer);
#endif

#ifdef THORIUM_BUG_594
    if (thorium_message_action(message) == 30202) {
        printf("DEBUG-594 thorium_worker_send\n");
        thorium_message_print(message);
    }
#endif

#ifdef THORIUM_WORKER_DEBUG_20140601
    if (thorium_message_action(message) == 1100) {
        printf("DEBUG thorium_worker_send 1100\n");
    }
#endif

    thorium_worker_enqueue_message(worker, message);

    /*
     * The zero-copy buffer, if any, now belongs to the message.
     */
    worker->zero_copy_buffer = NULL;
}
Exemplo n.º 6
0
/*
 * Run one iteration of the worker.
 *
 * In order, one iteration does the following:
 *   1. (THORIUM_NODE_ENABLE_INSTRUMENTATION only) refresh the load report
 *      when at least THORIUM_NODE_LOAD_PERIOD has elapsed, as measured
 *      with time().
 *   2. dequeue at most one scheduled actor and run it with
 *      thorium_worker_work().
 *   3. move at most one message from outbound_message_queue_buffer to the
 *      outbound_message_queue ring, putting it back in the buffer if the
 *      ring refuses it.
 *   4. (THORIUM_NODE_INJECT_CLEAN_WORKER_BUFFERS only) free at most one
 *      clean outbound buffer.
 *   5. transfer at most one message from clean_message_queue_for_triage.
 *
 * With THORIUM_WORKER_ENABLE_LOCK, the whole iteration runs between
 * thorium_worker_lock() and thorium_worker_unlock().
 */
void thorium_worker_run(struct thorium_worker *worker)
{
    struct thorium_actor *actor;
    struct thorium_message other_message;

#ifdef THORIUM_NODE_INJECT_CLEAN_WORKER_BUFFERS
    void *buffer;
#endif

#ifdef THORIUM_NODE_ENABLE_INSTRUMENTATION
    time_t current_time;
    int elapsed;
    int period;
    uint64_t current_nanoseconds;
    uint64_t elapsed_nanoseconds;
#endif

#ifdef THORIUM_WORKER_DEBUG
    int tag;
    int destination;
    struct thorium_message *message;
#endif

#ifdef THORIUM_WORKER_ENABLE_LOCK
    thorium_worker_lock(worker);
#endif

#ifdef THORIUM_NODE_ENABLE_INSTRUMENTATION
    period = THORIUM_NODE_LOAD_PERIOD;
    current_time = time(NULL);

    elapsed = current_time - worker->last_report;

    if (elapsed >= period) {

        current_nanoseconds = core_timer_get_nanoseconds(&worker->timer);

#ifdef THORIUM_WORKER_DEBUG_LOAD
        printf("DEBUG Updating load report\n");
#endif
        elapsed_nanoseconds = current_nanoseconds - worker->epoch_start_in_nanoseconds;

        /*
         * The load of the epoch is the used time divided by the elapsed
         * time; the test also avoids a division by zero.
         */
        if (elapsed_nanoseconds > 0) {
            worker->epoch_load = (0.0 + worker->epoch_used_nanoseconds) / elapsed_nanoseconds;
            worker->epoch_used_nanoseconds = 0;
            worker->last_wake_up_count = core_thread_get_wake_up_count(&worker->thread);

            /* \see http://stackoverflow.com/questions/9657993/negative-zero-in-c
             *
             * Negative zero compares equal to 0, so this assignment
             * replaces it with positive zero.
             */
            if (worker->epoch_load == 0) {
                worker->epoch_load = 0;
            }

            worker->epoch_start_in_nanoseconds = current_nanoseconds;
            worker->last_report = current_time;
        }

#ifdef THORIUM_WORKER_PRINT_SCHEDULING_QUEUE

        /*
        if (thorium_node_name(worker->node) == 0
                        && worker->name == 0) {
                        */

        thorium_scheduler_print(&worker->scheduler,
                        thorium_node_name(worker->node),
                        worker->name);
            /*
        }
        */
#endif

        if (core_bitmap_get_bit_uint32_t(&worker->flags, FLAG_DEBUG_ACTORS)) {
            thorium_worker_print_actors(worker, NULL);
        }
    }
#endif

#ifdef THORIUM_WORKER_DEBUG
    if (core_bitmap_get_bit_uint32_t(&worker->flags, FLAG_DEBUG)) {
        printf("DEBUG worker/%d thorium_worker_run\n",
                        thorium_worker_name(worker));
    }
#endif

    /* check if an actor is scheduled on this worker */
    if (thorium_worker_dequeue_actor(worker, &actor)) {

        /*
         * NOTE(review): the block below uses "work", which is not declared
         * in this function -- it looks like leftover code that can not
         * compile when THORIUM_WORKER_DEBUG is defined; confirm and remove.
         */
#ifdef THORIUM_WORKER_DEBUG
        message = biosal_work_message(&work);
        tag = thorium_message_action(message);
        destination = thorium_message_destination(message);

        if (tag == ACTION_ASK_TO_STOP) {
            printf("DEBUG pulled ACTION_ASK_TO_STOP for %d\n",
                            destination);
        }
#endif

        /*
         * Update the priority of the actor
         * before starting the timer because this is part of the
         * runtime system (RTS).
         */

#ifdef THORIUM_UPDATE_SCHEDULING_PRIORITIES
        thorium_priority_assigner_update(&worker->scheduler, actor);
#endif

#ifdef THORIUM_NODE_ENABLE_INSTRUMENTATION
        core_timer_start(&worker->timer);
#endif

        /*
         * FLAG_BUSY is set only for the duration of thorium_worker_work().
         */
        core_bitmap_set_bit_uint32_t(&worker->flags, FLAG_BUSY);

        /*
         * Run the dequeued actor on this worker.
         */
        thorium_worker_work(worker, actor);

        core_bitmap_clear_bit_uint32_t(&worker->flags, FLAG_BUSY);

#ifdef THORIUM_NODE_ENABLE_INSTRUMENTATION
        core_timer_stop(&worker->timer);

        elapsed_nanoseconds = core_timer_get_elapsed_nanoseconds(&worker->timer);

        /*
         * NOTE(review): this branch is empty, so crossing the granularity
         * threshold currently has no effect.
         */
        if (elapsed_nanoseconds >= THORIUM_GRANULARITY_WARNING_THRESHOLD) {
        }

        /*
         * Account the time spent in the actor in every running total.
         */
        worker->epoch_used_nanoseconds += elapsed_nanoseconds;
        worker->loop_used_nanoseconds += elapsed_nanoseconds;
        worker->scheduling_epoch_used_nanoseconds += elapsed_nanoseconds;

        worker->last_elapsed_nanoseconds = elapsed_nanoseconds;
#endif
    }

    /* queue buffered message
     *
     * If the ring refuses the message, it goes back into the buffer.
     */
    if (core_fast_queue_dequeue(&worker->outbound_message_queue_buffer, &other_message)) {

        if (!core_fast_ring_push_from_producer(&worker->outbound_message_queue, &other_message)) {

#ifdef SHOW_FULL_RING_WARNINGS
            printf("thorium_worker: Warning: ring is full => outbound_message_queue\n");
#endif

            core_fast_queue_enqueue(&worker->outbound_message_queue_buffer, &other_message);
        }
    }

#ifdef THORIUM_NODE_INJECT_CLEAN_WORKER_BUFFERS
    /*
     * Free outbound buffers, if any
     */

    if (thorium_worker_fetch_clean_outbound_buffer(worker, &buffer)) {
        core_memory_pool_free(&worker->outbound_message_memory_pool, buffer);

#ifdef THORIUM_WORKER_DEBUG_INJECTION
        ++worker->counter_freed_outbound_buffers_from_other_workers;
#endif
    }
#endif

    /*
     * Transfer messages for triage
     */

    if (core_fast_queue_dequeue(&worker->clean_message_queue_for_triage, &other_message)) {

        CORE_DEBUGGER_ASSERT(thorium_message_buffer(&other_message) != NULL);
        thorium_worker_enqueue_message_for_triage(worker, &other_message);
    }

#ifdef THORIUM_WORKER_ENABLE_LOCK
    thorium_worker_unlock(worker);
#endif
}
Exemplo n.º 7
0
/*
 * Select the worker with the lowest epoch load among the next
 * THORIUM_SCHEDULER_WORK_SCHEDULING_WINDOW workers, starting at
 * self->worker_for_work.
 *
 * A worker with a score of 0 is selected immediately.
 *
 * The score of the selected worker is stored in *worker_score and its index
 * is returned, so both values always describe the same worker. If no worker
 * could be examined (empty window), the current value of
 * self->worker_for_work is returned with a score of 99.
 *
 * self->worker_for_work is advanced so that the next call starts the scan
 * on another worker.
 */
int thorium_balancer_select_worker_least_busy(
                struct thorium_balancer *self, int *worker_score)
{
    int to_check;
    int score;
    int best_score;
    int best_index;
    struct thorium_worker *worker;

    /* No candidate yet; valid worker indices are not negative. */
    best_index = -1;
    best_score = 99;

    to_check = THORIUM_SCHEDULER_WORK_SCHEDULING_WINDOW;

    while (to_check-- > 0) {

        /*
         * get the worker to test for this iteration.
         */
        worker = thorium_worker_pool_get_worker(self->pool, self->worker_for_work);

        score = thorium_worker_get_epoch_load(worker);

#ifdef THORIUM_WORKER_POOL_DEBUG_ISSUE_334
        if (score >= THORIUM_WORKER_WARNING_THRESHOLD
                        && (self->last_scheduling_warning == 0
                             || score >= self->last_scheduling_warning + THORIUM_WORKER_WARNING_THRESHOLD_STRIDE)) {
            printf("Warning: node %d worker %d has a scheduling score of %d\n",
                            thorium_node_name(thorium_worker_pool_get_node(self->pool)),
                            self->worker_for_work, score);

            self->last_scheduling_warning = score;
        }
#endif

        /* if the worker is not busy and it has no work to do,
         * select it right away...
         */
        if (score == 0) {
            best_index = self->worker_for_work;
            best_score = 0;
            break;
        }

        /* Otherwise, keep it if it is the best candidate so far.
         */
        if (best_index < 0 || score < best_score) {
            best_index = self->worker_for_work;
            best_score = score;
        }

        /*
         * assign the next worker
         */
        self->worker_for_work = thorium_worker_pool_next_worker(self->pool, self->worker_for_work);
    }

    /*
     * Nothing was examined: fall back to the current position.
     */
    if (best_index < 0) {
        best_index = self->worker_for_work;
    }

    /*
     * assign the next worker
     */
    self->worker_for_work = thorium_worker_pool_next_worker(self->pool, self->worker_for_work);

    *worker_score = best_score;

    /* This is a best effort algorithm
     */
    return best_index;
}