/*
 * Store the number of messages an actor has received so far.
 *
 * The total reported by thorium_actor_get_sum_of_received_messages() is
 * written into self->last_actor_received_messages under the actor name.
 * An existing entry is overwritten; otherwise a new entry is added.
 *
 * A NULL actor is silently ignored.
 */
void thorium_balancer_update_actor_production(struct thorium_balancer *self, struct thorium_actor *actor)
{
    int received_total;
    int actor_name;

    if (actor == NULL) {
        return;
    }

    received_total = thorium_actor_get_sum_of_received_messages(actor);
    actor_name = thorium_actor_name(actor);

    /* Updating succeeds only when the key is already present. */
    if (core_map_update_value(&self->last_actor_received_messages, &actor_name,
                    &received_total)) {
        return;
    }

    core_map_add_value(&self->last_actor_received_messages, &actor_name,
                    &received_total);
}
void thorium_worker_print_actors(struct thorium_worker *worker, struct thorium_balancer *scheduler) { struct core_map_iterator iterator; int name; int count; struct thorium_actor *actor; int producers; int consumers; int received; int difference; int script; struct core_map distribution; int frequency; struct thorium_script *script_object; int dead; int node_name; int worker_name; int previous_amount; node_name = thorium_node_name(worker->node); worker_name = worker->name; core_map_iterator_init(&iterator, &worker->actors); printf("node/%d worker/%d %d queued messages, received: %d busy: %d load: %f ring: %d scheduled actors: %d/%d\n", node_name, worker_name, thorium_worker_get_scheduled_message_count(worker), thorium_worker_get_sum_of_received_actor_messages(worker), thorium_worker_is_busy(worker), thorium_worker_get_scheduling_epoch_load(worker), core_fast_ring_size_from_producer(&worker->actors_to_schedule), thorium_scheduler_size(&worker->scheduler), (int)core_map_size(&worker->actors)); core_map_init(&distribution, sizeof(int), sizeof(int)); while (core_map_iterator_get_next_key_and_value(&iterator, &name, NULL)) { actor = thorium_node_get_actor_from_name(worker->node, name); if (actor == NULL) { continue; } dead = thorium_actor_dead(actor); if (dead) { continue; } count = thorium_actor_get_mailbox_size(actor); received = thorium_actor_get_sum_of_received_messages(actor); producers = core_map_size(thorium_actor_get_received_messages(actor)); consumers = core_map_size(thorium_actor_get_sent_messages(actor)); previous_amount = 0; core_map_get_value(&worker->actor_received_messages, &name, &previous_amount); difference = received - previous_amount;; if (!core_map_update_value(&worker->actor_received_messages, &name, &received)) { core_map_add_value(&worker->actor_received_messages, &name, &received); } printf(" [%s/%d] mailbox: %d received: %d (+%d) producers: %d consumers: %d\n", thorium_actor_script_name(actor), name, count, received, difference, producers, 
consumers); script = thorium_actor_script(actor); if (core_map_get_value(&distribution, &script, &frequency)) { ++frequency; core_map_update_value(&distribution, &script, &frequency); } else { frequency = 1; core_map_add_value(&distribution, &script, &frequency); } } /*printf("\n");*/ core_map_iterator_destroy(&iterator); core_map_iterator_init(&iterator, &distribution); printf("node/%d worker/%d Frequency list\n", node_name, worker_name); while (core_map_iterator_get_next_key_and_value(&iterator, &script, &frequency)) { script_object = thorium_node_find_script(worker->node, script); CORE_DEBUGGER_ASSERT(script_object != NULL); printf("node/%d worker/%d Frequency %s => %d\n", node_name, worker->name, thorium_script_name(script_object), frequency); } core_map_iterator_destroy(&iterator); core_map_destroy(&distribution); }
/* This can only be called from the CONSUMER */ int thorium_worker_dequeue_actor(struct thorium_worker *worker, struct thorium_actor **actor) { int value; int name; struct thorium_actor *other_actor; int other_name; int operations; int status; int mailbox_size; operations = 4; other_actor = NULL; /* Move an actor from the ring to the real actor scheduling queue */ while (operations-- && core_fast_ring_pop_from_consumer(&worker->actors_to_schedule, &other_actor)) { #ifdef CORE_DEBUGGER_ENABLE_ASSERT if (other_actor == NULL) { printf("NULL pointer pulled from ring, operations %d ring size %d\n", operations, core_fast_ring_size_from_consumer(&worker->actors_to_schedule)); } #endif CORE_DEBUGGER_ASSERT(other_actor != NULL); other_name = thorium_actor_name(other_actor); #ifdef THORIUM_WORKER_DEBUG_SCHEDULER printf("ring.DEQUEUE %d\n", other_name); #endif if (core_set_find(&worker->evicted_actors, &other_name)) { #ifdef THORIUM_WORKER_DEBUG_SCHEDULER printf("ALREADY EVICTED\n"); #endif continue; } if (!core_map_get_value(&worker->actors, &other_name, &status)) { /* Add the actor to the list of actors. * This does nothing if it is already in the list. */ status = STATUS_IDLE; core_map_add_value(&worker->actors, &other_name, &status); core_map_iterator_destroy(&worker->actor_iterator); core_map_iterator_init(&worker->actor_iterator, &worker->actors); } /* If the actor is not queued, queue it */ if (status == STATUS_IDLE) { status = STATUS_QUEUED; core_map_update_value(&worker->actors, &other_name, &status); thorium_scheduler_enqueue(&worker->scheduler, other_actor); } else { #ifdef THORIUM_WORKER_DEBUG_SCHEDULER printf("SCHEDULER %d already scheduled to run, scheduled: %d\n", other_name, (int)core_set_size(&worker->queued_actors)); #endif } } /* Now, dequeue an actor from the real queue. 
* If it has more than 1 message, re-enqueue it */ value = thorium_scheduler_dequeue(&worker->scheduler, actor); /* Setting name to nobody; * check_production at the end uses the value and the name; * if value is false, check_production is not using name anyway. */ name = THORIUM_ACTOR_NOBODY; /* an actor is ready to be run and it was dequeued from the scheduling queue. */ if (value) { name = thorium_actor_name(*actor); #ifdef THORIUM_WORKER_DEBUG_SCHEDULER printf("scheduler.DEQUEUE actor %d, removed from queued actors...\n", name); #endif mailbox_size = thorium_actor_get_mailbox_size(*actor); /* The actor has only one message and it is going to * be processed now. */ if (mailbox_size == 1) { #ifdef THORIUM_WORKER_DEBUG_SCHEDULER printf("SCHEDULER %d has no message to schedule...\n", name); #endif /* Set the status of the worker to STATUS_IDLE * * TODO: the ring new tail might not be visible too. * That could possibly be a problem... */ status = STATUS_IDLE; core_map_update_value(&worker->actors, &name, &status); /* The actor still has a lot of messages * to process. Keep them coming. */ } else if (mailbox_size >= 2) { /* Add the actor to the scheduling queue if it * still has messages */ #ifdef THORIUM_WORKER_DEBUG_SCHEDULER printf("Scheduling actor %d again, messages: %d\n", name, thorium_actor_get_mailbox_size(*actor)); #endif /* The status is still STATUS_QUEUED */ thorium_scheduler_enqueue(&worker->scheduler, *actor); /* The actor is scheduled to run, but the new tail is not * yet visible apparently. * * Solution, push back the actor in the scheduler queue, it can take a few cycles to see cache changes across cores. (MESIF protocol) * * This is done below. */ } else /* if (mailbox_size == 0) */ { status = STATUS_IDLE; core_map_update_value(&worker->actors, &name, &status); value = 0; } } thorium_worker_check_production(worker, value, name); return value; }
void thorium_worker_check_production(struct thorium_worker *worker, int value, int name) { uint64_t time; uint64_t elapsed; struct thorium_actor *other_actor; int mailbox_size; int status; uint64_t threshold; /* * If no actor is scheduled to run, things are getting out of hand * and this is bad for business. * * So, here, an actor is poked for inactivity */ if (!value) { ++worker->ticks_without_production; } else { worker->ticks_without_production = 0; } /* * If too many cycles were spent doing nothing, * check the fast ring since there could be issue in the * cache coherency of the CPU, even with the memory fences. * * This should not happen theoretically. * */ if (worker->ticks_without_production >= THORIUM_WORKER_UNPRODUCTIVE_TICK_LIMIT) { if (core_map_iterator_get_next_key_and_value(&worker->actor_iterator, &name, NULL)) { other_actor = thorium_node_get_actor_from_name(worker->node, name); mailbox_size = 0; if (other_actor != NULL) { mailbox_size = thorium_actor_get_mailbox_size(other_actor); } if (mailbox_size > 0) { thorium_scheduler_enqueue(&worker->scheduler, other_actor); status = STATUS_QUEUED; core_map_update_value(&worker->actors, &name, &status); } } else { /* Rewind the iterator. */ core_map_iterator_destroy(&worker->actor_iterator); core_map_iterator_init(&worker->actor_iterator, &worker->actors); /*worker->ticks_without_production = 0;*/ } /* * If there is still nothing, tell the operating system that the thread * needs to sleep. 
* * The operating system is: * - Linux on Cray XE6, * - Linux on Cray XC30, * - IBM Compute Node Kernel (CNK) on IBM Blue Gene/Q), */ if (worker->waiting_is_enabled) { /* This is a first warning */ if (worker->waiting_start_time == 0) { worker->waiting_start_time = core_timer_get_nanoseconds(&worker->timer); } else { time = core_timer_get_nanoseconds(&worker->timer); elapsed = time - worker->waiting_start_time; threshold = THORIUM_WORKER_UNPRODUCTIVE_MICROSECONDS_FOR_WAIT; /* Convert microseconds to nanoseconds */ threshold *= 1000; /* Verify the elapsed time. * There are 1000 nanoseconds in 1 microsecond. */ if (elapsed >= threshold) { /* * Here, the worker will wait until it receives a signal. * Such a signal will mean that something is ready to be consumed. */ /* Reset the time */ worker->waiting_start_time = 0; #ifdef THORIUM_WORKER_DEBUG_WAIT_SIGNAL printf("DEBUG worker/%d will wait, elapsed %d\n", worker->name, (int)elapsed); #endif /* */ thorium_worker_wait(worker); } } } } }
void thorium_balancer_generate_symmetric_migrations(struct thorium_balancer *self, struct core_map *symmetric_actor_scripts, struct core_vector *migrations) { int i; int worker_count; struct thorium_worker *worker; struct core_map *set; struct core_map_iterator iterator; struct thorium_migration migration; struct core_map script_current_worker; struct core_map script_current_worker_actor_count; int frequency; int current_worker; int current_worker_actor_count; int old_worker; #ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY struct thorium_script *actual_script; #endif struct thorium_node *node; int actor_name; int script; int new_worker; struct thorium_actor *actor; int enabled; /* Gather symmetric actors: */ #ifdef THORIUM_SCHEDULER_ENABLE_SYMMETRIC_SCHEDULING enabled = 1; #else enabled = 0; #endif core_map_init(&script_current_worker, sizeof(int), sizeof(int)); core_map_init(&script_current_worker_actor_count, sizeof(int), sizeof(int)); node = thorium_worker_pool_get_node(self->pool); worker_count = thorium_worker_pool_worker_count(self->pool); for (i = 0; i < worker_count; i++) { worker = thorium_worker_pool_get_worker(self->pool, i); set = thorium_worker_get_actors(worker); core_map_iterator_init(&iterator, set); while (core_map_iterator_get_next_key_and_value(&iterator, &actor_name, NULL)) { actor = thorium_node_get_actor_from_name(node, actor_name); if (actor == NULL) { continue; } script = thorium_actor_script(actor); /* * Check if the actor is symmetric */ if (core_map_get_value(symmetric_actor_scripts, &script, &frequency)) { current_worker = 0; if (!core_map_get_value(&script_current_worker, &script, ¤t_worker)) { core_map_add_value(&script_current_worker, &script, ¤t_worker); } current_worker_actor_count = 0; if (!core_map_get_value(&script_current_worker_actor_count, &script, ¤t_worker_actor_count)) { core_map_add_value(&script_current_worker_actor_count, &script, ¤t_worker_actor_count); } /* * Emit migration instruction */ old_worker = 
thorium_balancer_get_actor_worker(self, actor_name); new_worker = current_worker; #ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY actual_script = thorium_node_find_script(node, script); #endif if (enabled && old_worker != new_worker) { thorium_migration_init(&migration, actor_name, old_worker, new_worker); core_vector_push_back(migrations, &migration); thorium_migration_destroy(&migration); #ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY printf("[EMIT] "); #endif } else { #ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY printf("[MOCK] "); #endif } #ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY printf("SCHEDULER -> symmetric placement... %s/%d scheduled for execution on worker/%d of node/%d\n", thorium_script_description(actual_script), actor_name, new_worker, thorium_node_name(node)); #endif ++current_worker_actor_count; core_map_update_value(&script_current_worker_actor_count, &script, ¤t_worker_actor_count); /* The current worker is full. * Increment the current worker and set the * worker actor count to 0. */ if (current_worker_actor_count == frequency) { ++current_worker; core_map_update_value(&script_current_worker, &script, ¤t_worker); current_worker_actor_count = 0; core_map_update_value(&script_current_worker_actor_count, &script, ¤t_worker_actor_count); } } } core_map_iterator_destroy(&iterator); } core_map_destroy(&script_current_worker); core_map_destroy(&script_current_worker_actor_count); }
void thorium_balancer_detect_symmetric_scripts(struct thorium_balancer *self, struct core_map *symmetric_actor_scripts) { int i; struct thorium_worker *worker; struct thorium_actor *actor; struct core_map_iterator iterator; struct core_map *set; int actor_name; struct thorium_node *node; int script; int frequency; struct core_map frequencies; int worker_count; int population_per_worker; #ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY struct thorium_script *actual_script; #endif worker_count = thorium_worker_pool_worker_count(self->pool); core_map_init(&frequencies, sizeof(int), sizeof(int)); node = thorium_worker_pool_get_node(self->pool); /* Gather frequencies */ for (i = 0; i < worker_count; i++) { worker = thorium_worker_pool_get_worker(self->pool, i); set = thorium_worker_get_actors(worker); core_map_iterator_init(&iterator, set); while (core_map_iterator_get_next_key_and_value(&iterator, &actor_name, NULL)) { actor = thorium_node_get_actor_from_name(node, actor_name); if (actor == NULL) { continue; } script = thorium_actor_script(actor); frequency = 0; if (!core_map_get_value(&frequencies, &script, &frequency)) { core_map_add_value(&frequencies, &script, &frequency); } ++frequency; core_map_update_value(&frequencies, &script, &frequency); } core_map_iterator_destroy(&iterator); } /* * Detect symmetric scripts */ core_map_iterator_init(&iterator, &frequencies); while (core_map_iterator_get_next_key_and_value(&iterator, &script, &frequency)) { #ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY actual_script = thorium_node_find_script(node, script); #endif #ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY printf("SCHEDULER test symmetry %s %d\n", thorium_script_description(actual_script), frequency); #endif if (frequency % worker_count == 0) { population_per_worker = frequency / worker_count; core_map_add_value(symmetric_actor_scripts, &script, &population_per_worker); #ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY printf("SCHEDULER: script %s is symmetric, worker_count: %d, 
population_per_worker: %d\n", thorium_script_description(actual_script), worker_count, population_per_worker); #endif } } core_map_iterator_destroy(&iterator); core_map_destroy(&frequencies); }