void thorium_worker_print_actors(struct thorium_worker *worker, struct thorium_balancer *scheduler)
{
    struct core_map_iterator iterator;
    int name;
    int count;
    struct thorium_actor *actor;
    int producers;
    int consumers;
    int received;
    int difference;
    int script;
    struct core_map distribution;
    int frequency;
    struct thorium_script *script_object;
    int dead;
    int node_name;
    int worker_name;
    int previous_amount;

    node_name = thorium_node_name(worker->node);
    worker_name = worker->name;

    core_map_iterator_init(&iterator, &worker->actors);

    printf("node/%d worker/%d %d queued messages, received: %d busy: %d load: %f ring: %d scheduled actors: %d/%d\n",
                    node_name, worker_name,
                    thorium_worker_get_scheduled_message_count(worker),
                    thorium_worker_get_sum_of_received_actor_messages(worker),
                    thorium_worker_is_busy(worker),
                    thorium_worker_get_scheduling_epoch_load(worker),
                    core_fast_ring_size_from_producer(&worker->actors_to_schedule),
                    thorium_scheduler_size(&worker->scheduler),
                    (int)core_map_size(&worker->actors));

    core_map_init(&distribution, sizeof(int), sizeof(int));

    while (core_map_iterator_get_next_key_and_value(&iterator, &name, NULL)) {
        actor = thorium_node_get_actor_from_name(worker->node, name);

        if (actor == NULL) {
            continue;
        }

        dead = thorium_actor_dead(actor);

        if (dead) {
            continue;
        }

        count = thorium_actor_get_mailbox_size(actor);
        received = thorium_actor_get_sum_of_received_messages(actor);
        producers = core_map_size(thorium_actor_get_received_messages(actor));
        consumers = core_map_size(thorium_actor_get_sent_messages(actor));
        previous_amount = 0;

        core_map_get_value(&worker->actor_received_messages, &name, &previous_amount);
        difference = received - previous_amount;

        if (!core_map_update_value(&worker->actor_received_messages, &name, &received)) {
            core_map_add_value(&worker->actor_received_messages, &name, &received);
        }

        printf(" [%s/%d] mailbox: %d received: %d (+%d) producers: %d consumers: %d\n",
                        thorium_actor_script_name(actor),
                        name, count, received, difference,
                        producers, consumers);

        script = thorium_actor_script(actor);

        /* Count how many actors of each script this worker runs. */
        if (core_map_get_value(&distribution, &script, &frequency)) {
            ++frequency;
            core_map_update_value(&distribution, &script, &frequency);
        } else {
            frequency = 1;
            core_map_add_value(&distribution, &script, &frequency);
        }
    }

    /*printf("\n");*/

    core_map_iterator_destroy(&iterator);

    core_map_iterator_init(&iterator, &distribution);

    printf("node/%d worker/%d Frequency list\n", node_name, worker_name);

    while (core_map_iterator_get_next_key_and_value(&iterator, &script, &frequency)) {
        script_object = thorium_node_find_script(worker->node, script);

        CORE_DEBUGGER_ASSERT(script_object != NULL);

        printf("node/%d worker/%d Frequency %s => %d\n",
                        node_name, worker_name,
                        thorium_script_name(script_object), frequency);
    }

    core_map_iterator_destroy(&iterator);
    core_map_destroy(&distribution);
}
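/*
 * Balance the actor placement across the workers of this node.
 *
 * Outline of the algorithm implemented below:
 *
 * 1. Exit early if nothing changed since the last round, or if every
 *    worker already runs at a perfect epoch load.
 * 2. Compute scheduling epoch loads for all workers and classify them
 *    with percentiles: at or below the SCHEDULER_WINDOW-th percentile a
 *    worker is stalled, at or above the (100 - SCHEDULER_WINDOW)-th
 *    percentile it is burdened, otherwise it is normal.
 * 3. On each burdened worker, select candidate actors whose departure
 *    would not drag the projected load below the median.
 * 4. Match candidates, in decreasing order of message production, with
 *    stalled workers that can absorb their load.
 * 5. Apply the migrations, update production counters, and reset the
 *    scheduling epochs.
 */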
void thorium_balancer_balance(struct thorium_balancer *self)
{
    /*
     * The 95th percentile is useful:
     * \see http://en.wikipedia.org/wiki/Burstable_billing
     * \see http://www.init7.net/en/backbone/95-percent-rule
     */
    int load_percentile_50;
    struct core_timer timer;
    int i;
    struct core_vector loads;
    struct core_vector loads_unsorted;
    struct core_vector burdened_workers;
    struct core_vector stalled_workers;
    struct thorium_worker *worker;
    struct thorium_node *node;
    /*struct core_set *set;*/
    struct core_pair pair;
    struct core_vector_iterator vector_iterator;
    int old_worker;
    int actor_name;
    int messages;
    int maximum;
    int with_maximum;
    struct core_map *set;
    struct core_map_iterator set_iterator;
    int stalled_index;
    int stalled_count;
    int new_worker_index;
    struct core_vector migrations;
    struct thorium_migration migration;
    struct thorium_migration *migration_to_do;
    struct thorium_actor *actor;
    int candidates;
    int load_value;
    int remaining_load;
    int projected_load;
    struct core_vector actors_to_migrate;
    int total;
    int with_messages;
    int stalled_percentile;
    int burdened_percentile;
    int old_total;
    int old_load;
    int new_load;
    int predicted_new_load;
    struct core_pair *pair_pointer;
    struct thorium_worker *new_worker;
    /*int new_total;*/
    int actor_load;
    int test_stalled_index;
    int tests;
    int found_match;
    int spawned_actors;
    int killed_actors;
    int perfect;

#ifdef THORIUM_SCHEDULER_ENABLE_SYMMETRIC_SCHEDULING
    struct core_map symmetric_actor_scripts;
    int script;
#endif

    node = thorium_worker_pool_get_node(self->pool);
    spawned_actors = thorium_node_get_counter(node, CORE_COUNTER_SPAWNED_ACTORS);

    /* There is nothing to balance... */
    if (spawned_actors == 0) {
        return;
    }

    killed_actors = thorium_node_get_counter(node, CORE_COUNTER_KILLED_ACTORS);

    /*
     * The system can probably not be balanced into a better
     * shape anyway.
     */
    if (spawned_actors == self->last_spawned_actors
                    && killed_actors == self->last_killed_actors
                    && self->last_migrations == 0) {
        printf("SCHEDULER: balance can not be improved because nothing changed.\n");
        return;
    }

    /* Check if the balance is already perfect. */
    perfect = 1;

    for (i = 0; i < thorium_worker_pool_worker_count(self->pool); i++) {
        worker = thorium_worker_pool_get_worker(self->pool, i);
        load_value = thorium_worker_get_epoch_load(worker) * 100;

        if (load_value != 100) {
            perfect = 0;
            break;
        }
    }

    if (perfect) {
        printf("SCHEDULER: perfect balance can not be improved.\n");
        return;
    }

    /* Update the counters. */
    self->last_spawned_actors = spawned_actors;
    self->last_killed_actors = killed_actors;

    /* Otherwise, try to balance things. */
    core_timer_init(&timer);
    core_timer_start(&timer);

#ifdef THORIUM_SCHEDULER_ENABLE_SYMMETRIC_SCHEDULING
    core_map_init(&symmetric_actor_scripts, sizeof(int), sizeof(int));
    thorium_balancer_detect_symmetric_scripts(self, &symmetric_actor_scripts);
#endif

#ifdef THORIUM_WORKER_ENABLE_LOCK
    /* Lock all workers first. */
    for (i = 0; i < thorium_worker_pool_worker_count(self->pool); i++) {
        worker = thorium_worker_pool_get_worker(self->pool, i);
        thorium_worker_lock(worker);
    }
#endif

    core_vector_init(&migrations, sizeof(struct thorium_migration));

#ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY
    printf("BALANCING\n");
#endif

    core_vector_init(&loads, sizeof(int));
    core_vector_init(&loads_unsorted, sizeof(int));
    core_vector_init(&burdened_workers, sizeof(struct core_pair));
    core_vector_init(&stalled_workers, sizeof(struct core_pair));
    core_vector_init(&actors_to_migrate, sizeof(struct core_pair));

    for (i = 0; i < thorium_worker_pool_worker_count(self->pool); i++) {
        worker = thorium_worker_pool_get_worker(self->pool, i);
        load_value = thorium_worker_get_scheduling_epoch_load(worker) * SCHEDULER_PRECISION;

#if 0
        printf("DEBUG LOAD %d %d\n", i, load_value);
#endif

        core_vector_push_back(&loads, &load_value);
        core_vector_push_back(&loads_unsorted, &load_value);
    }

    core_vector_sort_int(&loads);

    stalled_percentile = core_statistics_get_percentile_int(&loads, SCHEDULER_WINDOW);
    /*load_percentile_25 = core_statistics_get_percentile_int(&loads, 25);*/
    load_percentile_50 = core_statistics_get_percentile_int(&loads, 50);
    /*load_percentile_75 = core_statistics_get_percentile_int(&loads, 75);*/
    burdened_percentile = core_statistics_get_percentile_int(&loads, 100 - SCHEDULER_WINDOW);

#ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY
    printf("Percentiles for epoch loads: ");
    core_statistics_print_percentiles_int(&loads);
#endif
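
    /*
     * Classify workers with the percentiles computed above: a worker at
     * or below the SCHEDULER_WINDOW-th percentile is stalled, a worker
     * at or above the (100 - SCHEDULER_WINDOW)-th percentile is
     * burdened, and everything in between is normal. When the two
     * percentiles collapse to the same value, every worker is normal.
     */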
    for (i = 0; i < thorium_worker_pool_worker_count(self->pool); i++) {
        worker = thorium_worker_pool_get_worker(self->pool, i);
        load_value = core_vector_at_as_int(&loads_unsorted, i);
        set = thorium_worker_get_actors(worker);

        if (stalled_percentile == burdened_percentile) {
#ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY
            printf("scheduling_class:%s ", THORIUM_CLASS_NORMAL_STRING);
#endif
        } else if (load_value <= stalled_percentile) {
#ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY
            printf("scheduling_class:%s ", THORIUM_CLASS_STALLED_STRING);
#endif
            core_pair_init(&pair, load_value, i);
            core_vector_push_back(&stalled_workers, &pair);
        } else if (load_value >= burdened_percentile) {
#ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY
            printf("scheduling_class:%s ", THORIUM_CLASS_BURDENED_STRING);
#endif
            core_pair_init(&pair, load_value, i);
            core_vector_push_back(&burdened_workers, &pair);
        } else {
#ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY
            printf("scheduling_class:%s ", THORIUM_CLASS_NORMAL_STRING);
#endif
        }

#ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY
        thorium_worker_print_actors(worker, self);
#endif
    }
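
    /*
     * The most burdened workers give away actors first, and stalled
     * workers are filled starting with the least loaded one.
     */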
    core_vector_sort_int_reverse(&burdened_workers);
    core_vector_sort_int(&stalled_workers);

    stalled_count = core_vector_size(&stalled_workers);

#ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY
    printf("MIGRATIONS (stalled: %d, burdened: %d)\n",
                    (int)core_vector_size(&stalled_workers),
                    (int)core_vector_size(&burdened_workers));
#endif

    stalled_index = 0;

    core_vector_iterator_init(&vector_iterator, &burdened_workers);

    while (stalled_count > 0
                    && core_vector_iterator_get_next_value(&vector_iterator, &pair)) {

        old_worker = core_pair_get_second(&pair);
        worker = thorium_worker_pool_get_worker(self->pool, old_worker);
        set = thorium_worker_get_actors(worker);

        /*
        thorium_worker_print_actors(worker);
        printf("\n");
        */

        /*
         * Lock the worker and try to select actors for migration.
         */
        core_map_iterator_init(&set_iterator, set);

        /* First pass: gather production statistics for this worker. */
        maximum = -1;
        with_maximum = 0;
        total = 0;
        with_messages = 0;

        while (core_map_iterator_get_next_key_and_value(&set_iterator, &actor_name, NULL)) {
            actor = thorium_node_get_actor_from_name(thorium_worker_pool_get_node(self->pool), actor_name);
            messages = thorium_balancer_get_actor_production(self, actor);

            if (maximum == -1 || messages > maximum) {
                maximum = messages;
                with_maximum = 1;
            } else if (messages == maximum) {
                with_maximum++;
            }

            if (messages > 0) {
                ++with_messages;
            }

            total += messages;
        }

        core_map_iterator_destroy(&set_iterator);
        core_map_iterator_init(&set_iterator, set);

        --with_maximum;

        candidates = 0;
        load_value = thorium_worker_get_scheduling_epoch_load(worker) * SCHEDULER_PRECISION;

        remaining_load = load_value;

#if 0
        printf("maximum %d with_maximum %d\n", maximum, with_maximum);
#endif

        /* Second pass: pick candidate actors for migration. */
        while (core_map_iterator_get_next_key_and_value(&set_iterator, &actor_name, NULL)) {
            actor = thorium_node_get_actor_from_name(thorium_worker_pool_get_node(self->pool), actor_name);

            if (actor == NULL) {
                continue;
            }

            messages = thorium_balancer_get_actor_production(self, actor);

#ifdef THORIUM_SCHEDULER_ENABLE_SYMMETRIC_SCHEDULING
            script = thorium_actor_script(actor);

            /* Symmetric actors are migrated elsewhere. */
            if (core_map_get_value(&symmetric_actor_scripts, &script, NULL)) {
                continue;
            }
#endif

            /* Simulate the remaining load. */
            projected_load = remaining_load;
            projected_load -= ((0.0 + messages) / total) * load_value;

#ifdef THORIUM_SCHEDULER_DEBUG
            printf(" TESTING actor %d, production was %d, projected_load is %d (- %d * %d / %d)\n",
                            actor_name, messages, projected_load,
                            load_value, messages, total);
#endif

            /* An actor without any queued messages should not be migrated. */
            if (messages > 0
                        && ((with_maximum > 0 && messages == maximum) || messages < maximum)
                        /*
                         * Avoid removing too many actors because
                         * generating a stalled worker is not desired.
                         */
                        && (projected_load >= load_percentile_50
                                /*
                                 * The previous rule does not apply when
                                 * there are 2 actors with messages.
                                 */
                                || with_messages == 2)) {

                remaining_load = projected_load;
                candidates++;

                if (messages == maximum) {
                    --with_maximum;
                }

                core_pair_init(&pair, messages, actor_name);
                core_vector_push_back(&actors_to_migrate, &pair);

#ifdef THORIUM_SCHEDULER_DEBUG
                printf("early CANDIDATE for migration: actor %d, worker %d\n",
                                actor_name, old_worker);
#endif
            }
        }

        core_map_iterator_destroy(&set_iterator);
    }

    core_vector_iterator_destroy(&vector_iterator);
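
    /*
     * Candidates were pushed as (production, actor) pairs, so the
     * reverse sort below matches the most productive actors with
     * stalled workers first.
     */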
    /* Sort the candidates. */
    /*
    core_vector_sort_int(&actors_to_migrate);

    printf("Percentiles for production: ");
    core_statistics_print_percentiles_int(&actors_to_migrate);
    */

    /*
     * Sort them in reverse order.
     */
    core_vector_sort_int_reverse(&actors_to_migrate);

    core_vector_iterator_init(&vector_iterator, &actors_to_migrate);

    /*
     * For each highly active actor,
     * try to match it with a stalled worker.
     */
    while (core_vector_iterator_get_next_value(&vector_iterator, &pair)) {

        actor_name = core_pair_get_second(&pair);
        actor = thorium_node_get_actor_from_name(thorium_worker_pool_get_node(self->pool), actor_name);

        if (actor == NULL) {
            continue;
        }

        messages = thorium_balancer_get_actor_production(self, actor);
        old_worker = thorium_actor_assigned_worker(actor);
        worker = thorium_worker_pool_get_worker(self->pool, old_worker);

        /*
         * old_total can not be 0 because otherwise the worker would
         * not be burdened.
         */
        old_total = thorium_worker_get_production(worker, self);
        with_messages = thorium_worker_get_producer_count(worker, self);
        old_load = thorium_worker_get_scheduling_epoch_load(worker) * SCHEDULER_PRECISION;
        actor_load = ((0.0 + messages) / old_total) * old_load;

        /* Try to find a stalled worker that can take it. */
        test_stalled_index = stalled_index;
        tests = 0;
        predicted_new_load = 0;
        found_match = 0;

        while (tests < stalled_count) {

            core_vector_get_value(&stalled_workers, test_stalled_index, &pair);
            new_worker_index = core_pair_get_second(&pair);
            new_worker = thorium_worker_pool_get_worker(self->pool, new_worker_index);
            new_load = thorium_worker_get_scheduling_epoch_load(new_worker) * SCHEDULER_PRECISION;
            /*new_total = thorium_worker_get_production(new_worker);*/

            predicted_new_load = new_load + actor_load;

            if (predicted_new_load > SCHEDULER_PRECISION /* && with_messages != 2 */) {
#ifdef THORIUM_SCHEDULER_DEBUG
                printf("Scheduler: skipping actor %d, predicted load is %d >= 100\n",
                                actor_name, predicted_new_load);
#endif
                ++tests;
                ++test_stalled_index;

                if (test_stalled_index == stalled_count) {
                    test_stalled_index = 0;
                }
                continue;
            }

            /* Otherwise, this stalled worker is fine... */
            stalled_index = test_stalled_index;
            found_match = 1;

            break;
        }

        /* This actor can not be migrated to any stalled worker. */
        if (!found_match) {
            continue;
        }
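
        /*
         * The predicted load is written back into the stalled_workers
         * pair below, so later candidates in this loop account for the
         * actors already assigned to that worker.
         */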
        /*
         * Otherwise, update the load of the stalled worker and go
         * forward with the change.
         */
        pair_pointer = (struct core_pair *)core_vector_at(&stalled_workers, stalled_index);
        core_pair_set_first(pair_pointer, predicted_new_load);

        ++stalled_index;

        if (stalled_index == stalled_count) {
            stalled_index = 0;
        }

#if 0
        new_worker = thorium_worker_pool_get_worker(pool, new_worker_index);

        printf(" CANDIDATE: actor %d old worker %d (%d - %d = %d) new worker %d (%d + %d = %d)\n",
                        actor_name,
                        old_worker, value, messages, new_score,
                        new_worker_index, new_worker_old_score, messages, new_worker_new_score);
#endif

        thorium_migration_init(&migration, actor_name, old_worker, new_worker_index);
        core_vector_push_back(&migrations, &migration);
        thorium_migration_destroy(&migration);
    }

    core_vector_iterator_destroy(&vector_iterator);

    core_vector_destroy(&stalled_workers);
    core_vector_destroy(&burdened_workers);
    core_vector_destroy(&loads);
    core_vector_destroy(&loads_unsorted);
    core_vector_destroy(&actors_to_migrate);

    /* Update the last production values and reset the epochs. */
    for (i = 0; i < thorium_worker_pool_worker_count(self->pool); i++) {
        worker = thorium_worker_pool_get_worker(self->pool, i);
        set = thorium_worker_get_actors(worker);

        core_map_iterator_init(&set_iterator, set);

        while (core_map_iterator_get_next_key_and_value(&set_iterator, &actor_name, NULL)) {
            actor = thorium_node_get_actor_from_name(thorium_worker_pool_get_node(self->pool), actor_name);
            thorium_balancer_update_actor_production(self, actor);
        }
        core_map_iterator_destroy(&set_iterator);

        thorium_worker_reset_scheduling_epoch(worker);
    }

#ifdef THORIUM_SCHEDULER_ENABLE_SYMMETRIC_SCHEDULING
    /* Generate migrations for symmetric actors. */
    thorium_balancer_generate_symmetric_migrations(self, &symmetric_actor_scripts, &migrations);
#endif

    /* Actually do the migrations. */
    core_vector_iterator_init(&vector_iterator, &migrations);

    while (core_vector_iterator_next(&vector_iterator, (void **)&migration_to_do)) {
        thorium_balancer_migrate(self, migration_to_do);
    }
    core_vector_iterator_destroy(&vector_iterator);

    self->last_migrations = core_vector_size(&migrations);

    core_vector_destroy(&migrations);

#ifdef THORIUM_WORKER_ENABLE_LOCK
    /* Unlock all workers. */
    for (i = 0; i < thorium_worker_pool_worker_count(self->pool); i++) {
        worker = thorium_worker_pool_get_worker(self->pool, i);
        thorium_worker_unlock(worker);
    }
#endif

#ifdef THORIUM_SCHEDULER_ENABLE_SYMMETRIC_SCHEDULING
    core_map_destroy(&symmetric_actor_scripts);
#endif

    core_timer_stop(&timer);

    printf("SCHEDULER: elapsed time for balancing: %d us, %d migrations performed\n",
                    (int)(core_timer_get_elapsed_nanoseconds(&timer) / 1000),
                    self->last_migrations);
}