void thorium_cfs_scheduler_print(struct thorium_scheduler *self) { struct thorium_cfs_scheduler *concrete_self; struct core_red_black_tree_iterator iterator; uint64_t virtual_runtime; struct thorium_actor *actor; int i; struct core_timer timer; core_timer_init(&timer); concrete_self = self->concrete_self; core_red_black_tree_iterator_init(&iterator, &concrete_self->tree); printf("[cfs_scheduler] %" PRIu64 " ns, timeline contains %d actors\n", core_timer_get_nanoseconds(&timer), core_red_black_tree_size(&concrete_self->tree)); i = 0; while (core_red_black_tree_iterator_get_next_key_and_value(&iterator, &virtual_runtime, &actor)) { printf("[%d] virtual_runtime= %" PRIu64 " actor= %s/%d\n", i, virtual_runtime, thorium_actor_script_name(actor), thorium_actor_name(actor)); ++i; } core_red_black_tree_iterator_destroy(&iterator); core_timer_destroy(&timer); }
void test_allocator(struct core_memory_pool *memory) { int i; int size; void *pointer; struct core_vector vector; struct core_timer timer; uint64_t elapsed; i = 1000000; size = 45; core_vector_init(&vector, sizeof(void *)); core_timer_init(&timer); core_timer_start(&timer); while (i--) { if (memory != NULL) { pointer = core_memory_pool_allocate(memory, size); } else { pointer = core_memory_allocate(size, -1); } core_vector_push_back(&vector, &pointer); } core_timer_stop(&timer); elapsed = core_timer_get_elapsed_nanoseconds(&timer); if (memory == NULL) { printf("Not using memory pool... "); } else { printf("Using memory pool... "); } printf("Elapsed : %" PRIu64 " milliseconds\n", elapsed / 1000 / 1000); size = core_vector_size(&vector); for (i = 0; i < size; ++i) { pointer = core_vector_at_as_void_pointer(&vector, i); if (memory != NULL) { core_memory_pool_free(memory, pointer); } else { core_memory_free(pointer, -1); } } core_vector_destroy(&vector); core_timer_destroy(&timer); }
void biosal_unitig_manager_init(struct thorium_actor *self) { struct biosal_unitig_manager *concrete_self; concrete_self = (struct biosal_unitig_manager *)thorium_actor_concrete_actor(self); core_vector_init(&concrete_self->spawners, sizeof(int)); core_vector_init(&concrete_self->graph_stores, sizeof(int)); core_vector_init(&concrete_self->visitors, sizeof(int)); core_vector_init(&concrete_self->walkers, sizeof(int)); concrete_self->completed = 0; concrete_self->manager = THORIUM_ACTOR_NOBODY; core_timer_init(&concrete_self->timer); concrete_self->state = STATE_VISITORS; }
void biosal_input_controller_init(struct thorium_actor *actor) { struct biosal_input_controller *concrete_actor; concrete_actor = (struct biosal_input_controller *)thorium_actor_concrete_actor(actor); core_map_init(&concrete_actor->mega_blocks, sizeof(int), sizeof(struct core_vector)); core_map_init(&concrete_actor->assigned_blocks, sizeof(int), sizeof(int)); core_vector_init(&concrete_actor->mega_block_vector, sizeof(struct biosal_mega_block)); core_vector_init(&concrete_actor->counting_streams, sizeof(int)); core_vector_init(&concrete_actor->reading_streams, sizeof(int)); core_vector_init(&concrete_actor->partition_commands, sizeof(int)); core_vector_init(&concrete_actor->stream_consumers, sizeof(int)); core_vector_init(&concrete_actor->consumer_active_requests, sizeof(int)); core_vector_init(&concrete_actor->files, sizeof(char *)); core_vector_init(&concrete_actor->spawners, sizeof(int)); core_vector_init(&concrete_actor->counts, sizeof(int64_t)); core_vector_init(&concrete_actor->consumers, sizeof(int)); core_vector_init(&concrete_actor->stores_per_spawner, sizeof(int)); core_timer_init(&concrete_actor->input_timer); core_timer_init(&concrete_actor->counting_timer); core_timer_init(&concrete_actor->distribution_timer); biosal_dna_codec_init(&concrete_actor->codec); if (biosal_dna_codec_must_use_two_bit_encoding(&concrete_actor->codec, thorium_actor_get_node_count(actor))) { biosal_dna_codec_enable_two_bit_encoding(&concrete_actor->codec); } core_queue_init(&concrete_actor->unprepared_spawners, sizeof(int)); concrete_actor->opened_streams = 0; concrete_actor->state = BIOSAL_INPUT_CONTROLLER_STATE_NONE; #ifdef BIOSAL_INPUT_CONTROLLER_DEBUG_10355 printf("DEBUG actor %d register ACTION_INPUT_CONTROLLER_CREATE_STORES\n", thorium_actor_name(actor)); #endif thorium_actor_add_action(actor, ACTION_INPUT_CONTROLLER_CREATE_STORES, biosal_input_controller_create_stores); thorium_actor_add_action(actor, ACTION_GET_NODE_NAME_REPLY, biosal_input_controller_get_node_name_reply); thorium_actor_add_action(actor, ACTION_GET_NODE_WORKER_COUNT_REPLY, biosal_input_controller_get_node_worker_count_reply); thorium_actor_add_action(actor, ACTION_INPUT_CONTROLLER_PREPARE_SPAWNERS, biosal_input_controller_prepare_spawners); thorium_actor_add_action(actor, ACTION_INPUT_CONTROLLER_SPAWN_READING_STREAMS, biosal_input_controller_spawn_streams); thorium_actor_add_action(actor, ACTION_INPUT_STREAM_SET_START_OFFSET_REPLY, biosal_input_controller_set_offset_reply); thorium_actor_add_script(actor, SCRIPT_INPUT_STREAM, &biosal_input_stream_script); thorium_actor_add_script(actor, SCRIPT_SEQUENCE_STORE, &biosal_sequence_store_script); thorium_actor_add_script(actor, SCRIPT_SEQUENCE_PARTITIONER, &biosal_sequence_partitioner_script); /* configuration for the input controller * other values for block size: 512, 1024, 2048, 4096, 8192 * / */ concrete_actor->block_size = 4096; concrete_actor->stores_per_worker_per_spawner = 0; #ifdef BIOSAL_INPUT_CONTROLLER_DEBUG printf("DEBUG %d init controller\n", thorium_actor_name(actor)); #endif concrete_actor->ready_spawners = 0; concrete_actor->ready_consumers = 0; concrete_actor->partitioner = THORIUM_ACTOR_NOBODY; concrete_actor->filled_consumers = 0; concrete_actor->counted = 0; }
/** * This is the architecture-independent kernel entry point. Before it is * called, architecture-specific code has done the bare minimum initialization * necessary. This function initializes the kernel and its various subsystems. * It calls back to architecture-specific code at several well defined points, * which all architectures must implement (e.g., setup_arch()). * * \callgraph */ void start_kernel() { unsigned int cpu; unsigned int timeout; int status; /* * Parse the kernel boot command line. * This is where boot-time configurable variables get set, * e.g., the ones with param() and DRIVER_PARAM() specifiers. */ parse_params(lwk_command_line); /* * Initialize the console subsystem. * printk()'s will be visible after this. */ console_init(); /* * Hello, Dave. */ printk("%s", lwk_banner); printk(KERN_DEBUG "%s\n", lwk_command_line); sort_exception_table(); /* * Do architecture specific initialization. * This detects memory, CPUs, architecture dependent irqs, etc. */ setup_arch(); /* * Setup the architecture independent interrupt handling. */ irq_init(); /* * Initialize the kernel memory subsystem. Up until now, the simple * boot-time memory allocator (bootmem) has been used for all dynamic * memory allocation. Here, the bootmem allocator is destroyed and all * of the free pages it was managing are added to the kernel memory * pool (kmem) or the user memory pool (umem). * * After this point, any use of the bootmem allocator will cause a * kernel panic. The normal kernel memory subsystem API should be used * instead (e.g., kmem_alloc() and kmem_free()). */ mem_subsys_init(); /* * Initialize the address space management subsystem. */ aspace_subsys_init(); sched_init_runqueue(0); /* This CPUs scheduler state + idle task */ sched_add_task(current); /* now safe to call schedule() */ /* * Initialize the task scheduling subsystem. */ core_timer_init(0); /* Start the kernel filesystems */ kfs_init(); /* * Initialize the random number generator. */ rand_init(); workq_init(); /* * Boot all of the other CPUs in the system, one at a time. */ printk(KERN_INFO "Number of CPUs detected: %d\n", num_cpus()); for_each_cpu_mask(cpu, cpu_present_map) { /* The bootstrap CPU (that's us) is already booted. */ if (cpu == 0) { cpu_set(cpu, cpu_online_map); continue; } printk(KERN_DEBUG "Booting CPU %u.\n", cpu); arch_boot_cpu(cpu); /* Wait for ACK that CPU has booted (5 seconds max). */ for (timeout = 0; timeout < 50000; timeout++) { if (cpu_isset(cpu, cpu_online_map)) break; udelay(100); } if (!cpu_isset(cpu, cpu_online_map)) panic("Failed to boot CPU %d.\n", cpu); } /* * Initialize the PCI subsystem. */ init_pci(); /* * Enable external interrupts. */ local_irq_enable(); #ifdef CONFIG_NETWORK /* * Bring up any network devices. */ netdev_init(); #endif #ifdef CONFIG_CRAY_GEMINI driver_init_list("net", "gemini"); #endif #ifdef CONFIG_BLOCK_DEVICE /** * Initialize the block devices */ blkdev_init(); #endif mcheck_init_late(); /* * And any modules that need to be started. */ driver_init_by_name( "module", "*" ); #ifdef CONFIG_KGDB /* * Stop eary (before "late" devices) in KGDB if requested */ kgdb_initial_breakpoint(); #endif /* * Bring up any late init devices. */ driver_init_by_name( "late", "*" ); /* * Bring up the Linux compatibility layer, if enabled. */ linux_init(); #ifdef CONFIG_DEBUG_HW_NOISE /* Measure noise/interference in the underlying hardware/VMM */ extern void measure_noise(int, uint64_t); measure_noise(0, 0); #endif /* * Start up user-space... */ printk(KERN_INFO "Loading initial user-level task (init_task)...\n"); if ((status = create_init_task()) != 0) panic("Failed to create init_task (status=%d).", status); current->state = TASK_EXITED; schedule(); /* This should not return */ BUG(); }
void spate_init(struct thorium_actor *self) { struct spate *concrete_self; concrete_self = (struct spate *)thorium_actor_concrete_actor(self); core_vector_init(&concrete_self->initial_actors, sizeof(int)); core_vector_init(&concrete_self->sequence_stores, sizeof(int)); core_vector_init(&concrete_self->graph_stores, sizeof(int)); concrete_self->is_leader = 0; concrete_self->input_controller = THORIUM_ACTOR_NOBODY; concrete_self->manager_for_sequence_stores = THORIUM_ACTOR_NOBODY; concrete_self->assembly_graph = THORIUM_ACTOR_NOBODY; concrete_self->assembly_graph_builder = THORIUM_ACTOR_NOBODY; core_timer_init(&concrete_self->timer); thorium_actor_add_action(self, ACTION_START, spate_start); thorium_actor_add_action(self, ACTION_ASK_TO_STOP, spate_ask_to_stop); thorium_actor_add_action(self, ACTION_SPAWN_REPLY, spate_spawn_reply); thorium_actor_add_action(self, ACTION_MANAGER_SET_SCRIPT_REPLY, spate_set_script_reply); thorium_actor_add_action(self, ACTION_SET_CONSUMERS_REPLY, spate_set_consumers_reply); thorium_actor_add_action(self, ACTION_SET_BLOCK_SIZE_REPLY, spate_set_block_size_reply); thorium_actor_add_action(self, ACTION_INPUT_DISTRIBUTE_REPLY, spate_distribute_reply); thorium_actor_add_action(self, ACTION_SPATE_ADD_FILES, spate_add_files); thorium_actor_add_action(self, ACTION_SPATE_ADD_FILES_REPLY, spate_add_files_reply); thorium_actor_add_action(self, ACTION_ADD_FILE_REPLY, spate_add_file_reply); thorium_actor_add_action(self, ACTION_START_REPLY, spate_start_reply); /* * Register required actor scripts now */ thorium_actor_add_script(self, SCRIPT_INPUT_CONTROLLER, &biosal_input_controller_script); thorium_actor_add_script(self, SCRIPT_DNA_KMER_COUNTER_KERNEL, &biosal_dna_kmer_counter_kernel_script); thorium_actor_add_script(self, SCRIPT_MANAGER, &core_manager_script); thorium_actor_add_script(self, SCRIPT_WRITER_PROCESS, &core_writer_process_script); thorium_actor_add_script(self, SCRIPT_AGGREGATOR, &biosal_aggregator_script); thorium_actor_add_script(self, SCRIPT_KMER_STORE, &biosal_kmer_store_script); thorium_actor_add_script(self, SCRIPT_SEQUENCE_STORE, &biosal_sequence_store_script); thorium_actor_add_script(self, SCRIPT_COVERAGE_DISTRIBUTION, &biosal_coverage_distribution_script); thorium_actor_add_script(self, SCRIPT_ASSEMBLY_GRAPH_BUILDER, &biosal_assembly_graph_builder_script); thorium_actor_add_script(self, SCRIPT_ASSEMBLY_GRAPH_STORE, &biosal_assembly_graph_store_script); thorium_actor_add_script(self, SCRIPT_ASSEMBLY_SLIDING_WINDOW, &biosal_assembly_sliding_window_script); thorium_actor_add_script(self, SCRIPT_ASSEMBLY_BLOCK_CLASSIFIER, &biosal_assembly_block_classifier_script); thorium_actor_add_script(self, SCRIPT_COVERAGE_DISTRIBUTION, &biosal_coverage_distribution_script); thorium_actor_add_script(self, SCRIPT_ASSEMBLY_ARC_KERNEL, &biosal_assembly_arc_kernel_script); thorium_actor_add_script(self, SCRIPT_ASSEMBLY_ARC_CLASSIFIER, &biosal_assembly_arc_classifier_script); thorium_actor_add_script(self, SCRIPT_UNITIG_WALKER, &biosal_unitig_walker_script); thorium_actor_add_script(self, SCRIPT_UNITIG_VISITOR, &biosal_unitig_visitor_script); thorium_actor_add_script(self, SCRIPT_UNITIG_MANAGER, &biosal_unitig_manager_script); /* * This is the I/O controller block size. * This is a number of sequences. */ concrete_self->block_size = 16 * 4096; concrete_self->file_index = 0; }
void thorium_transport_init(struct thorium_transport *self, struct thorium_node *node, int *argc, char ***argv, struct core_memory_pool *inbound_message_memory_pool, struct core_memory_pool *outbound_message_memory_pool) { int actual_argc; char **actual_argv; self->active_request_count = 0; actual_argc = *argc; actual_argv = *argv; self->flags = 0; core_bitmap_clear_bit_uint32_t(&self->flags, FLAG_PROFILE); core_bitmap_clear_bit_uint32_t(&self->flags, FLAG_PRINT_TRANSPORT_EVENTS); self->transport_interface = NULL; self->concrete_transport = NULL; /* printf("DEBUG Initiating transport\n"); */ /* Select the transport layer */ /* * Assign functions */ thorium_transport_select_implementation(self, actual_argc, actual_argv); self->node = node; self->rank = -1; self->size = -1; if (self->transport_interface != NULL) { self->concrete_transport = core_memory_allocate(self->transport_interface->size, MEMORY_TRANSPORT); self->transport_interface->init(self, argc, argv); } CORE_DEBUGGER_ASSERT(self->rank >= 0); CORE_DEBUGGER_ASSERT(self->size >= 1); CORE_DEBUGGER_ASSERT(self->node != NULL); self->inbound_message_memory_pool = inbound_message_memory_pool; self->outbound_message_memory_pool = outbound_message_memory_pool; thorium_transport_profiler_init(&self->transport_profiler); if (core_command_has_argument(actual_argc, actual_argv, "-enable-transport-profiler")) { printf("Enable transport profiler\n"); core_bitmap_set_bit_uint32_t(&self->flags, FLAG_PROFILE); } if (self->rank == 0) { printf("thorium_transport: type %s\n", self->transport_interface->name); } if (core_command_has_argument(actual_argc, actual_argv, "-print-transport-events")) { core_bitmap_set_bit_uint32_t(&self->flags, FLAG_PRINT_TRANSPORT_EVENTS); } core_timer_init(&self->timer); self->start_time = core_timer_get_nanoseconds(&self->timer); }
void thorium_message_multiplexer_init(struct thorium_message_multiplexer *self, struct thorium_node *node, struct thorium_multiplexer_policy *policy) { int size; int i; /* int bytes; */ int position; struct thorium_multiplexed_buffer *multiplexed_buffer; int argc; char **argv; thorium_decision_maker_init(&self->decision_maker); self->policy = policy; self->original_message_count = 0; self->real_message_count = 0; CORE_BITMAP_CLEAR_FLAGS(self->flags); CORE_BITMAP_CLEAR_FLAG(self->flags, FLAG_DISABLED); #ifdef THORIUM_MULTIPLEXER_TRACK_BUFFERS_WITH_CONTENT core_set_init(&self->buffers_with_content, sizeof(int)); #endif core_timer_init(&self->timer); self->buffer_size_in_bytes = thorium_multiplexer_policy_size_threshold(self->policy); #ifdef CONFIG_MULTIPLEXER_USE_DECISION_MAKER self->timeout_in_nanoseconds = thorium_decision_maker_get_best_timeout(&self->decision_maker, THORIUM_TIMEOUT_NO_VALUE); #else self->timeout_in_nanoseconds = self->policy->threshold_time_in_nanoseconds; #endif CORE_DEBUGGER_ASSERT(self->timeout_in_nanoseconds >= 0); self->node = node; core_vector_init(&self->buffers, sizeof(struct thorium_multiplexed_buffer)); size = thorium_node_nodes(self->node); core_vector_resize(&self->buffers, size); /* bytes = size * self->buffer_size_in_bytes; */ #ifdef DEBUG_MULTIPLEXER thorium_printf("DEBUG_MULTIPLEXER size %d bytes %d\n", size, bytes); #endif position = 0; for (i = 0; i < size; ++i) { multiplexed_buffer = core_vector_at(&self->buffers, i); CORE_DEBUGGER_ASSERT(multiplexed_buffer != NULL); /* * Initially, these multiplexed buffers have a NULL buffer. * It is only allocated when needed because each worker is an exporter * of small messages for a subset of all the destination nodes. */ thorium_multiplexed_buffer_init(multiplexed_buffer, self->buffer_size_in_bytes, self->timeout_in_nanoseconds); position += self->buffer_size_in_bytes; #ifdef DEBUG_MULTIPLEXER1 thorium_printf("DEBUG_MULTIPLEXER thorium_message_multiplexer_init index %d buffer %p\n", i, buffer); #endif #ifdef DEBUG_MULTIPLEXER thorium_printf("DEBUG_MULTIPLEXER thorium_message_multiplexer_init (after) index %d buffer %p\n", i, core_vector_at(&self->buffers, i)); #endif } if (thorium_multiplexer_policy_is_disabled(self->policy)) { CORE_BITMAP_SET_FLAG(self->flags, FLAG_DISABLED); } if (thorium_node_nodes(self->node) < thorium_multiplexer_policy_minimum_node_count(self->policy)) { CORE_BITMAP_SET_FLAG(self->flags, FLAG_DISABLED); } self->worker = NULL; argc = node->argc; argv = node->argv; /* * Aside from the policy, the end user can also disable the multiplexer code path */ if (core_command_has_argument(argc, argv, OPTION_DISABLE_MULTIPLEXER)) { CORE_BITMAP_SET_FLAG(self->flags, FLAG_DISABLED); } self->last_send_event_count = 0; self->last_time = core_timer_get_nanoseconds(&self->timer); self->last_update_time = time(NULL); self->degree_of_aggregation_limit = self->policy->degree_of_aggregation_limit; thorium_router_init(&self->router, self->node->nodes, TOPOLOGY_POLYTOPE); if (thorium_node_must_print_data(self->node)) { thorium_router_print(&self->router); } }
void thorium_worker_init(struct thorium_worker *worker, int name, struct thorium_node *node) { int capacity; int ephemeral_memory_block_size; int injected_buffer_ring_size; int argc; char **argv; worker->tick_count = 0; thorium_load_profiler_init(&worker->profiler); argc = thorium_node_argc(node); argv = thorium_node_argv(node); #ifdef THORIUM_WORKER_DEBUG_INJECTION worker->counter_allocated_outbound_buffers = 0; worker->counter_freed_outbound_buffers_from_self = 0; worker->counter_freed_outbound_buffers_from_other_workers = 0; worker->counter_injected_outbound_buffers_other_local_workers= 0; worker->counter_injected_inbound_buffers_from_thorium_core = 0; #endif core_map_init(&worker->actor_received_messages, sizeof(int), sizeof(int)); worker->waiting_is_enabled = 0; worker->waiting_start_time = 0; core_timer_init(&worker->timer); capacity = THORIUM_WORKER_RING_CAPACITY; /*worker->work_queue = work_queue;*/ worker->node = node; worker->name = name; core_bitmap_clear_bit_uint32_t(&worker->flags, FLAG_DEAD); worker->last_warning = 0; worker->last_wake_up_count = 0; /*worker->work_queue = &worker->works;*/ /* There are two options: * 1. enable atomic operations for change visibility * 2. Use volatile head and tail. */ core_fast_ring_init(&worker->actors_to_schedule, capacity, sizeof(struct thorium_actor *)); #ifdef THORIUM_NODE_INJECT_CLEAN_WORKER_BUFFERS injected_buffer_ring_size = capacity; core_fast_ring_init(&worker->injected_clean_outbound_buffers, injected_buffer_ring_size, sizeof(void *)); core_fast_ring_init(&worker->clean_message_ring_for_triage, injected_buffer_ring_size, sizeof(struct thorium_message)); core_fast_queue_init(&worker->clean_message_queue_for_triage, sizeof(struct thorium_message)); #endif thorium_scheduler_init(&worker->scheduler, thorium_node_name(worker->node), worker->name); core_map_init(&worker->actors, sizeof(int), sizeof(int)); core_map_iterator_init(&worker->actor_iterator, &worker->actors); core_fast_ring_init(&worker->outbound_message_queue, capacity, sizeof(struct thorium_message)); core_fast_queue_init(&worker->outbound_message_queue_buffer, sizeof(struct thorium_message)); core_bitmap_clear_bit_uint32_t(&worker->flags, FLAG_DEBUG); core_bitmap_clear_bit_uint32_t(&worker->flags, FLAG_BUSY); core_bitmap_clear_bit_uint32_t(&node->flags, FLAG_ENABLE_ACTOR_LOAD_PROFILER); worker->flags = 0; core_bitmap_clear_bit_uint32_t(&worker->flags, FLAG_DEBUG_ACTORS); if (core_command_has_argument(argc, argv, DEBUG_WORKER_OPTION)) { #if 0 printf("DEBUG has option %s\n", DEBUG_WORKER_OPTION); #endif if (thorium_node_name(worker->node) == 0 && thorium_worker_name(worker) == 0) { #if 0 printf("DEBUG setting bit FLAG_DEBUG_ACTORS because %s\n", DEBUG_WORKER_OPTION); #endif core_bitmap_set_bit_uint32_t(&worker->flags, FLAG_DEBUG_ACTORS); } } worker->epoch_used_nanoseconds = 0; worker->loop_used_nanoseconds = 0; worker->scheduling_epoch_used_nanoseconds = 0; worker->started_in_thread = 0; /* 2 MiB is the default size for Linux huge pages. * \see https://wiki.debian.org/Hugepages * \see http://lwn.net/Articles/376606/ */ /* * 8 MiB */ ephemeral_memory_block_size = 8388608; /*ephemeral_memory_block_size = 16777216;*/ core_memory_pool_init(&worker->ephemeral_memory, ephemeral_memory_block_size, MEMORY_POOL_NAME_WORKER_EPHEMERAL); core_memory_pool_disable_tracking(&worker->ephemeral_memory); core_memory_pool_enable_ephemeral_mode(&worker->ephemeral_memory); #ifdef THORIUM_WORKER_ENABLE_LOCK core_lock_init(&worker->lock); #endif core_set_init(&worker->evicted_actors, sizeof(int)); core_memory_pool_init(&worker->outbound_message_memory_pool, CORE_MEMORY_POOL_MESSAGE_BUFFER_BLOCK_SIZE, MEMORY_POOL_NAME_WORKER_OUTBOUND); /* * Disable the pool so that it uses allocate and free * directly. */ #ifdef CORE_MEMORY_POOL_DISABLE_MESSAGE_BUFFER_POOL core_memory_pool_disable(&worker->outbound_message_memory_pool); #endif /* * Transport message buffers are fancy objects. */ core_memory_pool_enable_normalization(&worker->outbound_message_memory_pool); core_memory_pool_enable_alignment(&worker->outbound_message_memory_pool); worker->ticks_without_production = 0; thorium_priority_assigner_init(&worker->assigner, thorium_worker_name(worker)); /* * This variables should be set in * thorium_worker_start, but when running on 1 process with 1 thread, * thorium_worker_start is never called... */ worker->last_report = time(NULL); worker->epoch_start_in_nanoseconds = core_timer_get_nanoseconds(&worker->timer); worker->loop_start_in_nanoseconds = worker->epoch_start_in_nanoseconds; worker->loop_end_in_nanoseconds = worker->loop_start_in_nanoseconds; worker->scheduling_epoch_start_in_nanoseconds = worker->epoch_start_in_nanoseconds; /* * Avoid valgrind warnings. */ worker->epoch_load = 0; }
void thorium_balancer_balance(struct thorium_balancer *self) { /* * The 95th percentile is useful: * \see http://en.wikipedia.org/wiki/Burstable_billing * \see http://www.init7.net/en/backbone/95-percent-rule */ int load_percentile_50; struct core_timer timer; int i; struct core_vector loads; struct core_vector loads_unsorted; struct core_vector burdened_workers; struct core_vector stalled_workers; struct thorium_worker *worker; struct thorium_node *node; /*struct core_set *set;*/ struct core_pair pair; struct core_vector_iterator vector_iterator; int old_worker; int actor_name; int messages; int maximum; int with_maximum; struct core_map *set; struct core_map_iterator set_iterator; int stalled_index; int stalled_count; int new_worker_index; struct core_vector migrations; struct thorium_migration migration; struct thorium_migration *migration_to_do; struct thorium_actor *actor; int candidates; int load_value; int remaining_load; int projected_load; struct core_vector actors_to_migrate; int total; int with_messages; int stalled_percentile; int burdened_percentile; int old_total; int old_load; int new_load; int predicted_new_load; struct core_pair *pair_pointer; struct thorium_worker *new_worker; /*int new_total;*/ int actor_load; int test_stalled_index; int tests; int found_match; int spawned_actors; int killed_actors; int perfect; #ifdef THORIUM_SCHEDULER_ENABLE_SYMMETRIC_SCHEDULING struct core_map symmetric_actor_scripts; int script; #endif node = thorium_worker_pool_get_node(self->pool); spawned_actors = thorium_node_get_counter(node, CORE_COUNTER_SPAWNED_ACTORS); /* There is nothing to balance... */ if (spawned_actors == 0) { return; } killed_actors = thorium_node_get_counter(node, CORE_COUNTER_KILLED_ACTORS); /* * The system can probably not be balanced to get in * a better shape anyway. */ if (spawned_actors == self->last_spawned_actors && killed_actors == self->last_killed_actors && self->last_migrations == 0) { printf("SCHEDULER: balance can not be improved because nothing changed.\n"); return; } /* Check if we have perfection */ perfect = 1; for (i = 0; i < thorium_worker_pool_worker_count(self->pool); i++) { worker = thorium_worker_pool_get_worker(self->pool, i); load_value = thorium_worker_get_epoch_load(worker) * 100; if (load_value != 100) { perfect = 0; break; } } if (perfect) { printf("SCHEDULER: perfect balance can not be improved.\n"); return; } /* update counters */ self->last_spawned_actors = spawned_actors; self->last_killed_actors = killed_actors; /* Otherwise, try to balance things */ core_timer_init(&timer); core_timer_start(&timer); #ifdef THORIUM_SCHEDULER_ENABLE_SYMMETRIC_SCHEDULING core_map_init(&symmetric_actor_scripts, sizeof(int), sizeof(int)); thorium_balancer_detect_symmetric_scripts(self, &symmetric_actor_scripts); #endif #ifdef THORIUM_WORKER_ENABLE_LOCK /* Lock all workers first */ for (i = 0; i < thorium_worker_pool_worker_count(self->pool); i++) { worker = thorium_worker_pool_get_worker(self->pool, i); thorium_worker_lock(worker); } #endif core_vector_init(&migrations, sizeof(struct thorium_migration)); #ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY printf("BALANCING\n"); #endif core_vector_init(&loads, sizeof(int)); core_vector_init(&loads_unsorted, sizeof(int)); core_vector_init(&burdened_workers, sizeof(struct core_pair)); core_vector_init(&stalled_workers, sizeof(struct core_pair)); core_vector_init(&actors_to_migrate, sizeof(struct core_pair)); for (i = 0; i < thorium_worker_pool_worker_count(self->pool); i++) { worker = thorium_worker_pool_get_worker(self->pool, i); load_value = thorium_worker_get_scheduling_epoch_load(worker) * SCHEDULER_PRECISION; #if 0 printf("DEBUG LOAD %d %d\n", i, load_value); #endif core_vector_push_back(&loads, &load_value); core_vector_push_back(&loads_unsorted, &load_value); } core_vector_sort_int(&loads); stalled_percentile = core_statistics_get_percentile_int(&loads, SCHEDULER_WINDOW); /*load_percentile_25 = core_statistics_get_percentile_int(&loads, 25);*/ load_percentile_50 = core_statistics_get_percentile_int(&loads, 50); /*load_percentile_75 = core_statistics_get_percentile_int(&loads, 75);*/ burdened_percentile = core_statistics_get_percentile_int(&loads, 100 - SCHEDULER_WINDOW); #ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY printf("Percentiles for epoch loads: "); core_statistics_print_percentiles_int(&loads); #endif for (i = 0; i < thorium_worker_pool_worker_count(self->pool); i++) { worker = thorium_worker_pool_get_worker(self->pool, i); load_value = core_vector_at_as_int(&loads_unsorted, i); set = thorium_worker_get_actors(worker); if (stalled_percentile == burdened_percentile) { #ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY printf("scheduling_class:%s ", THORIUM_CLASS_NORMAL_STRING); #endif } else if (load_value <= stalled_percentile) { #ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY printf("scheduling_class:%s ", THORIUM_CLASS_STALLED_STRING); #endif core_pair_init(&pair, load_value, i); core_vector_push_back(&stalled_workers, &pair); } else if (load_value >= burdened_percentile) { #ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY printf("scheduling_class:%s ", THORIUM_CLASS_BURDENED_STRING); #endif core_pair_init(&pair, load_value, i); core_vector_push_back(&burdened_workers, &pair); } else { #ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY printf("scheduling_class:%s ", THORIUM_CLASS_NORMAL_STRING); #endif } #ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY thorium_worker_print_actors(worker, self); #endif } core_vector_sort_int_reverse(&burdened_workers); core_vector_sort_int(&stalled_workers); stalled_count = core_vector_size(&stalled_workers); #ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY printf("MIGRATIONS (stalled: %d, burdened: %d)\n", (int)core_vector_size(&stalled_workers), (int)core_vector_size(&burdened_workers)); #endif stalled_index = 0; core_vector_iterator_init(&vector_iterator, &burdened_workers); while (stalled_count > 0 && core_vector_iterator_get_next_value(&vector_iterator, &pair)) { old_worker = core_pair_get_second(&pair); worker = thorium_worker_pool_get_worker(self->pool, old_worker); set = thorium_worker_get_actors(worker); /* thorium_worker_print_actors(worker); printf("\n"); */ /* * Lock the worker and try to select actors for migration */ core_map_iterator_init(&set_iterator, set); maximum = -1; with_maximum = 0; total = 0; with_messages = 0; while (core_map_iterator_get_next_key_and_value(&set_iterator, &actor_name, NULL)) { actor = thorium_node_get_actor_from_name(thorium_worker_pool_get_node(self->pool), actor_name); messages = thorium_balancer_get_actor_production(self, actor); if (maximum == -1 || messages > maximum) { maximum = messages; with_maximum = 1; } else if (messages == maximum) { with_maximum++; } if (messages > 0) { ++with_messages; } total += messages; } core_map_iterator_destroy(&set_iterator); core_map_iterator_init(&set_iterator, set); --with_maximum; candidates = 0; load_value = thorium_worker_get_scheduling_epoch_load(worker) * SCHEDULER_PRECISION; remaining_load = load_value; #if 0 printf("maximum %d with_maximum %d\n", maximum, with_maximum); #endif while (core_map_iterator_get_next_key_and_value(&set_iterator, &actor_name, NULL)) { actor = thorium_node_get_actor_from_name(thorium_worker_pool_get_node(self->pool), actor_name); if (actor == NULL) { continue; } messages = thorium_balancer_get_actor_production(self, actor); #ifdef THORIUM_SCHEDULER_ENABLE_SYMMETRIC_SCHEDULING script = thorium_actor_script(actor); /* symmetric actors are migrated elsewhere. */ if (core_map_get_value(&symmetric_actor_scripts, &script, NULL)) { continue; } #endif /* Simulate the remaining load */ projected_load = remaining_load; projected_load -= ((0.0 + messages) / total) * load_value; #ifdef THORIUM_SCHEDULER_DEBUG printf(" TESTING actor %d, production was %d, projected_load is %d (- %d * (1 - %d/%d)\n", actor_name, messages, projected_load, load_value, messages, total); #endif /* An actor without any queued messages should not be migrated */ if (messages > 0 && ((with_maximum > 0 && messages == maximum) || messages < maximum) /* * Avoid removing too many actors because * generating a stalled one is not desired */ && (projected_load >= load_percentile_50 /* * The previous rule does not apply when there * are 2 actors. */ || with_messages == 2) ) { remaining_load = projected_load; candidates++; if (messages == maximum) { --with_maximum; } core_pair_init(&pair, messages, actor_name); core_vector_push_back(&actors_to_migrate, &pair); #ifdef THORIUM_SCHEDULER_DEBUG printf("early CANDIDATE for migration: actor %d, worker %d\n", actor_name, old_worker); #endif } } core_map_iterator_destroy(&set_iterator); } core_vector_iterator_destroy(&vector_iterator); /* Sort the candidates */ /* core_vector_sort_int(&actors_to_migrate); printf("Percentiles for production: "); core_statistics_print_percentiles_int(&actors_to_migrate); */ /* Sort them in reverse order. */ core_vector_sort_int_reverse(&actors_to_migrate); core_vector_iterator_init(&vector_iterator, &actors_to_migrate); /* For each highly active actor, * try to match it with a stalled worker */ while (core_vector_iterator_get_next_value(&vector_iterator, &pair)) { actor_name = core_pair_get_second(&pair); actor = thorium_node_get_actor_from_name(thorium_worker_pool_get_node(self->pool), actor_name); if (actor == NULL) { continue; } messages = thorium_balancer_get_actor_production(self, actor); old_worker = thorium_actor_assigned_worker(actor); worker = thorium_worker_pool_get_worker(self->pool, old_worker); /* old_total can not be 0 because otherwise the would not * be burdened. */ old_total = thorium_worker_get_production(worker, self); with_messages = thorium_worker_get_producer_count(worker, self); old_load = thorium_worker_get_scheduling_epoch_load(worker) * SCHEDULER_PRECISION; actor_load = ((0.0 + messages) / old_total) * old_load; /* Try to find a stalled worker that can take it. */ test_stalled_index = stalled_index; tests = 0; predicted_new_load = 0; found_match = 0; while (tests < stalled_count) { core_vector_get_value(&stalled_workers, test_stalled_index, &pair); new_worker_index = core_pair_get_second(&pair); new_worker = thorium_worker_pool_get_worker(self->pool, new_worker_index); new_load = thorium_worker_get_scheduling_epoch_load(new_worker) * SCHEDULER_PRECISION; /*new_total = thorium_worker_get_production(new_worker);*/ predicted_new_load = new_load + actor_load; if (predicted_new_load > SCHEDULER_PRECISION /* && with_messages != 2 */) { #ifdef THORIUM_SCHEDULER_DEBUG printf("Scheduler: skipping actor %d, predicted load is %d >= 100\n", actor_name, predicted_new_load); #endif ++tests; ++test_stalled_index; if (test_stalled_index == stalled_count) { test_stalled_index = 0; } continue; } /* Otherwise, this stalled worker is fine... */ stalled_index = test_stalled_index; found_match = 1; break; } /* This actor can not be migrated to any stalled worker. */ if (!found_match) { continue; } /* Otherwise, update the load of the stalled one and go forward with the change. */ pair_pointer = (struct core_pair *)core_vector_at(&stalled_workers, stalled_index); core_pair_set_first(pair_pointer, predicted_new_load); ++stalled_index; if (stalled_index == stalled_count) { stalled_index = 0; } #if 0 new_worker = thorium_worker_pool_get_worker(pool, new_worker_index); printf(" CANDIDATE: actor %d old worker %d (%d - %d = %d) new worker %d (%d + %d = %d)\n", actor_name, old_worker, value, messages, 2new_score, new_worker_index, new_worker_old_score, messages, new_worker_new_score); #endif thorium_migration_init(&migration, actor_name, old_worker, new_worker_index); core_vector_push_back(&migrations, &migration); thorium_migration_destroy(&migration); } core_vector_iterator_destroy(&vector_iterator); core_vector_destroy(&stalled_workers); core_vector_destroy(&burdened_workers); core_vector_destroy(&loads); core_vector_destroy(&loads_unsorted); core_vector_destroy(&actors_to_migrate); /* Update the last values */ for (i = 0; i < thorium_worker_pool_worker_count(self->pool); i++) { worker = thorium_worker_pool_get_worker(self->pool, i); set = thorium_worker_get_actors(worker); core_map_iterator_init(&set_iterator, set); while (core_map_iterator_get_next_key_and_value(&set_iterator, &actor_name, NULL)) { actor = thorium_node_get_actor_from_name(thorium_worker_pool_get_node(self->pool), actor_name); thorium_balancer_update_actor_production(self, actor); } core_map_iterator_destroy(&set_iterator); thorium_worker_reset_scheduling_epoch(worker); } #ifdef THORIUM_SCHEDULER_ENABLE_SYMMETRIC_SCHEDULING /* Generate migrations for symmetric actors. */ thorium_balancer_generate_symmetric_migrations(self, &symmetric_actor_scripts, &migrations); #endif /* Actually do the migrations */ core_vector_iterator_init(&vector_iterator, &migrations); while (core_vector_iterator_next(&vector_iterator, (void **)&migration_to_do)) { thorium_balancer_migrate(self, migration_to_do); } core_vector_iterator_destroy(&vector_iterator); self->last_migrations = core_vector_size(&migrations); core_vector_destroy(&migrations); #ifdef THORIUM_WORKER_ENABLE_LOCK /* Unlock all workers */ for (i = 0; i < thorium_worker_pool_worker_count(self->pool); i++) { worker = thorium_worker_pool_get_worker(self->pool, i); thorium_worker_unlock(worker); } #endif #ifdef THORIUM_SCHEDULER_ENABLE_SYMMETRIC_SCHEDULING core_map_destroy(&symmetric_actor_scripts); #endif core_timer_stop(&timer); printf("SCHEDULER: elapsed time for balancing: %d us, %d migrations performed\n", (int)(core_timer_get_elapsed_nanoseconds(&timer) / 1000), self->last_migrations); }