Example #1
void thorium_cfs_scheduler_print(struct thorium_scheduler *self)
{
    struct thorium_cfs_scheduler *concrete_self;
    struct core_red_black_tree_iterator iterator;
    uint64_t virtual_runtime;
    struct thorium_actor *actor;
    int i;
    struct core_timer timer;

    core_timer_init(&timer);
    concrete_self = self->concrete_self;

    core_red_black_tree_iterator_init(&iterator, &concrete_self->tree);

    printf("[cfs_scheduler] %" PRIu64 " ns, timeline contains %d actors\n",
                    core_timer_get_nanoseconds(&timer),
                    core_red_black_tree_size(&concrete_self->tree));

    i = 0;
    while (core_red_black_tree_iterator_get_next_key_and_value(&iterator, &virtual_runtime,
                            &actor)) {
        printf("[%d] virtual_runtime= %" PRIu64 " actor= %s/%d\n",
                        i, virtual_runtime,
                        thorium_actor_script_name(actor), thorium_actor_name(actor));
        ++i;
    }

    core_red_black_tree_iterator_destroy(&iterator);
    core_timer_destroy(&timer);
}
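Note that this routine samples core_timer_get_nanoseconds() right after core_timer_init(), so the value printed is a timestamp, not a duration. The other examples below measure intervals with a start/stop pair; a minimal sketch of that pattern follows, using only timer calls that appear in the surrounding examples (the include path is an assumption about the tree layout).

/*
 * Minimal sketch of the start/stop timing pattern.
 * The header path is an assumption.
 */
#include "core/system/timer.h"

#include <inttypes.h>
#include <stdio.h>

void time_one_call(void (*work)(void))
{
    struct core_timer timer;
    uint64_t elapsed;

    core_timer_init(&timer);
    core_timer_start(&timer);

    work();

    core_timer_stop(&timer);
    elapsed = core_timer_get_elapsed_nanoseconds(&timer);

    printf("elapsed: %" PRIu64 " ns\n", elapsed);

    core_timer_destroy(&timer);
}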
Example #2
void test_allocator(struct core_memory_pool *memory)
{
    int i;
    int size;
    void *pointer;
    struct core_vector vector;
    struct core_timer timer;
    uint64_t elapsed;

    i = 1000000;
    size = 45;

    core_vector_init(&vector, sizeof(void *));
    core_timer_init(&timer);

    core_timer_start(&timer);

    while (i--) {
        if (memory != NULL) {
            pointer = core_memory_pool_allocate(memory, size);
        } else {
            pointer = core_memory_allocate(size, -1);
        }

        core_vector_push_back(&vector, &pointer);
    }

    core_timer_stop(&timer);
    elapsed = core_timer_get_elapsed_nanoseconds(&timer);

    if (memory == NULL) {
        printf("Not using memory pool... ");
    } else {
        printf("Using memory pool... ");
    }
    printf("Elapsed : %" PRIu64 " milliseconds\n", elapsed / 1000 / 1000);

    size = core_vector_size(&vector);
    for (i = 0; i < size; ++i) {
        pointer = core_vector_at_as_void_pointer(&vector, i);

        if (memory != NULL) {
            core_memory_pool_free(memory, pointer);
        } else {
            core_memory_free(pointer, -1);
        }
    }
    core_vector_destroy(&vector);
    core_timer_destroy(&timer);
}
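A driver for this benchmark might run both code paths back to back. In the sketch below, the include path, the 8 MiB block size, the -1 pool name, and core_memory_pool_destroy() as the counterpart of core_memory_pool_init() are all assumptions.

#include "core/system/memory_pool.h"

int main(void)
{
    struct core_memory_pool pool;

    /* Plain allocator path (memory == NULL). */
    test_allocator(NULL);

    /* Pool path; the block size and name arguments are assumptions. */
    core_memory_pool_init(&pool, 8388608, -1);
    test_allocator(&pool);
    core_memory_pool_destroy(&pool);

    return 0;
}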
Example #3
void biosal_unitig_manager_init(struct thorium_actor *self)
{
    struct biosal_unitig_manager *concrete_self;

    concrete_self = (struct biosal_unitig_manager *)thorium_actor_concrete_actor(self);

    core_vector_init(&concrete_self->spawners, sizeof(int));
    core_vector_init(&concrete_self->graph_stores, sizeof(int));

    core_vector_init(&concrete_self->visitors, sizeof(int));
    core_vector_init(&concrete_self->walkers, sizeof(int));

    concrete_self->completed = 0;
    concrete_self->manager = THORIUM_ACTOR_NOBODY;

    core_timer_init(&concrete_self->timer);

    concrete_self->state = STATE_VISITORS;
}
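Each container initialized here presumably has a matching destructor. A hedged sketch of the teardown counterpart, using only destroy calls seen elsewhere in these examples (the real biosal_unitig_manager_destroy may differ):

/* Hypothetical teardown mirroring the init above. */
void biosal_unitig_manager_destroy_sketch(struct thorium_actor *self)
{
    struct biosal_unitig_manager *concrete_self;

    concrete_self = (struct biosal_unitig_manager *)thorium_actor_concrete_actor(self);

    core_vector_destroy(&concrete_self->spawners);
    core_vector_destroy(&concrete_self->graph_stores);
    core_vector_destroy(&concrete_self->visitors);
    core_vector_destroy(&concrete_self->walkers);

    core_timer_destroy(&concrete_self->timer);
}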
Example #4
void biosal_input_controller_init(struct thorium_actor *actor)
{
    struct biosal_input_controller *concrete_actor;

    concrete_actor = (struct biosal_input_controller *)thorium_actor_concrete_actor(actor);

    core_map_init(&concrete_actor->mega_blocks, sizeof(int), sizeof(struct core_vector));
    core_map_init(&concrete_actor->assigned_blocks, sizeof(int), sizeof(int));
    core_vector_init(&concrete_actor->mega_block_vector, sizeof(struct biosal_mega_block));

    core_vector_init(&concrete_actor->counting_streams, sizeof(int));
    core_vector_init(&concrete_actor->reading_streams, sizeof(int));
    core_vector_init(&concrete_actor->partition_commands, sizeof(int));
    core_vector_init(&concrete_actor->stream_consumers, sizeof(int));
    core_vector_init(&concrete_actor->consumer_active_requests, sizeof(int));
    core_vector_init(&concrete_actor->files, sizeof(char *));
    core_vector_init(&concrete_actor->spawners, sizeof(int));
    core_vector_init(&concrete_actor->counts, sizeof(int64_t));
    core_vector_init(&concrete_actor->consumers, sizeof(int));
    core_vector_init(&concrete_actor->stores_per_spawner, sizeof(int));

    core_timer_init(&concrete_actor->input_timer);
    core_timer_init(&concrete_actor->counting_timer);
    core_timer_init(&concrete_actor->distribution_timer);

    biosal_dna_codec_init(&concrete_actor->codec);

    if (biosal_dna_codec_must_use_two_bit_encoding(&concrete_actor->codec,
                            thorium_actor_get_node_count(actor))) {
        biosal_dna_codec_enable_two_bit_encoding(&concrete_actor->codec);
    }

    core_queue_init(&concrete_actor->unprepared_spawners, sizeof(int));

    concrete_actor->opened_streams = 0;
    concrete_actor->state = BIOSAL_INPUT_CONTROLLER_STATE_NONE;

#ifdef BIOSAL_INPUT_CONTROLLER_DEBUG_10355
    printf("DEBUG actor %d register ACTION_INPUT_CONTROLLER_CREATE_STORES\n",
                    thorium_actor_name(actor));
#endif

    thorium_actor_add_action(actor, ACTION_INPUT_CONTROLLER_CREATE_STORES,
                    biosal_input_controller_create_stores);
    thorium_actor_add_action(actor, ACTION_GET_NODE_NAME_REPLY,
                    biosal_input_controller_get_node_name_reply);
    thorium_actor_add_action(actor, ACTION_GET_NODE_WORKER_COUNT_REPLY,
                    biosal_input_controller_get_node_worker_count_reply);

    thorium_actor_add_action(actor, ACTION_INPUT_CONTROLLER_PREPARE_SPAWNERS,
                    biosal_input_controller_prepare_spawners);
    thorium_actor_add_action(actor, ACTION_INPUT_CONTROLLER_SPAWN_READING_STREAMS,
                    biosal_input_controller_spawn_streams);

    thorium_actor_add_action(actor, ACTION_INPUT_STREAM_SET_START_OFFSET_REPLY,
                    biosal_input_controller_set_offset_reply);
    thorium_actor_add_script(actor, SCRIPT_INPUT_STREAM, &biosal_input_stream_script);
    thorium_actor_add_script(actor, SCRIPT_SEQUENCE_STORE, &biosal_sequence_store_script);
    thorium_actor_add_script(actor, SCRIPT_SEQUENCE_PARTITIONER,
                    &biosal_sequence_partitioner_script);

    /*
     * Configuration for the input controller.
     * Other values for block size: 512, 1024, 2048, 4096, 8192.
     */
    concrete_actor->block_size = 4096;
    concrete_actor->stores_per_worker_per_spawner = 0;

#ifdef BIOSAL_INPUT_CONTROLLER_DEBUG
    printf("DEBUG %d init controller\n",
                    thorium_actor_name(actor));
#endif

    concrete_actor->ready_spawners = 0;
    concrete_actor->ready_consumers = 0;
    concrete_actor->partitioner = THORIUM_ACTOR_NOBODY;
    concrete_actor->filled_consumers = 0;

    concrete_actor->counted = 0;
}
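thorium_actor_add_action() binds a message tag to a handler function. Judging from the registrations above, a handler receives the actor and the incoming message; the sketch below assumes that signature, and the tag name is hypothetical.

/* Hypothetical handler; the (actor, message) signature is inferred
 * from the registrations above. */
void biosal_input_controller_example_handler(struct thorium_actor *actor,
                struct thorium_message *message)
{
    printf("controller %d handled a message\n",
                    thorium_actor_name(actor));
}

/* Registered the same way as the handlers above:
 *
 * thorium_actor_add_action(actor, ACTION_SOME_TAG,
 *                 biosal_input_controller_example_handler);
 */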
Example #5
/**
 * This is the architecture-independent kernel entry point. Before it is
 * called, architecture-specific code has done the bare minimum initialization
 * necessary. This function initializes the kernel and its various subsystems.
 * It calls back to architecture-specific code at several well defined points,
 * which all architectures must implement (e.g., setup_arch()).
 *
 * \callgraph
 */
void
start_kernel()
{
	unsigned int cpu;
	unsigned int timeout;
	int status;

	/*
 	 * Parse the kernel boot command line.
 	 * This is where boot-time configurable variables get set,
 	 * e.g., the ones with param() and DRIVER_PARAM() specifiers.
 	 */
	parse_params(lwk_command_line);

	/*
 	 * Initialize the console subsystem.
 	 * printk()'s will be visible after this.
 	 */
	console_init();

	/*
	 * Hello, Dave.
	 */
	printk("%s", lwk_banner);
	printk(KERN_DEBUG "%s\n", lwk_command_line);
	sort_exception_table();
	/*
	 * Do architecture specific initialization.
	 * This detects memory, CPUs, architecture dependent irqs, etc.
	 */
	setup_arch();

	/*
	 * Setup the architecture independent interrupt handling.
	 */
	irq_init();

	/*
	 * Initialize the kernel memory subsystem. Up until now, the simple
	 * boot-time memory allocator (bootmem) has been used for all dynamic
	 * memory allocation. Here, the bootmem allocator is destroyed and all
	 * of the free pages it was managing are added to the kernel memory
	 * pool (kmem) or the user memory pool (umem).
	 *
	 * After this point, any use of the bootmem allocator will cause a
	 * kernel panic. The normal kernel memory subsystem API should be used
	 * instead (e.g., kmem_alloc() and kmem_free()).
	 */
	mem_subsys_init();

	/*
 	 * Initialize the address space management subsystem.
 	 */
	aspace_subsys_init();


	sched_init_runqueue(0); /* This CPU's scheduler state + idle task */
	sched_add_task(current);  /* now safe to call schedule() */

	/*
	 * Initialize the timer subsystem.
	 */
	core_timer_init(0);

	/* Start the kernel filesystems */
	kfs_init();

	/*
	 * Initialize the random number generator.
	 */
	rand_init();

	workq_init();

	/*
	 * Boot all of the other CPUs in the system, one at a time.
	 */
	printk(KERN_INFO "Number of CPUs detected: %d\n", num_cpus());
	for_each_cpu_mask(cpu, cpu_present_map) {
		/* The bootstrap CPU (that's us) is already booted. */
		if (cpu == 0) {
			cpu_set(cpu, cpu_online_map);
			continue;
		}

		printk(KERN_DEBUG "Booting CPU %u.\n", cpu);
		arch_boot_cpu(cpu);

		/* Wait for ACK that CPU has booted (5 seconds max). */
		for (timeout = 0; timeout < 50000; timeout++) {
			if (cpu_isset(cpu, cpu_online_map))
				break;
			udelay(100);
		}

		if (!cpu_isset(cpu, cpu_online_map))
			panic("Failed to boot CPU %d.\n", cpu);
	}

	/*
	 * Initialize the PCI subsystem.
	 */
	init_pci();

	/*
	 * Enable external interrupts.
	 */
	local_irq_enable();

#ifdef CONFIG_NETWORK
	/*
	 * Bring up any network devices.
	 */
	netdev_init();
#endif

#ifdef CONFIG_CRAY_GEMINI
	driver_init_list("net", "gemini");
#endif

#ifdef CONFIG_BLOCK_DEVICE
	/*
	 * Initialize the block devices.
	 */
	blkdev_init();
#endif


	mcheck_init_late();

	/*
	 * And any modules that need to be started.
	 */
	driver_init_by_name( "module", "*" );

#ifdef CONFIG_KGDB
	/*
	 * Stop early (before "late" devices) in KGDB if requested.
	 */
	kgdb_initial_breakpoint();
#endif

	/*
	 * Bring up any late init devices.
	 */
	driver_init_by_name( "late", "*" );

	/*
	 * Bring up the Linux compatibility layer, if enabled.
	 */
	linux_init();

#ifdef CONFIG_DEBUG_HW_NOISE
	/* Measure noise/interference in the underlying hardware/VMM */
	extern void measure_noise(int, uint64_t);
	measure_noise(0, 0);
#endif

	/*
	 * Start up user-space...
	 */
	printk(KERN_INFO "Loading initial user-level task (init_task)...\n");
	if ((status = create_init_task()) != 0)
		panic("Failed to create init_task (status=%d).", status);
	current->state = TASK_EXITED;
	schedule();  /* This should not return */
	BUG();
}
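The CPU boot loop above polls cpu_online_map every 100 microseconds for at most 50000 iterations, i.e. 5 seconds. The same bounded-poll pattern, factored into a standalone helper (predicate() is a placeholder; udelay() is the delay primitive used above):

/*
 * Bounded polling helper mirroring the boot-wait loop: test a
 * condition every 100 microseconds, for up to 50000 * 100 us = 5 s.
 * Returns 0 once the condition holds, -1 on timeout.
 */
static int
wait_for_condition(int (*predicate)(void *arg), void *arg)
{
	unsigned int timeout;

	for (timeout = 0; timeout < 50000; timeout++) {
		if (predicate(arg))
			return 0;
		udelay(100);
	}

	return -1;
}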
Example #6
void spate_init(struct thorium_actor *self)
{
    struct spate *concrete_self;

    concrete_self = (struct spate *)thorium_actor_concrete_actor(self);
    core_vector_init(&concrete_self->initial_actors, sizeof(int));
    core_vector_init(&concrete_self->sequence_stores, sizeof(int));
    core_vector_init(&concrete_self->graph_stores, sizeof(int));

    concrete_self->is_leader = 0;

    concrete_self->input_controller = THORIUM_ACTOR_NOBODY;
    concrete_self->manager_for_sequence_stores = THORIUM_ACTOR_NOBODY;
    concrete_self->assembly_graph = THORIUM_ACTOR_NOBODY;
    concrete_self->assembly_graph_builder = THORIUM_ACTOR_NOBODY;

    core_timer_init(&concrete_self->timer);

    thorium_actor_add_action(self,
                    ACTION_START, spate_start);
    thorium_actor_add_action(self,
                    ACTION_ASK_TO_STOP, spate_ask_to_stop);
    thorium_actor_add_action(self,
                    ACTION_SPAWN_REPLY, spate_spawn_reply);
    thorium_actor_add_action(self,
                    ACTION_MANAGER_SET_SCRIPT_REPLY, spate_set_script_reply);
    thorium_actor_add_action(self,
                    ACTION_SET_CONSUMERS_REPLY, spate_set_consumers_reply);
    thorium_actor_add_action(self,
                    ACTION_SET_BLOCK_SIZE_REPLY, spate_set_block_size_reply);
    thorium_actor_add_action(self,
                    ACTION_INPUT_DISTRIBUTE_REPLY, spate_distribute_reply);
    thorium_actor_add_action(self,
                    ACTION_SPATE_ADD_FILES, spate_add_files);
    thorium_actor_add_action(self,
                    ACTION_SPATE_ADD_FILES_REPLY, spate_add_files_reply);
    thorium_actor_add_action(self,
                    ACTION_ADD_FILE_REPLY, spate_add_file_reply);

    thorium_actor_add_action(self,
                    ACTION_START_REPLY, spate_start_reply);

    /*
     * Register required actor scripts now
     */

    thorium_actor_add_script(self, SCRIPT_INPUT_CONTROLLER,
                    &biosal_input_controller_script);
    thorium_actor_add_script(self, SCRIPT_DNA_KMER_COUNTER_KERNEL,
                    &biosal_dna_kmer_counter_kernel_script);
    thorium_actor_add_script(self, SCRIPT_MANAGER,
                    &core_manager_script);
    thorium_actor_add_script(self, SCRIPT_WRITER_PROCESS,
                    &core_writer_process_script);
    thorium_actor_add_script(self, SCRIPT_AGGREGATOR,
                    &biosal_aggregator_script);
    thorium_actor_add_script(self, SCRIPT_KMER_STORE,
                    &biosal_kmer_store_script);
    thorium_actor_add_script(self, SCRIPT_SEQUENCE_STORE,
                    &biosal_sequence_store_script);
    thorium_actor_add_script(self, SCRIPT_COVERAGE_DISTRIBUTION,
                    &biosal_coverage_distribution_script);
    thorium_actor_add_script(self, SCRIPT_ASSEMBLY_GRAPH_BUILDER,
                    &biosal_assembly_graph_builder_script);

    thorium_actor_add_script(self, SCRIPT_ASSEMBLY_GRAPH_STORE,
                    &biosal_assembly_graph_store_script);
    thorium_actor_add_script(self, SCRIPT_ASSEMBLY_SLIDING_WINDOW,
                    &biosal_assembly_sliding_window_script);
    thorium_actor_add_script(self, SCRIPT_ASSEMBLY_BLOCK_CLASSIFIER,
                    &biosal_assembly_block_classifier_script);

    thorium_actor_add_script(self, SCRIPT_ASSEMBLY_ARC_KERNEL,
                    &biosal_assembly_arc_kernel_script);
    thorium_actor_add_script(self, SCRIPT_ASSEMBLY_ARC_CLASSIFIER,
                    &biosal_assembly_arc_classifier_script);
    thorium_actor_add_script(self, SCRIPT_UNITIG_WALKER,
                    &biosal_unitig_walker_script);
    thorium_actor_add_script(self, SCRIPT_UNITIG_VISITOR,
                    &biosal_unitig_visitor_script);
    thorium_actor_add_script(self, SCRIPT_UNITIG_MANAGER,
                    &biosal_unitig_manager_script);

    /*
     * This is the I/O controller block size, expressed as a number of
     * sequences: 16 * 4096 = 65536 sequences per block.
     */
    concrete_self->block_size = 16 * 4096;

    concrete_self->file_index = 0;
}
Example #7
void thorium_transport_init(struct thorium_transport *self, struct thorium_node *node,
                int *argc, char ***argv,
                struct core_memory_pool *inbound_message_memory_pool,
                struct core_memory_pool *outbound_message_memory_pool)
{
    int actual_argc;
    char **actual_argv;

    self->active_request_count = 0;

    actual_argc = *argc;
    actual_argv = *argv;

    self->flags = 0;
    core_bitmap_clear_bit_uint32_t(&self->flags, FLAG_PROFILE);
    core_bitmap_clear_bit_uint32_t(&self->flags, FLAG_PRINT_TRANSPORT_EVENTS);

    self->transport_interface = NULL;
    self->concrete_transport = NULL;

    /*
     * Select the transport layer and assign its functions.
     */
    thorium_transport_select_implementation(self, actual_argc, actual_argv);

    self->node = node;

    self->rank = -1;
    self->size = -1;

    if (self->transport_interface != NULL) {

        self->concrete_transport = core_memory_allocate(self->transport_interface->size, MEMORY_TRANSPORT);
        self->transport_interface->init(self, argc, argv);
    }

    CORE_DEBUGGER_ASSERT(self->rank >= 0);
    CORE_DEBUGGER_ASSERT(self->size >= 1);
    CORE_DEBUGGER_ASSERT(self->node != NULL);

    self->inbound_message_memory_pool = inbound_message_memory_pool;
    self->outbound_message_memory_pool = outbound_message_memory_pool;

    thorium_transport_profiler_init(&self->transport_profiler);

    if (core_command_has_argument(actual_argc, actual_argv, "-enable-transport-profiler")) {

        printf("Enable transport profiler\n");

        core_bitmap_set_bit_uint32_t(&self->flags, FLAG_PROFILE);
    }

    if (self->rank == 0) {
        printf("thorium_transport: type %s\n",
                    self->transport_interface->name);
    }

    if (core_command_has_argument(actual_argc, actual_argv, "-print-transport-events")) {
        core_bitmap_set_bit_uint32_t(&self->flags, FLAG_PRINT_TRANSPORT_EVENTS);
    }

    core_timer_init(&self->timer);
    self->start_time = core_timer_get_nanoseconds(&self->timer);
}
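Because init records start_time from the same timer, elapsed run time can later be derived by sampling the timer again. A one-line sketch (the helper name is hypothetical; it assumes core_timer_get_nanoseconds() increases monotonically, as the usage here suggests):

uint64_t thorium_transport_sketch_elapsed_nanoseconds(struct thorium_transport *self)
{
    /* Same timer that produced self->start_time in the init above. */
    return core_timer_get_nanoseconds(&self->timer) - self->start_time;
}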
Example #8
void thorium_message_multiplexer_init(struct thorium_message_multiplexer *self,
                struct thorium_node *node, struct thorium_multiplexer_policy *policy)
{
    int size;
    int i;
    int position;
    struct thorium_multiplexed_buffer *multiplexed_buffer;
    int argc;
    char **argv;

    thorium_decision_maker_init(&self->decision_maker);

    self->policy = policy;
    self->original_message_count = 0;
    self->real_message_count = 0;

    CORE_BITMAP_CLEAR_FLAGS(self->flags);
    CORE_BITMAP_CLEAR_FLAG(self->flags, FLAG_DISABLED);

#ifdef THORIUM_MULTIPLEXER_TRACK_BUFFERS_WITH_CONTENT
    core_set_init(&self->buffers_with_content, sizeof(int));
#endif

    core_timer_init(&self->timer);

    self->buffer_size_in_bytes = thorium_multiplexer_policy_size_threshold(self->policy);

#ifdef CONFIG_MULTIPLEXER_USE_DECISION_MAKER
    self->timeout_in_nanoseconds = thorium_decision_maker_get_best_timeout(&self->decision_maker,
                    THORIUM_TIMEOUT_NO_VALUE);
#else
    self->timeout_in_nanoseconds = self->policy->threshold_time_in_nanoseconds;
#endif

    CORE_DEBUGGER_ASSERT(self->timeout_in_nanoseconds >= 0);

    self->node = node;

    core_vector_init(&self->buffers, sizeof(struct thorium_multiplexed_buffer));

    size = thorium_node_nodes(self->node);
    core_vector_resize(&self->buffers, size);

#ifdef DEBUG_MULTIPLEXER
    thorium_printf("DEBUG_MULTIPLEXER size %d bytes %d\n", size,
                    size * self->buffer_size_in_bytes);
#endif

    position = 0;

    for (i = 0; i < size; ++i) {
        multiplexed_buffer = core_vector_at(&self->buffers, i);

        CORE_DEBUGGER_ASSERT(multiplexed_buffer != NULL);

        /*
         * Initially, these multiplexed buffers have a NULL buffer.
         * It is only allocated when needed because each worker is an exporter
         * of small messages for a subset of all the destination nodes.
         */
        thorium_multiplexed_buffer_init(multiplexed_buffer, self->buffer_size_in_bytes,
                        self->timeout_in_nanoseconds);

        position += self->buffer_size_in_bytes;

#ifdef DEBUG_MULTIPLEXER1
        thorium_printf("DEBUG_MULTIPLEXER thorium_message_multiplexer_init index %d buffer %p\n", i,
                        (void *)multiplexed_buffer);
#endif

#ifdef DEBUG_MULTIPLEXER
        thorium_printf("DEBUG_MULTIPLEXER thorium_message_multiplexer_init (after) index %d buffer %p\n", i,
                        core_vector_at(&self->buffers, i));
#endif
    }

    if (thorium_multiplexer_policy_is_disabled(self->policy)) {
        CORE_BITMAP_SET_FLAG(self->flags, FLAG_DISABLED);
    }

    if (thorium_node_nodes(self->node) < thorium_multiplexer_policy_minimum_node_count(self->policy)) {
        CORE_BITMAP_SET_FLAG(self->flags, FLAG_DISABLED);
    }

    self->worker = NULL;

    argc = node->argc;
    argv = node->argv;

    /*
     * Aside from the policy, the end user can also disable the multiplexer code path
     */
    if (core_command_has_argument(argc, argv, OPTION_DISABLE_MULTIPLEXER)) {
        CORE_BITMAP_SET_FLAG(self->flags, FLAG_DISABLED);
    }

    self->last_send_event_count = 0;
    self->last_time = core_timer_get_nanoseconds(&self->timer);
    self->last_update_time = time(NULL);

    self->degree_of_aggregation_limit = self->policy->degree_of_aggregation_limit;

    thorium_router_init(&self->router, self->node->nodes,
                    TOPOLOGY_POLYTOPE);

    if (thorium_node_must_print_data(self->node)) {
        thorium_router_print(&self->router);
    }
}
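The init loop touches each per-destination buffer through core_vector_at(); the same access pattern generalizes to a visitor, sketched below (the helper itself is hypothetical):

/* Hypothetical visitor over the per-destination buffers, reusing the
 * core_vector_at() access pattern from the init loop above. */
void thorium_message_multiplexer_visit_buffers(struct thorium_message_multiplexer *self,
                void (*visit)(struct thorium_multiplexed_buffer *buffer))
{
    int i;
    int size;

    size = core_vector_size(&self->buffers);

    for (i = 0; i < size; ++i) {
        visit(core_vector_at(&self->buffers, i));
    }
}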
Example #9
void thorium_worker_init(struct thorium_worker *worker, int name, struct thorium_node *node)
{
    int capacity;
    int ephemeral_memory_block_size;
    int injected_buffer_ring_size;
    int argc;
    char **argv;

    worker->tick_count = 0;

    thorium_load_profiler_init(&worker->profiler);

    argc = thorium_node_argc(node);
    argv = thorium_node_argv(node);

#ifdef THORIUM_WORKER_DEBUG_INJECTION
    worker->counter_allocated_outbound_buffers = 0;
    worker->counter_freed_outbound_buffers_from_self = 0;
    worker->counter_freed_outbound_buffers_from_other_workers = 0;
    worker->counter_injected_outbound_buffers_other_local_workers = 0;
    worker->counter_injected_inbound_buffers_from_thorium_core = 0;
#endif

    core_map_init(&worker->actor_received_messages, sizeof(int), sizeof(int));

    worker->waiting_is_enabled = 0;
    worker->waiting_start_time = 0;

    core_timer_init(&worker->timer);
    capacity = THORIUM_WORKER_RING_CAPACITY;
    /*worker->work_queue = work_queue;*/
    worker->node = node;
    worker->name = name;
    core_bitmap_clear_bit_uint32_t(&worker->flags, FLAG_DEAD);
    worker->last_warning = 0;

    worker->last_wake_up_count = 0;

    /*worker->work_queue = &worker->works;*/

    /* There are two options:
     * 1. enable atomic operations for change visibility
     * 2. Use volatile head and tail.
     */
    core_fast_ring_init(&worker->actors_to_schedule, capacity, sizeof(struct thorium_actor *));

#ifdef THORIUM_NODE_INJECT_CLEAN_WORKER_BUFFERS
    injected_buffer_ring_size = capacity;
    core_fast_ring_init(&worker->injected_clean_outbound_buffers,
                    injected_buffer_ring_size, sizeof(void *));

    core_fast_ring_init(&worker->clean_message_ring_for_triage,
                    injected_buffer_ring_size,
                    sizeof(struct thorium_message));

    core_fast_queue_init(&worker->clean_message_queue_for_triage,
                    sizeof(struct thorium_message));
#endif

    thorium_scheduler_init(&worker->scheduler, thorium_node_name(worker->node),
                    worker->name);
    core_map_init(&worker->actors, sizeof(int), sizeof(int));
    core_map_iterator_init(&worker->actor_iterator, &worker->actors);

    core_fast_ring_init(&worker->outbound_message_queue, capacity, sizeof(struct thorium_message));

    core_fast_queue_init(&worker->outbound_message_queue_buffer, sizeof(struct thorium_message));

    /*
     * Zero all of the flags first, then clear the individual bits
     * explicitly; the original cleared bits before zeroing, which made
     * the clears redundant.
     */
    worker->flags = 0;
    core_bitmap_clear_bit_uint32_t(&worker->flags, FLAG_DEBUG);
    core_bitmap_clear_bit_uint32_t(&worker->flags, FLAG_BUSY);
    core_bitmap_clear_bit_uint32_t(&node->flags, FLAG_ENABLE_ACTOR_LOAD_PROFILER);
    core_bitmap_clear_bit_uint32_t(&worker->flags, FLAG_DEBUG_ACTORS);

    if (core_command_has_argument(argc, argv, DEBUG_WORKER_OPTION)) {

#if 0
        printf("DEBUG has option %s\n", DEBUG_WORKER_OPTION);
#endif

        if (thorium_node_name(worker->node) == 0
                    && thorium_worker_name(worker) == 0) {

#if 0
            printf("DEBUG setting bit FLAG_DEBUG_ACTORS because %s\n", DEBUG_WORKER_OPTION);
#endif
            core_bitmap_set_bit_uint32_t(&worker->flags, FLAG_DEBUG_ACTORS);
        }
    }

    worker->epoch_used_nanoseconds = 0;
    worker->loop_used_nanoseconds = 0;
    worker->scheduling_epoch_used_nanoseconds = 0;

    worker->started_in_thread = 0;

    /*
     * 2 MiB is the default size for Linux huge pages.
     * \see https://wiki.debian.org/Hugepages
     * \see http://lwn.net/Articles/376606/
     *
     * Use an 8 MiB block size for the ephemeral memory pool.
     */
    ephemeral_memory_block_size = 8388608;
    /*ephemeral_memory_block_size = 16777216;*/
    core_memory_pool_init(&worker->ephemeral_memory, ephemeral_memory_block_size,
                    MEMORY_POOL_NAME_WORKER_EPHEMERAL);

    core_memory_pool_disable_tracking(&worker->ephemeral_memory);
    core_memory_pool_enable_ephemeral_mode(&worker->ephemeral_memory);

#ifdef THORIUM_WORKER_ENABLE_LOCK
    core_lock_init(&worker->lock);
#endif

    core_set_init(&worker->evicted_actors, sizeof(int));

    core_memory_pool_init(&worker->outbound_message_memory_pool,
                    CORE_MEMORY_POOL_MESSAGE_BUFFER_BLOCK_SIZE, MEMORY_POOL_NAME_WORKER_OUTBOUND);

    /*
     * Disable the pool so that it uses allocate and free
     * directly.
     */

#ifdef CORE_MEMORY_POOL_DISABLE_MESSAGE_BUFFER_POOL
    core_memory_pool_disable(&worker->outbound_message_memory_pool);
#endif

    /*
     * Transport message buffers are fancy objects.
     */
    core_memory_pool_enable_normalization(&worker->outbound_message_memory_pool);
    core_memory_pool_enable_alignment(&worker->outbound_message_memory_pool);

    worker->ticks_without_production = 0;

    thorium_priority_assigner_init(&worker->assigner, thorium_worker_name(worker));

    /*
     * These variables should be set in thorium_worker_start, but when
     * running on 1 process with 1 thread, thorium_worker_start is
     * never called...
     */
    worker->last_report = time(NULL);
    worker->epoch_start_in_nanoseconds = core_timer_get_nanoseconds(&worker->timer);
    worker->loop_start_in_nanoseconds = worker->epoch_start_in_nanoseconds;
    worker->loop_end_in_nanoseconds = worker->loop_start_in_nanoseconds;
    worker->scheduling_epoch_start_in_nanoseconds = worker->epoch_start_in_nanoseconds;

    /*
     * Avoid valgrind warnings.
     */
    worker->epoch_load = 0;
}
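The epoch and loop timestamps set at the end make elapsed-time queries straightforward later on. A hedged sketch (the helper name is hypothetical):

/* Hypothetical helper: nanoseconds elapsed in the current epoch,
 * based on the timestamps initialized above. */
uint64_t thorium_worker_sketch_epoch_elapsed(struct thorium_worker *worker)
{
    return core_timer_get_nanoseconds(&worker->timer)
                    - worker->epoch_start_in_nanoseconds;
}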
Example #10
void thorium_balancer_balance(struct thorium_balancer *self)
{
    /*
     * The 95th percentile is useful:
     * \see http://en.wikipedia.org/wiki/Burstable_billing
     * \see http://www.init7.net/en/backbone/95-percent-rule
     */
    int load_percentile_50;
    struct core_timer timer;

    int i;
    struct core_vector loads;
    struct core_vector loads_unsorted;
    struct core_vector burdened_workers;
    struct core_vector stalled_workers;
    struct thorium_worker *worker;
    struct thorium_node *node;

    /*struct core_set *set;*/
    struct core_pair pair;
    struct core_vector_iterator vector_iterator;
    int old_worker;
    int actor_name;
    int messages;
    int maximum;
    int with_maximum;
    struct core_map *set;
    struct core_map_iterator set_iterator;
    int stalled_index;
    int stalled_count;
    int new_worker_index;
    struct core_vector migrations;
    struct thorium_migration migration;
    struct thorium_migration *migration_to_do;
    struct thorium_actor *actor;
    int candidates;

    int load_value;
    int remaining_load;
    int projected_load;

    struct core_vector actors_to_migrate;
    int total;
    int with_messages;
    int stalled_percentile;
    int burdened_percentile;

    int old_total;
    int old_load;
    int new_load;
    int predicted_new_load;
    struct core_pair *pair_pointer;
    struct thorium_worker *new_worker;
    /*int new_total;*/
    int actor_load;

    int test_stalled_index;
    int tests;
    int found_match;
    int spawned_actors;
    int killed_actors;
    int perfect;

#ifdef THORIUM_SCHEDULER_ENABLE_SYMMETRIC_SCHEDULING
    struct core_map symmetric_actor_scripts;
    int script;
#endif

    node = thorium_worker_pool_get_node(self->pool);

    spawned_actors = thorium_node_get_counter(node, CORE_COUNTER_SPAWNED_ACTORS);

    /* There is nothing to balance...
     */
    if (spawned_actors == 0) {
        return;
    }

    killed_actors = thorium_node_get_counter(node, CORE_COUNTER_KILLED_ACTORS);

    /*
     * The system can probably not be balanced to get in
     * a better shape anyway.
     */
    if (spawned_actors == self->last_spawned_actors
                    && killed_actors == self->last_killed_actors
                    && self->last_migrations == 0) {

        printf("SCHEDULER: balance can not be improved because nothing changed.\n");
        return;
    }

    /* Check if we have perfection
     */

    perfect = 1;
    for (i = 0; i < thorium_worker_pool_worker_count(self->pool); i++) {
        worker = thorium_worker_pool_get_worker(self->pool, i);

        load_value = thorium_worker_get_epoch_load(worker) * 100;

        if (load_value != 100) {
            perfect = 0;
            break;
        }
    }

    if (perfect) {
        printf("SCHEDULER: perfect balance can not be improved.\n");
        return;
    }

    /* update counters
     */
    self->last_spawned_actors = spawned_actors;
    self->last_killed_actors = killed_actors;

    /* Otherwise, try to balance things
     */
    core_timer_init(&timer);

    core_timer_start(&timer);

#ifdef THORIUM_SCHEDULER_ENABLE_SYMMETRIC_SCHEDULING
    core_map_init(&symmetric_actor_scripts, sizeof(int), sizeof(int));

    thorium_balancer_detect_symmetric_scripts(self, &symmetric_actor_scripts);
#endif

#ifdef THORIUM_WORKER_ENABLE_LOCK
    /* Lock all workers first
     */
    for (i = 0; i < thorium_worker_pool_worker_count(self->pool); i++) {
        worker = thorium_worker_pool_get_worker(self->pool, i);

        thorium_worker_lock(worker);
    }
#endif

    core_vector_init(&migrations, sizeof(struct thorium_migration));

#ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY
    printf("BALANCING\n");
#endif

    core_vector_init(&loads, sizeof(int));
    core_vector_init(&loads_unsorted, sizeof(int));
    core_vector_init(&burdened_workers, sizeof(struct core_pair));
    core_vector_init(&stalled_workers, sizeof(struct core_pair));

    core_vector_init(&actors_to_migrate, sizeof(struct core_pair));

    for (i = 0; i < thorium_worker_pool_worker_count(self->pool); i++) {
        worker = thorium_worker_pool_get_worker(self->pool, i);
        load_value = thorium_worker_get_scheduling_epoch_load(worker) * SCHEDULER_PRECISION;

#if 0
        printf("DEBUG LOAD %d %d\n", i, load_value);
#endif

        core_vector_push_back(&loads, &load_value);
        core_vector_push_back(&loads_unsorted, &load_value);
    }

    core_vector_sort_int(&loads);

    stalled_percentile = core_statistics_get_percentile_int(&loads, SCHEDULER_WINDOW);
    /*load_percentile_25 = core_statistics_get_percentile_int(&loads, 25);*/
    load_percentile_50 = core_statistics_get_percentile_int(&loads, 50);
    /*load_percentile_75 = core_statistics_get_percentile_int(&loads, 75);*/
    burdened_percentile = core_statistics_get_percentile_int(&loads, 100 - SCHEDULER_WINDOW);

#ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY
    printf("Percentiles for epoch loads: ");
    core_statistics_print_percentiles_int(&loads);
#endif

    for (i = 0; i < thorium_worker_pool_worker_count(self->pool); i++) {
        worker = thorium_worker_pool_get_worker(self->pool, i);
        load_value = core_vector_at_as_int(&loads_unsorted, i);

        set = thorium_worker_get_actors(worker);

        if (stalled_percentile == burdened_percentile) {

#ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY
            printf("scheduling_class:%s ",
                            THORIUM_CLASS_NORMAL_STRING);
#endif

        } else if (load_value <= stalled_percentile) {

#ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY
            printf("scheduling_class:%s ",
                            THORIUM_CLASS_STALLED_STRING);
#endif

            core_pair_init(&pair, load_value, i);
            core_vector_push_back(&stalled_workers, &pair);

        } else if (load_value >= burdened_percentile) {

#ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY
            printf("scheduling_class:%s ",
                            THORIUM_CLASS_BURDENED_STRING);
#endif

            core_pair_init(&pair, load_value, i);
            core_vector_push_back(&burdened_workers, &pair);
        } else {
#ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY
            printf("scheduling_class:%s ",
                            THORIUM_CLASS_NORMAL_STRING);
#endif
        }

#ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY
        thorium_worker_print_actors(worker, self);
#endif

    }

    core_vector_sort_int_reverse(&burdened_workers);
    core_vector_sort_int(&stalled_workers);

    stalled_count = core_vector_size(&stalled_workers);

#ifdef THORIUM_SCHEDULER_ENABLE_VERBOSITY
    printf("MIGRATIONS (stalled: %d, burdened: %d)\n", (int)core_vector_size(&stalled_workers),
                    (int)core_vector_size(&burdened_workers));
#endif

    stalled_index = 0;
    core_vector_iterator_init(&vector_iterator, &burdened_workers);

    while (stalled_count > 0
                    && core_vector_iterator_get_next_value(&vector_iterator, &pair)) {

        old_worker = core_pair_get_second(&pair);

        worker = thorium_worker_pool_get_worker(self->pool, old_worker);
        set = thorium_worker_get_actors(worker);

        /*
        thorium_worker_print_actors(worker);
        printf("\n");
        */

        /*
         * Lock the worker and try to select actors for migration
         */
        core_map_iterator_init(&set_iterator, set);

        maximum = -1;
        with_maximum = 0;
        total = 0;
        with_messages = 0;

        while (core_map_iterator_get_next_key_and_value(&set_iterator, &actor_name, NULL)) {

            actor = thorium_node_get_actor_from_name(thorium_worker_pool_get_node(self->pool), actor_name);
            messages = thorium_balancer_get_actor_production(self, actor);

            if (maximum == -1 || messages > maximum) {
                maximum = messages;
                with_maximum = 1;
            } else if (messages == maximum) {
                with_maximum++;
            }

            if (messages > 0) {
                ++with_messages;
            }

            total += messages;
        }

        core_map_iterator_destroy(&set_iterator);

        core_map_iterator_init(&set_iterator, set);

        --with_maximum;

        candidates = 0;
        load_value = thorium_worker_get_scheduling_epoch_load(worker) * SCHEDULER_PRECISION;

        remaining_load = load_value;

#if 0
        printf("maximum %d with_maximum %d\n", maximum, with_maximum);
#endif

        while (core_map_iterator_get_next_key_and_value(&set_iterator, &actor_name, NULL)) {

            actor = thorium_node_get_actor_from_name(thorium_worker_pool_get_node(self->pool), actor_name);

            if (actor == NULL) {
                continue;
            }
            messages = thorium_balancer_get_actor_production(self, actor);

#ifdef THORIUM_SCHEDULER_ENABLE_SYMMETRIC_SCHEDULING
            script = thorium_actor_script(actor);


            /* symmetric actors are migrated elsewhere.
             */
            if (core_map_get_value(&symmetric_actor_scripts, &script, NULL)) {
                continue;
            }
#endif

            /* Simulate the remaining load
             */
            projected_load = remaining_load;
            projected_load -= ((0.0 + messages) / total) * load_value;

#ifdef THORIUM_SCHEDULER_DEBUG
            printf(" TESTING actor %d, production was %d, projected_load is %d (- %d * (1 - %d/%d)\n",
                            actor_name, messages, projected_load,
                            load_value, messages, total);
#endif

            /* An actor without any queued messages should not be migrated
             */
            if (messages > 0
                            && ((with_maximum > 0 && messages == maximum) || messages < maximum)
                /*
                 * Avoid removing too many actors because
                 * generating a stalled one is not desired
                 */
                    && (projected_load >= load_percentile_50

                /*
                 * The previous rule does not apply when there
                 * are 2 actors.
                 */
                   || with_messages == 2) ) {

                remaining_load = projected_load;

                candidates++;

                if (messages == maximum) {
                    --with_maximum;
                }


                core_pair_init(&pair, messages, actor_name);
                core_vector_push_back(&actors_to_migrate, &pair);

#ifdef THORIUM_SCHEDULER_DEBUG
                printf("early CANDIDATE for migration: actor %d, worker %d\n",
                                actor_name, old_worker);
#endif
            }
        }
        core_map_iterator_destroy(&set_iterator);

    }

    core_vector_iterator_destroy(&vector_iterator);

    /* Sort the candidates
     */

    /*
    core_vector_sort_int(&actors_to_migrate);

    printf("Percentiles for production: ");
    core_statistics_print_percentiles_int(&actors_to_migrate);
    */

    /* Sort them in reverse order.
     */
    core_vector_sort_int_reverse(&actors_to_migrate);

    core_vector_iterator_init(&vector_iterator, &actors_to_migrate);

    /* For each highly active actor,
     * try to match it with a stalled worker
     */
    while (core_vector_iterator_get_next_value(&vector_iterator, &pair)) {

        actor_name = core_pair_get_second(&pair);

        actor = thorium_node_get_actor_from_name(thorium_worker_pool_get_node(self->pool), actor_name);

        if (actor == NULL) {
           continue;
        }

        messages = thorium_balancer_get_actor_production(self, actor);
        old_worker = thorium_actor_assigned_worker(actor);

        worker = thorium_worker_pool_get_worker(self->pool, old_worker);

        /* old_total cannot be 0 because otherwise the worker would not
         * be burdened.
         */
        old_total = thorium_worker_get_production(worker, self);
        with_messages = thorium_worker_get_producer_count(worker, self);
        old_load = thorium_worker_get_scheduling_epoch_load(worker) * SCHEDULER_PRECISION;
        actor_load = ((0.0 + messages) / old_total) * old_load;

        /* Try to find a stalled worker that can take it.
         */

        test_stalled_index = stalled_index;
        tests = 0;
        predicted_new_load = 0;

        found_match = 0;
        while (tests < stalled_count) {

            core_vector_get_value(&stalled_workers, test_stalled_index, &pair);
            new_worker_index = core_pair_get_second(&pair);

            new_worker = thorium_worker_pool_get_worker(self->pool, new_worker_index);
            new_load = thorium_worker_get_scheduling_epoch_load(new_worker) * SCHEDULER_PRECISION;
        /*new_total = thorium_worker_get_production(new_worker);*/

            predicted_new_load = new_load + actor_load;

            if (predicted_new_load > SCHEDULER_PRECISION /* && with_messages != 2 */) {
#ifdef THORIUM_SCHEDULER_DEBUG
                printf("Scheduler: skipping actor %d, predicted load is %d >= 100\n",
                           actor_name, predicted_new_load);
#endif

                ++tests;
                ++test_stalled_index;

                if (test_stalled_index == stalled_count) {
                    test_stalled_index = 0;
                }
                continue;
            }

            /* Otherwise, this stalled worker is fine...
             */
            stalled_index = test_stalled_index;
            found_match = 1;

            break;
        }

        /* This actor can not be migrated to any stalled worker.
         */
        if (!found_match) {
            continue;
        }

        /* Otherwise, update the load of the stalled one and go forward with the change.
         */

        pair_pointer = (struct core_pair *)core_vector_at(&stalled_workers, stalled_index);

        core_pair_set_first(pair_pointer, predicted_new_load);

        ++stalled_index;

        if (stalled_index == stalled_count) {
            stalled_index = 0;
        }


#if 0
        new_worker = thorium_worker_pool_get_worker(pool, new_worker_index);
        printf(" CANDIDATE: actor %d old worker %d (%d - %d = %d) new worker %d (%d + %d = %d)\n",
                        actor_name,
                        old_worker, value, messages, new_score,
                        new_worker_index, new_worker_old_score, messages, new_worker_new_score);
#endif

        thorium_migration_init(&migration, actor_name, old_worker, new_worker_index);
        core_vector_push_back(&migrations, &migration);
        thorium_migration_destroy(&migration);

    }

    core_vector_iterator_destroy(&vector_iterator);

    core_vector_destroy(&stalled_workers);
    core_vector_destroy(&burdened_workers);
    core_vector_destroy(&loads);
    core_vector_destroy(&loads_unsorted);
    core_vector_destroy(&actors_to_migrate);

    /* Update the last values
     */
    for (i = 0; i < thorium_worker_pool_worker_count(self->pool); i++) {

        worker = thorium_worker_pool_get_worker(self->pool, i);

        set = thorium_worker_get_actors(worker);

        core_map_iterator_init(&set_iterator, set);

        while (core_map_iterator_get_next_key_and_value(&set_iterator, &actor_name, NULL)) {
            actor = thorium_node_get_actor_from_name(thorium_worker_pool_get_node(self->pool), actor_name);
            thorium_balancer_update_actor_production(self, actor);
        }
        core_map_iterator_destroy(&set_iterator);

        thorium_worker_reset_scheduling_epoch(worker);
    }

#ifdef THORIUM_SCHEDULER_ENABLE_SYMMETRIC_SCHEDULING
    /* Generate migrations for symmetric actors.
     */

    thorium_balancer_generate_symmetric_migrations(self, &symmetric_actor_scripts, &migrations);
#endif

    /* Actually do the migrations
     */
    core_vector_iterator_init(&vector_iterator, &migrations);

    while (core_vector_iterator_next(&vector_iterator, (void **)&migration_to_do)) {

        thorium_balancer_migrate(self, migration_to_do);
    }

    core_vector_iterator_destroy(&vector_iterator);

    self->last_migrations = core_vector_size(&migrations);

    core_vector_destroy(&migrations);

#ifdef THORIUM_WORKER_ENABLE_LOCK
    /* Unlock all workers
     */
    for (i = 0; i < thorium_worker_pool_worker_count(self->pool); i++) {
        worker = thorium_worker_pool_get_worker(self->pool, i);

        thorium_worker_unlock(worker);
    }
#endif

#ifdef THORIUM_SCHEDULER_ENABLE_SYMMETRIC_SCHEDULING
    core_map_destroy(&symmetric_actor_scripts);
#endif

    core_timer_stop(&timer);

    printf("SCHEDULER: elapsed time for balancing: %d us, %d migrations performed\n",
                    (int)(core_timer_get_elapsed_nanoseconds(&timer) / 1000),
                    self->last_migrations);
}
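The stalled/normal/burdened classification in the middle of this function reduces to comparing each load value against two percentile cut-offs. A compact restatement of that rule, under the same SCHEDULER_WINDOW convention:

enum worker_class { CLASS_STALLED, CLASS_NORMAL, CLASS_BURDENED };

/* Restatement of the classification rule used in the loop above. */
static enum worker_class classify_worker(int load_value,
                int stalled_percentile, int burdened_percentile)
{
    /* Degenerate window: every worker is considered normal. */
    if (stalled_percentile == burdened_percentile) {
        return CLASS_NORMAL;
    }

    if (load_value <= stalled_percentile) {
        return CLASS_STALLED;
    }

    if (load_value >= burdened_percentile) {
        return CLASS_BURDENED;
    }

    return CLASS_NORMAL;
}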