void spate_start_reply_builder(struct thorium_actor *self, struct thorium_message *message) { void *buffer; int spawner; struct spate *concrete_self; concrete_self = (struct spate *)thorium_actor_concrete_actor(self); buffer = thorium_message_buffer(message); core_vector_unpack(&concrete_self->graph_stores, buffer); thorium_actor_log(self, "%s/%d has %d graph stores", thorium_actor_script_name(self), thorium_actor_name(self), (int)core_vector_size(&concrete_self->graph_stores)); spawner = thorium_actor_get_spawner(self, &concrete_self->initial_actors); concrete_self->unitig_manager = THORIUM_ACTOR_SPAWNING_IN_PROGRESS; thorium_actor_add_action_with_condition(self, ACTION_SPAWN_REPLY, spate_spawn_reply_unitig_manager, &concrete_self->unitig_manager, THORIUM_ACTOR_SPAWNING_IN_PROGRESS); thorium_actor_send_int(self, spawner, ACTION_SPAWN, SCRIPT_UNITIG_MANAGER); }
void spate_help(struct thorium_actor *self) { printf("Application: %s\n", thorium_actor_script_name(self)); printf(" Version: %s\n", thorium_script_version(thorium_actor_get_script(self))); printf(" Description: %s\n", thorium_script_description(thorium_actor_get_script(self))); printf(" Library: biosal (biological sequence actor library)\n"); printf(" Engine: thorium (distributed event-driven native actor machine emulator)\n"); printf("\n"); printf("Usage:\n"); printf(" mpiexec -n <ranks> spate -threads-per-node <threads> [-k <kmer_length>] [-i <file>] [-p <file1> <file2>] [-s <file>] -o <output>\n"); printf("\n"); printf("Default values:\n"); printf(" -k %d (no limit and no recompilation is required)\n", BIOSAL_DEFAULT_KMER_LENGTH); printf(" -threads-per-node %d\n", 1); printf(" -o %s\n", BIOSAL_DEFAULT_OUTPUT); printf("\n"); printf("Example:\n"); printf(" mpiexec -n 128 spate -threads-per-node 24 -k 51 -i interleaved_file_1.fastq -i interleaved_file_2.fastq -o my-assembly\n"); printf("\n"); printf("Supported input formats:\n"); printf(" .fastq, .fastq.gz, .fasta, .fasta.gz\n"); printf(" .fastq can be named .fq, fasta can be named .fa, and .fasta can be multiline.\n"); }
/*
 * Print the actors found in the queue selected by `priority`.
 *
 * queue:    the FIFO scheduler.
 * priority: passed to thorium_fifo_scheduler_select_queue().
 * name:     label printed next to the priority.
 * node, worker: identifiers printed as a prefix on each line.
 *
 * Every actor is dequeued and enqueued again right away; this is repeated
 * once per element counted before the loop starts.
 */
void thorium_fifo_scheduler_print_with_priority(struct thorium_fifo_scheduler *queue, int priority, const char *name, int node, int worker)
{
    struct core_fast_queue *selected_queue = thorium_fifo_scheduler_select_queue(queue, priority);
    int actor_count = core_fast_queue_size(selected_queue);
    struct thorium_actor *current;
    int position;

    printf("node/%d worker/%d scheduling_queue: Priority Queue %d (%s), actors: %d\n",
           node, worker, priority, name, actor_count);

    for (position = 0; position < actor_count; ++position) {
        /* Rotate the queue by one element to look at its head. */
        core_fast_queue_dequeue(selected_queue, &current);
        core_fast_queue_enqueue(selected_queue, &current);

        printf("node/%d worker/%d [%i] actor %s/%d (%d messages)\n",
               node, worker, position,
               thorium_actor_script_name(current),
               thorium_actor_name(current),
               thorium_actor_get_mailbox_size(current));
    }
}
void thorium_cfs_scheduler_print(struct thorium_scheduler *self) { struct thorium_cfs_scheduler *concrete_self; struct core_red_black_tree_iterator iterator; uint64_t virtual_runtime; struct thorium_actor *actor; int i; struct core_timer timer; core_timer_init(&timer); concrete_self = self->concrete_self; core_red_black_tree_iterator_init(&iterator, &concrete_self->tree); printf("[cfs_scheduler] %" PRIu64 " ns, timeline contains %d actors\n", core_timer_get_nanoseconds(&timer), core_red_black_tree_size(&concrete_self->tree)); i = 0; while (core_red_black_tree_iterator_get_next_key_and_value(&iterator, &virtual_runtime, &actor)) { printf("[%d] virtual_runtime= %" PRIu64 " actor= %s/%d\n", i, virtual_runtime, thorium_actor_script_name(actor), thorium_actor_name(actor)); ++i; } core_red_black_tree_iterator_destroy(&iterator); core_timer_destroy(&timer); }
void process_init(struct thorium_actor *self) { struct process *concrete_self; int argc; char **argv; argc = thorium_actor_argc(self); argv = thorium_actor_argv(self); concrete_self = thorium_actor_concrete_actor(self); core_vector_init(&concrete_self->actors, sizeof(int)); thorium_actor_add_action(self, ACTION_START, process_start); thorium_actor_add_action(self, ACTION_ASK_TO_STOP, process_stop); thorium_actor_add_action(self, ACTION_PING, process_ping); thorium_actor_add_action(self, ACTION_PING_REPLY, process_ping_reply); thorium_actor_add_action(self, ACTION_NOTIFY, process_notify); concrete_self->passed = 0; concrete_self->failed = 0; concrete_self->events = 0; concrete_self->minimum_buffer_size = 16; concrete_self->maximum_buffer_size = 512*1024; if (core_command_has_argument(argc, argv, MIN_BUFFER_SIZE_OPTION)) { concrete_self->maximum_buffer_size = core_command_get_argument_value_int(argc, argv, MIN_BUFFER_SIZE_OPTION); } if (core_command_has_argument(argc, argv, MAX_BUFFER_SIZE_OPTION)) { concrete_self->maximum_buffer_size = core_command_get_argument_value_int(argc, argv, MAX_BUFFER_SIZE_OPTION); } concrete_self->event_count = 100000; if (core_command_has_argument(argc, argv, EVENT_COUNT_OPTION)) { concrete_self->event_count = core_command_get_argument_value_int(argc, argv, EVENT_COUNT_OPTION); } concrete_self->concurrent_event_count = 8; if (core_command_has_argument(argc, argv, CONCURRENT_EVENT_COUNT_OPTION)) { concrete_self->concurrent_event_count = core_command_get_argument_value_int(argc, argv, CONCURRENT_EVENT_COUNT_OPTION); } concrete_self->active_messages = 0; printf("%s/%d using %s %d %s %d %s %d %s %d\n", thorium_actor_script_name(self), thorium_actor_name(self), MIN_BUFFER_SIZE_OPTION, concrete_self->minimum_buffer_size, MAX_BUFFER_SIZE_OPTION, concrete_self->maximum_buffer_size, EVENT_COUNT_OPTION, concrete_self->event_count, CONCURRENT_EVENT_COUNT_OPTION, concrete_self->concurrent_event_count); }
/*
 * Initialize a writer process: clear the has_file flag and log that the
 * actor is ready.
 */
void core_writer_process_init(struct thorium_actor *self)
{
    struct core_writer_process *writer = thorium_actor_concrete_actor(self);

    writer->has_file = 0;

    thorium_actor_log(self, "%s/%d is ready to do input/output operations\n",
                    thorium_actor_script_name(self),
                    thorium_actor_name(self));
}
void process_stop(struct thorium_actor *self, struct thorium_message *message) { struct process *concrete_self; int total; concrete_self = (struct process *)thorium_actor_concrete_actor(self); total = concrete_self->passed + concrete_self->failed; printf("%s/%d PASSED %d/%d, FAILED %d/%d\n", thorium_actor_script_name(self), thorium_actor_name(self), concrete_self->passed, total, concrete_self->failed, total); thorium_actor_send_to_self_empty(self, ACTION_STOP); }
void biosal_assembly_arc_classifier_init(struct thorium_actor *self) { struct biosal_assembly_arc_classifier *concrete_self; concrete_self = (struct biosal_assembly_arc_classifier *)thorium_actor_concrete_actor(self); concrete_self->kmer_length = -1; thorium_actor_add_action(self, ACTION_ASK_TO_STOP, thorium_actor_ask_to_stop); thorium_actor_add_action(self, ACTION_SET_KMER_LENGTH, biosal_assembly_arc_classifier_set_kmer_length); /* * * Configure the codec. */ biosal_dna_codec_init(&concrete_self->codec); if (biosal_dna_codec_must_use_two_bit_encoding(&concrete_self->codec, thorium_actor_get_node_count(self))) { biosal_dna_codec_enable_two_bit_encoding(&concrete_self->codec); } core_vector_init(&concrete_self->consumers, sizeof(int)); thorium_actor_add_action(self, ACTION_ASSEMBLY_PUSH_ARC_BLOCK, biosal_assembly_arc_classifier_push_arc_block); concrete_self->received_blocks = 0; core_vector_init(&concrete_self->pending_requests, sizeof(int)); concrete_self->active_requests = 0; concrete_self->producer_is_waiting = 0; concrete_self->maximum_pending_request_count = thorium_actor_active_message_limit(self); concrete_self->consumer_count_above_threshold = 0; printf("%s/%d is now active, ACTIVE_MESSAGE_LIMIT %d\n", thorium_actor_script_name(self), thorium_actor_name(self), concrete_self->maximum_pending_request_count); }
/*
 * Initialize a coverage distribution actor: create the distribution map
 * (int keys, uint64_t values), reset the actual/expected counters and log
 * that the actor is ready.
 */
void biosal_coverage_distribution_init(struct thorium_actor *self)
{
    struct biosal_coverage_distribution *distribution_actor =
        (struct biosal_coverage_distribution *)thorium_actor_concrete_actor(self);

    core_map_init(&distribution_actor->distribution, sizeof(int), sizeof(uint64_t));

#ifdef BIOSAL_COVERAGE_DISTRIBUTION_DEBUG
    thorium_actor_log(self, "DISTRIBUTION IS READY\n");
#endif

    distribution_actor->actual = 0;
    distribution_actor->expected = 0;

    thorium_actor_log(self, "%s/%d is ready\n",
                    thorium_actor_script_name(self),
                    thorium_actor_name(self));
}
void biosal_input_stream_count_reply_mock(struct thorium_actor *self, struct thorium_message *message) { struct biosal_input_stream *concrete_self; void *buffer; int count; struct core_vector mega_blocks; char *file; struct core_memory_pool *ephemeral_memory; uint64_t result; struct biosal_mega_block *block; concrete_self = (struct biosal_input_stream *)thorium_actor_concrete_actor(self); buffer = thorium_message_buffer(message); count = thorium_message_count(message); ephemeral_memory = thorium_actor_get_ephemeral_memory(self); core_vector_init(&mega_blocks, 0); core_vector_set_memory_pool(&mega_blocks, ephemeral_memory); core_vector_unpack(&mega_blocks, buffer); block = core_vector_at_last(&mega_blocks); result = biosal_mega_block_get_entries(block); #if 0 file = core_string_get(&concrete_self->file_for_parallel_counting); #endif file = concrete_self->file_name; printf("%s/%d COUNT_IN_PARALLEL result for %s is %" PRIu64 "\n", thorium_actor_script_name(self), thorium_actor_name(self), file, result); core_vector_destroy(&mega_blocks); thorium_actor_send_buffer(self, concrete_self->controller, ACTION_INPUT_COUNT_IN_PARALLEL_REPLY, count, buffer); }
void biosal_input_stream_count_in_parallel_mock(struct thorium_actor *self, struct thorium_message *message) { struct biosal_input_stream *concrete_self; void *buffer; int count; char *file; concrete_self = (struct biosal_input_stream *)thorium_actor_concrete_actor(self); buffer = thorium_message_buffer(message); count = thorium_message_count(message); file = concrete_self->file_name; printf("%s/%d receives ACTION_INPUT_COUNT_IN_PARALLEL file %s\n", thorium_actor_script_name(self), thorium_actor_name(self), file); thorium_actor_send_to_self_buffer(self, ACTION_INPUT_COUNT, count, buffer); }
/*
 * Print the fraction of vertices consumed so far.
 *
 * A line is printed when the consumed count is 0, is a multiple of the
 * stride (table size / 20), or equals the table size. The suffix
 * " FINISHED" is appended when the consumed count equals the table size.
 */
void biosal_assembly_graph_store_print_progress(struct thorium_actor *self)
{
    struct biosal_assembly_graph_store *concrete_self;
    uint64_t total;
    uint64_t stride;
    uint64_t current_value;
    int steps;
    float ratio;
    char finished[] = " FINISHED";
    char not_finished[] = "";
    char *state;

    concrete_self = thorium_actor_concrete_actor(self);
    total = core_map_size(&concrete_self->table);
    steps = 20;
    stride = total / steps;

    /*
     * With fewer than `steps` entries the integer division yields 0 and
     * the modulo below would divide by zero (undefined behavior).
     * Fall back to reporting every vertex.
     */
    if (stride == 0) {
        stride = 1;
    }

    current_value = concrete_self->consumed_canonical_vertex_count;

    if ((current_value == 0)
                    || (current_value % stride == 0)
                    || (current_value == total)) {

        state = not_finished;

        if (current_value == total) {
            state = finished;
        }

        ratio = (0.0 + current_value) / total;

        printf("%s/%d %.2f of vertices were consumed%s\n",
                        thorium_actor_script_name(self),
                        thorium_actor_name(self),
                        ratio, state);
    }
}
void biosal_assembly_graph_store_mark_as_used(struct thorium_actor *self, struct biosal_assembly_vertex *vertex, int source, int path) { struct biosal_assembly_graph_store *concrete_self; CORE_DEBUGGER_ASSERT(source >= 0); CORE_DEBUGGER_ASSERT(path >= 0); concrete_self = thorium_actor_concrete_actor(self); if (!biosal_assembly_vertex_get_flag(vertex, BIOSAL_VERTEX_FLAG_USED)) { biosal_assembly_vertex_set_flag(vertex, BIOSAL_VERTEX_FLAG_USED); ++concrete_self->consumed_canonical_vertex_count; biosal_assembly_graph_store_print_progress(self); } #if 0 printf("%s set last_actor %d last_path_index %d\n", thorium_actor_script_name(self), source, path); #endif biosal_assembly_vertex_set_last_actor(vertex, source, path); }
void biosal_assembly_graph_store_push_data(struct thorium_actor *self, struct thorium_message *message) { struct biosal_assembly_graph_store *concrete_self; int name; int source; concrete_self = thorium_actor_concrete_actor(self); source = thorium_message_source(message); concrete_self->source = source; name = thorium_actor_name(self); core_map_init(&concrete_self->coverage_distribution, sizeof(int), sizeof(uint64_t)); printf("%s/%d: local table has %" PRIu64" canonical kmers (%" PRIu64 " kmers)\n", thorium_actor_script_name(self), name, core_map_size(&concrete_self->table), 2 * core_map_size(&concrete_self->table)); core_memory_pool_examine(&concrete_self->persistent_memory); core_map_iterator_init(&concrete_self->iterator, &concrete_self->table); thorium_actor_send_to_self_empty(self, ACTION_YIELD); }
void thorium_worker_print_actors(struct thorium_worker *worker, struct thorium_balancer *scheduler) { struct core_map_iterator iterator; int name; int count; struct thorium_actor *actor; int producers; int consumers; int received; int difference; int script; struct core_map distribution; int frequency; struct thorium_script *script_object; int dead; int node_name; int worker_name; int previous_amount; node_name = thorium_node_name(worker->node); worker_name = worker->name; core_map_iterator_init(&iterator, &worker->actors); printf("node/%d worker/%d %d queued messages, received: %d busy: %d load: %f ring: %d scheduled actors: %d/%d\n", node_name, worker_name, thorium_worker_get_scheduled_message_count(worker), thorium_worker_get_sum_of_received_actor_messages(worker), thorium_worker_is_busy(worker), thorium_worker_get_scheduling_epoch_load(worker), core_fast_ring_size_from_producer(&worker->actors_to_schedule), thorium_scheduler_size(&worker->scheduler), (int)core_map_size(&worker->actors)); core_map_init(&distribution, sizeof(int), sizeof(int)); while (core_map_iterator_get_next_key_and_value(&iterator, &name, NULL)) { actor = thorium_node_get_actor_from_name(worker->node, name); if (actor == NULL) { continue; } dead = thorium_actor_dead(actor); if (dead) { continue; } count = thorium_actor_get_mailbox_size(actor); received = thorium_actor_get_sum_of_received_messages(actor); producers = core_map_size(thorium_actor_get_received_messages(actor)); consumers = core_map_size(thorium_actor_get_sent_messages(actor)); previous_amount = 0; core_map_get_value(&worker->actor_received_messages, &name, &previous_amount); difference = received - previous_amount;; if (!core_map_update_value(&worker->actor_received_messages, &name, &received)) { core_map_add_value(&worker->actor_received_messages, &name, &received); } printf(" [%s/%d] mailbox: %d received: %d (+%d) producers: %d consumers: %d\n", thorium_actor_script_name(actor), name, count, received, difference, producers, 
consumers); script = thorium_actor_script(actor); if (core_map_get_value(&distribution, &script, &frequency)) { ++frequency; core_map_update_value(&distribution, &script, &frequency); } else { frequency = 1; core_map_add_value(&distribution, &script, &frequency); } } /*printf("\n");*/ core_map_iterator_destroy(&iterator); core_map_iterator_init(&iterator, &distribution); printf("node/%d worker/%d Frequency list\n", node_name, worker_name); while (core_map_iterator_get_next_key_and_value(&iterator, &script, &frequency)) { script_object = thorium_node_find_script(worker->node, script); CORE_DEBUGGER_ASSERT(script_object != NULL); printf("node/%d worker/%d Frequency %s => %d\n", node_name, worker->name, thorium_script_name(script_object), frequency); } core_map_iterator_destroy(&iterator); core_map_destroy(&distribution); }
void biosal_assembly_graph_store_receive(struct thorium_actor *self, struct thorium_message *message) { int tag; /*void *buffer;*/ struct biosal_assembly_graph_store *concrete_self; double value; struct biosal_dna_kmer kmer; /*struct core_memory_pool *ephemeral_memory;*/ int customer; int big_key_size; int big_value_size; if (thorium_actor_take_action(self, message)) { return; } /*ephemeral_memory = thorium_actor_get_ephemeral_memory(self);*/ concrete_self = thorium_actor_concrete_actor(self); tag = thorium_message_action(message); /*buffer = thorium_message_buffer(message);*/ if (tag == ACTION_SET_KMER_LENGTH) { thorium_message_unpack_int(message, 0, &concrete_self->kmer_length); biosal_dna_kmer_init_mock(&kmer, concrete_self->kmer_length, &concrete_self->storage_codec, thorium_actor_get_ephemeral_memory(self)); concrete_self->key_length_in_bytes = biosal_dna_kmer_pack_size(&kmer, concrete_self->kmer_length, &concrete_self->storage_codec); biosal_dna_kmer_destroy(&kmer, thorium_actor_get_ephemeral_memory(self)); big_key_size = concrete_self->key_length_in_bytes; big_value_size = sizeof(struct biosal_assembly_vertex); core_map_init(&concrete_self->table, big_key_size, big_value_size); core_map_set_memory_pool(&concrete_self->table, &concrete_self->persistent_memory); printf("DEBUG big_key_size %d big_value_size %d\n", big_key_size, big_value_size); /* * Configure the map for better performance. */ core_map_disable_deletion_support(&concrete_self->table); /* * The threshold of the map is not very important because * requests that hit the map have to first arrive as messages, * which are slow. */ core_map_set_threshold(&concrete_self->table, 0.95); thorium_actor_send_reply_empty(self, ACTION_SET_KMER_LENGTH_REPLY); } else if (tag == ACTION_ASSEMBLY_GET_KMER_LENGTH) { thorium_actor_send_reply_int(self, ACTION_ASSEMBLY_GET_KMER_LENGTH_REPLY, concrete_self->kmer_length); } else if (tag == ACTION_RESET) { /* * Reset the iterator. 
*/ core_map_iterator_init(&concrete_self->iterator, &concrete_self->table); printf("DEBUG unitig_vertex_count %d\n", concrete_self->unitig_vertex_count); thorium_actor_send_reply_empty(self, ACTION_RESET_REPLY); } else if (tag == ACTION_SEQUENCE_STORE_REQUEST_PROGRESS_REPLY) { thorium_message_unpack_double(message, 0, &value); core_map_set_current_size_estimate(&concrete_self->table, value); } else if (tag == ACTION_ASK_TO_STOP) { printf("%s/%d received %d arc blocks\n", thorium_actor_script_name(self), thorium_actor_name(self), concrete_self->received_arc_block_count); thorium_actor_ask_to_stop(self, message); } else if (tag == ACTION_SET_CONSUMER) { thorium_message_unpack_int(message, 0, &customer); printf("%s/%d will use coverage distribution %d\n", thorium_actor_script_name(self), thorium_actor_name(self), customer); concrete_self->customer = customer; thorium_actor_send_reply_empty(self, ACTION_SET_CONSUMER_REPLY); } else if (tag == ACTION_PUSH_DATA) { printf("%s/%d receives ACTION_PUSH_DATA\n", thorium_actor_script_name(self), thorium_actor_name(self)); biosal_assembly_graph_store_push_data(self, message); } else if (tag == ACTION_STORE_GET_ENTRY_COUNT) { thorium_actor_send_reply_uint64_t(self, ACTION_STORE_GET_ENTRY_COUNT_REPLY, concrete_self->received); } else if (tag == ACTION_GET_RECEIVED_ARC_COUNT) { thorium_actor_send_reply_uint64_t(self, ACTION_GET_RECEIVED_ARC_COUNT_REPLY, concrete_self->received_arc_count); } }
void biosal_assembly_graph_store_yield_reply(struct thorium_actor *self, struct thorium_message *message) { struct biosal_dna_kmer kmer; void *key; struct biosal_assembly_vertex *value; int coverage; int customer; uint64_t *count; int new_count; void *new_buffer; struct thorium_message new_message; struct core_memory_pool *ephemeral_memory; struct biosal_assembly_graph_store *concrete_self; int i; int max; ephemeral_memory = thorium_actor_get_ephemeral_memory(self); concrete_self = thorium_actor_concrete_actor(self); customer = concrete_self->customer; #if 0 printf("YIELD REPLY\n"); #endif i = 0; max = 1024; key = NULL; value = NULL; while (i < max && core_map_iterator_has_next(&concrete_self->iterator)) { core_map_iterator_next(&concrete_self->iterator, (void **)&key, (void **)&value); biosal_dna_kmer_init_empty(&kmer); biosal_dna_kmer_unpack(&kmer, key, concrete_self->kmer_length, ephemeral_memory, &concrete_self->storage_codec); coverage = biosal_assembly_vertex_coverage_depth(value); count = (uint64_t *)core_map_get(&concrete_self->coverage_distribution, &coverage); if (count == NULL) { count = (uint64_t *)core_map_add(&concrete_self->coverage_distribution, &coverage); (*count) = 0; } /* increment for the lowest kmer (canonical) */ (*count)++; biosal_dna_kmer_destroy(&kmer, ephemeral_memory); ++i; } /* yield again if the iterator is not at the end */ if (core_map_iterator_has_next(&concrete_self->iterator)) { #if 0 printf("yield ! 
%d\n", i); #endif thorium_actor_send_to_self_empty(self, ACTION_YIELD); return; } /* printf("ready...\n"); */ core_map_iterator_destroy(&concrete_self->iterator); new_count = core_map_pack_size(&concrete_self->coverage_distribution); new_buffer = thorium_actor_allocate(self, new_count); core_map_pack(&concrete_self->coverage_distribution, new_buffer); printf("SENDING %s/%d sends map to %d, %d bytes / %d entries\n", thorium_actor_script_name(self), thorium_actor_name(self), customer, new_count, (int)core_map_size(&concrete_self->coverage_distribution)); thorium_message_init(&new_message, ACTION_PUSH_DATA, new_count, new_buffer); thorium_actor_send(self, customer, &new_message); thorium_message_destroy(&new_message); core_map_destroy(&concrete_self->coverage_distribution); thorium_actor_send_empty(self, concrete_self->source, ACTION_PUSH_DATA_REPLY); }
void spate_start(struct thorium_actor *self, struct thorium_message *message) { void *buffer; int name; struct spate *concrete_self; int spawner; char *directory_name; int already_created; int argc; char **argv; #ifdef SPATE_VERBOSE thorium_actor_send_to_self_int(self, ACTION_ENABLE_LOG_LEVEL, LOG_LEVEL_DEFAULT); #endif concrete_self = (struct spate *)thorium_actor_concrete_actor(self); buffer = thorium_message_buffer(message); name = thorium_actor_name(self); /* * The buffer contains initial actors spawned by Thorium */ core_vector_unpack(&concrete_self->initial_actors, buffer); if (!spate_must_print_help(self)) { thorium_actor_log(self, "spate/%d starts", name); } if (core_vector_index_of(&concrete_self->initial_actors, &name) == 0) { concrete_self->is_leader = 1; } /* * Abort if the actor is not the leader of the tribe. */ if (!concrete_self->is_leader) { return; } if (spate_must_print_help(self)) { spate_help(self); spate_stop(self); return; } /* * Otherwise, the coverage distribution will take care of creating * the directory. */ core_timer_start(&concrete_self->timer); /* * Verify if the directory already exists. If it does, don't * do anything as it is not a good thing to overwrite previous science * results. */ argc = thorium_actor_argc(self); argv = thorium_actor_argv(self); directory_name = biosal_command_get_output_directory(argc, argv); already_created = core_directory_verify_existence(directory_name); if (already_created) { thorium_actor_log(self, "%s/%d Error: output directory \"%s\" already exists, please delete it or use a different output directory", thorium_actor_script_name(self), thorium_actor_name(self), directory_name); spate_stop(self); return; } spawner = thorium_actor_get_spawner(self, &concrete_self->initial_actors); thorium_actor_send_int(self, spawner, ACTION_SPAWN, SCRIPT_INPUT_CONTROLLER); }
/*
 * Add the kmers of a kmer frequency block to the local table.
 *
 * self:    the graph store actor.
 * message: its buffer holds a packed kmer frequency block; its byte count
 *          is only used for a one-time debug print.
 *
 * For each (kmer, frequency) pair of the block, the store key is computed
 * with the storage codec, the matching vertex is fetched from the table
 * (or added and initialized when absent), and its coverage depth is
 * increased by the frequency. The `received` counter grows by the same
 * frequency. ACTION_PUSH_KMER_BLOCK_REPLY is sent back when done.
 */
void biosal_assembly_graph_store_push_kmer_block(struct thorium_actor *self, struct thorium_message *message)
{
    struct core_memory_pool *ephemeral_memory;
    struct biosal_dna_kmer_frequency_block block;
    struct biosal_assembly_vertex *bucket;
    void *packed_kmer;
    struct core_map_iterator iterator;
    struct biosal_assembly_graph_store *concrete_self;
    /*int tag;*/
    void *key;
    struct core_map *kmers;
    struct biosal_dna_kmer kmer;
    void *buffer;
    int count;
    struct biosal_dna_kmer encoded_kmer;
    char *raw_kmer;
    int period;
    struct biosal_dna_kmer *kmer_pointer;
    int *frequency;

    ephemeral_memory = thorium_actor_get_ephemeral_memory(self);
    concrete_self = thorium_actor_concrete_actor(self);
    /*tag = thorium_message_action(message);*/
    buffer = thorium_message_buffer(message);
    count = thorium_message_count(message);

    /*
     * Unpack the incoming block; the content of the message is read
     * with the transport codec.
     */
    biosal_dna_kmer_frequency_block_init(&block, concrete_self->kmer_length, ephemeral_memory, &concrete_self->transport_codec, 0);
    biosal_dna_kmer_frequency_block_unpack(&block, buffer, ephemeral_memory, &concrete_self->transport_codec);

    /* Scratch buffer for the store key, reused by every iteration. */
    key = core_memory_pool_allocate(ephemeral_memory, concrete_self->key_length_in_bytes);

    kmers = biosal_dna_kmer_frequency_block_kmers(&block);
    core_map_iterator_init(&iterator, kmers);

    /* Minimum growth of `received` between two progress lines. */
    period = 2500000;

    /* Scratch buffer for the kmer sequence (kmer_length + 1 bytes). */
    raw_kmer = core_memory_pool_allocate(thorium_actor_get_ephemeral_memory(self), concrete_self->kmer_length + 1);

    /* Print the size of a delivery once per actor. */
    if (!concrete_self->printed_vertex_size) {
        printf("DEBUG VERTEX DELIVERY %d bytes\n", count);
        concrete_self->printed_vertex_size = 1;
    }

    while (core_map_iterator_has_next(&iterator)) {

        /*
         * add kmers to store
         */
        core_map_iterator_next(&iterator, (void **)&packed_kmer, (void **)&frequency);

        /* Unpack the kmer with the transport codec. */
        biosal_dna_kmer_init_empty(&kmer);
        biosal_dna_kmer_unpack(&kmer, packed_kmer, concrete_self->kmer_length, ephemeral_memory, &concrete_self->transport_codec);

        kmer_pointer = &kmer;

        /*
         * When codec_are_different is set, the kmer is rebuilt from its
         * sequence with the storage codec, and that copy is the one
         * used to compute the key.
         */
        if (concrete_self->codec_are_different) {

            /*
             * Get a copy of the sequence
             */
            biosal_dna_kmer_get_sequence(kmer_pointer, raw_kmer, concrete_self->kmer_length, &concrete_self->transport_codec);
            biosal_dna_kmer_init(&encoded_kmer, raw_kmer, &concrete_self->storage_codec, thorium_actor_get_ephemeral_memory(self));
            kmer_pointer = &encoded_kmer;
        }

        biosal_dna_kmer_pack_store_key(kmer_pointer, key, concrete_self->kmer_length, &concrete_self->storage_codec, thorium_actor_get_ephemeral_memory(self));

        /*
         * NOTE(review): raw_kmer is only written in the branch above, so
         * this debug section reads stale or uninitialized bytes when
         * codec_are_different is not set -- confirm before enabling.
         */
#ifdef BIOSAL_DEBUG_ISSUE_540
        if (strcmp(raw_kmer, "AGCTGGTAGTCATCACCAGACTGGAACAG") == 0 || strcmp(raw_kmer, "CGCGATCTGTTGCTGGGCCTAACGTGGTA") == 0 || strcmp(raw_kmer, "TACCACGTTAGGCCCAGCAACAGATCGCG") == 0) {
            printf("Examine store key for %s\n", raw_kmer);
            core_debugger_examine(key, concrete_self->key_length_in_bytes);
        }
#endif

        bucket = core_map_get(&concrete_self->table, key);

        if (bucket == NULL) {
            /* This is the first time that this kmer is seen. */
            bucket = core_map_add(&concrete_self->table, key);
            biosal_assembly_vertex_init(bucket);

#if 0
            printf("DEBUG303 ADD_KEY");
            biosal_dna_kmer_print(&encoded_kmer, concrete_self->kmer_length, &concrete_self->storage_codec, ephemeral_memory);
#endif
        }

        /* encoded_kmer only exists when it was initialized above. */
        if (concrete_self->codec_are_different) {
            biosal_dna_kmer_destroy(&encoded_kmer, thorium_actor_get_ephemeral_memory(self));
        }

        biosal_dna_kmer_destroy(&kmer, ephemeral_memory);

        biosal_assembly_vertex_increase_coverage_depth(bucket, *frequency);

        /*
         * Progress report; the check runs before `received` is updated
         * for the current kmer.
         */
        if (concrete_self->received >= concrete_self->last_received + period) {
            printf("%s/%d received %" PRIu64 " kmers so far," " store has %" PRIu64 " canonical kmers, %" PRIu64 " kmers\n", thorium_actor_script_name(self), thorium_actor_name(self), concrete_self->received, core_map_size(&concrete_self->table), 2 * core_map_size(&concrete_self->table));
            concrete_self->last_received = concrete_self->received;
        }

        concrete_self->received += *frequency;
    }

    /* Release the scratch buffers and the unpacked block. */
    core_memory_pool_free(ephemeral_memory, key);
    core_memory_pool_free(ephemeral_memory, raw_kmer);
    core_map_iterator_destroy(&iterator);
    biosal_dna_kmer_frequency_block_destroy(&block,
                    thorium_actor_get_ephemeral_memory(self));

    thorium_actor_send_reply_empty(self, ACTION_PUSH_KMER_BLOCK_REPLY);
}
void biosal_input_stream_init(struct thorium_actor *actor) { struct biosal_input_stream *input; struct biosal_input_stream *concrete_self; input = (struct biosal_input_stream *)thorium_actor_concrete_actor(actor); concrete_self = input; concrete_self->proxy_ready = 0; concrete_self->buffer_for_sequence = NULL; concrete_self->maximum_sequence_length = 0; concrete_self->open = 0; concrete_self->error = BIOSAL_INPUT_ERROR_NO_ERROR; concrete_self->file_name = NULL; biosal_dna_codec_init(&concrete_self->codec); if (biosal_dna_codec_must_use_two_bit_encoding(&concrete_self->codec, thorium_actor_get_node_count(actor))) { biosal_dna_codec_enable_two_bit_encoding(&concrete_self->codec); } /*concrete_self->mega_block_size = 2097152*/ /* * This is the mega block size in number of sequences. */ concrete_self->mega_block_size = 2097152; concrete_self->granularity = 1024; concrete_self->last_offset = 0; concrete_self->last_entries = 0; concrete_self->starting_offset = 0; /* * Use a large ending offset. */ concrete_self->ending_offset = 0; --concrete_self->ending_offset; core_vector_init(&concrete_self->mega_blocks, sizeof(struct biosal_mega_block)); thorium_actor_add_action(actor, ACTION_INPUT_STREAM_SET_START_OFFSET, biosal_input_stream_set_start_offset); thorium_actor_add_action(actor, ACTION_INPUT_STREAM_SET_END_OFFSET, biosal_input_stream_set_end_offset); #ifdef ENABLE_PARALLEL_COUNT thorium_actor_add_action(actor, ACTION_INPUT_COUNT_IN_PARALLEL, biosal_input_stream_count_in_parallel); thorium_actor_add_action(actor, ACTION_INPUT_COUNT_REPLY, biosal_input_stream_count_reply); #else thorium_actor_add_action(actor, ACTION_INPUT_COUNT_IN_PARALLEL, biosal_input_stream_count_in_parallel_mock); thorium_actor_add_action(actor, ACTION_INPUT_COUNT_REPLY, biosal_input_stream_count_reply_mock); #endif concrete_self->count_customer = THORIUM_ACTOR_NOBODY; #if 0 core_string_init(&concrete_self->file_for_parallel_counting, NULL); #endif /* * Parallel counting. 
*/ concrete_self->total_entries = 0; /* * Disable parallel counting. */ concrete_self->finished_parallel_stream_count = 0; core_vector_init(&concrete_self->spawners, sizeof(int)); core_vector_init(&concrete_self->parallel_streams, sizeof(int)); core_vector_init(&concrete_self->start_offsets, sizeof(uint64_t)); core_vector_init(&concrete_self->end_offsets, sizeof(uint64_t)); core_vector_init(&concrete_self->parallel_mega_blocks, sizeof(struct core_vector)); printf("%s/%d is now online on node %d\n", thorium_actor_script_name(actor), thorium_actor_name(actor), thorium_actor_node_name(actor)); }