region_table_t *parse_regions(char *input_regions, int as_positions, const char *url, const char *species, const char *version) { region_table_t *regions_table = new_region_table_from_ws(url, species, version); char *saveptr, *token; size_t token_len; int num_regions; char **regions_data = split(input_regions, ",", &num_regions); region_t *regions[num_regions]; for (int i = 0; i < num_regions; i++) { // Set chromosome token = strtok_r(regions_data[i], ":", &saveptr); token_len = strlen(token); char *chromosome = strndup(token, token_len); // Set start position token = strtok_r(NULL, "-", &saveptr); size_t start_position, end_position; start_position = (token != NULL) ? atol(token) : 1; // Set end position token = strtok_r(NULL, "-", &saveptr); if (token != NULL) { end_position = atol(token); } else { if (as_positions) { end_position = start_position; } else { end_position = UINT_MAX; } } regions[i] = region_new(chromosome, start_position, end_position, NULL, NULL); LOG_DEBUG_F("region '%s:%u-%u'\n", regions[i]->chromosome, regions[i]->start_position, regions[i]->end_position); } insert_regions(regions, num_regions, regions_table); finish_region_table_loading(regions_table); for (int i = 0; i < num_regions; i++) { free(regions_data[i]); free(regions[i]); } free(regions_data); return regions_table; }
/**
 * Insert a single region into a region table.
 *
 * Thin wrapper that hands insert_regions() a batch of exactly one element.
 *
 * @param region  Region to insert.
 * @param table   Table that receives the region.
 * @return The value returned by insert_regions().
 */
int insert_region(region_t *region, region_table_t *table) {
    // Pass the address of the local pointer so it acts as a 1-element array
    return insert_regions(&region, 1, table);
}
region_table_t *parse_regions_from_gff_file(char *filename, const char *url, const char *species, const char *version) { gff_file_t *file = gff_open(filename); if (file == NULL) { return NULL; } region_table_t *regions_table = new_region_table_from_ws(url, species, version); int ret_code = 0; size_t max_batches = 20, batch_size = 2000; list_t *read_list = (list_t*) malloc (sizeof(list_t)); list_init("batches", 1, max_batches, read_list); #pragma omp parallel sections { // The producer reads the GFF file #pragma omp section { LOG_DEBUG_F("Thread %d reads the GFF file\n", omp_get_thread_num()); ret_code = gff_read_batches(read_list, batch_size, file); list_decr_writers(read_list); if (ret_code) { LOG_FATAL_F("Error while reading GFF file %s (%d)\n", filename, ret_code); } } // The consumer inserts regions in the structure #pragma omp section { list_item_t *item = NULL; gff_batch_t *batch; gff_record_t *record; region_t *regions_batch[REGIONS_CHUNKSIZE]; int avail_regions = 0; while ( item = list_remove_item(read_list) ) { batch = item->data_p; // For each record in the batch, generate a new region for (int i = 0; i < batch->records->size; i++) { record = batch->records->items[i]; region_t *region = region_new(strndup(record->sequence, record->sequence_len), record->start, record->end, record->strand ? strndup(&record->strand, 1) : NULL, record->feature ? 
strndup(record->feature, record->feature_len) : NULL); LOG_DEBUG_F("region '%s:%u-%u'\n", region->chromosome, region->start_position, region->end_position); regions_batch[avail_regions++] = region; // Save when the recommended size is reached if (avail_regions == REGIONS_CHUNKSIZE) { insert_regions(regions_batch, avail_regions, regions_table); for (int i = 0; i < avail_regions; i++) { free(regions_batch[i]); } avail_regions = 0; } } gff_batch_free(batch); list_item_free(item); } // Save the remaining regions that did not fill a batch if (avail_regions > 0) { insert_regions(regions_batch, avail_regions, regions_table); for (int i = 0; i < avail_regions; i++) { free(regions_batch[i]); } avail_regions = 0; } } } finish_region_table_loading(regions_table); list_free_deep(read_list, NULL); gff_close(file, 1); return regions_table; }
/**
 * Hand the multiboot tables over to insert_regions().
 *
 * The function does nothing else: the pointer is forwarded unchanged and no
 * value is returned.
 *
 * NOTE(review): insert_regions() is called here with a single argument,
 * unlike the three-argument calls elsewhere in this source -- presumably a
 * different function from another translation unit; confirm.
 *
 * @param multiboot_tables  Multiboot information structure to forward.
 */
void physical_allocator_init(struct multiboot_info *multiboot_tables)
{
    insert_regions(multiboot_tables);
}