示例#1
0
region_table_t *parse_regions_from_gff_file(char *filename, const char *url, const char *species, const char *version) {
    gff_file_t *file = gff_open(filename);
    if (file == NULL) {
        return NULL;
    } 
    
    region_table_t *regions_table = create_table(url, species, version);
    
    int ret_code = 0;
    size_t max_batches = 20;
    size_t batch_size = 2000;
    list_t *read_list = (list_t*) malloc (sizeof(list_t));
    list_init("batches", 1, max_batches, read_list);
    
    #pragma omp parallel sections
    {
        // The producer reads the GFF file
        #pragma omp section
        {
            LOG_DEBUG_F("Thread %d reads the GFF file\n", omp_get_thread_num());
            ret_code = gff_read_batches(read_list, batch_size, file);
            list_decr_writers(read_list);
            
            if (ret_code) {
                LOG_FATAL_F("Error while reading GFF file %s (%d)\n", filename, ret_code);
            }
        }
        
        // The consumer inserts regions in the structure 
        #pragma omp section
        {    
            list_item_t *item = NULL, *batch_item = NULL;
            gff_batch_t *batch;
            gff_record_t *record;
            while ( (item = list_remove_item(read_list)) != NULL ) {
                batch = item->data_p;
                // For each record in the batch, generate a new region
                for (batch_item = batch->first_p; batch_item != NULL; batch_item = batch_item->next_p) {
                    record = batch_item->data_p;
                    
                    region_t *region = (region_t*) malloc (sizeof(region_t));
                    region->chromosome = (char*) calloc ((strlen(record->sequence)+1), sizeof(char));
                    strncat(region->chromosome, record->sequence, strlen(record->sequence));
                    region->start_position = record->start;
                    region->end_position = record->end;
                    LOG_DEBUG_F("region '%s:%u-%u'\n", region->chromosome, region->start_position, region->end_position);
                    
                    insert_region(region, regions_table);
                }
               
                gff_batch_free(item->data_p);
                list_item_free(item);
            }
        }
    }
    
    gff_close(file, 0);
    
    return regions_table;
}
示例#2
0
vcf_batch_t *fetch_vcf_batch_non_blocking(vcf_file_t *file) {
    assert(file);
    list_item_t *item = list_remove_item_async(file->record_batches);
    if (item) {
        vcf_batch_t *batch = item->data_p;
        list_item_free(item);
        return batch;
    }
    return NULL;
}
示例#3
0
void write_output_body(enum ASSOC_task task, list_t* output_list, FILE *fd) {
    assert(fd);
    list_item_t* item = NULL;
    
    if (task == CHI_SQUARE) {
        while (item = list_remove_item(output_list)) {
            assoc_basic_result_t *result = item->data_p;
            
            double freq_a1 = (result->affected1 + result->affected2 > 0) ? (double) result->affected1 / (result->affected1 + result->affected2) : 0.0f;
            double freq_u1 = (result->unaffected1 + result->unaffected2 > 0) ? (double) result->unaffected1 / (result->unaffected1 + result->unaffected2) : 0.0f;
            double freq_a2 = (result->affected1 + result->affected2 > 0) ? (double) result->affected2 / (result->affected1 + result->affected2) : 0.0f;
            double freq_u2 = (result->unaffected1 + result->unaffected2 > 0) ? (double) result->unaffected2 / (result->unaffected1 + result->unaffected2) : 0.0f;
            
            fprintf(fd, "%s\t%8ld\t%s\t%s\t%3d\t%3d\t%6f\t%6f\t%s\t%3d\t%3d\t%6f\t%6f\t%6f\t%6f\t%6f\n",
                    result->chromosome, result->position, result->id,
                    result->reference, result->affected1, result->unaffected1, freq_a1, freq_u1,
                    result->alternate, result->affected2, result->unaffected2, freq_a2, freq_u2,
                    result->odds_ratio, result->chi_square, result->p_value);
            
            assoc_basic_result_free(result);
            list_item_free(item);
        }
    } else if (task == FISHER) {
        while (item = list_remove_item(output_list)) {
            assoc_fisher_result_t *result = item->data_p;
            
            double freq_a1 = (result->affected1 + result->affected2 > 0) ? (double) result->affected1 / (result->affected1 + result->affected2) : 0.0f;
            double freq_u1 = (result->unaffected1 + result->unaffected2 > 0) ? (double) result->unaffected1 / (result->unaffected1 + result->unaffected2) : 0.0f;
            double freq_a2 = (result->affected1 + result->affected2 > 0) ? (double) result->affected2 / (result->affected1 + result->affected2) : 0.0f;
            double freq_u2 = (result->unaffected1 + result->unaffected2 > 0) ? (double) result->unaffected2 / (result->unaffected1 + result->unaffected2) : 0.0f;
            
            fprintf(fd, "%s\t%8ld\t%s\t%s\t%3d\t%3d\t%6f\t%6f\t%s\t%3d\t%3d\t%6f\t%6f\t%6f\t%6f\n",
                    result->chromosome, result->position, result->id, 
                    result->reference, result->affected1, result->unaffected1, freq_a1, freq_u1,
                    result->alternate, result->affected2, result->unaffected2, freq_a2, freq_u2,
                    result->odds_ratio, result->p_value);
            
            assoc_fisher_result_free(result);
            list_item_free(item);
        }
    }
}
示例#4
0
void precalculate_aux_values_for_annotation(int calculate_stats, int calculate_dp, int calculate_mq, vcf_record_t *record,
                                            variant_stats_t **variant_stats, file_stats_t *file_stats, list_t *stats_list, 
                                            int *dp, int *mq0, double *mq) {
    // Variant statistics
    if (calculate_stats) {
        get_variants_stats(&record, 1, NULL, NULL,0, stats_list, file_stats);
        list_item_t *item = list_remove_item(stats_list);
        *variant_stats = item->data_p;
        list_item_free(item);
    }
    
    // Read depth
    if (calculate_dp) {
        int dp_pos = -1;
        *dp = 0;
        for (int j = 0; j < record->samples->size; j++) {
            char *aux = strndup(record->format, record->format_len);
            dp_pos = get_field_position_in_format("DP", aux);
            free(aux);
            if (dp_pos >= 0) {
                aux = strdup((char*) array_list_get(j, record->samples));
                *dp += atoi(get_field_value_in_sample(aux, dp_pos));
                free(aux);
            }
        }
    }
    
    // Mapping quality
    if (calculate_mq) {
        for (int j = 0; j < record->samples->size; j++) {
            char *aux = strndup(record->format, record->format_len);
            int mq_pos = get_field_position_in_format("GQ", aux);
            free(aux);
            if (mq_pos < 0) {
                continue;
            }

            aux = strdup((char*) array_list_get(j, record->samples));
            int cur_gq = atoi(get_field_value_in_sample(aux, mq_pos));
            free(aux);
//                 printf("sample = %s\tmq_pos = %d\tvalue = %d\n", array_list_get(j, record->samples), mq_pos,
//                        atoi(get_field_value_in_sample(strdup((char*) array_list_get(j, record->samples)), mq_pos)));
            if (cur_gq == 0) {
                (*mq0)++;
            } else {
                *mq += cur_gq * cur_gq;
            }
        }
        *mq = sqrt(*mq / record->samples->size);
    }
}
示例#5
0
XdgListItem *_xdg_list_remove(XdgListItem *item, XdgListItemFree list_item_free)
{
    XdgListItem *res;

    if (item->prev)
        if (item->prev->next = res = item->next)
            res->prev = item->prev;
        else
            item->list->tail = item->prev;
    else
        if (item->list->head = res = item->next)
            res->prev = NULL;
        else
            item->list->tail = NULL;

    list_item_free(item);
    return res;
}
示例#6
0
region_table_t *parse_regions_from_gff_file(char *filename, const char *url, const char *species, const char *version) {
    gff_file_t *file = gff_open(filename);
    if (file == NULL) {
        return NULL;
    }

    region_table_t *regions_table = new_region_table_from_ws(url, species, version);

    int ret_code = 0;
    size_t max_batches = 20, batch_size = 2000;
    list_t *read_list = (list_t*) malloc (sizeof(list_t));
    list_init("batches", 1, max_batches, read_list);

    #pragma omp parallel sections
    {
        // The producer reads the GFF file
        #pragma omp section
        {
            LOG_DEBUG_F("Thread %d reads the GFF file\n", omp_get_thread_num());
            ret_code = gff_read_batches(read_list, batch_size, file);
            list_decr_writers(read_list);

            if (ret_code) {
                LOG_FATAL_F("Error while reading GFF file %s (%d)\n", filename, ret_code);
            }
        }

        // The consumer inserts regions in the structure
        #pragma omp section
        {
            list_item_t *item = NULL;
            gff_batch_t *batch;
            gff_record_t *record;

            region_t *regions_batch[REGIONS_CHUNKSIZE];
            int avail_regions = 0;

            while ( item = list_remove_item(read_list) ) {
                batch = item->data_p;
                // For each record in the batch, generate a new region
                for (int i = 0; i < batch->records->size; i++) {
                    record = batch->records->items[i];

                    region_t *region = region_new(strndup(record->sequence, record->sequence_len),
                                                  record->start, record->end,
                                                  record->strand ? strndup(&record->strand, 1) : NULL,
                                                  record->feature ? strndup(record->feature, record->feature_len) : NULL);

                    LOG_DEBUG_F("region '%s:%u-%u'\n", region->chromosome, region->start_position, region->end_position);

                    regions_batch[avail_regions++] = region;

                    // Save when the recommended size is reached
                    if (avail_regions == REGIONS_CHUNKSIZE) {
                        insert_regions(regions_batch, avail_regions, regions_table);
                        for (int i = 0; i < avail_regions; i++) {
                            free(regions_batch[i]);
                        }
                        avail_regions = 0;
                    }
                }

                gff_batch_free(batch);
                list_item_free(item);
            }

            // Save the remaining regions that did not fill a batch
            if (avail_regions > 0) {
                insert_regions(regions_batch, avail_regions, regions_table);
                for (int i = 0; i < avail_regions; i++) {
                    free(regions_batch[i]);
                }
                avail_regions = 0;
            }
        }
    }

    finish_region_table_loading(regions_table);

    list_free_deep(read_list, NULL);

    gff_close(file, 1);

    return regions_table;
}
示例#7
0
int run_effect(char **urls, shared_options_data_t *shared_options_data, effect_options_data_t *options_data) {
    int ret_code = 0;
    double start, stop, total;
    
    vcf_file_t *vcf_file = vcf_open(shared_options_data->vcf_filename, shared_options_data->max_batches);
    if (!vcf_file) {
        LOG_FATAL("VCF file does not exist!\n");
    }
    
    ped_file_t *ped_file = NULL;
    if (shared_options_data->ped_filename) {
        ped_file = ped_open(shared_options_data->ped_filename);
        if (!ped_file) {
            LOG_FATAL("PED file does not exist!\n");
        }
        LOG_INFO("About to read PED file...\n");
        // Read PED file before doing any processing
        ret_code = ped_read(ped_file);
        if (ret_code != 0) {
            LOG_FATAL_F("Can't read PED file: %s\n", ped_file->filename);
        }
    }
    
    char *output_directory = shared_options_data->output_directory;
    size_t output_directory_len = strlen(output_directory);
    
    ret_code = create_directory(output_directory);
    if (ret_code != 0 && errno != EEXIST) {
        LOG_FATAL_F("Can't create output directory: %s\n", output_directory);
    }
    
    // Remove all .txt files in folder
    ret_code = delete_files_by_extension(output_directory, "txt");
    if (ret_code != 0) {
        return ret_code;
    }
    
    // Initialize environment for connecting to the web service
    ret_code = init_http_environment(0);
    if (ret_code != 0) {
        return ret_code;
    }
    
    // Output file descriptors
    static cp_hashtable *output_files = NULL;
    // Lines of the output data in the main .txt files
    static list_t *output_list = NULL;
    // Consequence type counters (for summary, must be kept between web service calls)
    static cp_hashtable *summary_count = NULL;
    // Gene list (for genes-with-variants, must be kept between web service calls)
    static cp_hashtable *gene_list = NULL;

    // Initialize collections of file descriptors and summary counters
    ret_code = initialize_output_files(output_directory, output_directory_len, &output_files);
    if (ret_code != 0) {
        return ret_code;
    }
    initialize_output_data_structures(shared_options_data, &output_list, &summary_count, &gene_list);
    initialize_ws_buffers(shared_options_data->num_threads);
    
    // Create job.status file
    char job_status_filename[output_directory_len + 10];
    sprintf(job_status_filename, "%s/job.status", output_directory);
    FILE *job_status = new_job_status_file(job_status_filename);
    if (!job_status) {
        LOG_FATAL("Can't create job status file\n");
    } else {
        update_job_status_file(0, job_status);
    }
    
 
#pragma omp parallel sections private(start, stop, total)
    {
#pragma omp section
        {
            LOG_DEBUG_F("Thread %d reads the VCF file\n", omp_get_thread_num());
            
            start = omp_get_wtime();
            
            ret_code = vcf_read(vcf_file, 1,
                                (shared_options_data->batch_bytes > 0) ? shared_options_data->batch_bytes : shared_options_data->batch_lines,
                                shared_options_data->batch_bytes <= 0);

            stop = omp_get_wtime();
            total = stop - start;

            if (ret_code) {
                LOG_ERROR_F("Error %d while reading the file %s\n", ret_code, vcf_file->filename);
            }

            LOG_INFO_F("[%dR] Time elapsed = %f s\n", omp_get_thread_num(), total);
            LOG_INFO_F("[%dR] Time elapsed = %e ms\n", omp_get_thread_num(), total*1000);

            notify_end_parsing(vcf_file);
        }
        
#pragma omp section
        {
            // Enable nested parallelism and set the number of threads the user has chosen
            omp_set_nested(1);
            
            LOG_DEBUG_F("Thread %d processes data\n", omp_get_thread_num());
            
            // Filters and files for filtering output
            filter_t **filters = NULL;
            int num_filters = 0;
            if (shared_options_data->chain != NULL) {
                filters = sort_filter_chain(shared_options_data->chain, &num_filters);
            }
            FILE *passed_file = NULL, *failed_file = NULL, *non_processed_file = NULL;
            get_filtering_output_files(shared_options_data, &passed_file, &failed_file);
            
            // Pedigree information (used in some filters)
            individual_t **individuals = NULL;
            khash_t(ids) *sample_ids = NULL;
            
            // Filename structure outdir/vcfname.errors
            char *prefix_filename = calloc(strlen(shared_options_data->vcf_filename), sizeof(char));
            get_filename_from_path(shared_options_data->vcf_filename, prefix_filename);
            char *non_processed_filename = malloc((strlen(shared_options_data->output_directory) + strlen(prefix_filename) + 9) * sizeof(char));
            sprintf(non_processed_filename, "%s/%s.errors", shared_options_data->output_directory, prefix_filename);
            non_processed_file = fopen(non_processed_filename, "w");
            free(non_processed_filename);
            
            // Maximum size processed by each thread (never allow more than 1000 variants per query)
            if (shared_options_data->batch_lines > 0) {
                shared_options_data->entries_per_thread = MIN(MAX_VARIANTS_PER_QUERY, 
                            ceil((float) shared_options_data->batch_lines / shared_options_data->num_threads));
            } else {
                shared_options_data->entries_per_thread = MAX_VARIANTS_PER_QUERY;
            }
            LOG_DEBUG_F("entries-per-thread = %d\n", shared_options_data->entries_per_thread);
    
            int i = 0;
            vcf_batch_t *batch = NULL;
            int ret_ws_0 = 0, ret_ws_1 = 0, ret_ws_2 = 0;
            
            start = omp_get_wtime();

            while (batch = fetch_vcf_batch(vcf_file)) {
                if (i == 0) {
                    // Add headers associated to the defined filters
                    vcf_header_entry_t **filter_headers = get_filters_as_vcf_headers(filters, num_filters);
                    for (int j = 0; j < num_filters; j++) {
                        add_vcf_header_entry(filter_headers[j], vcf_file);
                    }
                        
                    // Write file format, header entries and delimiter
                    if (passed_file != NULL) { write_vcf_header(vcf_file, passed_file); }
                    if (failed_file != NULL) { write_vcf_header(vcf_file, failed_file); }
                    if (non_processed_file != NULL) { write_vcf_header(vcf_file, non_processed_file); }
                    
                    LOG_DEBUG("VCF header written\n");
                    
                    if (ped_file) {
                        // Create map to associate the position of individuals in the list of samples defined in the VCF file
                        sample_ids = associate_samples_and_positions(vcf_file);
                        // Sort individuals in PED as defined in the VCF file
                        individuals = sort_individuals(vcf_file, ped_file);
                    }
                }
                
//                     printf("batch loaded = '%.*s'\n", 50, batch->text);
//                     printf("batch text len = %zu\n", strlen(batch->text));

//                 if (i % 10 == 0) {
                    LOG_INFO_F("Batch %d reached by thread %d - %zu/%zu records \n", 
                            i, omp_get_thread_num(),
                            batch->records->size, batch->records->capacity);
//                 }

                int reconnections = 0;
                int max_reconnections = 3; // TODO allow to configure?

                // Write records that passed to a separate file, and query the WS with them as args
                array_list_t *failed_records = NULL;
                int num_variables = ped_file? get_num_variables(ped_file): 0;
                array_list_t *passed_records = filter_records(filters, num_filters, individuals, sample_ids, num_variables, batch->records, &failed_records);
                if (passed_records->size > 0) {
                    // Divide the list of passed records in ranges of size defined in config file
                    int num_chunks;
                    int *chunk_sizes;
                    int *chunk_starts = create_chunks(passed_records->size, shared_options_data->entries_per_thread, &num_chunks, &chunk_sizes);
                    
                    do {
                        // OpenMP: Launch a thread for each range
                        #pragma omp parallel for num_threads(shared_options_data->num_threads)
                        for (int j = 0; j < num_chunks; j++) {
                            int tid = omp_get_thread_num();
                            LOG_DEBUG_F("[%d] WS invocation\n", tid);
                            LOG_DEBUG_F("[%d] -- effect WS\n", tid);
                            if (!reconnections || ret_ws_0) {
                                ret_ws_0 = invoke_effect_ws(urls[0], (vcf_record_t**) (passed_records->items + chunk_starts[j]), 
                                                            chunk_sizes[j], options_data->excludes);
                                parse_effect_response(tid, output_directory, output_directory_len, output_files, output_list, summary_count, gene_list);
                                free(effect_line[tid]);
                                effect_line[tid] = (char*) calloc (max_line_size[tid], sizeof(char));
                            }
                            
                            if (!options_data->no_phenotypes) {
                                if (!reconnections || ret_ws_1) {
                                    LOG_DEBUG_F("[%d] -- snp WS\n", omp_get_thread_num());
                                    ret_ws_1 = invoke_snp_phenotype_ws(urls[1], (vcf_record_t**) (passed_records->items + chunk_starts[j]), chunk_sizes[j]);
                                    parse_snp_phenotype_response(tid, output_list);
                                    free(snp_line[tid]);
                                    snp_line[tid] = (char*) calloc (snp_max_line_size[tid], sizeof(char));
                                }
                                 
                                if (!reconnections || ret_ws_2) {
                                    LOG_DEBUG_F("[%d] -- mutation WS\n", omp_get_thread_num());
                                    ret_ws_2 = invoke_mutation_phenotype_ws(urls[2], (vcf_record_t**) (passed_records->items + chunk_starts[j]), chunk_sizes[j]);
                                    parse_mutation_phenotype_response(tid, output_list);
                                    free(mutation_line[tid]);
                                    mutation_line[tid] = (char*) calloc (mutation_max_line_size[tid], sizeof(char));
                                }
                            }
                        }
                        
                        LOG_DEBUG_F("*** %dth web services invocation finished\n", i);
                        
                        if (ret_ws_0 || ret_ws_1 || ret_ws_2) {
                            if (ret_ws_0) {
                                LOG_ERROR_F("Effect web service error: %s\n", get_last_http_error(ret_ws_0));
                            }
                            if (ret_ws_1) {
                                LOG_ERROR_F("SNP phenotype web service error: %s\n", get_last_http_error(ret_ws_1));
                            }
                            if (ret_ws_2) {
                                LOG_ERROR_F("Mutations phenotype web service error: %s\n", get_last_http_error(ret_ws_2));
                            }
                            
                            // In presence of errors, wait 4 seconds before retrying
                            reconnections++;
                            LOG_ERROR_F("Some errors ocurred, reconnection #%d\n", reconnections);
                            sleep(4);
                        } else {
                            free(chunk_starts);
                            free(chunk_sizes);
                        }
                    } while (reconnections < max_reconnections && (ret_ws_0 || ret_ws_1 || ret_ws_2));
                }
                
                // If the maximum number of reconnections was reached still with errors, 
                // write the non-processed batch to the corresponding file
                if (reconnections == max_reconnections && (ret_ws_0 || ret_ws_1 || ret_ws_2)) {
                #pragma omp critical
                    {
                        write_vcf_batch(batch, non_processed_file);
                    }
                }
                
                // Write records that passed and failed filters to separate files, and free them
                write_filtering_output_files(passed_records, failed_records, passed_file, failed_file);
                free_filtered_records(passed_records, failed_records, batch->records);
                
                // Free batch and its contents
                vcf_batch_free(batch);
                
                i++;
            }

            stop = omp_get_wtime();

            total = stop - start;

            LOG_INFO_F("[%d] Time elapsed = %f s\n", omp_get_thread_num(), total);
            LOG_INFO_F("[%d] Time elapsed = %e ms\n", omp_get_thread_num(), total*1000);

            // Free resources
            if (passed_file) { fclose(passed_file); }
            if (failed_file) { fclose(failed_file); }
            if (non_processed_file) { fclose(non_processed_file); }
            
            // Free filters
            for (i = 0; i < num_filters; i++) {
                filter_t *filter = filters[i];
                filter->free_func(filter);
            }
            free(filters);
            
            // Decrease list writers count
            for (i = 0; i < shared_options_data->num_threads; i++) {
                list_decr_writers(output_list);
            }
        }
        
#pragma omp section
        {
            // Thread which writes the results to all_variants, summary and one file per consequence type
            int ret = 0;
            char *line;
            list_item_t* item = NULL;
            FILE *fd = NULL;
            
            FILE *all_variants_file = cp_hashtable_get(output_files, "all_variants");
            FILE *snp_phenotype_file = cp_hashtable_get(output_files, "snp_phenotypes");
            FILE *mutation_phenotype_file = cp_hashtable_get(output_files, "mutation_phenotypes");
            
            while ((item = list_remove_item(output_list)) != NULL) {
                line = item->data_p;
                
                // Type greater than 0: consequence type identified by its SO code
                // Type equals to -1: SNP phenotype
                // Type equals to -2: mutation phenotype
                if (item->type > 0) {
                    // Write entry in the consequence type file
                    fd = cp_hashtable_get(output_files, &(item->type));
                    int ret = fprintf(fd, "%s\n", line);
                    if (ret < 0) {
                        LOG_ERROR_F("Error writing to file: '%s'\n", line);
                    }
                    
                    // Write in all_variants
                    ret = fprintf(all_variants_file, "%s\n", line);
                    if (ret < 0) {
                        LOG_ERROR_F("Error writing to all_variants: '%s'\n", line);
                    }
                    
                } else if (item->type == SNP_PHENOTYPE) {
                    ret = fprintf(snp_phenotype_file, "%s\n", line);
                    if (ret < 0) {
                        LOG_ERROR_F("Error writing to snp_phenotypes: '%s'\n", line);
                    }
                    
                } else if (item->type == MUTATION_PHENOTYPE) {
                    ret = fprintf(mutation_phenotype_file, "%s\n", line);
                    if (ret < 0) {
                        LOG_ERROR_F("Error writing to mutation_phenotypes: '%s'\n", line);
                    }
                }
                
                free(line);
                list_item_free(item);
            }
            
        }
    }

    write_summary_file(summary_count, cp_hashtable_get(output_files, "summary"));
    write_genes_with_variants_file(gene_list, output_directory);
    write_result_file(shared_options_data, options_data, summary_count, output_directory);

    free_output_data_structures(output_files, summary_count, gene_list);
    free_ws_buffers(shared_options_data->num_threads);
    free(output_list);
    vcf_close(vcf_file);
    
    update_job_status_file(100, job_status);
    close_job_status_file(job_status);
    
    return ret_code;
}
static struct mode_list_elem *read_mode_file(const gchar *filename)
{
  GKeyFile *settingsfile;
  gboolean test = FALSE;
  struct mode_list_elem *list_item = NULL;

  settingsfile = g_key_file_new();
  test = g_key_file_load_from_file(settingsfile, filename, G_KEY_FILE_NONE, NULL);
  if(!test)
  {
      return(NULL);
  }
  list_item = malloc(sizeof(struct mode_list_elem));
  list_item->mode_name = g_key_file_get_string(settingsfile, MODE_ENTRY, MODE_NAME_KEY, NULL);
  log_debug("Dynamic mode name = %s\n", list_item->mode_name);
  list_item->mode_module = g_key_file_get_string(settingsfile, MODE_ENTRY, MODE_MODULE_KEY, NULL);
  log_debug("Dynamic mode module = %s\n", list_item->mode_module);
  list_item->appsync = g_key_file_get_integer(settingsfile, MODE_ENTRY, MODE_NEEDS_APPSYNC_KEY, NULL);
  list_item->mass_storage = g_key_file_get_integer(settingsfile, MODE_ENTRY, MODE_MASS_STORAGE, NULL);
  list_item->network = g_key_file_get_integer(settingsfile, MODE_ENTRY, MODE_NETWORK_KEY, NULL);
  list_item->network_interface = g_key_file_get_string(settingsfile, MODE_ENTRY, MODE_NETWORK_INTERFACE_KEY, NULL);
  list_item->sysfs_path = g_key_file_get_string(settingsfile, MODE_OPTIONS_ENTRY, MODE_SYSFS_PATH, NULL);
  //log_debug("Dynamic mode sysfs path = %s\n", list_item->sysfs_path);
  list_item->sysfs_value = g_key_file_get_string(settingsfile, MODE_OPTIONS_ENTRY, MODE_SYSFS_VALUE, NULL);
  //log_debug("Dynamic mode sysfs value = %s\n", list_item->sysfs_value);
  list_item->sysfs_reset_value = g_key_file_get_string(settingsfile, MODE_OPTIONS_ENTRY, MODE_SYSFS_RESET_VALUE, NULL);
  list_item->softconnect = g_key_file_get_string(settingsfile, MODE_OPTIONS_ENTRY, MODE_SOFTCONNECT, NULL);
  list_item->softconnect_disconnect = g_key_file_get_string(settingsfile, MODE_OPTIONS_ENTRY, MODE_SOFTCONNECT_DISCONNECT, NULL);
  list_item->softconnect_path = g_key_file_get_string(settingsfile, MODE_OPTIONS_ENTRY, MODE_SOFTCONNECT_PATH, NULL);
  list_item->android_extra_sysfs_path = g_key_file_get_string(settingsfile, MODE_OPTIONS_ENTRY, MODE_ANDROID_EXTRA_SYSFS_PATH, NULL);
  list_item->android_extra_sysfs_path2 = g_key_file_get_string(settingsfile, MODE_OPTIONS_ENTRY, MODE_ANDROID_EXTRA_SYSFS_PATH2, NULL);
  list_item->android_extra_sysfs_path3 = g_key_file_get_string(settingsfile, MODE_OPTIONS_ENTRY, MODE_ANDROID_EXTRA_SYSFS_PATH3, NULL);
  list_item->android_extra_sysfs_path4 = g_key_file_get_string(settingsfile, MODE_OPTIONS_ENTRY, MODE_ANDROID_EXTRA_SYSFS_PATH4, NULL);
  //log_debug("Android extra mode sysfs path2 = %s\n", list_item->android_extra_sysfs_path2);
  list_item->android_extra_sysfs_value = g_key_file_get_string(settingsfile, MODE_OPTIONS_ENTRY, MODE_ANDROID_EXTRA_SYSFS_VALUE, NULL);
  list_item->android_extra_sysfs_value2 = g_key_file_get_string(settingsfile, MODE_OPTIONS_ENTRY, MODE_ANDROID_EXTRA_SYSFS_VALUE2, NULL);
  list_item->android_extra_sysfs_value3 = g_key_file_get_string(settingsfile, MODE_OPTIONS_ENTRY, MODE_ANDROID_EXTRA_SYSFS_VALUE3, NULL);
  list_item->android_extra_sysfs_value4 = g_key_file_get_string(settingsfile, MODE_OPTIONS_ENTRY, MODE_ANDROID_EXTRA_SYSFS_VALUE4, NULL);
  //log_debug("Android extra value2 = %s\n", list_item->android_extra_sysfs_value2);
  list_item->idProduct = g_key_file_get_string(settingsfile, MODE_OPTIONS_ENTRY, MODE_IDPRODUCT, NULL);
  list_item->idVendorOverride = g_key_file_get_string(settingsfile, MODE_OPTIONS_ENTRY, MODE_IDVENDOROVERRIDE, NULL);
  list_item->nat = g_key_file_get_integer(settingsfile, MODE_OPTIONS_ENTRY, MODE_HAS_NAT, NULL);
  list_item->dhcp_server = g_key_file_get_integer(settingsfile, MODE_OPTIONS_ENTRY, MODE_HAS_DHCP_SERVER, NULL);
#ifdef CONNMAN
  list_item->connman_tethering = g_key_file_get_string(settingsfile, MODE_OPTIONS_ENTRY, MODE_CONNMAN_TETHERING, NULL);
#endif

  g_key_file_free(settingsfile);
  if(list_item->mode_name == NULL || list_item->mode_module == NULL)
  {
	/* free list_item as it will not be used */
	list_item_free(list_item);
	return NULL;
  }
  if(list_item->network && list_item->network_interface == NULL)
  {
	/* free list_item as it will not be used */
	list_item_free(list_item);
	return NULL;
  }
  if(list_item->sysfs_path && list_item->sysfs_value == NULL)
  {
	/* free list_item as it will not be used */
	list_item_free(list_item);
	return NULL;
  }
  if(list_item->softconnect &&  list_item->softconnect_path == NULL)
  {
	/* free list_item as it will not be used */
	list_item_free(list_item);
	return NULL;
  }
  else
  	return(list_item);
}
示例#9
0
void batch_aligner(batch_aligner_input_t *input) {

  //  printf("START: batch_aligner\n", omp_get_thread_num());

  size_t total_batches = 0;
		
  list_t *read_list = input->read_list;
  list_t *write_list = input->write_list;

  write_batch_t* write_batch = NULL;
  aligner_batch_t *aligner_batch = NULL;

  list_item_t *read_item = NULL, *write_item = NULL;

  unsigned int tid = omp_get_thread_num();

  struct timeval t1, t2;

  array_list_t *list1, *list2;

  // main loop
  while ( (read_item = list_remove_item(read_list)) != NULL ) {

    aligner_batch = aligner_batch_new((fastq_batch_t *) read_item->data_p);

    thr_batches[tid]++;

    //printf("********************** BATCH %d (batch aligner %d)\n", total_batches, omp_get_thread_num());

    // Burros-Wheeler transform
    gettimeofday(&t1, NULL);
    apply_bwt(input->bwt_input, aligner_batch);
    gettimeofday(&t2, NULL);
    bwt_time[tid] += ((t2.tv_sec - t1.tv_sec) * 1e6 + (t2.tv_usec - t1.tv_usec));
    //printf("---> %d, bwt, num targets = %d\n", tid, aligner_batch->num_targets);


    if (aligner_batch->num_targets > 0) {
      // seeding
      gettimeofday(&t1, NULL);
      apply_seeding(input->region_input, aligner_batch);
      gettimeofday(&t2, NULL);
      seeding_time[tid] += ((t2.tv_sec - t1.tv_sec) * 1e6 + (t2.tv_usec - t1.tv_usec));
      thr_seeding_items[tid] += aligner_batch->num_targets;
      //printf("---> %d, seeding, num targets = %d\n", tid, aligner_batch->num_targets);

      // seeking CALs
      gettimeofday(&t1, NULL);
      apply_caling(input->cal_input, aligner_batch);
      gettimeofday(&t2, NULL);
      cal_time[tid] += ((t2.tv_sec - t1.tv_sec) * 1e6 + (t2.tv_usec - t1.tv_usec));
      thr_cal_items[tid] += aligner_batch->num_targets;
      //printf("---> %d, cal, num targets = %d\n", tid, aligner_batch->num_targets);
    }

    // pair-mode managing
    if (input->pair_input != NULL) {      
      apply_pair(input->pair_input, aligner_batch);
      //      printf("---> %d, pair, num targets = %d\n", tid, aligner_batch->num_targets);
    }

    if (aligner_batch->num_targets > 0) {
      // Smith-Waterman
      gettimeofday(&t1, NULL);
      apply_sw(input->sw_input, aligner_batch);
      gettimeofday(&t2, NULL);
      sw_time[tid] += ((t2.tv_sec - t1.tv_sec) * 1e6 + (t2.tv_usec - t1.tv_usec));
      //thr_sw_items[tid] += aligner_batch->num_targets;
      //printf("---> %d, sw, num targets = %d\n", tid, aligner_batch->num_targets);
    }

    if (aligner_batch->num_targets > 0) {
      // prepare alignments (converts sw-output to alignment, searches pairs...)
      prepare_alignments(input->pair_input, aligner_batch);
    }

    write_item = list_item_new(total_batches, 0, aligner_batch);
    list_insert_item(write_item, write_list);

    list_item_free(read_item);
    total_batches++;
  } // main loop

  /*
  printf("Thread %d: BWT time     = %0.4f s\n", tid, bwt_time / 1e6);
  printf("Thread %d: Seeding time = %0.4f s\n", tid, seeding_time / 1e6);
  printf("Thread %d: CAL time     = %0.4f s\n", tid, cal_time / 1e6);
  printf("Thread %d: SW time      = %0.4f s\n", tid, sw_time / 1e6);
  */

  // decreasing writers
  if (write_list != NULL) list_decr_writers(write_list);

  //  printf("END: batch_aligner (%d), (total batches %d): END\n", omp_get_thread_num(), total_batches);
}
示例#10
0
void region_seeker_server(region_seeker_input_t *input_p){
  
  printf("region_seeker_server(%d): START\n", omp_get_thread_num());  
  list_item_t *item_p = NULL;
  list_item_t *cal_item_p = NULL;
  fastq_batch_t *unmapped_batch_p;
  size_t num_reads;
  array_list_t **allocate_mapping_p;
  cal_batch_t *cal_batch_p;
  size_t num_mappings, total_mappings = 0, num_batches = 0;
  size_t num_threads = input_p->region_threads;
  size_t chunk;
  size_t total_reads = 0;

  omp_set_num_threads(num_threads);
  
  while ( (item_p = list_remove_item(input_p->unmapped_read_list_p)) != NULL ) {

    //printf("Region Seeker Processing batch...\n");
    num_batches++;
    if (time_on) { timing_start(REGION_SEEKER, 0, timing_p); }
    
    unmapped_batch_p = (fastq_batch_t *)item_p->data_p;
    num_reads = unmapped_batch_p->num_reads;
    total_reads += num_reads;
    allocate_mapping_p = (array_list_t **)malloc(sizeof(array_list_t *)*num_reads);
    
    if (input_p->gpu_enable) {
      //******************************* GPU PROCESS *********************************//
      for (size_t i = 0; i < num_reads; i++) {
	allocate_mapping_p[i] = array_list_new(1000, 
					       1.25f, 
					       COLLECTION_MODE_ASYNCHRONIZED);
      }
      #ifdef HPG_GPU
      num_mappings = bwt_map_exact_seed_batch_gpu(unmapped_batch_p,
						  input_p->bwt_optarg_p, 
						  input_p->cal_optarg_p,
						  input_p->bwt_index_p,
						  input_p->gpu_context,
						  allocate_mapping_p);
      #endif
      //****************************************************************************//
    } else {

      //******************************* CPU PROCESS *********************************//
      //printf("Region Seeker :: Process Batch with %d reads\n", num_reads); 
      chunk = MAX(1, num_reads/(num_threads*10));
      
      //printf("Region Seeker :: Process Batch with %d reads\n", num_reads);
      #pragma omp parallel for private(num_mappings) reduction(+:total_mappings) schedule(dynamic, chunk)
      //#pragma omp parallel for private(num_mappings) reduction(+:total_mappings) schedule(static)
      for (size_t i = 0; i < num_reads; i++) {
	//printf("Threads region zone: %d\n", omp_get_num_threads());
	
	allocate_mapping_p[i] = array_list_new(1000, 
					       1.25f, 
					       COLLECTION_MODE_ASYNCHRONIZED);
	
	num_mappings = bwt_map_exact_seeds_seq(&(unmapped_batch_p->seq[unmapped_batch_p->data_indices[i]]), 
					       input_p->cal_optarg_p->seed_size,
					       input_p->cal_optarg_p->min_seed_size,
					       input_p->bwt_optarg_p, input_p->bwt_index_p, allocate_mapping_p[i]);
	
	total_mappings += num_mappings;
	//printf("----------------->>>>>>>>>>>Regions found %d\n", num_mappings);      
      }
      //****************************************************************************//
    
    }

    cal_batch_p = cal_batch_new(allocate_mapping_p, unmapped_batch_p);
      
    list_item_free(item_p);
    cal_item_p = list_item_new(0, 0, cal_batch_p);
    //region_batch_free(region_batch_p);    

    if (time_on) { timing_stop(REGION_SEEKER, 0, timing_p); }
    
    list_insert_item(cal_item_p, input_p->region_list_p);
    //printf("Region Seeker Processing batch finish!\n");

  } //End of while
  
  list_decr_writers(input_p->region_list_p);
 
  if (statistics_on) { 
    statistics_set(REGION_SEEKER_ST, 0, num_batches, statistics_p); 
    statistics_set(REGION_SEEKER_ST, 1, total_reads, statistics_p); 
  }
 
  printf("region_seeker_server: END\n");
  
}
示例#11
0
int main (int argc, char *argv[])
{
    size_t max_batches = 20;
    size_t batch_size = 2000;
    list_t *read_list = (list_t*) malloc (sizeof(list_t));
    list_init("batches", 1, max_batches, read_list);

    int ret_code;
    double start, stop, total;
    char *filename = (char*) malloc ((strlen(argv[1])+1) * sizeof(char));
    strncat(filename, argv[1], strlen(argv[1]));
    gff_file_t* file;

    init_log_custom(LOG_LEVEL_DEBUG, 1, NULL, "w");

    #pragma omp parallel sections private(start, stop, total) lastprivate(file)
    {
        #pragma omp section
        {
            LOG_DEBUG_F("Thread %d reads the GFF file\n", omp_get_thread_num());
            // Reading
            start = omp_get_wtime();

            file = gff_open(filename);
            ret_code = gff_read_batches(read_list, batch_size, file);

            stop = omp_get_wtime();
            total = (stop - start);

            if (ret_code) {
                LOG_FATAL_F("[%dR] Error code = %d\n", omp_get_thread_num(), ret_code);
            }
            LOG_INFO_F("[%dR] Time elapsed = %f s\n", omp_get_thread_num(), total);
            LOG_INFO_F("[%dR] Time elapsed = %e ms\n", omp_get_thread_num(), total*1000);

            // Writing to a new file
            if (argc == 3)
            {
                start = omp_get_wtime();

                ret_code = gff_write(file, argv[2]);

                stop = omp_get_wtime();
                total = (stop - start);

                if (ret_code) {
                    LOG_ERROR_F("[%dW] Error code = %d\n", omp_get_thread_num(), ret_code);
                }
                LOG_INFO_F("[%dW] Time elapsed = %f s\n", omp_get_thread_num(), total);
                LOG_INFO_F("[%dW] Time elapsed = %e ms\n", omp_get_thread_num(), total*1000);
            }

            list_decr_writers(read_list);

            gff_close(file, 0);
        }
        #pragma omp section
        {
            printf("1st log debug\n");
            LOG_DEBUG_F("OMP num threads = %d\n", omp_get_num_threads());
            LOG_DEBUG_F("Thread %d prints info\n", omp_get_thread_num());
            printf("after 1st log debug\n");

            start = omp_get_wtime();

            int i = 0;
            list_item_t* item = NULL;
            FILE *out = fopen("result.gff", "w");
            while ( (item = list_remove_item(read_list)) != NULL ) {
                if (i % 200 == 0)
                {
                    int debug = 1;
                    LOG_DEBUG_F("Batch %d reached by thread %d - %zu/%zu records \n", i, omp_get_thread_num(),
                    ((gff_batch_t*) item->data_p)->length, ((gff_batch_t*) item->data_p)->max_length);
                }

//             gff_write_to_file(file, out);
//             gff_batch_print(stdout, item->data_p);
                write_gff_batch(item->data_p, out);
                gff_batch_free(item->data_p);
                list_item_free(item);
                i++;
            }
            fclose(out);

            stop = omp_get_wtime();
            total = (stop - start);

            LOG_INFO_F("[%d] Time elapsed = %f s\n", omp_get_thread_num(), total);
            LOG_INFO_F("[%d] Time elapsed = %e ms\n", omp_get_thread_num(), total*1000);
        }
    }

    free(read_list);

    return 0;
}
示例#12
0
void batch_writer(batch_writer_input_t* input_p) {

    struct timespec ts;
    ts.tv_sec = 1;
    ts.tv_nsec = 0;

    alignment_t **buffer_p;
    bam1_t* bam1_p;
    bam_header_t* bam_header_p;
    bam_file_t* bam_file_p;

    char* match_filename = input_p->match_filename;
    //char* mismatch_filename = input_p->mismatch_filename;

    char* splice_exact_filename = input_p->splice_exact_filename;
    char* splice_extend_filename = input_p->splice_extend_filename;

    list_t* list_p = input_p->list_p;

    printf("batch_writer (%i): START\n", omp_get_thread_num());

    list_item_t *item_p = NULL;
    write_batch_t* batch_p;

    FILE* fd;
    FILE* splice_exact_fd  = fopen(splice_exact_filename, "w");
    FILE* splice_extend_fd = fopen(splice_extend_filename, "w");

    //printf("HEADER FROM WRITE: %s\n", input_p->header_filename);
    bam_header_p = bam_header_new(HUMAN, NCBI37, input_p->header_filename);
    //bam_file_p = bam_fopen(match_filename);
    bam_file_p = bam_fopen_mode(match_filename, bam_header_p, "w");
    bam_fwrite_header(bam_header_p, bam_file_p);

    // main loop
    while ( (item_p = list_remove_item(list_p)) != NULL ) {

        if (time_on) {
            timing_start(BATCH_WRITER, 0, timing_p);
        }

        batch_p = (write_batch_t*) item_p->data_p;
        //printf("*********************************Extract one item*********************************\n");
        if (batch_p->flag == MATCH_FLAG || batch_p->flag == MISMATCH_FLAG) { //fd = match_fd;
            //printf("start write alignment. Total %d\n", batch_p->size);
            buffer_p = (alignment_t **)batch_p->buffer_p;
            for(int i = 0; i < batch_p->size; i++) {
                //alignment_print(buffer_p[i]);
                bam1_p = convert_to_bam(buffer_p[i], 33);
                bam_fwrite(bam1_p, bam_file_p);
                bam_destroy1(bam1_p);
                alignment_free(buffer_p[i]);
            }
        } else {
            if (batch_p->flag == SPLICE_EXACT_FLAG) {
                fd = splice_exact_fd;
            }
            else if (batch_p->flag == SPLICE_EXTEND_FLAG) {
                fd = splice_extend_fd;
            }
            else {
                fd = NULL;
            }

            if (fd != NULL) {
                //printf("start write batch, %i bytes...\n", batch_p->size);
                fwrite((char *)batch_p->buffer_p, batch_p->size, 1, fd);
                //printf("write done !!\n");
                //if (time_on) { stop_timer(t1_write, t2_write, write_time); }
            }
        }
        //printf("Free batch\n");
        write_batch_free(batch_p);
        list_item_free(item_p);

        if (time_on) {
            timing_stop(BATCH_WRITER, 0, timing_p);
        }
    } // end of batch loop

    //fclose(match_fd);
    //fclose(mismatch_fd);
    fclose(splice_exact_fd);
    fclose(splice_extend_fd);

    bam_fclose(bam_file_p);
    //bam_header_free(bam_header_p);
    printf("batch_writer: END\n");
}
示例#13
0
void batch_writer2(batch_writer_input_t* input) {

    printf("START: batch_writer (%i): START, for file %s\n",
           omp_get_thread_num(), input->match_filename);

    bam1_t *bam1;
    bam_header_t *bam_header;
    bam_file_t *bam_file;
    alignment_t *alig;

    char* match_filename = input->match_filename;
    //  char* splice_filename = input->splice_filename;

    list_t *write_list = input->list_p;
    array_list_t *array_list;

    list_item_t *item = NULL;
    aligner_batch_t *batch = NULL;
    fastq_batch_t *fq_batch = NULL;

    FILE* fd;

    static char aux[10];

    size_t read_len;

    bam_header = bam_header_new(HUMAN, NCBI37, input->header_filename);
    bam_file = bam_fopen_mode(match_filename, bam_header, "w");

    bam_fwrite_header(bam_header, bam_file);

    size_t num_reads = 0, num_items = 0, total_mappings = 0;

    // main loop
    while ( (item = list_remove_item(write_list)) != NULL ) {

        //    if (array_list == NULL) printf("batch_writer.c...\n");

        batch = (aligner_batch_t *) item->data_p;
        fq_batch = batch->fq_batch;
        num_reads = batch->num_mapping_lists;

        for (size_t i = 0; i < num_reads; i++) {

            array_list = batch->mapping_lists[i];
            //      if (array_list == NULL) printf("READ %d, writer, list is NULL\n", i);

            //      printf("----> list == NULL ? %d\n", (array_list == NULL));
            num_items = (array_list == NULL ? 0 : array_list_size(array_list));
            //      printf("----> number of items = %d, num_items <= 0 ? %d\n", num_items, num_items <= 0);

            read_len = fq_batch->data_indices[i + 1] - fq_batch->data_indices[i] - 1;

            // mapped or not mapped ?
            if (num_items == 0) {

                //printf("\tWRITE : read %i (%d items): unmapped...\n", i, num_items);

                // calculating cigar
                sprintf(aux, "%luX", read_len);

                alig = alignment_new();
                alignment_init_single_end(&(fq_batch->header[fq_batch->header_indices[i]])+1,
                                          &(fq_batch->seq[fq_batch->data_indices[i]]),
                                          &(fq_batch->quality[fq_batch->data_indices[i]]),
                                          0,
                                          0,
                                          0,
                                          aux, 1, 255, 0, 0, alig);

                bam1 = convert_to_bam(alig, 33);
                bam_fwrite(bam1, bam_file);
                bam_destroy1(bam1);

                // some cosmetic stuff before freeing the alignment,
                // (in order to not free twice some fields)
                alig->query_name = NULL;
                alig->sequence = NULL;
                alig->quality = NULL;
                alig->cigar = NULL;
                alignment_free(alig);

                //	printf("\tWRITE : read %i (%d items): unmapped...done !!\n", i, num_items);

            } else {
                //	printf("\tWRITE : read %d (%d items): mapped...\n", i, num_items);
                for (size_t j = 0; j < num_items; j++) {
                    alig = (alignment_t *) array_list_get(j, array_list);
                    if (alig != NULL) {

                        bam1 = convert_to_bam(alig, 33);
                        bam_fwrite(bam1, bam_file);
                        bam_destroy1(bam1);

                        alignment_free(alig);
                    }
                }
                //	printf("\tWRITE : read %d (%d items): mapped...done !!\n", i, num_items);
            }
            if (array_list != NULL) array_list_free(array_list, NULL);
        }

        if (batch != NULL) aligner_batch_free(batch);
        if (item != NULL) list_item_free(item);

        if (time_on) {
            timing_stop(BATCH_WRITER, 0, timing_p);
        }
    } // end of batch loop
    bam_fclose(bam_file);
    printf("END: batch_writer (total mappings %lu)\n", total_mappings);
}
示例#14
0
int run_stats(shared_options_data_t *shared_options_data, stats_options_data_t *options_data) {
    file_stats_t *file_stats = file_stats_new();
    sample_stats_t **sample_stats;
    
    // List that stores the batches of records filtered by each thread
    list_t *output_list[shared_options_data->num_threads];
    // List that stores which thread filtered the next batch to save
    list_t *next_token_list = malloc(sizeof(list_t));

    int ret_code;
    double start, stop, total;
    
    vcf_file_t *vcf_file = vcf_open(shared_options_data->vcf_filename, shared_options_data->max_batches);
    if (!vcf_file) {
        LOG_FATAL("VCF file does not exist!\n");
    }
    
    ped_file_t *ped_file = NULL;
    if (shared_options_data->ped_filename) {
        ped_file = ped_open(shared_options_data->ped_filename);
        if (!ped_file) {
            LOG_FATAL("PED file does not exist!\n");
        }
        if(options_data->variable) {
            set_variable_field(options_data->variable, 0, ped_file);
        } else {
            set_variable_field("PHENO", 6, ped_file);
        }
        
        if(options_data->variable_groups) {
            int n, m;
            char *variable_groups = strdup(options_data->variable_groups);
            char **groups;
            char **phenos_in_group;
            groups = split(variable_groups, ":", &n);
            for(int i = 0; i < n; i++){
                phenos_in_group = split(groups[i], ",", &m);
                if(set_phenotype_group(phenos_in_group, m, ped_file) < 0) {
                    LOG_ERROR("Variable can't appear in two groups\n");
                    return DUPLICATED_VARIABLE;
                }
                free(phenos_in_group);
            }
            ped_file->accept_new_values = 0;
            
            free(variable_groups);
            free(groups);
        } else {
            ped_file->accept_new_values = 1;
        }
        if(options_data->phenotype) {
            int n;
            char* phenotypes = strdup(options_data->phenotype);
            char** pheno_values = split(phenotypes, ",", &n);
            if(n != 2) {
                LOG_ERROR("To handle case-control test, only two phenotypes are supported\n");
                return MORE_THAN_TWO_PHENOTYPES;
            } else {
                set_unaffected_phenotype(pheno_values[0],ped_file);
                set_affected_phenotype(pheno_values[1],ped_file);
            }
        } else {
            set_unaffected_phenotype("1", ped_file);
            set_affected_phenotype("2", ped_file);
        }
        
        LOG_INFO("About to read PED file...\n");
        // Read PED file before doing any processing
        ret_code = ped_read(ped_file);
        if (ret_code != 0) {
            LOG_FATAL_F("Can't read PED file: %s\n", ped_file->filename);
        }
        if(!ped_file->num_field) {
            LOG_ERROR_F("Can't find the specified field \"%s\" in file: %s \n", options_data->variable, ped_file->filename);
            return VARIABLE_FIELD_NOT_FOUND;
        }
    }
    
    ret_code = create_directory(shared_options_data->output_directory);
    if (ret_code != 0 && errno != EEXIST) {
        LOG_FATAL_F("Can't create output directory: %s\n", shared_options_data->output_directory);
    }
    
    // Initialize variables related to the different threads
    for (int i = 0; i < shared_options_data->num_threads; i++) {
        output_list[i] = (list_t*) malloc(sizeof(list_t));
        list_init("input", 1, shared_options_data->num_threads * shared_options_data->batch_lines, output_list[i]);
    }
    list_init("next_token", shared_options_data->num_threads, INT_MAX, next_token_list);
    
    LOG_INFO("About to retrieve statistics from VCF file...\n");

#pragma omp parallel sections private(start, stop, total)
    {
#pragma omp section
        {
            LOG_DEBUG_F("Thread %d reads the VCF file\n", omp_get_thread_num());
            // Reading
            start = omp_get_wtime();

            if (shared_options_data->batch_bytes > 0) {
                ret_code = vcf_parse_batches_in_bytes(shared_options_data->batch_bytes, vcf_file);
            } else if (shared_options_data->batch_lines > 0) {
                ret_code = vcf_parse_batches(shared_options_data->batch_lines, vcf_file);
            }

            stop = omp_get_wtime();
            total = stop - start;

            if (ret_code) { LOG_FATAL_F("[%dR] Error code = %d\n", omp_get_thread_num(), ret_code); }

            LOG_INFO_F("[%dR] Time elapsed = %f s\n", omp_get_thread_num(), total);
            LOG_INFO_F("[%dR] Time elapsed = %e ms\n", omp_get_thread_num(), total*1000);

            notify_end_parsing(vcf_file);
        }
        
#pragma omp section
        {
            // Enable nested parallelism and set the number of threads the user has chosen
            omp_set_nested(1);
            LOG_DEBUG_F("Thread %d processes data\n", omp_get_thread_num());
            
            individual_t **individuals = NULL;
            khash_t(ids) *sample_ids = NULL;
            khash_t(str) *phenotype_ids = NULL;
            int num_phenotypes;
            
            start = omp_get_wtime();
            
            int i = 0;
            vcf_batch_t *batch = NULL;
            while ((batch = fetch_vcf_batch(vcf_file)) != NULL) {
                if (i == 0) {
                    sample_stats = malloc (get_num_vcf_samples(vcf_file) * sizeof(sample_stats_t*));
                    for (int j = 0; j < get_num_vcf_samples(vcf_file); j++) {
                        sample_stats[j] = sample_stats_new(array_list_get(j, vcf_file->samples_names));
                    }
                    
                    if (ped_file) {
                        // Create map to associate the position of individuals in the list of samples defined in the VCF file
                        sample_ids = associate_samples_and_positions(vcf_file);
                        // Sort individuals in PED as defined in the VCF file
                        individuals = sort_individuals(vcf_file, ped_file);
                        // Get the khash of the phenotypes in PED file
                        phenotype_ids = get_phenotypes(ped_file);
                        num_phenotypes = get_num_variables(ped_file);
                    }
                }
                
                if (i % 50 == 0) {
                    LOG_INFO_F("Batch %d reached by thread %d - %zu/%zu records \n", 
                                i, omp_get_thread_num(),
                                batch->records->size, batch->records->capacity);
                }

                // Divide the list of passed records in ranges of size defined in config file
                int num_chunks;
                int *chunk_sizes = NULL;
                array_list_t *input_records = batch->records;
                int *chunk_starts = create_chunks(input_records->size, 
                                                  ceil((float) shared_options_data->batch_lines / shared_options_data->num_threads), 
                                                  &num_chunks, &chunk_sizes);
                
                // OpenMP: Launch a thread for each range
                #pragma omp parallel for num_threads(shared_options_data->num_threads)
                for (int j = 0; j < num_chunks; j++) {
                    LOG_DEBUG_F("[%d] Stats invocation\n", omp_get_thread_num());
                    // Invoke variant stats and/or sample stats when applies
                    if (options_data->variant_stats) {
                        int index = omp_get_thread_num() % shared_options_data->num_threads;
                        ret_code = get_variants_stats((vcf_record_t**) (input_records->items + chunk_starts[j]),
                                                      chunk_sizes[j], individuals, sample_ids,num_phenotypes, output_list[index], file_stats); 
                    }
                    
                    if (options_data->sample_stats) {
                        ret_code |= get_sample_stats((vcf_record_t**) (input_records->items + chunk_starts[j]), 
                                                      chunk_sizes[j], individuals, sample_ids, sample_stats, file_stats);
                    }
                }
                
                if (options_data->variant_stats) {
                    // Insert as many tokens as elements correspond to each thread
                    for (int t = 0; t < num_chunks; t++) {
                        for (int s = 0; s < chunk_sizes[t]; s++) {
                            list_item_t *token_item = list_item_new(t, 0, NULL);
                            list_insert_item(token_item, next_token_list);
                        }
                    }
                }
                
                free(chunk_starts);
                free(chunk_sizes);
                vcf_batch_free(batch);
                
                i++;
            }
            
            stop = omp_get_wtime();
            total = stop - start;

            LOG_INFO_F("[%d] Time elapsed = %f s\n", omp_get_thread_num(), total);
            LOG_INFO_F("[%d] Time elapsed = %e ms\n", omp_get_thread_num(), total*1000);
            
            // Decrease list writers count
            for (i = 0; i < shared_options_data->num_threads; i++) {
                list_decr_writers(next_token_list);
                list_decr_writers(output_list[i]);
            }
            
            if (sample_ids) { kh_destroy(ids, sample_ids); }
            if (individuals) { free(individuals); }
        }
        
#pragma omp section
        {
            LOG_DEBUG_F("Thread %d writes the output\n", omp_get_thread_num());
            
            char *stats_prefix = get_vcf_stats_filename_prefix(shared_options_data->vcf_filename, 
                                                               shared_options_data->output_filename, 
                                                               shared_options_data->output_directory);
            
            // File names and descriptors for output to plain text files
            char *stats_filename, *summary_filename, *phenotype_filename;
            FILE *stats_fd, *summary_fd, **phenotype_fd;
            
            char *stats_db_name;
            sqlite3 *db = NULL;
            khash_t(stats_chunks) *hash;
            
            khash_t(str) *phenotype_ids;
            int num_phenotypes;
            if(ped_file){
                phenotype_ids = get_phenotypes(ped_file);
                num_phenotypes = get_num_variables(ped_file);
            }
            
            if (options_data->save_db) {
                delete_files_by_extension(shared_options_data->output_directory, "db");
                stats_db_name = calloc(strlen(stats_prefix) + strlen(".db") + 2, sizeof(char));
                sprintf(stats_db_name, "%s.db", stats_prefix);
                create_stats_db(stats_db_name, VCF_CHUNKSIZE, create_vcf_query_fields, &db);
                hash = kh_init(stats_chunks);
            }
            
            // Write variant (and global) statistics
            if (options_data->variant_stats) {
                stats_filename = get_variant_stats_output_filename(stats_prefix);
                if (!(stats_fd = fopen(stats_filename, "w"))) {
                    LOG_FATAL_F("Can't open file for writing statistics of variants: %s\n", stats_filename);
                }
                
                //Open one file for each phenotype
                if(ped_file){
                    phenotype_fd = malloc(sizeof(FILE*)*num_phenotypes);
                    if(options_data->variable_groups){
                        int n;
                        char *variable_groups = strdup(options_data->variable_groups);
                        char ** names = split(variable_groups, ":", &n);
                        for(int i = 0; i < n; i++) {
                            phenotype_filename = get_variant_phenotype_stats_output_filename(stats_prefix, names[i]);
                            if(!(phenotype_fd[i] = fopen(phenotype_filename, "w"))) {
                                LOG_FATAL_F("Can't open file for writing statistics of variants per phenotype: %s\n", stats_filename);
                            }
                            free(phenotype_filename);
                        }
                        free(names);
                        free(variable_groups);
                    } else {
                 
                        for (khint_t i = kh_begin(phenotype_ids); i != kh_end(phenotype_ids); ++i) {
                            if (!kh_exist(phenotype_ids,i)) continue;
                            
                            phenotype_filename = get_variant_phenotype_stats_output_filename(stats_prefix, kh_key(phenotype_ids,i));
                            if(!(phenotype_fd[kh_val(phenotype_ids,i)] = fopen(phenotype_filename, "w"))) {
                                LOG_FATAL_F("Can't open file for writing statistics of variants per phenotype: %s\n", stats_filename);
                            }
                            free(phenotype_filename);
                        }
                    }
                }
                // Write header
                report_vcf_variant_stats_header(stats_fd);
                if(ped_file){
                    for(int i = 0; i < num_phenotypes; i++)
                        report_vcf_variant_phenotype_stats_header(phenotype_fd[i]);
                }
                
                // For each variant, generate a new line
                int avail_stats = 0;
                variant_stats_t *var_stats_batch[VCF_CHUNKSIZE];
                list_item_t *token_item = NULL, *output_item = NULL;
                while ( token_item = list_remove_item(next_token_list) ) {
                    output_item = list_remove_item(output_list[token_item->id]);
                    assert(output_item);
                    var_stats_batch[avail_stats] = output_item->data_p;
                    avail_stats++;
                    
                    // Run only when certain amount of stats is available
                    if (avail_stats >= VCF_CHUNKSIZE) {
                        report_vcf_variant_stats(stats_fd, db, hash, avail_stats, var_stats_batch);
                        
                        if(ped_file)
                            for(int i = 0; i < num_phenotypes; i++)
                                report_vcf_variant_phenotype_stats(phenotype_fd[i], avail_stats, var_stats_batch, i);

                        // Free all stats from the "batch"
                        for (int i = 0; i < avail_stats; i++) {
                            variant_stats_free(var_stats_batch[i]);
                        }
                        avail_stats = 0;
                    }
                    
                    // Free resources
                    list_item_free(output_item);
                    list_item_free(token_item);
                }
                
                if (avail_stats > 0) {
                    report_vcf_variant_stats(stats_fd, db, hash, avail_stats, var_stats_batch);
                    
                    if(ped_file)
                        for(int i = 0; i < num_phenotypes; i++)
                            report_vcf_variant_phenotype_stats(phenotype_fd[i], avail_stats, var_stats_batch, i);

                    // Free all stats from the "batch"
                    for (int i = 0; i < avail_stats; i++) {
                        variant_stats_free(var_stats_batch[i]);
                    }
                    avail_stats = 0;
                }
                
                // Write whole file stats (data only got when launching variant stats)
                summary_filename = get_vcf_file_stats_output_filename(stats_prefix);
                if (!(summary_fd = fopen(summary_filename, "w"))) {
                    LOG_FATAL_F("Can't open file for writing statistics summary: %s\n", summary_filename);
                }
                report_vcf_summary_stats(summary_fd, db, file_stats);
                
                free(stats_filename);
                free(summary_filename);
                
                // Close variant stats file
                if (stats_fd) { fclose(stats_fd); }
                if (summary_fd) { fclose(summary_fd); }
				if(ped_file){
		            for(int i = 0; i < num_phenotypes; i++)
		                if(phenotype_fd[i]) fclose(phenotype_fd[i]);
					free(phenotype_fd);
				}
            }
            
            // Write sample statistics
            if (options_data->sample_stats) {
                stats_filename = get_sample_stats_output_filename(stats_prefix);
                if (!(stats_fd = fopen(stats_filename, "w"))) {
                    LOG_FATAL_F("Can't open file for writing statistics of samples: %s\n", stats_filename);
                }
                
                report_vcf_sample_stats_header(stats_fd);
                report_vcf_sample_stats(stats_fd, NULL, vcf_file->samples_names->size, sample_stats);
                
                // Close sample stats file
                free(stats_filename);
                if (stats_fd) { fclose(stats_fd); }
            }
            
            free(stats_prefix);
            
            if (db) {
                insert_chunk_hash(VCF_CHUNKSIZE, hash, db);
                create_stats_index(create_vcf_index, db);
                close_stats_db(db, hash);
            }
            
        }
    }
    
    for (int i = 0; i < get_num_vcf_samples(vcf_file); i++) {
        sample_stats_free(sample_stats[i]);
    }
    free(sample_stats);
    free(file_stats);
    
    free(next_token_list);
    for (int i = 0; i < shared_options_data->num_threads; i++) {
        free(output_list[i]);
    }
    
    vcf_close(vcf_file);
    if (ped_file) { ped_close(ped_file, 1,1); }
    
    return 0;
}
示例#15
0
int run_merge(shared_options_data_t *shared_options_data, merge_options_data_t *options_data) {
    if (options_data->num_files == 1) {
        LOG_INFO("Just one VCF file specified, no need to merge");
        return 0;
    }
    
    list_t *read_list[options_data->num_files];
    memset(read_list, 0, options_data->num_files * sizeof(list_t*));
    list_t *output_header_list = (list_t*) malloc (sizeof(list_t));
    list_init("headers", shared_options_data->num_threads, INT_MAX, output_header_list);
    list_t *output_list = (list_t*) malloc (sizeof(list_t));
    list_init("output", shared_options_data->num_threads, shared_options_data->max_batches * shared_options_data->batch_lines, output_list);
    list_t *merge_tokens = (list_t*) malloc (sizeof(list_t));
    list_init("tokens", 1, INT_MAX, merge_tokens);
    
    int ret_code = 0;
    double start, stop, total;
    vcf_file_t *files[options_data->num_files];
    memset(files, 0, options_data->num_files * sizeof(vcf_file_t*));
    
    // Initialize variables related to the different files
    for (int i = 0; i < options_data->num_files; i++) {
        files[i] = vcf_open(options_data->input_files[i], shared_options_data->max_batches);
        if (!files[i]) {
            LOG_FATAL_F("VCF file %s does not exist!\n", options_data->input_files[i]);
        }
        
        read_list[i] = (list_t*) malloc(sizeof(list_t));
        list_init("text", 1, shared_options_data->max_batches, read_list[i]);
    }
    
    ret_code = create_directory(shared_options_data->output_directory);
    if (ret_code != 0 && errno != EEXIST) {
        LOG_FATAL_F("Can't create output directory: %s\n", shared_options_data->output_directory);
    }

    chromosome_order = get_chromosome_order(shared_options_data->host_url, shared_options_data->species,
                                            shared_options_data->version, &num_chromosomes);
    
    printf("Number of threads = %d\n", shared_options_data->num_threads);
    
#pragma omp parallel sections private(start, stop, total)
    {
#pragma omp section
        {
            LOG_DEBUG_F("Thread %d reads the VCF file\n", omp_get_thread_num());
            // Reading
            start = omp_get_wtime();

            ret_code = vcf_multiread_batches(read_list, shared_options_data->batch_lines, files, options_data->num_files);

            stop = omp_get_wtime();
            total = stop - start;

            if (ret_code) {
                LOG_ERROR_F("Error %d while reading VCF files\n", ret_code);
            }

            LOG_INFO_F("[%dR] Time elapsed = %f s\n", omp_get_thread_num(), total);
            LOG_INFO_F("[%dR] Time elapsed = %e ms\n", omp_get_thread_num(), total*1000);
        }
        
#pragma omp section
        {
            // Enable nested parallelism
            omp_set_nested(1);
            
            LOG_DEBUG_F("Thread %d processes data\n", omp_get_thread_num());
            
            int num_eof_found = 0;
            int eof_found[options_data->num_files];
            memset(eof_found, 0, options_data->num_files * sizeof(int));
            
            list_item_t *items[options_data->num_files];
            memset(items, 0, options_data->num_files * sizeof(list_item_t*));
            char *texts[options_data->num_files];
            memset(texts, 0, options_data->num_files * sizeof(char*));
            
            khash_t(pos) *positions_read = kh_init(pos);
            
            long max_position_merged = LONG_MAX;
            char *max_chromosome_merged = NULL;
            int header_merged = 0;
            int token = 0;
            
            double start_parsing, start_insertion, total_parsing = 0, total_insertion = 0;
            
            start = omp_get_wtime();

            while (num_eof_found < options_data->num_files) {
                /* Process:
                 * - N threads getting batches of VCF records and inserting them in a data structure. The common minimum 
                 * position of each group of batches will also be stored.
                 * - If the data structure reaches certain size or the end of a chromosome, merge positions prior to the 
                 * last minimum registered.
                 */
                
                // Getting text elements in a critical region guarantees that each thread gets variants in positions in the same range
                for (int i = 0; i < options_data->num_files; i++) {
                    if (eof_found[i]) {
                        continue;
                    }
                    
                    items[i] = list_remove_item(read_list[i]);
                    if (items[i] == NULL || !strcmp(items[i]->data_p, "")) {
                        LOG_INFO_F("[%d] EOF found in file %s\n", omp_get_thread_num(), options_data->input_files[i]);
                        eof_found[i] = 1;
                        num_eof_found++;
                        
                        if(items[i] != NULL && !strcmp(items[i]->data_p, "")) {
                            free(items[i]->data_p);
                            list_item_free(items[i]);
                            LOG_DEBUG_F("[%d] Text batch freed\n", omp_get_thread_num());
                        } else {
                            LOG_DEBUG_F("[%d] No need to free text batch\n", omp_get_thread_num());
                        }
                        
                        continue;
                    }
                    
                    assert(items[i]->data_p != NULL);
                    texts[i] = items[i]->data_p;
                    
//                     printf("[%d] text batch from file %d\tcontents = '%s'\n", omp_get_thread_num(), i, texts[i]);
                }
                
                for (int i = 0; i < options_data->num_files; i++) {
                    if (eof_found[i]) {
                        continue;
                    }
                    
                    start_parsing = omp_get_wtime();
                    
                    char *text_begin = texts[i];
                    char *text_end = text_begin + strlen(text_begin);
                    assert(text_end != NULL);
                    
//                     printf("batch = '%.*s'\n", text_end - text_begin, text_begin);
                    
                    // Get VCF batches from text batches
                    vcf_reader_status *status = vcf_reader_status_new(shared_options_data->batch_lines, 0);
                    ret_code = run_vcf_parser(text_begin, text_end, shared_options_data->batch_lines, files[i], status);
                    
                    if (ret_code) {
                        // TODO stop?
                        LOG_ERROR_F("Error %d while reading the file %s\n", ret_code, files[i]->filename);
                        continue;
                    }

//                     printf("batches = %d\n", files[i]->record_batches->length);
                    vcf_batch_t *batch = fetch_vcf_batch_non_blocking(files[i]);
                    if (!batch) {
                        continue;
                    }
                    
                    total_parsing += omp_get_wtime() - start_parsing;
                    start_insertion = omp_get_wtime();
                    
                    // Insert records into hashtable
                    for (int j = 0; j < batch->records->size; j++) {
                        vcf_record_t *record = vcf_record_copy(array_list_get(j, batch->records));
                        vcf_record_file_link *link = vcf_record_file_link_new(record, files[i]);
                        char key[64];
                        compose_key_value(record->chromosome, record->position, key);
                        int ret = insert_position_read(key, link, positions_read);
                        assert(ret);
                    }
                    
                    total_insertion += omp_get_wtime() - start_insertion;
                    
                    // Update minimum position being a maximum of these batches
                    vcf_record_t *current_record = (vcf_record_t*) array_list_get(batch->records->size - 1, batch->records);
                    calculate_merge_interval(current_record, &max_chromosome_merged, &max_position_merged, chromosome_order, num_chromosomes);
                    
                    // Free batch and its contents
                    vcf_reader_status_free(status);
                    vcf_batch_free(batch);
                    list_item_free(items[i]);
                }
                
                if (num_eof_found == options_data->num_files) {
                    max_chromosome_merged = chromosome_order[num_chromosomes-1];
                    max_position_merged = LONG_MAX;
                }
                
                // Merge headers, if not previously done
                if (!header_merged) {
                    merge_vcf_headers(files, options_data->num_files, options_data, output_header_list);
                    header_merged = 1;
                    
                    // Decrease list writers count
                    for (int i = 0; i < shared_options_data->num_threads; i++) {
                        list_decr_writers(output_header_list);
                    }
                }
                
                // If the data structure reaches certain size or the end of a chromosome, 
                // merge positions prior to the last minimum registered
                if (num_eof_found < options_data->num_files && kh_size(positions_read) > TREE_LIMIT) {
                    LOG_INFO_F("Merging until position %s:%ld\n", max_chromosome_merged, max_position_merged);
                    token = merge_interval(positions_read, max_chromosome_merged, max_position_merged, chromosome_order, num_chromosomes,
                                   	   	   files, shared_options_data, options_data, output_list);
                }
                // When reaching EOF for all files, merge the remaining entries
                else if (num_eof_found == options_data->num_files && kh_size(positions_read) > 0) {
                    LOG_INFO_F("Merging remaining positions (last = %s:%ld)\n", chromosome_order[num_chromosomes - 1], LONG_MAX);
                    token = merge_remaining_interval(positions_read, files, shared_options_data, options_data, output_list);
                }
                
                if (token) {
                	int *token_ptr = malloc (sizeof(int)); *token_ptr = token;
                    list_item_t *item = list_item_new(1, 0, token_ptr);
                    list_insert_item(item, merge_tokens);
                }

                // Set variables ready for next iteration of the algorithm
                if (max_chromosome_merged) {
                    free(max_chromosome_merged);
                }
            	token = 0;
                max_chromosome_merged = NULL;
                max_position_merged = LONG_MAX;
            }
            
            kh_destroy(pos, positions_read);
            
            stop = omp_get_wtime();

            total = stop - start;

            LOG_INFO_F("[%d] Time elapsed = %f s\n", omp_get_thread_num(), total);
            LOG_INFO_F("[%d] Time elapsed = %e ms\n", omp_get_thread_num(), total*1000);

            LOG_DEBUG_F("** Time in parsing = %f s\n", total_parsing);
            LOG_DEBUG_F("** Time in insertion = %f s\n", total_insertion);
//             for (int i = 0; i < shared_options_data->num_threads; i++) {
//                 printf("[%d] Time in searching = %f s\n", i, total_search[i]);
//                 printf("[%d] Time in merging = %f s\n", i, total_merge[i]);
//             }
            
            // Decrease list writers count
            for (int i = 0; i < shared_options_data->num_threads; i++) {
                list_decr_writers(output_list);
            }
            list_decr_writers(merge_tokens);
        }
        
#pragma omp section
        {
            LOG_DEBUG_F("Thread %d writes the output\n", omp_get_thread_num());
    
            start = omp_get_wtime();

            // Create file streams for results
            char aux_filename[32]; memset(aux_filename, 0, 32 * sizeof(char));
            sprintf(aux_filename, "merge_from_%d_files.vcf", options_data->num_files);
            
            char *merge_filename;
            FILE *merge_fd = get_output_file(shared_options_data, aux_filename, &merge_filename);
            LOG_INFO_F("Output filename = %s\n", merge_filename);
            free(merge_filename);
            
            list_item_t *item1 = NULL, *item2 = NULL;
            vcf_header_entry_t *entry;
            vcf_record_t *record;
            int *num_records;
            
            // Write headers
            while ((item1 = list_remove_item(output_header_list)) != NULL) {
                entry = item1->data_p;
                write_vcf_header_entry(entry, merge_fd);
            }
            
            // Write delimiter
            array_list_t *sample_names = merge_vcf_sample_names(files, options_data->num_files);
            write_vcf_delimiter_from_samples((char**) sample_names->items, sample_names->size, merge_fd);
            
            // Write records
            // When a token is present, it means a set of batches has been merged. The token contains the number of records merged.
            // In this case, the records must be sorted by chromosome and position, and written afterwards.
            while ((item1 = list_remove_item(merge_tokens)) != NULL) {
                num_records = item1->data_p;
                vcf_record_t *records[*num_records];
                for (int i = 0; i < *num_records; i++) {
                    item2 = list_remove_item(output_list);
                    if (!item2) {
                        break;
                    }

                    records[i] = item2->data_p;
                    list_item_free(item2);
                }

                // Sort records
                qsort(records, *num_records, sizeof(vcf_record_t*), record_cmp);

                // Write and free sorted records
                for (int i = 0; i < *num_records; i++) {
                    record = records[i];
                    write_vcf_record(record, merge_fd);
                    vcf_record_free_deep(record);
                }

                free(num_records);
                list_item_free(item1);
            }
            
            // Close file
            if (merge_fd != NULL) { fclose(merge_fd); }
            
            stop = omp_get_wtime();

            total = stop - start;

            LOG_INFO_F("[%dW] Time elapsed = %f s\n", omp_get_thread_num(), total);
            LOG_INFO_F("[%dW] Time elapsed = %e ms\n", omp_get_thread_num(), total*1000);
        }
    }

    // Free variables related to the different files
    for (int i = 0; i < options_data->num_files; i++) {
        if(files[i]) { vcf_close(files[i]); }
        if(read_list[i]) { free(read_list[i]); }
    }
    free(output_list);
    
    return ret_code;
}