int compare_regions(void *region_1, void *region_2, char **chromosome_ordering, int num_chromosomes) { if (region_1 == NULL || region_2 == NULL) { return INT_MIN; } region_t *reg_1 = (region_t *) region_1; region_t *reg_2 = (region_t *) region_2; // TODO This could be avoided while inserting, because regions are classified by chromosome int result = compare_chromosomes(reg_1->chromosome, reg_2->chromosome, chromosome_ordering, num_chromosomes); if (result != 0) { return result; } else { // return compare_position_ranges(reg_1, reg_2); return compare_positions(reg_1->start_position, reg_2->start_position); } }
int merge_interval(kh_pos_t* positions_read, char *max_chromosome_merged, unsigned long max_position_merged, char **chromosome_order, int num_chromosomes, vcf_file_t **files, shared_options_data_t *shared_options_data, merge_options_data_t *options_data, list_t *output_list) { int num_entries = 0; #pragma omp parallel for num_threads(shared_options_data->num_threads) reduction(+:num_entries) for (int k = kh_begin(positions_read); k < kh_end(positions_read); k++) { if (kh_exist(positions_read, k)) { array_list_t *records_in_position = kh_value(positions_read, k); assert(records_in_position); vcf_record_t *record = ((vcf_record_file_link*) array_list_get(0, records_in_position))->record; vcf_record_file_link **links = NULL; int num_links = 0; // Remove positions prior to the last chromosome:position to merge int cmp_chrom = compare_chromosomes(record->chromosome, max_chromosome_merged, chromosome_order, num_chromosomes); if (cmp_chrom < 0 || (cmp_chrom == 0 && compare_positions(record->position, max_position_merged) <= 0)) { links = records_in_position->items; num_links = records_in_position->size; } // Launch merge if (num_links > 0) { // printf("links[0] = %s:%ld in file %s\n", links[0]->record->chromosome, links[0]->record->position, links[0]->file->filename); int err_code = 0; vcf_record_t *merged = merge_position(links, num_links, files, options_data->num_files, options_data, &err_code); if (!err_code) { list_item_t *item = list_item_new(k, MERGED_RECORD, merged); list_insert_item(item, output_list); num_entries += 1; } // Free empty nodes (lists of records in the same position) array_list_free(records_in_position, vcf_record_file_link_free); kh_del(pos, positions_read, k); } } // End kh_exist } return num_entries; }
/**
 * Updates the chromosome:position limit up to which records can be merged.
 *
 * If no limit is set yet (*max_chromosome_merged is NULL), the position of
 * current_record is stored unconditionally. Otherwise the limit is replaced
 * only when current_record is strictly prior to it, so that after several
 * calls the limit holds the earliest chromosome:position among the records
 * provided.
 *
 * Ownership: the string stored in *max_chromosome_merged is always allocated
 * by this function with strndup. When the limit is replaced, the previous
 * string is freed here; the caller is responsible for freeing the final one.
 *
 * @param current_record        Record whose chromosome:position is examined
 * @param max_chromosome_merged In/out: chromosome of the limit (NULL if unset)
 * @param max_position_merged   In/out: position of the limit
 * @param chromosome_order      Chromosome names, forwarded to compare_chromosomes
 * @param num_chromosomes       Number of entries in chromosome_order
 */
void calculate_merge_interval(vcf_record_t* current_record, char** max_chromosome_merged, long unsigned int* max_position_merged,
                              char **chromosome_order, int num_chromosomes) {
    if (*max_chromosome_merged == NULL) {
        // Max merged chrom:position not set, assign without any other consideration
        *max_chromosome_merged = strndup(current_record->chromosome, current_record->chromosome_len);
        *max_position_merged = current_record->position;
        return;
    }

    char *current_chromosome = strndup(current_record->chromosome, current_record->chromosome_len);
    // Check the allocation before the copy is used in any comparison
    assert(current_chromosome);
    long unsigned int current_position = current_record->position;

    int chrom_comparison = compare_chromosomes(current_chromosome, *max_chromosome_merged, chromosome_order, num_chromosomes);
    int position_comparison = compare_positions(current_position, *max_position_merged);

    // Max merged chrom:position is posterior to the last one in this batch
    if (chrom_comparison < 0 || (chrom_comparison == 0 && position_comparison < 0)) {
        // Release the previous limit, otherwise its string is leaked on every replacement
        free(*max_chromosome_merged);
        *max_chromosome_merged = current_chromosome;
        *max_position_merged = current_position;
    } else {
        free(current_chromosome);
    }
}