/*
 * Parse the BED file <filename> using <bed_parser> and append the nodes that
 * were created to <genome_nodes>.
 *
 * The output queue first receives whatever the parser's region node builder
 * produces, then every node waiting in the parser's feature node queue, in
 * queue order. The builder is reset and the feature queue is left empty, and
 * this draining happens regardless of the parse result.
 *
 * Returns the value reported by parse_bed_file(); <err> is handed through to
 * it unchanged.
 */
int gt_bed_parser_parse(GtBEDParser *bed_parser, GtQueue *genome_nodes,
                        const char *filename, GtError *err)
{
  int status;
  GtIO *input;
  GtQueue *pending_features;

  gt_error_check(err);
  gt_assert(bed_parser && genome_nodes);

  /* read the whole BED file */
  input = gt_io_new(filename, "r");
  status = parse_bed_file(bed_parser, input, err);

  /* hand over the region nodes and clear the builder */
  gt_region_node_builder_build(bed_parser->region_node_builder, genome_nodes);
  gt_region_node_builder_reset(bed_parser->region_node_builder);

  /* move the collected feature nodes over, one at a time */
  pending_features = bed_parser->feature_nodes;
  while (gt_queue_size(pending_features) != 0) {
    void *node = gt_queue_get(pending_features);
    gt_queue_add(genome_nodes, node);
  }

  gt_io_delete(input);
  return status;
}
double* test_intersection(char *universe_file_name, char *source_file_name, char *target_file_name, int iters) { struct timeval t_start,t_end; int chrom_num = 24; /***********************REPLACE WITH INPUT FILE************************/ char *chrom_names[] = { "chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chr20", "chr21", "chr22", "chrX", "chrY" }; /**********************************************************************/ struct chr_list universe[chrom_num], source[chrom_num], target[chrom_num]; // initialize chrom lists int i; for (i = 0; i < chrom_num; i++) { universe[i] = new_chr_list(chrom_names[i]); source[i] = new_chr_list(chrom_names[i]); target[i] = new_chr_list(chrom_names[i]); } // chr_lists need to be sorted before used qsort(universe, chrom_num, sizeof(struct chr_list), compare_chr_lists); qsort(source, chrom_num, sizeof(struct chr_list), compare_chr_lists); qsort(target, chrom_num, sizeof(struct chr_list), compare_chr_lists); FILE *universe_file = fopen(universe_file_name, "r"); FILE *source_file = fopen(source_file_name, "r"); FILE *target_file = fopen(target_file_name, "r"); if ( (universe_file == NULL) || (source_file == NULL) || (target_file == NULL) ) { fprintf(stderr, "%s\n", strerror(errno)); return 0; } parse_bed_file(universe_file, universe, chrom_num); parse_bed_file(source_file, source, chrom_num); parse_bed_file(target_file, target, chrom_num); fclose(universe_file); fclose(source_file); fclose(target_file); trim(universe, source, chrom_num); trim(universe, target, chrom_num); // Calculate the offsets for each iterval int c = 0; int max = 0; for (i = 0; i < chrom_num; i++) { struct interval_node *curr = universe[i].head; while (curr != NULL) { curr->offset = c; int end = c + curr->end - curr->start; if (end > max) max = end; c += curr->end - curr->start; curr = curr->next; } } int total_size = 0, target_size = 0, source_size = 
0; /* * Get the total number of intervals in the source and target sets, we also * store the number of intervals individually to be used later for * randomization */ for (i = 0; i < chrom_num; i++) { total_size += source[i].size; total_size += target[i].size; target_size += target[i].size; source_size += source[i].size; } /* * get an array of just the target intervals, each random permutation will * consist of these intervals and a set of randomly generated intervals */ struct interval *target_intervals = (struct interval *) malloc( 2 * target_size * sizeof(struct interval) ); i = 0; int pushed_targets = push_intervals(chrom_num, &i, universe, target, target_intervals, 0); /* * we need will permute the intervals in target, to manage this we will * create and array of the interval sizes in source */ int *rand_sizes = (int *) malloc( source_size * sizeof(int) ); int j = 0; for (i = 0; i < chrom_num; i++) { struct interval_node *curr = source[i].head; while (curr != NULL) { rand_sizes[j++] = curr->end - curr->start; curr = curr->next; } } /* * set up an array with target and source to find the observed number of * intersections */ struct interval *intervals = (struct interval *) malloc( 2 * total_size * sizeof(struct interval) ); for (i = 0; i < 2 * target_size; i++) intervals[i] = target_intervals[i]; int pushed_sources = push_intervals(chrom_num, &i, universe, source, intervals, 1); gettimeofday(&t_start,0); int obs = get_intersections(intervals, total_size); gettimeofday(&t_end,0); double p_of_source = (double)obs / (double)source_size; double p_of_target = (double)obs / (double)target_size; int r = 0; int sum = 0; // do some random stuff struct interval rand_start, rand_end; rand_start.type = 's'; rand_end.type = 'e'; rand_start.sample = 1; rand_end.sample = 1; for (i = 0; i < iters; i++) { for (j = 0; j < 2 * target_size; j++) intervals[j] = target_intervals[j]; for (j = 0; j < source_size; j++) { rand_start.offset = rand() % (max - rand_sizes[j]); 
rand_end.offset = rand_start.offset + rand_sizes[j]; intervals[ 2 * target_size + 2 * j ] = rand_start; intervals[ 2 * target_size + 2 * j + 1 ] = rand_end; } int t = get_intersections(intervals, total_size); sum += t; if ( t >= obs ) ++r; } double p = ( (double)(r + 1) ) / ( (double)(iters + 1) ); double mean = ( (double)(sum) ) / ( (double)(iters) ); double *ret = (double *) malloc(5 * sizeof(double)); ret[0] = obs; ret[1] = mean; ret[2] = p; ret[3] = p_of_source; ret[4] = p_of_target; free_chr_list(universe, chrom_num); free_chr_list(source, chrom_num); free_chr_list(target, chrom_num); free(target_intervals); free(rand_sizes); free(intervals); return ret; }