/*
 * Feature-node callback of the region coverage visitor.
 *
 * Looks up the range list registered for the feature's sequence ID and either
 * extends the last stored range (when the feature lies within
 * <max_feature_dist> of it) or appends the feature's range as a new entry.
 * Always returns 0.
 */
static int gt_regioncov_visitor_feature_node(GtNodeVisitor *nv,
                                             GtFeatureNode *fn,
                                             GT_UNUSED GtError *err)
{
  GtRegionCovVisitor *visitor;
  GtArray *covered;
  GtRange feature_range;

  gt_error_check(err);
  visitor = gt_regioncov_visitor_cast(nv);
  covered = gt_hashmap_get(visitor->region2rangelist,
                           gt_str_get(gt_genome_node_get_seqid(
                                                        (GtGenomeNode*) fn)));
  gt_assert(covered);
  feature_range = gt_genome_node_get_range((GtGenomeNode*) fn);

  if (gt_array_size(covered)) {
    GtRange *last = gt_array_get_last(covered);
    GtRange widened = *last;

    /* only the comparison copy is widened, the stored range is untouched */
    widened.end += visitor->max_feature_dist;
    if (gt_range_overlap(&widened, &feature_range)) {
      last->end = MAX(last->end, feature_range.end);
      return 0;
    }
  }

  /* first range for this region, or too far away from the previous one */
  gt_array_add(covered, feature_range);
  return 0;
}
/*
 * Region-node callback of the select visitor.
 *
 * A region node is dropped (deleted) when a seqid was requested and it does
 * not match, or when a contain range is defined that does not overlap the
 * region. Otherwise the node is queued in the node buffer; if a contain range
 * is defined, the region is first clipped to it. Always returns 0.
 */
static int select_visitor_region_node(GtNodeVisitor *nv, GtRegionNode *rn,
                                      GT_UNUSED GtError *err)
{
  GtSelectVisitor *sv;
  GtGenomeNode *node = (GtGenomeNode*) rn;
  GtRange clipped;

  gt_error_check(err);
  sv = select_visitor_cast(nv);

  /* a seqid was specified and it differs from the region's seqid */
  if (gt_str_length(sv->seqid) &&
      gt_str_cmp(sv->seqid, gt_genome_node_get_seqid(node))) {
    gt_genome_node_delete(node);
    return 0;
  }

  /* no contain range defined -> pass the region through unchanged */
  if (sv->contain_range.start == GT_UNDEF_ULONG) {
    gt_queue_add(sv->node_buffer, rn);
    return 0;
  }

  clipped = gt_genome_node_get_range(node);
  if (!gt_range_overlap(&clipped, &sv->contain_range)) {
    /* contain range does not overlap with <rn> range -> delete <rn> */
    gt_genome_node_delete(node);
    return 0;
  }

  /* restrict the region to its intersection with the contain range */
  clipped.start = MAX(clipped.start, sv->contain_range.start);
  clipped.end = MIN(clipped.end, sv->contain_range.end);
  gt_genome_node_set_range(node, &clipped);
  gt_queue_add(sv->node_buffer, rn);
  return 0;
}
/*
 * Compute the union of two range lists.
 *
 * NOTE: <cov1> is modified: all ranges of <cov2> are appended to it and, if
 * it then holds more than one range, it is sorted with gt_range_compare.
 * Overlapping ranges are joined while walking the sorted list. The result is
 * returned as a newly created array of GtRange.
 */
static GtArray *gaeval_visitor_union(GtArray *cov1, GtArray *cov2)
{
  GtArray *merged;
  GtRange *current, *tail;
  GtUword idx, total;

  agn_assert(cov1 && cov2);

  gt_array_add_array(cov1, cov2);
  total = gt_array_size(cov1);
  if(total > 1)
    gt_array_sort(cov1, (GtCompare)gt_range_compare);

  merged = gt_array_new(sizeof(GtRange));
  if(total == 0)
    return merged;

  /* seed the result with the first range */
  current = gt_array_get(cov1, 0);
  gt_array_add(merged, *current);
  tail = gt_array_get(merged, 0);

  for(idx = 1; idx < total; idx++)
  {
    current = gt_array_get(cov1, idx);
    if(gt_range_overlap(current, tail))
    {
      *tail = gt_range_join(current, tail);
      continue;
    }
    gt_array_add(merged, *current);
    /* re-fetch the tail pointer after every append to the result array */
    tail = gt_array_get(merged, gt_array_size(merged) - 1);
  }

  return merged;
}
/*
 * Returns true if <fn> should be filtered out, i.e. if an overlap range is
 * defined (its start differs from GT_UNDEF_ULONG) and the range of <fn> does
 * not overlap it. Returns false otherwise.
 */
static bool filter_overlap_range(GtFeatureNode *fn, GtRange overlap_range)
{
  GtRange fn_range;

  gt_assert(fn);
  fn_range = gt_genome_node_get_range((GtGenomeNode*) fn);

  /* no overlap range defined -> nothing to filter */
  if (overlap_range.start == GT_UNDEF_ULONG)
    return false;

  return !gt_range_overlap(&overlap_range, &fn_range);
}
/*
 * Determine which parts of a gene model's exons are covered by an alignment.
 *
 * <genemodel> must be a feature node of type "mRNA". If the gene model and
 * the alignment are on different strands, NULL is returned. Otherwise every
 * exon of the gene model is intersected with every node reached by iterating
 * over <alignment> (nodes of type "match_gap" are skipped), and each
 * non-empty intersection is collected.
 *
 * Returns a newly created array of GtRange (possibly empty), or NULL on
 * strand mismatch.
 */
static GtArray* gaeval_visitor_intersect(GtGenomeNode *genemodel,
                                         GtGenomeNode *alignment)
{
  agn_assert(genemodel && alignment);

  GtFeatureNode *genefn = gt_feature_node_cast(genemodel);
  GtFeatureNode *algnfn = gt_feature_node_cast(alignment);
  agn_assert(gt_feature_node_has_type(genefn, "mRNA"));
  GtStrand genestrand = gt_feature_node_get_strand(genefn);
  GtStrand algnstrand = gt_feature_node_get_strand(algnfn);
  if(genestrand != algnstrand)
    return NULL;

  GtArray *covered_parts = gt_array_new( sizeof(GtRange) );
  GtArray *exons = agn_typecheck_select(genefn, agn_typecheck_exon);
  /* {0, 0} is the value gaeval_visitor_range_intersect returns for "no
     overlap"; it is loop invariant, so it is set up once */
  GtRange nullrange = {0, 0};
  /* unsigned index: it is compared against gt_array_size() and against the
     unsigned inner index <j> below */
  GtUword i;
  for(i = 0; i < gt_array_size(exons); i++)
  {
    GtGenomeNode *exon = *(GtGenomeNode **)gt_array_get(exons, i);
    GtRange exonrange = gt_genome_node_get_range(exon);
    GtFeatureNodeIterator *aniter = gt_feature_node_iterator_new(algnfn);
    GtFeatureNode *tempaln;
    for(tempaln  = gt_feature_node_iterator_next(aniter);
        tempaln != NULL;
        tempaln  = gt_feature_node_iterator_next(aniter))
    {
      if(gt_feature_node_has_type(tempaln, "match_gap"))
        continue;

      GtRange alnrange = gt_genome_node_get_range((GtGenomeNode *) tempaln);
      GtRange intr = gaeval_visitor_range_intersect(&exonrange, &alnrange);
      if(gt_range_compare(&intr, &nullrange) != 0)
        gt_array_add(covered_parts, intr);
    }
    gt_feature_node_iterator_delete(aniter);
  }
  gt_array_delete(exons);

  /* sanity check: the collected parts must be pairwise non-overlapping */
  for(i = 0; i < gt_array_size(covered_parts); i++)
  {
    GtRange *r1 = gt_array_get(covered_parts, i);
    GtUword j;
    for(j = i+1; j < gt_array_size(covered_parts); j++)
    {
      GtRange *r2 = gt_array_get(covered_parts, j);
      agn_assert(gt_range_overlap(r1, r2) == false);
    }
  }

  return covered_parts;
}
bool gt_ranges_do_not_overlap(const GtArray *ranges) { GtUword i; gt_assert(ranges && gt_array_size(ranges)); for (i = 1; i < gt_array_size(ranges); i++) { if (gt_range_overlap(gt_array_get(ranges, i-1), gt_array_get(ranges, i))) return false; } return true; }
/*
 * Intersect two ranges.
 *
 * Returns the overlapping part of <r1> and <r2>, or the range {0, 0} if the
 * two ranges do not overlap.
 */
static GtRange gaeval_visitor_range_intersect(GtRange *r1, GtRange *r2)
{
  GtRange result = {0, 0};

  agn_assert(r1 && r2);
  if(!gt_range_overlap(r1, r2))
    return result;

  /* the intersection starts at the larger start and ends at the smaller end */
  result.start = (r2->start > r1->start) ? r2->start : r1->start;
  result.end   = (r2->end   < r1->end)   ? r2->end   : r1->end;
  return result;
}
/*
 * Enrich the forward ranges of <chain> with all fragments whose genomic range
 * (startpos2..endpos2) overlaps the genomic range of the chain.
 *
 * The overlapping fragment ranges are collected, sorted, and merged into a
 * new list of forward ranges that replaces the old content of
 * chain->forwardranges. If <comments> is set, the forward ranges before the
 * enrichment are written to <outfp>.
 */
static void enrich_chain(GthChain *chain, GtFragment *fragments,
                         unsigned long num_of_fragments, bool comments,
                         GtFile *outfp)
{
  GtArray *candidates;
  GtRange dp_range, current;
  GtRange *last;
  unsigned long idx;

  gt_assert(chain && fragments && num_of_fragments);

  if (comments) {
    gt_file_xprintf(outfp, "%c enrich global chain with the following "
                           "forward ranges:\n",COMMENTCHAR);
    gt_file_xprintf(outfp, "%c ", COMMENTCHAR);
    gt_ranges_show(chain->forwardranges, outfp);
  }

  /* collect every fragment that overlaps the genomic range of the chain */
  dp_range = chain_get_genomicrange(chain);
  candidates = gt_array_new(sizeof (GtRange));
  for (idx = 0; idx < num_of_fragments; idx++) {
    current.start = fragments[idx].startpos2;
    current.end   = fragments[idx].endpos2;
    if (gt_range_overlap(&dp_range, &current))
      gt_array_add(candidates, current);
  }
  gt_assert(gt_array_size(candidates));

  qsort(gt_array_get_space(candidates), gt_array_size(candidates),
        sizeof (GtRange), (GtCompare) gt_range_compare);

  /* rebuild the forward ranges from the sorted candidates */
  gt_array_reset(chain->forwardranges);
  current = *(GtRange*) gt_array_get_first(candidates);
  gt_array_add(chain->forwardranges, current);

  for (idx = 1; idx < gt_array_size(candidates); idx++) {
    current = *(GtRange*) gt_array_get(candidates, idx);
    last = gt_array_get_last(chain->forwardranges);

    if (current.start > last->end) {
      /* no overlap with the last stored range -> save as a new range */
      gt_array_add(chain->forwardranges, current);
      continue;
    }
    /* overlap found -> extend the last range, if necessary */
    if (last->end < current.end)
      last->end = current.end;
  }

  gt_array_delete(candidates);
}
/*
 * Infer the coding part of an exon from the ranges of the left and right
 * codons.
 *
 * <cds_range> is first reset to {0, 0}. If the exon lies entirely left of the
 * left codon or entirely right of the right codon, false is returned.
 * Otherwise <cds_range> is set to the exon range, with its start replaced by
 * the left codon's start if the exon overlaps the left codon, and its end
 * replaced by the right codon's end if the exon overlaps the right codon;
 * true is returned.
 */
static bool infer_cds_visitor_infer_range(GtRange *exon_range,
                                          GtRange *leftcodon_range,
                                          GtRange *rightcodon_range,
                                          GtRange *cds_range)
{
  cds_range->start = 0;
  cds_range->end   = 0;

  // exon is completely outside the codon-delimited region
  bool outside = exon_range->end < leftcodon_range->start ||
                 exon_range->start > rightcodon_range->end;
  if(outside)
    return false;

  bool touches_left  = gt_range_overlap(exon_range, leftcodon_range);
  bool touches_right = gt_range_overlap(exon_range, rightcodon_range);

  // each boundary is chosen independently of the other
  cds_range->start = touches_left  ? leftcodon_range->start
                                   : exon_range->start;
  cds_range->end   = touches_right ? rightcodon_range->end
                                   : exon_range->end;
  return true;
}
/*
 * Returns true if <range_a> and <range_b> overlap by at least <delta>
 * positions, false otherwise. Both ranges must satisfy start <= end.
 */
bool gt_range_overlap_delta(const GtRange *range_a, const GtRange *range_b,
                            GtUword delta)
{
  GtUword len_a, len_b, shared_start, shared_end;

  gt_assert(range_a->start <= range_a->end && range_b->start <= range_b->end);

  len_a = range_a->end - range_a->start + 1;
  len_b = range_b->end - range_b->start + 1;

  /* a range shorter than delta can never contribute delta positions */
  if (len_a < delta || len_b < delta)
    return false;

  if (!gt_range_overlap(range_a, range_b))
    return false;

  /* the shared part runs from the larger start to the smaller end; this
     covers containment in either direction as well as partial overlaps */
  shared_start = (range_a->start <= range_b->start) ? range_b->start
                                                    : range_a->start;
  shared_end   = (range_a->end >= range_b->end) ? range_b->end
                                                : range_a->end;

  return shared_end - shared_start + 1 >= delta;
}
/*
 * Collect all features from <feature_set> that are not <feature> itself and
 * whose range does not overlap the range of <feature>.
 *
 * Returns a newly created array of GtGenomeNode pointers; the nodes are
 * stored as-is (no new references are taken here).
 */
GtArray* agn_feature_neighbors(GtGenomeNode *feature, GtArray *feature_set)
{
  GtArray *result = gt_array_new( sizeof(GtGenomeNode *) );
  GtUword idx;

  for(idx = 0; idx < gt_array_size(feature_set); idx++)
  {
    GtGenomeNode *candidate = *(GtGenomeNode **)gt_array_get(feature_set, idx);
    if(candidate == feature)
      continue;

    GtRange own    = gt_genome_node_get_range(feature);
    GtRange theirs = gt_genome_node_get_range(candidate);
    if(!gt_range_overlap(&own, &theirs))
      gt_array_add(result, candidate);
  }

  return result;
}
/*
 * Build the list of matches (GthJTMatch) belonging to <chain>.
 *
 * Without chain enrichment, exactly the fragments referenced by the chain are
 * stored. With chain enrichment, every fragment whose genomic range
 * (startpos2..endpos2) overlaps <genomicrange> is stored instead.
 * For each stored fragment, gen_range is taken from startpos2/endpos2 and
 * ref_range from startpos1/endpos1.
 *
 * Returns a newly created array of GthJTMatch.
 */
static GtArray* make_list_of_chain_fragments(GtChain *chain,
                                             GtFragment *fragments,
                                             unsigned long num_of_fragments,
                                             bool enrichchains,
                                             const GtRange *genomicrange)
{
  GtArray *selected;
  GthJTMatch hit;
  unsigned long idx;

  gt_assert(chain && fragments && num_of_fragments);
  selected = gt_array_new(sizeof (GthJTMatch));

  if (enrichchains) {
    /* chain enrichment used -> scan all fragments for overlaps with the
       genomic range of the computed chain */
    for (idx = 0; idx < num_of_fragments; idx++) {
      const GtFragment *frag = &fragments[idx];
      GtRange span;

      span.start = frag->startpos2;
      span.end   = frag->endpos2;
      if (!gt_range_overlap(genomicrange, &span))
        continue;

      hit.gen_range.start = frag->startpos2;
      hit.gen_range.end   = frag->endpos2;
      hit.ref_range.start = frag->startpos1;
      hit.ref_range.end   = frag->endpos1;
      gt_array_add(selected, hit);
    }
    return selected;
  }

  /* no chain enrichment used -> store all fragments from chain */
  for (idx = 0; idx < gt_chain_size(chain); idx++) {
    const GtFragment *frag = &fragments[gt_chain_get_fragnum(chain, idx)];

    hit.gen_range.start = frag->startpos2;
    hit.gen_range.end   = frag->endpos2;
    hit.ref_range.start = frag->startpos1;
    hit.ref_range.end   = frag->endpos1;
    gt_array_add(selected, hit);
  }
  return selected;
}
/*
 * next() implementation of the SNP annotator stream.
 *
 * If the output queue still holds nodes, the next one is handed out via <gn>
 * right away. Otherwise nodes are pulled from sas->merge_stream until either
 * an error occurs, the input is exhausted, or a node could be taken from the
 * output queue:
 *  - SNV/SNP features overlapping the current gene range are buffered in
 *    sas->snps; one outside of it finishes the current gene cluster and is
 *    queued for output.
 *  - gene features are collected in sas->cur_gene_set as long as they overlap
 *    the current gene range (which is grown via gt_range_join); a
 *    non-overlapping gene finishes the cluster and starts a new one.
 *  - nodes that are not feature nodes finish the current cluster and are
 *    queued for output.
 *
 * Returns the error status of the underlying calls (0 on success).
 *
 * NOTE(review): when the input stream ends (or fails) and nothing was queued,
 * <*gn> is not written by this function -- confirm that callers initialise it.
 * NOTE(review): feature nodes that are neither SNV/SNP nor gene are neither
 * queued nor deleted here -- verify whether such nodes can reach this stream.
 */
static int snp_annotator_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                     GtError *err)
{
  GtSNPAnnotatorStream *sas;
  int had_err = 0;
  bool complete_cluster = false;
  GtGenomeNode *mygn = NULL;
  GtFeatureNode *fn = NULL;
  /* type names are compared by pointer below; presumably gt_symbol() and
     gt_feature_node_get_type() hand out canonical pointers -- TODO confirm */
  const char *snv_type = gt_symbol(gt_ft_SNV),
             *snp_type = gt_symbol(gt_ft_SNP),
             *gene_type = gt_symbol(gt_ft_gene);
  gt_error_check(err);
  sas = gt_snp_annotator_stream_cast(ns);

  /* if there are still SNPs left in the buffer, output them */
  if (gt_queue_size(sas->outqueue) > 0) {
    *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
    return had_err;
  } else complete_cluster = false;

  while (!had_err && !complete_cluster) {
    had_err = gt_node_stream_next(sas->merge_stream, &mygn, err);

    /* stop if stream is at the end */
    if (had_err || !mygn) break;

    /* process all feature nodes */
    if ((fn = gt_feature_node_try_cast(mygn))) {
      GtGenomeNode *addgn;
      const char *type = gt_feature_node_get_type(fn);
      GtRange new_rng = gt_genome_node_get_range(mygn);
      if (type == snv_type || type == snp_type) {
        /* -----> this is a SNP <----- */
        if (gt_range_overlap(&new_rng, &sas->cur_gene_range)) {
          /* it falls into the currently observed range */
          gt_queue_add(sas->snps, gt_genome_node_ref((GtGenomeNode*) fn));
        } else {
          /* SNP outside a gene, this cluster is done
             add to out queue and start serving */
          gt_assert(gt_queue_size(sas->outqueue) == 0);
          had_err = snp_annotator_stream_process_current_gene(sas, err);
          /* the SNP itself is queued regardless of <had_err> */
          gt_queue_add(sas->outqueue, mygn);
          if (gt_queue_size(sas->outqueue) > 0) {
            *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
            complete_cluster = true;
          }
        }
      } else if (type == gene_type) {
        /* -----> this is a gene <----- */
        if (gt_array_size(sas->cur_gene_set) == 0UL) {
          /* new overlapping gene cluster */
          addgn = gt_genome_node_ref(mygn);
          gt_array_add(sas->cur_gene_set, addgn);
          sas->cur_gene_range = gt_genome_node_get_range(mygn);
        } else {
          if (gt_range_overlap(&new_rng, &sas->cur_gene_range)) {
            /* gene overlaps with current one, add to cluster */
            addgn = gt_genome_node_ref(mygn);
            gt_array_add(sas->cur_gene_set, addgn);
            sas->cur_gene_range = gt_range_join(&sas->cur_gene_range,
                                                &new_rng);
          } else {
            /* finish current cluster and start a new one */
            had_err = snp_annotator_stream_process_current_gene(sas, err);
            if (!had_err) {
              addgn = gt_genome_node_ref(mygn);
              gt_array_add(sas->cur_gene_set, addgn);
              sas->cur_gene_range = gt_genome_node_get_range(mygn);
            }
            /* serve whatever finishing the cluster put into the out queue */
            if (gt_queue_size(sas->outqueue) > 0) {
              *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
              complete_cluster = true;
            }
          }
        }
        /* from now on, genes are kept in gene cluster arrays only */
        gt_genome_node_delete(mygn);
      }
    } else {
      /* meta node */
      had_err = snp_annotator_stream_process_current_gene(sas, err);
      if (!had_err) {
        gt_queue_add(sas->outqueue, mygn);
      }
      if (gt_queue_size(sas->outqueue) > 0) {
        *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
        complete_cluster = true;
      }
    }
  }

  return had_err;
}
/*
 * Draw a single element onto the Cairo canvas.
 *
 * Style values (bar height, arrow width, stroke/fill colors, stroke width and
 * drawing style) are queried from the canvas' style object; any style query
 * error makes the function return -1. Elements narrower than about one unit
 * in drawing coordinates are drawn as a single vertical line, and the bit
 * table (if present) ensures only one such line is drawn per position.
 * If an image info object is present, the element's rectangle is registered
 * in it. Finally, arrowheads are drawn at the margins for clipped elements.
 *
 * Returns 0 on success, -1 if the element does not overlap the view range or
 * a style query failed.
 */
int gt_canvas_cairo_visit_element(GtCanvas *canvas, GtElement *elem,
                                  GtError *err)
{
  int had_err = 0, arrow_status = ARROW_NONE;
  GtRange elem_range;
  GtDrawingRange draw_range;
  double elem_start = GT_UNDEF_DOUBLE,
         elem_width = GT_UNDEF_DOUBLE,
         stroke_width = STROKE_WIDTH_DEFAULT,
         bar_height = BAR_HEIGHT_DEFAULT,
         arrow_width = ARROW_WIDTH_DEFAULT;
  GtColor elem_color, grey, fill_color;
  const char *type;
  GtStyleQueryStatus rval;
  GtStr *style;
  GtStrand strand;

  /* validate the arguments before <elem> is used for the first time */
  gt_assert(canvas && elem);
  elem_range = gt_element_get_range(elem);
  strand = gt_element_get_strand(elem);

  /* This shouldn't happen. */
  if (!gt_range_overlap(&elem_range, &canvas->pvt->viewrange))
    return -1;

  type = gt_element_get_type(elem);
  grey.red = grey.green = grey.blue = .85;
  grey.alpha = 0.5;

  /* get default or image-wide bar height */
  if (gt_style_get_num(canvas->pvt->sty,
                       "format", "bar_height",
                       &bar_height,
                       NULL, err) == GT_STYLE_QUERY_ERROR) {
    return -1;
  }
  /* try to get type-specific bar height */
  if (gt_style_get_num_with_track(canvas->pvt->sty,
                       type, "bar_height",
                       &bar_height,
                       gt_element_get_node_ref(elem),
                       gt_track_get_title(canvas->pvt->current_track),
                       err) == GT_STYLE_QUERY_ERROR) {
    return -1;
  }
  /* get default or image-wide arrow width */
  if (gt_style_get_num(canvas->pvt->sty,
                       "format", "arrow_width",
                       &arrow_width,
                       NULL, err)== GT_STYLE_QUERY_ERROR) {
    return -1;
  }
  /* try to get type-specific arrow width */
  if (gt_style_get_num_with_track(canvas->pvt->sty,
                       type, "arrow_width",
                       &arrow_width,
                       gt_element_get_node_ref(elem),
                       gt_track_get_title(canvas->pvt->current_track),
                       err) == GT_STYLE_QUERY_ERROR) {
    return -1;
  }

  /* the strand decides on which side(s) an arrow is drawn */
  if ((strand == GT_STRAND_REVERSE || strand == GT_STRAND_BOTH)
         /*&& delem == gt_dlist_first(elems)*/)
    arrow_status = ARROW_LEFT;
  if ((strand == GT_STRAND_FORWARD || strand == GT_STRAND_BOTH)
         /*&& gt_dlistelem_next(delem) == NULL*/)
    arrow_status = (arrow_status == ARROW_LEFT ? ARROW_BOTH : ARROW_RIGHT);

  gt_log_log("processing element from %lu to %lu, strand %d\n",
             elem_range.start, elem_range.end, (int) strand);

  /* map the element range to drawing coordinates inside the margins */
  draw_range = gt_coords_calc_generic_range(elem_range,
                                            canvas->pvt->viewrange);
  draw_range.start *= (canvas->pvt->width-2*canvas->pvt->margins);
  draw_range.end *= (canvas->pvt->width-2*canvas->pvt->margins);
  elem_start = draw_range.start + canvas->pvt->margins;
  elem_width = draw_range.end - draw_range.start;
  gt_assert(elem_start != GT_UNDEF_DOUBLE && elem_width != GT_UNDEF_DOUBLE);

  if (gt_element_is_marked(elem)) {
    if (gt_style_get_color_with_track(canvas->pvt->sty,
                           type, "stroke_marked",
                           &elem_color, gt_element_get_node_ref(elem),
                           gt_track_get_title(canvas->pvt->current_track),
                           err) == GT_STYLE_QUERY_ERROR) {
      return -1;
    }
    if (gt_style_get_num_with_track(canvas->pvt->sty,
                           "format", "stroke_marked_width",
                           &stroke_width, gt_element_get_node_ref(elem),
                           gt_track_get_title(canvas->pvt->current_track),
                           err) == GT_STYLE_QUERY_ERROR) {
      return -1;
    }
  } else {
    if (gt_style_get_color_with_track(canvas->pvt->sty,
                           type, "stroke",
                           &elem_color, gt_element_get_node_ref(elem),
                           gt_track_get_title(canvas->pvt->current_track),
                           err) == GT_STYLE_QUERY_ERROR) {
      return -1;
    }
    /* image-wide stroke width first, then the type-specific override */
    if (gt_style_get_num_with_track(canvas->pvt->sty,
                           "format", "stroke_width",
                           &stroke_width, gt_element_get_node_ref(elem),
                           gt_track_get_title(canvas->pvt->current_track),
                           err) == GT_STYLE_QUERY_ERROR) {
      return -1;
    }
    if (gt_style_get_num_with_track(canvas->pvt->sty,
                           type, "stroke_width",
                           &stroke_width, gt_element_get_node_ref(elem),
                           gt_track_get_title(canvas->pvt->current_track),
                           err) == GT_STYLE_QUERY_ERROR) {
      return -1;
    }
  }
  if (gt_style_get_color_with_track(canvas->pvt->sty,
                         type, "fill",
                         &fill_color, gt_element_get_node_ref(elem),
                         gt_track_get_title(canvas->pvt->current_track),
                         err) == GT_STYLE_QUERY_ERROR) {
    return -1;
  }

  /* very narrow element: draw a single vertical line, once per position */
  if (canvas->pvt->bt && gt_double_smaller_double(draw_range.end-draw_range.start, 1.1))
  {
    /* valid bit indices are 0..size-1, so an index equal to the size must be
       rejected as well before the bit table is accessed */
    if ((unsigned long) draw_range.start >= gt_bittab_size(canvas->pvt->bt))
      return had_err;
    if (gt_bittab_bit_is_set(canvas->pvt->bt, (unsigned long) draw_range.start))
      return had_err;
    gt_graphics_draw_vertical_line(canvas->pvt->g,
                                   elem_start,
                                   canvas->pvt->y - bar_height/2,
                                   elem_color,
                                   bar_height,
                                   stroke_width);
    gt_bittab_set_bit(canvas->pvt->bt, (unsigned long) draw_range.start);
  }

  /* register coordinates in GtImageInfo object if available */
  if (canvas->pvt->ii)
  {
    GtRecMap *rm = gt_rec_map_new(elem_start,
                                  canvas->pvt->y - bar_height/2,
                                  elem_start+elem_width,
                                  canvas->pvt->y+bar_height/2,
                                  (GtFeatureNode*)
                                    gt_element_get_node_ref(elem));
    gt_image_info_add_rec_map(canvas->pvt->ii, rm);
  }

  /* narrow elements were handled above, nothing more to draw for them */
  if (canvas->pvt->bt && draw_range.end-draw_range.start <= 1.1)
  {
    return had_err;
  }

  gt_log_log("drawing element from %f to %f, arrow status: %d",
             draw_range.start, draw_range.end, arrow_status);

  /* draw each element according to style set in the style */
  style = gt_str_new();
  rval = gt_style_get_str_with_track(canvas->pvt->sty,
                          type, "style",
                          style, gt_element_get_node_ref(elem),
                          gt_track_get_title(canvas->pvt->current_track),
                          err);
  switch (rval) {
    case GT_STYLE_QUERY_NOT_SET:
      gt_str_set(style, "box");    /* default style */
      break;
    case GT_STYLE_QUERY_ERROR:
      gt_str_delete(style);
      gt_assert(gt_error_is_set(err));
      return -1;
    default:
      break;
  }

  if (strcmp(gt_str_get(style), "box") == 0)
  {
    gt_graphics_draw_box(canvas->pvt->g,
                         elem_start,
                         canvas->pvt->y - bar_height/2,
                         elem_width,
                         bar_height,
                         fill_color,
                         arrow_status,
                         arrow_width,
                         stroke_width,
                         elem_color,
                         false);
  }
  else if (strcmp(gt_str_get(style), "rectangle") == 0)
  {
    gt_graphics_draw_box(canvas->pvt->g,
                         elem_start,
                         canvas->pvt->y - bar_height/2,
                         elem_width,
                         bar_height,
                         fill_color,
                         ARROW_NONE,
                         arrow_width,
                         stroke_width,
                         elem_color,
                         false);
  }
  else if (strcmp(gt_str_get(style), "caret") == 0)
  {
    gt_graphics_draw_caret(canvas->pvt->g,
                           elem_start,
                           canvas->pvt->y - bar_height/2,
                           elem_width,
                           bar_height,
                           ARROW_NONE,
                           arrow_width,
                           stroke_width,
                           elem_color);
  }
  else if (strcmp(gt_str_get(style), "dashes") == 0)
  {
    gt_graphics_draw_dashes(canvas->pvt->g,
                            elem_start,
                            canvas->pvt->y - bar_height/2,
                            elem_width,
                            bar_height,
                            arrow_status,
                            arrow_width,
                            stroke_width,
                            elem_color);
  }
  else if (strcmp(gt_str_get(style), "line") == 0)
  {
    gt_graphics_draw_horizontal_line(canvas->pvt->g,
                                     elem_start,
                                     canvas->pvt->y - bar_height/2,
                                     elem_color,
                                     elem_width,
                                     1.0);
  }
  else
  {
    /* unknown style names fall back to the box style */
    gt_graphics_draw_box(canvas->pvt->g,
                         elem_start,
                         canvas->pvt->y - bar_height/2,
                         elem_width,
                         bar_height,
                         fill_color,
                         arrow_status,
                         arrow_width,
                         stroke_width,
                         elem_color,
                         false);
  }
  gt_str_delete(style);

  /* draw arrowheads at clipped margins */
  if (draw_range.clip == CLIPPED_LEFT || draw_range.clip == CLIPPED_BOTH)
    gt_graphics_draw_arrowhead(canvas->pvt->g,
                               canvas->pvt->margins - 10,
                               canvas->pvt->y - 4,
                               grey,
                               ARROW_LEFT);
  if (draw_range.clip == CLIPPED_RIGHT || draw_range.clip == CLIPPED_BOTH)
    gt_graphics_draw_arrowhead(canvas->pvt->g,
                               canvas->pvt->width-canvas->pvt->margins + 10,
                               canvas->pvt->y - 4,
                               grey,
                               ARROW_RIGHT);
  return had_err;
}
/*
 * Unit test for the interval tree.
 *
 * Part 1: random ranges are inserted; random queries for the first
 *         overlapping interval are verified against a linear scan.
 * Part 2: random queries for all overlapping intervals are compared against a
 *         reference list obtained by linear scan.
 * Part 3: a second tree is filled with nodes carrying their index as data;
 *         nodes are removed one by one in random order, checking after each
 *         removal that the size shrank and the removed value is gone.
 *
 * Returns 0 on success, the status set by gt_ensure() otherwise.
 */
int gt_interval_tree_unit_test(GT_UNUSED GtError *err)
{
  int had_err = 0,
      num_testranges = 3000,
      num_samples = 300000,
      num_find_all_samples = 10000,
      gt_range_max_basepos = 90000,
      width = 700,
      query_width = 5000;
  unsigned long i = 0;
  GtIntervalTree *tree = NULL;
  GtIntervalTreeNode *hit = NULL;
  GtRange *hit_range = NULL, qrange;
  GtArray *ranges = NULL, *nodes = NULL;

  ranges = gt_array_new(sizeof (GtRange*));

  /* generate test ranges */
  for (i = 0; i < num_testranges; i++) {
    unsigned long start;
    GtRange *fresh = gt_calloc(1, sizeof (GtRange));
    start = gt_rand_max(gt_range_max_basepos);
    fresh->start = start;
    fresh->end = start + gt_rand_max(width);
    gt_array_add(ranges, fresh);
  }

  /* the tree is created with gt_free_func for the node data, i.e. the
     ranges inserted below */
  tree = gt_interval_tree_new(gt_free_func);

  /* insert ranges */
  for (i = 0; i < num_testranges && !had_err; i++) {
    GtRange *stored = *(GtRange**) gt_array_get(ranges, i);
    GtIntervalTreeNode *node = gt_interval_tree_node_new(stored,
                                                         stored->start,
                                                         stored->end);
    gt_interval_tree_insert(tree, node);
  }
  gt_ensure(had_err, gt_interval_tree_size(tree) == num_testranges);

  /* perform test queries */
  for (i = 0; i < num_samples && !had_err; i++) {
    unsigned long start = gt_rand_max(gt_range_max_basepos);
    qrange.start = start;
    qrange.end = start + gt_rand_max(width);
    hit = gt_interval_tree_find_first_overlapping(tree, qrange.start,
                                                  qrange.end);
    if (hit) {
      /* we have a hit, check if really overlapping */
      hit_range = (GtRange*) gt_interval_tree_node_get_data(hit);
      gt_ensure(had_err, gt_range_overlap(&qrange, hit_range));
    } else {
      /* no hit, check whether there really is no overlapping
         interval in tree */
      unsigned long j;
      bool found = false;
      for (j = 0; j < gt_array_size(ranges); j++) {
        GtRange *candidate = *(GtRange**) gt_array_get(ranges, j);
        if (gt_range_overlap(candidate, &qrange)) {
          found = true;
          break;
        }
      }
      gt_ensure(had_err, !found);
    }
  }

  /* test searching for all overlapping intervals */
  for (i = 0; i < num_find_all_samples && !had_err; i++) {
    unsigned long start = gt_rand_max(gt_range_max_basepos);
    GtArray *hits;
    qrange.start = start;
    qrange.end = start + gt_rand_max(query_width);
    hits = gt_array_new(sizeof (GtRange*));
    gt_interval_tree_find_all_overlapping(tree, qrange.start, qrange.end,
                                          hits);
    if (hits) {
      /* generate reference overlapping interval list by linear search */
      unsigned long j;
      GtArray *expected = gt_array_new(sizeof (GtRange*));
      for (j = 0; j < gt_array_size(ranges); j++) {
        GtRange *candidate = *(GtRange**) gt_array_get(ranges, j);
        if (gt_range_overlap(candidate, &qrange)) {
          gt_array_add(expected, candidate);
        }
      }
      /* compare reference with interval tree query result */
      gt_array_sort_stable(expected, range_ptr_compare);
      gt_array_sort_stable(hits, range_ptr_compare);
      /* must be equal */
      gt_ensure(had_err, gt_array_cmp(expected, hits)==0);
      gt_array_delete(expected);
    }
    gt_array_delete(hits);
  }
  gt_interval_tree_delete(tree);

  /* second tree: node data is the insertion index, no free function */
  tree = gt_interval_tree_new(NULL);
  gt_array_reset(ranges);

  /* generate test ranges */
  for (i = 0; i < num_testranges && !had_err; i++) {
    unsigned long start;
    GtIntervalTreeNode *node;
    start = gt_rand_max(gt_range_max_basepos);
    node = gt_interval_tree_node_new((void*) i, start,
                                     start + gt_rand_max(width));
    gt_interval_tree_insert(tree, node);
  }
  gt_ensure(had_err, gt_interval_tree_size(tree) == num_testranges);

  nodes = gt_array_new(sizeof (GtIntervalTreeNode*));
  for (i = 0; i < num_testranges && !had_err; i++) {
    unsigned long idx, n, removed_val;
    GtIntervalTreeNode *victim = NULL;

    /* get all nodes referenced by the interval tree */
    interval_tree_find_all_internal(tree, tree->root, itree_test_get_node, 0,
                                    gt_range_max_basepos+width, nodes);

    /* remove a random node */
    idx = gt_rand_max(gt_array_size(nodes)-1);
    victim = *(GtIntervalTreeNode**) gt_array_get(nodes, idx);
    gt_ensure(had_err, victim != NULL);
    removed_val = (unsigned long) gt_interval_tree_node_get_data(victim);
    gt_interval_tree_remove(tree, victim);
    gt_array_reset(nodes);

    /* make sure that the node has disappeared */
    gt_ensure(had_err, gt_interval_tree_size(tree) == num_testranges - (i+1));
    interval_tree_find_all_internal(tree, tree->root, itree_test_get_node, 0,
                                    gt_range_max_basepos+width, nodes);
    gt_ensure(had_err, gt_array_size(nodes) == num_testranges - (i+1));
    for (n = 0; !had_err && n < gt_array_size(nodes); n++) {
      GtIntervalTreeNode *remaining =
                              *(GtIntervalTreeNode**) gt_array_get(nodes, n);
      gt_ensure(had_err,
                (unsigned long) gt_interval_tree_node_get_data(remaining)
                  != removed_val);
    }
  }

  gt_array_delete(ranges);
  gt_array_delete(nodes);
  gt_interval_tree_delete(tree);
  return had_err;
}
/*
 * Infer UTR features for the visitor's current mRNA from its exons and its
 * start/stop codons.
 *
 * Nothing is done if UTRs already exist, if there is neither an explicit CDS
 * nor enough information to infer UTRs (explicit exons plus exactly one start
 * and one stop codon), or if no start or stop codon is available at all.
 *
 * For every exon, the part left of the left codon and the part right of the
 * right codon become UTR features. On the reverse strand the stop codon is
 * the left codon and the 5'/3' UTR types are swapped. Each new UTR gets the
 * visitor's source (if set), is added as a child of the mRNA, and is appended
 * to v->utrs.
 */
static void infer_cds_visitor_infer_utrs(AgnInferCDSVisitor *v)
{
  GtUword numstarts = gt_array_size(v->starts);
  GtUword numstops  = gt_array_size(v->stops);
  bool exonsexplicit = gt_array_size(v->exons) > 0;
  bool cdsexplicit   = gt_array_size(v->cds) > 0;
  bool caninferutrs  = exonsexplicit && numstarts == 1 && numstops == 1;

  if(gt_array_size(v->utrs) > 0)
    return;
  if(!cdsexplicit && !caninferutrs)
    return;
  // An explicit CDS alone lets execution reach this point; without at least
  // one start and one stop codon the element accesses below would be out of
  // bounds, so bail out.
  if(numstarts == 0 || numstops == 0)
    return;

  GtGenomeNode **leftcodon  = gt_array_get(v->starts, 0);
  GtGenomeNode **rightcodon = gt_array_get(v->stops,  0);
  GtStrand strand = gt_feature_node_get_strand(v->mrna);
  const char *lefttype  = "five_prime_UTR";
  const char *righttype = "three_prime_UTR";
  if(strand == GT_STRAND_REVERSE)
  {
    // on the reverse strand the roles of the two codons are mirrored
    lefttype  = "three_prime_UTR";
    righttype = "five_prime_UTR";
    GtGenomeNode **temp = leftcodon;
    leftcodon = rightcodon;
    rightcodon = temp;
  }
  GtRange leftrange  = gt_genome_node_get_range(*leftcodon);
  GtRange rightrange = gt_genome_node_get_range(*rightcodon);

  GtUword i;
  for(i = 0; i < gt_array_size(v->exons); i++)
  {
    GtGenomeNode **exon = gt_array_get(v->exons, i);
    GtRange exonrange = gt_genome_node_get_range(*exon);

    // exon extends to the left of the left codon
    if(exonrange.start < leftrange.start)
    {
      GtRange utrrange;
      if(gt_range_overlap(&exonrange, &leftrange))
      {
        utrrange.start = exonrange.start;
        utrrange.end   = leftrange.start - 1;
      }
      else
      {
        utrrange = exonrange;
      }
      GtGenomeNode *utr = gt_feature_node_new(gt_genome_node_get_seqid(*exon),
                                              lefttype, utrrange.start,
                                              utrrange.end, strand);
      if(v->source)
        gt_feature_node_set_source((GtFeatureNode *)utr, v->source);
      gt_feature_node_add_child(v->mrna, (GtFeatureNode *)utr);
      gt_array_add(v->utrs, utr);
    }

    // exon extends to the right of the right codon
    if(exonrange.end > rightrange.end)
    {
      GtRange utrrange;
      if(gt_range_overlap(&exonrange, &rightrange))
      {
        utrrange.start = rightrange.end + 1;
        utrrange.end   = exonrange.end;
      }
      else
      {
        utrrange = exonrange;
      }
      GtGenomeNode *utr = gt_feature_node_new(gt_genome_node_get_seqid(*exon),
                                              righttype, utrrange.start,
                                              utrrange.end, strand);
      if(v->source)
        gt_feature_node_set_source((GtFeatureNode *)utr, v->source);
      gt_feature_node_add_child(v->mrna, (GtFeatureNode *)utr);
      gt_array_add(v->utrs, utr);
    }
  }
}
static void* gt_feature_index_unit_test_query(void *data) { GtFeatureIndexTestShared *shm = (GtFeatureIndexTestShared*) data; GtRange rng; GtError *err = shm->err; GtUword i; int had_err = 0; GtArray *arr, *arr_ref; gt_mutex_lock(shm->mutex); if (gt_error_is_set(shm->err)) { gt_mutex_unlock(shm->mutex); return NULL; } gt_mutex_unlock(shm->mutex); arr = gt_array_new(sizeof (GtFeatureNode*)); arr_ref = gt_array_new(sizeof (GtFeatureNode*)); rng.start = random() % (GT_FI_TEST_END - GT_FI_TEST_QUERY_WIDTH); rng.end = rng.start + random() % (GT_FI_TEST_QUERY_WIDTH); /* get reference set by linear search */ gt_mutex_lock(shm->mutex); for (i=0; i<GT_FI_TEST_FEATURES_PER_THREAD * gt_jobs; i++) { GtRange rng2; GtFeatureNode *fn; fn = *(GtFeatureNode**) gt_array_get(shm->nodes, i); rng2 = gt_genome_node_get_range((GtGenomeNode*) fn); if (gt_range_overlap(&rng, &rng2)) { gt_array_add(arr_ref, fn); } } gt_mutex_unlock(shm->mutex); /* query feature index */ gt_feature_index_get_features_for_range(shm->fi, arr, GT_FI_TEST_SEQID, &rng, err); /* result size must be equal */ if (gt_array_size(arr) != gt_array_size(arr_ref)) had_err = -1; /* nodes must be the same (note that we should not rely on ptr equality) */ if (!had_err) { gt_array_sort(arr_ref, cmp_range_start); gt_array_sort(arr , cmp_range_start); for (i=0;i<gt_array_size(arr);i++) { if (had_err) break; if (!gt_feature_node_is_similar(*(GtFeatureNode**) gt_array_get(arr, i), *(GtFeatureNode**) gt_array_get(arr_ref, i))) { had_err = -1; } } } if (had_err) { gt_mutex_lock(shm->mutex); shm->error_count++; gt_mutex_unlock(shm->mutex); } gt_array_delete(arr); gt_array_delete(arr_ref); return NULL; }
static bool compatible(const ConsensusSA *csa, unsigned long sa_1, unsigned long sa_2) { GtArray *exons_sa_1, *exons_sa_2; GtRange range_sa_1, range_sa_2; unsigned long i, j, num_of_exons_1, num_of_exons_2, start_1 = GT_UNDEF_ULONG, start_2 = GT_UNDEF_ULONG; bool start_values_set = false; const unsigned long fuzzlength = 0; /* XXX */ gt_assert(csa); /* check strands */ if (extract_strand(csa, sa_1) != extract_strand(csa, sa_2)) return false; /* init */ exons_sa_1 = gt_array_new(sizeof (GtRange)); exons_sa_2 = gt_array_new(sizeof (GtRange)); /* get ranges */ range_sa_1 = extract_genomic_range(csa, sa_1); range_sa_2 = extract_genomic_range(csa, sa_2); if (!gt_range_overlap(&range_sa_1, &range_sa_2)) { gt_array_delete(exons_sa_1); gt_array_delete(exons_sa_2); return false; } /* get exons */ extract_exons(csa, exons_sa_1, sa_1); extract_exons(csa, exons_sa_2, sa_2); /* determine the first overlapping exon pair */ i = 0; j = 0; num_of_exons_1 = gt_array_size(exons_sa_1); num_of_exons_2 = gt_array_size(exons_sa_2); while (i < num_of_exons_1 && j < num_of_exons_2) { if (gt_range_overlap(gt_array_get(exons_sa_1, i), gt_array_get(exons_sa_2, j))) { start_1 = i; start_2 = j; start_values_set = true; break; } if (((GtRange*) gt_array_get(exons_sa_1, i))->start < ((GtRange*) gt_array_get(exons_sa_2, j))->start) { i++; } else j++; } if (!start_values_set) { gt_array_delete(exons_sa_1); gt_array_delete(exons_sa_2); return false; } /* from now on the start values are set */ gt_assert(start_1 != GT_UNDEF_ULONG && start_2 != GT_UNDEF_ULONG); if (!(start_1 == 0 || start_2 == 0)) { /* no first segment could be maped */ gt_array_delete(exons_sa_1); gt_array_delete(exons_sa_2); return false; } while (start_1 < num_of_exons_1 && start_2 < num_of_exons_2) { range_sa_1 = *((GtRange*) gt_array_get(exons_sa_1, start_1)); range_sa_2 = *((GtRange*) gt_array_get(exons_sa_2, start_2)); if (gt_range_overlap(&range_sa_1, &range_sa_2)) { /* analyze acceptor sites */ /* see if at least one exon has 
a acceptor site (on the left). in this case additional checks have to be performed. Otherwise, this exons are compatible (on the left side) because they overlap */ if (has_acceptor_site(exons_sa_1, start_1) || has_acceptor_site(exons_sa_2, start_2)) { if (has_acceptor_site(exons_sa_1, start_1) && has_acceptor_site(exons_sa_2, start_2) && range_sa_1.start!= range_sa_2.start) { /* the acceptor sites are different */ gt_array_delete(exons_sa_1); gt_array_delete(exons_sa_2); return false; } else if (has_acceptor_site(exons_sa_1, start_1) && range_sa_2.start + fuzzlength < range_sa_1.start) { /* not within fuzzlength */ gt_array_delete(exons_sa_1); gt_array_delete(exons_sa_2); return false; } else if (has_acceptor_site(exons_sa_2, start_2) && range_sa_1.start + fuzzlength < range_sa_2.start) { /* not within fuzzlength */ gt_array_delete(exons_sa_1); gt_array_delete(exons_sa_2); return false; } } /* analyze donor sites */ /* see if at least one exon has a donor site (on the right). in this case additional checks have to be performed. 
Otherwise, this exons are compatible (on the right side) because they overlap */ if (has_donor_site(exons_sa_1, start_1) || has_donor_site(exons_sa_2, start_2)) { if (has_donor_site(exons_sa_1, start_1) && has_donor_site(exons_sa_2, start_2) && range_sa_1.end != range_sa_2.end) { /* the donor sites are different */ gt_array_delete(exons_sa_1); gt_array_delete(exons_sa_2); return false; } else if (has_donor_site(exons_sa_1, start_1) && range_sa_2.end - fuzzlength > range_sa_1.end) { /* not within fuzzlength */ gt_array_delete(exons_sa_1); gt_array_delete(exons_sa_2); return false; } else if (has_donor_site(exons_sa_2, start_2) && range_sa_1.end - fuzzlength > range_sa_2.end) { /* not within fuzzlength */ gt_array_delete(exons_sa_1); gt_array_delete(exons_sa_2); return false; } } } else { /* no overlap: two ordered segments do not overlap each other */ gt_array_delete(exons_sa_1); gt_array_delete(exons_sa_2); return false; } start_1++; start_2++; } /* passed all tests */ gt_array_delete(exons_sa_1); gt_array_delete(exons_sa_2); return true; }
/* Decide where the feature node <node> (with optional parent <parent>) goes
   in diagram <d> and hand it to the matching add_* helper.

   The node is silently skipped (return 0) if it is a pseudo node, if its
   range does not overlap d->range, or if the style sets a "max_show_width"
   for its type that is smaller than the length of d->range. If the parent's
   type has such a limit and it is exceeded, the node is processed as if it
   had no parent.

   The per-type "collapse_to_parent" and "group_by_parent" style settings are
   looked up once per feature type and cached in d->collapsingtypes and
   d->groupedtypes.

   Returns 0 on success or skip, -1 if a style query reports
   GT_STYLE_QUERY_ERROR or a placement helper returns a negative value
   (<err> is passed on to those callees). */
static int process_node(GtDiagram *d, GtFeatureNode *node,
                        GtFeatureNode *parent, GtError *err)
{
  GtRange node_range;
  bool *collapse_flag;
  GtShouldGroupByParent *grouping;
  const char *feature_type = NULL,
             *parent_type = NULL;
  double queried_width;
  GtStyleQueryStatus status;
  GtUword max_show_width = GT_UNDEF_UWORD,
          par_max_show_width = GT_UNDEF_UWORD;
  int placed = 0;

  gt_assert(d && node);

  gt_log_log(">> getting '%s'", gt_feature_node_get_type(node));

  /* pseudo nodes are never drawn themselves */
  if (gt_feature_node_is_pseudo(node))
    return 0;

  feature_type = gt_feature_node_get_type(node);
  gt_assert(feature_type);

  /* nothing to do for features outside of the visible range */
  node_range = gt_genome_node_get_range((GtGenomeNode*) node);
  if (!gt_range_overlap(&d->range, &node_range))
    return 0;

  /* widest view (in nucleotides) at which this type is still shown; any
     status other than OK or ERROR keeps the 'undefined' default */
  status = gt_style_get_num(d->style, feature_type, "max_show_width",
                            &queried_width, NULL, err);
  if (status == GT_STYLE_QUERY_ERROR)
    return -1;
  if (status == GT_STYLE_QUERY_OK)
    max_show_width = queried_width;

  /* same lookup for the parent's type; pseudo parents keep the default */
  if (parent && !gt_feature_node_is_pseudo(parent)) {
    parent_type = gt_feature_node_get_type(parent);
    status = gt_style_get_num(d->style, parent_type, "max_show_width",
                              &queried_width, NULL, err);
    if (status == GT_STYLE_QUERY_ERROR)
      return -1;
    if (status == GT_STYLE_QUERY_OK)
      par_max_show_width = queried_width;
  }

  /* the view is too wide for this type -> do not display it at all */
  if (max_show_width != GT_UNDEF_UWORD
        && gt_range_length(&d->range) > max_show_width) {
    return 0;
  }

  /* the view is too wide for the parent's type -> treat node as parentless */
  if (parent && par_max_show_width != GT_UNDEF_UWORD
        && gt_range_length(&d->range) > par_max_show_width) {
    parent = NULL;
  }

  /* is this a collapsing type? query the style once per type, then cache */
  collapse_flag = (bool*) gt_hashmap_get(d->collapsingtypes, feature_type);
  if (collapse_flag == NULL) {
    collapse_flag = gt_malloc(sizeof (bool));
    *collapse_flag = false;
    if (gt_style_get_bool(d->style, feature_type, "collapse_to_parent",
                          collapse_flag, NULL, err) == GT_STYLE_QUERY_ERROR) {
      gt_free(collapse_flag);
      return -1;
    }
    gt_hashmap_add(d->collapsingtypes, (void*) feature_type, collapse_flag);
  }

  /* should this type be grouped by parent? query once per type, then cache */
  grouping = (GtShouldGroupByParent*) gt_hashmap_get(d->groupedtypes,
                                                     feature_type);
  if (grouping == NULL) {
    bool by_parent;
    grouping = gt_malloc(sizeof (GtShouldGroupByParent));
    status = gt_style_get_bool(d->style, feature_type, "group_by_parent",
                               &by_parent, NULL, err);
    if (status == GT_STYLE_QUERY_OK) {
      *grouping = by_parent ? GT_GROUP_BY_PARENT : GT_DO_NOT_GROUP_BY_PARENT;
    }
    else if (status == GT_STYLE_QUERY_NOT_SET) {
      *grouping = GT_UNDEFINED_GROUPING;
    }
    else if (status == GT_STYLE_QUERY_ERROR) {
      gt_free(grouping);
      return -1;
    }
    gt_hashmap_add(d->groupedtypes, (void*) feature_type, grouping);
  }

  /* decide where to place this feature: */
  if (*collapse_flag) {
    /* user has requested collapsing to parent for this type */
    if (parent && !gt_feature_node_is_pseudo(parent)) {
      /* collapsing children go into blocks further up, but never into
         pseudo nodes */
      add_recursive(d, node, parent, node);
    }
    else {
      /* no parent, or only a pseudo parent -> do not collapse */
      placed = add_to_current(d, node, parent, err);
    }
  }
  else if (!parent) {
    /* root nodes always get their own block */
    placed = add_to_current(d, node, parent, err);
  }
  else {
    bool siblings_disjoint, grouped;
    siblings_disjoint =
      gt_feature_node_direct_children_do_not_overlap_st(parent, node);
    /* group if explicitly requested, or if the style leaves it open and the
       overlap check above succeeded */
    grouped = *grouping == GT_GROUP_BY_PARENT
                || (siblings_disjoint && *grouping == GT_UNDEFINED_GROUPING);
    if (gt_feature_node_is_pseudo(parent) && gt_feature_node_is_multi(node)) {
      /* multi-feature below a pseudo parent, regardless of grouping */
      placed = add_to_rep(d, node, parent, err);
    }
    else if (grouped && gt_feature_node_number_of_children(parent) > 1) {
      placed = add_to_parent(d, node, parent, err);
    }
    else {
      placed = add_to_current(d, node, parent, err);
    }
  }
  if (placed < 0)
    return -1;

  /* we can now assume that this node (or its representative) has been
     processed into the reverse lookup structure */
#ifndef NDEBUG
  if (gt_feature_node_is_multi(node)) {
    GtFeatureNode *rep;
    rep = gt_feature_node_get_multi_representative((GtFeatureNode*) node);
    gt_assert(gt_hashmap_get(d->nodeinfo, rep));
  }
  else
    gt_assert(gt_hashmap_get(d->nodeinfo, node));
#endif

  return 0;
}
/* Feature node callback of the SNP annotator visitor.

   Nodes whose type is neither sav->SNV_type nor sav->SNP_type (and NULL
   nodes) are ignored. For a SNP/SNV node <fn>, every direct child of
   sav->gene with type sav->mRNA_type is examined: its CDS features are
   walked to translate the SNP start coordinate into an offset into the mRNA
   sequence stored in sav->rnaseqs, and each allele listed in the
   GT_GVF_VARIANT_SEQ attribute that differs from the original base is passed
   to snp_annotator_classify_snp().

   Returns 0 on success, otherwise the non-zero value returned by
   gt_complement() or snp_annotator_classify_snp() (with <err> passed on to
   them). */
static int snp_annotator_visitor_feature_node(GtNodeVisitor *nv,
                                              GtFeatureNode *fn,
                                              GtError *err)
{
  int had_err = 0;
  GtSNPAnnotatorVisitor *sav;
  GtFeatureNodeIterator *fni, *mrnafni;
  GtFeatureNode *curnode, *curnode2;
  GtRange snp_rng;
  gt_error_check(err);
  sav = snp_annotator_visitor_cast(nv);

  /* ignore non-nodes */
  if (!fn)
    return 0;

  /* only process SNPs */
  if (!(gt_feature_node_get_type(fn) == sav->SNV_type
          || gt_feature_node_get_type(fn) == sav->SNP_type)) {
    return 0;
  }

  fni = gt_feature_node_iterator_new_direct(sav->gene);
  snp_rng = gt_genome_node_get_range((GtGenomeNode*) fn);
  while (!had_err && (curnode = gt_feature_node_iterator_next(fni))) {
    if (gt_feature_node_get_type(curnode) == sav->mRNA_type) {
      GtStrand mrna_strand = gt_feature_node_get_strand(curnode);
#ifndef NDEBUG
      const char *refstr;
#endif
      /* total length of the CDS features seen so far for this mRNA, i.e.
         the offset of the next CDS within the mRNA sequence */
      GtUword mrnasnppos = 0;
      mrnafni = gt_feature_node_iterator_new(curnode);
      while (!had_err && (curnode2 = gt_feature_node_iterator_next(mrnafni))) {
        if (gt_feature_node_get_type(curnode2) == sav->CDS_type) {
          GtRange cds_rng = gt_genome_node_get_range((GtGenomeNode*) curnode2);
          if (gt_range_overlap(&snp_rng, &cds_rng)) {
            char *mRNA, origchar;
            char *variantchars, *variantptr = NULL;
            /* owned copy of the reference attribute, only set in builds
               without NDEBUG */
            char *refptr = NULL;
            mRNA = (char*) gt_hashmap_get(sav->rnaseqs, curnode);
            gt_assert(mRNA);
            gt_assert(snp_rng.start >= cds_rng.start);
            mrnasnppos += (snp_rng.start - cds_rng.start);
            /* for reverse-strand mRNAs the offset is counted from the end */
            if (mrna_strand == GT_STRAND_REVERSE)
              mrnasnppos = strlen(mRNA) - mrnasnppos - 1;
            gt_assert(mrnasnppos < strlen(mRNA));
            origchar = mRNA[mrnasnppos];
            /* The allele characters read from the GVF attributes are compared
               against origchar below, and are themselves complemented before
               classification on the reverse strand. origchar must therefore
               be complemented here as well. This used to happen only inside
               the NDEBUG-guarded block (and only if a reference attribute was
               present), which made the comparison result depend on the build
               type. */
            if (mrna_strand == GT_STRAND_REVERSE)
              had_err = gt_complement(&origchar, origchar, err);
#ifndef NDEBUG
            refstr = refptr = gt_cstr_dup(gt_feature_node_get_attribute(fn,
                                                        GT_GVF_REFERENCE_SEQ));
            if (!had_err && refstr) {
              /* sanity check: sequence and annotated reference base agree;
                 cast because toupper() requires unsigned char values */
              gt_assert(toupper((unsigned char) origchar)
                          == toupper((unsigned char) refstr[0]));
            }
#endif
            variantchars = variantptr = gt_cstr_dup(
                         gt_feature_node_get_attribute(fn, GT_GVF_VARIANT_SEQ));
            if (!had_err && variantchars) {
              /* index of the current allele within the variant list */
              GtUword i = 0;

              /* walk the comma-separated single-character alleles up to the
                 end of the attribute value or the first ';' */
              while (!had_err
                       && (*variantchars != ';' && *variantchars != '\0')) {
                if (*variantchars != ',' && *variantchars != origchar) {
                  char variantchar = *variantchars;
#ifndef NDEBUG
                  char refchar = refstr ? refstr[0] : '-';  /* XXX */
                  if (!had_err && mrna_strand == GT_STRAND_REVERSE)
                    had_err = gt_complement(&refchar, refchar, err);
#endif
                  if (!had_err && mrna_strand == GT_STRAND_REVERSE)
                    had_err = gt_complement(&variantchar, variantchar, err);
                  if (!had_err) {
                    had_err = snp_annotator_classify_snp(sav, curnode, fn,
                                                         mrnasnppos,
                                                         i++,
                                                         variantchar,
#ifndef NDEBUG
                                                         refchar,
#endif
                                                         err);
                  }
                } else if (*variantchars == origchar) {
                  /* allele equals the original base: nothing to classify,
                     but it still occupies an allele index */
                  i++;
                }
                variantchars++;
              }
            }
            /* release the copies on every path; previously they were only
               freed when a variant attribute was present, leaking refptr
               otherwise */
            gt_free(variantptr);
            gt_free(refptr);
          } else {
            /* CDS does not contain the SNP: skip over its length */
            mrnasnppos += gt_range_length(&cds_rng);
          }
        }
      }
      gt_feature_node_iterator_delete(mrnafni);
    }
  }
  gt_feature_node_iterator_delete(fni);

  return had_err;
}