/*
 * Compute the fraction of introns whose range is identical to the range of at
 * least one alignment gap.
 *
 * Both arrays hold GtGenomeNode pointers. The intron array must not be empty.
 * Returns 0.0 when there are no gaps at all, otherwise a value in [0, 1].
 */
static double gaeval_visitor_introns_confirmed(GtArray *introns, GtArray *gaps)
{
  agn_assert(introns && gaps);
  GtUword num_introns = gt_array_size(introns);
  GtUword num_gaps = gt_array_size(gaps);
  agn_assert(num_introns > 0);
  if(num_gaps == 0)
    return 0.0;

  GtUword confirmed = 0;
  GtUword in_idx;
  for(in_idx = 0; in_idx < num_introns; in_idx++)
  {
    GtGenomeNode **intronp = gt_array_get(introns, in_idx);
    GtRange irange = gt_genome_node_get_range(*intronp);

    /* Scan the gaps until one with exactly the same coordinates is found. */
    GtUword gap_idx;
    for(gap_idx = 0; gap_idx < num_gaps; gap_idx++)
    {
      GtGenomeNode **gapp = gt_array_get(gaps, gap_idx);
      GtRange grange = gt_genome_node_get_range(*gapp);
      if(gt_range_compare(&irange, &grange) == 0)
        break;
    }

    /* The scan stopped early only if a matching gap exists; each intron is
       counted at most once. */
    if(gap_idx < num_gaps)
      confirmed++;
  }

  return (double)confirmed / (double)num_introns;
}
/*
 * Determine which parts of a gene model's exons are covered by an alignment.
 *
 * genemodel: feature node of type "mRNA" (asserted).
 * alignment: feature node whose subfeatures (other than "match_gap") are
 *            intersected with each exon of the gene model.
 *
 * Returns NULL if the two features are on different strands. Otherwise returns
 * a newly allocated array of GtRange values, one per non-empty exon/alignment
 * intersection; the caller is responsible for deleting it.
 */
static GtArray*
gaeval_visitor_intersect(GtGenomeNode *genemodel, GtGenomeNode *alignment)
{
  agn_assert(genemodel && alignment);

  GtFeatureNode *genefn = gt_feature_node_cast(genemodel);
  GtFeatureNode *algnfn = gt_feature_node_cast(alignment);
  agn_assert(gt_feature_node_has_type(genefn, "mRNA"));
  GtStrand genestrand = gt_feature_node_get_strand(genefn);
  GtStrand algnstrand = gt_feature_node_get_strand(algnfn);
  if(genestrand != algnstrand)
    return NULL;

  GtArray *covered_parts = gt_array_new( sizeof(GtRange) );
  GtArray *exons = agn_typecheck_select(genefn, agn_typecheck_exon);
  /* {0, 0} is the value compared against to detect "no overlap". */
  GtRange nullrange = {0, 0};

  /* Array sizes are unsigned, so the indices are unsigned as well. */
  GtUword i;
  for(i = 0; i < gt_array_size(exons); i++)
  {
    GtGenomeNode *exon = *(GtGenomeNode **)gt_array_get(exons, i);
    GtRange exonrange = gt_genome_node_get_range(exon);

    GtFeatureNodeIterator *aniter = gt_feature_node_iterator_new(algnfn);
    GtFeatureNode *tempaln;
    for(tempaln  = gt_feature_node_iterator_next(aniter);
        tempaln != NULL;
        tempaln  = gt_feature_node_iterator_next(aniter))
    {
      /* Gaps in the alignment do not contribute coverage. */
      if(gt_feature_node_has_type(tempaln, "match_gap"))
        continue;

      GtRange alnrange = gt_genome_node_get_range((GtGenomeNode *) tempaln);
      GtRange intr = gaeval_visitor_range_intersect(&exonrange, &alnrange);
      if(gt_range_compare(&intr, &nullrange) != 0)
        gt_array_add(covered_parts, intr);
    }
    gt_feature_node_iterator_delete(aniter);
  }
  gt_array_delete(exons);

  /* Sanity check: no two covered parts may overlap each other. */
  for(i = 0; i < gt_array_size(covered_parts); i++)
  {
    GtRange *r1 = gt_array_get(covered_parts, i);
    GtUword j;
    for(j = i + 1; j < gt_array_size(covered_parts); j++)
    {
      GtRange *r2 = gt_array_get(covered_parts, j);
      agn_assert(gt_range_overlap(r1, r2) == false);
    }
  }

  return covered_parts;
}
/*
 * Reconcile the stop codon of the visitor's current mRNA with its CDS.
 *
 * The stop codon range is inferred as the last 3 bp of the 3'-most CDS segment:
 * the end of the last element of v->cds, or, on the reverse strand, the start
 * of the first element (presumably v->cds is sorted by position -- confirm
 * against the code that fills it).
 *
 *  - more than one explicit stop codon: a message is logged;
 *  - exactly one explicit stop codon: a message is logged if it disagrees with
 *    the inferred range;
 *  - no explicit stop codon: a "stop_codon" feature is created, attached to
 *    the mRNA, and stored in v->stops.
 *
 * Does nothing if the mRNA has no CDS segments.
 */
static void infer_cds_visitor_check_stop(AgnInferCDSVisitor *v)
{
  if(gt_array_size(v->cds) == 0)
    return;

  const char *mrnaid = gt_feature_node_get_attribute(v->mrna, "ID");
  unsigned int ln = gt_genome_node_get_line_number((GtGenomeNode *)v->mrna);
  GtStrand strand = gt_feature_node_get_strand(v->mrna);

  /* Infer the stop codon position from the CDS. */
  GtRange stoprange;
  GtUword threeprimeindex = gt_array_size(v->cds) - 1;
  GtGenomeNode **threeprimesegment = gt_array_get(v->cds, threeprimeindex);
  stoprange = gt_genome_node_get_range(*threeprimesegment);
  stoprange.start = stoprange.end - 2;
  if(strand == GT_STRAND_REVERSE)
  {
    threeprimesegment = gt_array_get(v->cds, 0);
    stoprange = gt_genome_node_get_range(*threeprimesegment);
    stoprange.end = stoprange.start + 2;
  }

  GtUword numstops = gt_array_size(v->stops);
  if(numstops > 1)
  {
    /* Report the number of *stop* codons (previously this printed the size of
       v->starts by mistake). */
    gt_logger_log(v->logger, "mRNA '%s' (line %u) has %lu stop codons", mrnaid,
                  ln, numstops);
  }
  else if(numstops == 1)
  {
    GtGenomeNode **codon = gt_array_get(v->stops, 0);
    GtRange testrange = gt_genome_node_get_range(*codon);
    if(gt_range_compare(&stoprange, &testrange) != 0)
    {
      gt_logger_log(v->logger, "stop codon inferred from CDS [%lu, %lu] does "
                    "not match explicitly provided stop codon [%lu, %lu] for "
                    "mRNA '%s'", stoprange.start, stoprange.end,
                    testrange.start, testrange.end, mrnaid);
    }
  }
  else
  {
    /* Here gt_array_size(v->stops) == 0: no explicit stop codon was provided,
       so create one from the inferred range. */
    GtStr *seqid = gt_genome_node_get_seqid((GtGenomeNode *)v->mrna);
    GtGenomeNode *codonfeature = gt_feature_node_new(seqid, "stop_codon",
                                                     stoprange.start,
                                                     stoprange.end, strand);
    if(v->source)
      gt_feature_node_set_source((GtFeatureNode *)codonfeature, v->source);

    GtFeatureNode *cf = (GtFeatureNode *)codonfeature;
    gt_feature_node_add_child(v->mrna, cf);
    gt_array_add(v->stops, cf);
  }
}
bool gt_ranges_are_sorted(const GtArray *ranges) { GtUword i; gt_assert(ranges); for (i = 1; i < gt_array_size(ranges); i++) { if (gt_range_compare(gt_array_get(ranges, i-1), gt_array_get(ranges, i)) == 1) { return false; } } return true; }
/*
 * Unit test for gaeval_visitor_range_intersect(): a partial overlap, two
 * ranges sharing a single position, and two adjacent ranges that do not
 * overlap (expected result {0, 0}).
 */
static void gv_test_range_intersect(AgnUnitTest *test)
{
  GtRange empty = {0,0};

  /* (1) partial overlap */
  {
    GtRange left = { 500, 750 };
    GtRange right = { 645, 900 };
    GtRange expected = { 645, 750 };
    GtRange observed = gaeval_visitor_range_intersect(&left, &right);
    agn_unit_test_result(test, "range intersect (1)",
                         gt_range_compare(&expected, &observed) == 0);
  }

  /* (2) ranges share exactly one position */
  {
    GtRange left = { 500, 750 };
    GtRange right = { 750, 900 };
    GtRange expected = { 750, 750 };
    GtRange observed = gaeval_visitor_range_intersect(&left, &right);
    agn_unit_test_result(test, "range intersect (2)",
                         gt_range_compare(&expected, &observed) == 0);
  }

  /* (3) adjacent but disjoint ranges */
  {
    GtRange left = { 500, 750 };
    GtRange right = { 751, 900 };
    GtRange observed = gaeval_visitor_range_intersect(&left, &right);
    agn_unit_test_result(test, "range intersect (3)",
                         gt_range_compare(&empty, &observed) == 0);
  }
}
bool gt_ranges_are_equal(const GtArray *ranges_1, const GtArray *ranges_2) { GtUword i; gt_assert(gt_ranges_are_sorted(ranges_1) && gt_ranges_are_sorted(ranges_2)); if (gt_array_size(ranges_1) != gt_array_size(ranges_2)) return false; for (i = 0; i < gt_array_size(ranges_1); i++) { if (gt_range_compare(gt_array_get(ranges_1, i), gt_array_get(ranges_2, i))) return false; } return true; }
static int range_ptr_compare(const void *r1p, const void *r2p) { int ret; gt_assert(r1p && r2p); ret = gt_range_compare(*(GtRange**) r1p, *(GtRange**) r2p); /* It could be that two identical ranges with different pointers are present. If so, compare pointers instead to get a canonical ordering. */ if (ret == 0 && *(GtRange**) r1p != *(GtRange**) r2p) { if (*(GtRange**) r1p < *(GtRange**) r2p) ret = -1; else ret = 1; } return ret; }
/* Returns true if <block> contains exactly one element and the range of that
   element equals the range of the block. */
bool gt_block_has_only_one_fullsize_element(const GtBlock *block)
{
  GtUword num_elements;
  GtRange elem_range, block_range;

  gt_assert(block);

  num_elements = gt_array_size(block->elements);
  if (num_elements != 1)
    return false;

  /* with a single element, first and last entry must be the same object */
  gt_assert(*(GtElement**) gt_array_get(block->elements, 0) ==
            *(GtElement**) gt_array_get(block->elements, num_elements - 1));

  elem_range = gt_element_get_range(*(GtElement**)
                                    gt_array_get(block->elements, 0));
  block_range = gt_block_get_range(block);
  return gt_range_compare(&block_range, &elem_range) == 0;
}
/* Compares two blocks, primarily by range. Distinct blocks with equal ranges
   are ordered by caption. <data> is unused. */
int gt_block_compare(const GtBlock *block1, const GtBlock *block2,
                     GT_UNUSED void *data)
{
  GtRange first_range, second_range;
  GtStr *first_caption, *second_caption;
  int cmp;

  gt_assert(block1 && block2);

  first_range = gt_block_get_range(block1);
  second_range = gt_block_get_range(block2);
  cmp = gt_range_compare(&first_range, &second_range);
  if (cmp != 0 || block1 == block2)
    return cmp;

  /* Blocks do not necessarily have captions. If both have one, the captions
     decide; if only one block has a caption, that block comes first. */
  first_caption = gt_block_get_caption(block1);
  second_caption = gt_block_get_caption(block2);
  if (first_caption && second_caption)
    return strcmp(gt_str_get(first_caption), gt_str_get(second_caption));
  if (first_caption)
    return -1;
  if (second_caption)
    return 1;
  return 0;
}
/* Unit test for GtBlock: element insertion, range, caption and strand
   accessors, and gt_block_get_max_height() under changing style settings.
   Returns 0 on success; on failure <had_err> is set by gt_ensure(). */
int gt_block_unit_test(GtError *err)
{
  GtRange r1, r2, r_temp, b_range;
  GtStrand s;
  GtGenomeNode *gn1, *gn2;
  GtElement *e1, *e2;
  double height;
  GtBlock *b;
  GtStr *seqid, *caption1, *caption2;
  int had_err = 0;
  GtStyle *sty;
  GtError *testerr;
  gt_error_check(err);

  seqid = gt_str_new_cstr("seqid");
  caption1 = gt_str_new_cstr("foo");
  caption2 = gt_str_new_cstr("bar");
  testerr = gt_error_new();

  r1.start = 10UL;
  r1.end = 50UL;

  r2.start = 40UL;
  r2.end = 50UL;

  gn1 = gt_feature_node_new(seqid, gt_ft_gene, r1.start, r1.end,
                            GT_STRAND_FORWARD);
  gn2 = gt_feature_node_new(seqid, gt_ft_exon, r2.start, r2.end,
                            GT_STRAND_FORWARD);

  e1 = gt_element_new((GtFeatureNode*) gn1);
  e2 = gt_element_new((GtFeatureNode*) gn2);

  b = gt_block_new();

  /* test gt_block_insert_elements */
  gt_ensure((0UL == gt_block_get_size(b)));
  gt_block_insert_element(b, (GtFeatureNode*) gn1);
  gt_ensure((1UL == gt_block_get_size(b)));
  gt_block_insert_element(b, (GtFeatureNode*) gn2);
  gt_ensure((2UL == gt_block_get_size(b)));

  /* test gt_block_set_range & gt_block_get_range */
  r_temp = gt_range_join(&r1, &r2);
  gt_block_set_range(b, r_temp);
  b_range = gt_block_get_range(b);
  gt_ensure((0 == gt_range_compare(&b_range, &r_temp)));
  gt_ensure((1 == gt_range_compare(&r2, &r_temp)));

  /* tests gt_block_set_caption & gt_block_get_caption */
  gt_block_set_caption(b, caption1);
  gt_ensure((0 == gt_str_cmp(gt_block_get_caption(b), caption1)));
  gt_ensure((0 != gt_str_cmp(gt_block_get_caption(b), caption2)));

  /* tests gt_block_set_strand & gt_block_get_strand */
  s = gt_block_get_strand(b);
  gt_ensure((GT_STRAND_UNKNOWN == s));
  gt_block_set_strand(b, GT_STRAND_FORWARD);
  s = gt_block_get_strand(b);
  gt_ensure((GT_STRAND_FORWARD == s));

  /* test gt_block_get_max_height(); errors raised by the function under test
     are collected in <testerr>, so that the gt_error_is_set(testerr) checks
     actually observe them, while <err> is reserved for gt_ensure() */
  sty = gt_style_new(err);
  gt_ensure(gt_block_get_max_height(b, &height, sty, testerr) == 0);
  gt_ensure(!gt_error_is_set(testerr));
  gt_ensure(height == BAR_HEIGHT_DEFAULT);
  gt_style_set_num(sty, "exon", "bar_height", 42);
  gt_ensure(gt_block_get_max_height(b, &height, sty, testerr) == 0);
  gt_ensure(!gt_error_is_set(testerr));
  gt_ensure(height == 42);
  gt_style_set_num(sty, "gene", "bar_height", 23);
  gt_ensure(gt_block_get_max_height(b, &height, sty, testerr) == 0);
  gt_ensure(!gt_error_is_set(testerr));
  gt_ensure(height == 42);
  gt_style_unset(sty, "exon", "bar_height");
  gt_ensure(gt_block_get_max_height(b, &height, sty, testerr) == 0);
  gt_ensure(!gt_error_is_set(testerr));
  gt_ensure(height == 23);

  /* NOTE(review): caption1 is not deleted here; presumably the block took
     ownership in gt_block_set_caption() -- confirm */
  gt_str_delete(caption2);
  gt_str_delete(seqid);
  gt_element_delete(e1);
  gt_element_delete(e2);
  gt_block_delete(b);
  gt_style_delete(sty);
  gt_error_delete(testerr);
  gt_genome_node_delete(gn1);
  gt_genome_node_delete(gn2);

  return had_err;
}
static void gv_test_union(AgnUnitTest *test) { GtArray *r1 = gt_array_new( sizeof(GtRange) ); GtArray *r2 = gt_array_new( sizeof(GtRange) ); GtRange rng01 = {1050, 9005}; GtRange rng02 = {11525, 14070}; gt_array_add(r2, rng01); gt_array_add(r2, rng02); GtArray *ru = gaeval_visitor_union(r1, r2); bool test1 = gt_array_size(ru) == 2; if(test1) { GtRange *temp1 = gt_array_get(ru, 0); GtRange *temp2 = gt_array_get(ru, 1); test1 = gt_range_compare(temp1, &rng01) == 0 && gt_range_compare(temp2, &rng02) == 0; } agn_unit_test_result(test, "union (1)", test1); gt_array_delete(r1); gt_array_delete(r2); gt_array_delete(ru); r1 = gt_array_new( sizeof(GtRange) ); r2 = gt_array_new( sizeof(GtRange) ); GtRange rng03 = { 300, 500 }; GtRange rng04 = { 700, 800 }; GtRange rng05 = { 200, 400 }; GtRange rng06 = { 700, 900 }; gt_array_add(r1, rng03); gt_array_add(r1, rng04); gt_array_add(r2, rng05); gt_array_add(r2, rng06); ru = gaeval_visitor_union(r1, r2); bool test2 = gt_array_size(ru) == 2; if(test2) { GtRange *temp1 = gt_array_get(ru, 0); GtRange *temp2 = gt_array_get(ru, 1); GtRange testr1 = { 200, 500 }; GtRange testr2 = { 700, 900 }; test2 = gt_range_compare(temp1, &testr1) == 0 && gt_range_compare(temp2, &testr2) == 0; } agn_unit_test_result(test, "union (2)", test2); gt_array_delete(r1); gt_array_delete(r2); gt_array_delete(ru); r1 = gt_array_new( sizeof(GtRange) ); r2 = gt_array_new( sizeof(GtRange) ); GtRange rng07 = { 300, 500 }; GtRange rng08 = { 700, 800 }; GtRange rng09 = { 200, 400 }; GtRange rng10 = { 700, 900 }; GtRange rng11 = { 100, 150 }; gt_array_add(r1, rng07); gt_array_add(r1, rng08); gt_array_add(r2, rng09); gt_array_add(r2, rng10); gt_array_add(r2, rng11); ru = gaeval_visitor_union(r1, r2); bool test3 = gt_array_size(ru) == 3; if(test3) { GtRange *temp1 = gt_array_get(ru, 0); GtRange *temp2 = gt_array_get(ru, 1); GtRange *temp3 = gt_array_get(ru, 2); GtRange testr1 = { 100, 150 }; GtRange testr2 = { 200, 500 }; GtRange testr3 = { 700, 900 }; test3 = 
gt_range_compare(temp1, &testr1) == 0 && gt_range_compare(temp2, &testr2) == 0 && gt_range_compare(temp3, &testr3) == 0; } agn_unit_test_result(test, "union (3)", test3); gt_array_delete(r1); gt_array_delete(r2); gt_array_delete(ru); }
static void gv_test_intersect(AgnUnitTest *test) { GtArray *feats = gt_array_new( sizeof(GtFeatureNode *) ); GtError *error = gt_error_new(); const char *filename = "data/gff3/gaeval-stream-unit-test-1.gff3"; GtNodeStream *gff3in = gt_gff3_in_stream_new_unsorted(1, &filename); GtNodeStream *fstream = gt_array_out_stream_new(gff3in, feats, error); int result = gt_node_stream_pull(fstream, error); if(result == -1) { fprintf(stderr, "[AgnGaevalVisitor::gv_test_intersect] error " "processing GFF3: %s\n", gt_error_get(error)); return; } gt_error_delete(error); gt_node_stream_delete(gff3in); gt_node_stream_delete(fstream); agn_assert(gt_array_size(feats) == 9); GtGenomeNode *g1 = *(GtGenomeNode **)gt_array_get(feats, 1); GtGenomeNode *g2 = *(GtGenomeNode **)gt_array_get(feats, 3); GtGenomeNode *g3 = *(GtGenomeNode **)gt_array_get(feats, 7); GtGenomeNode *est1 = *(GtGenomeNode **)gt_array_get(feats, 0); GtGenomeNode *est2 = *(GtGenomeNode **)gt_array_get(feats, 2); GtGenomeNode *est3 = *(GtGenomeNode **)gt_array_get(feats, 4); GtGenomeNode *est4 = *(GtGenomeNode **)gt_array_get(feats, 5); GtGenomeNode *est5 = *(GtGenomeNode **)gt_array_get(feats, 6); GtGenomeNode *est6 = *(GtGenomeNode **)gt_array_get(feats, 8); GtArray *cov = gaeval_visitor_intersect(g1, est1); bool test1 = cov == NULL; cov = gaeval_visitor_intersect(g1, est2); test1 = gt_array_size(cov) == 1; if(test1) { GtRange *range01 = gt_array_pop(cov); GtRange testrange = { 400, 500 }; test1 = gt_range_compare(range01, &testrange) == 0; } agn_unit_test_result(test, "intersect (1)", test1); gt_array_delete(cov); cov = gaeval_visitor_intersect(g2, est3); bool test2 = gt_array_size(cov) == 2; if(test2) { GtRange *range01 = gt_array_get(cov, 0); GtRange *range02 = gt_array_get(cov, 1); GtRange testrange1 = { 800, 900 }; GtRange testrange2 = { 1050, 1075 }; test2 = gt_range_compare(range01, &testrange1) == 0 && gt_range_compare(range02, &testrange2) == 0; } agn_unit_test_result(test, "intersect (2)", test2); 
gt_array_delete(cov); cov = gaeval_visitor_intersect(g2, est4); bool test3 = gt_array_size(cov) == 2; if(test3) { GtRange *range01 = gt_array_get(cov, 0); GtRange *range02 = gt_array_get(cov, 1); GtRange testrange1 = { 1070, 1125 }; GtRange testrange2 = { 1250, 1310 }; test3 = gt_range_compare(range01, &testrange1) == 0 && gt_range_compare(range02, &testrange2) == 0; } agn_unit_test_result(test, "intersect (3)", test3); gt_array_delete(cov); cov = gaeval_visitor_intersect(g3, est5); bool test4 = gt_array_size(cov) == 2; if(test4) { GtRange *range01 = gt_array_get(cov, 0); GtRange *range02 = gt_array_get(cov, 1); GtRange testrange1 = { 2000, 3000 }; GtRange testrange2 = { 4000, 5000 }; test4 = gt_range_compare(range01, &testrange1) == 0 && gt_range_compare(range02, &testrange2) == 0; } agn_unit_test_result(test, "intersect (4)", test4); gt_array_delete(cov); cov = gaeval_visitor_intersect(g3, est6); bool test5 = gt_array_size(cov) == 2; if(test5) { GtRange *range01 = gt_array_get(cov, 0); GtRange *range02 = gt_array_get(cov, 1); GtRange testrange1 = { 2500, 3000 }; GtRange testrange2 = { 4000, 5000 }; test5 = gt_range_compare(range01, &testrange1) == 0 && gt_range_compare(range02, &testrange2) == 0; } agn_unit_test_result(test, "intersect (5)", test5); gt_array_delete(cov); gt_array_delete(feats); gt_genome_node_delete(g1); gt_genome_node_delete(g2); gt_genome_node_delete(g3); gt_genome_node_delete(est1); gt_genome_node_delete(est2); gt_genome_node_delete(est3); gt_genome_node_delete(est4); gt_genome_node_delete(est5); gt_genome_node_delete(est6); }
int gt_feature_in_stream_unit_test(GtError *error) { GtNodeStream *src, *dest; GtFeatureIndex *prefeat, *postfeat; GtRange range1, range1test, range2, range2test; prefeat = in_stream_test_data(error); postfeat = gt_feature_index_memory_new(); src = gt_feature_in_stream_new(prefeat); dest = gt_feature_out_stream_new(src, postfeat); int result = gt_node_stream_pull(dest, error); if (result == -1) return -1; GtStrArray *seqids = gt_feature_index_get_seqids(postfeat, error); if (gt_str_array_size(seqids) != 2) { gt_error_set(error, "error in feature_in_stream unit test 1: expected 2 " "seqids, found "GT_WU"", gt_str_array_size(seqids)); return -1; } gt_str_array_delete(seqids); range1test.start = 500; range1test.end = 75000; range2test.start = 4000; range2test.end = 9500; gt_feature_index_get_range_for_seqid(postfeat, &range1, "chr1", error); gt_feature_index_get_range_for_seqid(postfeat, &range2, "scf0001",error); if (gt_range_compare(&range1, &range1test) || gt_range_compare(&range2, &range2test)) { gt_error_set(error, "error in feature_in_stream unit test 1: incorrect " "sequence regions"); return -1; } gt_feature_index_get_orig_range_for_seqid(postfeat, &range1, "chr1", error); gt_feature_index_get_orig_range_for_seqid(postfeat, &range2, "scf0001",error); if (gt_range_compare(&range1, &range1test) || gt_range_compare(&range2, &range2test)) { gt_error_set(error, "error in feature_in_stream unit test 1: incorrect " "sequence regions"); return -1; } gt_feature_index_delete(prefeat); gt_feature_index_delete(postfeat); gt_node_stream_delete(src); gt_node_stream_delete(dest); prefeat = in_stream_test_data(error); postfeat = gt_feature_index_memory_new(); src = gt_feature_in_stream_new(prefeat); dest = gt_feature_out_stream_new(src, postfeat); gt_feature_in_stream_use_orig_ranges((GtFeatureInStream *)src); result = gt_node_stream_pull(dest, error); if (result == -1) return -1; range1test.start = 500; range1test.end = 75000; range2test.start = 4000; range2test.end = 9500; 
gt_feature_index_get_range_for_seqid(postfeat, &range1, "chr1", error); gt_feature_index_get_range_for_seqid(postfeat, &range2, "scf0001",error); if (gt_range_compare(&range1, &range1test) || gt_range_compare(&range2, &range2test)) { gt_error_set(error, "error in feature_in_stream unit test 1: incorrect " "sequence regions"); return -1; } range1test.start = 1; range1test.end = 100000; range2test.start = 1; range2test.end = 10000; gt_feature_index_get_orig_range_for_seqid(postfeat, &range1, "chr1", error); gt_feature_index_get_orig_range_for_seqid(postfeat, &range2, "scf0001",error); if (gt_range_compare(&range1, &range1test) || gt_range_compare(&range2, &range2test)) { gt_error_set(error, "error in feature_in_stream unit test 1: incorrect " "sequence regions"); return -1; } gt_feature_index_delete(prefeat); gt_feature_index_delete(postfeat); gt_node_stream_delete(src); gt_node_stream_delete(dest); return 0; }