/*
 * Unit test for gaeval_visitor_introns_confirmed().
 *
 * Five reverse-strand "intron" features are created on sequence "chr". The
 * function is first evaluated against an empty gap list (expected 0.0) and
 * then against five "match_gap" features, three of which have coordinates
 * identical to an intron (expected 0.6). Results are compared with a
 * tolerance of 0.0001 and recorded on `test`.
 */
static void gv_test_introns_confirmed(AgnUnitTest *test)
{
  /* {start, end} pairs, listed in the order the features are created. */
  static const unsigned long intron_coords[][2] = {
    { 1000, 1170 }, { 1225, 1305 }, { 1950, 2110 },
    { 2545, 2655 }, { 2800, 2950 }
  };
  static const unsigned long gap_coords[][2] = {
    { 1000, 1170 }, { 1225, 1302 }, { 1950, 2110 },
    { 2575, 2655 }, { 2800, 2950 }
  };
  const unsigned long num_introns =
    sizeof intron_coords / sizeof intron_coords[0];
  const unsigned long num_gaps = sizeof gap_coords / sizeof gap_coords[0];
  unsigned long k;
  GtGenomeNode *node;
  GtStr *seqid = gt_str_new_cstr("chr");

  GtArray *introns = gt_array_new( sizeof(GtGenomeNode *) );
  for(k = 0; k < num_introns; k++)
  {
    node = gt_feature_node_new(seqid, "intron", intron_coords[k][0],
                               intron_coords[k][1], GT_STRAND_REVERSE);
    gt_array_add(introns, node);
  }

  /* First pass: no gaps at all. */
  GtArray *gaps = gt_array_new( sizeof(GtGenomeNode *) );
  double confirmed = gaeval_visitor_introns_confirmed(introns, gaps);
  bool no_gaps_ok = fabs(confirmed - 0.0) < 0.0001;
  agn_unit_test_result(test, "introns confirmed (no gaps)", no_gaps_ok);

  /* Second pass: five gaps, three of them coinciding with an intron. */
  for(k = 0; k < num_gaps; k++)
  {
    node = gt_feature_node_new(seqid, "match_gap", gap_coords[k][0],
                               gap_coords[k][1], GT_STRAND_REVERSE);
    gt_array_add(gaps, node);
  }
  confirmed = gaeval_visitor_introns_confirmed(introns, gaps);
  bool with_gaps_ok = fabs(confirmed - 0.6) < 0.0001;
  agn_unit_test_result(test, "introns confirmed (gaps)", with_gaps_ok);

  /* Release every node created above, then the containers. */
  for(k = gt_array_size(introns); k > 0; k = gt_array_size(introns))
  {
    node = *(GtGenomeNode **)gt_array_pop(introns);
    gt_genome_node_delete(node);
  }
  gt_array_delete(introns);

  for(k = gt_array_size(gaps); k > 0; k = gt_array_size(gaps))
  {
    node = *(GtGenomeNode **)gt_array_pop(gaps);
    gt_genome_node_delete(node);
  }
  gt_array_delete(gaps);

  gt_str_delete(seqid);
}
/*
 * Recursive helper that propagates part_of edges along is_a edges.
 *
 * `node_stack` holds the nodes visited on the current recursion path (the
 * nodes this call was reached from). For every column set in row `node->num`
 * of `part_of_in_edges`, an out-edge (column -> stacked node) and the matching
 * in-edge are recorded for each node currently on the stack. The function then
 * pushes `node`, recurses into every entry of `node->is_a_out_edges`, and pops
 * `node` again before returning.
 */
static void create_transitive_part_of_edges(GtTypeNode *node,
                                            GtBoolMatrix *part_of_out_edges,
                                            GtBoolMatrix *part_of_in_edges,
                                            GtArray *node_stack)
{
  unsigned long col, idx;

  if (gt_array_size(node_stack)) {
    col = gt_bool_matrix_get_first_column(part_of_in_edges, node->num);
    /* The end marker is re-queried each round because the matrix is being
       written to inside the loop. */
    while (col != gt_bool_matrix_get_last_column(part_of_in_edges,
                                                 node->num)) {
      idx = 0;
      while (idx < gt_array_size(node_stack)) {
        GtTypeNode *descendant = *(GtTypeNode**) gt_array_get(node_stack, idx);
        gt_bool_matrix_set(part_of_out_edges, col, descendant->num, true);
        gt_bool_matrix_set(part_of_in_edges, descendant->num, col, true);
        idx++;
      }
      col = gt_bool_matrix_get_next_column(part_of_in_edges, node->num, col);
    }
  }

  /* Visit all is_a targets with this node on the path. */
  gt_array_add(node_stack, node);
  idx = 0;
  while (idx < gt_array_size(node->is_a_out_edges)) {
    GtTypeNode *target = *(GtTypeNode**) gt_array_get(node->is_a_out_edges,
                                                      idx);
    create_transitive_part_of_edges(target, part_of_out_edges,
                                    part_of_in_edges, node_stack);
    idx++;
  }
  gt_array_pop(node_stack);
}
/*
 * Load the fixture "data/gff3/grape-codons.gff3" through an infer-CDS stream
 * and append the resulting feature nodes to `queue`.
 *
 * The GFF3 input is read unsorted with ID-attribute checking and tidy mode
 * enabled. Collected features are sorted with agn_genome_node_compare and
 * added to the queue in that sorted order. A pull failure is reported on
 * stderr; whatever was collected up to that point is still queued.
 */
static void infer_cds_visitor_test_data(GtQueue *queue)
{
  const char *infile = "data/gff3/grape-codons.gff3";
  GtError *error = gt_error_new();

  /* Build the pipeline: GFF3 reader -> infer CDS -> array collector. */
  GtNodeStream *gff3in = gt_gff3_in_stream_new_unsorted(1, &infile);
  gt_gff3_in_stream_check_id_attributes((GtGFF3InStream *)gff3in);
  gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream *)gff3in);
  GtLogger *logger = gt_logger_new(true, "", stderr);
  GtNodeStream *icv_stream = agn_infer_cds_stream_new(gff3in, NULL, logger);
  GtArray *collected = gt_array_new( sizeof(GtFeatureNode *) );
  GtNodeStream *arraystream = gt_array_out_stream_new(icv_stream, collected,
                                                      error);

  if(gt_node_stream_pull(arraystream, error) == -1)
  {
    fprintf(stderr, "[AgnInferCDSVisitor::infer_cds_visitor_test_data] error "
            "processing features: %s\n", gt_error_get(error));
  }

  gt_node_stream_delete(gff3in);
  gt_node_stream_delete(icv_stream);
  gt_node_stream_delete(arraystream);
  gt_logger_delete(logger);

  /* Sort, then reverse, so that popping from the back yields sorted order. */
  gt_array_sort(collected, (GtCompare)agn_genome_node_compare);
  gt_array_reverse(collected);
  for(;;)
  {
    if(gt_array_size(collected) == 0)
      break;
    GtFeatureNode *feature = *(GtFeatureNode **)gt_array_pop(collected);
    gt_queue_add(queue, feature);
  }

  gt_array_delete(collected);
  gt_error_delete(error);
}
/*
 * Destroy a unit test object.
 *
 * Frees the test's label, the label of every stored result, the results array
 * and finally the test structure itself. `test` must not be used afterwards.
 */
void agn_unit_test_delete(AgnUnitTest *test)
{
  gt_free(test->label);

  /* Drain the results array, releasing each result's label as we go. */
  for(;;)
  {
    if(gt_array_size(test->results) == 0)
      break;
    UnitTestResult *entry = gt_array_pop(test->results);
    gt_free(entry->label);
  }

  gt_array_delete(test->results);
  gt_free(test);
}
/*
 * Return the next feature node of the iteration, or NULL once the iterator's
 * stack is empty.
 *
 * The node is popped from the internal stack. Unless the iterator is in
 * "direct" mode, the popped node's children (if it has any) are pushed onto
 * the stack so that they are returned by subsequent calls.
 */
GtFeatureNode* gt_feature_node_iterator_next(GtFeatureNodeIterator *fni)
{
  GtFeatureNode *current = NULL;
  gt_assert(fni);

  if (gt_array_size(fni->feature_stack) > 0) {
    current = *(GtFeatureNode**) gt_array_pop(fni->feature_stack);
    if (!fni->direct) {
      if (current->children)
        add_children_to_stack(fni->feature_stack, current);
    }
  }
  return current;
}
static int feature_in_stream_next(GtNodeStream *ns, GtGenomeNode **gn, GtError *error) { GtFeatureInStream *stream = feature_in_stream_cast(ns); gt_error_check(error); if (!stream->init) { feature_in_stream_init(stream); stream->init = true; } if (gt_queue_size(stream->regioncache) > 0) { GtGenomeNode *region = gt_queue_get(stream->regioncache); *gn = region; return 0; } if (stream->featurecache == NULL || gt_array_size(stream->featurecache) == 0) { if (stream->featurecache != NULL) { gt_array_delete(stream->featurecache); stream->featurecache = NULL; } if (stream->seqindex == gt_str_array_size(stream->seqids)) { *gn = NULL; return 0; } const char *seqid = gt_str_array_get(stream->seqids, stream->seqindex++); stream->featurecache = gt_feature_index_get_features_for_seqid(stream->fi, seqid, error); gt_array_sort(stream->featurecache, (GtCompare)gt_genome_node_compare); gt_array_reverse(stream->featurecache); } GtGenomeNode *feat = *(GtGenomeNode **)gt_array_pop(stream->featurecache); *gn = gt_genome_node_ref(feat); return 0; }
/*
 * Advance the tool iterator.
 *
 * Pops the top entry of the iterator's tool stack and stores its name and
 * tool in `*name` and `*tool`. If a prefix string was registered on the
 * iterator (`prefixptr`), it is reset and, when the entry has a prefix, set
 * to that prefix followed by the iterator's separator character.
 *
 * If the popped tool is a toolbox, its tools are collected via
 * gt_toolbox_iterate()/add_tool_to_stack with the prefix "<entry prefix><entry
 * name>", reversed, and pushed onto the stack so that they are visited next.
 * Otherwise the entry's prefix string is released.
 *
 * Returns true if an entry was delivered, false if the stack was empty.
 */
bool gt_tool_iterator_next(GtToolIterator *tool_iterator, const char **name,
                           GtTool **tool)
{
  ToolIterationInfo tii;
  ToolEntry *entry;
  GtToolbox *toolbox;
  GtArray *subtools;
  GtStr *subprefix;

  gt_assert(tool_iterator && name && tool);

  if (!gt_array_size(tool_iterator->tool_stack))
    return false;

  entry = gt_array_pop(tool_iterator->tool_stack);
  *name = entry->name;
  *tool = entry->tool;

  if (tool_iterator->prefixptr) {
    gt_str_reset(tool_iterator->prefixptr);
    if (entry->prefix) {
      gt_str_append_str(tool_iterator->prefixptr, entry->prefix);
      gt_str_append_char(tool_iterator->prefixptr, tool_iterator->prefixsep);
    }
  }

  if (!gt_tool_is_toolbox(entry->tool)) {
    gt_str_delete(entry->prefix);
    return true;
  }

  /* Toolbox: queue its members under the extended prefix. All reads of
     `entry` happen before the stack is appended to. */
  if (entry->prefix)
    subprefix = gt_str_new_cstr(gt_str_get(entry->prefix));
  else
    subprefix = gt_str_new_cstr("");
  gt_str_append_cstr(subprefix, entry->name);
  toolbox = gt_tool_get_toolbox(entry->tool);
  subtools = gt_array_new(sizeof (ToolEntry));
  tii.arr = subtools;
  tii.str = subprefix;
  gt_toolbox_iterate(toolbox, add_tool_to_stack, &tii);
  if (gt_array_size(subtools)) {
    gt_array_reverse(subtools); /* alphabetical order */
    gt_array_add_array(tool_iterator->tool_stack, subtools);
  }
  gt_array_delete(subtools);
  gt_str_delete(subprefix);
  return true;
}
static void gv_test_intersect(AgnUnitTest *test) { GtArray *feats = gt_array_new( sizeof(GtFeatureNode *) ); GtError *error = gt_error_new(); const char *filename = "data/gff3/gaeval-stream-unit-test-1.gff3"; GtNodeStream *gff3in = gt_gff3_in_stream_new_unsorted(1, &filename); GtNodeStream *fstream = gt_array_out_stream_new(gff3in, feats, error); int result = gt_node_stream_pull(fstream, error); if(result == -1) { fprintf(stderr, "[AgnGaevalVisitor::gv_test_intersect] error " "processing GFF3: %s\n", gt_error_get(error)); return; } gt_error_delete(error); gt_node_stream_delete(gff3in); gt_node_stream_delete(fstream); agn_assert(gt_array_size(feats) == 9); GtGenomeNode *g1 = *(GtGenomeNode **)gt_array_get(feats, 1); GtGenomeNode *g2 = *(GtGenomeNode **)gt_array_get(feats, 3); GtGenomeNode *g3 = *(GtGenomeNode **)gt_array_get(feats, 7); GtGenomeNode *est1 = *(GtGenomeNode **)gt_array_get(feats, 0); GtGenomeNode *est2 = *(GtGenomeNode **)gt_array_get(feats, 2); GtGenomeNode *est3 = *(GtGenomeNode **)gt_array_get(feats, 4); GtGenomeNode *est4 = *(GtGenomeNode **)gt_array_get(feats, 5); GtGenomeNode *est5 = *(GtGenomeNode **)gt_array_get(feats, 6); GtGenomeNode *est6 = *(GtGenomeNode **)gt_array_get(feats, 8); GtArray *cov = gaeval_visitor_intersect(g1, est1); bool test1 = cov == NULL; cov = gaeval_visitor_intersect(g1, est2); test1 = gt_array_size(cov) == 1; if(test1) { GtRange *range01 = gt_array_pop(cov); GtRange testrange = { 400, 500 }; test1 = gt_range_compare(range01, &testrange) == 0; } agn_unit_test_result(test, "intersect (1)", test1); gt_array_delete(cov); cov = gaeval_visitor_intersect(g2, est3); bool test2 = gt_array_size(cov) == 2; if(test2) { GtRange *range01 = gt_array_get(cov, 0); GtRange *range02 = gt_array_get(cov, 1); GtRange testrange1 = { 800, 900 }; GtRange testrange2 = { 1050, 1075 }; test2 = gt_range_compare(range01, &testrange1) == 0 && gt_range_compare(range02, &testrange2) == 0; } agn_unit_test_result(test, "intersect (2)", test2); 
gt_array_delete(cov); cov = gaeval_visitor_intersect(g2, est4); bool test3 = gt_array_size(cov) == 2; if(test3) { GtRange *range01 = gt_array_get(cov, 0); GtRange *range02 = gt_array_get(cov, 1); GtRange testrange1 = { 1070, 1125 }; GtRange testrange2 = { 1250, 1310 }; test3 = gt_range_compare(range01, &testrange1) == 0 && gt_range_compare(range02, &testrange2) == 0; } agn_unit_test_result(test, "intersect (3)", test3); gt_array_delete(cov); cov = gaeval_visitor_intersect(g3, est5); bool test4 = gt_array_size(cov) == 2; if(test4) { GtRange *range01 = gt_array_get(cov, 0); GtRange *range02 = gt_array_get(cov, 1); GtRange testrange1 = { 2000, 3000 }; GtRange testrange2 = { 4000, 5000 }; test4 = gt_range_compare(range01, &testrange1) == 0 && gt_range_compare(range02, &testrange2) == 0; } agn_unit_test_result(test, "intersect (4)", test4); gt_array_delete(cov); cov = gaeval_visitor_intersect(g3, est6); bool test5 = gt_array_size(cov) == 2; if(test5) { GtRange *range01 = gt_array_get(cov, 0); GtRange *range02 = gt_array_get(cov, 1); GtRange testrange1 = { 2500, 3000 }; GtRange testrange2 = { 4000, 5000 }; test5 = gt_range_compare(range01, &testrange1) == 0 && gt_range_compare(range02, &testrange2) == 0; } agn_unit_test_result(test, "intersect (5)", test5); gt_array_delete(cov); gt_array_delete(feats); gt_genome_node_delete(g1); gt_genome_node_delete(g2); gt_genome_node_delete(g3); gt_genome_node_delete(est1); gt_genome_node_delete(est2); gt_genome_node_delete(est3); gt_genome_node_delete(est4); gt_genome_node_delete(est5); gt_genome_node_delete(est6); }
static double gaeval_visitor_calculate_integrity(AgnGaevalVisitor *v, GtFeatureNode *genemodel, double coverage, double *components, GtError *error) { agn_assert(v && genemodel); GtStr *seqid = gt_genome_node_get_seqid((GtGenomeNode *)genemodel); GtRange mrna_range = gt_genome_node_get_range((GtGenomeNode *)genemodel); GtArray *overlapping = gt_array_new( sizeof(GtFeatureNode *) ); bool hasseqid; gt_feature_index_has_seqid(v->alignments, &hasseqid, gt_str_get(seqid),error); if(hasseqid) { gt_feature_index_get_features_for_range(v->alignments, overlapping, gt_str_get(seqid), &mrna_range, error); } GtArray *gaps = gt_array_new( sizeof(GtFeatureNode *) ); while(gt_array_size(overlapping) > 0) { GtFeatureNode *alignment = *(GtFeatureNode **)gt_array_pop(overlapping); GtArray *agaps = agn_typecheck_select(alignment, gaeval_visitor_typecheck_gap); gt_array_add_array(gaps, agaps); gt_array_delete(agaps); } gt_array_delete(overlapping); GtUword utr5p_len = agn_mrna_5putr_length(genemodel); double utr5p_score = 0.0; if(utr5p_len >= v->params.exp_5putr_len) utr5p_score = 1.0; else utr5p_score = (double)utr5p_len / (double)v->params.exp_5putr_len; GtUword utr3p_len = agn_mrna_3putr_length(genemodel); double utr3p_score = 0.0; if(utr3p_len >= v->params.exp_3putr_len) utr3p_score = 1.0; else utr3p_score = (double)utr3p_len / (double)v->params.exp_3putr_len; GtArray *introns = agn_typecheck_select(genemodel, agn_typecheck_intron); GtUword exoncount = agn_typecheck_count(genemodel, agn_typecheck_exon); agn_assert(gt_array_size(introns) == exoncount - 1); double structure_score = 0.0; if(gt_array_size(introns) == 0) { GtUword cdslen = agn_mrna_cds_length(genemodel); if(cdslen >= v->params.exp_cds_len) structure_score = 1.0; else structure_score = (double)cdslen / (double)v->params.exp_cds_len; } else { structure_score = gaeval_visitor_introns_confirmed(introns, gaps); } gt_array_delete(gaps); gt_array_delete(introns); double integrity = (v->params.alpha * structure_score) + 
(v->params.beta * coverage) + (v->params.gamma * utr5p_score) + (v->params.epsilon * utr3p_score); if(components != NULL) { components[0] = structure_score; components[1] = coverage; components[2] = utr5p_score; components[3] = utr3p_score; } return integrity; }