/* Recursively walk the is-a out-edges starting at <node>. For every column set
   in row <node->num> of <part_of_in_edges>, a part-of edge is recorded between
   that column and each node currently held on <node_stack> (in both matrices).
   <node> is pushed onto <node_stack> for the duration of the recursion into
   its is-a out-edges and popped again afterwards. */
static void create_transitive_part_of_edges(GtTypeNode *node,
                                            GtBoolMatrix *part_of_out_edges,
                                            GtBoolMatrix *part_of_in_edges,
                                            GtArray *node_stack)
{
  unsigned long col, k, edge;

  if (gt_array_size(node_stack) != 0) {
    col = gt_bool_matrix_get_first_column(part_of_in_edges, node->num);
    while (col != gt_bool_matrix_get_last_column(part_of_in_edges,
                                                 node->num)) {
      for (k = 0; k < gt_array_size(node_stack); k++) {
        GtTypeNode *stacked = *(GtTypeNode**) gt_array_get(node_stack, k);
        gt_bool_matrix_set(part_of_out_edges, col, stacked->num, true);
        gt_bool_matrix_set(part_of_in_edges, stacked->num, col, true);
      }
      col = gt_bool_matrix_get_next_column(part_of_in_edges, node->num, col);
    }
  }

  /* descend along all is-a edges with <node> on the stack */
  gt_array_add(node_stack, node);
  for (edge = 0; edge < gt_array_size(node->is_a_out_edges); edge++) {
    GtTypeNode *is_a_target = *(GtTypeNode**)
                              gt_array_get(node->is_a_out_edges, edge);
    create_transitive_part_of_edges(is_a_target, part_of_out_edges,
                                    part_of_in_edges, node_stack);
  }
  gt_array_pop(node_stack);
}
/* Compute the union of the ranges stored in <cov1> and <cov2>.
   Note that <cov1> is modified: the contents of <cov2> are appended to it and
   it is sorted with gt_range_compare(). The result is a newly allocated array
   of GtRange values in which overlapping ranges have been joined; the caller
   receives ownership of the returned array. */
static GtArray *gaeval_visitor_union(GtArray *cov1, GtArray *cov2)
{
  GtArray *merged;
  GtRange *current, *last;
  GtUword idx, total;

  agn_assert(cov1 && cov2);

  gt_array_add_array(cov1, cov2);
  total = gt_array_size(cov1);
  if(total > 1)
    gt_array_sort(cov1, (GtCompare)gt_range_compare);

  merged = gt_array_new(sizeof(GtRange));
  if(total == 0)
    return merged;

  /* seed the result with the first range */
  current = gt_array_get(cov1, 0);
  gt_array_add(merged, *current);
  last = gt_array_get(merged, 0);

  for(idx = 1; idx < total; idx++)
  {
    current = gt_array_get(cov1, idx);
    if(!gt_range_overlap(current, last))
    {
      /* disjoint: start a new range and track it as the most recent one */
      gt_array_add(merged, *current);
      last = gt_array_get(merged, gt_array_size(merged) - 1);
      continue;
    }
    /* overlapping: grow the most recent range in place */
    *last = gt_range_join(current, last);
  }

  return merged;
}
/* Read all nodes from the GFF3 file <gff3file> and, if reading succeeded, pass
   each of them to a feature visitor created for <feature_index>. All nodes
   read are deleted again before returning. Returns 0 on success and the
   non-zero error code of the node stream otherwise (<err> is set by the
   stream in that case). */
int gt_feature_index_add_gff3file(GtFeatureIndex *feature_index,
                                  const char *gff3file, GtError *err)
{
  GtNodeStream *gff3_in_stream;
  GtGenomeNode *gn;
  GtArray *nodes;
  GtUword idx;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(feature_index && gff3file);

  nodes = gt_array_new(sizeof (GtGenomeNode*));
  gff3_in_stream = gt_gff3_in_stream_new_unsorted(1, &gff3file);

  /* collect the complete stream first, nothing is added on a parse error */
  for (;;) {
    had_err = gt_node_stream_next(gff3_in_stream, &gn, err);
    if (had_err || !gn)
      break;
    gt_array_add(nodes, gn);
  }

  if (!had_err) {
    GtNodeVisitor *feature_visitor = gt_feature_visitor_new(feature_index);
    for (idx = 0; idx < gt_array_size(nodes); idx++) {
      gn = *(GtGenomeNode**) gt_array_get(nodes, idx);
      /* no need to lock, add_*_node() is synchronized */
      had_err = gt_genome_node_accept(gn, feature_visitor, NULL);
      gt_assert(!had_err); /* cannot happen */
    }
    gt_node_visitor_delete(feature_visitor);
  }

  gt_node_stream_delete(gff3_in_stream);
  idx = 0;
  while (idx < gt_array_size(nodes)) {
    gt_genome_node_delete(*(GtGenomeNode**) gt_array_get(nodes, idx));
    idx++;
  }
  gt_array_delete(nodes);

  return had_err;
}
/* If the visitor holds more than one CDS segment, turn the segments into a
   multi-feature: the first segment becomes the multi representative (an ID of
   the form "CDS<counter>" is generated for it when it has none) and every
   other segment that is not yet part of a multi-feature is attached to it.
   Does nothing for zero or one CDS segment. */
static void infer_cds_visitor_check_cds_multi(AgnInferCDSVisitor *v)
{
  GtFeatureNode **firstsegment;
  const char *id;
  GtUword i;

  if(gt_array_size(v->cds) <= 1)
    return;

  firstsegment = gt_array_get(v->cds, 0);
  id = gt_feature_node_get_attribute(*firstsegment, "ID");
  if(id == NULL)
  {
    char newid[64];
    /* bounded formatting: never write past the end of newid */
    snprintf(newid, sizeof newid, "CDS%lu", v->cdscounter++);
    gt_feature_node_add_attribute(*firstsegment, "ID", newid);
  }
  gt_feature_node_make_multi_representative(*firstsegment);

  /* the first segment is a multi-feature by now and is therefore skipped */
  for(i = 0; i < gt_array_size(v->cds); i++)
  {
    GtFeatureNode **segment = gt_array_get(v->cds, i);
    if(!gt_feature_node_is_multi(*segment))
      gt_feature_node_set_multi_representative(*segment, *firstsegment);
  }
}
/* Fill the (empty) array <outranges> with the ranges of the (non-empty) array
   <inranges> mapped to the opposite strand. The input is traversed from its
   last to its first element, and each range is mirrored using
   <gen_total_length> and <gen_offset>, with start and end swapping roles. */
void gt_ranges_copy_to_opposite_strand(GtArray *outranges,
                                       const GtArray *inranges,
                                       GtUword gen_total_length,
                                       GtUword gen_offset)
{
  GtUword remaining;

  /* outranges are empty */
  gt_assert(!gt_array_size(outranges));
  /* inranges are not empty */
  gt_assert(gt_array_size(inranges));

  remaining = gt_array_size(inranges);
  while (remaining > 0) {
    GtRange *src, mirrored;
    /* genomic offset is defined */
    gt_assert(gen_offset != GT_UNDEF_UWORD);
    src = (GtRange*) gt_array_get(inranges, remaining - 1);
    mirrored.start = gen_total_length - 1 - (src->end - gen_offset)
                     + gen_offset;
    mirrored.end = gen_total_length - 1 - (src->start - gen_offset)
                   + gen_offset;
    gt_array_add(outranges, mirrored);
    remaining--;
  }

  /* outranges has the same number of elements as inranges */
  gt_assert(gt_array_size(inranges) == gt_array_size(outranges));
}
/* Write the <SCR_line> XML element for <ags> to <outfp>, indented by
   <indentlevel>. One <exon-intron> element is written per exon except the
   last (with the splice site probabilities stored at the same index and the
   exon score), followed by one <exon-only> element for the last exon.
   An AGS without exons yields an empty <SCR_line> element; previously the
   unsigned expression "size - 1" wrapped around in that case and the loop
   read far beyond the end of both arrays. */
static void xml_outputSCRline(const GthAGS *ags, unsigned int indentlevel,
                              GtFile *outfp)
{
  GthSpliceSiteProb *splicesiteprob;
  unsigned long i, numofexons = gt_array_size(ags->exons);

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<SCR_line>\n");
  indentlevel++;

  /* "i + 1 < numofexons" instead of "i < numofexons - 1" avoids underflow */
  for (i = 0; i + 1 < numofexons; i++) {
    splicesiteprob = (GthSpliceSiteProb*) gt_array_get(ags->splicesiteprobs, i);
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<exon-intron don_prob=\"%.3f\" "
                    "acc_prob=\"%.3f\" e_score=\"%.3f\"/>\n",
                    splicesiteprob->donorsiteprob,
                    splicesiteprob->acceptorsiteprob,
                    ((GthExonAGS*) gt_array_get(ags->exons, i))->score);
  }

  /* here i refers to the last exon (if there is one) */
  if (numofexons > 0) {
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<exon-only e_score=\"%.3f\"/>\n",
                    ((GthExonAGS*) gt_array_get(ags->exons, i))->score);
  }

  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</SCR_line>\n");
}
/* Fill the (empty) <inverted_chain> from the (non-empty) <chain>: file and
   sequence numbers are copied, the start of the first and the end of the last
   forward range are stored as startpos/endpos, and for each pair of
   neighbouring forward ranges the gap between them is added as a forward
   range of the inverted chain. */
static void convert_chain_to_inverted_chain(GthInvertedChain *inverted_chain,
                                            GthChain *chain)
{
  unsigned long exonidx, lastexonnum;
  GtRange *left, *right, gap;

  /* inverted chain is empty */
  gt_assert(!gt_array_size(inverted_chain->forwardranges));
  /* chain is not empty */
  gt_assert(gt_array_size(chain->forwardranges));

  lastexonnum = gt_array_size(chain->forwardranges) - 1;

  /* copy file and sequence numbers */
  inverted_chain->gen_file_num = chain->gen_file_num;
  inverted_chain->gen_seq_num = chain->gen_seq_num;
  inverted_chain->ref_file_num = chain->ref_file_num;
  inverted_chain->ref_seq_num = chain->ref_seq_num;

  /* save the borders of the whole chain */
  left = (GtRange*) gt_array_get_first(chain->forwardranges);
  inverted_chain->startpos = left->start;
  right = (GtRange*) gt_array_get_last(chain->forwardranges);
  inverted_chain->endpos = right->end;

  /* convert (potential) exons to (potential) introns */
  for (exonidx = 0; exonidx < lastexonnum; exonidx++) {
    left = (GtRange*) gt_array_get(chain->forwardranges, exonidx);
    right = (GtRange*) gt_array_get(chain->forwardranges, exonidx + 1);
    gap.start = left->end + 1;
    gap.end = right->start - 1;
    gt_array_add(inverted_chain->forwardranges, gap);
  }
}
/* "next" callback of the sort stream <ns>.
   On the first successful pass the complete input stream is read into
   sort_stream->nodes (EOF nodes are dropped) and sorted stably. Afterwards
   each call hands out the node at sort_stream->idx via <gn>. Once all nodes
   have been handed out, the node array is reset and <*gn> is set to NULL.
   Returns 0 on success and the error code of the input stream otherwise
   (in that case <*gn> is not written by this function). */
static int gt_sort_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                               GtError *err)
{
  GtSortStream *sort_stream;
  GtGenomeNode *node, *eofn;
  int had_err = 0;
  gt_error_check(err);
  sort_stream = gt_sort_stream_cast(ns);

  /* buffer and sort the input; "sorted" is only set if reading succeeded, so
     a later call after an error enters this branch again */
  if (!sort_stream->sorted) {
    while (!(had_err = gt_node_stream_next(sort_stream->in_stream, &node,
                                           err)) && node) {
      if ((eofn = gt_eof_node_try_cast(node)))
        gt_genome_node_delete(eofn); /* get rid of EOF nodes */
      else
        gt_array_add(sort_stream->nodes, node);
    }
    if (!had_err) {
      gt_genome_nodes_sort_stable(sort_stream->nodes);
      sort_stream->sorted = true;
    }
  }

  if (!had_err) {
    gt_assert(sort_stream->sorted);
    if (sort_stream->idx < gt_array_size(sort_stream->nodes)) {
      *gn = *(GtGenomeNode**) gt_array_get(sort_stream->nodes,
                                           sort_stream->idx);
      sort_stream->idx++;
      /* join region nodes with the same sequence ID */
      if (gt_region_node_try_cast(*gn)) {
        GtRange range_a, range_b;
        while (sort_stream->idx < gt_array_size(sort_stream->nodes)) {
          node = *(GtGenomeNode**) gt_array_get(sort_stream->nodes,
                                                sort_stream->idx);
          if (!gt_region_node_try_cast(node) ||
              gt_str_cmp(gt_genome_node_get_seqid(*gn),
                         gt_genome_node_get_seqid(node))) {
            /* the next node is not a region node with the same ID */
            break;
          }
          /* widen the range of the node being returned and drop the
             absorbed region node; it is skipped by advancing idx */
          range_a = gt_genome_node_get_range(*gn);
          range_b = gt_genome_node_get_range(node);
          range_a = gt_range_join(&range_a, &range_b);
          gt_genome_node_set_range(*gn, &range_a);
          gt_genome_node_delete(node);
          sort_stream->idx++;
        }
      }
      return 0;
    }
  }

  /* all buffered nodes have been handed out: signal the end of the stream */
  if (!had_err) {
    gt_array_reset(sort_stream->nodes);
    *gn = NULL;
  }

  return had_err;
}
/* Return the fraction of nodes in <introns> whose range compares equal (via
   gt_range_compare()) to the range of at least one node in <gaps>.
   <introns> must not be empty; 0.0 is returned if <gaps> is empty. */
static double gaeval_visitor_introns_confirmed(GtArray *introns, GtArray *gaps)
{
  GtUword num_introns, num_gaps, in_idx, gap_idx, matched = 0;

  agn_assert(introns && gaps);
  num_introns = gt_array_size(introns);
  num_gaps = gt_array_size(gaps);
  agn_assert(num_introns > 0);
  if(num_gaps == 0)
    return 0.0;

  for(in_idx = 0; in_idx < num_introns; in_idx++)
  {
    GtGenomeNode *intron = *(GtGenomeNode **)gt_array_get(introns, in_idx);
    GtRange intron_range = gt_genome_node_get_range(intron);
    bool confirmed = false;

    /* stop scanning the gaps as soon as one identical range was found */
    for(gap_idx = 0; !confirmed && gap_idx < num_gaps; gap_idx++)
    {
      GtGenomeNode *gap = *(GtGenomeNode **)gt_array_get(gaps, gap_idx);
      GtRange gap_range = gt_genome_node_get_range(gap);
      confirmed = (gt_range_compare(&intron_range, &gap_range) == 0);
    }
    if(confirmed)
      matched++;
  }

  return (double)matched / (double)num_introns;
}
/* Release everything held by the SNP annotator stream <ns>: the region
   mapping, all nodes still waiting in the SNP and output queues, the input
   streams, the nodes of the current gene set, the merge stream and the
   containers themselves. Does nothing if <ns> is NULL. */
static void snp_annotator_stream_free(GtNodeStream *ns)
{
  GtSNPAnnotatorStream *sas;
  GtUword idx;

  if (!ns)
    return;
  sas = gt_snp_annotator_stream_cast(ns);

  gt_region_mapping_delete(sas->rmap);

  /* drain both queues, deleting the nodes they still contain */
  while (gt_queue_size(sas->snps) > 0) {
    GtGenomeNode *pending = (GtGenomeNode*) gt_queue_get(sas->snps);
    gt_genome_node_delete(pending);
  }
  while (gt_queue_size(sas->outqueue) > 0) {
    GtGenomeNode *pending = (GtGenomeNode*) gt_queue_get(sas->outqueue);
    gt_genome_node_delete(pending);
  }

  idx = 0;
  while (idx < gt_array_size(sas->instreams)) {
    gt_node_stream_delete(*(GtNodeStream**) gt_array_get(sas->instreams, idx));
    idx++;
  }

  idx = 0;
  while (idx < gt_array_size(sas->cur_gene_set)) {
    gt_genome_node_delete(*(GtGenomeNode**)
                          gt_array_get(sas->cur_gene_set, idx));
    idx++;
  }

  gt_array_delete(sas->cur_gene_set);
  gt_node_stream_delete(sas->merge_stream);
  gt_array_delete(sas->instreams);
  gt_queue_delete(sas->snps);
  gt_queue_delete(sas->outqueue);
}
/* Unit test for coverage and integrity calculation.
   The same GFF3 file is used twice: once as alignment input for the GAEVAL
   visitor and once as annotation input, which is filtered down to mRNA
   features and run through CDS and exon inference. Exactly two features are
   expected; their coverage and integrity values are compared with fixed
   reference values (tolerance 0.001) and the outcome is reported as
   "calculate integrity". */
static void gv_test_calc_integrity(AgnUnitTest *test)
{
  const char *filename = "data/gff3/gaeval-stream-unit-test-2.gff3";
  AgnGaevalParams params = { 0.6, 0.3, 0.05, 0.05, 400, 200, 100 };

  /* set up the visitor under test from the alignment stream */
  GtNodeStream *align_in = gt_gff3_in_stream_new_unsorted(1, &filename);
  GtNodeVisitor *nv = agn_gaeval_visitor_new(align_in, params);
  AgnGaevalVisitor *gv = gaeval_visitor_cast(nv);
  gt_node_stream_delete(align_in);

  /* annotation pipeline: GFF3 -> mRNA filter -> infer CDS -> infer exons */
  GtNodeStream *gff3in = gt_gff3_in_stream_new_unsorted(1, &filename);
  GtHashmap *typestokeep = gt_hashmap_new(GT_HASH_STRING, NULL, NULL);
  gt_hashmap_add(typestokeep, "mRNA", "mRNA");
  GtNodeStream *filtstream = agn_filter_stream_new(gff3in, typestokeep);
  GtLogger *logger = gt_logger_new(true, "", stderr);
  GtNodeStream *ics = agn_infer_cds_stream_new(filtstream, NULL, logger);
  GtNodeStream *ies = agn_infer_exons_stream_new(ics, NULL, logger);

  GtError *error = gt_error_new();
  GtArray *feats = gt_array_new( sizeof(GtFeatureNode *) );
  GtNodeStream *featstream = gt_array_out_stream_new(ies, feats, error);
  int result = gt_node_stream_pull(featstream, error);
  if(result == -1)
  {
    /* NOTE(review): this path returns without releasing any of the objects
       created above and without reporting a test result */
    fprintf(stderr, "[AgnGaevalVisitor::gv_test_calc_integrity] error "
            "processing GFF3: %s\n", gt_error_get(error));
    return;
  }

  gt_node_stream_delete(gff3in);
  gt_node_stream_delete(filtstream);
  gt_node_stream_delete(featstream);
  gt_node_stream_delete(ics);
  gt_node_stream_delete(ies);
  gt_logger_delete(logger);
  gt_hashmap_delete(typestokeep);

  agn_assert(gt_array_size(feats) == 2);
  GtFeatureNode *g1 = *(GtFeatureNode **)gt_array_get(feats, 0);
  GtFeatureNode *g2 = *(GtFeatureNode **)gt_array_get(feats, 1);

  double cov1 = gaeval_visitor_calculate_coverage(gv, g1, error);
  double cov2 = gaeval_visitor_calculate_coverage(gv, g2, error);
  double int1 = gaeval_visitor_calculate_integrity(gv, g1, cov1, NULL, error);
  double int2 = gaeval_visitor_calculate_integrity(gv, g2, cov2, NULL, error);

  bool coverage_ok = fabs(cov1 - 1.000) < 0.001 &&
                     fabs(cov2 - 0.997) < 0.001;
  bool integrity_ok = fabs(int1 - 0.850) < 0.001 &&
                      fabs(int2 - 0.863) < 0.001;
  agn_unit_test_result(test, "calculate integrity",
                       coverage_ok && integrity_ok);

  gt_error_delete(error);
  gt_array_delete(feats);
  gt_genome_node_delete((GtGenomeNode *)g1);
  gt_genome_node_delete((GtGenomeNode *)g2);
  gt_node_visitor_delete(nv);
}
/* "next" callback of the LTR cluster stream <ns>.
   On the first call the complete input stream is consumed: a new reference to
   each node is stored in lcs->nodes and the node is passed to the visitor
   lcs->lcv. Afterwards process_feature() is called for every key obtained
   from that visitor. If any of these steps fails, the error code is returned
   and the work is attempted again on the next call (first_next stays set).
   On success this and all following calls hand out the buffered nodes one by
   one via <gn>; <*gn> is set to NULL once they are exhausted.
   The node lookup is bounds-checked on the first call as well, so an input
   stream without nodes yields NULL instead of an out-of-range array access. */
static int gt_ltr_cluster_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                      GtError *err)
{
  GtLTRClusterStream *lcs;
  GtGenomeNode *ref_gn;
  int had_err = 0;
  unsigned long i = 0;

  gt_error_check(err);
  lcs = gt_ltr_cluster_stream_cast(ns);

  if (lcs->first_next) {
    /* buffer all input nodes and let the visitor inspect them */
    while (!(had_err = gt_node_stream_next(lcs->in_stream, gn, err)) && *gn) {
      gt_assert(*gn && !had_err);
      ref_gn = gt_genome_node_ref(*gn);
      gt_array_add(lcs->nodes, ref_gn);
      had_err = gt_genome_node_accept(*gn, (GtNodeVisitor*) lcs->lcv, err);
      if (had_err) {
        gt_genome_node_delete(*gn);
        *gn = NULL;
        break;
      }
    }
    lcs->feat_to_encseq =
                       gt_ltr_cluster_prepare_seq_visitor_get_encseqs(lcs->lcv);
    lcs->feat_to_encseq_keys =
                      gt_ltr_cluster_prepare_seq_visitor_get_features(lcs->lcv);

    for (i = 0;
         !had_err && i < gt_str_array_size(lcs->feat_to_encseq_keys);
         i++) {
      had_err = process_feature(lcs,
                                gt_str_array_get(lcs->feat_to_encseq_keys, i),
                                err);
    }
    if (had_err)
      return had_err;
    lcs->first_next = false;
  }

  /* hand out the next buffered node, or NULL when there is none left */
  if (lcs->next_index >= gt_array_size(lcs->nodes))
    *gn = NULL;
  else {
    *gn = *(GtGenomeNode**) gt_array_get(lcs->nodes, lcs->next_index);
    lcs->next_index++;
  }
  return 0;
}
/* Sort the (non-empty) array <matches> with compare_matches() and partition
   it into buckets of consecutive matches sharing the same reference and
   genomic sequence number. One Bucket (sequence numbers, start position in
   <matches> and length) is appended to <buckets> per run, and the length of
   the longest bucket is stored in <maxbucketlength>. */
static void sort_matches_and_calc_buckets(GtArray *matches, GtArray *buckets,
                                          GtUword *maxbucketlength)
{
  GtUword idx, nummatches;
  GthMatch *match;
  Bucket bucket, *stored;

  gt_assert(gt_array_size(matches));
  nummatches = gt_array_size(matches);

  /* sort matches */
  qsort(gt_array_get_space(matches), nummatches, sizeof (GthMatch),
        compare_matches);

  /* the first bucket starts with the first match */
  match = gt_array_get_first(matches);
  bucket.seqnum1 = match->Storeseqnumreference;
  bucket.seqnum2 = match->Storeseqnumgenomic;
  bucket.startpos = 0;

  /* a change in either sequence number closes the current bucket */
  for (idx = 1; idx < nummatches; idx++) {
    match = gt_array_get(matches, idx);
    if (match->Storeseqnumreference == bucket.seqnum1 &&
        match->Storeseqnumgenomic == bucket.seqnum2) {
      continue;
    }
    bucket.length = idx - bucket.startpos;
    gt_array_add(buckets, bucket);
    bucket.seqnum1 = match->Storeseqnumreference;
    bucket.seqnum2 = match->Storeseqnumgenomic;
    bucket.startpos = idx;
  }

  /* save last bucket (idx equals the number of matches here) */
  bucket.length = idx - bucket.startpos;
  gt_array_add(buckets, bucket);

  /* compute maximum bucket length */
  *maxbucketlength = 0;
  for (idx = 0; idx < gt_array_size(buckets); idx++) {
    stored = gt_array_get(buckets, idx);
    if (*maxbucketlength < stored->length)
      *maxbucketlength = stored->length;
  }

  gt_assert(sum_of_bucket_lengths_equals_num_of_matches(buckets,
                                                   gt_array_size(matches)));
}
/* Print the exon structure of <transcript> to <outstream> in a GenBank-like
   location syntax: a single exon is written as "<start..>end", several exons
   as "join(<s1..e1,s2..e2,...,sN..>eN)". For transcripts on the reverse strand
   the whole expression is wrapped in "complement(...)". Only direct children
   of <transcript> that are exon features are considered, sorted with
   agn_gt_genome_node_compare(); at least one exon is required.
   The temporary exon array is released before returning (it used to leak). */
void agn_transcript_structure_gbk(GtFeatureNode *transcript, FILE *outstream)
{
  gt_assert(transcript && outstream);

  /* collect the exons among the direct children */
  GtArray *exons = gt_array_new( sizeof(GtFeatureNode *) );
  GtFeatureNodeIterator *iter = gt_feature_node_iterator_new_direct(transcript);
  GtFeatureNode *child;
  for
  (
    child  = gt_feature_node_iterator_next(iter);
    child != NULL;
    child  = gt_feature_node_iterator_next(iter)
  )
  {
    if(agn_gt_feature_node_is_exon_feature(child))
      gt_array_add(exons, child);
  }
  gt_feature_node_iterator_delete(iter);

  gt_assert(gt_array_size(exons) > 0);
  gt_array_sort(exons, (GtCompare)agn_gt_genome_node_compare);

  bool reverse = gt_feature_node_get_strand(transcript) == GT_STRAND_REVERSE;
  if(reverse)
    fputs("complement(", outstream);

  if(gt_array_size(exons) == 1)
  {
    GtGenomeNode *exon = *(GtGenomeNode **)gt_array_get(exons, 0);
    GtRange exonrange = gt_genome_node_get_range(exon);
    fprintf(outstream, "<%lu..>%lu", exonrange.start, exonrange.end);
  }
  else
  {
    fputs("join(", outstream);
    GtUword i;
    for(i = 0; i < gt_array_size(exons); i++)
    {
      GtGenomeNode *exon = *(GtGenomeNode **)gt_array_get(exons, i);
      GtRange exonrange = gt_genome_node_get_range(exon);

      /* first exon gets the "<" marker, last exon the ">" marker */
      if(i == 0)
        fprintf(outstream, "<%lu..%lu", exonrange.start, exonrange.end);
      else if(i+1 == gt_array_size(exons))
        fprintf(outstream, ",%lu..>%lu", exonrange.start, exonrange.end);
      else
        fprintf(outstream, ",%lu..%lu", exonrange.start, exonrange.end);
    }
    fputs(")", outstream);
  }

  if(reverse)
    fputs(")", outstream);

  /* the array only holds borrowed node pointers, so deleting the array
     itself is sufficient */
  gt_array_delete(exons);
}
static void gt_hmmer_model_hit_delete(GtHMMERModelHit *mh) { unsigned long i; if (!mh) return; for (i = 0; i < gt_array_size(mh->fwd_hits); i++) gt_free(*(GtHMMERSingleHit**) gt_array_get(mh->fwd_hits, i)); gt_array_delete(mh->fwd_hits); for (i = 0; i < gt_array_size(mh->rev_hits); i++) gt_free(*(GtHMMERSingleHit**) gt_array_get(mh->rev_hits, i)); gt_array_delete(mh->rev_hits); gt_free(mh); }
/* Compute the parts of the exons of <genemodel> (which must be an mRNA
   feature) that are covered by <alignment>.
   Returns NULL if the two features are on different strands. Otherwise a new
   array of GtRange values is returned (owned by the caller) which holds, for
   every exon and every feature reachable from the alignment that is not of
   type "match_gap", the intersection of their ranges -- unless that
   intersection equals the range {0, 0}. The resulting ranges are asserted to
   be pairwise non-overlapping.
   Array indices are unsigned (GtUword) so that they match the type returned by
   gt_array_size(); a signed index was compared against it before. */
static GtArray* gaeval_visitor_intersect(GtGenomeNode *genemodel,
                                         GtGenomeNode *alignment)
{
  agn_assert(genemodel && alignment);

  GtFeatureNode *genefn = gt_feature_node_cast(genemodel);
  GtFeatureNode *algnfn = gt_feature_node_cast(alignment);
  agn_assert(gt_feature_node_has_type(genefn, "mRNA"));
  GtStrand genestrand = gt_feature_node_get_strand(genefn);
  GtStrand algnstrand = gt_feature_node_get_strand(algnfn);
  if(genestrand != algnstrand)
    return NULL;

  GtArray *covered_parts = gt_array_new( sizeof(GtRange) );
  GtArray *exons = agn_typecheck_select(genefn, agn_typecheck_exon);
  /* value gaeval_visitor_range_intersect() is compared against to detect
     "no intersection" */
  GtRange nullrange = {0, 0};
  GtUword i;
  for(i = 0; i < gt_array_size(exons); i++)
  {
    GtGenomeNode *exon = *(GtGenomeNode **)gt_array_get(exons, i);
    GtRange exonrange = gt_genome_node_get_range(exon);

    GtFeatureNodeIterator *aniter = gt_feature_node_iterator_new(algnfn);
    GtFeatureNode *tempaln;
    for(tempaln = gt_feature_node_iterator_next(aniter);
        tempaln != NULL;
        tempaln = gt_feature_node_iterator_next(aniter))
    {
      if(gt_feature_node_has_type(tempaln, "match_gap"))
        continue;

      GtRange alnrange = gt_genome_node_get_range((GtGenomeNode *) tempaln);
      GtRange intr = gaeval_visitor_range_intersect(&exonrange, &alnrange);
      if(gt_range_compare(&intr, &nullrange) != 0)
        gt_array_add(covered_parts, intr);
    }
    gt_feature_node_iterator_delete(aniter);
  }
  gt_array_delete(exons);

  /* sanity check: no two covered parts may overlap */
  for(i = 0; i < gt_array_size(covered_parts); i++)
  {
    GtRange *r1 = gt_array_get(covered_parts, i);
    GtUword j;
    for(j = i+1; j < gt_array_size(covered_parts); j++)
    {
      GtRange *r2 = gt_array_get(covered_parts, j);
      agn_assert(gt_range_overlap(r1, r2) == false);
    }
  }

  return covered_parts;
}
/* Return true if no two neighbouring ranges in the (non-empty) array <ranges>
   overlap according to gt_range_overlap(), false otherwise. Only adjacent
   array elements are compared with each other. */
bool gt_ranges_do_not_overlap(const GtArray *ranges)
{
  GtUword next;

  gt_assert(ranges && gt_array_size(ranges));

  next = 1;
  while (next < gt_array_size(ranges)) {
    if (gt_range_overlap(gt_array_get(ranges, next - 1),
                         gt_array_get(ranges, next))) {
      return false;
    }
    next++;
  }
  return true;
}
/* Hashmap iteration callback: <key> is the model name, <val> the
   GtHMMERModelHit collected for it and <data> the GtLTRdigestPdomVisitor.
   The hits of the better strand are selected (forward if
   gt_double_compare(best_fwd, best_rev) <= 0, reverse otherwise). With more
   than one hit, fragments are built from the hits, sorted and passed to
   gt_globalchaining_max(); with exactly one hit, chain number 0 is recorded
   for it directly. In both cases every hit is then attached via
   gt_ltrdigest_pdom_visitor_attach_hit(). Always returns 0. */
static int gt_ltrdigest_pdom_visitor_process_hit(GT_UNUSED void *key, void *val,
                                                 void *data,
                                                 GT_UNUSED GtError *err)
{
  GtHMMERModelHit *mh = (GtHMMERModelHit*) val;
  GtLTRdigestPdomVisitor *lv = (GtLTRdigestPdomVisitor*) data;
  const char *mdl = (const char*) key;
  GtArray *hits = NULL;
  GtUword nof_hits;
  GtFragment *frags;

  if (gt_double_compare(mh->best_fwd, mh->best_rev) <= 0)
    hits = mh->fwd_hits;
  else
    hits = mh->rev_hits;
  gt_assert(hits);
  nof_hits = gt_array_size(hits);
  if (nof_hits == 0)
    return 0;

  if (nof_hits > 1UL) {
    /* chainno is handed to the chain processing callback by address, so it
       must hold a defined value; it starts at 0 like in the single-hit case
       below (presumably the callback uses it as a running chain counter --
       verify against gt_ltrdigest_pdom_visitor_chainproc()) */
    GtUword i, chainno = 0UL;
    frags = gt_malloc((size_t) nof_hits * sizeof (GtFragment));
    for (i = 0; i < nof_hits; i++) {
      GtHMMERSingleHit *h = *(GtHMMERSingleHit**) gt_array_get(hits, i);
      gt_assert(h);
      frags[i].startpos1 = h->hmmfrom;
      frags[i].endpos1 = h->hmmto;
      frags[i].startpos2 = h->alifrom;
      frags[i].endpos2 = h->alito;
      /* weight: aligned length times score */
      frags[i].weight = (GtWord) (h->alito - h->alifrom + 1) * h->score;
      frags[i].data = h;
    }
    qsort(frags, (size_t) nof_hits, sizeof (GtFragment),
          gt_ltrdigest_pdom_visitor_fragcmp);
    gt_log_log("%s: chaining "GT_WU" frags", mdl, nof_hits);
    gt_globalchaining_max(frags, nof_hits,
                          (GtUword) lv->chain_max_gap_length,
                          gt_ltrdigest_pdom_visitor_chainproc, &chainno);
    gt_free(frags);
    for (i = 0; i < nof_hits; i++) {
      GtHMMERSingleHit *h = *(GtHMMERSingleHit**) gt_array_get(hits, i);
      (void) gt_ltrdigest_pdom_visitor_attach_hit(lv, mh, h);
    }
  } else {
    /* a single hit trivially forms chain 0 */
    GtUword chainno = 0UL;
    GtHMMERSingleHit *h = *(GtHMMERSingleHit**) gt_array_get(hits, 0);
    gt_array_add(h->chains, chainno);
    (void) gt_ltrdigest_pdom_visitor_attach_hit(lv, mh, h);
  }

  return 0;
}
/* Determine the line of <track> on which <block> can be placed and store it in
   <result>.
   The existing lines are checked in order with the track's line breaker; the
   first unoccupied one is returned. If all lines are occupied:
   - if the track has a line limit and it is reached, the block is counted as
     discarded, <*result> is set to NULL and 0 is returned (previously the
     function fell through here, created a line anyway and overwrote the NULL
     result, so the limit was never enforced);
   - if line splitting is disabled, the single line of the track is used
     (created on demand);
   - otherwise a new line is appended and returned.
   Returns 0 on success; if the line breaker reports an error, its error code
   is returned and <*result> is set to the line that was being examined. */
static int get_next_free_line(GtTrack *track, GtLine **result, GtBlock *block,
                              GtError *err)
{
  unsigned long i;
  GtLine* line;
  int had_err = 0;
  bool is_occupied;
  gt_assert(track);

  /* find unoccupied line -- may need optimisation */
  for (i = 0; i < gt_array_size(track->lines); i++) {
    line = *(GtLine**) gt_array_get(track->lines, i);
    had_err = gt_line_breaker_line_is_occupied(track->lb, &is_occupied, line,
                                               block, err);
    if (had_err)
      break;
    if (!is_occupied) {
      *result = line;
      return 0;
    }
  }

  /* all lines are occupied, we need to create a new one */
  if (!had_err) {
    /* if line limit is hit, do not create any more lines! */
    if (track->max_num_lines != GT_UNDEF_ULONG
          && gt_array_size(track->lines) == track->max_num_lines) {
      track->discarded_blocks++;
      *result = NULL;
      return 0;
    }
    /* make sure there is only one line if 'split_lines' is set to false */
    if (!track->split) {
      if (gt_array_size(track->lines) < 1) {
        line = gt_line_new();
        gt_array_add(track->lines, line);
      }
      else
        line = *(GtLine**) gt_array_get(track->lines, 0);
      gt_assert(gt_array_size(track->lines) == 1);
    }
    else {
      line = gt_line_new();
      gt_array_add(track->lines, line);
    }
    gt_assert(line);
  }

  *result = line;
  return had_err;
}
/* Check the stop codon of the visitor's current mRNA against its CDS.
   The expected stop codon range is the last three positions of the last CDS
   segment or, for mRNAs on the reverse strand, the first three positions of
   the first CDS segment. Then:
   - more than one explicit stop codon: a message with the number of stop
     codons is logged (the message used to report the number of *start*
     codons by mistake);
   - exactly one: a message is logged if its range differs from the expected
     range;
   - none: a "stop_codon" feature with the expected range is created, added as
     a child of the mRNA and stored in v->stops.
   Does nothing if there is no CDS. */
static void infer_cds_visitor_check_stop(AgnInferCDSVisitor *v)
{
  if(gt_array_size(v->cds) == 0)
    return;

  const char *mrnaid = gt_feature_node_get_attribute(v->mrna, "ID");
  unsigned int ln = gt_genome_node_get_line_number((GtGenomeNode *)v->mrna);
  GtStrand strand = gt_feature_node_get_strand(v->mrna);

  /* derive the expected stop codon range from the 3' CDS segment */
  GtRange stoprange;
  GtUword threeprimeindex = gt_array_size(v->cds) - 1;
  GtGenomeNode **threeprimesegment = gt_array_get(v->cds, threeprimeindex);
  stoprange = gt_genome_node_get_range(*threeprimesegment);
  stoprange.start = stoprange.end - 2;
  if(strand == GT_STRAND_REVERSE)
  {
    threeprimesegment = gt_array_get(v->cds, 0);
    stoprange = gt_genome_node_get_range(*threeprimesegment);
    stoprange.end = stoprange.start + 2;
  }

  if(gt_array_size(v->stops) > 1)
  {
    gt_logger_log(v->logger, "mRNA '%s' (line %u) has %lu stop codons", mrnaid,
                  ln, gt_array_size(v->stops));
  }
  else if(gt_array_size(v->stops) == 1)
  {
    GtGenomeNode **codon = gt_array_get(v->stops, 0);
    GtRange testrange = gt_genome_node_get_range(*codon);
    if(gt_range_compare(&stoprange, &testrange) != 0)
    {
      gt_logger_log(v->logger, "stop codon inferred from CDS [%lu, %lu] does "
                    "not match explicitly provided stop codon [%lu, %lu] for "
                    "mRNA '%s'", stoprange.start, stoprange.end,
                    testrange.start, testrange.end, mrnaid);
    }
  }
  else // agn_assert(gt_array_size(v->stops) == 0)
  {
    GtStr *seqid = gt_genome_node_get_seqid((GtGenomeNode *)v->mrna);
    GtGenomeNode *codonfeature = gt_feature_node_new(seqid, "stop_codon",
                                                     stoprange.start,
                                                     stoprange.end, strand);
    if(v->source)
      gt_feature_node_set_source((GtFeatureNode *)codonfeature, v->source);
    GtFeatureNode *cf = (GtFeatureNode *)codonfeature;
    gt_feature_node_add_child(v->mrna, cf);
    gt_array_add(v->stops, cf);
  }
}
/* Infer CDS features for the visitor's current mRNA from its exons and its
   start/stop codons. Nothing is done if CDS segments already exist, if there
   are no exons, or unless there is exactly one start codon and exactly one
   stop codon. Otherwise, for each exon for which
   infer_cds_visitor_infer_range() reports a coding portion, a "CDS" feature
   with that range is created on the exon's sequence and strand, added as a
   child of the mRNA and stored in v->cds. */
static void infer_cds_visitor_infer_cds(AgnInferCDSVisitor *v)
{
  GtFeatureNode **start_codon = NULL, **stop_codon = NULL;
  GtRange left_codon_range, right_codon_range;
  GtUword idx;

  if(gt_array_size(v->cds) > 0)
    return;
  if(gt_array_size(v->exons) == 0)
    return;

  if(gt_array_size(v->starts) == 1)
    start_codon = gt_array_get(v->starts, 0);
  if(gt_array_size(v->stops) == 1)
    stop_codon = gt_array_get(v->stops, 0);
  if(start_codon == NULL || stop_codon == NULL)
    return;

  /* on the reverse strand the stop codon is the leftmost of the two */
  if(gt_feature_node_get_strand(v->mrna) == GT_STRAND_REVERSE)
  {
    left_codon_range  = gt_genome_node_get_range(*(GtGenomeNode **)stop_codon);
    right_codon_range = gt_genome_node_get_range(*(GtGenomeNode **)start_codon);
  }
  else
  {
    left_codon_range  = gt_genome_node_get_range(*(GtGenomeNode **)start_codon);
    right_codon_range = gt_genome_node_get_range(*(GtGenomeNode **)stop_codon);
  }

  for(idx = 0; idx < gt_array_size(v->exons); idx++)
  {
    GtFeatureNode *exon = *(GtFeatureNode **)gt_array_get(v->exons, idx);
    GtGenomeNode *exon_gn = (GtGenomeNode *)exon;
    GtRange exon_range = gt_genome_node_get_range(exon_gn);
    GtStrand exon_strand = gt_feature_node_get_strand(exon);
    GtRange cdsrange;
    GtGenomeNode *cdsfeat;

    if(!infer_cds_visitor_infer_range(&exon_range, &left_codon_range,
                                      &right_codon_range, &cdsrange))
    {
      continue;
    }

    cdsfeat = gt_feature_node_new(gt_genome_node_get_seqid(exon_gn), "CDS",
                                  cdsrange.start, cdsrange.end, exon_strand);
    if(v->source)
      gt_feature_node_set_source((GtFeatureNode *)cdsfeat, v->source);
    gt_feature_node_add_child(v->mrna, (GtFeatureNode *)cdsfeat);
    gt_array_add(v->cds, cdsfeat);
  }
}
/* Return true if the ranges in <ranges> are in sorted order, i.e. if
   gt_range_compare() does not yield 1 for any pair of neighbouring ranges.
   An array with fewer than two ranges is considered sorted. */
bool gt_ranges_are_sorted(const GtArray *ranges)
{
  GtUword next;

  gt_assert(ranges);

  next = 1;
  while (next < gt_array_size(ranges)) {
    int cmp = gt_range_compare(gt_array_get(ranges, next - 1),
                               gt_array_get(ranges, next));
    if (cmp == 1)
      return false;
    next++;
  }
  return true;
}
/* Show the exon and CDS features of the transcript <feature_node> in GTF
   format on the output file of <gtf_visitor>.
   The exon and CDS feature arrays of the visitor are reset and then filled by
   traversing the direct children of <feature_node> with save_exon_node().
   Both arrays are sorted before output. If there is at least one exon, the
   transcript counter of the visitor is incremented first. Returns the result
   of the child traversal; output is produced regardless of that result. */
static int gtf_show_transcript(GtFeatureNode *feature_node,
                               GtGTFVisitor *gtf_visitor, GtError *err)
{
  GtArray *exons, *cds;
  GtFeatureNode *fn;
  GtUword idx;
  int had_err;

  gt_error_check(err);
  gt_assert(feature_node && gtf_visitor);

  exons = gtf_visitor->exon_features;
  cds = gtf_visitor->CDS_features;
  gt_array_reset(exons);
  gt_array_reset(cds);
  had_err = gt_feature_node_traverse_direct_children(feature_node, gtf_visitor,
                                                     save_exon_node, err);

  if (gt_array_size(exons)) {
    /* sort exon features */
    qsort(gt_array_get_space(exons), gt_array_size(exons),
          sizeof (GtGenomeNode*), (GtCompare) gt_genome_node_compare);
    /* show exon features */
    gtf_visitor->transcript_id++;
    idx = 0;
    while (idx < gt_array_size(exons)) {
      fn = *(GtFeatureNode**) gt_array_get(exons, idx);
      gt_gff3_output_leading(fn, gtf_visitor->outfp);
      gt_file_xprintf(gtf_visitor->outfp, "gene_id \""GT_WU"\"; transcript_id "
                      "\""GT_WU"."GT_WU"\";\n", gtf_visitor->gene_id,
                      gtf_visitor->gene_id, gtf_visitor->transcript_id);
      idx++;
    }
  }

  if (gt_array_size(cds)) {
    /* sort CDS features */
    qsort(gt_array_get_space(cds), gt_array_size(cds),
          sizeof (GtGenomeNode*), (GtCompare) gt_genome_node_compare);
    /* show start_codon feature */
    /* fn = *(GtFeatureNode**) */ (void) gt_array_get(cds, 0);
    /* XXX: to be done */

    /* show CDS features */
    idx = 0;
    while (idx < gt_array_size(cds)) {
      fn = *(GtFeatureNode**) gt_array_get(cds, idx);
      gt_gff3_output_leading(fn, gtf_visitor->outfp);
      gt_file_xprintf(gtf_visitor->outfp, "gene_id \""GT_WU"\"; transcript_id "
                      "\""GT_WU"."GT_WU"\";\n", gtf_visitor->gene_id,
                      gtf_visitor->gene_id, gtf_visitor->transcript_id);
      idx++;
    }
    /* XXX: show stop_codon feature and shorten last CDS feature */
  }

  return had_err;
}
/* For every edge in <matchedgetab>, merge the clusters of its two match
   numbers in the clustered set <cs>. Returns 0 if all merges succeeded and -1
   as soon as one merge reports a non-zero result (<err> is passed through to
   the merge function). */
static int gt_cluster_matches(GtClusteredSet *cs,
                              GtMatchEdgeTable *matchedgetab,
                              GtError *err)
{
  unsigned long edgenum = 0;

  while (edgenum < matchedgetab->num_of_edges) {
    GtMatchEdge *edge = (GtMatchEdge*) gt_array_get(matchedgetab->edges,
                                                    edgenum);
    int rval = gt_clustered_set_merge_clusters(cs, edge->matchnum0,
                                               edge->matchnum1, err);
    if (rval != 0)
      return -1;
    edgenum++;
  }
  return 0;
}
/* Recursive clique enumeration over transcripts in the style of the
   Bron-Kerbosch scheme, with <R> the current clique, <P> the candidates and
   <X> the already processed nodes.
   When both <P> and <X> are empty, the nodes in <R> are collected into a new
   AgnTranscriptClique that is appended to <cliques> -- unless
   <skipsimplecliques> is set and <R> holds exactly one node. Then, while <P>
   is not empty, its first node v is taken, the function recurses with
   R + {v} and the neighbors of v within P and X (agn_feature_neighbors()),
   and v is moved from <P> to <X>. Note that <P> and <X> are modified. */
void agn_bron_kerbosch( GtArray *R, GtArray *P, GtArray *X, GtArray *cliques,
                        bool skipsimplecliques )
{
  gt_assert(R != NULL && P != NULL && X != NULL && cliques != NULL);

  bool exhausted = gt_array_size(P) == 0 && gt_array_size(X) == 0;
  bool reportable = !skipsimplecliques || gt_array_size(R) != 1;
  if(exhausted && reportable)
  {
    GtUword idx;
    AgnTranscriptClique *clique = agn_transcript_clique_new();
    for(idx = 0; idx < gt_array_size(R); idx++)
    {
      GtFeatureNode *transcript = *(GtFeatureNode **)gt_array_get(R, idx);
      agn_transcript_clique_add(clique, transcript);
    }
    gt_array_add(cliques, clique);
  }

  for(; gt_array_size(P) > 0; )
  {
    GtGenomeNode *v = *(GtGenomeNode **)gt_array_get(P, 0);

    // Extend the current clique by v and restrict candidates and processed
    // nodes to the neighbors of v
    GtArray *extendedR = agn_gt_array_copy(R, sizeof(GtGenomeNode *));
    gt_array_add(extendedR, v);
    GtArray *neighborsP = agn_feature_neighbors(v, P);
    GtArray *neighborsX = agn_feature_neighbors(v, X);

    agn_bron_kerbosch(extendedR, neighborsP, neighborsX, cliques,
                      skipsimplecliques);

    // The temporary arrays are no longer needed
    gt_array_delete(extendedR);
    gt_array_delete(neighborsP);
    gt_array_delete(neighborsX);

    // Move v from the candidates to the processed nodes
    gt_array_rem(P, 0);
    gt_array_add(X, v);
  }
}
/* Distribute <cds_feature>, currently a leaf below <fn>, over all of its
   parents as determined by find_cds_parents(): the feature is removed from
   <fn>, then attached (with its Parent attribute set to the parent's ID) to
   the first parent, while every further parent receives a clone of it. */
static void split_cds_feature(GtFeatureNode *cds_feature, GtFeatureNode *fn)
{
  GtArray *parents;
  unsigned long i;
  gt_assert(cds_feature && fn);

  /* find parents */
  parents = find_cds_parents(cds_feature, fn);

  /* remove CDS feature */
  gt_feature_node_remove_leaf(fn, cds_feature);

  /* add CDS feature to all parents */
  for (i = 0; i < gt_array_size(parents); i++) {
    GtFeatureNode *parent = *(GtFeatureNode**) gt_array_get(parents, i);
    const char *id = gt_feature_node_get_attribute(parent, GT_GFF_ID);
    if (!i) {
      /* the first parent takes the original feature */
      gt_feature_node_set_attribute(cds_feature, GT_GFF_PARENT, id);
      gt_feature_node_add_child(parent, cds_feature);
    }
    else {
      /* every other parent gets its own copy with an adjusted Parent */
      GtFeatureNode *new_cds = gt_feature_node_clone(cds_feature);
      gt_feature_node_set_attribute(new_cds, GT_GFF_PARENT, id);
      gt_feature_node_add_child(parent, new_cds);
      /* NOTE(review): the *original* feature is deleted here once per
         additional parent although it was handed to the first parent above
         and is cloned again in the next iteration -- confirm that the
         reference counts work out with gt_feature_node_remove_leaf() and
         gt_feature_node_add_child() */
      gt_genome_node_delete((GtGenomeNode*) cds_feature);
    }
  }

  gt_array_delete(parents);
}
/* Post-process the (potential) introns in <intronstoprocess> in place.
   Introns shorter than 2 * <icdelta> + <icminremintronlength> are removed;
   all others are kept, shrunk by <icdelta> at both ends. The relative order of
   the remaining introns is preserved. */
static void potentialintronspostpro(GtArray *intronstoprocess,
                                    unsigned long icdelta,
                                    unsigned long icminremintronlength)
{
  const unsigned long minintronlength = 2 * icdelta + icminremintronlength;
  GtArray *saved;
  unsigned long idx;

  /* work on a copy so that the input array can be refilled */
  saved = gt_array_new(sizeof (GtRange));
  gt_array_add_array(saved, intronstoprocess);
  gt_array_set_size(intronstoprocess, 0);

  for (idx = 0; idx < gt_array_size(saved); idx++) {
    GtRange intron = *(GtRange*) gt_array_get(saved, idx);
    unsigned long length = intron.end - intron.start + 1;

    if (length < minintronlength) {
      /* skip this intron, that is, this intron is not cut out later */
      continue;
    }
    /* keep this intron (plus/minus intron deltas),
       that is, this intron is cut out later */
    intron.start += icdelta;
    intron.end -= icdelta;
    gt_array_add(intronstoprocess, intron);
  }

  gt_array_delete(saved);
}
/* Look up the entry of <seqid_info> that matches <inrange>: the first element
   whose description range either has an undefined end (GT_UNDEF_UWORD) or
   contains <inrange>. On success the sequence number, file number and
   description range of that element are stored in <seqnum>, <filenum> and
   <outrange> and 0 is returned. Otherwise <err> is set and -1 is returned.
   <filename> is unused; <seqid> only appears in the error message (whose
   mismatched "{range ...)" delimiters have been corrected). */
static int seqid_info_get(SeqidInfo *seqid_info, GtUword *seqnum,
                          GtUword *filenum, GtRange *outrange,
                          const GtRange *inrange,
                          GT_UNUSED const char *filename, const char *seqid,
                          GtError *err)
{
  SeqidInfoElem *seqid_info_elem;
  GtUword i;
  gt_error_check(err);
  gt_assert(seqid_info && seqnum && outrange && inrange);

  for (i = 0; i < gt_array_size(seqid_info); i++) {
    seqid_info_elem = gt_array_get(seqid_info, i);
    if (seqid_info_elem->descrange.end == GT_UNDEF_UWORD ||
        gt_range_contains(&seqid_info_elem->descrange, inrange)) {
      *seqnum = seqid_info_elem->seqnum;
      *filenum = seqid_info_elem->filenum;
      *outrange = seqid_info_elem->descrange;
      return 0;
    }
  }

  gt_error_set(err, "cannot find a sequence with ID \"%s\" "
               "(range " GT_WU "," GT_WU ")", seqid, inrange->start,
               inrange->end);
  return -1;
}
bool gt_ranges_are_consecutive(const GtArray *ranges) { GtUword i; for (i = 0; i < gt_array_size(ranges); i++) { gt_assert(((GtRange*) gt_array_get(ranges, i))->start <= ((GtRange*) gt_array_get(ranges, i))->end); if (i) { /* check if ranges are consecutive */ if (((GtRange*) gt_array_get(ranges, i-1))->end >= ((GtRange*) gt_array_get(ranges, i))->start) { return false; } } } return true; }
/* Append the ranges of the sorted array <in_ranges> to <out_ranges>, skipping
   every range that is identical (same start and end) to its predecessor.
   If <count> is true, a new array of GtUword is returned that holds, for each
   range added to <out_ranges>, how often it occurred in a row in <in_ranges>;
   the caller owns that array. If <count> is false, NULL is returned. */
static GtArray* generic_ranges_uniq(GtArray *out_ranges,
                                    const GtArray *in_ranges, bool count)
{
  GtUword idx, initial_count = 1;
  GtArray *count_array = NULL;
  GtRange current = { GT_UNDEF_UWORD, GT_UNDEF_UWORD },
          previous = { GT_UNDEF_UWORD, GT_UNDEF_UWORD };

  gt_assert(out_ranges && in_ranges);
  gt_assert(gt_ranges_are_sorted(in_ranges));

  if (count)
    count_array = gt_array_new(sizeof (GtUword));

  for (idx = 0; idx < gt_array_size(in_ranges); idx++) {
    current = *(GtRange*) gt_array_get(in_ranges, idx);

    if (idx == 0 ||
        previous.start != current.start || previous.end != current.end) {
      /* first range or a range differing from its predecessor */
      gt_array_add(out_ranges, current);
      if (count)
        gt_array_add(count_array, initial_count);
    }
    else if (count) {
      /* repetition: bump the counter of the most recently added range */
      GtUword *last_count = gt_array_get_last(count_array);
      (*last_count)++;
    }
    previous = current;
  }

  return count_array;
}