unsigned long gt_layout_get_height(const GtLayout *layout) { GtTracklineInfo lines; double tmp, head_track_space = HEAD_TRACK_SPACE_DEFAULT; bool show_track_captions; unsigned long height, line_height, i; gt_assert(layout); /* get dynamic heights from tracks */ lines.style = layout->style; lines.height = 0; (void) gt_hashmap_foreach(layout->tracks, add_tracklines, &lines, NULL); height = lines.height; /* obtain line height and spacer from style */ if (gt_style_get_num(layout->style, "format", "bar_height", &tmp, NULL)) line_height = tmp; else line_height = BAR_HEIGHT_DEFAULT; if (gt_style_get_num(layout->style, "format", "bar_vspace", &tmp, NULL)) line_height += tmp; else line_height += BAR_VSPACE_DEFAULT; if (!(gt_style_get_bool(layout->style, "format","show_track_captions", &show_track_captions, NULL))) show_track_captions = true; /* add custom track space allotment */ if (show_track_captions) { double theight = TOY_TEXT_HEIGHT, captionspace = CAPTION_BAR_SPACE_DEFAULT; (void) gt_style_get_num(layout->style, "format", "track_caption_font_size", &theight, NULL); (void) gt_style_get_num(layout->style, "format", "track_caption_space", &captionspace, NULL); height += gt_array_size(layout->custom_tracks) * (theight + captionspace); } for (i=0;i<gt_array_size(layout->custom_tracks);i++) { GtCustomTrack *ct = *(GtCustomTrack**) gt_array_get(layout->custom_tracks, i); height += gt_custom_track_get_height(ct); (void) gt_style_get_num(layout->style, "format", "track_vspace", &tmp, NULL); height += tmp; } /* add header space and footer */ (void) gt_style_get_num(layout->style, "format", "ruler_space", &head_track_space, NULL); height += HEADER_SPACE + head_track_space + FOOTER_SPACE; return height; }
/*
 * If the visitor collected more than one CDS segment, tie the segments
 * together as one multi-feature: the first segment gets an "ID" attribute
 * (generated as "CDS<counter>" if it has none) and becomes the multi
 * representative; every segment not yet flagged as multi is then pointed at
 * that representative.
 */
static void infer_cds_visitor_check_cds_multi(AgnInferCDSVisitor *v)
{
  GtFeatureNode **first, **current;
  GtUword idx;

  /* zero or one segment: nothing to link */
  if(gt_array_size(v->cds) < 2)
    return;

  first = gt_array_get(v->cds, 0);
  if(gt_feature_node_get_attribute(*first, "ID") == NULL)
  {
    char generated_id[64];
    (void) snprintf(generated_id, sizeof generated_id, "CDS%lu",
                    v->cdscounter++);
    gt_feature_node_add_attribute(*first, "ID", generated_id);
  }
  gt_feature_node_make_multi_representative(*first);

  for(idx = 0; idx < gt_array_size(v->cds); idx++)
  {
    current = gt_array_get(v->cds, idx);
    /* skips the representative itself and anything already multi */
    if(gt_feature_node_is_multi(*current))
      continue;
    gt_feature_node_set_multi_representative(*current, *first);
  }
}
/*
 * Fill the (empty) array <outranges> with the ranges of <inranges> mapped to
 * the opposite strand. <inranges> is walked from its last element to its
 * first; each range [start, end] becomes
 *   [gen_total_length - 1 - (end - gen_offset) + gen_offset,
 *    gen_total_length - 1 - (start - gen_offset) + gen_offset].
 * <inranges> must not be empty and <gen_offset> must be defined.
 */
void gt_ranges_copy_to_opposite_strand(GtArray *outranges,
                                       const GtArray *inranges,
                                       GtUword gen_total_length,
                                       GtUword gen_offset)
{
  GtUword remaining;

  /* preconditions: empty output, non-empty input */
  gt_assert(!gt_array_size(outranges));
  gt_assert(gt_array_size(inranges));

  remaining = gt_array_size(inranges);
  while (remaining > 0) {
    const GtRange *src;
    GtRange mirrored;
    /* genomic offset is defined */
    gt_assert(gen_offset != GT_UNDEF_UWORD);
    src = (GtRange*) gt_array_get(inranges, remaining - 1);
    mirrored.start = gen_total_length - 1 - (src->end - gen_offset)
                     + gen_offset;
    mirrored.end = gen_total_length - 1 - (src->start - gen_offset)
                   + gen_offset;
    gt_array_add(outranges, mirrored);
    remaining--;
  }

  /* one output range per input range */
  gt_assert(gt_array_size(inranges) == gt_array_size(outranges));
}
/*
 * Recursive helper working on <node> and the nodes currently on
 * <node_stack>.
 *
 * For every column set in row node->num of <part_of_in_edges>, an edge is
 * recorded in both matrices between that column and each node on the stack.
 * <node> is then pushed, the function recurses into all targets of
 * node->is_a_out_edges, and <node> is popped again, so the stack is left as
 * it was found.
 */
static void create_transitive_part_of_edges(GtTypeNode *node,
                                            GtBoolMatrix *part_of_out_edges,
                                            GtBoolMatrix *part_of_in_edges,
                                            GtArray *node_stack)
{
  unsigned long col, k;

  if (gt_array_size(node_stack)) {
    col = gt_bool_matrix_get_first_column(part_of_in_edges, node->num);
    while (col != gt_bool_matrix_get_last_column(part_of_in_edges,
                                                 node->num)) {
      for (k = 0; k < gt_array_size(node_stack); k++) {
        GtTypeNode *stacked = *(GtTypeNode**) gt_array_get(node_stack, k);
        gt_bool_matrix_set(part_of_out_edges, col, stacked->num, true);
        gt_bool_matrix_set(part_of_in_edges, stacked->num, col, true);
      }
      col = gt_bool_matrix_get_next_column(part_of_in_edges, node->num, col);
    }
  }

  /* descend along the is_a edges with <node> on the stack */
  gt_array_add(node_stack, node);
  k = 0;
  while (k < gt_array_size(node->is_a_out_edges)) {
    GtTypeNode *target = *(GtTypeNode**) gt_array_get(node->is_a_out_edges, k);
    create_transitive_part_of_edges(target, part_of_out_edges,
                                    part_of_in_edges, node_stack);
    k++;
  }
  gt_array_pop(node_stack);
}
/*
 * Return the fraction of <introns> whose range compares equal (per
 * gt_range_compare()) to the range of at least one node in <gaps>.
 * <introns> must be non-empty; an empty <gaps> yields 0.0.
 */
static double gaeval_visitor_introns_confirmed(GtArray *introns, GtArray *gaps)
{
  agn_assert(introns && gaps);
  GtUword num_introns = gt_array_size(introns);
  GtUword num_gaps = gt_array_size(gaps);
  agn_assert(num_introns > 0);

  if(num_gaps == 0)
    return 0.0;

  GtUword matched = 0;
  GtUword m;
  for(m = 0; m < num_introns; m++)
  {
    GtGenomeNode *intron_node = *(GtGenomeNode **)gt_array_get(introns, m);
    GtRange wanted = gt_genome_node_get_range(intron_node);
    bool confirmed = false;
    GtUword g = 0;

    /* stop at the first gap with an identical range */
    while(!confirmed && g < num_gaps)
    {
      GtGenomeNode *gap_node = *(GtGenomeNode **)gt_array_get(gaps, g);
      GtRange candidate = gt_genome_node_get_range(gap_node);
      confirmed = (gt_range_compare(&wanted, &candidate) == 0);
      g++;
    }
    if(confirmed)
      matched++;
  }

  return (double)matched / (double)num_introns;
}
/*
 * Fill the (empty) <inverted_chain> from the non-empty <chain>: file and
 * sequence numbers are copied, startpos/endpos are taken from the first and
 * last forward range, and for every pair of neighbouring forward ranges the
 * interval between them ([previous end + 1, next start - 1]) is stored as a
 * forward range of the inverted chain.
 */
static void convert_chain_to_inverted_chain(GthInvertedChain *inverted_chain,
                                            GthChain *chain)
{
  unsigned long idx, last_index;
  const GtRange *left, *right;
  GtRange between;

  /* inverted chain is empty, chain is not */
  gt_assert(!gt_array_size(inverted_chain->forwardranges));
  gt_assert(gt_array_size(chain->forwardranges));

  last_index = gt_array_size(chain->forwardranges) - 1;

  /* file and sequence numbers */
  inverted_chain->gen_file_num = chain->gen_file_num;
  inverted_chain->gen_seq_num  = chain->gen_seq_num;
  inverted_chain->ref_file_num = chain->ref_file_num;
  inverted_chain->ref_seq_num  = chain->ref_seq_num;

  /* overall extent of the chain */
  left = (GtRange*) gt_array_get_first(chain->forwardranges);
  inverted_chain->startpos = left->start;
  right = (GtRange*) gt_array_get_last(chain->forwardranges);
  inverted_chain->endpos = right->end;

  /* the space between two (potential) exons is a (potential) intron */
  idx = 0;
  while (idx < last_index) {
    left  = (GtRange*) gt_array_get(chain->forwardranges, idx);
    right = (GtRange*) gt_array_get(chain->forwardranges, idx + 1);
    between.start = left->end + 1;
    between.end   = right->start - 1;
    gt_array_add(inverted_chain->forwardranges, between);
    idx++;
  }
}
/*
 * Return true for every exon index of <gene> except the last one.
 * <exon> must be a valid index into <gene>.
 */
static bool has_donor_site(GtArray *gene, unsigned long exon)
{
  gt_assert(exon < gt_array_size(gene));
  return exon != gt_array_size(gene) - 1;
}
/*
 * Splice form callback (required for calling consensus_sa()).
 *
 * <userdata> is the GthPGL being processed. A new AGS is created for it, the
 * GtUword values stored in <spliced_alignments_in_form> are marked in a bit
 * table with one bit per entry of pgl->saclusters, the AGS is built from
 * that table and finally appended to pgl->assemblies. The remaining
 * parameters are unused.
 */
static void process_splice_form_func(GtArray *spliced_alignments_in_form,
                                     GT_UNUSED const void *set_of_sas,
                                     GT_UNUSED GtUword number_of_sas,
                                     GT_UNUSED size_t size_of_sa,
                                     void *userdata)
{
  GthPGL *pgl = (GthPGL*) userdata;
  GthAGS *ags = gth_ags_new(pgl);
  GtBittab *selected = gt_bittab_new(gt_array_size(pgl->saclusters));
  GtUword k = 0;

  while (k < gt_array_size(spliced_alignments_in_form)) {
    GtUword *member = (GtUword*) gt_array_get(spliced_alignments_in_form, k);
    gt_bittab_set_bit(selected, *member);
    k++;
  }

  gth_build_AGS_from_assembly(ags, selected, pgl->saclusters);
  gt_bittab_delete(selected);

  gt_array_add(pgl->assemblies, ags);
}
/*
 * Return a newly allocated array of GtRange holding the union of the ranges
 * in <cov1> and <cov2>, with overlapping ranges joined.
 *
 * Side effect: the contents of <cov2> are appended to <cov1> and <cov1> is
 * sorted with gt_range_compare(). The caller owns the returned array.
 */
static GtArray *gaeval_visitor_union(GtArray *cov1, GtArray *cov2)
{
  agn_assert(cov1 && cov2);

  gt_array_add_array(cov1, cov2);
  GtUword total = gt_array_size(cov1);
  if(total > 1)
    gt_array_sort(cov1, (GtCompare)gt_range_compare);

  GtArray *merged = gt_array_new(sizeof(GtRange));
  if(total == 0)
    return merged;

  /* seed the result with the first range */
  GtRange *current = gt_array_get(cov1, 0);
  gt_array_add(merged, *current);
  GtRange *tail = gt_array_get(merged, 0);

  GtUword k;
  for(k = 1; k < total; k++)
  {
    current = gt_array_get(cov1, k);
    if(!gt_range_overlap(current, tail))
    {
      /* disjoint: start a new range; re-fetch the tail pointer after adding */
      gt_array_add(merged, *current);
      tail = gt_array_get(merged, gt_array_size(merged) - 1);
      continue;
    }
    /* overlapping: grow the last range in place */
    *tail = gt_range_join(current, tail);
  }
  return merged;
}
/*
 * Deliver the next node of the sort stream in <*gn>.
 *
 * On the first call the complete input stream is read (EOF nodes are
 * deleted, all other nodes are buffered) and the buffer is sorted stably.
 * Buffered nodes are then handed out one per call. When a region node is
 * handed out, all directly following region nodes with the same sequence ID
 * are joined into it (ranges merged, the absorbed nodes deleted).
 * Once the buffer is exhausted, it is reset and <*gn> is set to NULL.
 *
 * Returns 0 on success; a non-zero value from the input stream is passed on
 * and <*gn> is left untouched in that case.
 */
static int gt_sort_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                               GtError *err)
{
  GtSortStream *ss;
  GtGenomeNode *node, *eofn;
  int had_err = 0;
  gt_error_check(err);
  ss = gt_sort_stream_cast(ns);

  /* first call: slurp and sort the input */
  if (!ss->sorted) {
    for (;;) {
      had_err = gt_node_stream_next(ss->in_stream, &node, err);
      if (had_err || !node)
        break;
      eofn = gt_eof_node_try_cast(node);
      if (eofn)
        gt_genome_node_delete(eofn); /* EOF nodes are not passed on */
      else
        gt_array_add(ss->nodes, node);
    }
    if (had_err)
      return had_err;
    gt_genome_nodes_sort_stable(ss->nodes);
    ss->sorted = true;
  }
  gt_assert(ss->sorted);

  /* nothing left to hand out */
  if (ss->idx >= gt_array_size(ss->nodes)) {
    gt_array_reset(ss->nodes);
    *gn = NULL;
    return had_err;
  }

  *gn = *(GtGenomeNode**) gt_array_get(ss->nodes, ss->idx);
  ss->idx++;

  /* join region nodes with the same sequence ID */
  if (gt_region_node_try_cast(*gn)) {
    while (ss->idx < gt_array_size(ss->nodes)) {
      GtRange joined, other;
      node = *(GtGenomeNode**) gt_array_get(ss->nodes, ss->idx);
      if (!gt_region_node_try_cast(node))
        break; /* next node is not a region node */
      if (gt_str_cmp(gt_genome_node_get_seqid(*gn),
                     gt_genome_node_get_seqid(node)))
        break; /* next region node has a different sequence ID */
      joined = gt_genome_node_get_range(*gn);
      other = gt_genome_node_get_range(node);
      joined = gt_range_join(&joined, &other);
      gt_genome_node_set_range(*gn, &joined);
      gt_genome_node_delete(node);
      ss->idx++;
    }
  }
  return 0;
}
/*
 * Read all nodes from the GFF3 file <gff3file> (via an unsorted GFF3 input
 * stream) and, if reading succeeded, feed every node to a feature visitor
 * bound to <feature_index>. All nodes read are deleted afterwards, whether
 * or not reading succeeded.
 *
 * Returns 0 on success, otherwise the error code of the input stream (with
 * <err> set by it).
 */
int gt_feature_index_add_gff3file(GtFeatureIndex *feature_index,
                                  const char *gff3file, GtError *err)
{
  GtNodeStream *in_stream;
  GtGenomeNode *gn;
  GtArray *collected;
  GtUword k;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(feature_index && gff3file);

  collected = gt_array_new(sizeof (GtGenomeNode*));
  in_stream = gt_gff3_in_stream_new_unsorted(1, &gff3file);

  /* buffer the whole file first, so a parse error adds nothing */
  for (;;) {
    had_err = gt_node_stream_next(in_stream, &gn, err);
    if (had_err || !gn)
      break;
    gt_array_add(collected, gn);
  }

  if (!had_err) {
    GtNodeVisitor *visitor = gt_feature_visitor_new(feature_index);
    k = 0;
    while (k < gt_array_size(collected)) {
      gn = *(GtGenomeNode**) gt_array_get(collected, k);
      /* no need to lock, add_*_node() is synchronized */
      had_err = gt_genome_node_accept(gn, visitor, NULL);
      gt_assert(!had_err); /* cannot happen */
      k++;
    }
    gt_node_visitor_delete(visitor);
  }

  gt_node_stream_delete(in_stream);
  for (k = 0; k < gt_array_size(collected); k++) {
    GtGenomeNode *buffered = *(GtGenomeNode**) gt_array_get(collected, k);
    gt_genome_node_delete(buffered);
  }
  gt_array_delete(collected);
  return had_err;
}
/*
 * Release everything held by the SNP annotator stream <ns>: the region
 * mapping, all nodes still waiting in the SNP and output queues, the input
 * streams, the nodes of the current gene set, the merge stream and the
 * containers themselves. A NULL <ns> is ignored.
 */
static void snp_annotator_stream_free(GtNodeStream *ns)
{
  GtSNPAnnotatorStream *sas;
  GtUword k;

  if (!ns)
    return;
  sas = gt_snp_annotator_stream_cast(ns);

  gt_region_mapping_delete(sas->rmap);

  /* drain both queues, deleting the nodes they still hold */
  while (gt_queue_size(sas->snps) > 0) {
    GtGenomeNode *pending = (GtGenomeNode*) gt_queue_get(sas->snps);
    gt_genome_node_delete(pending);
  }
  while (gt_queue_size(sas->outqueue) > 0) {
    GtGenomeNode *pending = (GtGenomeNode*) gt_queue_get(sas->outqueue);
    gt_genome_node_delete(pending);
  }

  k = 0;
  while (k < gt_array_size(sas->instreams)) {
    gt_node_stream_delete(*(GtNodeStream**) gt_array_get(sas->instreams, k));
    k++;
  }

  k = 0;
  while (k < gt_array_size(sas->cur_gene_set)) {
    gt_genome_node_delete(*(GtGenomeNode**)
                          gt_array_get(sas->cur_gene_set, k));
    k++;
  }
  gt_array_delete(sas->cur_gene_set);

  gt_node_stream_delete(sas->merge_stream);
  gt_array_delete(sas->instreams);
  gt_queue_delete(sas->snps);
  gt_queue_delete(sas->outqueue);
}
/*
 * Attach the protein domain hit <singlehit> of model <modelhit> as a
 * protein_match feature to lv->ltr_retrotrans.
 *
 * A feature is created only if the hit has at least one chain entry or
 * lv->output_all_chains is set. It carries the alignment and amino acid
 * sequence as user data, the visitor's tag as source, the hit's e-value as
 * score, a "reading_frame" attribute and, if the model has a name, a "name"
 * attribute. With more than one chain entry and lv->output_all_chains set, a
 * "chains" attribute of the form "<model>:<n>,<model>:<n>,..." is added.
 *
 * singlehit->chains is always deleted and set to NULL. Always returns 0.
 *
 * NOTE(review): modelhit is dereferenced without being asserted, and
 * modelhit->modelname is NULL-checked for the "name" attribute but not when
 * building the "chains" attribute -- confirm it cannot be NULL there.
 */
static int gt_ltrdigest_pdom_visitor_attach_hit(GtLTRdigestPdomVisitor *lv,
                                                GtHMMERModelHit *modelhit,
                                                GtHMMERSingleHit *singlehit)
{
  GtRange coords;
  gt_assert(lv && singlehit);

  coords = gt_ltrdigest_pdom_visitor_coords(lv, singlehit);

  if (gt_array_size(singlehit->chains) > 0 || lv->output_all_chains) {
    char frame_str[32];
    GtGenomeNode *match;

    match = gt_feature_node_new(gt_genome_node_get_seqid((GtGenomeNode*)
                                                      lv->ltr_retrotrans),
                                gt_ft_protein_match,
                                coords.start,
                                coords.end,
                                singlehit->strand);

    /* alignment and translated sequence travel along as user data */
    gt_genome_node_add_user_data((GtGenomeNode*) match, "pdom_alignment",
                                 gt_str_ref(singlehit->alignment),
                                 (GtFree) gt_str_delete);
    gt_genome_node_add_user_data((GtGenomeNode*) match, "pdom_aaseq",
                                 gt_str_ref(singlehit->aastring),
                                 (GtFree) gt_str_delete);

    gt_feature_node_set_source((GtFeatureNode*) match, lv->tag);
    gt_feature_node_set_score((GtFeatureNode*) match,
                              (float) singlehit->evalue);

    (void) snprintf(frame_str, sizeof frame_str, "%d",
                    (int) singlehit->frame);
    gt_feature_node_add_attribute((GtFeatureNode*) match, "reading_frame",
                                  frame_str);

    if (modelhit->modelname != NULL) {
      gt_feature_node_add_attribute((GtFeatureNode*) match, "name",
                                    modelhit->modelname);
    }

    if (gt_array_size(singlehit->chains) > 1UL && lv->output_all_chains) {
      GtStr *chain_list;
      GtUword c, num_chains;
      gt_assert(singlehit->chains != NULL);
      chain_list = gt_str_new();
      num_chains = gt_array_size(singlehit->chains);
      for (c = 0UL; c < num_chains; c++) {
        /* comma-separate the "<model>:<chain>" entries */
        if (c > 0UL)
          gt_str_append_char(chain_list, ',');
        gt_str_append_cstr(chain_list, modelhit->modelname);
        gt_str_append_char(chain_list, ':');
        gt_str_append_ulong(chain_list,
                            *(GtUword*) gt_array_get(singlehit->chains, c));
      }
      gt_feature_node_set_attribute((GtFeatureNode*) match, "chains",
                                    gt_str_get(chain_list));
      gt_str_delete(chain_list);
    }

    gt_feature_node_add_child(lv->ltr_retrotrans, (GtFeatureNode*) match);
  }

  gt_array_delete(singlehit->chains);
  singlehit->chains = NULL;
  return 0;
}
/*
 * Look up the hit record of the current model (s->cur_model) in s->models
 * and, if there is one, remember the current sizes of its forward and
 * reverse hit arrays in last_array_size_fwd / last_array_size_rev.
 */
static void gt_hmmer_parse_status_mark_frame_finished(GtHMMERParseStatus *s)
{
  GtHMMERModelHit *current;
  gt_assert(s && s->models);

  current = gt_hashmap_get(s->models, gt_str_get(s->cur_model));
  if (current == NULL)
    return;

  current->last_array_size_fwd = gt_array_size(current->fwd_hits);
  current->last_array_size_rev = gt_array_size(current->rev_hits);
}
/*
 * Collect the parts of the exons of <genemodel> that are covered by
 * <alignment>.
 *
 * <genemodel> must be an mRNA feature. If gene model and alignment are on
 * different strands, NULL is returned. Otherwise every exon range is
 * intersected with the range of every node yielded by a feature node
 * iterator over <alignment>, skipping "match_gap" features; each non-null
 * intersection (i.e. not equal to {0, 0}) is stored. The stored ranges are
 * finally asserted to be pairwise non-overlapping.
 *
 * Returns a newly allocated array of GtRange owned by the caller, or NULL.
 */
static GtArray* gaeval_visitor_intersect(GtGenomeNode *genemodel,
                                         GtGenomeNode *alignment)
{
  agn_assert(genemodel && alignment);

  GtFeatureNode *genefn = gt_feature_node_cast(genemodel);
  GtFeatureNode *algnfn = gt_feature_node_cast(alignment);
  agn_assert(gt_feature_node_has_type(genefn, "mRNA"));
  GtStrand genestrand = gt_feature_node_get_strand(genefn);
  GtStrand algnstrand = gt_feature_node_get_strand(algnfn);
  if(genestrand != algnstrand)
    return NULL;

  GtArray *covered_parts = gt_array_new( sizeof(GtRange) );
  GtArray *exons = agn_typecheck_select(genefn, agn_typecheck_exon);
  /* unsigned index: it is compared against gt_array_size() and mixed with
     the unsigned inner index below */
  GtUword i;
  for(i = 0; i < gt_array_size(exons); i++)
  {
    GtGenomeNode *exon = *(GtGenomeNode **)gt_array_get(exons, i);
    GtRange exonrange = gt_genome_node_get_range(exon);
    GtFeatureNodeIterator *aniter = gt_feature_node_iterator_new(algnfn);
    GtFeatureNode *tempaln;
    GtRange nullrange = {0, 0};
    for(tempaln  = gt_feature_node_iterator_next(aniter);
        tempaln != NULL;
        tempaln  = gt_feature_node_iterator_next(aniter))
    {
      /* gaps do not contribute coverage */
      if(gt_feature_node_has_type(tempaln, "match_gap"))
        continue;

      GtRange alnrange = gt_genome_node_get_range((GtGenomeNode *) tempaln);
      GtRange intr = gaeval_visitor_range_intersect(&exonrange, &alnrange);
      if(gt_range_compare(&intr, &nullrange) != 0)
        gt_array_add(covered_parts, intr);
    }
    gt_feature_node_iterator_delete(aniter);
  }
  gt_array_delete(exons);

  /* sanity check: no two covered parts may overlap */
  for(i = 0; i < gt_array_size(covered_parts); i++)
  {
    GtRange *r1 = gt_array_get(covered_parts, i);
    GtUword j;
    for(j = i+1; j < gt_array_size(covered_parts); j++)
    {
      GtRange *r2 = gt_array_get(covered_parts, j);
      agn_assert(gt_range_overlap(r1, r2) == false);
    }
  }

  return covered_parts;
}
/*
 * Free the model hit <mh>: every single-hit pointer stored in the forward
 * and reverse hit arrays is released with gt_free(), then both arrays and
 * <mh> itself are deleted. A NULL <mh> is ignored.
 */
static void gt_hmmer_model_hit_delete(GtHMMERModelHit *mh)
{
  unsigned long k;

  if (!mh)
    return;

  k = 0;
  while (k < gt_array_size(mh->fwd_hits)) {
    GtHMMERSingleHit *hit = *(GtHMMERSingleHit**) gt_array_get(mh->fwd_hits,
                                                               k);
    gt_free(hit);
    k++;
  }
  gt_array_delete(mh->fwd_hits);

  k = 0;
  while (k < gt_array_size(mh->rev_hits)) {
    GtHMMERSingleHit *hit = *(GtHMMERSingleHit**) gt_array_get(mh->rev_hits,
                                                               k);
    gt_free(hit);
    k++;
  }
  gt_array_delete(mh->rev_hits);

  gt_free(mh);
}
/*
 * Write the exon structure of <transcript> to <outstream> in GenBank
 * location syntax.
 *
 * The direct children of <transcript> that are exon features are collected
 * and sorted. A single exon is written as "<start..>end"; several exons are
 * written as "join(<s1..e1,s2..e2,...,sN..>eN)". For transcripts on the
 * reverse strand the whole expression is wrapped in "complement(...)".
 * The transcript must have at least one exon.
 */
void agn_transcript_structure_gbk(GtFeatureNode *transcript, FILE *outstream)
{
  gt_assert(transcript && outstream);

  /* collect exons; the array only borrows the child pointers */
  GtArray *exons = gt_array_new( sizeof(GtFeatureNode *) );
  GtFeatureNodeIterator *iter = gt_feature_node_iterator_new_direct(transcript);
  GtFeatureNode *child;
  for
  (
    child = gt_feature_node_iterator_next(iter);
    child != NULL;
    child = gt_feature_node_iterator_next(iter)
  )
  {
    if(agn_gt_feature_node_is_exon_feature(child))
      gt_array_add(exons, child);
  }
  gt_feature_node_iterator_delete(iter);

  gt_assert(gt_array_size(exons) > 0);
  gt_array_sort(exons, (GtCompare)agn_gt_genome_node_compare);

  if(gt_feature_node_get_strand(transcript) == GT_STRAND_REVERSE)
    fputs("complement(", outstream);

  if(gt_array_size(exons) == 1)
  {
    GtGenomeNode *exon = *(GtGenomeNode **)gt_array_get(exons, 0);
    GtRange exonrange = gt_genome_node_get_range(exon);
    fprintf(outstream, "<%lu..>%lu", exonrange.start, exonrange.end);
  }
  else
  {
    fputs("join(", outstream);
    GtUword i;
    for(i = 0; i < gt_array_size(exons); i++)
    {
      GtGenomeNode *exon = *(GtGenomeNode **)gt_array_get(exons, i);
      GtRange exonrange = gt_genome_node_get_range(exon);

      /* '<' marks the first start and '>' the last end */
      if(i == 0)
        fprintf(outstream, "<%lu..%lu", exonrange.start, exonrange.end);
      else if(i+1 == gt_array_size(exons))
        fprintf(outstream, ",%lu..>%lu", exonrange.start, exonrange.end);
      else
        fprintf(outstream, ",%lu..%lu", exonrange.start, exonrange.end);
    }
    fputs(")", outstream);
  }

  if(gt_feature_node_get_strand(transcript) == GT_STRAND_REVERSE)
    fputs(")", outstream);

  /* release the temporary exon list (previously leaked) */
  gt_array_delete(exons);
}
/*
 * Sort <matches> (which must be non-empty) with compare_matches() and
 * partition the sorted array into buckets: a bucket is a maximal run of
 * consecutive matches sharing the same reference and genomic sequence
 * number. Every bucket (sequence numbers, start position in <matches>,
 * length) is appended to <buckets>; the length of the longest bucket in
 * <buckets> is stored in <*maxbucketlength>.
 */
static void sort_matches_and_calc_buckets(GtArray *matches, GtArray *buckets,
                                          GtUword *maxbucketlength)
{
  GtUword pos, run_start = 0;
  GthMatch *match;
  Bucket current, *stored;

  gt_assert(gt_array_size(matches));

  /* sort matches */
  qsort(gt_array_get_space(matches), gt_array_size(matches),
        sizeof (GthMatch), compare_matches);

  /* the first match opens the first bucket */
  match = gt_array_get_first(matches);
  current.seqnum1 = match->Storeseqnumreference;
  current.seqnum2 = match->Storeseqnumgenomic;
  current.startpos = 0;

  for (pos = 1; pos < gt_array_size(matches); pos++) {
    match = gt_array_get(matches, pos);
    if (match->Storeseqnumreference == current.seqnum1 &&
        match->Storeseqnumgenomic == current.seqnum2) {
      continue; /* still inside the current bucket */
    }
    /* close the current bucket ... */
    current.length = pos - run_start;
    gt_array_add(buckets, current);
    /* ... and open the next one at this match */
    run_start = pos;
    current.seqnum1 = match->Storeseqnumreference;
    current.seqnum2 = match->Storeseqnumgenomic;
    current.startpos = pos;
  }

  /* close the last bucket */
  current.length = pos - run_start;
  gt_array_add(buckets, current);

  /* determine the maximum bucket length */
  *maxbucketlength = 0;
  pos = 0;
  while (pos < gt_array_size(buckets)) {
    stored = gt_array_get(buckets, pos);
    if (*maxbucketlength < stored->length)
      *maxbucketlength = stored->length;
    pos++;
  }

  gt_assert(sum_of_bucket_lengths_equals_num_of_matches(buckets,
                                                   gt_array_size(matches)));
}
bool gt_ranges_do_not_overlap(const GtArray *ranges) { GtUword i; gt_assert(ranges && gt_array_size(ranges)); for (i = 1; i < gt_array_size(ranges); i++) { if (gt_range_overlap(gt_array_get(ranges, i-1), gt_array_get(ranges, i))) return false; } return true; }
/*
 * Return true if every result recorded for <test> succeeded, false
 * otherwise. At least one result must have been recorded.
 */
bool agn_unit_test_success(AgnUnitTest *test)
{
  agn_assert(gt_array_size(test->results) > 0);

  GtUword k = 0;
  while(k < gt_array_size(test->results))
  {
    UnitTestResult *outcome = gt_array_get(test->results, k);
    if(outcome->success == false)
      return false;
    k++;
  }
  return true;
}
/*
 * Check the stop codon of the visitor's current mRNA against its CDS, or
 * infer it if none was provided.
 *
 * Does nothing if the mRNA has no CDS. The expected stop codon is the last
 * three positions of the last CDS segment or, on the reverse strand, the
 * first three positions of the first CDS segment (this presumably relies on
 * v->cds being sorted by position -- confirm).
 *  - More than one explicit stop codon: a message is logged.
 *  - Exactly one: a message is logged if its range differs from the
 *    expected one.
 *  - None: a "stop_codon" feature with the expected range is created, added
 *    as a child of the mRNA and appended to v->stops.
 */
static void infer_cds_visitor_check_stop(AgnInferCDSVisitor *v)
{
  if(gt_array_size(v->cds) == 0)
    return;

  const char *mrnaid = gt_feature_node_get_attribute(v->mrna, "ID");
  unsigned int ln = gt_genome_node_get_line_number((GtGenomeNode *)v->mrna);
  GtStrand strand = gt_feature_node_get_strand(v->mrna);

  /* expected stop codon position, derived from the CDS */
  GtRange stoprange;
  GtUword threeprimeindex = gt_array_size(v->cds) - 1;
  GtGenomeNode **threeprimesegment = gt_array_get(v->cds, threeprimeindex);
  stoprange = gt_genome_node_get_range(*threeprimesegment);
  stoprange.start = stoprange.end - 2;
  if(strand == GT_STRAND_REVERSE)
  {
    threeprimesegment = gt_array_get(v->cds, 0);
    stoprange = gt_genome_node_get_range(*threeprimesegment);
    stoprange.end = stoprange.start + 2;
  }

  if(gt_array_size(v->stops) > 1)
  {
    /* report the number of stop codons (was: the number of start codons) */
    gt_logger_log(v->logger, "mRNA '%s' (line %u) has %lu stop codons", mrnaid,
                  ln, gt_array_size(v->stops));
  }
  else if(gt_array_size(v->stops) == 1)
  {
    GtGenomeNode **codon = gt_array_get(v->stops, 0);
    GtRange testrange = gt_genome_node_get_range(*codon);
    if(gt_range_compare(&stoprange, &testrange) != 0)
    {
      gt_logger_log(v->logger, "stop codon inferred from CDS [%lu, %lu] does "
                    "not match explicitly provided stop codon [%lu, %lu] for "
                    "mRNA '%s'", stoprange.start, stoprange.end,
                    testrange.start, testrange.end, mrnaid);
    }
  }
  else /* no explicit stop codon: create one from the CDS */
  {
    GtStr *seqid = gt_genome_node_get_seqid((GtGenomeNode *)v->mrna);
    GtGenomeNode *codonfeature = gt_feature_node_new(seqid, "stop_codon",
                                                     stoprange.start,
                                                     stoprange.end, strand);
    if(v->source)
      gt_feature_node_set_source((GtFeatureNode *)codonfeature, v->source);

    GtFeatureNode *cf = (GtFeatureNode *)codonfeature;
    gt_feature_node_add_child(v->mrna, cf);
    gt_array_add(v->stops, cf);
  }
}
/*
 * Find a line of <track> on which <block> can be placed and store it in
 * <*result>.
 *
 * The existing lines are probed in order with the track's line breaker; the
 * first line that is not occupied for <block> is returned. If all lines are
 * occupied:
 *  - if the track has a line limit (max_num_lines is defined) and the limit
 *    is reached, the block is counted as discarded, <*result> is set to NULL
 *    and no line is created,
 *  - if the track does not split lines, its single line is returned (and
 *    created first if the track has no line yet),
 *  - otherwise a new line is appended to the track and returned.
 *
 * Returns 0 on success or the error code of the line breaker (<err> is set
 * by it); in the error case <*result> is the line probed last.
 */
static int get_next_free_line(GtTrack *track, GtLine **result, GtBlock *block,
                              GtError *err)
{
  unsigned long i;
  GtLine* line;
  int had_err = 0;
  bool is_occupied;
  gt_assert(track);

  /* find unoccupied line -- may need optimisation */
  for (i = 0; i < gt_array_size(track->lines); i++) {
    line = *(GtLine**) gt_array_get(track->lines, i);
    had_err = gt_line_breaker_line_is_occupied(track->lb, &is_occupied, line,
                                               block, err);
    if (had_err)
      break;
    if (!is_occupied) {
      *result = line;
      return 0;
    }
  }

  /* all lines are occupied, we need to create a new one */
  if (!had_err) {
    /* if line limit is hit, do not create any more lines! */
    if (track->max_num_lines != GT_UNDEF_ULONG
          && gt_array_size(track->lines) == track->max_num_lines) {
      track->discarded_blocks++;
      *result = NULL;
      /* stop here: falling through would create/reuse a line and overwrite
         the NULL result */
      return 0;
    }
    /* make sure there is only one line if 'split_lines' is set to false */
    if (!track->split) {
      if (gt_array_size(track->lines) < 1) {
        line = gt_line_new();
        gt_array_add(track->lines, line);
      }
      else
        line = *(GtLine**) gt_array_get(track->lines, 0);
      gt_assert(gt_array_size(track->lines) == 1);
    }
    else {
      line = gt_line_new();
      gt_array_add(track->lines, line);
    }
    gt_assert(line);
  }
  *result = line;
  return had_err;
}
/*
 * Infer CDS features for the visitor's current mRNA from its exons and its
 * start/stop codons.
 *
 * Nothing is done if the mRNA already has CDS segments, has no exons, or
 * does not have exactly one start and exactly one stop codon. Otherwise the
 * codon ranges delimit the coding region (start codon on the left for the
 * forward strand, stop codon on the left for the reverse strand) and, for
 * every exon for which infer_cds_visitor_infer_range() reports a coding
 * part, a "CDS" feature with that range is created, added as a child of the
 * mRNA and appended to v->cds.
 */
static void infer_cds_visitor_infer_cds(AgnInferCDSVisitor *v)
{
  GtFeatureNode **start_codon = NULL, **stop_codon = NULL;

  if(gt_array_size(v->starts) == 1)
    start_codon = gt_array_get(v->starts, 0);
  if(gt_array_size(v->stops) == 1)
    stop_codon = gt_array_get(v->stops, 0);

  /* CDS already present: nothing to infer */
  if(gt_array_size(v->cds) > 0)
    return;
  /* exons and both codons are required */
  if(gt_array_size(v->exons) == 0 || start_codon == NULL ||
     stop_codon == NULL)
  {
    return;
  }

  GtRange start_range = gt_genome_node_get_range(*(GtGenomeNode **)start_codon);
  GtRange stop_range  = gt_genome_node_get_range(*(GtGenomeNode **)stop_codon);
  bool is_reverse = gt_feature_node_get_strand(v->mrna) == GT_STRAND_REVERSE;
  GtRange left_codon_range  = is_reverse ? stop_range  : start_range;
  GtRange right_codon_range = is_reverse ? start_range : stop_range;

  GtUword num_exons = gt_array_size(v->exons);
  GtUword k;
  for(k = 0; k < num_exons; k++)
  {
    GtFeatureNode *exon = *(GtFeatureNode **)gt_array_get(v->exons, k);
    GtGenomeNode *exon_gn = (GtGenomeNode *)exon;
    GtRange exon_range = gt_genome_node_get_range(exon_gn);
    GtStrand exon_strand = gt_feature_node_get_strand(exon);

    GtRange cdsrange;
    if(!infer_cds_visitor_infer_range(&exon_range, &left_codon_range,
                                      &right_codon_range, &cdsrange))
    {
      continue; /* exon lies outside of the coding region */
    }

    GtGenomeNode *cdsfeat = gt_feature_node_new(
                                gt_genome_node_get_seqid(exon_gn), "CDS",
                                cdsrange.start, cdsrange.end, exon_strand);
    if(v->source)
      gt_feature_node_set_source((GtFeatureNode *)cdsfeat, v->source);
    gt_feature_node_add_child(v->mrna, (GtFeatureNode *)cdsfeat);
    gt_array_add(v->cds, cdsfeat);
  }
}
/*
 * Unit test for gaeval_visitor_introns_confirmed().
 *
 * Five introns are first checked against an empty gap list (expected 0.0)
 * and then against five gaps of which three have exactly the same
 * coordinates as an intron (expected 0.6). Both outcomes are recorded in
 * <test>.
 */
static void gv_test_introns_confirmed(AgnUnitTest *test)
{
  static const GtUword intron_coords[][2] = {
    {1000, 1170}, {1225, 1305}, {1950, 2110}, {2545, 2655}, {2800, 2950}
  };
  static const GtUword gap_coords[][2] = {
    {1000, 1170}, {1225, 1302}, {1950, 2110}, {2575, 2655}, {2800, 2950}
  };
  const GtUword num_introns = sizeof intron_coords / sizeof intron_coords[0];
  const GtUword num_gaps = sizeof gap_coords / sizeof gap_coords[0];
  GtGenomeNode *node;
  GtUword k;

  GtStr *seqid = gt_str_new_cstr("chr");

  /* fixture: introns */
  GtArray *introns = gt_array_new( sizeof(GtGenomeNode *) );
  for(k = 0; k < num_introns; k++)
  {
    node = gt_feature_node_new(seqid, "intron", intron_coords[k][0],
                               intron_coords[k][1], GT_STRAND_REVERSE);
    gt_array_add(introns, node);
  }

  /* case 1: no gaps at all */
  GtArray *gaps = gt_array_new( sizeof(GtGenomeNode *) );
  double intcon = gaeval_visitor_introns_confirmed(introns, gaps);
  bool no_gaps_ok = fabs(intcon - 0.0) < 0.0001;
  agn_unit_test_result(test, "introns confirmed (no gaps)", no_gaps_ok);

  /* case 2: three of five gaps coincide with an intron */
  for(k = 0; k < num_gaps; k++)
  {
    node = gt_feature_node_new(seqid, "match_gap", gap_coords[k][0],
                               gap_coords[k][1], GT_STRAND_REVERSE);
    gt_array_add(gaps, node);
  }
  intcon = gaeval_visitor_introns_confirmed(introns, gaps);
  bool gaps_ok = fabs(intcon - 0.6) < 0.0001;
  agn_unit_test_result(test, "introns confirmed (gaps)", gaps_ok);

  /* tear down */
  while(gt_array_size(introns) > 0)
  {
    node = *(GtGenomeNode **)gt_array_pop(introns);
    gt_genome_node_delete(node);
  }
  gt_array_delete(introns);
  while(gt_array_size(gaps) > 0)
  {
    node = *(GtGenomeNode **)gt_array_pop(gaps);
    gt_genome_node_delete(node);
  }
  gt_array_delete(gaps);
  gt_str_delete(seqid);
}
/*
 * Enrich the forward ranges of <chain> with <fragments>.
 *
 * All fragments whose genomic range (startpos2..endpos2) overlaps the
 * genomic range of the chain are collected (at least one must overlap) and
 * sorted; chain->forwardranges is then rebuilt from them, merging every
 * range that starts at or before the end of the previously stored range
 * into that range. If <comments> is set, the forward ranges are printed to
 * <outfp> before they are modified.
 */
static void enrich_chain(GthChain *chain, GtFragment *fragments,
                         unsigned long num_of_fragments, bool comments,
                         GtFile *outfp)
{
  GtRange chain_range, candidate;
  GtArray *overlapping;
  unsigned long k;

  gt_assert(chain && fragments && num_of_fragments);

  if (comments) {
    gt_file_xprintf(outfp, "%c enrich global chain with the following "
                              "forward ranges:\n",COMMENTCHAR);
    gt_file_xprintf(outfp, "%c ", COMMENTCHAR);
    gt_ranges_show(chain->forwardranges, outfp);
  }

  /* collect every fragment overlapping the genomic range of the chain */
  chain_range = chain_get_genomicrange(chain);
  overlapping = gt_array_new(sizeof (GtRange));
  for (k = 0; k < num_of_fragments; k++) {
    candidate.start = fragments[k].startpos2;
    candidate.end = fragments[k].endpos2;
    if (!gt_range_overlap(&chain_range, &candidate))
      continue;
    gt_array_add(overlapping, candidate);
  }
  gt_assert(gt_array_size(overlapping));

  qsort(gt_array_get_space(overlapping), gt_array_size(overlapping),
        sizeof (GtRange), (GtCompare) gt_range_compare);

  /* rebuild the forward ranges from the sorted fragments */
  gt_array_reset(chain->forwardranges);
  candidate = *(GtRange*) gt_array_get_first(overlapping);
  gt_array_add(chain->forwardranges, candidate);

  k = 1;
  while (k < gt_array_size(overlapping)) {
    GtRange *tail = (GtRange*) gt_array_get_last(chain->forwardranges);
    candidate = *(GtRange*) gt_array_get(overlapping, k);
    if (candidate.start > tail->end) {
      /* no overlap with the last stored range -> store as new range */
      gt_array_add(chain->forwardranges, candidate);
    }
    else if (tail->end < candidate.end) {
      /* overlap -> extend the last stored range */
      tail->end = candidate.end;
    }
    k++;
  }

  gt_array_delete(overlapping);
}
/*
 * Write one GFF3 line for the feature node <fn> to the visitor's output
 * file. <data> is the GtGFF3Visitor.
 *
 * After the leading columns, the attribute column is composed of: the
 * unique ID registered for <fn> (if any), the list of parent IDs registered
 * for <fn> (if any, comma-separated), and the attributes emitted by
 * show_attribute(). If nothing was shown at all, a single '.' is written.
 * The line is terminated with a newline. Always returns 0.
 */
static int gff3_show_feature_node(GtFeatureNode *fn, void *data,
                                  GT_UNUSED GtError *err)
{
  GtGFF3Visitor *gff3_visitor = (GtGFF3Visitor*) data;
  GtArray *parent_ids = NULL;
  ShowAttributeInfo info;
  bool anything_shown = false;
  GtUword k, num_parents;
  GtStr *unique_id;

  gt_error_check(err);
  gt_assert(fn && gff3_visitor);

  /* leading part */
  gt_gff3_output_leading(fn, gff3_visitor->outfp);

  /* ID=... */
  unique_id = gt_hashmap_get(gff3_visitor->feature_node_to_unique_id_str, fn);
  if (unique_id) {
    gt_file_xprintf(gff3_visitor->outfp, "%s=%s", GT_GFF_ID,
                    gt_str_get(unique_id));
    anything_shown = true;
  }

  /* Parent=...,... */
  parent_ids = gt_hashmap_get(gff3_visitor->feature_node_to_id_array, fn);
  num_parents = gt_array_size(parent_ids);
  if (num_parents) {
    if (anything_shown)
      gt_file_xfputc(';', gff3_visitor->outfp);
    gt_file_xprintf(gff3_visitor->outfp, "%s=", GT_GFF_PARENT);
    for (k = 0; k < num_parents; k++) {
      if (k > 0)
        gt_file_xfputc(',', gff3_visitor->outfp);
      gt_file_xprintf(gff3_visitor->outfp, "%s",
                      *(char**) gt_array_get(parent_ids, k));
    }
    anything_shown = true;
  }

  /* remaining attributes */
  info.attribute_shown = &anything_shown;
  info.outfp = gff3_visitor->outfp;
  gt_feature_node_foreach_attribute(fn, show_attribute, &info);

  /* a lone dot stands for "no attributes" */
  if (!anything_shown)
    gt_file_xfputc('.', gff3_visitor->outfp);

  gt_file_xfputc('\n', gff3_visitor->outfp);
  return 0;
}
/*
 * Recursive clique enumeration over the candidate set <P>, the current set
 * <R> and the excluded set <X>.
 *
 * When both <P> and <X> are empty, the members of <R> are stored as a new
 * AgnTranscriptClique in <cliques> -- unless <skipsimplecliques> is set and
 * <R> holds exactly one element. Afterwards, while <P> is not empty, its
 * first element v is taken, the function recurses with R + {v} and with the
 * neighbors of v (per agn_feature_neighbors()) in <P> and in <X>, and v is
 * then moved from <P> to <X>.
 *
 * <P> is emptied and <X> grows as a side effect; <R> is not modified.
 */
void agn_bron_kerbosch( GtArray *R, GtArray *P, GtArray *X, GtArray *cliques,
                        bool skipsimplecliques )
{
  gt_assert(R != NULL && P != NULL && X != NULL && cliques != NULL);

  /* nothing left to add or exclude: R is a complete clique */
  if(gt_array_size(P) == 0 && gt_array_size(X) == 0)
  {
    if(!skipsimplecliques || gt_array_size(R) != 1)
    {
      AgnTranscriptClique *clique = agn_transcript_clique_new();
      GtUword k = 0;
      while(k < gt_array_size(R))
      {
        GtFeatureNode *member = *(GtFeatureNode **)gt_array_get(R, k);
        agn_transcript_clique_add(clique, member);
        k++;
      }
      gt_array_add(cliques, clique);
    }
  }

  while(gt_array_size(P) > 0)
  {
    GtGenomeNode *v = *(GtGenomeNode **)gt_array_get(P, 0);

    /* R extended by v */
    GtArray *extended = agn_gt_array_copy(R, sizeof(GtGenomeNode *));
    gt_array_add(extended, v);
    /* candidates and excluded nodes restricted to the neighbors of v */
    GtArray *candidates = agn_feature_neighbors(v, P);
    GtArray *excluded = agn_feature_neighbors(v, X);

    agn_bron_kerbosch(extended, candidates, excluded, cliques,
                      skipsimplecliques);

    /* the temporary sets are no longer needed */
    gt_array_delete(extended);
    gt_array_delete(candidates);
    gt_array_delete(excluded);

    /* move v from P to X */
    gt_array_rem(P, 0);
    gt_array_add(X, v);
  }
}
/*
 * Give every parent of <cds_feature> (as determined by find_cds_parents()
 * within <fn>) its own CDS child.
 *
 * <cds_feature> is first removed from <fn> as a leaf. The first parent gets
 * <cds_feature> itself; every further parent gets a clone. In both cases the
 * "Parent" attribute of the child is set to the "ID" of that parent.
 *
 * NOTE(review): <cds_feature> is passed to gt_genome_node_delete() once per
 * additional parent; presumably this drops the reference that parent used
 * to hold -- confirm against the reference counting of feature nodes.
 */
static void split_cds_feature(GtFeatureNode *cds_feature, GtFeatureNode *fn)
{
  GtArray *cds_parents;
  unsigned long k, num_parents;
  gt_assert(cds_feature && fn);

  cds_parents = find_cds_parents(cds_feature, fn);

  /* detach the CDS feature before re-attaching it below */
  gt_feature_node_remove_leaf(fn, cds_feature);

  num_parents = gt_array_size(cds_parents);
  for (k = 0; k < num_parents; k++) {
    GtFeatureNode *parent = *(GtFeatureNode**) gt_array_get(cds_parents, k);
    const char *parent_id = gt_feature_node_get_attribute(parent, GT_GFF_ID);
    GtFeatureNode *child;

    if (k == 0) {
      /* first parent keeps the original node */
      child = cds_feature;
      gt_feature_node_set_attribute(child, GT_GFF_PARENT, parent_id);
      gt_feature_node_add_child(parent, child);
      continue;
    }

    /* every other parent gets a copy */
    child = gt_feature_node_clone(cds_feature);
    gt_feature_node_set_attribute(child, GT_GFF_PARENT, parent_id);
    gt_feature_node_add_child(parent, child);
    gt_genome_node_delete((GtGenomeNode*) cds_feature);
  }

  gt_array_delete(cds_parents);
}
/*
 * Post-process the (potential) introns in <intronstoprocess> in place.
 *
 * Introns shorter than 2 * <icdelta> + <icminremintronlength> are dropped.
 * Every remaining intron is shrunk by <icdelta> on both sides. The relative
 * order of the kept introns is preserved.
 */
static void potentialintronspostpro(GtArray *intronstoprocess,
                                    unsigned long icdelta,
                                    unsigned long icminremintronlength)
{
  const unsigned long required_length = 2 * icdelta + icminremintronlength;
  GtArray *snapshot;
  unsigned long k;

  /* work on a copy, so the original array can be refilled */
  snapshot = gt_array_new(sizeof (GtRange));
  gt_array_add_array(snapshot, intronstoprocess);
  gt_array_set_size(intronstoprocess, 0);

  k = 0;
  while (k < gt_array_size(snapshot)) {
    GtRange intron = *(GtRange*) gt_array_get(snapshot, k);
    unsigned long length = intron.end - intron.start + 1;
    k++;
    if (length < required_length)
      continue; /* too short: this intron is not cut out later */
    /* long enough: keep it, trimmed by the delta on both ends */
    intron.start += icdelta;
    intron.end -= icdelta;
    gt_array_add(intronstoprocess, intron);
  }

  gt_array_delete(snapshot);
}
/*
 * Push <features> onto the Lua stack of <L>: a table mapping 1..n to new
 * references of the genome nodes stored in the array, or nil if <features>
 * is NULL or empty.
 */
static void push_features_as_table(lua_State *L, GtArray *features)
{
  unsigned long k, num_features;

  if (!features || !gt_array_size(features)) {
    lua_pushnil(L);
    return;
  }

  num_features = gt_array_size(features);
  lua_newtable(L);
  for (k = 0; k < num_features; k++) {
    GtGenomeNode *feature = *(GtGenomeNode**) gt_array_get(features, k);
    lua_pushinteger(L, k+1); /* Lua tables are indexed from 1 */
    gt_lua_genome_node_push(L, gt_genome_node_ref(feature));
    lua_rawset(L, -3);
  }
}