/* Region-node callback of the GFF3 visitor.
   Calls gff3_version_string() and then emits one line of the form
   "<GT_GFF_SEQUENCE_REGION> <seqid> <start> <end>\n" for <rn>.
   If the visitor has an <outstr>, the line is appended to it; otherwise the
   line is printed to <outfp>. <err> is not written to. Always returns 0. */
static int gff3_visitor_region_node(GtNodeVisitor *nv, GtRegionNode *rn,
                                    GT_UNUSED GtError *err)
{
  GtGFF3Visitor *gff3_visitor;
  GtGenomeNode *region;
  gt_error_check(err);
  gff3_visitor = gff3_visitor_cast(nv);
  gt_assert(nv && rn);
  gff3_version_string(nv);
  region = (GtGenomeNode*) rn;

  if (gff3_visitor->outstr) {
    /* string output: assemble the line piece by piece */
    gt_str_append_cstr(gff3_visitor->outstr, GT_GFF_SEQUENCE_REGION);
    gt_str_append_cstr(gff3_visitor->outstr, " ");
    gt_str_append_cstr(gff3_visitor->outstr,
                       gt_str_get(gt_genome_node_get_seqid(region)));
    gt_str_append_char(gff3_visitor->outstr, ' ');
    gt_str_append_ulong(gff3_visitor->outstr,
                        gt_genome_node_get_start(region));
    gt_str_append_char(gff3_visitor->outstr, ' ');
    gt_str_append_ulong(gff3_visitor->outstr,
                        gt_genome_node_get_end(region));
    gt_str_append_char(gff3_visitor->outstr, '\n');
  }
  else {
    /* file output: a single formatted write */
    gt_file_xprintf(gff3_visitor->outfp, "%s %s "GT_WU" "GT_WU"\n",
                    GT_GFF_SEQUENCE_REGION,
                    gt_str_get(gt_genome_node_get_seqid(region)),
                    gt_genome_node_get_start(region),
                    gt_genome_node_get_end(region));
  }
  return 0;
}
/* "next" callback of the sort stream.
   On the first call all nodes are pulled from the input stream (EOF nodes
   are deleted right away), collected in <nodes> and sorted with
   gt_genome_nodes_sort_stable(). Every call then hands out the next collected
   node via <gn>. If that node is a region node, all directly following region
   nodes with the same sequence ID are merged into it (ranges joined, the
   merged nodes deleted). Once all nodes were handed out, the node array is
   reset and <*gn> is set to NULL.
   Returns 0 on success or the error code of the input stream. */
static int gt_sort_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                               GtError *err)
{
  GtSortStream *sort_stream;
  GtGenomeNode *cur, *eof_node;
  int had_err = 0;
  gt_error_check(err);
  sort_stream = gt_sort_stream_cast(ns);

  if (!sort_stream->sorted) {
    /* drain the input stream */
    for (;;) {
      had_err = gt_node_stream_next(sort_stream->in_stream, &cur, err);
      if (had_err || !cur)
        break;
      eof_node = gt_eof_node_try_cast(cur);
      if (eof_node)
        gt_genome_node_delete(eof_node); /* get rid of EOF nodes */
      else
        gt_array_add(sort_stream->nodes, cur);
    }
    if (had_err)
      return had_err;
    gt_genome_nodes_sort_stable(sort_stream->nodes);
    sort_stream->sorted = true;
  }

  gt_assert(sort_stream->sorted);

  if (sort_stream->idx >= gt_array_size(sort_stream->nodes)) {
    /* every node has been handed out */
    gt_array_reset(sort_stream->nodes);
    *gn = NULL;
    return had_err;
  }

  *gn = *(GtGenomeNode**) gt_array_get(sort_stream->nodes, sort_stream->idx);
  sort_stream->idx++;

  /* join region nodes with the same sequence ID */
  if (gt_region_node_try_cast(*gn)) {
    GtRange joined, other;
    for (; sort_stream->idx < gt_array_size(sort_stream->nodes);
         sort_stream->idx++) {
      cur = *(GtGenomeNode**) gt_array_get(sort_stream->nodes,
                                           sort_stream->idx);
      if (!gt_region_node_try_cast(cur) ||
          gt_str_cmp(gt_genome_node_get_seqid(*gn),
                     gt_genome_node_get_seqid(cur))) {
        /* the next node is not a region node with the same ID */
        break;
      }
      joined = gt_genome_node_get_range(*gn);
      other = gt_genome_node_get_range(cur);
      joined = gt_range_join(&joined, &other);
      gt_genome_node_set_range(*gn, &joined);
      gt_genome_node_delete(cur);
    }
  }
  return 0;
}
/* Enlarges the range of <rn_a> to the join (gt_range_join()) of the ranges
   of <rn_a> and <rn_b>. Both nodes must be non-NULL and carry the same
   sequence ID (asserted). <rn_b> is only read. */
void gt_region_node_consolidate(GtRegionNode *rn_a, GtRegionNode *rn_b)
{
  GtGenomeNode *target, *other;
  GtRange joined, other_range;
  gt_assert(rn_a);
  gt_assert(rn_b);
  target = (GtGenomeNode*) rn_a;
  other = (GtGenomeNode*) rn_b;
  gt_assert(!gt_str_cmp(gt_genome_node_get_seqid(target),
                        gt_genome_node_get_seqid(other)));
  joined = gt_genome_node_get_range(target);
  other_range = gt_genome_node_get_range(other);
  joined = gt_range_join(&joined, &other_range);
  gt_genome_node_set_range(target, &joined);
}
/* Callback attaching the best chain of a protein domain model hit to the
   main node of the current LTR element.
   <data> is expected to be a GtLTRdigestStream*. Hits whose best strand
   differs from the strand of the element's main node are ignored. For every
   single hit of the best chain a GT_PDOM_TYPE feature is created (coordinates
   shifted by one), annotated with alignment and amino acid sequence as user
   data, source, score (the hit's e-value), phase and -- if available -- the
   model's name and accession, and added as child of the main node.
   <err> is unused. Always returns 0. */
static int pdom_hit_attach_gff3(GtPdomModel *model, GtPdomModelHit *hit,
                                void *data, GT_UNUSED GtError *err)
{
  GtLTRdigestStream *ls = (GtLTRdigestStream *) data;
  GtStrand strand;
  unsigned long idx = 0;
  gt_assert(model && hit);

  strand = gt_pdom_model_hit_get_best_strand(hit);
  /* do not use the hits on the non-predicted strand
     -- maybe identify nested elements ? */
  if (strand != gt_feature_node_get_strand(ls->element.mainnode))
    return 0;

  while (idx < gt_pdom_model_hit_best_chain_length(hit)) {
    GtPdomSingleHit *singlehit = gt_pdom_model_hit_best_single_hit(hit, idx);
    GtStr *alignmentstring = gt_str_new(),
          *aastring = gt_str_new();
    GtPhase frame = gt_pdom_single_hit_get_phase(singlehit);
    GtRange rng = gt_pdom_single_hit_get_range(singlehit);
    GtGenomeNode *gf;
    GtFeatureNode *domain;

    gt_pdom_single_hit_format_alignment(singlehit, GT_ALIWIDTH,
                                        alignmentstring);
    gt_pdom_single_hit_get_aaseq(singlehit, aastring);

    /* GFF3 is 1-based */
    rng.start++;
    rng.end++;

    gf = gt_feature_node_new(gt_genome_node_get_seqid((GtGenomeNode*)
                                                      ls->element.mainnode),
                             GT_PDOM_TYPE, rng.start, rng.end, strand);
    domain = (GtFeatureNode*) gf;

    /* the feature takes over both strings (freed via gt_str_delete) */
    gt_genome_node_add_user_data((GtGenomeNode*) gf, "pdom_alignment",
                                 alignmentstring, (GtFree) gt_str_delete);
    gt_genome_node_add_user_data((GtGenomeNode*) gf, "pdom_aaseq",
                                 aastring, (GtFree) gt_str_delete);

    gt_feature_node_set_source(domain, ls->ltrdigest_tag);
    gt_feature_node_set_score(domain,
                              gt_pdom_single_hit_get_evalue(singlehit));
    gt_feature_node_set_phase(domain, frame);

    if (gt_pdom_model_get_name(model)) {
      gt_feature_node_add_attribute(domain, "name",
                                    gt_pdom_model_get_name(model));
    }
    if (gt_pdom_model_get_acc(model)) {
      gt_feature_node_add_attribute(domain, "id",
                                    gt_pdom_model_get_acc(model));
    }

    gt_feature_node_add_child(ls->element.mainnode, domain);
    idx++;
  }
  return 0;
}
/* Replaces an MD5 sequence ID of <gn> by a regular sequence ID.
   Nothing happens if the sequence ID of <gn> does not carry the MD5 prefix.
   Otherwise the description belonging to the ID is looked up in
   <region_mapping>, the regular ID is derived from it with
   gt_regular_seqid_save() and applied: for feature nodes via a traversal
   with m2i_change_seqid(), for all other nodes directly on <gn>.
   Returns 0 on success, otherwise the error code of the failing step (the
   description lookup or the traversal). */
static int md5_to_seqid(GtGenomeNode *gn, GtRegionMapping *region_mapping,
                        GtError *err)
{
  GtStr *seqid, *desc;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(gn && region_mapping);

  seqid = gt_genome_node_get_seqid(gn);
  if (!gt_md5_seqid_has_prefix(gt_str_get(seqid)))
    return had_err; /* not an MD5 seqid -> nothing to change */

  /* seqid is a MD5 seqid -> change id */
  desc = gt_str_new();
  had_err = gt_region_mapping_get_description(region_mapping, desc, seqid,
                                              err);
  if (!had_err) {
    GtStr *new_seqid = gt_str_new();
    gt_regular_seqid_save(new_seqid, desc);
    if (!gt_feature_node_try_cast(gn))
      gt_genome_node_change_seqid(gn, new_seqid);
    else {
      M2IChangeSeqidInfo info;
      info.new_seqid = new_seqid;
      info.region_mapping = region_mapping;
      had_err = gt_feature_node_traverse_children((GtFeatureNode*) gn, &info,
                                                  m2i_change_seqid, true,
                                                  err);
    }
    gt_str_delete(new_seqid);
  }
  gt_str_delete(desc);
  return had_err;
}
/* Feature-node callback of the region coverage visitor.
   Looks up the range list stored for the sequence ID of <fn> (it must exist,
   asserted) and records the range of <fn> in it: if the last stored range,
   extended at its end by <max_feature_dist>, overlaps the new range, the
   last stored range is grown to cover the new end; otherwise the new range
   is appended. <err> is unused. Always returns 0. */
static int gt_regioncov_visitor_feature_node(GtNodeVisitor *nv,
                                             GtFeatureNode *fn,
                                             GT_UNUSED GtError *err)
{
  GtRegionCovVisitor *regioncov_visitor;
  GtArray *ranges;
  GtRange feature_range;
  gt_error_check(err);
  regioncov_visitor = gt_regioncov_visitor_cast(nv);
  ranges = gt_hashmap_get(regioncov_visitor->region2rangelist,
                          gt_str_get(gt_genome_node_get_seqid((GtGenomeNode*)
                                                              fn)));
  gt_assert(ranges);
  feature_range = gt_genome_node_get_range((GtGenomeNode*) fn);

  if (gt_array_size(ranges)) {
    GtRange *last = gt_array_get_last(ranges),
            extended = *last;
    /* tolerate gaps of up to <max_feature_dist> behind the last range */
    extended.end += regioncov_visitor->max_feature_dist;
    if (gt_range_overlap(&extended, &feature_range)) {
      /* merge: only the end of the stored range can grow */
      if (feature_range.end > last->end)
        last->end = feature_range.end;
      return 0;
    }
  }

  /* first range for this region or no overlap with the last one */
  gt_array_add(ranges, feature_range);
  return 0;
}
/* Region-node callback of the select visitor.
   <rn> is deleted if a sequence ID was specified and differs from the one of
   <rn>, or if a contain range was specified that does not overlap the range
   of <rn>. Otherwise <rn> is added to the node buffer; with an overlapping
   contain range its range is clipped to that contain range first.
   <err> is unused. Always returns 0. */
static int select_visitor_region_node(GtNodeVisitor *nv, GtRegionNode *rn,
                                      GT_UNUSED GtError *err)
{
  GtSelectVisitor *select_visitor;
  GtGenomeNode *region;
  GtRange range;
  gt_error_check(err);
  select_visitor = select_visitor_cast(nv);
  region = (GtGenomeNode*) rn;

  /* a seqid was specified and it is not the one of <rn> */
  if (gt_str_length(select_visitor->seqid) &&
      gt_str_cmp(select_visitor->seqid, gt_genome_node_get_seqid(region))) {
    gt_genome_node_delete(region);
    return 0;
  }

  /* no contain range defined -> keep <rn> unchanged */
  if (select_visitor->contain_range.start == GT_UNDEF_ULONG) {
    gt_queue_add(select_visitor->node_buffer, rn);
    return 0;
  }

  range = gt_genome_node_get_range(region);
  if (!gt_range_overlap(&range, &select_visitor->contain_range)) {
    /* contain range does not overlap with <rn> range -> delete <rn> */
    gt_genome_node_delete(region);
    return 0;
  }

  /* an overlapping contain range was defined -> update range */
  if (range.start < select_visitor->contain_range.start)
    range.start = select_visitor->contain_range.start;
  if (range.end > select_visitor->contain_range.end)
    range.end = select_visitor->contain_range.end;
  gt_genome_node_set_range(region, &range);
  gt_queue_add(select_visitor->node_buffer, rn);
  return 0;
}
/* Lua binding: adds the feature node given as argument 2 to the feature
   index given as argument 1.
   Argument errors are raised via luaL_argcheck() if argument 2 is not a
   feature node, has no sequence ID, or if the index does not contain its
   sequence region. Failures of the feature index calls are reported through
   gt_lua_error(). Pushes no results (returns 0) on success. */
static int feature_index_lua_add_feature_node(lua_State *L)
{
  GtFeatureIndex **fi;
  GtGenomeNode **gn;
  GtFeatureNode *fn;
  GtStr *seqid;
  GtError *err;
  bool has_seqid;
  int had_err;
  gt_assert(L);

  fi = check_feature_index(L, 1);
  gn = check_genome_node(L, 2);
  fn = gt_feature_node_cast(*gn);
  luaL_argcheck(L, fn, 2, "not a feature node");
  seqid = gt_genome_node_get_seqid(*gn);
  luaL_argcheck(L, seqid, 2, "feature does not have a sequence id");

  /* the error object is released before the next luaL_argcheck() */
  err = gt_error_new();
  had_err = gt_feature_index_has_seqid(*fi, &has_seqid, gt_str_get(seqid),
                                       err);
  if (had_err)
    return gt_lua_error(L, err);
  gt_error_delete(err);
  luaL_argcheck(L, has_seqid, 2,
                "feature index does not contain corresponding sequence region");

  err = gt_error_new();
  had_err = gt_feature_index_add_feature_node(*fi, fn, err);
  if (had_err)
    return gt_lua_error(L, err);
  gt_error_delete(err);
  return 0;
}
// Computes the edit distance between two transcripts.
// Each transcript is wrapped in its own transcript clique; both cliques are
// combined into a clique pair covering the smallest range that contains the
// ranges of both transcripts, using the sequence ID of <t1>. After building
// the model vectors and running the comparative analysis, the edit distance
// reported by the pair is returned. All temporary objects are deleted again.
double agn_calc_edit_distance(GtFeatureNode *t1, GtFeatureNode *t2)
{
  AgnTranscriptClique *clique1, *clique2;
  AgnCliquePair *pair;
  GtRange r1, r2, local_range;
  GtStr *seqid;
  double ed;

  clique1 = agn_transcript_clique_new();
  agn_transcript_clique_add(clique1, t1);
  clique2 = agn_transcript_clique_new();
  agn_transcript_clique_add(clique2, t2);

  r1 = gt_genome_node_get_range((GtGenomeNode *)t1);
  seqid = gt_genome_node_get_seqid((GtGenomeNode *)t1);
  r2 = gt_genome_node_get_range((GtGenomeNode *)t2);

  // smallest range spanning both transcripts
  local_range = r1;
  local_range.start = (r2.start < r1.start) ? r2.start : r1.start;
  local_range.end = (r2.end > r1.end) ? r2.end : r1.end;

  pair = agn_clique_pair_new(gt_str_get(seqid), clique1, clique2,
                             &local_range);
  agn_clique_pair_build_model_vectors(pair);
  agn_clique_pair_comparative_analysis(pair);
  ed = agn_clique_pair_get_edit_distance(pair);

  agn_transcript_clique_delete(clique1);
  agn_transcript_clique_delete(clique2);
  agn_clique_pair_delete(pair);
  return ed;
}
/* Appends the leading, tab-terminated columns of a GFF3 line for <fn> to
   <outstr>: sequence ID, source, type, start, end, score, strand and phase.
   The score is formatted with "%.3g"; an undefined score is written
   as '.'. */
void gt_gff3_output_leading_str(GtFeatureNode *fn, GtStr *outstr)
{
  GtGenomeNode *node;
  gt_assert(fn && outstr);
  node = (GtGenomeNode*) fn;

  /* seqid, source, type */
  gt_str_append_str(outstr, gt_genome_node_get_seqid(node));
  gt_str_append_char(outstr, '\t');
  gt_str_append_cstr(outstr, gt_feature_node_get_source(fn));
  gt_str_append_char(outstr, '\t');
  gt_str_append_cstr(outstr, gt_feature_node_get_type(fn));
  gt_str_append_char(outstr, '\t');

  /* start, end */
  gt_str_append_uword(outstr, gt_genome_node_get_start(node));
  gt_str_append_char(outstr, '\t');
  gt_str_append_uword(outstr, gt_genome_node_get_end(node));
  gt_str_append_char(outstr, '\t');

  /* score */
  if (!gt_feature_node_score_is_defined(fn))
    gt_str_append_char(outstr, '.');
  else {
    char score_buf[BUFSIZ];
    (void) snprintf(score_buf, sizeof score_buf, "%.3g",
                    gt_feature_node_get_score(fn));
    gt_str_append_cstr(outstr, score_buf);
  }
  gt_str_append_char(outstr, '\t');

  /* strand, phase */
  gt_str_append_char(outstr,
                     GT_STRAND_CHARS[gt_feature_node_get_strand(fn)]);
  gt_str_append_char(outstr, '\t');
  gt_str_append_char(outstr,
                     GT_PHASE_CHARS[gt_feature_node_get_phase(fn)]);
  gt_str_append_char(outstr, '\t');
}
/* Attaches <singlehit> as a protein match feature to the LTR retrotransposon
   node of <lv>.
   A feature is only created if the hit belongs to at least one chain or if
   <lv> is configured to output all chains. The feature carries alignment and
   amino acid sequence as user data (as new references to the strings of
   <singlehit>), the visitor's tag as source, the e-value as score, a
   "reading_frame" attribute, a "name" attribute if <modelhit> has a model
   name and -- if the hit is part of more than one chain and all chains are
   output -- a "chains" attribute listing "<modelname>:<chain>" entries
   separated by commas.
   In every case the chain array of <singlehit> is deleted and set to NULL.
   <modelhit> is only accessed when a feature is created.
   Always returns 0. */
static int gt_ltrdigest_pdom_visitor_attach_hit(GtLTRdigestPdomVisitor *lv,
                                                GtHMMERModelHit *modelhit,
                                                GtHMMERSingleHit *singlehit)
{
  int had_err = 0;
  GtRange rrng;
  gt_assert(lv && singlehit);

  rrng = gt_ltrdigest_pdom_visitor_coords(lv, singlehit);

  if (gt_array_size(singlehit->chains) > 0 || lv->output_all_chains) {
    char buf[32];
    GtGenomeNode *gf;
    /* <modelhit> is dereferenced below, so make the requirement explicit */
    gt_assert(modelhit);
    gf = gt_feature_node_new(gt_genome_node_get_seqid((GtGenomeNode*)
                                                      lv->ltr_retrotrans),
                             gt_ft_protein_match,
                             rrng.start,
                             rrng.end,
                             singlehit->strand);
    gt_genome_node_add_user_data((GtGenomeNode*) gf, "pdom_alignment",
                                 gt_str_ref(singlehit->alignment),
                                 (GtFree) gt_str_delete);
    gt_genome_node_add_user_data((GtGenomeNode*) gf, "pdom_aaseq",
                                 gt_str_ref(singlehit->aastring),
                                 (GtFree) gt_str_delete);
    gt_feature_node_set_source((GtFeatureNode*) gf, lv->tag);
    gt_feature_node_set_score((GtFeatureNode*) gf, (float) singlehit->evalue);
    (void) snprintf(buf, sizeof buf, "%d", (int) singlehit->frame);
    gt_feature_node_add_attribute((GtFeatureNode*) gf, "reading_frame", buf);
    if (modelhit->modelname != NULL) {
      gt_feature_node_add_attribute((GtFeatureNode*) gf, "name",
                                    modelhit->modelname);
    }
    if (gt_array_size(singlehit->chains) > 1UL && lv->output_all_chains) {
      GtStr *buffer;
      GtUword j;
      gt_assert(singlehit->chains != NULL);
      buffer = gt_str_new();
      for (j = 0UL; j < gt_array_size(singlehit->chains); j++) {
        /* NOTE(review): the model name is NULL-checked for the "name"
           attribute above but used unconditionally here -- confirm that it
           cannot be NULL when chains are output */
        gt_str_append_cstr(buffer, modelhit->modelname);
        gt_str_append_char(buffer, ':');
        gt_str_append_ulong(buffer,
                            *(GtUword*) gt_array_get(singlehit->chains, j));
        if (j != gt_array_size(singlehit->chains) - 1) {
          gt_str_append_char(buffer, ',');
        }
      }
      gt_feature_node_set_attribute((GtFeatureNode*) gf, "chains",
                                    gt_str_get(buffer));
      gt_str_delete(buffer);
    }
    gt_feature_node_add_child(lv->ltr_retrotrans, (GtFeatureNode*) gf);
  }
  /* the chain information is not needed any more */
  gt_array_delete(singlehit->chains);
  singlehit->chains = NULL;
  return had_err;
}
/* Overwrites <key> with "<seqid of feature>\t<target_id>". */
static void build_key(GtStr *key, GtFeatureNode *feature, GtStr *target_id)
{
  GtStr *seqid;
  gt_assert(key && feature && target_id);
  seqid = gt_genome_node_get_seqid((GtGenomeNode*) feature);
  gt_str_reset(key);
  gt_str_append_str(key, seqid);
  /* the separator cannot occur in seqid or target_id */
  gt_str_append_char(key, '\t');
  gt_str_append_str(key, target_id);
}
/* Lua binding: pushes the sequence ID of the genome node given as argument 1
   as a string, or nil if the node has no sequence ID.
   Always pushes exactly one result. */
static int genome_node_lua_get_seqid(lua_State *L)
{
  GtGenomeNode **gn = check_genome_node(L, 1);
  GtStr *seqid = gt_genome_node_get_seqid(*gn);
  if (!seqid) {
    lua_pushnil(L);
    return 1;
  }
  lua_pushstring(L, gt_str_get(seqid));
  return 1;
}
// Checks the stop codon of the current mRNA against its CDS, or infers it.
// Nothing is done if the mRNA has no CDS segments. The expected stop codon
// range is the last three positions of the last CDS segment, or -- for the
// reverse strand -- the first three positions of the first CDS segment
// (presumably v->cds is ordered by position -- confirm against the caller).
//  - more than one explicit stop codon: a message with the number of stop
//    codons is logged
//  - exactly one explicit stop codon: a message is logged if its range
//    differs from the expected range
//  - no explicit stop codon: a "stop_codon" feature with the expected range
//    is created, added as child of the mRNA and stored in v->stops
static void infer_cds_visitor_check_stop(AgnInferCDSVisitor *v)
{
  if(gt_array_size(v->cds) == 0)
    return;

  const char *mrnaid = gt_feature_node_get_attribute(v->mrna, "ID");
  unsigned int ln = gt_genome_node_get_line_number((GtGenomeNode *)v->mrna);
  GtStrand strand = gt_feature_node_get_strand(v->mrna);

  // range the stop codon is expected to occupy
  GtGenomeNode **threeprimesegment;
  GtRange stoprange;
  if(strand == GT_STRAND_REVERSE)
  {
    threeprimesegment = gt_array_get(v->cds, 0);
    stoprange = gt_genome_node_get_range(*threeprimesegment);
    stoprange.end = stoprange.start + 2;
  }
  else
  {
    GtUword threeprimeindex = gt_array_size(v->cds) - 1;
    threeprimesegment = gt_array_get(v->cds, threeprimeindex);
    stoprange = gt_genome_node_get_range(*threeprimesegment);
    stoprange.start = stoprange.end - 2;
  }

  GtUword numstops = gt_array_size(v->stops);
  if(numstops > 1)
  {
    // report the number of stop codons (not of start codons)
    gt_logger_log(v->logger, "mRNA '%s' (line %u) has %lu stop codons",
                  mrnaid, ln, numstops);
  }
  else if(numstops == 1)
  {
    GtGenomeNode **codon = gt_array_get(v->stops, 0);
    GtRange testrange = gt_genome_node_get_range(*codon);
    if(gt_range_compare(&stoprange, &testrange) != 0)
    {
      gt_logger_log(v->logger, "stop codon inferred from CDS [%lu, %lu] does "
                    "not match explicitly provided stop codon [%lu, %lu] for "
                    "mRNA '%s'", stoprange.start, stoprange.end,
                    testrange.start, testrange.end, mrnaid);
    }
  }
  else // no explicit stop codon: gt_array_size(v->stops) == 0
  {
    GtStr *seqid = gt_genome_node_get_seqid((GtGenomeNode *)v->mrna);
    GtGenomeNode *codonfeature = gt_feature_node_new(seqid, "stop_codon",
                                                     stoprange.start,
                                                     stoprange.end, strand);
    if(v->source)
      gt_feature_node_set_source((GtFeatureNode *)codonfeature, v->source);

    GtFeatureNode *cf = (GtFeatureNode *)codonfeature;
    gt_feature_node_add_child(v->mrna, cf);
    gt_array_add(v->stops, cf);
  }
}
// Infers CDS features for the current mRNA from its exons and codons.
// Inference only takes place if the mRNA has no CDS yet, has at least one
// exon, and has exactly one start codon and exactly one stop codon. For each
// exon, infer_cds_visitor_infer_range() decides whether the exon contains
// part of the CDS and, if so, which range; a "CDS" feature with that range
// and the exon's strand is then created, added as child of the mRNA and
// stored in v->cds.
static void infer_cds_visitor_infer_cds(AgnInferCDSVisitor *v)
{
  GtFeatureNode **start_codon, **stop_codon;

  // preconditions for inference
  if(gt_array_size(v->cds) > 0)
    return;
  if(gt_array_size(v->exons) == 0)
    return;
  if(gt_array_size(v->starts) != 1 || gt_array_size(v->stops) != 1)
    return;
  start_codon = gt_array_get(v->starts, 0);
  stop_codon = gt_array_get(v->stops, 0);
  if(start_codon == NULL || stop_codon == NULL)
    return;

  // on the reverse strand the stop codon is the left-most of the two codons
  GtRange start_range = gt_genome_node_get_range(*(GtGenomeNode **)start_codon);
  GtRange stop_range = gt_genome_node_get_range(*(GtGenomeNode **)stop_codon);
  bool reverse = gt_feature_node_get_strand(v->mrna) == GT_STRAND_REVERSE;
  GtRange left_codon_range = reverse ? stop_range : start_range;
  GtRange right_codon_range = reverse ? start_range : stop_range;

  GtUword idx = 0;
  while(idx < gt_array_size(v->exons))
  {
    GtFeatureNode *exon = *(GtFeatureNode **)gt_array_get(v->exons, idx);
    GtGenomeNode *exon_gn = (GtGenomeNode *)exon;
    GtRange exon_range = gt_genome_node_get_range(exon_gn);
    GtStrand exon_strand = gt_feature_node_get_strand(exon);
    GtRange cdsrange;

    if(infer_cds_visitor_infer_range(&exon_range, &left_codon_range,
                                     &right_codon_range, &cdsrange))
    {
      GtGenomeNode *cdsfeat;
      cdsfeat = gt_feature_node_new(gt_genome_node_get_seqid(exon_gn), "CDS",
                                    cdsrange.start, cdsrange.end,
                                    exon_strand);
      if(v->source)
        gt_feature_node_set_source((GtFeatureNode *)cdsfeat, v->source);
      gt_feature_node_add_child(v->mrna, (GtFeatureNode *)cdsfeat);
      gt_array_add(v->cds, cdsfeat);
    }
    idx++;
  }
}
static int create_block_features(GtBEDParser *bed_parser, GtFeatureNode *fn, GtUword block_count, GtSplitter *size_splitter, GtSplitter *start_splitter, GtIO *bed_file, GtError *err) { GtUword i; int had_err = 0; gt_assert(fn && block_count && size_splitter && start_splitter); gt_assert(gt_splitter_size(size_splitter) == block_count); gt_assert(gt_splitter_size(start_splitter) == block_count); for (i = 0; !had_err && i < block_count; i++) { GtUword block_size, block_start, start, end; GtGenomeNode *block; const char *name; if (gt_parse_uword(&block_size, gt_splitter_get_token(size_splitter, i))) { gt_error_set(err, "file \"%s\": line "GT_WU": could not parse blockSize '%s'", gt_io_get_filename(bed_file), gt_io_get_line_number(bed_file), gt_splitter_get_token(size_splitter, i)); had_err = -1; } if (!had_err && gt_parse_uword(&block_start, gt_splitter_get_token(start_splitter, i))) { gt_error_set(err, "file \"%s\": line "GT_WU": could not parse blockStart " "'%s'", gt_io_get_filename(bed_file), gt_io_get_line_number(bed_file), gt_splitter_get_token(start_splitter, i)); had_err = -1; } if (!had_err) { start = gt_genome_node_get_start((GtGenomeNode*) fn) + block_start; end = start + block_size - 1; block = gt_feature_node_new(gt_genome_node_get_seqid((GtGenomeNode*) fn), bed_parser->block_type ? bed_parser->block_type : BED_BLOCK_TYPE, start, end, gt_feature_node_get_strand(fn)); if ((name = gt_feature_node_get_attribute(fn, GT_GFF_NAME))) { gt_feature_node_add_attribute((GtFeatureNode*) block, GT_GFF_NAME, name); } gt_feature_node_set_score((GtFeatureNode*) block, gt_feature_node_get_score(fn)); gt_feature_node_set_strand((GtFeatureNode*) block, gt_feature_node_get_strand(fn)); gt_feature_node_add_child(fn, (GtFeatureNode*) block); } } return had_err; }
/* Attaches the best suitable PBS hit from <results> to the main node of
   <element>.
   If <*canonical_strand> is unknown, the top-ranked hit is used and its
   strand becomes the canonical strand. Otherwise the ranked hits are searched
   for the first one on the canonical strand; if there is none, nothing is
   attached. The chosen hit is added as GT_PBS_TYPE child feature (coordinates
   shifted by one) with source <tag>, its score and the attributes "trna" (if
   available), "trnaoffset", "pbsoffset" and "edist". The strand of the main
   node is set to the strand of the hit. */
static void pbs_attach_results_to_gff3(GtPBSResults *results,
                                       GtLTRElement *element,
                                       GtStrand *canonical_strand,
                                       GtStr *tag)
{
  GtRange pbs_range;
  GtGenomeNode *gf;
  GtFeatureNode *pbs;
  GtPBSHit *hit;
  GtStrand hit_strand;
  unsigned long next = 0;
  char buffer[BUFSIZ];

  hit = gt_pbs_results_get_ranked_hit(results, next);
  next++;

  if (*canonical_strand != GT_STRAND_UNKNOWN) {
    /* do we have to satisfy a strand constraint?
     * then find best-scoring PBS on the given canonical strand */
    while (gt_pbs_hit_get_strand(hit) != *canonical_strand
             && next < gt_pbs_results_get_number_of_hits(results)) {
      gt_log_log("dropping PBS because of nonconsistent strand: %s\n",
                 gt_feature_node_get_attribute(element->mainnode, "ID"));
      hit = gt_pbs_results_get_ranked_hit(results, next);
      next++;
    }
    /* if there is none, do not report a PBS */
    if (gt_pbs_hit_get_strand(hit) != *canonical_strand)
      return;
  }
  else
    *canonical_strand = gt_pbs_hit_get_strand(hit);

  hit_strand = gt_pbs_hit_get_strand(hit);

  /* GFF3 is 1-based */
  pbs_range = gt_pbs_hit_get_coords(hit);
  pbs_range.start++;
  pbs_range.end++;

  gf = gt_feature_node_new(gt_genome_node_get_seqid((GtGenomeNode*)
                                                    element->mainnode),
                           GT_PBS_TYPE, pbs_range.start, pbs_range.end,
                           hit_strand);
  pbs = (GtFeatureNode*) gf;
  gt_feature_node_set_source(pbs, tag);
  gt_feature_node_set_score(pbs, (float) gt_pbs_hit_get_score(hit));
  if (gt_pbs_hit_get_trna(hit) != NULL)
    gt_feature_node_add_attribute(pbs, "trna", gt_pbs_hit_get_trna(hit));

  gt_feature_node_set_strand(element->mainnode, hit_strand);

  (void) snprintf(buffer, sizeof buffer - 1, "%lu",
                  gt_pbs_hit_get_tstart(hit));
  gt_feature_node_add_attribute(pbs, "trnaoffset", buffer);
  (void) snprintf(buffer, sizeof buffer - 1, "%lu",
                  gt_pbs_hit_get_offset(hit));
  gt_feature_node_add_attribute(pbs, "pbsoffset", buffer);
  (void) snprintf(buffer, sizeof buffer - 1, "%lu",
                  gt_pbs_hit_get_edist(hit));
  gt_feature_node_add_attribute(pbs, "edist", buffer);

  gt_feature_node_add_child(element->mainnode, pbs);
}
/* Region-node callback of the region coverage visitor.
   Registers a new, empty array of GtRange elements in <region2rangelist>
   under a duplicate of the sequence ID of <rn>. <err> is unused.
   Always returns 0. */
static int gt_regioncov_visitor_region_node(GtNodeVisitor *nv,
                                            GtRegionNode *rn,
                                            GT_UNUSED GtError *err)
{
  GtRegionCovVisitor *regioncov_visitor;
  GtArray *rangelist;
  char *seqid_copy;
  gt_error_check(err);
  regioncov_visitor = gt_regioncov_visitor_cast(nv);
  rangelist = gt_array_new(sizeof (GtRange));
  /* the map gets its own copy of the key */
  seqid_copy =
    gt_cstr_dup(gt_str_get(gt_genome_node_get_seqid((GtGenomeNode*) rn)));
  gt_hashmap_add(regioncov_visitor->region2rangelist, seqid_copy, rangelist);
  return 0;
}
/* Sequence-node callback of the select visitor.
   <sn> is added to the node buffer if no sequence ID was specified or if the
   specified one equals the sequence ID of <sn>; otherwise <sn> is deleted.
   <err> is unused. Always returns 0. */
static int select_visitor_sequence_node(GtNodeVisitor *nv, GtSequenceNode *sn,
                                        GT_UNUSED GtError *err)
{
  GtSelectVisitor *select_visitor;
  gt_error_check(err);
  select_visitor = select_visitor_cast(nv);

  /* a seqid was specified and it is not the one of <sn> */
  if (gt_str_length(select_visitor->seqid) &&
      gt_str_cmp(select_visitor->seqid,
                 gt_genome_node_get_seqid((GtGenomeNode*) sn))) {
    gt_genome_node_delete((GtGenomeNode*) sn);
    return 0;
  }

  gt_queue_add(select_visitor->node_buffer, sn);
  return 0;
}
/* Region-node callback of the GFF3 visitor.
   Calls gff3_version_string() and then prints one line of the form
   "<GT_GFF_SEQUENCE_REGION> <seqid> <start> <end>\n" for <rn> to <outfp>.
   <err> is not written to. Always returns 0. */
static int gff3_visitor_region_node(GtNodeVisitor *nv, GtRegionNode *rn,
                                    GT_UNUSED GtError *err)
{
  GtGFF3Visitor *gff3_visitor;
  GtGenomeNode *region;
  gt_error_check(err);
  gff3_visitor = gff3_visitor_cast(nv);
  gt_assert(nv && rn);
  gff3_version_string(nv);
  region = (GtGenomeNode*) rn;
  gt_file_xprintf(gff3_visitor->outfp, "%s %s %lu %lu\n",
                  GT_GFF_SEQUENCE_REGION,
                  gt_str_get(gt_genome_node_get_seqid(region)),
                  gt_genome_node_get_start(region),
                  gt_genome_node_get_end(region));
  return 0;
}
/* Lua binding: adds the feature node given as argument 2 to the feature
   index given as argument 1.
   Argument errors are raised via luaL_argcheck() if argument 2 is not a
   feature node, has no sequence ID, or if the index does not contain its
   sequence region. Pushes no results (returns 0). */
static int feature_index_lua_add_feature_node(lua_State *L)
{
  GtFeatureIndex **fi;
  GtGenomeNode **gn;
  GtFeatureNode *feature;
  GtStr *seqid;
  gt_assert(L);

  fi = check_feature_index(L, 1);
  gn = check_genome_node(L, 2);

  feature = gt_genome_node_cast(gt_feature_node_class(), *gn);
  luaL_argcheck(L, feature, 2, "not a feature node");

  seqid = gt_genome_node_get_seqid(*gn);
  luaL_argcheck(L, seqid, 2, "feature does not have a sequence id");

  luaL_argcheck(L, gt_feature_index_has_seqid(*fi, gt_str_get(seqid)), 2,
                "feature index does not contain corresponding sequence region");

  gt_feature_index_add_feature_node(*fi, feature);
  return 0;
}
/* Prints the leading, tab-terminated columns of a GFF3 line for <fn> to
   <outfp>: sequence ID, source, type, start, end, score, strand and phase.
   The score is formatted with "%.3g"; an undefined score is written
   as '.'. */
void gt_gff3_output_leading(GtFeatureNode *fn, GtFile *outfp)
{
  GtGenomeNode *node;
  gt_assert(fn);
  node = (GtGenomeNode*) fn;

  /* seqid, source, type, start, end */
  gt_file_xprintf(outfp, "%s\t%s\t%s\t"GT_WU"\t"GT_WU"\t",
                  gt_str_get(gt_genome_node_get_seqid(node)),
                  gt_feature_node_get_source(fn),
                  gt_feature_node_get_type(fn),
                  gt_genome_node_get_start(node),
                  gt_genome_node_get_end(node));

  /* score */
  if (!gt_feature_node_score_is_defined(fn))
    gt_file_xfputc('.', outfp);
  else
    gt_file_xprintf(outfp, "%.3g", gt_feature_node_get_score(fn));

  /* strand, phase */
  gt_file_xprintf(outfp, "\t%c\t%c\t",
                  GT_STRAND_CHARS[gt_feature_node_get_strand(fn)],
                  GT_PHASE_CHARS[gt_feature_node_get_phase(fn)]);
}
/* Creates the "thick" child feature of <fn> covering <range>.
   The feature gets the type configured in <bed_parser> (or
   BED_THICK_FEATURE_TYPE if none is set), the strand and score of <fn> and
   -- if <fn> has one -- its "Name" attribute. */
static void construct_thick_feature(GtBEDParser *bed_parser, GtFeatureNode *fn,
                                    GtRange range)
{
  GtGenomeNode *thick_feature;
  GtFeatureNode *thick_fn;
  const char *name;
  gt_assert(fn);

  thick_feature =
    gt_feature_node_new(gt_genome_node_get_seqid((GtGenomeNode*) fn),
                        bed_parser->thick_feature_type
                        ? bed_parser->thick_feature_type
                        : BED_THICK_FEATURE_TYPE,
                        range.start, range.end,
                        gt_feature_node_get_strand(fn));
  thick_fn = (GtFeatureNode*) thick_feature;

  name = gt_feature_node_get_attribute(fn, "Name");
  if (name)
    gt_feature_node_add_attribute(thick_fn, "Name", name);

  gt_feature_node_set_score(thick_fn, gt_feature_node_get_score(fn));
  gt_feature_node_set_strand(thick_fn, gt_feature_node_get_strand(fn));
  gt_feature_node_add_child(fn, thick_fn);
}
/* Attaches an ORF with range <orf_rng>, frame <orf_frame> and strand <strand>
   to the feature tree rooted at <gf>.
   Both coordinates of <orf_rng> are incremented by one first (presumably a
   conversion from 0-based to 1-based coordinates -- confirm against the
   caller). The tree is then iterated and the ORF is attached as GT_ORF_TYPE
   child to the last visited node that is not itself of type GT_ORF_TYPE and
   whose range contains the ORF. The new feature gets GT_ORF_FINDER_TAG as
   source and a "frame" attribute. If no node contains the ORF, nothing is
   attached. <err> is unused. */
static void orf_attach_results_to_gff3(GtFeatureNode *gf, GtRange orf_rng,
                                       unsigned int orf_frame,
                                       GtStrand strand,
                                       GT_UNUSED GtError *err)
{
  GtFeatureNodeIterator *gfi;
  GtFeatureNode *curnode = NULL, *parent_node = NULL;
  GtRange gfi_range;
  /* large enough for any unsigned int, so no frame value can overflow it */
  char frame_buf[16];

  orf_rng.start++;
  orf_rng.end++;

  /* bounded write; %u matches the unsigned type of <orf_frame> */
  (void) snprintf(frame_buf, sizeof frame_buf, "%u", orf_frame);

  /* find the node to attach to; a later match replaces an earlier one */
  gfi = gt_feature_node_iterator_new(gf);
  while ((curnode = gt_feature_node_iterator_next(gfi))) {
    if (strcmp(gt_feature_node_get_type(curnode),
               (const char*) GT_ORF_TYPE) != 0) {
      gfi_range = gt_genome_node_get_range((GtGenomeNode*) curnode);
      if (gt_range_contains(&gfi_range, &orf_rng)) {
        parent_node = curnode;
      }
    }
  }

  if (parent_node) {
    GtStr *tag = gt_str_new_cstr(GT_ORF_FINDER_TAG);
    GtGenomeNode *child;
    child = gt_feature_node_new(gt_genome_node_get_seqid((GtGenomeNode*) gf),
                                GT_ORF_TYPE, orf_rng.start, orf_rng.end,
                                strand);
    gt_feature_node_set_source((GtFeatureNode*) child, tag);
    gt_feature_node_set_attribute((GtFeatureNode*) child, "frame", frame_buf);
    gt_feature_node_add_child(parent_node, (GtFeatureNode*) child);
    gt_str_delete(tag);
  }
  gt_feature_node_iterator_delete(gfi);
}
static double gaeval_visitor_calculate_coverage(AgnGaevalVisitor *v, GtFeatureNode *genemodel, GtError *error) { agn_assert(v && genemodel); GtStr *seqid = gt_genome_node_get_seqid((GtGenomeNode *)genemodel); GtRange mrna_range = gt_genome_node_get_range((GtGenomeNode *)genemodel); GtArray *overlapping = gt_array_new( sizeof(GtFeatureNode *) ); bool hasseqid; gt_feature_index_has_seqid(v->alignments, &hasseqid, gt_str_get(seqid),error); if(hasseqid) { gt_feature_index_get_features_for_range(v->alignments, overlapping, gt_str_get(seqid), &mrna_range, error); } GtArray *exon_coverage = gt_array_new( sizeof(GtRange) ); GtUword i; for(i = 0; i < gt_array_size(overlapping); i++) { GtFeatureNode *alignment = *(GtFeatureNode **)gt_array_get(overlapping, i); GtArray *covered_parts = gaeval_visitor_intersect((GtGenomeNode*)genemodel, (GtGenomeNode*)alignment); if(covered_parts != NULL) { GtArray *temp = gaeval_visitor_union(exon_coverage, covered_parts); gt_array_delete(covered_parts); gt_array_delete(exon_coverage); exon_coverage = temp; } } double coverage = gaeval_visitor_coverage_resolve(genemodel, exon_coverage); gt_array_delete(exon_coverage); gt_array_delete(overlapping); return coverage; }
/* Attaches the best suitable PPT hit from <results> to the main node of
   <element>.
   If <*canonical_strand> is unknown, the top-ranked hit is used and its
   strand becomes the canonical strand. Otherwise the ranked hits are searched
   for the first one on the canonical strand; if there is none, nothing is
   attached. The chosen hit is added as GT_PPT_TYPE child feature (coordinates
   shifted by one) with source <tag>. The strand of the main node is set to
   the strand of the hit. */
static void ppt_attach_results_to_gff3(GtPPTResults *results,
                                       GtLTRElement *element,
                                       GtStrand *canonical_strand,
                                       GtStr *tag)
{
  GtRange ppt_range;
  GtGenomeNode *gf;
  GtPPTHit *hit;
  GtStrand hit_strand;
  unsigned long next = 0;

  hit = gt_ppt_results_get_ranked_hit(results, next);
  next++;

  if (*canonical_strand != GT_STRAND_UNKNOWN) {
    /* find best-scoring PPT on the given canonical strand */
    while (gt_ppt_hit_get_strand(hit) != *canonical_strand
             && next < gt_ppt_results_get_number_of_hits(results)) {
      gt_log_log("dropping PPT because of nonconsistent strand: %s\n",
                 gt_feature_node_get_attribute(element->mainnode, "ID"));
      hit = gt_ppt_results_get_ranked_hit(results, next);
      next++;
    }
    /* if there is none, do not report a PPT */
    if (gt_ppt_hit_get_strand(hit) != *canonical_strand)
      return;
  }
  else
    *canonical_strand = gt_ppt_hit_get_strand(hit);

  hit_strand = gt_ppt_hit_get_strand(hit);

  /* GFF3 is 1-based */
  ppt_range = gt_ppt_hit_get_coords(hit);
  ppt_range.start++;
  ppt_range.end++;

  gf = gt_feature_node_new(gt_genome_node_get_seqid((GtGenomeNode*)
                                                    element->mainnode),
                           GT_PPT_TYPE, ppt_range.start, ppt_range.end,
                           hit_strand);
  gt_feature_node_set_source((GtFeatureNode*) gf, tag);
  gt_feature_node_set_strand(element->mainnode, hit_strand);
  gt_feature_node_add_child(element->mainnode, (GtFeatureNode*) gf);
}
/* Appends the sequence of <gn> to <sequence> if <gn> is a feature of the
   given <type>; other features are left alone.
   Besides fetching the sequence via <region_mapping>, the bookkeeping
   variables are updated for a matching feature:
   - reverse strand: <*reverse_strand> is set and <*phase> is set to the
     phase of this feature
   - otherwise: the first matching feature sets <*first_child_of_type_seen>
     and stores its phase in <*phase>; every later one sets <*phase> to
     GT_PHASE_UNDEFINED
   <gn> must be a feature node (asserted).
   Returns 0 on success or the error code of the sequence lookup. */
static int extract_join_feature(GtGenomeNode *gn, const char *type,
                                GtRegionMapping *region_mapping,
                                GtStr *sequence, bool *reverse_strand,
                                bool *first_child_of_type_seen, GtPhase *phase,
                                GtError *err)
{
  char *outsequence;
  GtFeatureNode *fn;
  GtRange range;
  int had_err = 0;
  gt_error_check(err);
  fn = gt_feature_node_cast(gn);
  gt_assert(fn);

  if (!gt_feature_node_has_type(fn, type))
    return had_err; /* not a feature we are joining */

  if (gt_feature_node_get_strand(fn) == GT_STRAND_REVERSE) {
    *reverse_strand = true;
    *phase = gt_feature_node_get_phase(fn);
  }
  else if (!(*first_child_of_type_seen)) {
    *first_child_of_type_seen = true;
    *phase = gt_feature_node_get_phase(fn);
  }
  else
    *phase = GT_PHASE_UNDEFINED;

  range = gt_genome_node_get_range(gn);
  had_err = gt_region_mapping_get_sequence(region_mapping, &outsequence,
                                           gt_genome_node_get_seqid(gn),
                                           range.start, range.end, err);
  if (had_err)
    return had_err;

  gt_str_append_cstr_nt(sequence, outsequence, gt_range_length(&range));
  gt_free(outsequence);
  return had_err;
}
/* Region-node callback of the add-IDs visitor.
   If the sequence ID of <rn> is already recorded in
   <undefined_sequence_regions>, <err> is set (naming the ID, line number and
   file of <rn>) and -1 is returned. Otherwise the sequence ID is recorded in
   <defined_seqids> (unless it already is), <rn> is added to the node buffer
   and 0 is returned. */
static int add_ids_visitor_region_node(GtNodeVisitor *nv, GtRegionNode *rn,
                                       GT_UNUSED GtError *err)
{
  GtAddIDsVisitor *aiv;
  GtGenomeNode *region;
  const char *seqid;
  gt_error_check(err);
  aiv = add_ids_visitor_cast(nv);
  region = (GtGenomeNode*) rn;
  seqid = gt_str_get(gt_genome_node_get_seqid(region));

  if (gt_hashmap_get(aiv->undefined_sequence_regions, seqid)) {
    gt_error_set(err, "genome feature with id \"%s\" has been defined before "
                 "the corresponding \"%s\" definition on line %u in file "
                 "\"%s\"", seqid, GT_GFF_SEQUENCE_REGION,
                 gt_genome_node_get_line_number(region),
                 gt_genome_node_get_filename(region));
    return -1;
  }

  if (!gt_cstr_table_get(aiv->defined_seqids, seqid))
    gt_cstr_table_add(aiv->defined_seqids, seqid);
  gt_queue_add(aiv->node_buffer, rn);
  return 0;
}
/* Callback placing an inter-feature between two consecutive features of the
   "outside" type.
   <data> is expected to be a GtInterFeatureVisitor*. Features of other types
   are ignored. For a feature of the outside type that has a predecessor of
   that type, a feature of the "inter" type is created spanning the gap
   between the two and added as child of the parent feature; it takes the
   strand of the predecessor and the sequence ID of the parent. If the two
   features overlap or leave no room between them, a warning is issued and
   the function returns without updating <previous_feature>. Otherwise the
   current feature is remembered as predecessor for the next call.
   <err> is unused. Always returns 0. */
static int inter_feature_in_children(GtFeatureNode *current_feature, void *data,
                                     GT_UNUSED GtError *err)
{
  GtInterFeatureVisitor *aiv = (GtInterFeatureVisitor*) data;
  GtFeatureNode *previous, *inter_node;
  GtRange previous_range, current_range, inter_range;
  GtStrand inter_strand;
  GtStr *parent_seqid;
  gt_error_check(err);
  gt_assert(current_feature);

  if (!gt_feature_node_has_type(current_feature, aiv->outside_type))
    return 0;

  previous = aiv->previous_feature;
  if (previous) {
    /* determine inter range */
    previous_range = gt_genome_node_get_range((GtGenomeNode*) previous);
    current_range = gt_genome_node_get_range((GtGenomeNode*) current_feature);
    if (previous_range.end >= current_range.start) {
      gt_warning("overlapping boundary features " GT_WU "-" GT_WU " and "
                 GT_WU "-" GT_WU ", " "not placing '%s' inter-feature",
                 previous_range.start, previous_range.end,
                 current_range.start, current_range.end,
                 aiv->inter_type);
      return 0;
    }
    if (current_range.start - previous_range.end < 2) {
      gt_warning("no space for inter-feature '%s' between " GT_WU " and "
                 GT_WU, aiv->inter_type, previous_range.end,
                 current_range.start);
      return 0;
    }
    inter_range.start = previous_range.end + 1;
    inter_range.end = current_range.start - 1;

    /* determine inter strand: both neighbours must agree */
    inter_strand = gt_feature_node_get_strand(previous);
    gt_assert(inter_strand == gt_feature_node_get_strand(current_feature));

    /* determine sequence id */
    parent_seqid = gt_genome_node_get_seqid((GtGenomeNode*)
                                            aiv->parent_feature);
    gt_assert(!gt_str_cmp(parent_seqid,
                          gt_genome_node_get_seqid((GtGenomeNode*)
                                                   previous)));
    gt_assert(!gt_str_cmp(parent_seqid,
                          gt_genome_node_get_seqid((GtGenomeNode*)
                                                   current_feature)));

    /* create inter feature */
    inter_node = (GtFeatureNode*)
                 gt_feature_node_new(parent_seqid, aiv->inter_type,
                                     inter_range.start, inter_range.end,
                                     inter_strand);
    gt_feature_node_add_child(aiv->parent_feature, inter_node);
  }

  aiv->previous_feature = current_feature;
  return 0;
}
/* Feature-node callback of the select visitor.
   Increments the feature counter and decides whether <fn> is kept (added to
   the node buffer) or filtered out (deleted). <fn> is filtered if
   - a sequence ID was specified and differs from the one of <fn>,
   - a source was specified and differs from the one of <fn>,
   - <fn> is a gene and violates one of the gene constraints (maximum length,
     maximum number of genes, minimum/maximum score, feature number), or
   - one of the remaining filters (contain range, overlap range, strand,
     target strand, has CDS, minimum average splice site probability) says so.
   Genes passing the gene constraints are counted in <gene_num>, even if a
   later filter removes them. <err> is unused. Always returns 0. */
static int select_visitor_feature_node(GtNodeVisitor *nv, GtFeatureNode *fn,
                                       GT_UNUSED GtError *err)
{
  GtSelectVisitor *fv;
  GtGenomeNode *node;
  bool filter_node = false;
  gt_error_check(err);
  fv = select_visitor_cast(nv);
  fv->current_feature++;
  node = (GtGenomeNode*) fn;

  if (gt_str_length(fv->seqid) &&
      gt_str_cmp(fv->seqid, gt_genome_node_get_seqid(node))) {
    /* a seqid was specified and it is not the one of <fn> */
    filter_node = true;
  }
  else if (gt_str_length(fv->source) &&
           strcmp(gt_str_get(fv->source), gt_feature_node_get_source(fn))) {
    /* a source was specified and it is not the one of <fn> */
    filter_node = true;
  }
  else {
    GtRange range = gt_genome_node_get_range(node);
    /* enforce the gene constraints */
    /* XXX: we (spuriously) assume that genes are always root nodes */
    if (fn && gt_feature_node_has_type(fn, gt_ft_gene)) {
      filter_node =
        /* maximum gene length */
        (fv->max_gene_length != GT_UNDEF_ULONG &&
         gt_range_length(&range) > fv->max_gene_length) ||
        /* maximum number of genes */
        (fv->max_gene_num != GT_UNDEF_ULONG &&
         fv->gene_num >= fv->max_gene_num) ||
        /* minimum gene score */
        (fv->min_gene_score != GT_UNDEF_DOUBLE &&
         gt_feature_node_get_score(fn) < fv->min_gene_score) ||
        /* maximum gene score */
        (fv->max_gene_score != GT_UNDEF_DOUBLE &&
         gt_feature_node_get_score(fn) > fv->max_gene_score) ||
        /* only the feature with the given number */
        (fv->feature_num != GT_UNDEF_ULONG &&
         fv->feature_num != fv->current_feature);
      if (!filter_node)
        fv->gene_num++; /* gene passed filter */
    }
  }

  /* remaining filters, evaluated in order until the first one hits */
  filter_node = filter_node
                || filter_contain_range(fn, fv->contain_range)
                || filter_overlap_range(fn, fv->overlap_range)
                || filter_strand(fn, fv->strand)
                || filter_targetstrand(fn, fv->targetstrand)
                || filter_has_CDS(fn, fv->has_CDS)
                || filter_min_average_ssp(fn,
                                          fv->min_average_splice_site_prob);

  if (filter_node)
    gt_genome_node_delete(node);
  else
    gt_queue_add(fv->node_buffer, fn);

  return 0;
}