/* Recursively checks that the range of <child> lies within the range of
   <parent>, then applies the same check between <child> and each of its direct
   children. A violation is reported via gt_warning() only; it is not treated
   as an error. Returns 0 on success, otherwise the first non-zero code
   returned by a recursive call. */
static int check_boundaries_visitor_check_rec(GtFeatureNode *parent,
                                              GtFeatureNode *child,
                                              GtError *err)
{
  GtFeatureNodeIterator *fni;
  GtFeatureNode *node;
  GtRange range, p_range;
  int had_err = 0;

  range = gt_genome_node_get_range((GtGenomeNode*) child);
  p_range = gt_genome_node_get_range((GtGenomeNode*) parent);

  if (range.start < p_range.start || range.end > p_range.end) {
    gt_warning("%s child range " GT_WU "-" GT_WU " (file %s, line %u) not "
               "contained in %s parent range " GT_WU "-" GT_WU " (file %s, "
               "line %u)",
               gt_feature_node_get_type(child), range.start, range.end,
               gt_genome_node_get_filename((GtGenomeNode*) child),
               gt_genome_node_get_line_number((GtGenomeNode*) child),
               gt_feature_node_get_type(parent), p_range.start, p_range.end,
               gt_genome_node_get_filename((GtGenomeNode*) parent),
               gt_genome_node_get_line_number((GtGenomeNode*) parent));
  }

  fni = gt_feature_node_iterator_new_direct(child);
  /* stop at the first failure so that a later, successful sibling cannot
     overwrite an error code that was already recorded */
  while (!had_err && (node = gt_feature_node_iterator_next(fni))) {
    had_err = check_boundaries_visitor_check_rec(child, node, err);
  }
  gt_feature_node_iterator_delete(fni);

  return had_err;
}
/* Computes the span covered by the CDS features that are direct children of
   <transcript>: the smallest start and the largest end among them.
   If no direct child is a CDS feature, both coordinates are 0.
   For a transcript on the reverse strand the two coordinates are swapped
   before returning, so that <start> holds the larger coordinate. */
GtRange agn_transcript_cds_range(GtFeatureNode *transcript)
{
  gt_assert(transcript);

  GtRange trange;
  trange.start = 0;
  trange.end = 0;

  GtFeatureNodeIterator *iter = gt_feature_node_iterator_new_direct(transcript);
  GtFeatureNode *current;
  for
  (
    current = gt_feature_node_iterator_next(iter);
    current != NULL;
    current = gt_feature_node_iterator_next(iter)
  )
  {
    if(agn_gt_feature_node_is_cds_feature(current))
    {
      GtRange crange = gt_genome_node_get_range((GtGenomeNode *)current);
      /* 0 doubles as the "not set yet" marker for both coordinates */
      if(trange.start == 0 || crange.start < trange.start)
        trange.start = crange.start;
      if(trange.end == 0 || crange.end > trange.end)
        trange.end = crange.end;
    }
  }
  /* the iterator is owned by this function and must be released here */
  gt_feature_node_iterator_delete(iter);

  if(gt_feature_node_get_strand(transcript) == GT_STRAND_REVERSE)
  {
    GtUword temp = trange.start;
    trange.start = trange.end;
    trange.end = temp;
  }

  return trange;
}
/* Writes the exon structure of <transcript> to <outstream> as a location
   expression of the form "<S..>E" (single exon) or "join(<S..E,S..E,...,S..>E)"
   (several exons), wrapped in "complement(...)" when the transcript is on the
   reverse strand. Only exon features that are direct children of <transcript>
   are considered; they are sorted with agn_gt_genome_node_compare before
   printing. At least one exon is expected (checked with gt_assert). */
void agn_transcript_structure_gbk(GtFeatureNode *transcript, FILE *outstream)
{
  gt_assert(transcript && outstream);

  GtArray *exons = gt_array_new( sizeof(GtFeatureNode *) );
  GtFeatureNodeIterator *iter = gt_feature_node_iterator_new_direct(transcript);
  GtFeatureNode *child;
  for
  (
    child = gt_feature_node_iterator_next(iter);
    child != NULL;
    child = gt_feature_node_iterator_next(iter)
  )
  {
    if(agn_gt_feature_node_is_exon_feature(child))
      gt_array_add(exons, child);
  }
  gt_feature_node_iterator_delete(iter);

  gt_assert(gt_array_size(exons) > 0);
  gt_array_sort(exons, (GtCompare)agn_gt_genome_node_compare);

  GtStrand strand = gt_feature_node_get_strand(transcript);
  GtUword numexons = gt_array_size(exons);

  if(strand == GT_STRAND_REVERSE)
    fputs("complement(", outstream);

  /* coordinates are cast to unsigned long so that they always match the
     "%lu" conversion, whatever the underlying width of GtUword is */
  if(numexons == 1)
  {
    GtGenomeNode *exon = *(GtGenomeNode **)gt_array_get(exons, 0);
    GtRange exonrange = gt_genome_node_get_range(exon);
    fprintf(outstream, "<%lu..>%lu", (unsigned long)exonrange.start,
            (unsigned long)exonrange.end);
  }
  else
  {
    fputs("join(", outstream);
    GtUword i;
    for(i = 0; i < numexons; i++)
    {
      GtGenomeNode *exon = *(GtGenomeNode **)gt_array_get(exons, i);
      GtRange exonrange = gt_genome_node_get_range(exon);

      if(i == 0)
        fprintf(outstream, "<%lu..%lu", (unsigned long)exonrange.start,
                (unsigned long)exonrange.end);
      else if(i+1 == numexons)
        fprintf(outstream, ",%lu..>%lu", (unsigned long)exonrange.start,
                (unsigned long)exonrange.end);
      else
        fprintf(outstream, ",%lu..%lu", (unsigned long)exonrange.start,
                (unsigned long)exonrange.end);
    }
    fputs(")", outstream);
  }

  if(strand == GT_STRAND_REVERSE)
    fputs(")", outstream);

  /* the array only holds borrowed node pointers; release the container */
  gt_array_delete(exons);
}
/* Lua binding: takes the genome node userdata at stack index 1, creates a
   direct-children feature node iterator for it, and pushes the iterator as a
   new userdata tagged with GENOME_NODE_ITERATOR_METATABLE.
   Returns 1 (the number of values left on the Lua stack for the caller).
   NOTE(review): the node userdata is cast to a feature node without a type
   check in this function -- confirm that callers only pass feature nodes. */
static int feature_node_iterator_lua_new_direct(lua_State *L)
{
  GtFeatureNode **node_ud;
  GtFeatureNodeIterator **iter_ud;

  gt_assert(L);

  node_ud = (GtFeatureNode**) check_genome_node(L, 1);

  /* the userdata block stores just one iterator pointer */
  iter_ud = lua_newuserdata(L, sizeof (GtFeatureNodeIterator*));
  gt_assert(iter_ud);
  *iter_ud = gt_feature_node_iterator_new_direct(*node_ud);

  /* attach the iterator metatable to the userdata now on top of the stack */
  luaL_getmetatable(L, GENOME_NODE_ITERATOR_METATABLE);
  lua_setmetatable(L, -2);

  return 1;
}
/* Collects the CDS features among the direct children of <fn> and runs the
   phase check on them. Non-multi CDS features are gathered into one array and
   checked with check_cds_phases(); multi-feature CDS are grouped per
   multi-feature representative in a hashmap and each group is checked through
   check_cds_phases_hm(). The grouped check is skipped if the first check
   failed. Returns 0 on success, a non-zero error code otherwise. */
static int check_cds_phases_if_necessary(GtFeatureNode *fn,
                                         GtCDSCheckVisitor *v,
                                         bool second_pass, GtError *err)
{
  GtFeatureNodeIterator *child_iter;
  GtFeatureNode *child;
  GtArray *single_cds = NULL;
  GtHashmap *multi_cds = NULL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(fn);

  child_iter = gt_feature_node_iterator_new_direct(fn);
  for (child = gt_feature_node_iterator_next(child_iter);
       child != NULL;
       child = gt_feature_node_iterator_next(child_iter)) {
    GtFeatureNode *representative;
    GtArray *group;

    if (!gt_feature_node_has_type(child, gt_ft_CDS))
      continue;

    if (!gt_feature_node_is_multi(child)) {
      /* plain CDS: goes into the flat list, created on first use */
      if (single_cds == NULL)
        single_cds = gt_array_new(sizeof (GtFeatureNode*));
      gt_array_add(single_cds, child);
      continue;
    }

    /* multi-feature CDS: group by representative; the hashmap owns the
       per-group arrays and frees them with gt_array_delete */
    if (multi_cds == NULL) {
      multi_cds = gt_hashmap_new(GT_HASH_DIRECT, NULL,
                                 (GtFree) gt_array_delete);
    }
    representative = gt_feature_node_get_multi_representative(child);
    group = gt_hashmap_get(multi_cds, representative);
    if (group != NULL) {
      gt_array_add(group, child);
    }
    else {
      /* a new group is seeded with the representative itself */
      group = gt_array_new(sizeof (GtFeatureNode*));
      gt_array_add(group, representative);
      gt_hashmap_add(multi_cds, representative, group);
    }
  }
  gt_feature_node_iterator_delete(child_iter);

  if (single_cds != NULL)
    had_err = check_cds_phases(single_cds, v, false, second_pass, err);
  if (!had_err && multi_cds != NULL)
    had_err = gt_hashmap_foreach(multi_cds, check_cds_phases_hm, v, err);

  gt_array_delete(single_cds);
  gt_hashmap_delete(multi_cds);

  return had_err;
}
static int check_boundaries_visitor_feature_node(GT_UNUSED GtNodeVisitor *nv, GtFeatureNode *fn, GT_UNUSED GtError *err) { GtFeatureNodeIterator *fni; GtFeatureNode *node; int had_err = 0; fni = gt_feature_node_iterator_new_direct(fn); while (!had_err && (node = gt_feature_node_iterator_next(fni))) { had_err = check_boundaries_visitor_check_rec(fn, node, err); } gt_feature_node_iterator_delete(fni); return 0; }
/* Returns a new array (GtFeatureNode* elements) of every node yielded by the
   full iterator over <fn> that has <cds_feature> among its direct children.
   A node is added once per matching direct child. The caller owns the
   returned array; the nodes stored in it are borrowed pointers. */
static GtArray* find_cds_parents(GtFeatureNode *cds_feature, GtFeatureNode *fn)
{
  GtArray *found;
  GtFeatureNodeIterator *all_iter;
  GtFeatureNode *candidate;

  gt_assert(cds_feature && fn);

  found = gt_array_new(sizeof (GtFeatureNode*));
  all_iter = gt_feature_node_iterator_new(fn);

  for (candidate = gt_feature_node_iterator_next(all_iter);
       candidate != NULL;
       candidate = gt_feature_node_iterator_next(all_iter)) {
    GtFeatureNodeIterator *direct_iter;
    GtFeatureNode *kid;

    /* look only one level below the candidate */
    direct_iter = gt_feature_node_iterator_new_direct(candidate);
    for (kid = gt_feature_node_iterator_next(direct_iter);
         kid != NULL;
         kid = gt_feature_node_iterator_next(direct_iter)) {
      if (kid != cds_feature)
        continue;
      gt_array_add(found, candidate);
    }
    gt_feature_node_iterator_delete(direct_iter);
  }

  gt_feature_node_iterator_delete(all_iter);
  return found;
}
/* Visitor callback: if <fn> is a SNV/SNP feature, classifies each of its
   variant alleles against every mRNA that is a direct child of the visitor's
   current gene and has a CDS overlapping the variant.

   For each such mRNA the position of the variant inside the stored mRNA
   sequence (sav->rnaseqs) is derived by summing the lengths of the CDS
   features preceding the overlapping one plus the offset into the overlapping
   CDS; on the reverse strand the position is mirrored. The alleles listed in
   the GT_GVF_VARIANT_SEQ attribute (separated by ',' and terminated by ';' or
   end of string) are then passed one by one to snp_annotator_classify_snp(),
   skipping alleles identical to the original base.

   Returns 0 if <fn> is NULL, is not a SNV/SNP, or was processed successfully;
   otherwise the first non-zero error code encountered. */
static int snp_annotator_visitor_feature_node(GtNodeVisitor *nv,
                                              GtFeatureNode *fn,
                                              GtError *err)
{
  int had_err = 0;
  GtSNPAnnotatorVisitor *sav;
  GtFeatureNodeIterator *fni, *mrnafni;
  GtFeatureNode *curnode, *curnode2;
  GtRange snp_rng;
  gt_error_check(err);
  sav = snp_annotator_visitor_cast(nv);

  /* ignore non-nodes */
  if (!fn) return 0;

  /* only process SNPs */
  if (!(gt_feature_node_get_type(fn) == sav->SNV_type ||
        gt_feature_node_get_type(fn) == sav->SNP_type)) {
    return 0;
  }

  fni = gt_feature_node_iterator_new_direct(sav->gene);
  snp_rng = gt_genome_node_get_range((GtGenomeNode*) fn);
  while (!had_err && (curnode = gt_feature_node_iterator_next(fni))) {
    if (gt_feature_node_get_type(curnode) == sav->mRNA_type) {
      GtStrand mrna_strand = gt_feature_node_get_strand(curnode);
#ifndef NDEBUG
      const char *refstr = NULL;
#endif
      /* running offset into the mRNA sequence; grows by the length of every
         CDS visited before the one that overlaps the variant */
      GtUword mrnasnppos = 0;
      mrnafni = gt_feature_node_iterator_new(curnode);
      while (!had_err && (curnode2 = gt_feature_node_iterator_next(mrnafni))) {
        if (gt_feature_node_get_type(curnode2) == sav->CDS_type) {
          GtRange cds_rng = gt_genome_node_get_range((GtGenomeNode*) curnode2);
          if (gt_range_overlap(&snp_rng, &cds_rng)) {
            char *mRNA, origchar;
            char *variantchars = NULL, *variantptr = NULL;
            char *refptr = NULL;
            mRNA = (char*) gt_hashmap_get(sav->rnaseqs, curnode);
            gt_assert(mRNA);
            gt_assert(snp_rng.start >= cds_rng.start);
            mrnasnppos += (snp_rng.start - cds_rng.start);
            if (mrna_strand == GT_STRAND_REVERSE)
              mrnasnppos = strlen(mRNA) - mrnasnppos - 1;
            gt_assert(mrnasnppos < strlen(mRNA));
            origchar = mRNA[mrnasnppos];
            /* The alleles in the feature attributes are compared against the
               complemented mRNA base on reverse-strand transcripts. This
               conversion used to happen only in assert-enabled builds, which
               made the allele filter below behave differently under NDEBUG;
               it is now done unconditionally. */
            if (mrna_strand == GT_STRAND_REVERSE)
              had_err = gt_complement(&origchar, origchar, err);
#ifndef NDEBUG
            if (!had_err) {
              refstr = refptr = gt_cstr_dup(gt_feature_node_get_attribute(fn,
                                                         GT_GVF_REFERENCE_SEQ));
              if (refstr) {
                /* sanity check: annotated reference base matches sequence */
                gt_assert(toupper((unsigned char) origchar) ==
                          toupper((unsigned char) refstr[0]));
              }
            }
#endif
            if (!had_err) {
              variantchars = variantptr = gt_cstr_dup(
                         gt_feature_node_get_attribute(fn, GT_GVF_VARIANT_SEQ));
            }
            if (!had_err && variantchars) {
              /* index of the current allele within the variant list */
              GtUword i = 0;

              while (!had_err &&
                     (*variantchars != ';' && *variantchars != '\0')) {
                if (*variantchars != ',' && *variantchars != origchar) {
                  char variantchar = *variantchars;
#ifndef NDEBUG
                  char refchar = refstr ? refstr[0] : '-';  /* XXX */
                  if (!had_err && mrna_strand == GT_STRAND_REVERSE)
                    had_err = gt_complement(&refchar, refchar, err);
#endif
                  /* bring the allele onto the strand of the mRNA sequence */
                  if (!had_err && mrna_strand == GT_STRAND_REVERSE)
                    had_err = gt_complement(&variantchar, variantchar, err);
                  if (!had_err) {
                    had_err = snp_annotator_classify_snp(sav, curnode, fn,
                                                         mrnasnppos,
                                                         i++,
                                                         variantchar,
#ifndef NDEBUG
                                                         refchar,
#endif
                                                         err);
                  }
                } else if (*variantchars == origchar) {
                  /* allele equals the original base: counted, not classified */
                  i++;
                }
                variantchars++;
              }
            }
            /* release the duplicated attribute strings on every path */
            gt_free(variantptr);
            gt_free(refptr);
          } else {
            mrnasnppos += gt_range_length(&cds_rng);
          }
        }
      }
      gt_feature_node_iterator_delete(mrnafni);
    }
  }
  gt_feature_node_iterator_delete(fni);

  return had_err;
}
static int gt_extract_feature_sequence_generic(GtStr *sequence, GtGenomeNode *gn, const char *type, bool join, GtStr *seqid, GtStrArray *target_ids, unsigned int *out_phase_offset, GtRegionMapping *region_mapping, GtError *err) { GtFeatureNode *fn; GtRange range; unsigned int phase_offset = 0; char *outsequence; const char *target; int had_err = 0; gt_error_check(err); fn = gt_genome_node_cast(gt_feature_node_class(), gn); gt_assert(fn); if (seqid) gt_str_append_str(seqid, gt_genome_node_get_seqid(gn)); if (target_ids && (target = gt_feature_node_get_attribute(fn, GT_GFF_TARGET))) { had_err = gt_gff3_parser_parse_all_target_attributes(target, false, target_ids, NULL, NULL, "", 0, err); } if (!had_err) { if (join) { GtFeatureNodeIterator *fni; GtFeatureNode *child; bool reverse_strand = false, first_child = true, first_child_of_type_seen = false; GtPhase phase = GT_PHASE_UNDEFINED; /* in this case we have to traverse the children */ fni = gt_feature_node_iterator_new_direct(gt_feature_node_cast(gn)); while (!had_err && (child = gt_feature_node_iterator_next(fni))) { if (first_child) { if (target_ids && (target = gt_feature_node_get_attribute(child, GT_GFF_TARGET))) { gt_str_array_reset(target_ids); had_err = gt_gff3_parser_parse_all_target_attributes(target, false, target_ids, NULL, NULL, "", 0, err); } first_child = false; } if (!had_err) { if (extract_join_feature((GtGenomeNode*) child, type, region_mapping, sequence, &reverse_strand, &first_child_of_type_seen, &phase, err)) { had_err = -1; } if (phase != GT_PHASE_UNDEFINED) { phase_offset = (int) phase; } } } gt_feature_node_iterator_delete(fni); gt_assert(phase_offset <= (unsigned int) GT_PHASE_UNDEFINED); if (!had_err && gt_str_length(sequence)) { if (reverse_strand) { had_err = gt_reverse_complement(gt_str_get(sequence), gt_str_length(sequence), err); } } } else if (gt_feature_node_get_type(fn) == type) { GtPhase phase = gt_feature_node_get_phase(fn); gt_assert(!had_err); if (phase != GT_PHASE_UNDEFINED) 
phase_offset = (unsigned int) phase; /* otherwise we only have to look at this feature */ range = gt_genome_node_get_range(gn); gt_assert(range.start); /* 1-based coordinates */ had_err = gt_region_mapping_get_sequence(region_mapping, &outsequence, gt_genome_node_get_seqid(gn), range.start, range.end, err); if (!had_err) { gt_str_append_cstr_nt(sequence, outsequence, gt_range_length(&range)); gt_free(outsequence); if (gt_feature_node_get_strand(fn) == GT_STRAND_REVERSE) { had_err = gt_reverse_complement(gt_str_get(sequence), gt_str_length(sequence), err); } } } } if (out_phase_offset && phase_offset != GT_PHASE_UNDEFINED) { *out_phase_offset = phase_offset; } return had_err; }