/**
 * Collect the parts of a gene model's exons that are covered by an alignment.
 *
 * @param genemodel  genome node holding the gene model; must be a feature node
 *                   of type "mRNA" (enforced by assertion)
 * @param alignment  genome node holding the alignment feature
 * @returns          NULL if gene model and alignment are on different strands;
 *                   otherwise a newly created array of GtRange values, one for
 *                   every non-empty intersection between an exon and a node of
 *                   the alignment that is not of type "match_gap". The array
 *                   is handed to the caller, which is presumably responsible
 *                   for deleting it.
 */
static GtArray*
gaeval_visitor_intersect(GtGenomeNode *genemodel, GtGenomeNode *alignment)
{
  agn_assert(genemodel && alignment);

  GtFeatureNode *genefn = gt_feature_node_cast(genemodel);
  GtFeatureNode *algnfn = gt_feature_node_cast(alignment);
  agn_assert(gt_feature_node_has_type(genefn, "mRNA"));

  GtStrand genestrand = gt_feature_node_get_strand(genefn);
  GtStrand algnstrand = gt_feature_node_get_strand(algnfn);
  if(genestrand != algnstrand)
    return NULL;

  GtArray *covered_parts = gt_array_new( sizeof(GtRange) );
  GtArray *exons = agn_typecheck_select(genefn, agn_typecheck_exon);

  /* {0, 0} is the value treated as "no intersection" below; it never changes,
     so it is set up once instead of once per exon */
  GtRange nullrange = {0, 0};

  /* unsigned index: it is compared against array sizes and mixed with the
     unsigned inner index j */
  GtUword i;
  for(i = 0; i < gt_array_size(exons); i++)
  {
    GtGenomeNode *exon = *(GtGenomeNode **)gt_array_get(exons, i);
    GtRange exonrange = gt_genome_node_get_range(exon);

    /* walk every node reachable from the alignment feature */
    GtFeatureNodeIterator *aniter = gt_feature_node_iterator_new(algnfn);
    GtFeatureNode *tempaln;
    for(tempaln  = gt_feature_node_iterator_next(aniter);
        tempaln != NULL;
        tempaln  = gt_feature_node_iterator_next(aniter))
    {
      /* gaps do not contribute coverage */
      if(gt_feature_node_has_type(tempaln, "match_gap"))
        continue;

      GtRange alnrange = gt_genome_node_get_range((GtGenomeNode *) tempaln);
      GtRange intr = gaeval_visitor_range_intersect(&exonrange, &alnrange);
      if(gt_range_compare(&intr, &nullrange) != 0)
        gt_array_add(covered_parts, intr);
    }
    gt_feature_node_iterator_delete(aniter);
  }
  gt_array_delete(exons);

  /* sanity check: the collected ranges must be pairwise non-overlapping */
  for(i = 0; i < gt_array_size(covered_parts); i++)
  {
    GtRange *r1 = gt_array_get(covered_parts, i);
    GtUword j;
    for(j = i+1; j < gt_array_size(covered_parts); j++)
    {
      GtRange *r2 = gt_array_get(covered_parts, j);
      agn_assert(gt_range_overlap(r1, r2) == false);
    }
  }

  return covered_parts;
}
/* Replace the sequence ID of feature node <gn> with a new reference to
   <seqid>. Both arguments must be non-NULL (enforced by assertion). */
static void feature_node_change_seqid(GtGenomeNode *gn, GtStr *seqid)
{
  GtFeatureNode *fn = gt_feature_node_cast(gn);
  GtStr *old_seqid;
  gt_assert(fn && seqid);
  /* Take the new reference before dropping the old one: if <seqid> is the
     very string the node already holds, releasing it first could free the
     string before it is referenced again. */
  old_seqid = fn->seqid;
  fn->seqid = gt_str_ref(seqid);
  gt_str_delete(old_seqid);
}
/* Create a new feature node of the given <type> located on sequence <seqid>
   in the range <start>..<end> on <strand>. <seqid> and <type> must be
   non-NULL and <start> must not exceed <end> (both enforced by assertion).
   The node takes its own reference to <seqid>; source, attributes, children
   and observer start out unset, the score is GT_UNDEF_FLOAT and the phase is
   GT_PHASE_UNDEFINED. Returns the node as a generic genome node. */
GtGenomeNode* gt_feature_node_new(GtStr *seqid, const char *type,
                                  GtUword start, GtUword end, GtStrand strand)
{
  GtGenomeNode *node;
  GtFeatureNode *feature;

  gt_assert(seqid && type);
  gt_assert(start <= end);

  node = gt_genome_node_create(gt_feature_node_class());
  feature = gt_feature_node_cast(node);

  /* what the feature is and where it lies */
  feature->seqid = gt_str_ref(seqid);
  feature->source = NULL;
  feature->type = gt_symbol(type);
  feature->score = GT_UNDEF_FLOAT;
  feature->range.start = start;
  feature->range.end = end;

  /* optional parts are empty at first */
  feature->representative = NULL;
  feature->attributes = NULL;
  feature->children = NULL; /* the children list is created on demand */
  feature->observer = NULL; /* must be cleared before the setters below run */

  /* flags packed into the bit field */
  feature->bit_field = 0;
  feature->bit_field |= strand << STRAND_OFFSET;
  gt_feature_node_set_phase(feature, GT_PHASE_UNDEFINED);
  set_transcriptfeaturetype(feature, TRANSCRIPT_FEATURE_TYPE_UNDETERMINED);
  set_tree_status(&feature->bit_field, IS_TREE);
  /* the DFS status is set to DFS_WHITE already */

  feature->representative = NULL;
  return node;
}
/* Lua binding: add the feature node given as argument 2 to the feature index
   given as argument 1. Raises a Lua argument error if argument 2 is not a
   feature node, has no sequence ID, or if the index does not contain that
   sequence ID; failures reported by the feature index are raised via
   gt_lua_error(). Pushes no results. */
static int feature_index_lua_add_feature_node(lua_State *L)
{
  GtFeatureIndex **findex;
  GtGenomeNode **node;
  GtFeatureNode *feature;
  GtStr *node_seqid;
  GtError *err;
  bool seqid_known;

  gt_assert(L);

  /* fetch and validate the arguments */
  findex = check_feature_index(L, 1);
  node = check_genome_node(L, 2);
  feature = gt_feature_node_cast(*node);
  luaL_argcheck(L, feature, 2, "not a feature node");
  node_seqid = gt_genome_node_get_seqid(*node);
  luaL_argcheck(L, node_seqid, 2, "feature does not have a sequence id");

  /* the index must already know the sequence region of the feature; the
     error object is released before the argument check below so that it is
     not live when that check raises */
  err = gt_error_new();
  if (gt_feature_index_has_seqid(*findex, &seqid_known,
                                 gt_str_get(node_seqid), err)) {
    return gt_lua_error(L, err);
  }
  gt_error_delete(err);
  luaL_argcheck(L, seqid_known, 2,
                "feature index does not contain corresponding sequence region");

  /* finally store the feature */
  err = gt_error_new();
  if (gt_feature_index_add_feature_node(*findex, feature, err)) {
    return gt_lua_error(L, err);
  }
  gt_error_delete(err);

  return 0;
}
/* Unit test for the diagram class: stores a standard gene in an in-memory
   feature index, builds a diagram for "ctg123" over the range 100..10000 and
   runs the sketch test function on it once. Returns 0 on success and a
   non-zero value if the check on the shared error status fails. */
int gt_diagram_unit_test(GtError *err)
{
  GtRange testrng = {100, 10000};
  GtDiagramTestShared sh;
  GtGenomeNode *gene;
  int had_err = 0;

  gt_error_check(err);

  /* fixture: feature index with one gene, a style, and the shared state */
  gene = gt_feature_node_new_standard_gene();
  sh.fi = gt_feature_index_memory_new();
  sh.sty = gt_style_new(err);
  sh.err = err;
  sh.errstatus = 0;
  gt_feature_index_add_feature_node(sh.fi, gt_feature_node_cast(gene), err);
  gt_genome_node_delete(gene);
  sh.d = gt_diagram_new(sh.fi, "ctg123", &testrng, sh.sty, err);

  /* removed the multithreading test for now until it is fixed */
  gt_diagram_unit_test_sketch_func(&sh);
  gt_ensure(sh.errstatus == 0);

  /* tear down */
  gt_style_delete(sh.sty);
  gt_diagram_delete(sh.d);
  gt_feature_index_delete(sh.fi);

  return had_err;
}
/* Store a copy of <range> in feature node <gn> and, if an observer with a
   range_changed callback is attached, notify it with the node, a pointer to
   the stored range and the observer's data. */
static void feature_node_set_range(GtGenomeNode *gn, const GtRange *range)
{
  GtFeatureNode *fn = gt_feature_node_cast(gn);

  fn->range = *range;

  /* nobody listening: done */
  if (!fn->observer || !fn->observer->range_changed)
    return;

  fn->observer->range_changed(fn, &(fn->range), fn->observer->data);
}
static GtFeatureIndex *in_stream_test_data(GtError *error) { GtFeatureIndex *fi; GtFeatureNode *fn; GtGenomeNode *gn; GtRegionNode *rn; GtStr *seqid; fi = gt_feature_index_memory_new(); seqid = gt_str_new_cstr("chr1"); gn = gt_region_node_new(seqid, 1, 100000); rn = gt_region_node_cast(gn); gt_feature_index_add_region_node(fi, rn, error); gt_genome_node_delete(gn); gn = gt_feature_node_new(seqid, "region", 500, 5000, GT_STRAND_BOTH); fn = gt_feature_node_cast(gn); gt_feature_index_add_feature_node(fi, fn, error); gt_genome_node_delete(gn); gn = gt_feature_node_new(seqid, "region", 50000, 75000, GT_STRAND_BOTH); fn = gt_feature_node_cast(gn); gt_feature_index_add_feature_node(fi, fn, error); gt_genome_node_delete(gn); gt_str_delete(seqid); seqid = gt_str_new_cstr("scf0001"); gn = gt_region_node_new(seqid, 1, 10000); rn = gt_region_node_cast(gn); gt_feature_index_add_region_node(fi, rn, error); gt_genome_node_delete(gn); gn = gt_feature_node_new(seqid, "mRNA", 4000, 6000, GT_STRAND_REVERSE); fn = gt_feature_node_cast(gn); gt_feature_index_add_feature_node(fi, fn, error); gt_genome_node_delete(gn); gn = gt_feature_node_new(seqid, "mRNA", 7000, 9500, GT_STRAND_FORWARD); fn = gt_feature_node_cast(gn); gt_feature_index_add_feature_node(fi, fn, error); gt_genome_node_delete(gn); gt_str_delete(seqid); return fi; }
/* Hand feature node <gn> to node visitor <nv> by calling its feature node
   visit function; returns whatever that function returns. */
static int feature_node_accept(GtGenomeNode *gn, GtNodeVisitor *nv,
                               GtError *err)
{
  gt_error_check(err);
  return gt_node_visitor_visit_feature_node(nv, gt_feature_node_cast(gn), err);
}
/* Lua binding: push a boolean telling whether the feature node given as
   argument 1 contains a marked node (as reported by
   gt_feature_node_contains_marked()). Raises a Lua argument error if the
   argument is not a feature node. Pushes one result. */
static int genome_node_lua_contains_marked(lua_State *L)
{
  GtGenomeNode **gn;
  GtFeatureNode *fn;
  gn = check_genome_node(L, 1);
  /* the node comes from a script and may be of any genome node class; reject
     non-feature nodes here instead of passing an invalid pointer on */
  fn = gt_feature_node_cast(*gn);
  luaL_argcheck(L, fn, 1, "not a feature node");
  lua_pushboolean(L, gt_feature_node_contains_marked(fn));
  return 1;
}
/* Create a new pseudo feature node on sequence <seqid> covering
   <start>..<end> on <strand>. <seqid> must be non-NULL and <start> must not
   exceed <end> (both enforced by assertion). The node is built like a regular
   feature node, then its type is cleared and the pseudo feature bit is set. */
GtGenomeNode* gt_feature_node_new_pseudo(GtStr *seqid, GtUword start,
                                         GtUword end, GtStrand strand)
{
  GtGenomeNode *pseudo_node;
  GtFeatureNode *pseudo_feature;

  gt_assert(seqid);
  gt_assert(start <= end);

  /* the regular constructor requires a type, so a placeholder is passed */
  pseudo_node = gt_feature_node_new(seqid, "pseudo", start, end, strand);
  pseudo_feature = gt_feature_node_cast(pseudo_node);

  pseudo_feature->type = NULL; /* pseudo features do not have a type */
  pseudo_feature->bit_field |= 1 << PSEUDO_FEATURE_OFFSET;

  return pseudo_node;
}
/* Create a new pseudo feature node that copies sequence ID, range, strand and
   source from the template feature node <fn>, which must be non-NULL
   (enforced by assertion). Returns the new node as a generic genome node. */
GtGenomeNode* gt_feature_node_new_pseudo_template(GtFeatureNode *fn)
{
  GtFeatureNode *pf;
  GtGenomeNode *pn;
  GtRange range;
  gt_assert(fn);
  /* two separate statements; they used to be joined by a comma operator */
  range = feature_node_get_range((GtGenomeNode*) fn);
  pn = gt_feature_node_new_pseudo(feature_node_get_seqid((GtGenomeNode*) fn),
                                  range.start, range.end,
                                  gt_feature_node_get_strand(fn));
  pf = gt_feature_node_cast(pn);
  /* NOTE(review): fn->source is passed on unchecked -- confirm that
     gt_feature_node_set_source() accepts a template without a source */
  gt_feature_node_set_source(pf, fn->source);
  return pn;
}
/* If <gn> is a feature node, iterate over the nodes delivered by a feature
   node iterator on it and call extracttarget_from_seqfiles() for every
   "Target" attribute found, stopping at the first error. Nodes of other
   classes are ignored. Returns 0 on success, otherwise the error code of the
   failing extraction. */
static int extracttarget_from_node(GtGenomeNode *gn, GtStrArray *seqfiles,
                                   GtError *err)
{
  GtFeatureNodeIterator *fni;
  GtFeatureNode *child;
  const char *target;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(gn && seqfiles);

  /* nothing to do for nodes which are not feature nodes */
  if (!gt_genome_node_cast(gt_feature_node_class(), gn))
    return had_err;

  fni = gt_feature_node_iterator_new(gt_feature_node_cast(gn));
  for (;;) {
    /* XXX remove cast */
    child = (GtFeatureNode*) gt_feature_node_iterator_next(fni);
    if (!child)
      break;
    target = gt_feature_node_get_attribute(child, "Target");
    if (!target)
      continue;
    had_err = extracttarget_from_seqfiles(target, seqfiles, err);
    if (had_err)
      break; /* do not advance the iterator any further after an error */
  }
  gt_feature_node_iterator_delete(fni);

  return had_err;
}
/* Release everything owned by feature node <gn>: its references to the
   sequence ID and source strings, the attribute map, all child nodes and the
   children list itself. Afterwards an attached observer is informed through
   its deleted callback (if it has one) and then deleted. */
static void feature_node_free(GtGenomeNode *gn)
{
  GtFeatureNode *fn = gt_feature_node_cast(gn);

  gt_str_delete(fn->seqid);
  gt_str_delete(fn->source);
  gt_tag_value_map_delete(fn->attributes);

  /* drop the children first, then the list which held them */
  if (fn->children) {
    GtDlistelem *elem = gt_dlist_first(fn->children);
    while (elem != NULL) {
      gt_genome_node_delete(gt_dlistelem_get_data(elem));
      elem = gt_dlistelem_next(elem);
    }
  }
  gt_dlist_delete(fn->children);

  /* the observer is notified only after the members above have been
     released */
  if (fn->observer && fn->observer->deleted) {
    fn->observer->deleted(fn, fn->observer->data);
  }
  if (fn->observer) {
    gt_feature_node_observer_delete(fn->observer);
  }
}
static int extract_join_feature(GtGenomeNode *gn, const char *type, GtRegionMapping *region_mapping, GtStr *sequence, bool *reverse_strand, bool *first_child_of_type_seen, GtPhase *phase, GtError *err) { char *outsequence; GtFeatureNode *fn; GtRange range; int had_err = 0; gt_error_check(err); fn = gt_feature_node_cast(gn); gt_assert(fn); if (gt_feature_node_has_type(fn, type)) { if (gt_feature_node_get_strand(fn) == GT_STRAND_REVERSE) { *reverse_strand = true; *phase = gt_feature_node_get_phase(fn); } else { if (!(*first_child_of_type_seen)) { *first_child_of_type_seen = true; *phase = gt_feature_node_get_phase(fn); } else *phase = GT_PHASE_UNDEFINED; } range = gt_genome_node_get_range(gn); had_err = gt_region_mapping_get_sequence(region_mapping, &outsequence, gt_genome_node_get_seqid(gn), range.start, range.end, err); if (!had_err) { gt_str_append_cstr_nt(sequence, outsequence, gt_range_length(&range)); gt_free(outsequence); } } return had_err; }
/* Generic unit test for feature index implementations -- to be called from
   the implementing class with an instance <fi> of that class!

   The checks require that <fi> does not contain GT_FI_TEST_SEQID on entry and
   that this is the only sequence ID present once the test nodes were added.
   The test adds one region node directly, then creates
   GT_FI_TEST_FEATURES_PER_THREAD * gt_jobs random "gene" features and runs
   the add and query worker functions through gt_multithread().

   Returns 0 on success and a non-zero value if one of the gt_ensure() checks
   fails. */
int gt_feature_index_unit_test(GtFeatureIndex *fi, GtError *err)
{
  int had_err = 0, i, rval;
  GtFeatureIndexTestShared sh;
  GtStrArray *seqids;
  GtStr *seqid;
  GtRange check_range;
  GtRegionNode *rn;
  bool has_seqid;
  gt_error_check(err);

  /* state shared with the worker functions; it carries its own error object
     separate from <err> */
  sh.mutex = gt_mutex_new();
  sh.nodes = gt_array_new(sizeof (GtFeatureNode*));
  sh.error_count = 0;
  sh.next_node_idx = 0;
  sh.fi = fi;
  sh.err = gt_error_new();

  /* create region */
  seqid = gt_str_new_cstr(GT_FI_TEST_SEQID);
  rn = (GtRegionNode*) gt_region_node_new(seqid, GT_FI_TEST_START, GT_FI_TEST_END);

  /* test seqid is not supposed to exist */
  gt_ensure(gt_feature_index_has_seqid(sh.fi, &has_seqid, GT_FI_TEST_SEQID, err) == 0);
  gt_ensure(!has_seqid);

  /* add a sequence region directly and check if it has been added */
  rval = gt_feature_index_add_region_node(sh.fi, rn, err);
  gt_ensure(rval == 0);
  gt_genome_node_delete((GtGenomeNode*) rn);
  gt_ensure(gt_feature_index_has_seqid(sh.fi, &has_seqid, GT_FI_TEST_SEQID, err) == 0);
  gt_ensure(has_seqid);

  /* the stored range must be the one of the region node added above */
  /* NOTE(review): the return value of this call is not inspected, so
     check_range is read below even if the lookup did not succeed */
  gt_feature_index_get_range_for_seqid(sh.fi, &check_range, GT_FI_TEST_SEQID, err);
  gt_ensure(check_range.start == GT_FI_TEST_START && check_range.end == GT_FI_TEST_END);

  /* set up nodes to store */
  for (i=0;i<GT_FI_TEST_FEATURES_PER_THREAD*gt_jobs;i++) {
    GtUword start, end;
    GtFeatureNode *fn;
    /* random start position and a random extent below
       GT_FI_TEST_FEATURE_WIDTH */
    start = random() % (GT_FI_TEST_END - GT_FI_TEST_FEATURE_WIDTH);
    end = start + random() % (GT_FI_TEST_FEATURE_WIDTH);
    fn = gt_feature_node_cast(gt_feature_node_new(seqid, "gene", start, end, GT_STRAND_FORWARD));
    /* the nodes are not deleted in this function -- presumably the add worker
       releases them after storing; verify against
       gt_feature_index_unit_test_add */
    gt_array_add(sh.nodes, fn);
  }

  /* test parallel addition */
  gt_multithread(gt_feature_index_unit_test_add, &sh, err);
  seqids = gt_feature_index_get_seqids(fi, err);
  gt_ensure(seqids);
  gt_ensure(gt_feature_index_has_seqid(fi, &has_seqid,GT_FI_TEST_SEQID, err) == 0);
  gt_ensure(has_seqid);
  gt_ensure(gt_str_array_size(seqids) == 1);

  /* test parallel query */
  if (!had_err)
    gt_multithread(gt_feature_index_unit_test_query, &sh, err);
  gt_ensure(sh.error_count == 0);

  /* clean up the shared state and the local objects */
  gt_mutex_delete(sh.mutex);
  gt_error_delete(sh.err);
  gt_str_array_delete(seqids);
  gt_array_delete(sh.nodes);
  gt_str_delete(seqid);
  return had_err;
}
/* Lua binding: mark the feature node given as argument 1 by calling
   gt_feature_node_mark(). Raises a Lua argument error if the argument is not
   a feature node. Pushes no results. */
static int genome_node_lua_mark(lua_State *L)
{
  GtGenomeNode **gn = check_genome_node(L, 1);
  /* the node comes from a script and may be of any genome node class; reject
     non-feature nodes here instead of passing an invalid pointer on */
  GtFeatureNode *fn = gt_feature_node_cast(*gn);
  luaL_argcheck(L, fn, 1, "not a feature node");
  gt_feature_node_mark(fn);
  return 0;
}
/* Extract sequence for feature node <gn> via <region_mapping> and append it
   to <sequence>.

   - <join> set: the direct children of <gn> are traversed and the sequences
     of all children of the given <type> are appended one after another (see
     extract_join_feature()); if one of them lies on the reverse strand, the
     content of <sequence> is reverse complemented afterwards.
   - <join> not set: only <gn> itself is considered, and only if its type
     pointer equals <type>; its sequence is appended and reverse complemented
     if the feature lies on the reverse strand. Otherwise nothing is
     extracted and 0 is returned.

   Optional outputs (each may be NULL):
   - <seqid>: the sequence ID of <gn> is appended to it.
   - <target_ids>: receives the IDs parsed from the Target attribute of <gn>;
     in join mode they are replaced by those of the first direct child if
     that child has a Target attribute.
   - <out_phase_offset>: receives the phase offset determined below, provided
     it differs from GT_PHASE_UNDEFINED.

   Returns 0 on success and a non-zero value on error. */
static int gt_extract_feature_sequence_generic(GtStr *sequence, GtGenomeNode *gn, const char *type, bool join, GtStr *seqid, GtStrArray *target_ids, unsigned int *out_phase_offset, GtRegionMapping *region_mapping, GtError *err)
{
  GtFeatureNode *fn;
  GtRange range;
  unsigned int phase_offset = 0;
  char *outsequence;
  const char *target;
  int had_err = 0;
  gt_error_check(err);
  /* <gn> has to be a feature node */
  fn = gt_genome_node_cast(gt_feature_node_class(), gn);
  gt_assert(fn);
  if (seqid)
    gt_str_append_str(seqid, gt_genome_node_get_seqid(gn));
  /* Target attribute of the feature itself */
  if (target_ids && (target = gt_feature_node_get_attribute(fn, GT_GFF_TARGET))) {
    had_err = gt_gff3_parser_parse_all_target_attributes(target, false, target_ids, NULL, NULL, "", 0, err);
  }
  if (!had_err) {
    if (join) {
      GtFeatureNodeIterator *fni;
      GtFeatureNode *child;
      bool reverse_strand = false, first_child = true, first_child_of_type_seen = false;
      GtPhase phase = GT_PHASE_UNDEFINED;
      /* in this case we have to traverse the children */
      fni = gt_feature_node_iterator_new_direct(gt_feature_node_cast(gn));
      while (!had_err && (child = gt_feature_node_iterator_next(fni))) {
        if (first_child) {
          /* a Target attribute on the first child (whatever its type)
             replaces the IDs collected from the parent */
          if (target_ids && (target = gt_feature_node_get_attribute(child, GT_GFF_TARGET))) {
            gt_str_array_reset(target_ids);
            had_err = gt_gff3_parser_parse_all_target_attributes(target, false, target_ids, NULL, NULL, "", 0, err);
          }
          first_child = false;
        }
        if (!had_err) {
          /* appends the sequence of <child> if it has the requested type and
             updates reverse_strand / phase accordingly */
          if (extract_join_feature((GtGenomeNode*) child, type, region_mapping, sequence, &reverse_strand, &first_child_of_type_seen, &phase, err)) {
            had_err = -1;
          }
          /* remember the most recent defined phase; phase is not reset
             between children, so it keeps its value across children of other
             types */
          if (phase != GT_PHASE_UNDEFINED) {
            phase_offset = (int) phase;
          }
        }
      }
      gt_feature_node_iterator_delete(fni);
      gt_assert(phase_offset <= (unsigned int) GT_PHASE_UNDEFINED);
      if (!had_err && gt_str_length(sequence)) {
        if (reverse_strand) {
          /* operates in place on the complete content of <sequence>,
             including anything it held before this call */
          had_err = gt_reverse_complement(gt_str_get(sequence), gt_str_length(sequence), err);
        }
      }
    }
    /* NOTE(review): pointer comparison, not a string comparison -- this
       presumably relies on <type> being the same interned string as the
       feature type (cf. gt_symbol()); confirm with the callers */
    else if (gt_feature_node_get_type(fn) == type) {
      GtPhase phase = gt_feature_node_get_phase(fn);
      gt_assert(!had_err);
      if (phase != GT_PHASE_UNDEFINED)
        phase_offset = (unsigned int) phase;
      /* otherwise we only have to look at this feature */
      range = gt_genome_node_get_range(gn);
      gt_assert(range.start); /* 1-based coordinates */
      had_err = gt_region_mapping_get_sequence(region_mapping, &outsequence, gt_genome_node_get_seqid(gn), range.start, range.end, err);
      if (!had_err) {
        gt_str_append_cstr_nt(sequence, outsequence, gt_range_length(&range));
        gt_free(outsequence);
        if (gt_feature_node_get_strand(fn) == GT_STRAND_REVERSE) {
          /* as above: the complete content of <sequence> is reverse
             complemented in place */
          had_err = gt_reverse_complement(gt_str_get(sequence), gt_str_length(sequence), err);
        }
      }
    }
  }
  /* the offset is reported even if an error occurred above */
  if (out_phase_offset && phase_offset != GT_PHASE_UNDEFINED) {
    *out_phase_offset = phase_offset;
  }
  return had_err;
}
/* Return the sequence ID string stored in feature node <gn>; no new reference
   is taken. */
static GtStr* feature_node_get_seqid(GtGenomeNode *gn)
{
  return gt_feature_node_cast(gn)->seqid;
}
/* Return a copy of the range stored in feature node <gn>. */
static GtRange feature_node_get_range(GtGenomeNode *gn)
{
  return gt_feature_node_cast(gn)->range;
}