GtGenomeNode* gt_feature_node_new(GtStr *seqid, const char *type, GtUword start, GtUword end, GtStrand strand) { GtGenomeNode *gn; GtFeatureNode *fn; gt_assert(seqid && type); gt_assert(start <= end); gn = gt_genome_node_create(gt_feature_node_class()); fn = gt_feature_node_cast(gn); fn->seqid = gt_str_ref(seqid); fn->source = NULL; fn->type = gt_symbol(type); fn->score = GT_UNDEF_FLOAT; fn->range.start = start; fn->range.end = end; fn->representative = NULL; fn->attributes = NULL; fn->bit_field = 0; fn->bit_field |= strand << STRAND_OFFSET; fn->children = NULL; /* the children list is create on demand */ fn->observer = NULL; gt_feature_node_set_phase(fn, GT_PHASE_UNDEFINED); set_transcriptfeaturetype(fn, TRANSCRIPT_FEATURE_TYPE_UNDETERMINED); set_tree_status(&fn->bit_field, IS_TREE); /* the DFS status is set to DFS_WHITE already */ fn->representative = NULL; return gn; }
static int feature_index_lua_add_feature_node(lua_State *L) { GtFeatureIndex **fi; GtGenomeNode **gn; GtFeatureNode *gf; GtStr *seqid; gt_assert(L); fi = check_feature_index(L, 1); gn = check_genome_node(L, 2); gf = gt_genome_node_cast(gt_feature_node_class(), *gn); luaL_argcheck(L, gf, 2, "not a feature node"); seqid = gt_genome_node_get_seqid(*gn); luaL_argcheck(L, seqid, 2, "feature does not have a sequence id"); luaL_argcheck(L, gt_feature_index_has_seqid(*fi, gt_str_get(seqid)), 2, "feature index does not contain corresponding sequence region"); gt_feature_index_add_feature_node(*fi, gf); return 0; }
static int extracttarget_from_node(GtGenomeNode *gn, GtStrArray *seqfiles, GtError *err) { GtFeatureNodeIterator *fni; int had_err = 0; gt_error_check(err); gt_assert(gn && seqfiles); if (gt_genome_node_cast(gt_feature_node_class(), gn)) { const char *target; GtFeatureNode *child; fni = gt_feature_node_iterator_new(gt_feature_node_cast(gn)); while (!had_err && /* XXX remove cast */ (child = (GtFeatureNode*) gt_feature_node_iterator_next(fni))) { if ((target = gt_feature_node_get_attribute(child, "Target"))) had_err = extracttarget_from_seqfiles(target, seqfiles, err); } gt_feature_node_iterator_delete(fni); } return had_err; }
static int CpGIOverlap_stream_next(GtNodeStream * ns, GtGenomeNode ** gn, GtError * err) { GtGenomeNode * cur_node, * next_node; GtFeatureNodeIterator * iter; int err_num = 0; *gn = NULL; CpGIOverlap_stream * context; const char * gene_name = NULL; const char * overlap_name = NULL; char chr_str[255]; int chr_num; unsigned int TSS; float CpGIOverlap; context = CpGIOverlap_stream_cast(ns); // find the genes, determine expression level if(!gt_node_stream_next(context->in_stream, &cur_node, err ) && cur_node != NULL ) { *gn = cur_node; // try casting as a feature node so we can test type if(!gt_genome_node_try_cast(gt_feature_node_class(), cur_node)) { return 0; } else // we found a feature node { // first check if it is a pseudo node, if so find the gene in it if available if (gt_feature_node_is_pseudo(cur_node)) { iter = gt_feature_node_iterator_new(cur_node); if (iter == NULL) return; while ((next_node = gt_feature_node_iterator_next(iter)) && !gt_feature_node_has_type(next_node, feature_type_gene)); gt_feature_node_iterator_delete(iter); if (NULL == (cur_node = next_node)) return 0; } if(!gt_feature_node_has_type(cur_node, feature_type_gene)) return 0; // find name of gene gene_name = gt_feature_node_get_attribute(cur_node, "Name"); if (gene_name == NULL) return; if ( 1 != sscanf(gt_str_get(gt_genome_node_get_seqid(cur_node)), "Chr%d", &chr_num)) return 0; TSS = (gt_feature_node_get_strand(cur_node) == GT_STRAND_FORWARD) ? gt_genome_node_get_start(cur_node) : gt_genome_node_get_end(cur_node); // now figure out the overlapping gene if (! (overlap_name = CpGIOverlap_stream_find_gene_overlap( context, TSS, chr_num))) return 0; // save the score into the node gt_feature_node_set_attribute(cur_node, "cpgi_at_tss", overlap_name); return 0; } } return err_num; }
static int CpGI_score_stream_next(GtNodeStream * ns, GtGenomeNode ** gn, GtError * err) { GtGenomeNode * cur_node; int err_num = 0; *gn = NULL; CpGI_score_stream * score_stream; unsigned long island_start; unsigned long island_end; float island_score; int chromosome_num; GtStr * seqID_gtstr; char * seqID_str; char * num_cg_str; unsigned long num_cg = 0; score_stream = CpGI_score_stream_cast(ns); // find the CpGI's, process methylome score if(!gt_node_stream_next(score_stream->in_stream, &cur_node, err ) && cur_node != NULL ) { *gn = cur_node; // try casting as a feature node so we can test type if(!gt_genome_node_try_cast(gt_feature_node_class(), cur_node)) { return 0; } else // we found a feature node { if(!gt_feature_node_has_type(cur_node, feature_type_CpGI)) return 0; #if DEBUG_SCORE printf("found CpGI\n"); #endif island_start = gt_genome_node_get_start(cur_node); island_end = gt_genome_node_get_end(cur_node); seqID_gtstr = gt_genome_node_get_seqid(cur_node); seqID_str = gt_str_get(seqID_gtstr); sscanf(seqID_str, "Chr%d", &chromosome_num); num_cg_str = gt_feature_node_get_attribute(cur_node, "sumcg"); if (!num_cg_str) return 0; sscanf(num_cg_str, "%d", &num_cg); // now figure out the score island_score = CpGI_score_stream_score_island(score_stream , chromosome_num, num_cg, island_start, island_end); // gt_str_delete(seqID_gtstr); // save the score into the node gt_feature_node_set_score(cur_node, island_score); return 0; } } return err_num; }
static int gt_extract_feature_sequence_generic(GtStr *sequence, GtGenomeNode *gn, const char *type, bool join, GtStr *seqid, GtStrArray *target_ids, unsigned int *out_phase_offset, GtRegionMapping *region_mapping, GtError *err) { GtFeatureNode *fn; GtRange range; unsigned int phase_offset = 0; char *outsequence; const char *target; int had_err = 0; gt_error_check(err); fn = gt_genome_node_cast(gt_feature_node_class(), gn); gt_assert(fn); if (seqid) gt_str_append_str(seqid, gt_genome_node_get_seqid(gn)); if (target_ids && (target = gt_feature_node_get_attribute(fn, GT_GFF_TARGET))) { had_err = gt_gff3_parser_parse_all_target_attributes(target, false, target_ids, NULL, NULL, "", 0, err); } if (!had_err) { if (join) { GtFeatureNodeIterator *fni; GtFeatureNode *child; bool reverse_strand = false, first_child = true, first_child_of_type_seen = false; GtPhase phase = GT_PHASE_UNDEFINED; /* in this case we have to traverse the children */ fni = gt_feature_node_iterator_new_direct(gt_feature_node_cast(gn)); while (!had_err && (child = gt_feature_node_iterator_next(fni))) { if (first_child) { if (target_ids && (target = gt_feature_node_get_attribute(child, GT_GFF_TARGET))) { gt_str_array_reset(target_ids); had_err = gt_gff3_parser_parse_all_target_attributes(target, false, target_ids, NULL, NULL, "", 0, err); } first_child = false; } if (!had_err) { if (extract_join_feature((GtGenomeNode*) child, type, region_mapping, sequence, &reverse_strand, &first_child_of_type_seen, &phase, err)) { had_err = -1; } if (phase != GT_PHASE_UNDEFINED) { phase_offset = (int) phase; } } } gt_feature_node_iterator_delete(fni); gt_assert(phase_offset <= (unsigned int) GT_PHASE_UNDEFINED); if (!had_err && gt_str_length(sequence)) { if (reverse_strand) { had_err = gt_reverse_complement(gt_str_get(sequence), gt_str_length(sequence), err); } } } else if (gt_feature_node_get_type(fn) == type) { GtPhase phase = gt_feature_node_get_phase(fn); gt_assert(!had_err); if (phase != GT_PHASE_UNDEFINED) phase_offset = (unsigned int) phase; /* otherwise we only have to look at this feature */ range = gt_genome_node_get_range(gn); gt_assert(range.start); /* 1-based coordinates */ had_err = gt_region_mapping_get_sequence(region_mapping, &outsequence, gt_genome_node_get_seqid(gn), range.start, range.end, err); if (!had_err) { gt_str_append_cstr_nt(sequence, outsequence, gt_range_length(&range)); gt_free(outsequence); if (gt_feature_node_get_strand(fn) == GT_STRAND_REVERSE) { had_err = gt_reverse_complement(gt_str_get(sequence), gt_str_length(sequence), err); } } } } if (out_phase_offset && phase_offset != GT_PHASE_UNDEFINED) { *out_phase_offset = phase_offset; } return had_err; }