/* Deliver the next node held by the select visitor. Nodes are pulled from the
   input stream and handed to the visitor until the visitor's buffer contains
   at least one node, the input is exhausted (<*gn> is NULL, 0 is returned) or
   an error occurs (the error code is returned). */
static int select_stream_next(GtNodeStream *ns, GtGenomeNode **gn, GtError *err)
{
  GtSelectStream *stream;
  int rval;

  gt_error_check(err);
  stream = gt_select_stream_cast(ns);

  for (;;) {
    /* buffered nodes are always served before new input is read */
    if (gt_select_visitor_node_buffer_size(stream->select_visitor)) {
      *gn = gt_select_visitor_get_node(stream->select_visitor);
      return 0;
    }

    /* buffer is empty -> fetch the next node from the input stream */
    rval = gt_node_stream_next(stream->in_stream, gn, err);
    if (rval || !*gn)
      break;
    gt_assert(*gn && !rval);

    rval = gt_genome_node_accept(*gn, stream->select_visitor, err);
    if (rval) {
      /* the node belongs to us at this point -> release it */
      gt_genome_node_delete(*gn);
      *gn = NULL;
      break;
    }
  }

  /* reaching this point means: error or end of input */
  gt_assert(rval || !*gn);
  return rval;
}
/* Read all nodes of <gff3file> and, if reading succeeded, pass every node to
   a feature visitor created for <feature_index>. The nodes read from the file
   are deleted again before returning. Returns 0 on success, the error code of
   the input stream otherwise. */
int gt_feature_index_add_gff3file(GtFeatureIndex *feature_index,
                                  const char *gff3file, GtError *err)
{
  GtNodeStream *in_stream;
  GtGenomeNode *node;
  GtArray *nodes;
  int rval = 0;
  GtUword idx;

  gt_error_check(err);
  gt_assert(feature_index && gff3file);

  nodes = gt_array_new(sizeof (GtGenomeNode*));
  in_stream = gt_gff3_in_stream_new_unsorted(1, &gff3file);

  /* collect the complete file first, nothing is added on a read error */
  for (;;) {
    rval = gt_node_stream_next(in_stream, &node, err);
    if (rval || !node)
      break;
    gt_array_add(nodes, node);
  }

  if (!rval) {
    GtNodeVisitor *visitor = gt_feature_visitor_new(feature_index);
    for (idx = 0; idx < gt_array_size(nodes); idx++) {
      node = *(GtGenomeNode**) gt_array_get(nodes, idx);
      /* no need to lock, add_*_node() is synchronized */
      rval = gt_genome_node_accept(node, visitor, NULL);
      gt_assert(!rval); /* cannot happen */
    }
    gt_node_visitor_delete(visitor);
  }

  /* clean up: stream, collected nodes, and the temporary array itself */
  gt_node_stream_delete(in_stream);
  for (idx = 0; idx < gt_array_size(nodes); idx++)
    gt_genome_node_delete(*(GtGenomeNode**) gt_array_get(nodes, idx));
  gt_array_delete(nodes);

  return rval;
}
/* Deliver the next node held by the filter visitor. Nodes are pulled from the
   input stream and handed to the visitor until the visitor's buffer contains
   at least one node. Returns 0 with <*gn> set to that node, 0 with <*gn> NULL
   at the end of the input, or an error code with <*gn> NULL if the visitor
   fails. */
static int filter_stream_next(GtNodeStream *gs, GtGenomeNode **gn, GtError *err)
{
  GtFilterStream *fs;
  int had_err;
  gt_error_check(err);
  fs = gt_filter_stream_cast(gs);

  /* we still have nodes in the buffer */
  if (gt_filter_visitor_node_buffer_size(fs->filter_visitor)) {
    /* return one of them */
    *gn = gt_filter_visitor_get_node(fs->filter_visitor);
    return 0;
  }

  /* no nodes in the buffer -> get new nodes */
  while (!(had_err = gt_node_stream_next(fs->in_stream, gn, err)) && *gn) {
    gt_assert(*gn && !had_err);
    had_err = gt_genome_node_accept(*gn, fs->filter_visitor, err);
    if (had_err) {
      /* we own the node -> delete it, otherwise it would be leaked because
         the caller does not take over nodes when an error is returned */
      gt_genome_node_delete(*gn);
      *gn = NULL;
      break;
    }
    if (gt_filter_visitor_node_buffer_size(fs->filter_visitor)) {
      *gn = gt_filter_visitor_get_node(fs->filter_visitor);
      return 0;
    }
  }

  /* either we have an error or no new node */
  gt_assert(had_err || !*gn);
  return had_err;
}
/* On the first call the whole input stream is read (EOF nodes are dropped)
   and the collected nodes are sorted stably. Every call then returns the next
   node in sorted order. Consecutive region nodes with the same sequence ID are
   merged into the first one by joining their ranges. When all nodes have been
   returned, the node array is reset and <*gn> is set to NULL. */
static int gt_sort_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                               GtError *err)
{
  GtSortStream *ss;
  GtGenomeNode *cur, *eof;
  int rval = 0;

  gt_error_check(err);
  ss = gt_sort_stream_cast(ns);

  if (!ss->sorted) {
    /* slurp the input */
    for (;;) {
      rval = gt_node_stream_next(ss->in_stream, &cur, err);
      if (rval || !cur)
        break;
      eof = gt_eof_node_try_cast(cur);
      if (eof)
        gt_genome_node_delete(eof); /* EOF nodes are not kept */
      else
        gt_array_add(ss->nodes, cur);
    }
    if (rval)
      return rval;
    gt_genome_nodes_sort_stable(ss->nodes);
    ss->sorted = true;
  }
  gt_assert(ss->sorted);

  /* everything has been handed out already */
  if (ss->idx >= gt_array_size(ss->nodes)) {
    gt_array_reset(ss->nodes);
    *gn = NULL;
    return rval;
  }

  *gn = *(GtGenomeNode**) gt_array_get(ss->nodes, ss->idx);
  ss->idx++;

  /* merge directly following region nodes that share the sequence ID */
  if (gt_region_node_try_cast(*gn)) {
    GtRange joined, other;
    while (ss->idx < gt_array_size(ss->nodes)) {
      cur = *(GtGenomeNode**) gt_array_get(ss->nodes, ss->idx);
      if (!gt_region_node_try_cast(cur) ||
          gt_str_cmp(gt_genome_node_get_seqid(*gn),
                     gt_genome_node_get_seqid(cur))) {
        /* next node is something else -> stop merging */
        break;
      }
      joined = gt_genome_node_get_range(*gn);
      other = gt_genome_node_get_range(cur);
      joined = gt_range_join(&joined, &other);
      gt_genome_node_set_range(*gn, &joined);
      gt_genome_node_delete(cur);
      ss->idx++;
    }
  }
  return 0;
}
/* Forward the request to the last stream of the internal stream chain and
   return its result unchanged. */
static int gff3_in_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                               GtError *err)
{
  GtGFF3InStream *gff3_in_stream;

  gt_error_check(err);
  gff3_in_stream = gff3_in_stream_cast(ns);

  return gt_node_stream_next(gff3_in_stream->last_stream, gn, err);
}
static int filter_stream_next(GtNodeStream *ns, GtGenomeNode **gn, GtError *error) { AgnFilterStream *stream; GtFeatureNode *fn; int had_err; gt_error_check(error); stream = filter_stream_cast(ns); if(gt_queue_size(stream->cache) > 0) { *gn = gt_queue_get(stream->cache); return 0; } while(1) { had_err = gt_node_stream_next(stream->in_stream, gn, error); if(had_err) return had_err; if(!*gn) return 0; fn = gt_feature_node_try_cast(*gn); if(!fn) return 0; GtFeatureNode *current; GtFeatureNodeIterator *iter = gt_feature_node_iterator_new(fn); for(current = gt_feature_node_iterator_next(iter); current != NULL; current = gt_feature_node_iterator_next(iter)) { const char *type = gt_feature_node_get_type(current); bool keepfeature = false; if(gt_hashmap_get(stream->typestokeep, type) != NULL) keepfeature = true; if(keepfeature) { gt_genome_node_ref((GtGenomeNode *)current); gt_queue_add(stream->cache, current); } } gt_feature_node_iterator_delete(iter); gt_genome_node_delete((GtGenomeNode *)fn); if(gt_queue_size(stream->cache) > 0) { *gn = gt_queue_get(stream->cache); return 0; } } return 0; }
/* On the first successful call the complete input stream is read: every node
   is stored (with an additional reference) in <lcs->nodes> and passed to the
   prepare-seq visitor, afterwards process_feature() is called for every
   collected feature key. This and all following calls then return the stored
   nodes one by one; <*gn> is set to NULL when no stored node is left (this
   includes the case of an empty input, which previously led to an
   out-of-bounds array access). If an error occurs, its code is returned and
   the collection step is attempted again on the next call. */
static int gt_ltr_cluster_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                      GtError *err)
{
  GtLTRClusterStream *lcs;
  GtGenomeNode *ref_gn;
  int had_err = 0;
  unsigned long i = 0;

  gt_error_check(err);
  lcs = gt_ltr_cluster_stream_cast(ns);

  if (lcs->first_next) {
    while (!(had_err = gt_node_stream_next(lcs->in_stream, gn, err)) && *gn) {
      gt_assert(*gn && !had_err);
      ref_gn = gt_genome_node_ref(*gn);
      gt_array_add(lcs->nodes, ref_gn);
      had_err = gt_genome_node_accept(*gn, (GtNodeVisitor*) lcs->lcv, err);
      if (had_err) {
        gt_genome_node_delete(*gn);
        *gn = NULL;
        break;
      }
    }
    lcs->feat_to_encseq =
                       gt_ltr_cluster_prepare_seq_visitor_get_encseqs(lcs->lcv);
    lcs->feat_to_encseq_keys =
                      gt_ltr_cluster_prepare_seq_visitor_get_features(lcs->lcv);
    if (!had_err) {
      for (i = 0; i < gt_str_array_size(lcs->feat_to_encseq_keys); i++) {
        had_err = process_feature(lcs,
                                  gt_str_array_get(lcs->feat_to_encseq_keys, i),
                                  err);
        if (had_err)
          break;
      }
    }
    if (had_err)
      return had_err;
    lcs->first_next = false;
  }

  /* hand out the stored nodes; the bounds check also covers the first call */
  if (lcs->next_index >= gt_array_size(lcs->nodes))
    *gn = NULL;
  else {
    *gn = *(GtGenomeNode**) gt_array_get(lcs->nodes, lcs->next_index);
    lcs->next_index++;
  }
  return 0;
}
/* Fetch the next node from the input stream and pass it to the inter feature
   visitor. Returns 0 on success (<*gn> is NULL at the end of the input). If
   the visitor fails, the node is deleted, <*gn> is set to NULL and the error
   code is returned. */
static int inter_feature_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                     GtError *err)
{
  GtInterFeatureStream *ais;
  int had_err;
  gt_error_check(err);
  ais = gt_inter_feature_stream_cast(ns);
  had_err = gt_node_stream_next(ais->in_stream, gn, err);
  if (!had_err && *gn) {
    had_err = gt_genome_node_accept(*gn, ais->inter_feature_visitor, err);
    if (had_err) {
      /* we own the node -> delete it (it was leaked before) */
      gt_genome_node_delete(*gn);
      *gn = NULL;
    }
  }
  return had_err;
}
/* Fetch the next node from the input stream and pass it to the feature
   visitor. Returns 0 on success (<*gn> is NULL at the end of the input). If
   the visitor fails, our reference to the node is released, <*gn> is set to
   NULL and the error code is returned. */
static int feature_stream_next(GtNodeStream *gs, GtGenomeNode **gn,
                               GtError *err)
{
  GtFeatureStream *feature_stream;
  int had_err;
  gt_error_check(err);
  feature_stream = feature_stream_cast(gs);
  had_err = gt_node_stream_next(feature_stream->in_stream, gn, err);
  if (!had_err && *gn) {
    had_err = gt_genome_node_accept(*gn, feature_stream->feature_visitor, err);
    if (had_err) {
      /* we own the node -> delete it (it was leaked before) */
      gt_genome_node_delete(*gn);
      *gn = NULL;
    }
  }
  return had_err;
}
/* Lua binding: fetch the next node from the node stream given as first Lua
   argument. Pushes the node, or nil if the stream delivered no node, and
   returns 1 (one result). A stream error is handed to gt_lua_error(). */
static int gt_node_stream_lua_next_tree(lua_State *L)
{
  GtNodeStream **stream;
  GtGenomeNode *node;
  GtError *err;

  stream = check_genome_stream(L, 1);
  err = gt_error_new();

  if (gt_node_stream_next(*stream, &node, err) != 0)
    return gt_lua_error(L, err); /* handle error */

  if (node != NULL)
    gt_lua_genome_node_push(L, node);
  else
    lua_pushnil(L);

  gt_error_delete(err);
  return 1;
}
/* As long as the input stream delivers nodes, they are passed to the collect
   visitor and returned. Once the input is exhausted, one sequence node is
   created and returned per call for each entry of the sequence ID table (the
   sequence is fetched via the region mapping). <*gn> is NULL when no sequence
   ID is left. Returns 0 on success; on error the error code is returned and
   <*gn> is NULL. */
static int sequence_node_add_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                         GtError *err)
{
  GtSequenceNodeAddStream *s;
  int had_err;
  gt_error_check(err);
  s = gt_sequence_node_add_stream_cast(ns);

  /* stream nodes as long as we have some, record seen seqids */
  if (!(had_err = gt_node_stream_next(s->in_stream, gn, err)) && *gn) {
    had_err = gt_genome_node_accept(*gn, s->collect_vis, err);
    if (had_err) {
      /* we own the node -> delete it (it was leaked before) */
      gt_genome_node_delete(*gn);
      *gn = NULL;
    }
  }

  /* if there are no more */
  if (!had_err && !*gn) {
    /* the list of sequence IDs is fetched only once */
    if (!s->seqids) {
      s->seqids = gt_cstr_table_get_all(s->seqid_table);
    }
    gt_assert(s->seqids);
    if (s->cur_seqid >= gt_str_array_size(s->seqids)) {
      *gn = NULL;
      return 0;
    } else {
      GtGenomeNode *new_sn;
      GtUword len;
      char *seq = NULL;
      GtStr *seqid = gt_str_new(),
            *seqstr = gt_str_new();
      gt_str_append_cstr(seqid, gt_str_array_get(s->seqids, s->cur_seqid));
      had_err = gt_region_mapping_get_sequence_length(s->rm, &len, seqid, err);
      if (!had_err) {
        had_err = gt_region_mapping_get_sequence(s->rm, &seq, seqid, 1, len,
                                                 err);
      }
      if (!had_err) {
        gt_str_append_cstr_nt(seqstr, seq, len);
        new_sn = gt_sequence_node_new(gt_str_get(seqid), seqstr);
        *gn = new_sn;
      }
      /* the sequence ID counts as handled even if fetching it failed */
      s->cur_seqid++;
      gt_free(seq);
      gt_str_delete(seqid);
      gt_str_delete(seqstr);
    }
  }

  return had_err;
}
/* In buffering mode the next node of the input stream is returned and a new
   reference to it is appended to the node buffer. Otherwise the nodes stored
   in the buffer are returned one by one (<*gn> is NULL when it is empty). */
static int buffer_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                              GtError *err)
{
  GtBufferStream *bs;
  int rval;

  gt_error_check(err);
  bs = buffer_stream_cast(ns);

  /* replay mode: serve from the buffer only */
  if (!bs->buffering) {
    if (gt_queue_size(bs->node_buffer))
      *gn = gt_queue_get(bs->node_buffer);
    else
      *gn = NULL;
    return 0;
  }

  /* buffering mode: pass the node through and remember it */
  rval = gt_node_stream_next(bs->in_stream, gn, err);
  if (rval == 0 && *gn != NULL)
    gt_queue_add(bs->node_buffer, gt_genome_node_ref(*gn));
  return rval;
}
/* Fetch the next node from the input stream and apply the visitor to it. On
   any error the node is deleted and <*gn> is set to NULL. */
static int visitor_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                               GtError *err)
{
  GtVisitorStream *vs;
  int rval;

  gt_error_check(err);
  vs = visitor_stream_cast(ns);

  rval = gt_node_stream_next(vs->in_stream, gn, err);
  if (rval == 0 && *gn != NULL)
    rval = gt_genome_node_accept(*gn, vs->visitor, err);

  if (rval == 0)
    return 0;

  /* the node is ours -> get rid of it before reporting the error */
  gt_genome_node_delete(*gn);
  *gn = NULL;
  return rval;
}
/* Fetch the next node from the input stream, count it (EOF nodes are not
   counted) and pass it to the stat visitor. */
static int stat_stream_next(GtNodeStream *ns, GtGenomeNode **gn, GtError *err)
{
  GtStatStream *ss;
  int rval;

  gt_error_check(err);
  ss = stat_stream_cast(ns);

  rval = gt_node_stream_next(ss->in_stream, gn, err);
  if (rval)
    return rval;

  gt_assert(ss->stat_visitor);
  if (*gn == NULL)
    return rval; /* end of input */

  /* do not count EOF nodes */
  if (!gt_eof_node_try_cast(*gn))
    ss->number_of_DAGs++;

  rval = gt_genome_node_accept(*gn, ss->stat_visitor, err);
  gt_assert(!rval); /* the status visitor is sane */
  return rval;
}
/* Fetch the next node from the input stream and run the CDS check visitor on
   it. On any error the node is deleted and <*gn> is set to NULL. */
static int cds_check_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                 GtError *err)
{
  GtCDSCheckStream *stream;
  int rval;

  gt_error_check(err);
  stream = cds_check_stream_cast(ns);
  gt_assert(stream);

  rval = gt_node_stream_next(stream->in_stream, gn, err);
  if (rval == 0 && *gn != NULL)
    rval = gt_genome_node_accept(*gn, stream->cds_check_visitor, err);

  if (rval != 0) {
    /* the node is ours -> release it */
    gt_genome_node_delete(*gn);
    *gn = NULL;
  }
  return rval;
}
/* Tool entry point: parse the options, pull all nodes of the GFF3 file given
   as first non-option argument through a region coverage visitor and show
   the coverage afterwards. Returns 0 on success, -1 on an option parsing
   error, or the error code of the stream/visitor. */
int gt_regioncov(int argc, const char **argv, GtError *err)
{
  GtNodeVisitor *regioncov_visitor;
  GtNodeStream *gff3_in_stream;
  GtGenomeNode *gn;
  RegionCovArguments arguments;
  int parsed_args, had_err = 0;
  gt_error_check(err);

  /* option parsing */
  switch (parse_options(&parsed_args, &arguments, argc, argv, err)) {
    case OPTIONPARSER_OK: break;
    case OPTIONPARSER_ERROR: return -1;
    case OPTIONPARSER_REQUESTS_EXIT: return 0;
  }

  /* create gff3 input stream */
  gt_assert(parsed_args < argc);
  gff3_in_stream = gt_gff3_in_stream_new_sorted(argv[parsed_args]);
  if (arguments.verbose)
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) gff3_in_stream);

  /* create region coverage visitor */
  regioncov_visitor = gt_regioncov_visitor_new(arguments.max_feature_dist);

  /* pull the features through the stream and free them afterwards */
  while (!(had_err = gt_node_stream_next(gff3_in_stream, &gn, err)) && gn) {
    had_err = gt_genome_node_accept(gn, regioncov_visitor, err);
    gt_genome_node_delete(gn);
    /* stop here, otherwise the loop condition would overwrite the error */
    if (had_err)
      break;
  }

  /* show region coverage */
  if (!had_err)
    gt_regioncov_visitor_show_coverage(regioncov_visitor);

  /* free */
  gt_node_visitor_delete(regioncov_visitor);
  gt_node_stream_delete(gff3_in_stream);
  return had_err;
}
/* Fetch the next node from the input stream and pass it to the BSSM train
   visitor. On any error the node is deleted and <*gn> is set to NULL. */
static int bssm_train_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                  GtError *err)
{
  GthBSSMTrainStream *stream;
  int rval;

  gt_error_check(err);
  stream = bssm_train_stream_cast(ns);

  rval = gt_node_stream_next(stream->in_stream, gn, err);
  if (rval == 0 && *gn != NULL)
    rval = gt_genome_node_accept(*gn, stream->bssm_train_visitor, err);

  if (rval == 0)
    return 0;

  /* the node is ours -> release it before reporting the error */
  gt_genome_node_delete(*gn);
  *gn = NULL;
  return rval;
}
int main(int argc, const char *argv[]) { GtNodeStream *gff3_in_stream; GtGenomeNode *gn; GtError *err; int had_err; if (gt_version_check(GT_MAJOR_VERSION, GT_MINOR_VERSION, GT_MICRO_VERSION)) { fprintf(stderr, "error: %s\n", gt_version_check(GT_MAJOR_VERSION, GT_MINOR_VERSION, GT_MICRO_VERSION)); return EXIT_FAILURE; } /* initialize */ gt_lib_init(); /* create error object */ err = gt_error_new(); /* create GFF3 input stream (with ID attribute checking) */ gff3_in_stream = gt_gff3_in_stream_new_unsorted(argc-1, argv+1); gt_gff3_in_stream_check_id_attributes((GtGFF3InStream*) gff3_in_stream); /* pull the features through the stream and free them afterwards */ while (!(had_err = gt_node_stream_next(gff3_in_stream, &gn, err)) && gn) gt_genome_node_delete(gn); /* handle error */ if (had_err) fprintf(stderr, "%s: error: %s\n", argv[0], gt_error_get(err)); else printf("input is valid GFF3\n"); /* free */ gt_node_stream_delete(gff3_in_stream); gt_error_delete(err); if (had_err) return EXIT_FAILURE; return EXIT_SUCCESS; }
/* Pass the next node of the input stream through. If the node is a feature
   node, a new reference to it is additionally stored in the node array. */
static int gt_array_out_stream_next(GtNodeStream *gs, GtGenomeNode **gn,
                                    GtError *err)
{
  GtArrayOutStream *aos;
  GtGenomeNode *gn_ref;
  int rval;

  gt_error_check(err);
  aos = gt_array_out_stream_cast(gs);

  rval = gt_node_stream_next(aos->in_stream, gn, err);
  if (rval || !*gn)
    return rval;

  if (gt_feature_node_try_cast(*gn)) {
    /* gt_array_add() needs an lvalue, hence the temporary */
    gn_ref = gt_genome_node_ref(*gn);
    gt_array_add(aos->nodes, gn_ref);
  }
  return rval;
}
/* On the first call the whole input stream is loaded into memory (EOF nodes
   are dropped). Every call then returns the next loaded node. When all nodes
   have been returned, the node array is reset and <*gn> is set to NULL. */
static int gt_load_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                               GtError *err)
{
  GtLoadStream *ls;
  GtGenomeNode *cur, *eof;
  int rval = 0;

  gt_error_check(err);
  ls = gt_load_stream_cast(ns);

  if (!ls->full) {
    /* slurp the input */
    for (;;) {
      rval = gt_node_stream_next(ls->in_stream, &cur, err);
      if (rval || !cur)
        break;
      eof = gt_eof_node_try_cast(cur);
      if (eof)
        gt_genome_node_delete(eof); /* EOF nodes are not kept */
      else
        gt_array_add(ls->nodes, cur);
    }
    if (rval)
      return rval;
    ls->full = true;
  }
  gt_assert(ls->full);

  if (ls->idx < gt_array_size(ls->nodes)) {
    *gn = *(GtGenomeNode**) gt_array_get(ls->nodes, ls->idx);
    ls->idx++;
    return 0;
  }

  /* everything has been handed out */
  gt_array_reset(ls->nodes);
  *gn = NULL;
  return rval;
}
/* Tool runner: read the GFF3 file given as first non-option argument and call
   extracttarget_from_node() with the configured sequence files for every
   node. Processing stops at the first error, whose code is returned. */
static int gt_extracttarget_runner(GT_UNUSED int argc, const char **argv,
                                   int parsed_args, void *tool_arguments,
                                   GtError *err)
{
  ExtractTargetArguments *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream;
  GtGenomeNode *gn;
  int had_err;

  gt_error_check(err);
  gt_assert(arguments);

  gff3_in_stream = gt_gff3_in_stream_new_unsorted(1, argv + parsed_args);

  while (!(had_err = gt_node_stream_next(gff3_in_stream, &gn, err)) && gn) {
    had_err = extracttarget_from_node(gn, arguments->seqfiles, err);
    gt_genome_node_delete(gn);
    /* stop here, otherwise the loop condition would overwrite the error */
    if (had_err)
      break;
  }

  gt_node_stream_delete(gff3_in_stream);

  return had_err;
}
/* On the first call the whole input stream is consumed: feature nodes with a
   "Target" attribute are handed to filter_targetbest(), all other nodes are
   appended to the list of trees. Every call then returns the next element of
   that list (<*gn> is NULL at its end). */
static int targetbest_filter_stream_next(GtNodeStream *gs, GtGenomeNode **gn,
                                         GtError *err)
{
  GtTargetbestFilterStream *tfs;
  GtGenomeNode *cur;
  int rval = 0;

  gt_error_check(err);
  tfs = targetbest_filter_stream_cast(gs);

  if (!tfs->in_stream_processed) {
    for (;;) {
      rval = gt_node_stream_next(tfs->in_stream, &cur, err);
      if (rval || !cur)
        break;
      if (gt_feature_node_try_cast(cur) &&
          gt_feature_node_get_attribute((GtFeatureNode*) cur, "Target")) {
        filter_targetbest((GtFeatureNode*) cur, tfs->trees,
                          tfs->target_to_elem);
      }
      else
        gt_dlist_add(tfs->trees, cur);
    }
    /* the input counts as processed even if reading failed */
    tfs->next = gt_dlist_first(tfs->trees);
    tfs->in_stream_processed = true;
    if (rval)
      return rval;
  }
  gt_assert(tfs->in_stream_processed);

  if (!tfs->next) {
    *gn = NULL;
    return 0;
  }
  *gn = gt_dlistelem_get_data(tfs->next);
  tfs->next = gt_dlistelem_next(tfs->next);
  return 0;
}
/* On the first call the whole input stream is read into the buffer, sorted
   stably with gt_genome_node_compare_numeric_seqids() and moved to the output
   queue. Every call then takes one node from the queue, passes it to the GFF3
   visitor and returns it. When the queue is empty <*gn> is set to NULL, so
   that callers see the end of the stream instead of the previously returned
   (stale) node. Returns 0 on success, an error code otherwise. */
static int gff3_numsorted_out_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                          GtError *err)
{
  GtGFF3NumsortedOutStream *gff3_out_stream;
  int had_err = 0;
  GtUword i = 0;
  gt_error_check(err);
  gff3_out_stream = gff3_numsorted_out_stream_cast(ns);

  /* the output queue doubles as "input already read" flag */
  if (!gff3_out_stream->outqueue) {
    gff3_out_stream->outqueue = gt_queue_new();
    while (!(had_err =
                  gt_node_stream_next(gff3_out_stream->in_stream, gn, err))) {
      if (!*gn) break;
      gt_array_add(gff3_out_stream->buffer, *gn);
    }
    if (!had_err) {
      gt_genome_nodes_sort_stable_with_func(gff3_out_stream->buffer,
                          (GtCompare) gt_genome_node_compare_numeric_seqids);
      for (i = 0; !had_err && i < gt_array_size(gff3_out_stream->buffer);
           i++) {
        GtGenomeNode *mygn = *(GtGenomeNode**)
                                   gt_array_get(gff3_out_stream->buffer, i);
        gt_queue_add(gff3_out_stream->outqueue, mygn);
      }
    }
  }

  if (gff3_out_stream->outqueue && !had_err) {
    /* default: nothing to deliver; overwritten below if a node is served */
    *gn = NULL;
    if (gt_queue_size(gff3_out_stream->outqueue) > 0) {
      GtGenomeNode *mygn = (GtGenomeNode*)
                                     gt_queue_get(gff3_out_stream->outqueue);
      gt_assert(mygn);
      had_err = gt_genome_node_accept(mygn, gff3_out_stream->gff3_visitor,
                                      err);
      if (!had_err)
        *gn = mygn;
    }
  }
  return had_err;
}
/* Advance the classifier to the next feature node of the annotation stream.
   The current iterator and genome node are released, then nodes are read (and
   released again) until a feature node is found, for which a new iterator is
   created. At the end of the stream or on error, <fn> and <gn> are set to
   NULL. Returns the error code of the annotation stream. */
static int gt_seqpos_classifier_next_fn(GtSeqposClassifier *seqpos_classifier,
                                        GtError *err)
{
  GtSeqposClassifier *sc = seqpos_classifier;
  int rval = 0;

  gt_assert(sc != NULL);

  /* drop the iterator over the previous feature node */
  if (sc->fni != NULL) {
    gt_feature_node_iterator_delete(sc->fni);
    sc->fni = NULL;
  }

  for (;;) {
    /* release the node obtained in the previous round / call */
    if (sc->gn != NULL)
      gt_genome_node_delete(sc->gn);

    rval = gt_node_stream_next(sc->annotation_stream, &sc->gn, err);
    if (rval != 0 || sc->gn == NULL) {
      sc->fn = NULL;
      sc->gn = NULL;
      return rval;
    }

    sc->fn = gt_feature_node_try_cast(sc->gn);
    if (sc->fn != NULL) {
      sc->fni = gt_feature_node_iterator_new(sc->fn);
      return rval;
    }
    /* not a feature node -> try the next one */
  }
}
/* Pass the next node of the input stream through. If the node is a gene
 * feature (or a pseudo node containing one) with a "Name" attribute and a
 * sequence ID of the form "Chr<number>", the transcription start site is
 * derived from the strand (start for forward, end otherwise) and the result
 * of CpGIOverlap_stream_find_gene_overlap() is stored in the attribute
 * "cpgi_at_tss" of the gene. All other nodes are returned unchanged.
 *
 * Returns 0 on success (*gn is NULL at the end of the input) or the error
 * code of the input stream (*gn is NULL in that case as well).
 */
static int CpGIOverlap_stream_next(GtNodeStream * ns,
                                   GtGenomeNode ** gn,
                                   GtError * err)
{
    GtGenomeNode * cur_node = NULL, * next_node;
    GtFeatureNodeIterator * iter;
    CpGIOverlap_stream * context;
    const char * gene_name;
    const char * overlap_name;
    int err_num;
    int chr_num;
    unsigned int TSS;

    *gn = NULL;
    context = CpGIOverlap_stream_cast(ns);

    // an error of the input stream must reach the caller; it used to be
    // reported as a regular end of stream
    err_num = gt_node_stream_next(context->in_stream, &cur_node, err);
    if (err_num != 0)
        return err_num;
    if (cur_node == NULL)
        return 0;

    *gn = cur_node;

    // only feature nodes are inspected any further
    if (!gt_genome_node_try_cast(gt_feature_node_class(), cur_node))
        return 0;

    // first check if it is a pseudo node, if so find the gene in it if
    // available
    if (gt_feature_node_is_pseudo(cur_node))
    {
        iter = gt_feature_node_iterator_new(cur_node);
        if (iter == NULL)
            return 0;
        while ((next_node = gt_feature_node_iterator_next(iter)) &&
               !gt_feature_node_has_type(next_node, feature_type_gene))
            ;
        gt_feature_node_iterator_delete(iter);
        if (NULL == (cur_node = next_node))
            return 0;
    }

    if (!gt_feature_node_has_type(cur_node, feature_type_gene))
        return 0;

    // genes without a name are passed through untouched
    gene_name = gt_feature_node_get_attribute(cur_node, "Name");
    if (gene_name == NULL)
        return 0;

    if (1 != sscanf(gt_str_get(gt_genome_node_get_seqid(cur_node)),
                    "Chr%d", &chr_num))
        return 0;

    TSS = (gt_feature_node_get_strand(cur_node) == GT_STRAND_FORWARD) ?
              gt_genome_node_get_start(cur_node) :
              gt_genome_node_get_end(cur_node);

    // now figure out the overlapping gene
    if (!(overlap_name = CpGIOverlap_stream_find_gene_overlap(context, TSS,
                                                              chr_num)))
        return 0;

    // save the result into the node
    gt_feature_node_set_attribute(cur_node, "cpgi_at_tss", overlap_name);

    return 0;
}
/* Pass the next node of the input stream through. If the node is a feature of
 * type feature_type_CpGI with a sequence ID of the form "Chr<number>" and a
 * "sumcg" attribute, its score is set to the result of
 * CpGI_score_stream_score_island(). All other nodes are returned unchanged.
 *
 * Returns 0 on success (*gn is NULL at the end of the input) or the error
 * code of the input stream (*gn is NULL in that case as well).
 */
static int CpGI_score_stream_next(GtNodeStream * ns,
                                  GtGenomeNode ** gn,
                                  GtError * err)
{
    GtGenomeNode * cur_node = NULL;
    CpGI_score_stream * score_stream;
    GtStr * seqID_gtstr;
    const char * seqID_str;
    const char * num_cg_str;
    unsigned long island_start;
    unsigned long island_end;
    unsigned long num_cg = 0;
    float island_score;
    int chromosome_num;
    int err_num;

    *gn = NULL;
    score_stream = CpGI_score_stream_cast(ns);

    // an error of the input stream must reach the caller; it used to be
    // reported as a regular end of stream
    err_num = gt_node_stream_next(score_stream->in_stream, &cur_node, err);
    if (err_num != 0)
        return err_num;
    if (cur_node == NULL)
        return 0;

    *gn = cur_node;

    // only CpGI feature nodes are scored
    if (!gt_genome_node_try_cast(gt_feature_node_class(), cur_node))
        return 0;
    if (!gt_feature_node_has_type(cur_node, feature_type_CpGI))
        return 0;

#if DEBUG_SCORE
    printf("found CpGI\n");
#endif

    island_start = gt_genome_node_get_start(cur_node);
    island_end   = gt_genome_node_get_end(cur_node);

    // without a parsable chromosome number there is nothing to score against
    seqID_gtstr = gt_genome_node_get_seqid(cur_node);
    seqID_str   = gt_str_get(seqID_gtstr);
    if (sscanf(seqID_str, "Chr%d", &chromosome_num) != 1)
        return 0;

    num_cg_str = gt_feature_node_get_attribute(cur_node, "sumcg");
    if (!num_cg_str)
        return 0;
    // num_cg is an unsigned long, so it has to be scanned with %lu; an
    // unparsable value counts as 0
    if (sscanf(num_cg_str, "%lu", &num_cg) != 1)
        num_cg = 0;

    // now figure out the score
    island_score = CpGI_score_stream_score_island(score_stream,
                                                  chromosome_num,
                                                  num_cg,
                                                  island_start,
                                                  island_end);

    // save the score into the node
    gt_feature_node_set_score(cur_node, island_score);

    return 0;
}
/* Deliver the next node of the SNP annotator stream.
   Nodes waiting in <sas->outqueue> are served first. Otherwise nodes are read
   from <sas->merge_stream> and dispatched by kind:
   - SNV/SNP features overlapping the current gene range are queued in
     <sas->snps>; those outside of it finish the current gene cluster and are
     put into the out queue.
   - gene features are collected in <sas->cur_gene_set> while they overlap the
     current gene range; a non-overlapping gene finishes the current cluster
     and starts a new one.
   - non-feature nodes finish the current cluster and are put into the out
     queue.
   Reading stops as soon as a node could be taken from the out queue, at the
   end of the merge stream, or on error. Returns the error code (0 on
   success).
   NOTE(review): when the merge stream is exhausted (or fails) the loop is left
   without assigning <*gn> -- confirm that callers initialise it to NULL.
   NOTE(review): feature nodes that are neither SNV/SNP nor gene are neither
   forwarded nor deleted here -- confirm that this is intended. */
static int snp_annotator_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                     GtError *err)
{
  GtSNPAnnotatorStream *sas;
  int had_err = 0;
  bool complete_cluster = false;
  GtGenomeNode *mygn = NULL;
  GtFeatureNode *fn = NULL;
  /* the types are compared by pointer below, so these symbols must be the
     same strings that gt_feature_node_get_type() returns
     (presumably interned by gt_symbol() -- TODO confirm) */
  const char *snv_type = gt_symbol(gt_ft_SNV),
             *snp_type = gt_symbol(gt_ft_SNP),
             *gene_type = gt_symbol(gt_ft_gene);
  gt_error_check(err);
  sas = gt_snp_annotator_stream_cast(ns);

  /* if there are still SNPs left in the buffer, output them */
  if (gt_queue_size(sas->outqueue) > 0) {
    *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
    return had_err;
  } else complete_cluster = false;

  while (!had_err && !complete_cluster) {
    had_err = gt_node_stream_next(sas->merge_stream, &mygn, err);

    /* stop if stream is at the end */
    if (had_err || !mygn) break;

    /* process all feature nodes */
    if ((fn = gt_feature_node_try_cast(mygn))) {
      GtGenomeNode *addgn;
      const char *type = gt_feature_node_get_type(fn);
      GtRange new_rng = gt_genome_node_get_range(mygn);
      if (type == snv_type || type == snp_type) {
        /* -----> this is a SNP <----- */
        if (gt_range_overlap(&new_rng, &sas->cur_gene_range)) {
          /* it falls into the currently observed range */
          gt_queue_add(sas->snps, gt_genome_node_ref((GtGenomeNode*) fn));
        } else {
          /* SNP outside a gene, this cluster is done
             add to out queue and start serving */
          gt_assert(gt_queue_size(sas->outqueue) == 0);
          had_err = snp_annotator_stream_process_current_gene(sas, err);
          /* the SNP itself is queued regardless of had_err */
          gt_queue_add(sas->outqueue, mygn);
          if (gt_queue_size(sas->outqueue) > 0) {
            *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
            complete_cluster = true;
          }
        }
      } else if (type == gene_type) {
        /* -----> this is a gene <----- */
        if (gt_array_size(sas->cur_gene_set) == 0UL) {
          /* new overlapping gene cluster */
          addgn = gt_genome_node_ref(mygn);
          gt_array_add(sas->cur_gene_set, addgn);
          sas->cur_gene_range = gt_genome_node_get_range(mygn);
        } else {
          if (gt_range_overlap(&new_rng, &sas->cur_gene_range)) {
            /* gene overlaps with current one, add to cluster */
            addgn = gt_genome_node_ref(mygn);
            gt_array_add(sas->cur_gene_set, addgn);
            sas->cur_gene_range = gt_range_join(&sas->cur_gene_range,
                                                &new_rng);
          } else {
            /* finish current cluster and start a new one */
            had_err = snp_annotator_stream_process_current_gene(sas, err);
            if (!had_err) {
              addgn = gt_genome_node_ref(mygn);
              gt_array_add(sas->cur_gene_set, addgn);
              sas->cur_gene_range = gt_genome_node_get_range(mygn);
            }
            if (gt_queue_size(sas->outqueue) > 0) {
              *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
              complete_cluster = true;
            }
          }
        }
        /* from now on, genes are kept in gene cluster arrays only;
           this drops the reference obtained from the merge stream, the
           cluster array holds the one taken via gt_genome_node_ref() */
        gt_genome_node_delete(mygn);
      }
    } else {
      /* meta node */
      had_err = snp_annotator_stream_process_current_gene(sas, err);
      if (!had_err) {
        gt_queue_add(sas->outqueue, mygn);
      }
      if (gt_queue_size(sas->outqueue) > 0) {
        *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
        complete_cluster = true;
      }
    }
  }

  return had_err;
}
/* Deliver the next node with its sequence ID replaced according to
   <cs->chseqids_mapping>.
   On the first call all leading region nodes (plus the first node which is
   not a region node, if any) are read into a buffer, their sequence IDs are
   changed, the buffer is sorted and equal region nodes are consolidated. The
   buffered nodes are returned first; afterwards nodes are read from the input
   stream, changed and returned directly.
   Returns 0 on success (<*gn> is NULL at the end of the input), the error code
   of the input stream, or -1 if a sequence ID of a buffered node could not be
   mapped. */
static int chseqids_stream_next(GtNodeStream *gs, GtGenomeNode **gn,
                                GtError *err)
{
  GtChseqidsStream *cs;
  GtGenomeNode *node, **gn_a, **gn_b;
  GtFeatureNode *feature_node;
  GtStr *changed_seqid;
  unsigned long i;
  int rval, had_err = 0;
  gt_error_check(err);
  cs = chseqids_stream_cast(gs);

  if (!cs->sequence_regions_processed) {
    /* fill the buffer: stop at the end of the input, on error, or after the
       first node which is not a region node has been stored */
    while (!had_err) {
      if (!(had_err = gt_node_stream_next(cs->in_stream, &node, err))) {
        if (node)
          gt_array_add(cs->gt_genome_node_buffer, node);
        else
          break;
        if (!gt_region_node_try_cast(node))
          break; /* no more sequence regions */
      }
    }
    /* now the buffer contains only sequence regions (except the last entry)
       -> change sequence ids */
    for (i = 0; !had_err && i < gt_array_size(cs->gt_genome_node_buffer);
         i++) {
      node = *(GtGenomeNode**) gt_array_get(cs->gt_genome_node_buffer, i);
      if (gt_genome_node_get_seqid(node)) {
        if ((changed_seqid = gt_mapping_map_string(cs->chseqids_mapping,
                                   gt_str_get(gt_genome_node_get_seqid(node)),
                                                   err))) {
          /* feature nodes are changed together with their children, all
             other nodes directly */
          if ((feature_node = gt_feature_node_try_cast(node))) {
            rval = gt_genome_node_traverse_children(node, changed_seqid,
                                                    change_sequence_id, true,
                                                    err);
            gt_assert(!rval); /* change_sequence_id() is sane */
          }
          else
            gt_genome_node_change_seqid(node, changed_seqid);
          gt_str_delete(changed_seqid);
        }
        else
          had_err = -1; /* no mapping result for this sequence ID */
      }
    }
    /* sort them */
    if (!had_err)
      gt_genome_nodes_sort(cs->gt_genome_node_buffer);
    /* consolidate them: of two equal neighbouring region nodes the first one
       is merged into the second one, deleted, and its buffer slot set to NULL
       (the last buffer entry is never part of a comparison) */
    for (i = 1;
         !had_err && i + 1 < gt_array_size(cs->gt_genome_node_buffer);
         i++) {
      gn_a = gt_array_get(cs->gt_genome_node_buffer, i-1);
      gn_b = gt_array_get(cs->gt_genome_node_buffer, i);
      if (gt_genome_nodes_are_equal_region_nodes(*gn_a, *gn_b)) {
        gt_region_node_consolidate(gt_region_node_cast(*gn_b),
                                   gt_region_node_cast(*gn_a));
        gt_genome_node_delete(*gn_a);
        *gn_a = NULL;
      }
    }
    /* NOTE(review): the flag is set even if an error occurred above */
    cs->sequence_regions_processed = true;
  }

  /* return non-null nodes from buffer (NULL slots stem from consolidation) */
  while (!had_err &&
         cs->buffer_index < gt_array_size(cs->gt_genome_node_buffer)) {
    node = *(GtGenomeNode**) gt_array_get(cs->gt_genome_node_buffer,
                                          cs->buffer_index);
    cs->buffer_index++;
    if (node) {
      *gn = node;
      return had_err;
    }
  }

  /* buffer is used up -> continue with the input stream */
  if (!had_err)
    had_err = gt_node_stream_next(cs->in_stream, gn, err);
  if (!had_err && *gn) {
    if (gt_genome_node_get_seqid(*gn)) {
      changed_seqid = gt_mapping_map_string(cs->chseqids_mapping,
                                    gt_str_get(gt_genome_node_get_seqid(*gn)),
                                            err);
      gt_assert(changed_seqid); /* is always defined, because an undefined
                                   mapping would be caught earlier */
      if ((feature_node = gt_feature_node_try_cast(*gn))) {
        rval = gt_genome_node_traverse_children(*gn, changed_seqid,
                                                change_sequence_id, true, err);
        gt_assert(!rval); /* change_sequence_id() is sane */
      }
      else
        gt_genome_node_change_seqid(*gn, changed_seqid);
      gt_str_delete(changed_seqid);
    }
  }

  return had_err;
}
/* Fetch the next node from the input stream and pass it through. If it is a
   feature node, the LTR element structure <ls->element> is filled by applying
   the visitor <ls->lv> to the node and everything reachable via a feature
   node iterator. If a main node was found, one line describing the element
   (coordinates, LTRs, TSDs, PPT, PBS, protein domain order) is written to the
   tabular output file and the sequences of PPT, PBS, both LTRs and the
   complete element are written to the respective FASTA output files.
   Returns 0 on success (<*gn> is NULL at the end of the input), an error code
   otherwise. */
int gt_ltrfileout_stream_next(GtNodeStream *ns, GtGenomeNode **gn, GtError *err)
{
  GtLTRdigestFileOutStream *ls;
  GtFeatureNode *fn;
  GtRange lltr_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          rltr_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          ppt_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          pbs_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD};
  int had_err;
  GtUword i=0;

  gt_error_check(err);
  ls = gt_ltrdigest_file_out_stream_cast(ns);

  /* initialize this element */
  memset(&ls->element, 0, sizeof (GtLTRElement));

  /* get annotations from parser */
  had_err = gt_node_stream_next(ls->in_stream, gn, err);
  if (!had_err && *gn) {
    GtFeatureNodeIterator* gni;
    GtFeatureNode *mygn;

    /* only process feature nodes */
    if (!(fn = gt_feature_node_try_cast(*gn)))
      return 0;

    ls->element.pdomorder = gt_array_new(sizeof (const char*));

    /* fill LTRElement structure from GFF3 subgraph */
    gni = gt_feature_node_iterator_new(fn);
    for (mygn = fn; mygn != NULL; mygn = gt_feature_node_iterator_next(gni))
      (void) gt_genome_node_accept((GtGenomeNode*) mygn,
                                   (GtNodeVisitor*) ls->lv,
                                   err);
    gt_feature_node_iterator_delete(gni);
  }

  if (!had_err && ls->element.mainnode != NULL) {
    char desc[GT_MAXFASTAHEADER];
    GtFeatureNode *ltr3, *ltr5;
    GtStr *sdesc, *sreg, *seq;

    /* find sequence in GtEncseq */
    sreg = gt_genome_node_get_seqid((GtGenomeNode*) ls->element.mainnode);

    sdesc = gt_str_new();
    had_err = gt_region_mapping_get_description(ls->rmap, sdesc, sreg, err);

    if (!had_err) {
      GtRange rng;
      /* the sequence name is the description cut to <seqnamelen> characters,
         with blanks replaced by underscores */
      ls->element.seqid = gt_calloc((size_t) ls->seqnamelen+1, sizeof (char));
      (void) snprintf(ls->element.seqid,
                      MIN((size_t) gt_str_length(sdesc),
                          (size_t) ls->seqnamelen)+1,
                      "%s",
                      gt_str_get(sdesc));
      gt_cstr_rep(ls->element.seqid, ' ', '_');
      if (gt_str_length(sdesc) > (GtUword) ls->seqnamelen)
        ls->element.seqid[ls->seqnamelen] = '\0';

      (void) gt_ltrelement_format_description(&ls->element,
                                              ls->seqnamelen,
                                              desc,
                                              (size_t) (GT_MAXFASTAHEADER-1));

      /* output basic retrotransposon data */
      lltr_rng = gt_genome_node_get_range((GtGenomeNode*)
                                                         ls->element.leftLTR);
      rltr_rng = gt_genome_node_get_range((GtGenomeNode*)
                                                        ls->element.rightLTR);
      rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.mainnode);
      gt_file_xprintf(ls->tabout_file,
                      GT_WU"\t"GT_WU"\t"GT_WU"\t%s\t"GT_WU"\t"GT_WU"\t"GT_WU"\t"
                      GT_WU"\t"GT_WU"\t"GT_WU"\t",
                      rng.start,
                      rng.end,
                      gt_ltrelement_length(&ls->element),
                      ls->element.seqid,
                      lltr_rng.start,
                      lltr_rng.end,
                      gt_ltrelement_leftltrlen(&ls->element),
                      rltr_rng.start,
                      rltr_rng.end,
                      gt_ltrelement_rightltrlen(&ls->element));
    }
    /* the description is not needed any more; it is released here (and not
       inside the branch above) so that it is not leaked when looking it up
       failed */
    gt_str_delete(sdesc);

    seq = gt_str_new();

    /* output TSDs */
    if (!had_err && ls->element.leftTSD != NULL) {
      GtRange tsd_rng;
      tsd_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.leftTSD);
      had_err = gt_extract_feature_sequence(seq,
                                   (GtGenomeNode*) ls->element.leftTSD,
                                   gt_symbol(gt_ft_target_site_duplication),
                                   false, NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_file_xprintf(ls->tabout_file,
                        ""GT_WU"\t"GT_WU"\t%s\t",
                        tsd_rng.start,
                        tsd_rng.end,
                        gt_str_get(seq));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t");

    if (!had_err && ls->element.rightTSD != NULL) {
      GtRange tsd_rng;
      tsd_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.rightTSD);
      had_err = gt_extract_feature_sequence(seq,
                                   (GtGenomeNode*) ls->element.rightTSD,
                                   gt_symbol(gt_ft_target_site_duplication),
                                   false, NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_file_xprintf(ls->tabout_file,
                        ""GT_WU"\t"GT_WU"\t%s\t",
                        tsd_rng.start,
                        tsd_rng.end,
                        gt_str_get(seq));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t");

    /* output PPT */
    if (!had_err && ls->element.ppt != NULL) {
      GtStrand ppt_strand = gt_feature_node_get_strand(ls->element.ppt);
      ppt_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.ppt);
      had_err = gt_extract_feature_sequence(seq,
                                            (GtGenomeNode*) ls->element.ppt,
                                            gt_symbol(gt_ft_RR_tract),
                                            false, NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_fasta_show_entry(desc, gt_str_get(seq), gt_range_length(&ppt_rng),
                            GT_FSWIDTH, ls->pptout_file);
        /* the last column is the distance between the PPT and the LTR
           adjacent to it on the respective strand */
        gt_file_xprintf(ls->tabout_file,
                        ""GT_WU"\t"GT_WU"\t%s\t%c\t%d\t",
                        ppt_rng.start,
                        ppt_rng.end,
                        gt_str_get(seq),
                        GT_STRAND_CHARS[ppt_strand],
                        (ppt_strand == GT_STRAND_FORWARD ?
                            abs((int) (rltr_rng.start - ppt_rng.end)) :
                            abs((int) (lltr_rng.end - ppt_rng.start))));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t\t\t");

    /* output PBS */
    if (!had_err && ls->element.pbs != NULL) {
      GtStrand pbs_strand;
      pbs_strand = gt_feature_node_get_strand(ls->element.pbs);
      pbs_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.pbs);
      had_err = gt_extract_feature_sequence(seq,
                                           (GtGenomeNode*) ls->element.pbs,
                                           gt_symbol(gt_ft_primer_binding_site),
                                           false, NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_fasta_show_entry(desc, gt_str_get(seq), gt_range_length(&pbs_rng),
                            GT_FSWIDTH, ls->pbsout_file);
        gt_file_xprintf(ls->tabout_file,
                        ""GT_WU"\t"GT_WU"\t%c\t%s\t%s\t%s\t%s\t%s\t",
                        pbs_rng.start,
                        pbs_rng.end,
                        GT_STRAND_CHARS[pbs_strand],
                        gt_feature_node_get_attribute(ls->element.pbs, "trna"),
                        gt_str_get(seq),
                        gt_feature_node_get_attribute(ls->element.pbs,
                                                      "pbsoffset"),
                        gt_feature_node_get_attribute(ls->element.pbs,
                                                      "trnaoffset"),
                        gt_feature_node_get_attribute(ls->element.pbs,
                                                      "edist"));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t\t\t\t\t\t");

    /* output protein domains */
    if (!had_err && ls->element.pdoms != NULL) {
      GtStr *pdomorderstr = gt_str_new();
      for (i=0; !had_err && i<gt_array_size(ls->element.pdomorder); i++) {
        const char* key = *(const char**) gt_array_get(ls->element.pdomorder,
                                                       i);
        GtArray *entry = (GtArray*) gt_hashmap_get(ls->element.pdoms, key);
        had_err = write_pdom(ls, entry, key, ls->rmap, desc, err);
      }

      /* the domain order is reported in the orientation of the element */
      if (GT_STRAND_REVERSE == gt_feature_node_get_strand(ls->element.mainnode))
        gt_array_reverse(ls->element.pdomorder);

      for (i=0 ;!had_err && i<gt_array_size(ls->element.pdomorder); i++) {
        const char* name = *(const char**) gt_array_get(ls->element.pdomorder,
                                                        i);
        gt_str_append_cstr(pdomorderstr, name);
        if (i != gt_array_size(ls->element.pdomorder)-1)
          gt_str_append_cstr(pdomorderstr, "/");
      }
      gt_file_xprintf(ls->tabout_file, "%s", gt_str_get(pdomorderstr));
      gt_str_delete(pdomorderstr);
    }

    /* output LTRs (we just expect them to exist) */
    switch (gt_feature_node_get_strand(ls->element.mainnode)) {
      case GT_STRAND_REVERSE:
        ltr5 = ls->element.rightLTR;
        ltr3 = ls->element.leftLTR;
        break;
      case GT_STRAND_FORWARD:
      default:
        ltr5 = ls->element.leftLTR;
        ltr3 = ls->element.rightLTR;
        break;
    }

    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq, (GtGenomeNode*) ltr5,
                                          gt_symbol(gt_ft_long_terminal_repeat),
                                          false, NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc, gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->ltr5out_file);
      gt_str_reset(seq);
    }
    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq, (GtGenomeNode*) ltr3,
                                          gt_symbol(gt_ft_long_terminal_repeat),
                                          false, NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc, gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->ltr3out_file);
      gt_str_reset(seq);
    }

    /* output complete oriented element */
    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq,
                                           (GtGenomeNode*) ls->element.mainnode,
                                           gt_symbol(gt_ft_LTR_retrotransposon),
                                           false, NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc,gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->elemout_file);
      gt_str_reset(seq);
    }
    gt_file_xprintf(ls->tabout_file, "\n");
    gt_str_delete(seq);
  }

  /* free the per-element data collected for this node */
  gt_hashmap_delete(ls->element.pdoms);
  gt_array_delete(ls->element.pdomorder);
  gt_free(ls->element.seqid);
  return had_err;
}