/* Create a SNP annotator stream fed by <gvf_stream> and <gff_stream>.
   Both input streams and <rmap> receive an additional reference here;
   <trans_table> is stored as passed (no reference is taken in this function).
   All of <gvf_stream>, <gff_stream> and <rmap> must be non-NULL. */
GtNodeStream* gt_snp_annotator_stream_new(GtNodeStream *gvf_stream,
                                          GtNodeStream *gff_stream,
                                          GtTransTable *trans_table,
                                          GtRegionMapping *rmap)
{
  GtNodeStream *stream;
  GtSNPAnnotatorStream *annotator;

  gt_assert(gvf_stream && gff_stream && rmap);

  stream = gt_node_stream_create(gt_snp_annotator_stream_class(), true);
  annotator = gt_snp_annotator_stream_cast(stream);

  /* gather both inputs, each with an extra reference, for the merge stream */
  annotator->instreams = gt_array_new(sizeof (GtNodeStream*));
  (void) gt_node_stream_ref(gvf_stream);
  gt_array_add(annotator->instreams, gvf_stream);
  (void) gt_node_stream_ref(gff_stream);
  gt_array_add(annotator->instreams, gff_stream);

  annotator->cur_gene_set = gt_array_new(sizeof (GtFeatureNode*));
  annotator->merge_stream = gt_merge_stream_new(annotator->instreams);
  annotator->rmap = gt_region_mapping_ref(rmap);

  /* no gene range seen yet: both ends start out undefined */
  annotator->cur_gene_range.end = GT_UNDEF_UWORD;
  annotator->cur_gene_range.start = annotator->cur_gene_range.end;

  annotator->snps = gt_queue_new();
  annotator->outqueue = gt_queue_new();
  annotator->tt = trans_table;

  return stream;
}
/* Create a filter stream on top of <in_stream> (must be non-NULL).
   The new stream is created with the same sortedness flag as <in_stream> and
   holds an additional reference to it. All remaining parameters are passed
   unchanged to gt_filter_visitor_new(), which builds the visitor stored in
   the stream. */
GtNodeStream* gt_filter_stream_new(GtNodeStream *in_stream,
                                   GtStr *seqid, GtStr *typefilter,
                                   GtRange contain_range, GtRange overlap_range,
                                   GtStrand strand, GtStrand targetstrand,
                                   bool has_CDS, unsigned long max_gene_length,
                                   unsigned long max_gene_num,
                                   double min_gene_score, double max_gene_score,
                                   double min_average_splice_site_prob,
                                   unsigned long feature_num)
{
  GtNodeStream *gs;
  GtFilterStream *filter_stream;

  /* check the precondition before <in_stream> is used for anything */
  gt_assert(in_stream);

  gs = gt_node_stream_create(gt_filter_stream_class(),
                             gt_node_stream_is_sorted(in_stream));
  filter_stream = gt_filter_stream_cast(gs);
  filter_stream->in_stream = gt_node_stream_ref(in_stream);
  filter_stream->filter_visitor = gt_filter_visitor_new(seqid, typefilter,
                                                  contain_range, overlap_range,
                                                  strand, targetstrand, has_CDS,
                                                  max_gene_length, max_gene_num,
                                                  min_gene_score,
                                                  max_gene_score,
                                                  min_average_splice_site_prob,
                                                  feature_num);
  return gs;
}
/* Create a custom node stream (with sortedness flag <sorted>) wrapped in a Lua
   userdata and leave that userdata on top of the stack of <L>.
   The userdata gets the GENOME_STREAM_METATABLE metatable and a fresh, empty
   environment table; its "next_tree" entry is set to a failing default so that
   Lua code has to override it. A registry reference to the userdata is kept in
   the stream. Returns 1. */
static int lua_custom_stream_new_generic(lua_State *L, bool sorted)
{
  GtLuaCustomStream *lcs;
  GtNodeStream **ns;
  gt_assert(L);

  /* the userdata stores exactly one GtNodeStream pointer, so size it with the
     stream pointer type (not with an unrelated visitor pointer type) */
  ns = lua_newuserdata(L, sizeof (GtNodeStream*));
  gt_assert(ns);
  *ns = gt_node_stream_create(gt_lua_custom_stream_class(), sorted);
  gt_assert(*ns);
  lcs = lua_custom_stream_cast(*ns);
  luaL_getmetatable(L, GENOME_STREAM_METATABLE);
  lua_setmetatable(L, -2);

  /* set clean env for this object */
  lua_newtable(L);
  lua_setfenv(L, -2);
  lcs->L = L;

  /* replace the default next_tree method to force an override
     in the custom stream */
  lua_pushstring(L, "next_tree");
  lua_pushcfunction(L, gt_node_stream_lua_next_tree_fail);
  lua_settable(L, -3);

  /* store reference to Lua object; the duplicate pushed here is consumed by
     luaL_ref(), so the userdata itself stays on top of the stack */
  lua_pushvalue(L, -1);
  lcs->ref = luaL_ref(L, LUA_REGISTRYINDEX);
  return 1;
}
GtNodeStream* gt_select_stream_new(GtNodeStream *in_stream, GtStr *seqid, GtStr *source, const GtRange *contain_range, const GtRange *overlap_range, GtStrand strand, GtStrand targetstrand, bool has_CDS, unsigned long max_gene_length, unsigned long max_gene_num, double min_gene_score, double max_gene_score, double min_average_splice_site_prob, unsigned long feature_num, GtStrArray *select_files, GtStr *select_logic, GtError *err) { GtNodeStream *ns = gt_node_stream_create(gt_select_stream_class(), gt_node_stream_is_sorted(in_stream)); GtSelectStream *select_stream = gt_select_stream_cast(ns); gt_assert(in_stream); select_stream->in_stream = gt_node_stream_ref(in_stream); select_stream->select_visitor = gt_select_visitor_new(seqid, source, contain_range, overlap_range, strand, targetstrand, has_CDS, max_gene_length, max_gene_num, min_gene_score, max_gene_score, min_average_splice_site_prob, feature_num, select_files, select_logic, err); if (!select_stream->select_visitor) { gt_node_stream_delete(ns); return NULL; } return ns; }
/* Create a CDS check stream reading from <in_stream>.
   The stream keeps an additional reference to <in_stream> and owns a newly
   created CDS check visitor. */
GtNodeStream* gt_cds_check_stream_new(GtNodeStream *in_stream)
{
  GtNodeStream *stream;
  GtCDSCheckStream *check_stream;

  stream = gt_node_stream_create(gt_cds_check_stream_class(), false);
  check_stream = cds_check_stream_cast(stream);
  check_stream->in_stream = gt_node_stream_ref(in_stream);
  check_stream->cds_check_visitor = gt_cds_check_visitor_new();
  return stream;
}
GtNodeStream* gt_gtf_in_stream_new(const char *filename) { GtGTFInStream *gtf_in_stream; GtNodeStream *ns = gt_node_stream_create(gt_gtf_in_stream_class(), false); gtf_in_stream = gtf_in_stream_cast(ns); gtf_in_stream->genome_node_buffer = gt_queue_new(); gtf_in_stream->type_checker = gt_type_checker_builtin_new(); gtf_in_stream->filename = filename ? gt_cstr_dup(filename) : NULL; return ns; }
/* Create a feature stream reading from <in_stream>.
   The stream keeps an additional reference to <in_stream> and a feature
   visitor constructed from the feature index <fi>. */
GtNodeStream* gt_feature_stream_new(GtNodeStream *in_stream, GtFeatureIndex *fi)
{
  GtNodeStream *stream = gt_node_stream_create(gt_feature_stream_class(),
                                               false);
  GtFeatureStream *fs = feature_stream_cast(stream);

  fs->in_stream = gt_node_stream_ref(in_stream);
  fs->feature_visitor = gt_feature_visitor_new(fi);
  return stream;
}
/* Create a load stream reading from <in_stream> (must be non-NULL).
   The stream keeps an additional reference to <in_stream> and starts with an
   empty node array, index 0 and the <full> flag cleared. */
GtNodeStream* gt_load_stream_new(GtNodeStream *in_stream)
{
  GtNodeStream *stream;
  GtLoadStream *loader;

  stream = gt_node_stream_create(gt_load_stream_class(), true);
  loader = gt_load_stream_cast(stream);
  gt_assert(in_stream);

  loader->in_stream = gt_node_stream_ref(in_stream);
  loader->full = false;
  loader->idx = 0;
  loader->nodes = gt_array_new(sizeof (GtGenomeNode*));
  return stream;
}
/* Create a sort stream reading from <in_stream> (must be non-NULL).
   The stream keeps an additional reference to <in_stream> and starts with an
   empty node array, index 0 and the <sorted> flag cleared. */
GtNodeStream* gt_sort_stream_new(GtNodeStream *in_stream)
{
  GtNodeStream *stream;
  GtSortStream *sorter;

  stream = gt_node_stream_create(gt_sort_stream_class(), true);
  sorter = gt_sort_stream_cast(stream);
  gt_assert(in_stream);

  sorter->in_stream = gt_node_stream_ref(in_stream);
  sorter->sorted = false;
  sorter->idx = 0;
  sorter->nodes = gt_array_new(sizeof (GtGenomeNode*));
  return stream;
}
/* Create a script wrapper stream around the callbacks <next> and <free>.
   <next> must be non-NULL; <free> is stored as passed and is not checked
   here. */
GtNodeStream* gt_script_wrapper_stream_new(GtScriptWrapperStreamNextFunc next,
                                           GtScriptWrapperStreamFreeFunc free)
{
  GtScriptWrapperStream *wrapper;
  GtNodeStream *stream;

  gt_assert(next);
  stream = gt_node_stream_create(gt_script_wrapper_stream_class(), false);
  wrapper = script_wrapper_stream_cast(stream);
  wrapper->next_func = next;
  wrapper->free_func = free;
  return stream;
}
/* Create a buffer stream reading from <in_stream> (must be non-NULL).
   The stream keeps an additional reference to <in_stream>, owns a new empty
   node queue and starts with buffering switched on. */
GtNodeStream* gt_buffer_stream_new(GtNodeStream *in_stream)
{
  GtNodeStream *stream;
  GtBufferStream *buffer_stream;

  gt_assert(in_stream);
  stream = gt_node_stream_create(gt_buffer_stream_class(), false);
  buffer_stream = buffer_stream_cast(stream);
  buffer_stream->in_stream = gt_node_stream_ref(in_stream);
  buffer_stream->node_buffer = gt_queue_new();
  buffer_stream->buffering = true;
  return stream;
}
/* Create a visitor stream that reads from <in_stream> and uses <visitor>.
   The new stream is created with the same sortedness flag as <in_stream> and
   keeps an additional reference to it; the <visitor> pointer is stored as
   passed. */
GtNodeStream* gt_visitor_stream_new(GtNodeStream *in_stream,
                                    GtNodeVisitor *visitor)
{
  GtNodeStream *stream = gt_node_stream_create(gt_visitor_stream_class(),
                                          gt_node_stream_is_sorted(in_stream));
  GtVisitorStream *vs = visitor_stream_cast(stream);

  vs->in_stream = gt_node_stream_ref(in_stream);
  vs->visitor = visitor;
  return stream;
}
/* Create an inter-feature stream reading from <in_stream> (must be non-NULL).
   The stream keeps an additional reference to <in_stream> and an
   inter-feature visitor constructed from <outside_type> and <inter_type>. */
GtNodeStream* gt_inter_feature_stream_new(GtNodeStream *in_stream,
                                          const char *outside_type,
                                          const char *inter_type)
{
  GtNodeStream *stream;
  GtInterFeatureStream *ifs;

  stream = gt_node_stream_create(gt_inter_feature_stream_class(), false);
  ifs = gt_inter_feature_stream_cast(stream);
  gt_assert(in_stream);

  ifs->in_stream = gt_node_stream_ref(in_stream);
  ifs->inter_feature_visitor = gt_inter_feature_visitor_new(outside_type,
                                                            inter_type);
  return stream;
}
/* Create a numerically sorting GFF3 output stream that reads from <in_stream>
   and whose GFF3 visitor is constructed with <outfp>.
   The stream keeps an additional reference to <in_stream>, owns an empty node
   buffer and starts without an output queue. */
GtNodeStream* gt_gff3_numsorted_out_stream_new(GtNodeStream *in_stream,
                                               GtFile *outfp)
{
  GtNodeStream *stream;
  GtGFF3NumsortedOutStream *out_stream;

  stream = gt_node_stream_create(gt_gff3_numsorted_out_stream_class(), false);
  out_stream = gff3_numsorted_out_stream_cast(stream);
  out_stream->in_stream = gt_node_stream_ref(in_stream);
  out_stream->buffer = gt_array_new(sizeof (GtGenomeNode*));
  out_stream->outqueue = NULL;
  out_stream->gff3_visitor = gt_gff3_visitor_new(outfp);
  return stream;
}
/* Create a filter stream reading from <in_stream> and using the hashmap
   <typestokeep>; both must be non-NULL.
   The stream keeps an additional reference to each of them and owns a new
   empty cache queue. */
GtNodeStream* agn_filter_stream_new(GtNodeStream *in_stream,
                                    GtHashmap *typestokeep)
{
  AgnFilterStream *filter;
  GtNodeStream *result;

  gt_assert(in_stream && typestokeep);
  result = gt_node_stream_create(filter_stream_class(), false);
  filter = filter_stream_cast(result);
  filter->in_stream = gt_node_stream_ref(in_stream);
  filter->cache = gt_queue_new();
  filter->typestokeep = gt_hashmap_ref(typestokeep);
  return result;
}
/* Create a targetbest filter stream reading from <in_stream> (must be
   non-NULL).
   The new stream is created with the same sortedness flag as <in_stream> and
   keeps an additional reference to it. It starts with the "input processed"
   flag cleared, an empty list of trees and an empty string-keyed hashmap
   whose keys are released with gt_free_func. */
GtNodeStream* gt_targetbest_filter_stream_new(GtNodeStream *in_stream)
{
  GtNodeStream *stream;
  GtTargetbestFilterStream *filter;

  gt_assert(in_stream);
  stream = gt_node_stream_create(gt_targetbest_filter_stream_class(),
                                 gt_node_stream_is_sorted(in_stream));
  filter = targetbest_filter_stream_cast(stream);
  filter->in_stream = gt_node_stream_ref(in_stream);
  filter->in_stream_processed = false;
  filter->trees = gt_dlist_new(NULL);
  filter->target_to_elem = gt_hashmap_new(HASH_STRING, gt_free_func, NULL);
  return stream;
}
/* Create an array output stream that reads from <in_stream> and uses the
   array <nodes>; both must be non-NULL.
   The stream keeps an additional reference to <in_stream>; the <nodes>
   pointer is stored as passed. <err> is unused. */
GtNodeStream* gt_array_out_stream_new(GtNodeStream *in_stream, GtArray *nodes,
                                      GT_UNUSED GtError *err)
{
  GtArrayOutStream *out_stream;
  GtNodeStream *stream;

  gt_assert(in_stream && nodes);
  stream = gt_node_stream_create(gt_array_out_stream_class(), false);
  out_stream = gt_array_out_stream_cast(stream);
  out_stream->in_stream = gt_node_stream_ref(in_stream);
  out_stream->nodes = nodes;
  return stream;
}
/* Create an array input stream over the array <nodes> (must be non-NULL).
   The <nodes> and <progress> pointers are stored as passed and the next
   index starts at 0. <err> is unused. */
GtNodeStream* gt_array_in_stream_new(GtArray *nodes, unsigned long *progress,
                                     GT_UNUSED GtError *err)
{
  GtArrayInStream *in_stream;
  GtNodeStream *stream;

  gt_assert(nodes);
  stream = gt_node_stream_create(gt_array_in_stream_class(), false);
  in_stream = gt_array_in_stream_cast(stream);
  in_stream->nodes = nodes;
  in_stream->progress = progress;
  in_stream->next_index = 0;
  return stream;
}
/* Create a feature input stream for the feature index <fi> (must be
   non-NULL).
   The <fi> pointer is stored as passed. The stream owns a new empty region
   cache queue, has no feature cache yet, and starts with the <useorig> and
   <init> flags cleared. */
GtNodeStream* gt_feature_in_stream_new(GtFeatureIndex *fi)
{
  GtFeatureInStream *feature_stream;
  GtNodeStream *result;

  gt_assert(fi);
  result = gt_node_stream_create(gt_feature_in_stream_class(), true);
  feature_stream = feature_in_stream_cast(result);
  feature_stream->fi = fi;
  feature_stream->regioncache = gt_queue_new();
  feature_stream->featurecache = NULL;
  feature_stream->useorig = false;
  feature_stream->init = false;
  return result;
}
/* Create a sequence-node-adding stream reading from <in_stream> (must be
   non-NULL).
   The new stream is created with the same sortedness flag as <in_stream> and
   keeps an additional reference to it. The region mapping pointer <rm> is
   stored as passed. The stream owns a new seqid table and an ID-collecting
   visitor constructed on that table; the list of seqids is not built yet.
   <err> is unused. */
GtNodeStream* gt_sequence_node_add_stream_new(GtNodeStream *in_stream,
                                              GtRegionMapping *rm,
                                              GT_UNUSED GtError *err)
{
  GtNodeStream *ns;
  GtSequenceNodeAddStream *s;

  /* check the precondition before <in_stream> is used for anything */
  gt_assert(in_stream);

  ns = gt_node_stream_create(gt_sequence_node_add_stream_class(),
                             gt_node_stream_is_sorted(in_stream));
  s = gt_sequence_node_add_stream_cast(ns);
  s->rm = rm;
  s->in_stream = gt_node_stream_ref(in_stream);
  s->seqid_table = gt_cstr_table_new();
  s->seqids = NULL;
  s->cur_seqid = 0;
  s->collect_vis = gt_collect_ids_visitor_new(s->seqid_table);
  return ns;
}
GtNodeStream* gt_gff3_in_stream_new_sorted(const char *filename) { GtNodeStream *ns = gt_node_stream_create(gt_gff3_in_stream_class(), true); GtGFF3InStream *is = gff3_in_stream_cast(ns); is->fix_region_stream = NULL; is->last_stream = is->gff3_in_stream_plain = gt_gff3_in_stream_plain_new_sorted(filename); gt_gff3_in_stream_plain_check_region_boundaries( (GtGFF3InStreamPlain*) is->gff3_in_stream_plain); is->last_stream = is->add_ids_stream = gt_add_ids_stream_new(is->last_stream); is->last_stream = is->multi_sanitize_stream = gt_visitor_stream_new(is->last_stream, gt_multi_sanitizer_visitor_new()); is->last_stream = is->cds_check_stream = gt_cds_check_stream_new(is->last_stream); return ns; }
/* Create a statistics stream reading from <in_stream>.
   The stream keeps an additional reference to <in_stream>; all boolean
   switches are passed unchanged to gt_stat_visitor_new(), which builds the
   visitor stored in the stream. */
GtNodeStream* gt_stat_stream_new(GtNodeStream *in_stream,
                                 bool gene_length_distri,
                                 bool gene_score_distri,
                                 bool exon_length_distri,
                                 bool exon_number_distri,
                                 bool intron_length_distri,
                                 bool cds_length_distri,
                                 bool used_sources)
{
  GtNodeStream *stream;
  GtStatStream *stat_stream;

  stream = gt_node_stream_create(gt_stat_stream_class(), false);
  stat_stream = stat_stream_cast(stream);
  stat_stream->in_stream = gt_node_stream_ref(in_stream);
  stat_stream->stat_visitor = gt_stat_visitor_new(gene_length_distri,
                                                  gene_score_distri,
                                                  exon_length_distri,
                                                  exon_number_distri,
                                                  intron_length_distri,
                                                  cds_length_distri,
                                                  used_sources);
  return stream;
}
/* Create a CpG island overlap stream reading from <in_stream> (must be
   non-NULL) and open the file <cpgi_db> for reading.
   The stream is created with the sorting flag set and keeps an additional
   reference to <in_stream>.
   Returns NULL if the file cannot be opened; in that case the stream is
   deleted and a message is written to stderr. */
GtNodeStream * CpGIOverlap_stream_new(GtNodeStream * in_stream,
                                      const char * cpgi_db)
{
  GtNodeStream * stream;
  CpGIOverlap_stream * overlap;

  /* must be sorted */
  stream = gt_node_stream_create(CpGIOverlap_stream_class(), true);
  overlap = CpGIOverlap_stream_cast(stream);
  gt_assert(in_stream);
  overlap->in_stream = gt_node_stream_ref(in_stream);

  overlap->cpgi_file = fopen(cpgi_db, "r");
  if (overlap->cpgi_file != NULL) {
    return stream;
  }

  /* open failed: drop the half-built stream, then report */
  gt_node_stream_delete(stream);
  fprintf(stderr, "Failed to open CpG Island db file %s\n", cpgi_db);
  return NULL;
}
/* Create an LTR cluster stream reading from <in_stream>.
   The stream keeps an additional reference to <in_stream>, owns an empty node
   array and a sequence-preparing visitor constructed from <encseq> (with
   <err> passed to its constructor). All scoring, drop-off and threshold
   parameters as well as the <current_state> pointer are copied into the
   stream unchanged. */
GtNodeStream* gt_ltr_cluster_stream_new(GtNodeStream *in_stream,
                                        GtEncseq *encseq,
                                        int match_score,
                                        int mismatch_cost,
                                        int gap_open_cost,
                                        int gap_ext_cost,
                                        int xdrop,
                                        int ydrop,
                                        int zdrop,
                                        int k,
                                        int mscoregapped,
                                        int mscoregapless,
                                        unsigned long plarge,
                                        unsigned long psmall,
                                        char **current_state,
                                        GtError *err)
{
  GtLTRClusterStream *cluster;
  GtNodeStream *stream;

  stream = gt_node_stream_create(gt_ltr_cluster_stream_class(), false);
  cluster = gt_ltr_cluster_stream_cast(stream);

  /* input and internal working state */
  cluster->in_stream = gt_node_stream_ref(in_stream);
  cluster->feat_to_encseq = NULL;
  cluster->feat_to_encseq_keys = NULL;
  cluster->nodes = gt_array_new(sizeof(GtGenomeNode*));
  cluster->lcv = gt_ltr_cluster_prepare_seq_visitor_cast(
                          gt_ltr_cluster_prepare_seq_visitor_new(encseq, err));
  cluster->first_next = true;
  cluster->next_index = 0;

  /* scoring scheme */
  cluster->match_score = match_score;
  cluster->mismatch_cost = mismatch_cost;
  cluster->gap_open_cost = gap_open_cost;
  cluster->gap_ext_cost = gap_ext_cost;

  /* drop-off values */
  cluster->xdrop = xdrop;
  cluster->ydrop = ydrop;
  cluster->zdrop = zdrop;

  /* remaining numeric parameters */
  cluster->mscoregapped = mscoregapped;
  cluster->mscoregapless = mscoregapless;
  cluster->k = k;
  cluster->plarge = plarge;
  cluster->psmall = psmall;

  cluster->current_state = current_state;
  return stream;
}
/* Create a BSSM training stream reading from <in_stream>.
   <in_stream>, <region_mapping>, <filter_type> and <extract_type> must be
   non-NULL. The stream keeps an additional reference to <in_stream>; all
   other parameters are passed unchanged to gth_bssm_train_visitor_new(),
   which builds the visitor stored in the stream. */
GtNodeStream* gth_bssm_train_stream_new(GtNodeStream *in_stream,
                                        GtRegionMapping *region_mapping,
                                        GthBSSMSeqProcessor *bsp,
                                        const char *filter_type,
                                        const char *extract_type,
                                        unsigned int good_exon_count,
                                        double cutoff)
{
  GthBSSMTrainStream *train_stream;
  GtNodeStream *stream;

  gt_assert(in_stream && region_mapping && filter_type && extract_type);
  stream = gt_node_stream_create(gth_bssm_train_stream_class(), true);
  train_stream = bssm_train_stream_cast(stream);
  train_stream->in_stream = gt_node_stream_ref(in_stream);
  train_stream->bssm_train_visitor =
    gth_bssm_train_visitor_new(region_mapping, bsp, filter_type, extract_type,
                               good_exon_count, cutoff);
  return stream;
}
/* Create a stream that reads from <in_stream> and uses a string mapping
   loaded from <chseqids_file> (table name "chseqids").
   <in_stream> and <chseqids_file> must be non-NULL and <in_stream> must be
   sorted. The stream keeps an additional reference to <in_stream>.
   Returns NULL if the mapping cannot be created (<err> is passed to
   gt_mapping_new()); the stream is deleted in that case. */
GtNodeStream* gt_chseqids_stream_new(GtNodeStream *in_stream,
                                     GtStr *chseqids_file, GtError *err)
{
  GtChseqidsStream *chseqids;
  GtNodeStream *stream;

  gt_error_check(err);
  gt_assert(in_stream && chseqids_file);
  gt_assert(gt_node_stream_is_sorted(in_stream));

  stream = gt_node_stream_create(gt_chseqids_stream_class(), false);
  chseqids = chseqids_stream_cast(stream);
  chseqids->in_stream = gt_node_stream_ref(in_stream);
  chseqids->chseqids_mapping = gt_mapping_new(chseqids_file, "chseqids",
                                              MAPPINGTYPE_STRING, err);
  if (chseqids->chseqids_mapping) {
    /* the node buffer is only allocated once the mapping exists */
    chseqids->gt_genome_node_buffer = gt_array_new(sizeof (GtGenomeNode*));
    return stream;
  }

  gt_node_stream_delete(stream);
  return NULL;
}
/* Create a CpG island score stream reading from <in_stream> (must be
   non-NULL) and open the file <methylome_db> for reading.
   The stream is created with the sorting flag set and keeps an additional
   reference to <in_stream>; the "previous methylome" position, fraction and
   chromosome all start at zero.
   Returns NULL if the file cannot be opened; in that case the stream is
   deleted and a message is written to stderr. */
GtNodeStream * CpGI_score_stream_new(GtNodeStream * in_stream,
                                     const char * methylome_db)
{
  GtNodeStream * stream;
  CpGI_score_stream * scorer;

  /* must be sorted */
  stream = gt_node_stream_create(CpGI_score_stream_class(), true);
  scorer = CpGI_score_stream_cast(stream);
  gt_assert(in_stream);

  scorer->in_stream = gt_node_stream_ref(in_stream);
  scorer->previous_methylome_position = 0;
  scorer->previous_methylome_fraction = 0.0f;
  scorer->previous_methylome_chromosome = 0;

  scorer->methylome_file = fopen(methylome_db, "r");
  if (scorer->methylome_file != NULL) {
    return stream;
  }

  /* open failed: drop the half-built stream, then report */
  gt_node_stream_delete(stream);
  fprintf(stderr, "Failed to open methylome db file %s\n", methylome_db);
  return NULL;
}
/* Create a plain GFF3 input stream over <files>; takes ownership of <files>.
   <ensure_sorting> is used both as the sorting flag of the node stream and as
   the stream's own <ensure_sorting> setting. The stream starts at file index
   0 with no file open, line number 0, an empty node buffer, a new GFF3 parser
   (constructed with NULL), an empty table of used types, and with ID
   checking, region checking and the progress bar switched off. */
static GtNodeStream* gff3_in_stream_plain_new(GtStrArray *files,
                                              bool ensure_sorting)
{
  GtNodeStream *stream;
  GtGFF3InStreamPlain *plain;

  stream = gt_node_stream_create(gt_gff3_in_stream_plain_class(),
                                 ensure_sorting);
  plain = gff3_in_stream_plain_cast(stream);

  /* input files and reading position */
  plain->next_file = 0;
  plain->files = files;
  plain->stdinstr = gt_str_new_cstr("stdin");
  plain->ensure_sorting = ensure_sorting;
  plain->stdin_argument = false;
  plain->file_is_open = false;
  plain->fpin = NULL;
  plain->line_number = 0;

  /* parsing state and options */
  plain->genome_node_buffer = gt_queue_new();
  plain->checkids = false;
  plain->checkregions = false;
  plain->gff3_parser = gt_gff3_parser_new(NULL);
  plain->used_types = gt_cstr_table_new();
  plain->progress_bar = false;

  return stream;
}
/* Create an LTRdigest file output stream reading from <in_stream>.
   <file_prefix>, <in_stream> and <rmap> must be non-NULL.
   The stream keeps an additional reference to <in_stream>; <rmap> and
   <file_prefix> are stored as passed. Output files named
   "<file_prefix>_tabout.csv", "_5ltr.fas", "_3ltr.fas" and "_complete.fas" are
   always opened in "w+" mode; "_ppt.fas" and "_pbs.fas" are opened only when
   the corresponding GT_LTRDIGEST_RUN_PPT / GT_LTRDIGEST_RUN_PBS bit is set in
   <tests_to_run>. The header line of the tabular output file is written
   immediately.
   NOTE(review): the results of gt_file_open() are not checked here; confirm
   how callers detect a failed open (presumably via <err>). */
GtNodeStream* gt_ltrdigest_file_out_stream_new(GtNodeStream *in_stream,
                                               int tests_to_run,
                                               GtRegionMapping *rmap,
                                               char *file_prefix,
                                               unsigned int seqnamelen,
                                               GtError* err)
{
  char path[GT_MAXFILENAMELEN];
  GtLTRdigestFileOutStream *out;
  GtNodeStream *stream;

  gt_error_check(err);
  gt_assert(file_prefix && in_stream && rmap);

  stream = gt_node_stream_create(gt_ltrdigest_file_out_stream_class(), false);
  out = gt_ltrdigest_file_out_stream_cast(stream);

  /* reference the GFF input stream; remember the remaining settings */
  out->in_stream = gt_node_stream_ref(in_stream);
  out->rmap = rmap;
  out->tests_to_run = tests_to_run;
  out->seqnamelen = seqnamelen;
  out->write_pdom_alignments = false;
  out->write_pdom_aaseqs = false;

  /* open the output files; <path> is reused for every file name */
  out->fileprefix = file_prefix;

  (void) snprintf(path, (size_t) (GT_MAXFILENAMELEN-1),
                  "%s_tabout.csv", file_prefix);
  out->tabout_file = gt_file_open(GT_FILE_MODE_UNCOMPRESSED, path, "w+", err);

  if (tests_to_run & GT_LTRDIGEST_RUN_PPT) {
    (void) snprintf(path, (size_t) (GT_MAXFILENAMELEN-1),
                    "%s_ppt.fas", file_prefix);
    out->pptout_file = gt_file_open(GT_FILE_MODE_UNCOMPRESSED, path, "w+",
                                    err);
  }

  if (tests_to_run & GT_LTRDIGEST_RUN_PBS) {
    (void) snprintf(path, (size_t) (GT_MAXFILENAMELEN-1),
                    "%s_pbs.fas", file_prefix);
    out->pbsout_file = gt_file_open(GT_FILE_MODE_UNCOMPRESSED, path, "w+",
                                    err);
  }

  (void) snprintf(path, (size_t) (GT_MAXFILENAMELEN-1),
                  "%s_5ltr.fas", file_prefix);
  out->ltr5out_file = gt_file_open(GT_FILE_MODE_UNCOMPRESSED, path, "w+", err);

  (void) snprintf(path, (size_t) (GT_MAXFILENAMELEN-1),
                  "%s_3ltr.fas", file_prefix);
  out->ltr3out_file = gt_file_open(GT_FILE_MODE_UNCOMPRESSED, path, "w+", err);

  (void) snprintf(path, (size_t) (GT_MAXFILENAMELEN-1),
                  "%s_complete.fas", file_prefix);
  out->elemout_file = gt_file_open(GT_FILE_MODE_UNCOMPRESSED, path, "w+", err);

  /* string-keyed hashmaps for the protein domain output files; keys are
     released with gt_free_func and values with gt_file_delete */
  out->pdomout_files = gt_hashmap_new(GT_HASH_STRING, gt_free_func,
                                      (GtFree) gt_file_delete);
  out->pdomali_files = gt_hashmap_new(GT_HASH_STRING, gt_free_func,
                                      (GtFree) gt_file_delete);
  out->pdomaa_files = gt_hashmap_new(GT_HASH_STRING, gt_free_func,
                                     (GtFree) gt_file_delete);

  /* write the header line of the tabular output file */
  gt_file_xprintf(out->tabout_file,
              "element start\telement end\telement length\tsequence\t"
              "lLTR start\tlLTR end\tlLTR length\t"
              "rLTR start\trLTR end\trLTR length\t"
              "lTSD start\tlTSD end\tlTSD motif\t"
              "rTSD start\trTSD end\trTSD motif\t"
              "PPT start\tPPT end\tPPT motif\tPPT strand\tPPT offset");
  gt_file_xprintf(out->tabout_file,
              "\tPBS start\tPBS end\tPBS strand\ttRNA\ttRNA motif\tPBS offset\t"
              "tRNA offset\tPBS/tRNA edist");
#ifdef HAVE_HMMER
  /* this column is only part of the header in builds with HAVE_HMMER */
  gt_file_xprintf(out->tabout_file, "\tProtein domain hits");
#endif
  gt_file_xprintf(out->tabout_file, "\n");

  /* create the LTR visitor operating on the stream's own element record */
  out->lv = (GtLTRVisitor*) gt_ltr_visitor_new(&out->element);

  return stream;
}