/* Tool runner: reads GFF3 from the file named by argv[parsed_args], feeds it
   through a CSA stream configured with the join length from <tool_arguments>
   and writes the result as GFF3 to the configured output file.
   Returns the status reported by gt_node_stream_pull(). */
static int gt_csa_runner(GT_UNUSED int argc, const char **argv, int parsed_args,
                         void *tool_arguments, GtError *err)
{
  CSAArguments *csa_args = tool_arguments;
  GtNodeStream *input, *csa, *output;
  int status;

  gt_error_check(err);
  gt_assert(csa_args);

  /* assemble the pipeline: GFF3 input -> CSA -> GFF3 output */
  input = gt_gff3_in_stream_new_sorted(argv[parsed_args]);
  /* the progress bar is only shown when verbose AND an output file is set */
  if (csa_args->verbose && csa_args->outfp) {
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) input);
  }
  csa = gt_csa_stream_new(input, csa_args->join_length);
  output = gt_gff3_out_stream_new(csa, csa_args->outfp);

  /* drain the pipeline; the nodes are freed while being pulled */
  status = gt_node_stream_pull(output, err);

  /* release the streams, last created first */
  gt_node_stream_delete(output);
  gt_node_stream_delete(csa);
  gt_node_stream_delete(input);

  return status;
}
/* Tool runner: compares a prediction annotation against a reference.
   argv[parsed_args] names the reference GFF3 file and argv[parsed_args + 1]
   the prediction GFF3 file. The evaluation is printed to the configured
   output file only if it was computed without error.
   Returns the status of gt_stream_evaluator_evaluate(). */
int gt_eval_runner(GT_UNUSED int argc, const char **argv, int parsed_args,
                   void *tool_arguments, GtError *err)
{
  EvalArguments *eval_args = tool_arguments;
  GtNodeStream *reference, *prediction;
  GtStreamEvaluator *stream_eval;
  int status;

  gt_error_check(err);
  gt_assert(eval_args);

  /* reference annotation */
  reference = gt_gff3_in_stream_new_sorted(argv[parsed_args]);
  if (eval_args->verbose) {
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) reference);
  }

  /* predicted annotation */
  prediction = gt_gff3_in_stream_new_sorted(argv[parsed_args + 1]);
  if (eval_args->verbose) {
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) prediction);
  }

  /* evaluator working on both streams */
  stream_eval = gt_stream_evaluator_new(reference, prediction,
                                        eval_args->nuceval,
                                        eval_args->evalLTR,
                                        eval_args->LTRdelta);

  /* run the comparison */
  status = gt_stream_evaluator_evaluate(stream_eval, eval_args->verbose,
                                        eval_args->exondiff,
                                        eval_args->exondiffcollapsed, NULL,
                                        err);

  /* report only a successfully computed evaluation */
  if (status == 0) {
    gt_stream_evaluator_show(stream_eval, eval_args->outfp);
  }

  /* cleanup */
  gt_stream_evaluator_delete(stream_eval);
  gt_node_stream_delete(prediction);
  gt_node_stream_delete(reference);

  return status;
}
GtSeqposClassifier *gt_seqpos_classifier_new(const char *filename, const char *feature_type) { GtSeqposClassifier *seqpos_classifier; seqpos_classifier = gt_malloc(sizeof (GtSeqposClassifier)); seqpos_classifier->annotation_stream = gt_gff3_in_stream_new_sorted(filename); seqpos_classifier->fn = NULL; seqpos_classifier->fni = NULL; seqpos_classifier->gn = NULL; seqpos_classifier->nof_specified_ft_found = 0; seqpos_classifier->specified_ft = feature_type; return seqpos_classifier; }
/* Tool entry point: parses the options, then pipes the GFF3 file named by
   argv[parsed_args + 1] through a chseqids stream built from the file named
   by argv[parsed_args], optionally through a sort stream, and writes GFF3 to
   the configured output file.
   Returns -1 if option parsing fails or the chseqids stream cannot be
   created, 0 if the option parser requests an exit, and otherwise the status
   of gt_node_stream_pull(). */
int gt_chseqids(int argc, const char **argv, GtError *err)
{
  GtNodeStream *input, *chseqids_stream, *sort_stream = NULL,
               *output = NULL;
  ChseqidsArguments arguments;
  GtStr *mapping_filename;
  int parsed_args, had_err = 0;

  gt_error_check(err);

  /* option parsing */
  switch (parse_options(&parsed_args, &arguments, argc, argv, err)) {
    case GT_OPTION_PARSER_OK:
      break;
    case GT_OPTION_PARSER_ERROR:
      return -1;
    case GT_OPTION_PARSER_REQUESTS_EXIT:
      return 0;
  }

  /* GFF3 input; the progress bar needs verbose mode AND an output file */
  input = gt_gff3_in_stream_new_sorted(argv[parsed_args + 1]);
  if (arguments.verbose && arguments.outfp) {
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) input);
  }

  /* the string object is only needed while the chseqids stream is created */
  mapping_filename = gt_str_new_cstr(argv[parsed_args]);
  chseqids_stream = gt_chseqids_stream_new(input, mapping_filename, err);
  gt_str_delete(mapping_filename);

  if (chseqids_stream) {
    GtNodeStream *upstream = chseqids_stream;

    /* optionally sort before writing */
    if (arguments.sort) {
      sort_stream = gt_sort_stream_new(chseqids_stream);
      upstream = sort_stream;
    }
    output = gt_gff3_out_stream_new(upstream, arguments.outfp);

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(output, err);
  }
  else {
    had_err = -1;
  }

  /* free */
  gt_node_stream_delete(output);
  gt_node_stream_delete(chseqids_stream);
  gt_node_stream_delete(sort_stream);
  gt_node_stream_delete(input);
  gt_file_delete(arguments.outfp);

  return had_err;
}
/* Lua binding: expects a filename as first argument, raises a Lua argument
   error ("file does not exist") if gt_file_exists() rejects it, and otherwise
   pushes a userdata holding a new sorted GFF3 input stream, tagged with the
   GENOME_STREAM_METATABLE metatable. Returns 1 (one value left on the Lua
   stack). */
static int gff3_in_stream_lua_new_sorted(lua_State *L)
{
  const char *path;
  GtNodeStream **stream_slot;

  gt_assert(L);

  /* get/check parameters */
  path = luaL_checkstring(L, 1);
  luaL_argcheck(L, gt_file_exists(path), 1, "file does not exist");

  /* the userdata stores just the pointer to the stream */
  stream_slot = lua_newuserdata(L, sizeof *stream_slot);
  *stream_slot = gt_gff3_in_stream_new_sorted(path);
  gt_assert(*stream_slot);

  /* attach the metatable to the userdata on top of the stack */
  luaL_getmetatable(L, GENOME_STREAM_METATABLE);
  lua_setmetatable(L, -2);

  return 1;
}
/* Tool runner: reads GFF3 from the file named by argv[parsed_args] and pulls
   it through an extract feature stream which is configured from
   <tool_arguments> (type, join, translate, seqid, target, width, output
   file).
   Returns -1 if the region mapping cannot be created, otherwise the status
   of gt_node_stream_pull(). */
static int gt_extractfeat_runner(GT_UNUSED int argc, const char **argv,
                                 int parsed_args, void *tool_arguments,
                                 GtError *err)
{
  GtExtractFeatArguments *opts = tool_arguments;
  GtNodeStream *input = NULL, *extractor = NULL;
  GtRegionMapping *mapping;
  int status = 0;

  gt_error_check(err);
  gt_assert(opts);

  /* create gff3 input stream */
  input = gt_gff3_in_stream_new_sorted(argv[parsed_args]);
  if (opts->verbose) {
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) input);
  }

  /* create region mapping */
  mapping = gt_seqid2file_region_mapping_new(opts->s2fi, err);

  if (mapping) {
    /* NOTE(review): the mapping is handed to the stream and never deleted
       here, so the stream presumably takes ownership -- confirm */
    extractor = gt_extract_feature_stream_new(input, mapping,
                                              gt_str_get(opts->type),
                                              opts->join, opts->translate,
                                              opts->seqid, opts->target,
                                              opts->width, opts->outfp);

    /* pull the features through the stream and free them afterwards */
    status = gt_node_stream_pull(extractor, err);
  }
  else {
    status = -1;
  }

  /* free */
  gt_node_stream_delete(extractor);
  gt_node_stream_delete(input);

  return status;
}
/* Tool runner: reads GFF3 from the file named by argv[parsed_args], passes
   it through a CDS stream (configured from <tool_arguments> and tagged with
   GT_CDS_SOURCE_TAG) and writes the result as GFF3 to the configured output
   file.
   Returns -1 if the region mapping cannot be created, otherwise the status
   of gt_node_stream_pull(). */
static int gt_cds_runner(GT_UNUSED int argc, const char **argv, int parsed_args,
                         void *tool_arguments, GtError *err)
{
  CDSArguments *cds_args = tool_arguments;
  GtNodeStream *input, *cds = NULL, *output = NULL;
  GtRegionMapping *mapping;
  int status = 0;

  gt_error_check(err);
  gt_assert(cds_args);

  /* GFF3 input; the progress bar needs verbose mode AND an output file */
  input = gt_gff3_in_stream_new_sorted(argv[parsed_args]);
  if (cds_args->verbose && cds_args->outfp) {
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) input);
  }

  /* create region mapping */
  mapping = gt_seqid2file_region_mapping_new(cds_args->s2fi, err);

  if (mapping) {
    /* NOTE(review): the mapping is handed to the stream and never deleted
       here, so the stream presumably takes ownership -- confirm */
    cds = gt_cds_stream_new(input, mapping, cds_args->minorflen,
                            GT_CDS_SOURCE_TAG, cds_args->start_codon,
                            cds_args->final_stop_codon,
                            cds_args->generic_start_codons);

    /* create gff3 output stream */
    output = gt_gff3_out_stream_new(cds, cds_args->outfp);

    /* pull the features through the stream and free them afterwards */
    status = gt_node_stream_pull(output, err);
  }
  else {
    status = -1;
  }

  /* free */
  gt_node_stream_delete(output);
  gt_node_stream_delete(cds);
  gt_node_stream_delete(input);

  return status;
}
/* Tool entry point: parses the options, reads GFF3 from the file named by
   argv[parsed_args], lets a region coverage visitor (configured with the
   maximum feature distance from the options) visit every node and finally
   shows the collected coverage.
   Returns -1 if option parsing fails, 0 if the option parser requests an
   exit, and otherwise the first non-zero status reported by the input stream
   or by the visitor (0 on success). The coverage is only shown on success. */
int gt_regioncov(int argc, const char **argv, GtError *err)
{
  GtNodeVisitor *regioncov_visitor;
  GtNodeStream *gff3_in_stream;
  GtGenomeNode *gn;
  RegionCovArguments arguments;
  int parsed_args, had_err = 0;

  gt_error_check(err);

  /* option parsing */
  switch (parse_options(&parsed_args, &arguments, argc, argv, err)) {
    case OPTIONPARSER_OK:
      break;
    case OPTIONPARSER_ERROR:
      return -1;
    case OPTIONPARSER_REQUESTS_EXIT:
      return 0;
  }

  /* create gff3 input stream */
  gt_assert(parsed_args < argc);
  gff3_in_stream = gt_gff3_in_stream_new_sorted(argv[parsed_args]);
  if (arguments.verbose)
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) gff3_in_stream);

  /* create region coverage visitor */
  regioncov_visitor = gt_regioncov_visitor_new(arguments.max_feature_dist);

  /* Pull the features through the stream and free them afterwards.
     The loop has to stop at the first visitor failure: re-evaluating
     gt_node_stream_next() would overwrite <had_err> and thereby silently
     drop the error reported by gt_genome_node_accept(). */
  while (!had_err) {
    had_err = gt_node_stream_next(gff3_in_stream, &gn, err);
    if (had_err || !gn)
      break; /* stream error or end of input */
    had_err = gt_genome_node_accept(gn, regioncov_visitor, err);
    gt_genome_node_delete(gn);
  }

  /* show region coverage */
  if (!had_err)
    gt_regioncov_visitor_show_coverage(regioncov_visitor);

  /* free */
  gt_node_visitor_delete(regioncov_visitor);
  gt_node_stream_delete(gff3_in_stream);

  return had_err;
}
// Program entry point. Expects exactly three arguments:
//   argv[1]  GFF3 input file (read as sorted GFF3)
//   argv[2]  output file, opened with mode "w+"
//   argv[3]  passed on to CpGI_score_stream_new()
// The input is piped through the CpGI score stream into a GFF3 output stream.
// Exits with status 1 on a usage or setup error, returns 1 if pulling the
// features through the stream fails and 0 on success.
int main(int argc, char ** argv)
{
  GtNodeStream * in, * score, * out;
  GtFile * out_file;
  GtError * err;
  int status = 0;

  if (argc != 4) {
    usage(argv[0]);
    exit(1);
  }

  // initialize genometools
  gt_lib_init();
  err = gt_error_new();

  if (!(in = gt_gff3_in_stream_new_sorted(argv[1]))) {
    fprintf(stderr, "Failed to open input stream with arg %s\n", argv[1]);
    exit(1);
  }

  if (!(out_file = gt_file_new(argv[2], "w+", err))) {
    gt_node_stream_delete(in);
    fprintf(stderr, "Failed to create output file %s\n", argv[2]);
    exit(1);
  }

  if (!(score = CpGI_score_stream_new(in, argv[3]))) {
    gt_file_delete(out_file);
    gt_node_stream_delete(in);
    fprintf(stderr, "Failed to create CpGI score stream\n");
    exit(1);
  }

  // Exactly one output stream is created, on top of the score stream. An
  // additional stream built directly on <in> would never be deleted (leak)
  // and would bypass the scoring.
  if (!(out = gt_gff3_out_stream_new(score, out_file))) {
    gt_node_stream_delete(score);
    gt_file_delete(out_file);
    gt_node_stream_delete(in);
    fprintf(stderr, "Failed to create output stream\n");
    exit(1);
  }

  // pull all features through the pipeline; remember a failure so that it is
  // reflected in the exit status
  if (gt_node_stream_pull(out, err)) {
    fprintf(stderr, "Failed to pull through out stream\n");
    status = 1;
  }

  // close genome tools
  gt_node_stream_delete(out);
  gt_node_stream_delete(score);
  gt_file_delete(out_file);
  gt_node_stream_delete(in);
  gt_error_delete(err);
  gt_lib_clean();

  return status;
}
/* Tool runner (region mapping based variant).
   Reads GFF3 from the file named by argv[parsed_args] and sends it through a
   chain of visitor streams: protein domain search (only if HMM files are
   given), PBS search (only if a tRNA library is given), PPT search (always)
   and strand assignment. If an output prefix is set, a tabular file output
   stream is appended. The result is written as GFF3 to the configured output
   file.
   The sequence source is either the seqid2file option or, as a legacy
   fallback, an encoded sequence named by the last command line argument
   (which is then clipped off <argv>).
   Returns 0 on success and a non-zero value if any setup step or pulling the
   features through the stream fails; in the latter case no further stage is
   set up or run. */
static int gt_ltrdigest_runner(GT_UNUSED int argc, const char **argv,
                               int parsed_args, void *tool_arguments,
                               GtError *err)
{
  GtLTRdigestOptions *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream = NULL,
               *gff3_out_stream = NULL,
               *pdom_stream = NULL,
               *ppt_stream = NULL,
               *pbs_stream = NULL,
               *tab_out_stream = NULL,
               *sa_stream = NULL,
               *last_stream = NULL;
  int had_err = 0,
      tests_to_run = 0,
      arg = parsed_args;
  GtRegionMapping *rmap = NULL;
  GtPdomModelSet *ms = NULL;
  gt_error_check(err);
  gt_assert(arguments);

  /* determine and open sequence source */
  if (gt_seqid2file_option_used(arguments->s2fi)) {
    /* create region mapping */
    rmap = gt_seqid2file_region_mapping_new(arguments->s2fi, err);
    if (!rmap)
      had_err = -1;
  }
  else {
    GtEncseqLoader *el;
    GtEncseq *encseq;
    /* no new-style sequence source option given, fall back to legacy syntax */
    if (argc < 3) {
      gt_error_set(err, "missing mandatory argument(s)");
      had_err = -1;
    }
    if (!had_err) {
      el = gt_encseq_loader_new();
      gt_encseq_loader_disable_autosupport(el);
      gt_encseq_loader_require_md5_support(el);
      gt_encseq_loader_require_description_support(el);
      encseq = gt_encseq_loader_load(el, argv[argc-1], err);
      /* XXX: clip off terminal argument */
      gt_free((char*) argv[argc-1]);
      argv[argc-1] = NULL;
      argc--;
      gt_encseq_loader_delete(el);
      if (!encseq)
        had_err = -1;
      else {
        rmap = gt_region_mapping_new_encseq_seqno(encseq);
        /* the local encseq reference is dropped right after the mapping has
           been created from it */
        gt_encseq_delete(encseq);
      }
    }
  }
  gt_assert(had_err || rmap);

  /* Always search for PPT. */
  tests_to_run |= GT_LTRDIGEST_RUN_PPT;

  /* Open tRNA library if given. */
  if (!had_err && arguments->trna_lib
        && gt_str_length(arguments->trna_lib) > 0) {
    tests_to_run |= GT_LTRDIGEST_RUN_PBS;
    arguments->trna_lib_bs = gt_bioseq_new(gt_str_get(arguments->trna_lib),
                                           err);
    if (gt_error_is_set(err))
      had_err = -1;
  }

  /* Set HMMER cutoffs. */
  if (!had_err && gt_str_array_size(arguments->hmm_files) > 0) {
    tests_to_run |= GT_LTRDIGEST_RUN_PDOM;
    if (!strcmp(gt_str_get(arguments->cutoffs), "GA")) {
      arguments->cutoff = GT_PHMM_CUTOFF_GA;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "TC")) {
      arguments->cutoff = GT_PHMM_CUTOFF_TC;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "NONE")) {
      arguments->cutoff = GT_PHMM_CUTOFF_NONE;
    } else {
      gt_error_set(err, "invalid cutoff setting!");
      had_err = -1;
    }
  }

  /* <last_stream> always points to the current end of the stream chain */
  if (!had_err) {
    last_stream = gff3_in_stream = gt_gff3_in_stream_new_sorted(argv[arg]);
  }

  /* protein domain search, only if HMM files were given */
  if (!had_err && gt_str_array_size(arguments->hmm_files) > 0) {
    GtNodeVisitor *pdom_v;
    ms = gt_pdom_model_set_new(arguments->hmm_files, err);
    if (ms != NULL) {
      pdom_v = gt_ltrdigest_pdom_visitor_new(ms, arguments->evalue_cutoff,
                                             arguments->chain_max_gap_length,
                                             arguments->cutoff, rmap, err);
      if (pdom_v == NULL)
        had_err = -1;
      if (!had_err) {
        gt_ltrdigest_pdom_visitor_set_source_tag(
                                               (GtLTRdigestPdomVisitor*) pdom_v,
                                               GT_LTRDIGEST_TAG);
        if (arguments->output_all_chains)
          gt_ltrdigest_pdom_visitor_output_all_chains(
                                              (GtLTRdigestPdomVisitor*) pdom_v);
        last_stream = pdom_stream = gt_visitor_stream_new(last_stream, pdom_v);
      }
    } else
      had_err = -1;
  }

  /* PBS search, only if a tRNA library was loaded */
  if (!had_err && arguments->trna_lib_bs) {
    GtNodeVisitor *pbs_v;
    pbs_v = gt_ltrdigest_pbs_visitor_new(rmap, arguments->pbs_radius,
                                         arguments->max_edist,
                                         arguments->alilen,
                                         arguments->offsetlen,
                                         arguments->trnaoffsetlen,
                                         arguments->ali_score_match,
                                         arguments->ali_score_mismatch,
                                         arguments->ali_score_insertion,
                                         arguments->ali_score_deletion,
                                         arguments->trna_lib_bs, err);
    if (pbs_v != NULL)
      last_stream = pbs_stream = gt_visitor_stream_new(last_stream, pbs_v);
    else
      had_err = -1;
  }

  /* PPT search */
  if (!had_err) {
    GtNodeVisitor *ppt_v;
    ppt_v = gt_ltrdigest_ppt_visitor_new(rmap, arguments->ppt_len,
                                         arguments->ubox_len,
                                         arguments->ppt_pyrimidine_prob,
                                         arguments->ppt_purine_prob,
                                         arguments->bkg_a_prob,
                                         arguments->bkg_g_prob,
                                         arguments->bkg_t_prob,
                                         arguments->bkg_c_prob,
                                         arguments->ubox_u_prob,
                                         arguments->ppt_radius,
                                         arguments->max_ubox_dist, err);
    if (ppt_v != NULL)
      last_stream = ppt_stream = gt_visitor_stream_new(last_stream, ppt_v);
    else
      had_err = -1;
  }

  /* strand assignment */
  if (!had_err) {
    GtNodeVisitor *sa_v;
    sa_v = gt_ltrdigest_strand_assign_visitor_new();
    gt_assert(sa_v);
    last_stream = sa_stream = gt_visitor_stream_new(last_stream, sa_v);
  }

  /* attach tabular output stream, if requested */
  if (!had_err && gt_str_length(arguments->prefix) > 0) {
    tab_out_stream = gt_ltrdigest_file_out_stream_new(last_stream,
                                                  tests_to_run, rmap,
                                                  gt_str_get(arguments->prefix),
                                                  arguments->seqnamelen, err);
    /* only advance <last_stream> on success so that it never becomes NULL */
    if (!tab_out_stream)
      had_err = -1;
    else
      last_stream = tab_out_stream;
    if (!had_err && arguments->print_metadata) {
      /* NOTE(review): arguments->trna_lib is NULL-checked where the tRNA
         library is opened but passed to gt_str_get() unchecked here --
         confirm that it is always set when this code is reached */
      had_err = gt_ltrdigest_file_out_stream_write_metadata(
                                     (GtLTRdigestFileOutStream*) tab_out_stream,
                                     tests_to_run,
                                     gt_str_get(arguments->trna_lib),
                                     argv[arg],
                                     arguments->ppt_len,
                                     arguments->ubox_len,
                                     arguments->ppt_radius,
                                     arguments->alilen,
                                     arguments->max_edist,
                                     arguments->offsetlen,
                                     arguments->trnaoffsetlen,
                                     arguments->pbs_radius,
                                     arguments->hmm_files,
                                     arguments->chain_max_gap_length,
                                     arguments->evalue_cutoff,
                                     err);
    }
    if (!had_err) {
      if (arguments->write_alignments)
        gt_ltrdigest_file_out_stream_enable_pdom_alignment_output(
                                                                tab_out_stream);
      if (arguments->write_aaseqs)
        gt_ltrdigest_file_out_stream_enable_aa_sequence_output(
                                                                tab_out_stream);
    }
  }

  /* The final stage is only run if everything above succeeded. Otherwise a
     failure of the tabular output stage would be overwritten by the result
     of the pull (which would be started with <err> already set). */
  if (!had_err) {
    last_stream = gff3_out_stream = gt_gff3_out_stream_new(last_stream,
                                                           arguments->outfp);

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(last_stream, err);
  }

  /* free */
  gt_pdom_model_set_delete(ms);
  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(ppt_stream);
  gt_node_stream_delete(pbs_stream);
  gt_node_stream_delete(sa_stream);
  gt_node_stream_delete(pdom_stream);
  gt_node_stream_delete(tab_out_stream);
  gt_node_stream_delete(gff3_in_stream);
  gt_bioseq_delete(arguments->trna_lib_bs);
  gt_region_mapping_delete(rmap);

  return had_err;
}
/* Tool runner (encoded sequence based variant).
   Loads the encoded sequence named by argv[parsed_args + 1], reads GFF3 from
   the file named by argv[parsed_args], sends it through the LTRdigest stream
   and, if an output prefix is set, through a tabular file output stream. The
   result is written as GFF3 to the configured output file.
   The PPT search is always enabled, the PBS search only if a tRNA library is
   given and (when compiled with HAVE_HMMER) the protein domain search only
   if HMM files are given.
   Returns 0 on success and a non-zero value if any setup step or pulling the
   features through the stream fails. */
static int gt_ltrdigest_runner(GT_UNUSED int argc, const char **argv,
                               int parsed_args, void *tool_arguments,
                               GtError *err)
{
  GtLTRdigestOptions *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream = NULL,
               *gff3_out_stream = NULL,
               *ltrdigest_stream = NULL,
               *tab_out_stream = NULL,
               *last_stream = NULL;
  int had_err = 0,
      tests_to_run = 0,
      arg = parsed_args;
  const char *indexname;
  GtLogger *logger;
  GtEncseqLoader *el;
  GtEncseq *encseq;
  gt_error_check(err);
  gt_assert(arguments);

  /* <arguments> is only dereferenced after it has been asserted */
  indexname = argv[arg+1];
  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stdout);

  /* Set sequence encoder options. Defaults are ok. */
  el = gt_encseq_loader_new();
  gt_encseq_loader_set_logger(el, logger);

  /* Open sequence file */
  encseq = gt_encseq_loader_load(el, indexname, err);
  if (!encseq)
    had_err = -1;

  /* Always search for PPT. */
  tests_to_run |= GT_LTRDIGEST_RUN_PPT;

  /* Open tRNA library if given. */
  if (!had_err && arguments->trna_lib
        && gt_str_length(arguments->trna_lib) > 0) {
    tests_to_run |= GT_LTRDIGEST_RUN_PBS;
    arguments->pbs_opts.trna_lib = gt_bioseq_new(gt_str_get(arguments->trna_lib),
                                                 err);
    if (gt_error_is_set(err))
      had_err = -1;
  }

#ifdef HAVE_HMMER
  /* Open HMMER files if given. */
  if (!had_err && gt_str_array_size(arguments->pdom_opts.hmm_files) > 0) {
    tests_to_run |= GT_LTRDIGEST_RUN_PDOM;
    if (!strcmp(gt_str_get(arguments->cutoffs), "GA")) {
      arguments->pdom_opts.cutoff = GT_PHMM_CUTOFF_GA;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "TC")) {
      arguments->pdom_opts.cutoff = GT_PHMM_CUTOFF_TC;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "NONE")) {
      arguments->pdom_opts.cutoff = GT_PHMM_CUTOFF_NONE;
    } else {
      gt_error_set(err, "invalid cutoff setting!");
      had_err = -1;
    }
  }
#endif

  if (!had_err) {
    /* set up stream flow
     * ------------------*/
    last_stream = gff3_in_stream = gt_gff3_in_stream_new_sorted(argv[arg]);

    last_stream = ltrdigest_stream = gt_ltrdigest_stream_new(last_stream,
                                                  tests_to_run,
                                                  encseq,
                                                  &arguments->pbs_opts,
                                                  &arguments->ppt_opts,
#ifdef HAVE_HMMER
                                                  &arguments->pdom_opts,
#endif
                                                  err);
    if (!ltrdigest_stream)
      had_err = -1;
  }

  /* attach tabular output stream, if requested */
  if (!had_err && gt_str_length(arguments->prefix) > 0) {
    tab_out_stream = gt_ltr_fileout_stream_new(last_stream,
                                              tests_to_run,
                                              encseq,
                                              gt_str_get(arguments->prefix),
                                              &arguments->ppt_opts,
                                              &arguments->pbs_opts,
#ifdef HAVE_HMMER
                                              &arguments->pdom_opts,
#endif
                                              gt_str_get(arguments->trna_lib),
                                              argv[arg+1],
                                              argv[arg],
                                              arguments->seqnamelen,
                                              err);
    if (!tab_out_stream) {
      /* do not touch or chain a stream which could not be created */
      had_err = -1;
    }
    else {
      last_stream = tab_out_stream;
#ifdef HAVE_HMMER
      /* test the option VALUES; taking the address of the fields (as in
         `if (&arguments->pdom_opts.write_alignments)') is always true and
         would enable both outputs unconditionally */
      if (arguments->pdom_opts.write_alignments)
        gt_ltr_fileout_stream_enable_pdom_alignment_output(tab_out_stream);
      if (arguments->pdom_opts.write_aaseqs)
        gt_ltr_fileout_stream_enable_aa_sequence_output(tab_out_stream);
#endif
    }
  }

  /* the final stage is only run if everything above succeeded, so that an
     earlier failure is not overwritten by the result of the pull */
  if (!had_err) {
    last_stream = gff3_out_stream = gt_gff3_out_stream_new(last_stream,
                                                           arguments->outfp);

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(last_stream, err);
  }

  /* free */
  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(ltrdigest_stream);
  if (tab_out_stream != NULL)
    gt_node_stream_delete(tab_out_stream);
  gt_node_stream_delete(gff3_in_stream);
  gt_encseq_loader_delete(el);
  gt_encseq_delete(encseq);
  encseq = NULL;
  gt_bioseq_delete(arguments->pbs_opts.trna_lib);
  gt_logger_delete(logger);

  return had_err;
}