/*
 * Attach an encoded sequence to <bs>, (re)building the on-disk bioseq files
 * beforehand if that is required.
 *
 * The files are rebuilt when <recreate> is set, when input comes from stdin,
 * when any of the five index files is missing, or when the sequence file is
 * newer than the encoded sequence file.
 *
 * Returns 0 on success. On failure returns the non-zero value produced by
 * construct_bioseq_files(), or -1 if loading the encoded sequence failed
 * (in which case <err> is set). <bs>->encseq must be NULL on entry.
 */
static int bioseq_fill(GtBioseq *bs, bool recreate, GtError *err)
{
  enum { NUM_INDEX_FILES = 5 };
  /* Slot 0 is the encoded sequence file; its timestamp is the one compared
     against the sequence file further down. */
  const char *suffixes[NUM_INDEX_FILES] = {
    GT_ENCSEQFILESUFFIX,
    GT_OISTABFILESUFFIX,
    GT_SDSTABFILESUFFIX,
    GT_MD5TABFILESUFFIX,
    GT_DESTABFILESUFFIX
  };
  GtStr *index_files[NUM_INDEX_FILES];
  GtStr *base_name;
  bool rebuild;
  int i,
      had_err = 0;

  gt_assert(!bs->encseq);

  /* for stdin a temporary base name is allocated here and released below */
  base_name = bs->use_stdin ? gt_str_new_cstr("stdin") : bs->sequence_file;

  /* derive the name of every index file from the base name */
  for (i = 0; i < NUM_INDEX_FILES; i++) {
    index_files[i] = gt_str_clone(base_name);
    gt_str_append_cstr(index_files[i], suffixes[i]);
  }

  /* decide whether the index has to be (re)built; the checks run in the
     same order as listed above and stop at the first one that says "yes" */
  rebuild = recreate || bs->use_stdin;
  for (i = 0; !rebuild && i < NUM_INDEX_FILES; i++) {
    rebuild = !gt_file_exists(gt_str_get(index_files[i]));
  }
  if (!rebuild) {
    rebuild = gt_file_is_newer(gt_str_get(bs->sequence_file),
                               gt_str_get(index_files[0]));
  }
  if (rebuild) {
    had_err = construct_bioseq_files(bs, base_name, err);
  }

  /* load the encoded sequence with exactly the supports a bioseq needs */
  if (!had_err) {
    GtEncseqLoader *loader = gt_encseq_loader_new();
    gt_encseq_loader_disable_autosupport(loader);
    gt_encseq_loader_require_lossless_support(loader);
    gt_encseq_loader_require_description_support(loader);
    gt_encseq_loader_require_md5_support(loader);
    gt_encseq_loader_require_multiseq_support(loader);
    bs->encseq = gt_encseq_loader_load(loader, gt_str_get(base_name), err);
    if (!bs->encseq) {
      had_err = -1;
      gt_assert(gt_error_is_set(err));
    }
    gt_encseq_loader_delete(loader);
  }
  gt_assert(had_err || bs->encseq);

  /* release everything allocated in this function */
  if (bs->use_stdin) {
    gt_str_delete(base_name);
  }
  for (i = 0; i < NUM_INDEX_FILES; i++) {
    gt_str_delete(index_files[i]);
  }

  return had_err;
}
/*
 * Tool runner for LTRdigest.
 *
 * Builds a chain of node streams on top of a sorted GFF3 input stream
 * (argv[parsed_args]): optional protein domain search, optional PBS search,
 * PPT search, strand assignment, optional tabular file output, and finally
 * GFF3 output to <arguments->outfp>. The chain is then pulled to completion.
 *
 * The sequence source is taken from the seqid2file options if they were
 * used; otherwise the last command line argument is loaded as an encoded
 * sequence (legacy syntax, requires argc >= 3).
 *
 * <tool_arguments> must point to a GtLTRdigestOptions object.
 * Returns 0 on success and a non-zero value on failure.
 */
static int gt_ltrdigest_runner(GT_UNUSED int argc, const char **argv,
                               int parsed_args, void *tool_arguments,
                               GtError *err)
{
  GtLTRdigestOptions *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream  = NULL,
               *gff3_out_stream = NULL,
               *pdom_stream     = NULL,
               *ppt_stream      = NULL,
               *pbs_stream      = NULL,
               *tab_out_stream  = NULL,
               *sa_stream       = NULL,
               *last_stream     = NULL;
  int had_err      = 0,
      tests_to_run = 0,
      arg          = parsed_args;
  GtRegionMapping *rmap = NULL;
  GtPdomModelSet *ms = NULL;
  gt_error_check(err);
  gt_assert(arguments);

  /* determine and open sequence source */
  if (gt_seqid2file_option_used(arguments->s2fi)) {
    /* create region mapping */
    rmap = gt_seqid2file_region_mapping_new(arguments->s2fi, err);
    if (!rmap)
      had_err = -1;
  } else {
    GtEncseqLoader *el;
    GtEncseq *encseq;
    /* no new-style sequence source option given, fall back to legacy syntax */
    if (argc < 3) {
      gt_error_set(err, "missing mandatory argument(s)");
      had_err = -1;
    }
    if (!had_err) {
      el = gt_encseq_loader_new();
      gt_encseq_loader_disable_autosupport(el);
      gt_encseq_loader_require_md5_support(el);
      gt_encseq_loader_require_description_support(el);
      encseq = gt_encseq_loader_load(el, argv[argc-1], err);
      /* XXX: clip off terminal argument */
      /* NOTE(review): this assumes the caller allocated argv[argc-1] with the
         gt allocator -- confirm against the tool runner framework. */
      gt_free((char*) argv[argc-1]);
      argv[argc-1] = NULL;
      argc--;
      gt_encseq_loader_delete(el);
      if (!encseq)
        had_err = -1;
      else {
        /* the local encseq handle is released right after the mapping has
           been created from it */
        rmap = gt_region_mapping_new_encseq_seqno(encseq);
        gt_encseq_delete(encseq);
      }
    }
  }
  gt_assert(had_err || rmap);

  /* Always search for PPT. */
  tests_to_run |= GT_LTRDIGEST_RUN_PPT;

  /* Open tRNA library if given. */
  if (!had_err && arguments->trna_lib
        && gt_str_length(arguments->trna_lib) > 0) {
    tests_to_run |= GT_LTRDIGEST_RUN_PBS;
    arguments->trna_lib_bs = gt_bioseq_new(gt_str_get(arguments->trna_lib),
                                           err);
    if (gt_error_is_set(err))
      had_err = -1;
  }

  /* Set HMMER cutoffs. */
  if (!had_err && gt_str_array_size(arguments->hmm_files) > 0) {
    tests_to_run |= GT_LTRDIGEST_RUN_PDOM;
    if (!strcmp(gt_str_get(arguments->cutoffs), "GA")) {
      arguments->cutoff = GT_PHMM_CUTOFF_GA;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "TC")) {
      arguments->cutoff = GT_PHMM_CUTOFF_TC;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "NONE")) {
      arguments->cutoff = GT_PHMM_CUTOFF_NONE;
    } else {
      gt_error_set(err, "invalid cutoff setting!");
      had_err = -1;
    }
  }

  /* head of the stream chain: sorted GFF3 input */
  if (!had_err) {
    last_stream = gff3_in_stream = gt_gff3_in_stream_new_sorted(argv[arg]);
  }

  /* protein domain search, only if HMM files were given */
  if (!had_err && gt_str_array_size(arguments->hmm_files) > 0) {
    GtNodeVisitor *pdom_v;
    ms = gt_pdom_model_set_new(arguments->hmm_files, err);
    if (ms != NULL) {
      pdom_v = gt_ltrdigest_pdom_visitor_new(ms, arguments->evalue_cutoff,
                                             arguments->chain_max_gap_length,
                                             arguments->cutoff, rmap, err);
      if (pdom_v == NULL)
        had_err = -1;
      if (!had_err) {
        gt_ltrdigest_pdom_visitor_set_source_tag(
                                            (GtLTRdigestPdomVisitor*) pdom_v,
                                            GT_LTRDIGEST_TAG);
        if (arguments->output_all_chains)
          gt_ltrdigest_pdom_visitor_output_all_chains(
                                            (GtLTRdigestPdomVisitor*) pdom_v);
        last_stream = pdom_stream = gt_visitor_stream_new(last_stream, pdom_v);
      }
    } else
      had_err = -1;
  }

  /* PBS search, only if a tRNA library was opened above */
  if (!had_err && arguments->trna_lib_bs) {
    GtNodeVisitor *pbs_v;
    pbs_v = gt_ltrdigest_pbs_visitor_new(rmap, arguments->pbs_radius,
                                         arguments->max_edist,
                                         arguments->alilen,
                                         arguments->offsetlen,
                                         arguments->trnaoffsetlen,
                                         arguments->ali_score_match,
                                         arguments->ali_score_mismatch,
                                         arguments->ali_score_insertion,
                                         arguments->ali_score_deletion,
                                         arguments->trna_lib_bs, err);
    if (pbs_v != NULL)
      last_stream = pbs_stream = gt_visitor_stream_new(last_stream, pbs_v);
    else
      had_err = -1;
  }

  /* PPT search */
  if (!had_err) {
    GtNodeVisitor *ppt_v;
    ppt_v = gt_ltrdigest_ppt_visitor_new(rmap, arguments->ppt_len,
                                         arguments->ubox_len,
                                         arguments->ppt_pyrimidine_prob,
                                         arguments->ppt_purine_prob,
                                         arguments->bkg_a_prob,
                                         arguments->bkg_g_prob,
                                         arguments->bkg_t_prob,
                                         arguments->bkg_c_prob,
                                         arguments->ubox_u_prob,
                                         arguments->ppt_radius,
                                         arguments->max_ubox_dist, err);
    if (ppt_v != NULL)
      last_stream = ppt_stream = gt_visitor_stream_new(last_stream, ppt_v);
    else
      had_err = -1;
  }

  /* strand assignment */
  if (!had_err) {
    GtNodeVisitor *sa_v;
    sa_v = gt_ltrdigest_strand_assign_visitor_new();
    gt_assert(sa_v);
    last_stream = sa_stream = gt_visitor_stream_new(last_stream, sa_v);
  }

  /* attach tabular output stream, if requested */
  if (!had_err && gt_str_length(arguments->prefix) > 0) {
    tab_out_stream = gt_ltrdigest_file_out_stream_new(
                                               last_stream,
                                               tests_to_run,
                                               rmap,
                                               gt_str_get(arguments->prefix),
                                               arguments->seqnamelen,
                                               err);
    /* Only advance the chain when the stream really exists, so last_stream
       never becomes NULL. */
    if (!tab_out_stream)
      had_err = -1;
    else
      last_stream = tab_out_stream;
    if (!had_err && arguments->print_metadata) {
      had_err = gt_ltrdigest_file_out_stream_write_metadata(
                                    (GtLTRdigestFileOutStream*) tab_out_stream,
                                    tests_to_run,
                                    gt_str_get(arguments->trna_lib),
                                    argv[arg],
                                    arguments->ppt_len,
                                    arguments->ubox_len,
                                    arguments->ppt_radius,
                                    arguments->alilen,
                                    arguments->max_edist,
                                    arguments->offsetlen,
                                    arguments->trnaoffsetlen,
                                    arguments->pbs_radius,
                                    arguments->hmm_files,
                                    arguments->chain_max_gap_length,
                                    arguments->evalue_cutoff,
                                    err);
    }
    if (!had_err) {
      if (arguments->write_alignments)
        gt_ltrdigest_file_out_stream_enable_pdom_alignment_output(
                                                               tab_out_stream);
      if (arguments->write_aaseqs)
        gt_ltrdigest_file_out_stream_enable_aa_sequence_output(
                                                               tab_out_stream);
    }
  }

  /* Guarded separately: a failure while setting up the tabular output must
     neither be overwritten by the pull result nor lead to a GFF3 output
     stream being built on a broken chain. */
  if (!had_err) {
    last_stream = gff3_out_stream = gt_gff3_out_stream_new(last_stream,
                                                           arguments->outfp);

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(last_stream, err);
  }

  /* cleanup; every handle that was never created is still NULL here */
  gt_pdom_model_set_delete(ms);
  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(ppt_stream);
  gt_node_stream_delete(pbs_stream);
  gt_node_stream_delete(sa_stream);
  gt_node_stream_delete(pdom_stream);
  gt_node_stream_delete(tab_out_stream);
  gt_node_stream_delete(gff3_in_stream);
  gt_bioseq_delete(arguments->trna_lib_bs);
  gt_region_mapping_delete(rmap);

  return had_err;
}