/* Runner of the inline sequence split tool.
   Reads GFF3 from the command line arguments following the parsed options
   and pulls it through a sequence node out stream (bound to the optional
   sequence output file) followed by a GFF3 out stream (bound to the optional
   GFF3 output file). Either file stays NULL if no name was given.
   Returns -1 if an output file cannot be opened, otherwise the result of
   pulling the stream chain. */
static int gt_inlineseq_split_runner(int argc, const char **argv,
                                     int parsed_args, void *tool_arguments,
                                     GtError *err)
{
  GtInlineseqSplitArguments *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream = NULL,
               *split_stream = NULL,
               *gff3_out_stream = NULL;
  GtFile *seq_out_file = NULL,
         *gff3_out_file = NULL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  /* open the optional output files */
  if (gt_str_length(arguments->seqoutfile) > 0) {
    seq_out_file = gt_file_new(gt_str_get(arguments->seqoutfile), "w+", err);
    if (seq_out_file == NULL)
      had_err = -1;
  }
  if (!had_err && gt_str_length(arguments->gffoutfile) > 0) {
    gff3_out_file = gt_file_new(gt_str_get(arguments->gffoutfile), "w+", err);
    if (gff3_out_file == NULL)
      had_err = -1;
  }

  /* build the chain: GFF3 input -> sequence splitter -> GFF3 output */
  if (!had_err) {
    gff3_in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                                    argv + parsed_args);
    gt_assert(gff3_in_stream);

    split_stream = gt_sequence_node_out_stream_new(gff3_in_stream,
                                                   seq_out_file, err);
    gt_assert(split_stream);

    gff3_out_stream = gt_gff3_out_stream_new(split_stream, gff3_out_file);
    had_err = gt_node_stream_pull(gff3_out_stream, err);
  }

  /* free (all handles may still be NULL at this point) */
  gt_node_stream_delete(gff3_in_stream);
  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(split_stream);
  gt_file_delete(seq_out_file);
  gt_file_delete(gff3_out_file);

  return had_err;
}
/* Write the manual page source of tool <toolname> to the file
   "<outdir><GT_PATH_SEPARATOR><toolname>.mansrc", where blanks in the tool
   name are replaced by underscores for the file name. <outdir> is created if
   it does not exist yet.
   Returns 0 on success, -1 if the output file cannot be opened, or the error
   code of gt_option_parser_manpage(). */
static int create_manpage(const char *outdir, const char *toolname,
                          GtOptionParser *option_parser, GtError *err)
{
  GtStr *manpage_text, *outpath;
  GtFile *manfile;
  char *file_stem;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(outdir && toolname && option_parser);

  manpage_text = gt_str_new();
  outpath = gt_str_new_cstr(outdir);

  /* the file name uses '_' where the tool name contains ' ' */
  file_stem = gt_cstr_dup(toolname);
  gt_cstr_rep(file_stem, ' ', '_');

  /* make sure the output directory exists */
  if (!gt_file_exists(gt_str_get(outpath)))
    gt_xmkdir(gt_str_get(outpath));

  /* assemble the full output path */
  gt_str_append_char(outpath, GT_PATH_SEPARATOR);
  gt_str_append_cstr(outpath, file_stem);
  gt_str_append_cstr(outpath, ".mansrc");
  gt_free(file_stem);

  manfile = gt_file_new(gt_str_get(outpath), "w+", err);
  if (manfile == NULL) {
    had_err = -1;
  }
  else {
    /* the manpage is generated from the unmodified tool name */
    had_err = gt_option_parser_manpage(option_parser, toolname, manpage_text,
                                       err);
  }

  if (!had_err)
    gt_file_xprintf(manfile, "%s", gt_str_get(manpage_text));

  gt_file_delete(manfile);
  gt_str_delete(outpath);
  gt_str_delete(manpage_text);

  return had_err;
}
static void canon_gff3_parse_options(int argc, char * const *argv, CanonGFF3Options *options, GtError *error) { int opt = 0; int optindex = 0; const char *optstr = "hio:s:v"; const struct option init_options[] = { { "help", no_argument, NULL, 'h' }, { "infer", no_argument, NULL, 'i' }, { "outfile", required_argument, NULL, 'o' }, { "source", required_argument, NULL, 's' }, { "version", no_argument, NULL, 'v' }, { NULL, no_argument, NULL, 0 }, }; for(opt = getopt_long(argc, argv, optstr, init_options, &optindex); opt != -1; opt = getopt_long(argc, argv, optstr, init_options, &optindex)) { if(opt == 'h') { print_usage(stdout); exit(0); } else if(opt == 'i') options->infer = true; else if(opt == 'o') { if(options->outstream != NULL) gt_file_delete(options->outstream); options->outstream = gt_file_new(optarg, "w", error); } else if(opt == 's') { if(options->source != NULL) gt_str_delete(options->source); options->source = gt_str_new_cstr(optarg); } else if(opt == 'v') { agn_print_version("CanonGFF3", stdout); exit(0); } } }
/* Parse the input of <gtf_in_stream> with a freshly created GTF parser and
   store the result in the stream's genome node buffer. If the stream has a
   filename, that file is opened for reading; otherwise a NULL file handle is
   passed to the parser and the source is labelled "stdin".
   Returns -1 if the file cannot be opened, otherwise the result of
   gt_gtf_parser_parse(). */
static int gtf_in_stream_process_file(GtGTFInStream *gtf_in_stream,
                                      GtError *err)
{
  GtGTFParser *parser;
  GtFile *input = NULL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(gtf_in_stream);

  parser = gt_gtf_parser_new(gtf_in_stream->type_checker);

  /* open input file (only if a name was given) */
  if (gtf_in_stream->filename) {
    input = gt_file_new(gtf_in_stream->filename, "r", err);
    if (input == NULL)
      had_err = -1;
  }

  /* parse input file */
  if (!had_err) {
    GtStr *source_name = gt_str_new_cstr(gtf_in_stream->filename
                                         ? gtf_in_stream->filename
                                         : "stdin");
    had_err = gt_gtf_parser_parse(parser,
                                  gtf_in_stream->genome_node_buffer,
                                  source_name, input, gtf_in_stream->tidy,
                                  err);
    gt_str_delete(source_name);
  }

  /* close input file (no-op for NULL) and free the parser */
  gt_file_delete(input);
  gt_gtf_parser_delete(parser);

  return had_err;
}
/* Write all unique and link elements of <condenseq> as GFF3
   "experimental_feature" lines to the file "<basefilename>.gff3".

   Uniques get the attributes Name=unique<idx> and ID=U<idx>; links get
   Name=link<idx>, ID=L<idx> and Derives_from=U<unique_id>. Coordinates are
   written 1-based and relative to the start of the sequence containing the
   element; the sequence description is used as the sequence ID.

   Returns 0 on success. Returns -1 (with <err> set by gt_file_new()) if the
   output file cannot be opened, or the error code of the first failing
   gt_genome_node_accept() call. */
int gt_condenseq_output_to_gff3(const GtCondenseq *condenseq, GtError *err)
{
  int had_err = 0;
  GtUword idx, name_len,
          seqnum = 0, seqstart = 0, seqend = 0,
          desclen;
  GtStr *filename = NULL,
        *id = gt_str_new_cstr("U"),
        *name = gt_str_new_cstr("unique"),
        *parent_unique = gt_str_new_cstr("U"),
        *seqid = gt_str_new(),
        *source = gt_str_new_cstr("Condenseq");
  GtFile *outfile = NULL;
  GtGFF3Visitor *gffv = NULL;
  GtNodeVisitor *nodev = NULL;
  GtFeatureNode *fnode = NULL;
  GtGenomeNode *node = NULL;
  GtRange range;

  gt_assert(condenseq != NULL);

  filename = gt_str_new_cstr(gt_condenseq_basefilename(condenseq));
  name_len = gt_str_length(name);
  gt_str_append_cstr(filename, ".gff3");

  /* Without this check a failed open would go unnoticed: the visitor would
     be created on a NULL file and 0 could be returned although <err> is
     already set. */
  outfile = gt_file_new(gt_str_get(filename), "w", err);
  if (outfile == NULL)
    had_err = -1;

  if (!had_err) {
    nodev = gt_gff3_visitor_new(outfile);
    gffv = (GtGFF3Visitor *) nodev;
    gt_gff3_visitor_retain_id_attributes(gffv);

    node = gt_feature_node_new(seqid, "experimental_feature",
                               (GtUword) 1, (GtUword) 1, GT_STRAND_BOTH);
    fnode = (GtFeatureNode*) node;
    gt_feature_node_set_source(fnode, source);
  }

  /* uniques */
  for (idx = 0; !had_err && idx < condenseq->udb_nelems; ++idx) {
    GtCondenseqUnique uq = condenseq->uniques[idx];

    /* element starts at or behind the end of the current sequence: look up
       the sequence containing it and start a new node with its seqid */
    if (seqend <= uq.orig_startpos) {
      const char *desc;
      gt_genome_node_delete(node);
      seqnum = gt_condenseq_pos2seqnum(condenseq, uq.orig_startpos);
      seqstart = gt_condenseq_seqstartpos(condenseq, seqnum);
      seqend = seqstart +
               condenseq_seqlength_help(condenseq, seqnum, seqstart);
      desc = gt_condenseq_description(condenseq, &desclen, seqnum);
      gt_str_reset(seqid);
      gt_str_append_cstr_nt(seqid, desc, desclen);
      node = gt_feature_node_new(seqid, "experimental_feature",
                                 (GtUword) 1, (GtUword) 1, GT_STRAND_BOTH);
      fnode = (GtFeatureNode*) node;
      gt_feature_node_set_source(fnode, source);
    }

    /* cut name and id back to their prefixes and append the index */
    gt_str_set_length(name, name_len);
    gt_str_append_uword(name, idx);
    gt_str_set_length(id, (GtUword) 1);
    gt_str_append_uword(id, idx);
    gt_feature_node_set_attribute(fnode, "Name", gt_str_get(name));
    gt_feature_node_set_attribute(fnode, "ID", gt_str_get(id));

    /* 1 Based coordinates! */
    range.start = uq.orig_startpos + 1 - seqstart;
    range.end = uq.orig_startpos + uq.len - seqstart;
    gt_genome_node_set_range(node, &range);

    had_err = gt_genome_node_accept(node, nodev, err);
  }

  /* switch the prefixes from unique ("unique"/"U") to link ("link"/"L") */
  gt_str_reset(name);
  gt_str_append_cstr(name, "link");
  gt_str_reset(id);
  gt_str_append_cstr(id, "L");
  name_len = gt_str_length(name);
  /* forces a sequence lookup for the first link */
  seqend = 0;

  /* links */
  for (idx = 0; !had_err && idx < condenseq->ldb_nelems; ++idx) {
    GtCondenseqLink link = condenseq->links[idx];

    if (seqend <= link.orig_startpos) {
      const char *desc;
      gt_genome_node_delete(node);
      seqnum = gt_condenseq_pos2seqnum(condenseq, link.orig_startpos);
      seqstart = gt_condenseq_seqstartpos(condenseq, seqnum);
      seqend = seqstart +
               condenseq_seqlength_help(condenseq, seqnum, seqstart);
      desc = gt_condenseq_description(condenseq, &desclen, seqnum);
      gt_str_reset(seqid);
      gt_str_append_cstr_nt(seqid, desc, desclen);
      node = gt_feature_node_new(seqid, "experimental_feature",
                                 (GtUword) 1, (GtUword) 1, GT_STRAND_BOTH);
      fnode = (GtFeatureNode*) node;
      gt_feature_node_set_source(fnode, source);
    }

    gt_str_set_length(name, name_len);
    gt_str_append_uword(name, idx);
    gt_str_set_length(id, (GtUword) 1);
    gt_str_append_uword(id, idx);
    gt_feature_node_set_attribute(fnode, "Name", gt_str_get(name));
    gt_feature_node_set_attribute(fnode, "ID", gt_str_get(id));

    /* reference the unique this link points to by its "U<unique_id>" ID */
    gt_str_set_length(parent_unique, (GtUword) 1);
    gt_str_append_uword(parent_unique, link.unique_id);
    gt_feature_node_set_attribute(fnode, "Derives_from",
                                  gt_str_get(parent_unique));

    /* 1 Based coordinates! */
    range.start = link.orig_startpos + 1 - seqstart;
    range.end = link.orig_startpos + link.len - seqstart;
    gt_genome_node_set_range(node, &range);

    had_err = gt_genome_node_accept(node, nodev, err);
  }

  /* free (outfile, node and nodev are NULL if the file could not be opened) */
  gt_file_delete(outfile);
  gt_genome_node_delete(node);
  gt_node_visitor_delete(nodev);
  gt_str_delete(filename);
  gt_str_delete(id);
  gt_str_delete(name);
  gt_str_delete(parent_unique);
  gt_str_delete(seqid);
  gt_str_delete(source);

  return had_err;
}
/* Initialize <v> by opening the file named <of> for writing and storing the
   handle in v->file. Both arguments must be non-NULL.
   The function has no way to report failure: the result of gt_file_new() is
   stored as is, so callers have to inspect v->file themselves
   (NOTE(review): presumably NULL when the file cannot be opened -- confirm
   against the GtFile documentation). */
void vcfoutput_init(VcfOutput *v, const char *of)
{
  GtError *err;

  gt_assert(v);
  gt_assert(of);

  err = gt_error_new();
  v->file = gt_file_new(of, "w", err);
  /* the error object is only needed for the call above; it was leaked on
     every call before */
  gt_error_delete(err);
}
// Entry point: <prog> <arg1> <arg2> <arg3>
//   argv[1] is passed to the sorted GFF3 input stream,
//   argv[2] is the GFF3 output file (opened with mode "w+"),
//   argv[3] is passed to CpGI_score_stream_new().
// The features are pulled through input -> CpGI score -> GFF3 output.
// Returns 0 on success and 1 on any failure; a wrong number of arguments
// prints the usage and exits with status 1.
int main(int argc, char ** argv)
{
  GtNodeStream *in = NULL, *score = NULL, *out = NULL;
  GtFile *out_file = NULL;
  GtError *err;
  int status = 1;

  if (argc != 4)
  {
    usage(argv[0]);
    exit(1);
  }

  // initialize genometools
  gt_lib_init();
  err = gt_error_new();

  if (!(in = gt_gff3_in_stream_new_sorted(argv[1])))
  {
    fprintf(stderr, "Failed to open input stream with arg %s\n", argv[1]);
    goto cleanup;
  }

  if (!(out_file = gt_file_new(argv[2], "w+", err)))
  {
    fprintf(stderr, "Failed to create output file %s\n", argv[2]);
    goto cleanup;
  }

  if (!(score = CpGI_score_stream_new(in, argv[3])))
  {
    fprintf(stderr, "Failed to create CpGI score stream\n");
    goto cleanup;
  }

  // Only one output stream is created, on top of the score stream. An extra
  // one created directly on `in` used to be overwritten here and leaked.
  if (!(out = gt_gff3_out_stream_new(score, out_file)))
  {
    fprintf(stderr, "Failed to create output stream\n");
    goto cleanup;
  }

  if (gt_node_stream_pull(out, err))
  {
    fprintf(stderr, "Failed to pull through out stream\n");
    goto cleanup;
  }

  status = 0;

cleanup:
  // close genome tools; handles not created yet are still NULL here
  gt_node_stream_delete(out);
  gt_node_stream_delete(score);
  gt_file_delete(out_file);
  gt_node_stream_delete(in);
  gt_error_delete(err);
  gt_lib_clean();

  return status;
}
/* Runner of the condenser search tool.

   Nothing is searched unless arguments->blastn or arguments->blastp is set.
   Otherwise the search proceeds in two stages:
   1. coarse: a BLAST database is built from "<dbpath>.fas" and searched with
      the query using the E-value arguments->ceval; each hit whose subject ID
      parses as "<up to 6 characters><number>" is recorded,
   2. fine: the recorded uniques are extracted from the compressed database
      into "coarse_<db basename without suffix>.fas", a BLAST database is
      built from that file and searched again; every hit is written as one
      tab separated line to arguments->outfp.
   If arguments->feval is undefined, the fine E-value is derived from the
   bitscore, the average query length and the length of the extracted
   database.

   Returns 0 on success and a non-zero value on error, in which case <err>
   is set. */
static int gt_condenser_search_runner(GT_UNUSED int argc,
                                      GT_UNUSED const char **argv,
                                      GT_UNUSED int parsed_args,
                                      void *tool_arguments,
                                      GtError *err)
{
  GtCondenserSearchArguments *arguments = tool_arguments;
  int i, had_err = 0;
  char *querypath;
  GtStr *coarse_fname;
  char *db_basename = NULL;
  char *suffix_ptr = NULL;
  GtTimer *timer = NULL;
  GtLogger *logger = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  /* only touch <arguments> after it has been asserted */
  querypath = gt_str_get(arguments->querypath);
  coarse_fname = gt_str_new_cstr("coarse_");

  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stderr);

  db_basename = gt_basename(gt_str_get(arguments->dbpath));
  /* if first char is '.' this might be a hidden file */
  if (strlen(db_basename) > (size_t) 1 &&
      (suffix_ptr = strrchr(db_basename + 1, '.')) != NULL) {
    /* remove suffix */
    *suffix_ptr = '\0';
  }
  gt_str_append_cstr(coarse_fname, db_basename);
  gt_str_append_cstr(coarse_fname, ".fas");
  gt_free(db_basename);
  db_basename = NULL;
  suffix_ptr = NULL;

  if (arguments->blastn || arguments->blastp) {
    GtMatch *match;
    GtMatchIterator *mp = NULL;
    GtNREncseq *nrencseq = NULL;
    GtStr *fastaname = gt_str_clone(arguments->dbpath);
    HitPosition *hits;
    double eval, raw_eval = 0.0;
    GtUword coarse_db_len = 0;
    GtMatchIteratorStatus status;
    int curr_hits = 0, max_hits = 100;

    hits = gt_malloc(sizeof (*hits) * (size_t) max_hits);

    gt_str_append_cstr(fastaname, ".fas");

    /* every hit slot owns its range, freed in the cleanup at the end */
    for (i = 0; i < max_hits; i++) {
      hits[i].range = gt_malloc(sizeof (*hits[i].range) * (size_t) 1);
    }

    if (gt_showtime_enabled()) {
      timer = gt_timer_new_with_progress_description("initialization");
      gt_timer_start(timer);
    }

    /*extract sequences from compressed database*/
    if (!had_err) {
      nrencseq = gt_n_r_encseq_new_from_file(gt_str_get(arguments->dbpath),
                                             logger, err);
      if (nrencseq == NULL)
        had_err = -1;
    }

    if (!had_err) {
      if (arguments->ceval == GT_UNDEF_DOUBLE ||
          arguments->feval == GT_UNDEF_DOUBLE) {
        /* from NCBI BLAST tutorial:
           E = Kmne^{-lambdaS}
           calculates E-value for score S with natural scale parameters K for
           search space size and lambda for the scoring system
           E = mn2^-S'
           m being the subject (total) length, n the length of ONE query
           calculates E-value for bit-score S' */
        GtFastaReader *reader;
        GtCondenserSearchAvg avg = {0,0};
        reader = gt_fasta_reader_rec_new(arguments->querypath);
        had_err = gt_fasta_reader_run(reader, NULL, NULL,
                                      gt_condenser_search_cum_moving_avg,
                                      &avg,
                                      err);
        if (!had_err) {
          GtUword S = arguments->bitscore;
          gt_log_log(GT_WU " queries, avg query size: " GT_WU,
                     avg.count, avg.avg);
          raw_eval = 1/pow(2.0, (double) S) * avg.avg;
          gt_logger_log(logger, "Raw E-value set to %.4e", raw_eval);
          gt_assert(avg.avg != 0);
        }
        gt_fasta_reader_delete(reader);
      }
    }

    /*create BLAST database from compressed database fasta file*/
    if (!had_err) {
      if (timer != NULL)
        gt_timer_show_progress(timer, "create coarse BLAST db", stderr);
      if (arguments->blastn)
        had_err = gt_condenser_search_create_nucl_blastdb(
                                                   gt_str_get(fastaname), err);
      else
        had_err = gt_condenser_search_create_prot_blastdb(
                                                   gt_str_get(fastaname), err);
    }

    /* coarse BLAST search */
    if (!had_err) {
      GtBlastProcessCall *call;

      if (timer != NULL)
        gt_timer_show_progress(timer, "coarse BLAST run", stderr);

      if (arguments->blastp)
        call = gt_blast_process_call_new_prot();
      else
        call = gt_blast_process_call_new_nucl();
      gt_blast_process_call_set_db(call, gt_str_get(fastaname));
      gt_blast_process_call_set_query(call, querypath);
      gt_blast_process_call_set_evalue(call, arguments->ceval);
      gt_blast_process_call_set_num_threads(call, arguments->blthreads);

      mp = gt_match_iterator_blast_process_new(call, err);
      if (!mp)
        had_err = -1;

      gt_blast_process_call_delete(call);

      while (!had_err &&
             (status = gt_match_iterator_next(mp, &match, err)) !=
             GT_MATCHER_STATUS_END) {
        if (status == GT_MATCHER_STATUS_OK) {
          GtUword hit_seq_id;
          char string[7];
          const char *dbseqid = gt_match_get_seqid2(match);
          if (sscanf(dbseqid, "%6s" GT_WU, string, &hit_seq_id) == 2) {
            gt_match_get_range_seq2(match, hits[curr_hits].range);
            hits[curr_hits].idx = hit_seq_id;
            curr_hits++;
            /* array is full: add another 100 slots with their ranges */
            if (curr_hits == max_hits) {
              max_hits += 100;
              hits = gt_realloc(hits, sizeof (*hits) * (size_t) max_hits);
              for (i = max_hits - 100; i < max_hits; i++) {
                hits[i].range = gt_malloc(sizeof (*hits[i].range));
              }
            }
          }
          else {
            gt_error_set(err, "could not parse unique db header %s", dbseqid);
            had_err = -1;
          }
          /* dbseqid was obtained from the match, so the match is released
             only now; it is released on the parse error path as well, where
             it used to leak */
          gt_match_delete(match);
        }
        else if (status == GT_MATCHER_STATUS_ERROR) {
          had_err = -1;
        }
      }
      gt_match_iterator_delete(mp);
    }

    /*extract sequences*/
    if (!had_err) {
      GtNREncseqDecompressor *decomp;
      GtFile *coarse_hits;

      if (timer != NULL)
        gt_timer_show_progress(timer, "extract coarse search hits", stderr);

      decomp = gt_n_r_encseq_decompressor_new(nrencseq);
      coarse_hits = gt_file_new(gt_str_get(coarse_fname), "w", err);
      /* do not start the extraction without an output file */
      if (coarse_hits == NULL)
        had_err = -1;

      if (!had_err) {
        /* TODO DW do NOT extract complete uniques! these could be complete
           chromosomes!! just extract something around it? maybe +- max query
           length*/
        for (i = 0; i < curr_hits; i++) {
          gt_n_r_encseq_decompressor_add_unique_idx_to_extract(decomp,
                                                               hits[i].idx);
        }

        had_err =
          gt_n_r_encseq_decompressor_start_unique_extraction(coarse_hits,
                                                             decomp,
                                                             &coarse_db_len,
                                                             err);
        /* a length of 0 is only unexpected if the extraction succeeded;
           failures are reported through had_err instead of aborting */
        gt_assert(had_err || coarse_db_len != 0);
      }
      gt_file_delete(coarse_hits);
      gt_n_r_encseq_decompressor_delete(decomp);
    }
    gt_n_r_encseq_delete(nrencseq);

    /* create BLAST database from decompressed database file */
    if (!had_err) {
      if (timer != NULL)
        gt_timer_show_progress(timer, "create fine BLAST db", stderr);
      if (arguments->blastn)
        had_err = gt_condenser_search_create_nucl_blastdb(
                                                gt_str_get(coarse_fname), err);
      else
        had_err = gt_condenser_search_create_prot_blastdb(
                                                gt_str_get(coarse_fname), err);
    }

    /* perform fine BLAST search */
    if (!had_err) {
      GtBlastProcessCall *call;

      if (timer != NULL)
        gt_timer_show_progress(timer, "fine BLAST run", stderr);

      if (arguments->feval == GT_UNDEF_DOUBLE) {
        eval = raw_eval * coarse_db_len;
      } else {
        eval = arguments->feval;
      }

      if (arguments->blastp)
        call = gt_blast_process_call_new_prot();
      else
        call = gt_blast_process_call_new_nucl();

      gt_blast_process_call_set_db(call, gt_str_get(coarse_fname));
      gt_blast_process_call_set_query(call, querypath);
      gt_blast_process_call_set_evalue(call, eval);
      gt_blast_process_call_set_num_threads(call, arguments->blthreads);

      gt_logger_log(logger, "Fine E-value set to: %.4e (len)" GT_WU, eval,
                    coarse_db_len);

      mp = gt_match_iterator_blast_process_new(call, err);
      if (!mp)
        had_err = -1;

      gt_blast_process_call_delete(call);

      if (!had_err) {
        GtUword numofhits = 0;
        while (!had_err &&
               (status = gt_match_iterator_next(mp, &match, err)) !=
               GT_MATCHER_STATUS_END) {
          if (status == GT_MATCHER_STATUS_OK) {
            GtMatchBlast *matchb = (GtMatchBlast*) match;
            GtRange range_seq1;
            GtRange range_seq2;
            numofhits++;
            gt_match_get_range_seq1(match, &range_seq1);
            gt_match_get_range_seq2(match, &range_seq2);
            /* query id, subject id, similarity, alignment length, query
               range, subject range, E-value, bitscore */
            gt_file_xprintf(
                    arguments->outfp,
                    "%s\t%s\t%.2f\t" GT_WU "\t" GT_WU "\t" GT_WU "\t" GT_WU "\t"
                    GT_WU "\t%g\t%.3f\n",
                    gt_match_get_seqid1(match),
                    gt_match_get_seqid2(match),
                    gt_match_blast_get_similarity(matchb),
                    gt_match_blast_get_align_length(matchb),
                    range_seq1.start,
                    range_seq1.end,
                    range_seq2.start,
                    range_seq2.end,
                    gt_match_blast_get_evalue(matchb),
                    (double) gt_match_blast_get_bitscore(matchb));
            gt_match_delete(match);
          }
          else if (status == GT_MATCHER_STATUS_ERROR) {
            had_err = -1;
          }
        }
        gt_log_log(GT_WU " hits found\n", numofhits);
      }
      gt_match_iterator_delete(mp);
    }

    if (!had_err)
      if (timer != NULL)
        gt_timer_show_progress_final(timer, stderr);
    gt_timer_delete(timer);

    /*cleanup*/
    for (i = 0; i < max_hits; i++) {
      gt_free(hits[i].range);
    }
    gt_free(hits);
    gt_str_delete(fastaname);
  }
  gt_str_delete(coarse_fname);
  gt_logger_delete(logger);

  return had_err;
}
/* Runner of the select tool.
   Reads GFF3 from the command line arguments following the parsed options,
   filters it with a select stream configured from <tool_arguments>
   (optionally followed by a targetbest select stream) and writes the result
   as GFF3 to arguments->outfp. If a dropped file name is given, the drop
   handler print_to_file_drophandler is installed with a GFF3 visitor on that
   file; otherwise default_drophandler is used.
   Returns -1 if the select stream cannot be created or the dropped file
   cannot be opened, otherwise the result of pulling the stream chain. */
static int gt_select_runner(int argc, const char **argv, int parsed_args,
                            void *tool_arguments, GtError *err)
{
  SelectArguments *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream, *select_stream,
               *targetbest_select_stream = NULL,
               *gff3_out_stream = NULL;
  int had_err = 0;
  GtFile *drop_file = NULL;
  GtNodeVisitor *gff3outvis = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  /* create a gff3 input stream */
  gff3_in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                                  argv + parsed_args);
  if (arguments->verbose && arguments->outfp)
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) gff3_in_stream);

  /* create a filter stream */
  select_stream = gt_select_stream_new(gff3_in_stream, arguments->seqid,
                                       arguments->source,
                                       &arguments->contain_range,
                                       &arguments->overlap_range,
                                       arguments->strand,
                                       arguments->targetstrand,
                                       arguments->has_CDS,
                                       arguments->max_gene_length,
                                       arguments->max_gene_num,
                                       arguments->min_gene_score,
                                       arguments->max_gene_score,
                                       arguments->min_average_splice_site_prob,
                                       arguments->feature_num,
                                       arguments->filter_files,
                                       arguments->filter_logic,
                                       err);

  if (select_stream) {
    GtSelectStream *fs = (GtSelectStream*) select_stream;

    if (gt_str_length(arguments->dropped_file) > 0) {
      drop_file = gt_file_new(gt_str_get(arguments->dropped_file), "w", err);
      if (drop_file) {
        gff3outvis = gt_gff3_visitor_new(drop_file);
        gt_select_stream_set_drophandler(fs, print_to_file_drophandler,
                                         (void*) gff3outvis);
      }
      else {
        /* <err> is already set here; without stopping, a visitor would be
           created on a NULL file and the stream pulled with a set error */
        had_err = -1;
      }
    }
    else {
      gt_select_stream_set_drophandler(fs, default_drophandler, NULL);
    }

    if (!had_err) {
      gt_select_stream_set_single_intron_factor(select_stream,
                                              arguments->single_intron_factor);

      if (arguments->targetbest) {
        targetbest_select_stream =
          gt_targetbest_select_stream_new(select_stream);
      }

      /* create a gff3 output stream */
      gff3_out_stream = gt_gff3_out_stream_new(arguments->targetbest
                                               ? targetbest_select_stream
                                               : select_stream,
                                               arguments->outfp);

      /* pull the features through the stream and free them afterwards */
      had_err = gt_node_stream_pull(gff3_out_stream, err);
    }

    /* free (streams not created are still NULL) */
    gt_node_stream_delete(gff3_out_stream);
    gt_node_stream_delete(select_stream);
    gt_node_stream_delete(targetbest_select_stream);
  }
  else {
    had_err = -1;
  }

  gt_file_delete(drop_file);
  gt_node_visitor_delete(gff3outvis);
  gt_node_stream_delete(gff3_in_stream);

  return had_err;
}