/* Runner of the extractseq tool.
   The input files are the <argc> - <parsed_args> entries of <argv> which
   follow the already parsed arguments.
   If <arguments->fastakeyfile> has non-zero length, the whole job is handed
   to process_fastakeyfile(). Otherwise every GtBioseq delivered by the
   iterator is passed to extractseq_pos() (if <arguments->frompos> is
   non-zero) or to extractseq_match() with <arguments->pattern>.
   Returns 0 on success, otherwise the first non-zero status reported by one
   of the called functions. */
static int gt_extractseq_runner(int argc, const char **argv, int parsed_args,
                                void *tool_arguments, GtError *err)
{
  ExtractSeqArguments *arguments = tool_arguments;
  const int num_of_files = argc - parsed_args;
  const char **files = argv + parsed_args;
  GtBioseqIterator *bsi;
  GtBioseq *bs;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  /* key file given: no sequence iteration is done here */
  if (gt_str_length(arguments->fastakeyfile)) {
    return process_fastakeyfile(arguments->fastakeyfile, num_of_files, files,
                                arguments->width, arguments->outfp, err);
  }

  bsi = gt_bioseq_iterator_new(num_of_files, files);
  for (;;) {
    had_err = gt_bioseq_iterator_next(bsi, &bs, err);
    if (had_err || !bs)
      break; /* iterator failed or delivered no further bioseq */

    if (arguments->frompos) {
      had_err = extractseq_pos(arguments->outfp, bs, arguments->frompos,
                               arguments->topos, arguments->width, err);
    }
    else {
      had_err = extractseq_match(arguments->outfp, bs,
                                 gt_str_get(arguments->pattern),
                                 arguments->width, err);
    }
    gt_bioseq_delete(bs);

    /* stop after a failed extraction without asking for the next bioseq */
    if (had_err)
      break;
  }
  gt_bioseq_iterator_delete(bsi);

  return had_err;
}
/* Runner of the seqmutate tool.
   For every sequence of every GtBioseq delivered by the iterator over the
   input files (<argv> + <parsed_args>), a mutated copy is created with
   gt_mutate_seq() using <arguments->rate> and shown as a FASTA entry with
   line width <arguments->width> on <arguments->outfp>.
   Returns the status of the last gt_bioseq_iterator_next() call, i.e. 0 if
   the iteration ended without error. */
static int gt_seqmutate_runner(int argc, const char **argv, int parsed_args,
                               void *tool_arguments, GtError *err)
{
  MutateArguments *arguments = tool_arguments;
  GtBioseqIterator *bsi;
  GtBioseq *bioseq;
  int had_err;

  gt_error_check(err);
  gt_assert(arguments);

  bsi = gt_bioseq_iterator_new(argc - parsed_args, argv + parsed_args);

  for (;;) {
    unsigned long seqnum;

    had_err = gt_bioseq_iterator_next(bsi, &bioseq, err);
    if (had_err || !bioseq)
      break; /* iterator failed or delivered no further bioseq */

    for (seqnum = 0; seqnum < gt_bioseq_number_of_sequences(bioseq);
         seqnum++) {
      GtSeq *mutated;

      mutated = gt_mutate_seq(gt_bioseq_get_description(bioseq, seqnum),
                              gt_bioseq_get_sequence(bioseq, seqnum),
                              gt_bioseq_get_sequence_length(bioseq, seqnum),
                              gt_bioseq_get_alphabet(bioseq),
                              arguments->rate);
      gt_fasta_show_entry(gt_seq_get_description(mutated),
                          gt_seq_get_orig(mutated),
                          gt_seq_length(mutated),
                          arguments->width, arguments->outfp);
      gt_seq_delete(mutated);
    }
    gt_bioseq_delete(bioseq);
  }

  gt_bioseq_iterator_delete(bsi);

  return had_err;
}
/* Runner of the seqfilter tool.
   Iterates over all sequences of the input files (<argv> + <parsed_args>)
   and shows a sequence as a FASTA entry on <arguments->outfp> if
   - its length is >= <arguments->minlength> (unless that is GT_UNDEF_ULONG),
   - its length is <= <arguments->maxlength> (unless that is GT_UNDEF_ULONG),
   - fewer than <arguments->maxseqnum> sequences have passed so far (unless
     that is GT_UNDEF_ULONG).
   All other sequences are counted as filtered. If no error occurred, a
   statistics line is printed to stderr.
   Returns the status of the last gt_bioseq_iterator_next() call. */
static int gt_seqfilter_runner(int argc, const char **argv, int parsed_args,
                               void *tool_arguments, GtError *err)
{
  SeqFilterArguments *arguments = tool_arguments;
  GtBioseqIterator *bsi;
  GtBioseq *bioseq;
  unsigned long i;
  unsigned long long passed = 0, filtered = 0, num_of_sequences = 0;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(tool_arguments);

  bsi = gt_bioseq_iterator_new(argc - parsed_args, argv + parsed_args);

  while (!(had_err = gt_bioseq_iterator_next(bsi, &bioseq, err)) && bioseq) {
    for (i = 0; i < gt_bioseq_number_of_sequences(bioseq); i++) {
      /* fetch the length once; it is needed for both bounds and the output */
      unsigned long seqlen = gt_bioseq_get_sequence_length(bioseq, i);

      if ((arguments->minlength == GT_UNDEF_ULONG ||
           seqlen >= arguments->minlength) &&
          (arguments->maxlength == GT_UNDEF_ULONG ||
           seqlen <= arguments->maxlength) &&
          (arguments->maxseqnum == GT_UNDEF_ULONG ||
           passed + 1 <= arguments->maxseqnum)) {
        gt_fasta_show_entry(gt_bioseq_get_description(bioseq, i),
                            gt_bioseq_get_sequence(bioseq, i),
                            seqlen, arguments->width, arguments->outfp);
        passed++;
      }
      else
        filtered++;
      num_of_sequences++;
    }
    gt_bioseq_delete(bioseq);
  }

  /* show statistics */
  if (!had_err) {
    double removed_percent = 0.0;

    gt_assert(passed + filtered == num_of_sequences);
    /* without any sequence the ratio would be 0/0; report 0% instead of
       printing NaN */
    if (num_of_sequences > 0)
      removed_percent = ((double) filtered / num_of_sequences) * 100.0;
    fprintf(stderr, "# %llu out of %llu sequences have been removed "
            "(%.3f%%)\n", filtered, num_of_sequences, removed_percent);
  }

  gt_bioseq_iterator_delete(bsi);

  return had_err;
}
/* Runner of the shredder tool.
   For every GtBioseq delivered by the iterator over the input files
   (<argv> + <parsed_args>), <arguments->coverage> shredders are created one
   after another, each configured with <arguments->minlength>,
   <arguments->maxlength>, <arguments->overlap> and
   <arguments->sample_probability>. Every fragment a shredder returns is
   shown as a FASTA entry whose description ends in " [shreddered fragment]".
   Returns the status of the last gt_bioseq_iterator_next() call.
   Note: <argc> is used to compute the number of input files and is therefore
   not annotated as unused. */
static int gt_shredder_runner(int argc, const char **argv, int parsed_args,
                              void *tool_arguments, GtError *err)
{
  GtShredderArguments *arguments = tool_arguments;
  GtBioseqIterator *bsi;
  unsigned long i;
  GtBioseq *bioseq;
  int had_err;
  GtStr *desc;

  gt_error_check(err);
  gt_assert(arguments);

  /* init */
  desc = gt_str_new();
  bsi = gt_bioseq_iterator_new(argc - parsed_args, argv + parsed_args);

  /* shredder */
  while (!(had_err = gt_bioseq_iterator_next(bsi, &bioseq, err)) && bioseq) {
    for (i = 0; i < arguments->coverage; i++) {
      GtShredder *shredder;
      unsigned long fragment_length;
      const char *fragment;

      shredder = gt_shredder_new(bioseq, arguments->minlength,
                                 arguments->maxlength);
      gt_shredder_set_overlap(shredder, arguments->overlap);
      gt_shredder_set_sample_probability(shredder,
                                         arguments->sample_probability);
      /* NOTE(review): <desc> is reused for every fragment and only appended
         to here; presumably gt_shredder_shred() overwrites it with the
         description of the current fragment -- confirm */
      while ((fragment = gt_shredder_shred(shredder, &fragment_length,
                                           desc))) {
        gt_str_append_cstr(desc, " [shreddered fragment]");
        /* NOTE(review): called without an output stream argument and with
           width 0; presumably this writes to stdout -- confirm against the
           gt_fasta_show_entry() declaration in use */
        gt_fasta_show_entry(gt_str_get(desc), fragment, fragment_length, 0);
      }
      gt_shredder_delete(shredder);
    }
    gt_bioseq_delete(bioseq);
  }

  /* free */
  gt_bioseq_iterator_delete(bsi);
  gt_str_delete(desc);

  return had_err;
}
/* Runner of the seqtransform tool.
   Shows every sequence of every GtBioseq delivered by the iterator over the
   input files (<argv> + <parsed_args>) as a FASTA entry with line width
   <arguments->width> on <arguments->outfp>. If <arguments->addstopaminos> is
   set, the alphabet is a protein alphabet and a non-empty sequence does not
   already end with GT_STOP_AMINO, GT_STOP_AMINO_CSTR is passed as suffix.
   Returns the status of the last gt_bioseq_iterator_next() call. */
static int gt_seqtransform_runner(int argc, const char **argv, int parsed_args,
                                  void *tool_arguments, GtError *err)
{
  SeqtransformArguments *arguments = tool_arguments;
  GtBioseqIterator *bsi;
  GtBioseq *bioseq;
  int had_err;

  gt_error_check(err);
  gt_assert(arguments);

  bsi = gt_bioseq_iterator_new(argc - parsed_args, argv + parsed_args);

  for (;;) {
    unsigned long seqnum;
    bool is_protein;

    had_err = gt_bioseq_iterator_next(bsi, &bioseq, err);
    if (had_err || !bioseq)
      break; /* iterator failed or delivered no further bioseq */

    is_protein = gt_alphabet_is_protein(gt_bioseq_get_alphabet(bioseq));

    for (seqnum = 0; seqnum < gt_bioseq_number_of_sequences(bioseq);
         seqnum++) {
      const char *desc = gt_bioseq_get_description(bioseq, seqnum);
      char *seq = gt_bioseq_get_sequence(bioseq, seqnum);
      unsigned long seqlen = gt_bioseq_get_sequence_length(bioseq, seqnum);
      /* a stop amino acid is only appended to non-empty protein sequences
         which do not end with one already */
      bool append_stop = arguments->addstopaminos && is_protein && seqlen &&
                         seq[seqlen-1] != GT_STOP_AMINO;

      gt_fasta_show_entry_with_suffix(desc, seq, seqlen,
                                      append_stop ? GT_STOP_AMINO_CSTR : NULL,
                                      arguments->width, arguments->outfp);
      /* the sequence returned by gt_bioseq_get_sequence() is released here */
      gt_free(seq);
    }
    gt_bioseq_delete(bioseq);
  }

  gt_bioseq_iterator_delete(bsi);

  return had_err;
}
/* Runner of the seqfilter tool.
   Iterates over the sequences of the input files (<argv> + <parsed_args>).
   Processing of a bioseq stops as soon as <arguments->maxseqnum> sequences
   have passed (unless that is GT_UNDEF_UWORD); the remaining sequences of
   that bioseq are counted as filtered. A sequence passes and is shown as a
   FASTA entry on <arguments->outfp> if, checked in this order,
   - it is selected by the step counter (<arguments->step> is 1 or the
     current sequence is the <arguments->step>-th since the last reset),
   - <arguments->sample_prob> is 1.0 or gt_rand_0_to_1() returns a value
     <= <arguments->sample_prob>,
   - its length is >= <arguments->minlength> (unless GT_UNDEF_UWORD),
   - its length is <= <arguments->maxlength> (unless GT_UNDEF_UWORD).
   If no error occurred, a statistics line is printed to stderr.
   Returns the status of the last gt_bioseq_iterator_next() call. */
static int gt_seqfilter_runner(int argc, const char **argv, int parsed_args,
                               void *tool_arguments, GtError *err)
{
  SeqFilterArguments *arguments = tool_arguments;
  GtBioseqIterator *bsi;
  GtBioseq *bioseq;
  GtUint64 passed = 0, filtered = 0, num_of_sequences = 0, steps = 0;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(tool_arguments);

  bsi = gt_bioseq_iterator_new(argc - parsed_args, argv + parsed_args);

  while (!(had_err = gt_bioseq_iterator_next(bsi, &bioseq, err)) &&
         bioseq != NULL) {
    GtUword i;
    GtUint64 current_num = gt_bioseq_number_of_sequences(bioseq);

    for (i = 0; i < current_num &&
                (arguments->maxseqnum == GT_UNDEF_UWORD ||
                 passed + 1 <= arguments->maxseqnum); i++) {
      char *seq;
      /* fetch the length once; it is needed for both bounds and the output */
      GtUword seqlen = gt_bioseq_get_sequence_length(bioseq, i);

      /* the order of the checks matters: gt_rand_0_to_1() must only be
         called for sequences selected by the step counter */
      if ((arguments->step == 1 || steps + 1 == arguments->step) &&
          (arguments->sample_prob == 1.0 ||
           gt_rand_0_to_1() <= arguments->sample_prob) &&
          (arguments->minlength == GT_UNDEF_UWORD ||
           seqlen >= arguments->minlength) &&
          (arguments->maxlength == GT_UNDEF_UWORD ||
           seqlen <= arguments->maxlength)) {
        seq = gt_bioseq_get_sequence(bioseq, i);
        gt_fasta_show_entry(gt_bioseq_get_description(bioseq, i), seq,
                            seqlen, arguments->width, arguments->outfp);
        gt_free(seq);
        passed++;
      }
      else {
        filtered++;
      }
      /* advance the step counter, wrapping around after <step> sequences */
      steps = (steps + 1 == arguments->step) ? 0 : steps + 1;
    }
    /* sequences not visited because the maximum was reached are filtered */
    filtered += current_num - i;
    num_of_sequences += current_num;
    gt_bioseq_delete(bioseq);
  }

  /* show statistics */
  if (!had_err) {
    double removed_percent = 0.0;

    gt_assert(passed + filtered == num_of_sequences);
    /* without any sequence the ratio would be 0/0; report 0% instead of
       printing NaN */
    if (num_of_sequences > 0)
      removed_percent = ((double) filtered / num_of_sequences) * 100.0;
    fprintf(stderr, "# " GT_LLU " out of " GT_LLU " sequences have been "
            "removed (%.3f%%)\n", filtered, num_of_sequences,
            removed_percent);
  }

  gt_bioseq_iterator_delete(bsi);

  return had_err;
}