/*
 * Tool runner: iterates over all sequence files given in
 * argv[parsed_args .. argc-1], mutates every sequence with
 * gt_mutate_seq() using arguments->rate, and prints the mutated sequence
 * as a FASTA entry (line width arguments->width) to arguments->outfp.
 *
 * Returns 0 if all files were processed, otherwise the non-zero code
 * returned by gt_bioseq_iterator_next() (which is also handed <err>).
 */
static int gt_seqmutate_runner(int argc, const char **argv, int parsed_args,
                               void *tool_arguments, GtError *err)
{
  MutateArguments *arguments = tool_arguments;
  GtBioseqIterator *bsi;
  GtBioseq *bioseq;
  unsigned long i;
  int had_err;

  gt_error_check(err);
  gt_assert(arguments);

  bsi = gt_bioseq_iterator_new(argc - parsed_args, argv + parsed_args);

  /* the loop stops on the first error or when the iterator yields no
     further bioseq */
  while (!(had_err = gt_bioseq_iterator_next(bsi, &bioseq, err)) && bioseq) {
    for (i = 0; i < gt_bioseq_number_of_sequences(bioseq); i++) {
      GtSeq *mutated_seq;
      /* the caller owns the buffer returned by gt_bioseq_get_sequence(), so
         keep the pointer around in order to release it below instead of
         leaking one buffer per sequence */
      char *orig_seq = gt_bioseq_get_sequence(bioseq, i);

      mutated_seq = gt_mutate_seq(gt_bioseq_get_description(bioseq, i),
                                  orig_seq,
                                  gt_bioseq_get_sequence_length(bioseq, i),
                                  gt_bioseq_get_alphabet(bioseq),
                                  arguments->rate);
      gt_fasta_show_entry(gt_seq_get_description(mutated_seq),
                          gt_seq_get_orig(mutated_seq),
                          gt_seq_length(mutated_seq), arguments->width,
                          arguments->outfp);
      gt_seq_delete(mutated_seq);
      /* released last, after everything derived from it has been deleted */
      gt_free(orig_seq);
    }
    gt_bioseq_delete(bioseq);
  }

  gt_bioseq_iterator_delete(bsi);

  return had_err;
}
/*
 * Tool runner: writes every sequence of the input files given in
 * argv[parsed_args .. argc-1] as a FASTA entry to arguments->outfp.
 *
 * If arguments->addstopaminos is set and the file has a protein alphabet,
 * every non-empty sequence that does not already end with GT_STOP_AMINO is
 * printed with GT_STOP_AMINO_CSTR appended as suffix.
 *
 * Returns 0 if all files were processed, otherwise the non-zero code
 * returned by gt_bioseq_iterator_next().
 */
static int gt_seqtransform_runner(int argc, const char **argv, int parsed_args,
                                  void *tool_arguments, GtError *err)
{
  SeqtransformArguments *arguments = tool_arguments;
  GtBioseqIterator *iterator;
  GtBioseq *bioseq;
  int had_err;

  gt_error_check(err);
  gt_assert(arguments);

  iterator = gt_bioseq_iterator_new(argc - parsed_args, argv + parsed_args);

  for (;;) {
    unsigned long idx;
    bool protein_input;

    /* fetch the next input file; leave on error or when none is left */
    had_err = gt_bioseq_iterator_next(iterator, &bioseq, err);
    if (had_err || !bioseq) {
      break;
    }

    /* the alphabet is a property of the whole file, so test it only once */
    protein_input = gt_alphabet_is_protein(gt_bioseq_get_alphabet(bioseq));

    for (idx = 0; idx < gt_bioseq_number_of_sequences(bioseq); idx++) {
      const char *description = gt_bioseq_get_description(bioseq, idx);
      char *residues = gt_bioseq_get_sequence(bioseq, idx);
      unsigned long length = gt_bioseq_get_sequence_length(bioseq, idx);
      /* append a stop amino acid only where requested and still missing */
      const char *suffix = (arguments->addstopaminos && protein_input &&
                            length > 0 &&
                            residues[length - 1] != GT_STOP_AMINO)
                             ? GT_STOP_AMINO_CSTR
                             : NULL;

      gt_fasta_show_entry_with_suffix(description, residues, length, suffix,
                                      arguments->width, arguments->outfp);
      gt_free(residues);
    }

    gt_bioseq_delete(bioseq);
  }

  gt_bioseq_iterator_delete(iterator);

  return had_err;
}
/*
 * Fill the model of <bssm_param> selected by <termtype> from data files.
 *
 * For each of the NUMOFFILES entries of filenames[] the file
 *   <path>/GT_donor/<name>, <path>/GC_donor/<name> or
 *   <path>/AG_acceptor/<name>
 * (with ".gz" appended if <gzip> is set) is opened as a GtBioseq. Every
 * sequence in it is validated and, if the whole file is valid, the file is
 * handed to build_bssm() together with its index.
 *
 * Validation per sequence:
 *  - its length must equal STRINGSIZE
 *  - the characters at the 0-based positions 50 and 51 must be GT, GC or AG,
 *    matching <termtype>
 *
 * Returns 0 on success and -1 on the first failure. For validation failures
 * a message is stored in <err>; for a file that cannot be opened <err> is
 * passed to gt_bioseq_new() (presumably set there -- verify).
 */
int gth_bssm_param_parameterize(GthBSSMParam *bssm_param, const char *path,
                                Termtype termtype, bool gzip, GtError *err)
{
  GtAlphabet *alphabet = NULL;
  GtBioseq *bioseq;
  GtStr *file2proc;
  GtUword i, j;
  int had_err = 0;

  gt_error_check(err);

  file2proc = gt_str_new();

  /* set version number */
  bssm_param->version_num = (unsigned char) MYVERSION;

  /* set model to true and set window sizes */
  switch (termtype) {
    case GT_DONOR_TYPE:
      bssm_param->gt_donor_model_set = true;
      set_window_sizes_in_Bssmmodel(&bssm_param->gt_donor_model);
      break;
    case GC_DONOR_TYPE:
      bssm_param->gc_donor_model_set = true;
      set_window_sizes_in_Bssmmodel(&bssm_param->gc_donor_model);
      break;
    case AG_ACCEPTOR_TYPE:
      bssm_param->ag_acceptor_model_set = true;
      set_window_sizes_in_Bssmmodel(&bssm_param->ag_acceptor_model);
      break;
    default: gt_assert(0);
  }

  for (i = 0; !had_err && i < NUMOFFILES; i++) {
    /* process datafile: assemble its name in file2proc */
    gt_str_append_cstr(file2proc, path);
    switch (termtype) {
      case GT_DONOR_TYPE:
        gt_str_append_cstr(file2proc, "/GT_donor/");
        gt_str_append_cstr(file2proc, filenames[i]);
        break;
      case GC_DONOR_TYPE:
        gt_str_append_cstr(file2proc, "/GC_donor/");
        gt_str_append_cstr(file2proc, filenames[i]);
        break;
      case AG_ACCEPTOR_TYPE:
        gt_str_append_cstr(file2proc, "/AG_acceptor/");
        gt_str_append_cstr(file2proc, filenames[i]);
        break;
      default: gt_assert(0);
    }

    if (gzip)
      gt_str_append_cstr(file2proc, ".gz");

    if (!(bioseq = gt_bioseq_new(gt_str_get(file2proc), err)))
      had_err = -1;

    if (!had_err)
      alphabet = gt_bioseq_get_alphabet(bioseq);

    /* check here if all sequences have the required length and the correct
       bases at (1-based) positions 51 and 52 (i.e., GT, GC, or AG);
       the !had_err test also keeps a NULL bioseq from being queried */
    for (j = 0; !had_err && j < gt_bioseq_number_of_sequences(bioseq); j++) {
      /* check length */
      if (gt_bioseq_get_sequence_length(bioseq, j) != STRINGSIZE) {
        gt_error_set(err, "sequence "GT_WU" in file \"%s\" does not have length %u",
                     j, gt_str_get(file2proc), STRINGSIZE);
        had_err = -1;
      }
      else {
        GtUchar encoded_seq[2];

        /* Only fetch the two characters once the length is known to be
           valid: for a sequence shorter than 52 characters these positions
           would lie outside of the sequence. */
        encoded_seq[0] = gt_bioseq_get_encoded_char(bioseq, j, 50);
        encoded_seq[1] = gt_bioseq_get_encoded_char(bioseq, j, 51);

        /* check base correctness */
        switch (termtype) {
          case GT_DONOR_TYPE:
            if (encoded_seq[0] != gt_alphabet_encode(alphabet, 'G') ||
                encoded_seq[1] != gt_alphabet_encode(alphabet, 'T')) {
              gt_error_set(err, "sequence "GT_WU" in file \"%s\" is not a GT "
                                "sequence", j, gt_str_get(file2proc));
              had_err = -1;
            }
            break;
          case GC_DONOR_TYPE:
            if (encoded_seq[0] != gt_alphabet_encode(alphabet, 'G') ||
                encoded_seq[1] != gt_alphabet_encode(alphabet, 'C')) {
              gt_error_set(err, "sequence "GT_WU" in file \"%s\" is not a GC "
                                "sequence", j, gt_str_get(file2proc));
              had_err = -1;
            }
            break;
          case AG_ACCEPTOR_TYPE:
            if (encoded_seq[0] != gt_alphabet_encode(alphabet, 'A') ||
                encoded_seq[1] != gt_alphabet_encode(alphabet, 'G')) {
              gt_error_set(err, "sequence "GT_WU" in file \"%s\" is not a AG "
                                "sequence", j, gt_str_get(file2proc));
              had_err = -1;
            }
            break;
          default: gt_assert(0);
        }
      }
    }

    /* all sequences of this file are valid: add it to the selected model */
    if (!had_err) {
      switch (termtype) {
        case GT_DONOR_TYPE:
          build_bssm(bioseq, &bssm_param->gt_donor_model, i);
          break;
        case GC_DONOR_TYPE:
          build_bssm(bioseq, &bssm_param->gc_donor_model, i);
          break;
        case AG_ACCEPTOR_TYPE:
          build_bssm(bioseq, &bssm_param->ag_acceptor_model, i);
          break;
        default: gt_assert(0);
      }
    }

    /* reset the file name buffer for the next iteration */
    gt_str_reset(file2proc);

    /* free space
       NOTE(review): bioseq is NULL here if gt_bioseq_new() failed; this
       relies on gt_bioseq_delete() accepting NULL -- confirm. */
    gt_bioseq_delete(bioseq);
  }

  gt_str_delete(file2proc);

  return had_err;
}