Esempio n. 1
0
/*
 * Runner of the seqmutate tool.
 *
 * Every sequence delivered by the bioseq iterator is passed through
 * gt_mutate_seq() with arguments->rate, and the mutated sequence is written
 * as a FASTA entry (line width arguments->width) to arguments->outfp.
 *
 * argc, argv, parsed_args: command line; the entries from index parsed_args
 *                          onwards are handed to the bioseq iterator
 *                          (presumably sequence file names).
 * tool_arguments:          the tool's MutateArguments; must not be NULL.
 * err:                     error object forwarded to the iterator.
 *
 * Returns the last status reported by gt_bioseq_iterator_next(); a non-zero
 * status stops the processing immediately.
 */
static int gt_seqmutate_runner(int argc, const char **argv, int parsed_args,
                            void *tool_arguments, GtError *err)
{
  MutateArguments *arguments = tool_arguments;
  GtBioseqIterator *bsi;
  GtBioseq *bioseq;
  int had_err;

  gt_error_check(err);
  gt_assert(arguments);

  bsi = gt_bioseq_iterator_new(argc - parsed_args, argv + parsed_args);

  for (;;) {
    unsigned long seqnum = 0;

    /* stop on an iterator error or once no further bioseq is delivered */
    had_err = gt_bioseq_iterator_next(bsi, &bioseq, err);
    if (had_err || !bioseq)
      break;

    while (seqnum < gt_bioseq_number_of_sequences(bioseq)) {
      GtSeq *mutant;

      /* NOTE(review): the buffer returned by gt_bioseq_get_sequence() is not
         released here. If it is a fresh allocation in this library version
         this leaks once per sequence -- confirm ownership before changing. */
      mutant = gt_mutate_seq(gt_bioseq_get_description(bioseq, seqnum),
                             gt_bioseq_get_sequence(bioseq, seqnum),
                             gt_bioseq_get_sequence_length(bioseq, seqnum),
                             gt_bioseq_get_alphabet(bioseq),
                             arguments->rate);
      gt_fasta_show_entry(gt_seq_get_description(mutant),
                          gt_seq_get_orig(mutant),
                          gt_seq_length(mutant),
                          arguments->width, arguments->outfp);
      gt_seq_delete(mutant);
      seqnum++;
    }

    /* each bioseq handed out by the iterator is deleted by this loop */
    gt_bioseq_delete(bioseq);
  }

  gt_bioseq_iterator_delete(bsi);

  return had_err;
}
Esempio n. 2
0
/*
 * Runner of the seqtransform tool.
 *
 * Writes every sequence delivered by the bioseq iterator as a FASTA entry
 * (line width arguments->width) to arguments->outfp. If
 * arguments->addstopaminos is set, the bioseq's alphabet is a protein
 * alphabet, and a non-empty sequence does not already end with
 * GT_STOP_AMINO, then GT_STOP_AMINO_CSTR is passed as the entry's suffix.
 *
 * argc, argv, parsed_args: command line; the entries from index parsed_args
 *                          onwards are handed to the bioseq iterator
 *                          (presumably sequence file names).
 * tool_arguments:          the tool's SeqtransformArguments; must not be NULL.
 * err:                     error object forwarded to the iterator.
 *
 * Returns the last status reported by gt_bioseq_iterator_next(); a non-zero
 * status stops the processing immediately.
 */
static int gt_seqtransform_runner(int argc, const char **argv, int parsed_args,
                            void *tool_arguments, GtError *err)
{
  SeqtransformArguments *arguments = tool_arguments;
  GtBioseqIterator *bsi;
  GtBioseq *bioseq;
  int had_err;

  gt_error_check(err);
  gt_assert(arguments);

  bsi = gt_bioseq_iterator_new(argc - parsed_args, argv + parsed_args);

  for (;;) {
    bool protein_input;
    unsigned long idx;

    /* stop on an iterator error or once no further bioseq is delivered */
    had_err = gt_bioseq_iterator_next(bsi, &bioseq, err);
    if (had_err || !bioseq)
      break;

    /* the alphabet is a property of the whole bioseq: query it once */
    protein_input = gt_alphabet_is_protein(gt_bioseq_get_alphabet(bioseq));

    for (idx = 0; idx < gt_bioseq_number_of_sequences(bioseq); idx++) {
      const char *header = gt_bioseq_get_description(bioseq, idx);
      char *residues = gt_bioseq_get_sequence(bioseq, idx);
      unsigned long length = gt_bioseq_get_sequence_length(bioseq, idx);
      const char *tail = NULL;

      /* the length test must come first: it guards residues[length - 1] */
      if (arguments->addstopaminos && protein_input && length > 0) {
        if (residues[length - 1] != GT_STOP_AMINO)
          tail = GT_STOP_AMINO_CSTR;
      }

      gt_fasta_show_entry_with_suffix(header, residues, length, tail,
                                      arguments->width, arguments->outfp);

      /* the sequence buffer is released by this function */
      gt_free(residues);
    }

    gt_bioseq_delete(bioseq);
  }

  gt_bioseq_iterator_delete(bsi);

  return had_err;
}
Esempio n. 3
0
/*
 * Parameterize the model of <bssm_param> selected by <termtype> from
 * training files below <path>.
 *
 * The version number of <bssm_param> is set to MYVERSION, the "model set"
 * flag of the selected model is raised and its window sizes are initialized.
 * Then, for each of the NUMOFFILES entries of filenames[], the file
 *
 *   <path>/GT_donor/<name>, <path>/GC_donor/<name> or
 *   <path>/AG_acceptor/<name>
 *
 * (with ".gz" appended if <gzip> is true) is opened as a bioseq. Every
 * sequence in it must have length STRINGSIZE and must carry the dinucleotide
 * matching <termtype> (GT, GC or AG) at the 0-based positions 50 and 51.
 * A file that passes these checks is handed to build_bssm().
 *
 * Returns 0 on success. On the first failure (file cannot be opened, wrong
 * sequence length, wrong dinucleotide) <err> is set, no further files are
 * processed and -1 is returned.
 */
int gth_bssm_param_parameterize(GthBSSMParam *bssm_param, const char *path,
                                Termtype termtype, bool gzip, GtError *err)
{
    GtAlphabet *alphabet = NULL;
    GtBioseq *bioseq;
    GtStr *file2proc;
    GtUword i, j;
    int had_err = 0;
    gt_error_check(err);

    file2proc = gt_str_new();

    /* set version number */
    bssm_param->version_num = (unsigned char) MYVERSION;

    /* set model to true and set window sizes */
    switch (termtype) {
    case GT_DONOR_TYPE:
        bssm_param->gt_donor_model_set = true;
        set_window_sizes_in_Bssmmodel(&bssm_param->gt_donor_model);
        break;
    case GC_DONOR_TYPE:
        bssm_param->gc_donor_model_set = true;
        set_window_sizes_in_Bssmmodel(&bssm_param->gc_donor_model);
        break;
    case AG_ACCEPTOR_TYPE:
        bssm_param->ag_acceptor_model_set = true;
        set_window_sizes_in_Bssmmodel(&bssm_param->ag_acceptor_model);
        break;
    default:
        gt_assert(0);
    }

    for (i = 0; !had_err && i < NUMOFFILES; i++) {
        /* process datafile: build its path in file2proc */
        gt_str_append_cstr(file2proc, path);
        switch (termtype) {
        case GT_DONOR_TYPE:
            gt_str_append_cstr(file2proc, "/GT_donor/");
            gt_str_append_cstr(file2proc, filenames[i]);
            break;
        case GC_DONOR_TYPE:
            gt_str_append_cstr(file2proc, "/GC_donor/");
            gt_str_append_cstr(file2proc, filenames[i]);
            break;
        case AG_ACCEPTOR_TYPE:
            gt_str_append_cstr(file2proc, "/AG_acceptor/");
            gt_str_append_cstr(file2proc, filenames[i]);
            break;
        default:
            gt_assert(0);
        }

        if (gzip)
            gt_str_append_cstr(file2proc, ".gz");

        if (!(bioseq = gt_bioseq_new(gt_str_get(file2proc), err)))
            had_err = -1;

        if (!had_err)
            alphabet = gt_bioseq_get_alphabet(bioseq);

        /* check here if all sequences have the length STRINGSIZE and correct
           bases at the 0-based positions 50 and 51 (i.e., GT, GC, or AG);
           the !had_err test also keeps a NULL bioseq from being queried */
        for (j = 0; !had_err && j < gt_bioseq_number_of_sequences(bioseq); j++) {
            /* check length */
            if (gt_bioseq_get_sequence_length(bioseq, j) != STRINGSIZE) {
                gt_error_set(err,
                             "sequence "GT_WU" in file \"%s\" does not have length %u",
                             j, gt_str_get(file2proc), STRINGSIZE);
                had_err = -1;
            }
            else {
                GtUchar encoded_seq[2];
                /* fetch the two bases only after the length has been
                   verified, so that a too short sequence is never indexed
                   beyond its end */
                encoded_seq[0] = gt_bioseq_get_encoded_char(bioseq, j, 50);
                encoded_seq[1] = gt_bioseq_get_encoded_char(bioseq, j, 51);

                /* check base correctness */
                switch (termtype) {
                case GT_DONOR_TYPE:
                    if (encoded_seq[0] != gt_alphabet_encode(alphabet, 'G') ||
                            encoded_seq[1] != gt_alphabet_encode(alphabet, 'T')) {
                        gt_error_set(err, "sequence "GT_WU" in file \"%s\" is not a GT "
                                     "sequence", j, gt_str_get(file2proc));
                        had_err = -1;
                    }
                    break;
                case GC_DONOR_TYPE:
                    if (encoded_seq[0] != gt_alphabet_encode(alphabet, 'G') ||
                            encoded_seq[1] != gt_alphabet_encode(alphabet, 'C')) {
                        gt_error_set(err, "sequence "GT_WU" in file \"%s\" is not a GC "
                                     "sequence", j, gt_str_get(file2proc));
                        had_err = -1;
                    }
                    break;
                case AG_ACCEPTOR_TYPE:
                    if (encoded_seq[0] != gt_alphabet_encode(alphabet, 'A') ||
                            encoded_seq[1] != gt_alphabet_encode(alphabet, 'G')) {
                        gt_error_set(err, "sequence "GT_WU" in file \"%s\" is not a AG "
                                     "sequence", j, gt_str_get(file2proc));
                        had_err = -1;
                    }
                    break;
                default:
                    gt_assert(0);
                }
            }
        }

        /* all sequences of this file are valid: feed them into the model */
        if (!had_err) {
            switch (termtype) {
            case GT_DONOR_TYPE:
                build_bssm(bioseq, &bssm_param->gt_donor_model, i);
                break;
            case GC_DONOR_TYPE:
                build_bssm(bioseq, &bssm_param->gc_donor_model, i);
                break;
            case AG_ACCEPTOR_TYPE:
                build_bssm(bioseq, &bssm_param->ag_acceptor_model, i);
                break;
            default:
                gt_assert(0);
            }
        }

        /* reset the path buffer for the next file */
        gt_str_reset(file2proc);

        /* free space; bioseq is NULL here if gt_bioseq_new() failed */
        gt_bioseq_delete(bioseq);
    }
    gt_str_delete(file2proc);

    return had_err;
}