Example #1
0
static void gt_hpol_processor_output_segment(GtAlignedSegment *as,
    bool may_be_gapped, GtFile *outfp, const char *desc)
{
  unsigned long slen;
  if (may_be_gapped)
    gt_aligned_segment_ungap_seq_and_qual(as);
  slen = (unsigned long)strlen(gt_aligned_segment_seq(as));
  gt_assert(slen == (unsigned long)strlen(gt_aligned_segment_qual(as)));
  if (gt_aligned_segment_is_reverse(as))
  {
    GtError *err = gt_error_new();
    char *q = gt_aligned_segment_qual(as), tmp;
    unsigned long i;
    for (i = 0; i < (slen + 1UL) >> 1; i++)
    {
      tmp = q[i];
      q[i] = q[slen - i - 1UL];
      q[slen - i - 1UL] = tmp;
    }
    gt_assert((unsigned long)strlen(gt_aligned_segment_qual(as)) == slen);
    if (gt_reverse_complement(gt_aligned_segment_seq(as), slen, err) != 0)
    {
      fprintf(stderr, "error: %s", gt_error_get(err));
      exit(EXIT_FAILURE);
    }
    gt_error_delete(err);
  }
  gt_fastq_show_entry((desc != NULL) ? desc :
      gt_aligned_segment_description(as), gt_aligned_segment_seq(as),
      gt_aligned_segment_qual(as), slen, 0, false, outfp);
}
Example #2
0
int gt_splicedseq_reverse(Splicedseq *ss, GtError *err)
{
  int had_err;
  gt_error_check(err);
  gt_assert(ss);
  had_err = gt_reverse_complement(gt_str_get(ss->splicedseq),
                               gt_str_length(ss->splicedseq), err);
  if (!had_err) {
    gt_array_reverse(ss->positionmapping);
    ss->forward = !ss->forward;
  }
  return had_err;
}
Example #3
0
GtMD5SetStatus gt_md5set_add_sequence(GtMD5Set *set, const char* seq,
                                      GtUword seqlen, bool both_strands,
                                      GtError *err)
{
    md5_t md5sum, md5sum_rc;
    GtUword i;
    int retval = 0;
    bool found;

    gt_assert(set != NULL);
    gt_assert(set->table != NULL);

    md5set_prepare_buffer(set, seqlen);
    for (i = 0; i < seqlen; i++)
        set->buffer[i] = toupper(seq[i]);

    MD5SET_HASH_STRING(set->buffer, seqlen, md5sum);
    found = md5set_search(set, md5sum, true);
    if (found)
        return GT_MD5SET_FOUND;

    if (both_strands) {
        retval = gt_reverse_complement(set->buffer, seqlen, err);
        if (retval != 0) {
            gt_assert(retval < 0);
            return GT_MD5SET_ERROR;
        }

        MD5SET_HASH_STRING(set->buffer, seqlen, md5sum_rc);
        /* if the MD5 sum of the reverse complement equals the MD5 sum of the
           sequence itself we don't check if the reverse complement is in the set.
           Otherwise such sequences would never be added to the set at all. */
        if (md5sum_rc.l == md5sum.l && md5sum_rc.h == md5sum.h) {
            return GT_MD5SET_NOT_FOUND;
        }
        found = md5set_search(set, md5sum_rc, false);
        if (found)
            return GT_MD5SET_RC_FOUND;
    }

    return GT_MD5SET_NOT_FOUND;
}
Example #4
0
GtMD5SetStatus gt_md5set_add_sequence(GtMD5Set *set, const char* seq,
                                      unsigned long seqlen, bool both_strands,
                                      GtError *err)
{
    gt_md5_t md5sum, md5sum_rc;
    unsigned long i;
    int retval = 0;
    bool found;

    gt_assert(set != NULL);
    gt_assert(set->table != NULL);

    gt_md5set_prepare_buffer(set, seqlen);
    for (i = 0; i < seqlen; i++)
        set->buffer[i] = toupper(seq[i]);

    GT_MD5SET_HASH_STRING(set->buffer, seqlen, md5sum);
    found = gt_md5set_search(set, md5sum, true);
    if (found)
        return GT_MD5SET_FOUND;

    if (both_strands)
    {
        retval = gt_reverse_complement(set->buffer, seqlen, err);
        if (retval != 0)
        {
            gt_assert(retval < 0);
            return GT_MD5SET_ERROR;
        }

        GT_MD5SET_HASH_STRING(set->buffer, seqlen, md5sum_rc);
        found = gt_md5set_search(set, md5sum_rc, false);
        if (found)
            return GT_MD5SET_RC_FOUND;
    }

    return GT_MD5SET_NOT_FOUND;
}
static int run_orffinder(GtRegionMapping *rmap,
                         GtFeatureNode *gf,
                         unsigned long start,
                         GT_UNUSED unsigned long end,
                         unsigned int min,
                         unsigned int max,
                         bool all,
                         GtError *err)
{
  int had_err = 0, i;
  unsigned long sum;
  GtCodonIterator* ci = NULL;
  GtTranslator* translator = NULL;
  GtORFIterator* orfi = NULL;
  GtORFIteratorStatus state;
  GtRange orf_rng, tmp_orf_rng[3];
  GtStr *seq;
  unsigned int orf_frame;

  /* forward strand */
  seq = gt_str_new();
  had_err = gt_extract_feature_sequence(seq,
                                        (GtGenomeNode*) gf,
                                        gt_feature_node_get_type(gf),
                                        false, NULL, NULL, rmap, err);

  ci = gt_codon_iterator_simple_new(gt_str_get(seq), gt_str_length(seq), err);
  gt_assert(ci);
  translator = gt_translator_new(ci);
  gt_assert(translator);

  orfi = gt_orf_iterator_new(ci, translator);
  gt_assert(orfi);

  for (i = 0; i < 3; i++) {
    tmp_orf_rng[i].start = GT_UNDEF_ULONG;
    tmp_orf_rng[i].end = GT_UNDEF_ULONG;
  }

  while ((state = gt_orf_iterator_next(orfi, &orf_rng, &orf_frame,
                                              err)) == GT_ORF_ITERATOR_OK) {
      if (all) {
        process_orf(orf_rng, orf_frame, GT_STRAND_FORWARD, gf,
                    start, min, max, err);
      } else {
        if (gt_range_length(&orf_rng) >
            gt_range_length(&tmp_orf_rng[orf_frame])) {
          tmp_orf_rng[orf_frame].start = orf_rng.start;
          tmp_orf_rng[orf_frame].end = orf_rng.end;
        }
      }
  }
  if (state == GT_ORF_ITERATOR_ERROR)
    had_err = -1;

  if (!had_err) {
    if (!all) {
      for (i = 0; i < 3; i++) {
        if (tmp_orf_rng[i].start != GT_UNDEF_ULONG) {
          process_orf(tmp_orf_rng[i], (unsigned int) i, GT_STRAND_FORWARD, gf,
                      start, min, max, err);
        }
      }
    }
    gt_codon_iterator_delete(ci);
    gt_translator_delete(translator);
    gt_orf_iterator_delete(orfi);
    orfi = NULL;
    ci = NULL;
    translator = NULL;

    for (i = 0; i < 3; i++) {
      tmp_orf_rng[i].start = GT_UNDEF_ULONG;
      tmp_orf_rng[i].end = GT_UNDEF_ULONG;
    }

    /* reverse strand */
    if (!had_err) {
      GT_UNUSED int rval = 0;
      unsigned long length = gt_str_length(seq);
      char *strp = (char*) gt_str_get_mem(seq);
      rval = gt_reverse_complement(strp, gt_str_length(seq), err);
      gt_assert(!rval); /* XXX */
      ci = gt_codon_iterator_simple_new(gt_str_get(seq), gt_str_length(seq),
                                        err);
      gt_assert(ci);
      translator = gt_translator_new(ci);
      gt_assert(translator);
      orfi = gt_orf_iterator_new(ci, translator);
      gt_assert(orfi);

      sum = start + length - 1;

      while ((state = gt_orf_iterator_next(orfi, &orf_rng, &orf_frame,
                                                  err)) == GT_ORF_ITERATOR_OK) {
          if (all) {
            process_orf(orf_rng, orf_frame, GT_STRAND_REVERSE, gf,
                        sum, min, max, err);
          } else {
            if (gt_range_length(&orf_rng) >
                gt_range_length(&tmp_orf_rng[orf_frame])) {
              tmp_orf_rng[orf_frame].start = orf_rng.start;
              tmp_orf_rng[orf_frame].end = orf_rng.end;
            }
          }
      }
      if (state == GT_ORF_ITERATOR_ERROR)
        had_err = -1;
      if (!had_err) {
        if (!all) {
          for (i = 0; i < 3; i++) {
            if (tmp_orf_rng[i].start != GT_UNDEF_ULONG) {
              process_orf(tmp_orf_rng[i], (unsigned int) i, GT_STRAND_REVERSE,
                          gf, sum, min, max, err);
            }
          }
        }
      }
    }
    gt_str_delete(seq);
    gt_codon_iterator_delete(ci);
    gt_translator_delete(translator);
    gt_orf_iterator_delete(orfi);
  }
  return had_err;
}
static int gt_snp_annotator_visitor_prepare_gene(GtSNPAnnotatorVisitor *sav,
                                                 GtError *err)
{
  GtFeatureNodeIterator *fni,
                        *mrnafni;
  GtFeatureNode *curnode,
                *last_mRNA = NULL;
  GtStr *mrnaseq,
        *seqid;
  int had_err = 0;

  mrnaseq = gt_str_new();
  seqid = gt_genome_node_get_seqid((GtGenomeNode*) sav->gene);
  fni = gt_feature_node_iterator_new(sav->gene);
  while (!had_err && (curnode = gt_feature_node_iterator_next(fni))) {
    if (gt_feature_node_get_type(curnode) == sav->mRNA_type) {
      GtFeatureNode *curnode2;
      if (last_mRNA) {
        char *mrna_charseq = gt_calloc(gt_str_length(mrnaseq)+1, sizeof (char));
        (void) strncpy(mrna_charseq, gt_str_get(mrnaseq),
                       gt_str_length(mrnaseq));
        if (gt_feature_node_get_strand(sav->gene) == GT_STRAND_REVERSE) {
          had_err = gt_reverse_complement(mrna_charseq, gt_str_length(mrnaseq),
                                          err);
        }
        if (!had_err) {
          gt_hashmap_add(sav->rnaseqs, last_mRNA, mrna_charseq);
          last_mRNA = curnode;
          gt_str_reset(mrnaseq);
        }
      } else last_mRNA = curnode;
      if (!had_err) {
        mrnafni = gt_feature_node_iterator_new(curnode);
        while (!had_err && (curnode2 =
                                      gt_feature_node_iterator_next(mrnafni))) {
          if (gt_feature_node_get_type(curnode2) == sav->CDS_type) {
            char *tmp;
            GtRange rng = gt_genome_node_get_range((GtGenomeNode*) curnode2);
            had_err = gt_region_mapping_get_sequence(sav->rmap, &tmp, seqid,
                                                     rng.start, rng.end, err);
            if (!had_err) {
              gt_str_append_cstr_nt(mrnaseq, tmp, gt_range_length(&rng));
              gt_free(tmp);
            }
          }
        }
        gt_feature_node_iterator_delete(mrnafni);
      }
    }
  }
  if (!had_err && last_mRNA) {
    char *mrna_charseq = gt_calloc(gt_str_length(mrnaseq)+1, sizeof (char));
    (void) strncpy(mrna_charseq, gt_str_get(mrnaseq), gt_str_length(mrnaseq));
    if (gt_feature_node_get_strand(sav->gene) == GT_STRAND_REVERSE) {
      had_err = gt_reverse_complement(mrna_charseq, gt_str_length(mrnaseq),
                                      err);
    }
    if (!had_err) {
      gt_hashmap_add(sav->rnaseqs, last_mRNA, mrna_charseq);
    }
  }
  gt_feature_node_iterator_delete(fni);
  gt_str_delete(mrnaseq);
  return had_err;
}
Example #7
0
int gt_pbs_unit_test(GtError *err)
{
  int had_err = 0;
  GtLTRElement element;
  GtPBSOptions o;
  GtStr *tmpfilename;
  FILE *tmpfp;
  GtPBSResults *res;
  GtPBSHit *hit;
  double score1, score2;
  GtRange rng;
  char *rev_seq,
       *seq,
       tmp[BUFSIZ];
  const char *fullseq =                           "aaaaaaaaaaaaaaaaaaaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "acatactaggatgctag" /* <- PBS forward */
                                     "aatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatag"
                                   /* PBS reverse -> */ "gatcctaaggctac"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                    "aaaaaaaaaaaaaaaaaaaa";

  /* notice previous errors */
  gt_error_check(err);

  /* create temporary tRNA library file */
  tmpfilename = gt_str_new();
  tmpfp = gt_xtmpfp(tmpfilename);
  fprintf(tmpfp, ">test1\nccccccccccccccctagcatcctagtatgtccc\n"
                 ">test2\ncccccccccgatcctagggctaccctttc\n");
  gt_fa_xfclose(tmpfp);
  ensure(had_err, gt_file_exists(gt_str_get(tmpfilename)));

  /* setup testing parameters */
  o.radius = 30;
  o.max_edist = 1;
  o.alilen.start = 11;
  o.alilen.end = 30;
  o.offsetlen.start = 0;
  o.offsetlen.end = 5;
  o.trnaoffsetlen.start = 0;
  o.trnaoffsetlen.end =  40;
  o.ali_score_match = 5;
  o.ali_score_mismatch = -10;
  o.ali_score_insertion = o.ali_score_deletion = -20;
  o.trna_lib = gt_bioseq_new(gt_str_get(tmpfilename), err);
  ensure(had_err, gt_bioseq_number_of_sequences(o.trna_lib) == 2);

  element.leftLTR_5 = 20;
  element.leftLTR_3 = 119;
  element.rightLTR_5 = 520;
  element.rightLTR_3 = 619;

  /* setup sequences */
  seq     = gt_malloc(600 * sizeof (char));
  rev_seq = gt_malloc(600 * sizeof (char));
  memcpy(seq,     fullseq + 20, 600);
  memcpy(rev_seq, fullseq + 20, 600);
  gt_reverse_complement(rev_seq, 600, err);

  /* try to find PBS in sequences */
  res = gt_pbs_find(seq, rev_seq, &element, &o, err);
  ensure(had_err, res != NULL);
  ensure(had_err, gt_pbs_results_get_number_of_hits(res) == 2);

  /* check first hit on forward strand */
  hit = gt_pbs_results_get_ranked_hit(res, 0);
  ensure(had_err, hit != NULL);
  ensure(had_err, gt_pbs_hit_get_alignment_length(hit) == 17);
  ensure(had_err, gt_pbs_hit_get_edist(hit) == 0);
  ensure(had_err, gt_pbs_hit_get_offset(hit) == 0);
  ensure(had_err, gt_pbs_hit_get_tstart(hit) == 3);
  ensure(had_err, strcmp(gt_pbs_hit_get_trna(hit), "test1") == 0);
  rng = gt_pbs_hit_get_coords(hit);
  ensure(had_err, rng.start == 120);
  ensure(had_err, rng.end == 136);
  score1 = gt_pbs_hit_get_score(hit);
  ensure(had_err, gt_pbs_hit_get_strand(hit) == GT_STRAND_FORWARD);
  memset(tmp, 0, BUFSIZ-1);
  memcpy(tmp, fullseq + (rng.start * sizeof (char)),
         (rng.end - rng.start + 1) * sizeof (char));
  ensure(had_err, strcmp(tmp, "acatactaggatgctag" ) == 0);

  /* check second hit on reverse strand */
  hit = gt_pbs_results_get_ranked_hit(res, 1);
  ensure(had_err, hit != NULL);
  ensure(had_err, gt_pbs_hit_get_alignment_length(hit) == 14);
  ensure(had_err, gt_pbs_hit_get_edist(hit) == 1);
  ensure(had_err, gt_pbs_hit_get_offset(hit) == 0);
  ensure(had_err, gt_pbs_hit_get_tstart(hit) == 6);
  ensure(had_err, strcmp(gt_pbs_hit_get_trna(hit), "test2") == 0);
  rng = gt_pbs_hit_get_coords(hit);
  ensure(had_err, rng.start == 506);
  ensure(had_err, rng.end == 519);
  score2 = gt_pbs_hit_get_score(hit);
  ensure(had_err, gt_double_compare(score1, score2) > 0);
  ensure(had_err, gt_pbs_hit_get_strand(hit) == GT_STRAND_REVERSE);
  memset(tmp, 0, BUFSIZ-1);
  memcpy(tmp, fullseq + (rng.start * sizeof (char)),
         (rng.end - rng.start + 1) * sizeof (char));
  ensure(had_err, strcmp(tmp, "gatcctaaggctac" ) == 0);

  /* clean up */
  gt_xremove(gt_str_get(tmpfilename));
  ensure(had_err, !gt_file_exists(gt_str_get(tmpfilename)));
  gt_str_delete(tmpfilename);
  gt_bioseq_delete(o.trna_lib);
  gt_free(rev_seq);
  gt_free(seq);
  gt_pbs_results_delete(res);

  return had_err;
}
Example #8
0
GtPBSResults* gt_pbs_find(const char *seq,
                          const char *rev_seq,
                          GtLTRElement *element,
                          GtPBSOptions *o,
                          GtError *err)
{
  GtSeq *seq_forward, *seq_rev;
  GtPBSResults *results;
  unsigned long j;
  GtAlignment *ali;
  GtAlphabet *a = gt_alphabet_new_dna();
  GtScoreFunction *sf = gt_dna_scorefunc_new(a,
                                             o->ali_score_match,
                                             o->ali_score_mismatch,
                                             o->ali_score_insertion,
                                             o->ali_score_deletion);

  gt_assert(seq && rev_seq && sf && a && element);

  results = gt_pbs_results_new(element, o);

  seq_forward = gt_seq_new(seq + (gt_ltrelement_leftltrlen(element))
                               - (o->radius),
                           2*o->radius + 1,
                           a);

  seq_rev     = gt_seq_new(rev_seq + (gt_ltrelement_rightltrlen(element))
                                   - (o->radius),
                           2*o->radius + 1,
                           a);

    for (j=0;j<gt_bioseq_number_of_sequences(o->trna_lib);j++)
  {
    GtSeq *trna_seq, *trna_from3;
    char *trna_from3_full;
    unsigned long trna_seqlen;

    trna_seq = gt_bioseq_get_seq(o->trna_lib, j);
    trna_seqlen = gt_seq_length(trna_seq);

    trna_from3_full = gt_calloc(trna_seqlen, sizeof (char));
    memcpy(trna_from3_full, gt_seq_get_orig(trna_seq),
           sizeof (char)*trna_seqlen);
    (void) gt_reverse_complement(trna_from3_full, trna_seqlen, err);
    trna_from3 = gt_seq_new_own(trna_from3_full, trna_seqlen, a);

    ali = gt_swalign(seq_forward, trna_from3, sf);
    gt_pbs_add_hit(results->hits, ali, o, trna_seqlen,
                   gt_seq_get_description(trna_seq), GT_STRAND_FORWARD,
                   results);
    gt_alignment_delete(ali);

    ali = gt_swalign(seq_rev, trna_from3, sf);
    gt_pbs_add_hit(results->hits, ali, o, trna_seqlen,
                   gt_seq_get_description(trna_seq), GT_STRAND_REVERSE,
                   results);
    gt_alignment_delete(ali);

    gt_seq_delete(trna_from3);
  }
  gt_seq_delete(seq_forward);
  gt_seq_delete(seq_rev);
  gt_score_function_delete(sf);
  gt_alphabet_delete(a);
  gt_array_sort(results->hits, gt_pbs_hit_compare);
  return results;
}
static int gt_seqtranslate_runner(int argc, const char **argv, int parsed_args,
                              void *tool_arguments, GT_UNUSED GtError *err)
{
  GtTranslateArguments *arguments = tool_arguments;
  GtSeqIterator *si = NULL;
  GtSequenceBuffer *sb = NULL;
  GtStrArray *infiles;
  int had_err = 0,
      rval,
      i;
  GtStr *translations[3];
  translations[0] = gt_str_new();
  translations[1] = gt_str_new();
  translations[2] = gt_str_new();

  gt_error_check(err);
  gt_assert(arguments);

  infiles = gt_str_array_new();
  for (i = parsed_args; i < argc; i++) {
    gt_str_array_add_cstr(infiles, argv[i]);
  }
  sb = gt_sequence_buffer_new_guess_type(infiles, err);
  if (!sb)
    had_err = -1;
  if (!had_err) {
    si = gt_seq_iterator_sequence_buffer_new_with_buffer(sb);
    if (!si)
      had_err = -1;
  }
  if (!had_err) {
    char *desc;
    const GtUchar *sequence;
    GtUword len;
    while (!had_err && (rval = gt_seq_iterator_next(si,
                                                   &sequence,
                                                   &len, &desc, err))) {
      if (rval < 0) {
        had_err = -1;
        break;
      }
      if (len < GT_CODON_LENGTH) {
        gt_warning("sequence '%s' is shorter than codon length of %d, skipping",
                   desc, GT_CODON_LENGTH);
      } else {
        had_err = gt_seqtranslate_do_translation(arguments, (char*) sequence,
                                                 len, desc,
                                                 translations, false, err);
        if (!had_err && arguments->reverse) {
          char *revseq = gt_cstr_dup_nt((char*) sequence, len);
          had_err = gt_reverse_complement(revseq, len, err);
          if (!had_err) {
            had_err = gt_seqtranslate_do_translation(arguments, revseq, len,
                                                  desc, translations, true,
                                                  err);
          }
          gt_free(revseq);
        }
      }
    }
  }
  gt_str_delete(translations[0]);
  gt_str_delete(translations[1]);
  gt_str_delete(translations[2]);
  gt_str_array_delete(infiles);
  gt_seq_iterator_delete(si);
  gt_sequence_buffer_delete(sb);
  return had_err;
}
static int gt_ltrdigest_pdom_visitor_feature_node(GtNodeVisitor *nv,
                                                  GtFeatureNode *fn,
                                                  GtError *err)
{
  GtLTRdigestPdomVisitor *lv;
  GtFeatureNodeIterator *fni;
  GtFeatureNode *curnode = NULL;
  int had_err = 0;
  GtRange rng;
  GtUword i;
  lv = gt_ltrdigest_pdom_visitor_cast(nv);
  gt_assert(lv);
  gt_error_check(err);

  /* traverse annotation subgraph and find LTR element */
  fni = gt_feature_node_iterator_new(fn);
  while (!had_err && (curnode = gt_feature_node_iterator_next(fni))) {
    if (strcmp(gt_feature_node_get_type(curnode), lv->root_type) == 0) {
      lv->ltr_retrotrans = curnode;
    }
  }
  gt_feature_node_iterator_delete(fni);

  if (!had_err && lv->ltr_retrotrans != NULL) {
    GtCodonIterator *ci;
    GtTranslator *tr;
    GtTranslatorStatus status;
    GtUword seqlen;
    char translated, *rev_seq;
#ifndef _WIN32
    FILE *instream;
    GtHMMERParseStatus *pstatus;
#endif
    unsigned int frame;
    GtStr *seq;

    seq = gt_str_new();
    rng = gt_genome_node_get_range((GtGenomeNode*) lv->ltr_retrotrans);
    lv->leftLTR_5 = rng.start - 1;
    lv->rightLTR_3 = rng.end - 1;
    seqlen = gt_range_length(&rng);

    had_err = gt_extract_feature_sequence(seq,
                                          (GtGenomeNode*) lv->ltr_retrotrans,
                                          lv->root_type,
                                          false, NULL, NULL, lv->rmap, err);

    if (!had_err) {
      for (i = 0UL; i < 3UL; i++) {
        gt_str_reset(lv->fwd[i]);
        gt_str_reset(lv->rev[i]);
      }

      /* create translations */
      ci = gt_codon_iterator_simple_new(gt_str_get(seq), seqlen, NULL);
      gt_assert(ci);
      tr = gt_translator_new(ci);
      status = gt_translator_next(tr, &translated, &frame, err);
      while (status == GT_TRANSLATOR_OK && translated) {
        gt_str_append_char(lv->fwd[frame], translated);
        status = gt_translator_next(tr, &translated, &frame, NULL);
      }
      if (status == GT_TRANSLATOR_ERROR) had_err = -1;
      if (!had_err) {
        rev_seq = gt_malloc((size_t) seqlen * sizeof (char));
        strncpy(rev_seq, gt_str_get(seq), (size_t) seqlen * sizeof (char));
        (void) gt_reverse_complement(rev_seq, seqlen, NULL);
        gt_codon_iterator_delete(ci);
        ci = gt_codon_iterator_simple_new(rev_seq, seqlen, NULL);
        gt_translator_set_codon_iterator(tr, ci);
        status = gt_translator_next(tr, &translated, &frame, err);
        while (status == GT_TRANSLATOR_OK && translated) {
          gt_str_append_char(lv->rev[frame], translated);
          status = gt_translator_next(tr, &translated, &frame, NULL);
        }
        if (status == GT_TRANSLATOR_ERROR) had_err = -1;
        gt_free(rev_seq);
      }
      gt_codon_iterator_delete(ci);
      gt_translator_delete(tr);
    }

    /* run HMMER and handle results */
    if (!had_err) {
#ifndef _WIN32
      int pid, pc[2], cp[2];
      GT_UNUSED int rval;

      (void) signal(SIGCHLD, SIG_IGN); /* XXX: for now, ignore child's
                                               exit status */
      rval = pipe(pc);
      gt_assert(rval == 0);
      rval = pipe(cp);
      gt_assert(rval == 0);

      switch ((pid = (int) fork())) {
        case -1:
          perror("Can't fork");
          exit(1);   /* XXX: error handling */
        case 0:    /* child */
          (void) close(1);    /* close current stdout. */
          rval = dup(cp[1]);  /* make stdout go to write end of pipe. */
          (void) close(0);    /* close current stdin. */
          rval = dup(pc[0]);  /* make stdin come from read end of pipe. */
          (void) close(pc[0]);
          (void) close(pc[1]);
          (void) close(cp[0]);
          (void) close(cp[1]);
          (void) execvp("hmmscan", lv->args); /* XXX: read path from env */
          perror("couldn't execute hmmscan!");
          exit(1);
        default:    /* parent */
          for (i = 0UL; i < 3UL; i++) {
            char buf[5];
            GT_UNUSED ssize_t written;
            (void) sprintf(buf, ">"GT_WU"%c\n", i, '+');
            written = write(pc[1], buf, 4 * sizeof (char));
            written = write(pc[1], gt_str_get(lv->fwd[i]),
                            (size_t) gt_str_length(lv->fwd[i]) * sizeof (char));
            written = write(pc[1], "\n", 1 * sizeof (char));
            (void) sprintf(buf, ">"GT_WU"%c\n", i, '-');
            written = write(pc[1], buf, 4 * sizeof (char));
            written = write(pc[1], gt_str_get(lv->rev[i]),
                            (size_t) gt_str_length(lv->rev[i]) * sizeof (char));
            written = write(pc[1], "\n", 1 * sizeof (char));
          }
          (void) close(pc[0]);
          (void) close(pc[1]);
          (void) close(cp[1]);
          instream = fdopen(cp[0], "r");
          pstatus = gt_hmmer_parse_status_new();
          had_err = gt_ltrdigest_pdom_visitor_parse_output(lv, pstatus,
                                                           instream, err);
          (void) fclose(instream);
          if (!had_err)
            had_err = gt_ltrdigest_pdom_visitor_process_hits(lv, pstatus, err);
          gt_hmmer_parse_status_delete(pstatus);
      }
#else
      /* XXX */
      gt_error_set(err, "HMMER call not implemented on Windows\n");
      had_err = -1;
#endif
    }
    gt_str_delete(seq);
  }
  if (!had_err)
    had_err = gt_ltrdigest_pdom_visitor_choose_strand(lv);
  return had_err;
}
Example #11
0
static int gt_convertseq_runner(int argc, const char **argv, int parsed_args,
                              void *tool_arguments, GtError *err)
{
  GtConvertseqArguments *arguments = tool_arguments;
  int had_err = 0, i;
  GtFilelengthvalues *flv;
  GtSeqIterator *seqit;
  GtSequenceBuffer *sb = NULL;
  GtStrArray *files;
  const GtUchar *sequence;
  char *desc;
  GtUword len, j;
  off_t totalsize;
  gt_error_check(err);
  gt_assert(arguments != NULL);

  files = gt_str_array_new();
  for (i = parsed_args; i < argc; i++)
  {
    gt_str_array_add_cstr(files, argv[i]);
  }
  totalsize = gt_files_estimate_total_size(files);

  flv = gt_calloc((size_t) gt_str_array_size(files),
                  sizeof (GtFilelengthvalues));

  sb = gt_sequence_buffer_new_guess_type(files, err);
  if (!sb) {
    had_err = -1;
  }
  if (!had_err) {
    gt_sequence_buffer_set_filelengthtab(sb, flv);
    /* read input using seqiterator */
    seqit = gt_seq_iterator_sequence_buffer_new_with_buffer(sb);
    if (arguments->verbose)
    {
      gt_progressbar_start(gt_seq_iterator_getcurrentcounter(seqit,
                                                           (GtUint64)
                                                           totalsize),
                           (GtUint64) totalsize);
    }
    while (true)
    {
      GtUchar *seq = NULL;
      desc = NULL;
      j = 0UL;
      had_err = gt_seq_iterator_next(seqit, &sequence, &len, &desc, err);
      if (had_err != 1)
        break;
      if (arguments->revcomp) {
        GtUchar *newseq = gt_calloc((size_t) len+1, sizeof (GtUchar));
        memcpy(newseq, sequence, (size_t) len*sizeof (GtUchar));
        had_err = gt_reverse_complement((char*) newseq, len, err);
        if (had_err)
          break;
        seq = newseq;
      } else seq = (GtUchar*) sequence;

      if (!arguments->showseq) {
        bool in_wildcard = false;
        gt_file_xprintf(arguments->outfp, ">%s\n", desc);
        for (i = 0; (GtUword) i < len; i++) {
          if (arguments->reduce_wc_dna) {
            switch (seq[i]) {
              case 'a':
              case 'A':
              case 'c':
              case 'C':
              case 'g':
              case 'G':
              case 't':
              case 'u':
              case 'T':
              case 'U':
                in_wildcard = false;
                gt_file_xfputc((int) seq[i], arguments->outfp);
                j++;
                break;
              default:
                if (!in_wildcard) {
                  in_wildcard = true;
                  if (isupper((int) seq[i]))
                    gt_file_xfputc((int) 'N', arguments->outfp);
                  else
                    gt_file_xfputc((int) 'n', arguments->outfp);
                  j++;
                }
            }
          }
          else if (arguments->reduce_wc_prot) {
            switch (seq[i]) {
              case 'X':
              case 'B':
              case 'Z':
                if (!in_wildcard) {
                  in_wildcard = true;
                  gt_file_xfputc((int) 'N', arguments->outfp);
                  j++;
                }
                break;
              case 'x':
              case 'b':
              case 'z':
                if (!in_wildcard) {
                  in_wildcard = true;
                  gt_file_xfputc((int) 'n', arguments->outfp);
                  j++;
                }
                break;
              default:
                in_wildcard = false;
                gt_file_xfputc((int) seq[i], arguments->outfp);
                j++;
            }
          }
          else {
            gt_file_xfputc((int) seq[i], arguments->outfp);
            j++;
          }
          if (arguments->fastawidth > 0 && j % arguments->fastawidth == 0) {
            j = 0;
            gt_file_xprintf(arguments->outfp, "\n");
          }
        }
        if (arguments->fastawidth == 0 || len % arguments->fastawidth != 0)
            gt_file_xprintf(arguments->outfp, "\n");
      }
      if (arguments->revcomp) {
        gt_free(seq);
      }
    }
    if (arguments->showflv) {
      for (j=0;j<gt_str_array_size(files);j++) {
        fprintf(stderr, "file "GT_WU" (%s): "GT_WU"/"GT_WU"\n",
               j,
               gt_str_array_get(files, j),
               (GtUword) flv[j].length,
               (GtUword) flv[j].effectivelength);
      }
    }
    if (arguments->verbose)
    {
      gt_progressbar_stop();
    }
    gt_sequence_buffer_delete(sb);
    gt_seq_iterator_delete(seqit);
  }
  gt_str_array_delete(files);
  gt_free(flv);

  return had_err;
}
static int gt_extract_feature_sequence_generic(GtStr *sequence,
                                GtGenomeNode *gn,
                                const char *type, bool join, GtStr *seqid,
                                GtStrArray *target_ids,
                                unsigned int *out_phase_offset,
                                GtRegionMapping *region_mapping, GtError *err)
{
  GtFeatureNode *fn;
  GtRange range;
  unsigned int phase_offset = 0;
  char *outsequence;
  const char *target;
  int had_err = 0;

  gt_error_check(err);
  fn = gt_genome_node_cast(gt_feature_node_class(), gn);
  gt_assert(fn);

  if (seqid)
    gt_str_append_str(seqid, gt_genome_node_get_seqid(gn));
  if (target_ids &&
      (target = gt_feature_node_get_attribute(fn, GT_GFF_TARGET))) {
    had_err = gt_gff3_parser_parse_all_target_attributes(target, false,
                                                         target_ids, NULL,
                                                         NULL, "", 0, err);
  }
  if (!had_err) {
    if (join) {
      GtFeatureNodeIterator *fni;
      GtFeatureNode *child;
      bool reverse_strand = false,
           first_child = true,
           first_child_of_type_seen = false;
      GtPhase phase = GT_PHASE_UNDEFINED;
      /* in this case we have to traverse the children */
      fni = gt_feature_node_iterator_new_direct(gt_feature_node_cast(gn));
      while (!had_err && (child = gt_feature_node_iterator_next(fni))) {
        if (first_child) {
          if (target_ids &&
               (target = gt_feature_node_get_attribute(child, GT_GFF_TARGET))) {
            gt_str_array_reset(target_ids);
            had_err = gt_gff3_parser_parse_all_target_attributes(target, false,
                                                                 target_ids,
                                                                 NULL,
                                                                 NULL, "", 0,
                                                                 err);
          }
          first_child = false;
        }
        if (!had_err) {
          if (extract_join_feature((GtGenomeNode*) child, type, region_mapping,
                                   sequence, &reverse_strand,
                                   &first_child_of_type_seen,
                                   &phase, err)) {
            had_err = -1;
          }
          if (phase != GT_PHASE_UNDEFINED) {
            phase_offset = (int) phase;
          }
        }
      }
      gt_feature_node_iterator_delete(fni);
      gt_assert(phase_offset <= (unsigned int) GT_PHASE_UNDEFINED);
      if (!had_err && gt_str_length(sequence)) {
        if (reverse_strand) {
          had_err = gt_reverse_complement(gt_str_get(sequence),
                                          gt_str_length(sequence), err);
        }
      }
    }
    else if (gt_feature_node_get_type(fn) == type) {
      GtPhase phase = gt_feature_node_get_phase(fn);
      gt_assert(!had_err);
      if (phase != GT_PHASE_UNDEFINED)
        phase_offset = (unsigned int) phase;
      /* otherwise we only have to look at this feature */
      range = gt_genome_node_get_range(gn);
      gt_assert(range.start); /* 1-based coordinates */
      had_err = gt_region_mapping_get_sequence(region_mapping, &outsequence,
                                               gt_genome_node_get_seqid(gn),
                                               range.start, range.end, err);
      if (!had_err) {
        gt_str_append_cstr_nt(sequence, outsequence, gt_range_length(&range));
        gt_free(outsequence);
        if (gt_feature_node_get_strand(fn) == GT_STRAND_REVERSE) {
          had_err = gt_reverse_complement(gt_str_get(sequence),
                                          gt_str_length(sequence), err);
        }
      }
    }
  }
  if (out_phase_offset && phase_offset != GT_PHASE_UNDEFINED) {
    *out_phase_offset = phase_offset;
  }
  return had_err;
}
Example #13
0
                         GtLTRdigestStream *ls,
#ifdef HAVE_HMMER
                         GtError *err)
#else
                         GT_UNUSED GtError *err)
#endif
{
  int had_err = 0;
  char *rev_seq;
  unsigned long seqlen = gt_ltrelement_length(element);
  GtStrand canonical_strand = GT_STRAND_UNKNOWN;

  /* create reverse strand sequence */
  rev_seq = gt_calloc((size_t) seqlen+1, sizeof (char));
  memcpy(rev_seq, seq, sizeof (char) * seqlen);
  had_err = gt_reverse_complement(rev_seq, seqlen, err);

  if (!had_err)
  {
#ifdef HAVE_HMMER
    /* Protein domain finding
     * ----------------------*/
    if (ls->tests_to_run & GT_LTRDIGEST_RUN_PDOM)
    {
      GtPdomResults *pdom_results = NULL;
      if (!ls->pdf)
      {
        gt_error_set(err, "No PdomFinder object found -- "
                          "how could that happen?");
        had_err = -1;
      } else