Beispiel #1
0
/*
 * Runs an encseq-backed codon iterator over a family of windows of <encseq>
 * and compares every codon it yields against <testseq_cmp>.
 *
 * For each start position j in [0, strlen(testseq)) and each k in
 * [j, strlen(testseq)) an iterator is created with start j and length
 * strlen(testseq) - k in the given <readmode>. The three characters returned
 * by the i-th successful gt_codon_iterator_next() call (i counted from j) are
 * expected to equal testseq_cmp[i], testseq_cmp[i+1] and testseq_cmp[i+2].
 *
 * Returns 0 if all checks pass; gt_ensure() sets had_err on the first failed
 * check, which also terminates all loops.
 */
static int gt_codon_iterator_encseq_single_test(GtEncseq *encseq,
                                                const char *testseq,
                                                const char *testseq_cmp,
                                                GtReadmode readmode,
                                                GT_UNUSED GtError *err)
{
  unsigned long j, k, i;
  /* the sequence length is loop-invariant, compute it only once */
  const unsigned long seqlen = (unsigned long) strlen(testseq);
  GtCodonIterator *ci;
  char n1, n2, n3;
  int had_err = 0;
  unsigned int frame;
  gt_error_check(err);

  for (j = 0; !had_err && j < seqlen; j++) {
    for (k = j; !had_err && k < seqlen; k++) {
      ci = gt_codon_iterator_encseq_new_with_readmode(encseq, j,
                                                      seqlen - k,
                                                      readmode,
                                                      NULL);
      i = j;
      /* a non-zero status ends the iteration for this window */
      while (!had_err && !gt_codon_iterator_next(ci, &n1, &n2, &n3,
                                                 &frame, NULL)) {
        gt_ensure(had_err, n1 == testseq_cmp[i]);
        gt_ensure(had_err, n2 == testseq_cmp[i+1]);
        gt_ensure(had_err, n3 == testseq_cmp[i+2]);
        i++;
      }
      gt_codon_iterator_delete(ci);
    }
  }
  return had_err;
}
Beispiel #2
0
/*
 * Lua binding: takes a DNA string as argument 1 and pushes one string result.
 *
 * Every character produced by the translator is appended to the result in the
 * order it is delivered, without filtering by frame. The loop stops as soon as
 * the translator returns a non-zero status or delivers a NUL character; the
 * text collected up to that point is what gets pushed.
 *
 * Returns 1 (the number of values pushed onto the Lua stack).
 */
static int translate_dna_lua(lua_State *L)
{
  const char *dna = luaL_checkstring(L, 1);
  GtStr *protein = gt_str_new();
  GtCodonIterator *ci = gt_codon_iterator_simple_new(dna, strlen(dna), NULL);
  GtTranslator *tr = gt_translator_new(ci);
  unsigned int frame;
  char translated;
  int rval;

  for (rval = gt_translator_next(tr, &translated, &frame, NULL);
       rval == 0 && translated != '\0';
       rval = gt_translator_next(tr, &translated, &frame, NULL)) {
    gt_str_append_char(protein, translated);
  }

  lua_pushstring(L, gt_str_get(protein));

  gt_str_delete(protein);
  gt_translator_delete(tr);
  gt_codon_iterator_delete(ci);
  return 1;
}
/*
 * Writes one FASTA entry for <description> to <outfp>, wrapped at <width>.
 *
 * If <translate> is false, <sequence> is written unchanged. Otherwise the
 * sequence is run through a translator and only the characters reported for
 * frame 0 are written.
 *
 * Returns 0 on success and -1 if the translator reported
 * GT_TRANSLATOR_ERROR. In the error case the entry is still written with
 * whatever was translated so far, and no error object is available to carry
 * a message (the translator is called with a NULL error).
 */
static int show_entry(GtStr *description, GtStr *sequence, bool translate,
                      GtUword width, GtFile *outfp)
{
  GtTranslatorStatus status;
  GtCodonIterator *ci;
  GtTranslator *tr;
  GtStr *protein;
  unsigned int frame;
  char translated;

  /* plain output: nothing to translate */
  if (!translate) {
    gt_fasta_show_entry(gt_str_get(description), gt_str_get(sequence),
                        gt_str_length(sequence), width, outfp);
    return 0;
  }

  protein = gt_str_new();
  ci = gt_codon_iterator_simple_new(gt_str_get(sequence),
                                    gt_str_length(sequence),
                                    NULL);
  tr = gt_translator_new(ci);

  for (status = gt_translator_next(tr, &translated, &frame, NULL);
       status == GT_TRANSLATOR_OK;
       status = gt_translator_next(tr, &translated, &frame, NULL)) {
    /* keep the first reading frame only */
    if (frame == 0)
      gt_str_append_char(protein, translated);
  }

  gt_fasta_show_entry(gt_str_get(description), gt_str_get(protein),
                      gt_str_length(protein), width, outfp);

  gt_str_delete(protein);
  gt_translator_delete(tr);
  gt_codon_iterator_delete(ci);

  return status == GT_TRANSLATOR_ERROR ? -1 : 0;
}
Beispiel #4
0
/*
 * Unit test for the encseq-backed codon iterator.
 *
 * Builds an encoded sequence from a fixed DNA string and runs
 * gt_codon_iterator_encseq_single_test() on it in forward, complement,
 * reverse-complement and reverse read mode, each time against the matching
 * precomputed comparison string. Finally it checks that iterators created
 * with a length below 3 do not yield any codon.
 *
 * Every step is only executed while no earlier step has failed, so the first
 * failure is the one reported and no test is entered with <err> already set.
 *
 * Returns 0 on success, non-zero on the first failed check.
 */
int gt_codon_iterator_encseq_unit_test(GtError *err)
{
  int had_err = 0,
      j;
  const char *testseq    = "gctgatcgactgaacatagctagcacggccgcgcgatcgtacgatg",
             *testseq_rc = "catcgtacgatcgcgcggccgtgctagctatgttcagtcgatcagc",
             *testseq_rv = "gtagcatgctagcgcgccggcacgatcgatacaagtcagctagtcg",
             *testseq_cm = "cgactagctgacttgtatcgatcgtgccggcgcgctagcatgctac";
  GtEncseq *encseq;
  GtEncseqBuilder *eb;
  GtCodonIterator *ci;
  GtAlphabet *alpha;
  char n1, n2, n3;
  unsigned int frame;
  gt_error_check(err);

  alpha = gt_alphabet_new_dna();
  eb = gt_encseq_builder_new(alpha);
  gt_encseq_builder_add_cstr(eb, testseq, strlen(testseq), "foo");
  encseq = gt_encseq_builder_build(eb, NULL);

  /* forward tests */
  had_err = gt_codon_iterator_encseq_single_test(encseq, testseq, testseq,
                                                 GT_READMODE_FORWARD, err);

  /* complement tests */
  if (!had_err) {
    had_err = gt_codon_iterator_encseq_single_test(encseq, testseq, testseq_cm,
                                                   GT_READMODE_COMPL, err);
  }

  /* revcompl tests */
  if (!had_err) {
    had_err = gt_codon_iterator_encseq_single_test(encseq, testseq, testseq_rc,
                                                   GT_READMODE_REVCOMPL, err);
  }

  /* reverse tests */
  if (!had_err) {
    had_err = gt_codon_iterator_encseq_single_test(encseq, testseq, testseq_rv,
                                                   GT_READMODE_REVERSE, err);
  }

  /* lengths < 3 */
  for (j = 0; !had_err && j < 3; j++) {
    ci = gt_codon_iterator_encseq_new_with_readmode(encseq, 10, j,
                                                    GT_READMODE_REVCOMPL, NULL);
    /* any codon delivered for such a short window is a failure */
    while (!(gt_codon_iterator_next(ci, &n1, &n2, &n3, &frame, NULL))) {
      gt_ensure(had_err, false);
    }
    gt_codon_iterator_delete(ci);
  }

  gt_encseq_delete(encseq);
  gt_encseq_builder_delete(eb);
  gt_alphabet_delete(alpha);
  return had_err;
}
/*
 * Extracts the sequence of <feature_node> for features of the given <type>
 * (optionally joined, see <join>) via <rm> and translates it.
 *
 * Translation starts <phase_offset> characters into the extracted sequence,
 * where the offset is the value reported by
 * gt_extract_feature_sequence_generic(). It is only attempted if at least
 * three characters remain after the offset. Characters reported for frame
 * 0, 1 and 2 are appended to <translation_fr1>, <translation_fr2> and
 * <translation_fr3> respectively; any of these may be NULL, in which case
 * that frame is skipped. If <ttable> is non-NULL it replaces the translator's
 * default translation table.
 *
 * Returns 0 on success. Returns non-zero if extraction failed, or -1 if the
 * translator reported GT_TRANSLATOR_ERROR; <err> is handed to the translator
 * so that a failure is accompanied by an error message.
 */
int gt_extract_and_translate_feature_sequence(GtFeatureNode *feature_node,
                                              const char *type,
                                              bool join,
                                              GtRegionMapping *rm,
                                              GtTransTable *ttable,
                                              GtStr *translation_fr1,
                                              GtStr *translation_fr2,
                                              GtStr *translation_fr3,
                                              GtError *err)
{
  GtTranslator *tr = NULL;
  GtTranslatorStatus status;
  GtCodonIterator *ci = NULL;
  unsigned int frame, phase_offset = 0;
  char translated;
  int had_err = 0;
  GtStr *sequence = gt_str_new();
  gt_assert(feature_node && type);

  had_err = gt_extract_feature_sequence_generic(sequence,
                                                (GtGenomeNode*) feature_node,
                                                type, join, NULL, NULL,
                                                &phase_offset, rm, err);

  /* do translation if we have at least one codon */
  if (!had_err && gt_str_length(sequence) > phase_offset + 2) {
    ci = gt_codon_iterator_simple_new(gt_str_get(sequence) + phase_offset,
                                      gt_str_length(sequence) - phase_offset,
                                      NULL);
    tr = gt_translator_new(ci);
    if (ttable)
      gt_translator_set_translation_table(tr, ttable);
    /* pass err through: a -1 return must come with a set error object */
    status = gt_translator_next(tr, &translated, &frame, err);
    while (status == GT_TRANSLATOR_OK) {
      if (frame == 0 && translation_fr1)
        gt_str_append_char(translation_fr1, translated);
      else if (frame == 1 && translation_fr2)
        gt_str_append_char(translation_fr2, translated);
      else if (frame == 2 && translation_fr3)
        gt_str_append_char(translation_fr3, translated);
      status = gt_translator_next(tr, &translated, &frame, err);
    }
    if (status == GT_TRANSLATOR_ERROR)
      had_err = -1;
  }
  /* tr and ci are still NULL here if no translation was attempted */
  gt_translator_delete(tr);
  gt_codon_iterator_delete(ci);
  gt_str_delete(sequence);

  return had_err;
}
/*
 * Translates <sequence> (of the given <length>) in three frames and writes
 * one FASTA entry per non-empty frame to arguments->outfp, wrapped at
 * arguments->fasta_width.
 *
 * Translated characters are collected in translations[0..2], indexed by the
 * frame reported by the translator. Each header has the form
 * "<desc> (<frame number 1..3><strand sign>)", where the sign is "-" if <rev>
 * is set and "+" otherwise. Every buffer that was written is reset afterwards.
 *
 * Returns 0 on success and -1 if the translator reported
 * GT_TRANSLATOR_ERROR; in that case nothing is written and the translation
 * buffers are left as they are.
 */
static int gt_seqtranslate_do_translation(GtTranslateArguments *arguments,
                                       const char *sequence,
                                       GtUword length,
                                       const char *desc,
                                       GtStr **translations,
                                       bool rev,
                                       GtError *err)
{
  GtCodonIterator *codon_it = gt_codon_iterator_simple_new(sequence, length,
                                                           err);
  GtTranslator *translator = gt_translator_new(codon_it);
  GtTranslatorStatus status;
  GtStr *header;
  unsigned int frame,
               frame_idx;
  char residue;

  while ((status = gt_translator_next(translator, &residue, &frame, err))
           == GT_TRANSLATOR_OK) {
    gt_str_append_char(translations[frame], residue);
  }
  gt_codon_iterator_delete(codon_it);
  gt_translator_delete(translator);

  if (status == GT_TRANSLATOR_ERROR)
    return -1;

  header = gt_str_new();
  for (frame_idx = 0; frame_idx < 3; frame_idx++) {
    GtStr *protein = translations[frame_idx];

    /* frames without any translated character produce no entry */
    if (gt_str_length(protein) == 0)
      continue;

    gt_str_append_cstr(header, desc);
    gt_str_append_cstr(header, " (");
    gt_str_append_ulong(header, frame_idx+1);
    if (rev)
      gt_str_append_cstr(header, "-");
    else
      gt_str_append_cstr(header, "+");
    gt_str_append_cstr(header, ")");

    gt_fasta_show_entry(gt_str_get(header), gt_str_get(protein),
                        gt_str_length(protein),
                        arguments->fasta_width, arguments->outfp);

    /* make both buffers reusable for the next frame / next call */
    gt_str_reset(protein);
    gt_str_reset(header);
  }
  gt_str_delete(header);
  return 0;
}
/*
 * Unit test for the simple (plain string) codon iterator.
 *
 * Iterates over the 26 letters of the upper-case alphabet and checks that the
 * i-th successful call delivers the letters at positions i, i+1 and i+2, and
 * that exactly 24 such calls succeed.
 *
 * Returns 0 on success, non-zero once a gt_ensure() check has failed.
 */
int gt_codon_iterator_simple_unit_test(GtError *err)
{
  const char *testseq = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  GtCodonIterator *ci;
  unsigned int frame;
  char n1, n2, n3;
  int i,
      had_err = 0;
  gt_error_check(err);

  ci = gt_codon_iterator_simple_new(testseq, 26, NULL);

  /* i counts the successful calls; a non-zero status ends the loop */
  for (i = 0;
       !gt_codon_iterator_next(ci, &n1, &n2, &n3, &frame, NULL);
       i++) {
    gt_ensure(had_err, n1 == testseq[i]);
    gt_ensure(had_err, n2 == testseq[i+1]);
    gt_ensure(had_err, n3 == testseq[i+2]);
  }
  gt_ensure(had_err, i == 24);

  gt_codon_iterator_delete(ci);
  return had_err;
}
/*
 * Unit test for GtTranslator, driven through simple codon iterators.
 *
 * The test works on one translator object <tr> that is successively attached
 * to iterators over four fixed sequences:
 *   - <seq>:           3-frame translation is compared against three expected
 *                      protein strings; start codon, stop codon and arbitrary
 *                      codon search are expected to succeed at fixed
 *                      positions; a codon list containing a too-short codon
 *                      is expected to yield GT_TRANSLATOR_ERROR.
 *   - <invalidseq>:    the very first translation step is expected to fail.
 *   - <invalidseq2>:   translation is expected to fail at some later step.
 *   - <no_startcodon>: all three searches are expected to end with
 *                      GT_TRANSLATOR_END and without an error being set.
 *
 * A private error object <test_err> is used for all calls under test so that
 * expected errors do not end up in <err>; it is cleared before each section.
 * The order of the sections matters because they share <tr>, <ci> and the
 * iterator position.
 *
 * Returns had_err, which is not assigned directly in this function.
 * NOTE(review): presumably the one-argument gt_ensure() macro sets it on a
 * failed check - confirm against the macro definition.
 */
int gt_translator_unit_test(GtError *err)
{
  int had_err = 0;
  GtTranslatorStatus test_errnum;
  GtTranslator *tr;
  GtCodonIterator *ci;
  GtError *test_err;
  GtStrArray *codons, *invalidcodons;
  const char *seq = "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGT"
                    "GGATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGAACTGGT"
                    "TACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGG";
  const char *no_startcodon = "AAAAAAAAAATCATCTCCCCATTTTTTT";
  const char *invalidseq  = "ZAGCTTTTCATTCTGACTGCAAATATGTCTCTGTGT";
  const char *invalidseq2 = "AGCTTTTCATTCTGACZTGCAAATATGTCTCTGTGT";

  char translated;
  unsigned int frame;
  GtUword pos = 0;
  GtStr *protein[3];
  gt_error_check(err);

  /* set up translator, per-frame result buffers and the codon lists used by
     the gt_translator_find_codon() sections below */
  test_err = gt_error_new();
  ci = gt_codon_iterator_simple_new(seq, (GtUword) strlen(seq), test_err);
  tr = gt_translator_new(ci);
  protein[0] = gt_str_new();
  protein[1] = gt_str_new();
  protein[2] = gt_str_new();
  codons = gt_str_array_new();
  gt_str_array_add_cstr(codons, "ACG");
  gt_str_array_add_cstr(codons, "ACT");
  /* second entry has only two characters, which makes the list invalid */
  invalidcodons = gt_str_array_new();
  gt_str_array_add_cstr(invalidcodons, "ACG");
  gt_str_array_add_cstr(invalidcodons, "AC");

  /* do 3-frame translation */
  gt_error_unset(test_err);
  test_errnum = gt_translator_next(tr, &translated, &frame, test_err);
  while (!test_errnum && translated) {
    gt_str_append_char(protein[frame], translated);
    test_errnum = gt_translator_next(tr, &translated, &frame, test_err);
    gt_ensure(
           test_errnum != GT_TRANSLATOR_ERROR && !gt_error_is_set(test_err));
  }
  /* the loop must have stopped because the sequence was exhausted */
  gt_ensure(
         test_errnum == GT_TRANSLATOR_END && !gt_error_is_set(test_err));

  /* check 3-frame translation */
  gt_ensure(strcmp(gt_str_get(protein[0]),
                         "SFSF*LQRAICLCVD*KKSV**QLLNWLPAVSKLKFY*LR") == 0);
  gt_ensure(strcmp(gt_str_get(protein[1]),
                         "AFHSDCNGQYVSVWIKKRVSDSSF*TGYLP*VN*NFIDL") == 0);
  gt_ensure(strcmp(gt_str_get(protein[2]),
                         "LFILTATGNMSLCGLKKECLIAASELVTCRE*IKILLT*") == 0);

  /* find start codon -- positive */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_startcodon(tr, &pos, test_err);
  gt_ensure(!test_errnum && !gt_error_is_set(test_err));
  gt_ensure(pos == 11UL);

  /* find stop codon -- positive */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_stopcodon(tr, &pos, test_err);
  gt_ensure(!test_errnum && !gt_error_is_set(test_err));
  gt_ensure(pos == 12UL);

  /* find arbitrary codons -- positive */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_codon(tr, codons, &pos, test_err);
  gt_ensure(!test_errnum && !gt_error_is_set(test_err));
  gt_ensure(pos == 14UL);

  /* find arbitrary codons -- negative (invalid codons) */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_codon(tr, invalidcodons, &pos, test_err);
  gt_ensure(
         test_errnum == GT_TRANSLATOR_ERROR && gt_error_is_set(test_err));

  /* switch the translator over to a fresh iterator on <invalidseq> */
  gt_error_unset(test_err);
  gt_codon_iterator_delete(ci);
  ci = gt_codon_iterator_simple_new(invalidseq,
                                    (GtUword) strlen(invalidseq),
                                    test_err);
  gt_ensure(ci && !gt_error_is_set(test_err));
  gt_translator_reset(tr, ci);
  /* check translation of sequence with invalid beginning */
  test_errnum = gt_translator_next(tr, &translated, &frame, test_err);
  gt_ensure(test_errnum && gt_error_is_set(test_err));

  /* check translation of sequence with invalid character within */
  gt_error_unset(test_err);
  gt_codon_iterator_delete(ci);
  ci = gt_codon_iterator_simple_new(invalidseq2,
                                    (GtUword) strlen(invalidseq2),
                                    test_err);
  gt_ensure(ci && !gt_error_is_set(test_err));
  gt_translator_reset(tr, ci);
  test_errnum = gt_translator_next(tr, &translated, &frame, test_err);
  /* the appended characters are not inspected again; only the final status
     of the loop is checked */
  while (!test_errnum && translated) {
    gt_str_append_char(protein[frame], translated);
    test_errnum = gt_translator_next(tr, &translated, &frame, test_err);
  }
  gt_ensure(
         test_errnum == GT_TRANSLATOR_ERROR && gt_error_is_set(test_err));

  /* find start codon -- fail */
  gt_error_unset(test_err);
  gt_codon_iterator_delete(ci);
  ci = gt_codon_iterator_simple_new(no_startcodon,
                                    (GtUword) strlen(no_startcodon),
                                    test_err);
  gt_ensure(ci && !gt_error_is_set(test_err));
  gt_translator_reset(tr, ci);
  test_errnum = gt_translator_find_startcodon(tr, &pos, test_err);
  gt_ensure(
         test_errnum == GT_TRANSLATOR_END && !gt_error_is_set(test_err));

  /* find stop codon -- fail */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_stopcodon(tr, &pos, test_err);
  gt_ensure(
         test_errnum == GT_TRANSLATOR_END && !gt_error_is_set(test_err));

  /* find arbitrary codons -- negative (none there) */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_codon(tr, codons, &pos, test_err);
  gt_ensure(
         test_errnum == GT_TRANSLATOR_END && !gt_error_is_set(test_err));

  /* release everything created by this test */
  gt_codon_iterator_delete(ci);
  gt_translator_delete(tr);
  gt_str_delete(protein[0]);
  gt_str_delete(protein[1]);
  gt_str_delete(protein[2]);
  gt_str_array_delete(codons);
  gt_str_array_delete(invalidcodons);
  gt_error_delete(test_err);

  return had_err;
}
static int run_orffinder(GtRegionMapping *rmap,
                         GtFeatureNode *gf,
                         unsigned long start,
                         GT_UNUSED unsigned long end,
                         unsigned int min,
                         unsigned int max,
                         bool all,
                         GtError *err)
{
  int had_err = 0, i;
  unsigned long sum;
  GtCodonIterator* ci = NULL;
  GtTranslator* translator = NULL;
  GtORFIterator* orfi = NULL;
  GtORFIteratorStatus state;
  GtRange orf_rng, tmp_orf_rng[3];
  GtStr *seq;
  unsigned int orf_frame;

  /* forward strand */
  seq = gt_str_new();
  had_err = gt_extract_feature_sequence(seq,
                                        (GtGenomeNode*) gf,
                                        gt_feature_node_get_type(gf),
                                        false, NULL, NULL, rmap, err);

  ci = gt_codon_iterator_simple_new(gt_str_get(seq), gt_str_length(seq), err);
  gt_assert(ci);
  translator = gt_translator_new(ci);
  gt_assert(translator);

  orfi = gt_orf_iterator_new(ci, translator);
  gt_assert(orfi);

  for (i = 0; i < 3; i++) {
    tmp_orf_rng[i].start = GT_UNDEF_ULONG;
    tmp_orf_rng[i].end = GT_UNDEF_ULONG;
  }

  while ((state = gt_orf_iterator_next(orfi, &orf_rng, &orf_frame,
                                              err)) == GT_ORF_ITERATOR_OK) {
      if (all) {
        process_orf(orf_rng, orf_frame, GT_STRAND_FORWARD, gf,
                    start, min, max, err);
      } else {
        if (gt_range_length(&orf_rng) >
            gt_range_length(&tmp_orf_rng[orf_frame])) {
          tmp_orf_rng[orf_frame].start = orf_rng.start;
          tmp_orf_rng[orf_frame].end = orf_rng.end;
        }
      }
  }
  if (state == GT_ORF_ITERATOR_ERROR)
    had_err = -1;

  if (!had_err) {
    if (!all) {
      for (i = 0; i < 3; i++) {
        if (tmp_orf_rng[i].start != GT_UNDEF_ULONG) {
          process_orf(tmp_orf_rng[i], (unsigned int) i, GT_STRAND_FORWARD, gf,
                      start, min, max, err);
        }
      }
    }
    gt_codon_iterator_delete(ci);
    gt_translator_delete(translator);
    gt_orf_iterator_delete(orfi);
    orfi = NULL;
    ci = NULL;
    translator = NULL;

    for (i = 0; i < 3; i++) {
      tmp_orf_rng[i].start = GT_UNDEF_ULONG;
      tmp_orf_rng[i].end = GT_UNDEF_ULONG;
    }

    /* reverse strand */
    if (!had_err) {
      GT_UNUSED int rval = 0;
      unsigned long length = gt_str_length(seq);
      char *strp = (char*) gt_str_get_mem(seq);
      rval = gt_reverse_complement(strp, gt_str_length(seq), err);
      gt_assert(!rval); /* XXX */
      ci = gt_codon_iterator_simple_new(gt_str_get(seq), gt_str_length(seq),
                                        err);
      gt_assert(ci);
      translator = gt_translator_new(ci);
      gt_assert(translator);
      orfi = gt_orf_iterator_new(ci, translator);
      gt_assert(orfi);

      sum = start + length - 1;

      while ((state = gt_orf_iterator_next(orfi, &orf_rng, &orf_frame,
                                                  err)) == GT_ORF_ITERATOR_OK) {
          if (all) {
            process_orf(orf_rng, orf_frame, GT_STRAND_REVERSE, gf,
                        sum, min, max, err);
          } else {
            if (gt_range_length(&orf_rng) >
                gt_range_length(&tmp_orf_rng[orf_frame])) {
              tmp_orf_rng[orf_frame].start = orf_rng.start;
              tmp_orf_rng[orf_frame].end = orf_rng.end;
            }
          }
      }
      if (state == GT_ORF_ITERATOR_ERROR)
        had_err = -1;
      if (!had_err) {
        if (!all) {
          for (i = 0; i < 3; i++) {
            if (tmp_orf_rng[i].start != GT_UNDEF_ULONG) {
              process_orf(tmp_orf_rng[i], (unsigned int) i, GT_STRAND_REVERSE,
                          gf, sum, min, max, err);
            }
          }
        }
      }
    }
    gt_str_delete(seq);
    gt_codon_iterator_delete(ci);
    gt_translator_delete(translator);
    gt_orf_iterator_delete(orfi);
  }
  return had_err;
}
/*
 * Feature-node callback of the LTRdigest protein domain visitor.
 *
 * Steps, each only performed while no error has occurred:
 *   1. Walk the annotation subgraph below <fn> and remember the last node
 *      whose type equals lv->root_type in lv->ltr_retrotrans.
 *   2. If such a node is known, extract its sequence via lv->rmap and
 *      translate it in three frames into lv->fwd[0..2]; translate the reverse
 *      complement into lv->rev[0..2].
 *   3. On non-Windows builds, fork a child that executes "hmmscan" with
 *      lv->args, feed it the six translations through a pipe as FASTA-like
 *      records with headers ">0+", ">0-", ... ">2-", then parse the child's
 *      output and process the hits. On Windows an error is set instead.
 *   4. Finally let gt_ltrdigest_pdom_visitor_choose_strand() decide.
 *
 * <err> is passed to every translator call so that a translation failure is
 * always accompanied by an error message.
 *
 * Returns 0 on success, non-zero on error. A failing fork() terminates the
 * process.
 */
static int gt_ltrdigest_pdom_visitor_feature_node(GtNodeVisitor *nv,
                                                  GtFeatureNode *fn,
                                                  GtError *err)
{
  GtLTRdigestPdomVisitor *lv;
  GtFeatureNodeIterator *fni;
  GtFeatureNode *curnode = NULL;
  int had_err = 0;
  GtRange rng;
  GtUword i;
  lv = gt_ltrdigest_pdom_visitor_cast(nv);
  gt_assert(lv);
  gt_error_check(err);

  /* traverse annotation subgraph and find LTR element */
  fni = gt_feature_node_iterator_new(fn);
  while (!had_err && (curnode = gt_feature_node_iterator_next(fni))) {
    if (strcmp(gt_feature_node_get_type(curnode), lv->root_type) == 0) {
      lv->ltr_retrotrans = curnode;
    }
  }
  gt_feature_node_iterator_delete(fni);

  if (!had_err && lv->ltr_retrotrans != NULL) {
    GtCodonIterator *ci;
    GtTranslator *tr;
    GtTranslatorStatus status;
    GtUword seqlen;
    char translated, *rev_seq;
#ifndef _WIN32
    FILE *instream;
    GtHMMERParseStatus *pstatus;
#endif
    unsigned int frame;
    GtStr *seq;

    seq = gt_str_new();
    rng = gt_genome_node_get_range((GtGenomeNode*) lv->ltr_retrotrans);
    /* NOTE(review): the "- 1" presumably converts to 0-based coordinates -
       confirm against the users of leftLTR_5 / rightLTR_3 */
    lv->leftLTR_5 = rng.start - 1;
    lv->rightLTR_3 = rng.end - 1;
    seqlen = gt_range_length(&rng);

    had_err = gt_extract_feature_sequence(seq,
                                          (GtGenomeNode*) lv->ltr_retrotrans,
                                          lv->root_type,
                                          false, NULL, NULL, lv->rmap, err);

    if (!had_err) {
      /* discard translations left over from a previous element */
      for (i = 0UL; i < 3UL; i++) {
        gt_str_reset(lv->fwd[i]);
        gt_str_reset(lv->rev[i]);
      }

      /* create translations */
      ci = gt_codon_iterator_simple_new(gt_str_get(seq), seqlen, NULL);
      gt_assert(ci);
      tr = gt_translator_new(ci);
      status = gt_translator_next(tr, &translated, &frame, err);
      while (status == GT_TRANSLATOR_OK && translated) {
        gt_str_append_char(lv->fwd[frame], translated);
        status = gt_translator_next(tr, &translated, &frame, err);
      }
      if (status == GT_TRANSLATOR_ERROR) had_err = -1;
      if (!had_err) {
        /* work on a private copy; it is not NUL-terminated, its length is
           always passed explicitly */
        rev_seq = gt_malloc((size_t) seqlen * sizeof (char));
        strncpy(rev_seq, gt_str_get(seq), (size_t) seqlen * sizeof (char));
        (void) gt_reverse_complement(rev_seq, seqlen, NULL);
        gt_codon_iterator_delete(ci);
        ci = gt_codon_iterator_simple_new(rev_seq, seqlen, NULL);
        gt_translator_set_codon_iterator(tr, ci);
        status = gt_translator_next(tr, &translated, &frame, err);
        while (status == GT_TRANSLATOR_OK && translated) {
          gt_str_append_char(lv->rev[frame], translated);
          status = gt_translator_next(tr, &translated, &frame, err);
        }
        if (status == GT_TRANSLATOR_ERROR) had_err = -1;
        gt_free(rev_seq);
      }
      gt_codon_iterator_delete(ci);
      gt_translator_delete(tr);
    }

    /* run HMMER and handle results */
    if (!had_err) {
#ifndef _WIN32
      /* pc: parent -> child (child's stdin), cp: child -> parent (stdout) */
      int pid, pc[2], cp[2];
      GT_UNUSED int rval;

      (void) signal(SIGCHLD, SIG_IGN); /* XXX: for now, ignore child's
                                               exit status */
      rval = pipe(pc);
      gt_assert(rval == 0);
      rval = pipe(cp);
      gt_assert(rval == 0);

      switch ((pid = (int) fork())) {
        case -1:
          perror("Can't fork");
          exit(1);   /* XXX: error handling */
        case 0:    /* child */
          (void) close(1);    /* close current stdout. */
          rval = dup(cp[1]);  /* make stdout go to write end of pipe. */
          (void) close(0);    /* close current stdin. */
          rval = dup(pc[0]);  /* make stdin come from read end of pipe. */
          (void) close(pc[0]);
          (void) close(pc[1]);
          (void) close(cp[0]);
          (void) close(cp[1]);
          (void) execvp("hmmscan", lv->args); /* XXX: read path from env */
          /* only reached if execvp() failed */
          perror("couldn't execute hmmscan!");
          exit(1);
        default:    /* parent */
          for (i = 0UL; i < 3UL; i++) {
            /* header is '>', one frame digit, strand sign, newline */
            char buf[5];
            GT_UNUSED ssize_t written;
            (void) snprintf(buf, sizeof buf, ">"GT_WU"%c\n", i, '+');
            written = write(pc[1], buf, 4 * sizeof (char));
            written = write(pc[1], gt_str_get(lv->fwd[i]),
                            (size_t) gt_str_length(lv->fwd[i]) * sizeof (char));
            written = write(pc[1], "\n", 1 * sizeof (char));
            (void) snprintf(buf, sizeof buf, ">"GT_WU"%c\n", i, '-');
            written = write(pc[1], buf, 4 * sizeof (char));
            written = write(pc[1], gt_str_get(lv->rev[i]),
                            (size_t) gt_str_length(lv->rev[i]) * sizeof (char));
            written = write(pc[1], "\n", 1 * sizeof (char));
          }
          /* closing the write end signals end of input to the child */
          (void) close(pc[0]);
          (void) close(pc[1]);
          (void) close(cp[1]);
          instream = fdopen(cp[0], "r");
          pstatus = gt_hmmer_parse_status_new();
          had_err = gt_ltrdigest_pdom_visitor_parse_output(lv, pstatus,
                                                           instream, err);
          (void) fclose(instream);
          if (!had_err)
            had_err = gt_ltrdigest_pdom_visitor_process_hits(lv, pstatus, err);
          gt_hmmer_parse_status_delete(pstatus);
      }
#else
      /* XXX */
      gt_error_set(err, "HMMER call not implemented on Windows\n");
      had_err = -1;
#endif
    }
    gt_str_delete(seq);
  }
  if (!had_err)
    had_err = gt_ltrdigest_pdom_visitor_choose_strand(lv);
  return had_err;
}
Beispiel #11
0
/*
 * Prints the three-frame translation of an alternative gene structure <ags>
 * and its (consolidated) ORFs to out->outfp.
 *
 * The exon ranges of <ags> are used to build a spliced sequence from the
 * original genomic sequence (taken from <input> for the file and strand of
 * <ags>). That spliced sequence is translated with translation table number
 * <translationtable>; the characters of frame 0, 1 and 2 are collected
 * separately and handed to showtranslation() and gthshowORFs().
 *
 * If out->xmlout is set, the output is wrapped in a
 * <three_phase_translation> element and <indentlevel> is increased by one
 * inside of it; otherwise a plain text header line is printed.
 * <pglnum> and <agsnum> are shown with OUTPUTOFFSET added.
 */
void gt_outputtranslationandorf(unsigned long pglnum, const GthAGS *ags,
                                unsigned long agsnum,
                                unsigned long translationtable,
                                GthInput *input,
                                unsigned int indentlevel,
                                GthOutput *out)
{
  GtFile *outfp = out->outfp;
  GtStr *frame[3];
  GtArray *ranges;
  GthSplicedSeq *spliced_seq;
  const unsigned char *gen_seq_orig;
  GtCodonIterator *ci;
  GtTransTable *transtable;
  GtTranslator *translator;
  GtTranslatorStatus status;
  unsigned long exon_idx;
  unsigned int nframe, f;
  char translated;

  /* output header */
  if (!out->xmlout) {
    gt_file_xprintf(outfp, "3-phase translation of AGS-%lu (%cstrand):\n\n",
                       agsnum + OUTPUTOFFSET,
                       SHOWSTRAND(gth_ags_is_forward(ags)));
  }
  else {
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<three_phase_translation "
                    "xmlns=\"http://www.genomethreader.org/GTH_output/"
                    "PGL_module/predicted_gene_location/AGS_information/"
                    "three_phase_translation/\">\n");
    indentlevel++;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<description PGL_serial=\"%lu\" "
                              "AGS_serial=\"%lu\" gDNA_strand=\"%c\"/>\n",
                       pglnum + OUTPUTOFFSET, agsnum + OUTPUTOFFSET,
                       SHOWSTRAND(gth_ags_is_forward(ags)));
  }

  /* collect the exon ranges that make up the spliced sequence */
  ranges = gt_array_new(sizeof (GtRange));
  for (exon_idx = 0; exon_idx < gt_array_size(ags->exons); exon_idx++) {
    GthExonAGS *exon = gt_array_get(ags->exons, exon_idx);
    gt_array_add(ranges, exon->range);
  }

  /* get genomic sequence */
  gen_seq_orig = gth_input_original_genomic_sequence(input,
                                                     gth_ags_filenum(ags),
                                                     gth_ags_is_forward(ags));

  spliced_seq = gth_spliced_seq_new(gen_seq_orig, ranges);

  /* one result buffer per reading frame */
  for (f = 0; f < 3; f++)
    frame[f] = gt_str_new();

  /* prepare for translation */
  ci = gt_codon_iterator_simple_new((const char*) spliced_seq->splicedseq,
                                    spliced_seq->splicedseqlen, NULL);
  gt_assert(ci);
  transtable = gt_trans_table_new(translationtable, NULL);
  gt_assert(transtable);

  /* translate the template in all three frames */
  translator = gt_translator_new_with_table(transtable, ci);
  for (status = gt_translator_next(translator, &translated, &nframe, NULL);
       status == GT_TRANSLATOR_OK;
       status = gt_translator_next(translator, &translated, &nframe, NULL)) {
    gt_str_append_char(frame[nframe], translated);
  }
  gt_assert(status != GT_TRANSLATOR_ERROR);
  gt_translator_delete(translator);
  gt_trans_table_delete(transtable);
  gt_codon_iterator_delete(ci);

  /* show the translation */
  showtranslation(spliced_seq, gt_str_get(frame[0]), gt_str_get(frame[1]),
                  gt_str_get(frame[2]), ags->exons, gth_ags_is_forward(ags),
                  gth_ags_total_length(ags), gth_ags_genomic_offset(ags),
                  indentlevel, out);

  /* show the (consolidated) ORFs */
  gthshowORFs(gt_str_get(frame[0]), gt_str_get(frame[1]), gt_str_get(frame[2]),
              gt_str_length(frame[0]), gt_str_length(frame[1]),
              gt_str_length(frame[2]), gth_ags_is_forward(ags),
              gth_ags_total_length(ags), gth_ags_genomic_offset(ags),
              gt_str_get(ags->gen_id), pglnum, agsnum, spliced_seq,
              indentlevel, out);

  /* close the XML element opened in the header */
  if (out->xmlout) {
    indentlevel--;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "</three_phase_translation>\n");
  }

  gth_spliced_seq_delete(spliced_seq);
  gt_array_delete(ranges);
  for (f = 0; f < 3; f++)
    gt_str_delete(frame[f]);
}