/*
 * Collects the alignment text for domain hits already stored in <status>.
 *
 * Lines are read from <instream> into <buf> until reading fails or a line
 * starting with "Internal pipeline statistics" or ">>" is seen; that line is
 * left in <buf> for the caller.
 *
 * A line with '=' at offsets 2 and 3 starts a new domain: the buffer is cut at
 * offset 17, the integer at offset 12 is taken as the 1-based domain number
 * and the matching hit gets fresh <alignment> and <aastring> strings.
 * Every other line is appended to the current hit's alignment text. Counting
 * from the header, the third line of each group of four is additionally split
 * at blanks and its third token is handed to
 * gt_ltrdigest_pdom_visitor_add_aaseq(); the fourth line of each group is
 * followed by an extra newline.
 *
 * Note that <buf> is modified in place (truncation and strtok()).
 *
 * Returns the status of the last pdom_parser_get_next_line() call
 * (0 means success).
 */
static int gt_ltrdigest_pdom_visitor_parse_alignments(GT_UNUSED
                                                      GtLTRdigestPdomVisitor
                                                                            *lv,
                                                     GtHMMERParseStatus *status,
                                                     char *buf,
                                                     FILE *instream,
                                                     GtError *err)
{
  GtHMMERSingleHit *cur_hit = NULL;
  int had_err = 0,
      domain_no = GT_UNDEF_INT,
      row = -1;

  gt_assert(lv && instream && status);
  gt_error_check(err);

  had_err = pdom_parser_get_next_line(buf, instream, err);
  gt_assert(buf != NULL);

  while (!had_err) {
    /* either marker ends the alignment section */
    if (strncmp("Internal pipeline statistics", buf, (size_t) 28) == 0 ||
        strncmp(">>", buf, (size_t) 2) == 0)
      break;

    if (buf[2] == '=' && buf[3] == '=') {
      /* domain header: isolate the number field before converting it */
      buf[17] = '\0';
      domain_no = atoi(buf + 12);
      gt_assert(domain_no != GT_UNDEF_INT && domain_no > 0);
      cur_hit = gt_hmmer_parse_status_get_hit(status,
                                              (unsigned long) domain_no - 1);
      gt_assert(cur_hit && !cur_hit->alignment);
      cur_hit->alignment = gt_str_new();
      cur_hit->aastring = gt_str_new();
      /* starting at -2 makes the third following line hit 'row % 4 == 0' */
      row = -2;
    } else {
      int phase;

      gt_assert(cur_hit && cur_hit->alignment);
      gt_str_append_cstr(cur_hit->alignment, buf);
      gt_str_append_char(cur_hit->alignment, '\n');

      /* negative rows yield a negative remainder and match neither case */
      phase = row % 4;
      if (phase == 0) {
        /* first token must consist of exactly two characters from "012+-",
           the second must be non-empty, the third is the residue string */
        char *tok = strtok(buf, " ");
        gt_assert(strspn(tok, "012+-") == (size_t) 2);
        tok = strtok(NULL, " ");
        gt_assert(strlen(tok) > 0);
        tok = strtok(NULL, " ");
        gt_ltrdigest_pdom_visitor_add_aaseq(tok, cur_hit->aastring);
      } else if (phase == 1) {
        /* blank line after each complete group */
        gt_str_append_char(cur_hit->alignment, '\n');
      }
      row++;
    }

    had_err = pdom_parser_get_next_line(buf, instream, err);
  }

  return had_err;
}
Пример #2
0
/*
 * Parses one query section from <instream>.
 *
 * The first line read must start with "Query:"; if it does not, <*end> is set
 * to true and nothing else is parsed. Otherwise the character at offset 14 of
 * that line is converted to a strand via gt_strand_get() and the character at
 * offset 13 is converted to the frame number, both stored in <status>. After
 * that the scores, domain hits and statistics parts are parsed in this order,
 * stopping at the first failure.
 *
 * <*end> is read as well as written: if it is already true, only the first
 * line is consumed.
 *
 * Returns 0 on success, otherwise the error code of the failing step.
 */
static int gt_ltrdigest_pdom_visitor_parse_query(GtLTRdigestPdomVisitor *lv,
                                                 GtHMMERParseStatus *status,
                                                 bool *end,
                                                 FILE *instream, GtError *err)
{
  char buf[GT_HMMER_BUF_LEN];
  int had_err = 0;

  gt_assert(lv && instream && status);
  gt_error_check(err);

  had_err = pdom_parser_get_next_line(buf, instream, err);
  if (had_err)
    return had_err;

  if (strncmp("Query:", buf, (size_t) 6) != 0)
    *end = true;
  if (*end)
    return had_err;

  /* read the strand first, then overwrite it so that atoi() sees only the
     single frame character at offset 13 */
  status->strand = gt_strand_get(buf[14]);
  buf[14] = '\0';
  status->frame = (unsigned) atoi(buf + 13);

  had_err = gt_ltrdigest_pdom_visitor_parse_scores(lv, buf, instream, err);
  if (had_err || *end)
    return had_err;

  had_err = gt_ltrdigest_pdom_visitor_parse_domainhits(lv, status, buf,
                                                       instream, err);
  if (had_err || *end)
    return had_err;

  return gt_ltrdigest_pdom_visitor_parse_statistics(lv, buf, instream, err);
}
/*
 * Skips the scores part of a query section.
 *
 * The next line read from <instream> must start with "Scores"; otherwise an
 * error is stored in <err> and -1 is returned. Further lines are then read
 * until one starts with "Domain annotation"; that line is left in <buf>.
 *
 * Returns 0 on success, -1 on an unexpected first line, or the error code of
 * pdom_parser_get_next_line() if reading fails.
 */
static int gt_ltrdigest_pdom_visitor_parse_scores(GT_UNUSED
                                                  GtLTRdigestPdomVisitor *lv,
                                                  char *buf, FILE *instream,
                                                  GtError *err)
{
  int had_err = 0;

  gt_assert(lv && instream);
  gt_error_check(err);

  had_err = pdom_parser_get_next_line(buf, instream, err);
  if (had_err)
    return had_err;

  if (strncmp("Scores", buf, (size_t) 6) != 0) {
    gt_error_set(err, "expected 'Scores:' at beginning of new scores "
                      "section, '%s' read instead", buf);
    return -1;
  }

  /* consume everything up to the start of the domain annotation */
  for (;;) {
    if (strncmp("Domain annotation", buf, (size_t) 17) == 0)
      break;
    had_err = pdom_parser_get_next_line(buf, instream, err);
    if (had_err)
      break;
  }
  return had_err;
}
Пример #4
0
/*
 * Skips the statistics part of a query section.
 *
 * <buf> must already hold the line most recently read by the caller. Lines
 * are read from <instream> into <buf> until the current line starts with the
 * two characters "//" or reading fails.
 *
 * Returns 0 on success, otherwise the error code of
 * pdom_parser_get_next_line().
 */
static int gt_ltrdigest_pdom_visitor_parse_statistics(GT_UNUSED
                                                      GtLTRdigestPdomVisitor
                                                                            *lv,
                                                     char *buf, FILE *instream,
                                                     GtError *err)
{
  int had_err = 0;
  gt_assert(lv && instream);
  gt_error_check(err);
  /* Keep reading until BOTH leading characters are '/'. The former test
     (buf[0] != '/' && buf[1] != '/') already stopped when only one of the two
     positions held a slash, and it inspected buf[1] even for an empty line. */
  while (!had_err && !(buf[0] == '/' && buf[1] == '/'))
    had_err = pdom_parser_get_next_line(buf, instream, err);
  return had_err;
}
Пример #5
0
/*
 * Parses the domain annotation part of a query section.
 *
 * Lines are read from <instream> into <buf> until one starts with "Internal"
 * or reading fails. For every line starting with ">>" the first blank-
 * delimited token from offset 3 on becomes the current model name in
 * <status>, the per-model hit counter is reset and the frame is marked as
 * finished; unless the following line starts with "   [No individual", two
 * more lines are skipped. Each subsequent line matching the hit table format
 * is turned into a GtHMMERSingleHit and added to <status>. If the current
 * model has at least one hit, the alignment section is parsed next,
 * otherwise just the next line is fetched.
 *
 * Returns 0 on success, otherwise the error code of the failing step.
 */
static int gt_ltrdigest_pdom_visitor_parse_domainhits(GtLTRdigestPdomVisitor
                                                                            *lv,
                                                     GtHMMERParseStatus *status,
                                                     char *buf,
                                                     FILE *instream,
                                                     GtError *err)
{
  GtUword hits_for_model = 0;
  int had_err = 0;

  gt_assert(lv && instream && status);
  gt_error_check(err);

  had_err = pdom_parser_get_next_line(buf, instream, err);
  gt_assert(buf != NULL);

  while (!had_err && strncmp("Internal", buf, (size_t) 8) != 0) {
    GtUword domain_no, hmm_start, hmm_end, ali_start, ali_end;
    double bitscore, exp_value;
    char flag = '-';

    if (strncmp(">>", buf, (size_t) 2) == 0) {
      /* model header: remember the model name */
      char *model_name = strtok(buf + 3, " ");
      gt_str_reset(status->cur_model);
      gt_str_append_cstr(status->cur_model, model_name);

      had_err = pdom_parser_get_next_line(buf, instream, err);
      if (!had_err && strncmp("   [No individual", buf, (size_t) 17) != 0) {
        /* skip two further lines before the hit rows start */
        had_err = pdom_parser_get_next_line(buf, instream, err);
        if (!had_err)
          had_err = pdom_parser_get_next_line(buf, instream, err);
      }
      hits_for_model = 0UL;
      gt_hmmer_parse_status_mark_frame_finished(status);
    }

    /* one hit per row for as long as all eight fields can be converted */
    while (!had_err &&
           sscanf(buf, GT_WU " %c %lf %*f %*f %lf " GT_WU " " GT_WU " %*s "
                       GT_WU " " GT_WU, &domain_no, &flag, &bitscore,
                       &exp_value, &hmm_start, &hmm_end, &ali_start,
                       &ali_end) == 8) {
      GtHMMERSingleHit *new_hit = gt_calloc((size_t) 1, sizeof (*new_hit));
      new_hit->hmmfrom = hmm_start;
      new_hit->hmmto = hmm_end;
      new_hit->alifrom = ali_start;
      new_hit->alito = ali_end;
      new_hit->score = bitscore;
      new_hit->evalue = exp_value;
      new_hit->strand = status->strand;
      new_hit->frame = (GtUword) status->frame;
      /* only rows flagged with '!' count as reported */
      new_hit->reported = (flag == '!');
      new_hit->chains = gt_array_new(sizeof (GtUword));
      gt_hmmer_parse_status_add_hit(status, new_hit);
      hits_for_model++;
      had_err = pdom_parser_get_next_line(buf, instream, err);
    }

    if (had_err)
      break;

    if (hits_for_model > 0) {
      had_err = gt_ltrdigest_pdom_visitor_parse_alignments(lv, status, buf,
                                                           instream, err);
    } else {
      had_err = pdom_parser_get_next_line(buf, instream, err);
    }
  }

  return had_err;
}
Пример #6
0
/*
 * Collects the alignment text for domain hits already stored in <status>.
 *
 * Lines are read from <instream> into <buf> until reading fails or a line
 * starting with "Internal pipeline statistics" or ">>" is seen; that line is
 * left in <buf> for the caller.
 *
 * A line with '=' at offsets 2 and 3 starts a new domain: the buffer is cut at
 * offset 17, the integer at offset 12 is taken as the 1-based domain number
 * and the matching hit gets fresh <alignment> and <aastring> strings.
 * If the second token of the first line after such a header is "CS" or "RF",
 * that line is not stored and the group length switches from 4 to 5 lines.
 * All other lines are appended to the hit's alignment text; the line with
 * index 2 of each group is additionally split at blanks and its third token
 * is handed to gt_ltrdigest_pdom_visitor_add_aaseq().
 *
 * Note that <buf> is modified in place (truncation and strtok()).
 *
 * Returns the status of the last pdom_parser_get_next_line() call
 * (0 means success).
 */
static int gt_ltrdigest_pdom_visitor_parse_alignments(GT_UNUSED
                                                      GtLTRdigestPdomVisitor
                                                                            *lv,
                                                     GtHMMERParseStatus *status,
                                                     char *buf,
                                                     FILE *instream,
                                                     GtError *err)
{
  int had_err = 0, cur_domain = GT_UNDEF_INT, line = GT_UNDEF_INT;
  bool first_align_line = false;
  int mod_val = 4;
  GtHMMERSingleHit *hit = NULL;
  gt_assert(lv && instream && status);
  gt_error_check(err);
  had_err = pdom_parser_get_next_line(buf, instream, err);
  gt_assert(buf != NULL);
  while (!had_err && strncmp("Internal pipeline statistics",
                             buf, (size_t) 28) &&
                     strncmp(">>", buf, (size_t) 2)) {
    if ((buf[2] == '=' && buf[3] == '=')) {
      /* domain header: isolate the number field before converting it */
      buf[17] = '\0';
      cur_domain = atoi(buf+12);
      gt_assert(cur_domain != GT_UNDEF_INT && cur_domain > 0);
      hit = gt_hmmer_parse_status_get_hit(status,
                                          (GtUword) cur_domain - 1);
      gt_assert(hit && !hit->alignment);
      hit->alignment = gt_str_new();
      hit->aastring = gt_str_new();
      first_align_line = true;
      mod_val = 4;
    } else {
      bool run = true;
      /* Only the labels "CS" and "RF" matter, so three characters plus the
         terminator are enough: a longer token is cut to three characters and
         therefore still compares unequal to both labels. Bounding the
         conversion keeps an overlong token from overrunning the buffer. */
      char label[4];
      if (first_align_line) {
        /* some models contain consensus structure annotation -- in this case
           there is an additional line in the output which must be taken
           into account */
        line = 0;
        if (1 == sscanf(buf, "%*s %3s", label)) {
          if (0 == strcmp(label, "CS") || 0 == strcmp(label, "RF")) {
            mod_val = 5;
            /* the increment below brings the counter to 0 for the next line */
            line = -1;
            run = false;
          }
        }
        first_align_line = false;
      }
      if (run) {
        gt_assert(hit && hit->alignment);
        gt_str_append_cstr(hit->alignment, buf);
        gt_str_append_char(hit->alignment, '\n');
        switch (line % mod_val) {
          case 1:
            /* NOTE(review): the extra blank line is emitted after the line
               with index 1 of each group, not after the last one -- confirm
               that this placement is intended */
            gt_str_append_char(hit->alignment, '\n');
            break;
          case 2:
            {
              /* first token must consist of exactly two characters from
                 "012+-", the second must be non-empty, the third is the
                 residue string */
              GT_UNUSED char *b = buf;
              b = strtok(buf, " ");
              gt_assert(strspn(b, "012+-") == (size_t) 2);
              b = strtok(NULL, " ");
              gt_assert(strlen(b) > 0);
              b = strtok(NULL, " ");
              gt_ltrdigest_pdom_visitor_add_aaseq(b, hit->aastring);
            }
            break;
        }
      }
      line++;
    }
    had_err = pdom_parser_get_next_line(buf, instream, err);
  }
  return had_err;
}