/* Consumes lines from <instream> until one starts with
   "Internal pipeline statistics" or ">>", or until reading fails.
   A line carrying "==" at offsets 2-3 opens a new domain: the number found at
   offset 12 selects the hit (1-based) in <status>, which then receives fresh
   alignment and amino acid strings. Every other line is appended to the
   current hit's alignment text; a per-domain line counter decides which lines
   additionally contribute sequence to the hit's amino acid string.
   Returns 0 on success, otherwise the error code of the line reader. */
static int gt_ltrdigest_pdom_visitor_parse_alignments(
                                         GT_UNUSED GtLTRdigestPdomVisitor *lv,
                                         GtHMMERParseStatus *status,
                                         char *buf,
                                         FILE *instream,
                                         GtError *err)
{
  int had_err = 0,
      domain_no = GT_UNDEF_INT,
      lineno = -1;
  GtHMMERSingleHit *cur_hit = NULL;
  gt_assert(lv && instream && status);
  gt_error_check(err);

  had_err = pdom_parser_get_next_line(buf, instream, err);
  gt_assert(buf != NULL);

  while (!had_err
           && strncmp("Internal pipeline statistics", buf, (size_t) 28) != 0
           && strncmp(">>", buf, (size_t) 2) != 0) {
    if (buf[2] == '=' && buf[3] == '=') {
      /* domain header: cut the line after the number field and convert it */
      buf[17] = '\0';
      domain_no = atoi(buf+12);
      gt_assert(domain_no != GT_UNDEF_INT && domain_no > 0);
      cur_hit = gt_hmmer_parse_status_get_hit(status,
                                              (unsigned long) domain_no - 1);
      gt_assert(cur_hit && !cur_hit->alignment);
      cur_hit->alignment = gt_str_new();
      cur_hit->aastring = gt_str_new();
      /* the counter reaches 0 on the third line following this header */
      lineno = -2;
    } else {
      int phase;
      gt_assert(cur_hit && cur_hit->alignment);
      /* keep the raw line; strtok() below modifies buf afterwards */
      gt_str_append_cstr(cur_hit->alignment, buf);
      gt_str_append_char(cur_hit->alignment, '\n');
      phase = lineno % 4;
      if (phase == 1) {
        /* separate consecutive alignment blocks by an empty line */
        gt_str_append_char(cur_hit->alignment, '\n');
      } else if (phase == 0) {
        /* first token must be two characters out of "012+-", the second must
           be non-empty, the third is handed on as sequence */
        char *tok = strtok(buf, " ");
        gt_assert(strspn(tok, "012+-") == (size_t) 2);
        tok = strtok(NULL, " ");
        gt_assert(strlen(tok) > 0);
        tok = strtok(NULL, " ");
        gt_ltrdigest_pdom_visitor_add_aaseq(tok, cur_hit->aastring);
      }
      lineno++;
    }
    had_err = pdom_parser_get_next_line(buf, instream, err);
  }
  return had_err;
}
/* Parses one query report from <instream>.
   The first line read must start with "Query:"; if it does not, <*end> is set
   to true and nothing else is parsed. Otherwise the strand is derived from
   the character at offset 14 of that line and the frame from the character at
   offset 13, both stored in <status>, and the scores, domain hits and
   statistics sections are parsed in this order. If <*end> is already true on
   entry, only the first line is consumed.
   Returns 0 on success, otherwise the first error code encountered. */
static int gt_ltrdigest_pdom_visitor_parse_query(GtLTRdigestPdomVisitor *lv,
                                                 GtHMMERParseStatus *status,
                                                 bool *end,
                                                 FILE *instream,
                                                 GtError *err)
{
  char buf[GT_HMMER_BUF_LEN];
  int had_err;
  gt_assert(lv && instream && status);
  gt_error_check(err);

  had_err = pdom_parser_get_next_line(buf, instream, err);
  if (had_err)
    return had_err;

  if (strncmp("Query:", buf, (size_t) 6) != 0)
    *end = true;
  if (*end)
    return had_err;

  /* read the strand first, its position is overwritten by the terminator
     which isolates the frame character for atoi() */
  status->strand = gt_strand_get(buf[14]);
  buf[14] = '\0';
  status->frame = (unsigned) atoi(buf+13);

  had_err = gt_ltrdigest_pdom_visitor_parse_scores(lv, buf, instream, err);
  if (!had_err) {
    had_err = gt_ltrdigest_pdom_visitor_parse_domainhits(lv, status, buf,
                                                         instream, err);
  }
  if (!had_err) {
    had_err = gt_ltrdigest_pdom_visitor_parse_statistics(lv, buf, instream,
                                                         err);
  }
  return had_err;
}
/* Skips the scores section. The next line read from <instream> must start
   with "Scores", otherwise an error is set in <err> and -1 is returned.
   Lines are then read into <buf> until one starts with "Domain annotation";
   that line is left in <buf>.
   Returns 0 on success, otherwise -1 or the error code of the line reader. */
static int gt_ltrdigest_pdom_visitor_parse_scores(
                                         GT_UNUSED GtLTRdigestPdomVisitor *lv,
                                         char *buf,
                                         FILE *instream,
                                         GtError *err)
{
  int had_err;
  gt_assert(lv && instream);
  gt_error_check(err);

  had_err = pdom_parser_get_next_line(buf, instream, err);
  if (had_err)
    return had_err;

  if (strncmp("Scores", buf, (size_t) 6) != 0) {
    gt_error_set(err, "expected 'Scores:' at beginning of new scores "
                      "section, '%s' read instead", buf);
    return -1;
  }

  /* fast-forward to the header of the domain annotation section */
  while (strncmp("Domain annotation", buf, (size_t) 17) != 0) {
    had_err = pdom_parser_get_next_line(buf, instream, err);
    if (had_err)
      break;
  }
  return had_err;
}
/* Skips the remainder of the current report: starting with the line already
   held in <buf>, lines are read from <instream> until one begins with the
   two characters "//". That line is left in <buf>.
   Returns 0 on success, otherwise the error code of the line reader. */
static int gt_ltrdigest_pdom_visitor_parse_statistics(
                                         GT_UNUSED GtLTRdigestPdomVisitor *lv,
                                         char *buf,
                                         FILE *instream,
                                         GtError *err)
{
  int had_err = 0;
  gt_assert(lv && instream);
  gt_error_check(err);
  /* Both leading characters must be '/' to stop. The former test
     (buf[0] != '/' && buf[1] != '/') already stopped when only one of them
     was a slash and inspected buf[1] even for an empty line; strncmp() does
     not look beyond a terminator. */
  while (!had_err && strncmp("//", buf, (size_t) 2) != 0)
    had_err = pdom_parser_get_next_line(buf, instream, err);
  return had_err;
}
/* Parses the per-model domain hit tables of one query report.
   Lines are read from <instream> into <buf> until one starts with "Internal"
   or reading fails. For every line starting with ">>" the model name (first
   space-delimited token from offset 3) becomes the current model in <status>
   and the current frame is marked as finished. Each following line matching
   the domain table row format is stored as a new hit in <status>. If at least
   one hit has been seen for the current model, the alignment parser is run on
   the lines that follow.
   Returns 0 on success, otherwise -1 for a ">>" line without model name or
   the error code of the failing callee. */
static int gt_ltrdigest_pdom_visitor_parse_domainhits(
                                                   GtLTRdigestPdomVisitor *lv,
                                                   GtHMMERParseStatus *status,
                                                   char *buf,
                                                   FILE *instream,
                                                   GtError *err)
{
  int had_err = 0;
  GtUword i, nof_hits = 0;
  gt_assert(lv && instream && status);
  gt_error_check(err);
  had_err = pdom_parser_get_next_line(buf, instream, err);
  gt_assert(buf != NULL);
  while (!had_err && strncmp("Internal", buf, (size_t) 8)) {
    GtUword no, hmmfrom, hmmto, alifrom, alito;
    double score, evalue;
    char threshold_ok = '-';
    if ((buf[0] == '>' && buf[1] == '>')) {
      /* Only tokenize from offset 3 if the line is long enough for that
         offset to lie within the string. */
      char *model = (buf[2] != '\0') ? strtok(buf+3, " ") : NULL;
      if (model == NULL) {
        gt_error_set(err, "missing model name after '>>' in domain "
                          "annotation section");
        had_err = -1;
      } else {
        gt_str_reset(status->cur_model);
        gt_str_append_cstr(status->cur_model, model);
        had_err = pdom_parser_get_next_line(buf, instream, err);
      }
      /* The compared length is 17, which equals the length of the literal
         only with its three leading blanks. NOTE(review): HMMER is expected
         to indent the "[No individual domains ...]" notice by three blanks
         -- confirm against the HMMER version in use. If the notice is absent,
         two more lines are read so that <buf> holds the line following
         them. */
      if (!had_err && strncmp("   [No individual", buf, (size_t) 17)) {
        for (i = 0UL; i < 2UL && !had_err; i++)
          had_err = pdom_parser_get_next_line(buf, instream, err);
      }
      nof_hits = 0UL;
      gt_hmmer_parse_status_mark_frame_finished(status);
    }
    /* one table row per domain; <no> is only needed as a conversion target */
    while (!had_err
             && 8 == sscanf(buf,
                            GT_WU " %c %lf %*f %*f %lf " GT_WU " " GT_WU
                            " %*s " GT_WU " " GT_WU,
                            &no, &threshold_ok, &score, &evalue,
                            &hmmfrom, &hmmto, &alifrom, &alito)) {
      GtHMMERSingleHit *shit = gt_calloc((size_t) 1, sizeof (*shit));
      shit->hmmfrom = hmmfrom;
      shit->hmmto = hmmto;
      shit->alifrom = alifrom;
      shit->alito = alito;
      shit->score = score;
      shit->evalue = evalue;
      shit->strand = status->strand;
      shit->frame = (GtUword) status->frame;
      /* a '!' in the second column flags the hit as reported */
      shit->reported = (threshold_ok == '!');
      shit->chains = gt_array_new(sizeof (GtUword));
      gt_hmmer_parse_status_add_hit(status, shit);
      nof_hits++;
      had_err = pdom_parser_get_next_line(buf, instream, err);
    }
    if (!had_err) {
      if (nof_hits > 0)
        had_err = gt_ltrdigest_pdom_visitor_parse_alignments(lv, status, buf,
                                                             instream, err);
      else
        had_err = pdom_parser_get_next_line(buf, instream, err);
    }
  }
  return had_err;
}
/* Consumes lines from <instream> until one starts with
   "Internal pipeline statistics" or ">>", or until reading fails.
   A line carrying "==" at offsets 2-3 opens a new domain: the number found at
   offset 12 selects the hit (1-based) in <status>, which then receives fresh
   alignment and amino acid strings. The remaining lines are appended to the
   current hit's alignment text, and every line whose per-domain index is
   2 modulo the block size contributes sequence to the hit's amino acid
   string. The block size is 4 lines, or 5 if the first line after the domain
   header is a CS/RF annotation line (that first line itself is not stored).
   Returns 0 on success, otherwise the error code of the line reader. */
static int gt_ltrdigest_pdom_visitor_parse_alignments(
                                         GT_UNUSED GtLTRdigestPdomVisitor *lv,
                                         GtHMMERParseStatus *status,
                                         char *buf,
                                         FILE *instream,
                                         GtError *err)
{
  int had_err = 0, cur_domain = GT_UNDEF_INT, line = GT_UNDEF_INT;
  bool first_align_line = false;
  int mod_val = 4;
  GtHMMERSingleHit *hit = NULL;
  gt_assert(lv && instream && status);
  gt_error_check(err);
  had_err = pdom_parser_get_next_line(buf, instream, err);
  gt_assert(buf != NULL);
  while (!had_err
           && strncmp("Internal pipeline statistics", buf, (size_t) 28)
           && strncmp(">>", buf, (size_t) 2)) {
    if ((buf[2] == '=' && buf[3] == '=')) {
      /* domain header: cut the line after the number field and convert it */
      buf[17] = '\0';
      cur_domain = atoi(buf+12);
      gt_assert(cur_domain != GT_UNDEF_INT && cur_domain > 0);
      hit = gt_hmmer_parse_status_get_hit(status, (GtUword) cur_domain - 1);
      gt_assert(hit && !hit->alignment);
      hit->alignment = gt_str_new();
      hit->aastring = gt_str_new();
      first_align_line = true;
      mod_val = 4;
    } else {
      bool run = true;
      if (first_align_line) {
        /* some models contain consensus structure annotation -- in this case
           there is an additional line in the output which must be taken into
           account */
        static const char ws[] = " \t\n\v\f\r";
        const char *tok;
        size_t toklen;
        line = 0;
        /* Look at the second whitespace-delimited token in place instead of
           copying it with an unbounded "%s" conversion into a fixed-size
           buffer. */
        tok = buf + strspn(buf, ws);
        tok += strcspn(tok, ws);
        tok += strspn(tok, ws);
        toklen = strcspn(tok, ws);
        if (toklen == (size_t) 2
              && (strncmp(tok, "CS", (size_t) 2) == 0
                    || strncmp(tok, "RF", (size_t) 2) == 0)) {
          mod_val = 5;
          /* the increment below brings the counter to 0 for the next line */
          line = -1;
          run = false;
        }
        first_align_line = false;
      }
      if (run) {
        gt_assert(hit && hit->alignment);
        /* keep the raw line; strtok() below modifies buf afterwards */
        gt_str_append_cstr(hit->alignment, buf);
        gt_str_append_char(hit->alignment, '\n');
        switch (line % mod_val) {
          case 1:
            gt_str_append_char(hit->alignment, '\n');
            break;
          case 2:
            {
              /* first token must be two characters out of "012+-", the second
                 must be non-empty, the third is handed on as sequence */
              GT_UNUSED char *b = strtok(buf, " ");
              gt_assert(b != NULL && strspn(b, "012+-") == (size_t) 2);
              b = strtok(NULL, " ");
              gt_assert(b != NULL && strlen(b) > 0);
              b = strtok(NULL, " ");
              gt_ltrdigest_pdom_visitor_add_aaseq(b, hit->aastring);
            }
            break;
        }
      }
      line++;
    }
    had_err = pdom_parser_get_next_line(buf, instream, err);
  }
  return had_err;
}