/* Parse a single "tag<separator> value" line from <obo_file>.
   The tag characters are appended to <tag>, the value characters to <value>.
   Blanks directly after the separator are skipped. The line has to be
   terminated by a comment or by an end-of-line.
   Returns 0 on success; on failure a non-zero value is returned and <err> is
   set by the failing helper. */
static int tag_line(GtIO *obo_file, GtStr *tag, GtStr *value, GtError *err)
{
  int rval;
  gt_error_check(err);
  gt_assert(obo_file && tag && value);

  /* tag: one or more strict (non-permissive) characters */
  rval = proc_any_char(obo_file, tag, false, err);
  while (!rval && any_char(obo_file, false))
    rval = proc_any_char(obo_file, tag, false, err);
  if (rval)
    return rval;

  /* separator, followed by optional blanks */
  rval = gt_io_expect(obo_file, OBO_SEPARATOR_CHAR, err);
  if (rval)
    return rval;
  while (gt_io_peek(obo_file) == OBO_BLANK_CHAR)
    gt_io_next(obo_file);

  /* value: one or more permissive characters */
  rval = proc_any_char(obo_file, value, true, err);
  while (!rval && any_char(obo_file, true))
    rval = proc_any_char(obo_file, value, true, err);
  if (rval)
    return rval;

  /* trailing comment or end of the line */
  if (gt_io_peek(obo_file) == OBO_COMMENT_CHAR)
    return comment_line(obo_file, err);
  return gt_io_expect(obo_file, GT_END_OF_LINE, err);
}
/* Consume the next character of <obo_file> and append it to <capture>, given
   that any_char() accepts it for the given <be_permissive> mode.
   Otherwise nothing is consumed, <err> is set to a message describing what
   was found instead (end-of-file, newline or the offending character) and -1
   is returned. Returns 0 on success. */
static int proc_any_char(GtIO *obo_file, GtStr *capture, bool be_permissive,
                         GtError *err)
{
  char found;
  gt_error_check(err);
  gt_assert(obo_file && capture);

  if (any_char(obo_file, be_permissive)) {
    gt_str_append_char(capture, gt_io_next(obo_file));
    return 0;
  }

  /* not acceptable: report what was encountered */
  found = gt_io_peek(obo_file);
  switch (found) {
    case GT_END_OF_FILE:
      gt_error_set(err, "file \"%s\": line %lu: unexpected end-of-file",
                   gt_io_get_filename(obo_file),
                   gt_io_get_line_number(obo_file));
      break;
    case GT_CARRIAGE_RETURN:
    case GT_END_OF_LINE:
      gt_error_set(err, "file \"%s\": line %lu: unexpected newline",
                   gt_io_get_filename(obo_file),
                   gt_io_get_line_number(obo_file));
      break;
    default:
      gt_error_set(err, "file \"%s\": line %lu: unexpected character '%c'",
                   gt_io_get_filename(obo_file),
                   gt_io_get_line_number(obo_file), found);
  }
  return -1;
}
/* Parse one stanza from <obo_file>: the stanza line (via stanza_line())
   followed by any number of tag lines and comment lines. A new stanza object
   is created and added to <obo_parse_tree>; each successfully parsed
   tag/value pair is added to that stanza.
   Returns 0 on success, a non-zero value (with <err> set by the failing
   helper) otherwise. */
static int stanza(GtOBOParseTree *obo_parse_tree, GtIO *obo_file, GtError *err)
{
  unsigned long stanza_line_number;
  int had_err;
  GtStr *type, *tag, *value;
  gt_error_check(err);
  gt_assert(obo_parse_tree && obo_file);
  type = gt_str_new();
  tag = gt_str_new();
  value = gt_str_new();
  /* remember where the stanza starts before anything is consumed */
  stanza_line_number = gt_io_get_line_number(obo_file);
  had_err = stanza_line(obo_file, type, err);
  if (!had_err) {
    GtOBOStanza *obo_stanza =
      gt_obo_stanza_new(gt_str_get(type), stanza_line_number,
                        gt_io_get_filename_str(obo_file));
    gt_obo_parse_tree_add_stanza(obo_parse_tree, obo_stanza);
    while (!had_err &&
           (any_char(obo_file, false) ||
            gt_io_peek(obo_file) == OBO_COMMENT_CHAR)) {
      gt_str_reset(tag);
      gt_str_reset(value);
      if (gt_io_peek(obo_file) == OBO_COMMENT_CHAR)
        had_err = comment_line(obo_file, err);
      else {
        had_err = tag_line(obo_file, tag, value, err);
        /* only record complete pairs; after a failed tag_line() the buffers
           hold a partially parsed tag/value */
        if (!had_err)
          gt_obo_stanza_add(obo_stanza, gt_str_get(tag), gt_str_get(value));
      }
    }
  }
  gt_str_delete(value);
  gt_str_delete(tag);
  gt_str_delete(type);
  return had_err;
}
/* Parse one entry from <xrf_abbr_file>: a run of tag lines and comment lines.
   A new entry object (carrying the line number at which the entry starts and
   the filename) is created and added to <xrf_abbr_parse_tree>; each
   successfully parsed tag/value pair is added to that entry.
   Returns 0 on success, a non-zero value (with <err> set by the failing
   helper) otherwise. */
static int gt_xrf_abbr_parse_tree_entry(GtXRFAbbrParseTree *xrf_abbr_parse_tree,
                                        GtIO *xrf_abbr_file, GtError *err)
{
  GtUword entry_line_number;
  int had_err = 0;
  GtStr *tag, *value;
  GtXRFAbbrEntry *xrf_abbr_entry;
  gt_error_check(err);
  gt_assert(xrf_abbr_parse_tree && xrf_abbr_file);
  tag = gt_str_new();
  value = gt_str_new();
  entry_line_number = gt_io_get_line_number(xrf_abbr_file);
  xrf_abbr_entry = gt_xrf_abbr_entry_new(entry_line_number,
                                     gt_io_get_filename_str(xrf_abbr_file));
  gt_xrf_abbr_parse_tree_add_entry(xrf_abbr_parse_tree, xrf_abbr_entry);
  while (!had_err &&
         (gt_xrf_abbr_parse_tree_any_char(xrf_abbr_file, false) ||
          gt_io_peek(xrf_abbr_file) == XRF_COMMENT_CHAR)) {
    gt_str_reset(tag);
    gt_str_reset(value);
    if (gt_io_peek(xrf_abbr_file) == XRF_COMMENT_CHAR)
      had_err = gt_xrf_abbr_parse_tree_comment_line(xrf_abbr_file, err);
    else {
      had_err = gt_xrf_abbr_parse_tree_tag_line(xrf_abbr_file, tag, value,
                                                err);
      /* only record complete pairs; after a failed tag line the buffers
         hold a partially parsed tag/value */
      if (!had_err) {
        gt_xrf_abbr_entry_add(xrf_abbr_entry, gt_str_get(tag),
                              gt_str_get(value));
      }
    }
  }
  gt_str_delete(value);
  gt_str_delete(tag);
  return had_err;
}
/* Consume a line of <xrf_abbr_file> that starts with a blank. After the
   leading blanks the line may continue with a comment (handed over to the
   comment line parser) or end with a newline / end-of-file. Any other
   character makes gt_io_expect() fail and set <err>.
   Returns 0 on success, a non-zero value otherwise. */
static int gt_xrf_abbr_parse_tree_blank_line(GtIO *xrf_abbr_file, GtError *err)
{
  int rval;
  gt_error_check(err);
  gt_log_log("blank");
  rval = gt_io_expect(xrf_abbr_file, XRF_BLANK_CHAR, err);
  while (!rval) {
    switch (gt_io_peek(xrf_abbr_file)) {
      case XRF_COMMENT_CHAR:
        return gt_xrf_abbr_parse_tree_comment_line(xrf_abbr_file, err);
      case GT_CARRIAGE_RETURN:
        /* accept both CR and CR LF */
        gt_io_next(xrf_abbr_file);
        if (gt_io_peek(xrf_abbr_file) == GT_END_OF_LINE)
          gt_io_next(xrf_abbr_file);
        return rval;
      case GT_END_OF_LINE:
      case GT_END_OF_FILE:
        gt_io_next(xrf_abbr_file);
        return rval;
      default:
        /* must be another blank, everything else is an error */
        rval = gt_io_expect(xrf_abbr_file, XRF_BLANK_CHAR, err);
    }
  }
  return rval;
}
/* Consume a comment line of <xrf_abbr_file>: the comment character followed
   by everything up to and including the line terminator (LF, CR or CR LF).
   An end-of-file also ends the comment, but is not consumed.
   Returns 0 on success; if the line does not start with the comment
   character, the non-zero result of gt_io_expect() is returned (<err> set). */
static int gt_xrf_abbr_parse_tree_comment_line(GtIO *xrf_abbr_file,
                                               GtError *err)
{
  int rval;
  char cc;
  gt_error_check(err);
  gt_log_log("comment");
  rval = gt_io_expect(xrf_abbr_file, XRF_COMMENT_CHAR, err);
  if (rval)
    return rval;
  while ((cc = gt_io_peek(xrf_abbr_file)) != GT_END_OF_FILE) {
    gt_io_next(xrf_abbr_file);
    if (cc == GT_END_OF_LINE)
      break;
    if (cc == GT_CARRIAGE_RETURN) {
      /* swallow the LF of a CR LF pair */
      if (gt_io_peek(xrf_abbr_file) == GT_END_OF_LINE)
        gt_io_next(xrf_abbr_file);
      break;
    }
  }
  return rval;
}
/* Top-level loop of the BED parser: dispatch on the first character of each
   line of <bed_file> until no characters are left or an error occurred.
   Blank lines, comment lines and empty lines are skipped, every other line is
   given to bed_line(). Finally an end-of-file is expected.
   Returns 0 on success, a non-zero value (with <err> set by the failing
   helper) otherwise. */
static int parse_bed_file(GtBEDParser *bed_parser, GtIO *bed_file, GtError *err)
{
  int rval = 0;
  gt_error_check(err);
  gt_assert(bed_file);
  while (!rval && gt_io_has_char(bed_file)) {
    const char first = gt_io_peek(bed_file);
    if (first == BLANK_CHAR)
      rval = bed_parser_blank_line(bed_file, err);
    else if (first == COMMENT_CHAR)
      rval = bed_parser_comment_line(bed_file, err);
    else if (first == GT_CARRIAGE_RETURN) {
      /* empty line terminated by CR or CR LF */
      gt_io_next(bed_file);
      if (gt_io_peek(bed_file) == GT_END_OF_LINE)
        gt_io_next(bed_file);
    }
    else if (first == GT_END_OF_LINE)
      gt_io_next(bed_file);
    else
      rval = bed_line(bed_parser, bed_file, err);
  }
  if (rval)
    return rval;
  return gt_io_expect(bed_file, GT_END_OF_FILE, err);
}
/* Skip one ignorable item at the current position of <xrf_abbr_file>: a line
   starting with a blank, a comment line, or (for any other character) just
   that single character.
   The result of the blank/comment line parser is returned converted to bool,
   i.e. true denotes an error; false is returned after skipping a single
   character. */
static bool gt_xrf_abbr_parse_tree_ignored_line(GtIO *xrf_abbr_file,
                                                GtError *err)
{
  gt_error_check(err);
  gt_log_log("ignored");
  switch (gt_io_peek(xrf_abbr_file)) {
    case XRF_BLANK_CHAR:
      return gt_xrf_abbr_parse_tree_blank_line(xrf_abbr_file, err);
    case XRF_COMMENT_CHAR:
      return gt_xrf_abbr_parse_tree_comment_line(xrf_abbr_file, err);
    default:
      gt_io_next(xrf_abbr_file);
      return false;
  }
}
/* Read a quoted word from <bed_file> into <word> (which is reset first).
   An opening quote is expected, then all characters up to the next quote,
   newline or end-of-file are collected, and finally a closing quote is
   expected.
   Returns 0 on success, the non-zero result of gt_io_expect() (with <err>
   set) if one of the quotes is missing. */
static int quoted_word(GtStr *word, GtIO *bed_file, GtError *err)
{
  int rval;
  char cc;
  gt_error_check(err);
  gt_str_reset(word);
  rval = gt_io_expect(bed_file, QUOTE_CHAR, err);
  if (rval)
    return rval;
  for (cc = gt_io_peek(bed_file);
       cc != QUOTE_CHAR && cc != GT_CARRIAGE_RETURN && cc != GT_END_OF_LINE &&
       cc != GT_END_OF_FILE;
       cc = gt_io_peek(bed_file)) {
    gt_str_append_char(word, gt_io_next(bed_file));
  }
  /* an unterminated quote is reported here */
  return gt_io_expect(bed_file, QUOTE_CHAR, err);
}
/* Skip one ignorable item at the current position of <obo_file>.
   Lines starting with a blank are handled by blank_line(), comment lines by
   comment_line(). A bare line terminator (LF, CR or CR LF) is consumed
   directly: ignored_char() accepts these characters as well, and passing them
   on to comment_line() would fail with a spurious "expected character" error
   for files which begin with an empty line.
   The result of the line parsers is returned converted to bool, i.e. true
   denotes an error (<err> is set); false is returned on success. */
static bool ignored_line(GtIO *obo_file, GtError *err)
{
  gt_error_check(err);
  switch (gt_io_peek(obo_file)) {
    case OBO_BLANK_CHAR:
      return blank_line(obo_file, err);
    case GT_CARRIAGE_RETURN:
      /* empty line terminated by CR or CR LF */
      gt_io_next(obo_file);
      if (gt_io_peek(obo_file) == GT_END_OF_LINE)
        gt_io_next(obo_file);
      return false;
    case GT_END_OF_LINE:
      gt_io_next(obo_file);
      return false;
    default:
      /* a comment; for any other character comment_line() reports the error */
      return comment_line(obo_file, err);
  }
}
/* Read the next character from <io> and check that it equals
   <expected_char>. If an end-of-line is expected, a carriage return
   (optionally followed by an end-of-line, which is consumed as well) is
   accepted, too.
   Returns 0 on a match. On a mismatch -1 is returned and <err> is set to a
   message naming the file, the line number and what was expected/found. */
int gt_io_expect(GtIO *io, char expected_char, GtError *err)
{
  char got;
  gt_error_check(err);
  got = gt_io_next(io);
  if (got == expected_char)
    return 0;

  /* tolerate CR and CR LF where an end-of-line is expected */
  if (expected_char == GT_END_OF_LINE && got == GT_CARRIAGE_RETURN) {
    if (gt_io_peek(io) == GT_END_OF_LINE)
      gt_io_next(io);
    return 0;
  }

  if (expected_char == GT_END_OF_FILE) {
    gt_error_set(err, "file \"%s\": line %lu: expected end-of-file, got '%c'",
                 gt_io_get_filename(io), gt_io_get_line_number(io), got);
  }
  else if (got == GT_CARRIAGE_RETURN || got == GT_END_OF_LINE) {
    gt_error_set(err, "file \"%s\": line %lu: expected character '%c', got "
                 "newline", gt_io_get_filename(io), gt_io_get_line_number(io),
                 expected_char);
  }
  else {
    gt_error_set(err, "file \"%s\": line %lu: expected character '%c', got "
                 "'%c'", gt_io_get_filename(io), gt_io_get_line_number(io),
                 expected_char, got);
  }
  return -1;
}
/* Returns true if the next character of <bed_file> is a blank or a tabulator,
   false otherwise. Nothing is consumed. */
static bool bed_separator(GtIO *bed_file)
{
  const char next = gt_io_peek(bed_file);
  return next == BLANK_CHAR || next == TABULATOR_CHAR;
}
/* Returns true if the next character of <xrf_abbr_file> starts something
   ignorable (blank, comment character, carriage return or end-of-line), false
   otherwise. Nothing is consumed. */
static bool gt_xrf_abbr_parse_tree_ignored_char(GtIO *xrf_abbr_file)
{
  switch (gt_io_peek(xrf_abbr_file)) {
    case XRF_BLANK_CHAR:
    case XRF_COMMENT_CHAR:
    case GT_CARRIAGE_RETURN:
    case GT_END_OF_LINE:
      return true;
    default:
      return false;
  }
}
/* Returns true if the next character of <obo_file> starts something
   ignorable (blank, comment character, carriage return or end-of-line), false
   otherwise. Nothing is consumed. */
static bool ignored_char(GtIO *obo_file)
{
  const char next = gt_io_peek(obo_file);
  return next == OBO_BLANK_CHAR || next == OBO_COMMENT_CHAR ||
         next == GT_CARRIAGE_RETURN || next == GT_END_OF_LINE;
}
/* Discard the rest of the current line of <bed_file>, including its
   terminator (LF, CR or CR LF). An end-of-file stops the skipping as well,
   but is not consumed. */
static void rest_line(GtIO *bed_file)
{
  char cc;
  while ((cc = gt_io_peek(bed_file)) != GT_END_OF_FILE) {
    gt_io_next(bed_file);
    if (cc == GT_END_OF_LINE)
      return;
    if (cc == GT_CARRIAGE_RETURN) {
      /* swallow the LF of a CR LF pair */
      if (gt_io_peek(bed_file) == GT_END_OF_LINE)
        gt_io_next(bed_file);
      return;
    }
  }
}
/* Parse the remainder of a track line of <bed_file>: a sequence of
   attribute<PAIR_SEPARATOR>value pairs separated by blanks/tabulators, where
   a value may be quoted. The offset stored in <bed_parser> is reset to 0 and
   then set from the value of an attribute named OFFSET_KEYWORD, if present.
   The line has to end with an end-of-line.
   Returns 0 on success, a non-zero value with <err> set otherwise. */
static int track_rest(GtBEDParser *bed_parser, GtIO *bed_file, GtError *err)
{
  int rval = 0;
  gt_error_check(err);
  bed_parser->offset = 0; /* each track line starts with a zero offset */

  /* move to the first attribute=value pair */
  if (bed_separator(bed_file))
    rval = skip_blanks(bed_file, err);

  while (!rval) {
    const char cc = gt_io_peek(bed_file);
    if (cc == GT_END_OF_LINE || cc == GT_CARRIAGE_RETURN)
      break;

    /* attribute name and pair separator */
    word(bed_parser->word, bed_file);
    rval = gt_io_expect(bed_file, PAIR_SEPARATOR, err);
    if (rval)
      break;

    /* value, quoted or plain */
    if (gt_io_peek(bed_file) == QUOTE_CHAR) {
      rval = quoted_word(bed_parser->another_word, bed_file, err);
      if (rval)
        break;
    }
    else
      word(bed_parser->another_word, bed_file);

    /* the offset attribute is the only one which is evaluated */
    if (!strcmp(gt_str_get(bed_parser->word), OFFSET_KEYWORD) &&
        gt_parse_word(&bed_parser->offset,
                      gt_str_get(bed_parser->another_word))) {
      gt_error_set(err, "file \"%s\": line "GT_WU": could not parse offset value "
                   "'%s'", gt_io_get_filename(bed_file),
                   gt_io_get_line_number(bed_file),
                   gt_str_get(bed_parser->another_word));
      rval = -1;
      break;
    }

    /* move to the next pair or to the end of the line */
    if (bed_separator(bed_file))
      rval = skip_blanks(bed_file, err);
  }

  /* nothing but the line terminator may be left */
  if (!rval)
    rval = gt_io_expect(bed_file, GT_END_OF_LINE, err);
  return rval;
}
/* Consume a non-empty run of separators (blanks/tabulators) from <bed_file>.
   Returns 0 on success. If the next character is not a separator, nothing is
   consumed, <err> is set and -1 is returned. */
static int skip_blanks(GtIO *bed_file, GtError *err)
{
  gt_error_check(err);
  if (bed_separator(bed_file)) {
    do {
      gt_io_next(bed_file);
    } while (bed_separator(bed_file));
    return 0;
  }
  gt_error_set(err, "file \"%s\": line %lu: expected blank or tabulator, got "
               "'%c'", gt_io_get_filename(bed_file),
               gt_io_get_line_number(bed_file), gt_io_peek(bed_file));
  return -1;
}
/* Consume a line of <bed_file> which consists of blanks only. The line is
   terminated by LF, CR, CR LF or end-of-file. Any other character makes
   gt_io_expect() fail and set <err>.
   Returns 0 on success, a non-zero value otherwise. */
static int bed_parser_blank_line(GtIO *bed_file, GtError *err)
{
  int rval;
  gt_error_check(err);
  rval = gt_io_expect(bed_file, BLANK_CHAR, err);
  while (!rval) {
    switch (gt_io_peek(bed_file)) {
      case GT_CARRIAGE_RETURN:
        /* accept both CR and CR LF */
        gt_io_next(bed_file);
        if (gt_io_peek(bed_file) == GT_END_OF_LINE)
          gt_io_next(bed_file);
        return rval;
      case GT_END_OF_LINE:
      case GT_END_OF_FILE:
        gt_io_next(bed_file);
        return rval;
      default:
        /* must be another blank, everything else is an error */
        rval = gt_io_expect(bed_file, BLANK_CHAR, err);
    }
  }
  return rval;
}
/* Consume a comment line of <obo_file>: the comment character followed by
   everything up to and including the line terminator (LF, CR or CR LF).
   An end-of-file also ends the comment, but is not consumed.
   Returns 0 on success; if the line does not start with the comment
   character, the non-zero result of gt_io_expect() is returned (<err> set). */
static int comment_line(GtIO *obo_file, GtError *err)
{
  int rval;
  gt_error_check(err);
  rval = gt_io_expect(obo_file, OBO_COMMENT_CHAR, err);
  if (rval)
    return rval;
  for (;;) {
    const char cc = gt_io_peek(obo_file);
    if (cc == GT_END_OF_FILE)
      break;
    gt_io_next(obo_file);
    if (cc == GT_END_OF_LINE)
      break;
    if (cc == GT_CARRIAGE_RETURN) {
      /* swallow the LF of a CR LF pair */
      if (gt_io_peek(obo_file) == GT_END_OF_LINE)
        gt_io_next(obo_file);
      break;
    }
  }
  return rval;
}
/* Top-level loop of the XRF abbreviation parser. Leading ignorable content is
   skipped first. Afterwards each line of <xrf_abbr_file> is dispatched on its
   first character: blank lines, comment lines and empty lines are skipped,
   everything else is parsed as an entry and added to <xrf_abbr_parse_tree>.
   Finally an end-of-file is expected and the collected entries are validated.
   Returns 0 on success, a non-zero value (with <err> set by the failing
   helper) otherwise. */
static int parse_xrf_abbr_file(GtXRFAbbrParseTree *xrf_abbr_parse_tree,
                               GtIO *xrf_abbr_file, GtError *err)
{
  int rval = 0;
  gt_error_check(err);
  gt_assert(xrf_abbr_parse_tree && xrf_abbr_file);

  /* skip whatever precedes the first entry */
  while (!rval && gt_xrf_abbr_parse_tree_ignored_char(xrf_abbr_file))
    rval = gt_xrf_abbr_parse_tree_ignored_line(xrf_abbr_file, err);

  while (!rval && gt_io_has_char(xrf_abbr_file)) {
    const char first = gt_io_peek(xrf_abbr_file);
    if (first == XRF_BLANK_CHAR)
      rval = gt_xrf_abbr_parse_tree_blank_line(xrf_abbr_file, err);
    else if (first == XRF_COMMENT_CHAR)
      rval = gt_xrf_abbr_parse_tree_comment_line(xrf_abbr_file, err);
    else if (first == GT_CARRIAGE_RETURN) {
      /* empty line terminated by CR or CR LF */
      gt_io_next(xrf_abbr_file);
      if (gt_io_peek(xrf_abbr_file) == GT_END_OF_LINE)
        gt_io_next(xrf_abbr_file);
    }
    else if (first == GT_END_OF_LINE)
      gt_io_next(xrf_abbr_file);
    else {
      rval = gt_xrf_abbr_parse_tree_entry(xrf_abbr_parse_tree, xrf_abbr_file,
                                          err);
    }
  }
  if (rval)
    return rval;

  rval = gt_io_expect(xrf_abbr_file, GT_END_OF_FILE, err);
  if (rval)
    return rval;
  return gt_xrf_abbr_parse_tree_validate_entries(xrf_abbr_parse_tree, err);
}
/* Top-level loop of the OBO parser. Leading ignorable content is skipped,
   then the header is parsed. Afterwards each line of <obo_file> is dispatched
   on its first character: blank lines, comment lines and empty lines are
   skipped, everything else is parsed as a stanza and added to
   <obo_parse_tree>. Finally an end-of-file is expected and the collected
   stanzas are validated.
   Returns 0 on success, a non-zero value (with <err> set by the failing
   helper) otherwise. */
static int parse_obo_file(GtOBOParseTree *obo_parse_tree, GtIO *obo_file,
                          GtError *err)
{
  int rval = 0;
  gt_error_check(err);
  gt_assert(obo_parse_tree && obo_file);

  /* skip whatever precedes the header */
  while (!rval && ignored_char(obo_file))
    rval = ignored_line(obo_file, err);
  if (!rval)
    rval = header(obo_parse_tree, obo_file, err);

  while (!rval && gt_io_has_char(obo_file)) {
    const char first = gt_io_peek(obo_file);
    if (first == OBO_BLANK_CHAR)
      rval = blank_line(obo_file, err);
    else if (first == OBO_COMMENT_CHAR)
      rval = comment_line(obo_file, err);
    else if (first == GT_CARRIAGE_RETURN) {
      /* empty line terminated by CR or CR LF */
      gt_io_next(obo_file);
      if (gt_io_peek(obo_file) == GT_END_OF_LINE)
        gt_io_next(obo_file);
    }
    else if (first == GT_END_OF_LINE)
      gt_io_next(obo_file);
    else
      rval = stanza(obo_parse_tree, obo_file, err);
  }
  if (rval)
    return rval;

  rval = gt_io_expect(obo_file, GT_END_OF_FILE, err);
  if (rval)
    return rval;
  return gt_obo_parse_tree_validate_stanzas(obo_parse_tree, err);
}
/* Parse a single "tag<separator> value" line from <xrf_abbr_file>.
   The tag characters are appended to <tag>, the value characters to <value>.
   Blanks directly after the separator are skipped. The line has to be
   terminated by a comment or by an end-of-line. If the line was parsed
   successfully but the tag is not a valid label, a warning is issued (this is
   not an error). The parsed tag/value is logged in any case.
   Returns 0 on success; on failure a non-zero value is returned and <err> is
   set by the failing helper. */
static int gt_xrf_abbr_parse_tree_tag_line(GtIO *xrf_abbr_file, GtStr *tag,
                                           GtStr *value, GtError *err)
{
  int rval;
  gt_error_check(err);
  gt_log_log("tag");
  gt_assert(xrf_abbr_file && tag && value);

  /* tag: one or more strict (non-permissive) characters */
  rval = gt_xrf_abbr_parse_tree_proc_any_char(xrf_abbr_file, tag, false, err);
  while (!rval && gt_xrf_abbr_parse_tree_any_char(xrf_abbr_file, false)) {
    rval = gt_xrf_abbr_parse_tree_proc_any_char(xrf_abbr_file, tag, false,
                                                err);
  }

  if (!rval)
    rval = gt_io_expect(xrf_abbr_file, XRF_SEPARATOR_CHAR, err);

  if (!rval) {
    /* optional blanks, then the value: one or more permissive characters */
    while (gt_io_peek(xrf_abbr_file) == XRF_BLANK_CHAR)
      gt_io_next(xrf_abbr_file);
    rval = gt_xrf_abbr_parse_tree_proc_any_char(xrf_abbr_file, value, true,
                                                err);
    while (!rval && gt_xrf_abbr_parse_tree_any_char(xrf_abbr_file, true)) {
      rval = gt_xrf_abbr_parse_tree_proc_any_char(xrf_abbr_file, value, true,
                                                  err);
    }
  }

  if (!rval) {
    /* trailing comment or end of the line */
    rval = gt_io_peek(xrf_abbr_file) == XRF_COMMENT_CHAR
             ? gt_xrf_abbr_parse_tree_comment_line(xrf_abbr_file, err)
             : gt_io_expect(xrf_abbr_file, GT_END_OF_LINE, err);
  }

  if (!rval && !gt_xrf_abbr_parse_tree_valid_label(gt_str_get(tag))) {
    gt_warning("file \"%s\": line "GT_WU": unknown label \"%s\"",
               gt_io_get_filename(xrf_abbr_file),
               gt_io_get_line_number(xrf_abbr_file), gt_str_get(tag));
  }
  gt_log_log("parsed line %s/%s", gt_str_get(tag), gt_str_get(value));
  return rval;
}
/* Consume a line of <obo_file> that starts with a blank. After the leading
   blanks the line may continue with a comment (handed over to comment_line())
   or end with a newline / end-of-file. Any other character makes
   gt_io_expect() fail and set <err>.
   Returns 0 on success, a non-zero value otherwise. */
static int blank_line(GtIO *obo_file, GtError *err)
{
  int rval;
  gt_error_check(err);
  for (rval = gt_io_expect(obo_file, OBO_BLANK_CHAR, err);
       !rval;
       rval = gt_io_expect(obo_file, OBO_BLANK_CHAR, err)) {
    const char cc = gt_io_peek(obo_file);
    if (cc == OBO_COMMENT_CHAR)
      return comment_line(obo_file, err);
    if (cc == GT_CARRIAGE_RETURN) {
      /* accept both CR and CR LF */
      gt_io_next(obo_file);
      if (gt_io_peek(obo_file) == GT_END_OF_LINE)
        gt_io_next(obo_file);
      break;
    }
    if (cc == GT_END_OF_LINE || cc == GT_END_OF_FILE) {
      gt_io_next(obo_file);
      break;
    }
    /* otherwise the loop step requires another blank */
  }
  return rval;
}
/* Returns true if the next character of <xrf_abbr_file> may be part of a tag
   or value, without consuming it. Comment character, carriage return,
   end-of-line and end-of-file are never accepted. Blank and separator
   character are accepted only if <be_permissive> is true. Every other
   character is accepted. */
static bool gt_xrf_abbr_parse_tree_any_char(GtIO *xrf_abbr_file,
                                            bool be_permissive)
{
  const char next = gt_io_peek(xrf_abbr_file);
  if (next == XRF_BLANK_CHAR || next == XRF_SEPARATOR_CHAR)
    return be_permissive;
  return next != XRF_COMMENT_CHAR && next != GT_CARRIAGE_RETURN &&
         next != GT_END_OF_LINE && next != GT_END_OF_FILE;
}
/* Read an unquoted word from <bed_file> into <word> (which is reset first).
   Characters are collected until a blank, tabulator, pair separator, newline
   or end-of-file is encountered; the terminating character is not consumed.
   The resulting word may be empty. */
static void word(GtStr *word, GtIO *bed_file)
{
  char cc;
  gt_str_reset(word);
  for (cc = gt_io_peek(bed_file);
       cc != BLANK_CHAR && cc != TABULATOR_CHAR && cc != PAIR_SEPARATOR &&
       cc != GT_CARRIAGE_RETURN && cc != GT_END_OF_LINE &&
       cc != GT_END_OF_FILE;
       cc = gt_io_peek(bed_file)) {
    gt_str_append_char(word, gt_io_next(bed_file));
  }
}
/* Returns true if the next character of <obo_file> may be part of a tag or
   value, without consuming it. Comment character, carriage return,
   end-of-line and end-of-file are never accepted. Blank, separator, stanza
   open and stanza close character are accepted only if <be_permissive> is
   true. Every other character is accepted. */
static bool any_char(GtIO *obo_file, bool be_permissive)
{
  const char next = gt_io_peek(obo_file);
  if (next == OBO_BLANK_CHAR || next == OBO_SEPARATOR_CHAR ||
      next == OBO_STANZA_OPEN_CHAR || next == OBO_STANZA_CLOSE_CHAR) {
    return be_permissive;
  }
  return next != OBO_COMMENT_CHAR && next != GT_CARRIAGE_RETURN &&
         next != GT_END_OF_LINE && next != GT_END_OF_FILE;
}