/**
   Read a collection, `( object* )`, and emit it as an rdf:first/rdf:rest list.

   The opening `(` is expected to be the next byte of input.

   @param reader Reader to consume input from and emit statements through.
   @param ctx    Statement context (passed by value).  If `ctx.subject` is set,
                 a statement with the list head as its object is emitted first;
                 otherwise the list itself is in subject position.
   @param dest   Set to the list head: `reader->rdf_nil` if the collection is
                 empty, otherwise a node obtained from blank_id().
   @return The result of end_collection() on the normal and error paths after
           list nodes exist, or a false value if the initial emit_statement()
           fails (TRY_RET presumably returns early on a false result).
*/
static bool
read_collection(SerdReader* reader, ReadContext ctx, Ref* dest)
{
    eat_byte_safe(reader, '(');
    bool end = peek_delim(reader, ')');
    *dest = end ? reader->rdf_nil : blank_id(reader);
    if (ctx.subject) {
        // subject predicate _:head
        *ctx.flags |= (end ? 0 : SERD_LIST_O_BEGIN);
        TRY_RET(emit_statement(reader, ctx, *dest, 0, 0));
        *ctx.flags |= SERD_LIST_CONT;
    } else {
        *ctx.flags |= (end ? 0 : SERD_LIST_S_BEGIN);
    }

    if (end) {
        // Empty collection: no list nodes were allocated
        return end_collection(reader, ctx, 0, 0, true);
    }

    /* The order of node allocation here is necessarily not in stack order,
       so we create two nodes and recycle them throughout. */
    Ref n1   = push_node_padded(reader, genid_size(reader), SERD_BLANK, "", 0);
    Ref n2   = 0;
    Ref node = n1;
    Ref rest = 0;

    ctx.subject = *dest;
    while (!(end = peek_delim(reader, ')'))) {
        // _:node rdf:first object
        ctx.predicate = reader->rdf_first;
        bool ate_dot = false;
        if (!read_object(reader, ctx, &ate_dot) || ate_dot) {
            return end_collection(reader, ctx, n1, n2, false);
        }

        if (!(end = peek_delim(reader, ')'))) {
            /* Give rest a new ID.  Done as late as possible to ensure it is
               used and > IDs generated by read_object above. */
            if (!rest) {
                rest = n2 = blank_id(reader);  // First pass, push
            } else {
                set_blank_id(reader, rest, genid_size(reader));
            }
        }

        // _:node rdf:rest _:rest
        *ctx.flags |= SERD_LIST_CONT;
        ctx.predicate = reader->rdf_rest;
        if (!emit_statement(reader, ctx,
                            (end ? reader->rdf_nil : rest), 0, 0)) {
            /* Leave through end_collection() like the read_object() failure
               above, so n1/n2 are handed back rather than abandoned. */
            return end_collection(reader, ctx, n1, n2, false);
        }

        ctx.subject = rest;         // _:node = _:rest
        rest        = node;         // _:rest = (old)_:node
        node        = ctx.subject;  // invariant
    }

    return end_collection(reader, ctx, n1, n2, true);
}
/**
   Read one object, then further objects for as long as each is followed by
   a `,` delimiter.

   @param reader  Reader to consume input from.
   @param ctx     Statement context passed unchanged to read_object().
   @param ate_dot Set by read_object(); once it is true no further objects
                  are read.
   @return true if every read_object() call succeeded, otherwise a false
           value.
*/
static bool
read_objectList(SerdReader* reader, ReadContext ctx, bool* ate_dot)
{
    do {
        TRY_RET(read_object(reader, ctx, ate_dot));
    } while (!*ate_dot && eat_delim(reader, ','));

    return true;
}
/**
   Read the remainder of a `@base` directive and report the new base URI.

   @param reader Reader to consume input from.
   @return true once the IRI has been read (and passed to `reader->base_sink`
           if one is set), false if whitespace or the IRI could not be read.
*/
static bool
read_base(SerdReader* reader)
{
    // The leading `@' was already consumed by read_directive
    eat_string(reader, "base", 4);
    if (!read_ws_plus(reader)) {
        return false;
    }

    const Ref uri = read_IRIREF(reader);
    if (!uri) {
        return false;
    }

    if (reader->base_sink) {
        // NOTE(review): any result from base_sink is discarded here, unlike
        // prefix_sink in read_prefixID -- confirm whether that is intended.
        reader->base_sink(reader->handle, deref(reader, uri));
    }

    pop_node(reader, uri);
    return true;
}
/**
   Read a single top-level statement: a directive or a set of triples.

   Leading whitespace is skipped first.  A NUL byte marks end of input and
   sets `reader->eof`.  A directive (starting with `@`) and triples that did
   not already consume their terminating `.` must be followed by optional
   whitespace and a `.`.

   @param reader Reader to consume input from.
   @return true at end of input or after a complete statement, false on a
           read error or a missing `.`.
*/
static bool
read_statement(SerdReader* reader)
{
    SerdStatementFlags flags   = 0;
    ReadContext        ctx     = { 0, 0, 0, &flags };
    bool               ate_dot = false;

    read_ws_star(reader);
    switch (peek_byte(reader)) {
    case '\0':
        reader->eof = true;
        return true;
    case '@':
        TRY_RET(read_directive(reader));
        break;
    default:
        if (!read_triples(reader, ctx, &ate_dot)) {
            return false;
        }
        if (ate_dot) {
            return true;  // Terminator was consumed while reading the triples
        }
        break;
    }

    // Both remaining paths end the same way: optional whitespace, then `.'
    read_ws_star(reader);
    return (eat_byte_check(reader, '.') == '.');
}
/**
   Read the remainder of a `@prefix` directive: a prefix name, `:`, and an
   IRI, then report the pair to `reader->prefix_sink` if one is set.

   @param reader Reader to consume input from.
   @return A false value if whitespace, the prefix name, the `:` or the IRI
           cannot be read; otherwise the negated result of the prefix sink,
           or true when no sink is set.
*/
static bool
read_prefixID(SerdReader* reader)
{
    // The leading `@' was already consumed by read_directive
    eat_string(reader, "prefix", 6);
    TRY_RET(read_ws_plus(reader));

    Ref name = push_node(reader, SERD_LITERAL, "", 0);

    // Prefix name, then the mandatory colon; either failure discards name
    if (read_PN_PREFIX(reader, name) > SERD_FAILURE
        || eat_byte_check(reader, ':') != ':') {
        return pop_node(reader, name);
    }

    read_ws_star(reader);
    const Ref uri = read_IRIREF(reader);
    if (!uri) {
        pop_node(reader, name);
        return false;
    }

    bool ret = true;
    if (reader->prefix_sink) {
        // A non-zero sink result is reported to the caller as failure
        ret = !reader->prefix_sink(reader->handle,
                                   deref(reader, name),
                                   deref(reader, uri));
    }

    // Release in reverse order of allocation
    pop_node(reader, uri);
    pop_node(reader, name);
    return ret;
}
/**
   Read a subject followed by its predicate-object list.

   If read_subject() reports the subject as nested, the predicate-object list
   is optional: it is skipped when the next byte after optional whitespace is
   `.`.  Otherwise whitespace and a predicate-object list are required.

   @param reader  Reader to consume input from.
   @param ctx     Statement context (by value); its subject is replaced with
                  the subject read here.
   @param ate_dot Passed through to read_predicateObjectList().
   @return false if no subject could be read or the required whitespace is
           missing, otherwise the result of reading the predicate-object list
           (true when a nested subject has none).
*/
static bool
read_triples(SerdReader* reader, ReadContext ctx, bool* ate_dot)
{
    bool      nested  = false;
    const Ref subject = read_subject(reader, ctx, &nested);
    if (!subject) {
        return false;
    }

    ctx.subject = subject;

    bool ret = true;
    if (nested) {
        read_ws_star(reader);
        if (peek_byte(reader) != '.') {
            ret = read_predicateObjectList(reader, ctx, ate_dot);
        }
    } else {
        /* Fold the whitespace check into the result instead of returning
           early, so the subject node is always popped below. */
        ret = read_ws_plus(reader)
            && read_predicateObjectList(reader, ctx, ate_dot);
    }

    pop_node(reader, subject);
    return ret;
}
/**
   Read statements until the reader's `eof` flag is set.

   @param reader Reader to consume input from.
   @return true once `reader->eof` is set, false as soon as a statement
           fails to read.
*/
static bool
read_turtleDoc(SerdReader* reader)
{
    for (;;) {
        if (reader->eof) {
            return true;
        }
        if (!read_statement(reader)) {
            return false;
        }
    }
}
/**
   Read a blank node: a labelled node (`_:...`), an anonymous node (`[...]`),
   or a collection (`(...)`), chosen by the next byte of input.

   @param reader  Reader to consume input from.
   @param ctx     Statement context (by value).  If `ctx.subject` is set, a
                  statement with the new anonymous node as object is emitted.
   @param subject True if the node is in subject position; selects which
                  flags are set for an anonymous node.
   @param dest    Receives the node.  For `[`, a value already present in
                  `*dest` is kept rather than replaced by blank_id().
   @param ate_dot Passed to read_BLANK_NODE_LABEL() for labelled nodes.
   @return A true value on success, a false value on error or if the next
           byte does not start a blank node.
*/
static bool
read_blank(SerdReader* reader, ReadContext ctx, bool subject, Ref* dest, bool* ate_dot)
{
    const SerdStatementFlags saved_flags = *ctx.flags;

    // Dispatch the simple cases; only `[' continues past the switch
    switch (peek_byte(reader)) {
    case '_':
        return (*dest = read_BLANK_NODE_LABEL(reader, ate_dot));
    case '(':
        return read_collection(reader, ctx, dest);
    case '[':
        break;
    default:
        return false;  // never reached
    }

    eat_byte_safe(reader, '[');
    const bool empty = peek_delim(reader, ']');
    if (empty) {
        *ctx.flags |= (subject) ? SERD_EMPTY_S : SERD_EMPTY_O;
    } else {
        *ctx.flags |= (subject) ? SERD_ANON_S_BEGIN : SERD_ANON_O_BEGIN;
        if (peek_delim(reader, '=')) {
            // Explicitly named node: a name, then a `;' delimiter
            if (!(*dest = read_blankName(reader))
                || !eat_delim(reader, ';')) {
                return false;
            }
        }
    }

    if (!*dest) {
        *dest = blank_id(reader);
    }

    if (ctx.subject) {
        TRY_RET(emit_statement(reader, ctx, *dest, 0, 0));
    }

    ctx.subject = *dest;
    if (!empty) {
        *ctx.flags &= ~(SERD_LIST_CONT);
        if (!subject) {
            *ctx.flags |= SERD_ANON_CONT;
        }

        // NOTE(review): the result of read_predicateObjectList is not
        // checked here; only a consumed `.' is treated as an error.
        bool dot_inside = false;
        read_predicateObjectList(reader, ctx, &dot_inside);
        if (dot_inside) {
            return r_err(reader, SERD_ERR_BAD_SYNTAX, "`.' inside blank\n");
        }

        read_ws_star(reader);
        if (reader->end_sink) {
            reader->end_sink(reader->handle, deref(reader, *dest));
        }

        // Restore the flags that were in effect on entry
        *ctx.flags = saved_flags;
    }

    return (eat_byte_check(reader, ']') == ']');
}
/**
   Read an IRI reference enclosed in `<` and `>` into a new URI node.

   Escapes introduced by a backslash are decoded by read_UCHAR().  Bytes at
   or below 0x20 are always reported as errors; they abort the read only when
   `reader->strict` is set, otherwise they are kept in the node.

   @param reader Reader to consume input from.
   @return The new node on success.  On error the node is popped and the
           result of pop_node() is returned; a false value is returned if
           the opening `<` is missing.
*/
static Ref
read_IRIREF(SerdReader* reader)
{
    TRY_RET(eat_byte_check(reader, '<'));
    Ref      ref  = push_node(reader, SERD_URI, "", 0);
    uint32_t code = 0;
    while (true) {
        const uint8_t c = peek_byte(reader);
        switch (c) {
        case '"': case '<': case '^': case '`': case '{': case '|': case '}':
            r_err(reader, SERD_ERR_BAD_SYNTAX,
                  "invalid IRI character `%c'\n", c);
            return pop_node(reader, ref);
        case '>':
            eat_byte_safe(reader, c);
            return ref;
        case '\\':
            eat_byte_safe(reader, c);
            if (!read_UCHAR(reader, ref, &code)) {
                r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n");
                return pop_node(reader, ref);
            }
            // Some characters may not appear in an IRI even when escaped
            switch (code) {
            case 0: case ' ': case '<': case '>':
                r_err(reader, SERD_ERR_BAD_SYNTAX,
                      "invalid escaped IRI character %X %c\n", code, code);
                return pop_node(reader, ref);
            }
            break;
        default:
            if (c <= 0x20) {
                if (isprint(c)) {
                    r_err(reader, SERD_ERR_BAD_SYNTAX,
                          "invalid IRI character `%c' (escape %%%02X)\n",
                          c, c);
                } else {
                    // One conversion in the format, so exactly one argument
                    r_err(reader, SERD_ERR_BAD_SYNTAX,
                          "invalid IRI character (escape %%%02X)\n", c);
                }
                if (reader->strict) {
                    return pop_node(reader, ref);
                }
            }
            // Valid byte, or invalid but tolerated when not strict
            push_byte(reader, ref, eat_byte_safe(reader, c));
        }
    }
}
/**
   Consume the next byte of input, which is expected to equal `byte`.

   The read head and the cursor line/column are advanced before the byte is
   compared.  When reading from a file and the read head reaches
   READ_BUF_LEN, page() is called to continue.  `reader->eof` is set if the
   byte now under the read head is NUL.

   @param reader Reader to consume input from.
   @param byte   The byte expected at the read head.
   @return The byte consumed, the result of error() if it was not `byte`,
           or a false value if page() fails.
*/
static inline uint8_t
eat_byte(SerdReader* reader, const uint8_t byte)
{
    const uint8_t got = peek_byte(reader);

    // Advance the head and cursor regardless of whether the byte matches
    ++reader->read_head;
    if (got == '\n') {
        ++reader->cur.line;
        reader->cur.col = 0;
    } else {
        ++reader->cur.col;
    }

    if (got != byte) {
        return error(reader, "expected `%c', not `%c'\n", byte, got);
    }

    if (reader->from_file && (reader->read_head == READ_BUF_LEN)) {
        TRY_RET(page(reader));
        assert(reader->read_head < READ_BUF_LEN);
    }

    if (reader->read_buf[reader->read_head] == '\0') {
        reader->eof = true;
    }

    return got;
}
/**
   Read one required unit of whitespace via read_ws(), then any further
   whitespace via read_ws_star().

   @param reader Reader to consume input from.
   @return false if read_ws() fails, otherwise the result of read_ws_star().
*/
static inline bool
read_ws_plus(SerdReader* reader)
{
    // Short-circuit: read_ws_star() only runs after read_ws() succeeds
    return read_ws(reader) && read_ws_star(reader);
}