/**
   Read one top-level statement: either a directive or a set of triples.

   Leading whitespace is skipped first.  If the next byte is 0, the reader is
   flagged as being at the end of input (`reader->eof`) and the call succeeds
   without consuming anything.  A directive (starting with `@') must be
   followed by a `.'.  Triples must also be followed by a `.' unless
   read_triples() reports, via its `ate_dot` output, that it already consumed
   one.

   @param reader Reader to consume input from.
   @return true on success or end of input, false if parsing failed.
*/
static bool
read_statement(SerdReader* reader)
{
    SerdStatementFlags flags   = 0;
    ReadContext        ctx     = { 0, 0, 0, &flags };
    bool               ate_dot = false;

    read_ws_star(reader);
    switch (peek_byte(reader)) {
    case '\0':
        reader->eof = true;
        return true;
    case '@':
        TRY_RET(read_directive(reader));
        break;
    default:
        if (!read_triples(reader, ctx, &ate_dot)) {
            return false;
        } else if (ate_dot) {
            return true;  // Terminating `.' was already consumed
        }
        break;
    }

    // Shared tail: directives and triples are both terminated by a `.'
    read_ws_star(reader);
    return (eat_byte_check(reader, '.') == '.');
}
// STRING_LITERAL_QUOTE and STRING_LITERAL_SINGLE_QUOTE // Initial quote is already eaten by caller static Ref read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) { Ref ref = push_node(reader, SERD_LITERAL, "", 0); while (true) { const uint8_t c = peek_byte(reader); uint32_t code; switch (c) { case '\n': case '\r': r_err(reader, SERD_ERR_BAD_SYNTAX, "line end in short string\n"); return pop_node(reader, ref); case '\\': eat_byte_safe(reader, c); if (!read_ECHAR(reader, ref, flags) && !read_UCHAR(reader, ref, &code)) { r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape `\\%c'\n", peek_byte(reader)); return pop_node(reader, ref); } break; default: if (c == q) { eat_byte_check(reader, q); return ref; } else { read_character(reader, ref, flags, eat_byte_safe(reader, c)); } } } eat_byte_check(reader, q); return ref; }
/**
   Read a string literal with an optional language tag or datatype.

   The string itself is read with read_String().  If it is followed by `@',
   a language tag is read into `*lang`.  If it is followed by `^^', an IRI is
   read into `*datatype`.  On failure after the string has been read, the
   `*datatype`, `*lang` and string nodes are popped.

   @param reader Reader to consume input from.
   @param dest Set to the string node on success.
   @param datatype Set by read_iri() when a `^^' datatype is present.
   @param lang Set to the language tag node when an `@' tag is present.
   @param flags Node flags, passed on to read_String().
   @param ate_dot Passed on to read_iri().
   @return true on success, false on failure.
*/
static bool
read_literal(SerdReader* reader, Ref* dest,
             Ref* datatype, Ref* lang, SerdNodeFlags* flags, bool* ate_dot)
{
    Ref str = read_String(reader, flags);
    if (!str) {
        return false;
    }

    switch (peek_byte(reader)) {
    case '@':
        eat_byte_safe(reader, '@');
        TRY_THROW(*lang = read_LANGTAG(reader));
        break;
    case '^':
        eat_byte_safe(reader, '^');
        // A datatype is introduced by two carets; a single one is an error,
        // so do not go on to read an IRI if the second one is missing.
        if (eat_byte_check(reader, '^') != '^') {
            goto except;
        }
        TRY_THROW(read_iri(reader, datatype, ate_dot));
        break;
    default:
        break;  // Plain literal with neither language nor datatype
    }

    *dest = str;
    return true;

except:
    pop_node(reader, *datatype);
    pop_node(reader, *lang);
    pop_node(reader, str);
    return false;
}
/**
   Consume the first `n` bytes of `str` from the input, one at a time.

   Each byte is passed to eat_byte_check() in order; its result is not
   inspected here.

   @param reader Reader to consume input from.
   @param str Expected bytes.
   @param n Number of bytes of `str` to consume.
*/
static inline void
eat_string(SerdReader* reader, const char* str, unsigned n)
{
    const uint8_t* cursor = (const uint8_t*)str;

    for (unsigned remaining = n; remaining > 0; --remaining) {
        eat_byte_check(reader, *cursor++);
    }
}
/**
   Read a `prefix' directive: the keyword, a prefix name, `:', and an IRI.

   The leading `@' has already been consumed by read_directive().  If the
   reader has a prefix sink, it is called with the name and IRI nodes, and
   this function succeeds only if the sink returns zero.  Both temporary
   nodes are popped before returning.

   @param reader Reader to consume input from.
   @return true on success, false on failure.
*/
static bool
read_prefixID(SerdReader* reader)
{
    eat_string(reader, "prefix", 6);
    TRY_RET(read_ws_plus(reader));

    // Prefix name followed by a colon (the colon is only tried if the name
    // was read without error)
    Ref        name      = push_node(reader, SERD_LITERAL, "", 0);
    const bool have_name = (read_PN_PREFIX(reader, name) <= SERD_FAILURE) &&
                           (eat_byte_check(reader, ':') == ':');
    if (!have_name) {
        return pop_node(reader, name);
    }

    read_ws_star(reader);
    const Ref uri = read_IRIREF(reader);
    if (!uri) {
        pop_node(reader, name);
        return false;
    }

    bool ok = true;
    if (reader->prefix_sink) {
        const SerdNode* const name_node = deref(reader, name);
        const SerdNode* const uri_node  = deref(reader, uri);

        ok = !reader->prefix_sink(reader->handle, name_node, uri_node);
    }

    // Pop in reverse order of pushing
    pop_node(reader, uri);
    pop_node(reader, name);
    return ok;
}
/**
   Read a blank node: a label (`_:x'), an anonymous node (`[...]'), or a
   collection (`(...)').

   For an anonymous node, the statement flags in `ctx` are updated to mark
   the node as empty or as the start of an anonymous description.  If
   `ctx.subject` is set, a statement pointing at the new node is emitted.
   The node's predicate-object list is then read with the node as subject,
   the end sink (if any) is called, and the flags are restored to their
   value on entry.

   @param reader Reader to consume input from.
   @param ctx Current context (passed by value; modified locally only).
   @param subject True if the blank node is in subject position.
   @param dest Receives the blank node.  An ID is generated for an anonymous
               node if `*dest` is still unset after any `= name' was read.
   @param ate_dot Passed on to read_BLANK_NODE_LABEL().
   @return true on success, false on failure.
*/
static bool
read_blank(SerdReader* reader, ReadContext ctx, bool subject, Ref* dest,
           bool* ate_dot)
{
    const SerdStatementFlags old_flags = *ctx.flags;

    switch (peek_byte(reader)) {
    case '_':
        return (*dest = read_BLANK_NODE_LABEL(reader, ate_dot));
    case '(':
        return read_collection(reader, ctx, dest);
    case '[':
        break;  // Anonymous node, handled below
    default:
        return false;  // never reached
    }

    eat_byte_safe(reader, '[');

    const bool empty = peek_delim(reader, ']');
    if (empty) {
        *ctx.flags |= (subject) ? SERD_EMPTY_S : SERD_EMPTY_O;
    } else {
        *ctx.flags |= (subject) ? SERD_ANON_S_BEGIN : SERD_ANON_O_BEGIN;
        if (peek_delim(reader, '=')) {
            // Explicitly named node: `[ == <name> ; ...'
            *dest = read_blankName(reader);
            if (!*dest || !eat_delim(reader, ';')) {
                return false;
            }
        }
    }

    if (!*dest) {
        *dest = blank_id(reader);
    }

    if (ctx.subject) {
        TRY_RET(emit_statement(reader, ctx, *dest, 0, 0));
    }

    ctx.subject = *dest;
    if (!empty) {
        *ctx.flags &= ~(SERD_LIST_CONT);
        if (!subject) {
            *ctx.flags |= SERD_ANON_CONT;
        }

        bool dot_in_list = false;
        read_predicateObjectList(reader, ctx, &dot_in_list);
        if (dot_in_list) {
            return r_err(reader, SERD_ERR_BAD_SYNTAX, "`.' inside blank\n");
        }

        read_ws_star(reader);
        if (reader->end_sink) {
            reader->end_sink(reader->handle, deref(reader, *dest));
        }

        *ctx.flags = old_flags;
    }

    return (eat_byte_check(reader, ']') == ']');
}
/**
   Read an explicit blank node name of the form `== iri'.

   The first `=' is expected to be the current byte.  The second is checked,
   and an error is reported if it is missing.  Whitespace is then skipped and
   the name is read with read_iri().

   @param reader Reader to consume input from.
   @return The node set by read_iri() (0 if it set none), or the result of
           r_err() if the second `=' is missing.
*/
static Ref
read_blankName(SerdReader* reader)
{
    eat_byte_safe(reader, '=');

    const bool have_second = (eat_byte_check(reader, '=') == '=');
    if (!have_second) {
        return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected `='\n");
    }

    read_ws_star(reader);

    bool unused_dot = false;
    Ref  name       = 0;
    read_iri(reader, &name, &unused_dot);
    return name;
}
/**
   Read an IRI reference enclosed in angle brackets (`<...>').

   A new SERD_URI node is pushed and filled with the bytes between the
   brackets.  Backslash escapes are read with read_UCHAR().  The characters
   `"', `<', `^', '`', `{', `|' and `}' are rejected, as are escapes that
   decode to 0, space, `<' or `>'.  Bytes at or below 0x20 are always
   reported as errors; in strict mode they abort the read, otherwise they are
   added to the IRI anyway.

   @param reader Reader to consume input from.
   @return The IRI node on success, or the result of pop_node() on error.
*/
static Ref
read_IRIREF(SerdReader* reader)
{
    TRY_RET(eat_byte_check(reader, '<'));
    Ref      ref = push_node(reader, SERD_URI, "", 0);
    uint32_t code;

    // The loop is only left by returning: on the closing `>' or on error.
    // NOTE(review): a 0 byte from peek_byte() falls into the `c <= 0x20'
    // branch; in non-strict mode it is pushed like any other byte.  Confirm
    // this cannot spin at end of input.
    while (true) {
        const uint8_t c = peek_byte(reader);
        switch (c) {
        case '"':
        case '<':
        case '^':
        case '`':
        case '{':
        case '|':
        case '}':
            r_err(reader, SERD_ERR_BAD_SYNTAX,
                  "invalid IRI character `%c'\n", c);
            return pop_node(reader, ref);
        case '>':
            eat_byte_safe(reader, c);
            return ref;
        case '\\':
            eat_byte_safe(reader, c);
            if (!read_UCHAR(reader, ref, &code)) {
                r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n");
                return pop_node(reader, ref);
            }
            switch (code) {
            case 0:
            case ' ':
            case '<':
            case '>':
                r_err(reader, SERD_ERR_BAD_SYNTAX,
                      "invalid escaped IRI character %X %c\n",
                      (unsigned)code, (int)code);
                return pop_node(reader, ref);
            default:
                break;
            }
            break;
        default:
            if (c <= 0x20) {
                if (isprint(c)) {
                    r_err(reader, SERD_ERR_BAD_SYNTAX,
                          "invalid IRI character `%c' (escape %%%02X)\n",
                          c, (unsigned)c);
                } else {
                    // Exactly one argument for the single %02X conversion
                    r_err(reader, SERD_ERR_BAD_SYNTAX,
                          "invalid IRI character (escape %%%02X)\n",
                          (unsigned)c);
                }
                if (reader->strict) {
                    return pop_node(reader, ref);
                }
            }
            // Valid byte, or an invalid one tolerated in non-strict mode
            push_byte(reader, ref, eat_byte_safe(reader, c));
            break;
        }
    }
}
/**
   Read a blank node label (`_:name').

   A new SERD_BLANK node is pushed, starting with the reader's blank prefix
   (if any), and the label is appended to it.  If the label ends with a `.',
   that dot is removed from the node again and `*ate_dot` is set so the
   caller knows it has already been consumed.

   When reading Turtle, a label whose second character is a digit and whose
   first is `b' has that `b' rewritten to `B', and the reader remembers
   having done so.  A later label of that shape starting with `B' is then
   reported as an ID clash.

   @param reader Reader to consume input from.
   @param ate_dot Set to true if a trailing `.' was consumed.
   @return The blank node on success, 0 if the `:' after `_' is missing, or
           the result of pop_node() on any other error.
*/
static Ref
read_BLANK_NODE_LABEL(SerdReader* reader, bool* ate_dot)
{
    eat_byte_safe(reader, '_');
    if (eat_byte_check(reader, ':') != ':') {
        // `_' without `:' is not a label; fail before pushing any node
        return 0;
    }

    Ref ref = push_node(reader, SERD_BLANK,
                        reader->bprefix ? (char*)reader->bprefix : "",
                        reader->bprefix_len);

    uint8_t c = peek_byte(reader);  // First: (PN_CHARS | '_' | [0-9])
    if (is_digit(c) || c == '_') {
        push_byte(reader, ref, eat_byte_safe(reader, c));
    } else if (!read_PN_CHARS(reader, ref)) {
        r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid name start character\n");
        return pop_node(reader, ref);
    }

    while ((c = peek_byte(reader))) {  // Middle: (PN_CHARS | '.')*
        if (c == '.') {
            push_byte(reader, ref, eat_byte_safe(reader, c));
        } else if (!read_PN_CHARS(reader, ref)) {
            break;
        }
    }

    SerdNode* n = deref(reader, ref);
    if (n->buf[n->n_bytes - 1] == '.' && !read_PN_CHARS(reader, ref)) {
        // Ate trailing dot, pop it from stack/node and inform caller
        --n->n_bytes;
        serd_stack_pop(&reader->stack, 1);
        *ate_dot = true;
    }

    if (reader->syntax == SERD_TURTLE &&
        is_digit(n->buf[reader->bprefix_len + 1])) {
        if ((n->buf[reader->bprefix_len]) == 'b') {
            ((char*)n->buf)[reader->bprefix_len] = 'B';  // Prevent clash
            reader->seen_genid = true;
        } else if (reader->seen_genid &&
                   n->buf[reader->bprefix_len] == 'B') {
            r_err(reader, SERD_ERR_ID_CLASH,
                  "found both `b' and `B' blank IDs, prefix required\n");
            return pop_node(reader, ref);
        }
    }

    return ref;
}
/**
   Read a prefixed name (`prefix:local') into an existing node.

   @param reader Reader to consume input from.
   @param dest Node to append the name to.
   @param read_prefix If true, read the prefix part first; otherwise start
                      directly at the `:'.
   @param ate_dot Passed on to read_PN_LOCAL().
   @return true on success, false on failure.
*/
static bool
read_PrefixedName(SerdReader* reader, Ref dest, bool read_prefix,
                  bool* ate_dot)
{
    // Optional prefix part
    if (read_prefix && read_PN_PREFIX(reader, dest) > SERD_FAILURE) {
        return false;
    }

    // Mandatory separator
    const bool have_colon = (eat_byte_check(reader, ':') == ':');
    if (!have_colon) {
        return false;
    }
    push_byte(reader, dest, ':');

    // Local part
    return read_PN_LOCAL(reader, dest, ate_dot) <= SERD_FAILURE;
}