/**
   Report a bad character and resynchronise the input.

   Emits a SERD_ERR_BAD_SYNTAX error formatted from `fmt` and `c`, pushes a
   replacement onto `dest` via push_replacement(), then consumes input for as
   long as the next byte has its high bit (0x80) set.

   @return Always SERD_SUCCESS.
*/
static inline SerdStatus
bad_char(SerdReader* reader, Ref dest, const char* fmt, uint8_t c)
{
    r_err(reader, SERD_ERR_BAD_SYNTAX, fmt, c);
    push_replacement(reader, dest);

    // Discard bytes with the high bit set; stop at the first one without it
    uint8_t next = peek_byte(reader);
    while (next & 0x80) {
        eat_byte_safe(reader, next);
        next = peek_byte(reader);
    }

    return SERD_SUCCESS;
}
/**
   Read a blank node name introduced by `==`.

   The first `=` is consumed unconditionally (the caller has already peeked
   it); the second is checked.  After optional whitespace an IRI is read and
   returned.

   @return The ref filled in by read_iri(), or the value of r_err() if the
   second `=` is missing.  The result of read_iri() itself is not inspected,
   so the returned ref is 0 if read_iri() left it untouched.
*/
static Ref
read_blankName(SerdReader* reader)
{
    eat_byte_safe(reader, '=');

    const bool second_eq_ok = (eat_byte_check(reader, '=') == '=');
    if (!second_eq_ok) {
        return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected `='\n");
    }

    Ref  name           = 0;
    bool consumed_a_dot = false;

    read_ws_star(reader);
    read_iri(reader, &name, &consumed_a_dot);

    return name;
}
/**
   Read an anonymous blank node: `[ ... ]`.

   @param reader  Reader to consume input from.
   @param ctx     Current read context (passed by value; `ctx.flags` points at
                  flags shared with the caller and is modified in place).
   @param subject True if the node is in subject position, false for object.
   @param dest    In/out ref of the node; if still 0 after an optional `==`
                  name has been read, a fresh blank id is generated.

   @return True if the node was read and the closing `]` was consumed.
*/
static bool
read_anon(SerdReader* reader, ReadContext ctx, bool subject, Ref* dest)
{
    const SerdStatementFlags saved_flags = *ctx.flags;

    eat_byte_safe(reader, '[');

    const bool is_empty = peek_delim(reader, ']');
    if (is_empty) {
        *ctx.flags |= (subject) ? SERD_EMPTY_S : SERD_EMPTY_O;
    } else {
        *ctx.flags |= (subject) ? SERD_ANON_S_BEGIN : SERD_ANON_O_BEGIN;
        if (peek_delim(reader, '=')) {
            // Explicitly named node: `== <iri> ;`
            *dest = read_blankName(reader);
            if (!*dest || !eat_delim(reader, ';')) {
                return false;
            }
        }
    }

    if (!*dest) {
        *dest = blank_id(reader);
    }

    // Only emit when there is an enclosing subject to attach this node to
    if (ctx.subject) {
        TRY_RET(emit_statement(reader, ctx, *dest, 0, 0));
    }

    // From here on, statements inside the brackets are about this node
    ctx.subject = *dest;

    if (is_empty) {
        // Note: the flags set above are left in place for the empty case
        return (eat_byte_check(reader, ']') == ']');
    }

    *ctx.flags &= ~(SERD_LIST_CONT);
    if (!subject) {
        *ctx.flags |= SERD_ANON_CONT;
    }

    bool dot_seen = false;
    read_predicateObjectList(reader, ctx, &dot_seen);
    if (dot_seen) {
        return r_err(reader, SERD_ERR_BAD_SYNTAX, "`.' inside blank\n");
    }

    read_ws_star(reader);
    if (reader->end_sink) {
        reader->end_sink(reader->handle, deref(reader, *dest));
    }

    *ctx.flags = saved_flags;

    return (eat_byte_check(reader, ']') == ']');
}
/**
   Read a language tag: one or more letters, followed by any number of
   `-`-prefixed subtags made of letters and digits.

   The tag is accumulated in a new SERD_LITERAL node.

   @return The ref of the new node, or the value of r_err() if the first
   character is not alphabetic.
*/
static Ref
read_LANGTAG(SerdReader* reader)
{
    uint8_t ch = peek_byte(reader);
    if (!is_alpha(ch)) {
        return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected `%c'\n", ch);
    }

    Ref tag = push_node(reader, SERD_LITERAL, "", 0);

    // Primary tag: the leading letter, then any further letters
    push_byte(reader, tag, eat_byte_safe(reader, ch));
    for (ch = peek_byte(reader); ch && is_alpha(ch); ch = peek_byte(reader)) {
        push_byte(reader, tag, eat_byte_safe(reader, ch));
    }

    // Subtags: '-' followed by letters and/or digits
    while (peek_byte(reader) == '-') {
        push_byte(reader, tag, eat_byte_safe(reader, '-'));
        for (ch = peek_byte(reader);
             ch && (is_alpha(ch) || is_digit(ch));
             ch = peek_byte(reader)) {
            push_byte(reader, tag, eat_byte_safe(reader, ch));
        }
    }

    return tag;
}
// Read the remainder of a PN_PREFIX after some initial characters static SerdStatus read_PN_PREFIX_tail(SerdReader* reader, Ref dest) { uint8_t c; while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.')* if (c == '.') { push_byte(reader, dest, eat_byte_safe(reader, c)); } else if (!read_PN_CHARS(reader, dest)) { break; } } const SerdNode* const n = deref(reader, dest); if (n->buf[n->n_bytes - 1] == '.' && !read_PN_CHARS(reader, dest)) { r_err(reader, SERD_ERR_BAD_SYNTAX, "prefix ends with `.'\n"); return SERD_ERR_BAD_SYNTAX; } return SERD_SUCCESS; }
// STRING_LITERAL_LONG_QUOTE and STRING_LITERAL_LONG_SINGLE_QUOTE // Initial triple quotes are already eaten by caller static Ref read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) { Ref ref = push_node(reader, SERD_LITERAL, "", 0); while (true) { const uint8_t c = peek_byte(reader); uint32_t code; switch (c) { case '\\': eat_byte_safe(reader, c); if (!read_ECHAR(reader, ref, flags) && !read_UCHAR(reader, ref, &code)) { r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape `\\%c'\n", peek_byte(reader)); return pop_node(reader, ref); } break; default: if (c == q) { eat_byte_safe(reader, q); const uint8_t q2 = eat_byte_safe(reader, peek_byte(reader)); const uint8_t q3 = peek_byte(reader); if (q2 == q && q3 == q) { // End of string eat_byte_safe(reader, q3); return ref; } else { *flags |= SERD_HAS_QUOTE; push_byte(reader, ref, c); read_character(reader, ref, flags, q2); } } else { read_character(reader, ref, flags, eat_byte_safe(reader, c)); } } } return ref; }
// Read UCHAR escape, initial \ is already eaten by caller static inline bool read_UCHAR(SerdReader* reader, Ref dest, uint32_t* char_code) { const uint8_t b = peek_byte(reader); unsigned length = 0; switch (b) { case 'U': length = 8; break; case 'u': length = 4; break; default: return false; } eat_byte_safe(reader, b); uint8_t buf[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; for (unsigned i = 0; i < length; ++i) { if (!(buf[i] = read_HEX(reader))) { return false; } } uint32_t code; sscanf((const char*)buf, "%X", &code); unsigned size = 0; if (code < 0x00000080) { size = 1; } else if (code < 0x00000800) { size = 2; } else if (code < 0x00010000) { size = 3; } else if (code < 0x00110000) { size = 4; } else { r_err(reader, SERD_ERR_BAD_SYNTAX, "unicode character 0x%X out of range\n", code); push_replacement(reader, dest); *char_code = 0xFFFD; return true; } // Build output in buf // (Note # of bytes = # of leading 1 bits in first byte) uint32_t c = code; switch (size) { case 4: buf[3] = 0x80 | (uint8_t)(c & 0x3F); c >>= 6; c |= (16 << 12); // set bit 4 case 3: buf[2] = 0x80 | (uint8_t)(c & 0x3F); c >>= 6; c |= (32 << 6); // set bit 5 case 2: buf[1] = 0x80 | (uint8_t)(c & 0x3F); c >>= 6; c |= 0xC0; // set bits 6 and 7 case 1: buf[0] = (uint8_t)c; } for (unsigned i = 0; i < size; ++i) { push_byte(reader, dest, buf[i]); } *char_code = code; return true; }