Beispiel #1
0
static bool
read_0_9(SerdReader* reader, Ref str, bool at_least_one)
{
	unsigned count = 0;
	for (uint8_t c; is_digit((c = peek_byte(reader))); ++count) {
		push_byte(reader, str, eat_byte_safe(reader, c));
	}
	if (at_least_one && count == 0) {
		r_err(reader, SERD_ERR_BAD_SYNTAX, "expected digit\n");
	}
	return count;
}
Beispiel #2
0
// Read ECHAR escape, initial \ is already eaten by caller
static inline bool
read_ECHAR(SerdReader* reader, Ref dest, SerdNodeFlags* flags)
{
	const uint8_t c = peek_byte(reader);
	switch (c) {
	case 't':
		eat_byte_safe(reader, 't');
		push_byte(reader, dest, '\t');
		return true;
	case 'b':
		eat_byte_safe(reader, 'b');
		push_byte(reader, dest, '\b');
		return true;
	case 'n':
		*flags |= SERD_HAS_NEWLINE;
		eat_byte_safe(reader, 'n');
		push_byte(reader, dest, '\n');
		return true;
	case 'r':
		*flags |= SERD_HAS_NEWLINE;
		eat_byte_safe(reader, 'r');
		push_byte(reader, dest, '\r');
		return true;
	case 'f':
		eat_byte_safe(reader, 'f');
		push_byte(reader, dest, '\f');
		return true;
	case '\\': case '"': case '\'':
		push_byte(reader, dest, eat_byte_safe(reader, c));
		return true;
	default:
		return false;
	}
}
Beispiel #3
0
static bool
read_blank(SerdReader* reader, ReadContext ctx, bool subject, Ref* dest, bool* ate_dot)
{
	const SerdStatementFlags old_flags = *ctx.flags;
	bool empty;
	switch (peek_byte(reader)) {
	case '_':
		return (*dest = read_BLANK_NODE_LABEL(reader, ate_dot));
	case '[':
		eat_byte_safe(reader, '[');
		if ((empty = peek_delim(reader, ']'))) {
			*ctx.flags |= (subject) ? SERD_EMPTY_S : SERD_EMPTY_O;
		} else {
			*ctx.flags |= (subject) ? SERD_ANON_S_BEGIN : SERD_ANON_O_BEGIN;
			if (peek_delim(reader, '=')) {
				if (!(*dest = read_blankName(reader)) ||
				    !eat_delim(reader, ';')) {
					return false;
				}
			}
		}

		if (!*dest) {
			*dest = blank_id(reader);
		}
		if (ctx.subject) {
			TRY_RET(emit_statement(reader, ctx, *dest, 0, 0));
		}

		ctx.subject = *dest;
		if (!empty) {
			*ctx.flags &= ~(SERD_LIST_CONT);
			if (!subject) {
				*ctx.flags |= SERD_ANON_CONT;
			}
			bool ate_dot_in_list = false;
			read_predicateObjectList(reader, ctx, &ate_dot_in_list);
			if (ate_dot_in_list) {
				return r_err(reader, SERD_ERR_BAD_SYNTAX, "`.' inside blank\n");
			}
			read_ws_star(reader);
			if (reader->end_sink) {
				reader->end_sink(reader->handle, deref(reader, *dest));
			}
			*ctx.flags = old_flags;
		}
		return (eat_byte_check(reader, ']') == ']');
	case '(':
		return read_collection(reader, ctx, dest);
	default: return false;  // never reached
	}
}
Beispiel #4
0
static bool
read_PERCENT(SerdReader* reader, Ref dest)
{
	push_byte(reader, dest, eat_byte_safe(reader, '%'));
	const uint8_t h1 = read_HEX(reader);
	const uint8_t h2 = read_HEX(reader);
	if (h1 && h2) {
		push_byte(reader, dest, h1);
		push_byte(reader, dest, h2);
		return true;
	}
	return false;
}
Beispiel #5
0
static SerdStatus
read_PN_LOCAL(SerdReader* reader, Ref dest, bool* ate_dot)
{
	uint8_t    c = peek_byte(reader);
	SerdStatus st;
	switch (c) {
	case '0': case '1': case '2': case '3': case '4': case '5':
	case '6': case '7': case '8': case '9': case ':': case '_':
		push_byte(reader, dest, eat_byte_safe(reader, c));
		break;
	default:
		if ((st = read_PLX(reader, dest)) > SERD_FAILURE) {
			return st;
		} else if (st != SERD_SUCCESS && !read_PN_CHARS_BASE(reader, dest)) {
			return SERD_FAILURE;
		}
	}

	while ((c = peek_byte(reader))) {  // Middle: (PN_CHARS | '.' | ';')*
		if (c == '.' || c == ':') {
			push_byte(reader, dest, eat_byte_safe(reader, c));
		} else if ((st = read_PLX(reader, dest)) > SERD_FAILURE) {
			return st;
		} else if (st != SERD_SUCCESS && !read_PN_CHARS(reader, dest)) {
			break;
		}
	}

	SerdNode* const n = deref(reader, dest);
	if (n->buf[n->n_bytes - 1] == '.') {
		// Ate trailing dot, pop it from stack/node and inform caller
		--n->n_bytes;
		serd_stack_pop(&reader->stack, 1);
		*ate_dot = true;
	}

	return SERD_SUCCESS;
}
Beispiel #6
0
static inline SerdStatus
bad_char(SerdReader* reader, Ref dest, const char* fmt, uint8_t c)
{
	r_err(reader, SERD_ERR_BAD_SYNTAX, fmt, c);
	push_replacement(reader, dest);

	// Skip bytes until the next start byte
	for (uint8_t b = peek_byte(reader); (b & 0x80);) {
		eat_byte_safe(reader, b);
		b = peek_byte(reader);
	}

	return SERD_SUCCESS;
}
Beispiel #7
0
static Ref
read_blankName(SerdReader* reader)
{
	eat_byte_safe(reader, '=');
	if (eat_byte_check(reader, '=') != '=') {
		return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected `='\n");
	}

	Ref  subject = 0;
	bool ate_dot = false;
	read_ws_star(reader);
	read_iri(reader, &subject, &ate_dot);
	return subject;
}
Beispiel #8
0
// STRING_LITERAL_LONG_QUOTE and STRING_LITERAL_LONG_SINGLE_QUOTE
// Initial triple quotes are already eaten by caller
static Ref
read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q)
{
	Ref ref = push_node(reader, SERD_LITERAL, "", 0);
	while (true) {
		const uint8_t c = peek_byte(reader);
		uint32_t      code;
		switch (c) {
		case '\\':
			eat_byte_safe(reader, c);
			if (!read_ECHAR(reader, ref, flags) &&
			    !read_UCHAR(reader, ref, &code)) {
				r_err(reader, SERD_ERR_BAD_SYNTAX,
				      "invalid escape `\\%c'\n", peek_byte(reader));
				return pop_node(reader, ref);
			}
			break;
		default:
			if (c == q) {
				eat_byte_safe(reader, q);
				const uint8_t q2 = eat_byte_safe(reader, peek_byte(reader));
				const uint8_t q3 = peek_byte(reader);
				if (q2 == q && q3 == q) {  // End of string
					eat_byte_safe(reader, q3);
					return ref;
				} else {
					*flags |= SERD_HAS_QUOTE;
					push_byte(reader, ref, c);
					read_character(reader, ref, flags, q2);
				}
			} else {
				read_character(reader, ref, flags, eat_byte_safe(reader, c));
			}
		}
	}
	return ref;
}
Beispiel #9
0
static SerdStatus
read_PLX(SerdReader* reader, Ref dest)
{
	uint8_t c = peek_byte(reader);
	switch (c) {
	case '%':
		if (!read_PERCENT(reader, dest)) {
			return SERD_ERR_BAD_SYNTAX;
		}
		return SERD_SUCCESS;
	case '\\':
		eat_byte_safe(reader, c);
		if (is_alpha(c = peek_byte(reader))) {
			// Escapes like \u \n etc. are not supported
			return SERD_ERR_BAD_SYNTAX;
		} else {
			// Allow escaping of pretty much any other character
			push_byte(reader, dest, eat_byte_safe(reader, c));
			return SERD_SUCCESS;
		}
	default:
		return SERD_FAILURE;
	}
}
Beispiel #10
0
// [24] ws ::= #x9 | #xA | #xD | #x20 | comment
static inline bool
read_ws(SerdReader* reader)
{
	const uint8_t c = peek_byte(reader);
	switch (c) {
	case 0x9: case 0xA: case 0xD: case 0x20:
		eat_byte_safe(reader, c);
		return true;
	case '#':
		read_comment(reader);
		return true;
	default:
		return false;
	}
}
Beispiel #11
0
// Read the remainder of a PN_PREFIX after some initial characters
static SerdStatus
read_PN_PREFIX_tail(SerdReader* reader, Ref dest)
{
	uint8_t c;
	while ((c = peek_byte(reader))) {  // Middle: (PN_CHARS | '.')*
		if (c == '.') {
			push_byte(reader, dest, eat_byte_safe(reader, c));
		} else if (!read_PN_CHARS(reader, dest)) {
			break;
		}
	}

	const SerdNode* const n = deref(reader, dest);
	if (n->buf[n->n_bytes - 1] == '.' && !read_PN_CHARS(reader, dest)) {
		r_err(reader, SERD_ERR_BAD_SYNTAX, "prefix ends with `.'\n");
		return SERD_ERR_BAD_SYNTAX;
	}

	return SERD_SUCCESS;
}
Beispiel #12
0
static bool
read_number(SerdReader* reader, Ref* dest, Ref* datatype, bool* ate_dot)
{
	#define XSD_DECIMAL NS_XSD "decimal"
	#define XSD_DOUBLE  NS_XSD "double"
	#define XSD_INTEGER NS_XSD "integer"
	Ref     ref         = push_node(reader, SERD_LITERAL, "", 0);
	uint8_t c           = peek_byte(reader);
	bool    has_decimal = false;
	if (c == '-' || c == '+') {
		push_byte(reader, ref, eat_byte_safe(reader, c));
	}
	if ((c = peek_byte(reader)) == '.') {
		has_decimal = true;
		// decimal case 2 (e.g. '.0' or `-.0' or `+.0')
		push_byte(reader, ref, eat_byte_safe(reader, c));
		TRY_THROW(read_0_9(reader, ref, true));
	} else {
		// all other cases ::= ( '-' | '+' ) [0-9]+ ( . )? ( [0-9]+ )? ...
		TRY_THROW(is_digit(c));
		read_0_9(reader, ref, true);
		if ((c = peek_byte(reader)) == '.') {
			has_decimal = true;

			// Annoyingly, dot can be end of statement, so tentatively eat
			eat_byte_safe(reader, c);
			c = peek_byte(reader);
			if (!is_digit(c) && c != 'e' && c != 'E') {
				*dest    = ref;
				*ate_dot = true;  // Force caller to deal with stupid grammar
				return true;  // Next byte is not a number character, done
			}

			push_byte(reader, ref, '.');
			read_0_9(reader, ref, false);
		}
	}
	c = peek_byte(reader);
	if (c == 'e' || c == 'E') {
		// double
		push_byte(reader, ref, eat_byte_safe(reader, c));
		switch ((c = peek_byte(reader))) {
		case '+': case '-':
			push_byte(reader, ref, eat_byte_safe(reader, c));
		default: break;
		}
		TRY_THROW(read_0_9(reader, ref, true));
		*datatype = push_node(reader, SERD_URI,
		                      XSD_DOUBLE, sizeof(XSD_DOUBLE) - 1);
	} else if (has_decimal) {
		*datatype = push_node(reader, SERD_URI,
		                      XSD_DECIMAL, sizeof(XSD_DECIMAL) - 1);
	} else {
		*datatype = push_node(reader, SERD_URI,
		                      XSD_INTEGER, sizeof(XSD_INTEGER) - 1);
	}
	*dest = ref;
	return true;
except:
	pop_node(reader, *datatype);
	pop_node(reader, ref);
	return false;
}
Beispiel #13
0
// Read UCHAR escape, initial \ is already eaten by caller
static inline bool
read_UCHAR(SerdReader* reader, Ref dest, uint32_t* char_code)
{
	const uint8_t b      = peek_byte(reader);
	unsigned      length = 0;
	switch (b) {
	case 'U':
		length = 8;
		break;
	case 'u':
		length = 4;
		break;
	default:
		return false;
	}
	eat_byte_safe(reader, b);

	uint8_t buf[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
	for (unsigned i = 0; i < length; ++i) {
		if (!(buf[i] = read_HEX(reader))) {
			return false;
		}
	}

	uint32_t code;
	sscanf((const char*)buf, "%X", &code);

	unsigned size = 0;
	if (code < 0x00000080) {
		size = 1;
	} else if (code < 0x00000800) {
		size = 2;
	} else if (code < 0x00010000) {
		size = 3;
	} else if (code < 0x00110000) {
		size = 4;
	} else {
		r_err(reader, SERD_ERR_BAD_SYNTAX,
		      "unicode character 0x%X out of range\n", code);
		push_replacement(reader, dest);
		*char_code = 0xFFFD;
		return true;
	}

	// Build output in buf
	// (Note # of bytes = # of leading 1 bits in first byte)
	uint32_t c = code;
	switch (size) {
	case 4:
		buf[3] = 0x80 | (uint8_t)(c & 0x3F);
		c >>= 6;
		c |= (16 << 12);  // set bit 4
	case 3:
		buf[2] = 0x80 | (uint8_t)(c & 0x3F);
		c >>= 6;
		c |= (32 << 6);  // set bit 5
	case 2:
		buf[1] = 0x80 | (uint8_t)(c & 0x3F);
		c >>= 6;
		c |= 0xC0;  // set bits 6 and 7
	case 1:
		buf[0] = (uint8_t)c;
	}

	for (unsigned i = 0; i < size; ++i) {
		push_byte(reader, dest, buf[i]);
	}
	*char_code = code;
	return true;
}