Esempio n. 1
0
// Read one top-level statement: either a directive or a group of triples.
//
// Leading whitespace is skipped first.  A '\0' from peek_byte() is treated as
// the end of input: reader->eof is set and true is returned.  A statement that
// starts with '@' is handed to read_directive(); anything else is handed to
// read_triples() with a fresh context whose flags pointer refers to a local,
// zeroed SerdStatementFlags.
//
// Unless read_triples() reports (via ate_dot) that it already consumed the
// terminating '.', the statement must end with optional whitespace followed
// by '.'.
//
// Returns true on success or end of input, false otherwise.
static bool
read_statement(SerdReader* reader)
{
	SerdStatementFlags flags   = 0;
	ReadContext        ctx     = { 0, 0, 0, &flags };
	bool               ate_dot = false;

	read_ws_star(reader);
	switch (peek_byte(reader)) {
	case '\0':
		reader->eof = true;
		return true;
	case '@':
		// TRY_RET is defined elsewhere; it is assumed to return a false
		// value from this function when the directive cannot be read.
		TRY_RET(read_directive(reader));
		break;
	default:
		if (!read_triples(reader, ctx, &ate_dot)) {
			return false;
		} else if (ate_dot) {
			return true;  // Terminator was consumed while reading the triples
		}
		break;
	}

	// Shared tail for directives and triples: the terminating '.'
	read_ws_star(reader);
	return (eat_byte_check(reader, '.') == '.');
}
Esempio n. 2
0
// STRING_LITERAL_QUOTE and STRING_LITERAL_SINGLE_QUOTE
// Initial quote is already eaten by caller
//
// Reads a short (single-line) string into a new SERD_LITERAL node until the
// closing quote `q`, which is consumed.  A raw '\n' or '\r' is an error.  After
// a backslash, read_ECHAR() is tried first and read_UCHAR() second; if both
// fail the escape is reported as invalid.  Any other byte is passed to
// read_character().
//
// Returns the node reference on success.  On error the node is popped and the
// result of pop_node() is returned.
//
// NOTE(review): a '\0' from peek_byte() falls through to the default branch
// like any other byte (unless q is 0); confirm that end of input terminates
// this loop.
static Ref
read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q)
{
	Ref ref = push_node(reader, SERD_LITERAL, "", 0);

	// Every exit from this loop is a return, so nothing follows it
	while (true) {
		const uint8_t c = peek_byte(reader);
		uint32_t      code;  // Written by read_UCHAR, not used here
		switch (c) {
		case '\n': case '\r':
			r_err(reader, SERD_ERR_BAD_SYNTAX, "line end in short string\n");
			return pop_node(reader, ref);
		case '\\':
			eat_byte_safe(reader, c);
			if (!read_ECHAR(reader, ref, flags) &&
			    !read_UCHAR(reader, ref, &code)) {
				r_err(reader, SERD_ERR_BAD_SYNTAX,
				      "invalid escape `\\%c'\n", peek_byte(reader));
				return pop_node(reader, ref);
			}
			break;
		default:
			if (c == q) {
				// Closing quote: consume it and finish
				eat_byte_check(reader, q);
				return ref;
			}
			read_character(reader, ref, flags, eat_byte_safe(reader, c));
			break;
		}
	}
}
Esempio n. 3
0
// Read a string literal with an optional language tag or datatype.
//
// The string itself is read with read_String().  If it is followed by '@', a
// language tag is read into *lang; if it is followed by "^^", a datatype IRI
// is read into *datatype (ate_dot is forwarded to read_iri()).  On success
// *dest is set to the string node and true is returned.
//
// On failure after the string has been read, *datatype, *lang and the string
// node are popped (in that order) and false is returned; *dest is not written.
// The cleanup pops *datatype and *lang even when this call did not set them,
// so callers are assumed to pass them initialised (presumably to 0).
static bool
read_literal(SerdReader* reader, Ref* dest,
             Ref* datatype, Ref* lang, SerdNodeFlags* flags, bool* ate_dot)
{
	Ref str = read_String(reader, flags);
	if (!str) {
		return false;
	}

	switch (peek_byte(reader)) {
	case '@':
		eat_byte_safe(reader, '@');
		TRY_THROW(*lang = read_LANGTAG(reader));
		break;
	case '^':
		eat_byte_safe(reader, '^');
		// The datatype marker is "^^"; a single '^' is a syntax error and
		// must not be accepted as if the second '^' had been present.
		if (eat_byte_check(reader, '^') != '^') {
			goto except;
		}
		TRY_THROW(read_iri(reader, datatype, ate_dot));
		break;
	default:
		break;  // Plain literal with no language or datatype
	}
	*dest = str;
	return true;
except:
	pop_node(reader, *datatype);
	pop_node(reader, *lang);
	pop_node(reader, str);
	return false;
}
Esempio n. 4
0
// Consume the first `n` bytes of `str` from the input, in order.
//
// Each expected byte is passed to eat_byte_check(); the result of that call
// is not inspected here.
static inline void
eat_string(SerdReader* reader, const char* str, unsigned n)
{
	const uint8_t* next = (const uint8_t*)str;
	for (unsigned remaining = n; remaining > 0; --remaining) {
		eat_byte_check(reader, *next++);
	}
}
Esempio n. 5
0
// Read the remainder of a prefix directive: "prefix" WS+ name ':' WS* IRIREF.
//
// The prefix name and the IRI are read into temporary nodes.  If the reader
// has a prefix_sink, it is called with both nodes and its result is negated
// to form the return value (a zero result from the sink yields true).  Both
// temporary nodes are popped before returning.
static bool
read_prefixID(SerdReader* reader)
{
	// The leading `@' has already been consumed by read_directive
	eat_string(reader, "prefix", 6);
	TRY_RET(read_ws_plus(reader));

	Ref name = push_node(reader, SERD_LITERAL, "", 0);
	if (read_PN_PREFIX(reader, name) > SERD_FAILURE ||
	    eat_byte_check(reader, ':') != ':') {
		// Bad prefix name or missing ':' separator
		return pop_node(reader, name);
	}

	read_ws_star(reader);
	const Ref uri = read_IRIREF(reader);
	if (!uri) {
		pop_node(reader, name);
		return false;
	}

	bool sink_ok = true;
	if (reader->prefix_sink) {
		sink_ok = !reader->prefix_sink(reader->handle,
		                               deref(reader, name),
		                               deref(reader, uri));
	}

	// Release the temporaries in reverse order of creation
	pop_node(reader, uri);
	pop_node(reader, name);
	return sink_ok;
}
Esempio n. 6
0
// Read a blank node, dispatching on the next input byte:
//   '_'  a labelled blank node, read by read_BLANK_NODE_LABEL()
//   '['  an anonymous blank node, possibly with a predicate-object list
//   '('  a collection, read by read_collection()
//
// `ctx` is passed by value, so the assignment to ctx.subject below is local to
// this call, but ctx.flags is a pointer and every change made through it is
// visible to whoever owns the flags.  `subject` selects between the *_S and
// *_O flag variants.  The resulting node is stored in *dest.  `ate_dot` is
// only forwarded to read_BLANK_NODE_LABEL().
//
// Returns true on success, false on error.
//
// NOTE(review): in the '[' case *dest is read before it is necessarily
// written, so callers are assumed to pass it initialised to 0 -- confirm.
static bool
read_blank(SerdReader* reader, ReadContext ctx, bool subject, Ref* dest, bool* ate_dot)
{
	// Saved so the flags can be restored after a non-empty anonymous node
	const SerdStatementFlags old_flags = *ctx.flags;
	// Only assigned and used in the '[' case
	bool empty;
	switch (peek_byte(reader)) {
	case '_':
		// The Ref result is converted to bool: zero means failure
		return (*dest = read_BLANK_NODE_LABEL(reader, ate_dot));
	case '[':
		eat_byte_safe(reader, '[');
		if ((empty = peek_delim(reader, ']'))) {
			// "[]": nothing between the brackets
			*ctx.flags |= (subject) ? SERD_EMPTY_S : SERD_EMPTY_O;
		} else {
			*ctx.flags |= (subject) ? SERD_ANON_S_BEGIN : SERD_ANON_O_BEGIN;
			if (peek_delim(reader, '=')) {
				// Explicitly named node: "[ == <iri> ; ..."
				// NOTE(review): this failure path returns with *ctx.flags
				// still modified.
				if (!(*dest = read_blankName(reader)) ||
				    !eat_delim(reader, ';')) {
					return false;
				}
			}
		}

		// No explicit name was read, so generate an ID
		if (!*dest) {
			*dest = blank_id(reader);
		}
		// If there is an enclosing subject, emit a statement involving the
		// new node before reading its contents.  TRY_RET is defined
		// elsewhere; it is assumed to return failure from this function.
		if (ctx.subject) {
			TRY_RET(emit_statement(reader, ctx, *dest, 0, 0));
		}

		// Statements inside the brackets use the new node as their subject
		ctx.subject = *dest;
		if (!empty) {
			*ctx.flags &= ~(SERD_LIST_CONT);
			if (!subject) {
				*ctx.flags |= SERD_ANON_CONT;
			}
			bool ate_dot_in_list = false;
			// NOTE(review): the return value of read_predicateObjectList()
			// is not checked; only a consumed '.' is treated as an error.
			read_predicateObjectList(reader, ctx, &ate_dot_in_list);
			if (ate_dot_in_list) {
				// r_err()'s result is returned as the (false) status
				return r_err(reader, SERD_ERR_BAD_SYNTAX, "`.' inside blank\n");
			}
			read_ws_star(reader);
			if (reader->end_sink) {
				reader->end_sink(reader->handle, deref(reader, *dest));
			}
			// Flags are restored only on this path: the empty case and the
			// early error returns above leave them as modified.
			*ctx.flags = old_flags;
		}
		return (eat_byte_check(reader, ']') == ']');
	case '(':
		return read_collection(reader, ctx, dest);
	// Presumably callers only dispatch here on '_', '[' or '('
	default: return false;  // never reached
	}
}
Esempio n. 7
0
// Read an explicit name for an anonymous node: "==" WS* iri.
//
// The first '=' is consumed unconditionally; the second one is checked.  On
// success the IRI is read with read_iri() and the resulting reference is
// returned as-is: the status returned by read_iri() is not inspected, so the
// result is whatever read_iri() stored (initially 0).  If the second '=' is
// missing, the result of r_err() is returned.
static Ref
read_blankName(SerdReader* reader)
{
	eat_byte_safe(reader, '=');
	if (eat_byte_check(reader, '=') == '=') {
		Ref  name         = 0;
		bool dot_ignored  = false;  // Required by read_iri, unused here
		read_ws_star(reader);
		read_iri(reader, &name, &dot_ignored);
		return name;
	}

	return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected `='\n");
}
Esempio n. 8
0
// Read an IRI reference enclosed in angle brackets: '<' ... '>'.
//
// The opening '<' is checked and consumed here.  Bytes are appended to a new
// SERD_URI node until the closing '>' is consumed.  The characters
// " < ^ ` { | } are rejected.  A backslash must introduce a UCHAR escape, and
// the escaped code point may not be 0, space, '<' or '>'.  Bytes <= 0x20 are
// always reported as errors; they abort the read only if reader->strict is
// set, otherwise they are kept in the node.
//
// Returns the node reference on success.  On error the node is popped and the
// result of pop_node() is returned.
//
// NOTE(review): a '\0' from peek_byte() is handled like any other byte
// <= 0x20, so in non-strict mode it is pushed and reading continues; confirm
// that end of input cannot make this loop spin.
static Ref
read_IRIREF(SerdReader* reader)
{
	TRY_RET(eat_byte_check(reader, '<'));
	Ref      ref = push_node(reader, SERD_URI, "", 0);
	uint32_t code;
	while (true) {
		const uint8_t c = peek_byte(reader);
		switch (c) {
		case '"': case '<': case '^': case '`': case '{': case '|': case '}':
			r_err(reader, SERD_ERR_BAD_SYNTAX,
			      "invalid IRI character `%c'\n", c);
			return pop_node(reader, ref);
		case '>':
			eat_byte_safe(reader, c);
			return ref;
		case '\\':
			eat_byte_safe(reader, c);
			if (!read_UCHAR(reader, ref, &code)) {
				r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n");
				return pop_node(reader, ref);
			}
			switch (code) {
			case 0: case ' ': case '<': case '>':
				// Casts make the arguments match %X (unsigned) and %c (int)
				r_err(reader, SERD_ERR_BAD_SYNTAX,
				      "invalid escaped IRI character %X %c\n",
				      (unsigned)code, (int)code);
				return pop_node(reader, ref);
			default:
				break;
			}
			break;
		default:
			if (c <= 0x20) {
				if (isprint(c)) {
					r_err(reader, SERD_ERR_BAD_SYNTAX,
					      "invalid IRI character `%c' (escape %%%02X)\n",
					      c, (unsigned)c);
				} else {
					// One conversion, one argument
					r_err(reader, SERD_ERR_BAD_SYNTAX,
					      "invalid IRI character (escape %%%02X)\n",
					      (unsigned)c);
				}
				if (reader->strict) {
					return pop_node(reader, ref);
				}
			}
			// Ordinary byte, or a tolerated invalid one in non-strict mode
			push_byte(reader, ref, eat_byte_safe(reader, c));
			break;
		}
	}
}
Esempio n. 9
0
// Read a labelled blank node: "_:" followed by a name.
//
// The new SERD_BLANK node starts with reader->bprefix (or "" if there is
// none), followed by the label read from the input.  If the label ends with a
// '.' that turns out not to be part of the name, that byte is removed from
// the node and *ate_dot is set so the caller knows the dot was consumed.
//
// In Turtle, a label whose first character is 'b' and whose second is a digit
// has the 'b' rewritten to 'B' and reader->seen_genid is set; once that has
// happened, a label starting with 'B' and a digit is reported as an ID clash.
//
// Returns the node reference on success.  On error the node is popped and the
// result of pop_node() is returned.
static Ref
read_BLANK_NODE_LABEL(SerdReader* reader, bool* ate_dot)
{
	eat_byte_safe(reader, '_');
	eat_byte_check(reader, ':');

	Ref ref = push_node(
		reader,
		SERD_BLANK,
		reader->bprefix ? (char*)reader->bprefix : "",
		reader->bprefix_len);

	// First character: (PN_CHARS | '_' | [0-9])
	const uint8_t first = peek_byte(reader);
	if (is_digit(first) || first == '_') {
		push_byte(reader, ref, eat_byte_safe(reader, first));
	} else if (!read_PN_CHARS(reader, ref)) {
		r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid name start character\n");
		return pop_node(reader, ref);
	}

	// Remaining characters: (PN_CHARS | '.')*
	for (uint8_t c = peek_byte(reader); c; c = peek_byte(reader)) {
		if (c == '.') {
			push_byte(reader, ref, eat_byte_safe(reader, c));
			continue;
		}
		if (!read_PN_CHARS(reader, ref)) {
			break;
		}
	}

	SerdNode* n = deref(reader, ref);
	if (n->buf[n->n_bytes - 1] == '.' && !read_PN_CHARS(reader, ref)) {
		// The final '.' was not part of the label: drop it and tell the caller
		--n->n_bytes;
		serd_stack_pop(&reader->stack, 1);
		*ate_dot = true;
	}

	// Only Turtle labels of the form <letter><digit>... need the clash check
	if (reader->syntax != SERD_TURTLE ||
	    !is_digit(n->buf[reader->bprefix_len + 1])) {
		return ref;
	}

	if ((n->buf[reader->bprefix_len]) == 'b') {
		((char*)n->buf)[reader->bprefix_len] = 'B';  // Prevent clash
		reader->seen_genid = true;
	} else if (reader->seen_genid &&
	           n->buf[reader->bprefix_len] == 'B') {
		r_err(reader, SERD_ERR_ID_CLASH,
		      "found both `b' and `B' blank IDs, prefix required\n");
		return pop_node(reader, ref);
	}
	return ref;
}
Esempio n. 10
0
// Read a prefixed name (prefix ':' local) into the existing node `dest`.
//
// If `read_prefix` is true the prefix part is read from the input first;
// otherwise only the ':' and the local part are read.  A literal ':' is
// appended to `dest` between the two parts.  `ate_dot` is forwarded to
// read_PN_LOCAL().
//
// Returns false if the prefix read yields a status above SERD_FAILURE or the
// ':' is missing; otherwise returns whether read_PN_LOCAL() yields a status
// no greater than SERD_FAILURE.
static bool
read_PrefixedName(SerdReader* reader, Ref dest, bool read_prefix, bool* ate_dot)
{
	if (read_prefix && read_PN_PREFIX(reader, dest) > SERD_FAILURE) {
		return false;
	}

	const bool has_colon = (eat_byte_check(reader, ':') == ':');
	if (!has_colon) {
		return false;
	}

	push_byte(reader, dest, ':');
	return read_PN_LOCAL(reader, dest, ate_dot) <= SERD_FAILURE;
}