Пример #1
0
/*
 * Skip over an XML comment.
 *
 * On entry the caller has already consumed "<!-". The next character
 * must be the second '-' of the comment opener; everything after that
 * is discarded until a '>' that is immediately preceded by at least
 * two '-' characters.
 *
 * Returns Comment once the terminator was found, or None (after
 * reporting a parse error) when the opener is malformed or the input
 * ends inside the comment.
 */
xml_token_type_t
xml_skip_comment(xml_reader_t *xr)
{
	int dashes = 0;	/* length of the current run of consecutive '-' */
	int ch;

	if (xml_getc(xr) != '-') {
		xml_parse_error(xr, "Unexpected <!-...> element");
		return None;
	}

	for (ch = xml_getc(xr); ch != EOF; ch = xml_getc(xr)) {
		if (ch == '-') {
			dashes++;
			continue;
		}

		if (ch == '>' && dashes >= 2) {
#ifdef XMLDEBUG_PARSER
			xml_debug("Processed comment\n");
#endif
			return Comment;
		}

		/* Any other character interrupts the run of dashes. */
		dashes = 0;
	}

	xml_parse_error(xr, "Unexpected end of file while parsing comment");
	return None;
}
Пример #2
0
/*
 * Read the body of a CDATA section into @res.
 *
 * On entry the caller has consumed the section introducer up to and
 * including the CDATA keyword; the next character must be '['.
 * Characters are copied verbatim to @res until the terminating "]]>"
 * is seen. The terminator itself is not copied.
 *
 * Returns CData and resets the reader state to Initial on success.
 * Returns None (after reporting a parse error) on an unexpected
 * character after the keyword or on end of input.
 */
xml_token_type_t
xml_process_cdata(xml_reader_t *xr, string_t *res)
{
	int pending = 0;	/* ']' characters seen but not yet emitted (0..2) */
	int ch;

	ch = xml_getc(xr);
	if (ch == EOF)
		goto unexpected_eof;
	if (ch != '[') {
		xml_parse_error(xr, "Unexpected '%c' after <[CDATA in XML stream", ch);
		return None;
	}

	for (;;) {
		ch = xml_getc(xr);
		if (ch == EOF)
			goto unexpected_eof;

		if (ch == ']') {
			/* Hold back at most two brackets; with two already
			 * pending, the oldest one is plain data. */
			if (pending < 2)
				pending++;
			else
				string_putc(res, ']');
			continue;
		}

		/* "]]" followed by '>' terminates the section. */
		if (ch == '>' && pending == 2)
			break;

		/* Not a terminator: the held-back brackets were data. */
		for (; pending > 0; pending--)
			string_putc(res, ']');
		string_putc(res, ch);
	}

	xr->state = Initial;
	return CData;

unexpected_eof:
	xml_parse_error(xr, "Unexpected EOF after <[CDATA in XML stream");
	return None;
}
Пример #3
0
/*
 * Expand an XML entity reference and append the result to @res.
 *
 * On entry the caller has consumed the leading '&'; this function reads
 * up to and including the terminating ';'. White space inside the
 * reference is ignored.
 *
 * Supported references:
 *   - the predefined entities lt, gt, amp, quot and apos
 *     (matched case-insensitively),
 *   - decimal character references   &#NNN;
 *   - hexadecimal character references &#xHHH;
 *
 * Numeric references are parsed strictly: decimal references are always
 * read in base 10 (a leading zero does not mean octal), and the digit
 * string must be non-empty and consist of digits only.
 *
 * NOTE(review): the numeric value is handed to string_putc() as a single
 * character; code points that do not fit into one char are presumably
 * truncated there - confirm against string_putc().
 *
 * Returns true on success, false (after reporting a parse error) on EOF,
 * on an over-long or empty reference, or on an unknown entity.
 */
bool
xml_expand_entity(xml_reader_t *xr, string_t *res)
{
	char entity[128];
	unsigned int elen = 0;
	int cc, expanded;

	while ((cc = xml_getc(xr)) != ';') {
		if (cc == EOF) {
			xml_parse_error(xr, "Unexpected EOF in entity");
			return false;
		}
		if (isspace(cc))
			continue;
		/* Keep one byte free for the terminating NUL. */
		if (elen + 1 >= sizeof(entity)) {
			xml_parse_error(xr, "Entity string too long");
			return false;
		}
		entity[elen++] = cc;
	}
	entity[elen] = '\0';

	if (elen == 0) {
		xml_parse_error(xr, "Empty entity &;");
		return false;
	}

	if (!strcasecmp(entity, "lt")) {
		expanded = '<';
	} else if (!strcasecmp(entity, "gt")) {
		expanded = '>';
	} else if (!strcasecmp(entity, "amp")) {
		expanded = '&';
	} else if (!strcasecmp(entity, "quot")) {
		expanded = '"';
	} else if (!strcasecmp(entity, "apos")) {
		expanded = '\'';
	} else if (entity[0] == '#') {
		const char *digits = entity + 1;
		const char *p;
		int base = 10;

		if (*digits == 'x' || *digits == 'X') {
			base = 16;
			digits++;
		}

		/* Validate by hand: strtoul() alone would also accept
		 * signs, a "0x" prefix and an empty digit string. */
		for (p = digits; *p != '\0'; p++) {
			unsigned char d = (unsigned char) *p;

			if (base == 16 ? !isxdigit(d) : !isdigit(d))
				goto unknown;
		}
		if (p == digits)
			goto unknown;

		expanded = strtoul(digits, NULL, base);
	} else {
		goto unknown;
	}

	string_putc(res, expanded);
	return true;

unknown:
	xml_parse_error(xr, "Cannot expand unknown entity &%s;", entity);
	return false;
}
Пример #4
0
/*
 * Expand an XML entity reference and append the result to @res.
 *
 * On entry the caller has consumed the leading '&'; this function reads
 * up to and including the terminating ';'. White space inside the
 * reference is ignored.
 *
 * Supported references:
 *   - the predefined entities lt, gt, amp, quot and apos
 *     (matched case-insensitively),
 *   - decimal character references   &#NNN;
 *   - hexadecimal character references &#xHHH;
 *
 * Numeric references are parsed strictly: decimal references are always
 * read in base 10 (a leading zero does not mean octal), and the digit
 * string must be non-empty and consist of digits only.
 *
 * NOTE(review): the numeric value is handed to ni_stringbuf_putc() as a
 * single character; code points that do not fit into one char are
 * presumably truncated there - confirm against ni_stringbuf_putc().
 *
 * Returns TRUE on success, FALSE (after reporting a parse error) on EOF,
 * on an over-long or empty reference, or on an unknown entity.
 */
ni_bool_t
xml_expand_entity(xml_reader_t *xr, ni_stringbuf_t *res)
{
	/* Zero-filled so the collected name is a valid, NUL-terminated
	 * string even if nothing was stored in it. */
	char temp[128] = { 0 };
	ni_stringbuf_t entity = NI_STRINGBUF_INIT_BUFFER(temp);
	int cc, expanded;

	while ((cc = xml_getc(xr)) != ';') {
		if (cc == EOF) {
			xml_parse_error(xr, "Unexpected EOF in entity");
			return FALSE;
		}
		if (isspace(cc))
			continue;
		/* Keep one byte free for the terminating NUL. */
		if (entity.len + sizeof(char) >= entity.size) {
			xml_parse_error(xr, "Entity is too long");
			return FALSE;
		}
		ni_stringbuf_putc(&entity, cc);
	}

	if (entity.len == 0 || ni_string_empty(entity.string)) {
		xml_parse_error(xr, "Empty entity &;");
		return FALSE;
	}

	if (!strcasecmp(entity.string, "lt")) {
		expanded = '<';
	} else if (!strcasecmp(entity.string, "gt")) {
		expanded = '>';
	} else if (!strcasecmp(entity.string, "amp")) {
		expanded = '&';
	} else if (!strcasecmp(entity.string, "quot")) {
		expanded = '"';
	} else if (!strcasecmp(entity.string, "apos")) {
		expanded = '\'';
	} else if (entity.string[0] == '#') {
		const char *digits = entity.string + 1;
		const char *p;
		int base = 10;

		if (*digits == 'x' || *digits == 'X') {
			base = 16;
			digits++;
		}

		/* Validate by hand: strtoul() alone would also accept
		 * signs, a "0x" prefix and an empty digit string. */
		for (p = digits; *p != '\0'; p++) {
			unsigned char d = (unsigned char) *p;

			if (base == 16 ? !isxdigit(d) : !isdigit(d))
				goto unknown;
		}
		if (p == digits)
			goto unknown;

		expanded = strtoul(digits, NULL, base);
	} else {
		goto unknown;
	}

	ni_stringbuf_putc(res, expanded);
	return TRUE;

unknown:
	xml_parse_error(xr, "Cannot expand unknown entity &%s;", entity.string);
	return FALSE;
}
Пример #5
0
/*
 * Consume white space from the input stream.
 *
 * Every white space character read is appended to @result when
 * @result is not NULL. The first non-space character is pushed back
 * onto the reader, so the next xml_getc() returns it. Reading stops
 * silently at end of input.
 */
void
xml_skip_space(xml_reader_t *xr, ni_stringbuf_t *result)
{
	int ch;

	for (ch = xml_getc(xr); ch != EOF; ch = xml_getc(xr)) {
		if (!isspace(ch)) {
			xml_ungetc(xr, ch);
			return;
		}

		if (result != NULL)
			ni_stringbuf_putc(result, ch);
	}
}
Пример #6
0
/*
 * Obtain the next token while the reader is inside a tag.
 *
 * Leading white space is skipped and discarded. The token text is
 * appended to @res; for quoted strings @res is cleared first and then
 * receives the string contents without the surrounding quotes.
 *
 * Tokens recognized:
 *   ">"            RightAngle       (reader state reset to Initial)
 *   "?>"           RightAngleQ      (reader state reset to Initial)
 *   "/>"           RightAngleSlash  (reader state reset to Initial)
 *   "="            Equals
 *   identifier     Identifier: starts with a letter, '_' or '!' and
 *                  continues with alphanumerics, '_', '!', ':' or '-'
 *   '...' / "..."  QuotedString
 *
 * Returns None after reporting a parse error on any other character or
 * when the input ends in the middle of a token.
 */
xml_token_type_t
xml_get_token_tag(xml_reader_t *xr, ni_stringbuf_t *res)
{
	int cc, quote;

	xml_skip_space(xr, NULL);

	cc = xml_getc(xr);
	if (cc == EOF)
		goto unexpected_eof;

	ni_stringbuf_putc(res, cc);

	switch (cc) {
	case '>':
		xr->state = Initial;
		return RightAngle;

	case '=':
		return Equals;

	case '?':
	case '/': {
		/* Both must be followed directly by '>' to close the tag. */
		xml_token_type_t token = (cc == '?') ? RightAngleQ : RightAngleSlash;

		cc = xml_getc(xr);
		if (cc == EOF)
			goto unexpected_eof;	/* EOF is not a printable "character" */
		if (cc != '>')
			goto error;
		ni_stringbuf_putc(res, cc);
		xr->state = Initial;
		return token;
	}

	case '\'':
	case '"':
		/* Drop the opening quote; only the contents are returned. */
		ni_stringbuf_clear(res);
		quote = cc;
		for (;;) {
			cc = xml_getc(xr);
			if (cc == EOF) {
				xml_parse_error(xr, "Unexpected EOF while parsing quoted string");
				return None;
			}
			if (cc == quote)
				break;
			ni_stringbuf_putc(res, cc);
		}
		return QuotedString;

	default:
		/* Identifier start: ASCII letter, '_' or '!'. */
		if (!((cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z') ||
		      cc == '_' || cc == '!'))
			goto error;

		while ((cc = xml_getc(xr)) != EOF) {
			if (!isalnum(cc) && cc != '_' && cc != '!' && cc != ':' && cc != '-') {
				/* First character past the identifier belongs
				 * to the next token. */
				xml_ungetc(xr, cc);
				break;
			}
			ni_stringbuf_putc(res, cc);
		}
		return Identifier;
	}

error:
	xml_parse_error(xr, "Unexpected character %c in XML document", cc);
	return None;

unexpected_eof:
	xml_parse_error(xr, "Unexpected EOF while parsing tag");
	return None;
}
Пример #7
0
/*
 * Obtain the next token while the reader is outside of any tag.
 *
 * Returns
 *   EndOfDocument    when the input is exhausted (@res is cleared),
 *   LeftAngle        for "<",
 *   LeftAngleSlash   for "</",
 *   LeftAngleQ       for "<?",
 *   LeftAngleExclam  for "<!" not followed by '-',
 *   CData            for character data up to the next '<' or end of
 *                    input, with entity references expanded,
 *   None             after a parse error has been reported.
 *
 * Comments are skipped transparently. For the "<..." tokens the reader
 * state is switched to Tag and @res holds the token text.
 */
xml_token_type_t
xml_get_token_initial(xml_reader_t *xr, ni_stringbuf_t *res)
{
	int ch;

	for (;;) {
		xml_token_type_t tok;

		/* Leading white space is collected in @res; it is kept only
		 * if character data follows. */
		xml_skip_space(xr, res);

		ch = xml_getc(xr);
		if (ch == EOF) {
			ni_stringbuf_clear(res);
			return EndOfDocument;
		}

		if (ch != '<')
			break;	/* character data, handled below */

		/* White space in front of a tag is of no interest. */
		ni_stringbuf_clear(res);
		ni_stringbuf_putc(res, ch);

		if (xr->state != Initial) {
			xml_parse_error(xr, "Unexpected < in XML stream (state %s)",
					xml_parser_state_name(xr->state));
			return None;
		}

		/* A tag may start here. */
		xr->state = Tag;

		ch = xml_getc(xr);
		if (ch == '/') {
			ni_stringbuf_putc(res, ch);
			return LeftAngleSlash;
		}
		if (ch == '?') {
			ni_stringbuf_putc(res, ch);
			return LeftAngleQ;
		}
		if (ch != '!') {
			xml_ungetc(xr, ch);
			return LeftAngle;
		}

		ni_stringbuf_putc(res, ch);

		/* "<!" followed by anything but '-' is returned as
		 * LeftAngleExclam; "<!-" starts a comment. */
		ch = xml_getc(xr);
		if (ch != '-') {
			xml_ungetc(xr, ch);
			return LeftAngleExclam;
		}

		tok = xml_skip_comment(xr);
		if (tok != Comment)
			return tok;

		/* Comment skipped: start over as if it had not been there. */
		xr->state = Initial;
		ni_stringbuf_clear(res);
	}

	/*
	 * Character data: copy everything up to the next '<' (which is
	 * pushed back for the following call) or the end of input.
	 * FIXME: handle comments within CDATA?
	 */
	while (ch != EOF) {
		if (ch == '<') {
			xml_ungetc(xr, ch);
			break;
		}

		if (ch == '&') {
			if (!xml_expand_entity(xr, res))
				return None;
		} else {
			ni_stringbuf_putc(res, ch);
		}

		ch = xml_getc(xr);
	}

	ni_stringbuf_trim_empty_lines(res);

	return CData;
}