Example #1
0
/*
 * Process command. When we get here, we've processed "<!-"
 */
xml_token_type_t
xml_skip_comment(xml_reader_t *xr)
{
	int match = 0, cc;

	if (xml_getc(xr) != '-') {
		xml_parse_error(xr, "Unexpected <!-...> element");
		return None;
	}

	while ((cc = xml_getc(xr)) != EOF) {
		if (cc == '-') {
			match++;
		} else {
			if (cc == '>' && match >= 2) {
#ifdef XMLDEBUG_PARSER
				xml_debug("Processed comment\n");
#endif
				return Comment;
			}
			match = 0;
		}
	}

	xml_parse_error(xr, "Unexpected end of file while parsing comment");
	return None;
}
Example #2
0
/*
 * Expand an XML entity.
 * For now, we support &<number>; as well as symbolic entities
 *   lt gt amp
 */
bool
xml_expand_entity(xml_reader_t *xr, string_t *res)
{
	char entity[128];
	unsigned int elen = 0;
	int cc, expanded;

	while ((cc = xml_getc(xr)) != ';') {
		if (cc == EOF) {
			xml_parse_error(xr, "Unexpenced EOF in entity");
			return false;
		}
		if (isspace(cc))
			continue;
		if (elen + 1 >= sizeof(entity)) {
			xml_parse_error(xr, "Entity string too long");
			return false;
		}
		entity[elen++] = cc;
	}
	entity[elen] = '\0';

	if (elen == 0) {
		xml_parse_error(xr, "Empty entity &;");
		return false;
	}

	if (!strcasecmp(entity, "lt"))
		expanded = '<';
	else if (!strcasecmp(entity, "gt"))
		expanded = '>';
	else if (!strcasecmp(entity, "amp"))
		expanded = '&';
	else {
		const char *es = entity;

		if (*es == '#') {
			expanded = strtoul(es + 1, (char **) &es, 0);
			if (*es == '\0')
				goto good;
		}

		xml_parse_error(xr, "Cannot expand unknown entity &%s;", entity);
		return false;
	}

good:
	string_putc(res, expanded);
	return true;
}
Example #3
0
/*
 * Expand an XML entity.
 * For now, we support &<number>; as well as symbolic entities
 *   lt gt amp
 */
ni_bool_t
xml_expand_entity(xml_reader_t *xr, ni_stringbuf_t *res)
{
	char temp[128];
	ni_stringbuf_t entity = NI_STRINGBUF_INIT_BUFFER(temp);
	int cc, expanded;

	while ((cc = xml_getc(xr)) != ';') {
		if (cc == EOF) {
			xml_parse_error(xr, "Unexpenced EOF in entity");
			return FALSE;
		}
		if (isspace(cc))
			continue;
		if (entity.len + sizeof(char) >= entity.size) {
			xml_parse_error(xr, "Entity is too long");
			return FALSE;
		}
		ni_stringbuf_putc(&entity, cc);
	}

	if (ni_string_empty(entity.string)) {
		xml_parse_error(xr, "Empty entity &;");
		return FALSE;
	}

	if (!strcasecmp(entity.string, "lt"))
		expanded = '<';
	else if (!strcasecmp(entity.string, "gt"))
		expanded = '>';
	else if (!strcasecmp(entity.string, "amp"))
		expanded = '&';
	else {
		const char *es = entity.string;

		if (*es == '#') {
			expanded = strtoul(es + 1, (char **) &es, 0);
			if (*es == '\0')
				goto good;
		}

		xml_parse_error(xr, "Cannot expand unknown entity &%s;", entity.string);
		return FALSE;
	}

good:
	ni_stringbuf_putc(res, expanded);
	return TRUE;
}
Example #4
0
/*
 * Get the next token from the XML stream
 */
xml_token_type_t
xml_get_token(xml_reader_t *xr, ni_stringbuf_t *res)
{
#ifdef XMLDEBUG_PARSER
	xml_parser_state_t old_state = xr->state;
#endif
	xml_token_type_t token;

	ni_stringbuf_clear(res);
	switch (xr->state) {
	default:
		xml_parse_error(xr, "Unexpected state %u in XML reader", xr->state);
		return None;

	case Error:
		return None;

	case Initial:
		token = xml_get_token_initial(xr, res);
		break;

	case Tag:
		token = xml_get_token_tag(xr, res);
		break;
	}

	xml_debug("++ %3u %-7s %-10s (%s)\n",
			xr->lineCount,
			xml_parser_state_name(old_state),
			xml_token_name(token),
			res->string?: "");
	return token;
}
Example #5
0
xml_token_type_t
xml_get_tag_attributes(xml_reader_t *xr, xml_node_t *node)
{
	ni_stringbuf_t tokenValue, attrName, attrValue;
	xml_token_type_t token;

	ni_stringbuf_init(&tokenValue);
	ni_stringbuf_init(&attrName);
	ni_stringbuf_init(&attrValue);

	token = xml_get_token(xr, &tokenValue);
	while (1) {
		if (token == RightAngle || token == RightAngleQ || token == RightAngleSlash)
			break;

		if (token != Identifier) {
			xml_parse_error(xr, "Unexpected token in tag attributes");
			token = None;
			break;
		}

		ni_stringbuf_move(&attrName, &tokenValue);

		token = xml_get_token(xr, &tokenValue);
		if (token != Equals) {
			xml_node_add_attr(node, attrName.string, NULL);
			continue;
		}

		token = xml_get_token(xr, &tokenValue);
		if (token != QuotedString) {
			xml_parse_error(xr, "Attribute value not a quoted string!");
			token = None;
			break;
		}

		xml_debug("  attr %s=%s\n", attrName.string, tokenValue.string);
		xml_node_add_attr(node, attrName.string, tokenValue.string);

		token = xml_get_token(xr, &tokenValue);
	}

	ni_stringbuf_destroy(&tokenValue);
	ni_stringbuf_destroy(&attrName);
	ni_stringbuf_destroy(&attrValue);
	return token;
}
Example #6
0
/*
 * Process CDATA. When we get here, we've processed "<[CDATA"
 */
xml_token_type_t
xml_process_cdata(xml_reader_t *xr, string_t *res)
{
	int cc, state = 0;

	cc = xml_getc(xr);
	if (cc == EOF)
		goto unexpected_eof;
	if (cc != '[') {
		xml_parse_error(xr, "Unexpected '%c' after <[CDATA in XML stream", cc);
		return None;
	}

	while (state != 3) {
		cc = xml_getc(xr);
		if (cc == EOF)
			goto unexpected_eof;

		if (cc == ']') {
			if (state == 2) {
				string_putc(res, ']');
			} else {
				++state;
			}
		} else
		if (cc == '>' && state == 2) {
			++state;
		} else {
			while (state) {
				string_putc(res, ']');
				state--;
			}
			string_putc(res, cc);
		}
	}

	xr->state = Initial;
	return CData;

unexpected_eof:
	xml_parse_error(xr, "Unexpected EOF after <[CDATA in XML stream");
	return None;
}
Example #7
0
xml_token_type_t
xml_get_token_tag(xml_reader_t *xr, ni_stringbuf_t *res)
{
	int cc, oc;

	xml_skip_space(xr, NULL);

	cc = xml_getc(xr);
	if (cc == EOF) {
		xml_parse_error(xr, "Unexpected EOF while parsing tag");
		return None;
	}

	ni_stringbuf_putc(res, cc);

	switch (cc) {
	case '<':
		goto error;

	case '?':
		if ((cc = xml_getc(xr)) != '>')
			goto error;
		ni_stringbuf_putc(res, cc);
		xr->state = Initial;
		return RightAngleQ;

	case '>':
		xr->state = Initial;
		return RightAngle;

	case '/':
		if ((cc = xml_getc(xr)) != '>')
			goto error;
		ni_stringbuf_putc(res, cc);
		xr->state = Initial;
		return RightAngleSlash;

	case '=':
		return Equals;

	case 'a' ... 'z':
	case 'A' ... 'Z':
	case '_':
	case '!':
		while ((cc = xml_getc(xr)) != EOF) {
			if (!isalnum(cc) && cc != '_' && cc != '!' && cc != ':' && cc != '-') {
				xml_ungetc(xr, cc);
				break;
			}
			ni_stringbuf_putc(res, cc);
		}
		return Identifier;

	case '\'':
	case '"':
		ni_stringbuf_clear(res);
		oc = cc;
		while (1) {
			cc = xml_getc(xr);
			if (cc == EOF) {
				xml_parse_error(xr, "Unexpected EOF while parsing quoted string");
				return None;
			}
			if (cc == oc)
				break;
			ni_stringbuf_putc(res, cc);
		}
		return QuotedString;

	default:
		break;
	}

error:
	xml_parse_error(xr, "Unexpected character %c in XML document", cc);
	return None;
}
Example #8
0
/*
 * While in state Initial, obtain the next token
 */
xml_token_type_t
xml_get_token_initial(xml_reader_t *xr, ni_stringbuf_t *res)
{
	xml_token_type_t token;
	int cc;

restart:
	/* Eat initial white space and store it in @res */
	xml_skip_space(xr, res);

	cc = xml_getc(xr);
	if (cc == EOF) {
		ni_stringbuf_clear(res);
		return EndOfDocument;
	}

	if (cc == '<') {
		/* Discard the white space in @res - we're not interested in that. */
		ni_stringbuf_clear(res);

		ni_stringbuf_putc(res, cc);

		if (xr->state != Initial) {
			xml_parse_error(xr, "Unexpected < in XML stream (state %s)",
					xml_parser_state_name(xr->state));
			return None;
		}

		/* tag is legal here */
		xr->state = Tag;

		cc = xml_getc(xr);
		switch (cc) {
		case '/':
			ni_stringbuf_putc(res, cc);
			return LeftAngleSlash;
		case '?':
			ni_stringbuf_putc(res, cc);
			return LeftAngleQ;
		case '!':
			ni_stringbuf_putc(res, cc);

			/* If it's <!IDENTIFIER, return LeftAngleExclam */
			cc = xml_getc(xr);
			if (cc != '-') {
				xml_ungetc(xr, cc);
				return LeftAngleExclam;
			}

			token = xml_skip_comment(xr);
			if (token == Comment) {
				xr->state = Initial;
				ni_stringbuf_clear(res);
				goto restart;
			}
			return token;
		default:
			xml_ungetc(xr, cc);
			break;
		}
		return LeftAngle;
	}

	// Looks like CDATA. 
	// Ignore initial newline, then scan to next <
	do {
		if (cc == '<') {
			/* Looks like we're done.
			 * FIXME: handle comments within CDATA?
			 */
			xml_ungetc(xr, cc);
			break;
		} else
		if (cc == '&') {
			if (!xml_expand_entity(xr, res))
				return None;
		} else {
			ni_stringbuf_putc(res, cc);
		}

		cc = xml_getc(xr);
	} while (cc != EOF);

	ni_stringbuf_trim_empty_lines(res);

	return CData;
}
Example #9
0
ni_bool_t
xml_process_element_nested(xml_reader_t *xr, xml_node_t *cur, unsigned int nesting)
{
	ni_stringbuf_t tokenValue, identifier;
	xml_token_type_t token;
	xml_node_t *child;

	ni_stringbuf_init(&tokenValue);
	ni_stringbuf_init(&identifier);

	while (1) {
		token = xml_get_token(xr, &tokenValue);

		switch (token) {
		case CData:
			/* process element content */
			xml_node_set_cdata(cur, tokenValue.string);
			break;

		case LeftAngleExclam:
			/* Most likely <!DOCTYPE ...> */
			if (!xml_get_identifier(xr, &identifier)) {
				xml_parse_error(xr, "Bad element: tag open <! not followed by identifier");
				goto error;
			}

			if (strcmp(identifier.string, "DOCTYPE")) {
				xml_parse_error(xr, "Unexpected element: <!%s ...> not supported", identifier.string);
				goto error;
			}

			while (1) {
				token = xml_get_token(xr, &identifier);
				if (token == RightAngle)
					break;
				if (token == Identifier && !xr->doctype)
					ni_string_dup(&xr->doctype, identifier.string);
				if (token != Identifier && token != QuotedString) {
					xml_parse_error(xr, "Error parsing <!DOCTYPE ...> attributes");
					goto error;
				}
			}
			break;

		case LeftAngle:
			/* New element start */
			if (!xml_get_identifier(xr, &identifier)) {
				xml_parse_error(xr, "Bad element: tag open < not followed by identifier");
				goto error;
			}

			child = xml_node_new(identifier.string, cur);
			if (xr->shared_location)
				child->location = xml_location_new(xr->shared_location, xr->lineCount);

			token = xml_get_tag_attributes(xr, child);
			if (token == None) {
				xml_parse_error(xr, "Error parsing <%s ...> tag attributes", child->name);
				goto error;
			} else
			if (token == RightAngle) {
				/* Handle <foo>...</foo> */
				xml_debug("%*.*s<%s>\n", nesting, nesting, "", child->name);
				if (!xml_process_element_nested(xr, child, nesting + 2))
					goto error;
			} else if (token == RightAngleSlash) {
				/* We parsed a "<foo/>" element - nothing left to do, we're done */
				xml_debug("%*.*s<%s/>\n", nesting, nesting, "", child->name);
			} else {
				xml_parse_error(xr, "Unexpected token %s at end of <%s ...",
						xml_token_name(token), child->name);
				goto error;
			}

			break;

		case LeftAngleSlash:
			/* Element end */
			if (!xml_get_identifier(xr, &identifier)) {
				xml_parse_error(xr, "Bad element: end tag open </ not followed by identifier");
				goto error;
			}

			if (xml_get_token(xr, &tokenValue) != RightAngle) {
				xml_parse_error(xr, "Bad element: </%s - missing tag close", identifier.string);
				goto error;
			}

			if (cur->parent == NULL) {
				xml_parse_error(xr, "Unexpected </%s> tag", identifier.string);
				goto error;
			}
			if (strcmp(cur->name, identifier.string)) {
				xml_parse_error(xr, "Closing tag </%s> does not match <%s>",
						identifier.string, cur->name);
				goto error;
			}

			xml_debug("%*.*s</%s>\n", nesting, nesting, "", cur->name);
			goto success;

		case LeftAngleQ:
			/* New PI node starts here */
			if (!xml_get_identifier(xr, &identifier)) {
				xml_parse_error(xr, "Bad element: tag open <? not followed by identifier");
				goto error;
			}

			child = xml_node_new(identifier.string, NULL);
			if (xr->shared_location)
				child->location = xml_location_new(xr->shared_location, xr->lineCount);

			token = xml_get_tag_attributes(xr, child);
			if (token == None) {
				xml_parse_error(xr, "Error parsing <?%s ...?> tag attributes", child->name);
				xml_node_free(child);
				goto error;
			} else
			if (token == RightAngleQ) {
				xml_debug("%*.*s<%s>\n", nesting, nesting, "", child->name);
				xml_process_pi_node(xr, child);
				xml_node_free(child);
			} else {
				xml_parse_error(xr, "Unexpected token %s at end of <?%s ...",
						xml_token_name(token), child->name);
				xml_node_free(child);
				goto error;
			}

			break;

		case EndOfDocument:
			if (cur->parent) {
				xml_parse_error(xr, "End of document while processing element <%s>", cur->name);
				goto error;
			}
			goto success;

		case None:
			/* parser error */
			goto error;

		default:
			xml_parse_error(xr, "Unexpected token %s", xml_token_name(token));
			goto error;
		}
	}

success:
	ni_stringbuf_destroy(&tokenValue);
	ni_stringbuf_destroy(&identifier);
	return TRUE;

error:
	ni_stringbuf_destroy(&tokenValue);
	ni_stringbuf_destroy(&identifier);
	return FALSE;
}