示例#1
0
ni_bool_t
xml_process_element_nested(xml_reader_t *xr, xml_node_t *cur, unsigned int nesting)
{
	ni_stringbuf_t tokenValue, identifier;
	xml_token_type_t token;
	xml_node_t *child;

	ni_stringbuf_init(&tokenValue);
	ni_stringbuf_init(&identifier);

	while (1) {
		token = xml_get_token(xr, &tokenValue);

		switch (token) {
		case CData:
			/* process element content */
			xml_node_set_cdata(cur, tokenValue.string);
			break;

		case LeftAngleExclam:
			/* Most likely <!DOCTYPE ...> */
			if (!xml_get_identifier(xr, &identifier)) {
				xml_parse_error(xr, "Bad element: tag open <! not followed by identifier");
				goto error;
			}

			if (strcmp(identifier.string, "DOCTYPE")) {
				xml_parse_error(xr, "Unexpected element: <!%s ...> not supported", identifier.string);
				goto error;
			}

			while (1) {
				token = xml_get_token(xr, &identifier);
				if (token == RightAngle)
					break;
				if (token == Identifier && !xr->doctype)
					ni_string_dup(&xr->doctype, identifier.string);
				if (token != Identifier && token != QuotedString) {
					xml_parse_error(xr, "Error parsing <!DOCTYPE ...> attributes");
					goto error;
				}
			}
			break;

		case LeftAngle:
			/* New element start */
			if (!xml_get_identifier(xr, &identifier)) {
				xml_parse_error(xr, "Bad element: tag open < not followed by identifier");
				goto error;
			}

			child = xml_node_new(identifier.string, cur);
			if (xr->shared_location)
				child->location = xml_location_new(xr->shared_location, xr->lineCount);

			token = xml_get_tag_attributes(xr, child);
			if (token == None) {
				xml_parse_error(xr, "Error parsing <%s ...> tag attributes", child->name);
				goto error;
			} else
			if (token == RightAngle) {
				/* Handle <foo>...</foo> */
				xml_debug("%*.*s<%s>\n", nesting, nesting, "", child->name);
				if (!xml_process_element_nested(xr, child, nesting + 2))
					goto error;
			} else if (token == RightAngleSlash) {
				/* We parsed a "<foo/>" element - nothing left to do, we're done */
				xml_debug("%*.*s<%s/>\n", nesting, nesting, "", child->name);
			} else {
				xml_parse_error(xr, "Unexpected token %s at end of <%s ...",
						xml_token_name(token), child->name);
				goto error;
			}

			break;

		case LeftAngleSlash:
			/* Element end */
			if (!xml_get_identifier(xr, &identifier)) {
				xml_parse_error(xr, "Bad element: end tag open </ not followed by identifier");
				goto error;
			}

			if (xml_get_token(xr, &tokenValue) != RightAngle) {
				xml_parse_error(xr, "Bad element: </%s - missing tag close", identifier.string);
				goto error;
			}

			if (cur->parent == NULL) {
				xml_parse_error(xr, "Unexpected </%s> tag", identifier.string);
				goto error;
			}
			if (strcmp(cur->name, identifier.string)) {
				xml_parse_error(xr, "Closing tag </%s> does not match <%s>",
						identifier.string, cur->name);
				goto error;
			}

			xml_debug("%*.*s</%s>\n", nesting, nesting, "", cur->name);
			goto success;

		case LeftAngleQ:
			/* New PI node starts here */
			if (!xml_get_identifier(xr, &identifier)) {
				xml_parse_error(xr, "Bad element: tag open <? not followed by identifier");
				goto error;
			}

			child = xml_node_new(identifier.string, NULL);
			if (xr->shared_location)
				child->location = xml_location_new(xr->shared_location, xr->lineCount);

			token = xml_get_tag_attributes(xr, child);
			if (token == None) {
				xml_parse_error(xr, "Error parsing <?%s ...?> tag attributes", child->name);
				xml_node_free(child);
				goto error;
			} else
			if (token == RightAngleQ) {
				xml_debug("%*.*s<%s>\n", nesting, nesting, "", child->name);
				xml_process_pi_node(xr, child);
				xml_node_free(child);
			} else {
				xml_parse_error(xr, "Unexpected token %s at end of <?%s ...",
						xml_token_name(token), child->name);
				xml_node_free(child);
				goto error;
			}

			break;

		case EndOfDocument:
			if (cur->parent) {
				xml_parse_error(xr, "End of document while processing element <%s>", cur->name);
				goto error;
			}
			goto success;

		case None:
			/* parser error */
			goto error;

		default:
			xml_parse_error(xr, "Unexpected token %s", xml_token_name(token));
			goto error;
		}
	}

success:
	ni_stringbuf_destroy(&tokenValue);
	ni_stringbuf_destroy(&identifier);
	return TRUE;

error:
	ni_stringbuf_destroy(&tokenValue);
	ni_stringbuf_destroy(&identifier);
	return FALSE;
}
示例#2
0
/*
 * While in state Initial, obtain the next token
 */
xml_token_type_t
xml_get_token_initial(xml_reader_t *xr, string_t *res)
{
	xml_token_type_t token;
	int cc;

restart:
	/* Eat initial white space and store it in @res */
	xml_skip_space(xr, res);

	cc = xml_getc(xr);
	if (cc == EOF) {
		string_destroy(res);
		return EndOfDocument;
	}

	if (cc == '<') {
		/* Discard the white space in @res - we're not interested in that. */
		string_destroy(res);

		string_putc(res, cc);

		if (xr->state != Initial) {
			xml_parse_error(xr, "Unexpected < in XML stream (state %s)",
					xml_parser_state_name(xr->state));
			return None;
		}

		/* tag is legal here */
		xr->state = Tag;

		cc = xml_getc(xr);
		switch (cc) {
		case '/':
			string_putc(res, cc);
			return LeftAngleSlash;
		case '?':
			string_putc(res, cc);
			return LeftAngleQ;
		case '!':
			string_putc(res, cc);

			/* If it's <!IDENTIFIER, return LeftAngleExclam */
			cc = xml_getc(xr);
			if (cc == '[') {
				/* Looks like CDATA */
				if (!xml_get_identifier(xr, res) || strcmp("CDATA", res->string)) {
					xml_parse_error(xr, "Unexpected <[%s in XML stream", res->string);
					return None;
				}

				string_destroy(res);
				return xml_process_cdata(xr, res);
			}
			if (cc != '-') {
				xml_ungetc(xr, cc);
				return LeftAngleExclam;
			}

			token = xml_skip_comment(xr);
			if (token == Comment) {
				xr->state = Initial;
				string_destroy(res);
				goto restart;
			}
			return token;
		default:
			xml_ungetc(xr, cc);
			break;
		}
		return LeftAngle;
	}

	// Looks like CDATA. 
	// Ignore initial newline, then scan to next <
	do {
		if (cc == '<') {
			/* Looks like we're done.
			 * FIXME: handle comments within CDATA?
			 */
			xml_ungetc(xr, cc);
			break;
		} else
		if (cc == '&') {
			if (!xml_expand_entity(xr, res))
				return None;
		} else {
			string_putc(res, cc);
		}

		cc = xml_getc(xr);
	} while (cc != EOF);

	string_trim_empty_lines(res);

	return CData;
}