Пример #1
0
/*
 * Fetch the next token from the XML stream.
 *
 * @res is cleared first and then filled by the state-specific tokenizer.
 * The reader's current state selects which tokenizer runs:
 *   Initial -> xml_get_token_initial()
 *   Tag     -> xml_get_token_tag()
 * In state Error, None is returned right away; any other state is
 * reported through xml_parse_error() and also yields None.
 */
xml_token_type_t
xml_get_token(xml_reader_t *xr, ni_stringbuf_t *res)
{
#ifdef XMLDEBUG_PARSER
	/* Remember the state we started in, for the debug trace below */
	xml_parser_state_t prev_state = xr->state;
#endif
	xml_token_type_t tok;

	ni_stringbuf_clear(res);

	/* A reader that already failed produces no further tokens */
	if (xr->state == Error)
		return None;

	if (xr->state == Initial) {
		tok = xml_get_token_initial(xr, res);
	} else if (xr->state == Tag) {
		tok = xml_get_token_tag(xr, res);
	} else {
		xml_parse_error(xr, "Unexpected state %u in XML reader", xr->state);
		return None;
	}

	xml_debug("++ %3u %-7s %-10s (%s)\n",
			xr->lineCount,
			xml_parser_state_name(prev_state),
			xml_token_name(tok),
			res->string ? res->string : "");
	return tok;
}
Пример #2
0
/*
 * Move the contents of @src into @dest.
 *
 * Both buffers must have the same "dynamic" setting (checked via ni_assert).
 * @dest is cleared, then overwritten with a plain struct copy of @src.
 * Afterwards @src's string pointer and length are reset, so it no longer
 * refers to the data now held by @dest.
 */
void
ni_stringbuf_move(ni_stringbuf_t *dest, ni_stringbuf_t *src)
{
	ni_assert(dest->dynamic == src->dynamic);

	/* Drop whatever @dest held before taking over @src's fields */
	ni_stringbuf_clear(dest);
	*dest = *src;

	/* Detach @src from the data that was handed over */
	src->len = 0;
	src->string = NULL;
}
Пример #3
0
/*
 * Obtain the next token while inside a tag (xml_get_token() dispatches
 * here when the reader is in state Tag).
 *
 * Leading white space is skipped first. The token text is appended to @res,
 * except for quoted strings, where @res receives only the characters
 * between the quotes.
 *
 * Returns
 *   RightAngleQ      for "?>"  (reader state switches to Initial)
 *   RightAngle       for ">"   (reader state switches to Initial)
 *   RightAngleSlash  for "/>"  (reader state switches to Initial)
 *   Equals           for "="
 *   Identifier       for a name starting with a letter, '_' or '!' and
 *                    continuing with alphanumerics, '_', '!', ':' or '-'
 *   QuotedString     for text enclosed in matching ' or " characters
 *   None             after reporting a problem via xml_parse_error()
 */
xml_token_type_t
xml_get_token_tag(xml_reader_t *xr, ni_stringbuf_t *res)
{
	int cc, oc;

	xml_skip_space(xr, NULL);

	cc = xml_getc(xr);
	if (cc == EOF) {
		xml_parse_error(xr, "Unexpected EOF while parsing tag");
		return None;
	}

	ni_stringbuf_putc(res, cc);

	switch (cc) {
	case '<':
		/* A new tag cannot start inside a tag */
		goto error;

	case '?':
		if ((cc = xml_getc(xr)) != '>')
			goto error;
		ni_stringbuf_putc(res, cc);
		xr->state = Initial;
		return RightAngleQ;

	case '>':
		xr->state = Initial;
		return RightAngle;

	case '/':
		if ((cc = xml_getc(xr)) != '>')
			goto error;
		ni_stringbuf_putc(res, cc);
		xr->state = Initial;
		return RightAngleSlash;

	case '=':
		return Equals;

	case 'a' ... 'z':
	case 'A' ... 'Z':
	case '_':
	case '!':
		while ((cc = xml_getc(xr)) != EOF) {
			/* Cast: <ctype.h> functions require an unsigned char value or EOF */
			if (!isalnum((unsigned char) cc) && cc != '_' && cc != '!' && cc != ':' && cc != '-') {
				/* First character past the identifier; leave it for the next token */
				xml_ungetc(xr, cc);
				break;
			}
			ni_stringbuf_putc(res, cc);
		}
		return Identifier;

	case '\'':
	case '"':
		/* Drop the opening quote; only the quoted content is returned */
		ni_stringbuf_clear(res);
		oc = cc;
		while (1) {
			cc = xml_getc(xr);
			if (cc == EOF) {
				xml_parse_error(xr, "Unexpected EOF while parsing quoted string");
				return None;
			}
			/* The string ends at the same kind of quote that opened it */
			if (cc == oc)
				break;
			ni_stringbuf_putc(res, cc);
		}
		return QuotedString;

	default:
		break;
	}

error:
	/*
	 * cc may be EOF here (end of input right after '?' or '/');
	 * do not try to print it as a character in that case.
	 */
	if (cc == EOF)
		xml_parse_error(xr, "Unexpected EOF while parsing tag");
	else
		xml_parse_error(xr, "Unexpected character %c in XML document", cc);
	return None;
}
Пример #4
0
/*
 * Obtain the next token while the reader is in state Initial,
 * i.e. outside of any tag.
 *
 * Returns
 *   EndOfDocument    when the input ends (after optional white space)
 *   LeftAngleSlash   for "</"
 *   LeftAngleQ       for "<?"
 *   LeftAngleExclam  for "<!" not followed by '-'
 *   LeftAngle        for any other "<"
 *   CData            for character data up to the next '<' or end of input
 *   None             on error
 * When "<!-" introduces a comment, the comment is skipped and scanning
 * starts over; if xml_skip_comment() returns anything but Comment, that
 * value is passed on to the caller.
 *
 * Every "<..." token switches the reader to state Tag.
 */
xml_token_type_t
xml_get_token_initial(xml_reader_t *xr, ni_stringbuf_t *res)
{
	int cc;

	/* Each pass handles one "<..." construct; we loop only after a comment */
	for (;;) {
		xml_token_type_t skipped;

		/* Consume leading white space, collecting it in @res */
		xml_skip_space(xr, res);

		cc = xml_getc(xr);
		if (cc == EOF) {
			ni_stringbuf_clear(res);
			return EndOfDocument;
		}

		/* Anything other than '<' starts character data */
		if (cc != '<')
			break;

		/* The white space collected so far is of no interest before a tag */
		ni_stringbuf_clear(res);
		ni_stringbuf_putc(res, cc);

		if (xr->state != Initial) {
			xml_parse_error(xr, "Unexpected < in XML stream (state %s)",
					xml_parser_state_name(xr->state));
			return None;
		}

		/* A tag may start here */
		xr->state = Tag;

		cc = xml_getc(xr);
		if (cc == '/') {
			ni_stringbuf_putc(res, cc);
			return LeftAngleSlash;
		}
		if (cc == '?') {
			ni_stringbuf_putc(res, cc);
			return LeftAngleQ;
		}
		if (cc != '!') {
			/* Plain "<": the character belongs to the tag name */
			xml_ungetc(xr, cc);
			return LeftAngle;
		}

		ni_stringbuf_putc(res, cc);

		/* "<!" followed by anything but '-' is <!IDENTIFIER */
		cc = xml_getc(xr);
		if (cc != '-') {
			xml_ungetc(xr, cc);
			return LeftAngleExclam;
		}

		skipped = xml_skip_comment(xr);
		if (skipped != Comment)
			return skipped;

		/* Comment consumed: back to Initial and look for the next token */
		xr->state = Initial;
		ni_stringbuf_clear(res);
	}

	/*
	 * Character data: collect everything up to the next '<' (which is
	 * pushed back) or the end of input, expanding entities on the way.
	 * White space gathered by xml_skip_space() above is still in @res.
	 * FIXME: handle comments within CDATA?
	 */
	while (cc != EOF) {
		if (cc == '<') {
			xml_ungetc(xr, cc);
			break;
		}

		if (cc == '&') {
			if (!xml_expand_entity(xr, res))
				return None;
		} else {
			ni_stringbuf_putc(res, cc);
		}

		cc = xml_getc(xr);
	}

	ni_stringbuf_trim_empty_lines(res);

	return CData;
}
Пример #5
0
/*
 * Destroy a string buffer.
 * This simply delegates to ni_stringbuf_clear(); @sb itself is not freed here.
 */
void
ni_stringbuf_destroy(ni_stringbuf_t *sb)
{
	ni_stringbuf_clear(sb);
}