Example #1
0
/*
 * Read the payload of a CDATA section into @res.
 *
 * On entry the CDATA keyword of the section opener has already been
 * consumed; the next input character must be the '[' that introduces the
 * payload. Characters are then copied to @res until the terminating "]]>"
 * is seen. The terminator itself is not copied.
 *
 * Returns CData (and puts the reader back into state Initial) on success,
 * or None after reporting a parse error on a missing '[' or premature EOF.
 */
xml_token_type_t
xml_process_cdata(xml_reader_t *xr, string_t *res)
{
	/* How many ']' of a potential "]]>" terminator are currently pending. */
	int pending = 0;
	int ch;

	ch = xml_getc(xr);
	if (ch == EOF)
		goto premature_eof;
	if (ch != '[') {
		xml_parse_error(xr, "Unexpected '%c' after <[CDATA in XML stream", ch);
		return None;
	}

	for (;;) {
		ch = xml_getc(xr);
		if (ch == EOF)
			goto premature_eof;

		if (ch == ']') {
			/*
			 * With two brackets already pending, a third one means
			 * the oldest bracket was ordinary data: emit it and keep
			 * two pending.
			 */
			if (pending < 2)
				pending++;
			else
				string_putc(res, ']');
			continue;
		}

		/* "]]" followed by '>' closes the section. */
		if (ch == '>' && pending == 2)
			break;

		/* False alarm: the pending brackets were plain data. */
		for (; pending > 0; pending--)
			string_putc(res, ']');
		string_putc(res, ch);
	}

	xr->state = Initial;
	return CData;

premature_eof:
	xml_parse_error(xr, "Unexpected EOF after <[CDATA in XML stream");
	return None;
}
Example #2
0
/*
 * Expand an XML entity reference.
 *
 * Reads characters from @xr up to and including the terminating ';'
 * (white space inside the reference is skipped) and appends the single
 * expanded character to @res.
 *
 * Supported references:
 *   &lt; &gt; &amp;   symbolic entities, matched case-insensitively
 *   &#NNN;            decimal character reference
 *   &#xHHH;           hexadecimal character reference (an upper-case 'X'
 *                     is tolerated as well)
 *
 * Returns true on success. On EOF, or on an over-long, empty, malformed,
 * out-of-range or unknown reference, the problem is reported through
 * xml_parse_error() and false is returned.
 */
bool
xml_expand_entity(xml_reader_t *xr, string_t *res)
{
	char entity[128];
	unsigned int elen = 0;
	int cc, expanded;

	/* Collect the entity name, leaving room for the trailing NUL. */
	while ((cc = xml_getc(xr)) != ';') {
		if (cc == EOF) {
			xml_parse_error(xr, "Unexpected EOF in entity");
			return false;
		}
		if (isspace(cc))
			continue;
		if (elen + 1 >= sizeof(entity)) {
			xml_parse_error(xr, "Entity string too long");
			return false;
		}
		entity[elen++] = cc;
	}
	entity[elen] = '\0';

	if (elen == 0) {
		xml_parse_error(xr, "Empty entity &;");
		return false;
	}

	if (!strcasecmp(entity, "lt")) {
		expanded = '<';
	} else if (!strcasecmp(entity, "gt")) {
		expanded = '>';
	} else if (!strcasecmp(entity, "amp")) {
		expanded = '&';
	} else if (entity[0] == '#') {
		const char *digits = entity + 1;
		const char *dp;
		unsigned long value;
		int base = 10;

		if (*digits == 'x' || *digits == 'X') {
			base = 16;
			digits++;
		}

		/*
		 * Validate by hand rather than relying on strtoul(): it would
		 * also accept signs, a "0x" prefix, and (with base 0) treat a
		 * leading zero as octal, none of which is a valid character
		 * reference.
		 */
		for (dp = digits; *dp != '\0'; ++dp) {
			unsigned char dc = (unsigned char) *dp;

			if (base == 16 ? !isxdigit(dc) : !isdigit(dc))
				goto unknown;
		}
		if (dp == digits)
			goto unknown;	/* "&#;" or "&#x;": no digits at all */

		/* Overflow yields ULONG_MAX, which the range check rejects. */
		value = strtoul(digits, NULL, base);
		if (value > 0x10FFFF)
			goto unknown;	/* beyond the Unicode code space */

		/*
		 * NOTE(review): the code point is handed to string_putc() as a
		 * single character; values that do not fit into one byte are
		 * presumably truncated there - confirm, or encode as UTF-8.
		 */
		expanded = (int) value;
	} else {
		goto unknown;
	}

	string_putc(res, expanded);
	return true;

unknown:
	xml_parse_error(xr, "Cannot expand unknown entity &%s;", entity);
	return false;
}
Example #3
0
/*
 * Consume white space from the input stream.
 *
 * Every white space character read is appended to @result, unless @result
 * is NULL, in which case the white space is simply discarded. The first
 * non-space character is pushed back onto the stream; scanning also stops
 * at EOF.
 */
void
xml_skip_space(xml_reader_t *xr, string_t *result)
{
	for (;;) {
		int ch = xml_getc(xr);

		if (ch == EOF)
			return;

		if (!isspace(ch)) {
			/* Not ours to consume - hand it back to the reader. */
			xml_ungetc(xr, ch);
			return;
		}

		if (result)
			string_putc(result, ch);
	}
}
Example #4
0
/*
 * Validate the start of an HTTP response held in @s and extract the
 * numeric status code.
 *
 * The buffer must be at least 4 bytes long, contain no embedded NUL bytes,
 * start with "HTTP", and contain a status code made of decimal digits
 * delimited by the first and second space characters. A NUL terminator is
 * appended to @s so the buffer can be scanned with the C string functions;
 * the terminator is left in place.
 *
 * On success the status code is stored in *httpres and HTTP_EOK is
 * returned. HTTP_ENETERR is returned for a malformed header and
 * HTTP_ENOMEM if the terminator cannot be appended.
 */
static int
handle_header(struct string* s, int* httpres)
{
    char* b;
    size_t l, i;
    char* p, * p2, * end;
    long code;

    b = string_get(s);
    l = string_length(s);
    if (l < 4) return HTTP_ENETERR;
    for (i = 0; i < l; i++) if (b[i] == 0) return HTTP_ENETERR;
    if (!string_putc(s, 0)) return HTTP_ENOMEM;
    /*
     * Re-fetch the buffer: appending may have moved the storage, which
     * would leave the pointer obtained above dangling.
     */
    b = string_get(s);
    if (memcmp(b, "HTTP", 4)) return HTTP_ENETERR;
    if ((p = strchr(b, ' ')) == NULL) return HTTP_ENETERR;
    if ((p2 = strchr(p + 1, ' ')) == NULL) return HTTP_ENETERR;
    /*
     * The status code must start with a digit (strtol alone would also
     * accept a sign) and must extend exactly up to the second space.
     */
    if (p[1] < '0' || p[1] > '9') return HTTP_ENETERR;
    code = strtol(p + 1, &end, 10);
    /* Status codes are three digits; this also catches strtol overflow. */
    if (end != p2 || code > 999) return HTTP_ENETERR;
    *httpres = (int)code;
    return HTTP_EOK;
}
Example #5
0
/*
 * Obtain the next token from the input stream, using the tag-level
 * token rules.
 *
 * Leading white space is skipped. The token is classified by its first
 * character, and its text is stored in @res:
 *   "?>"  RightAngleQ      (reader state is reset to Initial)
 *   ">"   RightAngle       (reader state is reset to Initial)
 *   "/>"  RightAngleSlash  (reader state is reset to Initial)
 *   "="   Equals
 *   a name starting with a letter, '_' or '!' and continuing with
 *         alphanumerics, '_', '!', ':' or '-'  ->  Identifier
 *   a string in single or double quotes  ->  QuotedString; @res holds the
 *         contents without the quotes. Inside double quotes a backslash
 *         makes the following character literal.
 *
 * Returns None after reporting a parse error on EOF or on any other
 * character.
 */
xml_token_type_t
xml_get_token_tag(xml_reader_t *xr, string_t *res)
{
	int cc, oc;

	xml_skip_space(xr, NULL);

	cc = xml_getc(xr);
	if (cc == EOF)
		goto eof_in_tag;

	string_putc(res, cc);

	switch (cc) {
	case '<':
		goto error;

	case '?':
		cc = xml_getc(xr);
		/* Report EOF as such instead of formatting it with %c below. */
		if (cc == EOF)
			goto eof_in_tag;
		if (cc != '>')
			goto error;
		string_putc(res, cc);
		xr->state = Initial;
		return RightAngleQ;

	case '>':
		xr->state = Initial;
		return RightAngle;

	case '/':
		cc = xml_getc(xr);
		if (cc == EOF)
			goto eof_in_tag;
		if (cc != '>')
			goto error;
		string_putc(res, cc);
		xr->state = Initial;
		return RightAngleSlash;

	case '=':
		return Equals;

	case 'a' ... 'z':
	case 'A' ... 'Z':
	case '_':
	case '!':
		/* Collect the rest of the name; EOF simply ends it. */
		while ((cc = xml_getc(xr)) != EOF) {
			if (!isalnum(cc) && cc != '_' && cc != '!' && cc != ':' && cc != '-') {
				xml_ungetc(xr, cc);
				break;
			}
			string_putc(res, cc);
		}
		return Identifier;

	case '\'':
	case '"':
		/* Drop the opening quote that was stored above. */
		string_destroy(res);
		oc = cc;
		while (1) {
			cc = xml_getc(xr);
			if (cc == EOF)
				goto unexpected_eof;
			if (cc == '\\' && oc == '"') {
				/* Escaped character: take the next one verbatim. */
				cc = xml_getc(xr);
				if (cc == EOF)
					goto unexpected_eof;
			} else
			if (cc == oc)
				break;
			string_putc(res, cc);
		}
		return QuotedString;

	default:
		break;
	}

error:
	xml_parse_error(xr, "Unexpected character %c in XML document", cc);
	return None;

eof_in_tag:
	xml_parse_error(xr, "Unexpected EOF while parsing tag");
	return None;

unexpected_eof:
	xml_parse_error(xr, "Unexpected EOF while parsing quoted string");
	return None;
}
Example #6
0
/*
 * While in state Initial, obtain the next token.
 *
 * Leading white space is collected into @res first. What follows decides
 * the token:
 *   EOF          -> EndOfDocument
 *   "</"         -> LeftAngleSlash
 *   "<?"         -> LeftAngleQ
 *   "<![" ...    -> handed to xml_process_cdata() once the CDATA keyword
 *                   has been verified
 *   "<!-" ...    -> handed to xml_skip_comment(); after a Comment token,
 *                   scanning restarts from the top
 *   "<!" other   -> LeftAngleExclam
 *   "<" other    -> LeftAngle
 *   anything else is character data, accumulated in @res up to the next
 *   '<' or EOF with entity references expanded -> CData
 *
 * Seeing '<' switches the reader to state Tag. None is returned after a
 * parse error ('<' while the reader is not in state Initial, a malformed
 * "<![" section, or a failed entity expansion).
 */
xml_token_type_t
xml_get_token_initial(xml_reader_t *xr, string_t *res)
{
	xml_token_type_t token;
	int cc;

restart:
	/* Eat initial white space and store it in @res */
	xml_skip_space(xr, res);

	cc = xml_getc(xr);
	if (cc == EOF) {
		string_destroy(res);
		return EndOfDocument;
	}

	if (cc == '<') {
		/* Discard the white space in @res - we're not interested in that. */
		string_destroy(res);

		/* From here on @res accumulates the text of the opening token. */
		string_putc(res, cc);

		if (xr->state != Initial) {
			xml_parse_error(xr, "Unexpected < in XML stream (state %s)",
					xml_parser_state_name(xr->state));
			return None;
		}

		/* tag is legal here */
		xr->state = Tag;

		/*
		 * NOTE(review): cc may be EOF here, in which case the default
		 * branch passes EOF to xml_ungetc() - confirm that is harmless.
		 */
		cc = xml_getc(xr);
		switch (cc) {
		case '/':
			string_putc(res, cc);
			return LeftAngleSlash;
		case '?':
			string_putc(res, cc);
			return LeftAngleQ;
		case '!':
			string_putc(res, cc);

			/* If it's <!IDENTIFIER, return LeftAngleExclam */
			cc = xml_getc(xr);
			if (cc == '[') {
				/*
				 * Looks like CDATA.
				 * NOTE(review): @res already holds "<!" at this point,
				 * so the strcmp() below can only match if
				 * xml_get_identifier() replaces the contents of @res
				 * rather than appending to them - confirm.
				 */
				if (!xml_get_identifier(xr, res) || strcmp("CDATA", res->string)) {
					xml_parse_error(xr, "Unexpected <[%s in XML stream", res->string);
					return None;
				}

				/* Drop the keyword; @res receives the CDATA payload. */
				string_destroy(res);
				return xml_process_cdata(xr, res);
			}
			if (cc != '-') {
				xml_ungetc(xr, cc);
				return LeftAngleExclam;
			}

			/*
			 * Only the first '-' of a comment opener has been consumed
			 * here; the remainder is presumably checked by
			 * xml_skip_comment() - verify.
			 */
			token = xml_skip_comment(xr);
			if (token == Comment) {
				/* Comments are not reported; look for the next token. */
				xr->state = Initial;
				string_destroy(res);
				goto restart;
			}
			return token;
		default:
			xml_ungetc(xr, cc);
			break;
		}
		return LeftAngle;
	}

	/*
	 * Looks like character data: scan up to the next '<' or EOF.
	 * @res still contains the leading white space collected above; empty
	 * lines are presumably removed by string_trim_empty_lines() below.
	 */
	do {
		if (cc == '<') {
			/* Looks like we're done.
			 * FIXME: handle comments within CDATA?
			 */
			xml_ungetc(xr, cc);
			break;
		} else
		if (cc == '&') {
			/* The '&' is consumed; the expansion is appended to @res. */
			if (!xml_expand_entity(xr, res))
				return None;
		} else {
			string_putc(res, cc);
		}

		cc = xml_getc(xr);
	} while (cc != EOF);

	string_trim_empty_lines(res);

	return CData;
}