Exemplo n.º 1
0
/*
 * Read the attribute list of the tag that is currently being parsed and
 * attach every attribute found to @node.
 *
 * Tokens are consumed until one of the tag terminators (RightAngle,
 * RightAngleQ, RightAngleSlash) is seen; that terminator is returned so the
 * caller can tell which kind of tag was closed.  An identifier that is not
 * followed by '=' is added as an attribute with a NULL value.
 *
 * Returns None after reporting a parse error when an unexpected token is
 * encountered or when an attribute value is not a quoted string.
 */
xml_token_type_t
xml_get_tag_attributes(xml_reader_t *xr, xml_node_t *node)
{
	ni_stringbuf_t tokenValue, attrName;
	xml_token_type_t token;

	ni_stringbuf_init(&tokenValue);
	ni_stringbuf_init(&attrName);

	token = xml_get_token(xr, &tokenValue);
	while (token != RightAngle && token != RightAngleQ && token != RightAngleSlash) {
		if (token != Identifier) {
			xml_parse_error(xr, "Unexpected token in tag attributes");
			token = None;
			break;
		}

		/* Keep the name aside; tokenValue is reused for the next token. */
		ni_stringbuf_move(&attrName, &tokenValue);

		token = xml_get_token(xr, &tokenValue);
		if (token != Equals) {
			/*
			 * Valueless attribute.  The token just read has not been
			 * handled yet, so go round the loop again without
			 * fetching a new one.
			 */
			xml_node_add_attr(node, attrName.string, NULL);
			continue;
		}

		token = xml_get_token(xr, &tokenValue);
		if (token != QuotedString) {
			xml_parse_error(xr, "Attribute value not a quoted string!");
			token = None;
			break;
		}

		xml_debug("  attr %s=%s\n", attrName.string, tokenValue.string);
		xml_node_add_attr(node, attrName.string, tokenValue.string);

		token = xml_get_token(xr, &tokenValue);
	}

	ni_stringbuf_destroy(&tokenValue);
	ni_stringbuf_destroy(&attrName);
	return token;
}
Exemplo n.º 2
0
/*
 * Fetch the next attribute as a name/value pair.
 *
 * On success @attr holds the attribute-name token, @value holds the
 * attribute-value token, and 1 is returned.
 *
 * If the next token is not an attribute name, or the token following a name
 * is not an attribute value, the token that did not match is pushed back
 * onto the parser (so the next xml_get_token() call sees it again) and 0 is
 * returned.
 */
int xml_get_attr(XML_PARSER* parser, XML_TOKEN* attr, XML_TOKEN* value)
{
	xml_get_token(parser, attr);
	if(attr->type != XML_TOKEN_ATTR_NAME) {
		parser->pushed_back_token = *attr;
		parser->has_token = 1;
		return 0;
	}

	xml_get_token(parser, value);
	if(value->type != XML_TOKEN_ATTR_VALUE) {
		/*
		 * Push back the token that was just read, not the attribute
		 * name: the name has already been handed to the caller through
		 * @attr, while the unmatched token would otherwise be lost.
		 */
		parser->pushed_back_token = *value;
		parser->has_token = 1;
		return 0;
	}

	return 1;
}
Exemplo n.º 3
0
/*
 * Discard attribute-name and attribute-value tokens until something else
 * shows up (or xml_get_token() reports that no token is available).
 * The last token read is pushed back so the next xml_get_token() call
 * returns it again.
 */
void xml_skip_attributes(XML_PARSER* parser)
{
	XML_TOKEN last;

	for(;;) {
		if(!xml_get_token(parser, &last)) {
			break;
		}

		if(last.type != XML_TOKEN_ATTR_NAME &&
		   last.type != XML_TOKEN_ATTR_VALUE) {
			break;
		}
	}

	parser->has_token = 1;
	parser->pushed_back_token = last;
}
Exemplo n.º 4
0
/*
 * Advance to the next child element.
 *
 * Tokens are read into @child until a begin-element token is found
 * (returns 1) or an end-element token is found (returns 0).  Also returns 0
 * when xml_get_token() stops delivering tokens.  Any other token type is
 * skipped.
 */
int xml_get_child(XML_PARSER* parser, XML_TOKEN* child)
{
	while(xml_get_token(parser, child)) {
		if(child->type == XML_TOKEN_BEGIN_ELEMENT) {
			return 1;
		}

		if(child->type == XML_TOKEN_END_ELEMENT) {
			break;
		}
	}

	return 0;
}
Exemplo n.º 5
0
/*
 * Skip the attributes of the current element, then look for its text.
 *
 * Nested elements encountered before any text are skipped entirely.  When a
 * text token is found it is left in @text, the remainder of the current
 * element is skipped, and 1 is returned.  Returns 0 if any other token type
 * is seen first, or if xml_get_token() stops delivering tokens.
 */
int xml_get_text_and_skip(XML_PARSER* parser, XML_TOKEN* text)
{
	xml_skip_attributes(parser);
	while(xml_get_token(parser, text)) {
		if(text->type == XML_TOKEN_TEXT) {
			xml_skip_element(parser);
			return 1;
		} else if(text->type == XML_TOKEN_BEGIN_ELEMENT) {
			xml_skip_element(parser);
		} else {
			return 0;
		}
	}

	/*
	 * No more tokens (error or end of document).  Without this return the
	 * function would fall off its end and hand the caller an undefined
	 * value.
	 */
	return 0;
}
Exemplo n.º 6
0
/*
 * Consume tokens up to and including the end-element token that closes the
 * element the parser is currently inside.  Nested elements are tracked with
 * a depth counter that starts at 1 for the current element.  Stops early if
 * xml_get_token() stops delivering tokens.
 */
void xml_skip_element(XML_PARSER* parser)
{
	XML_TOKEN current;
	int depth = 1;

	while(depth != 0 && xml_get_token(parser, &current)) {
		if(current.type == XML_TOKEN_BEGIN_ELEMENT) {
			depth++;
		} else if(current.type == XML_TOKEN_END_ELEMENT) {
			depth--;
		}
	}
}
Exemplo n.º 7
0
/*
 * Test driver: tokenizes every file named on the command line.
 *
 * Each file is read into a NUL-terminated heap buffer and run through
 * xml_get_token() until it stops delivering tokens; if the last token is an
 * error token its message is printed.  A file that cannot be opened, sized,
 * or read is reported on stderr and skipped.
 *
 * Returns EXIT_SUCCESS if every file could be loaded, EXIT_FAILURE
 * otherwise.
 */
int main(int argc, char** argv)
{
	int status = EXIT_SUCCESS;
	int n;

	for(n = 1; n < argc; ++n) {
		struct stat st;
		char* buf;
		size_t size;
		ssize_t nbytes;
		int fd = open(argv[n], O_RDONLY);

		if(fd < 0) {
			perror(argv[n]);
			status = EXIT_FAILURE;
			continue;
		}

		if(fstat(fd, &st) != 0) {
			perror(argv[n]);
			close(fd);
			status = EXIT_FAILURE;
			continue;
		}

		size = (size_t)st.st_size;
		buf = malloc(size + 1);
		if(buf == NULL) {
			perror("malloc");
			close(fd);
			status = EXIT_FAILURE;
			continue;
		}

		nbytes = read(fd, buf, size);
		close(fd);
		if(nbytes < 0) {
			perror(argv[n]);
			free(buf);
			status = EXIT_FAILURE;
			continue;
		}

		/* off_t and ssize_t have no printf length modifier; widen explicitly. */
		fprintf(stderr, "read %lld/%lld bytes\n",
		        (long long)nbytes, (long long)st.st_size);

		/*
		 * Terminate after the bytes actually read: a short read would
		 * otherwise leave uninitialized bytes in front of the terminator.
		 */
		buf[nbytes] = 0;

		{
			XML_PARSER parser;
			XML_TOKEN token;
			xml_init_parser(&parser, buf);
			while(xml_get_token(&parser, &token)) {
				/*
				const char* types[] = {
					"begin element",
					"end element",
					"attr name",
					"attr value",
					"text",
				};

				fprintf(stderr, "parsed '%s': '", types[token.type]);
				write(2, token.str, token.length);
				fprintf(stderr, "'\n");
				*/
			}

			if(token.type == XML_TOKEN_ERROR) {
				fprintf(stderr, "ERROR: %s\n", token.str);
			}
		}

		/* Release only after the last use of token.str above. */
		free(buf);
	}

	return status;
}
Exemplo n.º 8
0
/*
 * Main tokenizing loop.
 *
 * Repeats while xml_remove_space() returns non-zero.  On each round:
 *   - xml_get_token() > 0  : the token is handed to xml_use_token();
 *   - xml_get_token() == -1: parsing stops with _XML_INCORRECT_TOKEN;
 *   - otherwise            : one character is fetched with xml_getch() into
 *                            sptr->punctuation and handed to
 *                            xml_use_punctuation(); a zero character ends
 *                            the loop.
 *
 * Returns _XML_SUCCESS, _XML_INCORRECT_TOKEN, or the first non-success
 * status returned by xml_use_token() / xml_use_punctuation().
 */
int	xml_parse_tokens(struct xml_parser * sptr )
{
	int	status = _XML_SUCCESS;

	for (;;) {
		int	lead;
		int	found;

		lead = xml_remove_space( sptr );
		if (lead == 0)
			break;

		found = xml_get_token( sptr );
		if (found > 0) {
			status = xml_use_token(sptr);
			if (status != _XML_SUCCESS)
				break;
			continue;
			}

		if (found == -1) {
			status = _XML_INCORRECT_TOKEN;
			break;
			}

		sptr->punctuation = xml_getch(sptr);
		if (!sptr->punctuation)
			break;

		status = xml_use_punctuation( sptr );
		if (status != _XML_SUCCESS)
			break;
		}

	return(status);
}
Exemplo n.º 9
0
/*
 * Produce the next token from the parser's input buffer.
 *
 * Returns 1 when @token holds a regular token (begin/end element, attribute
 * name, attribute value, text) and 0 when the token is an error or
 * end-of-document marker.  token->str points into the parser's buffer and is
 * NOT NUL-terminated; use token->length.
 *
 * The parser is a small state machine:
 *   XML_STATE_FIND_ELEMENT   - scanning element content for text or '<'
 *   XML_STATE_FIND_ATTRIBUTE - inside a start tag, after the element name
 *   XML_STATE_FIND_VALUE     - positioned on the '=' after an attribute name
 *
 * A token stored with parser->has_token set is returned before any new input
 * is scanned, unless the parser is already in the error or end-of-document
 * state.
 *
 * NOTE(review): ERROR_AND_RETURN(cond, msg) and SET_ERROR(msg) are defined
 * elsewhere; presumably they record the error in the parser/token and
 * (ERROR_AND_RETURN) return 0 when cond is true - confirm against the macro
 * definitions.
 */
int xml_get_token(XML_PARSER* parser, XML_TOKEN* token)
{
	if(parser->state == XML_STATE_ERROR) {
		token->str = "";
		token->length = 0;
		token->type = XML_TOKEN_ERROR;
		return 0;
	}

	if(parser->state == XML_STATE_END_DOCUMENT) {
		token->type = XML_TOKEN_END_DOCUMENT;
		return 0;
	}

	if(parser->has_token) {
		parser->has_token = 0;
		*token = parser->pushed_back_token;
		return token->type != XML_TOKEN_END_DOCUMENT &&
		       token->type != XML_TOKEN_ERROR;
	}

	if(parser->state == XML_STATE_FIND_ELEMENT) {
		int text_found = 0;
		const char* begin = parser->pos;

		/*
		 * Scan up to the next '<'.  Only alphanumeric or punctuation
		 * characters count as text, so pure whitespace between tags is
		 * ignored.  <ctype.h> functions require a value representable
		 * as unsigned char, hence the casts.
		 */
		while(*parser->pos && *parser->pos != '<') {
			if(isalnum((unsigned char)*parser->pos) ||
			   ispunct((unsigned char)*parser->pos)) {
				text_found = 1;
			}

			++parser->pos;
		}

		if(!*parser->pos) {
			/* End of input: only valid with all elements closed and no stray text. */
			ERROR_AND_RETURN(parser->elements, "unexpected end of document");
			ERROR_AND_RETURN(text_found, "text found at top level");

			parser->state = XML_STATE_END_DOCUMENT;
			token->type = XML_TOKEN_END_DOCUMENT;
			return 0;
		}

		if(text_found) {
			/* pos stays on '<'; the tag is handled by the next call. */
			token->type = XML_TOKEN_TEXT;
			token->str = begin;
			token->length = parser->pos - begin;
			return 1;
		} else {
			/* First character after '<' decides the kind of tag. */
			const char* begin = parser->pos+1;
			if(*begin == '?') {
				/* "<? ... ?>" is skipped entirely, then scanning resumes. */
				parser->pos = strstr(begin,"?>");
				ERROR_AND_RETURN(parser->pos == NULL, "End of DTD not found");

				parser->pos += 2;
				return xml_get_token(parser, token);
			} else if(*begin == '/') {
				/* "</name>": the token text is the name between "</" and '>'. */
				--parser->elements;
				++begin;
				parser->pos = strchr(begin,'>');
				ERROR_AND_RETURN(parser->pos == NULL,
				                 "unexpected end of document");
				token->type = XML_TOKEN_END_ELEMENT;
				token->str = begin;
				token->length = parser->pos - token->str;
				++parser->pos;
				return 1;
			} else {
				/* "<name": the name ends at the first ' ', '>' or '/'. */
				++parser->elements;
				token->str = parser->pos; /* overwritten with begin below */
				while(*parser->pos && *parser->pos != ' ' &&
				      *parser->pos != '>' && *parser->pos != '/') {
					++parser->pos;
				}

				ERROR_AND_RETURN(!*parser->pos, "unexpected end of document");

				token->type = XML_TOKEN_BEGIN_ELEMENT;
				token->str = begin;
				token->length = parser->pos - token->str;
				if(*parser->pos == '>') {
					++parser->pos;
					parser->state = XML_STATE_FIND_ELEMENT;
				} else {
					parser->state = XML_STATE_FIND_ATTRIBUTE;
				}

				return 1;
			}
		}
	} else if(parser->state == XML_STATE_FIND_ATTRIBUTE) {
		while(*parser->pos == ' ') {
			++parser->pos;
		}

		ERROR_AND_RETURN(!*parser->pos, "unexpected end of document");

		if(*parser->pos == '/') {
			/* Self-closing tag "<name ... />": report it as an (unnamed) end element. */
			--parser->elements;
			token->type = XML_TOKEN_END_ELEMENT;
			token->str = "";
			token->length = 0;
			++parser->pos;
			while(*parser->pos != '>') {
				ERROR_AND_RETURN(!*parser->pos, "unexpected end of document");
				ERROR_AND_RETURN(isalpha((unsigned char)*parser->pos) ||
				                 ispunct((unsigned char)*parser->pos),
				                 "unexpected characters at end of element");
				++parser->pos;
			}

			/*
			 * Hand the end-element token to the caller.  pos is left on
			 * '>' and the state is still FIND_ATTRIBUTE, so the next call
			 * consumes the '>' through the branch below and switches
			 * back to FIND_ELEMENT.  (Previously control fell off the
			 * end of the function here without returning a value.)
			 */
			return 1;
		} else if(isalpha((unsigned char)*parser->pos)) {
			/* Attribute name: everything up to the next '='. */
			token->type = XML_TOKEN_ATTR_NAME;
			token->str = parser->pos;
			parser->pos = strchr(token->str, '=');
			ERROR_AND_RETURN(parser->pos == NULL, "unexpected end of document");
			token->length = parser->pos - token->str;
			parser->state = XML_STATE_FIND_VALUE;
			return 1;
		} else if(*parser->pos == '>') {
			/* End of the start tag: continue with element content. */
			++parser->pos;
			parser->state = XML_STATE_FIND_ELEMENT;
			return xml_get_token(parser, token);
		} else {
			SET_ERROR("unexpected characters when searching for attribute");
			return 0;
		}
	} else if(parser->state == XML_STATE_FIND_VALUE) {
		/* Step over the '=' the attribute-name scan stopped on. */
		++parser->pos;
		/* Only whitespace may precede the opening double quote. */
		while(*parser->pos && *parser->pos != '"') {
			ERROR_AND_RETURN(!isspace((unsigned char)*parser->pos),
			                  "unexpected character when searching for value");
			++parser->pos;
		}

		ERROR_AND_RETURN(!*parser->pos, "unexpected end of document");
		/* The value is the text between the two double quotes. */
		token->str = parser->pos+1;
		parser->pos = strchr(token->str,'"');
		ERROR_AND_RETURN(parser->pos == NULL, "unexpected end of document");

		token->length = parser->pos - token->str;
		token->type = XML_TOKEN_ATTR_VALUE;
		++parser->pos;
		parser->state = XML_STATE_FIND_ATTRIBUTE;
		return 1;
	} else {
		SET_ERROR("bad state");
		return 0;
	}
}
Exemplo n.º 10
0
/*
 * Read the next token into @res and report whether it is an Identifier.
 * The token is consumed either way.
 */
ni_bool_t
xml_get_identifier(xml_reader_t *xr, ni_stringbuf_t *res)
{
	xml_token_type_t kind = xml_get_token(xr, res);

	return kind == Identifier;
}
Exemplo n.º 11
0
ni_bool_t
xml_process_element_nested(xml_reader_t *xr, xml_node_t *cur, unsigned int nesting)
{
	ni_stringbuf_t tokenValue, identifier;
	xml_token_type_t token;
	xml_node_t *child;

	ni_stringbuf_init(&tokenValue);
	ni_stringbuf_init(&identifier);

	while (1) {
		token = xml_get_token(xr, &tokenValue);

		switch (token) {
		case CData:
			/* process element content */
			xml_node_set_cdata(cur, tokenValue.string);
			break;

		case LeftAngleExclam:
			/* Most likely <!DOCTYPE ...> */
			if (!xml_get_identifier(xr, &identifier)) {
				xml_parse_error(xr, "Bad element: tag open <! not followed by identifier");
				goto error;
			}

			if (strcmp(identifier.string, "DOCTYPE")) {
				xml_parse_error(xr, "Unexpected element: <!%s ...> not supported", identifier.string);
				goto error;
			}

			while (1) {
				token = xml_get_token(xr, &identifier);
				if (token == RightAngle)
					break;
				if (token == Identifier && !xr->doctype)
					ni_string_dup(&xr->doctype, identifier.string);
				if (token != Identifier && token != QuotedString) {
					xml_parse_error(xr, "Error parsing <!DOCTYPE ...> attributes");
					goto error;
				}
			}
			break;

		case LeftAngle:
			/* New element start */
			if (!xml_get_identifier(xr, &identifier)) {
				xml_parse_error(xr, "Bad element: tag open < not followed by identifier");
				goto error;
			}

			child = xml_node_new(identifier.string, cur);
			if (xr->shared_location)
				child->location = xml_location_new(xr->shared_location, xr->lineCount);

			token = xml_get_tag_attributes(xr, child);
			if (token == None) {
				xml_parse_error(xr, "Error parsing <%s ...> tag attributes", child->name);
				goto error;
			} else
			if (token == RightAngle) {
				/* Handle <foo>...</foo> */
				xml_debug("%*.*s<%s>\n", nesting, nesting, "", child->name);
				if (!xml_process_element_nested(xr, child, nesting + 2))
					goto error;
			} else if (token == RightAngleSlash) {
				/* We parsed a "<foo/>" element - nothing left to do, we're done */
				xml_debug("%*.*s<%s/>\n", nesting, nesting, "", child->name);
			} else {
				xml_parse_error(xr, "Unexpected token %s at end of <%s ...",
						xml_token_name(token), child->name);
				goto error;
			}

			break;

		case LeftAngleSlash:
			/* Element end */
			if (!xml_get_identifier(xr, &identifier)) {
				xml_parse_error(xr, "Bad element: end tag open </ not followed by identifier");
				goto error;
			}

			if (xml_get_token(xr, &tokenValue) != RightAngle) {
				xml_parse_error(xr, "Bad element: </%s - missing tag close", identifier.string);
				goto error;
			}

			if (cur->parent == NULL) {
				xml_parse_error(xr, "Unexpected </%s> tag", identifier.string);
				goto error;
			}
			if (strcmp(cur->name, identifier.string)) {
				xml_parse_error(xr, "Closing tag </%s> does not match <%s>",
						identifier.string, cur->name);
				goto error;
			}

			xml_debug("%*.*s</%s>\n", nesting, nesting, "", cur->name);
			goto success;

		case LeftAngleQ:
			/* New PI node starts here */
			if (!xml_get_identifier(xr, &identifier)) {
				xml_parse_error(xr, "Bad element: tag open <? not followed by identifier");
				goto error;
			}

			child = xml_node_new(identifier.string, NULL);
			if (xr->shared_location)
				child->location = xml_location_new(xr->shared_location, xr->lineCount);

			token = xml_get_tag_attributes(xr, child);
			if (token == None) {
				xml_parse_error(xr, "Error parsing <?%s ...?> tag attributes", child->name);
				xml_node_free(child);
				goto error;
			} else
			if (token == RightAngleQ) {
				xml_debug("%*.*s<%s>\n", nesting, nesting, "", child->name);
				xml_process_pi_node(xr, child);
				xml_node_free(child);
			} else {
				xml_parse_error(xr, "Unexpected token %s at end of <?%s ...",
						xml_token_name(token), child->name);
				xml_node_free(child);
				goto error;
			}

			break;

		case EndOfDocument:
			if (cur->parent) {
				xml_parse_error(xr, "End of document while processing element <%s>", cur->name);
				goto error;
			}
			goto success;

		case None:
			/* parser error */
			goto error;

		default:
			xml_parse_error(xr, "Unexpected token %s", xml_token_name(token));
			goto error;
		}
	}

success:
	ni_stringbuf_destroy(&tokenValue);
	ni_stringbuf_destroy(&identifier);
	return TRUE;

error:
	ni_stringbuf_destroy(&tokenValue);
	ni_stringbuf_destroy(&identifier);
	return FALSE;
}
Exemplo n.º 12
0
/*
 * Fetch the next token into @res; the result says whether that token was
 * an Identifier.  The token is consumed in both cases.
 */
bool
xml_get_identifier(xml_reader_t *xr, string_t *res)
{
	const int is_identifier = (xml_get_token(xr, res) == Identifier);

	return is_identifier;
}