/*
 * Process the content of element @cur, reading tokens from @xr until the
 * element's closing tag (or, for a node without parent, the end of the
 * document) has been consumed.
 *
 * Token handling:
 *   CData        - stored as the character data of @cur.
 *   <!IDENT ...> - only DOCTYPE is accepted; the first identifier following
 *                  DOCTYPE is remembered in xr->doctype unless one is
 *                  already set.
 *   <name ...>   - a child of @cur is created and its attributes parsed; if
 *                  the tag ends in ">" the child's content is processed
 *                  recursively, if it ends in "/>" the child is complete.
 *   </name>      - must match cur->name and @cur must have a parent; ends
 *                  processing of @cur successfully.
 *   <?name ...?> - parsed into a temporary node that is handed to
 *                  xml_process_pi_node() and then freed.
 *
 * @xr:      reader supplying the token stream.
 * @cur:     node whose content is being parsed.
 * @nesting: indentation used for debug output only; grows by 2 per level.
 *
 * Returns TRUE on success and FALSE on any parse error.
 *
 * NOTE(review): recursion depth follows the nesting depth of the input and
 * no limit is enforced here - confirm whether callers bound untrusted input.
 */
ni_bool_t
xml_process_element_nested(xml_reader_t *xr, xml_node_t *cur, unsigned int nesting)
{
	ni_stringbuf_t tokenValue, identifier;
	xml_token_type_t token;
	xml_node_t *child;
	ni_bool_t rv = FALSE;

	ni_stringbuf_init(&tokenValue);
	ni_stringbuf_init(&identifier);

	while (1) {
		token = xml_get_token(xr, &tokenValue);

		switch (token) {
		case CData:
			/* process element content */
			xml_node_set_cdata(cur, tokenValue.string);
			break;

		case LeftAngleExclam:
			/* Most likely <!DOCTYPE ...> */
			if (!xml_get_identifier(xr, &identifier)) {
				xml_parse_error(xr, "Bad element: tag open <! not followed by identifier");
				goto cleanup;
			}

			if (strcmp(identifier.string, "DOCTYPE") != 0) {
				xml_parse_error(xr, "Unexpected element: <!%s ...> not supported", identifier.string);
				goto cleanup;
			}

			/* Consume everything up to the closing '>' of the declaration */
			while (1) {
				token = xml_get_token(xr, &identifier);
				if (token == RightAngle)
					break;
				/* Only the first identifier is kept as the doctype name */
				if (token == Identifier && !xr->doctype)
					ni_string_dup(&xr->doctype, identifier.string);
				if (token != Identifier && token != QuotedString) {
					xml_parse_error(xr, "Error parsing <!DOCTYPE ...> attributes");
					goto cleanup;
				}
			}
			break;

		case LeftAngle:
			/* New element start */
			if (!xml_get_identifier(xr, &identifier)) {
				xml_parse_error(xr, "Bad element: tag open < not followed by identifier");
				goto cleanup;
			}

			/*
			 * The child is created with @cur as its parent and is not
			 * freed on the error paths below (presumably it is owned by
			 * the tree from here on - confirm against xml_node_new).
			 */
			child = xml_node_new(identifier.string, cur);
			if (xr->shared_location)
				child->location = xml_location_new(xr->shared_location, xr->lineCount);

			token = xml_get_tag_attributes(xr, child);
			if (token == None) {
				xml_parse_error(xr, "Error parsing <%s ...> tag attributes", child->name);
				goto cleanup;
			} else if (token == RightAngle) {
				/* Handle <foo>...</foo> */
				/* '*' width/precision arguments must be of type int */
				xml_debug("%*.*s<%s>\n", (int)nesting, (int)nesting, "", child->name);
				if (!xml_process_element_nested(xr, child, nesting + 2))
					goto cleanup;
			} else if (token == RightAngleSlash) {
				/* We parsed a "<foo/>" element - nothing left to do, we're done */
				xml_debug("%*.*s<%s/>\n", (int)nesting, (int)nesting, "", child->name);
			} else {
				xml_parse_error(xr, "Unexpected token %s at end of <%s ...",
						xml_token_name(token), child->name);
				goto cleanup;
			}
			break;

		case LeftAngleSlash:
			/* Element end */
			if (!xml_get_identifier(xr, &identifier)) {
				xml_parse_error(xr, "Bad element: end tag open </ not followed by identifier");
				goto cleanup;
			}

			if (xml_get_token(xr, &tokenValue) != RightAngle) {
				xml_parse_error(xr, "Bad element: </%s - missing tag close", identifier.string);
				goto cleanup;
			}

			/* A node without parent has no opening tag that could be closed */
			if (cur->parent == NULL) {
				xml_parse_error(xr, "Unexpected </%s> tag", identifier.string);
				goto cleanup;
			}
			if (strcmp(cur->name, identifier.string) != 0) {
				xml_parse_error(xr, "Closing tag </%s> does not match <%s>",
						identifier.string, cur->name);
				goto cleanup;
			}

			xml_debug("%*.*s</%s>\n", (int)nesting, (int)nesting, "", cur->name);
			goto success;

		case LeftAngleQ:
			/* New PI node starts here */
			if (!xml_get_identifier(xr, &identifier)) {
				xml_parse_error(xr, "Bad element: tag open <? not followed by identifier");
				goto cleanup;
			}

			/* Temporary node, never linked to @cur; freed on every path below */
			child = xml_node_new(identifier.string, NULL);
			if (xr->shared_location)
				child->location = xml_location_new(xr->shared_location, xr->lineCount);

			token = xml_get_tag_attributes(xr, child);
			if (token == None) {
				xml_parse_error(xr, "Error parsing <?%s ...?> tag attributes", child->name);
				xml_node_free(child);
				goto cleanup;
			} else if (token == RightAngleQ) {
				xml_debug("%*.*s<%s>\n", (int)nesting, (int)nesting, "", child->name);
				xml_process_pi_node(xr, child);
				xml_node_free(child);
			} else {
				xml_parse_error(xr, "Unexpected token %s at end of <?%s ...",
						xml_token_name(token), child->name);
				xml_node_free(child);
				goto cleanup;
			}
			break;

		case EndOfDocument:
			/* Running out of input is only fine for a node without parent */
			if (cur->parent) {
				xml_parse_error(xr, "End of document while processing element <%s>", cur->name);
				goto cleanup;
			}
			goto success;

		case None:
			/* parser error */
			goto cleanup;

		default:
			xml_parse_error(xr, "Unexpected token %s", xml_token_name(token));
			goto cleanup;
		}
	}

success:
	rv = TRUE;
	/* fall through to the shared cleanup */
cleanup:
	ni_stringbuf_destroy(&tokenValue);
	ni_stringbuf_destroy(&identifier);
	return rv;
}
/*
 * While in state Initial, obtain the next token.
 *
 * Leading white space is skipped first (and collected in @res). After that:
 *   - end of input yields EndOfDocument;
 *   - '<' switches the reader to state Tag and yields LeftAngleSlash ("</"),
 *     LeftAngleQ ("<?"), LeftAngleExclam ("<!" not followed by '[' or '-')
 *     or LeftAngle; "<![CDATA" is handed to xml_process_cdata(), and after
 *     "<!-" the comment is skipped via xml_skip_comment(), the state is
 *     reset to Initial and scanning restarts;
 *   - anything else is collected as text up to the next '<' or end of
 *     input and returned as CData.
 *
 * @xr:  reader to take characters from.
 * @res: buffer receiving the token text.
 *
 * Returns the token type, or None on a parse error.
 */
xml_token_type_t
xml_get_token_initial(xml_reader_t *xr, string_t *res)
{
	xml_token_type_t token;
	int cc;

restart:
	/* Eat initial white space and store it in @res */
	xml_skip_space(xr, res);

	cc = xml_getc(xr);
	if (cc == EOF) {
		string_destroy(res);
		return EndOfDocument;
	}

	if (cc == '<') {
		/* Discard the white space in @res - we're not interested in that. */
		string_destroy(res);
		string_putc(res, cc);

		if (xr->state != Initial) {
			xml_parse_error(xr, "Unexpected < in XML stream (state %s)",
					xml_parser_state_name(xr->state));
			return None;
		}

		/* tag is legal here */
		xr->state = Tag;

		cc = xml_getc(xr);
		switch (cc) {
		case '/':
			string_putc(res, cc);
			return LeftAngleSlash;

		case '?':
			string_putc(res, cc);
			return LeftAngleQ;

		case '!':
			string_putc(res, cc);

			/* If it's <!IDENTIFIER, return LeftAngleExclam */
			cc = xml_getc(xr);
			if (cc == '[') {
				/* Looks like a <![CDATA section */
				if (!xml_get_identifier(xr, res) || strcmp("CDATA", res->string)) {
					/*
					 * Report the construct as it appeared in the input
					 * ("<![..."). When no identifier could be read the
					 * buffer may be empty, so never hand a NULL pointer
					 * to the %s conversion.
					 */
					xml_parse_error(xr, "Unexpected <![%s in XML stream",
							res->string ? res->string : "");
					return None;
				}
				string_destroy(res);
				return xml_process_cdata(xr, res);
			}

			if (cc != '-') {
				xml_ungetc(xr, cc);
				return LeftAngleExclam;
			}

			/* "<!-" - treat as the start of a comment */
			token = xml_skip_comment(xr);
			if (token == Comment) {
				/* Comments produce no token; look for the next one */
				xr->state = Initial;
				string_destroy(res);
				goto restart;
			}
			return token;

		default:
			xml_ungetc(xr, cc);
			break;
		}
		return LeftAngle;
	}

	/*
	 * Plain text content. @res still holds the white space collected
	 * above; append characters up to the next '<' or the end of input.
	 */
	do {
		if (cc == '<') {
			/* Looks like we're done.
			 * FIXME: handle comments within CDATA?
			 */
			xml_ungetc(xr, cc);
			break;
		} else if (cc == '&') {
			/* Entity reference; the expansion is presumably appended to @res */
			if (!xml_expand_entity(xr, res))
				return None;
		} else {
			string_putc(res, cc);
		}

		cc = xml_getc(xr);
	} while (cc != EOF);

	string_trim_empty_lines(res);
	return CData;
}