xml_node_t * xml_node_scan(FILE *fp, const char *location) { xml_reader_t reader; xml_node_t *root = xml_node_new(NULL, NULL); if (xml_reader_init_file(&reader, fp, location) < 0) return NULL; if (reader.shared_location) root->location = xml_location_new(reader.shared_location, reader.lineCount); /* Note! We do not deal with properly formatted XML documents here. * Specifically, we do not expect them to have a document header. */ if (!xml_process_element_nested(&reader, root, 0)) { xml_node_free(root); return NULL; } if (xml_reader_destroy(&reader) < 0) { xml_node_free(root); return NULL; } return root; }
/*
 * Create a fresh document and fill it from the token stream of reader xr.
 *
 * Parsing starts at the document's root node with a debug nesting of 0.
 * Returns the new document on success. If parsing fails, the document is
 * released with xml_document_free() and NULL is returned.
 */
xml_document_t *
xml_process_document(xml_reader_t *xr)
{
	xml_document_t *document = xml_document_new();

	/* Note! We do not deal with properly formatted XML documents here.
	 * Specifically, we do not expect them to have a document header.
	 */
	if (xml_process_element_nested(xr, xml_document_root(document), 0))
		return document;

	xml_document_free(document);
	return NULL;
}
/*
 * Recursive worker of the parser: read tokens from xr and apply them to the
 * node cur until cur is complete.
 *
 * xr       reader supplying the tokens
 * cur      node currently being filled; child elements are created with cur
 *          as their parent
 * nesting  indentation used for the xml_debug() trace only; every level of
 *          recursion adds 2
 *
 * Returns TRUE when the matching end tag of cur has been consumed, or when
 * the end of the document is reached while cur has no parent. Returns FALSE
 * on any parse error. Both scratch string buffers are destroyed on every
 * exit path.
 *
 * The function calls itself once per nested element and does not check the
 * depth, so recursion is only as deep as the input nests.
 */
ni_bool_t
xml_process_element_nested(xml_reader_t *xr, xml_node_t *cur, unsigned int nesting)
{
	ni_stringbuf_t text, ident;
	xml_token_type_t tok;
	xml_node_t *node;
	ni_bool_t ok = FALSE;

	ni_stringbuf_init(&text);
	ni_stringbuf_init(&ident);

	for (;;) {
		tok = xml_get_token(xr, &text);

		switch (tok) {
		case CData:
			/* Character data belongs to the element being processed */
			xml_node_set_cdata(cur, text.string);
			break;

		case LeftAngleExclam:
			/* The only "<!" construct accepted is <!DOCTYPE ...> */
			if (!xml_get_identifier(xr, &ident)) {
				xml_parse_error(xr, "Bad element: tag open <! not followed by identifier");
				goto out;
			}
			if (strcmp(ident.string, "DOCTYPE") != 0) {
				xml_parse_error(xr, "Unexpected element: <!%s ...> not supported", ident.string);
				goto out;
			}

			/* Walk the attributes up to the closing '>'. The first
			 * identifier seen is stored as the doctype, unless the
			 * reader already has one.
			 */
			while ((tok = xml_get_token(xr, &ident)) != RightAngle) {
				if (tok == Identifier) {
					if (xr->doctype == NULL)
						ni_string_dup(&xr->doctype, ident.string);
				} else if (tok != QuotedString) {
					xml_parse_error(xr, "Error parsing <!DOCTYPE ...> attributes");
					goto out;
				}
			}
			break;

		case LeftAngle:
			/* Start tag of a child element */
			if (!xml_get_identifier(xr, &ident)) {
				xml_parse_error(xr, "Bad element: tag open < not followed by identifier");
				goto out;
			}

			/* The child is created with cur as its parent, so the
			 * error paths below do not free it separately.
			 */
			node = xml_node_new(ident.string, cur);
			if (xr->shared_location)
				node->location = xml_location_new(xr->shared_location, xr->lineCount);

			tok = xml_get_tag_attributes(xr, node);
			if (tok == RightAngle) {
				/* <foo> ... </foo>: descend into the element body */
				xml_debug("%*.*s<%s>\n", nesting, nesting, "", node->name);
				if (!xml_process_element_nested(xr, node, nesting + 2))
					goto out;
			} else if (tok == RightAngleSlash) {
				/* <foo/>: empty element, already complete */
				xml_debug("%*.*s<%s/>\n", nesting, nesting, "", node->name);
			} else if (tok == None) {
				xml_parse_error(xr, "Error parsing <%s ...> tag attributes", node->name);
				goto out;
			} else {
				xml_parse_error(xr, "Unexpected token %s at end of <%s ...", xml_token_name(tok), node->name);
				goto out;
			}
			break;

		case LeftAngleSlash:
			/* End tag: it must close the element we are working on */
			if (!xml_get_identifier(xr, &ident)) {
				xml_parse_error(xr, "Bad element: end tag open </ not followed by identifier");
				goto out;
			}
			if (xml_get_token(xr, &text) != RightAngle) {
				xml_parse_error(xr, "Bad element: </%s - missing tag close", ident.string);
				goto out;
			}

			/* A parentless node has no start tag that could be closed.
			 * This check has to come before cur->name is compared.
			 */
			if (cur->parent == NULL) {
				xml_parse_error(xr, "Unexpected </%s> tag", ident.string);
				goto out;
			}
			if (strcmp(cur->name, ident.string) != 0) {
				xml_parse_error(xr, "Closing tag </%s> does not match <%s>", ident.string, cur->name);
				goto out;
			}

			xml_debug("%*.*s</%s>\n", nesting, nesting, "", cur->name);
			ok = TRUE;
			goto out;

		case LeftAngleQ:
			/* Processing instruction: parsed into a temporary,
			 * parentless node that is freed again on every path.
			 */
			if (!xml_get_identifier(xr, &ident)) {
				xml_parse_error(xr, "Bad element: tag open <? not followed by identifier");
				goto out;
			}

			node = xml_node_new(ident.string, NULL);
			if (xr->shared_location)
				node->location = xml_location_new(xr->shared_location, xr->lineCount);

			tok = xml_get_tag_attributes(xr, node);
			if (tok == RightAngleQ) {
				xml_debug("%*.*s<%s>\n", nesting, nesting, "", node->name);
				xml_process_pi_node(xr, node);
				xml_node_free(node);
				break;
			}

			if (tok == None) {
				xml_parse_error(xr, "Error parsing <?%s ...?> tag attributes", node->name);
			} else {
				xml_parse_error(xr, "Unexpected token %s at end of <?%s ...", xml_token_name(tok), node->name);
			}
			xml_node_free(node);
			goto out;

		case EndOfDocument:
			/* Running out of input is only fine for a parentless node */
			if (cur->parent != NULL) {
				xml_parse_error(xr, "End of document while processing element <%s>", cur->name);
				goto out;
			}
			ok = TRUE;
			goto out;

		case None:
			/* parser error */
			goto out;

		default:
			xml_parse_error(xr, "Unexpected token %s", xml_token_name(tok));
			goto out;
		}
	}

out:
	ni_stringbuf_destroy(&text);
	ni_stringbuf_destroy(&ident);
	return ok;
}