void xml_html_parser::parse () { string r; while (s) { if (s[0] == '<') { if (N(r) != 0) { a << tree (r); } if (test (s, "</")) a << parse_closing (); else if (test (s, "<?")) a << parse_pi (); else if (test (s, "<!--")) a << parse_comment (); else if (test (s, "<![CDATA[")) a << parse_cdata (); else if (test (s, "<!DOCTYPE")) a << parse_doctype (); else if (test (s, "<!")) a << parse_misc (); else a << parse_opening (); r= ""; } else if (s[0] == '&') r << parse_entity (); else r << s->read (1); } if (N(r) != 0) a << tree (r); }
inline bool xml_element::parse_body(const char *&data) { while(true) { if(!*data) return false; if(*data++ != '<') continue; if(*data == '/') return false; if(strbegin(data, "!DOCTYPE") == true) { parse_doctype(data); return true; } if(strbegin(data, "!--")) { if(optional<unsigned> offset = strpos(data, "-->")) { data += offset() + 3; continue; } else { throw "..."; } } if(strbegin(data, "![CDATA[")) { if(optional<unsigned> offset = strpos(data, "]]>")) { data += offset() + 3; continue; } else { throw "..."; } } optional<unsigned> offset = strpos(data, ">"); if(!offset) throw "..."; string tag = substr(data, 0, offset()); data += offset() + 1; const char *content_begin = data; bool self_terminating = false; if(strend(tag, "?") == true) { self_terminating = true; tag.rtrim_once("?"); } else if(strend(tag, "/") == true) { self_terminating = true; tag.rtrim_once("/"); } parse_head(tag); if(self_terminating) return true; while(*data) { unsigned index = element.size(); xml_element node; if(node.parse_body(data) == false) { if(*data == '/') { signed length = data - content_begin - 1; if(length > 0) content = substr(content_begin, 0, length); data++; optional<unsigned> offset = strpos(data, ">"); if(!offset) throw "..."; tag = substr(data, 0, offset()); data += offset() + 1; tag.replace("\t", " "); tag.replace("\r", " "); tag.replace("\n", " "); while(strpos(tag, " ")) tag.replace(" ", " "); tag.rtrim(); if(name != tag) throw "..."; return true; } } else { element.append(node); } } } }