/**
 * Parse a "<!DOCTYPE ...>" declaration starting at the current input
 * position and return it as a tuple tagged "doctype".
 *
 * The tuple receives, in source order: the document type name, the
 * optional SYSTEM or PUBLIC identifier, and the declarations collected
 * from the optional internal subset ("[ ... ]").  The closing ">" is
 * consumed when present.
 */
tree
xml_html_parser::parse_doctype () {
  s += 9; // step over the nine characters of "<!DOCTYPE"
  tree dt= tuple ("doctype");

  skip_space ();
  dt << parse_name ();
  skip_space ();
  if (test (s, "SYSTEM")) dt << parse_system ();
  else if (test (s, "PUBLIC")) dt << parse_public ();
  skip_space ();

  // Optional internal subset.
  if (test (s, "[")) {
    s += 1;
    while (s) {
      skip_space ();
      if (test (s, "]")) { s += 1; break; }
      else if (test (s, "<!ELEMENT")) dt << parse_element ();
      // NOTE(review): ATTLIST declarations are handed to parse_cdata (),
      // the same routine parse () uses for "<![CDATA[" sections; confirm
      // that a dedicated ATTLIST parser is not what was intended here.
      else if (test (s, "<!ATTLIST")) dt << parse_cdata ();
      // Entity declarations are processed but add nothing to dt.
      else if (test (s, "<!ENTITY")) parse_entity_decl ();
      // Notations belong to the doctype like every other declaration of
      // the internal subset, so they are collected in dt and not in the
      // parser-wide accumulator a.
      else if (test (s, "<!NOTATION")) dt << parse_notation ();
      else if (test (s, "<?")) dt << parse_pi ();
      else if (test (s, "<!--")) dt << parse_comment ();
      // Entity references are consumed and their value is discarded.
      else if (s[0] == '&' || s[0] == '%') (void) parse_entity ();
      // Anything unrecognized is skipped one character at a time.
      else s += 1;
    }
  }

  skip_space ();
  if (test (s, ">")) s += 1;
  return dt;
}
void xml_html_parser::parse () { string r; while (s) { if (s[0] == '<') { if (N(r) != 0) { a << tree (r); } if (test (s, "</")) a << parse_closing (); else if (test (s, "<?")) a << parse_pi (); else if (test (s, "<!--")) a << parse_comment (); else if (test (s, "<![CDATA[")) a << parse_cdata (); else if (test (s, "<!DOCTYPE")) a << parse_doctype (); else if (test (s, "<!")) a << parse_misc (); else a << parse_opening (); r= ""; } else if (s[0] == '&') r << parse_entity (); else r << s->read (1); } if (N(r) != 0) a << tree (r); }
/*
 * parse_node - process an XML node, all of its following siblings and,
 * recursively, their children, dispatching each node to the parser
 * function for its type.
 *
 * Siblings are walked with a loop and only children are handled by
 * recursion, so the stack depth follows the nesting depth of the
 * document instead of the length of a sibling list. A NULL node is
 * treated as an empty list.
 *
 * Returns 0 on success and -1 on any error.
 */
static int parse_node(xmlNodePtr node, simple_binary_t **sibxml)
{
    for (; node != NULL; node = node->next) {
        int status = 0;

        /* Call for the parser function of the node type. */
        switch (node->type) {
        case XML_ELEMENT_NODE:
            status = parse_element(node, sibxml);
            break;
        case XML_TEXT_NODE:
            status = parse_text(node, sibxml);
            break;
        case XML_CDATA_SECTION_NODE:
            status = parse_cdata(node, sibxml);
            break;
        case XML_COMMENT_NODE:
        case XML_PI_NODE:
            /* Comments and PIs are ignored. */
            break;
        /*
         * XML has also many other node types, these are not needed with
         * SI. Therefore they are assumed to be an error.
         */
        default:
            error(0, "SI compiler: Unknown XML node in the SI source.");
            return -1;
        }

        /* Only 0 and 1 are valid "go on" results of a parse function. */
        switch (status) {
        case 0:
        case 1:
            break;
        case -1:
            /* Something went wrong in the parsing. */
            return -1;
        default:
            warning(0,"SI compiler: undefined return value in a parse function.");
            return -1;
        }

        if (node->children != NULL &&
            parse_node(node->children, sibxml) == -1)
            return -1;

        /*
         * If node is an element with content, it will need an end tag
         * after its children. The status for it (1) is returned by
         * parse_element.
         */
        if (status == 1)
            parse_end(sibxml);
    }

    return 0;
}
bool xml_istream::process() { stack<string> tag_stack; stack< map<string, string> > attribute_stack; bool ok = true; while (ok != false && good() != false) { string tag, cdata, doctype, s; map<string, string> attributes; xml_t t = get_xml_node(s, tag, attributes); switch (t) { case OPEN_TAG: tag_stack.push(tag); attribute_stack.push(attributes); break; case EMPTY_TAG: { callback pFunc = _callback_map[tag]; if (pFunc != NULL) { pFunc(s, attributes); } } break; case CLOSE_TAG: if (tag_stack.size() > 0 && tag_stack.top() == tag) /* Ensure opening tag matches the closing tag. */ { tag_stack.pop(); attribute_stack.pop(); } else { ok = false; } break; case TEXT: if (s.empty() == false && tag_stack.size() > 0) { callback pFunc = _callback_map[tag_stack.top()]; if (pFunc != NULL) { if (_entity_translation != false) { string ts; entity_translation(s, ts); s = ts; } if (_collapse_whitespace != false) { string ts; collapse_whitespace(s, ts); s = ts; } pFunc(s, attribute_stack.top()); } } else { ok = false; } break; case CDATA: ok = parse_cdata(s, cdata); if (cdata.empty() == false && tag_stack.size() > 0) { callback pFunc = _callback_map[tag_stack.top()]; if (pFunc != NULL) { pFunc(cdata, attribute_stack.top()); } } else { ok = false; } break; case UNKNOWN: ok = false; break; } } if (tag_stack.size() != 0 || attribute_stack.size() != 0) { ok = false; } return ok; }