Exemple #1
0
void
xml_html_parser::parse () {
  string r;
  while (s) {
    if (s[0] == '<') {
      if (N(r) != 0) { a << tree (r); }
      if (test (s, "</")) a << parse_closing ();
      else if (test (s, "<?")) a << parse_pi ();
      else if (test (s, "<!--")) a << parse_comment ();
      else if (test (s, "<![CDATA[")) a << parse_cdata ();
      else if (test (s, "<!DOCTYPE")) a << parse_doctype ();
      else if (test (s, "<!")) a << parse_misc ();
      else a << parse_opening ();
      r= "";
    }
    else if (s[0] == '&') r << parse_entity ();
    else r << s->read (1);
  }
  if (N(r) != 0) a << tree (r);
}
Exemple #2
0
inline bool xml_element::parse_body(const char *&data) {
  while(true) {
    if(!*data) return false;
    if(*data++ != '<') continue;
    if(*data == '/') return false;

    if(strbegin(data, "!DOCTYPE") == true) {
      parse_doctype(data);
      return true;
    }

    if(strbegin(data, "!--")) {
      if(optional<unsigned> offset = strpos(data, "-->")) {
        data += offset() + 3;
        continue;
      } else {
        throw "...";
      }
    }

    if(strbegin(data, "![CDATA[")) {
      if(optional<unsigned> offset = strpos(data, "]]>")) {
        data += offset() + 3;
        continue;
      } else {
        throw "...";
      }
    }

    optional<unsigned> offset = strpos(data, ">");
    if(!offset) throw "...";

    string tag = substr(data, 0, offset());
    data += offset() + 1;
    const char *content_begin = data;

    bool self_terminating = false;

    if(strend(tag, "?") == true) {
      self_terminating = true;
      tag.rtrim_once("?");
    } else if(strend(tag, "/") == true) {
      self_terminating = true;
      tag.rtrim_once("/");
    }

    parse_head(tag);
    if(self_terminating) return true;

    while(*data) {
      unsigned index = element.size();
      xml_element node;
      if(node.parse_body(data) == false) {
        if(*data == '/') {
          signed length = data - content_begin - 1;
          if(length > 0) content = substr(content_begin, 0, length);

          data++;
          optional<unsigned> offset = strpos(data, ">");
          if(!offset) throw "...";

          tag = substr(data, 0, offset());
          data += offset() + 1;

          tag.replace("\t", " ");
          tag.replace("\r", " ");
          tag.replace("\n", " ");
          while(strpos(tag, "  ")) tag.replace("  ", " ");
          tag.rtrim();

          if(name != tag) throw "...";
          return true;
        }
      } else {
        element.append(node);
      }
    }
  }
}