Пример #1
0
// Parses a document type declaration starting at the current position of the
// input stream s and returns it as a tuple tree labelled "doctype".
//
// The tuple receives, in order: the result of parse_name (), then the result
// of parse_system () or parse_public () when the corresponding keyword
// follows, then one child per declaration found in the optional "[ ... ]"
// internal subset (see the per-branch notes below for the exceptions).
// A trailing ">" is consumed when present.
tree
xml_html_parser::parse_doctype () {
  // The caller dispatches here on "<!DOCTYPE"; step over those 9 characters.
  s += 9;
  tree doctype= tuple ("doctype");

  skip_space ();
  doctype << parse_name ();
  skip_space ();
  if (test (s, "SYSTEM")) doctype << parse_system ();
  else if (test (s, "PUBLIC")) doctype << parse_public ();
  skip_space ();

  if (test (s, "[")) {
    s += 1;
    while (s) {
      skip_space ();
      // "]" terminates the internal subset.
      if (test (s, "]")) { s += 1; break; }
      if (test (s, "<!ELEMENT")) { doctype << parse_element (); continue; }
      // NOTE(review): attribute-list declarations are handed to parse_cdata ();
      // confirm this is intended and not a stand-in for a dedicated parser.
      if (test (s, "<!ATTLIST")) { doctype << parse_cdata (); continue; }
      // Entity declarations contribute nothing to the returned tuple.
      if (test (s, "<!ENTITY")) { parse_entity_decl (); continue; }
      // NOTE(review): unlike every other branch, the notation is appended to
      // a instead of the doctype tuple; verify against the consumers of a.
      if (test (s, "<!NOTATION")) { a << parse_notation (); continue; }
      if (test (s, "<?")) { doctype << parse_pi (); continue; }
      if (test (s, "<!--")) { doctype << parse_comment (); continue; }
      // Entity references are parsed only to move past them; result dropped.
      if (s[0] == '&' || s[0] == '%') { (void) parse_entity (); continue; }
      // Anything unrecognised is skipped one character at a time.
      s += 1;
    }
  }

  skip_space ();
  if (test (s, ">")) s += 1;
  return doctype;
}
Пример #2
0
void
xml_html_parser::parse () {
  string r;
  while (s) {
    if (s[0] == '<') {
      if (N(r) != 0) { a << tree (r); }
      if (test (s, "</")) a << parse_closing ();
      else if (test (s, "<?")) a << parse_pi ();
      else if (test (s, "<!--")) a << parse_comment ();
      else if (test (s, "<![CDATA[")) a << parse_cdata ();
      else if (test (s, "<!DOCTYPE")) a << parse_doctype ();
      else if (test (s, "<!")) a << parse_misc ();
      else a << parse_opening ();
      r= "";
    }
    else if (s[0] == '&') r << parse_entity ();
    else r << s->read (1);
  }
  if (N(r) != 0) a << tree (r);
}
Пример #3
0
/*
 * Parse `node`, its children and every sibling that follows it, emitting the
 * result through `sibxml`.
 *
 * Siblings are walked iteratively so that stack depth depends only on the
 * nesting depth of the document, not on how many siblings a node has;
 * recursion is used for children only. A NULL `node` is treated as an empty
 * sibling list.
 *
 * Returns 0 on success and -1 on any error (unknown node type, a failing
 * parse function, or an unexpected status from a parse function).
 */
static int parse_node(xmlNodePtr node, simple_binary_t **sibxml)
{
    int status;

    for (; node != NULL; node = node->next) {
        status = 0;

        /* Call the parser function of the node type. */
        switch (node->type) {
        case XML_ELEMENT_NODE:
            status = parse_element(node, sibxml);
            break;
        case XML_TEXT_NODE:
            status = parse_text(node, sibxml);
            break;
        case XML_CDATA_SECTION_NODE:
            status = parse_cdata(node, sibxml);
            break;
        case XML_COMMENT_NODE:
        case XML_PI_NODE:
            /* Comments and PIs are ignored. */
            break;
        /*
         * XML also has many other node types; these are not needed with
         * SI and are therefore treated as an error.
         */
        default:
            error(0, "SI compiler: Unknown XML node in the SI source.");
            return -1;
        }

        /*
         * If the node is an element with content, it needs an end tag after
         * its children. parse_element signals this by returning 1; status 0
         * means the children are parsed without a trailing end tag.
         */
        switch (status) {
        case 0:
        case 1:
            if (node->children != NULL)
                if (parse_node(node->children, sibxml) == -1)
                    return -1;
            if (status == 1)
                parse_end(sibxml);
            break;
        case -1: /* Something went wrong in the parsing. */
            return -1;
        default:
            warning(0,"SI compiler: undefined return value in a parse function.");
            return -1;
        }
    }

    return 0;
}
Пример #4
0
bool xml_istream::process()
{
    stack<string> tag_stack;
    stack< map<string, string> > attribute_stack;

    bool ok = true;

    while (ok != false && good() != false)
    {
        string tag, cdata, doctype, s;
        map<string, string> attributes;

        xml_t t = get_xml_node(s, tag, attributes);

        switch (t)
        {
        case OPEN_TAG:
            tag_stack.push(tag);
            attribute_stack.push(attributes);
            break;

        case EMPTY_TAG:
        {
            callback pFunc = _callback_map[tag];

            if (pFunc != NULL)
            {
                pFunc(s, attributes);
            }
        }
        break;

        case CLOSE_TAG:
            if (tag_stack.size() > 0 && tag_stack.top() == tag) /* Ensure opening tag matches the closing tag. */
            {
                tag_stack.pop();
                attribute_stack.pop();
            }
            else
            {
                ok = false;
            }
            break;

        case TEXT:
            if (s.empty() == false && tag_stack.size() > 0)
            {
                callback pFunc = _callback_map[tag_stack.top()];

                if (pFunc != NULL)
                {
                    if (_entity_translation != false)
                    {
                        string ts;
                        entity_translation(s, ts);
                        s = ts;
                    }

                    if (_collapse_whitespace != false)
                    {
                        string ts;
                        collapse_whitespace(s, ts);
                        s = ts;
                    }

                    pFunc(s, attribute_stack.top());
                }
            }
            else
            {
                ok = false;
            }
            break;

        case CDATA:
            ok = parse_cdata(s, cdata);

            if (cdata.empty() == false && tag_stack.size() > 0)
            {
                callback pFunc = _callback_map[tag_stack.top()];

                if (pFunc != NULL)
                {
                    pFunc(cdata, attribute_stack.top());
                }
            }
            else
            {
                ok = false;
            }
            break;

        case UNKNOWN:
            ok = false;
            break;
        }
    }

    if (tag_stack.size() != 0 || attribute_stack.size() != 0)
    {
        ok = false;
    }

    return ok;
}