/***********************************************************************//** * @brief Parse XML URL * * @param[in] url Unified Resource Locator. * * @exception GException::xml_syntax_error * XML syntax error. * * Parses either a XML file or a XML text string and creates all associated * nodes. The XML file is split into segments, made either of text or of * tags. ***************************************************************************/ void GXml::parse(const GUrl& url) { // Initialise parser int c; bool in_markup = false; bool in_comment = false; std::string segment; GXmlNode* current = &m_root; // Main parsing loop while ((c = url.get_char()) != EOF) { // Convert special characters into line feeds if (c == '\x85' || c == L'\x2028') { if (in_markup) { throw GException::xml_syntax_error(G_PARSE, segment, "invalid character encountered"); } else { c = '\x0a'; } } // Skip all linefeeds (to avoid extra linefeeds in text segments) if (c == '\x0a') { continue; } // If we are not within a markup and if a markup is reached then // add the text segment to the nodes and switch to in_markup mode if (in_markup == false) { // Markup start reached? if (c == '<') { // Add text segment to nodes (ignores empty segments) process_text(¤t, segment); // Prepare new segment and signal that we are within tag segment.clear(); segment.append(1, (char)c); in_markup = true; } // Markup stop encountered? else if (c == '>') { segment.append(1, (char)c); throw GException::xml_syntax_error(G_PARSE, segment, "unexpected closing bracket \">\" encountered"); } // ... otherwise add character to segment else { segment.append(1, (char)c); } } // If we are within a markup and if a markup end is reached then // process the markup and switch to not in_tag mode else { // Markup stop reached? if (c == '>') { // Append character to segment segment.append(1, (char)c); // If we are in comment then check if this is the end of // the comment if (in_comment) { int n = segment.length(); if (n > 2) { if (segment.compare(n-3,3,"-->") == 0) { in_comment = false; } } } // If we are not in the comment, then process markup if (!in_comment) { // Process markup process_markup(¤t, segment); // Prepare new segment and signal that we are not // within markup segment.clear(); in_markup = false; } } // Markup start encountered? else if (!in_comment && c == '<') { // Append character to segment segment.append(1, (char)c); // If we encounter an opening bracket then throw an exception throw GException::xml_syntax_error(G_PARSE, segment, "unexpected opening bracket \"<\" encountered"); } // ... otherwise add character to segment else { segment.append(1, (char)c); if (!in_comment && segment == "<!--") { in_comment = true; } } } } // endwhile: main parsing loop // Process any pending segment if (segment.size() > 0) { if (in_markup) { process_markup(¤t, segment); } else { process_text(¤t, segment); } } // Verify that we are back to the root node if (current != &m_root) { std::string message = "closing tag "; GXmlElement* element = dynamic_cast<GXmlElement*>(current); if (element != NULL) { message += "for GXmlElement \""+element->name()+"\""; } message += " is missing"; throw GException::xml_syntax_error(G_PARSE, "", message); } // Return return; }