Ejemplo n.º 1
0
/***********************************************************************//**
 * @brief Parse XML URL
 *
 * @param[in] url Unified Resource Locator.
 *
 * @exception GException::xml_syntax_error
 *            XML syntax error.
 *
 * Reads the URL character by character and splits the stream into
 * segments that are either text or markup. Text segments are handed to
 * process_text() when the next markup starts, markup segments are handed
 * to process_markup() when their closing bracket is reached. A markup
 * that starts with "<!--" is treated as a comment and is only terminated
 * by "-->", so that brackets within a comment are not interpreted.
 *
 * Linefeeds are dropped from text segments and from comments. Within any
 * other markup a linefeed is replaced by a single blank, so that tokens
 * which are only separated by a line break (for example a tag name and
 * an attribute on the next line) are not merged.
 *
 * A syntax error is thrown if
 * - a special linefeed character is encountered within a markup,
 * - a ">" is encountered outside a markup,
 * - a "<" is encountered within a markup that is not a comment,
 * - the current node is not the root node once all input was consumed.
 ***************************************************************************/
void GXml::parse(const GUrl& url)
{
    // Initialise parser
    int         c;
    bool        in_markup  = false;
    bool        in_comment = false;
    std::string segment;
    GXmlNode*   current = &m_root;

    // Main parsing loop
    while ((c = url.get_char()) != EOF) {

        // Convert special characters into line feeds
        // NOTE(review): whether a byte with value 0x85 ever compares equal
        // to the char literal '\x85' depends on the signedness of char and
        // on how GUrl::get_char() widens its result; a single byte can
        // presumably never equal L'\x2028'. Confirm the intended behaviour.
        if (c == '\x85' || c == L'\x2028') {
            if (in_markup) {
                 throw GException::xml_syntax_error(G_PARSE, segment,
                                   "invalid character encountered");
            }
            else {
                c = '\x0a';
            }
        }

        // Handle linefeeds. Within a markup that is not a comment the
        // linefeed acts as a token separator, hence it is replaced by a
        // blank. Otherwise the linefeed is skipped (to avoid extra
        // linefeeds in text segments).
        if (c == '\x0a') {
            if (in_markup && !in_comment) {
                c = ' ';
            }
            else {
                continue;
            }
        }

        // Character that will be appended to the segment
        const char ch = static_cast<char>(c);

        // If we are not within a markup and if a markup is reached then
        // add the text segment to the nodes and switch to in_markup mode
        if (!in_markup) {

            // Markup start reached?
            if (c == '<') {

                // Add text segment to nodes (ignores empty segments)
                process_text(&current, segment);

                // Prepare new segment and signal that we are within tag
                segment.clear();
                segment.append(1, ch);
                in_markup = true;

            }

            // Markup stop encountered?
            else if (c == '>') {
                 segment.append(1, ch);
                 throw GException::xml_syntax_error(G_PARSE, segment,
                       "unexpected closing bracket \">\" encountered");
            }

            // ... otherwise add character to segment
            else {
                segment.append(1, ch);
            }
        }

        // If we are within a markup and if a markup end is reached then
        // process the markup and switch to not in_markup mode
        else {

            // Markup stop reached?
            if (c == '>') {

                // Append character to segment
                segment.append(1, ch);

                // If we are in comment then check if this is the end of
                // the comment, i.e. if the segment terminates with "-->"
                if (in_comment) {
                    const std::string::size_type n = segment.length();
                    if (n > 2 && segment.compare(n-3, 3, "-->") == 0) {
                        in_comment = false;
                    }
                }

                // If we are not in the comment, then process markup
                if (!in_comment) {

                    // Process markup
                    process_markup(&current, segment);

                    // Prepare new segment and signal that we are not
                    // within markup
                    segment.clear();
                    in_markup = false;
                }
            }

            // Markup start encountered?
            else if (!in_comment && c == '<') {

                // Append character to segment
                segment.append(1, ch);

                // If we encounter an opening bracket then throw an exception
                throw GException::xml_syntax_error(G_PARSE, segment,
                      "unexpected opening bracket \"<\" encountered");
            }

            // ... otherwise add character to segment and check whether
            // the segment now forms the start of a comment
            else {
                segment.append(1, ch);
                if (!in_comment && segment == "<!--") {
                    in_comment = true;
                }
            }
        }

    } // endwhile: main parsing loop

    // Process any pending segment. The segment is dispatched according to
    // the mode the parser was in when the input ended.
    if (!segment.empty()) {
        if (in_markup) {
            process_markup(&current, segment);
        }
        else {
            process_text(&current, segment);
        }
    }

    // Verify that we are back to the root node
    if (current != &m_root) {
        std::string message = "closing tag ";
        GXmlElement* element = dynamic_cast<GXmlElement*>(current);
        if (element != NULL) {
            message += "for GXmlElement \""+element->name()+"\"";
        }
        message += " is missing";
        throw GException::xml_syntax_error(G_PARSE, "", message);
    }

    // Return
    return;
}