Esempio n. 1
0
bool Parser::parseProlog( Node *node )
{
    parseXMLDeclaration( node );
    while( parseMisc( node ) ) {}

    parseDocType( node );
    while( parseMisc( node ) ) {}

    return true;
}
Esempio n. 2
0
Node *Parser::parseDocument()
{
    readChar( false );
    Node *result = new Node;
    result->setName( "" );
    result->setNodeType( Document );

    if( !parseProlog( result ) )
        reportError( "Missing prolog" );

    if( !parseElement( result ) )
        reportError( "Missing content" );

    while( parseMisc( result ) ) {}

    return result;
}
Esempio n. 3
0
UXMLElement *
UXMLParser::parse(const UnicodeString &src, UErrorCode &status) {
    if(U_FAILURE(status)) {
        return NULL;
    }

    UXMLElement   *root = NULL;
    fPos = 0; // TODO use just a local pos variable and pass it into functions
              // where necessary?

    // set all matchers to work on the input string
    mXMLDecl.reset(src);
    mXMLComment.reset(src);
    mXMLSP.reset(src);
    mXMLDoctype.reset(src);
    mXMLPI.reset(src);
    mXMLElemStart.reset(src);
    mXMLElemEnd.reset(src);
    mXMLElemEmpty.reset(src);
    mXMLCharData.reset(src);
    mAttrValue.reset(src);
    mAttrNormalizer.reset(src);
    mNewLineNormalizer.reset(src);
    mAmps.reset(src);

    // Consume the XML Declaration, if present.
    if (mXMLDecl.lookingAt(fPos, status)) {
        fPos = mXMLDecl.end(status);
    }

    // Consume "misc" [XML production 27] appearing before DocType
    parseMisc(status);

    // Consume a DocType declaration, if present.
    if (mXMLDoctype.lookingAt(fPos, status)) {
        fPos = mXMLDoctype.end(status);
    }

    // Consume additional "misc" [XML production 27] appearing after the DocType
    parseMisc(status);

    // Get the root element
    if (mXMLElemEmpty.lookingAt(fPos, status)) {
        // Root is an empty element (no nested elements or content)
        root = createElement(mXMLElemEmpty, status);
        fPos = mXMLElemEmpty.end(status);
    } else {
        if (mXMLElemStart.lookingAt(fPos, status) == FALSE) {
            error("Root Element expected", status);
            goto errorExit;
        }
        root = createElement(mXMLElemStart, status);
        UXMLElement  *el = root;

        //
        // This is the loop that consumes the root element of the document,
        //      including all nested content.   Nested elements are handled by
        //      explicit pushes/pops of the element stack; there is no recursion
        //      in the control flow of this code.
        //      "el" always refers to the current element, the one to which content
        //      is being added.  It is above the top of the element stack.
        for (;;) {
            // Nested Element Start
            if (mXMLElemStart.lookingAt(fPos, status)) {
                UXMLElement *t = createElement(mXMLElemStart, status);
                el->fChildren.addElement(t, status);
                t->fParent = el;
                fElementStack.push(el, status);
                el = t;
                continue;
            }

            // Text Content.  String is concatenated onto the current node's content,
            //                but only if it contains something other than spaces.
            UnicodeString s = scanContent(status);
            if (s.length() > 0) {
                mXMLSP.reset(s);
                if (mXMLSP.matches(status) == FALSE) {
                    // This chunk of text contains something other than just
                    //  white space. Make a child node for it.
                    replaceCharRefs(s, status);
                    el->fChildren.addElement(s.clone(), status);
                }
                mXMLSP.reset(src);    // The matchers need to stay set to the main input string.
                continue;
            }

            // Comments.  Discard.
            if (mXMLComment.lookingAt(fPos, status)) {
                fPos = mXMLComment.end(status);
                continue;
            }

            // PIs.  Discard.
            if (mXMLPI.lookingAt(fPos, status)) {
                fPos = mXMLPI.end(status);
                continue;
            }

            // Element End
            if (mXMLElemEnd.lookingAt(fPos, status)) {
                fPos = mXMLElemEnd.end(0, status);
                const UnicodeString name = mXMLElemEnd.group(1, status);
                if (name != *el->fName) {
                    error("Element start / end tag mismatch", status);
                    goto errorExit;
                }
                if (fElementStack.empty()) {
                    // Close of the root element.  We're done with the doc.
                    el = NULL;
                    break;
                }
                el = (UXMLElement *)fElementStack.pop();
                continue;
            }

            // Empty Element.  Stored as a child of the current element, but not stacked.
            if (mXMLElemEmpty.lookingAt(fPos, status)) {
                UXMLElement *t = createElement(mXMLElemEmpty, status);
                el->fChildren.addElement(t, status);
                continue;
            }

            // Hit something within the document that doesn't match anything.
            //   It's an error.
            error("Unrecognized markup", status);
            break;
        }

        if (el != NULL || !fElementStack.empty()) {
            // We bailed out early, for some reason.
            error("Root element not closed.", status);
            goto errorExit;
        }
    }

    // Root Element parse is complete.
    // Consume the annoying xml "Misc" that can appear at the end of the doc.
    parseMisc(status);

    // We should have reached the end of the input
    if (fPos != src.length()) {
        error("Extra content at the end of the document", status);
        goto errorExit;
    }

    // Success!
    return root;

errorExit:
    delete root;
    return NULL;
}