/**
 * Consume the document prolog and attach whatever it yields to @p node.
 *
 * The steps run in this fixed order: XML declaration, any number of "misc"
 * items, the document type declaration, then any number of "misc" items
 * again.  The results of parseXMLDeclaration() and parseDocType() are not
 * inspected, so neither is required from this function's point of view.
 *
 * @param node  Node handed to every sub-parser.
 * @return Always true.
 */
bool Parser::parseProlog( Node *node )
{
    parseXMLDeclaration( node );

    // Swallow every misc item that precedes the doctype.
    for( ;; ) {
        if( !parseMisc( node ) )
            break;
    }

    parseDocType( node );

    // Swallow every misc item that follows the doctype.
    for( ;; ) {
        if( !parseMisc( node ) )
            break;
    }

    return true;
}
/**
 * Parse a whole document and return its top-level node.
 *
 * A new Node is allocated with an empty name and node type Document, then
 * the prolog, the root element and any trailing "misc" items are parsed
 * into it.  A failed prolog or element parse is reported through
 * reportError(), after which parsing continues with the next step.
 *
 * @return The newly allocated document node.  It is created with `new` and
 *         returned as a raw pointer; presumably the caller is expected to
 *         delete it -- confirm against callers.
 */
Node *Parser::parseDocument()
{
    readChar( false );

    Node *document = new Node;
    document->setName( "" );
    document->setNodeType( Document );

    const bool prologOk = parseProlog( document );
    if( !prologOk ) {
        reportError( "Missing prolog" );
    }

    const bool rootOk = parseElement( document );
    if( !rootOk ) {
        reportError( "Missing content" );
    }

    // Anything that is merely "misc" may still trail the root element.
    for( ;; ) {
        if( !parseMisc( document ) )
            break;
    }

    return document;
}
/**
 * Parse an XML document held in a string and build its element tree.
 *
 * The input is consumed in document order: an optional XML declaration,
 * "misc" items, an optional DOCTYPE, more "misc" items, the root element
 * with all of its nested content, and finally trailing "misc" items.  Any
 * input left over after that is an error.
 *
 * Nested elements are tracked with fElementStack rather than recursion.
 * On every error exit the partially built tree is deleted and the element
 * stack is emptied, so that the parser can be reused for another parse()
 * call without stale entries left on the stack.
 *
 * @param src     The XML text.  All matchers are reset onto this string.
 * @param status  Error code.  If it already indicates failure on entry the
 *                function returns NULL without doing anything; parse
 *                problems are reported through error(..., status).
 * @return The root element on success (presumably owned by the caller --
 *         confirm against callers), or NULL on any error.
 */
UXMLElement *
UXMLParser::parse(const UnicodeString &src, UErrorCode &status) {
    if(U_FAILURE(status)) {
        return NULL;
    }

    UXMLElement   *root = NULL;
    fPos = 0; // TODO use just a local pos variable and pass it into functions
              // where necessary?

    // set all matchers to work on the input string
    mXMLDecl.reset(src);
    mXMLComment.reset(src);
    mXMLSP.reset(src);
    mXMLDoctype.reset(src);
    mXMLPI.reset(src);
    mXMLElemStart.reset(src);
    mXMLElemEnd.reset(src);
    mXMLElemEmpty.reset(src);
    mXMLCharData.reset(src);
    mAttrValue.reset(src);
    mAttrNormalizer.reset(src);
    mNewLineNormalizer.reset(src);
    mAmps.reset(src);

    // Consume the XML Declaration, if present.
    if (mXMLDecl.lookingAt(fPos, status)) {
        fPos = mXMLDecl.end(status);
    }

    // Consume "misc" [XML production 27] appearing before DocType
    parseMisc(status);

    // Consume a DocType declaration, if present.
    if (mXMLDoctype.lookingAt(fPos, status)) {
        fPos = mXMLDoctype.end(status);
    }

    // Consume additional "misc" [XML production 27] appearing after the DocType
    parseMisc(status);

    // Get the root element
    if (mXMLElemEmpty.lookingAt(fPos, status)) {
        // Root is an empty element (no nested elements or content)
        root = createElement(mXMLElemEmpty, status);
        fPos = mXMLElemEmpty.end(status);
    } else {
        if (mXMLElemStart.lookingAt(fPos, status) == FALSE) {
            error("Root Element expected", status);
            goto errorExit;
        }
        root = createElement(mXMLElemStart, status);
        UXMLElement  *el = root;

        //
        // This is the loop that consumes the root element of the document,
        //      including all nested content.   Nested elements are handled by
        //      explicit pushes/pops of the element stack; there is no recursion
        //      in the control flow of this code.
        //      "el" always refers to the current element, the one to which content
        //      is being added.  It is above the top of the element stack.
        //
        // NOTE(review): the element branches below never assign fPos
        //      themselves, so the loop relies on createElement() moving fPos
        //      past the tag it consumed -- confirm in createElement().
        for (;;) {
            // Nested Element Start
            if (mXMLElemStart.lookingAt(fPos, status)) {
                UXMLElement *t = createElement(mXMLElemStart, status);
                el->fChildren.addElement(t, status);
                t->fParent = el;
                fElementStack.push(el, status);
                el = t;
                continue;
            }

            // Text Content.  String is concatenated onto the current node's content,
            //                but only if it contains something other than spaces.
            UnicodeString s = scanContent(status);
            if (s.length() > 0) {
                mXMLSP.reset(s);
                if (mXMLSP.matches(status) == FALSE) {
                    // This chunk of text contains something other than just
                    //  white space. Make a child node for it.
                    replaceCharRefs(s, status);
                    el->fChildren.addElement(s.clone(), status);
                }
                mXMLSP.reset(src);    // The matchers need to stay set to the main input string.
                continue;
            }

            // Comments.  Discard.
            if (mXMLComment.lookingAt(fPos, status)) {
                fPos = mXMLComment.end(status);
                continue;
            }

            // PIs.  Discard.
            if (mXMLPI.lookingAt(fPos, status)) {
                fPos = mXMLPI.end(status);
                continue;
            }

            // Element End
            if (mXMLElemEnd.lookingAt(fPos, status)) {
                fPos = mXMLElemEnd.end(0, status);
                const UnicodeString name = mXMLElemEnd.group(1, status);
                if (name != *el->fName) {
                    error("Element start / end tag mismatch", status);
                    goto errorExit;
                }
                if (fElementStack.empty()) {
                    // Close of the root element.  We're done with the doc.
                    el = NULL;
                    break;
                }
                el = (UXMLElement *)fElementStack.pop();
                continue;
            }

            // Empty Element.  Stored as a child of the current element, but not stacked.
            // NOTE(review): unlike the nested start-tag branch above, fParent
            //      is not set on this child -- confirm whether that is intended.
            if (mXMLElemEmpty.lookingAt(fPos, status)) {
                UXMLElement *t = createElement(mXMLElemEmpty, status);
                el->fChildren.addElement(t, status);
                continue;
            }

            // Hit something within the document that doesn't match anything.
            // It's an error.
            error("Unrecognized markup", status);
            break;
        }

        if (el != NULL || !fElementStack.empty()) {
            // We bailed out early, for some reason.
            error("Root element not closed.", status);
            goto errorExit;
        }
    }

    // Root Element parse is complete.
    // Consume the annoying xml "Misc" that can appear at the end of the doc.
    parseMisc(status);

    // We should have reached the end of the input
    if (fPos != src.length()) {
        error("Extra content at the end of the document", status);
        goto errorExit;
    }

    // Success!
    return root;

errorExit:
    // The stack only holds non-owning pointers to elements inside the tree
    // rooted at "root", which is deleted just below.  Empty it here;
    // otherwise the next parse() on this parser would find the stack
    // non-empty when its root element closes and would pop one of these
    // dangling pointers instead of finishing the document.
    while (!fElementStack.empty()) {
        fElementStack.pop();
    }
    delete root;
    return NULL;
}