void XMLParser::ParseLevel() { // MAIN PARSING LOOP while(*m_pointer) { // STEP 1: Skip whitespace SkipOuterWhiteSpace(); // STEP 2: Check for end if(!*m_pointer) { return; } // STEP 3: Check for a node if(*m_pointer != '<') { SetError(XmlError::XE_NotAnXMLMessage,m_pointer); return; } // STEP 4: One of five node types if(strncmp((const char*)m_pointer,"<?xml",5) == 0) { ParseDeclaration(); } else if(strncmp((const char*)m_pointer,"<!--",4) == 0) { ParseComment(); } else if(strncmp((const char*)m_pointer,"<![CDATA[",9) == 0) { ParseCDATA(); return; } else if(strncmp((const char*)m_pointer,"<!",2) == 0) { ParseDTD(); } else if(ParseElement()) { ParseAfterElement(); return; } } }
bool ParsePrologue (ParserCtx *pCtx)

//	ParsePrologue
//
//	Parses the optional <?xml ... ?> prologue and the optional <!DOCTYPE ...>
//	declaration that may follow it.
//
//	Returns true on success, including the case where the first token is not
//	a processing-instruction open (no prologue). On failure an error value is
//	stored in pCtx->sError and false is returned.
//
//	If pCtx->m_bParseRootTag is set and a DOCTYPE is present, the DOCTYPE name
//	is copied to pCtx->m_sRootTag and the function returns true right away,
//	without parsing the rest of the declaration.
//
//	NOTE(review): this code reads pCtx->iToken and pCtx->sToken after calling
//	ParseToken, so ParseToken is presumed to leave the current token there --
//	confirm against the tokenizer.

	{
	//	We don't allow any whitespace at the beginning

	if (*pCtx->pPos != '<')
		{
		pCtx->sError = ERR_XML_PROLOGUE_EXPECTED;
		return false;
		}

	//	Expect open processor instruction. If we don't find it, then we
	//	assume that there is no prologue and leave the root element to the
	//	caller.

	if (ParseToken(pCtx) != tkPIOpen)
		return true;

	//	Expect XML tag (compared case-insensitively)

	if (ParseToken(pCtx) != tkText || !strEquals(strToLower(pCtx->sToken), STR_XML))
		{
		pCtx->sError = ERR_XML_PROLOGUE_EXPECTED;
		return false;
		}

	//	Parse contents: a sequence of name="value" attributes. The checks
	//	that share an error value are chained with ||, which evaluates left
	//	to right and stops at the first mismatch, so tokens are consumed in
	//	the order: name, equals, quote, value, quote.

	while (ParseToken(pCtx) == tkText)
		{
		CString sTokenLC = strToLower(pCtx->sToken);

		if (strEquals(sTokenLC, FIELD_VERSION))
			{
			if (ParseToken(pCtx) != tkEquals || ParseToken(pCtx) != tkQuote)
				{
				pCtx->sError = ERR_VERSION_EXPECTED;
				return false;
				}

			//	The value must match STR_10

			if (ParseToken(pCtx) != tkText || !strEquals(pCtx->sToken, STR_10))
				{
				pCtx->sError = ERR_VERSION_10_EXPECTED;
				return false;
				}

			if (ParseToken(pCtx) != tkQuote)
				{
				pCtx->sError = ERR_INVALID_VERSION_ATTRIB;
				return false;
				}
			}
		else if (strEquals(sTokenLC, FIELD_ENCODING))
			{
			if (ParseToken(pCtx) != tkEquals || ParseToken(pCtx) != tkQuote)
				{
				pCtx->sError = ERR_ENCODING_EXPECTED;
				return false;
				}

			//	Any text value is accepted; it is not inspected here

			if (ParseToken(pCtx) != tkText || ParseToken(pCtx) != tkQuote)
				{
				pCtx->sError = ERR_INVALID_ENCODING_ATTRIB;
				return false;
				}
			}
		else
			{
			//	Assume it is an unknown attribute: require the
			//	name="value" shape but ignore the value.

			if (ParseToken(pCtx) != tkEquals
					|| ParseToken(pCtx) != tkQuote
					|| ParseToken(pCtx) != tkText
					|| ParseToken(pCtx) != tkQuote)
				{
				pCtx->sError = ERR_INVALID_PROLOG_ATTRIB;
				return false;
				}
			}

#ifdef LATER
		//	Handle EncodingDecl
		//	Handle RMDecl
#endif
		}

	//	Expect close. The loop above stopped on the first non-text token,
	//	which must be the end of the processing instruction.

	if (pCtx->iToken != tkPIClose)
		{
		pCtx->sError = ERR_INVALID_XML_PROLOG;
		return false;
		}

	//	See if we've got a DOCTYPE declaration

	if (ParseToken(pCtx) == tkDeclOpen)
		{
		if (ParseToken(pCtx) != tkText || !strEqualsNoCase(pCtx->sToken, STR_DOCTYPE))
			{
			pCtx->sError = ERR_DOCTYPE_EXPECTED;
			return false;
			}

		//	Get the name

		if (ParseToken(pCtx) != tkText)
			{
			pCtx->sError = ERR_DOCTYPE_NAME_EXPECTED;
			return false;
			}

		//	If we're just looking for the root tag, then we can quit after this

		if (pCtx->m_bParseRootTag)
			{
			pCtx->m_sRootTag = pCtx->sToken;
			return true;
			}

		//	External ID?

		if (ParseToken(pCtx, StartDeclState) == tkText)
			{
			//	Either SYSTEM or PUBLIC (the keyword itself is not checked)

			//	Expect a quote

			if (ParseToken(pCtx) != tkQuote)
				{
				pCtx->sError = ERR_DOCTYPE_ID_EXPECTED;
				return false;
				}

			pCtx->iAttribQuote = tkQuote;

			//	Get the path or URL. The value is validated but not kept.

			if (ParseToken(pCtx, AttributeState) != tkText)
				{
				pCtx->sError = ERR_DOCTYPE_ID_EXPECTED;
				return false;
				}

			//	End quote

			if (ParseToken(pCtx) != pCtx->iAttribQuote)
				{
				pCtx->sError = ERR_DOCTYPE_QUOTE_EXPECTED;
				return false;
				}

			ParseToken(pCtx);

			//	A PUBLIC external ID carries two literals (public identifier,
			//	then system identifier). If another quote follows, consume the
			//	second literal with the same token sequence as the first
			//	instead of failing the close check below.

			if (pCtx->iToken == tkQuote)
				{
				pCtx->iAttribQuote = tkQuote;

				if (ParseToken(pCtx, AttributeState) != tkText)
					{
					pCtx->sError = ERR_DOCTYPE_ID_EXPECTED;
					return false;
					}

				if (ParseToken(pCtx) != pCtx->iAttribQuote)
					{
					pCtx->sError = ERR_DOCTYPE_QUOTE_EXPECTED;
					return false;
					}

				ParseToken(pCtx);
				}
			}

		//	Internal DTD?

		if (pCtx->iToken == tkBracketOpen)
			{
			if (!ParseDTD(pCtx))
				return false;

			if (pCtx->iToken != tkBracketClose)
				{
				pCtx->sError = ERR_DOCTYPE_BRACKET_EXPECTED;
				return false;
				}

			ParseToken(pCtx);
			}

		//	Close

		if (pCtx->iToken != tkTagClose)
			{
			pCtx->sError = ERR_DOCTYPE_GREATER_EXPECTED;
			return false;
			}

		ParseToken(pCtx);
		}

	//	Skip any text tokens so that the current token on return is the
	//	first non-text token after the prologue.

	while (pCtx->iToken == tkText)
		ParseToken(pCtx);

	return true;
	}