/**
 * Tokenizer entry point, invoked repeatedly to pull the next token out of
 * the scanner. It peeks at the upcoming character, decides which kind of
 * token starts there, and hands off to the matching Consume* routine.
 *
 * @param aScanner     The source of our input.
 * @param aFlushTokens An OUT parameter to tell the caller whether it should
 *                     process our queued tokens up to now (e.g., when we
 *                     reach a <script>).
 * @return Success or error
 */
nsresult
nsHTMLTokenizer::ConsumeToken(nsScanner& aScanner, bool& aFlushTokens)
{
  PRUnichar nextChar;
  CToken* token = nullptr;

  nsresult result = aScanner.Peek(nextChar);
  if (kEOF == result) {
    // Nothing left to read: tell our caller that we are finished.
    return result;
  }

  // Tags and entities are only recognised outside of plain-text mode.
  if (!(mFlags & NS_IPARSER_FLAG_PLAIN_TEXT)) {
    if (kLessThan == nextChar) {
      return ConsumeTag(nextChar, token, aScanner, aFlushTokens);
    }
    if (kAmpersand == nextChar) {
      return ConsumeEntity(nextChar, token, aScanner);
    }
  }

  // Line breaks get their own token type, separate from other whitespace.
  if (kCR == nextChar || kLF == nextChar) {
    return ConsumeNewline(nextChar, token, aScanner);
  }

  if (nsCRT::IsAsciiSpace(nextChar)) {
    return ConsumeWhitespace(nextChar, token, aScanner);
  }

  if (nextChar != '\0') {
    return ConsumeText(token, aScanner);
  }

  // Skip the embedded null char. Fix bug 64098.
  // The status of GetChar() is deliberately not propagated; the value
  // returned below is still the one obtained from Peek().
  aScanner.GetChar(nextChar);
  return result;
}
/**
 * Tries to read an XML declaration or a text declaration ("<?xml ... ?>")
 * starting at the current parse position.
 *
 * If the input at `index` is not "<?xml" followed by whitespace, `index` is
 * restored to where it was on entry and FALSE is returned. Otherwise the
 * `version`, `encoding` and `standalone` pseudo-attributes are read in that
 * order, the closing "?>" is matched, and TRUE is returned.
 *
 * Results are stored in the textdecl_* members: textdecl_version,
 * textdecl_standalone, textdecl_encoding_start / textdecl_encoding_length,
 * and finally textdecl_encoding (pointer into `buffer`, or 0 when no
 * encoding was recorded).
 *
 * @param xmldecl Non-zero when an XML declaration is being read, zero for a
 *                text declaration. This selects the base error code and
 *                which parts are mandatory: `version` is required only when
 *                xmldecl is set, `encoding` is required only when it is not,
 *                and `standalone` is reported as an error when it is not.
 * @return TRUE if a declaration start was found and processed, FALSE if the
 *         input does not start with a declaration.
 *
 * NOTE(review): HandleError() is not visible here. Several statements that
 * follow a HandleError() call read `literal` as if the preceding check had
 * succeeded, so it presumably does not return normally -- confirm before
 * reordering anything in this function.
 */
BOOL
XMLInternalParser::ReadTextDecl (BOOL xmldecl)
{
  /* Remembered so the position can be restored if this is not a declaration. */
  unsigned start_index = index;

  textdecl_encoding_start = textdecl_encoding_length = 0;

  /* expecting_eof is raised only for the duration of the "<?xml" match.
     NOTE(review): presumably this lets Match() hit end of input without
     treating it as an error -- confirm against Match(). */
  expecting_eof = TRUE;
  BOOL has_start = Match (UNI_L ("<?xml"), 5), unproblematic;
  expecting_eof = FALSE;

  /* Base error code. The code below also reports error + 1 (bad version),
     error + 2 (bad encoding name) and error + 3 (bad standalone value).
     NOTE(review): this relies on the ParseError enum laying those codes out
     directly after each base value -- confirm in the enum declaration. */
  ParseError error = xmldecl ? PARSE_ERROR_Invalid_XMLDecl : PARSE_ERROR_Invalid_TextDecl;

  /* "<?xml" must be followed by whitespace to count as a declaration
     (assumes ConsumeWhitespace() reports whether any was consumed). */
  if (has_start && ConsumeWhitespace ())
    {
      /* Defaults used when the corresponding pseudo-attribute is absent. */
      textdecl_version = XMLVERSION_1_0;
      textdecl_standalone = XMLSTANDALONE_NONE;
      textdecl_encoding = 0;
      textdecl_encoding_length = 0;

      if (Match (UNI_L ("version"), 7))
        {
          ConsumeWhitespace ();
          if (!Match (UNI_L ("="), 1))
            HandleError (error);
          ConsumeWhitespace ();

          /* The value must be exactly "1.0" or "1.1", and the literal must
             not end exactly at the end of the available data. Note that &&
             binds tighter than ||, so the last two comparisons form one
             "third character is neither '0' nor '1'" condition. */
          if (!ReadQuotedLiteral (unproblematic) || index == length || literal_length != 3 || literal[0] != '1' || literal[1] != '.' || literal[2] != '0' && literal[2] != '1')
            HandleError ((ParseError) (error + 1));

          if (literal[2] == '0')
            textdecl_version = XMLVERSION_1_0;
          else
            textdecl_version = XMLVERSION_1_1;

          /* Outside the document context, a declared version greater than
             `version` is reported as a well-formedness error. */
          if (current_context != PARSE_CONTEXT_DOCUMENT && version < textdecl_version)
            HandleError (WELL_FORMEDNESS_ERROR_ExternalEntityHasLaterVersion);

          /* No whitespace after the version: nothing more can follow. The
             `else` belongs to the inner `if (!xmldecl)`: a text declaration
             still needs its encoding, so that is an error, whereas an XML
             declaration may end here. */
          if (!ConsumeWhitespace ())
            if (!xmldecl)
              HandleError (error);
            else
              goto end;
        }
      else if (xmldecl)
        /* The version is mandatory when xmldecl is set. */
        HandleError (error);

      if (Match (UNI_L ("encoding"), 8))
        {
          ConsumeWhitespace ();
          if (!Match (UNI_L ("="), 1))
            HandleError (error);
          ConsumeWhitespace ();

          /* The encoding name must be a non-empty quoted literal. */
          if (!ReadQuotedLiteral (unproblematic) || index == length || literal_length == 0)
            HandleError (error);

          /* First character must be an ASCII letter. */
          uni_char ch = literal[0];
          if ((ch < 'A' || ch > 'Z') && (ch < 'a' || ch > 'z'))
            HandleError ((ParseError) (error + 2));

          /* Every character must be an ASCII letter, a digit, '.', '_' or
             '-'. The loop-local `ch` shadows the one declared above. */
          for (const uni_char *ptr = literal, *ptr_end = ptr + literal_length; ptr != ptr_end; ++ptr)
            {
              uni_char ch = *ptr;
              if ((ch < 'A' || ch > 'Z') && (ch < 'a' || ch > 'z') && (ch < '0' || ch > '9') && ch != '.' && ch != '_' && ch != '-')
                HandleError ((ParseError) (error + 2));
            }

          /* Only the position and length are recorded here; the pointer is
             derived from them at the end of the function. */
          textdecl_encoding_start = literal_start;
          textdecl_encoding_length = literal_length;

          /* Without trailing whitespace only the closing "?>" may follow. */
          if (!ConsumeWhitespace ())
            goto end;
        }
      else if (!xmldecl)
        /* The encoding is mandatory when xmldecl is not set. */
        HandleError (error);

      /* Start of a possible standalone declaration, used for the error
         range reported below. */
      unsigned index_before_sddecl = index;

      if (Match (UNI_L ("standalone"), 10))
        {
          ConsumeWhitespace ();
          if (!Match (UNI_L ("="), 1))
            HandleError (error);
          ConsumeWhitespace ();

          if (!ReadQuotedLiteral (unproblematic) || index == length)
            HandleError (error);

          /* A standalone declaration is only accepted when xmldecl is set;
             otherwise the whole span just read is reported as the error. */
          if (!xmldecl)
            HandleError (error, index_before_sddecl, index - index_before_sddecl);

          /* Only the exact values "no" and "yes" are accepted. */
          if (literal_length == 2 && literal[0] == 'n' && literal[1] == 'o')
            textdecl_standalone = XMLSTANDALONE_NO;
          else if (literal_length == 3 && literal[0] == 'y' && literal[1] == 'e' && literal[2] == 's')
            textdecl_standalone = XMLSTANDALONE_YES;
          else
            HandleError ((ParseError) (error + 3));

          ConsumeWhitespace ();
        }
    }
  else
    {
      /* Not a declaration: undo whatever was consumed and tell the caller. */
      index = start_index;
      return FALSE;
    }

  /* Reached both by falling through and by the early `goto end` exits. */
end:
  if (!Match (UNI_L ("?>"), 2))
    HandleError (error);

  /* A start offset of 0 doubles as "no encoding recorded". */
  textdecl_encoding = textdecl_encoding_start != 0 ? buffer + textdecl_encoding_start : 0;
  return TRUE;
}