/**
 * Entry point invoked repeatedly by the tokenizer. It peeks at the next
 * character in the scanner, decides what kind of token starts there, and
 * delegates to the matching Consume* method.
 *
 * Dispatch order:
 *   - '<' and '&' start a tag / entity, unless the tokenizer is in
 *     plain-text mode (NS_IPARSER_FLAG_PLAIN_TEXT set in mFlags);
 *   - CR or LF starts a newline token;
 *   - any other ASCII whitespace starts a whitespace token;
 *   - an embedded NUL character is read and discarded;
 *   - everything else is consumed as text.
 *
 * @param  aScanner     The source of our input.
 * @param  aFlushTokens An OUT parameter telling the caller whether it should
 *                      process the tokens queued so far (e.g., when we reach
 *                      a <script>). Here it is only forwarded to ConsumeTag.
 * @return kEOF when the scanner reports end of input; otherwise the result
 *         of the delegated Consume* call, or the result of the initial Peek
 *         when a NUL character was skipped.
 */
nsresult
nsHTMLTokenizer::ConsumeToken(nsScanner& aScanner, bool& aFlushTokens)
{
  PRUnichar theChar;
  CToken* theToken = nullptr;

  nsresult result = aScanner.Peek(theChar);
  if (kEOF == result) {
    // Nothing left to read; let the caller know we are finished.
    return result;
  }

  // Markup characters are only special outside of plain-text mode.
  if (!(mFlags & NS_IPARSER_FLAG_PLAIN_TEXT)) {
    if (kLessThan == theChar) {
      return ConsumeTag(theChar, theToken, aScanner, aFlushTokens);
    }
    if (kAmpersand == theChar) {
      return ConsumeEntity(theChar, theToken, aScanner);
    }
  }

  if (kCR == theChar || kLF == theChar) {
    return ConsumeNewline(theChar, theToken, aScanner);
  }

  if (nsCRT::IsAsciiSpace(theChar)) {
    return ConsumeWhitespace(theChar, theToken, aScanner);
  }

  if ('\0' == theChar) {
    // Skip the embedded null char (bug 64098). The status of the Peek
    // above is what gets reported in this case.
    aScanner.GetChar(theChar);
    return result;
  }

  return ConsumeText(theToken, aScanner);
}
// Example #2
// 0
/**
 * Tries to read an XML declaration or text declaration ("<?xml ... ?>")
 * starting at the current parse position.
 *
 * The pseudo-attributes are read in the fixed order version, encoding,
 * standalone:
 *   - version:    required when xmldecl is TRUE; must be "1.0" or "1.1".
 *   - encoding:   required when xmldecl is FALSE; must start with an ASCII
 *                 letter and contain only ASCII letters, digits, '.', '_'
 *                 and '-'.
 *   - standalone: reported as an error when xmldecl is FALSE; must be "yes"
 *                 or "no".
 *
 * Results are stored in textdecl_version, textdecl_standalone,
 * textdecl_encoding_start / textdecl_encoding_length and textdecl_encoding.
 * Problems are reported through HandleError using PARSE_ERROR_Invalid_XMLDecl
 * or PARSE_ERROR_Invalid_TextDecl (depending on xmldecl) as the base code,
 * with offsets +1 (version), +2 (encoding name) and +3 (standalone value).
 * NOTE(review): the code after each HandleError call looks like it assumes
 * HandleError does not return normally -- confirm.
 *
 * @param xmldecl TRUE to apply the XML declaration rules, FALSE to apply the
 *                text declaration rules.
 * @return FALSE, with index restored to where it was on entry, if the input
 *         does not start with "<?xml" followed by whitespace; TRUE otherwise.
 */
BOOL
XMLInternalParser::ReadTextDecl (BOOL xmldecl)
{
  const unsigned rewind_index = index;

  /* Declared (and initialized) up front so that the 'goto end' jumps below
     never bypass an initialization. */
  unsigned standalone_start = 0;
  BOOL literal_flag;

  textdecl_encoding_start = textdecl_encoding_length = 0;

  expecting_eof = TRUE;
  BOOL found_prefix = Match (UNI_L ("<?xml"), 5);
  expecting_eof = FALSE;

  const ParseError decl_error = xmldecl ? PARSE_ERROR_Invalid_XMLDecl : PARSE_ERROR_Invalid_TextDecl;

  /* Not a declaration at all: undo whatever was consumed and bail out. */
  if (!found_prefix || !ConsumeWhitespace ())
    {
      index = rewind_index;
      return FALSE;
    }

  textdecl_version = XMLVERSION_1_0;
  textdecl_standalone = XMLSTANDALONE_NONE;
  textdecl_encoding = 0;
  textdecl_encoding_length = 0;

  /* ---- version ---- */
  if (Match (UNI_L ("version"), 7))
    {
      ConsumeWhitespace ();

      if (!Match (UNI_L ("="), 1))
        {
          HandleError (decl_error);
        }

      ConsumeWhitespace ();

      const bool version_ok = ReadQuotedLiteral (literal_flag)
                              && index != length
                              && literal_length == 3
                              && literal[0] == '1'
                              && literal[1] == '.'
                              && (literal[2] == '0' || literal[2] == '1');

      if (!version_ok)
        {
          HandleError (static_cast<ParseError> (decl_error + 1));
        }

      textdecl_version = literal[2] == '0' ? XMLVERSION_1_0 : XMLVERSION_1_1;

      if (current_context != PARSE_CONTEXT_DOCUMENT && version < textdecl_version)
        {
          HandleError (WELL_FORMEDNESS_ERROR_ExternalEntityHasLaterVersion);
        }

      if (!ConsumeWhitespace ())
        {
          /* No whitespace after the version: an XML declaration may end
             here, a text declaration still needs its encoding. */
          if (xmldecl)
            {
              goto end;
            }

          HandleError (decl_error);
        }
    }
  else if (xmldecl)
    {
      HandleError (decl_error);
    }

  /* ---- encoding ---- */
  if (Match (UNI_L ("encoding"), 8))
    {
      ConsumeWhitespace ();

      if (!Match (UNI_L ("="), 1))
        {
          HandleError (decl_error);
        }

      ConsumeWhitespace ();

      const bool encoding_read = ReadQuotedLiteral (literal_flag)
                                 && index != length
                                 && literal_length != 0;

      if (!encoding_read)
        {
          HandleError (decl_error);
        }

      /* The name has to begin with an ASCII letter... */
      const uni_char first = literal[0];
      const bool first_is_letter = (first >= 'A' && first <= 'Z') || (first >= 'a' && first <= 'z');

      if (!first_is_letter)
        {
          HandleError (static_cast<ParseError> (decl_error + 2));
        }

      /* ...and consist solely of ASCII letters, digits, '.', '_' and '-'. */
      const uni_char *cursor = literal;
      const uni_char *const stop = cursor + literal_length;

      for (; cursor != stop; ++cursor)
        {
          const uni_char c = *cursor;
          const bool is_letter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
          const bool is_digit = c >= '0' && c <= '9';
          const bool is_punctuation = c == '.' || c == '_' || c == '-';

          if (!is_letter && !is_digit && !is_punctuation)
            {
              HandleError (static_cast<ParseError> (decl_error + 2));
            }
        }

      textdecl_encoding_start = literal_start;
      textdecl_encoding_length = literal_length;

      if (!ConsumeWhitespace ())
        {
          goto end;
        }
    }
  else if (!xmldecl)
    {
      HandleError (decl_error);
    }

  /* ---- standalone ---- */
  standalone_start = index;

  if (Match (UNI_L ("standalone"), 10))
    {
      ConsumeWhitespace ();

      if (!Match (UNI_L ("="), 1))
        {
          HandleError (decl_error);
        }

      ConsumeWhitespace ();

      const bool standalone_read = ReadQuotedLiteral (literal_flag) && index != length;

      if (!standalone_read)
        {
          HandleError (decl_error);
        }

      /* Under text declaration rules the whole pseudo-attribute is reported,
         using the range from its first character to the current position. */
      if (!xmldecl)
        {
          HandleError (decl_error, standalone_start, index - standalone_start);
        }

      if (literal_length == 2 && literal[0] == 'n' && literal[1] == 'o')
        {
          textdecl_standalone = XMLSTANDALONE_NO;
        }
      else if (literal_length == 3 && literal[0] == 'y' && literal[1] == 'e' && literal[2] == 's')
        {
          textdecl_standalone = XMLSTANDALONE_YES;
        }
      else
        {
          HandleError (static_cast<ParseError> (decl_error + 3));
        }

      ConsumeWhitespace ();
    }

end:
  if (!Match (UNI_L ("?>"), 2))
    {
      HandleError (decl_error);
    }

  /* A start offset of 0 means no encoding pseudo-attribute was recorded. */
  if (textdecl_encoding_start != 0)
    {
      textdecl_encoding = buffer + textdecl_encoding_start;
    }
  else
    {
      textdecl_encoding = 0;
    }

  return TRUE;
}