void
nsHTMLContentSerializer::AppendToStringWrapped(const nsASingleFragmentString& aStr,
        nsAString& aOutputStr,
        PRBool aTranslateEntities)
{
    nsASingleFragmentString::const_char_iterator pos, end, sequenceStart;

    aStr.BeginReading(pos);
    aStr.EndReading(end);

    // if the current line already has text on it, such as a tag,
    // leading whitespace is significant

    PRBool mayIgnoreStartOfLineWhitespaceSequence = !mColPos;

    while (pos < end) {
        sequenceStart = pos;

        // if beginning of a whitespace sequence
        if (*pos == ' ' || *pos == '\n' || *pos == '\t') {
            AppendWrapped_WhitespaceSequence(pos, end, sequenceStart,
                                             mayIgnoreStartOfLineWhitespaceSequence, aOutputStr);
        }
        else { // any other non-whitespace char
            AppendWrapped_NonWhitespaceSequence(pos, end, sequenceStart,
                                                mayIgnoreStartOfLineWhitespaceSequence, aOutputStr);
        }
    }
}
Example #2
0
/**
 * Parses the given string into a sequence of Tokens.
 *
 * Walks aPattern with mPosition until the end iterator is reached. Every
 * recognized token is handed to addToken(); whitespace is skipped. Some
 * characters do not create a token of their own but instead retag the
 * previously added token (the "::" of an axis and the "(" following a
 * name). Once the input is consumed a Token::END token is appended.
 *
 * @param aPattern the expression text to tokenize
 * @return NS_OK on success; otherwise one of the NS_ERROR_XPATH_* codes
 *         returned in the body, NS_ERROR_OUT_OF_MEMORY when a Token is
 *         null after allocation, or NS_ERROR_FAILURE from the mLastItem
 *         check.
 */
nsresult
txExprLexer::parse(const nsASingleFragmentString& aPattern)
{
  iterator start, end;
  start = aPattern.BeginReading(mPosition);
  aPattern.EndReading(end);

  //-- Placeholder "previous token" for the first iteration. It lives on
  //-- the stack, so it is destroyed when this function returns.
  Token nullToken(nsnull, nsnull, Token::NULL_TOKEN);

  Token::Type defType;
  Token* newToken = nsnull;
  Token* prevToken = &nullToken;
  // Set to PR_FALSE by branches that consume input without producing a
  // new token (whitespace, "::", and "(" after a name).
  PRBool isToken;

  while (mPosition < end) {

    defType = Token::CNAME;
    isToken = PR_TRUE;

    if (*mPosition == DOLLAR_SIGN) {
      // '$' must be followed directly by a letter.
      if (++mPosition == end || !XMLUtils::isLetter(*mPosition)) {
        return NS_ERROR_XPATH_INVALID_VAR_NAME;
      }
      defType = Token::VAR_REFERENCE;
    } 
    // No else here: a variable reference falls through into the name
    // parsing below, which builds the token with defType.

    if (XMLUtils::isLetter(*mPosition)) {
      // NCName, can get QName or OperatorName;
      //  FunctionName, NodeName, and AxisSpecifier may want whitespace,
      //  and are dealt with below
      start = mPosition;
      while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
        /* just go */
      }
      if (mPosition < end && *mPosition == COLON) {
        // try QName or wildcard, might need to step back for axis
        if (++mPosition == end) {
          return NS_ERROR_XPATH_UNEXPECTED_END;
        }
        if (XMLUtils::isLetter(*mPosition)) {
          // prefix:localname -- consume the local part
          while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
            /* just go */
          }
        }
        else if (*mPosition == '*' && defType != Token::VAR_REFERENCE) {
          // eat wildcard for NameTest, bail for var ref at COLON
          ++mPosition;
        }
        else {
          // Leave the colon unconsumed; the next iteration reaches the
          // COLON case of the switch below.
          --mPosition; // step back
        }
      }
      if (nextIsOperatorToken(prevToken)) {
        // An operator is expected at this point, so the name just read
        // must be one of the operator names: and, or, mod, div.
        nsDependentSubstring op(Substring(start, mPosition));
        if (txXPathAtoms::_and->Equals(op)) {
          defType = Token::AND_OP;
        }
        else if (txXPathAtoms::_or->Equals(op)) {
          defType = Token::OR_OP;
        }
        else if (txXPathAtoms::mod->Equals(op)) {
          defType = Token::MODULUS_OP;
        }
        else if (txXPathAtoms::div->Equals(op)) {
          defType = Token::DIVIDE_OP;
        }
        else {
          // XXX QUESTION: spec is not too precise
          // badops is sure an error, but is bad:ops, too? We say yes!
          return NS_ERROR_XPATH_OPERATOR_EXPECTED;
        }
      }
      newToken = new Token(start, mPosition, defType);
    }
    else if (isXPathDigit(*mPosition)) {
      // Number starting with a digit: DIGITS, optionally followed by '.'
      // and zero or more further digits.
      start = mPosition;
      while (++mPosition < end && isXPathDigit(*mPosition)) {
        /* just go */
      }
      if (mPosition < end && *mPosition == '.') {
        while (++mPosition < end && isXPathDigit(*mPosition)) {
          /* just go */
        }
      }
      newToken = new Token(start, mPosition, Token::NUMBER);
    }
    else {
      switch (*mPosition) {
        //-- ignore whitespace
      case SPACE:
      case TX_TAB:
      case TX_CR:
      case TX_LF:
        ++mPosition;
        isToken = PR_FALSE;
        break;
      case S_QUOTE :
      case D_QUOTE :
        // String literal: scan forward to the matching quote character
        // (the same one that opened the literal).
        start = mPosition;
        while (++mPosition < end && *mPosition != *start) {
          // eat literal
        }
        if (mPosition == end) {
          // No closing quote; reset mPosition to the opening quote
          // before reporting the error.
          mPosition = start;
          return NS_ERROR_XPATH_UNCLOSED_LITERAL;
        }
        // The token range excludes both quote characters.
        newToken = new Token(start + 1, mPosition, Token::LITERAL);
        ++mPosition;
        break;
      case PERIOD:
        // period can be .., .(DIGITS)+ or ., check next
        if (++mPosition == end) {
          newToken = new Token(mPosition - 1, Token::SELF_NODE);
        }
        else if (isXPathDigit(*mPosition)) {
          // Number with a leading '.'; start one back so the token
          // range includes the period.
          start = mPosition - 1;
          while (++mPosition < end && isXPathDigit(*mPosition)) {
            /* just go */
          }
          newToken = new Token(start, mPosition, Token::NUMBER);
        }
        else if (*mPosition == PERIOD) {
          ++mPosition;
          newToken = new Token(mPosition - 2, mPosition, Token::PARENT_NODE);
        }
        else {
          newToken = new Token(mPosition - 1, Token::SELF_NODE);
        }
        break;
      case COLON: // QNames are dealt above, must be axis ident
        // Requires a second ':' and a preceding CNAME token. No new
        // token is created; the previous token is retagged instead.
        if (++mPosition >= end || *mPosition != COLON ||
            prevToken->mType != Token::CNAME) {
          return NS_ERROR_XPATH_BAD_COLON;
        }
        prevToken->mType = Token::AXIS_IDENTIFIER;
        ++mPosition;
        isToken = PR_FALSE;
        break;
      case FORWARD_SLASH :
        // "//" yields ANCESTOR_OP, a single "/" yields PARENT_OP.
        if (++mPosition < end && *mPosition == FORWARD_SLASH) {
          ++mPosition;
          newToken = new Token(mPosition - 2, mPosition, Token::ANCESTOR_OP);
        }
        else {
          newToken = new Token(mPosition - 1, Token::PARENT_OP);
        }
        break;
      case BANG : // can only be !=
        if (++mPosition < end && *mPosition == EQUAL) {
          ++mPosition;
          newToken = new Token(mPosition - 2, mPosition, Token::NOT_EQUAL_OP);
          break;
        }
        // Error ! is not not()
        return NS_ERROR_XPATH_BAD_BANG;
      case EQUAL:
        newToken = new Token(mPosition, Token::EQUAL_OP);
        ++mPosition;
        break;
      case L_ANGLE:
        // '<' as the very last character is reported as an unexpected
        // end; otherwise it is "<=" or "<".
        if (++mPosition == end) {
          return NS_ERROR_XPATH_UNEXPECTED_END;
        }
        if (*mPosition == EQUAL) {
          ++mPosition;
          newToken = new Token(mPosition - 2, mPosition,
                               Token::LESS_OR_EQUAL_OP);
        }
        else {
          newToken = new Token(mPosition - 1, Token::LESS_THAN_OP);
        }
        break;
      case R_ANGLE:
        // Same handling as L_ANGLE, for ">=" and ">".
        if (++mPosition == end) {
          return NS_ERROR_XPATH_UNEXPECTED_END;
        }
        if (*mPosition == EQUAL) {
          ++mPosition;
          newToken = new Token(mPosition - 2, mPosition,
                               Token::GREATER_OR_EQUAL_OP);
        }
        else {
          newToken = new Token(mPosition - 1, Token::GREATER_THAN_OP);
        }
        break;
      case HYPHEN :
        newToken = new Token(mPosition, Token::SUBTRACTION_OP);
        ++mPosition;
        break;
      case ASTERIX:
        // '*' is the multiply operator when an operator is expected;
        // otherwise it is tokenized as a CNAME.
        if (nextIsOperatorToken(prevToken)) {
          newToken = new Token(mPosition, Token::MULTIPLY_OP);
        }
        else {
          newToken = new Token(mPosition, Token::CNAME);
        }
        ++mPosition;
        break;
      case L_PAREN:
        if (prevToken->mType == Token::CNAME) {
          // "name(": fold the parenthesis into the previous token by
          // retagging it as a node-type test or a function name. No
          // separate L_PAREN token is added in this case.
          const nsDependentSubstring& val = prevToken->Value();
          if (val.EqualsLiteral("comment")) {
            prevToken->mType = Token::COMMENT_AND_PAREN;
          }
          else if (val.EqualsLiteral("node")) {
            prevToken->mType = Token::NODE_AND_PAREN;
          }
          else if (val.EqualsLiteral("processing-instruction")) {
            prevToken->mType = Token::PROC_INST_AND_PAREN;
          }
          else if (val.EqualsLiteral("text")) {
            prevToken->mType = Token::TEXT_AND_PAREN;
          }
          else {
            prevToken->mType = Token::FUNCTION_NAME_AND_PAREN;
          }
          isToken = PR_FALSE;
        }
        else {
          newToken = new Token(mPosition, Token::L_PAREN);
        }
        ++mPosition;
        break;
      case R_PAREN:
        newToken = new Token(mPosition, Token::R_PAREN);
        ++mPosition;
        break;
      case L_BRACKET:
        newToken = new Token(mPosition, Token::L_BRACKET);
        ++mPosition;
        break;
      case R_BRACKET:
        newToken = new Token(mPosition, Token::R_BRACKET);
        ++mPosition;
        break;
      case COMMA:
        newToken = new Token(mPosition, Token::COMMA);
        ++mPosition;
        break;
      case AT_SIGN :
        newToken = new Token(mPosition, Token::AT_SIGN);
        ++mPosition;
        break;
      case PLUS:
        newToken = new Token(mPosition, Token::ADDITION_OP);
        ++mPosition;
        break;
      case VERT_BAR:
        newToken = new Token(mPosition, Token::UNION_OP);
        ++mPosition;
        break;
      default:
        // Error, don't grok character :-(
        return NS_ERROR_XPATH_ILLEGAL_CHAR;
      }
    }
    if (isToken) {
      NS_ENSURE_TRUE(newToken, NS_ERROR_OUT_OF_MEMORY);
      // NOTE(review): presumably guards against adding the token that
      // is already the last list item a second time -- confirm against
      // addToken().
      NS_ENSURE_TRUE(newToken != mLastItem, NS_ERROR_FAILURE);
      prevToken = newToken;
      addToken(newToken);
    }
  }

  // Add an end token (empty range at the end of the input) to the list.
  newToken = new Token(end, end, Token::END);
  if (!newToken) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  addToken(newToken);

  return NS_OK;
}
Example #3
0
int32_t
CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String,
                   const nsASingleFragmentString& aUTF16String)
  {
    // Three-way comparison of a UTF-8 string against a UTF-16 string,
    // one character at a time.
    //
    // Returns -1 / 1 at the first differing character (or when one string
    // is a proper prefix of the other), 0 when both strings match, and
    // INT32_MIN when the UTF-8 decoder reports an error.

    // Mask of every bit above the 7-bit ASCII range.
    static const uint32_t kAboveAscii = uint32_t(~0x7F);

    const char* narrowCur;
    const char* narrowEnd;
    aUTF8String.BeginReading(narrowCur);
    aUTF8String.EndReading(narrowEnd);

    const char16_t* wideCur;
    const char16_t* wideEnd;
    aUTF16String.BeginReading(wideCur);
    aUTF16String.EndReading(wideEnd);

    while (narrowCur != narrowEnd && wideCur != wideEnd)
      {
        // Go through uint8_t first so a negative char is not
        // sign-extended when widened to 32 bits.
        uint32_t narrowChar = (uint8_t)*narrowCur;

        if (!(narrowChar & kAboveAscii))
          {
            // ASCII byte: compare directly against the current UTF-16
            // code unit and step both cursors by one.
            const uint32_t wideUnit = *wideCur;
            if (narrowChar > wideUnit)
              return 1;
            if (narrowChar < wideUnit)
              return -1;

            ++narrowCur;
            ++wideCur;
            continue;
          }

        // Non-ASCII lead byte: decode a full character from each side.
        // The enumerators advance the cursors themselves.
        bool decodeFailed;
        narrowChar = UTF8CharEnumerator::NextChar(&narrowCur, narrowEnd,
                                                  &decodeFailed);
        if (decodeFailed)
          return INT32_MIN;

        // UTF16CharEnumerator::NextChar() can fail as well, but with
        // data available (always the case here) it yields the Unicode
        // replacement character 0xFFFD for invalid input. That error is
        // deliberately ignored and invalid UTF16 is treated as 0xFFFD.
        //
        // This matches what our UTF16 to UTF8 conversion code does, so a
        // UTF8 string produced from an invalid UTF16 string compares
        // equal to the string it came from, and to any other UTF16
        // string that differs only in the invalid part.
        const uint32_t wideChar =
          UTF16CharEnumerator::NextChar(&wideCur, wideEnd);

        if (narrowChar < wideChar)
          return -1;
        if (narrowChar > wideChar)
          return 1;
      }

    // At least one string is exhausted. If UTF-8 input remains, the UTF-8
    // string is the longer one.
    if (narrowCur != narrowEnd)
      return 1;

    // If only UTF-16 input remains, the UTF-16 string is the longer one;
    // otherwise the two strings match.
    return wideCur != wideEnd ? -1 : 0;
  }