void nsHTMLContentSerializer::AppendToStringWrapped(const nsASingleFragmentString& aStr, nsAString& aOutputStr, PRBool aTranslateEntities) { nsASingleFragmentString::const_char_iterator pos, end, sequenceStart; aStr.BeginReading(pos); aStr.EndReading(end); // if the current line already has text on it, such as a tag, // leading whitespace is significant PRBool mayIgnoreStartOfLineWhitespaceSequence = !mColPos; while (pos < end) { sequenceStart = pos; // if beginning of a whitespace sequence if (*pos == ' ' || *pos == '\n' || *pos == '\t') { AppendWrapped_WhitespaceSequence(pos, end, sequenceStart, mayIgnoreStartOfLineWhitespaceSequence, aOutputStr); } else { // any other non-whitespace char AppendWrapped_NonWhitespaceSequence(pos, end, sequenceStart, mayIgnoreStartOfLineWhitespaceSequence, aOutputStr); } } }
/**
 * Parses the given string into a sequence of Tokens.
 *
 * The lexer walks aPattern with mPosition, creating one Token per lexical
 * unit and handing it to addToken(). Whitespace produces no token. A "::"
 * or "(" following a CNAME does not produce a token of its own either;
 * instead the type of the previous token is rewritten (axis identifier,
 * node-type test or function name). After the last character an END token
 * is appended.
 *
 * @param aPattern the expression text to tokenize
 * @return NS_OK on success, otherwise one of
 *         NS_ERROR_XPATH_INVALID_VAR_NAME  - '$' not followed by a letter
 *         NS_ERROR_XPATH_UNEXPECTED_END    - input ends after ':', '<' or '>'
 *         NS_ERROR_XPATH_OPERATOR_EXPECTED - a name or variable reference
 *                                            appears where an operator is
 *                                            required and is not one of
 *                                            and/or/mod/div
 *         NS_ERROR_XPATH_UNCLOSED_LITERAL  - missing closing quote; mPosition
 *                                            is reset to the opening quote
 *         NS_ERROR_XPATH_BAD_COLON         - ':' that is neither part of a
 *                                            QName nor a "::" after a CNAME
 *         NS_ERROR_XPATH_BAD_BANG          - '!' not followed by '='
 *         NS_ERROR_XPATH_ILLEGAL_CHAR      - unrecognized character
 *         NS_ERROR_OUT_OF_MEMORY           - token allocation failed
 *         NS_ERROR_FAILURE                 - the new token is already the
 *                                            last item of the list
 */
nsresult
txExprLexer::parse(const nsASingleFragmentString& aPattern)
{
  iterator start, end;
  start = aPattern.BeginReading(mPosition);
  aPattern.EndReading(end);

  //-- initialize previous token, this will automatically get
  //-- deleted when it goes out of scope
  Token nullToken(nsnull, nsnull, Token::NULL_TOKEN);

  Token::Type defType;
  Token* newToken = nsnull;
  Token* prevToken = &nullToken;
  PRBool isToken;

  while (mPosition < end) {

    defType = Token::CNAME;
    isToken = PR_TRUE;

    if (*mPosition == DOLLAR_SIGN) {
      if (++mPosition == end || !XMLUtils::isLetter(*mPosition)) {
        return NS_ERROR_XPATH_INVALID_VAR_NAME;
      }
      defType = Token::VAR_REFERENCE;
    }
    // A variable name is lexed by the QName code below; defType carries
    // the type of the token that will be constructed from it.

    if (XMLUtils::isLetter(*mPosition)) {
      // NCName, can get QName or OperatorName;
      //  FunctionName, NodeName, and AxisSpecifier may want whitespace,
      //  and are dealt with below
      start = mPosition;
      while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
        /* just go */
      }
      if (mPosition < end && *mPosition == COLON) {
        // try QName or wildcard, might need to step back for axis
        if (++mPosition == end) {
          return NS_ERROR_XPATH_UNEXPECTED_END;
        }
        if (XMLUtils::isLetter(*mPosition)) {
          while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
            /* just go */
          }
        }
        else if (*mPosition == '*' && defType != Token::VAR_REFERENCE) {
          // eat wildcard for NameTest, bail for var ref at COLON
          ++mPosition;
        }
        else {
          --mPosition; // step back
        }
      }
      if (nextIsOperatorToken(prevToken)) {
        // A variable reference can never stand in for an operator. Without
        // this check "$div" in operator position would have its '$'
        // silently dropped and be lexed as the "div" operator.
        if (defType == Token::VAR_REFERENCE) {
          return NS_ERROR_XPATH_OPERATOR_EXPECTED;
        }
        nsDependentSubstring op(Substring(start, mPosition));
        if (txXPathAtoms::_and->Equals(op)) {
          defType = Token::AND_OP;
        }
        else if (txXPathAtoms::_or->Equals(op)) {
          defType = Token::OR_OP;
        }
        else if (txXPathAtoms::mod->Equals(op)) {
          defType = Token::MODULUS_OP;
        }
        else if (txXPathAtoms::div->Equals(op)) {
          defType = Token::DIVIDE_OP;
        }
        else {
          // XXX QUESTION: spec is not too precise
          // badops is sure an error, but is bad:ops, too? We say yes!
          return NS_ERROR_XPATH_OPERATOR_EXPECTED;
        }
      }
      newToken = new Token(start, mPosition, defType);
    }
    else if (isXPathDigit(*mPosition)) {
      // DIGITS, optionally followed by '.' and more DIGITS
      start = mPosition;
      while (++mPosition < end && isXPathDigit(*mPosition)) {
        /* just go */
      }
      if (mPosition < end && *mPosition == '.') {
        while (++mPosition < end && isXPathDigit(*mPosition)) {
          /* just go */
        }
      }
      newToken = new Token(start, mPosition, Token::NUMBER);
    }
    else {
      switch (*mPosition) {
        //-- ignore whitespace
        case SPACE:
        case TX_TAB:
        case TX_CR:
        case TX_LF:
          ++mPosition;
          isToken = PR_FALSE;
          break;
        case S_QUOTE :
        case D_QUOTE :
          // *start is the opening quote; scan for the matching one
          start = mPosition;
          while (++mPosition < end && *mPosition != *start) {
            // eat literal
          }
          if (mPosition == end) {
            mPosition = start;
            return NS_ERROR_XPATH_UNCLOSED_LITERAL;
          }
          // the token value excludes both quotes
          newToken = new Token(start + 1, mPosition, Token::LITERAL);
          ++mPosition;
          break;
        case PERIOD:
          // period can be .., .(DIGITS)+ or ., check next
          if (++mPosition == end) {
            newToken = new Token(mPosition - 1, Token::SELF_NODE);
          }
          else if (isXPathDigit(*mPosition)) {
            start = mPosition - 1;
            while (++mPosition < end && isXPathDigit(*mPosition)) {
              /* just go */
            }
            newToken = new Token(start, mPosition, Token::NUMBER);
          }
          else if (*mPosition == PERIOD) {
            ++mPosition;
            newToken = new Token(mPosition - 2, mPosition, Token::PARENT_NODE);
          }
          else {
            newToken = new Token(mPosition - 1, Token::SELF_NODE);
          }
          break;
        case COLON: // QNames are dealt above, must be axis ident
          if (++mPosition >= end || *mPosition != COLON ||
              prevToken->mType != Token::CNAME) {
            return NS_ERROR_XPATH_BAD_COLON;
          }
          // "::" emits no token; it retypes the preceding name instead
          prevToken->mType = Token::AXIS_IDENTIFIER;
          ++mPosition;
          isToken = PR_FALSE;
          break;
        case FORWARD_SLASH :
          if (++mPosition < end && *mPosition == FORWARD_SLASH) {
            ++mPosition;
            newToken = new Token(mPosition - 2, mPosition, Token::ANCESTOR_OP);
          }
          else {
            newToken = new Token(mPosition - 1, Token::PARENT_OP);
          }
          break;
        case BANG : // can only be !=
          if (++mPosition < end && *mPosition == EQUAL) {
            ++mPosition;
            newToken = new Token(mPosition - 2, mPosition, Token::NOT_EQUAL_OP);
            break;
          }
          // Error ! is not not()
          return NS_ERROR_XPATH_BAD_BANG;
        case EQUAL:
          newToken = new Token(mPosition, Token::EQUAL_OP);
          ++mPosition;
          break;
        case L_ANGLE:
          if (++mPosition == end) {
            return NS_ERROR_XPATH_UNEXPECTED_END;
          }
          if (*mPosition == EQUAL) {
            ++mPosition;
            newToken = new Token(mPosition - 2, mPosition,
                                 Token::LESS_OR_EQUAL_OP);
          }
          else {
            newToken = new Token(mPosition - 1, Token::LESS_THAN_OP);
          }
          break;
        case R_ANGLE:
          if (++mPosition == end) {
            return NS_ERROR_XPATH_UNEXPECTED_END;
          }
          if (*mPosition == EQUAL) {
            ++mPosition;
            newToken = new Token(mPosition - 2, mPosition,
                                 Token::GREATER_OR_EQUAL_OP);
          }
          else {
            newToken = new Token(mPosition - 1, Token::GREATER_THAN_OP);
          }
          break;
        case HYPHEN :
          newToken = new Token(mPosition, Token::SUBTRACTION_OP);
          ++mPosition;
          break;
        case ASTERIX:
          // '*' is either the multiply operator or a wildcard name test,
          // depending on what preceded it
          if (nextIsOperatorToken(prevToken)) {
            newToken = new Token(mPosition, Token::MULTIPLY_OP);
          }
          else {
            newToken = new Token(mPosition, Token::CNAME);
          }
          ++mPosition;
          break;
        case L_PAREN:
          if (prevToken->mType == Token::CNAME) {
            // name followed by '(': fold the paren into the previous token
            // by retyping it as a node-type test or a function name
            const nsDependentSubstring& val = prevToken->Value();
            if (val.EqualsLiteral("comment")) {
              prevToken->mType = Token::COMMENT_AND_PAREN;
            }
            else if (val.EqualsLiteral("node")) {
              prevToken->mType = Token::NODE_AND_PAREN;
            }
            else if (val.EqualsLiteral("processing-instruction")) {
              prevToken->mType = Token::PROC_INST_AND_PAREN;
            }
            else if (val.EqualsLiteral("text")) {
              prevToken->mType = Token::TEXT_AND_PAREN;
            }
            else {
              prevToken->mType = Token::FUNCTION_NAME_AND_PAREN;
            }
            isToken = PR_FALSE;
          }
          else {
            newToken = new Token(mPosition, Token::L_PAREN);
          }
          ++mPosition;
          break;
        case R_PAREN:
          newToken = new Token(mPosition, Token::R_PAREN);
          ++mPosition;
          break;
        case L_BRACKET:
          newToken = new Token(mPosition, Token::L_BRACKET);
          ++mPosition;
          break;
        case R_BRACKET:
          newToken = new Token(mPosition, Token::R_BRACKET);
          ++mPosition;
          break;
        case COMMA:
          newToken = new Token(mPosition, Token::COMMA);
          ++mPosition;
          break;
        case AT_SIGN :
          newToken = new Token(mPosition, Token::AT_SIGN);
          ++mPosition;
          break;
        case PLUS:
          newToken = new Token(mPosition, Token::ADDITION_OP);
          ++mPosition;
          break;
        case VERT_BAR:
          newToken = new Token(mPosition, Token::UNION_OP);
          ++mPosition;
          break;
        default:
          // Error, don't grok character :-(
          return NS_ERROR_XPATH_ILLEGAL_CHAR;
      }
    }
    if (isToken) {
      NS_ENSURE_TRUE(newToken, NS_ERROR_OUT_OF_MEMORY);
      NS_ENSURE_TRUE(newToken != mLastItem, NS_ERROR_FAILURE);
      prevToken = newToken;
      addToken(newToken);
    }
  }

  // add a endToken to the list
  newToken = new Token(end, end, Token::END);
  if (!newToken) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  addToken(newToken);

  return NS_OK;
}
int32_t CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String, const nsASingleFragmentString& aUTF16String) { static const uint32_t NOT_ASCII = uint32_t(~0x7F); const char *u8, *u8end; aUTF8String.BeginReading(u8); aUTF8String.EndReading(u8end); const char16_t *u16, *u16end; aUTF16String.BeginReading(u16); aUTF16String.EndReading(u16end); while (u8 != u8end && u16 != u16end) { // Cast away the signedness of *u8 to prevent signextension when // converting to uint32_t uint32_t c8_32 = (uint8_t)*u8; if (c8_32 & NOT_ASCII) { bool err; c8_32 = UTF8CharEnumerator::NextChar(&u8, u8end, &err); if (err) return INT32_MIN; uint32_t c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end); // The above UTF16CharEnumerator::NextChar() calls can // fail, but if it does for anything other than no data to // look at (which can't happen here), it returns the // Unicode replacement character 0xFFFD for the invalid // data they were fed. Ignore that error and treat invalid // UTF16 as 0xFFFD. // // This matches what our UTF16 to UTF8 conversion code // does, and thus a UTF8 string that came from an invalid // UTF16 string will compare equal to the invalid UTF16 // string it came from. Same is true for any other UTF16 // string differs only in the invalid part of the string. if (c8_32 != c16_32) return c8_32 < c16_32 ? -1 : 1; } else { if (c8_32 != *u16) return c8_32 > *u16 ? 1 : -1; ++u8; ++u16; } } if (u8 != u8end) { // We get to the end of the UTF16 string, but no to the end of // the UTF8 string. The UTF8 string is longer than the UTF16 // string return 1; } if (u16 != u16end) { // We get to the end of the UTF8 string, but no to the end of // the UTF16 string. The UTF16 string is longer than the UTF8 // string return -1; } // The two strings match. return 0; }