// Reads `stream` in fixed-size chunks of 2-byte code units and forwards the
// content to myReader:
//   * text between line breaks is converted with myReader.myConverter and
//     passed to characterDataHandler() (the terminating line-break unit is
//     included in the converted range, as before);
//   * every "\n", "\r" or "\r\n" triggers exactly one newLineHandler() call,
//     including a "\r\n" pair that straddles two chunks;
//   * any other ASCII whitespace except '\t' is overwritten with ' ' in place.
// getAscii()/setAscii() are used to inspect and rewrite a single code unit.
void TxtReaderCoreUtf16::readDocument(ZLInputStream &stream) {
    const size_t BUFSIZE = 2048;
    // Stack storage: released on every exit path, so nothing can leak if a
    // handler or the converter throws.
    char buffer[BUFSIZE];
    std::string str;
    size_t length;
    // Set when the previous chunk ended with '\r'; a '\n' that opens the next
    // chunk is then the second half of the same line break.
    bool pendingCarriageReturn = false;
    do {
        length = stream.read(buffer, BUFSIZE);
        char *start = buffer;
        const char *end = buffer + length;
        // Only complete 2-byte units are inspected. A trailing odd byte is
        // never read as a character, but is still handed to the converter by
        // the final flush below.
        const char *scanEnd = end - (length % 2);

        char *ptr = buffer;
        if (pendingCarriageReturn && ptr < scanEnd && getAscii(ptr) == '\n') {
            // The line break was already reported for the '\r'; drop the '\n'.
            ptr += 2;
            start = ptr;
        }
        pendingCarriageReturn = false;

        for (; ptr < scanEnd; ptr += 2) {
            const char chr = getAscii(ptr);
            if (chr == '\n' || chr == '\r') {
                bool skipNewLine = false;
                if (chr == '\r') {
                    if (ptr + 2 == scanEnd) {
                        // No unit left to look ahead at in this chunk.
                        pendingCarriageReturn = true;
                    } else if (getAscii(ptr + 2) == '\n') {
                        skipNewLine = true;
                        setAscii(ptr, '\n');
                    }
                }
                if (start != ptr) {
                    str.erase();
                    myReader.myConverter->convert(str, start, ptr + 2);
                    myReader.characterDataHandler(str);
                }
                if (skipNewLine) {
                    ptr += 2;
                }
                start = ptr + 2;
                myReader.newLineHandler();
            } else if (chr != 0 && isspace(static_cast<unsigned char>(chr))) {
                // The cast keeps isspace() defined for char values that are
                // negative on platforms where char is signed.
                if (chr != '\t') {
                    setAscii(ptr, ' ');
                }
            }
        }
        if (start != end) {
            str.erase();
            myReader.myConverter->convert(str, start, end);
            myReader.characterDataHandler(str);
        }
    } while (length == BUFSIZE);
}
void KURLGooglePrivate::replaceComponents(const Replacements& replacements) { url_canon::RawCanonOutputT<char> output; url_parse::Parsed newParsed; m_isValid = url_util::ReplaceComponents(utf8String().data(), utf8String().length(), m_parsed, replacements, 0, &output, &newParsed); m_parsed = newParsed; if (m_parsed.ref.is_nonempty()) setUtf8(CString(output.data(), output.length())); else setAscii(CString(output.data(), output.length())); }
// Note: code mostly duplicated below. void KURLGooglePrivate::init(const KURL& base, const char* rel, int relLength, const TextEncoding* queryEncoding) { // As a performance optimization, we do not use the charset converter if // encoding is UTF-8 or other Unicode encodings. Note that this is // per HTML5 2.5.3 (resolving URL). The URL canonicalizer will be // more efficient with no charset converter object because it // can do UTF-8 internally with no extra copies. // We feel free to make the charset converter object every time since it's // just a wrapper around a reference. KURLCharsetConverter charsetConverterObject(queryEncoding); KURLCharsetConverter* charsetConverter = (!queryEncoding || isUnicodeEncoding(queryEncoding)) ? 0 : &charsetConverterObject; url_canon::RawCanonOutputT<char> output; const CString& baseStr = base.m_url.utf8String(); m_isValid = url_util::ResolveRelative(baseStr.data(), baseStr.length(), base.m_url.m_parsed, rel, relLength, charsetConverter, &output, &m_parsed); // See FIXME in KURLGooglePrivate in the header. If canonicalization has not // changed the string, we can avoid an extra allocation by using assignment. // // When KURL encounters an error such that the URL is invalid and empty // (for example, resolving a relative URL on a non-hierarchical base), it // will produce an isNull URL, and calling setUtf8 will produce an empty // non-null URL. This is unlikely to affect anything, but we preserve this // just in case. if (m_isValid || output.length()) { // Without ref, the whole url is guaranteed to be ASCII-only. if (m_parsed.ref.is_nonempty()) setUtf8(CString(output.data(), output.length())); else setAscii(CString(output.data(), output.length())); } else { // WebCore expects resolved URLs to be empty rather than NULL. setUtf8(CString("", 0)); } }
// Note: code mostly duplicated above. See FIXMEs and comments there. void KURLGooglePrivate::init(const KURL& base, const UChar* rel, int relLength, const TextEncoding* queryEncoding) { KURLCharsetConverter charsetConverterObject(queryEncoding); KURLCharsetConverter* charsetConverter = (!queryEncoding || isUnicodeEncoding(queryEncoding)) ? 0 : &charsetConverterObject; url_canon::RawCanonOutputT<char> output; const CString& baseStr = base.m_url.utf8String(); m_isValid = url_util::ResolveRelative(baseStr.data(), baseStr.length(), base.m_url.m_parsed, rel, relLength, charsetConverter, &output, &m_parsed); if (m_isValid || output.length()) { if (m_parsed.ref.is_nonempty()) setUtf8(CString(output.data(), output.length())); else setAscii(CString(output.data(), output.length())); } else setUtf8(CString("", 0)); }