void KURL::init(const KURL& base, const CHAR* relative, int relativeLength, const WTF::TextEncoding* queryEncoding) { // As a performance optimization, we do not use the charset converter // if encoding is UTF-8 or other Unicode encodings. Note that this is // per HTML5 2.5.3 (resolving URL). The URL canonicalizer will be more // efficient with no charset converter object because it can do UTF-8 // internally with no extra copies. // We feel free to make the charset converter object every time since it's // just a wrapper around a reference. KURLCharsetConverter charsetConverterObject(queryEncoding); KURLCharsetConverter* charsetConverter = (!queryEncoding || isUnicodeEncoding(queryEncoding)) ? 0 : &charsetConverterObject; StringUTF8Adaptor baseUTF8(base.getString()); url::RawCanonOutputT<char> output; m_isValid = url::ResolveRelative(baseUTF8.data(), baseUTF8.length(), base.m_parsed, relative, relativeLength, charsetConverter, &output, &m_parsed); // See FIXME in KURLPrivate in the header. If canonicalization has not // changed the string, we can avoid an extra allocation by using assignment. m_string = AtomicString::fromUTF8(output.data(), output.length()); }
void KURL::init(const KURL& base, const String& relative, const WTF::TextEncoding* queryEncoding) { // As a performance optimization, we do not use the charset converter // if encoding is UTF-8 or other Unicode encodings. Note that this is // per HTML5 2.5.3 (resolving URL). The URL canonicalizer will be more // efficient with no charset converter object because it can do UTF-8 // internally with no extra copies. StringUTF8Adaptor baseUTF8(base.getString()); // We feel free to make the charset converter object every time since it's // just a wrapper around a reference. KURLCharsetConverter charsetConverterObject(queryEncoding); KURLCharsetConverter* charsetConverter = (!queryEncoding || isUnicodeEncoding(queryEncoding)) ? 0 : &charsetConverterObject; // Clamp to int max to avoid overflow. url::RawCanonOutputT<char> output; if (!relative.isNull() && relative.is8Bit()) { StringUTF8Adaptor relativeUTF8(relative); m_isValid = url::ResolveRelative(baseUTF8.data(), baseUTF8.length(), base.m_parsed, relativeUTF8.data(), clampTo<int>(relativeUTF8.length()), charsetConverter, &output, &m_parsed); } else { m_isValid = url::ResolveRelative(baseUTF8.data(), baseUTF8.length(), base.m_parsed, relative.characters16(), clampTo<int>(relative.length()), charsetConverter, &output, &m_parsed); } // AtomicString::fromUTF8 will re-hash the raw output and check the // AtomicStringTable (addWithTranslator) for the string. This can be very // expensive for large URLs. However, since many URLs are generated from // existing AtomicStrings (which already have their hashes computed), this // fast path is used if the input string is already canonicalized. // // Because this optimization does not apply to non-AtomicStrings, explicitly // check that the input is Atomic before moving forward with it. If we mark // non-Atomic input as Atomic here, we will render the (const) input string // thread unsafe. if (!relative.isNull() && relative.impl()->isAtomic() && StringView(output.data(), static_cast<unsigned>(output.length())) == relative) { m_string = relative; } else { m_string = AtomicString::fromUTF8(output.data(), output.length()); } initProtocolIsInHTTPFamily(); initInnerURL(); DCHECK_EQ(protocol(), protocol().lower()); }
// Note: code mostly duplicated below. void KURLGooglePrivate::init(const KURL& base, const char* rel, int relLength, const TextEncoding* queryEncoding) { // As a performance optimization, we do not use the charset converter if // encoding is UTF-8 or other Unicode encodings. Note that this is // per HTML5 2.5.3 (resolving URL). The URL canonicalizer will be // more efficient with no charset converter object because it // can do UTF-8 internally with no extra copies. // We feel free to make the charset converter object every time since it's // just a wrapper around a reference. KURLCharsetConverter charsetConverterObject(queryEncoding); KURLCharsetConverter* charsetConverter = (!queryEncoding || isUnicodeEncoding(queryEncoding)) ? 0 : &charsetConverterObject; url_canon::RawCanonOutputT<char> output; const CString& baseStr = base.m_url.utf8String(); m_isValid = url_util::ResolveRelative(baseStr.data(), baseStr.length(), base.m_url.m_parsed, rel, relLength, charsetConverter, &output, &m_parsed); // See FIXME in KURLGooglePrivate in the header. If canonicalization has not // changed the string, we can avoid an extra allocation by using assignment. // // When KURL encounters an error such that the URL is invalid and empty // (for example, resolving a relative URL on a non-hierarchical base), it // will produce an isNull URL, and calling setUtf8 will produce an empty // non-null URL. This is unlikely to affect anything, but we preserve this // just in case. if (m_isValid || output.length()) { // Without ref, the whole url is guaranteed to be ASCII-only. if (m_parsed.ref.is_nonempty()) setUtf8(CString(output.data(), output.length())); else setAscii(CString(output.data(), output.length())); } else { // WebCore expects resolved URLs to be empty rather than NULL. setUtf8(CString("", 0)); } }
// Note: code mostly duplicated above. See FIXMEs and comments there. void KURLGooglePrivate::init(const KURL& base, const UChar* rel, int relLength, const TextEncoding* queryEncoding) { KURLCharsetConverter charsetConverterObject(queryEncoding); KURLCharsetConverter* charsetConverter = (!queryEncoding || isUnicodeEncoding(queryEncoding)) ? 0 : &charsetConverterObject; url_canon::RawCanonOutputT<char> output; const CString& baseStr = base.m_url.utf8String(); m_isValid = url_util::ResolveRelative(baseStr.data(), baseStr.length(), base.m_url.m_parsed, rel, relLength, charsetConverter, &output, &m_parsed); if (m_isValid || output.length()) { if (m_parsed.ref.is_nonempty()) setUtf8(CString(output.data(), output.length())); else setAscii(CString(output.data(), output.length())); } else setUtf8(CString("", 0)); }