// Replaces the stored URL bytes with |string|, which the caller asserts is
// pure ASCII. No scan of the data is performed here; m_utf8IsASCII is simply
// set to true, so passing non-ASCII data leaves that flag wrong.
void KURLPrivate::setASCII(const CString& string) {
  m_utf8 = string;
  m_utf8IsASCII = true;

  // The String form is no longer in sync with m_utf8.
  m_stringIsValid = false;

  // Recompute the state derived from the URL contents.
  initProtocolIsInHTTPFamily();
  initInnerURL();
}
// Builds a KURL directly from already-computed pieces: the string to store,
// its parsed component offsets, and the validity flag. The arguments are
// stored as given; nothing in this constructor re-parses or re-canonicalizes
// |canonicalString|.
KURL::KURL(const AtomicString& canonicalString,
           const url::Parsed& parsed,
           bool isValid)
    : m_isValid(isValid),
      m_protocolIsInHTTPFamily(false),
      m_parsed(parsed),
      m_string(canonicalString) {
  // m_protocolIsInHTTPFamily starts out false above; the calls below fill in
  // the state that is derived from the stored string.
  initProtocolIsInHTTPFamily();
  initInnerURL();
}
void KURL::init(const KURL& base, const String& relative, const WTF::TextEncoding* queryEncoding) { // As a performance optimization, we do not use the charset converter // if encoding is UTF-8 or other Unicode encodings. Note that this is // per HTML5 2.5.3 (resolving URL). The URL canonicalizer will be more // efficient with no charset converter object because it can do UTF-8 // internally with no extra copies. StringUTF8Adaptor baseUTF8(base.getString()); // We feel free to make the charset converter object every time since it's // just a wrapper around a reference. KURLCharsetConverter charsetConverterObject(queryEncoding); KURLCharsetConverter* charsetConverter = (!queryEncoding || isUnicodeEncoding(queryEncoding)) ? 0 : &charsetConverterObject; // Clamp to int max to avoid overflow. url::RawCanonOutputT<char> output; if (!relative.isNull() && relative.is8Bit()) { StringUTF8Adaptor relativeUTF8(relative); m_isValid = url::ResolveRelative(baseUTF8.data(), baseUTF8.length(), base.m_parsed, relativeUTF8.data(), clampTo<int>(relativeUTF8.length()), charsetConverter, &output, &m_parsed); } else { m_isValid = url::ResolveRelative(baseUTF8.data(), baseUTF8.length(), base.m_parsed, relative.characters16(), clampTo<int>(relative.length()), charsetConverter, &output, &m_parsed); } // AtomicString::fromUTF8 will re-hash the raw output and check the // AtomicStringTable (addWithTranslator) for the string. This can be very // expensive for large URLs. However, since many URLs are generated from // existing AtomicStrings (which already have their hashes computed), this // fast path is used if the input string is already canonicalized. // // Because this optimization does not apply to non-AtomicStrings, explicitly // check that the input is Atomic before moving forward with it. If we mark // non-Atomic input as Atomic here, we will render the (const) input string // thread unsafe. 
if (!relative.isNull() && relative.impl()->isAtomic() && StringView(output.data(), static_cast<unsigned>(output.length())) == relative) { m_string = relative; } else { m_string = AtomicString::fromUTF8(output.data(), output.length()); } initProtocolIsInHTTPFamily(); initInnerURL(); DCHECK_EQ(protocol(), protocol().lower()); }
// Initializes this KURL from |relative| resolved against |base| by forwarding
// to the init overload that takes a character pointer and a length, then
// refreshes the state derived from the result.
void KURL::init(const KURL& base,
                const String& relative,
                const WTF::TextEncoding* queryEncoding) {
  const bool relativeIs8Bit = !relative.isNull() && relative.is8Bit();
  if (relativeIs8Bit) {
    // 8-bit strings go through a StringUTF8Adaptor and are passed on as
    // char data.
    StringUTF8Adaptor relativeAdaptor(relative);
    init(base, relativeAdaptor.data(), relativeAdaptor.length(),
         queryEncoding);
  } else {
    // Reached for 16-bit strings and for null |relative|.
    init(base, relative.characters16(), relative.length(), queryEncoding);
  }

  initProtocolIsInHTTPFamily();
  initInnerURL();
}
// Setters for the data. Using the ASCII version when you know the // data is ASCII will be slightly more efficient. The UTF-8 version // will always be correct if the caller is unsure. void KURLPrivate::setUTF8(const CString& string) { const char* data = string.data(); unsigned dataLength = string.length(); // The m_utf8IsASCII must always be correct since the DeprecatedString // getter must create it with the proper constructor. This test can be // removed when DeprecatedString is gone, but it still might be a // performance win. m_utf8IsASCII = true; for (unsigned i = 0; i < dataLength; i++) { if (static_cast<unsigned char>(data[i]) >= 0x80) { m_utf8IsASCII = false; break; } } m_utf8 = string; m_stringIsValid = false; initProtocolIsInHTTPFamily(); initInnerURL(); }