Example #1
0
// Replaces the stored URL bytes with |string|, which the caller vouches for
// as pure ASCII: no scan of the data is performed here, the ASCII flag is
// simply set.
void KURLPrivate::setASCII(const CString& string)
{
    // Reset the bookkeeping flags first; they do not depend on the new bytes.
    // NOTE(review): m_stringIsValid presumably guards a lazily built string
    // form of the URL -- confirm against the getter.
    m_stringIsValid = false;
    m_utf8IsASCII = true;

    m_utf8 = string;

    // Refresh the state derived from the new contents.
    initProtocolIsInHTTPFamily();
    initInnerURL();
}
Example #2
0
// Builds a KURL directly from an already-canonicalized string and its parsed
// components. Nothing is parsed or canonicalized here: |canonicalString|,
// |parsed| and |isValid| are stored as given.
//
// m_protocolIsInHTTPFamily starts out false and is then handed to
// initProtocolIsInHTTPFamily() in the body.
// NOTE(review): presumably that call recomputes the flag from the stored
// string -- confirm in its definition.
KURL::KURL(const AtomicString& canonicalString, const url::Parsed& parsed, bool isValid)
    : m_isValid(isValid)
    , m_protocolIsInHTTPFamily(false)
    , m_parsed(parsed)
    , m_string(canonicalString)
{
    // Set up the derived state once the members above are in place.
    initProtocolIsInHTTPFamily();
    initInnerURL();
}
Example #3
0
// Resolves |relative| against |base| and stores the outcome in this object:
// m_isValid, m_parsed and m_string are all overwritten, then the derived
// state is refreshed.
void KURL::init(const KURL& base,
                const String& relative,
                const WTF::TextEncoding* queryEncoding) {
  StringUTF8Adaptor baseAsUTF8(base.getString());

  // Per HTML5 2.5.3 (resolving URLs), no charset converter is needed when
  // the encoding is UTF-8 or another Unicode encoding. Skipping it is also a
  // performance win: the canonicalizer can then handle UTF-8 internally
  // without extra copies.
  //
  // The converter object is just a wrapper around a reference, so it is
  // built unconditionally and only handed over when it is actually needed.
  KURLCharsetConverter converterStorage(queryEncoding);
  KURLCharsetConverter* converter = nullptr;
  if (queryEncoding && !isUnicodeEncoding(queryEncoding))
    converter = &converterStorage;

  url::RawCanonOutputT<char> canonical;
  const bool relativeIs8Bit = !relative.isNull() && relative.is8Bit();
  if (relativeIs8Bit) {
    // The adaptor must stay alive for the duration of the resolve call.
    StringUTF8Adaptor relativeAsUTF8(relative);
    // Lengths are clamped to int max to avoid overflow.
    m_isValid = url::ResolveRelative(
        baseAsUTF8.data(), baseAsUTF8.length(), base.m_parsed,
        relativeAsUTF8.data(), clampTo<int>(relativeAsUTF8.length()),
        converter, &canonical, &m_parsed);
  } else {
    // Null and 16-bit inputs both take the UTF-16 overload.
    m_isValid = url::ResolveRelative(
        baseAsUTF8.data(), baseAsUTF8.length(), base.m_parsed,
        relative.characters16(), clampTo<int>(relative.length()),
        converter, &canonical, &m_parsed);
  }

  // AtomicString::fromUTF8 re-hashes the raw output and looks it up in the
  // AtomicStringTable (addWithTranslator), which can be very expensive for
  // large URLs. Many URLs come from existing AtomicStrings whose hashes are
  // already computed, so when the input is already in canonical form it is
  // reused as-is.
  //
  // This shortcut is only valid for input that is already Atomic. Marking
  // non-Atomic input as Atomic here would make the (const) input string
  // thread unsafe, hence the explicit isAtomic() check.
  const bool canReuseInput =
      !relative.isNull() && relative.impl()->isAtomic() &&
      StringView(canonical.data(),
                 static_cast<unsigned>(canonical.length())) == relative;
  if (canReuseInput)
    m_string = relative;
  else
    m_string = AtomicString::fromUTF8(canonical.data(), canonical.length());

  initProtocolIsInHTTPFamily();
  initInnerURL();

  // The stored protocol is expected to be lowercase at this point.
  DCHECK_EQ(protocol(), protocol().lower());
}
Example #4
0
// Resolves |relative| against |base| by forwarding to the character-pointer
// overload of init() that matches the string's storage width, then refreshes
// the derived state.
void KURL::init(const KURL& base, const String& relative, const WTF::TextEncoding* queryEncoding)
{
    if (relative.isNull() || !relative.is8Bit()) {
        // Null and 16-bit strings both go through the UTF-16 overload.
        init(base, relative.characters16(), relative.length(), queryEncoding);
    } else {
        // 8-bit strings are passed through a UTF-8 adaptor, which lives
        // until the forwarded init() call returns.
        StringUTF8Adaptor relativeAsUTF8(relative);
        init(base, relativeAsUTF8.data(), relativeAsUTF8.length(), queryEncoding);
    }

    initProtocolIsInHTTPFamily();
    initInnerURL();
}
Example #5
0
// Setters for the data. When the caller already knows the data is ASCII,
// the ASCII version is slightly more efficient. The UTF-8 version below is
// always correct and is the one to use when the caller is unsure.
void KURLPrivate::setUTF8(const CString& string)
{
    const char* bytes = string.data();
    const unsigned byteCount = string.length();

    // m_utf8IsASCII must always be accurate, because the DeprecatedString
    // getter has to pick the proper constructor based on it. This scan can
    // go away once DeprecatedString is gone, although it may still be a
    // performance win.
    //
    // Advance past the leading run of 7-bit bytes; the data is ASCII exactly
    // when that run covers the whole buffer.
    unsigned asciiPrefix = 0;
    while (asciiPrefix < byteCount
           && static_cast<unsigned char>(bytes[asciiPrefix]) < 0x80)
        ++asciiPrefix;
    m_utf8IsASCII = (asciiPrefix == byteCount);

    m_utf8 = string;
    m_stringIsValid = false;

    // Refresh the state derived from the new contents.
    initProtocolIsInHTTPFamily();
    initInnerURL();
}