Beispiel #1
0
void KURL::init(const KURL& base,
                const CHAR* relative,
                int relativeLength,
                const WTF::TextEncoding* queryEncoding) {
  // Resolves |relative| (|relativeLength| units of CHAR) against |base|.
  // On return m_isValid holds the resolver's verdict, m_parsed the parsed
  // components and m_string the canonical output.
  // NOTE(review): CHAR is not declared in this snippet; presumably a template
  // parameter of this function -- confirm against the full file.

  // The converter object is cheap to build unconditionally: it is only a
  // wrapper around a reference.
  KURLCharsetConverter charsetConverterObject(queryEncoding);

  // Performance optimization, per HTML5 2.5.3 (resolving URL): no charset
  // converter is passed when there is no query encoding or when it is UTF-8 or
  // another Unicode encoding. The canonicalizer is more efficient that way
  // because it can do UTF-8 internally with no extra copies.
  KURLCharsetConverter* charsetConverter = nullptr;
  if (queryEncoding && !isUnicodeEncoding(queryEncoding)) {
    charsetConverter = &charsetConverterObject;
  }

  StringUTF8Adaptor baseSpec(base.getString());

  url::RawCanonOutputT<char> canonical;
  m_isValid = url::ResolveRelative(baseSpec.data(), baseSpec.length(),
                                   base.m_parsed, relative, relativeLength,
                                   charsetConverter, &canonical, &m_parsed);

  // See the FIXME in KURLPrivate in the header: when canonicalization leaves
  // the string unchanged, plain assignment could avoid this extra allocation.
  m_string = AtomicString::fromUTF8(canonical.data(), canonical.length());
}
Beispiel #2
0
void KURL::init(const KURL& base,
                const String& relative,
                const WTF::TextEncoding* queryEncoding) {
  // Resolves |relative| against |base|. On return m_isValid holds the
  // resolver's verdict, m_parsed the parsed components and m_string the
  // canonical URL; the protocol-family flag and inner URL are then refreshed.

  StringUTF8Adaptor baseSpec(base.getString());

  // The converter object is cheap to build unconditionally: it is only a
  // wrapper around a reference.
  KURLCharsetConverter charsetConverterObject(queryEncoding);

  // Performance optimization, per HTML5 2.5.3 (resolving URL): no charset
  // converter is passed when there is no query encoding or when it is UTF-8 or
  // another Unicode encoding. The canonicalizer is more efficient that way
  // because it can do UTF-8 internally with no extra copies.
  KURLCharsetConverter* charsetConverter = nullptr;
  if (queryEncoding && !isUnicodeEncoding(queryEncoding)) {
    charsetConverter = &charsetConverterObject;
  }

  url::RawCanonOutputT<char> canonical;
  if (relative.isNull() || !relative.is8Bit()) {
    // Null and non-8-bit strings take the characters16() path. The length is
    // clamped to int max to avoid overflow.
    m_isValid = url::ResolveRelative(baseSpec.data(), baseSpec.length(),
                                     base.m_parsed, relative.characters16(),
                                     clampTo<int>(relative.length()),
                                     charsetConverter, &canonical, &m_parsed);
  } else {
    // 8-bit strings are passed through a UTF-8 adaptor. The length is clamped
    // to int max to avoid overflow.
    StringUTF8Adaptor relativeUTF8(relative);
    m_isValid = url::ResolveRelative(baseSpec.data(), baseSpec.length(),
                                     base.m_parsed, relativeUTF8.data(),
                                     clampTo<int>(relativeUTF8.length()),
                                     charsetConverter, &canonical, &m_parsed);
  }

  // AtomicString::fromUTF8 re-hashes the raw output and looks it up in the
  // AtomicStringTable (addWithTranslator), which can be very expensive for
  // large URLs. Many URLs are generated from existing AtomicStrings whose hash
  // is already computed, so when the input is already canonical it is reused
  // directly.
  //
  // The shortcut is restricted to input that is already Atomic: marking
  // non-Atomic input as Atomic here would render the (const) input string
  // thread unsafe.
  const bool inputIsCanonicalAtomic =
      !relative.isNull() && relative.impl()->isAtomic() &&
      StringView(canonical.data(),
                 static_cast<unsigned>(canonical.length())) == relative;
  if (!inputIsCanonicalAtomic) {
    m_string = AtomicString::fromUTF8(canonical.data(), canonical.length());
  } else {
    m_string = relative;
  }

  initProtocolIsInHTTPFamily();
  initInnerURL();
  // The stored protocol is expected to be lower-case already.
  DCHECK_EQ(protocol(), protocol().lower());
}
Beispiel #3
0
// Note: code mostly duplicated below.
void KURLGooglePrivate::init(const KURL& base, const char* rel, int relLength,
                             const TextEncoding* queryEncoding)
{
    // As a performance optimization, we do not use the charset converter if
    // encoding is UTF-8 or other Unicode encodings. Note that this is
    // per HTML5 2.5.3 (resolving URL). The URL canonicalizer will be
    // more efficient with no charset converter object because it
    // can do UTF-8 internally with no extra copies.

    // We feel free to make the charset converter object every time since it's
    // just a wrapper around a reference.
    KURLCharsetConverter charsetConverterObject(queryEncoding);
    KURLCharsetConverter* charsetConverter =
        (!queryEncoding || isUnicodeEncoding(queryEncoding)) ? 0 :
        &charsetConverterObject;

    url_canon::RawCanonOutputT<char> output;
    const CString& baseStr = base.m_url.utf8String();
    m_isValid = url_util::ResolveRelative(baseStr.data(), baseStr.length(),
                                          base.m_url.m_parsed, rel, relLength,
                                          charsetConverter,
                                          &output, &m_parsed);

    // See FIXME in KURLGooglePrivate in the header. If canonicalization has not
    // changed the string, we can avoid an extra allocation by using assignment.
    //
    // When KURL encounters an error such that the URL is invalid and empty
    // (for example, resolving a relative URL on a non-hierarchical base), it
    // will produce an isNull URL, and calling setUtf8 will produce an empty
    // non-null URL. This is unlikely to affect anything, but we preserve this
    // just in case.
    if (m_isValid || output.length()) {
        // Without ref, the whole url is guaranteed to be ASCII-only.
        if (m_parsed.ref.is_nonempty())
            setUtf8(CString(output.data(), output.length()));
        else
            setAscii(CString(output.data(), output.length()));
    } else {
        // WebCore expects resolved URLs to be empty rather than NULL.
        setUtf8(CString("", 0));
    }
}
Beispiel #4
0
// Note: code mostly duplicated above. See FIXMEs and comments there.
void KURLGooglePrivate::init(const KURL& base, const UChar* rel, int relLength,
                             const TextEncoding* queryEncoding)
{
    KURLCharsetConverter charsetConverterObject(queryEncoding);
    KURLCharsetConverter* charsetConverter =
        (!queryEncoding || isUnicodeEncoding(queryEncoding)) ? 0 :
        &charsetConverterObject;

    url_canon::RawCanonOutputT<char> output;
    const CString& baseStr = base.m_url.utf8String();
    m_isValid = url_util::ResolveRelative(baseStr.data(), baseStr.length(),
                                          base.m_url.m_parsed, rel, relLength,
                                          charsetConverter,
                                          &output, &m_parsed);


    if (m_isValid || output.length()) {
        if (m_parsed.ref.is_nonempty())
            setUtf8(CString(output.data(), output.length()));
        else
            setAscii(CString(output.data(), output.length()));
    } else
        setUtf8(CString("", 0));
}