Пример #1
0
// Returns the contents of this string encoded with the UTF-8 encoding.
// The QuestionMarksForUnencodables policy is forwarded to the encoder.
CString String::utf8() const
{
    const TextEncoding& utf8Encoder = UTF8Encoding();
    return utf8Encoder.encode(characters(), length(), QuestionMarksForUnencodables);
}
Пример #2
0
// Builds a String by decoding `size` bytes starting at `string` with the
// UTF-8 encoding.
String String::fromUTF8(const char* string, size_t size)
{
    const TextEncoding& utf8Decoder = UTF8Encoding();
    return utf8Decoder.decode(string, size);
}
Пример #3
0
// Returns this encoding unless isNonByteBasedEncoding() reports true, in
// which case UTF-8 is returned instead.
const TextEncoding& TextEncoding::closestByteBasedEquivalent() const
{
    if (!isNonByteBasedEncoding())
        return *this;
    return UTF8Encoding();
}
Пример #4
0
// HTML5 specifies that UTF-8 be used for form submission when the form is
// part of a UTF-16 document, probably because UTF-16 is not a byte-based
// encoding and can contain 0x00. By extension, the same is done for UTF-32.
// UTF-7 is byte-based, but it is fraught with problems and we would rather
// steer clear of it, so it is replaced with UTF-8 as well.
const TextEncoding& TextEncoding::encodingForFormSubmission() const
{
    // Any encoding that is neither non-byte-based nor UTF-7 is used as is.
    if (!isNonByteBasedEncoding() && !isUTF7Encoding())
        return *this;
    return UTF8Encoding();
}
Пример #5
0
// Looks for a byte order mark at the start of the input, picks the codec
// accordingly, and decodes the bytes seen so far.
//
// The leading bytes are taken first from m_bufferedBytes (bytes kept from
// earlier calls) and then from `data`. If no BOM has been recognized yet, all
// bytes seen so far still fit in m_bufferedBytes, and `flush` is false, the new
// bytes are buffered and an empty string is returned so the check can be
// repeated on the next call. Otherwise a codec is created for the BOM's
// encoding (or for m_encoding when no BOM was found), the BOM bytes are
// skipped, and the buffered bytes plus `data` are decoded.
//
// data, length - newly received bytes.
// flush        - forwarded to the codec; also forces a decision on the BOM.
// stopOnError  - forwarded to the codec; when set and an error was seen while
//                decoding the buffered bytes, `data` is not decoded.
// sawError     - out-parameter forwarded to the codec's decode() calls.
//
// Returns the decoded text, an empty string while still waiting for more
// bytes, or a null String() if no codec could be created.
String TextDecoder::checkForBOM(const char* data, size_t length, bool flush, bool stopOnError, bool& sawError)
{
    ASSERT(!m_checkedForBOM);

    // Check to see if we found a BOM.
    // buf1 walks the previously buffered bytes, buf2 walks the new data.
    size_t numBufferedBytes = m_numBufferedBytes;
    size_t buf1Len = numBufferedBytes;
    size_t buf2Len = length;
    const unsigned char* buf1 = m_bufferedBytes;
    const unsigned char* buf2 = reinterpret_cast<const unsigned char*>(data);
    // Pull the first bytes: buffered bytes first, then new data, 0 when neither has any left.
    unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
    unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
    unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
    // The fourth byte is only ever taken from the new data.
    unsigned char c4 = buf2Len ? (--buf2Len, *buf2++) : 0;

    // Default to the encoding the decoder was configured with.
    const TextEncoding* encodingConsideringBOM = &m_encoding;
    bool foundBOM = true;
    size_t lengthOfBOM = 0;
    if (c1 == 0xFF && c2 == 0xFE) {
        // FF FE starts both the UTF-16LE and the UTF-32LE BOM; the next two
        // bytes tell them apart.
        if (c3 != 0 || c4 != 0)  {
            encodingConsideringBOM = &UTF16LittleEndianEncoding();
            lengthOfBOM = 2;
        } else if (numBufferedBytes + length > sizeof(m_bufferedBytes)) {
            // c3 and c4 are both zero and more bytes have been seen than
            // m_bufferedBytes can hold: treat FF FE 00 00 as UTF-32LE.
            encodingConsideringBOM = &UTF32LittleEndianEncoding();
            lengthOfBOM = 4;
        } else
            // Too few bytes so far to tell the two encodings apart.
            foundBOM = false;
    } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) {
        encodingConsideringBOM = &UTF8Encoding();
        lengthOfBOM = 3;
    } else if (c1 == 0xFE && c2 == 0xFF) {
        encodingConsideringBOM = &UTF16BigEndianEncoding();
        lengthOfBOM = 2;
    } else if (c1 == 0 && c2 == 0 && c3 == 0xFE && c4 == 0xFF) {
        encodingConsideringBOM = &UTF32BigEndianEncoding();
        lengthOfBOM = 4;
    } else
        foundBOM = false;

    if (!foundBOM && numBufferedBytes + length <= sizeof(m_bufferedBytes) && !flush) {
        // Continue to look for the BOM.
        // Everything seen so far still fits in m_bufferedBytes, so keep the
        // new bytes and wait for the next call.
        memcpy(&m_bufferedBytes[numBufferedBytes], data, length);
        m_numBufferedBytes += length;
        return "";
    }

    // Done checking for BOM.
    m_codec.set(newTextCodec(*encodingConsideringBOM).release());
    // m_checkedForBOM is left unset when no codec could be created.
    if (!m_codec)
        return String();
    m_checkedForBOM = true;

    // Skip the BOM.
    if (foundBOM) {
        ASSERT(numBufferedBytes < lengthOfBOM);
        // Part of the BOM may sit in the buffered bytes; only the remainder
        // has to be skipped in `data`.
        size_t numUnbufferedBOMBytes = lengthOfBOM - numBufferedBytes;
        ASSERT(numUnbufferedBOMBytes <= length);

        data += numUnbufferedBOMBytes;
        length -= numUnbufferedBOMBytes;
        // The buffered bytes all belonged to the BOM, so drop them.
        numBufferedBytes = 0;
        m_numBufferedBytes = 0;
    }

    // Handle case where we have some buffered bytes to deal with.
    if (numBufferedBytes) {
        // Decode from a local copy after resetting the member's byte count.
        char bufferedBytes[sizeof(m_bufferedBytes)];
        memcpy(bufferedBytes, m_bufferedBytes, numBufferedBytes);
        m_numBufferedBytes = 0;

        // The buffered bytes are decoded without flushing; `flush` is only
        // passed along with the trailing `data`.
        String bufferedResult = m_codec->decode(bufferedBytes, numBufferedBytes, false, stopOnError, sawError);
        if (stopOnError && sawError)
            return bufferedResult;
        return bufferedResult + m_codec->decode(data, length, flush, stopOnError, sawError);
    }

    return m_codec->decode(data, length, flush, stopOnError, sawError);
}
Пример #6
0
// Constructs a KURL from a QUrl by parsing the QUrl's encoded form against an
// empty base URL, using the UTF-8 encoding.
KURL::KURL(const QUrl& url)
{
    // Keep the encoded bytes alive in a named local while they are parsed.
    const QByteArray encodedUrl = url.toEncoded();
    const KURL parsed(KURL(), encodedUrl.constData(), UTF8Encoding());
    *this = parsed;
}
Пример #7
0
void FileReaderLoader::convertToText()
{
    m_isRawDataConverted = true;

    if (!m_bytesLoaded) {
        m_stringResult = "";
        return;
    }

    // Decode the data.
    // The File API spec says that we should use the supplied encoding if it is valid. However, we choose to ignore this
    // requirement in order to be consistent with how WebKit decodes the web content: always has the BOM override the
    // provided encoding.
    // FIXME: consider supporting incremental decoding to improve the perf.
    StringBuilder builder;
    if (!m_decoder)
        m_decoder = TextResourceDecoder::create("text/plain", m_encoding.isValid() ? m_encoding : UTF8Encoding());
    builder.append(m_decoder->decode(static_cast<const char*>(m_rawData->data()), m_rawData->byteLength()));

    if (m_finishedLoading)
        builder.append(m_decoder->flush());

    m_stringResult = builder.toString();
}
bool WebFrameSerializerImpl::serialize()
{
    bool didSerialization = false;

    Document* document = m_specifiedWebLocalFrameImpl->frame()->document();
    const KURL& url = document->url();

    if (url.isValid()) {
        didSerialization = true;

        const WTF::TextEncoding& textEncoding = document->encoding().isValid() ? document->encoding() : UTF8Encoding();
        if (textEncoding.isNonByteBasedEncoding()) {
            const UChar byteOrderMark = 0xFEFF;
            m_dataBuffer.append(byteOrderMark);
        }

        SerializeDomParam param(url, textEncoding, document);

        Element* documentElement = document->documentElement();
        if (documentElement)
            buildContentForNode(documentElement, &param);

        encodeAndFlushBuffer(WebFrameSerializerClient::CurrentFrameIsFinished, &param, ForceFlush);
    } else {
        // Report empty contents for invalid URLs.
        m_client->didSerializeDataForFrame(
            WebCString(), WebFrameSerializerClient::CurrentFrameIsFinished);
    }

    ASSERT(m_dataBuffer.isEmpty());
    return didSerialization;
}
Пример #9
0
void XMLHttpRequest::send(const String& body, ExceptionCode& ec)
{
    if (!m_doc)
        return;

    if (m_state != Open) {
        ec = INVALID_STATE_ERR;
        return;
    }
  
    // FIXME: Should this abort or raise an exception instead if we already have a m_loader going?
    if (m_loader)
        return;

    m_aborted = false;

    ResourceRequest request(m_url);
    request.setHTTPMethod(m_method);
    
    if (!body.isNull() && m_method != "GET" && m_method != "HEAD" && (m_url.protocol().lower() == "http" || m_url.protocol().lower() == "https")) {
        String contentType = getRequestHeader("Content-Type");
        if (contentType.isEmpty()) {
            ExceptionCode ec = 0;
            setRequestHeader("Content-Type", "application/xml", ec);
            ASSERT(ec == 0);
        }

        // FIXME: must use xmlEncoding for documents.
        String charset = "UTF-8";
      
        TextEncoding m_encoding(charset);
        if (!m_encoding.isValid()) // FIXME: report an error?
            m_encoding = UTF8Encoding();

        request.setHTTPBody(PassRefPtr<FormData>(new FormData(m_encoding.encode(body.characters(), body.length()))));
    }

    if (m_requestHeaders.size() > 0)
        request.addHTTPHeaderFields(m_requestHeaders);

    if (!m_async) {
        Vector<char> data;
        ResourceError error;
        ResourceResponse response;

        {
            // avoid deadlock in case the loader wants to use JS on a background thread
#ifdef __OWB_JS__
            KJS::JSLock::DropAllLocks dropLocks;
            if (m_doc->frame()) 
                m_doc->frame()->loader()->loadResourceSynchronously(request, error, response, data);
#endif
        }

        m_loader = 0;
        if (error.isNull())
            processSyncLoadResults(data, response);
        else
            ec = NETWORK_ERR;
    
        return;
    }

    // Neither this object nor the JavaScript wrapper should be deleted while
    // a request is in progress because we need to keep the listeners alive,
    // and they are referenced by the JavaScript wrapper.
    ref();
    {
#ifdef __OWB_JS__
        KJS::JSLock lock;
        gcProtectNullTolerant(KJS::ScriptInterpreter::getDOMObject(this));
#endif
    }
  
    // create can return null here, for example if we're no longer attached to a page.
    // this is true while running onunload handlers
    // FIXME: Maybe create can return false for other reasons too?
    m_loader = SubresourceLoader::create(m_doc->frame(), this, request);
}
Пример #10
0
// Rebuilds m_builder from the raw data decoded as text. With no raw data the
// builder is simply left empty.
void FileReader::convertToText()
{
    // Both the empty and the non-empty path start from a cleared builder.
    m_builder.clear();
    if (!m_rawData.size())
        return;

    // The File API spec says the supplied encoding should be used when it is
    // valid. We deliberately do not follow that, to stay consistent with how
    // WebKit decodes web content: a BOM always overrides the provided encoding.
    // FIXME: consider supporting incremental decoding to improve the perf.
    if (!m_decoder)
        m_decoder = TextResourceDecoder::create("text/plain", m_encoding.isValid() ? m_encoding : UTF8Encoding());

    m_builder.append(m_decoder->decode(&m_rawData.at(0), m_rawData.size()));

    // Flush the decoder only after a completed, error-free read.
    const bool finishedCleanly = m_state == Completed && !m_error;
    if (finishedCleanly)
        m_builder.append(m_decoder->flush());
}
// Produces the markup that must be emitted before the open tag of `element`.
//
// element  - element whose open tag is about to be serialized.
// param    - serialization state; its haveSeenDocType,
//            haveAddedXMLProcessingDirective and skipMetaElement fields may
//            be updated.
// needSkip - set to true when the element is a charset-declaring META tag
//            that must be skipped, false otherwise.
//
// Returns the markup to prepend (possibly empty).
String WebPageSerializerImpl::preActionBeforeSerializeOpenTag(
    const Element* element, SerializeDomParam* param, bool* needSkip)
{
    StringBuilder markup;
    *needSkip = false;

    if (!param->isHTMLDocument) {
        // Write the XML declaration once per document.
        if (!param->haveAddedXMLProcessingDirective) {
            param->haveAddedXMLProcessingDirective = true;
            // Pick the encoding name: the document's XML encoding, then the
            // writer's encoding, then UTF-8.
            String encodingName = param->document->xmlEncoding();
            if (encodingName.isEmpty())
                encodingName = param->document->loader()->writer()->encoding();
            if (encodingName.isEmpty())
                encodingName = UTF8Encoding().name();
            markup.append("<?xml version=\"");
            markup.append(param->document->xmlVersion());
            markup.append("\" encoding=\"");
            markup.append(encodingName);
            if (param->document->xmlStandalone())
                markup.append("\" standalone=\"yes");
            markup.append("\"?>\n");
        }
        // Add the doc type declaration if it has not been emitted yet.
        if (!param->haveSeenDocType) {
            param->haveSeenDocType = true;
            markup.append(createMarkup(param->document->doctype()));
        }
        return markup.toString();
    }

    if (element->hasTagName(HTMLNames::metaTag)) {
        // Skip the open tag of an original META tag that declares a charset:
        // a META with the correct charset declaration is written after the
        // open tag of the HEAD element instead.
        const HTMLMetaElement* metaElement = static_cast<const HTMLMetaElement*>(element);
        String httpEquivValue = metaElement->httpEquiv();
        if (equalIgnoringCase(httpEquivValue, "content-type")) {
            String contentValue = metaElement->content();
            if (contentValue.length() && contentValue.contains("charset", false)) {
                // This META tag declares a charset; remember it and skip it
                // while serializing the DOM.
                param->skipMetaElement = element;
                *needSkip = true;
            }
        }
    } else if (element->hasTagName(HTMLNames::htmlTag)) {
        // Before the open tag of the HTML element, add the doc type
        // declaration if it has not been emitted yet.
        if (!param->haveSeenDocType) {
            param->haveSeenDocType = true;
            markup.append(createMarkup(param->document->doctype()));
        }

        // Add MOTW declaration before html tag.
        // See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx.
        markup.append(WebPageSerializer::generateMarkOfTheWebDeclaration(param->url));
    } else if (element->hasTagName(HTMLNames::baseTag)) {
        // Comment out the BASE tag when serializing the DOM.
        markup.append("<!--");
    }

    return markup.toString();
}
Пример #12
0
// Returns true when the encoding that `encoding` selects for form submission
// compares equal to UTF-8.
static bool isUnicodeEncoding(const WTF::TextEncoding* encoding)
{
    const WTF::TextEncoding& submissionEncoding = encoding->encodingForFormSubmission();
    return submissionEncoding == UTF8Encoding();
}
Пример #13
0
// Convenience overload: decodes the URL escape sequences in `string` by
// calling the two-argument overload with the UTF-8 encoding.
String decodeURLEscapeSequences(const String& string)
{
    const TextEncoding& utf8 = UTF8Encoding();
    return decodeURLEscapeSequences(string, utf8);
}
Пример #14
0
// Builds a String by decoding a NUL-terminated UTF-8 byte sequence.
//
// string - pointer to the NUL-terminated bytes; may be null.
//
// Returns the decoded string, or a null String when `string` is null.
String String::fromUTF8(const char* string)
{
    // strlen() has undefined behaviour for a null pointer, so handle that
    // case explicitly instead of crashing.
    if (!string)
        return String();
    return UTF8Encoding().decode(string, strlen(string));
}
Пример #15
0
// Returns the contents of this string encoded with the UTF-8 encoding.
CString String::utf8() const
{
    const TextEncoding& utf8Encoder = UTF8Encoding();
    return utf8Encoder.encode(characters(), length());
}