Exemplo n.º 1
0
PassRefPtr<Document> XSLTProcessor::createDocumentFromSource(const String& sourceString,
    const String& sourceEncoding, const String& sourceMIMEType, Node* sourceNode, Frame* frame)
{
    RefPtr<Document> ownerDocument = sourceNode->document();
    bool sourceIsDocument = (sourceNode == ownerDocument.get());
    String documentSource = sourceString;

    RefPtr<Document> result;
    if (sourceMIMEType == "text/plain") {
        result = Document::create(frame);
        transformTextStringToXHTMLDocumentString(documentSource);
    } else
        result = DOMImplementation::createDocument(sourceMIMEType, frame, false);

    // Before parsing, we need to save & detach the old document and get the new document
    // in place. We have to do this only if we're rendering the result document.
    if (frame) {
        if (FrameView* view = frame->view())
            view->clear();
        result->setTransformSourceDocument(frame->document());
        frame->setDocument(result);
    }

    if (sourceIsDocument)
        result->setURL(ownerDocument->url());
    result->open();

    RefPtr<TextResourceDecoder> decoder = TextResourceDecoder::create(sourceMIMEType);
    decoder->setEncoding(sourceEncoding.isEmpty() ? UTF8Encoding() : TextEncoding(sourceEncoding), TextResourceDecoder::EncodingFromXMLHeader);
    result->setDecoder(decoder.release());

    result->write(documentSource);
    result->finishParsing();
    result->close();

    return result.release();
}
	TextEncoding TextEncoding::ASCII()
	{
		return TextEncoding("ASCII");
	}
bool detectTextEncoding(const char* data, size_t len,
                        const char* hintEncodingName,
                        TextEncoding* detectedEncoding)
{
    *detectedEncoding = TextEncoding();
    int matchesCount = 0; 
    UErrorCode status = U_ZERO_ERROR;
    UCharsetDetector* detector = ucsdet_open(&status);
    if (U_FAILURE(status))
        return false;
    ucsdet_enableInputFilter(detector, true);
    ucsdet_setText(detector, data, static_cast<int32_t>(len), &status); 
    if (U_FAILURE(status))
        return false;

    // FIXME: A few things we can do other than improving
    // the ICU detector itself. 
    // 1. Use ucsdet_detectAll and pick the most likely one given
    // "the context" (parent-encoding, referrer encoding, etc).
    // 2. 'Emulate' Firefox/IE's non-Universal detectors (e.g.
    // Chinese, Japanese, Russian, Korean and Hebrew) by picking the 
    // encoding with a highest confidence among the detector-specific
    // limited set of candidate encodings.
    // Below is a partial implementation of the first part of what's outlined
    // above.
    const UCharsetMatch** matches = ucsdet_detectAll(detector, &matchesCount, &status);
    if (U_FAILURE(status)) {
        ucsdet_close(detector);
        return false;
    }

    const char* encoding = 0;
    if (hintEncodingName) {
        TextEncoding hintEncoding(hintEncodingName);
        // 10 is the minimum confidence value consistent with the codepoint
        // allocation in a given encoding. The size of a chunk passed to
        // us varies even for the same html file (apparently depending on 
        // the network load). When we're given a rather short chunk, we 
        // don't have a sufficiently reliable signal other than the fact that
        // the chunk is consistent with a set of encodings. So, instead of
        // setting an arbitrary threshold, we have to scan all the encodings
        // consistent with the data.  
        const int32_t kThresold = 10;
        for (int i = 0; i < matchesCount; ++i) {
            int32_t confidence = ucsdet_getConfidence(matches[i], &status);
            if (U_FAILURE(status)) {
                status = U_ZERO_ERROR;
                continue;
            }
            if (confidence < kThresold)
                break;
            const char* matchEncoding = ucsdet_getName(matches[i], &status);
            if (U_FAILURE(status)) {
                status = U_ZERO_ERROR;
                continue;
            }
            if (TextEncoding(matchEncoding) == hintEncoding) {
                encoding = hintEncodingName;
                break;
            }
        }
    }
    // If no match is found so far, just pick the top match. 
    // This can happen, say, when a parent frame in EUC-JP refers to
    // a child frame in Shift_JIS and both frames do NOT specify the encoding
    // making us resort to auto-detection (when it IS turned on).
    if (!encoding && matchesCount > 0)
        encoding = ucsdet_getName(matches[0], &status);
    if (U_SUCCESS(status)) {
        *detectedEncoding = TextEncoding(encoding);
        ucsdet_close(detector);
        return true;
    }    
    ucsdet_close(detector);
    return false;
}
	TextEncoding TextEncoding::Application() {
		return TextEncoding(::GetACP());
	}
	TextEncoding TextEncoding::UTF32LE()
	{
		return TextEncoding(12000);
	}
	TextEncoding TextEncoding::UTF16BE()
	{
		return TextEncoding(1201);
	}
	TextEncoding TextEncoding::UTF8()
	{
		return TextEncoding(65001);
	}
	TextEncoding TextEncoding::ASCII()
	{
		return TextEncoding(20127);
	}
	TextEncoding TextEncoding::UTF32LE()
	{
		return TextEncoding("UCS-4LE");
	}
Exemplo n.º 10
0
	TextEncoding TextEncoding::UTF32BE()
	{
		return TextEncoding("UCS-4BE");
	}
Exemplo n.º 11
0
	TextEncoding TextEncoding::UTF32()
	{
		return TextEncoding("UCS-4-INTERNAL");
	}
Exemplo n.º 12
0
	TextEncoding TextEncoding::UTF16LE()
	{
		return TextEncoding("UTF-16LE");
	}
Exemplo n.º 13
0
	TextEncoding TextEncoding::UTF16()
	{
		return TextEncoding("UTF-16");
	}
Exemplo n.º 14
0
	TextEncoding TextEncoding::UTF8()
	{
		return TextEncoding("UTF-8");
	}
Exemplo n.º 15
0
	TextEncoding TextEncoding::UTF7()
	{
		return TextEncoding("UTF-7");
	}
Exemplo n.º 16
0
	TextEncoding TextEncoding::WindowsShiftJIS()
	{
		return TextEncoding(932);
	}
Exemplo n.º 17
0
	TextEncoding TextEncoding::PalmShiftJIS()
	{
		return TextEncoding(932);
	}
Exemplo n.º 18
0
	TextEncoding TextEncoding::Application() {
		return TextEncoding("");
	}
Exemplo n.º 19
0
	TextEncoding TextEncoding::UTF7()
	{
		return TextEncoding(65000);
	}
Exemplo n.º 20
0
	TextEncoding TextEncoding::System() {
		return TextEncoding("");
	}
Exemplo n.º 21
0
	TextEncoding TextEncoding::UTF16()
	{
		return TextEncoding(1200);
	}
Exemplo n.º 22
0
DeprecatedString StreamingTextDecoderICU::toUnicode(const char* chs, int len, bool flush)
{
    ASSERT_ARG(len, len >= 0);
    
    if (!chs)
        return DeprecatedString();

    if (len <= 0 && !flush)
        return "";

    // Handle normal case.
    if (!m_atStart)
        return convert(chs, len, flush);

    // Check to see if we found a BOM.
    int numBufferedBytes = m_numBufferedBytes;
    int buf1Len = numBufferedBytes;
    int buf2Len = len;
    const unsigned char* buf1 = m_bufferedBytes;
    const unsigned char* buf2 = reinterpret_cast<const unsigned char*>(chs);
    unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
    unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
    unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
    int BOMLength = 0;
    if (c1 == 0xFF && c2 == 0xFE) {
        if (m_encoding != TextEncoding(UTF16Encoding, LittleEndian)) {
            releaseICUConverter();
            m_encoding = TextEncoding(UTF16Encoding, LittleEndian);
            m_littleEndian = true;
        }
        BOMLength = 2;
    } else if (c1 == 0xFE && c2 == 0xFF) {
        if (m_encoding != TextEncoding(UTF16Encoding, BigEndian)) {
            releaseICUConverter();
            m_encoding = TextEncoding(UTF16Encoding, BigEndian);
            m_littleEndian = false;
        }
        BOMLength = 2;
    } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) {
        if (m_encoding != TextEncoding(UTF8Encoding)) {
            releaseICUConverter();
            m_encoding = TextEncoding(UTF8Encoding);
        }
        BOMLength = 3;
    }

    // Handle case where we found a BOM.
    if (BOMLength != 0) {
        ASSERT(numBufferedBytes + len >= BOMLength);
        int skip = BOMLength - numBufferedBytes;
        m_numBufferedBytes = 0;
        m_atStart = false;
        return len == skip ? DeprecatedString("") : convert(chs + skip, len - skip, flush);
    }

    // Handle case where we know there is no BOM coming.
    const int bufferSize = sizeof(m_bufferedBytes);
    if (numBufferedBytes + len > bufferSize || flush) {
        m_atStart = false;
        if (numBufferedBytes == 0) {
            return convert(chs, len, flush);
        }
        unsigned char bufferedBytes[sizeof(m_bufferedBytes)];
        memcpy(bufferedBytes, m_bufferedBytes, numBufferedBytes);
        m_numBufferedBytes = 0;
        return convert(bufferedBytes, numBufferedBytes, false) + convert(chs, len, flush);
    }

    // Continue to look for the BOM.
    memcpy(&m_bufferedBytes[numBufferedBytes], chs, len);
    m_numBufferedBytes += len;
    return "";
}
Exemplo n.º 23
0
	TextEncoding TextEncoding::UTF32BE()
	{
		return TextEncoding(12001);
	}
Exemplo n.º 24
0
// parseDataUrl() is taken from the CURL http backend.
static gboolean parseDataUrl(gpointer callback_data)
{
    ResourceHandle* handle = static_cast<ResourceHandle*>(callback_data);
    ResourceHandleClient* client = handle->client();
    ResourceHandleInternal* d = handle->getInternal();
    if (d->m_cancelled)
        return false;

    d->m_idleHandler = 0;

    ASSERT(client);
    if (!client)
        return false;

    String url = handle->request().url().string();
    ASSERT(url.startsWith("data:", false));

    int index = url.find(',');
    if (index == -1) {
        client->cannotShowURL(handle);
        return false;
    }

    String mediaType = url.substring(5, index - 5);
    String data = url.substring(index + 1);

    bool isBase64 = mediaType.endsWith(";base64", false);
    if (isBase64)
        mediaType = mediaType.left(mediaType.length() - 7);

    if (mediaType.isEmpty())
        mediaType = "text/plain;charset=US-ASCII";

    String mimeType = extractMIMETypeFromMediaType(mediaType);
    String charset = extractCharsetFromMediaType(mediaType);

    ResourceResponse response;
    response.setMimeType(mimeType);

    if (isBase64) {
        data = decodeURLEscapeSequences(data);
        response.setTextEncodingName(charset);
        client->didReceiveResponse(handle, response);

        if (d->m_cancelled)
            return false;

        // Use the GLib Base64, since WebCore's decoder isn't
        // general-purpose and fails on Acid3 test 97 (whitespace).
        size_t outLength = 0;
        char* outData = 0;
        outData = reinterpret_cast<char*>(g_base64_decode(data.utf8().data(), &outLength));
        if (outData && outLength > 0)
            client->didReceiveData(handle, outData, outLength, 0);
        g_free(outData);
    } else {
        // We have to convert to UTF-16 early due to limitations in KURL
        data = decodeURLEscapeSequences(data, TextEncoding(charset));
        response.setTextEncodingName("UTF-16");
        client->didReceiveResponse(handle, response);

        if (d->m_cancelled)
            return false;

        if (data.length() > 0)
            client->didReceiveData(handle, reinterpret_cast<const char*>(data.characters()), data.length() * sizeof(UChar), 0);

        if (d->m_cancelled)
            return false;
    }

    client->didFinishLoading(handle);

    return false;
}
Exemplo n.º 25
0
	TextEncoding TextEncoding::Windows(UINT codePage)
	{
		return TextEncoding(codePage);
	}
Exemplo n.º 26
0
	TextEncoding TextEncoding::WindowsLatin1()
	{
		return TextEncoding(1252);
	}
Exemplo n.º 27
0
	TextEncoding TextEncoding::System() {
		return TextEncoding(::GetACP());
	}
Exemplo n.º 28
0
	TextEncoding TextEncoding::PalmLatin1()
	{
		return TextEncoding(1252);
	}
Exemplo n.º 29
0
void FileReaderLoader::setEncoding(const String& encoding)
{
    if (!encoding.isEmpty())
        m_encoding = TextEncoding(encoding);
}
Exemplo n.º 30
0
	TextEncoding TextEncoding::PalmShiftJIS()
	{
		// Palm Shift JIS is based on Windows CP 932, but is not identical.
		return TextEncoding("CP932");
	}