Exemple #1
// Creates the Document object that will receive the result of a transformation.
//
// sourceString   - copied into documentSource (presumably the markup to be
//                  parsed; the parsing step is not visible in this excerpt).
// sourceEncoding - encoding name handed to the decoder; UTF-8 is used when empty.
// sourceMIMEType - "text/plain" yields a plain Document; any other value is
//                  routed through DOMImplementation::createDocument.
// sourceNode     - dereferenced unconditionally, so it must not be null.
// frame          - may be null; the frame-related setup is skipped in that case.
//
// Returns the newly created document, handed off via RefPtr::release().
//
// NOTE(review): this excerpt contains no braces and the bodies of several `if`
// statements are missing, so the control flow below cannot be read literally.
// Confirm against the complete source before relying on it.
PassRefPtr<Document> XSLTProcessor::createDocumentFromSource(const String& sourceString,
    const String& sourceEncoding, const String& sourceMIMEType, Node* sourceNode, Frame* frame)
    RefPtr<Document> ownerDocument = sourceNode->document();
    // True when the node passed in is itself its owner document.
    bool sourceIsDocument = (sourceNode == ownerDocument.get());
    String documentSource = sourceString;

    // Pick the concrete document class from the MIME type.
    RefPtr<Document> result;
    if (sourceMIMEType == "text/plain") {
        result = Document::create(frame);
    } else
        result = DOMImplementation::createDocument(sourceMIMEType, frame, false);

    // Before parsing, we need to save & detach the old document and get the new document
    // in place. We have to do this only if we're rendering the result document.
    if (frame) {
        // NOTE(review): the statement guarded by this check is not visible here.
        if (FrameView* view = frame->view())

    // NOTE(review): the statement guarded by this check is not visible here.
    if (sourceIsDocument)

    // Decoder for the result; an empty sourceEncoding falls back to UTF-8.
    // NOTE(review): no use of `decoder` after setEncoding is visible in this excerpt.
    RefPtr<TextResourceDecoder> decoder = TextResourceDecoder::create(sourceMIMEType);
    decoder->setEncoding(sourceEncoding.isEmpty() ? UTF8Encoding() : TextEncoding(sourceEncoding), TextResourceDecoder::EncodingFromXMLHeader);


    return result.release();
	// Returns the encoding constructed from the name "ASCII".
	TextEncoding TextEncoding::ASCII()
		return TextEncoding("ASCII");
// Guesses the encoding of a byte buffer using the ICU charset detector.
//
// data, len        - bytes to examine; len is narrowed to int32_t for ICU.
// hintEncodingName - optional (may be null) encoding that is preferred when it
//                    appears among the detector's sufficiently confident matches.
// detectedEncoding - out-parameter; reset to a default TextEncoding() on entry
//                    and overwritten only on success.
//
// Returns true when *detectedEncoding was set from a detector match, false when
// an ICU call failed.
//
// NOTE(review): this excerpt contains no braces and no call to ucsdet_close();
// confirm in the complete source that the detector opened below is released on
// every return path.
bool detectTextEncoding(const char* data, size_t len,
                        const char* hintEncodingName,
                        TextEncoding* detectedEncoding)
    *detectedEncoding = TextEncoding();
    int matchesCount = 0; 
    UErrorCode status = U_ZERO_ERROR;
    UCharsetDetector* detector = ucsdet_open(&status);
    if (U_FAILURE(status))
        return false;
    ucsdet_enableInputFilter(detector, true);
    ucsdet_setText(detector, data, static_cast<int32_t>(len), &status); 
    if (U_FAILURE(status))
        return false;

    // FIXME: A few things we can do other than improving
    // the ICU detector itself.
    // 1. Use ucsdet_detectAll and pick the most likely one given
    // "the context" (parent-encoding, referrer encoding, etc).
    // 2. 'Emulate' Firefox/IE's non-Universal detectors (e.g.
    // Chinese, Japanese, Russian, Korean and Hebrew) by picking the
    // encoding with the highest confidence among the detector-specific
    // limited set of candidate encodings.
    // Below is a partial implementation of the first part of what's outlined
    // above.
    const UCharsetMatch** matches = ucsdet_detectAll(detector, &matchesCount, &status);
    if (U_FAILURE(status)) {
        return false;

    // Stays null until a match is chosen.
    const char* encoding = 0;
    if (hintEncodingName) {
        TextEncoding hintEncoding(hintEncodingName);
        // 10 is the minimum confidence value consistent with the codepoint
        // allocation in a given encoding. The size of a chunk passed to
        // us varies even for the same html file (apparently depending on
        // the network load). When we're given a rather short chunk, we
        // don't have a sufficiently reliable signal other than the fact that
        // the chunk is consistent with a set of encodings. So, instead of
        // setting an arbitrary threshold, we have to scan all the encodings
        // consistent with the data.
        const int32_t kThresold = 10;
        // NOTE(review): the statements guarded by the checks inside this loop
        // (presumably "skip to the next match" / "stop searching") are not
        // visible in this excerpt.
        for (int i = 0; i < matchesCount; ++i) {
            int32_t confidence = ucsdet_getConfidence(matches[i], &status);
            if (U_FAILURE(status)) {
                // Clear the error so later ICU calls are not short-circuited.
                status = U_ZERO_ERROR;
            if (confidence < kThresold)
            const char* matchEncoding = ucsdet_getName(matches[i], &status);
            if (U_FAILURE(status)) {
                status = U_ZERO_ERROR;
            // Compare as TextEncoding objects rather than as raw name strings.
            if (TextEncoding(matchEncoding) == hintEncoding) {
                encoding = hintEncodingName;
    // If no match is found so far, just pick the top match.
    // This can happen, say, when a parent frame in EUC-JP refers to
    // a child frame in Shift_JIS and both frames do NOT specify the encoding
    // making us resort to auto-detection (when it IS turned on).
    if (!encoding && matchesCount > 0)
        encoding = ucsdet_getName(matches[0], &status);
    // NOTE(review): when matchesCount is 0, `encoding` is still null here while
    // status reports success; confirm TextEncoding(const char*) accepts null.
    if (U_SUCCESS(status)) {
        *detectedEncoding = TextEncoding(encoding);
        return true;
    return false;
	// Returns the encoding for the code page reported by ::GetACP().
	TextEncoding TextEncoding::Application() {
		return TextEncoding(::GetACP());
	// Returns the encoding for code page 12000.
	TextEncoding TextEncoding::UTF32LE()
		return TextEncoding(12000);
	// Returns the encoding for code page 1201.
	TextEncoding TextEncoding::UTF16BE()
		return TextEncoding(1201);
	// Returns the encoding for code page 65001.
	TextEncoding TextEncoding::UTF8()
		return TextEncoding(65001);
	// Returns the encoding for code page 20127.
	TextEncoding TextEncoding::ASCII()
		return TextEncoding(20127);
	// Returns the encoding constructed from the name "UCS-4LE".
	TextEncoding TextEncoding::UTF32LE()
		return TextEncoding("UCS-4LE");
	// Returns the encoding constructed from the name "UCS-4BE".
	TextEncoding TextEncoding::UTF32BE()
		return TextEncoding("UCS-4BE");
	// Returns the encoding constructed from the name "UCS-4-INTERNAL".
	// NOTE(review): presumably the converter's native-byte-order UCS-4 alias; confirm.
	TextEncoding TextEncoding::UTF32()
		return TextEncoding("UCS-4-INTERNAL");
	// Returns the encoding constructed from the name "UTF-16LE".
	TextEncoding TextEncoding::UTF16LE()
		return TextEncoding("UTF-16LE");
	// Returns the encoding constructed from the name "UTF-16".
	TextEncoding TextEncoding::UTF16()
		return TextEncoding("UTF-16");
	// Returns the encoding constructed from the name "UTF-8".
	TextEncoding TextEncoding::UTF8()
		return TextEncoding("UTF-8");
	// Returns the encoding constructed from the name "UTF-7".
	TextEncoding TextEncoding::UTF7()
		return TextEncoding("UTF-7");
	// Returns the encoding for code page 932.
	TextEncoding TextEncoding::WindowsShiftJIS()
		return TextEncoding(932);
	// Returns the encoding for code page 932, the same value WindowsShiftJIS() uses.
	TextEncoding TextEncoding::PalmShiftJIS()
		return TextEncoding(932);
	// Returns the encoding constructed from an empty name.
	// NOTE(review): presumably "" selects the converter's locale default; confirm.
	TextEncoding TextEncoding::Application() {
		return TextEncoding("");
	// Returns the encoding for code page 65000.
	TextEncoding TextEncoding::UTF7()
		return TextEncoding(65000);
	// Returns the encoding constructed from an empty name.
	// NOTE(review): presumably "" selects the converter's locale default; confirm.
	TextEncoding TextEncoding::System() {
		return TextEncoding("");
	// Returns the encoding for code page 1200.
	// NOTE(review): Windows documents code page 1200 as little-endian UTF-16;
	// confirm that is the intended meaning of the unqualified "UTF16".
	TextEncoding TextEncoding::UTF16()
		return TextEncoding(1200);
// Decodes a chunk of bytes to Unicode, sniffing for a byte-order mark while the
// decoder is still at the start of the stream.
//
// chs   - bytes to decode; a null pointer yields an empty DeprecatedString.
// len   - number of bytes in chs; asserted to be non-negative.
// flush - when true, any bytes held back for BOM detection are decoded now
//         instead of waiting for more input.
//
// Returns the decoded text for this call. The result is empty while bytes are
// still being buffered for BOM detection.
//
// NOTE(review): this excerpt contains no braces, so the block structure below
// is implied by the comments and indentation only.
DeprecatedString StreamingTextDecoderICU::toUnicode(const char* chs, int len, bool flush)
    ASSERT_ARG(len, len >= 0);
    if (!chs)
        return DeprecatedString();

    // Nothing to decode and nothing to flush.
    if (len <= 0 && !flush)
        return "";

    // Handle normal case.
    if (!m_atStart)
        return convert(chs, len, flush);

    // Check to see if we found a BOM.
    // c1..c3 are the first three bytes of the stream, taken first from the
    // bytes buffered by earlier calls and then from chs; a byte that is not
    // available yet reads as 0.
    int numBufferedBytes = m_numBufferedBytes;
    int buf1Len = numBufferedBytes;
    int buf2Len = len;
    const unsigned char* buf1 = m_bufferedBytes;
    const unsigned char* buf2 = reinterpret_cast<const unsigned char*>(chs);
    unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
    unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
    unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
    int BOMLength = 0;
    // FF FE: UTF-16 little-endian BOM.
    if (c1 == 0xFF && c2 == 0xFE) {
        if (m_encoding != TextEncoding(UTF16Encoding, LittleEndian)) {
            m_encoding = TextEncoding(UTF16Encoding, LittleEndian);
            m_littleEndian = true;
        BOMLength = 2;
    // FE FF: UTF-16 big-endian BOM.
    } else if (c1 == 0xFE && c2 == 0xFF) {
        if (m_encoding != TextEncoding(UTF16Encoding, BigEndian)) {
            m_encoding = TextEncoding(UTF16Encoding, BigEndian);
            m_littleEndian = false;
        BOMLength = 2;
    // EF BB BF: UTF-8 BOM.
    } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) {
        if (m_encoding != TextEncoding(UTF8Encoding)) {
            m_encoding = TextEncoding(UTF8Encoding);
        BOMLength = 3;

    // Handle case where we found a BOM.
    if (BOMLength != 0) {
        ASSERT(numBufferedBytes + len >= BOMLength);
        // Number of BOM bytes that sit in chs rather than in the buffered bytes.
        int skip = BOMLength - numBufferedBytes;
        m_numBufferedBytes = 0;
        m_atStart = false;
        return len == skip ? DeprecatedString("") : convert(chs + skip, len - skip, flush);

    // Handle case where we know there is no BOM coming.
    const int bufferSize = sizeof(m_bufferedBytes);
    if (numBufferedBytes + len > bufferSize || flush) {
        m_atStart = false;
        if (numBufferedBytes == 0) {
            return convert(chs, len, flush);
        // Copy the held-back bytes to a local array, reset the member count,
        // then decode the copy followed by the new chunk.
        unsigned char bufferedBytes[sizeof(m_bufferedBytes)];
        memcpy(bufferedBytes, m_bufferedBytes, numBufferedBytes);
        m_numBufferedBytes = 0;
        return convert(bufferedBytes, numBufferedBytes, false) + convert(chs, len, flush);

    // Continue to look for the BOM.
    // The check above established numBufferedBytes + len <= bufferSize, so the
    // copy stays within m_bufferedBytes.
    memcpy(&m_bufferedBytes[numBufferedBytes], chs, len);
    m_numBufferedBytes += len;
    return "";
	// Returns the encoding for code page 12001.
	TextEncoding TextEncoding::UTF32BE()
		return TextEncoding(12001);
// parseDataUrl() is taken from the CURL http backend.
//
// Serves a "data:" URL by decoding its payload and delivering it to the
// handle's client.
//
// callback_data - the ResourceHandle whose request URL is to be parsed.
//
// Returns false on every visible path.
// NOTE(review): presumably this runs as a one-shot GLib idle callback (it
// clears d->m_idleHandler), where a false return removes the source; confirm
// at the registration site.
//
// NOTE(review): this excerpt contains no braces, and some statements may be
// missing (e.g. no finish notification to the client is visible).
static gboolean parseDataUrl(gpointer callback_data)
    ResourceHandle* handle = static_cast<ResourceHandle*>(callback_data);
    ResourceHandleClient* client = handle->client();
    ResourceHandleInternal* d = handle->getInternal();
    if (d->m_cancelled)
        return false;

    d->m_idleHandler = 0;

    if (!client)
        return false;

    String url = handle->request().url().string();
    ASSERT(url.startsWith("data:", false));

    // Everything before the first comma is the media type, everything after it
    // is the payload.
    int index = url.find(',');
    if (index == -1) {
        return false;

    // 5 is the length of the "data:" prefix.
    String mediaType = url.substring(5, index - 5);
    String data = url.substring(index + 1);

    // Strip a trailing ";base64" marker (7 characters) from the media type.
    bool isBase64 = mediaType.endsWith(";base64", false);
    if (isBase64)
        mediaType = mediaType.left(mediaType.length() - 7);

    // Default media type when the URL does not specify one.
    if (mediaType.isEmpty())
        mediaType = "text/plain;charset=US-ASCII";

    // NOTE(review): mimeType is not used again in the visible code and
    // `response` is passed to the client as default-constructed; confirm
    // whether the response is meant to carry the MIME type and charset.
    String mimeType = extractMIMETypeFromMediaType(mediaType);
    String charset = extractCharsetFromMediaType(mediaType);

    ResourceResponse response;

    if (isBase64) {
        data = decodeURLEscapeSequences(data);
        client->didReceiveResponse(handle, response);

        // The client callback may have cancelled the load.
        if (d->m_cancelled)
            return false;

        // Use the GLib Base64, since WebCore's decoder isn't
        // general-purpose and fails on Acid3 test 97 (whitespace).
        // NOTE(review): g_base64_decode returns a newly allocated buffer that
        // the caller must release with g_free(); no such call is visible here.
        size_t outLength = 0;
        char* outData = 0;
        outData = reinterpret_cast<char*>(g_base64_decode(data.utf8().data(), &outLength));
        if (outData && outLength > 0)
            client->didReceiveData(handle, outData, outLength, 0);
    } else {
        // We have to convert to UTF-16 early due to limitations in KURL
        data = decodeURLEscapeSequences(data, TextEncoding(charset));
        client->didReceiveResponse(handle, response);

        if (d->m_cancelled)
            return false;

        // The payload is delivered as raw UChar bytes, so the byte count is
        // length * sizeof(UChar).
        if (data.length() > 0)
            client->didReceiveData(handle, reinterpret_cast<const char*>(data.characters()), data.length() * sizeof(UChar), 0);

        if (d->m_cancelled)
            return false;


    return false;
	// Returns the encoding for the given code page number.
	TextEncoding TextEncoding::Windows(UINT codePage)
		return TextEncoding(codePage);
	// Returns the encoding for code page 1252.
	TextEncoding TextEncoding::WindowsLatin1()
		return TextEncoding(1252);
	// Returns the encoding for the code page reported by ::GetACP().
	TextEncoding TextEncoding::System() {
		return TextEncoding(::GetACP());
	// Returns the encoding for code page 1252, the same value WindowsLatin1() uses.
	TextEncoding TextEncoding::PalmLatin1()
		return TextEncoding(1252);
// Sets m_encoding from the given encoding name. An empty name leaves the
// current value of m_encoding unchanged.
void FileReaderLoader::setEncoding(const String& encoding)
    if (!encoding.isEmpty())
        m_encoding = TextEncoding(encoding);
	// Returns the encoding constructed from the name "CP932", used here as an
	// approximation of Palm Shift JIS.
	TextEncoding TextEncoding::PalmShiftJIS()
		// Palm Shift JIS is based on Windows CP 932, but is not identical.
		return TextEncoding("CP932");