Esempio n. 1
0
CString TextCodecMac::encode(const UChar* characters, size_t length, UnencodableHandling handling)
{
    // FIXME: We should really use TEC here instead of CFString for consistency with the other direction.

    // FIXME: Since there's no "force ASCII range" mode in CFString, we change the backslash into a yen sign.
    // Encoding will change the yen sign back into a backslash.
    String copy(characters, length);
    copy.replace('\\', m_backslashAsCurrencySymbol);
    CFStringRef cfs = copy.createCFString();

    CFIndex startPos = 0;
    CFIndex charactersLeft = CFStringGetLength(cfs);
    Vector<char> result;
    size_t size = 0;
    UInt8 lossByte = handling == QuestionMarksForUnencodables ? '?' : 0;
    while (charactersLeft > 0) {
        CFRange range = CFRangeMake(startPos, charactersLeft);
        CFIndex bufferLength;
        CFStringGetBytes(cfs, range, m_encoding, lossByte, false, NULL, 0x7FFFFFFF, &bufferLength);

        result.grow(size + bufferLength);
        unsigned char* buffer = reinterpret_cast<unsigned char*>(result.data() + size);
        CFIndex charactersConverted = CFStringGetBytes(cfs, range, m_encoding, lossByte, false, buffer, bufferLength, &bufferLength);
        size += bufferLength;

        if (charactersConverted != charactersLeft) {
            unsigned badChar = CFStringGetCharacterAtIndex(cfs, startPos + charactersConverted);
            ++charactersConverted;
            if ((badChar & 0xFC00) == 0xD800 && charactersConverted != charactersLeft) { // is high surrogate
                UniChar low = CFStringGetCharacterAtIndex(cfs, startPos + charactersConverted);
                if ((low & 0xFC00) == 0xDC00) { // is low surrogate
                    badChar <<= 10;
                    badChar += low;
                    badChar += 0x10000 - (0xD800 << 10) - 0xDC00;
                    ++charactersConverted;
                }
            }
            UnencodableReplacementArray entity;
            int entityLength = getUnencodableReplacement(badChar, handling, entity);
            result.grow(size + entityLength);
            memcpy(result.data() + size, entity, entityLength);
            size += entityLength;
        }

        startPos += charactersConverted;
        charactersLeft -= charactersConverted;
    }
    CFRelease(cfs);
    return CString(result.data(), size);
}
CString TextCodecQt::encode(const UChar* characters, size_t length, UnencodableHandling handling)
{
    QTextCodec::ConverterState state;
    state.flags = QTextCodec::ConversionFlags(QTextCodec::ConvertInvalidToNull | QTextCodec::IgnoreHeader);

    if (!length)
        return "";

    QByteArray ba = m_codec->fromUnicode(reinterpret_cast<const QChar*>(characters), length, &state);

    // If some <b> characters </b> are unencodable, escape them as specified by <b> handling </b>
    // We append one valid encoded chunk to a QByteArray at a time. When we encounter an unencodable chunk we
    // escape it with getUnencodableReplacement, append it, then move to the next chunk.
    if (state.invalidChars) {
        state.invalidChars = 0;
        state.remainingChars = 0;
        int len = 0;
        ba.clear();
        for (size_t pos = 0; pos < length; ++pos) {
            QByteArray tba = m_codec->fromUnicode(reinterpret_cast<const QChar*>(characters), ++len, &state);
            if (state.remainingChars)
                continue;
            if (state.invalidChars) {
                UnencodableReplacementArray replacement;
                getUnencodableReplacement(characters[0], handling, replacement);
                tba.replace('\0', replacement);
                state.invalidChars = 0;
            }
            ba.append(tba);
            characters += len;
            len = 0;
            state.remainingChars = 0;
        }
    }

    return CString(ba.constData(), ba.length());
}