// Encodes |length| UTF-16 code units starting at |characters| into m_encoding using
// CFString. Whatever CFStringGetBytes converts is appended to the output directly; when it
// stops before the end of the text, the character it stopped on is substituted with the
// bytes produced by getUnencodableReplacement() for |handling|, and conversion resumes
// after that character.
CString TextCodecMac::encode(const UChar* characters, size_t length, UnencodableHandling handling)
{
    // FIXME: We should really use TEC here instead of CFString for consistency with the other direction.

    // FIXME: Since there's no "force ASCII range" mode in CFString, we change the backslash into a yen sign.
    // Encoding will change the yen sign back into a backslash.
    String source(characters, length);
    source.replace('\\', m_backslashAsCurrencySymbol);
    CFStringRef cfString = source.createCFString(); // Released just before returning.

    // A non-zero loss byte is only requested in question-mark mode; with 0 the loop below
    // handles whatever CFStringGetBytes leaves unconverted.
    const UInt8 lossByte = (handling == QuestionMarksForUnencodables) ? '?' : 0;

    Vector<char> output;
    size_t outputSize = 0;
    CFIndex offset = 0;
    CFIndex remaining = CFStringGetLength(cfString);

    while (remaining > 0) {
        const CFRange pending = CFRangeMake(offset, remaining);

        // First call passes no buffer and only fills in the byte count; the second call
        // writes into the space just added at the end of |output|.
        CFIndex byteCount;
        CFStringGetBytes(cfString, pending, m_encoding, lossByte, false, NULL, 0x7FFFFFFF, &byteCount);

        output.grow(outputSize + byteCount);
        unsigned char* destination = reinterpret_cast<unsigned char*>(output.data() + outputSize);
        CFIndex consumed = CFStringGetBytes(cfString, pending, m_encoding, lossByte, false, destination, byteCount, &byteCount);
        outputSize += byteCount;

        if (consumed != remaining) {
            // Conversion stopped early: the character at the stopping point is treated as
            // unencodable and is consumed here.
            unsigned codePoint = CFStringGetCharacterAtIndex(cfString, offset + consumed);
            ++consumed;

            const bool isHighSurrogate = (codePoint & 0xFC00) == 0xD800;
            if (isHighSurrogate && consumed != remaining) {
                const UniChar trail = CFStringGetCharacterAtIndex(cfString, offset + consumed);
                const bool isLowSurrogate = (trail & 0xFC00) == 0xDC00;
                if (isLowSurrogate) {
                    // Fold the surrogate pair into a single supplementary code point and
                    // consume the trailing unit as well.
                    codePoint = 0x10000 + ((codePoint - 0xD800) << 10) + (trail - 0xDC00);
                    ++consumed;
                }
            }

            UnencodableReplacementArray replacement;
            const int replacementLength = getUnencodableReplacement(codePoint, handling, replacement);
            output.grow(outputSize + replacementLength);
            memcpy(output.data() + outputSize, replacement, replacementLength);
            outputSize += replacementLength;
        }

        offset += consumed;
        remaining -= consumed;
    }

    CFRelease(cfString);
    return CString(output.data(), outputSize);
}
// Encodes |length| UTF-16 code units starting at |characters| with m_codec.
// The whole buffer is converted in one call first. Only if the codec reports invalid
// characters is the text re-encoded chunk by chunk so that each unencodable chunk can be
// escaped according to |handling|.
CString TextCodecQt::encode(const UChar* characters, size_t length, UnencodableHandling handling)
{
    if (!length)
        return "";

    // ConvertInvalidToNull: unencodable input shows up as NUL bytes in the output, which is
    // what the replace('\0', ...) below relies on.
    QTextCodec::ConverterState state;
    state.flags = QTextCodec::ConversionFlags(QTextCodec::ConvertInvalidToNull | QTextCodec::IgnoreHeader);

    QByteArray encoded = m_codec->fromUnicode(reinterpret_cast<const QChar*>(characters), length, &state);

    // Fast path: everything was encodable.
    if (!state.invalidChars)
        return CString(encoded.constData(), encoded.length());

    // Slow path: discard the first attempt and rebuild the output one chunk at a time.
    // A chunk grows by one code unit per iteration until the codec reports no remaining
    // (pending) characters; it is then appended, escaped first if it was flagged invalid.
    state.invalidChars = 0;
    state.remainingChars = 0;
    encoded.clear();

    const UChar* chunkStart = characters;
    int chunkLength = 0;
    for (size_t index = 0; index < length; ++index) {
        ++chunkLength;
        QByteArray chunk = m_codec->fromUnicode(reinterpret_cast<const QChar*>(chunkStart), chunkLength, &state);

        if (!state.remainingChars) {
            if (state.invalidChars) {
                // NOTE(review): only the first code unit of the chunk is handed to
                // getUnencodableReplacement(), even if the chunk is longer than one unit
                // (e.g. a surrogate pair) -- confirm whether that is intended.
                UnencodableReplacementArray replacement;
                getUnencodableReplacement(chunkStart[0], handling, replacement);
                chunk.replace('\0', replacement);
                state.invalidChars = 0;
            }

            encoded.append(chunk);

            // Start the next chunk right after the one just emitted.
            chunkStart += chunkLength;
            chunkLength = 0;
            state.remainingChars = 0;
        }
    }

    return CString(encoded.constData(), encoded.length());
}