// Returns an ICU converter for this codec's m_name.
//
// When a ConverterState is supplied, the converter is created on first use,
// stored in state->d and handed back on later calls. When no state is given
// (or the stateful open produced no converter), a fresh converter is opened
// and returned instead.
// NOTE(review): nothing here closes the stateless converter; presumably the
// caller is expected to do so - confirm.
UConverter *QIcuCodec::getConverter(QTextCodec::ConverterState *state) const
{
    if (state) {
        if (!state->d) {
            // First use of this state: register qIcuCodecStateFree in the
            // state data and flag that a free function is present.
            state->flags |= QTextCodec::FreeFunction;
            QTextCodecUnalignedPointer::encode(state->state_data, qIcuCodecStateFree);

            UErrorCode status = U_ZERO_ERROR;
            state->d = ucnv_open(m_name, &status);

            // Unconvertible input becomes NUL or '?' depending on the flags.
            const char *substitute = (state->flags & QTextCodec::ConvertInvalidToNull) ? "\0" : "?";
            ucnv_setSubstChars(static_cast<UConverter *>(state->d), substitute, 1, &status);

            if (U_FAILURE(status))
                qDebug() << "getConverter(state) ucnv_open failed" << m_name << u_errorName(status);
        }

        if (UConverter *cached = static_cast<UConverter *>(state->d))
            return cached;
    }

    // Stateless conversion (also reached when the stateful open failed).
    UErrorCode status = U_ZERO_ERROR;
    UConverter *fresh = ucnv_open(m_name, &status);
    ucnv_setSubstChars(fresh, "?", 1, &status);
    if (U_FAILURE(status))
        qDebug() << "getConverter(no state) ucnv_open failed" << m_name << u_errorName(status);

    return fresh;
}
/*
 * Returns an ICU converter for icuName, configured according to flags.
 *
 * If flags carries a non-zero stream ID and the calling thread's ICU data
 * already holds a converter in the matching slot, that converter is returned
 * as is. Otherwise a new converter is opened. For a new converter:
 *   - if a lossy byte is available (from the flags, or '?' when
 *     kCFStringEncodingAllowLossyConversion is set) it becomes the
 *     substitution character;
 *   - otherwise the STOP callback is installed for the requested direction
 *     (toUnicode selects the to-Unicode side).
 *
 * Returns whatever ucnv_open returned when it yields NULL. The ICU error
 * code is not inspected here.
 */
CF_INLINE UConverter *__CFStringEncodingConverterCreateICUConverter(const char *icuName, uint32_t flags, bool toUnicode) {
    uint8_t streamID = CFStringEncodingStreamIDFromMask(flags);

    if (0 != streamID) {
        /* Continuation of an earlier stream: try to reuse its converter. */
        __CFICUThreadData *threadData = __CFStringEncodingICUGetThreadData();

        --streamID; /* stream IDs are 1-based; slots are 0-based */

        if ((streamID < threadData->_numSlots) && (NULL != threadData->_converters[streamID])) {
            return threadData->_converters[streamID];
        }
    }

    UErrorCode errorCode = U_ZERO_ERROR;
    UConverter *converter = ucnv_open(icuName, &errorCode);

    if (NULL == converter) return converter;

    char lossyByte = CFStringEncodingMaskToLossyByte(flags);

    if ((0 == lossyByte) && (0 != (flags & kCFStringEncodingAllowLossyConversion))) {
        lossyByte = '?';
    }

    if (0 != lossyByte) {
        ucnv_setSubstChars(converter, &lossyByte, 1, &errorCode);
    } else if (toUnicode) {
        ucnv_setToUCallBack(converter, &UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
    } else {
        ucnv_setFromUCallBack(converter, &UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
    }

    return converter;
}
/*
 * Opens the ICU converter that corresponds to `encoding`.
 *
 * A non-zero lossByte is installed as the converter's substitution
 * character; a zero lossByte installs the STOP callbacks in both
 * directions instead.
 *
 * Returns NULL when ucnv_open reports a failure. Errors reported by the
 * configuration calls are not checked; the converter is returned regardless.
 */
static UConverter *
GSStringOpenConverter (CFStringEncoding encoding, char lossByte)
{
  UErrorCode status = U_ZERO_ERROR;
  UConverter *converter = ucnv_open (CFStringICUConverterName (encoding), &status);

  if (U_FAILURE (status))
    converter = NULL;

  if (!lossByte)
    {
      ucnv_setToUCallBack (converter, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
      ucnv_setFromUCallBack (converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
    }
  else
    {
      /* FIXME: for some reason this call reports U_ILLEGAL_ARGUMENT_ERROR. */
      ucnv_setSubstChars (converter, &lossByte, 1, &status);
    }

  return converter;
}
// Encodes |length| UChars starting at |characters| with the ICU converter and
// returns the resulting bytes.
//
// |handling| selects what happens to characters the target encoding cannot
// represent: '?' substitution, XML decimal entities, or the URL-escaped
// entity callback. GBK-specific callbacks are used when m_needsGBKFallbacks
// is set.
//
// Returns "" for empty input and a default CString when the converter cannot
// be created or the callback setup fails.
CString TextCodecICU::encode(const UChar* characters, size_t length, UnencodableHandling handling)
{
    if (!length)
        return "";

    if (!m_converterICU) {
        createICUConverter();
        if (!m_converterICU)
            return CString();
    }

    // FIXME: Check whether ICU offers a "force ASCII range" mode. Until then
    // each backslash is swapped for the encoding's currency symbol (a yen
    // sign) before conversion; encoding turns it back into a backslash.
    String adjusted(characters, length);
    adjusted.replace('\\', m_encoding.backslashAsCurrencySymbol());

    const UChar* source = adjusted.characters();
    const UChar* sourceLimit = source + adjusted.length();

    UErrorCode err = U_ZERO_ERROR;

    if (handling == QuestionMarksForUnencodables) {
        ucnv_setSubstChars(m_converterICU, "?", 1, &err);
        ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);
    } else if (handling == EntitiesForUnencodables) {
        ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);
    } else if (handling == URLEncodedEntitiesForUnencodables) {
        ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack : urlEscapedEntityCallback, 0, 0, 0, &err);
    }

    ASSERT(U_SUCCESS(err));
    if (U_FAILURE(err))
        return CString();

    // Convert in fixed-size chunks, appending each chunk to |encoded| until
    // ICU stops reporting that the chunk buffer overflowed.
    Vector<char> encoded;
    size_t written = 0;
    for (;;) {
        char chunk[ConversionBufferSize];
        char* cursor = chunk;
        char* chunkEnd = chunk + ConversionBufferSize;

        err = U_ZERO_ERROR;
        ucnv_fromUnicode(m_converterICU, &cursor, chunkEnd, &source, sourceLimit, 0, true, &err);

        size_t produced = cursor - chunk;
        encoded.grow(written + produced);
        memcpy(encoded.data() + written, chunk, produced);
        written += produced;

        if (err != U_BUFFER_OVERFLOW_ERROR)
            break;
    }

    return CString(encoded.data(), written);
}
CString TextCodecICU::encodeInternal(const TextCodecInput& input, UnencodableHandling handling) { const UChar* source = input.begin(); const UChar* end = input.end(); UErrorCode err = U_ZERO_ERROR; switch (handling) { case QuestionMarksForUnencodables: ucnv_setSubstChars(m_converterICU, "?", 1, &err); #if !defined(USING_SYSTEM_ICU) ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err); #else ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err); #endif break; case EntitiesForUnencodables: #if !defined(USING_SYSTEM_ICU) ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err); #else ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err); #endif break; case URLEncodedEntitiesForUnencodables: #if !defined(USING_SYSTEM_ICU) ucnv_setFromUCallBack(m_converterICU, urlEscapedEntityCallback, 0, 0, 0, &err); #else ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack : urlEscapedEntityCallback, 0, 0, 0, &err); #endif break; } ASSERT(U_SUCCESS(err)); if (U_FAILURE(err)) return CString(); Vector<char> result; size_t size = 0; do { char buffer[ConversionBufferSize]; char* target = buffer; char* targetLimit = target + ConversionBufferSize; err = U_ZERO_ERROR; ucnv_fromUnicode(m_converterICU, &target, targetLimit, &source, end, 0, true, &err); size_t count = target - buffer; result.grow(size + count); memcpy(result.data() + size, buffer, count); size += count; } while (err == U_BUFFER_OVERFLOW_ERROR); return CString(result.data(), size); }
// Encodes |qcs| into the decoder's effective encoding and returns the bytes.
//
// Fast paths: an all-Latin-1 string targeting WinLatin1, or an all-ASCII
// string targeting WinLatin1/UTF-8/ASCII, is returned directly without ICU.
// Otherwise backslashes are replaced with the encoding's currency symbol,
// the text is normalized to NFC if it is not already, and the result is run
// through the ICU converter.
//
// |allowEntities| selects the handling of characters the encoding cannot
// represent: XML decimal escapes when true, '?' substitution when false.
//
// Returns a default DeprecatedCString when the converter cannot be created
// or when normalization / callback setup leaves an ICU error behind.
DeprecatedCString StreamingTextDecoderICU::fromUnicode(const DeprecatedString &qcs, bool allowEntities)
{
    TextEncodingID encoding = m_encoding.effectiveEncoding().encodingID();

    if (encoding == WinLatin1Encoding && qcs.isAllLatin1())
        return qcs.latin1();

    if ((encoding == WinLatin1Encoding || encoding == UTF8Encoding || encoding == ASCIIEncoding)
        && qcs.isAllASCII())
        return qcs.ascii();

    // FIXME: We should see if there is "force ASCII range" mode in ICU;
    // until then, we change the backslash into a yen sign.
    // Encoding will change the yen sign back into a backslash.
    DeprecatedString copy = qcs;
    copy.replace('\\', m_encoding.backslashAsCurrencySymbol());

    if (!m_converterICU)
        createICUConverter();
    if (!m_converterICU)
        return DeprecatedCString();

    // FIXME: when DeprecatedString buffer is latin1, it would be nice to
    // convert from that w/o having to allocate a unicode buffer

    char buffer[ConversionBufferSize];
    const UChar* source = reinterpret_cast<const UChar*>(copy.unicode());
    const UChar* sourceLimit = source + copy.length();

    UErrorCode err = U_ZERO_ERROR;
    DeprecatedString normalizedString;
    if (UNORM_YES != unorm_quickCheck(source, copy.length(), UNORM_NFC, &err)) {
        // NOTE(review): this relies on truncate() giving normalizedString a
        // writable buffer of the requested length - confirm.
        normalizedString.truncate(copy.length()); // normalization to NFC rarely increases the length, so this first attempt will usually succeed

        int32_t normalizedLength = unorm_normalize(source, copy.length(), UNORM_NFC, 0,
            reinterpret_cast<UChar*>(const_cast<DeprecatedChar*>(normalizedString.unicode())), copy.length(), &err);
        if (err == U_BUFFER_OVERFLOW_ERROR) {
            // First attempt was too small; retry with the length ICU asked for.
            err = U_ZERO_ERROR;
            normalizedString.truncate(normalizedLength);
            normalizedLength = unorm_normalize(source, copy.length(), UNORM_NFC, 0,
                reinterpret_cast<UChar*>(const_cast<DeprecatedChar*>(normalizedString.unicode())), normalizedLength, &err);
        }

        source = reinterpret_cast<const UChar*>(normalizedString.unicode());
        sourceLimit = source + normalizedLength;
    }

    DeprecatedCString result(1); // for trailing zero

    if (allowEntities)
        ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);
    else {
        ucnv_setSubstChars(m_converterICU, "?", 1, &err);
        ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);
    }

    // Any error left over from normalization makes the calls above no-ops,
    // so this check covers both stages.
    ASSERT(U_SUCCESS(err));
    if (U_FAILURE(err))
        return DeprecatedCString();

    do {
        char* target = buffer;
        // Keep the last byte of |buffer| free for the terminator written
        // below. With the limit at the very end of the array, a full buffer
        // (the U_BUFFER_OVERFLOW_ERROR case) would put the terminator one
        // byte past the array.
        char* targetLimit = buffer + ConversionBufferSize - 1;
        err = U_ZERO_ERROR;
        ucnv_fromUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, 0, true, &err);
        int count = target - buffer;
        buffer[count] = 0;
        // NOTE(review): append() is given a NUL-terminated buffer, so output
        // containing zero bytes would presumably be cut short - confirm.
        result.append(buffer);
    } while (err == U_BUFFER_OVERFLOW_ERROR);

    return result;
}