예제 #1
0
/*
    Returns an ICU converter for this codec's name (m_name).

    When a ConverterState is supplied, the converter is opened on first use,
    stored in state->d and handed back on every later call with that state.
    Without a state (or when the state still holds no converter after the
    attempt to open one) a new converter is opened for this call.

    Failures reported by ICU are only logged via qDebug(); the function then
    returns whatever ucnv_open() produced.
*/
UConverter *QIcuCodec::getConverter(QTextCodec::ConverterState *state) const
{
    if (state) {
        if (!state->d) {
            // First use of this state: record the free function in the state
            // (presumably so the state can release the converter later --
            // confirm against ConverterState's cleanup code).
            state->flags |= QTextCodec::FreeFunction;
            QTextCodecUnalignedPointer::encode(state->state_data, qIcuCodecStateFree);

            UErrorCode status = U_ZERO_ERROR;
            state->d = ucnv_open(m_name, &status);

            // Invalid input is replaced by a NUL byte when the caller asked
            // for ConvertInvalidToNull, and by a question mark otherwise.
            const char *substitute = (state->flags & QTextCodec::ConvertInvalidToNull) ? "\0" : "?";
            ucnv_setSubstChars(static_cast<UConverter *>(state->d), substitute, 1, &status);

            if (U_FAILURE(status))
                qDebug() << "getConverter(state) ucnv_open failed" << m_name << u_errorName(status);
        }

        if (UConverter *cached = static_cast<UConverter *>(state->d))
            return cached;
    }

    // Stateless conversion: open a converter just for this call.
    UErrorCode status = U_ZERO_ERROR;
    UConverter *fresh = ucnv_open(m_name, &status);
    ucnv_setSubstChars(fresh, "?", 1, &status);
    if (U_FAILURE(status))
        qDebug() << "getConverter(no state) ucnv_open failed" << m_name << u_errorName(status);

    return fresh;
}
예제 #2
0
/*
 * Returns an ICU converter for icuName configured according to flags.
 *
 * If flags carries a non-zero stream ID and the calling thread's ICU data
 * already holds a converter in the matching slot, that converter is returned
 * as-is. Otherwise a new converter is opened with ucnv_open(); NULL is
 * returned when that yields no converter.
 *
 * Configuration of a newly opened converter:
 *   - a lossy byte taken from flags (or '?' when lossy conversion is allowed
 *     without an explicit byte) is installed as the substitution character;
 *   - with no lossy byte, the STOP callback is installed for the direction
 *     selected by toUnicode.
 * Error codes produced while configuring are not inspected.
 */
CF_INLINE UConverter *__CFStringEncodingConverterCreateICUConverter(const char *icuName, uint32_t flags, bool toUnicode) {
    UConverter *converter;
    char lossyByte;
    UErrorCode status = U_ZERO_ERROR;
    uint8_t streamID = CFStringEncodingStreamIDFromMask(flags);

    if (streamID != 0) { // continuing a stream that was set up earlier
        __CFICUThreadData *threadData = __CFStringEncodingICUGetThreadData();
        uint8_t slot = streamID - 1; // stream IDs are 1-based, slots are 0-based

        if (slot < threadData->_numSlots) {
            UConverter *cached = threadData->_converters[slot];

            if (cached != NULL) return cached;
        }
    }

    converter = ucnv_open(icuName, &status);
    if (converter == NULL) return NULL;

    lossyByte = CFStringEncodingMaskToLossyByte(flags);
    if ((lossyByte == 0) && ((flags & kCFStringEncodingAllowLossyConversion) != 0)) lossyByte = '?';

    if (lossyByte != 0) {
        ucnv_setSubstChars(converter, &lossyByte, 1, &status);
    } else if (toUnicode) {
        ucnv_setToUCallBack(converter, &UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
    } else {
        ucnv_setFromUCallBack(converter, &UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
    }

    return converter;
}
예제 #3
0
/* Opens an ICU converter for ENCODING.
 *
 * The converter name is looked up with CFStringICUConverterName().  If
 * LOSSBYTE is non-zero it is installed as the single-byte substitution
 * character; otherwise both conversion directions are set to stop at the
 * first character that cannot be converted.
 *
 * Returns the new converter, or NULL if ucnv_open() failed.  Errors raised
 * while configuring an opened converter are not reported to the caller.
 */
static UConverter *
GSStringOpenConverter (CFStringEncoding encoding, char lossByte)
{
  const char *converterName;
  UConverter *cnv;
  UErrorCode err = U_ZERO_ERROR;

  converterName = CFStringICUConverterName (encoding);

  cnv = ucnv_open (converterName, &err);
  /* Bail out right away instead of handing a NULL converter to the ICU
   * setters below and relying on the failed error code to make them no-ops. */
  if (U_FAILURE (err))
    return NULL;

  if (lossByte)
    {
      /* FIXME: for some reason this is returning U_ILLEGAL_ARGUMENTS_ERROR */
      /* NOTE(review): possibly a one-byte substitution is rejected by
       * converters whose characters are never a single byte -- confirm
       * against the ICU ucnv_setSubstChars() documentation. */
      ucnv_setSubstChars (cnv, &lossByte, 1, &err);
    }
  else
    {
      ucnv_setToUCallBack (cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL,
                           &err);
      ucnv_setFromUCallBack (cnv, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL,
                             &err);
    }

  return cnv;
}
예제 #4
0
// Encodes |length| UTF-16 code units starting at |characters| with the ICU
// converter, creating the converter on demand.
//
// |handling| selects what happens to characters the target encoding cannot
// represent: a "?" substitution, an XML decimal entity, or a URL-escaped
// entity. Returns "" for empty input and a null CString when the converter
// cannot be created or ICU rejects the callback setup.
CString TextCodecICU::encode(const UChar* characters, size_t length, UnencodableHandling handling)
{
    if (!length)
        return "";

    if (!m_converterICU) {
        createICUConverter();
        if (!m_converterICU)
            return CString();
    }

    // FIXME: We should see if there is "force ASCII range" mode in ICU;
    // until then, we change the backslash into a yen sign.
    // Encoding will change the yen sign back into a backslash.
    String copy(characters, length);
    copy.replace('\\', m_encoding.backslashAsCurrencySymbol());

    const UChar* source = copy.characters();
    const UChar* sourceLimit = source + copy.length();

    UErrorCode status = U_ZERO_ERROR;

    // Install the from-Unicode callback matching the requested handling;
    // the GBK variants are used when the converter needs GBK fallbacks.
    if (handling == QuestionMarksForUnencodables) {
        ucnv_setSubstChars(m_converterICU, "?", 1, &status);
        ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &status);
    } else if (handling == EntitiesForUnencodables) {
        ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &status);
    } else if (handling == URLEncodedEntitiesForUnencodables) {
        ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack : urlEscapedEntityCallback, 0, 0, 0, &status);
    }

    ASSERT(U_SUCCESS(status));
    if (U_FAILURE(status))
        return CString();

    // Convert in fixed-size chunks, appending each chunk to |encoded| until
    // ICU stops reporting that the chunk buffer overflowed.
    Vector<char> encoded;
    size_t written = 0;
    char chunk[ConversionBufferSize];
    for (;;) {
        char* cursor = chunk;
        char* chunkEnd = chunk + ConversionBufferSize;
        status = U_ZERO_ERROR;
        ucnv_fromUnicode(m_converterICU, &cursor, chunkEnd, &source, sourceLimit, 0, true, &status);

        const size_t produced = cursor - chunk;
        encoded.grow(written + produced);
        memcpy(encoded.data() + written, chunk, produced);
        written += produced;

        if (status != U_BUFFER_OVERFLOW_ERROR)
            break;
    }

    return CString(encoded.data(), written);
}
예제 #5
0
// Encodes the UTF-16 range described by |input| with m_converterICU.
//
// |handling| selects the from-Unicode callback for unencodable characters:
// "?" substitution, XML decimal entities, or URL-escaped entities. When
// building against the system ICU, GBK-specific callbacks are used if
// m_needsGBKFallbacks is set. Returns a null CString if ICU rejects the
// callback setup.
//
// NOTE(review): m_converterICU is used without a null check here; presumably
// the caller creates it first -- confirm.
CString TextCodecICU::encodeInternal(const TextCodecInput& input, UnencodableHandling handling)
{
    const UChar* source = input.begin();
    const UChar* sourceLimit = input.end();

    UErrorCode status = U_ZERO_ERROR;

    if (handling == QuestionMarksForUnencodables) {
        ucnv_setSubstChars(m_converterICU, "?", 1, &status);
#if defined(USING_SYSTEM_ICU)
        ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &status);
#else
        ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &status);
#endif
    } else if (handling == EntitiesForUnencodables) {
#if defined(USING_SYSTEM_ICU)
        ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &status);
#else
        ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &status);
#endif
    } else if (handling == URLEncodedEntitiesForUnencodables) {
#if defined(USING_SYSTEM_ICU)
        ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack : urlEscapedEntityCallback, 0, 0, 0, &status);
#else
        ucnv_setFromUCallBack(m_converterICU, urlEscapedEntityCallback, 0, 0, 0, &status);
#endif
    }

    ASSERT(U_SUCCESS(status));
    if (U_FAILURE(status))
        return CString();

    // Convert in fixed-size chunks, appending each chunk to |encoded| until
    // ICU stops reporting that the chunk buffer overflowed.
    Vector<char> encoded;
    size_t written = 0;
    char chunk[ConversionBufferSize];
    for (;;) {
        char* cursor = chunk;
        char* chunkEnd = chunk + ConversionBufferSize;
        status = U_ZERO_ERROR;
        ucnv_fromUnicode(m_converterICU, &cursor, chunkEnd, &source, sourceLimit, 0, true, &status);

        const size_t produced = cursor - chunk;
        encoded.grow(written + produced);
        memcpy(encoded.data() + written, chunk, produced);
        written += produced;

        if (status != U_BUFFER_OVERFLOW_ERROR)
            break;
    }

    return CString(encoded.data(), written);
}
예제 #6
0
// Encodes |qcs| into bytes of the effective encoding of m_encoding.
//
// Latin-1 / ASCII-only input takes a shortcut for the WinLatin1, UTF-8 and
// ASCII encodings. Everything else is NFC-normalized when necessary and run
// through the ICU converter, which is created on demand.
//
// |allowEntities| selects how unencodable characters are written: as XML
// decimal entities when true, as "?" when false.
//
// Returns an empty DeprecatedCString when the converter cannot be created or
// an ICU call made before the conversion loop reported an error.
DeprecatedCString StreamingTextDecoderICU::fromUnicode(const DeprecatedString &qcs, bool allowEntities)
{
    TextEncodingID encoding = m_encoding.effectiveEncoding().encodingID();

    if (encoding == WinLatin1Encoding && qcs.isAllLatin1())
        return qcs.latin1();

    if ((encoding == WinLatin1Encoding || encoding == UTF8Encoding || encoding == ASCIIEncoding) 
        && qcs.isAllASCII())
        return qcs.ascii();

    // FIXME: We should see if there is "force ASCII range" mode in ICU;
    // until then, we change the backslash into a yen sign.
    // Encoding will change the yen sign back into a backslash.
    DeprecatedString copy = qcs;
    copy.replace('\\', m_encoding.backslashAsCurrencySymbol());

    if (!m_converterICU)
        createICUConverter();
    if (!m_converterICU)
        return DeprecatedCString();

    // FIXME: when DeprecatedString buffer is latin1, it would be nice to
    // convert from that w/o having to allocate a unicode buffer

    char buffer[ConversionBufferSize];
    const UChar* source = reinterpret_cast<const UChar*>(copy.unicode());
    const UChar* sourceLimit = source + copy.length();

    UErrorCode err = U_ZERO_ERROR;
    DeprecatedString normalizedString;
    if (UNORM_YES != unorm_quickCheck(source, copy.length(), UNORM_NFC, &err)) {
        normalizedString.truncate(copy.length()); // normalization to NFC rarely increases the length, so this first attempt will usually succeed
        
        int32_t normalizedLength = unorm_normalize(source, copy.length(), UNORM_NFC, 0, reinterpret_cast<UChar*>(const_cast<DeprecatedChar*>(normalizedString.unicode())), copy.length(), &err);
        if (err == U_BUFFER_OVERFLOW_ERROR) {
            // The first buffer was too small; retry with the length ICU reported.
            err = U_ZERO_ERROR;
            normalizedString.truncate(normalizedLength);
            normalizedLength = unorm_normalize(source, copy.length(), UNORM_NFC, 0, reinterpret_cast<UChar*>(const_cast<DeprecatedChar*>(normalizedString.unicode())), normalizedLength, &err);
        }
        
        source = reinterpret_cast<const UChar*>(normalizedString.unicode());
        sourceLimit = source + normalizedLength;
    }

    DeprecatedCString result(1); // for trailing zero

    if (allowEntities)
        ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);
    else {
        ucnv_setSubstChars(m_converterICU, "?", 1, &err);
        ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);
    }

    // Any error left over from normalization or callback setup ends up here.
    ASSERT(U_SUCCESS(err));
    if (U_FAILURE(err))
        return DeprecatedCString();

    // Convert in chunks; each chunk is NUL-terminated in place and appended
    // to the result as a C string.
    do {
        char* target = buffer;
        // Keep the last byte of the buffer free for the terminator written
        // below. With the limit at the very end of the buffer, a full chunk
        // would put the terminator one byte past the array.
        char* targetLimit = buffer + ConversionBufferSize - 1;
        err = U_ZERO_ERROR;
        ucnv_fromUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, 0, true,  &err);
        int count = target - buffer;
        buffer[count] = 0;
        result.append(buffer);
    } while (err == U_BUFFER_OVERFLOW_ERROR);

    return result;
}