void UnicodeTestCase::ConversionUTF16() { static const StringConversionData utf16data[] = { #ifdef wxHAVE_U_ESCAPE StringConversionData( "\x04\x1f\x04\x40\x04\x38\x04\x32\x04\x35\x04\x42\0\0", L"\u041f\u0440\u0438\u0432\u0435\u0442"), StringConversionData( "\x01\0\0b\x01\0\0a\x01\0\0r\0\0", L"\u0100b\u0100a\u0100r"), #endif StringConversionData("\0f\0o\0o\0\0", L"foo"), }; wxCSConv conv(wxFONTENCODING_UTF16BE); for ( size_t n = 0; n < WXSIZEOF(utf16data); n++ ) { const StringConversionData& d = utf16data[n]; d.Test(n, conv); } // special case: this string has consecutive NULs inside it which don't // terminate the string, this exposed a bug in our conversion code which // got confused in this case size_t len; conv.cMB2WC("\x01\0\0B\0C" /* A macron BC */, 6, &len); CPPUNIT_ASSERT_EQUAL( 3, len ); // When using UTF-16 internally (i.e. MSW), we don't have any surrogate // support, so the length of the string below is 2, not 1. #if SIZEOF_WCHAR_T == 4 // Another one: verify that the length of the resulting string is computed // correctly when there is a surrogate in the input. wxMBConvUTF16BE().cMB2WC("\xd8\x03\xdc\x01\0" /* OLD TURKIC LETTER YENISEI A */, wxNO_LEN, &len); CPPUNIT_ASSERT_EQUAL( 1, len ); #endif // UTF-32 internal representation #if SIZEOF_WCHAR_T == 2 // Verify that the length of UTF-32 string is correct even when converting // to it from a longer UTF-16 string with surrogates. // Construct CAT FACE U+1F431 without using \U which is not supported by // ancient compilers and without using \u with surrogates which is // (correctly) flagged as an error by the newer ones. wchar_t ws[2]; ws[0] = 0xd83d; ws[1] = 0xdc31; CPPUNIT_ASSERT_EQUAL( 4, wxMBConvUTF32BE().FromWChar(NULL, 0, ws, 2) ); #endif // UTF-16 internal representation }
// Passes the txtUtf32be buffer to TestInput() twice: first with the
// dedicated wxMBConvUTF32BE converter, then with a wxCSConv constructed for
// wxFONTENCODING_UTF32BE.
void TextStreamTestCase::TestEmbeddedZerosUTF32BEInput()
{
    // Size of the whole buffer, in bytes, as reported by sizeof; the same
    // value is handed to both calls.
    const size_t utf32beSize = sizeof(txtUtf32be);

    TestInput(wxMBConvUTF32BE(), txtUtf32be, utf32beSize);
    TestInput(wxCSConv(wxFONTENCODING_UTF32BE), txtUtf32be, utf32beSize);
}