/*
 * mbsToC16s - convert a NUL-terminated multibyte string into a
 * 0-terminated array of char16_t, one mbrtoc16() call per output unit.
 *
 *   mbStr   source string, interpreted by mbrtoc16() (i.e. in the
 *           multibyte encoding of the current LC_CTYPE locale).
 *   c16Str  destination buffer.
 *   len     capacity of c16Str in char16_t elements, including the
 *           slot needed for the terminating 0.
 *
 * Returns the number of char16_t elements stored, not counting the
 * terminator, or -1 when an argument is NULL, len is 0, the input holds an
 * invalid or incomplete multibyte sequence, or the result does not fit.
 * On failure the destination may contain a partial, unterminated result.
 *
 * All sizes are kept in size_t: narrowing len to int made capacities above
 * INT_MAX look negative, so every conversion was rejected.
 */
int mbsToC16s(const char *mbStr, char16_t *c16Str, size_t len)
{
    if (mbStr == NULL || c16Str == NULL || len == 0)    /* Sanity checks. */
        return -1;

    /* Largest count the int return type can report (INT_MAX on
       two's-complement targets), spelled without needing <limits.h>. */
    const size_t maxCount = (size_t)(~0u >> 1);

    mbstate_t mbstate = {0};
    const size_t nBytes = strlen(mbStr) + 1;    /* Include the terminator. */
    size_t in = 0;                              /* Bytes consumed so far. */
    size_t out = 0;                             /* char16_t units stored. */

    while (in < nBytes) {
        char16_t c16;
        const size_t rv = mbrtoc16(&c16, mbStr + in, nBytes - in, &mbstate);

        if (rv == 0) {                          /* End of string. */
            c16Str[out] = 0;
            return (int)out;
        }
        if (rv == (size_t)-1 || rv == (size_t)-2)
            return -1;                          /* Invalid or incomplete. */

        /* Always keep one slot free for the terminating 0. */
        if (out >= len - 1 || out >= maxCount)
            return -1;

        c16Str[out++] = c16;

        /* (size_t)-3 means c16 came from state left by the previous call
           (second half of a surrogate pair); no input was consumed. */
        if (rv != (size_t)-3)
            in += rv;
    }
    return (int)out;
}
/*
 * mbtoc16 - non-restartable wrapper around mbrtoc16() that uses the
 * conversion state stored in the runtime data block returned by
 * __getRtlData() (its mbtowc_st member).
 *
 *   pc16  receives the converted char16_t (passed straight to mbrtoc16()).
 *   s     multibyte input; may be NULL.
 *   n     maximum number of bytes of s to examine.
 *
 * Returns 0 when s is NULL or points at a NUL byte (mbrtoc16() is not
 * called and *pc16 is not written in that case), -1 when n is 0, and
 * otherwise the mbrtoc16() result converted to int, with the
 * "incomplete sequence" result (size_t)-2 reported as -1.
 */
int _RTL_FUNC mbtoc16(char16_t *pc16, const char *s, size_t n)
{
    size_t converted;

    if (s == NULL || *s == '\0')
        return 0;
    if (n == 0)
        return -1;

    converted = mbrtoc16(pc16, s, n, &__getRtlData()->mbtowc_st);

    /* Fold "incomplete" into the generic error value. */
    return (int)(converted == (size_t)-2 ? (size_t)-1 : converted);
}
/*
 * Switch LC_CTYPE to the named locale.  Returns nonzero only when
 * setlocale() succeeded and reports exactly the requested name.
 * setlocale() returns NULL for an unavailable locale, so its result must
 * not be handed to strcmp() unchecked.
 */
static int
use_ctype_locale(const char *name)
{
    const char *actual = setlocale(LC_CTYPE, name);

    return (actual != NULL && strcmp(actual, name) == 0);
}

/*
 * Test program for mbrtoc16().  Prints a "1..1" plan line, runs the
 * assertions below against the initial locale, en_US.ISO8859-1,
 * en_US.ISO8859-15 and en_US.UTF-8, then prints "ok 1 - mbrtoc16()".
 * Any failed expectation aborts through assert().  The command-line
 * arguments are not used.
 */
int
main(int argc, char *argv[])
{
    mbstate_t s;
    char16_t c16;

    (void)argc;
    (void)argv;

    /*
     * C/POSIX locale.
     */

    printf("1..1\n");

    /* Null wide character, internal state. */
    assert(mbrtoc16(&c16, "", 1, NULL) == 0);
    assert(c16 == 0);

    /* Null wide character. */
    memset(&s, 0, sizeof(s));
    assert(mbrtoc16(&c16, "", 1, &s) == 0);
    assert(c16 == 0);

    /*
     * Latin letter A, internal state.  The call with a null input
     * pointer comes first and must itself return 0.
     */
    assert(mbrtoc16(NULL, 0, 0, NULL) == 0);
    assert(mbrtoc16(&c16, "A", 1, NULL) == 1);
    assert(c16 == L'A');

    /* Latin letter A. */
    memset(&s, 0, sizeof(s));
    assert(mbrtoc16(&c16, "A", 1, &s) == 1);
    assert(c16 == L'A');

    /* Incomplete character sequence: n == 0 must leave c16 untouched. */
    c16 = L'z';
    memset(&s, 0, sizeof(s));
    assert(mbrtoc16(&c16, "", 0, &s) == (size_t)-2);
    assert(c16 == L'z');

    /* Check that mbrtoc16() doesn't access the buffer when n == 0. */
    c16 = L'z';
    memset(&s, 0, sizeof(s));
    assert(mbrtoc16(&c16, "", 0, &s) == (size_t)-2);
    assert(c16 == L'z');

    /* Check that mbrtoc16() doesn't read ahead too aggressively. */
    memset(&s, 0, sizeof(s));
    assert(mbrtoc16(&c16, "AB", 2, &s) == 1);
    assert(c16 == L'A');
    assert(mbrtoc16(&c16, "C", 1, &s) == 1);
    assert(c16 == L'C');

    /*
     * ISO-8859-1.
     */

    assert(use_ctype_locale("en_US.ISO8859-1"));

    /* Currency sign. */
    memset(&s, 0, sizeof(s));
    assert(mbrtoc16(&c16, "\xa4", 1, &s) == 1);
    assert(c16 == 0xa4);

    /*
     * ISO-8859-15.
     */

    assert(use_ctype_locale("en_US.ISO8859-15"));

    /* Euro sign: the same byte as above maps to U+20AC here. */
    memset(&s, 0, sizeof(s));
    assert(mbrtoc16(&c16, "\xa4", 1, &s) == 1);
    assert(c16 == 0x20ac);

    /*
     * UTF-8.
     */

    assert(use_ctype_locale("en_US.UTF-8"));

    /* Null wide character, internal state. */
    assert(mbrtoc16(NULL, 0, 0, NULL) == 0);
    assert(mbrtoc16(&c16, "", 1, NULL) == 0);
    assert(c16 == 0);

    /* Null wide character. */
    memset(&s, 0, sizeof(s));
    assert(mbrtoc16(&c16, "", 1, &s) == 0);
    assert(c16 == 0);

    /* Latin letter A, internal state. */
    assert(mbrtoc16(NULL, 0, 0, NULL) == 0);
    assert(mbrtoc16(&c16, "A", 1, NULL) == 1);
    assert(c16 == L'A');

    /* Latin letter A. */
    memset(&s, 0, sizeof(s));
    assert(mbrtoc16(&c16, "A", 1, &s) == 1);
    assert(c16 == L'A');

    /* Incomplete character sequence (zero length). */
    c16 = L'z';
    memset(&s, 0, sizeof(s));
    assert(mbrtoc16(&c16, "", 0, &s) == (size_t)-2);
    assert(c16 == L'z');

    /* Incomplete character sequence (truncated double-byte). */
    memset(&s, 0, sizeof(s));
    c16 = 0;
    assert(mbrtoc16(&c16, "\xc3", 1, &s) == (size_t)-2);

    /* Same as above, but complete. */
    memset(&s, 0, sizeof(s));
    c16 = 0;
    assert(mbrtoc16(&c16, "\xc3\x84", 2, &s) == 2);
    assert(c16 == 0xc4);

    /*
     * Test restarting behaviour: the lead byte is supplied alone, and
     * the continuation byte in a second call using the same state.
     */
    memset(&s, 0, sizeof(s));
    c16 = 0;
    assert(mbrtoc16(&c16, "\xc3", 1, &s) == (size_t)-2);
    assert(c16 == 0);
    assert(mbrtoc16(&c16, "\xb7", 1, &s) == 1);
    assert(c16 == 0xf7);

    /*
     * Surrogate pair: the first call consumes all four bytes and yields
     * the high surrogate; the second call returns (size_t)-3 and yields
     * the low surrogate without consuming any input.
     */
    memset(&s, 0, sizeof(s));
    c16 = 0;
    assert(mbrtoc16(&c16, "\xf0\x9f\x92\xa9", 4, &s) == 4);
    assert(c16 == 0xd83d);
    assert(mbrtoc16(&c16, "", 0, &s) == (size_t)-3);
    assert(c16 == 0xdca9);

    /* Letter e with acute, precomposed. */
    memset(&s, 0, sizeof(s));
    c16 = 0;
    assert(mbrtoc16(&c16, "\xc3\xa9", 2, &s) == 2);
    assert(c16 == 0xe9);

    /* Letter e with acute, combined: base letter and accent convert separately. */
    memset(&s, 0, sizeof(s));
    c16 = 0;
    assert(mbrtoc16(&c16, "\x65\xcc\x81", 3, &s) == 1);
    assert(c16 == 0x65);
    assert(mbrtoc16(&c16, "\xcc\x81", 2, &s) == 2);
    assert(c16 == 0x301);

    printf("ok 1 - mbrtoc16()\n");

    return (0);
}
/*
 * Round-trip check of the char16_t/char32_t conversion functions in the
 * de_DE.UTF-8 locale.
 *
 * Code points are sampled from 48 upward in steps of 0x1111 while the
 * value is <= 0x12000.  Each sample is encoded with c32rtomb() and the
 * bytes are then decoded with mbrtoc32() and mbrtoc16() (checking the
 * surrogate-pair path for values >= 0x10000), a NUL byte is decoded with
 * mbrtoc16(), and for values below 0x10000 the char16_t is re-encoded
 * with c16rtomb().
 *
 * Returns 0 when every check passed, 1 when the locale could not be set
 * or any check failed (a diagnostic is printed for each failure).
 */
static int
do_test (void)
{
  if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL)
    {
      puts ("cannot set locale");
      return 1;
    }

  int result = 0;

  char32_t c32 = 48;
  do
    {
      /* Skip the surrogate range U+D800..U+DFFF.  (U+E000 is an ordinary
         code point; none of the sampled values equals it, so tightening
         the bound does not change which values are tested.)  */
      if (c32 >= 0xd800 && c32 <= 0xdfff)
        continue;

      char buf[20];
      size_t n1 = c32rtomb (buf, c32, NULL);
      /* n1 is unsigned, so failure must be matched as (size_t) -1; a
         plain "<= 0" comparison can only ever detect 0.  */
      if (n1 == 0 || n1 == (size_t) -1)
        {
          printf ("c32rtomb for U'\\x%" PRIx32 "' failed\n", (uint32_t) c32);
          result = 1;
          continue;
        }

      char32_t c32out;
      size_t n2 = mbrtoc32 (&c32out, buf, n1, NULL);
      if ((ssize_t) n2 < 0)
        {
          printf ("mbrtoc32 for U'\\x%" PRIx32 "' failed\n", (uint32_t) c32);
          result = 1;
          continue;
        }
      if (n2 != n1)
        {
          printf ("mbrtoc32 for U'\\x%" PRIx32 "' consumed %zu bytes, not %zu\n",
                  (uint32_t) c32, n2, n1);
          result = 1;
        }
      else if (c32out != c32)
        {
          printf ("mbrtoc32 for U'\\x%" PRIx32 "' produced U'\\x%" PRIx32 "\n",
                  (uint32_t) c32, (uint32_t) c32out);
          result = 1;
        }

      char16_t c16;
      size_t n3 = mbrtoc16 (&c16, buf, n1, NULL);
      if (n3 != n1)
        {
          printf ("mbrtoc16 for U'\\x%" PRIx32 "' did not consume all bytes\n",
                  (uint32_t) c32);
          result = 1;
          continue;
        }
      if (c32 < 0x10000)
        {
          if (c16 != c32)
            {
              printf ("mbrtoc16 for U'\\x%" PRIx32 "' produce u'\\x%" PRIx16 "'\n",
                      (uint32_t) c32, (uint16_t) c16);
              result = 1;
              continue;
            }
        }
      else
        {
          /* The second half of the surrogate pair is expected from the
             pending internal state: the call must return (size_t) -3,
             whatever byte is offered as input.  */
          buf[0] = '1';
          char16_t c16_2;
          size_t n4 = mbrtoc16 (&c16_2, buf, 1, NULL);
          if (n4 != (size_t) -3)
            {
              printf ("second mbrtoc16 for U'\\x%" PRIx32 "' did not return -3\n",
                      (uint32_t) c32);
              result = 1;
              continue;
            }

          /* Recombine the two halves; 0xd7c0 is 0xd800 - (0x10000 >> 10).  */
          if (c32 != (((uint32_t) (c16 - 0xd7c0)) << 10) + (c16_2 - 0xdc00))
            {
              printf ("mbrtoc16 for U'\\x%" PRIx32 "' returns U'\\x%" PRIx32 "\n",
                      (uint32_t) c32,
                      (((uint32_t) (c16 - 0xd7c0)) << 10) + (c16_2 - 0xdc00));
              result = 1;
              continue;
            }
        }

      /* Decoding a NUL byte must return 0.  */
      buf[0] = '\0';
      char16_t c16_nul;
      n3 = mbrtoc16 (&c16_nul, buf, n1, NULL);
      if (n3 != 0)
        {
          printf ("mbrtoc16 for '\\0' returns %zd\n", (ssize_t) n3);
          result = 1;
          continue;
        }

      if (c32 < 0x10000)
        {
          /* Re-encoding the single char16_t must give n1 bytes again.  */
          size_t n5 = c16rtomb (buf, c16, NULL);
          if ((ssize_t) n5 < 0)
            {
              printf ("c16rtomb for U'\\x%" PRIx32 "' failed with %zd\n",
                      (uint32_t) c32, (ssize_t) n5);
              result = 1;
              continue;
            }
          if (n5 != n1)
            {
              printf ("c16rtomb for U'\\x%" PRIx32 "' produced %zu bytes instead of %zu bytes\n",
                      (uint32_t) c32, n5, n1);
              result = 1;
              continue;
            }
        }
    }
  while ((c32 += 0x1111) <= U'\x12000');

  return result;
}