/* Check how mbrtowc treats a NUL byte (supplied as the empty string) at
   each point of the three-byte sequence E2 89 A0: the NUL byte must be
   accepted when no character is pending and rejected while one is only
   partially consumed.  Always returns 0; failures surface via assert.  */
static int
utf8_test_2 (void)
{
  const size_t incomplete = (size_t) -2; /* byte consumed, character pending */
  const size_t invalid = (size_t) -1;    /* input rejected */
  wchar_t wch;
  mbstate_t state;

  /* Nothing pending: NUL is a valid terminator.  */
  wch = 42; /* arbitrary number */
  memset (&state, 0, sizeof state); /* get state into initial state */
  assert (mbrtowc (NULL, "", 1, &state) == 0);
  assert (mbsinit (&state));

  /* One byte pending: NUL is an invalid terminator.  */
  wch = 42; /* arbitrary number */
  memset (&state, 0, sizeof state);
  assert (mbrtowc (&wch, "\xE2", 1, &state) == incomplete); /* 1st byte */
  assert (mbrtowc (NULL, "", 1, &state) == invalid);

  /* Two bytes pending: NUL is still an invalid terminator.  */
  wch = 42; /* arbitrary number */
  memset (&state, 0, sizeof state);
  assert (mbrtowc (&wch, "\xE2", 1, &state) == incomplete); /* 1st byte */
  assert (mbrtowc (&wch, "\x89", 1, &state) == incomplete); /* 2nd byte */
  assert (mbrtowc (NULL, "", 1, &state) == invalid);

  /* Character completed: NUL is a valid terminator again.  */
  wch = 42; /* arbitrary number */
  memset (&state, 0, sizeof state);
  assert (mbrtowc (&wch, "\xE2", 1, &state) == incomplete); /* 1st byte */
  assert (mbrtowc (&wch, "\x89", 1, &state) == incomplete); /* 2nd byte */
  assert (mbrtowc (&wch, "\xA0", 1, &state) == 1);          /* 3rd byte */
  assert (mbrtowc (NULL, "", 1, &state) == 0);
  assert (mbsinit (&state));

  return 0;
}
void test_mbrtowc_incomplete(mbstate_t* ps) { ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); uselocale(LC_GLOBAL_LOCALE); wchar_t out; // 2-byte UTF-8. ASSERT_EQ(static_cast<size_t>(-2), mbrtowc(&out, "\xc2", 1, ps)); ASSERT_EQ(1U, mbrtowc(&out, "\xa2" "cdef", 5, ps)); ASSERT_EQ(static_cast<wchar_t>(0x00a2), out); ASSERT_TRUE(mbsinit(ps)); // 3-byte UTF-8. ASSERT_EQ(static_cast<size_t>(-2), mbrtowc(&out, "\xe2", 1, ps)); ASSERT_EQ(static_cast<size_t>(-2), mbrtowc(&out, "\x82", 1, ps)); ASSERT_EQ(1U, mbrtowc(&out, "\xac" "def", 4, ps)); ASSERT_EQ(static_cast<wchar_t>(0x20ac), out); ASSERT_TRUE(mbsinit(ps)); // 4-byte UTF-8. ASSERT_EQ(static_cast<size_t>(-2), mbrtowc(&out, "\xf0", 1, ps)); ASSERT_EQ(static_cast<size_t>(-2), mbrtowc(&out, "\xa4\xad", 2, ps)); ASSERT_EQ(1U, mbrtowc(&out, "\xa2" "ef", 3, ps)); ASSERT_EQ(static_cast<wchar_t>(0x24b62), out); ASSERT_TRUE(mbsinit(ps)); // Invalid 2-byte ASSERT_EQ(static_cast<size_t>(-2), mbrtowc(&out, "\xc2", 1, ps)); ASSERT_EQ(static_cast<size_t>(-1), mbrtowc(&out, "\x20" "cdef", 5, ps)); ASSERT_EQ(EILSEQ, errno); }
void ui_init(bool force_ascii) { // Cell character 0 is always a space strcpy(ui_chars[0], " "); #ifdef __STDC_ISO_10646__ if (force_ascii) { ui_ascii = true; return; } // Encode Unicode cell characters using system locale char *origLocale = setlocale(LC_CTYPE, NULL); setlocale(LC_CTYPE, ""); int ch; mbstate_t mbs; memset(&mbs, 0, sizeof mbs); for (ch = 1; ch < NCHARS; ch++) { int len = wcrtomb(ui_chars[ch], 0x2580 + ch, &mbs); if (len == -1 || !mbsinit(&mbs)) { ui_ascii = true; break; } ui_chars[ch][len] = 0; } // Restore the original locale setlocale(LC_CTYPE, origLocale); #else ui_ascii = true; #endif }
/* Replacement wcrtomb() built on wctomb().

   Because wctomb() takes no explicit conversion state, only stateless
   encodings are supported: a non-NULL PS that is not in the initial state
   is rejected with EINVAL.  With S == NULL the result is 1.  Otherwise the
   result is whatever wctomb() stored, or (size_t)-1 with errno set to
   EILSEQ when wctomb() fails.  */
size_t
wcrtomb (char *s, wchar_t wc, mbstate_t *ps)
{
  int nbytes;

  /* ps must be in the initial state.  */
  if (ps != NULL && !mbsinit (ps))
    {
      errno = EINVAL;
      return (size_t)(-1);
    }

  /* We know the NUL wide character corresponds to the NUL character.  */
  if (s == NULL)
    return 1;

  nbytes = wctomb (s, wc);
  if (nbytes < 0)
    {
      errno = EILSEQ;
      return (size_t)(-1);
    }

  return nbytes;
}
/* Check that a buffer obtained from newbuf() has its shiftstate member in
   the initial conversion state.  Always returns 0; a failure trips assert. */
int test_newbuf(void)
{
    Buf *fresh = newbuf();

    /* newbuf sets shiftstate to initial shift state */
    assert(mbsinit(&fresh->shiftstate));

    return 0;
}
int main( void ) { mbstate_t mbs; memset(&mbs, 0, sizeof mbs); TESTCASE(mbsinit(NULL) != 0); TESTCASE(mbsinit(&mbs) != 0); #ifndef REGTEST // Surrogate pending mbs._Surrogate = 0xFEED; TESTCASE(mbsinit(&mbs) == 0); mbs._Surrogate = 0; mbs._PendState = 1; TESTCASE(mbsinit(&mbs) == 0); #endif return TEST_RESULTS; }
// Calls mbsinit() with a null pointer.
// Returns KErrNone when mbsinit() reports non-zero, KErrGeneral otherwise.
TInt CTestLibcwchar::testmbsinit2L()
    {
    const bool reportedInitial = (mbsinit(NULL) != 0);
    return reportedInitial ? KErrNone : KErrGeneral;
    }
// Checks how wcrtomb() reacts to a conversion state that mbrtowc() has left
// part-way through a character.
//
// NOTE(review): the mbrtowc() calls expect "\xc2" and "\xf0" to be treated
// as incomplete sequences, which presumes a UTF-8 LC_CTYPE; this test does
// not select one itself -- confirm it cannot run under another locale.
TEST(wchar, wcrtomb_start_state) {
  char out[MB_LEN_MAX];
  mbstate_t ps;

  // Any non-initial state is invalid when calling wcrtomb.
  memset(&ps, 0, sizeof(ps));
  EXPECT_EQ(static_cast<size_t>(-2), mbrtowc(NULL, "\xc2", 1, &ps));
  // Clear errno so the EILSEQ check below reflects this wcrtomb() call only,
  // not a value left behind by earlier code.
  errno = 0;
  EXPECT_EQ(static_cast<size_t>(-1), wcrtomb(out, 0x00a2, &ps));
  EXPECT_EQ(EILSEQ, errno);

  // If the first argument to wcrtomb is NULL or the second is L'\0' the shift
  // state should be reset.
  memset(&ps, 0, sizeof(ps));
  EXPECT_EQ(static_cast<size_t>(-2), mbrtowc(NULL, "\xc2", 1, &ps));
  EXPECT_EQ(1U, wcrtomb(NULL, 0x00a2, &ps));
  EXPECT_TRUE(mbsinit(&ps));

  memset(&ps, 0, sizeof(ps));
  EXPECT_EQ(static_cast<size_t>(-2), mbrtowc(NULL, "\xf0\xa4", 1, &ps));
  EXPECT_EQ(1U, wcrtomb(out, L'\0', &ps));
  EXPECT_TRUE(mbsinit(&ps));
}
// ----------------------------------------------------------------------------- //Function Name :testmbinit3 //API Tested :mbsinit //TestCase Description:mbrinit returns -> zero if mbstate_t state is not a null // pointer. // ----------------------------------------------------------------------------- TInt CTestLibcwchar::testmbsinit3L() { mbstate_t state; int retVal = mbsinit(&state); if(retVal == 0) { return KErrNone; } return KErrGeneral; }
// Calls mbsinit() on a zero-filled mbstate_t object.
// Returns KErrNone when mbsinit() reports non-zero, KErrGeneral otherwise.
TInt CTestLibcwchar::testmbsinit1L()
    {
    mbstate_t zeroed;
    memset(&zeroed, 0, sizeof(zeroed));

    return (mbsinit(&zeroed) != 0) ? KErrNone : KErrGeneral;
    }
// Compute the display width of the NUL-terminated string str.
//
// If bytes is not NULL, the byte length of str (as given by strlen()) is
// stored in *bytes.
//
// Without both HAVE_MBRTOWC and HAVE_WCWIDTH the width is the byte length.
// Otherwise the string is decoded with mbrtowc() and the wcwidth() of every
// character is summed; (size_t)-1 is returned if decoding fails, if a
// character has a negative width, or if the string does not end in the
// initial shift state.
extern size_t
tuklib_mbstr_width(const char *str, size_t *bytes)
{
	const size_t byte_count = strlen(str);

	if (bytes != NULL)
		*bytes = byte_count;

#if defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
	mbstate_t shift;
	memset(&shift, 0, sizeof(shift));

	size_t columns = 0;

	// Decode one multibyte character per iteration and add up the
	// widths reported by wcwidth().
	for (size_t offset = 0; offset < byte_count; ) {
		wchar_t wc;
		const size_t used = mbrtowc(&wc, str + offset,
				byte_count - offset, &shift);

		// Zero means an embedded NUL; values above the string length
		// cover the (size_t)-1 and (size_t)-2 error returns.
		if (used == 0 || used > byte_count)
			return (size_t)-1;

		offset += used;

		const int cell_width = wcwidth(wc);
		if (cell_width < 0)
			return (size_t)-1;

		columns += cell_width;
	}

	// Require that the string ends in the initial shift state.
	// This way the caller can combine the string with other strings
	// without needing to worry about the shift states.
	return mbsinit(&shift) ? columns : (size_t)-1;
#else
	// In single-byte mode, the width of the string is the same
	// as its length.
	return byte_count;
#endif
}
// Feed one byte of output into the terminal.
//
// While ansimode is set the byte is handed to PutAnsiEscaped(). Otherwise,
// if no multibyte character is partially decoded, control bytes are acted on
// directly. Every other byte goes through mbrtowc(); once a character is
// complete it is stored in textbuf at the cursor and the cursor advances.
void TextTerminal::PutChar(TextBuffer* textbuf, char c)
{
	if ( ansimode )
	{
		PutAnsiEscaped(textbuf, c);
		return;
	}

	// Control bytes are only interpreted between characters, never in the
	// middle of a multibyte sequence.
	if ( mbsinit(&ps) )
	{
		if ( c == '\n' ) { Newline(textbuf); return; }
		if ( c == '\r' ) { column = 0; return; }
		if ( c == '\b' ) { Backspace(textbuf); return; }
		if ( c == '\t' ) { Tab(textbuf); return; }
		if ( c == '\e' ) { AnsiReset(); return; }
		if ( c == 127 ) { return; }
	}

	wchar_t wc;
	const size_t decoded = mbrtowc(&wc, &c, 1, &ps);

	// More bytes are needed before a character is available.
	if ( decoded == (size_t) -2 )
		return;

	if ( decoded == (size_t) -1 )
	{
		// Decoding failed: reset the state and show a replacement character.
		memset(&ps, 0, sizeof(ps));
		wc = L'�';
	}
	else if ( decoded == (size_t) 0 )
	{
		// A NUL character is shown as a space.
		wc = L' ';
	}

	// Wrap to the next line when the cursor is past the last column.
	if ( textbuf->Width() <= column )
		Newline(textbuf);

	TextPos pos(column++, line);
	TextChar tc(wc, vgacolor, ATTR_CHAR | next_attr);
	textbuf->SetChar(pos, tc);
	next_attr = 0;
}
/* Returns the number of columns needed to represent the multibyte
   character string pointed to by STRING of length NBYTES.

   If an invalid or incomplete multibyte sequence occurs and
   MBSW_REJECT_INVALID is specified, -1 is returned; otherwise it counts
   as one column.  If a non-printable character occurs, and
   MBSW_REJECT_UNPRINTABLE is specified, -1 is returned; otherwise it
   counts as one column unless it is a control character.

   If the width would exceed INT_MAX, INT_MAX is returned.  */
int
mbsnwidth (const char *string, size_t nbytes, int flags)
{
  const char *p = string;
  const char *plimit = p + nbytes;
  int width;

  width = 0;
  if (MB_CUR_MAX > 1)
    {
      while (p < plimit)
        switch (*p)
          {
            case ' ': case '!': case '"': case '#': case '%':
            case '&': case '\'': case '(': case ')': case '*':
            case '+': case ',': case '-': case '.': case '/':
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
            case ':': case ';': case '<': case '=': case '>':
            case '?':
            case 'A': case 'B': case 'C': case 'D': case 'E':
            case 'F': case 'G': case 'H': case 'I': case 'J':
            case 'K': case 'L': case 'M': case 'N': case 'O':
            case 'P': case 'Q': case 'R': case 'S': case 'T':
            case 'U': case 'V': case 'W': case 'X': case 'Y':
            case 'Z':
            case '[': case '\\': case ']': case '^': case '_':
            case 'a': case 'b': case 'c': case 'd': case 'e':
            case 'f': case 'g': case 'h': case 'i': case 'j':
            case 'k': case 'l': case 'm': case 'n': case 'o':
            case 'p': case 'q': case 'r': case 's': case 't':
            case 'u': case 'v': case 'w': case 'x': case 'y':
            case 'z': case '{': case '|': case '}': case '~':
              /* These characters are printable ASCII characters.  */
              /* Guard the increment like every other path does, so a
                 huge input cannot overflow the signed counter.  */
              if (width == INT_MAX)
                goto overflow;
              p++;
              width++;
              break;
            default:
              /* If we have a multibyte sequence, scan it up to its end.  */
              {
                mbstate_t mbstate;
                memset (&mbstate, 0, sizeof mbstate);
                do
                  {
                    wchar_t wc;
                    size_t bytes;
                    int w;

                    bytes = mbrtowc (&wc, p, plimit - p, &mbstate);

                    if (bytes == (size_t) -1)
                      /* An invalid multibyte sequence was encountered.  */
                      {
                        if (!(flags & MBSW_REJECT_INVALID))
                          {
                            /* Skip one byte and count it as one column.  */
                            if (width == INT_MAX)
                              goto overflow;
                            p++;
                            width++;
                            break;
                          }
                        else
                          return -1;
                      }

                    if (bytes == (size_t) -2)
                      /* An incomplete multibyte character at the end.  */
                      {
                        if (!(flags & MBSW_REJECT_INVALID))
                          {
                            /* Count the whole tail as one column.  */
                            if (width == INT_MAX)
                              goto overflow;
                            p = plimit;
                            width++;
                            break;
                          }
                        else
                          return -1;
                      }

                    if (bytes == 0)
                      /* A null wide character was encountered.  */
                      bytes = 1;

                    w = wcwidth (wc);
                    if (w >= 0)
                      /* A printable multibyte character.  */
                      {
                        if (w > INT_MAX - width)
                          goto overflow;
                        width += w;
                      }
                    else
                      /* An unprintable multibyte character.  */
                      if (!(flags & MBSW_REJECT_UNPRINTABLE))
                        {
                          if (!iswcntrl (wc))
                            {
                              if (width == INT_MAX)
                                goto overflow;
                              width++;
                            }
                        }
                      else
                        return -1;

                    p += bytes;
                  }
                while (! mbsinit (&mbstate));
              }
              break;
          }
      return width;
    }

  /* Single-byte locale: classify each byte with <ctype.h>.  */
  while (p < plimit)
    {
      unsigned char c = (unsigned char) *p++;

      if (isprint (c))
        {
          if (width == INT_MAX)
            goto overflow;
          width++;
        }
      else if (!(flags & MBSW_REJECT_UNPRINTABLE))
        {
          if (!iscntrl (c))
            {
              if (width == INT_MAX)
                goto overflow;
              width++;
            }
        }
      else
        return -1;
    }
  return width;

 overflow:
  return INT_MAX;
}
/* Test program for mbsnrtowcs(), with supporting mbrtowc() calls.

   After selecting the environment's locale it first checks conversion of
   a lone NUL byte, then runs four encoding-specific scenarios (modes '1'
   to '4': ISO8859-1, UTF-8, EUC-JP, GB18030), each once with an output
   limit that stops the conversion early and once with room for BUFSIZE
   wide characters.  A scenario whose locale cannot be set is skipped with
   a message on stderr.

   Returns 0 when all scenarios ran or were skipped, 1 if the initial
   setlocale() fails.  Failed expectations trip assert().  argc and argv
   are not used.  */
int main(int argc, char *argv[])
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;
  int mode;

  /* configure should already have checked that the locale is supported.  */
  if (setlocale(LC_ALL, "") == NULL) {
    fprintf(stderr, "unable to set standard locale\n");
    return 1;
  }

  /* Test NUL byte input.  */
  {
    const char *src;

    memset(&state, '\0', sizeof(mbstate_t));

    /* No destination buffer, output limit 0 and 1.  */
    src = "";
    ret = mbsnrtowcs(NULL, &src, 1, 0, &state);
    assert(ret == 0);
    assert(mbsinit (&state));

    src = "";
    ret = mbsnrtowcs(NULL, &src, 1, 1, &state);
    assert(ret == 0);
    assert(mbsinit (&state));

    /* With a destination and limit 0 nothing may be stored.  */
    wc = (wchar_t) 0xBADFACE;
    src = "";
    ret = mbsnrtowcs(&wc, &src, 1, 0, &state);
    assert(ret == 0);
    assert(wc == (wchar_t) 0xBADFACE);
    assert(mbsinit (&state));

    /* With limit 1 the terminating null wide character is stored.  */
    wc = (wchar_t) 0xBADFACE;
    src = "";
    ret = mbsnrtowcs(&wc, &src, 1, 1, &state);
    assert(ret == 0);
    assert(wc == 0);
    assert(mbsinit (&state));
  }

  for (mode = '1'; mode <= '4'; ++mode) {
    int unlimited;
    for (unlimited = 0; unlimited < 2; unlimited++) {
      wchar_t buf[BUFSIZE];
      const char *src;
      mbstate_t temp_state;

      /* Poison the output buffer so untouched elements can be detected.  */
      {
        size_t i;
        for (i = 0; i < BUFSIZE; i++)
          buf[i] = (wchar_t) 0xBADFACE;
      }

      switch (mode) {
        case '1':
          /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
          printf("ISO8859-1 ...\n");
          {
            char input[] = "B\374\337er"; /* "Büßer" */
            memset(&state, '\0', sizeof(mbstate_t));

            if (setlocale (LC_ALL, "en_US.ISO8859-1") == NULL) {
              fprintf(stderr, "unable to set ISO8859-1 locale, skipping\n");
              break;
            }

            /* Consumed input bytes are zeroed as the test goes along.  */
            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc(&wc, input, 1, &state);
            assert(ret == 1);
            assert(wc == 'B');
            assert(mbsinit (&state));
            input[0] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc(&wc, input + 1, 1, &state);
            assert(ret == 1);
            assert(wctob (wc) == (unsigned char) '\374');
            assert(mbsinit (&state));
            input[1] = '\0';

            /* Counting pass on a copy of the state: src must not move.  */
            src = input + 2;
            temp_state = state;
            ret = mbsnrtowcs(NULL, &src, 4, unlimited ? BUFSIZE : 1, &temp_state);
            assert(ret == 3);
            assert(src == input + 2);
            assert(mbsinit (&state));

            /* Converting pass into buf.  */
            src = input + 2;
            ret = mbsnrtowcs(buf, &src, 4, unlimited ? BUFSIZE : 1, &state);
            assert(ret == (unlimited ? 3 : 1));
            assert(src == (unlimited ? NULL : input + 3));
            assert(wctob (buf[0]) == (unsigned char) '\337');
            if (unlimited) {
              assert(buf[1] == 'e');
              assert(buf[2] == 'r');
              assert(buf[3] == 0);
              assert(buf[4] == (wchar_t) 0xBADFACE);
            } else
              assert(buf[1] == (wchar_t) 0xBADFACE);
            assert(mbsinit (&state));
          }
          break;

        case '2':
          /* Locale encoding is UTF-8.  */
          printf("UTF-8 ...\n");
          {
            char input[] = "B\303\274\303\237er"; /* "Büßer" */
            memset(&state, '\0', sizeof(mbstate_t));

            if (setlocale (LC_ALL, "en_US.UTF-8") == NULL) {
              fprintf(stderr, "unable to set UTF-8 locale, skipping\n");
              break;
            }

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc(&wc, input, 1, &state);
            assert(ret == 1);
            assert(wc == 'B');
            assert(mbsinit (&state));
            input[0] = '\0';

            /* Only the first byte of a two-byte character: state stays
               non-initial and wc is untouched.  */
            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc(&wc, input + 1, 1, &state);
            assert(ret == (size_t)(-2));
            assert(wc == (wchar_t) 0xBADFACE);
            assert(!mbsinit (&state));
            input[1] = '\0';

            // Copying mbstate_t doesn't really copy the ICU-converter's state, so this
            // doesn't work on Haiku.
#ifndef __HAIKU__
            src = input + 2;
            temp_state = state;
            ret = mbsnrtowcs(NULL, &src, 6, unlimited ? BUFSIZE : 2, &temp_state);
            assert(ret == 4);
            assert(src == input + 2);
            assert(!mbsinit (&state));
#endif

            src = input + 2;
            ret = mbsnrtowcs(buf, &src, 6, unlimited ? BUFSIZE : 2, &state);
            assert(ret == (unlimited ? 4 : 2));
            assert(src == (unlimited ? NULL : input + 5));
            assert(wctob (buf[0]) == EOF);
            assert(wctob (buf[1]) == EOF);
            if (unlimited) {
              assert(buf[2] == 'e');
              assert(buf[3] == 'r');
              assert(buf[4] == 0);
              assert(buf[5] == (wchar_t) 0xBADFACE);
            } else
              assert(buf[2] == (wchar_t) 0xBADFACE);
            assert(mbsinit (&state));
          }
          break;

        case '3':
          /* Locale encoding is EUC-JP.  */
          printf("EUC-JP ...\n");
          {
            char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
            memset(&state, '\0', sizeof(mbstate_t));

            if (setlocale (LC_ALL, "en_US.EUC-JP") == NULL) {
              fprintf(stderr, "unable to set EUC-JP locale, skipping\n");
              break;
            }

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc(&wc, input, 1, &state);
            assert(ret == 1);
            assert(wc == '<');
            assert(mbsinit (&state));
            input[0] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc(&wc, input + 1, 2, &state);
            assert(ret == 2);
            assert(wctob (wc) == EOF);
            assert(mbsinit (&state));
            input[1] = '\0';
            input[2] = '\0';

            /* Leave the state in the middle of the second character.  */
            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc(&wc, input + 3, 1, &state);
            assert(ret == (size_t)(-2));
            assert(wc == (wchar_t) 0xBADFACE);
            assert(!mbsinit (&state));
            input[3] = '\0';

            // Copying mbstate_t doesn't really copy the ICU-converter's state, so this
            // doesn't work on Haiku.
#ifndef __HAIKU__
            src = input + 4;
            temp_state = state;
            ret = mbsnrtowcs(NULL, &src, 5, unlimited ? BUFSIZE : 2, &temp_state);
            assert(ret == 3);
            assert(src == input + 4);
            assert(!mbsinit (&state));
#endif

            src = input + 4;
            ret = mbsnrtowcs(buf, &src, 5, unlimited ? BUFSIZE : 2, &state);
            assert(ret == (unlimited ? 3 : 2));
            assert(src == (unlimited ? NULL : input + 7));
            assert(wctob (buf[0]) == EOF);
            assert(wctob (buf[1]) == EOF);
            if (unlimited) {
              assert(buf[2] == '>');
              assert(buf[3] == 0);
              assert(buf[4] == (wchar_t) 0xBADFACE);
            } else
              assert(buf[2] == (wchar_t) 0xBADFACE);
            assert(mbsinit (&state));
          }
          break;

        case '4':
          /* Locale encoding is GB18030.  */
          printf("GB18030 ...\n");
          {
            char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
            memset(&state, '\0', sizeof(mbstate_t));

            if (setlocale (LC_ALL, "en_US.GB18030") == NULL) {
              fprintf(stderr, "unable to set GB18030 locale, skipping\n");
              break;
            }

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc(&wc, input, 1, &state);
            assert(ret == 1);
            assert(wc == 'B');
            assert(mbsinit (&state));
            input[0] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc(&wc, input + 1, 1, &state);
            assert(ret == (size_t)(-2));
            assert(wc == (wchar_t) 0xBADFACE);
            assert(!mbsinit (&state));
            input[1] = '\0';

            // Copying mbstate_t doesn't really copy the ICU-converter's state, so this
            // doesn't work on Haiku.
#ifndef __HAIKU__
            src = input + 2;
            temp_state = state;
            ret = mbsnrtowcs(NULL, &src, 8, unlimited ? BUFSIZE : 2, &temp_state);
            assert(ret == 4);
            assert(src == input + 2);
            assert(!mbsinit (&state));
#endif

            src = input + 2;
            ret = mbsnrtowcs(buf, &src, 8, unlimited ? BUFSIZE : 2, &state);
            assert(ret == (unlimited ? 4 : 2));
            assert(src == (unlimited ? NULL : input + 7));
            assert(wctob (buf[0]) == EOF);
            assert(wctob (buf[1]) == EOF);
            if (unlimited) {
              assert(buf[2] == 'e');
              assert(buf[3] == 'r');
              assert(buf[4] == 0);
              assert(buf[5] == (wchar_t) 0xBADFACE);
            } else
              assert(buf[2] == (wchar_t) 0xBADFACE);
            assert(mbsinit (&state));
          }
          break;

        default:
          return 1;
      }
    }
  }

  return 0;
}
/* Run the mbrtowc() checks for the locale "NAME.CODEPAGE".

   After setting the locale, the function checks zero-length input, NUL
   byte input, every character of the ISO C basic character set, and the
   NULL-source calling convention.  It then runs a scenario specific to
   CODEPAGE (1252, 1256, 932, 950, 936, 54936 or 65001) that decodes a
   sample string piece by piece; the multibyte code pages also feed some
   invalid sequences.  Input bytes are zeroed once consumed, so that a
   conversion which re-read them would be noticed.

   Returns 0 on success, 77 if the locale cannot be set, and 1 for a code
   page without a scenario.  Failed expectations are reported by ASSERT.  */
static int
test_one_locale (const char *name, int codepage)
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;

# if 1
  /* Portable code to set the locale.  */
  {
    char name_with_codepage[1024];

    sprintf (name_with_codepage, "%s.%d", name, codepage);

    /* Set the locale.  */
    if (setlocale (LC_ALL, name_with_codepage) == NULL)
      return 77;
  }
# else
  /* Hacky way to set a locale.codepage combination that setlocale() refuses
     to set.  */
  {
    /* Codepage of the current locale, set with setlocale().
       Not necessarily the same as GetACP().  */
    extern __declspec(dllimport) unsigned int __lc_codepage;

    /* Set the locale.  */
    if (setlocale (LC_ALL, name) == NULL)
      return 77;

    /* Clobber the codepage and MB_CUR_MAX, both set by setlocale().  */
    __lc_codepage = codepage;
    switch (codepage)
      {
      case 1252:
      case 1256:
        MB_CUR_MAX = 1;
        break;
      case 932:
      case 950:
      case 936:
        MB_CUR_MAX = 2;
        break;
      case 54936:
      case 65001:
        MB_CUR_MAX = 4;
        break;
      }

    /* Test whether the codepage is really available.  */
    memset (&state, '\0', sizeof (mbstate_t));
    if (mbrtowc (&wc, " ", 1, &state) == (size_t)(-1))
      return 77;
  }
# endif

  /* Test zero-length input.  */
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, "x", 0, &state);
    /* gnulib's implementation returns (size_t)(-2).
       The AIX 5.1 implementation returns (size_t)(-1).
       glibc's implementation returns 0.  */
    ASSERT (ret == (size_t)(-2) || ret == (size_t)(-1) || ret == 0);
    ASSERT (mbsinit (&state));
  }

  /* Test NUL byte input.  */
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, "", 1, &state);
    ASSERT (ret == 0);
    ASSERT (wc == 0);
    ASSERT (mbsinit (&state));
    ret = mbrtowc (NULL, "", 1, &state);
    ASSERT (ret == 0);
    ASSERT (mbsinit (&state));
  }

  /* Test single-byte input.  */
  {
    int c;
    char buf[1];

    memset (&state, '\0', sizeof (mbstate_t));
    for (c = 0; c < 0x100; c++)
      switch (c)
        {
        case '\t': case '\v': case '\f':
        case ' ': case '!': case '"': case '#': case '%':
        case '&': case '\'': case '(': case ')': case '*':
        case '+': case ',': case '-': case '.': case '/':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case ':': case ';': case '<': case '=': case '>':
        case '?':
        case 'A': case 'B': case 'C': case 'D': case 'E':
        case 'F': case 'G': case 'H': case 'I': case 'J':
        case 'K': case 'L': case 'M': case 'N': case 'O':
        case 'P': case 'Q': case 'R': case 'S': case 'T':
        case 'U': case 'V': case 'W': case 'X': case 'Y':
        case 'Z':
        case '[': case '\\': case ']': case '^': case '_':
        case 'a': case 'b': case 'c': case 'd': case 'e':
        case 'f': case 'g': case 'h': case 'i': case 'j':
        case 'k': case 'l': case 'm': case 'n': case 'o':
        case 'p': case 'q': case 'r': case 's': case 't':
        case 'u': case 'v': case 'w': case 'x': case 'y':
        case 'z': case '{': case '|': case '}': case '~':
          /* c is in the ISO C "basic character set".  */
          buf[0] = c;
          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, buf, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wc == c);
          ASSERT (mbsinit (&state));
          ret = mbrtowc (NULL, buf, 1, &state);
          ASSERT (ret == 1);
          ASSERT (mbsinit (&state));
          break;
        }
  }

  /* Test special calling convention, passing a NULL pointer.  */
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, NULL, 5, &state);
    ASSERT (ret == 0);
    ASSERT (wc == (wchar_t) 0xBADFACE);
    ASSERT (mbsinit (&state));
  }

  switch (codepage)
    {
    case 1252:
      /* Locale encoding is CP1252, an extension of ISO-8859-1.  */
      {
        char input[] = "B\374\337er"; /* "Büßer" */
        memset (&state, '\0', sizeof (mbstate_t));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'B');
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 1, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == (unsigned char) '\374');
        ASSERT (wc == 0x00FC);
        ASSERT (mbsinit (&state));
        input[1] = '\0';

        /* Test support of NULL first argument.  */
        ret = mbrtowc (NULL, input + 2, 3, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 2, 3, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == (unsigned char) '\337');
        ASSERT (wc == 0x00DF);
        ASSERT (mbsinit (&state));
        input[2] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 3, 2, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'e');
        ASSERT (mbsinit (&state));
        input[3] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 4, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'r');
        ASSERT (mbsinit (&state));
      }
      return 0;

    case 1256:
      /* Locale encoding is CP1256, not the same as ISO-8859-6.  */
      {
        char input[] = "x\302\341\346y"; /* "xآلوy" */
        memset (&state, '\0', sizeof (mbstate_t));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'x');
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 1, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == (unsigned char) '\302');
        ASSERT (wc == 0x0622);
        ASSERT (mbsinit (&state));
        input[1] = '\0';

        /* Test support of NULL first argument.  */
        ret = mbrtowc (NULL, input + 2, 3, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 2, 3, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == (unsigned char) '\341');
        ASSERT (wc == 0x0644);
        ASSERT (mbsinit (&state));
        input[2] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 3, 2, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == (unsigned char) '\346');
        ASSERT (wc == 0x0648);
        ASSERT (mbsinit (&state));
        input[3] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 4, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'y');
        ASSERT (mbsinit (&state));
      }
      return 0;

    case 932:
      /* Locale encoding is CP932, similar to Shift_JIS.  */
      {
        char input[] = "<\223\372\226\173\214\352>"; /* "<日本語>" */
        memset (&state, '\0', sizeof (mbstate_t));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == '<');
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 1, 2, &state);
        ASSERT (ret == 2);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x65E5);
        ASSERT (mbsinit (&state));
        input[1] = '\0';
        input[2] = '\0';

        /* First byte only: the character stays pending.  */
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 3, 1, &state);
        ASSERT (ret == (size_t)(-2));
        ASSERT (wc == (wchar_t) 0xBADFACE);
        ASSERT (!mbsinit (&state));
        input[3] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 4, 4, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x672C);
        ASSERT (mbsinit (&state));
        input[4] = '\0';

        /* Test support of NULL first argument.  */
        ret = mbrtowc (NULL, input + 5, 3, &state);
        ASSERT (ret == 2);
        ASSERT (mbsinit (&state));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 5, 3, &state);
        ASSERT (ret == 2);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x8A9E);
        ASSERT (mbsinit (&state));
        input[5] = '\0';
        input[6] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 7, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == '>');
        ASSERT (mbsinit (&state));

        /* Test some invalid input.  */
        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\225\377", 2, &state); /* 0x95 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);
      }
      return 0;

    case 950:
      /* Locale encoding is CP950, similar to Big5.  */
      {
        char input[] = "<\244\351\245\273\273\171>"; /* "<日本語>" */
        memset (&state, '\0', sizeof (mbstate_t));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == '<');
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 1, 2, &state);
        ASSERT (ret == 2);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x65E5);
        ASSERT (mbsinit (&state));
        input[1] = '\0';
        input[2] = '\0';

        /* First byte only: the character stays pending.  */
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 3, 1, &state);
        ASSERT (ret == (size_t)(-2));
        ASSERT (wc == (wchar_t) 0xBADFACE);
        ASSERT (!mbsinit (&state));
        input[3] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 4, 4, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x672C);
        ASSERT (mbsinit (&state));
        input[4] = '\0';

        /* Test support of NULL first argument.  */
        ret = mbrtowc (NULL, input + 5, 3, &state);
        ASSERT (ret == 2);
        ASSERT (mbsinit (&state));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 5, 3, &state);
        ASSERT (ret == 2);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x8A9E);
        ASSERT (mbsinit (&state));
        input[5] = '\0';
        input[6] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 7, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == '>');
        ASSERT (mbsinit (&state));

        /* Test some invalid input.  */
        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\225\377", 2, &state); /* 0x95 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);
      }
      return 0;

    case 936:
      /* Locale encoding is CP936 = GBK, an extension of GB2312.  */
      {
        char input[] = "<\310\325\261\276\325\132>"; /* "<日本語>" */
        memset (&state, '\0', sizeof (mbstate_t));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == '<');
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 1, 2, &state);
        ASSERT (ret == 2);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x65E5);
        ASSERT (mbsinit (&state));
        input[1] = '\0';
        input[2] = '\0';

        /* First byte only: the character stays pending.  */
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 3, 1, &state);
        ASSERT (ret == (size_t)(-2));
        ASSERT (wc == (wchar_t) 0xBADFACE);
        ASSERT (!mbsinit (&state));
        input[3] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 4, 4, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x672C);
        ASSERT (mbsinit (&state));
        input[4] = '\0';

        /* Test support of NULL first argument.  */
        ret = mbrtowc (NULL, input + 5, 3, &state);
        ASSERT (ret == 2);
        ASSERT (mbsinit (&state));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 5, 3, &state);
        ASSERT (ret == 2);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x8A9E);
        ASSERT (mbsinit (&state));
        input[5] = '\0';
        input[6] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 7, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == '>');
        ASSERT (mbsinit (&state));

        /* Test some invalid input.  */
        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\225\377", 2, &state); /* 0x95 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);
      }
      return 0;

    case 54936:
      /* Locale encoding is CP54936 = GB18030.  */
      {
        char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
        memset (&state, '\0', sizeof (mbstate_t));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'B');
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        /* First byte only: the character stays pending.  */
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 1, 1, &state);
        ASSERT (ret == (size_t)(-2));
        ASSERT (wc == (wchar_t) 0xBADFACE);
        ASSERT (!mbsinit (&state));
        input[1] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 2, 7, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x00FC);
        ASSERT (mbsinit (&state));
        input[2] = '\0';

        /* Test support of NULL first argument.  */
        ret = mbrtowc (NULL, input + 3, 6, &state);
        ASSERT (ret == 4);
        ASSERT (mbsinit (&state));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 3, 6, &state);
        ASSERT (ret == 4);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x00DF);
        ASSERT (mbsinit (&state));
        input[3] = '\0';
        input[4] = '\0';
        input[5] = '\0';
        input[6] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 7, 2, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'e');
        ASSERT (mbsinit (&state));
        /* Zero the byte that was just consumed (index 7), as every other
           step in this function does.  */
        input[7] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 8, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'r');
        ASSERT (mbsinit (&state));

        /* Test some invalid input.  */
        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\225\377", 2, &state); /* 0x95 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\201\045", 2, &state); /* 0x81 0x25 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\201\060\377", 3, &state); /* 0x81 0x30 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\201\060\377\064", 4, &state); /* 0x81 0x30 0xFF 0x34 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\201\060\211\072", 4, &state); /* 0x81 0x30 0x89 0x3A */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);
      }
      return 0;

    case 65001:
      /* Locale encoding is CP65001 = UTF-8.  */
      {
        char input[] = "B\303\274\303\237er"; /* "Büßer" */
        memset (&state, '\0', sizeof (mbstate_t));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'B');
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        /* First byte only: the character stays pending.  */
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 1, 1, &state);
        ASSERT (ret == (size_t)(-2));
        ASSERT (wc == (wchar_t) 0xBADFACE);
        ASSERT (!mbsinit (&state));
        input[1] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 2, 5, &state);
        ASSERT (ret == 1);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x00FC);
        ASSERT (mbsinit (&state));
        input[2] = '\0';

        /* Test support of NULL first argument.  */
        ret = mbrtowc (NULL, input + 3, 4, &state);
        ASSERT (ret == 2);
        ASSERT (mbsinit (&state));

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 3, 4, &state);
        ASSERT (ret == 2);
        ASSERT (wctob (wc) == EOF);
        ASSERT (wc == 0x00DF);
        ASSERT (mbsinit (&state));
        input[3] = '\0';
        input[4] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 5, 2, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'e');
        ASSERT (mbsinit (&state));
        input[5] = '\0';

        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, input + 6, 1, &state);
        ASSERT (ret == 1);
        ASSERT (wc == 'r');
        ASSERT (mbsinit (&state));

        /* Test some invalid input.  */
        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\377", 1, &state); /* 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\303\300", 2, &state); /* 0xC3 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\343\300", 2, &state); /* 0xE3 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\343\300\200", 3, &state); /* 0xE3 0xC0 0x80 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\343\200\300", 3, &state); /* 0xE3 0x80 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\363\300", 2, &state); /* 0xF3 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\363\300\200\200", 4, &state); /* 0xF3 0xC0 0x80 0x80 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\363\200\300", 3, &state); /* 0xF3 0x80 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\363\200\300\200", 4, &state); /* 0xF3 0x80 0xC0 0x80 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, "\363\200\200\300", 4, &state); /* 0xF3 0x80 0x80 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);
      }
      return 0;

    default:
      return 1;
    }
}
/* Rewrite BUF[0..LEN-1] in place so that every character that is not
   printable is replaced by a single '?'.  A character counts as
   printable when it is one of the ASCII characters listed in the switch
   below, or when wcwidth() reports a non-negative width for it.
   When MB_CUR_MAX is at most 1 the work is delegated to
   unibyte_qmark_chars().  Returns the new length of the data in BUF,
   which may be shorter than LEN.  */
static size_t
multibyte_qmark_chars(char *buf, size_t len)
{
  char const *src;
  char const *end;
  char *dst;

  if (MB_CUR_MAX <= 1)
    return unibyte_qmark_chars(buf, len);

  src = buf;
  end = buf + len;
  dst = buf;

  while (src < end)
    {
      switch (*src)
        {
        case ' ': case '!': case '"': case '#': case '%':
        case '&': case '\'': case '(': case ')': case '*':
        case '+': case ',': case '-': case '.': case '/':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case ':': case ';': case '<': case '=': case '>':
        case '?':
        case 'A': case 'B': case 'C': case 'D': case 'E':
        case 'F': case 'G': case 'H': case 'I': case 'J':
        case 'K': case 'L': case 'M': case 'N': case 'O':
        case 'P': case 'Q': case 'R': case 'S': case 'T':
        case 'U': case 'V': case 'W': case 'X': case 'Y':
        case 'Z':
        case '[': case '\\': case ']': case '^': case '_':
        case 'a': case 'b': case 'c': case 'd': case 'e':
        case 'f': case 'g': case 'h': case 'i': case 'j':
        case 'k': case 'l': case 'm': case 'n': case 'o':
        case 'p': case 'q': case 'r': case 's': case 't':
        case 'u': case 'v': case 'w': case 'x': case 'y':
        case 'z': case '{': case '|': case '}': case '~':
          /* Printable ASCII: copy the byte through unchanged.  */
          *dst++ = *src++;
          break;

        default:
          {
            /* Decode a run of multibyte characters, starting from a
               fresh conversion state, until the state is initial again
               or a decoding problem ends the run.  */
            mbstate_t st;

            memset (&st, 0, sizeof st);
            for (;;)
              {
                wchar_t wide;
                size_t n = mbrtowc (&wide, src, end - src, &st);

                if (n == (size_t) -1)
                  {
                    /* Invalid sequence: drop one input byte and emit
                       a question mark in its place.  */
                    src++;
                    *dst++ = '?';
                    break;
                  }

                if (n == (size_t) -2)
                  {
                    /* Truncated character at the end of the buffer:
                       the whole remainder becomes one question mark.  */
                    src = end;
                    *dst++ = '?';
                    break;
                  }

                /* mbrtowc() reports 0 for a null wide character; it
                   still occupies one input byte.  */
                if (n == 0)
                  n = 1;

                if (wcwidth (wide) >= 0)
                  {
                    /* Printable: keep all of its bytes.  */
                    while (n > 0)
                      {
                        *dst++ = *src++;
                        n--;
                      }
                  }
                else
                  {
                    /* Unprintable: collapse it to one question mark.  */
                    src += n;
                    *dst++ = '?';
                  }

                if (mbsinit (&st))
                  break;
              }
          }
          break;
        }
    }

  /* The data may now occupy fewer bytes than it did on entry.  */
  return dst - buf;
}
// Compile-time check that each <wchar.h> function, when called with
// arguments of the expected types, yields the expected result type.
// Nothing is executed at run time; the calls only appear inside decltype.
int main()
{
    // Dummy operands used solely to form the call expressions below.
    mbstate_t mb = {0};
    size_t s = 0;
    tm tm = {0};
    wint_t w = 0;
    ::FILE* fp = 0;
    __darwin_va_list va;
    char* ns = 0;
    wchar_t* ws = 0;

// Asserts that the expression `call` has exactly the type `type`.
#define EXPECT_RESULT_TYPE(call, type) \
    static_assert((std::is_same<decltype(call), type>::value), "")

    // Formatted wide-character input/output.
    EXPECT_RESULT_TYPE(fwprintf(fp, L""), int);
    EXPECT_RESULT_TYPE(fwscanf(fp, L""), int);
    EXPECT_RESULT_TYPE(swprintf(ws, s, L""), int);
    EXPECT_RESULT_TYPE(swscanf(L"", L""), int);
    EXPECT_RESULT_TYPE(vfwprintf(fp, L"", va), int);
    EXPECT_RESULT_TYPE(vfwscanf(fp, L"", va), int);
    EXPECT_RESULT_TYPE(vswprintf(ws, s, L"", va), int);
    EXPECT_RESULT_TYPE(vswscanf(L"", L"", va), int);
    EXPECT_RESULT_TYPE(vwprintf(L"", va), int);
    EXPECT_RESULT_TYPE(vwscanf(L"", va), int);
    EXPECT_RESULT_TYPE(wprintf(L""), int);
    EXPECT_RESULT_TYPE(wscanf(L""), int);

    // Wide-character stream input/output.
    EXPECT_RESULT_TYPE(fgetwc(fp), wint_t);
    EXPECT_RESULT_TYPE(fgetws(ws, 0, fp), wchar_t*);
    EXPECT_RESULT_TYPE(fputwc(L' ', fp), wint_t);
    EXPECT_RESULT_TYPE(fputws(L"", fp), int);
    EXPECT_RESULT_TYPE(fwide(fp, 0), int);
    EXPECT_RESULT_TYPE(getwc(fp), wint_t);
    EXPECT_RESULT_TYPE(getwchar(), wint_t);
    EXPECT_RESULT_TYPE(putwc(L' ', fp), wint_t);
    EXPECT_RESULT_TYPE(putwchar(L' '), wint_t);
    EXPECT_RESULT_TYPE(ungetwc(L' ', fp), wint_t);

    // Numeric conversions.
    EXPECT_RESULT_TYPE(wcstod(L"", (wchar_t**)0), double);
    EXPECT_RESULT_TYPE(wcstof(L"", (wchar_t**)0), float);
    EXPECT_RESULT_TYPE(wcstold(L"", (wchar_t**)0), long double);
    EXPECT_RESULT_TYPE(wcstol(L"", (wchar_t**)0, 0), long);
    EXPECT_RESULT_TYPE(wcstoll(L"", (wchar_t**)0, 0), long long);
    EXPECT_RESULT_TYPE(wcstoul(L"", (wchar_t**)0, 0), unsigned long);
    EXPECT_RESULT_TYPE(wcstoull(L"", (wchar_t**)0, 0), unsigned long long);

    // Wide string and wide memory utilities.
    EXPECT_RESULT_TYPE(wcscpy(ws, L""), wchar_t*);
    EXPECT_RESULT_TYPE(wcsncpy(ws, L"", s), wchar_t*);
    EXPECT_RESULT_TYPE(wcscat(ws, L""), wchar_t*);
    EXPECT_RESULT_TYPE(wcsncat(ws, L"", s), wchar_t*);
    EXPECT_RESULT_TYPE(wcscmp(L"", L""), int);
    EXPECT_RESULT_TYPE(wcscoll(L"", L""), int);
    EXPECT_RESULT_TYPE(wcsncmp(L"", L"", s), int);
    EXPECT_RESULT_TYPE(wcsxfrm(ws, L"", s), size_t);
    EXPECT_RESULT_TYPE(wcschr((wchar_t*)0, L' '), wchar_t*);
    EXPECT_RESULT_TYPE(wcscspn(L"", L""), size_t);
    EXPECT_RESULT_TYPE(wcslen(L""), size_t);
    EXPECT_RESULT_TYPE(wcspbrk((wchar_t*)0, L""), wchar_t*);
    EXPECT_RESULT_TYPE(wcsrchr((wchar_t*)0, L' '), wchar_t*);
    EXPECT_RESULT_TYPE(wcsspn(L"", L""), size_t);
    EXPECT_RESULT_TYPE(wcsstr((wchar_t*)0, L""), wchar_t*);
    EXPECT_RESULT_TYPE(wcstok(ws, L"", (wchar_t**)0), wchar_t*);
    EXPECT_RESULT_TYPE(wmemchr((wchar_t*)0, L' ', s), wchar_t*);
    EXPECT_RESULT_TYPE(wmemcmp(L"", L"", s), int);
    EXPECT_RESULT_TYPE(wmemcpy(ws, L"", s), wchar_t*);
    EXPECT_RESULT_TYPE(wmemmove(ws, L"", s), wchar_t*);
    EXPECT_RESULT_TYPE(wmemset(ws, L' ', s), wchar_t*);

    // Time formatting.
    EXPECT_RESULT_TYPE(wcsftime(ws, s, L"", &tm), size_t);

    // Single-byte and multibyte conversions.
    EXPECT_RESULT_TYPE(btowc(0), wint_t);
    EXPECT_RESULT_TYPE(wctob(w), int);
    EXPECT_RESULT_TYPE(mbsinit(&mb), int);
    EXPECT_RESULT_TYPE(mbrlen("", s, &mb), size_t);
    EXPECT_RESULT_TYPE(mbrtowc(ws, "", s, &mb), size_t);
    EXPECT_RESULT_TYPE(wcrtomb(ns, L' ', &mb), size_t);
    EXPECT_RESULT_TYPE(mbsrtowcs(ws, (const char**)0, s, &mb), size_t);
    EXPECT_RESULT_TYPE(wcsrtombs(ns, (const wchar_t**)0, s, &mb), size_t);

#undef EXPECT_RESULT_TYPE
}
static int processStream (FILE *inputStream, const char *inputName) { mbstate_t inputState; mbstate_t outputState; memset(&inputState, 0, sizeof(inputState)); memset(&outputState, 0, sizeof(outputState)); while (!feof(inputStream)) { char inputBuffer[0X1000]; size_t inputCount = fread(inputBuffer, 1, sizeof(inputBuffer), inputStream); if (ferror(inputStream)) goto inputError; if (!inputCount) break; { char *byte = inputBuffer; while (inputCount) { wchar_t character; { size_t result = mbrtowc(&character, byte, inputCount, &inputState); if (result == (size_t)-2) break; if (result == (size_t)-1) goto inputError; if (!result) result = 1; byte += result; inputCount -= result; } if (!iswcntrl(character)) { unsigned char dots = toDots(character); if (dots || !iswspace(character)) { if (opt_sixDots) dots &= ~(BRL_DOT_7 | BRL_DOT_8); character = toCharacter(dots); } } if (!writeCharacter(&character, &outputState)) goto outputError; } } } if (!writeCharacter(NULL, &outputState)) goto outputError; fflush(outputStream); if (ferror(outputStream)) goto outputError; if (!mbsinit(&inputState)) { #ifdef EILSEQ errno = EILSEQ; #else /* EILSEQ */ errno = EINVAL; #endif /* EILSEQ */ goto inputError; } return 1; inputError: logMessage(LOG_ERR, "input error: %s: %s", inputName, strerror(errno)); return 0; outputError: logMessage(LOG_ERR, "output error: %s: %s", outputName, strerror(errno)); return 0; }
/**
 * ntfs_ucstombs - convert a little endian Unicode string to a multibyte string
 * @ins:	input Unicode string buffer
 * @ins_len:	length of input string in Unicode characters
 * @outs:	on return contains the (allocated) output multibyte string
 * @outs_len:	length of output buffer in bytes
 *
 * Convert the input little endian, 2-byte Unicode string @ins, of length
 * @ins_len into the multibyte string format dictated by the current locale.
 * Conversion stops early at the first zero character in @ins.
 *
 * If *@outs is NULL, the function allocates the string and the caller is
 * responsible for calling free(*@outs); when finished with it.
 *
 * On success the function returns the number of bytes written to the output
 * string *@outs (>= 0), not counting the terminating NUL byte. If the output
 * string buffer was allocated, *@outs is set to it.
 *
 * On error, -1 is returned, and errno is set to the error code. The following
 * error codes can be expected:
 *	EINVAL		Invalid arguments (e.g. @ins or @outs is NULL).
 *	EILSEQ		The input string cannot be represented as a multibyte
 *			sequence according to the current locale.
 *	ENAMETOOLONG	Destination buffer is too small for input string
 *			(including its terminating NUL byte).
 *	ENOMEM		Not enough memory to allocate destination buffer.
 */
int ntfs_ucstombs(const ntfschar *ins, const int ins_len, char **outs,
		int outs_len)
{
	char *mbs;
	wchar_t wc;
	int i, o, mbs_len;
	int cnt = 0;
#ifdef HAVE_MBSINIT
	mbstate_t mbstate;
#endif

	if (!ins || !outs) {
		errno = EINVAL;
		return -1;
	}
	mbs = *outs;
	mbs_len = outs_len;
	/* A caller supplied buffer of zero length cannot even hold the NUL. */
	if (mbs && !mbs_len) {
		errno = ENAMETOOLONG;
		return -1;
	}
	if (!mbs) {
		/* Worst case: every character needs MB_CUR_MAX bytes, plus NUL. */
		mbs_len = (ins_len + 1) * MB_CUR_MAX;
		mbs = (char*)malloc(mbs_len);
		if (!mbs)
			return -1;
	}
#ifdef HAVE_MBSINIT
	memset(&mbstate, 0, sizeof(mbstate));
#else
	/* Reset the internal conversion state of wctomb(). */
	wctomb(NULL, 0);
#endif
	for (i = o = 0; i < ins_len; i++) {
		/* Reallocate memory if necessary or abort. */
		if ((int)(o + MB_CUR_MAX) > mbs_len) {
			char *tc;
			/* The caller's buffer is never grown. */
			if (mbs == *outs) {
				errno = ENAMETOOLONG;
				return -1;
			}
			tc = (char*)malloc((mbs_len + 64) & ~63);
			if (!tc)
				goto err_out;
			memcpy(tc, mbs, mbs_len);
			mbs_len = (mbs_len + 64) & ~63;
			free(mbs);
			mbs = tc;
		}
		/* Convert the LE Unicode character to a CPU wide character. */
		wc = (wchar_t)le16_to_cpu(ins[i]);
		if (!wc)
			break;
		/* Convert the CPU endian wide character to multibyte. */
#ifdef HAVE_MBSINIT
		cnt = wcrtomb(mbs + o, wc, &mbstate);
#else
		cnt = wctomb(mbs + o, wc);
#endif
		if (cnt == -1)
			goto err_out;
		if (cnt <= 0) {
			ntfs_log_debug("Eeek. cnt <= 0, cnt = %i\n", cnt);
			errno = EINVAL;
			goto err_out;
		}
		o += cnt;
	}
#ifdef HAVE_MBSINIT
	/* Make sure we are back in the initial state. */
	if (!mbsinit(&mbstate)) {
		ntfs_log_debug("Eeek. mbstate not in initial state!\n");
		errno = EILSEQ;
		goto err_out;
	}
#endif
	/*
	 * The loop above only guarantees room for the next character, not
	 * for the terminator: when the last character fills the buffer
	 * exactly, o equals mbs_len and writing the NUL would overrun it.
	 */
	if (o >= mbs_len) {
		errno = ENAMETOOLONG;
		goto err_out;
	}
	/* Now write the NUL character. */
	mbs[o] = '\0';
	if (*outs != mbs)
		*outs = mbs;
	return o;
err_out:
	/* Only free what we allocated ourselves; keep errno across free(). */
	if (mbs != *outs) {
		int eo = errno;
		free(mbs);
		errno = eo;
	}
	return -1;
}
size_t wcsnrtombs(char* dst, const wchar_t** src, size_t nwc, size_t len, mbstate_t* ps) { static mbstate_t __private_state; mbstate_t* state = (ps == NULL) ? &__private_state : ps; if (!mbsinit(state)) { return reset_and_return_illegal(EILSEQ, state); } char buf[MB_LEN_MAX]; size_t i, o, r; if (dst == NULL) { for (i = o = 0; i < nwc; i++, o += r) { wchar_t wc = (*src)[i]; if (static_cast<uint32_t>(wc) < 0x80) { // Fast path for plain ASCII characters. if (wc == 0) { return o; } r = 1; } else { r = wcrtomb(buf, wc, state); if (r == ERR_ILLEGAL_SEQUENCE) { return r; } } } return o; } for (i = o = 0; i < nwc && o < len; i++, o += r) { wchar_t wc = (*src)[i]; if (static_cast<uint32_t>(wc) < 0x80) { // Fast path for plain ASCII characters. dst[o] = wc; if (wc == 0) { *src = NULL; return o; } r = 1; } else if (len - o >= sizeof(buf)) { // Enough space to translate in-place. r = wcrtomb(dst + o, wc, state); if (r == ERR_ILLEGAL_SEQUENCE) { *src += i; return r; } } else { // May not be enough space; use temp buffer. r = wcrtomb(buf, wc, state); if (r == ERR_ILLEGAL_SEQUENCE) { *src += i; return r; } if (r > len - o) { break; } memcpy(dst + o, buf, r); } } *src += i; return o; }
size_t wcrtomb(char* s, wchar_t wc, mbstate_t* ps) { static mbstate_t __private_state; mbstate_t* state = (ps == NULL) ? &__private_state : ps; if (s == NULL) { // Equivalent to wcrtomb(buf, L'\0', ps). return reset_and_return(1, state); } // POSIX states that if wc is a null wide character, a null byte shall be // stored, preceded by any shift sequence needed to restore the initial shift // state. Since shift states are not supported, only the null byte is stored. if (wc == L'\0') { *s = '\0'; reset_and_return(1, state); } if (!mbsinit(state)) { return reset_and_return_illegal(EILSEQ, state); } if ((wc & ~0x7f) == 0) { // Fast path for plain ASCII characters. *s = wc; return 1; } // Determine the number of octets needed to represent this character. // We always output the shortest sequence possible. Also specify the // first few bits of the first octet, which contains the information // about the sequence length. uint8_t lead; size_t length; if ((wc & ~0x7f) == 0) { lead = 0; length = 1; } else if ((wc & ~0x7ff) == 0) { lead = 0xc0; length = 2; } else if ((wc & ~0xffff) == 0) { lead = 0xe0; length = 3; } else if ((wc & ~0x1fffff) == 0) { lead = 0xf0; length = 4; } else { errno = EILSEQ; return ERR_ILLEGAL_SEQUENCE; } // Output the octets representing the character in chunks // of 6 bits, least significant last. The first octet is // a special case because it contains the sequence length // information. for (size_t i = length - 1; i > 0; i--) { s[i] = (wc & 0x3f) | 0x80; wc >>= 6; } *s = (wc & 0xff) | lead; return length; }
/* Test program for mbrtowc().

   The generic tests (zero-length input, NUL byte, single-byte characters
   of the basic character set, NULL source pointer) always run.  argv[1]
   then selects an encoding-specific test that must match the locale the
   program is run in:
     '1'  ISO-8859-1 or ISO-8859-15
     '2'  UTF-8
     '3'  EUC-JP
     '4'  GB18030
   Returns 0 when the selected encoding test ran, 1 when the locale could
   not be set or no known encoding test was selected.  Failed checks are
   reported through the ASSERT macro (defined elsewhere).

   In the encoding tests every input byte is overwritten with '\0' once it
   has been consumed, presumably so that a later call that wrongly
   re-reads already consumed bytes would be detected.  */
int
main (int argc, char *argv[])
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;

  /* configure should already have checked that the locale is supported.  */
  if (setlocale (LC_ALL, "") == NULL)
    return 1;

  /* Test zero-length input.  */
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, "x", 0, &state);
    ASSERT (ret == (size_t)(-2));
    ASSERT (mbsinit (&state));
  }

  /* Test NUL byte input.  */
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, "", 1, &state);
    ASSERT (ret == 0);
    ASSERT (wc == 0);
    ASSERT (mbsinit (&state));
    ret = mbrtowc (NULL, "", 1, &state);
    ASSERT (ret == 0);
    ASSERT (mbsinit (&state));
  }

  /* Test single-byte input.  */
  {
    int c;
    char buf[1];

    memset (&state, '\0', sizeof (mbstate_t));
    for (c = 0; c < 0x100; c++)
      switch (c)
        {
        case '\t': case '\v': case '\f':
        case ' ': case '!': case '"': case '#': case '%':
        case '&': case '\'': case '(': case ')': case '*':
        case '+': case ',': case '-': case '.': case '/':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case ':': case ';': case '<': case '=': case '>':
        case '?':
        case 'A': case 'B': case 'C': case 'D': case 'E':
        case 'F': case 'G': case 'H': case 'I': case 'J':
        case 'K': case 'L': case 'M': case 'N': case 'O':
        case 'P': case 'Q': case 'R': case 'S': case 'T':
        case 'U': case 'V': case 'W': case 'X': case 'Y':
        case 'Z':
        case '[': case '\\': case ']': case '^': case '_':
        case 'a': case 'b': case 'c': case 'd': case 'e':
        case 'f': case 'g': case 'h': case 'i': case 'j':
        case 'k': case 'l': case 'm': case 'n': case 'o':
        case 'p': case 'q': case 'r': case 's': case 't':
        case 'u': case 'v': case 'w': case 'x': case 'y':
        case 'z': case '{': case '|': case '}': case '~':
          /* c is in the ISO C "basic character set".  */
          buf[0] = c;
          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, buf, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wc == c);
          ASSERT (mbsinit (&state));
          ret = mbrtowc (NULL, buf, 1, &state);
          ASSERT (ret == 1);
          ASSERT (mbsinit (&state));
          break;
        }
  }

  /* Test special calling convention, passing a NULL pointer.  */
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, NULL, 5, &state);
    ASSERT (ret == 0);
    ASSERT (wc == (wchar_t) 0xBADFACE);
    ASSERT (mbsinit (&state));
  }

  if (argc > 1)
    switch (argv[1][0])
      {
      case '1':
        /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
        {
          char input[] = "B\374\337er"; /* "Büßer" */
          memset (&state, '\0', sizeof (mbstate_t));

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wc == 'B');
          ASSERT (mbsinit (&state));
          input[0] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 1, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wctob (wc) == (unsigned char) '\374');
          ASSERT (mbsinit (&state));
          input[1] = '\0';

          /* Test support of NULL first argument.  */
          ret = mbrtowc (NULL, input + 2, 3, &state);
          ASSERT (ret == 1);
          ASSERT (mbsinit (&state));

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 2, 3, &state);
          ASSERT (ret == 1);
          ASSERT (wctob (wc) == (unsigned char) '\337');
          ASSERT (mbsinit (&state));
          input[2] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 3, 2, &state);
          ASSERT (ret == 1);
          ASSERT (wc == 'e');
          ASSERT (mbsinit (&state));
          input[3] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 4, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wc == 'r');
          ASSERT (mbsinit (&state));
        }
        return 0;

      case '2':
        /* Locale encoding is UTF-8.  */
        {
          char input[] = "B\303\274\303\237er"; /* "Büßer" */
          memset (&state, '\0', sizeof (mbstate_t));

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wc == 'B');
          ASSERT (mbsinit (&state));
          input[0] = '\0';

          /* Only the first byte of a two-byte character is supplied.  */
          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 1, 1, &state);
          ASSERT (ret == (size_t)(-2));
          ASSERT (wc == (wchar_t) 0xBADFACE);
          ASSERT (!mbsinit (&state));
          input[1] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 2, 5, &state);
          ASSERT (ret == 1);
          ASSERT (wctob (wc) == EOF);
          ASSERT (mbsinit (&state));
          input[2] = '\0';

          /* Test support of NULL first argument.  */
          ret = mbrtowc (NULL, input + 3, 4, &state);
          ASSERT (ret == 2);
          ASSERT (mbsinit (&state));

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 3, 4, &state);
          ASSERT (ret == 2);
          ASSERT (wctob (wc) == EOF);
          ASSERT (mbsinit (&state));
          input[3] = '\0';
          input[4] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 5, 2, &state);
          ASSERT (ret == 1);
          ASSERT (wc == 'e');
          ASSERT (mbsinit (&state));
          input[5] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 6, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wc == 'r');
          ASSERT (mbsinit (&state));
        }
        return 0;

      case '3':
        /* Locale encoding is EUC-JP.  */
        {
          char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
          memset (&state, '\0', sizeof (mbstate_t));

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wc == '<');
          ASSERT (mbsinit (&state));
          input[0] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 1, 2, &state);
          ASSERT (ret == 2);
          ASSERT (wctob (wc) == EOF);
          ASSERT (mbsinit (&state));
          input[1] = '\0';
          input[2] = '\0';

          /* Only the first byte of a two-byte character is supplied.  */
          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 3, 1, &state);
          ASSERT (ret == (size_t)(-2));
          ASSERT (wc == (wchar_t) 0xBADFACE);
          ASSERT (!mbsinit (&state));
          input[3] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 4, 4, &state);
          ASSERT (ret == 1);
          ASSERT (wctob (wc) == EOF);
          ASSERT (mbsinit (&state));
          input[4] = '\0';

          /* Test support of NULL first argument.  */
          ret = mbrtowc (NULL, input + 5, 3, &state);
          ASSERT (ret == 2);
          ASSERT (mbsinit (&state));

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 5, 3, &state);
          ASSERT (ret == 2);
          ASSERT (wctob (wc) == EOF);
          ASSERT (mbsinit (&state));
          input[5] = '\0';
          input[6] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 7, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wc == '>');
          ASSERT (mbsinit (&state));
        }
        return 0;

      case '4':
        /* Locale encoding is GB18030.  */
        {
          char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
          memset (&state, '\0', sizeof (mbstate_t));

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wc == 'B');
          ASSERT (mbsinit (&state));
          input[0] = '\0';

          /* Only the first byte of a two-byte character is supplied.  */
          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 1, 1, &state);
          ASSERT (ret == (size_t)(-2));
          ASSERT (wc == (wchar_t) 0xBADFACE);
          ASSERT (!mbsinit (&state));
          input[1] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 2, 7, &state);
          ASSERT (ret == 1);
          ASSERT (wctob (wc) == EOF);
          ASSERT (mbsinit (&state));
          input[2] = '\0';

          /* Test support of NULL first argument.  */
          ret = mbrtowc (NULL, input + 3, 6, &state);
          ASSERT (ret == 4);
          ASSERT (mbsinit (&state));

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 3, 6, &state);
          ASSERT (ret == 4);
          ASSERT (wctob (wc) == EOF);
          ASSERT (mbsinit (&state));
          input[3] = '\0';
          input[4] = '\0';
          input[5] = '\0';
          input[6] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 7, 2, &state);
          ASSERT (ret == 1);
          ASSERT (wc == 'e');
          ASSERT (mbsinit (&state));
          /* Clear the byte that was just consumed (index 7, not 5, which
             had already been cleared above).  */
          input[7] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 8, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wc == 'r');
          ASSERT (mbsinit (&state));
        }
        return 0;
      }

  return 1;
}
/* Converts a string to an EWF header2
 *
 * string/size_string:   the source string and its size; the conversion is
 *                       expected to yield size_string - 1 units
 * header2/size_header2: the destination buffer and its size; it must hold
 *                       at least ( size_string * 2 ) + 2 units
 *
 * With HAVE_WIDE_CHARACTER_TYPE the string is converted with wcsrtombs()
 * into header2 starting at offset 2, and header2[ 0..1 ] receive a two
 * byte marker chosen from the value of header2[ 4 ]. Otherwise the copy is
 * delegated to libewf_string_copy_ascii_to_utf16().
 *
 * Returns 1 if successful, 0 if header2 was not set, or -1 on error
 * (NOTE: no path in this function currently returns 0)
 */
int8_t libewf_string_copy_to_header2( LIBEWF_CHAR *string, size_t size_string, EWF_HEADER2 *header2, size_t size_header2 )
{
#ifdef HAVE_WIDE_CHARACTER_TYPE
	mbstate_t conversion_state;
#endif

	if( string == NULL )
	{
		LIBEWF_WARNING_PRINT( "libewf_string_copy_to_header2: invalid string.\n" );

		return( -1 );
	}
	if( header2 == NULL )
	{
		LIBEWF_WARNING_PRINT( "libewf_string_copy_to_header2: invalid header2.\n" );

		return( -1 );
	}
	if( ( size_string > (size_t) SSIZE_MAX ) || ( size_header2 > (size_t) SSIZE_MAX ) )
	{
		LIBEWF_WARNING_PRINT( "libewf_string_copy_to_header2: invalid size value exceeds maximum.\n" );

		return( -1 );
	}
	/* Two units per character plus the two leading marker units
	 */
	if( size_header2 < ( ( size_string * 2 ) + 2 ) )
	{
		LIBEWF_WARNING_PRINT( "libewf_string_copy_to_header2: header2 too small.\n" );

		return( -1 );
	}
#ifdef HAVE_WIDE_CHARACTER_TYPE
	if( libewf_common_memset( &conversion_state, 0, sizeof( mbstate_t ) ) == NULL )
	{
		LIBEWF_WARNING_PRINT( "libewf_string_copy_to_header2: unable to clear converion state.\n" );

		return( -1 );
	}
	if( mbsinit( &conversion_state ) == 0 )
	{
		LIBEWF_WARNING_PRINT( "libewf_string_copy_to_header2: unable to initialize converion state.\n" );

		return( -1 );
	}
	/* The output starts at offset 2, so only size_header2 - 2 units remain
	 * available; the size check above guarantees size_header2 >= 2
	 * NOTE(review): this assumes EWF_HEADER2 is a single byte type - confirm
	 */
	if( wcsrtombs( (char *) &header2[ 2 ], (const wchar_t **) &string, size_header2 - 2, &conversion_state ) != ( size_string - 1 ) )
#else
	if( libewf_string_copy_ascii_to_utf16( string, size_string, (LIBEWF_CHAR *) header2, size_header2, LIBEWF_STRING_LITTLE_ENDIAN ) != 1 )
#endif
	{
		LIBEWF_WARNING_PRINT( "libewf_string_copy_to_header2: unable to copy string to header2.\n" );

		return( -1 );
	}
#ifdef HAVE_WIDE_CHARACTER_TYPE
	/* Select the two marker bytes from the value found at header2[ 4 ]
	 */
	if( header2[ 4 ] == (EWF_HEADER2) '\0' )
	{
		header2[ 0 ] = (EWF_HEADER2) 0xfe;
		header2[ 1 ] = (EWF_HEADER2) 0xff;
	}
	else
	{
		header2[ 0 ] = (EWF_HEADER2) 0xff;
		header2[ 1 ] = (EWF_HEADER2) 0xfe;
	}
#endif
	return( 1 );
}
/*
 * Test program for mbsnrtowcs().  Runs a series of assertions first in the
 * initial (C/POSIX) locale and then in the ja_JP.eucJP locale, printing a
 * one-test TAP style report ("1..1" followed by "ok 1 - ...").
 *
 * Before each case the source buffer is filled with 0xcc bytes and the
 * destination buffer with 0xcccc wide characters, so that the assertions
 * can tell which elements the call under test has written.
 */
int
main(int argc, char *argv[])
{
	char srcbuf[128];
	wchar_t dstbuf[128];
	const size_t dstlen = sizeof(dstbuf) / sizeof(*dstbuf);
	char *cursor;
	const char **cursorp = (const char **)&cursor;
	mbstate_t state;

	/*
	 * C/POSIX locale.
	 */

	printf("1..1\n");

	/* Simple null terminated string. */
	memset(srcbuf, 0xcc, sizeof(srcbuf));
	strcpy(srcbuf, "hello");
	wmemset(dstbuf, 0xcccc, dstlen);
	cursor = srcbuf;
	memset(&state, 0, sizeof(state));
	assert(mbsnrtowcs(dstbuf, cursorp, 6, dstlen, &state) == 5);
	assert(wcscmp(dstbuf, L"hello") == 0);
	assert(dstbuf[6] == 0xcccc);
	assert(cursor == NULL);

	/* Simple null terminated string, stopping early. */
	memset(srcbuf, 0xcc, sizeof(srcbuf));
	strcpy(srcbuf, "hello");
	wmemset(dstbuf, 0xcccc, dstlen);
	cursor = srcbuf;
	memset(&state, 0, sizeof(state));
	assert(mbsnrtowcs(dstbuf, cursorp, 4, dstlen, &state) == 4);
	assert(wmemcmp(dstbuf, L"hell", 4) == 0);
	assert(dstbuf[5] == 0xcccc);
	assert(cursor == srcbuf + 4);

	/* Not enough space in destination buffer. */
	memset(srcbuf, 0xcc, sizeof(srcbuf));
	strcpy(srcbuf, "hello");
	wmemset(dstbuf, 0xcccc, dstlen);
	cursor = srcbuf;
	memset(&state, 0, sizeof(state));
	assert(mbsnrtowcs(dstbuf, cursorp, 6, 4, &state) == 4);
	assert(wmemcmp(dstbuf, L"hell", 4) == 0);
	assert(dstbuf[5] == 0xcccc);
	assert(cursor == srcbuf + 4);

	/* Null terminated string, internal dest. buffer */
	memset(srcbuf, 0xcc, sizeof(srcbuf));
	strcpy(srcbuf, "hello");
	cursor = srcbuf;
	memset(&state, 0, sizeof(state));
	assert(mbsnrtowcs(NULL, cursorp, 6, 0, &state) == 5);

	/* Null terminated string, internal dest. buffer, stopping early */
	memset(srcbuf, 0xcc, sizeof(srcbuf));
	strcpy(srcbuf, "hello");
	cursor = srcbuf;
	memset(&state, 0, sizeof(state));
	assert(mbsnrtowcs(NULL, cursorp, 4, 0, &state) == 4);

	/* Null terminated string, internal state. */
	memset(srcbuf, 0xcc, sizeof(srcbuf));
	strcpy(srcbuf, "hello");
	wmemset(dstbuf, 0xcccc, dstlen);
	cursor = srcbuf;
	assert(mbsnrtowcs(dstbuf, cursorp, 6, dstlen, NULL) == 5);
	assert(wcscmp(dstbuf, L"hello") == 0);
	assert(dstbuf[6] == 0xcccc);
	assert(cursor == NULL);

	/* Null terminated string, internal state, internal dest. buffer. */
	memset(srcbuf, 0xcc, sizeof(srcbuf));
	strcpy(srcbuf, "hello");
	cursor = srcbuf;
	assert(mbsnrtowcs(NULL, cursorp, 6, 0, NULL) == 5);

	/* Empty source buffer. */
	memset(srcbuf, 0xcc, sizeof(srcbuf));
	srcbuf[0] = '\0';
	cursor = srcbuf;
	memset(&state, 0, sizeof(state));
	wmemset(dstbuf, 0xcccc, dstlen);
	assert(mbsnrtowcs(dstbuf, cursorp, 1, 1, &state) == 0);
	assert(dstbuf[0] == 0);
	assert(dstbuf[1] == 0xcccc);
	assert(cursor == NULL);

	/* Zero length destination buffer. */
	memset(srcbuf, 0xcc, sizeof(srcbuf));
	strcpy(srcbuf, "hello");
	cursor = srcbuf;
	memset(&state, 0, sizeof(state));
	wmemset(dstbuf, 0xcccc, dstlen);
	assert(mbsnrtowcs(dstbuf, cursorp, 1, 0, &state) == 0);
	assert(dstbuf[0] == 0xcccc);
	assert(cursor == srcbuf);

	/* Zero length source buffer. */
	memset(srcbuf, 0xcc, sizeof(srcbuf));
	cursor = srcbuf;
	memset(&state, 0, sizeof(state));
	wmemset(dstbuf, 0xcccc, dstlen);
	assert(mbsnrtowcs(dstbuf, cursorp, 0, 1, &state) == 0);
	assert(dstbuf[0] == 0xcccc);
	assert(cursor == srcbuf);

	/*
	 * Japanese (EUC) locale.
	 */

	assert(strcmp(setlocale(LC_CTYPE, "ja_JP.eucJP"), "ja_JP.eucJP") == 0);
	assert(MB_CUR_MAX > 1);

	/* Mixed two-byte and single-byte characters, whole string. */
	memset(srcbuf, 0xcc, sizeof(srcbuf));
	strcpy(srcbuf, "\xA3\xC1 B \xA3\xC3");
	cursor = srcbuf;
	memset(&state, 0, sizeof(state));
	wmemset(dstbuf, 0xcccc, dstlen);
	assert(mbsnrtowcs(dstbuf, cursorp, 8, dstlen, &state) == 5);
	assert(dstbuf[0] == 0xA3C1 && dstbuf[1] == 0x20 && dstbuf[2] == 0x42 &&
	    dstbuf[3] == 0x20 && dstbuf[4] == 0xA3C3 && dstbuf[5] == 0);
	assert(cursor == NULL);

	/* Partial character. */
	memset(srcbuf, 0xcc, sizeof(srcbuf));
	strcpy(srcbuf, "\xA3\xC1 B \xA3\xC3");
	cursor = srcbuf;
	memset(&state, 0, sizeof(state));
	wmemset(dstbuf, 0xcccc, dstlen);
	/* The sixth byte is only the first half of the last character. */
	assert(mbsnrtowcs(dstbuf, cursorp, 6, dstlen, &state) == 4);
	assert(cursor == srcbuf + 6);
	assert(!mbsinit(&state));
	/* Supplying the second half completes it. */
	assert(mbsnrtowcs(dstbuf, cursorp, 1, dstlen, &state) == 1);
	assert(cursor == srcbuf + 7);
	/* Only the terminator is left. */
	assert(mbsnrtowcs(dstbuf, cursorp, 1, dstlen, &state) == 0);
	assert(cursor == NULL);

	printf("ok 1 - mbsnrtowcs()\n");

	return (0);
}
/* Test driver for mbsrtowcs().

   The empty-string checks always run in the locale selected by the
   environment.  The encoding-specific checks run only when argv[1] is given;
   its first character selects the expected locale encoding:
     '1' ISO-8859-1 or ISO-8859-15, '2' UTF-8, '3' EUC-JP, '4' GB18030.
   Each encoding is exercised twice: once with a destination limit that stops
   the conversion early, once with room for the whole string.

   Returns 0 when an encoding was selected and its checks ran, 1 when the
   locale cannot be set, no selector was passed, or the selector is unknown.  */
#define BUFSIZE 10
int
main (int argc, char *argv[])
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;
  int unlimited;

  /* configure should already have checked that the locale is supported.  */
  if (setlocale (LC_ALL, "") == NULL)
    return 1;

  /* Test NUL byte input.  */
  {
    const char *src;

    memset (&state, '\0', sizeof (mbstate_t));

    /* No destination, zero limit.  */
    src = "";
    ret = mbsrtowcs (NULL, &src, 0, &state);
    ASSERT (ret == 0);
    ASSERT (mbsinit (&state));

    /* No destination, limit of one.  */
    src = "";
    ret = mbsrtowcs (NULL, &src, 1, &state);
    ASSERT (ret == 0);
    ASSERT (mbsinit (&state));

    /* Destination present but zero limit: the sentinel must survive.  */
    wc = (wchar_t) 0xBADFACE;
    src = "";
    ret = mbsrtowcs (&wc, &src, 0, &state);
    ASSERT (ret == 0);
    ASSERT (wc == (wchar_t) 0xBADFACE);
    ASSERT (mbsinit (&state));

    /* Destination with room for one element: the terminator is stored.  */
    wc = (wchar_t) 0xBADFACE;
    src = "";
    ret = mbsrtowcs (&wc, &src, 1, &state);
    ASSERT (ret == 0);
    ASSERT (wc == 0);
    ASSERT (mbsinit (&state));
  }

  /* Without an encoding selector there is nothing more to test.  */
  if (argc <= 1)
    return 1;

  for (unlimited = 0; unlimited < 2; unlimited++)
    {
      wchar_t buf[BUFSIZE];
      const char *src;
      mbstate_t temp_state;
      size_t fill = BUFSIZE;

      /* Poison the output buffer so untouched elements are detectable.  */
      while (fill-- > 0)
        buf[fill] = (wchar_t) 0xBADFACE;

      switch (argv[1][0])
        {
        case '1':
          /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
          {
            char input[] = "B\374\337er"; /* "Büßer" */
            size_t limit = unlimited ? BUFSIZE : 1;

            memset (&state, '\0', sizeof (mbstate_t));

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input, 1, &state);
            ASSERT (ret == 1);
            ASSERT (wc == 'B');
            ASSERT (mbsinit (&state));
            input[0] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 1, 1, &state);
            ASSERT (ret == 1);
            ASSERT (wctob (wc) == (unsigned char) '\374');
            ASSERT (mbsinit (&state));
            input[1] = '\0';

            /* Counting pass on a copy of the state.  */
            src = input + 2;
            temp_state = state;
            ret = mbsrtowcs (NULL, &src, limit, &temp_state);
            ASSERT (ret == 3);
            ASSERT (src == input + 2);
            ASSERT (mbsinit (&state));

            /* Converting pass on the real state.  */
            src = input + 2;
            ret = mbsrtowcs (buf, &src, limit, &state);
            ASSERT (ret == (unlimited ? 3 : 1));
            ASSERT (src == (unlimited ? NULL : input + 3));
            ASSERT (wctob (buf[0]) == (unsigned char) '\337');
            if (unlimited)
              {
                ASSERT (buf[1] == 'e');
                ASSERT (buf[2] == 'r');
                ASSERT (buf[3] == 0);
                ASSERT (buf[4] == (wchar_t) 0xBADFACE);
              }
            else
              {
                ASSERT (buf[1] == (wchar_t) 0xBADFACE);
              }
            ASSERT (mbsinit (&state));
          }
          break;

        case '2':
          /* Locale encoding is UTF-8.  */
          {
            char input[] = "B\303\274\303\237er"; /* "Büßer" */
            size_t limit = unlimited ? BUFSIZE : 2;

            memset (&state, '\0', sizeof (mbstate_t));

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input, 1, &state);
            ASSERT (ret == 1);
            ASSERT (wc == 'B');
            ASSERT (mbsinit (&state));
            input[0] = '\0';

            /* Feed only the first byte of a two-byte character so that the
               conversions below start in a non-initial state.  */
            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 1, 1, &state);
            ASSERT (ret == (size_t)(-2));
            ASSERT (wc == (wchar_t) 0xBADFACE);
            ASSERT (!mbsinit (&state));
            input[1] = '\0';

            src = input + 2;
            temp_state = state;
            ret = mbsrtowcs (NULL, &src, limit, &temp_state);
            ASSERT (ret == 4);
            ASSERT (src == input + 2);
            ASSERT (!mbsinit (&state));

            src = input + 2;
            ret = mbsrtowcs (buf, &src, limit, &state);
            ASSERT (ret == (unlimited ? 4 : 2));
            ASSERT (src == (unlimited ? NULL : input + 5));
            ASSERT (wctob (buf[0]) == EOF);
            ASSERT (wctob (buf[1]) == EOF);
            if (unlimited)
              {
                ASSERT (buf[2] == 'e');
                ASSERT (buf[3] == 'r');
                ASSERT (buf[4] == 0);
                ASSERT (buf[5] == (wchar_t) 0xBADFACE);
              }
            else
              {
                ASSERT (buf[2] == (wchar_t) 0xBADFACE);
              }
            ASSERT (mbsinit (&state));
          }
          break;

        case '3':
          /* Locale encoding is EUC-JP.  */
          {
            char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
            size_t limit = unlimited ? BUFSIZE : 2;

            memset (&state, '\0', sizeof (mbstate_t));

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input, 1, &state);
            ASSERT (ret == 1);
            ASSERT (wc == '<');
            ASSERT (mbsinit (&state));
            input[0] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 1, 2, &state);
            ASSERT (ret == 2);
            ASSERT (wctob (wc) == EOF);
            ASSERT (mbsinit (&state));
            input[1] = '\0';
            input[2] = '\0';

            /* Feed only the first byte of the next character.  */
            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 3, 1, &state);
            ASSERT (ret == (size_t)(-2));
            ASSERT (wc == (wchar_t) 0xBADFACE);
            ASSERT (!mbsinit (&state));
            input[3] = '\0';

            src = input + 4;
            temp_state = state;
            ret = mbsrtowcs (NULL, &src, limit, &temp_state);
            ASSERT (ret == 3);
            ASSERT (src == input + 4);
            ASSERT (!mbsinit (&state));

            src = input + 4;
            ret = mbsrtowcs (buf, &src, limit, &state);
            ASSERT (ret == (unlimited ? 3 : 2));
            ASSERT (src == (unlimited ? NULL : input + 7));
            ASSERT (wctob (buf[0]) == EOF);
            ASSERT (wctob (buf[1]) == EOF);
            if (unlimited)
              {
                ASSERT (buf[2] == '>');
                ASSERT (buf[3] == 0);
                ASSERT (buf[4] == (wchar_t) 0xBADFACE);
              }
            else
              {
                ASSERT (buf[2] == (wchar_t) 0xBADFACE);
              }
            ASSERT (mbsinit (&state));
          }
          break;

        case '4':
          /* Locale encoding is GB18030.  */
          {
            char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
            size_t limit = unlimited ? BUFSIZE : 2;

            memset (&state, '\0', sizeof (mbstate_t));

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input, 1, &state);
            ASSERT (ret == 1);
            ASSERT (wc == 'B');
            ASSERT (mbsinit (&state));
            input[0] = '\0';

            /* Feed only the first byte of a two-byte character.  */
            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 1, 1, &state);
            ASSERT (ret == (size_t)(-2));
            ASSERT (wc == (wchar_t) 0xBADFACE);
            ASSERT (!mbsinit (&state));
            input[1] = '\0';

            src = input + 2;
            temp_state = state;
            ret = mbsrtowcs (NULL, &src, limit, &temp_state);
            ASSERT (ret == 4);
            ASSERT (src == input + 2);
            ASSERT (!mbsinit (&state));

            src = input + 2;
            ret = mbsrtowcs (buf, &src, limit, &state);
            ASSERT (ret == (unlimited ? 4 : 2));
            ASSERT (src == (unlimited ? NULL : input + 7));
            ASSERT (wctob (buf[0]) == EOF);
            ASSERT (wctob (buf[1]) == EOF);
            if (unlimited)
              {
                ASSERT (buf[2] == 'e');
                ASSERT (buf[3] == 'r');
                ASSERT (buf[4] == 0);
                ASSERT (buf[5] == (wchar_t) 0xBADFACE);
              }
            else
              {
                ASSERT (buf[2] == (wchar_t) 0xBADFACE);
              }
            ASSERT (mbsinit (&state));
          }
          break;

        default:
          return 1;
        }
    }

  return 0;
}
size_t mbrtowc(wchar_t* pwc, const char* s, size_t n, mbstate_t* ps) { static mbstate_t __private_state; mbstate_t* state = (ps == NULL) ? &__private_state : ps; // We should never get to a state which has all 4 bytes of the sequence set. // Full state verification is done when decoding the sequence (after we have // all the bytes). if (mbstate_get_byte(state, 3) != 0) { return reset_and_return_illegal(EINVAL, state); } if (s == NULL) { s = ""; n = 1; pwc = NULL; } if (n == 0) { return 0; } uint8_t ch; if (mbsinit(state) && (((ch = static_cast<uint8_t>(*s)) & ~0x7f) == 0)) { // Fast path for plain ASCII characters. if (pwc != NULL) { *pwc = ch; } return (ch != '\0' ? 1 : 0); } // Determine the number of octets that make up this character // from the first octet, and a mask that extracts the // interesting bits of the first octet. We already know // the character is at least two bytes long. size_t length; int mask; // We also specify a lower bound for the character code to // detect redundant, non-"shortest form" encodings. For // example, the sequence C0 80 is _not_ a legal representation // of the null character. This enforces a 1-to-1 mapping // between character codes and their multibyte representations. wchar_t lower_bound; // The first byte in the state (if any) tells the length. size_t bytes_so_far = mbstate_bytes_so_far(state); ch = bytes_so_far > 0 ? mbstate_get_byte(state, 0) : static_cast<uint8_t>(*s); if ((ch & 0x80) == 0) { mask = 0x7f; length = 1; lower_bound = 0; } else if ((ch & 0xe0) == 0xc0) { mask = 0x1f; length = 2; lower_bound = 0x80; } else if ((ch & 0xf0) == 0xe0) { mask = 0x0f; length = 3; lower_bound = 0x800; } else if ((ch & 0xf8) == 0xf0) { mask = 0x07; length = 4; lower_bound = 0x10000; } else { // Malformed input; input is not UTF-8. See RFC 3629. return reset_and_return_illegal(EILSEQ, state); } // Fill in the state. 
size_t bytes_wanted = length - bytes_so_far; size_t i; for (i = 0; i < MIN(bytes_wanted, n); i++) { if (!mbsinit(state) && ((*s & 0xc0) != 0x80)) { // Malformed input; bad characters in the middle of a character. return reset_and_return_illegal(EILSEQ, state); } mbstate_set_byte(state, bytes_so_far + i, *s++); } if (i < bytes_wanted) { return ERR_INCOMPLETE_SEQUENCE; } // Decode the octet sequence representing the character in chunks // of 6 bits, most significant first. wchar_t wch = mbstate_get_byte(state, 0) & mask; for (i = 1; i < length; i++) { wch <<= 6; wch |= mbstate_get_byte(state, i) & 0x3f; } if (wch < lower_bound) { // Malformed input; redundant encoding. return reset_and_return_illegal(EILSEQ, state); } if ((wch >= 0xd800 && wch <= 0xdfff) || wch == 0xfffe || wch == 0xffff) { // Malformed input; invalid code points. return reset_and_return_illegal(EILSEQ, state); } if (pwc != NULL) { *pwc = wch; } return reset_and_return(wch == L'\0' ? 0 : bytes_wanted, state); }
/* Replacement for mbrtowc() that papers over platform defects selected at
   configure time:
     MBRTOWC_NULL_ARG2_BUG  - a NULL S is rewritten to the equivalent call
                              (NULL, "", 1, PS);
     MBRTOWC_RETVAL_BUG     - a pending partial character is completed byte by
                              byte so the byte count returned is our own;
     MBRTOWC_NUL_RETVAL_BUG - the result is forced to 0 when the converted
                              character is NUL;
     MBRTOWC_NULL_ARG1_BUG  - a NULL PWC is replaced by a scratch variable.
   With none of them set this is a plain call to mbrtowc().  */
size_t
rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
# if MBRTOWC_NULL_ARG2_BUG || MBRTOWC_RETVAL_BUG
  if (s == NULL)
    {
      pwc = NULL;
      s = "";
      n = 1;
    }
# endif

# if MBRTOWC_RETVAL_BUG
  {
    static mbstate_t fallback_state;

    /* Override mbrtowc's internal state.  We cannot call mbsinit() on the
       hidden internal state, but we can call it on our variable.  */
    if (ps == NULL)
      ps = &fallback_state;

    if (!mbsinit (ps))
      {
        /* Parse the rest of the multibyte character byte for byte.  */
        size_t consumed = 0;

        while (n > 0)
          {
            wchar_t decoded;
            size_t rc = mbrtowc (&decoded, s, 1, ps);

            if (rc == (size_t)(-1))
              return (size_t)(-1);
            consumed++;
            if (rc != (size_t)(-2))
              {
                /* The multibyte character has been completed.  */
                if (pwc != NULL)
                  *pwc = decoded;
                return (decoded == 0 ? 0 : consumed);
              }
            s++;
            n--;
          }
        return (size_t)(-2);
      }
  }
# endif

# if MBRTOWC_NUL_RETVAL_BUG
  {
    wchar_t decoded;
    size_t rc = mbrtowc (&decoded, s, n, ps);

    /* Error and incomplete results pass through untouched.  */
    if (rc == (size_t)(-1) || rc == (size_t)(-2))
      return rc;

    if (pwc != NULL)
      *pwc = decoded;
    return (decoded == 0 ? 0 : rc);
  }
# else
  {
#  if MBRTOWC_NULL_ARG1_BUG
    wchar_t scratch;

    if (pwc == NULL)
      pwc = &scratch;
#  endif

    return mbrtowc (pwc, s, n, ps);
  }
# endif
}
/** * ntfs_mbstoucs - convert a multibyte string to a little endian Unicode string * @ins: input multibyte string buffer * @outs: on return contains the (allocated) output Unicode string * @outs_len: length of output buffer in Unicode characters * * Convert the input multibyte string @ins, from the current locale into the * corresponding little endian, 2-byte Unicode string. * * If *@outs is NULL, the function allocates the string and the caller is * responsible for calling free(*@outs); when finished with it. * * On success the function returns the number of Unicode characters written to * the output string *@outs (>= 0), not counting the terminating Unicode NULL * character. If the output string buffer was allocated, *@outs is set to it. * * On error, -1 is returned, and errno is set to the error code. The following * error codes can be expected: * EINVAL Invalid arguments (e.g. @ins or @outs is NULL). * EILSEQ The input string cannot be represented as a Unicode * string according to the current locale. * ENAMETOOLONG Destination buffer is too small for input string. * ENOMEM Not enough memory to allocate destination buffer. */ int ntfs_mbstoucs(const char *ins, ntfschar **outs, int outs_len) { ntfschar *ucs; const char *s; wchar_t wc; int i, o, cnt, ins_len, ucs_len, ins_size; #ifdef HAVE_MBSINIT mbstate_t mbstate; #endif if (!ins || !outs) { errno = EINVAL; return -1; } ucs = *outs; ucs_len = outs_len; if (ucs && !ucs_len) { errno = ENAMETOOLONG; return -1; } /* Determine the size of the multi-byte string in bytes. */ ins_size = strlen(ins); /* Determine the length of the multi-byte string. */ s = ins; #if defined(HAVE_MBSINIT) memset(&mbstate, 0, sizeof(mbstate)); ins_len = mbsrtowcs(NULL, (const char **)&s, 0, &mbstate); #ifdef __CYGWIN32__ if (!ins_len && *ins) { /* Older Cygwin had broken mbsrtowcs() implementation. */ ins_len = strlen(ins); } #endif #elif !defined(DJGPP) ins_len = mbstowcs(NULL, s, 0); #else /* Eeek!!! 
DJGPP has broken mbstowcs() implementation!!! */ ins_len = strlen(ins); #endif if (ins_len == -1) return ins_len; #ifdef HAVE_MBSINIT if ((s != ins) || !mbsinit(&mbstate)) { #else if (s != ins) { #endif errno = EILSEQ; return -1; } /* Add the NULL terminator. */ ins_len++; if (!ucs) { ucs_len = ins_len; ucs = (ntfschar*)malloc(ucs_len * sizeof(ntfschar)); if (!ucs) return -1; } #ifdef HAVE_MBSINIT memset(&mbstate, 0, sizeof(mbstate)); #else mbtowc(NULL, NULL, 0); #endif for (i = o = cnt = 0; i < ins_size; i += cnt, o++) { /* Reallocate memory if necessary or abort. */ if (o >= ucs_len) { ntfschar *tc; if (ucs == *outs) { errno = ENAMETOOLONG; return -1; } /* * We will never get here but hey, it's only a bit of * extra code... */ ucs_len = (ucs_len * sizeof(ntfschar) + 64) & ~63; tc = (ntfschar*)realloc(ucs, ucs_len); if (!tc) goto err_out; ucs = tc; ucs_len /= sizeof(ntfschar); } /* Convert the multibyte character to a wide character. */ #ifdef HAVE_MBSINIT cnt = mbrtowc(&wc, ins + i, ins_size - i, &mbstate); #else cnt = mbtowc(&wc, ins + i, ins_size - i); #endif if (!cnt) break; if (cnt == -1) goto err_out; if (cnt < -1) { ntfs_log_trace("Eeek. cnt = %i\n", cnt); errno = EINVAL; goto err_out; } /* Make sure we are not overflowing the NTFS Unicode set. */ if ((unsigned long)wc >= (unsigned long)(1 << (8 * sizeof(ntfschar)))) { errno = EILSEQ; goto err_out; } /* Convert the CPU wide character to a LE Unicode character. */ ucs[o] = cpu_to_le16(wc); } #ifdef HAVE_MBSINIT /* Make sure we are back in the initial state. */ if (!mbsinit(&mbstate)) { ntfs_log_trace("Eeek. mbstate not in initial state!\n"); errno = EILSEQ; goto err_out; } #endif /* Now write the NULL character. 
*/ ucs[o] = cpu_to_le16(L'\0'); if (*outs != ucs) *outs = ucs; return o; err_out: if (ucs != *outs) { int eo = errno; free(ucs); errno = eo; } return -1; } /** * ntfs_upcase_table_build - build the default upcase table for NTFS * @uc: destination buffer where to store the built table * @uc_len: size of destination buffer in bytes * * ntfs_upcase_table_build() builds the default upcase table for NTFS and * stores it in the caller supplied buffer @uc of size @uc_len. * * Note, @uc_len must be at least 128kiB in size or bad things will happen! */ void ntfs_upcase_table_build(ntfschar *uc, u32 uc_len) { static int uc_run_table[][3] = { /* Start, End, Add */ {0x0061, 0x007B, -32}, {0x0451, 0x045D, -80}, {0x1F70, 0x1F72, 74}, {0x00E0, 0x00F7, -32}, {0x045E, 0x0460, -80}, {0x1F72, 0x1F76, 86}, {0x00F8, 0x00FF, -32}, {0x0561, 0x0587, -48}, {0x1F76, 0x1F78, 100}, {0x0256, 0x0258, -205}, {0x1F00, 0x1F08, 8}, {0x1F78, 0x1F7A, 128}, {0x028A, 0x028C, -217}, {0x1F10, 0x1F16, 8}, {0x1F7A, 0x1F7C, 112}, {0x03AC, 0x03AD, -38}, {0x1F20, 0x1F28, 8}, {0x1F7C, 0x1F7E, 126}, {0x03AD, 0x03B0, -37}, {0x1F30, 0x1F38, 8}, {0x1FB0, 0x1FB2, 8}, {0x03B1, 0x03C2, -32}, {0x1F40, 0x1F46, 8}, {0x1FD0, 0x1FD2, 8}, {0x03C2, 0x03C3, -31}, {0x1F51, 0x1F52, 8}, {0x1FE0, 0x1FE2, 8}, {0x03C3, 0x03CC, -32}, {0x1F53, 0x1F54, 8}, {0x1FE5, 0x1FE6, 7}, {0x03CC, 0x03CD, -64}, {0x1F55, 0x1F56, 8}, {0x2170, 0x2180, -16}, {0x03CD, 0x03CF, -63}, {0x1F57, 0x1F58, 8}, {0x24D0, 0x24EA, -26}, {0x0430, 0x0450, -32}, {0x1F60, 0x1F68, 8}, {0xFF41, 0xFF5B, -32}, {0} }; static int uc_dup_table[][2] = { /* Start, End */ {0x0100, 0x012F}, {0x01A0, 0x01A6}, {0x03E2, 0x03EF}, {0x04CB, 0x04CC}, {0x0132, 0x0137}, {0x01B3, 0x01B7}, {0x0460, 0x0481}, {0x04D0, 0x04EB}, {0x0139, 0x0149}, {0x01CD, 0x01DD}, {0x0490, 0x04BF}, {0x04EE, 0x04F5}, {0x014A, 0x0178}, {0x01DE, 0x01EF}, {0x04BF, 0x04BF}, {0x04F8, 0x04F9}, {0x0179, 0x017E}, {0x01F4, 0x01F5}, {0x04C1, 0x04C4}, {0x1E00, 0x1E95}, {0x018B, 0x018B}, {0x01FA, 0x0218}, {0x04C7, 
0x04C8}, {0x1EA0, 0x1EF9}, {0} }; static int uc_byte_table[][2] = { /* Offset, Value */ {0x00FF, 0x0178}, {0x01AD, 0x01AC}, {0x01F3, 0x01F1}, {0x0269, 0x0196}, {0x0183, 0x0182}, {0x01B0, 0x01AF}, {0x0253, 0x0181}, {0x026F, 0x019C}, {0x0185, 0x0184}, {0x01B9, 0x01B8}, {0x0254, 0x0186}, {0x0272, 0x019D}, {0x0188, 0x0187}, {0x01BD, 0x01BC}, {0x0259, 0x018F}, {0x0275, 0x019F}, {0x018C, 0x018B}, {0x01C6, 0x01C4}, {0x025B, 0x0190}, {0x0283, 0x01A9}, {0x0192, 0x0191}, {0x01C9, 0x01C7}, {0x0260, 0x0193}, {0x0288, 0x01AE}, {0x0199, 0x0198}, {0x01CC, 0x01CA}, {0x0263, 0x0194}, {0x0292, 0x01B7}, {0x01A8, 0x01A7}, {0x01DD, 0x018E}, {0x0268, 0x0197}, {0} }; int i, r; memset((char*)uc, 0, uc_len); uc_len >>= 1; if (uc_len > 65536) uc_len = 65536; for (i = 0; (u32)i < uc_len; i++) uc[i] = i; for (r = 0; uc_run_table[r][0]; r++) for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++) uc[i] += uc_run_table[r][2]; for (r = 0; uc_dup_table[r][0]; r++) for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2) uc[i + 1]--; for (r = 0; uc_byte_table[r][0]; r++) uc[uc_byte_table[r][0]] = uc_byte_table[r][1]; } /** * ntfs_str2ucs - convert a string to a valid NTFS file name * @s: input string * @len: length of output buffer in Unicode characters * * Convert the input @s string into the corresponding little endian, * 2-byte Unicode string. The length of the converted string is less * or equal to the maximum length allowed by the NTFS format (255). * * If @s is NULL then return AT_UNNAMED. * * On success the function returns the Unicode string in an allocated * buffer and the caller is responsible to free it when it's not needed * anymore. * * On error NULL is returned and errno is set to the error code. 
*/ ntfschar *ntfs_str2ucs(const char *s, int *len) { ntfschar *ucs = NULL; if (s && ((*len = ntfs_mbstoucs(s, &ucs, 0)) == -1)) { ntfs_log_perror("Couldn't convert '%s' to Unicode", s); return NULL; } if (*len > 0xff) { free(ucs); errno = ENAMETOOLONG; return NULL; } if (!ucs || !*len) { ucs = AT_UNNAMED; *len = 0; } return ucs; }
int fnmatch (const char *pattern, const char *string, int flags) { # if HANDLE_MULTIBYTE # define ALLOCA_LIMIT 2000 if (__builtin_expect (MB_CUR_MAX, 1) != 1) { mbstate_t ps; size_t patsize; size_t strsize; size_t totsize; wchar_t *wpattern; wchar_t *wstring; int res; /* Calculate the size needed to convert the strings to wide characters. */ memset (&ps, '\0', sizeof (ps)); patsize = mbsrtowcs (NULL, &pattern, 0, &ps) + 1; if (__builtin_expect (patsize != 0, 1)) { assert (mbsinit (&ps)); strsize = mbsrtowcs (NULL, &string, 0, &ps) + 1; if (__builtin_expect (strsize != 0, 1)) { assert (mbsinit (&ps)); totsize = patsize + strsize; if (__builtin_expect (! (patsize <= totsize && totsize <= SIZE_MAX / sizeof (wchar_t)), 0)) { errno = ENOMEM; return -1; } /* Allocate room for the wide characters. */ if (__builtin_expect (totsize < ALLOCA_LIMIT, 1)) wpattern = (wchar_t *) alloca (totsize * sizeof (wchar_t)); else { wpattern = malloc (totsize * sizeof (wchar_t)); if (__builtin_expect (! wpattern, 0)) { errno = ENOMEM; return -1; } } wstring = wpattern + patsize; /* Convert the strings into wide characters. */ mbsrtowcs (wpattern, &pattern, patsize, &ps); assert (mbsinit (&ps)); mbsrtowcs (wstring, &string, strsize, &ps); res = internal_fnwmatch (wpattern, wstring, wstring + strsize - 1, flags & FNM_PERIOD, flags); if (__builtin_expect (! (totsize < ALLOCA_LIMIT), 0)) free (wpattern); return res; } } } # endif /* HANDLE_MULTIBYTE */ return internal_fnmatch (pattern, string, string + strlen (string), flags & FNM_PERIOD, flags); }
/* Test driver for mbrtowc().

   Runs locale-independent checks (zero-length input, NUL byte, the ISO C
   basic character set, NULL string argument) in the locale selected by the
   environment, then switches to four specific locales in turn (ISO-8859-1,
   UTF-8, EUC-JP, GB18030) and decodes a sample string character by
   character.  A locale that cannot be set is reported on stderr and skipped.

   After a byte has been consumed it is overwritten with '\0', so that a
   later call which wrongly re-reads already consumed input is detected.

   Returns 0 when all checks ran (failures abort through assert), or 1 when
   the environment's locale cannot be set.  ARGC and ARGV are unused.  */
int
main (int argc, char *argv[])
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;
  int i;

  /* configure should already have checked that the locale is supported.  */
  if (setlocale (LC_ALL, "") == NULL)
    {
      fprintf(stderr, "unable to set standard locale\n");
      return 1;
    }

  /* Test zero-length input.  */
  printf("zero-length input ...\n");
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, "x", 0, &state);
    /* gnulib's implementation returns (size_t)(-2).
       The AIX 5.1 implementation returns (size_t)(-1).
       glibc's implementation returns 0.  */
    assert (ret == (size_t)(-2) || ret == (size_t)(-1) || ret == 0);
    assert (mbsinit (&state));
  }

  /* Test NUL byte input.  */
  printf("NUL byte input ...\n");
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, "", 1, &state);
    assert (ret == 0);
    assert (wc == 0);
    assert (mbsinit (&state));
    ret = mbrtowc (NULL, "", 1, &state);
    assert (ret == 0);
    assert (mbsinit (&state));
  }

  /* Test single-byte input.  */
  printf("single-byte input ...\n");
  {
    char buf[1];
    int c;

    memset (&state, '\0', sizeof (mbstate_t));
    for (c = 0; c < 0x100; c++)
      switch (c)
        {
        case '\t': case '\v': case '\f':
        case ' ': case '!': case '"': case '#': case '%':
        case '&': case '\'': case '(': case ')': case '*':
        case '+': case ',': case '-': case '.': case '/':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case ':': case ';': case '<': case '=': case '>':
        case '?':
        case 'A': case 'B': case 'C': case 'D': case 'E':
        case 'F': case 'G': case 'H': case 'I': case 'J':
        case 'K': case 'L': case 'M': case 'N': case 'O':
        case 'P': case 'Q': case 'R': case 'S': case 'T':
        case 'U': case 'V': case 'W': case 'X': case 'Y':
        case 'Z':
        case '[': case '\\': case ']': case '^': case '_':
        case 'a': case 'b': case 'c': case 'd': case 'e':
        case 'f': case 'g': case 'h': case 'i': case 'j':
        case 'k': case 'l': case 'm': case 'n': case 'o':
        case 'p': case 'q': case 'r': case 's': case 't':
        case 'u': case 'v': case 'w': case 'x': case 'y':
        case 'z': case '{': case '|': case '}': case '~':
          /* c is in the ISO C "basic character set".  */
          buf[0] = c;
          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, buf, 1, &state);
          assert (ret == 1);
          assert (wc == c);
          assert (mbsinit (&state));
          ret = mbrtowc (NULL, buf, 1, &state);
          assert (ret == 1);
          assert (mbsinit (&state));
          break;
        }
  }

  /* Test special calling convention, passing a NULL pointer.  */
  printf("special calling convention, passing NULL ...\n");
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, NULL, 5, &state);
    assert (ret == 0);
    assert (wc == (wchar_t) 0xBADFACE);
    assert (mbsinit (&state));
  }

  for (i = '1'; i <= '4'; ++i)
    {
      switch (i)
        {
        case '1':
          /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
          printf("ISO8859-1 ...\n");
          {
            char input[] = "B\374\337er"; /* "Büßer" */
            memset (&state, '\0', sizeof (mbstate_t));

            if (setlocale (LC_ALL, "en_US.ISO8859-1") == NULL)
              {
                fprintf(stderr, "unable to set ISO8859-1 locale, skipping\n");
                break;
              }

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input, 1, &state);
            assert (ret == 1);
            assert (wc == 'B');
            assert (mbsinit (&state));
            input[0] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 1, 1, &state);
            assert (ret == 1);
            assert (wctob (wc) == (unsigned char) '\374');
            assert (mbsinit (&state));
            input[1] = '\0';

            /* Test support of NULL first argument.  */
            ret = mbrtowc (NULL, input + 2, 3, &state);
            assert (ret == 1);
            assert (mbsinit (&state));

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 2, 3, &state);
            assert (ret == 1);
            assert (wctob (wc) == (unsigned char) '\337');
            assert (mbsinit (&state));
            input[2] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 3, 2, &state);
            assert (ret == 1);
            assert (wc == 'e');
            assert (mbsinit (&state));
            input[3] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 4, 1, &state);
            assert (ret == 1);
            assert (wc == 'r');
            assert (mbsinit (&state));
          }
          break;

        case '2':
          /* Locale encoding is UTF-8.  */
          printf("UTF-8 ...\n");
          {
            char input[] = "B\303\274\303\237er"; /* "Büßer" */
            memset (&state, '\0', sizeof (mbstate_t));

            if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
              {
                fprintf(stderr, "unable to set UTF-8 locale, skipping\n");
                break;
              }

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input, 1, &state);
            assert (ret == 1);
            assert (wc == 'B');
            assert (mbsinit (&state));
            input[0] = '\0';

            /* First byte of a two-byte character: incomplete.  */
            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 1, 1, &state);
            assert (ret == (size_t)(-2));
            assert (wc == (wchar_t) 0xBADFACE);
            assert (!mbsinit (&state));
            input[1] = '\0';

            /* Second byte completes it; only that byte is counted.  */
            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 2, 5, &state);
            assert (ret == 1);
            assert (wctob (wc) == EOF);
            assert (mbsinit (&state));
            input[2] = '\0';

            /* Test support of NULL first argument.  */
            ret = mbrtowc (NULL, input + 3, 4, &state);
            assert (ret == 2);
            assert (mbsinit (&state));

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 3, 4, &state);
            assert (ret == 2);
            assert (wctob (wc) == EOF);
            assert (mbsinit (&state));
            input[3] = '\0';
            input[4] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 5, 2, &state);
            assert (ret == 1);
            assert (wc == 'e');
            assert (mbsinit (&state));
            input[5] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 6, 1, &state);
            assert (ret == 1);
            assert (wc == 'r');
            assert (mbsinit (&state));
          }
          break;

        case '3':
          /* Locale encoding is EUC-JP.  */
          printf("EUC-JP ...\n");
          {
            char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
            memset (&state, '\0', sizeof (mbstate_t));

            if (setlocale (LC_ALL, "en_US.EUC-JP") == NULL)
              {
                fprintf(stderr, "unable to set EUC-JP locale, skipping\n");
                break;
              }

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input, 1, &state);
            assert (ret == 1);
            assert (wc == '<');
            assert (mbsinit (&state));
            input[0] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 1, 2, &state);
            assert (ret == 2);
            assert (wctob (wc) == EOF);
            assert (mbsinit (&state));
            input[1] = '\0';
            input[2] = '\0';

            /* First byte of a two-byte character: incomplete.  */
            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 3, 1, &state);
            assert (ret == (size_t)(-2));
            assert (wc == (wchar_t) 0xBADFACE);
            assert (!mbsinit (&state));
            input[3] = '\0';

            /* Second byte completes it; only that byte is counted.  */
            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 4, 4, &state);
            assert (ret == 1);
            assert (wctob (wc) == EOF);
            assert (mbsinit (&state));
            input[4] = '\0';

            /* Test support of NULL first argument.  */
            ret = mbrtowc (NULL, input + 5, 3, &state);
            assert (ret == 2);
            assert (mbsinit (&state));

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 5, 3, &state);
            assert (ret == 2);
            assert (wctob (wc) == EOF);
            assert (mbsinit (&state));
            input[5] = '\0';
            input[6] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 7, 1, &state);
            assert (ret == 1);
            assert (wc == '>');
            assert (mbsinit (&state));
          }
          break;

        case '4':
          /* Locale encoding is GB18030.  */
          printf("GB18030 ...\n");
          {
            char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
            memset (&state, '\0', sizeof (mbstate_t));

            if (setlocale (LC_ALL, "en_US.GB18030") == NULL)
              {
                fprintf(stderr, "unable to set GB18030 locale, skipping\n");
                break;
              }

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input, 1, &state);
            assert (ret == 1);
            assert (wc == 'B');
            assert (mbsinit (&state));
            input[0] = '\0';

            /* First byte of a two-byte character: incomplete.  */
            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 1, 1, &state);
            assert (ret == (size_t)(-2));
            assert (wc == (wchar_t) 0xBADFACE);
            assert (!mbsinit (&state));
            input[1] = '\0';

            /* Second byte completes it; only that byte is counted.  */
            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 2, 7, &state);
            assert (ret == 1);
            assert (wctob (wc) == EOF);
            assert (mbsinit (&state));
            input[2] = '\0';

            /* Test support of NULL first argument.  */
            ret = mbrtowc (NULL, input + 3, 6, &state);
            assert (ret == 4);
            assert (mbsinit (&state));

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 3, 6, &state);
            assert (ret == 4);
            assert (wctob (wc) == EOF);
            assert (mbsinit (&state));
            input[3] = '\0';
            input[4] = '\0';
            input[5] = '\0';
            input[6] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 7, 2, &state);
            assert (ret == 1);
            assert (wc == 'e');
            assert (mbsinit (&state));
            /* Clear the byte that was just consumed (index 7, the 'e');
               index 5 has already been cleared above.  */
            input[7] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 8, 1, &state);
            assert (ret == 1);
            assert (wc == 'r');
            assert (mbsinit (&state));
          }
          break;
        }
    }

  return 0;
}