void StringCaseTest::TestCasingImpl(const UnicodeString &input, const UnicodeString &output, int32_t whichCase, void *iter, const char *localeID, uint32_t options) { // UnicodeString UnicodeString result; const char *name; Locale locale(localeID); result=input; switch(whichCase) { case TEST_LOWER: name="toLower"; result.toLower(locale); break; case TEST_UPPER: name="toUpper"; result.toUpper(locale); break; #if !UCONFIG_NO_BREAK_ITERATION case TEST_TITLE: name="toTitle"; result.toTitle((BreakIterator *)iter, locale, options); break; #endif case TEST_FOLD: name="foldCase"; result.foldCase(options); break; default: name=""; break; // won't happen } if(result!=output) { dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name); } #if !UCONFIG_NO_BREAK_ITERATION if(whichCase==TEST_TITLE && options==0) { result=input; result.toTitle((BreakIterator *)iter, locale); if(result!=output) { dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res"); } } #endif // UTF-8 char utf8In[100], utf8Out[100]; int32_t utf8InLength, utf8OutLength, resultLength; UChar *buffer; IcuTestErrorCode errorCode(*this, "TestCasingImpl"); LocalUCaseMapPointer csm(ucasemap_open(localeID, options, errorCode)); #if !UCONFIG_NO_BREAK_ITERATION if(iter!=NULL) { // Clone the break iterator so that the UCaseMap can safely adopt it. UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, NULL, errorCode); ucasemap_setBreakIterator(csm.getAlias(), clone, errorCode); } #endif u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), errorCode); switch(whichCase) { case TEST_LOWER: name="ucasemap_utf8ToLower"; utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(), utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, errorCode); break; case TEST_UPPER: name="ucasemap_utf8ToUpper"; utf8OutLength=ucasemap_utf8ToUpper(csm.getAlias(), utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, errorCode); break; #if !UCONFIG_NO_BREAK_ITERATION case TEST_TITLE: name="ucasemap_utf8ToTitle"; utf8OutLength=ucasemap_utf8ToTitle(csm.getAlias(), utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, errorCode); break; #endif case TEST_FOLD: name="ucasemap_utf8FoldCase"; utf8OutLength=ucasemap_utf8FoldCase(csm.getAlias(), utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, errorCode); break; default: name=""; utf8OutLength=0; break; // won't happen } buffer=result.getBuffer(utf8OutLength); u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, errorCode); result.releaseBuffer(errorCode.isSuccess() ? resultLength : 0); if(errorCode.isFailure()) { errcheckln(errorCode, "error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode)); errorCode.reset(); } else if(result!=output) { errln("error: %s() got a wrong result for a test case from casing.res", name); errln("expected \"" + output + "\" got \"" + result + "\"" ); } }
/* * API test for UCaseMap; * test cases for actual case mappings using UCaseMap see * intltest utility/UnicodeStringTest/StringCaseTest/TestCasing */ static void TestUCaseMap(void) { static const char aBc[] ={ 0x61, 0x42, 0x63, 0 }, abc[] ={ 0x61, 0x62, 0x63, 0 }, ABCg[]={ 0x41, 0x42, 0x43, 0x67, 0 }, defg[]={ 0x64, 0x65, 0x66, 0x67, 0 }; char utf8Out[8]; UCaseMap *csm; const char *locale; uint32_t options; int32_t length; UErrorCode errorCode; errorCode=U_ZERO_ERROR; csm=ucasemap_open("tur", 0xa5, &errorCode); if(U_FAILURE(errorCode)) { log_err("ucasemap_open(\"tur\") failed - %s\n", u_errorName(errorCode)); return; } locale=ucasemap_getLocale(csm); if(0!=strcmp(locale, "tr")) { log_err("ucasemap_getLocale(ucasemap_open(\"tur\"))==%s!=\"tr\"\n", locale); } /* overly long locale IDs get truncated to their language code to avoid unnecessary allocation */ ucasemap_setLocale(csm, "I-kLInGOn-the-quick-brown-fox-jumps-over-the-lazy-dog", &errorCode); locale=ucasemap_getLocale(csm); if(0!=strcmp(locale, "i-klingon")) { log_err("ucasemap_getLocale(ucasemap_setLocale(\"I-kLInGOn-the-quick-br...\"))==%s!=\"i-klingon\"\n", locale); } errorCode=U_ZERO_ERROR; options=ucasemap_getOptions(csm); if(options!=0xa5) { log_err("ucasemap_getOptions(ucasemap_open(0xa5))==0x%lx!=0xa5\n", (long)options); } ucasemap_setOptions(csm, 0x333333, &errorCode); options=ucasemap_getOptions(csm); if(options!=0x333333) { log_err("ucasemap_getOptions(ucasemap_setOptions(0x333333))==0x%lx!=0x333333\n", (long)options); } /* test case mapping API; not all permutations necessary due to shared implementation code */ /* NUL terminated source */ errorCode=U_ZERO_ERROR; length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, -1, &errorCode); if(U_FAILURE(errorCode) || length!=3 || 0!=strcmp(abc, utf8Out)) { log_err("ucasemap_utf8ToLower(aBc\\0) failed\n"); } /* incoming failure code */ errorCode=U_PARSE_ERROR; strcpy(utf8Out, defg); length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, -1, &errorCode); if(errorCode!=U_PARSE_ERROR || 0!=strcmp(defg, utf8Out)) { log_err("ucasemap_utf8ToLower(failure) failed\n"); } /* overlapping input & output */ errorCode=U_ZERO_ERROR; strcpy(utf8Out, aBc); length=ucasemap_utf8ToUpper(csm, utf8Out, 2, utf8Out+1, 2, &errorCode); if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(aBc, utf8Out)) { log_err("ucasemap_utf8ToUpper(overlap 1) failed\n"); } /* overlap in the other direction */ errorCode=U_ZERO_ERROR; strcpy(utf8Out, aBc); length=ucasemap_utf8ToUpper(csm, utf8Out+1, 2, utf8Out, 2, &errorCode); if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(aBc, utf8Out)) { log_err("ucasemap_utf8ToUpper(overlap 2) failed\n"); } /* NULL destination */ errorCode=U_ZERO_ERROR; strcpy(utf8Out, defg); length=ucasemap_utf8ToLower(csm, NULL, (int32_t)sizeof(utf8Out), aBc, -1, &errorCode); if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) { log_err("ucasemap_utf8ToLower(dest=NULL) failed\n"); } /* destCapacity<0 */ errorCode=U_ZERO_ERROR; strcpy(utf8Out, defg); length=ucasemap_utf8ToLower(csm, utf8Out, -2, aBc, -1, &errorCode); if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) { log_err("ucasemap_utf8ToLower(destCapacity<0) failed\n"); } /* NULL source */ errorCode=U_ZERO_ERROR; strcpy(utf8Out, defg); length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), NULL, -1, &errorCode); if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) { log_err("ucasemap_utf8ToLower(src=NULL) failed\n"); } /* srcLength<-1 */ errorCode=U_ZERO_ERROR; strcpy(utf8Out, defg); length=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, -2, &errorCode); if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || 0!=strcmp(defg, utf8Out)) { log_err("ucasemap_utf8ToLower(srcLength<-1) failed\n"); } /* buffer overflow */ errorCode=U_ZERO_ERROR; strcpy(utf8Out, defg); length=ucasemap_utf8ToUpper(csm, utf8Out, 2, aBc, 3, &errorCode); if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3 || 0!=strcmp(defg+2, utf8Out+2)) { log_err("ucasemap_utf8ToUpper(overflow) failed\n"); } /* dest not terminated (leaves g from defg alone) */ errorCode=U_ZERO_ERROR; strcpy(utf8Out, defg); length=ucasemap_utf8ToUpper(csm, utf8Out, 3, aBc, 3, &errorCode); if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=3 || 0!=strcmp(ABCg, utf8Out)) { log_err("ucasemap_utf8ToUpper(overflow) failed\n"); } /* C API coverage for case folding. More thorough test via C++ intltest's StringCaseTest::TestCasing(). */ errorCode=U_ZERO_ERROR; utf8Out[0]=0; length=ucasemap_utf8FoldCase(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, 3, &errorCode); if(U_FAILURE(errorCode) || length!=3 || 0!=strcmp(abc, utf8Out)) { log_err("ucasemap_utf8FoldCase(aBc) failed\n"); } ucasemap_close(csm); }
void StringCaseTest::TestCasingImpl(const UnicodeString &input, const UnicodeString &output, int32_t whichCase, void *iter, const char *localeID, uint32_t options) { // UnicodeString UnicodeString result; const char *name; Locale locale(localeID); result=input; switch(whichCase) { case TEST_LOWER: name="toLower"; result.toLower(locale); break; case TEST_UPPER: name="toUpper"; result.toUpper(locale); break; #if !UCONFIG_NO_BREAK_ITERATION case TEST_TITLE: name="toTitle"; result.toTitle((BreakIterator *)iter, locale, options); break; #endif case TEST_FOLD: name="foldCase"; result.foldCase(options); break; default: name=""; break; // won't happen } if(result!=output) { errln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name); } #if !UCONFIG_NO_BREAK_ITERATION if(whichCase==TEST_TITLE && options==0) { result=input; result.toTitle((BreakIterator *)iter, locale); if(result!=output) { errln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res"); } } #endif // UTF-8 char utf8In[100], utf8Out[100]; int32_t utf8InLength, utf8OutLength, resultLength; UChar *buffer; UCaseMap *csm; UErrorCode errorCode; errorCode=U_ZERO_ERROR; csm=ucasemap_open(localeID, options, &errorCode); #if !UCONFIG_NO_BREAK_ITERATION if(iter!=NULL) { // Clone the break iterator so that the UCaseMap can safely adopt it. int32_t size=1; // Not 0 because that only gives preflighting. UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, &size, &errorCode); ucasemap_setBreakIterator(csm, clone, &errorCode); } #endif u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), &errorCode); switch(whichCase) { case TEST_LOWER: name="ucasemap_utf8ToLower"; utf8OutLength=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, &errorCode); break; case TEST_UPPER: name="ucasemap_utf8ToUpper"; utf8OutLength=ucasemap_utf8ToUpper(csm, utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, &errorCode); break; #if !UCONFIG_NO_BREAK_ITERATION case TEST_TITLE: name="ucasemap_utf8ToTitle"; utf8OutLength=ucasemap_utf8ToTitle(csm, utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, &errorCode); break; #endif case TEST_FOLD: name="ucasemap_utf8FoldCase"; utf8OutLength=ucasemap_utf8FoldCase(csm, utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, &errorCode); break; default: name=""; utf8OutLength=0; break; // won't happen } buffer=result.getBuffer(utf8OutLength); u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, &errorCode); result.releaseBuffer(U_SUCCESS(errorCode) ? resultLength : 0); if(U_FAILURE(errorCode)) { errln("error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode)); } else if(result!=output) { errln("error: %s() got a wrong result for a test case from casing.res", name); } ucasemap_close(csm); }
char *casefoldx(int flags, VSTRING *dest, const char *src, ssize_t len) { size_t old_len; #ifdef NO_EAI /* * ASCII mode only. */ if (len < 0) len = strlen(src); if ((flags & CASEF_FLAG_APPEND) == 0) VSTRING_RESET(dest); old_len = VSTRING_LEN(dest); vstring_strncat(dest, src, len); lowercase(STR(dest) + old_len); return (STR(dest)); #else /* * Unicode mode. */ const char myname[] = "casefold"; static VSTRING *fold_buf = 0; static UCaseMap *csm = 0; UErrorCode error; ssize_t space_needed; int n; /* * Handle special cases. */ if (len < 0) len = strlen(src); if (dest == 0) dest = (fold_buf != 0 ? fold_buf : (fold_buf = vstring_alloc(100))); if ((flags & CASEF_FLAG_APPEND) == 0) VSTRING_RESET(dest); old_len = VSTRING_LEN(dest); /* * All-ASCII input, or ASCII mode only. */ if ((flags & CASEF_FLAG_UTF8) == 0 || allascii(src)) { vstring_strncat(dest, src, len); lowercase(STR(dest) + old_len); return (STR(dest)); } /* * ICU 4.8 ucasemap_utf8FoldCase() does not complain about UTF-8 syntax * errors. XXX Based on source-code review we conclude that non-UTF-8 * bytes are copied verbatim, and experiments confirm this. Given that * this behavior is intentional, we assume that it will stay that way. */ #if 0 if (valid_utf8_string(src, len) == 0) { if (err) *err = "malformed UTF-8 or invalid codepoint"; return (0); } #endif /* * One-time initialization. With ICU 4.8 this works while chrooted. */ if (csm == 0) { error = U_ZERO_ERROR; csm = ucasemap_open("en_US", U_FOLD_CASE_DEFAULT, &error); if (U_SUCCESS(error) == 0) msg_fatal("ucasemap_open error: %s", u_errorName(error)); } /* * Fold the input, adjusting the buffer size if needed. Safety: don't * loop forever. * * Note: the requested amount of space for casemapped output (as reported * with space_needed below) does not include storage for the null * terminator. The terminator is written only when the output buffer is * large enough. This is why we overallocate space when the output does * not fit. But if the output fits exactly, then the ouput will be * unterminated, and we have to terminate the output ourselves. */ for (n = 0; n < 3; n++) { error = U_ZERO_ERROR; space_needed = ucasemap_utf8FoldCase(csm, STR(dest) + old_len, vstring_avail(dest), src, len, &error); if (U_SUCCESS(error)) { VSTRING_AT_OFFSET(dest, old_len + space_needed); if (vstring_avail(dest) == 0) /* exact fit, no terminator */ VSTRING_TERMINATE(dest); /* add terminator */ break; } else if (error == U_BUFFER_OVERFLOW_ERROR) { VSTRING_SPACE(dest, space_needed + 1); /* for terminator */ } else { msg_fatal("%s: conversion error for \"%s\": %s", myname, src, u_errorName(error)); } } return (STR(dest)); #endif /* NO_EAI */ }