// Upper- or lower-cases sourceString (sourceLength UTF-16 code units) through
// ICU, passing NULL as the locale argument, and writes the result to destString.
//
// Returns the length reported by ICU. When destLength is 0, a
// U_BUFFER_OVERFLOW_ERROR is not treated as a failure, so the call can be used
// to query the required length. Any other ICU failure is translated into
// *pErrorOut and -1 is returned. *pErrorOut is not written on success.
//
// An unrecognized caseFlags value trips Assert(false); if execution continues
// past the assert, nothing is converted and 0 is returned.
int32 ChangeStringLinguisticCase(CaseFlags caseFlags, const char16* sourceString, uint32 sourceLength, char16* destString, uint32 destLength, ApiError* pErrorOut)
{
    static_assert(sizeof(UChar) == sizeof(char16), "Unexpected char type from ICU, function might have to be updated");

    UErrorCode icuStatus = U_ZERO_ERROR;
    int32_t convertedLength = 0;

    if (caseFlags == CaseFlagsUpper)
    {
        convertedLength = u_strToUpper((UChar*) destString, destLength,
            (UChar*) sourceString, sourceLength, NULL, &icuStatus);
    }
    else if (caseFlags == CaseFlagsLower)
    {
        convertedLength = u_strToLower((UChar*) destString, destLength,
            (UChar*) sourceString, sourceLength, NULL, &icuStatus);
    }
    else
    {
        Assert(false);
    }

    // Overflow against a zero-capacity destination is the "tell me the length" case.
    const bool isLengthQuery = (destLength == 0 && icuStatus == U_BUFFER_OVERFLOW_ERROR);
    if (U_SUCCESS(icuStatus) || isLengthQuery)
    {
        // Todo: check for resultStringLength > destLength
        // Return insufficient buffer in that case
        return convertedLength;
    }

    *pErrorOut = TranslateUErrorCode(icuStatus);
    return -1;
}
/*
** Implementations of scalar functions for case mapping - upper() and
** lower(). Function upper() converts its input to upper-case (ABC).
** Function lower() converts to lower-case (abc).
**
** ICU provides two types of case mapping, "general" case mapping and
** "language specific". Refer to ICU documentation for the differences
** between the two.
**
** To utilise "general" case mapping, the upper() or lower() scalar
** functions are invoked with one argument:
**
**     upper('abc') -> 'ABC'
**     lower('ABC') -> 'abc'
**
** To access ICU "language specific" case mapping, upper() or lower()
** should be invoked with two arguments. The second argument is the name
** of the locale to use. Passing an empty string ("") or SQL NULL value
** as the second argument is the same as invoking the 1 argument version
** of upper() or lower().
**
**     lower('I', 'en_us') -> 'i'
**     lower('I', 'tr_tr') -> '\u0131' (small dotless i)
**
** http://www.icu-project.org/userguide/posix.html#case_mappings
**
** The conversion is attempted at most twice: first into a buffer the same
** size as the input and, if ICU reports U_BUFFER_OVERFLOW_ERROR, again into
** a buffer of the size ICU asked for. On success the buffer is handed to
** sqlite3_result_text16() with xFree as its destructor; on every other path
** the buffer is released here.
*/
static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){
  const UChar *zInput;            /* Pointer to input string */
  UChar *zOutput = 0;             /* Pointer to output buffer */
  int nInput;                     /* Size of utf-16 input string in bytes */
  int nOut;                       /* Size of output buffer in bytes */
  int cnt;
  int bToUpper;                   /* True for toupper(), false for tolower() */
  UErrorCode status = U_ZERO_ERROR;
  const char *zLocale = 0;

  assert(nArg==1 || nArg==2);
  bToUpper = (sqlite3_user_data(p)!=0);
  if( nArg==2 ){
    zLocale = (const char *)sqlite3_value_text(apArg[1]);
  }

  zInput = sqlite3_value_text16(apArg[0]);
  if( !zInput ){
    return;
  }
  nOut = nInput = sqlite3_value_bytes16(apArg[0]);
  if( nOut==0 ){
    sqlite3_result_text16(p, "", 0, SQLITE_STATIC);
    return;
  }

  for(cnt=0; cnt<2; cnt++){
    UChar *zNew = sqlite3_realloc(zOutput, nOut);
    if( zNew==0 ){
      sqlite3_free(zOutput);
      sqlite3_result_error_nomem(p);
      return;
    }
    zOutput = zNew;
    status = U_ZERO_ERROR;
    if( bToUpper ){
      nOut = 2*u_strToUpper(zOutput,nOut/2,zInput,nInput/2,zLocale,&status);
    }else{
      nOut = 2*u_strToLower(zOutput,nOut/2,zInput,nInput/2,zLocale,&status);
    }

    if( U_SUCCESS(status) ){
      sqlite3_result_text16(p, zOutput, nOut, xFree);
    }else if( status==U_BUFFER_OVERFLOW_ERROR ){
      /* nOut now holds the size ICU needs; retry with a larger buffer. */
      assert( cnt==0 );
      continue;
    }else{
      /* The buffer was never handed to SQLite, so it must be freed here. */
      sqlite3_free(zOutput);
      icuFunctionError(p, bToUpper ? "u_strToUpper" : "u_strToLower", status);
    }
    return;
  }
  assert( 0 );     /* Unreachable */

  /* Only reached when asserts are compiled out and ICU overflowed twice:
  ** release the buffer and report the error rather than leaking silently. */
  sqlite3_free(zOutput);
  icuFunctionError(p, bToUpper ? "u_strToUpper" : "u_strToLower", status);
}
/// Convert the unicode string to lowercase. This function will return the /// required buffer length as a result. If this length does not match the /// 'DestinationCapacity' this function must be called again with a buffer of /// the required length to get a lowercase version of the string. int32_t swift::_swift_stdlib_unicode_strToLower(uint16_t *Destination, int32_t DestinationCapacity, const uint16_t *Source, int32_t SourceLength) { UErrorCode ErrorCode = U_ZERO_ERROR; #if defined(__CYGWIN__) || defined( _MSC_VER) || defined(__linux__) uint32_t OutputLength = u_strToLower(reinterpret_cast<UChar *>(Destination), DestinationCapacity, reinterpret_cast<const UChar *>(Source), SourceLength, "", &ErrorCode); #else uint32_t OutputLength = u_strToLower(Destination, DestinationCapacity, Source, SourceLength, "", &ErrorCode); #endif if (U_FAILURE(ErrorCode) && ErrorCode != U_BUFFER_OVERFLOW_ERROR) { swift::crash("u_strToLower: Unexpected error lowercasing unicode string."); } return OutputLength; }
int helper_normalize_str(const char *src, char *dest, int dest_size) { int type = CTS_LANG_OTHERS; int32_t size; UErrorCode status = 0; UChar tmp_result[CTS_SQL_MAX_LEN*2]; UChar result[CTS_SQL_MAX_LEN*2]; int i = 0; int j = 0; int str_len = strlen(src); int char_len = 0; for (i=0;i<str_len;i+=char_len) { char char_src[10]; char_len = check_utf8(src[i]); memcpy(char_src, &src[i], char_len); char_src[char_len] = '\0'; u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, char_src, -1, &status); h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED, "u_strFromUTF8() Failed(%s)", u_errorName(status)); u_strToLower(tmp_result, array_sizeof(tmp_result), tmp_result, -1, NULL, &status); h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED, "u_strToLower() Failed(%s)", u_errorName(status)); size = unorm_normalize(tmp_result, -1, UNORM_NFD, 0, (UChar *)result, array_sizeof(result), &status); h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED, "unorm_normalize(%s) Failed(%s)", char_src, u_errorName(status)); if (0 == i) type = helper_check_language(result); helper_extra_normalize(result, size); u_strToUTF8(&dest[j], dest_size-j, &size, result, -1, &status); h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED, "u_strToUTF8() Failed(%s)", u_errorName(status)); j += size; dest[j++] = 0x01; } dest[j]='\0'; HELPER_DBG("src(%s) is transformed(%s)", src, dest); return type; }
// lower {{{ static PyObject * icu_lower(PyObject *self, PyObject *args) { char *input, *ans, *buf3 = NULL; const char *loc; int32_t sz; UChar *buf, *buf2; PyObject *ret; UErrorCode status = U_ZERO_ERROR; if (!PyArg_ParseTuple(args, "ses", &loc, "UTF-8", &input)) return NULL; sz = (int32_t)strlen(input); buf = (UChar*)calloc(sz*4 + 1, sizeof(UChar)); buf2 = (UChar*)calloc(sz*8 + 1, sizeof(UChar)); if (buf == NULL || buf2 == NULL) return PyErr_NoMemory(); u_strFromUTF8(buf, sz*4, NULL, input, sz, &status); u_strToLower(buf2, sz*8, buf, -1, loc, &status); ans = input; sz = u_strlen(buf2); free(buf); if (U_SUCCESS(status) && sz > 0) { buf3 = (char*)calloc(sz*5+1, sizeof(char)); if (buf3 == NULL) return PyErr_NoMemory(); u_strToUTF8(buf3, sz*5, NULL, buf2, -1, &status); if (U_SUCCESS(status)) ans = buf3; } ret = PyUnicode_DecodeUTF8(ans, strlen(ans), "replace"); if (ret == NULL) return PyErr_NoMemory(); free(buf2); if (buf3 != NULL) free(buf3); PyMem_Free(input); return ret; } // }}}
/*
** Implementations of scalar functions for case mapping - upper() and
** lower(). Function upper() converts its input to upper-case (ABC).
** Function lower() converts to lower-case (abc).
**
** ICU provides two types of case mapping, "general" case mapping and
** "language specific". Refer to ICU documentation for the differences
** between the two.
**
** To utilise "general" case mapping, the upper() or lower() scalar
** functions are invoked with one argument:
**
**     upper('abc') -> 'ABC'
**     lower('ABC') -> 'abc'
**
** To access ICU "language specific" case mapping, upper() or lower()
** should be invoked with two arguments. The second argument is the name
** of the locale to use. Passing an empty string ("") or SQL NULL value
** as the second argument is the same as invoking the 1 argument version
** of upper() or lower().
**
**     lower('I', 'en_us') -> 'i'
**     lower('I', 'tr_tr') -> 'ı' (small dotless i)
**
** http://www.icu-project.org/userguide/posix.html#case_mappings
**
** The first attempt uses a buffer of twice the input size plus one UChar.
** If ICU reports U_BUFFER_OVERFLOW_ERROR the buffer is replaced by one of
** the size ICU asked for and the conversion is retried once. The result is
** passed to SQLite with an explicit byte length, so it does not depend on
** ICU having written a terminator. The output buffer is freed on every
** path that does not hand it to sqlite3_result_text16().
*/
static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){
  const UChar *zInput;            /* Input string */
  UChar *zOutput = 0;             /* Output buffer */
  int nInput;                     /* Size of input in bytes */
  int nOutput;                    /* Size of output buffer in bytes */
  int nChar = 0;                  /* Length of result in UChars, as reported by ICU */
  int pass;

  UErrorCode status = U_ZERO_ERROR;
  const char *zLocale = 0;

  assert(nArg==1 || nArg==2);
  if( nArg==2 ){
    zLocale = (const char *)sqlite3_value_text(apArg[1]);
  }

  zInput = sqlite3_value_text16(apArg[0]);
  if( !zInput ){
    return;
  }
  nInput = sqlite3_value_bytes16(apArg[0]);

  nOutput = nInput * 2 + 2;
  for(pass=0; pass<2; pass++){
    zOutput = sqlite3_malloc(nOutput);
    if( !zOutput ){
      sqlite3_result_error_nomem(p);
      return;
    }

    status = U_ZERO_ERROR;
    if( sqlite3_user_data(p) ){
      nChar = u_strToUpper(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status);
    }else{
      nChar = u_strToLower(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status);
    }
    if( status!=U_BUFFER_OVERFLOW_ERROR ) break;

    /* Too small: drop this buffer and size the next one from ICU's answer. */
    sqlite3_free(zOutput);
    zOutput = 0;
    nOutput = (nChar + 1) * 2;
  }

  if( zOutput==0 || !U_SUCCESS(status) ){
    sqlite3_free(zOutput);
    icuFunctionError(p, "u_strToLower()/u_strToUpper", status);
    return;
  }

  sqlite3_result_text16(p, zOutput, nChar*2, xFree);
}
/*
 * icu_change_case(text, which, locale) -> converted text
 *
 * Converts text to title, upper or lower case (selected by `which`; any value
 * other than TITLE_CASE or UPPER_CASE means lower case) using ICU and the
 * given locale. A NULL/None locale raises NotImplementedError. An ICU failure
 * raises ValueError carrying the ICU error name.
 *
 * The output buffer holds three UChars per input UChar plus one, so the
 * allocation is never zero-sized and an empty input is not mistaken for an
 * out-of-memory condition.
 */
static PyObject* icu_change_case(PyObject *self, PyObject *args) {
    char *locale = NULL;
    PyObject *input = NULL, *result = NULL;
    int which = UPPER_CASE;
    UErrorCode status = U_ZERO_ERROR;
    UChar *input_buf = NULL, *output_buf = NULL;
    int32_t sz = 0, capacity = 0;

    if (!PyArg_ParseTuple(args, "Oiz", &input, &which, &locale)) return NULL;
    if (locale == NULL) {
        PyErr_SetString(PyExc_NotImplementedError, "You must specify a locale");  // We deliberately use NotImplementedError so that this error can be unambiguously identified
        return NULL;
    }

    input_buf = python_to_icu(input, &sz, 1);
    if (input_buf == NULL) goto end;

    // +1 keeps the request non-zero: calloc(0, n) is allowed to return NULL.
    capacity = 3 * sz + 1;
    output_buf = (UChar*) calloc(capacity, sizeof(UChar));
    if (output_buf == NULL) { PyErr_NoMemory(); goto end; }

    switch (which) {
        case TITLE_CASE:
            sz = u_strToTitle(output_buf, capacity, input_buf, sz, NULL, locale, &status);
            break;
        case UPPER_CASE:
            sz = u_strToUpper(output_buf, capacity, input_buf, sz, locale, &status);
            break;
        default:
            sz = u_strToLower(output_buf, capacity, input_buf, sz, locale, &status);
    }
    if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); goto end; }
    result = icu_to_python(output_buf, sz);

end:
    free(input_buf);
    free(output_buf);
    return result;

} // }}}
/*
 * Applies one ICU string transformation from src16 into dest16's buffer.
 *
 * action selects the transformation, case-insensitively:
 *   'l' lower-case, 'u' upper-case, 't' title-case (no break iterator
 *   supplied), 'f' default case folding (locale is not used).
 * Any other action sets *status to U_UNSUPPORTED_ERROR.
 *
 * Returns the length reported by the ICU call, or 0 for an unsupported
 * action. dest16's own length field is not updated here.
 */
static uint32_t icu_utf16_sub(struct icu_buf_utf16 *dest16,
                              struct icu_buf_utf16 *src16,
                              const char *locale, char action,
                              UErrorCode *status)
{
    uint32_t produced = 0;

    if (action == 'l' || action == 'L')
    {
        produced = u_strToLower(dest16->utf16, dest16->utf16_cap,
                                src16->utf16, src16->utf16_len,
                                locale, status);
    }
    else if (action == 'u' || action == 'U')
    {
        produced = u_strToUpper(dest16->utf16, dest16->utf16_cap,
                                src16->utf16, src16->utf16_len,
                                locale, status);
    }
    else if (action == 't' || action == 'T')
    {
        produced = u_strToTitle(dest16->utf16, dest16->utf16_cap,
                                src16->utf16, src16->utf16_len,
                                0, locale, status);
    }
    else if (action == 'f' || action == 'F')
    {
        produced = u_strFoldCase(dest16->utf16, dest16->utf16_cap,
                                 src16->utf16, src16->utf16_len,
                                 U_FOLD_CASE_DEFAULT, status);
    }
    else
    {
        *status = U_UNSUPPORTED_ERROR;
    }

    return produced;
}
static void TestCaseLower(void) { static const UChar beforeLower[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff }, lowerRoot[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff }, lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff }; UChar buffer[32]; int32_t length; UErrorCode errorCode; /* lowercase with root locale and separate buffers */ buffer[0]=0xabcd; errorCode=U_ZERO_ERROR; length=u_strToLower(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, beforeLower, sizeof(beforeLower)/U_SIZEOF_UCHAR, "", &errorCode); if( U_FAILURE(errorCode) || length!=(sizeof(lowerRoot)/U_SIZEOF_UCHAR) || uprv_memcmp(lowerRoot, buffer, length*U_SIZEOF_UCHAR)!=0 || buffer[length]!=0 ) { log_err("error in u_strToLower(root locale)=%ld error=%s string matches: %s\t\nlowerRoot=%s\t\nbuffer=%s\n", length, u_errorName(errorCode), uprv_memcmp(lowerRoot, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no", aescstrdup(lowerRoot,-1), aescstrdup(buffer,-1)); } /* lowercase with turkish locale and in the same buffer */ uprv_memcpy(buffer, beforeLower, sizeof(beforeLower)); buffer[sizeof(beforeLower)/U_SIZEOF_UCHAR]=0; errorCode=U_ZERO_ERROR; length=u_strToLower(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, buffer, -1, /* implicit srcLength */ "tr", &errorCode); if( U_FAILURE(errorCode) || length!=(sizeof(lowerTurkish)/U_SIZEOF_UCHAR) || uprv_memcmp(lowerTurkish, buffer, length*U_SIZEOF_UCHAR)!=0 || buffer[length]!=0 ) { log_err("error in u_strToLower(turkish locale)=%ld error=%s string matches: %s\n", length, u_errorName(errorCode), uprv_memcmp(lowerTurkish, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? 
"yes" : "no"); } /* test preflighting */ buffer[0]=buffer[2]=0xabcd; errorCode=U_ZERO_ERROR; length=u_strToLower(buffer, 2, /* set destCapacity=2 */ beforeLower, sizeof(beforeLower)/U_SIZEOF_UCHAR, "", &errorCode); if( errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=(sizeof(lowerRoot)/U_SIZEOF_UCHAR) || uprv_memcmp(lowerRoot, buffer, 2*U_SIZEOF_UCHAR)!=0 || buffer[2]!=0xabcd ) { log_err("error in u_strToLower(root locale preflighting)=%ld error=%s string matches: %s\n", length, u_errorName(errorCode), uprv_memcmp(lowerRoot, buffer, 2*U_SIZEOF_UCHAR)==0 && buffer[2]==0xabcd ? "yes" : "no"); } /* test error handling */ errorCode=U_ZERO_ERROR; length=u_strToLower(NULL, sizeof(buffer)/U_SIZEOF_UCHAR, beforeLower, sizeof(beforeLower)/U_SIZEOF_UCHAR, "", &errorCode); if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { log_err("error in u_strToLower(root locale dest=NULL)=%ld error=%s\n", length, u_errorName(errorCode)); } buffer[0]=0xabcd; errorCode=U_ZERO_ERROR; length=u_strToLower(buffer, -1, beforeLower, sizeof(beforeLower)/U_SIZEOF_UCHAR, "", &errorCode); if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[0]!=0xabcd ) { log_err("error in u_strToLower(root locale destCapacity=-1)=%ld error=%s buffer[0]==0x%lx\n", length, u_errorName(errorCode), buffer[0]); } }
/*
 * Formats args[0].doubleValue in scientific notation for the %e / %E
 * conversions and appends it to output through u_sprintf_pad_and_justify().
 *
 * The formatter's exponent symbol is lower-cased for 'e' and upper-cased
 * otherwise, using the bundle's locale. The number of fraction digits is
 * info->fPrecision when one was given and 6 otherwise. The formatter's
 * min/max fraction digits are restored afterwards.
 *
 * Returns 0 when no scientific formatter is available, otherwise whatever
 * u_sprintf_pad_and_justify() returns. If formatting fails, an empty result
 * is padded instead of scanning an unwritten buffer for a terminator.
 */
int32_t
u_sprintf_scientific_handler(u_localized_string *output,
                             const u_sprintf_spec_info *info,
                             const ufmt_args *args)
{
    double          num         = (double) (args[0].doubleValue);
    UNumberFormat   *format;
    UChar           result      [USPRINTF_BUFFER_SIZE];
    int32_t         resultLen;
    int32_t         minDecimalDigits;
    int32_t         maxDecimalDigits;
    UErrorCode      status      = U_ZERO_ERROR;
    UChar           srcExpBuf[USPRINTF_SYMBOL_BUFFER_SIZE];
    int32_t         srcLen, expLen;
    UChar           expBuf[USPRINTF_SYMBOL_BUFFER_SIZE];

    /* mask off any necessary bits */
    /*  if(! info->fIsLongDouble)
    num &= DBL_MAX;*/

    /* get the formatter */
    format = u_locbund_getScientificFormat(output->fBundle);

    /* handle error */
    if(format == 0)
        return 0;

    /* set the appropriate flags on the formatter */

    /* clone the stream's bundle if it isn't owned */
    if(! output->fOwnBundle) {
        output->fBundle     = u_locbund_clone(output->fBundle);
        output->fOwnBundle = TRUE;
        format = u_locbund_getScientificFormat(output->fBundle);
    }

    /* ICU capacities are counted in UChars, not bytes, so pass the element
       count of each buffer rather than sizeof(). */
    srcLen = unum_getSymbol(format,
        UNUM_EXPONENTIAL_SYMBOL,
        srcExpBuf,
        USPRINTF_SYMBOL_BUFFER_SIZE,
        &status);

    /* Upper/lower case the e */
    if (info->fSpec == (UChar)0x65 /* e */) {
        expLen = u_strToLower(expBuf, USPRINTF_SYMBOL_BUFFER_SIZE,
            srcExpBuf, srcLen,
            output->fBundle->fLocale,
            &status);
    }
    else {
        expLen = u_strToUpper(expBuf, USPRINTF_SYMBOL_BUFFER_SIZE,
            srcExpBuf, srcLen,
            output->fBundle->fLocale,
            &status);
    }

    unum_setSymbol(format,
        UNUM_EXPONENTIAL_SYMBOL,
        expBuf,
        expLen,
        &status);

    /* set the number of decimal digits */

    /* save the formatter's state */
    minDecimalDigits = unum_getAttribute(format, UNUM_MIN_FRACTION_DIGITS);
    maxDecimalDigits = unum_getAttribute(format, UNUM_MAX_FRACTION_DIGITS);

    if(info->fPrecision != -1) {
        /* set the # of decimal digits (an explicit 0 means no fraction) */
        unum_setAttribute(format, UNUM_FRACTION_DIGITS, info->fPrecision);
    }
    else {
        /* # of decimal digits is 6 if precision not specified; '#' (fAlt)
           also shows 6 digits, copying printf behavior on Solaris */
        unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6);
    }

    /* set whether to show the sign */
    u_sprintf_set_sign(format, info, &status);

    /* format the number */
    resultLen = unum_formatDouble(format, num, result, USPRINTF_BUFFER_SIZE, 0, &status);
    if (U_FAILURE(status)) {
        resultLen = 0;
    }

    /* restore the number format */
    unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, minDecimalDigits);
    unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, maxDecimalDigits);

    /* Since we clone the fBundle and we're only using the scientific
       format, we don't need to save the old exponent value. */
    /*unum_setSymbol(format,
        UNUM_EXPONENTIAL_SYMBOL,
        srcExpBuf,
        srcLen,
        &status);*/

    return u_sprintf_pad_and_justify(output, info, result, resultLen);
}
/*
 * Parses a number in scientific notation (%e / %E) from input.
 *
 * Leading whitespace is skipped, the exponent symbol of the scientific
 * formatter is lower-cased for 'e' and upper-cased otherwise using the
 * input bundle's locale, an optional positive sign is skipped, and the
 * number is parsed from at most info->fWidth UChars when a width is given.
 * Unless info->fSkipArg is set the value is stored through args[0].ptrValue
 * as double (fIsLong), long double (fIsLongDouble) or float.
 *
 * Returns 0 when no formatter is available, otherwise the number of UChars
 * consumed including skipped ones. *argConverted is set to 1 when a value
 * was stored and 0 when the argument was skipped. fmt and fmtConsumed are
 * not used by this handler.
 */
static int32_t
u_scanf_scientific_handler(UFILE        *input,
                           u_scanf_spec_info *info,
                           ufmt_args    *args,
                           const UChar  *fmt,
                           int32_t      *fmtConsumed,
                           int32_t      *argConverted)
{
    int32_t         len;
    double          num;
    UNumberFormat   *format;
    int32_t         parsePos    = 0;
    int32_t         skipped;
    UErrorCode      status      = U_ZERO_ERROR;
    UChar           srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
    int32_t         srcLen, expLen;
    UChar           expBuf[UPRINTF_SYMBOL_BUFFER_SIZE];

    /* skip all ws in the input */
    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);

    /* fill the input's internal buffer */
    ufile_fill_uchar_buffer(input);

    /* determine the size of the input's buffer */
    len = (int32_t)(input->str.fLimit - input->str.fPos);

    /* truncate to the width, if specified */
    if(info->fWidth != -1)
        len = ufmt_min(len, info->fWidth);

    /* get the formatter */
    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);

    /* handle error */
    if(format == 0)
        return 0;

    /* set the appropriate flags on the formatter */

    /* ICU capacities are counted in UChars, not bytes, so pass the element
       count of each buffer rather than sizeof(). */
    srcLen = unum_getSymbol(format,
        UNUM_EXPONENTIAL_SYMBOL,
        srcExpBuf,
        UPRINTF_SYMBOL_BUFFER_SIZE,
        &status);

    /* Upper/lower case the e */
    if (info->fSpec == (UChar)0x65 /* e */) {
        expLen = u_strToLower(expBuf, UPRINTF_SYMBOL_BUFFER_SIZE,
            srcExpBuf, srcLen,
            input->str.fBundle.fLocale,
            &status);
    }
    else {
        expLen = u_strToUpper(expBuf, UPRINTF_SYMBOL_BUFFER_SIZE,
            srcExpBuf, srcLen,
            input->str.fBundle.fLocale,
            &status);
    }

    unum_setSymbol(format,
        UNUM_EXPONENTIAL_SYMBOL,
        expBuf,
        expLen,
        &status);

    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
    skipped += u_scanf_skip_leading_positive_sign(input, format, &status);

    /* parse the number */
    num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);

    if (!info->fSkipArg) {
        if (info->fIsLong)
            *(double*)(args[0].ptrValue) = num;
        else if (info->fIsLongDouble)
            *(long double*)(args[0].ptrValue) = num;
        else
            *(float*)(args[0].ptrValue) = (float)num;
    }

    /* mask off any necessary bits */
    /*  if(! info->fIsLong_double)
    num &= DBL_MAX;*/

    /* update the input's position to reflect consumed data */
    input->str.fPos += parsePos;

    /* we converted 1 arg */
    *argConverted = !info->fSkipArg;
    return parsePos + skipped;
}
/*
 * Formats args[0].doubleValue in scientific notation and emits it through
 * handler->pad_and_justify().
 *
 * The formatter's exponent symbol is lower-cased when info->fSpec is 'e' and
 * upper-cased otherwise, using formatBundle's locale. With an explicit
 * precision, %e/%E (judged by info->fOrigSpec) use exactly that many
 * fraction digits while other original specs use between 1 and that many;
 * without a precision 6 digits are used. The formatter's min/max fraction
 * digits, and the sign prefix when info->fShowSign is set, are restored
 * before returning.
 *
 * Returns 0 when no formatter is available, otherwise the handler's result.
 * A formatting failure produces an empty result.
 */
static int32_t
u_printf_scientific_handler(const u_printf_stream_handler  *handler,
                            void                           *context,
                            ULocaleBundle                  *formatBundle,
                            const u_printf_spec_info       *info,
                            const ufmt_args                *args)
{
    double          num         = (double) (args[0].doubleValue);
    UNumberFormat  *format;
    UChar           result[UPRINTF_BUFFER_SIZE];
    UChar           prefixBuffer[UPRINTF_BUFFER_SIZE];
    /* capacity in UChars, not bytes; assumes u_printf_set_sign() forwards it
       to ICU as a UChar capacity - TODO confirm against its definition */
    int32_t         prefixBufferLen = UPRINTF_BUFFER_SIZE;
    int32_t         minDecimalDigits;
    int32_t         maxDecimalDigits;
    UErrorCode      status        = U_ZERO_ERROR;
    UChar srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
    int32_t srcLen, expLen;
    int32_t resultLen;
    UChar expBuf[UPRINTF_SYMBOL_BUFFER_SIZE];

    prefixBuffer[0] = 0;

    /* mask off any necessary bits */
    /*  if(! info->fIsLongDouble)
    num &= DBL_MAX;*/

    /* get the formatter */
    format = u_locbund_getNumberFormat(formatBundle, UNUM_SCIENTIFIC);

    /* handle error */
    if(format == 0)
        return 0;

    /* set the appropriate flags on the formatter */

    /* ICU capacities are counted in UChars, not bytes, so pass the element
       count of each buffer rather than sizeof(). */
    srcLen = unum_getSymbol(format,
        UNUM_EXPONENTIAL_SYMBOL,
        srcExpBuf,
        UPRINTF_SYMBOL_BUFFER_SIZE,
        &status);

    /* Upper/lower case the e */
    if (info->fSpec == (UChar)0x65 /* e */) {
        expLen = u_strToLower(expBuf, UPRINTF_SYMBOL_BUFFER_SIZE,
            srcExpBuf, srcLen,
            formatBundle->fLocale,
            &status);
    }
    else {
        expLen = u_strToUpper(expBuf, UPRINTF_SYMBOL_BUFFER_SIZE,
            srcExpBuf, srcLen,
            formatBundle->fLocale,
            &status);
    }

    unum_setSymbol(format,
        UNUM_EXPONENTIAL_SYMBOL,
        expBuf,
        expLen,
        &status);

    /* save the formatter's state */
    minDecimalDigits = unum_getAttribute(format, UNUM_MIN_FRACTION_DIGITS);
    maxDecimalDigits = unum_getAttribute(format, UNUM_MAX_FRACTION_DIGITS);

    /* set the appropriate flags and number of decimal digits on the formatter */
    if(info->fPrecision != -1) {
        /* set the # of decimal digits */
        if (info->fOrigSpec == (UChar)0x65 /* e */ || info->fOrigSpec == (UChar)0x45 /* E */) {
            unum_setAttribute(format, UNUM_FRACTION_DIGITS, info->fPrecision);
        }
        else {
            unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, 1);
            unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, info->fPrecision);
        }
    }
    else {
        /* # of decimal digits is 6 if precision not specified; '#' (fAlt)
           also shows 6 digits, copying printf behavior on Solaris */
        unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6);
    }

    /* set whether to show the sign */
    if (info->fShowSign) {
        u_printf_set_sign(format, info, prefixBuffer, &prefixBufferLen, &status);
    }

    /* format the number */
    resultLen = unum_formatDouble(format, num, result, UPRINTF_BUFFER_SIZE, 0, &status);

    if (U_FAILURE(status)) {
        resultLen = 0;
    }

    /* restore the number format */
    /* TODO: Is this needed? */
    unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, minDecimalDigits);
    unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, maxDecimalDigits);

    /* Since we're the only one using the scientific
       format, we don't need to save the old exponent value. */
    /*unum_setSymbol(format,
        UNUM_EXPONENTIAL_SYMBOL,
        srcExpBuf,
        srcLen,
        &status);*/

    if (info->fShowSign) {
        /* Reset back to original value regardless of what the error was */
        UErrorCode localStatus = U_ZERO_ERROR;
        u_printf_reset_sign(format, info, prefixBuffer, &prefixBufferLen, &localStatus);
    }

    return handler->pad_and_justify(context, info, result, resultLen);
}
static void demoCaseMapInC() { /* * input= * "aB<capital sigma>" * "iI<small dotless i><capital dotted I> " * "<sharp s> <small lig. ffi>" * "<small final sigma><small sigma><capital sigma>" */ static const UChar input[]={ 0x61, 0x42, 0x3a3, 0x69, 0x49, 0x131, 0x130, 0x20, 0xdf, 0x20, 0xfb03, 0x3c2, 0x3c3, 0x3a3, 0 }; UChar buffer[32]; UErrorCode errorCode; UChar32 c; int32_t i, j, length; UBool isError; printf("\n* demoCaseMapInC() ----------------- ***\n\n"); /* * First, use simple case mapping functions which provide * 1:1 code point mappings without context/locale ID. * * Note that some mappings will not be "right" because some "real" * case mappings require context, depend on the locale ID, * and/or result in a change in the number of code points. */ printUString("input string: ", input, -1); /* uppercase */ isError=FALSE; for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) { U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */ if(c==0) { break; /* stop at terminating NUL, no need to terminate buffer */ } c=u_toupper(c); U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError); } printUString("simple-uppercased: ", buffer, j); /* lowercase */ isError=FALSE; for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) { U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */ if(c==0) { break; /* stop at terminating NUL, no need to terminate buffer */ } c=u_tolower(c); U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError); } printUString("simple-lowercased: ", buffer, j); /* titlecase */ isError=FALSE; for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) { U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */ if(c==0) { break; /* stop at terminating NUL, no need to terminate buffer */ } c=u_totitle(c); U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError); } printUString("simple-titlecased: ", buffer, j); /* 
case-fold/default */ isError=FALSE; for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) { U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */ if(c==0) { break; /* stop at terminating NUL, no need to terminate buffer */ } c=u_foldCase(c, U_FOLD_CASE_DEFAULT); U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError); } printUString("simple-case-folded/default: ", buffer, j); /* case-fold/Turkic */ isError=FALSE; for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) { U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */ if(c==0) { break; /* stop at terminating NUL, no need to terminate buffer */ } c=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I); U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError); } printUString("simple-case-folded/Turkic: ", buffer, j); /* * Second, use full case mapping functions which provide * 1:n code point mappings (n can be 0!) and are sensitive to context and locale ID. * * Note that lower/upper/titlecasing take a locale ID while case-folding * has bit flag options instead, by design of the Unicode SpecialCasing.txt UCD file. * * Also, string titlecasing requires a BreakIterator to find starts of words. * The sample code here passes in a NULL pointer; u_strToTitle() will open and close a default * titlecasing BreakIterator automatically. * For production code where many strings are titlecased it would be more efficient * to open a BreakIterator externally and pass it in. 
*/ printUString("\ninput string: ", input, -1); /* lowercase/English */ errorCode=U_ZERO_ERROR; length=u_strToLower(buffer, UPRV_LENGTHOF(buffer), input, -1, "en", &errorCode); if(U_SUCCESS(errorCode)) { printUString("full-lowercased/en: ", buffer, length); } else { printf("error in u_strToLower(en)=%ld error=%s\n", length, u_errorName(errorCode)); } /* lowercase/Turkish */ errorCode=U_ZERO_ERROR; length=u_strToLower(buffer, UPRV_LENGTHOF(buffer), input, -1, "tr", &errorCode); if(U_SUCCESS(errorCode)) { printUString("full-lowercased/tr: ", buffer, length); } else { printf("error in u_strToLower(tr)=%ld error=%s\n", length, u_errorName(errorCode)); } /* uppercase/English */ errorCode=U_ZERO_ERROR; length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer), input, -1, "en", &errorCode); if(U_SUCCESS(errorCode)) { printUString("full-uppercased/en: ", buffer, length); } else { printf("error in u_strToUpper(en)=%ld error=%s\n", length, u_errorName(errorCode)); } /* uppercase/Turkish */ errorCode=U_ZERO_ERROR; length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer), input, -1, "tr", &errorCode); if(U_SUCCESS(errorCode)) { printUString("full-uppercased/tr: ", buffer, length); } else { printf("error in u_strToUpper(tr)=%ld error=%s\n", length, u_errorName(errorCode)); } /* titlecase/English */ errorCode=U_ZERO_ERROR; length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer), input, -1, NULL, "en", &errorCode); if(U_SUCCESS(errorCode)) { printUString("full-titlecased/en: ", buffer, length); } else { printf("error in u_strToTitle(en)=%ld error=%s\n", length, u_errorName(errorCode)); } /* titlecase/Turkish */ errorCode=U_ZERO_ERROR; length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer), input, -1, NULL, "tr", &errorCode); if(U_SUCCESS(errorCode)) { printUString("full-titlecased/tr: ", buffer, length); } else { printf("error in u_strToTitle(tr)=%ld error=%s\n", length, u_errorName(errorCode)); } /* case-fold/default */ errorCode=U_ZERO_ERROR; length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer), input, 
-1, U_FOLD_CASE_DEFAULT, &errorCode); if(U_SUCCESS(errorCode)) { printUString("full-case-folded/default: ", buffer, length); } else { printf("error in u_strFoldCase(default)=%ld error=%s\n", length, u_errorName(errorCode)); } /* case-fold/Turkic */ errorCode=U_ZERO_ERROR; length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer), input, -1, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode); if(U_SUCCESS(errorCode)) { printUString("full-case-folded/Turkic: ", buffer, length); } else { printf("error in u_strFoldCase(Turkic)=%ld error=%s\n", length, u_errorName(errorCode)); } }
/** * psl_str_to_utf8lower: * @str: string to convert * @encoding: charset encoding of @str, e.g. 'iso-8859-1' or %NULL * @locale: locale of @str for to lowercase conversion, e.g. 'de' or %NULL * @lower: return value containing the converted string * * This helper function converts a string to lowercase UTF-8 representation. * Lowercase UTF-8 is needed as input to the domain checking functions. * * @lower is set to %NULL on error. * * The return value 'lower' must be freed after usage. * * Returns: psl_error_t value. * PSL_SUCCESS: Success * PSL_ERR_INVALID_ARG: @str is a %NULL value. * PSL_ERR_CONVERTER: Failed to open the unicode converter with name @encoding * PSL_ERR_TO_UTF16: Failed to convert @str to unicode * PSL_ERR_TO_LOWER: Failed to convert unicode to lowercase * PSL_ERR_TO_UTF8: Failed to convert unicode to UTF-8 * * Since: 0.4 */ psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower) { int ret = PSL_ERR_INVALID_ARG; if (lower) *lower = NULL; if (!str) return PSL_ERR_INVALID_ARG; /* shortcut to avoid costly conversion */ if (_str_is_ascii(str)) { if (lower) { char *p; *lower = strdup(str); /* convert ASCII string to lowercase */ for (p = *lower; *p; p++) if (isupper(*p)) *p = tolower(*p); } return PSL_SUCCESS; } #ifdef WITH_LIBICU do { size_t str_length = strlen(str); UErrorCode status = 0; UChar *utf16_dst, *utf16_lower; int32_t utf16_dst_length; char *utf8_lower; UConverter *uconv; /* C89 allocation */ utf16_dst = alloca(sizeof(UChar) * (str_length * 2 + 1)); utf16_lower = alloca(sizeof(UChar) * (str_length * 2 + 1)); utf8_lower = alloca(str_length * 2 + 1); uconv = ucnv_open(encoding, &status); if (U_SUCCESS(status)) { utf16_dst_length = ucnv_toUChars(uconv, utf16_dst, str_length * 2 + 1, str, str_length, &status); ucnv_close(uconv); if (U_SUCCESS(status)) { int32_t utf16_lower_length = u_strToLower(utf16_lower, str_length * 2 + 1, utf16_dst, utf16_dst_length, locale, &status); if (U_SUCCESS(status)) 
{ u_strToUTF8(utf8_lower, str_length * 8 + 1, NULL, utf16_lower, utf16_lower_length, &status); if (U_SUCCESS(status)) { if (lower) *lower = strdup(utf8_lower); ret = PSL_SUCCESS; } else { ret = PSL_ERR_TO_UTF8; /* fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */ } } else { ret = PSL_ERR_TO_LOWER; /* fprintf(stderr, "Failed to convert UTF-16 to lowercase (status %d)\n", status); */ } } else { ret = PSL_ERR_TO_UTF16; /* fprintf(stderr, "Failed to convert string to UTF-16 (status %d)\n", status); */ } } else { ret = PSL_ERR_CONVERTER; /* fprintf(stderr, "Failed to open converter for '%s' (status %d)\n", encoding, status); */ } } while (0); #elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN) do { /* find out local charset encoding */ if (!encoding) { encoding = nl_langinfo(CODESET); if (!encoding || !*encoding) encoding = "ASCII"; } /* convert to UTF-8 */ if (strcasecmp(encoding, "utf-8")) { iconv_t cd = iconv_open("utf-8", encoding); if (cd != (iconv_t)-1) { char *tmp = (char *)str; /* iconv won't change where str points to, but changes tmp itself */ size_t tmp_len = strlen(str); size_t dst_len = tmp_len * 6, dst_len_tmp = dst_len; char *dst = malloc(dst_len + 1), *dst_tmp = dst; if (iconv(cd, &tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1) { uint8_t *resbuf = malloc(dst_len * 2 + 1); size_t len = dst_len * 2; /* leave space for additional \0 byte */ if ((dst = (char *)u8_tolower((uint8_t *)dst, dst_len - dst_len_tmp, 0, UNINORM_NFKC, resbuf, &len))) { /* u8_tolower() does not terminate the result string */ if (lower) *lower = strndup((char *)dst, len); } else { ret = PSL_ERR_TO_LOWER; /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */ } if (lower) *lower = strndup(dst, dst_len - dst_len_tmp); ret = PSL_SUCCESS; } else { ret = PSL_ERR_TO_UTF8; /* fprintf(stderr, "Failed to convert '%s' string into '%s' (%d)\n", src_encoding, dst_encoding, errno); */ } free(dst); iconv_close(cd); } else { ret = 
PSL_ERR_TO_UTF8; /* fprintf(stderr, "Failed to prepare encoding '%s' into '%s' (%d)\n", src_encoding, dst_encoding, errno); */ } } else ret = PSL_SUCCESS; /* convert to lowercase */ if (ret == PSL_SUCCESS) { uint8_t *dst, resbuf[256]; size_t len = sizeof(resbuf) - 1; /* leave space for additional \0 byte */ /* we need a conversion to lowercase */ if ((dst = u8_tolower((uint8_t *)str, u8_strlen((uint8_t *)str), 0, UNINORM_NFKC, resbuf, &len))) { /* u8_tolower() does not terminate the result string */ if (lower) *lower = strndup((char *)dst, len); } else { ret = PSL_ERR_TO_LOWER; /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */ } } } while (0); #endif return ret; }
/*
 * Forwarding shim around ICU's u_strToLower(): every argument is passed
 * through unchanged and ICU's return value is handed back as-is.
 */
int32_t __hs_u_strToLower(UChar *dest, int32_t destCapacity,
                          const UChar *src, int32_t srcLength,
                          const char *locale, UErrorCode *pErrorCode)
{
    const int32_t loweredLength =
        u_strToLower(dest, destCapacity, src, srcLength, locale, pErrorCode);

    return loweredLength;
}