icu::UnicodeString u16(const char *u8, int32_t u8_len, UErrorCode &error, UChar32 subst /* =0 */) { error = U_ZERO_ERROR; if (u8_len == 0) { return icu::UnicodeString(); } int32_t outlen; if (subst) { u_strFromUTF8WithSub(nullptr, 0, &outlen, u8, u8_len, subst, nullptr, &error); } else { u_strFromUTF8(nullptr, 0, &outlen, u8, u8_len, &error); } if (error != U_BUFFER_OVERFLOW_ERROR) { return icu::UnicodeString(); } icu::UnicodeString ret; auto out = ret.getBuffer(outlen + 1); error = U_ZERO_ERROR; if (subst) { u_strFromUTF8WithSub(out, outlen + 1, &outlen, u8, u8_len, subst, nullptr, &error); } else { u_strFromUTF8(out, outlen + 1, &outlen, u8, u8_len, &error); } ret.releaseBuffer(outlen); if (U_FAILURE(error)) { return icu::UnicodeString(); } return ret; }
// Collator.strcmp {{{ static PyObject * icu_Collator_strcmp(icu_Collator *self, PyObject *args, PyObject *kwargs) { char *a_, *b_; int32_t asz, bsz; UChar *a, *b; UErrorCode status = U_ZERO_ERROR; UCollationResult res = UCOL_EQUAL; if (!PyArg_ParseTuple(args, "eses", "UTF-8", &a_, "UTF-8", &b_)) return NULL; asz = (int32_t)strlen(a_); bsz = (int32_t)strlen(b_); a = (UChar*)calloc(asz*4 + 1, sizeof(UChar)); b = (UChar*)calloc(bsz*4 + 1, sizeof(UChar)); if (a == NULL || b == NULL) return PyErr_NoMemory(); u_strFromUTF8(a, asz*4 + 1, NULL, a_, asz, &status); u_strFromUTF8(b, bsz*4 + 1, NULL, b_, bsz, &status); PyMem_Free(a_); PyMem_Free(b_); if (U_SUCCESS(status)) res = ucol_strcoll(self->collator, a, -1, b, -1); free(a); free(b); return Py_BuildValue("i", res); } // }}}
static UChar * ustring_from_utf8 (const gchar *utf8, int32_t *ustrLength) { UChar *dest; int32_t destLength, utf8Length = strlen (utf8); UErrorCode errorCode; errorCode = 0; u_strFromUTF8 (NULL, 0, &destLength, utf8, utf8Length, &errorCode); if (errorCode != U_BUFFER_OVERFLOW_ERROR) { g_warning ("can't get the number of chars in UTF-8 string: %s", u_errorName (errorCode)); return NULL; } dest = g_malloc0_n (destLength + 1, sizeof(UChar)); errorCode = 0; u_strFromUTF8 (dest, destLength + 1, NULL, utf8, utf8Length, &errorCode); if (errorCode != U_ZERO_ERROR) { g_free (dest); g_warning ("can't convert UTF-8 string to ustring: %s", u_errorName (errorCode)); return NULL; } *ustrLength = destLength; return dest; }
/**
 * UTF-8 front end for uspoof_getSkeleton().
 *
 * The input is converted to UTF-16, the skeleton is computed on the UTF-16
 * form, and the skeleton is converted back to UTF-8 into dest. Stack buffers
 * of USPOOF_STACK_BUFFER_SIZE UChars are tried first; a heap buffer is used
 * only when ICU reports U_BUFFER_OVERFLOW_ERROR for the stack buffer.
 *
 * @return the UTF-8 length reported by u_strToUTF8(), or 0 when *status was
 *         already a failure on entry or a heap allocation failed.
 */
U_CAPI int32_t U_EXPORT2
uspoof_getSkeletonUTF8(const USpoofChecker *sc, uint32_t type,
                       const char *s, int32_t length,
                       char *dest, int32_t destCapacity,
                       UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return 0;
    }

    UChar stackIn[USPOOF_STACK_BUFFER_SIZE];
    UChar stackOut[USPOOF_STACK_BUFFER_SIZE];
    UChar *in16 = stackIn;      // UTF-16 form of the input
    UChar *out16 = stackOut;    // UTF-16 form of the skeleton
    int32_t inLen16 = 0;
    int32_t skelLen16 = 0;
    int32_t resultLen8 = 0;

    // Single-pass block: "break" plays the role of a jump to the cleanup code.
    do {
        u_strFromUTF8(in16, USPOOF_STACK_BUFFER_SIZE, &inLen16, s, length, status);
        if (*status == U_BUFFER_OVERFLOW_ERROR) {
            in16 = static_cast<UChar *>(uprv_malloc((inLen16+1)*sizeof(UChar)));
            if (in16 == NULL) {
                *status = U_MEMORY_ALLOCATION_ERROR;
                break;
            }
            *status = U_ZERO_ERROR;
            u_strFromUTF8(in16, inLen16+1, &inLen16, s, length, status);
        }

        skelLen16 = uspoof_getSkeleton(sc, type, in16, inLen16,
                                       out16, USPOOF_STACK_BUFFER_SIZE, status);
        if (*status == U_BUFFER_OVERFLOW_ERROR) {
            out16 = static_cast<UChar *>(uprv_malloc((skelLen16+1)*sizeof(UChar)));
            if (out16 == NULL) {
                *status = U_MEMORY_ALLOCATION_ERROR;
                break;
            }
            *status = U_ZERO_ERROR;
            skelLen16 = uspoof_getSkeleton(sc, type, in16, inLen16,
                                           out16, skelLen16+1, status);
        }

        u_strToUTF8(dest, destCapacity, &resultLen8, out16, skelLen16, status);
    } while (false);

    // Release whichever buffers were replaced by heap allocations.
    if (in16 != stackIn) {
        uprv_free(in16);
    }
    if (out16 != stackOut) {
        uprv_free(out16);
    }
    return resultLen8;
}
/*
 * Convert the NUL-terminated UTF-8 string src8cstr into dest16.
 *
 * The conversion is first attempted with dest16's current capacity. If ICU
 * reports U_BUFFER_OVERFLOW_ERROR the buffer is resized to twice the reported
 * length and the conversion is repeated once. On success the buffer's length
 * field is updated; otherwise the buffer is cleared.
 *
 * Returns the final value of *status.
 */
UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 *dest16,
                                    const char *src8cstr,
                                    UErrorCode *status)
{
    size_t nbytes = 0;
    int32_t nunits = 0;
    int converted;

    *status = U_ZERO_ERROR;
    nbytes = strlen(src8cstr);

    u_strFromUTF8(dest16->utf16, dest16->utf16_cap, &nunits,
                  src8cstr, nbytes, status);

    if (*status == U_BUFFER_OVERFLOW_ERROR)
    {
        /* too small: grow, reset the status and try again */
        icu_buf_utf16_resize(dest16, nunits * 2);
        *status = U_ZERO_ERROR;
        u_strFromUTF8(dest16->utf16, dest16->utf16_cap, &nunits,
                      src8cstr, nbytes, status);
    }

    converted = U_SUCCESS(*status) && nunits <= dest16->utf16_cap;
    if (!converted)
        icu_buf_utf16_clear(dest16);
    else
        dest16->utf16_len = nunits;

    return *status;
}
//ret_icu_str must be freed after usage int icu_str_from_utf8(const char *utf_str, UChar **ret_icu_str, UErrorCode *ret_icu_err) { int32_t icu_sz; SOL_NULL_CHECK(ret_icu_str, -EINVAL); SOL_NULL_CHECK(ret_icu_err, -EINVAL); icu_sz = 0; *ret_icu_str = NULL; *ret_icu_err = U_ZERO_ERROR; u_strFromUTF8(NULL, 0, &icu_sz, utf_str, -1, ret_icu_err); if (U_FAILURE(*ret_icu_err) && *ret_icu_err != U_BUFFER_OVERFLOW_ERROR) return -EINVAL; *ret_icu_str = calloc(icu_sz + 1, sizeof(UChar)); SOL_NULL_CHECK(*ret_icu_str, -ENOMEM); *ret_icu_err = U_ZERO_ERROR; u_strFromUTF8(*ret_icu_str, icu_sz + 1, NULL, utf_str, -1, ret_icu_err); if (U_FAILURE(*ret_icu_err)) { free(*ret_icu_str); *ret_icu_str = NULL; return -EINVAL; } return 0; }
/** Lexical compare routine.
 *
 * Performs a case-insensitive compare on two normalized UTF-8 strings as
 * described in RFC 2608, section 6.4.
 *
 * When built with HAVE_ICU both strings are converted to UTF-16 and compared
 * with u_strncasecmp; if a buffer cannot be allocated or the conversion
 * reports any non-zero status, the byte-wise strncasecmp is used instead.
 *
 * @param[in] str1 - A pointer to string to be compared.
 * @param[in] str2 - A pointer to string to be compared.
 * @param[in] length - The maximum length to compare in bytes.
 *
 * @return The value produced by u_strncasecmp or strncasecmp: zero if the
 *    strings compare equal, non-zero otherwise, with the sign following
 *    the convention of those functions.
 */
static int SLPCompareNormalizedString(const char * str1,
      const char * str2, size_t length)
{
#ifdef HAVE_ICU
   int cmp;
   int usable = 0;
   UErrorCode icuerr = 0;
   UChar * wide1 = xmalloc((length + 1) * sizeof(UChar));
   UChar * wide2 = xmalloc((length + 1) * sizeof(UChar));

   if (wide1 && wide2)
   {
      u_strFromUTF8(wide1, (int32_t)length + 1, 0, str1, (int32_t)length, &icuerr);
      u_strFromUTF8(wide2, (int32_t)length + 1, 0, str2, (int32_t)length, &icuerr);
      usable = (icuerr == 0);
   }

   if (!usable)
      cmp = strncasecmp(str1, str2, length);
   else
      cmp = (int)u_strncasecmp(wide1, wide2, (int32_t)length, 0);

   xfree(wide1);
   xfree(wide2);
   return cmp;
#else
   return strncasecmp(str1, str2, length);
#endif /* HAVE_ICU */
}
/* {{{ intl_convert_utf8_to_utf16
 * Convert a string from UTF-8 to UTF-16 into the *target buffer.
 *
 * If *target is NULL a large enough buffer is allocated, the converted
 * string is stored in it, and *target is made to point to it.
 *
 * Otherwise *target is assumed to be a dynamically allocated buffer whose
 * capacity is *target_len (the value is handed to ICU as the destination
 * capacity). The buffer is used for the result if it is large enough;
 * if not, it is freed and replaced by a new, larger one.
 *
 * Note that ICU uses int32_t for string lengths while PHP uses size_t, so
 * sources longer than INT32_MAX are rejected up front.
 *
 * @param target      Where to place the result.
 * @param target_len  In: capacity of *target. Out: length of the result.
 * @param src         String to convert.
 * @param src_len     Length of src.
 * @param status      Conversion status.
 *
 * @return void       This function does not return anything.
 */
void intl_convert_utf8_to_utf16(
	UChar**     target, int32_t* target_len,
	const char* src,    size_t  src_len,
	UErrorCode* status )
{
	UChar*  fresh_buf = NULL;
	int32_t needed    = 0;

	*status = U_ZERO_ERROR;

	if( src_len > INT32_MAX ) {
		/* we can not fit this string */
		*status = U_BUFFER_OVERFLOW_ERROR;
		return;
	}

	/* First attempt: with *target == NULL this is a pure pre-flight that only
	 * determines the required size; otherwise it tries to convert in place. */
	u_strFromUTF8( *target, *target_len, &needed, src, (int32_t)src_len, status );

	if( *status == U_ZERO_ERROR ) {
		/* Converted into the existing buffer. */
		(*target)[needed] = 0;
		*target_len = needed;
		return;
	}

	/* Only two outcomes lead to a retry with a new buffer:
	 * U_BUFFER_OVERFLOW_ERROR (buffer too small) and
	 * U_STRING_NOT_TERMINATED_WARNING (usually an empty input string).
	 * Everything else is reported to the caller as is. */
	if( !( *status == U_BUFFER_OVERFLOW_ERROR || *status == U_STRING_NOT_TERMINATED_WARNING ) )
		return;

	/* One extra unit for the terminating zero. */
	fresh_buf = eumalloc( needed + 1 );

	*status = U_ZERO_ERROR;
	u_strFromUTF8( fresh_buf, needed + 1, NULL, src, (int32_t)src_len, status );
	if( U_FAILURE( *status ) ) {
		efree( fresh_buf );
		return;
	}
	fresh_buf[needed] = 0;

	/* Replace the caller's buffer with the new one. */
	if( *target )
		efree( *target );
	*target     = fresh_buf;
	*target_len = needed;
}
/**
 * UTF-8 front end for uspoof_check().
 *
 * The text is converted to UTF-16 (stack buffer first, heap buffer when the
 * stack buffer overflows), checked with uspoof_check(), and a positive
 * UTF-16 position is translated back to a UTF-8 based value by preflighting
 * u_strToUTF8() over the first position16 code units.
 *
 * @param position  Optional; may be NULL. Only written when uspoof_check()
 *                  reports a position greater than zero.
 * @return the result of uspoof_check(), or 0 when *status is a failure on
 *         entry, or when conversion, allocation or the check itself fails.
 */
U_CAPI int32_t U_EXPORT2
uspoof_checkUTF8(const USpoofChecker *sc,
                 const char *text, int32_t length,
                 int32_t *position,
                 UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return 0;
    }

    UChar stackBuf[USPOOF_STACK_BUFFER_SIZE];
    UChar *text16 = stackBuf;
    int32_t len16 = 0;

    u_strFromUTF8(text16, USPOOF_STACK_BUFFER_SIZE, &len16, text, length, status);
    if (U_FAILURE(*status) && *status != U_BUFFER_OVERFLOW_ERROR) {
        return 0;
    }
    if (*status == U_BUFFER_OVERFLOW_ERROR) {
        // Room for len16 code units plus a terminating zero.
        text16 = static_cast<UChar *>(uprv_malloc(len16 * sizeof(UChar) + 2));
        if (text16 == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }
        *status = U_ZERO_ERROR;
        u_strFromUTF8(text16, len16+1, NULL, text, length, status);
    }

    int32_t position16 = -1;
    int32_t result = uspoof_check(sc, text16, len16, &position16, status);

    if (U_FAILURE(*status)) {
        // Fall through to the buffer release below instead of returning here,
        // so a heap-allocated text16 is not leaked.
        result = 0;
    } else if (position16 > 0) {
        // Translate a UTF-16 based error position back to a UTF-8 offset.
        // u_strToUTF8() in preflight mode is an easy way to do it.
        U_ASSERT(position16 <= len16);
        u_strToUTF8(NULL, 0, position, text16, position16, status);
        if (position != NULL) {
            // NOTE(review): the preflight length is decremented by one here.
            // The u_strToUTF8 documentation should be consulted to confirm
            // whether that length really counts a terminator; behavior is
            // left as it was.
            *position -= 1;
        }
        *status = U_ZERO_ERROR;   // u_strToUTF8, above sets BUFFER_OVERFLOW_ERROR.
    }

    if (text16 != stackBuf) {
        uprv_free(text16);
    }
    return result;
}
int helper_collation_str(const char *src, char *dest, int dest_size) { HELPER_FN_CALL; int32_t size = 0; UErrorCode status = 0; UChar tmp_result[CTS_SQL_MAX_LEN]; UCollator *collator; const char *region; region = vconf_get_str(VCONFKEY_REGIONFORMAT); HELPER_DBG("region %s", region); collator = ucol_open(region, &status); h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED, "ucol_open() Failed(%s)", u_errorName(status)); if (U_FAILURE(status)){ ERR("ucol_setAttribute Failed(%s)", u_errorName(status)); ucol_close(collator); return CTS_ERR_ICU_FAILED; } u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, src, -1, &status); if (U_FAILURE(status)){ ERR("u_strFromUTF8 Failed(%s)", u_errorName(status)); ucol_close(collator); return CTS_ERR_ICU_FAILED; } size = ucol_getSortKey(collator, tmp_result, -1, (uint8_t *)dest, dest_size); ucol_close(collator); dest[size]='\0'; return CTS_SUCCESS; }
/*
 * Convert a Ruby string (taken as UTF-8) to UTF-16.
 *
 * ustr must have room for BUF_SIZE UChars. *ulen receives the converted
 * length, or 0 when ICU reports a failure (for example invalid UTF-8, or
 * text that does not fit in BUF_SIZE code units), so callers never see a
 * length that exceeds the buffer.
 */
static void to_utf16(VALUE string, UChar *ustr, int32_t *ulen)
{
    UErrorCode status = U_ZERO_ERROR;

    string = StringValue(string);
    u_strFromUTF8(ustr, BUF_SIZE, ulen,
                  RSTRING_PTR(string), (int32_t)RSTRING_LEN(string), &status);

    /* Store through the pointer: assigning 0 to ulen itself would only null
     * the local pointer and leave the caller's length untouched. */
    if (U_FAILURE(status))
        *ulen = 0;
}
/**
 * NIFs
 */

/**
 * Convert the UTF-8 bytes of `in` to UTF-16 in a newly allocated binary `out`.
 *
 * On entry `ulen` is the capacity to allocate, in UChars; on return it holds
 * the length reported by ICU. `status` is reset first and receives either
 * U_MEMORY_ALLOCATION_ERROR (binary allocation failed) or the ICU status.
 * On ICU failure `out` is released again.
 */
inline void do_from_utf8(
    ErlNifBinary in,
    ErlNifBinary& out,
    int32_t& ulen,
    UErrorCode& status)
{
    status = U_ZERO_ERROR;

    if (enif_alloc_binary(FROM_ULEN(ulen), &out) == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    UChar* const dest = (UChar*) out.data;
    const char* const src = (char*) in.data;
    const int32_t srcLen = (int32_t) in.size;

    u_strFromUTF8(dest, ulen, &ulen, src, srcLen, &status);

    if (U_FAILURE(status)) {
        enif_release_binary(&out);
        return;
    }

    /* shrink binary if it was too large */
    if (FROM_ULEN(ulen) != out.size)
        enif_realloc_binary(&out, FROM_ULEN(ulen));
}
char *ICUStringMgr::upperUTF8(char *buf, unsigned int maxlen) const { char *ret = buf; int max = (maxlen) ? maxlen : strlen(buf); UErrorCode err = U_ZERO_ERROR; if (!buf || !max) { return ret; } UChar *lowerStr = new UChar[max+10]; UChar *upperStr = new UChar[max+10]; u_strFromUTF8(lowerStr, max+9, 0, buf, -1, &err); if (err != U_ZERO_ERROR) { // SWLog::getSystemLog()->logError("from: %s", u_errorName(err)); delete [] lowerStr; delete [] upperStr; return ret; } u_strToUpper(upperStr, max+9, lowerStr, -1, 0, &err); if (err != U_ZERO_ERROR) { // SWLog::getSystemLog()->logError("upperCase: %s", u_errorName(err)); delete [] lowerStr; delete [] upperStr; return ret; } ret = u_strToUTF8(ret, max, 0, upperStr, -1, &err); delete [] lowerStr; delete [] upperStr; return ret; }
/**
 * Convert UTF-8 text to UTF-16, preferring the caller's buffer.
 *
 * The conversion is first attempted into outBuf. If ICU reports
 * U_BUFFER_OVERFLOW_ERROR, a buffer of exactly *outputLength UChars is
 * allocated with uprv_malloc and the conversion is repeated into it.
 *
 * @return outBuf, or the heap buffer when one had to be allocated, or NULL
 *         when *status was a failure on entry or the allocation failed.
 */
static UChar *
convertFromUTF8(UChar *outBuf, int32_t outBufCapacity, int32_t *outputLength,
                const char *in, int32_t inLength, UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return NULL;
    }

    u_strFromUTF8(outBuf, outBufCapacity, outputLength, in, inLength, status);
    if (*status != U_BUFFER_OVERFLOW_ERROR) {
        // Fits (or failed for another reason): the caller's buffer is returned.
        return outBuf;
    }

    UChar *heapBuf = static_cast<UChar *>(uprv_malloc(*outputLength * sizeof(UChar)));
    if (heapBuf == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    *status = U_ZERO_ERROR;
    u_strFromUTF8(heapBuf, *outputLength, NULL, in, inLength, status);
    return heapBuf;
}
int cq_fields_to_utf8(char *buf, size_t buflen, size_t fieldc, char **fieldnames, bool usequotes) { UChar *buf16; UErrorCode status = U_ZERO_ERROR; size_t num_left = fieldc; int rc = 0; if (num_left == 0) return 1; buf16 = calloc(buflen, sizeof(UChar)); if (buf16 == NULL) return -1; for (size_t i = 0; i < fieldc; ++i) { UChar *temp = calloc(buflen, sizeof(UChar)); if (temp == NULL) { rc = -2; break; } u_strFromUTF8(temp, buflen, NULL, fieldnames[i], strlen(fieldnames[i]), &status); if (!U_SUCCESS(status)) { rc = 2; free(temp); break; } bool isstr = false; if (usequotes) { for (int32_t j = 0; j < u_strlen(temp); ++j) { if (!isdigit(temp[j])) { isstr = true; break; } } } if (isstr) u_strcat(buf16, u"'"); u_strcat(buf16, temp); if (isstr) u_strcat(buf16, u"'"); free(temp); if (--num_left > 0) { u_strcat(buf16, u","); } } u_strToUTF8(buf, buflen, NULL, buf16, u_strlen(buf16), &status); if (!U_SUCCESS(status)) rc = 3; free(buf16); return rc; }
/**
 * Convert a NUL-terminated UTF-8 C string into a v8 string.
 *
 * The text is converted into a local buffer of characters*2 UChars
 * (`characters` is defined outside this function). Any ICU status other
 * than U_ZERO_ERROR - warnings included - is thrown as
 * UTF8_to_UTF16_Conversion_Error.
 */
inline v8::Local<v8::String> utf8_to_v8_String(const char* cstring)
{
    UChar utf16[characters*2];
    int32_t utf16_length;
    UErrorCode icu_status = U_ZERO_ERROR;

    u_strFromUTF8(utf16, characters*2, &utf16_length, cstring, -1, &icu_status);

    if (icu_status == U_ZERO_ERROR)
    {
        return v8::String::New(utf16, utf16_length);
    }
    throw UTF8_to_UTF16_Conversion_Error(icu_status);
}
/*---------------------------------------------------------------------------------------------- This method uses an ICU function to convert a string from UTF-8 to UTF-16. Assumptions: If sourceLen is -1, it will be computed (by ICU) Exit conditions: <text> Parameters: <text> Return value: The number of characters required to store the fully-converted string (which may be greater than targetLen) ----------------------------------------------------------------------------------------------*/ int UnicodeConverter::Convert(const char* source, int sourceLen, UChar* target, int targetLen) { UErrorCode status = U_ZERO_ERROR; int32_t spaceRequiredForData; u_strFromUTF8(target, targetLen, &spaceRequiredForData, source, sourceLen, &status); if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) throw std::runtime_error("Unable to convert from UTF-8 to UTF-16"); return spaceRequiredForData; }
// Read a length-prefixed UTF-8 string from `input` and return it as UTF-16.
//
// A 16-bit byte count is read with readRaw(), followed by that many bytes of
// UTF-8. The bytes are converted with u_strFromUTF8; when the conversion
// fails an empty string is returned.
inline UString readUTF8String(S& input) {
	uint16_t len = 0;
	readRaw(input, len);

	UString rv(len, 0);
	// One spare element keeps &buffer[0] valid even when len is 0;
	// indexing an empty vector is undefined behaviour.
	std::vector<char> buffer(static_cast<size_t>(len) + 1);
	input.read(&buffer[0], len);

	int32_t olen = 0;
	UErrorCode status = U_ZERO_ERROR;
	u_strFromUTF8(&rv[0], len, &olen, &buffer[0], len, &status);
	if (U_FAILURE(status)) {
		olen = 0;
	}

	rv.resize(olen);
	return rv;
}
// Collator.sort_key {{{ static PyObject * icu_Collator_sort_key(icu_Collator *self, PyObject *args, PyObject *kwargs) { char *input; int32_t sz; UChar *buf; uint8_t *buf2; PyObject *ans; int32_t key_size; UErrorCode status = U_ZERO_ERROR; if (!PyArg_ParseTuple(args, "es", "UTF-8", &input)) return NULL; sz = (int32_t)strlen(input); buf = (UChar*)calloc(sz*4 + 1, sizeof(UChar)); if (buf == NULL) return PyErr_NoMemory(); u_strFromUTF8(buf, sz*4 + 1, &key_size, input, sz, &status); PyMem_Free(input); if (U_SUCCESS(status)) { buf2 = (uint8_t*)calloc(7*sz+1, sizeof(uint8_t)); if (buf2 == NULL) return PyErr_NoMemory(); key_size = ucol_getSortKey(self->collator, buf, -1, buf2, 7*sz+1); if (key_size == 0) { ans = PyBytes_FromString(""); } else { if (key_size >= 7*sz+1) { free(buf2); buf2 = (uint8_t*)calloc(key_size+1, sizeof(uint8_t)); if (buf2 == NULL) return PyErr_NoMemory(); ucol_getSortKey(self->collator, buf, -1, buf2, key_size+1); } ans = PyBytes_FromString((char *)buf2); } free(buf2); } else ans = PyBytes_FromString(""); free(buf); if (ans == NULL) return PyErr_NoMemory(); return ans; } // }}}
int helper_normalize_str(const char *src, char *dest, int dest_size) { int type = CTS_LANG_OTHERS; int32_t size; UErrorCode status = 0; UChar tmp_result[CTS_SQL_MAX_LEN*2]; UChar result[CTS_SQL_MAX_LEN*2]; int i = 0; int j = 0; int str_len = strlen(src); int char_len = 0; for (i=0;i<str_len;i+=char_len) { char char_src[10]; char_len = check_utf8(src[i]); memcpy(char_src, &src[i], char_len); char_src[char_len] = '\0'; u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, char_src, -1, &status); h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED, "u_strFromUTF8() Failed(%s)", u_errorName(status)); u_strToLower(tmp_result, array_sizeof(tmp_result), tmp_result, -1, NULL, &status); h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED, "u_strToLower() Failed(%s)", u_errorName(status)); size = unorm_normalize(tmp_result, -1, UNORM_NFD, 0, (UChar *)result, array_sizeof(result), &status); h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED, "unorm_normalize(%s) Failed(%s)", char_src, u_errorName(status)); if (0 == i) type = helper_check_language(result); helper_extra_normalize(result, size); u_strToUTF8(&dest[j], dest_size-j, &size, result, -1, &status); h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED, "u_strToUTF8() Failed(%s)", u_errorName(status)); j += size; dest[j++] = 0x01; } dest[j]='\0'; HELPER_DBG("src(%s) is transformed(%s)", src, dest); return type; }
// title {{{ static PyObject * icu_title(PyObject *self, PyObject *args) { char *input, *ans, *buf3 = NULL; const char *loc; int32_t sz; UChar *buf, *buf2; PyObject *ret; UErrorCode status = U_ZERO_ERROR; if (!PyArg_ParseTuple(args, "ses", &loc, "UTF-8", &input)) return NULL; sz = (int32_t)strlen(input); buf = (UChar*)calloc(sz*4 + 1, sizeof(UChar)); buf2 = (UChar*)calloc(sz*8 + 1, sizeof(UChar)); if (buf == NULL || buf2 == NULL) return PyErr_NoMemory(); u_strFromUTF8(buf, sz*4, NULL, input, sz, &status); u_strToTitle(buf2, sz*8, buf, -1, NULL, loc, &status); ans = input; sz = u_strlen(buf2); free(buf); if (U_SUCCESS(status) && sz > 0) { buf3 = (char*)calloc(sz*5+1, sizeof(char)); if (buf3 == NULL) return PyErr_NoMemory(); u_strToUTF8(buf3, sz*5, NULL, buf2, -1, &status); if (U_SUCCESS(status)) ans = buf3; } ret = PyUnicode_DecodeUTF8(ans, strlen(ans), "replace"); if (ret == NULL) return PyErr_NoMemory(); free(buf2); if (buf3 != NULL) free(buf3); PyMem_Free(input); return ret; } // }}}
static void TestFPos_SkelWithSeconds() { const LocaleAndSkeletonItem * locSkelItemPtr; for (locSkelItemPtr = locSkelItems; locSkelItemPtr->locale != NULL; locSkelItemPtr++) { UDateIntervalFormat* udifmt; UChar ubuf[kSizeUBuf]; int32_t ulen, uelen; UErrorCode status = U_ZERO_ERROR; u_strFromUTF8(ubuf, kSizeUBuf, &ulen, locSkelItemPtr->skeleton, -1, &status); udifmt = udtitvfmt_open(locSkelItemPtr->locale, ubuf, ulen, zoneGMT, -1, &status); if ( U_FAILURE(status) ) { log_data_err("FAIL: udtitvfmt_open for locale %s, skeleton %s: %s\n", locSkelItemPtr->locale, locSkelItemPtr->skeleton, u_errorName(status)); } else { const double * deltasPtr = deltas; const ExpectPosAndFormat * expectedPtr = locSkelItemPtr->expected; for (; *deltasPtr >= 0.0; deltasPtr++, expectedPtr++) { UFieldPosition fpos = { locSkelItemPtr->fieldToCheck, 0, 0 }; UChar uebuf[kSizeUBuf]; char bbuf[kSizeBBuf]; char bebuf[kSizeBBuf]; status = U_ZERO_ERROR; uelen = u_unescape(expectedPtr->format, uebuf, kSizeUBuf); ulen = udtitvfmt_format(udifmt, startTime, startTime + *deltasPtr, ubuf, kSizeUBuf, &fpos, &status); if ( U_FAILURE(status) ) { log_err("FAIL: udtitvfmt_format for locale %s, skeleton %s, delta %.1f: %s\n", locSkelItemPtr->locale, locSkelItemPtr->skeleton, *deltasPtr, u_errorName(status)); } else if ( ulen != uelen || u_strncmp(ubuf,uebuf,uelen) != 0 || fpos.beginIndex != expectedPtr->posBegin || fpos.endIndex != expectedPtr->posEnd ) { u_strToUTF8(bbuf, kSizeBBuf, NULL, ubuf, ulen, &status); u_strToUTF8(bebuf, kSizeBBuf, NULL, uebuf, uelen, &status); // convert back to get unescaped string log_err("FAIL: udtitvfmt_format for locale %s, skeleton %s, delta %12.1f, expect %d-%d \"%s\", get %d-%d \"%s\"\n", locSkelItemPtr->locale, locSkelItemPtr->skeleton, *deltasPtr, expectedPtr->posBegin, expectedPtr->posEnd, bebuf, fpos.beginIndex, fpos.endIndex, bbuf); } } udtitvfmt_close(udifmt); } } }
// Convert the UTF-8 text of src to UTF-16 in destOut.
//
// destOut starts out sized to twice the byte length of src. After every
// conversion attempt it is resized to the length reported by ICU; the attempt
// is repeated as long as that length exceeds the capacity that was offered.
MojErr MojDbTextUtils::strToUnicode(const MojString& src, UnicodeVec& destOut)
{
	MojErr err = destOut.resize(src.length() * 2);
	MojErrCheck(err);

	MojInt32 capacity = 0;
	MojInt32 converted = 0;
	for (;;) {
		UChar* dest = NULL;
		err = destOut.begin(dest);
		MojErrCheck(err);

		capacity = (MojInt32) destOut.size();
		UErrorCode status = U_ZERO_ERROR;
		u_strFromUTF8(dest, capacity, &converted, src.data(), (MojInt32) src.length(), &status);
		// Overflow is handled by the retry below; other statuses go to the
		// usual error check.
		if (status != U_BUFFER_OVERFLOW_ERROR)
			MojUnicodeErrCheck(status);

		err = destOut.resize(converted);
		MojErrCheck(err);

		if (converted <= capacity)
			break;
	}

	return MojErrNone;
}
/*
 * If the label of entry e is not plain ASCII, convert it with
 * uidna_nameToASCII() (IDNA2008 UTS#46 punycode conversion) and, when the
 * converted name differs from the label, add a copy of the entry carrying
 * the converted name to vector v.
 *
 * Nothing happens when idna is NULL or when any conversion step fails.
 */
static void _add_punycode_if_needed(UIDNA *idna, _psl_vector_t *v, _psl_entry_t *e)
{
	char lookupname[128] = "";
	UErrorCode status = 0;
	UIDNAInfo info = UIDNA_INFO_INITIALIZER;
	UChar utf16_dst[128], utf16_src[128];
	int32_t utf16_src_length, dst_length;
	_psl_entry_t suffix, *suffixp;

	if (_str_is_ascii(e->label_buf))
		return;

	if (!idna)
		return;

	/* UTF-8 -> UTF-16 */
	u_strFromUTF8(utf16_src, sizeof(utf16_src)/sizeof(utf16_src[0]), &utf16_src_length, e->label_buf, -1, &status);
	if (!U_SUCCESS(status))
		return; /* failed to convert UTF-8 to UTF-16 */

	/* UTF-16 -> ASCII compatible form */
	dst_length = uidna_nameToASCII(idna, utf16_src, utf16_src_length, utf16_dst, sizeof(utf16_dst)/sizeof(utf16_dst[0]), &info, &status);
	if (!U_SUCCESS(status))
		return; /* failed to convert to ASCII */

	/* back to a char string */
	u_strToUTF8(lookupname, sizeof(lookupname), NULL, utf16_dst, dst_length, &status);
	if (!U_SUCCESS(status))
		return; /* failed to convert UTF-16 to UTF-8 */

	if (strcmp(e->label_buf, lookupname) == 0)
		return; /* conversion changed nothing: ignore */

	_suffix_init(&suffix, lookupname, strlen(lookupname));
	suffix.wildcard = e->wildcard;
	suffixp = _vector_get(v, _vector_add(v, &suffix));
	suffixp->label = suffixp->label_buf; /* set label to changed address */
}
void StringCaseTest::TestCasingImpl(const UnicodeString &input, const UnicodeString &output, int32_t whichCase, void *iter, const char *localeID, uint32_t options) { // UnicodeString UnicodeString result; const char *name; Locale locale(localeID); result=input; switch(whichCase) { case TEST_LOWER: name="toLower"; result.toLower(locale); break; case TEST_UPPER: name="toUpper"; result.toUpper(locale); break; #if !UCONFIG_NO_BREAK_ITERATION case TEST_TITLE: name="toTitle"; result.toTitle((BreakIterator *)iter, locale, options); break; #endif case TEST_FOLD: name="foldCase"; result.foldCase(options); break; default: name=""; break; // won't happen } if(result!=output) { dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name); } #if !UCONFIG_NO_BREAK_ITERATION if(whichCase==TEST_TITLE && options==0) { result=input; result.toTitle((BreakIterator *)iter, locale); if(result!=output) { dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res"); } } #endif // UTF-8 char utf8In[100], utf8Out[100]; int32_t utf8InLength, utf8OutLength, resultLength; UChar *buffer; IcuTestErrorCode errorCode(*this, "TestCasingImpl"); LocalUCaseMapPointer csm(ucasemap_open(localeID, options, errorCode)); #if !UCONFIG_NO_BREAK_ITERATION if(iter!=NULL) { // Clone the break iterator so that the UCaseMap can safely adopt it. 
UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, NULL, errorCode); ucasemap_setBreakIterator(csm.getAlias(), clone, errorCode); } #endif u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), errorCode); switch(whichCase) { case TEST_LOWER: name="ucasemap_utf8ToLower"; utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(), utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, errorCode); break; case TEST_UPPER: name="ucasemap_utf8ToUpper"; utf8OutLength=ucasemap_utf8ToUpper(csm.getAlias(), utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, errorCode); break; #if !UCONFIG_NO_BREAK_ITERATION case TEST_TITLE: name="ucasemap_utf8ToTitle"; utf8OutLength=ucasemap_utf8ToTitle(csm.getAlias(), utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, errorCode); break; #endif case TEST_FOLD: name="ucasemap_utf8FoldCase"; utf8OutLength=ucasemap_utf8FoldCase(csm.getAlias(), utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, errorCode); break; default: name=""; utf8OutLength=0; break; // won't happen } buffer=result.getBuffer(utf8OutLength); u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, errorCode); result.releaseBuffer(errorCode.isSuccess() ? resultLength : 0); if(errorCode.isFailure()) { errcheckln(errorCode, "error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode)); errorCode.reset(); } else if(result!=output) { errln("error: %s() got a wrong result for a test case from casing.res", name); errln("expected \"" + output + "\" got \"" + result + "\"" ); } }
//------------------------------------------------------------------------------- // // Read a text data file, convert it from UTF-8 to UChars, and return the data // in one big UChar * buffer, which the caller must delete. // // (Lightly modified version of a similar function in regextst.cpp) // //-------------------------------------------------------------------------------- UChar *DecimalFormatTest::ReadAndConvertFile(const char *fileName, int32_t &ulen, UErrorCode &status) { UChar *retPtr = NULL; char *fileBuf = NULL; const char *fileBufNoBOM = NULL; FILE *f = NULL; ulen = 0; if (U_FAILURE(status)) { return retPtr; } // // Open the file. // f = fopen(fileName, "rb"); if (f == 0) { dataerrln("Error opening test data file %s\n", fileName); status = U_FILE_ACCESS_ERROR; return NULL; } // // Read it in // int32_t fileSize; int32_t amtRead; int32_t amtReadNoBOM; fseek( f, 0, SEEK_END); fileSize = ftell(f); fileBuf = new char[fileSize]; fseek(f, 0, SEEK_SET); amtRead = fread(fileBuf, 1, fileSize, f); if (amtRead != fileSize || fileSize <= 0) { errln("Error reading test data file."); goto cleanUpAndReturn; } // // Look for a UTF-8 BOM on the data just read. // The test data file is UTF-8. // The BOM needs to be there in the source file to keep the Windows & // EBCDIC machines happy, so force an error if it goes missing. // Many Linux editors will silently strip it. // fileBufNoBOM = fileBuf + 3; amtReadNoBOM = amtRead - 3; if (fileSize<3 || uprv_strncmp(fileBuf, "\xEF\xBB\xBF", 3) != 0) { // TODO: restore this check. errln("Test data file %s is missing its BOM", fileName); fileBufNoBOM = fileBuf; amtReadNoBOM = amtRead; } // // Find the length of the input in UTF-16 UChars // (by preflighting the conversion) // u_strFromUTF8(NULL, 0, &ulen, fileBufNoBOM, amtReadNoBOM, &status); // // Convert file contents from UTF-8 to UTF-16 // if (status == U_BUFFER_OVERFLOW_ERROR) { // Buffer Overflow is expected from the preflight operation. 
status = U_ZERO_ERROR; retPtr = new UChar[ulen+1]; u_strFromUTF8(retPtr, ulen+1, NULL, fileBufNoBOM, amtReadNoBOM, &status); } cleanUpAndReturn: fclose(f); delete[] fileBuf; if (U_FAILURE(status)) { errln("ICU Error \"%s\"\n", u_errorName(status)); delete retPtr; retPtr = NULL; }; return retPtr; }
void ConfusabledataBuilder::build(const char * confusables, int32_t confusablesLen, UErrorCode &status) { // Convert the user input data from UTF-8 to UChar (UTF-16) int32_t inputLen = 0; if (U_FAILURE(status)) { return; } u_strFromUTF8(NULL, 0, &inputLen, confusables, confusablesLen, &status); if (status != U_BUFFER_OVERFLOW_ERROR) { return; } status = U_ZERO_ERROR; fInput = static_cast<UChar *>(uprv_malloc((inputLen+1) * sizeof(UChar))); if (fInput == NULL) { status = U_MEMORY_ALLOCATION_ERROR; } u_strFromUTF8(fInput, inputLen+1, NULL, confusables, confusablesLen, &status); // Regular Expression to parse a line from Confusables.txt. The expression will match // any line. What was matched is determined by examining which capture groups have a match. // Capture Group 1: the source char // Capture Group 2: the replacement chars // Capture Group 3-6 the table type, SL, SA, ML, or MA // Capture Group 7: A blank or comment only line. // Capture Group 8: A syntactically invalid line. Anything that didn't match before. // Example Line from the confusables.txt source file: // "1D702 ; 006E 0329 ; SL # MATHEMATICAL ITALIC SMALL ETA ... " fParseLine = uregex_openC( "(?m)^[ \\t]*([0-9A-Fa-f]+)[ \\t]+;" // Match the source char "[ \\t]*([0-9A-Fa-f]+" // Match the replacement char(s) "(?:[ \\t]+[0-9A-Fa-f]+)*)[ \\t]*;" // (continued) "\\s*(?:(SL)|(SA)|(ML)|(MA))" // Match the table type "[ \\t]*(?:#.*?)?$" // Match any trailing #comment "|^([ \\t]*(?:#.*?)?)$" // OR match empty lines or lines with only a #comment "|^(.*?)$", // OR match any line, which catches illegal lines. 0, NULL, &status); // Regular expression for parsing a hex number out of a space-separated list of them. // Capture group 1 gets the number, with spaces removed. fParseHexNum = uregex_openC("\\s*([0-9A-F]+)", 0, NULL, &status); // Zap any Byte Order Mark at the start of input. Changing it to a space is benign // given the syntax of the input. 
if (*fInput == 0xfeff) { *fInput = 0x20; } // Parse the input, one line per iteration of this loop. uregex_setText(fParseLine, fInput, inputLen, &status); while (uregex_findNext(fParseLine, &status)) { fLineNum++; if (uregex_start(fParseLine, 7, &status) >= 0) { // this was a blank or comment line. continue; } if (uregex_start(fParseLine, 8, &status) >= 0) { // input file syntax error. status = U_PARSE_ERROR; return; } // We have a good input line. Extract the key character and mapping string, and // put them into the appropriate mapping table. UChar32 keyChar = SpoofImpl::ScanHex(fInput, uregex_start(fParseLine, 1, &status), uregex_end(fParseLine, 1, &status), status); int32_t mapStringStart = uregex_start(fParseLine, 2, &status); int32_t mapStringLength = uregex_end(fParseLine, 2, &status) - mapStringStart; uregex_setText(fParseHexNum, &fInput[mapStringStart], mapStringLength, &status); UnicodeString *mapString = new UnicodeString(); if (mapString == NULL) { status = U_MEMORY_ALLOCATION_ERROR; return; } while (uregex_findNext(fParseHexNum, &status)) { UChar32 c = SpoofImpl::ScanHex(&fInput[mapStringStart], uregex_start(fParseHexNum, 1, &status), uregex_end(fParseHexNum, 1, &status), status); mapString->append(c); } U_ASSERT(mapString->length() >= 1); // Put the map (value) string into the string pool // This a little like a Java intern() - any duplicates will be eliminated. SPUString *smapString = stringPool->addString(mapString, status); // Add the UChar32 -> string mapping to the appropriate table. UHashtable *table = uregex_start(fParseLine, 3, &status) >= 0 ? fSLTable : uregex_start(fParseLine, 4, &status) >= 0 ? fSATable : uregex_start(fParseLine, 5, &status) >= 0 ? fMLTable : uregex_start(fParseLine, 6, &status) >= 0 ? fMATable : NULL; U_ASSERT(table != NULL); uhash_iput(table, keyChar, smapString, &status); fKeySet->add(keyChar); if (U_FAILURE(status)) { return; } } // Input data is now all parsed and collected. 
// Now create the run-time binary form of the data. // // This is done in two steps. First the data is assembled into vectors and strings, // for ease of construction, then the contents of these collections are dumped // into the actual raw-bytes data storage. // Build up the string array, and record the index of each string therein // in the (build time only) string pool. // Strings of length one are not entered into the strings array. // At the same time, build up the string lengths table, which records the // position in the string table of the first string of each length >= 4. // (Strings in the table are sorted by length) stringPool->sort(status); fStringTable = new UnicodeString(); fStringLengthsTable = new UVector(status); int32_t previousStringLength = 0; int32_t previousStringIndex = 0; int32_t poolSize = stringPool->size(); int32_t i; for (i=0; i<poolSize; i++) { SPUString *s = stringPool->getByIndex(i); int32_t strLen = s->fStr->length(); int32_t strIndex = fStringTable->length(); U_ASSERT(strLen >= previousStringLength); if (strLen == 1) { // strings of length one do not get an entry in the string table. // Keep the single string character itself here, which is the same // convention that is used in the final run-time string table index. s->fStrTableIndex = s->fStr->charAt(0); } else { if ((strLen > previousStringLength) && (previousStringLength >= 4)) { fStringLengthsTable->addElement(previousStringIndex, status); fStringLengthsTable->addElement(previousStringLength, status); } s->fStrTableIndex = strIndex; fStringTable->append(*(s->fStr)); } previousStringLength = strLen; previousStringIndex = strIndex; } // Make the final entry to the string lengths table. // (it holds an entry for the _last_ string of each length, so adding the // final one doesn't happen in the main loop because no longer string was encountered.) 
if (previousStringLength >= 4) { fStringLengthsTable->addElement(previousStringIndex, status); fStringLengthsTable->addElement(previousStringLength, status); } // Construct the compile-time Key and Value tables // // For each key code point, check which mapping tables it applies to, // and create the final data for the key & value structures. // // The four logical mapping tables are conflated into one combined table. // If multiple logical tables have the same mapping for some key, they // share a single entry in the combined table. // If more than one mapping exists for the same key code point, multiple // entries will be created in the table for (int32_t range=0; range<fKeySet->getRangeCount(); range++) { // It is an oddity of the UnicodeSet API that simply enumerating the contained // code points requires a nested loop. for (UChar32 keyChar=fKeySet->getRangeStart(range); keyChar <= fKeySet->getRangeEnd(range); keyChar++) { addKeyEntry(keyChar, fSLTable, USPOOF_SL_TABLE_FLAG, status); addKeyEntry(keyChar, fSATable, USPOOF_SA_TABLE_FLAG, status); addKeyEntry(keyChar, fMLTable, USPOOF_ML_TABLE_FLAG, status); addKeyEntry(keyChar, fMATable, USPOOF_MA_TABLE_FLAG, status); } } // Put the assembled data into the flat runtime array outputData(status); // All of the intermediate allocated data belongs to the ConfusabledataBuilder // object (this), and is deleted in the destructor. return; }
void StringCaseTest::TestCasingImpl(const UnicodeString &input, const UnicodeString &output, int32_t whichCase, void *iter, const char *localeID, uint32_t options) { // UnicodeString UnicodeString result; const char *name; Locale locale(localeID); result=input; switch(whichCase) { case TEST_LOWER: name="toLower"; result.toLower(locale); break; case TEST_UPPER: name="toUpper"; result.toUpper(locale); break; #if !UCONFIG_NO_BREAK_ITERATION case TEST_TITLE: name="toTitle"; result.toTitle((BreakIterator *)iter, locale, options); break; #endif case TEST_FOLD: name="foldCase"; result.foldCase(options); break; default: name=""; break; // won't happen } if(result!=output) { errln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name); } #if !UCONFIG_NO_BREAK_ITERATION if(whichCase==TEST_TITLE && options==0) { result=input; result.toTitle((BreakIterator *)iter, locale); if(result!=output) { errln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res"); } } #endif // UTF-8 char utf8In[100], utf8Out[100]; int32_t utf8InLength, utf8OutLength, resultLength; UChar *buffer; UCaseMap *csm; UErrorCode errorCode; errorCode=U_ZERO_ERROR; csm=ucasemap_open(localeID, options, &errorCode); #if !UCONFIG_NO_BREAK_ITERATION if(iter!=NULL) { // Clone the break iterator so that the UCaseMap can safely adopt it. int32_t size=1; // Not 0 because that only gives preflighting. 
UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, &size, &errorCode); ucasemap_setBreakIterator(csm, clone, &errorCode); } #endif u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), &errorCode); switch(whichCase) { case TEST_LOWER: name="ucasemap_utf8ToLower"; utf8OutLength=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, &errorCode); break; case TEST_UPPER: name="ucasemap_utf8ToUpper"; utf8OutLength=ucasemap_utf8ToUpper(csm, utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, &errorCode); break; #if !UCONFIG_NO_BREAK_ITERATION case TEST_TITLE: name="ucasemap_utf8ToTitle"; utf8OutLength=ucasemap_utf8ToTitle(csm, utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, &errorCode); break; #endif case TEST_FOLD: name="ucasemap_utf8FoldCase"; utf8OutLength=ucasemap_utf8FoldCase(csm, utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, &errorCode); break; default: name=""; utf8OutLength=0; break; // won't happen } buffer=result.getBuffer(utf8OutLength); u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, &errorCode); result.releaseBuffer(U_SUCCESS(errorCode) ? resultLength : 0); if(U_FAILURE(errorCode)) { errln("error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode)); } else if(result!=output) { errln("error: %s() got a wrong result for a test case from casing.res", name); } ucasemap_close(csm); }
/** * @param mode same with ICU mode flag UNormalizationMode. */ size_t uni_normalize(char* src, size_t src_len, char* dst, size_t dst_capacity, int mode, int opt){ UNormalizationMode umode = (UNormalizationMode)mode; // status holder UErrorCode ustatus = U_ZERO_ERROR; // UChar source UChar *s; int32_t s_length, s_capacity; // UChar normalized UChar *d; int32_t d_length, d_capacity; // UTF8 normalzied int32_t dst_alloc; // convert UTF-8 -> UChar u_strFromUTF8(NULL, 0, &s_length, src, (int32_t)src_len, &ustatus); if(U_FAILURE(ustatus) && ustatus!=U_BUFFER_OVERFLOW_ERROR){ char buf[1024]; sprintf(buf,"ICU u_strFromUTF8(pre-flighting) error with %d\n", ustatus); fputs(buf, stderr); fflush(stderr); return 0; }else{ ustatus = U_ZERO_ERROR; } s_capacity = (s_length+7)/8*8; // for '\0' termination s = (UChar*)my_malloc(s_capacity*sizeof(UChar), MYF(MY_WME)); if(!s){ fputs("malloc failure\n", stderr); fflush(stderr); return 0; } s = u_strFromUTF8(s, s_length, NULL, src, (int32_t)src_len, &ustatus); if(U_FAILURE(ustatus)){ char buf[1024]; sprintf(buf,"ICU u_strFromUTF8 error with %d\n", ustatus); fputs(buf, stderr); fflush(stderr); my_free(s); return 0; }else{ ustatus = U_ZERO_ERROR; } // normalize d_length = unorm_normalize(s, s_length, umode, (int32_t)opt, NULL, 0, &ustatus); if(U_FAILURE(ustatus) && ustatus!=U_BUFFER_OVERFLOW_ERROR){ char buf[1024]; sprintf(buf,"ICU unorm_normalize(pre-flighting) error with %d\n", ustatus); fputs(buf, stderr); fflush(stderr); my_free(s); return 0; }else{ ustatus = U_ZERO_ERROR; } d_capacity = (d_length+7)/8*8; d = (UChar*)my_malloc(d_capacity*sizeof(UChar), MYF(MY_WME)); if(!d){ fputs("malloc failure\n", stderr); fflush(stderr); my_free(s); return 0; } d_length = unorm_normalize(s, s_length, umode, (int32_t)opt, d, d_capacity, &ustatus); if(U_FAILURE(ustatus)){ char buf[1024]; sprintf(buf,"ICU unorm_normalize error with %d\n", ustatus); fputs(buf, stderr); fflush(stderr); my_free(s); my_free(d); return 0; }else{ ustatus = U_ZERO_ERROR; } 
my_free(s); // encode UChar -> UTF-8 u_strToUTF8(dst, (int32_t)dst_capacity, &dst_alloc, d, d_length, &ustatus); my_free(d); return (size_t)dst_alloc; }
/*
 * Apply the RFC 4013 SASLPrep profile to a UTF-8 string using ICU.
 *
 * name         label substituted into the error message on failure.
 * in_utf8      the UTF-8 input.
 * in_utf8_len  length of in_utf8, handed to u_strFromUTF8().
 * err          receives a MONGOC_ERROR_SCRAM / PROTOCOL_ERROR on failure.
 *
 * Returns a newly allocated (bson_malloc) UTF-8 string, or NULL with err set.
 *
 * Pipeline: UTF-8 input -> UTF-16 -> usprep_prepare() -> UTF-16 -> UTF-8.
 * Each ICU step is called twice: once with no buffer to obtain the required
 * length (expected to report U_BUFFER_OVERFLOW_ERROR), then once for real.
 */
char *
_mongoc_sasl_prep_impl (const char *name,
                        const char *in_utf8,
                        int in_utf8_len,
                        bson_error_t *err)
{
   UChar *src16;
   UChar *prepped16;
   char *result;
   int32_t src16_len;
   int32_t prepped16_len;
   int32_t result_len;
   UStringPrepProfile *profile;
   UErrorCode icu_rc = U_ZERO_ERROR;

/* Record the failure in err (msg is a format taking the name) and bail out. */
#define SASL_PREP_ERR_RETURN(msg)                        \
   do {                                                  \
      bson_set_error (err,                               \
                      MONGOC_ERROR_SCRAM,                \
                      MONGOC_ERROR_SCRAM_PROTOCOL_ERROR, \
                      (msg),                             \
                      name);                             \
      return NULL;                                       \
   } while (0)

   /* ---- Stage 1: UTF-8 -> UTF-16 ---- */

   /* size the UTF-16 buffer */
   (void) u_strFromUTF8 (NULL, 0, &src16_len, in_utf8, in_utf8_len, &icu_rc);
   if (icu_rc != U_BUFFER_OVERFLOW_ERROR) {
      SASL_PREP_ERR_RETURN ("could not calculate UTF-16 length of %s");
   }

   /* one extra element for the terminator */
   icu_rc = U_ZERO_ERROR;
   src16 = bson_malloc (sizeof (UChar) * (src16_len + 1));

   (void) u_strFromUTF8 (
      src16, src16_len + 1, NULL, in_utf8, in_utf8_len, &icu_rc);
   if (icu_rc != U_ZERO_ERROR) {
      bson_free (src16);
      SASL_PREP_ERR_RETURN ("could not convert %s to UTF-16");
   }

   /* ---- Stage 2: SASLPrep on the UTF-16 text ---- */

   profile = usprep_openByType (USPREP_RFC4013_SASLPREP, &icu_rc);
   if (icu_rc != U_ZERO_ERROR) {
      bson_free (src16);
      SASL_PREP_ERR_RETURN ("could not start SASLPrep for %s");
   }

   /* size the prepared output */
   prepped16_len = usprep_prepare (
      profile, src16, src16_len, NULL, 0, USPREP_DEFAULT, NULL, &icu_rc);
   if (icu_rc != U_BUFFER_OVERFLOW_ERROR) {
      bson_free (src16);
      usprep_close (profile);
      SASL_PREP_ERR_RETURN ("could not calculate SASLPrep length of %s");
   }

   /* run the preparation into a buffer of the reported size */
   icu_rc = U_ZERO_ERROR;
   prepped16 = bson_malloc (sizeof (UChar) * (prepped16_len + 1));

   (void) usprep_prepare (profile,
                          src16,
                          src16_len,
                          prepped16,
                          prepped16_len + 1,
                          USPREP_DEFAULT,
                          NULL,
                          &icu_rc);
   if (icu_rc != U_ZERO_ERROR) {
      bson_free (src16);
      bson_free (prepped16);
      usprep_close (profile);
      SASL_PREP_ERR_RETURN ("could not execute SASLPrep for %s");
   }

   /* the input copy and the profile are not needed past this point */
   bson_free (src16);
   usprep_close (profile);

   /* ---- Stage 3: UTF-16 -> UTF-8 ---- */

   /* size the UTF-8 output */
   (void) u_strToUTF8 (
      NULL, 0, &result_len, prepped16, prepped16_len, &icu_rc);
   if (icu_rc != U_BUFFER_OVERFLOW_ERROR) {
      bson_free (prepped16);
      SASL_PREP_ERR_RETURN ("could not calculate UTF-8 length of %s");
   }

   /* one extra byte for the terminator */
   icu_rc = U_ZERO_ERROR;
   result = (char *) bson_malloc (sizeof (char) * (result_len + 1));

   (void) u_strToUTF8 (
      result, result_len + 1, NULL, prepped16, prepped16_len, &icu_rc);
   if (icu_rc != U_ZERO_ERROR) {
      bson_free (result);
      bson_free (prepped16);
      SASL_PREP_ERR_RETURN ("could not convert %s back to UTF-8");
   }

   bson_free (prepped16);
   return result;

#undef SASL_PREP_ERR_RETURN
}