/*
 * Convert src_len UTF-16 code units from src_utf16 into UTF-8 and leave the
 * result in dest_utf8.
 *
 * The output is requested at position 0 of dest_utf8 and the buffer's used
 * size is set to the converted length afterwards, so the buffer ends up
 * holding just the conversion result (presumably replacing any earlier
 * content - confirm against the buffer API).
 *
 * UNICODE_REPLACEMENT_CHAR is handed to ICU as the substitution character.
 * Any ICU failure other than the buffer overflow handled by the retry ends in
 * i_panic().
 */
void fts_icu_utf16_to_utf8(string_t *dest_utf8, const UChar *src_utf16,
			   unsigned int src_len)
{
	UErrorCode status = U_ZERO_ERROR;
	int32_t utf8_len = 0;
	int32_t substitutions = 0;
	char *out;
	char *ret = NULL;

	/* First pass: convert into whatever space the buffer already has. */
	out = buffer_get_space_unsafe(dest_utf8, 0,
				      buffer_get_writable_size(dest_utf8));
	ret = u_strToUTF8WithSub(out, buffer_get_writable_size(dest_utf8),
				 &utf8_len, src_utf16, src_len,
				 UNICODE_REPLACEMENT_CHAR,
				 &substitutions, &status);

	if (status == U_BUFFER_OVERFLOW_ERROR) {
		/* Second pass: the first attempt did not fit. Ask the buffer
		   for the length reported by that attempt, reset the error
		   state and convert again. */
		out = buffer_get_space_unsafe(dest_utf8, 0, utf8_len);
		status = U_ZERO_ERROR;
		ret = u_strToUTF8WithSub(out,
					 buffer_get_writable_size(dest_utf8),
					 &utf8_len, src_utf16, src_len,
					 UNICODE_REPLACEMENT_CHAR,
					 &substitutions, &status);
	}

	if (U_FAILURE(status)) {
		i_panic("LibICU u_strToUTF8WithSub() failed: %s",
			u_errorName(status));
	}

	/* Publish the converted length as the buffer's used size. */
	buffer_set_used_size(dest_utf8, utf8_len);
	/* ICU is expected to hand back the destination pointer it was given. */
	i_assert(ret == out);
}
/* {{{ intl_charFromString * faster than doing intl_convert_utf16_to_utf8(&res, &res_len, * from.getBuffer(), from.length(), &status), * but consumes more memory */ int intl_charFromString(const UnicodeString &from, char **res, int *res_len, UErrorCode *status) { if (from.isBogus()) { return FAILURE; } //the number of UTF-8 code units is not larger than that of UTF-16 code //units * 3 + 1 for the terminator int32_t capacity = from.length() * 3 + 1; if (from.isEmpty()) { *res = (char*)emalloc(1); **res = '\0'; *res_len = 0; return SUCCESS; } *res = (char*)emalloc(capacity); *res_len = 0; //tbd const UChar *utf16buf = from.getBuffer(); int32_t actual_len; u_strToUTF8WithSub(*res, capacity - 1, &actual_len, utf16buf, from.length(), U_SENTINEL, NULL, status); if (U_FAILURE(*status)) { efree(*res); *res = NULL; return FAILURE; } (*res)[actual_len] = '\0'; *res_len = (int)actual_len; return SUCCESS; }