/**
 * Converts outString to UTF-8 and writes the resulting bytes to fileStream.
 *
 * @param fileStream  Stream that receives the UTF-8 bytes.
 * @param outString   Text to convert and write.
 * @return The value returned by T_FileStream_write(), or 0 when the text
 *         could not be converted or the conversion buffer could not be
 *         allocated (nothing is written in that case).
 */
static int32_t write_utf8_file(FileStream* fileStream, UnicodeString outString)
{
    UErrorCode status = U_ZERO_ERROR;
    int32_t len = 0;

    // Preflight to get the destination buffer size. A zero-capacity call is
    // expected to report U_BUFFER_OVERFLOW_ERROR; anything else that counts
    // as a failure means the text cannot be converted at all.
    u_strToUTF8(NULL, 0, &len, outString.getBuffer(), outString.length(), &status);
    if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {
        return 0;
    }

    // Allocate the buffer.
    char* dest = (char*)uprv_malloc(len);
    if (dest == NULL) {
        return 0;
    }

    // Convert the data. The buffer is exactly len bytes, so the
    // "not terminated" warning is expected and harmless here.
    status = U_ZERO_ERROR;
    u_strToUTF8(dest, len, &len, outString.getBuffer(), outString.length(), &status);

    // Write the data to the output file only if the conversion succeeded.
    int32_t ret = 0;
    if (U_SUCCESS(status)) {
        ret = T_FileStream_write(fileStream, dest, len);
    }

    uprv_free(dest);
    return ret;
}
UTrie2PerfTest(int32_t argc, const char *argv[], UErrorCode &status) : UPerfTest(argc, argv, NULL, 0, "", status), utf8(NULL), utf8Length(0), countInputCodePoints(0) { if (U_SUCCESS(status)) { #if 0 // See comment at unorm_initUTrie2() forward declaration. unorm_initUTrie2(&status); ubidi_initUTrie2(&status); #endif int32_t inputLength; UPerfTest::getBuffer(inputLength, status); if(U_SUCCESS(status) && inputLength>0) { countInputCodePoints = u_countChar32(buffer, bufferLen); // Preflight the UTF-8 length and allocate utf8. u_strToUTF8(NULL, 0, &utf8Length, buffer, bufferLen, &status); if(status==U_BUFFER_OVERFLOW_ERROR) { utf8=(char *)malloc(utf8Length); if(utf8!=NULL) { status=U_ZERO_ERROR; u_strToUTF8(utf8, utf8Length, NULL, buffer, bufferLen, &status); } else { status=U_MEMORY_ALLOCATION_ERROR; } } if(verbose) { printf("code points:%ld len16:%ld len8:%ld " "B/cp:%.3g\n", (long)countInputCodePoints, (long)bufferLen, (long)utf8Length, (double)utf8Length/countInputCodePoints); } } } }
/*
 * Converts a UTF-16 string to a newly allocated, NUL-terminated UTF-8 string.
 *
 * ustr:       UTF-16 text to convert.
 * ustrLength: length of ustr as passed on to u_strToUTF8().
 *
 * Returns the converted string (release with g_free()), or NULL after
 * emitting a g_warning() when the conversion fails.
 */
static gchar *
ustring_to_utf8 (const UChar *ustr, int32_t ustrLength)
{
  gchar *dest;
  int32_t destLength = 0;
  UErrorCode errorCode = U_ZERO_ERROR;

  /* Preflight. A non-empty result reports U_BUFFER_OVERFLOW_ERROR; an empty
   * result "fits" the zero-sized buffer and reports
   * U_STRING_NOT_TERMINATED_WARNING instead. Both mean destLength is valid. */
  u_strToUTF8 (NULL, 0, &destLength, ustr, ustrLength, &errorCode);
  if (errorCode != U_BUFFER_OVERFLOW_ERROR
      && errorCode != U_STRING_NOT_TERMINATED_WARNING)
    {
      g_warning ("can't get the number of byte required to convert ustring: %s",
                 u_errorName (errorCode));
      return NULL;
    }

  /* One extra byte so the result is always NUL-terminated. */
  dest = g_malloc0 (destLength + 1);

  errorCode = U_ZERO_ERROR;
  u_strToUTF8 (dest, destLength + 1, NULL, ustr, ustrLength, &errorCode);
  if (errorCode != U_ZERO_ERROR)
    {
      g_free (dest);
      g_warning ("can't convert ustring to UTF-8 string: %s",
                 u_errorName (errorCode));
      return NULL;
    }

  return dest;
}
/*
 * Converts the UTF-16 contents of src16 into the UTF-8 buffer dest8.
 *
 * If the first attempt overflows dest8, the buffer is resized to twice the
 * required length and the conversion is retried once. On success
 * dest8->utf8_len receives the converted length; otherwise dest8 is cleared.
 * Returns the final value of *status.
 */
UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 *dest8,
                             const struct icu_buf_utf16 *src16,
                             UErrorCode *status)
{
    int32_t needed = 0;

    u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap, &needed,
                src16->utf16, src16->utf16_len, status);

    if (*status == U_BUFFER_OVERFLOW_ERROR)
    {
        /* too small: grow the destination and try again */
        icu_buf_utf8_resize(dest8, needed * 2);
        *status = U_ZERO_ERROR;
        u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap, &needed,
                    src16->utf16, src16->utf16_len, status);
    }

    if (U_FAILURE(*status) || needed > dest8->utf8_cap)
        icu_buf_utf8_clear(dest8);
    else
        dest8->utf8_len = needed;

    return *status;
}
/*
 * Converts ulen UTF-16 code units at ustr into a Ruby string tagged with the
 * UTF-8 encoding. Returns Qnil when the conversion fails.
 */
static VALUE to_utf8(UChar *ustr, int32_t ulen)
{
    char buffer[BUF_SIZE];
    int32_t len = 0;
    UErrorCode status = U_ZERO_ERROR;

    /* Preflight with zero capacity to learn how many bytes are needed. */
    u_strToUTF8(buffer, 0, &len, ustr, ulen, &status);
    if (status != U_INVALID_CHAR_FOUND) {
        /* Overflow is the expected preflight outcome; other failures abort. */
        if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
            return Qnil;
    } else {
        len = 0;
    }

    /* Encode into a temporary heap buffer of the reported size. */
    status = U_ZERO_ERROR;
    char *ptr = ALLOC_N(char, len);
    u_strToUTF8(ptr, len, &len, ustr, ulen, &status);

    VALUE str = Qnil;
    if (U_SUCCESS(status))
        str = rb_enc_str_new(ptr, len, rb_utf8_encoding());

    xfree(ptr);
    return str;
}
// Converts icu_str_sz UTF-16 code units at icu_str into a newly allocated,
// NUL-terminated UTF-8 string stored in *ret_utf_str.
// ret_utf_str must be freed after usage. -1 on icu_str_sz will assume
// icu_str is null terminated.
// Returns 0 on success, -EINVAL on bad arguments or conversion failure
// (the ICU code is left in *ret_icu_err) and -ENOMEM via SOL_NULL_CHECK
// when the buffer cannot be allocated.
static int
utf8_from_icu_str_slice(const UChar *icu_str, int32_t icu_str_sz,
    char **ret_utf_str, UErrorCode *ret_icu_err)
{
    int32_t utf_sz;

    SOL_NULL_CHECK(ret_utf_str, -EINVAL);
    SOL_NULL_CHECK(ret_icu_err, -EINVAL);

    // Preflight: overflow is the expected answer from a zero-sized buffer.
    *ret_icu_err = U_ZERO_ERROR;
    u_strToUTF8(NULL, 0, &utf_sz, icu_str, icu_str_sz, ret_icu_err);
    if (*ret_icu_err != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(*ret_icu_err))
        return -EINVAL;

    // One spare, zero-initialised byte for the terminator.
    *ret_utf_str = calloc(utf_sz + 1, sizeof(char));
    SOL_NULL_CHECK(*ret_utf_str, -ENOMEM);

    *ret_icu_err = U_ZERO_ERROR;
    u_strToUTF8(*ret_utf_str, utf_sz + 1, NULL, icu_str, icu_str_sz,
        ret_icu_err);

    if (U_SUCCESS(*ret_icu_err) && (*ret_utf_str)[utf_sz] == '\0')
        return 0;

    free(*ret_utf_str);
    *ret_utf_str = NULL;
    return -EINVAL;
}
/*
 * Append a full case mapping result, see UCASE_MAX_STRING_LENGTH.
 *
 * result encodes what to append:
 *   result < 0                          -> the code point ~result
 *   0 <= result <= UCASE_MAX_STRING_LENGTH -> the first `result` units of s
 *   otherwise                           -> the code point `result`
 *
 * Returns the new destIndex. When the output does not fit (or destIndex is
 * already at/after destCapacity) the index still advances by the number of
 * bytes that would have been needed, so the caller learns the full length.
 */
static U_INLINE int32_t
appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
             int32_t result, const UChar *s) {
    UBool isString = (UBool)(result >= 0 && result <= UCASE_MAX_STRING_LENGTH);

    if(!isString) {
        /* single code point */
        UChar32 c = (result < 0) ? ~result : result;

        if(destIndex < destCapacity) {
            UBool isError = FALSE;

            U8_APPEND(dest, destIndex, destCapacity, c, isError);
            if(!isError) {
                return destIndex;
            }
            /* overflow, nothing written: fall through and just count */
        }
        return destIndex + U8_LENGTH(c);
    }

    /* string of `result` UTF-16 code units */
    {
        int32_t destLength;
        UErrorCode errorCode = U_ZERO_ERROR;

        if(destIndex < destCapacity) {
            /* we might have an overflow, but we know the actual length */
            u_strToUTF8((char *)(dest + destIndex), destCapacity - destIndex,
                        &destLength, s, result, &errorCode);
        } else {
            /* preflight */
            u_strToUTF8(NULL, 0, &destLength, s, result, &errorCode);
        }
        return destIndex + destLength;
    }
}
/**
 * Converts the UTF-16 payload of `in` to UTF-8 in a freshly allocated binary.
 *
 * @param in      Binary holding the UTF-16 source text.
 * @param out     Receives the UTF-8 binary (released again on failure).
 * @param len     In: capacity to allocate. Out: length reported by ICU.
 * @param status  Receives the ICU status, or U_MEMORY_ALLOCATION_ERROR when
 *                the output binary cannot be allocated.
 */
inline void do_to_utf8(
    ErlNifBinary in,
    ErlNifBinary& out,
    int32_t& len,
    UErrorCode& status)
{
    if (!enif_alloc_binary(len, &out)) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    status = U_ZERO_ERROR;
    u_strToUTF8(
        (char*) out.data,           /* dest */
        len,
        &len,
        (const UChar*) in.data,     /* src */
        TO_ULEN(in.size),           /* len of src */
        &status);

    if (U_SUCCESS(status)) {
        /* shrink binary if it was too large */
        if (len != (int32_t) out.size) {
            enif_realloc_binary(&out, len);
        }
    } else {
        enif_release_binary(&out);
    }
}
char *ICUStringMgr::upperUTF8(char *buf, unsigned int maxlen) const { char *ret = buf; int max = (maxlen) ? maxlen : strlen(buf); UErrorCode err = U_ZERO_ERROR; if (!buf || !max) { return ret; } UChar *lowerStr = new UChar[max+10]; UChar *upperStr = new UChar[max+10]; u_strFromUTF8(lowerStr, max+9, 0, buf, -1, &err); if (err != U_ZERO_ERROR) { // SWLog::getSystemLog()->logError("from: %s", u_errorName(err)); delete [] lowerStr; delete [] upperStr; return ret; } u_strToUpper(upperStr, max+9, lowerStr, -1, 0, &err); if (err != U_ZERO_ERROR) { // SWLog::getSystemLog()->logError("upperCase: %s", u_errorName(err)); delete [] lowerStr; delete [] upperStr; return ret; } ret = u_strToUTF8(ret, max, 0, upperStr, -1, &err); delete [] lowerStr; delete [] upperStr; return ret; }
/**
 * UTF-8 front end for uspoof_getSkeleton(): converts the input to UTF-16,
 * computes the skeleton, and converts the skeleton back to UTF-8 in dest.
 *
 * @return The UTF-8 length reported by u_strToUTF8() for the skeleton, or 0
 *         when *status was already a failure or a buffer allocation failed.
 */
U_CAPI int32_t U_EXPORT2
uspoof_getSkeletonUTF8(const USpoofChecker *sc,
                       uint32_t type,
                       const char *s, int32_t length,
                       char *dest, int32_t destCapacity,
                       UErrorCode *status) {
    // There is no UTF-8 normalization API, so the input is simply converted
    // to UTF-16. Typical input is an identifier, i.e. short enough for the
    // stack buffers below.
    if (U_FAILURE(*status)) {
        return 0;
    }

    // UTF-16 forms of the input and of the skeleton; heap storage is only
    // used when the stack buffers turn out to be too small.
    UChar stackInput[USPOOF_STACK_BUFFER_SIZE];
    UChar stackSkeleton[USPOOF_STACK_BUFFER_SIZE];
    UChar *input16 = stackInput;
    UChar *skeleton16 = stackSkeleton;

    int32_t inputLen16 = 0;
    int32_t skeletonLen16 = 0;
    int32_t skeletonLen8 = 0;
    UBool proceed = TRUE;

    u_strFromUTF8(input16, USPOOF_STACK_BUFFER_SIZE, &inputLen16, s, length, status);
    if (*status == U_BUFFER_OVERFLOW_ERROR) {
        input16 = static_cast<UChar *>(uprv_malloc((inputLen16+1)*sizeof(UChar)));
        if (input16 == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            proceed = FALSE;
        } else {
            *status = U_ZERO_ERROR;
            u_strFromUTF8(input16, inputLen16+1, &inputLen16, s, length, status);
        }
    }

    if (proceed) {
        skeletonLen16 = uspoof_getSkeleton(sc, type, input16, inputLen16,
                                           skeleton16, USPOOF_STACK_BUFFER_SIZE, status);
        if (*status == U_BUFFER_OVERFLOW_ERROR) {
            skeleton16 = static_cast<UChar *>(uprv_malloc((skeletonLen16+1)*sizeof(UChar)));
            if (skeleton16 == NULL) {
                *status = U_MEMORY_ALLOCATION_ERROR;
                proceed = FALSE;
            } else {
                *status = U_ZERO_ERROR;
                skeletonLen16 = uspoof_getSkeleton(sc, type, input16, inputLen16,
                                                   skeleton16, skeletonLen16+1, status);
            }
        }
    }

    if (proceed) {
        u_strToUTF8(dest, destCapacity, &skeletonLen8,
                    skeleton16, skeletonLen16, status);
    }

    // Release whichever buffers were moved off the stack.
    if (input16 != stackInput) {
        uprv_free(input16);
    }
    if (skeleton16 != stackSkeleton) {
        uprv_free(skeleton16);
    }
    return skeletonLen8;
}
/**
 * UTF-8 front end for uspoof_getSkeletonUnicodeString().
 *
 * Validates the checker and the buffer arguments, converts id to a
 * UnicodeString, computes the skeleton and writes it to dest as UTF-8.
 *
 * @return The UTF-8 length reported by u_strToUTF8(), or 0 on failure
 *         (bad arguments set U_ILLEGAL_ARGUMENT_ERROR).
 */
U_CAPI int32_t U_EXPORT2
uspoof_getSkeletonUTF8(const USpoofChecker *sc,
                       uint32_t type,
                       const char *id, int32_t length,
                       char *dest, int32_t destCapacity,
                       UErrorCode *status) {
    SpoofImpl::validateThis(sc, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    const UBool argsInvalid =
        length < -1 || destCapacity < 0 || (destCapacity == 0 && dest != NULL);
    if (argsInvalid) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // A negative length means id is NUL-terminated.
    UnicodeString input =
        UnicodeString::fromUTF8(StringPiece(id, length>=0 ? length : uprv_strlen(id)));
    UnicodeString skeleton;
    uspoof_getSkeletonUnicodeString(sc, type, input, skeleton, status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    int32_t utf8Length = 0;
    u_strToUTF8(dest, destCapacity, &utf8Length,
                skeleton.getBuffer(), skeleton.length(), status);
    return utf8Length;
}
/*
 * Builds the query text "* FROM <table> <conditions>" and hands it to
 * cq_select_query().
 *
 * Returns the result of cq_select_query() on success, or:
 *   -10  the UTF-8 query buffer could not be allocated
 *   -11  the UTF-16 scratch buffer could not be allocated
 *   100  the formatted query does not fit in CQ_QLEN units (or formatting
 *        failed)
 *   101  the query could not be converted to NUL-terminated UTF-8
 *
 * NOTE(review): table and conditions are interpolated into the query text
 * verbatim; callers must not pass untrusted input here.
 */
int cq_select_all(struct dbconn con, const char *table, struct dlist **out,
        const char *conditions)
{
    int rc;
    char *query;
    UChar *buf16;
    const char *fmt = u8"* FROM %s %s";
    UErrorCode status = U_ZERO_ERROR;

    query = calloc(CQ_QLEN, sizeof(char));
    if (query == NULL)
        return -10;

    buf16 = calloc(CQ_QLEN, sizeof(UChar));
    if (buf16 == NULL) {
        free(query);
        return -11;
    }

    rc = u_snprintf(buf16, CQ_QLEN, fmt, table, conditions);
    if (rc < 0 || (size_t) rc >= CQ_QLEN) {
        free(query);
        free(buf16);
        return 100;
    }

    u_strToUTF8(query, CQ_QLEN, NULL, buf16, u_strlen(buf16), &status);
    free(buf16);
    /* Anything but U_ZERO_ERROR is rejected: a "not terminated" warning would
     * leave query without its trailing NUL. */
    if (status != U_ZERO_ERROR) {
        free(query);
        return 101;
    }

    rc = cq_select_query(con, out, query);
    /* presumably cq_select_query() does not keep the pointer - confirm */
    free(query);
    return rc;
}
/*
 * Converts ulen UTF-16 code units at ustr into a Ruby string holding the
 * UTF-8 bytes.
 *
 * Short results are converted through a stack buffer of BUF_SIZE bytes.
 * Results that do not fit are converted directly into a Ruby string of the
 * required size instead of reading past the stack buffer. Any other
 * conversion failure yields an empty string.
 */
static VALUE to_utf8(UChar *ustr, int32_t ulen)
{
    char str[BUF_SIZE];
    int32_t len = 0;
    UErrorCode status = U_ZERO_ERROR;
    VALUE result;

    u_strToUTF8(str, BUF_SIZE, &len, ustr, ulen, &status);
    if (U_SUCCESS(status))
        return rb_str_new(str, len);

    if (status != U_BUFFER_OVERFLOW_ERROR)
        return rb_str_new(str, 0);

    /* On overflow len holds the number of bytes actually required, which is
     * larger than BUF_SIZE: convert again into a string of that size. */
    result = rb_str_new(NULL, len);
    status = U_ZERO_ERROR;
    u_strToUTF8(RSTRING_PTR(result), len, &len, ustr, ulen, &status);
    if (U_FAILURE(status))
        return rb_str_new(str, 0);

    return result;
}
/*
 * Joins fieldc field names into a comma-separated list and stores it in buf
 * as NUL-terminated UTF-8.
 *
 * buf:        destination buffer
 * buflen:     size of buf in bytes; also used as the capacity (in UTF-16
 *             units) of the intermediate buffers
 * fieldc:     number of entries in fieldnames
 * fieldnames: NUL-terminated UTF-8 field names
 * usequotes:  when true, every field containing anything other than the
 *             ASCII digits 0-9 is wrapped in single quotes
 *
 * Returns 0 on success, 1 when fieldc is 0, -1 / -2 when a scratch buffer
 * cannot be allocated, 2 when a field name cannot be converted to UTF-16,
 * and 3 when the joined list does not fit or cannot be converted to UTF-8.
 * buf is only written when the whole list was assembled successfully.
 */
int cq_fields_to_utf8(char *buf, size_t buflen, size_t fieldc,
        char **fieldnames, bool usequotes)
{
    UErrorCode status = U_ZERO_ERROR;
    UChar *buf16;
    UChar *temp;
    size_t used = 0;    /* UTF-16 units already stored in buf16 */
    int rc = 0;

    if (fieldc == 0)
        return 1;

    buf16 = calloc(buflen, sizeof(UChar));
    if (buf16 == NULL)
        return -1;

    /* One scratch buffer is reused for every field. */
    temp = calloc(buflen, sizeof(UChar));
    if (temp == NULL) {
        free(buf16);
        return -2;
    }

    for (size_t i = 0; i < fieldc; ++i) {
        const bool more = (i + 1 < fieldc);
        int32_t tlen = 0;
        bool isstr = false;
        size_t need;

        u_strFromUTF8(temp, (int32_t) buflen, &tlen, fieldnames[i],
                (int32_t) strlen(fieldnames[i]), &status);
        if (!U_SUCCESS(status)) {
            rc = 2;
            break;
        }

        if (usequotes) {
            /* Compare against the ASCII digit range directly; isdigit() is
             * not defined for arbitrary UTF-16 code unit values. */
            for (int32_t j = 0; j < tlen; ++j) {
                if (temp[j] < 0x30 || temp[j] > 0x39) {
                    isstr = true;
                    break;
                }
            }
        }

        /* Field, optional pair of quotes, optional separator, plus room for
         * the terminator: refuse to write past the end of buf16. */
        need = (size_t) tlen + (isstr ? 2 : 0) + (more ? 1 : 0);
        if (used + need + 1 > buflen) {
            rc = 3;
            break;
        }

        if (isstr)
            buf16[used++] = 0x27;   /* ' */
        memcpy(buf16 + used, temp, (size_t) tlen * sizeof(UChar));
        used += (size_t) tlen;
        if (isstr)
            buf16[used++] = 0x27;   /* ' */
        if (more)
            buf16[used++] = 0x2C;   /* , */
    }
    free(temp);

    if (rc == 0) {
        u_strToUTF8(buf, (int32_t) buflen, NULL, buf16, (int32_t) used,
                &status);
        /* A "not terminated" warning would leave buf without its NUL. */
        if (!U_SUCCESS(status) || status == U_STRING_NOT_TERMINATED_WARNING)
            rc = 3;
    }

    free(buf16);
    return rc;
}
/*
 * For every entry of locSkelItems: opens a date interval format for the
 * entry's locale and skeleton, formats startTime .. startTime+delta for each
 * non-negative entry of deltas, and checks the formatted text and the
 * reported field position against the entry's expected values.
 */
static void TestFPos_SkelWithSeconds()
{
    const LocaleAndSkeletonItem * item = locSkelItems;

    while (item->locale != NULL) {
        UDateIntervalFormat* fmt;
        UChar   ubuf[kSizeUBuf];
        int32_t ulen, uelen;
        UErrorCode status = U_ZERO_ERROR;

        u_strFromUTF8(ubuf, kSizeUBuf, &ulen, item->skeleton, -1, &status);
        fmt = udtitvfmt_open(item->locale, ubuf, ulen, zoneGMT, -1, &status);

        if ( U_SUCCESS(status) ) {
            int32_t idx;

            /* deltas is terminated by a negative entry; expected runs in parallel */
            for (idx = 0; deltas[idx] >= 0.0; idx++) {
                const double delta = deltas[idx];
                const ExpectPosAndFormat * expected = &item->expected[idx];
                UFieldPosition fpos = { item->fieldToCheck, 0, 0 };
                UChar uebuf[kSizeUBuf];
                char bbuf[kSizeBBuf];
                char bebuf[kSizeBBuf];

                status = U_ZERO_ERROR;
                uelen = u_unescape(expected->format, uebuf, kSizeUBuf);
                ulen = udtitvfmt_format(fmt, startTime, startTime + delta, ubuf, kSizeUBuf, &fpos, &status);

                if ( U_FAILURE(status) ) {
                    log_err("FAIL: udtitvfmt_format for locale %s, skeleton %s, delta %.1f: %s\n",
                            item->locale, item->skeleton, delta, u_errorName(status));
                } else if ( ulen != uelen || u_strncmp(ubuf,uebuf,uelen) != 0 ||
                            fpos.beginIndex != expected->posBegin || fpos.endIndex != expected->posEnd ) {
                    u_strToUTF8(bbuf, kSizeBBuf, NULL, ubuf, ulen, &status);
                    /* convert back to get unescaped string */
                    u_strToUTF8(bebuf, kSizeBBuf, NULL, uebuf, uelen, &status);
                    log_err("FAIL: udtitvfmt_format for locale %s, skeleton %s, delta %12.1f, expect %d-%d \"%s\", get %d-%d \"%s\"\n",
                            item->locale, item->skeleton, delta,
                            expected->posBegin, expected->posEnd, bebuf,
                            fpos.beginIndex, fpos.endIndex, bbuf);
                }
            }
            udtitvfmt_close(fmt);
        } else {
            log_data_err("FAIL: udtitvfmt_open for locale %s, skeleton %s: %s\n",
                         item->locale, item->skeleton, u_errorName(status));
        }

        item++;
    }
}
String u8(const UChar *u16, int32_t u16_len, UErrorCode &error) { error = U_ZERO_ERROR; if (u16_len == 0) { return empty_string(); } int32_t outlen; u_strToUTF8(nullptr, 0, &outlen, u16, u16_len, &error); if (error != U_BUFFER_OVERFLOW_ERROR) { return String(); } String ret(outlen + 1, ReserveString); char *out = ret.get()->mutableData(); error = U_ZERO_ERROR; u_strToUTF8(out, outlen + 1, &outlen, u16, u16_len, &error); if (U_FAILURE(error)) { return String(); } ret.setSize(outlen); return ret; }
/**
 * Converts a V8 string to UTF-8.
 *
 * At most characters*2 UTF-16 code units are read from the string. The
 * result lives in a function-local static buffer, so the returned pointer is
 * overwritten by the next call.
 *
 * @throws UTF16_to_UTF8_Conversion_Error when ICU reports anything other
 *         than U_ZERO_ERROR.
 */
inline const char* v8_String_to_utf8(v8::Local<v8::String> string) {
    static char buffer[characters*4];
    uint16_t src[characters*2];
    int32_t buffer_length;
    UErrorCode error_code = U_ZERO_ERROR;

    string->Write(src, 0, characters*2);
    u_strToUTF8(buffer, characters*4, &buffer_length,
                src, std::min(characters*2, string->Length()),
                &error_code);

    if (error_code == U_ZERO_ERROR) {
        return buffer;
    }
    throw UTF16_to_UTF8_Conversion_Error(error_code);
}
UnicodeSetPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status) : UPerfTest(argc, argv, options, LENGTHOF(options), unisetperf_usage, status), utf8(NULL), utf8Length(0), countInputCodePoints(0), spanCount(0) { if (U_SUCCESS(status)) { UnicodeString pattern=UnicodeString(options[SET_PATTERN].value, -1, US_INV).unescape(); set.applyPattern(pattern, status); prefrozen=set; if(0==strcmp(options[FAST_TYPE].value, "fast")) { set.freeze(); } int32_t inputLength; UPerfTest::getBuffer(inputLength, status); if(U_SUCCESS(status) && inputLength>0) { countInputCodePoints = u_countChar32(buffer, bufferLen); countSpans(); // Preflight the UTF-8 length and allocate utf8. u_strToUTF8(NULL, 0, &utf8Length, buffer, bufferLen, &status); if(status==U_BUFFER_OVERFLOW_ERROR) { utf8=(char *)malloc(utf8Length); if(utf8!=NULL) { status=U_ZERO_ERROR; u_strToUTF8(utf8, utf8Length, NULL, buffer, bufferLen, &status); } else { status=U_MEMORY_ALLOCATION_ERROR; } } if(verbose) { printf("code points:%ld len16:%ld len8:%ld spans:%ld " "cp/span:%.3g UChar/span:%.3g B/span:%.3g B/cp:%.3g\n", (long)countInputCodePoints, (long)bufferLen, (long)utf8Length, (long)spanCount, (double)countInputCodePoints/spanCount, (double)bufferLen/spanCount, (double)utf8Length/spanCount, (double)utf8Length/countInputCodePoints); } } } }
/**
 * UTF-8 front end for uspoof_check(): converts text to UTF-16, runs the
 * check, and translates a reported UTF-16 position into a UTF-8 byte offset.
 *
 * @param position  May be NULL. When non-NULL and uspoof_check() reports a
 *                  position greater than 0, receives the UTF-8 byte offset
 *                  corresponding to that UTF-16 position.
 * @return The result of uspoof_check(), or 0 when *status is a failure.
 */
U_CAPI int32_t U_EXPORT2
uspoof_checkUTF8(const USpoofChecker *sc,
                 const char *text, int32_t length,
                 int32_t *position,
                 UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return 0;
    }

    UChar stackBuf[USPOOF_STACK_BUFFER_SIZE];
    UChar* text16 = stackBuf;
    int32_t len16 = 0;
    int32_t result = 0;

    u_strFromUTF8(text16, USPOOF_STACK_BUFFER_SIZE, &len16, text, length, status);
    if (*status == U_BUFFER_OVERFLOW_ERROR) {
        // Too long for the stack buffer: retry with heap storage that also
        // has room for the terminator.
        text16 = static_cast<UChar *>(uprv_malloc((len16 + 1) * sizeof(UChar)));
        if (text16 == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }
        *status = U_ZERO_ERROR;
        u_strFromUTF8(text16, len16+1, NULL, text, length, status);
    }

    if (U_SUCCESS(*status)) {
        int32_t position16 = -1;
        result = uspoof_check(sc, text16, len16, &position16, status);
        if (U_FAILURE(*status)) {
            result = 0;
        } else if (position16 > 0 && position != NULL) {
            // Translate the UTF-16 based position back to a UTF-8 offset.
            // u_strToUTF8() in preflight mode reports the number of UTF-8
            // bytes for the first position16 units (the terminator is not
            // counted), which is exactly that offset. A local status keeps
            // the expected preflight "overflow" away from the caller.
            U_ASSERT(position16 <= len16);
            UErrorCode preflightStatus = U_ZERO_ERROR;
            u_strToUTF8(NULL, 0, position, text16, position16, &preflightStatus);
        }
    }

    // Single exit so heap storage is released on every path.
    if (text16 != stackBuf) {
        uprv_free(text16);
    }
    return result;
}
/*
 * Converts the NUL-terminated UTF-16 text stored at src into NUL-terminated
 * UTF-8 in dest.
 *
 * src:       buffer holding UTF-16 code units (read up to the terminator)
 * src_len:   not used by this function
 * dest:      destination buffer
 * dest_size: size of dest in bytes, terminator included
 *
 * Returns CTS_SUCCESS, or CTS_ERR_ICU_FAILED when the conversion fails or
 * the result plus terminator does not fit in dest.
 */
int helper_unicode_to_utf8(char *src, int src_len, char *dest, int dest_size)
{
	int32_t size = 0;
	UErrorCode status = U_ZERO_ERROR;
	UChar *unicode_src = (UChar *)src;

	u_strToUTF8(dest, dest_size, &size, unicode_src, -1, &status);
	/* U_STRING_NOT_TERMINATED_WARNING means size == dest_size: there is no
	 * room for the terminator, so dest[size] would be out of bounds. */
	h_retvm_if(U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING,
			CTS_ERR_ICU_FAILED,
			"u_strToUTF8() Failed(%s)", u_errorName(status));

	dest[size]='\0';
	return CTS_SUCCESS;
}
/*---------------------------------------------------------------------------------------------- This method uses an ICU function to convert a string from UTF-16 to UTF-8. Assumptions: If sourceLen is -1, it will be computed (by ICU) Exit conditions: <text> Parameters: <text> Return value: The number of characters required to store the fully-converted string (which may be greater than targetLen) ----------------------------------------------------------------------------------------------*/ int UnicodeConverter::Convert(const UChar* source, int sourceLen, char* target, int targetLen) { UErrorCode status = U_ZERO_ERROR; int32_t spaceRequiredForData; u_strToUTF8(target, targetLen, &spaceRequiredForData, source, sourceLen, &status); if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) throw std::runtime_error("Unable to convert from UTF-16 to UTF-8"); return spaceRequiredForData; }
/*
 * Reports where the iterator's current original token lies in the UTF-8
 * form of the original text.
 *
 * *start receives the UTF-8 offset and *len the UTF-8 length; both are
 * measured with zero-capacity u_strToUTF8() calls starting at the cached
 * UTF-16 base. When cstr is non-NULL, *cstr receives the original text as a
 * C string. The cached UTF-8 / UTF-16 base positions are then moved to the
 * token's start.
 */
void icu_iter_get_org_info2(yaz_icu_iter_t iter, size_t *start, size_t *len,
                            const char **cstr)
{
    int32_t bytes_before = 0;
    int32_t bytes_through = 0;
    UErrorCode status = U_ZERO_ERROR;

    /* The cached base lies past the token: measure from the beginning. */
    if (iter->org_start < iter->utf16_base)
    {
        iter->utf8_base = 0;
        iter->utf16_base = 0;
    }

    /* UTF-8 bytes between the base and the token start */
    u_strToUTF8(0, 0, &bytes_before,
                iter->org->utf16 + iter->utf16_base,
                iter->org_start - iter->utf16_base,
                &status);
    status = U_ZERO_ERROR;
    *start = bytes_before + iter->utf8_base;

    /* UTF-8 bytes between the base and the token end */
    u_strToUTF8(0, 0, &bytes_through,
                iter->org->utf16 + iter->utf16_base,
                iter->org_start - iter->utf16_base + iter->org_len,
                &status);
    *len = bytes_through - bytes_before;

    if (cstr)
    {
        if (!iter->org8)
            iter->org8 = icu_buf_utf8_create(0);
        status = U_ZERO_ERROR;
        icu_utf16_to_utf8(iter->org8, iter->org, &status);
        *cstr = icu_buf_utf8_to_cstr(iter->org8);
    }

    iter->utf8_base = *start;
    iter->utf16_base = iter->org_start;
}
/*
 * Converts a UTF-16 string to a newly malloc()ed, zero-terminated UTF-8
 * string.
 *
 * target:     receives the converted string (release with free())
 * target_len: receives its length in bytes, terminator excluded
 * src:        UTF-16 source text
 * src_len:    length of src as passed on to u_strToUTF8()
 * status:     receives U_ZERO_ERROR on success, otherwise the ICU error or
 *             U_MEMORY_ALLOCATION_ERROR; *target and *target_len are left
 *             untouched on failure
 */
void intl_convert_utf16_to_utf8(char** target, int* target_len,
                                const UChar* src, int src_len,
                                UErrorCode* status)
{
    char* dst_buf = NULL;
    int32_t dst_len = 0;

    /* Determine required destination buffer size (pre-flighting). */
    *status = U_ZERO_ERROR;
    u_strToUTF8(NULL, 0, &dst_len, src, src_len, status);

    /* Bail out if an unexpected error occurred.
     * (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough).
     * (U_STRING_NOT_TERMINATED_WARNING usually means that the input string
     * is empty). */
    if (*status != U_BUFFER_OVERFLOW_ERROR &&
        *status != U_STRING_NOT_TERMINATED_WARNING) {
        return;
    }

    /* Allocate memory for the destination buffer (it will be
     * zero-terminated). */
    dst_buf = (char *)malloc(dst_len + 1);
    if (dst_buf == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    /* Convert source string from UTF-16 to UTF-8. */
    *status = U_ZERO_ERROR;
    u_strToUTF8(dst_buf, dst_len, NULL, src, src_len, status);
    if (U_FAILURE(*status)) {
        free(dst_buf);
        return;
    }

    /* U_STRING_NOT_TERMINATED_WARNING is OK for us => reset 'status'. */
    *status = U_ZERO_ERROR;

    dst_buf[dst_len] = 0;
    *target = dst_buf;
    *target_len = dst_len;
}
/* Writing Functions */

/*
 * Writes a string resource to the Java output.
 *
 * The "%%UCARULES" resource is not written inline: its text is converted to
 * UTF-8 and stored in the file "UCARules.utf8" (inside outDir when set),
 * while the Java output only receives a ResourceString reference to that
 * file. Every other string resource is written through str_write_java().
 *
 * status receives U_BUFFER_OVERFLOW_ERROR when the data file path does not
 * fit, U_MEMORY_ALLOCATION_ERROR when the conversion buffer cannot be
 * allocated, U_FILE_ACCESS_ERROR when the data file cannot be opened, or the
 * error reported by the UTF-8 conversion.
 */
static void
string_write_java(struct SResource *res,UErrorCode *status) {
    if(uprv_strcmp(srBundle->fKeys+res->fKey,"%%UCARULES")==0 ){
        char fileName[1024] ={0};
        const char* file = "UCARules.utf8";
        FileStream* datFile = NULL;
        const char* type = "new ICUListResourceBundle.ResourceString(";
        char* dest = NULL;
        int32_t destCapacity = 8 * res->u.fString.fLength;
        int32_t len = 0;

        if(outDir){
            size_t dirLen = uprv_strlen(outDir);

            /* directory + separator + file name + NUL must fit */
            if(dirLen + 1 + uprv_strlen(file) >= sizeof(fileName)){
                *status=U_BUFFER_OVERFLOW_ERROR;
                return;
            }
            uprv_strcat(fileName,outDir);
            /* only look at the last character when there is one */
            if(dirLen>0 && outDir[dirLen-1]!=U_FILE_SEP_CHAR){
                uprv_strcat(fileName,U_FILE_SEP_STRING);
            }
        }
        uprv_strcat(fileName,file);/* UCARULES.utf8 UTF-8 file */

        write_tabs(out);

        T_FileStream_write(out, type, (int32_t)uprv_strlen(type));
        T_FileStream_write(out, "\"", 1);
        T_FileStream_write(out, file, (int32_t)uprv_strlen(file));
        T_FileStream_write(out, "\")\n", 3);

        dest = (char*) uprv_malloc(destCapacity);
        if(dest==NULL){
            *status=U_MEMORY_ALLOCATION_ERROR;
            return;
        }

        datFile=T_FileStream_open(fileName,"w");
        if(datFile==NULL){
            *status=U_FILE_ACCESS_ERROR;
            uprv_free(dest);
            return;
        }

        u_strToUTF8(dest,destCapacity,&len,res->u.fString.fChars,res->u.fString.fLength,status);
        if(U_SUCCESS(*status)){
            T_FileStream_write(datFile,dest,len);
        }

        T_FileStream_close(datFile);
        uprv_free(dest);
    }else{
        str_write_java(res->u.fString.fChars,res->u.fString.fLength,TRUE,status);
    }
}
inline void writeUTF8String(std::ostream& output, const UChar *str, size_t len = 0) { if (len == 0) { len = u_strlen(str); } std::vector<char> buffer(len * 4); int32_t olen = 0; UErrorCode status = U_ZERO_ERROR; u_strToUTF8(&buffer[0], len * 4 - 1, &olen, str, len, &status); uint16_t cs = static_cast<uint16_t>(olen); writeRaw(output, cs); output.write(&buffer[0], cs); }
/* {{{ intl_convert_utf16_to_utf8
 * Convert given string from UTF-16 to UTF-8.
 *
 * @param src      String to convert.
 * @param src_len  Length of the source string.
 * @param status   Conversion status.
 *
 * @return zend_string holding the UTF-8 text, or NULL on failure.
 */
zend_string* intl_convert_utf16_to_utf8(
	const UChar* src, int32_t src_len,
	UErrorCode*  status )
{
	zend_string* dst;
	int32_t      dst_len;

	/* Pre-flight to learn the required destination size. */
	*status = U_ZERO_ERROR;
	u_strToUTF8( NULL, 0, &dst_len, src, src_len, status );

	/* Only two outcomes carry a usable size:
	 * U_BUFFER_OVERFLOW_ERROR (the zero-sized buffer was too small) and
	 * U_STRING_NOT_TERMINATED_WARNING (usually an empty input string).
	 * Everything else is unexpected. */
	switch( *status ) {
		case U_BUFFER_OVERFLOW_ERROR:
		case U_STRING_NOT_TERMINATED_WARNING:
			break;
		default:
			return NULL;
	}

	/* Allocate the destination string (it will be zero-terminated). */
	dst = zend_string_alloc(dst_len, 0);

	/* Convert source string from UTF-16 to UTF-8. */
	*status = U_ZERO_ERROR;
	u_strToUTF8( ZSTR_VAL(dst), dst_len, NULL, src, src_len, status );

	if( U_SUCCESS( *status ) ) {
		/* U_STRING_NOT_TERMINATED_WARNING is OK for us => reset 'status'. */
		*status = U_ZERO_ERROR;
		ZSTR_VAL(dst)[dst_len] = 0;
		return dst;
	}

	zend_string_free(dst);
	return NULL;
}
/* * Generate a text file with spaces in it from a file without. */ int generateFile(const UChar *chars, int32_t length) { Locale root(""); UCharCharacterIterator *noSpaceIter = new UCharCharacterIterator(chars, length); UErrorCode status = U_ZERO_ERROR; UnicodeSet complexContext(UNICODE_STRING_SIMPLE("[:LineBreak=SA:]"), status); BreakIterator *breakIter = BreakIterator::createWordInstance(root, status); breakIter->adoptText(noSpaceIter); char outbuf[1024]; int32_t strlength; UChar bom = 0xFEFF; printf("%s", u_strToUTF8(outbuf, sizeof(outbuf), &strlength, &bom, 1, &status)); int32_t prevbreak = 0; while (U_SUCCESS(status)) { int32_t nextbreak = breakIter->next(); if (nextbreak == BreakIterator::DONE) { break; } printf("%s", u_strToUTF8(outbuf, sizeof(outbuf), &strlength, &chars[prevbreak], nextbreak-prevbreak, &status)); if (nextbreak > 0 && complexContext.contains(chars[nextbreak-1]) && complexContext.contains(chars[nextbreak])) { printf(" "); } prevbreak = nextbreak; } if (U_FAILURE(status)) { fprintf(stderr, "generate failed: %s\n", u_errorName(status)); return status; } else { return 0; } }
// There are quicker ways to do this conversion, but it's necessary to follow // this to match the functionality of fbcode/multifeed/text/TokenizeTextMap.cpp. std::string icuStringToUTF8(const UnicodeString& ustr) { UErrorCode status = U_ZERO_ERROR; int32_t bufSize = 0; std::string result; // Calculate the size of the buffer needed to hold ustr, converted to UTF-8. u_strToUTF8(NULL, 0, &bufSize, ustr.getBuffer(), ustr.length(), &status); if (status != U_BUFFER_OVERFLOW_ERROR && status != U_STRING_NOT_TERMINATED_WARNING) { return result; } result.resize(bufSize); status = U_ZERO_ERROR; u_strToUTF8(&result[0], bufSize, NULL, ustr.getBuffer(), ustr.length(), &status); if (U_FAILURE(status)) { result.clear(); } return result; }
int add_string_attribute(int n, v8::Local<v8::Value> value) const { uint16_t source[(max_dbf_field_length+2)*2]; char dest[(max_dbf_field_length+1)*4]; memset(source, 0, (max_dbf_field_length+2)*4); memset(dest, 0, (max_dbf_field_length+1)*4); int32_t dest_length; UErrorCode error_code = U_ZERO_ERROR; value->ToString()->Write(source, 0, max_dbf_field_length+1); u_strToUTF8(dest, m_fields[n].width(), &dest_length, source, std::min(max_dbf_field_length+1, value->ToString()->Length()), &error_code); if (error_code == U_BUFFER_OVERFLOW_ERROR) { // thats ok, it just means we clip the text at that point } else if (U_FAILURE(error_code)) { throw std::runtime_error("UTF-16 to UTF-8 conversion failed"); } return DBFWriteStringAttribute(m_dbf_handle, m_current_shape, n, dest); }
/**
 * Stores a V8 value as the string attribute of field n of the shapefile.
 *
 * The value is converted to a string, at most max_dbf_field_length+1 UTF-16
 * code units of it are converted to UTF-8, and the output is limited to the
 * field's width; text that does not fit is clipped.
 *
 * @throws std::runtime_error when the conversion fails for any reason other
 *         than the output not fitting.
 */
static void add_string_attribute(Osmium::Export::Shapefile* shapefile, int n, v8::Local<v8::Value> value) {
    uint16_t source[(Osmium::Export::Shapefile::max_dbf_field_length+2)*2];
    char dest[(Osmium::Export::Shapefile::max_dbf_field_length+1)*4];
    int32_t dest_length;
    UErrorCode error_code = U_ZERO_ERROR;

    // Zero both buffers so the clipped output is still NUL-terminated.
    memset(source, 0, sizeof(source));
    memset(dest, 0, sizeof(dest));

    value->ToString()->Write(source, 0, Osmium::Export::Shapefile::max_dbf_field_length+1);
    u_strToUTF8(dest, shapefile->field(n).width(), &dest_length,
                source, std::min(Osmium::Export::Shapefile::max_dbf_field_length+1, value->ToString()->Length()),
                &error_code);

    // Overflow is fine, it just means the text is clipped at that point.
    if (error_code != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(error_code)) {
        throw std::runtime_error("UTF-16 to UTF-8 conversion failed");
    }

    shapefile->add_attribute(n, dest);
}