Esempio n. 1
0
/**
 * Converts a UnicodeString to UTF-8 and writes the resulting bytes to a
 * file stream.
 *
 * @param fileStream  stream handed unchanged to T_FileStream_write()
 * @param outString   text to convert
 * @return the value returned by T_FileStream_write(), or 0 when the
 *         UTF-8 buffer could not be sized, allocated or filled
 */
static int32_t write_utf8_file(FileStream* fileStream, UnicodeString outString)
{
    UErrorCode status = U_ZERO_ERROR;
    int32_t len = 0;

    // Preflight to get the destination buffer size. A buffer-overflow
    // status is the expected outcome here; anything else that is a
    // failure means the length in `len` cannot be trusted.
    u_strToUTF8(NULL,
                0,
                &len,
                outString.getBuffer(),
                outString.length(),
                &status);
    if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {
        return 0;
    }

    // Allocate the buffer and bail out instead of writing through NULL.
    char* dest = (char*)uprv_malloc(len);
    if (dest == NULL) {
        return 0;
    }
    status = U_ZERO_ERROR;

    // Convert the data.
    u_strToUTF8(dest,
                len,
                &len,
                outString.getBuffer(),
                outString.length(),
                &status);

    // Only write the buffer if the conversion actually filled it.
    int32_t ret = 0;
    if (U_SUCCESS(status)) {
        ret = T_FileStream_write(fileStream, dest, len);
    }
    uprv_free(dest);
    return (ret);
}
Esempio n. 2
0
    UTrie2PerfTest(int32_t argc, const char *argv[], UErrorCode &status)
            : UPerfTest(argc, argv, NULL, 0, "", status),
              utf8(NULL), utf8Length(0), countInputCodePoints(0) {
        if (U_SUCCESS(status)) {
#if 0       // See comment at unorm_initUTrie2() forward declaration.
            unorm_initUTrie2(&status);
            ubidi_initUTrie2(&status);
#endif
            int32_t inputLength;
            UPerfTest::getBuffer(inputLength, status);
            if(U_SUCCESS(status) && inputLength>0) {
                countInputCodePoints = u_countChar32(buffer, bufferLen);

                // Preflight the UTF-8 length and allocate utf8.
                u_strToUTF8(NULL, 0, &utf8Length, buffer, bufferLen, &status);
                if(status==U_BUFFER_OVERFLOW_ERROR) {
                    utf8=(char *)malloc(utf8Length);
                    if(utf8!=NULL) {
                        status=U_ZERO_ERROR;
                        u_strToUTF8(utf8, utf8Length, NULL, buffer, bufferLen, &status);
                    } else {
                        status=U_MEMORY_ALLOCATION_ERROR;
                    }
                }

                if(verbose) {
                    printf("code points:%ld  len16:%ld  len8:%ld  "
                           "B/cp:%.3g\n",
                           (long)countInputCodePoints, (long)bufferLen, (long)utf8Length,
                           (double)utf8Length/countInputCodePoints);
                }
            }
        }
    }
/* Converts `ustrLength` UTF-16 units at `ustr` into a newly allocated,
   zero-terminated UTF-8 string.

   Returns the new string (release it with g_free), or NULL after
   emitting a g_warning when either ICU call reports an unexpected
   status.  The preflight call must end with U_BUFFER_OVERFLOW_ERROR and
   the real conversion with U_ZERO_ERROR; every other status is treated
   as a failure.  */
static gchar *
ustring_to_utf8 (const UChar *ustr, int32_t ustrLength)
{
  UErrorCode status = U_ZERO_ERROR;
  int32_t needed;
  gchar *utf8;

  /* First pass: ask ICU how many bytes the result needs.  */
  u_strToUTF8 (NULL, 0, &needed, ustr, ustrLength, &status);
  if (status != U_BUFFER_OVERFLOW_ERROR)
    {
      g_warning ("can't get the number of byte required to convert ustring: %s",
                 u_errorName (status));
      return NULL;
    }

  /* Second pass: convert into a buffer with room for the terminator.  */
  utf8 = g_malloc0 (needed + 1);
  status = U_ZERO_ERROR;
  u_strToUTF8 (utf8, needed + 1, NULL, ustr, ustrLength, &status);
  if (status == U_ZERO_ERROR)
    return utf8;

  g_free (utf8);
  g_warning ("can't convert ustring to UTF-8 string: %s",
             u_errorName (status));
  return NULL;
}
Esempio n. 4
0
File: icu_utf8.c Progetto: nla/yaz
/*
 * Convert the UTF-16 buffer src16 into the UTF-8 buffer dest8.
 *
 * The conversion is first attempted with dest8's current capacity. If
 * ICU reports a buffer overflow, dest8 is resized to twice the required
 * length and the conversion is run a second time.
 *
 * On success dest8->utf8_len holds the converted length; otherwise
 * dest8 is cleared. The final ICU status is stored in *status and also
 * returned.
 */
UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 *dest8,
                             const struct icu_buf_utf16 *src16,
                             UErrorCode *status)
{
    int32_t needed = 0;

    u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap, &needed,
                src16->utf16, src16->utf16_len, status);

    if (*status == U_BUFFER_OVERFLOW_ERROR)
    {
        /* not enough room: grow the destination and try once more */
        icu_buf_utf8_resize(dest8, needed * 2);
        *status = U_ZERO_ERROR;
        u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap, &needed,
                    src16->utf16, src16->utf16_len, status);
    }

    if (U_FAILURE(*status) || needed > dest8->utf8_cap)
        icu_buf_utf8_clear(dest8);
    else
        dest8->utf8_len = needed;

    return *status;
}
Esempio n. 5
0
/*
 * Convert `ulen` UTF-16 units at `ustr` into a Ruby String tagged as
 * UTF-8.
 *
 * Returns Qnil when ICU cannot convert the input. The previous version
 * reset the length to 0 on U_INVALID_CHAR_FOUND and then ran the
 * conversion again, which fails the same way, so returning Qnil right
 * after the sizing pass yields the same result without the allocation.
 */
static VALUE to_utf8(UChar *ustr, int32_t ulen) {
    int32_t len = 0;
    UErrorCode status = U_ZERO_ERROR;

    /* Figure out the size of the buffer we need to allocate. With a
     * capacity of 0 nothing is written, so no scratch buffer is needed. */
    u_strToUTF8(NULL, 0, &len, ustr, ulen, &status);
    if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
        return Qnil;

    /* Allocate the buffer and encode into it: */
    status = U_ZERO_ERROR;
    char *ptr = ALLOC_N(char, len);
    u_strToUTF8(ptr, len, &len, ustr, ulen, &status);
    if (U_FAILURE(status)) {
        xfree(ptr);
        return Qnil;
    }

    VALUE str = rb_enc_str_new(ptr, len, rb_utf8_encoding());
    xfree(ptr);
    return str;
}
Esempio n. 6
0
// Converts icu_str_sz UTF-16 units at icu_str into a freshly allocated,
// zero-terminated UTF-8 string stored in *ret_utf_str. The caller must
// free() that string. Passing -1 as icu_str_sz makes ICU treat icu_str
// as null terminated.
//
// Returns 0 on success, -EINVAL for NULL output pointers or an ICU
// failure (the ICU status is left in *ret_icu_err), and -ENOMEM when the
// buffer cannot be allocated.
static int
utf8_from_icu_str_slice(const UChar *icu_str,
    int32_t icu_str_sz,
    char **ret_utf_str,
    UErrorCode *ret_icu_err)
{
    int32_t needed;

    SOL_NULL_CHECK(ret_utf_str, -EINVAL);
    SOL_NULL_CHECK(ret_icu_err, -EINVAL);

    // Sizing pass: a buffer-overflow status is the expected answer.
    *ret_icu_err = U_ZERO_ERROR;
    u_strToUTF8(NULL, 0, &needed, icu_str, icu_str_sz, ret_icu_err);
    if (*ret_icu_err != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(*ret_icu_err))
        return -EINVAL;

    *ret_utf_str = calloc(needed + 1, sizeof(char));
    SOL_NULL_CHECK(*ret_utf_str, -ENOMEM);

    // Conversion pass into the zeroed buffer.
    *ret_icu_err = U_ZERO_ERROR;
    u_strToUTF8(*ret_utf_str, needed + 1, NULL, icu_str, icu_str_sz,
        ret_icu_err);
    if (U_SUCCESS(*ret_icu_err) && (*ret_utf_str)[needed] == '\0')
        return 0;

    free(*ret_utf_str);
    *ret_utf_str = NULL;
    return -EINVAL;
}
Esempio n. 7
0
/*
 * Append a full case mapping result to a UTF-8 destination, see
 * UCASE_MAX_STRING_LENGTH.
 *
 * `result` is decoded as follows:
 *   - negative:                    the code point ~result
 *   - 0..UCASE_MAX_STRING_LENGTH:  a string of `result` UChars read from `s`
 *   - anything larger:             the single code point `result`
 *
 * While destIndex is below destCapacity the text is written at
 * dest+destIndex; once the destination is full only the UTF-8 length is
 * counted (preflighting).
 *
 * Returns the updated destIndex, which may exceed destCapacity. The
 * UErrorCode produced by u_strToUTF8 is not inspected here.
 */
static U_INLINE int32_t
appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
             int32_t result, const UChar *s) {
    UChar32 c;
    int32_t length, destLength;
    UErrorCode errorCode;

    /* decode the result */
    if(result<0) {
        /* (not) original code point */
        c=~result;
        length=-1;
    } else if(result<=UCASE_MAX_STRING_LENGTH) {
        /* string result: `result` is the number of UChars at s */
        c=U_SENTINEL;
        length=result;
    } else {
        /* single code point result */
        c=result;
        length=-1;
    }

    if(destIndex<destCapacity) {
        /* append the result */
        if(length<0) {
            /* code point */
            /* NOTE(review): U8_APPEND is expected to advance destIndex itself
             * when it succeeds -- confirm against its definition in utf8.h */
            UBool isError=FALSE;
            U8_APPEND(dest, destIndex, destCapacity, c, isError);
            if(isError) {
                /* overflow, nothing written */
                destIndex+=U8_LENGTH(c);
            }
        } else {
            /* string */
            errorCode=U_ZERO_ERROR;
            u_strToUTF8(
                (char *)(dest+destIndex), destCapacity-destIndex, &destLength,
                s, length,
                &errorCode);
            destIndex+=destLength;
            /* we might have an overflow, but we know the actual length */
        }
    } else {
        /* preflight */
        if(length<0) {
            destIndex+=U8_LENGTH(c);
        } else {
            /* measure only: NULL destination with zero capacity */
            errorCode=U_ZERO_ERROR;
            u_strToUTF8(
                NULL, 0, &destLength,
                s, length,
                &errorCode);
            destIndex+=destLength;
        }
    }
    return destIndex;
}
Esempio n. 8
0
/**
 * Converts the UTF-16 payload of `in` to UTF-8 inside a newly allocated
 * Erlang binary `out`.
 *
 * @param in      binary holding the UTF-16 source data
 * @param out     receives the allocated binary; released again on failure
 * @param len     in: number of bytes to allocate for `out`;
 *                out: length reported by u_strToUTF8
 * @param status  reset on entry; U_MEMORY_ALLOCATION_ERROR when the
 *                binary cannot be allocated, otherwise the ICU status
 */
inline void do_to_utf8(
    ErlNifBinary  in,
    ErlNifBinary& out, 
    int32_t& len,
    UErrorCode& status) 
{
    status = U_ZERO_ERROR;

    if (!enif_alloc_binary(len, &out)) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    char* const target = (char*) out.data;
    const UChar* const source = (const UChar*) in.data;
    u_strToUTF8(target, len, &len, source, TO_ULEN(in.size), &status);

    if (U_FAILURE(status)) {
        enif_release_binary(&out);
        return;
    }

    /* shrink binary if it was too large */
    if ((int32_t) out.size != len) {
        enif_realloc_binary(&out, len);
    }
}
Esempio n. 9
0
/**
 * Upper-cases a UTF-8 string in place using ICU.
 *
 * @param buf     zero-terminated UTF-8 text; also receives the result
 * @param maxlen  capacity to use for the result; 0 means strlen(buf)
 * @return `buf` unchanged when it is NULL, empty, or when decoding or
 *         upper-casing reports anything other than U_ZERO_ERROR;
 *         otherwise whatever u_strToUTF8 returns for the final write
 *         (NOTE(review): presumably NULL when the upper-cased text does
 *         not fit in `max` bytes -- confirm against the ICU docs)
 */
char *ICUStringMgr::upperUTF8(char *buf, unsigned int maxlen) const {
	char *ret = buf;

	// Check the pointer before it is handed to strlen().
	if (!buf) {
		return ret;
	}

	int max = (maxlen) ? maxlen : strlen(buf);
	if (!max) {
		return ret;
	}

	UErrorCode err = U_ZERO_ERROR;

	UChar *lowerStr = new UChar[max+10];
	UChar *upperStr = new UChar[max+10];

	u_strFromUTF8(lowerStr, max+9, 0, buf, -1, &err);
	if (err != U_ZERO_ERROR) {
//		SWLog::getSystemLog()->logError("from: %s", u_errorName(err));
		delete [] lowerStr;
		delete [] upperStr;
		return ret;
	}

	u_strToUpper(upperStr, max+9, lowerStr, -1, 0, &err);
	if (err != U_ZERO_ERROR) {
//		SWLog::getSystemLog()->logError("upperCase: %s", u_errorName(err));
		delete [] lowerStr;
		delete [] upperStr;
		return ret;
	}

	ret = u_strToUTF8(ret, max, 0, upperStr, -1, &err);

	delete [] lowerStr;
	delete [] upperStr;
	return ret;
}
Esempio n. 10
0
// Computes the skeleton of a UTF-8 identifier and returns it as UTF-8.
//
// There is no UTF-8 normalization API, so the input is converted to
// UTF-16, run through uspoof_getSkeleton() and converted back. Stack
// buffers are used first; each one is swapped for a heap buffer when ICU
// reports U_BUFFER_OVERFLOW_ERROR.
//
// Returns the skeleton length in UTF-8 bytes as reported by
// u_strToUTF8(), or 0 when *status already holds a failure on entry.
U_CAPI int32_t U_EXPORT2
uspoof_getSkeletonUTF8(const USpoofChecker *sc,
                       uint32_t type,
                       const char *s,  int32_t length,
                       char *dest, int32_t destCapacity,
                       UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return 0;
    }

    // UTF-16 forms of the input and of the skeleton.
    UChar    stackIn[USPOOF_STACK_BUFFER_SIZE];
    UChar    stackOut[USPOOF_STACK_BUFFER_SIZE];
    UChar   *in16 = stackIn;
    UChar   *out16 = stackOut;

    int32_t  inLen16 = 0;
    int32_t  skelLen16 = 0;
    int32_t  skelLen8 = 0;

    // Step 1: UTF-8 input -> UTF-16.
    u_strFromUTF8(in16, USPOOF_STACK_BUFFER_SIZE, &inLen16,
                  s, length, status);
    if (*status == U_BUFFER_OVERFLOW_ERROR) {
        in16 = static_cast<UChar *>(uprv_malloc((inLen16+1)*sizeof(UChar)));
        if (in16 == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto cleanup;
        }
        *status = U_ZERO_ERROR;
        u_strFromUTF8(in16, inLen16+1, &inLen16,
                      s, length, status);
    }

    // Step 2: skeleton in UTF-16.
    skelLen16 = uspoof_getSkeleton(sc, type, in16, inLen16,
                                   out16, USPOOF_STACK_BUFFER_SIZE, status);
    if (*status == U_BUFFER_OVERFLOW_ERROR) {
        out16 = static_cast<UChar *>(uprv_malloc((skelLen16+1)*sizeof(UChar)));
        if (out16 == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto cleanup;
        }
        *status = U_ZERO_ERROR;
        skelLen16 = uspoof_getSkeleton(sc, type, in16, inLen16,
                                       out16, skelLen16+1, status);
    }

    // Step 3: UTF-16 skeleton -> caller's UTF-8 buffer.
    u_strToUTF8(dest, destCapacity, &skelLen8,
                out16, skelLen16, status);

  cleanup:
    // Release whichever buffers were moved to the heap.
    if (in16 != stackIn) {
        uprv_free(in16);
    }
    if (out16 != stackOut) {
        uprv_free(out16);
    }
    return skelLen8;
}
Esempio n. 11
0
// Computes the skeleton of the UTF-8 identifier `id` and writes it, as
// UTF-8, into `dest`.
//
// `length` may be -1 for a zero-terminated `id`. Invalid arguments set
// *status to U_ILLEGAL_ARGUMENT_ERROR. Returns the skeleton length in
// bytes as reported by u_strToUTF8(), or 0 on any failure before that
// conversion.
U_CAPI int32_t U_EXPORT2
uspoof_getSkeletonUTF8(const USpoofChecker *sc,
                       uint32_t type,
                       const char *id,  int32_t length,
                       char *dest, int32_t destCapacity,
                       UErrorCode *status) {
    SpoofImpl::validateThis(sc, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    const UBool badLength = length < -1;
    const UBool badDest = destCapacity < 0 || (destCapacity == 0 && dest != NULL);
    if (badLength || badDest) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // Work in UTF-16: decode the input, compute the skeleton there.
    const int32_t idLength =
        (length >= 0) ? length : static_cast<int32_t>(uprv_strlen(id));
    UnicodeString source = UnicodeString::fromUTF8(StringPiece(id, idLength));
    UnicodeString skeleton;
    uspoof_getSkeletonUnicodeString(sc, type, source, skeleton, status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    // Encode the skeleton back to UTF-8 in the caller's buffer.
    int32_t utf8Length = 0;
    u_strToUTF8(dest, destCapacity, &utf8Length,
                skeleton.getBuffer(), skeleton.length(), status);
    return utf8Length;
}
Esempio n. 12
0
/*
 * Build the query text "* FROM <table> <conditions>" and run it through
 * cq_select_query(), which stores its results in *out.
 *
 * Returns -10 / -11 when the UTF-8 / UTF-16 work buffer cannot be
 * allocated, 100 when the formatted text does not fit in CQ_QLEN units,
 * 101 when it cannot be converted to UTF-8 within CQ_QLEN - 1 bytes, and
 * otherwise the value returned by cq_select_query().
 */
int cq_select_all(struct dbconn con, const char *table, struct dlist **out,
        const char *conditions)
{
    int rc;
    char *query;
    const char *fmt = u8"* FROM %s %s";

    query = calloc(CQ_QLEN, sizeof(char));
    if (query == NULL)
        return -10;

    UChar *buf16 = calloc(CQ_QLEN, sizeof(UChar));
    if (buf16 == NULL) {
        free(query);
        return -11;
    }

    /* The cast also catches a negative return from u_snprintf. */
    rc = u_snprintf(buf16, CQ_QLEN, fmt, table, conditions);
    if ((size_t) rc >= CQ_QLEN) {
        free(query);
        free(buf16);
        return 100;
    }

    /* Leave the last byte of the zeroed buffer untouched so the query is
     * always terminated, even when the text fills the usable space. */
    UErrorCode status = U_ZERO_ERROR;
    u_strToUTF8(query, CQ_QLEN - 1, NULL, buf16, u_strlen(buf16), &status);
    free(buf16);
    if (!U_SUCCESS(status)) {
        free(query);
        return 101;
    }

    rc = cq_select_query(con, out, query);
    /* NOTE(review): assumes cq_select_query() does not keep the pointer
     * after it returns; before this change the buffer was leaked. */
    free(query);
    return rc;
}
Esempio n. 13
0
/*
 * Convert `ulen` UTF-16 units at `ustr` into a Ruby String.
 *
 * Short results are converted in a BUF_SIZE stack buffer. When ICU
 * reports that the result does not fit, the length it returns is larger
 * than BUF_SIZE, so the text is converted again into a heap buffer of
 * that size instead of reading past the end of the stack buffer.
 *
 * Returns an empty String when the conversion fails.
 */
static VALUE to_utf8(UChar *ustr, int32_t ulen) {
    char str[BUF_SIZE];
    int32_t len = 0;
    UErrorCode status = U_ZERO_ERROR;

    u_strToUTF8(str, BUF_SIZE, &len, ustr, ulen, &status);

    if (status == U_BUFFER_OVERFLOW_ERROR) {
        /* len now holds the required size; retry with enough room. */
        char *heap = ALLOC_N(char, len);
        VALUE result;

        status = U_ZERO_ERROR;
        u_strToUTF8(heap, len, &len, ustr, ulen, &status);
        result = rb_str_new(heap, U_FAILURE(status) ? 0 : len);
        xfree(heap);
        return result;
    }

    if (U_FAILURE(status)) len = 0;
    return rb_str_new(str, len);
}
Esempio n. 14
0
int cq_fields_to_utf8(char *buf, size_t buflen, size_t fieldc,
        char **fieldnames, bool usequotes)
{
    UChar *buf16;
    UErrorCode status = U_ZERO_ERROR;
    size_t num_left = fieldc;
    int rc = 0;

    if (num_left == 0)
        return 1;

    buf16 = calloc(buflen, sizeof(UChar));
    if (buf16 == NULL)
        return -1;

    for (size_t i = 0; i < fieldc; ++i) {
        UChar *temp = calloc(buflen, sizeof(UChar));
        if (temp == NULL) {
            rc = -2;
            break;
        }

        u_strFromUTF8(temp, buflen, NULL, fieldnames[i], strlen(fieldnames[i]),
                &status);
        if (!U_SUCCESS(status)) {
            rc = 2;
            free(temp);
            break;
        }

        bool isstr = false;
        if (usequotes) {
            for (int32_t j = 0; j < u_strlen(temp); ++j) {
                if (!isdigit(temp[j])) {
                    isstr = true;
                    break;
                }
            }
        }

        if (isstr) u_strcat(buf16, u"'");
        u_strcat(buf16, temp);
        if (isstr) u_strcat(buf16, u"'");
        free(temp);
        if (--num_left > 0) {
            u_strcat(buf16, u",");
        }
    }

    u_strToUTF8(buf, buflen, NULL, buf16, u_strlen(buf16), &status);
    if (!U_SUCCESS(status))
        rc = 3;

    free(buf16);
    return rc;
}
static void TestFPos_SkelWithSeconds()
{
	const LocaleAndSkeletonItem * locSkelItemPtr;
	for (locSkelItemPtr = locSkelItems; locSkelItemPtr->locale != NULL; locSkelItemPtr++) {
	    UDateIntervalFormat* udifmt;
	    UChar   ubuf[kSizeUBuf];
	    int32_t ulen, uelen;
	    UErrorCode status = U_ZERO_ERROR;
	    
	    u_strFromUTF8(ubuf, kSizeUBuf, &ulen, locSkelItemPtr->skeleton, -1, &status);
	    udifmt = udtitvfmt_open(locSkelItemPtr->locale, ubuf, ulen, zoneGMT, -1, &status);
	    if ( U_FAILURE(status) ) {
           log_data_err("FAIL: udtitvfmt_open for locale %s, skeleton %s: %s\n",
                    locSkelItemPtr->locale, locSkelItemPtr->skeleton, u_errorName(status));
	    } else {
			const double * deltasPtr = deltas;
			const ExpectPosAndFormat * expectedPtr = locSkelItemPtr->expected;
			for (; *deltasPtr >= 0.0; deltasPtr++, expectedPtr++) {
			    UFieldPosition fpos = { locSkelItemPtr->fieldToCheck, 0, 0 };
			    UChar uebuf[kSizeUBuf];
			    char bbuf[kSizeBBuf];
			    char bebuf[kSizeBBuf];
			    status = U_ZERO_ERROR;
			    uelen = u_unescape(expectedPtr->format, uebuf, kSizeUBuf);
			    ulen = udtitvfmt_format(udifmt, startTime, startTime + *deltasPtr, ubuf, kSizeUBuf, &fpos, &status);
			    if ( U_FAILURE(status) ) {
			        log_err("FAIL: udtitvfmt_format for locale %s, skeleton %s, delta %.1f: %s\n",
			             locSkelItemPtr->locale, locSkelItemPtr->skeleton, *deltasPtr, u_errorName(status));
			    } else if ( ulen != uelen || u_strncmp(ubuf,uebuf,uelen) != 0 ||
			                fpos.beginIndex != expectedPtr->posBegin || fpos.endIndex != expectedPtr->posEnd ) {
			        u_strToUTF8(bbuf, kSizeBBuf, NULL, ubuf, ulen, &status);
			        u_strToUTF8(bebuf, kSizeBBuf, NULL, uebuf, uelen, &status); // convert back to get unescaped string
			        log_err("FAIL: udtitvfmt_format for locale %s, skeleton %s, delta %12.1f, expect %d-%d \"%s\", get %d-%d \"%s\"\n",
			             locSkelItemPtr->locale, locSkelItemPtr->skeleton, *deltasPtr,
			             expectedPtr->posBegin, expectedPtr->posEnd, bebuf,
			             fpos.beginIndex, fpos.endIndex, bbuf);
			    }
			}
	        udtitvfmt_close(udifmt);
	    }
    }
}
Esempio n. 16
0
File: icu.cpp Progetto: bd808/hhvm
// Converts u16_len UTF-16 units at u16 into a UTF-8 String.
//
// `error` is reset on entry and receives the ICU status. A zero-length
// input yields empty_string(); a failed sizing pass or conversion yields
// a default-constructed String.
String u8(const UChar *u16, int32_t u16_len, UErrorCode &error) {
  error = U_ZERO_ERROR;
  if (u16_len == 0) {
    return empty_string();
  }

  // Sizing pass: the expected status is a buffer overflow.
  int32_t needed;
  u_strToUTF8(nullptr, 0, &needed, u16, u16_len, &error);
  if (error != U_BUFFER_OVERFLOW_ERROR) {
    return String();
  }

  // Conversion pass into a string with room for the terminator.
  String result(needed + 1, ReserveString);
  char *bytes = result.get()->mutableData();
  error = U_ZERO_ERROR;
  u_strToUTF8(bytes, needed + 1, &needed, u16, u16_len, &error);
  if (U_FAILURE(error)) {
    return String();
  }

  result.setSize(needed);
  return result;
}
Esempio n. 17
0
 // Converts a V8 string to UTF-8 and returns a pointer to a function-local
 // static buffer holding the result; each call overwrites that buffer.
 // Throws UTF16_to_UTF8_Conversion_Error when ICU reports any status
 // other than U_ZERO_ERROR.
 inline const char* v8_String_to_utf8(v8::Local<v8::String> string) {
     static char buffer[characters*4];
     uint16_t utf16[characters*2];
     int32_t buffer_length;
     UErrorCode error_code = U_ZERO_ERROR;

     string->Write(utf16, 0, characters*2);
     const int utf16_length = std::min(characters*2, string->Length());
     u_strToUTF8(buffer, characters*4, &buffer_length, utf16, utf16_length, &error_code);

     if (error_code == U_ZERO_ERROR) {
         return buffer;
     }
     throw UTF16_to_UTF8_Conversion_Error(error_code);
 }
Esempio n. 18
0
    UnicodeSetPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
            : UPerfTest(argc, argv, options, LENGTHOF(options), unisetperf_usage, status),
              utf8(NULL), utf8Length(0), countInputCodePoints(0), spanCount(0) {
        if (U_SUCCESS(status)) {
            UnicodeString pattern=UnicodeString(options[SET_PATTERN].value, -1, US_INV).unescape();
            set.applyPattern(pattern, status);
            prefrozen=set;
            if(0==strcmp(options[FAST_TYPE].value, "fast")) {
                set.freeze();
            }

            int32_t inputLength;
            UPerfTest::getBuffer(inputLength, status);
            if(U_SUCCESS(status) && inputLength>0) {
                countInputCodePoints = u_countChar32(buffer, bufferLen);

                countSpans();

                // Preflight the UTF-8 length and allocate utf8.
                u_strToUTF8(NULL, 0, &utf8Length, buffer, bufferLen, &status);
                if(status==U_BUFFER_OVERFLOW_ERROR) {
                    utf8=(char *)malloc(utf8Length);
                    if(utf8!=NULL) {
                        status=U_ZERO_ERROR;
                        u_strToUTF8(utf8, utf8Length, NULL, buffer, bufferLen, &status);
                    } else {
                        status=U_MEMORY_ALLOCATION_ERROR;
                    }
                }

                if(verbose) {
                    printf("code points:%ld  len16:%ld  len8:%ld  spans:%ld  "
                           "cp/span:%.3g  UChar/span:%.3g  B/span:%.3g  B/cp:%.3g\n",
                           (long)countInputCodePoints, (long)bufferLen, (long)utf8Length, (long)spanCount,
                           (double)countInputCodePoints/spanCount, (double)bufferLen/spanCount, (double)utf8Length/spanCount,
                           (double)utf8Length/countInputCodePoints);
                }
            }
        }
    }
Esempio n. 19
0
// Runs uspoof_check() on UTF-8 input by converting it to UTF-16 first.
//
// A stack buffer is used for the UTF-16 text; longer input is moved to a
// heap buffer that is released on every path out of the function.
//
// position  optional; when non-NULL and uspoof_check() reports a UTF-16
//           position greater than 0, receives that position translated
//           to a UTF-8 offset
// status    ICU error code; nothing is done if it already holds a failure
//
// Returns the result of uspoof_check(), or 0 on failure.
U_CAPI int32_t U_EXPORT2
uspoof_checkUTF8(const USpoofChecker *sc,
                 const char *text, int32_t length,
                 int32_t *position,
                 UErrorCode *status) {

    if (U_FAILURE(*status)) {
        return 0;
    }
    UChar stackBuf[USPOOF_STACK_BUFFER_SIZE];
    UChar* text16 = stackBuf;
    int32_t len16;

    u_strFromUTF8(text16, USPOOF_STACK_BUFFER_SIZE, &len16, text, length, status);
    if (U_FAILURE(*status) && *status != U_BUFFER_OVERFLOW_ERROR) {
        return 0;
    }
    if (*status == U_BUFFER_OVERFLOW_ERROR) {
        // Input did not fit on the stack: allocate len16 units plus a terminator.
        text16 = static_cast<UChar *>(uprv_malloc((len16 + 1) * sizeof(UChar)));
        if (text16 == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }
        *status = U_ZERO_ERROR;
        u_strFromUTF8(text16, len16+1, NULL, text, length, status);
    }

    int32_t position16 = -1;
    int32_t result = uspoof_check(sc, text16, len16, &position16, status);
    if (U_FAILURE(*status)) {
        // Fall through to the cleanup below so a heap buffer is not leaked.
        result = 0;
    } else if (position16 > 0) {
        // Translate a UTF-16 based error position back to a UTF-8 offset.
        // u_strToUTF8() in preflight mode is an easy way to do it.
        U_ASSERT(position16 <= len16);
        u_strToUTF8(NULL, 0, position, text16, position16, status);
        if (position != NULL) {
            // NOTE(review): the original code subtracts 1 here on the
            // assumption that the preflight length includes a terminating
            // NUL. The ICU documentation describes pDestLength as the
            // number of output units, so this may be off by one -- confirm
            // before changing; the adjustment is kept as it was.
            *position -= 1;
        }
        *status = U_ZERO_ERROR;   // u_strToUTF8, above sets BUFFER_OVERFLOW_ERROR.
    }

    if (text16 != stackBuf) {
        uprv_free(text16);
    }
    return result;
}
int helper_unicode_to_utf8(char *src, int src_len, char *dest, int dest_size)
{
	int32_t size = 0;
	UErrorCode status = 0;
	UChar *unicode_src = (UChar *)src;

	u_strToUTF8(dest, dest_size, &size, unicode_src, -1, &status);
	h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED,
			"u_strToUTF8() Failed(%s)", u_errorName(status));

	dest[size]='\0';
	return CTS_SUCCESS;
}
Esempio n. 21
0
/*----------------------------------------------------------------------------------------------
	This method uses an ICU function to convert a string from UTF-16 to UTF-8.

	Assumptions:
		If sourceLen is -1, it will be computed (by ICU)

	Exit conditions:
		<text>

	Parameters:
		<text>

	Return value:
		The number of characters required to store the fully-converted string
			(which may be greater than targetLen)
----------------------------------------------------------------------------------------------*/
int UnicodeConverter::Convert(const UChar* source, int sourceLen,
	char* target, int targetLen)
{
	UErrorCode status = U_ZERO_ERROR;
	int32_t spaceRequiredForData;

	u_strToUTF8(target, targetLen, &spaceRequiredForData, source, sourceLen, &status);

	if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
		throw std::runtime_error("Unable to convert from UTF-16 to UTF-8");

	return spaceRequiredForData;
}
Esempio n. 22
0
/*
 * Report where the current token sits in the UTF-8 form of the original
 * text.
 *
 * start  receives the UTF-8 offset of the token start
 * len    receives the UTF-8 length of the token
 * cstr   optional; when non-NULL receives the whole original text
 *        converted to UTF-8 (held in iter->org8)
 *
 * The iterator remembers the last UTF-16 position it translated
 * (utf16_base) together with the matching UTF-8 offset (utf8_base), so
 * each call only measures the text between that position and the
 * current token. The UErrorCode values set by the ICU calls are not
 * inspected.
 */
void icu_iter_get_org_info2(yaz_icu_iter_t iter, size_t *start, size_t *len,
                            const char **cstr)
{
    int32_t len1 = 0, len2 = 0;
    UErrorCode status = U_ZERO_ERROR;

    /* the token lies before the remembered position: measure from 0 */
    if (iter->org_start < iter->utf16_base)
    {
        iter->utf8_base = 0;
        iter->utf16_base = 0;
    }
    /* len1: UTF-8 length of the text between the base and the token start
       (measure-only call: no destination buffer, zero capacity) */
    u_strToUTF8(0, 0, &len1,
                iter->org->utf16 + iter->utf16_base,
                iter->org_start - iter->utf16_base,
                &status);

    status = U_ZERO_ERROR;

    *start = len1 + iter->utf8_base;

    /* len2: UTF-8 length of the text between the base and the token end */
    u_strToUTF8(0, 0, &len2,
                iter->org->utf16 + iter->utf16_base,
                iter->org_start - iter->utf16_base + iter->org_len,
                &status);

    *len = len2 - len1;

    if (cstr)
    {
        /* create the UTF-8 buffer on first use, then convert all of org */
        if (!iter->org8)
            iter->org8 = icu_buf_utf8_create(0);
        status = U_ZERO_ERROR;
        icu_utf16_to_utf8(iter->org8, iter->org, &status);
        *cstr = icu_buf_utf8_to_cstr(iter->org8);
    }
    /* remember this position for the next call */
    iter->utf8_base = *start;
    iter->utf16_base = iter->org_start;
}
Esempio n. 23
0
void intl_convert_utf16_to_utf8(char** target, int* target_len,
                                const UChar* src, int  src_len,
                                UErrorCode*  status) {
  char* dst_buf = NULL;
  int32_t dst_len;

  /* Determine required destination buffer size (pre-flighting). */
  *status = U_ZERO_ERROR;
  u_strToUTF8(NULL, 0, &dst_len, src, src_len, status);

  /* Bail out if an unexpected error occured.
   * (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough).
   * (U_STRING_NOT_TERMINATED_WARNING usually means that the input string
   * is empty).
   */
  if (*status != U_BUFFER_OVERFLOW_ERROR &&
      *status != U_STRING_NOT_TERMINATED_WARNING) {
    return;
  }

  // Allocate memory for the destination buffer (it will be zero-terminated).
  dst_buf = (char *)malloc(dst_len + 1);

  /* Convert source string from UTF-16 to UTF-8. */
  *status = U_ZERO_ERROR;
  u_strToUTF8(dst_buf, dst_len, NULL, src, src_len, status);
  if (U_FAILURE(*status)) {
    free(dst_buf);
    return;
  }

  /* U_STRING_NOT_TERMINATED_WARNING is OK for us => reset 'status'. */
  *status = U_ZERO_ERROR;

  dst_buf[dst_len] = 0;
  *target = dst_buf;
  *target_len = dst_len;
}
Esempio n. 24
0
/* Writing Functions */
/*
 * Write a string resource to the Java output stream `out`.
 *
 * The "%%UCARULES" resource is not inlined: a reference to the file
 * "UCARules.utf8" is written to `out`, and the string itself is stored
 * as UTF-8 in that file, placed in outDir when outDir is set. Every
 * other string is written through str_write_java().
 *
 * Errors are reported through *status: U_ILLEGAL_ARGUMENT_ERROR when the
 * output path does not fit the local buffer, U_FILE_ACCESS_ERROR when
 * the data file cannot be opened, U_MEMORY_ALLOCATION_ERROR when the
 * conversion buffer cannot be allocated, or the status set by
 * u_strToUTF8().
 */
static void 
string_write_java(struct SResource *res,UErrorCode *status) {       
    if(uprv_strcmp(srBundle->fKeys+res->fKey,"%%UCARULES")==0 ){
        char fileName[1024] ={0};
        const char* file = "UCARules.utf8";
        FileStream* datFile = NULL;
        const char* type = "new ICUListResourceBundle.ResourceString(";
        /* a UTF-16 unit never needs more than 3 UTF-8 bytes; the extra
         * byte keeps the request non-zero for an empty string */
        int32_t capacity = 3 * res->u.fString.fLength + 1;
        char* dest = NULL;
        int32_t len = 0;

        if(outDir){
            size_t dirLen = uprv_strlen(outDir);
            /* directory + separator + file name + terminator must fit */
            if(dirLen + 1 + uprv_strlen(file) + 1 > sizeof(fileName)){
                *status=U_ILLEGAL_ARGUMENT_ERROR;
                return;
            }
            uprv_strcat(fileName,outDir);
            /* only look at the last character when there is one */
            if(dirLen>0 && outDir[dirLen-1]!=U_FILE_SEP_CHAR){
                uprv_strcat(fileName,U_FILE_SEP_STRING);
            }
        }
        uprv_strcat(fileName,file);/* UCARULES.utf8 UTF-8 file */
        
        write_tabs(out);

        T_FileStream_write(out, type, (int32_t)uprv_strlen(type));
        T_FileStream_write(out, "\"", 1);
        T_FileStream_write(out, file, (int32_t)uprv_strlen(file));
        T_FileStream_write(out, "\")\n", 3);

        datFile=T_FileStream_open(fileName,"w");
        if(!datFile){
            *status=U_FILE_ACCESS_ERROR;
            return;
        }

        dest=(char*) uprv_malloc(capacity);
        if(!dest){
            *status=U_MEMORY_ALLOCATION_ERROR;
            T_FileStream_close(datFile);
            return;
        }
        
        u_strToUTF8(dest,capacity,&len,res->u.fString.fChars,res->u.fString.fLength,status);
        if(U_FAILURE(*status)){
            T_FileStream_close(datFile);
            uprv_free(dest);
            return;
        }
        T_FileStream_write(datFile,dest,len);
        T_FileStream_close(datFile);
        uprv_free(dest);
           
    }else{
        /* The former special case for the "Rule" key only copied the
         * string into a scratch buffer and freed it again, without
         * checking the allocation; it had no effect and was removed. */
        str_write_java(res->u.fString.fChars,res->u.fString.fLength,TRUE,status);
    }

}
Esempio n. 25
0
inline void writeUTF8String(std::ostream& output, const UChar *str, size_t len = 0) {
	if (len == 0) {
		len = u_strlen(str);
	}

	std::vector<char> buffer(len * 4);
	int32_t olen = 0;
	UErrorCode status = U_ZERO_ERROR;
	u_strToUTF8(&buffer[0], len * 4 - 1, &olen, str, len, &status);

	uint16_t cs = static_cast<uint16_t>(olen);
	writeRaw(output, cs);
	output.write(&buffer[0], cs);
}
Esempio n. 26
0
/* {{{ intl_convert_utf16_to_utf8
 * Convert the given string from UTF-16 to UTF-8.
 *
 * @param src      String to convert.
 * @param src_len  Length of the source string.
 * @param status   Conversion status; U_ZERO_ERROR on success.
 *
 * @return a new zero-terminated zend_string, or NULL when either ICU
 *         call reports an unexpected status
 */
zend_string* intl_convert_utf16_to_utf8(
	const UChar* src,    int32_t  src_len,
	UErrorCode*  status )
{
	int32_t      utf8_len;
	zend_string* result;

	/* Pre-flight: ask ICU for the required destination size. */
	*status = U_ZERO_ERROR;
	u_strToUTF8( NULL, 0, &utf8_len, src, src_len, status );

	/* Only two outcomes are expected here: U_BUFFER_OVERFLOW_ERROR (the
	 * empty destination is too small) and U_STRING_NOT_TERMINATED_WARNING
	 * (usually an empty input string). Anything else ends the call.
	 */
	switch( *status )
	{
		case U_BUFFER_OVERFLOW_ERROR:
		case U_STRING_NOT_TERMINATED_WARNING:
			break;
		default:
			return NULL;
	}

	/* The string gets its terminating zero written below. */
	result = zend_string_alloc(utf8_len, 0);

	/* Convert the source string from UTF-16 to UTF-8. */
	*status = U_ZERO_ERROR;
	u_strToUTF8( ZSTR_VAL(result), utf8_len, NULL, src, src_len, status );
	if( U_SUCCESS( *status ) )
	{
		/* U_STRING_NOT_TERMINATED_WARNING is fine here => reset 'status'. */
		*status = U_ZERO_ERROR;
		ZSTR_VAL(result)[utf8_len] = 0;
		return result;
	}

	zend_string_free(result);
	return NULL;
}
Esempio n. 27
0
/*
 * Generate a text file with spaces in it from a file without.
 */
int generateFile(const UChar *chars, int32_t length) {
    Locale root("");
    UCharCharacterIterator *noSpaceIter = new UCharCharacterIterator(chars, length);
    UErrorCode status = U_ZERO_ERROR;
    
    UnicodeSet complexContext(UNICODE_STRING_SIMPLE("[:LineBreak=SA:]"), status);
    BreakIterator *breakIter = BreakIterator::createWordInstance(root, status);
    breakIter->adoptText(noSpaceIter);
    char outbuf[1024];
    int32_t strlength;
    UChar bom = 0xFEFF;
    
    printf("%s", u_strToUTF8(outbuf, sizeof(outbuf), &strlength, &bom, 1, &status));
    int32_t prevbreak = 0;
    while (U_SUCCESS(status)) {
        int32_t nextbreak = breakIter->next();
        if (nextbreak == BreakIterator::DONE) {
            break;
        }
        printf("%s", u_strToUTF8(outbuf, sizeof(outbuf), &strlength, &chars[prevbreak],
                                    nextbreak-prevbreak, &status));
        if (nextbreak > 0 && complexContext.contains(chars[nextbreak-1])
            && complexContext.contains(chars[nextbreak])) {
            printf(" ");
        }
        prevbreak = nextbreak;
    }
    
    if (U_FAILURE(status)) {
        fprintf(stderr, "generate failed: %s\n", u_errorName(status));
        return status;
    }
    else {
        return 0;
    }
}
Esempio n. 28
0
// There are quicker ways to do this conversion, but it's necessary to follow
// this to match the functionality of fbcode/multifeed/text/TokenizeTextMap.cpp.
std::string icuStringToUTF8(const UnicodeString& ustr) {
  UErrorCode status = U_ZERO_ERROR;
  int32_t bufSize = 0;
  std::string result;

  // Calculate the size of the buffer needed to hold ustr, converted to UTF-8.
  u_strToUTF8(NULL, 0, &bufSize, ustr.getBuffer(), ustr.length(), &status);
  if (status != U_BUFFER_OVERFLOW_ERROR &&
      status != U_STRING_NOT_TERMINATED_WARNING) {
    return result;
  }

  result.resize(bufSize);

  status = U_ZERO_ERROR;
  u_strToUTF8(&result[0], bufSize, NULL, ustr.getBuffer(), ustr.length(),
              &status);

  if (U_FAILURE(status)) {
    result.clear();
  }

  return result;
}
Esempio n. 29
0
 int add_string_attribute(int n, v8::Local<v8::Value> value) const {
     uint16_t source[(max_dbf_field_length+2)*2];
     char dest[(max_dbf_field_length+1)*4];
     memset(source, 0, (max_dbf_field_length+2)*4);
     memset(dest, 0, (max_dbf_field_length+1)*4);
     int32_t dest_length;
     UErrorCode error_code = U_ZERO_ERROR;
     value->ToString()->Write(source, 0, max_dbf_field_length+1);
     u_strToUTF8(dest, m_fields[n].width(), &dest_length, source, std::min(max_dbf_field_length+1, value->ToString()->Length()), &error_code);
     if (error_code == U_BUFFER_OVERFLOW_ERROR) {
         // thats ok, it just means we clip the text at that point
     } else if (U_FAILURE(error_code)) {
         throw std::runtime_error("UTF-16 to UTF-8 conversion failed");
     }
     return DBFWriteStringAttribute(m_dbf_handle, m_current_shape, n, dest);
 }
Esempio n. 30
0
 // Converts a JavaScript value to a UTF-8 string, clipped to the width of
 // field n of the shapefile, and adds it as attribute n.
 //
 // The value is converted to a string once, so the text that is copied
 // and the length used for the conversion always belong to the same
 // string. Throws std::runtime_error when ICU reports a failure other
 // than a buffer overflow.
 static void add_string_attribute(Osmium::Export::Shapefile* shapefile, int n, v8::Local<v8::Value> value) {
     uint16_t source[(Osmium::Export::Shapefile::max_dbf_field_length+2)*2];
     char dest[(Osmium::Export::Shapefile::max_dbf_field_length+1)*4];
     memset(source, 0, sizeof(source));
     memset(dest, 0, sizeof(dest));
     int32_t dest_length = 0;
     UErrorCode error_code = U_ZERO_ERROR;

     // Never let ICU write over the last, zeroed byte of dest.
     int32_t capacity = shapefile->field(n).width();
     if (capacity > static_cast<int32_t>(sizeof(dest)) - 1) {
         capacity = static_cast<int32_t>(sizeof(dest)) - 1;
     }

     v8::Local<v8::String> text = value->ToString();
     text->Write(source, 0, Osmium::Export::Shapefile::max_dbf_field_length+1);
     u_strToUTF8(dest, capacity, &dest_length, source, std::min(Osmium::Export::Shapefile::max_dbf_field_length+1, text->Length()), &error_code);
     if (error_code == U_BUFFER_OVERFLOW_ERROR) {
         // thats ok, it just means we clip the text at that point
     } else if (U_FAILURE(error_code)) {
         throw std::runtime_error("UTF-16 to UTF-8 conversion failed");
     }
     shapefile->add_attribute(n, dest);
 }