示例#1
0
文件: icu.cpp 项目: bd808/hhvm
icu::UnicodeString u16(const char *u8, int32_t u8_len, UErrorCode &error,
                       UChar32 subst /* =0 */) {
  error = U_ZERO_ERROR;
  if (u8_len == 0) {
    return icu::UnicodeString();
  }
  int32_t outlen;
  if (subst) {
    u_strFromUTF8WithSub(nullptr, 0, &outlen, u8, u8_len,
                         subst, nullptr, &error);
  } else {
    u_strFromUTF8(nullptr, 0, &outlen, u8, u8_len, &error);
  }
  if (error != U_BUFFER_OVERFLOW_ERROR) {
    return icu::UnicodeString();
  }
  icu::UnicodeString ret;
  auto out = ret.getBuffer(outlen + 1);
  error = U_ZERO_ERROR;
  if (subst) {
    u_strFromUTF8WithSub(out, outlen + 1, &outlen, u8, u8_len,
                         subst, nullptr, &error);
  } else {
    u_strFromUTF8(out, outlen + 1, &outlen, u8, u8_len, &error);
  }
  ret.releaseBuffer(outlen);
  if (U_FAILURE(error)) {
    return icu::UnicodeString();
  }
  return ret;
}
示例#2
0
文件: icu.c 项目: Kielek/calibre
// Collator.strcmp {{{
static PyObject *
icu_Collator_strcmp(icu_Collator *self, PyObject *args, PyObject *kwargs) {
    char *a_, *b_;
    int32_t asz, bsz;
    UChar *a, *b;
    UErrorCode status = U_ZERO_ERROR;
    UCollationResult res = UCOL_EQUAL;
  
    if (!PyArg_ParseTuple(args, "eses", "UTF-8", &a_, "UTF-8", &b_)) return NULL;
    
    asz = (int32_t)strlen(a_); bsz = (int32_t)strlen(b_);

    a = (UChar*)calloc(asz*4 + 1, sizeof(UChar));
    b = (UChar*)calloc(bsz*4 + 1, sizeof(UChar));


    if (a == NULL || b == NULL) return PyErr_NoMemory();

    u_strFromUTF8(a, asz*4 + 1, NULL, a_, asz, &status);
    u_strFromUTF8(b, bsz*4 + 1, NULL, b_, bsz, &status);
    PyMem_Free(a_); PyMem_Free(b_);

    if (U_SUCCESS(status))
        res = ucol_strcoll(self->collator, a, -1, b, -1);

    free(a); free(b);

    return Py_BuildValue("i", res);
} // }}}
static UChar *
ustring_from_utf8 (const gchar *utf8, int32_t *ustrLength)
{
  UChar *dest;
  int32_t destLength, utf8Length = strlen (utf8);
  UErrorCode errorCode;

  errorCode = 0;
  u_strFromUTF8 (NULL, 0, &destLength, utf8, utf8Length, &errorCode);
  if (errorCode != U_BUFFER_OVERFLOW_ERROR)
    {
      g_warning ("can't get the number of chars in UTF-8 string: %s",
		 u_errorName (errorCode));
      return NULL;
    }

  dest = g_malloc0_n (destLength + 1, sizeof(UChar));

  errorCode = 0;
  u_strFromUTF8 (dest, destLength + 1, NULL, utf8, utf8Length, &errorCode);
  if (errorCode != U_ZERO_ERROR)
    {
      g_free (dest);
      g_warning ("can't convert UTF-8 string to ustring: %s",
		 u_errorName (errorCode));
      return NULL;
    }

  *ustrLength = destLength;
  return dest;
}
示例#4
0
U_CAPI int32_t U_EXPORT2
uspoof_getSkeletonUTF8(const USpoofChecker *sc,
                       uint32_t type,
                       const char *s,  int32_t length,
                       char *dest, int32_t destCapacity,
                       UErrorCode *status) {
    // Lacking a UTF-8 normalization API, just converting the input to
    // UTF-16 seems as good an approach as any.  In typical use, input will
    // be an identifier, which is to say not too long for stack buffers.
    if (U_FAILURE(*status)) {
        return 0;
    }
    // Buffers for the UChar form of the input and skeleton strings.
    UChar    smallInBuf[USPOOF_STACK_BUFFER_SIZE];
    UChar   *inBuf = smallInBuf;
    UChar    smallOutBuf[USPOOF_STACK_BUFFER_SIZE];
    UChar   *outBuf = smallOutBuf;

    int32_t  lengthInUChars = 0;
    int32_t  skelLengthInUChars = 0;
    int32_t  skelLengthInUTF8 = 0;
    
    u_strFromUTF8(inBuf, USPOOF_STACK_BUFFER_SIZE, &lengthInUChars,
                  s, length, status);
    if (*status == U_BUFFER_OVERFLOW_ERROR) {
        inBuf = static_cast<UChar *>(uprv_malloc((lengthInUChars+1)*sizeof(UChar)));
        if (inBuf == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto cleanup;
        }
        *status = U_ZERO_ERROR;
        u_strFromUTF8(inBuf, lengthInUChars+1, &lengthInUChars,
                      s, length, status);
    }
    
    skelLengthInUChars = uspoof_getSkeleton(sc, type, inBuf, lengthInUChars,
                                         outBuf, USPOOF_STACK_BUFFER_SIZE, status);
    if (*status == U_BUFFER_OVERFLOW_ERROR) {
        outBuf = static_cast<UChar *>(uprv_malloc((skelLengthInUChars+1)*sizeof(UChar)));
        if (outBuf == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto cleanup;
        }
        *status = U_ZERO_ERROR;
        skelLengthInUChars = uspoof_getSkeleton(sc, type, inBuf, lengthInUChars,
                                         outBuf, skelLengthInUChars+1, status);
    }

    u_strToUTF8(dest, destCapacity, &skelLengthInUTF8,
                outBuf, skelLengthInUChars, status);

  cleanup:
    if (inBuf != smallInBuf) {
        uprv_free(inBuf);
    }
    if (outBuf != smallOutBuf) {
        uprv_free(outBuf);
    }
    return skelLengthInUTF8;
}
示例#5
0
文件: icu_utf8.c 项目: nla/yaz
UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 *dest16,
                                    const char *src8cstr,
                                    UErrorCode *status)
{
    size_t src8cstr_len = 0;
    int32_t utf16_len = 0;

    *status = U_ZERO_ERROR;
    src8cstr_len = strlen(src8cstr);

    u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
                  &utf16_len,
                  src8cstr, src8cstr_len, status);

    /* check for buffer overflow, resize and retry */
    if (*status == U_BUFFER_OVERFLOW_ERROR)
    {
        icu_buf_utf16_resize(dest16, utf16_len * 2);
        *status = U_ZERO_ERROR;
        u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
                      &utf16_len,
                      src8cstr, src8cstr_len, status);
    }

    if (U_SUCCESS(*status) && utf16_len <= dest16->utf16_cap)
        dest16->utf16_len = utf16_len;
    else
        icu_buf_utf16_clear(dest16);

    return *status;
}
示例#6
0
//ret_icu_str must be freed after usage
int
icu_str_from_utf8(const char *utf_str,
    UChar **ret_icu_str,
    UErrorCode *ret_icu_err)
{
    int32_t icu_sz;

    SOL_NULL_CHECK(ret_icu_str, -EINVAL);
    SOL_NULL_CHECK(ret_icu_err, -EINVAL);

    icu_sz = 0;
    *ret_icu_str = NULL;
    *ret_icu_err = U_ZERO_ERROR;
    u_strFromUTF8(NULL, 0, &icu_sz, utf_str, -1, ret_icu_err);
    if (U_FAILURE(*ret_icu_err) && *ret_icu_err != U_BUFFER_OVERFLOW_ERROR)
        return -EINVAL;

    *ret_icu_str = calloc(icu_sz + 1, sizeof(UChar));
    SOL_NULL_CHECK(*ret_icu_str, -ENOMEM);

    *ret_icu_err = U_ZERO_ERROR;
    u_strFromUTF8(*ret_icu_str, icu_sz + 1, NULL, utf_str, -1,
        ret_icu_err);
    if (U_FAILURE(*ret_icu_err)) {
        free(*ret_icu_str);
        *ret_icu_str = NULL;
        return -EINVAL;
    }

    return 0;
}
示例#7
0
/** Lexical compare routine.
 *
 * Performs a lexical string compare on two normalized UTF-8 strings as
 * described in RFC 2608, section 6.4.
 *
 * @param[in] str1 - A pointer to string to be compared.
 * @param[in] str2 - A pointer to string to be compared.
 * @param[in] length - The maximum length to compare in bytes.
 *
 * @return Zero if @p str1 is equal to @p str2, less than zero if @p str1
 *    is greater than @p str2, greater than zero if @p str1 is less than
 *    @p str2.
 */
static int SLPCompareNormalizedString(const char * str1,
      const char * str2, size_t length)
{
#ifdef HAVE_ICU
   int result;
   UErrorCode uerr = 0;
   UChar * ustr1 = xmalloc((length + 1) * sizeof(UChar));
   UChar * ustr2 = xmalloc((length + 1) * sizeof(UChar));
   if (ustr1 && ustr2)
   {
      u_strFromUTF8(ustr1, (int32_t)length + 1, 0, str1,
            (int32_t)length, &uerr);
      u_strFromUTF8(ustr2, (int32_t)length + 1, 0, str2,
            (int32_t)length, &uerr);
   }
   if (ustr1 != 0 && ustr2 != 0 && uerr == 0)
      result = (int)u_strncasecmp(ustr1, ustr2, (int32_t)length, 0);
   else
      result = strncasecmp(str1, str2, length);
   xfree(ustr1);
   xfree(ustr2);
   return result;
#else
   return strncasecmp(str1, str2, length);
#endif /* HAVE_ICU */
}
示例#8
0
/* {{{ intl_convert_utf8_to_utf16
 * Convert given string from UTF-8 to UTF-16 to *target buffer.
 *
 * It *target is NULL then we allocate a large enough buffer,
 * store the converted string into it, and make target point to it.
 *
 * Otherwise, if *target is non-NULL, we assume that it points to a
 * dynamically allocated buffer of *target_len bytes length.
 * In this case the buffer will be used to store the converted string to,
 * and may be resized (made larger) if needed.
 *
 * Note that ICU uses int32_t as string length and PHP uses size_t. While
 * it is not likely in practical situations to have strings longer than
 * INT32_MAX, these are different types and need to be handled carefully.
 *
 * @param target      Where to place the result.
 * @param target_len  Result length.
 * @param source      String to convert.
 * @param source_len  Length of the source string.
 * @param status      Conversion status.
 *
 * @return void       This function does not return anything.
 */
void intl_convert_utf8_to_utf16(
	UChar**     target, int32_t* target_len,
	const char* src,    size_t  src_len,
	UErrorCode* status )
{
	UChar*      dst_buf = NULL;
	int32_t     dst_len = 0;

	/* If *target is NULL determine required destination buffer size (pre-flighting).
	 * Otherwise, attempt to convert source string; if *target buffer is not large enough
	 * it will be resized appropriately.
	 */
	*status = U_ZERO_ERROR;

	if(src_len > INT32_MAX) {
		/* we can not fit this string */
		*status = U_BUFFER_OVERFLOW_ERROR;
		return;
	}

	u_strFromUTF8( *target, *target_len, &dst_len, src, (int32_t)src_len, status );

	if( *status == U_ZERO_ERROR )
	{
		/* String is converted successfully */
		(*target)[dst_len] = 0;
		*target_len = dst_len;
		return;
	}

	/* Bail out if an unexpected error occurred.
	 * (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough).
	 * (U_STRING_NOT_TERMINATED_WARNING usually means that the input string is empty).
	 */
	if( *status != U_BUFFER_OVERFLOW_ERROR && *status != U_STRING_NOT_TERMINATED_WARNING )
		return;

	/* Allocate memory for the destination buffer (it will be zero-terminated). */
	dst_buf = eumalloc( dst_len + 1 );

	/* Convert source string from UTF-8 to UTF-16. */
	*status = U_ZERO_ERROR;
	u_strFromUTF8( dst_buf, dst_len+1, NULL, src, src_len, status );
	if( U_FAILURE( *status ) )
	{
		efree( dst_buf );
		return;
	}

	dst_buf[dst_len] = 0;

	if( *target )
		efree( *target );

	*target     = dst_buf;
	*target_len = dst_len;
}
示例#9
0
U_CAPI int32_t U_EXPORT2
uspoof_checkUTF8(const USpoofChecker *sc,
                 const char *text, int32_t length,
                 int32_t *position,
                 UErrorCode *status) {

    if (U_FAILURE(*status)) {
        return 0;
    }
    UChar stackBuf[USPOOF_STACK_BUFFER_SIZE];
    UChar* text16 = stackBuf;
    int32_t len16;
    
    u_strFromUTF8(text16, USPOOF_STACK_BUFFER_SIZE, &len16, text, length, status);
    if (U_FAILURE(*status) && *status != U_BUFFER_OVERFLOW_ERROR) {
        return 0;
    }
    if (*status == U_BUFFER_OVERFLOW_ERROR) {
        text16 = static_cast<UChar *>(uprv_malloc(len16 * sizeof(UChar) + 2));
        if (text16 == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }
        *status = U_ZERO_ERROR;
        u_strFromUTF8(text16, len16+1, NULL, text, length, status);
    }

    int32_t position16 = -1;
    int32_t result = uspoof_check(sc, text16, len16, &position16, status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    if (position16 > 0) {
        // Translate a UTF-16 based error position back to a UTF-8 offset.
        // u_strToUTF8() in preflight mode is an easy way to do it.
        U_ASSERT(position16 <= len16);
        u_strToUTF8(NULL, 0, position, text16, position16, status);
        if (position > 0) {
            // position is the required buffer length from u_strToUTF8, which includes
            // space for a terminating NULL, which we don't want, hence the -1.
            *position -= 1;
        }
        *status = U_ZERO_ERROR;   // u_strToUTF8, above sets BUFFER_OVERFLOW_ERROR.
    }

    if (text16 != stackBuf) {
        uprv_free(text16);
    }
    return result;
    
}
int helper_collation_str(const char *src, char *dest, int dest_size)
{
	HELPER_FN_CALL;
	int32_t size = 0;
	UErrorCode status = 0;
	UChar tmp_result[CTS_SQL_MAX_LEN];
	UCollator *collator;
	const char *region;

	region = vconf_get_str(VCONFKEY_REGIONFORMAT);
	HELPER_DBG("region %s", region);
	collator = ucol_open(region, &status);
	h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED,
			"ucol_open() Failed(%s)", u_errorName(status));

	if (U_FAILURE(status)){
		ERR("ucol_setAttribute Failed(%s)", u_errorName(status));
		ucol_close(collator);
		return CTS_ERR_ICU_FAILED;
	}

	u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, src, -1, &status);
	if (U_FAILURE(status)){
		ERR("u_strFromUTF8 Failed(%s)", u_errorName(status));
		ucol_close(collator);
		return CTS_ERR_ICU_FAILED;
	}
	size = ucol_getSortKey(collator, tmp_result, -1, (uint8_t *)dest, dest_size);
	ucol_close(collator);
	dest[size]='\0';

	return CTS_SUCCESS;
}
示例#11
0
static void to_utf16(VALUE string, UChar *ustr, int32_t *ulen) {
    UErrorCode status = U_ZERO_ERROR;

    string = StringValue(string);
    u_strFromUTF8(ustr, BUF_SIZE, ulen, RSTRING_PTR(string), RSTRING_LEN(string), &status);
    if (status == U_INVALID_CHAR_FOUND) ulen = 0;
}
示例#12
0
/**
 * NIFs
 */
inline void do_from_utf8(
    ErlNifBinary  in,
    ErlNifBinary& out, 
    int32_t& ulen,
    UErrorCode& status) 
{
    status = U_ZERO_ERROR;
    if (!enif_alloc_binary(FROM_ULEN(ulen), &out)) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    u_strFromUTF8(
        (UChar*) out.data,       /* dest */
        ulen,                    /* capacity */
        &ulen,                   /* len of result */
        (char*)   in.data,       /* src */
        (int32_t) in.size,       /* len of src */
        &status);                /* error code */

    if (U_FAILURE(status)) {
        enif_release_binary(&out);
        return;
    }

    if (FROM_ULEN(ulen) != out.size) {
        /* shrink binary if it was too large */
        enif_realloc_binary(&out, FROM_ULEN(ulen));
    }
}
示例#13
0
char *ICUStringMgr::upperUTF8(char *buf, unsigned int maxlen) const {
	char *ret = buf;
	int max = (maxlen) ? maxlen : strlen(buf);
		
	UErrorCode err = U_ZERO_ERROR;
		
	if (!buf || !max) {
		return ret;
	}
		
	UChar *lowerStr = new UChar[max+10];
	UChar *upperStr = new UChar[max+10];
		
	u_strFromUTF8(lowerStr, max+9, 0, buf, -1, &err);
	if (err != U_ZERO_ERROR) {
//		SWLog::getSystemLog()->logError("from: %s", u_errorName(err));
		delete [] lowerStr;
		delete [] upperStr;
		return ret;
	}

	u_strToUpper(upperStr, max+9, lowerStr, -1, 0, &err);
	if (err != U_ZERO_ERROR) {
//		SWLog::getSystemLog()->logError("upperCase: %s", u_errorName(err));
		delete [] lowerStr;
		delete [] upperStr;
		return ret;
	}

	ret = u_strToUTF8(ret, max, 0, upperStr, -1, &err);
		
	delete [] lowerStr;
	delete [] upperStr;
	return ret;
}
示例#14
0
static UChar * convertFromUTF8(UChar *outBuf, int32_t outBufCapacity, int32_t *outputLength,
                               const char *in, int32_t inLength, UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return NULL;
    }
    UChar *dest = outBuf;
    u_strFromUTF8(dest, outBufCapacity, outputLength, in, inLength, status);
    if (*status == U_BUFFER_OVERFLOW_ERROR) {
        dest = static_cast<UChar *>(uprv_malloc(*outputLength * sizeof(UChar)));
        if (dest == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        *status = U_ZERO_ERROR;
        u_strFromUTF8(dest, *outputLength, NULL, in, inLength, status);
    }
    return dest;
}
示例#15
0
文件: cquel.c 项目: dardevelin/cquel
int cq_fields_to_utf8(char *buf, size_t buflen, size_t fieldc,
        char **fieldnames, bool usequotes)
{
    UChar *buf16;
    UErrorCode status = U_ZERO_ERROR;
    size_t num_left = fieldc;
    int rc = 0;

    if (num_left == 0)
        return 1;

    buf16 = calloc(buflen, sizeof(UChar));
    if (buf16 == NULL)
        return -1;

    for (size_t i = 0; i < fieldc; ++i) {
        UChar *temp = calloc(buflen, sizeof(UChar));
        if (temp == NULL) {
            rc = -2;
            break;
        }

        u_strFromUTF8(temp, buflen, NULL, fieldnames[i], strlen(fieldnames[i]),
                &status);
        if (!U_SUCCESS(status)) {
            rc = 2;
            free(temp);
            break;
        }

        bool isstr = false;
        if (usequotes) {
            for (int32_t j = 0; j < u_strlen(temp); ++j) {
                if (!isdigit(temp[j])) {
                    isstr = true;
                    break;
                }
            }
        }

        if (isstr) u_strcat(buf16, u"'");
        u_strcat(buf16, temp);
        if (isstr) u_strcat(buf16, u"'");
        free(temp);
        if (--num_left > 0) {
            u_strcat(buf16, u",");
        }
    }

    u_strToUTF8(buf, buflen, NULL, buf16, u_strlen(buf16), &status);
    if (!U_SUCCESS(status))
        rc = 3;

    free(buf16);
    return rc;
}
示例#16
0
文件: unicode.hpp 项目: AMDmi3/osmium
 inline v8::Local<v8::String> utf8_to_v8_String(const char* cstring) {
     UErrorCode error_code = U_ZERO_ERROR;
     UChar dest[characters*2];
     int32_t dest_length;
     u_strFromUTF8(dest, characters*2, &dest_length, cstring, -1, &error_code);
     if (error_code != U_ZERO_ERROR) {
         throw UTF8_to_UTF16_Conversion_Error(error_code);
     }
     return v8::String::New(dest, dest_length);
 }
示例#17
0
/*----------------------------------------------------------------------------------------------
	This method uses an ICU function to convert a string from UTF-8 to UTF-16.

	Assumptions:
		If sourceLen is -1, it will be computed (by ICU)

	Exit conditions:
		<text>

	Parameters:
		<text>

	Return value:
		The number of characters required to store the fully-converted string
			(which may be greater than targetLen)
----------------------------------------------------------------------------------------------*/
int UnicodeConverter::Convert(const char* source, int sourceLen,
	UChar* target, int targetLen)
{
	UErrorCode status = U_ZERO_ERROR;
	int32_t spaceRequiredForData;

	u_strFromUTF8(target, targetLen, &spaceRequiredForData, source, sourceLen, &status);

	if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
		throw std::runtime_error("Unable to convert from UTF-8 to UTF-16");

	return spaceRequiredForData;
}
示例#18
0
inline UString readUTF8String(S& input) {
	uint16_t len = 0;
	readRaw(input, len);

	UString rv(len, 0);
	std::vector<char> buffer(len);
	input.read(&buffer[0], len);

	int32_t olen = 0;
	UErrorCode status = U_ZERO_ERROR;
	u_strFromUTF8(&rv[0], len, &olen, &buffer[0], len, &status);

	rv.resize(olen);

	return rv;
}
示例#19
0
文件: icu.c 项目: Kielek/calibre
// Collator.sort_key {{{
static PyObject *
icu_Collator_sort_key(icu_Collator *self, PyObject *args, PyObject *kwargs) {
    char *input;
    int32_t sz;
    UChar *buf;
    uint8_t *buf2;
    PyObject *ans;
    int32_t key_size;
    UErrorCode status = U_ZERO_ERROR;
  
    if (!PyArg_ParseTuple(args, "es", "UTF-8", &input)) return NULL;

    sz = (int32_t)strlen(input);

    buf = (UChar*)calloc(sz*4 + 1, sizeof(UChar));

    if (buf == NULL) return PyErr_NoMemory();

    u_strFromUTF8(buf, sz*4 + 1, &key_size, input, sz, &status);
    PyMem_Free(input);

    if (U_SUCCESS(status)) {
        buf2 = (uint8_t*)calloc(7*sz+1, sizeof(uint8_t));
        if (buf2 == NULL) return PyErr_NoMemory();

        key_size = ucol_getSortKey(self->collator, buf, -1, buf2, 7*sz+1);

        if (key_size == 0) {
            ans = PyBytes_FromString("");
        } else {
            if (key_size >= 7*sz+1) {
                free(buf2);
                buf2 = (uint8_t*)calloc(key_size+1, sizeof(uint8_t));
                if (buf2 == NULL) return PyErr_NoMemory();
                ucol_getSortKey(self->collator, buf, -1, buf2, key_size+1);
            }
            ans = PyBytes_FromString((char *)buf2);
        }
        free(buf2);
    } else ans = PyBytes_FromString("");

    free(buf);
    if (ans == NULL) return PyErr_NoMemory();

    return ans;
} // }}}
int helper_normalize_str(const char *src, char *dest, int dest_size)
{
	int type = CTS_LANG_OTHERS;
	int32_t size;
	UErrorCode status = 0;
	UChar tmp_result[CTS_SQL_MAX_LEN*2];
	UChar result[CTS_SQL_MAX_LEN*2];
	int i = 0;
	int j = 0;
	int str_len = strlen(src);
	int char_len = 0;

	for (i=0;i<str_len;i+=char_len) {
		char char_src[10];
		char_len = check_utf8(src[i]);
		memcpy(char_src, &src[i], char_len);
		char_src[char_len] = '\0';

		u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, char_src, -1, &status);
		h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED,
				"u_strFromUTF8() Failed(%s)", u_errorName(status));

		u_strToLower(tmp_result, array_sizeof(tmp_result), tmp_result, -1, NULL, &status);
		h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED,
				"u_strToLower() Failed(%s)", u_errorName(status));

		size = unorm_normalize(tmp_result, -1, UNORM_NFD, 0,
				(UChar *)result, array_sizeof(result), &status);
		h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED,
				"unorm_normalize(%s) Failed(%s)", char_src, u_errorName(status));

		if (0 == i)
			type = helper_check_language(result);
		helper_extra_normalize(result, size);

		u_strToUTF8(&dest[j], dest_size-j, &size, result, -1, &status);
		h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED,
				"u_strToUTF8() Failed(%s)", u_errorName(status));
		j += size;
		dest[j++] = 0x01;
	}
	dest[j]='\0';
	HELPER_DBG("src(%s) is transformed(%s)", src, dest);
	return type;
}
示例#21
0
文件: icu.c 项目: Kielek/calibre
// title {{{
static PyObject *
icu_title(PyObject *self, PyObject *args) {
    char *input, *ans, *buf3 = NULL;
    const char *loc;
    int32_t sz;
    UChar *buf, *buf2;
    PyObject *ret;
    UErrorCode status = U_ZERO_ERROR;
  

    if (!PyArg_ParseTuple(args, "ses", &loc, "UTF-8", &input)) return NULL;
    
    sz = (int32_t)strlen(input);

    buf = (UChar*)calloc(sz*4 + 1, sizeof(UChar));
    buf2 = (UChar*)calloc(sz*8 + 1, sizeof(UChar));


    if (buf == NULL || buf2 == NULL) return PyErr_NoMemory();

    u_strFromUTF8(buf, sz*4, NULL, input, sz, &status);
    u_strToTitle(buf2, sz*8, buf, -1, NULL, loc, &status);

    ans = input;
    sz = u_strlen(buf2);
    free(buf);

    if (U_SUCCESS(status) && sz > 0) {
        buf3 = (char*)calloc(sz*5+1, sizeof(char));
        if (buf3 == NULL) return PyErr_NoMemory();
        u_strToUTF8(buf3, sz*5, NULL, buf2, -1, &status);
        if (U_SUCCESS(status)) ans = buf3;
    }

    ret = PyUnicode_DecodeUTF8(ans, strlen(ans), "replace");
    if (ret == NULL) return PyErr_NoMemory();

    free(buf2);
    if (buf3 != NULL) free(buf3);
    PyMem_Free(input);

    return ret;
} // }}}
static void TestFPos_SkelWithSeconds()
{
	const LocaleAndSkeletonItem * locSkelItemPtr;
	for (locSkelItemPtr = locSkelItems; locSkelItemPtr->locale != NULL; locSkelItemPtr++) {
	    UDateIntervalFormat* udifmt;
	    UChar   ubuf[kSizeUBuf];
	    int32_t ulen, uelen;
	    UErrorCode status = U_ZERO_ERROR;
	    
	    u_strFromUTF8(ubuf, kSizeUBuf, &ulen, locSkelItemPtr->skeleton, -1, &status);
	    udifmt = udtitvfmt_open(locSkelItemPtr->locale, ubuf, ulen, zoneGMT, -1, &status);
	    if ( U_FAILURE(status) ) {
           log_data_err("FAIL: udtitvfmt_open for locale %s, skeleton %s: %s\n",
                    locSkelItemPtr->locale, locSkelItemPtr->skeleton, u_errorName(status));
	    } else {
			const double * deltasPtr = deltas;
			const ExpectPosAndFormat * expectedPtr = locSkelItemPtr->expected;
			for (; *deltasPtr >= 0.0; deltasPtr++, expectedPtr++) {
			    UFieldPosition fpos = { locSkelItemPtr->fieldToCheck, 0, 0 };
			    UChar uebuf[kSizeUBuf];
			    char bbuf[kSizeBBuf];
			    char bebuf[kSizeBBuf];
			    status = U_ZERO_ERROR;
			    uelen = u_unescape(expectedPtr->format, uebuf, kSizeUBuf);
			    ulen = udtitvfmt_format(udifmt, startTime, startTime + *deltasPtr, ubuf, kSizeUBuf, &fpos, &status);
			    if ( U_FAILURE(status) ) {
			        log_err("FAIL: udtitvfmt_format for locale %s, skeleton %s, delta %.1f: %s\n",
			             locSkelItemPtr->locale, locSkelItemPtr->skeleton, *deltasPtr, u_errorName(status));
			    } else if ( ulen != uelen || u_strncmp(ubuf,uebuf,uelen) != 0 ||
			                fpos.beginIndex != expectedPtr->posBegin || fpos.endIndex != expectedPtr->posEnd ) {
			        u_strToUTF8(bbuf, kSizeBBuf, NULL, ubuf, ulen, &status);
			        u_strToUTF8(bebuf, kSizeBBuf, NULL, uebuf, uelen, &status); // convert back to get unescaped string
			        log_err("FAIL: udtitvfmt_format for locale %s, skeleton %s, delta %12.1f, expect %d-%d \"%s\", get %d-%d \"%s\"\n",
			             locSkelItemPtr->locale, locSkelItemPtr->skeleton, *deltasPtr,
			             expectedPtr->posBegin, expectedPtr->posEnd, bebuf,
			             fpos.beginIndex, fpos.endIndex, bbuf);
			    }
			}
	        udtitvfmt_close(udifmt);
	    }
    }
}
示例#23
0
MojErr MojDbTextUtils::strToUnicode(const MojString& src, UnicodeVec& destOut)
{
	MojErr err = destOut.resize(src.length() * 2);
	MojErrCheck(err);
	MojInt32 destCapacity = 0;
	MojInt32 destLength = 0;
	do {
		UChar* dest = NULL;
		err = destOut.begin(dest);
		MojErrCheck(err);
		destCapacity = (MojInt32) destOut.size();
		UErrorCode status = U_ZERO_ERROR;
		u_strFromUTF8(dest, destCapacity, &destLength, src.data(), (MojInt32) src.length(), &status);
		if (status != U_BUFFER_OVERFLOW_ERROR)
			MojUnicodeErrCheck(status);
		err = destOut.resize(destLength);
		MojErrCheck(err);
	} while (destLength > destCapacity);

	return MojErrNone;
}
示例#24
0
文件: psl.c 项目: jcajka/libpsl
static void _add_punycode_if_needed(UIDNA *idna, _psl_vector_t *v, _psl_entry_t *e)
{
	if (_str_is_ascii(e->label_buf))
		return;

	/* IDNA2008 UTS#46 punycode conversion */
	if (idna) {
		char lookupname[128] = "";
		UErrorCode status = 0;
		UIDNAInfo info = UIDNA_INFO_INITIALIZER;
		UChar utf16_dst[128], utf16_src[128];
		int32_t utf16_src_length;

		u_strFromUTF8(utf16_src, sizeof(utf16_src)/sizeof(utf16_src[0]), &utf16_src_length, e->label_buf, -1, &status);
		if (U_SUCCESS(status)) {
			int32_t dst_length = uidna_nameToASCII(idna, utf16_src, utf16_src_length, utf16_dst, sizeof(utf16_dst)/sizeof(utf16_dst[0]), &info, &status);
			if (U_SUCCESS(status)) {
				u_strToUTF8(lookupname, sizeof(lookupname), NULL, utf16_dst, dst_length, &status);
				if (U_SUCCESS(status)) {
					if (strcmp(e->label_buf, lookupname)) {
						_psl_entry_t suffix, *suffixp;

						/* fprintf(stderr, "libicu '%s' -> '%s'\n", e->label_buf, lookupname); */
						_suffix_init(&suffix, lookupname, strlen(lookupname));
						suffix.wildcard = e->wildcard;
						suffixp = _vector_get(v, _vector_add(v, &suffix));
						suffixp->label = suffixp->label_buf; /* set label to changed address */
					} /* else ignore */
				} /* else
					fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */
			} /* else
				fprintf(stderr, "Failed to convert to ASCII (status %d)\n", status); */
		} /* else
			fprintf(stderr, "Failed to convert UTF-8 to UTF-16 (status %d)\n", status); */
	}
}
void
StringCaseTest::TestCasingImpl(const UnicodeString &input,
                               const UnicodeString &output,
                               int32_t whichCase,
                               void *iter, const char *localeID, uint32_t options) {
    // UnicodeString
    UnicodeString result;
    const char *name;
    Locale locale(localeID);

    result=input;
    switch(whichCase) {
    case TEST_LOWER:
        name="toLower";
        result.toLower(locale);
        break;
    case TEST_UPPER:
        name="toUpper";
        result.toUpper(locale);
        break;
#if !UCONFIG_NO_BREAK_ITERATION
    case TEST_TITLE:
        name="toTitle";
        result.toTitle((BreakIterator *)iter, locale, options);
        break;
#endif
    case TEST_FOLD:
        name="foldCase";
        result.foldCase(options);
        break;
    default:
        name="";
        break; // won't happen
    }
    if(result!=output) {
        dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name);
    }
#if !UCONFIG_NO_BREAK_ITERATION
    if(whichCase==TEST_TITLE && options==0) {
        result=input;
        result.toTitle((BreakIterator *)iter, locale);
        if(result!=output) {
            dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res");
        }
    }
#endif

    // UTF-8
    char utf8In[100], utf8Out[100];
    int32_t utf8InLength, utf8OutLength, resultLength;
    UChar *buffer;

    IcuTestErrorCode errorCode(*this, "TestCasingImpl");
    LocalUCaseMapPointer csm(ucasemap_open(localeID, options, errorCode));
#if !UCONFIG_NO_BREAK_ITERATION
    if(iter!=NULL) {
        // Clone the break iterator so that the UCaseMap can safely adopt it.
        UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, NULL, errorCode);
        ucasemap_setBreakIterator(csm.getAlias(), clone, errorCode);
    }
#endif

    u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), errorCode);
    switch(whichCase) {
    case TEST_LOWER:
        name="ucasemap_utf8ToLower";
        utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(),
                    utf8Out, (int32_t)sizeof(utf8Out),
                    utf8In, utf8InLength, errorCode);
        break;
    case TEST_UPPER:
        name="ucasemap_utf8ToUpper";
        utf8OutLength=ucasemap_utf8ToUpper(csm.getAlias(),
                    utf8Out, (int32_t)sizeof(utf8Out),
                    utf8In, utf8InLength, errorCode);
        break;
#if !UCONFIG_NO_BREAK_ITERATION
    case TEST_TITLE:
        name="ucasemap_utf8ToTitle";
        utf8OutLength=ucasemap_utf8ToTitle(csm.getAlias(),
                    utf8Out, (int32_t)sizeof(utf8Out),
                    utf8In, utf8InLength, errorCode);
        break;
#endif
    case TEST_FOLD:
        name="ucasemap_utf8FoldCase";
        utf8OutLength=ucasemap_utf8FoldCase(csm.getAlias(),
                    utf8Out, (int32_t)sizeof(utf8Out),
                    utf8In, utf8InLength, errorCode);
        break;
    default:
        name="";
        utf8OutLength=0;
        break; // won't happen
    }
    buffer=result.getBuffer(utf8OutLength);
    u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, errorCode);
    result.releaseBuffer(errorCode.isSuccess() ? resultLength : 0);

    if(errorCode.isFailure()) {
        errcheckln(errorCode, "error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode));
        errorCode.reset();
    } else if(result!=output) {
        errln("error: %s() got a wrong result for a test case from casing.res", name);
        errln("expected \"" + output + "\" got \"" + result + "\"" );
    }
}
示例#26
0
//-------------------------------------------------------------------------------
//      
//  Read a text data file, convert it from UTF-8 to UChars, and return the data
//    in one big UChar * buffer, which the caller must delete.
//
//    (Lightly modified version of a similar function in regextst.cpp)
//
//--------------------------------------------------------------------------------
UChar *DecimalFormatTest::ReadAndConvertFile(const char *fileName, int32_t &ulen,
                                     UErrorCode &status) {
    UChar       *retPtr  = NULL;
    char        *fileBuf = NULL;
    const char  *fileBufNoBOM = NULL;
    FILE        *f       = NULL;

    ulen = 0;
    if (U_FAILURE(status)) {
        return retPtr;
    }

    //
    //  Open the file.
    //
    f = fopen(fileName, "rb");
    if (f == 0) {
        dataerrln("Error opening test data file %s\n", fileName);
        status = U_FILE_ACCESS_ERROR;
        return NULL;
    }
    //
    //  Read it in
    //
    int32_t            fileSize;
    int32_t            amtRead;
    int32_t            amtReadNoBOM;

    fseek( f, 0, SEEK_END);
    fileSize = ftell(f);
    fileBuf = new char[fileSize];
    fseek(f, 0, SEEK_SET);
    amtRead = fread(fileBuf, 1, fileSize, f);
    if (amtRead != fileSize || fileSize <= 0) {
        errln("Error reading test data file.");
        goto cleanUpAndReturn;
    }

    //
    // Look for a UTF-8 BOM on the data just read.
    //    The test data file is UTF-8.
    //    The BOM needs to be there in the source file to keep the Windows & 
    //    EBCDIC machines happy, so force an error if it goes missing.  
    //    Many Linux editors will silently strip it.
    //
    fileBufNoBOM = fileBuf + 3;
    amtReadNoBOM = amtRead - 3;
    if (fileSize<3 || uprv_strncmp(fileBuf, "\xEF\xBB\xBF", 3) != 0) {
        // TODO:  restore this check.
        errln("Test data file %s is missing its BOM", fileName);
        fileBufNoBOM = fileBuf;
        amtReadNoBOM = amtRead;
    }

    //
    // Find the length of the input in UTF-16 UChars
    //  (by preflighting the conversion)
    //
    u_strFromUTF8(NULL, 0, &ulen, fileBufNoBOM, amtReadNoBOM, &status);

    //
    // Convert file contents from UTF-8 to UTF-16
    //
    if (status == U_BUFFER_OVERFLOW_ERROR) {
        // Buffer Overflow is expected from the preflight operation.
        status = U_ZERO_ERROR;
        retPtr = new UChar[ulen+1];
        u_strFromUTF8(retPtr, ulen+1, NULL, fileBufNoBOM, amtReadNoBOM, &status);
    }

cleanUpAndReturn:
    fclose(f);
    delete[] fileBuf;
    if (U_FAILURE(status)) {
        errln("ICU Error \"%s\"\n", u_errorName(status));
        delete retPtr;
        retPtr = NULL;
    };
    return retPtr;
}
示例#27
0
void ConfusabledataBuilder::build(const char * confusables, int32_t confusablesLen,
               UErrorCode &status) {

    // Convert the user input data from UTF-8 to UChar (UTF-16)
    int32_t inputLen = 0;
    if (U_FAILURE(status)) {
        return;
    }
    u_strFromUTF8(NULL, 0, &inputLen, confusables, confusablesLen, &status);
    if (status != U_BUFFER_OVERFLOW_ERROR) {
        return;
    }
    status = U_ZERO_ERROR;
    fInput = static_cast<UChar *>(uprv_malloc((inputLen+1) * sizeof(UChar)));
    if (fInput == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }
    u_strFromUTF8(fInput, inputLen+1, NULL, confusables, confusablesLen, &status);


    // Regular Expression to parse a line from Confusables.txt.  The expression will match
    // any line.  What was matched is determined by examining which capture groups have a match.
    //   Capture Group 1:  the source char
    //   Capture Group 2:  the replacement chars
    //   Capture Group 3-6  the table type, SL, SA, ML, or MA
    //   Capture Group 7:  A blank or comment only line.
    //   Capture Group 8:  A syntactically invalid line.  Anything that didn't match before.
    // Example Line from the confusables.txt source file:
    //   "1D702 ;	006E 0329 ;	SL	# MATHEMATICAL ITALIC SMALL ETA ... "
    fParseLine = uregex_openC(
        "(?m)^[ \\t]*([0-9A-Fa-f]+)[ \\t]+;"      // Match the source char
        "[ \\t]*([0-9A-Fa-f]+"                    // Match the replacement char(s)
           "(?:[ \\t]+[0-9A-Fa-f]+)*)[ \\t]*;"    //     (continued)
        "\\s*(?:(SL)|(SA)|(ML)|(MA))"             // Match the table type
        "[ \\t]*(?:#.*?)?$"                       // Match any trailing #comment
        "|^([ \\t]*(?:#.*?)?)$"       // OR match empty lines or lines with only a #comment
        "|^(.*?)$",                   // OR match any line, which catches illegal lines.
        0, NULL, &status);

    // Regular expression for parsing a hex number out of a space-separated list of them.
    //   Capture group 1 gets the number, with spaces removed.
    fParseHexNum = uregex_openC("\\s*([0-9A-F]+)", 0, NULL, &status);

    // Zap any Byte Order Mark at the start of input.  Changing it to a space is benign
    //   given the syntax of the input.
    if (*fInput == 0xfeff) {
        *fInput = 0x20;
    }

    // Parse the input, one line per iteration of this loop.
    uregex_setText(fParseLine, fInput, inputLen, &status);
    while (uregex_findNext(fParseLine, &status)) {
        fLineNum++;
        if (uregex_start(fParseLine, 7, &status) >= 0) {
            // this was a blank or comment line.
            continue;
        }
        if (uregex_start(fParseLine, 8, &status) >= 0) {
            // input file syntax error.
            status = U_PARSE_ERROR;
            return;
        }

        // We have a good input line.  Extract the key character and mapping string, and
        //    put them into the appropriate mapping table.
        UChar32 keyChar = SpoofImpl::ScanHex(fInput, uregex_start(fParseLine, 1, &status),
                          uregex_end(fParseLine, 1, &status), status);

        int32_t mapStringStart = uregex_start(fParseLine, 2, &status);
        int32_t mapStringLength = uregex_end(fParseLine, 2, &status) - mapStringStart;
        uregex_setText(fParseHexNum, &fInput[mapStringStart], mapStringLength, &status);

        UnicodeString  *mapString = new UnicodeString();
        if (mapString == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        while (uregex_findNext(fParseHexNum, &status)) {
            UChar32 c = SpoofImpl::ScanHex(&fInput[mapStringStart], uregex_start(fParseHexNum, 1, &status),
                                 uregex_end(fParseHexNum, 1, &status), status);
            mapString->append(c);
        }
        U_ASSERT(mapString->length() >= 1);

        // Put the map (value) string into the string pool
        // This a little like a Java intern() - any duplicates will be eliminated.
        SPUString *smapString = stringPool->addString(mapString, status);

        // Add the UChar32 -> string mapping to the appropriate table.
        UHashtable *table = uregex_start(fParseLine, 3, &status) >= 0 ? fSLTable :
                            uregex_start(fParseLine, 4, &status) >= 0 ? fSATable :
                            uregex_start(fParseLine, 5, &status) >= 0 ? fMLTable :
                            uregex_start(fParseLine, 6, &status) >= 0 ? fMATable :
                            NULL;
        U_ASSERT(table != NULL);
        uhash_iput(table, keyChar, smapString, &status);
        fKeySet->add(keyChar);
        if (U_FAILURE(status)) {
            return;
        }
    }

    // Input data is now all parsed and collected.
    // Now create the run-time binary form of the data.
    //
    // This is done in two steps.  First the data is assembled into vectors and strings,
    //   for ease of construction, then the contents of these collections are dumped
    //   into the actual raw-bytes data storage.

    // Build up the string array, and record the index of each string therein
    //  in the (build time only) string pool.
    // Strings of length one are not entered into the strings array.
    // At the same time, build up the string lengths table, which records the
    // position in the string table of the first string of each length >= 4.
    // (Strings in the table are sorted by length)
    stringPool->sort(status);
    fStringTable = new UnicodeString();
    fStringLengthsTable = new UVector(status);
    int32_t previousStringLength = 0;
    int32_t previousStringIndex  = 0;
    int32_t poolSize = stringPool->size();
    int32_t i;
    for (i=0; i<poolSize; i++) {
        SPUString *s = stringPool->getByIndex(i);
        int32_t strLen = s->fStr->length();
        int32_t strIndex = fStringTable->length();
        U_ASSERT(strLen >= previousStringLength);
        if (strLen == 1) {
            // strings of length one do not get an entry in the string table.
            // Keep the single string character itself here, which is the same
            //  convention that is used in the final run-time string table index.
            s->fStrTableIndex = s->fStr->charAt(0);
        } else {
            if ((strLen > previousStringLength) && (previousStringLength >= 4)) {
                fStringLengthsTable->addElement(previousStringIndex, status);
                fStringLengthsTable->addElement(previousStringLength, status);
            }
            s->fStrTableIndex = strIndex;
            fStringTable->append(*(s->fStr));
        }
        previousStringLength = strLen;
        previousStringIndex  = strIndex;
    }
    // Make the final entry to the string lengths table.
    //   (it holds an entry for the _last_ string of each length, so adding the
    //    final one doesn't happen in the main loop because no longer string was encountered.)
    if (previousStringLength >= 4) {
        fStringLengthsTable->addElement(previousStringIndex, status);
        fStringLengthsTable->addElement(previousStringLength, status);
    }

    // Construct the compile-time Key and Value tables
    //
    // For each key code point, check which mapping tables it applies to,
    //   and create the final data for the key & value structures.
    //
    //   The four logical mapping tables are conflated into one combined table.
    //   If multiple logical tables have the same mapping for some key, they
    //     share a single entry in the combined table.
    //   If more than one mapping exists for the same key code point, multiple
    //     entries will be created in the table

    for (int32_t range=0; range<fKeySet->getRangeCount(); range++) {
        // It is an oddity of the UnicodeSet API that simply enumerating the contained
        //   code points requires a nested loop.
        for (UChar32 keyChar=fKeySet->getRangeStart(range);
                keyChar <= fKeySet->getRangeEnd(range); keyChar++) {
            addKeyEntry(keyChar, fSLTable, USPOOF_SL_TABLE_FLAG, status);
            addKeyEntry(keyChar, fSATable, USPOOF_SA_TABLE_FLAG, status);
            addKeyEntry(keyChar, fMLTable, USPOOF_ML_TABLE_FLAG, status);
            addKeyEntry(keyChar, fMATable, USPOOF_MA_TABLE_FLAG, status);
        }
    }

    // Put the assembled data into the flat runtime array
    outputData(status);

    // All of the intermediate allocated data belongs to the ConfusabledataBuilder
    //  object  (this), and is deleted in the destructor.
    return;
}
void
StringCaseTest::TestCasingImpl(const UnicodeString &input,
                               const UnicodeString &output,
                               int32_t whichCase,
                               void *iter, const char *localeID, uint32_t options) {
    // UnicodeString
    UnicodeString result;
    const char *name;
    Locale locale(localeID);

    result=input;
    switch(whichCase) {
    case TEST_LOWER:
        name="toLower";
        result.toLower(locale);
        break;
    case TEST_UPPER:
        name="toUpper";
        result.toUpper(locale);
        break;
#if !UCONFIG_NO_BREAK_ITERATION
    case TEST_TITLE:
        name="toTitle";
        result.toTitle((BreakIterator *)iter, locale, options);
        break;
#endif
    case TEST_FOLD:
        name="foldCase";
        result.foldCase(options);
        break;
    default:
        name="";
        break; // won't happen
    }
    if(result!=output) {
        errln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name);
    }
#if !UCONFIG_NO_BREAK_ITERATION
    if(whichCase==TEST_TITLE && options==0) {
        result=input;
        result.toTitle((BreakIterator *)iter, locale);
        if(result!=output) {
            errln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res");
        }
    }
#endif

    // UTF-8
    char utf8In[100], utf8Out[100];
    int32_t utf8InLength, utf8OutLength, resultLength;
    UChar *buffer;

    UCaseMap *csm;
    UErrorCode errorCode;

    errorCode=U_ZERO_ERROR;
    csm=ucasemap_open(localeID, options, &errorCode);
#if !UCONFIG_NO_BREAK_ITERATION
    if(iter!=NULL) {
        // Clone the break iterator so that the UCaseMap can safely adopt it.
        int32_t size=1;  // Not 0 because that only gives preflighting.
        UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, &size, &errorCode);
        ucasemap_setBreakIterator(csm, clone, &errorCode);
    }
#endif

    u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), &errorCode);
    switch(whichCase) {
    case TEST_LOWER:
        name="ucasemap_utf8ToLower";
        utf8OutLength=ucasemap_utf8ToLower(csm,
                    utf8Out, (int32_t)sizeof(utf8Out),
                    utf8In, utf8InLength, &errorCode);
        break;
    case TEST_UPPER:
        name="ucasemap_utf8ToUpper";
        utf8OutLength=ucasemap_utf8ToUpper(csm,
                    utf8Out, (int32_t)sizeof(utf8Out),
                    utf8In, utf8InLength, &errorCode);
        break;
#if !UCONFIG_NO_BREAK_ITERATION
    case TEST_TITLE:
        name="ucasemap_utf8ToTitle";
        utf8OutLength=ucasemap_utf8ToTitle(csm,
                    utf8Out, (int32_t)sizeof(utf8Out),
                    utf8In, utf8InLength, &errorCode);
        break;
#endif
    case TEST_FOLD:
        name="ucasemap_utf8FoldCase";
        utf8OutLength=ucasemap_utf8FoldCase(csm,
                    utf8Out, (int32_t)sizeof(utf8Out),
                    utf8In, utf8InLength, &errorCode);
        break;
    default:
        name="";
        utf8OutLength=0;
        break; // won't happen
    }
    buffer=result.getBuffer(utf8OutLength);
    u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, &errorCode);
    result.releaseBuffer(U_SUCCESS(errorCode) ? resultLength : 0);

    if(U_FAILURE(errorCode)) {
        errln("error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode));
    } else if(result!=output) {
        errln("error: %s() got a wrong result for a test case from casing.res", name);
    }
    ucasemap_close(csm);
}
示例#29
0
/**
 * @param mode same with ICU mode flag UNormalizationMode.
 */
size_t uni_normalize(char* src, size_t src_len, char* dst, size_t dst_capacity, int mode, int opt){
    UNormalizationMode umode = (UNormalizationMode)mode;
    // status holder
    UErrorCode ustatus = U_ZERO_ERROR;
    // UChar source
    UChar *s;
    int32_t s_length, s_capacity;
    // UChar normalized
    UChar *d;
    int32_t d_length, d_capacity;
    // UTF8 normalzied
    int32_t dst_alloc;
	
    // convert UTF-8 -> UChar
	u_strFromUTF8(NULL, 0, &s_length, src, (int32_t)src_len, &ustatus);
	if(U_FAILURE(ustatus) && ustatus!=U_BUFFER_OVERFLOW_ERROR){
		char buf[1024];
		sprintf(buf,"ICU u_strFromUTF8(pre-flighting) error with %d\n", ustatus);
		fputs(buf, stderr); fflush(stderr);
		return 0;
	}else{
	    ustatus = U_ZERO_ERROR;
	}
	s_capacity = (s_length+7)/8*8; // for '\0' termination
    s = (UChar*)my_malloc(s_capacity*sizeof(UChar), MYF(MY_WME));
    if(!s){
		fputs("malloc failure\n", stderr); fflush(stderr);
		return 0;
	}
    s = u_strFromUTF8(s, s_length, NULL, src, (int32_t)src_len, &ustatus);
    if(U_FAILURE(ustatus)){
		char buf[1024];
		sprintf(buf,"ICU u_strFromUTF8 error with %d\n", ustatus);
		fputs(buf, stderr); fflush(stderr);
		my_free(s);
		return 0;
	}else{
	    ustatus = U_ZERO_ERROR;
	}
	
    // normalize
	d_length = unorm_normalize(s, s_length, umode, (int32_t)opt, NULL, 0, &ustatus);
	if(U_FAILURE(ustatus) && ustatus!=U_BUFFER_OVERFLOW_ERROR){
		char buf[1024];
		sprintf(buf,"ICU unorm_normalize(pre-flighting) error with %d\n", ustatus);
		fputs(buf, stderr); fflush(stderr);
		my_free(s);
		return 0;
	}else{
	    ustatus = U_ZERO_ERROR;
	}
	d_capacity = (d_length+7)/8*8;
    d = (UChar*)my_malloc(d_capacity*sizeof(UChar), MYF(MY_WME));
    if(!d){
		fputs("malloc failure\n", stderr); fflush(stderr);
		my_free(s);
		return 0;
    }
	d_length = unorm_normalize(s, s_length, umode, (int32_t)opt, d, d_capacity, &ustatus);
	if(U_FAILURE(ustatus)){
		char buf[1024];
		sprintf(buf,"ICU unorm_normalize error with %d\n", ustatus);
		fputs(buf, stderr); fflush(stderr);
		my_free(s);
		my_free(d);
		return 0;
	}else{
	    ustatus = U_ZERO_ERROR;
	}
	my_free(s);
	
    // encode UChar -> UTF-8
    u_strToUTF8(dst, (int32_t)dst_capacity, &dst_alloc, d, d_length, &ustatus);
    my_free(d);
	return (size_t)dst_alloc;
}
示例#30
0
char *
_mongoc_sasl_prep_impl (const char *name,
                        const char *in_utf8,
                        int in_utf8_len,
                        bson_error_t *err)
{
   /* The flow is in_utf8 -> in_utf16 -> SASLPrep -> out_utf16 -> out_utf8. */
   UChar *in_utf16, *out_utf16;
   char *out_utf8;
   int32_t in_utf16_len, out_utf16_len, out_utf8_len;
   UErrorCode error_code = U_ZERO_ERROR;
   UStringPrepProfile *prep;

#define SASL_PREP_ERR_RETURN(msg)                        \
   do {                                                  \
      bson_set_error (err,                               \
                      MONGOC_ERROR_SCRAM,                \
                      MONGOC_ERROR_SCRAM_PROTOCOL_ERROR, \
                      (msg),                             \
                      name);                             \
      return NULL;                                       \
   } while (0)

   /* 1. convert str to UTF-16. */
   /* preflight to get the destination length. */
   (void) u_strFromUTF8 (
      NULL, 0, &in_utf16_len, in_utf8, in_utf8_len, &error_code);
   if (error_code != U_BUFFER_OVERFLOW_ERROR) {
      SASL_PREP_ERR_RETURN ("could not calculate UTF-16 length of %s");
   }

   /* convert to UTF-16. */
   error_code = U_ZERO_ERROR;
   in_utf16 = bson_malloc (sizeof (UChar) *
                           (in_utf16_len + 1)); /* add one for null byte. */
   (void) u_strFromUTF8 (
      in_utf16, in_utf16_len + 1, NULL, in_utf8, in_utf8_len, &error_code);
   if (error_code) {
      bson_free (in_utf16);
      SASL_PREP_ERR_RETURN ("could not convert %s to UTF-16");
   }

   /* 2. perform SASLPrep. */
   prep = usprep_openByType (USPREP_RFC4013_SASLPREP, &error_code);
   if (error_code) {
      bson_free (in_utf16);
      SASL_PREP_ERR_RETURN ("could not start SASLPrep for %s");
   }
   /* preflight. */
   out_utf16_len = usprep_prepare (
      prep, in_utf16, in_utf16_len, NULL, 0, USPREP_DEFAULT, NULL, &error_code);
   if (error_code != U_BUFFER_OVERFLOW_ERROR) {
      bson_free (in_utf16);
      usprep_close (prep);
      SASL_PREP_ERR_RETURN ("could not calculate SASLPrep length of %s");
   }

   /* convert. */
   error_code = U_ZERO_ERROR;
   out_utf16 = bson_malloc (sizeof (UChar) * (out_utf16_len + 1));
   (void) usprep_prepare (prep,
                          in_utf16,
                          in_utf16_len,
                          out_utf16,
                          out_utf16_len + 1,
                          USPREP_DEFAULT,
                          NULL,
                          &error_code);
   if (error_code) {
      bson_free (in_utf16);
      bson_free (out_utf16);
      usprep_close (prep);
      SASL_PREP_ERR_RETURN ("could not execute SASLPrep for %s");
   }
   bson_free (in_utf16);
   usprep_close (prep);

   /* 3. convert back to UTF-8. */
   /* preflight. */
   (void) u_strToUTF8 (
      NULL, 0, &out_utf8_len, out_utf16, out_utf16_len, &error_code);
   if (error_code != U_BUFFER_OVERFLOW_ERROR) {
      bson_free (out_utf16);
      SASL_PREP_ERR_RETURN ("could not calculate UTF-8 length of %s");
   }

   /* convert. */
   error_code = U_ZERO_ERROR;
   out_utf8 = (char *) bson_malloc (
      sizeof (char) * (out_utf8_len + 1)); /* add one for null byte. */
   (void) u_strToUTF8 (
      out_utf8, out_utf8_len + 1, NULL, out_utf16, out_utf16_len, &error_code);
   if (error_code) {
      bson_free (out_utf8);
      bson_free (out_utf16);
      SASL_PREP_ERR_RETURN ("could not convert %s back to UTF-8");
   }
   bson_free (out_utf16);
   return out_utf8;
#undef SASL_PREP_ERR_RETURN
}