Beispiel #1
0
/**
 * Collator::getSortKey(string $val)
 *
 * Converts $val to UTF-16 and asks ICU for its collation sort key.
 * Returns the key as a binary string, or false when the conversion fails
 * or ICU reports a non-positive key length.
 */
static Variant HHVM_METHOD(Collator, getSortKey, const String& val) {
  FETCH_COL(data, this_, false);

  UErrorCode error = U_ZERO_ERROR;
  icu::UnicodeString strval(u16(val, error));
  if (U_FAILURE(error)) return false;

  // Pass 1: a null destination with zero capacity only measures the key.
  const int needed = ucol_getSortKey(data->collator(),
                                     strval.getBuffer(), strval.length(),
                                     nullptr, 0);
  if (needed < 1) return false;

  // Pass 2: write the key straight into the result string's storage.
  String ret(needed + 1, ReserveString);
  uint8_t* const out = (uint8_t*) ret.get()->mutableData();
  const int written = ucol_getSortKey(data->collator(),
                                      strval.getBuffer(), strval.length(),
                                      out, ret.capacity() + 1);
  if (written < 1) return false;

  // The reported length is used as-is for the string size.
  ret.setSize(written);
  return ret;
}
Beispiel #2
0
/**
 * Builds the ICU sort key for stringIn at the requested collation strength.
 *
 * @param strength  collation strength forwarded to EnsureCollator().
 * @param stringIn  UTF-16 text to generate a key for.
 * @param key       [out] receives a PR_Malloc'ed buffer holding the key; the
 *                  caller takes ownership and releases it with PR_Free.
 * @param outLen    [out] receives the key length reported by ICU.
 * @return NS_OK on success; NS_ERROR_NOT_INITIALIZED, an invalid-argument
 *         error, the EnsureCollator() failure, NS_ERROR_OUT_OF_MEMORY or
 *         NS_ERROR_FAILURE otherwise. The out-params are untouched on failure.
 */
NS_IMETHODIMP nsCollationMacUC::AllocateRawSortKey(int32_t strength, const nsAString& stringIn,
                                                   uint8_t** key, uint32_t* outLen)
{
  NS_ENSURE_TRUE(mInit, NS_ERROR_NOT_INITIALIZED);
  NS_ENSURE_ARG_POINTER(key);
  NS_ENSURE_ARG_POINTER(outLen);

  nsresult res = EnsureCollator(strength);
  NS_ENSURE_SUCCESS(res, res);

  uint32_t stringInLen = stringIn.Length();

  // Bind the flattened string to a named reference so it outlives `str`.
  // Taking .get() on the bare temporary would leave `str` dangling whenever
  // PromiseFlatString has to make its own copy of the characters.
  const auto& flatString = PromiseFlatString(stringIn);
  const UChar* str = (const UChar*)flatString.get();

  // First pass: measure only.
  int32_t keyLength = ucol_getSortKey(mCollatorICU, str, stringInLen, nullptr, 0);
  NS_ENSURE_TRUE((stringInLen == 0 || keyLength > 0), NS_ERROR_FAILURE);

  // Since key is freed elsewhere with PR_Free, allocate with PR_Malloc.
  uint8_t* newKey = (uint8_t*)PR_Malloc(keyLength + 1);
  if (!newKey) {
      return NS_ERROR_OUT_OF_MEMORY;
  }

  // Second pass: generate the key into the freshly allocated buffer.
  keyLength = ucol_getSortKey(mCollatorICU, str, stringInLen, newKey, keyLength + 1);
  if (!(stringInLen == 0 || keyLength > 0)) {
    // Ownership has not been handed to the caller yet, so release it here.
    PR_Free(newKey);
    return NS_ERROR_FAILURE;
  }

  *key = newKey;
  *outLen = keyLength;

  return NS_OK;
}
Beispiel #3
0
/**
 * JNI entry point for NativeCollation.getSortKey.
 *
 * Generates the ICU sort key of source0 with the collator identified by
 * address and returns it as a new Java byte[] of the length ICU reported.
 * Returns NULL when ICU reports a zero-length key or when the Java array
 * cannot be allocated.
 */
JNIEXPORT jbyteArray JNICALL
Java_com_ibm_icu4jni_text_NativeCollation_getSortKey(JNIEnv* env, jclass,
		jint address, jstring source0) {
	ScopedJavaUnicodeString source(env, source0);
	const UCollator* collator = toCollator(address);
	// Try a stack buffer first; most keys fit and need no heap allocation.
	uint8_t byteArray[UCOL_MAX_BUFFER * 2];
	UniquePtr<uint8_t[]> largerByteArray;
	uint8_t* usedByteArray = byteArray;
	const UChar* chars = source.unicodeString().getBuffer();
	size_t charCount = source.unicodeString().length();
	size_t byteArraySize = ucol_getSortKey(collator, chars, charCount,
			usedByteArray, sizeof(byteArray) - 1);
	if (byteArraySize > sizeof(byteArray) - 1) {
		// didn't fit, try again with a larger buffer.
		largerByteArray.reset(new uint8_t[byteArraySize + 1]);
		usedByteArray = largerByteArray.get();
		byteArraySize = ucol_getSortKey(collator, chars, charCount,
				usedByteArray, byteArraySize);
	}
	if (byteArraySize == 0) {
		return NULL;
	}
	jbyteArray result = env->NewByteArray(byteArraySize);
	if (result == NULL) {
		// Allocation failed and an exception is pending; no further JNI
		// calls may be made on this array, so hand control back to the VM.
		return NULL;
	}
	env->SetByteArrayRegion(result, 0, byteArraySize,
			reinterpret_cast<jbyte*> (usedByteArray));
	return result;
}
Beispiel #4
0
// Collator.sort_key {{{
/*
 * Collator.sort_key(obj) -> bytes
 *
 * Converts the single positional argument to UTF-16 via python_to_icu() and
 * returns the ICU sort key as a bytes object of the length ICU reported.
 * Returns NULL with a Python exception set on argument, conversion or
 * allocation failure.
 */
static PyObject *
icu_Collator_sort_key(icu_Collator *self, PyObject *args, PyObject *kwargs) {
    int32_t sz = 0, key_size = 0, bsz = 0;
    UChar *buf = NULL;
    uint8_t *buf2 = NULL, *grown = NULL;
    PyObject *ans = NULL, *input = NULL;
  
    if (!PyArg_ParseTuple(args, "O", &input)) return NULL;
    buf = python_to_icu(input, &sz, 1);
    if (buf == NULL) return NULL;

    /* Initial guess: seven key bytes per UTF-16 unit plus a terminator. */
    bsz = 7 * sz + 1;
    buf2 = (uint8_t*)calloc(bsz, sizeof(uint8_t));
    if (buf2 == NULL) { PyErr_NoMemory(); goto end; }
    key_size = ucol_getSortKey(self->collator, buf, sz, buf2, bsz);
    if (key_size > bsz) {
        /* Guess was too small. Grow through a temporary so the original
         * block is still owned by buf2 (and freed at `end`) if realloc fails. */
        grown = (uint8_t*)realloc(buf2, (key_size + 1) * sizeof(uint8_t));
        if (grown == NULL) { PyErr_NoMemory(); goto end; }
        buf2 = grown;
        key_size = ucol_getSortKey(self->collator, buf, sz, buf2, key_size + 1);
    }
    ans = PyBytes_FromStringAndSize((char*)buf2, key_size);

end:
    if (buf != NULL) free(buf);
    if (buf2 != NULL) free(buf2);

    return ans;
} // }}}
/*
 * Builds a numeric-collation ICU sort key for a NUL-terminated UTF-16 string.
 *
 * input:  NUL-terminated UTF-16 text.
 * output: receives a palloc'ed buffer holding the sort key.
 *
 * Returns the key size reported by ICU, or 0 when the collator could not be
 * opened or configured (in which case *output is not assigned). Reports an
 * ERROR when ICU returns a key size below 1.
 */
static int32_t
sortkey_from_unicode (UChar *input, uint8_t **output)
{
    UErrorCode status = U_ZERO_ERROR;
    UCollator * collator = ucol_open ("", &status);
    int32_t size;

    if (icu_failure (status))
        return 0;

    ucol_setAttribute (collator, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);

    if (icu_failure (status))
    {
        /* The collator was opened successfully above; do not leak it. */
        ucol_close (collator);
        return 0;
    }

    /* Try a preallocated buffer first and retry only if the key is larger. */
    *output = (uint8_t *) palloc (sizeof (uint8_t) * PREALLOC_SIZE);
    size = ucol_getSortKey (collator, input, -1, *output, PREALLOC_SIZE);

    if (size > PREALLOC_SIZE)
    {
        pfree (*output);
        *output = (uint8_t *) palloc (sizeof (uint8_t) * size);
        ucol_getSortKey (collator, input, -1, *output, size);
    }

    ucol_close (collator);

    if (size < 1)
    {
        ereport(ERROR, (errmsg("ICU sortkey is zero")));
    }

    return size;
}
Beispiel #6
0
static void TestJB581(void)
{
    UChar       dispName    [100]; 
    int32_t     bufferLen   = 0;
    UChar       source      [100];
    UChar       target      [100];
    UCollationResult result     = UCOL_EQUAL;
    uint8_t     sourceKeyArray  [100];
    uint8_t     targetKeyArray  [100]; 
    int32_t     sourceKeyOut    = 0, 
                targetKeyOut    = 0;
    UCollator   *myCollator = 0;
    UErrorCode status = U_ZERO_ERROR;

    /*u_uastrcpy(source, "This is a test.");*/
    /*u_uastrcpy(target, "THISISATEST.");*/
    u_uastrcpy(source, "THISISATEST.");
    u_uastrcpy(target, "Thisisatest.");

    myCollator = ucol_open("en_US", &status);
    if (U_FAILURE(status)){
        bufferLen = uloc_getDisplayName("en_US", 0, dispName, 100, &status);
        /*Report the error with display name... */
        log_err("ERROR: Failed to create the collator for : \"%s\"\n", dispName);
        return;
    }
    result = ucol_strcoll(myCollator, source, -1, target, -1);
    /* result is 1, secondary differences only for ignorable space characters*/
    if (result != 1)
    {
        log_err("Comparing two strings with only secondary differences in C failed.\n");
    }
    /* To compare them with just primary differences */
    ucol_setStrength(myCollator, UCOL_PRIMARY);
    result = ucol_strcoll(myCollator, source, -1, target, -1);
    /* result is 0 */
    if (result != 0)
    {
        log_err("Comparing two strings with no differences in C failed.\n");
    }
    /* Now, do the same comparison with keys */
    sourceKeyOut = ucol_getSortKey(myCollator, source, -1, sourceKeyArray, 100);
    targetKeyOut = ucol_getSortKey(myCollator, target, -1, targetKeyArray, 100);
    bufferLen = ((targetKeyOut > 100) ? 100 : targetKeyOut);
    if (memcmp(sourceKeyArray, targetKeyArray, bufferLen) != 0)
    {
        log_err("Comparing two strings with sort keys in C failed.\n");
    }
    ucol_close(myCollator);
}
/*
 * Writes the ICU sort key of the UTF-8 string src into dest, using a
 * collator opened for the region format read from vconf.
 *
 * src:       NUL-terminated UTF-8 input.
 * dest:      output buffer for the sort key bytes.
 * dest_size: capacity of dest in bytes.
 *
 * Returns CTS_SUCCESS, or CTS_ERR_ICU_FAILED when the collator cannot be
 * opened, the UTF-8 conversion fails, or the key does not fit in dest.
 */
int helper_collation_str(const char *src, char *dest, int dest_size)
{
	HELPER_FN_CALL;
	int32_t size = 0;
	int32_t src_len = 0;
	UErrorCode status = 0;
	UChar tmp_result[CTS_SQL_MAX_LEN];
	UCollator *collator;
	const char *region;

	/* NOTE(review): ownership of the string returned by vconf_get_str is not
	 * visible here; confirm whether the caller is expected to free it. */
	region = vconf_get_str(VCONFKEY_REGIONFORMAT);
	HELPER_DBG("region %s", region);
	collator = ucol_open(region, &status);
	h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED,
			"ucol_open() Failed(%s)", u_errorName(status));

	if (U_FAILURE(status)){
		ERR("ucol_setAttribute Failed(%s)", u_errorName(status));
		ucol_close(collator);
		return CTS_ERR_ICU_FAILED;
	}

	/* Capture the converted length so the collator never has to rely on
	 * tmp_result being NUL-terminated (it is not when it is filled exactly). */
	u_strFromUTF8(tmp_result, array_sizeof(tmp_result), &src_len, src, -1, &status);
	if (U_FAILURE(status)){
		ERR("u_strFromUTF8 Failed(%s)", u_errorName(status));
		ucol_close(collator);
		return CTS_ERR_ICU_FAILED;
	}
	size = ucol_getSortKey(collator, tmp_result, src_len, (uint8_t *)dest, dest_size);
	ucol_close(collator);

	/* ucol_getSortKey reports the size it needs, which can exceed dest_size;
	 * indexing dest with it unchecked would write past the buffer. */
	if (size < 0 || dest_size < size) {
		ERR("ucol_getSortKey Failed(needed %d, capacity %d)", (int)size, dest_size);
		return CTS_ERR_ICU_FAILED;
	}
	if (size < dest_size)
		dest[size]='\0';

	return CTS_SUCCESS;
}
Beispiel #8
0
/**
 * call-seq:
 *     collator.sort_key(an_ustring) -> String
 *
 * Get a sort key for a string from a UCollator. Sort keys may be compared using strcmp.
 *
 * The returned String has the length reported by ucol_getSortKey.
 **/
VALUE icu4r_col_sort_key(VALUE self, VALUE str)
{
    int32_t needed , capa ;
    char * buffer ; 
    VALUE ret;
    Check_Class(str, rb_cUString);
    /* First guess: one key byte per UTF-16 unit; grown below if too small. */
    capa = ICU_LEN(str);
    buffer = ALLOC_N(char, capa);
    needed = ucol_getSortKey(UCOLLATOR(self), ICU_PTR(str), ICU_LEN(str), buffer, capa);
    if(needed > capa){
      REALLOC_N(buffer,char, needed);
      needed = ucol_getSortKey(UCOLLATOR(self), ICU_PTR(str), ICU_LEN(str), buffer, needed);
    }
    ret = rb_str_new(buffer, needed);
    /* ALLOC_N/REALLOC_N memory comes from Ruby's allocator, so it must be
     * released with xfree rather than the C library's free. */
    xfree(buffer);
    return ret;
}
Beispiel #9
0
// Collator.sort_key {{{
/*
 * Collator.sort_key(text) -> bytes
 *
 * Encodes the argument as UTF-8, converts it to UTF-16 and returns its ICU
 * sort key as a bytes object. Returns an empty bytes object when the UTF-16
 * conversion fails or ICU reports a zero-length key. Returns NULL with
 * MemoryError set on allocation failure.
 */
static PyObject *
icu_Collator_sort_key(icu_Collator *self, PyObject *args, PyObject *kwargs) {
    char *input;
    int32_t sz;
    UChar *buf;
    uint8_t *buf2;
    PyObject *ans;
    int32_t key_size;
    UErrorCode status = U_ZERO_ERROR;
  
    if (!PyArg_ParseTuple(args, "es", "UTF-8", &input)) return NULL;

    sz = (int32_t)strlen(input);

    buf = (UChar*)calloc(sz*4 + 1, sizeof(UChar));

    if (buf == NULL) {
        /* The "es" format hands us an allocated copy that we must release. */
        PyMem_Free(input);
        return PyErr_NoMemory();
    }

    u_strFromUTF8(buf, sz*4 + 1, &key_size, input, sz, &status);
    PyMem_Free(input);

    if (U_SUCCESS(status)) {
        /* Initial guess: seven key bytes per input byte plus a terminator. */
        buf2 = (uint8_t*)calloc(7*sz+1, sizeof(uint8_t));
        if (buf2 == NULL) { free(buf); return PyErr_NoMemory(); }

        key_size = ucol_getSortKey(self->collator, buf, -1, buf2, 7*sz+1);

        if (key_size == 0) {
            ans = PyBytes_FromString("");
        } else {
            if (key_size >= 7*sz+1) {
                /* Guess was too small: retry with the size ICU asked for. */
                free(buf2);
                buf2 = (uint8_t*)calloc(key_size+1, sizeof(uint8_t));
                if (buf2 == NULL) { free(buf); return PyErr_NoMemory(); }
                ucol_getSortKey(self->collator, buf, -1, buf2, key_size+1);
            }
            ans = PyBytes_FromString((char *)buf2);
        }
        free(buf2);
    } else ans = PyBytes_FromString("");

    free(buf);
    if (ans == NULL) return PyErr_NoMemory();

    return ans;
} // }}}
    // Walks lines[0..noLines) in order and, for each adjacent pair, checks that
    // the byte order of their sort keys agrees with ucol_strcoll and that the
    // lines are in non-decreasing order. Stops at the first reported error.
    virtual void run() {
        //sleep(10000);
        // NOTE(review): `line` is never incremented in this method, so every
        // message built from it refers to 0.
        int32_t line = 0;

        // Two key buffers used alternately: one holds the previous line's key
        // while the other receives the current line's key.
        uint8_t sk1[1024], sk2[1024];
        uint8_t *oldSk = NULL, *newSk = sk1;
        // NOTE(review): oldLen is assigned below but never read here.
        int32_t resLen = 0, oldLen = 0;
        int32_t i = 0;

        for(i = 0; i < noLines; i++) {
            // NOTE(review): resLen is not compared against the 1024-byte capacity.
            resLen = ucol_getSortKey(coll, lines[i].buff, lines[i].buflen, newSk, 1024);

            int32_t res = 0, cmpres = 0, cmpres2 = 0;

            // oldSk is NULL only for the first line, which has no predecessor.
            if(oldSk != NULL) {
                // Sort keys are compared as NUL-terminated byte strings.
                res = strcmp((char *)oldSk, (char *)newSk);
                cmpres = ucol_strcoll(coll, lines[i-1].buff, lines[i-1].buflen, lines[i].buff, lines[i].buflen);
                cmpres2 = ucol_strcoll(coll, lines[i].buff, lines[i].buflen, lines[i-1].buff, lines[i-1].buflen);
                //cmpres = res;
                //cmpres2 = -cmpres;

                // Swapping the operands must negate the comparison result.
                if(cmpres != -cmpres2) {
                    // NOTE(review): adding an int to a string literal offsets the
                    // pointer rather than appending the number.
                    error("Compare result not symmetrical on line "+ line);
                    break;
                }

                // The key comparison and ucol_strcoll must agree in sign and on equality.
                if(((res&0x80000000) != (cmpres&0x80000000)) || (res == 0 && cmpres != 0) || (res != 0 && cmpres == 0)) {
                    // NOTE(review): UnicodeString(int32_t) may select a code-point
                    // constructor instead of formatting the number; the "%i" in the
                    // messages below is not expanded by this concatenation. Confirm.
                    error(UnicodeString("Difference between ucol_strcoll and sortkey compare on line ")+ UnicodeString(line));
                    break;
                }

                if(res > 0) {
                    error(UnicodeString("Line %i is not greater or equal than previous line ")+ UnicodeString(i));
                    break;
                } else if(res == 0) { /* equal */
                    // Identical keys: only an error if the strings themselves are identical.
                    res = u_strcmpCodePointOrder(lines[i-1].buff, lines[i].buff);
                    if (res == 0) {
                        error(UnicodeString("Probable error in test file on line %i (comparing identical strings)")+ UnicodeString(i));
                        break;
                    }
                    /*
                     * UCA 6.0 test files can have lines that compare == if they are
                     * different strings but canonically equivalent.
                    else if (res > 0) {
                        error(UnicodeString("Sortkeys are identical, but code point compare gives >0 on line ")+ UnicodeString(i));
                        break;
                    }
                     */
                }
            }

            // The current key becomes the previous one for the next iteration.
            oldSk = newSk;
            oldLen = resLen;

            // Switch to the other buffer so the saved key is not overwritten.
            newSk = (newSk == sk1)?sk2:sk1;
        }
    }
Beispiel #11
0
// Very simple example code - sticks a sortkey in the buffer
// Not much error checking
int32_t getSortKey_current(const char *locale, const UChar *string, int32_t sLen, uint8_t *buffer, int32_t bLen) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open(locale, &status);
  if(U_FAILURE(status)) {
    return -1;
  }
  int32_t result = ucol_getSortKey(coll, string, sLen, buffer, bLen);
  ucol_close(coll);
  return result;  
}
/**
 * Generates the ICU sort key of javaSource with the collator identified by
 * address and returns it as a new Java byte[] of the length ICU reported.
 * Returns NULL when the string's characters cannot be obtained, when ICU
 * reports a zero-length key, or when the Java array cannot be allocated.
 */
static jbyteArray NativeCollation_getSortKey(JNIEnv* env, jclass, jlong address, jstring javaSource) {
    ScopedStringChars source(env, javaSource);
    if (source.get() == NULL) {
        return NULL;
    }
    const UCollator* collator  = toCollator(address);
    // Try a stack buffer first; most keys fit and need no heap allocation.
    uint8_t byteArray[UCOL_MAX_BUFFER * 2];
    UniquePtr<uint8_t[]> largerByteArray;
    uint8_t* usedByteArray = byteArray;
    size_t byteArraySize = ucol_getSortKey(collator, source.get(), source.size(), usedByteArray, sizeof(byteArray) - 1);
    if (byteArraySize > sizeof(byteArray) - 1) {
        // didn't fit, try again with a larger buffer.
        largerByteArray.reset(new uint8_t[byteArraySize + 1]);
        usedByteArray = largerByteArray.get();
        byteArraySize = ucol_getSortKey(collator, source.get(), source.size(), usedByteArray, byteArraySize);
    }
    if (byteArraySize == 0) {
        return NULL;
    }
    jbyteArray result = env->NewByteArray(byteArraySize);
    if (result == NULL) {
        // Allocation failed and an exception is pending; no further JNI
        // calls may be made on this array, so hand control back to the VM.
        return NULL;
    }
    env->SetByteArrayRegion(result, 0, byteArraySize, reinterpret_cast<jbyte*>(usedByteArray));
    return result;
}
Beispiel #13
0
/*
 * call-seq:
 * string.unicode_sort_key -> string
 *
 * Returns a string that will sort according to the Unicode collation algorithm.
 *
 * The key is produced by an en_US collator and returned without its final
 * byte. Raises ArgumentError when the collator cannot be opened, when no key
 * is produced, or when the key does not fit in the fixed-size work buffer.
 */
static VALUE unicode_sort_key(VALUE string) {
    char str[BUF_SIZE];
    UChar ustr[BUF_SIZE];
    int32_t len  = 0;
    int32_t ulen = 0;
    UErrorCode status = U_ZERO_ERROR;
    UCollator *col;

    to_utf16(string, ustr, &ulen);

    col = ucol_open("en_US", &status);
    if (U_FAILURE(status)) {
        /* Previously this fell through to rb_str_new(str, -1), which raised
         * ArgumentError for the negative size; keep that exception class but
         * say what actually went wrong. */
        rb_raise(rb_eArgError, "could not open ICU collator: %s", u_errorName(status));
    }

    len = ucol_getSortKey(col, ustr, ulen, (uint8_t*)str, BUF_SIZE);
    ucol_close(col);

    if (len <= 0) {
        rb_raise(rb_eArgError, "ICU did not produce a sort key");
    }
    if (len > BUF_SIZE) {
        /* ICU reports the size it needs; reading that many bytes from str
         * would run past the end of the buffer. */
        rb_raise(rb_eArgError, "sort key too long (%d bytes needed, %d available)",
                 (int)len, (int)BUF_SIZE);
    }

    return rb_str_new(str, len - 1);
}
Beispiel #14
0
/*
 * Writes the sort key of lpStr (cwStrLength UTF-16 units) into sortKey
 * (capacity cbSortKeyLength bytes), using the collator that pSortHandle
 * provides for the given options.
 *
 * Returns the value reported by ucol_getSortKey, or 0 when no collator could
 * be obtained.
 */
extern "C" int32_t GetSortKey(SortHandle* pSortHandle,
                              const UChar* lpStr,
                              int32_t cwStrLength,
                              uint8_t* sortKey,
                              int32_t cbSortKeyLength,
                              int32_t options)
{
    UErrorCode status = U_ZERO_ERROR;
    const UCollator* collator = GetCollatorFromSortHandle(pSortHandle, options, &status);

    if (U_FAILURE(status))
    {
        return 0;
    }

    return ucol_getSortKey(collator, lpStr, cwStrLength, sortKey, cbSortKeyLength);
}
Beispiel #15
0
extern "C" int32_t GetSortKey(const char* lpLocaleName,
                              const UChar* lpStr,
                              int32_t cwStrLength,
                              uint8_t* sortKey,
                              int32_t cbSortKeyLength,
                              int32_t options)
{
    UErrorCode err = U_ZERO_ERROR;
    UCollator* pColl = GetCollatorForLocaleAndOptions(lpLocaleName, options, &err);
    int32_t result = 0;

    if (U_SUCCESS(err))
    {
        result = ucol_getSortKey(pColl, lpStr, cwStrLength, sortKey, cbSortKeyLength);

        ucol_close(pColl);
    }

    return result;
}
Beispiel #16
0
// Builds the collation sort key for `chars` (`size` UTF-16 units) with m_ucol
// and stores it in keyOut as a written string value. An empty input produces
// an empty string value without consulting the collator.
// NOTE(review): MojErrCheck / MojErrThrow presumably return from this function
// on error - confirm against their definitions.
MojErr MojDbTextCollator::sortKey(const UChar* chars, MojSize size, MojDbKey& keyOut) const
{
    LOG_TRACE("Entering function %s", __FUNCTION__);

	MojErr err = MojErrNone;
	MojObjectWriter writer;

	if (size == 0) {
		err = writer.stringValue(_T(""), 0);
		MojErrCheck(err);
	} else {
		// get sort key
		MojInt32 destCapacity = 0;
		MojInt32 destLength = 0;
		MojDbKey::ByteVec vec;
		// Initial guess: three key bytes per input unit.
		err = vec.resize(size * 3);
		MojErrCheck(err);
		do {
			MojByte* dest = NULL;
			err = vec.begin(dest);
			MojErrCheck(err);
			destCapacity = (MojInt32) vec.size();
			destLength = ucol_getSortKey(m_ucol, chars, (MojInt32) size, dest, destCapacity);
			// A zero length for non-empty input is treated as a collation error.
			if (destLength == 0) {
				MojErrThrow(MojErrDbUnicode);
			}
			// Resize to the reported length: this grows the vector for a retry
			// when the key did not fit, and trims it to the key when it did.
			err = vec.resize(destLength);
			MojErrCheck(err);
		} while (destLength > destCapacity);
		// write it
		// The key is expected to end in a NUL byte, which is left out of the
		// string value written below.
		MojAssert(vec.size() >= 1 && vec.back() == _T('\0'));
		err = writer.stringValue((const MojChar*) vec.begin(), vec.size() - 1);
		MojErrCheck(err);
	}
	err = keyOut.assign(writer.buf());
	MojErrCheck(err);

	return MojErrNone;
}
Beispiel #17
0
/*
 * Crash regression test: generates a sort key for a long UTF-16 sample with
 * a collator opened from a short definition string. The only hard check is
 * that the key fits in the 4096-byte buffer; otherwise surviving the call
 * is the pass condition.
 */
static void TestGetSortKey() {
    /* This is meant to test a buffer reallocation crash while using
    French secondary sorting with a large buffer.
    The fact that Japanese characters are used is irrelevant. */
    static const UChar pucUTF16[] = {
        0x3049,0x30b9,0x3088,0xfffd,0xfffd,0x308f,0xfffd,0x3042,
        0xfffd,0xfffd,0x305e,0xfffd,0x30b6,0x30bb,0x305b,0x30b1,
        0x3050,0x30af,0x304e,0x30bd,0xfffd,0x30c6,0xfffd,0xfffd,
        0x30e1,0xfffd,0xfffd,0x30d9,0xfffd,0x3092,0x3075,0x304a,
        0x3074,0x3070,0x30f5,0x30c4,0x306e,0x30df,0x3053,0xfffd,
        0x30a6,0x30b6,0x30e0,0xfffd,0x30bc,0x30ef,0x3087,0x30cc,
        0x305f,0x30de,0xfffd,0x3090,0x3063,0x30dc,0x30b6,0x30b9,
        0x30d2,0x3072,0x3061,0xfffd,0xfffd,0xfffd,0x307b,0x3092,
        0x30a5,0x30a9,0x30b1,0x30e7,0xfffd,0xfffd,0xfffd,0xfffd,
        0xfffd,0x305e,0xfffd,0x30c7,0x30ae,0x305b,0x308b,0x30c0,
        0x30f5,0xfffd,0xfffd,0xfffd,0x307d,0x304e,0xfffd,0xfffd,
        0x30c0,0x30c8,0x306f,0x307a,0x30dd,0x30e4,0x3084,0xfffd,
        0x308c,0x30f1,0xfffd,0x30c6,0xfffd,0x307a,0xfffd,0x3052,
        0x3056,0x305d,0x30b7,0xfffd,0x305b,0x30b0,0x30b9,0xfffd,
        0x30b2,0x306d,0x3044,0xfffd,0x3073,0xfffd,0x30be,0x30cf,
        0x3080,0xfffd,0x30a8,0x30f5,0x30a5,0x30c7,0x307c,0xfffd,
        0x30d1,0x305f,0x30b2,0xfffd,0x3053,0x30ca,0xfffd,0x30dd,
        0x3058,0x30c0,0x305d,0x30e1,0xfffd,0x30bb,0x305f,0x30d1,
        0x30f2,0x3058,0x3086,0x30ce,0x30db,0x30cb,0x30e9,0xfffd,
        0x308c,0xfffd,0xfffd,0x30af,0x30c4,0x3076,0x304c,0x30f5,
        0x30e8,0x308c,0xfffd,0x30e2,0x3073,0x30a3,0x304e,0x30ea,
        0xfffd,0x304f,0xfffd,0x306c,0x3044,0xfffd,0xfffd,0x30c9,
        0xfffd,0x30f5,0xfffd,0xfffd,0xfffd,0x30eb,0x30a8,0xfffd,
        0x306d,0x307d,0x30d8,0x3069,0xfffd,0xfffd,0x3086,0x30a9,
        0xfffd,0x3076,0x30e9,0x30cc,0x3074,0x30e0,0xfffd,0xfffd,
        0xfffd,0x30f0,0x3086,0x30ac,0x3076,0x3068,0x30c7,0xfffd,
        0x30b7,0x30d2,0x3048,0x308e,0x30e8,0x30d9,0x30ce,0x30d0,
        0x308b,0x30ee,0x30e6,0x3079,0x30f3,0x30af,0xfffd,0x3079,
        0xfffd,0xfffd,0x30ca,0x30bf,0xfffd,0x30b5,0xfffd,0xfffd,
        0x3093,0xfffd,0x30ba,0xfffd,0x3076,0x3047,0x304a,0xfffd,
        0xfffd,0x3086,0xfffd,0x3081,0xfffd,0x30f6,0x3066,0xfffd,
        0xfffd,0x30b6,0x30ef,0x30e2,0x30bf,0xfffd,0x3053,0x304a,
        0xfffd,0xfffd,0x304a,0x30e8,0xfffd,0x30e2,0xfffd,0xfffd,
        0x305c,0x3081,0x30c6,0xfffd,0x3091,0x3046,0x306a,0x3059,
        0xfffd,0xfffd,0x30dd,0x30d1,0x308a,0x30ee,0xfffd,0xfffd,
        0x308a,0x3042,0x30da,0xfffd,0x3064,0x30ef,0x305c,0x306b,
        0xfffd,0x30ca,0x3085,0x3067,0x30ea,0x30c2,0x30c8,0xfffd,
        0x30f5,0xfffd,0xfffd,0xfffd,0x30ca,0xfffd,0x3050,0x30f1,
        0x3050,0x3053,0x3072,0xfffd,0xfffd,0xfffd,0x3074,0xfffd,
        0x304b,0x30dd,0x306d,0xfffd,0x3049,0x30a1,0x30cc,0x30de,
        0x30ae,0x307b,0x308a,0xfffd,0x3065,0xfffd,0xfffd,0x30c0,
        0xfffd,0x3048,0x30dc,0x304f,0x3085,0x3059,0x304b,0x30d3,
        0x30eb,0x30a4,0x3073,0xfffd,0x30ba,0x308f,0x30a7,0x30c3,
        0x3074,0x30cf,0x306c,0x3053,0x30c0,0xfffd,0x3066,0xfffd,
        0x308f,0xfffd,0x30b5,0xfffd,0x3092,0x30c4,0xfffd,0x30d6,
        0x3056,0x30ad,0x30d2,0x30ba,0xfffd,0x30e6,0x304c,0x3088,
        0x30b6,0x3048,0x3077,0x30d1,0xfffd,0x3050,0xfffd,0x3042,
        0xfffd,0xfffd,0x308f,0xfffd,0x30c1,0xfffd,0x3074,0x3061,
        0x3056,0x30e5,0xfffd,0xfffd,0x3057,0xfffd,0xfffd,0xfffd,
        0xfffd,0x30bd,0x30b3,0x30ee,0xfffd,0x30f2,0x3084,0x3050,
        0xfffd,0x30e7,0xfffd,0xfffd,0x3060,0x3049,0x30f2,0x30ad,
        0x30bf,0x30f1,0x30a2,0xfffd,0x30af,0xfffd,0x3060,0x30a1,
        0x30e9,0x30c3,0xfffd,0x3072,0x3093,0x3070,0xfffd,0x308f,
        0x3060,0xfffd,0x3067,0x306f,0x3082,0x308b,0x3051,0xfffd,
        0x3058,0xfffd,0xfffd,0x30a8,0x3051,0x3054,0x30ad,0x30f0,
        0x3053,0xfffd,0x30e1,0x30d7,0x308d,0x307f,0x30be,0x30b0,
        0xfffd,0x30db,0xfffd,0x30d1,0xfffd,0x3054,0x30a5,0xfffd,
        0x306a,0xfffd,0x305c,0xfffd,0x3052,0x3088,0xfffd,0x306e,
        0xfffd,0x30a9,0x30a1,0x30b4,0x3083,0x30bd,0xfffd,0xfffd,
        0x306a,0x3070,0x30cd,0xfffd,0x3072,0x30ed,0x30c6,0x30be,
        0x30c4,0x305e,0x30b3,0x30e1,0x308a,0xfffd,0x305b,0xfffd,
        0x3042,0x3088,0xfffd,0x304c,0xfffd,0x3089,0x3071,0xfffd,
        0xfffd,0x30c6,0x3062,0x3079,0xfffd,0x304b,0x304a,0xfffd,
        0x30ad,0x3045,0x3045,0x3087,0xfffd,0x306a,0x308b,0x0000,
        0x30bd,0x3065,0x30b8,0x3086,0x30d3,0x3076,0xfffd,0xfffd,
        0x308f,0x3053,0x307c,0x3053,0x3084,0x30ae,0x30c4,0x3045,
        0x30a8,0x30d0,0x30e1,0x308c,0x30e6,0x30b7,0xfffd,0xfffd,
        0xfffd,0x3046,0x305f,0xfffd,0x3086,0x30ab,0xfffd,0xfffd,
        0x30c8,0xfffd,0x30a1,0x3052,0x3059,0xfffd,0x30a4,0xfffd,
        0xfffd,0x308c,0x3085,0x30ab,0x30b5,0x3091,0x30bf,0x30e3,
        0xfffd,0xfffd,0x3087,0xfffd,0x30f6,0x3051,0x30bd,0x3092,
        0x3063,0xfffd,0x30a9,0x3063,0x306e,0xfffd,0xfffd,0xfffd,
        0x306c,0xfffd,0x307e,0x30ad,0x3077,0x30c2,0x30e9,0x30d5,
        0xfffd,0xfffd,0x30c6,0x305c,0xfffd,0xfffd,0x3089,0xfffd,
        0x3048,0x30cb,0x308c,0xfffd,0xfffd,0x3044,0xfffd,0x3080,
        0x3063,0x3079,0xfffd,0x308a,0x30cb,0x3042,0x3057,0xfffd,
        0x307c,0x30c1,0x30a8,0x30cf,0xfffd,0x3083,0xfffd,0xfffd,
        0x306c,0xfffd,0x305e,0x3092,0xfffd,0x30dc,0x30b0,0x3081,
        0x30e3,0x30f0,0x304e,0x30cc,0x308e,0x30c4,0x30ad
    };

    UErrorCode status = U_ZERO_ERROR;
    UCollator *pCollator;
    int32_t lenActualSortKey;
    uint8_t pucSortKey[4096];
    static const int32_t LENSORTKEY = (int32_t)sizeof(pucSortKey);

    ucol_prepareShortStringOpen("LFR_AN_CX_EX_FO_HX_NX_S3", 0, NULL, &status);

    pCollator = ucol_openFromShortString("LFR_AN_CX_EX_FO_HX_NX_S3", 0, NULL, &status);

    if (U_FAILURE(status)) {
        log_data_err("error opening collator -> %s. (Are you missing data?)\n", u_errorName(status));
        return;
    }

    lenActualSortKey = ucol_getSortKey(pCollator,
                                       (const UChar *)pucUTF16,
                                       UPRV_LENGTHOF(pucUTF16),
                                       pucSortKey,
                                       LENSORTKEY);

    if (lenActualSortKey > LENSORTKEY) {
        log_err("sort key too big for original buffer. Got: %d Expected: %d\n", lenActualSortKey, LENSORTKEY);
        /* Fall through (no early return) so the collator is still closed. */
    }
    /* If the test didn't crash, then the test succeeded. */
    ucol_close(pCollator);
}
Beispiel #18
0
// Reads testFile line by line, parses each non-comment line into a UTF-16
// string, and checks every string against its predecessor: ucol_strcoll must
// be antisymmetric, must agree with the byte order of the sort keys, and the
// file must be in non-decreasing order. Failures are reported through errln
// and do not stop the scan. Does nothing when no test file is open.
void UCAConformanceTest::testConformance(UCollator *coll) 
{
  if(testFile == 0) {
    return;
  }

  int32_t lineNo = 0;
  char lineB[1024];

  // Parsed strings and their sort keys are double-buffered, so the previous
  // entry stays intact while the current one is produced.
  UChar textA[1024], textB[1024];
  uint8_t keyA[1024], keyB[1024];

  UChar *curText = textA;
  UChar *prevText = NULL;
  uint8_t *curKey = keyA;
  uint8_t *prevKey = NULL;
  int32_t prevTextLen = 0;
  uint32_t first = 0;

  while (fgets(lineB, 1024, testFile) != NULL) {
    lineNo++;

    // Empty reads and '#' comment lines carry no test data.
    if(lineB[0] == 0 || lineB[0] == '#') {
      continue;
    }

    const uint32_t parsed = u_parseString(lineB, curText, 1024, &first, &status);
    const int32_t curTextLen = parsed;
    curText[parsed] = 0;

    ucol_getSortKey(coll, curText, curTextLen, curKey, 1024);

    if(prevKey != NULL) {
      const int32_t keyOrder = strcmp((char *)prevKey, (char *)curKey);
      const int32_t forward = ucol_strcoll(coll, prevText, prevTextLen, curText, curTextLen);
      const int32_t backward = ucol_strcoll(coll, curText, curTextLen, prevText, prevTextLen);

      if(forward != -backward) {
        errln("Compare result not symmetrical on line %i", lineNo);
      }

      // The two orderings must have the same sign and agree on equality.
      const bool signsDiffer = ((keyOrder < 0) != (forward < 0));
      const bool equalityDiffers = ((keyOrder == 0) != (forward == 0));
      if(signsDiffer || equalityDiffers) {
        errln("Difference between ucol_strcoll and sortkey compare on line %i", lineNo);
        logln("Data line %s", lineB);
      }

      if(keyOrder > 0) {
        errln("Line %i is not greater or equal than previous line", lineNo);
        logln("Data line %s", lineB);
      } else if(keyOrder == 0) {
        // Identical keys: fall back to a code point comparison.
        const int32_t cpOrder = u_strcmpCodePointOrder(prevText, curText);
        if (cpOrder == 0) {
          errln("Probable error in test file on line %i (comparing identical strings)", lineNo);
          logln("Data line %s", lineB);
        } else if (cpOrder > 0) {
          errln("Sortkeys are identical, but code point comapare gives >0 on line %i", lineNo);
          logln("Data line %s", lineB);
        }
      }
    }

    // The current entry becomes the previous one; switch to the other buffers.
    prevKey = curKey;
    if(curKey == keyA) {
      curKey = keyB;
    } else {
      curKey = keyA;
    }

    prevText = curText;
    prevTextLen = curTextLen;
    if(curText == textA) {
      curText = textB;
    } else {
      curText = textA;
    }
  }
}
Beispiel #19
0
/**
 * Collator::sortWithSortKeys(array &$arr)
 *
 * Sorts the array by generating an ICU sort key for every element
 * (non-string values are keyed as the empty string), ordering the keys with
 * zend_qsort, and replacing $arr with a freshly indexed array in that order.
 *
 * Returns true on success, and also when $arr is not an array or is empty.
 * Returns false, with the collator error message set, when a value cannot be
 * converted to UTF-16.
 *
 * NOTE(review): the results of the malloc/calloc/realloc calls below are not
 * checked for NULL.
 */
bool c_Collator::t_sortwithsortkeys(VRefParam arr) {
  INSTANCE_METHOD_INJECTION_BUILTIN(Collator, Collator::sortwithsortkeys);
  char*       sortKeyBuf = NULL; /* buffer to store sort keys */
  int32_t     sortKeyBufSize = DEF_SORT_KEYS_BUF_SIZE; /* buffer size */
  ptrdiff_t   sortKeyBufOffset = 0; /* pos in buffer to store sort key */
  int32_t     sortKeyLen = 0; /* the length of currently processing key */
  int32_t     bufLeft = 0;
  int32_t     bufIncrement = 0;

  /* buffer to store 'indexes' which will be passed to 'qsort' */
  collator_sort_key_index_t* sortKeyIndxBuf = NULL;
  int32_t     sortKeyIndxBufSize   = DEF_SORT_KEYS_INDX_BUF_SIZE;
  int32_t     sortKeyIndxSize      = sizeof( collator_sort_key_index_t );

  int32_t     sortKeyCount         = 0;
  int32_t     j                    = 0;

  /* tmp buffer to hold current processing string in utf-16 */
  UChar*      utf16_buf            = NULL;
  /* the length of utf16_buf, in UChar units */
  int         utf16_buf_size       = DEF_UTF16_BUF_SIZE;
  /* length of converted string */
  int         utf16_len            = 0;

  m_errcode.clear();
  s_intl_error->m_error.clear();

  /*
   * Sort specified array.
   */
  if (!arr.isArray()) {
    return true;
  }
  Array hash = arr.toArray();
  if (hash.size() == 0) {
    return true;
  }

  /* Create buffers */
  sortKeyBuf     = (char*)calloc(sortKeyBufSize, sizeof(char));
  sortKeyIndxBuf = (collator_sort_key_index_t*)malloc(sortKeyIndxBufSize);
  /* utf16_buf_size counts UChars (it is passed on as the conversion
   * capacity and compared with utf16_len), so the allocation has to be
   * scaled by sizeof(UChar); a byte count would make the buffer half the
   * size that is advertised to the converter. */
  utf16_buf      = (UChar*)malloc(utf16_buf_size * sizeof(UChar));

  /* Iterate through input hash and create a sort key for each value. */
  for (ssize_t pos = hash->iter_begin(); pos != ArrayData::invalid_index;
       pos = hash->iter_advance(pos)) {
    /* Convert current hash item from UTF-8 to UTF-16LE and save the result
     * to utf16_buf. */
    utf16_len = utf16_buf_size;
    /* Process string values only. */
    Variant val(hash->getValue(pos));
    if (val.isString()) {
      String str = val.toString();
      intl_convert_utf8_to_utf16(&utf16_buf, &utf16_len, str.data(),
                                 str.size(), &(m_errcode.code));
      if (U_FAILURE(m_errcode.code)) {
        m_errcode.custom_error_message = "Sort with sort keys failed";
        if (utf16_buf) {
          free(utf16_buf);
        }
        free(sortKeyIndxBuf);
        free(sortKeyBuf);
        return false;
      }
    } else {
      /* Set empty string */
      utf16_len = 0;
      utf16_buf[utf16_len] = 0;
    }

    /* Track the capacity after a conversion that needed more room. */
    if ((utf16_len + 1) > utf16_buf_size) {
      utf16_buf_size = utf16_len + 1;
    }

    /* Get sort key, reallocating the buffer if needed. */
    bufLeft = sortKeyBufSize - sortKeyBufOffset;

    sortKeyLen = ucol_getSortKey(m_ucoll,
                    utf16_buf,
                    utf16_len,
                    (uint8_t*)sortKeyBuf + sortKeyBufOffset,
                    bufLeft);

    /* check for sortKeyBuf overflow, increasing its size of the buffer if
       needed */
    if (sortKeyLen > bufLeft) {
      bufIncrement = ( sortKeyLen > DEF_SORT_KEYS_BUF_INCREMENT ) ?
        sortKeyLen : DEF_SORT_KEYS_BUF_INCREMENT;
      sortKeyBufSize += bufIncrement;
      bufLeft += bufIncrement;
      sortKeyBuf = (char*)realloc(sortKeyBuf, sortKeyBufSize);
      sortKeyLen = ucol_getSortKey(m_ucoll, utf16_buf, utf16_len,
                                   (uint8_t*)sortKeyBuf + sortKeyBufOffset,
                                   bufLeft);
    }

    /* check sortKeyIndxBuf overflow, increasing its size of the buffer if
       needed */
    if ((sortKeyCount + 1) * sortKeyIndxSize > sortKeyIndxBufSize) {
      bufIncrement = (sortKeyIndxSize > DEF_SORT_KEYS_INDX_BUF_INCREMENT) ?
        sortKeyIndxSize : DEF_SORT_KEYS_INDX_BUF_INCREMENT;
      sortKeyIndxBufSize += bufIncrement;
      sortKeyIndxBuf = (collator_sort_key_index_t*)realloc(sortKeyIndxBuf,
                                                           sortKeyIndxBufSize);
    }
    /* Store the key's offset for now: sortKeyBuf may still be moved by a
       later realloc, so real pointers are only computed after the loop. */
    sortKeyIndxBuf[sortKeyCount].key = (char*)sortKeyBufOffset;
    sortKeyIndxBuf[sortKeyCount].valPos = pos;
    sortKeyBufOffset += sortKeyLen;
    ++sortKeyCount;
  }

  /* update ptrs to point to valid keys. */
  for( j = 0; j < sortKeyCount; j++ )
    sortKeyIndxBuf[j].key = sortKeyBuf + (ptrdiff_t)sortKeyIndxBuf[j].key;

  /* sort it */
  zend_qsort(sortKeyIndxBuf, sortKeyCount, sortKeyIndxSize,
             collator_cmp_sort_keys, NULL);

  /* for resulting hash we'll assign new hash keys rather then reordering */
  Array sortedHash = Array::Create();

  for (j = 0; j < sortKeyCount; j++) {
    sortedHash.append(hash->getValue(sortKeyIndxBuf[j].valPos));
  }

  /* Save sorted hash into return variable. */
  arr = sortedHash;

  if (utf16_buf)
    free(utf16_buf);

  free(sortKeyIndxBuf);
  free(sortKeyBuf);

  return true;
}
Beispiel #20
0
int main(int /* argc*/ , const char * /*argv*/ []) {
    UErrorCode status = U_ZERO_ERROR;
    int diffs = 0;
    int gbaddiffs =0;
    setup(status);
    if(U_FAILURE(status)) return 1;

    int expected = PROVIDER_COUNT;

    for(int l=0;l<LOCALE_COUNT;l++) {
        printf("\n");
        uint8_t oldBytes[200];
        int32_t oldLen = -1;
        for(int v=0;v<=expected;v++) {

            // Construct the locale ID
            char locID[200];
            strcpy(locID, locale[l]);
            if((v!=expected)) { // -1 = no version
                strcat(locID, "@sp=icu");
                strcat(locID, provider_version[v]);
            }
            
            printf("%-28s =  ", locID);
            
            UErrorCode subStatus = U_ZERO_ERROR;
            uint8_t bytes[200];
            uint8_t bytesb[200];
#define USE_CXX 0

#if USE_CXX
            Collator *col = Collator::createInstance(Locale(locID),subStatus);
            if(U_FAILURE(subStatus)) {
                printf("ERR: %s\n", u_errorName(subStatus));
                continue;
            }
            int32_t len = col->getSortKey(stuff, -1, bytes, 200);
#else
#if 1
            char xbuf2[200];
            strcpy(xbuf2,"X/");
            strcat(xbuf2,locID);
            strcat(xbuf2,"/");
            //printf(" -> %s\n", xbuf2);
            UCollator *col = ucol_openFromShortString(xbuf2, FALSE,NULL, &subStatus);
#else
            UCollator *col = ucol_open(locID, &subStatus);
#endif
            if(U_FAILURE(subStatus)) {
                printf("ERR: %s\n", u_errorName(subStatus));
                continue;
            }
            

            char xbuf3[200];
            {
              int32_t def = ucol_getShortDefinitionString(col,locID/*NULL*/,xbuf3,200,&subStatus);
              if(U_FAILURE(subStatus)) {
                printf("Err getting short string name: %s\n", u_errorName(subStatus));
              } else {
                printf(" --> %s\n", xbuf3);
              }              
            }

            int32_t len = ucol_getSortKey(col, stuff, -1, bytes, 200);
#endif

            printf("     ");

            int tdiffs=0;

            for(int i=0;i<len;i++) {
	      if(i<oldLen&&bytes[i]!=oldBytes[i]) {
                diffs++;
                printf("*");
              } else {
                printf(" ");
              }
              printf("%02X", (0xFF&bytes[i]));
            }
            printf("\n");

            char xbuf4[200];
            UCollator *col2 = ucol_openFromShortString(xbuf3, FALSE, NULL, &subStatus);
            if(U_FAILURE(subStatus)) {
              printf("Err opening from new short string : %s\n", u_errorName(subStatus));
              continue;
            } else {
              int32_t def4 = ucol_getShortDefinitionString(col,locID/*NULL*/,xbuf4,200,&subStatus);
              if(strcmp(xbuf4,xbuf3)) {
                printf(" --> reopened = %s (%s)\n", xbuf4, u_errorName(subStatus));
              }
            }
            int32_t len2 = ucol_getSortKey(col2, stuff, -1, bytesb, 200);

            int baddiffs=0;
            for(int i=0;i<len;i++) {
	      if(i<len&&bytes[i]!=bytesb[i]) {
                  baddiffs++;
                  printf("!");
                 } else {
                   // printf(" ");
                 }
                // printf("%02X", (0xFF&bytesb[i]));
            }
            if(baddiffs>0) {
              printf(" - ERR! Diffs from %s in %d places\n", xbuf2,baddiffs);
              gbaddiffs+=baddiffs;
            } else {
              //printf("  OK.\n");
            }
            //            printf("\n");

            

#if USE_CXX
            delete col;
#else
            ucol_close(col);
#endif

            oldLen = len;
            memcpy(oldBytes, bytes, len);
        }
    }

    if(diffs==0) {
#if (U_ICU_VERSION_MAJOR_NUM < 49)
      printf("ERROR: 0 differences found between platforms. ICU " U_ICU_VERSION " does not support collator plugins properly (not until 49)\n");
#else
      printf("ERROR: 0 differences found between platforms.. are the platforms installed? Try 'icuinfo -L'\n");
#endif
      return 1;
    } else {
      printf("%d differences found among provider versions!\n", diffs);
    }

    if(gbaddiffs>0) {
      printf("ERROR: %d diffs found between a collator and it's reopened (from shortstring) variant.\n", gbaddiffs);
      return 2;
    } else {
      printf("Collator and reopened (shortstring) are OK.\n");
    }

    printf("Success!\n");
    
    return 0;
}
Beispiel #21
0
/**
 * Collator::sortWithSortKeys: sorts the values of `arr` by comparing their
 * ICU sort keys and writes the re-indexed result back into `arr`.
 *
 * Non-array input and empty arrays are left untouched and reported as
 * success. Values that are not strings are keyed as the empty string.
 * Returns false if a string value cannot be converted to UTF-16.
 */
static bool HHVM_METHOD(Collator, sortWithSortKeys, VRefParam arr) {
  FETCH_COL(data, this_, false);
  data->clearError();

  if (!arr.isArray()) {
    return true;
  }

  Array input = arr.toArray();
  if (input.size() == 0) {
    return true;
  }

  // All sort keys are stored back to back in one growable byte buffer.
  size_t keyBytesUsed = 0;
  size_t keyBytesCap = DEF_SORT_KEYS_BUF_SIZE;
  char* keyBuf = (char*)smart_malloc(keyBytesCap);
  if (!keyBuf) {
    throw Exception("Out of memory");
  }
  SCOPE_EXIT{ smart_free(keyBuf); };

  // One index entry per input value: where its key lives and where the
  // value sits in the input array.
  size_t entryCount = 0;
  size_t entryCap = DEF_SORT_KEYS_INDX_BUF_SIZE;
  auto entries = (collator_sort_key_index_t*)smart_malloc(
                  entryCap * sizeof(collator_sort_key_index_t));
  if (!entries) {
    throw Exception("Out of memory");
  }
  SCOPE_EXIT{ smart_free(entries); };

  // Walk the input and build a key plus an index entry for every element.
  const auto endPos = input->iter_end();
  ssize_t pos = input->iter_begin();
  while (pos != endPos) {
    Variant val(input->getValue(pos));

    // Only strings are converted; everything else keeps the empty string.
    icu::UnicodeString text;
    if (val.isString()) {
      UErrorCode convErr = U_ZERO_ERROR;
      text = u16(val.toString(), convErr);
      if (U_FAILURE(convErr)) {
        return false;
      }
    }

    // First attempt: write the key into whatever room is left.
    int keyLen =
      ucol_getSortKey(data->collator(),
                      text.getBuffer(), text.length(),
                      (uint8_t*)(keyBuf + keyBytesUsed),
                      keyBytesCap - keyBytesUsed);

    // The key did not fit: grow by at least the key length and retry.
    if (keyLen > (keyBytesCap - keyBytesUsed)) {
      int32_t grow = DEF_SORT_KEYS_BUF_INCREMENT;
      if (keyLen > DEF_SORT_KEYS_BUF_INCREMENT) {
        grow = keyLen;
      }
      keyBytesCap += grow;
      keyBuf = (char*)smart_realloc(keyBuf, keyBytesCap);
      if (!keyBuf) {
        throw Exception("Out of memory");
      }
      keyLen =
        ucol_getSortKey(data->collator(),
                        text.getBuffer(), text.length(),
                        (uint8_t*)(keyBuf + keyBytesUsed),
                        keyBytesCap - keyBytesUsed);
      assert(keyLen <= (keyBytesCap - keyBytesUsed));
    }

    // Make room for one more index entry if the table is full.
    if ((entryCount + 1) > entryCap) {
      entryCap += DEF_SORT_KEYS_INDX_BUF_INCREMENT;
      entries = (collator_sort_key_index_t*)smart_realloc(entries,
                      entryCap * sizeof(collator_sort_key_index_t));
      if (!entries) {
        throw Exception("Out of memory");
      }
    }

    // The key buffer may still move, so record the offset for now; it is
    // turned into a real pointer once all keys have been generated.
    collator_sort_key_index_t& entry = entries[entryCount];
    entry.key = (char*)keyBytesUsed;
    entry.valPos = pos;
    keyBytesUsed += keyLen;
    ++entryCount;

    pos = input->iter_advance(pos);
  }

  // Convert the stored offsets into pointers into the final key buffer.
  for (size_t i = 0; i < entryCount; ++i) {
    entries[i].key = keyBuf + (ptrdiff_t)entries[i].key;
  }

  zend_qsort(entries, entryCount,
             sizeof(collator_sort_key_index_t),
             collator_cmp_sort_keys, nullptr);

  // Emit the values in sorted order; append() assigns fresh keys.
  Array sorted = Array::Create();
  for (size_t i = 0; i < entryCount; ++i) {
    sorted.append(input->getValue(entries[i].valPos));
  }
  arr = sorted;
  return true;
}
Beispiel #22
0
/**
 * Checks that the en_US collator orders every pair of currency symbols the
 * way the table below lists them, both through ucol_strcoll and through a
 * byte-wise comparison of the generated sort keys. Each pair is handed to
 * reportCResult together with the expected result.
 */
void currTest()
{
    /* All the currency symbols, in UCA order*/
    static const UChar currency[][2] =
    {
      { 0x00A4, 0x0000}, /*00A4; L; [14 36, 03, 03]    # [082B.0020.0002] # CURRENCY SIGN*/
      { 0x00A2, 0x0000}, /*00A2; L; [14 38, 03, 03]    # [082C.0020.0002] # CENT SIGN*/
      { 0xFFE0, 0x0000}, /*FFE0; L; [14 38, 03, 05]    # [082C.0020.0003] # FULLWIDTH CENT SIGN*/
      { 0x0024, 0x0000}, /*0024; L; [14 3A, 03, 03]    # [082D.0020.0002] # DOLLAR SIGN*/
      { 0xFF04, 0x0000}, /*FF04; L; [14 3A, 03, 05]    # [082D.0020.0003] # FULLWIDTH DOLLAR SIGN*/
      { 0xFE69, 0x0000}, /*FE69; L; [14 3A, 03, 1D]    # [082D.0020.000F] # SMALL DOLLAR SIGN*/
      { 0x00A3, 0x0000}, /*00A3; L; [14 3C, 03, 03]    # [082E.0020.0002] # POUND SIGN*/
      { 0xFFE1, 0x0000}, /*FFE1; L; [14 3C, 03, 05]    # [082E.0020.0003] # FULLWIDTH POUND SIGN*/
      { 0x00A5, 0x0000}, /*00A5; L; [14 3E, 03, 03]    # [082F.0020.0002] # YEN SIGN*/
      { 0xFFE5, 0x0000}, /*FFE5; L; [14 3E, 03, 05]    # [082F.0020.0003] # FULLWIDTH YEN SIGN*/
      { 0x09F2, 0x0000}, /*09F2; L; [14 40, 03, 03]    # [0830.0020.0002] # BENGALI RUPEE MARK*/
      { 0x09F3, 0x0000}, /*09F3; L; [14 42, 03, 03]    # [0831.0020.0002] # BENGALI RUPEE SIGN*/
      { 0x0E3F, 0x0000}, /*0E3F; L; [14 44, 03, 03]    # [0832.0020.0002] # THAI CURRENCY SYMBOL BAHT*/
      { 0x17DB, 0x0000}, /*17DB; L; [14 46, 03, 03]    # [0833.0020.0002] # KHMER CURRENCY SYMBOL RIEL*/
      { 0x20A0, 0x0000}, /*20A0; L; [14 48, 03, 03]    # [0834.0020.0002] # EURO-CURRENCY SIGN*/
      { 0x20A1, 0x0000}, /*20A1; L; [14 4A, 03, 03]    # [0835.0020.0002] # COLON SIGN*/
      { 0x20A2, 0x0000}, /*20A2; L; [14 4C, 03, 03]    # [0836.0020.0002] # CRUZEIRO SIGN*/
      { 0x20A3, 0x0000}, /*20A3; L; [14 4E, 03, 03]    # [0837.0020.0002] # FRENCH FRANC SIGN*/
      { 0x20A4, 0x0000}, /*20A4; L; [14 50, 03, 03]    # [0838.0020.0002] # LIRA SIGN*/
      { 0x20A5, 0x0000}, /*20A5; L; [14 52, 03, 03]    # [0839.0020.0002] # MILL SIGN*/
      { 0x20A6, 0x0000}, /*20A6; L; [14 54, 03, 03]    # [083A.0020.0002] # NAIRA SIGN*/
      { 0x20A7, 0x0000}, /*20A7; L; [14 56, 03, 03]    # [083B.0020.0002] # PESETA SIGN*/
      { 0x20A9, 0x0000}, /*20A9; L; [14 58, 03, 03]    # [083C.0020.0002] # WON SIGN*/
      { 0xFFE6, 0x0000}, /*FFE6; L; [14 58, 03, 05]    # [083C.0020.0003] # FULLWIDTH WON SIGN*/
      { 0x20AA, 0x0000}, /*20AA; L; [14 5A, 03, 03]    # [083D.0020.0002] # NEW SHEQEL SIGN*/
      { 0x20AB, 0x0000}, /*20AB; L; [14 5C, 03, 03]    # [083E.0020.0002] # DONG SIGN*/
      { 0x20AC, 0x0000}, /*20AC; L; [14 5E, 03, 03]    # [083F.0020.0002] # EURO SIGN*/
      { 0x20AD, 0x0000}, /*20AD; L; [14 60, 03, 03]    # [0840.0020.0002] # KIP SIGN*/
      { 0x20AE, 0x0000}, /*20AE; L; [14 62, 03, 03]    # [0841.0020.0002] # TUGRIK SIGN*/
      { 0x20AF, 0x0000}, /*20AF; L; [14 64, 03, 03]    # [0842.0020.0002] # DRACHMA SIGN*/
    };

#if 0
    /* All the currency symbols, in collation order*/
    static const UChar currency[][2] =
    {
        { 0x00a4, 0x0000}, /* generic currency*/
        { 0x0e3f, 0x0000}, /* baht*/
        { 0x00a2, 0x0000}, /* cent*/
        { 0x20a1, 0x0000}, /* colon*/
        { 0x20a2, 0x0000}, /* cruzeiro*/
        { 0x0024, 0x0000}, /* dollar */
        { 0x20ab, 0x0000}, /* dong */
        { 0x20ac, 0x0000}, /* euro */
        { 0x20a3, 0x0000}, /* franc */
        { 0x20a4, 0x0000}, /* lira */
        { 0x20a5, 0x0000}, /* mill */
        { 0x20a6, 0x0000}, /* naira */
        { 0x20a7, 0x0000}, /* peseta */
        { 0x00a3, 0x0000}, /* pound */
        { 0x20a8, 0x0000}, /* rupee */
        { 0x20aa, 0x0000}, /* shekel*/
        { 0x20a9, 0x0000}, /* won*/
        { 0x00a5, 0x0000}  /* yen*/
    };
#endif

    UChar source[2], target[2];
    int32_t i, j, sortklen1, sortklen2, cmplen;
    int res;
    UCollator *c;
    uint8_t *sortKey1, *sortKey2;
    UErrorCode status = U_ZERO_ERROR;
    UCollationResult compareResult, keyResult;
    UCollationResult expectedResult = UCOL_EQUAL;
    log_verbose("Testing currency of all locales\n");
    c = ucol_open("en_US", &status);
    if (U_FAILURE(status))
    {
        log_err_status(status, "collator open failed! :%s\n", myErrorName(status));
        return;
    }

    /*Compare each currency symbol against all the
     currency symbols, including itself*/
    for (i = 0; i < UPRV_LENGTHOF(currency); i += 1)
    {
        for (j = 0; j < UPRV_LENGTHOF(currency); j += 1)
        {
             u_strcpy(source, currency[i]);
             u_strcpy(target, currency[j]);

            /* The table is in expected order, so the indices give the answer. */
            if (i < j)
            {
                expectedResult = UCOL_LESS;
            }
            else if ( i == j)
            {
                expectedResult = UCOL_EQUAL;
            }
            else
            {
                expectedResult = UCOL_GREATER;
            }

            compareResult = ucol_strcoll(c, source, u_strlen(source), target, u_strlen(target));

            /* Query each key's size first, then generate it into a buffer of that size. */
            sortklen1=ucol_getSortKey(c, source, u_strlen(source),  NULL, 0);
            sortKey1=(uint8_t*)malloc(sizeof(uint8_t) * (sortklen1+1));

            sortklen2=ucol_getSortKey(c, target, u_strlen(target),  NULL, 0);
            sortKey2=(uint8_t*)malloc(sizeof(uint8_t) * (sortklen2+1));

            if (sortKey1 == NULL || sortKey2 == NULL)
            {
                log_err("out of memory allocating sort keys\n");
                free(sortKey1);
                free(sortKey2);
                ucol_close(c);
                return;
            }

            ucol_getSortKey(c, source, u_strlen(source), sortKey1, sortklen1+1);
            ucol_getSortKey(c, target, u_strlen(target), sortKey2, sortklen2+1);

            /* Compare only as many bytes as the shorter key reports, so the
             * comparison never reads past the data written for either key. */
            cmplen = (sortklen1 < sortklen2) ? sortklen1 : sortklen2;
            res = uprv_memcmp(sortKey1, sortKey2, cmplen);
            if (res < 0) keyResult = (UCollationResult)-1;
            else if (res > 0) keyResult = (UCollationResult)1;
            else keyResult = (UCollationResult)0;

            reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, compareResult, expectedResult );

            free(sortKey1);
            free(sortKey2);

        }
    }
    ucol_close(c);
}
Beispiel #23
0
/**
* Tests the [variable top] tag in rule syntax. Since the default [alternate]
* tag has the value shifted, any codepoints before [variable top] should give
* a primary ce of 0.
*
* NOTE: the entire body is wrapped in "#if 0", so this function currently
* compiles to an empty test. The disabled code is kept for reference only.
*/
static void TestVariableTop(void)
{
#if 0
    /*
     * Starting with ICU 53, setting the variable top via a pseudo relation string
     * is not supported any more.
     * It was replaced by the [maxVariable symbol] setting.
     * See ICU tickets #9958 and #8032.
     */
    static const char       str[]          = "&z = [variable top]";
          int         len          = strlen(str);
          UChar      rules[sizeof(str)];
          UCollator  *myCollation;
          UCollator  *enCollation;
          UErrorCode  status       = U_ZERO_ERROR;
          UChar       source[1];
          UChar       ch;
          uint8_t     result[20];
          uint8_t     expected[20];

    /* Widen the ASCII rule text into UChars. */
    u_uastrcpy(rules, str);

    enCollation = ucol_open("en_US", &status);
    if (U_FAILURE(status)) {
        log_err_status(status, "ERROR: in creation of collator :%s\n", 
                myErrorName(status));
        return;
    }
    myCollation = ucol_openRules(rules, len, UCOL_OFF, 
                                 UCOL_PRIMARY,NULL, &status);
    if (U_FAILURE(status)) {
        ucol_close(enCollation);
        log_err("ERROR: in creation of rule based collator :%s\n", 
                myErrorName(status));
        return;
    }

    ucol_setStrength(enCollation, UCOL_PRIMARY);
    ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
    ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
        
    if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) !=
        UCOL_SHIFTED || U_FAILURE(status)) {
        log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
    }

    /* The reference key is all zero bytes; each result is compared against it. */
    uprv_memset(expected, 0, 20);

    /* space is supposed to be a variable */
    source[0] = ' ';
    len = ucol_getSortKey(enCollation, source, 1, result, 
                          sizeof(result));

    if (uprv_memcmp(expected, result, len) != 0) {
        log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
    }

    /* Every letter from 'a' up to (but not including) 'z' is checked with the tailored collator. */
    ch = 'a';
    while (ch < 'z') {
        source[0] = ch;
        len = ucol_getSortKey(myCollation, source, 1, result,
                              sizeof(result));
        if (uprv_memcmp(expected, result, len) != 0) {
            log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n", 
                    ch);
        }
        ch ++;
    }
  
    ucol_close(enCollation);
    ucol_close(myCollation);
    enCollation = NULL;
    myCollation = NULL;
#endif
}
Beispiel #24
0
/**
* Tests the [variable top] tag in rule syntax. Since the default [alternate]
* tag has the value shifted, any codepoints before [variable top] should give
* a primary ce of 0.
*/
static void TestVariableTop(void)
{
    const char       *str          = "&z = [variable top]";
          int         len          = strlen(str);
          UChar      *rules;
          UCollator  *myCollation;
          UCollator  *enCollation;
          UErrorCode  status       = U_ZERO_ERROR;
          UChar       source[1];
          UChar       ch;
          uint8_t     result[20];
          uint8_t     expected[20];

    rules = (UChar*)malloc(sizeof(UChar*) * (len + 1));
    u_uastrcpy(rules, str);

    enCollation = ucol_open("en_US", &status);
    myCollation = ucol_openRules(rules, len, UCOL_OFF, 
                                 UCOL_PRIMARY,NULL, &status);
    if (U_FAILURE(status)) {
        log_err("ERROR: in creation of rule based collator :%s\n", 
                myErrorName(status));
        return;
    }

    ucol_setStrength(enCollation, UCOL_PRIMARY);
    ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
    ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
        
    if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) !=
        UCOL_SHIFTED || U_FAILURE(status)) {
        log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
    }

    uprv_memset(expected, 0, 20);

    /* space is supposed to be a variable */
    source[0] = ' ';
    len = ucol_getSortKey(enCollation, source, 1, result, 
                          sizeof(result));

    if (uprv_memcmp(expected, result, len) != 0) {
        log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
    }

    ch = 'a';
    while (ch < 'z') {
        source[0] = ch;
        len = ucol_getSortKey(myCollation, source, 1, result,
                              sizeof(result));
        if (uprv_memcmp(expected, result, len) != 0) {
            log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n", 
                    ch);
        }
        ch ++;
    }
  
    free(rules);
    ucol_close(enCollation);
    ucol_close(myCollation);
    enCollation = NULL;
    myCollation = NULL;
}
Beispiel #25
0
/**
 * Compares `source` with `target` using `myCollation` through several
 * independent code paths and logs an error whenever a path disagrees with
 * the expected outcome `result` (or with another path):
 *   - ucol_strcollIter over UTF-16 iterators,
 *   - ucol_strcollIter over UTF-8 iterators, with and without normalization
 *     (only when QUICK <= 0),
 *   - partial sort keys via compareUsingPartials,
 *   - ucol_strcoll with explicit lengths and with -1 lengths,
 *   - full sort keys generated with explicit lengths and with -1 lengths.
 * The final verdict is handed to reportCResult.
 *
 * The collator's normalization mode is read once up front and written back
 * after every temporary switch to UCOL_ON.
 */
static void doTestVariant(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
{
    int32_t sortklen1, sortklen2, sortklenmax, sortklenmin;
    int temp=0, gSortklen1=0,gSortklen2=0;
    UCollationResult compareResult, compareResulta, keyResult, compareResultIter = result;
    uint8_t *sortKey1, *sortKey2, *sortKey1a, *sortKey2a;
    uint32_t sLen = u_strlen(source);
    uint32_t tLen = u_strlen(target);
    char buffer[256];  /* scratch space, only handed to sortKeyToString below */
    uint32_t len;
    UErrorCode status = U_ZERO_ERROR;
    /* Remember the current normalization mode so it can be restored later. */
    UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);

    UCharIterator sIter, tIter;
    uiter_setString(&sIter, source, sLen);
    uiter_setString(&tIter, target, tLen);
    compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
    if(compareResultIter != result) {
      log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
    }

    /* convert the strings to UTF-8 and do try comparing with char iterator */
    if(QUICK <= 0) { /*!QUICK*/
      char utf8Source[256], utf8Target[256];
      int32_t utf8SourceLen = 0, utf8TargetLen = 0;
      u_strToUTF8(utf8Source, 256, &utf8SourceLen, source, sLen, &status);
      if(U_FAILURE(status)) { /* probably buffer is not big enough */
        log_verbose("Src UTF-8 buffer too small! Will not compare!\n");
      } else {
        u_strToUTF8(utf8Target, 256, &utf8TargetLen, target, tLen, &status);
        if(U_SUCCESS(status)) { /* both conversions succeeded */
          UCollationResult compareResultUTF8 = result, compareResultUTF8Norm = result;
          /*UCharIterator sIter, tIter;*/
          /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
          uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
          uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
       /*uiter_setString(&sIter, source, sLen);
      uiter_setString(&tIter, target, tLen);*/
          compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
          /* Repeat with normalization forced on, rewinding both iterators first. */
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
          sIter.move(&sIter, 0, UITER_START);
          tIter.move(&tIter, 0, UITER_START);
          compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
          if(compareResultUTF8 != compareResultIter) {
            log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
          }
          if(compareResultUTF8 != compareResultUTF8Norm) {
            log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
          }
        } else {
          log_verbose("Target UTF-8 buffer too small! Did not compare!\n");
        }
        if(U_FAILURE(status)) {
          log_verbose("UTF-8 strcoll failed! Ignoring result\n");
        }
      }
    }

    /* testing the partial sortkeys */
    if(1) { /*!QUICK*/
      int32_t i = 0;
      int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
      int32_t partialSizesSize = 1;
      if(QUICK <= 0) {
        partialSizesSize = 7;
      }
      /*log_verbose("partial sortkey test piecesize=");*/
      for(i = 0; i < partialSizesSize; i++) {
        UCollationResult partialSKResult = result, partialNormalizedSKResult = result;
        /*log_verbose("%i ", partialSizes[i]);*/

        partialSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
        if(partialSKResult != result) {
          log_err("Partial sortkey comparison returned wrong result (%i exp. %i): %s, %s (size %i)\n", 
            partialSKResult, result,
            aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
        }

        /* Only re-run with normalization on when it was not already on. */
        if(QUICK <= 0 && norm != UCOL_ON) {
          /*log_verbose("N ");*/
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
          partialNormalizedSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
          if(partialSKResult != partialNormalizedSKResult) {
            log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n", 
              aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
          }
        }
      }
      /*log_verbose("\n");*/
    }

    
    compareResult  = ucol_strcoll(myCollation, source, sLen, target, tLen);
    compareResulta = ucol_strcoll(myCollation, source, -1,   target, -1); 
    if (compareResult != compareResulta) {
        log_err("ucol_strcoll result from null terminated and explicit length strings differs.\n");
    }

    /* Ask for the required key sizes first (NULL buffer, zero capacity). */
    sortklen1=ucol_getSortKey(myCollation, source, sLen,  NULL, 0);
    sortklen2=ucol_getSortKey(myCollation, target, tLen,  NULL, 0);

    /* NOTE(review): sortklenmin is computed but not used anywhere below. */
    sortklenmax = (sortklen1>sortklen2?sortklen1:sortklen2);
    sortklenmin = (sortklen1<sortklen2?sortklen1:sortklen2);

    /* All four buffers use the larger size; the malloc results are not checked. */
    sortKey1 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey1a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    ucol_getSortKey(myCollation, source, sLen, sortKey1,  sortklen1+1);
    ucol_getSortKey(myCollation, source, -1,   sortKey1a, sortklen1+1);
    
    sortKey2 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey2a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    ucol_getSortKey(myCollation, target, tLen, sortKey2,  sortklen2+1);
    ucol_getSortKey(myCollation, target, -1,   sortKey2a, sortklen2+1);

    /* Check that sort key generated with null terminated string is identical  */
    /*  to that generated with a length specified.                             */
    /* The keys are compared as C strings here, i.e. up to the first zero byte. */
    if (uprv_strcmp((const char *)sortKey1, (const char *)sortKey1a) != 0 ||
        uprv_strcmp((const char *)sortKey2, (const char *)sortKey2a) != 0 ) {
        log_err("Sort Keys from null terminated and explicit length strings differ.\n");
    }

    /*memcmp(sortKey1, sortKey2,sortklenmax);*/
    temp= uprv_strcmp((const char *)sortKey1, (const char *)sortKey2);
    /* The reported key length is expected to equal strlen(key) + 1. */
    gSortklen1 = uprv_strlen((const char *)sortKey1)+1;
    gSortklen2 = uprv_strlen((const char *)sortKey2)+1;
    if(sortklen1 != gSortklen1){
        log_err("SortKey length does not match Expected: %i Got: %i\n",sortklen1, gSortklen1);
        log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey1, buffer, &len));
    }
    if(sortklen2!= gSortklen2){
        log_err("SortKey length does not match Expected: %i Got: %i\n", sortklen2, gSortklen2);
        log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey2, buffer, &len));
    }

    /* Map the strcmp sign onto a collation result. */
    if(temp < 0) {
        keyResult=UCOL_LESS;
    }
    else if(temp > 0) {
        keyResult= UCOL_GREATER;
    }
    else {
        keyResult = UCOL_EQUAL;
    }
    reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, compareResultIter, result );
    free(sortKey1);
    free(sortKey2);
    free(sortKey1a);
    free(sortKey2a);

}
/**
 * This function is invoked as:
 *
 *  _TOKENIZE('<token_table>', <data_row_id>, <data>, <delimiter>,
 *             <use_token_index>, <data_tag>)
 *
 * If <use_token_index> is omitted, it is treated as 0.
 * If <data_tag> is omitted, it is treated as NULL.
 *
 * It will split <data> on each instance of <delimiter> and insert each token
 * into <token_table>. The following columns in <token_table> are used:
 * token TEXT, source INTEGER, token_index INTEGER, tag (any type)
 * The token_index column is not required if <use_token_index> is 0.
 * The tag column is not required if <data_tag> is NULL.
 *
 * One row is inserted for each token in <data>.
 * In each inserted row, 'source' is <data_row_id>.
 * In the first inserted row, 'token' is the hex collation key of
 * the entire <data> string, and 'token_index' is 0.
 * In each row I (where 1 <= I < N, and N is the number of tokens in <data>)
 * 'token' will be set to the hex collation key of the I:th token (0-based).
 * If <use_token_index> != 0, 'token_index' is set to I.
 * If <data_tag> is not NULL, 'tag' is set to <data_tag>.
 *
 * In other words, there will be one row for the entire string,
 * and one row for each token except the first one.
 *
 * The function returns the number of tokens generated.
 */
static void tokenize(sqlite3_context * context, int argc, sqlite3_value ** argv)
{
    // On any setup failure the SQL result is NULL. Once the token loop has
    // started, a failure stops the loop and the number of rows inserted so
    // far is returned.
    //ALOGD("enter tokenize");
    int err;
    int useTokenIndex = 0;
    int useDataTag = 0;

    // Reject anything outside the documented 4..6 argument range before
    // argv[0..3] are read.
    if (argc < 4 || argc > 6) {
        ALOGE("Tokenize requires 4 to 6 arguments");
        sqlite3_result_null(context);
        return;
    }

    if (argc > 4) {
        useTokenIndex = sqlite3_value_int(argv[4]);
    }

    if (argc > 5) {
        useDataTag = (sqlite3_value_type(argv[5]) != SQLITE_NULL);
    }

    sqlite3 * handle = sqlite3_context_db_handle(context);
    UCollator* collator = (UCollator*)sqlite3_user_data(context);
    char const * tokenTable = (char const *)sqlite3_value_text(argv[0]);
    if (tokenTable == NULL) {
        ALOGE("tokenTable null");
        sqlite3_result_null(context);
        return;
    }

    // Get or create the prepared statement for the insertions
    sqlite3_stmt * statement = (sqlite3_stmt *)sqlite3_get_auxdata(context, 0);
    if (!statement) {
        char const * tokenIndexCol = useTokenIndex ? ", token_index" : "";
        char const * tokenIndexParam = useTokenIndex ? ", ?" : "";
        char const * dataTagCol = useDataTag ? ", tag" : "";
        char const * dataTagParam = useDataTag ? ", ?" : "";
        char * sql = sqlite3_mprintf("INSERT INTO %s (token, source%s%s) VALUES (?, ?%s%s);",
                tokenTable, tokenIndexCol, dataTagCol, tokenIndexParam, dataTagParam);
        err = sqlite3_prepare_v2(handle, sql, -1, &statement, NULL);
        sqlite3_free(sql);
        if (err) {
            ALOGE("prepare failed");
            sqlite3_result_null(context);
            return;
        }
        // This binds the statement to the table it was compiled against, which is argv[0].
        // If this function is ever called with a different table the finalizer will be called
        // and sqlite3_get_auxdata() will return null above, forcing a recompile for the new table.
        sqlite3_set_auxdata(context, 0, statement, tokenize_auxdata_delete);
    } else {
        // Reset the cached statement so that binding the row ID will work properly
        sqlite3_reset(statement);
    }

    // Bind the row ID of the source row
    int64_t rowID = sqlite3_value_int64(argv[1]);
    err = sqlite3_bind_int64(statement, 2, rowID);
    if (err != SQLITE_OK) {
        ALOGE("bind failed");
        sqlite3_result_null(context);
        return;
    }

    // Bind <data_tag> to the tag column
    if (useDataTag) {
        // The tag parameter follows the optional token_index parameter.
        int dataTagParamIndex = useTokenIndex ? 4 : 3;
        err = sqlite3_bind_value(statement, dataTagParamIndex, argv[5]);
        if (err != SQLITE_OK) {
            ALOGE("bind failed");
            sqlite3_result_null(context);
            return;
        }
    }

    // Get the raw bytes for the string to tokenize
    // the string will be modified by following code
    // however, sqlite did not reuse the string, so it is safe to not dup it
    UChar * origData = (UChar *)sqlite3_value_text16(argv[2]);
    if (origData == NULL) {
        sqlite3_result_null(context);
        return;
    }

    // Get the raw bytes for the delimiter
    const UChar * delim = (const UChar *)sqlite3_value_text16(argv[3]);
    if (delim == NULL) {
        ALOGE("can't get delimiter");
        sqlite3_result_null(context);
        return;
    }

    UChar * token = NULL;
    UChar *state;
    int numTokens = 0;

    do {
        // The first row is keyed on the whole, not yet tokenized string.
        if (numTokens == 0) {
            token = origData;
        }

        // Reset the program so we can use it to perform the insert
        sqlite3_reset(statement);
        char keybuf[1024];
        // The capacity passed here and the overflow check below must agree,
        // otherwise a key that did not fit could be used as if it had.
        int32_t result = ucol_getSortKey(collator, token, -1, (uint8_t*)keybuf, sizeof(keybuf));
        if (result <= 0) {
            // A non-positive length would underflow the size arithmetic below.
            ALOGE("ucol_getSortKey failed");
            break;
        }
        if ((size_t)result > sizeof(keybuf)) {
            // TODO allocate memory for this super big string
            ALOGE("ucol_getSortKey needs bigger buffer %d", result);
            break;
        }
        // Drop the trailing byte of the key; every remaining byte becomes two hex digits.
        uint32_t keysize = result-1;
        uint32_t base16Size = keysize*2;
        // Allocate at least one byte so a NULL return always means failure.
        char *base16buf = (char*)malloc(base16Size ? base16Size : 1);
        if (base16buf == NULL) {
            ALOGE("out of memory allocating token buffer");
            break;
        }
        base16Encode(base16buf, keybuf, keysize);
        // SQLITE_STATIC: the buffer is owned here and stays alive until after sqlite3_step.
        err = sqlite3_bind_text(statement, 1, base16buf, base16Size, SQLITE_STATIC);

        if (err != SQLITE_OK) {
            ALOGE(" sqlite3_bind_text16 error %d", err);
            free(base16buf);
            break;
        }

        if (useTokenIndex) {
            err = sqlite3_bind_int(statement, 3, numTokens);
            if (err != SQLITE_OK) {
                ALOGE(" sqlite3_bind_int error %d", err);
                free(base16buf);
                break;
            }
        }

        err = sqlite3_step(statement);
        free(base16buf);

        if (err != SQLITE_DONE) {
            ALOGE(" sqlite3_step error %d", err);
            break;
        }
        numTokens++;
        if (numTokens == 1) {
            // first call: start tokenizing and discard the first token, which
            // is not given a row of its own
            u_strtok_r(origData, delim, &state);
        }
    } while ((token = u_strtok_r(NULL, delim, &state)) != NULL);
    sqlite3_result_int(context, numTokens);
}
Beispiel #27
0
int32_t RuleBasedCollator::getSortKey(const UnicodeString& source,
                                         uint8_t *result, int32_t resultLength)
                                         const
{
    return ucol_getSortKey(ucollator, source.getBuffer(), source.length(), result, resultLength);
}
/**
 * Builds a raw collation key for `stringIn` at the given `strength` and
 * returns it in a freshly PR_Malloc'ed buffer.
 *
 * @param strength  collation strength forwarded to EnsureCollator
 * @param stringIn  text to generate the key for
 * @param key       out: newly allocated key bytes
 * @param outLen    out: number of key bytes
 * @return NS_OK on success; NS_ERROR_NOT_INITIALIZED, an invalid-argument
 *         error, the EnsureCollator failure code, NS_ERROR_FAILURE or
 *         NS_ERROR_OUT_OF_MEMORY otherwise. Nothing is handed out on failure.
 */
NS_IMETHODIMP nsCollationMacUC::AllocateRawSortKey(int32_t strength, const nsAString& stringIn,
                                                   uint8_t** key, uint32_t* outLen)
{
  NS_ENSURE_TRUE(mInit, NS_ERROR_NOT_INITIALIZED);
  NS_ENSURE_ARG_POINTER(key);
  NS_ENSURE_ARG_POINTER(outLen);

  nsresult res = EnsureCollator(strength);
  NS_ENSURE_SUCCESS(res, res);

  uint32_t stringInLen = stringIn.Length();

  // Bind the flattened string to a named reference so that any copy it owns
  // outlives every use of the raw character pointer below.
  const auto& flatStr = PromiseFlatString(stringIn);

  if (mUseICU) {
    const UChar* str = (const UChar*)flatStr.get();

    // First call only measures the key.
    int32_t keyLength = ucol_getSortKey(mCollatorICU, str, stringInLen, nullptr, 0);
    NS_ENSURE_TRUE((stringInLen == 0 || keyLength > 0), NS_ERROR_FAILURE);

    // Since key is freed elsewhere with PR_Free, allocate with PR_Malloc.
    uint8_t* newKey = (uint8_t*)PR_Malloc(keyLength + 1);
    if (!newKey) {
      return NS_ERROR_OUT_OF_MEMORY;
    }

    keyLength = ucol_getSortKey(mCollatorICU, str, stringInLen, newKey, keyLength + 1);
    const bool keyOk = (stringInLen == 0 || keyLength > 0);
    if (!keyOk) {
      // Ownership has not been handed to the caller yet, so release it here.
      PR_Free(newKey);
    }
    NS_ENSURE_TRUE(keyOk, NS_ERROR_FAILURE);

    *key = newKey;
    *outLen = keyLength;

    return NS_OK;
  }

  // Non-ICU path: make sure the scratch buffer is large enough for the
  // computed upper bound on the key size.
  uint32_t maxKeyLen = (1 + stringInLen) * kCollationValueSizeFactor * sizeof(UCCollationValue);
  if (maxKeyLen > mBufferLen) {
    // NOTE(review): this doubling loop never terminates if mBufferLen is 0;
    // confirm that it is initialized to a non-zero value elsewhere.
    uint32_t newBufferLen = mBufferLen;
    do {
      newBufferLen *= 2;
    } while (newBufferLen < maxKeyLen);
    void* newBuffer = moz_malloc(newBufferLen);
    if (!newBuffer) {
      return NS_ERROR_OUT_OF_MEMORY;
    }

    if (mBuffer) {
      moz_free(mBuffer);
      mBuffer = nullptr;
    }
    mBuffer = newBuffer;
    mBufferLen = newBufferLen;
  }

  ItemCount actual;
  OSStatus err = ::UCGetCollationKey(mCollator, (const UniChar*) flatStr.get(),
                                     (UniCharCount) stringInLen,
                                     (ItemCount) (mBufferLen / sizeof(UCCollationValue)),
                                     &actual, (UCCollationValue *)mBuffer);
  NS_ENSURE_TRUE((err == noErr), NS_ERROR_FAILURE);

  // Copy the key out of the scratch buffer into caller-owned memory.
  uint32_t keyLength = actual * sizeof(UCCollationValue);
  // Since key is freed elsewhere with PR_Free, allocate with PR_Malloc.
  void* newKey = PR_Malloc(keyLength);
  if (!newKey) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  memcpy(newKey, mBuffer, keyLength);
  *key = (uint8_t *)newKey;
  *outLen = keyLength;

  return NS_OK;
}
Beispiel #29
0
/**
 * Writes the sort key for the UTF-16 text `source` of length `sourceLength`
 * into `result` (capacity `resultLength`) by delegating to ucol_getSortKey
 * on the wrapped collator, and returns the length that call reports.
 */
int32_t RuleBasedCollator::getSortKey(const UChar *source,
                                         int32_t sourceLength, uint8_t *result,
                                         int32_t resultLength) const
{
    // All arguments are forwarded unchanged.
    const int32_t keyLength =
        ucol_getSortKey(ucollator, source, sourceLength, result, resultLength);
    return keyLength;
}
Beispiel #30
0
/**
  * Tests surrogate support.
  * NOTE: This test used \\uD801\\uDC01 pair, which is now assigned to Desseret
  * Therefore, another (unassigned) code point was used for this test.
  */
static void TestSurrogates(void)
{
    const char       *str          = 
                              "&z<'\\uD800\\uDC00'<'\\uD800\\uDC0A\\u0308'<A";
          int         len          = strlen(str);
          int         rlen         = 0;
          UChar      *rules;
          UCollator  *myCollation;
          UCollator  *enCollation;
          UErrorCode  status       = U_ZERO_ERROR;
          UChar       source[][4]    = 
          {{'z', 0, 0}, {0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {0xD800, 0xDC02}};
          UChar       target[][4]    = 
          {{0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {'A', 0, 0}, {0xD800, 0xDC03}};
          int         count        = 0;
          uint8_t enresult[20], myresult[20];
          int enlen, mylen;
          
    /* tests for open rules with surrogate rules */
    rules = (UChar*)malloc(sizeof(UChar*) * (len + 1));
    rlen = u_unescape(str, rules, len);
    
    enCollation = ucol_open("en_US", &status);
    myCollation = ucol_openRules(rules, rlen, UCOL_OFF, 
                                 UCOL_TERTIARY,NULL, &status);
    if (U_FAILURE(status)) {
        log_err("ERROR: in creation of rule based collator :%s\n", 
                myErrorName(status));
        return;
    }

    /* 
    this test is to verify the supplementary sort key order in the english 
    collator
    */
    log_verbose("start of english collation supplementary characters test\n");
    while (count < 2) {
        doTest(enCollation, source[count], target[count], UCOL_LESS);
        count ++;
    }
    doTest(enCollation, source[count], target[count], UCOL_GREATER);
        
    log_verbose("start of tailored collation supplementary characters test\n");
    count = 0;
    /* tests getting collation elements for surrogates for tailored rules */
    while (count < 4) {
        doTest(myCollation, source[count], target[count], UCOL_LESS);
        count ++;
    }

    /* tests that \uD800\uDC02 still has the same value, not changed */
    enlen = ucol_getSortKey(enCollation, source[3], 2, enresult, 20);
    mylen = ucol_getSortKey(myCollation, source[3], 2, myresult, 20);
    if (enlen != mylen ||
        uprv_memcmp(enresult, myresult, enlen) != 0) {
        log_verbose("Failed : non-tailored supplementary characters should have the same value\n");
    }

    free(rules);
    ucol_close(enCollation);
    ucol_close(myCollation);
    enCollation = NULL;
    myCollation = NULL;
}