/**
 * Collator::getSortKey(): produce the ICU collation sort key for `val`.
 *
 * `val` is turned into an icu::UnicodeString through u16().  The key is
 * generated in two passes: a sizing call with a null buffer, followed by a
 * call that writes directly into a reserved String.
 *
 * Returns the key as a String, or false when the conversion reports a
 * failure or ucol_getSortKey() yields a non-positive length.
 */
static Variant HHVM_METHOD(Collator, getSortKey, const String& val) {
  FETCH_COL(data, this_, false);

  UErrorCode convStatus = U_ZERO_ERROR;
  icu::UnicodeString source(u16(val, convStatus));
  if (U_FAILURE(convStatus)) {
    return false;
  }

  // Sizing pass: with no output buffer ICU only reports the length it needs.
  int keyLen = ucol_getSortKey(data->collator(),
                               source.getBuffer(), source.length(),
                               nullptr, 0);
  if (keyLen <= 0) {
    return false;
  }

  // Writing pass: let ICU fill the String's own storage.
  String key(keyLen + 1, ReserveString);
  keyLen = ucol_getSortKey(data->collator(),
                           source.getBuffer(), source.length(),
                           (uint8_t*) key.get()->mutableData(),
                           key.capacity() + 1);
  if (keyLen <= 0) {
    return false;
  }

  key.setSize(keyLen);
  return key;
}
/**
 * Builds the ICU sort key for stringIn in a freshly allocated buffer.
 *
 * @param strength  forwarded to EnsureCollator() before the key is generated.
 * @param stringIn  text to generate the key for.
 * @param key       out: buffer allocated with PR_Malloc; ownership passes to
 *                  the caller on success (it is released with PR_Free).
 * @param outLen    out: length reported by ucol_getSortKey().
 * @return NS_OK on success; NS_ERROR_NOT_INITIALIZED, an invalid-pointer
 *         error, the EnsureCollator() failure, NS_ERROR_OUT_OF_MEMORY or
 *         NS_ERROR_FAILURE otherwise.  Nothing is stored in the out
 *         parameters on failure.
 */
NS_IMETHODIMP nsCollationMacUC::AllocateRawSortKey(int32_t strength, const nsAString& stringIn, uint8_t** key, uint32_t* outLen)
{
  NS_ENSURE_TRUE(mInit, NS_ERROR_NOT_INITIALIZED);
  NS_ENSURE_ARG_POINTER(key);
  NS_ENSURE_ARG_POINTER(outLen);

  nsresult res = EnsureCollator(strength);
  NS_ENSURE_SUCCESS(res, res);

  // Bind the flattened string to a named reference so it outlives both
  // ucol_getSortKey() calls.  Calling .get() on the temporary directly leaves
  // `str` dangling whenever PromiseFlatString had to make its own copy.
  const auto& flatString = PromiseFlatString(stringIn);
  uint32_t stringInLen = stringIn.Length();
  const UChar* str = (const UChar*)flatString.get();

  // Sizing pass: no output buffer, ICU only reports the required length.
  int32_t keyLength = ucol_getSortKey(mCollatorICU, str, stringInLen, nullptr, 0);
  NS_ENSURE_TRUE((stringInLen == 0 || keyLength > 0), NS_ERROR_FAILURE);

  // Since key is freed elsewhere with PR_Free, allocate with PR_Malloc.
  uint8_t* newKey = (uint8_t*)PR_Malloc(keyLength + 1);
  if (!newKey) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  keyLength = ucol_getSortKey(mCollatorICU, str, stringInLen, newKey, keyLength + 1);
  const bool keyOk = (stringInLen == 0 || keyLength > 0);
  if (!keyOk) {
    // The buffer was never handed to the caller, so release it here.
    PR_Free(newKey);
  }
  NS_ENSURE_TRUE(keyOk, NS_ERROR_FAILURE);

  *key = newKey;
  *outLen = keyLength;
  return NS_OK;
}
//static jbyteArray NativeCollation_getSortKey(JNIEnv* env, jclass, jint address, jstring source0) { JNIEXPORT jbyteArray JNICALL Java_com_ibm_icu4jni_text_NativeCollation_getSortKey(JNIEnv* env, jclass, jint address, jstring source0) { ScopedJavaUnicodeString source(env, source0); const UCollator* collator = toCollator(address); uint8_t byteArray[UCOL_MAX_BUFFER * 2]; UniquePtr<uint8_t[]> largerByteArray; uint8_t* usedByteArray = byteArray; const UChar* chars = source.unicodeString().getBuffer(); size_t charCount = source.unicodeString().length(); size_t byteArraySize = ucol_getSortKey(collator, chars, charCount, usedByteArray, sizeof(byteArray) - 1); if (byteArraySize > sizeof(byteArray) - 1) { // didn't fit, try again with a larger buffer. largerByteArray.reset(new uint8_t[byteArraySize + 1]); usedByteArray = largerByteArray.get(); byteArraySize = ucol_getSortKey(collator, chars, charCount, usedByteArray, byteArraySize); } if (byteArraySize == 0) { return NULL; } jbyteArray result = env->NewByteArray(byteArraySize); env->SetByteArrayRegion(result, 0, byteArraySize, reinterpret_cast<jbyte*> (usedByteArray)); return result; }
// Collator.sort_key {{{ static PyObject * icu_Collator_sort_key(icu_Collator *self, PyObject *args, PyObject *kwargs) { int32_t sz = 0, key_size = 0, bsz = 0; UChar *buf = NULL; uint8_t *buf2 = NULL; PyObject *ans = NULL, *input = NULL; if (!PyArg_ParseTuple(args, "O", &input)) return NULL; buf = python_to_icu(input, &sz, 1); if (buf == NULL) return NULL; bsz = 7 * sz + 1; buf2 = (uint8_t*)calloc(bsz, sizeof(uint8_t)); if (buf2 == NULL) { PyErr_NoMemory(); goto end; } key_size = ucol_getSortKey(self->collator, buf, sz, buf2, bsz); if (key_size > bsz) { buf2 = realloc(buf2, (key_size + 1) * sizeof(uint8_t)); if (buf2 == NULL) { PyErr_NoMemory(); goto end; } key_size = ucol_getSortKey(self->collator, buf, sz, buf2, key_size + 1); } ans = PyBytes_FromStringAndSize((char*)buf2, key_size); end: if (buf != NULL) free(buf); if (buf2 != NULL) free(buf2); return ans; } // }}}
/*
 * Generate the ICU sort key for the NUL-terminated UTF-16 string `input`,
 * using a root-locale collator with numeric collation switched on.
 *
 * On return *output points at a palloc'ed buffer holding the key and the
 * return value is the key size reported by ucol_getSortKey().  Returns 0
 * (leaving *output untouched) when icu_failure() flags the collator setup.
 * A key size below 1 is reported through ereport(ERROR).
 */
static int32_t
sortkey_from_unicode (UChar *input, uint8_t **output)
{
	UErrorCode	status = U_ZERO_ERROR;
	UCollator  *collator = ucol_open ("", &status);
	int32_t		size;

	if (icu_failure (status))
		return 0;

	ucol_setAttribute (collator, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
	if (icu_failure (status))
	{
		/* the collator was opened successfully above; do not leak it */
		ucol_close (collator);
		return 0;
	}

	*output = (uint8_t *) palloc (sizeof (uint8_t) * PREALLOC_SIZE);
	size = ucol_getSortKey (collator, input, -1, *output, PREALLOC_SIZE);
	if (size > PREALLOC_SIZE)
	{
		/* preallocated buffer was too small: retry with the exact size */
		pfree (*output);
		*output = (uint8_t *) palloc (sizeof (uint8_t) * size);
		ucol_getSortKey (collator, input, -1, *output, size);
	}

	ucol_close (collator);

	if (size < 1)
	{
		ereport(ERROR, (errmsg("ICU sortkey is zero")));
	}

	return size;
}
static void TestJB581(void) { UChar dispName [100]; int32_t bufferLen = 0; UChar source [100]; UChar target [100]; UCollationResult result = UCOL_EQUAL; uint8_t sourceKeyArray [100]; uint8_t targetKeyArray [100]; int32_t sourceKeyOut = 0, targetKeyOut = 0; UCollator *myCollator = 0; UErrorCode status = U_ZERO_ERROR; /*u_uastrcpy(source, "This is a test.");*/ /*u_uastrcpy(target, "THISISATEST.");*/ u_uastrcpy(source, "THISISATEST."); u_uastrcpy(target, "Thisisatest."); myCollator = ucol_open("en_US", &status); if (U_FAILURE(status)){ bufferLen = uloc_getDisplayName("en_US", 0, dispName, 100, &status); /*Report the error with display name... */ log_err("ERROR: Failed to create the collator for : \"%s\"\n", dispName); return; } result = ucol_strcoll(myCollator, source, -1, target, -1); /* result is 1, secondary differences only for ignorable space characters*/ if (result != 1) { log_err("Comparing two strings with only secondary differences in C failed.\n"); } /* To compare them with just primary differences */ ucol_setStrength(myCollator, UCOL_PRIMARY); result = ucol_strcoll(myCollator, source, -1, target, -1); /* result is 0 */ if (result != 0) { log_err("Comparing two strings with no differences in C failed.\n"); } /* Now, do the same comparison with keys */ sourceKeyOut = ucol_getSortKey(myCollator, source, -1, sourceKeyArray, 100); targetKeyOut = ucol_getSortKey(myCollator, target, -1, targetKeyArray, 100); bufferLen = ((targetKeyOut > 100) ? 100 : targetKeyOut); if (memcmp(sourceKeyArray, targetKeyArray, bufferLen) != 0) { log_err("Comparing two strings with sort keys in C failed.\n"); } ucol_close(myCollator); }
int helper_collation_str(const char *src, char *dest, int dest_size) { HELPER_FN_CALL; int32_t size = 0; UErrorCode status = 0; UChar tmp_result[CTS_SQL_MAX_LEN]; UCollator *collator; const char *region; region = vconf_get_str(VCONFKEY_REGIONFORMAT); HELPER_DBG("region %s", region); collator = ucol_open(region, &status); h_retvm_if(U_FAILURE(status), CTS_ERR_ICU_FAILED, "ucol_open() Failed(%s)", u_errorName(status)); if (U_FAILURE(status)){ ERR("ucol_setAttribute Failed(%s)", u_errorName(status)); ucol_close(collator); return CTS_ERR_ICU_FAILED; } u_strFromUTF8(tmp_result, array_sizeof(tmp_result), NULL, src, -1, &status); if (U_FAILURE(status)){ ERR("u_strFromUTF8 Failed(%s)", u_errorName(status)); ucol_close(collator); return CTS_ERR_ICU_FAILED; } size = ucol_getSortKey(collator, tmp_result, -1, (uint8_t *)dest, dest_size); ucol_close(collator); dest[size]='\0'; return CTS_SUCCESS; }
/**
 * call-seq:
 *     collator.sort_key(an_ustring) -> String
 *
 * Get a sort key for a string from a UCollator.
 * Sort keys may be compared using strcmp.
 *
 * The first attempt uses a buffer as long as the input; when ICU reports a
 * larger required size the buffer is grown and the key is generated again.
 **/
VALUE icu4r_col_sort_key(VALUE self, VALUE str)
{
	int32_t needed, capa;
	char * buffer;
	VALUE ret;

	Check_Class(str, rb_cUString);
	capa = ICU_LEN(str);
	buffer = ALLOC_N(char, capa);
	needed = ucol_getSortKey(UCOLLATOR(self), ICU_PTR(str), ICU_LEN(str), (uint8_t *)buffer, capa);
	if (needed > capa) {
		REALLOC_N(buffer, char, needed);
		needed = ucol_getSortKey(UCOLLATOR(self), ICU_PTR(str), ICU_LEN(str), (uint8_t *)buffer, needed);
	}
	ret = rb_str_new(buffer, needed);
	/* memory obtained with ALLOC_N/REALLOC_N is released with xfree */
	xfree(buffer);
	return ret;
}
// Collator.sort_key {{{ static PyObject * icu_Collator_sort_key(icu_Collator *self, PyObject *args, PyObject *kwargs) { char *input; int32_t sz; UChar *buf; uint8_t *buf2; PyObject *ans; int32_t key_size; UErrorCode status = U_ZERO_ERROR; if (!PyArg_ParseTuple(args, "es", "UTF-8", &input)) return NULL; sz = (int32_t)strlen(input); buf = (UChar*)calloc(sz*4 + 1, sizeof(UChar)); if (buf == NULL) return PyErr_NoMemory(); u_strFromUTF8(buf, sz*4 + 1, &key_size, input, sz, &status); PyMem_Free(input); if (U_SUCCESS(status)) { buf2 = (uint8_t*)calloc(7*sz+1, sizeof(uint8_t)); if (buf2 == NULL) return PyErr_NoMemory(); key_size = ucol_getSortKey(self->collator, buf, -1, buf2, 7*sz+1); if (key_size == 0) { ans = PyBytes_FromString(""); } else { if (key_size >= 7*sz+1) { free(buf2); buf2 = (uint8_t*)calloc(key_size+1, sizeof(uint8_t)); if (buf2 == NULL) return PyErr_NoMemory(); ucol_getSortKey(self->collator, buf, -1, buf2, key_size+1); } ans = PyBytes_FromString((char *)buf2); } free(buf2); } else ans = PyBytes_FromString(""); free(buf); if (ans == NULL) return PyErr_NoMemory(); return ans; } // }}}
virtual void run() { //sleep(10000); int32_t line = 0; uint8_t sk1[1024], sk2[1024]; uint8_t *oldSk = NULL, *newSk = sk1; int32_t resLen = 0, oldLen = 0; int32_t i = 0; for(i = 0; i < noLines; i++) { resLen = ucol_getSortKey(coll, lines[i].buff, lines[i].buflen, newSk, 1024); int32_t res = 0, cmpres = 0, cmpres2 = 0; if(oldSk != NULL) { res = strcmp((char *)oldSk, (char *)newSk); cmpres = ucol_strcoll(coll, lines[i-1].buff, lines[i-1].buflen, lines[i].buff, lines[i].buflen); cmpres2 = ucol_strcoll(coll, lines[i].buff, lines[i].buflen, lines[i-1].buff, lines[i-1].buflen); //cmpres = res; //cmpres2 = -cmpres; if(cmpres != -cmpres2) { error("Compare result not symmetrical on line "+ line); break; } if(((res&0x80000000) != (cmpres&0x80000000)) || (res == 0 && cmpres != 0) || (res != 0 && cmpres == 0)) { error(UnicodeString("Difference between ucol_strcoll and sortkey compare on line ")+ UnicodeString(line)); break; } if(res > 0) { error(UnicodeString("Line %i is not greater or equal than previous line ")+ UnicodeString(i)); break; } else if(res == 0) { /* equal */ res = u_strcmpCodePointOrder(lines[i-1].buff, lines[i].buff); if (res == 0) { error(UnicodeString("Probable error in test file on line %i (comparing identical strings)")+ UnicodeString(i)); break; } /* * UCA 6.0 test files can have lines that compare == if they are * different strings but canonically equivalent. else if (res > 0) { error(UnicodeString("Sortkeys are identical, but code point compare gives >0 on line ")+ UnicodeString(i)); break; } */ } } oldSk = newSk; oldLen = resLen; newSk = (newSk == sk1)?sk2:sk1; } }
// Very simple example code - sticks a sortkey in the buffer // Not much error checking int32_t getSortKey_current(const char *locale, const UChar *string, int32_t sLen, uint8_t *buffer, int32_t bLen) { UErrorCode status = U_ZERO_ERROR; UCollator *coll = ucol_open(locale, &status); if(U_FAILURE(status)) { return -1; } int32_t result = ucol_getSortKey(coll, string, sLen, buffer, bLen); ucol_close(coll); return result; }
/**
 * Returns the ICU sort key of javaSource as a Java byte[].
 *
 * The key is first generated into a stack buffer; when ucol_getSortKey()
 * reports that more room is needed, a heap buffer of the reported size is
 * used for a second attempt.
 *
 * @param address     handle that toCollator() maps to the UCollator to use.
 * @param javaSource  the Java string to generate the key for.
 * @return the key bytes, or NULL when the string's characters are not
 *         available, ICU reports a zero-length key, or the Java array cannot
 *         be created.
 */
static jbyteArray NativeCollation_getSortKey(JNIEnv* env, jclass, jlong address, jstring javaSource) {
    ScopedStringChars source(env, javaSource);
    if (source.get() == NULL) {
        return NULL;
    }
    const UCollator* collator = toCollator(address);

    uint8_t byteArray[UCOL_MAX_BUFFER * 2];
    UniquePtr<uint8_t[]> largerByteArray;
    uint8_t* usedByteArray = byteArray;

    size_t byteArraySize = ucol_getSortKey(collator, source.get(), source.size(), usedByteArray, sizeof(byteArray) - 1);
    if (byteArraySize > sizeof(byteArray) - 1) {
        // didn't fit, try again with a larger buffer.
        largerByteArray.reset(new uint8_t[byteArraySize + 1]);
        usedByteArray = largerByteArray.get();
        byteArraySize = ucol_getSortKey(collator, source.get(), source.size(), usedByteArray, byteArraySize);
    }
    if (byteArraySize == 0) {
        return NULL;
    }

    jbyteArray result = env->NewByteArray(byteArraySize);
    if (result == NULL) {
        // NewByteArray returns NULL when the array cannot be constructed;
        // do not hand a null array to SetByteArrayRegion.
        return NULL;
    }
    env->SetByteArrayRegion(result, 0, byteArraySize, reinterpret_cast<jbyte*>(usedByteArray));
    return result;
}
/*
 * call-seq:
 *   string.unicode_sort_key -> string
 *
 * Returns a string that will sort according to the Unicode collation algorithm.
 *
 * The key is generated with an "en_US" collator into a BUF_SIZE byte buffer
 * and returned without its final byte.  Raises ArgumentError when the
 * collator cannot be opened or the key does not fit into the buffer.
 *
 */
static VALUE unicode_sort_key(VALUE string)
{
  char str[BUF_SIZE];
  UChar ustr[BUF_SIZE];
  int32_t len = 0;
  int32_t ulen = 0;
  UErrorCode status = U_ZERO_ERROR;
  UCollator *col;

  to_utf16(string, ustr, &ulen);

  col = ucol_open("en_US", &status);
  if (U_FAILURE(status)) {
    /* previously len stayed 0 and rb_str_new() was handed a length of -1 */
    rb_raise(rb_eArgError, "could not open collator: %s", u_errorName(status));
  }

  len = ucol_getSortKey(col, ustr, ulen, (uint8_t*)str, BUF_SIZE);
  /* close before anything below can raise, so the collator never leaks */
  ucol_close(col);

  /* ucol_getSortKey() reports the size the key needs; when that exceeds
   * BUF_SIZE, str does not hold the whole key and reading len - 1 bytes
   * from it would run past the end of the array. */
  if (len <= 0 || len > BUF_SIZE) {
    rb_raise(rb_eArgError, "sort key does not fit into %d bytes", (int)BUF_SIZE);
  }

  return rb_str_new(str, len - 1);
}
/*
 * Writes the sort key of lpStr (cwStrLength UChars) into sortKey, using the
 * collator that GetCollatorFromSortHandle() returns for the given handle and
 * options.
 *
 * Returns the value reported by ucol_getSortKey(), or 0 when the collator
 * lookup signals an error.
 */
extern "C" int32_t GetSortKey(SortHandle* pSortHandle, const UChar* lpStr, int32_t cwStrLength, uint8_t* sortKey, int32_t cbSortKeyLength, int32_t options)
{
    UErrorCode status = U_ZERO_ERROR;
    const UCollator* collator = GetCollatorFromSortHandle(pSortHandle, options, &status);

    if (U_FAILURE(status))
    {
        return 0;
    }

    return ucol_getSortKey(collator, lpStr, cwStrLength, sortKey, cbSortKeyLength);
}
extern "C" int32_t GetSortKey(const char* lpLocaleName, const UChar* lpStr, int32_t cwStrLength, uint8_t* sortKey, int32_t cbSortKeyLength, int32_t options) { UErrorCode err = U_ZERO_ERROR; UCollator* pColl = GetCollatorForLocaleAndOptions(lpLocaleName, options, &err); int32_t result = 0; if (U_SUCCESS(err)) { result = ucol_getSortKey(pColl, lpStr, cwStrLength, sortKey, cbSortKeyLength); ucol_close(pColl); } return result; }
/**
 * Generates the ICU sort key for `chars` (`size` UChars) and stores it,
 * written as a string value through a MojObjectWriter, in keyOut.
 *
 * An empty input is written as an empty string value.  Otherwise the key is
 * generated into a byte vector that starts at size * 3 bytes and is resized
 * to the length ICU reports, repeating while the reported length exceeded
 * the capacity that was offered.  The final byte of the key is expected to
 * be NUL and is not written.
 *
 * A zero length from ucol_getSortKey() is raised as MojErrDbUnicode.
 */
MojErr MojDbTextCollator::sortKey(const UChar* chars, MojSize size, MojDbKey& keyOut) const
{
	LOG_TRACE("Entering function %s", __FUNCTION__);

	MojErr err = MojErrNone;
	MojObjectWriter writer;

	if (size != 0) {
		// get sort key
		MojInt32 offered = 0;
		MojInt32 needed = 0;
		MojDbKey::ByteVec keyBytes;
		err = keyBytes.resize(size * 3);
		MojErrCheck(err);

		for (;;) {
			MojByte* out = NULL;
			err = keyBytes.begin(out);
			MojErrCheck(err);
			offered = (MojInt32) keyBytes.size();
			needed = ucol_getSortKey(m_ucol, chars, (MojInt32) size, out, offered);
			if (needed == 0) {
				MojErrThrow(MojErrDbUnicode);
			}
			// shrink to the real key length, or grow for the next attempt
			err = keyBytes.resize(needed);
			MojErrCheck(err);
			if (needed <= offered) {
				break;
			}
		}

		// write it
		MojAssert(keyBytes.size() >= 1 && keyBytes.back() == _T('\0'));
		err = writer.stringValue((const MojChar*) keyBytes.begin(), keyBytes.size() - 1);
		MojErrCheck(err);
	} else {
		err = writer.stringValue(_T(""), 0);
		MojErrCheck(err);
	}

	err = keyOut.assign(writer.buf());
	MojErrCheck(err);

	return MojErrNone;
}
static void TestGetSortKey() { /* This is meant to test a buffer reallocation crash while using French secondary sorting with a large buffer. The fact that Japanese characters are used is irrelevant. */ static const UChar pucUTF16[] = { 0x3049,0x30b9,0x3088,0xfffd,0xfffd,0x308f,0xfffd,0x3042, 0xfffd,0xfffd,0x305e,0xfffd,0x30b6,0x30bb,0x305b,0x30b1, 0x3050,0x30af,0x304e,0x30bd,0xfffd,0x30c6,0xfffd,0xfffd, 0x30e1,0xfffd,0xfffd,0x30d9,0xfffd,0x3092,0x3075,0x304a, 0x3074,0x3070,0x30f5,0x30c4,0x306e,0x30df,0x3053,0xfffd, 0x30a6,0x30b6,0x30e0,0xfffd,0x30bc,0x30ef,0x3087,0x30cc, 0x305f,0x30de,0xfffd,0x3090,0x3063,0x30dc,0x30b6,0x30b9, 0x30d2,0x3072,0x3061,0xfffd,0xfffd,0xfffd,0x307b,0x3092, 0x30a5,0x30a9,0x30b1,0x30e7,0xfffd,0xfffd,0xfffd,0xfffd, 0xfffd,0x305e,0xfffd,0x30c7,0x30ae,0x305b,0x308b,0x30c0, 0x30f5,0xfffd,0xfffd,0xfffd,0x307d,0x304e,0xfffd,0xfffd, 0x30c0,0x30c8,0x306f,0x307a,0x30dd,0x30e4,0x3084,0xfffd, 0x308c,0x30f1,0xfffd,0x30c6,0xfffd,0x307a,0xfffd,0x3052, 0x3056,0x305d,0x30b7,0xfffd,0x305b,0x30b0,0x30b9,0xfffd, 0x30b2,0x306d,0x3044,0xfffd,0x3073,0xfffd,0x30be,0x30cf, 0x3080,0xfffd,0x30a8,0x30f5,0x30a5,0x30c7,0x307c,0xfffd, 0x30d1,0x305f,0x30b2,0xfffd,0x3053,0x30ca,0xfffd,0x30dd, 0x3058,0x30c0,0x305d,0x30e1,0xfffd,0x30bb,0x305f,0x30d1, 0x30f2,0x3058,0x3086,0x30ce,0x30db,0x30cb,0x30e9,0xfffd, 0x308c,0xfffd,0xfffd,0x30af,0x30c4,0x3076,0x304c,0x30f5, 0x30e8,0x308c,0xfffd,0x30e2,0x3073,0x30a3,0x304e,0x30ea, 0xfffd,0x304f,0xfffd,0x306c,0x3044,0xfffd,0xfffd,0x30c9, 0xfffd,0x30f5,0xfffd,0xfffd,0xfffd,0x30eb,0x30a8,0xfffd, 0x306d,0x307d,0x30d8,0x3069,0xfffd,0xfffd,0x3086,0x30a9, 0xfffd,0x3076,0x30e9,0x30cc,0x3074,0x30e0,0xfffd,0xfffd, 0xfffd,0x30f0,0x3086,0x30ac,0x3076,0x3068,0x30c7,0xfffd, 0x30b7,0x30d2,0x3048,0x308e,0x30e8,0x30d9,0x30ce,0x30d0, 0x308b,0x30ee,0x30e6,0x3079,0x30f3,0x30af,0xfffd,0x3079, 0xfffd,0xfffd,0x30ca,0x30bf,0xfffd,0x30b5,0xfffd,0xfffd, 0x3093,0xfffd,0x30ba,0xfffd,0x3076,0x3047,0x304a,0xfffd, 
0xfffd,0x3086,0xfffd,0x3081,0xfffd,0x30f6,0x3066,0xfffd, 0xfffd,0x30b6,0x30ef,0x30e2,0x30bf,0xfffd,0x3053,0x304a, 0xfffd,0xfffd,0x304a,0x30e8,0xfffd,0x30e2,0xfffd,0xfffd, 0x305c,0x3081,0x30c6,0xfffd,0x3091,0x3046,0x306a,0x3059, 0xfffd,0xfffd,0x30dd,0x30d1,0x308a,0x30ee,0xfffd,0xfffd, 0x308a,0x3042,0x30da,0xfffd,0x3064,0x30ef,0x305c,0x306b, 0xfffd,0x30ca,0x3085,0x3067,0x30ea,0x30c2,0x30c8,0xfffd, 0x30f5,0xfffd,0xfffd,0xfffd,0x30ca,0xfffd,0x3050,0x30f1, 0x3050,0x3053,0x3072,0xfffd,0xfffd,0xfffd,0x3074,0xfffd, 0x304b,0x30dd,0x306d,0xfffd,0x3049,0x30a1,0x30cc,0x30de, 0x30ae,0x307b,0x308a,0xfffd,0x3065,0xfffd,0xfffd,0x30c0, 0xfffd,0x3048,0x30dc,0x304f,0x3085,0x3059,0x304b,0x30d3, 0x30eb,0x30a4,0x3073,0xfffd,0x30ba,0x308f,0x30a7,0x30c3, 0x3074,0x30cf,0x306c,0x3053,0x30c0,0xfffd,0x3066,0xfffd, 0x308f,0xfffd,0x30b5,0xfffd,0x3092,0x30c4,0xfffd,0x30d6, 0x3056,0x30ad,0x30d2,0x30ba,0xfffd,0x30e6,0x304c,0x3088, 0x30b6,0x3048,0x3077,0x30d1,0xfffd,0x3050,0xfffd,0x3042, 0xfffd,0xfffd,0x308f,0xfffd,0x30c1,0xfffd,0x3074,0x3061, 0x3056,0x30e5,0xfffd,0xfffd,0x3057,0xfffd,0xfffd,0xfffd, 0xfffd,0x30bd,0x30b3,0x30ee,0xfffd,0x30f2,0x3084,0x3050, 0xfffd,0x30e7,0xfffd,0xfffd,0x3060,0x3049,0x30f2,0x30ad, 0x30bf,0x30f1,0x30a2,0xfffd,0x30af,0xfffd,0x3060,0x30a1, 0x30e9,0x30c3,0xfffd,0x3072,0x3093,0x3070,0xfffd,0x308f, 0x3060,0xfffd,0x3067,0x306f,0x3082,0x308b,0x3051,0xfffd, 0x3058,0xfffd,0xfffd,0x30a8,0x3051,0x3054,0x30ad,0x30f0, 0x3053,0xfffd,0x30e1,0x30d7,0x308d,0x307f,0x30be,0x30b0, 0xfffd,0x30db,0xfffd,0x30d1,0xfffd,0x3054,0x30a5,0xfffd, 0x306a,0xfffd,0x305c,0xfffd,0x3052,0x3088,0xfffd,0x306e, 0xfffd,0x30a9,0x30a1,0x30b4,0x3083,0x30bd,0xfffd,0xfffd, 0x306a,0x3070,0x30cd,0xfffd,0x3072,0x30ed,0x30c6,0x30be, 0x30c4,0x305e,0x30b3,0x30e1,0x308a,0xfffd,0x305b,0xfffd, 0x3042,0x3088,0xfffd,0x304c,0xfffd,0x3089,0x3071,0xfffd, 0xfffd,0x30c6,0x3062,0x3079,0xfffd,0x304b,0x304a,0xfffd, 0x30ad,0x3045,0x3045,0x3087,0xfffd,0x306a,0x308b,0x0000, 0x30bd,0x3065,0x30b8,0x3086,0x30d3,0x3076,0xfffd,0xfffd, 
0x308f,0x3053,0x307c,0x3053,0x3084,0x30ae,0x30c4,0x3045, 0x30a8,0x30d0,0x30e1,0x308c,0x30e6,0x30b7,0xfffd,0xfffd, 0xfffd,0x3046,0x305f,0xfffd,0x3086,0x30ab,0xfffd,0xfffd, 0x30c8,0xfffd,0x30a1,0x3052,0x3059,0xfffd,0x30a4,0xfffd, 0xfffd,0x308c,0x3085,0x30ab,0x30b5,0x3091,0x30bf,0x30e3, 0xfffd,0xfffd,0x3087,0xfffd,0x30f6,0x3051,0x30bd,0x3092, 0x3063,0xfffd,0x30a9,0x3063,0x306e,0xfffd,0xfffd,0xfffd, 0x306c,0xfffd,0x307e,0x30ad,0x3077,0x30c2,0x30e9,0x30d5, 0xfffd,0xfffd,0x30c6,0x305c,0xfffd,0xfffd,0x3089,0xfffd, 0x3048,0x30cb,0x308c,0xfffd,0xfffd,0x3044,0xfffd,0x3080, 0x3063,0x3079,0xfffd,0x308a,0x30cb,0x3042,0x3057,0xfffd, 0x307c,0x30c1,0x30a8,0x30cf,0xfffd,0x3083,0xfffd,0xfffd, 0x306c,0xfffd,0x305e,0x3092,0xfffd,0x30dc,0x30b0,0x3081, 0x30e3,0x30f0,0x304e,0x30cc,0x308e,0x30c4,0x30ad }; UErrorCode status = U_ZERO_ERROR; UCollator *pCollator; int32_t lenActualSortKey; uint8_t pucSortKey[4096]; static const int32_t LENSORTKEY = (int32_t)sizeof(pucSortKey); ucol_prepareShortStringOpen("LFR_AN_CX_EX_FO_HX_NX_S3", 0, NULL, &status); pCollator = ucol_openFromShortString("LFR_AN_CX_EX_FO_HX_NX_S3", 0, NULL, &status); if (U_FAILURE(status)) { log_data_err("error opening collator -> %s. (Are you missing data?)\n", u_errorName(status)); return; } lenActualSortKey = ucol_getSortKey(pCollator, (const UChar *)pucUTF16, UPRV_LENGTHOF(pucUTF16), pucSortKey, LENSORTKEY); if (lenActualSortKey > LENSORTKEY) { log_err("sort key too big for original buffer. Got: %d Expected: %d\n", lenActualSortKey, LENSORTKEY); return; } /* If the test didn't crash, then the test succeeded. */ ucol_close(pCollator); }
/**
 * Reads testFile line by line, parses each data line with u_parseString()
 * and checks every line against the one before it:
 *   - ucol_strcoll() must be antisymmetric for the pair,
 *   - a strcmp() of the two sort keys must agree with ucol_strcoll(),
 *   - the previous key must not sort after the current key, and
 *   - when the keys are equal, the previous string must precede the current
 *     one in code point order.
 * Empty lines and lines starting with '#' are skipped.  Every violation is
 * reported with errln(); processing continues with the next line.
 * Does nothing when testFile is not set.
 */
void UCAConformanceTest::testConformance(UCollator *coll)
{
  if (testFile == 0) {
    return;
  }

  int32_t line = 0;
  char lineB[1024];

  // Text and key buffers come in pairs used alternately: one holds the
  // previous line, the other the current line.
  UChar textA[1024], textB[1024];
  UChar *curText = textA;
  UChar *prevText = NULL;
  int32_t curTextLen = 0, prevTextLen = 0;

  uint8_t keyA[1024], keyB[1024];
  uint8_t *curKey = keyA;
  uint8_t *prevKey = NULL;
  int32_t curKeyLen = 0, prevKeyLen = 0;

  uint32_t first = 0;
  uint32_t parsed = 0;

  while (fgets(lineB, 1024, testFile) != NULL) {
    parsed = 0;
    line++;
    if (*lineB == 0 || lineB[0] == '#') {
      continue;
    }

    parsed = u_parseString(lineB, curText, 1024, &first, &status);
    curTextLen = parsed;
    curText[parsed] = 0;

    curKeyLen = ucol_getSortKey(coll, curText, curTextLen, curKey, 1024);

    if (prevKey != NULL) {
      int32_t keyOrder = strcmp((char *)prevKey, (char *)curKey);
      int32_t forward = ucol_strcoll(coll, prevText, prevTextLen, curText, curTextLen);
      int32_t backward = ucol_strcoll(coll, curText, curTextLen, prevText, prevTextLen);

      if (forward != -backward) {
        errln("Compare result not symmetrical on line %i", line);
      }

      // Key comparison and string comparison must share a sign and must
      // agree on equality.
      const bool signMismatch = (keyOrder & 0x80000000) != (forward & 0x80000000);
      const bool zeroMismatch = (keyOrder == 0) != (forward == 0);
      if (signMismatch || zeroMismatch) {
        errln("Difference between ucol_strcoll and sortkey compare on line %i", line);
        logln("Data line %s", lineB);
      }

      if (keyOrder > 0) {
        errln("Line %i is not greater or equal than previous line", line);
        logln("Data line %s", lineB);
      } else if (keyOrder == 0) { /* equal */
        int32_t codePointOrder = u_strcmpCodePointOrder(prevText, curText);
        if (codePointOrder == 0) {
          errln("Probable error in test file on line %i (comparing identical strings)", line);
          logln("Data line %s", lineB);
        } else if (codePointOrder > 0) {
          errln("Sortkeys are identical, but code point comapare gives >0 on line %i", line);
          logln("Data line %s", lineB);
        }
      }
    }

    // The current line becomes the previous one; switch to the other buffers.
    prevKey = curKey;
    prevKeyLen = curKeyLen;
    curKey = (curKey == keyA) ? keyB : keyA;

    prevText = curText;
    prevTextLen = curTextLen;
    curText = (curText == textA) ? textB : textA;
  }
}
/**
 * Collator::sortWithSortKeys(): sorts `arr` by ICU sort key.
 *
 * A sort key is generated for every element (non-string elements are treated
 * as the empty string), the keys are stored back to back in one buffer, an
 * index of (key, element position) pairs is sorted with zend_qsort(), and
 * `arr` is replaced by a new array holding the values in sorted order with
 * fresh keys.
 *
 * Returns true on success, and also when `arr` is not an array or is empty
 * (nothing to do).  Returns false, leaving `arr` untouched, when a UTF-8 to
 * UTF-16 conversion or a memory allocation fails; m_errcode then describes
 * the failure.
 */
bool c_Collator::t_sortwithsortkeys(VRefParam arr) {
  INSTANCE_METHOD_INJECTION_BUILTIN(Collator, Collator::sortwithsortkeys);
  char* sortKeyBuf = NULL; /* buffer to store sort keys */
  int32_t sortKeyBufSize = DEF_SORT_KEYS_BUF_SIZE; /* buffer size */
  ptrdiff_t sortKeyBufOffset = 0; /* pos in buffer to store sort key */
  int32_t sortKeyLen = 0; /* the length of currently processing key */
  int32_t bufLeft = 0;
  int32_t bufIncrement = 0;

  /* buffer to store 'indexes' which will be passed to 'qsort' */
  collator_sort_key_index_t* sortKeyIndxBuf = NULL;
  int32_t sortKeyIndxBufSize = DEF_SORT_KEYS_INDX_BUF_SIZE;
  int32_t sortKeyIndxSize = sizeof( collator_sort_key_index_t );
  int32_t sortKeyCount = 0;
  int32_t j = 0;

  /* tmp buffer to hold current processing string in utf-16 */
  UChar* utf16_buf = NULL;
  /* the length of utf16_buf, counted in UChars */
  int utf16_buf_size = DEF_UTF16_BUF_SIZE;
  /* length of converted string */
  int utf16_len = 0;

  /* cleared on the first failure; all buffers are released in one place */
  bool ok = true;

  m_errcode.clear();
  s_intl_error->m_error.clear();

  /*
   * Sort specified array.
   */
  if (!arr.isArray()) {
    return true;
  }
  Array hash = arr.toArray();
  if (hash.size() == 0) {
    return true;
  }

  /* Create bufers */
  sortKeyBuf = (char*)calloc(sortKeyBufSize, sizeof(char));
  sortKeyIndxBuf = (collator_sort_key_index_t*)malloc(sortKeyIndxBufSize);
  /* utf16_buf_size is compared with and assigned from UChar counts below,
   * so the allocation has to be scaled by sizeof(UChar); allocating that
   * many bytes made the buffer half as large as its recorded capacity. */
  utf16_buf = (UChar*)malloc(utf16_buf_size * sizeof(UChar));
  if (!sortKeyBuf || !sortKeyIndxBuf || !utf16_buf) {
    m_errcode.code = U_MEMORY_ALLOCATION_ERROR;
    m_errcode.custom_error_message = "Sort with sort keys failed";
    ok = false;
  }

  /* Iterate through input hash and create a sort key for each value. */
  for (ssize_t pos = hash->iter_begin();
       ok && pos != ArrayData::invalid_index;
       pos = hash->iter_advance(pos)) {
    /* Convert current hash item from UTF-8 to UTF-16LE and save the result
     * to utf16_buf. */
    utf16_len = utf16_buf_size;
    /* Process string values only. */
    Variant val(hash->getValue(pos));
    if (val.isString()) {
      String str = val.toString();
      intl_convert_utf8_to_utf16(&utf16_buf, &utf16_len, str.data(),
                                 str.size(), &(m_errcode.code));
      if (U_FAILURE(m_errcode.code)) {
        m_errcode.custom_error_message = "Sort with sort keys failed";
        ok = false;
        break;
      }
    } else {
      /* Set empty string */
      utf16_len = 0;
      utf16_buf[utf16_len] = 0;
    }

    if ((utf16_len + 1) > utf16_buf_size) {
      utf16_buf_size = utf16_len + 1;
    }

    /* Get sort key, reallocating the buffer if needed. */
    bufLeft = sortKeyBufSize - sortKeyBufOffset;
    sortKeyLen = ucol_getSortKey(m_ucoll, utf16_buf, utf16_len,
                                 (uint8_t*)sortKeyBuf + sortKeyBufOffset,
                                 bufLeft);

    /* check for sortKeyBuf overflow, increasing its size of the buffer if
       needed */
    if (sortKeyLen > bufLeft) {
      bufIncrement = ( sortKeyLen > DEF_SORT_KEYS_BUF_INCREMENT ) ?
        sortKeyLen : DEF_SORT_KEYS_BUF_INCREMENT;
      /* grow through a temporary so the old block is still owned (and
       * freed below) if realloc fails */
      char* grownKeys = (char*)realloc(sortKeyBuf,
                                       sortKeyBufSize + bufIncrement);
      if (!grownKeys) {
        m_errcode.code = U_MEMORY_ALLOCATION_ERROR;
        m_errcode.custom_error_message = "Sort with sort keys failed";
        ok = false;
        break;
      }
      sortKeyBuf = grownKeys;
      sortKeyBufSize += bufIncrement;
      bufLeft += bufIncrement;
      sortKeyLen = ucol_getSortKey(m_ucoll, utf16_buf, utf16_len,
                                   (uint8_t*)sortKeyBuf + sortKeyBufOffset,
                                   bufLeft);
    }

    /* check sortKeyIndxBuf overflow, increasing its size of the buffer if
       needed */
    if ((sortKeyCount + 1) * sortKeyIndxSize > sortKeyIndxBufSize) {
      bufIncrement = (sortKeyIndxSize > DEF_SORT_KEYS_INDX_BUF_INCREMENT) ?
        sortKeyIndxSize : DEF_SORT_KEYS_INDX_BUF_INCREMENT;
      collator_sort_key_index_t* grownIndex =
        (collator_sort_key_index_t*)realloc(sortKeyIndxBuf,
                                            sortKeyIndxBufSize + bufIncrement);
      if (!grownIndex) {
        m_errcode.code = U_MEMORY_ALLOCATION_ERROR;
        m_errcode.custom_error_message = "Sort with sort keys failed";
        ok = false;
        break;
      }
      sortKeyIndxBuf = grownIndex;
      sortKeyIndxBufSize += bufIncrement;
    }

    /* Store the key's offset for now: sortKeyBuf may still move when it is
       reallocated, so real pointers are only computed after the loop. */
    sortKeyIndxBuf[sortKeyCount].key = (char*)sortKeyBufOffset;
    sortKeyIndxBuf[sortKeyCount].valPos = pos;
    sortKeyBufOffset += sortKeyLen;
    ++sortKeyCount;
  }

  if (ok) {
    /* update ptrs to point to valid keys. */
    for( j = 0; j < sortKeyCount; j++ )
      sortKeyIndxBuf[j].key = sortKeyBuf + (ptrdiff_t)sortKeyIndxBuf[j].key;

    /* sort it */
    zend_qsort(sortKeyIndxBuf, sortKeyCount, sortKeyIndxSize,
               collator_cmp_sort_keys, NULL);

    /* for resulting hash we'll assign new hash keys rather then reordering */
    Array sortedHash = Array::Create();
    for (j = 0; j < sortKeyCount; j++) {
      sortedHash.append(hash->getValue(sortKeyIndxBuf[j].valPos));
    }

    /* Save sorted hash into return variable. */
    arr = sortedHash;
  }

  /* free(NULL) is a no-op, so this is safe after a failed allocation too */
  free(utf16_buf);
  free(sortKeyIndxBuf);
  free(sortKeyBuf);

  return ok;
}
int main(int /* argc*/ , const char * /*argv*/ []) { UErrorCode status = U_ZERO_ERROR; int diffs = 0; int gbaddiffs =0; setup(status); if(U_FAILURE(status)) return 1; int expected = PROVIDER_COUNT; for(int l=0;l<LOCALE_COUNT;l++) { printf("\n"); uint8_t oldBytes[200]; int32_t oldLen = -1; for(int v=0;v<=expected;v++) { // Construct the locale ID char locID[200]; strcpy(locID, locale[l]); if((v!=expected)) { // -1 = no version strcat(locID, "@sp=icu"); strcat(locID, provider_version[v]); } printf("%-28s = ", locID); UErrorCode subStatus = U_ZERO_ERROR; uint8_t bytes[200]; uint8_t bytesb[200]; #define USE_CXX 0 #if USE_CXX Collator *col = Collator::createInstance(Locale(locID),subStatus); if(U_FAILURE(subStatus)) { printf("ERR: %s\n", u_errorName(subStatus)); continue; } int32_t len = col->getSortKey(stuff, -1, bytes, 200); #else #if 1 char xbuf2[200]; strcpy(xbuf2,"X/"); strcat(xbuf2,locID); strcat(xbuf2,"/"); //printf(" -> %s\n", xbuf2); UCollator *col = ucol_openFromShortString(xbuf2, FALSE,NULL, &subStatus); #else UCollator *col = ucol_open(locID, &subStatus); #endif if(U_FAILURE(subStatus)) { printf("ERR: %s\n", u_errorName(subStatus)); continue; } char xbuf3[200]; { int32_t def = ucol_getShortDefinitionString(col,locID/*NULL*/,xbuf3,200,&subStatus); if(U_FAILURE(subStatus)) { printf("Err getting short string name: %s\n", u_errorName(subStatus)); } else { printf(" --> %s\n", xbuf3); } } int32_t len = ucol_getSortKey(col, stuff, -1, bytes, 200); #endif printf(" "); int tdiffs=0; for(int i=0;i<len;i++) { if(i<oldLen&&bytes[i]!=oldBytes[i]) { diffs++; printf("*"); } else { printf(" "); } printf("%02X", (0xFF&bytes[i])); } printf("\n"); char xbuf4[200]; UCollator *col2 = ucol_openFromShortString(xbuf3, FALSE, NULL, &subStatus); if(U_FAILURE(subStatus)) { printf("Err opening from new short string : %s\n", u_errorName(subStatus)); continue; } else { int32_t def4 = ucol_getShortDefinitionString(col,locID/*NULL*/,xbuf4,200,&subStatus); if(strcmp(xbuf4,xbuf3)) { printf(" 
--> reopened = %s (%s)\n", xbuf4, u_errorName(subStatus)); } } int32_t len2 = ucol_getSortKey(col2, stuff, -1, bytesb, 200); int baddiffs=0; for(int i=0;i<len;i++) { if(i<len&&bytes[i]!=bytesb[i]) { baddiffs++; printf("!"); } else { // printf(" "); } // printf("%02X", (0xFF&bytesb[i])); } if(baddiffs>0) { printf(" - ERR! Diffs from %s in %d places\n", xbuf2,baddiffs); gbaddiffs+=baddiffs; } else { //printf(" OK.\n"); } // printf("\n"); #if USE_CXX delete col; #else ucol_close(col); #endif oldLen = len; memcpy(oldBytes, bytes, len); } } if(diffs==0) { #if (U_ICU_VERSION_MAJOR_NUM < 49) printf("ERROR: 0 differences found between platforms. ICU " U_ICU_VERSION " does not support collator plugins properly (not until 49)\n"); #else printf("ERROR: 0 differences found between platforms.. are the platforms installed? Try 'icuinfo -L'\n"); #endif return 1; } else { printf("%d differences found among provider versions!\n", diffs); } if(gbaddiffs>0) { printf("ERROR: %d diffs found between a collator and it's reopened (from shortstring) variant.\n", gbaddiffs); return 2; } else { printf("Collator and reopened (shortstring) are OK.\n"); } printf("Success!\n"); return 0; }
/**
 * Collator::sortWithSortKeys(): sorts `arr` by ICU sort key.
 *
 * One sort key is generated per element (non-string elements use an empty
 * UnicodeString) and appended to a single key buffer.  An index of
 * (key, element position) entries is sorted with zend_qsort(), and `arr` is
 * replaced by a new array holding the values in sorted order.
 *
 * Returns true on success and when `arr` is not an array or is empty;
 * returns false when converting an element to UTF-16 fails.  Throws
 * Exception("Out of memory") when a buffer cannot be obtained.
 */
static bool HHVM_METHOD(Collator, sortWithSortKeys, VRefParam arr) {
  FETCH_COL(data, this_, false);
  data->clearError();

  if (!arr.isArray()) {
    return true;
  }
  Array hash = arr.toArray();
  if (hash.size() == 0) {
    return true;
  }

  // Key buffer: all sort keys are stored back to back.
  size_t keyBytesUsed = 0;
  size_t keyBytesCapacity = DEF_SORT_KEYS_BUF_SIZE;
  char* keyBytes = (char*)smart_malloc(keyBytesCapacity);
  if (!keyBytes) {
    throw Exception("Out of memory");
  }
  SCOPE_EXIT{ smart_free(keyBytes); };

  // Index buffer: one entry per element, sorted later.
  size_t entryCount = 0;
  size_t entryCapacity = DEF_SORT_KEYS_INDX_BUF_SIZE;
  auto entries = (collator_sort_key_index_t*)smart_malloc(
    entryCapacity * sizeof(collator_sort_key_index_t));
  if (!entries) {
    throw Exception("Out of memory");
  }
  SCOPE_EXIT{ smart_free(entries); };

  // Build a sortable entry for every element of the input.
  auto endPos = hash->iter_end();
  for (ssize_t pos = hash->iter_begin(); pos != endPos;
       pos = hash->iter_advance(pos)) {
    Variant element(hash->getValue(pos));

    // UTF-16 form of the element; stays empty for non-strings.
    icu::UnicodeString text;
    if (element.isString()) {
      UErrorCode convStatus = U_ZERO_ERROR;
      text = u16(element.toString(), convStatus);
      if (U_FAILURE(convStatus)) {
        return false;
      }
    }

    // First attempt: write the key into whatever room is left.
    int keyLen = ucol_getSortKey(data->collator(),
                                 text.getBuffer(), text.length(),
                                 (uint8_t*)(keyBytes + keyBytesUsed),
                                 keyBytesCapacity - keyBytesUsed);

    // Not enough room: enlarge the key buffer and generate the key again.
    if (keyLen > (keyBytesCapacity - keyBytesUsed)) {
      int32_t growBy = DEF_SORT_KEYS_BUF_INCREMENT;
      if (keyLen > DEF_SORT_KEYS_BUF_INCREMENT) {
        growBy = keyLen;
      }
      keyBytesCapacity += growBy;
      keyBytes = (char*)smart_realloc(keyBytes, keyBytesCapacity);
      if (!keyBytes) {
        throw Exception("Out of memory");
      }
      keyLen = ucol_getSortKey(data->collator(),
                               text.getBuffer(), text.length(),
                               (uint8_t*)(keyBytes + keyBytesUsed),
                               keyBytesCapacity - keyBytesUsed);
      assert(keyLen <= (keyBytesCapacity - keyBytesUsed));
    }

    // Make room for one more index entry if needed.
    if ((entryCount + 1) > entryCapacity) {
      entryCapacity += DEF_SORT_KEYS_INDX_BUF_INCREMENT;
      entries = (collator_sort_key_index_t*)smart_realloc(entries,
                      entryCapacity * sizeof(collator_sort_key_index_t));
      if (!entries) {
        throw Exception("Out of memory");
      }
    }

    // Record the key's offset rather than its address: the key buffer may
    // still move on a later realloc, so addresses are fixed up afterwards.
    entries[entryCount].key = (char*)keyBytesUsed;
    entries[entryCount].valPos = pos;
    keyBytesUsed += keyLen;
    ++entryCount;
  }

  // Turn the stored offsets into pointers into the final key buffer.
  for (int i = 0; i < entryCount; ++i) {
    entries[i].key = keyBytes + (ptrdiff_t)entries[i].key;
  }

  zend_qsort(entries, entryCount,
             sizeof(collator_sort_key_index_t),
             collator_cmp_sort_keys, nullptr);

  Array sorted = Array::Create();
  for (int i = 0; i < entryCount; ++i) {
    sorted.append(hash->getValue(entries[i].valPos));
  }
  arr = sorted;
  return true;
}
void currTest() { /* All the currency symbols, in UCA order*/ static const UChar currency[][2] = { { 0x00A4, 0x0000}, /*00A4; L; [14 36, 03, 03] # [082B.0020.0002] # CURRENCY SIGN*/ { 0x00A2, 0x0000}, /*00A2; L; [14 38, 03, 03] # [082C.0020.0002] # CENT SIGN*/ { 0xFFE0, 0x0000}, /*FFE0; L; [14 38, 03, 05] # [082C.0020.0003] # FULLWIDTH CENT SIGN*/ { 0x0024, 0x0000}, /*0024; L; [14 3A, 03, 03] # [082D.0020.0002] # DOLLAR SIGN*/ { 0xFF04, 0x0000}, /*FF04; L; [14 3A, 03, 05] # [082D.0020.0003] # FULLWIDTH DOLLAR SIGN*/ { 0xFE69, 0x0000}, /*FE69; L; [14 3A, 03, 1D] # [082D.0020.000F] # SMALL DOLLAR SIGN*/ { 0x00A3, 0x0000}, /*00A3; L; [14 3C, 03, 03] # [082E.0020.0002] # POUND SIGN*/ { 0xFFE1, 0x0000}, /*FFE1; L; [14 3C, 03, 05] # [082E.0020.0003] # FULLWIDTH POUND SIGN*/ { 0x00A5, 0x0000}, /*00A5; L; [14 3E, 03, 03] # [082F.0020.0002] # YEN SIGN*/ { 0xFFE5, 0x0000}, /*FFE5; L; [14 3E, 03, 05] # [082F.0020.0003] # FULLWIDTH YEN SIGN*/ { 0x09F2, 0x0000}, /*09F2; L; [14 40, 03, 03] # [0830.0020.0002] # BENGALI RUPEE MARK*/ { 0x09F3, 0x0000}, /*09F3; L; [14 42, 03, 03] # [0831.0020.0002] # BENGALI RUPEE SIGN*/ { 0x0E3F, 0x0000}, /*0E3F; L; [14 44, 03, 03] # [0832.0020.0002] # THAI CURRENCY SYMBOL BAHT*/ { 0x17DB, 0x0000}, /*17DB; L; [14 46, 03, 03] # [0833.0020.0002] # KHMER CURRENCY SYMBOL RIEL*/ { 0x20A0, 0x0000}, /*20A0; L; [14 48, 03, 03] # [0834.0020.0002] # EURO-CURRENCY SIGN*/ { 0x20A1, 0x0000}, /*20A1; L; [14 4A, 03, 03] # [0835.0020.0002] # COLON SIGN*/ { 0x20A2, 0x0000}, /*20A2; L; [14 4C, 03, 03] # [0836.0020.0002] # CRUZEIRO SIGN*/ { 0x20A3, 0x0000}, /*20A3; L; [14 4E, 03, 03] # [0837.0020.0002] # FRENCH FRANC SIGN*/ { 0x20A4, 0x0000}, /*20A4; L; [14 50, 03, 03] # [0838.0020.0002] # LIRA SIGN*/ { 0x20A5, 0x0000}, /*20A5; L; [14 52, 03, 03] # [0839.0020.0002] # MILL SIGN*/ { 0x20A6, 0x0000}, /*20A6; L; [14 54, 03, 03] # [083A.0020.0002] # NAIRA SIGN*/ { 0x20A7, 0x0000}, /*20A7; L; [14 56, 03, 03] # [083B.0020.0002] # PESETA SIGN*/ { 0x20A9, 0x0000}, /*20A9; L; 
[14 58, 03, 03] # [083C.0020.0002] # WON SIGN*/ { 0xFFE6, 0x0000}, /*FFE6; L; [14 58, 03, 05] # [083C.0020.0003] # FULLWIDTH WON SIGN*/ { 0x20AA, 0x0000}, /*20AA; L; [14 5A, 03, 03] # [083D.0020.0002] # NEW SHEQEL SIGN*/ { 0x20AB, 0x0000}, /*20AB; L; [14 5C, 03, 03] # [083E.0020.0002] # DONG SIGN*/ { 0x20AC, 0x0000}, /*20AC; L; [14 5E, 03, 03] # [083F.0020.0002] # EURO SIGN*/ { 0x20AD, 0x0000}, /*20AD; L; [14 60, 03, 03] # [0840.0020.0002] # KIP SIGN*/ { 0x20AE, 0x0000}, /*20AE; L; [14 62, 03, 03] # [0841.0020.0002] # TUGRIK SIGN*/ { 0x20AF, 0x0000}, /*20AF; L; [14 64, 03, 03] # [0842.0020.0002] # DRACHMA SIGN*/ }; #if 0 /* All the currency symbols, in collation order*/ static const UChar currency[][2] = { { 0x00a4, 0x0000}, /* generic currency*/ { 0x0e3f, 0x0000}, /* baht*/ { 0x00a2, 0x0000}, /* cent*/ { 0x20a1, 0x0000}, /* colon*/ { 0x20a2, 0x0000}, /* cruzeiro*/ { 0x0024, 0x0000}, /* dollar */ { 0x20ab, 0x0000}, /* dong */ { 0x20ac, 0x0000}, /* euro */ { 0x20a3, 0x0000}, /* franc */ { 0x20a4, 0x0000}, /* lira */ { 0x20a5, 0x0000}, /* mill */ { 0x20a6, 0x0000}, /* naira */ { 0x20a7, 0x0000}, /* peseta */ { 0x00a3, 0x0000}, /* pound */ { 0x20a8, 0x0000}, /* rupee */ { 0x20aa, 0x0000}, /* shekel*/ { 0x20a9, 0x0000}, /* won*/ { 0x00a5, 0x0000} /* yen*/ }; #endif UChar source[2], target[2]; int32_t i, j, sortklen; int res; UCollator *c; uint8_t *sortKey1, *sortKey2; UErrorCode status = U_ZERO_ERROR; UCollationResult compareResult, keyResult; UCollationResult expectedResult = UCOL_EQUAL; log_verbose("Testing currency of all locales\n"); c = ucol_open("en_US", &status); if (U_FAILURE(status)) { log_err_status(status, "collator open failed! 
:%s\n", myErrorName(status)); return; } /*Compare each currency symbol against all the currency symbols, including itself*/ for (i = 0; i < UPRV_LENGTHOF(currency); i += 1) { for (j = 0; j < UPRV_LENGTHOF(currency); j += 1) { u_strcpy(source, currency[i]); u_strcpy(target, currency[j]); if (i < j) { expectedResult = UCOL_LESS; } else if ( i == j) { expectedResult = UCOL_EQUAL; } else { expectedResult = UCOL_GREATER; } compareResult = ucol_strcoll(c, source, u_strlen(source), target, u_strlen(target)); status = U_ZERO_ERROR; sortklen=ucol_getSortKey(c, source, u_strlen(source), NULL, 0); sortKey1=(uint8_t*)malloc(sizeof(uint8_t) * (sortklen+1)); ucol_getSortKey(c, source, u_strlen(source), sortKey1, sortklen+1); sortklen=ucol_getSortKey(c, target, u_strlen(target), NULL, 0); sortKey2=(uint8_t*)malloc(sizeof(uint8_t) * (sortklen+1)); ucol_getSortKey(c, target, u_strlen(target), sortKey2, sortklen+1); res = uprv_memcmp(sortKey1, sortKey2, sortklen); if (res < 0) keyResult = (UCollationResult)-1; else if (res > 0) keyResult = (UCollationResult)1; else keyResult = (UCollationResult)0; reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, compareResult, expectedResult ); free(sortKey1); free(sortKey2); } } ucol_close(c); }
/**
 * Tests the [variable top] tag in rule syntax. Since the default [alternate]
 * tag has the value shifted, any codepoints before [variable top] should give
 * a primary ce of 0.
 *
 * NOTE: the entire body is compiled out with "#if 0", so this function
 * currently does nothing when called. The disabled code is kept for reference.
 */
static void TestVariableTop(void)
{
#if 0
    /*
     * Starting with ICU 53, setting the variable top via a pseudo relation string
     * is not supported any more.
     * It was replaced by the [maxVariable symbol] setting.
     * See ICU tickets #9958 and #8032.
     */
    static const char str[] = "&z = [variable top]";
    int len = strlen(str);
    UChar rules[sizeof(str)];
    UCollator *myCollation;
    UCollator *enCollation;
    UErrorCode status = U_ZERO_ERROR;
    UChar source[1];
    UChar ch;
    uint8_t result[20];
    uint8_t expected[20];

    /* Widen the char rule string into the UChar buffer. */
    u_uastrcpy(rules, str);

    enCollation = ucol_open("en_US", &status);
    if (U_FAILURE(status)) {
        log_err_status(status, "ERROR: in creation of collator :%s\n", myErrorName(status));
        return;
    }
    myCollation = ucol_openRules(rules, len, UCOL_OFF,
                                 UCOL_PRIMARY,NULL, &status);
    if (U_FAILURE(status)) {
        /* The en_US collator was opened successfully above; release it. */
        ucol_close(enCollation);
        log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status));
        return;
    }

    /* Both collators are switched to shifted alternate handling. */
    ucol_setStrength(enCollation, UCOL_PRIMARY);
    ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
    ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);

    if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) !=
        UCOL_SHIFTED || U_FAILURE(status)) {
        log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
    }

    /* The generated sort keys are compared against an all-zero buffer. */
    uprv_memset(expected, 0, 20);

    /* space is supposed to be a variable */
    source[0] = ' ';
    len = ucol_getSortKey(enCollation, source, 1, result,
                          sizeof(result));

    if (uprv_memcmp(expected, result, len) != 0) {
        log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
    }

    /* Check 'a' through 'y' with the rule-based collator ('z' itself is excluded). */
    ch = 'a';
    while (ch < 'z') {
        source[0] = ch;
        len = ucol_getSortKey(myCollation, source, 1, result,
                              sizeof(result));
        if (uprv_memcmp(expected, result, len) != 0) {
            log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n",
                    ch);
        }
        ch ++;
    }

    ucol_close(enCollation);
    ucol_close(myCollation);
    enCollation = NULL;
    myCollation = NULL;
#endif
}
/** * Tests the [variable top] tag in rule syntax. Since the default [alternate] * tag has the value shifted, any codepoints before [variable top] should give * a primary ce of 0. */ static void TestVariableTop(void) { const char *str = "&z = [variable top]"; int len = strlen(str); UChar *rules; UCollator *myCollation; UCollator *enCollation; UErrorCode status = U_ZERO_ERROR; UChar source[1]; UChar ch; uint8_t result[20]; uint8_t expected[20]; rules = (UChar*)malloc(sizeof(UChar*) * (len + 1)); u_uastrcpy(rules, str); enCollation = ucol_open("en_US", &status); myCollation = ucol_openRules(rules, len, UCOL_OFF, UCOL_PRIMARY,NULL, &status); if (U_FAILURE(status)) { log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status)); return; } ucol_setStrength(enCollation, UCOL_PRIMARY); ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) != UCOL_SHIFTED || U_FAILURE(status)) { log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n"); } uprv_memset(expected, 0, 20); /* space is supposed to be a variable */ source[0] = ' '; len = ucol_getSortKey(enCollation, source, 1, result, sizeof(result)); if (uprv_memcmp(expected, result, len) != 0) { log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n"); } ch = 'a'; while (ch < 'z') { source[0] = ch; len = ucol_getSortKey(myCollation, source, 1, result, sizeof(result)); if (uprv_memcmp(expected, result, len) != 0) { log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n", ch); } ch ++; } free(rules); ucol_close(enCollation); ucol_close(myCollation); enCollation = NULL; myCollation = NULL; }
/**
 * Compares source against target in several ways and reports any mismatch
 * with the expected result.
 *
 * The checks, in order:
 *   1. iterative comparison (ucol_strcollIter) over the UTF-16 strings;
 *   2. when QUICK <= 0: iterative comparison over UTF-8 copies of the
 *      strings, with the collator's normalization mode as found and again
 *      with normalization forced on;
 *   3. comparison through partial sort keys (compareUsingPartials) for one
 *      or several piece sizes;
 *   4. ucol_strcoll with explicit lengths and with -1 (NUL-terminated);
 *   5. full sort keys generated with explicit lengths and with -1, their
 *      reported lengths, and their relative order.
 * The final verdict is delegated to reportCResult.
 *
 * The collator's normalization mode is changed temporarily and set back to
 * the value read at entry.
 *
 * @param myCollation collator under test
 * @param source      NUL-terminated source string
 * @param target      NUL-terminated target string
 * @param result      expected result of comparing source to target
 */
static void doTestVariant(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
{
    int32_t sortklen1, sortklen2, sortklenmax, sortklenmin;
    int temp=0, gSortklen1=0,gSortklen2=0;
    UCollationResult compareResult, compareResulta, keyResult, compareResultIter = result;
    uint8_t *sortKey1, *sortKey2, *sortKey1a, *sortKey2a;
    uint32_t sLen = u_strlen(source);
    uint32_t tLen = u_strlen(target);
    char buffer[256];
    uint32_t len;
    UErrorCode status = U_ZERO_ERROR;
    /* Remember the current normalization mode so it can be restored below. */
    UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);

    UCharIterator sIter, tIter;
    uiter_setString(&sIter, source, sLen);
    uiter_setString(&tIter, target, tLen);
    compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
    if(compareResultIter != result) {
        log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
    }

    /* convert the strings to UTF-8 and try comparing with the char iterator */
    if(QUICK <= 0) { /*!QUICK*/
        char utf8Source[256], utf8Target[256];
        int32_t utf8SourceLen = 0, utf8TargetLen = 0;
        u_strToUTF8(utf8Source, 256, &utf8SourceLen, source, sLen, &status);
        if(U_FAILURE(status)) { /* probably buffer is not big enough */
            log_verbose("Src UTF-8 buffer too small! Will not compare!\n");
        } else {
            u_strToUTF8(utf8Target, 256, &utf8TargetLen, target, tLen, &status);
            if(U_SUCCESS(status)) { /* both strings were converted */
                UCollationResult compareResultUTF8 = result, compareResultUTF8Norm = result;
                /*UCharIterator sIter, tIter;*/
                /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
                /* The iterators declared above are re-pointed at the UTF-8 copies. */
                uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
                uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
                /*uiter_setString(&sIter, source, sLen);
                uiter_setString(&tIter, target, tLen);*/
                compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
                /* Repeat with normalization on; rewind both iterators first. */
                ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
                sIter.move(&sIter, 0, UITER_START);
                tIter.move(&tIter, 0, UITER_START);
                compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
                /* Put back the normalization mode read at entry. */
                ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
                if(compareResultUTF8 != compareResultIter) {
                    log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
                }
                if(compareResultUTF8 != compareResultUTF8Norm) {
                    log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
                }
            } else {
                log_verbose("Target UTF-8 buffer too small! Did not compare!\n");
            }
            if(U_FAILURE(status)) {
                log_verbose("UTF-8 strcoll failed! Ignoring result\n");
            }
        }
    }

    /* testing the partial sortkeys */
    if(1) { /*!QUICK*/
        int32_t i = 0;
        int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
        int32_t partialSizesSize = 1;
        if(QUICK <= 0) {
            /* All seven piece sizes are exercised outside quick mode. */
            partialSizesSize = 7;
        }
        /*log_verbose("partial sortkey test piecesize=");*/
        for(i = 0; i < partialSizesSize; i++) {
            UCollationResult partialSKResult = result, partialNormalizedSKResult = result;
            /*log_verbose("%i ", partialSizes[i]);*/

            partialSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
            if(partialSKResult != result) {
                log_err("Partial sortkey comparison returned wrong result (%i exp. %i): %s, %s (size %i)\n",
                    partialSKResult, result,
                    aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
            }

            /* Only worth repeating when normalization was not already on. */
            if(QUICK <= 0 && norm != UCOL_ON) {
                /*log_verbose("N ");*/
                ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
                partialNormalizedSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
                ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
                if(partialSKResult != partialNormalizedSKResult) {
                    log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n",
                        aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
                }
            }
        }
        /*log_verbose("\n");*/
    }

    /* Explicit-length and NUL-terminated (-1) calls must agree. */
    compareResult  = ucol_strcoll(myCollation, source, sLen, target, tLen);
    compareResulta = ucol_strcoll(myCollation, source, -1,   target, -1);
    if (compareResult != compareResulta) {
        log_err("ucol_strcoll result from null terminated and explicit length strings differs.\n");
    }

    /* Preflight both keys; every key buffer below is sized for the longer one. */
    sortklen1=ucol_getSortKey(myCollation, source, sLen,  NULL, 0);
    sortklen2=ucol_getSortKey(myCollation, target, tLen, NULL, 0);

    sortklenmax = (sortklen1>sortklen2?sortklen1:sortklen2);
    /* NOTE(review): sortklenmin is computed but not used in this function. */
    sortklenmin = (sortklen1<sortklen2?sortklen1:sortklen2);

    sortKey1 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey1a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    ucol_getSortKey(myCollation, source, sLen, sortKey1,  sortklen1+1);
    ucol_getSortKey(myCollation, source, -1,   sortKey1a, sortklen1+1);

    sortKey2 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey2a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    ucol_getSortKey(myCollation, target, tLen, sortKey2,  sortklen2+1);
    ucol_getSortKey(myCollation, target, -1,   sortKey2a, sortklen2+1);

    /* Check that the sort key generated from a null terminated string is */
    /* identical to the one generated with a length specified.            */
    if (uprv_strcmp((const char *)sortKey1, (const char *)sortKey1a) != 0 ||
        uprv_strcmp((const char *)sortKey2, (const char *)sortKey2a) != 0 ) {
        log_err("Sort Keys from null terminated and explicit length strings differ.\n");
    }

    /*memcmp(sortKey1, sortKey2,sortklenmax);*/
    /* The keys are compared, and their lengths measured, as C strings. */
    temp= uprv_strcmp((const char *)sortKey1, (const char *)sortKey2);
    gSortklen1 = uprv_strlen((const char *)sortKey1)+1;
    gSortklen2 = uprv_strlen((const char *)sortKey2)+1;
    /* The preflighted size must equal the string length plus the terminator. */
    if(sortklen1 != gSortklen1){
        log_err("SortKey length does not match Expected: %i Got: %i\n",sortklen1, gSortklen1);
        log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey1, buffer, &len));
    }
    if(sortklen2!= gSortklen2){
        log_err("SortKey length does not match Expected: %i Got: %i\n", sortklen2, gSortklen2);
        log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey2, buffer, &len));
    }

    /* Map the strcmp-style result onto a UCollationResult. */
    if(temp < 0) {
        keyResult=UCOL_LESS;
    }
    else if(temp > 0) {
        keyResult= UCOL_GREATER;
    }
    else {
        keyResult = UCOL_EQUAL;
    }
    reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, compareResultIter, result );
    free(sortKey1);
    free(sortKey2);
    free(sortKey1a);
    free(sortKey2a);
}
/** * This function is invoked as: * * _TOKENIZE('<token_table>', <data_row_id>, <data>, <delimiter>, * <use_token_index>, <data_tag>) * * If <use_token_index> is omitted, it is treated as 0. * If <data_tag> is omitted, it is treated as NULL. * * It will split <data> on each instance of <delimiter> and insert each token * into <token_table>. The following columns in <token_table> are used: * token TEXT, source INTEGER, token_index INTEGER, tag (any type) * The token_index column is not required if <use_token_index> is 0. * The tag column is not required if <data_tag> is NULL. * * One row is inserted for each token in <data>. * In each inserted row, 'source' is <data_row_id>. * In the first inserted row, 'token' is the hex collation key of * the entire <data> string, and 'token_index' is 0. * In each row I (where 1 <= I < N, and N is the number of tokens in <data>) * 'token' will be set to the hex collation key of the I:th token (0-based). * If <use_token_index> != 0, 'token_index' is set to I. * If <data_tag> is not NULL, 'tag' is set to <data_tag>. * * In other words, there will be one row for the entire string, * and one row for each token except the first one. * * The function returns the number of tokens generated. 
*/ static void tokenize(sqlite3_context * context, int argc, sqlite3_value ** argv) { //ALOGD("enter tokenize"); int err; int useTokenIndex = 0; int useDataTag = 0; if (!(argc >= 4 || argc <= 6)) { ALOGE("Tokenize requires 4 to 6 arguments"); sqlite3_result_null(context); return; } if (argc > 4) { useTokenIndex = sqlite3_value_int(argv[4]); } if (argc > 5) { useDataTag = (sqlite3_value_type(argv[5]) != SQLITE_NULL); } sqlite3 * handle = sqlite3_context_db_handle(context); UCollator* collator = (UCollator*)sqlite3_user_data(context); char const * tokenTable = (char const *)sqlite3_value_text(argv[0]); if (tokenTable == NULL) { ALOGE("tokenTable null"); sqlite3_result_null(context); return; } // Get or create the prepared statement for the insertions sqlite3_stmt * statement = (sqlite3_stmt *)sqlite3_get_auxdata(context, 0); if (!statement) { char const * tokenIndexCol = useTokenIndex ? ", token_index" : ""; char const * tokenIndexParam = useTokenIndex ? ", ?" : ""; char const * dataTagCol = useDataTag ? ", tag" : ""; char const * dataTagParam = useDataTag ? ", ?" : ""; char * sql = sqlite3_mprintf("INSERT INTO %s (token, source%s%s) VALUES (?, ?%s%s);", tokenTable, tokenIndexCol, dataTagCol, tokenIndexParam, dataTagParam); err = sqlite3_prepare_v2(handle, sql, -1, &statement, NULL); sqlite3_free(sql); if (err) { ALOGE("prepare failed"); sqlite3_result_null(context); return; } // This binds the statement to the table it was compiled against, which is argv[0]. // If this function is ever called with a different table the finalizer will be called // and sqlite3_get_auxdata() will return null above, forcing a recompile for the new table. 
sqlite3_set_auxdata(context, 0, statement, tokenize_auxdata_delete); } else { // Reset the cached statement so that binding the row ID will work properly sqlite3_reset(statement); } // Bind the row ID of the source row int64_t rowID = sqlite3_value_int64(argv[1]); err = sqlite3_bind_int64(statement, 2, rowID); if (err != SQLITE_OK) { ALOGE("bind failed"); sqlite3_result_null(context); return; } // Bind <data_tag> to the tag column if (useDataTag) { int dataTagParamIndex = useTokenIndex ? 4 : 3; err = sqlite3_bind_value(statement, dataTagParamIndex, argv[5]); if (err != SQLITE_OK) { ALOGE("bind failed"); sqlite3_result_null(context); return; } } // Get the raw bytes for the string to tokenize // the string will be modified by following code // however, sqlite did not reuse the string, so it is safe to not dup it UChar * origData = (UChar *)sqlite3_value_text16(argv[2]); if (origData == NULL) { sqlite3_result_null(context); return; } // Get the raw bytes for the delimiter const UChar * delim = (const UChar *)sqlite3_value_text16(argv[3]); if (delim == NULL) { ALOGE("can't get delimiter"); sqlite3_result_null(context); return; } UChar * token = NULL; UChar *state; int numTokens = 0; do { if (numTokens == 0) { token = origData; } // Reset the program so we can use it to perform the insert sqlite3_reset(statement); UErrorCode status = U_ZERO_ERROR; char keybuf[1024]; uint32_t result = ucol_getSortKey(collator, token, -1, (uint8_t*)keybuf, sizeof(keybuf)-1); if (result > sizeof(keybuf)) { // TODO allocate memory for this super big string ALOGE("ucol_getSortKey needs bigger buffer %d", result); break; } uint32_t keysize = result-1; uint32_t base16Size = keysize*2; char *base16buf = (char*)malloc(base16Size); base16Encode(base16buf, keybuf, keysize); err = sqlite3_bind_text(statement, 1, base16buf, base16Size, SQLITE_STATIC); if (err != SQLITE_OK) { ALOGE(" sqlite3_bind_text16 error %d", err); free(base16buf); break; } if (useTokenIndex) { err = sqlite3_bind_int(statement, 
3, numTokens); if (err != SQLITE_OK) { ALOGE(" sqlite3_bind_int error %d", err); free(base16buf); break; } } err = sqlite3_step(statement); free(base16buf); if (err != SQLITE_DONE) { ALOGE(" sqlite3_step error %d", err); break; } numTokens++; if (numTokens == 1) { // first call u_strtok_r(origData, delim, &state); } } while ((token = u_strtok_r(NULL, delim, &state)) != NULL); sqlite3_result_int(context, numTokens); }
int32_t RuleBasedCollator::getSortKey(const UnicodeString& source, uint8_t *result, int32_t resultLength) const { return ucol_getSortKey(ucollator, source.getBuffer(), source.length(), result, resultLength); }
/**
 * Builds a raw sort key for stringIn at the given strength.
 *
 * With ICU the key is generated by ucol_getSortKey (size preflight first,
 * then the actual key). Otherwise UCGetCollationKey writes into the reusable
 * member buffer and the key is copied out of it. In both cases the returned
 * key is allocated with PR_Malloc.
 *
 * @param strength  collation strength handed to EnsureCollator
 * @param stringIn  text to generate the key for
 * @param key       out: newly allocated key; only set on success
 * @param outLen    out: key length in bytes; only set on success
 * @return NS_OK on success, NS_ERROR_NOT_INITIALIZED before initialization,
 *         NS_ERROR_OUT_OF_MEMORY when an allocation fails, NS_ERROR_FAILURE
 *         when key generation fails, or the error from EnsureCollator.
 */
NS_IMETHODIMP nsCollationMacUC::AllocateRawSortKey(int32_t strength, const nsAString& stringIn,
                                                   uint8_t** key, uint32_t* outLen)
{
  NS_ENSURE_TRUE(mInit, NS_ERROR_NOT_INITIALIZED);
  NS_ENSURE_ARG_POINTER(key);
  NS_ENSURE_ARG_POINTER(outLen);

  nsresult res = EnsureCollator(strength);
  NS_ENSURE_SUCCESS(res, res);

  uint32_t stringInLen = stringIn.Length();

  // Bind the flattened string to a named reference so that it lives until the
  // end of this function. Taking .get() from the unnamed temporary and using
  // the pointer in later statements would leave it dangling.
  const auto& flatIn = PromiseFlatString(stringIn);

  if (mUseICU) {
    const UChar* str = (const UChar*)flatIn.get();

    // First call only asks for the required key size.
    int32_t keyLength = ucol_getSortKey(mCollatorICU, str, stringInLen, nullptr, 0);
    NS_ENSURE_TRUE((stringInLen == 0 || keyLength > 0), NS_ERROR_FAILURE);

    // Since key is freed elsewhere with PR_Free, allocate with PR_Malloc.
    uint8_t* newKey = (uint8_t*)PR_Malloc(keyLength + 1);
    if (!newKey) {
      return NS_ERROR_OUT_OF_MEMORY;
    }

    keyLength = ucol_getSortKey(mCollatorICU, str, stringInLen, newKey, keyLength + 1);
    const bool keyGenerated = (stringInLen == 0 || keyLength > 0);
    if (!keyGenerated) {
      // The caller never sees this buffer, so release it before bailing out.
      PR_Free(newKey);
    }
    NS_ENSURE_TRUE(keyGenerated, NS_ERROR_FAILURE);

    *key = newKey;
    *outLen = keyLength;

    return NS_OK;
  }

  // Make sure the reusable member buffer can hold the largest possible key,
  // doubling its size until it does.
  uint32_t maxKeyLen = (1 + stringInLen) * kCollationValueSizeFactor * sizeof(UCCollationValue);
  if (maxKeyLen > mBufferLen) {
    uint32_t newBufferLen = mBufferLen;
    do {
      newBufferLen *= 2;
    } while (newBufferLen < maxKeyLen);
    void* newBuffer = moz_malloc(newBufferLen);
    if (!newBuffer) {
      return NS_ERROR_OUT_OF_MEMORY;
    }

    if (mBuffer) {
      moz_free(mBuffer);
      mBuffer = nullptr;
    }
    mBuffer = newBuffer;
    mBufferLen = newBufferLen;
  }

  ItemCount actual;
  OSStatus err = ::UCGetCollationKey(mCollator, (const UniChar*) flatIn.get(),
                                     (UniCharCount) stringInLen,
                                     (ItemCount) (mBufferLen / sizeof(UCCollationValue)),
                                     &actual, (UCCollationValue *)mBuffer);
  NS_ENSURE_TRUE((err == noErr), NS_ERROR_FAILURE);

  uint32_t keyLength = actual * sizeof(UCCollationValue);
  // Since key is freed elsewhere with PR_Free, allocate with PR_Malloc.
  void* newKey = PR_Malloc(keyLength);
  if (!newKey) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  memcpy(newKey, mBuffer, keyLength);
  *key = (uint8_t *)newKey;
  *outLen = keyLength;

  return NS_OK;
}
/**
 * Generates the sort key for a UTF-16 buffer.
 *
 * Passes all arguments unchanged to ucol_getSortKey on the wrapped collator.
 *
 * @param source       text to generate the key for
 * @param sourceLength length of source as understood by ucol_getSortKey
 * @param result       destination buffer for the key bytes
 * @param resultLength capacity of result, in bytes
 * @return whatever ucol_getSortKey returns for these arguments
 */
int32_t RuleBasedCollator::getSortKey(const UChar *source,
                                      int32_t sourceLength,
                                      uint8_t *result,
                                      int32_t resultLength) const
{
    const int32_t keyLength =
        ucol_getSortKey(ucollator, source, sourceLength, result, resultLength);
    return keyLength;
}
/** * Tests surrogate support. * NOTE: This test used \\uD801\\uDC01 pair, which is now assigned to Desseret * Therefore, another (unassigned) code point was used for this test. */ static void TestSurrogates(void) { const char *str = "&z<'\\uD800\\uDC00'<'\\uD800\\uDC0A\\u0308'<A"; int len = strlen(str); int rlen = 0; UChar *rules; UCollator *myCollation; UCollator *enCollation; UErrorCode status = U_ZERO_ERROR; UChar source[][4] = {{'z', 0, 0}, {0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {0xD800, 0xDC02}}; UChar target[][4] = {{0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {'A', 0, 0}, {0xD800, 0xDC03}}; int count = 0; uint8_t enresult[20], myresult[20]; int enlen, mylen; /* tests for open rules with surrogate rules */ rules = (UChar*)malloc(sizeof(UChar*) * (len + 1)); rlen = u_unescape(str, rules, len); enCollation = ucol_open("en_US", &status); myCollation = ucol_openRules(rules, rlen, UCOL_OFF, UCOL_TERTIARY,NULL, &status); if (U_FAILURE(status)) { log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status)); return; } /* this test is to verify the supplementary sort key order in the english collator */ log_verbose("start of english collation supplementary characters test\n"); while (count < 2) { doTest(enCollation, source[count], target[count], UCOL_LESS); count ++; } doTest(enCollation, source[count], target[count], UCOL_GREATER); log_verbose("start of tailored collation supplementary characters test\n"); count = 0; /* tests getting collation elements for surrogates for tailored rules */ while (count < 4) { doTest(myCollation, source[count], target[count], UCOL_LESS); count ++; } /* tests that \uD800\uDC02 still has the same value, not changed */ enlen = ucol_getSortKey(enCollation, source[3], 2, enresult, 20); mylen = ucol_getSortKey(myCollation, source[3], 2, myresult, 20); if (enlen != mylen || uprv_memcmp(enresult, myresult, enlen) != 0) { log_verbose("Failed : non-tailored supplementary characters should have the same 
value\n"); } free(rules); ucol_close(enCollation); ucol_close(myCollation); enCollation = NULL; myCollation = NULL; }