/*
 * Collates two UTF-8 byte ranges with an ICU collator opened for the ""
 * locale. The collator is created on the first call and kept in a
 * function-local static for later calls.
 *
 * str1/len1, str2/len2: the two byte ranges to compare.
 *
 * Returns -1, 0 or 1 according to the sign of the ICU collation result.
 * Also returns -1 (after printing a message to stderr) when the collator
 * cannot be opened or when the comparison itself reports a failure.
 */
static int compareUnicodeSlow(const char* str1, size_t len1,
                              const char* str2, size_t len2)
{
    static UCollator* sharedCollator = NULL;
    UErrorCode icuStatus = U_ZERO_ERROR;
    UCharIterator lhsIter;
    UCharIterator rhsIter;
    int order;

    /* Lazily open the collator the first time it is needed. */
    if (sharedCollator == NULL) {
        sharedCollator = ucol_open("", &icuStatus);
        if (U_FAILURE(icuStatus)) {
            fprintf(stderr, "CouchStore CollateJSON: Couldn't initialize ICU (%d)\n", (int)icuStatus);
            return -1;
        }
    }

    uiter_setUTF8(&lhsIter, str1, (int)len1);
    uiter_setUTF8(&rhsIter, str2, (int)len2);

    order = ucol_strcollIter(sharedCollator, &lhsIter, &rhsIter, &icuStatus);
    if (U_FAILURE(icuStatus)) {
        fprintf(stderr, "CouchStore CollateJSON: ICU error %d\n", (int)icuStatus);
        return -1;
    }

    /* Collapse the collation result to its sign: -1, 0 or 1. */
    return (order > 0) - (order < 0);
}
/*
 * Driver control entry point: collates two UTF-8 strings packed into pBuf.
 *
 * command: 0 compares with pData->coll (COLLATE), 1 compares with
 *          pData->collNoCase (COLLATE_NO_CASE); any other value returns -1.
 * pBuf/bufLen: request buffer holding two consecutive records, each a
 *          native-endian 32-bit byte length followed by that many bytes.
 * rbuf/rlen: passed through to return_control_result() with the reply.
 *
 * The reply is a single byte: 0 = less than, 1 = equal, 2 = greater than.
 * Returns -1 when the command is unknown or when the request buffer is too
 * short for the lengths it declares (or a declared length is negative), so
 * that ICU is never pointed at memory outside pBuf.
 */
static int couch_drv_control(ErlDrvData drv_data, unsigned int command, char *pBuf,
                             int bufLen, char **rbuf, int rlen)
{
    couch_drv_data* pData = (couch_drv_data*)drv_data;

    switch(command) {
    case 0: // COLLATE
    case 1: // COLLATE_NO_CASE:
        {
        UErrorCode status = U_ZERO_ERROR;
        int collResult;
        char response;
        UCharIterator iterA;
        UCharIterator iterB;
        int32_t length;
        int remaining = bufLen; // bytes of pBuf not yet consumed

        // 2 strings are in the buffer, consecutively
        // The strings begin first with a 32 bit integer byte length, then the actual
        // string bytes follow.

        // first 32bits are the length
        if (remaining < (int)sizeof(length)) {
            return -1;
        }
        memcpy(&length, pBuf, sizeof(length));
        pBuf += sizeof(length);
        remaining -= (int)sizeof(length);

        // reject lengths that would run outside the request buffer
        if (length < 0 || length > remaining) {
            return -1;
        }

        // point the iterator at it.
        uiter_setUTF8(&iterA, pBuf, length);

        pBuf += length; // now on to string b
        remaining -= length;

        // first 32bits are the length
        if (remaining < (int)sizeof(length)) {
            return -1;
        }
        memcpy(&length, pBuf, sizeof(length));
        pBuf += sizeof(length);
        remaining -= (int)sizeof(length);

        if (length < 0 || length > remaining) {
            return -1;
        }

        // point the iterator at it.
        uiter_setUTF8(&iterB, pBuf, length);

        // NOTE(review): status is not inspected after the comparison, so an
        // ICU failure is reported as whatever collResult holds. Left as-is to
        // keep the existing reply contract; confirm whether callers want an
        // error instead.
        if (command == 0) // COLLATE
            collResult = ucol_strcollIter(pData->coll, &iterA, &iterB, &status);
        else              // COLLATE_NO_CASE
            collResult = ucol_strcollIter(pData->collNoCase, &iterA, &iterB, &status);

        if (collResult < 0)
            response = 0; //lt
        else if (collResult > 0)
            response = 2; //gt
        else
            response = 1; //eq

        return return_control_result(&response, sizeof(response), rbuf, rlen);
        }

    default:
        return -1;
    }
}
// Compares two UTF-8 strings by wrapping each in a UCharIterator and
// delegating to the iterator-based compare() overload.
//
// If `status` already holds a failure on entry, nothing is compared and
// UCOL_EQUAL is returned.
UCollationResult Collator::compareUTF8(const StringPiece &source,
                                       const StringPiece &target,
                                       UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return UCOL_EQUAL;
    }

    UCharIterator sourceIter;
    UCharIterator targetIter;
    uiter_setUTF8(&sourceIter, source.data(), source.length());
    uiter_setUTF8(&targetIter, target.data(), target.length());

    return compare(sourceIter, targetIter, status);
}
int32_t _swift_stdlib_unicode_compare_utf8_utf8(const char *LeftString, int32_t LeftLength, const char *RightString, int32_t RightLength) { UCharIterator LeftIterator; UCharIterator RightIterator; UErrorCode ErrorCode = U_ZERO_ERROR; uiter_setUTF8(&LeftIterator, LeftString, LeftLength); uiter_setUTF8(&RightIterator, RightString, RightLength); uint32_t Diff = ucol_strcollIter(GetRootCollator(), &LeftIterator, &RightIterator, &ErrorCode); if (U_FAILURE(ErrorCode)) { swift::crash("ucol_strcollIter: Unexpected error doing utf8<->utf8 string comparison."); } return Diff; }
static int collate8(void *p, int n1, const void *v1, int n2, const void *v2) { UCollator *coll = (UCollator *) p; UCharIterator i1, i2; UErrorCode status = U_ZERO_ERROR; uiter_setUTF8(&i1, (const char *) v1, n1); uiter_setUTF8(&i2, (const char *) v2, n2); UCollationResult result = ucol_strcollIter(coll, &i1, &i2, &status); if (U_FAILURE(status)) { // ALOGE("Collation iterator error: %d\n", status); } if (result == UCOL_LESS) { return -1; } else if (result == UCOL_GREATER) { return 1; } else { return 0; } }
/*
 * Collates the contents of two binaries as UTF-8 using ctx->coll.
 *
 * reserve_coll(ctx) is called before the collator is used. On an ICU
 * failure ctx->error is set to 1 and 0 is returned; otherwise the
 * ucol_strcollIter result is returned unchanged.
 */
int compare_strings(couch_ejson_ctx_t* ctx, ErlNifBinary a, ErlNifBinary b)
{
    UCharIterator lhsIter;
    UCharIterator rhsIter;
    UErrorCode icuStatus = U_ZERO_ERROR;
    int order;

    uiter_setUTF8(&lhsIter, (const char *) a.data, (uint32_t) a.size);
    uiter_setUTF8(&rhsIter, (const char *) b.data, (uint32_t) b.size);

    reserve_coll(ctx);
    order = ucol_strcollIter(ctx->coll, &lhsIter, &rhsIter, &icuStatus);

    if (!U_FAILURE(icuStatus)) {
        /* ucol_strcollIter returns 0, -1 or 1
         * (see type UCollationResult in unicode/ucol.h) */
        return order;
    }

    ctx->error = 1;
    return 0;
}
/**
 * Obtains the first UNICODE letter from the supplied string, normalizes and
 * returns it.
 *
 * SQL function taking two arguments: the text and a locale name. The label
 * computed by android::GetPhonebookIndex is re-encoded as UTF-8 and returned
 * as text. The result is SQL NULL when the argument count is not 2, when the
 * text is NULL or empty, when the locale is NULL, when label lookup or UTF-8
 * encoding reports an error, or when the encoded label is empty.
 */
static void get_phonebook_index(
    sqlite3_context * context, int argc, sqlite3_value ** argv)
{
    if (argc != 2) {
        sqlite3_result_null(context);
        return;
    }

    char const * name = (char const *)sqlite3_value_text(argv[0]);
    char const * localeName = (char const *)sqlite3_value_text(argv[1]);

    const bool haveInput = name != NULL && name[0] != 0 && localeName != NULL;
    if (!haveInput) {
        sqlite3_result_null(context);
        return;
    }

    UCharIterator nameIter;
    uiter_setUTF8(&nameIter, name, -1);

    UBool failed = FALSE;
    UChar label[SMALL_BUFFER_SIZE];
    // NOTE(review): the capacity is passed as sizeof(label), i.e. in bytes --
    // confirm this matches what GetPhonebookIndex expects.
    uint32_t labelLen = android::GetPhonebookIndex(
            &nameIter, localeName, label, sizeof(label), &failed);
    if (failed) {
        sqlite3_result_null(context);
        return;
    }

    // Re-encode the UTF-16 label one code unit at a time.
    uint8_t utf8[SMALL_BUFFER_SIZE];
    uint32_t utf8Len = 0;
    uint32_t pos = 0;
    while (pos < labelLen) {
        U8_APPEND(utf8, utf8Len, sizeof(utf8), label[pos], failed);
        if (failed) {
            sqlite3_result_null(context);
            return;
        }
        ++pos;
    }

    if (utf8Len == 0) {
        sqlite3_result_null(context);
        return;
    }

    sqlite3_result_text(context, (const char*)utf8, utf8Len, SQLITE_TRANSIENT);
}
/// Compares the strings via the Unicode Collation Algorithm on the root locale. /// Results are the usual string comparison results: /// <0 the left string is less than the right string. /// ==0 the strings are equal according to their collation. /// >0 the left string is greater than the right string. int32_t swift::_swift_stdlib_unicode_compare_utf8_utf16(const char *LeftString, int32_t LeftLength, const uint16_t *RightString, int32_t RightLength) { UCharIterator LeftIterator; UCharIterator RightIterator; UErrorCode ErrorCode = U_ZERO_ERROR; uiter_setUTF8(&LeftIterator, LeftString, LeftLength); #if defined(__CYGWIN__) || defined(_MSC_VER) || defined(__MINGW32__) uiter_setString(&RightIterator, reinterpret_cast<const UChar *>(RightString), RightLength); #else uiter_setString(&RightIterator, RightString, RightLength); #endif uint32_t Diff = ucol_strcollIter(GetRootCollator(), &LeftIterator, &RightIterator, &ErrorCode); if (U_FAILURE(ErrorCode)) { swift::crash("ucol_strcollIter: Unexpected error doing utf8<->utf16 string comparison."); } return Diff; }
int Unicode_CompareWithLocale(const char *str1, // IN const char *str2, // IN const char *locale, // IN UnicodeCompareOption compareOption) // IN { UCollationResult compareResult; UColAttributeValue comparisonStrength; UErrorCode status = U_ZERO_ERROR; int result; UCollator *coll; UCharIterator str1Iter; UCharIterator str2Iter; uiter_setUTF8(&str1Iter, (const char *)str1, -1); uiter_setUTF8(&str2Iter, (const char *)str2, -1); switch (compareOption) { case UNICODE_COMPARE_DEFAULT: comparisonStrength = UCOL_DEFAULT; break; case UNICODE_COMPARE_IGNORE_ACCENTS: comparisonStrength = UCOL_PRIMARY; break; case UNICODE_COMPARE_IGNORE_CASE: comparisonStrength = UCOL_SECONDARY; break; case UNICODE_COMPARE_IGNORE_PUNCTUATION: comparisonStrength = UCOL_TERTIARY; break; default: NOT_IMPLEMENTED(); } coll = ucol_open(locale, &status); ASSERT(U_SUCCESS(status)); ASSERT(coll); if (U_FAILURE(status) || !coll) { return -1; } // Normalize all strings to NFD before comparing. ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); ucol_setAttribute(coll, UCOL_STRENGTH, comparisonStrength, &status); ASSERT(U_SUCCESS(status)); compareResult = ucol_strcollIter(coll, &str1Iter, &str2Iter, &status); ucol_close(coll); if (U_FAILURE(status)) { // We'll probably only get here if the input wasn't UTF-8. ASSERT(U_SUCCESS(status)); return -1; } switch (compareResult) { case UCOL_LESS: result = -1; break; case UCOL_EQUAL: result = 0; break; case UCOL_GREATER: result = 1; break; default: NOT_IMPLEMENTED(); } return result; }
char * Unicode_Normalize(const char *str, // IN UnicodeNormalizationForm form) // IN { UNormalizationMode mode; UChar *uchars; char *result; int32_t normalizedLen; UErrorCode status = U_ZERO_ERROR; UCharIterator strIter; UBool neededToNormalize = FALSE; uiter_setUTF8(&strIter, (const char *)str, -1); switch (form) { case UNICODE_NORMAL_FORM_C: mode = UNORM_NFC; break; case UNICODE_NORMAL_FORM_D: mode = UNORM_NFD; break; default: NOT_REACHED(); } normalizedLen = unorm_next(&strIter, NULL, 0, mode, 0, TRUE, &neededToNormalize, &status); if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) { // We expect U_BUFFER_OVERFLOW_ERROR here. Anything else is a problem. ASSERT(U_SUCCESS(status)); return NULL; } uchars = Util_SafeMalloc(sizeof *uchars * normalizedLen); // Reset back to the beginning of the UTF-8 input. (*strIter.move)(&strIter, 0, UITER_START); status = U_ZERO_ERROR; normalizedLen = unorm_next(&strIter, uchars, normalizedLen, mode, 0, TRUE, &neededToNormalize, &status); if (U_FAILURE(status)) { ASSERT(U_SUCCESS(status)); return NULL; } result = Unicode_AllocWithLength(uchars, normalizedLen * 2, STRING_ENCODING_UTF16); free(uchars); return result; }
void IntlTestCollator::doTestVariant(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result) { UErrorCode status = U_ZERO_ERROR; UCollator *myCollation = col->toUCollator(); Collator::EComparisonResult compareResult = col->compare(source, target); CollationKey srckey, tgtkey; col->getCollationKey(source, srckey, status); col->getCollationKey(target, tgtkey, status); if (U_FAILURE(status)){ errln("Creation of collation keys failed\n"); } Collator::EComparisonResult keyResult = srckey.compareTo(tgtkey); reportCResult(source, target, srckey, tgtkey, compareResult, keyResult, result, result); UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status); int32_t sLen = source.length(), tLen = target.length(); const UChar* src = source.getBuffer(); const UChar* trg = target.getBuffer(); UCollationResult compareResultIter = (UCollationResult)result; { UCharIterator sIter, tIter; uiter_setString(&sIter, src, sLen); uiter_setString(&tIter, trg, tLen); compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status); if(compareResultIter != (UCollationResult)result) { errln("Different result for iterative comparison "+source+" "+target); } } /* convert the strings to UTF-8 and do try comparing with char iterator */ if(!quick) { /*!QUICK*/ char utf8Source[256], utf8Target[256]; int32_t utf8SourceLen = 0, utf8TargetLen = 0; u_strToUTF8(utf8Source, 256, &utf8SourceLen, src, sLen, &status); if(U_FAILURE(status)) { /* probably buffer is not big enough */ log("Src UTF-8 buffer too small! 
Will not compare!\n"); } else { u_strToUTF8(utf8Target, 256, &utf8TargetLen, trg, tLen, &status); if(U_SUCCESS(status)) { /* probably buffer is not big enough */ UCollationResult compareResultUTF8 = (UCollationResult)result, compareResultUTF8Norm = (UCollationResult)result; UCharIterator sIter, tIter; /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/ uiter_setUTF8(&sIter, utf8Source, utf8SourceLen); uiter_setUTF8(&tIter, utf8Target, utf8TargetLen); /*uiter_setString(&sIter, source, sLen); uiter_setString(&tIter, target, tLen);*/ compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status); ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); sIter.move(&sIter, 0, UITER_START); tIter.move(&tIter, 0, UITER_START); compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status); ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status); if(compareResultUTF8 != compareResultIter) { errln("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. "+source+", "+target); } if(compareResultUTF8 != compareResultUTF8Norm) { errln("different results in iterative when normalization is turned on with UTF-8 strings. "+source+", "+target); } } else { log("Target UTF-8 buffer too small! Did not compare!\n"); } if(U_FAILURE(status)) { log("UTF-8 strcoll failed! 
Ignoring result\n"); } } } /* testing the partial sortkeys */ { /*!QUICK*/ int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */ int32_t partialSizesSize = 1; if(!quick) { partialSizesSize = 7; } int32_t i = 0; log("partial sortkey test piecesize="); for(i = 0; i < partialSizesSize; i++) { UCollationResult partialSKResult = (UCollationResult)result, partialNormalizedSKResult = (UCollationResult)result; log("%i ", partialSizes[i]); partialSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status); if(partialSKResult != (UCollationResult)result) { errln("Partial sortkey comparison returned wrong result: "+source+", "+target+" (size "+partialSizes[i]+")"); } if(norm != UCOL_ON && !quick) { log("N "); ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); partialNormalizedSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status); ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status); if(partialSKResult != partialNormalizedSKResult) { errln("Partial sortkey comparison gets different result when normalization is on: "+source+", "+target+" (size "+partialSizes[i]+")"); } } } log("\n"); } /* if (compareResult != result) { errln("String comparison failed in variant test\n"); } if (keyResult != result) { errln("Collation key comparison failed in variant test\n"); } */ }
/*
 * Runs one NUL-terminated UTF-16 source/target pair through several
 * comparison paths of myCollation and logs every disagreement:
 *
 *   1. ucol_strcollIter over UTF-16 iterators,
 *   2. when QUICK <= 0: ucol_strcollIter over UTF-8 iterators, with the
 *      current normalization mode and again with it forced on,
 *   3. compareUsingPartials for a list of piece sizes (only the first when
 *      QUICK > 0), repeated with normalization forced on when QUICK <= 0
 *      and normalization was not already on,
 *   4. ucol_strcoll with explicit lengths versus -1 lengths,
 *   5. sort keys generated with explicit lengths versus -1 lengths, their
 *      reported versus actual length, and their strcmp ordering.
 *
 * The outcomes are finally handed to reportCResult together with the
 * expected `result`. The normalization mode is put back to its initial
 * value after every step that changes it.
 */
static void doTestVariant(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
{
    int32_t keyLenSrc, keyLenTrg, keyLenMax;
    int keyOrder = 0, actualLenSrc = 0, actualLenTrg = 0;
    UCollationResult strcollLen, strcollNul, keyResult, iterResult = result;
    uint8_t *keySrc, *keyTrg, *keySrcNul, *keyTrgNul;
    uint32_t srcLen = u_strlen(source);
    uint32_t trgLen = u_strlen(target);
    char buffer[256];
    uint32_t len;
    UErrorCode status = U_ZERO_ERROR;
    /* remembered so the mode can be restored after each forced-on run */
    UColAttributeValue savedNorm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);
    UCharIterator srcIter, trgIter;

    /* ---- UTF-16 iterators ---- */
    uiter_setString(&srcIter, source, srcLen);
    uiter_setString(&trgIter, target, trgLen);
    iterResult = ucol_strcollIter(myCollation, &srcIter, &trgIter, &status);
    if (iterResult != result) {
        log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
    }

    /* ---- UTF-8 iterators (not in quick mode) ---- */
    if (QUICK <= 0) {
        char utf8Source[256], utf8Target[256];
        int32_t utf8SourceLen = 0, utf8TargetLen = 0;

        u_strToUTF8(utf8Source, 256, &utf8SourceLen, source, srcLen, &status);
        if (U_FAILURE(status)) {
            /* probably buffer is not big enough */
            log_verbose("Src UTF-8 buffer too small! Will not compare!\n");
        } else {
            u_strToUTF8(utf8Target, 256, &utf8TargetLen, target, trgLen, &status);
            if (U_SUCCESS(status)) {
                UCollationResult utf8Result = result, utf8NormResult = result;

                /* the iterators declared above are re-pointed at the UTF-8 text */
                uiter_setUTF8(&srcIter, utf8Source, utf8SourceLen);
                uiter_setUTF8(&trgIter, utf8Target, utf8TargetLen);
                utf8Result = ucol_strcollIter(myCollation, &srcIter, &trgIter, &status);

                /* again with normalization forced on, after rewinding */
                ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
                srcIter.move(&srcIter, 0, UITER_START);
                trgIter.move(&trgIter, 0, UITER_START);
                utf8NormResult = ucol_strcollIter(myCollation, &srcIter, &trgIter, &status);
                ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, savedNorm, &status);

                if (utf8Result != iterResult) {
                    log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
                }
                if (utf8Result != utf8NormResult) {
                    log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
                }
            } else {
                /* probably buffer is not big enough */
                log_verbose("Target UTF-8 buffer too small! Did not compare!\n");
            }
            if (U_FAILURE(status)) {
                log_verbose("UTF-8 strcoll failed! Ignoring result\n");
            }
        }
    }

    /* ---- partial sort keys ---- */
    {
        int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 };
        /* just size 3 in the quick mode */
        int32_t partialSizesSize = (QUICK <= 0) ? 7 : 1;
        int32_t i;

        for (i = 0; i < partialSizesSize; i++) {
            UCollationResult partialSKResult = result, partialNormalizedSKResult = result;

            partialSKResult = compareUsingPartials(myCollation, source, srcLen, target, trgLen, partialSizes[i], &status);
            if (partialSKResult != result) {
                log_err("Partial sortkey comparison returned wrong result (%i exp. %i): %s, %s (size %i)\n",
                    partialSKResult, result,
                    aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
            }

            if (QUICK <= 0 && savedNorm != UCOL_ON) {
                ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
                partialNormalizedSKResult = compareUsingPartials(myCollation, source, srcLen, target, trgLen, partialSizes[i], &status);
                ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, savedNorm, &status);
                if (partialSKResult != partialNormalizedSKResult) {
                    log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n",
                        aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
                }
            }
        }
    }

    /* ---- ucol_strcoll: explicit length vs. NUL-terminated ---- */
    strcollLen = ucol_strcoll(myCollation, source, srcLen, target, trgLen);
    strcollNul = ucol_strcoll(myCollation, source, -1, target, -1);
    if (strcollLen != strcollNul) {
        log_err("ucol_strcoll result from null terminated and explicit length strings differs.\n");
    }

    /* ---- sort keys ---- */
    /* A call with no buffer reports the key length; all four buffers are
       sized for the longer of the two keys. */
    keyLenSrc = ucol_getSortKey(myCollation, source, srcLen, NULL, 0);
    keyLenTrg = ucol_getSortKey(myCollation, target, trgLen, NULL, 0);
    keyLenMax = (keyLenSrc > keyLenTrg ? keyLenSrc : keyLenTrg);

    keySrc    = (uint8_t*)malloc(sizeof(uint8_t) * (keyLenMax+1));
    keySrcNul = (uint8_t*)malloc(sizeof(uint8_t) * (keyLenMax+1));
    ucol_getSortKey(myCollation, source, srcLen, keySrc,    keyLenSrc+1);
    ucol_getSortKey(myCollation, source, -1,     keySrcNul, keyLenSrc+1);

    keyTrg    = (uint8_t*)malloc(sizeof(uint8_t) * (keyLenMax+1));
    keyTrgNul = (uint8_t*)malloc(sizeof(uint8_t) * (keyLenMax+1));
    ucol_getSortKey(myCollation, target, trgLen, keyTrg,    keyLenTrg+1);
    ucol_getSortKey(myCollation, target, -1,     keyTrgNul, keyLenTrg+1);

    /* Check that sort key generated with null terminated string is identical */
    /* to that generated with a length specified. */
    if (uprv_strcmp((const char *)keySrc, (const char *)keySrcNul) != 0 ||
        uprv_strcmp((const char *)keyTrg, (const char *)keyTrgNul) != 0 ) {
        log_err("Sort Keys from null terminated and explicit length strings differ.\n");
    }

    /* Keys are compared as C strings; the reported length must equal the
       string length plus the terminator. */
    keyOrder = uprv_strcmp((const char *)keySrc, (const char *)keyTrg);
    actualLenSrc = uprv_strlen((const char *)keySrc)+1;
    actualLenTrg = uprv_strlen((const char *)keyTrg)+1;
    if (keyLenSrc != actualLenSrc) {
        log_err("SortKey length does not match Expected: %i Got: %i\n",keyLenSrc, actualLenSrc);
        log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, keySrc, buffer, &len));
    }
    if (keyLenTrg != actualLenTrg) {
        log_err("SortKey length does not match Expected: %i Got: %i\n", keyLenTrg, actualLenTrg);
        log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, keyTrg, buffer, &len));
    }

    keyResult = (keyOrder < 0) ? UCOL_LESS
              : ((keyOrder > 0) ? UCOL_GREATER : UCOL_EQUAL);

    reportCResult( source, target, keySrc, keyTrg, strcollLen, keyResult, iterResult, result );

    free(keySrc);
    free(keyTrg);
    free(keySrcNul);
    free(keyTrgNul);
}
/*
 * Returns a UCharIterator set up over the NUL-terminated UTF-8 text in
 * `string`, using its strlen() as the byte length.
 *
 * NOTE(review): the iterator presumably refers to `string` rather than
 * copying it -- confirm callers keep the text alive while iterating.
 */
static UCharIterator createIteratorUTF8(const char* string)
{
    UCharIterator utf8Iter;
    const size_t byteCount = strlen(string);

    uiter_setUTF8(&utf8Iter, string, byteCount);
    return utf8Iter;
}
/* Forwarding wrapper: passes iter, s and length unchanged to uiter_setUTF8().
 * NOTE(review): presumably exists so a foreign-function binding has a plain
 * symbol to link against -- confirm against the build. */
void __hs_uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length) { uiter_setUTF8(iter, s, length); }