/*
 * Writes a printable representation of a sort key into `buffer` and returns
 * `buffer`.
 *
 * The output starts with "[" and ends with "]". The bytes of each level are
 * appended through uprv_appendByteToHexString(), each followed by a space;
 * levels after the first are introduced by " . ". The byte that terminates a
 * level (expected to be 0x01) is appended as well when it is non-zero.
 *
 * coll    - collator that produced the key; its strength, case-level and
 *           alternate-handling attributes decide how many levels are printed.
 * sortkey - zero-terminated sort key bytes.
 * buffer  - destination; the caller must make it large enough, no bounds
 *           checking is done here.
 * len     - unused. The former size check compared a counter that was never
 *           updated (so it could never fail) while still reading *len, which
 *           callers may pass uninitialized. The parameter is kept only so the
 *           signature stays compatible.
 */
static char* U_EXPORT2 sortKeyToString(const UCollator *coll, const uint8_t *sortkey, char *buffer, uint32_t *len) {
    int32_t strength = UCOL_PRIMARY;
    UBool doneCase = FALSE;
    UErrorCode errorCode = U_ZERO_ERROR;
    /* The collator is not modified here, so query the loop-invariant settings once. */
    const int32_t collStrength = ucol_getStrength(coll);
    const UColAttributeValue caseLevel = ucol_getAttribute(coll, UCOL_CASE_LEVEL, &errorCode);
    char *current = buffer;
    const uint8_t *currentSk = sortkey;

    (void)len;

    uprv_strcpy(current, "[");

    while(strength <= UCOL_QUATERNARY && strength <= collStrength) {
        if(strength > UCOL_PRIMARY) {
            uprv_strcat(current, " . ");
        }
        while(*currentSk != 0x01 && *currentSk != 0x00) { /* print a level */
            uprv_appendByteToHexString(current, *currentSk++);
            uprv_strcat(current, " ");
        }
        /*
         * With the case level on, one extra level follows the secondary one,
         * so the strength counter is held back for exactly one iteration.
         */
        if(caseLevel == UCOL_ON && strength == UCOL_SECONDARY && doneCase == FALSE) {
            doneCase = TRUE;
        } else if(caseLevel == UCOL_OFF || doneCase == TRUE || strength != UCOL_SECONDARY) {
            strength ++;
        }
        if (*currentSk) {
            uprv_appendByteToHexString(current, *currentSk++); /* This should print '01' */
        }
        if(strength == UCOL_QUATERNARY && ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &errorCode) == UCOL_NON_IGNORABLE) {
            break;
        }
    }

    if(collStrength == UCOL_IDENTICAL) {
        uprv_strcat(current, " . ");
        while(*currentSk != 0) {
            uprv_appendByteToHexString(current, *currentSk++);
            uprv_strcat(current, " ");
        }
        /* also print the terminating zero byte */
        uprv_appendByteToHexString(current, *currentSk++);
    }
    uprv_strcat(current, "]");

    return buffer;
}
void Collator::createCollator() const { ASSERT(!m_collator); UErrorCode status = U_ZERO_ERROR; { Locker<Mutex> lock(cachedCollatorMutex()); if (cachedCollator) { UColAttributeValue cachedCollatorLowerFirst = ucol_getAttribute(cachedCollator, UCOL_CASE_FIRST, &status); ASSERT(U_SUCCESS(status)); if (0 == strcmp(cachedEquivalentLocale, m_equivalentLocale) && ((UCOL_LOWER_FIRST == cachedCollatorLowerFirst && m_lowerFirst) || (UCOL_UPPER_FIRST == cachedCollatorLowerFirst && !m_lowerFirst))) { m_collator = cachedCollator; cachedCollator = 0; cachedEquivalentLocale[0] = 0; return; } } } m_collator = ucol_open(m_locale, &status); if (U_FAILURE(status)) { status = U_ZERO_ERROR; m_collator = ucol_open("", &status); // Fallback to Unicode Collation Algorithm. } ASSERT(U_SUCCESS(status)); ucol_setAttribute(m_collator, UCOL_CASE_FIRST, m_lowerFirst ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST, &status); ASSERT(U_SUCCESS(status)); ucol_setAttribute(m_collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); ASSERT(U_SUCCESS(status)); }
// Returns the value of `attr` from the wrapped C collator. When `status`
// already holds a failure, the collator is not queried and UCOL_DEFAULT is
// returned instead.
UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCode &status)
{
    return U_FAILURE(status) ? UCOL_DEFAULT
                             : ucol_getAttribute(ucollator, attr, &status);
}
void Collator::createCollator() const { ASSERT(!m_collator); UErrorCode status = U_ZERO_ERROR; { Locker<Mutex> lock(cachedCollatorMutex()); if (cachedCollator) { const char* cachedCollatorLocale = ucol_getLocaleByType(cachedCollator, ULOC_REQUESTED_LOCALE, &status); ASSERT(U_SUCCESS(status)); ASSERT(cachedCollatorLocale); UColAttributeValue cachedCollatorLowerFirst = ucol_getAttribute(cachedCollator, UCOL_CASE_FIRST, &status); ASSERT(U_SUCCESS(status)); // FIXME: default locale is never matched, because ucol_getLocaleByType returns the actual one used, not 0. if (m_locale && 0 == strcmp(cachedCollatorLocale, m_locale) && ((UCOL_LOWER_FIRST == cachedCollatorLowerFirst && m_lowerFirst) || (UCOL_UPPER_FIRST == cachedCollatorLowerFirst && !m_lowerFirst))) { m_collator = cachedCollator; cachedCollator = 0; return; } } } m_collator = ucol_open(m_locale, &status); if (U_FAILURE(status)) { status = U_ZERO_ERROR; m_collator = ucol_open("", &status); // Fallback to Unicode Collation Algorithm. } ASSERT(U_SUCCESS(status)); ucol_setAttribute(m_collator, UCOL_CASE_FIRST, m_lowerFirst ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST, &status); ASSERT(U_SUCCESS(status)); }
/**
 * call-seq:
 *     collator.get_attr(attribute)
 *     collator[attribute]
 *
 * Universal attribute getter. See above for valid attributes and their values.
 *
 * +attribute+ must be a Fixnum; the attribute's current value is returned as a
 * Fixnum. An ICU error status is reported through ICU_RAISE.
 **/
VALUE icu4r_col_get_attr(VALUE self, VALUE obj)
{
    UErrorCode err = U_ZERO_ERROR;
    UColAttributeValue attr_value;

    Check_Type(obj, T_FIXNUM);

    attr_value = ucol_getAttribute(UCOLLATOR(self), FIX2INT(obj), &err);
    ICU_RAISE(err);

    return INT2FIX(attr_value);
}
//static jint NativeCollation_getAttribute(JNIEnv* env, jclass, jint address, jint type) { JNIEXPORT jint JNICALL Java_com_ibm_icu4jni_text_NativeCollation_getAttribute(JNIEnv* env, jclass, jint address, jint type) { UErrorCode status = U_ZERO_ERROR; jint result = ucol_getAttribute(toCollator(address), (UColAttribute) type, &status); icu4jni_error(env, status); return result; }
/**
 * Builds the list of collation elements (CEs) of `string` under `coll`.
 *
 * Each CE from the element iterator is masked down to the collator's
 * strength. With shifted alternate handling, CEs with a non-zero primary
 * weight below the variable top are reduced to their primary weight
 * (quaternary strength or above) or dropped. Ignorable CEs are skipped, and
 * the continuation marker is put back on continuation CEs before they are
 * stored through add().
 *
 * @param coll    collator whose attributes and rules are used
 * @param string  text to convert into collation elements
 * @param status  ICU error code; on failure the list is left empty
 */
CEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status)
    : ces(NULL), listMax(CELIST_BUFFER_SIZE), listSize(0)
{
    UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status);
    UCollationStrength strength = ucol_getStrength(coll);
    UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED;
    uint32_t variableTop = ucol_getVariableTop(coll, &status);
    uint32_t strengthMask = 0;
    int32_t order;

    if (U_FAILURE(status)) {
        // The iterator may have been opened before a later call failed;
        // release it so the early exit does not leak it.
        if (elems != NULL) {
            ucol_closeElements(elems);
        }
        return;
    }

    // **** only set flag if string has Han(gul) ****
    // ucol_forceHanImplicit(elems, &status); -- removed for ticket #10476

    // Accumulate the weight masks for every level up to the collator strength.
    switch (strength)
    {
    default:
        strengthMask |= UCOL_TERTIARYORDERMASK;
        U_FALLTHROUGH;
    case UCOL_SECONDARY:
        strengthMask |= UCOL_SECONDARYORDERMASK;
        U_FALLTHROUGH;
    case UCOL_PRIMARY:
        strengthMask |= UCOL_PRIMARYORDERMASK;
    }

    ces = ceBuffer;

    while ((order = ucol_next(elems, &status)) != UCOL_NULLORDER) {
        // Remember the continuation flag before masking.
        UBool cont = isContinuation(order);

        order &= strengthMask;

        if (toShift && variableTop > (uint32_t)order && (order & UCOL_PRIMARYORDERMASK) != 0) {
            if (strength >= UCOL_QUATERNARY) {
                order &= UCOL_PRIMARYORDERMASK;
            } else {
                order = UCOL_IGNORABLE;
            }
        }

        if (order == UCOL_IGNORABLE) {
            continue;
        }

        if (cont) {
            order |= UCOL_CONTINUATION_MARKER;
        }

        add(order, status);
    }

    ucol_closeElements(elems);
}
//static jint NativeCollation_getNormalization(JNIEnv* env, jclass, jint address) { JNIEXPORT jint JNICALL Java_com_ibm_icu4jni_text_NativeCollation_getNormalization(JNIEnv* env, jclass, jint address) { UErrorCode status = U_ZERO_ERROR; jint result = ucol_getAttribute(toCollator(address), UCOL_NORMALIZATION_MODE, &status); icu4jni_error(env, status); return result; }
// Collator::getAttribute(attr): returns the ICU attribute value as an integer.
// The object's stored error is cleared first; on ICU failure the error is
// recorded on the object and 0 is returned.
static int64_t HHVM_METHOD(Collator, getAttribute, int64_t attr) {
  FETCH_COL(data, this_, 0);
  data->clearError();

  UErrorCode error = U_ZERO_ERROR;
  const UColAttributeValue value =
    ucol_getAttribute(data->collator(), (UColAttribute)attr, &error);

  if (U_SUCCESS(error)) {
    return (int64_t)value;
  }

  data->setError(error, "Error getting attribute value");
  return 0;
}
// Collator.upper_first {{{ static PyObject * icu_Collator_get_upper_first(icu_Collator *self, void *closure) { UErrorCode status = U_ZERO_ERROR; UColAttributeValue val; val = ucol_getAttribute(self->collator, UCOL_CASE_FIRST, &status); if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); return NULL; } if (val == UCOL_OFF) { Py_RETURN_NONE; } if (val) { Py_RETURN_TRUE; } Py_RETURN_FALSE; }
int64_t c_Collator::t_getattribute(int64_t attr) { if (!m_ucoll) { raise_warning("getattribute called on uninitialized Collator object"); return 0; } m_errcode.clearError(); UErrorCode error = U_ZERO_ERROR; int64_t ret = (int64_t)ucol_getAttribute(m_ucoll, (UColAttribute)attr, &error); if (U_FAILURE(error)) { m_errcode.setError(error, "Error getting attribute value"); return 0; } return ret; }
int64_t c_Collator::t_getattribute(int64_t attr) { if (!m_ucoll) { raise_warning("getattribute called on uninitialized Collator object"); return 0; } m_errcode.clear(); int64_t ret = (int64_t)ucol_getAttribute(m_ucoll, (UColAttribute)attr, &(m_errcode.code)); s_intl_error->m_error.clear(); s_intl_error->m_error.code = m_errcode.code; if (U_FAILURE(m_errcode.code)) { m_errcode.custom_error_message = "Error getting attribute value"; s_intl_error->m_error.custom_error_message = m_errcode.custom_error_message; return 0; } return ret; }
/**
 * Sets up a search target for `theCollator`.
 *
 * Reads the collator's strength, alternate-handling mode and variable top,
 * sizes the CE buffer from the pattern length and the collator's largest
 * expansion, optionally installs the target string, and computes the weight
 * mask matching the collator strength.
 *
 * @param theCollator    collator used for matching
 * @param target         text to search in; may be NULL
 * @param patternLength  pattern length, used to size the CE buffer
 * @param status         set to U_MEMORY_ALLOCATION_ERROR if the buffer cannot
 *                       be allocated
 */
Target::Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status)
    : bufferSize(0), bufferMin(0), bufferMax(0),
      strengthMask(0), strength(UCOL_PRIMARY), variableTop(0), toShift(FALSE), coll(theCollator),
      nfd(*Normalizer2Factory::getNFDInstance(status)),
      targetString(NULL), targetBuffer(NULL), targetLength(0), elements(NULL), charBreakIterator(NULL)
{
    strength = ucol_getStrength(coll);
    toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED;
    variableTop = ucol_getVariableTop(coll, &status);

    // find the largest expansion (the size table is zero-terminated)
    uint8_t largestExpansion = 0;
    for (const uint8_t *sizeEntry = coll->expansionCESize; *sizeEntry != 0; ++sizeEntry) {
        if (largestExpansion < *sizeEntry) {
            largestExpansion = *sizeEntry;
        }
    }

    // room for an extra character on each end, plus 4 for safety
    bufferSize = patternLength + (2 * largestExpansion) + 4;

    ceb = NEW_ARRAY(CEI, bufferSize);
    if (ceb == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    if (target != NULL) {
        setTargetString(target);
    }

    // Primary weights always count; each stronger level adds its own mask.
    strengthMask |= UCOL_PRIMARYORDERMASK;
    if (strength != UCOL_PRIMARY) {
        strengthMask |= UCOL_SECONDARYORDERMASK;
        if (strength != UCOL_SECONDARY) {
            strengthMask |= UCOL_TERTIARYORDERMASK;
        }
    }
}
// Returns the value of collation attribute `attr`.
// Warns and returns 0 when the collator has not been initialized. The ICU
// status is written to m_errcode and copied into s_intl_error; on failure both
// also receive a custom message and 0 is returned.
int64 c_Collator::t_getattribute(int64 attr) {
  INSTANCE_METHOD_INJECTION_BUILTIN(Collator, Collator::getattribute);
  if (!m_ucoll) {
    raise_warning("getattribute called on uninitialized Collator object");
    return 0;
  }

  m_errcode.clear();
  const UColAttributeValue value =
    ucol_getAttribute(m_ucoll, (UColAttribute)attr, &(m_errcode.code));

  // Copy the outcome into the shared intl error state.
  s_intl_error->m_error.clear();
  s_intl_error->m_error.code = m_errcode.code;

  if (U_SUCCESS(m_errcode.code)) {
    return (int64)value;
  }

  m_errcode.custom_error_message = "Error getting attribute value";
  s_intl_error->m_error.custom_error_message = m_errcode.custom_error_message;
  return 0;
}
// Convenience wrapper: returns the collator's UCOL_STRENGTH attribute.
// The status produced by the attribute lookup is local and discarded.
U_CAPI UCollationStrength U_EXPORT2
ucol_getStrength(const UCollator *coll)
{
    UErrorCode ignoredStatus = U_ZERO_ERROR;
    const UCollationStrength strength = ucol_getAttribute(coll, UCOL_STRENGTH, &ignoredStatus);
    return strength;
}
U_CAPI UBool U_EXPORT2 ucol_equals(const UCollator *source, const UCollator *target) { UErrorCode status = U_ZERO_ERROR; // if pointers are equal, collators are equal if(source == target) { return TRUE; } int32_t i = 0, j = 0; // if any of attributes are different, collators are not equal for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) { if(ucol_getAttribute(source, (UColAttribute)i, &status) != ucol_getAttribute(target, (UColAttribute)i, &status) || U_FAILURE(status)) { return FALSE; } } int32_t sourceRulesLen = 0, targetRulesLen = 0; const UChar *sourceRules = ucol_getRules(source, &sourceRulesLen); const UChar *targetRules = ucol_getRules(target, &targetRulesLen); if(sourceRulesLen == targetRulesLen && u_strncmp(sourceRules, targetRules, sourceRulesLen) == 0) { // all the attributes are equal and the rules are equal - collators are equal return(TRUE); } // hard part, need to construct tree from rules and see if they yield the same tailoring UBool result = TRUE; UParseError parseError; UColTokenParser sourceParser, targetParser; int32_t sourceListLen = 0, targetListLen = 0; ucol_tok_initTokenList(&sourceParser, sourceRules, sourceRulesLen, source->UCA, &status); ucol_tok_initTokenList(&targetParser, targetRules, targetRulesLen, target->UCA, &status); sourceListLen = ucol_tok_assembleTokenList(&sourceParser, &parseError, &status); targetListLen = ucol_tok_assembleTokenList(&targetParser, &parseError, &status); if(sourceListLen != targetListLen) { // different number of resets result = FALSE; } else { UColToken *sourceReset = NULL, *targetReset = NULL; UChar *sourceResetString = NULL, *targetResetString = NULL; int32_t sourceStringLen = 0, targetStringLen = 0; for(i = 0; i < sourceListLen; i++) { sourceReset = sourceParser.lh[i].reset; sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF); sourceStringLen = sourceReset->source >> 24; for(j = 0; j < sourceListLen; j++) { targetReset = targetParser.lh[j].reset; targetResetString = 
targetParser.source+(targetReset->source & 0xFFFFFF); targetStringLen = targetReset->source >> 24; if(sourceStringLen == targetStringLen && (u_strncmp(sourceResetString, targetResetString, sourceStringLen) == 0)) { sourceReset = sourceParser.lh[i].first; targetReset = targetParser.lh[j].first; while(sourceReset != NULL && targetReset != NULL) { sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF); sourceStringLen = sourceReset->source >> 24; targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF); targetStringLen = targetReset->source >> 24; if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) { result = FALSE; goto returnResult; } // probably also need to check the expansions if(sourceReset->expansion) { if(!targetReset->expansion) { result = FALSE; goto returnResult; } else { // compare expansions sourceResetString = sourceParser.source+(sourceReset->expansion& 0xFFFFFF); sourceStringLen = sourceReset->expansion >> 24; targetResetString = targetParser.source+(targetReset->expansion & 0xFFFFFF); targetStringLen = targetReset->expansion >> 24; if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) { result = FALSE; goto returnResult; } } } else { if(targetReset->expansion) { result = FALSE; goto returnResult; } } sourceReset = sourceReset->next; targetReset = targetReset->next; } if(sourceReset != targetReset) { // at least one is not NULL // there are more tailored elements in one list result = FALSE; goto returnResult; } break; } } // couldn't find the reset anchor, so the collators are not equal if(j == sourceListLen) { result = FALSE; goto returnResult; } }
/**
 * Opens a collator described by a short definition string.
 *
 * The string is parsed into a CollatorSpec (locale plus attribute settings),
 * the locale is canonicalized and opened, and every attribute the string set
 * explicitly is applied. A variable top given in the string is applied last,
 * either from its string form or from its numeric value.
 *
 * @param definition     short string to parse
 * @param forceDefaults  when true, explicit attributes are set even if the
 *                       opened collator already has that value
 * @param parseError     optional; receives the error offset when setting an
 *                       attribute fails
 * @param status         ICU error code; nothing is done if it already failed
 * @return the new collator, or NULL/0 on failure
 */
U_CAPI UCollator* U_EXPORT2
ucol_openFromShortString( const char *definition,
                          UBool forceDefaults,
                          UParseError *parseError,
                          UErrorCode *status)
{
    UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN_FROM_SHORT_STRING);
    UTRACE_DATA1(UTRACE_INFO, "short string = \"%s\"", definition);

    if(U_FAILURE(*status)) return 0;

    // Callers may pass no parse-error sink; write into a scratch one then.
    UParseError scratchParseError;
    if(!parseError) {
        parseError = &scratchParseError;
    }
    parseError->line = 0;
    parseError->offset = 0;
    parseError->preContext[0] = 0;
    parseError->postContext[0] = 0;

    // first we want to pick stuff out of short string.
    // we'll end up with an UCA version, locale and a bunch of
    // settings

    // analyse the string in order to get everything we need.
    CollatorSpec spec;
    ucol_sit_initCollatorSpecs(&spec);
    const char *parsedUpTo = ucol_sit_readSpecs(&spec, definition, parseError, status);
    ucol_sit_calculateWholeLocale(&spec);

    char canonicalLocale[internalBufferSize];
    uprv_memset(canonicalLocale, 0, internalBufferSize);
    uloc_canonicalize(spec.locale, canonicalLocale, internalBufferSize, status);

    UCollator *result = ucol_open(canonicalLocale, status);

    for(int32_t attr = 0; attr < UCOL_ATTRIBUTE_COUNT; attr++) {
        if(spec.options[attr] == UCOL_DEFAULT) {
            continue;   // not mentioned in the short string
        }
        if(forceDefaults ||
           ucol_getAttribute(result, (UColAttribute)attr, status) != spec.options[attr]) {
            ucol_setAttribute(result, (UColAttribute)attr, spec.options[attr], status);
        }
        if(U_FAILURE(*status)) {
            parseError->offset = (int32_t)(parsedUpTo - definition);
            ucol_close(result);
            return NULL;
        }
    }

    if(spec.variableTopSet) {
        if(spec.variableTopString[0]) {
            ucol_setVariableTop(result, spec.variableTopString, spec.variableTopStringLen, status);
        } else { // we set by value, using 'B'
            ucol_restoreVariableTop(result, spec.variableTopValue, status);
        }
    }

    if(U_FAILURE(*status)) { // here it can only be a bogus value
        ucol_close(result);
        result = NULL;
    }

    UTRACE_EXIT_PTR_STATUS(result, *status);
    return result;
}
/**
 * Tests the [variable top] tag in rule syntax. Since the default [alternate]
 * tag has the value shifted, any codepoints before [variable top] should give
 * a primary ce of 0.
 *
 * The whole body is compiled out (see the note inside), so the test currently
 * does nothing.
 */
static void TestVariableTop(void)
{
#if 0
    /*
     * Starting with ICU 53, setting the variable top via a pseudo relation string
     * is not supported any more.
     * It was replaced by the [maxVariable symbol] setting.
     * See ICU tickets #9958 and #8032.
     */
    static const char str[] = "&z = [variable top]";
    int len = strlen(str);
    UChar rules[sizeof(str)];
    UCollator *myCollation;
    UCollator *enCollation;
    UErrorCode status = U_ZERO_ERROR;
    UChar source[1];
    UChar ch;
    uint8_t result[20];
    uint8_t expected[20];

    u_uastrcpy(rules, str);

    enCollation = ucol_open("en_US", &status);
    if (U_FAILURE(status)) {
        log_err_status(status, "ERROR: in creation of collator :%s\n",
                       myErrorName(status));
        return;
    }

    myCollation = ucol_openRules(rules, len, UCOL_OFF,
                                 UCOL_PRIMARY,NULL, &status);
    if (U_FAILURE(status)) {
        ucol_close(enCollation);
        log_err("ERROR: in creation of rule based collator :%s\n",
                myErrorName(status));
        return;
    }

    ucol_setStrength(enCollation, UCOL_PRIMARY);
    ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
    ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);

    if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) !=
        UCOL_SHIFTED || U_FAILURE(status)) {
        log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
    }

    uprv_memset(expected, 0, 20);

    /* space is supposed to be a variable */
    source[0] = ' ';
    len = ucol_getSortKey(enCollation, source, 1, result,
                          sizeof(result));
    if (uprv_memcmp(expected, result, len) != 0) {
        log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
    }

    /* every letter before 'z' is expected to sort as a variable */
    for (ch = 'a'; ch < 'z'; ch ++) {
        source[0] = ch;
        len = ucol_getSortKey(myCollation, source, 1, result,
                              sizeof(result));
        if (uprv_memcmp(expected, result, len) != 0) {
            log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n",
                    ch);
        }
    }

    ucol_close(enCollation);
    ucol_close(myCollation);
    enCollation = NULL;
    myCollation = NULL;
#endif
}
// Reads collation attribute `type` from the native collator identified by
// `address`. The ICU status is passed to maybeThrowIcuException() before the
// value is returned.
static jint NativeCollation_getAttribute(JNIEnv* env, jclass, jlong address, jint type) {
    UErrorCode status = U_ZERO_ERROR;
    const UColAttribute attribute = (UColAttribute) type;

    jint value = ucol_getAttribute(toCollator(address), attribute, &status);
    maybeThrowIcuException(env, "ucol_getAttribute", status);

    return value;
}
// Collator.numeric {{{ static PyObject * icu_Collator_get_numeric(icu_Collator *self, void *closure) { UErrorCode status = U_ZERO_ERROR; return Py_BuildValue("O", (ucol_getAttribute(self->collator, UCOL_NUMERIC_COLLATION, &status) == UCOL_ON) ? Py_True : Py_False); }
/*
 * Compares `source` and `target` with `myCollation` in several ways and logs
 * an error for every disagreement with the expected `result`:
 *   - iterator-based comparison over the UTF-16 strings;
 *   - (non-quick mode) iterator-based comparison over UTF-8 conversions, with
 *     the collator's own normalization mode and with normalization forced on;
 *   - partial sort keys of several piece sizes, again optionally with
 *     normalization forced on;
 *   - ucol_strcoll() with explicit lengths versus null-terminated input;
 *   - full sort keys, generated with explicit lengths and from null-terminated
 *     input, compared bytewise as C strings.
 * The final verdict is handed to reportCResult().
 *
 * The collator's normalization mode is modified temporarily and restored.
 */
static void doTestVariant(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
{
    int32_t sortklen1, sortklen2, sortklenmax;
    int temp=0, gSortklen1=0,gSortklen2=0;
    UCollationResult compareResult, compareResulta, keyResult, compareResultIter = result;
    uint8_t *sortKey1, *sortKey2, *sortKey1a, *sortKey2a;
    uint32_t sLen = u_strlen(source);
    uint32_t tLen = u_strlen(target);
    char buffer[256];
    /* Handed to sortKeyToString(); give it a defined value (the buffer capacity). */
    uint32_t len = (uint32_t)sizeof(buffer);
    UErrorCode status = U_ZERO_ERROR;
    UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);

    UCharIterator sIter, tIter;
    uiter_setString(&sIter, source, sLen);
    uiter_setString(&tIter, target, tLen);
    compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
    if(compareResultIter != result) {
        log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
    }

    /* convert the strings to UTF-8 and do try comparing with char iterator */
    if(QUICK <= 0) { /*!QUICK*/
        char utf8Source[256], utf8Target[256];
        int32_t utf8SourceLen = 0, utf8TargetLen = 0;
        u_strToUTF8(utf8Source, 256, &utf8SourceLen, source, sLen, &status);
        if(U_FAILURE(status)) { /* probably buffer is not big enough */
            log_verbose("Src UTF-8 buffer too small! Will not compare!\n");
        } else {
            u_strToUTF8(utf8Target, 256, &utf8TargetLen, target, tLen, &status);
            if(U_SUCCESS(status)) {
                UCollationResult compareResultUTF8 = result, compareResultUTF8Norm = result;

                uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
                uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
                compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);

                /* Repeat with normalization forced on, then restore the original mode. */
                ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
                sIter.move(&sIter, 0, UITER_START);
                tIter.move(&tIter, 0, UITER_START);
                compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
                ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);

                if(compareResultUTF8 != compareResultIter) {
                    log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
                }
                if(compareResultUTF8 != compareResultUTF8Norm) {
                    log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
                }
            } else { /* probably buffer is not big enough */
                log_verbose("Target UTF-8 buffer too small! Did not compare!\n");
            }
            if(U_FAILURE(status)) {
                log_verbose("UTF-8 strcoll failed! Ignoring result\n");
            }
        }
    }

    /* testing the partial sortkeys */
    {
        int32_t i = 0;
        int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
        int32_t partialSizesSize = 1;
        if(QUICK <= 0) {
            partialSizesSize = 7;
        }
        for(i = 0; i < partialSizesSize; i++) {
            UCollationResult partialSKResult = result, partialNormalizedSKResult = result;

            partialSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
            if(partialSKResult != result) {
                log_err("Partial sortkey comparison returned wrong result (%i exp. %i): %s, %s (size %i)\n",
                    partialSKResult, result,
                    aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
            }

            if(QUICK <= 0 && norm != UCOL_ON) {
                ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
                partialNormalizedSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
                ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
                if(partialSKResult != partialNormalizedSKResult) {
                    log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n",
                        aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
                }
            }
        }
    }

    compareResult  = ucol_strcoll(myCollation, source, sLen, target, tLen);
    compareResulta = ucol_strcoll(myCollation, source, -1,   target, -1);
    if (compareResult != compareResulta) {
        log_err("ucol_strcoll result from null terminated and explicit length strings differs.\n");
    }

    /* Preflight the key lengths, then allocate buffers large enough for either key. */
    sortklen1=ucol_getSortKey(myCollation, source, sLen,  NULL, 0);
    sortklen2=ucol_getSortKey(myCollation, target, tLen,  NULL, 0);

    sortklenmax = (sortklen1>sortklen2?sortklen1:sortklen2);

    sortKey1 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey1a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey2 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey2a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    if(sortKey1 == NULL || sortKey1a == NULL || sortKey2 == NULL || sortKey2a == NULL) {
        log_err("Unable to allocate sort key buffers.\n");
        free(sortKey1);
        free(sortKey1a);
        free(sortKey2);
        free(sortKey2a);
        return;
    }

    ucol_getSortKey(myCollation, source, sLen, sortKey1,  sortklen1+1);
    ucol_getSortKey(myCollation, source, -1,   sortKey1a, sortklen1+1);
    ucol_getSortKey(myCollation, target, tLen, sortKey2,  sortklen2+1);
    ucol_getSortKey(myCollation, target, -1,   sortKey2a, sortklen2+1);

    /* Check that sort key generated with null terminated string is identical */
    /*  to that generated with a length specified.                            */
    if (uprv_strcmp((const char *)sortKey1, (const char *)sortKey1a) != 0 ||
        uprv_strcmp((const char *)sortKey2, (const char *)sortKey2a) != 0 ) {
        log_err("Sort Keys from null terminated and explicit length strings differ.\n");
    }

    temp= uprv_strcmp((const char *)sortKey1, (const char *)sortKey2);
    gSortklen1 = uprv_strlen((const char *)sortKey1)+1;
    gSortklen2 = uprv_strlen((const char *)sortKey2)+1;
    if(sortklen1 != gSortklen1){
        log_err("SortKey length does not match Expected: %i Got: %i\n",sortklen1, gSortklen1);
        log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey1, buffer, &len));
    }
    if(sortklen2!= gSortklen2){
        log_err("SortKey length does not match Expected: %i Got: %i\n", sortklen2, gSortklen2);
        log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey2, buffer, &len));
    }

    if(temp < 0) {
        keyResult=UCOL_LESS;
    }
    else if(temp > 0) {
        keyResult= UCOL_GREATER;
    }
    else {
        keyResult = UCOL_EQUAL;
    }
    reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, compareResultIter, result );

    free(sortKey1);
    free(sortKey2);
    free(sortKey1a);
    free(sortKey2a);
}
/** * Tests the [variable top] tag in rule syntax. Since the default [alternate] * tag has the value shifted, any codepoints before [variable top] should give * a primary ce of 0. */ static void TestVariableTop(void) { const char *str = "&z = [variable top]"; int len = strlen(str); UChar *rules; UCollator *myCollation; UCollator *enCollation; UErrorCode status = U_ZERO_ERROR; UChar source[1]; UChar ch; uint8_t result[20]; uint8_t expected[20]; rules = (UChar*)malloc(sizeof(UChar*) * (len + 1)); u_uastrcpy(rules, str); enCollation = ucol_open("en_US", &status); myCollation = ucol_openRules(rules, len, UCOL_OFF, UCOL_PRIMARY,NULL, &status); if (U_FAILURE(status)) { log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status)); return; } ucol_setStrength(enCollation, UCOL_PRIMARY); ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) != UCOL_SHIFTED || U_FAILURE(status)) { log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n"); } uprv_memset(expected, 0, 20); /* space is supposed to be a variable */ source[0] = ' '; len = ucol_getSortKey(enCollation, source, 1, result, sizeof(result)); if (uprv_memcmp(expected, result, len) != 0) { log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n"); } ch = 'a'; while (ch < 'z') { source[0] = ch; len = ucol_getSortKey(myCollation, source, 1, result, sizeof(result)); if (uprv_memcmp(expected, result, len) != 0) { log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n", ch); } ch ++; } free(rules); ucol_close(enCollation); ucol_close(myCollation); enCollation = NULL; myCollation = NULL; }
void IntlTestCollator::doTestVariant(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result) { UErrorCode status = U_ZERO_ERROR; UCollator *myCollation = col->toUCollator(); Collator::EComparisonResult compareResult = col->compare(source, target); CollationKey srckey, tgtkey; col->getCollationKey(source, srckey, status); col->getCollationKey(target, tgtkey, status); if (U_FAILURE(status)){ errln("Creation of collation keys failed\n"); } Collator::EComparisonResult keyResult = srckey.compareTo(tgtkey); reportCResult(source, target, srckey, tgtkey, compareResult, keyResult, result, result); UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status); int32_t sLen = source.length(), tLen = target.length(); const UChar* src = source.getBuffer(); const UChar* trg = target.getBuffer(); UCollationResult compareResultIter = (UCollationResult)result; { UCharIterator sIter, tIter; uiter_setString(&sIter, src, sLen); uiter_setString(&tIter, trg, tLen); compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status); if(compareResultIter != (UCollationResult)result) { errln("Different result for iterative comparison "+source+" "+target); } } /* convert the strings to UTF-8 and do try comparing with char iterator */ if(!quick) { /*!QUICK*/ char utf8Source[256], utf8Target[256]; int32_t utf8SourceLen = 0, utf8TargetLen = 0; u_strToUTF8(utf8Source, 256, &utf8SourceLen, src, sLen, &status); if(U_FAILURE(status)) { /* probably buffer is not big enough */ log("Src UTF-8 buffer too small! 
Will not compare!\n"); } else { u_strToUTF8(utf8Target, 256, &utf8TargetLen, trg, tLen, &status); if(U_SUCCESS(status)) { /* probably buffer is not big enough */ UCollationResult compareResultUTF8 = (UCollationResult)result, compareResultUTF8Norm = (UCollationResult)result; UCharIterator sIter, tIter; /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/ uiter_setUTF8(&sIter, utf8Source, utf8SourceLen); uiter_setUTF8(&tIter, utf8Target, utf8TargetLen); /*uiter_setString(&sIter, source, sLen); uiter_setString(&tIter, target, tLen);*/ compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status); ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); sIter.move(&sIter, 0, UITER_START); tIter.move(&tIter, 0, UITER_START); compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status); ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status); if(compareResultUTF8 != compareResultIter) { errln("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. "+source+", "+target); } if(compareResultUTF8 != compareResultUTF8Norm) { errln("different results in iterative when normalization is turned on with UTF-8 strings. "+source+", "+target); } } else { log("Target UTF-8 buffer too small! Did not compare!\n"); } if(U_FAILURE(status)) { log("UTF-8 strcoll failed! 
Ignoring result\n"); } } } /* testing the partial sortkeys */ { /*!QUICK*/ int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */ int32_t partialSizesSize = 1; if(!quick) { partialSizesSize = 7; } int32_t i = 0; log("partial sortkey test piecesize="); for(i = 0; i < partialSizesSize; i++) { UCollationResult partialSKResult = (UCollationResult)result, partialNormalizedSKResult = (UCollationResult)result; log("%i ", partialSizes[i]); partialSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status); if(partialSKResult != (UCollationResult)result) { errln("Partial sortkey comparison returned wrong result: "+source+", "+target+" (size "+partialSizes[i]+")"); } if(norm != UCOL_ON && !quick) { log("N "); ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); partialNormalizedSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status); ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status); if(partialSKResult != partialNormalizedSKResult) { errln("Partial sortkey comparison gets different result when normalization is on: "+source+", "+target+" (size "+partialSizes[i]+")"); } } } log("\n"); } /* if (compareResult != result) { errln("String comparison failed in variant test\n"); } if (keyResult != result) { errln("Collation key comparison failed in variant test\n"); } */ }
// Returns the collator's strength converted to the C++ ECollationStrength
// enum. The status produced by the attribute lookup is local and discarded.
Collator::ECollationStrength RuleBasedCollator::getStrength(void) const
{
    UErrorCode intStatus = U_ZERO_ERROR;
    const UColAttributeValue rawStrength = ucol_getAttribute(ucollator, UCOL_STRENGTH, &intStatus);
    return getECollationStrength(rawStrength);
}
/* Thin forwarding wrapper: passes all arguments unchanged to ucol_getAttribute(). */
UColAttributeValue __hs_ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status)
{
    const UColAttributeValue value = ucol_getAttribute(coll, attr, status);
    return value;
}