Ejemplo n.º 1
0
/*
 * Builds a bracketed, human-readable dump of a sort key in `buffer`.
 *
 * coll    - collator whose strength, case-level and alternate-handling
 *           attributes decide how many levels are walked.
 * sortkey - sort key bytes; 0x01 is treated as a level separator and 0x00
 *           as the terminator.
 * buffer  - destination C string. It is written with uprv_strcpy/uprv_strcat
 *           without any bounds checking, so the caller must size it.
 * len     - only read by the final size check; never written here.
 *
 * Returns `buffer`. NOTE(review): res_size is never changed from 0, so the
 * `res_size > *len` check below can never be true and NULL is never returned.
 */
static char* U_EXPORT2 sortKeyToString(const UCollator *coll, const uint8_t *sortkey, char *buffer, uint32_t *len) {
    int32_t strength = UCOL_PRIMARY;
    uint32_t res_size = 0;
    UBool doneCase = FALSE;
    UErrorCode errorCode = U_ZERO_ERROR;

    /* `current` is never advanced, so it always aliases the start of `buffer`. */
    char *current = buffer;
    const uint8_t *currentSk = sortkey;

    uprv_strcpy(current, "[");

    /* One iteration per level, up to quaternary or the collator's strength. */
    while(strength <= UCOL_QUATERNARY && strength <= ucol_getStrength(coll)) {
        if(strength > UCOL_PRIMARY) {
            uprv_strcat(current, " . ");
        }
        while(*currentSk != 0x01 && *currentSk != 0x00) { /* print a level */
            /* presumably appends the byte as hex text to `current` -- helper not visible here */
            uprv_appendByteToHexString(current, *currentSk++);
            uprv_strcat(current, " ");
        }
        /*
         * With the case level on, the first pass at secondary strength does
         * not advance `strength`, so one extra level is dumped before moving on.
         */
        if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, &errorCode) == UCOL_ON && strength == UCOL_SECONDARY && doneCase == FALSE) {
            doneCase = TRUE;
        } else if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, &errorCode) == UCOL_OFF || doneCase == TRUE || strength != UCOL_SECONDARY) {
            strength ++;
        }
        /* Consume the separator byte unless the terminator has been reached. */
        if (*currentSk) {
            uprv_appendByteToHexString(current, *currentSk++); /* This should print '01' */
        }
        /* Stop before the quaternary level when alternate handling is non-ignorable. */
        if(strength == UCOL_QUATERNARY && ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &errorCode) == UCOL_NON_IGNORABLE) {
            break;
        }
    }

    /* At identical strength, dump the remaining bytes up to the terminator. */
    if(ucol_getStrength(coll) == UCOL_IDENTICAL) {
        uprv_strcat(current, " . ");
        while(*currentSk != 0) {
            uprv_appendByteToHexString(current, *currentSk++);
            uprv_strcat(current, " ");
        }

        /* Also emit the terminating 0x00 byte. */
        uprv_appendByteToHexString(current, *currentSk++);
    }
    uprv_strcat(current, "]");

    /* Dead check: res_size is still 0 here (see the header note). */
    if(res_size > *len) {
        return NULL;
    }

    return buffer;
}
Ejemplo n.º 2
0
void Collator::createCollator() const
{
    ASSERT(!m_collator);
    UErrorCode status = U_ZERO_ERROR;

    {
        Locker<Mutex> lock(cachedCollatorMutex());
        if (cachedCollator) {
            UColAttributeValue cachedCollatorLowerFirst = ucol_getAttribute(cachedCollator, UCOL_CASE_FIRST, &status);
            ASSERT(U_SUCCESS(status));

            if (0 == strcmp(cachedEquivalentLocale, m_equivalentLocale)
                && ((UCOL_LOWER_FIRST == cachedCollatorLowerFirst && m_lowerFirst) || (UCOL_UPPER_FIRST == cachedCollatorLowerFirst && !m_lowerFirst))) {
                m_collator = cachedCollator;
                cachedCollator = 0;
                cachedEquivalentLocale[0] = 0;
                return;
            }
        }
    }

    m_collator = ucol_open(m_locale, &status);
    if (U_FAILURE(status)) {
        status = U_ZERO_ERROR;
        m_collator = ucol_open("", &status); // Fallback to Unicode Collation Algorithm.
    }
    ASSERT(U_SUCCESS(status));

    ucol_setAttribute(m_collator, UCOL_CASE_FIRST, m_lowerFirst ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST, &status);
    ASSERT(U_SUCCESS(status));

    ucol_setAttribute(m_collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    ASSERT(U_SUCCESS(status));
}
Ejemplo n.º 3
0
// Returns the value of `attr` from the wrapped UCollator. If `status` already
// holds a failure on entry, UCOL_DEFAULT is returned without querying.
UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr,
                                                      UErrorCode &status)
{
    return U_FAILURE(status) ? UCOL_DEFAULT
                             : ucol_getAttribute(ucollator, attr, &status);
}
Ejemplo n.º 4
0
void Collator::createCollator() const
{
    ASSERT(!m_collator);
    UErrorCode status = U_ZERO_ERROR;

    {
        Locker<Mutex> lock(cachedCollatorMutex());
        if (cachedCollator) {
            const char* cachedCollatorLocale = ucol_getLocaleByType(cachedCollator, ULOC_REQUESTED_LOCALE, &status);
            ASSERT(U_SUCCESS(status));
            ASSERT(cachedCollatorLocale);

            UColAttributeValue cachedCollatorLowerFirst = ucol_getAttribute(cachedCollator, UCOL_CASE_FIRST, &status);
            ASSERT(U_SUCCESS(status));

            // FIXME: default locale is never matched, because ucol_getLocaleByType returns the actual one used, not 0.
            if (m_locale && 0 == strcmp(cachedCollatorLocale, m_locale)
                && ((UCOL_LOWER_FIRST == cachedCollatorLowerFirst && m_lowerFirst) || (UCOL_UPPER_FIRST == cachedCollatorLowerFirst && !m_lowerFirst))) {
                m_collator = cachedCollator;
                cachedCollator = 0;
                return;
            }
        }
    }

    m_collator = ucol_open(m_locale, &status);
    if (U_FAILURE(status)) {
        status = U_ZERO_ERROR;
        m_collator = ucol_open("", &status); // Fallback to Unicode Collation Algorithm.
    }
    ASSERT(U_SUCCESS(status));

    ucol_setAttribute(m_collator, UCOL_CASE_FIRST, m_lowerFirst ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST, &status);
    ASSERT(U_SUCCESS(status));
}
Ejemplo n.º 5
0
/**
 * call-seq:
 *     collator.get_attr(attribute)
 *     collator[attribute]
 *
 * Universal attribute getter. Takes the attribute as a Fixnum and returns its
 * current value as a Fixnum. See above for valid attributes and their values
 **/
VALUE icu4r_col_get_attr(VALUE self, VALUE obj)
{
    UColAttributeValue attr_value;
    UErrorCode status = U_ZERO_ERROR;

    /* The attribute must be a Fixnum before it is converted with FIX2INT. */
    Check_Type(obj, T_FIXNUM);

    attr_value = ucol_getAttribute(UCOLLATOR(self), FIX2INT(obj), &status);
    ICU_RAISE(status);

    return INT2FIX(attr_value);
}
Ejemplo n.º 6
0
// JNI entry point: static jint NativeCollation_getAttribute(JNIEnv* env, jclass, jint address, jint type)
// Reads the collation attribute `type` from the collator behind `address`,
// hands the ICU status to icu4jni_error and returns the attribute value.
JNIEXPORT jint JNICALL
Java_com_ibm_icu4jni_text_NativeCollation_getAttribute(JNIEnv* env, jclass,
		jint address, jint type) {
	UErrorCode errorCode = U_ZERO_ERROR;
	const UColAttribute attribute = (UColAttribute) type;
	const jint value = ucol_getAttribute(toCollator(address), attribute,
			&errorCode);
	icu4jni_error(env, errorCode);
	return value;
}
Ejemplo n.º 7
0
/**
 * Builds the list of collation elements (CEs) that `coll` produces for
 * `string`.
 *
 * Each CE is masked down to the levels selected by the collator's strength.
 * When alternate handling is UCOL_SHIFTED, CEs with a non-zero primary below
 * the variable top are reduced to their primary weight (quaternary strength
 * or above) or dropped. Ignorable CEs are skipped, and the continuation
 * marker is re-applied to continuation CEs after masking.
 *
 * @param coll   collator used to generate the elements
 * @param string text to iterate over
 * @param status ICU error code; on failure the list is left empty
 */
CEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status)
    : ces(NULL), listMax(CELIST_BUFFER_SIZE), listSize(0)
{
    UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status);
    UCollationStrength strength = ucol_getStrength(coll);
    UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) ==  UCOL_SHIFTED;
    uint32_t variableTop = ucol_getVariableTop(coll, &status);
    uint32_t strengthMask = 0;
    int32_t order;

    if (U_FAILURE(status)) {
        // The iterator may have been opened successfully before a later
        // query failed; release it so this early exit does not leak it.
        if (elems != NULL) {
            ucol_closeElements(elems);
        }
        return;
    }

    // **** only set flag if string has Han(gul) ****
    // ucol_forceHanImplicit(elems, &status); -- removed for ticket #10476

    // Accumulate the order masks for every level up to the strength;
    // anything other than primary/secondary is treated as tertiary.
    switch (strength)
    {
    default:
        strengthMask |= UCOL_TERTIARYORDERMASK;
        U_FALLTHROUGH;
    case UCOL_SECONDARY:
        strengthMask |= UCOL_SECONDARYORDERMASK;
        U_FALLTHROUGH;
    case UCOL_PRIMARY:
        strengthMask |= UCOL_PRIMARYORDERMASK;
    }

    ces = ceBuffer;

    while ((order = ucol_next(elems, &status)) != UCOL_NULLORDER) {
        // Remember the continuation flag before masking strips it.
        UBool cont = isContinuation(order);

        order &= strengthMask;

        if (toShift && variableTop > (uint32_t)order && (order & UCOL_PRIMARYORDERMASK) != 0) {
            if (strength >= UCOL_QUATERNARY) {
                order &= UCOL_PRIMARYORDERMASK;
            } else {
                order = UCOL_IGNORABLE;
            }
        }

        if (order == UCOL_IGNORABLE) {
            continue;
        }

        if (cont) {
            order |= UCOL_CONTINUATION_MARKER;
        }

        add(order, status);
    }

    ucol_closeElements(elems);
}
Ejemplo n.º 8
0
// JNI entry point: static jint NativeCollation_getNormalization(JNIEnv* env, jclass, jint address)
// Reads UCOL_NORMALIZATION_MODE from the collator behind `address`, hands the
// ICU status to icu4jni_error and returns the attribute value.
JNIEXPORT jint JNICALL
Java_com_ibm_icu4jni_text_NativeCollation_getNormalization(JNIEnv* env, jclass,
		jint address) {
	UErrorCode errorCode = U_ZERO_ERROR;
	const jint mode = ucol_getAttribute(toCollator(address),
			UCOL_NORMALIZATION_MODE, &errorCode);
	icu4jni_error(env, errorCode);
	return mode;
}
Ejemplo n.º 9
0
// Collator::getAttribute: returns the value of ICU collation attribute `attr`.
// On ICU failure the error is recorded on the object and 0 is returned.
static int64_t HHVM_METHOD(Collator, getAttribute, int64_t attr) {
  FETCH_COL(data, this_, 0);
  data->clearError();

  UErrorCode icuStatus = U_ZERO_ERROR;
  const UColAttributeValue value =
    ucol_getAttribute(data->collator(), (UColAttribute)attr, &icuStatus);
  if (U_SUCCESS(icuStatus)) {
    return (int64_t)value;
  }

  data->setError(icuStatus, "Error getting attribute value");
  return 0;
}
Ejemplo n.º 10
0
Archivo: icu.c Proyecto: IvoNet/calibre
// Collator.upper_first {{{
/*
 * Getter for Collator.upper_first.
 *
 * Returns None when UCOL_CASE_FIRST is UCOL_OFF, True when it is
 * UCOL_UPPER_FIRST and False otherwise (i.e. UCOL_LOWER_FIRST).
 * Sets ValueError and returns NULL if ICU reports a failure.
 */
static PyObject *
icu_Collator_get_upper_first(icu_Collator *self, void *closure) {
    UErrorCode status = U_ZERO_ERROR;
    UColAttributeValue val;

    val = ucol_getAttribute(self->collator, UCOL_CASE_FIRST, &status);
    if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); return NULL; }

    if (val == UCOL_OFF) { Py_RETURN_NONE; }
    /* Compare explicitly: UCOL_LOWER_FIRST is a non-zero enum value too, so a
       plain truthiness test would report True for lower-first collators. */
    if (val == UCOL_UPPER_FIRST) {
        Py_RETURN_TRUE;
    }
    Py_RETURN_FALSE;
}
Ejemplo n.º 11
0
int64_t c_Collator::t_getattribute(int64_t attr) {
  if (!m_ucoll) {
    raise_warning("getattribute called on uninitialized Collator object");
    return 0;
  }
  m_errcode.clearError();
  UErrorCode error = U_ZERO_ERROR;
  int64_t ret = (int64_t)ucol_getAttribute(m_ucoll, (UColAttribute)attr,
                                       &error);
  if (U_FAILURE(error)) {
    m_errcode.setError(error, "Error getting attribute value");
    return 0;
  }
  return ret;
}
Ejemplo n.º 12
0
int64_t c_Collator::t_getattribute(int64_t attr) {
  if (!m_ucoll) {
    raise_warning("getattribute called on uninitialized Collator object");
    return 0;
  }
  m_errcode.clear();
  int64_t ret = (int64_t)ucol_getAttribute(m_ucoll, (UColAttribute)attr,
                                       &(m_errcode.code));
  s_intl_error->m_error.clear();
  s_intl_error->m_error.code = m_errcode.code;
  if (U_FAILURE(m_errcode.code)) {
    m_errcode.custom_error_message = "Error getting attribute value";
    s_intl_error->m_error.custom_error_message = m_errcode.custom_error_message;
    return 0;
  }
  return ret;
}
Ejemplo n.º 13
0
// Sets up a search target for `theCollator`: caches the collator's strength,
// shifted flag and variable top, allocates the CE buffer sized from the
// pattern length and the largest expansion, optionally installs `target`, and
// derives the order mask from the strength.
// On allocation failure `status` is set to U_MEMORY_ALLOCATION_ERROR and the
// remaining setup is skipped.
Target::Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status)
    : bufferSize(0), bufferMin(0), bufferMax(0),
      strengthMask(0), strength(UCOL_PRIMARY), variableTop(0), toShift(FALSE), coll(theCollator),
      nfd(*Normalizer2Factory::getNFDInstance(status)),
      targetString(NULL), targetBuffer(NULL), targetLength(0), elements(NULL), charBreakIterator(NULL)
{
    strength = ucol_getStrength(coll);
    toShift = (UCOL_SHIFTED == ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status));
    variableTop = ucol_getVariableTop(coll, &status);

    // Scan the zero-terminated expansion size table for its largest entry.
    uint8_t widestExpansion = 0;
    const uint8_t *entry = coll->expansionCESize;
    while (*entry != 0) {
        if (widestExpansion < *entry) {
            widestExpansion = *entry;
        }
        ++entry;
    }

    // room for an extra character on each end, plus 4 for safety
    bufferSize = patternLength + (2 * widestExpansion) + 4;
    ceb = NEW_ARRAY(CEI, bufferSize);
    if (ceb == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    if (target != NULL) {
        setTargetString(target);
    }

    // Primary weights always count; secondary and tertiary weights are added
    // for every strength above primary and above secondary respectively.
    strengthMask |= UCOL_PRIMARYORDERMASK;
    if (strength != UCOL_PRIMARY) {
        strengthMask |= UCOL_SECONDARYORDERMASK;
        if (strength != UCOL_SECONDARY) {
            strengthMask |= UCOL_TERTIARYORDERMASK;
        }
    }
}
Ejemplo n.º 14
0
// Returns the value of ICU collation attribute `attr`. The ICU status is
// stored in m_errcode and copied to s_intl_error; on failure both also receive
// a custom message and 0 is returned.
int64 c_Collator::t_getattribute(int64 attr) {
  INSTANCE_METHOD_INJECTION_BUILTIN(Collator, Collator::getattribute);
  if (!m_ucoll) {
    raise_warning("getattribute called on uninitialized Collator object");
    return 0;
  }

  m_errcode.clear();
  const int64 value =
    (int64)ucol_getAttribute(m_ucoll, (UColAttribute)attr, &(m_errcode.code));

  // Copy the outcome of this call into the shared intl error slot.
  s_intl_error->m_error.clear();
  s_intl_error->m_error.code = m_errcode.code;

  if (U_SUCCESS(m_errcode.code)) {
    return value;
  }

  m_errcode.custom_error_message = "Error getting attribute value";
  s_intl_error->m_error.custom_error_message = m_errcode.custom_error_message;
  return 0;
}
Ejemplo n.º 15
0
// Convenience wrapper: returns the UCOL_STRENGTH attribute of `coll`.
// The ICU status from the lookup is kept local and discarded.
U_CAPI UCollationStrength U_EXPORT2
ucol_getStrength(const UCollator *coll)
{
    UErrorCode ignoredStatus = U_ZERO_ERROR;
    UCollationStrength strength = ucol_getAttribute(coll, UCOL_STRENGTH, &ignoredStatus);
    return strength;
}
U_CAPI UBool U_EXPORT2
ucol_equals(const UCollator *source, const UCollator *target) {
    UErrorCode status = U_ZERO_ERROR;
    // if pointers are equal, collators are equal
    if(source == target) {
        return TRUE;
    }
    int32_t i = 0, j = 0;
    // if any of attributes are different, collators are not equal
    for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
        if(ucol_getAttribute(source, (UColAttribute)i, &status) != ucol_getAttribute(target, (UColAttribute)i, &status) || U_FAILURE(status)) {
            return FALSE;
        }
    }

    int32_t sourceRulesLen = 0, targetRulesLen = 0;
    const UChar *sourceRules = ucol_getRules(source, &sourceRulesLen);
    const UChar *targetRules = ucol_getRules(target, &targetRulesLen);

    if(sourceRulesLen == targetRulesLen && u_strncmp(sourceRules, targetRules, sourceRulesLen) == 0) {
        // all the attributes are equal and the rules are equal - collators are equal
        return(TRUE);
    }
    // hard part, need to construct tree from rules and see if they yield the same tailoring
    UBool result = TRUE;
    UParseError parseError;
    UColTokenParser sourceParser, targetParser;
    int32_t sourceListLen = 0, targetListLen = 0;
    ucol_tok_initTokenList(&sourceParser, sourceRules, sourceRulesLen, source->UCA, &status);
    ucol_tok_initTokenList(&targetParser, targetRules, targetRulesLen, target->UCA, &status);
    sourceListLen = ucol_tok_assembleTokenList(&sourceParser, &parseError, &status);
    targetListLen = ucol_tok_assembleTokenList(&targetParser, &parseError, &status);

    if(sourceListLen != targetListLen) {
        // different number of resets
        result = FALSE;
    } else {
        UColToken *sourceReset = NULL, *targetReset = NULL;
        UChar *sourceResetString = NULL, *targetResetString = NULL;
        int32_t sourceStringLen = 0, targetStringLen = 0;
        for(i = 0; i < sourceListLen; i++) {
            sourceReset = sourceParser.lh[i].reset;
            sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF);
            sourceStringLen = sourceReset->source >> 24;
            for(j = 0; j < sourceListLen; j++) {
                targetReset = targetParser.lh[j].reset;
                targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF);
                targetStringLen = targetReset->source >> 24;
                if(sourceStringLen == targetStringLen && (u_strncmp(sourceResetString, targetResetString, sourceStringLen) == 0)) {
                    sourceReset = sourceParser.lh[i].first;
                    targetReset = targetParser.lh[j].first;
                    while(sourceReset != NULL && targetReset != NULL) {
                        sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF);
                        sourceStringLen = sourceReset->source >> 24;
                        targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF);
                        targetStringLen = targetReset->source >> 24;
                        if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) {
                            result = FALSE;
                            goto returnResult;
                        }
                        // probably also need to check the expansions
                        if(sourceReset->expansion) {
                            if(!targetReset->expansion) {
                                result = FALSE;
                                goto returnResult;
                            } else {
                                // compare expansions
                                sourceResetString = sourceParser.source+(sourceReset->expansion& 0xFFFFFF);
                                sourceStringLen = sourceReset->expansion >> 24;
                                targetResetString = targetParser.source+(targetReset->expansion & 0xFFFFFF);
                                targetStringLen = targetReset->expansion >> 24;
                                if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) {
                                    result = FALSE;
                                    goto returnResult;
                                }
                            }
                        } else {
                            if(targetReset->expansion) {
                                result = FALSE;
                                goto returnResult;
                            }
                        }
                        sourceReset = sourceReset->next;
                        targetReset = targetReset->next;
                    }
                    if(sourceReset != targetReset) { // at least one is not NULL
                        // there are more tailored elements in one list
                        result = FALSE;
                        goto returnResult;
                    }


                    break;
                }
            }
            // couldn't find the reset anchor, so the collators are not equal
            if(j == sourceListLen) {
                result = FALSE;
                goto returnResult;
            }
        }
Ejemplo n.º 17
0
// Opens a collator from a short definition string.
//
// definition    - short string that is parsed into a CollatorSpec
// forceDefaults - when true, every attribute given in the definition is set
//                 on the collator even if it already has that value
// parseError    - receives error position information; may be NULL, in which
//                 case a local UParseError is used instead
// status        - ICU error code; nothing is done if it is already a failure
//
// Returns the opened collator, or NULL/0 on failure (any partially configured
// collator is closed first).
U_CAPI UCollator* U_EXPORT2
ucol_openFromShortString( const char *definition,
                          UBool forceDefaults,
                          UParseError *parseError,
                          UErrorCode *status)
{
    UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN_FROM_SHORT_STRING);
    UTRACE_DATA1(UTRACE_INFO, "short string = \"%s\"", definition);

    if(U_FAILURE(*status)) return 0;

    UParseError internalParseError;

    // Let the rest of the function write parse errors unconditionally.
    if(!parseError) {
        parseError = &internalParseError;
    }
    parseError->line = 0;
    parseError->offset = 0;
    parseError->preContext[0] = 0;
    parseError->postContext[0] = 0;


    // first we want to pick stuff out of short string.
    // we'll end up with an UCA version, locale and a bunch of
    // settings

    // analyse the string in order to get everything we need.
    const char *string = definition;
    CollatorSpec s;
    ucol_sit_initCollatorSpecs(&s);
    string = ucol_sit_readSpecs(&s, definition, parseError, status);
    ucol_sit_calculateWholeLocale(&s);

    // Canonicalize the locale from the spec and open the base collator with it.
    char buffer[internalBufferSize];
    uprv_memset(buffer, 0, internalBufferSize);
    uloc_canonicalize(s.locale, buffer, internalBufferSize, status);

    UCollator *result = ucol_open(buffer, status);
    int32_t i = 0;

    // Apply every attribute the definition set explicitly (non-default).
    for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
        if(s.options[i] != UCOL_DEFAULT) {
            if(forceDefaults || ucol_getAttribute(result, (UColAttribute)i, status) != s.options[i]) {
                ucol_setAttribute(result, (UColAttribute)i, s.options[i], status);
            }

            if(U_FAILURE(*status)) {
                // Report how far into the definition ucol_sit_readSpecs got.
                parseError->offset = (int32_t)(string - definition);
                ucol_close(result);
                return NULL;
            }

        }
    }
    // Variable top is given either as a string or as a raw value.
    if(s.variableTopSet) {
        if(s.variableTopString[0]) {
            ucol_setVariableTop(result, s.variableTopString, s.variableTopStringLen, status);
        } else { // we set by value, using 'B'
            ucol_restoreVariableTop(result, s.variableTopValue, status);
        }
    }


    if(U_FAILURE(*status)) { // here it can only be a bogus value
        ucol_close(result);
        result = NULL;
    }

    UTRACE_EXIT_PTR_STATUS(result, *status);
    return result;
}
Ejemplo n.º 18
0
/**
* Tests the [variable top] tag in rule syntax. Since the default [alternate]
* tag has the value shifted, any codepoints before [variable top] should give
* a primary ce of 0.
*
* NOTE: the whole body is compiled out by the "#if 0" below, so this function
* currently does nothing when called.
*/
static void TestVariableTop(void)
{
#if 0
    /*
     * Starting with ICU 53, setting the variable top via a pseudo relation string
     * is not supported any more.
     * It was replaced by the [maxVariable symbol] setting.
     * See ICU tickets #9958 and #8032.
     */
    static const char       str[]          = "&z = [variable top]";
          int         len          = strlen(str);
          UChar      rules[sizeof(str)];
          UCollator  *myCollation;
          UCollator  *enCollation;
          UErrorCode  status       = U_ZERO_ERROR;
          UChar       source[1];
          UChar       ch;
          uint8_t     result[20];
          uint8_t     expected[20];

    u_uastrcpy(rules, str);

    enCollation = ucol_open("en_US", &status);
    if (U_FAILURE(status)) {
        log_err_status(status, "ERROR: in creation of collator :%s\n", 
                myErrorName(status));
        return;
    }
    myCollation = ucol_openRules(rules, len, UCOL_OFF, 
                                 UCOL_PRIMARY,NULL, &status);
    if (U_FAILURE(status)) {
        ucol_close(enCollation);
        log_err("ERROR: in creation of rule based collator :%s\n", 
                myErrorName(status));
        return;
    }

    ucol_setStrength(enCollation, UCOL_PRIMARY);
    ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
    ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
        
    if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) !=
        UCOL_SHIFTED || U_FAILURE(status)) {
        log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
    }

    uprv_memset(expected, 0, 20);

    /* space is supposed to be a variable */
    source[0] = ' ';
    len = ucol_getSortKey(enCollation, source, 1, result, 
                          sizeof(result));

    if (uprv_memcmp(expected, result, len) != 0) {
        log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
    }

    ch = 'a';
    while (ch < 'z') {
        source[0] = ch;
        len = ucol_getSortKey(myCollation, source, 1, result,
                              sizeof(result));
        if (uprv_memcmp(expected, result, len) != 0) {
            log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n", 
                    ch);
        }
        ch ++;
    }
  
    ucol_close(enCollation);
    ucol_close(myCollation);
    enCollation = NULL;
    myCollation = NULL;
#endif
}
// Reads the collation attribute `type` from the collator behind `address`,
// passes the ICU status to maybeThrowIcuException and returns the value.
static jint NativeCollation_getAttribute(JNIEnv* env, jclass, jlong address, jint type) {
    UErrorCode icuStatus = U_ZERO_ERROR;
    const UColAttribute attribute = (UColAttribute) type;
    const jint value = ucol_getAttribute(toCollator(address), attribute, &icuStatus);
    maybeThrowIcuException(env, "ucol_getAttribute", icuStatus);
    return value;
}
Ejemplo n.º 20
0
Archivo: icu.c Proyecto: IvoNet/calibre
// Collator.numeric {{{
/*
 * Getter for Collator.numeric.
 *
 * Returns True when UCOL_NUMERIC_COLLATION is UCOL_ON, False otherwise.
 * Sets ValueError and returns NULL if ICU reports a failure, instead of
 * silently treating the failure as "off".
 */
static PyObject *
icu_Collator_get_numeric(icu_Collator *self, void *closure) {
    UErrorCode status = U_ZERO_ERROR;
    UColAttributeValue val;

    val = ucol_getAttribute(self->collator, UCOL_NUMERIC_COLLATION, &status);
    if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); return NULL; }

    if (val == UCOL_ON) {
        Py_RETURN_TRUE;
    }
    Py_RETURN_FALSE;
}
Ejemplo n.º 21
0
/*
 * Compares `source` and `target` with `myCollation` through several code
 * paths and logs an error whenever one of them disagrees with `result`:
 *   - UTF-16 character iterators,
 *   - UTF-8 character iterators, with and without normalization (non-quick
 *     mode only),
 *   - partial sort keys of several piece sizes,
 *   - ucol_strcoll with explicit and with null-terminated lengths,
 *   - full sort keys compared as C strings.
 *
 * myCollation - collator under test; its normalization mode is temporarily
 *               switched on and then restored to the value read on entry.
 * source, target - null-terminated UTF-16 strings.
 * result      - expected comparison result.
 *
 * NOTE(review): `status` is not reset after the UTF-8 section, so a failed
 * conversion there is still set when the later calls receive &status.
 */
static void doTestVariant(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
{
    int32_t sortklen1, sortklen2, sortklenmax, sortklenmin;
    int temp=0, gSortklen1=0,gSortklen2=0;
    UCollationResult compareResult, compareResulta, keyResult, compareResultIter = result;
    uint8_t *sortKey1, *sortKey2, *sortKey1a, *sortKey2a;
    uint32_t sLen = u_strlen(source);
    uint32_t tLen = u_strlen(target);
    char buffer[256];
    /* sortKeyToString reads *len, so it must not be left uninitialized. */
    uint32_t len = sizeof(buffer);
    UErrorCode status = U_ZERO_ERROR;
    UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);

    UCharIterator sIter, tIter;
    uiter_setString(&sIter, source, sLen);
    uiter_setString(&tIter, target, tLen);
    compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
    if(compareResultIter != result) {
      log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
    }

    /* convert the strings to UTF-8 and do try comparing with char iterator */
    if(QUICK <= 0) { /*!QUICK*/
      char utf8Source[256], utf8Target[256];
      int32_t utf8SourceLen = 0, utf8TargetLen = 0;
      u_strToUTF8(utf8Source, 256, &utf8SourceLen, source, sLen, &status);
      if(U_FAILURE(status)) { /* probably buffer is not big enough */
        log_verbose("Src UTF-8 buffer too small! Will not compare!\n");
      } else {
        u_strToUTF8(utf8Target, 256, &utf8TargetLen, target, tLen, &status);
        if(U_SUCCESS(status)) { /* both conversions fit in their buffers */
          UCollationResult compareResultUTF8 = result, compareResultUTF8Norm = result;
          /*UCharIterator sIter, tIter;*/
          /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
          uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
          uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
       /*uiter_setString(&sIter, source, sLen);
      uiter_setString(&tIter, target, tLen);*/
          compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
          /* Repeat with normalization forced on, then restore the original mode. */
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
          sIter.move(&sIter, 0, UITER_START);
          tIter.move(&tIter, 0, UITER_START);
          compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
          if(compareResultUTF8 != compareResultIter) {
            log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
          }
          if(compareResultUTF8 != compareResultUTF8Norm) {
            log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
          }
        } else {
          log_verbose("Target UTF-8 buffer too small! Did not compare!\n");
        }
        if(U_FAILURE(status)) {
          log_verbose("UTF-8 strcoll failed! Ignoring result\n");
        }
      }
    }

    /* testing the partial sortkeys */
    if(1) { /*!QUICK*/
      int32_t i = 0;
      int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
      int32_t partialSizesSize = 1;
      if(QUICK <= 0) {
        partialSizesSize = 7;
      }
      /*log_verbose("partial sortkey test piecesize=");*/
      for(i = 0; i < partialSizesSize; i++) {
        UCollationResult partialSKResult = result, partialNormalizedSKResult = result;
        /*log_verbose("%i ", partialSizes[i]);*/

        partialSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
        if(partialSKResult != result) {
          log_err("Partial sortkey comparison returned wrong result (%i exp. %i): %s, %s (size %i)\n",
            partialSKResult, result,
            aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
        }

        if(QUICK <= 0 && norm != UCOL_ON) {
          /*log_verbose("N ");*/
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
          partialNormalizedSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
          if(partialSKResult != partialNormalizedSKResult) {
            log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n",
              aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
          }
        }
      }
      /*log_verbose("\n");*/
    }


    compareResult  = ucol_strcoll(myCollation, source, sLen, target, tLen);
    compareResulta = ucol_strcoll(myCollation, source, -1,   target, -1);
    if (compareResult != compareResulta) {
        log_err("ucol_strcoll result from null terminated and explicit length strings differs.\n");
    }

    /* Preflight both sort keys to learn the required lengths. */
    sortklen1=ucol_getSortKey(myCollation, source, sLen,  NULL, 0);
    sortklen2=ucol_getSortKey(myCollation, target, tLen,  NULL, 0);

    sortklenmax = (sortklen1>sortklen2?sortklen1:sortklen2);
    sortklenmin = (sortklen1<sortklen2?sortklen1:sortklen2);

    sortKey1 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey1a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey2 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey2a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    if(sortKey1 == NULL || sortKey1a == NULL || sortKey2 == NULL || sortKey2a == NULL) {
        /* free(NULL) is a no-op, so releasing all four is safe. */
        log_err("Memory allocation failed while generating sort keys.\n");
        free(sortKey1);
        free(sortKey1a);
        free(sortKey2);
        free(sortKey2a);
        return;
    }

    ucol_getSortKey(myCollation, source, sLen, sortKey1,  sortklen1+1);
    ucol_getSortKey(myCollation, source, -1,   sortKey1a, sortklen1+1);
    ucol_getSortKey(myCollation, target, tLen, sortKey2,  sortklen2+1);
    ucol_getSortKey(myCollation, target, -1,   sortKey2a, sortklen2+1);

    /* Check that sort key generated with null terminated string is identical  */
    /*  to that generated with a length specified.                             */
    if (uprv_strcmp((const char *)sortKey1, (const char *)sortKey1a) != 0 ||
        uprv_strcmp((const char *)sortKey2, (const char *)sortKey2a) != 0 ) {
        log_err("Sort Keys from null terminated and explicit length strings differ.\n");
    }

    /*memcmp(sortKey1, sortKey2,sortklenmax);*/
    temp= uprv_strcmp((const char *)sortKey1, (const char *)sortKey2);
    gSortklen1 = uprv_strlen((const char *)sortKey1)+1;
    gSortklen2 = uprv_strlen((const char *)sortKey2)+1;
    if(sortklen1 != gSortklen1){
        log_err("SortKey length does not match Expected: %i Got: %i\n",sortklen1, gSortklen1);
        log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey1, buffer, &len));
    }
    if(sortklen2!= gSortklen2){
        log_err("SortKey length does not match Expected: %i Got: %i\n", sortklen2, gSortklen2);
        log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey2, buffer, &len));
    }

    /* Map the strcmp-style result onto a collation result. */
    if(temp < 0) {
        keyResult=UCOL_LESS;
    }
    else if(temp > 0) {
        keyResult= UCOL_GREATER;
    }
    else {
        keyResult = UCOL_EQUAL;
    }
    reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, compareResultIter, result );
    free(sortKey1);
    free(sortKey2);
    free(sortKey1a);
    free(sortKey2a);

}
Ejemplo n.º 22
0
/**
* Tests the [variable top] tag in rule syntax. Since the default [alternate]
* tag has the value shifted, any codepoints before [variable top] should give
* a primary ce of 0.
*/
static void TestVariableTop(void)
{
    const char       *str          = "&z = [variable top]";
          int         len          = strlen(str);
          UChar      *rules;
          UCollator  *myCollation;
          UCollator  *enCollation;
          UErrorCode  status       = U_ZERO_ERROR;
          UChar       source[1];
          UChar       ch;
          uint8_t     result[20];
          uint8_t     expected[20];

    rules = (UChar*)malloc(sizeof(UChar*) * (len + 1));
    u_uastrcpy(rules, str);

    enCollation = ucol_open("en_US", &status);
    myCollation = ucol_openRules(rules, len, UCOL_OFF, 
                                 UCOL_PRIMARY,NULL, &status);
    if (U_FAILURE(status)) {
        log_err("ERROR: in creation of rule based collator :%s\n", 
                myErrorName(status));
        return;
    }

    ucol_setStrength(enCollation, UCOL_PRIMARY);
    ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
    ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
        
    if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) !=
        UCOL_SHIFTED || U_FAILURE(status)) {
        log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
    }

    uprv_memset(expected, 0, 20);

    /* space is supposed to be a variable */
    source[0] = ' ';
    len = ucol_getSortKey(enCollation, source, 1, result, 
                          sizeof(result));

    if (uprv_memcmp(expected, result, len) != 0) {
        log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
    }

    ch = 'a';
    while (ch < 'z') {
        source[0] = ch;
        len = ucol_getSortKey(myCollation, source, 1, result,
                              sizeof(result));
        if (uprv_memcmp(expected, result, len) != 0) {
            log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n", 
                    ch);
        }
        ch ++;
    }
  
    free(rules);
    ucol_close(enCollation);
    ucol_close(myCollation);
    enCollation = NULL;
    myCollation = NULL;
}
Ejemplo n.º 23
0
/*
 * Runs one source/target pair through several comparison paths of the given
 * collator and reports every path whose outcome differs from what is expected.
 *
 * Paths exercised, in order:
 *   1. Collator::compare() and CollationKey::compareTo(); both outcomes are
 *      handed to reportCResult() together with the expected result.
 *   2. ucol_strcollIter() over UTF-16 string iterators.
 *   3. Unless `quick` is set: ucol_strcollIter() over UTF-8 iterators, once
 *      with the collator's current normalization mode and once with
 *      normalization forced on.
 *   4. compareUsingPartials() for a list of piece sizes (only the first size
 *      in quick mode); when normalization is not already on and `quick` is
 *      not set, the same comparison is repeated with normalization forced on.
 *
 * The collator's normalization mode is changed temporarily in steps 3 and 4
 * and set back to the value read at the start of this function.
 *
 * col     collator under test
 * source  left-hand string of the comparison
 * target  right-hand string of the comparison
 * result  comparison result every path is expected to produce
 */
void 
IntlTestCollator::doTestVariant(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
{   
  UErrorCode status = U_ZERO_ERROR;

  UCollator *myCollation = col->toUCollator();

  Collator::EComparisonResult compareResult = col->compare(source, target);

  CollationKey srckey, tgtkey;
  col->getCollationKey(source, srckey, status);
  col->getCollationKey(target, tgtkey, status);
  if (U_FAILURE(status)){
    errln("Creation of collation keys failed\n");
  }
  Collator::EComparisonResult keyResult = srckey.compareTo(tgtkey);

  reportCResult(source, target, srckey, tgtkey, compareResult, keyResult, result, result);

    /* remember the normalization mode so it can be restored after each forced-on pass */
    UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);

    int32_t sLen = source.length(), tLen = target.length();
    const UChar* src = source.getBuffer();
    const UChar* trg = target.getBuffer();
    UCollationResult compareResultIter = (UCollationResult)result;

    /* iterative comparison over the UTF-16 buffers */
    {
      UCharIterator sIter, tIter;
      uiter_setString(&sIter, src, sLen);
      uiter_setString(&tIter, trg, tLen);
      compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
      if(compareResultIter != (UCollationResult)result) {
        errln("Different result for iterative comparison "+source+" "+target);
      }
    }
    /* convert the strings to UTF-8 and do try comparing with char iterator */
    if(!quick) { /*!QUICK*/
      char utf8Source[256], utf8Target[256];
      int32_t utf8SourceLen = 0, utf8TargetLen = 0;
      u_strToUTF8(utf8Source, 256, &utf8SourceLen, src, sLen, &status);
      if(U_FAILURE(status)) { /* probably buffer is not big enough */
        log("Src UTF-8 buffer too small! Will not compare!\n");
        /* This failure is tolerated (only logged). Clear it so that the ICU
         * calls in the partial sortkey stage below do not see a failing
         * status on entry and turn into no-ops. */
        status = U_ZERO_ERROR;
      } else {
        u_strToUTF8(utf8Target, 256, &utf8TargetLen, trg, tLen, &status);
        if(U_SUCCESS(status)) {
          UCollationResult compareResultUTF8 = (UCollationResult)result, compareResultUTF8Norm = (UCollationResult)result;
          UCharIterator sIter, tIter;
          /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
          uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
          uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
       /*uiter_setString(&sIter, source, sLen);
      uiter_setString(&tIter, target, tLen);*/
          compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
          /* second pass with normalization forced on; rewind both iterators first */
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
          sIter.move(&sIter, 0, UITER_START);
          tIter.move(&tIter, 0, UITER_START);
          compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
          if(compareResultUTF8 != compareResultIter) {
            errln("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. "+source+", "+target);
          }
          if(compareResultUTF8 != compareResultUTF8Norm) {
            errln("different results in iterative when normalization is turned on with UTF-8 strings. "+source+", "+target);
          }
        } else { /* probably buffer is not big enough */
          log("Target UTF-8 buffer too small! Did not compare!\n");
        }
        if(U_FAILURE(status)) {
          log("UTF-8 strcoll failed! Ignoring result\n");
          /* Tolerated failure, same as above: do not let it leak into the
           * partial sortkey stage. */
          status = U_ZERO_ERROR;
        }
      }
    }

    /* testing the partial sortkeys */
    { /*!QUICK*/
      int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
      int32_t partialSizesSize = 1;
      if(!quick) {
        partialSizesSize = 7;
      }
      int32_t i = 0;
      log("partial sortkey test piecesize=");
      for(i = 0; i < partialSizesSize; i++) {
        UCollationResult partialSKResult = (UCollationResult)result, partialNormalizedSKResult = (UCollationResult)result;
        log("%i ", partialSizes[i]);

        partialSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
        if(partialSKResult != (UCollationResult)result) {
          errln("Partial sortkey comparison returned wrong result: "+source+", "+target+" (size "+partialSizes[i]+")");           
        }

        /* repeat with normalization forced on, then restore the original mode */
        if(norm != UCOL_ON && !quick) {
          log("N ");
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
          partialNormalizedSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
          if(partialSKResult != partialNormalizedSKResult) {
            errln("Partial sortkey comparison gets different result when normalization is on: "+source+", "+target+" (size "+partialSizes[i]+")");           
          }
        }
      }
      log("\n");
    }
/*
  if (compareResult != result) {
    errln("String comparison failed in variant test\n");
  }
  if (keyResult != result) {
    errln("Collation key comparison failed in variant test\n");
  }
*/
}
Ejemplo n.º 24
0
/*
 * Reads the UCOL_STRENGTH attribute of the wrapped collator and returns it
 * converted by getECollationStrength(). The error code produced by the
 * attribute lookup is local to this call and is not reported to the caller.
 */
Collator::ECollationStrength RuleBasedCollator::getStrength(void) const
{
    UErrorCode localStatus = U_ZERO_ERROR;
    const UColAttributeValue rawStrength =
        ucol_getAttribute(ucollator, UCOL_STRENGTH, &localStatus);

    return getECollationStrength(rawStrength);
}
Ejemplo n.º 25
0
/*
 * Forwarding wrapper around ucol_getAttribute(): passes the collator,
 * attribute selector and status pointer through unchanged and returns
 * whatever the wrapped call returns.
 */
UColAttributeValue __hs_ucol_getAttribute(const UCollator *coll,
					  UColAttribute attr,
					  UErrorCode *status)
{
    const UColAttributeValue value = ucol_getAttribute(coll, attr, status);

    return value;
}