Exemplo n.º 1
0
static void TestSecondary()
{
    int32_t i;
    int32_t len;
    UChar *rules;
    UCollator *myCollation;
    UErrorCode status=U_ZERO_ERROR;
    const char* str="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
    len = strlen(str);
    rules=(UChar*)malloc(sizeof(UChar*) * (len+1));
    u_uastrcpy(rules, str);

    myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
    if(U_FAILURE(status)){
        log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status));
    }
    ucol_setStrength(myCollation, UCOL_SECONDARY);
    for (i = 26; i < 34 ; i++)
    {
        doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
    }
    free(rules);
    ucol_close(myCollation);
    myCollation = 0;
}
Exemplo n.º 2
0
static void TestPrimary( )
{
    int32_t len,i;
    UCollator *myCollation;
    UErrorCode status=U_ZERO_ERROR;
    static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";   
    UChar rules[sizeof(str)];
    len = strlen(str);
    u_uastrcpy(rules, str);

    myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
    if(U_FAILURE(status)){
        log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
        return;
    }
    ucol_setStrength(myCollation, UCOL_PRIMARY);
    
    for (i = 17; i < 26 ; i++)
    {
        
        doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
    }
    ucol_close(myCollation);
    myCollation = 0;
}
Exemplo n.º 3
0
/*
 * The collator returned by this function is owned by the callee and must be
 * closed when this method returns with a U_SUCCESS UErrorCode.
 *
 * On error, the return value is undefined.
 */
UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t options, UErrorCode* pErr)
{
    UColAttributeValue strength = ucol_getStrength(pCollator);

    bool isIgnoreCase = (options & CompareOptionsIgnoreCase) == CompareOptionsIgnoreCase;
    bool isIgnoreNonSpace = (options & CompareOptionsIgnoreNonSpace) == CompareOptionsIgnoreNonSpace;
    bool isIgnoreSymbols = (options & CompareOptionsIgnoreSymbols) == CompareOptionsIgnoreSymbols;

    if (isIgnoreCase)
    {
        strength = UCOL_SECONDARY;
    }

    if (isIgnoreNonSpace)
    {
        strength = UCOL_PRIMARY;
    }

    UCollator* pClonedCollator;
    std::vector<UChar> customRules = GetCustomRules(options, strength, isIgnoreSymbols);
    if (customRules.empty())
    {
        pClonedCollator = ucol_safeClone(pCollator, nullptr, nullptr, pErr);
    }
    else
    {
        int32_t customRuleLength = customRules.size();

        int32_t localeRulesLength;
        const UChar* localeRules = ucol_getRules(pCollator, &localeRulesLength);

        std::vector<UChar> completeRules(localeRulesLength + customRuleLength + 1, '\0');
        for (int i = 0; i < localeRulesLength; i++)
        {
            completeRules[i] = localeRules[i];
        }
        for (int i = 0; i < customRuleLength; i++)
        {
            completeRules[localeRulesLength + i] = customRules[i];
        }

        pClonedCollator = ucol_openRules(completeRules.data(), completeRules.size(), UCOL_DEFAULT, strength, NULL, pErr);
    }

    if (isIgnoreSymbols)
    {
        ucol_setAttribute(pClonedCollator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, pErr);
    }

    ucol_setAttribute(pClonedCollator, UCOL_STRENGTH, strength, pErr);

    // casing differs at the tertiary level.
    // if strength is less than tertiary, but we are not ignoring case, then we need to flip CASE_LEVEL On
    if (strength < UCOL_TERTIARY && !isIgnoreCase)
    {
        ucol_setAttribute(pClonedCollator, UCOL_CASE_LEVEL, UCOL_ON, pErr);
    }

    return pClonedCollator;
}
Exemplo n.º 4
0
static void TestExtra()
{
    int32_t i, j;
    int32_t len;
    UChar *rules;
    UCollator *myCollation;
    UErrorCode status = U_ZERO_ERROR;
    const char* str="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
    len = strlen(str);
    rules=(UChar*)malloc(sizeof(UChar*) * (len+1));
    u_uastrcpy(rules, str);

    myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
    if(U_FAILURE(status)){
        log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status));
    }
    ucol_setStrength(myCollation, UCOL_TERTIARY);
    for (i = 0; i < COUNT_TEST_CASES-1 ; i++)
    {
        for (j = i + 1; j < COUNT_TEST_CASES; j += 1)
        {
        
            doTest(myCollation, testCases[i], testCases[j], UCOL_LESS);
        }
    }
    free(rules);
    ucol_close(myCollation);
    myCollation = 0;
}
Exemplo n.º 5
0
static jint NativeCollation_openCollatorFromRules(JNIEnv* env, jclass, jstring rules0, jint mode, jint strength) {
    ScopedJavaUnicodeString rules(env, rules0);
    UErrorCode status = U_ZERO_ERROR;
    UCollator* c = ucol_openRules(rules.unicodeString().getBuffer(), rules.unicodeString().length(),
            UColAttributeValue(mode), UCollationStrength(strength), NULL, &status);
    icu4jni_error(env, status);
    return static_cast<jint>(reinterpret_cast<uintptr_t>(c));
}
static jlong NativeCollation_openCollatorFromRules(JNIEnv* env, jclass, jstring javaRules, jint mode, jint strength) {
    ScopedStringChars rules(env, javaRules);
    if (rules.get() == NULL) {
        return -1;
    }
    UErrorCode status = U_ZERO_ERROR;
    UCollator* c = ucol_openRules(rules.get(), rules.size(),
            UColAttributeValue(mode), UCollationStrength(strength), NULL, &status);
    maybeThrowIcuException(env, "ucol_openRules", status);
    return static_cast<jlong>(reinterpret_cast<uintptr_t>(c));
}
Exemplo n.º 7
0
static void
TestJitterbug1098(){
    UChar rule[1000];
    UCollator* c1 = NULL;
    UErrorCode status = U_ZERO_ERROR;
    UParseError parseError;
    char preContext[200]={0};
    char postContext[200]={0};
    int i=0;
    const char* rules[] = {
         "&''<\\\\",
         "&\\'<\\\\",
         "&\\\"<'\\'",
         "&'\"'<\\'",
         '\0'

    };
    const UCollationResult results1098[] = {
        UCOL_LESS,
        UCOL_LESS, 
        UCOL_LESS,
        UCOL_LESS,
    };
    const UChar input[][2]= {
        {0x0027,0x005c},
        {0x0027,0x005c},
        {0x0022,0x005c},
        {0x0022,0x0027},
    };
    UChar X[2] ={0};
    UChar Y[2] ={0};
    u_memset(parseError.preContext,0x0000,U_PARSE_CONTEXT_LEN);      
    u_memset(parseError.postContext,0x0000,U_PARSE_CONTEXT_LEN);
    for(;rules[i]!=0;i++){
        u_uastrcpy(rule, rules[i]);
        c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, &parseError, &status);
        if(U_FAILURE(status)){
            log_err("Could not parse the rules syntax. Error: %s ", u_errorName(status));

            if (status == U_PARSE_ERROR) {
                u_UCharsToChars(parseError.preContext,preContext,20);
                u_UCharsToChars(parseError.postContext,postContext,20);
                log_verbose("\n\tPre-Context: %s \n\tPost-Context:%s \n",preContext,postContext);
            }

            return;
        }
        X[0] = input[i][0];
        Y[0] = input[i][1];
        doTest(c1,X,Y,results1098[i]);
        ucol_close(c1);
    }
}
U_CDECL_END

/****************************************************************************/
/* Following are the open/close functions                                   */
/*                                                                          */
/****************************************************************************/
static UCollator*
tryOpeningFromRules(UResourceBundle *collElem, UErrorCode *status) {
    int32_t rulesLen = 0;
    const UChar *rules = ures_getStringByKey(collElem, "Sequence", &rulesLen, status);
    return ucol_openRules(rules, rulesLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, status);

}
Exemplo n.º 9
0
// not aliasing, not write-through
void
RuleBasedCollator::construct(const UnicodeString& rules,
                             UColAttributeValue collationStrength,
                             UColAttributeValue decompositionMode,
                             UErrorCode& status)
{
  urulestring = 0;
  ucollator = ucol_openRules(rules.getBuffer(), rules.length(),
                             decompositionMode, collationStrength,
                             NULL, &status);

  dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it
  isWriteThroughAlias = FALSE;

  setRuleStringFromCollator(status);
}
Exemplo n.º 10
0
/*
 *### TODO: Add more invalid rules to test all different scenarios.
 *
 */
static void 
TestInvalidRules(){
#define MAX_ERROR_STATES 2

    static const char* rulesArr[MAX_ERROR_STATES] = {
        "& C < ch, cH, Ch[this should fail]<d",
        "& C < ch, cH, & Ch[variable top]"
    };
    static const char* preContextArr[MAX_ERROR_STATES] = {
        " C < ch, cH, Ch",
        "& C < ch, cH",

    };
    static const char* postContextArr[MAX_ERROR_STATES] = {
        "[this should fa",
        ", & Ch[variable"
    };
    int i;

    for(i = 0;i<MAX_ERROR_STATES;i++){
        UChar rules[1000]       = { '\0' };
        UChar preContextExp[1000]  = { '\0' };
        UChar postContextExp[1000] = { '\0' };
        UParseError parseError;
        UErrorCode status = U_ZERO_ERROR;
        UCollator* coll=0;
        u_charsToUChars(rulesArr[i],rules,uprv_strlen(rulesArr[i])+1);
        u_charsToUChars(preContextArr[i],preContextExp,uprv_strlen(preContextArr[i])+1);
        u_charsToUChars(postContextArr[i],postContextExp,uprv_strlen(postContextArr[i])+1);
        /* clean up stuff in parseError */
        u_memset(parseError.preContext,0x0000,U_PARSE_CONTEXT_LEN);      
        u_memset(parseError.postContext,0x0000,U_PARSE_CONTEXT_LEN);
        /* open the rules and test */
        coll = ucol_openRules(rules,u_strlen(rules),UCOL_OFF,UCOL_DEFAULT_STRENGTH,&parseError,&status);
        (void)coll;   /* Suppress set but not used warning. */
        if(u_strcmp(parseError.preContext,preContextExp)!=0){
            log_err_status(status, "preContext in UParseError for ucol_openRules does not match: \"%s\"\n",
                           aescstrdup(parseError.preContext, -1));
        }
        if(u_strcmp(parseError.postContext,postContextExp)!=0){
            log_err_status(status, "postContext in UParseError for ucol_openRules does not match: \"%s\"\n",
                           aescstrdup(parseError.postContext, -1));
        }
    }  
}
Exemplo n.º 11
0
void genericRulesStarterWithResult(const char *rules, const char * const s[], uint32_t size, UCollationResult result) {
  UErrorCode status = U_ZERO_ERROR;
  UChar rlz[RULE_BUFFER_LEN] = { 0 };
  uint32_t rlen = u_unescape(rules, rlz, RULE_BUFFER_LEN);

  UCollator *coll = NULL;
  coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
  log_verbose("Rules starter for %s\n", rules);

  if(U_SUCCESS(status)) {
    genericOrderingTestWithResult(coll, s, size, result);
    ucol_close(coll);
  } else if(status == U_FILE_ACCESS_ERROR) {
    log_data_err("Is your data around?\n");
  } else {
    log_err("Unable to open collator with rules %s\n", rules);
  }
}
Exemplo n.º 12
0
void UCAConformanceTest::initRbUCA() 
{
  if(!rbUCA) {
    UParseError parseError;
    UChar      *ucarules = buffer;
    int32_t size = ucol_getRulesEx(UCA, UCOL_FULL_RULES, ucarules, 
                                   BUFFER_SIZE_);
    if (size > BUFFER_SIZE_) {
        ucarules = (UChar *)malloc(size * sizeof(UChar));
        size = ucol_getRulesEx(UCA, UCOL_FULL_RULES, ucarules, size);
    }
    rbUCA = ucol_openRules(ucarules, size, UCOL_DEFAULT, UCOL_TERTIARY, 
                          &parseError, &status);
    if (U_FAILURE(status)) {
        errln("Failure creating UCA rule-based collator.");
        return;
    }
  }
}
Exemplo n.º 13
0
/* currently not used with options */
void genericRulesStarterWithOptionsAndResult(const char *rules, const char * const s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize, UCollationResult result) {
  UErrorCode status = U_ZERO_ERROR;
  UChar rlz[RULE_BUFFER_LEN] = { 0 };
  uint32_t rlen = u_unescape(rules, rlz, RULE_BUFFER_LEN);
  uint32_t i;

  UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);

  log_verbose("Rules starter for %s\n", rules);

  if(U_SUCCESS(status)) {
    log_verbose("Setting attributes\n");
    for(i = 0; i < attsize; i++) {
      ucol_setAttribute(coll, attrs[i], values[i], &status);
    }

    genericOrderingTestWithResult(coll, s, size, result);
  } else {
    log_err("Unable to open collator with rules %s\n", rules);
  }
  ucol_close(coll);
}
Exemplo n.º 14
0
// not aliasing, not write-through
void
RuleBasedCollator::construct(const UnicodeString& rules,
                             UColAttributeValue collationStrength,
                             UColAttributeValue decompositionMode,
                             UErrorCode& status)
{
    ucollator = ucol_openRules(rules.getBuffer(), rules.length(),
        decompositionMode, collationStrength,
        NULL, &status);

    dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it
    isWriteThroughAlias = FALSE;

    if(ucollator == NULL) {
        if(U_SUCCESS(status)) {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
        return; // Failure
    }

    setRuleStringFromCollator();
}
Exemplo n.º 15
0
static void TestIdentical()
{
    int32_t i;
    int32_t len;
    UCollator *myCollation;
    UErrorCode status=U_ZERO_ERROR;
    static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
    UChar rules[sizeof(str)];
    len = strlen(str);
    u_uastrcpy(rules, str);

    myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_IDENTICAL, NULL,&status);
    if(U_FAILURE(status)){
        log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
        return;
    }
    for(i= 34; i<37; i++)
    {
        doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
    }
    ucol_close(myCollation);
    myCollation = 0;
}
Exemplo n.º 16
0
/**
* Test the incremental normalization
*/
static void TestNormalization()
{
          UErrorCode          status = U_ZERO_ERROR;
    const char               *str    =
                            "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
          UCollator          *coll;
          UChar               rule[50];
          int                 rulelen = u_unescape(str, rule, 50);
          int                 count = 0;
    const char                *testdata[] =
                        {"\\u1ED9", "o\\u0323\\u0302",
                        "\\u0300\\u0315", "\\u0315\\u0300",
                        "A\\u0300\\u0315B", "A\\u0315\\u0300B",
                        "A\\u0316\\u0315B", "A\\u0315\\u0316B",
                        "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
                        "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
                        "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
    int32_t   srclen;
    UChar source[10];
    UCollationElements *iter;

    coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    if (U_FAILURE(status)){
        log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
              myErrorName(status));
        return;
    }

    srclen = u_unescape(testdata[0], source, 10);
    iter = ucol_openElements(coll, source, srclen, &status);
    backAndForth(iter);
    ucol_closeElements(iter);

    srclen = u_unescape(testdata[1], source, 10);
    iter = ucol_openElements(coll, source, srclen, &status);
    backAndForth(iter);
    ucol_closeElements(iter);

    while (count < 12) {
        srclen = u_unescape(testdata[count], source, 10);
        iter = ucol_openElements(coll, source, srclen, &status);

        if (U_FAILURE(status)){
            log_err("ERROR: in creation of collator element iterator\n %s\n",
                  myErrorName(status));
            return;
        }
        backAndForth(iter);
        ucol_closeElements(iter);

        iter = ucol_openElements(coll, source, -1, &status);

        if (U_FAILURE(status)){
            log_err("ERROR: in creation of collator element iterator\n %s\n",
                  myErrorName(status));
            return;
        }
        backAndForth(iter);
        ucol_closeElements(iter);
        count ++;
    }
    ucol_close(coll);
}
Exemplo n.º 17
0
/**
 * Test for CollationElementIterator.previous()
 *
 * @bug 4108758 - Make sure it works with contracting characters
 *
 */
static void TestPrevious()
{
    UCollator *coll=NULL;
    UChar rule[50];
    UChar *source;
    UCollator *c1, *c2, *c3;
    UCollationElements *iter;
    UErrorCode status = U_ZERO_ERROR;
    UChar test1[50];
    UChar test2[50];

    u_uastrcpy(test1, "What subset of all possible test cases?");
    u_uastrcpy(test2, "has the highest probability of detecting");
    coll = ucol_open("en_US", &status);

    iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
    log_verbose("English locale testing back and forth\n");
    if(U_FAILURE(status)){
        log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
            myErrorName(status));
        ucol_close(coll);
        return;
    }
    /* A basic test to see if it's working at all */
    backAndForth(iter);
    ucol_closeElements(iter);
    ucol_close(coll);

    /* Test with a contracting character sequence */
    u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
    c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);

    log_verbose("Contraction rule testing back and forth with no normalization\n");

    if (c1 == NULL || U_FAILURE(status))
    {
        log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
            myErrorName(status));
        return;
    }
    source=(UChar*)malloc(sizeof(UChar) * 20);
    u_uastrcpy(source, "abchdcba");
    iter=ucol_openElements(c1, source, u_strlen(source), &status);
    if(U_FAILURE(status)){
        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
            myErrorName(status));
        return;
    }
    backAndForth(iter);
    ucol_closeElements(iter);
    ucol_close(c1);

    /* Test with an expanding character sequence */
    u_uastrcpy(rule, "&a < b < c/abd < d");
    c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
    log_verbose("Expansion rule testing back and forth with no normalization\n");
    if (c2 == NULL || U_FAILURE(status))
    {
        log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
            myErrorName(status));
        return;
    }
    u_uastrcpy(source, "abcd");
    iter=ucol_openElements(c2, source, u_strlen(source), &status);
    if(U_FAILURE(status)){
        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
            myErrorName(status));
        return;
    }
    backAndForth(iter);
    ucol_closeElements(iter);
    ucol_close(c2);
    /* Now try both */
    u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
    c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,  UCOL_DEFAULT_STRENGTH,NULL, &status);
    log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");

    if (c3 == NULL || U_FAILURE(status))
    {
        log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
            myErrorName(status));
        return;
    }
    u_uastrcpy(source, "abcdbchdc");
    iter=ucol_openElements(c3, source, u_strlen(source), &status);
    if(U_FAILURE(status)){
        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
            myErrorName(status));
        return;
    }
    backAndForth(iter);
    ucol_closeElements(iter);
    ucol_close(c3);
    source[0] = 0x0e41;
    source[1] = 0x0e02;
    source[2] = 0x0e41;
    source[3] = 0x0e02;
    source[4] = 0x0e27;
    source[5] = 0x61;
    source[6] = 0x62;
    source[7] = 0x63;
    source[8] = 0;

    coll = ucol_open("th_TH", &status);
    log_verbose("Thai locale testing back and forth with normalization\n");
    iter=ucol_openElements(coll, source, u_strlen(source), &status);
    if(U_FAILURE(status)){
        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
            myErrorName(status));
        return;
    }
    backAndForth(iter);
    ucol_closeElements(iter);
    ucol_close(coll);

    /* prev test */
    source[0] = 0x0061;
    source[1] = 0x30CF;
    source[2] = 0x3099;
    source[3] = 0x30FC;
    source[4] = 0;

    coll = ucol_open("ja_JP", &status);
    log_verbose("Japanese locale testing back and forth with normalization\n");
    iter=ucol_openElements(coll, source, u_strlen(source), &status);
    if(U_FAILURE(status)){
        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
            myErrorName(status));
        return;
    }
    backAndForth(iter);
    ucol_closeElements(iter);
    ucol_close(coll);

    free(source);
}
Exemplo n.º 18
0
/** @bug 4108762
 * Test for getMaxExpansion()
 */
static void TestMaxExpansion()
{
    UErrorCode          status = U_ZERO_ERROR;
    UCollator          *coll   ;/*= ucol_open("en_US", &status);*/
    UChar               ch     = 0;
    UChar32             unassigned = 0xEFFFD;
    UChar               supplementary[2];
    uint32_t            stringOffset = 0;
    UBool               isError = FALSE;
    uint32_t            sorder = 0;
    UCollationElements *iter   ;/*= ucol_openElements(coll, &ch, 1, &status);*/
    uint32_t            temporder = 0;

    UChar rule[256];
    u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
    coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
        UCOL_DEFAULT_STRENGTH,NULL, &status);
    if(U_SUCCESS(status) && coll) {
      iter = ucol_openElements(coll, &ch, 1, &status);

      while (ch < 0xFFFF && U_SUCCESS(status)) {
          int      count = 1;
          uint32_t order;
          int32_t  size = 0;

          ch ++;

          ucol_setText(iter, &ch, 1, &status);
          order = ucol_previous(iter, &status);

          /* thai management */
          if (order == 0)
              order = ucol_previous(iter, &status);

          while (U_SUCCESS(status) &&
              ucol_previous(iter, &status) != UCOL_NULLORDER) {
              count ++;
          }

          size = ucol_getMaxExpansion(iter, order);
          if (U_FAILURE(status) || size < count) {
              log_err("Failure at codepoint %d, maximum expansion count < %d\n",
                  ch, count);
          }
      }

      /* testing for exact max expansion */
      ch = 0;
      while (ch < 0x61) {
          uint32_t order;
          int32_t  size;
          ucol_setText(iter, &ch, 1, &status);
          order = ucol_previous(iter, &status);
          size  = ucol_getMaxExpansion(iter, order);
          if (U_FAILURE(status) || size != 1) {
              log_err("Failure at codepoint %d, maximum expansion count < %d\n",
                  ch, 1);
          }
          ch ++;
      }

      ch = 0x63;
      ucol_setText(iter, &ch, 1, &status);
      temporder = ucol_previous(iter, &status);

      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
          log_err("Failure at codepoint %d, maximum expansion count != %d\n",
                  ch, 3);
      }

      ch = 0x64;
      ucol_setText(iter, &ch, 1, &status);
      temporder = ucol_previous(iter, &status);

      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
          log_err("Failure at codepoint %d, maximum expansion count != %d\n",
                  ch, 3);
      }

      U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
      (void)isError;    /* Suppress set but not used warning. */
      ucol_setText(iter, supplementary, 2, &status);
      sorder = ucol_previous(iter, &status);

      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
          log_err("Failure at codepoint %d, maximum expansion count < %d\n",
                  ch, 2);
      }

      /* testing jamo */
      ch = 0x1165;

      ucol_setText(iter, &ch, 1, &status);
      temporder = ucol_previous(iter, &status);
      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
          log_err("Failure at codepoint %d, maximum expansion count > %d\n",
                  ch, 3);
      }

      ucol_closeElements(iter);
      ucol_close(coll);

      /* testing special jamo &a<\u1160 */
      rule[0] = 0x26;
      rule[1] = 0x71;
      rule[2] = 0x3c;
      rule[3] = 0x1165;
      rule[4] = 0x2f;
      rule[5] = 0x71;
      rule[6] = 0x71;
      rule[7] = 0x71;
      rule[8] = 0x71;
      rule[9] = 0;

      coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
          UCOL_DEFAULT_STRENGTH,NULL, &status);
      iter = ucol_openElements(coll, &ch, 1, &status);

      temporder = ucol_previous(iter, &status);
      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
          log_err("Failure at codepoint %d, maximum expansion count > %d\n",
                  ch, 5);
      }

      ucol_closeElements(iter);
      ucol_close(coll);
    } else {
      log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
    }

}
Exemplo n.º 19
0
/**
* Tests the [variable top] tag in rule syntax. Since the default [alternate]
* tag has the value shifted, any codepoints before [variable top] should give
* a primary ce of 0.
*/
static void TestVariableTop(void)
{
    const char       *str          = "&z = [variable top]";
          int         len          = strlen(str);
          UChar      *rules;
          UCollator  *myCollation;
          UCollator  *enCollation;
          UErrorCode  status       = U_ZERO_ERROR;
          UChar       source[1];
          UChar       ch;
          uint8_t     result[20];
          uint8_t     expected[20];

    rules = (UChar*)malloc(sizeof(UChar*) * (len + 1));
    u_uastrcpy(rules, str);

    enCollation = ucol_open("en_US", &status);
    myCollation = ucol_openRules(rules, len, UCOL_OFF, 
                                 UCOL_PRIMARY,NULL, &status);
    if (U_FAILURE(status)) {
        log_err("ERROR: in creation of rule based collator :%s\n", 
                myErrorName(status));
        return;
    }

    ucol_setStrength(enCollation, UCOL_PRIMARY);
    ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
    ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
        
    if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) !=
        UCOL_SHIFTED || U_FAILURE(status)) {
        log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
    }

    uprv_memset(expected, 0, 20);

    /* space is supposed to be a variable */
    source[0] = ' ';
    len = ucol_getSortKey(enCollation, source, 1, result, 
                          sizeof(result));

    if (uprv_memcmp(expected, result, len) != 0) {
        log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
    }

    ch = 'a';
    while (ch < 'z') {
        source[0] = ch;
        len = ucol_getSortKey(myCollation, source, 1, result,
                              sizeof(result));
        if (uprv_memcmp(expected, result, len) != 0) {
            log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n", 
                    ch);
        }
        ch ++;
    }
  
    free(rules);
    ucol_close(enCollation);
    ucol_close(myCollation);
    enCollation = NULL;
    myCollation = NULL;
}
Exemplo n.º 20
0
static UCollator*
tryOpeningFromRules(UResourceBundle *collElem, UErrorCode *status) {
    int32_t rulesLen = 0;
    const UChar *rules = ures_getStringByKey(collElem, "Sequence", &rulesLen, status);
    return ucol_openRules(rules, rulesLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, status);
}
Exemplo n.º 21
0
/**
 * Creates a collator
 */
UBool processCollator()
{
	// Set up an ICU collator
    UErrorCode status = U_ZERO_ERROR;
	UChar rules[100];

    if (opt_rules != 0) {
		u_unescape(opt_rules, rules, 100);
        collator = ucol_openRules(rules, -1, UCOL_OFF, UCOL_TERTIARY, 
			                  NULL, &status);
    }
    else {
        collator = ucol_open(opt_locale, &status);
    }
	if (U_FAILURE(status)) {
        fprintf(stderr, "Collator creation failed.: %d\n", status);
        return FALSE;
    }
    if (status == U_USING_DEFAULT_WARNING) {
        fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", 
			    opt_locale);
    }
    if (status == U_USING_FALLBACK_WARNING) {
        fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n", 
			    opt_locale);
    }
    if (opt_norm) {
        ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    }
    if (opt_french) {
        ucol_setAttribute(collator, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
    }
    if (opt_lower) {
        ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, 
			              &status);
    }
    if (opt_upper) {
        ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, 
			              &status);
    }
    if (opt_case) {
        ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &status);
    }
    if (opt_shifted) {
        ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, 
			              &status);
    }
    if (opt_level != 0) {
        switch (opt_level) {
        case 1:
            ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
            break;
        case 2:
            ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_SECONDARY, 
				              &status);
            break;
        case 3:
            ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &status);
            break;
        case 4:
            ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_QUATERNARY, 
				              &status);
            break;
        case 5:
            ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_IDENTICAL, 
				              &status);
            break;
        default:
            fprintf(stderr, "-level param must be between 1 and 5\n");
            return FALSE;
        }
    }
    if (U_FAILURE(status)) {
        fprintf(stderr, "Collator attribute setting failed.: %d\n", status);
        return FALSE;
    }
	return TRUE;
}
Exemplo n.º 22
0
/**
* Tests the [variable top] tag in rule syntax. Since the default [alternate]
* tag has the value shifted, any codepoints before [variable top] should give
* a primary ce of 0.
*/
static void TestVariableTop(void)
{
#if 0
    /*
     * Starting with ICU 53, setting the variable top via a pseudo relation string
     * is not supported any more.
     * It was replaced by the [maxVariable symbol] setting.
     * See ICU tickets #9958 and #8032.
     */
    static const char       str[]          = "&z = [variable top]";
          int         len          = strlen(str);
          UChar      rules[sizeof(str)];
          UCollator  *myCollation;
          UCollator  *enCollation;
          UErrorCode  status       = U_ZERO_ERROR;
          UChar       source[1];
          UChar       ch;
          uint8_t     result[20];
          uint8_t     expected[20];

    u_uastrcpy(rules, str);

    enCollation = ucol_open("en_US", &status);
    if (U_FAILURE(status)) {
        log_err_status(status, "ERROR: in creation of collator :%s\n", 
                myErrorName(status));
        return;
    }
    myCollation = ucol_openRules(rules, len, UCOL_OFF, 
                                 UCOL_PRIMARY,NULL, &status);
    if (U_FAILURE(status)) {
        ucol_close(enCollation);
        log_err("ERROR: in creation of rule based collator :%s\n", 
                myErrorName(status));
        return;
    }

    ucol_setStrength(enCollation, UCOL_PRIMARY);
    ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
    ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
        
    if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) !=
        UCOL_SHIFTED || U_FAILURE(status)) {
        log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
    }

    uprv_memset(expected, 0, 20);

    /* space is supposed to be a variable */
    source[0] = ' ';
    len = ucol_getSortKey(enCollation, source, 1, result, 
                          sizeof(result));

    if (uprv_memcmp(expected, result, len) != 0) {
        log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
    }

    ch = 'a';
    while (ch < 'z') {
        source[0] = ch;
        len = ucol_getSortKey(myCollation, source, 1, result,
                              sizeof(result));
        if (uprv_memcmp(expected, result, len) != 0) {
            log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n", 
                    ch);
        }
        ch ++;
    }
  
    ucol_close(enCollation);
    ucol_close(myCollation);
    enCollation = NULL;
    myCollation = NULL;
#endif
}
Exemplo n.º 23
0
/**
  * Tests surrogate support.
  * NOTE: This test used \\uD801\\uDC01 pair, which is now assigned to Desseret
  * Therefore, another (unassigned) code point was used for this test.
  */
static void TestSurrogates(void)
{
    const char       *str          = 
                              "&z<'\\uD800\\uDC00'<'\\uD800\\uDC0A\\u0308'<A";
          int         len          = strlen(str);
          int         rlen         = 0;
          UChar      *rules;
          UCollator  *myCollation;
          UCollator  *enCollation;
          UErrorCode  status       = U_ZERO_ERROR;
          UChar       source[][4]    = 
          {{'z', 0, 0}, {0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {0xD800, 0xDC02}};
          UChar       target[][4]    = 
          {{0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {'A', 0, 0}, {0xD800, 0xDC03}};
          int         count        = 0;
          uint8_t enresult[20], myresult[20];
          int enlen, mylen;
          
    /* tests for open rules with surrogate rules */
    rules = (UChar*)malloc(sizeof(UChar*) * (len + 1));
    rlen = u_unescape(str, rules, len);
    
    enCollation = ucol_open("en_US", &status);
    myCollation = ucol_openRules(rules, rlen, UCOL_OFF, 
                                 UCOL_TERTIARY,NULL, &status);
    if (U_FAILURE(status)) {
        log_err("ERROR: in creation of rule based collator :%s\n", 
                myErrorName(status));
        return;
    }

    /* 
    this test is to verify the supplementary sort key order in the english 
    collator
    */
    log_verbose("start of english collation supplementary characters test\n");
    while (count < 2) {
        doTest(enCollation, source[count], target[count], UCOL_LESS);
        count ++;
    }
    doTest(enCollation, source[count], target[count], UCOL_GREATER);
        
    log_verbose("start of tailored collation supplementary characters test\n");
    count = 0;
    /* tests getting collation elements for surrogates for tailored rules */
    while (count < 4) {
        doTest(myCollation, source[count], target[count], UCOL_LESS);
        count ++;
    }

    /* tests that \uD800\uDC02 still has the same value, not changed */
    enlen = ucol_getSortKey(enCollation, source[3], 2, enresult, 20);
    mylen = ucol_getSortKey(myCollation, source[3], 2, myresult, 20);
    if (enlen != mylen ||
        uprv_memcmp(enresult, myresult, enlen) != 0) {
        log_verbose("Failed : non-tailored supplementary characters should have the same value\n");
    }

    free(rules);
    ucol_close(enCollation);
    ucol_close(myCollation);
    enCollation = NULL;
    myCollation = NULL;
}
Exemplo n.º 24
0
/*
 * The collator returned by this function is owned by the callee and must be
 * closed when this method returns with a U_SUCCESS UErrorCode.
 *
 * On error, the return value is undefined.
 */
UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t options, UErrorCode* pErr)
{
    UColAttributeValue strength = ucol_getStrength(pCollator);

    bool isIgnoreCase = (options & CompareOptionsIgnoreCase) == CompareOptionsIgnoreCase;
    bool isIgnoreNonSpace = (options & CompareOptionsIgnoreNonSpace) == CompareOptionsIgnoreNonSpace;
    bool isIgnoreSymbols = (options & CompareOptionsIgnoreSymbols) == CompareOptionsIgnoreSymbols;

    if (isIgnoreCase)
    {
        strength = UCOL_SECONDARY;
    }

    if (isIgnoreNonSpace)
    {
        strength = UCOL_PRIMARY;
    }

    UCollator* pClonedCollator;
    std::vector<UChar> customRules = GetCustomRules(options, strength, isIgnoreSymbols);
    if (customRules.empty())
    {
        pClonedCollator = ucol_safeClone(pCollator, nullptr, nullptr, pErr);
    }
    else
    {
        int32_t customRuleLength = customRules.size();

        int32_t localeRulesLength;
        const UChar* localeRules = ucol_getRules(pCollator, &localeRulesLength);

        std::vector<UChar> completeRules(localeRulesLength + customRuleLength + 1, '\0');
        for (int i = 0; i < localeRulesLength; i++)
        {
            completeRules[i] = localeRules[i];
        }
        for (int i = 0; i < customRuleLength; i++)
        {
            completeRules[localeRulesLength + i] = customRules[i];
        }

        pClonedCollator = ucol_openRules(completeRules.data(), completeRules.size(), UCOL_DEFAULT, strength, NULL, pErr);
    }

    if (isIgnoreSymbols)
    {
        ucol_setAttribute(pClonedCollator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, pErr);

        // by default, ICU alternate shifted handling only ignores punctuation, but
        // IgnoreSymbols needs symbols and currency as well, so change the "variable top"
        // to include all symbols and currency
#if HAVE_SET_MAX_VARIABLE
        ucol_setMaxVariable(pClonedCollator, UCOL_REORDER_CODE_CURRENCY, pErr);
#else
        // 0xfdfc is the last currency character before the first digit character
        // in http://source.icu-project.org/repos/icu/icu/tags/release-52-1/source/data/unidata/FractionalUCA.txt
        const UChar ignoreSymbolsVariableTop[] = { 0xfdfc };
        ucol_setVariableTop(pClonedCollator, ignoreSymbolsVariableTop, 1, pErr);
#endif
    }

    ucol_setAttribute(pClonedCollator, UCOL_STRENGTH, strength, pErr);

    // casing differs at the tertiary level.
    // if strength is less than tertiary, but we are not ignoring case, then we need to flip CASE_LEVEL On
    if (strength < UCOL_TERTIARY && !isIgnoreCase)
    {
        ucol_setAttribute(pClonedCollator, UCOL_CASE_LEVEL, UCOL_ON, pErr);
    }

    return pClonedCollator;
}
Exemplo n.º 25
0
/**
* Testing the discontigous contractions
*/
static void TestDiscontiguos() {
    const char               *rulestr    =
                            "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
          UChar               rule[50];
          int                 rulelen = u_unescape(rulestr, rule, 50);
    const char               *src[] = {
     "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
    /* base character blocked */
     "XD\\u0300", "XD\\u0300\\u0315",
    /* non blocking combining character */
     "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
     /* blocking combining character */
     "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
     /* contraction prefix */
     "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
     "X\\u0300\\u031A\\u0315",
     /* ends not with a contraction character */
     "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
     "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
    };
    const char               *tgt[] = {
     /* non blocking combining character */
     "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
    /* base character blocked */
     "X D \\u0300", "X D \\u0300\\u0315",
    /* non blocking combining character */
     "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
     /* blocking combining character */
     "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
     /* contraction prefix */
     "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
     "X\\u0300 \\u031A \\u0315",
     /* ends not with a contraction character */
     "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
     "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
    };
          int                 size   = 20;
          UCollator          *coll;
          UErrorCode          status    = U_ZERO_ERROR;
          int                 count     = 0;
          UCollationElements *iter;
          UCollationElements *resultiter;

    coll       = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
    iter       = ucol_openElements(coll, rule, 1, &status);
    resultiter = ucol_openElements(coll, rule, 1, &status);

    if (U_FAILURE(status)) {
        log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
        return;
    }

    while (count < size) {
        UChar  str[20];
        UChar  tstr[20];
        int    strLen = u_unescape(src[count], str, 20);
        UChar *s;

        ucol_setText(iter, str, strLen, &status);
        if (U_FAILURE(status)) {
            log_err("Error opening collation iterator\n");
            return;
        }

        u_unescape(tgt[count], tstr, 20);
        s = tstr;

        log_verbose("count %d\n", count);

        for (;;) {
            uint32_t  ce;
            UChar    *e = u_strchr(s, 0x20);
            if (e == 0) {
                e = u_strchr(s, 0);
            }
            ucol_setText(resultiter, s, (int32_t)(e - s), &status);
            ce = ucol_next(resultiter, &status);
            if (U_FAILURE(status)) {
                log_err("Error manipulating collation iterator\n");
                return;
            }
            while (ce != UCOL_NULLORDER) {
                if (ce != (uint32_t)ucol_next(iter, &status) ||
                    U_FAILURE(status)) {
                    log_err("Discontiguos contraction test mismatch\n");
                    return;
                }
                ce = ucol_next(resultiter, &status);
                if (U_FAILURE(status)) {
                    log_err("Error getting next collation element\n");
                    return;
                }
            }
            s = e + 1;
            if (*e == 0) {
                break;
            }
        }
        ucol_reset(iter);
        backAndForth(iter);
        count ++;
    }
    ucol_closeElements(resultiter);
    ucol_closeElements(iter);
    ucol_close(coll);
}