Exemple #1
0
/**
 * Get a set containing the expansions defined by the collator. The set includes
 * both the UCA expansions and the expansions defined by the tailoring
 * @param coll collator
 * @param conts the set to hold the result
 * @param addPrefixes add the prefix contextual elements to contractions
 * @param status to hold the error code
 *
 * @draft ICU 3.4
 */
U_CAPI void U_EXPORT2
ucol_getContractionsAndExpansions( const UCollator *coll,
                  USet *contractions,
                  USet *expansions,
                  UBool addPrefixes,
                  UErrorCode *status)
{
    if(U_FAILURE(*status)) {
        return;
    }
    if(coll == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    if(contractions) {
      uset_clear(contractions);
    }
    if(expansions) {
      uset_clear(expansions);
    }
    int32_t rulesLen = 0;
    const UChar* rules = ucol_getRules(coll, &rulesLen);
    UColTokenParser src;
    ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA,
                           ucol_tok_getRulesFromBundle, NULL, status);

    contContext c = { NULL, contractions, expansions, src.removeSet, addPrefixes, status };

    // Add the UCA contractions
    c.coll = coll->UCA;
    utrie_enum(&coll->UCA->mapping, NULL, _processSpecials, &c);

    // This is collator specific. Add contractions from a collator
    c.coll = coll;
    c.removedContractions =  NULL;
    utrie_enum(&coll->mapping, NULL, _processSpecials, &c);
    ucol_tok_closeTokenList(&src);
}
Exemple #2
0
U_CAPI int32_t U_EXPORT2
ucol_getUnsafeSet( const UCollator *coll,
                  USet *unsafe,
                  UErrorCode *status)
{
    UChar buffer[internalBufferSize];
    int32_t len = 0;

    uset_clear(unsafe);

    // cccpattern = "[[:^tccc=0:][:^lccc=0:]]", unfortunately variant
    static const UChar cccpattern[25] = { 0x5b, 0x5b, 0x3a, 0x5e, 0x74, 0x63, 0x63, 0x63, 0x3d, 0x30, 0x3a, 0x5d,
                                    0x5b, 0x3a, 0x5e, 0x6c, 0x63, 0x63, 0x63, 0x3d, 0x30, 0x3a, 0x5d, 0x5d, 0x00 };

    // add chars that fail the fcd check
    uset_applyPattern(unsafe, cccpattern, 24, USET_IGNORE_SPACE, status);

    // add Thai/Lao prevowels
    uset_addRange(unsafe, 0xe40, 0xe44);
    uset_addRange(unsafe, 0xec0, 0xec4);
    // add lead/trail surrogates
    uset_addRange(unsafe, 0xd800, 0xdfff);

    USet *contractions = uset_open(0,0);

    int32_t i = 0, j = 0;
    int32_t contsSize = ucol_getContractions(coll, contractions, status);
    UChar32 c = 0;
    // Contraction set consists only of strings
    // to get unsafe code points, we need to
    // break the strings apart and add them to the unsafe set
    for(i = 0; i < contsSize; i++) {
        len = uset_getItem(contractions, i, NULL, NULL, buffer, internalBufferSize, status);
        if(len > 0) {
            j = 0;
            while(j < len) {
                U16_NEXT(buffer, j, len, c);
                if(j < len) {
                    uset_add(unsafe, c);
                }
            }
        }
    }

    uset_close(contractions);

    return uset_size(unsafe);
}
Exemple #3
0
static bool __CFLocaleCopyExemplarCharSet(CFLocaleRef locale, bool user, CFTypeRef *cf, CFStringRef context) {
    char localeID[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY];
    if (CFStringGetCString(locale->_identifier, localeID, sizeof(localeID)/sizeof(char), kCFStringEncodingASCII)) {
        UErrorCode icuStatus = U_ZERO_ERROR;
	ULocaleData* uld = ulocdata_open(localeID, &icuStatus);
        USet *set = ulocdata_getExemplarSet(uld, NULL, USET_ADD_CASE_MAPPINGS, ULOCDATA_ES_STANDARD, &icuStatus);
	ulocdata_close(uld);
        if (U_FAILURE(icuStatus))
            return false;
        if (icuStatus == U_USING_DEFAULT_WARNING)   // If default locale used, force to empty set
            uset_clear(set);
        *cf = (CFTypeRef) _CFCreateCharacterSetFromUSet(set);
        uset_close(set);
        return (*cf != NULL);
    }
    return false;
}
Exemple #4
0
U_CAPI void U_EXPORT2
ucnv_getUnicodeSet(const UConverter *cnv,
                   USet *setFillIn,
                   UConverterUnicodeSet whichSet,
                   UErrorCode *pErrorCode) {
    /* argument checking */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }
    if(cnv==NULL || setFillIn==NULL || whichSet<UCNV_ROUNDTRIP_SET || UCNV_SET_COUNT<=whichSet) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* does this converter support this function? */
    if(cnv->sharedData->impl->getUnicodeSet==NULL) {
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return;
    }

    {
        USetAdder sa={
            NULL,
            uset_add,
            uset_addRange,
            uset_addString,
            uset_remove,
            uset_removeRange
        };
        sa.set=setFillIn;

        /* empty the set */
        uset_clear(setFillIn);

        /* call the converter to add the code points it supports */
        cnv->sharedData->impl->getUnicodeSet(cnv, &sa, whichSet, pErrorCode);
    }
}
Exemple #5
0
/**
 * Basic API test for uset.x
 */
static void TestAPI() {
    USet* set;
    USet* set2;
    UErrorCode ec;
    
    /* [] */
    set = uset_openEmpty();
    expect(set, "", "abc{ab}", NULL);
    uset_close(set);

    set = uset_open(1, 0);
    expect(set, "", "abc{ab}", NULL);
    uset_close(set);

    set = uset_open(1, 1);
    uset_clear(set);
    expect(set, "", "abc{ab}", NULL);
    uset_close(set);

    /* [ABC] */
    set = uset_open(0x0041, 0x0043);
    expect(set, "ABC", "DEF{ab}", NULL);
    uset_close(set);

    /* [a-c{ab}] */
    ec = U_ZERO_ERROR;
    set = uset_openPattern(PAT, PAT_LEN, &ec);
    if(U_FAILURE(ec)) {
        log_err("uset_openPattern([a-c{ab}]) failed - %s\n", u_errorName(ec));
        return;
    }
    if(!uset_resemblesPattern(PAT, PAT_LEN, 0)) {
        log_err("uset_resemblesPattern of PAT failed\n");
    }
    expect(set, "abc{ab}", "def{bc}", &ec);

    /* [a-d{ab}] */
    uset_add(set, 0x64);
    expect(set, "abcd{ab}", "ef{bc}", NULL);

    /* [acd{ab}{bc}] */
    uset_remove(set, 0x62);
    uset_addString(set, STR_bc, STR_bc_LEN);
    expect(set, "acd{ab}{bc}", "bef{cd}", NULL);

    /* [acd{bc}] */
    uset_removeString(set, STR_ab, STR_ab_LEN);
    expect(set, "acd{bc}", "bfg{ab}", NULL);

    /* [^acd{bc}] */
    uset_complement(set);
    expect(set, "bef{bc}", "acd{ac}", NULL);

    /* [a-e{bc}] */
    uset_complement(set);
    uset_addRange(set, 0x0062, 0x0065);
    expect(set, "abcde{bc}", "fg{ab}", NULL);

    /* [de{bc}] */
    uset_removeRange(set, 0x0050, 0x0063);
    expect(set, "de{bc}", "bcfg{ab}", NULL);

    /* [g-l] */
    uset_set(set, 0x0067, 0x006C);
    expect(set, "ghijkl", "de{bc}", NULL);

    if (uset_indexOf(set, 0x0067) != 0) {
        log_err("uset_indexOf failed finding correct index of 'g'\n");
    }

    if (uset_charAt(set, 0) != 0x0067) {
        log_err("uset_charAt failed finding correct char 'g' at index 0\n");
    }

    /* How to test this one...? */
    uset_compact(set);

    /* [g-i] */
    uset_retain(set, 0x0067, 0x0069);
    expect(set, "ghi", "dejkl{bc}", NULL);

    /* UCHAR_ASCII_HEX_DIGIT */
    uset_applyIntPropertyValue(set, UCHAR_ASCII_HEX_DIGIT, 1, &ec);
    if(U_FAILURE(ec)) {
        log_err("uset_applyIntPropertyValue([UCHAR_ASCII_HEX_DIGIT]) failed - %s\n", u_errorName(ec));
        return;
    }
    expect(set, "0123456789ABCDEFabcdef", "GHIjkl{bc}", NULL);

    /* [ab] */
    uset_clear(set);
    uset_addAllCodePoints(set, STR_ab, STR_ab_LEN);
    expect(set, "ab", "def{ab}", NULL);
    if (uset_containsAllCodePoints(set, STR_bc, STR_bc_LEN)){
        log_err("set should not conatin all characters of \"bc\" \n");
    }

    /* [] */
    set2 = uset_open(1, 1);
    uset_clear(set2);

    /* space */
    uset_applyPropertyAlias(set2, PAT_lb, PAT_lb_LEN, VAL_SP, VAL_SP_LEN, &ec);
    expect(set2, " ", "abcdefghi{bc}", NULL);

    /* [a-c] */
    uset_set(set2, 0x0061, 0x0063);
    /* [g-i] */
    uset_set(set, 0x0067, 0x0069);

    /* [a-c g-i] */
    if (uset_containsSome(set, set2)) {
        log_err("set should not contain some of set2 yet\n");
    }
    uset_complementAll(set, set2);
    if (!uset_containsSome(set, set2)) {
        log_err("set should contain some of set2\n");
    }
    expect(set, "abcghi", "def{bc}", NULL);

    /* [g-i] */
    uset_removeAll(set, set2);
    expect(set, "ghi", "abcdef{bc}", NULL);

    /* [a-c g-i] */
    uset_addAll(set2, set);
    expect(set2, "abcghi", "def{bc}", NULL);

    /* [g-i] */
    uset_retainAll(set2, set);
    expect(set2, "ghi", "abcdef{bc}", NULL);

    uset_close(set);
    uset_close(set2);
}