示例#1
0
/**
 * Get a set containing the expansions defined by the collator. The set includes
 * both the UCA expansions and the expansions defined by the tailoring
 * @param coll collator
 * @param conts the set to hold the result
 * @param addPrefixes add the prefix contextual elements to contractions
 * @param status to hold the error code
 *
 * @draft ICU 3.4
 */
U_CAPI void U_EXPORT2
ucol_getContractionsAndExpansions( const UCollator *coll,
                  USet *contractions,
                  USet *expansions,
                  UBool addPrefixes,
                  UErrorCode *status)
{
    if(U_FAILURE(*status)) {
        return;
    }
    if(coll == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    if(contractions) {
      uset_clear(contractions);
    }
    if(expansions) {
      uset_clear(expansions);
    }
    int32_t rulesLen = 0;
    const UChar* rules = ucol_getRules(coll, &rulesLen);
    UColTokenParser src;
    ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA,
                           ucol_tok_getRulesFromBundle, NULL, status);

    contContext c = { NULL, contractions, expansions, src.removeSet, addPrefixes, status };

    // Add the UCA contractions
    c.coll = coll->UCA;
    utrie_enum(&coll->UCA->mapping, NULL, _processSpecials, &c);

    // This is collator specific. Add contractions from a collator
    c.coll = coll;
    c.removedContractions =  NULL;
    utrie_enum(&coll->mapping, NULL, _processSpecials, &c);
    ucol_tok_closeTokenList(&src);
}
示例#2
0
/* Almost the same as utrie2_cloneAsThawed() but copies a UTrie and freezes the clone. */
U_CAPI UTrie2 * U_EXPORT2
utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode) {
    NewTrieAndStatus context;
    UChar lead;

    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    if(trie1==NULL) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    context.trie=utrie2_open(trie1->initialValue, errorValue, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    context.exclusiveLimit=TRUE;
    context.errorCode=*pErrorCode;
    utrie_enum(trie1, NULL, copyEnumRange, &context);
    *pErrorCode=context.errorCode;
    for(lead=0xd800; lead<0xdc00; ++lead) {
        uint32_t value;
        if(trie1->data32==NULL) {
            value=UTRIE_GET16_FROM_LEAD(trie1, lead);
        } else {
            value=UTRIE_GET32_FROM_LEAD(trie1, lead);
        }
        if(value!=trie1->initialValue) {
            utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
        }
    }
    if(U_SUCCESS(*pErrorCode)) {
        utrie2_freeze(context.trie,
                      trie1->data32!=NULL ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS,
                      pErrorCode);
    }
#ifdef UTRIE2_DEBUG
    if(U_SUCCESS(*pErrorCode)) {
        utrie_printLengths(trie1);
        utrie2_printLengths(context.trie, "fromUTrie");
    }
#endif
    if(U_FAILURE(*pErrorCode)) {
        utrie2_close(context.trie);
        context.trie=NULL;
    }
    return context.trie;
}
示例#3
0
U_CAPI void U_EXPORT2
ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode) {
    int32_t i, length;
    UChar32 c, start, limit;

    const uint8_t *jgArray;
    uint8_t prev, jg;

    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    /* add the start code point of each same-value range of the trie */
    utrie_enum(&bdp->trie, NULL, _enumPropertyStartsRange, sa);

    /* add the code points from the bidi mirroring table */
    length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH];
    for(i=0; i<length; ++i) {
        c=UBIDI_GET_MIRROR_CODE_POINT(bdp->mirrors[i]);
        sa->addRange(sa->set, c, c+1);
    }

    /* add the code points from the Joining_Group array where the value changes */
    start=bdp->indexes[UBIDI_IX_JG_START];
    limit=bdp->indexes[UBIDI_IX_JG_LIMIT];
    jgArray=bdp->jgArray;
    prev=0;
    while(start<limit) {
        jg=*jgArray++;
        if(jg!=prev) {
            sa->add(sa->set, start);
            prev=jg;
        }
        ++start;
    }
    if(prev!=0) {
        /* add the limit code point if the last value was not 0 (it is now start==limit) */
        sa->add(sa->set, limit);
    }

    /* add code points with hardcoded properties, plus the ones following them */

    /* (none right now) */
}
示例#4
0
static void
testTrieRanges(const char *testName,
               const SetRange setRanges[], int32_t countSetRanges,
               const CheckRange checkRanges[], int32_t countCheckRanges,
               UBool dataIs32, UBool latin1Linear) {
    union{
        double bogus; /* needed for aligining the storage */
        uint8_t storage[32768];
    } storageHolder;
    UTrieGetFoldingOffset *getFoldingOffset;
    UNewTrieGetFoldedValue *getFoldedValue;
    const CheckRange *enumRanges;
    UNewTrie *newTrie;
    UTrie trie={ 0 };
    uint32_t value, value2;
    UChar32 start, limit;
    int32_t i, length;
    UErrorCode errorCode;
    UBool overwrite, ok;

    log_verbose("\ntesting Trie '%s'\n", testName);
    newTrie=utrie_open(NULL, NULL, 2000,
                       checkRanges[0].value, checkRanges[0].value,
                       latin1Linear);

    /* set values from setRanges[] */
    ok=TRUE;
    for(i=0; i<countSetRanges; ++i) {
        start=setRanges[i].start;
        limit=setRanges[i].limit;
        value=setRanges[i].value;
        overwrite=setRanges[i].overwrite;
        if((limit-start)==1 && overwrite) {
            ok&=utrie_set32(newTrie, start, value);
        } else {
            ok&=utrie_setRange32(newTrie, start, limit, value, overwrite);
        }
    }
    if(!ok) {
        log_err("error: setting values into a trie failed (%s)\n", testName);
        return;
    }

    /* verify that all these values are in the new Trie */
    start=0;
    for(i=0; i<countCheckRanges; ++i) {
        limit=checkRanges[i].limit;
        value=checkRanges[i].value;

        while(start<limit) {
            if(value!=utrie_get32(newTrie, start, NULL)) {
                log_err("error: newTrie(%s)[U+%04lx]==0x%lx instead of 0x%lx\n",
                        testName, start, utrie_get32(newTrie, start, NULL), value);
            }
            ++start;
        }
    }

    if(dataIs32) {
        getFoldingOffset=_testFoldingOffset32;
        getFoldedValue=_testFoldedValue32;
    } else {
        getFoldingOffset=_testFoldingOffset16;
        getFoldedValue=_testFoldedValue16;
    }

    /*
     * code coverage for utrie.c/defaultGetFoldedValue(),
     * pick some combination of parameters for selecting the UTrie defaults
     */
    if(!dataIs32 && latin1Linear) {
        getFoldingOffset=NULL;
        getFoldedValue=NULL;
    }

    errorCode=U_ZERO_ERROR;
    length=utrie_serialize(newTrie, storageHolder.storage, sizeof(storageHolder.storage),
                           getFoldedValue,
                           (UBool)!dataIs32,
                           &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("error: utrie_serialize(%s) failed: %s\n", testName, u_errorName(errorCode));
        utrie_close(newTrie);
        return;
    }
    if (length >= (int32_t)sizeof(storageHolder.storage)) {
        log_err("error: utrie_serialize(%s) needs more memory\n", testName);
        utrie_close(newTrie);
        return;
    }

    /* test linear Latin-1 range from utrie_getData() */
    if(latin1Linear) {
        uint32_t *data;
        int32_t dataLength;

        data=utrie_getData(newTrie, &dataLength);
        start=0;
        for(i=0; i<countCheckRanges && start<=0xff; ++i) {
            limit=checkRanges[i].limit;
            value=checkRanges[i].value;

            while(start<limit && start<=0xff) {
                if(value!=data[UTRIE_DATA_BLOCK_LENGTH+start]) {
                    log_err("error: newTrie(%s).latin1Data[U+%04lx]==0x%lx instead of 0x%lx\n",
                            testName, start, data[UTRIE_DATA_BLOCK_LENGTH+start], value);
                }
                ++start;
            }
        }
    }

    utrie_close(newTrie);

    errorCode=U_ZERO_ERROR;
    if(!utrie_unserialize(&trie, storageHolder.storage, length, &errorCode)) {
        log_err("error: utrie_unserialize() failed, %s\n", u_errorName(errorCode));
        return;
    }
    if(getFoldingOffset!=NULL) {
        trie.getFoldingOffset=getFoldingOffset;
    }

    if(dataIs32!=(trie.data32!=NULL)) {
        log_err("error: trie serialization (%s) did not preserve 32-bitness\n", testName);
    }
    if(latin1Linear!=trie.isLatin1Linear) {
        log_err("error: trie serialization (%s) did not preserve Latin-1-linearity\n", testName);
    }

    /* verify that all these values are in the unserialized Trie */
    start=0;
    for(i=0; i<countCheckRanges; ++i) {
        limit=checkRanges[i].limit;
        value=checkRanges[i].value;

        if(start==0xd800) {
            /* skip surrogates */
            start=limit;
            continue;
        }

        while(start<limit) {
            if(start<=0xffff) {
                if(dataIs32) {
                    value2=UTRIE_GET32_FROM_BMP(&trie, start);
                } else {
                    value2=UTRIE_GET16_FROM_BMP(&trie, start);
                }
                if(value!=value2) {
                    log_err("error: unserialized trie(%s).fromBMP(U+%04lx)==0x%lx instead of 0x%lx\n",
                            testName, start, value2, value);
                }
                if(!U16_IS_LEAD(start)) {
                    if(dataIs32) {
                        value2=UTRIE_GET32_FROM_LEAD(&trie, start);
                    } else {
                        value2=UTRIE_GET16_FROM_LEAD(&trie, start);
                    }
                    if(value!=value2) {
                        log_err("error: unserialized trie(%s).fromLead(U+%04lx)==0x%lx instead of 0x%lx\n",
                                testName, start, value2, value);
                    }
                }
            }
            if(dataIs32) {
                UTRIE_GET32(&trie, start, value2);
            } else {
                UTRIE_GET16(&trie, start, value2);
            }
            if(value!=value2) {
                log_err("error: unserialized trie(%s).get(U+%04lx)==0x%lx instead of 0x%lx\n",
                        testName, start, value2, value);
            }
            ++start;
        }
    }

    /* enumerate and verify all ranges */
    enumRanges=checkRanges+1;
    utrie_enum(&trie, _testEnumValue, _testEnumRange, &enumRanges);

    /* test linear Latin-1 range */
    if(trie.isLatin1Linear) {
        if(trie.data32!=NULL) {
            const uint32_t *latin1=UTRIE_GET32_LATIN1(&trie);

            for(start=0; start<0x100; ++start) {
                if(latin1[start]!=UTRIE_GET32_FROM_LEAD(&trie, start)) {
                    log_err("error: (%s) trie.latin1[U+%04lx]=0x%lx!=0x%lx=trie.get32(U+%04lx)\n",
                            testName, start, latin1[start], UTRIE_GET32_FROM_LEAD(&trie, start), start);
                }
            }
        } else {
            const uint16_t *latin1=UTRIE_GET16_LATIN1(&trie);

            for(start=0; start<0x100; ++start) {
                if(latin1[start]!=UTRIE_GET16_FROM_LEAD(&trie, start)) {
                    log_err("error: (%s) trie.latin1[U+%04lx]=0x%lx!=0x%lx=trie.get16(U+%04lx)\n",
                            testName, start, latin1[start], UTRIE_GET16_FROM_LEAD(&trie, start), start);
                }
            }
        }
    }

    testTrieIteration(testName, &trie, checkRanges, countCheckRanges);
}