/**
 * Collect the contractions and expansions defined by a collator.
 * The result covers both the UCA elements and the elements defined by
 * the collator's own tailoring.
 *
 * @param coll         collator to inspect; passing NULL sets
 *                     U_ILLEGAL_ARGUMENT_ERROR and returns
 * @param contractions set that receives the contractions; it is cleared
 *                     first when it is non-NULL
 * @param expansions   set that receives the expansions; it is cleared
 *                     first when it is non-NULL
 * @param addPrefixes  add the prefix contextual elements to contractions
 * @param status       to hold the error code; nothing is done if it
 *                     already indicates a failure on entry
 *
 * @draft ICU 3.4
 */
U_CAPI void U_EXPORT2
ucol_getContractionsAndExpansions(const UCollator *coll,
                                  USet *contractions,
                                  USet *expansions,
                                  UBool addPrefixes,
                                  UErrorCode *status)
{
    if(U_FAILURE(*status)) {
        return;
    }
    if(coll == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* Start from empty result sets. */
    if(contractions != NULL) {
        uset_clear(contractions);
    }
    if(expansions != NULL) {
        uset_clear(expansions);
    }

    /*
     * Run the collator's rules through the token parser. Its removeSet is
     * handed to the enumeration context for the UCA pass and dropped again
     * before the collator-specific pass.
     */
    int32_t ruleLength = 0;
    const UChar *ruleText = ucol_getRules(coll, &ruleLength);
    UColTokenParser parser;
    ucol_tok_initTokenList(&parser, ruleText, ruleLength, coll->UCA,
                           ucol_tok_getRulesFromBundle, NULL, status);

    contContext ctx = { NULL, contractions, expansions, parser.removeSet, addPrefixes, status };

    /* First pass: add the UCA contractions. */
    ctx.coll = coll->UCA;
    utrie_enum(&coll->UCA->mapping, NULL, _processSpecials, &ctx);

    /* Second pass: this is collator specific, add contractions from the collator itself. */
    ctx.coll = coll;
    ctx.removedContractions = NULL;
    utrie_enum(&coll->mapping, NULL, _processSpecials, &ctx);

    ucol_tok_closeTokenList(&parser);
}
/*
 * Almost the same as utrie2_cloneAsThawed() but copies a UTrie and freezes the clone.
 *
 * trie1       - the UTrie to copy; NULL sets U_ILLEGAL_ARGUMENT_ERROR
 * errorValue  - passed to utrie2_open() together with trie1->initialValue
 * pErrorCode  - in/out error code; nothing is done if it already indicates a failure
 *
 * Returns the frozen UTrie2, or NULL on failure (a partially built trie is closed).
 */
U_CAPI UTrie2 * U_EXPORT2
utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode) {
    NewTrieAndStatus context;
    UChar leadUnit;

    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    if(trie1==NULL) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    context.trie=utrie2_open(trie1->initialValue, errorValue, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }

    /* Copy every enumerated range of the old trie through the callback. */
    context.exclusiveLimit=TRUE;
    context.errorCode=*pErrorCode;
    utrie_enum(trie1, NULL, copyEnumRange, &context);
    *pErrorCode=context.errorCode;

    /* Copy the lead surrogate code unit values that differ from the initial value. */
    for(leadUnit=0xd800; leadUnit<0xdc00; ++leadUnit) {
        uint32_t leadValue=
            trie1->data32==NULL ?
                UTRIE_GET16_FROM_LEAD(trie1, leadUnit) :
                UTRIE_GET32_FROM_LEAD(trie1, leadUnit);
        if(leadValue==trie1->initialValue) {
            continue;
        }
        utrie2_set32ForLeadSurrogateCodeUnit(context.trie, leadUnit, leadValue, pErrorCode);
    }

    if(U_SUCCESS(*pErrorCode)) {
        /* Keep the value width of the source trie. */
        utrie2_freeze(context.trie,
                      trie1->data32!=NULL ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS,
                      pErrorCode);
#ifdef UTRIE2_DEBUG
        if(U_SUCCESS(*pErrorCode)) {
            utrie_printLengths(trie1);
            utrie2_printLengths(context.trie, "fromUTrie");
        }
#endif
    }

    if(U_FAILURE(*pErrorCode)) {
        utrie2_close(context.trie);
        return NULL;
    }
    return context.trie;
}
/*
 * Report to the set adder the code points at which BiDi property values
 * may change: the range starts enumerated from the trie, the entries of
 * the mirroring table, and the positions in the Joining_Group array where
 * the value changes.
 *
 * bdp        - BiDi properties data (trie, indexes, mirrors, jgArray)
 * sa         - set adder whose add()/addRange() callbacks receive the code points
 * pErrorCode - in/out error code; nothing is done if it already indicates a failure
 */
U_CAPI void U_EXPORT2
ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode) {
    const uint8_t *jgCursor;
    UChar32 mirrorCp, cp, jgLimit;
    int32_t mirrorIndex, mirrorCount;
    uint8_t jg, lastJg;

    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    /* add the start code point of each same-value range of the trie */
    utrie_enum(&bdp->trie, NULL, _enumPropertyStartsRange, sa);

    /* add the code points from the bidi mirroring table */
    mirrorCount=bdp->indexes[UBIDI_IX_MIRROR_LENGTH];
    for(mirrorIndex=0; mirrorIndex<mirrorCount; ++mirrorIndex) {
        mirrorCp=UBIDI_GET_MIRROR_CODE_POINT(bdp->mirrors[mirrorIndex]);
        sa->addRange(sa->set, mirrorCp, mirrorCp+1);
    }

    /* add the code points from the Joining_Group array where the value changes */
    jgLimit=bdp->indexes[UBIDI_IX_JG_LIMIT];
    lastJg=0;
    jgCursor=bdp->jgArray;
    for(cp=bdp->indexes[UBIDI_IX_JG_START]; cp<jgLimit; ++cp, ++jgCursor) {
        jg=*jgCursor;
        if(jg!=lastJg) {
            sa->add(sa->set, cp);
            lastJg=jg;
        }
    }
    if(lastJg!=0) {
        /* the array ended on a non-zero value: add the limit code point as well */
        sa->add(sa->set, jgLimit);
    }

    /* add code points with hardcoded properties, plus the ones following them */

    /* (none right now) */
}
/*
 * Build a trie from setRanges[], then verify it against checkRanges[] in
 * several ways: directly on the builder (utrie_get32), on the raw Latin-1
 * data of the builder, and, after a serialize/unserialize round trip, through
 * the UTRIE_GET* macros, utrie_enum() and testTrieIteration().
 *
 * testName         - label used in all log messages
 * setRanges        - ranges (start, limit, value, overwrite) to store in the trie
 * countSetRanges   - number of entries in setRanges[]
 * checkRanges      - expected (limit, value) ranges, starting at code point 0;
 *                    checkRanges[0].value is also passed twice to utrie_open()
 *                    (presumably as the initial and lead-unit values - confirm
 *                    against the utrie_open() declaration)
 * countCheckRanges - number of entries in checkRanges[]
 * dataIs32         - TRUE to serialize 32-bit data, FALSE for 16-bit data
 * latin1Linear     - passed to utrie_open(); also enables the Latin-1 checks
 *
 * Failures are reported via log_err(). The builder trie is closed on every
 * path that leaves this function.
 */
static void
testTrieRanges(const char *testName,
               const SetRange setRanges[], int32_t countSetRanges,
               const CheckRange checkRanges[], int32_t countCheckRanges,
               UBool dataIs32, UBool latin1Linear) {
    union{
        double bogus; /* needed for aligning the storage */
        uint8_t storage[32768];
    } storageHolder;
    UTrieGetFoldingOffset *getFoldingOffset;
    UNewTrieGetFoldedValue *getFoldedValue;
    const CheckRange *enumRanges;
    UNewTrie *newTrie;
    UTrie trie={ 0 };
    uint32_t value, value2;
    UChar32 start, limit;
    int32_t i, length;
    UErrorCode errorCode;
    UBool overwrite, ok;

    log_verbose("\ntesting Trie '%s'\n", testName);
    newTrie=utrie_open(NULL, NULL, 2000,
                       checkRanges[0].value, checkRanges[0].value,
                       latin1Linear);

    /* set values from setRanges[] */
    ok=TRUE;
    for(i=0; i<countSetRanges; ++i) {
        start=setRanges[i].start;
        limit=setRanges[i].limit;
        value=setRanges[i].value;
        overwrite=setRanges[i].overwrite;
        if((limit-start)==1 && overwrite) {
            ok&=utrie_set32(newTrie, start, value);
        } else {
            ok&=utrie_setRange32(newTrie, start, limit, value, overwrite);
        }
    }
    if(!ok) {
        log_err("error: setting values into a trie failed (%s)\n", testName);
        /* do not leak the builder on this early exit */
        utrie_close(newTrie);
        return;
    }

    /* verify that all these values are in the new Trie */
    start=0;
    for(i=0; i<countCheckRanges; ++i) {
        limit=checkRanges[i].limit;
        value=checkRanges[i].value;

        while(start<limit) {
            if(value!=utrie_get32(newTrie, start, NULL)) {
                /* %lx expects unsigned long: cast the 32-bit arguments explicitly */
                log_err("error: newTrie(%s)[U+%04lx]==0x%lx instead of 0x%lx\n",
                        testName, (unsigned long)start,
                        (unsigned long)utrie_get32(newTrie, start, NULL),
                        (unsigned long)value);
            }
            ++start;
        }
    }

    if(dataIs32) {
        getFoldingOffset=_testFoldingOffset32;
        getFoldedValue=_testFoldedValue32;
    } else {
        getFoldingOffset=_testFoldingOffset16;
        getFoldedValue=_testFoldedValue16;
    }

    /*
     * code coverage for utrie.c/defaultGetFoldedValue(),
     * pick some combination of parameters for selecting the UTrie defaults
     */
    if(!dataIs32 && latin1Linear) {
        getFoldingOffset=NULL;
        getFoldedValue=NULL;
    }

    errorCode=U_ZERO_ERROR;
    length=utrie_serialize(newTrie, storageHolder.storage, sizeof(storageHolder.storage),
                           getFoldedValue,
                           (UBool)!dataIs32,
                           &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("error: utrie_serialize(%s) failed: %s\n", testName, u_errorName(errorCode));
        utrie_close(newTrie);
        return;
    }
    if (length >= (int32_t)sizeof(storageHolder.storage)) {
        log_err("error: utrie_serialize(%s) needs more memory\n", testName);
        utrie_close(newTrie);
        return;
    }

    /* test linear Latin-1 range from utrie_getData() */
    if(latin1Linear) {
        uint32_t *data;
        int32_t dataLength;

        data=utrie_getData(newTrie, &dataLength);
        start=0;
        for(i=0; i<countCheckRanges && start<=0xff; ++i) {
            limit=checkRanges[i].limit;
            value=checkRanges[i].value;

            while(start<limit && start<=0xff) {
                if(value!=data[UTRIE_DATA_BLOCK_LENGTH+start]) {
                    log_err("error: newTrie(%s).latin1Data[U+%04lx]==0x%lx instead of 0x%lx\n",
                            testName, (unsigned long)start,
                            (unsigned long)data[UTRIE_DATA_BLOCK_LENGTH+start],
                            (unsigned long)value);
                }
                ++start;
            }
        }
    }

    /* the builder is no longer needed; everything below works on the serialized form */
    utrie_close(newTrie);

    errorCode=U_ZERO_ERROR;
    if(!utrie_unserialize(&trie, storageHolder.storage, length, &errorCode)) {
        log_err("error: utrie_unserialize() failed, %s\n", u_errorName(errorCode));
        return;
    }
    if(getFoldingOffset!=NULL) {
        trie.getFoldingOffset=getFoldingOffset;
    }

    if(dataIs32!=(trie.data32!=NULL)) {
        log_err("error: trie serialization (%s) did not preserve 32-bitness\n", testName);
    }
    if(latin1Linear!=trie.isLatin1Linear) {
        log_err("error: trie serialization (%s) did not preserve Latin-1-linearity\n", testName);
    }

    /* verify that all these values are in the unserialized Trie */
    start=0;
    for(i=0; i<countCheckRanges; ++i) {
        limit=checkRanges[i].limit;
        value=checkRanges[i].value;

        if(start==0xd800) {
            /* skip surrogates */
            start=limit;
            continue;
        }

        while(start<limit) {
            if(start<=0xffff) {
                if(dataIs32) {
                    value2=UTRIE_GET32_FROM_BMP(&trie, start);
                } else {
                    value2=UTRIE_GET16_FROM_BMP(&trie, start);
                }
                if(value!=value2) {
                    log_err("error: unserialized trie(%s).fromBMP(U+%04lx)==0x%lx instead of 0x%lx\n",
                            testName, (unsigned long)start,
                            (unsigned long)value2, (unsigned long)value);
                }
                if(!U16_IS_LEAD(start)) {
                    if(dataIs32) {
                        value2=UTRIE_GET32_FROM_LEAD(&trie, start);
                    } else {
                        value2=UTRIE_GET16_FROM_LEAD(&trie, start);
                    }
                    if(value!=value2) {
                        log_err("error: unserialized trie(%s).fromLead(U+%04lx)==0x%lx instead of 0x%lx\n",
                                testName, (unsigned long)start,
                                (unsigned long)value2, (unsigned long)value);
                    }
                }
            }
            if(dataIs32) {
                UTRIE_GET32(&trie, start, value2);
            } else {
                UTRIE_GET16(&trie, start, value2);
            }
            if(value!=value2) {
                log_err("error: unserialized trie(%s).get(U+%04lx)==0x%lx instead of 0x%lx\n",
                        testName, (unsigned long)start,
                        (unsigned long)value2, (unsigned long)value);
            }
            ++start;
        }
    }

    /* enumerate and verify all ranges */
    enumRanges=checkRanges+1;
    utrie_enum(&trie, _testEnumValue, _testEnumRange, &enumRanges);

    /* test linear Latin-1 range */
    if(trie.isLatin1Linear) {
        if(trie.data32!=NULL) {
            const uint32_t *latin1=UTRIE_GET32_LATIN1(&trie);

            for(start=0; start<0x100; ++start) {
                if(latin1[start]!=UTRIE_GET32_FROM_LEAD(&trie, start)) {
                    log_err("error: (%s) trie.latin1[U+%04lx]=0x%lx!=0x%lx=trie.get32(U+%04lx)\n",
                            testName, (unsigned long)start,
                            (unsigned long)latin1[start],
                            (unsigned long)UTRIE_GET32_FROM_LEAD(&trie, start),
                            (unsigned long)start);
                }
            }
        } else {
            const uint16_t *latin1=UTRIE_GET16_LATIN1(&trie);

            for(start=0; start<0x100; ++start) {
                if(latin1[start]!=UTRIE_GET16_FROM_LEAD(&trie, start)) {
                    log_err("error: (%s) trie.latin1[U+%04lx]=0x%lx!=0x%lx=trie.get16(U+%04lx)\n",
                            testName, (unsigned long)start,
                            (unsigned long)latin1[start],
                            (unsigned long)UTRIE_GET16_FROM_LEAD(&trie, start),
                            (unsigned long)start);
                }
            }
        }
    }

    testTrieIteration(testName, &trie, checkRanges, countCheckRanges);
}