extern void repeatProps(uint32_t first, uint32_t last, uint32_t x) { if(!utrie_setRange32(pTrie, (UChar32)first, (UChar32)(last+1), x, FALSE)) { fprintf(stderr, "error: too many entries for the properties trie\n"); exit(U_BUFFER_OVERFLOW_ERROR); } }
/*
 * Row handler that builds a UNewTrie mapping code points to row indexes.
 *
 * context     points to a UPVecToUTrieContext (capacity, initialValue,
 *             latin1Linear, newTrie).
 * start, end  inclusive code point range of the row, or a special value
 *             (>=UPVEC_FIRST_SPECIAL_CP) in start.
 * rowIndex    value stored in the trie for an ordinary range.
 * row, columns are not used by this handler.
 * pErrorCode  set to U_BUFFER_OVERFLOW_ERROR if the range cannot be stored,
 *             U_INDEX_OUTOFBOUNDS_ERROR if rowIndex does not fit 16 bits when
 *             the trie is about to be opened, or U_MEMORY_ALLOCATION_ERROR if
 *             utrie_open() returns NULL.
 */
U_CAPI void U_CALLCONV
upvec_compactToUTrieHandler(void *context,
                            UChar32 start, UChar32 end,
                            int32_t rowIndex, uint32_t *row, int32_t columns,
                            UErrorCode *pErrorCode) {
    UPVecToUTrieContext *toUTrie=(UPVecToUTrieContext *)context;

    if(start<UPVEC_FIRST_SPECIAL_CP) {
        /* ordinary range: store the row index for start..end (limit is exclusive) */
        UBool stored=utrie_setRange32(toUTrie->newTrie, start, end+1, (uint32_t)rowIndex, TRUE);
        if(!stored) {
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
        }
        return;
    }

    /* special "code point" values carry control information instead of a range */
    if(start==UPVEC_INITIAL_VALUE_CP) {
        /* remember the row index to be used as the trie's initial value */
        toUTrie->initialValue=rowIndex;
    } else if(start==UPVEC_START_REAL_VALUES_CP) {
        if(rowIndex>0xffff) {
            /* too many rows for a 16-bit trie */
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return;
        }
        /* the initial value is used both as initial and as lead-unit value */
        toUTrie->newTrie=utrie_open(NULL, NULL, toUTrie->capacity,
                                    toUTrie->initialValue, toUTrie->initialValue,
                                    toUTrie->latin1Linear);
        if(toUTrie->newTrie==NULL) {
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
        }
    }
    /* all other special values are ignored */
}
/*
 * Record the StringPrep type for the inclusive code point range start..end
 * in sprepTrie (declared outside this function).
 *
 * The stored word is _SPREP_TYPE_THRESHOLD + type.
 *  - For a real range (start != end) the word is set with
 *    utrie_setRange32() and overwrite==FALSE.
 *  - For a single code point that already has a non-zero word:
 *      - if that word is below the threshold and type is USPREP_PROHIBITED,
 *        the existing word is incremented by one and stored back;
 *      - otherwise the existing word must equal the new one.
 *
 * Every failure prints a message to stderr and exits the program.
 * The status parameter is neither read nor written here.
 */
extern void
storeRange(uint32_t start, uint32_t end, UStringPrepType type,UErrorCode* status){
    uint16_t typeWord;
    uint32_t existingWord;

    if((int)(_SPREP_TYPE_THRESHOLD + type) > 0xFFFF){
        fprintf(stderr,"trieWord cannot contain value greater than 0xFFFF.\n");
        exit(U_ILLEGAL_CHAR_FOUND);
    }
    typeWord = (_SPREP_TYPE_THRESHOLD + type);

    /* a real range: set all of it at once without overwriting existing values */
    if(start != end){
        if(!utrie_setRange32(sprepTrie, start, end+1, typeWord, FALSE)){
            fprintf(stderr,"Value for certain codepoint already set.\n");
            exit(U_ILLEGAL_CHAR_FOUND);
        }
        return;
    }

    /* a single code point: look at what is already stored for it */
    existingWord = utrie_get32(sprepTrie, start, NULL);
    if(existingWord>0){
        if(existingWord < _SPREP_TYPE_THRESHOLD && type == USPREP_PROHIBITED){
            /*
             * A mapping is stored in the trie word, and the only other
             * type such a code point may have is USPREP_PROHIBITED:
             * turn on the 0th bit in the stored word.
             */
            uint16_t combinedWord;

            existingWord += 0x01;
            /* the downcast is safe since only 16-bit values are saved */
            combinedWord = (uint16_t)existingWord;

            /* the combined word must stay below the threshold */
            if(combinedWord >= _SPREP_TYPE_THRESHOLD){
                fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
                exit(U_ILLEGAL_CHAR_FOUND);
            }
            if(!utrie_set32(sprepTrie,start,combinedWord)){
                fprintf(stderr,"Could not set the value for code point.\n");
                exit(U_ILLEGAL_ARGUMENT_ERROR);
            }
            /* value is set, nothing more to do */
            return;
        }
        if(existingWord != typeWord){
            fprintf(stderr,"Value for codepoint \\U%08X already set!.\n", (int)start);
            exit(U_ILLEGAL_ARGUMENT_ERROR);
        }
        /* existingWord == typeWord: continue and set the same value again */
    }

    if(!utrie_set32(sprepTrie,start,typeWord)){
        fprintf(stderr,"Could not set the value for code point \\U%08X.\n", (int)start);
        exit(U_ILLEGAL_ARGUMENT_ERROR);
    }
}
/*
 * Build the bidi properties data and write it out.
 *
 * Reads the rows of the properties vectors pv (declared outside this
 * function): column 0 goes into a new UNewTrie, column 1 (Joining_Group)
 * into a byte array that may only cover U+0600..U+08FF. The trie is
 * serialized, the indexes[] array is filled in, and then either
 *  - csource!=0: a C source file "ubidi_props_data.c" is written into
 *    dataDir, using a UTrie2 converted from the serialized trie, or
 *  - csource==0: a binary data file is written via udata_create() and
 *    its length is checked against the computed size.
 *
 * Any failure prints a message to stderr and exits the program.
 * On success the trie and pv are closed before returning.
 */
extern void
generateData(const char *dataDir, UBool csource) {
    static int32_t indexes[UBIDI_IX_TOP]={
        UBIDI_IX_TOP
    };
    static uint8_t trieBlock[40000];
    static uint8_t jgArray[0x300]; /* at most for U+0600..U+08FF */

    const uint32_t *row;
    UChar32 start, end, prev, jgStart;
    int32_t i;

    UNewDataMemory *pData;
    UNewTrie *pTrie;
    UErrorCode errorCode=U_ZERO_ERROR;
    int32_t trieSize;
    long dataLength;

    makeMirror();

    pTrie=utrie_open(NULL, NULL, 20000, 0, 0, TRUE);
    if(pTrie==NULL) {
        fprintf(stderr, "genbidi error: unable to create a UNewTrie\n");
        exit(U_MEMORY_ALLOCATION_ERROR);
    }

    /* prev==0 means that no Joining_Group value has been seen yet */
    prev=jgStart=0;
    for(i=0; (row=upvec_getRow(pv, i, &start, &end))!=NULL && start<UPVEC_FIRST_SPECIAL_CP; ++i) {
        /* store most values from vector column 0 in the trie */
        if(!utrie_setRange32(pTrie, start, end+1, *row, TRUE)) {
            fprintf(stderr, "genbidi error: unable to set trie value (overflow)\n");
            exit(U_BUFFER_OVERFLOW_ERROR);
        }

        /* store Joining_Group values from vector column 1 in a simple byte array */
        if(row[1]!=0) {
            if(start<0x600 || 0x8ff<end) {
                fprintf(stderr, "genbidi error: Joining_Group for out-of-range code points U+%04lx..U+%04lx\n",
                        (long)start, (long)end);
                exit(U_ILLEGAL_ARGUMENT_ERROR);
            }

            if(prev==0) {
                /* first code point with any value */
                prev=jgStart=start;
            } else {
                /* add No_Joining_Group for code points between prev and start */
                while(prev<start) {
                    jgArray[prev++ -jgStart]=0;
                }
            }

            /* set Joining_Group value for start..end */
            while(prev<=end) {
                jgArray[prev++ -jgStart]=(uint8_t)row[1];
            }
        }
    }

    /* finish jgArray, pad to multiple of 4 */
    while((prev-jgStart)&3) {
        jgArray[prev++ -jgStart]=0;
    }
    indexes[UBIDI_IX_JG_START]=jgStart;
    indexes[UBIDI_IX_JG_LIMIT]=prev;

    trieSize=utrie_serialize(pTrie, trieBlock, sizeof(trieBlock), NULL, TRUE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "genbidi error: utrie_serialize failed: %s (length %ld)\n",
                u_errorName(errorCode), (long)trieSize);
        exit(errorCode);
    }

    indexes[UBIDI_IX_TRIE_SIZE]=trieSize;
    indexes[UBIDI_IX_MIRROR_LENGTH]=mirrorTop;
    /* total size: indexes + trie + 4 bytes per mirror entry + Joining_Group bytes */
    indexes[UBIDI_IX_LENGTH]=
        (int32_t)sizeof(indexes)+
        trieSize+
        4*mirrorTop+
        (prev-jgStart);

    if(beVerbose) {
        printf("trie size in bytes: %5d\n", (int)trieSize);
        printf("size in bytes of mirroring table: %5d\n", (int)(4*mirrorTop));
        printf("length of Joining_Group array: %5d (U+%04x..U+%04x)\n", (int)(prev-jgStart), (int)jgStart, (int)(prev-1));
        printf("data size: %5d\n", (int)indexes[UBIDI_IX_LENGTH]);
    }

    indexes[UBIDI_MAX_VALUES_INDEX]=
        ((int32_t)U_CHAR_DIRECTION_COUNT-1)|
        (((int32_t)U_JT_COUNT-1)<<UBIDI_JT_SHIFT)|
        (((int32_t)U_JG_COUNT-1)<<UBIDI_MAX_JG_SHIFT);

    if(csource) {
        /* write .c file for hardcoded data */
        UTrie trie={ NULL };
        UTrie2 *trie2;
        FILE *f;

        utrie_unserialize(&trie, trieBlock, trieSize, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(
                stderr,
                "genbidi error: failed to utrie_unserialize(ubidi.icu trie) - %s\n",
                u_errorName(errorCode));
            exit(errorCode);
        }

        /* use UTrie2 */
        dataInfo.formatVersion[0]=2;
        dataInfo.formatVersion[2]=0;
        dataInfo.formatVersion[3]=0;
        trie2=utrie2_fromUTrie(&trie, 0, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(
                stderr,
                "genbidi error: utrie2_fromUTrie() failed - %s\n",
                u_errorName(errorCode));
            exit(errorCode);
        }
        {
            /* delete lead surrogate code unit values */
            UChar lead;
            UTrie2 *frozenTrie2=trie2;

            trie2=utrie2_cloneAsThawed(frozenTrie2, &errorCode);
            /*
             * Only the thawed clone is used from here on; close the trie
             * returned by utrie2_fromUTrie() so that it is not leaked.
             */
            utrie2_close(frozenTrie2);
            /* do not dereference trie2 below if the clone could not be created */
            if(U_FAILURE(errorCode)) {
                fprintf(
                    stderr,
                    "genbidi error: utrie2_cloneAsThawed() failed - %s\n",
                    u_errorName(errorCode));
                exit(errorCode);
            }

            for(lead=0xd800; lead<0xdc00; ++lead) {
                utrie2_set32ForLeadSurrogateCodeUnit(trie2, lead, trie2->initialValue, &errorCode);
            }
            utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode);
            if(U_FAILURE(errorCode)) {
                fprintf(
                    stderr,
                    "genbidi error: deleting lead surrogate code unit values failed - %s\n",
                    u_errorName(errorCode));
                exit(errorCode);
            }
        }

        f=usrc_create(dataDir, "ubidi_props_data.c");
        if(f!=NULL) {
            usrc_writeArray(f,
                "static const UVersionInfo ubidi_props_dataVersion={",
                dataInfo.dataVersion, 8, 4,
                "};\n\n");
            usrc_writeArray(f,
                "static const int32_t ubidi_props_indexes[UBIDI_IX_TOP]={",
                indexes, 32, UBIDI_IX_TOP,
                "};\n\n");
            usrc_writeUTrie2Arrays(f,
                "static const uint16_t ubidi_props_trieIndex[%ld]={\n", NULL,
                trie2,
                "\n};\n\n");
            usrc_writeArray(f,
                "static const uint32_t ubidi_props_mirrors[%ld]={\n",
                mirrors, 32, mirrorTop,
                "\n};\n\n");
            usrc_writeArray(f,
                "static const uint8_t ubidi_props_jgArray[%ld]={\n",
                jgArray, 8, prev-jgStart,
                "\n};\n\n");
            fputs(
                "static const UBiDiProps ubidi_props_singleton={\n"
                " NULL,\n"
                " ubidi_props_indexes,\n"
                " ubidi_props_mirrors,\n"
                " ubidi_props_jgArray,\n",
                f);
            usrc_writeUTrie2Struct(f,
                " {\n",
                trie2, "ubidi_props_trieIndex", NULL,
                " },\n");
            usrc_writeArray(f, " { ", dataInfo.formatVersion, 8, 4, " }\n");
            fputs("};\n", f);
            fclose(f);
        }
        utrie2_close(trie2);
    } else {
        /* write the data */
        pData=udata_create(dataDir, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &dataInfo,
                           haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "genbidi: unable to create data memory, %s\n", u_errorName(errorCode));
            exit(errorCode);
        }

        udata_writeBlock(pData, indexes, sizeof(indexes));
        udata_writeBlock(pData, trieBlock, trieSize);
        udata_writeBlock(pData, mirrors, 4*mirrorTop);
        udata_writeBlock(pData, jgArray, prev-jgStart);

        /* finish up */
        dataLength=udata_finish(pData, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "genbidi: error %d writing the output file\n", errorCode);
            exit(errorCode);
        }

        /* the written length must match the size computed above */
        if(dataLength!=indexes[UBIDI_IX_LENGTH]) {
            fprintf(stderr, "genbidi: data length %ld != calculated size %d\n",
                    dataLength, (int)indexes[UBIDI_IX_LENGTH]);
            exit(U_INTERNAL_PROGRAM_ERROR);
        }
    }

    utrie_close(pTrie);
    upvec_close(pv);
}
//------------------------------------------------------------------------ // // build Build the list of non-overlapping character ranges // from the Unicode Sets. // //------------------------------------------------------------------------ void RBBISetBuilder::build() { RBBINode *usetNode; RangeDescriptor *rlRange; if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "usets")) {printSets();} // // Initialize the process by creating a single range encompassing all characters // that is in no sets. // fRangeList = new RangeDescriptor(*fStatus); // will check for status here fRangeList->fStartChar = 0; fRangeList->fEndChar = 0x10ffff; if (U_FAILURE(*fStatus)) { return; } // // Find the set of non-overlapping ranges of characters // int ni; for (ni=0; ; ni++) { // Loop over each of the UnicodeSets encountered in the input rules usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni); if (usetNode==NULL) { break; } UnicodeSet *inputSet = usetNode->fInputSet; int32_t inputSetRangeCount = inputSet->getRangeCount(); int inputSetRangeIndex = 0; rlRange = fRangeList; for (;;) { if (inputSetRangeIndex >= inputSetRangeCount) { break; } UChar32 inputSetRangeBegin = inputSet->getRangeStart(inputSetRangeIndex); UChar32 inputSetRangeEnd = inputSet->getRangeEnd(inputSetRangeIndex); // skip over ranges from the range list that are completely // below the current range from the input unicode set. while (rlRange->fEndChar < inputSetRangeBegin) { rlRange = rlRange->fNext; } // If the start of the range from the range list is before with // the start of the range from the unicode set, split the range list range // in two, with one part being before (wholly outside of) the unicode set // and the other containing the rest. 
// Then continue the loop; the post-split current range will then be skipped // over if (rlRange->fStartChar < inputSetRangeBegin) { rlRange->split(inputSetRangeBegin, *fStatus); if (U_FAILURE(*fStatus)) { return; } continue; } // Same thing at the end of the ranges... // If the end of the range from the range list doesn't coincide with // the end of the range from the unicode set, split the range list // range in two. The first part of the split range will be // wholly inside the Unicode set. if (rlRange->fEndChar > inputSetRangeEnd) { rlRange->split(inputSetRangeEnd+1, *fStatus); if (U_FAILURE(*fStatus)) { return; } } // The current rlRange is now entirely within the UnicodeSet range. // Add this unicode set to the list of sets for this rlRange if (rlRange->fIncludesSets->indexOf(usetNode) == -1) { rlRange->fIncludesSets->addElement(usetNode, *fStatus); if (U_FAILURE(*fStatus)) { return; } } // Advance over ranges that we are finished with. if (inputSetRangeEnd == rlRange->fEndChar) { inputSetRangeIndex++; } rlRange = rlRange->fNext; } } if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "range")) { printRanges();} // // Group the above ranges, with each group consisting of one or more // ranges that are in exactly the same set of original UnicodeSets. // The groups are numbered, and these group numbers are the set of // input symbols recognized by the run-time state machine. // // Numbering: # 0 (state table column 0) is unused. // # 1 is reserved - table column 1 is for end-of-input // # 2 is reserved - table column 2 is for beginning-in-input // # 3 is the first range list. 
// RangeDescriptor *rlSearchRange; for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) { for (rlSearchRange=fRangeList; rlSearchRange != rlRange; rlSearchRange=rlSearchRange->fNext) { if (rlRange->fIncludesSets->equals(*rlSearchRange->fIncludesSets)) { rlRange->fNum = rlSearchRange->fNum; break; } } if (rlRange->fNum == 0) { fGroupCount ++; rlRange->fNum = fGroupCount+2; rlRange->setDictionaryFlag(); addValToSets(rlRange->fIncludesSets, fGroupCount+2); } } // Handle input sets that contain the special string {eof}. // Column 1 of the state table is reserved for EOF on input. // Column 2 is reserved for before-the-start-input. // (This column can be optimized away later if there are no rule // references to {bof}.) // Add this column value (1 or 2) to the equivalent expression // subtree for each UnicodeSet that contains the string {eof} // Because {bof} and {eof} are not a characters in the normal sense, // they doesn't affect the computation of ranges or TRIE. static const UChar eofUString[] = {0x65, 0x6f, 0x66, 0}; static const UChar bofUString[] = {0x62, 0x6f, 0x66, 0}; UnicodeString eofString(eofUString); UnicodeString bofString(bofUString); for (ni=0; ; ni++) { // Loop over each of the UnicodeSets encountered in the input rules usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni); if (usetNode==NULL) { break; } UnicodeSet *inputSet = usetNode->fInputSet; if (inputSet->contains(eofString)) { addValToSet(usetNode, 1); } if (inputSet->contains(bofString)) { addValToSet(usetNode, 2); fSawBOF = TRUE; } } if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();} if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();} // // Build the Trie table for mapping UChar32 values to the corresponding // range group number // fTrie = utrie_open(NULL, // Pre-existing trie to be filled in NULL, // Data array (utrie will allocate one) 100000, // Max Data Length 0, // Initial value for all code points 0, // Lead 
surrogate unit value TRUE); // Keep Latin 1 in separately for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) { utrie_setRange32(fTrie, rlRange->fStartChar, rlRange->fEndChar+1, rlRange->fNum, TRUE); } }
//------------------------------------------------------------------------ // // build Build the list of non-overlapping character ranges // from the Unicode Sets. // //------------------------------------------------------------------------ void RBBISetBuilder::build() { RBBINode *usetNode; RangeDescriptor *rlRange; if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "usets")) { printSets(); } // // Initialize the process by creating a single range encompassing all characters // that is in no sets. // fRangeList = new RangeDescriptor(*fStatus); // will check for status here fRangeList->fStartChar = 0; fRangeList->fEndChar = 0x10ffff; if (U_FAILURE(*fStatus)) { return; } // // Find the set of non-overlapping ranges of characters // int ni; for (ni=0; ; ni++) { usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni); if (usetNode==NULL) { break; } UnicodeSet *inputSet = usetNode->fInputSet; int32_t inputSetRangeCount = inputSet->getRangeCount(); int inputSetRangeIndex = 0; rlRange = fRangeList; for (;;) { if (inputSetRangeIndex >= inputSetRangeCount) { break; } UChar32 inputSetRangeBegin = inputSet->getRangeStart(inputSetRangeIndex); UChar32 inputSetRangeEnd = inputSet->getRangeEnd(inputSetRangeIndex); // skip over ranges from the range list that are completely // below the current range from the input unicode set. while (rlRange->fEndChar < inputSetRangeBegin) { rlRange = rlRange->fNext; } // If the start of the range from the range list is before with // the start of the range from the unicode set, split the range list range // in two, with one part being before (wholly outside of) the unicode set // and the other containing the rest. // Then continue the loop; the post-split current range will then be skipped // over if (rlRange->fStartChar < inputSetRangeBegin) { rlRange->split(inputSetRangeBegin, *fStatus); if (U_FAILURE(*fStatus)) { return; } continue; } // Same thing at the end of the ranges... 
// If the end of the range from the range list doesn't coincide with // the end of the range from the unicode set, split the range list // range in two. The first part of the split range will be // wholly inside the Unicode set. if (rlRange->fEndChar > inputSetRangeEnd) { rlRange->split(inputSetRangeEnd+1, *fStatus); if (U_FAILURE(*fStatus)) { return; } } // The current rlRange is now entirely within the UnicodeSet range. // Add this unicode set to the list of sets for this rlRange if (rlRange->fIncludesSets->indexOf(usetNode) == -1) { rlRange->fIncludesSets->addElement(usetNode, *fStatus); if (U_FAILURE(*fStatus)) { return; } } // Advance over ranges that we are finished with. if (inputSetRangeEnd == rlRange->fEndChar) { inputSetRangeIndex++; } rlRange = rlRange->fNext; } } if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "range")) { printRanges(); } // // Group the above ranges, with each group consisting of one or more // ranges that are in exactly the same set of original UnicodeSets. // The groups are numbered, and these group numbers are the set of // input symbols recognized by the run-time state machine. 
// RangeDescriptor *rlSearchRange; for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) { for (rlSearchRange=fRangeList; rlSearchRange != rlRange; rlSearchRange=rlSearchRange->fNext) { if (rlRange->fIncludesSets->equals(*rlSearchRange->fIncludesSets)) { rlRange->fNum = rlSearchRange->fNum; break; } } if (rlRange->fNum == 0) { fGroupCount ++; rlRange->fNum = fGroupCount; rlRange->setDictionaryFlag(); addValToSets(rlRange->fIncludesSets, fGroupCount); } } if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) { printRangeGroups(); } if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) { printSets(); } // // Build the Trie table for mapping UChar32 values to the corresponding // range group number // fTrie = utrie_open(NULL, // Pre-existing trie to be filled in NULL, // Data array (utrie will allocate one) 100000, // Max Data Length 0, // Initial value for all code points 0, // Lead surrogate unit value TRUE); // Keep Latin 1 in separately for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) { utrie_setRange32(fTrie, rlRange->fStartChar, rlRange->fEndChar+1, rlRange->fNum, TRUE); } }
/*
 * Build a trie from setRanges[], then verify it against checkRanges[]
 * before and after a serialize/unserialize round trip.
 *
 * testName          label used in log messages.
 * setRanges         ranges (start, limit, value, overwrite) to set.
 * countSetRanges    number of entries in setRanges.
 * checkRanges       expected (limit, value) pairs in ascending order;
 *                   checkRanges[0].value is used as the initial and
 *                   lead-unit value of the new trie.
 * countCheckRanges  number of entries in checkRanges.
 * dataIs32          TRUE to serialize with 32-bit data, FALSE for 16-bit.
 * latin1Linear      passed to utrie_open(); when set, the linear Latin-1
 *                   data is verified as well.
 *
 * Failures are reported with log_err(); nothing is returned.
 */
static void
testTrieRanges(const char *testName,
               const SetRange setRanges[], int32_t countSetRanges,
               const CheckRange checkRanges[], int32_t countCheckRanges,
               UBool dataIs32, UBool latin1Linear) {
    union{
        double bogus; /* needed for aligning the storage */
        uint8_t storage[32768];
    } storageHolder;
    UTrieGetFoldingOffset *getFoldingOffset;
    UNewTrieGetFoldedValue *getFoldedValue;
    const CheckRange *enumRanges;
    UNewTrie *newTrie;
    UTrie trie={ 0 };
    uint32_t value, value2;
    UChar32 start, limit;
    int32_t i, length;
    UErrorCode errorCode;
    UBool overwrite, ok;

    log_verbose("\ntesting Trie '%s'\n", testName);
    newTrie=utrie_open(NULL, NULL, 2000,
                       checkRanges[0].value, checkRanges[0].value,
                       latin1Linear);

    /* set values from setRanges[] */
    ok=TRUE;
    for(i=0; i<countSetRanges; ++i) {
        start=setRanges[i].start;
        limit=setRanges[i].limit;
        value=setRanges[i].value;
        overwrite=setRanges[i].overwrite;
        /* exercise utrie_set32() for single code points that may be overwritten */
        if((limit-start)==1 && overwrite) {
            ok&=utrie_set32(newTrie, start, value);
        } else {
            ok&=utrie_setRange32(newTrie, start, limit, value, overwrite);
        }
    }
    if(!ok) {
        log_err("error: setting values into a trie failed (%s)\n", testName);
        /* do not leak the build-time trie on this early exit */
        utrie_close(newTrie);
        return;
    }

    /*
     * Note on log_err() arguments below: the messages use %lx conversions,
     * so 32-bit and 16-bit values are cast to unsigned long explicitly.
     */

    /* verify that all these values are in the new Trie */
    start=0;
    for(i=0; i<countCheckRanges; ++i) {
        limit=checkRanges[i].limit;
        value=checkRanges[i].value;

        while(start<limit) {
            if(value!=utrie_get32(newTrie, start, NULL)) {
                log_err("error: newTrie(%s)[U+%04lx]==0x%lx instead of 0x%lx\n",
                        testName, (unsigned long)start,
                        (unsigned long)utrie_get32(newTrie, start, NULL),
                        (unsigned long)value);
            }
            ++start;
        }
    }

    if(dataIs32) {
        getFoldingOffset=_testFoldingOffset32;
        getFoldedValue=_testFoldedValue32;
    } else {
        getFoldingOffset=_testFoldingOffset16;
        getFoldedValue=_testFoldedValue16;
    }

    /*
     * code coverage for utrie.c/defaultGetFoldedValue(),
     * pick some combination of parameters for selecting the UTrie defaults
     */
    if(!dataIs32 && latin1Linear) {
        getFoldingOffset=NULL;
        getFoldedValue=NULL;
    }

    errorCode=U_ZERO_ERROR;
    length=utrie_serialize(newTrie, storageHolder.storage, sizeof(storageHolder.storage),
                           getFoldedValue,
                           (UBool)!dataIs32,
                           &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("error: utrie_serialize(%s) failed: %s\n", testName, u_errorName(errorCode));
        utrie_close(newTrie);
        return;
    }
    if (length >= (int32_t)sizeof(storageHolder.storage)) {
        log_err("error: utrie_serialize(%s) needs more memory\n", testName);
        utrie_close(newTrie);
        return;
    }

    /* test linear Latin-1 range from utrie_getData() */
    if(latin1Linear) {
        uint32_t *data;
        int32_t dataLength;

        data=utrie_getData(newTrie, &dataLength);
        start=0;
        for(i=0; i<countCheckRanges && start<=0xff; ++i) {
            limit=checkRanges[i].limit;
            value=checkRanges[i].value;

            while(start<limit && start<=0xff) {
                if(value!=data[UTRIE_DATA_BLOCK_LENGTH+start]) {
                    log_err("error: newTrie(%s).latin1Data[U+%04lx]==0x%lx instead of 0x%lx\n",
                            testName, (unsigned long)start,
                            (unsigned long)data[UTRIE_DATA_BLOCK_LENGTH+start],
                            (unsigned long)value);
                }
                ++start;
            }
        }
    }

    /* the build-time trie is no longer needed; continue with the serialized form */
    utrie_close(newTrie);

    errorCode=U_ZERO_ERROR;
    if(!utrie_unserialize(&trie, storageHolder.storage, length, &errorCode)) {
        log_err("error: utrie_unserialize() failed, %s\n", u_errorName(errorCode));
        return;
    }
    if(getFoldingOffset!=NULL) {
        trie.getFoldingOffset=getFoldingOffset;
    }

    if(dataIs32!=(trie.data32!=NULL)) {
        log_err("error: trie serialization (%s) did not preserve 32-bitness\n", testName);
    }
    if(latin1Linear!=trie.isLatin1Linear) {
        log_err("error: trie serialization (%s) did not preserve Latin-1-linearity\n", testName);
    }

    /* verify that all these values are in the unserialized Trie */
    start=0;
    for(i=0; i<countCheckRanges; ++i) {
        limit=checkRanges[i].limit;
        value=checkRanges[i].value;

        if(start==0xd800) {
            /* skip surrogates */
            start=limit;
            continue;
        }

        while(start<limit) {
            if(start<=0xffff) {
                if(dataIs32) {
                    value2=UTRIE_GET32_FROM_BMP(&trie, start);
                } else {
                    value2=UTRIE_GET16_FROM_BMP(&trie, start);
                }
                if(value!=value2) {
                    log_err("error: unserialized trie(%s).fromBMP(U+%04lx)==0x%lx instead of 0x%lx\n",
                            testName, (unsigned long)start,
                            (unsigned long)value2, (unsigned long)value);
                }
                if(!U16_IS_LEAD(start)) {
                    if(dataIs32) {
                        value2=UTRIE_GET32_FROM_LEAD(&trie, start);
                    } else {
                        value2=UTRIE_GET16_FROM_LEAD(&trie, start);
                    }
                    if(value!=value2) {
                        log_err("error: unserialized trie(%s).fromLead(U+%04lx)==0x%lx instead of 0x%lx\n",
                                testName, (unsigned long)start,
                                (unsigned long)value2, (unsigned long)value);
                    }
                }
            }
            if(dataIs32) {
                UTRIE_GET32(&trie, start, value2);
            } else {
                UTRIE_GET16(&trie, start, value2);
            }
            if(value!=value2) {
                log_err("error: unserialized trie(%s).get(U+%04lx)==0x%lx instead of 0x%lx\n",
                        testName, (unsigned long)start,
                        (unsigned long)value2, (unsigned long)value);
            }
            ++start;
        }
    }

    /* enumerate and verify all ranges */
    enumRanges=checkRanges+1;
    utrie_enum(&trie, _testEnumValue, _testEnumRange, &enumRanges);

    /* test linear Latin-1 range */
    if(trie.isLatin1Linear) {
        if(trie.data32!=NULL) {
            const uint32_t *latin1=UTRIE_GET32_LATIN1(&trie);

            for(start=0; start<0x100; ++start) {
                if(latin1[start]!=UTRIE_GET32_FROM_LEAD(&trie, start)) {
                    log_err("error: (%s) trie.latin1[U+%04lx]=0x%lx!=0x%lx=trie.get32(U+%04lx)\n",
                            testName, (unsigned long)start,
                            (unsigned long)latin1[start],
                            (unsigned long)UTRIE_GET32_FROM_LEAD(&trie, start),
                            (unsigned long)start);
                }
            }
        } else {
            const uint16_t *latin1=UTRIE_GET16_LATIN1(&trie);

            for(start=0; start<0x100; ++start) {
                if(latin1[start]!=UTRIE_GET16_FROM_LEAD(&trie, start)) {
                    log_err("error: (%s) trie.latin1[U+%04lx]=0x%lx!=0x%lx=trie.get16(U+%04lx)\n",
                            testName, (unsigned long)start,
                            (unsigned long)latin1[start],
                            (unsigned long)UTRIE_GET16_FROM_LEAD(&trie, start),
                            (unsigned long)start);
                }
            }
        }
    }

    testTrieIteration(testName, &trie, checkRanges, countCheckRanges);
}