/* folding value: just store the offset (16 bits) if there is any non-0 entry */
U_CDECL_BEGIN
/*
 * Scans the 0x400 code points beginning at start in the build-time trie.
 * Returns (offset | 0x8000) as soon as a non-zero value is found, 0 if the
 * whole span holds only zeros.
 * Whenever utrie_get32() sets its inBlockZero output flag, the scan jumps
 * ahead by UTRIE_DATA_BLOCK_LENGTH instead of by a single code point.
 */
static uint32_t U_CALLCONV
getFoldedRBBIValue(UNewTrie *trie, UChar32 start, int32_t offset) {
    const UChar32 spanEnd = start + 0x400;
    UChar32 c = start;
    UBool zeroBlock;

    while (c < spanEnd) {
        const uint32_t entry = utrie_get32(trie, c, &zeroBlock);

        if (zeroBlock) {
            c += UTRIE_DATA_BLOCK_LENGTH;
            continue;
        }
        if (entry != 0) {
            return (uint32_t)(offset | 0x8000);
        }
        ++c;
    }
    return 0;
}
/*
 * Folded-value callback used by the 16-bit trie test.
 * ORs together the values of the 0x400 code points beginning at start
 * (jumping ahead by UTRIE_DATA_BLOCK_LENGTH whenever utrie_get32() sets its
 * inBlockZero flag) and returns (offset | 0x8000) if any bit was set,
 * otherwise 0.
 */
static uint32_t U_CALLCONV
_testFoldedValue16(UNewTrie *trie, UChar32 start, int32_t offset) {
    const UChar32 spanEnd = start + 0x400;
    uint32_t combined = 0;
    UChar32 c;
    UBool zeroBlock;

    for (c = start; c < spanEnd; ) {
        const uint32_t entry = utrie_get32(trie, c, &zeroBlock);

        if (zeroBlock) {
            c += UTRIE_DATA_BLOCK_LENGTH;
        } else {
            combined |= entry;
            ++c;
        }
    }

    return (combined != 0) ? (uint32_t)(offset | 0x8000) : 0;
}
/*
 * Stores the type word (_SPREP_TYPE_THRESHOLD + type) in sprepTrie for the
 * code points start..end (end inclusive).
 *
 * A real range (start != end) is written with utrie_setRange32() without
 * overwriting. For a single code point that already has a value:
 *   - if the stored value is below the threshold and the new type is
 *     USPREP_PROHIBITED, the stored value is incremented by one (bit 0 is
 *     turned on) and written back;
 *   - otherwise the stored value must equal the new type word.
 * Every failure prints a message to stderr and terminates via exit().
 * The status parameter is not used by this function.
 */
extern void
storeRange(uint32_t start, uint32_t end, UStringPrepType type, UErrorCode* status){
    uint16_t typeWord = 0;
    uint32_t existingWord;

    if((int)(_SPREP_TYPE_THRESHOLD + type) > 0xFFFF){
        fprintf(stderr,"trieWord cannot contain value greater than 0xFFFF.\n");
        exit(U_ILLEGAL_CHAR_FOUND);
    }
    /* the top 4 bits contain the value */
    typeWord = (_SPREP_TYPE_THRESHOLD + type);

    /* multi-code-point range: one call, existing values are not overwritten */
    if(start != end){
        if(!utrie_setRange32(sprepTrie, start, end+1, typeWord, FALSE)){
            fprintf(stderr,"Value for certain codepoint already set.\n");
            exit(U_ILLEGAL_CHAR_FOUND);
        }
        return;
    }

    /* single code point: reconcile with whatever is already stored */
    existingWord = utrie_get32(sprepTrie, start, NULL);
    if(existingWord > 0){
        if(existingWord < _SPREP_TYPE_THRESHOLD && type == USPREP_PROHIBITED){
            /*
             * A mapping is stored in the trie word and the only other
             * possible type that a code point can have is USPREP_PROHIBITED:
             * turn on the 0th bit. The downcast is safe since only 16 bit
             * values are saved.
             */
            const uint16_t flaggedWord = (uint16_t)(existingWord + 0x01);

            /* the flagged word must stay below the threshold */
            if(flaggedWord >= _SPREP_TYPE_THRESHOLD){
                fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
                exit(U_ILLEGAL_CHAR_FOUND);
            }
            if(!utrie_set32(sprepTrie,start,flaggedWord)){
                fprintf(stderr,"Could not set the value for code point.\n");
                exit(U_ILLEGAL_ARGUMENT_ERROR);
            }
            /* value is set so just return */
            return;
        }
        if(existingWord != typeWord){
            fprintf(stderr,"Value for codepoint \\U%08X already set!.\n", (int)start);
            exit(U_ILLEGAL_ARGUMENT_ERROR);
        }
        /* existingWord == typeWord: fall through and set the value again */
    }

    if(!utrie_set32(sprepTrie,start,typeWord)){
        fprintf(stderr,"Could not set the value for code point \\U%08X.\n", (int)start);
        exit(U_ILLEGAL_ARGUMENT_ERROR);
    }
}
/*
 * Flattens the per-element contraction tables of `table` into the contiguous
 * arrays table->CEs and table->codePoints and records each element's start
 * in table->offsets.
 *
 * Steps:
 *  1. (Re)allocate table->offsets and fill it with mainOffset plus the
 *     running sum of the element sizes; the total ends up in table->position.
 *  2. (Re)allocate table->CEs and table->codePoints with table->position
 *     entries each.
 *  3. Copy every element into the flat arrays. The first code point slot of
 *     each element is a header: the low byte is the maximum combining class
 *     of the element's code points [1..size), and bit 8 is set when all of
 *     them share one combining class. CEs that are themselves contraction
 *     table references are rewritten to use the new offsets.
 *  4. Walk all code points in table->mapping and rewrite contraction CEs the
 *     same way.
 *
 * @param table      contraction table to flatten
 * @param mainOffset value added to every entry of table->offsets
 * @param status     in/out error code; set to U_MEMORY_ALLOCATION_ERROR when
 *                   an allocation fails
 * @return table->position, or 0 if *status is a failure on entry, the table
 *         is empty, or an allocation fails
 */
U_CAPI int32_t U_EXPORT2
uprv_cnttab_constructTable(CntTable *table, uint32_t mainOffset, UErrorCode *status) {
    int32_t i = 0, j = 0;
    if(U_FAILURE(*status) || table->size == 0) {
        return 0;
    }

    table->position = 0;

    if(table->offsets != NULL) {
        uprv_free(table->offsets);
    }
    table->offsets = (int32_t *)uprv_malloc(table->size*sizeof(int32_t));
    if(table->offsets == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }

    /* See how much memory we need */
    for(i = 0; i<table->size; i++) {
        table->offsets[i] = table->position+mainOffset;
        table->position += table->elements[i]->position;
    }

    /* Allocate it */
    if(table->CEs != NULL) {
        uprv_free(table->CEs);
    }
    table->CEs = (uint32_t *)uprv_malloc(table->position*sizeof(uint32_t));
    if(table->CEs == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(table->offsets);
        table->offsets = NULL;
        return 0;
    }
    uprv_memset(table->CEs, '?', table->position*sizeof(uint32_t));

    if(table->codePoints != NULL) {
        uprv_free(table->codePoints);
    }
    table->codePoints = (UChar *)uprv_malloc(table->position*sizeof(UChar));
    if(table->codePoints == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(table->offsets);
        table->offsets = NULL;
        uprv_free(table->CEs);
        table->CEs = NULL;
        return 0;
    }
    uprv_memset(table->codePoints, '?', table->position*sizeof(UChar));

    /* Now stuff the things in*/

    UChar *cpPointer = table->codePoints;
    uint32_t *CEPointer = table->CEs;
    for(i = 0; i<table->size; i++) {
        int32_t size = table->elements[i]->position;
        uint8_t ccMax = 0, ccMin = 255, cc = 0;
        /* slot 0 is the header written below, so copying starts at 1 */
        for(j = 1; j<size; j++) {
            cc = u_getCombiningClass(table->elements[i]->codePoints[j]);
            if(cc>ccMax) {
                ccMax = cc;
            }
            if(cc<ccMin) {
                ccMin = cc;
            }
            *(cpPointer+j) = table->elements[i]->codePoints[j];
        }
        /*
         * Header: bit 8 = "all combining classes are the same", low byte =
         * maximum combining class.
         * The previous expression `(ccMin==ccMax)?1:0 << 8` parsed as
         * `(ccMin==ccMax) ? 1 : (0 << 8)` because << binds tighter than ?:,
         * so the flag was OR-ed into bit 0 of ccMax instead of bit 8.
         * NOTE(review): this changes the generated data; confirm that the
         * code reading this header expects the flag in bit 8.
         */
        *cpPointer = (UChar)((((ccMin==ccMax)?1:0) << 8) | ccMax);

        uprv_memcpy(CEPointer, table->elements[i]->CEs, size*sizeof(uint32_t));
        /* redirect nested contraction CEs to the flattened offsets */
        for(j = 0; j<size; j++) {
            if(isCntTableElement(*(CEPointer+j))) {
                *(CEPointer+j) = constructContractCE(getCETag(*(CEPointer+j)), table->offsets[getContractOffset(*(CEPointer+j))]);
            }
        }
        cpPointer += size;
        CEPointer += size;
    }

    // TODO: this one apparently updates the contraction CEs to point to a real address (relative to the
    // start of the flat file). However, what is done below is just wrong and it affects building of
    // tailorings that have constructions in a bad way. At least, one should enumerate the trie. Also,
    // keeping a list of code points that are contractions might be smart, although I'm not sure if it's
    // feasible.
    uint32_t CE;
    for(i = 0; i<=0x10FFFF; i++) {
        /*CE = ucmpe32_get(table->mapping, i);*/
        CE = utrie_get32(table->mapping, i, NULL);
        if(isCntTableElement(CE)) {
            CE = constructContractCE(getCETag(CE), table->offsets[getContractOffset(CE)]);
            /*ucmpe32_set(table->mapping, i, CE);*/
            utrie_set32(table->mapping, i, CE);
        }
    }

    return table->position;
}
extern uint32_t getProps(uint32_t c) { return utrie_get32(pTrie, (UChar32)c, NULL); }
/*
 * Records the mapping of one code point.
 *
 * Depending on the UTF-16 length of the mapping (adjustedLen):
 *   - 0 units: the trie word (_SPREP_MAX_INDEX_VALUE << 2) is stored directly;
 *   - 1 unit:  if (codepoint - mapping[0]) lies within the SPREP delta range
 *              and the resulting word is below _SPREP_TYPE_THRESHOLD, the
 *              delta is stored directly in the trie;
 *   - otherwise the mapping is converted to UTF-16, wrapped in a ValueStruct
 *     and put into hashTable keyed by the code point.
 * A code point that already has a trie value other than "prohibited" is an
 * error. All errors print to stderr and terminate via exit().
 *
 * @param codepoint source code point
 * @param mapping   target code points (UTF-32), `length` entries
 * @param length    number of entries in mapping
 * @param type      StringPrep type stored in the ValueStruct
 * @param status    ICU error code used by the hashtable calls
 */
extern void
storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length,
             UStringPrepType type, UErrorCode* status){

    UChar* map = NULL;
    int16_t adjustedLen=0, i, j;
    uint16_t trieWord = 0;
    ValueStruct *value = NULL;
    uint32_t savedTrieWord = 0;

    /* initialize the hashtable */
    if(hashTable==NULL){
        hashTable = uhash_open(hashEntry, compareEntries, NULL, status);
        /* do not configure or use a table that could not be created */
        if(hashTable==NULL || U_FAILURE(*status)){
            fprintf(stderr, "Failed to create the hashtable. Error: %s\n", u_errorName(*status));
            exit(U_FAILURE(*status) ? *status : U_MEMORY_ALLOCATION_ERROR);
        }
        uhash_setValueDeleter(hashTable, valueDeleter);
    }

    /* figure out if the code point has type already stored */
    savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
    if(savedTrieWord!=0){
        if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
            /* turn on the first bit in trie word */
            /*
             * NOTE(review): the direct-store paths below assign trieWord
             * afresh, so this bit is not carried into the stored word there,
             * and the hashtable path does not use trieWord at all. Confirm
             * whether that is intended.
             */
            trieWord += 0x01;
        }else{
            /*
             * the codepoint has value something other than prohibited
             * and a mapping .. error!
             */
            fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
            exit(U_ILLEGAL_ARGUMENT_ERROR);
        }
    }

    /* figure out the real length */
    for(i=0; i<length; i++){
        if(mapping[i] > 0xFFFF){
            adjustedLen +=2;
        }else{
            adjustedLen++;
        }
    }

    if(adjustedLen == 0){
        trieWord = (uint16_t)(_SPREP_MAX_INDEX_VALUE << 2);
        /* make sure that the value of trieWord is less than the threshold */
        if(trieWord < _SPREP_TYPE_THRESHOLD){
            /* now set the value in the trie */
            if(!utrie_set32(sprepTrie,codepoint,trieWord)){
                fprintf(stderr,"Could not set the value for code point.\n");
                exit(U_ILLEGAL_ARGUMENT_ERROR);
            }
            /* value is set so just return */
            return;
        }else{
            fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
            exit(U_ILLEGAL_CHAR_FOUND);
        }
    }

    if(adjustedLen == 1){
        /* calculate the delta */
        int16_t delta = (int16_t)((int32_t)codepoint - (int16_t) mapping[0]);
        if(delta >= SPREP_DELTA_RANGE_NEGATIVE_LIMIT && delta <= SPREP_DELTA_RANGE_POSITIVE_LIMIT){

            trieWord = delta << 2;

            /* make sure that the second bit is OFF */
            if((trieWord & 0x02) != 0 ){
                fprintf(stderr,"The second bit in the trie word is not zero while storing a delta.\n");
                exit(U_INTERNAL_PROGRAM_ERROR);
            }
            /* make sure that the value of trieWord is less than the threshold */
            if(trieWord < _SPREP_TYPE_THRESHOLD){
                /* now set the value in the trie */
                if(!utrie_set32(sprepTrie,codepoint,trieWord)){
                    fprintf(stderr,"Could not set the value for code point.\n");
                    exit(U_ILLEGAL_ARGUMENT_ERROR);
                }
                /* value is set so just return */
                return;
            }
        }
        /*
         * if the delta is not in the given range or if the trieWord is larger than the threshold
         * just fall through for storing the mapping in the mapping table
         */
    }

    map = (UChar*) uprv_calloc(adjustedLen + 1, U_SIZEOF_UCHAR);
    if(map == NULL){
        fprintf(stderr, "Could not allocate memory for the mapping of code point \\U%08X.\n", (int)codepoint);
        exit(U_MEMORY_ALLOCATION_ERROR);
    }

    /*
     * Convert UTF-32 to UTF-16. The destination index j is separate from the
     * source index i because a supplementary code point takes two units;
     * sharing one index let the next unit overwrite the trail surrogate.
     */
    for(i=0, j=0; i<length; i++){
        if(mapping[i] <= 0xFFFF){
            map[j++] = (uint16_t)mapping[i];
        }else{
            map[j++] = U16_LEAD(mapping[i]);
            map[j++] = U16_TRAIL(mapping[i]);
        }
    }

    value = (ValueStruct*) uprv_malloc(sizeof(ValueStruct));
    if(value == NULL){
        fprintf(stderr, "Could not allocate memory for the mapping of code point \\U%08X.\n", (int)codepoint);
        exit(U_MEMORY_ALLOCATION_ERROR);
    }
    value->mapping = map;
    value->type    = type;
    value->length  = adjustedLen;
    /* mappings longer than the top length need one extra slot in the mapping data */
    if(value->length > _SPREP_MAX_INDEX_TOP_LENGTH){
        mappingDataCapacity++;
    }
    if(maxLength < value->length){
        maxLength = value->length;
    }
    uhash_iput(hashTable,codepoint,value,status);
    mappingDataCapacity += adjustedLen;

    if(U_FAILURE(*status)){
        fprintf(stderr, "Failed to put entries into the hastable. Error: %s\n", u_errorName(*status));
        exit(*status);
    }
}
static void storeMappingData(){ int32_t pos = -1; const UHashElement* element = NULL; ValueStruct* value = NULL; int32_t codepoint = 0; int32_t elementCount = 0; int32_t writtenElementCount = 0; int32_t mappingLength = 1; /* minimum mapping length */ int32_t oldMappingLength = 0; uint16_t trieWord =0; int32_t limitIndex = 0; if (hashTable == NULL) { return; } elementCount = uhash_count(hashTable); /*initialize the mapping data */ mappingData = (uint16_t*) uprv_calloc(mappingDataCapacity, U_SIZEOF_UCHAR); while(writtenElementCount < elementCount){ while( (element = uhash_nextElement(hashTable, &pos))!=NULL){ codepoint = element->key.integer; value = (ValueStruct*)element->value.pointer; /* store the start of indexes */ if(oldMappingLength != mappingLength){ /* Assume that index[] is used according to the enums defined */ if(oldMappingLength <=_SPREP_MAX_INDEX_TOP_LENGTH){ indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex; } if(oldMappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH && mappingLength == _SPREP_MAX_INDEX_TOP_LENGTH +1){ limitIndex = currentIndex; } oldMappingLength = mappingLength; } if(value->length == mappingLength){ uint32_t savedTrieWord = 0; trieWord = currentIndex << 2; /* turn on the 2nd bit to signal that the following bits contain an index */ trieWord += 0x02; if(trieWord > _SPREP_TYPE_THRESHOLD){ fprintf(stderr,"trieWord cannot contain value greater than 0x%04X.\n",_SPREP_TYPE_THRESHOLD); exit(U_ILLEGAL_CHAR_FOUND); } /* figure out if the code point has type already stored */ savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL); if(savedTrieWord!=0){ if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){ /* turn on the first bit in trie word */ trieWord += 0x01; }else{ /* * the codepoint has value something other than prohibited * and a mapping .. error! 
*/ fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint); exit(U_ILLEGAL_ARGUMENT_ERROR); } } /* now set the value in the trie */ if(!utrie_set32(sprepTrie,codepoint,trieWord)){ fprintf(stderr,"Could not set the value for code point.\n"); exit(U_ILLEGAL_ARGUMENT_ERROR); } /* written the trie word for the codepoint... increment the count*/ writtenElementCount++; /* sanity check are we exceeding the max number allowed */ if(currentIndex+value->length+1 > _SPREP_MAX_INDEX_VALUE){ fprintf(stderr, "Too many entries in the mapping table %i. Maximum allowed is %i\n", currentIndex+value->length, _SPREP_MAX_INDEX_VALUE); exit(U_INDEX_OUTOFBOUNDS_ERROR); } /* copy the mapping data */ if(currentIndex+value->length+1 <= mappingDataCapacity){ /* write the length */ if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){ /* the cast here is safe since we donot expect the length to be > 65535 */ mappingData[currentIndex++] = (uint16_t) mappingLength; } /* copy the contents to mappindData array */ uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR); currentIndex += value->length; }else{ /* realloc */ UChar* newMappingData = (uint16_t*) uprv_malloc(U_SIZEOF_UCHAR * mappingDataCapacity*2); if(newMappingData == NULL){ fprintf(stderr, "Could not realloc the mapping data!\n"); exit(U_MEMORY_ALLOCATION_ERROR); } uprv_memmove(newMappingData, mappingData, U_SIZEOF_UCHAR * mappingDataCapacity); mappingDataCapacity *= 2; uprv_free(mappingData); mappingData = newMappingData; /* write the length */ if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){ /* the cast here is safe since we donot expect the length to be > 65535 */ mappingData[currentIndex++] = (uint16_t) mappingLength; } /* continue copying */ uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR); currentIndex += value->length; } } } mappingLength++; pos = -1; } /* set the last length for range check */ if(mappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH){ 
indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex+1; }else{ indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] = limitIndex; } }
/*
 * Builds a trie from setRanges[], then checks it against checkRanges[] in
 * several ways:
 *   1. values read back from the build-time trie (utrie_get32);
 *   2. serialization into a stack buffer and, when latin1Linear is set, the
 *      linear Latin-1 data exposed by utrie_getData();
 *   3. after unserializing: preserved 32-bitness and Latin-1 linearity, and
 *      values read through the FROM_BMP / FROM_LEAD / general GET macros;
 *   4. range enumeration with utrie_enum() and the Latin-1 accessor macros;
 *   5. testTrieIteration().
 *
 * @param testName         label used in log messages
 * @param setRanges        ranges to write into the new trie
 * @param countSetRanges   number of entries in setRanges
 * @param checkRanges      expected values; entry i covers the code points
 *                         from the previous entry's limit up to its own
 *                         limit, and checkRanges[0].value is also passed to
 *                         utrie_open() as the initial and lead-unit value
 * @param countCheckRanges number of entries in checkRanges
 * @param dataIs32         TRUE to serialize with 32-bit data, FALSE for 16-bit
 * @param latin1Linear     passed to utrie_open(); also enables the Latin-1 checks
 *
 * Arguments matching "%lx" conversions are cast to unsigned long because
 * integer arguments narrower than long are not converted automatically in a
 * variadic call (assumes log_err() is printf-style).
 */
static void
testTrieRanges(const char *testName,
               const SetRange setRanges[], int32_t countSetRanges,
               const CheckRange checkRanges[], int32_t countCheckRanges,
               UBool dataIs32, UBool latin1Linear) {
    union{
        double bogus; /* needed for aligning the storage */
        uint8_t storage[32768];
    } storageHolder;
    UTrieGetFoldingOffset *getFoldingOffset;
    UNewTrieGetFoldedValue *getFoldedValue;
    const CheckRange *enumRanges;
    UNewTrie *newTrie;
    UTrie trie={ 0 };
    uint32_t value, value2;
    UChar32 start, limit;
    int32_t i, length;
    UErrorCode errorCode;
    UBool overwrite, ok;

    log_verbose("\ntesting Trie '%s'\n", testName);
    newTrie=utrie_open(NULL, NULL, 2000,
                       checkRanges[0].value, checkRanges[0].value,
                       latin1Linear);
    if(newTrie==NULL) {
        log_err("error: utrie_open(%s) failed\n", testName);
        return;
    }

    /* set values from setRanges[] */
    ok=TRUE;
    for(i=0; i<countSetRanges; ++i) {
        start=setRanges[i].start;
        limit=setRanges[i].limit;
        value=setRanges[i].value;
        overwrite=setRanges[i].overwrite;
        if((limit-start)==1 && overwrite) {
            ok&=utrie_set32(newTrie, start, value);
        } else {
            ok&=utrie_setRange32(newTrie, start, limit, value, overwrite);
        }
    }
    if(!ok) {
        log_err("error: setting values into a trie failed (%s)\n", testName);
        /* release the build-time trie on this early exit too */
        utrie_close(newTrie);
        return;
    }

    /* verify that all these values are in the new Trie */
    start=0;
    for(i=0; i<countCheckRanges; ++i) {
        limit=checkRanges[i].limit;
        value=checkRanges[i].value;

        while(start<limit) {
            if(value!=utrie_get32(newTrie, start, NULL)) {
                log_err("error: newTrie(%s)[U+%04lx]==0x%lx instead of 0x%lx\n",
                        testName, (unsigned long)start,
                        (unsigned long)utrie_get32(newTrie, start, NULL),
                        (unsigned long)value);
            }
            ++start;
        }
    }

    if(dataIs32) {
        getFoldingOffset=_testFoldingOffset32;
        getFoldedValue=_testFoldedValue32;
    } else {
        getFoldingOffset=_testFoldingOffset16;
        getFoldedValue=_testFoldedValue16;
    }

    /*
     * code coverage for utrie.c/defaultGetFoldedValue(),
     * pick some combination of parameters for selecting the UTrie defaults
     */
    if(!dataIs32 && latin1Linear) {
        getFoldingOffset=NULL;
        getFoldedValue=NULL;
    }

    errorCode=U_ZERO_ERROR;
    length=utrie_serialize(newTrie, storageHolder.storage, sizeof(storageHolder.storage),
                           getFoldedValue,
                           (UBool)!dataIs32,
                           &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("error: utrie_serialize(%s) failed: %s\n", testName, u_errorName(errorCode));
        utrie_close(newTrie);
        return;
    }
    if (length >= (int32_t)sizeof(storageHolder.storage)) {
        log_err("error: utrie_serialize(%s) needs more memory\n", testName);
        utrie_close(newTrie);
        return;
    }

    /* test linear Latin-1 range from utrie_getData() */
    if(latin1Linear) {
        uint32_t *data;
        int32_t dataLength;

        data=utrie_getData(newTrie, &dataLength);
        start=0;
        for(i=0; i<countCheckRanges && start<=0xff; ++i) {
            limit=checkRanges[i].limit;
            value=checkRanges[i].value;

            while(start<limit && start<=0xff) {
                if(value!=data[UTRIE_DATA_BLOCK_LENGTH+start]) {
                    log_err("error: newTrie(%s).latin1Data[U+%04lx]==0x%lx instead of 0x%lx\n",
                            testName, (unsigned long)start,
                            (unsigned long)data[UTRIE_DATA_BLOCK_LENGTH+start],
                            (unsigned long)value);
                }
                ++start;
            }
        }
    }

    utrie_close(newTrie);

    errorCode=U_ZERO_ERROR;
    if(!utrie_unserialize(&trie, storageHolder.storage, length, &errorCode)) {
        log_err("error: utrie_unserialize() failed, %s\n", u_errorName(errorCode));
        return;
    }
    if(getFoldingOffset!=NULL) {
        trie.getFoldingOffset=getFoldingOffset;
    }

    if(dataIs32!=(trie.data32!=NULL)) {
        log_err("error: trie serialization (%s) did not preserve 32-bitness\n", testName);
    }
    if(latin1Linear!=trie.isLatin1Linear) {
        log_err("error: trie serialization (%s) did not preserve Latin-1-linearity\n", testName);
    }

    /* verify that all these values are in the unserialized Trie */
    start=0;
    for(i=0; i<countCheckRanges; ++i) {
        limit=checkRanges[i].limit;
        value=checkRanges[i].value;

        if(start==0xd800) {
            /* skip surrogates */
            start=limit;
            continue;
        }

        while(start<limit) {
            if(start<=0xffff) {
                if(dataIs32) {
                    value2=UTRIE_GET32_FROM_BMP(&trie, start);
                } else {
                    value2=UTRIE_GET16_FROM_BMP(&trie, start);
                }
                if(value!=value2) {
                    log_err("error: unserialized trie(%s).fromBMP(U+%04lx)==0x%lx instead of 0x%lx\n",
                            testName, (unsigned long)start,
                            (unsigned long)value2, (unsigned long)value);
                }
                if(!U16_IS_LEAD(start)) {
                    if(dataIs32) {
                        value2=UTRIE_GET32_FROM_LEAD(&trie, start);
                    } else {
                        value2=UTRIE_GET16_FROM_LEAD(&trie, start);
                    }
                    if(value!=value2) {
                        log_err("error: unserialized trie(%s).fromLead(U+%04lx)==0x%lx instead of 0x%lx\n",
                                testName, (unsigned long)start,
                                (unsigned long)value2, (unsigned long)value);
                    }
                }
            }
            if(dataIs32) {
                UTRIE_GET32(&trie, start, value2);
            } else {
                UTRIE_GET16(&trie, start, value2);
            }
            if(value!=value2) {
                log_err("error: unserialized trie(%s).get(U+%04lx)==0x%lx instead of 0x%lx\n",
                        testName, (unsigned long)start,
                        (unsigned long)value2, (unsigned long)value);
            }
            ++start;
        }
    }

    /* enumerate and verify all ranges */
    enumRanges=checkRanges+1;
    utrie_enum(&trie, _testEnumValue, _testEnumRange, &enumRanges);

    /* test linear Latin-1 range */
    if(trie.isLatin1Linear) {
        if(trie.data32!=NULL) {
            const uint32_t *latin1=UTRIE_GET32_LATIN1(&trie);

            for(start=0; start<0x100; ++start) {
                if(latin1[start]!=UTRIE_GET32_FROM_LEAD(&trie, start)) {
                    log_err("error: (%s) trie.latin1[U+%04lx]=0x%lx!=0x%lx=trie.get32(U+%04lx)\n",
                            testName, (unsigned long)start,
                            (unsigned long)latin1[start],
                            (unsigned long)UTRIE_GET32_FROM_LEAD(&trie, start),
                            (unsigned long)start);
                }
            }
        } else {
            const uint16_t *latin1=UTRIE_GET16_LATIN1(&trie);

            for(start=0; start<0x100; ++start) {
                if(latin1[start]!=UTRIE_GET16_FROM_LEAD(&trie, start)) {
                    log_err("error: (%s) trie.latin1[U+%04lx]=0x%lx!=0x%lx=trie.get16(U+%04lx)\n",
                            testName, (unsigned long)start,
                            (unsigned long)latin1[start],
                            (unsigned long)UTRIE_GET16_FROM_LEAD(&trie, start),
                            (unsigned long)start);
                }
            }
        }
    }

    testTrieIteration(testName, &trie, checkRanges, countCheckRanges);
}