예제 #1
0
파일: rbbisetb.cpp 프로젝트: Botyto/Core
/* folding value: just store the offset (16 bits) if there is any non-0 entry */
U_CDECL_BEGIN
static uint32_t U_CALLCONV
getFoldedRBBIValue(UNewTrie * trie, UChar32 start, int32_t offset)
{
	uint32_t value;
	UChar32 limit;
	UBool inBlockZero;

	limit = start + 0x400;
	while (start < limit)
	{
		value = utrie_get32(trie, start, &inBlockZero);
		if (inBlockZero)
		{
			start += UTRIE_DATA_BLOCK_LENGTH;
		}
		else if (value != 0)
		{
			return (uint32_t)(offset | 0x8000);
		}
		else
		{
			++start;
		}
	}
	return 0;
}
예제 #2
0
static uint32_t U_CALLCONV
_testFoldedValue16(UNewTrie *trie, UChar32 start, int32_t offset) {
    uint32_t foldedValue, value;
    UChar32 limit;
    UBool inBlockZero;

    foldedValue=0;

    limit=start+0x400;
    while(start<limit) {
        value=utrie_get32(trie, start, &inBlockZero);
        if(inBlockZero) {
            start+=UTRIE_DATA_BLOCK_LENGTH;
        } else {
            foldedValue|=value;
            ++start;
        }
    }

    if(foldedValue!=0) {
        return (uint32_t)(offset|0x8000);
    } else {
        return 0;
    }
}
예제 #3
0
extern void
storeRange(uint32_t start, uint32_t end, UStringPrepType type,UErrorCode* status){
    uint16_t trieWord = 0;

    if((int)(_SPREP_TYPE_THRESHOLD + type) > 0xFFFF){
        fprintf(stderr,"trieWord cannot contain value greater than 0xFFFF.\n");
        exit(U_ILLEGAL_CHAR_FOUND);
    }
    trieWord = (_SPREP_TYPE_THRESHOLD + type); /* the top 4 bits contain the value */
    if(start == end){
        uint32_t savedTrieWord = utrie_get32(sprepTrie, start, NULL);
        if(savedTrieWord>0){
            if(savedTrieWord < _SPREP_TYPE_THRESHOLD && type == USPREP_PROHIBITED){
                /* 
                 * A mapping is stored in the trie word 
                 * and the only other possible type that a 
                 * code point can have is USPREP_PROHIBITED
                 *
                 */

                /* turn on the 0th bit in the savedTrieWord */
                savedTrieWord += 0x01;

                /* the downcast is safe since we only save 16 bit values */
                trieWord = (uint16_t)savedTrieWord;

                /* make sure that the value of trieWord is less than the threshold */
                if(trieWord < _SPREP_TYPE_THRESHOLD){   
                    /* now set the value in the trie */
                    if(!utrie_set32(sprepTrie,start,trieWord)){
                        fprintf(stderr,"Could not set the value for code point.\n");
                        exit(U_ILLEGAL_ARGUMENT_ERROR);   
                    }
                    /* value is set so just return */
                    return;
                }else{
                    fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
                    exit(U_ILLEGAL_CHAR_FOUND);
                }
 
            }else if(savedTrieWord != trieWord){
                fprintf(stderr,"Value for codepoint \\U%08X already set!.\n", (int)start);
                exit(U_ILLEGAL_ARGUMENT_ERROR);
            }
            /* if savedTrieWord == trieWord .. fall through and set the value */
        }
        if(!utrie_set32(sprepTrie,start,trieWord)){
            fprintf(stderr,"Could not set the value for code point \\U%08X.\n", (int)start);
            exit(U_ILLEGAL_ARGUMENT_ERROR);   
        }
    }else{
        if(!utrie_setRange32(sprepTrie, start, end+1, trieWord, FALSE)){
            fprintf(stderr,"Value for certain codepoint already set.\n");
            exit(U_ILLEGAL_CHAR_FOUND);   
        }
    }

}
예제 #4
0
U_CAPI int32_t  U_EXPORT2
uprv_cnttab_constructTable(CntTable *table, uint32_t mainOffset, UErrorCode *status) {
    int32_t i = 0, j = 0;
    if(U_FAILURE(*status) || table->size == 0) {
        return 0;
    }

    table->position = 0;

    if(table->offsets != NULL) {
        uprv_free(table->offsets);
    }
    table->offsets = (int32_t *)uprv_malloc(table->size*sizeof(int32_t));
    if(table->offsets == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }


    /* See how much memory we need */
    for(i = 0; i<table->size; i++) {
        table->offsets[i] = table->position+mainOffset;
        table->position += table->elements[i]->position;
    }

    /* Allocate it */
    if(table->CEs != NULL) {
        uprv_free(table->CEs);
    }
    table->CEs = (uint32_t *)uprv_malloc(table->position*sizeof(uint32_t));
    if(table->CEs == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(table->offsets);
        table->offsets = NULL;
        return 0;
    }
    uprv_memset(table->CEs, '?', table->position*sizeof(uint32_t));

    if(table->codePoints != NULL) {
        uprv_free(table->codePoints);
    }
    table->codePoints = (UChar *)uprv_malloc(table->position*sizeof(UChar));
    if(table->codePoints == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(table->offsets);
        table->offsets = NULL;
        uprv_free(table->CEs);
        table->CEs = NULL;
        return 0;
    }
    uprv_memset(table->codePoints, '?', table->position*sizeof(UChar));

    /* Now stuff the things in*/

    UChar *cpPointer = table->codePoints;
    uint32_t *CEPointer = table->CEs;
    for(i = 0; i<table->size; i++) {
        int32_t size = table->elements[i]->position;
        uint8_t ccMax = 0, ccMin = 255, cc = 0;
        for(j = 1; j<size; j++) {
            cc = u_getCombiningClass(table->elements[i]->codePoints[j]);
            if(cc>ccMax) {
                ccMax = cc;
            }
            if(cc<ccMin) {
                ccMin = cc;
            }
            *(cpPointer+j) = table->elements[i]->codePoints[j];
        }
        *cpPointer = ((ccMin==ccMax)?1:0 << 8) | ccMax;

        uprv_memcpy(CEPointer, table->elements[i]->CEs, size*sizeof(uint32_t));
        for(j = 0; j<size; j++) {
            if(isCntTableElement(*(CEPointer+j))) {
                *(CEPointer+j) = constructContractCE(getCETag(*(CEPointer+j)), table->offsets[getContractOffset(*(CEPointer+j))]);
            }
        }
        cpPointer += size;
        CEPointer += size;
    }

    // TODO: this one apparently updates the contraction CEs to point to a real address (relative to the 
    // start of the flat file). However, what is done below is just wrong and it affects building of 
    // tailorings that have constructions in a bad way. At least, one should enumerate the trie. Also,
    // keeping a list of code points that are contractions might be smart, although I'm not sure if it's
    // feasible.
    uint32_t CE;
    for(i = 0; i<=0x10FFFF; i++) {
        /*CE = ucmpe32_get(table->mapping, i);*/
        CE = utrie_get32(table->mapping, i, NULL);
        if(isCntTableElement(CE)) {
            CE = constructContractCE(getCETag(CE), table->offsets[getContractOffset(CE)]);
            /*ucmpe32_set(table->mapping, i, CE);*/
            utrie_set32(table->mapping, i, CE);
        }
    }


    return table->position;
}
예제 #5
0
extern uint32_t
getProps(uint32_t c) {
    return utrie_get32(pTrie, (UChar32)c, NULL);
}
예제 #6
0
extern void
storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length,
             UStringPrepType type, UErrorCode* status){
    
 
    UChar* map = NULL;
    int16_t adjustedLen=0, i;
    uint16_t trieWord = 0;
    ValueStruct *value = NULL;
    uint32_t savedTrieWord = 0;

    /* initialize the hashtable */
    if(hashTable==NULL){
        hashTable = uhash_open(hashEntry, compareEntries, NULL, status);
        uhash_setValueDeleter(hashTable, valueDeleter);
    }
    
    /* figure out if the code point has type already stored */
    savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
    if(savedTrieWord!=0){
        if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
            /* turn on the first bit in trie word */
            trieWord += 0x01;
        }else{
            /* 
             * the codepoint has value something other than prohibited
             * and a mapping .. error! 
             */
            fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
            exit(U_ILLEGAL_ARGUMENT_ERROR); 
        } 
    }

    /* figure out the real length */ 
    for(i=0; i<length; i++){
        if(mapping[i] > 0xFFFF){
            adjustedLen +=2;
        }else{
            adjustedLen++;
        }      
    }

    if(adjustedLen == 0){
        trieWord = (uint16_t)(_SPREP_MAX_INDEX_VALUE << 2);
        /* make sure that the value of trieWord is less than the threshold */
        if(trieWord < _SPREP_TYPE_THRESHOLD){   
            /* now set the value in the trie */
            if(!utrie_set32(sprepTrie,codepoint,trieWord)){
                fprintf(stderr,"Could not set the value for code point.\n");
                exit(U_ILLEGAL_ARGUMENT_ERROR);   
            }
            /* value is set so just return */
            return;
        }else{
            fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
            exit(U_ILLEGAL_CHAR_FOUND);
        }
    }

    if(adjustedLen == 1){
        /* calculate the delta */
        int16_t delta = (int16_t)((int32_t)codepoint - (int16_t) mapping[0]);
        if(delta >= SPREP_DELTA_RANGE_NEGATIVE_LIMIT && delta <= SPREP_DELTA_RANGE_POSITIVE_LIMIT){

            trieWord = delta << 2;


            /* make sure that the second bit is OFF */
            if((trieWord & 0x02) != 0 ){
                fprintf(stderr,"The second bit in the trie word is not zero while storing a delta.\n");
                exit(U_INTERNAL_PROGRAM_ERROR);
            }
            /* make sure that the value of trieWord is less than the threshold */
            if(trieWord < _SPREP_TYPE_THRESHOLD){   
                /* now set the value in the trie */
                if(!utrie_set32(sprepTrie,codepoint,trieWord)){
                    fprintf(stderr,"Could not set the value for code point.\n");
                    exit(U_ILLEGAL_ARGUMENT_ERROR);   
                }
                /* value is set so just return */
                return;
            }
        }
        /* 
         * if the delta is not in the given range or if the trieWord is larger than the threshold
         * just fall through for storing the mapping in the mapping table
         */
    }

    map = (UChar*) uprv_calloc(adjustedLen + 1, U_SIZEOF_UCHAR);
    i=0;
    
    while(i<length){
        if(mapping[i] <= 0xFFFF){
            map[i] = (uint16_t)mapping[i];
        }else{
            map[i]   = U16_LEAD(mapping[i]);
            map[i+1] = U16_TRAIL(mapping[i]);
        }
        i++;
    }
    
    value = (ValueStruct*) uprv_malloc(sizeof(ValueStruct));
    value->mapping = map;
    value->type   = type;
    value->length  = adjustedLen;
    if(value->length > _SPREP_MAX_INDEX_TOP_LENGTH){
        mappingDataCapacity++;
    }
    if(maxLength < value->length){
        maxLength = value->length;
    }
    uhash_iput(hashTable,codepoint,value,status);
    mappingDataCapacity += adjustedLen;

    if(U_FAILURE(*status)){
        fprintf(stderr, "Failed to put entries into the hastable. Error: %s\n", u_errorName(*status));
        exit(*status);
    }
}
예제 #7
0
static void 
storeMappingData(){

    int32_t pos = -1;
    const UHashElement* element = NULL;
    ValueStruct* value  = NULL;
    int32_t codepoint = 0;
    int32_t elementCount = 0;
    int32_t writtenElementCount = 0;
    int32_t mappingLength = 1; /* minimum mapping length */
    int32_t oldMappingLength = 0;
    uint16_t trieWord =0;
    int32_t limitIndex = 0;

    if (hashTable == NULL) {
        return;
    }
    elementCount = uhash_count(hashTable);

	/*initialize the mapping data */
    mappingData = (uint16_t*) uprv_calloc(mappingDataCapacity, U_SIZEOF_UCHAR);

    while(writtenElementCount < elementCount){

        while( (element = uhash_nextElement(hashTable, &pos))!=NULL){
            
            codepoint = element->key.integer;
            value = (ValueStruct*)element->value.pointer;
            
            /* store the start of indexes */
            if(oldMappingLength != mappingLength){
                /* Assume that index[] is used according to the enums defined */
                if(oldMappingLength <=_SPREP_MAX_INDEX_TOP_LENGTH){
                    indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex;
                }
                if(oldMappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH &&
                   mappingLength == _SPREP_MAX_INDEX_TOP_LENGTH +1){
                   
                    limitIndex = currentIndex;
                     
                }
                oldMappingLength = mappingLength;
            }

            if(value->length == mappingLength){
                uint32_t savedTrieWord = 0;
                trieWord = currentIndex << 2;
                /* turn on the 2nd bit to signal that the following bits contain an index */
                trieWord += 0x02;
            
                if(trieWord > _SPREP_TYPE_THRESHOLD){
                    fprintf(stderr,"trieWord cannot contain value greater than 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
                    exit(U_ILLEGAL_CHAR_FOUND);
                }
                /* figure out if the code point has type already stored */
                savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
                if(savedTrieWord!=0){
                    if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
                        /* turn on the first bit in trie word */
                        trieWord += 0x01;
                    }else{
                        /* 
                         * the codepoint has value something other than prohibited
                         * and a mapping .. error! 
                         */
                        fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
                        exit(U_ILLEGAL_ARGUMENT_ERROR); 
                    } 
                } 
                
                /* now set the value in the trie */
                if(!utrie_set32(sprepTrie,codepoint,trieWord)){
                    fprintf(stderr,"Could not set the value for code point.\n");
                    exit(U_ILLEGAL_ARGUMENT_ERROR);   
                }

                /* written the trie word for the codepoint... increment the count*/
                writtenElementCount++;

                /* sanity check are we exceeding the max number allowed */
                if(currentIndex+value->length+1 > _SPREP_MAX_INDEX_VALUE){
                    fprintf(stderr, "Too many entries in the mapping table %i. Maximum allowed is %i\n", currentIndex+value->length, _SPREP_MAX_INDEX_VALUE);
                    exit(U_INDEX_OUTOFBOUNDS_ERROR);
                }

                /* copy the mapping data */
                if(currentIndex+value->length+1 <= mappingDataCapacity){
                    /* write the length */
                    if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
                         /* the cast here is safe since we donot expect the length to be > 65535 */
                         mappingData[currentIndex++] = (uint16_t) mappingLength;
                    }
                    /* copy the contents to mappindData array */
                    uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR);
                    currentIndex += value->length;
                   
                }else{
                    /* realloc */
                    UChar* newMappingData = (uint16_t*) uprv_malloc(U_SIZEOF_UCHAR * mappingDataCapacity*2);
                    if(newMappingData == NULL){
                        fprintf(stderr, "Could not realloc the mapping data!\n");
                        exit(U_MEMORY_ALLOCATION_ERROR);
                    }
                    uprv_memmove(newMappingData, mappingData, U_SIZEOF_UCHAR * mappingDataCapacity);
                    mappingDataCapacity *= 2;
                    uprv_free(mappingData);
                    mappingData = newMappingData;
                    /* write the length */
                    if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
                         /* the cast here is safe since we donot expect the length to be > 65535 */
                         mappingData[currentIndex++] = (uint16_t) mappingLength;
                    }
                    /* continue copying */
                    uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR);
                    currentIndex += value->length;
                }
                          
            }
        }
        mappingLength++;
        pos = -1;
    }
    /* set the last length for range check */
    if(mappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH){
        indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex+1;
    }else{
        indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] = limitIndex;
    }
    
}
예제 #8
0
static void
testTrieRanges(const char *testName,
               const SetRange setRanges[], int32_t countSetRanges,
               const CheckRange checkRanges[], int32_t countCheckRanges,
               UBool dataIs32, UBool latin1Linear) {
    union{
        double bogus; /* needed for aligining the storage */
        uint8_t storage[32768];
    } storageHolder;
    UTrieGetFoldingOffset *getFoldingOffset;
    UNewTrieGetFoldedValue *getFoldedValue;
    const CheckRange *enumRanges;
    UNewTrie *newTrie;
    UTrie trie={ 0 };
    uint32_t value, value2;
    UChar32 start, limit;
    int32_t i, length;
    UErrorCode errorCode;
    UBool overwrite, ok;

    log_verbose("\ntesting Trie '%s'\n", testName);
    newTrie=utrie_open(NULL, NULL, 2000,
                       checkRanges[0].value, checkRanges[0].value,
                       latin1Linear);

    /* set values from setRanges[] */
    ok=TRUE;
    for(i=0; i<countSetRanges; ++i) {
        start=setRanges[i].start;
        limit=setRanges[i].limit;
        value=setRanges[i].value;
        overwrite=setRanges[i].overwrite;
        if((limit-start)==1 && overwrite) {
            ok&=utrie_set32(newTrie, start, value);
        } else {
            ok&=utrie_setRange32(newTrie, start, limit, value, overwrite);
        }
    }
    if(!ok) {
        log_err("error: setting values into a trie failed (%s)\n", testName);
        return;
    }

    /* verify that all these values are in the new Trie */
    start=0;
    for(i=0; i<countCheckRanges; ++i) {
        limit=checkRanges[i].limit;
        value=checkRanges[i].value;

        while(start<limit) {
            if(value!=utrie_get32(newTrie, start, NULL)) {
                log_err("error: newTrie(%s)[U+%04lx]==0x%lx instead of 0x%lx\n",
                        testName, start, utrie_get32(newTrie, start, NULL), value);
            }
            ++start;
        }
    }

    if(dataIs32) {
        getFoldingOffset=_testFoldingOffset32;
        getFoldedValue=_testFoldedValue32;
    } else {
        getFoldingOffset=_testFoldingOffset16;
        getFoldedValue=_testFoldedValue16;
    }

    /*
     * code coverage for utrie.c/defaultGetFoldedValue(),
     * pick some combination of parameters for selecting the UTrie defaults
     */
    if(!dataIs32 && latin1Linear) {
        getFoldingOffset=NULL;
        getFoldedValue=NULL;
    }

    errorCode=U_ZERO_ERROR;
    length=utrie_serialize(newTrie, storageHolder.storage, sizeof(storageHolder.storage),
                           getFoldedValue,
                           (UBool)!dataIs32,
                           &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("error: utrie_serialize(%s) failed: %s\n", testName, u_errorName(errorCode));
        utrie_close(newTrie);
        return;
    }
    if (length >= (int32_t)sizeof(storageHolder.storage)) {
        log_err("error: utrie_serialize(%s) needs more memory\n", testName);
        utrie_close(newTrie);
        return;
    }

    /* test linear Latin-1 range from utrie_getData() */
    if(latin1Linear) {
        uint32_t *data;
        int32_t dataLength;

        data=utrie_getData(newTrie, &dataLength);
        start=0;
        for(i=0; i<countCheckRanges && start<=0xff; ++i) {
            limit=checkRanges[i].limit;
            value=checkRanges[i].value;

            while(start<limit && start<=0xff) {
                if(value!=data[UTRIE_DATA_BLOCK_LENGTH+start]) {
                    log_err("error: newTrie(%s).latin1Data[U+%04lx]==0x%lx instead of 0x%lx\n",
                            testName, start, data[UTRIE_DATA_BLOCK_LENGTH+start], value);
                }
                ++start;
            }
        }
    }

    utrie_close(newTrie);

    errorCode=U_ZERO_ERROR;
    if(!utrie_unserialize(&trie, storageHolder.storage, length, &errorCode)) {
        log_err("error: utrie_unserialize() failed, %s\n", u_errorName(errorCode));
        return;
    }
    if(getFoldingOffset!=NULL) {
        trie.getFoldingOffset=getFoldingOffset;
    }

    if(dataIs32!=(trie.data32!=NULL)) {
        log_err("error: trie serialization (%s) did not preserve 32-bitness\n", testName);
    }
    if(latin1Linear!=trie.isLatin1Linear) {
        log_err("error: trie serialization (%s) did not preserve Latin-1-linearity\n", testName);
    }

    /* verify that all these values are in the unserialized Trie */
    start=0;
    for(i=0; i<countCheckRanges; ++i) {
        limit=checkRanges[i].limit;
        value=checkRanges[i].value;

        if(start==0xd800) {
            /* skip surrogates */
            start=limit;
            continue;
        }

        while(start<limit) {
            if(start<=0xffff) {
                if(dataIs32) {
                    value2=UTRIE_GET32_FROM_BMP(&trie, start);
                } else {
                    value2=UTRIE_GET16_FROM_BMP(&trie, start);
                }
                if(value!=value2) {
                    log_err("error: unserialized trie(%s).fromBMP(U+%04lx)==0x%lx instead of 0x%lx\n",
                            testName, start, value2, value);
                }
                if(!U16_IS_LEAD(start)) {
                    if(dataIs32) {
                        value2=UTRIE_GET32_FROM_LEAD(&trie, start);
                    } else {
                        value2=UTRIE_GET16_FROM_LEAD(&trie, start);
                    }
                    if(value!=value2) {
                        log_err("error: unserialized trie(%s).fromLead(U+%04lx)==0x%lx instead of 0x%lx\n",
                                testName, start, value2, value);
                    }
                }
            }
            if(dataIs32) {
                UTRIE_GET32(&trie, start, value2);
            } else {
                UTRIE_GET16(&trie, start, value2);
            }
            if(value!=value2) {
                log_err("error: unserialized trie(%s).get(U+%04lx)==0x%lx instead of 0x%lx\n",
                        testName, start, value2, value);
            }
            ++start;
        }
    }

    /* enumerate and verify all ranges */
    enumRanges=checkRanges+1;
    utrie_enum(&trie, _testEnumValue, _testEnumRange, &enumRanges);

    /* test linear Latin-1 range */
    if(trie.isLatin1Linear) {
        if(trie.data32!=NULL) {
            const uint32_t *latin1=UTRIE_GET32_LATIN1(&trie);

            for(start=0; start<0x100; ++start) {
                if(latin1[start]!=UTRIE_GET32_FROM_LEAD(&trie, start)) {
                    log_err("error: (%s) trie.latin1[U+%04lx]=0x%lx!=0x%lx=trie.get32(U+%04lx)\n",
                            testName, start, latin1[start], UTRIE_GET32_FROM_LEAD(&trie, start), start);
                }
            }
        } else {
            const uint16_t *latin1=UTRIE_GET16_LATIN1(&trie);

            for(start=0; start<0x100; ++start) {
                if(latin1[start]!=UTRIE_GET16_FROM_LEAD(&trie, start)) {
                    log_err("error: (%s) trie.latin1[U+%04lx]=0x%lx!=0x%lx=trie.get16(U+%04lx)\n",
                            testName, start, latin1[start], UTRIE_GET16_FROM_LEAD(&trie, start), start);
                }
            }
        }
    }

    testTrieIteration(testName, &trie, checkRanges, countCheckRanges);
}