Ejemplo n.º 1
0
/* open uprops.icu */
static void
_openProps(UCharProps *ucp, UErrorCode *pErrorCode) {
    const uint32_t *p;
    int32_t length;

    ucp->propsData=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    ucp->pData32=p=(const uint32_t *)udata_getMemory(ucp->propsData);

    /* unserialize the trie; it is directly after the int32_t indexes[UPROPS_INDEX_COUNT] */
    length=(int32_t)p[UPROPS_PROPS32_INDEX]*4;
    length=utrie_unserialize(&ucp->propsTrie, (const uint8_t *)(p+UPROPS_INDEX_COUNT), length-64, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    /* unserialize the properties vectors trie */
    length=(int32_t)(p[UPROPS_ADDITIONAL_VECTORS_INDEX]-p[UPROPS_ADDITIONAL_TRIE_INDEX])*4;
    if(length>0) {
        length=utrie_unserialize(&ucp->propsVectorsTrie, (const uint8_t *)(p+p[UPROPS_ADDITIONAL_TRIE_INDEX]), length, pErrorCode);
    }
    if(length<=0 || U_FAILURE(*pErrorCode)) {
        /*
         * length==0:
         * Allow the properties vectors trie to be missing -
         * also requires propsVectorsColumns=indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]
         * to be zero so that this trie is never accessed.
         */
        uprv_memset(&ucp->propsVectorsTrie, 0, sizeof(ucp->propsVectorsTrie));
    }
}
Ejemplo n.º 2
0
static UCaseProps *
ucase_openData(UCaseProps *cspProto,
               const uint8_t *bin, int32_t length, UErrorCode *pErrorCode) {
    UCaseProps *csp;
    int32_t size;

    cspProto->indexes=(const int32_t *)bin;
    if( (length>=0 && length<16*4) ||
        cspProto->indexes[UCASE_IX_INDEX_TOP]<16
    ) {
        /* length or indexes[] too short for minimum indexes[] length of 16 */
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return NULL;
    }
    size=cspProto->indexes[UCASE_IX_INDEX_TOP]*4;
    if(length>=0) {
        if(length>=size && length>=cspProto->indexes[UCASE_IX_LENGTH]) {
            length-=size;
        } else {
            /* length too short for indexes[] or for the whole data length */
            *pErrorCode=U_INVALID_FORMAT_ERROR;
            return NULL;
        }
    }
    bin+=size;
    /* from here on, assume that the sizes of the items fit into the total length */

    /* unserialize the trie, after indexes[] */
    size=cspProto->indexes[UCASE_IX_TRIE_SIZE];
    utrie_unserialize(&cspProto->trie, bin, size, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    bin+=size;

    /* get exceptions[] */
    size=2*cspProto->indexes[UCASE_IX_EXC_LENGTH];
    cspProto->exceptions=(const uint16_t *)bin;
    bin+=size;

    /* get unfold[] */
    size=2*cspProto->indexes[UCASE_IX_UNFOLD_LENGTH];
    if(size!=0) {
        cspProto->unfold=(const UChar *)bin;
        bin+=size;
    } else {
        cspProto->unfold=NULL;
    }

    /* allocate, copy, and return the new UCaseProps */
    csp=(UCaseProps *)uprv_malloc(sizeof(UCaseProps));
    if(csp==NULL) {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    } else {
        uprv_memcpy(csp, cspProto, sizeof(UCaseProps));
        return csp;
    }
}
Ejemplo n.º 3
0
static UBiDiProps *
ubidi_openData(UBiDiProps *bdpProto,
               const uint8_t *bin, int32_t length, UErrorCode *pErrorCode) {
    UBiDiProps *bdp;
    int32_t size;

    bdpProto->indexes=(const int32_t *)bin;
    if( (length>=0 && length<16*4) ||
        bdpProto->indexes[UBIDI_IX_INDEX_TOP]<16
    ) {
        /* length or indexes[] too short for minimum indexes[] length of 16 */
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return NULL;
    }
    size=bdpProto->indexes[UBIDI_IX_INDEX_TOP]*4;
    if(length>=0) {
        if(length>=size && length>=bdpProto->indexes[UBIDI_IX_LENGTH]) {
            length-=size;
        } else {
            /* length too short for indexes[] or for the whole data length */
            *pErrorCode=U_INVALID_FORMAT_ERROR;
            return NULL;
        }
    }
    bin+=size;
    /* from here on, assume that the sizes of the items fit into the total length */

    /* unserialize the trie, after indexes[] */
    size=bdpProto->indexes[UBIDI_IX_TRIE_SIZE];
    utrie_unserialize(&bdpProto->trie, bin, size, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    bin+=size;

    /* get mirrors[] */
    size=4*bdpProto->indexes[UBIDI_IX_MIRROR_LENGTH];
    bdpProto->mirrors=(const uint32_t *)bin;
    bin+=size;

    /* get jgArray[] */
    size=bdpProto->indexes[UBIDI_IX_JG_LIMIT]-bdpProto->indexes[UBIDI_IX_JG_START];
    bdpProto->jgArray=bin;
    bin+=size;

    /* allocate, copy, and return the new UBiDiProps */
    bdp=(UBiDiProps *)uprv_malloc(sizeof(UBiDiProps));
    if(bdp==NULL) {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    } else {
        uprv_memcpy(bdp, bdpProto, sizeof(UBiDiProps));
        return bdp;
    }
}
Ejemplo n.º 4
0
//-----------------------------------------------------------------------------
//
//    init().   Does most of the work of construction, shared between the
//              constructors.
//
//-----------------------------------------------------------------------------
void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }
    fHeader = data;
    if (fHeader->fMagic != 0xb1a0 || 
        !(fHeader->fFormatVersion[0] == 3 ||         // ICU 3.4 
          *(int32_t *)fHeader->fFormatVersion == 1))  // ICU 3.2 and earlier.
    {
        status = U_INVALID_FORMAT_ERROR;
        return;
    }

    fUDataMem     = NULL;
    fReverseTable = NULL;
    fSafeFwdTable = NULL;
    fSafeRevTable = NULL;
    if (data->fFTableLen != 0) {
        fForwardTable = (RBBIStateTable *)((char *)data + fHeader->fFTable);
    }
    if (data->fRTableLen != 0) {
        fReverseTable = (RBBIStateTable *)((char *)data + fHeader->fRTable);
    }
    if (data->fSFTableLen != 0) {
        fSafeFwdTable = (RBBIStateTable *)((char *)data + fHeader->fSFTable);
    }
    if (data->fSRTableLen != 0) {
        fSafeRevTable = (RBBIStateTable *)((char *)data + fHeader->fSRTable);
    }


    utrie_unserialize(&fTrie,
                       (uint8_t *)data + fHeader->fTrie,
                       fHeader->fTrieLen,
                       &status);
    if (U_FAILURE(status)) {
        return;
    }
    fTrie.getFoldingOffset=getFoldingOffset;


    fRuleSource   = (UChar *)((char *)data + fHeader->fRuleSource);
    fRuleString.setTo(TRUE, fRuleSource, -1);
    U_ASSERT(data->fRuleSourceLen > 0);

    fRuleStatusTable = (int32_t *)((char *)data + fHeader->fStatusTable);
    fStatusMaxIdx    = data->fStatusTableLen / sizeof(int32_t);

    fRefCount = 1;

#ifdef RBBI_DEBUG
    char *debugEnv = getenv("U_RBBIDEBUG");
    if (debugEnv && uprv_strstr(debugEnv, "data")) {this->printData();}
#endif
}
Ejemplo n.º 5
0
void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }
    fHeader = data;
    if (fHeader->fMagic != 0xb1a0 || fHeader->fFormatVersion[0] != 3) 
    {
        status = U_INVALID_FORMAT_ERROR;
        return;
    }
    // Note: in ICU version 3.2 and earlier, there was a formatVersion 1
    //       that is no longer supported.  At that time fFormatVersion was
    //       an int32_t field, rather than an array of 4 bytes.

    fDontFreeData = FALSE;
    if (data->fFTableLen != 0) {
        fForwardTable = (RBBIStateTable *)((char *)data + fHeader->fFTable);
    }
    if (data->fRTableLen != 0) {
        fReverseTable = (RBBIStateTable *)((char *)data + fHeader->fRTable);
    }
    if (data->fSFTableLen != 0) {
        fSafeFwdTable = (RBBIStateTable *)((char *)data + fHeader->fSFTable);
    }
    if (data->fSRTableLen != 0) {
        fSafeRevTable = (RBBIStateTable *)((char *)data + fHeader->fSRTable);
    }


    utrie_unserialize(&fTrie,
                       (uint8_t *)data + fHeader->fTrie,
                       fHeader->fTrieLen,
                       &status);
    if (U_FAILURE(status)) {
        return;
    }
    fTrie.getFoldingOffset=getFoldingOffset;


    fRuleSource   = (UChar *)((char *)data + fHeader->fRuleSource);
    fRuleString.setTo(TRUE, fRuleSource, -1);
    U_ASSERT(data->fRuleSourceLen > 0);

    fRuleStatusTable = (int32_t *)((char *)data + fHeader->fStatusTable);
    fStatusMaxIdx    = data->fStatusTableLen / sizeof(int32_t);

    fRefCount = 1;

#ifdef RBBI_DEBUG
    char *debugEnv = getenv("U_RBBIDEBUG");
    if (debugEnv && uprv_strstr(debugEnv, "data")) {this->printData();}
#endif
}
Ejemplo n.º 6
0
void BreakIterator::init(const RBBIDataHeader* data, UErrorCode &status)
{
	if (U_FAILURE(status)) {
        return;
    }
	fHeader = data;
    if (fHeader->fMagic != 0xb1a0 || 
        !(fHeader->fFormatVersion[0] == 3 ||         // ICU 3.4 
          *(int32_t *)fHeader->fFormatVersion == 1))  // ICU 3.2 and earlier.
    {
        status = U_INVALID_FORMAT_ERROR;
        return;
    }

    fReverseTable = NULL;
    fSafeFwdTable = NULL;
    fSafeRevTable = NULL;
    if (data->fFTableLen != 0) {
        fForwardTable = (RBBIStateTable *)((char *)data + fHeader->fFTable);
    }
    if (data->fRTableLen != 0) {
        fReverseTable = (RBBIStateTable *)((char *)data + fHeader->fRTable);
    }
    if (data->fSFTableLen != 0) {
        fSafeFwdTable = (RBBIStateTable *)((char *)data + fHeader->fSFTable);
    }
    if (data->fSRTableLen != 0) {
        fSafeRevTable = (RBBIStateTable *)((char *)data + fHeader->fSRTable);
    }

    utrie_unserialize(&fTrie,
                       (uint8_t *)data + fHeader->fTrie,
                       fHeader->fTrieLen,
                       &status);
    if (U_FAILURE(status)) {
        return;
    }
    fTrie.getFoldingOffset=getFoldingOffset;


    fRuleSource   = (UChar *)((char *)data + fHeader->fRuleSource);
    fRuleString.setTo(TRUE, fRuleSource, -1);
    U_ASSERT(data->fRuleSourceLen > 0);

    fRuleStatusTable = (int32_t *)((char *)data + fHeader->fStatusTable);
    fStatusMaxIdx    = data->fStatusTableLen / sizeof(int32_t);
}
Ejemplo n.º 7
0
//-----------------------------------------------------------------------------
//
//    init().   Does most of the work of construction, shared between the
//              constructors.
//
//-----------------------------------------------------------------------------
void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }
    fHeader = data;
    if (fHeader->fMagic != 0xb1a0) {
        status = U_BRK_INTERNAL_ERROR;
        return;
    }

    fUDataMem     = NULL;
    fForwardTable = (RBBIStateTable *)((char *)data + fHeader->fFTable);
    fReverseTable = NULL;
    if (data->fRTableLen != 0) {
        fReverseTable = (RBBIStateTable *)((char *)data + fHeader->fRTable);
    }


    utrie_unserialize(&fTrie,
                       (uint8_t *)data + fHeader->fTrie,
                       fHeader->fTrieLen,
                       &status);
    if (U_FAILURE(status)) {
        return;
    }
    fTrie.getFoldingOffset=getFoldingOffset;


    fRuleSource   = (UChar *)((char *)data + fHeader->fRuleSource);
    fRuleString.setTo(TRUE, fRuleSource, -1);

    fRefCount = 1;

#ifdef RBBI_DEBUG
    char *debugEnv = getenv("U_RBBIDEBUG");
    if (debugEnv && uprv_strstr(debugEnv, "data")) {this->printData();}
#endif
}
Ejemplo n.º 8
0
extern void
generateData(const char *dataDir, UBool csource) {
    static int32_t indexes[UBIDI_IX_TOP]={
        UBIDI_IX_TOP
    };
    static uint8_t trieBlock[40000];
    static uint8_t jgArray[0x300]; /* at most for U+0600..U+08FF */

    const uint32_t *row;
    UChar32 start, end, prev, jgStart;
    int32_t i;

    UNewDataMemory *pData;
    UNewTrie *pTrie;
    UErrorCode errorCode=U_ZERO_ERROR;
    int32_t trieSize;
    long dataLength;

    makeMirror();

    pTrie=utrie_open(NULL, NULL, 20000, 0, 0, TRUE);
    if(pTrie==NULL) {
        fprintf(stderr, "genbidi error: unable to create a UNewTrie\n");
        exit(U_MEMORY_ALLOCATION_ERROR);
    }

    prev=jgStart=0;
    for(i=0; (row=upvec_getRow(pv, i, &start, &end))!=NULL && start<UPVEC_FIRST_SPECIAL_CP; ++i) {
        /* store most values from vector column 0 in the trie */
        if(!utrie_setRange32(pTrie, start, end+1, *row, TRUE)) {
            fprintf(stderr, "genbidi error: unable to set trie value (overflow)\n");
            exit(U_BUFFER_OVERFLOW_ERROR);
        }

        /* store Joining_Group values from vector column 1 in a simple byte array */
        if(row[1]!=0) {
            if(start<0x600 || 0x8ff<end) {
                fprintf(stderr, "genbidi error: Joining_Group for out-of-range code points U+%04lx..U+%04lx\n",
                        (long)start, (long)end);
                exit(U_ILLEGAL_ARGUMENT_ERROR);
            }

            if(prev==0) {
                /* first code point with any value */
                prev=jgStart=start;
            } else {
                /* add No_Joining_Group for code points between prev and start */
                while(prev<start) {
                    jgArray[prev++ -jgStart]=0;
                }
            }

            /* set Joining_Group value for start..end */
            while(prev<=end) {
                jgArray[prev++ -jgStart]=(uint8_t)row[1];
            }
        }
    }

    /* finish jgArray, pad to multiple of 4 */
    while((prev-jgStart)&3) {
        jgArray[prev++ -jgStart]=0;
    }
    indexes[UBIDI_IX_JG_START]=jgStart;
    indexes[UBIDI_IX_JG_LIMIT]=prev;

    trieSize=utrie_serialize(pTrie, trieBlock, sizeof(trieBlock), NULL, TRUE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "genbidi error: utrie_serialize failed: %s (length %ld)\n", u_errorName(errorCode), (long)trieSize);
        exit(errorCode);
    }

    indexes[UBIDI_IX_TRIE_SIZE]=trieSize;
    indexes[UBIDI_IX_MIRROR_LENGTH]=mirrorTop;
    indexes[UBIDI_IX_LENGTH]=
        (int32_t)sizeof(indexes)+
        trieSize+
        4*mirrorTop+
        (prev-jgStart);

    if(beVerbose) {
        printf("trie size in bytes:                    %5d\n", (int)trieSize);
        printf("size in bytes of mirroring table:      %5d\n", (int)(4*mirrorTop));
        printf("length of Joining_Group array:         %5d (U+%04x..U+%04x)\n", (int)(prev-jgStart), (int)jgStart, (int)(prev-1));
        printf("data size:                             %5d\n", (int)indexes[UBIDI_IX_LENGTH]);
    }

    indexes[UBIDI_MAX_VALUES_INDEX]=
        ((int32_t)U_CHAR_DIRECTION_COUNT-1)|
        (((int32_t)U_JT_COUNT-1)<<UBIDI_JT_SHIFT)|
        (((int32_t)U_JG_COUNT-1)<<UBIDI_MAX_JG_SHIFT);

    if(csource) {
        /* write .c file for hardcoded data */
        UTrie trie={ NULL };
        UTrie2 *trie2;
        FILE *f;

        utrie_unserialize(&trie, trieBlock, trieSize, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(
                stderr,
                "genbidi error: failed to utrie_unserialize(ubidi.icu trie) - %s\n",
                u_errorName(errorCode));
            exit(errorCode);
        }

        /* use UTrie2 */
        dataInfo.formatVersion[0]=2;
        dataInfo.formatVersion[2]=0;
        dataInfo.formatVersion[3]=0;
        trie2=utrie2_fromUTrie(&trie, 0, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(
                stderr,
                "genbidi error: utrie2_fromUTrie() failed - %s\n",
                u_errorName(errorCode));
            exit(errorCode);
        }
        {
            /* delete lead surrogate code unit values */
            UChar lead;
            trie2=utrie2_cloneAsThawed(trie2, &errorCode);
            for(lead=0xd800; lead<0xdc00; ++lead) {
                utrie2_set32ForLeadSurrogateCodeUnit(trie2, lead, trie2->initialValue, &errorCode);
            }
            utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode);
            if(U_FAILURE(errorCode)) {
                fprintf(
                    stderr,
                    "genbidi error: deleting lead surrogate code unit values failed - %s\n",
                    u_errorName(errorCode));
                exit(errorCode);
            }
        }

        f=usrc_create(dataDir, "ubidi_props_data.c");
        if(f!=NULL) {
            usrc_writeArray(f,
                "static const UVersionInfo ubidi_props_dataVersion={",
                dataInfo.dataVersion, 8, 4,
                "};\n\n");
            usrc_writeArray(f,
                "static const int32_t ubidi_props_indexes[UBIDI_IX_TOP]={",
                indexes, 32, UBIDI_IX_TOP,
                "};\n\n");
            usrc_writeUTrie2Arrays(f,
                "static const uint16_t ubidi_props_trieIndex[%ld]={\n", NULL,
                trie2,
                "\n};\n\n");
            usrc_writeArray(f,
                "static const uint32_t ubidi_props_mirrors[%ld]={\n",
                mirrors, 32, mirrorTop,
                "\n};\n\n");
            usrc_writeArray(f,
                "static const uint8_t ubidi_props_jgArray[%ld]={\n",
                jgArray, 8, prev-jgStart,
                "\n};\n\n");
            fputs(
                "static const UBiDiProps ubidi_props_singleton={\n"
                "  NULL,\n"
                "  ubidi_props_indexes,\n"
                "  ubidi_props_mirrors,\n"
                "  ubidi_props_jgArray,\n",
                f);
            usrc_writeUTrie2Struct(f,
                "  {\n",
                trie2, "ubidi_props_trieIndex", NULL,
                "  },\n");
            usrc_writeArray(f, "  { ", dataInfo.formatVersion, 8, 4, " }\n");
            fputs("};\n", f);
            fclose(f);
        }
        utrie2_close(trie2);
    } else {
        /* write the data */
        pData=udata_create(dataDir, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &dataInfo,
                        haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "genbidi: unable to create data memory, %s\n", u_errorName(errorCode));
            exit(errorCode);
        }

        udata_writeBlock(pData, indexes, sizeof(indexes));
        udata_writeBlock(pData, trieBlock, trieSize);
        udata_writeBlock(pData, mirrors, 4*mirrorTop);
        udata_writeBlock(pData, jgArray, prev-jgStart);

        /* finish up */
        dataLength=udata_finish(pData, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "genbidi: error %d writing the output file\n", errorCode);
            exit(errorCode);
        }

        if(dataLength!=indexes[UBIDI_IX_LENGTH]) {
            fprintf(stderr, "genbidi: data length %ld != calculated size %d\n",
                dataLength, (int)indexes[UBIDI_IX_LENGTH]);
            exit(U_INTERNAL_PROGRAM_ERROR);
        }
    }

    utrie_close(pTrie);
    upvec_close(pv);
}
Ejemplo n.º 9
0
static UBool U_CALLCONV
loadData(UStringPrepProfile* profile,
         const char* path,
         const char* name,
         const char* type,
         UErrorCode* errorCode) {
    /* load Unicode SPREP data from file */
    UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
    UDataMemory *dataMemory;
    const int32_t *p=NULL;
    const uint8_t *pb;
    UVersionInfo normUnicodeVersion;
    int32_t normUniVer, sprepUniVer, normCorrVer;

    if(errorCode==NULL || U_FAILURE(*errorCode)) {
        return 0;
    }

    /* open the data outside the mutex block */
    //TODO: change the path
    dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
    if(U_FAILURE(*errorCode)) {
        return FALSE;
    }

    p=(const int32_t *)udata_getMemory(dataMemory);
    pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
    utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
    _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;


    if(U_FAILURE(*errorCode)) {
        udata_close(dataMemory);
        return FALSE;
    }

    /* in the mutex block, set the data for this process */
    umtx_lock(usprepMutex());
    if(profile->sprepData==NULL) {
        profile->sprepData=dataMemory;
        dataMemory=NULL;
        uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
        uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
    } else {
        p=(const int32_t *)udata_getMemory(profile->sprepData);
    }
    umtx_unlock(usprepMutex());
    /* initialize some variables */
    profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);

    u_getUnicodeVersion(normUnicodeVersion);
    normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
                 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
    sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
                  (dataVersion[2] << 8 ) + (dataVersion[3]);
    normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];

    if(U_FAILURE(*errorCode)){
        udata_close(dataMemory);
        return FALSE;
    }
    if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
        normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
        ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
      ){
        *errorCode = U_INVALID_FORMAT_ERROR;
        udata_close(dataMemory);
        return FALSE;
    }
    profile->isDataLoaded = TRUE;

    /* if a different thread set it first, then close the extra data */
    if(dataMemory!=NULL) {
        udata_close(dataMemory); /* NULL if it was set correctly */
    }


    return profile->isDataLoaded;
}
Ejemplo n.º 10
0
extern void
generateData(const char *dataDir, UBool csource) {
    static int32_t indexes[UPROPS_INDEX_COUNT]={
        0, 0, 0, 0,
        0, 0, 0, 0,
        0, 0, 0, 0,
        0, 0, 0, 0
    };
    static uint8_t trieBlock[40000];
    static uint8_t additionalProps[120000];

    UNewDataMemory *pData;
    UErrorCode errorCode=U_ZERO_ERROR;
    uint32_t size = 0;
    int32_t trieSize, additionalPropsSize, offset;
    long dataLength;

    trieSize=utrie_serialize(pTrie, trieBlock, sizeof(trieBlock), NULL, TRUE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "error: utrie_serialize failed: %s (length %ld)\n", u_errorName(errorCode), (long)trieSize);
        exit(errorCode);
    }

    offset=sizeof(indexes)/4;               /* uint32_t offset to the properties trie */

    /* round up trie size to 4-alignment */
    trieSize=(trieSize+3)&~3;
    offset+=trieSize>>2;
    indexes[UPROPS_PROPS32_INDEX]=          /* set indexes to the same offsets for empty */
    indexes[UPROPS_EXCEPTIONS_INDEX]=       /* structures from the old format version 3 */
    indexes[UPROPS_EXCEPTIONS_TOP_INDEX]=   /* so that less runtime code has to be changed */
    indexes[UPROPS_ADDITIONAL_TRIE_INDEX]=offset;

    if(beVerbose) {
        printf("trie size in bytes:                    %5u\n", (int)trieSize);
    }

    if(csource) {
        /* write .c file for hardcoded data */
        UTrie trie={ NULL };
        UTrie2 *trie2;
        FILE *f;

        utrie_unserialize(&trie, trieBlock, trieSize, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(
                stderr,
                "genprops error: failed to utrie_unserialize(uprops.icu main trie) - %s\n",
                u_errorName(errorCode));
            exit(errorCode);
        }

        /* use UTrie2 */
        trie2=utrie2_fromUTrie(&trie, 0, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(
                stderr,
                "genprops error: utrie2_fromUTrie() failed - %s\n",
                u_errorName(errorCode));
            exit(errorCode);
        }
        {
            /* delete lead surrogate code unit values */
            UChar lead;
            trie2=utrie2_cloneAsThawed(trie2, &errorCode);
            for(lead=0xd800; lead<0xdc00; ++lead) {
                utrie2_set32ForLeadSurrogateCodeUnit(trie2, lead, trie2->initialValue, &errorCode);
            }
            utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode);
            if(U_FAILURE(errorCode)) {
                fprintf(
                    stderr,
                    "genprops error: deleting lead surrogate code unit values failed - %s\n",
                    u_errorName(errorCode));
                exit(errorCode);
            }
        }

        f=usrc_create(dataDir, "uchar_props_data.c");
        if(f!=NULL) {
            /* unused
            usrc_writeArray(f,
                "static const UVersionInfo formatVersion={",
                dataInfo.formatVersion, 8, 4,
                "};\n\n");
             */
            usrc_writeArray(f,
                "static const UVersionInfo dataVersion={",
                dataInfo.dataVersion, 8, 4,
                "};\n\n");
            usrc_writeUTrie2Arrays(f,
                "static const uint16_t propsTrie_index[%ld]={\n", NULL,
                trie2,
                "\n};\n\n");
            usrc_writeUTrie2Struct(f,
                "static const UTrie2 propsTrie={\n",
                trie2, "propsTrie_index", NULL,
                "};\n\n");

            additionalPropsSize=writeAdditionalData(f, additionalProps, sizeof(additionalProps), indexes);
            size=4*offset+additionalPropsSize;      /* total size of data */

            usrc_writeArray(f,
                "static const int32_t indexes[UPROPS_INDEX_COUNT]={",
                indexes, 32, UPROPS_INDEX_COUNT,
                "};\n\n");
            fclose(f);
        }
        utrie2_close(trie2);
    } else {
        /* write the data */
        pData=udata_create(dataDir, DATA_TYPE, DATA_NAME, &dataInfo,
                        haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "genprops: unable to create data memory, %s\n", u_errorName(errorCode));
            exit(errorCode);
        }

        additionalPropsSize=writeAdditionalData(NULL, additionalProps, sizeof(additionalProps), indexes);
        size=4*offset+additionalPropsSize;      /* total size of data */

        udata_writeBlock(pData, indexes, sizeof(indexes));
        udata_writeBlock(pData, trieBlock, trieSize);
        udata_writeBlock(pData, additionalProps, additionalPropsSize);

        /* finish up */
        dataLength=udata_finish(pData, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "genprops: error %d writing the output file\n", errorCode);
            exit(errorCode);
        }

        if(dataLength!=(long)size) {
            fprintf(stderr, "genprops: data length %ld != calculated size %lu\n",
                dataLength, (unsigned long)size);
            exit(U_INTERNAL_PROGRAM_ERROR);
        }
    }

    if(beVerbose) {
        printf("data size:                            %6lu\n", (unsigned long)size);
    }
}
Ejemplo n.º 11
0
U_CDECL_END

#if !UNORM_HARDCODE_DATA

static int8_t
loadNormData(UErrorCode &errorCode) {
    /* load Unicode normalization data from file */

    /*
     * This lazy intialization with double-checked locking (without mutex protection for
     * haveNormData==0) is transiently unsafe under certain circumstances.
     * Check the readme and use u_init() if necessary.
     *
     * While u_init() initializes the main normalization data via this functions,
     * it does not do so for exclusion sets (which are fully mutexed).
     * This is because
     * - there can be many exclusion sets
     * - they are rarely used
     * - they are not usually used in execution paths that are
     *   as performance-sensitive as others
     *   (e.g., IDNA takes more time than unorm_quickCheck() anyway)
     *
     *  TODO:  Remove code in support for non-hardcoded data.  u_init() is now advertised
     *         as not being required for thread safety, and we can't reasonably
     *         revert to requiring it.
     */
    if(haveNormData==0) {
        UTrie _normTrie={ 0,0,0,0,0,0,0 }, _fcdTrie={ 0,0,0,0,0,0,0 }, _auxTrie={ 0,0,0,0,0,0,0 };
        UDataMemory *data;

        const int32_t *p=NULL;
        const uint8_t *pb;

        if(&errorCode==NULL || U_FAILURE(errorCode)) {
            return 0;
        }

        /* open the data outside the mutex block */
        data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errorCode);
        dataErrorCode=errorCode;
        if(U_FAILURE(errorCode)) {
            return haveNormData=-1;
        }

        p=(const int32_t *)udata_getMemory(data);
        pb=(const uint8_t *)(p+_NORM_INDEX_TOP);
        utrie_unserialize(&_normTrie, pb, p[_NORM_INDEX_TRIE_SIZE], &errorCode);
        _normTrie.getFoldingOffset=getFoldingNormOffset;

        pb+=p[_NORM_INDEX_TRIE_SIZE]+p[_NORM_INDEX_UCHAR_COUNT]*2+p[_NORM_INDEX_COMBINE_DATA_COUNT]*2;
        if(p[_NORM_INDEX_FCD_TRIE_SIZE]!=0) {
            utrie_unserialize(&_fcdTrie, pb, p[_NORM_INDEX_FCD_TRIE_SIZE], &errorCode);
        }
        pb+=p[_NORM_INDEX_FCD_TRIE_SIZE];

        if(p[_NORM_INDEX_AUX_TRIE_SIZE]!=0) {
            utrie_unserialize(&_auxTrie, pb, p[_NORM_INDEX_AUX_TRIE_SIZE], &errorCode);
            _auxTrie.getFoldingOffset=getFoldingAuxOffset;
        }

        if(U_FAILURE(errorCode)) {
            dataErrorCode=errorCode;
            udata_close(data);
            return haveNormData=-1;
        }

        /* in the mutex block, set the data for this process */
        umtx_lock(NULL);
        if(normData==NULL) {
            normData=data;
            data=NULL;

            uprv_memcpy(&indexes, p, sizeof(indexes));
            uprv_memcpy(&normTrie, &_normTrie, sizeof(UTrie));
            uprv_memcpy(&fcdTrie, &_fcdTrie, sizeof(UTrie));
            uprv_memcpy(&auxTrie, &_auxTrie, sizeof(UTrie));
        } else {
            p=(const int32_t *)udata_getMemory(normData);
        }

        /* initialize some variables */
        extraData=(uint16_t *)((uint8_t *)(p+_NORM_INDEX_TOP)+indexes[_NORM_INDEX_TRIE_SIZE]);
        combiningTable=extraData+indexes[_NORM_INDEX_UCHAR_COUNT];
        formatVersion_2_1=formatVersion[0]>2 || (formatVersion[0]==2 && formatVersion[1]>=1);
        formatVersion_2_2=formatVersion[0]>2 || (formatVersion[0]==2 && formatVersion[1]>=2);
        if(formatVersion_2_1) {
            canonStartSets=combiningTable+
                indexes[_NORM_INDEX_COMBINE_DATA_COUNT]+
                (indexes[_NORM_INDEX_FCD_TRIE_SIZE]+indexes[_NORM_INDEX_AUX_TRIE_SIZE])/2;
        }
        haveNormData=1;
        ucln_common_registerCleanup(UCLN_COMMON_UNORM, unorm_cleanup);
        umtx_unlock(NULL);

        /* if a different thread set it first, then close the extra data */
        if(data!=NULL) {
            udata_close(data); /* NULL if it was set correctly */
        }
    }

    return haveNormData;
}
Ejemplo n.º 12
0
U_CFUNC int32_t
writeAdditionalData(FILE *f, uint8_t *p, int32_t capacity, int32_t indexes[UPROPS_INDEX_COUNT]) {
    const uint32_t *pvArray;
    int32_t pvRows, pvCount;
    int32_t length;
    UErrorCode errorCode;

    pvArray=upvec_getArray(pv, &pvRows, NULL);
    pvCount=pvRows*UPROPS_VECTOR_WORDS;

    errorCode=U_ZERO_ERROR;
    length=utrie_serialize(newTrie, p, capacity, NULL, TRUE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "genprops error: unable to serialize trie for additional properties: %s\n", u_errorName(errorCode));
        exit(errorCode);
    }
    if(p!=NULL) {
        if(beVerbose) {
            printf("size in bytes of additional props trie:%5u\n", (int)length);
        }
        if(f!=NULL) {
            UTrie trie={ NULL };
            UTrie2 *trie2;

            utrie_unserialize(&trie, p, length, &errorCode);
            if(U_FAILURE(errorCode)) {
                fprintf(
                    stderr,
                    "genprops error: failed to utrie_unserialize(trie for additional properties) - %s\n",
                    u_errorName(errorCode));
                exit(errorCode);
            }

            /* use UTrie2 */
            trie2=utrie2_fromUTrie(&trie, trie.initialValue, &errorCode);
            if(U_FAILURE(errorCode)) {
                fprintf(
                    stderr,
                    "genprops error: utrie2_fromUTrie() failed - %s\n",
                    u_errorName(errorCode));
                exit(errorCode);
            }
            {
                /* delete lead surrogate code unit values */
                UChar lead;
                trie2=utrie2_cloneAsThawed(trie2, &errorCode);
                for(lead=0xd800; lead<0xdc00; ++lead) {
                    utrie2_set32ForLeadSurrogateCodeUnit(trie2, lead, trie2->initialValue, &errorCode);
                }
                utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode);
                if(U_FAILURE(errorCode)) {
                    fprintf(
                        stderr,
                        "genbidi error: deleting lead surrogate code unit values failed - %s\n",
                        u_errorName(errorCode));
                    exit(errorCode);
                }
            }

            usrc_writeUTrie2Arrays(f,
                "static const uint16_t propsVectorsTrie_index[%ld]={\n", NULL,
                trie2,
                "\n};\n\n");
            usrc_writeUTrie2Struct(f,
                "static const UTrie2 propsVectorsTrie={\n",
                trie2, "propsVectorsTrie_index", NULL,
                "};\n\n");

            utrie2_close(trie2);
        }

        p+=length;
        capacity-=length;

        /* set indexes */
        indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]=
            indexes[UPROPS_ADDITIONAL_TRIE_INDEX]+length/4;
        indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]=UPROPS_VECTOR_WORDS;
        indexes[UPROPS_RESERVED_INDEX]=
            indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]+pvCount;

        indexes[UPROPS_MAX_VALUES_INDEX]=
            (((int32_t)U_EA_COUNT-1)<<UPROPS_EA_SHIFT)|
            (((int32_t)UBLOCK_COUNT-1)<<UPROPS_BLOCK_SHIFT)|
            (((int32_t)USCRIPT_CODE_LIMIT-1)&UPROPS_SCRIPT_MASK);
        indexes[UPROPS_MAX_VALUES_2_INDEX]=
            (((int32_t)U_LB_COUNT-1)<<UPROPS_LB_SHIFT)|
            (((int32_t)U_SB_COUNT-1)<<UPROPS_SB_SHIFT)|
            (((int32_t)U_WB_COUNT-1)<<UPROPS_WB_SHIFT)|
            (((int32_t)U_GCB_COUNT-1)<<UPROPS_GCB_SHIFT)|
            ((int32_t)U_DT_COUNT-1);
    }

    if(p!=NULL && (pvCount*4)<=capacity) {
        if(f!=NULL) {
            usrc_writeArray(f,
                "static const uint32_t propsVectors[%ld]={\n",
                pvArray, 32, pvCount,
                "};\n\n");
            fprintf(f, "static const int32_t countPropsVectors=%ld;\n", (long)pvCount);
            fprintf(f, "static const int32_t propsVectorsColumns=%ld;\n", (long)indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]);
        } else {
            uprv_memcpy(p, pvArray, pvCount*4);
        }
        if(beVerbose) {
            printf("number of additional props vectors:    %5u\n", (int)pvRows);
            printf("number of 32-bit words per vector:     %5u\n", UPROPS_VECTOR_WORDS);
        }
    }
    length+=pvCount*4;

    return length;
}
Ejemplo n.º 13
0
static void
testTrieRanges(const char *testName,
               const SetRange setRanges[], int32_t countSetRanges,
               const CheckRange checkRanges[], int32_t countCheckRanges,
               UBool dataIs32, UBool latin1Linear) {
    union{
        double bogus; /* needed for aligining the storage */
        uint8_t storage[32768];
    } storageHolder;
    UTrieGetFoldingOffset *getFoldingOffset;
    UNewTrieGetFoldedValue *getFoldedValue;
    const CheckRange *enumRanges;
    UNewTrie *newTrie;
    UTrie trie={ 0 };
    uint32_t value, value2;
    UChar32 start, limit;
    int32_t i, length;
    UErrorCode errorCode;
    UBool overwrite, ok;

    log_verbose("\ntesting Trie '%s'\n", testName);
    newTrie=utrie_open(NULL, NULL, 2000,
                       checkRanges[0].value, checkRanges[0].value,
                       latin1Linear);

    /* set values from setRanges[] */
    ok=TRUE;
    for(i=0; i<countSetRanges; ++i) {
        start=setRanges[i].start;
        limit=setRanges[i].limit;
        value=setRanges[i].value;
        overwrite=setRanges[i].overwrite;
        if((limit-start)==1 && overwrite) {
            ok&=utrie_set32(newTrie, start, value);
        } else {
            ok&=utrie_setRange32(newTrie, start, limit, value, overwrite);
        }
    }
    if(!ok) {
        log_err("error: setting values into a trie failed (%s)\n", testName);
        return;
    }

    /* verify that all these values are in the new Trie */
    start=0;
    for(i=0; i<countCheckRanges; ++i) {
        limit=checkRanges[i].limit;
        value=checkRanges[i].value;

        while(start<limit) {
            if(value!=utrie_get32(newTrie, start, NULL)) {
                log_err("error: newTrie(%s)[U+%04lx]==0x%lx instead of 0x%lx\n",
                        testName, start, utrie_get32(newTrie, start, NULL), value);
            }
            ++start;
        }
    }

    if(dataIs32) {
        getFoldingOffset=_testFoldingOffset32;
        getFoldedValue=_testFoldedValue32;
    } else {
        getFoldingOffset=_testFoldingOffset16;
        getFoldedValue=_testFoldedValue16;
    }

    /*
     * code coverage for utrie.c/defaultGetFoldedValue(),
     * pick some combination of parameters for selecting the UTrie defaults
     */
    if(!dataIs32 && latin1Linear) {
        getFoldingOffset=NULL;
        getFoldedValue=NULL;
    }

    errorCode=U_ZERO_ERROR;
    length=utrie_serialize(newTrie, storageHolder.storage, sizeof(storageHolder.storage),
                           getFoldedValue,
                           (UBool)!dataIs32,
                           &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("error: utrie_serialize(%s) failed: %s\n", testName, u_errorName(errorCode));
        utrie_close(newTrie);
        return;
    }
    if (length >= (int32_t)sizeof(storageHolder.storage)) {
        log_err("error: utrie_serialize(%s) needs more memory\n", testName);
        utrie_close(newTrie);
        return;
    }

    /* test linear Latin-1 range from utrie_getData() */
    if(latin1Linear) {
        uint32_t *data;
        int32_t dataLength;

        data=utrie_getData(newTrie, &dataLength);
        start=0;
        for(i=0; i<countCheckRanges && start<=0xff; ++i) {
            limit=checkRanges[i].limit;
            value=checkRanges[i].value;

            while(start<limit && start<=0xff) {
                if(value!=data[UTRIE_DATA_BLOCK_LENGTH+start]) {
                    log_err("error: newTrie(%s).latin1Data[U+%04lx]==0x%lx instead of 0x%lx\n",
                            testName, start, data[UTRIE_DATA_BLOCK_LENGTH+start], value);
                }
                ++start;
            }
        }
    }

    utrie_close(newTrie);

    errorCode=U_ZERO_ERROR;
    if(!utrie_unserialize(&trie, storageHolder.storage, length, &errorCode)) {
        log_err("error: utrie_unserialize() failed, %s\n", u_errorName(errorCode));
        return;
    }
    if(getFoldingOffset!=NULL) {
        trie.getFoldingOffset=getFoldingOffset;
    }

    if(dataIs32!=(trie.data32!=NULL)) {
        log_err("error: trie serialization (%s) did not preserve 32-bitness\n", testName);
    }
    if(latin1Linear!=trie.isLatin1Linear) {
        log_err("error: trie serialization (%s) did not preserve Latin-1-linearity\n", testName);
    }

    /* verify that all these values are in the unserialized Trie */
    start=0;
    for(i=0; i<countCheckRanges; ++i) {
        limit=checkRanges[i].limit;
        value=checkRanges[i].value;

        if(start==0xd800) {
            /* skip surrogates */
            start=limit;
            continue;
        }

        while(start<limit) {
            if(start<=0xffff) {
                if(dataIs32) {
                    value2=UTRIE_GET32_FROM_BMP(&trie, start);
                } else {
                    value2=UTRIE_GET16_FROM_BMP(&trie, start);
                }
                if(value!=value2) {
                    log_err("error: unserialized trie(%s).fromBMP(U+%04lx)==0x%lx instead of 0x%lx\n",
                            testName, start, value2, value);
                }
                if(!U16_IS_LEAD(start)) {
                    if(dataIs32) {
                        value2=UTRIE_GET32_FROM_LEAD(&trie, start);
                    } else {
                        value2=UTRIE_GET16_FROM_LEAD(&trie, start);
                    }
                    if(value!=value2) {
                        log_err("error: unserialized trie(%s).fromLead(U+%04lx)==0x%lx instead of 0x%lx\n",
                                testName, start, value2, value);
                    }
                }
            }
            if(dataIs32) {
                UTRIE_GET32(&trie, start, value2);
            } else {
                UTRIE_GET16(&trie, start, value2);
            }
            if(value!=value2) {
                log_err("error: unserialized trie(%s).get(U+%04lx)==0x%lx instead of 0x%lx\n",
                        testName, start, value2, value);
            }
            ++start;
        }
    }

    /* enumerate and verify all ranges */
    enumRanges=checkRanges+1;
    utrie_enum(&trie, _testEnumValue, _testEnumRange, &enumRanges);

    /* test linear Latin-1 range */
    if(trie.isLatin1Linear) {
        if(trie.data32!=NULL) {
            const uint32_t *latin1=UTRIE_GET32_LATIN1(&trie);

            for(start=0; start<0x100; ++start) {
                if(latin1[start]!=UTRIE_GET32_FROM_LEAD(&trie, start)) {
                    log_err("error: (%s) trie.latin1[U+%04lx]=0x%lx!=0x%lx=trie.get32(U+%04lx)\n",
                            testName, start, latin1[start], UTRIE_GET32_FROM_LEAD(&trie, start), start);
                }
            }
        } else {
            const uint16_t *latin1=UTRIE_GET16_LATIN1(&trie);

            for(start=0; start<0x100; ++start) {
                if(latin1[start]!=UTRIE_GET16_FROM_LEAD(&trie, start)) {
                    log_err("error: (%s) trie.latin1[U+%04lx]=0x%lx!=0x%lx=trie.get16(U+%04lx)\n",
                            testName, start, latin1[start], UTRIE_GET16_FROM_LEAD(&trie, start), start);
                }
            }
        }
    }

    testTrieIteration(testName, &trie, checkRanges, countCheckRanges);
}