Example #1
0
U_CAPI void U_CALLCONV
upvec_compactToUTrie2Handler(void *context,
                             UChar32 start, UChar32 end,
                             int32_t rowIndex, uint32_t *row, int32_t columns,
                             UErrorCode *pErrorCode) {
    UPVecToUTrie2Context *toUTrie2=(UPVecToUTrie2Context *)context;
    if(start<UPVEC_FIRST_SPECIAL_CP) {
        utrie2_setRange32(toUTrie2->trie, start, end, (uint32_t)rowIndex, TRUE, pErrorCode);
    } else {
        switch(start) {
        case UPVEC_INITIAL_VALUE_CP:
            toUTrie2->initialValue=rowIndex;
            break;
        case UPVEC_ERROR_VALUE_CP:
            toUTrie2->errorValue=rowIndex;
            break;
        case UPVEC_START_REAL_VALUES_CP:
            toUTrie2->maxValue=rowIndex;
            if(rowIndex>0xffff) {
                /* too many rows for a 16-bit trie */
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            } else {
                toUTrie2->trie=utrie2_open(toUTrie2->initialValue,
                                           toUTrie2->errorValue, pErrorCode);
            }
            break;
        default:
            break;
        }
    }
}
Example #2
0
U_CDECL_END

Normalizer2DataBuilder::Normalizer2DataBuilder(UErrorCode &errorCode) :
    phase(0), overrideHandling(OVERRIDE_PREVIOUS), optimization(OPTIMIZE_NORMAL),
    norm16TrieLength(0) {
    memset(unicodeVersion, 0, sizeof(unicodeVersion));
    normTrie=utrie2_open(0, 0, &errorCode);
    normMem=utm_open("gennorm2 normalization structs", 10000, 0x110100, sizeof(Norm));
    norms=allocNorm();  // unused Norm struct at index 0
    memset(indexes, 0, sizeof(indexes));
    memset(smallFCD, 0, sizeof(smallFCD));
}
Example #3
0
/* Almost the same as utrie2_cloneAsThawed() but copies a UTrie and freezes the clone. */
U_CAPI UTrie2 * U_EXPORT2
utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode) {
    NewTrieAndStatus context;
    UChar lead;

    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    if(trie1==NULL) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    context.trie=utrie2_open(trie1->initialValue, errorValue, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    context.exclusiveLimit=TRUE;
    context.errorCode=*pErrorCode;
    utrie_enum(trie1, NULL, copyEnumRange, &context);
    *pErrorCode=context.errorCode;
    for(lead=0xd800; lead<0xdc00; ++lead) {
        uint32_t value;
        if(trie1->data32==NULL) {
            value=UTRIE_GET16_FROM_LEAD(trie1, lead);
        } else {
            value=UTRIE_GET32_FROM_LEAD(trie1, lead);
        }
        if(value!=trie1->initialValue) {
            utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
        }
    }
    if(U_SUCCESS(*pErrorCode)) {
        utrie2_freeze(context.trie,
                      trie1->data32!=NULL ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS,
                      pErrorCode);
    }
#ifdef UTRIE2_DEBUG
    if(U_SUCCESS(*pErrorCode)) {
        utrie_printLengths(trie1);
        utrie2_printLengths(context.trie, "fromUTrie");
    }
#endif
    if(U_FAILURE(*pErrorCode)) {
        utrie2_close(context.trie);
        context.trie=NULL;
    }
    return context.trie;
}
Example #4
0
//
// Build the Trie table for mapping UChar32 values to the corresponding
// range group number.
//
void RBBISetBuilder::buildTrie() {
    RangeDescriptor *rlRange;

    fTrie = utrie2_open(0,       //  Initial value for all code points.
                        0,       //  Error value for out-of-range input.
                        fStatus);

    for (rlRange = fRangeList; rlRange!=0 && U_SUCCESS(*fStatus); rlRange=rlRange->fNext) {
        utrie2_setRange32(fTrie,
                          rlRange->fStartChar,     // Range start
                          rlRange->fEndChar,       // Range end (inclusive)
                          rlRange->fNum,           // value for range
                          TRUE,                    // Overwrite previously written values
                          fStatus);
    }
}
Example #5
0
U_CAPI UTrie2 * U_EXPORT2
utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode) {
    NewTrieAndStatus context;
    UChar lead;

    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    if(other->newTrie!=NULL && !other->newTrie->isCompacted) {
        return utrie2_clone(other, pErrorCode);  /* clone an unfrozen trie */
    }

    /* Clone the frozen trie by enumerating it and building a new one. */
    context.trie=utrie2_open(other->initialValue, other->errorValue, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    context.exclusiveLimit=FALSE;
    context.errorCode=*pErrorCode;
    utrie2_enum(other, NULL, copyEnumRange, &context);
    *pErrorCode=context.errorCode;
    for(lead=0xd800; lead<0xdc00; ++lead) {
        uint32_t value;
        if(other->data32==NULL) {
            value=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(other, lead);
        } else {
            value=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(other, lead);
        }
        if(value!=other->initialValue) {
            utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
        }
    }
    if(U_FAILURE(*pErrorCode)) {
        utrie2_close(context.trie);
        context.trie=NULL;
    }
    return context.trie;
}
BiDiPropsBuilder::BiDiPropsBuilder(UErrorCode &errorCode)
        : pTrie(NULL),
          mirrorTop(0) {
    // This builder encodes the following properties.
    relevantProps.
        add(UCHAR_BIDI_CONTROL).
        add(UCHAR_BIDI_MIRRORED).
        add(UCHAR_BIDI_CLASS).
        add(UCHAR_BIDI_MIRRORING_GLYPH).
        add(UCHAR_JOIN_CONTROL).
        add(UCHAR_JOINING_GROUP).
        add(UCHAR_JOINING_TYPE);
    pTrie=utrie2_open(0, 0, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "genprops error: bidipropsbuilder utrie2_open() failed - %s\n",
                u_errorName(errorCode));
    }
    uprv_memset(jgArray, U_JG_NO_JOINING_GROUP, sizeof(jgArray));
    uprv_memset(jgArray2, U_JG_NO_JOINING_GROUP, sizeof(jgArray2));
}
CasePropsBuilder::CasePropsBuilder(UErrorCode &errorCode)
        : excProps(NULL), excPropsCount(0), maxFullLength(U16_MAX_LENGTH), pTrie(NULL) {
    // This builder encodes the following properties.
    relevantProps.
        add(UCHAR_CANONICAL_COMBINING_CLASS).  // 0 vs. 230 vs. other
        add(UCHAR_SOFT_DOTTED).
        add(UCHAR_LOWERCASE).
        add(UCHAR_UPPERCASE).
        add(UCHAR_CASE_IGNORABLE).
        add(UCHAR_SIMPLE_CASE_FOLDING).
        add(UCHAR_SIMPLE_LOWERCASE_MAPPING).
        add(UCHAR_SIMPLE_TITLECASE_MAPPING).
        add(UCHAR_SIMPLE_UPPERCASE_MAPPING).
        add(UCHAR_CASE_FOLDING).
        add(UCHAR_LOWERCASE_MAPPING).
        add(UCHAR_TITLECASE_MAPPING).
        add(UCHAR_UPPERCASE_MAPPING).
        add(PPUCD_CONDITIONAL_CASE_MAPPINGS).
        add(PPUCD_TURKIC_CASE_FOLDING);
    // Write "unfold" meta data into the first row. Must be UGENCASE_UNFOLD_WIDTH UChars.
    unfold.
        append(0).
        append((UChar)UGENCASE_UNFOLD_WIDTH).
        append((UChar)UGENCASE_UNFOLD_STRING_WIDTH).
        append(0).
        append(0);
    U_ASSERT(unfold.length()==UGENCASE_UNFOLD_WIDTH);
    pTrie=utrie2_open(0, 0, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "genprops error: casepropsbuilder utrie2_open() failed - %s\n",
                u_errorName(errorCode));
        return;
    }
    excProps=new ExcProps *[MAX_EXC_COUNT];
    if(excProps==NULL) {
        fprintf(stderr,
                "genprops error: casepropsbuilder out of memory allocating "
                "the array of exceptions properties\n");
        errorCode=U_MEMORY_ALLOCATION_ERROR;
    }
}
int main(int argc, char** argv)
{
    // Create a value array of all possible code points.
    const UChar32 size = kMaxCodepoint + 1;
    CharacterProperty* values = new CharacterProperty[size];
    memset(values, 0, sizeof(CharacterProperty) * size);

    setRanges(values,
        cjkIdeographRanges, ARRAY_LENGTH(cjkIdeographRanges),
        CharacterProperty::isCJKIdeographOrSymbol);
    setRanges(values,
        cjkSymbolRanges, ARRAY_LENGTH(cjkSymbolRanges),
        CharacterProperty::isCJKIdeographOrSymbol);
    setValues(values,
        cjkIsolatedSymbolsArray, ARRAY_LENGTH(cjkIsolatedSymbolsArray),
        CharacterProperty::isCJKIdeographOrSymbol);

    setRanges(values,
        isUprightInMixedVerticalRanges,
        ARRAY_LENGTH(isUprightInMixedVerticalRanges),
        CharacterProperty::isUprightInMixedVertical);
    setValues(values,
        isUprightInMixedVerticalArray,
        ARRAY_LENGTH(isUprightInMixedVerticalArray),
        CharacterProperty::isUprightInMixedVertical);

    // Create a trie from the value array.
    UErrorCode error = U_ZERO_ERROR;
    UTrie2* trie = utrie2_open(0, 0, &error);
    assert(error == U_ZERO_ERROR);
    UChar32 start = 0;
    CharacterProperty value = values[0];
    for (UChar32 c = 1;; c++) {
        if (c < size && values[c] == value)
            continue;
        if (static_cast<uint32_t>(value)) {
            utrie2_setRange32(trie, start, c - 1,
                static_cast<uint32_t>(value), TRUE, &error);
            assert(error == U_ZERO_ERROR);
        }
        if (c >= size)
            break;
        start = c;
        value = values[start];
    }

    // Freeze and serialize the trie to a byte array.
    utrie2_freeze(trie, UTrie2ValueBits::UTRIE2_16_VALUE_BITS, &error);
    assert(error == U_ZERO_ERROR);
    int32_t serializedSize = utrie2_serialize(trie, nullptr, 0, &error);
    error = U_ZERO_ERROR;
    uint8_t* serialized = new uint8_t[serializedSize];
    serializedSize = utrie2_serialize(trie, serialized, serializedSize, &error);
    assert(error == U_ZERO_ERROR);

    // Write the serialized array to the source file.
    if (argc <= 1) {
        generate(stdout, serializedSize, serialized);
    } else {
        FILE* fp = fopen(argv[1], "wb");
        generate(fp, serializedSize, serialized);
        fclose(fp);
    }

    utrie2_close(trie);

    return 0;
}
//  Build the Whole Script Confusable data
//
//     TODO:  Reorganize.  Either get rid of the WSConfusableDataBuilder class,
//                         because everything is local to this one build function anyhow,
//                           OR
//                         break this function into more reasonably sized pieces, with
//                         state in WSConfusableDataBuilder.
//
void buildWSConfusableData(SpoofImpl *spImpl, const char * confusablesWS,
          int32_t confusablesWSLen, UParseError *pe, UErrorCode &status) 
{
    if (U_FAILURE(status)) {
        return;
    }
    URegularExpression *parseRegexp = NULL;
    int32_t             inputLen    = 0;
    UChar              *input       = NULL;
    int32_t             lineNum     = 0;
    
    UVector            *scriptSets        = NULL;
    uint32_t            rtScriptSetsCount = 2;

    UTrie2             *anyCaseTrie   = NULL;
    UTrie2             *lowerCaseTrie = NULL;

    anyCaseTrie = utrie2_open(0, 0, &status);
    lowerCaseTrie = utrie2_open(0, 0, &status);
    

    // The scriptSets vector provides a mapping from TRIE values to the set of scripts.
    //
    // Reserved TRIE values:
    //   0:  Code point has no whole script confusables.
    //   1:  Code point is of script Common or Inherited.
    //       These code points do not participate in whole script confusable detection.
    //       (This is logically equivalent to saying that they contain confusables in
    //        all scripts)
    //
    // Because Trie values are indexes into the ScriptSets vector, pre-fill
    // vector positions 0 and 1 to avoid conflicts with the reserved values.
    
    scriptSets = new UVector(status);
    if (scriptSets == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        goto cleanup;
    }
    scriptSets->addElement((void *)NULL, status);
    scriptSets->addElement((void *)NULL, status);

    // Convert the user input data from UTF-8 to UChar (UTF-16)
    u_strFromUTF8(NULL, 0, &inputLen, confusablesWS, confusablesWSLen, &status);
    if (status != U_BUFFER_OVERFLOW_ERROR) {
        goto cleanup;
    }
    status = U_ZERO_ERROR;
    input = static_cast<UChar *>(uprv_malloc((inputLen+1) * sizeof(UChar)));
    if (input == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        goto cleanup;
    }
    u_strFromUTF8(input, inputLen+1, NULL, confusablesWS, confusablesWSLen, &status);



    parseRegexp = uregex_openC(parseExp, 0, NULL, &status);
    
    // Zap any Byte Order Mark at the start of input.  Changing it to a space is benign
    //   given the syntax of the input.
    if (*input == 0xfeff) {
        *input = 0x20;
    }

    // Parse the input, one line per iteration of this loop.
    uregex_setText(parseRegexp, input, inputLen, &status);
    while (uregex_findNext(parseRegexp, &status)) {
        lineNum++;
        UChar  line[200];
        uregex_group(parseRegexp, 0, line, 200, &status);
        if (uregex_start(parseRegexp, 1, &status) >= 0) {
            // this was a blank or comment line.
            continue;
        }
        if (uregex_start(parseRegexp, 8, &status) >= 0) {
            // input file syntax error.
            status = U_PARSE_ERROR;
            goto cleanup;
        }
        if (U_FAILURE(status)) {
            goto cleanup;
        }

        // Pick up the start and optional range end code points from the parsed line.
        UChar32  startCodePoint = SpoofImpl::ScanHex(
            input, uregex_start(parseRegexp, 2, &status), uregex_end(parseRegexp, 2, &status), status);
        UChar32  endCodePoint = startCodePoint;
        if (uregex_start(parseRegexp, 3, &status) >=0) {
            endCodePoint = SpoofImpl::ScanHex(
                input, uregex_start(parseRegexp, 3, &status), uregex_end(parseRegexp, 3, &status), status);
        }

        // Extract the two script names from the source line.  We need these in an 8 bit
        //   default encoding (will be EBCDIC on IBM mainframes) in order to pass them on
        //   to the ICU u_getPropertyValueEnum() function.  Ugh.
        char  srcScriptName[20];
        char  targScriptName[20];
        extractGroup(parseRegexp, 4, srcScriptName, sizeof(srcScriptName), status);
        extractGroup(parseRegexp, 5, targScriptName, sizeof(targScriptName), status);
        UScriptCode srcScript  =
            static_cast<UScriptCode>(u_getPropertyValueEnum(UCHAR_SCRIPT, srcScriptName));
        UScriptCode targScript =
            static_cast<UScriptCode>(u_getPropertyValueEnum(UCHAR_SCRIPT, targScriptName));
        if (U_FAILURE(status)) {
            goto cleanup;
        }
        if (srcScript == USCRIPT_INVALID_CODE || targScript == USCRIPT_INVALID_CODE) {
            status = U_INVALID_FORMAT_ERROR;
            goto cleanup;
        }

        // select the table - (A) any case or (L) lower case only
        UTrie2 *table = anyCaseTrie;
        if (uregex_start(parseRegexp, 7, &status) >= 0) {
            table = lowerCaseTrie;
        }

        // Build the set of scripts containing confusable characters for
        //   the code point(s) specified in this input line.
        // Sanity check that the script of the source code point is the same
        //   as the source script indicated in the input file.  Failure of this check is
        //   an error in the input file.
        // Include the source script in the set (needed for Mixed Script Confusable detection).
        //
        UChar32 cp;
        for (cp=startCodePoint; cp<=endCodePoint; cp++) {
            int32_t setIndex = utrie2_get32(table, cp);
            BuilderScriptSet *bsset = NULL;
            if (setIndex > 0) {
                U_ASSERT(setIndex < scriptSets->size());
                bsset = static_cast<BuilderScriptSet *>(scriptSets->elementAt(setIndex));
            } else {
                bsset = new BuilderScriptSet();
                if (bsset == NULL) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    goto cleanup;
                }
                bsset->codePoint = cp;
                bsset->trie = table;
                bsset->sset = new ScriptSet();
                setIndex = scriptSets->size();
                bsset->index = setIndex;
                bsset->rindex = 0;
                if (bsset->sset == NULL) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    goto cleanup;
                }
                scriptSets->addElement(bsset, status);
                utrie2_set32(table, cp, setIndex, &status);
            }
            bsset->sset->Union(targScript);
            bsset->sset->Union(srcScript);

            if (U_FAILURE(status)) {
                goto cleanup;
            }
            UScriptCode cpScript = uscript_getScript(cp, &status);
            if (cpScript != srcScript) {
                status = U_INVALID_FORMAT_ERROR;
                goto cleanup;
            }
        }
    }

    // Eliminate duplicate script sets.  At this point we have a separate
    // script set for every code point that had data in the input file.
    //
    // We eliminate underlying ScriptSet objects, not the BuildScriptSets that wrap them
    //
    // printf("Number of scriptSets: %d\n", scriptSets->size());
    {
        int32_t duplicateCount = 0;
        rtScriptSetsCount = 2;
        for (int32_t outeri=2; outeri<scriptSets->size(); outeri++) {
            BuilderScriptSet *outerSet = static_cast<BuilderScriptSet *>(scriptSets->elementAt(outeri));
            if (outerSet->index != static_cast<uint32_t>(outeri)) {
                // This set was already identified as a duplicate.
                //   It will not be allocated a position in the runtime array of ScriptSets.
                continue;
            }
            outerSet->rindex = rtScriptSetsCount++;
            for (int32_t inneri=outeri+1; inneri<scriptSets->size(); inneri++) {
                BuilderScriptSet *innerSet = static_cast<BuilderScriptSet *>(scriptSets->elementAt(inneri));
                if (*(outerSet->sset) == *(innerSet->sset) && outerSet->sset != innerSet->sset) {
                    delete innerSet->sset;
                    innerSet->scriptSetOwned = FALSE;
                    innerSet->sset = outerSet->sset;
                    innerSet->index = outeri;
                    innerSet->rindex = outerSet->rindex;
                    duplicateCount++;
                }
                // But this doesn't get all.  We need to fix the TRIE.
            }
        }
        // printf("Number of distinct script sets: %d\n", rtScriptSetsCount);
    }

    

    // Update the Trie values to be reflect the run time script indexes (after duplicate merging).
    //    (Trie Values 0 and 1 are reserved, and the corresponding slots in scriptSets
    //     are unused, which is why the loop index starts at 2.)
    {
        for (int32_t i=2; i<scriptSets->size(); i++) {
            BuilderScriptSet *bSet = static_cast<BuilderScriptSet *>(scriptSets->elementAt(i));
            if (bSet->rindex != (uint32_t)i) {
                utrie2_set32(bSet->trie, bSet->codePoint, bSet->rindex, &status);
            }
        }
    }

    // For code points with script==Common or script==Inherited,
    //   Set the reserved value of 1 into both Tries.  These characters do not participate
    //   in Whole Script Confusable detection; this reserved value is the means
    //   by which they are detected.
    {
        UnicodeSet ignoreSet;
        ignoreSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_COMMON, status);
        UnicodeSet inheritedSet;
        inheritedSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_INHERITED, status);
        ignoreSet.addAll(inheritedSet);
        for (int32_t rn=0; rn<ignoreSet.getRangeCount(); rn++) {
            UChar32 rangeStart = ignoreSet.getRangeStart(rn);
            UChar32 rangeEnd   = ignoreSet.getRangeEnd(rn);
            utrie2_setRange32(anyCaseTrie,   rangeStart, rangeEnd, 1, TRUE, &status);
            utrie2_setRange32(lowerCaseTrie, rangeStart, rangeEnd, 1, TRUE, &status);
        }
    }

    // Serialize the data to the Spoof Detector
    {
        utrie2_freeze(anyCaseTrie,   UTRIE2_16_VALUE_BITS, &status);
        int32_t size = utrie2_serialize(anyCaseTrie, NULL, 0, &status);
        // printf("Any case Trie size: %d\n", size);
        if (status != U_BUFFER_OVERFLOW_ERROR) {
            goto cleanup;
        }
        status = U_ZERO_ERROR;
        spImpl->fSpoofData->fRawData->fAnyCaseTrie = spImpl->fSpoofData->fMemLimit;
        spImpl->fSpoofData->fRawData->fAnyCaseTrieLength = size;
        spImpl->fSpoofData->fAnyCaseTrie = anyCaseTrie;
        void *where = spImpl->fSpoofData->reserveSpace(size, status);
        utrie2_serialize(anyCaseTrie, where, size, &status);
        
        utrie2_freeze(lowerCaseTrie, UTRIE2_16_VALUE_BITS, &status);
        size = utrie2_serialize(lowerCaseTrie, NULL, 0, &status);
        // printf("Lower case Trie size: %d\n", size);
        if (status != U_BUFFER_OVERFLOW_ERROR) {
            goto cleanup;
        }
        status = U_ZERO_ERROR;
        spImpl->fSpoofData->fRawData->fLowerCaseTrie = spImpl->fSpoofData->fMemLimit;
        spImpl->fSpoofData->fRawData->fLowerCaseTrieLength = size;
        spImpl->fSpoofData->fLowerCaseTrie = lowerCaseTrie;
        where = spImpl->fSpoofData->reserveSpace(size, status);
        utrie2_serialize(lowerCaseTrie, where, size, &status);

        spImpl->fSpoofData->fRawData->fScriptSets = spImpl->fSpoofData->fMemLimit;
        spImpl->fSpoofData->fRawData->fScriptSetsLength = rtScriptSetsCount;
        ScriptSet *rtScriptSets =  static_cast<ScriptSet *>
            (spImpl->fSpoofData->reserveSpace(rtScriptSetsCount * sizeof(ScriptSet), status));
        uint32_t rindex = 2;
        for (int32_t i=2; i<scriptSets->size(); i++) {
            BuilderScriptSet *bSet = static_cast<BuilderScriptSet *>(scriptSets->elementAt(i));
            if (bSet->rindex < rindex) {
                // We have already copied this script set to the serialized data.
                continue;
            }
            U_ASSERT(rindex == bSet->rindex);
            rtScriptSets[rindex] = *bSet->sset;   // Assignment of a ScriptSet just copies the bits.
            rindex++;
        }
    }

    // Open new utrie2s from the serialized data.  We don't want to keep the ones
    //   we just built because we would then have two copies of the data, one internal to
    //   the utries that we have already constructed, and one in the serialized data area.
    //   An alternative would be to not pre-serialize the Trie data, but that makes the
    //   spoof detector data different, depending on how the detector was constructed.
    //   It's simpler to keep the data always the same.
    
    spImpl->fSpoofData->fAnyCaseTrie = utrie2_openFromSerialized(
            UTRIE2_16_VALUE_BITS,
            (const char *)spImpl->fSpoofData->fRawData + spImpl->fSpoofData->fRawData->fAnyCaseTrie,
            spImpl->fSpoofData->fRawData->fAnyCaseTrieLength,
            NULL,
            &status);

    spImpl->fSpoofData->fLowerCaseTrie = utrie2_openFromSerialized(
            UTRIE2_16_VALUE_BITS,
            (const char *)spImpl->fSpoofData->fRawData + spImpl->fSpoofData->fRawData->fLowerCaseTrie,
            spImpl->fSpoofData->fRawData->fAnyCaseTrieLength,
            NULL,
            &status);

    

cleanup:
    if (U_FAILURE(status)) {
        pe->line = lineNum;
    }
    uregex_close(parseRegexp);
    uprv_free(input);

    int32_t i;
    for (i=0; i<scriptSets->size(); i++) {
        BuilderScriptSet *bsset = static_cast<BuilderScriptSet *>(scriptSets->elementAt(i));
        delete bsset;
    }
    delete scriptSets;
    utrie2_close(anyCaseTrie);
    utrie2_close(lowerCaseTrie);
    return;
}