예제 #1
0
/*
 * Compaction callback that feeds compacted property-vector rows into a UTrie2.
 *
 * context must point to a UPVecToUTrie2Context.
 * - For start below UPVEC_FIRST_SPECIAL_CP, the range start..end is set
 *   to rowIndex in the context's trie.
 * - UPVEC_INITIAL_VALUE_CP and UPVEC_ERROR_VALUE_CP record rowIndex as the
 *   initial value and the error value, respectively.
 * - UPVEC_START_REAL_VALUES_CP records rowIndex as the maximum value and opens
 *   the trie with the recorded initial/error values, or sets
 *   U_INDEX_OUTOFBOUNDS_ERROR when rowIndex does not fit into 16 bits.
 * - Any other special start value is ignored.
 *
 * The row and columns parameters are not used by this handler.
 */
U_CAPI void U_CALLCONV
upvec_compactToUTrie2Handler(void *context,
                             UChar32 start, UChar32 end,
                             int32_t rowIndex, uint32_t *row, int32_t columns,
                             UErrorCode *pErrorCode) {
    UPVecToUTrie2Context *ctx=(UPVecToUTrie2Context *)context;

    if(start<UPVEC_FIRST_SPECIAL_CP) {
        /* not a special marker: store the row index for the whole range */
        utrie2_setRange32(ctx->trie, start, end, (uint32_t)rowIndex, TRUE, pErrorCode);
        return;
    }

    if(start==UPVEC_INITIAL_VALUE_CP) {
        ctx->initialValue=rowIndex;
    } else if(start==UPVEC_ERROR_VALUE_CP) {
        ctx->errorValue=rowIndex;
    } else if(start==UPVEC_START_REAL_VALUES_CP) {
        ctx->maxValue=rowIndex;
        if(rowIndex<=0xffff) {
            ctx->trie=utrie2_open(ctx->initialValue, ctx->errorValue, pErrorCode);
        } else {
            /* too many rows for a 16-bit trie */
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        }
    }
    /* all other special values: nothing to do */
}
예제 #2
0
void
BiDiPropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
                           UErrorCode &errorCode) {
    if(U_FAILURE(errorCode) || newValues.containsNone(relevantProps)) { return; }

    UChar32 start=props.start;
    UChar32 end=props.end;

    // The runtime code relies on this invariant for returning both bmg and bpb
    // from the same data.
    int32_t bpt=props.getIntProp(UCHAR_BIDI_PAIRED_BRACKET_TYPE);
    if(!(bpt==0 ? props.bpb==U_SENTINEL : props.bpb==props.bmg)) {
        fprintf(stderr,
                "genprops error: invariant not true: "
                "if(bpt==None) then bpb=<none> else bpb=bmg\n");
        return;
    }
    int32_t delta=encodeBidiMirroringGlyph(start, end, props.bmg, errorCode);
    uint32_t value=(uint32_t)delta<<UBIDI_MIRROR_DELTA_SHIFT;
    if(props.binProps[UCHAR_BIDI_MIRRORED]) {
        value|=U_MASK(UBIDI_IS_MIRRORED_SHIFT);
    }
    if(props.binProps[UCHAR_BIDI_CONTROL]) {
        value|=U_MASK(UBIDI_BIDI_CONTROL_SHIFT);
    }
    if(props.binProps[UCHAR_JOIN_CONTROL]) {
        value|=U_MASK(UBIDI_JOIN_CONTROL_SHIFT);
    }
    value|=(uint32_t)bpt<<UBIDI_BPT_SHIFT;
    value|=(uint32_t)props.getIntProp(UCHAR_JOINING_TYPE)<<UBIDI_JT_SHIFT;
    value|=(uint32_t)props.getIntProp(UCHAR_BIDI_CLASS);
    utrie2_setRange32(pTrie, start, end, value, TRUE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "genprops error: BiDiPropsBuilder utrie2_setRange32() failed - %s\n",
                u_errorName(errorCode));
        return;
    }

    // Store Joining_Group values from vector column 1 in simple byte arrays.
    int32_t jg=props.getIntProp(UCHAR_JOINING_GROUP);
    for(UChar32 c=start; c<=end; ++c) {
        int32_t jgStart;
        if(MIN_JG_START<=c && c<MAX_JG_LIMIT) {
            jgArray[c-MIN_JG_START]=(uint8_t)jg;
        } else if(MIN_JG_START2<=c && c<MAX_JG_LIMIT2) {
            jgArray2[c-MIN_JG_START2]=(uint8_t)jg;
        } else if(jg!=U_JG_NO_JOINING_GROUP) {
            fprintf(stderr, "genprops error: Joining_Group for out-of-range code points U+%04lx..U+%04lx\n",
                    (long)start, (long)end);
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
    }
}
예제 #3
0
파일: rbbisetb.cpp 프로젝트: winlibs/icu4c
//
// Build the Trie table for mapping UChar32 values to the corresponding
// range group number.
//
void RBBISetBuilder::buildTrie() {
    RangeDescriptor *rlRange;

    fTrie = utrie2_open(0,       //  Initial value for all code points.
                        0,       //  Error value for out-of-range input.
                        fStatus);

    for (rlRange = fRangeList; rlRange!=0 && U_SUCCESS(*fStatus); rlRange=rlRange->fNext) {
        utrie2_setRange32(fTrie,
                          rlRange->fStartChar,     // Range start
                          rlRange->fEndChar,       // Range end (inclusive)
                          rlRange->fNum,           // value for range
                          TRUE,                    // Overwrite previously written values
                          fStatus);
    }
}
예제 #4
0
/*
 * Range callback that copies one enumerated range into the trie held by the
 * NewTrieAndStatus that context points to.
 *
 * Ranges carrying the target trie's initial value are skipped. When the
 * context's exclusiveLimit flag is set, end is treated as an exclusive limit
 * and reduced by one first. Returns TRUE to continue the enumeration, or FALSE
 * once the context's errorCode indicates a failure.
 */
static UBool U_CALLCONV
copyEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
    NewTrieAndStatus *state=(NewTrieAndStatus *)context;
    UChar32 last;

    if(value==state->trie->initialValue) {
        /* nothing to copy */
        return TRUE;
    }

    last= state->exclusiveLimit ? end-1 : end;
    if(start!=last) {
        utrie2_setRange32(state->trie, start, last, value, TRUE, &state->errorCode);
    } else {
        utrie2_set32(state->trie, start, value, &state->errorCode);
    }
    return U_SUCCESS(state->errorCode);
}
int main(int argc, char** argv)
{
    // Create a value array of all possible code points.
    const UChar32 size = kMaxCodepoint + 1;
    CharacterProperty* values = new CharacterProperty[size];
    memset(values, 0, sizeof(CharacterProperty) * size);

    setRanges(values,
        cjkIdeographRanges, ARRAY_LENGTH(cjkIdeographRanges),
        CharacterProperty::isCJKIdeographOrSymbol);
    setRanges(values,
        cjkSymbolRanges, ARRAY_LENGTH(cjkSymbolRanges),
        CharacterProperty::isCJKIdeographOrSymbol);
    setValues(values,
        cjkIsolatedSymbolsArray, ARRAY_LENGTH(cjkIsolatedSymbolsArray),
        CharacterProperty::isCJKIdeographOrSymbol);

    setRanges(values,
        isUprightInMixedVerticalRanges,
        ARRAY_LENGTH(isUprightInMixedVerticalRanges),
        CharacterProperty::isUprightInMixedVertical);
    setValues(values,
        isUprightInMixedVerticalArray,
        ARRAY_LENGTH(isUprightInMixedVerticalArray),
        CharacterProperty::isUprightInMixedVertical);

    // Create a trie from the value array.
    UErrorCode error = U_ZERO_ERROR;
    UTrie2* trie = utrie2_open(0, 0, &error);
    assert(error == U_ZERO_ERROR);
    UChar32 start = 0;
    CharacterProperty value = values[0];
    for (UChar32 c = 1;; c++) {
        if (c < size && values[c] == value)
            continue;
        if (static_cast<uint32_t>(value)) {
            utrie2_setRange32(trie, start, c - 1,
                static_cast<uint32_t>(value), TRUE, &error);
            assert(error == U_ZERO_ERROR);
        }
        if (c >= size)
            break;
        start = c;
        value = values[start];
    }

    // Freeze and serialize the trie to a byte array.
    utrie2_freeze(trie, UTrie2ValueBits::UTRIE2_16_VALUE_BITS, &error);
    assert(error == U_ZERO_ERROR);
    int32_t serializedSize = utrie2_serialize(trie, nullptr, 0, &error);
    error = U_ZERO_ERROR;
    uint8_t* serialized = new uint8_t[serializedSize];
    serializedSize = utrie2_serialize(trie, serialized, serializedSize, &error);
    assert(error == U_ZERO_ERROR);

    // Write the serialized array to the source file.
    if (argc <= 1) {
        generate(stdout, serializedSize, serialized);
    } else {
        FILE* fp = fopen(argv[1], "wb");
        generate(fp, serializedSize, serialized);
        fclose(fp);
    }

    utrie2_close(trie);

    return 0;
}
예제 #6
0
/*
 * Computes the case-properties trie value for the code point range
 * props.start..props.end and stores it in pTrie.
 *
 * The value combines the case type, flags (soft-dotted, accent class,
 * case-ignorable) and either a simple case mapping delta or, when the
 * UCASE_EXCEPTION bit is set, an index into the excProps array where a copy
 * of the properties is kept for later processing.
 *
 * Also updates the caseSensitive set and maxFullLength for code points with
 * mappings, and calls addUnfolding() for case foldings that consist of more
 * than one code point.
 *
 * Does nothing when errorCode already indicates a failure on entry, or when
 * newValues contains none of relevantProps.
 *
 * errorCode is set to:
 * - U_ILLEGAL_ARGUMENT_ERROR for a soft-dotted character with ccc!=0, or for
 *   a multi-code point range that has mappings or exceptions
 * - U_INDEX_OUTOFBOUNDS_ERROR when MAX_EXC_COUNT exceptions are already stored
 * - U_MEMORY_ALLOCATION_ERROR when the exception record cannot be allocated
 * Failures from utrie2_setRange32() are passed through.
 */
void
CasePropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
                           UErrorCode &errorCode) {
    if(U_FAILURE(errorCode) || newValues.containsNone(relevantProps)) { return; }

    UChar32 start=props.start;
    UChar32 end=props.end;

    /* default: map to self */
    int32_t delta=0;

    /* case type: Lowercase takes precedence over Uppercase, then titlecase letters */
    uint32_t type;
    if(props.binProps[UCHAR_LOWERCASE]) {
        type=UCASE_LOWER;
    } else if(props.binProps[UCHAR_UPPERCASE]) {
        type=UCASE_UPPER;
    } else if(props.getIntProp(UCHAR_GENERAL_CATEGORY)==U_TITLECASE_LETTER) {
        type=UCASE_TITLE;
    } else {
        type=UCASE_NONE;
    }
    uint32_t value=type;

    /* hasMapping: whether the code point has any case mapping or folding at all */
    UBool hasMapping=FALSE;
    if(props.suc>=0) {
        /* uppercase mapping as delta if the character is lowercase */
        hasMapping=TRUE;
        if(type==UCASE_LOWER) {
            delta=props.suc-start;
        } else {
            value|=UCASE_EXCEPTION;
        }
    }
    if(props.slc>=0) {
        /* lowercase mapping as delta if the character is uppercase or titlecase */
        hasMapping=TRUE;
        if(type>=UCASE_UPPER) {
            delta=props.slc-start;
        } else {
            value|=UCASE_EXCEPTION;
        }
    }
    if(props.stc>=0) {
        hasMapping=TRUE;
    }
    /* a titlecase mapping that differs from the uppercase mapping needs an exception */
    if(props.suc!=props.stc) {
        value|=UCASE_EXCEPTION;
    }
    /* full (string) mappings and conditional mappings always need an exception */
    if(!props.lc.isEmpty() || !props.uc.isEmpty() || !props.tc.isEmpty() ||
        newValues.contains(PPUCD_CONDITIONAL_CASE_MAPPINGS)
    ) {
        hasMapping=TRUE;
        value|=UCASE_EXCEPTION;
    }
    /*
     * a case folding that differs from the simple lowercase mapping,
     * a full case folding that differs from the simple one,
     * or a Turkic case folding needs an exception
     */
    if( (props.scf>=0 && props.scf!=props.slc) ||
        (!props.cf.isEmpty() && props.cf!=UnicodeString(props.scf)) ||
        newValues.contains(PPUCD_TURKIC_CASE_FOLDING)
    ) {
        hasMapping=TRUE;
        value|=UCASE_EXCEPTION;
    }

    // Simple case folding falls back to simple lowercasing.
    // If there is no case folding but there is a lowercase mapping,
    // then add a case folding mapping to the code point.
    // For example: Cherokee uppercase syllables since Unicode 8.
    // (Full case folding falls back to simple case folding,
    // not to full lowercasing, so we need not also handle it specially
    // for such cases.)
    UChar32 scf=props.scf;
    if(scf<0 && props.slc>=0) {
        scf=start;
        hasMapping=TRUE;
        value|=UCASE_EXCEPTION;
    }

    /* a delta outside UCASE_MIN_DELTA..UCASE_MAX_DELTA is stored as an exception instead */
    if(delta<UCASE_MIN_DELTA || UCASE_MAX_DELTA<delta) {
        value|=UCASE_EXCEPTION;
    }

    if(props.binProps[UCHAR_SOFT_DOTTED]) {
        value|=UCASE_SOFT_DOTTED;
    }
    /* ccc!=0: flag as "above" (ccc==230) or other accent; rejected together with soft-dotted */
    int32_t cc=props.getIntProp(UCHAR_CANONICAL_COMBINING_CLASS);
    if(cc!=0) {
        if(props.binProps[UCHAR_SOFT_DOTTED]) {
            fprintf(stderr, "genprops error: a soft-dotted character has ccc!=0\n");
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
        if(cc==230) {
            value|=UCASE_ABOVE;
        } else {
            value|=UCASE_OTHER_ACCENT;
        }
    }

    if(props.binProps[UCHAR_CASE_IGNORABLE]) {
        value|=UCASE_IGNORABLE;
    }

    /* mappings and exceptions are only supported for single code points, not for ranges */
    if((hasMapping || (value&UCASE_EXCEPTION)) && start!=end) {
        fprintf(stderr,
                "genprops error: range %04lX..%04lX has case mappings "
                "or reasons for data structure exceptions\n",
                (long)start, (long)end);
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* handle exceptions */
    if(value&UCASE_EXCEPTION) {
        /* simply store exceptions for later processing and encoding */
        if(excPropsCount==MAX_EXC_COUNT) {
            fprintf(stderr, "genprops error: casepropsbuilder: too many exceptions\n");
            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return;
        }
        ExcProps *newExcProps=new ExcProps(props);
        if(newExcProps==NULL) {
            fprintf(stderr,
                    "genprops error: casepropsbuilder out of memory allocating "
                    "exceptions properties\n");
            errorCode=U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        /* store the possibly adjusted simple case folding (see the fallback above) */
        newExcProps->props.scf=scf;
        newExcProps->hasConditionalCaseMappings=newValues.contains(PPUCD_CONDITIONAL_CASE_MAPPINGS);
        newExcProps->hasTurkicCaseFolding=newValues.contains(PPUCD_TURKIC_CASE_FOLDING);
        /* the trie value carries the index of this record in excProps */
        value|=(uint32_t)excPropsCount<<UGENCASE_EXC_SHIFT;
        excProps[excPropsCount++]=newExcProps;
    } else {
        /* store the simple case mapping delta */
        value|=((uint32_t)delta<<UCASE_DELTA_SHIFT)&UCASE_DELTA_MASK;
    }

    utrie2_setRange32(pTrie, start, end, value, TRUE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "genprops error: unable to set case mapping values: %s\n",
                u_errorName(errorCode));
        return;
    }

    if(hasMapping) {
        /* update the case-sensitive set */
        caseSensitive.add(start);
        if(scf>=0) { caseSensitive.add(scf); }
        if(props.slc>=0) { caseSensitive.add(props.slc); }
        if(props.suc>=0) { caseSensitive.add(props.suc); }
        if(props.stc>=0) { caseSensitive.add(props.stc); }
        caseSensitive.addAll(props.cf);
        caseSensitive.addAll(props.lc);
        caseSensitive.addAll(props.uc);
        caseSensitive.addAll(props.tc);

        /* update maxFullLength */
        if(props.cf.length()>maxFullLength) { maxFullLength=props.cf.length(); }
        if(props.lc.length()>maxFullLength) { maxFullLength=props.lc.length(); }
        if(props.uc.length()>maxFullLength) { maxFullLength=props.uc.length(); }
        if(props.tc.length()>maxFullLength) { maxFullLength=props.tc.length(); }
    }

    /* add the multi-character case folding to the "unfold" data */
    if(props.cf.hasMoreChar32Than(0, 0x7fffffff, 1)) {
        addUnfolding(start, props.cf, errorCode);
    }
}
예제 #7
0
//  Build the Whole Script Confusable data
//
//     Parses the UTF-8 whole-script confusables source text, builds the
//     any-case and lower-case tries plus the table of script sets, and
//     serializes all of it into the spoof data owned by spImpl.
//
//     spImpl            spoof checker whose fSpoofData receives the built data
//     confusablesWS     source text, UTF-8
//     confusablesWSLen  length of confusablesWS, as passed to u_strFromUTF8()
//     pe                on failure, pe->line is set to the number of the last
//                       input line that was read
//     status            in/out ICU error code; nothing is done if it already
//                       indicates a failure on entry
//
//     TODO:  Reorganize.  Either get rid of the WSConfusableDataBuilder class,
//                         because everything is local to this one build function anyhow,
//                           OR
//                         break this function into more reasonably sized pieces, with
//                         state in WSConfusableDataBuilder.
//
void buildWSConfusableData(SpoofImpl *spImpl, const char * confusablesWS,
          int32_t confusablesWSLen, UParseError *pe, UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return;
    }
    URegularExpression *parseRegexp = NULL;
    int32_t             inputLen    = 0;
    UChar              *input       = NULL;
    int32_t             lineNum     = 0;

    UVector            *scriptSets        = NULL;
    uint32_t            rtScriptSetsCount = 2;

    UTrie2             *anyCaseTrie   = NULL;
    UTrie2             *lowerCaseTrie = NULL;

    anyCaseTrie = utrie2_open(0, 0, &status);
    lowerCaseTrie = utrie2_open(0, 0, &status);


    // The scriptSets vector provides a mapping from TRIE values to the set of scripts.
    //
    // Reserved TRIE values:
    //   0:  Code point has no whole script confusables.
    //   1:  Code point is of script Common or Inherited.
    //       These code points do not participate in whole script confusable detection.
    //       (This is logically equivalent to saying that they contain confusables in
    //        all scripts)
    //
    // Because Trie values are indexes into the ScriptSets vector, pre-fill
    // vector positions 0 and 1 to avoid conflicts with the reserved values.

    scriptSets = new UVector(status);
    if (scriptSets == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        goto cleanup;
    }
    scriptSets->addElement((void *)NULL, status);
    scriptSets->addElement((void *)NULL, status);

    // Convert the user input data from UTF-8 to UChar (UTF-16)
    // The first call only measures the required length, which is reported
    // through a buffer overflow error.
    u_strFromUTF8(NULL, 0, &inputLen, confusablesWS, confusablesWSLen, &status);
    if (status != U_BUFFER_OVERFLOW_ERROR) {
        goto cleanup;
    }
    status = U_ZERO_ERROR;
    input = static_cast<UChar *>(uprv_malloc((inputLen+1) * sizeof(UChar)));
    if (input == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        goto cleanup;
    }
    u_strFromUTF8(input, inputLen+1, NULL, confusablesWS, confusablesWSLen, &status);



    parseRegexp = uregex_openC(parseExp, 0, NULL, &status);

    // Zap any Byte Order Mark at the start of input.  Changing it to a space is benign
    //   given the syntax of the input.
    if (*input == 0xfeff) {
        *input = 0x20;
    }

    // Parse the input, one line per iteration of this loop.
    uregex_setText(parseRegexp, input, inputLen, &status);
    while (uregex_findNext(parseRegexp, &status)) {
        lineNum++;
        UChar  line[200];
        uregex_group(parseRegexp, 0, line, 200, &status);
        if (uregex_start(parseRegexp, 1, &status) >= 0) {
            // this was a blank or comment line.
            continue;
        }
        if (uregex_start(parseRegexp, 8, &status) >= 0) {
            // input file syntax error.
            status = U_PARSE_ERROR;
            goto cleanup;
        }
        if (U_FAILURE(status)) {
            goto cleanup;
        }

        // Pick up the start and optional range end code points from the parsed line.
        UChar32  startCodePoint = SpoofImpl::ScanHex(
            input, uregex_start(parseRegexp, 2, &status), uregex_end(parseRegexp, 2, &status), status);
        UChar32  endCodePoint = startCodePoint;
        if (uregex_start(parseRegexp, 3, &status) >=0) {
            endCodePoint = SpoofImpl::ScanHex(
                input, uregex_start(parseRegexp, 3, &status), uregex_end(parseRegexp, 3, &status), status);
        }

        // Extract the two script names from the source line.  We need these in an 8 bit
        //   default encoding (will be EBCDIC on IBM mainframes) in order to pass them on
        //   to the ICU u_getPropertyValueEnum() function.  Ugh.
        char  srcScriptName[20];
        char  targScriptName[20];
        extractGroup(parseRegexp, 4, srcScriptName, sizeof(srcScriptName), status);
        extractGroup(parseRegexp, 5, targScriptName, sizeof(targScriptName), status);
        UScriptCode srcScript  =
            static_cast<UScriptCode>(u_getPropertyValueEnum(UCHAR_SCRIPT, srcScriptName));
        UScriptCode targScript =
            static_cast<UScriptCode>(u_getPropertyValueEnum(UCHAR_SCRIPT, targScriptName));
        if (U_FAILURE(status)) {
            goto cleanup;
        }
        if (srcScript == USCRIPT_INVALID_CODE || targScript == USCRIPT_INVALID_CODE) {
            status = U_INVALID_FORMAT_ERROR;
            goto cleanup;
        }

        // select the table - (A) any case or (L) lower case only
        UTrie2 *table = anyCaseTrie;
        if (uregex_start(parseRegexp, 7, &status) >= 0) {
            table = lowerCaseTrie;
        }

        // Build the set of scripts containing confusable characters for
        //   the code point(s) specified in this input line.
        // Sanity check that the script of the source code point is the same
        //   as the source script indicated in the input file.  Failure of this check is
        //   an error in the input file.
        // Include the source script in the set (needed for Mixed Script Confusable detection).
        //
        UChar32 cp;
        for (cp=startCodePoint; cp<=endCodePoint; cp++) {
            int32_t setIndex = utrie2_get32(table, cp);
            BuilderScriptSet *bsset = NULL;
            if (setIndex > 0) {
                U_ASSERT(setIndex < scriptSets->size());
                bsset = static_cast<BuilderScriptSet *>(scriptSets->elementAt(setIndex));
            } else {
                bsset = new BuilderScriptSet();
                if (bsset == NULL) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    goto cleanup;
                }
                bsset->codePoint = cp;
                bsset->trie = table;
                bsset->sset = new ScriptSet();
                setIndex = scriptSets->size();
                bsset->index = setIndex;
                bsset->rindex = 0;
                if (bsset->sset == NULL) {
                    // bsset is not in scriptSets yet, so cleanup would not free it.
                    delete bsset;
                    status = U_MEMORY_ALLOCATION_ERROR;
                    goto cleanup;
                }
                scriptSets->addElement(bsset, status);
                if (scriptSets->size() == setIndex) {
                    // The vector did not take the element (the size did not grow),
                    // so it is still ours to free.
                    delete bsset;
                    if (U_SUCCESS(status)) {
                        status = U_MEMORY_ALLOCATION_ERROR;
                    }
                    goto cleanup;
                }
                utrie2_set32(table, cp, setIndex, &status);
            }
            bsset->sset->Union(targScript);
            bsset->sset->Union(srcScript);

            if (U_FAILURE(status)) {
                goto cleanup;
            }
            UScriptCode cpScript = uscript_getScript(cp, &status);
            if (cpScript != srcScript) {
                status = U_INVALID_FORMAT_ERROR;
                goto cleanup;
            }
        }
    }

    // Eliminate duplicate script sets.  At this point we have a separate
    // script set for every code point that had data in the input file.
    //
    // We eliminate underlying ScriptSet objects, not the BuildScriptSets that wrap them
    //
    // printf("Number of scriptSets: %d\n", scriptSets->size());
    {
        int32_t duplicateCount = 0;
        rtScriptSetsCount = 2;
        for (int32_t outeri=2; outeri<scriptSets->size(); outeri++) {
            BuilderScriptSet *outerSet = static_cast<BuilderScriptSet *>(scriptSets->elementAt(outeri));
            if (outerSet->index != static_cast<uint32_t>(outeri)) {
                // This set was already identified as a duplicate.
                //   It will not be allocated a position in the runtime array of ScriptSets.
                continue;
            }
            outerSet->rindex = rtScriptSetsCount++;
            for (int32_t inneri=outeri+1; inneri<scriptSets->size(); inneri++) {
                BuilderScriptSet *innerSet = static_cast<BuilderScriptSet *>(scriptSets->elementAt(inneri));
                if (*(outerSet->sset) == *(innerSet->sset) && outerSet->sset != innerSet->sset) {
                    // Same contents, different object: share the outer set's ScriptSet.
                    delete innerSet->sset;
                    innerSet->scriptSetOwned = FALSE;
                    innerSet->sset = outerSet->sset;
                    innerSet->index = outeri;
                    innerSet->rindex = outerSet->rindex;
                    duplicateCount++;
                }
                // But this doesn't get all.  We need to fix the TRIE.
            }
        }
        // printf("Number of distinct script sets: %d\n", rtScriptSetsCount);
    }



    // Update the Trie values to be reflect the run time script indexes (after duplicate merging).
    //    (Trie Values 0 and 1 are reserved, and the corresponding slots in scriptSets
    //     are unused, which is why the loop index starts at 2.)
    {
        for (int32_t i=2; i<scriptSets->size(); i++) {
            BuilderScriptSet *bSet = static_cast<BuilderScriptSet *>(scriptSets->elementAt(i));
            if (bSet->rindex != (uint32_t)i) {
                utrie2_set32(bSet->trie, bSet->codePoint, bSet->rindex, &status);
            }
        }
    }

    // For code points with script==Common or script==Inherited,
    //   Set the reserved value of 1 into both Tries.  These characters do not participate
    //   in Whole Script Confusable detection; this reserved value is the means
    //   by which they are detected.
    {
        UnicodeSet ignoreSet;
        ignoreSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_COMMON, status);
        UnicodeSet inheritedSet;
        inheritedSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_INHERITED, status);
        ignoreSet.addAll(inheritedSet);
        for (int32_t rn=0; rn<ignoreSet.getRangeCount(); rn++) {
            UChar32 rangeStart = ignoreSet.getRangeStart(rn);
            UChar32 rangeEnd   = ignoreSet.getRangeEnd(rn);
            utrie2_setRange32(anyCaseTrie,   rangeStart, rangeEnd, 1, TRUE, &status);
            utrie2_setRange32(lowerCaseTrie, rangeStart, rangeEnd, 1, TRUE, &status);
        }
    }

    // Serialize the data to the Spoof Detector
    {
        utrie2_freeze(anyCaseTrie,   UTRIE2_16_VALUE_BITS, &status);
        // Size-only call: the required size is reported via a buffer overflow error.
        int32_t size = utrie2_serialize(anyCaseTrie, NULL, 0, &status);
        // printf("Any case Trie size: %d\n", size);
        if (status != U_BUFFER_OVERFLOW_ERROR) {
            goto cleanup;
        }
        status = U_ZERO_ERROR;
        spImpl->fSpoofData->fRawData->fAnyCaseTrie = spImpl->fSpoofData->fMemLimit;
        spImpl->fSpoofData->fRawData->fAnyCaseTrieLength = size;
        spImpl->fSpoofData->fAnyCaseTrie = anyCaseTrie;
        void *where = spImpl->fSpoofData->reserveSpace(size, status);
        utrie2_serialize(anyCaseTrie, where, size, &status);

        utrie2_freeze(lowerCaseTrie, UTRIE2_16_VALUE_BITS, &status);
        size = utrie2_serialize(lowerCaseTrie, NULL, 0, &status);
        // printf("Lower case Trie size: %d\n", size);
        if (status != U_BUFFER_OVERFLOW_ERROR) {
            // anyCaseTrie is closed in cleanup; do not leave the spoof data
            // pointing at it.
            spImpl->fSpoofData->fAnyCaseTrie = NULL;
            goto cleanup;
        }
        status = U_ZERO_ERROR;
        spImpl->fSpoofData->fRawData->fLowerCaseTrie = spImpl->fSpoofData->fMemLimit;
        spImpl->fSpoofData->fRawData->fLowerCaseTrieLength = size;
        spImpl->fSpoofData->fLowerCaseTrie = lowerCaseTrie;
        where = spImpl->fSpoofData->reserveSpace(size, status);
        utrie2_serialize(lowerCaseTrie, where, size, &status);

        spImpl->fSpoofData->fRawData->fScriptSets = spImpl->fSpoofData->fMemLimit;
        spImpl->fSpoofData->fRawData->fScriptSetsLength = rtScriptSetsCount;
        ScriptSet *rtScriptSets =  static_cast<ScriptSet *>
            (spImpl->fSpoofData->reserveSpace(rtScriptSetsCount * sizeof(ScriptSet), status));
        // Only write into the reserved area if it was actually obtained.
        if (U_SUCCESS(status) && rtScriptSets != NULL) {
            uint32_t rindex = 2;
            for (int32_t i=2; i<scriptSets->size(); i++) {
                BuilderScriptSet *bSet = static_cast<BuilderScriptSet *>(scriptSets->elementAt(i));
                if (bSet->rindex < rindex) {
                    // We have already copied this script set to the serialized data.
                    continue;
                }
                U_ASSERT(rindex == bSet->rindex);
                rtScriptSets[rindex] = *bSet->sset;   // Assignment of a ScriptSet just copies the bits.
                rindex++;
            }
        }
    }

    // Open new utrie2s from the serialized data.  We don't want to keep the ones
    //   we just built because we would then have two copies of the data, one internal to
    //   the utries that we have already constructed, and one in the serialized data area.
    //   An alternative would be to not pre-serialize the Trie data, but that makes the
    //   spoof detector data different, depending on how the detector was constructed.
    //   It's simpler to keep the data always the same.

    spImpl->fSpoofData->fAnyCaseTrie = utrie2_openFromSerialized(
            UTRIE2_16_VALUE_BITS,
            (const char *)spImpl->fSpoofData->fRawData + spImpl->fSpoofData->fRawData->fAnyCaseTrie,
            spImpl->fSpoofData->fRawData->fAnyCaseTrieLength,
            NULL,
            &status);

    // Use the lower-case trie's own length here, not the any-case trie's.
    spImpl->fSpoofData->fLowerCaseTrie = utrie2_openFromSerialized(
            UTRIE2_16_VALUE_BITS,
            (const char *)spImpl->fSpoofData->fRawData + spImpl->fSpoofData->fRawData->fLowerCaseTrie,
            spImpl->fSpoofData->fRawData->fLowerCaseTrieLength,
            NULL,
            &status);



cleanup:
    if (U_FAILURE(status)) {
        pe->line = lineNum;
    }
    uregex_close(parseRegexp);
    uprv_free(input);

    // scriptSets is NULL when its allocation failed.
    if (scriptSets != NULL) {
        for (int32_t i=0; i<scriptSets->size(); i++) {
            BuilderScriptSet *bsset = static_cast<BuilderScriptSet *>(scriptSets->elementAt(i));
            delete bsset;
        }
    }
    delete scriptSets;
    utrie2_close(anyCaseTrie);
    utrie2_close(lowerCaseTrie);
    return;
}