Exemplo n.º 1
0
static void
parseSpecialCasing(const char *filename, UErrorCode *pErrorCode) {
    char *fields[5][2];
    int32_t i, j;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    u_parseDelimitedFile(filename, ';', fields, 5, specialCasingLineFn, NULL, pErrorCode);

    /* sort the special casing entries by code point */
    if(specialCasingCount>0) {
        uprv_sortArray(specialCasings, specialCasingCount, sizeof(SpecialCasing),
                       compareSpecialCasings, NULL, FALSE, pErrorCode);
    }
    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    /* replace multiple entries for any code point by one "complex" one */
    j=0;
    for(i=1; i<specialCasingCount; ++i) {
        if(specialCasings[i-1].code==specialCasings[i].code) {
            /* there is a duplicate code point */
            specialCasings[i-1].code=0x7fffffff;    /* remove this entry in the following sorting */
            specialCasings[i].isComplex=TRUE;       /* make the following one complex */
            specialCasings[i].lowerCase[0]=0;
            specialCasings[i].upperCase[0]=0;
            specialCasings[i].titleCase[0]=0;
            ++j;
        }
    }

    /* if some entries just were removed, then re-sort */
    if(j>0) {
        uprv_sortArray(specialCasings, specialCasingCount, sizeof(SpecialCasing),
                       compareSpecialCasings, NULL, FALSE, pErrorCode);
        specialCasingCount-=j;
    }
    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    /*
     * Add one complex mapping to caseSensitive that was filtered out above:
     * Greek final Sigma has a conditional mapping but not locale-sensitive,
     * and it is taken when lowercasing just U+03A3 alone.
     * 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
     */
    uset_add(caseSensitive, 0x3c2);
}
Exemplo n.º 2
0
U_CDECL_END

U_CAPI void U_EXPORT2
ucm_sortTable(UCMTable *t) {
    UErrorCode errorCode;
    int32_t i;

    if(t->isSorted) {
        return;
    }

    errorCode=U_ZERO_ERROR;

    /* 1. sort by Unicode first */
    uprv_sortArray(t->mappings, t->mappingsLength, sizeof(UCMapping),
                   compareMappingsUnicodeFirst, t,
                   FALSE, &errorCode);

    /* build the reverseMap */
    if(t->reverseMap==NULL) {
        /*
         * allocate mappingsCapacity instead of mappingsLength so that
         * if mappings are added, the reverseMap need not be
         * reallocated each time
         * (see ucm_moveMappings() and ucm_addMapping())
         */
        t->reverseMap=(int32_t *)uprv_malloc(t->mappingsCapacity*sizeof(int32_t));
        if(t->reverseMap==NULL) {
            fprintf(stderr, "ucm error: unable to allocate reverseMap\n");
            exit(U_MEMORY_ALLOCATION_ERROR);
        }
    }
    for(i=0; i<t->mappingsLength; ++i) {
        t->reverseMap[i]=i;
    }

    /* 2. sort reverseMap by mappings bytes first */
    uprv_sortArray(t->reverseMap, t->mappingsLength, sizeof(int32_t),
                   compareMappingsBytesFirst, t,
                   FALSE, &errorCode);

    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "ucm error: sortTable()/uprv_sortArray() fails - %s\n",
                u_errorName(errorCode));
        exit(errorCode);
    }

    t->isSorted=TRUE;
}
Exemplo n.º 3
0
UBool
TimeArrayTimeZoneRule::initStartTimes(const UDate source[], int32_t size, UErrorCode& status) {
    // Free old array
    if (fStartTimes != NULL && fStartTimes != fLocalStartTimes) {
        uprv_free(fStartTimes);
    }
    // Allocate new one if needed
    if (size > TIMEARRAY_STACK_BUFFER_SIZE) {
        fStartTimes = (UDate*)uprv_malloc(sizeof(UDate)*size);
        if (fStartTimes == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            fNumStartTimes = 0;
            return FALSE;
        }
    } else {
        fStartTimes = (UDate*)fLocalStartTimes;
    }
    uprv_memcpy(fStartTimes, source, sizeof(UDate)*size);
    fNumStartTimes = size;
    // Sort dates
    uprv_sortArray(fStartTimes, fNumStartTimes, (int32_t)sizeof(UDate), compareDates, NULL, TRUE, &status);
    if (U_FAILURE(status)) {
        if (fStartTimes != NULL && fStartTimes != fLocalStartTimes) {
            uprv_free(fStartTimes);
        }
        fNumStartTimes = 0;
        return FALSE;
    }
    return TRUE;
}
Exemplo n.º 4
0
void
CasePropsBuilder::makeUnfoldData(UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }

    UChar *p, *q;
    int32_t i, j, k;

    /* sort the data */
    int32_t unfoldLength=unfold.length();
    int32_t unfoldRows=unfoldLength/UGENCASE_UNFOLD_WIDTH-1;
    UChar *unfoldBuffer=unfold.getBuffer(-1);
    uprv_sortArray(unfoldBuffer+UGENCASE_UNFOLD_WIDTH, unfoldRows, UGENCASE_UNFOLD_WIDTH*2,
                   compareUnfold, NULL, FALSE, &errorCode);

    /* make unique-string rows by merging adjacent ones' code point columns */

    /* make p point to row i-1 */
    p=unfoldBuffer+UGENCASE_UNFOLD_WIDTH;

    for(i=1; i<unfoldRows;) {
        if(0==u_memcmp(p, p+UGENCASE_UNFOLD_WIDTH, UGENCASE_UNFOLD_STRING_WIDTH)) {
            /* concatenate code point columns */
            q=p+UGENCASE_UNFOLD_STRING_WIDTH;
            for(j=1; j<UGENCASE_UNFOLD_CP_WIDTH && q[j]!=0; ++j) {}
            for(k=0; k<UGENCASE_UNFOLD_CP_WIDTH && q[UGENCASE_UNFOLD_WIDTH+k]!=0; ++j, ++k) {
                q[j]=q[UGENCASE_UNFOLD_WIDTH+k];
            }
            if(j>UGENCASE_UNFOLD_CP_WIDTH) {
                fprintf(stderr, "genprops error: too many code points in unfold[]: %ld>%d=UGENCASE_UNFOLD_CP_WIDTH\n",
                        (long)j, UGENCASE_UNFOLD_CP_WIDTH);
                errorCode=U_BUFFER_OVERFLOW_ERROR;
                return;
            }

            /* move following rows up one */
            --unfoldRows;
            u_memmove(p+UGENCASE_UNFOLD_WIDTH, p+UGENCASE_UNFOLD_WIDTH*2, (unfoldRows-i)*UGENCASE_UNFOLD_WIDTH);
        } else {
            p+=UGENCASE_UNFOLD_WIDTH;
            ++i;
        }
    }

    unfoldBuffer[UCASE_UNFOLD_ROWS]=(UChar)unfoldRows;

    if(beVerbose) {
        puts("unfold data:");

        p=unfoldBuffer;
        for(i=0; i<unfoldRows; ++i) {
            p+=UGENCASE_UNFOLD_WIDTH;
            printf("[%2d] %04x %04x %04x <- %04x %04x\n",
                   (int)i, p[0], p[1], p[2], p[3], p[4]);
        }
    }

    unfold.releaseBuffer((unfoldRows+1)*UGENCASE_UNFOLD_WIDTH);
}
Exemplo n.º 5
0
void
Package::sortItems() {
    UErrorCode errorCode=U_ZERO_ERROR;
    uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode));
        exit(errorCode);
    }
}
Exemplo n.º 6
0
void
UCharsTrieBuilder::buildUChars(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return;
    }
    if(uchars!=NULL && ucharsLength>0) {
        // Already built.
        return;
    }
    if(ucharsLength==0) {
        if(elementsLength==0) {
            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return;
        }
        if(strings.isBogus()) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        uprv_sortArray(elements, elementsLength, (int32_t)sizeof(UCharsTrieElement),
                      compareElementStrings, &strings,
                      FALSE,  // need not be a stable sort
                      &errorCode);
        if(U_FAILURE(errorCode)) {
            return;
        }
        // Duplicate strings are not allowed.
        UnicodeString prev=elements[0].getString(strings);
        for(int32_t i=1; i<elementsLength; ++i) {
            UnicodeString current=elements[i].getString(strings);
            if(prev==current) {
                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
                return;
            }
            prev.fastCopyFrom(current);
        }
    }
    // Create and UChar-serialize the trie for the elements.
    ucharsLength=0;
    int32_t capacity=strings.length();
    if(capacity<1024) {
        capacity=1024;
    }
    if(ucharsCapacity<capacity) {
        uprv_free(uchars);
        uchars=static_cast<UChar *>(uprv_malloc(capacity*2));
        if(uchars==NULL) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
            ucharsCapacity=0;
            return;
        }
        ucharsCapacity=capacity;
    }
    StringTrieBuilder::build(buildOption, elementsLength, errorCode);
    if(uchars==NULL) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
    }
}
Exemplo n.º 7
0
U_CDECL_END

U_CAPI void U_EXPORT2
ucm_optimizeStates(UCMStates *states,
                   uint16_t **pUnicodeCodeUnits,
                   _MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks,
                   UBool verbose) {
    UErrorCode errorCode;
    int32_t state, cell, entry;

    /* test each state table entry */
    for(state=0; state<states->countStates; ++state) {
        for(cell=0; cell<256; ++cell) {
            entry=states->stateTable[state][cell];
            /*
             * if the entry is a final one with an MBCS_STATE_VALID_DIRECT_16 action code
             * and the code point is "unassigned" (0xfffe), then change it to
             * the "unassigned" action code with bits 26..23 set to zero and U+fffe.
             */
            if(MBCS_ENTRY_SET_STATE(entry, 0)==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, 0xfffe)) {
                states->stateTable[state][cell]=MBCS_ENTRY_FINAL_SET_ACTION(entry, MBCS_STATE_UNASSIGNED);
            }
        }
    }

    /* try to compact the toUnicode tables */
    if(states->maxCharLength==2) {
        compactToUnicode2(states, pUnicodeCodeUnits, toUFallbacks, countToUFallbacks, verbose);
    } else if(states->maxCharLength>2) {
        if(verbose) {
            compactToUnicodeHelper(states, *pUnicodeCodeUnits, toUFallbacks, countToUFallbacks);
        }
    }

    /* sort toUFallbacks */
    /*
     * It should be safe to sort them before compactToUnicode2() is called,
     * because it should not change the relative order of the offset values
     * that it adjusts, but they need to be sorted at some point, and
     * it is safest here.
     */
    if(countToUFallbacks>0) {
        errorCode=U_ZERO_ERROR; /* nothing bad will happen... */
        uprv_sortArray(toUFallbacks, countToUFallbacks,
                       sizeof(_MBCSToUFallback),
                       compareFallbacks, NULL, FALSE, &errorCode);
    }
}
Exemplo n.º 8
0
EnumToNameGroupEntry* genpname::createEnumIndex(const AliasList& list) {

    // Build the enum => name map

    // This is a 1->n map.  Each enum maps to 1 or more names.  To
    // accomplish this the index entry points to an element of the
    // NAME_GROUP array.  This is the short name (which may be empty).
    // From there, subsequent elements of NAME_GROUP are alternate
    // names for this enum, up to and including the first one that is
    // negative (negate for actual index).

    int32_t i, j, k;
    int32_t count = list.count();
    
    EnumToNameGroupEntry* enumIndex = MALLOC(EnumToNameGroupEntry, count);
    for (i=0; i<count; ++i) {
        const Alias& p = list[i];
        enumIndex[i] = EnumToNameGroupEntry(p.enumValue, p.nameGroupIndex);
    }

    UErrorCode errorCode = U_ZERO_ERROR;
    uprv_sortArray(enumIndex, count, sizeof(enumIndex[0]),
                   compareEnumToNameGroupEntry, NULL, FALSE, &errorCode);
    if (debug>1) {
        printf("Property enums: %d\n", (int)count);
        for (i=0; i<count; ++i) {
            printf("%d => %d: ",
                   (int)enumIndex[i].enumValue,
                   (int)enumIndex[i].nameGroupIndex);
            UBool done = FALSE;
            for (j=enumIndex[i].nameGroupIndex; !done; ++j) {
                k = NAME_GROUP[j];
                if (k < 0) {
                    k = -k;
                    done = TRUE;
                }
                printf("\"%s\"", STRING_TABLE[k].str);
                if (!done) printf(", ");
            }
            printf("\n");
        }
        printf("\n");
    }
    return enumIndex;
}
Exemplo n.º 9
0
void
BiDiPropsBuilder::makeMirror(UErrorCode &errorCode) {
    /* sort the mirroring table by source code points */
    uprv_sortArray(mirrors, mirrorTop, 8,
                   compareMirror, NULL, FALSE, &errorCode);
    if(U_FAILURE(errorCode)) { return; }

    /*
     * reduce the 2-column table to a single column
     * by putting the index to the mirror entry into the source entry
     *
     * first:
     * find each mirror code point in the source column and set each other's indexes
     *
     * second:
     * reduce the table, combine the source code points with their indexes
     * and store as a simple array of uint32_t
     */
    for(int32_t i=0; i<mirrorTop; ++i) {
        uint32_t c=mirrors[i][1]; /* mirror code point */
        if(c>0x1fffff) {
            continue; /* this entry already has an index */
        }

        /* search for the mirror code point in the source column */
        int32_t start, limit, step;
        if(c<mirrors[i][0]) {
            /* search before i */
            start=i-1;
            limit=-1;
            step=-1;
        } else {
            start=i+1;
            limit=mirrorTop;
            step=1;
        }

        for(int32_t j=start;; j+=step) {
            if(j==limit) {
                fprintf(stderr,
                        "genprops error: bidi mirror does not roundtrip - %04lx->%04lx->?\n",
                        (long)mirrors[i][0], (long)mirrors[i][1]);
                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
            }
            if(c==mirrors[j][0]) {
                /*
                 * found the mirror code point c in the source column,
                 * set both entries' indexes to each other
                 */
                if(UBIDI_GET_MIRROR_CODE_POINT(mirrors[i][0])!=UBIDI_GET_MIRROR_CODE_POINT(mirrors[j][1])) {
                    /* roundtrip check fails */
                    fprintf(stderr,
                            "genprops error: bidi mirrors do not roundtrip - %04lx->%04lx->%04lx\n",
                            (long)mirrors[i][0], (long)mirrors[i][1], (long)mirrors[j][1]);
                    errorCode=U_ILLEGAL_ARGUMENT_ERROR;
                } else {
                    mirrors[i][1]|=(uint32_t)j<<UBIDI_MIRROR_INDEX_SHIFT;
                    mirrors[j][1]|=(uint32_t)i<<UBIDI_MIRROR_INDEX_SHIFT;
                }
                break;
            }
        }
    }

    /* now the second step, the actual reduction of the table */
    uint32_t *reducedMirror=mirrors[0];
    for(int32_t i=0; i<mirrorTop; ++i) {
        reducedMirror[i]=mirrors[i][0]|(mirrors[i][1]&~0x1fffff);
    }
}
Exemplo n.º 10
0
U_CAPI int32_t U_EXPORT2
ucnv_swapAliases(const UDataSwapper *ds,
                 const void *inData, int32_t length, void *outData,
                 UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize;

    const uint16_t *inTable;
    const uint32_t *inSectionSizes;
    uint32_t toc[offsetsCount];
    uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
    uint32_t i, count, tocLength, topOffset;

    TempRow rows[STACK_ROW_CAPACITY];
    uint16_t resort[STACK_ROW_CAPACITY];
    TempAliasTable tempTable;

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
        pInfo->dataFormat[1]==0x76 &&
        pInfo->dataFormat[2]==0x41 &&
        pInfo->dataFormat[3]==0x6c &&
        pInfo->formatVersion[0]==3
    )) {
        udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /* an alias table must contain at least the table of contents array */
    if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
        udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
                         length-headerSize);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
    inTable=(const uint16_t *)inSectionSizes;
    uprv_memset(toc, 0, sizeof(toc));
    toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
    if(tocLength<minTocLength || offsetsCount<=tocLength) {
        udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return 0;
    }

    /* read the known part of the table of contents */
    for(i=converterListIndex; i<=tocLength; ++i) {
        toc[i]=ds->readUInt32(inSectionSizes[i]);
    }

    /* compute offsets */
    uprv_memset(offsets, 0, sizeof(offsets));
    offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
    for(i=tagListIndex; i<=tocLength; ++i) {
        offsets[i]=offsets[i-1]+toc[i-1];
    }

    /* compute the overall size of the after-header data, in numbers of 16-bit units */
    topOffset=offsets[i-1]+toc[i-1];

    if(length>=0) {
        uint16_t *outTable;
        const uint16_t *p, *p2;
        uint16_t *q, *q2;
        uint16_t oldIndex;

        if((length-headerSize)<(2*(int32_t)topOffset)) {
            udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
                             length-headerSize);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        outTable=(uint16_t *)((char *)outData+headerSize);

        /* swap the entire table of contents */
        ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);

        /* swap unormalized strings & normalized strings */
        ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
                             outTable+offsets[stringTableIndex], pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
            return 0;
        }

        if(ds->inCharset==ds->outCharset) {
            /* no need to sort, just swap all 16-bit values together */
            ds->swapArray16(ds,
                            inTable+offsets[converterListIndex],
                            2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
                            outTable+offsets[converterListIndex],
                            pErrorCode);
        } else {
            /* allocate the temporary table for sorting */
            count=toc[aliasListIndex];

            tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */

            if(count<=STACK_ROW_CAPACITY) {
                tempTable.rows=rows;
                tempTable.resort=resort;
            } else {
                tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
                if(tempTable.rows==NULL) {
                    udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
                                     count);
                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
                    return 0;
                }
                tempTable.resort=(uint16_t *)(tempTable.rows+count);
            }

            if(ds->outCharset==U_ASCII_FAMILY) {
                tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
            } else /* U_EBCDIC_FAMILY */ {
                tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
            }

            /*
             * Sort unique aliases+mapped names.
             *
             * We need to sort the list again by outCharset strings because they
             * sort differently for different charset families.
             * First we set up a temporary table with the string indexes and
             * sorting indexes and sort that.
             * Then we permutate and copy/swap the actual values.
             */
            p=inTable+offsets[aliasListIndex];
            q=outTable+offsets[aliasListIndex];

            p2=inTable+offsets[untaggedConvArrayIndex];
            q2=outTable+offsets[untaggedConvArrayIndex];

            for(i=0; i<count; ++i) {
                tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
                tempTable.rows[i].sortIndex=(uint16_t)i;
            }

            uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
                           io_compareRows, &tempTable,
                           FALSE, pErrorCode);

            if(U_SUCCESS(*pErrorCode)) {
                /* copy/swap/permutate items */
                if(p!=q) {
                    for(i=0; i<count; ++i) {
                        oldIndex=tempTable.rows[i].sortIndex;
                        ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
                        ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
                    }
                } else {
                    /*
                     * If we swap in-place, then the permutation must use another
                     * temporary array (tempTable.resort)
                     * before the results are copied to the outBundle.
                     */
                    uint16_t *r=tempTable.resort;

                    for(i=0; i<count; ++i) {
                        oldIndex=tempTable.rows[i].sortIndex;
                        ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
                    }
                    uprv_memcpy(q, r, 2*count);

                    for(i=0; i<count; ++i) {
                        oldIndex=tempTable.rows[i].sortIndex;
                        ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
                    }
                    uprv_memcpy(q2, r, 2*count);
                }
            }

            if(tempTable.rows!=rows) {
                uprv_free(tempTable.rows);
            }

            if(U_FAILURE(*pErrorCode)) {
                udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
                                 count);
                return 0;
            }

            /* swap remaining 16-bit values */
            ds->swapArray16(ds,
                            inTable+offsets[converterListIndex],
                            2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
                            outTable+offsets[converterListIndex],
                            pErrorCode);
            ds->swapArray16(ds,
                            inTable+offsets[taggedAliasArrayIndex],
                            2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
                            outTable+offsets[taggedAliasArrayIndex],
                            pErrorCode);
        }
    }

    return headerSize+2*(int32_t)topOffset;
}
Exemplo n.º 11
0
/**
 *  Stable sort with a user supplied comparator of type UComparator.
 */
void UVector::sortWithUComparator(UComparator *compare, const void *context, UErrorCode &ec) {
    if (U_SUCCESS(ec)) {
        uprv_sortArray(elements, count, sizeof(UElement),
                       compare, context, TRUE, &ec);
    }
}
Exemplo n.º 12
0
/**
 *  Sort with a user supplied comparator.
 *
 *    The comparator function handling is confusing because the function type
 *    for UVector  (as defined for sortedInsert()) is different from the signature
 *    required by uprv_sortArray().  This is handled by passing the
 *    the UVector sort function pointer via the context pointer to a
 *    sortArray() comparator function, which can then call back to
 *    the original user functtion.
 *
 *    An additional twist is that it's not safe to pass a pointer-to-function
 *    as  a (void *) data pointer, so instead we pass a (data) pointer to a
 *    pointer-to-function variable.
 */
void UVector::sort(UElementComparator *compare, UErrorCode &ec) {
    if (U_SUCCESS(ec)) {
        uprv_sortArray(elements, count, sizeof(UElement),
                       sortComparator, &compare, FALSE, &ec);
    }
}
Exemplo n.º 13
0
/**
  * Sort the vector, assuming it constains ints.
  *     (A more general sort would take a comparison function, but it's
  *     not clear whether UVector's UElementComparator or
  *     UComparator from uprv_sortAray would be more appropriate.)
  */
void UVector::sorti(UErrorCode &ec) {
    if (U_SUCCESS(ec)) {
        uprv_sortArray(elements, count, sizeof(UElement),
                       sortiComparator, NULL,  FALSE, &ec);
    }
}
Exemplo n.º 14
0
U_CDECL_END

int32_t
NameToEnum::swap(const UDataSwapper *ds,
                   const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
                   uint8_t *temp, int32_t pos,
                   UErrorCode *pErrorCode) {
    const NameToEnum *inMap;
    NameToEnum *outMap, *tempMap;

    const EnumValue *inEnumArray;
    EnumValue *outEnumArray;

    const Offset *inNameArray;
    Offset *outNameArray;

    NameAndIndex *sortArray;
    CompareContext cmp;

    int32_t i, size, oldIndex;

    tempMap=(NameToEnum *)(temp+pos);
    if(tempMap->count!=0) {
        /* this map was swapped already */
        size=tempMap->getSize();
        return size;
    }

    inMap=(const NameToEnum *)(inBytes+pos);
    outMap=(NameToEnum *)(outBytes+pos);

    tempMap->count=udata_readInt32(ds, inMap->count);
    size=tempMap->getSize();

    if(length>=0) {
        if(length<(pos+size)) {
            if(length<(int32_t)sizeof(PropertyAliases)) {
                udata_printError(ds, "upname_swap(NameToEnum): too few bytes (%d after header)\n"
                                     "    for pnames.icu NameToEnum[%d] at %d\n",
                                 length, tempMap->count, pos);
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }
        }

        /* swap count */
        ds->swapArray32(ds, inMap, 4, outMap, pErrorCode);

        inEnumArray=inMap->getEnumArray();
        outEnumArray=outMap->getEnumArray();

        inNameArray=(const Offset *)(inEnumArray+tempMap->count);
        outNameArray=(Offset *)(outEnumArray+tempMap->count);

        if(ds->inCharset==ds->outCharset) {
            /* no need to sort, just swap the enum/name arrays */
            ds->swapArray32(ds, inEnumArray, tempMap->count*4, outEnumArray, pErrorCode);
            ds->swapArray16(ds, inNameArray, tempMap->count*2, outNameArray, pErrorCode);
            return size;
        }

        /*
         * The name and enum arrays are sorted by names and must be resorted
         * if inCharset!=outCharset.
         * We use the corresponding part of the temp array to sort an array
         * of pairs of name offsets and sorting indexes.
         * Then the sorting indexes are used to permutate-swap the name and enum arrays.
         *
         * The outBytes must already contain the swapped strings.
         */
        sortArray=(NameAndIndex *)tempMap->getEnumArray();
        for(i=0; i<tempMap->count; ++i) {
            sortArray[i].name=udata_readInt16(ds, inNameArray[i]);
            sortArray[i].index=(Offset)i;
        }

        /*
         * use a stable sort to avoid shuffling of equal strings,
         * which makes testing harder
         */
        cmp.chars=(const char *)outBytes;
        if (ds->outCharset==U_ASCII_FAMILY) {
            cmp.propCompare=uprv_compareASCIIPropertyNames;
        }
        else {
            cmp.propCompare=uprv_compareEBCDICPropertyNames;
        }
        uprv_sortArray(sortArray, tempMap->count, sizeof(NameAndIndex),
                       upname_compareRows, &cmp,
                       TRUE, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            udata_printError(ds, "upname_swap(NameToEnum).uprv_sortArray(%d items) failed\n",
                             tempMap->count);
            return 0;
        }

        /* copy/swap/permutate _enumArray[] and _nameArray[] */
        if(inEnumArray!=outEnumArray) {
            for(i=0; i<tempMap->count; ++i) {
                oldIndex=sortArray[i].index;
                ds->swapArray32(ds, inEnumArray+oldIndex, 4, outEnumArray+i, pErrorCode);
                ds->swapArray16(ds, inNameArray+oldIndex, 2, outNameArray+i, pErrorCode);
            }
        } else {
            /*
             * in-place swapping: need to permutate into a temporary array
             * and then copy back to not destroy the data
             */
            EnumValue *tempEnumArray;
            Offset *oldIndexes;

            /* write name offsets directly from sortArray */
            for(i=0; i<tempMap->count; ++i) {
                ds->writeUInt16((uint16_t *)outNameArray+i, (uint16_t)sortArray[i].name);
            }

            /*
             * compress the oldIndexes into a separate array to make space for tempEnumArray
             * the tempMap _nameArray becomes oldIndexes[], getting the index
             *   values from the 2D sortArray[],
             * while sortArray=tempMap _enumArray[] becomes tempEnumArray[]
             * this saves us allocating more memory
             *
             * it works because sizeof(NameAndIndex)<=sizeof(EnumValue)
             * and because the nameArray[] can be used for oldIndexes[]
             */
            tempEnumArray=(EnumValue *)sortArray;
            oldIndexes=(Offset *)(sortArray+tempMap->count);

            /* copy sortArray[].index values into oldIndexes[] */
            for(i=0; i<tempMap->count; ++i) {
                oldIndexes[i]=sortArray[i].index;
            }

            /* permutate inEnumArray[] into tempEnumArray[] */
            for(i=0; i<tempMap->count; ++i) {
                ds->swapArray32(ds, inEnumArray+oldIndexes[i], 4, tempEnumArray+i, pErrorCode);
            }

            /* copy tempEnumArray[] to outEnumArray[] */
            uprv_memcpy(outEnumArray, tempEnumArray, tempMap->count*4);
        }
    }

    return size;
}
Exemplo n.º 15
0
U_CDECL_END

U_CFUNC int32_t U_CALLCONV
udata_swapPackage(const UDataSwapper *ds,
                  const void *inData, int32_t length, void *outData,
                  UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize;

    const uint8_t *inBytes;
    uint8_t *outBytes;

    uint32_t itemCount, offset, i;
    int32_t itemLength;

    const UDataOffsetTOCEntry *inEntries;
    UDataOffsetTOCEntry *outEntries;

    ToCEntry *table;

    char inPkgName[32], outPkgName[32];
    int32_t inPkgNameLength, outPkgNameLength;

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
                pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CmnD" */
                pInfo->dataFormat[1]==0x6d &&
                pInfo->dataFormat[2]==0x6e &&
                pInfo->dataFormat[3]==0x44 &&
                pInfo->formatVersion[0]==1
            )) {
        udata_printError(ds, "udata_swapPackage(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /*
     * We need to change the ToC name entries so that they have the correct
     * package name prefix.
     * Extract the package names from the in/out filenames.
     */
    inPkgNameLength=extractPackageName(
                        ds, inFilename,
                        inPkgName, (int32_t)sizeof(inPkgName),
                        pErrorCode);
    outPkgNameLength=extractPackageName(
                         ds, outFilename,
                         outPkgName, (int32_t)sizeof(outPkgName),
                         pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /*
     * It is possible to work with inPkgNameLength!=outPkgNameLength,
     * but then the length of the data file would change more significantly,
     * which we are not currently prepared for.
     */
    if(inPkgNameLength!=outPkgNameLength) {
        udata_printError(ds, "udata_swapPackage(): the package names \"%s\" and \"%s\" must have the same length\n",
                         inPkgName, outPkgName);
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    inBytes=(const uint8_t *)inData+headerSize;
    inEntries=(const UDataOffsetTOCEntry *)(inBytes+4);

    if(length<0) {
        /* preflighting */
        itemCount=ds->readUInt32(*(const uint32_t *)inBytes);
        if(itemCount==0) {
            /* no items: count only the item count and return */
            return headerSize+4;
        }

        /* read the last item's offset and preflight it */
        offset=ds->readUInt32(inEntries[itemCount-1].dataOffset);
        itemLength=udata_swap(ds, inBytes+offset, -1, NULL, pErrorCode);

        if(U_SUCCESS(*pErrorCode)) {
            return headerSize+offset+(uint32_t)itemLength;
        } else {
            return 0;
        }
    } else {
        /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
        length-=headerSize;
        if(length<4) {
            /* itemCount does not fit */
            offset=0xffffffff;
            itemCount=0; /* make compilers happy */
        } else {
            itemCount=ds->readUInt32(*(const uint32_t *)inBytes);
            if(itemCount==0) {
                offset=4;
            } else if((uint32_t)length<(4+8*itemCount)) {
                /* ToC table does not fit */
                offset=0xffffffff;
            } else {
                /* offset of the last item plus at least 20 bytes for its header */
                offset=20+ds->readUInt32(inEntries[itemCount-1].dataOffset);
            }
        }
        if((uint32_t)length<offset) {
            udata_printError(ds, "udata_swapPackage(): too few bytes (%d after header) for unames.icu\n",
                             length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        outBytes=(uint8_t *)outData+headerSize;

        /* swap the item count */
        ds->swapArray32(ds, inBytes, 4, outBytes, pErrorCode);

        if(itemCount==0) {
            /* no items: just return now */
            return headerSize+4;
        }

        /* swap the item name strings */
        offset=4+8*itemCount;
        itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset)-offset);
        udata_swapInvStringBlock(ds, inBytes+offset, itemLength, outBytes+offset, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            udata_printError(ds, "udata_swapPackage() failed to swap the data item name strings\n");
            return 0;
        }
        /* keep offset and itemLength in case we allocate and copy the strings below */

        /* swap the package names into the output charset */
        if(ds->outCharset!=U_CHARSET_FAMILY) {
            UDataSwapper *ds2;
            ds2=udata_openSwapper(TRUE, U_CHARSET_FAMILY, TRUE, ds->outCharset, pErrorCode);
            ds2->swapInvChars(ds2, inPkgName, inPkgNameLength, inPkgName, pErrorCode);
            ds2->swapInvChars(ds2, outPkgName, outPkgNameLength, outPkgName, pErrorCode);
            udata_closeSwapper(ds2);
            if(U_FAILURE(*pErrorCode)) {
                udata_printError(ds, "udata_swapPackage() failed to swap the input/output package names\n");
            }
        }

        /* change the prefix of each ToC entry name from the old to the new package name */
        {
            char *entryName;

            for(i=0; i<itemCount; ++i) {
                entryName=(char *)inBytes+ds->readUInt32(inEntries[i].nameOffset);

                if(0==uprv_memcmp(entryName, inPkgName, inPkgNameLength)) {
                    uprv_memcpy(entryName, outPkgName, inPkgNameLength);
                } else {
                    udata_printError(ds, "udata_swapPackage() failed: ToC item %ld does not have the input package name as a prefix\n",
                                     (long)i);
                    *pErrorCode=U_INVALID_FORMAT_ERROR;
                    return 0;
                }
            }
        }

        /*
         * Allocate the ToC table and, if necessary, a temporary buffer for
         * pseudo-in-place swapping.
         *
         * We cannot swap in-place because:
         *
         * 1. If the swapping of an item fails mid-way, then in-place swapping
         * has destroyed its data.
         * Out-of-place swapping allows us to then copy its original data.
         *
         * 2. If swapping changes the charset family, then we must resort
         * not only the ToC table but also the data items themselves.
         * This requires a permutation and is best done with separate in/out
         * buffers.
         *
         * We swapped the strings above to avoid the malloc below if string swapping fails.
         */
        if(inData==outData) {
            /* +15: prepare for extra padding of a newly-last item */
            table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)+length+15);
            if(table!=NULL) {
                outBytes=(uint8_t *)(table+itemCount);

                /* copy the item count and the swapped strings */
                uprv_memcpy(outBytes, inBytes, 4);
                uprv_memcpy(outBytes+offset, inBytes+offset, itemLength);
            }
        } else {
            table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry));
        }
        if(table==NULL) {
            udata_printError(ds, "udata_swapPackage(): out of memory allocating %d bytes\n",
                             inData==outData ?
                             itemCount*sizeof(ToCEntry)+length+15 :
                             itemCount*sizeof(ToCEntry));
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }
        outEntries=(UDataOffsetTOCEntry *)(outBytes+4);

        /* read the ToC table */
        for(i=0; i<itemCount; ++i) {
            table[i].nameOffset=ds->readUInt32(inEntries[i].nameOffset);
            table[i].inOffset=ds->readUInt32(inEntries[i].dataOffset);
            if(i>0) {
                table[i-1].length=table[i].inOffset-table[i-1].inOffset;
            }
        }
        table[itemCount-1].length=(uint32_t)length-table[itemCount-1].inOffset;

        if(ds->inCharset==ds->outCharset) {
            /* no charset swapping, no resorting: keep item offsets the same */
            for(i=0; i<itemCount; ++i) {
                table[i].outOffset=table[i].inOffset;
            }
        } else {
            /* charset swapping: resort items by their swapped names */

            /*
             * Before the actual sorting, we need to make sure that each item
             * has a length that is a multiple of 16 bytes so that all items
             * are 16-aligned.
             * Only the old last item may be missing up to 15 padding bytes.
             * Add padding bytes for it.
             * Since the icuswap main() function has already allocated enough
             * input buffer space and set the last 15 bytes there to 0xaa,
             * we only need to increase the total data length and the length
             * of the last item here.
             */
            if((length&0xf)!=0) {
                int32_t delta=16-(length&0xf);
                length+=delta;
                table[itemCount-1].length+=(uint32_t)delta;
            }

            uprv_sortArray(table, (int32_t)itemCount, (int32_t)sizeof(ToCEntry),
                           compareToCEntries, outBytes, FALSE, pErrorCode);

            /*
             * Note: Before sorting, the inOffset values were in order.
             * Now the outOffset values are in order.
             */

            /* assign outOffset values */
            offset=table[0].inOffset;
            for(i=0; i<itemCount; ++i) {
                table[i].outOffset=offset;
                offset+=table[i].length;
            }
        }

        /* write the output ToC table */
        for(i=0; i<itemCount; ++i) {
            ds->writeUInt32(&outEntries[i].nameOffset, table[i].nameOffset);
            ds->writeUInt32(&outEntries[i].dataOffset, table[i].outOffset);
        }

        /* swap each data item */
        for(i=0; i<itemCount; ++i) {
            /* first copy the item bytes to make sure that unreachable bytes are copied */
            uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length);

            /* swap the item */
            udata_swap(ds, inBytes+table[i].inOffset, (int32_t)table[i].length,
                       outBytes+table[i].outOffset, pErrorCode);

            if(U_FAILURE(*pErrorCode)) {
                if(ds->outCharset==U_CHARSET_FAMILY) {
                    udata_printError(ds, "warning: udata_swapPackage() failed to swap item \"%s\"\n"
                                     "    at inOffset 0x%x length 0x%x - %s\n"
                                     "    the data item will be copied, not swapped\n\n",
                                     (char *)outBytes+table[i].nameOffset,
                                     table[i].inOffset, table[i].length, u_errorName(*pErrorCode));
                } else {
                    udata_printError(ds, "warning: udata_swapPackage() failed to swap an item\n"
                                     "    at inOffset 0x%x length 0x%x - %s\n"
                                     "    the data item will be copied, not swapped\n\n",
                                     table[i].inOffset, table[i].length, u_errorName(*pErrorCode));
                }
                /* reset the error code, copy the data item, and continue */
                *pErrorCode=U_ZERO_ERROR;
                uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length);
            }
        }

        if(inData==outData) {
            /* copy the data from the temporary buffer to the in-place buffer */
            uprv_memcpy((uint8_t *)outData+headerSize, outBytes, length);
        }
        uprv_free(table);

        return headerSize+length;
    }
}
Exemplo n.º 16
0
static void
makeUnfoldData() {
    static const UChar
        iDot[2]=        { 0x69, 0x307 };

    UChar *p, *q;
    int32_t i, j, k;
    UErrorCode errorCode;

    /*
     * add a case folding that we missed because it's conditional:
     * 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
     */
    addUnfolding(0x130, iDot, 2);

    /* sort the data */
    errorCode=U_ZERO_ERROR;
    uprv_sortArray(unfold+UGENCASE_UNFOLD_WIDTH, unfoldRows, UGENCASE_UNFOLD_WIDTH*2,
                   compareUnfold, NULL, FALSE, &errorCode);

    /* make unique-string rows by merging adjacent ones' code point columns */

    /* make p point to row i-1 */
    p=(UChar *)unfold+UGENCASE_UNFOLD_WIDTH;

    for(i=1; i<unfoldRows;) {
        if(0==u_memcmp(p, p+UGENCASE_UNFOLD_WIDTH, UGENCASE_UNFOLD_STRING_WIDTH)) {
            /* concatenate code point columns */
            q=p+UGENCASE_UNFOLD_STRING_WIDTH;
            for(j=1; j<UGENCASE_UNFOLD_CP_WIDTH && q[j]!=0; ++j) {}
            for(k=0; k<UGENCASE_UNFOLD_CP_WIDTH && q[UGENCASE_UNFOLD_WIDTH+k]!=0; ++j, ++k) {
                q[j]=q[UGENCASE_UNFOLD_WIDTH+k];
            }
            if(j>UGENCASE_UNFOLD_CP_WIDTH) {
                fprintf(stderr, "gencase error: too many code points in unfold[]: %ld>%d=UGENCASE_UNFOLD_CP_WIDTH\n",
                        (long)j, UGENCASE_UNFOLD_CP_WIDTH);
                exit(U_BUFFER_OVERFLOW_ERROR);
            }

            /* move following rows up one */
            --unfoldRows;
            unfoldTop-=UGENCASE_UNFOLD_WIDTH;
            u_memmove(p+UGENCASE_UNFOLD_WIDTH, p+UGENCASE_UNFOLD_WIDTH*2, (unfoldRows-i)*UGENCASE_UNFOLD_WIDTH);
        } else {
            p+=UGENCASE_UNFOLD_WIDTH;
            ++i;
        }
    }

    unfold[UCASE_UNFOLD_ROWS]=(UChar)unfoldRows;

    if(beVerbose) {
        puts("unfold data:");

        p=(UChar *)unfold;
        for(i=0; i<unfoldRows; ++i) {
            p+=UGENCASE_UNFOLD_WIDTH;
            printf("[%2d] %04x %04x %04x <- %04x %04x\n",
                   (int)i, p[0], p[1], p[2], p[3], p[4]);
        }
    }
}
Exemplo n.º 17
0
int genpname::MMain(int argc, char* argv[])
{
    int32_t i, j;
    UErrorCode status = U_ZERO_ERROR;

    u_init(&status);
    if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
        fprintf(stderr, "Error: u_init returned %s\n", u_errorName(status));
        status = U_ZERO_ERROR;
    }


    /* preset then read command line options */
    options[3].value=u_getDataDirectory();
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);

    /* error handling, printing usage message */
    if (argc<0) {
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    }

    debug = options[5].doesOccur ? (*options[5].value - '0') : 0;

    if (argc!=1 || options[0].doesOccur || options[1].doesOccur ||
       debug < 0 || debug > 9) {
        fprintf(stderr,
            "usage: %s [-options]\n"
            "\tcreate " PNAME_DATA_NAME "." PNAME_DATA_TYPE "\n"
            "options:\n"
            "\t-h or -? or --help  this usage text\n"
            "\t-v or --verbose     turn on verbose output\n"
            "\t-c or --copyright   include a copyright notice\n"
            "\t-d or --destdir     destination directory, followed by the path\n"
            "\t-D or --debug 0..9  emit debugging messages (if > 0)\n",
            argv[0]);
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    /* get the options values */
    useCopyright=options[2].doesOccur;
    verbose = options[4].doesOccur;

    // ------------------------------------------------------------
    // Do not sort the string table, instead keep it in data.h order.
    // This simplifies data swapping and testing thereof because the string
    // table itself need not be sorted during swapping.
    // The NameToEnum sorter sorts each such map's string offsets instead.

    if (debug>1) {
        printf("String pool: %d\n", (int)STRING_COUNT);
        for (i=0; i<STRING_COUNT; ++i) {
            if (i != 0) {
                printf(", ");
            }
            printf("%s (%d)", STRING_TABLE[i].str, (int)STRING_TABLE[i].index);
        }
        printf("\n\n");
    }

    // ------------------------------------------------------------
    // Create top-level property indices

    PropertyArrayList props(PROPERTY, PROPERTY_COUNT);
    int32_t propNameCount;
    NameToEnumEntry* propName = createNameIndex(props, propNameCount);
    EnumToNameGroupEntry* propEnum = createEnumIndex(props);

    // ------------------------------------------------------------
    // Create indices for the value list for each enumerated property

    // This will have more entries than we need...
    EnumToValueEntry* enumToValue = MALLOC(EnumToValueEntry, PROPERTY_COUNT);
    int32_t enumToValue_count = 0;
    for (i=0, j=0; i<PROPERTY_COUNT; ++i) {
        if (PROPERTY[i].valueCount == 0) continue;
        AliasArrayList values(PROPERTY[i].valueList,
                              PROPERTY[i].valueCount);
        enumToValue[j].enumValue = PROPERTY[i].enumValue;
        enumToValue[j].enumToName = createEnumIndex(values);
        enumToValue[j].enumToName_count = PROPERTY[i].valueCount;
        enumToValue[j].nameToEnum = createNameIndex(values,
                                                    enumToValue[j].nameToEnum_count);
        ++j;
    }
    enumToValue_count = j;

    uprv_sortArray(enumToValue, enumToValue_count, sizeof(enumToValue[0]),
                   compareEnumToValueEntry, NULL, FALSE, &status);

    // ------------------------------------------------------------
    // Build PropertyAliases layout in memory

    Builder builder(debug);

    builder.buildTopLevelProperties(propName,
                                    propNameCount,
                                    propEnum,
                                    PROPERTY_COUNT);
    
    builder.buildValues(enumToValue,
                        enumToValue_count);

    builder.buildStringPool(STRING_TABLE,
                            STRING_COUNT,
                            NAME_GROUP,
                            NAME_GROUP_COUNT);

    builder.fixup();

    ////////////////////////////////////////////////////////////
    // Write the output file
    ////////////////////////////////////////////////////////////
    int32_t wlen = writeDataFile(options[3].value, builder);
    if (verbose) {
        fprintf(stdout, "Output file: %s.%s, %ld bytes\n",
            U_ICUDATA_NAME "_" PNAME_DATA_NAME, PNAME_DATA_TYPE, (long)wlen);
    }

    return 0; // success
}
Exemplo n.º 18
0
U_CAPI void U_EXPORT2
upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) {
    uint32_t *row;
    int32_t i, columns, valueColumns, rows, count;
    UChar32 start, limit;

    /* argument checking */
    if(U_FAILURE(*pErrorCode)) {
        return;
    }
    if(handler==NULL) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    if(pv->isCompacted) {
        return;
    }

    /* Set the flag now: Sorting and compacting destroys the builder data structure. */
    pv->isCompacted=TRUE;

    rows=pv->rows;
    columns=pv->columns;
    valueColumns=columns-2; /* not counting start & limit */

    /* sort the properties vectors to find unique vector values */
    uprv_sortArray(pv->v, rows, columns*4,
                   upvec_compareRows, pv, FALSE, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    /*
     * Find and set the special values.
     * This has to do almost the same work as the compaction below,
     * to find the indexes where the special-value rows will move.
     */
    row=pv->v;
    count=-valueColumns;
    for(i=0; i<rows; ++i) {
        start=(UChar32)row[0];

        /* count a new values vector if it is different from the current one */
        if(count<0 || 0!=uprv_memcmp(row+2, row-valueColumns, valueColumns*4)) {
            count+=valueColumns;
        }

        if(start>=UPVEC_FIRST_SPECIAL_CP) {
            handler(context, start, start, count, row+2, valueColumns, pErrorCode);
            if(U_FAILURE(*pErrorCode)) {
                return;
            }
        }

        row+=columns;
    }

    /* count is at the beginning of the last vector, add valueColumns to include that last vector */
    count+=valueColumns;

    /* Call the handler once more to signal the start of delivering real values. */
    handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP,
            count, row-valueColumns, valueColumns, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    /*
     * Move vector contents up to a contiguous array with only unique
     * vector values, and call the handler function for each vector.
     *
     * This destroys the Properties Vector structure and replaces it
     * with an array of just vector values.
     */
    row=pv->v;
    count=-valueColumns;
    for(i=0; i<rows; ++i) {
        /* fetch these first before memmove() may overwrite them */
        start=(UChar32)row[0];
        limit=(UChar32)row[1];

        /* add a new values vector if it is different from the current one */
        if(count<0 || 0!=uprv_memcmp(row+2, pv->v+count, valueColumns*4)) {
            count+=valueColumns;
            uprv_memmove(pv->v+count, row+2, valueColumns*4);
        }

        if(start<UPVEC_FIRST_SPECIAL_CP) {
            handler(context, start, limit-1, count, pv->v+count, valueColumns, pErrorCode);
            if(U_FAILURE(*pErrorCode)) {
                return;
            }
        }

        row+=columns;
    }

    /* count is at the beginning of the last vector, add one to include that last vector */
    pv->rows=count/valueColumns+1;
}
Exemplo n.º 19
0
static void
compress(UErrorCode &errorCode) {
    uint32_t i, letterCount;
    int16_t wordNumber;

    /* sort the words in reverse order by weight */
    uprv_sortArray(words, wordCount, sizeof(Word),
                   compareWords, NULL, FALSE, &errorCode);

    /* remove the words that do not save anything */
    while(wordCount>0 && words[wordCount-1].weight<1) {
        --wordCount;
    }

    /* count the letters in the token range */
    letterCount=0;
    for(i=LEADBYTE_LIMIT; i<256; ++i) {
        if(tokens[i]==-1) {
            ++letterCount;
        }
    }
    if(!beQuiet) {
        printf("number of letters used in the names: %d\n", (int)letterCount);
    }

    /* do we need double-byte tokens? */
    if(wordCount+letterCount<=256) {
        /* no, single-byte tokens are enough */
        leadByteCount=0;
        for(i=0, wordNumber=0; wordNumber<(int16_t)wordCount; ++i) {
            if(tokens[i]!=-1) {
                tokens[i]=wordNumber;
                if(beVerbose) {
                    printf("tokens[0x%03x]: word%8ld \"%.*s\"\n",
                            (int)i, (long)words[wordNumber].weight,
                            words[wordNumber].length, words[wordNumber].s);
                }
                ++wordNumber;
            }
        }
        tokenCount=i;
    } else {
        /*
         * The tokens that need two token bytes
         * get their weight reduced by their count
         * because they save less.
         */
        tokenCount=256-letterCount;
        for(i=tokenCount; i<wordCount; ++i) {
            words[i].weight-=words[i].count;
        }

        /* sort these words in reverse order by weight */
        errorCode=U_ZERO_ERROR;
        uprv_sortArray(words+tokenCount, wordCount-tokenCount, sizeof(Word),
                        compareWords, NULL, FALSE, &errorCode);

        /* remove the words that do not save anything */
        while(wordCount>0 && words[wordCount-1].weight<1) {
            --wordCount;
        }

        /* how many tokens and lead bytes do we have now? */
        tokenCount=wordCount+letterCount+(LEADBYTE_LIMIT-1);
        /*
         * adjust upwards to take into account that
         * double-byte tokens must not
         * use NAME_SEPARATOR_CHAR as a second byte
         */
        tokenCount+=(tokenCount-256+254)/255;

        leadByteCount=(int16_t)(tokenCount>>8);
        if(leadByteCount<LEADBYTE_LIMIT) {
            /* adjust for the real number of lead bytes */
            tokenCount-=(LEADBYTE_LIMIT-1)-leadByteCount;
        } else {
            /* limit the number of lead bytes */
            leadByteCount=LEADBYTE_LIMIT-1;
            tokenCount=LEADBYTE_LIMIT*256;
            wordCount=tokenCount-letterCount-(LEADBYTE_LIMIT-1);
            /* adjust again to skip double-byte tokens with ';' */
            wordCount-=(tokenCount-256+254)/255;
        }

        /* set token 0 to word 0 */
        tokens[0]=0;
        if(beVerbose) {
            printf("tokens[0x000]: word%8ld \"%.*s\"\n",
                    (long)words[0].weight,
                    words[0].length, words[0].s);
        }
        wordNumber=1;

        /* set the lead byte tokens */
        for(i=1; (int16_t)i<=leadByteCount; ++i) {
            tokens[i]=-2;
        }

        /* set the tokens */
        for(; i<256; ++i) {
            /* if store10Names then the parser set tokens[NAME_SEPARATOR_CHAR]=-1 */
            if(tokens[i]!=-1) {
                tokens[i]=wordNumber;
                if(beVerbose) {
                    printf("tokens[0x%03x]: word%8ld \"%.*s\"\n",
                            (int)i, (long)words[wordNumber].weight,
                            words[wordNumber].length, words[wordNumber].s);
                }
                ++wordNumber;
            }
        }

        /* continue above 255 where there are no letters */
        for(; (uint32_t)wordNumber<wordCount; ++i) {
            if((i&0xff)==NAME_SEPARATOR_CHAR) {
                tokens[i]=-1; /* do not use NAME_SEPARATOR_CHAR as a second token byte */
            } else {
                tokens[i]=wordNumber;
                if(beVerbose) {
                    printf("tokens[0x%03x]: word%8ld \"%.*s\"\n",
                            (int)i, (long)words[wordNumber].weight,
                            words[wordNumber].length, words[wordNumber].s);
                }
                ++wordNumber;
            }
        }
        tokenCount=i; /* should be already tokenCount={i or i+1} */
    }

    if(!beQuiet) {
        printf("number of lead bytes: %d\n", leadByteCount);
        printf("number of single-byte tokens: %lu\n",
            (unsigned long)256-letterCount-leadByteCount);
        printf("number of tokens: %lu\n", (unsigned long)tokenCount);
    }

    compressLines();
}
Exemplo n.º 20
0
NameToEnumEntry* genpname::createNameIndex(const AliasList& list,
                                           int32_t& nameIndexCount) {

    // Build name => enum map

    // This is an n->1 map.  There are typically multiple names
    // mapping to one enum.  The name index is sorted in order of the name,
    // as defined by the uprv_compareAliasNames() function.

    int32_t i, j;
    int32_t count = list.count();
    
    // compute upper limit on number of names in the index
    int32_t nameIndexCapacity = count * MAX_NAMES_PER_GROUP;
    NameToEnumEntry* nameIndex = MALLOC(NameToEnumEntry, nameIndexCapacity);

    nameIndexCount = 0;
    int32_t names[MAX_NAMES_PER_GROUP];
    for (i=0; i<count; ++i) {
        const Alias& p = list[i];
        int32_t n = p.getUniqueNames(names);
        for (j=0; j<n; ++j) {
            U_ASSERT(nameIndexCount < nameIndexCapacity);
            nameIndex[nameIndexCount++] =
                NameToEnumEntry(names[j], p.enumValue);
        }
    }

    /*
     * use a stable sort to ensure consistent results between
     * genpname.cpp and the propname.cpp swapping code
     */
    UErrorCode errorCode = U_ZERO_ERROR;
    uprv_sortArray(nameIndex, nameIndexCount, sizeof(nameIndex[0]),
                   compareNameToEnumEntry, NULL, TRUE, &errorCode);
    if (debug>1) {
        printf("Alias names: %d\n", (int)nameIndexCount);
        for (i=0; i<nameIndexCount; ++i) {
            printf("%s => %d\n",
                   STRING_TABLE[nameIndex[i].nameIndex].str,
                   (int)nameIndex[i].enumValue);
        }
        printf("\n");
    }
    // make sure there are no duplicates.  for a sorted list we need
    // only compare adjacent items.  Alias.getUniqueNames() has
    // already eliminated duplicate names for a single property, which
    // does occur, so we're checking for duplicate names between two
    // properties, which should never occur.
    UBool ok = TRUE;
    for (i=1; i<nameIndexCount; ++i) {
        if (STRING_TABLE[nameIndex[i-1].nameIndex] ==
            STRING_TABLE[nameIndex[i].nameIndex]) {
            printf("Error: Duplicate names in property list: \"%s\", \"%s\"\n",
                   STRING_TABLE[nameIndex[i-1].nameIndex].str,
                   STRING_TABLE[nameIndex[i].nameIndex].str);
            ok = FALSE;
        }
    }
    if (!ok) {
        die("Two or more duplicate names in property list");
    }

    return nameIndex;
}