/*
 * Reads the delimited special-casing data file and post-processes the
 * global specialCasings[] table.
 *
 * @param filename   path of the ';'-delimited input file
 * @param pErrorCode in/out ICU error code; nothing is done if it is NULL or
 *                   already indicates a failure
 *
 * Steps:
 *  1. parse the file (5 fields per line, specialCasingLineFn is the line callback)
 *  2. sort specialCasings[] with compareSpecialCasings
 *  3. collapse runs of entries with the same code point into a single
 *     "complex" entry with empty lower/upper/title mappings
 *  4. add U+03C2 to the caseSensitive set
 */
static void
parseSpecialCasing(const char *filename, UErrorCode *pErrorCode) {
    char *fields[5][2];
    int32_t entry;
    int32_t duplicates=0;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    u_parseDelimitedFile(filename, ';', fields, 5, specialCasingLineFn, NULL, pErrorCode);

    /* order the special casing entries by code point */
    if(specialCasingCount>0) {
        uprv_sortArray(specialCasings, specialCasingCount, sizeof(SpecialCasing),
                       compareSpecialCasings, NULL, FALSE, pErrorCode);
    }
    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    /* fold several entries for one code point into a single "complex" entry */
    for(entry=1; entry<specialCasingCount; ++entry) {
        if(specialCasings[entry].code!=specialCasings[entry-1].code) {
            continue;
        }

        /*
         * Duplicate code point:
         * the earlier entry gets a sentinel code so that the re-sort below
         * moves it to the end, and the later entry becomes the complex one.
         */
        specialCasings[entry-1].code=0x7fffffff;
        specialCasings[entry].isComplex=TRUE;
        specialCasings[entry].lowerCase[0]=0;
        specialCasings[entry].upperCase[0]=0;
        specialCasings[entry].titleCase[0]=0;
        ++duplicates;
    }

    /* entries were marked for removal: re-sort and drop them from the count */
    if(duplicates>0) {
        uprv_sortArray(specialCasings, specialCasingCount, sizeof(SpecialCasing),
                       compareSpecialCasings, NULL, FALSE, pErrorCode);
        specialCasingCount-=duplicates;
    }
    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    /*
     * Add one complex mapping to caseSensitive that was filtered out above:
     * Greek final Sigma has a conditional mapping but not locale-sensitive,
     * and it is taken when lowercasing just U+03A3 alone.
     * 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
     */
    uset_add(caseSensitive, 0x3c2);
}
U_CDECL_END

/*
 * Sorts a UCMTable in two ways, unless t->isSorted is already set:
 *  - t->mappings itself is sorted with compareMappingsUnicodeFirst
 *  - t->reverseMap is (allocated if necessary and) filled with the indexes
 *    0..mappingsLength-1 and sorted with compareMappingsBytesFirst
 *
 * Allocation or sorting failures are fatal: a message is written to stderr
 * and the process exits with the ICU error code.
 *
 * @param t the table to sort; t->isSorted is set to TRUE on success
 */
U_CAPI void U_EXPORT2
ucm_sortTable(UCMTable *t) {
    UErrorCode errorCode=U_ZERO_ERROR;
    int32_t index;

    if(t->isSorted) {
        return;
    }

    /* step 1: the mappings, Unicode first */
    uprv_sortArray(t->mappings, t->mappingsLength, sizeof(UCMapping),
                   compareMappingsUnicodeFirst, t,
                   FALSE, &errorCode);

    /* make sure that there is a reverseMap */
    if(t->reverseMap==NULL) {
        /*
         * The reverseMap gets mappingsCapacity rather than mappingsLength
         * entries so that it need not be reallocated each time
         * mappings are added
         * (see ucm_moveMappings() and ucm_addMapping())
         */
        t->reverseMap=(int32_t *)uprv_malloc(t->mappingsCapacity*sizeof(int32_t));
        if(t->reverseMap==NULL) {
            fprintf(stderr, "ucm error: unable to allocate reverseMap\n");
            exit(U_MEMORY_ALLOCATION_ERROR);
        }
    }

    /* start with the identity permutation */
    index=0;
    while(index<t->mappingsLength) {
        t->reverseMap[index]=index;
        ++index;
    }

    /* step 2: the reverseMap, mappings bytes first */
    uprv_sortArray(t->reverseMap, t->mappingsLength, sizeof(int32_t),
                   compareMappingsBytesFirst, t,
                   FALSE, &errorCode);

    /* one check covers both sort calls */
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "ucm error: sortTable()/uprv_sortArray() fails - %s\n",
                u_errorName(errorCode));
        exit(errorCode);
    }

    t->isSorted=TRUE;
}
/**
 * Replaces the rule's start times with a sorted copy of source[0..size-1].
 *
 * Any previously heap-allocated array is released first. The new values are
 * stored in the object's local buffer (fLocalStartTimes) when size does not
 * exceed TIMEARRAY_STACK_BUFFER_SIZE, otherwise in a newly allocated array.
 *
 * @param source the start times to copy
 * @param size   number of entries in source
 * @param status in/out ICU error code; set to U_MEMORY_ALLOCATION_ERROR if
 *               the array cannot be allocated, or to the sort error
 * @return TRUE on success; FALSE on failure, in which case fNumStartTimes
 *         is 0 and fStartTimes does not point to freed memory
 */
UBool
TimeArrayTimeZoneRule::initStartTimes(const UDate source[], int32_t size, UErrorCode& status) {
    // Free old array
    if (fStartTimes != NULL && fStartTimes != fLocalStartTimes) {
        uprv_free(fStartTimes);
    }
    // Allocate new one if needed
    if (size > TIMEARRAY_STACK_BUFFER_SIZE) {
        fStartTimes = (UDate*)uprv_malloc(sizeof(UDate)*size);
        if (fStartTimes == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            fNumStartTimes = 0;
            return FALSE;
        }
    } else {
        fStartTimes = (UDate*)fLocalStartTimes;
    }
    uprv_memcpy(fStartTimes, source, sizeof(UDate)*size);
    fNumStartTimes = size;
    // Sort dates
    uprv_sortArray(fStartTimes, fNumStartTimes, (int32_t)sizeof(UDate), compareDates, NULL, TRUE, &status);
    if (U_FAILURE(status)) {
        if (fStartTimes != NULL && fStartTimes != fLocalStartTimes) {
            uprv_free(fStartTimes);
        }
        // Do not keep a dangling pointer: the "free old array" guard above
        // (and any other cleanup using the same test) would otherwise
        // free this memory a second time.
        fStartTimes = NULL;
        fNumStartTimes = 0;
        return FALSE;
    }
    return TRUE;
}
/**
 * Finalizes the "unfold" table held in the unfold string:
 * sorts its data rows, merges adjacent rows that have the same string
 * columns, and stores the resulting row count in the first row.
 *
 * Layout as used by this function: the buffer consists of rows of
 * UGENCASE_UNFOLD_WIDTH UChars. The first row is not sorted and receives
 * the row count at index UCASE_UNFOLD_ROWS. In each following row the first
 * UGENCASE_UNFOLD_STRING_WIDTH units are the string, followed by up to
 * UGENCASE_UNFOLD_CP_WIDTH code point columns (0 marks an unused column).
 *
 * @param errorCode in/out ICU error code; set to U_BUFFER_OVERFLOW_ERROR
 *                  if merged rows have more code points than fit in a row
 */
void
CasePropsBuilder::makeUnfoldData(UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }

    UChar *p, *q;
    int32_t i, j, k;

    /* sort the data */
    int32_t unfoldLength=unfold.length();
    /* number of data rows, not counting the first (header) row */
    int32_t unfoldRows=unfoldLength/UGENCASE_UNFOLD_WIDTH-1;
    UChar *unfoldBuffer=unfold.getBuffer(-1);
    /* the item size is in bytes: UGENCASE_UNFOLD_WIDTH*2 (presumably 2=sizeof(UChar)) */
    /* NOTE(review): the sort's errorCode is not checked before the merge below */
    uprv_sortArray(unfoldBuffer+UGENCASE_UNFOLD_WIDTH, unfoldRows, UGENCASE_UNFOLD_WIDTH*2,
                   compareUnfold, NULL, FALSE, &errorCode);

    /* make unique-string rows by merging adjacent ones' code point columns */

    /* make p point to row i-1 */
    p=unfoldBuffer+UGENCASE_UNFOLD_WIDTH;

    /* i is only incremented when rows i-1 and i differ, so that a merged row is compared again */
    for(i=1; i<unfoldRows;) {
        if(0==u_memcmp(p, p+UGENCASE_UNFOLD_WIDTH, UGENCASE_UNFOLD_STRING_WIDTH)) {
            /* concatenate code point columns */
            q=p+UGENCASE_UNFOLD_STRING_WIDTH;
            /* find the first unused column in row i-1; starts at 1, so column 0 is assumed to be in use */
            for(j=1; j<UGENCASE_UNFOLD_CP_WIDTH && q[j]!=0; ++j) {}
            /* append the code points of row i (at q+UGENCASE_UNFOLD_WIDTH) */
            for(k=0; k<UGENCASE_UNFOLD_CP_WIDTH && q[UGENCASE_UNFOLD_WIDTH+k]!=0; ++j, ++k) {
                q[j]=q[UGENCASE_UNFOLD_WIDTH+k];
            }
            /* the overflow is detected only after the copy loop has run */
            if(j>UGENCASE_UNFOLD_CP_WIDTH) {
                fprintf(stderr, "genprops error: too many code points in unfold[]: %ld>%d=UGENCASE_UNFOLD_CP_WIDTH\n",
                        (long)j, UGENCASE_UNFOLD_CP_WIDTH);
                errorCode=U_BUFFER_OVERFLOW_ERROR;
                /* NOTE(review): returns without unfold.releaseBuffer() */
                return;
            }

            /* move following rows up one */
            --unfoldRows;
            u_memmove(p+UGENCASE_UNFOLD_WIDTH, p+UGENCASE_UNFOLD_WIDTH*2, (unfoldRows-i)*UGENCASE_UNFOLD_WIDTH);
        } else {
            p+=UGENCASE_UNFOLD_WIDTH;
            ++i;
        }
    }

    /* store the final number of data rows in the first row */
    unfoldBuffer[UCASE_UNFOLD_ROWS]=(UChar)unfoldRows;

    if(beVerbose) {
        puts("unfold data:");

        p=unfoldBuffer;
        for(i=0; i<unfoldRows; ++i) {
            p+=UGENCASE_UNFOLD_WIDTH;
            /* prints five units per row: p[0..2] "<-" p[3..4] */
            printf("[%2d] %04x %04x %04x <- %04x %04x\n",
                   (int)i, p[0], p[1], p[2], p[3], p[4]);
        }
    }

    /* new string length: first row plus the remaining data rows */
    unfold.releaseBuffer((unfoldRows+1)*UGENCASE_UNFOLD_WIDTH);
}
void Package::sortItems() { UErrorCode errorCode=U_ZERO_ERROR; uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode)); exit(errorCode); } }
/**
 * Builds the serialized UChar trie for the strings added so far.
 *
 * Does nothing if errorCode already indicates a failure or if a trie
 * has been built already (uchars!=NULL and ucharsLength>0).
 * Before the first build, the elements are sorted and checked for duplicates.
 *
 * @param buildOption passed through to StringTrieBuilder::build()
 * @param errorCode   in/out ICU error code:
 *                    U_INDEX_OUTOFBOUNDS_ERROR if there are no elements,
 *                    U_ILLEGAL_ARGUMENT_ERROR if two elements have the same string,
 *                    U_MEMORY_ALLOCATION_ERROR if strings is bogus or
 *                    the output array is missing
 */
void
UCharsTrieBuilder::buildUChars(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return;
    }
    if(ucharsLength>0 && uchars!=NULL) {
        // Already built.
        return;
    }

    if(ucharsLength==0) {
        if(elementsLength==0) {
            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return;
        }
        if(strings.isBogus()) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        // The sort need not be stable.
        uprv_sortArray(elements, elementsLength, (int32_t)sizeof(UCharsTrieElement),
                       compareElementStrings, &strings,
                       FALSE,
                       &errorCode);
        if(U_FAILURE(errorCode)) {
            return;
        }
        // After sorting, equal strings are adjacent. They are not allowed.
        for(int32_t next=1; next<elementsLength; ++next) {
            if(elements[next-1].getString(strings)==elements[next].getString(strings)) {
                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
                return;
            }
        }
    }

    // Create and UChar-serialize the trie for the elements.
    ucharsLength=0;
    // Use at least 1024 units, or as many as there are string units.
    const int32_t wantedCapacity=strings.length()<1024 ? 1024 : strings.length();
    if(wantedCapacity>ucharsCapacity) {
        uprv_free(uchars);
        uchars=static_cast<UChar *>(uprv_malloc(wantedCapacity*2));
        if(uchars==NULL) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
            ucharsCapacity=0;
            return;
        }
        ucharsCapacity=wantedCapacity;
    }

    StringTrieBuilder::build(buildOption, elementsLength, errorCode);
    // A missing output array after the build is reported as out-of-memory.
    if(uchars==NULL) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
    }
}
U_CDECL_END

/*
 * Post-processes the state table and the toUnicode fallbacks:
 *  1. rewrites "valid direct 16" final entries whose code point is
 *     the "unassigned" value 0xfffe into entries with the "unassigned" action
 *  2. tries to compact the toUnicode tables, depending on maxCharLength
 *  3. sorts toUFallbacks[] with compareFallbacks
 *
 * @param states             the state table to optimize
 * @param pUnicodeCodeUnits  pointer to the code units array pointer, passed
 *                           on to the compaction helpers
 * @param toUFallbacks       array of toUnicode fallbacks, sorted in place
 * @param countToUFallbacks  number of entries in toUFallbacks
 * @param verbose            passed on to compactToUnicode2(); for
 *                           maxCharLength>2 the helper is only called if set
 */
U_CAPI void U_EXPORT2
ucm_optimizeStates(UCMStates *states,
                   uint16_t **pUnicodeCodeUnits,
                   _MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks,
                   UBool verbose) {
    UErrorCode sortStatus;
    int32_t stateIndex, byteValue, tableEntry;

    /* look at every entry of every state */
    for(stateIndex=0; stateIndex<states->countStates; ++stateIndex) {
        for(byteValue=0; byteValue<256; ++byteValue) {
            tableEntry=states->stateTable[stateIndex][byteValue];

            /*
             * Only final entries with an MBCS_STATE_VALID_DIRECT_16 action code
             * and the "unassigned" code point (0xfffe) are changed:
             * they get the "unassigned" action code
             * with bits 26..23 set to zero and U+fffe.
             */
            if(MBCS_ENTRY_SET_STATE(tableEntry, 0)!=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, 0xfffe)) {
                continue;
            }
            states->stateTable[stateIndex][byteValue]=MBCS_ENTRY_FINAL_SET_ACTION(tableEntry, MBCS_STATE_UNASSIGNED);
        }
    }

    /* try to compact the toUnicode tables */
    if(states->maxCharLength==2) {
        compactToUnicode2(states, pUnicodeCodeUnits, toUFallbacks, countToUFallbacks, verbose);
    } else if(states->maxCharLength>2 && verbose) {
        compactToUnicodeHelper(states, *pUnicodeCodeUnits, toUFallbacks, countToUFallbacks);
    }

    /* sort toUFallbacks */
    /*
     * It should be safe to sort them before compactToUnicode2() is called,
     * because it should not change the relative order of the offset values
     * that it adjusts, but they need to be sorted at some point, and
     * it is safest here.
     */
    if(countToUFallbacks<=0) {
        return;
    }
    sortStatus=U_ZERO_ERROR; /* nothing bad will happen... */
    uprv_sortArray(toUFallbacks, countToUFallbacks,
                   sizeof(_MBCSToUFallback),
                   compareFallbacks, NULL, FALSE, &sortStatus);
}
/**
 * Builds the enum => name map for one alias list.
 *
 * This is a 1->n map: each enum maps to one or more names. An index entry
 * points to an element of the NAME_GROUP array, which is the short name
 * (which may be empty). The following NAME_GROUP elements are alternate
 * names for the same enum, up to and including the first negative one
 * (negate it for the actual index).
 *
 * @param list the aliases to index
 * @return a MALLOC'ed array of list.count() entries,
 *         sorted with compareEnumToNameGroupEntry
 */
EnumToNameGroupEntry* genpname::createEnumIndex(const AliasList& list) {
    const int32_t count = list.count();
    EnumToNameGroupEntry* enumIndex = MALLOC(EnumToNameGroupEntry, count);

    // One entry per alias, in list order; sorted afterwards.
    for (int32_t i=0; i<count; ++i) {
        const Alias& alias = list[i];
        enumIndex[i] = EnumToNameGroupEntry(alias.enumValue, alias.nameGroupIndex);
    }

    UErrorCode errorCode = U_ZERO_ERROR;
    uprv_sortArray(enumIndex, count, sizeof(enumIndex[0]),
                   compareEnumToNameGroupEntry, NULL, FALSE, &errorCode);

    if (debug<=1) {
        return enumIndex;
    }

    // Debug output: each enum with all of the names in its group.
    printf("Property enums: %d\n", (int)count);
    for (int32_t i=0; i<count; ++i) {
        printf("%d => %d: ",
               (int)enumIndex[i].enumValue,
               (int)enumIndex[i].nameGroupIndex);

        int32_t groupPos = enumIndex[i].nameGroupIndex;
        for (;;) {
            int32_t stringIndex = NAME_GROUP[groupPos];
            ++groupPos;
            // A negative value marks the last name of the group.
            const UBool isLast = (UBool)(stringIndex < 0);
            if (isLast) {
                stringIndex = -stringIndex;
            }
            printf("\"%s\"", STRING_TABLE[stringIndex].str);
            if (isLast) {
                break;
            }
            printf(", ");
        }
        printf("\n");
    }
    printf("\n");

    return enumIndex;
}
/**
 * Turns the 2-column mirroring table (mirrors[i][0]=source code point,
 * mirrors[i][1]=mirror code point) into a single column of uint32_t values
 * that combine each source code point with the index of its mirror entry.
 *
 * The table is sorted by source code point first. Then, for each entry,
 * the mirror code point is looked up in the source column, and both entries
 * get each other's index in the bits above the code point
 * (shifted by UBIDI_MIRROR_INDEX_SHIFT). Finally the table is reduced
 * in place, starting at mirrors[0].
 *
 * @param errorCode in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR
 *                  if a mirror code point has no entry of its own
 *                  (in which case the function returns immediately)
 *                  or if a pair does not roundtrip
 */
void
BiDiPropsBuilder::makeMirror(UErrorCode &errorCode) {
    /* sort the mirroring table by source code points */
    uprv_sortArray(mirrors, mirrorTop, 8,
                   compareMirror, NULL, FALSE, &errorCode);
    if(U_FAILURE(errorCode)) {
        return;
    }

    /*
     * reduce the 2-column table to a single column
     * by putting the index to the mirror entry into the source entry
     *
     * first:
     * find each mirror code point in the source column and set each other's indexes
     *
     * second:
     * reduce the table, combine the source code points with their indexes
     * and store as a simple array of uint32_t
     */
    for(int32_t i=0; i<mirrorTop; ++i) {
        uint32_t c=mirrors[i][1]; /* mirror code point */
        if(c>0x1fffff) {
            continue; /* this entry already has an index */
        }

        /* search for the mirror code point in the source column */
        int32_t start, limit, step;
        if(c<mirrors[i][0]) {
            /* search before i */
            start=i-1;
            limit=-1;
            step=-1;
        } else {
            start=i+1;
            limit=mirrorTop;
            step=1;
        }

        for(int32_t j=start;; j+=step) {
            if(j==limit) {
                fprintf(stderr,
                        "genprops error: bidi mirror does not roundtrip - %04lx->%04lx->?\n",
                        (long)mirrors[i][0], (long)mirrors[i][1]);
                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
                /*
                 * Stop here: j is outside of the table now, so continuing
                 * would read mirrors[-1] or mirrors[mirrorTop] and
                 * the search loop would have no further end condition.
                 */
                return;
            }
            if(c==mirrors[j][0]) {
                /*
                 * found the mirror code point c in the source column,
                 * set both entries' indexes to each other
                 */
                if(UBIDI_GET_MIRROR_CODE_POINT(mirrors[i][0])!=UBIDI_GET_MIRROR_CODE_POINT(mirrors[j][1])) {
                    /* roundtrip check fails */
                    fprintf(stderr,
                            "genprops error: bidi mirrors do not roundtrip - %04lx->%04lx->%04lx\n",
                            (long)mirrors[i][0], (long)mirrors[i][1], (long)mirrors[j][1]);
                    errorCode=U_ILLEGAL_ARGUMENT_ERROR;
                } else {
                    mirrors[i][1]|=(uint32_t)j<<UBIDI_MIRROR_INDEX_SHIFT;
                    mirrors[j][1]|=(uint32_t)i<<UBIDI_MIRROR_INDEX_SHIFT;
                }
                break;
            }
        }
    }

    /* now the second step, the actual reduction of the table */
    uint32_t *reducedMirror=mirrors[0];
    for(int32_t i=0; i<mirrorTop; ++i) {
        /* keep the source code point in the low 21 bits, the index bits above */
        reducedMirror[i]=mirrors[i][0]|(mirrors[i][1]&~0x1fffff);
    }
}
/*
 * Swaps a converter alias table (data format "CvAl", format version 3)
 * between platform types.
 *
 * The data after the header starts with a table of contents of 32-bit values
 * (the first one is the number of sections), followed by sections of
 * 16-bit values whose sizes are given in the table of contents.
 * If the input and output charset families differ, then the alias list and
 * the parallel untagged-converter array are re-sorted by the output-charset
 * forms of the alias strings.
 *
 * @param ds         the swapper (endianness/charset conversion functions)
 * @param inData     the input data, including the data header
 * @param length     number of bytes at inData, or negative for preflighting
 *                   (only the size is computed, nothing is written)
 * @param outData    the output buffer; may be the same as inData
 * @param pErrorCode in/out ICU error code
 * @return the total size (header plus table) in bytes, or 0 on error
 */
U_CAPI int32_t U_EXPORT2
ucnv_swapAliases(const UDataSwapper *ds,
                 const void *inData, int32_t length, void *outData,
                 UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize;

    const uint16_t *inTable;
    const uint32_t *inSectionSizes;
    uint32_t toc[offsetsCount];
    uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
    uint32_t i, count, tocLength, topOffset;

    /* stack buffers for sorting; heap memory is used if there are more rows */
    TempRow rows[STACK_ROW_CAPACITY];
    uint16_t resort[STACK_ROW_CAPACITY];
    TempAliasTable tempTable;

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
        pInfo->dataFormat[1]==0x76 &&
        pInfo->dataFormat[2]==0x41 &&
        pInfo->dataFormat[3]==0x6c &&
        pInfo->formatVersion[0]==3
    )) {
        udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /* an alias table must contain at least the table of contents array */
    if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
        udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
                         length-headerSize);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* the same memory is addressed as 32-bit section sizes and as 16-bit table units */
    inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
    inTable=(const uint16_t *)inSectionSizes;
    uprv_memset(toc, 0, sizeof(toc));
    toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
    /* tocLength is used as an index into toc[]/offsets[] below, so it must be less than offsetsCount */
    if(tocLength<minTocLength || offsetsCount<=tocLength) {
        udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return 0;
    }

    /* read the known part of the table of contents */
    for(i=converterListIndex; i<=tocLength; ++i) {
        toc[i]=ds->readUInt32(inSectionSizes[i]);
    }

    /* compute offsets */
    uprv_memset(offsets, 0, sizeof(offsets));
    offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
    for(i=tagListIndex; i<=tocLength; ++i) {
        offsets[i]=offsets[i-1]+toc[i-1];
    }

    /* compute the overall size of the after-header data, in numbers of 16-bit units */
    /* (after the loop, i-1 is the index of the last section) */
    topOffset=offsets[i-1]+toc[i-1];

    if(length>=0) {
        uint16_t *outTable;
        const uint16_t *p, *p2;
        uint16_t *q, *q2;
        uint16_t oldIndex;

        if((length-headerSize)<(2*(int32_t)topOffset)) {
            udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
                             length-headerSize);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        outTable=(uint16_t *)((char *)outData+headerSize);

        /* swap the entire table of contents */
        ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);

        /* swap unnormalized strings & normalized strings */
        /* (both string tables are swapped with one call, starting at the string table offset) */
        ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
                         outTable+offsets[stringTableIndex], pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
            return 0;
        }

        if(ds->inCharset==ds->outCharset) {
            /* no need to sort, just swap all 16-bit values together */
            ds->swapArray16(ds,
                            inTable+offsets[converterListIndex],
                            2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
                            outTable+offsets[converterListIndex],
                            pErrorCode);
        } else {
            /* allocate the temporary table for sorting */
            count=toc[aliasListIndex];

            /* the comparison uses the strings that were already swapped into the output */
            tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */

            if(count<=STACK_ROW_CAPACITY) {
                tempTable.rows=rows;
                tempTable.resort=resort;
            } else {
                /* one allocation: count rows, followed by count 16-bit resort values */
                tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
                if(tempTable.rows==NULL) {
                    udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
                                     count);
                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
                    return 0;
                }
                tempTable.resort=(uint16_t *)(tempTable.rows+count);
            }

            if(ds->outCharset==U_ASCII_FAMILY) {
                tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
            } else /* U_EBCDIC_FAMILY */ {
                tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
            }

            /*
             * Sort unique aliases+mapped names.
             *
             * We need to sort the list again by outCharset strings because they
             * sort differently for different charset families.
             * First we set up a temporary table with the string indexes and
             * sorting indexes and sort that.
             * Then we permutate and copy/swap the actual values.
             */
            p=inTable+offsets[aliasListIndex];
            q=outTable+offsets[aliasListIndex];

            p2=inTable+offsets[untaggedConvArrayIndex];
            q2=outTable+offsets[untaggedConvArrayIndex];

            /* sortIndex remembers the original position of each row */
            for(i=0; i<count; ++i) {
                tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
                tempTable.rows[i].sortIndex=(uint16_t)i;
            }

            uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
                           io_compareRows, &tempTable,
                           FALSE, pErrorCode);

            if(U_SUCCESS(*pErrorCode)) {
                /* copy/swap/permutate items */
                if(p!=q) {
                    for(i=0; i<count; ++i) {
                        oldIndex=tempTable.rows[i].sortIndex;
                        ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
                        ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
                    }
                } else {
                    /*
                     * If we swap in-place, then the permutation must use another
                     * temporary array (tempTable.resort)
                     * before the results are copied to the outBundle.
                     */
                    uint16_t *r=tempTable.resort;

                    for(i=0; i<count; ++i) {
                        oldIndex=tempTable.rows[i].sortIndex;
                        ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
                    }
                    uprv_memcpy(q, r, 2*count);

                    /* the same temporary array is reused for the second, parallel array */
                    for(i=0; i<count; ++i) {
                        oldIndex=tempTable.rows[i].sortIndex;
                        ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
                    }
                    uprv_memcpy(q2, r, 2*count);
                }
            }

            /* release the heap memory, if any, before checking for errors */
            if(tempTable.rows!=rows) {
                uprv_free(tempTable.rows);
            }

            /* this also reports failures from the swapArray16() calls above */
            if(U_FAILURE(*pErrorCode)) {
                udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
                                 count);
                return 0;
            }

            /* swap remaining 16-bit values */
            /* (the sections before the alias list, and from the tagged alias array up to the strings) */
            ds->swapArray16(ds,
                            inTable+offsets[converterListIndex],
                            2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
                            outTable+offsets[converterListIndex],
                            pErrorCode);
            ds->swapArray16(ds,
                            inTable+offsets[taggedAliasArrayIndex],
                            2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
                            outTable+offsets[taggedAliasArrayIndex],
                            pErrorCode);
        }
    }

    /* topOffset counts 16-bit units */
    return headerSize+2*(int32_t)topOffset;
}
/**
 * Sorts the elements with a caller-provided comparator of type UComparator.
 * The sort is stable. Nothing happens if ec already indicates a failure.
 *
 * @param compare the comparison function, as required by uprv_sortArray()
 * @param context passed through to the comparison function
 * @param ec      in/out ICU error code
 */
void UVector::sortWithUComparator(UComparator *compare, const void *context, UErrorCode &ec) {
    if (U_FAILURE(ec)) {
        return;
    }
    uprv_sortArray(elements, count, sizeof(UElement),
                   compare, context, TRUE, &ec);
}
/**
 * Sorts the elements with a caller-provided UElementComparator.
 * The sort is not stable. Nothing happens if ec already indicates a failure.
 *
 * The function type used by UVector (as defined for sortedInsert()) differs
 * from the signature that uprv_sortArray() requires. Therefore
 * uprv_sortArray() gets the adapter function sortComparator, and the
 * caller's function is handed to the adapter via the context pointer,
 * so that the adapter can call back to the original user function.
 *
 * It is not safe to pass a pointer-to-function as a (void *) data pointer,
 * so the context is a (data) pointer to a pointer-to-function variable:
 * the address of the compare parameter.
 *
 * @param compare the element comparison function
 * @param ec      in/out ICU error code
 */
void UVector::sort(UElementComparator *compare, UErrorCode &ec) {
    if (U_FAILURE(ec)) {
        return;
    }
    uprv_sortArray(elements, count, sizeof(UElement),
                   sortComparator, &compare, FALSE, &ec);
}
/**
 * Sorts the vector, assuming that it contains ints.
 * The sort is not stable. Nothing happens if ec already indicates a failure.
 *
 * (A more general sort would take a comparison function, but it is
 * not clear whether UVector's UElementComparator or
 * UComparator from uprv_sortArray would be more appropriate.)
 *
 * @param ec in/out ICU error code
 */
void UVector::sorti(UErrorCode &ec) {
    if (U_FAILURE(ec)) {
        return;
    }
    uprv_sortArray(elements, count, sizeof(UElement),
                   sortiComparator, NULL, FALSE, &ec);
}
U_CDECL_END

/**
 * Swaps one NameToEnum map that is located at offset pos in the data.
 *
 * A map consists of a count, followed by an array of count enum values
 * and an array of count name offsets. The arrays are sorted by names, so
 * they are re-sorted if the input and output charset families differ.
 *
 * @param ds         the swapper (endianness/charset conversion functions)
 * @param inBytes    start of the input data that pos is relative to
 * @param length     number of input bytes, or negative for preflighting
 *                   (only the size is computed)
 * @param outBytes   start of the output data; may be the same as inBytes.
 *                   Must already contain the swapped strings.
 * @param temp       scratch memory parallel to the data; a non-zero count
 *                   at temp+pos marks this map as already swapped
 * @param pos        offset of this map
 * @param pErrorCode in/out ICU error code
 * @return the size of this map according to getSize(), or 0 on error
 */
int32_t
NameToEnum::swap(const UDataSwapper *ds,
                 const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
                 uint8_t *temp, int32_t pos,
                 UErrorCode *pErrorCode) {
    const NameToEnum *inMap;
    NameToEnum *outMap, *tempMap;

    const EnumValue *inEnumArray;
    EnumValue *outEnumArray;

    const Offset *inNameArray;
    Offset *outNameArray;

    NameAndIndex *sortArray;
    CompareContext cmp;

    int32_t i, size, oldIndex;

    tempMap=(NameToEnum *)(temp+pos);
    if(tempMap->count!=0) {
        /* this map was swapped already */
        size=tempMap->getSize();
        return size;
    }

    inMap=(const NameToEnum *)(inBytes+pos);
    outMap=(NameToEnum *)(outBytes+pos);

    /* storing the count in the temp map also marks the map as visited */
    tempMap->count=udata_readInt32(ds, inMap->count);
    size=tempMap->getSize();

    if(length>=0) {
        /*
         * NOTE(review): an error is only reported if length is also less than
         * sizeof(PropertyAliases); if length is at least that large but
         * less than pos+size, then the code below still runs - confirm intended.
         */
        if(length<(pos+size)) {
            if(length<(int32_t)sizeof(PropertyAliases)) {
                udata_printError(ds, "upname_swap(NameToEnum): too few bytes (%d after header)\n"
                                     " for pnames.icu NameToEnum[%d] at %d\n",
                                 length, tempMap->count, pos);
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }
        }

        /* swap count */
        ds->swapArray32(ds, inMap, 4, outMap, pErrorCode);

        /* the name array immediately follows the count enum values */
        inEnumArray=inMap->getEnumArray();
        outEnumArray=outMap->getEnumArray();

        inNameArray=(const Offset *)(inEnumArray+tempMap->count);
        outNameArray=(Offset *)(outEnumArray+tempMap->count);

        if(ds->inCharset==ds->outCharset) {
            /* no need to sort, just swap the enum/name arrays */
            ds->swapArray32(ds, inEnumArray, tempMap->count*4, outEnumArray, pErrorCode);
            ds->swapArray16(ds, inNameArray, tempMap->count*2, outNameArray, pErrorCode);
            return size;
        }

        /*
         * The name and enum arrays are sorted by names and must be resorted
         * if inCharset!=outCharset.
         * We use the corresponding part of the temp array to sort an array
         * of pairs of name offsets and sorting indexes.
         * Then the sorting indexes are used to permutate-swap the name and enum arrays.
         *
         * The outBytes must already contain the swapped strings.
         */
        sortArray=(NameAndIndex *)tempMap->getEnumArray();
        for(i=0; i<tempMap->count; ++i) {
            sortArray[i].name=udata_readInt16(ds, inNameArray[i]);
            sortArray[i].index=(Offset)i;
        }

        /*
         * use a stable sort to avoid shuffling of equal strings,
         * which makes testing harder
         */
        cmp.chars=(const char *)outBytes;
        if (ds->outCharset==U_ASCII_FAMILY) {
            cmp.propCompare=uprv_compareASCIIPropertyNames;
        } else {
            cmp.propCompare=uprv_compareEBCDICPropertyNames;
        }
        uprv_sortArray(sortArray, tempMap->count, sizeof(NameAndIndex),
                       upname_compareRows, &cmp,
                       TRUE, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            udata_printError(ds, "upname_swap(NameToEnum).uprv_sortArray(%d items) failed\n",
                             tempMap->count);
            return 0;
        }

        /* copy/swap/permutate _enumArray[] and _nameArray[] */
        if(inEnumArray!=outEnumArray) {
            /* separate buffers: output item i comes from the input item at the old index */
            for(i=0; i<tempMap->count; ++i) {
                oldIndex=sortArray[i].index;
                ds->swapArray32(ds, inEnumArray+oldIndex, 4, outEnumArray+i, pErrorCode);
                ds->swapArray16(ds, inNameArray+oldIndex, 2, outNameArray+i, pErrorCode);
            }
        } else {
            /*
             * in-place swapping: need to permutate into a temporary array
             * and then copy back to not destroy the data
             */
            EnumValue *tempEnumArray;
            Offset *oldIndexes;

            /* write name offsets directly from sortArray */
            for(i=0; i<tempMap->count; ++i) {
                ds->writeUInt16((uint16_t *)outNameArray+i, (uint16_t)sortArray[i].name);
            }

            /*
             * compress the oldIndexes into a separate array to make space for tempEnumArray
             * the tempMap _nameArray becomes oldIndexes[], getting the index
             * values from the 2D sortArray[],
             * while sortArray=tempMap _enumArray[] becomes tempEnumArray[]
             * this saves us allocating more memory
             *
             * it works because sizeof(NameAndIndex)<=sizeof(EnumValue)
             * and because the nameArray[] can be used for oldIndexes[]
             */
            tempEnumArray=(EnumValue *)sortArray;
            oldIndexes=(Offset *)(sortArray+tempMap->count);

            /* copy sortArray[].index values into oldIndexes[] */
            for(i=0; i<tempMap->count; ++i) {
                oldIndexes[i]=sortArray[i].index;
            }

            /* permutate inEnumArray[] into tempEnumArray[] */
            /* (this overwrites sortArray[], which is why the indexes were copied first) */
            for(i=0; i<tempMap->count; ++i) {
                ds->swapArray32(ds, inEnumArray+oldIndexes[i], 4, tempEnumArray+i, pErrorCode);
            }

            /* copy tempEnumArray[] to outEnumArray[] */
            uprv_memcpy(outEnumArray, tempEnumArray, tempMap->count*4);
        }
    }

    return size;
}
U_CDECL_END U_CFUNC int32_t U_CALLCONV udata_swapPackage(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { const UDataInfo *pInfo; int32_t headerSize; const uint8_t *inBytes; uint8_t *outBytes; uint32_t itemCount, offset, i; int32_t itemLength; const UDataOffsetTOCEntry *inEntries; UDataOffsetTOCEntry *outEntries; ToCEntry *table; char inPkgName[32], outPkgName[32]; int32_t inPkgNameLength, outPkgNameLength; /* udata_swapDataHeader checks the arguments */ headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return 0; } /* check data format and format version */ pInfo=(const UDataInfo *)((const char *)inData+4); if(!( pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ pInfo->dataFormat[1]==0x6d && pInfo->dataFormat[2]==0x6e && pInfo->dataFormat[3]==0x44 && pInfo->formatVersion[0]==1 )) { udata_printError(ds, "udata_swapPackage(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]); *pErrorCode=U_UNSUPPORTED_ERROR; return 0; } /* * We need to change the ToC name entries so that they have the correct * package name prefix. * Extract the package names from the in/out filenames. */ inPkgNameLength=extractPackageName( ds, inFilename, inPkgName, (int32_t)sizeof(inPkgName), pErrorCode); outPkgNameLength=extractPackageName( ds, outFilename, outPkgName, (int32_t)sizeof(outPkgName), pErrorCode); if(U_FAILURE(*pErrorCode)) { return 0; } /* * It is possible to work with inPkgNameLength!=outPkgNameLength, * but then the length of the data file would change more significantly, * which we are not currently prepared for. 
*/ if(inPkgNameLength!=outPkgNameLength) { udata_printError(ds, "udata_swapPackage(): the package names \"%s\" and \"%s\" must have the same length\n", inPkgName, outPkgName); *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; } inBytes=(const uint8_t *)inData+headerSize; inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); if(length<0) { /* preflighting */ itemCount=ds->readUInt32(*(const uint32_t *)inBytes); if(itemCount==0) { /* no items: count only the item count and return */ return headerSize+4; } /* read the last item's offset and preflight it */ offset=ds->readUInt32(inEntries[itemCount-1].dataOffset); itemLength=udata_swap(ds, inBytes+offset, -1, NULL, pErrorCode); if(U_SUCCESS(*pErrorCode)) { return headerSize+offset+(uint32_t)itemLength; } else { return 0; } } else { /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ length-=headerSize; if(length<4) { /* itemCount does not fit */ offset=0xffffffff; itemCount=0; /* make compilers happy */ } else { itemCount=ds->readUInt32(*(const uint32_t *)inBytes); if(itemCount==0) { offset=4; } else if((uint32_t)length<(4+8*itemCount)) { /* ToC table does not fit */ offset=0xffffffff; } else { /* offset of the last item plus at least 20 bytes for its header */ offset=20+ds->readUInt32(inEntries[itemCount-1].dataOffset); } } if((uint32_t)length<offset) { udata_printError(ds, "udata_swapPackage(): too few bytes (%d after header) for unames.icu\n", length); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } outBytes=(uint8_t *)outData+headerSize; /* swap the item count */ ds->swapArray32(ds, inBytes, 4, outBytes, pErrorCode); if(itemCount==0) { /* no items: just return now */ return headerSize+4; } /* swap the item name strings */ offset=4+8*itemCount; itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset)-offset); udata_swapInvStringBlock(ds, inBytes+offset, itemLength, outBytes+offset, pErrorCode); if(U_FAILURE(*pErrorCode)) { udata_printError(ds, "udata_swapPackage() 
failed to swap the data item name strings\n"); return 0; } /* keep offset and itemLength in case we allocate and copy the strings below */ /* swap the package names into the output charset */ if(ds->outCharset!=U_CHARSET_FAMILY) { UDataSwapper *ds2; ds2=udata_openSwapper(TRUE, U_CHARSET_FAMILY, TRUE, ds->outCharset, pErrorCode); ds2->swapInvChars(ds2, inPkgName, inPkgNameLength, inPkgName, pErrorCode); ds2->swapInvChars(ds2, outPkgName, outPkgNameLength, outPkgName, pErrorCode); udata_closeSwapper(ds2); if(U_FAILURE(*pErrorCode)) { udata_printError(ds, "udata_swapPackage() failed to swap the input/output package names\n"); } } /* change the prefix of each ToC entry name from the old to the new package name */ { char *entryName; for(i=0; i<itemCount; ++i) { entryName=(char *)inBytes+ds->readUInt32(inEntries[i].nameOffset); if(0==uprv_memcmp(entryName, inPkgName, inPkgNameLength)) { uprv_memcpy(entryName, outPkgName, inPkgNameLength); } else { udata_printError(ds, "udata_swapPackage() failed: ToC item %ld does not have the input package name as a prefix\n", (long)i); *pErrorCode=U_INVALID_FORMAT_ERROR; return 0; } } } /* * Allocate the ToC table and, if necessary, a temporary buffer for * pseudo-in-place swapping. * * We cannot swap in-place because: * * 1. If the swapping of an item fails mid-way, then in-place swapping * has destroyed its data. * Out-of-place swapping allows us to then copy its original data. * * 2. If swapping changes the charset family, then we must resort * not only the ToC table but also the data items themselves. * This requires a permutation and is best done with separate in/out * buffers. * * We swapped the strings above to avoid the malloc below if string swapping fails. 
*/ if(inData==outData) { /* +15: prepare for extra padding of a newly-last item */ table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)+length+15); if(table!=NULL) { outBytes=(uint8_t *)(table+itemCount); /* copy the item count and the swapped strings */ uprv_memcpy(outBytes, inBytes, 4); uprv_memcpy(outBytes+offset, inBytes+offset, itemLength); } } else { table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)); } if(table==NULL) { udata_printError(ds, "udata_swapPackage(): out of memory allocating %d bytes\n", inData==outData ? itemCount*sizeof(ToCEntry)+length+15 : itemCount*sizeof(ToCEntry)); *pErrorCode=U_MEMORY_ALLOCATION_ERROR; return 0; } outEntries=(UDataOffsetTOCEntry *)(outBytes+4); /* read the ToC table */ for(i=0; i<itemCount; ++i) { table[i].nameOffset=ds->readUInt32(inEntries[i].nameOffset); table[i].inOffset=ds->readUInt32(inEntries[i].dataOffset); if(i>0) { table[i-1].length=table[i].inOffset-table[i-1].inOffset; } } table[itemCount-1].length=(uint32_t)length-table[itemCount-1].inOffset; if(ds->inCharset==ds->outCharset) { /* no charset swapping, no resorting: keep item offsets the same */ for(i=0; i<itemCount; ++i) { table[i].outOffset=table[i].inOffset; } } else { /* charset swapping: resort items by their swapped names */ /* * Before the actual sorting, we need to make sure that each item * has a length that is a multiple of 16 bytes so that all items * are 16-aligned. * Only the old last item may be missing up to 15 padding bytes. * Add padding bytes for it. * Since the icuswap main() function has already allocated enough * input buffer space and set the last 15 bytes there to 0xaa, * we only need to increase the total data length and the length * of the last item here. 
*/ if((length&0xf)!=0) { int32_t delta=16-(length&0xf); length+=delta; table[itemCount-1].length+=(uint32_t)delta; } uprv_sortArray(table, (int32_t)itemCount, (int32_t)sizeof(ToCEntry), compareToCEntries, outBytes, FALSE, pErrorCode); /* * Note: Before sorting, the inOffset values were in order. * Now the outOffset values are in order. */ /* assign outOffset values */ offset=table[0].inOffset; for(i=0; i<itemCount; ++i) { table[i].outOffset=offset; offset+=table[i].length; } } /* write the output ToC table */ for(i=0; i<itemCount; ++i) { ds->writeUInt32(&outEntries[i].nameOffset, table[i].nameOffset); ds->writeUInt32(&outEntries[i].dataOffset, table[i].outOffset); } /* swap each data item */ for(i=0; i<itemCount; ++i) { /* first copy the item bytes to make sure that unreachable bytes are copied */ uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length); /* swap the item */ udata_swap(ds, inBytes+table[i].inOffset, (int32_t)table[i].length, outBytes+table[i].outOffset, pErrorCode); if(U_FAILURE(*pErrorCode)) { if(ds->outCharset==U_CHARSET_FAMILY) { udata_printError(ds, "warning: udata_swapPackage() failed to swap item \"%s\"\n" " at inOffset 0x%x length 0x%x - %s\n" " the data item will be copied, not swapped\n\n", (char *)outBytes+table[i].nameOffset, table[i].inOffset, table[i].length, u_errorName(*pErrorCode)); } else { udata_printError(ds, "warning: udata_swapPackage() failed to swap an item\n" " at inOffset 0x%x length 0x%x - %s\n" " the data item will be copied, not swapped\n\n", table[i].inOffset, table[i].length, u_errorName(*pErrorCode)); } /* reset the error code, copy the data item, and continue */ *pErrorCode=U_ZERO_ERROR; uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length); } } if(inData==outData) { /* copy the data from the temporary buffer to the in-place buffer */ uprv_memcpy((uint8_t *)outData+headerSize, outBytes, length); } uprv_free(table); return headerSize+length; } }
static void makeUnfoldData() { static const UChar iDot[2]= { 0x69, 0x307 }; UChar *p, *q; int32_t i, j, k; UErrorCode errorCode; /* * add a case folding that we missed because it's conditional: * 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */ addUnfolding(0x130, iDot, 2); /* sort the data */ errorCode=U_ZERO_ERROR; uprv_sortArray(unfold+UGENCASE_UNFOLD_WIDTH, unfoldRows, UGENCASE_UNFOLD_WIDTH*2, compareUnfold, NULL, FALSE, &errorCode); /* make unique-string rows by merging adjacent ones' code point columns */ /* make p point to row i-1 */ p=(UChar *)unfold+UGENCASE_UNFOLD_WIDTH; for(i=1; i<unfoldRows;) { if(0==u_memcmp(p, p+UGENCASE_UNFOLD_WIDTH, UGENCASE_UNFOLD_STRING_WIDTH)) { /* concatenate code point columns */ q=p+UGENCASE_UNFOLD_STRING_WIDTH; for(j=1; j<UGENCASE_UNFOLD_CP_WIDTH && q[j]!=0; ++j) {} for(k=0; k<UGENCASE_UNFOLD_CP_WIDTH && q[UGENCASE_UNFOLD_WIDTH+k]!=0; ++j, ++k) { q[j]=q[UGENCASE_UNFOLD_WIDTH+k]; } if(j>UGENCASE_UNFOLD_CP_WIDTH) { fprintf(stderr, "gencase error: too many code points in unfold[]: %ld>%d=UGENCASE_UNFOLD_CP_WIDTH\n", (long)j, UGENCASE_UNFOLD_CP_WIDTH); exit(U_BUFFER_OVERFLOW_ERROR); } /* move following rows up one */ --unfoldRows; unfoldTop-=UGENCASE_UNFOLD_WIDTH; u_memmove(p+UGENCASE_UNFOLD_WIDTH, p+UGENCASE_UNFOLD_WIDTH*2, (unfoldRows-i)*UGENCASE_UNFOLD_WIDTH); } else { p+=UGENCASE_UNFOLD_WIDTH; ++i; } } unfold[UCASE_UNFOLD_ROWS]=(UChar)unfoldRows; if(beVerbose) { puts("unfold data:"); p=(UChar *)unfold; for(i=0; i<unfoldRows; ++i) { p+=UGENCASE_UNFOLD_WIDTH; printf("[%2d] %04x %04x %04x <- %04x %04x\n", (int)i, p[0], p[1], p[2], p[3], p[4]); } } }
int genpname::MMain(int argc, char* argv[]) { int32_t i, j; UErrorCode status = U_ZERO_ERROR; u_init(&status); if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) { fprintf(stderr, "Error: u_init returned %s\n", u_errorName(status)); status = U_ZERO_ERROR; } /* preset then read command line options */ options[3].value=u_getDataDirectory(); argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); /* error handling, printing usage message */ if (argc<0) { fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]); } debug = options[5].doesOccur ? (*options[5].value - '0') : 0; if (argc!=1 || options[0].doesOccur || options[1].doesOccur || debug < 0 || debug > 9) { fprintf(stderr, "usage: %s [-options]\n" "\tcreate " PNAME_DATA_NAME "." PNAME_DATA_TYPE "\n" "options:\n" "\t-h or -? or --help this usage text\n" "\t-v or --verbose turn on verbose output\n" "\t-c or --copyright include a copyright notice\n" "\t-d or --destdir destination directory, followed by the path\n" "\t-D or --debug 0..9 emit debugging messages (if > 0)\n", argv[0]); return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; } /* get the options values */ useCopyright=options[2].doesOccur; verbose = options[4].doesOccur; // ------------------------------------------------------------ // Do not sort the string table, instead keep it in data.h order. // This simplifies data swapping and testing thereof because the string // table itself need not be sorted during swapping. // The NameToEnum sorter sorts each such map's string offsets instead. 
if (debug>1) { printf("String pool: %d\n", (int)STRING_COUNT); for (i=0; i<STRING_COUNT; ++i) { if (i != 0) { printf(", "); } printf("%s (%d)", STRING_TABLE[i].str, (int)STRING_TABLE[i].index); } printf("\n\n"); } // ------------------------------------------------------------ // Create top-level property indices PropertyArrayList props(PROPERTY, PROPERTY_COUNT); int32_t propNameCount; NameToEnumEntry* propName = createNameIndex(props, propNameCount); EnumToNameGroupEntry* propEnum = createEnumIndex(props); // ------------------------------------------------------------ // Create indices for the value list for each enumerated property // This will have more entries than we need... EnumToValueEntry* enumToValue = MALLOC(EnumToValueEntry, PROPERTY_COUNT); int32_t enumToValue_count = 0; for (i=0, j=0; i<PROPERTY_COUNT; ++i) { if (PROPERTY[i].valueCount == 0) continue; AliasArrayList values(PROPERTY[i].valueList, PROPERTY[i].valueCount); enumToValue[j].enumValue = PROPERTY[i].enumValue; enumToValue[j].enumToName = createEnumIndex(values); enumToValue[j].enumToName_count = PROPERTY[i].valueCount; enumToValue[j].nameToEnum = createNameIndex(values, enumToValue[j].nameToEnum_count); ++j; } enumToValue_count = j; uprv_sortArray(enumToValue, enumToValue_count, sizeof(enumToValue[0]), compareEnumToValueEntry, NULL, FALSE, &status); // ------------------------------------------------------------ // Build PropertyAliases layout in memory Builder builder(debug); builder.buildTopLevelProperties(propName, propNameCount, propEnum, PROPERTY_COUNT); builder.buildValues(enumToValue, enumToValue_count); builder.buildStringPool(STRING_TABLE, STRING_COUNT, NAME_GROUP, NAME_GROUP_COUNT); builder.fixup(); //////////////////////////////////////////////////////////// // Write the output file //////////////////////////////////////////////////////////// int32_t wlen = writeDataFile(options[3].value, builder); if (verbose) { fprintf(stdout, "Output file: %s.%s, %ld bytes\n", U_ICUDATA_NAME "_" 
PNAME_DATA_NAME, PNAME_DATA_TYPE, (long)wlen); } return 0; // success }
/*
 * Compact the properties vectors of pv in place and report them to handler.
 *
 * The rows are sorted with upvec_compareRows, then scanned twice:
 * the first scan only computes the index each distinct value vector will
 * have and reports rows whose start is >= UPVEC_FIRST_SPECIAL_CP; the
 * second scan actually moves the distinct value vectors to the front of
 * pv->v and reports the rows whose start is below UPVEC_FIRST_SPECIAL_CP.
 * Between the two scans the handler is called once with
 * UPVEC_START_REAL_VALUES_CP.
 *
 * pv          builder to compact; nothing happens if it is already compacted
 * handler     callback invoked for each reported row; must not be NULL
 * context     opaque pointer passed through to handler
 * pErrorCode  in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR for a
 *             NULL handler; the function returns early whenever it holds a
 *             failure
 */
U_CAPI void U_EXPORT2
upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) {
    uint32_t *src;
    int32_t r, rowCount, rowWidth, valueWidth, valueBytes, outIndex;
    UChar32 first, end;

    /* validate the arguments */
    if(U_FAILURE(*pErrorCode)) {
        return;
    }
    if(handler==NULL) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    if(pv->isCompacted) {
        return;
    }

    /* Flag it before doing any work: sorting and compacting ruin the builder structure. */
    pv->isCompacted=TRUE;

    rowCount=pv->rows;
    rowWidth=pv->columns;
    valueWidth=rowWidth-2;      /* a row is start, limit, then the values */
    valueBytes=valueWidth*4;

    /* sorting brings equal value vectors next to each other */
    uprv_sortArray(pv->v, rowCount, rowWidth*4,
                   upvec_compareRows, pv, FALSE, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    /*
     * First scan: report the special-value rows.
     * It mirrors the index arithmetic of the compaction scan below so that
     * each special row is reported with the index its vector will end up at.
     */
    outIndex=-valueWidth;
    src=pv->v;
    for(r=0; r<rowCount; ++r, src+=rowWidth) {
        first=(UChar32)src[0];

        /* a vector that differs from the previous row's vector gets a new index */
        if(outIndex<0 || 0!=uprv_memcmp(src+2, src-valueWidth, valueBytes)) {
            outIndex+=valueWidth;
        }

        if(first<UPVEC_FIRST_SPECIAL_CP) {
            continue;
        }
        handler(context, first, first, outIndex, src+2, valueWidth, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            return;
        }
    }

    /* outIndex is the start of the last vector; step past it to include that vector */
    outIndex+=valueWidth;

    /* One extra call announces that the real values follow. */
    handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP,
            outIndex, src-valueWidth, valueWidth, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    /*
     * Second scan: pack the distinct value vectors into the front of pv->v
     * and report each non-special row.
     * Afterwards pv->v is a plain array of value vectors, no longer rows.
     */
    outIndex=-valueWidth;
    src=pv->v;
    for(r=0; r<rowCount; ++r, src+=rowWidth) {
        /* read the range before uprv_memmove() can overwrite this row */
        first=(UChar32)src[0];
        end=(UChar32)src[1];

        /* append the vector if it differs from the last one stored */
        if(outIndex<0 || 0!=uprv_memcmp(src+2, pv->v+outIndex, valueBytes)) {
            outIndex+=valueWidth;
            uprv_memmove(pv->v+outIndex, src+2, valueBytes);
        }

        if(first>=UPVEC_FIRST_SPECIAL_CP) {
            continue;
        }
        handler(context, first, end-1, outIndex, pv->v+outIndex, valueWidth, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            return;
        }
    }

    /* outIndex is the start of the last vector; add one to count that vector too */
    pv->rows=outIndex/valueWidth+1;
}
/*
 * Decide which words become tokens, fill in tokens[] accordingly, print
 * statistics unless beQuiet is set, and finally call compressLines().
 *
 * words[] is sorted with compareWords and trailing words whose weight is
 * below 1 are dropped. If the remaining words plus the letters fit into 256
 * values, only single-byte tokens are assigned; otherwise lead bytes are
 * reserved and double-byte tokens are assigned as well.
 *
 * Values stored in tokens[]: -1 marks an entry that must not be assigned
 * (a letter, or NAME_SEPARATOR_CHAR as a second byte), -2 marks a lead
 * byte, any other value is an index into words[].
 *
 * errorCode is handed to uprv_sortArray().
 * NOTE(review): it is never tested inside this function, and the double-byte
 * branch resets it to U_ZERO_ERROR before the second sort, which discards
 * whatever the first sort (or the caller) stored - confirm this is intended.
 */
static void
compress(UErrorCode &errorCode) {
    uint32_t i, letterCount;
    int16_t wordNumber;

    /* sort the words in reverse order by weight */
    uprv_sortArray(words, wordCount, sizeof(Word),
                   compareWords, NULL, FALSE, &errorCode);

    /* remove the words that do not save anything */
    while(wordCount>0 && words[wordCount-1].weight<1) {
        --wordCount;
    }

    /* count the letters in the token range */
    letterCount=0;
    for(i=LEADBYTE_LIMIT; i<256; ++i) {
        if(tokens[i]==-1) {
            ++letterCount;
        }
    }
    if(!beQuiet) {
        printf("number of letters used in the names: %d\n", (int)letterCount);
    }

    /* do we need double-byte tokens? */
    if(wordCount+letterCount<=256) {
        /* no, single-byte tokens are enough */
        leadByteCount=0;
        /* hand out word numbers in order to every entry that is not marked -1 */
        for(i=0, wordNumber=0; wordNumber<(int16_t)wordCount; ++i) {
            if(tokens[i]!=-1) {
                tokens[i]=wordNumber;
                if(beVerbose) {
                    printf("tokens[0x%03x]: word%8ld \"%.*s\"\n",
                            (int)i, (long)words[wordNumber].weight,
                            words[wordNumber].length, words[wordNumber].s);
                }
                ++wordNumber;
            }
        }
        tokenCount=i;
    } else {
        /*
         * The tokens that need two token bytes
         * get their weight reduced by their count
         * because they save less.
         */
        tokenCount=256-letterCount;
        for(i=tokenCount; i<wordCount; ++i) {
            words[i].weight-=words[i].count;
        }

        /* sort these words in reverse order by weight */
        /* only the words from index tokenCount on are re-sorted */
        errorCode=U_ZERO_ERROR;
        uprv_sortArray(words+tokenCount, wordCount-tokenCount, sizeof(Word),
                        compareWords, NULL, FALSE, &errorCode);

        /* remove the words that do not save anything */
        while(wordCount>0 && words[wordCount-1].weight<1) {
            --wordCount;
        }

        /* how many tokens and lead bytes do we have now? */
        tokenCount=wordCount+letterCount+(LEADBYTE_LIMIT-1);
        /*
         * adjust upwards to take into account that
         * double-byte tokens must not
         * use NAME_SEPARATOR_CHAR as a second byte
         */
        tokenCount+=(tokenCount-256+254)/255;

        /* one lead byte per started block of 256 token values */
        leadByteCount=(int16_t)(tokenCount>>8);
        if(leadByteCount<LEADBYTE_LIMIT) {
            /* adjust for the real number of lead bytes */
            tokenCount-=(LEADBYTE_LIMIT-1)-leadByteCount;
        } else {
            /* limit the number of lead bytes */
            leadByteCount=LEADBYTE_LIMIT-1;
            tokenCount=LEADBYTE_LIMIT*256;
            /* cut wordCount down to what fits into the limited token space */
            wordCount=tokenCount-letterCount-(LEADBYTE_LIMIT-1);
            /* adjust again to skip double-byte tokens with ';' */
            wordCount-=(tokenCount-256+254)/255;
        }

        /* set token 0 to word 0 */
        tokens[0]=0;
        if(beVerbose) {
            printf("tokens[0x000]: word%8ld \"%.*s\"\n",
                    (long)words[0].weight,
                    words[0].length, words[0].s);
        }
        wordNumber=1;

        /* set the lead byte tokens */
        for(i=1; (int16_t)i<=leadByteCount; ++i) {
            tokens[i]=-2;
        }

        /* set the tokens */
        /* i continues right after the last lead byte */
        for(; i<256; ++i) {
            /* if store10Names then the parser set tokens[NAME_SEPARATOR_CHAR]=-1 */
            if(tokens[i]!=-1) {
                tokens[i]=wordNumber;
                if(beVerbose) {
                    printf("tokens[0x%03x]: word%8ld \"%.*s\"\n",
                            (int)i, (long)words[wordNumber].weight,
                            words[wordNumber].length, words[wordNumber].s);
                }
                ++wordNumber;
            }
        }

        /* continue above 255 where there are no letters */
        for(; (uint32_t)wordNumber<wordCount; ++i) {
            if((i&0xff)==NAME_SEPARATOR_CHAR) {
                tokens[i]=-1; /* do not use NAME_SEPARATOR_CHAR as a second token byte */
            } else {
                tokens[i]=wordNumber;
                if(beVerbose) {
                    printf("tokens[0x%03x]: word%8ld \"%.*s\"\n",
                            (int)i, (long)words[wordNumber].weight,
                            words[wordNumber].length, words[wordNumber].s);
                }
                ++wordNumber;
            }
        }
        tokenCount=i; /* should be already tokenCount={i or i+1} */
    }

    /* summary statistics */
    if(!beQuiet) {
        printf("number of lead bytes: %d\n", leadByteCount);
        printf("number of single-byte tokens: %lu\n",
            (unsigned long)256-letterCount-leadByteCount);
        printf("number of tokens: %lu\n", (unsigned long)tokenCount);
    }

    /* with the token assignment fixed, let compressLines() do its part */
    compressLines();
}
/**
 * Build the name => enum index for a list of aliases.
 *
 * Several names usually map to the same enum value, so this is an n->1
 * map. Every unique name of every alias becomes one entry; the entries are
 * then sorted with compareNameToEnumEntry using a stable sort.
 * If two adjacent sorted entries refer to equal STRING_TABLE items, each
 * such pair is printed and die() is called after the scan.
 *
 * @param list           aliases to index
 * @param nameIndexCount receives the number of entries in the result
 * @return array allocated with MALLOC holding nameIndexCount entries
 */
NameToEnumEntry* genpname::createNameIndex(const AliasList& list,
                                           int32_t& nameIndexCount) {
    const int32_t aliasCount = list.count();

    // Worst case: every alias contributes the maximum number of names.
    const int32_t capacity = aliasCount * MAX_NAMES_PER_GROUP;
    NameToEnumEntry* index = MALLOC(NameToEnumEntry, capacity);

    // Collect one entry per unique name of each alias.
    nameIndexCount = 0;
    int32_t uniqueNames[MAX_NAMES_PER_GROUP];
    for (int32_t a = 0; a < aliasCount; ++a) {
        const Alias& alias = list[a];
        const int32_t uniqueCount = alias.getUniqueNames(uniqueNames);
        for (int32_t u = 0; u < uniqueCount; ++u) {
            U_ASSERT(nameIndexCount < capacity);
            index[nameIndexCount] = NameToEnumEntry(uniqueNames[u], alias.enumValue);
            ++nameIndexCount;
        }
    }

    /*
     * The sort has to be stable so that genpname.cpp and the swapping
     * code in propname.cpp arrive at the same order.
     */
    UErrorCode errorCode = U_ZERO_ERROR;
    uprv_sortArray(index, nameIndexCount, sizeof(index[0]),
                   compareNameToEnumEntry, NULL, TRUE, &errorCode);

    if (debug > 1) {
        printf("Alias names: %d\n", (int)nameIndexCount);
        for (int32_t k = 0; k < nameIndexCount; ++k) {
            const NameToEnumEntry& entry = index[k];
            printf("%s => %d\n",
                   STRING_TABLE[entry.nameIndex].str,
                   (int)entry.enumValue);
        }
        printf("\n");
    }

    // Duplicate detection: in sorted order equal names are neighbours, so
    // comparing each entry with its predecessor is enough.
    // Alias.getUniqueNames() already removed repeats within one property
    // (which do happen); anything found here is a clash between two
    // properties, which must never happen.
    int32_t duplicatePairs = 0;
    for (int32_t k = 1; k < nameIndexCount; ++k) {
        const int32_t previous = k - 1;
        if (STRING_TABLE[index[previous].nameIndex] ==
            STRING_TABLE[index[k].nameIndex]) {
            printf("Error: Duplicate names in property list: \"%s\", \"%s\"\n",
                   STRING_TABLE[index[previous].nameIndex].str,
                   STRING_TABLE[index[k].nameIndex].str);
            ++duplicatePairs;
        }
    }
    if (duplicatePairs != 0) {
        die("Two or more duplicate names in property list");
    }

    return index;
}