/* code adapted from ures_swap() */ static void ures_enumDependencies(const UDataSwapper *ds, const char *itemName, const UDataInfo *pInfo, const uint8_t *inBytes, int32_t length, CheckDependency check, void *context, UErrorCode *pErrorCode) { const Resource *inBundle; Resource rootRes; /* the following integers count Resource item offsets (4 bytes each), not bytes */ int32_t bundleLength; /* check format version */ if(pInfo->formatVersion[0]!=1) { fprintf(stderr, "icupkg: .res format version %02x not supported\n", pInfo->formatVersion[0]); exit(U_UNSUPPORTED_ERROR); } /* a resource bundle must contain at least one resource item */ bundleLength=length/4; /* formatVersion 1.1 must have a root item and at least 5 indexes */ if( bundleLength< (pInfo->formatVersion[1]==0 ? 1 : 1+5) ) { fprintf(stderr, "icupkg: too few bytes (%d after header) for a resource bundle\n", length); exit(U_INDEX_OUTOFBOUNDS_ERROR); } inBundle=(const Resource *)inBytes; rootRes=ds->readUInt32(*inBundle); ures_enumDependencies( ds, itemName, inBundle, bundleLength, rootRes, NULL, 0, check, context, pErrorCode); /* * if the bundle attributes are present and the nofallback flag is not set, * then add the parent bundle as a dependency */ if(pInfo->formatVersion[1]>=1) { int32_t indexes[URES_INDEX_TOP]; const int32_t *inIndexes; inIndexes=(const int32_t *)inBundle+1; indexes[URES_INDEX_LENGTH]=udata_readInt32(ds, inIndexes[URES_INDEX_LENGTH]); if(indexes[URES_INDEX_LENGTH]>URES_INDEX_ATTRIBUTES) { indexes[URES_INDEX_ATTRIBUTES]=udata_readInt32(ds, inIndexes[URES_INDEX_ATTRIBUTES]); if(0==(indexes[URES_INDEX_ATTRIBUTES]&URES_ATT_NO_FALLBACK)) { /* this bundle participates in locale fallback */ checkParent(itemName, check, context, pErrorCode); } } } }
int32_t EnumToOffset::swap(const UDataSwapper *ds, const uint8_t *inBytes, int32_t length, uint8_t *outBytes, uint8_t *temp, int32_t pos, UErrorCode *pErrorCode) { const EnumToOffset *inMap; EnumToOffset *outMap, *tempMap; int32_t size; tempMap=(EnumToOffset *)(temp+pos); if(tempMap->enumStart!=0 || tempMap->enumLimit!=0) { /* this map was swapped already */ size=tempMap->getSize(); return size; } inMap=(const EnumToOffset *)(inBytes+pos); outMap=(EnumToOffset *)(outBytes+pos); tempMap->enumStart=udata_readInt32(ds, inMap->enumStart); tempMap->enumLimit=udata_readInt32(ds, inMap->enumLimit); size=tempMap->getSize(); if(length>=0) { if(length<(pos+size)) { if(length<(int32_t)sizeof(PropertyAliases)) { udata_printError(ds, "upname_swap(EnumToOffset): too few bytes (%d after header)\n" " for pnames.icu EnumToOffset{%d..%d} at %d\n", length, tempMap->enumStart, tempMap->enumLimit, pos); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } /* swap enumStart and enumLimit */ ds->swapArray32(ds, inMap, 2*sizeof(EnumValue), outMap, pErrorCode); /* swap _offsetArray[] */ ds->swapArray16(ds, inMap->getOffsetArray(), (tempMap->enumLimit-tempMap->enumStart)*sizeof(Offset), outMap->getOffsetArray(), pErrorCode); } return size; }
int32_t NonContiguousEnumToOffset::swap(const UDataSwapper *ds, const uint8_t *inBytes, int32_t length, uint8_t *outBytes, uint8_t *temp, int32_t pos, UErrorCode *pErrorCode) { const NonContiguousEnumToOffset *inMap; NonContiguousEnumToOffset *outMap, *tempMap; int32_t size; tempMap=(NonContiguousEnumToOffset *)(temp+pos); if(tempMap->count!=0) { /* this map was swapped already */ size=tempMap->getSize(); return size; } inMap=(const NonContiguousEnumToOffset *)(inBytes+pos); outMap=(NonContiguousEnumToOffset *)(outBytes+pos); tempMap->count=udata_readInt32(ds, inMap->count); size=tempMap->getSize(); if(length>=0) { if(length<(pos+size)) { if(length<(int32_t)sizeof(PropertyAliases)) { udata_printError(ds, "upname_swap(NonContiguousEnumToOffset): too few bytes (%d after header)\n" " for pnames.icu NonContiguousEnumToOffset[%d] at %d\n", length, tempMap->count, pos); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } /* swap count and _enumArray[] */ length=(1+tempMap->count)*sizeof(EnumValue); ds->swapArray32(ds, inMap, length, outMap, pErrorCode); /* swap _offsetArray[] */ pos+=length; ds->swapArray16(ds, inBytes+pos, tempMap->count*sizeof(Offset), outBytes+pos, pErrorCode); } return size; }
/** * swap a selector into the desired Endianness and Asciiness of * the system. Just as FYI, selectors are always saved in the format * of the system that created them. They are only converted if used * on another system. In other words, selectors created on different * system can be different even if the params are identical (endianness * and Asciiness differences only) * * @param ds pointer to data swapper containing swapping info * @param inData pointer to incoming data * @param length length of inData in bytes * @param outData pointer to output data. Capacity should * be at least equal to capacity of inData * @param status an in/out ICU UErrorCode * @return 0 on failure, number of bytes swapped on success * number of bytes swapped can be smaller than length */ static int32_t ucnvsel_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *status) { /* udata_swapDataHeader checks the arguments */ int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, status); if(U_FAILURE(*status)) { return 0; } /* check data format and format version */ const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4); if(!( pInfo->dataFormat[0] == 0x43 && /* dataFormat="CSel" */ pInfo->dataFormat[1] == 0x53 && pInfo->dataFormat[2] == 0x65 && pInfo->dataFormat[3] == 0x6c )) { udata_printError(ds, "ucnvsel_swap(): data format %02x.%02x.%02x.%02x is not recognized as UConverterSelector data\n", pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3]); *status = U_INVALID_FORMAT_ERROR; return 0; } if(pInfo->formatVersion[0] != 1) { udata_printError(ds, "ucnvsel_swap(): format version %02x is not supported\n", pInfo->formatVersion[0]); *status = U_UNSUPPORTED_ERROR; return 0; } if(length >= 0) { length -= headerSize; if(length < 16*4) { udata_printError(ds, "ucnvsel_swap(): too few bytes (%d after header) for UConverterSelector data\n", length); *status = U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } 
const uint8_t *inBytes = (const uint8_t *)inData + headerSize; uint8_t *outBytes = (uint8_t *)outData + headerSize; /* read the indexes */ const int32_t *inIndexes = (const int32_t *)inBytes; int32_t indexes[16]; int32_t i; for(i = 0; i < 16; ++i) { indexes[i] = udata_readInt32(ds, inIndexes[i]); } /* get the total length of the data */ int32_t size = indexes[UCNVSEL_INDEX_SIZE]; if(length >= 0) { if(length < size) { udata_printError(ds, "ucnvsel_swap(): too few bytes (%d after header) for all of UConverterSelector data\n", length); *status = U_INDEX_OUTOFBOUNDS_ERROR; return 0; } /* copy the data for inaccessible bytes */ if(inBytes != outBytes) { uprv_memcpy(outBytes, inBytes, size); } int32_t offset = 0, count; /* swap the int32_t indexes[] */ count = UCNVSEL_INDEX_COUNT*4; ds->swapArray32(ds, inBytes, count, outBytes, status); offset += count; /* swap the UTrie2 */ count = indexes[UCNVSEL_INDEX_TRIE_SIZE]; utrie2_swap(ds, inBytes + offset, count, outBytes + offset, status); offset += count; /* swap the uint32_t pv[] */ count = indexes[UCNVSEL_INDEX_PV_COUNT]*4; ds->swapArray32(ds, inBytes + offset, count, outBytes + offset, status); offset += count; /* swap the encoding names */ count = indexes[UCNVSEL_INDEX_NAMES_LENGTH]; ds->swapInvChars(ds, inBytes + offset, count, outBytes + offset, status); offset += count; U_ASSERT(offset == size); } return headerSize + size; }
U_NAMESPACE_USE

/* definitions */

/* Unicode property (value) aliases data swapping --------------------------- */

/*
 * Swap pnames.icu data (dataFormat "pnam", format version 2).
 *
 * Only the leading 32-bit region (the indexes[] and the valueMaps[], up to
 * the byte offset stored in IX_BYTE_TRIES_OFFSET) is byte-swapped; the rest
 * of the data up to IX_TOTAL_SIZE is copied unchanged.
 *
 * ds          swapper describing input and output byte order
 * inData      input data, starting with the data header
 * length      number of bytes at inData, or negative to only compute the size
 * outData     output buffer
 * pErrorCode  in/out error code
 *
 * Returns the header size plus IX_TOTAL_SIZE, or 0 on error.
 */
static int32_t U_CALLCONV
upname_swap(const UDataSwapper *ds,
            const void *inData, int32_t length, void *outData,
            UErrorCode *pErrorCode) {
    /* udata_swapDataHeader checks the arguments */
    int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    const UDataInfo *pInfo=
        reinterpret_cast<const UDataInfo *>(
            static_cast<const char *>(inData)+4);
    if(!(
        pInfo->dataFormat[0]==0x70 &&   /* dataFormat="pnam" */
        pInfo->dataFormat[1]==0x6e &&
        pInfo->dataFormat[2]==0x61 &&
        pInfo->dataFormat[3]==0x6d &&
        pInfo->formatVersion[0]==2
    )) {
        udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    const uint8_t *inBytes=static_cast<const uint8_t *>(inData)+headerSize;
    uint8_t *outBytes=static_cast<uint8_t *>(outData)+headerSize;

    if(length>=0) {
        length-=headerSize;
        // formatVersion 2 initially has indexes[8], 32 bytes.
        if(length<32) {
            udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
                             (int)length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    const int32_t *inIndexes=reinterpret_cast<const int32_t *>(inBytes);
    int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]);
    if(length>=0) {
        if(length<totalSize) {
            udata_printError(ds, "upname_swap(): too few bytes (%d after header, should be %d) "
                             "for pnames.icu\n",
                             (int)length, (int)totalSize);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        int32_t numBytesIndexesAndValueMaps=
            udata_readInt32(ds, inIndexes[PropNameData::IX_BYTE_TRIES_OFFSET]);

        // The offset comes from the file: it must lie inside the data,
        // otherwise the copy length below would become negative
        // (and, converted to size_t, enormous).
        if(numBytesIndexesAndValueMaps<0 || numBytesIndexesAndValueMaps>totalSize) {
            udata_printError(ds, "upname_swap(): byte tries offset %d is outside of the pnames.icu data (%d bytes)\n",
                             (int)numBytesIndexesAndValueMaps, (int)totalSize);
            *pErrorCode=U_INVALID_FORMAT_ERROR;
            return 0;
        }

        // Swap the indexes[] and the valueMaps[].
        ds->swapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode);

        // Copy the rest of the data.
        if(inBytes!=outBytes) {
            uprv_memcpy(outBytes+numBytesIndexesAndValueMaps,
                        inBytes+numBytesIndexesAndValueMaps,
                        totalSize-numBytesIndexesAndValueMaps);
        }

        // We need not swap anything else:
        //
        // The ByteTries are already byte-serialized, and are fixed on ASCII.
        // (On an EBCDIC machine, the input string is converted to lowercase ASCII
        // while matching.)
        //
        // The name groups are mostly invariant characters, but since we only
        // generate, and keep in subversion, ASCII versions of pnames.icu,
        // and since only ICU4J uses the pnames.icu data file
        // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files,
        // we just copy those bytes too.
    }

    return headerSize+totalSize;
}
/*
 * Swap unorm.icu data (dataFormat "Norm", format version 2).
 *
 * The data after the header consists of 32 int32_t indexes followed by the
 * main trie, two uint16_t arrays, the optional FCD and aux tries and a final
 * uint16_t array; the byte length of each part is taken from the indexes.
 * With a negative length only the total size is computed.
 *
 * Returns the header size plus the computed data size, or 0 on error.
 */
static int32_t U_CALLCONV
unorm_swap(const UDataSwapper *ds,
           const void *inData, int32_t length, void *outData,
           UErrorCode *pErrorCode) {
    /* udata_swapDataHeader checks the arguments */
    const int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* reject anything that is not "Norm" data in format version 2 */
    const UDataInfo *pInfo=(const UDataInfo *)((const char *)inData+4);
    if( pInfo->dataFormat[0]!=0x4e ||   /* dataFormat="Norm" */
        pInfo->dataFormat[1]!=0x6f ||
        pInfo->dataFormat[2]!=0x72 ||
        pInfo->dataFormat[3]!=0x6d ||
        pInfo->formatVersion[0]!=2
    ) {
        udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    const uint8_t *inBytes=(const uint8_t *)inData+headerSize;
    uint8_t *outBytes=(uint8_t *)outData+headerSize;

    /* from here on, a non-negative length counts the bytes after the header */
    if(length>=0) {
        length-=headerSize;
        if(length<32*4) {
            udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n",
                             length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */
    const int32_t *inIndexes=(const int32_t *)inBytes;
    int32_t indexes[32];
    for(int32_t k=0; k<32; ++k) {
        indexes[k]=udata_readInt32(ds, inIndexes[k]);
    }

    /* calculate the total length of the data */
    const int32_t size=
        32*4+ /* size of indexes[] */
        indexes[_NORM_INDEX_TRIE_SIZE]+
        indexes[_NORM_INDEX_UCHAR_COUNT]*2+
        indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+
        indexes[_NORM_INDEX_FCD_TRIE_SIZE]+
        indexes[_NORM_INDEX_AUX_TRIE_SIZE]+
        indexes[_NORM_INDEX_CANON_SET_COUNT]*2;

    if(length<0) {
        /* preflighting: report the size only */
        return headerSize+size;
    }

    if(length<size) {
        udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n",
                         length);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* copy the data for inaccessible bytes */
    if(inBytes!=outBytes) {
        uprv_memcpy(outBytes, inBytes, size);
    }

    /* walk through the parts in file order; pos is the running byte offset */
    int32_t pos=0;
    int32_t partBytes;

    /* the indexes[] */
    partBytes=32*4;
    ds->swapArray32(ds, inBytes, partBytes, outBytes, pErrorCode);
    pos+=partBytes;

    /* the main UTrie */
    partBytes=indexes[_NORM_INDEX_TRIE_SIZE];
    utrie_swap(ds, inBytes+pos, partBytes, outBytes+pos, pErrorCode);
    pos+=partBytes;

    /* the uint16_t extraData[] and the uint16_t combiningTable[], as one run */
    partBytes=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2;
    ds->swapArray16(ds, inBytes+pos, partBytes, outBytes+pos, pErrorCode);
    pos+=partBytes;

    /* the FCD UTrie, if present */
    partBytes=indexes[_NORM_INDEX_FCD_TRIE_SIZE];
    if(partBytes!=0) {
        utrie_swap(ds, inBytes+pos, partBytes, outBytes+pos, pErrorCode);
        pos+=partBytes;
    }

    /* the aux UTrie, if present */
    partBytes=indexes[_NORM_INDEX_AUX_TRIE_SIZE];
    if(partBytes!=0) {
        utrie_swap(ds, inBytes+pos, partBytes, outBytes+pos, pErrorCode);
        pos+=partBytes;
    }

    /* the trailing uint16_t array sized by _NORM_INDEX_CANON_SET_COUNT */
    partBytes=indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
    ds->swapArray16(ds, inBytes+pos, partBytes, outBytes+pos, pErrorCode);

    return headerSize+size;
}
/*
 * Swap bidi/shaping data (dataFormat "BiDi").
 *
 * Accepted are format version 1 (with matching UTRIE_SHIFT and
 * UTRIE_INDEX_SHIFT in formatVersion[2..3]) and format version 2.
 * The data after the header consists of the int32_t indexes, the trie,
 * the uint32_t mirrors[] and two byte arrays that need no swapping.
 * With a negative length only the total size is computed.
 *
 * Returns the header size plus indexes[UBIDI_IX_LENGTH], or 0 on error.
 */
static int32_t U_CALLCONV
ubidi_swap(const UDataSwapper *ds,
           const void *inData, int32_t length, void *outData,
           UErrorCode *pErrorCode) {
    /* udata_swapDataHeader checks the arguments */
    const int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    const UDataInfo *pInfo=(const UDataInfo *)((const char *)inData+4);
    if( pInfo->dataFormat[0]!=UBIDI_FMT_0 ||    /* dataFormat="BiDi" */
        pInfo->dataFormat[1]!=UBIDI_FMT_1 ||
        pInfo->dataFormat[2]!=UBIDI_FMT_2 ||
        pInfo->dataFormat[3]!=UBIDI_FMT_3 ||
        !((pInfo->formatVersion[0]==1 &&
           pInfo->formatVersion[2]==UTRIE_SHIFT &&
           pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
          pInfo->formatVersion[0]==2)
    ) {
        udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    const uint8_t *inBytes=(const uint8_t *)inData+headerSize;
    uint8_t *outBytes=(uint8_t *)outData+headerSize;

    /* from here on, a non-negative length counts the bytes after the header */
    if(length>=0) {
        length-=headerSize;
        if(length<16*4) {
            udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n",
                             length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */
    const int32_t *inIndexes=(const int32_t *)inBytes;
    int32_t indexes[16];
    for(int32_t k=0; k<16; ++k) {
        indexes[k]=udata_readInt32(ds, inIndexes[k]);
    }

    /* the total length of the data is stored in the indexes */
    const int32_t size=indexes[UBIDI_IX_LENGTH];

    if(length<0) {
        /* preflighting: report the size only */
        return headerSize+size;
    }

    if(length<size) {
        udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n",
                         length);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* copy the data for inaccessible bytes */
    if(inBytes!=outBytes) {
        uprv_memcpy(outBytes, inBytes, size);
    }

    /* walk through the parts in file order; pos is the running byte offset */
    int32_t pos=0;
    int32_t partBytes;

    /* the int32_t indexes[] */
    partBytes=indexes[UBIDI_IX_INDEX_TOP]*4;
    ds->swapArray32(ds, inBytes, partBytes, outBytes, pErrorCode);
    pos+=partBytes;

    /* the UTrie */
    partBytes=indexes[UBIDI_IX_TRIE_SIZE];
    utrie2_swapAnyVersion(ds, inBytes+pos, partBytes, outBytes+pos, pErrorCode);
    pos+=partBytes;

    /* the uint32_t mirrors[] */
    partBytes=indexes[UBIDI_IX_MIRROR_LENGTH]*4;
    ds->swapArray32(ds, inBytes+pos, partBytes, outBytes+pos, pErrorCode);
    pos+=partBytes;

    /* the uint8_t jgArray[] and jgArray2[] are only skipped, not swapped */
    pos+=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START];
    pos+=indexes[UBIDI_IX_JG_LIMIT2]-indexes[UBIDI_IX_JG_START2];

    U_ASSERT(pos==size);

    return headerSize+size;
}
/*
 * Swap a Unicode properties file (dataFormat "UPro", format versions 3..7).
 *
 * The data after the header starts with UPROPS_INDEX_COUNT int32_t indexes.
 * Most of them are offsets, counted in 32-bit units from the start of the
 * indexes, that delimit the parts swapped below. With a negative length
 * nothing is swapped by this function's own code.
 *
 * Returns the header size plus 4*indexes[UPROPS_RESERVED_INDEX_7],
 * or 0 on error.
 */
static int32_t U_CALLCONV
uprops_swap(const UDataSwapper *ds,
            const void *inData, int32_t length, void *outData,
            UErrorCode *pErrorCode) {
    /* udata_swapDataHeader checks the arguments */
    const int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /*
     * check data format and format version;
     * versions before 7 must also carry the expected trie shift values
     */
    const UDataInfo *pInfo=(const UDataInfo *)((const char *)inData+4);
    if( pInfo->dataFormat[0]!=0x55 ||   /* dataFormat="UPro" */
        pInfo->dataFormat[1]!=0x50 ||
        pInfo->dataFormat[2]!=0x72 ||
        pInfo->dataFormat[3]!=0x6f ||
        pInfo->formatVersion[0]<3 || 7<pInfo->formatVersion[0] ||
        (pInfo->formatVersion[0]<7 &&
            (pInfo->formatVersion[2]!=UTRIE_SHIFT ||
             pInfo->formatVersion[3]!=UTRIE_INDEX_SHIFT))
    ) {
        udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /* the properties file must contain at least the indexes array */
    int32_t dataIndexes[UPROPS_INDEX_COUNT];
    if(length>=0 && (length-headerSize)<(int32_t)sizeof(dataIndexes)) {
        udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
                         length-headerSize);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* read the indexes */
    const int32_t *inData32=(const int32_t *)((const char *)inData+headerSize);
    for(int32_t k=0; k<UPROPS_INDEX_COUNT; ++k) {
        dataIndexes[k]=udata_readInt32(ds, inData32[k]);
    }

    /*
     * comments are copied from the data format description in genprops/store.c
     * indexes[] constants are in uprops.h
     */
    const int32_t props32Index=dataIndexes[UPROPS_PROPS32_INDEX];                   /* i0 */
    const int32_t exceptionsTop=dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX];           /* i2 */
    const int32_t additionalTrieIndex=dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX];    /* i3 */
    const int32_t additionalVectorsIndex=dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]; /* i4 */
    const int32_t scriptExtensionsIndex=dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX];   /* i6 */
    /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */
    const int32_t reservedIndex7=dataIndexes[UPROPS_RESERVED_INDEX_7];

    if(length>=0) {
        /*
         * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size.
         * In earlier formatVersions, it is 0 and a lower dataIndexes entry
         * has the top of the last item.
         * Search downwards (never looking at entry 0) for the first non-zero entry.
         */
        int32_t dataTop=0;
        for(int32_t k=UPROPS_DATA_TOP_INDEX; k>0; --k) {
            dataTop=dataIndexes[k];
            if(dataTop!=0) {
                break;
            }
        }

        if((length-headerSize)<(4*dataTop)) {
            udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
                             length-headerSize);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        int32_t *outData32=(int32_t *)((char *)outData+headerSize);

        /* copy everything for inaccessible data (padding) */
        if(inData32!=outData32) {
            uprv_memcpy(outData32, inData32, 4*(size_t)dataTop);
        }

        /* swap the indexes[16] */
        ds->swapArray32(ds, inData32, 4*UPROPS_INDEX_COUNT, outData32, pErrorCode);

        /*
         * swap the main properties UTrie
         * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))
         */
        utrie2_swapAnyVersion(ds,
            inData32+UPROPS_INDEX_COUNT,
            4*(props32Index-UPROPS_INDEX_COUNT),
            outData32+UPROPS_INDEX_COUNT,
            pErrorCode);

        /*
         * swap the properties and exceptions words
         * P  const uint32_t props32[i1-i0];
         * E  const uint32_t exceptions[i2-i1];
         */
        ds->swapArray32(ds,
            inData32+props32Index,
            4*(exceptionsTop-props32Index),
            outData32+props32Index,
            pErrorCode);

        /*
         * swap the UChars
         * U  const UChar uchars[2*(i3-i2)];
         */
        ds->swapArray16(ds,
            inData32+exceptionsTop,
            4*(additionalTrieIndex-exceptionsTop),
            outData32+exceptionsTop,
            pErrorCode);

        /*
         * swap the additional UTrie
         * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties
         */
        utrie2_swapAnyVersion(ds,
            inData32+additionalTrieIndex,
            4*(additionalVectorsIndex-additionalTrieIndex),
            outData32+additionalTrieIndex,
            pErrorCode);

        /*
         * swap the properties vectors
         * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4];
         */
        ds->swapArray32(ds,
            inData32+additionalVectorsIndex,
            4*(scriptExtensionsIndex-additionalVectorsIndex),
            outData32+additionalVectorsIndex,
            pErrorCode);

        // swap the Script_Extensions data
        // SCX const uint16_t scriptExtensions[2*(i7-i6)];
        ds->swapArray16(ds,
            inData32+scriptExtensionsIndex,
            4*(reservedIndex7-scriptExtensionsIndex),
            outData32+scriptExtensionsIndex,
            pErrorCode);
    }

    return headerSize+4*reservedIndex7;
}
/* * Enumerate one resource item and its children and extract dependencies from * aliases. * Code adapted from ures_preflightResource() and ures_swapResource(). */ static void ures_enumDependencies(const UDataSwapper *ds, const char *itemName, const Resource *inBundle, int32_t length, Resource res, const char *inKey, int32_t depth, CheckDependency check, void *context, UErrorCode *pErrorCode) { const Resource *p; int32_t offset; if(res==0 || RES_GET_TYPE(res)==URES_INT) { /* empty string or integer, nothing to do */ return; } /* all other types use an offset to point to their data */ offset=(int32_t)RES_GET_OFFSET(res); if(0<=length && length<=offset) { udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) resource offset exceeds bundle length %d\n", itemName, res, length); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return; } p=inBundle+offset; switch(RES_GET_TYPE(res)) { /* strings and aliases have physically the same value layout */ case URES_STRING: // we ignore all strings except top-level strings with a %%ALIAS key if(depth!=1) { break; } else { char key[8]; int32_t keyLength; keyLength=(int32_t)strlen(inKey); if(keyLength!=gAliasKeyLength) { break; } ds->swapInvChars(ds, inKey, gAliasKeyLength+1, key, pErrorCode); if(U_FAILURE(*pErrorCode)) { udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) string key contains variant characters\n", itemName, res); return; } if(0!=strcmp(key, gAliasKey)) { break; } } // for the top-level %%ALIAS string fall through to URES_ALIAS case URES_ALIAS: { char localeID[32]; const uint16_t *p16; int32_t i, stringLength; uint16_t u16, ored16; stringLength=udata_readInt32(ds, (int32_t)*p); /* top=offset+1+(string length +1)/2 rounded up */ offset+=1+((stringLength+1)+1)/2; if(offset>length) { break; // the resource does not fit into the bundle, print error below } // extract the locale ID from alias strings like // locale_ID/key1/key2/key3 // locale_ID if(U_IS_BIG_ENDIAN==ds->inIsBigEndian) { u16=0x2f; // slash in 
local endianness } else { u16=0x2f00; // slash in opposite endianness } p16=(const uint16_t *)(p+1); // Unicode string contents // search for the first slash for(i=0; i<stringLength && p16[i]!=u16; ++i) {} if(RES_GET_TYPE(res)==URES_ALIAS) { // ignore aliases with an initial slash: // /ICUDATA/... and /pkgname/... go to a different package // /LOCALE/... are for dynamic sideways fallbacks and don't go to a fixed bundle if(i==0) { break; // initial slash ('/') } // ignore the intra-bundle path starting from the first slash ('/') stringLength=i; } else /* URES_STRING */ { // the whole string should only consist of a locale ID if(i!=stringLength) { udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) %%ALIAS contains a '/'\n", itemName, res); *pErrorCode=U_UNSUPPORTED_ERROR; return; } } // convert the Unicode string to char * and // check that it has a bundle path but no package if(stringLength>=(int32_t)sizeof(localeID)) { udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) alias locale ID length %ld too long\n", itemName, res, stringLength); *pErrorCode=U_BUFFER_OVERFLOW_ERROR; return; } // convert the alias Unicode string to US-ASCII ored16=0; if(U_IS_BIG_ENDIAN==ds->inIsBigEndian) { for(i=0; i<stringLength; ++i) { u16=p16[i]; ored16|=u16; localeID[i]=(char)u16; } } else { for(i=0; i<stringLength; ++i) { u16=p16[i]; ored16|=u16; localeID[i]=(char)(u16>>8); } ored16=(uint16_t)((ored16<<8)|(ored16>>8)); } localeID[stringLength]=0; if(ored16>0x7f) { udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) alias string contains non-ASCII characters\n", itemName, res); *pErrorCode=U_INVALID_CHAR_FOUND; return; } #if (U_CHARSET_FAMILY==U_EBCDIC_FAMILY) // swap to EBCDIC // our swapper is probably not the right one, but // the function uses it only for printing errors uprv_ebcdicFromAscii(ds, localeID, stringLength, localeID, pErrorCode); if(U_FAILURE(*pErrorCode)) { return; } #endif #if U_CHARSET_FAMILY!=U_ASCII_FAMILY && 
U_CHARSET_FAMILY!=U_EBCDIC_FAMILY # error Unknown U_CHARSET_FAMILY value! #endif checkIDSuffix(itemName, localeID, -1, ".res", check, context, pErrorCode); } break; case URES_TABLE: case URES_TABLE32: { const uint16_t *pKey16; const int32_t *pKey32; Resource item; int32_t i, count; if(RES_GET_TYPE(res)==URES_TABLE) { /* get table item count */ pKey16=(const uint16_t *)p; count=ds->readUInt16(*pKey16++); pKey32=NULL; /* top=((1+ table item count)/2 rounded up)+(table item count) */ offset+=((1+count)+1)/2; } else { /* get table item count */ pKey32=(const int32_t *)p; count=udata_readInt32(ds, *pKey32++); pKey16=NULL; /* top=(1+ table item count)+(table item count) */ offset+=1+count; } p=inBundle+offset; /* pointer to table resources */ offset+=count; if(offset>length) { break; // the resource does not fit into the bundle, print error below } /* recurse */ for(i=0; i<count; ++i) { item=ds->readUInt32(*p++); ures_enumDependencies( ds, itemName, inBundle, length, item, ((const char *)inBundle)+ (pKey16!=NULL ? 
ds->readUInt16(pKey16[i]) : udata_readInt32(ds, pKey32[i])), depth+1, check, context, pErrorCode); if(U_FAILURE(*pErrorCode)) { udata_printError(ds, "icupkg/ures_enumDependencies(%s table res=%08x)[%d].recurse(%08x) failed\n", itemName, res, i, item); break; } } } break; case URES_ARRAY: { Resource item; int32_t i, count; /* top=offset+1+(array length) */ count=udata_readInt32(ds, (int32_t)*p++); offset+=1+count; if(offset>length) { break; // the resource does not fit into the bundle, print error below } /* recurse */ for(i=0; i<count; ++i) { item=ds->readUInt32(*p++); ures_enumDependencies( ds, itemName, inBundle, length, item, NULL, depth+1, check, context, pErrorCode); if(U_FAILURE(*pErrorCode)) { udata_printError(ds, "icupkg/ures_enumDependencies(%s array res=%08x)[%d].recurse(%08x) failed\n", itemName, res, i, item); break; } } } break; default: break; } if(U_FAILURE(*pErrorCode)) { /* nothing to do */ } else if(0<=length && length<offset) { udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) resource limit exceeds bundle length %d\n", itemName, res, length); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; } }
U_NAMESPACE_END

U_NAMESPACE_USE

/*
 * Swap dictionary data (dataFormat "Dict", format version 1).
 *
 * The data after the header starts with DictionaryData::IX_COUNT int32_t
 * indexes. The trie that follows is swapped as 16-bit units when its type
 * is TRIE_TYPE_UCHARS and left alone when it is TRIE_TYPE_BYTES; any other
 * type is an error. With a negative length only the size is computed.
 *
 * Returns the header size plus indexes[IX_TOTAL_SIZE], or 0 on error.
 */
U_CAPI int32_t U_EXPORT2
udict_swap(const UDataSwapper *ds, const void *inData, int32_t length,
           void *outData, UErrorCode *pErrorCode) {
    /* udata_swapDataHeader checks the arguments */
    const int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4);
    if (pInfo->dataFormat[0] != 0x44 ||
        pInfo->dataFormat[1] != 0x69 ||
        pInfo->dataFormat[2] != 0x63 ||
        pInfo->dataFormat[3] != 0x74 ||
        pInfo->formatVersion[0] != 1) {
        udata_printError(ds, "udict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as dictionary data\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]);
        *pErrorCode = U_UNSUPPORTED_ERROR;
        return 0;
    }

    const uint8_t *inBytes = (const uint8_t *)inData + headerSize;
    uint8_t *outBytes = (uint8_t *)outData + headerSize;

    int32_t indexes[DictionaryData::IX_COUNT];

    /* from here on, a non-negative length counts the bytes after the header */
    if (length >= 0) {
        length -= headerSize;
        if (length < (int32_t)(sizeof(indexes))) {
            udata_printError(ds, "udict_swap(): too few bytes (%d after header) for dictionary data\n",
                             length);
            *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    /* read the indexes */
    const int32_t *inIndexes = (const int32_t *)inBytes;
    for (int32_t k = 0; k < DictionaryData::IX_COUNT; k++) {
        indexes[k] = udata_readInt32(ds, inIndexes[k]);
    }

    const int32_t size = indexes[DictionaryData::IX_TOTAL_SIZE];

    if (length < 0) {
        /* preflighting: report the size only */
        return headerSize + size;
    }

    if (length < size) {
        udata_printError(ds, "udict_swap(): too few bytes (%d after header) for all of dictionary data\n",
                         length);
        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* copy everything first; parts that need no swapping are then already in place */
    if (inBytes != outBytes) {
        uprv_memcpy(outBytes, inBytes, size);
    }

    /* the indexes */
    ds->swapArray32(ds, inBytes, sizeof(indexes), outBytes, pErrorCode);

    /* the trie occupies the bytes from the end of the indexes up to IX_RESERVED1_OFFSET */
    const int32_t trieStart = (int32_t)sizeof(indexes);
    const int32_t trieLimit = indexes[DictionaryData::IX_RESERVED1_OFFSET];
    const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;

    if (trieType != DictionaryData::TRIE_TYPE_UCHARS &&
        trieType != DictionaryData::TRIE_TYPE_BYTES) {
        udata_printError(ds, "udict_swap(): unknown trie type!\n");
        *pErrorCode = U_UNSUPPORTED_ERROR;
        return 0;
    }
    if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
        ds->swapArray16(ds, inBytes + trieStart, trieLimit - trieStart, outBytes + trieStart, pErrorCode);
    }
    // else TRIE_TYPE_BYTES: nothing to do

    // The two sections after the trie (up to IX_RESERVED2_OFFSET and up to
    // IX_TOTAL_SIZE) are empty in the current format, but may be used later.

    return headerSize + size;
}
/*
 * swap a header-less collation binary, inside a resource bundle or ucadata.icu
 *
 * The binary starts with a UCATableHeader whose size field gives the total
 * length. The remaining header fields are byte offsets and counts that
 * locate the parts swapped below. With a negative length only the size
 * is read and the magic/format/platform checks are performed.
 *
 * Returns the size stored in the header, or 0 on error.
 */
U_CAPI int32_t U_EXPORT2
ucol_swapBinary(const UDataSwapper *ds,
                const void *inData, int32_t length, void *outData,
                UErrorCode *pErrorCode) {
    /* argument checking in case we were not called from ucol_swap() */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    const uint8_t *inBytes=(const uint8_t *)inData;
    uint8_t *outBytes=(uint8_t *)outData;

    const UCATableHeader *inHeader=(const UCATableHeader *)inData;
    UCATableHeader *outHeader=(UCATableHeader *)outData;
    UCATableHeader header={ 0 };

    /*
     * The collation binary must contain at least the UCATableHeader,
     * starting with its size field.
     * sizeof(UCATableHeader)==42*4 in ICU 2.8
     * check the length against the header size before reading the size field
     */
    const int haveLength= length>=0;
    if(!haveLength || length>=(42*4)) {
        header.size=udata_readInt32(ds, inHeader->size);
    }
    if(haveLength && (length<(42*4) || length<header.size)) {
        udata_printError(ds, "ucol_swapBinary(): too few bytes (%d after header) for collation data\n",
                         length);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* the data must be a collation binary in format version 2.3 or later 2.x */
    header.magic=ds->readUInt32(inHeader->magic);
    if( header.magic!=UCOL_HEADER_MAGIC ||
        inHeader->formatVersion[0]!=2 ||
        inHeader->formatVersion[1]<3
    ) {
        udata_printError(ds, "ucol_swapBinary(): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
                         header.magic,
                         inHeader->formatVersion[0], inHeader->formatVersion[1]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /* the platform properties recorded in the binary must agree with the swapper's input side */
    if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {
        udata_printError(ds, "ucol_swapBinary(): endianness %d or charset %d does not match the swapper\n",
                         inHeader->isBigEndian, inHeader->charSetFamily);
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return 0;
    }

    if(!haveLength) {
        /* preflighting: report the size only */
        return header.size;
    }

    /* copy everything, takes care of data that needs no swapping */
    if(inBytes!=outBytes) {
        uprv_memcpy(outBytes, inBytes, header.size);
    }

    /* swap the necessary pieces in the order of their occurrence in the data */

    /* read more of the UCATableHeader (the size field was read above) */
    header.options=                 ds->readUInt32(inHeader->options);
    header.UCAConsts=               ds->readUInt32(inHeader->UCAConsts);
    header.contractionUCACombos=    ds->readUInt32(inHeader->contractionUCACombos);
    header.mappingPosition=         ds->readUInt32(inHeader->mappingPosition);
    header.expansion=               ds->readUInt32(inHeader->expansion);
    header.contractionIndex=        ds->readUInt32(inHeader->contractionIndex);
    header.contractionCEs=          ds->readUInt32(inHeader->contractionCEs);
    header.contractionSize=         ds->readUInt32(inHeader->contractionSize);
    header.endExpansionCE=          ds->readUInt32(inHeader->endExpansionCE);
    header.expansionCESize=         ds->readUInt32(inHeader->expansionCESize);
    header.endExpansionCECount=     udata_readInt32(ds, inHeader->endExpansionCECount);
    header.contractionUCACombosSize=udata_readInt32(ds, inHeader->contractionUCACombosSize);

    /* swap the 32-bit integers in the header: everything before the jamoSpecial field */
    ds->swapArray32(ds, inHeader, (int32_t)((const char *)&inHeader->jamoSpecial-(const char *)inHeader),
                       outHeader, pErrorCode);

    /* set the output platform properties */
    outHeader->isBigEndian=ds->outIsBigEndian;
    outHeader->charSetFamily=ds->outCharset;

    uint32_t count;

    /* swap the options, which extend up to the expansions */
    if(header.options!=0) {
        ds->swapArray32(ds, inBytes+header.options, header.expansion-header.options,
                           outBytes+header.options, pErrorCode);
    }

    /* swap the expansions */
    if(header.mappingPosition!=0 && header.expansion!=0) {
        if(header.contractionIndex!=0) {
            /* expansions bounded by contractions */
            count=header.contractionIndex-header.expansion;
        } else {
            /* no contractions: expansions bounded by the main trie */
            count=header.mappingPosition-header.expansion;
        }
        ds->swapArray32(ds, inBytes+header.expansion, (int32_t)count,
                           outBytes+header.expansion, pErrorCode);
    }

    /* swap the contractions */
    if(header.contractionSize!=0) {
        /* contractionIndex: UChar[] */
        ds->swapArray16(ds, inBytes+header.contractionIndex, header.contractionSize*2,
                           outBytes+header.contractionIndex, pErrorCode);

        /* contractionCEs: CEs[] */
        ds->swapArray32(ds, inBytes+header.contractionCEs, header.contractionSize*4,
                           outBytes+header.contractionCEs, pErrorCode);
    }

    /* swap the main trie, which extends up to the max expansion table */
    if(header.mappingPosition!=0) {
        count=header.endExpansionCE-header.mappingPosition;
        utrie_swap(ds, inBytes+header.mappingPosition, (int32_t)count,
                      outBytes+header.mappingPosition, pErrorCode);
    }

    /* swap the max expansion table */
    if(header.endExpansionCECount!=0) {
        ds->swapArray32(ds, inBytes+header.endExpansionCE, header.endExpansionCECount*4,
                           outBytes+header.endExpansionCE, pErrorCode);
    }

    /* expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap */

    /* swap UCA constants */
    if(header.UCAConsts!=0) {
        /*
         * if UCAConsts!=0 then contractionUCACombos because we are swapping
         * the UCA data file, and we know that the UCA contains contractions
         */
        ds->swapArray32(ds, inBytes+header.UCAConsts, header.contractionUCACombos-header.UCAConsts,
                           outBytes+header.UCAConsts, pErrorCode);
    }

    /* swap UCA contractions */
    if(header.contractionUCACombosSize!=0) {
        count=header.contractionUCACombosSize*inHeader->contractionUCACombosWidth*U_SIZEOF_UCHAR;
        ds->swapArray16(ds, inBytes+header.contractionUCACombos, (int32_t)count,
                           outBytes+header.contractionUCACombos, pErrorCode);
    }

    return header.size;
}
/*
 * Swap inverse UCA collation data (invuca.icu), including its ICU data header.
 *
 * ds         - swapper that supplies the read/swap callbacks
 * inData     - start of the input data, beginning with the ICU data header
 * length     - total number of input bytes (header included), or a negative
 *              value to only compute the size without copying or swapping
 *              the table data
 * outData    - output buffer; the copy step is skipped when the output body
 *              is at the same address as the input body
 * pErrorCode - ICU error code; arguments are checked by udata_swapDataHeader()
 *
 * Returns the data header size plus the byteSize field of the
 * InverseUCATableHeader, or 0 on error.
 */
U_CAPI int32_t U_EXPORT2
ucol_swapInverseUCA(const UDataSwapper *ds,
                    const void *inData, int32_t length, void *outData,
                    UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize;

    const uint8_t *inBytes;
    uint8_t *outBytes;

    const InverseUCATableHeader *inHeader;
    InverseUCATableHeader *outHeader;
    InverseUCATableHeader header={ 0 };

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x49 &&   /* dataFormat="InvC" */
        pInfo->dataFormat[1]==0x6e &&
        pInfo->dataFormat[2]==0x76 &&
        pInfo->dataFormat[3]==0x43 &&
        pInfo->formatVersion[0]==2 &&
        pInfo->formatVersion[1]>=1
    )) {
        udata_printError(ds, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0], pInfo->formatVersion[1]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    inBytes=(const uint8_t *)inData+headerSize;
    outBytes=(uint8_t *)outData+headerSize;

    inHeader=(const InverseUCATableHeader *)inBytes;
    outHeader=(InverseUCATableHeader *)outBytes;

    /*
     * The inverse UCA collation binary must contain at least the
     * InverseUCATableHeader, starting with its byteSize field.
     * sizeof(InverseUCATableHeader)==8*4 in ICU 2.8
     * Check the length against the header size before reading byteSize.
     */
    if(length<0) {
        header.byteSize=udata_readInt32(ds, inHeader->byteSize);
    } else if(
        ((length-headerSize)<(8*4) ||
         (uint32_t)(length-headerSize)<(header.byteSize=udata_readInt32(ds, inHeader->byteSize)))
    ) {
        /*
         * The message reports the byte count after the data header, so the
         * header size must be subtracted from the total length here.
         */
        udata_printError(ds, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n",
                         length-headerSize);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    if(length<0) {
        /* size query only: nothing to copy or swap */
        return headerSize+header.byteSize;
    }

    /* copy everything, takes care of data that needs no swapping */
    if(inBytes!=outBytes) {
        uprv_memcpy(outBytes, inBytes, header.byteSize);
    }

    /*
     * Swap the necessary pieces in the order of their occurrence in the data.
     * The remaining header fields are read before the header is swapped
     * (byteSize was read above).
     */
    header.tableSize=   ds->readUInt32(inHeader->tableSize);
    header.contsSize=   ds->readUInt32(inHeader->contsSize);
    header.table=       ds->readUInt32(inHeader->table);
    header.conts=       ds->readUInt32(inHeader->conts);

    /* swap the five 32-bit integers at the start of the header */
    ds->swapArray32(ds, inHeader, 5*4, outHeader, pErrorCode);

    /* swap the inverse table; tableSize counts uint32_t[3] rows */
    ds->swapArray32(ds, inBytes+header.table, header.tableSize*3*4,
                       outBytes+header.table, pErrorCode);

    /* swap the continuation table; contsSize counts UChars */
    ds->swapArray16(ds, inBytes+header.conts, header.contsSize*U_SIZEOF_UCHAR,
                       outBytes+header.conts, pErrorCode);

    return headerSize+header.byteSize;
}
U_CAPI int32_t U_EXPORT2 ucnv_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { const UDataInfo *pInfo; int32_t headerSize; const uint8_t *inBytes; uint8_t *outBytes; uint32_t offset, count, staticDataSize; int32_t size; const UConverterStaticData *inStaticData; UConverterStaticData *outStaticData; const _MBCSHeader *inMBCSHeader; _MBCSHeader *outMBCSHeader; _MBCSHeader mbcsHeader; uint8_t outputType; const int32_t *inExtIndexes; int32_t extOffset; /* udata_swapDataHeader checks the arguments */ headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return 0; } /* check data format and format version */ pInfo=(const UDataInfo *)((const char *)inData+4); if(!( pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ pInfo->dataFormat[1]==0x6e && pInfo->dataFormat[2]==0x76 && pInfo->dataFormat[3]==0x74 && pInfo->formatVersion[0]==6 && pInfo->formatVersion[1]>=2 )) { udata_printError(ds, "ucnv_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not recognized as an ICU .cnv conversion table\n", pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0], pInfo->formatVersion[1]); *pErrorCode=U_UNSUPPORTED_ERROR; return 0; } inBytes=(const uint8_t *)inData+headerSize; outBytes=(uint8_t *)outData+headerSize; /* read the initial UConverterStaticData structure after the UDataInfo header */ inStaticData=(const UConverterStaticData *)inBytes; outStaticData=(UConverterStaticData *)outBytes; if(length<0) { staticDataSize=ds->readUInt32(inStaticData->structSize); } else { length-=headerSize; if( length<sizeof(UConverterStaticData) || (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize)) ) { udata_printError(ds, "ucnv_swap(): too few bytes (%d after header) for an ICU .cnv conversion table\n", length); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } if(length>=0) { 
/* swap the static data */ if(inStaticData!=outStaticData) { uprv_memcpy(outStaticData, inStaticData, staticDataSize); } ds->swapArray32(ds, &inStaticData->structSize, 4, &outStaticData->structSize, pErrorCode); ds->swapArray32(ds, &inStaticData->codepage, 4, &outStaticData->codepage, pErrorCode); ds->swapInvChars(ds, inStaticData->name, uprv_strlen(inStaticData->name), outStaticData->name, pErrorCode); if(U_FAILURE(*pErrorCode)) { udata_printError(ds, "ucnv_swap(): error swapping converter name - %s\n", u_errorName(*pErrorCode)); return 0; } } inBytes+=staticDataSize; outBytes+=staticDataSize; if(length>=0) { length-=(int32_t)staticDataSize; } /* check for supported conversionType values */ if(inStaticData->conversionType==UCNV_MBCS) { /* swap MBCS data */ inMBCSHeader=(const _MBCSHeader *)inBytes; outMBCSHeader=(_MBCSHeader *)outBytes; if(!(inMBCSHeader->version[0]==4 || inMBCSHeader->version[1]>=1)) { udata_printError(ds, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n", inMBCSHeader->version[0], inMBCSHeader->version[1]); *pErrorCode=U_UNSUPPORTED_ERROR; return 0; } uprv_memcpy(mbcsHeader.version, inMBCSHeader->version, 4); mbcsHeader.countStates= ds->readUInt32(inMBCSHeader->countStates); mbcsHeader.countToUFallbacks= ds->readUInt32(inMBCSHeader->countToUFallbacks); mbcsHeader.offsetToUCodeUnits= ds->readUInt32(inMBCSHeader->offsetToUCodeUnits); mbcsHeader.offsetFromUTable= ds->readUInt32(inMBCSHeader->offsetFromUTable); mbcsHeader.offsetFromUBytes= ds->readUInt32(inMBCSHeader->offsetFromUBytes); mbcsHeader.flags= ds->readUInt32(inMBCSHeader->flags); mbcsHeader.fromUBytesLength= ds->readUInt32(inMBCSHeader->fromUBytesLength); extOffset=(int32_t)mbcsHeader.flags>>8; outputType=(uint8_t)mbcsHeader.flags; /* make sure that the output type is known */ switch(outputType) { case MBCS_OUTPUT_1: case MBCS_OUTPUT_2: case MBCS_OUTPUT_3: case MBCS_OUTPUT_4: case MBCS_OUTPUT_3_EUC: case MBCS_OUTPUT_4_EUC: case MBCS_OUTPUT_2_SISO: case MBCS_OUTPUT_EXT_ONLY: /* OK */ 
break; default: udata_printError(ds, "ucnv_swap(): unsupported MBCS output type 0x%x\n", outputType); *pErrorCode=U_UNSUPPORTED_ERROR; return 0; } /* calculate the length of the MBCS data */ if(extOffset==0) { size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsHeader.fromUBytesLength); /* avoid compiler warnings - not otherwise necessary, and the value does not matter */ inExtIndexes=NULL; } else { /* there is extension data after the base data, see ucnv_ext.h */ if(length>=0 && length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) { udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n", length); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } inExtIndexes=(const int32_t *)(inBytes+extOffset); size=extOffset+udata_readInt32(ds, inExtIndexes[UCNV_EXT_SIZE]); } if(length>=0) { if(length<size) { udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", length); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } /* copy the data for inaccessible bytes */ if(inBytes!=outBytes) { uprv_memcpy(outBytes, inBytes, size); } /* swap the _MBCSHeader */ ds->swapArray32(ds, &inMBCSHeader->countStates, 7*4, &outMBCSHeader->countStates, pErrorCode); if(outputType==MBCS_OUTPUT_EXT_ONLY) { /* * extension-only file, * contains a base name instead of normal base table data */ /* swap the base name, between the header and the extension data */ ds->swapInvChars(ds, inMBCSHeader+1, uprv_strlen((const char *)(inMBCSHeader+1)), outMBCSHeader+1, pErrorCode); } else { /* normal file with base table data */ /* swap the state table, 1kB per state */ ds->swapArray32(ds, inMBCSHeader+1, (int32_t)(mbcsHeader.countStates*1024), outMBCSHeader+1, pErrorCode); /* swap the toUFallbacks[] */ offset=sizeof(_MBCSHeader)+mbcsHeader.countStates*1024; ds->swapArray32(ds, inBytes+offset, (int32_t)(mbcsHeader.countToUFallbacks*8), outBytes+offset, pErrorCode); /* swap the unicodeCodeUnits[] 
*/ offset=mbcsHeader.offsetToUCodeUnits; count=mbcsHeader.offsetFromUTable-offset; ds->swapArray16(ds, inBytes+offset, (int32_t)count, outBytes+offset, pErrorCode); /* offset to the stage 1 table, independent of the outputType */ offset=mbcsHeader.offsetFromUTable; if(outputType==MBCS_OUTPUT_1) { /* SBCS: swap the fromU tables, all 16 bits wide */ count=(mbcsHeader.offsetFromUBytes-offset)+mbcsHeader.fromUBytesLength; ds->swapArray16(ds, inBytes+offset, (int32_t)count, outBytes+offset, pErrorCode); } else { /* otherwise: swap the stage tables separately */ /* stage 1 table: uint16_t[0x440 or 0x40] */ if(inStaticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { count=0x440*2; /* for all of Unicode */ } else { count=0x40*2; /* only BMP */ } ds->swapArray16(ds, inBytes+offset, (int32_t)count, outBytes+offset, pErrorCode); /* stage 2 table: uint32_t[] */ offset+=count; count=mbcsHeader.offsetFromUBytes-offset; ds->swapArray32(ds, inBytes+offset, (int32_t)count, outBytes+offset, pErrorCode); /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */ offset=mbcsHeader.offsetFromUBytes; count=mbcsHeader.fromUBytesLength; switch(outputType) { case MBCS_OUTPUT_2: case MBCS_OUTPUT_3_EUC: case MBCS_OUTPUT_2_SISO: ds->swapArray16(ds, inBytes+offset, (int32_t)count, outBytes+offset, pErrorCode); break; case MBCS_OUTPUT_4: ds->swapArray32(ds, inBytes+offset, (int32_t)count, outBytes+offset, pErrorCode); break; default: /* just uint8_t[], nothing to swap */ break; } } } if(extOffset!=0) { /* swap the extension data */ inBytes+=extOffset; outBytes+=extOffset; /* swap toUTable[] */ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_INDEX]); length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_LENGTH]); ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); /* swap toUUChars[] */ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_INDEX]); length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_LENGTH]); ds->swapArray16(ds, inBytes+offset, 
length*2, outBytes+offset, pErrorCode); /* swap fromUTableUChars[] */ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_UCHARS_INDEX]); length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_LENGTH]); ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); /* swap fromUTableValues[] */ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_VALUES_INDEX]); /* same length as for fromUTableUChars[] */ ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); /* no need to swap fromUBytes[] */ /* swap fromUStage12[] */ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]); length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]); ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); /* swap fromUStage3[] */ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]); length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]); ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); /* swap fromUStage3b[] */ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]); length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]); ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); /* swap indexes[] */ length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_INDEXES_LENGTH]); ds->swapArray32(ds, inBytes, length*4, outBytes, pErrorCode); } } } else {
U_CDECL_END

/*
 * Swap the NameToEnum map found at byte offset pos.
 *
 * ds         - swapper that supplies the read/write/swap callbacks and charsets
 * inBytes    - input data; the map is read from inBytes+pos
 * length     - number of input bytes; a negative value means that only the
 *              map size is computed and nothing is written to outBytes
 * outBytes   - output data; the map is written to outBytes+pos and may be
 *              the same buffer as inBytes. When the charsets differ, outBytes
 *              must already contain the swapped strings.
 * temp       - scratch buffer laid out like the data; a non-zero count at
 *              temp+pos marks this map as already swapped
 * pos        - byte offset of the map in all three buffers
 * pErrorCode - ICU error code
 *
 * Returns the size reported by getSize() for the map, or 0 on error.
 */
int32_t
NameToEnum::swap(const UDataSwapper *ds,
                 const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
                 uint8_t *temp, int32_t pos,
                 UErrorCode *pErrorCode) {
    NameToEnum *tempMap=(NameToEnum *)(temp+pos);
    if(tempMap->count!=0) {
        /* a previous call already handled this map */
        return tempMap->getSize();
    }

    const NameToEnum *inMap=(const NameToEnum *)(inBytes+pos);
    NameToEnum *outMap=(NameToEnum *)(outBytes+pos);

    /* record the count in temp; this also marks the map as visited */
    tempMap->count=udata_readInt32(ds, inMap->count);
    const int32_t count=tempMap->count;
    const int32_t size=tempMap->getSize();

    if(length<0) {
        /* only the size was requested */
        return size;
    }

    if(length<(pos+size) && length<(int32_t)sizeof(PropertyAliases)) {
        udata_printError(ds, "upname_swap(NameToEnum): too few bytes (%d after header)\n"
                             " for pnames.icu NameToEnum[%d] at %d\n",
                         length, count, pos);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* swap count */
    ds->swapArray32(ds, inMap, 4, outMap, pErrorCode);

    /* the name offsets follow directly after the enum values */
    const EnumValue *inEnumArray=inMap->getEnumArray();
    EnumValue *outEnumArray=outMap->getEnumArray();

    const Offset *inNameArray=(const Offset *)(inEnumArray+count);
    Offset *outNameArray=(Offset *)(outEnumArray+count);

    if(ds->inCharset==ds->outCharset) {
        /* same charset, same sort order: a plain swap of both arrays suffices */
        ds->swapArray32(ds, inEnumArray, count*4, outEnumArray, pErrorCode);
        ds->swapArray16(ds, inNameArray, count*2, outNameArray, pErrorCode);
        return size;
    }

    /*
     * The arrays are sorted by name, so a charset change requires resorting.
     * The temp copy of this map is used as the sort buffer: it holds pairs
     * of (name offset, original index), and after sorting the original
     * indexes drive the permutation of the name and enum arrays.
     */
    NameAndIndex *sortArray=(NameAndIndex *)tempMap->getEnumArray();
    for(int32_t row=0; row<count; ++row) {
        sortArray[row].name=udata_readInt16(ds, inNameArray[row]);
        sortArray[row].index=(Offset)row;
    }

    /* compare the already-swapped strings in outBytes with the output charset's rules */
    CompareContext cmp;
    cmp.chars=(const char *)outBytes;
    if(ds->outCharset==U_ASCII_FAMILY) {
        cmp.propCompare=uprv_compareASCIIPropertyNames;
    } else {
        cmp.propCompare=uprv_compareEBCDICPropertyNames;
    }

    /* stable sort: equal strings keep their order, which simplifies testing */
    uprv_sortArray(sortArray, count, sizeof(NameAndIndex),
                   upname_compareRows, &cmp,
                   TRUE, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        udata_printError(ds, "upname_swap(NameToEnum).uprv_sortArray(%d items) failed\n",
                         count);
        return 0;
    }

    if(inEnumArray!=outEnumArray) {
        /* separate buffers: swap each entry straight into its new position */
        for(int32_t row=0; row<count; ++row) {
            const int32_t sourceIndex=sortArray[row].index;
            ds->swapArray32(ds, inEnumArray+sourceIndex, 4, outEnumArray+row, pErrorCode);
            ds->swapArray16(ds, inNameArray+sourceIndex, 2, outNameArray+row, pErrorCode);
        }
        return size;
    }

    /*
     * In-place swapping: entries cannot be moved directly without destroying
     * data that is still needed, so the enum values are permuted into a
     * temporary array and copied back afterwards.
     *
     * No extra memory is allocated. The temp map's _enumArray[] (so far used
     * as sortArray[]) becomes tempEnumArray[], and the temp map's
     * _nameArray[] behind it receives the original indexes first.
     * This works because sizeof(NameAndIndex)<=sizeof(EnumValue) and because
     * the nameArray[] can be used for oldIndexes[].
     * The loops below must run in this order.
     */
    EnumValue *tempEnumArray=(EnumValue *)sortArray;
    Offset *oldIndexes=(Offset *)(sortArray+count);

    /* the sorted name offsets can be written to the output right away */
    for(int32_t row=0; row<count; ++row) {
        ds->writeUInt16((uint16_t *)outNameArray+row, (uint16_t)sortArray[row].name);
    }

    /* save the original indexes before sortArray[] is overwritten */
    for(int32_t row=0; row<count; ++row) {
        oldIndexes[row]=sortArray[row].index;
    }

    /* permute inEnumArray[] into tempEnumArray[] */
    for(int32_t row=0; row<count; ++row) {
        ds->swapArray32(ds, inEnumArray+oldIndexes[row], 4, tempEnumArray+row, pErrorCode);
    }

    /* copy the permuted values back to the output */
    uprv_memcpy(outEnumArray, tempEnumArray, count*4);

    return size;
}
void Package::readPackage(const char *filename) { UDataSwapper *ds; const UDataInfo *pInfo; UErrorCode errorCode; const uint8_t *inBytes; int32_t length, offset, i; int32_t itemLength, typeEnum; char type; const UDataOffsetTOCEntry *inEntries; extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName)); /* read the file */ inData=readFile(NULL, filename, inLength, type); length=inLength; /* * swap the header - even if the swapping itself is a no-op * because it tells us the header length */ errorCode=U_ZERO_ERROR; makeTypeProps(type, inCharset, inIsBigEndian); ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", filename, u_errorName(errorCode)); exit(errorCode); } ds->printError=printPackageError; ds->printErrorContext=stderr; headerLength=sizeof(header); if(length<headerLength) { headerLength=length; } headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode); if(U_FAILURE(errorCode)) { exit(errorCode); } /* check data format and format version */ pInfo=(const UDataInfo *)((const char *)inData+4); if(!( pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ pInfo->dataFormat[1]==0x6d && pInfo->dataFormat[2]==0x6e && pInfo->dataFormat[3]==0x44 && pInfo->formatVersion[0]==1 )) { fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]); exit(U_UNSUPPORTED_ERROR); } inIsBigEndian=(UBool)pInfo->isBigEndian; inCharset=pInfo->charsetFamily; inBytes=(const uint8_t *)inData+headerLength; inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ length-=headerLength; if(length<4) { /* itemCount does not fit */ offset=0x7fffffff; } else { 
itemCount=udata_readInt32(ds, *(const int32_t *)inBytes); if(itemCount==0) { offset=4; } else if(length<(4+8*itemCount)) { /* ToC table does not fit */ offset=0x7fffffff; } else { /* offset of the last item plus at least 20 bytes for its header */ offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset); } } if(length<offset) { fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n", (long)length); exit(U_INDEX_OUTOFBOUNDS_ERROR); } /* do not modify the package length variable until the last item's length is set */ if(itemCount>0) { char prefix[MAX_PKG_NAME_LENGTH+4]; char *s, *inItemStrings; int32_t inPkgNameLength, prefixLength, stringsOffset; if(itemCount>MAX_FILE_COUNT) { fprintf(stderr, "icupkg: too many items, maximum is %d\n", MAX_FILE_COUNT); exit(U_BUFFER_OVERFLOW_ERROR); } /* swap the item name strings */ stringsOffset=4+8*itemCount; itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset; // don't include padding bytes at the end of the item names while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) { --itemLength; } if((inStringTop+itemLength)>STRING_STORE_SIZE) { fprintf(stderr, "icupkg: total length of item name strings too long\n"); exit(U_BUFFER_OVERFLOW_ERROR); } inItemStrings=inStrings+inStringTop; ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n"); exit(U_INVALID_FORMAT_ERROR); } inStringTop+=itemLength; // reset the Item entries memset(items, 0, itemCount*sizeof(Item)); inPkgNameLength=strlen(inPkgName); memcpy(prefix, inPkgName, inPkgNameLength); prefixLength=inPkgNameLength; /* * Get the common prefix of the items. 
* New-style ICU .dat packages use tree separators ('/') between package names, * tree names, and item names, * while old-style ICU .dat packages (before multi-tree support) * use an underscore ('_') between package and item names. */ offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset; s=inItemStrings+offset; if( (int32_t)strlen(s)>=(inPkgNameLength+2) && 0==memcmp(s, inPkgName, inPkgNameLength) && s[inPkgNameLength]=='_' ) { // old-style .dat package prefix[prefixLength++]='_'; } else { // new-style .dat package prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR // then the test in the loop below will fail } prefix[prefixLength]=0; /* read the ToC table */ for(i=0; i<itemCount; ++i) { // skip the package part of the item name, error if it does not match the actual package name // or if nothing follows the package name offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset; s=inItemStrings+offset; if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) { fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n", s, prefix); exit(U_UNSUPPORTED_ERROR); } items[i].name=s+prefixLength; // set the item's data items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset); if(i>0) { items[i-1].length=(int32_t)(items[i].data-items[i-1].data); // set the previous item's platform type typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode); if(typeEnum<0 || U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); exit(U_INVALID_FORMAT_ERROR); } items[i-1].type=makeTypeLetter(typeEnum); } items[i].isDataOwned=FALSE; } // set the last item's length items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset); // set the last item's platform type typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, 
&errorCode); if(typeEnum<0 || U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); exit(U_INVALID_FORMAT_ERROR); } items[itemCount-1].type=makeTypeLetter(typeEnum); if(type!=U_ICUDATA_TYPE_LETTER[0]) { // sort the item names for the local charset sortItems(); } } udata_closeSwapper(ds); }