/*
 * Swap 'Test' data from gentest.
 *
 * Swaps the generic data header, verifies that the data format is "Test"
 * with format version 1, and then swaps the payload: one 16-bit array of
 * size16 bytes followed by an invariant-character string of sizeStr bytes.
 *
 * @param ds         swapper whose swapArray16/swapInvChars callbacks are used
 * @param inData     input data, starting with the data header
 * @param length     total number of input bytes including the header,
 *                   or negative for preflighting (nothing after the header is swapped)
 * @param outData    output buffer
 * @param pErrorCode in/out error code; set on failure
 * @return headerSize plus the payload size, or 0 on failure
 */
static int32_t U_CALLCONV
test_swap(const UDataSwapper *ds,
          const void *inData, int32_t length, void *outData,
          UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize;

    const uint8_t *inBytes;
    uint8_t *outBytes;

    int32_t offset;

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        udata_printError(ds, "test_swap(): data header swap failed %s\n", pErrorCode != NULL ? u_errorName(*pErrorCode) : "pErrorCode is NULL");
        return 0;
    }

    /* check data format and format version */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x54 &&   /* dataFormat="Test" */
        pInfo->dataFormat[1]==0x65 &&
        pInfo->dataFormat[2]==0x73 &&
        pInfo->dataFormat[3]==0x74 &&
        pInfo->formatVersion[0]==1
    )) {
        udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    inBytes=(const uint8_t *)inData+headerSize;
    outBytes=(uint8_t *)outData+headerSize;

    int32_t size16 = 2; /* 16bit plus padding */
    int32_t sizeStr = 5; /* 4 char inv-str plus null */
    int32_t size = size16 + sizeStr;

    if(length>=0) {
        /*
         * length counts the header as well; compare only the bytes that
         * follow the header against the payload size, as the message states.
         */
        length-=headerSize;
        if(length<size) {
            udata_printError(ds, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n",
                             length, size);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        offset =0;
        /* swap a 1 entry array */
        ds->swapArray16(ds, inBytes+offset, size16, outBytes+offset, pErrorCode);
        offset+=size16;
        ds->swapInvChars(ds, inBytes+offset, sizeStr, outBytes+offset, pErrorCode);
    }

    return headerSize+size;
}
/*
 * Extract the package name from a package filename: the basename of
 * filename (as returned by findBasename()) without its ".dat" suffix.
 *
 * @param ds         swapper, used only for error reporting
 * @param filename   path of the package file; its basename must end with ".dat"
 * @param pkg        receives the NUL-terminated package name
 * @param capacity   size of pkg in bytes; the name length must be < capacity
 * @param pErrorCode in/out error code; must not be NULL.
 *                   Set to U_ILLEGAL_ARGUMENT_ERROR if the basename does not
 *                   end with ".dat", has no name before it, or is too long.
 * @return length of the package name (without the NUL), or 0 on failure
 */
static int32_t
extractPackageName(const UDataSwapper *ds, const char *filename,
                   char pkg[], int32_t capacity,
                   UErrorCode *pErrorCode) {
    const char *basename;
    int32_t len;

    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    basename=findBasename(filename);
    len=(int32_t)uprv_strlen(basename)-4; /* -4: subtract the length of ".dat" */

    if(len<=0 || 0!=uprv_strcmp(basename+len, ".dat")) {
        udata_printError(ds, "udata_swapPackage(): \"%s\" is not recognized as a package filename (must end with .dat)\n",
                         basename);
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    if(len>=capacity) {
        /* the format has two conversions (%s and %ld): pass both arguments */
        udata_printError(ds, "udata_swapPackage(): the package name \"%s\" is too long (>=%ld)\n",
                         basename, (long)capacity);
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    uprv_memcpy(pkg, basename, len);
    pkg[len]=0;
    return len;
}
/*
 * Swap a pnames.icu data file (dataFormat "pnam", formatVersion 1).
 *
 * The generic data header is swapped first; the remainder is delegated to
 * PropertyAliases::swap(). With length<0 (preflighting) the size check is
 * skipped and the negative length is passed through unchanged.
 *
 * Returns the header size plus the value returned by PropertyAliases::swap(),
 * or 0 after setting *pErrorCode on failure.
 */
U_CAPI int32_t U_EXPORT2
upname_swap(const UDataSwapper *ds,
            const void *inData, int32_t length, void *outData,
            UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize;

    const uint8_t *inBytes;
    uint8_t *outBytes;

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* reject anything that is not "pnam" (0x70 0x6e 0x61 0x6d), format version 1 */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if( pInfo->dataFormat[0]!=0x70 ||
        pInfo->dataFormat[1]!=0x6e ||
        pInfo->dataFormat[2]!=0x61 ||
        pInfo->dataFormat[3]!=0x6d ||
        pInfo->formatVersion[0]!=1
    ) {
        udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    if(length>=0) {
        /* from here on, length counts only the bytes after the header */
        length-=headerSize;
        if(length<(int32_t)sizeof(PropertyAliases)) {
            udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
                             length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    inBytes=(const uint8_t *)inData+headerSize;
    outBytes=(uint8_t *)outData+headerSize;

    return headerSize+PropertyAliases::swap(ds, inBytes, length, outBytes, pErrorCode);
}
int32_t NonContiguousEnumToOffset::swap(const UDataSwapper *ds, const uint8_t *inBytes, int32_t length, uint8_t *outBytes, uint8_t *temp, int32_t pos, UErrorCode *pErrorCode) { const NonContiguousEnumToOffset *inMap; NonContiguousEnumToOffset *outMap, *tempMap; int32_t size; tempMap=(NonContiguousEnumToOffset *)(temp+pos); if(tempMap->count!=0) { /* this map was swapped already */ size=tempMap->getSize(); return size; } inMap=(const NonContiguousEnumToOffset *)(inBytes+pos); outMap=(NonContiguousEnumToOffset *)(outBytes+pos); tempMap->count=udata_readInt32(ds, inMap->count); size=tempMap->getSize(); if(length>=0) { if(length<(pos+size)) { if(length<(int32_t)sizeof(PropertyAliases)) { udata_printError(ds, "upname_swap(NonContiguousEnumToOffset): too few bytes (%d after header)\n" " for pnames.icu NonContiguousEnumToOffset[%d] at %d\n", length, tempMap->count, pos); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } /* swap count and _enumArray[] */ length=(1+tempMap->count)*sizeof(EnumValue); ds->swapArray32(ds, inMap, length, outMap, pErrorCode); /* swap _offsetArray[] */ pos+=length; ds->swapArray16(ds, inBytes+pos, tempMap->count*sizeof(Offset), outBytes+pos, pErrorCode); } return size; }
int32_t EnumToOffset::swap(const UDataSwapper *ds, const uint8_t *inBytes, int32_t length, uint8_t *outBytes, uint8_t *temp, int32_t pos, UErrorCode *pErrorCode) { const EnumToOffset *inMap; EnumToOffset *outMap, *tempMap; int32_t size; tempMap=(EnumToOffset *)(temp+pos); if(tempMap->enumStart!=0 || tempMap->enumLimit!=0) { /* this map was swapped already */ size=tempMap->getSize(); return size; } inMap=(const EnumToOffset *)(inBytes+pos); outMap=(EnumToOffset *)(outBytes+pos); tempMap->enumStart=udata_readInt32(ds, inMap->enumStart); tempMap->enumLimit=udata_readInt32(ds, inMap->enumLimit); size=tempMap->getSize(); if(length>=0) { if(length<(pos+size)) { if(length<(int32_t)sizeof(PropertyAliases)) { udata_printError(ds, "upname_swap(EnumToOffset): too few bytes (%d after header)\n" " for pnames.icu EnumToOffset{%d..%d} at %d\n", length, tempMap->enumStart, tempMap->enumLimit, pos); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } /* swap enumStart and enumLimit */ ds->swapArray32(ds, inMap, 2*sizeof(EnumValue), outMap, pErrorCode); /* swap _offsetArray[] */ ds->swapArray16(ds, inMap->getOffsetArray(), (tempMap->enumLimit-tempMap->enumStart)*sizeof(Offset), outMap->getOffsetArray(), pErrorCode); } return size; }
/*
 * Swap ICU collation data like ucadata.icu.
 *
 * Swaps the generic data header, accepts only dataFormat "UCol" with
 * format version 2.x where x>=3, and hands the bytes after the header to
 * ucol_swapBinary(). When preflighting (length<0), -1 is passed on.
 *
 * Returns header size plus the collation binary size, or 0 on failure.
 */
U_CAPI int32_t U_EXPORT2
ucol_swap(const UDataSwapper *ds,
          const void *inData, int32_t length, void *outData,
          UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize, collationSize;

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* reject anything that is not "UCol" (0x55 0x43 0x6f 0x6c), version 2.3 or later 2.x */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if( pInfo->dataFormat[0]!=0x55 ||
        pInfo->dataFormat[1]!=0x43 ||
        pInfo->dataFormat[2]!=0x6f ||
        pInfo->dataFormat[3]!=0x6c ||
        pInfo->formatVersion[0]!=2 ||
        pInfo->formatVersion[1]<3
    ) {
        udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not a collation file\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0], pInfo->formatVersion[1]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    collationSize=ucol_swapBinary(ds,
                        (const char *)inData+headerSize,
                        length>=0 ? length-headerSize : -1,
                        (char *)outData+headerSize,
                        pErrorCode);

    return U_SUCCESS(*pErrorCode) ? headerSize+collationSize : 0;
}
/*
 * This function only checks and copies EBCDIC strings without conversion.
 *
 * Every nonzero input byte is mapped through asciiFromEbcdic[] and must
 * yield a nonzero invariant character; otherwise the function fails with
 * U_INVALID_CHAR_FOUND before anything is copied.
 *
 * @param ds         swapper, used only for error reporting; must not be NULL
 * @param inData     input bytes; must not be NULL
 * @param length     number of bytes; must be >=0
 * @param outData    output buffer; may equal inData, must not be NULL if length>0
 * @param pErrorCode in/out error code
 * @return length on success, 0 on failure
 */
U_CFUNC int32_t
uprv_copyEbcdic(const UDataSwapper *ds,
                const void *inData, int32_t length, void *outData,
                UErrorCode *pErrorCode) {
    const uint8_t *s;
    uint8_t c;

    int32_t count;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* setup and checking */
    s=(const uint8_t *)inData;
    count=length;
    while(count>0) {
        c=*s++;
        if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
            /* "%d" for the string length: the former "%]" was not a valid conversion */
            udata_printError(ds, "uprv_copyEbcdic() string[%d] contains a variant character in position %d\n",
                             length, length-count);
            *pErrorCode=U_INVALID_CHAR_FOUND;
            return 0;
        }
        --count;
    }

    if(length>0 && inData!=outData) {
        uprv_memcpy(outData, inData, length);
    }

    return length;
}
/*
 * Convert ASCII to EBCDIC and verify that all characters are invariant.
 *
 * Bytes are translated one at a time through ebcdicFromAscii[]; the first
 * byte that is not an invariant character aborts the conversion with
 * U_INVALID_CHAR_FOUND (bytes before it have already been written).
 *
 * Returns length on success, 0 on failure.
 */
U_CAPI int32_t U_EXPORT2
uprv_ebcdicFromAscii(const UDataSwapper *ds,
                     const void *inData, int32_t length, void *outData,
                     UErrorCode *pErrorCode) {
    const uint8_t *src;
    uint8_t *dest;
    uint8_t ch;

    int32_t pos;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* setup and swapping */
    src=(const uint8_t *)inData;
    dest=(uint8_t *)outData;
    for(pos=0; pos<length; ++pos) {
        /* read before writing so that in-place conversion works */
        ch=src[pos];
        if(!UCHAR_IS_INVARIANT(ch)) {
            udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n",
                             length, pos);
            *pErrorCode=U_INVALID_CHAR_FOUND;
            return 0;
        }
        dest[pos]=ebcdicFromAscii[ch];
    }

    return length;
}
U_CDECL_END U_CFUNC int32_t U_CALLCONV udata_swapPackage(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { const UDataInfo *pInfo; int32_t headerSize; const uint8_t *inBytes; uint8_t *outBytes; uint32_t itemCount, offset, i; int32_t itemLength; const UDataOffsetTOCEntry *inEntries; UDataOffsetTOCEntry *outEntries; ToCEntry *table; char inPkgName[32], outPkgName[32]; int32_t inPkgNameLength, outPkgNameLength; /* udata_swapDataHeader checks the arguments */ headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return 0; } /* check data format and format version */ pInfo=(const UDataInfo *)((const char *)inData+4); if(!( pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ pInfo->dataFormat[1]==0x6d && pInfo->dataFormat[2]==0x6e && pInfo->dataFormat[3]==0x44 && pInfo->formatVersion[0]==1 )) { udata_printError(ds, "udata_swapPackage(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]); *pErrorCode=U_UNSUPPORTED_ERROR; return 0; } /* * We need to change the ToC name entries so that they have the correct * package name prefix. * Extract the package names from the in/out filenames. */ inPkgNameLength=extractPackageName( ds, inFilename, inPkgName, (int32_t)sizeof(inPkgName), pErrorCode); outPkgNameLength=extractPackageName( ds, outFilename, outPkgName, (int32_t)sizeof(outPkgName), pErrorCode); if(U_FAILURE(*pErrorCode)) { return 0; } /* * It is possible to work with inPkgNameLength!=outPkgNameLength, * but then the length of the data file would change more significantly, * which we are not currently prepared for. 
*/ if(inPkgNameLength!=outPkgNameLength) { udata_printError(ds, "udata_swapPackage(): the package names \"%s\" and \"%s\" must have the same length\n", inPkgName, outPkgName); *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; } inBytes=(const uint8_t *)inData+headerSize; inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); if(length<0) { /* preflighting */ itemCount=ds->readUInt32(*(const uint32_t *)inBytes); if(itemCount==0) { /* no items: count only the item count and return */ return headerSize+4; } /* read the last item's offset and preflight it */ offset=ds->readUInt32(inEntries[itemCount-1].dataOffset); itemLength=udata_swap(ds, inBytes+offset, -1, NULL, pErrorCode); if(U_SUCCESS(*pErrorCode)) { return headerSize+offset+(uint32_t)itemLength; } else { return 0; } } else { /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ length-=headerSize; if(length<4) { /* itemCount does not fit */ offset=0xffffffff; itemCount=0; /* make compilers happy */ } else { itemCount=ds->readUInt32(*(const uint32_t *)inBytes); if(itemCount==0) { offset=4; } else if((uint32_t)length<(4+8*itemCount)) { /* ToC table does not fit */ offset=0xffffffff; } else { /* offset of the last item plus at least 20 bytes for its header */ offset=20+ds->readUInt32(inEntries[itemCount-1].dataOffset); } } if((uint32_t)length<offset) { udata_printError(ds, "udata_swapPackage(): too few bytes (%d after header) for unames.icu\n", length); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } outBytes=(uint8_t *)outData+headerSize; /* swap the item count */ ds->swapArray32(ds, inBytes, 4, outBytes, pErrorCode); if(itemCount==0) { /* no items: just return now */ return headerSize+4; } /* swap the item name strings */ offset=4+8*itemCount; itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset)-offset); udata_swapInvStringBlock(ds, inBytes+offset, itemLength, outBytes+offset, pErrorCode); if(U_FAILURE(*pErrorCode)) { udata_printError(ds, "udata_swapPackage() 
failed to swap the data item name strings\n"); return 0; } /* keep offset and itemLength in case we allocate and copy the strings below */ /* swap the package names into the output charset */ if(ds->outCharset!=U_CHARSET_FAMILY) { UDataSwapper *ds2; ds2=udata_openSwapper(TRUE, U_CHARSET_FAMILY, TRUE, ds->outCharset, pErrorCode); ds2->swapInvChars(ds2, inPkgName, inPkgNameLength, inPkgName, pErrorCode); ds2->swapInvChars(ds2, outPkgName, outPkgNameLength, outPkgName, pErrorCode); udata_closeSwapper(ds2); if(U_FAILURE(*pErrorCode)) { udata_printError(ds, "udata_swapPackage() failed to swap the input/output package names\n"); } } /* change the prefix of each ToC entry name from the old to the new package name */ { char *entryName; for(i=0; i<itemCount; ++i) { entryName=(char *)inBytes+ds->readUInt32(inEntries[i].nameOffset); if(0==uprv_memcmp(entryName, inPkgName, inPkgNameLength)) { uprv_memcpy(entryName, outPkgName, inPkgNameLength); } else { udata_printError(ds, "udata_swapPackage() failed: ToC item %ld does not have the input package name as a prefix\n", (long)i); *pErrorCode=U_INVALID_FORMAT_ERROR; return 0; } } } /* * Allocate the ToC table and, if necessary, a temporary buffer for * pseudo-in-place swapping. * * We cannot swap in-place because: * * 1. If the swapping of an item fails mid-way, then in-place swapping * has destroyed its data. * Out-of-place swapping allows us to then copy its original data. * * 2. If swapping changes the charset family, then we must resort * not only the ToC table but also the data items themselves. * This requires a permutation and is best done with separate in/out * buffers. * * We swapped the strings above to avoid the malloc below if string swapping fails. 
*/ if(inData==outData) { /* +15: prepare for extra padding of a newly-last item */ table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)+length+15); if(table!=NULL) { outBytes=(uint8_t *)(table+itemCount); /* copy the item count and the swapped strings */ uprv_memcpy(outBytes, inBytes, 4); uprv_memcpy(outBytes+offset, inBytes+offset, itemLength); } } else { table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)); } if(table==NULL) { udata_printError(ds, "udata_swapPackage(): out of memory allocating %d bytes\n", inData==outData ? itemCount*sizeof(ToCEntry)+length+15 : itemCount*sizeof(ToCEntry)); *pErrorCode=U_MEMORY_ALLOCATION_ERROR; return 0; } outEntries=(UDataOffsetTOCEntry *)(outBytes+4); /* read the ToC table */ for(i=0; i<itemCount; ++i) { table[i].nameOffset=ds->readUInt32(inEntries[i].nameOffset); table[i].inOffset=ds->readUInt32(inEntries[i].dataOffset); if(i>0) { table[i-1].length=table[i].inOffset-table[i-1].inOffset; } } table[itemCount-1].length=(uint32_t)length-table[itemCount-1].inOffset; if(ds->inCharset==ds->outCharset) { /* no charset swapping, no resorting: keep item offsets the same */ for(i=0; i<itemCount; ++i) { table[i].outOffset=table[i].inOffset; } } else { /* charset swapping: resort items by their swapped names */ /* * Before the actual sorting, we need to make sure that each item * has a length that is a multiple of 16 bytes so that all items * are 16-aligned. * Only the old last item may be missing up to 15 padding bytes. * Add padding bytes for it. * Since the icuswap main() function has already allocated enough * input buffer space and set the last 15 bytes there to 0xaa, * we only need to increase the total data length and the length * of the last item here. 
*/ if((length&0xf)!=0) { int32_t delta=16-(length&0xf); length+=delta; table[itemCount-1].length+=(uint32_t)delta; } uprv_sortArray(table, (int32_t)itemCount, (int32_t)sizeof(ToCEntry), compareToCEntries, outBytes, FALSE, pErrorCode); /* * Note: Before sorting, the inOffset values were in order. * Now the outOffset values are in order. */ /* assign outOffset values */ offset=table[0].inOffset; for(i=0; i<itemCount; ++i) { table[i].outOffset=offset; offset+=table[i].length; } } /* write the output ToC table */ for(i=0; i<itemCount; ++i) { ds->writeUInt32(&outEntries[i].nameOffset, table[i].nameOffset); ds->writeUInt32(&outEntries[i].dataOffset, table[i].outOffset); } /* swap each data item */ for(i=0; i<itemCount; ++i) { /* first copy the item bytes to make sure that unreachable bytes are copied */ uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length); /* swap the item */ udata_swap(ds, inBytes+table[i].inOffset, (int32_t)table[i].length, outBytes+table[i].outOffset, pErrorCode); if(U_FAILURE(*pErrorCode)) { if(ds->outCharset==U_CHARSET_FAMILY) { udata_printError(ds, "warning: udata_swapPackage() failed to swap item \"%s\"\n" " at inOffset 0x%x length 0x%x - %s\n" " the data item will be copied, not swapped\n\n", (char *)outBytes+table[i].nameOffset, table[i].inOffset, table[i].length, u_errorName(*pErrorCode)); } else { udata_printError(ds, "warning: udata_swapPackage() failed to swap an item\n" " at inOffset 0x%x length 0x%x - %s\n" " the data item will be copied, not swapped\n\n", table[i].inOffset, table[i].length, u_errorName(*pErrorCode)); } /* reset the error code, copy the data item, and continue */ *pErrorCode=U_ZERO_ERROR; uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length); } } if(inData==outData) { /* copy the data from the temporary buffer to the in-place buffer */ uprv_memcpy((uint8_t *)outData+headerSize, outBytes, length); } uprv_free(table); return headerSize+length; } }
U_NAMESPACE_END

U_NAMESPACE_USE

//-----------------------------------------------------------------------------
//
//  ubrk_swap   -  byte swap and char encoding swap of RBBI data
//
//  ds       swapper providing the read/swap callbacks
//  inData   input data, starting with the generic ICU data header
//  length   total number of input bytes, or -1 for preflighting
//  outData  output buffer; may be the same buffer as inData
//  status   in/out error code
//
//  Returns the header size plus the break data length taken from the RBBI
//  data header, or 0 on failure.
//
//-----------------------------------------------------------------------------

U_CAPI int32_t U_EXPORT2
ubrk_swap(const UDataSwapper * ds, const void * inData, int32_t length, void * outData,
          UErrorCode * status) {

    if (status == NULL || U_FAILURE(*status)) {
        return 0;
    }
    if (ds == NULL || inData == NULL || length < -1 || (length > 0 && outData == NULL)) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    //
    //  Check that the data header is for break data.
    //    (Header contents are defined in genbrk.cpp)
    //
    const UDataInfo * pInfo = (const UDataInfo *)((const char *)inData + 4);
    if (!(pInfo->dataFormat[0] == 0x42 &&   /* dataFormat="Brk " */
          pInfo->dataFormat[1] == 0x72 &&
          pInfo->dataFormat[2] == 0x6b &&
          pInfo->dataFormat[3] == 0x20 &&
          pInfo->formatVersion[0] == 3)) {
        udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *status = U_UNSUPPORTED_ERROR;
        return 0;
    }

    //
    // Swap the data header.  (This is the generic ICU Data Header, not the RBBI Specific
    //                         RBBIDataHeader).  This swap also conveniently gets us
    //                         the size of the ICU d.h., which lets us locate the start
    //                         of the RBBI specific data.
    //
    int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, status);
    if (U_FAILURE(*status)) {
        // Without a valid header size the RBBI data cannot be located.
        return 0;
    }

    //
    // Get the RBBI Data Header, and check that it appears to be OK.
    //
    //    Note:  ICU 3.2 and earlier, RBBIDataHeader::fDataFormat was actually
    //           an int32_t with a value of 1.  Starting with ICU 3.4,
    //           RBBI's fDataFormat matches the dataFormat field from the
    //           UDataInfo header, four int8_t bytes.  The value is {3,1,0,0}
    //
    const uint8_t * inBytes = (const uint8_t *)inData + headerSize;
    RBBIDataHeader * rbbiDH = (RBBIDataHeader *)inBytes;
    if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 ||
        rbbiDH->fFormatVersion[0] != 3 ||
        ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader)) {
        udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n");
        *status = U_UNSUPPORTED_ERROR;
        return 0;
    }

    //
    // Preflight operation?  Just return the size
    //
    int32_t breakDataLength = ds->readUInt32(rbbiDH->fLength);
    int32_t totalSize = headerSize + breakDataLength;
    if (length < 0) {
        return totalSize;
    }

    //
    // Check that length passed in is consistent with length from RBBI data header.
    //
    if (length < totalSize) {
        udata_printError(ds, "ubrk_swap(): too few bytes (%d after ICU Data header) for break data.\n",
                         breakDataLength);
        *status = U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    //
    // Swap the Data.  Do the data itself first, then the RBBI Data Header, because
    //                 we need to reference the header to locate the data, and an
    //                 inplace swap of the header leaves it unusable.
    //
    uint8_t * outBytes = (uint8_t *)outData + headerSize;
    RBBIDataHeader * outputDH = (RBBIDataHeader *)outBytes;

    int32_t tableStartOffset;
    int32_t tableLength;

    //
    // If not swapping in place, zero out the output buffer before starting.
    //    Individual tables and other data items within are aligned to 8 byte boundaries
    //    when originally created.  Any unused space between items needs to be zero.
    //
    if (inBytes != outBytes) {
        uprv_memset(outBytes, 0, breakDataLength);
    }

    //
    // Each state table begins with several 32 bit fields.  Calculate the size
    //   in bytes of these.
    //
    int32_t topSize = offsetof(RBBIStateTable, fTableData);

    // Forward state table.
    tableStartOffset = ds->readUInt32(rbbiDH->fFTable);
    tableLength      = ds->readUInt32(rbbiDH->fFTableLen);

    if (tableLength > 0) {
        ds->swapArray32(ds, inBytes + tableStartOffset, topSize,
                            outBytes + tableStartOffset, status);
        ds->swapArray16(ds, inBytes + tableStartOffset + topSize, tableLength - topSize,
                            outBytes + tableStartOffset + topSize, status);
    }

    // Reverse state table.  Same layout as forward table, above.
    tableStartOffset = ds->readUInt32(rbbiDH->fRTable);
    tableLength      = ds->readUInt32(rbbiDH->fRTableLen);

    if (tableLength > 0) {
        ds->swapArray32(ds, inBytes + tableStartOffset, topSize,
                            outBytes + tableStartOffset, status);
        ds->swapArray16(ds, inBytes + tableStartOffset + topSize, tableLength - topSize,
                            outBytes + tableStartOffset + topSize, status);
    }

    // Safe Forward state table.  Same layout as forward table, above.
    tableStartOffset = ds->readUInt32(rbbiDH->fSFTable);
    tableLength      = ds->readUInt32(rbbiDH->fSFTableLen);

    if (tableLength > 0) {
        ds->swapArray32(ds, inBytes + tableStartOffset, topSize,
                            outBytes + tableStartOffset, status);
        ds->swapArray16(ds, inBytes + tableStartOffset + topSize, tableLength - topSize,
                            outBytes + tableStartOffset + topSize, status);
    }

    // Safe Reverse state table.  Same layout as forward table, above.
    tableStartOffset = ds->readUInt32(rbbiDH->fSRTable);
    tableLength      = ds->readUInt32(rbbiDH->fSRTableLen);

    if (tableLength > 0) {
        ds->swapArray32(ds, inBytes + tableStartOffset, topSize,
                            outBytes + tableStartOffset, status);
        ds->swapArray16(ds, inBytes + tableStartOffset + topSize, tableLength - topSize,
                            outBytes + tableStartOffset + topSize, status);
    }

    // Trie table for character categories
    utrie_swap(ds, inBytes + ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen),
                   outBytes + ds->readUInt32(rbbiDH->fTrie), status);

    // Source Rules Text.  It's UChar data
    ds->swapArray16(ds, inBytes + ds->readUInt32(rbbiDH->fRuleSource), ds->readUInt32(rbbiDH->fRuleSourceLen),
                        outBytes + ds->readUInt32(rbbiDH->fRuleSource), status);

    // Table of rule status values.  It's all int_32 values
    ds->swapArray32(ds, inBytes + ds->readUInt32(rbbiDH->fStatusTable), ds->readUInt32(rbbiDH->fStatusTableLen),
                        outBytes + ds->readUInt32(rbbiDH->fStatusTable), status);

    // And, last, the header.
    //   It is all int32_t values except for fFormatVersion, which is an array of four bytes.
    //   Swap the whole thing as int32_t, then re-swap the one field.
    //
    ds->swapArray32(ds, inBytes, sizeof(RBBIDataHeader), outBytes, status);
    ds->swapArray32(ds, outputDH->fFormatVersion, 4, outputDH->fFormatVersion, status);

    return totalSize;
}
/*
 * Swap a converter alias table (dataFormat "CvAl", formatVersion 3).
 *
 * The data after the header starts with a table of contents of 32-bit
 * section sizes; section offsets are computed from it in 16-bit units.
 * When the input and output charset families differ, the alias list and
 * the untagged converter array are re-sorted by the output-charset strings.
 *
 * @param ds         swapper providing the read/swap callbacks
 * @param inData     input data, starting with the data header
 * @param length     total number of input bytes, or negative for preflighting
 *                   (only the size is computed)
 * @param outData    output buffer; may be the same buffer as inData
 * @param pErrorCode in/out error code
 * @return header size plus 2*topOffset bytes, or 0 on failure
 */
U_CAPI int32_t U_EXPORT2
ucnv_swapAliases(const UDataSwapper *ds,
                 const void *inData, int32_t length, void *outData,
                 UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize;

    const uint16_t *inTable;
    const uint32_t *inSectionSizes;
    uint32_t toc[offsetsCount];
    uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
    uint32_t i, count, tocLength, topOffset;

    /* stack storage used for sorting when the alias count is small enough */
    TempRow rows[STACK_ROW_CAPACITY];
    uint16_t resort[STACK_ROW_CAPACITY];
    TempAliasTable tempTable;

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
        pInfo->dataFormat[1]==0x76 &&
        pInfo->dataFormat[2]==0x41 &&
        pInfo->dataFormat[3]==0x6c &&
        pInfo->formatVersion[0]==3
    )) {
        udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /* an alias table must contain at least the table of contents array */
    if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
        udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
                         length-headerSize);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* the same bytes are viewed as 32-bit section sizes and as 16-bit table units */
    inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
    inTable=(const uint16_t *)inSectionSizes;
    uprv_memset(toc, 0, sizeof(toc));
    toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
    /* tocLength also indexes toc[]/offsets[] below, so it must stay below offsetsCount */
    if(tocLength<minTocLength || offsetsCount<=tocLength) {
        udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return 0;
    }

    /* read the known part of the table of contents */
    for(i=converterListIndex; i<=tocLength; ++i) {
        toc[i]=ds->readUInt32(inSectionSizes[i]);
    }

    /* compute offsets */
    uprv_memset(offsets, 0, sizeof(offsets));
    offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
    for(i=tagListIndex; i<=tocLength; ++i) {
        offsets[i]=offsets[i-1]+toc[i-1];
    }

    /* compute the overall size of the after-header data, in numbers of 16-bit units */
    /* (i is tocLength+1 here, so this is the end of the last section) */
    topOffset=offsets[i-1]+toc[i-1];

    if(length>=0) {
        uint16_t *outTable;
        const uint16_t *p, *p2;
        uint16_t *q, *q2;
        uint16_t oldIndex;

        if((length-headerSize)<(2*(int32_t)topOffset)) {
            udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
                             length-headerSize);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        outTable=(uint16_t *)((char *)outData+headerSize);

        /* swap the entire table of contents */
        ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);

        /* swap unnormalized strings & normalized strings */
        ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
                             outTable+offsets[stringTableIndex], pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
            return 0;
        }

        if(ds->inCharset==ds->outCharset) {
            /* no need to sort, just swap all 16-bit values together */
            ds->swapArray16(ds,
                            inTable+offsets[converterListIndex],
                            2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
                            outTable+offsets[converterListIndex],
                            pErrorCode);
        } else {
            /* allocate the temporary table for sorting */
            count=toc[aliasListIndex];

            /* the comparison reads the already-swapped strings in the output table */
            tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */

            if(count<=STACK_ROW_CAPACITY) {
                tempTable.rows=rows;
                tempTable.resort=resort;
            } else {
                /* one allocation holds count rows followed by count 16-bit resort slots */
                tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
                if(tempTable.rows==NULL) {
                    udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
                                     count);
                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
                    return 0;
                }
                tempTable.resort=(uint16_t *)(tempTable.rows+count);
            }

            if(ds->outCharset==U_ASCII_FAMILY) {
                tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
            } else /* U_EBCDIC_FAMILY */ {
                tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
            }

            /*
             * Sort unique aliases+mapped names.
             *
             * We need to sort the list again by outCharset strings because they
             * sort differently for different charset families.
             * First we set up a temporary table with the string indexes and
             * sorting indexes and sort that.
             * Then we permute and copy/swap the actual values.
             */
            p=inTable+offsets[aliasListIndex];
            q=outTable+offsets[aliasListIndex];

            p2=inTable+offsets[untaggedConvArrayIndex];
            q2=outTable+offsets[untaggedConvArrayIndex];

            for(i=0; i<count; ++i) {
                tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
                tempTable.rows[i].sortIndex=(uint16_t)i;
            }

            uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
                           io_compareRows, &tempTable,
                           FALSE, pErrorCode);

            if(U_SUCCESS(*pErrorCode)) {
                /* copy/swap/permute items */
                if(p!=q) {
                    /* separate buffers: write each item directly to its new position */
                    for(i=0; i<count; ++i) {
                        oldIndex=tempTable.rows[i].sortIndex;
                        ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
                        ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
                    }
                } else {
                    /*
                     * If we swap in-place, then the permutation must use another
                     * temporary array (tempTable.resort)
                     * before the results are copied to the outBundle.
                     */
                    uint16_t *r=tempTable.resort;

                    for(i=0; i<count; ++i) {
                        oldIndex=tempTable.rows[i].sortIndex;
                        ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
                    }
                    uprv_memcpy(q, r, 2*count);

                    /* the same scratch array is reused for the second parallel array */
                    for(i=0; i<count; ++i) {
                        oldIndex=tempTable.rows[i].sortIndex;
                        ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
                    }
                    uprv_memcpy(q2, r, 2*count);
                }
            }

            /* free the sorting table only if it was heap-allocated above */
            if(tempTable.rows!=rows) {
                uprv_free(tempTable.rows);
            }

            if(U_FAILURE(*pErrorCode)) {
                udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
                                 count);
                return 0;
            }

            /* swap remaining 16-bit values */
            ds->swapArray16(ds,
                            inTable+offsets[converterListIndex],
                            2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
                            outTable+offsets[converterListIndex],
                            pErrorCode);
            ds->swapArray16(ds,
                            inTable+offsets[taggedAliasArrayIndex],
                            2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
                            outTable+offsets[taggedAliasArrayIndex],
                            pErrorCode);
        }
    }

    return headerSize+2*(int32_t)topOffset;
}
U_NAMESPACE_USE

/* definitions */

/* Unicode property (value) aliases data swapping --------------------------- */

/*
 * Swap a pnames.icu data file (dataFormat "pnam", formatVersion 2).
 *
 * After the data header come indexes[] of int32_t values. The bytes up to
 * indexes[IX_BYTE_TRIES_OFFSET] (the indexes and the value maps) are swapped
 * as 32-bit units; the remaining bytes up to indexes[IX_TOTAL_SIZE] are
 * copied unchanged.
 *
 * @param ds         swapper providing the read/swap callbacks
 * @param inData     input data, starting with the data header
 * @param length     total number of input bytes, or negative for preflighting
 * @param outData    output buffer; may be the same buffer as inData
 * @param pErrorCode in/out error code
 * @return header size plus the total size read from the indexes, or 0 on failure
 */
static int32_t U_CALLCONV
upname_swap(const UDataSwapper *ds,
            const void *inData, int32_t length, void *outData,
            UErrorCode *pErrorCode) {
    /* udata_swapDataHeader checks the arguments */
    int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    const UDataInfo *pInfo=
        reinterpret_cast<const UDataInfo *>(
            static_cast<const char *>(inData)+4);
    if(!(
        pInfo->dataFormat[0]==0x70 &&   /* dataFormat="pnam" */
        pInfo->dataFormat[1]==0x6e &&
        pInfo->dataFormat[2]==0x61 &&
        pInfo->dataFormat[3]==0x6d &&
        pInfo->formatVersion[0]==2
    )) {
        udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    const uint8_t *inBytes=static_cast<const uint8_t *>(inData)+headerSize;
    uint8_t *outBytes=static_cast<uint8_t *>(outData)+headerSize;

    if(length>=0) {
        length-=headerSize;
        // formatVersion 2 initially has indexes[8], 32 bytes.
        if(length<32) {
            udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
                             (int)length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    const int32_t *inIndexes=reinterpret_cast<const int32_t *>(inBytes);
    int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]);
    if(length>=0) {
        if(length<totalSize) {
            udata_printError(ds, "upname_swap(): too few bytes (%d after header, should be %d) "
                             "for pnames.icu\n",
                             (int)length, (int)totalSize);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        int32_t numBytesIndexesAndValueMaps=
            udata_readInt32(ds, inIndexes[PropNameData::IX_BYTE_TRIES_OFFSET]);

        // The offset comes from the data file. It is used as a swap length
        // and to compute the size of the copy below, so it must lie
        // within 0..totalSize.
        if(numBytesIndexesAndValueMaps<0 || numBytesIndexesAndValueMaps>totalSize) {
            udata_printError(ds, "upname_swap(): byte tries offset %d is outside of the data (total size %d) "
                             "for pnames.icu\n",
                             (int)numBytesIndexesAndValueMaps, (int)totalSize);
            *pErrorCode=U_INVALID_FORMAT_ERROR;
            return 0;
        }

        // Swap the indexes[] and the valueMaps[].
        ds->swapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode);

        // Copy the rest of the data.
        if(inBytes!=outBytes) {
            uprv_memcpy(outBytes+numBytesIndexesAndValueMaps,
                        inBytes+numBytesIndexesAndValueMaps,
                        totalSize-numBytesIndexesAndValueMaps);
        }

        // We need not swap anything else:
        //
        // The ByteTries are already byte-serialized, and are fixed on ASCII.
        // (On an EBCDIC machine, the input string is converted to lowercase ASCII
        // while matching.)
        //
        // The name groups are mostly invariant characters, but since we only
        // generate, and keep in subversion, ASCII versions of pnames.icu,
        // and since only ICU4J uses the pnames.icu data file
        // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files,
        // we just copy those bytes too.
    }

    return headerSize+totalSize;
}
/**
 * Generic swap entry point: identifies the data format of an ICU data image
 * and dispatches to the matching entry in swapFns[].
 *
 * @param ds         swapper that supplies the read/swap primitives
 * @param inData     input image, beginning with the data header
 * @param length     input length in bytes; passed through unchanged to the
 *                   format-specific swapper
 * @param outData    output buffer; passed through to the format-specific swapper
 * @param pErrorCode in/out error code; nothing is done if it is NULL or
 *                   already indicates a failure
 * @return the value returned by the format-specific swapper, or 0 if the
 *         header preflight fails or the data format is not in swapFns[]
 *         (in the latter case *pErrorCode is set to U_UNSUPPORTED_ERROR)
 */
U_CAPI int32_t U_EXPORT2
udata_swap(const UDataSwapper *ds,
           const void *inData, int32_t length, void *outData,
           UErrorCode *pErrorCode) {
    char dataFormatChars[4];
    const UDataInfo *pInfo;
    int32_t i, swappedLength;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /*
     * Preflight the header first; checks for illegal arguments, too.
     * Do not swap the header right away because the format-specific swapper
     * will swap it, get the headerSize again, and also use the header
     * information. Otherwise we would have to pass some of the information
     * and not be able to use the UDataSwapFn signature.
     */
    udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode);

    /*
     * If we wanted udata_swap() to also handle non-loadable data like a UTrie,
     * then we could check here for further known magic values and structures.
     */
    if(U_FAILURE(*pErrorCode)) {
        return 0; /* the data format was not recognized */
    }

    pInfo=(const UDataInfo *)((const char *)inData+4);

    {
        /* convert the data format from ASCII to Unicode to the system charset */
        UChar u[4]={
            pInfo->dataFormat[0], pInfo->dataFormat[1],
            pInfo->dataFormat[2], pInfo->dataFormat[3]
        };

        if(uprv_isInvariantUString(u, 4)) {
            u_UCharsToChars(u, dataFormatChars, 4);
        } else {
            /* not printable in the system charset: show placeholders in messages */
            dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?';
        }
    }

    /* dispatch to the swap function for the dataFormat */
    for(i=0; i<UPRV_LENGTHOF(swapFns); ++i) {
        if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) {
            swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode);

            if(U_FAILURE(*pErrorCode)) {
                udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n",
                                 pInfo->dataFormat[0], pInfo->dataFormat[1],
                                 pInfo->dataFormat[2], pInfo->dataFormat[3],
                                 dataFormatChars[0], dataFormatChars[1],
                                 dataFormatChars[2], dataFormatChars[3],
                                 u_errorName(*pErrorCode));
            } else if(swappedLength<(length-15)) {
                /*
                 * swapped less than expected (more than 15 bytes short).
                 * The format string has exactly ten conversions, so exactly
                 * ten arguments are passed; there is no error name to print
                 * because *pErrorCode indicates success on this path.
                 */
                udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
                                 swappedLength, length,
                                 pInfo->dataFormat[0], pInfo->dataFormat[1],
                                 pInfo->dataFormat[2], pInfo->dataFormat[3],
                                 dataFormatChars[0], dataFormatChars[1],
                                 dataFormatChars[2], dataFormatChars[3]);
            }

            return swappedLength;
        }
    }

    /* the dataFormat was not recognized */
    udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
                     pInfo->dataFormat[0], pInfo->dataFormat[1],
                     pInfo->dataFormat[2], pInfo->dataFormat[3],
                     dataFormatChars[0], dataFormatChars[1],
                     dataFormatChars[2], dataFormatChars[3]);

    *pErrorCode=U_UNSUPPORTED_ERROR;
    return 0;
}
/**
 * Swapper for unorm.icu (dataFormat "Norm", formatVersion 2).
 *
 * Reads the 32 leading indexes, derives the total data size from them and,
 * unless only the size was requested, swaps each section in file order:
 * indexes[], main UTrie, the 16-bit extraData/combiningTable arrays,
 * the optional FCD and aux UTries, and the trailing 16-bit array whose
 * length is indexes[_NORM_INDEX_CANON_SET_COUNT].
 *
 * @param ds         swapper that supplies the read/swap primitives
 * @param inData     input image, beginning with the data header
 * @param length     number of input bytes; when negative, only the size is
 *                   computed and the bytes after the header are not touched
 * @param outData    output buffer (the code supports outData==inData)
 * @param pErrorCode in/out error code
 * @return headerSize + computed data size, or 0 on error
 */
static int32_t U_CALLCONV
unorm_swap(const UDataSwapper *ds,
           const void *inData, int32_t length, void *outData,
           UErrorCode *pErrorCode) {
    int32_t indexes[32];
    int32_t k, pos, numBytes, total;

    /* udata_swapDataHeader checks the arguments */
    const int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    const UDataInfo *info=(const UDataInfo *)((const char *)inData+4);
    if( info->dataFormat[0]!=0x4e ||   /* dataFormat="Norm" */
        info->dataFormat[1]!=0x6f ||
        info->dataFormat[2]!=0x72 ||
        info->dataFormat[3]!=0x6d ||
        info->formatVersion[0]!=2
    ) {
        udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n",
                         info->dataFormat[0], info->dataFormat[1],
                         info->dataFormat[2], info->dataFormat[3],
                         info->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    const uint8_t *src=(const uint8_t *)inData+headerSize;
    uint8_t *dest=(uint8_t *)outData+headerSize;
    const int32_t *rawIndexes=(const int32_t *)src;

    if(length>=0) {
        length-=headerSize;
        if(length<32*4) {
            udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n",
                             length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */
    for(k=0; k<32; ++k) {
        indexes[k]=udata_readInt32(ds, rawIndexes[k]);
    }

    /* calculate the total length of the data */
    total=
        32*4+ /* size of indexes[] */
        indexes[_NORM_INDEX_TRIE_SIZE]+
        indexes[_NORM_INDEX_UCHAR_COUNT]*2+
        indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+
        indexes[_NORM_INDEX_FCD_TRIE_SIZE]+
        indexes[_NORM_INDEX_AUX_TRIE_SIZE]+
        indexes[_NORM_INDEX_CANON_SET_COUNT]*2;

    /* A still-negative length means the caller only wants the size. */
    if(length<0) {
        return headerSize+total;
    }

    if(length<total) {
        udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n",
                         length);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* copy the data for inaccessible bytes */
    if(src!=dest) {
        uprv_memcpy(dest, src, total);
    }

    pos=0;

    /* swap the indexes[] */
    numBytes=32*4;
    ds->swapArray32(ds, src, numBytes, dest, pErrorCode);
    pos+=numBytes;

    /* swap the main UTrie */
    numBytes=indexes[_NORM_INDEX_TRIE_SIZE];
    utrie_swap(ds, src+pos, numBytes, dest+pos, pErrorCode);
    pos+=numBytes;

    /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */
    numBytes=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2;
    ds->swapArray16(ds, src+pos, numBytes, dest+pos, pErrorCode);
    pos+=numBytes;

    /* swap the FCD UTrie, if there is one */
    numBytes=indexes[_NORM_INDEX_FCD_TRIE_SIZE];
    if(numBytes!=0) {
        utrie_swap(ds, src+pos, numBytes, dest+pos, pErrorCode);
        pos+=numBytes;
    }

    /* swap the aux UTrie, if there is one */
    numBytes=indexes[_NORM_INDEX_AUX_TRIE_SIZE];
    if(numBytes!=0) {
        utrie_swap(ds, src+pos, numBytes, dest+pos, pErrorCode);
        pos+=numBytes;
    }

    /* swap the final uint16_t array, sized by _NORM_INDEX_CANON_SET_COUNT */
    numBytes=indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
    ds->swapArray16(ds, src+pos, numBytes, dest+pos, pErrorCode);
    pos+=numBytes;

    return headerSize+total;
}
U_CDECL_END

/**
 * Swaps one NameToEnum map located at byte offset pos.
 *
 * temp is a scratch image parallel to the data: a non-zero count at temp+pos
 * marks a map that has already been handled, and the space behind that count
 * is reused for sorting when the name/enum arrays must be re-sorted
 * (that is, when the input and output charsets differ).
 *
 * @param ds         swapper that supplies the read/swap/write primitives
 * @param inBytes    start of the input data
 * @param length     number of input bytes; when negative, only the size of
 *                   the map is determined
 * @param outBytes   start of the output data (may be equal to inBytes);
 *                   must already contain the swapped strings
 * @param temp       scratch buffer indexed with the same offsets as the data
 * @param pos        byte offset of this map
 * @param pErrorCode in/out error code
 * @return the size of the map as reported by getSize(), or 0 on error
 */
int32_t
NameToEnum::swap(const UDataSwapper *ds,
                 const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
                 uint8_t *temp, int32_t pos,
                 UErrorCode *pErrorCode) {
    CompareContext cmp;
    int32_t k, size, oldIndex;

    NameToEnum *tempMap=(NameToEnum *)(temp+pos);
    if(tempMap->count!=0) {
        /* this map was swapped already */
        size=tempMap->getSize();
        return size;
    }

    const NameToEnum *inMap=(const NameToEnum *)(inBytes+pos);
    NameToEnum *outMap=(NameToEnum *)(outBytes+pos);

    /* record the count in temp; this also marks the map as visited */
    tempMap->count=udata_readInt32(ds, inMap->count);
    size=tempMap->getSize();

    /* A negative length means that only the size is wanted. */
    if(length<0) {
        return size;
    }

    if(length<(pos+size) && length<(int32_t)sizeof(PropertyAliases)) {
        udata_printError(ds, "upname_swap(NameToEnum): too few bytes (%d after header)\n"
                             " for pnames.icu NameToEnum[%d] at %d\n",
                         length, tempMap->count, pos);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* swap count */
    ds->swapArray32(ds, inMap, 4, outMap, pErrorCode);

    /* the name array directly follows the enum array */
    const EnumValue *inEnums=inMap->getEnumArray();
    EnumValue *outEnums=outMap->getEnumArray();
    const Offset *inNames=(const Offset *)(inEnums+tempMap->count);
    Offset *outNames=(Offset *)(outEnums+tempMap->count);

    if(ds->inCharset==ds->outCharset) {
        /* no need to sort, just swap the enum/name arrays */
        ds->swapArray32(ds, inEnums, tempMap->count*4, outEnums, pErrorCode);
        ds->swapArray16(ds, inNames, tempMap->count*2, outNames, pErrorCode);
        return size;
    }

    /*
     * The name and enum arrays are sorted by names and must be resorted
     * if inCharset!=outCharset.
     * We use the corresponding part of the temp array to sort an array
     * of pairs of name offsets and sorting indexes.
     * Then the sorting indexes are used to permutate-swap the name and enum arrays.
     *
     * The outBytes must already contain the swapped strings.
     */
    NameAndIndex *rows=(NameAndIndex *)tempMap->getEnumArray();
    for(k=0; k<tempMap->count; ++k) {
        rows[k].name=udata_readInt16(ds, inNames[k]);
        rows[k].index=(Offset)k;
    }

    /*
     * use a stable sort to avoid shuffling of equal strings,
     * which makes testing harder
     */
    cmp.chars=(const char *)outBytes;
    if (ds->outCharset==U_ASCII_FAMILY) {
        cmp.propCompare=uprv_compareASCIIPropertyNames;
    } else {
        cmp.propCompare=uprv_compareEBCDICPropertyNames;
    }
    uprv_sortArray(rows, tempMap->count, sizeof(NameAndIndex),
                   upname_compareRows, &cmp,
                   TRUE, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        udata_printError(ds, "upname_swap(NameToEnum).uprv_sortArray(%d items) failed\n",
                         tempMap->count);
        return 0;
    }

    /* copy/swap/permutate _enumArray[] and _nameArray[] */
    if(inEnums==outEnums) {
        /*
         * in-place swapping: need to permutate into a temporary array
         * and then copy back to not destroy the data
         */

        /* write name offsets directly from the sorted rows */
        for(k=0; k<tempMap->count; ++k) {
            ds->writeUInt16((uint16_t *)outNames+k, (uint16_t)rows[k].name);
        }

        /*
         * compress the oldIndexes into a separate array to make space for tempEnums
         * the tempMap _nameArray becomes oldIndexes[], getting the index
         * values from the 2D rows[],
         * while rows=tempMap _enumArray[] becomes tempEnums[]
         * this saves us allocating more memory
         *
         * it works because sizeof(NameAndIndex)<=sizeof(EnumValue)
         * and because the nameArray[] can be used for oldIndexes[]
         */
        EnumValue *tempEnums=(EnumValue *)rows;
        Offset *oldIndexes=(Offset *)(rows+tempMap->count);

        /* copy rows[].index values into oldIndexes[] */
        for(k=0; k<tempMap->count; ++k) {
            oldIndexes[k]=rows[k].index;
        }

        /* permutate inEnums[] into tempEnums[] */
        for(k=0; k<tempMap->count; ++k) {
            ds->swapArray32(ds, inEnums+oldIndexes[k], 4, tempEnums+k, pErrorCode);
        }

        /* copy tempEnums[] to outEnums[] */
        uprv_memcpy(outEnums, tempEnums, tempMap->count*4);
    } else {
        /* separate buffers: permutate directly from input to output */
        for(k=0; k<tempMap->count; ++k) {
            oldIndex=rows[k].index;
            ds->swapArray32(ds, inEnums+oldIndex, 4, outEnums+k, pErrorCode);
            ds->swapArray16(ds, inNames+oldIndex, 2, outNames+k, pErrorCode);
        }
    }

    return size;
}
/**
 * Swapper for Unicode properties data (dataFormat "UPro",
 * formatVersion 3 through 7).
 *
 * Reads the indexes[] array, which holds 32-bit-unit offsets of the
 * sections, and swaps each section with the matching unit width.
 *
 * @param ds         swapper that supplies the read/swap primitives
 * @param inData     input image, beginning with the data header
 * @param length     number of input bytes; when negative, only the size is
 *                   computed and the bytes after the header are not touched
 * @param outData    output buffer (the code supports outData==inData)
 * @param pErrorCode in/out error code
 * @return headerSize + 4*indexes[UPROPS_RESERVED_INDEX_7], or 0 on error
 */
static int32_t U_CALLCONV
uprops_swap(const UDataSwapper *ds,
            const void *inData, int32_t length, void *outData,
            UErrorCode *pErrorCode) {
    int32_t dataIndexes[UPROPS_INDEX_COUNT];
    int32_t k;

    /* udata_swapDataHeader checks the arguments */
    const int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    const UDataInfo *info=(const UDataInfo *)((const char *)inData+4);
    if(!(
        info->dataFormat[0]==0x55 &&   /* dataFormat="UPro" */
        info->dataFormat[1]==0x50 &&
        info->dataFormat[2]==0x72 &&
        info->dataFormat[3]==0x6f &&
        (3<=info->formatVersion[0] && info->formatVersion[0]<=7) &&
        (info->formatVersion[0]>=7 ||
            (info->formatVersion[2]==UTRIE_SHIFT &&
             info->formatVersion[3]==UTRIE_INDEX_SHIFT))
    )) {
        udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n",
                         info->dataFormat[0], info->dataFormat[1],
                         info->dataFormat[2], info->dataFormat[3],
                         info->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /* the properties file must contain at least the indexes array */
    if(length>=0 && (length-headerSize)<(int32_t)sizeof(dataIndexes)) {
        udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
                         length-headerSize);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* read the indexes */
    const int32_t *in32=(const int32_t *)((const char *)inData+headerSize);
    for(k=0; k<UPROPS_INDEX_COUNT; ++k) {
        dataIndexes[k]=udata_readInt32(ds, in32[k]);
    }

    /*
     * comments are copied from the data format description in genprops/store.c
     * indexes[] constants are in uprops.h
     */
    if(length>=0) {
        int32_t dataTop;
        int32_t *out32;

        /* section boundaries, in 32-bit units from the start of the indexes */
        const int32_t props32Start=dataIndexes[UPROPS_PROPS32_INDEX];
        const int32_t exceptionsTop=dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX];
        const int32_t additionalTrie=dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX];
        const int32_t additionalVectors=dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX];
        const int32_t scriptExtensions=dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX];
        const int32_t reserved7=dataIndexes[UPROPS_RESERVED_INDEX_7];

        /*
         * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size.
         * In earlier formatVersions, it is 0 and a lower dataIndexes entry
         * has the top of the last item.
         * Walk downwards to the first non-zero entry.
         */
        k=UPROPS_DATA_TOP_INDEX;
        while(k>0 && (dataTop=dataIndexes[k])==0) {
            --k;
        }

        if((length-headerSize)<(4*dataTop)) {
            udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
                             length-headerSize);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        out32=(int32_t *)((char *)outData+headerSize);

        /* copy everything for inaccessible data (padding) */
        if(in32!=out32) {
            uprv_memcpy(out32, in32, 4*(size_t)dataTop);
        }

        /* swap the indexes[16] */
        ds->swapArray32(ds, in32, 4*UPROPS_INDEX_COUNT, out32, pErrorCode);

        /*
         * swap the main properties UTrie
         * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))
         */
        utrie2_swapAnyVersion(ds,
            in32+UPROPS_INDEX_COUNT,
            4*(props32Start-UPROPS_INDEX_COUNT),
            out32+UPROPS_INDEX_COUNT,
            pErrorCode);

        /*
         * swap the properties and exceptions words
         * P  const uint32_t props32[i1-i0];
         * E  const uint32_t exceptions[i2-i1];
         */
        ds->swapArray32(ds,
            in32+props32Start,
            4*(exceptionsTop-props32Start),
            out32+props32Start,
            pErrorCode);

        /*
         * swap the UChars
         * U  const UChar uchars[2*(i3-i2)];
         */
        ds->swapArray16(ds,
            in32+exceptionsTop,
            4*(additionalTrie-exceptionsTop),
            out32+exceptionsTop,
            pErrorCode);

        /*
         * swap the additional UTrie
         * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties
         */
        utrie2_swapAnyVersion(ds,
            in32+additionalTrie,
            4*(additionalVectors-additionalTrie),
            out32+additionalTrie,
            pErrorCode);

        /*
         * swap the properties vectors
         * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4];
         */
        ds->swapArray32(ds,
            in32+additionalVectors,
            4*(scriptExtensions-additionalVectors),
            out32+additionalVectors,
            pErrorCode);

        // swap the Script_Extensions data
        // SCX const uint16_t scriptExtensions[2*(i7-i6)];
        ds->swapArray16(ds,
            in32+scriptExtensions,
            4*(reserved7-scriptExtensions),
            out32+scriptExtensions,
            pErrorCode);
    }

    /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */
    return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX_7];
}
/**
 * Swapper for bidi/shaping data (dataFormat "BiDi", formatVersion 1 or 2).
 *
 * Reads the 16 leading indexes, takes the total data size from
 * indexes[UBIDI_IX_LENGTH] and, unless only the size was requested, swaps
 * the indexes, the trie and mirrors[]; the byte-wide jgArray[]/jgArray2[]
 * are only skipped over.
 *
 * @param ds         swapper that supplies the read/swap primitives
 * @param inData     input image, beginning with the data header
 * @param length     number of input bytes; when negative, only the size is
 *                   computed and the bytes after the header are not touched
 * @param outData    output buffer (the code supports outData==inData)
 * @param pErrorCode in/out error code
 * @return headerSize + indexes[UBIDI_IX_LENGTH], or 0 on error
 */
static int32_t U_CALLCONV
ubidi_swap(const UDataSwapper *ds,
           const void *inData, int32_t length, void *outData,
           UErrorCode *pErrorCode) {
    int32_t indexes[16];
    int32_t k, pos, numBytes, total;

    /* udata_swapDataHeader checks the arguments */
    const int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    const UDataInfo *info=(const UDataInfo *)((const char *)inData+4);
    if(!(
        info->dataFormat[0]==UBIDI_FMT_0 &&    /* dataFormat="BiDi" */
        info->dataFormat[1]==UBIDI_FMT_1 &&
        info->dataFormat[2]==UBIDI_FMT_2 &&
        info->dataFormat[3]==UBIDI_FMT_3 &&
        ((info->formatVersion[0]==1 &&
          info->formatVersion[2]==UTRIE_SHIFT &&
          info->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
         info->formatVersion[0]==2)
    )) {
        udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n",
                         info->dataFormat[0], info->dataFormat[1],
                         info->dataFormat[2], info->dataFormat[3],
                         info->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    const uint8_t *src=(const uint8_t *)inData+headerSize;
    uint8_t *dest=(uint8_t *)outData+headerSize;
    const int32_t *rawIndexes=(const int32_t *)src;

    if(length>=0) {
        length-=headerSize;
        if(length<16*4) {
            udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n",
                             length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */
    for(k=0; k<16; ++k) {
        indexes[k]=udata_readInt32(ds, rawIndexes[k]);
    }

    /* get the total length of the data */
    total=indexes[UBIDI_IX_LENGTH];

    /* A still-negative length means the caller only wants the size. */
    if(length<0) {
        return headerSize+total;
    }

    if(length<total) {
        udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n",
                         length);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* copy the data for inaccessible bytes */
    if(src!=dest) {
        uprv_memcpy(dest, src, total);
    }

    pos=0;

    /* swap the int32_t indexes[] */
    numBytes=indexes[UBIDI_IX_INDEX_TOP]*4;
    ds->swapArray32(ds, src, numBytes, dest, pErrorCode);
    pos+=numBytes;

    /* swap the UTrie */
    numBytes=indexes[UBIDI_IX_TRIE_SIZE];
    utrie2_swapAnyVersion(ds, src+pos, numBytes, dest+pos, pErrorCode);
    pos+=numBytes;

    /* swap the uint32_t mirrors[] */
    numBytes=indexes[UBIDI_IX_MIRROR_LENGTH]*4;
    ds->swapArray32(ds, src+pos, numBytes, dest+pos, pErrorCode);
    pos+=numBytes;

    /* just skip the uint8_t jgArray[] and jgArray2[] */
    numBytes=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START];
    pos+=numBytes;
    numBytes=indexes[UBIDI_IX_JG_LIMIT2]-indexes[UBIDI_IX_JG_START2];
    pos+=numBytes;

    /* the sections must add up to the recorded total */
    U_ASSERT(pos==total);

    return headerSize+total;
}
int32_t PropertyAliases::swap(const UDataSwapper *ds, const uint8_t *inBytes, int32_t length, uint8_t *outBytes, UErrorCode *pErrorCode) { const PropertyAliases *inAliases; PropertyAliases *outAliases; PropertyAliases aliases; const ValueMap *inValueMaps; ValueMap *outValueMaps; ValueMap valueMap; uint8_t *temp; int32_t i; inAliases=(const PropertyAliases *)inBytes; outAliases=(PropertyAliases *)outBytes; /* read the input PropertyAliases - all 16-bit values */ for(i=0; i<(int32_t)sizeof(PropertyAliases)/2; ++i) { ((uint16_t *)&aliases)[i]=ds->readUInt16(((const uint16_t *)inBytes)[i]); } if(length>=0) { if(length<aliases.total_size) { udata_printError(ds, "upname_swap(): too few bytes (%d after header) for all of pnames.icu\n", length); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } /* copy the data for inaccessible bytes */ if(inBytes!=outBytes) { uprv_memcpy(outBytes, inBytes, aliases.total_size); } /* swap the PropertyAliases class fields */ ds->swapArray16(ds, inAliases, sizeof(PropertyAliases), outAliases, pErrorCode); /* swap the name groups */ ds->swapArray16(ds, inBytes+aliases.nameGroupPool_offset, aliases.stringPool_offset-aliases.nameGroupPool_offset, outBytes+aliases.nameGroupPool_offset, pErrorCode); /* swap the strings */ udata_swapInvStringBlock(ds, inBytes+aliases.stringPool_offset, aliases.total_size-aliases.stringPool_offset, outBytes+aliases.stringPool_offset, pErrorCode); /* * alloc uint8_t temp[total_size] and reset it * swap each top-level struct, put at least the count fields into temp * use subclass-specific swap() functions * enumerate value maps, for each * if temp does not have count!=0 yet * read count, put it into temp * swap the array(s) * resort strings in name->enum maps * swap value maps */ temp=(uint8_t *)uprv_malloc(aliases.total_size); if(temp==NULL) { udata_printError(ds, "upname_swap(): unable to allocate temp memory (%d bytes)\n", aliases.total_size); *pErrorCode=U_MEMORY_ALLOCATION_ERROR; return 0; } uprv_memset(temp, 0, 
aliases.total_size); /* swap properties->name groups map */ NonContiguousEnumToOffset::swap(ds, inBytes, length, outBytes, temp, aliases.enumToName_offset, pErrorCode); /* swap name->properties map */ NameToEnum::swap(ds, inBytes, length, outBytes, temp, aliases.nameToEnum_offset, pErrorCode); /* swap properties->value maps map */ NonContiguousEnumToOffset::swap(ds, inBytes, length, outBytes, temp, aliases.enumToValue_offset, pErrorCode); /* enumerate all ValueMaps and swap them */ inValueMaps=(const ValueMap *)(inBytes+aliases.valueMap_offset); outValueMaps=(ValueMap *)(outBytes+aliases.valueMap_offset); for(i=0; i<aliases.valueMap_count; ++i) { valueMap.enumToName_offset=udata_readInt16(ds, inValueMaps[i].enumToName_offset); valueMap.ncEnumToName_offset=udata_readInt16(ds, inValueMaps[i].ncEnumToName_offset); valueMap.nameToEnum_offset=udata_readInt16(ds, inValueMaps[i].nameToEnum_offset); if(valueMap.enumToName_offset!=0) { EnumToOffset::swap(ds, inBytes, length, outBytes, temp, valueMap.enumToName_offset, pErrorCode); } else if(valueMap.ncEnumToName_offset!=0) { NonContiguousEnumToOffset::swap(ds, inBytes, length, outBytes, temp, valueMap.ncEnumToName_offset, pErrorCode); } if(valueMap.nameToEnum_offset!=0) { NameToEnum::swap(ds, inBytes, length, outBytes, temp, valueMap.nameToEnum_offset, pErrorCode); } } /* swap the ValueMaps array itself */ ds->swapArray16(ds, inValueMaps, aliases.valueMap_count*sizeof(ValueMap), outValueMaps, pErrorCode); /* name groups and strings were swapped above */ /* release temp */ uprv_free(temp); } return aliases.total_size; }
/* code adapted from ucnv_swap() */

/**
 * Inspects one .cnv item and, if it is an extension-only MBCS table,
 * passes the name of its base table to checkIDSuffix() with the suffix ".cnv".
 *
 * Items whose conversionType is not UCNV_MBCS, and MBCS tables whose output
 * type is not MBCS_OUTPUT_EXT_ONLY, produce no checkIDSuffix() call.
 *
 * An unsupported .cnv format version is fatal: a message is written to stderr
 * and the process exits with U_UNSUPPORTED_ERROR. All other problems are
 * reported through udata_printError() and *pErrorCode.
 *
 * @param ds         swapper used to read multi-byte fields and to convert the
 *                   base name's invariant characters
 * @param itemName   name of the item being inspected (used in messages and
 *                   passed to checkIDSuffix())
 * @param pInfo      UDataInfo of the item; only formatVersion is examined
 * @param inBytes    item data after the data header
 * @param length     number of bytes at inBytes
 * @param check      callback forwarded to checkIDSuffix()
 * @param context    opaque pointer forwarded to checkIDSuffix()
 * @param pErrorCode in/out error code
 */
static void
ucnv_enumDependencies(const UDataSwapper *ds,
                      const char *itemName, const UDataInfo *pInfo,
                      const uint8_t *inBytes, int32_t length,
                      CheckDependency check, void *context,
                      UErrorCode *pErrorCode) {
    uint32_t staticDataSize;

    const UConverterStaticData *inStaticData;

    const _MBCSHeader *inMBCSHeader;
    uint8_t outputType;

    /* check format version */
    if(!(
        pInfo->formatVersion[0]==6 &&
        pInfo->formatVersion[1]>=2
    )) {
        fprintf(stderr, "icupkg/ucnv_enumDependencies(): .cnv format version %02x.%02x not supported\n",
                        pInfo->formatVersion[0], pInfo->formatVersion[1]);
        exit(U_UNSUPPORTED_ERROR);
    }

    /* read the initial UConverterStaticData structure after the UDataInfo header */
    inStaticData=(const UConverterStaticData *)inBytes;

    /* structSize is only read once length is known to cover the struct */
    if( length<(int32_t)sizeof(UConverterStaticData) ||
        (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize))
    ) {
        udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after header) for an ICU .cnv conversion table\n",
                            length);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return;
    }

    inBytes+=staticDataSize;
    length-=(int32_t)staticDataSize;

    /* check for supported conversionType values */
    if(inStaticData->conversionType==UCNV_MBCS) {
        /* MBCS data */
        uint32_t mbcsHeaderLength, mbcsHeaderFlags, mbcsHeaderOptions;
        int32_t extOffset;

        inMBCSHeader=(const _MBCSHeader *)inBytes;

        if(length<(int32_t)sizeof(_MBCSHeader)) {
            udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
                                length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return;
        }

        /* determine the header length, in 32-bit units, from the header version */
        if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) {
            mbcsHeaderLength=MBCS_HEADER_V4_LENGTH;
        } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 &&
                  ((mbcsHeaderOptions=ds->readUInt32(inMBCSHeader->options))&
                   MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0
        ) {
            mbcsHeaderLength=mbcsHeaderOptions&MBCS_OPT_LENGTH_MASK;
        } else {
            udata_printError(ds, "icupkg/ucnv_enumDependencies(): unsupported _MBCSHeader.version %d.%d\n",
                             inMBCSHeader->version[0], inMBCSHeader->version[1]);
            *pErrorCode=U_UNSUPPORTED_ERROR;
            return;
        }

        /* flags: low byte is the output type, the upper bits are the extension offset */
        mbcsHeaderFlags=ds->readUInt32(inMBCSHeader->flags);
        extOffset=(int32_t)(mbcsHeaderFlags>>8);
        outputType=(uint8_t)mbcsHeaderFlags;

        if(outputType==MBCS_OUTPUT_EXT_ONLY) {
            /*
             * extension-only file,
             * contains a base name instead of normal base table data
             */
            char baseName[32];
            int32_t baseNameLength;

            /* there is extension data after the base data, see ucnv_ext.h */
            if(length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) {
                udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n",
                                 length);
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return;
            }

            /* swap the base name, between the header and the extension data */
            const char *inBaseName=(const char *)inBytes+mbcsHeaderLength*4;
            baseNameLength=(int32_t)strlen(inBaseName);
            if(baseNameLength>=(int32_t)sizeof(baseName)) {
                /* %ld requires a long argument: cast the int32_t explicitly */
                udata_printError(ds, "icupkg/ucnv_enumDependencies(%s): base name length %ld too long\n",
                                 itemName, (long)baseNameLength);
                *pErrorCode=U_UNSUPPORTED_ERROR;
                return;
            }
            /* +1 so that the terminating NUL is converted into baseName as well */
            ds->swapInvChars(ds, inBaseName, baseNameLength+1, baseName, pErrorCode);

            checkIDSuffix(itemName, baseName, -1, ".cnv", check, context, pErrorCode);
        }
    }
}
U_CAPI int32_t U_EXPORT2 ucnv_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { const UDataInfo *pInfo; int32_t headerSize; const uint8_t *inBytes; uint8_t *outBytes; uint32_t offset, count, staticDataSize; int32_t size; const UConverterStaticData *inStaticData; UConverterStaticData *outStaticData; const _MBCSHeader *inMBCSHeader; _MBCSHeader *outMBCSHeader; _MBCSHeader mbcsHeader; uint8_t outputType; const int32_t *inExtIndexes; int32_t extOffset; /* udata_swapDataHeader checks the arguments */ headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return 0; } /* check data format and format version */ pInfo=(const UDataInfo *)((const char *)inData+4); if(!( pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ pInfo->dataFormat[1]==0x6e && pInfo->dataFormat[2]==0x76 && pInfo->dataFormat[3]==0x74 && pInfo->formatVersion[0]==6 && pInfo->formatVersion[1]>=2 )) { udata_printError(ds, "ucnv_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not recognized as an ICU .cnv conversion table\n", pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0], pInfo->formatVersion[1]); *pErrorCode=U_UNSUPPORTED_ERROR; return 0; } inBytes=(const uint8_t *)inData+headerSize; outBytes=(uint8_t *)outData+headerSize; /* read the initial UConverterStaticData structure after the UDataInfo header */ inStaticData=(const UConverterStaticData *)inBytes; outStaticData=(UConverterStaticData *)outBytes; if(length<0) { staticDataSize=ds->readUInt32(inStaticData->structSize); } else { length-=headerSize; if( length<sizeof(UConverterStaticData) || (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize)) ) { udata_printError(ds, "ucnv_swap(): too few bytes (%d after header) for an ICU .cnv conversion table\n", length); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } if(length>=0) { 
/* swap the static data */ if(inStaticData!=outStaticData) { uprv_memcpy(outStaticData, inStaticData, staticDataSize); } ds->swapArray32(ds, &inStaticData->structSize, 4, &outStaticData->structSize, pErrorCode); ds->swapArray32(ds, &inStaticData->codepage, 4, &outStaticData->codepage, pErrorCode); ds->swapInvChars(ds, inStaticData->name, uprv_strlen(inStaticData->name), outStaticData->name, pErrorCode); if(U_FAILURE(*pErrorCode)) { udata_printError(ds, "ucnv_swap(): error swapping converter name - %s\n", u_errorName(*pErrorCode)); return 0; } } inBytes+=staticDataSize; outBytes+=staticDataSize; if(length>=0) { length-=(int32_t)staticDataSize; } /* check for supported conversionType values */ if(inStaticData->conversionType==UCNV_MBCS) { /* swap MBCS data */ inMBCSHeader=(const _MBCSHeader *)inBytes; outMBCSHeader=(_MBCSHeader *)outBytes; if(!(inMBCSHeader->version[0]==4 || inMBCSHeader->version[1]>=1)) { udata_printError(ds, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n", inMBCSHeader->version[0], inMBCSHeader->version[1]); *pErrorCode=U_UNSUPPORTED_ERROR; return 0; } uprv_memcpy(mbcsHeader.version, inMBCSHeader->version, 4); mbcsHeader.countStates= ds->readUInt32(inMBCSHeader->countStates); mbcsHeader.countToUFallbacks= ds->readUInt32(inMBCSHeader->countToUFallbacks); mbcsHeader.offsetToUCodeUnits= ds->readUInt32(inMBCSHeader->offsetToUCodeUnits); mbcsHeader.offsetFromUTable= ds->readUInt32(inMBCSHeader->offsetFromUTable); mbcsHeader.offsetFromUBytes= ds->readUInt32(inMBCSHeader->offsetFromUBytes); mbcsHeader.flags= ds->readUInt32(inMBCSHeader->flags); mbcsHeader.fromUBytesLength= ds->readUInt32(inMBCSHeader->fromUBytesLength); extOffset=(int32_t)mbcsHeader.flags>>8; outputType=(uint8_t)mbcsHeader.flags; /* make sure that the output type is known */ switch(outputType) { case MBCS_OUTPUT_1: case MBCS_OUTPUT_2: case MBCS_OUTPUT_3: case MBCS_OUTPUT_4: case MBCS_OUTPUT_3_EUC: case MBCS_OUTPUT_4_EUC: case MBCS_OUTPUT_2_SISO: case MBCS_OUTPUT_EXT_ONLY: /* OK */ 
break; default: udata_printError(ds, "ucnv_swap(): unsupported MBCS output type 0x%x\n", outputType); *pErrorCode=U_UNSUPPORTED_ERROR; return 0; } /* calculate the length of the MBCS data */ if(extOffset==0) { size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsHeader.fromUBytesLength); /* avoid compiler warnings - not otherwise necessary, and the value does not matter */ inExtIndexes=NULL; } else { /* there is extension data after the base data, see ucnv_ext.h */ if(length>=0 && length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) { udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n", length); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } inExtIndexes=(const int32_t *)(inBytes+extOffset); size=extOffset+udata_readInt32(ds, inExtIndexes[UCNV_EXT_SIZE]); } if(length>=0) { if(length<size) { udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", length); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } /* copy the data for inaccessible bytes */ if(inBytes!=outBytes) { uprv_memcpy(outBytes, inBytes, size); } /* swap the _MBCSHeader */ ds->swapArray32(ds, &inMBCSHeader->countStates, 7*4, &outMBCSHeader->countStates, pErrorCode); if(outputType==MBCS_OUTPUT_EXT_ONLY) { /* * extension-only file, * contains a base name instead of normal base table data */ /* swap the base name, between the header and the extension data */ ds->swapInvChars(ds, inMBCSHeader+1, uprv_strlen((const char *)(inMBCSHeader+1)), outMBCSHeader+1, pErrorCode); } else { /* normal file with base table data */ /* swap the state table, 1kB per state */ ds->swapArray32(ds, inMBCSHeader+1, (int32_t)(mbcsHeader.countStates*1024), outMBCSHeader+1, pErrorCode); /* swap the toUFallbacks[] */ offset=sizeof(_MBCSHeader)+mbcsHeader.countStates*1024; ds->swapArray32(ds, inBytes+offset, (int32_t)(mbcsHeader.countToUFallbacks*8), outBytes+offset, pErrorCode); /* swap the unicodeCodeUnits[] 
*/ offset=mbcsHeader.offsetToUCodeUnits; count=mbcsHeader.offsetFromUTable-offset; ds->swapArray16(ds, inBytes+offset, (int32_t)count, outBytes+offset, pErrorCode); /* offset to the stage 1 table, independent of the outputType */ offset=mbcsHeader.offsetFromUTable; if(outputType==MBCS_OUTPUT_1) { /* SBCS: swap the fromU tables, all 16 bits wide */ count=(mbcsHeader.offsetFromUBytes-offset)+mbcsHeader.fromUBytesLength; ds->swapArray16(ds, inBytes+offset, (int32_t)count, outBytes+offset, pErrorCode); } else { /* otherwise: swap the stage tables separately */ /* stage 1 table: uint16_t[0x440 or 0x40] */ if(inStaticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { count=0x440*2; /* for all of Unicode */ } else { count=0x40*2; /* only BMP */ } ds->swapArray16(ds, inBytes+offset, (int32_t)count, outBytes+offset, pErrorCode); /* stage 2 table: uint32_t[] */ offset+=count; count=mbcsHeader.offsetFromUBytes-offset; ds->swapArray32(ds, inBytes+offset, (int32_t)count, outBytes+offset, pErrorCode); /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */ offset=mbcsHeader.offsetFromUBytes; count=mbcsHeader.fromUBytesLength; switch(outputType) { case MBCS_OUTPUT_2: case MBCS_OUTPUT_3_EUC: case MBCS_OUTPUT_2_SISO: ds->swapArray16(ds, inBytes+offset, (int32_t)count, outBytes+offset, pErrorCode); break; case MBCS_OUTPUT_4: ds->swapArray32(ds, inBytes+offset, (int32_t)count, outBytes+offset, pErrorCode); break; default: /* just uint8_t[], nothing to swap */ break; } } } if(extOffset!=0) { /* swap the extension data */ inBytes+=extOffset; outBytes+=extOffset; /* swap toUTable[] */ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_INDEX]); length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_LENGTH]); ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); /* swap toUUChars[] */ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_INDEX]); length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_LENGTH]); ds->swapArray16(ds, inBytes+offset, 
length*2, outBytes+offset, pErrorCode); /* swap fromUTableUChars[] */ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_UCHARS_INDEX]); length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_LENGTH]); ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); /* swap fromUTableValues[] */ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_VALUES_INDEX]); /* same length as for fromUTableUChars[] */ ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); /* no need to swap fromUBytes[] */ /* swap fromUStage12[] */ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]); length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]); ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); /* swap fromUStage3[] */ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]); length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]); ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); /* swap fromUStage3b[] */ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]); length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]); ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); /* swap indexes[] */ length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_INDEXES_LENGTH]); ds->swapArray32(ds, inBytes, length*4, outBytes, pErrorCode); } } } else {
/*
 * Swap inverse UCA collation data (invuca.icu).
 *
 * @param ds         swapper providing the read/swap callbacks
 * @param inData     start of the data, beginning with the ICU data header
 * @param length     number of bytes at inData, or negative to only compute
 *                   the size without swapping anything after the data header
 * @param outData    destination buffer; may be the same memory as inData
 * @param pErrorCode in/out ICU error code
 * @return size of the data header plus header.byteSize, or 0 on error
 */
U_CAPI int32_t U_EXPORT2
ucol_swapInverseUCA(const UDataSwapper *ds,
                    const void *inData, int32_t length, void *outData,
                    UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize;
    int32_t bodyLength;

    const uint8_t *inBytes;
    uint8_t *outBytes;

    const InverseUCATableHeader *inHeader;
    InverseUCATableHeader *outHeader;
    InverseUCATableHeader header={ 0 };

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x49 &&   /* dataFormat="InvC" */
        pInfo->dataFormat[1]==0x6e &&
        pInfo->dataFormat[2]==0x76 &&
        pInfo->dataFormat[3]==0x43 &&
        pInfo->formatVersion[0]==2 &&
        pInfo->formatVersion[1]>=1
    )) {
        udata_printError(ds, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0], pInfo->formatVersion[1]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    inBytes=(const uint8_t *)inData+headerSize;
    outBytes=(uint8_t *)outData+headerSize;

    inHeader=(const InverseUCATableHeader *)inBytes;
    outHeader=(InverseUCATableHeader *)outBytes;

    /*
     * The inverse UCA collation binary must contain at least the
     * InverseUCATableHeader, starting with its size field.
     * The minimum accepted here is 8*4 bytes.
     * Check the length against that minimum before reading the size field.
     */
    if(length<0) {
        header.byteSize=udata_readInt32(ds, inHeader->byteSize);
    } else {
        bodyLength=length-headerSize;
        if( bodyLength<(8*4) ||
            (uint32_t)bodyLength<(header.byteSize=udata_readInt32(ds, inHeader->byteSize))
        ) {
            /* report the byte count after the data header, as the message says */
            udata_printError(ds, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n",
                             bodyLength);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    if(length>=0) {
        /* copy everything, takes care of data that needs no swapping */
        if(inBytes!=outBytes) {
            uprv_memcpy(outBytes, inBytes, header.byteSize);
        }

        /* swap the necessary pieces in the order of their occurrence in the data */

        /* read more of the InverseUCATableHeader (the byteSize field was read above) */
        header.tableSize=   ds->readUInt32(inHeader->tableSize);
        header.contsSize=   ds->readUInt32(inHeader->contsSize);
        header.table=       ds->readUInt32(inHeader->table);
        header.conts=       ds->readUInt32(inHeader->conts);

        /* swap the 32-bit integers in the header (must follow the reads above for in-place swaps) */
        ds->swapArray32(ds, inHeader, 5*4, outHeader, pErrorCode);

        /* swap the inverse table; tableSize counts uint32_t[3] rows */
        ds->swapArray32(ds, inBytes+header.table, header.tableSize*3*4,
                           outBytes+header.table, pErrorCode);

        /* swap the continuation table; contsSize counts UChars */
        ds->swapArray16(ds, inBytes+header.conts, header.contsSize*U_SIZEOF_UCHAR,
                           outBytes+header.conts, pErrorCode);
    }

    return headerSize+header.byteSize;
}
U_NAMESPACE_END

U_NAMESPACE_USE

//-----------------------------------------------------------------------------
//
//  uspoof_swap   -  byte swap and char encoding swap of spoof data
//
//  ds       swapper providing the read/write/swap callbacks
//  inData   start of the data, beginning with the generic ICU data header
//  length   number of bytes at inData, or negative for a preflight call that
//           only returns the total size
//  outData  destination buffer; may be the same memory as inData
//  status   in/out ICU error code
//
//  Returns the size of the ICU data header plus the length recorded in the
//  spoof data header, or 0 on error.
//
//-----------------------------------------------------------------------------
U_CAPI int32_t U_EXPORT2
uspoof_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData,
            UErrorCode *status) {

    if (status == NULL || U_FAILURE(*status)) {
        return 0;
    }
    if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
        *status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    //
    //  Check that the data header is for spoof data.
    //    (Header contents are defined in gencfu.cpp)
    //
    const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData+4);
    if(!(  pInfo->dataFormat[0]==0x43 &&   /* dataFormat="Cfu " */
           pInfo->dataFormat[1]==0x66 &&
           pInfo->dataFormat[2]==0x75 &&
           pInfo->dataFormat[3]==0x20 &&
           pInfo->formatVersion[0]==1  )) {
        udata_printError(ds, "uspoof_swap(): data format %02x.%02x.%02x.%02x "
                             "(format version %02x %02x %02x %02x) is not recognized\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0], pInfo->formatVersion[1],
                         pInfo->formatVersion[2], pInfo->formatVersion[3]);
        *status=U_UNSUPPORTED_ERROR;
        return 0;
    }

    //
    // Swap the data header.  (This is the generic ICU Data Header, not the uspoof Specific
    //                         header).  This swap also conveniently gets us
    //                         the size of the ICU d.h., which lets us locate the start
    //                         of the uspoof specific data.
    //
    int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status);
    if (U_FAILURE(*status)) {
        // Do not go on to parse the spoof header from an unreliable offset,
        // and keep the error code reported by the header swap.
        return 0;
    }

    //
    // Get the Spoof Data Header, and check that it appears to be OK.
    //
    const uint8_t   *inBytes =(const uint8_t *)inData+headerSize;
    SpoofDataHeader *spoofDH = (SpoofDataHeader *)inBytes;
    if (ds->readUInt32(spoofDH->fMagic)   != USPOOF_MAGIC ||
        ds->readUInt32(spoofDH->fLength)  <  sizeof(SpoofDataHeader))
    {
        udata_printError(ds, "uspoof_swap(): Spoof Data header is invalid.\n");
        *status=U_UNSUPPORTED_ERROR;
        return 0;
    }

    //
    // Preflight operation?  Just return the size
    //
    int32_t spoofDataLength = ds->readUInt32(spoofDH->fLength);
    int32_t totalSize = headerSize + spoofDataLength;
    if (length < 0) {
        return totalSize;
    }

    //
    // Check that length passed in is consistent with length from Spoof data header.
    //
    if (length < totalSize) {
        udata_printError(ds, "uspoof_swap(): too few bytes (%d after ICU Data header) for spoof data.\n",
                            spoofDataLength);
        *status=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    //
    // Swap the Data.  Do the data itself first, then the Spoof Data Header, because
    //                 we need to reference the header to locate the data, and an
    //                 inplace swap of the header leaves it unusable.
    //
    uint8_t          *outBytes = (uint8_t *)outData + headerSize;
    SpoofDataHeader  *outputDH = (SpoofDataHeader *)outBytes;

    int32_t   sectionStart;
    int32_t   sectionLength;

    //
    // If not swapping in place, zero out the output buffer before starting.
    //    Gaps may exist between the individual sections, and these must be zeroed in
    //    the output buffer.  The simplest way to do that is to just zero the whole thing.
    //
    if (inBytes != outBytes) {
        uprv_memset(outBytes, 0, spoofDataLength);
    }

    // Confusables Keys Section   (fCFUKeys): 4 bytes per entry, swapped as 32-bit units
    sectionStart  = ds->readUInt32(spoofDH->fCFUKeys);
    sectionLength = ds->readUInt32(spoofDH->fCFUKeysSize) * 4;
    ds->swapArray32(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);

    // String Index Section: 2 bytes per entry, swapped as 16-bit units
    sectionStart  = ds->readUInt32(spoofDH->fCFUStringIndex);
    sectionLength = ds->readUInt32(spoofDH->fCFUStringIndexSize) * 2;
    ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);

    // String Table Section: 2 bytes per entry, swapped as 16-bit units
    sectionStart  = ds->readUInt32(spoofDH->fCFUStringTable);
    sectionLength = ds->readUInt32(spoofDH->fCFUStringTableLen) * 2;
    ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);

    // String Lengths Section: 4 bytes per entry, swapped as 16-bit units
    sectionStart  = ds->readUInt32(spoofDH->fCFUStringLengths);
    sectionLength = ds->readUInt32(spoofDH->fCFUStringLengthsSize) * 4;
    ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);

    // Any Case Trie
    sectionStart  = ds->readUInt32(spoofDH->fAnyCaseTrie);
    sectionLength = ds->readUInt32(spoofDH->fAnyCaseTrieLength);
    utrie2_swap(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);

    // Lower Case Trie
    sectionStart  = ds->readUInt32(spoofDH->fLowerCaseTrie);
    sectionLength = ds->readUInt32(spoofDH->fLowerCaseTrieLength);
    utrie2_swap(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);

    // Script Sets.  The data is an array of int32_t
    sectionStart  = ds->readUInt32(spoofDH->fScriptSets);
    sectionLength = ds->readUInt32(spoofDH->fScriptSetsLength) * sizeof(ScriptSet);
    ds->swapArray32(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);

    // And, last, swap the header itself.
    //   int32_t   fMagic             // swap this
    //   uint8_t   fFormatVersion[4]  // Do not swap this, just copy
    //   int32_t   fLength and all the rest       // Swap the rest, all is 32 bit stuff.
    //
    uint32_t magic = ds->readUInt32(spoofDH->fMagic);
    ds->writeUInt32((uint32_t *)&outputDH->fMagic, magic);

    // The two arrays share an address exactly when swapping in place; skip the copy then.
    if (outputDH->fFormatVersion != spoofDH->fFormatVersion) {
        uprv_memcpy(outputDH->fFormatVersion, spoofDH->fFormatVersion, sizeof(spoofDH->fFormatVersion));
    }
    // swap starting at fLength
    ds->swapArray32(ds, &spoofDH->fLength, sizeof(SpoofDataHeader)-8 /* minus magic and fFormatVersion[4] */, &outputDH->fLength, status);

    return totalSize;
}
U_NAMESPACE_END

U_NAMESPACE_USE

/*
 * Swap dictionary data: the ICU data header, the int32_t indexes[] and,
 * for a UChars trie, the 16-bit trie units that follow the indexes.
 *
 * A negative length requests a preflight: only the total size is computed.
 * Returns the data header size plus indexes[IX_TOTAL_SIZE], or 0 on error.
 */
U_CAPI int32_t U_EXPORT2
udict_swap(const UDataSwapper *ds, const void *inData, int32_t length,
           void *outData, UErrorCode *pErrorCode) {
    int32_t indexes[DictionaryData::IX_COUNT];

    /* udata_swapDataHeader validates the arguments for us */
    const int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* the data must be "Dict" with format version 1 */
    const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4);
    const bool isDictionary =
        pInfo->dataFormat[0] == 0x44 &&
        pInfo->dataFormat[1] == 0x69 &&
        pInfo->dataFormat[2] == 0x63 &&
        pInfo->dataFormat[3] == 0x74 &&
        pInfo->formatVersion[0] == 1;
    if (!isDictionary) {
        udata_printError(ds, "udict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as dictionary data\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]);
        *pErrorCode = U_UNSUPPORTED_ERROR;
        return 0;
    }

    const uint8_t *inBytes = (const uint8_t *)inData + headerSize;
    uint8_t *outBytes = (uint8_t *)outData + headerSize;
    const int32_t *inIndexes = (const int32_t *)inBytes;

    /* bytes available after the data header; stays negative when preflighting */
    int32_t available = length;
    if (available >= 0) {
        available -= headerSize;
        if (available < (int32_t)(sizeof(indexes))) {
            udata_printError(ds, "udict_swap(): too few bytes (%d after header) for dictionary data\n", available);
            *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    /* read the indexes in the input byte order */
    for (int32_t idx = 0; idx < DictionaryData::IX_COUNT; ++idx) {
        indexes[idx] = udata_readInt32(ds, inIndexes[idx]);
    }
    const int32_t totalSize = indexes[DictionaryData::IX_TOTAL_SIZE];

    if (available < 0) {
        /* preflight: report the size without touching the output */
        return headerSize + totalSize;
    }

    if (available < totalSize) {
        udata_printError(ds, "udict_swap(): too few bytes (%d after header) for all of dictionary data\n", available);
        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* copy first so that bytes which need no swapping are already in place */
    if (inBytes != outBytes) {
        uprv_memcpy(outBytes, inBytes, totalSize);
    }

    /* the indexes themselves */
    ds->swapArray32(ds, inBytes, sizeof(indexes), outBytes, pErrorCode);

    /* the trie starts right after the indexes and ends at the first reserved offset */
    const int32_t trieStart = (int32_t)sizeof(indexes);
    const int32_t trieLimit = indexes[DictionaryData::IX_RESERVED1_OFFSET];
    const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;

    if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
        ds->swapArray16(ds, inBytes + trieStart, trieLimit - trieStart, outBytes + trieStart, pErrorCode);
    } else if (trieType != DictionaryData::TRIE_TYPE_BYTES) {
        /* a bytes trie needs no swapping; anything else is unknown */
        udata_printError(ds, "udict_swap(): unknown trie type!\n");
        *pErrorCode = U_UNSUPPORTED_ERROR;
        return 0;
    }

    /*
     * The two sections between IX_RESERVED1_OFFSET, IX_RESERVED2_OFFSET and
     * IX_TOTAL_SIZE are empty in the current format (but may be used later),
     * so there is nothing further to swap.
     */
    return headerSize + totalSize;
}
/*
 * Swap a header-less collation binary, as found inside a resource bundle
 * or in ucadata.icu.
 *
 * A negative length requests a preflight: the header is validated and its
 * size field returned, but nothing is written.
 * Returns header.size, or 0 on error.
 */
U_CAPI int32_t U_EXPORT2
ucol_swapBinary(const UDataSwapper *ds,
                const void *inData, int32_t length, void *outData,
                UErrorCode *pErrorCode) {
    const uint8_t *inBytes;
    uint8_t *outBytes;

    const UCATableHeader *inHeader;
    UCATableHeader *outHeader;
    UCATableHeader header={ 0 };

    uint32_t byteCount;

    /* validate the arguments ourselves: we may not have been called from ucol_swap() */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    inBytes=(const uint8_t *)inData;
    outBytes=(uint8_t *)outData;

    inHeader=(const UCATableHeader *)inData;
    outHeader=(UCATableHeader *)outData;

    /*
     * The collation binary must contain at least the UCATableHeader,
     * starting with its size field.
     * sizeof(UCATableHeader)==42*4 in ICU 2.8
     * Only read the size field when preflighting or when the input is
     * long enough to hold the header.
     */
    if(length<0 || length>=(42*4)) {
        header.size=udata_readInt32(ds, inHeader->size);
    }
    if(length>=0 && (length<(42*4) || length<header.size)) {
        udata_printError(ds, "ucol_swapBinary(): too few bytes (%d after header) for collation data\n",
                         length);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* magic number and format version */
    header.magic=ds->readUInt32(inHeader->magic);
    if(header.magic!=UCOL_HEADER_MAGIC ||
       inHeader->formatVersion[0]!=2 ||
       inHeader->formatVersion[1]<3
    ) {
        udata_printError(ds, "ucol_swapBinary(): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
                         header.magic,
                         inHeader->formatVersion[0], inHeader->formatVersion[1]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /* the binary records its own platform properties; they must agree with the swapper's input side */
    if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {
        udata_printError(ds, "ucol_swapBinary(): endianness %d or charset %d does not match the swapper\n",
                         inHeader->isBigEndian, inHeader->charSetFamily);
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return 0;
    }

    if(length<0) {
        /* preflight only */
        return header.size;
    }

    /* copy everything first; this takes care of data that needs no swapping */
    if(inBytes!=outBytes) {
        uprv_memcpy(outBytes, inBytes, header.size);
    }

    /* from here on, swap the pieces in the order in which they occur in the data */

    /* pick up the remaining header fields (size and magic were read above) */
    header.options=                 ds->readUInt32(inHeader->options);
    header.UCAConsts=               ds->readUInt32(inHeader->UCAConsts);
    header.contractionUCACombos=    ds->readUInt32(inHeader->contractionUCACombos);
    header.mappingPosition=         ds->readUInt32(inHeader->mappingPosition);
    header.expansion=               ds->readUInt32(inHeader->expansion);
    header.contractionIndex=        ds->readUInt32(inHeader->contractionIndex);
    header.contractionCEs=          ds->readUInt32(inHeader->contractionCEs);
    header.contractionSize=         ds->readUInt32(inHeader->contractionSize);
    header.endExpansionCE=          ds->readUInt32(inHeader->endExpansionCE);
    header.expansionCESize=         ds->readUInt32(inHeader->expansionCESize);
    header.endExpansionCECount=     udata_readInt32(ds, inHeader->endExpansionCECount);
    header.contractionUCACombosSize=udata_readInt32(ds, inHeader->contractionUCACombosSize);

    /* the leading 32-bit header fields, up to but excluding jamoSpecial */
    ds->swapArray32(ds, inHeader, (int32_t)((const char *)&inHeader->jamoSpecial-(const char *)inHeader),
                       outHeader, pErrorCode);

    /* record the output platform properties */
    outHeader->isBigEndian=ds->outIsBigEndian;
    outHeader->charSetFamily=ds->outCharset;

    /* options: from header.options up to header.expansion */
    if(header.options!=0) {
        ds->swapArray32(ds, inBytes+header.options, header.expansion-header.options,
                           outBytes+header.options, pErrorCode);
    }

    /* expansions */
    if(header.mappingPosition!=0 && header.expansion!=0) {
        /* bounded by the contractions if there are any, otherwise by the main trie */
        byteCount= header.contractionIndex!=0 ?
                       header.contractionIndex-header.expansion :
                       header.mappingPosition-header.expansion;
        ds->swapArray32(ds, inBytes+header.expansion, (int32_t)byteCount,
                           outBytes+header.expansion, pErrorCode);
    }

    /* contractions */
    if(header.contractionSize!=0) {
        /* contractionIndex: UChar[] */
        ds->swapArray16(ds, inBytes+header.contractionIndex, header.contractionSize*2,
                           outBytes+header.contractionIndex, pErrorCode);

        /* contractionCEs: CEs[] */
        ds->swapArray32(ds, inBytes+header.contractionCEs, header.contractionSize*4,
                           outBytes+header.contractionCEs, pErrorCode);
    }

    /* main trie: from mappingPosition up to endExpansionCE */
    if(header.mappingPosition!=0) {
        byteCount=header.endExpansionCE-header.mappingPosition;
        utrie_swap(ds, inBytes+header.mappingPosition, (int32_t)byteCount,
                      outBytes+header.mappingPosition, pErrorCode);
    }

    /* max expansion table */
    if(header.endExpansionCECount!=0) {
        ds->swapArray32(ds, inBytes+header.endExpansionCE, header.endExpansionCECount*4,
                           outBytes+header.endExpansionCE, pErrorCode);
    }

    /* expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap */

    /* UCA constants */
    if(header.UCAConsts!=0) {
        /*
         * if UCAConsts!=0 then contractionUCACombos because we are swapping
         * the UCA data file, and we know that the UCA contains contractions
         */
        ds->swapArray32(ds, inBytes+header.UCAConsts, header.contractionUCACombos-header.UCAConsts,
                           outBytes+header.UCAConsts, pErrorCode);
    }

    /* UCA contractions */
    if(header.contractionUCACombosSize!=0) {
        byteCount=header.contractionUCACombosSize*inHeader->contractionUCACombosWidth*U_SIZEOF_UCHAR;
        ds->swapArray16(ds, inBytes+header.contractionUCACombos, (int32_t)byteCount,
                           outBytes+header.contractionUCACombos, pErrorCode);
    }

    return header.size;
}
/**
 * swap a selector into the desired Endianness and Asciiness of
 * the system. Just as FYI, selectors are always saved in the format
 * of the system that created them. They are only converted if used
 * on another system. In other words, selectors created on different
 * system can be different even if the params are identical (endianness
 * and Asciiness differences only)
 *
 * @param ds pointer to data swapper containing swapping info
 * @param inData pointer to incoming data
 * @param length length of inData in bytes; a negative value only
 *               computes the size without swapping the selector data
 * @param outData pointer to output data. Capacity should
 *                be at least equal to capacity of inData
 * @param status an in/out ICU UErrorCode; a NULL pointer makes the
 *               function return 0
 * @return 0 on failure, number of bytes swapped on success
 *         number of bytes swapped can be smaller than length
 */
static int32_t
ucnvsel_swap(const UDataSwapper *ds,
             const void *inData, int32_t length,
             void *outData, UErrorCode *status) {
  /* udata_swapDataHeader checks the arguments */
  int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, status);
  /* guard against a NULL status before dereferencing it */
  if(status == NULL || U_FAILURE(*status)) {
    return 0;
  }

  /* check data format and format version */
  const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4);
  if(!(
    pInfo->dataFormat[0] == 0x43 &&  /* dataFormat="CSel" */
    pInfo->dataFormat[1] == 0x53 &&
    pInfo->dataFormat[2] == 0x65 &&
    pInfo->dataFormat[3] == 0x6c
  )) {
    udata_printError(ds, "ucnvsel_swap(): data format %02x.%02x.%02x.%02x is not recognized as UConverterSelector data\n",
                     pInfo->dataFormat[0], pInfo->dataFormat[1],
                     pInfo->dataFormat[2], pInfo->dataFormat[3]);
    *status = U_INVALID_FORMAT_ERROR;
    return 0;
  }
  if(pInfo->formatVersion[0] != 1) {
    udata_printError(ds, "ucnvsel_swap(): format version %02x is not supported\n",
                     pInfo->formatVersion[0]);
    *status = U_UNSUPPORTED_ERROR;
    return 0;
  }

  /* from here on, a non-negative length counts the bytes after the data header */
  if(length >= 0) {
    length -= headerSize;
    if(length < 16*4) {
      udata_printError(ds, "ucnvsel_swap(): too few bytes (%d after header) for UConverterSelector data\n",
                       length);
      *status = U_INDEX_OUTOFBOUNDS_ERROR;
      return 0;
    }
  }

  const uint8_t *inBytes = (const uint8_t *)inData + headerSize;
  uint8_t *outBytes = (uint8_t *)outData + headerSize;

  /* read the indexes */
  const int32_t *inIndexes = (const int32_t *)inBytes;
  int32_t indexes[16];
  int32_t i;
  for(i = 0; i < 16; ++i) {
    indexes[i] = udata_readInt32(ds, inIndexes[i]);
  }

  /* get the total length of the data */
  int32_t size = indexes[UCNVSEL_INDEX_SIZE];
  if(length >= 0) {
    if(length < size) {
      udata_printError(ds, "ucnvsel_swap(): too few bytes (%d after header) for all of UConverterSelector data\n",
                       length);
      *status = U_INDEX_OUTOFBOUNDS_ERROR;
      return 0;
    }

    /* copy the data for inaccessible bytes */
    if(inBytes != outBytes) {
      uprv_memcpy(outBytes, inBytes, size);
    }

    /* the sections follow each other; offset tracks the start of the next one */
    int32_t offset = 0, count;

    /* swap the int32_t indexes[] */
    count = UCNVSEL_INDEX_COUNT*4;
    ds->swapArray32(ds, inBytes, count, outBytes, status);
    offset += count;

    /* swap the UTrie2 */
    count = indexes[UCNVSEL_INDEX_TRIE_SIZE];
    utrie2_swap(ds, inBytes + offset, count, outBytes + offset, status);
    offset += count;

    /* swap the uint32_t pv[] */
    count = indexes[UCNVSEL_INDEX_PV_COUNT]*4;
    ds->swapArray32(ds, inBytes + offset, count, outBytes + offset, status);
    offset += count;

    /* swap the encoding names */
    count = indexes[UCNVSEL_INDEX_NAMES_LENGTH];
    ds->swapInvChars(ds, inBytes + offset, count, outBytes + offset, status);
    offset += count;

    U_ASSERT(offset == size);
  }

  return headerSize + size;
}
/* * Enumerate one resource item and its children and extract dependencies from * aliases. * Code adapted from ures_preflightResource() and ures_swapResource(). */ static void ures_enumDependencies(const UDataSwapper *ds, const char *itemName, const Resource *inBundle, int32_t length, Resource res, const char *inKey, int32_t depth, CheckDependency check, void *context, UErrorCode *pErrorCode) { const Resource *p; int32_t offset; if(res==0 || RES_GET_TYPE(res)==URES_INT) { /* empty string or integer, nothing to do */ return; } /* all other types use an offset to point to their data */ offset=(int32_t)RES_GET_OFFSET(res); if(0<=length && length<=offset) { udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) resource offset exceeds bundle length %d\n", itemName, res, length); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return; } p=inBundle+offset; switch(RES_GET_TYPE(res)) { /* strings and aliases have physically the same value layout */ case URES_STRING: // we ignore all strings except top-level strings with a %%ALIAS key if(depth!=1) { break; } else { char key[8]; int32_t keyLength; keyLength=(int32_t)strlen(inKey); if(keyLength!=gAliasKeyLength) { break; } ds->swapInvChars(ds, inKey, gAliasKeyLength+1, key, pErrorCode); if(U_FAILURE(*pErrorCode)) { udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) string key contains variant characters\n", itemName, res); return; } if(0!=strcmp(key, gAliasKey)) { break; } } // for the top-level %%ALIAS string fall through to URES_ALIAS case URES_ALIAS: { char localeID[32]; const uint16_t *p16; int32_t i, stringLength; uint16_t u16, ored16; stringLength=udata_readInt32(ds, (int32_t)*p); /* top=offset+1+(string length +1)/2 rounded up */ offset+=1+((stringLength+1)+1)/2; if(offset>length) { break; // the resource does not fit into the bundle, print error below } // extract the locale ID from alias strings like // locale_ID/key1/key2/key3 // locale_ID if(U_IS_BIG_ENDIAN==ds->inIsBigEndian) { u16=0x2f; // slash in 
local endianness } else { u16=0x2f00; // slash in opposite endianness } p16=(const uint16_t *)(p+1); // Unicode string contents // search for the first slash for(i=0; i<stringLength && p16[i]!=u16; ++i) {} if(RES_GET_TYPE(res)==URES_ALIAS) { // ignore aliases with an initial slash: // /ICUDATA/... and /pkgname/... go to a different package // /LOCALE/... are for dynamic sideways fallbacks and don't go to a fixed bundle if(i==0) { break; // initial slash ('/') } // ignore the intra-bundle path starting from the first slash ('/') stringLength=i; } else /* URES_STRING */ { // the whole string should only consist of a locale ID if(i!=stringLength) { udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) %%ALIAS contains a '/'\n", itemName, res); *pErrorCode=U_UNSUPPORTED_ERROR; return; } } // convert the Unicode string to char * and // check that it has a bundle path but no package if(stringLength>=(int32_t)sizeof(localeID)) { udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) alias locale ID length %ld too long\n", itemName, res, stringLength); *pErrorCode=U_BUFFER_OVERFLOW_ERROR; return; } // convert the alias Unicode string to US-ASCII ored16=0; if(U_IS_BIG_ENDIAN==ds->inIsBigEndian) { for(i=0; i<stringLength; ++i) { u16=p16[i]; ored16|=u16; localeID[i]=(char)u16; } } else { for(i=0; i<stringLength; ++i) { u16=p16[i]; ored16|=u16; localeID[i]=(char)(u16>>8); } ored16=(uint16_t)((ored16<<8)|(ored16>>8)); } localeID[stringLength]=0; if(ored16>0x7f) { udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) alias string contains non-ASCII characters\n", itemName, res); *pErrorCode=U_INVALID_CHAR_FOUND; return; } #if (U_CHARSET_FAMILY==U_EBCDIC_FAMILY) // swap to EBCDIC // our swapper is probably not the right one, but // the function uses it only for printing errors uprv_ebcdicFromAscii(ds, localeID, stringLength, localeID, pErrorCode); if(U_FAILURE(*pErrorCode)) { return; } #endif #if U_CHARSET_FAMILY!=U_ASCII_FAMILY && 
U_CHARSET_FAMILY!=U_EBCDIC_FAMILY # error Unknown U_CHARSET_FAMILY value! #endif checkIDSuffix(itemName, localeID, -1, ".res", check, context, pErrorCode); } break; case URES_TABLE: case URES_TABLE32: { const uint16_t *pKey16; const int32_t *pKey32; Resource item; int32_t i, count; if(RES_GET_TYPE(res)==URES_TABLE) { /* get table item count */ pKey16=(const uint16_t *)p; count=ds->readUInt16(*pKey16++); pKey32=NULL; /* top=((1+ table item count)/2 rounded up)+(table item count) */ offset+=((1+count)+1)/2; } else { /* get table item count */ pKey32=(const int32_t *)p; count=udata_readInt32(ds, *pKey32++); pKey16=NULL; /* top=(1+ table item count)+(table item count) */ offset+=1+count; } p=inBundle+offset; /* pointer to table resources */ offset+=count; if(offset>length) { break; // the resource does not fit into the bundle, print error below } /* recurse */ for(i=0; i<count; ++i) { item=ds->readUInt32(*p++); ures_enumDependencies( ds, itemName, inBundle, length, item, ((const char *)inBundle)+ (pKey16!=NULL ? 
ds->readUInt16(pKey16[i]) : udata_readInt32(ds, pKey32[i])), depth+1, check, context, pErrorCode); if(U_FAILURE(*pErrorCode)) { udata_printError(ds, "icupkg/ures_enumDependencies(%s table res=%08x)[%d].recurse(%08x) failed\n", itemName, res, i, item); break; } } } break; case URES_ARRAY: { Resource item; int32_t i, count; /* top=offset+1+(array length) */ count=udata_readInt32(ds, (int32_t)*p++); offset+=1+count; if(offset>length) { break; // the resource does not fit into the bundle, print error below } /* recurse */ for(i=0; i<count; ++i) { item=ds->readUInt32(*p++); ures_enumDependencies( ds, itemName, inBundle, length, item, NULL, depth+1, check, context, pErrorCode); if(U_FAILURE(*pErrorCode)) { udata_printError(ds, "icupkg/ures_enumDependencies(%s array res=%08x)[%d].recurse(%08x) failed\n", itemName, res, i, item); break; } } } break; default: break; } if(U_FAILURE(*pErrorCode)) { /* nothing to do */ } else if(0<=length && length<offset) { udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) resource limit exceeds bundle length %d\n", itemName, res, length); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; } }