/*
 * Swap 'Test' data from gentest.
 *
 * The payload that follows the ICU data header is treated as 7 bytes:
 * size16 (2) bytes swapped as 16-bit units, then sizeStr (5) bytes swapped
 * as invariant characters.
 *
 * ds          swapper used for the header, the 16-bit unit and the string
 * inData      start of the data, beginning with the ICU data header
 * length      number of bytes available at inData, counted from the start of
 *             the header; when negative nothing is swapped and only the
 *             size is returned
 * outData     destination for the swapped bytes
 * pErrorCode  in/out ICU error code
 *
 * Returns headerSize+7, or 0 after setting *pErrorCode on failure.
 */
static int32_t U_CALLCONV
test_swap(const UDataSwapper *ds,
          const void *inData, int32_t length, void *outData,
          UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize;

    const uint8_t *inBytes;
    uint8_t *outBytes;

    int32_t offset;

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        udata_printError(ds, "test_swap(): data header swap failed %s\n",
                         pErrorCode != NULL ? u_errorName(*pErrorCode) : "pErrorCode is NULL");
        return 0;
    }

    /* check data format and format version */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x54 &&   /* dataFormat="Test" */
        pInfo->dataFormat[1]==0x65 &&
        pInfo->dataFormat[2]==0x73 &&
        pInfo->dataFormat[3]==0x74 &&
        pInfo->formatVersion[0]==1
    )) {
        udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    inBytes=(const uint8_t *)inData+headerSize;
    outBytes=(uint8_t *)outData+headerSize;

    int32_t size16 = 2; // 16bit plus padding
    int32_t sizeStr = 5; // 4 char inv-str plus null
    int32_t size = size16 + sizeStr;

    if(length>=0) {
        /*
         * length counts the header as well; only the bytes after the header
         * are available for the payload, so compare those against size.
         */
        length-=headerSize;
        if(length<size) {
            udata_printError(ds, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n",
                             length, size);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        offset=0;
        /* swap a 1 entry array */
        ds->swapArray16(ds, inBytes+offset, size16, outBytes+offset, pErrorCode);
        offset+=size16;
        /* swap the invariant-character string, including its terminating NUL */
        ds->swapInvChars(ds, inBytes+offset, sizeStr, outBytes+offset, pErrorCode);
    }

    return headerSize+size;
}
/*
 * Swap property-alias data ("pnam", format version 1).
 *
 * The ICU data header is swapped here; the body is handed to
 * PropertyAliases::swap(). When length is non-negative, the bytes after the
 * header must cover at least sizeof(PropertyAliases).
 *
 * Returns the header size plus whatever PropertyAliases::swap() returns,
 * or 0 after a failure.
 */
U_CAPI int32_t U_EXPORT2
upname_swap(const UDataSwapper *ds,
            const void *inData, int32_t length, void *outData,
            UErrorCode *pErrorCode) {
    /* udata_swapDataHeader checks the arguments */
    const int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version; the UDataInfo sits 4 bytes in */
    const UDataInfo *info=(const UDataInfo *)((const char *)inData+4);
    if( info->dataFormat[0]!=0x70 ||    /* dataFormat="pnam" */
        info->dataFormat[1]!=0x6e ||
        info->dataFormat[2]!=0x61 ||
        info->dataFormat[3]!=0x6d ||
        info->formatVersion[0]!=1
    ) {
        udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
                         info->dataFormat[0], info->dataFormat[1],
                         info->dataFormat[2], info->dataFormat[3],
                         info->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /* a negative length is passed through to PropertyAliases::swap() untouched */
    int32_t bodyLength=length;
    if(length>=0) {
        bodyLength=length-headerSize;
        if(bodyLength<(int32_t)sizeof(PropertyAliases)) {
            udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
                             bodyLength);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    const uint8_t *bodyIn=(const uint8_t *)inData+headerSize;
    uint8_t *bodyOut=(uint8_t *)outData+headerSize;
    return headerSize+PropertyAliases::swap(ds, bodyIn, bodyLength, bodyOut, pErrorCode);
}
/*
 * Swap ICU collation data like ucadata.icu.
 *
 * Swaps the ICU data header, verifies dataFormat "UCol" with format
 * version 2.x (x>=3), and delegates the body to ucol_swapBinary().
 * A negative length is forwarded to ucol_swapBinary() as -1.
 *
 * Returns header size plus collation size, or 0 on any failure.
 */
U_CAPI int32_t U_EXPORT2
ucol_swap(const UDataSwapper *ds,
          const void *inData, int32_t length, void *outData,
          UErrorCode *pErrorCode) {
    /* udata_swapDataHeader checks the arguments */
    const int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    const UDataInfo *info=(const UDataInfo *)((const char *)inData+4);
    if( info->dataFormat[0]!=0x55 ||    /* dataFormat="UCol" */
        info->dataFormat[1]!=0x43 ||
        info->dataFormat[2]!=0x6f ||
        info->dataFormat[3]!=0x6c ||
        info->formatVersion[0]!=2 ||
        info->formatVersion[1]<3
    ) {
        udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not a collation file\n",
                         info->dataFormat[0], info->dataFormat[1],
                         info->dataFormat[2], info->dataFormat[3],
                         info->formatVersion[0], info->formatVersion[1]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /* the collation binary starts right after the header */
    int32_t bodyLength;
    if(length>=0) {
        bodyLength=length-headerSize;
    } else {
        bodyLength=-1;
    }

    const int32_t collationSize=ucol_swapBinary(ds,
                                                (const char *)inData+headerSize,
                                                bodyLength,
                                                (char *)outData+headerSize,
                                                pErrorCode);
    return U_SUCCESS(*pErrorCode) ? headerSize+collationSize : 0;
}
U_NAMESPACE_END U_NAMESPACE_USE //----------------------------------------------------------------------------- // // ubrk_swap - byte swap and char encoding swap of RBBI data // //----------------------------------------------------------------------------- U_CAPI int32_t U_EXPORT2 ubrk_swap(const UDataSwapper * ds, const void * inData, int32_t length, void * outData, UErrorCode * status) { if (status == NULL || U_FAILURE(*status)) { return 0; } if (ds == NULL || inData == NULL || length < -1 || (length > 0 && outData == NULL)) { *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; } // // Check that the data header is for for break data. // (Header contents are defined in genbrk.cpp) // const UDataInfo * pInfo = (const UDataInfo *)((const char *)inData + 4); if (!(pInfo->dataFormat[0] == 0x42 && /* dataFormat="Brk " */ pInfo->dataFormat[1] == 0x72 && pInfo->dataFormat[2] == 0x6b && pInfo->dataFormat[3] == 0x20 && pInfo->formatVersion[0] == 3)) { udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n", pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]); *status = U_UNSUPPORTED_ERROR; return 0; } // // Swap the data header. (This is the generic ICU Data Header, not the RBBI Specific // RBBIDataHeader). This swap also conveniently gets us // the size of the ICU d.h., which lets us locate the start // of the RBBI specific data. // int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, status); // // Get the RRBI Data Header, and check that it appears to be OK. // // Note: ICU 3.2 and earlier, RBBIDataHeader::fDataFormat was actually // an int32_t with a value of 1. Starting with ICU 3.4, // RBBI's fDataFormat matches the dataFormat field from the // UDataInfo header, four int8_t bytes. 
The value is {3,1,0,0} // const uint8_t * inBytes = (const uint8_t *)inData + headerSize; RBBIDataHeader * rbbiDH = (RBBIDataHeader *)inBytes; if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 || rbbiDH->fFormatVersion[0] != 3 || ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader)) { udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n"); *status = U_UNSUPPORTED_ERROR; return 0; } // // Prefight operation? Just return the size // int32_t breakDataLength = ds->readUInt32(rbbiDH->fLength); int32_t totalSize = headerSize + breakDataLength; if (length < 0) { return totalSize; } // // Check that length passed in is consistent with length from RBBI data header. // if (length < totalSize) { udata_printError(ds, "ubrk_swap(): too few bytes (%d after ICU Data header) for break data.\n", breakDataLength); *status = U_INDEX_OUTOFBOUNDS_ERROR; return 0; } // // Swap the Data. Do the data itself first, then the RBBI Data Header, because // we need to reference the header to locate the data, and an // inplace swap of the header leaves it unusable. // uint8_t * outBytes = (uint8_t *)outData + headerSize; RBBIDataHeader * outputDH = (RBBIDataHeader *)outBytes; int32_t tableStartOffset; int32_t tableLength; // // If not swapping in place, zero out the output buffer before starting. // Individual tables and other data items within are aligned to 8 byte boundaries // when originally created. Any unused space between items needs to be zero. // if (inBytes != outBytes) { uprv_memset(outBytes, 0, breakDataLength); } // // Each state table begins with several 32 bit fields. Calculate the size // in bytes of these. // int32_t topSize = offsetof(RBBIStateTable, fTableData); // Forward state table. 
tableStartOffset = ds->readUInt32(rbbiDH->fFTable); tableLength = ds->readUInt32(rbbiDH->fFTableLen); if (tableLength > 0) { ds->swapArray32(ds, inBytes + tableStartOffset, topSize, outBytes + tableStartOffset, status); ds->swapArray16(ds, inBytes + tableStartOffset + topSize, tableLength - topSize, outBytes + tableStartOffset + topSize, status); } // Reverse state table. Same layout as forward table, above. tableStartOffset = ds->readUInt32(rbbiDH->fRTable); tableLength = ds->readUInt32(rbbiDH->fRTableLen); if (tableLength > 0) { ds->swapArray32(ds, inBytes + tableStartOffset, topSize, outBytes + tableStartOffset, status); ds->swapArray16(ds, inBytes + tableStartOffset + topSize, tableLength - topSize, outBytes + tableStartOffset + topSize, status); } // Safe Forward state table. Same layout as forward table, above. tableStartOffset = ds->readUInt32(rbbiDH->fSFTable); tableLength = ds->readUInt32(rbbiDH->fSFTableLen); if (tableLength > 0) { ds->swapArray32(ds, inBytes + tableStartOffset, topSize, outBytes + tableStartOffset, status); ds->swapArray16(ds, inBytes + tableStartOffset + topSize, tableLength - topSize, outBytes + tableStartOffset + topSize, status); } // Safe Reverse state table. Same layout as forward table, above. tableStartOffset = ds->readUInt32(rbbiDH->fSRTable); tableLength = ds->readUInt32(rbbiDH->fSRTableLen); if (tableLength > 0) { ds->swapArray32(ds, inBytes + tableStartOffset, topSize, outBytes + tableStartOffset, status); ds->swapArray16(ds, inBytes + tableStartOffset + topSize, tableLength - topSize, outBytes + tableStartOffset + topSize, status); } // Trie table for character categories utrie_swap(ds, inBytes + ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen), outBytes + ds->readUInt32(rbbiDH->fTrie), status); // Source Rules Text. 
It's UChar data ds->swapArray16(ds, inBytes + ds->readUInt32(rbbiDH->fRuleSource), ds->readUInt32(rbbiDH->fRuleSourceLen), outBytes + ds->readUInt32(rbbiDH->fRuleSource), status); // Table of rule status values. It's all int_32 values ds->swapArray32(ds, inBytes + ds->readUInt32(rbbiDH->fStatusTable), ds->readUInt32(rbbiDH->fStatusTableLen), outBytes + ds->readUInt32(rbbiDH->fStatusTable), status); // And, last, the header. // It is all int32_t values except for fFormataVersion, which is an array of four bytes. // Swap the whole thing as int32_t, then re-swap the one field. // ds->swapArray32(ds, inBytes, sizeof(RBBIDataHeader), outBytes, status); ds->swapArray32(ds, outputDH->fFormatVersion, 4, outputDH->fFormatVersion, status); return totalSize; }
U_CAPI int32_t U_EXPORT2 udata_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { char dataFormatChars[4]; const UDataInfo *pInfo; int32_t i, swappedLength; if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return 0; } /* * Preflight the header first; checks for illegal arguments, too. * Do not swap the header right away because the format-specific swapper * will swap it, get the headerSize again, and also use the header * information. Otherwise we would have to pass some of the information * and not be able to use the UDataSwapFn signature. */ udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode); /* * If we wanted udata_swap() to also handle non-loadable data like a UTrie, * then we could check here for further known magic values and structures. */ if(U_FAILURE(*pErrorCode)) { return 0; /* the data format was not recognized */ } pInfo=(const UDataInfo *)((const char *)inData+4); { /* convert the data format from ASCII to Unicode to the system charset */ UChar u[4]={ pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3] }; if(uprv_isInvariantUString(u, 4)) { u_UCharsToChars(u, dataFormatChars, 4); } else { dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?'; } } /* dispatch to the swap function for the dataFormat */ for(i=0; i<UPRV_LENGTHOF(swapFns); ++i) { if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) { swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode); if(U_FAILURE(*pErrorCode)) { udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n", pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], dataFormatChars[0], dataFormatChars[1], dataFormatChars[2], dataFormatChars[3], u_errorName(*pErrorCode)); } else if(swappedLength<(length-15)) { /* swapped less than expected */ udata_printError(ds, "udata_swap() warning: swapped only %d out 
of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", swappedLength, length, pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], dataFormatChars[0], dataFormatChars[1], dataFormatChars[2], dataFormatChars[3], u_errorName(*pErrorCode)); } return swappedLength; } } /* the dataFormat was not recognized */ udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], dataFormatChars[0], dataFormatChars[1], dataFormatChars[2], dataFormatChars[3]); *pErrorCode=U_UNSUPPORTED_ERROR; return 0; }
U_NAMESPACE_USE

/* definitions */

/* Unicode property (value) aliases data swapping --------------------------- */

/*
 * Swap pnames.icu data ("pnam", format version 2).
 *
 * After the ICU data header the data starts with int32_t indexes[].
 * Everything up to indexes[IX_BYTE_TRIES_OFFSET] is swapped as 32-bit units;
 * the remaining bytes up to indexes[IX_TOTAL_SIZE] are copied unchanged.
 * With a negative length only the size is computed.
 *
 * Returns the header size plus the total size, or 0 on failure.
 */
static int32_t U_CALLCONV
upname_swap(const UDataSwapper *ds,
            const void *inData, int32_t length, void *outData,
            UErrorCode *pErrorCode) {
    /* udata_swapDataHeader checks the arguments */
    int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    const UDataInfo *info=
        reinterpret_cast<const UDataInfo *>(
            static_cast<const char *>(inData)+4);
    if( info->dataFormat[0]!=0x70 ||    /* dataFormat="pnam" */
        info->dataFormat[1]!=0x6e ||
        info->dataFormat[2]!=0x61 ||
        info->dataFormat[3]!=0x6d ||
        info->formatVersion[0]!=2
    ) {
        udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
                         info->dataFormat[0], info->dataFormat[1],
                         info->dataFormat[2], info->dataFormat[3],
                         info->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    const uint8_t *src=static_cast<const uint8_t *>(inData)+headerSize;
    uint8_t *dest=static_cast<uint8_t *>(outData)+headerSize;

    if(length>=0) {
        length-=headerSize;
        // formatVersion 2 initially has indexes[8], 32 bytes.
        if(length<32) {
            udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
                             (int)length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    const int32_t *srcIndexes=reinterpret_cast<const int32_t *>(src);
    int32_t totalSize=udata_readInt32(ds, srcIndexes[PropNameData::IX_TOTAL_SIZE]);

    if(length<0) {
        /* preflighting: report the size without touching the output */
        return headerSize+totalSize;
    }

    if(length<totalSize) {
        udata_printError(ds, "upname_swap(): too few bytes (%d after header, should be %d) "
                         "for pnames.icu\n",
                         (int)length, (int)totalSize);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* the ByteTries offset doubles as the byte count of indexes[] plus valueMaps[] */
    int32_t swappedBytes=
        udata_readInt32(ds, srcIndexes[PropNameData::IX_BYTE_TRIES_OFFSET]);

    // Swap the indexes[] and the valueMaps[].
    ds->swapArray32(ds, src, swappedBytes, dest, pErrorCode);

    // Copy the rest of the data.
    if(src!=dest) {
        uprv_memcpy(dest+swappedBytes, src+swappedBytes, totalSize-swappedBytes);
    }

    // We need not swap anything else:
    //
    // The ByteTries are already byte-serialized, and are fixed on ASCII.
    // (On an EBCDIC machine, the input string is converted to lowercase ASCII
    // while matching.)
    //
    // The name groups are mostly invariant characters, but since we only
    // generate, and keep in subversion, ASCII versions of pnames.icu,
    // and since only ICU4J uses the pnames.icu data file
    // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files,
    // we just copy those bytes too.

    return headerSize+totalSize;
}
/*
 * Swap unorm.icu data ("Norm", format version 2).
 *
 * Layout after the ICU data header, as walked below:
 *   int32_t indexes[32], the main UTrie, uint16_t data counted by
 *   _NORM_INDEX_UCHAR_COUNT and _NORM_INDEX_COMBINE_DATA_COUNT,
 *   the optional FCD UTrie, the optional aux UTrie, and uint16_t data
 *   counted by _NORM_INDEX_CANON_SET_COUNT.
 * With a negative length only the size is computed.
 *
 * Returns the header size plus the computed data size, or 0 on failure.
 */
static int32_t U_CALLCONV
unorm_swap(const UDataSwapper *ds,
           const void *inData, int32_t length, void *outData,
           UErrorCode *pErrorCode) {
    int32_t indexes[32];
    int32_t k;

    /* udata_swapDataHeader checks the arguments */
    const int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    const UDataInfo *info=(const UDataInfo *)((const char *)inData+4);
    if( info->dataFormat[0]!=0x4e ||    /* dataFormat="Norm" */
        info->dataFormat[1]!=0x6f ||
        info->dataFormat[2]!=0x72 ||
        info->dataFormat[3]!=0x6d ||
        info->formatVersion[0]!=2
    ) {
        udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n",
                         info->dataFormat[0], info->dataFormat[1],
                         info->dataFormat[2], info->dataFormat[3],
                         info->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    const uint8_t *src=(const uint8_t *)inData+headerSize;
    uint8_t *dest=(uint8_t *)outData+headerSize;
    const int32_t *srcIndexes=(const int32_t *)src;

    if(length>=0) {
        length-=headerSize;
        if(length<32*4) {
            udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n",
                             length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */
    for(k=0; k<32; ++k) {
        indexes[k]=udata_readInt32(ds, srcIndexes[k]);
    }

    /* calculate the total length of the data */
    const int32_t size=
        32*4+ /* size of indexes[] */
        indexes[_NORM_INDEX_TRIE_SIZE]+
        indexes[_NORM_INDEX_UCHAR_COUNT]*2+
        indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+
        indexes[_NORM_INDEX_FCD_TRIE_SIZE]+
        indexes[_NORM_INDEX_AUX_TRIE_SIZE]+
        indexes[_NORM_INDEX_CANON_SET_COUNT]*2;

    if(length<0) {
        /* preflighting: nothing to swap */
        return headerSize+size;
    }

    if(length<size) {
        udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n",
                         length);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* copy the data for inaccessible bytes */
    if(src!=dest) {
        uprv_memcpy(dest, src, size);
    }

    int32_t pos=0;
    int32_t nbytes;

    /* swap the indexes[] */
    nbytes=32*4;
    ds->swapArray32(ds, src, nbytes, dest, pErrorCode);
    pos+=nbytes;

    /* swap the main UTrie */
    nbytes=indexes[_NORM_INDEX_TRIE_SIZE];
    utrie_swap(ds, src+pos, nbytes, dest+pos, pErrorCode);
    pos+=nbytes;

    /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */
    nbytes=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2;
    ds->swapArray16(ds, src+pos, nbytes, dest+pos, pErrorCode);
    pos+=nbytes;

    /* swap the FCD UTrie, if there is one */
    nbytes=indexes[_NORM_INDEX_FCD_TRIE_SIZE];
    if(nbytes!=0) {
        utrie_swap(ds, src+pos, nbytes, dest+pos, pErrorCode);
        pos+=nbytes;
    }

    /* swap the aux UTrie, if there is one */
    nbytes=indexes[_NORM_INDEX_AUX_TRIE_SIZE];
    if(nbytes!=0) {
        utrie_swap(ds, src+pos, nbytes, dest+pos, pErrorCode);
        pos+=nbytes;
    }

    /* swap the trailing uint16_t units counted by _NORM_INDEX_CANON_SET_COUNT */
    nbytes=indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
    ds->swapArray16(ds, src+pos, nbytes, dest+pos, pErrorCode);
    pos+=nbytes;

    return headerSize+size;
}
/*
 * Swap bidi/shaping data ("BiDi", format version 1 or 2).
 *
 * Layout after the ICU data header, as walked below:
 *   int32_t indexes[], the trie, uint32_t mirrors[], and two byte arrays
 *   (jgArray[] and jgArray2[]) that are copied without swapping.
 * With a negative length only the size (indexes[UBIDI_IX_LENGTH]) is computed.
 *
 * Returns the header size plus indexes[UBIDI_IX_LENGTH], or 0 on failure.
 */
static int32_t U_CALLCONV
ubidi_swap(const UDataSwapper *ds,
           const void *inData, int32_t length, void *outData,
           UErrorCode *pErrorCode) {
    int32_t indexes[16];
    int32_t k;

    /* udata_swapDataHeader checks the arguments */
    const int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    const UDataInfo *info=(const UDataInfo *)((const char *)inData+4);
    if( info->dataFormat[0]!=UBIDI_FMT_0 ||    /* dataFormat="BiDi" */
        info->dataFormat[1]!=UBIDI_FMT_1 ||
        info->dataFormat[2]!=UBIDI_FMT_2 ||
        info->dataFormat[3]!=UBIDI_FMT_3 ||
        /* neither a version 1 file with the expected trie shifts nor a version 2 file */
        ((info->formatVersion[0]!=1 ||
          info->formatVersion[2]!=UTRIE_SHIFT ||
          info->formatVersion[3]!=UTRIE_INDEX_SHIFT) &&
         info->formatVersion[0]!=2)
    ) {
        udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n",
                         info->dataFormat[0], info->dataFormat[1],
                         info->dataFormat[2], info->dataFormat[3],
                         info->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    const uint8_t *src=(const uint8_t *)inData+headerSize;
    uint8_t *dest=(uint8_t *)outData+headerSize;
    const int32_t *srcIndexes=(const int32_t *)src;

    if(length>=0) {
        length-=headerSize;
        if(length<16*4) {
            udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n",
                             length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */
    for(k=0; k<16; ++k) {
        indexes[k]=udata_readInt32(ds, srcIndexes[k]);
    }

    /* get the total length of the data */
    const int32_t size=indexes[UBIDI_IX_LENGTH];

    if(length<0) {
        /* preflighting: nothing to swap */
        return headerSize+size;
    }

    if(length<size) {
        udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n",
                         length);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* copy the data for inaccessible bytes */
    if(src!=dest) {
        uprv_memcpy(dest, src, size);
    }

    int32_t pos=0;
    int32_t nbytes;

    /* swap the int32_t indexes[] */
    nbytes=indexes[UBIDI_IX_INDEX_TOP]*4;
    ds->swapArray32(ds, src, nbytes, dest, pErrorCode);
    pos+=nbytes;

    /* swap the UTrie */
    nbytes=indexes[UBIDI_IX_TRIE_SIZE];
    utrie2_swapAnyVersion(ds, src+pos, nbytes, dest+pos, pErrorCode);
    pos+=nbytes;

    /* swap the uint32_t mirrors[] */
    nbytes=indexes[UBIDI_IX_MIRROR_LENGTH]*4;
    ds->swapArray32(ds, src+pos, nbytes, dest+pos, pErrorCode);
    pos+=nbytes;

    /* just skip the uint8_t jgArray[] and jgArray2[] */
    nbytes=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START];
    pos+=nbytes;
    nbytes=indexes[UBIDI_IX_JG_LIMIT2]-indexes[UBIDI_IX_JG_START2];
    pos+=nbytes;

    U_ASSERT(pos==size);

    return headerSize+size;
}
/*
 * Write this package to a file, converting it to the platform type outType.
 *
 * Output order: package header, item count, one ToC entry per item,
 * the item name strings, then the items themselves.
 * Everything that is not already in the output type is swapped first;
 * the header, the name strings and the item data are swapped in place.
 *
 * filename  path of the file to create; also the source of the package name
 *           that is prepended to each item name
 * outType   platform type of the output, interpreted by makeTypeProps() and
 *           makeTypeEnum()
 * comment   if not NULL, replaces the comment at the end of the header
 *
 * Any failure prints a message to stderr and terminates the process
 * with exit().
 */
void
Package::writePackage(const char *filename, char outType, const char *comment) {
    char prefix[MAX_PKG_NAME_LENGTH+4];
    UDataOffsetTOCEntry entry;
    UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT];
    FILE *file;
    Item *pItem;
    char *name;
    UErrorCode errorCode;
    int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32;
    uint8_t outCharset;
    UBool outIsBigEndian;

    extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH);

    // if there is an explicit comment, then use it, else use what's in the current header
    if(comment!=NULL) {
        /* get the header size minus the current comment */
        DataHeader *pHeader;
        int32_t length;  // NOTE: shadows the function-level length inside this block

        pHeader=(DataHeader *)header;
        headerLength=4+pHeader->info.size;
        length=(int32_t)strlen(comment);
        if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) {
            fprintf(stderr, "icupkg: comment too long\n");
            exit(U_BUFFER_OVERFLOW_ERROR);
        }
        /* copy the comment including its terminating NUL */
        memcpy(header+headerLength, comment, length+1);
        headerLength+=length;
        if(headerLength&0xf) {
            /* NUL-pad the header to a multiple of 16 */
            length=(headerLength+0xf)&~0xf;
            memset(header+headerLength, 0, length-headerLength);
            headerLength=length;
        }
        pHeader->dataHeader.headerSize=(uint16_t)headerLength;
    }

    makeTypeProps(outType, outCharset, outIsBigEndian);

    // open (TYPE_COUNT-2) swappers
    // one is a no-op for local type==outType
    // one type (TYPE_LE) is bogus
    // a NULL entry in ds[] means "no swapping needed/possible for this type"
    errorCode=U_ZERO_ERROR;
    i=makeTypeEnum(outType);
    ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
    ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
    ds[TYPE_LE]=NULL;
    ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode));
        exit(errorCode);
    }
    // route swapper error messages to stderr
    for(i=0; i<TYPE_COUNT; ++i) {
        if(ds[i]!=NULL) {
            ds[i]->printError=printPackageError;
            ds[i]->printErrorContext=stderr;
        }
    }

    // the swapper from this machine's type to the output type;
    // NULL when the two types are the same
    dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)];

    // create the file and write its contents
    file=fopen(filename, "wb");
    if(file==NULL) {
        fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
        exit(U_FILE_ACCESS_ERROR);
    }

    // swap and write the header
    // (the swap is done in place in the header member)
    if(dsLocalToOut!=NULL) {
        udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n",
                    u_errorName(errorCode));
            exit(errorCode);
        }
    }
    length=(int32_t)fwrite(header, 1, headerLength, file);
    if(length!=headerLength) {
        fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename);
        exit(U_FILE_ACCESS_ERROR);
    }

    // prepare and swap the package name with a tree separator
    // for prepending to item names
    strcat(prefix, U_TREE_ENTRY_SEP_STRING);
    prefixLength=(int32_t)strlen(prefix);
    if(dsLocalToOut!=NULL) {
        dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n",
                    u_errorName(errorCode));
            exit(errorCode);
        }

        // swap and sort the item names (sorting needs to be done in the output charset)
        dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n",
                    u_errorName(errorCode));
            exit(errorCode);
        }
        sortItems();
    }

    // create the output item names in sorted order, with the package name prepended to each
    // NOTE(review): presumably allocString(FALSE, n) reserves n+1 bytes in outStrings,
    // since prefixLength+length+1 bytes are copied below - confirm against allocString()
    for(i=0; i<itemCount; ++i) {
        length=(int32_t)strlen(items[i].name);
        name=allocString(FALSE, length+prefixLength);
        memcpy(name, prefix, prefixLength);
        memcpy(name+prefixLength, items[i].name, length+1);
        items[i].name=name;
    }

    // calculate offsets for item names and items, pad to 16-align items
    // align only the first item; each item's length is a multiple of 16
    // basenameOffset: 4 bytes for the item count plus 8 bytes per ToC entry
    basenameOffset=4+8*itemCount;
    offset=basenameOffset+outStringTop;
    if((length=(offset&15))!=0) {
        // fill the gap after the names with 0xaa padding bytes
        length=16-length;
        memset(allocString(FALSE, length-1), 0xaa, length);
        offset+=length;
    }

    // write the table of contents
    // first the itemCount
    outInt32=itemCount;
    if(dsLocalToOut!=NULL) {
        dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n",
                    u_errorName(errorCode));
            exit(errorCode);
        }
    }
    length=(int32_t)fwrite(&outInt32, 1, 4, file);
    if(length!=4) {
        fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename);
        exit(U_FILE_ACCESS_ERROR);
    }

    // then write the item entries (and collect the maxItemLength)
    // NOTE: maxItemLength is computed here but not read again in this function
    maxItemLength=0;
    for(i=0; i<itemCount; ++i) {
        // name offsets are relative to the start of the ToC: the name's position
        // within outStrings plus the size of the count and the entries
        entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings));
        entry.dataOffset=(uint32_t)offset;
        if(dsLocalToOut!=NULL) {
            dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode);
            if(U_FAILURE(errorCode)) {
                fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n",
                        (long)i, u_errorName(errorCode));
                exit(errorCode);
            }
        }
        length=(int32_t)fwrite(&entry, 1, 8, file);
        if(length!=8) {
            fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n",
                    (long)i, filename);
            exit(U_FILE_ACCESS_ERROR);
        }

        length=items[i].length;
        if(length>maxItemLength) {
            maxItemLength=length;
        }
        // the next item's data follows immediately after this one
        offset+=length;
    }

    // write the item names
    // (outStringTop includes any 0xaa padding added above)
    length=(int32_t)fwrite(outStrings, 1, outStringTop, file);
    if(length!=outStringTop) {
        fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename);
        exit(U_FILE_ACCESS_ERROR);
    }

    // write the items
    for(pItem=items, i=0; i<itemCount; ++pItem, ++i) {
        int32_t type=makeTypeEnum(pItem->type);
        if(ds[type]!=NULL) {
            // swap each item from its platform properties to the desired ones
            // (in place: pItem->data is both input and output)
            udata_swap(
                ds[type],
                pItem->data, pItem->length, pItem->data,
                &errorCode);
            if(U_FAILURE(errorCode)) {
                fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n",
                        (long)i, u_errorName(errorCode));
                exit(errorCode);
            }
        }
        length=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
        if(length!=pItem->length) {
            fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n",
                    (long)i, filename);
            exit(U_FILE_ACCESS_ERROR);
        }
    }

    if(ferror(file)) {
        fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
        exit(U_FILE_ACCESS_ERROR);
    }

    // NOTE(review): the return value of fclose() is not checked, so an error
    // while flushing the last buffered bytes would go unnoticed - confirm intent
    fclose(file);
    // ds[] also holds NULL entries; presumably udata_closeSwapper() accepts NULL - confirm
    for(i=0; i<TYPE_COUNT; ++i) {
        udata_closeSwapper(ds[i]);
    }
}
U_CDECL_END U_CFUNC int32_t U_CALLCONV udata_swapPackage(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { const UDataInfo *pInfo; int32_t headerSize; const uint8_t *inBytes; uint8_t *outBytes; uint32_t itemCount, offset, i; int32_t itemLength; const UDataOffsetTOCEntry *inEntries; UDataOffsetTOCEntry *outEntries; ToCEntry *table; char inPkgName[32], outPkgName[32]; int32_t inPkgNameLength, outPkgNameLength; /* udata_swapDataHeader checks the arguments */ headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return 0; } /* check data format and format version */ pInfo=(const UDataInfo *)((const char *)inData+4); if(!( pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ pInfo->dataFormat[1]==0x6d && pInfo->dataFormat[2]==0x6e && pInfo->dataFormat[3]==0x44 && pInfo->formatVersion[0]==1 )) { udata_printError(ds, "udata_swapPackage(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]); *pErrorCode=U_UNSUPPORTED_ERROR; return 0; } /* * We need to change the ToC name entries so that they have the correct * package name prefix. * Extract the package names from the in/out filenames. */ inPkgNameLength=extractPackageName( ds, inFilename, inPkgName, (int32_t)sizeof(inPkgName), pErrorCode); outPkgNameLength=extractPackageName( ds, outFilename, outPkgName, (int32_t)sizeof(outPkgName), pErrorCode); if(U_FAILURE(*pErrorCode)) { return 0; } /* * It is possible to work with inPkgNameLength!=outPkgNameLength, * but then the length of the data file would change more significantly, * which we are not currently prepared for. 
*/ if(inPkgNameLength!=outPkgNameLength) { udata_printError(ds, "udata_swapPackage(): the package names \"%s\" and \"%s\" must have the same length\n", inPkgName, outPkgName); *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; } inBytes=(const uint8_t *)inData+headerSize; inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); if(length<0) { /* preflighting */ itemCount=ds->readUInt32(*(const uint32_t *)inBytes); if(itemCount==0) { /* no items: count only the item count and return */ return headerSize+4; } /* read the last item's offset and preflight it */ offset=ds->readUInt32(inEntries[itemCount-1].dataOffset); itemLength=udata_swap(ds, inBytes+offset, -1, NULL, pErrorCode); if(U_SUCCESS(*pErrorCode)) { return headerSize+offset+(uint32_t)itemLength; } else { return 0; } } else { /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ length-=headerSize; if(length<4) { /* itemCount does not fit */ offset=0xffffffff; itemCount=0; /* make compilers happy */ } else { itemCount=ds->readUInt32(*(const uint32_t *)inBytes); if(itemCount==0) { offset=4; } else if((uint32_t)length<(4+8*itemCount)) { /* ToC table does not fit */ offset=0xffffffff; } else { /* offset of the last item plus at least 20 bytes for its header */ offset=20+ds->readUInt32(inEntries[itemCount-1].dataOffset); } } if((uint32_t)length<offset) { udata_printError(ds, "udata_swapPackage(): too few bytes (%d after header) for unames.icu\n", length); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } outBytes=(uint8_t *)outData+headerSize; /* swap the item count */ ds->swapArray32(ds, inBytes, 4, outBytes, pErrorCode); if(itemCount==0) { /* no items: just return now */ return headerSize+4; } /* swap the item name strings */ offset=4+8*itemCount; itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset)-offset); udata_swapInvStringBlock(ds, inBytes+offset, itemLength, outBytes+offset, pErrorCode); if(U_FAILURE(*pErrorCode)) { udata_printError(ds, "udata_swapPackage() 
failed to swap the data item name strings\n"); return 0; } /* keep offset and itemLength in case we allocate and copy the strings below */ /* swap the package names into the output charset */ if(ds->outCharset!=U_CHARSET_FAMILY) { UDataSwapper *ds2; ds2=udata_openSwapper(TRUE, U_CHARSET_FAMILY, TRUE, ds->outCharset, pErrorCode); ds2->swapInvChars(ds2, inPkgName, inPkgNameLength, inPkgName, pErrorCode); ds2->swapInvChars(ds2, outPkgName, outPkgNameLength, outPkgName, pErrorCode); udata_closeSwapper(ds2); if(U_FAILURE(*pErrorCode)) { udata_printError(ds, "udata_swapPackage() failed to swap the input/output package names\n"); } } /* change the prefix of each ToC entry name from the old to the new package name */ { char *entryName; for(i=0; i<itemCount; ++i) { entryName=(char *)inBytes+ds->readUInt32(inEntries[i].nameOffset); if(0==uprv_memcmp(entryName, inPkgName, inPkgNameLength)) { uprv_memcpy(entryName, outPkgName, inPkgNameLength); } else { udata_printError(ds, "udata_swapPackage() failed: ToC item %ld does not have the input package name as a prefix\n", (long)i); *pErrorCode=U_INVALID_FORMAT_ERROR; return 0; } } } /* * Allocate the ToC table and, if necessary, a temporary buffer for * pseudo-in-place swapping. * * We cannot swap in-place because: * * 1. If the swapping of an item fails mid-way, then in-place swapping * has destroyed its data. * Out-of-place swapping allows us to then copy its original data. * * 2. If swapping changes the charset family, then we must resort * not only the ToC table but also the data items themselves. * This requires a permutation and is best done with separate in/out * buffers. * * We swapped the strings above to avoid the malloc below if string swapping fails. 
*/ if(inData==outData) { /* +15: prepare for extra padding of a newly-last item */ table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)+length+15); if(table!=NULL) { outBytes=(uint8_t *)(table+itemCount); /* copy the item count and the swapped strings */ uprv_memcpy(outBytes, inBytes, 4); uprv_memcpy(outBytes+offset, inBytes+offset, itemLength); } } else { table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)); } if(table==NULL) { udata_printError(ds, "udata_swapPackage(): out of memory allocating %d bytes\n", inData==outData ? itemCount*sizeof(ToCEntry)+length+15 : itemCount*sizeof(ToCEntry)); *pErrorCode=U_MEMORY_ALLOCATION_ERROR; return 0; } outEntries=(UDataOffsetTOCEntry *)(outBytes+4); /* read the ToC table */ for(i=0; i<itemCount; ++i) { table[i].nameOffset=ds->readUInt32(inEntries[i].nameOffset); table[i].inOffset=ds->readUInt32(inEntries[i].dataOffset); if(i>0) { table[i-1].length=table[i].inOffset-table[i-1].inOffset; } } table[itemCount-1].length=(uint32_t)length-table[itemCount-1].inOffset; if(ds->inCharset==ds->outCharset) { /* no charset swapping, no resorting: keep item offsets the same */ for(i=0; i<itemCount; ++i) { table[i].outOffset=table[i].inOffset; } } else { /* charset swapping: resort items by their swapped names */ /* * Before the actual sorting, we need to make sure that each item * has a length that is a multiple of 16 bytes so that all items * are 16-aligned. * Only the old last item may be missing up to 15 padding bytes. * Add padding bytes for it. * Since the icuswap main() function has already allocated enough * input buffer space and set the last 15 bytes there to 0xaa, * we only need to increase the total data length and the length * of the last item here. 
*/ if((length&0xf)!=0) { int32_t delta=16-(length&0xf); length+=delta; table[itemCount-1].length+=(uint32_t)delta; } uprv_sortArray(table, (int32_t)itemCount, (int32_t)sizeof(ToCEntry), compareToCEntries, outBytes, FALSE, pErrorCode); /* * Note: Before sorting, the inOffset values were in order. * Now the outOffset values are in order. */ /* assign outOffset values */ offset=table[0].inOffset; for(i=0; i<itemCount; ++i) { table[i].outOffset=offset; offset+=table[i].length; } } /* write the output ToC table */ for(i=0; i<itemCount; ++i) { ds->writeUInt32(&outEntries[i].nameOffset, table[i].nameOffset); ds->writeUInt32(&outEntries[i].dataOffset, table[i].outOffset); } /* swap each data item */ for(i=0; i<itemCount; ++i) { /* first copy the item bytes to make sure that unreachable bytes are copied */ uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length); /* swap the item */ udata_swap(ds, inBytes+table[i].inOffset, (int32_t)table[i].length, outBytes+table[i].outOffset, pErrorCode); if(U_FAILURE(*pErrorCode)) { if(ds->outCharset==U_CHARSET_FAMILY) { udata_printError(ds, "warning: udata_swapPackage() failed to swap item \"%s\"\n" " at inOffset 0x%x length 0x%x - %s\n" " the data item will be copied, not swapped\n\n", (char *)outBytes+table[i].nameOffset, table[i].inOffset, table[i].length, u_errorName(*pErrorCode)); } else { udata_printError(ds, "warning: udata_swapPackage() failed to swap an item\n" " at inOffset 0x%x length 0x%x - %s\n" " the data item will be copied, not swapped\n\n", table[i].inOffset, table[i].length, u_errorName(*pErrorCode)); } /* reset the error code, copy the data item, and continue */ *pErrorCode=U_ZERO_ERROR; uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length); } } if(inData==outData) { /* copy the data from the temporary buffer to the in-place buffer */ uprv_memcpy((uint8_t *)outData+headerSize, outBytes, length); } uprv_free(table); return headerSize+length; } }
U_NAMESPACE_END

U_NAMESPACE_USE

//-----------------------------------------------------------------------------
//
//  uspoof_swap   -  byte swap and char encoding swap of spoof data
//
//  ds       swapper describing the input and output properties
//  inData   start of the input, including the generic ICU data header
//  length   number of input bytes, or -1 for preflighting (size only)
//  outData  output buffer; may be the same as inData (in-place swap)
//  status   in/out ICU error code
//
//  Returns the total size (ICU data header plus spoof data) or 0 on failure.
//
//-----------------------------------------------------------------------------
U_CAPI int32_t U_EXPORT2
uspoof_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData,
            UErrorCode *status) {

    if (status == NULL || U_FAILURE(*status)) {
        return 0;
    }
    if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
        *status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    //
    //  Check that the data header is for spoof data.
    //    (Header contents are defined in gencfu.cpp)
    //
    const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData+4);
    if(!(  pInfo->dataFormat[0]==0x43 &&   /* dataFormat="Cfu " */
           pInfo->dataFormat[1]==0x66 &&
           pInfo->dataFormat[2]==0x75 &&
           pInfo->dataFormat[3]==0x20 &&
           pInfo->formatVersion[0]==1  )) {
        udata_printError(ds, "uspoof_swap(): data format %02x.%02x.%02x.%02x "
                             "(format version %02x %02x %02x %02x) is not recognized\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0], pInfo->formatVersion[1],
                         pInfo->formatVersion[2], pInfo->formatVersion[3]);
        *status=U_UNSUPPORTED_ERROR;
        return 0;
    }

    //
    // Swap the data header.  (This is the generic ICU Data Header, not the uspoof Specific
    //                         header).  This swap also conveniently gets us
    //                         the size of the ICU d.h., which lets us locate the start
    //                         of the uspoof specific data.
    //
    int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status);
    if (U_FAILURE(*status)) {
        // Without a valid header size the spoof data cannot be located.
        return 0;
    }

    //
    // Get the Spoof Data Header, and check that it appears to be OK.
    // The declared length must cover at least the header and, together with
    // the ICU data header, must still fit into the int32_t sizes used below.
    //
    const uint8_t         *inBytes = (const uint8_t *)inData+headerSize;
    const SpoofDataHeader *spoofDH = (const SpoofDataHeader *)inBytes;
    uint32_t declaredLength = ds->readUInt32(spoofDH->fLength);
    if (ds->readUInt32(spoofDH->fMagic) != USPOOF_MAGIC ||
        declaredLength < sizeof(SpoofDataHeader) ||
        declaredLength > (uint32_t)(0x7fffffff - headerSize))
    {
        udata_printError(ds, "uspoof_swap(): Spoof Data header is invalid.\n");
        *status=U_UNSUPPORTED_ERROR;
        return 0;
    }

    //
    // Prefight operation?  Just return the size
    //
    int32_t spoofDataLength = (int32_t)declaredLength;
    int32_t totalSize = headerSize + spoofDataLength;
    if (length < 0) {
        return totalSize;
    }

    //
    // Check that length passed in is consistent with length from Spoof data header.
    //
    if (length < totalSize) {
        udata_printError(ds, "uspoof_swap(): too few bytes (%d after ICU Data header) for spoof data.\n",
                         spoofDataLength);
        *status=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    //
    // Swap the Data.  Do the data itself first, then the Spoof Data Header, because
    //                 we need to reference the header to locate the data, and an
    //                 inplace swap of the header leaves it unusable.
    //
    uint8_t          *outBytes = (uint8_t *)outData + headerSize;
    SpoofDataHeader  *outputDH = (SpoofDataHeader *)outBytes;

    int32_t   sectionStart;
    int32_t   sectionLength;

    //
    // If not swapping in place, zero out the output buffer before starting.
    //    Gaps may exist between the individual sections, and these must be zeroed in
    //    the output buffer.  The simplest way to do that is to just zero the whole thing.
    //
    if (inBytes != outBytes) {
        uprv_memset(outBytes, 0, spoofDataLength);
    }

    // Confusables Keys Section   (fCFUKeys)
    sectionStart  = ds->readUInt32(spoofDH->fCFUKeys);
    sectionLength = ds->readUInt32(spoofDH->fCFUKeysSize) * 4;
    ds->swapArray32(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);

    // String Index Section
    sectionStart  = ds->readUInt32(spoofDH->fCFUStringIndex);
    sectionLength = ds->readUInt32(spoofDH->fCFUStringIndexSize) * 2;
    ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);

    // String Table Section
    sectionStart  = ds->readUInt32(spoofDH->fCFUStringTable);
    sectionLength = ds->readUInt32(spoofDH->fCFUStringTableLen) * 2;
    ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);

    // String Lengths Section
    //   (4 bytes per entry, swapped as 16-bit units)
    sectionStart  = ds->readUInt32(spoofDH->fCFUStringLengths);
    sectionLength = ds->readUInt32(spoofDH->fCFUStringLengthsSize) * 4;
    ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);

    // Any Case Trie
    sectionStart  = ds->readUInt32(spoofDH->fAnyCaseTrie);
    sectionLength = ds->readUInt32(spoofDH->fAnyCaseTrieLength);
    utrie2_swap(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);

    // Lower Case Trie
    sectionStart  = ds->readUInt32(spoofDH->fLowerCaseTrie);
    sectionLength = ds->readUInt32(spoofDH->fLowerCaseTrieLength);
    utrie2_swap(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);

    // Script Sets.  The data is an array of int32_t
    sectionStart  = ds->readUInt32(spoofDH->fScriptSets);
    sectionLength = ds->readUInt32(spoofDH->fScriptSetsLength) * sizeof(ScriptSet);
    ds->swapArray32(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);

    // And, last, swap the header itself.
    //   int32_t   fMagic             // swap this
    //   uint8_t   fFormatVersion[4]  // Do not swap this, just copy
    //   int32_t   fLength and all the rest       // Swap the rest, all is 32 bit stuff.
    //
    uint32_t magic = ds->readUInt32(spoofDH->fMagic);
    ds->writeUInt32((uint32_t *)&outputDH->fMagic, magic);

    // The address comparison detects in-place swapping, where no copy is needed.
    if (outputDH->fFormatVersion != spoofDH->fFormatVersion) {
        uprv_memcpy(outputDH->fFormatVersion, spoofDH->fFormatVersion, sizeof(spoofDH->fFormatVersion));
    }
    // swap starting at fLength
    ds->swapArray32(ds, &spoofDH->fLength, sizeof(SpoofDataHeader)-8 /* minus magic and fFormatVersion[4] */, &outputDH->fLength, status);

    return totalSize;
}
U_NAMESPACE_END

U_NAMESPACE_USE

/**
 * Swap dictionary data (data format bytes 0x44 0x69 0x63 0x74, format version 1).
 *
 * After the ICU data header the data starts with DictionaryData::IX_COUNT
 * 32-bit indexes. They are followed by the trie, which ends at
 * indexes[IX_RESERVED1_OFFSET]. Only a trie of type TRIE_TYPE_UCHARS needs
 * swapping (as 16-bit units); a TRIE_TYPE_BYTES trie is just copied.
 *
 * @param ds         swapper describing the input and output properties
 * @param inData     start of the input, including the ICU data header
 * @param length     number of input bytes, or negative for preflighting
 * @param outData    output buffer; may be the same as inData
 * @param pErrorCode in/out ICU error code
 * @return header size plus indexes[IX_TOTAL_SIZE], or 0 on failure
 */
U_CAPI int32_t U_EXPORT2
udict_swap(const UDataSwapper *ds, const void *inData, int32_t length,
           void *outData, UErrorCode *pErrorCode) {
    /* udata_swapDataHeader checks the arguments */
    const int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    const UDataInfo *info = (const UDataInfo *)((const char *)inData + 4);
    if (info->dataFormat[0] != 0x44 ||
        info->dataFormat[1] != 0x69 ||
        info->dataFormat[2] != 0x63 ||
        info->dataFormat[3] != 0x74 ||
        info->formatVersion[0] != 1) {
        udata_printError(ds, "udict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as dictionary data\n",
                         info->dataFormat[0], info->dataFormat[1],
                         info->dataFormat[2], info->dataFormat[3],
                         info->formatVersion[0]);
        *pErrorCode = U_UNSUPPORTED_ERROR;
        return 0;
    }

    const uint8_t *inBytes = (const uint8_t *)inData + headerSize;
    uint8_t *outBytes = (uint8_t *)outData + headerSize;
    const int32_t *inIndexes = (const int32_t *)inBytes;
    int32_t indexes[DictionaryData::IX_COUNT];

    const bool preflighting = length < 0;
    if (!preflighting) {
        /* from here on, length counts the bytes after the header */
        length -= headerSize;
        if (length < (int32_t)(sizeof(indexes))) {
            udata_printError(ds, "udict_swap(): too few bytes (%d after header) for dictionary data\n",
                             length);
            *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    /* read the indexes in platform byte order */
    for (int32_t k = 0; k < DictionaryData::IX_COUNT; ++k) {
        indexes[k] = udata_readInt32(ds, inIndexes[k]);
    }

    const int32_t size = indexes[DictionaryData::IX_TOTAL_SIZE];
    if (preflighting) {
        return headerSize + size;
    }

    if (length < size) {
        udata_printError(ds, "udict_swap(): too few bytes (%d after header) for all of dictionary data\n",
                         length);
        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* copy everything first; this takes care of the parts that need no swapping */
    if (inBytes != outBytes) {
        uprv_memcpy(outBytes, inBytes, size);
    }

    /* the indexes */
    ds->swapArray32(ds, inBytes, sizeof(indexes), outBytes, pErrorCode);

    /* the trie, between the indexes and the first reserved section */
    const int32_t trieStart = (int32_t)sizeof(indexes);
    const int32_t trieLimit = indexes[DictionaryData::IX_RESERVED1_OFFSET];
    const int32_t trieType =
        indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;

    if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
        ds->swapArray16(ds, inBytes + trieStart, trieLimit - trieStart,
                        outBytes + trieStart, pErrorCode);
    } else if (trieType != DictionaryData::TRIE_TYPE_BYTES) {
        udata_printError(ds, "udict_swap(): unknown trie type!\n");
        *pErrorCode = U_UNSUPPORTED_ERROR;
        return 0;
    }
    /* a byte trie needs no swapping */

    /*
     * The sections delimited by IX_RESERVED1_OFFSET, IX_RESERVED2_OFFSET and
     * IX_TOTAL_SIZE are empty in the current format, but may be used later.
     */
    return headerSize + size;
}
/**
 * Swap inverse UCA collation data (invuca.icu), data format "InvC",
 * format version 2.1 or a later 2.x.
 *
 * After the ICU data header the data starts with an InverseUCATableHeader.
 * Its first five 32-bit fields are swapped, then the inverse table
 * (tableSize rows of three 32-bit values at byte offset "table"), then the
 * continuation table (contsSize UChars at byte offset "conts").
 * All other bytes are copied unchanged.
 *
 * @param ds         swapper describing the input and output properties
 * @param inData     start of the input, including the ICU data header
 * @param length     number of input bytes, or negative for preflighting
 * @param outData    output buffer; may be the same as inData
 * @param pErrorCode in/out ICU error code
 * @return header size plus the byteSize field of the table header,
 *         or 0 on failure
 */
U_CAPI int32_t U_EXPORT2
ucol_swapInverseUCA(const UDataSwapper *ds,
                    const void *inData, int32_t length, void *outData,
                    UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize;

    const uint8_t *inBytes;
    uint8_t *outBytes;

    const InverseUCATableHeader *inHeader;
    InverseUCATableHeader *outHeader;
    InverseUCATableHeader header={ 0 };

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x49 &&   /* dataFormat="InvC" */
        pInfo->dataFormat[1]==0x6e &&
        pInfo->dataFormat[2]==0x76 &&
        pInfo->dataFormat[3]==0x43 &&
        pInfo->formatVersion[0]==2 &&
        pInfo->formatVersion[1]>=1
    )) {
        udata_printError(ds, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0], pInfo->formatVersion[1]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    inBytes=(const uint8_t *)inData+headerSize;
    outBytes=(uint8_t *)outData+headerSize;

    inHeader=(const InverseUCATableHeader *)inBytes;
    outHeader=(InverseUCATableHeader *)outBytes;

    /*
     * The inverse UCA collation binary must contain at least the
     * InverseUCATableHeader, starting with its size field.
     * A minimum of 8*4 bytes is required after the ICU data header;
     * check the length against that before reading the size field.
     */
    if(length<0) {
        /* preflighting: only the size field is needed */
        header.byteSize=udata_readInt32(ds, inHeader->byteSize);
    } else {
        int32_t available=length-headerSize;

        if(available>=(8*4)) {
            header.byteSize=udata_readInt32(ds, inHeader->byteSize);
        }
        if(available<(8*4) || (uint32_t)available<header.byteSize) {
            /* report the byte count after the header, as the message says */
            udata_printError(ds, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n",
                             available);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    if(length>=0) {
        /* copy everything, takes care of data that needs no swapping */
        if(inBytes!=outBytes) {
            uprv_memcpy(outBytes, inBytes, header.byteSize);
        }

        /* swap the necessary pieces in the order of their occurrence in the data */

        /* read more of the InverseUCATableHeader (the byteSize field was read above) */
        header.tableSize=   ds->readUInt32(inHeader->tableSize);
        header.contsSize=   ds->readUInt32(inHeader->contsSize);
        header.table=       ds->readUInt32(inHeader->table);
        header.conts=       ds->readUInt32(inHeader->conts);

        /* swap the 32-bit integers in the header */
        ds->swapArray32(ds, inHeader, 5*4, outHeader, pErrorCode);

        /* swap the inverse table; tableSize counts uint32_t[3] rows */
        ds->swapArray32(ds, inBytes+header.table, header.tableSize*3*4,
                           outBytes+header.table, pErrorCode);

        /* swap the continuation table; contsSize counts UChars */
        ds->swapArray16(ds, inBytes+header.conts, header.contsSize*U_SIZEOF_UCHAR,
                           outBytes+header.conts, pErrorCode);
    }

    return headerSize+header.byteSize;
}
/*
 * Swap an ICU .cnv conversion table (data format "cnvt", format version 6.2
 * or a later 6.x).
 *
 * After the ICU data header the data starts with a UConverterStaticData
 * structure. Its structSize and codepage fields are swapped as 32-bit values
 * and its name as invariant characters. For conversionType==UCNV_MBCS the
 * _MBCSHeader follows, then either the base table data (state table,
 * toUFallbacks, unicodeCodeUnits, fromUnicode stages) or, for
 * MBCS_OUTPUT_EXT_ONLY, a base name; optional extension data sits at the
 * offset stored in the upper 24 bits of the header flags.
 *
 * ds          swapper describing the input and output properties
 * inData      start of the input, including the ICU data header
 * length      number of input bytes, or negative for preflighting
 * outData     output buffer; may be the same as inData
 * pErrorCode  in/out ICU error code
 *
 * Every error path visible here returns 0.
 *
 * NOTE(review): the definition breaks off after the final "} else {" below;
 * the branch for other conversion types, the final return and the closing
 * braces are not part of this text.
 */
U_CAPI int32_t U_EXPORT2 ucnv_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { const UDataInfo *pInfo; int32_t headerSize; const uint8_t *inBytes; uint8_t *outBytes; uint32_t offset, count, staticDataSize; int32_t size; const UConverterStaticData *inStaticData; UConverterStaticData *outStaticData; const _MBCSHeader *inMBCSHeader; _MBCSHeader *outMBCSHeader; _MBCSHeader mbcsHeader; uint8_t outputType; const int32_t *inExtIndexes; int32_t extOffset; /* udata_swapDataHeader checks the arguments */ headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return 0; } /* check data format and format version */ pInfo=(const UDataInfo *)((const char *)inData+4); if(!( pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ pInfo->dataFormat[1]==0x6e && pInfo->dataFormat[2]==0x76 && pInfo->dataFormat[3]==0x74 && pInfo->formatVersion[0]==6 && pInfo->formatVersion[1]>=2 )) { udata_printError(ds, "ucnv_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not recognized as an ICU .cnv conversion table\n", pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0], pInfo->formatVersion[1]); *pErrorCode=U_UNSUPPORTED_ERROR; return 0; } inBytes=(const uint8_t *)inData+headerSize; outBytes=(uint8_t *)outData+headerSize; /* read the initial UConverterStaticData structure after the UDataInfo header */ inStaticData=(const UConverterStaticData *)inBytes; outStaticData=(UConverterStaticData *)outBytes; if(length<0) { staticDataSize=ds->readUInt32(inStaticData->structSize); } else { length-=headerSize; if( length<sizeof(UConverterStaticData) || (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize)) ) { udata_printError(ds, "ucnv_swap(): too few bytes (%d after header) for an ICU .cnv conversion table\n", length); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } if(length>=0) { 
/* swap the static data */ if(inStaticData!=outStaticData) { uprv_memcpy(outStaticData, inStaticData, staticDataSize); } ds->swapArray32(ds, &inStaticData->structSize, 4, &outStaticData->structSize, pErrorCode); ds->swapArray32(ds, &inStaticData->codepage, 4, &outStaticData->codepage, pErrorCode); ds->swapInvChars(ds, inStaticData->name, uprv_strlen(inStaticData->name), outStaticData->name, pErrorCode); if(U_FAILURE(*pErrorCode)) { udata_printError(ds, "ucnv_swap(): error swapping converter name - %s\n", u_errorName(*pErrorCode)); return 0; } } inBytes+=staticDataSize; outBytes+=staticDataSize; if(length>=0) { length-=(int32_t)staticDataSize; } /* check for supported conversionType values */ if(inStaticData->conversionType==UCNV_MBCS) { /* swap MBCS data */ inMBCSHeader=(const _MBCSHeader *)inBytes; outMBCSHeader=(_MBCSHeader *)outBytes; /* NOTE(review): with "||" this check passes for 4.x with any minor version and for any major version whose minor version is >=1; "&&" (version 4.1 or later) may have been intended - confirm */ if(!(inMBCSHeader->version[0]==4 || inMBCSHeader->version[1]>=1)) { udata_printError(ds, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n", inMBCSHeader->version[0], inMBCSHeader->version[1]); *pErrorCode=U_UNSUPPORTED_ERROR; return 0; } uprv_memcpy(mbcsHeader.version, inMBCSHeader->version, 4); mbcsHeader.countStates= ds->readUInt32(inMBCSHeader->countStates); mbcsHeader.countToUFallbacks= ds->readUInt32(inMBCSHeader->countToUFallbacks); mbcsHeader.offsetToUCodeUnits= ds->readUInt32(inMBCSHeader->offsetToUCodeUnits); mbcsHeader.offsetFromUTable= ds->readUInt32(inMBCSHeader->offsetFromUTable); mbcsHeader.offsetFromUBytes= ds->readUInt32(inMBCSHeader->offsetFromUBytes); mbcsHeader.flags= ds->readUInt32(inMBCSHeader->flags); mbcsHeader.fromUBytesLength= ds->readUInt32(inMBCSHeader->fromUBytesLength); /* the extension data offset is stored above the low 8 bits of the flags, the output type in the low 8 bits */ extOffset=(int32_t)mbcsHeader.flags>>8; outputType=(uint8_t)mbcsHeader.flags; /* make sure that the output type is known */ switch(outputType) { case MBCS_OUTPUT_1: case MBCS_OUTPUT_2: case MBCS_OUTPUT_3: case MBCS_OUTPUT_4: case MBCS_OUTPUT_3_EUC: case MBCS_OUTPUT_4_EUC: case MBCS_OUTPUT_2_SISO: case MBCS_OUTPUT_EXT_ONLY: /* OK */ 
break; default: udata_printError(ds, "ucnv_swap(): unsupported MBCS output type 0x%x\n", outputType); *pErrorCode=U_UNSUPPORTED_ERROR; return 0; } /* calculate the length of the MBCS data */ if(extOffset==0) { size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsHeader.fromUBytesLength); /* avoid compiler warnings - not otherwise necessary, and the value does not matter */ inExtIndexes=NULL; } else { /* there is extension data after the base data, see ucnv_ext.h */ if(length>=0 && length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) { udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n", length); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } inExtIndexes=(const int32_t *)(inBytes+extOffset); size=extOffset+udata_readInt32(ds, inExtIndexes[UCNV_EXT_SIZE]); } if(length>=0) { if(length<size) { udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", length); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } /* copy the data for inaccessible bytes */ if(inBytes!=outBytes) { uprv_memcpy(outBytes, inBytes, size); } /* swap the _MBCSHeader */ ds->swapArray32(ds, &inMBCSHeader->countStates, 7*4, &outMBCSHeader->countStates, pErrorCode); if(outputType==MBCS_OUTPUT_EXT_ONLY) { /* * extension-only file, * contains a base name instead of normal base table data */ /* swap the base name, between the header and the extension data */ ds->swapInvChars(ds, inMBCSHeader+1, uprv_strlen((const char *)(inMBCSHeader+1)), outMBCSHeader+1, pErrorCode); } else { /* normal file with base table data */ /* swap the state table, 1kB per state */ ds->swapArray32(ds, inMBCSHeader+1, (int32_t)(mbcsHeader.countStates*1024), outMBCSHeader+1, pErrorCode); /* swap the toUFallbacks[] */ offset=sizeof(_MBCSHeader)+mbcsHeader.countStates*1024; ds->swapArray32(ds, inBytes+offset, (int32_t)(mbcsHeader.countToUFallbacks*8), outBytes+offset, pErrorCode); /* swap the unicodeCodeUnits[] 
*/ offset=mbcsHeader.offsetToUCodeUnits; count=mbcsHeader.offsetFromUTable-offset; ds->swapArray16(ds, inBytes+offset, (int32_t)count, outBytes+offset, pErrorCode); /* offset to the stage 1 table, independent of the outputType */ offset=mbcsHeader.offsetFromUTable; if(outputType==MBCS_OUTPUT_1) { /* SBCS: swap the fromU tables, all 16 bits wide */ count=(mbcsHeader.offsetFromUBytes-offset)+mbcsHeader.fromUBytesLength; ds->swapArray16(ds, inBytes+offset, (int32_t)count, outBytes+offset, pErrorCode); } else { /* otherwise: swap the stage tables separately */ /* stage 1 table: uint16_t[0x440 or 0x40] */ if(inStaticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { count=0x440*2; /* for all of Unicode */ } else { count=0x40*2; /* only BMP */ } ds->swapArray16(ds, inBytes+offset, (int32_t)count, outBytes+offset, pErrorCode); /* stage 2 table: uint32_t[] */ offset+=count; count=mbcsHeader.offsetFromUBytes-offset; ds->swapArray32(ds, inBytes+offset, (int32_t)count, outBytes+offset, pErrorCode); /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */ offset=mbcsHeader.offsetFromUBytes; count=mbcsHeader.fromUBytesLength; switch(outputType) { case MBCS_OUTPUT_2: case MBCS_OUTPUT_3_EUC: case MBCS_OUTPUT_2_SISO: ds->swapArray16(ds, inBytes+offset, (int32_t)count, outBytes+offset, pErrorCode); break; case MBCS_OUTPUT_4: ds->swapArray32(ds, inBytes+offset, (int32_t)count, outBytes+offset, pErrorCode); break; default: /* just uint8_t[], nothing to swap */ break; } } } if(extOffset!=0) { /* swap the extension data; from here on "length" is reused for the element counts read from the extension indexes */ inBytes+=extOffset; outBytes+=extOffset; /* swap toUTable[] */ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_INDEX]); length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_LENGTH]); ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); /* swap toUUChars[] */ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_INDEX]); length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_LENGTH]); ds->swapArray16(ds, inBytes+offset, 
length*2, outBytes+offset, pErrorCode); /* swap fromUTableUChars[] */ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_UCHARS_INDEX]); length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_LENGTH]); ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); /* swap fromUTableValues[] */ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_VALUES_INDEX]); /* same length as for fromUTableUChars[] */ ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); /* no need to swap fromUBytes[] */ /* swap fromUStage12[] */ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]); length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]); ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); /* swap fromUStage3[] */ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]); length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]); ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); /* swap fromUStage3b[] */ offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]); length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]); ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); /* swap indexes[] last: they were needed above to locate the other arrays */ length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_INDEXES_LENGTH]); ds->swapArray32(ds, inBytes, length*4, outBytes, pErrorCode); } } } else {
U_CAPI int32_t U_EXPORT2 ucnv_swapAliases(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { const UDataInfo *pInfo; int32_t headerSize; const uint16_t *inTable; const uint32_t *inSectionSizes; uint32_t toc[offsetsCount]; uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */ uint32_t i, count, tocLength, topOffset; TempRow rows[STACK_ROW_CAPACITY]; uint16_t resort[STACK_ROW_CAPACITY]; TempAliasTable tempTable; /* udata_swapDataHeader checks the arguments */ headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return 0; } /* check data format and format version */ pInfo=(const UDataInfo *)((const char *)inData+4); if(!( pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */ pInfo->dataFormat[1]==0x76 && pInfo->dataFormat[2]==0x41 && pInfo->dataFormat[3]==0x6c && pInfo->formatVersion[0]==3 )) { udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n", pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]); *pErrorCode=U_UNSUPPORTED_ERROR; return 0; } /* an alias table must contain at least the table of contents array */ if(length>=0 && (length-headerSize)<4*(1+minTocLength)) { udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n", length-headerSize); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } inSectionSizes=(const uint32_t *)((const char *)inData+headerSize); inTable=(const uint16_t *)inSectionSizes; uprv_memset(toc, 0, sizeof(toc)); toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]); if(tocLength<minTocLength || offsetsCount<=tocLength) { udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength); *pErrorCode=U_INVALID_FORMAT_ERROR; return 0; } /* read the 
known part of the table of contents */ for(i=converterListIndex; i<=tocLength; ++i) { toc[i]=ds->readUInt32(inSectionSizes[i]); } /* compute offsets */ uprv_memset(offsets, 0, sizeof(offsets)); offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */ for(i=tagListIndex; i<=tocLength; ++i) { offsets[i]=offsets[i-1]+toc[i-1]; } /* compute the overall size of the after-header data, in numbers of 16-bit units */ topOffset=offsets[i-1]+toc[i-1]; if(length>=0) { uint16_t *outTable; const uint16_t *p, *p2; uint16_t *q, *q2; uint16_t oldIndex; if((length-headerSize)<(2*(int32_t)topOffset)) { udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n", length-headerSize); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } outTable=(uint16_t *)((char *)outData+headerSize); /* swap the entire table of contents */ ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode); /* swap unormalized strings & normalized strings */ ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]), outTable+offsets[stringTableIndex], pErrorCode); if(U_FAILURE(*pErrorCode)) { udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n"); return 0; } if(ds->inCharset==ds->outCharset) { /* no need to sort, just swap all 16-bit values together */ ds->swapArray16(ds, inTable+offsets[converterListIndex], 2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]), outTable+offsets[converterListIndex], pErrorCode); } else { /* allocate the temporary table for sorting */ count=toc[aliasListIndex]; tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */ if(count<=STACK_ROW_CAPACITY) { tempTable.rows=rows; tempTable.resort=resort; } else { tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2); if(tempTable.rows==NULL) { udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory 
for sorting tables (max length: %u)\n", count); *pErrorCode=U_MEMORY_ALLOCATION_ERROR; return 0; } tempTable.resort=(uint16_t *)(tempTable.rows+count); } if(ds->outCharset==U_ASCII_FAMILY) { tempTable.stripForCompare=ucnv_io_stripASCIIForCompare; } else /* U_EBCDIC_FAMILY */ { tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare; } /* * Sort unique aliases+mapped names. * * We need to sort the list again by outCharset strings because they * sort differently for different charset families. * First we set up a temporary table with the string indexes and * sorting indexes and sort that. * Then we permutate and copy/swap the actual values. */ p=inTable+offsets[aliasListIndex]; q=outTable+offsets[aliasListIndex]; p2=inTable+offsets[untaggedConvArrayIndex]; q2=outTable+offsets[untaggedConvArrayIndex]; for(i=0; i<count; ++i) { tempTable.rows[i].strIndex=ds->readUInt16(p[i]); tempTable.rows[i].sortIndex=(uint16_t)i; } uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow), io_compareRows, &tempTable, FALSE, pErrorCode); if(U_SUCCESS(*pErrorCode)) { /* copy/swap/permutate items */ if(p!=q) { for(i=0; i<count; ++i) { oldIndex=tempTable.rows[i].sortIndex; ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode); ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode); } } else { /* * If we swap in-place, then the permutation must use another * temporary array (tempTable.resort) * before the results are copied to the outBundle. 
*/ uint16_t *r=tempTable.resort; for(i=0; i<count; ++i) { oldIndex=tempTable.rows[i].sortIndex; ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode); } uprv_memcpy(q, r, 2*count); for(i=0; i<count; ++i) { oldIndex=tempTable.rows[i].sortIndex; ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode); } uprv_memcpy(q2, r, 2*count); } } if(tempTable.rows!=rows) { uprv_free(tempTable.rows); } if(U_FAILURE(*pErrorCode)) { udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n", count); return 0; } /* swap remaining 16-bit values */ ds->swapArray16(ds, inTable+offsets[converterListIndex], 2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]), outTable+offsets[converterListIndex], pErrorCode); ds->swapArray16(ds, inTable+offsets[taggedAliasArrayIndex], 2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]), outTable+offsets[taggedAliasArrayIndex], pErrorCode); } } return headerSize+2*(int32_t)topOffset; }
/**
 * Swap a Unicode properties data file (dataFormat "UPro", formatVersion 3..7).
 *
 * The data after the header is an int32_t indexes[UPROPS_INDEX_COUNT] array
 * followed by the sections those indexes delimit (all index values are
 * counted in 32-bit units from the start of the indexes array):
 * main properties trie, props32 + exceptions words, UChars,
 * additional trie, properties vectors, Script_Extensions data.
 *
 * @param ds         swapper describing the input and output platforms
 * @param inData     start of the input data, including the data header
 * @param length     number of input bytes, or a negative value to only
 *                   compute the size without writing any output
 * @param outData    start of the output buffer (may be the same as inData)
 * @param pErrorCode in/out ICU error code
 * @return 0 if the header swap fails, the format is not recognized, or the
 *         input is too short; otherwise headerSize plus 4 times the
 *         UPROPS_RESERVED_INDEX_7 index value. Failures of the individual
 *         section swaps are reported only through pErrorCode.
 */
static int32_t U_CALLCONV
uprops_swap(const UDataSwapper *ds,
            const void *inData, int32_t length, void *outData,
            UErrorCode *pErrorCode) {
    /* udata_swapDataHeader checks the arguments */
    const int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    const UDataInfo *info=(const UDataInfo *)((const char *)inData+4);
    if( info->dataFormat[0]!=0x55 ||   /* dataFormat="UPro" */
        info->dataFormat[1]!=0x50 ||
        info->dataFormat[2]!=0x72 ||
        info->dataFormat[3]!=0x6f ||
        info->formatVersion[0]<3 || info->formatVersion[0]>7 ||
        /* before formatVersion 7 the trie shift values are part of the version */
        (info->formatVersion[0]<7 &&
            (info->formatVersion[2]!=UTRIE_SHIFT ||
             info->formatVersion[3]!=UTRIE_INDEX_SHIFT))
    ) {
        udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n",
                         info->dataFormat[0], info->dataFormat[1],
                         info->dataFormat[2], info->dataFormat[3],
                         info->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /* the properties file must contain at least the indexes array */
    int32_t indexes[UPROPS_INDEX_COUNT];
    if(length>=0) {
        const int32_t afterHeader=length-headerSize;
        if(afterHeader<(int32_t)sizeof(indexes)) {
            udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
                             afterHeader);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    /* read the indexes in the input platform's byte order */
    const int32_t *in32=(const int32_t *)((const char *)inData+headerSize);
    for(int32_t k=0; k<UPROPS_INDEX_COUNT; ++k) {
        indexes[k]=udata_readInt32(ds, in32[k]);
    }

    /*
     * comments are copied from the data format description in genprops/store.c
     * indexes[] constants are in uprops.h
     */
    if(length>=0) {
        /*
         * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size.
         * In earlier formatVersions, it is 0 and a lower indexes entry
         * has the top of the last item.
         * Scan downward (never looking at entry 0) for the first non-zero entry.
         */
        int32_t dataTop=0;
        for(int32_t k=UPROPS_DATA_TOP_INDEX; k>0; --k) {
            dataTop=indexes[k];
            if(dataTop!=0) {
                break;
            }
        }

        if((length-headerSize)<(4*dataTop)) {
            udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
                             length-headerSize);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        int32_t *out32=(int32_t *)((char *)outData+headerSize);

        /* copy everything for inaccessible data (padding) */
        if(in32!=out32) {
            uprv_memcpy(out32, in32, 4*(size_t)dataTop);
        }

        /* section boundaries, in 32-bit units from the start of the indexes */
        const int32_t props32Start=indexes[UPROPS_PROPS32_INDEX];
        const int32_t exceptionsTop=indexes[UPROPS_EXCEPTIONS_TOP_INDEX];
        const int32_t addTrieStart=indexes[UPROPS_ADDITIONAL_TRIE_INDEX];
        const int32_t addVectorsStart=indexes[UPROPS_ADDITIONAL_VECTORS_INDEX];
        const int32_t scxStart=indexes[UPROPS_SCRIPT_EXTENSIONS_INDEX];
        const int32_t scxTop=indexes[UPROPS_RESERVED_INDEX_7];

        /* swap the indexes[16] */
        ds->swapArray32(ds, in32, 4*UPROPS_INDEX_COUNT, out32, pErrorCode);

        /*
         * swap the main properties UTrie
         * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))
         */
        utrie2_swapAnyVersion(ds,
            in32+UPROPS_INDEX_COUNT,
            4*(props32Start-UPROPS_INDEX_COUNT),
            out32+UPROPS_INDEX_COUNT,
            pErrorCode);

        /*
         * swap the properties and exceptions words
         * P  const uint32_t props32[i1-i0];
         * E  const uint32_t exceptions[i2-i1];
         */
        ds->swapArray32(ds,
            in32+props32Start,
            4*(exceptionsTop-props32Start),
            out32+props32Start,
            pErrorCode);

        /*
         * swap the UChars
         * U  const UChar uchars[2*(i3-i2)];
         */
        ds->swapArray16(ds,
            in32+exceptionsTop,
            4*(addTrieStart-exceptionsTop),
            out32+exceptionsTop,
            pErrorCode);

        /*
         * swap the additional UTrie
         * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties
         */
        utrie2_swapAnyVersion(ds,
            in32+addTrieStart,
            4*(addVectorsStart-addTrieStart),
            out32+addTrieStart,
            pErrorCode);

        /*
         * swap the properties vectors
         * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4];
         */
        ds->swapArray32(ds,
            in32+addVectorsStart,
            4*(scxStart-addVectorsStart),
            out32+addVectorsStart,
            pErrorCode);

        // swap the Script_Extensions data
        // SCX const uint16_t scriptExtensions[2*(i7-i6)];
        ds->swapArray16(ds,
            in32+scxStart,
            4*(scxTop-scxStart),
            out32+scxStart,
            pErrorCode);
    }

    /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */
    return headerSize+4*indexes[UPROPS_RESERVED_INDEX_7];
}
/** * swap a selector into the desired Endianness and Asciiness of * the system. Just as FYI, selectors are always saved in the format * of the system that created them. They are only converted if used * on another system. In other words, selectors created on different * system can be different even if the params are identical (endianness * and Asciiness differences only) * * @param ds pointer to data swapper containing swapping info * @param inData pointer to incoming data * @param length length of inData in bytes * @param outData pointer to output data. Capacity should * be at least equal to capacity of inData * @param status an in/out ICU UErrorCode * @return 0 on failure, number of bytes swapped on success * number of bytes swapped can be smaller than length */ static int32_t ucnvsel_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *status) { /* udata_swapDataHeader checks the arguments */ int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, status); if(U_FAILURE(*status)) { return 0; } /* check data format and format version */ const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4); if(!( pInfo->dataFormat[0] == 0x43 && /* dataFormat="CSel" */ pInfo->dataFormat[1] == 0x53 && pInfo->dataFormat[2] == 0x65 && pInfo->dataFormat[3] == 0x6c )) { udata_printError(ds, "ucnvsel_swap(): data format %02x.%02x.%02x.%02x is not recognized as UConverterSelector data\n", pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3]); *status = U_INVALID_FORMAT_ERROR; return 0; } if(pInfo->formatVersion[0] != 1) { udata_printError(ds, "ucnvsel_swap(): format version %02x is not supported\n", pInfo->formatVersion[0]); *status = U_UNSUPPORTED_ERROR; return 0; } if(length >= 0) { length -= headerSize; if(length < 16*4) { udata_printError(ds, "ucnvsel_swap(): too few bytes (%d after header) for UConverterSelector data\n", length); *status = U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } 
const uint8_t *inBytes = (const uint8_t *)inData + headerSize; uint8_t *outBytes = (uint8_t *)outData + headerSize; /* read the indexes */ const int32_t *inIndexes = (const int32_t *)inBytes; int32_t indexes[16]; int32_t i; for(i = 0; i < 16; ++i) { indexes[i] = udata_readInt32(ds, inIndexes[i]); } /* get the total length of the data */ int32_t size = indexes[UCNVSEL_INDEX_SIZE]; if(length >= 0) { if(length < size) { udata_printError(ds, "ucnvsel_swap(): too few bytes (%d after header) for all of UConverterSelector data\n", length); *status = U_INDEX_OUTOFBOUNDS_ERROR; return 0; } /* copy the data for inaccessible bytes */ if(inBytes != outBytes) { uprv_memcpy(outBytes, inBytes, size); } int32_t offset = 0, count; /* swap the int32_t indexes[] */ count = UCNVSEL_INDEX_COUNT*4; ds->swapArray32(ds, inBytes, count, outBytes, status); offset += count; /* swap the UTrie2 */ count = indexes[UCNVSEL_INDEX_TRIE_SIZE]; utrie2_swap(ds, inBytes + offset, count, outBytes + offset, status); offset += count; /* swap the uint32_t pv[] */ count = indexes[UCNVSEL_INDEX_PV_COUNT]*4; ds->swapArray32(ds, inBytes + offset, count, outBytes + offset, status); offset += count; /* swap the encoding names */ count = indexes[UCNVSEL_INDEX_NAMES_LENGTH]; ds->swapInvChars(ds, inBytes + offset, count, outBytes + offset, status); offset += count; U_ASSERT(offset == size); } return headerSize + size; }
void Package::readPackage(const char *filename) { UDataSwapper *ds; const UDataInfo *pInfo; UErrorCode errorCode; const uint8_t *inBytes; int32_t length, offset, i; int32_t itemLength, typeEnum; char type; const UDataOffsetTOCEntry *inEntries; extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName)); /* read the file */ inData=readFile(NULL, filename, inLength, type); length=inLength; /* * swap the header - even if the swapping itself is a no-op * because it tells us the header length */ errorCode=U_ZERO_ERROR; makeTypeProps(type, inCharset, inIsBigEndian); ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", filename, u_errorName(errorCode)); exit(errorCode); } ds->printError=printPackageError; ds->printErrorContext=stderr; headerLength=sizeof(header); if(length<headerLength) { headerLength=length; } headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode); if(U_FAILURE(errorCode)) { exit(errorCode); } /* check data format and format version */ pInfo=(const UDataInfo *)((const char *)inData+4); if(!( pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ pInfo->dataFormat[1]==0x6d && pInfo->dataFormat[2]==0x6e && pInfo->dataFormat[3]==0x44 && pInfo->formatVersion[0]==1 )) { fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]); exit(U_UNSUPPORTED_ERROR); } inIsBigEndian=(UBool)pInfo->isBigEndian; inCharset=pInfo->charsetFamily; inBytes=(const uint8_t *)inData+headerLength; inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ length-=headerLength; if(length<4) { /* itemCount does not fit */ offset=0x7fffffff; } else { 
itemCount=udata_readInt32(ds, *(const int32_t *)inBytes); if(itemCount==0) { offset=4; } else if(length<(4+8*itemCount)) { /* ToC table does not fit */ offset=0x7fffffff; } else { /* offset of the last item plus at least 20 bytes for its header */ offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset); } } if(length<offset) { fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n", (long)length); exit(U_INDEX_OUTOFBOUNDS_ERROR); } /* do not modify the package length variable until the last item's length is set */ if(itemCount>0) { char prefix[MAX_PKG_NAME_LENGTH+4]; char *s, *inItemStrings; int32_t inPkgNameLength, prefixLength, stringsOffset; if(itemCount>MAX_FILE_COUNT) { fprintf(stderr, "icupkg: too many items, maximum is %d\n", MAX_FILE_COUNT); exit(U_BUFFER_OVERFLOW_ERROR); } /* swap the item name strings */ stringsOffset=4+8*itemCount; itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset; // don't include padding bytes at the end of the item names while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) { --itemLength; } if((inStringTop+itemLength)>STRING_STORE_SIZE) { fprintf(stderr, "icupkg: total length of item name strings too long\n"); exit(U_BUFFER_OVERFLOW_ERROR); } inItemStrings=inStrings+inStringTop; ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n"); exit(U_INVALID_FORMAT_ERROR); } inStringTop+=itemLength; // reset the Item entries memset(items, 0, itemCount*sizeof(Item)); inPkgNameLength=strlen(inPkgName); memcpy(prefix, inPkgName, inPkgNameLength); prefixLength=inPkgNameLength; /* * Get the common prefix of the items. 
* New-style ICU .dat packages use tree separators ('/') between package names, * tree names, and item names, * while old-style ICU .dat packages (before multi-tree support) * use an underscore ('_') between package and item names. */ offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset; s=inItemStrings+offset; if( (int32_t)strlen(s)>=(inPkgNameLength+2) && 0==memcmp(s, inPkgName, inPkgNameLength) && s[inPkgNameLength]=='_' ) { // old-style .dat package prefix[prefixLength++]='_'; } else { // new-style .dat package prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR // then the test in the loop below will fail } prefix[prefixLength]=0; /* read the ToC table */ for(i=0; i<itemCount; ++i) { // skip the package part of the item name, error if it does not match the actual package name // or if nothing follows the package name offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset; s=inItemStrings+offset; if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) { fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n", s, prefix); exit(U_UNSUPPORTED_ERROR); } items[i].name=s+prefixLength; // set the item's data items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset); if(i>0) { items[i-1].length=(int32_t)(items[i].data-items[i-1].data); // set the previous item's platform type typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode); if(typeEnum<0 || U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); exit(U_INVALID_FORMAT_ERROR); } items[i-1].type=makeTypeLetter(typeEnum); } items[i].isDataOwned=FALSE; } // set the last item's length items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset); // set the last item's platform type typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, 
&errorCode); if(typeEnum<0 || U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); exit(U_INVALID_FORMAT_ERROR); } items[itemCount-1].type=makeTypeLetter(typeEnum); if(type!=U_ICUDATA_TYPE_LETTER[0]) { // sort the item names for the local charset sortItems(); } } udata_closeSwapper(ds); }