示例#1
0
/*
 * Byte-swap "Norm" data (unorm.icu), format version 2.
 *
 * ds         - swapper that supplies the read and swap callbacks
 * inData     - input, starting with the ICU data header
 * length     - number of input bytes; when negative, nothing is copied or
 *              swapped and only the size is calculated
 * outData    - output buffer; the bulk copy is skipped when it is the same
 *              buffer as inData
 * pErrorCode - in/out error code
 *
 * Returns the header size plus the data size derived from the indexes,
 * or 0 if an error was detected.
 */
static int32_t U_CALLCONV
unorm_swap(const UDataSwapper *ds,
           const void *inData, int32_t length, void *outData,
           UErrorCode *pErrorCode) {
    /* ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow */
    enum { kIndexCount=32, kIndexBytes=kIndexCount*4 };

    int32_t indexes[kIndexCount];
    int32_t headerSize, dataSize, pos, nBytes, k;

    const UDataInfo *info;
    const int32_t *srcIndexes;
    const uint8_t *src;
    uint8_t *dst;

    /* the header swapper validates the arguments for us */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* accept only dataFormat="Norm" with major format version 2 */
    info=(const UDataInfo *)((const char *)inData+4);
    if( info->dataFormat[0]!=0x4e ||
        info->dataFormat[1]!=0x6f ||
        info->dataFormat[2]!=0x72 ||
        info->dataFormat[3]!=0x6d ||
        info->formatVersion[0]!=2
    ) {
        udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n",
                         info->dataFormat[0], info->dataFormat[1],
                         info->dataFormat[2], info->dataFormat[3],
                         info->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    src=(const uint8_t *)inData+headerSize;
    srcIndexes=(const int32_t *)src;

    /* from here on, a non-negative length counts only the bytes after the header */
    if(length>=0) {
        length-=headerSize;
        if(length<kIndexBytes) {
            udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n",
                             length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    /* fetch the indexes in the input byte order */
    for(k=0; k<kIndexCount; ++k) {
        indexes[k]=udata_readInt32(ds, srcIndexes[k]);
    }

    /* add up the sizes of all parts, starting with the indexes[] themselves */
    dataSize=kIndexBytes;
    dataSize+=indexes[_NORM_INDEX_TRIE_SIZE];
    dataSize+=indexes[_NORM_INDEX_UCHAR_COUNT]*2;
    dataSize+=indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2;
    dataSize+=indexes[_NORM_INDEX_FCD_TRIE_SIZE];
    dataSize+=indexes[_NORM_INDEX_AUX_TRIE_SIZE];
    dataSize+=indexes[_NORM_INDEX_CANON_SET_COUNT]*2;

    /* negative length: the caller only wants the size */
    if(length<0) {
        return headerSize+dataSize;
    }

    if(length<dataSize) {
        udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n",
                         length);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    dst=(uint8_t *)outData+headerSize;

    /* start from a plain copy so that bytes not touched by any swap are carried over */
    if(src!=dst) {
        uprv_memcpy(dst, src, dataSize);
    }

    /* indexes[]: 32-bit values at the very start of the data */
    ds->swapArray32(ds, src, kIndexBytes, dst, pErrorCode);
    pos=kIndexBytes;

    /* main UTrie */
    nBytes=indexes[_NORM_INDEX_TRIE_SIZE];
    utrie_swap(ds, src+pos, nBytes, dst+pos, pErrorCode);
    pos+=nBytes;

    /* uint16_t extraData[] followed directly by uint16_t combiningTable[] */
    nBytes=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2;
    ds->swapArray16(ds, src+pos, nBytes, dst+pos, pErrorCode);
    pos+=nBytes;

    /* FCD UTrie, present only when its size is non-zero */
    nBytes=indexes[_NORM_INDEX_FCD_TRIE_SIZE];
    if(nBytes!=0) {
        utrie_swap(ds, src+pos, nBytes, dst+pos, pErrorCode);
        pos+=nBytes;
    }

    /* aux UTrie, present only when its size is non-zero */
    nBytes=indexes[_NORM_INDEX_AUX_TRIE_SIZE];
    if(nBytes!=0) {
        utrie_swap(ds, src+pos, nBytes, dst+pos, pErrorCode);
        pos+=nBytes;
    }

    /* trailing 16-bit units counted by _NORM_INDEX_CANON_SET_COUNT */
    nBytes=indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
    ds->swapArray16(ds, src+pos, nBytes, dst+pos, pErrorCode);

    return headerSize+dataSize;
}
示例#2
0
文件: rbbidata.cpp 项目: Botyto/Core
U_NAMESPACE_END
U_NAMESPACE_USE

//-----------------------------------------------------------------------------
//
//  ubrk_swap   -  byte swap and char encoding swap of RBBI data
//
//-----------------------------------------------------------------------------

U_CAPI int32_t U_EXPORT2
ubrk_swap(const UDataSwapper * ds, const void * inData, int32_t length, void * outData,
          UErrorCode * status)
{

	if (status == NULL || U_FAILURE(*status))
	{
		return 0;
	}
	if (ds == NULL || inData == NULL || length < -1 || (length > 0 && outData == NULL))
	{
		*status = U_ILLEGAL_ARGUMENT_ERROR;
		return 0;
	}

	//
	//  Check that the data header is for for break data.
	//    (Header contents are defined in genbrk.cpp)
	//
	const UDataInfo * pInfo = (const UDataInfo *)((const char *)inData + 4);
	if (!(pInfo->dataFormat[0] == 0x42 &&  /* dataFormat="Brk " */
	      pInfo->dataFormat[1] == 0x72 &&
	      pInfo->dataFormat[2] == 0x6b &&
	      pInfo->dataFormat[3] == 0x20 &&
	      pInfo->formatVersion[0] == 3))
	{
		udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n",
		                 pInfo->dataFormat[0], pInfo->dataFormat[1],
		                 pInfo->dataFormat[2], pInfo->dataFormat[3],
		                 pInfo->formatVersion[0]);
		*status = U_UNSUPPORTED_ERROR;
		return 0;
	}

	//
	// Swap the data header.  (This is the generic ICU Data Header, not the RBBI Specific
	//                         RBBIDataHeader).  This swap also conveniently gets us
	//                         the size of the ICU d.h., which lets us locate the start
	//                         of the RBBI specific data.
	//
	int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, status);


	//
	// Get the RRBI Data Header, and check that it appears to be OK.
	//
	//    Note:  ICU 3.2 and earlier, RBBIDataHeader::fDataFormat was actually
	//           an int32_t with a value of 1.  Starting with ICU 3.4,
	//           RBBI's fDataFormat matches the dataFormat field from the
	//           UDataInfo header, four int8_t bytes.  The value is {3,1,0,0}
	//
	const uint8_t * inBytes = (const uint8_t *)inData + headerSize;
	RBBIDataHeader * rbbiDH = (RBBIDataHeader *)inBytes;
	if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 ||
	    rbbiDH->fFormatVersion[0] != 3 ||
	    ds->readUInt32(rbbiDH->fLength)  <  sizeof(RBBIDataHeader))
	{
		udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n");
		*status = U_UNSUPPORTED_ERROR;
		return 0;
	}

	//
	// Prefight operation?  Just return the size
	//
	int32_t breakDataLength = ds->readUInt32(rbbiDH->fLength);
	int32_t totalSize = headerSize + breakDataLength;
	if (length < 0)
	{
		return totalSize;
	}

	//
	// Check that length passed in is consistent with length from RBBI data header.
	//
	if (length < totalSize)
	{
		udata_printError(ds, "ubrk_swap(): too few bytes (%d after ICU Data header) for break data.\n",
		                 breakDataLength);
		*status = U_INDEX_OUTOFBOUNDS_ERROR;
		return 0;
	}


	//
	// Swap the Data.  Do the data itself first, then the RBBI Data Header, because
	//                 we need to reference the header to locate the data, and an
	//                 inplace swap of the header leaves it unusable.
	//
	uint8_t     *    outBytes = (uint8_t *)outData + headerSize;
	RBBIDataHeader * outputDH = (RBBIDataHeader *)outBytes;

	int32_t   tableStartOffset;
	int32_t   tableLength;

	//
	// If not swapping in place, zero out the output buffer before starting.
	//    Individual tables and other data items within are aligned to 8 byte boundaries
	//    when originally created.  Any unused space between items needs to be zero.
	//
	if (inBytes != outBytes)
	{
		uprv_memset(outBytes, 0, breakDataLength);
	}

	//
	// Each state table begins with several 32 bit fields.  Calculate the size
	//   in bytes of these.
	//
	int32_t         topSize = offsetof(RBBIStateTable, fTableData);

	// Forward state table.
	tableStartOffset = ds->readUInt32(rbbiDH->fFTable);
	tableLength      = ds->readUInt32(rbbiDH->fFTableLen);

	if (tableLength > 0)
	{
		ds->swapArray32(ds, inBytes + tableStartOffset, topSize,
		                outBytes + tableStartOffset, status);
		ds->swapArray16(ds, inBytes + tableStartOffset + topSize, tableLength - topSize,
		                outBytes + tableStartOffset + topSize, status);
	}

	// Reverse state table.  Same layout as forward table, above.
	tableStartOffset = ds->readUInt32(rbbiDH->fRTable);
	tableLength      = ds->readUInt32(rbbiDH->fRTableLen);

	if (tableLength > 0)
	{
		ds->swapArray32(ds, inBytes + tableStartOffset, topSize,
		                outBytes + tableStartOffset, status);
		ds->swapArray16(ds, inBytes + tableStartOffset + topSize, tableLength - topSize,
		                outBytes + tableStartOffset + topSize, status);
	}

	// Safe Forward state table.  Same layout as forward table, above.
	tableStartOffset = ds->readUInt32(rbbiDH->fSFTable);
	tableLength      = ds->readUInt32(rbbiDH->fSFTableLen);

	if (tableLength > 0)
	{
		ds->swapArray32(ds, inBytes + tableStartOffset, topSize,
		                outBytes + tableStartOffset, status);
		ds->swapArray16(ds, inBytes + tableStartOffset + topSize, tableLength - topSize,
		                outBytes + tableStartOffset + topSize, status);
	}

	// Safe Reverse state table.  Same layout as forward table, above.
	tableStartOffset = ds->readUInt32(rbbiDH->fSRTable);
	tableLength      = ds->readUInt32(rbbiDH->fSRTableLen);

	if (tableLength > 0)
	{
		ds->swapArray32(ds, inBytes + tableStartOffset, topSize,
		                outBytes + tableStartOffset, status);
		ds->swapArray16(ds, inBytes + tableStartOffset + topSize, tableLength - topSize,
		                outBytes + tableStartOffset + topSize, status);
	}

	// Trie table for character categories
	utrie_swap(ds, inBytes + ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen),
	           outBytes + ds->readUInt32(rbbiDH->fTrie), status);

	// Source Rules Text.  It's UChar data
	ds->swapArray16(ds, inBytes + ds->readUInt32(rbbiDH->fRuleSource), ds->readUInt32(rbbiDH->fRuleSourceLen),
	                outBytes + ds->readUInt32(rbbiDH->fRuleSource), status);

	// Table of rule status values.  It's all int_32 values
	ds->swapArray32(ds, inBytes + ds->readUInt32(rbbiDH->fStatusTable), ds->readUInt32(rbbiDH->fStatusTableLen),
	                outBytes + ds->readUInt32(rbbiDH->fStatusTable), status);

	// And, last, the header.
	//   It is all int32_t values except for fFormataVersion, which is an array of four bytes.
	//   Swap the whole thing as int32_t, then re-swap the one field.
	//
	ds->swapArray32(ds, inBytes, sizeof(RBBIDataHeader), outBytes, status);
	ds->swapArray32(ds, outputDH->fFormatVersion, 4, outputDH->fFormatVersion, status);

	return totalSize;
}
示例#3
0
/*
 * Swap a collation binary that carries no ICU data header of its own
 * (as found inside a resource bundle or in ucadata.icu).
 *
 * ds         - swapper that supplies the read and swap callbacks
 * inData     - input, starting directly with the UCATableHeader
 * length     - number of input bytes, or a negative value (-1) to only
 *              determine the size without writing anything
 * outData    - output buffer; the bulk copy is skipped when it is the same
 *              buffer as inData
 * pErrorCode - in/out error code
 *
 * Returns the size field of the collation header, or 0 if an error was detected.
 */
U_CAPI int32_t U_EXPORT2
ucol_swapBinary(const UDataSwapper *ds,
                const void *inData, int32_t length, void *outData,
                UErrorCode *pErrorCode) {
    const UCATableHeader *srcHeader;
    UCATableHeader *dstHeader;
    UCATableHeader hdr={ 0 };

    const uint8_t *src;
    uint8_t *dst;

    uint32_t nBytes;
    int tooShort;

    /* validate the arguments ourselves: the caller need not be ucol_swap() */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    src=(const uint8_t *)inData;
    dst=(uint8_t *)outData;

    srcHeader=(const UCATableHeader *)inData;
    dstHeader=(UCATableHeader *)outData;

    /*
     * A collation binary holds at least a UCATableHeader, whose first item
     * is the size field (sizeof(UCATableHeader)==42*4 in ICU 2.8).
     * With a known length, make sure the header fits before reading the
     * size field, then make sure the stated size fits as well.
     */
    if(length>=0 && length<(42*4)) {
        tooShort=1;
    } else {
        hdr.size=udata_readInt32(ds, srcHeader->size);
        tooShort=(length>=0 && length<hdr.size);
    }
    if(tooShort) {
        udata_printError(ds, "ucol_swapBinary(): too few bytes (%d after header) for collation data\n",
                         length);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* require the collation magic number and format version 2.3 or higher within 2.x */
    hdr.magic=ds->readUInt32(srcHeader->magic);
    if( hdr.magic!=UCOL_HEADER_MAGIC ||
        srcHeader->formatVersion[0]!=2 ||
        srcHeader->formatVersion[1]<3
    ) {
        udata_printError(ds, "ucol_swapBinary(): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
                         hdr.magic,
                         srcHeader->formatVersion[0], srcHeader->formatVersion[1]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /* the platform properties stored in the binary must be those the swapper expects as input */
    if(srcHeader->isBigEndian!=ds->inIsBigEndian || srcHeader->charSetFamily!=ds->inCharset) {
        udata_printError(ds, "ucol_swapBinary(): endianness %d or charset %d does not match the swapper\n",
                         srcHeader->isBigEndian, srcHeader->charSetFamily);
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return 0;
    }

    /* negative length: the caller only wants the size */
    if(length<0) {
        return hdr.size;
    }

    /* begin with a plain copy; this already handles all data that needs no swapping */
    if(src!=dst) {
        uprv_memcpy(dst, src, hdr.size);
    }

    /*
     * Pull the remaining offsets and counts out of the input header now,
     * before the header itself gets swapped (possibly in place).
     * The pieces are then swapped in the order in which they occur in the data.
     */
    hdr.options=                 ds->readUInt32(srcHeader->options);
    hdr.UCAConsts=               ds->readUInt32(srcHeader->UCAConsts);
    hdr.contractionUCACombos=    ds->readUInt32(srcHeader->contractionUCACombos);
    hdr.mappingPosition=         ds->readUInt32(srcHeader->mappingPosition);
    hdr.expansion=               ds->readUInt32(srcHeader->expansion);
    hdr.contractionIndex=        ds->readUInt32(srcHeader->contractionIndex);
    hdr.contractionCEs=          ds->readUInt32(srcHeader->contractionCEs);
    hdr.contractionSize=         ds->readUInt32(srcHeader->contractionSize);
    hdr.endExpansionCE=          ds->readUInt32(srcHeader->endExpansionCE);
    hdr.expansionCESize=         ds->readUInt32(srcHeader->expansionCESize);
    hdr.endExpansionCECount=     udata_readInt32(ds, srcHeader->endExpansionCECount);
    hdr.contractionUCACombosSize=udata_readInt32(ds, srcHeader->contractionUCACombosSize);

    /* header: everything in front of jamoSpecial is swapped as 32-bit integers */
    ds->swapArray32(ds, srcHeader, (int32_t)((const char *)&srcHeader->jamoSpecial-(const char *)srcHeader),
                       dstHeader, pErrorCode);

    /* the output header describes the output platform */
    dstHeader->isBigEndian=ds->outIsBigEndian;
    dstHeader->charSetFamily=ds->outCharset;

    /* options: from their offset up to the start of the expansions */
    if(hdr.options!=0) {
        ds->swapArray32(ds, src+hdr.options, hdr.expansion-hdr.options,
                           dst+hdr.options, pErrorCode);
    }

    /*
     * expansions: they end where the contractions begin or, when there are
     * no contractions, where the main trie begins
     */
    if(hdr.mappingPosition!=0 && hdr.expansion!=0) {
        nBytes= hdr.contractionIndex!=0 ?
                    hdr.contractionIndex-hdr.expansion :
                    hdr.mappingPosition-hdr.expansion;
        ds->swapArray32(ds, src+hdr.expansion, (int32_t)nBytes,
                           dst+hdr.expansion, pErrorCode);
    }

    /* contractions: a UChar[] index followed by a CEs[] table of 32-bit values */
    if(hdr.contractionSize!=0) {
        ds->swapArray16(ds, src+hdr.contractionIndex, hdr.contractionSize*2,
                           dst+hdr.contractionIndex, pErrorCode);

        ds->swapArray32(ds, src+hdr.contractionCEs, hdr.contractionSize*4,
                           dst+hdr.contractionCEs, pErrorCode);
    }

    /* main trie: extends up to the max expansion table */
    if(hdr.mappingPosition!=0) {
        nBytes=hdr.endExpansionCE-hdr.mappingPosition;
        utrie_swap(ds, src+hdr.mappingPosition, (int32_t)nBytes,
                      dst+hdr.mappingPosition, pErrorCode);
    }

    /* max expansion table: 32-bit entries */
    if(hdr.endExpansionCECount!=0) {
        ds->swapArray32(ds, src+hdr.endExpansionCE, hdr.endExpansionCECount*4,
                           dst+hdr.endExpansionCE, pErrorCode);
    }

    /* expansionCESize, unsafeCP, contrEndCP are uint8_t[] and are left as copied */

    /*
     * UCA constants: a non-zero UCAConsts means that this is the UCA data
     * file, which is known to contain contractions, so contractionUCACombos
     * marks the end of the constants
     */
    if(hdr.UCAConsts!=0) {
        ds->swapArray32(ds, src+hdr.UCAConsts, hdr.contractionUCACombos-hdr.UCAConsts,
                           dst+hdr.UCAConsts, pErrorCode);
    }

    /* UCA contractions: contractionUCACombosSize rows of contractionUCACombosWidth UChars each */
    if(hdr.contractionUCACombosSize!=0) {
        nBytes=hdr.contractionUCACombosSize*srcHeader->contractionUCACombosWidth*U_SIZEOF_UCHAR;
        ds->swapArray16(ds, src+hdr.contractionUCACombos, (int32_t)nBytes,
                           dst+hdr.contractionUCACombos, pErrorCode);
    }

    return hdr.size;
}