Example #1
0
// ---------------------------------------------------------------------------
//  XMLChTranscoder: Implementation of the transcoder API
// ---------------------------------------------------------------------------
unsigned int
XMLChTranscoder::transcodeFrom( const   XMLByte* const          srcData
                                , const unsigned int            srcCount
                                ,       XMLCh* const            toFill
                                , const unsigned int            maxChars
                                ,       unsigned int&           bytesEaten
                                ,       unsigned char* const    charSizes)
{
    // If debugging, make sure that the block size is legal
    #if defined(XERCES_DEBUG)
    checkBlockSize(maxChars);
    #endif

    //
    //  Calculate the max chars we can do here. Its the lesser of the
    //  max output chars and the number of chars in the source.
    //
    const unsigned int srcChars = srcCount / sizeof(XMLCh);
    const unsigned int countToDo = srcChars < maxChars ? srcChars : maxChars;

    //
    //  Copy over the count of chars that we precalculated. Notice we
    //  convert char count to byte count here!!!
    //
    memcpy(toFill, srcData, countToDo * sizeof(XMLCh));

    // Set the bytes eaten
    bytesEaten = countToDo * sizeof(XMLCh);

    // Set the character sizes to the fixed size
    memset(charSizes, sizeof(XMLCh), countToDo);

    // Return the chars we transcoded
    return countToDo;
}
Example #2
0
unsigned int
XMLChTranscoder::transcodeTo(const  XMLCh* const    srcData
                            , const unsigned int    srcCount
                            ,       XMLByte* const  toFill
                            , const unsigned int    maxBytes
                            ,       unsigned int&   charsEaten
                                , const UnRepOpts)
{
    // If debugging, make sure that the block size is legal
    #if defined(XERCES_DEBUG)
    checkBlockSize(maxBytes);
    #endif


    //
    //  Calculate the max chars we can do here. Its the lesser of the
    //  max chars we can store in the output byte buffer, and the number
    //  of chars in the source.
    //
    const unsigned int maxOutChars  = maxBytes / sizeof(XMLCh);
    const unsigned int countToDo    = maxOutChars < srcCount
                                    ? maxOutChars : srcCount;

    //
    //  Copy over the number of chars we calculated. Note that we have
    //  to convert the char count to a byte count!!
    //
    memcpy(toFill, srcData, countToDo * sizeof(XMLCh));

    // Set the chars eaten
    charsEaten = countToDo;

    // Return the bytes we transcoded
    return countToDo * sizeof(XMLCh);
}
Example #3
0
unsigned int
XMLASCIITranscoder::transcodeTo(const   XMLCh* const    srcData
                                , const unsigned int    srcCount
                                ,       XMLByte* const  toFill
                                , const unsigned int    maxBytes
                                ,       unsigned int&   charsEaten
                                , const UnRepOpts       options)
{
    // If debugging, make sure that the block size is legal
    #if defined(XERCES_DEBUG)
    checkBlockSize(maxBytes);
    #endif

    //
    //  Calculate the max chars we can do here. Its the lesser of the
    //  max output chars and the source byte count.
    //
    const unsigned int countToDo = srcCount < maxBytes ? srcCount : maxBytes;

    const XMLCh*    srcPtr = srcData;
    XMLByte*        outPtr = toFill;
    for (unsigned int index = 0; index < countToDo; index++)
    {
        // If its legal, do it and jump back to the top
        if (*srcPtr < 0x80)
        {
            *outPtr++ = XMLByte(*srcPtr++);
            continue;
        }

        //
        //  Its not representable so use a replacement char. According to
        //  the options, either throw or use the replacement.
        //
        if (options == UnRep_Throw)
        {
            XMLCh tmpBuf[17];
            XMLString::binToText((unsigned int)*srcPtr, tmpBuf, 16, 16, getMemoryManager());
            ThrowXMLwithMemMgr2
            (
                TranscodingException
                , XMLExcepts::Trans_Unrepresentable
                , tmpBuf
                , getEncodingName()
                , getMemoryManager()
            );
        }

        // Use the replacement char
        *outPtr++ = 0x1A;
        srcPtr++;
    }

    // Set the chars we ate
    charsEaten = countToDo;

    // Return the byte we transcoded
    return countToDo;
}
// ---------------------------------------------------------------------------
//  XMLASCIITranscoder390: Implementation of the transcoder API
// ---------------------------------------------------------------------------
unsigned int
XMLASCIITranscoder390::transcodeFrom(  const   XMLByte* const       srcData
                                    , const unsigned int         srcCount
                                    ,       XMLCh* const         toFill
                                    , const unsigned int         maxChars
                                    ,       unsigned int&        bytesEaten
                                    ,       unsigned char* const charSizes)
{
    // If debugging, make sure that the block size is legal
    #if defined(XERCES_DEBUG)
    checkBlockSize(maxChars);
    #endif

    //
    //  Calculate the max chars we can do here. Its the lesser of the
    //  max output chars and the source byte count.
    //
    const unsigned int countToDo = srcCount < maxChars ? srcCount : maxChars;

    //
    //  Now loop through that many source chars and just cast each one
    //  over to the XMLCh format. Check each source that its really a
    //  valid ASCI char.
    //
    const XMLByte*  srcPtr = srcData;
    XMLCh*          outPtr = toFill;
    unsigned int    countDone = countToDo;
    int             flag = 0;

    // if flag is set to 1, an non-ASCII character is encountered
    TROTASC(srcPtr, toFill, &countDone, padding_temp.gFromTable, 0xFFFF, &flag);

    if (flag == 1 && countDone < 32){
	    XMLCh tmpBuf[17];
        XMLString::binToText((unsigned int)*srcPtr, tmpBuf, 16, 16, getMemoryManager());
        ThrowXMLwithMemMgr2
        (
            TranscodingException
            , XMLExcepts::Trans_Unrepresentable
            , tmpBuf
            , getEncodingName()
            , getMemoryManager()
        );
    }//end if
       

    // Set the bytes we ate
    bytesEaten = countDone;

    // Set the char sizes to the fixed size
    memset(charSizes, 1, countDone);

    // Return the chars we transcoded
    return countDone;
}
void InputPersistenceBlock::readByteArray(Common::Array<byte> &value) {
	if (checkMarker(BLOCK_MARKER)) {
		uint size;
		read(size);

		if (checkBlockSize(size)) {
			value = Common::Array<byte>(_iter, size);
			_iter += size;
		}
	}
}
bool InputPersistenceBlock::checkMarker(byte marker) {
	if (!isGood() || !checkBlockSize(1))
		return false;

	if (*_iter++ == marker) {
		return true;
	} else {
		_errorState = OUT_OF_SYNC;
		error("Wrong type marker found in persistence block.");
		return false;
	}
}
void InputPersistenceBlock::readString(Common::String &value) {
	value = "";

	if (checkMarker(STRING_MARKER)) {
		uint size;
		read(size);

		if (checkBlockSize(size)) {
			value = Common::String(reinterpret_cast<const char *>(&*_iter), size);
			_iter += size;
		}
	}
}
// ---------------------------------------------------------------------------
//  XML256TableTranscoder: Implementation of the transcoder API
// ---------------------------------------------------------------------------
unsigned int
XML256TableTranscoder::transcodeFrom(const  XMLByte* const       srcData
                                    , const unsigned int         srcCount
                                    ,       XMLCh* const         toFill
                                    , const unsigned int         maxChars
                                    ,       unsigned int&        bytesEaten
                                    ,       unsigned char* const charSizes)
{
    // If debugging, make sure that the block size is legal
    #if defined(XERCES_DEBUG)
    checkBlockSize(maxChars);
    #endif

    //
    //  Calculate the max chars we can do here. Its the lesser of the
    //  max output chars and the number of chars in the source.
    //
    const unsigned int countToDo = srcCount < maxChars ? srcCount : maxChars;

    //
    //  Loop through the count we have to do and map each char via the
    //  lookup table.
    //
    const XMLByte*  srcPtr = srcData;
    const XMLByte*  endPtr = (srcPtr + countToDo);
    XMLCh*          outPtr = toFill;
    while (srcPtr < endPtr)
    {
        const XMLCh uniCh = fFromTable[*srcPtr++];
        if (uniCh != 0xFFFF)
        {
            *outPtr++ = uniCh;
            continue;
        }
    }


    // Set the bytes eaten
    bytesEaten = countToDo;

    // Set the character sizes to the fixed size
    memset(charSizes, 1, countToDo);

    // Return the chars we transcoded
    return countToDo;
}
Example #9
0
// ---------------------------------------------------------------------------
//  XML88591Transcoder390: Implementation of the transcoder API
// ---------------------------------------------------------------------------
unsigned int
XML88591Transcoder390::transcodeFrom(  const   XMLByte* const       srcData
                                    , const unsigned int         srcCount
                                    ,       XMLCh* const         toFill
                                    , const unsigned int         maxChars
                                    ,       unsigned int&        bytesEaten
                                    ,       unsigned char* const charSizes)
{
    // If debugging, make sure that the block size is legal
    #if defined(XERCES_DEBUG)
    checkBlockSize(maxChars);
    #endif

    //
    //  Calculate the max chars we can do here. Its the lesser of the
    //  max output chars and the number of bytes in the source.
    //
    const unsigned int countToDo = srcCount < maxChars ? srcCount : maxChars;

    //
    //  Loop through the bytes to do and convert over each byte. Its just
    //  a cast to the wide char type.
    //
    const XMLByte*  srcPtr = srcData;
    XMLCh*          destPtr = toFill;
    const XMLByte*  srcEnd = srcPtr + countToDo;

    TROT(srcPtr, destPtr, countToDo, padding_temp.gFromTable, 0xFFFF);

    // Set the bytes eaten, and set the char size array to the fixed size
    bytesEaten = countToDo;
    memset(charSizes, 1, countToDo);

    // Return the chars we transcoded
    return countToDo;
}
Example #10
0
// ---------------------------------------------------------------------------
//  XMLUTF8Transcoder: Implementation of the transcoder API
// ---------------------------------------------------------------------------
unsigned int
XMLUTF8Transcoder::transcodeFrom(const  XMLByte* const          srcData
                                , const unsigned int            srcCount
                                ,       XMLCh* const            toFill
                                , const unsigned int            maxChars
                                ,       unsigned int&           bytesEaten
                                ,       unsigned char* const    charSizes)
{
    // Watch for pathological scenario. Shouldn't happen, but...
    if (!srcCount || !maxChars)
        return 0;

    // If debugging, make sure that the block size is legal
    #if defined(XERCES_DEBUG)
    checkBlockSize(maxChars);
    #endif

    //
    //  Get pointers to our start and end points of the input and output
    //  buffers.
    //
    const XMLByte*  srcPtr = srcData;
    const XMLByte*  srcEnd = srcPtr + srcCount;
    XMLCh*          outPtr = toFill;
    XMLCh*          outEnd = outPtr + maxChars;
    unsigned char*  sizePtr = charSizes;



    //
    //  We now loop until we either run out of input data, or room to store
    //  output chars.
    //
    while ((srcPtr < srcEnd) && (outPtr < outEnd))
    {
        // Special-case ASCII, which is a leading byte value of <= 127
        if (*srcPtr <= 127)
        {
            *outPtr++ = XMLCh(*srcPtr++);
            *sizePtr++ = 1;
            continue;
        }

        // See how many trailing src bytes this sequence is going to require
        const unsigned int trailingBytes = gUTFBytes[*srcPtr];

        //
        //  If there are not enough source bytes to do this one, then we
        //  are done. Note that we done >= here because we are implicitly
        //  counting the 1 byte we get no matter what.
        //
        //  If we break out here, then there is nothing to undo since we
        //  haven't updated any pointers yet.
        //
        if (srcPtr + trailingBytes >= srcEnd)
            break;

        // Looks ok, so lets build up the value
        // or at least let's try to do so--remembering that
        // we cannot assume the encoding to be valid:

        // first, test first byte
        if((gUTFByteIndicatorTest[trailingBytes] & *srcPtr) != gUTFByteIndicator[trailingBytes]) {
            char pos[2] = {(char)0x31, 0}; 
            char len[2] = {(char)(trailingBytes+0x31), 0};
            char byte[2] = {*srcPtr,0};
            ThrowXMLwithMemMgr3(UTFDataFormatException, XMLExcepts::UTF8_FormatError, pos, byte, len, getMemoryManager());
        }

        /***
         * http://www.unicode.org/reports/tr27/
         *
         * Table 3.1B. lists all of the byte sequences that are legal in UTF-8. 
         * A range of byte values such as A0..BF indicates that any byte from A0 to BF (inclusive) 
         * is legal in that position. 
         * Any byte value outside of the ranges listed is illegal. 
         * For example, 
         * the byte sequence <C0 AF> is illegal  since C0 is not legal in the 1st Byte column. 
         * The byte sequence <E0 9F 80> is illegal since in the row 
         *    where E0 is legal as a first byte, 
         *    9F is not legal as a second byte. 
         * The byte sequence <F4 80 83 92> is legal, since every byte in that sequence matches 
         * a byte range in a row of the table (the last row). 
         *
         *
         * Table 3.1B. Legal UTF-8 Byte Sequences  
         * Code Points              1st Byte    2nd Byte    3rd Byte    4th Byte 
         * =========================================================================
         * U+0000..U+007F            00..7F       
         * -------------------------------------------------------------------------
         * U+0080..U+07FF            C2..DF      80..BF      
         *
         * -------------------------------------------------------------------------
         * U+0800..U+0FFF            E0          A0..BF     80..BF   
         *                                       -- 
         *                          
         * U+1000..U+FFFF            E1..EF      80..BF     80..BF    
         *
         * --------------------------------------------------------------------------
         * U+10000..U+3FFFF          F0          90..BF     80..BF       80..BF 
         *                                       --
         * U+40000..U+FFFFF          F1..F3      80..BF     80..BF       80..BF 
         * U+100000..U+10FFFF        F4          80..8F     80..BF       80..BF 
         *                                           --
         * ==========================================================================
         *
         *  Cases where a trailing byte range is not 80..BF are underlined in the table to 
         *  draw attention to them. These occur only in the second byte of a sequence.
         *
         ***/

        XMLUInt32 tmpVal = 0;

        switch(trailingBytes)
        {
            case 1 :
                // UTF-8:   [110y yyyy] [10xx xxxx]
                // Unicode: [0000 0yyy] [yyxx xxxx]
                //
                // 0xC0, 0xC1 has been filtered out             
                checkTrailingBytes(*(srcPtr+1), 1, 1);

                tmpVal = *srcPtr++;
                tmpVal <<= 6;
                tmpVal += *srcPtr++;

                break;
            case 2 :
                // UTF-8:   [1110 zzzz] [10yy yyyy] [10xx xxxx]
                // Unicode: [zzzz yyyy] [yyxx xxxx]
                //
                if (( *srcPtr == 0xE0) && ( *(srcPtr+1) < 0xA0)) 
                {
                    char byte0[2] = {*srcPtr    ,0};
                    char byte1[2] = {*(srcPtr+1),0};

                    ThrowXMLwithMemMgr2(UTFDataFormatException
                                      , XMLExcepts::UTF8_Invalid_3BytesSeq
                                      , byte0
                                      , byte1
                                      , getMemoryManager());
                }

                checkTrailingBytes(*(srcPtr+1), 2, 1);
                checkTrailingBytes(*(srcPtr+2), 2, 2);

                //
                // D36 (a) UTF-8 is the Unicode Transformation Format that serializes 
                //         a Unicode code point as a sequence of one to four bytes, 
                //         as specified in Table 3.1, UTF-8 Bit Distribution.
                //     (b) An illegal UTF-8 code unit sequence is any byte sequence that 
                //         does not match the patterns listed in Table 3.1B, Legal UTF-8 
                //         Byte Sequences.
                //     (c) An irregular UTF-8 code unit sequence is a six-byte sequence 
                //         where the first three bytes correspond to a high surrogate, 
                //         and the next three bytes correspond to a low surrogate. 
                //         As a consequence of C12, these irregular UTF-8 sequences shall 
                //         not be generated by a conformant process. 
                //
                //irregular three bytes sequence
                // that is zzzzyy matches leading surrogate tag 110110 or 
                //                       trailing surrogate tag 110111
                // *srcPtr=1110 1101 
                // *(srcPtr+1)=1010 yyyy or 
                // *(srcPtr+1)=1011 yyyy
                //
                // 0xED 1110 1101
                // 0xA0 1010 0000

                if ((*srcPtr == 0xED) && (*(srcPtr+1) >= 0xA0))
                {
                    char byte0[2] = {*srcPtr,    0};
                    char byte1[2] = {*(srcPtr+1),0};

                     ThrowXMLwithMemMgr2(UTFDataFormatException
                              , XMLExcepts::UTF8_Irregular_3BytesSeq
                              , byte0
                              , byte1
                              , getMemoryManager());
                }

                tmpVal = *srcPtr++;
                tmpVal <<= 6;
                tmpVal += *srcPtr++;
                tmpVal <<= 6;
                tmpVal += *srcPtr++;

                break;
            case 3 : 
                // UTF-8:   [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
                // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
                //          [1101 11yy] [yyxx xxxx] (low surrogate)
                //          * uuuuu = wwww + 1
                //
                if (((*srcPtr == 0xF0) && (*(srcPtr+1) < 0x90)) ||
                    ((*srcPtr == 0xF4) && (*(srcPtr+1) > 0x8F))  )
                {
                    char byte0[2] = {*srcPtr    ,0};
                    char byte1[2] = {*(srcPtr+1),0};

                    ThrowXMLwithMemMgr2(UTFDataFormatException
                                      , XMLExcepts::UTF8_Invalid_4BytesSeq
                                      , byte0
                                      , byte1
                                      , getMemoryManager());
                }

                checkTrailingBytes(*(srcPtr+1), 3, 1);
                checkTrailingBytes(*(srcPtr+2), 3, 2);
                checkTrailingBytes(*(srcPtr+3), 3, 3);
                
                tmpVal = *srcPtr++;
                tmpVal <<= 6;
                tmpVal += *srcPtr++;
                tmpVal <<= 6;
                tmpVal += *srcPtr++;
                tmpVal <<= 6;
                tmpVal += *srcPtr++;

                break;
            default: // trailingBytes > 3

                /***
                 * The definition of UTF-8 in Annex D of ISO/IEC 10646-1:2000 also allows 
                 * for the use of five- and six-byte sequences to encode characters that 
                 * are outside the range of the Unicode character set; those five- and 
                 * six-byte sequences are illegal for the use of UTF-8 as a transformation 
                 * of Unicode characters. ISO/IEC 10646 does not allow mapping of unpaired 
                 * surrogates, nor U+FFFE and U+FFFF (but it does allow other noncharacters).
                 ***/
                char len[2]  = {(char)(trailingBytes+0x31), 0};
                char byte[2] = {*srcPtr,0};

                ThrowXMLwithMemMgr2(UTFDataFormatException
                                  , XMLExcepts::UTF8_Exceede_BytesLimit
                                  , byte
                                  , len
                                  , getMemoryManager());

                break;
        }


        // since trailingBytes comes from an array, this logic is redundant
        //  default :
        //      ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq);
        //}
        tmpVal -= gUTFOffsets[trailingBytes];

        //
        //  If it will fit into a single char, then put it in. Otherwise
        //  encode it as a surrogate pair. If its not valid, use the
        //  replacement char.
        //
        if (!(tmpVal & 0xFFFF0000))
        {
            *sizePtr++ = trailingBytes + 1;
            *outPtr++ = XMLCh(tmpVal);
        }
         else if (tmpVal > 0x10FFFF)
        {
            //
            //  If we've gotten more than 32 chars so far, then just break
            //  out for now and lets process those. When we come back in
            //  here again, we'll get no chars and throw an exception. This
            //  way, the error will have a line and col number closer to
            //  the real problem area.
            //
            if ((outPtr - toFill) > 32)
                break;

            ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager());
        }
         else
        {
            //
            //  If we have enough room to store the leading and trailing
            //  chars, then lets do it. Else, pretend this one never
            //  happened, and leave it for the next time. Since we don't
            //  update the bytes read until the bottom of the loop, by
            //  breaking out here its like it never happened.
            //
            if (outPtr + 1 >= outEnd)
                break;

            // Store the leading surrogate char
            tmpVal -= 0x10000;
            *sizePtr++ = trailingBytes + 1;
            *outPtr++ = XMLCh((tmpVal >> 10) + 0xD800);

            //
            //  And then the trailing char. This one accounts for no
            //  bytes eaten from the source, so set the char size for this
            //  one to be zero.
            //
            *sizePtr++ = 0;
            *outPtr++ = XMLCh((tmpVal & 0x3FF) + 0xDC00);
        }
    }

    // Update the bytes eaten
    bytesEaten = srcPtr - srcData;

    // Return the characters read
    return outPtr - toFill;
}
Example #11
0
unsigned int
XML256TableTranscoder390::transcodeTo( const   XMLCh* const    srcData
                                       , const unsigned int    srcCount
                                       ,       XMLByte* const  toFill
                                       , const unsigned int    maxBytes
                                       ,       unsigned int&   charsEaten
                                       , const UnRepOpts       options)
{
    // If debugging, make sure that the block size is legal
#if defined(XERCES_DEBUG)
    checkBlockSize(maxBytes);
#endif

    //
    //  Calculate the max chars we can do here. Its the lesser of the
    //  max output chars and the number of chars in the source.
    //
    const unsigned int countToDo = srcCount < maxBytes ? srcCount : maxBytes;

    //
    //  Loop through the count we have to do and map each char via the
    //  lookup table.
    //
    const XMLCh*    srcPtr = srcData;
    const XMLCh*    endPtr = (srcPtr + countToDo);
    XMLByte*        outPtr = toFill;
    XMLByte         nextOut;
    while (srcPtr < endPtr)
    {
        //
        //  Get the next src char out to a temp, then do a binary search
        //  of the 'to' table for this entry.
        //
        if ((nextOut = xlatOneTo(*srcPtr)))
        {
            *outPtr++ = nextOut;
            srcPtr++;
            continue;
        }

        //
        //  Its not representable so, according to the options, either
        //  throw or use the replacement.
        //
        if (options == UnRep_Throw)
        {
            XMLCh tmpBuf[17];
            XMLString::binToText((unsigned int)*srcPtr, tmpBuf, 16, 16, getMemoryManager());
            ThrowXMLwithMemMgr2
            (
                TranscodingException
                , XMLExcepts::Trans_Unrepresentable
                , tmpBuf
                , getEncodingName()
                , getMemoryManager()
            );
        }

        // Eat the source char and use the replacement char
        srcPtr++;
        *outPtr++ = 0x3F;
    }

    // Set the chars eaten
    charsEaten = countToDo;

    // Return the bytes we transcoded
    return countToDo;
}
Example #12
0
// ---------------------------------------------------------------------------
//  ICUTranscoder: The virtual transcoder API
// ---------------------------------------------------------------------------
unsigned int
ICUTranscoder::transcodeFrom(const  XMLByte* const          srcData
                            , const unsigned int            srcCount
                            ,       XMLCh* const            toFill
                            , const unsigned int            maxChars
                            ,       unsigned int&           bytesEaten
                            ,       unsigned char* const    charSizes)
{
    // If debugging, insure the block size is legal
    #if defined(XERCES_DEBUG)
    checkBlockSize(maxChars);
    #endif

    // Set up pointers to the start and end of the source buffer
    const XMLByte*  startSrc = srcData;
    const XMLByte*  endSrc = srcData + srcCount;

    //
    //  And now do the target buffer. This works differently according to
    //  whether XMLCh and UChar are the same size or not.
    //
    UChar* startTarget;
    if (sizeof(XMLCh) == sizeof(UChar))
        startTarget = (UChar*)toFill;
     else
        startTarget = (UChar*) getMemoryManager()->allocate
        (
            maxChars * sizeof(UChar)
        );//new UChar[maxChars];
    UChar* orgTarget = startTarget;

    //
    //  Transoode the buffer.  Buffer overflow errors are normal, occuring
    //  when the raw input buffer holds more characters than will fit in
    //  the Unicode output buffer.
    //
    UErrorCode  err = U_ZERO_ERROR;
    ucnv_toUnicode
    (
        fConverter
        , &startTarget
        , startTarget + maxChars
        , (const char**)&startSrc
        , (const char*)endSrc
        , (fFixed ? 0 : (int32_t*)fSrcOffsets)
        , false
        , &err
    );

    if ((err != U_ZERO_ERROR) && (err != U_BUFFER_OVERFLOW_ERROR))
    {
        if (orgTarget != (UChar*)toFill)
            getMemoryManager()->deallocate(orgTarget);//delete [] orgTarget;

        if (fFixed)
        {
            XMLCh tmpBuf[17];
            XMLString::binToText((unsigned int)(*startTarget), tmpBuf, 16, 16, getMemoryManager());
            ThrowXMLwithMemMgr2
            (
                TranscodingException
                , XMLExcepts::Trans_BadSrcCP
                , tmpBuf
                , getEncodingName()
                , getMemoryManager()
            );
        }
        else
        {
            ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager());
        }
    }

    // Calculate the bytes eaten and store in caller's param
    bytesEaten = startSrc - srcData;

    // And the characters decoded
    const unsigned int charsDecoded = startTarget - orgTarget;

    //
    //  Translate the array of char offsets into an array of character
    //  sizes, which is what the transcoder interface semantics requires.
    //  If its fixed, then we can optimize it.
    //
    if (fFixed)
    {
        const unsigned char fillSize = (unsigned char)ucnv_getMaxCharSize(fConverter);
        memset(charSizes, fillSize, maxChars);
    }
     else
    {
        //
        //  We have to convert the series of offsets into a series of
        //  sizes. If just one char was decoded, then its the total bytes
        //  eaten. Otherwise, do a loop and subtract out each element from
        //  its previous element.
        //
        if (charsDecoded == 1)
        {
            charSizes[0] = (unsigned char)bytesEaten;
        }
         else
        {
            //  ICU does not return an extra element to allow us to figure
            //  out the last char size, so we have to compute it from the
            //  total bytes used.
            unsigned int index;
            for (index = 0; index < charsDecoded - 1; index++)
            {
                charSizes[index] = (unsigned char)(fSrcOffsets[index + 1]
                                                    - fSrcOffsets[index]);
            }
            if( charsDecoded > 0 ) {
                charSizes[charsDecoded - 1] = (unsigned char)(bytesEaten
                                              - fSrcOffsets[charsDecoded - 1]);
            }
        }
    }

    //
    //  If XMLCh and UChar are not the same size, then we need to copy over
    //  the temp buffer to the new one.
    //
    if (sizeof(UChar) != sizeof(XMLCh))
    {
        XMLCh* outPtr = toFill;
        startTarget = orgTarget;
        for (unsigned int index = 0; index < charsDecoded; index++)
            *outPtr++ = XMLCh(*startTarget++);

        // And delete the temp buffer
        getMemoryManager()->deallocate(orgTarget);//delete [] orgTarget;
    }

    // Return the chars we put into the target buffer
    return charsDecoded;
}
Example #13
0
unsigned int
XML88591Transcoder390::transcodeTo(const   XMLCh* const    srcData
                                , const unsigned int    srcCount
                                ,       XMLByte* const  toFill
                                , const unsigned int    maxBytes
                                ,       unsigned int&   charsEaten
                                , const UnRepOpts       options)
{
    // If debugging, make sure that the block size is legal
    #if defined(XERCES_DEBUG)
    checkBlockSize(maxBytes);
    #endif

    //
    //  Calculate the max chars we can do here. Its the lesser of the
    //  max output bytes and the number of chars in the source.
    //
    const unsigned int countToDo = srcCount < maxBytes ? srcCount : maxBytes;

    //
    //  Loop through the bytes to do and convert over each byte. Its just
    //  a downcast of the wide char, checking for unrepresentable chars.
    //
    const XMLCh*    srcPtr  = srcData;
    const XMLCh*    srcEnd  = srcPtr + countToDo;
    XMLByte*        destPtr = toFill;
    while (srcPtr < srcEnd)
    {
        // If its legal, take it and jump back to top
        if (*srcPtr < 256)
        {
            *destPtr++ = XMLByte(*srcPtr++);
            continue;
        }

        //
        //  Its not representable so use a replacement char. According to
        //  the options, either throw or use the replacement.
        //
        if (options == UnRep_Throw)
        {
            XMLCh tmpBuf[17];
            XMLString::binToText((unsigned int)*srcPtr, tmpBuf, 16, 16, getMemoryManager());
            ThrowXMLwithMemMgr2
            (
                TranscodingException
                , XMLExcepts::Trans_Unrepresentable
                , tmpBuf
                , getEncodingName()
                , getMemoryManager()
            );
        }
        *destPtr++ = 0x1A;
        srcPtr++;
    }

    // Set the chars eaten
    charsEaten = countToDo;

    // Return the bytes we transcoded
    return countToDo;
}
// ---------------------------------------------------------------------------
//  XMLUCS4Transcoder: Implementation of the transcoder API
// ---------------------------------------------------------------------------
unsigned int
XMLUCS4Transcoder::transcodeFrom(const  XMLByte* const          srcData
                                , const unsigned int            srcCount
                                ,       XMLCh* const            toFill
                                , const unsigned int            maxChars
                                ,       unsigned int&           bytesEaten
                                ,       unsigned char* const    charSizes)
{
    // If debugging, make sure that the block size is legal
    #if defined(XERCES_DEBUG)
    checkBlockSize(maxChars);
    #endif

    //
    //  Get pointers to the start and end of the source buffer in terms of
    //  UCS-4 characters.
    //
    const UCS4Ch*   srcPtr = (const UCS4Ch*)srcData;
    const UCS4Ch*   srcEnd = srcPtr + (srcCount / sizeof(UCS4Ch));

    //
    //  Get pointers to the start and end of the target buffer, which is
    //  in terms of the XMLCh chars we output.
    //
    XMLCh*  outPtr = toFill;
    XMLCh*  outEnd = toFill + maxChars;

    //
    //  And get a pointer into the char sizes buffer. We will run this
    //  up as we put chars into the output buffer.
    //
    unsigned char* sizePtr = charSizes;

    //
    //  Now process chars until we either use up all our source or all of
    //  our output space.
    //
    while ((outPtr < outEnd) && (srcPtr < srcEnd))
    {
        //
        //  Get the next UCS char out of the buffer. Don't bump the ptr
        //  yet since we might not have enough storage for it in the target
        //  (if its causes a surrogate pair to be created.
        //
        UCS4Ch nextVal = *srcPtr;

        // If it needs to be swapped, then do it
        if (fSwapped)
            nextVal = BitOps::swapBytes(nextVal);

        // Handle a surrogate pair if needed
        if (nextVal & 0xFFFF0000)
        {
            //
            //  If we don't have room for both of the chars, then we
            //  bail out now.
            //
            if (outPtr + 1 == outEnd)
                break;

            const XMLCh ch1 = XMLCh(((nextVal - 0x10000) >> 10) + 0xD800);
            const XMLCh ch2 = XMLCh(((nextVal - 0x10000) & 0x3FF) + 0xDC00);

            //
            //  We have room so store them both. But note that the
            //  second one took up no source bytes!
            //
            *sizePtr++ = sizeof(UCS4Ch);
            *outPtr++ = ch1;
            *sizePtr++ = 0;
            *outPtr++ = ch2;
        }
         else
        {
            //
            //  No surrogate, so just store it and bump the count of chars
            //  read. Update the char sizes buffer for this char's entry.
            //
            *sizePtr++ = sizeof(UCS4Ch);
            *outPtr++ = XMLCh(nextVal);
        }

        // Indicate that we ate another UCS char's worth of bytes
        srcPtr++;
    }
unsigned int
XMLUTF16Transcoder::transcodeTo(const   XMLCh* const    srcData
                                , const unsigned int    srcCount
                                ,       XMLByte* const  toFill
                                , const unsigned int    maxBytes
                                ,       unsigned int&   charsEaten
                                , const UnRepOpts)
{
    // If debugging, make sure that the block size is legal
    #if defined(XERCES_DEBUG)
    checkBlockSize(maxBytes);
    #endif

    //
    //  Calculate the max chars we can do here. Its the lesser of the
    //  chars that we can fit into the output buffer, and the source
    //  chars available.
    //
    const unsigned int maxOutChars = maxBytes / sizeof(UTF16Ch);
    const unsigned int countToDo = srcCount < maxOutChars ? srcCount : maxOutChars;

    //
    //  Get a pointer tot he output buffer in the UTF-16 character format
    //  that we need to work with. And get a mutable pointer to the source
    //  character buffer.
    //
    UTF16Ch*        outPtr = (UTF16Ch*)toFill;
    const XMLCh*    srcPtr = srcData;

    //
    //  If the target format is swapped from our native format, then handle
    //  it one way, else handle it another.
    //
    if (fSwapped)
    {
        //
        //  And then do the swapping loop for the count we precalculated. Note
        //  that this also handles size conversion as well if XMLCh is not the
        //  same size as UTF16Ch.
        //
        for (unsigned int index = 0; index < countToDo; index++)
        {
            // To avoid flakey compilers, use a temp
            const UTF16Ch tmpCh = UTF16Ch(*srcPtr++);
            *outPtr++ = BitOps::swapBytes(tmpCh);
        }
    }
     else
    {
        //
        //  If XMLCh and UTF16Ch are the same size, we can just do a fast
        //  memory copy. Otherwise, we have to do a loop and downcast each
        //  character into its new 16 bit storage.
        //
        if (sizeof(XMLCh) == sizeof(UTF16Ch))
        {
            //  Notice we convert char count to byte count here!!!
            memcpy(toFill, srcData, countToDo * sizeof(UTF16Ch));
        }
         else
        {
            for (unsigned int index = 0; index < countToDo; index++)
                *outPtr++ = UTF16Ch(*srcPtr++);
        }
    }

    // Set the chars eaten to the calculated number we ate
    charsEaten = countToDo;

    //Return the bytes we ate. Note we convert to a byte count here!
    return countToDo * sizeof(UTF16Ch);
}
// ---------------------------------------------------------------------------
//  XMLUTF16Transcoder: Implementation of the transcoder API
// ---------------------------------------------------------------------------
unsigned int
XMLUTF16Transcoder::transcodeFrom(  const   XMLByte* const       srcData
                                    , const unsigned int         srcCount
                                    ,       XMLCh* const         toFill
                                    , const unsigned int         maxChars
                                    ,       unsigned int&        bytesEaten
                                    ,       unsigned char* const charSizes)
{
    // If debugging, make sure that the block size is legal
    #if defined(XERCES_DEBUG)
    checkBlockSize(maxChars);
    #endif

    //
    //  Calculate the max chars we can do here. Its the lesser of the
    //  max output chars and the number of chars in the source.
    //
    const unsigned int srcChars = srcCount / sizeof(UTF16Ch);
    const unsigned int countToDo = srcChars < maxChars ? srcChars : maxChars;

    // Look at the source data as UTF16 chars
    const UTF16Ch* asUTF16 = (const UTF16Ch*)srcData;

    // And get a mutable pointer to the output
    XMLCh* outPtr = toFill;

    //
    //  If its swapped, we have to do a char by char swap and cast. Else
    //  we have to check whether our XMLCh and UTF16Ch types are the same
    //  size or not. If so, we can optimize by just doing a buffer copy.
    //
    if (fSwapped)
    {
        //
        //  And then do the swapping loop for the count we precalculated. Note
        //  that this also handles size conversion as well if XMLCh is not the
        //  same size as UTF16Ch.
        //
        for (unsigned int index = 0; index < countToDo; index++)
            *outPtr++ = BitOps::swapBytes(*asUTF16++);
    }
     else
    {
        //
        //  If the XMLCh type is the same size as a UTF16 value on this
        //  platform, then we can do just a buffer copy straight to the target
        //  buffer since our source chars are UTF-16 chars. If its not, then
        //  we still have to do a loop and assign each one, in order to
        //  implicitly convert.
        //
        if (sizeof(XMLCh) == sizeof(UTF16Ch))
        {
            //  Notice we convert char count to byte count here!!!
            memcpy(toFill, srcData, countToDo * sizeof(UTF16Ch));
        }
         else
        {
            for (unsigned int index = 0; index < countToDo; index++)
                *outPtr++ = XMLCh(*asUTF16++);
        }
    }

    // Set the bytes eaten
    bytesEaten = countToDo * sizeof(UTF16Ch);

    // Set the character sizes to the fixed size
    memset(charSizes, sizeof(UTF16Ch), countToDo);

    // Return the chars we transcoded
    return countToDo;
}
Example #17
0
void InputPersistenceBlock::rawRead(void *destPtr, size_t size) {
	if (checkBlockSize(size)) {
		memcpy(destPtr, &*_iter, size);
		_iter += size;
	}
}
Example #18
0
// ---------------------------------------------------------------------------
//  XMLASCIITranscoder: Implementation of the transcoder API
// ---------------------------------------------------------------------------
unsigned int
XMLASCIITranscoder::transcodeFrom(  const   XMLByte* const       srcData
                                    , const unsigned int         srcCount
                                    ,       XMLCh* const         toFill
                                    , const unsigned int         maxChars
                                    ,       unsigned int&        bytesEaten
                                    ,       unsigned char* const charSizes)
{
    // If debugging, make sure that the block size is legal
    #if defined(XERCES_DEBUG)
    checkBlockSize(maxChars);
    #endif

    //
    //  Calculate the max chars we can do here. Its the lesser of the
    //  max output chars and the source byte count.
    //
    const unsigned int countToDo = srcCount < maxChars ? srcCount : maxChars;

    //
    //  Now loop through that many source chars and just cast each one
    //  over to the XMLCh format. Check each source that its really a
    //  valid ASCI char.
    //
    const XMLByte*  srcPtr = srcData;
    XMLCh*          outPtr = toFill;
    unsigned int    countDone = 0;
    for (; countDone < countToDo; countDone++)
    {
        // Do the optimistic work up front
        if (*srcPtr < 0x80)
        {
            *outPtr++ = XMLCh(*srcPtr++);
            continue;
        }

        //
        //  We got non source encoding char. If we got more than 32 chars,
        //  the just break out. We'll come back here later to hit this again
        //  and give an error much closer to the real source position.
        //
        if (countDone > 32)
            break;

        XMLCh tmpBuf[17];
        XMLString::binToText((unsigned int)*srcPtr, tmpBuf, 16, 16, getMemoryManager());
        ThrowXMLwithMemMgr2
        (
            TranscodingException
            , XMLExcepts::Trans_Unrepresentable
            , tmpBuf
            , getEncodingName()
            , getMemoryManager()
        );
    }

    // Set the bytes we ate
    bytesEaten = countDone;

    // Set the char sizes to the fixed size
    memset(charSizes, 1, countDone);

    // Return the chars we transcoded
    return countDone;
}