// ---------------------------------------------------------------------------
//  XML88591Transcoder: Implementation of the transcoder API
// ---------------------------------------------------------------------------
XMLSize_t
XML88591Transcoder::transcodeFrom(  const   XMLByte* const       srcData
                                    , const XMLSize_t            srcCount
                                    ,       XMLCh* const         toFill
                                    , const XMLSize_t            maxChars
                                    ,       XMLSize_t&           bytesEaten
                                    ,       unsigned char* const charSizes)
{
    //
    //  Every source byte becomes exactly one output char, so the amount of
    //  work is bounded by whichever is smaller: the bytes available or the
    //  room in the output buffer.
    //
    XMLSize_t total = maxChars;
    if (srcCount < maxChars)
        total = srcCount;

    // Widen each byte to an XMLCh; no validation or mapping is performed.
    for (XMLSize_t idx = 0; idx < total; ++idx)
        toFill[idx] = XMLCh(srcData[idx]);

    // One byte consumed per char produced, and each char is one byte wide.
    bytesEaten = total;
    memset(charSizes, 1, total);

    // The number of chars written to toFill
    return total;
}
// ---------------------------------------------------------------------------
//  XMLASCIITranscoder: Implementation of the transcoder API
// ---------------------------------------------------------------------------
XMLSize_t
XMLASCIITranscoder::transcodeFrom(  const   XMLByte* const       srcData
                                    , const XMLSize_t            srcCount
                                    ,       XMLCh* const         toFill
                                    , const XMLSize_t            maxChars
                                    ,       XMLSize_t&           bytesEaten
                                    ,       unsigned char* const charSizes)
{
    //
    //  We can never do more than the smaller of the source byte count and
    //  the output capacity.
    //
    const XMLSize_t limit = (maxChars < srcCount) ? maxChars : srcCount;

    //
    //  Walk the source one byte at a time. Anything below 0x80 is widened
    //  straight into the output; anything else is not ASCII.
    //
    XMLSize_t converted = 0;
    while (converted < limit)
    {
        const XMLByte cur = srcData[converted];

        if (cur >= 0x80)
        {
            //
            //  Not representable in this encoding. If a fair number of
            //  chars (more than 32) have already been produced, stop here
            //  and hand those back; the next call will start on this byte
            //  and raise the error much closer to the real source position.
            //
            if (converted > 32)
                break;

            // Report the offending byte value as hex text
            XMLCh hexText[17];
            XMLString::binToText((unsigned int)cur, hexText, 16, 16, getMemoryManager());
            ThrowXMLwithMemMgr2
            (
                TranscodingException
                , XMLExcepts::Trans_Unrepresentable
                , hexText
                , getEncodingName()
                , getMemoryManager()
            );
        }

        toFill[converted] = XMLCh(cur);
        ++converted;
    }

    // One byte eaten per char produced
    bytesEaten = converted;

    // Every char came from exactly one source byte
    memset(charSizes, 1, converted);

    // The number of chars written to toFill
    return converted;
}
Exemple #3
0
bool XMLPlatformUtils::isRelative(const XMLCh* const toCheck
                                  , MemoryManager* const manager)
{
    if (!toCheck[0] || toCheck[0] == XMLCh('/'))
        return false;
    return true;
}
Exemple #4
0
XERCES_CPP_NAMESPACE_BEGIN

XMLCh* RegxUtil::decomposeToSurrogates(XMLInt32 ch,
                                       MemoryManager* const manager) {

	// Room for the two surrogate halves plus the terminating null. The
	// buffer comes from 'manager' and is handed back to the caller.
	XMLCh* const pair = (XMLCh*) manager->allocate(3 * sizeof(XMLCh));

	// Remove the supplementary-plane bias, then split the remainder into
	// its upper bits (leading surrogate) and low ten bits (trailing one).
	const XMLInt32 offset = ch - 0x10000;
	pair[0] = XMLCh(0xD800 + (offset >> 10));
	pair[1] = XMLCh(0xDC00 + (offset & 0x03FF));
	pair[2] = chNull;

	return pair;
}
// Only the opening of this function is present in this listing; the code
// that consumes srcBuf/srcCount is not shown.
bool CygwinTranscoder::canTranscodeTo(const unsigned int toCheck) const
{
    //
    //  If the passed value is really a surrogate embedded together, then
    //  we need to break it out into its two chars. Else just one.
    //
    XMLCh           srcBuf[2];
    unsigned int    srcCount = 1;
    // Any bit set above the low 16 means the value does not fit in one XMLCh
    if (toCheck & 0xFFFF0000)
    {
        // NOTE(review): the leading half is derived without first subtracting
        // 0x10000, which the UTF-16 encoding of a supplementary code point
        // requires -- confirm whether toCheck is expected to be pre-adjusted.
        srcBuf[0] = XMLCh((toCheck >> 10) + 0xD800);
        // NOTE(review): here the cast is applied before the addition, unlike
        // the line above; the sum is narrowed back to XMLCh on assignment.
        srcBuf[1] = XMLCh(toCheck & 0x3FF) + 0xDC00;
        srcCount++;
    }
// Only the opening of this function is present in this listing; the code
// that consumes srcBuf/srcCnt is not shown.
bool
MacOSTranscoder::canTranscodeTo(const unsigned int toCheck)
{
	//
    //  If the passed value is really a surrogate embedded together, then
    //  we need to break it out into its two chars. Else just one.
    //
    unsigned int    srcCnt = 0;
    UniChar         srcBuf[2];

    // Any bit set above the low 16 means the value does not fit in one unit
    if (toCheck & 0xFFFF0000)
    {
        // NOTE(review): the shifted value is cast to XMLCh before 0xD800 is
        // added, and 0x10000 is not subtracted first as the UTF-16 encoding of
        // a supplementary code point requires -- confirm the expected input.
        srcBuf[srcCnt++] = XMLCh(toCheck >> 10)   + 0xD800;
        srcBuf[srcCnt++] = XMLCh(toCheck & 0x3FF) + 0xDC00;
    }
Exemple #7
0
static XMLCh* convertToXMLCh( const UChar* const toConvert,
                            MemoryManager* const manager = 0)
{
    //
    //  Size the result from the source length plus a terminator. The buffer
    //  comes from the memory manager when one is supplied, otherwise from
    //  array new.
    //
    const unsigned int srcLen = u_strlen(toConvert);
    XMLCh* retBuf;
    if (manager)
        retBuf = (XMLCh*) manager->allocate((srcLen+1) * sizeof(XMLCh));
    else
        retBuf = new XMLCh[srcLen + 1];

    // Copy unit by unit up to the source terminator, converting to XMLCh
    unsigned int idx = 0;
    for (; toConvert[idx]; ++idx)
        retBuf[idx] = XMLCh(toConvert[idx]);

    // Cap off the result
    retBuf[idx] = 0;

    return retBuf;
}
    //@{
    XMLBuffer(const XMLSize_t capacity = 1023
              , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager)
        : fIndex(0)
        , fCapacity(capacity)
        , fFullSize(0)
        , fUsed(false)
        , fMemoryManager(manager)
        , fFullHandler(0)
        , fBuffer(0)
    {
        // One slot beyond the requested capacity is reserved for the
        // terminating null.
        const XMLSize_t slotCount = capacity + 1;
        fBuffer = (XMLCh*) manager->allocate(slotCount * sizeof(XMLCh));

        // Start out as an empty, null terminated string
        *fBuffer = XMLCh(0);
    }
Exemple #9
0
bool XMLPlatformUtils::isRelative(const XMLCh* const toCheck)
{
    // Check for pathological case of empty path
    if (!toCheck[0])
        return false;

    //
    //  If it starts with a slash, then it cannot be relative. This covers
    //  both something like "\Test\File.xml" and an NT Lan type remote path
    //  that starts with a node like "\\MyNode\Test\File.xml".
    //
    if (toCheck[0] == XMLCh('/'))
        return false;

    // Else assume its a relative path
    return true;
}
Exemple #10
0
XMLCh ReaderMgr::peekNextChar()
{
    XMLCh peeked;

    if (!fCurReader->peekNextChar(peeked))
    {
        //
        //  The current reader had nothing to give, so drop back to the
        //  next reader on the stack. If there is none to drop back to,
        //  report that by returning zero.
        //
        if (!popReader())
            return XMLCh(0);

        // Peek again, this time from the reader that is now current
        fCurReader->peekNextChar(peeked);
    }

    return peeked;
}
bool XMLPlatformUtils::isRelative(const XMLCh* const toCheck
                                  , MemoryManager* const manager)
{
    // Check for pathological case of empty path
    if (!toCheck[0])
        return false;

    //
    //  If its starts with a drive, then it cannot be relative. Note that
    //  we checked the drive not being empty above, so worst case its one
    //  char long and the check of the 1st char will fail because its really
    //  a null character.
    //
    if (toCheck[1] == chColon)
    {
        if (((toCheck[0] >= chLatin_A) && (toCheck[0] <= chLatin_Z))
        ||  ((toCheck[0] >= chLatin_a) && (toCheck[0] <= chLatin_z)))
        {
            return false;
        }
    }

    //
    //  If it starts with a double slash, then it cannot be relative since
    //  it's a remote file.
    //
    if (isBackSlash(toCheck[0]) && isBackSlash(toCheck[1]))
        return false;

    //
    //  If it starts with a slash, then it cannot be relative. This covers
    //  both something like "\Test\File.xml" and an NT Lan type remote path
    //  that starts with a node like "\\MyNode\Test\File.xml".
    //
    if (toCheck[0] == XMLCh('/'))
        return false;

    // Else assume its a relative path
    return true;
}
Exemple #12
0
// ---------------------------------------------------------------------------
//  ReaderMgr: Scanning APIs
// ---------------------------------------------------------------------------
XMLCh ReaderMgr::getNextChar()
{
    XMLCh fetched;

    if (!fCurReader->getNextChar(fetched))
    {
        //
        //  The current reader is used up, so fall back to the next reader
        //  on the stack. popReader() may throw out of here (per the
        //  original notes, at the end of an entity when fThrowEOE is set);
        //  if it simply reports that nothing is left, return zero.
        //
        if (!popReader())
            return XMLCh(0);

        // Try again with the reader that is now current
        fCurReader->getNextChar(fetched);
    }

    return fetched;
}
Exemple #13
0
//
//  Creates an input stream for this URL.
//
//  For the file protocol with no host (or the local host), the path is
//  copied, its %xx escapes are decoded in place, and a BinFileInputStream is
//  opened on the result; 0 is returned if that stream fails to open. For
//  everything else the installed net accessor is asked to create the stream.
//
//  Throws MalformedURLException if the path contains a malformed %xx escape,
//  or if no net accessor is installed for a non-local URL.
//
BinInputStream* XMLURL::makeNewStream() const
{
    //
    //  If its a local host, then we short circuit it and use our own file
    //  stream support. Otherwise, we just let it fall through and let the
    //  installed network access object provide a stream.
    //
    if (fProtocol == XMLURL::File)
    {
        if (!fHost || !XMLString::compareIStringASCII(fHost, XMLUni::fgLocalHostString))
        {
            // Work on a private copy of the path; the janitor releases it on
            // every exit path, including the throw below.
            XMLCh* realPath = XMLString::replicate(fPath, fMemoryManager);
            ArrayJanitor<XMLCh> basePathName(realPath, fMemoryManager);

            //
            // Need to manually replace any character reference %xx first
            // HTTP protocol will be done automatically by the netaccessor
            //
            int end = XMLString::stringLen(realPath);
            int percentIndex = XMLString::indexOf(realPath, chPercent, 0, fMemoryManager);

            while (percentIndex != -1) {

                // The first test short-circuits the digit checks, so the two
                // chars after '%' are only examined when they exist.
                if (percentIndex+2 >= end ||
                    !isHexDigit(realPath[percentIndex+1]) ||
                    !isHexDigit(realPath[percentIndex+2]))
                {
                    //
                    //  Build the offending text for the error message. Copy
                    //  only the chars that really exist between the '%' and
                    //  the end of the path, so that a '%' in the last
                    //  position does not make us read beyond the buffer.
                    //
                    XMLCh value1[4];
                    const int remaining = end - percentIndex;
                    const int copyCount = (remaining < 3) ? remaining : 3;
                    XMLString::moveChars(value1, &(realPath[percentIndex]), copyCount);
                    value1[copyCount] = chNull;
                    ThrowXMLwithMemMgr2(MalformedURLException
                            , XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence
                            , realPath
                            , value1
                            , fMemoryManager);
                }

                unsigned int value = (xlatHexDigit(realPath[percentIndex+1]) * 16) + xlatHexDigit(realPath[percentIndex+2]);

                // The decoded char takes the place of the '%'
                realPath[percentIndex] = XMLCh(value);

                // Close the two-char gap left by the hex digits
                int i =0;
                for (i = percentIndex + 1; i < end - 2 ; i++)
                    realPath[i] = realPath[i+2];
                realPath[i] = chNull;
                end = i;

                //
                //  Resume the search AFTER the char just decoded. Starting
                //  at percentIndex again would re-examine a decoded '%'
                //  (from "%25") and treat the following chars as another
                //  escape. If the decoded char is the last one, we are done.
                //
                if (percentIndex + 1 < end)
                    percentIndex = XMLString::indexOf(realPath, chPercent, percentIndex + 1, fMemoryManager);
                else
                    percentIndex = -1;
            }


            BinFileInputStream* retStrm = new (fMemoryManager) BinFileInputStream(realPath, fMemoryManager);
            if (!retStrm->getIsOpen())
            {
                delete retStrm;
                return 0;
            }
            return retStrm;
        }
    }

    //
    //  If we don't have have an installed net accessor object, then we
    //  have to just throw here.
    //
    if (!XMLPlatformUtils::fgNetAccessor)
        ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_UnsupportedProto, fMemoryManager);

    // Else ask the net accessor to create the stream
    return XMLPlatformUtils::fgNetAccessor->makeNew(*this);
}
Exemple #14
0
// ---------------------------------------------------------------------------
//  XMLUTF8Transcoder: Implementation of the transcoder API
// ---------------------------------------------------------------------------
unsigned int
XMLUTF8Transcoder::transcodeFrom(const  XMLByte* const          srcData
                                , const unsigned int            srcCount
                                ,       XMLCh* const            toFill
                                , const unsigned int            maxChars
                                ,       unsigned int&           bytesEaten
                                ,       unsigned char* const    charSizes)
{
    // Watch for pathological scenario. Shouldn't happen, but...
    if (!srcCount || !maxChars)
        return 0;

    // If debugging, make sure that the block size is legal
    #if defined(XERCES_DEBUG)
    checkBlockSize(maxChars);
    #endif

    //
    //  Get pointers to our start and end points of the input and output
    //  buffers.
    //
    const XMLByte*  srcPtr = srcData;
    const XMLByte*  srcEnd = srcPtr + srcCount;
    XMLCh*          outPtr = toFill;
    XMLCh*          outEnd = outPtr + maxChars;
    unsigned char*  sizePtr = charSizes;



    //
    //  We now loop until we either run out of input data, or room to store
    //  output chars.
    //
    while ((srcPtr < srcEnd) && (outPtr < outEnd))
    {
        // Special-case ASCII, which is a leading byte value of <= 127
        if (*srcPtr <= 127)
        {
            *outPtr++ = XMLCh(*srcPtr++);
            *sizePtr++ = 1;
            continue;
        }

        // See how many trailing src bytes this sequence is going to require
        const unsigned int trailingBytes = gUTFBytes[*srcPtr];

        //
        //  If there are not enough source bytes to do this one, then we
        //  are done. Note that we done >= here because we are implicitly
        //  counting the 1 byte we get no matter what.
        //
        //  If we break out here, then there is nothing to undo since we
        //  haven't updated any pointers yet.
        //
        if (srcPtr + trailingBytes >= srcEnd)
            break;

        // Looks ok, so lets build up the value
        // or at least let's try to do so--remembering that
        // we cannot assume the encoding to be valid:

        // first, test first byte
        if((gUTFByteIndicatorTest[trailingBytes] & *srcPtr) != gUTFByteIndicator[trailingBytes]) {
            char pos[2] = {(char)0x31, 0}; 
            char len[2] = {(char)(trailingBytes+0x31), 0};
            char byte[2] = {*srcPtr,0};
            ThrowXMLwithMemMgr3(UTFDataFormatException, XMLExcepts::UTF8_FormatError, pos, byte, len, getMemoryManager());
        }

        /***
         * http://www.unicode.org/reports/tr27/
         *
         * Table 3.1B. lists all of the byte sequences that are legal in UTF-8. 
         * A range of byte values such as A0..BF indicates that any byte from A0 to BF (inclusive) 
         * is legal in that position. 
         * Any byte value outside of the ranges listed is illegal. 
         * For example, 
         * the byte sequence <C0 AF> is illegal  since C0 is not legal in the 1st Byte column. 
         * The byte sequence <E0 9F 80> is illegal since in the row 
         *    where E0 is legal as a first byte, 
         *    9F is not legal as a second byte. 
         * The byte sequence <F4 80 83 92> is legal, since every byte in that sequence matches 
         * a byte range in a row of the table (the last row). 
         *
         *
         * Table 3.1B. Legal UTF-8 Byte Sequences  
         * Code Points              1st Byte    2nd Byte    3rd Byte    4th Byte 
         * =========================================================================
         * U+0000..U+007F            00..7F       
         * -------------------------------------------------------------------------
         * U+0080..U+07FF            C2..DF      80..BF      
         *
         * -------------------------------------------------------------------------
         * U+0800..U+0FFF            E0          A0..BF     80..BF   
         *                                       -- 
         *                          
         * U+1000..U+FFFF            E1..EF      80..BF     80..BF    
         *
         * --------------------------------------------------------------------------
         * U+10000..U+3FFFF          F0          90..BF     80..BF       80..BF 
         *                                       --
         * U+40000..U+FFFFF          F1..F3      80..BF     80..BF       80..BF 
         * U+100000..U+10FFFF        F4          80..8F     80..BF       80..BF 
         *                                           --
         * ==========================================================================
         *
         *  Cases where a trailing byte range is not 80..BF are underlined in the table to 
         *  draw attention to them. These occur only in the second byte of a sequence.
         *
         ***/

        XMLUInt32 tmpVal = 0;

        switch(trailingBytes)
        {
            case 1 :
                // UTF-8:   [110y yyyy] [10xx xxxx]
                // Unicode: [0000 0yyy] [yyxx xxxx]
                //
                // 0xC0, 0xC1 has been filtered out             
                checkTrailingBytes(*(srcPtr+1), 1, 1);

                tmpVal = *srcPtr++;
                tmpVal <<= 6;
                tmpVal += *srcPtr++;

                break;
            case 2 :
                // UTF-8:   [1110 zzzz] [10yy yyyy] [10xx xxxx]
                // Unicode: [zzzz yyyy] [yyxx xxxx]
                //
                if (( *srcPtr == 0xE0) && ( *(srcPtr+1) < 0xA0)) 
                {
                    char byte0[2] = {*srcPtr    ,0};
                    char byte1[2] = {*(srcPtr+1),0};

                    ThrowXMLwithMemMgr2(UTFDataFormatException
                                      , XMLExcepts::UTF8_Invalid_3BytesSeq
                                      , byte0
                                      , byte1
                                      , getMemoryManager());
                }

                checkTrailingBytes(*(srcPtr+1), 2, 1);
                checkTrailingBytes(*(srcPtr+2), 2, 2);

                //
                // D36 (a) UTF-8 is the Unicode Transformation Format that serializes 
                //         a Unicode code point as a sequence of one to four bytes, 
                //         as specified in Table 3.1, UTF-8 Bit Distribution.
                //     (b) An illegal UTF-8 code unit sequence is any byte sequence that 
                //         does not match the patterns listed in Table 3.1B, Legal UTF-8 
                //         Byte Sequences.
                //     (c) An irregular UTF-8 code unit sequence is a six-byte sequence 
                //         where the first three bytes correspond to a high surrogate, 
                //         and the next three bytes correspond to a low surrogate. 
                //         As a consequence of C12, these irregular UTF-8 sequences shall 
                //         not be generated by a conformant process. 
                //
                //irregular three bytes sequence
                // that is zzzzyy matches leading surrogate tag 110110 or 
                //                       trailing surrogate tag 110111
                // *srcPtr=1110 1101 
                // *(srcPtr+1)=1010 yyyy or 
                // *(srcPtr+1)=1011 yyyy
                //
                // 0xED 1110 1101
                // 0xA0 1010 0000

                if ((*srcPtr == 0xED) && (*(srcPtr+1) >= 0xA0))
                {
                    char byte0[2] = {*srcPtr,    0};
                    char byte1[2] = {*(srcPtr+1),0};

                     ThrowXMLwithMemMgr2(UTFDataFormatException
                              , XMLExcepts::UTF8_Irregular_3BytesSeq
                              , byte0
                              , byte1
                              , getMemoryManager());
                }

                tmpVal = *srcPtr++;
                tmpVal <<= 6;
                tmpVal += *srcPtr++;
                tmpVal <<= 6;
                tmpVal += *srcPtr++;

                break;
            case 3 : 
                // UTF-8:   [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
                // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
                //          [1101 11yy] [yyxx xxxx] (low surrogate)
                //          * uuuuu = wwww + 1
                //
                if (((*srcPtr == 0xF0) && (*(srcPtr+1) < 0x90)) ||
                    ((*srcPtr == 0xF4) && (*(srcPtr+1) > 0x8F))  )
                {
                    char byte0[2] = {*srcPtr    ,0};
                    char byte1[2] = {*(srcPtr+1),0};

                    ThrowXMLwithMemMgr2(UTFDataFormatException
                                      , XMLExcepts::UTF8_Invalid_4BytesSeq
                                      , byte0
                                      , byte1
                                      , getMemoryManager());
                }

                checkTrailingBytes(*(srcPtr+1), 3, 1);
                checkTrailingBytes(*(srcPtr+2), 3, 2);
                checkTrailingBytes(*(srcPtr+3), 3, 3);
                
                tmpVal = *srcPtr++;
                tmpVal <<= 6;
                tmpVal += *srcPtr++;
                tmpVal <<= 6;
                tmpVal += *srcPtr++;
                tmpVal <<= 6;
                tmpVal += *srcPtr++;

                break;
            default: // trailingBytes > 3

                /***
                 * The definition of UTF-8 in Annex D of ISO/IEC 10646-1:2000 also allows 
                 * for the use of five- and six-byte sequences to encode characters that 
                 * are outside the range of the Unicode character set; those five- and 
                 * six-byte sequences are illegal for the use of UTF-8 as a transformation 
                 * of Unicode characters. ISO/IEC 10646 does not allow mapping of unpaired 
                 * surrogates, nor U+FFFE and U+FFFF (but it does allow other noncharacters).
                 ***/
                char len[2]  = {(char)(trailingBytes+0x31), 0};
                char byte[2] = {*srcPtr,0};

                ThrowXMLwithMemMgr2(UTFDataFormatException
                                  , XMLExcepts::UTF8_Exceede_BytesLimit
                                  , byte
                                  , len
                                  , getMemoryManager());

                break;
        }


        // since trailingBytes comes from an array, this logic is redundant
        //  default :
        //      ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq);
        //}
        tmpVal -= gUTFOffsets[trailingBytes];

        //
        //  If it will fit into a single char, then put it in. Otherwise
        //  encode it as a surrogate pair. If its not valid, use the
        //  replacement char.
        //
        if (!(tmpVal & 0xFFFF0000))
        {
            *sizePtr++ = trailingBytes + 1;
            *outPtr++ = XMLCh(tmpVal);
        }
         else if (tmpVal > 0x10FFFF)
        {
            //
            //  If we've gotten more than 32 chars so far, then just break
            //  out for now and lets process those. When we come back in
            //  here again, we'll get no chars and throw an exception. This
            //  way, the error will have a line and col number closer to
            //  the real problem area.
            //
            if ((outPtr - toFill) > 32)
                break;

            ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager());
        }
         else
        {
            //
            //  If we have enough room to store the leading and trailing
            //  chars, then lets do it. Else, pretend this one never
            //  happened, and leave it for the next time. Since we don't
            //  update the bytes read until the bottom of the loop, by
            //  breaking out here its like it never happened.
            //
            if (outPtr + 1 >= outEnd)
                break;

            // Store the leading surrogate char
            tmpVal -= 0x10000;
            *sizePtr++ = trailingBytes + 1;
            *outPtr++ = XMLCh((tmpVal >> 10) + 0xD800);

            //
            //  And then the trailing char. This one accounts for no
            //  bytes eaten from the source, so set the char size for this
            //  one to be zero.
            //
            *sizePtr++ = 0;
            *outPtr++ = XMLCh((tmpVal & 0x3FF) + 0xDC00);
        }
    }

    // Update the bytes eaten
    bytesEaten = srcPtr - srcData;

    // Return the characters read
    return outPtr - toFill;
}
Exemple #15
0
// ---------------------------------------------------------------------------
//  ICUTranscoder: The virtual transcoder API
// ---------------------------------------------------------------------------
//
//  Decodes source bytes into XMLCh units using the ICU converter.
//
//  srcData/srcCount   the source bytes and how many are available
//  toFill/maxChars    the output buffer and its capacity in XMLCh units
//  bytesEaten         set to the number of source bytes consumed
//  charSizes          receives the per-character source byte counts
//
//  Returns the number of XMLCh units stored in toFill. Throws
//  TranscodingException if the converter reports anything other than
//  success or output buffer overflow.
//
unsigned int
ICUTranscoder::transcodeFrom(const  XMLByte* const          srcData
                            , const unsigned int            srcCount
                            ,       XMLCh* const            toFill
                            , const unsigned int            maxChars
                            ,       unsigned int&           bytesEaten
                            ,       unsigned char* const    charSizes)
{
    // If debugging, insure the block size is legal
    #if defined(XERCES_DEBUG)
    checkBlockSize(maxChars);
    #endif

    // Set up pointers to the start and end of the source buffer
    const XMLByte*  startSrc = srcData;
    const XMLByte*  endSrc = srcData + srcCount;

    //
    //  And now do the target buffer. This works differently according to
    //  whether XMLCh and UChar are the same size or not.
    //
    UChar* startTarget;
    if (sizeof(XMLCh) == sizeof(UChar))
        startTarget = (UChar*)toFill;
     else
        startTarget = (UChar*) getMemoryManager()->allocate
        (
            maxChars * sizeof(UChar)
        );//new UChar[maxChars];
    UChar* orgTarget = startTarget;

    //
    //  Transcode the buffer.  Buffer overflow errors are normal, occurring
    //  when the raw input buffer holds more characters than will fit in
    //  the Unicode output buffer.
    //
    UErrorCode  err = U_ZERO_ERROR;
    ucnv_toUnicode
    (
        fConverter
        , &startTarget
        , startTarget + maxChars
        , (const char**)&startSrc
        , (const char*)endSrc
        , (fFixed ? 0 : (int32_t*)fSrcOffsets)
        , false
        , &err
    );

    if ((err != U_ZERO_ERROR) && (err != U_BUFFER_OVERFLOW_ERROR))
    {
        //
        //  startTarget may point into the temp buffer, so fetch the value
        //  that goes into the error text BEFORE that buffer is released.
        //
        unsigned int badValue = 0;
        if (fFixed)
            badValue = (unsigned int)(*startTarget);

        if (orgTarget != (UChar*)toFill)
            getMemoryManager()->deallocate(orgTarget);//delete [] orgTarget;

        if (fFixed)
        {
            XMLCh tmpBuf[17];
            XMLString::binToText(badValue, tmpBuf, 16, 16, getMemoryManager());
            ThrowXMLwithMemMgr2
            (
                TranscodingException
                , XMLExcepts::Trans_BadSrcCP
                , tmpBuf
                , getEncodingName()
                , getMemoryManager()
            );
        }
        else
        {
            ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager());
        }
    }

    // Calculate the bytes eaten and store in caller's param
    bytesEaten = startSrc - srcData;

    // And the characters decoded
    const unsigned int charsDecoded = startTarget - orgTarget;

    //
    //  Translate the array of char offsets into an array of character
    //  sizes, which is what the transcoder interface semantics requires.
    //  If its fixed, then we can optimize it.
    //
    if (fFixed)
    {
        const unsigned char fillSize = (unsigned char)ucnv_getMaxCharSize(fConverter);
        memset(charSizes, fillSize, maxChars);
    }
     else
    {
        //
        //  We have to convert the series of offsets into a series of
        //  sizes. If just one char was decoded, then its the total bytes
        //  eaten. Otherwise, do a loop and subtract out each element from
        //  its previous element.
        //
        //  If nothing at all was decoded there are no sizes to fill in.
        //  That case must not reach the loop: the count is unsigned, so
        //  "charsDecoded - 1" would wrap around to a huge value.
        //
        if (charsDecoded == 1)
        {
            charSizes[0] = (unsigned char)bytesEaten;
        }
         else if (charsDecoded > 1)
        {
            //  ICU does not return an extra element to allow us to figure
            //  out the last char size, so we have to compute it from the
            //  total bytes used.
            unsigned int index;
            for (index = 0; index + 1 < charsDecoded; index++)
            {
                charSizes[index] = (unsigned char)(fSrcOffsets[index + 1]
                                                    - fSrcOffsets[index]);
            }
            charSizes[charsDecoded - 1] = (unsigned char)(bytesEaten
                                          - fSrcOffsets[charsDecoded - 1]);
        }
    }

    //
    //  If XMLCh and UChar are not the same size, then we need to copy over
    //  the temp buffer to the new one.
    //
    if (sizeof(UChar) != sizeof(XMLCh))
    {
        XMLCh* outPtr = toFill;
        startTarget = orgTarget;
        for (unsigned int index = 0; index < charsDecoded; index++)
            *outPtr++ = XMLCh(*startTarget++);

        // And delete the temp buffer
        getMemoryManager()->deallocate(orgTarget);//delete [] orgTarget;
    }

    // Return the chars we put into the target buffer
    return charsDecoded;
}
// ---------------------------------------------------------------------------
//  XMLUTF16Transcoder: Implementation of the transcoder API
// ---------------------------------------------------------------------------
unsigned int
XMLUTF16Transcoder::transcodeFrom(  const   XMLByte* const       srcData
                                    , const unsigned int         srcCount
                                    ,       XMLCh* const         toFill
                                    , const unsigned int         maxChars
                                    ,       unsigned int&        bytesEaten
                                    ,       unsigned char* const charSizes)
{
    // If debugging, make sure that the block size is legal
    #if defined(XERCES_DEBUG)
    checkBlockSize(maxChars);
    #endif

    //
    //  Calculate the max chars we can do here. Its the lesser of the
    //  max output chars and the number of chars in the source.
    //
    const unsigned int srcChars = srcCount / sizeof(UTF16Ch);
    const unsigned int countToDo = srcChars < maxChars ? srcChars : maxChars;

    // Look at the source data as UTF16 chars
    const UTF16Ch* asUTF16 = (const UTF16Ch*)srcData;

    // And get a mutable pointer to the output
    XMLCh* outPtr = toFill;

    //
    //  If its swapped, we have to do a char by char swap and cast. Else
    //  we have to check whether our XMLCh and UTF16Ch types are the same
    //  size or not. If so, we can optimize by just doing a buffer copy.
    //
    if (fSwapped)
    {
        //
        //  And then do the swapping loop for the count we precalculated. Note
        //  that this also handles size conversion as well if XMLCh is not the
        //  same size as UTF16Ch.
        //
        for (unsigned int index = 0; index < countToDo; index++)
            *outPtr++ = BitOps::swapBytes(*asUTF16++);
    }
     else
    {
        //
        //  If the XMLCh type is the same size as a UTF16 value on this
        //  platform, then we can do just a buffer copy straight to the target
        //  buffer since our source chars are UTF-16 chars. If its not, then
        //  we still have to do a loop and assign each one, in order to
        //  implicitly convert.
        //
        if (sizeof(XMLCh) == sizeof(UTF16Ch))
        {
            //  Notice we convert char count to byte count here!!!
            memcpy(toFill, srcData, countToDo * sizeof(UTF16Ch));
        }
         else
        {
            for (unsigned int index = 0; index < countToDo; index++)
                *outPtr++ = XMLCh(*asUTF16++);
        }
    }

    // Set the bytes eaten
    bytesEaten = countToDo * sizeof(UTF16Ch);

    // Set the character sizes to the fixed size
    memset(charSizes, sizeof(UTF16Ch), countToDo);

    // Return the chars we transcoded
    return countToDo;
}
// ---------------------------------------------------------------------------
//  XMLUCS4Transcoder: Implementation of the transcoder API
// ---------------------------------------------------------------------------
unsigned int
XMLUCS4Transcoder::transcodeFrom(const  XMLByte* const          srcData
                                , const unsigned int            srcCount
                                ,       XMLCh* const            toFill
                                , const unsigned int            maxChars
                                ,       unsigned int&           bytesEaten
                                ,       unsigned char* const    charSizes)
{
    // If debugging, make sure that the block size is legal
    #if defined(XERCES_DEBUG)
    checkBlockSize(maxChars);
    #endif

    //
    //  Get pointers to the start and end of the source buffer in terms of
    //  UCS-4 characters.
    //
    const UCS4Ch*   srcPtr = (const UCS4Ch*)srcData;
    const UCS4Ch*   srcEnd = srcPtr + (srcCount / sizeof(UCS4Ch));

    //
    //  Get pointers to the start and end of the target buffer, which is
    //  in terms of the XMLCh chars we output.
    //
    XMLCh*  outPtr = toFill;
    XMLCh*  outEnd = toFill + maxChars;

    //
    //  And get a pointer into the char sizes buffer. We will run this
    //  up as we put chars into the output buffer.
    //
    unsigned char* sizePtr = charSizes;

    //
    //  Now process chars until we either use up all our source or all of
    //  our output space.
    //
    while ((outPtr < outEnd) && (srcPtr < srcEnd))
    {
        //
        //  Get the next UCS char out of the buffer. Don't bump the ptr
        //  yet since we might not have enough storage for it in the target
        //  (if it causes a surrogate pair to be created).
        //
        UCS4Ch nextVal = *srcPtr;

        // If it needs to be swapped, then do it
        if (fSwapped)
            nextVal = BitOps::swapBytes(nextVal);

        // Handle a surrogate pair if needed
        if (nextVal & 0xFFFF0000)
        {
            //
            //  If we don't have room for both of the chars, then we
            //  bail out now.
            //
            if (outPtr + 1 == outEnd)
                break;

            const XMLCh ch1 = XMLCh(((nextVal - 0x10000) >> 10) + 0xD800);
            const XMLCh ch2 = XMLCh(((nextVal - 0x10000) & 0x3FF) + 0xDC00);

            //
            //  We have room so store them both. But note that the
            //  second one took up no source bytes!
            //
            *sizePtr++ = sizeof(UCS4Ch);
            *outPtr++ = ch1;
            *sizePtr++ = 0;
            *outPtr++ = ch2;
        }
         else
        {
            //
            //  No surrogate, so just store it and bump the count of chars
            //  read. Update the char sizes buffer for this char's entry.
            //
            *sizePtr++ = sizeof(UCS4Ch);
            *outPtr++ = XMLCh(nextVal);
        }

        // Indicate that we ate another UCS char's worth of bytes
        srcPtr++;
    }
Exemple #18
0
// Punctuation and symbol characters. Values are fixed numeric code points
// rather than character literals, so they do not depend on the compiler's
// execution character set.
const XMLCh chOpenCurly             = XMLCh(0x7B);      // '{'
const XMLCh chOpenParen             = XMLCh(0x28);      // '('
const XMLCh chOpenSquare            = XMLCh(0x5B);      // '['
const XMLCh chPercent               = XMLCh(0x25);      // '%'
const XMLCh chPeriod                = XMLCh(0x2E);      // '.'
const XMLCh chPipe                  = XMLCh(0x7C);      // '|'
const XMLCh chPlus                  = XMLCh(0x2B);      // '+'
const XMLCh chPound                 = XMLCh(0x23);      // '#'
const XMLCh chQuestion              = XMLCh(0x3F);      // '?'
const XMLCh chSingleQuote           = XMLCh(0x27);      // '\''
const XMLCh chSpace                 = XMLCh(0x20);      // ' '
const XMLCh chSemiColon             = XMLCh(0x3B);      // ';'
const XMLCh chTilde                 = XMLCh(0x7E);      // '~'
const XMLCh chUnderscore            = XMLCh(0x5F);      // '_'

// Unicode marker 0xFEFF and the same value with its two bytes exchanged.
const XMLCh chSwappedUnicodeMarker  = XMLCh(0xFFFE);
const XMLCh chUnicodeMarker         = XMLCh(0xFEFF);

// Decimal digits '0' through '9'.
const XMLCh chDigit_0               = XMLCh(0x30);
const XMLCh chDigit_1               = XMLCh(0x31);
const XMLCh chDigit_2               = XMLCh(0x32);
const XMLCh chDigit_3               = XMLCh(0x33);
const XMLCh chDigit_4               = XMLCh(0x34);
const XMLCh chDigit_5               = XMLCh(0x35);
const XMLCh chDigit_6               = XMLCh(0x36);
const XMLCh chDigit_7               = XMLCh(0x37);
const XMLCh chDigit_8               = XMLCh(0x38);
const XMLCh chDigit_9               = XMLCh(0x39);

// Upper-case Latin letters.
const XMLCh chLatin_A               = XMLCh(0x41);      // 'A'
const XMLCh chLatin_B               = XMLCh(0x42);      // 'B'