// --------------------------------------------------------------------------- // XML88591Transcoder: Implementation of the transcoder API // --------------------------------------------------------------------------- XMLSize_t XML88591Transcoder::transcodeFrom( const XMLByte* const srcData , const XMLSize_t srcCount , XMLCh* const toFill , const XMLSize_t maxChars , XMLSize_t& bytesEaten , unsigned char* const charSizes) { // // Calculate the max chars we can do here. Its the lesser of the // max output chars and the number of bytes in the source. // const XMLSize_t countToDo = srcCount < maxChars ? srcCount : maxChars; // // Loop through the bytes to do and convert over each byte. Its just // a cast to the wide char type. // const XMLByte* srcPtr = srcData; XMLCh* destPtr = toFill; const XMLByte* srcEnd = srcPtr + countToDo; while (srcPtr < srcEnd) *destPtr++ = XMLCh(*srcPtr++); // Set the bytes eaten, and set the char size array to the fixed size bytesEaten = countToDo; memset(charSizes, 1, countToDo); // Return the chars we transcoded return countToDo; }
// --------------------------------------------------------------------------- // XMLASCIITranscoder: Implementation of the transcoder API // --------------------------------------------------------------------------- XMLSize_t XMLASCIITranscoder::transcodeFrom( const XMLByte* const srcData , const XMLSize_t srcCount , XMLCh* const toFill , const XMLSize_t maxChars , XMLSize_t& bytesEaten , unsigned char* const charSizes) { // // Calculate the max chars we can do here. Its the lesser of the // max output chars and the source byte count. // const XMLSize_t countToDo = srcCount < maxChars ? srcCount : maxChars; // // Now loop through that many source chars and just cast each one // over to the XMLCh format. Check each source that its really a // valid ASCI char. // const XMLByte* srcPtr = srcData; XMLCh* outPtr = toFill; XMLSize_t countDone = 0; for (; countDone < countToDo; countDone++) { // Do the optimistic work up front if (*srcPtr < 0x80) { *outPtr++ = XMLCh(*srcPtr++); continue; } // // We got non source encoding char. If we got more than 32 chars, // the just break out. We'll come back here later to hit this again // and give an error much closer to the real source position. // if (countDone > 32) break; XMLCh tmpBuf[17]; XMLString::binToText((unsigned int)*srcPtr, tmpBuf, 16, 16, getMemoryManager()); ThrowXMLwithMemMgr2 ( TranscodingException , XMLExcepts::Trans_Unrepresentable , tmpBuf , getEncodingName() , getMemoryManager() ); } // Set the bytes we ate bytesEaten = countDone; // Set the char sizes to the fixed size memset(charSizes, 1, countDone); // Return the chars we transcoded return countDone; }
bool XMLPlatformUtils::isRelative(const XMLCh* const toCheck , MemoryManager* const manager) { if (!toCheck[0] || toCheck[0] == XMLCh('/')) return false; return true; }
XERCES_CPP_NAMESPACE_BEGIN

//
//  Splits the code point 'ch' into a pair of surrogate code units.
//
//  Returns a three element, null terminated XMLCh array obtained from
//  'manager' (high surrogate, low surrogate, chNull). The array is handed
//  to the caller; nothing in this function releases it.
//
XMLCh* RegxUtil::decomposeToSurrogates(XMLInt32 ch,
                                       MemoryManager* const manager)
{
    XMLCh* const pair = (XMLCh*) manager->allocate(3 * sizeof(XMLCh));

    // Surrogates encode the distance above 0x10000, ten bits per unit
    const XMLInt32 offset = ch - 0x10000;
    const XMLInt32 upperBits = offset >> 10;
    const XMLInt32 lowerBits = offset & 0x03FF;

    pair[0] = XMLCh(upperBits + 0xD800);
    pair[1] = XMLCh(lowerBits + 0xDC00);
    pair[2] = chNull;

    return pair;
}
//
//  Opening part of CygwinTranscoder::canTranscodeTo; the rest of the
//  definition lies outside this excerpt. The visible code only prepares the
//  character(s) for 'toCheck' in a local two element buffer.
//
bool CygwinTranscoder::canTranscodeTo(const unsigned int toCheck) const
{
    //
    //  If the passed value is really a surrogate embedded together, then
    //  we need to break it out into its two chars. Else just one.
    //
    XMLCh           srcBuf[2];
    unsigned int    srcCount = 1;
    if (toCheck & 0xFFFF0000)
    {
        // Any bit above the low 16 set: emit two units and count both.
        //
        // NOTE(review): 0x10000 is not subtracted from toCheck before the
        // shift, unlike RegxUtil::decomposeToSurrogates. For toCheck ==
        // 0x10000 the first unit is 0xD840 rather than 0xD800 - confirm
        // whether that is intended.
        srcBuf[0] = XMLCh((toCheck >> 10) + 0xD800);
        // The cast applies to the masked value only; the addition is then
        // converted back to XMLCh by the assignment.
        srcBuf[1] = XMLCh(toCheck & 0x3FF) + 0xDC00;
        srcCount++;
    }
//
//  Opening part of MacOSTranscoder::canTranscodeTo; the rest of the
//  definition lies outside this excerpt. The visible code only prepares the
//  surrogate units for 'toCheck' in a local two element buffer.
//
bool MacOSTranscoder::canTranscodeTo(const unsigned int toCheck)
{
    //
    //  If the passed value is really a surrogate embedded together, then
    //  we need to break it out into its two chars. Else just one.
    //
    //  In the visible part, srcCnt stays 0 and srcBuf stays unset when
    //  toCheck has no bits above the low 16.
    //
    unsigned int    srcCnt = 0;
    UniChar         srcBuf[2];

    if (toCheck & 0xFFFF0000)
    {
        // NOTE(review): the XMLCh cast is applied to (toCheck >> 10) before
        // 0xD800 is added, and 0x10000 is not subtracted first, unlike
        // RegxUtil::decomposeToSurrogates - confirm whether that is intended.
        srcBuf[srcCnt++] = XMLCh(toCheck >> 10)   + 0xD800;
        srcBuf[srcCnt++] = XMLCh(toCheck & 0x3FF) + 0xDC00;
    }
//
//  Copies a null terminated UChar string into a freshly allocated, null
//  terminated XMLCh string, converting each unit with an XMLCh cast.
//
//  The result comes from manager->allocate() when a manager is supplied and
//  from new[] otherwise, so it must be released in the matching way.
//
static XMLCh* convertToXMLCh(   const   UChar* const toConvert,
                                MemoryManager* const manager = 0)
{
    const unsigned int charCount = u_strlen(toConvert);
    const unsigned int bufUnits  = charCount + 1;   // room for the terminator

    XMLCh* result;
    if (manager)
        result = (XMLCh*) manager->allocate(bufUnits * sizeof(XMLCh));
    else
        result = new XMLCh[bufUnits];

    // Convert unit by unit up to the source terminator
    XMLCh* dst = result;
    for (const UChar* src = toConvert; *src; ++src, ++dst)
        *dst = XMLCh(*src);

    *dst = 0;
    return result;
}
//@{ XMLBuffer(const XMLSize_t capacity = 1023 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) : fIndex(0) , fCapacity(capacity) , fFullSize(0) , fUsed(false) , fMemoryManager(manager) , fFullHandler(0) , fBuffer(0) { // Buffer is one larger than capacity, to allow for zero term fBuffer = (XMLCh*) manager->allocate((capacity+1) * sizeof(XMLCh)); //new XMLCh[fCapacity+1]; // Keep it null terminated fBuffer[0] = XMLCh(0); }
bool XMLPlatformUtils::isRelative(const XMLCh* const toCheck) { // Check for pathological case of empty path if (!toCheck[0]) return false; // // If it starts with a slash, then it cannot be relative. This covers // both something like "\Test\File.xml" and an NT Lan type remote path // that starts with a node like "\\MyNode\Test\File.xml". // if (toCheck[0] == XMLCh('/')) return false; // Else assume its a relative path return true; }
//
//  Returns the next character as reported by the current reader's
//  peekNextChar().
//
//  When the current reader has nothing to offer, popReader() is asked to
//  move on to the next reader. If that fails zero is returned; otherwise
//  the new current reader is peeked once and whatever it stored is
//  returned.
//
XMLCh ReaderMgr::peekNextChar()
{
    XMLCh peeked;

    if (!fCurReader->peekNextChar(peeked))
    {
        // Current reader is used up, fall back to the one beneath it
        if (!popReader())
            return XMLCh(0);

        fCurReader->peekNextChar(peeked);
    }

    return peeked;
}
bool XMLPlatformUtils::isRelative(const XMLCh* const toCheck , MemoryManager* const manager) { // Check for pathological case of empty path if (!toCheck[0]) return false; // // If its starts with a drive, then it cannot be relative. Note that // we checked the drive not being empty above, so worst case its one // char long and the check of the 1st char will fail because its really // a null character. // if (toCheck[1] == chColon) { if (((toCheck[0] >= chLatin_A) && (toCheck[0] <= chLatin_Z)) || ((toCheck[0] >= chLatin_a) && (toCheck[0] <= chLatin_z))) { return false; } } // // If it starts with a double slash, then it cannot be relative since // it's a remote file. // if (isBackSlash(toCheck[0]) && isBackSlash(toCheck[1])) return false; // // If it starts with a slash, then it cannot be relative. This covers // both something like "\Test\File.xml" and an NT Lan type remote path // that starts with a node like "\\MyNode\Test\File.xml". // if (toCheck[0] == XMLCh('/')) return false; // Else assume its a relative path return true; }
// --------------------------------------------------------------------------- // ReaderMgr: Scanning APIs // --------------------------------------------------------------------------- XMLCh ReaderMgr::getNextChar() { XMLCh chRet; if (fCurReader->getNextChar(chRet)) return chRet; // // Didn't get anything back so this reader is hosed. So lets move to // the next reader on the stack. If this fails, it will be because // its the end of the original file, and we just return zero. // // If its the end of an entity and fThrowEOE is set, it will throw out // of here. Otherwise, it will take us down to the next reader and // we'll have more chars. // if (!popReader()) return XMLCh(0); // Else try again and return the new character fCurReader->getNextChar(chRet); return chRet; }
//
//  Creates an input stream for this URL.
//
//  A file URL with no host, or with a host equal to the local host string,
//  is opened directly: the %xx escapes in the path are decoded in a private
//  copy and a BinFileInputStream is created for the result. Zero is
//  returned when that stream could not be opened.
//
//  Every other URL is handed to the installed net accessor.
//
//  Throws MalformedURLException when the path contains a malformed or
//  truncated %xx escape, or when no net accessor is installed for a URL
//  that needs one.
//
BinInputStream* XMLURL::makeNewStream() const
{
    //
    //  If its a local host, then we short circuit it and use our own file
    //  stream support. Otherwise, we just let it fall through and let the
    //  installed network access object provide a stream.
    //
    if (fProtocol == XMLURL::File)
    {
        if (!fHost || !XMLString::compareIStringASCII(fHost, XMLUni::fgLocalHostString))
        {
            XMLCh* realPath = XMLString::replicate(fPath, fMemoryManager);
            ArrayJanitor<XMLCh> basePathName(realPath, fMemoryManager);

            //
            //  Need to manually replace any character reference %xx first.
            //  HTTP protocol will be done automatically by the netaccessor.
            //
            int end = XMLString::stringLen(realPath);
            int percentIndex = XMLString::indexOf(realPath, chPercent, 0, fMemoryManager);

            while (percentIndex != -1)
            {
                if (percentIndex + 2 >= end)
                {
                    //
                    //  Fewer than two characters follow the '%'. Report only
                    //  the characters that really exist; copying a fixed
                    //  three would read past the end of the string.
                    //
                    XMLCh value1[3];
                    value1[1] = chNull;
                    value1[2] = chNull;
                    XMLString::moveChars(value1, &(realPath[percentIndex]), (percentIndex + 1 >= end) ? 1 : 2);
                    ThrowXMLwithMemMgr2(MalformedURLException
                            , XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence
                            , realPath
                            , value1
                            , fMemoryManager);
                }
                else if (!isHexDigit(realPath[percentIndex+1]) ||
                         !isHexDigit(realPath[percentIndex+2]))
                {
                    // All three characters exist, but they are not "%hh"
                    XMLCh value1[4];
                    XMLString::moveChars(value1, &(realPath[percentIndex]), 3);
                    value1[3] = chNull;
                    ThrowXMLwithMemMgr2(MalformedURLException
                            , XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence
                            , realPath
                            , value1
                            , fMemoryManager);
                }

                const unsigned int value = (xlatHexDigit(realPath[percentIndex+1]) * 16)
                                         +  xlatHexDigit(realPath[percentIndex+2]);

                // Put the decoded character where the '%' was ...
                realPath[percentIndex] = XMLCh(value);

                // ... and close the two character gap left by the hex digits
                int i = 0;
                for (i = percentIndex + 1; i < end - 2; i++)
                    realPath[i] = realPath[i+2];
                realPath[i] = chNull;
                end = i;

                //
                //  Carry on AFTER the decoded character. Starting at it
                //  would decode a second time whenever the escape produced
                //  a '%' itself ("%25"). Only search while characters
                //  remain, so the start index always lies inside the string.
                //
                if (percentIndex + 1 < end)
                    percentIndex = XMLString::indexOf(realPath, chPercent, percentIndex + 1, fMemoryManager);
                else
                    percentIndex = -1;
            }

            BinFileInputStream* retStrm = new (fMemoryManager) BinFileInputStream(realPath, fMemoryManager);
            if (!retStrm->getIsOpen())
            {
                delete retStrm;
                return 0;
            }
            return retStrm;
        }
    }

    //
    //  If we don't have have an installed net accessor object, then we
    //  have to just throw here.
    //
    if (!XMLPlatformUtils::fgNetAccessor)
        ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_UnsupportedProto, fMemoryManager);

    // Else ask the net accessor to create the stream
    return XMLPlatformUtils::fgNetAccessor->makeNew(*this);
}
// --------------------------------------------------------------------------- // XMLUTF8Transcoder: Implementation of the transcoder API // --------------------------------------------------------------------------- unsigned int XMLUTF8Transcoder::transcodeFrom(const XMLByte* const srcData , const unsigned int srcCount , XMLCh* const toFill , const unsigned int maxChars , unsigned int& bytesEaten , unsigned char* const charSizes) { // Watch for pathological scenario. Shouldn't happen, but... if (!srcCount || !maxChars) return 0; // If debugging, make sure that the block size is legal #if defined(XERCES_DEBUG) checkBlockSize(maxChars); #endif // // Get pointers to our start and end points of the input and output // buffers. // const XMLByte* srcPtr = srcData; const XMLByte* srcEnd = srcPtr + srcCount; XMLCh* outPtr = toFill; XMLCh* outEnd = outPtr + maxChars; unsigned char* sizePtr = charSizes; // // We now loop until we either run out of input data, or room to store // output chars. // while ((srcPtr < srcEnd) && (outPtr < outEnd)) { // Special-case ASCII, which is a leading byte value of <= 127 if (*srcPtr <= 127) { *outPtr++ = XMLCh(*srcPtr++); *sizePtr++ = 1; continue; } // See how many trailing src bytes this sequence is going to require const unsigned int trailingBytes = gUTFBytes[*srcPtr]; // // If there are not enough source bytes to do this one, then we // are done. Note that we done >= here because we are implicitly // counting the 1 byte we get no matter what. // // If we break out here, then there is nothing to undo since we // haven't updated any pointers yet. 
// if (srcPtr + trailingBytes >= srcEnd) break; // Looks ok, so lets build up the value // or at least let's try to do so--remembering that // we cannot assume the encoding to be valid: // first, test first byte if((gUTFByteIndicatorTest[trailingBytes] & *srcPtr) != gUTFByteIndicator[trailingBytes]) { char pos[2] = {(char)0x31, 0}; char len[2] = {(char)(trailingBytes+0x31), 0}; char byte[2] = {*srcPtr,0}; ThrowXMLwithMemMgr3(UTFDataFormatException, XMLExcepts::UTF8_FormatError, pos, byte, len, getMemoryManager()); } /*** * http://www.unicode.org/reports/tr27/ * * Table 3.1B. lists all of the byte sequences that are legal in UTF-8. * A range of byte values such as A0..BF indicates that any byte from A0 to BF (inclusive) * is legal in that position. * Any byte value outside of the ranges listed is illegal. * For example, * the byte sequence <C0 AF> is illegal since C0 is not legal in the 1st Byte column. * The byte sequence <E0 9F 80> is illegal since in the row * where E0 is legal as a first byte, * 9F is not legal as a second byte. * The byte sequence <F4 80 83 92> is legal, since every byte in that sequence matches * a byte range in a row of the table (the last row). * * * Table 3.1B. 
Legal UTF-8 Byte Sequences * Code Points 1st Byte 2nd Byte 3rd Byte 4th Byte * ========================================================================= * U+0000..U+007F 00..7F * ------------------------------------------------------------------------- * U+0080..U+07FF C2..DF 80..BF * * ------------------------------------------------------------------------- * U+0800..U+0FFF E0 A0..BF 80..BF * -- * * U+1000..U+FFFF E1..EF 80..BF 80..BF * * -------------------------------------------------------------------------- * U+10000..U+3FFFF F0 90..BF 80..BF 80..BF * -- * U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF * U+100000..U+10FFFF F4 80..8F 80..BF 80..BF * -- * ========================================================================== * * Cases where a trailing byte range is not 80..BF are underlined in the table to * draw attention to them. These occur only in the second byte of a sequence. * ***/ XMLUInt32 tmpVal = 0; switch(trailingBytes) { case 1 : // UTF-8: [110y yyyy] [10xx xxxx] // Unicode: [0000 0yyy] [yyxx xxxx] // // 0xC0, 0xC1 has been filtered out checkTrailingBytes(*(srcPtr+1), 1, 1); tmpVal = *srcPtr++; tmpVal <<= 6; tmpVal += *srcPtr++; break; case 2 : // UTF-8: [1110 zzzz] [10yy yyyy] [10xx xxxx] // Unicode: [zzzz yyyy] [yyxx xxxx] // if (( *srcPtr == 0xE0) && ( *(srcPtr+1) < 0xA0)) { char byte0[2] = {*srcPtr ,0}; char byte1[2] = {*(srcPtr+1),0}; ThrowXMLwithMemMgr2(UTFDataFormatException , XMLExcepts::UTF8_Invalid_3BytesSeq , byte0 , byte1 , getMemoryManager()); } checkTrailingBytes(*(srcPtr+1), 2, 1); checkTrailingBytes(*(srcPtr+2), 2, 2); // // D36 (a) UTF-8 is the Unicode Transformation Format that serializes // a Unicode code point as a sequence of one to four bytes, // as specified in Table 3.1, UTF-8 Bit Distribution. // (b) An illegal UTF-8 code unit sequence is any byte sequence that // does not match the patterns listed in Table 3.1B, Legal UTF-8 // Byte Sequences. 
// (c) An irregular UTF-8 code unit sequence is a six-byte sequence // where the first three bytes correspond to a high surrogate, // and the next three bytes correspond to a low surrogate. // As a consequence of C12, these irregular UTF-8 sequences shall // not be generated by a conformant process. // //irregular three bytes sequence // that is zzzzyy matches leading surrogate tag 110110 or // trailing surrogate tag 110111 // *srcPtr=1110 1101 // *(srcPtr+1)=1010 yyyy or // *(srcPtr+1)=1011 yyyy // // 0xED 1110 1101 // 0xA0 1010 0000 if ((*srcPtr == 0xED) && (*(srcPtr+1) >= 0xA0)) { char byte0[2] = {*srcPtr, 0}; char byte1[2] = {*(srcPtr+1),0}; ThrowXMLwithMemMgr2(UTFDataFormatException , XMLExcepts::UTF8_Irregular_3BytesSeq , byte0 , byte1 , getMemoryManager()); } tmpVal = *srcPtr++; tmpVal <<= 6; tmpVal += *srcPtr++; tmpVal <<= 6; tmpVal += *srcPtr++; break; case 3 : // UTF-8: [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]* // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate) // [1101 11yy] [yyxx xxxx] (low surrogate) // * uuuuu = wwww + 1 // if (((*srcPtr == 0xF0) && (*(srcPtr+1) < 0x90)) || ((*srcPtr == 0xF4) && (*(srcPtr+1) > 0x8F)) ) { char byte0[2] = {*srcPtr ,0}; char byte1[2] = {*(srcPtr+1),0}; ThrowXMLwithMemMgr2(UTFDataFormatException , XMLExcepts::UTF8_Invalid_4BytesSeq , byte0 , byte1 , getMemoryManager()); } checkTrailingBytes(*(srcPtr+1), 3, 1); checkTrailingBytes(*(srcPtr+2), 3, 2); checkTrailingBytes(*(srcPtr+3), 3, 3); tmpVal = *srcPtr++; tmpVal <<= 6; tmpVal += *srcPtr++; tmpVal <<= 6; tmpVal += *srcPtr++; tmpVal <<= 6; tmpVal += *srcPtr++; break; default: // trailingBytes > 3 /*** * The definition of UTF-8 in Annex D of ISO/IEC 10646-1:2000 also allows * for the use of five- and six-byte sequences to encode characters that * are outside the range of the Unicode character set; those five- and * six-byte sequences are illegal for the use of UTF-8 as a transformation * of Unicode characters. 
ISO/IEC 10646 does not allow mapping of unpaired * surrogates, nor U+FFFE and U+FFFF (but it does allow other noncharacters). ***/ char len[2] = {(char)(trailingBytes+0x31), 0}; char byte[2] = {*srcPtr,0}; ThrowXMLwithMemMgr2(UTFDataFormatException , XMLExcepts::UTF8_Exceede_BytesLimit , byte , len , getMemoryManager()); break; } // since trailingBytes comes from an array, this logic is redundant // default : // ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq); //} tmpVal -= gUTFOffsets[trailingBytes]; // // If it will fit into a single char, then put it in. Otherwise // encode it as a surrogate pair. If its not valid, use the // replacement char. // if (!(tmpVal & 0xFFFF0000)) { *sizePtr++ = trailingBytes + 1; *outPtr++ = XMLCh(tmpVal); } else if (tmpVal > 0x10FFFF) { // // If we've gotten more than 32 chars so far, then just break // out for now and lets process those. When we come back in // here again, we'll get no chars and throw an exception. This // way, the error will have a line and col number closer to // the real problem area. // if ((outPtr - toFill) > 32) break; ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager()); } else { // // If we have enough room to store the leading and trailing // chars, then lets do it. Else, pretend this one never // happened, and leave it for the next time. Since we don't // update the bytes read until the bottom of the loop, by // breaking out here its like it never happened. // if (outPtr + 1 >= outEnd) break; // Store the leading surrogate char tmpVal -= 0x10000; *sizePtr++ = trailingBytes + 1; *outPtr++ = XMLCh((tmpVal >> 10) + 0xD800); // // And then the trailing char. This one accounts for no // bytes eaten from the source, so set the char size for this // one to be zero. // *sizePtr++ = 0; *outPtr++ = XMLCh((tmpVal & 0x3FF) + 0xDC00); } } // Update the bytes eaten bytesEaten = srcPtr - srcData; // Return the characters read return outPtr - toFill; }
// --------------------------------------------------------------------------- // ICUTranscoder: The virtual transcoder API // --------------------------------------------------------------------------- unsigned int ICUTranscoder::transcodeFrom(const XMLByte* const srcData , const unsigned int srcCount , XMLCh* const toFill , const unsigned int maxChars , unsigned int& bytesEaten , unsigned char* const charSizes) { // If debugging, insure the block size is legal #if defined(XERCES_DEBUG) checkBlockSize(maxChars); #endif // Set up pointers to the start and end of the source buffer const XMLByte* startSrc = srcData; const XMLByte* endSrc = srcData + srcCount; // // And now do the target buffer. This works differently according to // whether XMLCh and UChar are the same size or not. // UChar* startTarget; if (sizeof(XMLCh) == sizeof(UChar)) startTarget = (UChar*)toFill; else startTarget = (UChar*) getMemoryManager()->allocate ( maxChars * sizeof(UChar) );//new UChar[maxChars]; UChar* orgTarget = startTarget; // // Transoode the buffer. Buffer overflow errors are normal, occuring // when the raw input buffer holds more characters than will fit in // the Unicode output buffer. // UErrorCode err = U_ZERO_ERROR; ucnv_toUnicode ( fConverter , &startTarget , startTarget + maxChars , (const char**)&startSrc , (const char*)endSrc , (fFixed ? 
0 : (int32_t*)fSrcOffsets) , false , &err ); if ((err != U_ZERO_ERROR) && (err != U_BUFFER_OVERFLOW_ERROR)) { if (orgTarget != (UChar*)toFill) getMemoryManager()->deallocate(orgTarget);//delete [] orgTarget; if (fFixed) { XMLCh tmpBuf[17]; XMLString::binToText((unsigned int)(*startTarget), tmpBuf, 16, 16, getMemoryManager()); ThrowXMLwithMemMgr2 ( TranscodingException , XMLExcepts::Trans_BadSrcCP , tmpBuf , getEncodingName() , getMemoryManager() ); } else { ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager()); } } // Calculate the bytes eaten and store in caller's param bytesEaten = startSrc - srcData; // And the characters decoded const unsigned int charsDecoded = startTarget - orgTarget; // // Translate the array of char offsets into an array of character // sizes, which is what the transcoder interface semantics requires. // If its fixed, then we can optimize it. // if (fFixed) { const unsigned char fillSize = (unsigned char)ucnv_getMaxCharSize(fConverter); memset(charSizes, fillSize, maxChars); } else { // // We have to convert the series of offsets into a series of // sizes. If just one char was decoded, then its the total bytes // eaten. Otherwise, do a loop and subtract out each element from // its previous element. // if (charsDecoded == 1) { charSizes[0] = (unsigned char)bytesEaten; } else { // ICU does not return an extra element to allow us to figure // out the last char size, so we have to compute it from the // total bytes used. unsigned int index; for (index = 0; index < charsDecoded - 1; index++) { charSizes[index] = (unsigned char)(fSrcOffsets[index + 1] - fSrcOffsets[index]); } if( charsDecoded > 0 ) { charSizes[charsDecoded - 1] = (unsigned char)(bytesEaten - fSrcOffsets[charsDecoded - 1]); } } } // // If XMLCh and UChar are not the same size, then we need to copy over // the temp buffer to the new one. 
// if (sizeof(UChar) != sizeof(XMLCh)) { XMLCh* outPtr = toFill; startTarget = orgTarget; for (unsigned int index = 0; index < charsDecoded; index++) *outPtr++ = XMLCh(*startTarget++); // And delete the temp buffer getMemoryManager()->deallocate(orgTarget);//delete [] orgTarget; } // Return the chars we put into the target buffer return charsDecoded; }
// --------------------------------------------------------------------------- // XMLUTF16Transcoder: Implementation of the transcoder API // --------------------------------------------------------------------------- unsigned int XMLUTF16Transcoder::transcodeFrom( const XMLByte* const srcData , const unsigned int srcCount , XMLCh* const toFill , const unsigned int maxChars , unsigned int& bytesEaten , unsigned char* const charSizes) { // If debugging, make sure that the block size is legal #if defined(XERCES_DEBUG) checkBlockSize(maxChars); #endif // // Calculate the max chars we can do here. Its the lesser of the // max output chars and the number of chars in the source. // const unsigned int srcChars = srcCount / sizeof(UTF16Ch); const unsigned int countToDo = srcChars < maxChars ? srcChars : maxChars; // Look at the source data as UTF16 chars const UTF16Ch* asUTF16 = (const UTF16Ch*)srcData; // And get a mutable pointer to the output XMLCh* outPtr = toFill; // // If its swapped, we have to do a char by char swap and cast. Else // we have to check whether our XMLCh and UTF16Ch types are the same // size or not. If so, we can optimize by just doing a buffer copy. // if (fSwapped) { // // And then do the swapping loop for the count we precalculated. Note // that this also handles size conversion as well if XMLCh is not the // same size as UTF16Ch. // for (unsigned int index = 0; index < countToDo; index++) *outPtr++ = BitOps::swapBytes(*asUTF16++); } else { // // If the XMLCh type is the same size as a UTF16 value on this // platform, then we can do just a buffer copy straight to the target // buffer since our source chars are UTF-16 chars. If its not, then // we still have to do a loop and assign each one, in order to // implicitly convert. // if (sizeof(XMLCh) == sizeof(UTF16Ch)) { // Notice we convert char count to byte count here!!! 
memcpy(toFill, srcData, countToDo * sizeof(UTF16Ch)); } else { for (unsigned int index = 0; index < countToDo; index++) *outPtr++ = XMLCh(*asUTF16++); } } // Set the bytes eaten bytesEaten = countToDo * sizeof(UTF16Ch); // Set the character sizes to the fixed size memset(charSizes, sizeof(UTF16Ch), countToDo); // Return the chars we transcoded return countToDo; }
// --------------------------------------------------------------------------- // XMLUCS4Transcoder: Implementation of the transcoder API // --------------------------------------------------------------------------- unsigned int XMLUCS4Transcoder::transcodeFrom(const XMLByte* const srcData , const unsigned int srcCount , XMLCh* const toFill , const unsigned int maxChars , unsigned int& bytesEaten , unsigned char* const charSizes) { // If debugging, make sure that the block size is legal #if defined(XERCES_DEBUG) checkBlockSize(maxChars); #endif // // Get pointers to the start and end of the source buffer in terms of // UCS-4 characters. // const UCS4Ch* srcPtr = (const UCS4Ch*)srcData; const UCS4Ch* srcEnd = srcPtr + (srcCount / sizeof(UCS4Ch)); // // Get pointers to the start and end of the target buffer, which is // in terms of the XMLCh chars we output. // XMLCh* outPtr = toFill; XMLCh* outEnd = toFill + maxChars; // // And get a pointer into the char sizes buffer. We will run this // up as we put chars into the output buffer. // unsigned char* sizePtr = charSizes; // // Now process chars until we either use up all our source or all of // our output space. // while ((outPtr < outEnd) && (srcPtr < srcEnd)) { // // Get the next UCS char out of the buffer. Don't bump the ptr // yet since we might not have enough storage for it in the target // (if its causes a surrogate pair to be created. // UCS4Ch nextVal = *srcPtr; // If it needs to be swapped, then do it if (fSwapped) nextVal = BitOps::swapBytes(nextVal); // Handle a surrogate pair if needed if (nextVal & 0xFFFF0000) { // // If we don't have room for both of the chars, then we // bail out now. // if (outPtr + 1 == outEnd) break; const XMLCh ch1 = XMLCh(((nextVal - 0x10000) >> 10) + 0xD800); const XMLCh ch2 = XMLCh(((nextVal - 0x10000) & 0x3FF) + 0xDC00); // // We have room so store them both. But note that the // second one took up no source bytes! 
// *sizePtr++ = sizeof(UCS4Ch); *outPtr++ = ch1; *sizePtr++ = 0; *outPtr++ = ch2; } else { // // No surrogate, so just store it and bump the count of chars // read. Update the char sizes buffer for this char's entry. // *sizePtr++ = sizeof(UCS4Ch); *outPtr++ = XMLCh(nextVal); } // Indicate that we ate another UCS char's worth of bytes srcPtr++; }
// Named XMLCh constants for individual characters. The trailing comment
// shows the character each value stands for.
const XMLCh chOpenCurly             = 0x7B;             // '{'
const XMLCh chOpenParen             = 0x28;             // '('
const XMLCh chOpenSquare            = 0x5B;             // '['
const XMLCh chPercent               = 0x25;             // '%'
const XMLCh chPeriod                = 0x2E;             // '.'
const XMLCh chPipe                  = 0x7C;             // '|'
const XMLCh chPlus                  = 0x2B;             // '+'
const XMLCh chPound                 = 0x23;             // '#'
const XMLCh chQuestion              = 0x3F;             // '?'
const XMLCh chSingleQuote           = 0x27;             // '\''
const XMLCh chSpace                 = 0x20;             // ' '
const XMLCh chSemiColon             = 0x3B;             // ';'
const XMLCh chTilde                 = 0x7E;             // '~'
const XMLCh chUnderscore            = 0x5F;             // '_'

// 0xFFFE is 0xFEFF with its two bytes exchanged
const XMLCh chSwappedUnicodeMarker  = XMLCh(0xFFFE);
const XMLCh chUnicodeMarker         = XMLCh(0xFEFF);

const XMLCh chDigit_0               = 0x30;             // '0'
const XMLCh chDigit_1               = 0x31;             // '1'
const XMLCh chDigit_2               = 0x32;             // '2'
const XMLCh chDigit_3               = 0x33;             // '3'
const XMLCh chDigit_4               = 0x34;             // '4'
const XMLCh chDigit_5               = 0x35;             // '5'
const XMLCh chDigit_6               = 0x36;             // '6'
const XMLCh chDigit_7               = 0x37;             // '7'
const XMLCh chDigit_8               = 0x38;             // '8'
const XMLCh chDigit_9               = 0x39;             // '9'

const XMLCh chLatin_A               = 0x41;             // 'A'
const XMLCh chLatin_B               = 0x42;             // 'B'