bool TextEncoding::isNonByteBasedEncoding() const { if (noExtendedTextEncodingNameUsed()) { return *this == UTF16LittleEndianEncoding() || *this == UTF16BigEndianEncoding(); } return *this == UTF16LittleEndianEncoding() || *this == UTF16BigEndianEncoding() || *this == UTF32BigEndianEncoding() || *this == UTF32LittleEndianEncoding(); }
// Sniffs the start of the stream for a UTF-16LE, UTF-16BE or UTF-8 byte-order
// mark, then creates the codec and decodes everything seen so far.
//
// data / length: the newly arrived bytes.
// flush: true when no more input will follow; forces a decision even if too
//        few bytes have been seen to rule a BOM in or out.
//
// Returns the decoded text. Returns "" while the bytes are still being
// buffered for BOM detection, and a default-constructed String if no codec
// could be created. The BOM bytes are not stripped here; they are handed to
// the codec together with the rest of the input.
String TextDecoder::checkForBOM(const char* data, size_t length, bool flush)
{
    // Check to see if we found a BOM.
    size_t numBufferedBytes = m_numBufferedBytes;
    size_t buf1Len = numBufferedBytes;
    size_t buf2Len = length;
    const unsigned char* buf1 = m_bufferedBytes;
    const unsigned char* buf2 = reinterpret_cast<const unsigned char*>(data);

    // Read the first three bytes of the logical stream: previously buffered
    // bytes first, then the new data, padding with 0 when the stream is short.
    unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
    unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
    // The third byte must also come from the buffer when one is left there;
    // otherwise a byte from |data| would be compared out of stream order.
    // NOTE(review): this only matters if m_bufferedBytes can hold three
    // bytes; with a smaller buffer the result is the same as reading |data|.
    unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;

    const TextEncoding* encodingConsideringBOM = &m_encoding;
    if (c1 == 0xFF && c2 == 0xFE)
        encodingConsideringBOM = &UTF16LittleEndianEncoding();
    else if (c1 == 0xFE && c2 == 0xFF)
        encodingConsideringBOM = &UTF16BigEndianEncoding();
    else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF)
        encodingConsideringBOM = &UTF8Encoding();
    else if (numBufferedBytes + length <= sizeof(m_bufferedBytes) && !flush) {
        // Continue to look for the BOM: everything still fits in the buffer
        // and more input may arrive.
        memcpy(&m_bufferedBytes[numBufferedBytes], data, length);
        m_numBufferedBytes += length;
        return "";
    }

    // Done checking for BOM.
    m_codec.set(newTextCodec(*encodingConsideringBOM).release());
    if (!m_codec)
        return String();
    m_checkedForBOM = true;

    // Handle case where we have some buffered bytes to deal with.
    if (numBufferedBytes) {
        // Decode from a local copy; the member buffer is marked empty before
        // the codec is invoked.
        char bufferedBytes[sizeof(m_bufferedBytes)];
        memcpy(bufferedBytes, m_bufferedBytes, numBufferedBytes);
        m_numBufferedBytes = 0;
        return m_codec->decode(bufferedBytes, numBufferedBytes, false)
            + m_codec->decode(data, length, flush);
    }

    return m_codec->decode(data, length, flush);
}
// Returns the UTF-8 encoding when this encoding is UTF-16 (either byte
// order); every other encoding is returned unchanged.
const TextEncoding& TextEncoding::closest8BitEquivalent() const
{
    const bool isUTF16 = *this == UTF16BigEndianEncoding()
        || *this == UTF16LittleEndianEncoding();

    if (isUTF16)
        return UTF8Encoding();

    return *this;
}
// Sniffs the start of the stream for a UTF-8, UTF-16 (LE/BE) or UTF-32
// (LE/BE) byte-order mark, creates the matching codec (or one for m_encoding
// when no BOM is present), strips the BOM and decodes everything seen so far.
//
// data / length: the newly arrived bytes.
// flush: true when no more input will follow; forces a decision even if too
//        few bytes have been seen.
// stopOnError / sawError: forwarded to the codec's decode(); when
//        stopOnError is set and decoding the buffered bytes reports an error,
//        the new data is not decoded.
//
// Returns the decoded text. Returns "" while the bytes are still being
// buffered for BOM detection, and a default-constructed String if no codec
// could be created.
String TextDecoder::checkForBOM(const char* data, size_t length, bool flush, bool stopOnError, bool& sawError)
{
    ASSERT(!m_checkedForBOM);

    // Check to see if we found a BOM.
    size_t numBufferedBytes = m_numBufferedBytes;
    size_t buf1Len = numBufferedBytes;
    size_t buf2Len = length;
    const unsigned char* buf1 = m_bufferedBytes;
    const unsigned char* buf2 = reinterpret_cast<const unsigned char*>(data);

    // Read the first four bytes of the logical stream: previously buffered
    // bytes first, then the new data, padding with 0 when the stream is short.
    unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
    unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
    unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
    unsigned char c4 = buf2Len ? (--buf2Len, *buf2++) : 0;

    const TextEncoding* encodingConsideringBOM = &m_encoding;
    bool foundBOM = true;
    size_t lengthOfBOM = 0;
    if (c1 == 0xFF && c2 == 0xFE) {
        // FF FE is both the UTF-16LE BOM and the first half of the UTF-32LE
        // BOM (FF FE 00 00); the next two bytes disambiguate.
        if (c3 != 0 || c4 != 0) {
            encodingConsideringBOM = &UTF16LittleEndianEncoding();
            lengthOfBOM = 2;
        } else if (numBufferedBytes + length > sizeof(m_bufferedBytes)) {
            // Enough bytes have been seen that the two zeros are real input
            // rather than padding.
            encodingConsideringBOM = &UTF32LittleEndianEncoding();
            lengthOfBOM = 4;
        } else
            foundBOM = false;
    } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) {
        encodingConsideringBOM = &UTF8Encoding();
        lengthOfBOM = 3;
    } else if (c1 == 0xFE && c2 == 0xFF) {
        encodingConsideringBOM = &UTF16BigEndianEncoding();
        lengthOfBOM = 2;
    } else if (c1 == 0 && c2 == 0 && c3 == 0xFE && c4 == 0xFF) {
        encodingConsideringBOM = &UTF32BigEndianEncoding();
        lengthOfBOM = 4;
    } else
        foundBOM = false;

    if (!foundBOM && numBufferedBytes + length <= sizeof(m_bufferedBytes) && !flush) {
        // Continue to look for the BOM: everything still fits in the buffer
        // and more input may arrive.
        memcpy(&m_bufferedBytes[numBufferedBytes], data, length);
        m_numBufferedBytes += length;
        return "";
    }

    // Done checking for BOM.
    m_codec.set(newTextCodec(*encodingConsideringBOM).release());
    if (!m_codec)
        return String();
    m_checkedForBOM = true;

    // Skip the BOM.
    size_t numBufferedBOMBytes = 0;
    if (foundBOM) {
        if (numBufferedBytes < lengthOfBOM) {
            // The BOM ends inside |data|; every buffered byte is part of it.
            size_t numUnbufferedBOMBytes = lengthOfBOM - numBufferedBytes;
            ASSERT(numUnbufferedBOMBytes <= length);
            data += numUnbufferedBOMBytes;
            length -= numUnbufferedBOMBytes;
            numBufferedBytes = 0;
        } else {
            // The whole BOM is already in the buffer. This happens when
            // FF FE (optionally followed by 00) was buffered while waiting to
            // tell UTF-16LE from UTF-32LE and the new data settles on
            // UTF-16LE. Any buffered bytes after the BOM are content and must
            // still be decoded; subtracting the other way round would
            // underflow and move |data| backwards.
            numBufferedBOMBytes = lengthOfBOM;
            numBufferedBytes -= lengthOfBOM;
        }
        m_numBufferedBytes = 0;
    }

    // Handle case where we have some buffered bytes to deal with.
    if (numBufferedBytes) {
        // Decode from a local copy; the member buffer is marked empty before
        // the codec is invoked.
        char bufferedBytes[sizeof(m_bufferedBytes)];
        memcpy(bufferedBytes, m_bufferedBytes + numBufferedBOMBytes, numBufferedBytes);
        m_numBufferedBytes = 0;
        String bufferedResult = m_codec->decode(bufferedBytes, numBufferedBytes, false, stopOnError, sawError);
        if (stopOnError && sawError)
            return bufferedResult;
        return bufferedResult + m_codec->decode(data, length, flush, stopOnError, sawError);
    }

    return m_codec->decode(data, length, flush, stopOnError, sawError);
}