// Decodes |length| bytes starting at |bytes| into a 16-bit String, producing
// exactly one UChar per input byte. Bytes accepted by isASCII() are copied
// through unchanged; every other byte is translated through |table|.
//
// The three bool parameters and the bool& are unnamed and unused here.
String TextCodecLatin1::decode(const char* bytes, size_t length, bool, bool, bool&)
{
    UChar* characters;
    String result = String::createUninitialized(length, characters);

    const uint8_t* source = reinterpret_cast<const uint8_t*>(bytes);
    const uint8_t* end = reinterpret_cast<const uint8_t*>(bytes + length);
    // NOTE(review): alignToMachineWord() is assumed to round |end| down to a
    // word boundary so the word loop below never reads past |end| -- confirm
    // against its definition.
    const uint8_t* alignedEnd = alignToMachineWord(end);
    UChar* destination = characters;

    while (source < end) {
        if (isASCII(*source)) {
            // Fast path for ASCII. Most Latin-1 text will be ASCII.
            if (isAlignedToMachineWord(source)) {
                while (source < alignedEnd) {
                    MachineWord chunk = *reinterpret_cast_ptr<const MachineWord*>(source);

                    // A word containing any non-ASCII byte is handled one
                    // byte at a time, starting with the lookup-table branch.
                    if (!isAllASCII<LChar>(chunk))
                        goto useLookupTable;

                    copyASCIIMachineWord(destination, source);
                    source += sizeof(MachineWord);
                    destination += sizeof(MachineWord);
                }

                if (source == end)
                    break;

                // The word loop may have advanced |source|, so the isASCII()
                // test at the top of this iteration no longer describes the
                // byte we are about to copy. Re-check it so that a non-ASCII
                // byte in the unaligned tail is still translated via |table|.
                if (!isASCII(*source))
                    goto useLookupTable;
            }
            *destination = *source;
        } else {
useLookupTable:
            *destination = table[*source];
        }

        ++source;
        ++destination;
    }

    return result;
}
// Decodes |length| bytes starting at |bytes| into a String, producing exactly
// one character per input byte. Bytes accepted by isASCII() are copied
// through unchanged; every other byte is translated through |table|.
//
// The result is built as 8-bit (LChar) data for as long as every translated
// value fits in 8 bits. The first value above 0xff switches to the 16-bit
// phase at upConvertTo16Bit, which widens what has already been produced and
// decodes the remaining input as UChar.
//
// Returns emptyString() for zero-length input. The three bool parameters and
// the bool& are unnamed and unused here.
String TextCodecLatin1::decode(const char* bytes, size_t length, bool, bool, bool&)
{
    LChar* characters;
    if (!length)
        return emptyString();
    String result = String::createUninitialized(length, characters);

    const uint8_t* source = reinterpret_cast<const uint8_t*>(bytes);
    const uint8_t* end = reinterpret_cast<const uint8_t*>(bytes + length);
    // NOTE(review): alignToMachineWord() is assumed to round |end| down to a
    // word boundary so the word loops below never read past |end| -- confirm
    // against its definition.
    const uint8_t* alignedEnd = alignToMachineWord(end);
    LChar* destination = characters;

    // Phase 1: 8-bit output.
    while (source < end) {
        if (isASCII(*source)) {
            // Fast path for ASCII. Most Latin-1 text will be ASCII.
            if (isAlignedToMachineWord(source)) {
                while (source < alignedEnd) {
                    MachineWord chunk = *reinterpret_cast_ptr<const MachineWord*>(source);

                    // A word containing any non-ASCII byte is handled one
                    // byte at a time, starting with the lookup-table branch.
                    if (!isAllASCII<LChar>(chunk))
                        goto useLookupTable;

                    copyASCIIMachineWord(destination, source);
                    source += sizeof(MachineWord);
                    destination += sizeof(MachineWord);
                }

                if (source == end)
                    break;

                // The word loop may have advanced |source|, so the isASCII()
                // test at the top of this iteration no longer describes the
                // byte we are about to copy. Re-check it so that a non-ASCII
                // byte in the unaligned tail is still translated via |table|
                // and can still trigger the switch to 16-bit output.
                if (!isASCII(*source))
                    goto useLookupTable;
            }
            *destination = *source;
        } else {
useLookupTable:
            // A translated value that does not fit in an LChar ends the
            // 8-bit phase; |source| is left pointing at the offending byte.
            if (table[*source] > 0xff)
                goto upConvertTo16Bit;

            *destination = table[*source];
        }

        ++source;
        ++destination;
    }

    return result;

upConvertTo16Bit:
    // Phase 2: 16-bit output. Reached only from the lookup-table branch above.
    UChar* characters16;
    String result16 = String::createUninitialized(length, characters16);
    UChar* destination16 = characters16;

    // Zero extend and copy already processed 8 bit data
    LChar* ptr8 = characters;
    LChar* endPtr8 = destination;

    while (ptr8 < endPtr8)
        *destination16++ = *ptr8++;

    // Handle the character that triggered the 16 bit path
    *destination16 = table[*source];
    ++source;
    ++destination16;

    while (source < end) {
        if (isASCII(*source)) {
            // Fast path for ASCII. Most Latin-1 text will be ASCII.
            if (isAlignedToMachineWord(source)) {
                while (source < alignedEnd) {
                    MachineWord chunk = *reinterpret_cast_ptr<const MachineWord*>(source);

                    if (!isAllASCII<LChar>(chunk))
                        goto useLookupTable16;

                    copyASCIIMachineWord(destination16, source);
                    source += sizeof(MachineWord);
                    destination16 += sizeof(MachineWord);
                }

                if (source == end)
                    break;

                // Same re-check as in the 8-bit phase: after the word loop
                // has moved |source|, the current byte may no longer be ASCII
                // and must then go through |table|.
                if (!isASCII(*source))
                    goto useLookupTable16;
            }
            *destination16 = *source;
        } else {
useLookupTable16:
            *destination16 = table[*source];
        }

        ++source;
        ++destination16;
    }

    return result16;
}
// Decodes |length| bytes of UTF-8 starting at |bytes| and returns the result
// as a String.
//
// Decoding starts in an 8-bit phase that writes LChar output. It switches to
// the 16-bit phase at upConvertTo16Bit the first time it meets either a
// decoded character above 0xff or a decoding error that must be reported as a
// replacement character (i.e. an error while |stopOnError| is false).
//
// A multi-byte sequence that is cut off by the end of the input is saved in
// m_partialSequence / m_partialSequenceSize. If that state is non-zero on
// entry it is handed to handlePartialSequence() before the new input is read.
//
// Parameters, as used in this function:
//   flush       - when true, the outer do/while loops repeat while a partial
//                 sequence is still pending; it is also forwarded to
//                 handlePartialSequence().
//   stopOnError - when true, decoding stops at the first error instead of
//                 emitting a replacement character.
//   sawError    - set to true when an invalid sequence is found; it is never
//                 reset to false here.
String TextCodecUTF8::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
{
    // Each input byte might turn into a character.
    // That includes all bytes in the partial-sequence buffer because
    // each byte in an invalid sequence will turn into a replacement character.
    StringBuffer<LChar> buffer(m_partialSequenceSize + length);

    const uint8_t* source = reinterpret_cast<const uint8_t*>(bytes);
    const uint8_t* end = source + length;
    // NOTE(review): alignToMachineWord() is assumed to round |end| down to a
    // word boundary so the word loops never read past |end| -- confirm
    // against its definition.
    const uint8_t* alignedEnd = alignToMachineWord(end);
    LChar* destination = buffer.characters();

    // Phase 1: 8-bit output.
    do {
        if (m_partialSequenceSize) {
            // Explicitly copy destination and source pointers to avoid taking pointers to the
            // local variables, which may harm code generation by disabling some optimizations
            // in some compilers.
            LChar* destinationForHandlePartialSequence = destination;
            const uint8_t* sourceForHandlePartialSequence = source;
            // A true return value sends us to the 16-bit phase. Only |source|
            // is carried over on that path; |destination| keeps its old value.
            if (handlePartialSequence(destinationForHandlePartialSequence, sourceForHandlePartialSequence, end, flush, stopOnError, sawError)) {
                source = sourceForHandlePartialSequence;
                goto upConvertTo16Bit;
            }
            destination = destinationForHandlePartialSequence;
            source = sourceForHandlePartialSequence;
            // Still pending after the call: leave the do/while without
            // looking at the rest of the input.
            if (m_partialSequenceSize)
                break;
        }

        while (source < end) {
            if (isASCII(*source)) {
                // Fast path for ASCII. Most UTF-8 text will be ASCII.
                if (isAlignedToMachineWord(source)) {
                    while (source < alignedEnd) {
                        MachineWord chunk = *reinterpret_cast_ptr<const MachineWord*>(source);
                        if (!isAllASCII<LChar>(chunk))
                            break;
                        copyASCIIMachineWord(destination, source);
                        source += sizeof(MachineWord);
                        destination += sizeof(MachineWord);
                    }
                    if (source == end)
                        break;
                    // |source| may have moved in the word loop, so the byte
                    // it now points at has to be re-tested before the
                    // single-byte copy below.
                    if (!isASCII(*source))
                        continue;
                }
                *destination++ = *source++;
                continue;
            }
            int count = nonASCIISequenceLength(*source);
            int character;
            // A zero count is treated as an invalid sequence.
            if (!count)
                character = nonCharacter;
            else {
                if (count > end - source) {
                    // The sequence runs past the end of the input: save the
                    // remaining bytes as the partial sequence and stop.
                    ASSERT_WITH_SECURITY_IMPLICATION(end - source < static_cast<ptrdiff_t>(sizeof(m_partialSequence)));
                    ASSERT(!m_partialSequenceSize);
                    m_partialSequenceSize = end - source;
                    memcpy(m_partialSequence, source, m_partialSequenceSize);
                    source = end;
                    break;
                }
                character = decodeNonASCIISequence(source, count);
            }
            if (character == nonCharacter) {
                sawError = true;
                if (stopOnError)
                    break;

                // |source| is not advanced here, so the 16-bit phase sees the
                // same invalid sequence again and handles it there.
                goto upConvertTo16Bit;
            }
            // Does not fit in an LChar. |source| is not advanced, so the
            // 16-bit phase decodes this sequence again.
            if (character > 0xff)
                goto upConvertTo16Bit;

            source += count;
            *destination++ = character;
        }
    } while (flush && m_partialSequenceSize);

    buffer.shrink(destination - buffer.characters());

    return String::adopt(buffer);

upConvertTo16Bit:
    // Phase 2: 16-bit output. Reached only through the gotos in phase 1.
    StringBuffer<UChar> buffer16(m_partialSequenceSize + length);

    UChar* destination16 = buffer16.characters();

    // Copy the already converted characters
    for (LChar* converted8 = buffer.characters(); converted8 < destination;)
        *destination16++ = *converted8++;

    do {
        if (m_partialSequenceSize) {
            // Explicitly copy destination and source pointers to avoid taking pointers to the
            // local variables, which may harm code generation by disabling some optimizations
            // in some compilers.
            UChar* destinationForHandlePartialSequence = destination16;
            const uint8_t* sourceForHandlePartialSequence = source;
            // Unlike in phase 1, the return value is not used here.
            handlePartialSequence(destinationForHandlePartialSequence, sourceForHandlePartialSequence, end, flush, stopOnError, sawError);
            destination16 = destinationForHandlePartialSequence;
            source = sourceForHandlePartialSequence;
            if (m_partialSequenceSize)
                break;
        }

        while (source < end) {
            if (isASCII(*source)) {
                // Fast path for ASCII. Most UTF-8 text will be ASCII.
                if (isAlignedToMachineWord(source)) {
                    while (source < alignedEnd) {
                        MachineWord chunk = *reinterpret_cast_ptr<const MachineWord*>(source);
                        if (!isAllASCII<LChar>(chunk))
                            break;
                        copyASCIIMachineWord(destination16, source);
                        source += sizeof(MachineWord);
                        destination16 += sizeof(MachineWord);
                    }
                    if (source == end)
                        break;
                    // Re-test the current byte; |source| may have moved in
                    // the word loop.
                    if (!isASCII(*source))
                        continue;
                }
                *destination16++ = *source++;
                continue;
            }
            int count = nonASCIISequenceLength(*source);
            int character;
            if (!count)
                character = nonCharacter;
            else {
                if (count > end - source) {
                    // Truncated sequence at the end of the input: save it as
                    // the partial sequence and stop.
                    ASSERT_WITH_SECURITY_IMPLICATION(end - source < static_cast<ptrdiff_t>(sizeof(m_partialSequence)));
                    ASSERT(!m_partialSequenceSize);
                    m_partialSequenceSize = end - source;
                    memcpy(m_partialSequence, source, m_partialSequenceSize);
                    source = end;
                    break;
                }
                character = decodeNonASCIISequence(source, count);
            }
            if (character == nonCharacter) {
                sawError = true;
                if (stopOnError)
                    break;
                // Each error generates a replacement character and consumes one byte.
                *destination16++ = replacementCharacter;
                ++source;
                continue;
            }
            source += count;
            // appendCharacter() returns the new write position.
            // NOTE(review): presumably it may write more than one UChar for a
            // single character -- confirm against its definition.
            destination16 = appendCharacter(destination16, character);
        }
    } while (flush && m_partialSequenceSize);

    buffer16.shrink(destination16 - buffer16.characters());

    return String::adopt(buffer16);
}