// Decode bytes from `from` into `to`, choosing a delegate decoder on the
// first call that supplies at least two bytes.
//
//   to       destination buffer for decoded characters
//   from     source bytes
//   fromLen  number of bytes available at `from`
//   rest     set to `from` when fewer than two bytes are available before a
//            delegate exists; otherwise filled in by the delegate
//
// Returns 0 when nothing could be consumed yet, otherwise whatever the
// delegate decoder returns.
size_t UnicodeDecoder::decode(Char *to, const char *from, size_t fromLen, const char **rest)
{
  // Once a delegate has been created, every call is forwarded to it.
  if (subDecoder_)
    return subDecoder_->decode(to, from, fromLen, rest);

  // Two bytes are needed to look for a byte order mark; consume nothing
  // until they are available.
  if (fromLen < 2) {
    *rest = from;
    return 0;
  }
  minBytesPerChar_ = 2;

  // Assemble the first two bytes in host memory order so they can be
  // compared against the two mark constants.
  union Probe {
    unsigned short word;
    char bytes[2];
  };
  Probe probe;
  probe.bytes[0] = from[0];
  probe.bytes[1] = from[1];

  if (probe.word == byteOrderMark || probe.word == swappedByteOrderMark) {
    // Only the swapped form of the mark requests byte swapping.
    if (probe.word != byteOrderMark)
      swapBytes_ = 1;
    hadByteOrderMark_ = 1;
    // The mark itself is not passed on to the delegate.
    from += 2;
    fromLen -= 2;
  }

  // A mark forces UTF-16; without one, UTF-16 is used only when no other
  // coding system was configured.
  // NOTE(review): ownership of the object allocated here is not visible
  // from this function -- confirm it is released somewhere.
  if (hadByteOrderMark_ || !subCodingSystem_)
    subCodingSystem_ = new UTF16CodingSystem;

  subDecoder_ = subCodingSystem_->makeDecoder(swapBytes_);
  minBytesPerChar_ = subDecoder_->minBytesPerChar();
  return subDecoder_->decode(to, from, fromLen, rest);
}
// Decode 16-bit code units from `from` into `to`.
//
// On the first call that supplies at least two bytes, the leading pair is
// compared against byteOrderMark / swappedByteOrderMark:
//   - a mark is skipped (the swapped form also turns on byte swapping);
//   - with no mark and a configured subCodingSystem_, a delegate decoder is
//     created and this and all later calls are forwarded to it.
// Otherwise the bytes are decoded here, two bytes per character.
//
//   to       destination buffer; receives one Char per two input bytes
//   from     source bytes
//   fromLen  number of bytes available at `from`
//   rest     set to the first byte that was not consumed (a trailing odd
//            byte is left for the next call)
//
// Returns the number of characters stored in `to` (or the delegate's result
// when one is in use).
size_t UnicodeDecoder::decode(Char *to, const char *from, size_t fromLen, const char **rest)
{
  union U {
    unsigned short word;
    char bytes[2];
  };

  if (subDecoder_)
    return subDecoder_->decode(to, from, fromLen, rest);

  if (!hadFirstChar_) {
    minBytesPerChar_ = 2;
    if (fromLen < 2) {
      // Not enough input to inspect the first code unit.  hadFirstChar_ is
      // deliberately left clear so that byte order mark detection (and the
      // choice of a delegate decoder) still happens on the next call;
      // marking it here would cause a mark split across two calls to be
      // decoded as ordinary data.
      *rest = from;
      return 0;
    }
    hadFirstChar_ = 1;
    U u;
    u.bytes[0] = from[0];
    u.bytes[1] = from[1];
    if (u.word == byteOrderMark) {
      hadByteOrderMark_ = 1;
      from += 2;
      fromLen -= 2;
    }
    else if (u.word == swappedByteOrderMark) {
      hadByteOrderMark_ = 1;
      from += 2;
      fromLen -= 2;
      swapBytes_ = 1;
    }
    else if (subCodingSystem_) {
      // No mark: hand the whole stream to the configured coding system.
      subDecoder_ = subCodingSystem_->makeDecoder();
      minBytesPerChar_ = subDecoder_->minBytesPerChar();
      return subDecoder_->decode(to, from, fromLen, rest);
    }
  }

  // Only whole two-byte units are consumed.
  fromLen &= ~size_t(1);
  *rest = from + fromLen;

  if (sizeof(Char) == 2) {
    if (!swapBytes_) {
      // Input already has the layout of the output; copy it in bulk.
      // memmove is used because the buffers may overlap.
      if (from != (char *)to)
        memmove(to, from, fromLen);
      return fromLen/2;
    }
  }

  if (swapBytes_) {
    for (size_t n = fromLen; n > 0; n -= 2) {
      U u;
      // Store the pair in reversed order to undo the swap.
      u.bytes[1] = *from++;
      u.bytes[0] = *from++;
      *to++ = u.word;
    }
  }
  else {
    for (size_t n = fromLen; n > 0; n -= 2) {
      U u;
      u.bytes[0] = *from++;
      u.bytes[1] = *from++;
      *to++ = u.word;
    }
  }
  return fromLen/2;
}