size_t UnicodeDecoder::decode(Char *to, const char *from, size_t fromLen,
			      const char **rest)
{
  union U {
    unsigned short word;
    char bytes[2];
  };
    
  if (subDecoder_)
    return subDecoder_->decode(to, from, fromLen, rest);
    if (fromLen < 2) {
      *rest = from;
      return 0;
    }
    minBytesPerChar_ = 2;
    U u;
    u.bytes[0] = from[0];
    u.bytes[1] = from[1];
    if (u.word == byteOrderMark) {
      hadByteOrderMark_ = 1;
      from += 2;
      fromLen -= 2;
    }
    else if (u.word == swappedByteOrderMark) {
      hadByteOrderMark_ = 1;
      from += 2;
      fromLen -= 2;
      swapBytes_ = 1;
    }
  if (hadByteOrderMark_ || !subCodingSystem_)
    subCodingSystem_ = new UTF16CodingSystem;
  subDecoder_ = subCodingSystem_->makeDecoder(swapBytes_);
      minBytesPerChar_ = subDecoder_->minBytesPerChar();
      return subDecoder_->decode(to, from, fromLen, rest);
}
예제 #2
0
size_t UnicodeDecoder::decode(Char *to, const char *from, size_t fromLen,
			      const char **rest)
{
  union U {
    unsigned short word;
    char bytes[2];
  };
    
  if (subDecoder_)
    return subDecoder_->decode(to, from, fromLen, rest);
  if (!hadFirstChar_) {
    hadFirstChar_ = 1;
    minBytesPerChar_ = 2;
    if (fromLen < 2) {
      *rest = from;
      return 0;
    }
    U u;
    u.bytes[0] = from[0];
    u.bytes[1] = from[1];
    if (u.word == byteOrderMark) {
      hadByteOrderMark_ = 1;
      from += 2;
      fromLen -= 2;
    }
    else if (u.word == swappedByteOrderMark) {
      hadByteOrderMark_ = 1;
      from += 2;
      fromLen -= 2;
      swapBytes_ = 1;
    }
    else if (subCodingSystem_) {
      subDecoder_ = subCodingSystem_->makeDecoder();
      minBytesPerChar_ = subDecoder_->minBytesPerChar();
      return subDecoder_->decode(to, from, fromLen, rest);
    }
  }
  fromLen &= ~1;
  *rest = from + fromLen;
  if (sizeof(Char) == 2) {
    if (!swapBytes_) {
      if (from != (char *)to)
	memmove(to, from, fromLen);
      return fromLen/2;
    }
  }
  if (swapBytes_) {
    for (size_t n = fromLen; n > 0; n -= 2) {
      U u;
      u.bytes[1] = *from++;
      u.bytes[0] = *from++;
      *to++ = u.word;
    }
  }
  else  {
    for (size_t n = fromLen; n > 0; n -= 2) {
      U u;
      u.bytes[0] = *from++;
      u.bytes[1] = *from++;
      *to++ = u.word;
    }
  }
  return fromLen/2;
}