Пример #1
0
/*
 * Converts the NUL-terminated UTF-16 wide string `s` into a string of
 * full code points.
 *
 * Each code unit is written to a string output port. When isLead()
 * accepts a unit and the following unit is accepted by isTrail(), the
 * pair is combined into a single supplementary code point. A lead unit
 * that is not followed by a trail unit (including a lead at the very
 * end of the input) aborts the conversion and the string "bad char" is
 * returned instead.
 *
 * NOTE(review): units that isLead() rejects are written unchanged, so an
 * unpaired trail surrogate presumably passes through as-is -- confirm
 * against the definitions of isLead()/isTrail().
 *
 * The string port is cleaned on every exit path, including the error
 * return.
 */
static SgString* utf16ToUtf32(wchar_t *s)
{
  /* Folds the surrogate bias into one constant so that
     (lead << 10) + trail - offset yields the code point. */
  const SgChar offset = (0xd800 << 10UL) + 0xdc00 - 0x10000;
  size_t i = 0, n = wcslen(s);
  SgPort *out;
  SgStringPort tp;
  SgObject r;

  out = Sg_InitStringOutputPort(&tp, (int)n);
  while (i < n) {
    SgChar c0 = s[i++];
    if (isLead(c0)) {
      SgChar c1;
      if (i < n && isTrail((c1 = s[i]))) {
	i++;
	c0 = (c0 << 10) + c1 - offset;
      } else {
	/* Release the port before bailing out; the normal path does the
	   same after extracting the result. */
	SG_CLEAN_STRING_PORT(&tp);
	return SG_MAKE_STRING("bad char");
      }
    }
    Sg_PutcUnsafe(out, c0);
  }
  r = Sg_GetStringFromStringPort(&tp);
  SG_CLEAN_STRING_PORT(&tp);
  return r;
}
Пример #2
0
/*
 * Converts the UTF-16 code units in the half-open range [s, e) into a
 * string of full code points.
 *
 * Each code unit is written to a string output port. When isLead()
 * accepts a unit and the following unit (still inside the range) is
 * accepted by isTrail(), the pair is combined into a single
 * supplementary code point. A lead unit without a following trail unit
 * aborts the conversion and the string "bad char" is returned instead.
 *
 * NOTE(review): units that isLead() rejects are written unchanged, so an
 * unpaired trail surrogate presumably passes through as-is -- confirm
 * against the definitions of isLead()/isTrail().
 *
 * The string port is cleaned on every exit path, including the error
 * return.
 */
static SgString* utf16ToUtf32WithRegion(wchar_t *s, wchar_t *e)
{
  /* Folds the surrogate bias into one constant so that
     (lead << 10) + trail - offset yields the code point. */
  const SgChar offset = (0xd800 << 10UL) + 0xdc00 - 0x10000;
  SgPort *out;
  SgStringPort tp;
  SgObject r;

  out = Sg_InitStringOutputPort(&tp, (int)((e - s) * 2));
  while (s < e) {
    SgChar c0 = *s++;
    if (isLead(c0)) {
      SgChar c1;
      if (s < e && isTrail((c1 = *s))) {
	s++;
	c0 = (c0 << 10) + c1 - offset;
      } else {
	/* Release the port before bailing out; the normal path does the
	   same after extracting the result. */
	SG_CLEAN_STRING_PORT(&tp);
	return SG_MAKE_STRING("bad char");
      }
    }
    Sg_PutcUnsafe(out, c0);
  }
  r = Sg_GetStringFromStringPort(&tp);
  SG_CLEAN_STRING_PORT(&tp);
  return r;
}
Пример #3
0
/**
 * Decodes the UTF-8 sequence that starts at offset *cursor in `string`.
 *
 * The lead byte and, for 3- and 4-byte sequences, the allowed range of
 * the second byte are checked so that the following are rejected:
 *   - stray continuation bytes and the overlong leads C0/C1 (< 0xC2),
 *   - overlong 3-byte forms (E0 followed by 80..9F),
 *   - ED followed by A0..BF (the UTF-16 surrogate range),
 *   - overlong 4-byte forms (F0 followed by 80..8F),
 *   - values above U+10FFFF (F4 followed by 90..BF, and leads >= F5).
 * The remaining continuation bytes are checked with isTrail().
 * NOTE(review): isTrail() is defined outside this block; it is assumed
 * to accept exactly the bytes 0x80..0xBF -- confirm.
 *
 * On an ill-formed or truncated sequence *isValid is set to `no`,
 * *codePoint is left at 0, and the cursor advances past the bytes that
 * were accepted before the offending one (always at least one byte), so
 * the caller can keep decoding from the new position.
 *
 * @param string     bytes to decode
 * @param stringSize number of bytes available in `string`
 * @param cursor     in: offset of the sequence to decode;
 *                   out: offset just past the consumed bytes
 *                   (unchanged when `no` is returned)
 * @param isValid    out: `yes` if a well-formed sequence was decoded
 *                   (also `yes` when the end of input is reported)
 * @param codePoint  out: decoded code point, or 0 if nothing was decoded
 * @return `no` if *cursor is already at or past stringSize,
 *         `yes` if at least one byte was consumed
 */
rbool utf8GetNextCharacter(const byte     *string,
                                 size_t    stringSize,
                                 size_t   *cursor,
                                 rbool    *isValid,
                                 unsigned *codePoint) {

    size_t position = *cursor;
    size_t rest_size;
    byte character;
    byte min;
    byte max;

    *codePoint = 0;
    *isValid = yes;

    if (position >= stringSize) {
        return no;
    }

    /* Only subtract after the bounds check: position < stringSize here,
       so the difference cannot wrap and is always at least 1. */
    rest_size = stringSize - position;
    character = string[position];

    if (character < (byte)0x80) {
        /* Single-byte (ASCII) sequence. */
        *codePoint = string[position];
        *isValid = yes;
        position += 1;
    } else if (character < (byte)0xC2) {
        /* 80..BF: continuation byte without a lead; C0/C1: overlong. */
        *isValid = no;
        position += 1;
    } else if (character < (byte)0xE0) {
        /* Two-byte sequence: C2..DF + one continuation byte. */

        if (rest_size < 2 || !isTrail(string[position + 1])) {
            *isValid = no;
            position += 1;
        } else {
            *codePoint = ((string[position] & (byte)0x1F) << 6) | (string[position + 1] & (byte)0x3F);
            *isValid = yes;
            position += 2;
        }

    } else if (character < (byte)0xF0) {
        /* Three-byte sequence: E0..EF + two continuation bytes. */

        /* E0 must be followed by A0..BF (no overlong forms);
           ED must be followed by 80..9F (no surrogates). */
        min = (character == (byte)0xE0) ? (byte)0xA0 : (byte)0x80;
        max = (character == (byte)0xED) ? (byte)0x9F : (byte)0xBF;

        if (rest_size < 2 || string[position + 1] < min || max < string[position + 1]) {
            *isValid = no;
            position += 1;
        } else if (rest_size < 3 || !isTrail(string[position + 2])) {
            *isValid = no;
            position += 2;
        } else {
            /* A 3-byte lead carries four payload bits. */
            *codePoint = ((string[position]     & (byte)0x0F) << 12)
                       | ((string[position + 1] & (byte)0x3F) << 6)
                       |  (string[position + 2] & (byte)0x3F);
            *isValid = yes;
            position += 3;
        }

    } else if (character < (byte)0xF5) {
        /* Four-byte sequence: F0..F4 + three continuation bytes. */

        /* F0 must be followed by 90..BF (no overlong forms);
           F4 must be followed by 80..8F (nothing above U+10FFFF). */
        min = (character == (byte)0xF0) ? (byte)0x90 : (byte)0x80;
        max = (character == (byte)0xF4) ? (byte)0x8F : (byte)0xBF;

        if (rest_size < 2 || string[position + 1] < min || max < string[position + 1]) {
            *isValid = no;
            position += 1;
        } else if (rest_size < 3 || !isTrail(string[position + 2])) {
            *isValid = no;
            position += 2;
        } else if (rest_size < 4 || !isTrail(string[position + 3])) {
            *isValid = no;
            position += 3;
        } else {
            *codePoint = ((string[position]     & (byte)0x7 ) << 18)
                       | ((string[position + 1] & (byte)0x3F) << 12)
                       | ((string[position + 2] & (byte)0x3F) << 6)
                       |  (string[position + 3] & (byte)0x3F);
            *isValid = yes;
            position += 4;
        }

    } else {
        /* F5..FF can never start a well-formed sequence. */
        *isValid = no;
        position += 1;
    }

    *cursor = position;
    return yes;
}