예제 #1
0
/**
 * Decodes the FlarmNet.org file and puts the wanted
 * characters into the res pointer
 * @param file File handle
 * @param charCount Number of character to decode
 * @param res Pointer to be written in
 */
static void
LoadString(const char *bytes, size_t length, TCHAR *res, size_t res_size)
{
  const char *const end = bytes + length * 2;

#ifndef _UNICODE
  const char *const limit = res + res_size - 2;
#endif

  TCHAR *p = res;

  char tmp[3];
  tmp[2] = 0;

  while (bytes < end) {
    tmp[0] = *bytes++;
    tmp[1] = *bytes++;

    /* FLARMNet files are ISO-Latin-1, which is kind of short-sighted */

    const unsigned char ch = (unsigned char)strtoul(tmp, NULL, 16);
#ifdef _UNICODE
    /* Latin-1 can be converted to WIN32 wchar_t by casting */
    *p++ = ch;
#else
    /* convert to UTF-8 on all other platforms */

    if (p >= limit)
      break;

    p = Latin1ToUTF8(ch, p);
#endif
  }

  *p = 0;

#ifndef _UNICODE
  assert(ValidateUTF8(res));
#endif

  // Trim the string of any additional spaces
  StripRight(res);
}
예제 #2
0
파일: TestUTF8.cpp 프로젝트: Advi42/XCSoar
/**
 * Test driver for the UTF-8 helper functions.
 *
 * Announces the number of checks via plan_tests(), runs the
 * table-driven checks (valid, invalid, length, latin1_chars, crop
 * and - on non-_UNICODE builds - the truncate tests), followed by
 * hand-written checks for NextUTF8() and UnicodeToUTF8().
 *
 * @param argc unused
 * @param argv unused
 * @return the value of exit_status()
 */
int main(int argc, char **argv)
{
  /* the planned total must match the ok1() calls below: two per
     "valid", "invalid" and "length" entry, four per "crop" entry,
     one per "latin1_chars" entry, 9 for NextUTF8() and 27 for
     UnicodeToUTF8() */
  plan_tests(2 * ARRAY_SIZE(valid) +
             2 * ARRAY_SIZE(invalid) +
             2 * ARRAY_SIZE(length) +
             4 * ARRAY_SIZE(crop) +
             ARRAY_SIZE(latin1_chars) +
#ifndef _UNICODE
             ARRAY_SIZE(truncate_string_tests) +
#endif
             9 + 27);

  /* valid strings must validate, and LengthUTF8() must agree with
     MyLengthUTF8() (presumably a reference implementation defined
     elsewhere in the test file) */
  for (auto i : valid) {
    ok1(ValidateUTF8(i));
    ok1(LengthUTF8(i) == MyLengthUTF8(i));
  }

  /* invalid strings must be rejected by both validators */
  for (auto i : invalid) {
    ok1(!ValidateUTF8(i));
    ok1(!MyValidateUTF8(i));
  }

  /* both length functions must report the expected length */
  for (auto &l : length) {
    ok1(l.length == LengthUTF8(l.value));
    ok1(l.length == MyLengthUTF8(l.value));
  }

  /* scratch buffer shared by the remaining checks */
  char buffer[64];

  /* Latin1ToUTF8() returns the end of the written sequence, which
     gets null-terminated here before comparing */
  for (auto &l : latin1_chars) {
    *Latin1ToUTF8(l.ch, buffer) = 0;
    ok1(strcmp(l.utf8, buffer) == 0);
  }

  /* CropIncompleteUTF8() modifies the buffer in place and must
     return a pointer to the new null terminator */
  for (auto &c : crop) {
    strcpy(buffer, c.input);
    auto *end = CropIncompleteUTF8(buffer);
    ok1(strcmp(c.output, buffer) == 0);
    ok1(end != nullptr);
    ok1(*end == '\0');
    ok1(end == buffer + strlen(buffer));
  }

#ifndef _UNICODE
  TestTruncateString();
#endif

  /* test NextUTF8(): "first" is the decoded character, "second" the
     position following it */
  {
    /* three ASCII characters followed by one 3-byte sequence */
    const char *p = "foo\xe7\x9b\xae";
    auto n = NextUTF8(p);
    ok1(n.first == 'f');
    ok1(n.second == p + 1);

    n = NextUTF8(p + 1);
    ok1(n.first == 'o');
    ok1(n.second == p + 2);

    n = NextUTF8(p + 2);
    ok1(n.first == 'o');
    ok1(n.second == p + 3);

    /* E7 9B AE decodes to U+76EE (30446) and consumes three bytes */
    n = NextUTF8(p + 3);
    ok1(n.first == 30446);
    ok1(n.second == p + 6);

    /* the null terminator */
    n = NextUTF8(p + 6);
    ok1(n.first == 0);
  }

  /* test UnicodeToUTF8() */

  /* pre-fill with non-zero to prove that the zero byte is written */
  buffer[0] = 1;
  ok1(UnicodeToUTF8(0, buffer) == buffer + 1);
  ok1(buffer[0] == 0);

  /* one-byte sequences */
  ok1(UnicodeToUTF8(' ', buffer) == buffer + 1);
  ok1(buffer[0] == ' ');

  ok1(UnicodeToUTF8(0x7f, buffer) == buffer + 1);
  ok1(buffer[0] == 0x7f);

  /* two-byte sequence */
  ok1(UnicodeToUTF8(0xa2, buffer) == buffer + 2);
  ok1(buffer[0] == char(0xc2));
  ok1(buffer[1] == char(0xa2));

  /* three-byte sequences */
  ok1(UnicodeToUTF8(0x6fb3, buffer) == buffer + 3);
  ok1(buffer[0] == char(0xe6));
  ok1(buffer[1] == char(0xbe));
  ok1(buffer[2] == char(0xb3));

  ok1(UnicodeToUTF8(0xffff, buffer) == buffer + 3);
  ok1(buffer[0] == char(0xef));
  ok1(buffer[1] == char(0xbf));
  ok1(buffer[2] == char(0xbf));

  /* four-byte sequences */
  ok1(UnicodeToUTF8(0x10000, buffer) == buffer + 4);
  ok1(buffer[0] == char(0xf0));
  ok1(buffer[1] == char(0x90));
  ok1(buffer[2] == char(0x80));
  ok1(buffer[3] == char(0x80));

  ok1(UnicodeToUTF8(0x10ffff, buffer) == buffer + 4);
  ok1(buffer[0] == char(0xf4));
  ok1(buffer[1] == char(0x8f));
  ok1(buffer[2] == char(0xbf));
  ok1(buffer[3] == char(0xbf));

  return exit_status();
}
예제 #3
0
/**
 * Test driver for the UTF-8 helper functions.
 *
 * Announces the number of checks via plan_tests(), runs the
 * table-driven checks (valid, invalid, length, latin1_chars and
 * crop), followed by hand-written checks for NextUTF8() and
 * UnicodeToUTF8().
 *
 * @param argc unused
 * @param argv unused
 * @return the value of exit_status()
 */
int main(int argc, char **argv)
{
  /* the planned total must match the ok1() calls below: one per
     table entry, 9 for NextUTF8() and 27 for UnicodeToUTF8() */
  plan_tests(ARRAY_SIZE(valid) + ARRAY_SIZE(invalid) +
             ARRAY_SIZE(length) +
             ARRAY_SIZE(crop) +
             ARRAY_SIZE(latin1_chars) +
             9 + 27);

  /* valid strings must validate */
  for (auto i : valid)
    ok1(ValidateUTF8(i));

  /* invalid strings must be rejected */
  for (auto i : invalid)
    ok1(!ValidateUTF8(i));

  /* LengthUTF8() must report the expected length */
  for (auto &l : length)
    ok1(l.length == LengthUTF8(l.value));

  /* scratch buffer shared by the remaining checks */
  char buffer[64];

  /* Latin1ToUTF8() returns the end of the written sequence, which
     gets null-terminated here before comparing */
  for (auto &l : latin1_chars) {
    *Latin1ToUTF8(l.ch, buffer) = 0;
    ok1(strcmp(l.utf8, buffer) == 0);
  }

  /* CropIncompleteUTF8() modifies the buffer in place */
  for (auto &c : crop) {
    strcpy(buffer, c.input);
    CropIncompleteUTF8(buffer);
    ok1(strcmp(c.output, buffer) == 0);
  }

  /* test NextUTF8(): "first" is the decoded character, "second" the
     position following it */
  {
    /* three ASCII characters followed by one 3-byte sequence */
    const char *p = "foo\xe7\x9b\xae";
    auto n = NextUTF8(p);
    ok1(n.first == 'f');
    ok1(n.second == p + 1);

    n = NextUTF8(p + 1);
    ok1(n.first == 'o');
    ok1(n.second == p + 2);

    n = NextUTF8(p + 2);
    ok1(n.first == 'o');
    ok1(n.second == p + 3);

    /* E7 9B AE decodes to U+76EE (30446) and consumes three bytes */
    n = NextUTF8(p + 3);
    ok1(n.first == 30446);
    ok1(n.second == p + 6);

    /* the null terminator */
    n = NextUTF8(p + 6);
    ok1(n.first == 0);
  }

  /* test UnicodeToUTF8() */

  /* pre-fill with non-zero to prove that the zero byte is written */
  buffer[0] = 1;
  ok1(UnicodeToUTF8(0, buffer) == buffer + 1);
  ok1(buffer[0] == 0);

  /* one-byte sequences */
  ok1(UnicodeToUTF8(' ', buffer) == buffer + 1);
  ok1(buffer[0] == ' ');

  ok1(UnicodeToUTF8(0x7f, buffer) == buffer + 1);
  ok1(buffer[0] == 0x7f);

  /* two-byte sequence */
  ok1(UnicodeToUTF8(0xa2, buffer) == buffer + 2);
  ok1(buffer[0] == char(0xc2));
  ok1(buffer[1] == char(0xa2));

  /* three-byte sequences */
  ok1(UnicodeToUTF8(0x6fb3, buffer) == buffer + 3);
  ok1(buffer[0] == char(0xe6));
  ok1(buffer[1] == char(0xbe));
  ok1(buffer[2] == char(0xb3));

  ok1(UnicodeToUTF8(0xffff, buffer) == buffer + 3);
  ok1(buffer[0] == char(0xef));
  ok1(buffer[1] == char(0xbf));
  ok1(buffer[2] == char(0xbf));

  /* four-byte sequences */
  ok1(UnicodeToUTF8(0x10000, buffer) == buffer + 4);
  ok1(buffer[0] == char(0xf0));
  ok1(buffer[1] == char(0x90));
  ok1(buffer[2] == char(0x80));
  ok1(buffer[3] == char(0x80));

  ok1(UnicodeToUTF8(0x10ffff, buffer) == buffer + 4);
  ok1(buffer[0] == char(0xf4));
  ok1(buffer[1] == char(0x8f));
  ok1(buffer[2] == char(0xbf));
  ok1(buffer[3] == char(0xbf));

  return exit_status();
}
예제 #4
0
/**
 * Reads the next line from the underlying source and returns it as a
 * TCHAR string.
 *
 * A leading UTF-8 byte order mark is skipped when the charset is
 * AUTO or UTF8 (and the charset is then fixed to UTF8).  In AUTO
 * mode, the first line that is not valid UTF-8 switches the reader
 * to ISO-Latin-1.
 *
 * @return the converted line, or nullptr if the source returned no
 * line, if a buffer could not be obtained, or if the line could not
 * be decoded as UTF-8
 */
TCHAR *
ConvertLineReader::ReadLine()
{
    char *line = source.ReadLine();
    if (line == nullptr)
        return nullptr;

    /* a byte order mark is only honoured for these two charsets */
    const bool bom_allowed =
        charset == Charset::AUTO || charset == Charset::UTF8;

    /* short-circuit evaluation keeps this from reading beyond the
       null terminator of a short line */
    const bool starts_with_bom = line[0] == (char)0xEF &&
        line[1] == (char)0xBB &&
        line[2] == (char)0xBF;

    if (starts_with_bom && bom_allowed) {
        /* drop the three BOM bytes */
        line += 3;

        /* "AUTO" has now been resolved: this is UTF-8 */
        charset = Charset::UTF8;
    }

    /* still undecided and the line is not valid UTF-8: fall back to
       ISO-Latin-1 from now on */
    if (charset == Charset::AUTO && !ValidateUTF8(line))
        charset = Charset::ISO_LATIN_1;

#ifdef _UNICODE
    const size_t line_length = strlen(line);

    /* the wide string never has more units than the narrow one has
       bytes; one more for the terminator */
    TCHAR *wide = tbuffer.get(line_length + 1);
    if (wide == nullptr)
        return nullptr;

    if (line_length == 0) {
        wide[0] = _T('\0');
        return wide;
    }

    if (charset == Charset::ISO_LATIN_1) {
        iso_latin_1_to_tchar(wide, line);
    } else {
        const int converted = MultiByteToWideChar(CP_UTF8, 0,
                                                  line, line_length,
                                                  wide, line_length);
        if (converted == 0)
            return nullptr;

        wide[converted] = _T('\0');
    }

    return wide;
#else
    switch (charset) {
    case Charset::ISO_LATIN_1: {
        /* sized for two output bytes per input byte, plus the
           terminator */
        const size_t buffer_size = strlen(line) * 2 + 1;
        const char *const utf8 =
            Latin1ToUTF8(line, tbuffer.get(buffer_size), buffer_size);

        /* hand out the unconverted line if the conversion yields
           nothing */
        return utf8 != nullptr
            ? const_cast<char *>(utf8)
            : line;
    }

    case Charset::UTF8:
        /* abort on invalid UTF-8 sequence */
        return ValidateUTF8(line) ? line : nullptr;

    case Charset::AUTO:
        return line;
    }

    /* unreachable */
    gcc_unreachable();
#endif
}