Example #1
0
/**
 * Read the next line from the wrapped narrow-character reader and
 * hand it back as a TCHAR string.
 *
 * A leading UTF-8 byte order mark (EF BB BF) is skipped if present.
 *
 * With _UNICODE defined, the line is converted into a buffer obtained
 * from tbuffer: either via iso_latin_1_to_tchar() when m_charset is
 * ISO_LATIN_1, or via MultiByteToWideChar() using code_page otherwise.
 * Without _UNICODE, the narrow line is returned unconverted.
 *
 * @return the line, or NULL if the source returned NULL, the
 * conversion buffer could not be obtained, or MultiByteToWideChar()
 * reported zero converted characters
 */
TCHAR *
ConvertLineReader::read()
{
  char *line = source.read();
  if (line == NULL)
    return NULL;

  // Detect a UTF-8 byte order mark; the && chain stops at the first
  // mismatch, so bytes beyond the terminator are never inspected.
  const bool has_bom = line[0] == (char)0xEF &&
    line[1] == (char)0xBB &&
    line[2] == (char)0xBF;
  if (has_bom)
    line += 3;

#ifdef _UNICODE
  const size_t n_chars = strlen(line);

  // one extra element for the terminator
  TCHAR *wide = tbuffer.get(n_chars + 1);
  if (wide == NULL)
    return NULL;

  // Empty line: nothing to convert, just terminate the buffer.
  if (n_chars == 0) {
    wide[0] = _T('\0');
    return wide;
  }

  if (m_charset == ISO_LATIN_1) {
    // NOTE(review): presumably writes its own terminator - confirm
    // against the helper's definition.
    iso_latin_1_to_tchar(wide, line);
    return wide;
  }

  // The terminator is not part of the input length passed here, so it
  // has to be appended manually after the conversion.
  const int converted = MultiByteToWideChar(code_page, 0, line, n_chars,
                                            wide, n_chars);
  if (converted == 0)
    return NULL;

  wide[converted] = _T('\0');
  return wide;
#else
  // XXX call iconv?
  return line;
#endif
}
Example #2
0
/**
 * Read the next line from the wrapped narrow-character reader and
 * hand it back as a TCHAR string, honouring the configured charset.
 *
 * A leading UTF-8 byte order mark (EF BB BF) is skipped when the
 * charset is AUTO or UTF8; seeing one pins the charset to UTF8.  If
 * the charset is (still) AUTO and the line is not valid UTF-8, the
 * charset is switched to ISO_LATIN_1.  Both changes are stored in the
 * "charset" member and therefore persist for subsequent calls.
 *
 * With _UNICODE defined, the line is converted into a buffer obtained
 * from tbuffer.  Otherwise ISO-Latin-1 input is converted to UTF-8
 * via Latin1ToUTF8(), and other input is returned as-is.
 *
 * @return the line, or nullptr if the source returned nullptr, the
 * conversion buffer could not be obtained, MultiByteToWideChar()
 * reported zero converted characters, or (non-_UNICODE, charset UTF8)
 * the line is not valid UTF-8
 */
TCHAR *
ConvertLineReader::ReadLine()
{
    char *line = source.ReadLine();
    if (line == nullptr)
        return nullptr;

    // A byte order mark is only honoured for AUTO/UTF8.  The byte
    // comparisons short-circuit, so bytes beyond the terminator are
    // never inspected.
    const bool bom_allowed =
        charset == Charset::AUTO || charset == Charset::UTF8;
    if (bom_allowed &&
        line[0] == (char)0xEF &&
        line[1] == (char)0xBB &&
        line[2] == (char)0xBF) {
        line += 3;

        /* a BOM settles the question: "AUTO" becomes UTF-8 */
        charset = Charset::UTF8;
    }

    /* still undecided and not valid UTF-8: switch to ISO-Latin-1 */
    if (charset == Charset::AUTO && !ValidateUTF8(line))
        charset = Charset::ISO_LATIN_1;

#ifdef _UNICODE
    const size_t n_chars = strlen(line);

    // one extra element for the terminator
    TCHAR *wide = tbuffer.get(n_chars + 1);
    if (wide == nullptr)
        return nullptr;

    // Empty line: nothing to convert, just terminate the buffer.
    if (n_chars == 0) {
        wide[0] = _T('\0');
        return wide;
    }

    if (charset == Charset::ISO_LATIN_1) {
        // NOTE(review): presumably writes its own terminator - confirm
        // against the helper's definition.
        iso_latin_1_to_tchar(wide, line);
        return wide;
    }

    // Everything else (UTF8, or AUTO that validated as UTF-8) is
    // decoded as UTF-8; the terminator is appended manually.
    const int converted = MultiByteToWideChar(CP_UTF8, 0, line, n_chars,
                                              wide, n_chars);
    if (converted == 0)
        return nullptr;

    wide[converted] = _T('\0');
    return wide;
#else
    if (charset == Charset::ISO_LATIN_1) {
        // Buffer is sized for two output bytes per input byte plus a
        // terminator.
        const size_t capacity = strlen(line) * 2 + 1;
        const char *converted =
            Latin1ToUTF8(line, tbuffer.get(capacity), capacity);

        // If the conversion yields nothing, fall back to the
        // unconverted line.
        return converted != nullptr
            ? const_cast<char *>(converted)
            : line;
    }

    if (charset == Charset::UTF8) {
        /* abort on invalid UTF-8 sequence */
        if (!ValidateUTF8(line))
            return nullptr;

        return line;
    }

    if (charset == Charset::AUTO)
        return line;

    /* unreachable */
    gcc_unreachable();
#endif
}