/**
 * Fetch the next line from the wrapped narrow-character reader and
 * hand it out as a TCHAR string.
 *
 * A leading UTF-8 byte order mark (EF BB BF) is skipped.  In _UNICODE
 * builds the line is converted into #tbuffer: ISO-Latin-1 input goes
 * through iso_latin_1_to_tchar(), everything else through
 * MultiByteToWideChar() using #code_page.  In non-_UNICODE builds the
 * narrow line is returned without any conversion.
 *
 * @return the (possibly converted) line, or a null pointer if the
 * source has no more lines, the conversion buffer could not be
 * obtained, or MultiByteToWideChar() reported failure
 */
TCHAR *
ConvertLineReader::read()
{
  char *line = source.read();
  if (line == nullptr)
    return nullptr;

  // Skip a UTF-8 byte order mark at the start of the line.  The
  // short-circuit evaluation stops at the terminating NUL of a short
  // line, because NUL matches none of the three BOM bytes.
  const bool has_bom = line[0] == static_cast<char>(0xEF) &&
    line[1] == static_cast<char>(0xBB) &&
    line[2] == static_cast<char>(0xBF);
  if (has_bom)
    line += 3;

#ifdef _UNICODE
  const size_t line_length = strlen(line);

  // one extra element for the terminator
  TCHAR *wide = tbuffer.get(line_length + 1);
  if (wide == nullptr)
    return nullptr;

  if (line_length == 0) {
    // nothing to convert; hand out an empty string
    wide[0] = _T('\0');
    return wide;
  }

  if (m_charset == ISO_LATIN_1) {
    iso_latin_1_to_tchar(wide, line);
    return wide;
  }

  // Every other charset is converted by the operating system.
  const int converted = MultiByteToWideChar(code_page, 0,
                                            line, line_length,
                                            wide, line_length);
  if (converted == 0)
    return nullptr;

  // the input length was passed explicitly, so the output is not
  // terminated by the API call
  wide[converted] = _T('\0');
  return wide;
#else
  // XXX call iconv?
  return line;
#endif
}
/**
 * Read the next line from the wrapped narrow-character reader and
 * return it as a TCHAR string, converting from the configured
 * character set.
 *
 * Character set handling (this method may modify #charset):
 * - a leading UTF-8 byte order mark (EF BB BF) is skipped when the
 *   charset is AUTO or UTF8, and the charset is then pinned to UTF8;
 * - while the charset is still AUTO, the first line that fails
 *   ValidateUTF8() switches the charset to ISO_LATIN_1.
 *
 * In _UNICODE builds, the line is converted into #tbuffer (Latin-1 via
 * iso_latin_1_to_tchar(), everything else via MultiByteToWideChar()
 * with CP_UTF8).  In non-_UNICODE builds, Latin-1 input is converted
 * to UTF-8 with Latin1ToUTF8(), UTF-8 input is validated, and AUTO
 * input is passed through unchanged.
 *
 * @return the (possibly converted) line, or nullptr if the source has
 * no more lines, the conversion buffer could not be obtained, the
 * wide-character conversion failed, or (charset UTF8, non-_UNICODE)
 * the line is not valid UTF-8
 */
TCHAR *
ConvertLineReader::ReadLine()
{
  char *narrow = source.ReadLine();
  if (narrow == nullptr)
    return nullptr;

  // Check if there is byte order mark in front.  Short-circuit
  // evaluation keeps this from reading past the terminating NUL of a
  // short line.
  if (narrow[0] == static_cast<char>(0xEF) &&
      narrow[1] == static_cast<char>(0xBB) &&
      narrow[2] == static_cast<char>(0xBF) &&
      (charset == Charset::AUTO || charset == Charset::UTF8)) {
    // -> if so, skip it
    narrow += 3;

    /* if it was "AUTO", then explicitly switch to UTF-8 now */
    charset = Charset::UTF8;
  }

  if (charset == Charset::AUTO && !ValidateUTF8(narrow))
    /* invalid UTF-8 sequence detected: switch to ISO-Latin-1 */
    charset = Charset::ISO_LATIN_1;

#ifdef _UNICODE
  const size_t narrow_length = strlen(narrow);

  // one extra element for the terminator
  TCHAR *t = tbuffer.get(narrow_length + 1);
  if (t == nullptr)
    return nullptr;

  if (narrow_length == 0) {
    t[0] = _T('\0');
    return t;
  }

  switch (charset) {
  case Charset::ISO_LATIN_1:
    iso_latin_1_to_tchar(t, narrow);
    break;

  default: {
    // MultiByteToWideChar() takes its lengths as int
    const int length =
      MultiByteToWideChar(CP_UTF8, 0,
                          narrow, static_cast<int>(narrow_length),
                          t, static_cast<int>(narrow_length));
    if (length == 0)
      return nullptr;

    // the input length was passed explicitly, so the output is not
    // terminated by the API call
    t[length] = _T('\0');
    break;
  }
  }

  return t;
#else
  switch (charset) {
  case Charset::ISO_LATIN_1: {
    /* a Latin-1 byte needs at most two UTF-8 bytes, plus the
       terminator */
    const size_t buffer_size = strlen(narrow) * 2 + 1;

    TCHAR *const buffer = tbuffer.get(buffer_size);
    if (buffer == nullptr)
      /* no conversion buffer available; fail like the _UNICODE
         branch does instead of handing a null buffer to the
         converter */
      return nullptr;

    const char *utf8 = Latin1ToUTF8(narrow, buffer, buffer_size);
    if (utf8 == nullptr)
      /* conversion was not possible: fall back to the raw line */
      return narrow;

    return const_cast<char *>(utf8);
  }

  case Charset::UTF8:
    if (!ValidateUTF8(narrow))
      /* abort on invalid UTF-8 sequence */
      return nullptr;

    /* fall through ... */

  case Charset::AUTO:
    return narrow;
  }

  /* unreachable */
  gcc_unreachable();
#endif
}