// If the file contains a BOM this function will advance the file pointer by the BOM size (either 2 or 3) static bool GetUnicodeCpUsingBOM(const os::fs::file& File, uintptr_t& Codepage) { char Buffer[3]{}; size_t BytesRead = 0; if (!File.Read(Buffer, std::size(Buffer), BytesRead)) return false; std::string_view Signature(Buffer, std::size(Buffer)); if (BytesRead >= 2) { if (Signature.substr(0, 2) == encoding::get_signature_bytes(CP_UNICODE)) { Codepage = CP_UNICODE; File.SetPointer(2, nullptr, FILE_BEGIN); return true; } if (Signature.substr(0, 2) == encoding::get_signature_bytes(CP_REVERSEBOM)) { Codepage = CP_REVERSEBOM; File.SetPointer(2, nullptr, FILE_BEGIN); return true; } } if (BytesRead >= 3 && Signature == encoding::get_signature_bytes(CP_UTF8)) { Codepage = CP_UTF8; File.SetPointer(3, nullptr, FILE_BEGIN); return true; } File.SetPointer(0, nullptr, FILE_BEGIN); return false; }
// If the file contains a BOM this function will advance the file pointer by the BOM size (either 2 or 3) static bool GetFileCodepage(const os::fs::file& File, uintptr_t DefaultCodepage, uintptr_t& Codepage, bool& SignatureFound, bool& NotUTF8, bool& NotUTF16, bool UseHeuristics) { if (GetUnicodeCpUsingBOM(File, Codepage)) { SignatureFound = true; return true; } if (!UseHeuristics) return false; // TODO: configurable const size_t Size = 32768; char_ptr Buffer(Size); size_t ReadSize = 0; const auto ReadResult = File.Read(Buffer.get(), Size, ReadSize); File.SetPointer(0, nullptr, FILE_BEGIN); if (!ReadResult || !ReadSize) return false; if (GetUnicodeCpUsingWindows(Buffer.get(), ReadSize, Codepage)) return true; NotUTF16 = true; unsigned long long FileSize = 0; const auto WholeFileRead = File.GetSize(FileSize) && ReadSize == FileSize; bool PureAscii = false; if (encoding::is_valid_utf8({ Buffer.get(), ReadSize }, !WholeFileRead, PureAscii)) { if (!PureAscii) Codepage = CP_UTF8; else if (DefaultCodepage == CP_UTF8 || DefaultCodepage == encoding::codepage::ansi() || DefaultCodepage == encoding::codepage::oem()) Codepage = DefaultCodepage; else Codepage = encoding::codepage::ansi(); return true; } NotUTF8 = true; return GetCpUsingUniversalDetectorWithExceptions({ Buffer.get(), ReadSize }, Codepage); }
// Looks up a ".<ParamName>=<value>[,<value2>]" line in the header of a language file.
//
// Lines are read via enum_file_lines(LangFile, CodePage). The first line that starts with
// ".<ParamName>" (case-insensitively) and contains '=' provides the result:
//   - strParam1 receives the text after '=' up to the first ',' (or to the end of the line),
//     with trailing whitespace trimmed;
//   - strParam2, if not null, receives the right-trimmed text after the first ',',
//     or is cleared if there is no ','.
// The search stops with false at the first line starting with '"', or at the end of the file.
// The file pointer is restored to its initial position on exit.
bool GetLangParam(const os::fs::file& LangFile, string_view const ParamName, string& strParam1, string* strParam2, uintptr_t CodePage)
{
	const auto Directive = concat(L'.', ParamName);

	const auto SavedPosition = LangFile.GetPointer();
	SCOPE_EXIT{ LangFile.SetPointer(SavedPosition, nullptr, FILE_BEGIN); };

	for (const auto& Line: enum_file_lines(LangFile, CodePage))
	{
		if (!starts_with_icase(Line.Str, Directive))
		{
			// '"' indicates some meaningful string.
			// Parameters can be only in the header, no point to go deeper
			if (starts_with(Line.Str, L'"'))
				return false;

			continue;
		}

		const auto Equals = Line.Str.find(L'=');
		if (Equals == string::npos)
			continue;

		assign(strParam1, Line.Str.substr(Equals + 1));

		if (strParam2)
			strParam2->clear();

		const auto Comma = strParam1.find(L',');
		if (Comma != string::npos)
		{
			// Everything after the first comma is the optional second value
			if (strParam2)
				*strParam2 = trim_right(strParam1.substr(Comma + 1));

			strParam1.resize(Comma);
		}

		inplace::trim_right(strParam1);
		return true;
	}

	return false;
}