static bool GetCpUsingUniversalDetectorWithExceptions(std::string_view const Str, uintptr_t& Codepage) { if (!GetCpUsingUniversalDetector(Str, Codepage)) return false; // This whole block shouldn't be here if (Global->Opt->strNoAutoDetectCP.Get() == L"-1"sv) { if (Global->Opt->CPMenuMode && static_cast<UINT>(Codepage) != encoding::codepage::ansi() && static_cast<UINT>(Codepage) != encoding::codepage::oem()) { const auto CodepageType = codepages::GetFavorite(Codepage); if (!(CodepageType & CPST_FAVORITE)) return false; } } else { if (contains(enum_tokens(Global->Opt->strNoAutoDetectCP.Get(), L",;"sv), str(Codepage))) return false; } return true; }
bool GetFileFormat( api::fs::file& file, uintptr_t& nCodePage, bool* pSignatureFound, bool bUseHeuristics, bool* pPureAscii) { DWORD dwTemp = 0; bool bSignatureFound = false; bool bDetect = false; bool bPureAscii = false; size_t Readed = 0; if (file.Read(&dwTemp, sizeof(dwTemp), Readed) && Readed > 1 ) // minimum signature size is 2 bytes { if (LOWORD(dwTemp) == SIGN_UNICODE) { nCodePage = CP_UNICODE; file.SetPointer(2, nullptr, FILE_BEGIN); bSignatureFound = true; } else if (LOWORD(dwTemp) == SIGN_REVERSEBOM) { nCodePage = CP_REVERSEBOM; file.SetPointer(2, nullptr, FILE_BEGIN); bSignatureFound = true; } else if ((dwTemp & 0x00FFFFFF) == SIGN_UTF8) { nCodePage = CP_UTF8; file.SetPointer(3, nullptr, FILE_BEGIN); bSignatureFound = true; } else { file.SetPointer(0, nullptr, FILE_BEGIN); } } if (bSignatureFound) { bDetect = true; } else if (bUseHeuristics) { file.SetPointer(0, nullptr, FILE_BEGIN); size_t Size = 0x8000; // BUGBUG. TODO: configurable char_ptr Buffer(Size); size_t ReadSize = 0; bool ReadResult = file.Read(Buffer.get(), Size, ReadSize); file.SetPointer(0, nullptr, FILE_BEGIN); bPureAscii = ReadResult && !ReadSize; // empty file == pure ascii if (ReadResult && ReadSize) { // BUGBUG MSDN documents IS_TEXT_UNICODE_BUFFER_TOO_SMALL but there is no such thing if (ReadSize > 1) { int test = IS_TEXT_UNICODE_UNICODE_MASK | IS_TEXT_UNICODE_REVERSE_MASK | IS_TEXT_UNICODE_NOT_UNICODE_MASK | IS_TEXT_UNICODE_NOT_ASCII_MASK; IsTextUnicode(Buffer.get(), static_cast<int>(ReadSize), &test); // return value is ignored, it's ok. if (!(test & IS_TEXT_UNICODE_NOT_UNICODE_MASK) && (test & IS_TEXT_UNICODE_NOT_ASCII_MASK)) { if (test & IS_TEXT_UNICODE_UNICODE_MASK) { nCodePage = CP_UNICODE; bDetect = true; } else if (test & IS_TEXT_UNICODE_REVERSE_MASK) { nCodePage = CP_REVERSEBOM; bDetect = true; } } if (!bDetect && IsTextUTF8(Buffer.get(), ReadSize, bPureAscii)) { nCodePage = CP_UTF8; bDetect = true; } } if (!bDetect && !bPureAscii) { int cp = GetCpUsingUniversalDetector(Buffer.get(), ReadSize); if ( cp >= 0 ) { if (Global->Opt->strNoAutoDetectCP.Get() == L"-1") { if ( Global->Opt->CPMenuMode ) { if ( static_cast<UINT>(cp) != GetACP() && static_cast<UINT>(cp) != GetOEMCP() ) { long long selectType = Codepages().GetFavorite(cp); if (0 == (selectType & CPST_FAVORITE)) cp = -1; } } } else { std::vector<string> BannedCpList; split(BannedCpList, Global->Opt->strNoAutoDetectCP, STLF_UNIQUE); if (std::find(ALL_CONST_RANGE(BannedCpList), std::to_wstring(cp)) != BannedCpList.cend()) { cp = -1; } } } if (cp != -1) { nCodePage = cp; bDetect = true; } } } } if (pSignatureFound) *pSignatureFound = bSignatureFound; if (pPureAscii) *pPureAscii = bPureAscii; return bDetect; }
bool GetFileFormat(api::File& file, uintptr_t& nCodePage, bool* pSignatureFound, bool bUseHeuristics) { DWORD dwTemp=0; bool bSignatureFound = false; bool bDetect=false; DWORD Readed = 0; if (file.Read(&dwTemp, sizeof(dwTemp), Readed) && Readed > 1 ) // minimum signature size is 2 bytes { if (LOWORD(dwTemp) == SIGN_UNICODE) { nCodePage = CP_UNICODE; file.SetPointer(2, nullptr, FILE_BEGIN); bSignatureFound = true; } else if (LOWORD(dwTemp) == SIGN_REVERSEBOM) { nCodePage = CP_REVERSEBOM; file.SetPointer(2, nullptr, FILE_BEGIN); bSignatureFound = true; } else if ((dwTemp & 0x00FFFFFF) == SIGN_UTF8) { nCodePage = CP_UTF8; file.SetPointer(3, nullptr, FILE_BEGIN); bSignatureFound = true; } else { file.SetPointer(0, nullptr, FILE_BEGIN); } } if (bSignatureFound) { bDetect = true; } else if (bUseHeuristics) { file.SetPointer(0, nullptr, FILE_BEGIN); DWORD Size=0x8000; // BUGBUG. TODO: configurable char_ptr Buffer(Size); DWORD ReadSize = 0; bool ReadResult = file.Read(Buffer.get(), Size, ReadSize); file.SetPointer(0, nullptr, FILE_BEGIN); if (ReadResult && ReadSize) { int test= IS_TEXT_UNICODE_STATISTICS| IS_TEXT_UNICODE_REVERSE_STATISTICS| IS_TEXT_UNICODE_CONTROLS| IS_TEXT_UNICODE_REVERSE_CONTROLS| IS_TEXT_UNICODE_ILLEGAL_CHARS| IS_TEXT_UNICODE_ODD_LENGTH| IS_TEXT_UNICODE_NULL_BYTES; if (IsTextUnicode(Buffer.get(), ReadSize, &test)) { if (!(test&IS_TEXT_UNICODE_ODD_LENGTH) && !(test&IS_TEXT_UNICODE_ILLEGAL_CHARS)) { if ((test&IS_TEXT_UNICODE_NULL_BYTES) || (test&IS_TEXT_UNICODE_CONTROLS) || (test&IS_TEXT_UNICODE_REVERSE_CONTROLS)) { if ((test&IS_TEXT_UNICODE_CONTROLS) || (test&IS_TEXT_UNICODE_STATISTICS)) { nCodePage=CP_UNICODE; bDetect=true; } else if ((test&IS_TEXT_UNICODE_REVERSE_CONTROLS) || (test&IS_TEXT_UNICODE_REVERSE_STATISTICS)) { nCodePage=CP_REVERSEBOM; bDetect=true; } } } } else if (IsTextUTF8(Buffer.get(), ReadSize)) { nCodePage=CP_UTF8; bDetect=true; } else { int cp = GetCpUsingUniversalDetector(Buffer.get(), ReadSize); if ( cp >= 0 ) { if (Global->Opt->strNoAutoDetectCP.Get() == L"-1") { if ( Global->Opt->CPMenuMode ) { if ( static_cast<UINT>(cp) != GetACP() && static_cast<UINT>(cp) != GetOEMCP() ) { long long selectType = Global->CodePages->GetFavorite(cp); if (0 == (selectType & CPST_FAVORITE)) cp = -1; } } } else { const auto BannedCpList = StringToList(Global->Opt->strNoAutoDetectCP, STLF_UNIQUE); if (std::find(ALL_CONST_RANGE(BannedCpList), std::to_wstring(cp)) != BannedCpList.cend()) { cp = -1; } } } if (cp != -1) { nCodePage = cp; bDetect = true; } } } } if (pSignatureFound) { *pSignatureFound = bSignatureFound; } return bDetect; }