Example #1
0
// Probes the next (up to) three bytes of File for a byte order mark.
//
// Recognised signatures are those reported by encoding::get_signature_bytes for
// CP_UNICODE and CP_REVERSEBOM (compared against the first two bytes) and for
// CP_UTF8 (compared against all three bytes).
//
// On a match Codepage receives the matching codepage, the file pointer is placed
// right after the BOM (offset 2 or 3 from the beginning of the file) and true is returned.
// With no match the file pointer is moved back to the beginning of the file and false
// is returned; Codepage is left untouched.
// If the read itself fails, false is returned and the pointer is not repositioned here.
//
// NOTE(review): repositioning is done relative to FILE_BEGIN, so this presumably expects
// to be called with the pointer at the very start of the file - confirm against callers.
static bool GetUnicodeCpUsingBOM(const os::fs::file& File, uintptr_t& Codepage)
{
	char Head[3]{};
	size_t Got = 0;

	if (!File.Read(Head, std::size(Head), Got))
		return false;

	// The view always spans the whole (zero-initialised) buffer;
	// Got is what guards against trusting bytes that were never read.
	const std::string_view Bytes(Head, std::size(Head));

	// Publishes the detected codepage and skips past its signature.
	const auto Accept = [&](uintptr_t DetectedCp, int BomSize)
	{
		Codepage = DetectedCp;
		File.SetPointer(BomSize, nullptr, FILE_BEGIN);
		return true;
	};

	if (Got >= 2)
	{
		const auto Prefix = Bytes.substr(0, 2);

		if (Prefix == encoding::get_signature_bytes(CP_UNICODE))
			return Accept(CP_UNICODE, 2);

		if (Prefix == encoding::get_signature_bytes(CP_REVERSEBOM))
			return Accept(CP_REVERSEBOM, 2);
	}

	if (Got >= 3 && Bytes == encoding::get_signature_bytes(CP_UTF8))
		return Accept(CP_UTF8, 3);

	// No signature: undo the probe read.
	File.SetPointer(0, nullptr, FILE_BEGIN);
	return false;
}
Example #2
0
// Tries to work out the codepage of File and stores it in Codepage.
//
// Detection order:
//   1. BOM check via GetUnicodeCpUsingBOM. On success SignatureFound is set to true and,
//      as that function does, the file pointer is left right after the BOM (2 or 3 bytes).
//   2. If UseHeuristics is false, detection stops here and false is returned.
//   3. Up to 32768 bytes are read as a sample and the file pointer is moved back to the
//      beginning of the file. A failed or empty read yields false.
//   4. GetUnicodeCpUsingWindows is tried on the sample; if it fails NotUTF16 is set to true.
//   5. The sample is validated as UTF-8. Valid non-ASCII data selects CP_UTF8; pure ASCII
//      data selects DefaultCodepage when it is CP_UTF8, ANSI or OEM, and ANSI otherwise.
//   6. Otherwise NotUTF8 is set to true and the result of
//      GetCpUsingUniversalDetectorWithExceptions is returned.
//
// SignatureFound, NotUTF8 and NotUTF16 are only ever set to true here, never reset,
// so the caller is expected to initialise them.
static bool GetFileCodepage(const os::fs::file& File, uintptr_t DefaultCodepage, uintptr_t& Codepage, bool& SignatureFound, bool& NotUTF8, bool& NotUTF16, bool UseHeuristics)
{
	if (GetUnicodeCpUsingBOM(File, Codepage))
	{
		SignatureFound = true;
		return true;
	}

	if (!UseHeuristics)
		return false;

	// TODO: configurable
	const size_t SampleCapacity = 32768;
	char_ptr Sample(SampleCapacity);
	size_t SampleSize = 0;

	const auto ReadOk = File.Read(Sample.get(), SampleCapacity, SampleSize);

	// Rewind before looking at the outcome, so the pointer is reset on every path from here on.
	File.SetPointer(0, nullptr, FILE_BEGIN);

	if (!(ReadOk && SampleSize))
		return false;

	if (GetUnicodeCpUsingWindows(Sample.get(), SampleSize, Codepage))
		return true;

	NotUTF16 = true;

	unsigned long long TotalSize = 0;
	const auto SampleIsWholeFile = File.GetSize(TotalSize) && SampleSize == TotalSize;
	bool AsciiOnly = false;

	// The second argument is true when the sample may not cover the whole file
	// (presumably so that a sequence cut off at the end is tolerated - confirm in is_valid_utf8).
	if (!encoding::is_valid_utf8({ Sample.get(), SampleSize }, !SampleIsWholeFile, AsciiOnly))
	{
		NotUTF8 = true;
		return GetCpUsingUniversalDetectorWithExceptions({ Sample.get(), SampleSize }, Codepage);
	}

	if (AsciiOnly)
	{
		// Pure ASCII: honour the default only if it is one of UTF-8 / ANSI / OEM.
		const auto DefaultIsUsable =
			DefaultCodepage == CP_UTF8 ||
			DefaultCodepage == encoding::codepage::ansi() ||
			DefaultCodepage == encoding::codepage::oem();

		Codepage = DefaultIsUsable? DefaultCodepage : encoding::codepage::ansi();
	}
	else
	{
		Codepage = CP_UTF8;
	}

	return true;
}
Example #3
0
// Looks up a header parameter of the form ".<ParamName>...=<value>[,<value2>]" in LangFile.
//
// Lines are enumerated with enum_file_lines using CodePage. The first line that starts
// (case-insensitively) with '.' + ParamName and contains '=' is taken:
//   - strParam1 receives the text after '=' up to the first ',' (or to the end of the line),
//     trimmed on the right;
//   - strParam2, if not null, receives the right-trimmed text after that ',' or is cleared
//     when there is no ','.
// Returns true when such a line is found. Returns false when the lines run out or when
// a non-matching line starting with '"' is met first.
//
// The file pointer is restored to its position at entry on every exit path.
bool GetLangParam(const os::fs::file& LangFile, string_view const ParamName, string& strParam1, string* strParam2, uintptr_t CodePage)
{
	const auto Key = concat(L'.', ParamName);
	const auto SavedPos = LangFile.GetPointer();
	SCOPE_EXIT{ LangFile.SetPointer(SavedPos, nullptr, FILE_BEGIN); };

	for (const auto& Line: enum_file_lines(LangFile, CodePage))
	{
		if (!starts_with_icase(Line.Str, Key))
		{
			// '"' indicates some meaningful string.
			// Parameters can be only in the header, no point to go deeper
			if (starts_with(Line.Str, L'"'))
				return false;

			continue;
		}

		const auto EqPos = Line.Str.find(L'=');

		// The name matches but there is no value on this line: keep looking.
		if (EqPos == string::npos)
			continue;

		assign(strParam1, Line.Str.substr(EqPos + 1));

		if (strParam2)
			strParam2->clear();

		const auto CommaPos = strParam1.find(L',');

		if (CommaPos != string::npos)
		{
			if (strParam2)
				*strParam2 = trim_right(strParam1.substr(CommaPos + 1));

			// Drop the ',' and everything after it from the first value.
			strParam1.resize(CommaPos);
		}

		inplace::trim_right(strParam1);
		return true;
	}

	return false;
}