Example #1
0
// Loads a file into a newly allocated, NUL-terminated byte buffer and, for
// HTML input, tries to convert its contents to UTF-8.
//
// path        - file to read.
// is_html     - when true, the content is scanned for a
//               <meta ... Content-Type ... charset=XXX> declaration and, if the
//               declared (or default) encoding is not UTF-8, the text is
//               converted to UTF-8 through IMultiLanguage::ConvertString.
// defEncoding - encoding name used when no charset is declared in the
//               document; may be NULL.
//
// Returns a buffer allocated with new[] (ownership passes to the caller), or
// NULL when the file cannot be opened or read. A leading UTF-8 byte order mark
// is stripped. If the conversion cannot be performed the unconverted bytes are
// returned.
unsigned char* web_page::load_utf8_file( LPCWSTR path, bool is_html, LPCWSTR defEncoding )
{
	HANDLE fl = CreateFile(path, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
	if(fl == INVALID_HANDLE_VALUE)
	{
		return NULL;
	}

	DWORD size = GetFileSize(fl, NULL);
	if(size == INVALID_FILE_SIZE)
	{
		// Either the query failed or size + 1 would wrap around to 0.
		CloseHandle(fl);
		return NULL;
	}

	unsigned char* ret = new unsigned char[size + 1];

	DWORD cbRead = 0;
	BOOL readOk = TRUE;
	if(size > 0)
	{
		readOk = ReadFile(fl, ret, size, &cbRead, NULL);
	}
	CloseHandle(fl);

	if(!readOk)
	{
		delete[] ret;
		return NULL;
	}

	// Strip the UTF-8 BOM. The bytes are compared as unsigned values: a char
	// literal such as '\xEF' is negative where char is signed and would never
	// compare equal to an unsigned char.
	DWORD len = cbRead;
	if(len >= 3 && ret[0] == 0xEF && ret[1] == 0xBB && ret[2] == 0xBF)
	{
		len -= 3;
		memmove(ret, ret + 3, len);
	}
	ret[len] = 0;

	// try to convert encoding
	if(is_html)
	{
		std::wstring encoding;
		char* begin = StrStrIA((LPSTR) ret, "<meta");
		while(begin && encoding.empty())
		{
			// Only look inside the current tag; a tag without a closing '>'
			// cannot be parsed.
			char* end = StrStrIA(begin, ">");
			char* s1 = end ? StrStrIA(begin, "Content-Type") : NULL;
			if(s1 && s1 < end)
			{
				s1 = StrStrIA(begin, "charset");
				if(s1 && s1 < end)
				{
					s1 += strlen("charset");
					// Skip separators ('=', quotes, spaces). The bound is
					// checked first and the character is passed to isalnum()
					// as unsigned char to avoid undefined behaviour on bytes
					// above 0x7F.
					while(s1 < end && !isalnum((unsigned char) s1[0]))
					{
						s1++;
					}
					while(s1 < end && (isalnum((unsigned char) s1[0]) || s1[0] == '-'))
					{
						encoding += (wchar_t) (unsigned char) s1[0];
						s1++;
					}
				}
			}
			if(encoding.empty())
			{
				begin = StrStrIA(begin + strlen("<meta"), "<meta");
			}
		}

		if(encoding.empty() && defEncoding)
		{
			encoding = defEncoding;
		}

		// The data is already in the target encoding, nothing to convert.
		if(!encoding.empty() && !StrCmpI(encoding.c_str(), L"UTF-8"))
		{
			encoding.clear();
		}

		if(!encoding.empty())
		{
			// CoUninitialize() must only balance a successful CoInitialize().
			// On failure (e.g. RPC_E_CHANGED_MODE) COM may still be usable on
			// this thread, so the conversion is attempted anyway.
			HRESULT hrInit = CoInitialize(NULL);

			IMultiLanguage* ml = NULL;
			HRESULT hr = CoCreateInstance(CLSID_CMultiLanguage, NULL, CLSCTX_INPROC_SERVER, IID_IMultiLanguage, (LPVOID*) &ml);
			if(SUCCEEDED(hr) && ml)
			{
				MIMECSETINFO charset_src = {0};
				MIMECSETINFO charset_dst = {0};

				BSTR bstrSrc = SysAllocString(encoding.c_str());
				BSTR bstrDst = SysAllocString(L"utf-8");
				bool haveCharsets = bstrSrc && bstrDst
					&& SUCCEEDED(ml->GetCharsetInfo(bstrSrc, &charset_src))
					&& SUCCEEDED(ml->GetCharsetInfo(bstrDst, &charset_dst));
				SysFreeString(bstrSrc);
				SysFreeString(bstrDst);

				size_t srcLen = strlen((LPSTR) ret);
				// Up to 4 output bytes are reserved per input byte; skip the
				// conversion if that size does not fit into a UINT.
				if(haveCharsets && srcLen <= ((UINT) -1 - 1) / 4)
				{
					DWORD dwMode = 0;
					UINT  szDst = (UINT) srcLen * 4;
					// One extra byte for the terminating NUL written below.
					LPSTR dst = new char[szDst + 1];

					if(ml->ConvertString(&dwMode, charset_src.uiInternetEncoding, charset_dst.uiInternetEncoding, (LPBYTE) ret, NULL, (LPBYTE) dst, &szDst) == S_OK)
					{
						dst[szDst] = 0;
						delete[] ret;
						ret = (unsigned char*) dst;
					} else
					{
						delete[] dst;
					}
				}
				ml->Release();
			}

			if(SUCCEEDED(hrInit))
			{
				CoUninitialize();
			}
		}
	}

	return ret;
}