Exemplo n.º 1
0
/*
	static
*/
/*
	Builds a Unicode -> inCharSet converter on top of the IMultiLanguage2
	instance obtained from RetainMultiLanguage().

	Returns NULL when no IMultiLanguage2 instance is available or when the
	freshly built converter reports !IsValid(). The reference obtained from
	RetainMultiLanguage() is released before returning.
*/
VFromUnicodeConverter* XWinIntlMgr::NewFromUnicodeConverter(CharSet inCharSet)
{
	IMultiLanguage2* mlang = RetainMultiLanguage();
	if (mlang == NULL)
		return NULL;

	XWinFromUnicodeConverter* result = new XWinFromUnicodeConverter( mlang, inCharSet);
	if (result != NULL && !result->IsValid())
	{
		// An unusable converter is discarded rather than handed to the caller.
		delete result;
		result = NULL;
	}

	// Drop the reference taken by RetainMultiLanguage() above.
	mlang->Release();
	return result;
}
Exemplo n.º 2
0
	/// Releases the MLang interface first, then unloads the library handle;
	/// either step is skipped when the corresponding member is null.
	~CExconverterMLang()
	{
		if (m_pmlang != nullptr)
		{
			m_pmlang->Release();
		}

		if (m_hLibMLang != nullptr)
		{
			FreeLibrary(m_hLibMLang);
		}
	}
Exemplo n.º 3
0
	bool convertToUnicode(int srcCodepage, const char * src, size_t * srcbytes, wchar_t * dest, size_t *destchars)
	{
		UINT uisrcbytes = static_cast<UINT>(*srcbytes), uidestchars = static_cast<UINT>(*destchars);
		HRESULT hr = m_pmlang->ConvertStringToUnicode(&m_mlangcookie, srcCodepage, (char *)src, &uisrcbytes, dest, &uidestchars);
		*srcbytes = uisrcbytes;
		*destchars = uidestchars;
		return SUCCEEDED(hr) ? true : false;
	}
Exemplo n.º 4
0
	bool convertFromUnicode(int dstCodepage, const wchar_t * src, size_t * srcchars, char * dest, size_t *destbytes)
	{
		UINT uisrcchars = static_cast<UINT>(*srcchars), uidestbytes = static_cast<UINT>(*destbytes);
		HRESULT hr = m_pmlang->ConvertStringFromUnicode(&m_mlangcookie, dstCodepage, (wchar_t *)src, &uisrcchars, (char *)dest, &uidestbytes);
		*srcchars = uisrcchars;
		*destbytes = uidestbytes;
		return SUCCEEDED(hr) ? true : false;
	}
Exemplo n.º 5
0
	bool getCodePageInfo(int codepage, CodePageInfo *pCodePageInfo)
	{
		MIMECPINFO mcpi = {0};
		HRESULT hr = m_pmlang->GetCodePageInfo(codepage, GetSystemDefaultLangID(), &mcpi);
		if (FAILED(hr))
			return false;
		pCodePageInfo->fixedWidthFont = ucr::toTString(mcpi.wszFixedWidthFont);
		pCodePageInfo->bGDICharset = mcpi.bGDICharset;
		return true;
	}
Exemplo n.º 6
0
	bool getCodepageDescription(int codepage, String& sDescription)
	{
		wchar_t szDescription[256];
		HRESULT hr = m_pmlang->GetCodePageDescription(codepage, GetSystemDefaultLangID(), szDescription, sizeof(szDescription)/sizeof(wchar_t));
		if (FAILED(hr))
			return false;

		sDescription = ucr::toTString(szDescription);
		return true;
	}
Exemplo n.º 7
0
	bool getCodepageFromCharsetName(const String& sCharsetName, int& codepage)
	{
		MIMECSETINFO charsetInfo;
		BSTR bstrCharsetName = SysAllocString(ucr::toUTF16(sCharsetName).c_str());
		HRESULT hr = m_pmlang->GetCharsetInfo(bstrCharsetName, &charsetInfo);
		SysFreeString(bstrCharsetName);
		if (FAILED(hr))
			return false;
		codepage = charsetInfo.uiInternetEncoding;
		return true;
	}
Exemplo n.º 8
0
	std::vector<CodePageInfo> enumCodePages()
	{
		std::vector<CodePageInfo> cpinfo;
		IEnumCodePage *pEnumCodePage = nullptr;
		ULONG ccpInfo;
		HRESULT hr = m_pmlang->EnumCodePages(MIMECONTF_SAVABLE_BROWSER | MIMECONTF_VALID | MIMECONTF_VALID_NLS, 0, &pEnumCodePage);
		if (FAILED(hr))
			return cpinfo;
		std::unique_ptr<MIMECPINFO[]> pcpInfo(new MIMECPINFO[256]);
		if (FAILED(pEnumCodePage->Next(256, pcpInfo.get(), &ccpInfo)))
			return cpinfo;

		cpinfo.resize(ccpInfo);
		for (int i = 0; i < (int)ccpInfo; i++)
		{
			cpinfo[i].codepage = pcpInfo[i].uiCodePage;
			cpinfo[i].desc = ucr::toTString(pcpInfo[i].wszDescription);
		}

		return cpinfo;
	}
Exemplo n.º 9
0
	int detectInputCodepage(int autodetectType, int defcodepage, const char *data, size_t size)
	{
		int codepage;
		IMLangConvertCharset *pcc;
		UINT dstsize;
		UINT srcsize;
		HRESULT hr;

		hr = m_pmlang->CreateConvertCharset(autodetectType, ucr::CP_UCS2LE, MLCONVCHARF_AUTODETECT, &pcc);
		if (FAILED(hr))
			return defcodepage;
		srcsize = static_cast<UINT>(size);
		dstsize = static_cast<UINT>(size * sizeof(wchar_t));
		std::unique_ptr<unsigned char[]> pdst(new unsigned char[size * sizeof(wchar_t)]);
		SetLastError(0);
		hr = pcc->DoConversion((unsigned char *)data, &srcsize, pdst.get(), &dstsize);
		pcc->GetSourceCodePage((unsigned *)&codepage);
		if (FAILED(hr) || GetLastError() == ERROR_NO_UNICODE_TRANSLATION || codepage == autodetectType)
		{
			int codepagestotry[3] = {0};
			if (codepage == autodetectType)
			{
				if (size < 2 || (data[0] != 0 && data[1] != 0))
				{
					codepagestotry[0] = defcodepage;
					codepagestotry[1] = ucr::CP_UTF_8;
				}
			}
			else
			{
				if (size < 2 || (data[0] != 0 && data[1] != 0))
					codepagestotry[0] = ucr::CP_UTF_8;
			}
			codepage = defcodepage;
			size_t i;
			for (i = 0; i < sizeof(codepagestotry)/sizeof(codepagestotry[0]) - 1; i++)
			{
				if (codepagestotry[i] == 0) break;
				pcc->Initialize(codepagestotry[i], ucr::CP_UCS2LE, 0);
				srcsize = static_cast<UINT>(size);
				dstsize = static_cast<UINT>(size * sizeof(wchar_t));
				SetLastError(0);
				hr = pcc->DoConversion((unsigned char *)data, &srcsize, pdst.get(), &dstsize);
				if (FAILED(hr) || GetLastError() == ERROR_NO_UNICODE_TRANSLATION)
					continue;
				codepage = codepagestotry[i];
				break;
			}
			if (codepagestotry[i] == 0 && (size % 2) == 0)
			{
				// UCS-2
				int lezerocount = 0;
				int lecrorlf = 0;
				int bezerocount = 0;
				int becrorlf = 0;
				for (i = 0; i < size; i += 2)
				{
					if (data[i] == 0)
					{
						bezerocount++;
						if (data[i + 1] == 0x0a || data[i + 1] == 0x0d)
							becrorlf++;
					}
					else if (data[i + 1] == 0)
					{
						lezerocount++;
						if (data[i] == 0x0a || data[i] == 0x0d)
							lecrorlf++;
					}
				}
				if (lezerocount > 0 || bezerocount > 0)
				{
					if ((lecrorlf == 0 && size < 512 || (lecrorlf > 0 && (size / lecrorlf > 1024))) && lezerocount > bezerocount)
						codepage = ucr::CP_UCS2LE;
					else if ((becrorlf == 0 && size < 512 || (becrorlf > 0 && (size / becrorlf > 1024))) && lezerocount < bezerocount)
						codepage = ucr::CP_UCS2BE;
				}
			}
		}
		if (codepage == 20127)
			return defcodepage;
		return codepage;
	}
Exemplo n.º 10
0
std::wstring load_text_file(const std::wstring &path, uint32_t codepage)
{
    struct F {
        static void release(IUnknown *x) {  x->Release(); }
    };

    IStream *stream;
    HRESULT hr = SHCreateStreamOnFileW(path.c_str(),
                                       STGM_READ | STGM_SHARE_DENY_WRITE,
                                       &stream);
    if (FAILED(hr)) win32::throw_error(path, hr);
    std::shared_ptr<IStream> streamPtr(stream, F::release);

    LARGE_INTEGER li = { 0 };
    ULARGE_INTEGER ui;
    HR(stream->Seek(li, STREAM_SEEK_END, &ui));
    if (ui.QuadPart > 0x100000) {
        throw std::runtime_error(strutil::w2us(path + L": file too big"));
    }
    size_t fileSize = ui.LowPart;
    HR(stream->Seek(li, STREAM_SEEK_SET, &ui));

    IMultiLanguage2 *mlang;
    HR(CoCreateInstance(CLSID_CMultiLanguage, 0, CLSCTX_INPROC_SERVER,
                IID_IMultiLanguage2, (void**)(&mlang)));
    std::shared_ptr<IMultiLanguage2> mlangPtr(mlang, F::release);

    if (!codepage) {
        DetectEncodingInfo encoding[5];
        INT nscores = 5;
        HR(mlang->DetectCodepageInIStream(0, GetACP(),
                                          stream, encoding, &nscores));
        /*
         * Usually DetectCodepageInIStream() puts the most appropriate choice
         * in the first place.
         * However, it tends to pick 8bit locale charset for the first place,
         * even if it is really an UTF-8 encoded file.
         */
        codepage = encoding[0].nCodePage;
        for (size_t i = 0; i < nscores; ++i)
            if (encoding[i].nCodePage == 65001) {
                codepage = 65001;
                break;
            }
        HR(stream->Seek(li, STREAM_SEEK_SET, &ui));
    }
    std::vector<char> ibuf(fileSize);
    ULONG nread;
    HR(stream->Read(&ibuf[0], ibuf.size(), &nread));

    DWORD ctx = 0;
    UINT size = ibuf.size(), cnt;
    HR(mlang->ConvertStringToUnicode(&ctx, codepage,
                                     &ibuf[0], &size, 0, &cnt));
    std::vector<wchar_t> obuf(cnt);
    size = ibuf.size();
    HR(mlang->ConvertStringToUnicode(&ctx, codepage,
                                     &ibuf[0], &size, &obuf[0], &cnt));
    obuf.push_back(0);
    // chop off BOM
    size_t bom = obuf.size() && obuf[0] == 0xfeff;
    return strutil::normalize_crlf(&obuf[bom], L"\n");
}