Beispiel #1
0
	bool convertToUnicode(int srcCodepage, const char * src, size_t * srcbytes, wchar_t * dest, size_t *destchars)
	{
		UINT uisrcbytes = static_cast<UINT>(*srcbytes), uidestchars = static_cast<UINT>(*destchars);
		HRESULT hr = m_pmlang->ConvertStringToUnicode(&m_mlangcookie, srcCodepage, (char *)src, &uisrcbytes, dest, &uidestchars);
		*srcbytes = uisrcbytes;
		*destchars = uidestchars;
		return SUCCEEDED(hr) ? true : false;
	}
Beispiel #2
0
std::wstring load_text_file(const std::wstring &path, uint32_t codepage)
{
    struct F {
        static void release(IUnknown *x) {  x->Release(); }
    };

    IStream *stream;
    HRESULT hr = SHCreateStreamOnFileW(path.c_str(),
                                       STGM_READ | STGM_SHARE_DENY_WRITE,
                                       &stream);
    if (FAILED(hr)) win32::throw_error(path, hr);
    std::shared_ptr<IStream> streamPtr(stream, F::release);

    LARGE_INTEGER li = { 0 };
    ULARGE_INTEGER ui;
    HR(stream->Seek(li, STREAM_SEEK_END, &ui));
    if (ui.QuadPart > 0x100000) {
        throw std::runtime_error(strutil::w2us(path + L": file too big"));
    }
    size_t fileSize = ui.LowPart;
    HR(stream->Seek(li, STREAM_SEEK_SET, &ui));

    IMultiLanguage2 *mlang;
    HR(CoCreateInstance(CLSID_CMultiLanguage, 0, CLSCTX_INPROC_SERVER,
                IID_IMultiLanguage2, (void**)(&mlang)));
    std::shared_ptr<IMultiLanguage2> mlangPtr(mlang, F::release);

    if (!codepage) {
        DetectEncodingInfo encoding[5];
        INT nscores = 5;
        HR(mlang->DetectCodepageInIStream(0, GetACP(),
                                          stream, encoding, &nscores));
        /*
         * Usually DetectCodepageInIStream() puts the most appropriate choice
         * in the first place.
         * However, it tends to pick 8bit locale charset for the first place,
         * even if it is really an UTF-8 encoded file.
         */
        codepage = encoding[0].nCodePage;
        for (size_t i = 0; i < nscores; ++i)
            if (encoding[i].nCodePage == 65001) {
                codepage = 65001;
                break;
            }
        HR(stream->Seek(li, STREAM_SEEK_SET, &ui));
    }
    std::vector<char> ibuf(fileSize);
    ULONG nread;
    HR(stream->Read(&ibuf[0], ibuf.size(), &nread));

    DWORD ctx = 0;
    UINT size = ibuf.size(), cnt;
    HR(mlang->ConvertStringToUnicode(&ctx, codepage,
                                     &ibuf[0], &size, 0, &cnt));
    std::vector<wchar_t> obuf(cnt);
    size = ibuf.size();
    HR(mlang->ConvertStringToUnicode(&ctx, codepage,
                                     &ibuf[0], &size, &obuf[0], &cnt));
    obuf.push_back(0);
    // chop off BOM
    size_t bom = obuf.size() && obuf[0] == 0xfeff;
    return strutil::normalize_crlf(&obuf[bom], L"\n");
}