Beispiel #1
0
/**
 * Minimal MultiByteToWideChar replacement that converts UTF-8 to UTF-16.
 *
 * CodePage and dwFlags are accepted for signature compatibility only; this
 * implementation does not consult them.
 *
 * @param lpMultiByteStr UTF-8 input.
 * @param cbMultiByte    Input size in bytes, or -1 when the input is
 *                       NUL-terminated (the terminator is then converted too).
 * @param lpWideCharStr  Output buffer; not used when cchWideChar is 0.
 * @param cchWideChar    Capacity of lpWideCharStr in WCHARs, or 0 to only
 *                       compute the required size.
 * @return Number of WCHARs written (or required when cchWideChar is 0),
 *         or 0 on invalid arguments or conversion failure.
 */
int MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr,
		int cbMultiByte, LPWSTR lpWideCharStr, int cchWideChar)
{
	int length;
	LPWSTR targetStart;
	const BYTE* sourceStart;
	ConversionResult result;

	/* A size of 0, or any negative size other than -1, is an error */

	if ((cbMultiByte == 0) || (cbMultiByte < -1))
		return 0;

	/* If cbMultiByte is -1, the string is null-terminated */

	if (cbMultiByte == -1)
		cbMultiByte = (int) strlen((const char*) lpMultiByteStr) + 1;

	sourceStart = (const BYTE*) lpMultiByteStr;

	/*
	 * if cchWideChar is 0, the function returns the required buffer size
	 * in characters for lpWideCharStr and makes no use of the output parameter itself.
	 */

	if (cchWideChar == 0)
	{
		/* Measuring pass: start from NULL so the advanced pointer is the count */
		targetStart = (WCHAR*) NULL;

		result = ConvertUTF8toUTF16(&sourceStart, &sourceStart[cbMultiByte],
				&targetStart, NULL, strictConversion);

		length = targetStart - ((WCHAR*) NULL);
	}
	else
	{
		targetStart = lpWideCharStr;

		result = ConvertUTF8toUTF16(&sourceStart, &sourceStart[cbMultiByte],
				&targetStart, &targetStart[cchWideChar], strictConversion);

		length = targetStart - ((WCHAR*) lpWideCharStr);
	}

	/* Do not report a partial count as success when the conversion failed */
	return (result == conversionOK) ? length : 0;
}
Beispiel #2
0
std::wstring *
keymagic_driver::U8toU16(const std::string& u8)
{
	unsigned int length = u8.length();
	length++;

	const UTF8 * source = (UTF8*)u8.c_str();
	const UTF8 * sourceStart = source;
	const UTF8 * sourceEnd = source + length;
	
	UTF16 * target = new UTF16[length];
	UTF16 * targetStart = target;
	UTF16 * targetEnd = target + length;
	
	ConvertUTF8toUTF16(&sourceStart, sourceEnd, &targetStart, targetEnd, lenientConversion);
	
	wchar_t * wcs = new wchar_t[length];
	memset(wcs, 0, length * sizeof (wchar_t));
	for (int i = 0; i < targetStart - target; i++) {
		wcs[i] = target[i];
	}

	std::wstring * s = new std::wstring(wcs);

	delete[] wcs;
	delete[] target;

	return s;
}
/// Converts \p SrcUTF8 to UTF-16 and stores the code units in \p DstUTF16,
/// which must be empty on entry.
///
/// \returns true on success (including empty input); false if the strict
/// conversion fails, in which case \p DstUTF16 is cleared.
bool convertUTF8ToUTF16String(StringRef SrcUTF8,
                              SmallVectorImpl<UTF16> &DstUTF16) {
    assert(DstUTF16.empty());

    // Nothing to convert; returning here also avoids indexing an empty vector.
    if (SrcUTF8.empty())
        return true;

    const UTF8 *In = reinterpret_cast<const UTF8 *>(SrcUTF8.begin());
    const UTF8 *const InEnd = reinterpret_cast<const UTF8 *>(SrcUTF8.end());

    // One UTF-16 code unit per input byte is an upper bound on the output.
    // The extra slot leaves room for a terminator; the vector is shrunk to
    // the real length afterwards.
    DstUTF16.resize(SrcUTF8.size()+1);
    UTF16 *const OutBegin = &DstUTF16[0];
    UTF16 *Out = OutBegin;
    UTF16 *const OutEnd = OutBegin + DstUTF16.size();

    const ConversionResult CR =
        ConvertUTF8toUTF16(&In, InEnd, &Out, OutEnd, strictConversion);
    assert(CR != targetExhausted);

    if (CR == conversionOK) {
        DstUTF16.resize(Out - OutBegin);
        // Push then pop a zero so the slot just past the end holds a
        // terminator without being counted in size().
        DstUTF16.push_back(0);
        DstUTF16.pop_back();
        return true;
    }

    DstUTF16.clear();
    return false;
}
//------------------------------------------------------------------------------
// UTF8ToString16
//------------------------------------------------------------------------------
bool UTF8ToString16(const char *in, int len, std::string16 *out16) {
  assert(in);
  assert(len >= 0);
  assert(out16);

  if (len <= 0) {
    *out16 = STRING16(L"");
    return true;
  }

  const UTF8 *source_ptr = reinterpret_cast<const UTF8*>(in);
  const UTF8 *source_end_ptr = source_ptr + len;  // should point 'beyond last'

  // UTF16 string has at most as many 'characters' as UTF8 one.
  out16->resize(len);
  UTF16 *target_ptr = reinterpret_cast<UTF16*>(&(*out16)[0]);
  UTF16 *target_ptr_original = target_ptr;
  UTF16 *target_end_ptr = target_ptr + len;
  ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr,
                                               &target_ptr, target_end_ptr,
                                               strictConversion);

  // Resize to be the size of the # of converted characters.
  // Note that stl strings always account for \0 end-of-line character
  // automatically, so no need to do "+1" here.
  out16->resize(result == conversionOK ? target_ptr - target_ptr_original : 0);

  return result == conversionOK;
}
Beispiel #5
0
	// Converts the UTF-8 string `utf8` into `wide`, choosing UTF-32 or UTF-16
	// output according to sizeof(wchar_t). `wide` is resized to the number of
	// code units produced. Returns the ConversionResult of the conversion as
	// an int, or sourceIllegal if wchar_t matches neither encoding width.
	inline int utf8_wchar(const std::string &utf8, std::wstring &wide)
	{
		// allocate space for worst-case
		wide.resize(utf8.size());
		// Write through a mutable element pointer; the buffer returned by
		// c_str() must not be modified. An empty string has no element to
		// point at, so a null range is used instead.
		wchar_t* const dst_begin = wide.empty() ? 0 : &wide[0];
		wchar_t* dst_start = dst_begin;
		char const* src_start = utf8.c_str();
		ConversionResult ret;
		if (sizeof(wchar_t) == sizeof(UTF32))
		{
			ret = ConvertUTF8toUTF32((const UTF8**)&src_start, (const UTF8*)src_start
				+ utf8.size(), (UTF32**)&dst_start, (UTF32*)dst_start + wide.size()
				, lenientConversion);
			wide.resize(dst_start - dst_begin);
			return ret;
		}
		else if (sizeof(wchar_t) == sizeof(UTF16))
		{
			ret = ConvertUTF8toUTF16((const UTF8**)&src_start, (const UTF8*)src_start
				+ utf8.size(), (UTF16**)&dst_start, (UTF16*)dst_start + wide.size()
				, lenientConversion);
			wide.resize(dst_start - dst_begin);
			return ret;
		}
		else
		{
			return sourceIllegal;
		}
	}
Beispiel #6
0
	// Converts the UTF-8 string `utf8` into `wide`, choosing UTF-32 or UTF-16
	// output according to sizeof(wchar_t). `wide` is resized to the number of
	// code units produced. Returns the conversion status cast to
	// utf8_conv_result_t, or source_illegal if wchar_t matches neither
	// encoding width.
	utf8_conv_result_t utf8_wchar(const std::string &utf8, std::wstring &wide)
	{
		// allocate space for worst-case
		wide.resize(utf8.size());
		// Write through a mutable element pointer; the buffer returned by
		// c_str() must not be modified. An empty string has no element to
		// point at, so a null range is used instead.
		wchar_t* const dst_begin = wide.empty() ? 0 : &wide[0];
		wchar_t* dst_start = dst_begin;
		char const* src_start = utf8.c_str();
		ConversionResult ret;
		// TODO: 3 refactor this to use wchar_t as a template
		// it would cause less code to be generated without
		// relying on dead-code elimination and fix msvc constant
		// expression warning
		if (sizeof(wchar_t) == sizeof(UTF32))
		{
			ret = ConvertUTF8toUTF32((const UTF8**)&src_start, (const UTF8*)src_start
				+ utf8.size(), (UTF32**)&dst_start, (UTF32*)dst_start + wide.size()
				, lenientConversion);
			wide.resize(dst_start - dst_begin);
			return (utf8_conv_result_t)ret;
		}
		else if (sizeof(wchar_t) == sizeof(UTF16))
		{
			ret = ConvertUTF8toUTF16((const UTF8**)&src_start, (const UTF8*)src_start
				+ utf8.size(), (UTF16**)&dst_start, (UTF16*)dst_start + wide.size()
				, lenientConversion);
			wide.resize(dst_start - dst_begin);
			return (utf8_conv_result_t)ret;
		}
		else
		{
			return source_illegal;
		}
	}
Beispiel #7
0
	// Returns the UTF-16 form of m_str. The converted string is created on
	// the first call and cached in m_pU16; later calls return the cached
	// object.
	std::u16string &u16str() {
		if (m_pU16 != NULL) {
			return *m_pU16;
		}

		m_pU16 = new std::u16string();
		ConvertUTF8toUTF16(m_str, *m_pU16, lenientConversion);
		return *m_pU16;
	}
Beispiel #8
0
// Converts the UTF-8 string `s` to a wide string (UTF-16 when WIN32 is
// defined, UTF-32 otherwise).
//
// If the strict conversion fails, the input is re-interpreted with the
// locale-dependent mbstowcs() instead. For sourceIllegal that text is
// returned as is; for any other failure it is prefixed with a diagnostic
// marker. If mbstowcs() also rejects the input, the fallback text is empty.
std::wstring UTF8toWS(const AnsiString& s){
	std::wstring Value;
	if (s.size()==0)
	{
		return Value;
	}
	Value.resize(s.size());

	const  UTF8* Start = (const UTF8*)s.c_str();
	const  UTF8* End = Start + s.size();

#ifdef WIN32
	UTF16* DestStart = (UTF16*)(&Value[0]);
	UTF16* DestEnd = DestStart + s.size();

	ConversionResult ret = ConvertUTF8toUTF16(&Start,End, &DestStart, DestEnd, strictConversion);
#else
	UTF32* DestStart = (UTF32*)(&Value[0]);
	UTF32* DestEnd = DestStart + s.size();

	ConversionResult ret = ConvertUTF8toUTF32(&Start,End, &DestStart, DestEnd, strictConversion);
#endif
	if (ret != conversionOK)
	{
		// Select the locale before measuring, so the length and the actual
		// conversion are computed under the same locale.
		setlocale(LC_ALL,"");

		std::wstring Fallback;
		const size_t n = mbstowcs(NULL,s.c_str(),0);
		// mbstowcs reports an invalid sequence as (size_t)-1; that value must
		// never be used as a length.
		if (n != (size_t)-1 && n != 0)
		{
			Fallback.resize(n);
			mbstowcs(&Fallback[0],s.c_str(),n);
		}

		if(ret == sourceIllegal)
		{
			return Fallback;
		}

		// Throwing from a utility function like this would mostly be a
		// nuisance to the host program, so return a marker followed by the
		// (possibly garbled) fallback text instead.
		Value = _T("UFT8TO16 Convert Fail:");
		Value.insert(Value.end(),Fallback.begin(),Fallback.end());
		return Value;
	}

	// The buffer was zero-filled to the worst-case size; cut it at the first
	// NUL. When no NUL is found, npos keeps the whole string.
	std::wstring::size_type n = Value.find_first_of(_T('\0'));
	Value = Value.substr(0,n);

	return Value;
}
Beispiel #9
0
LPWSTR ConvertFileToUnicode(LPCWSTR fileName)
// Converts a UTF-8 text file to UTF-16, writing the result to a new temporary
// file whose name (from TempFileName) is returned.
// Bytes that cannot be decoded as UTF-8 are copied through one at a time.
// Throws ErrCannotOpenFile, ErrNotEnoughMemory, ErrCannotReadFile or
// ErrCannotConvertFile; local buffers and handles are released before throwing.
	{
	// open file & get file size
	HANDLE hFile = ::MakeSISOpenFile(fileName, GENERIC_READ, OPEN_EXISTING);
	if(hFile == INVALID_HANDLE_VALUE) throw ErrCannotOpenFile;
	DWORD fileSize = ::GetFileSize(hFile, NULL);

	// read in whole file
	LPSTR pNarrowBuf = new CHAR [fileSize];
	if (!pNarrowBuf)
		{
		::CloseHandle(hFile);
		throw ErrNotEnoughMemory;
		}
	DWORD bytesRead = 0;
	BOOL ok=::ReadFile(hFile, (LPVOID)pNarrowBuf, fileSize, &bytesRead, NULL);
	::CloseHandle(hFile);
	if (!ok)
		{
		delete [] pNarrowBuf;
		throw ErrCannotReadFile;
		}

	// write to new temporary file
	LPWSTR pszTempSource=TempFileName(fileName);
	hFile = ::MakeSISOpenFile(pszTempSource, GENERIC_WRITE|GENERIC_READ, CREATE_NEW);
	if (hFile==INVALID_HANDLE_VALUE)
		{
		delete [] pNarrowBuf;
		throw ErrCannotOpenFile;
		}

	uint16_t* pBufU=new uint16_t[fileSize];
	if (!pBufU)
		{
		::CloseHandle(hFile);
		delete [] pNarrowBuf;
		throw ErrNotEnoughMemory;
		}

	// convert text to unicode; only the bytes actually read are converted
	const UTF8* sourceStart=(UTF8*)pNarrowBuf;
	const UTF8* sourceEnd=sourceStart+bytesRead;
	while (ok && sourceStart<sourceEnd)
		{
		const UTF8* chunkStart=sourceStart;
		UTF16* targetStart=(UTF16 *)pBufU;
		UTF16* targetEnd=(UTF16 *)(pBufU+fileSize);
		int ret = ConvertUTF8toUTF16(&sourceStart, sourceEnd, &targetStart, targetEnd,lenientConversion);
		// On an illegal sequence, or when the converter consumed nothing
		// (e.g. a truncated sequence at the end of the file), pass one raw
		// byte through so that every iteration makes progress.
		if (ret == sourceIllegal || sourceStart == chunkStart) {
			*targetStart++ = *sourceStart++;
		}
		DWORD bytesWritten = 0;
		ok=::WriteFile(hFile, (LPVOID)pBufU, ((char*)targetStart-(char*)pBufU), &bytesWritten, NULL);
		}

	::CloseHandle(hFile);
	delete [] pNarrowBuf;
	delete [] pBufU;
	if (!ok) throw ErrCannotConvertFile;

	return pszTempSource;
	}
Beispiel #10
0
// Converts `utf8` to a NUL-terminated UTF-16 string allocated from `pool`.
// The returned CYUTF16String is built from the buffer and the number of code
// units written (the terminator is not counted).
CYUTF16String CYPoolUTF16String(CYPool &pool, CYUTF8String utf8) {
    // UTF-16 never needs more code units than the UTF-8 input has bytes; the
    // extra element holds the terminator. This also makes the allocation
    // non-empty for empty input, so the terminator write below stays in bounds.
    size_t size(utf8.size + 1);
    uint16_t *temp(new (pool) uint16_t[size]);

    const uint8_t *lhs(reinterpret_cast<const uint8_t *>(utf8.data));
    uint16_t *rhs(temp);
    // The last element is kept out of the target range, reserved for the NUL.
    _assert(ConvertUTF8toUTF16(&lhs, lhs + utf8.size, &rhs, temp + utf8.size, lenientConversion) == conversionOK);

    *rhs = 0;
    return CYUTF16String(temp, rhs - temp);
}
	/**
	 * Converts a UTF-8 string to a wide string, producing UTF-16 when wchar_t
	 * is 2 bytes and UTF-32 when it is 4 bytes.
	 *
	 * @param utf8String The UTF-8 input.
	 * @return The converted string, sized to the code units produced.
	 * @throws Exception with ERROR_UTF8_2_WIDE if the strict conversion fails
	 *         or wchar_t has an unsupported size.
	 */
	WideString StringUtils::utf8String2WideString( const String& utf8String )
	{
		const size_t widesize = utf8String.length();
		WideString returnWideString;

		// One spare element keeps element 0 addressable for empty input.
		returnWideString.resize( widesize + 1, L'\0' );
		const UTF8* sourcestart = reinterpret_cast<const UTF8*>( utf8String.c_str() );
		const UTF8* sourceend = sourcestart + widesize;

		ConversionResult res;
		size_t converted = 0;

		if ( sizeof( wchar_t ) == 2 )
		{
			UTF16* const thisFirstWChar = reinterpret_cast<UTF16*>( &((returnWideString)[ 0 ]) );
			UTF16* targetstart = thisFirstWChar;
			UTF16* targetend = thisFirstWChar + widesize;
			res = ConvertUTF8toUTF16( &sourcestart, sourceend, &targetstart, targetend, strictConversion );
			converted = targetstart - thisFirstWChar;
		}
		else if ( sizeof( wchar_t ) == 4 )
		{
			UTF32* const thisFirstWChar = reinterpret_cast<UTF32*>( &((returnWideString)[ 0 ]) );
			UTF32* targetstart = thisFirstWChar;
			UTF32* targetend = thisFirstWChar + widesize;
			res = ConvertUTF8toUTF32( &sourcestart, sourceend, &targetstart, targetend, strictConversion );
			converted = targetstart - thisFirstWChar;
		}
		else
		{
			throw Exception(Exception::ERROR_UTF8_2_WIDE, String("Could not convert from UTF8 to wide string."));
		}

		if ( res != conversionOK )
		{
			throw Exception(Exception::ERROR_UTF8_2_WIDE, String("Could not convert from UTF8 to wide string."));
		}

		// resize() maintains the terminator itself, so nothing is written
		// through the raw pointers after this point.
		returnWideString.resize( converted );
		return returnWideString;
	}
Beispiel #12
0
	// Converts `len` bytes of UTF-8 at `utf8str` to a wide string.
	// The conversion is only implemented for platforms where wchar_t has the
	// size of UTF16; elsewhere an empty string is returned. If the strict
	// conversion stops early, the result holds what was converted up to
	// that point.
	inline WString u2w(const char* utf8str, size_t len) {
		WString result;

		if (sizeof(wchar_t) == sizeof(UTF16)) {
			// Worst case is one UTF-16 code unit per input byte.
			result.resize(len);
			const UTF8* srcstart = (const UTF8*)utf8str;
			const UTF8* srcend = srcstart + len;
			UTF16* const dstbegin = (UTF16*)&result[0];
			UTF16* dststart = dstbegin;
			UTF16* dstend = dstbegin + len;
			ConvertUTF8toUTF16(&srcstart, srcend, &dststart, dstend, strictConversion);
			result.resize(dststart - dstbegin);
		}

		return result;
	}
Beispiel #13
0
/*
 * Converts src_len bytes of UTF-8 at src into a newly malloc'ed, zero-filled
 * UTF-16 buffer stored in *dst.
 *
 * *dst_len is set to src_len * sizeof(u16), the size in bytes of the buffer
 * without its trailing zero element (not the converted length).
 *
 * Returns -1 if the allocation fails, otherwise the result of
 * ConvertUTF8toUTF16. The buffer stays allocated in *dst in that case.
 */
int str_utf8_to_u16(u16 **dst, u32 *dst_len, u8 *src, u32 src_len)
{
	*dst_len = src_len*sizeof(u16);
	*dst = malloc((*dst_len)+sizeof(u16));
	if(*dst == NULL)
		return -1;
	memset(*dst,0,(*dst_len)+sizeof(u16));
	
	/* The target range is counted in UTF-16 units, not bytes; the last
	   (zeroed) element is left as the terminator. */
	UTF16 *target_start = *dst;
	UTF16 *target_end = target_start + src_len;
	
	const UTF8 *src_start = (const UTF8*)src;
	const UTF8 *src_end = src_start + src_len;
	
	return ConvertUTF8toUTF16 (&src_start, src_end, &target_start, target_end, strictConversion);
}
// Converts `len` bytes of UTF-8 at `value` to UTF-16 and writes to `out`
// the length of the UTF-16 data in bytes (via writeUint8) followed by each
// code unit (via writeUint16).
// Throws ErrBadUTF8 if the input cannot be converted.
void writeString(const char* value, int len, FILE* out) {
	UTF16* utf16String = new UTF16[len];
	const UTF8* sourceStart = (const UTF8*) value;
	const UTF8* sourceEnd = sourceStart + len;
	UTF16* targetStart = utf16String;
	UTF16* targetEnd = utf16String + len;
	if (ConvertUTF8toUTF16(&sourceStart, sourceEnd, &targetStart, targetEnd, lenientConversion) != conversionOK) {
		// Release the scratch buffer before leaving through the exception.
		delete [] utf16String;
		fprintf(stderr, "Unable to interpret argument as UTF8 text\n");
		throw ErrBadUTF8;
	}
	uint32_t datalen = (intptr_t) ((uint8_t*)targetStart - (uint8_t*)utf16String);
	writeUint8(datalen, out);
	for (UTF16* ptr = utf16String; ptr < targetStart; ptr++)
		writeUint16(*ptr, out);
	delete [] utf16String;
}
Beispiel #15
0
/// Converts the UTF-8 string \p Source into code units of \p WideCharWidth
/// bytes (1, 2 or 4), writing them at \p ResultPtr.
///
/// On success \p ResultPtr is advanced past the written data. On failure
/// \p ErrorPtr is set to the position in the source where conversion stopped.
/// The caller must provide room for at least Source.size() code units.
///
/// \returns true if the conversion succeeded.
bool ConvertUTF8toWide(unsigned WideCharWidth, const std::string& Source,
					   char *&ResultPtr, const UTF8 *&ErrorPtr)
{
	assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4);
	ConversionResult result = conversionOK;
	// Copy the character span over.
	if (WideCharWidth == 1) {
		const UTF8 *Pos = reinterpret_cast<const UTF8*>(Source.data());
		if (!isLegalUTF8String(&Pos, reinterpret_cast<const UTF8*>(Source.data() + Source.length()))) {
			result = sourceIllegal;
			ErrorPtr = Pos;
		} else {
			memcpy(ResultPtr, Source.data(), Source.size());
			ResultPtr += Source.size();
		}
	} else if (WideCharWidth == 2) {
		const UTF8 *sourceStart = (const UTF8*)Source.data();
		// FIXME: Make the type of the result buffer correct instead of
		// using reinterpret_cast.
		UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr);
		ConversionFlags flags = strictConversion;
		// The bound is in UTF16 units: the output never has more code units
		// than the source has bytes, so it must not be scaled by the width.
		result = ConvertUTF8toUTF16(
			&sourceStart, sourceStart + Source.size(),
			&targetStart, targetStart + Source.size(), flags);
		if (result == conversionOK)
			ResultPtr = reinterpret_cast<char*>(targetStart);
		else
			ErrorPtr = sourceStart;
	} else if (WideCharWidth == 4) {
		const UTF8 *sourceStart = (const UTF8*)Source.data();
		// FIXME: Make the type of the result buffer correct instead of
		// using reinterpret_cast.
		UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr);
		ConversionFlags flags = strictConversion;
		// As above, the bound is in UTF32 units, not bytes.
		result = ConvertUTF8toUTF32(
			&sourceStart, sourceStart + Source.size(),
			&targetStart, targetStart + Source.size(), flags);
		if (result == conversionOK)
			ResultPtr = reinterpret_cast<char*>(targetStart);
		else
			ErrorPtr = sourceStart;
	}
	assert((result != targetExhausted)
		&& "ConvertUTF8toUTFXX exhausted target buffer");
	return result == conversionOK;
}
std::wstring StringUtils::Utf8_To_wstring(const std::string& utf8string)
{
   if (utf8string.length()==0)
   {
      return std::wstring();
   }
   size_t widesize = utf8string.length();
   if (sizeof(wchar_t) == 2)
   {
      std::wstring resultstring;
      resultstring.resize(widesize, L'\0');
      const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str());
      const UTF8* sourceend = sourcestart + widesize;
      UTF16* targetstart = reinterpret_cast<UTF16*>(&resultstring[0]);
      UTF16* targetend = targetstart + widesize;
      ConversionResult res = ConvertUTF8toUTF16(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
      if (res != conversionOK)
      {
         return std::wstring(utf8string.begin(), utf8string.end());
      }
      *targetstart = 0;
      return std::wstring(resultstring.c_str());
   }
   else if (sizeof(wchar_t) == 4)
   {
      std::wstring resultstring;
      resultstring.resize(widesize, L'\0');
      const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str());
      const UTF8* sourceend = sourcestart + widesize;
      UTF32* targetstart = reinterpret_cast<UTF32*>(&resultstring[0]);
      UTF32* targetend = targetstart + widesize;
      ConversionResult res = ConvertUTF8toUTF32(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
      if (res != conversionOK)
      {
         return std::wstring(utf8string.begin(), utf8string.end());
      }
      *targetstart = 0;
      return std::wstring(resultstring.c_str());
   }
   else
   {
      assert(false);
   }
   return L"";
}
Beispiel #17
0
 static std::wstring FromUtf8(const std::string& utf8string)
 {
     size_t widesize = utf8string.length();
     if (sizeof(wchar_t) == 2)
     {
         wchar_t* widestringnative = new wchar_t[widesize+1];
         const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str());
         const UTF8* sourceend = sourcestart + widesize;
         UTF16* targetstart = reinterpret_cast<UTF16*>(widestringnative);
         UTF16* targetend = targetstart + widesize+1;
         ConversionResult res = ConvertUTF8toUTF16(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
         if (res != conversionOK)
         {
             delete [] widestringnative;
             throw std::exception();
         }
         *targetstart = 0;
         std::wstring resultstring(widestringnative);
         delete [] widestringnative;
         return resultstring;
     }
     else if (sizeof(wchar_t) == 4)
     {
         wchar_t* widestringnative = new wchar_t[widesize+1];
         const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str());
         const UTF8* sourceend = sourcestart + widesize;
         UTF32* targetstart = reinterpret_cast<UTF32*>(widestringnative);
         UTF32* targetend = targetstart + widesize+1;
         ConversionResult res = ConvertUTF8toUTF32(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
         if (res != conversionOK)
         {
             delete [] widestringnative;
             throw std::exception();
         }
         *targetstart = 0;
         std::wstring resultstring(widestringnative);
         delete [] widestringnative;
         return resultstring;
     }
     else
     {
         throw std::exception();
     }
     return L"";
 }
Beispiel #18
0
	// Converts a UTF-8 String to a wide string by delegating to the
	// (pointer, length) overload of u2w.
	WString u2w(const String& utf8str) {
		return u2w(utf8str.c_str(), utf8str.size());
	}
Beispiel #19
0
    std::wstring FromUtf8(const std::string& utf8string)
    {
        size_t widesize = utf8string.length();
        if (sizeof(wchar_t) == 2)
        {
            std::wstring resultstring;
            resultstring.resize(widesize+1, L'\0');
            const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str());
            const UTF8* sourceend = sourcestart + widesize;
            UTF16* targetstart = reinterpret_cast<UTF16*>(&resultstring[0]);
            UTF16* targetend = targetstart + widesize;
            ConversionResult res = ConvertUTF8toUTF16
		(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
            if (res != conversionOK)
            {
                throw std::exception("La falla!");
            }
            *targetstart = 0;
            return resultstring;
        }
        else if (sizeof(wchar_t) == 4)
        {
            std::wstring resultstring;
            resultstring.resize(widesize+1, L'\0');
            const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str());
            const UTF8* sourceend = sourcestart + widesize;
            UTF32* targetstart = reinterpret_cast<UTF32*>(&resultstring[0]);
            UTF32* targetend = targetstart + widesize;
            ConversionResult res = ConvertUTF8toUTF32
		(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
            if (res != conversionOK)
            {
                throw std::exception("La falla!");
            }
            *targetstart = 0;
            return resultstring;
        }
        else
        {
            throw std::exception("La falla!");
        }
        return L"";
    }
Beispiel #20
0
// Converts `length` bytes of UTF-8 at `str` into `outBuffer` as UTF-16 or
// UTF-32, depending on sizeof(wchar_t). The buffer is cleared first and a
// terminating zero is written after the converted data.
// Returns the number of code units written, or 0 for empty input or when the
// strict conversion fails.
intp StringParser::ConvertToBuffer(const char* str, size_t length, WHeapString& outBuffer)
{
	outBuffer.Clear();
	if (length == 0)
	{
		return 0;
	}

	outBuffer.ReserveSize(length + 1);
	const UTF8* src = reinterpret_cast<const UTF8*>(str);
	const UTF8* const srcEnd = src + length;

	constexpr bool isUTF16 = sizeof(wchar_t) == 2;
	if (!isUTF16)
	{
		UTF32* cursor = reinterpret_cast<UTF32*>(outBuffer.MutableBuffer());
		UTF32* const limit = cursor + length;
		const ConversionResult status = ConvertUTF8toUTF32(&src, srcEnd, &cursor, limit, strictConversion);
		*cursor = 0;
		if (status != conversionOK)
		{
			return 0;
		}

		const intp written = cursor - reinterpret_cast<UTF32*>(outBuffer.MutableBuffer());
		outBuffer.ForceSetLength(written);
		return written;
	}

	UTF16* cursor = reinterpret_cast<UTF16*>(outBuffer.MutableBuffer());
	UTF16* const limit = cursor + length;
	const ConversionResult status = ConvertUTF8toUTF16(&src, srcEnd, &cursor, limit, strictConversion);
	*cursor = 0;
	if (status != conversionOK)
	{
		return 0;
	}

	const intp written = cursor - reinterpret_cast<UTF16*>(outBuffer.MutableBuffer());
	outBuffer.ForceSetLength(written);
	return written;

}
Beispiel #21
0
			// Converts the UTF-8 range [*src_start, src_end) into `wide` as
			// UTF-16, using the current size of `wide` as the output
			// capacity. `*src_start` is advanced by the converter.
			//
			// On sourceIllegal, `wide` is replaced by the bytes from the
			// current `*src_start` up to `src_end`, each widened as an
			// unsigned 8-bit value. Otherwise `wide` is shrunk to the number
			// of code units written. The conversion status is returned
			// either way.
			static utf8_errors::error_code_enum convert(char const** src_start
				, char const* src_end
				, std::wstring& wide)
			{
				wchar_t* out = &wide[0];
				int const status = ConvertUTF8toUTF16(
					reinterpret_cast<UTF8 const**>(src_start)
					, reinterpret_cast<UTF8 const*>(src_end)
					, reinterpret_cast<UTF16**>(&out)
					, reinterpret_cast<UTF16*>(out + wide.size())
					, lenientConversion);

				if (status != sourceIllegal)
				{
					wide.resize(aux::numeric_cast<std::size_t>(out - wide.c_str()));
				}
				else
				{
					// assume Latin-1
					wide.clear();
					std::copy(reinterpret_cast<std::uint8_t const*>(*src_start)
						, reinterpret_cast<std::uint8_t const*>(src_end)
						, std::back_inserter(wide));
				}
				return static_cast<utf8_errors::error_code_enum>(status);
			}
Beispiel #22
0
// Converts the NUL-terminated UTF-8 string `txt` to UTF-16.
// Returns a malloc'ed, zero-terminated buffer and stores its length in
// two-byte code units (terminator excluded) in `len16`.
// Terminates the process with ERR_INTERNAL_ERROR if the buffer cannot be
// allocated or the conversion fails.
uint16_t* utf8to16(uint8_t* txt, int &len16) {
	// Length of UTF8 version of the search string
	int len = 0;
	while (txt[len] != 0) {
		len++;
	}

	// One code unit per input byte is the worst case, plus the terminator.
	uint16_t* txt16 = (uint16_t*) malloc((len + 1) * sizeof(uint16_t));
	if (txt16 == NULL) {
		printf("Out of memory converting input to UTF16");
		exit(ERR_INTERNAL_ERROR);
	}

	// The target bound is counted in code units, so it is txt16 + len; the
	// last element stays reserved for the trailing zero.
	uint16_t* txt16End = txt16;
	ConversionResult res = ConvertUTF8toUTF16((const UTF8**) &txt, (const UTF8*) (txt + len), 
			(UTF16**) &txt16End, (UTF16*) (txt16 + len), lenientConversion);

	if (res != conversionOK) {
		printf("Failed to convert input to UTF8");
		exit(ERR_INTERNAL_ERROR);
	}

	// Size of UTF16 version of the search string (in two byte characters)
	len16 = (int) (txt16End - txt16);
	// Set trailing zero
	txt16[len16] = 0;

	return txt16;
}
Beispiel #23
0
const bool FromUTF8(const std::vector<std::string::value_type> &utf8string, std::wstring &wcstring)
{
	if(utf8string.size()==0)
	{
		wcstring.assign(L"");
		return true;
	}

	std::vector<std::wstring::value_type> dest(utf8string.size(),0);		// dest will never be bigger than the input but could be smaller
	
	const UTF8 *sourcestart=reinterpret_cast<const UTF8 *>(&utf8string[0]);
	const UTF8 *sourceend=sourcestart+utf8string.size();
	
	if(sizeof(std::wstring::value_type)==2 && sizeof(UTF16)==2)
	{	
		UTF16 *deststart=reinterpret_cast<UTF16 *>(&dest[0]);
		UTF16 *destend=deststart+dest.size();
		
		ConversionResult rval=ConvertUTF8toUTF16(&sourcestart,sourceend,&deststart,destend,lenientConversion);
		
		if(rval!=conversionOK)
		{
			return false;	
		}
		
		wcstring.assign(dest.begin(),dest.end()-(destend-deststart));
		
	}
	else if(sizeof(std::wstring::value_type)==4 && sizeof(UTF32)==4)
	{
		UTF32 *deststart=reinterpret_cast<UTF32 *>(&dest[0]);
		UTF32 *destend=deststart+dest.size();
		
		ConversionResult rval=ConvertUTF8toUTF32(&sourcestart,sourceend,&deststart,destend,lenientConversion);

		if(rval!=conversionOK)
		{
			return false;
		}
		
		wcstring.assign(dest.begin(),dest.end()-(destend-deststart));
		
	}
	else
	{
		std::vector<UTF32> dest2(utf8string.size(),0);
		UTF32 *deststart=reinterpret_cast<UTF32 *>(&dest2[0]);
		UTF32 *destend=deststart+dest2.size();

		ConversionResult rval=ConvertUTF8toUTF32(&sourcestart,sourceend,&deststart,destend,lenientConversion);

		if(rval!=conversionOK)
		{
			return false;
		}

		wcstring.assign(dest2.begin(),dest2.end()-(destend-deststart));

	}

	return true;
}
Beispiel #24
0
/*
 * MultiByteToWideChar replacement that converts UTF-8 input to UTF-16.
 *
 * Built with WITH_ICU, the conversion is done by u_strFromUTF8 and only
 * CP_ACP and CP_UTF8 are accepted as CodePage. Otherwise ConvertUTF8toUTF16
 * is used in strict mode and CodePage is not examined. dwFlags is not used
 * in either configuration.
 *
 * cbMultiByte is the input size in bytes, or -1 for NUL-terminated input
 * (the terminator is then included in the conversion).
 * cchWideChar is the capacity of lpWideCharStr in WCHARs, or 0 to only
 * compute the required size.
 *
 * Returns the number of WCHARs written or required; 0 on invalid arguments,
 * an unsupported code page, or a failed conversion.
 */
int MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr,
                        int cbMultiByte, LPWSTR lpWideCharStr, int cchWideChar)
{
	LPWSTR targetStart;
#if !defined(WITH_ICU)
	const BYTE* sourceStart;
	int length;
	ConversionResult result;
#endif

	/* If cbMultiByte is 0 or less than -1, the function fails */

	if ((cbMultiByte == 0) || (cbMultiByte < -1))
		return 0;

	/* If cbMultiByte is -1, the string is null-terminated */

	if (cbMultiByte == -1)
	{
		size_t len = strlen((const char*) lpMultiByteStr);
		/* The length plus the terminator must still fit in an int */
		if (len >= INT32_MAX)
			return 0;
		cbMultiByte = (int)len + 1;
	}

	/*
	 * if cchWideChar is 0, the function returns the required buffer size
	 * in characters for lpWideCharStr and makes no use of the output parameter itself.
	 */
#if defined(WITH_ICU)
	{
		UErrorCode error;
		int32_t targetLength;
		int32_t targetCapacity;

		switch (CodePage)
		{
			case CP_ACP:
			case CP_UTF8:
				break;

			default:
				WLog_ERR(TAG, "Unsupported encoding %u", CodePage);
				return 0;
		}

		targetStart = lpWideCharStr;
		targetCapacity = cchWideChar;
		error = U_ZERO_ERROR;

		if (cchWideChar == 0)
		{
			/* Measuring call: no destination buffer; the reported length is
			   returned without inspecting the error code. */
			u_strFromUTF8(NULL, 0, &targetLength,
			              lpMultiByteStr, cbMultiByte, &error);
			cchWideChar = targetLength;
		}
		else
		{
			u_strFromUTF8(targetStart, targetCapacity, &targetLength,
			              lpMultiByteStr, cbMultiByte, &error);
			cchWideChar = U_SUCCESS(error) ? targetLength : 0;
		}
	}
#else

	if (cchWideChar == 0)
	{
		/* Measuring call: the target starts at NULL with no end pointer, so
		   the advanced pointer minus NULL is the required count. */
		sourceStart = (const BYTE*) lpMultiByteStr;
		targetStart = (WCHAR*) NULL;
		result = ConvertUTF8toUTF16(&sourceStart, &sourceStart[cbMultiByte],
		                            &targetStart, NULL, strictConversion);
		length = targetStart - ((WCHAR*) NULL);
	}
	else
	{
		sourceStart = (const BYTE*) lpMultiByteStr;
		targetStart = lpWideCharStr;
		result = ConvertUTF8toUTF16(&sourceStart, &sourceStart[cbMultiByte],
		                            &targetStart, &targetStart[cchWideChar], strictConversion);
		length = targetStart - ((WCHAR*) lpWideCharStr);
	}

	/* A failed conversion is reported as 0 rather than a partial count */
	cchWideChar = (result == conversionOK) ? length : 0;
#endif
	return cchWideChar;
}
Beispiel #25
0
/*
 * Reloads the database if GEOIP_CHECK_CACHE is set and the modification time
 * of gi->file_path differs from gi->mtime.
 *
 * On a change the cache is re-created (malloc'ed or mmap'ed according to
 * gi->flags), the file handle is reopened, the segments are set up again and
 * the index cache is re-read.
 *
 * Returns 0 if nothing had to be done or the reload succeeded, -1 on error.
 */
static
int _check_mtime(GeoIP *gi) {
	struct stat buf;
#ifdef WIN32
	int name_len;
	wchar_t* wfilename;
	wchar_t* dst_start;
	char const* src_start;
#endif

	if (!(gi->flags & GEOIP_CHECK_CACHE))
		return 0;
	if (stat(gi->file_path, &buf) == -1)
		return 0;
	if (buf.st_mtime == gi->mtime)
		return 0;

	/* GeoIP Database file updated */
	if (gi->flags & (GEOIP_MEMORY_CACHE | GEOIP_MMAP_CACHE)) {
#if !defined WIN32 && !defined __OS2__
		if ( gi->flags & GEOIP_MMAP_CACHE) {
			munmap(gi->cache, gi->size);
			gi->cache = NULL;
		} else
#endif
		{
			/* reload database into memory cache. The old contents are not
			   needed, so release them first instead of assigning a realloc
			   result over the only pointer to the old block. */
			free(gi->cache);
			if ((gi->cache = (unsigned char*) malloc(buf.st_size)) == NULL) {
				fprintf(stderr,"Out of memory when reloading %s\n",gi->file_path);
				return -1;
			}
		}
	}
	/* refresh filehandle */
	fclose(gi->GeoIPDatabase);
#ifdef WIN32
	assert(sizeof(wchar_t) == 2);
	name_len = strlen(gi->file_path);
	/* zero-filled, so the name is terminated even if conversion stops early */
	wfilename = calloc(name_len + 1, sizeof(wchar_t));
	if (wfilename == NULL) {
		/* the old handle was closed above; do not leave it dangling */
		gi->GeoIPDatabase = NULL;
		fprintf(stderr,"Out of memory when reloading %s\n",gi->file_path);
		return -1;
	}
	dst_start = wfilename;
	src_start = gi->file_path;
	ConvertUTF8toUTF16((const UTF8**)&src_start, (const UTF8*)src_start
		+ name_len+1, (UTF16**)&dst_start, (UTF16*)dst_start + name_len + 1
		, lenientConversion);
	gi->GeoIPDatabase = _wfopen(wfilename,L"rb");
	free(wfilename);
#else
	gi->GeoIPDatabase = fopen(gi->file_path,"rb");
#endif
	if (gi->GeoIPDatabase == NULL) {
		fprintf(stderr,"Error Opening file %s when reloading\n",gi->file_path);
		return -1;
	}
	gi->mtime = buf.st_mtime;
	gi->size = buf.st_size;

#if !defined WIN32 && !defined __OS2__
	if ( gi->flags & GEOIP_MMAP_CACHE) {
		gi->cache = (unsigned char*)mmap(NULL, buf.st_size, PROT_READ, MAP_PRIVATE, fileno(gi->GeoIPDatabase), 0);
		if ( gi->cache == MAP_FAILED ) {

			fprintf(stderr,"Error remapping file %s when reloading\n",gi->file_path);
			gi->cache = 0;
			return -1;
		}
	} else
#endif
	if ( gi->flags & GEOIP_MEMORY_CACHE ) {
		if (fread(gi->cache, sizeof(unsigned char), buf.st_size, gi->GeoIPDatabase) != (size_t) buf.st_size) {
			fprintf(stderr,"Error reading file %s when reloading\n",gi->file_path);
			return -1;
		}
	}
	if (gi->databaseSegments != NULL) {
		free(gi->databaseSegments);
		gi->databaseSegments = NULL;
	}
	_setup_segments(gi);
	if (gi->databaseSegments == NULL) {
		fprintf(stderr, "Error reading file %s -- corrupt\n", gi->file_path);
		return -1;
	}
	if (gi->flags & GEOIP_INDEX_CACHE) {
		/* As with the memory cache, the old index is re-read from the file,
		   so free and allocate rather than realloc over the only pointer. */
		free(gi->index_cache);
		gi->index_cache = (unsigned char *) malloc(sizeof(unsigned char) * ((gi->databaseSegments[0] * (long)gi->record_length * 2)));
		if (gi->index_cache != NULL) {
			fseek(gi->GeoIPDatabase, 0, SEEK_SET);
			if (fread(gi->index_cache, sizeof(unsigned char), gi->databaseSegments[0] * (long)gi->record_length * 2, gi->GeoIPDatabase) != (size_t) (gi->databaseSegments[0]*(long)gi->record_length * 2)) {
				fprintf(stderr,"Error reading file %s where reloading\n",gi->file_path);
				return -1;
			}
		}
	}
	return 0;
}
Beispiel #26
0
/*
 * Opens the GeoIP database file `filename` and returns a newly allocated
 * GeoIP handle, or NULL on failure.
 *
 * Depending on `flags` the file contents are loaded into a malloc'ed cache
 * (GEOIP_MEMORY_CACHE), mapped with mmap (GEOIP_MMAP_CACHE, not on WIN32 or
 * OS/2), and/or the index is cached (GEOIP_INDEX_CACHE). With
 * GEOIP_CHECK_CACHE the modification time is recorded.
 *
 * On every failure path the file handle and all memory allocated here are
 * released before returning NULL.
 */
GeoIP* GeoIP_open (const char * filename, int flags) {
	struct stat buf;
	GeoIP * gi;
	size_t len;
#ifdef WIN32
	int name_len;
	wchar_t* wfilename;
	wchar_t* dst_start;
	char const* src_start;
#endif

	gi = (GeoIP *)malloc(sizeof(GeoIP));
	if (gi == NULL)
		return NULL;
	len = sizeof(char) * (strlen(filename)+1);
	gi->file_path = (char*)malloc(len);
	if (gi->file_path == NULL) {
		free(gi);
		return NULL;
	}
	strncpy(gi->file_path, filename, len);
#ifdef WIN32
	assert(sizeof(wchar_t) == 2);
	name_len = strlen(filename);
	/* zero-filled, so the name is terminated even if conversion stops early */
	wfilename = calloc(name_len + 1, sizeof(wchar_t));
	if (wfilename == NULL) {
		free(gi->file_path);
		free(gi);
		return NULL;
	}
	dst_start = wfilename;
	src_start = filename;
	ConvertUTF8toUTF16((const UTF8**)&src_start, (const UTF8*)src_start
		+ name_len+1, (UTF16**)&dst_start, (UTF16*)dst_start + name_len + 1
		, lenientConversion);
	gi->GeoIPDatabase = _wfopen(wfilename,L"rb");
	free(wfilename);
#else
	gi->GeoIPDatabase = fopen(filename,"rb");
#endif
	if (gi->GeoIPDatabase == NULL) {
		fprintf(stderr,"Error Opening file %s\n",filename);
		free(gi->file_path);
		free(gi);
		return NULL;
	}

	if (flags & (GEOIP_MEMORY_CACHE | GEOIP_MMAP_CACHE) ) {
		if (fstat(_fileno(gi->GeoIPDatabase), &buf) == -1) {
			fprintf(stderr,"Error stating file %s\n",filename);
			fclose(gi->GeoIPDatabase);
			free(gi->file_path);
			free(gi);
			return NULL;
		}
		gi->mtime = buf.st_mtime;
		gi->size = buf.st_size;
#if !defined WIN32 && !defined __OS2__
		/* MMAP added my Peter Shipley */
		if ( flags & GEOIP_MMAP_CACHE) {
			gi->cache = (unsigned char*)mmap(NULL, buf.st_size, PROT_READ, MAP_PRIVATE, fileno(gi->GeoIPDatabase), 0);
			if ( gi->cache == MAP_FAILED ) {
				fprintf(stderr,"Error mmaping file %s\n",filename);
				fclose(gi->GeoIPDatabase);
				free(gi->file_path);
				free(gi);
				return NULL;
			}
		} else
#endif
		{
			gi->cache = (unsigned char *) malloc(sizeof(unsigned char) * buf.st_size);

			if (gi->cache != NULL) {
				if (fread(gi->cache, sizeof(unsigned char), buf.st_size, gi->GeoIPDatabase) != (size_t) buf.st_size) {
					fprintf(stderr,"Error reading file %s\n",filename);
					fclose(gi->GeoIPDatabase);
					free(gi->cache);
					free(gi->file_path);
					free(gi);
					return NULL;
				}
			}
		}
	} else {
		if (flags & GEOIP_CHECK_CACHE) {
			if (fstat(_fileno(gi->GeoIPDatabase), &buf) == -1) {
				fprintf(stderr,"Error stating file %s\n",filename);
				fclose(gi->GeoIPDatabase);
				free(gi->file_path);
				free(gi);
				return NULL;
			}
			gi->mtime = buf.st_mtime;
		}
		gi->cache = NULL;
	}
	gi->flags = flags;
	gi->charset = GEOIP_CHARSET_ISO_8859_1;

	_setup_segments(gi);
	if (flags & GEOIP_INDEX_CACHE) {
		gi->index_cache = (unsigned char *) malloc(sizeof(unsigned char) * ((gi->databaseSegments[0] * (long)gi->record_length * 2)));
		if (gi->index_cache != NULL) {
			fseek(gi->GeoIPDatabase, 0, SEEK_SET);
			if (fread(gi->index_cache, sizeof(unsigned char), gi->databaseSegments[0] * (long)gi->record_length * 2, gi->GeoIPDatabase) != (size_t) (gi->databaseSegments[0]*(long)gi->record_length * 2)) {
				fprintf(stderr,"Error reading file %s\n",filename);
				free(gi->databaseSegments);
				free(gi->index_cache);
				/* Release the cache the way it was obtained: a mapping is
				   unmapped, anything else (including NULL) is freed. */
#if !defined WIN32 && !defined __OS2__
				if ( flags & GEOIP_MMAP_CACHE) {
					munmap(gi->cache, gi->size);
				} else
#endif
				{
					free(gi->cache);
				}
				fclose(gi->GeoIPDatabase);
				free(gi->file_path);
				free(gi);
				return NULL;
			}
		}
	} else {
		gi->index_cache = NULL;
	}
	return gi;
}