예제 #1
0
// Converts a UTF-8 string to the GUI character set reported by g_langInfo.
//
// The iconv descriptor is opened on first use and cached in
// m_iconvUtf8ToStringCharset. If the descriptor cannot be opened, strDest is
// left untouched. If the conversion itself fails, strDest receives an
// unmodified copy of strSource.
void CCharsetConverter::utf8ToStringCharset(const CStdStringA& strSource, CStdStringA& strDest)
{
  if (m_iconvUtf8ToStringCharset == (iconv_t) - 1)
  {
    CStdString strCharset=g_langInfo.GetGuiCharSet();
    m_iconvUtf8ToStringCharset = iconv_open(strCharset.c_str(), UTF8_SOURCE);
  }

  if (m_iconvUtf8ToStringCharset == (iconv_t) - 1)
    return;

  const char* src = strSource.c_str();
  // The terminating NUL is converted as well, so the output ends up terminated.
  size_t inBytes = strSource.length() + 1;

  // Offer the whole requested buffer to iconv. Advertising one byte less than
  // was requested made every same-length conversion (e.g. plain ASCII) run out
  // of room for the terminator and take the failure path.
  char *dst = strDest.GetBuffer(inBytes);
  size_t outBytes = inBytes;

  if (iconv_const(m_iconvUtf8ToStringCharset, &src, &inBytes, &dst, &outBytes) == (size_t) -1)
  {
    strDest.ReleaseBuffer();
    // For some reason it failed (maybe wrong charset?). Nothing to do but
    // return the original..
    strDest = strSource;
    return;
  }
  strDest.ReleaseBuffer();
}
예제 #2
0
// Converts a zero-terminated UTF-16LE buffer to UTF-8.
//
// strSource points at 16-bit code units ending with a zero unit; a NULL
// pointer yields an empty strDest. The iconv descriptor is opened on first use
// and cached in m_iconvUtf16LEtoUtf8; if it cannot be opened strDest is left
// untouched. If the conversion fails strDest is emptied.
void CCharsetConverter::utf16LEtoUTF8(const void *strSource,
                                      CStdStringA &strDest)
{
  if (strSource == NULL)
  {
    strDest.clear();
    return;
  }

  if (m_iconvUtf16LEtoUtf8 == (iconv_t) - 1)
    m_iconvUtf16LEtoUtf8 = iconv_open("UTF-8", "UTF-16LE");

  if (m_iconvUtf16LEtoUtf8 != (iconv_t) - 1)
  {
    // Count the bytes up to and including the terminating zero unit.
    size_t inBytes = 2;
    const uint16_t *s = (const uint16_t *)strSource;
    while (*s != 0)
    {
      s++;
      inBytes += 2;
    }
    // UTF-8 is up to 4 bytes/character, or up to twice the length of UTF-16
    size_t outBytes = inBytes * 2;

    const char *src = (const char *)strSource;
    char *dst = strDest.GetBuffer(outBytes);
    if (iconv_const(m_iconvUtf16LEtoUtf8, &src, &inBytes, &dst, &outBytes) ==
        (size_t)-1)
    { // failed :(
      // Hand the buffer back before modifying the string.
      strDest.ReleaseBuffer();
      strDest.clear();
      return;
    }
    strDest.ReleaseBuffer();
  }
}
예제 #3
0
void CCharsetConverter::utf8ToStringCharset(const CStdStringA& strSource, CStdStringA& strDest)
{
  if (m_iconvUtf8ToStringCharset == (iconv_t) - 1)
  {
    CStdString strCharset=g_langInfo.GetGuiCharSet();
    m_iconvUtf8ToStringCharset = iconv_open(strCharset.c_str(), UTF8_SOURCE);
  }

  if (m_iconvUtf8ToStringCharset != (iconv_t) - 1)
  {
    size_t inBytes  = strSource.length() + 1;
    size_t outBytes = strSource.length() + 1;
    const char *src = strSource.c_str();
    char       *dst = strDest.GetBuffer(inBytes);

    if (iconv_const(m_iconvUtf8ToStringCharset, &src, &inBytes, &dst, &outBytes) == (size_t)-1)
    {
      CLog::Log(LOGERROR, "%s failed", __FUNCTION__);
      strDest.ReleaseBuffer();
      strDest = strSource;
      return;
    }

    if (iconv_const(m_iconvUtf8ToStringCharset, NULL, NULL, &dst, &outBytes) == (size_t)-1)
    {
      CLog::Log(LOGERROR, "%s failed cleanup", __FUNCTION__);
      strDest.ReleaseBuffer();
      strDest = strSource;
      return;
    }

    strDest.ReleaseBuffer();
  }
}
예제 #4
0
// Converts a zero-terminated UTF-32LE buffer to the GUI character set
// reported by g_langInfo.
//
// The iconv descriptor is opened on first use and cached in
// m_iconvUtf32ToStringCharset; if it cannot be opened strDest is left
// untouched. If the conversion fails, strDest is assigned the source buffer
// reinterpreted as a narrow C string.
//
// NOTE(review): the byte count below assumes every element of strSource is
// 4 bytes wide; confirm this holds for `unsigned long` on all targets.
void CCharsetConverter::utf32ToStringCharset(const unsigned long* strSource, CStdStringA& strDest)
{
  if (m_iconvUtf32ToStringCharset == (iconv_t) - 1)
  {
    CStdString strCharset=g_langInfo.GetGuiCharSet();
    m_iconvUtf32ToStringCharset = iconv_open(strCharset.c_str(), "UTF-32LE");
  }

  if (m_iconvUtf32ToStringCharset != (iconv_t) - 1)
  {
    // Find the terminating zero element; it is converted along with the text.
    const unsigned long* ptr=strSource;
    while (*ptr) ptr++;
    const char* src = (const char*) strSource;
    size_t inBytes = (ptr-strSource+1)*4;

    char *dst = strDest.GetBuffer(inBytes);
    size_t outBytes = inBytes;

    // iconv returns the number of non-reversible conversions on success, so
    // only (size_t)-1 signals an error.
    if (iconv_const(m_iconvUtf32ToStringCharset, &src, &inBytes, &dst, &outBytes) == (size_t)-1)
    {
      strDest.ReleaseBuffer();
      // For some reason it failed (maybe wrong charset?). Nothing to do but
      // return the original..
      strDest = (const char *)strSource;
      return;
    }
    strDest.ReleaseBuffer();
  }
}
예제 #5
0
// Converts a UTF-16BE wide string to UTF-8.
//
// The iconv descriptor is opened on first use and cached in
// m_iconvUtf16BEtoUtf8; if it cannot be opened strDest is left untouched. If
// the conversion or the follow-up call with a NULL input fails, the error is
// logged and strDest is assigned from strSource instead.
//
// NOTE(review): the input byte count is derived from sizeof(wchar_t) while the
// converter expects UTF-16 code units; this presumes a 2-byte wchar_t - confirm
// for the supported targets.
void CCharsetConverter::utf16BEtoUTF8(const CStdStringW& strSource, CStdStringA &strDest)
{
  if (m_iconvUtf16BEtoUtf8 == (iconv_t) - 1)
    m_iconvUtf16BEtoUtf8 = iconv_open("UTF-8", "UTF-16BE");

  if (m_iconvUtf16BEtoUtf8 != (iconv_t) - 1)
  {
    size_t inBytes  = (strSource.length() + 1) * sizeof(wchar_t);
    size_t outBytes = (strSource.length() + 1) * 4;
    const char *src = (const char*) strSource.c_str();
    char       *dst = strDest.GetBuffer(outBytes);

    // iconv returns the number of non-reversible conversions on success, so
    // only (size_t)-1 signals an error.
    if (iconv_const(m_iconvUtf16BEtoUtf8, &src, &inBytes, &dst, &outBytes) == (size_t)-1)
    {
      CLog::Log(LOGERROR, "%s failed", __FUNCTION__);
      strDest.ReleaseBuffer();
      strDest = strSource;
      return;
    }

    if (iconv(m_iconvUtf16BEtoUtf8, NULL, NULL, &dst, &outBytes) == (size_t)-1)
    {
      CLog::Log(LOGERROR, "%s failed cleanup", __FUNCTION__);
      strDest.ReleaseBuffer();
      strDest = strSource;
      return;
    }
    strDest.ReleaseBuffer();
  }
}
예제 #6
0
// Converts strSource from the named character set to UTF-8.
//
// A temporary iconv descriptor is opened for each call and closed again on
// every path out of the function. If the descriptor cannot be opened strDest
// is left untouched. If the conversion or the follow-up call with a NULL input
// fails, the error is logged and strDest receives an unmodified copy of
// strSource.
void CCharsetConverter::stringCharsetToUtf8(const CStdStringA& strSourceCharset, const CStdStringA& strSource, CStdStringA& strDest)
{
  iconv_t iconvString=iconv_open("UTF-8", strSourceCharset.c_str());

  if (iconvString == (iconv_t) - 1)
    return;

  size_t inBytes  = (strSource.length() + 1);
  size_t outBytes = (strSource.length() + 1) * 4;
  const char *src = strSource.c_str();
  char       *dst = strDest.GetBuffer(outBytes);

  bool converted = true;
  if (iconv_const(iconvString, &src, &inBytes, &dst, &outBytes) == (size_t) -1)
  {
    CLog::Log(LOGERROR, "%s failed", __FUNCTION__);
    converted = false;
  }
  else if (iconv(iconvString, NULL, NULL, &dst, &outBytes) == (size_t)-1)
  {
    CLog::Log(LOGERROR, "%s failed cleanup", __FUNCTION__);
    converted = false;
  }

  strDest.ReleaseBuffer();
  if (!converted)
    strDest = strSource;

  // Single exit point so the descriptor is released on failure as well.
  iconv_close(iconvString);
}
예제 #7
0
// Converts strSource from the named character set to UTF-8.
//
// A temporary iconv descriptor is opened for each call and closed again on
// every path out of the function. If the descriptor cannot be opened strDest
// is left untouched. If the conversion fails strDest receives an unmodified
// copy of strSource.
void CCharsetConverter::stringCharsetToUtf8(const CStdStringA& strSourceCharset, const CStdStringA& strSource, CStdStringA& strDest)
{
  iconv_t iconvString=iconv_open("UTF-8", strSourceCharset.c_str());

  if (iconvString == (iconv_t) - 1)
    return;

  const char* src = strSource.c_str();
  size_t inBytes = strSource.length() + 1;

  size_t outBytes = (inBytes * 4) + 1;
  char *dst = strDest.GetBuffer(outBytes);

  const bool converted =
    iconv_const(iconvString, &src, &inBytes, &dst, &outBytes) != (size_t) -1;

  strDest.ReleaseBuffer();
  if (!converted)
  {
    // For some reason it failed (maybe wrong charset?). Nothing to do but
    // return the original..
    strDest = strSource;
  }

  // Close on the failure path too; returning early used to leak the descriptor.
  iconv_close(iconvString);
}
예제 #8
0
// Reads iCharCount characters starting at iOffset into a narrow string and
// returns it as a CStdString.
//
// NOTE(review): the buffer is requested with iCharCount + 1 characters while
// the reader is told iCharCount + 2; whether that is correct depends on
// GetBuffer's terminator handling and on the reader - confirm.
CStdString DocReader::ReadASCIIString(int iOffset, int iCharCount)
{
	CStdStringA sAscii;
	char* pTarget = sAscii.GetBuffer(iCharCount + 1);
	ReadIntoBufferIgnoringBitsInDeletedList(iOffset, iCharCount, pTarget, iCharCount+2);
	// Let the string recompute its length from the data that was written.
	sAscii.ReleaseBuffer();
	return sAscii;
}
예제 #9
0
// Saves a known string through SnapshotSaver::SaveBufferToTempFile, reads the
// resulting file back and checks that the content matches what was saved.
void TestSnapshotSaver::TestSaveBufferToTempFile()
{
    const CStdString sDestFileName( SnapshotSaver::SaveBufferToTempFile( _T("This is silly data") ) );
    CStdStringA sRtfBuffer;
    {
        // Scoped so the stream is closed before the file is deleted; our own
        // open handle must not get in the way of the delete.
        std::ifstream file( sDestFileName );
        file.get( sRtfBuffer.GetBuffer(100), 100 );
        sRtfBuffer.ReleaseBuffer();
    }
    ::DeleteFile( sDestFileName.c_str() );

    // Compare, do not assign: with '=' the check could never fail.
    assertTest( sRtfBuffer == _T("This is silly data") );
}
예제 #10
0
// Converts a UTF-16LE wide string to the GUI character set reported by
// g_langInfo, optionally swapping the two bytes of every 16-bit unit first.
//
// The iconv descriptor is opened on first use and cached in
// m_iconvUcs2CharsetToStringCharset; if it cannot be opened strDest is left
// untouched. If the conversion or the follow-up call with a NULL input fails,
// the error is logged and strDest is assigned from strSource instead.
void CCharsetConverter::ucs2CharsetToStringCharset(const CStdStringW& strSource, CStdStringA& strDest, bool swap)
{
  if (m_iconvUcs2CharsetToStringCharset == (iconv_t) - 1)
  {
    CStdString strCharset=g_langInfo.GetGuiCharSet();
    m_iconvUcs2CharsetToStringCharset = iconv_open(strCharset.c_str(), "UTF-16LE");
  }

  if (m_iconvUcs2CharsetToStringCharset != (iconv_t) - 1)
  {
    // The byte swap below writes into the buffer, so work on a copy rather
    // than on the caller's const string.
    CStdStringW strCopy = strSource;
    size_t inBytes  = (strCopy.length() + 1) * sizeof(wchar_t);
    size_t outBytes = (strCopy.length() + 1) * 4;
    const char *src = (const char*)strCopy.c_str();
    // The buffer must be as large as the size advertised to iconv. Sizing it
    // by inBytes let iconv write past the end whenever outBytes was larger.
    char       *dst = strDest.GetBuffer(outBytes);

    if (swap)
    {
      char* s = (char*) src;

      // Exchange the two bytes of each 16-bit unit up to the zero unit.
      while (*s || *(s + 1))
      {
        char c = *s;
        *s = *(s + 1);
        *(s + 1) = c;

        s++;
        s++;
      }
    }

    if (iconv_const(m_iconvUcs2CharsetToStringCharset, &src, &inBytes, &dst, &outBytes) == (size_t)-1)
    {
      CLog::Log(LOGERROR, "%s failed", __FUNCTION__);
      strDest.ReleaseBuffer();
      strDest = strSource;
      return;
    }

    if (iconv_const(m_iconvUcs2CharsetToStringCharset, NULL, NULL, &dst, &outBytes) == (size_t)-1)
    {
      CLog::Log(LOGERROR, "%s failed cleanup", __FUNCTION__);
      strDest.ReleaseBuffer();
      strDest = strSource;
      return;
    }

    strDest.ReleaseBuffer();
  }
}
// Decodes a UTF-8 byte string into a CStdString, one sequence at a time.
//
// Only 1-, 2- and 3-byte sequences are decoded. Any other lead byte, a
// sequence cut short by the end of the data, or a byte that is not a
// continuation byte (10xxxxxx) where one is required is treated as an error,
// and the input is returned unchanged.
CStdString CWordBinaryMetadataDiscoveryWorker::ConvertPropertyFromUTF8(CStdStringA sData)
{
	CStdString sOut;
	const int iLength = sData.GetLength();
	const unsigned char* p = (const unsigned char *)sData.c_str();
	int iPos = 0;

	while (iPos < iLength)
	{
		// Sequence length is determined by the lead byte.
		int iIncrement = 0;
		if (*p <= 127)	// 1 byte
			iIncrement = 1;
		else if (*p >= 192 && *p <= 223)	// 2 bytes
			iIncrement = 2;
		else if (*p >= 224 && *p <= 239)	// 3 bytes
			iIncrement = 3;
		else
		{
			// error - leave as was (includes 4-byte and longer sequences)
			return sData;
		}

		// Never read past the end of the data for a truncated sequence.
		if (iPos + iIncrement > iLength)
			return sData;

		// Every trailing byte must have the form 10xxxxxx.
		for (int i = 1; i < iIncrement; ++i)
		{
			if ((p[i] & 0xC0) != 0x80)
				return sData;
		}

		if (iIncrement == 1)
		{
			sOut += *p;
		}
		else if (iIncrement == 2)
		{	// (z-192)*64 + (y-128)
			sOut += (64 * (*p - 192)) + *(p+1) - 128;
		}
		else
		{	// (z-224)*4096 + (y-128)*64 + (x-128)
			sOut += (4096 * (*p - 224)) + (64 * (*(p+1) - 128)) + *(p+2) - 128;
		}

		p += iIncrement;
		iPos += iIncrement;
	}
	return sOut;
}
예제 #12
0
// Converts a UTF-16LE wide string to UTF-8.
//
// The iconv descriptor is opened on first use and cached in
// m_iconvUtf16LEtoUtf8; if it cannot be opened strDest is left untouched. If
// the conversion fails strDest is assigned from strSource instead.
//
// NOTE(review): the byte counts are derived from sizeof(wchar_t) while the
// converter expects UTF-16 code units; this presumes a 2-byte wchar_t - confirm
// for the supported targets.
void CCharsetConverter::utf16LEtoUTF8(const CStdStringW& strSource, CStdStringA &strDest)
{
  if (m_iconvUtf16LEtoUtf8 == (iconv_t) - 1)
    m_iconvUtf16LEtoUtf8 = iconv_open("UTF-8", "UTF-16LE");

  if (m_iconvUtf16LEtoUtf8 != (iconv_t) - 1)
  {
    const char* src = (const char*) strSource.c_str();
    size_t inBytes = (strSource.length() + 1)*sizeof(wchar_t);
    size_t outBytes = (inBytes + 1)*sizeof(wchar_t);  // UTF-8 is up to 4 bytes/character
    char *dst = strDest.GetBuffer(outBytes);
    // iconv returns the number of non-reversible conversions on success, so
    // only (size_t)-1 signals an error.
    if (iconv_const(m_iconvUtf16LEtoUtf8, &src, &inBytes, &dst, &outBytes) == (size_t)-1)
    { // failed :(
      strDest.ReleaseBuffer();
      strDest = strSource;
      return;
    }
    strDest.ReleaseBuffer();
  }
}
예제 #13
0
void CCharsetConverter::ucs2CharsetToStringCharset(const CStdStringW& strSource, CStdStringA& strDest, bool swap)
{
  if (m_iconvUcs2CharsetToStringCharset == (iconv_t) - 1)
  {
    CStdString strCharset=g_langInfo.GetGuiCharSet();
    m_iconvUcs2CharsetToStringCharset = iconv_open(strCharset.c_str(), "UTF-16LE");
  }

  if (m_iconvUcs2CharsetToStringCharset != (iconv_t) - 1)
  {
    const char* src = (const char*) strSource.c_str();
    size_t inBytes = (strSource.length() + 1) * sizeof(wchar_t);

    if (swap)
    {
      char* s = (char*) src;

      while (*s || *(s + 1))
      {
        char c = *s;
        *s = *(s + 1);
        *(s + 1) = c;

        s++;
        s++;
      }
    }

    char *dst = strDest.GetBuffer(inBytes);
    size_t outBytes = inBytes;

    if (iconv_const(m_iconvUcs2CharsetToStringCharset, &src, &inBytes, &dst, &outBytes))
    {
      strDest.ReleaseBuffer();
      // For some reason it failed (maybe wrong charset?). Nothing to do but
      // return the original..
      strDest = strSource;
    }
    strDest.ReleaseBuffer();
  }
}
예제 #14
0
// Converts a wide string in WCHAR_CHARSET encoding to UTF-8.
//
// The iconv descriptor is opened on first use and cached in m_iconvWtoUtf8; if
// it cannot be opened strDest is left untouched. If the conversion fails, the
// error is logged and strDest is assigned from strSource instead.
void CCharsetConverter::wToUTF8(const CStdStringW& strSource, CStdStringA &strDest)
{
  if (m_iconvWtoUtf8 == (iconv_t) - 1)
    m_iconvWtoUtf8 = iconv_open("UTF-8", WCHAR_CHARSET);

  if (m_iconvWtoUtf8 != (iconv_t) - 1)
  {
    const char* src = (const char*) strSource.c_str();
    size_t inBytes = (strSource.length() + 1) * sizeof(wchar_t);
    size_t outBytes = (inBytes + 1)*sizeof(wchar_t);  // some free for UTF-8 (up to 4 bytes/char)
    char *dst = strDest.GetBuffer(outBytes);
    // iconv returns the number of non-reversible conversions on success, so
    // only (size_t)-1 signals an error.
    if (iconv_const(m_iconvWtoUtf8, &src, &inBytes, &dst, &outBytes) == (size_t)-1)
    { // failed :(
      CLog::Log(LOGERROR, "CCharsetConverter::wToUTF8 failed for subtitle.");
      strDest.ReleaseBuffer();
      strDest = strSource;
      return;
    }
    strDest.ReleaseBuffer();
  }
}
예제 #15
0
// Turns a UCS-2LE wide string into UTF-8.
//
// The converter handle is created lazily and kept in
// m_iconvUcs2CharsetToUtf8. When no handle can be opened the function returns
// without touching strDest. When the conversion reports (size_t)-1, errno is
// logged and strDest is assigned from strSource instead.
void CCharsetConverter::ucs2ToUTF8(const CStdStringW& strSource, CStdStringA& strDest)
{
  if (m_iconvUcs2CharsetToUtf8 == (iconv_t) - 1)
    m_iconvUcs2CharsetToUtf8 = iconv_open("UTF-8", "UCS-2LE");

  if (m_iconvUcs2CharsetToUtf8 == (iconv_t) - 1)
    return;

  // Two bytes per input unit, terminator included.
  size_t inLeft = (strSource.length() + 1)*2;
  // some free for UTF-8 (up to 4 bytes/char)
  size_t outLeft = (inLeft + 1)*2;
  const char* readPos = (const char*) strSource.c_str();
  char *writePos = strDest.GetBuffer(outLeft);

  const bool failed =
    iconv_const(m_iconvUcs2CharsetToUtf8, &readPos, &inLeft, &writePos, &outLeft) == (size_t) -1;

  // Log first so errno is read before any other call can change it.
  if (failed)
    CLog::Log(LOGERROR, "CCharsetConverter::ucs2ToUTF8 failed for Python with errno=%d", errno);

  strDest.ReleaseBuffer();

  if (failed)
    strDest = strSource;
}
예제 #16
0
// Reorders text from logical to visual order with FriBidi, line by line.
//
// strSource is split on "\n"; each line is converted from fribidiCharset to
// Unicode, passed through fribidi_log2vis and converted back. The per-line
// results are concatenated into strDest. A line for which fribidi_log2vis
// returns false contributes nothing; a line whose work buffers cannot be
// allocated is appended unchanged.
//
// NOTE(review): the results are joined without re-inserting "\n" - confirm
// that callers expect the line breaks to be dropped.
void CCharsetConverter::logicalToVisualBiDi(const CStdStringA& strSource, CStdStringA& strDest, FriBidiCharSet fribidiCharset, FriBidiCharType base)
{
  vector<CStdString> lines;
  CUtil::Tokenize(strSource, lines, "\n");
  CStdString resultString;

  for (unsigned int i = 0; i < lines.size(); i++)
  {
    int sourceLen = lines[i].length();
    FriBidiChar* logical = (FriBidiChar*) malloc((sourceLen + 1) * sizeof(FriBidiChar));
    FriBidiChar* visual = (FriBidiChar*) malloc((sourceLen + 1) * sizeof(FriBidiChar));

    // Without both work buffers the line cannot be reordered; keep it as is
    // instead of writing through a null pointer.
    if (logical == NULL || visual == NULL)
    {
      free(logical);
      free(visual);
      resultString += lines[i];
      continue;
    }

    // Convert from the selected charset to Unicode
    int len = fribidi_charset_to_unicode(fribidiCharset, (char*) lines[i].c_str(), sourceLen, logical);

    if (fribidi_log2vis(logical, len, &base, visual, NULL, NULL, NULL))
    {
      // Removes bidirectional marks
      //len = fribidi_remove_bidi_marks(visual, len, NULL, NULL, NULL);

      // Apparently a string can get longer during this transformation
      // so make sure we allocate the maximum possible character utf8
      // can generate at least, should cover all bases
      char *result = strDest.GetBuffer(len*4);

      // Convert back from Unicode to the charset
      int len2 = fribidi_unicode_to_charset(fribidiCharset, visual, len, result);
      ASSERT(len2 <= len*4);
      strDest.ReleaseBuffer();

      resultString += strDest;
    }

    free(logical);
    free(visual);
  }

  strDest = resultString;
}
예제 #17
0
void CCharsetConverter::utf32ToStringCharset(const unsigned long* strSource, CStdStringA& strDest)
{
  if (m_iconvUtf32ToStringCharset == (iconv_t) - 1)
  {
    CStdString strCharset=g_langInfo.GetGuiCharSet();
    m_iconvUtf32ToStringCharset = iconv_open(strCharset.c_str(), "UTF-32LE");
  }

  if (m_iconvUtf32ToStringCharset != (iconv_t) - 1)
  {
    const unsigned long* ptr=strSource;
    while (*ptr) ptr++;
    const char* src = (const char*) strSource;
    size_t inBytes = (ptr-strSource+1)*4;

    char *dst = strDest.GetBuffer(inBytes);
    size_t outBytes = inBytes;

    if (iconv_const(m_iconvUtf32ToStringCharset, &src, &inBytes, &dst, &outBytes) == (size_t)-1)
    {
      CLog::Log(LOGERROR, "%s failed", __FUNCTION__);
      strDest.ReleaseBuffer();
      strDest = (const char *)strSource;
      return;
    }

    if (iconv(m_iconvUtf32ToStringCharset, NULL, NULL, &dst, &outBytes) == (size_t)-1)
    {
      CLog::Log(LOGERROR, "%s failed cleanup", __FUNCTION__);
      strDest.ReleaseBuffer();
      strDest = (const char *)strSource;
      return;
    }

    strDest.ReleaseBuffer();
  }
}