void CCharsetConverter::utf8ToStringCharset(const CStdStringA& strSource, CStdStringA& strDest) { if (m_iconvUtf8ToStringCharset == (iconv_t) - 1) { CStdString strCharset=g_langInfo.GetGuiCharSet(); m_iconvUtf8ToStringCharset = iconv_open(strCharset.c_str(), UTF8_SOURCE); } if (m_iconvUtf8ToStringCharset != (iconv_t) - 1) { const char* src = strSource.c_str(); size_t inBytes = strSource.length() + 1; char *dst = strDest.GetBuffer(inBytes); size_t outBytes = inBytes - 1; if (iconv_const(m_iconvUtf8ToStringCharset, &src, &inBytes, &dst, &outBytes) == (size_t) -1) { strDest.ReleaseBuffer(); // For some reason it failed (maybe wrong charset?). Nothing to do but // return the original.. strDest = strSource; } strDest.ReleaseBuffer(); } }
void CCharsetConverter::utf16LEtoUTF8(const void *strSource, CStdStringA &strDest) { if (m_iconvUtf16LEtoUtf8 == (iconv_t) - 1) m_iconvUtf16LEtoUtf8 = iconv_open("UTF-8", "UTF-16LE"); if (m_iconvUtf16LEtoUtf8 != (iconv_t) - 1) { size_t inBytes = 2; uint16_t *s = (uint16_t *)strSource; while (*s != 0) { s++; inBytes += 2; } // UTF-8 is up to 4 bytes/character, or up to twice the length of UTF-16 size_t outBytes = inBytes * 2; const char *src = (const char *)strSource; char *dst = strDest.GetBuffer(outBytes); if (iconv_const(m_iconvUtf16LEtoUtf8, &src, &inBytes, &dst, &outBytes) == (size_t)-1) { // failed :( strDest.clear(); strDest.ReleaseBuffer(); return; } strDest.ReleaseBuffer(); } }
void CCharsetConverter::utf8ToStringCharset(const CStdStringA& strSource, CStdStringA& strDest) { if (m_iconvUtf8ToStringCharset == (iconv_t) - 1) { CStdString strCharset=g_langInfo.GetGuiCharSet(); m_iconvUtf8ToStringCharset = iconv_open(strCharset.c_str(), UTF8_SOURCE); } if (m_iconvUtf8ToStringCharset != (iconv_t) - 1) { size_t inBytes = strSource.length() + 1; size_t outBytes = strSource.length() + 1; const char *src = strSource.c_str(); char *dst = strDest.GetBuffer(inBytes); if (iconv_const(m_iconvUtf8ToStringCharset, &src, &inBytes, &dst, &outBytes) == (size_t)-1) { CLog::Log(LOGERROR, "%s failed", __FUNCTION__); strDest.ReleaseBuffer(); strDest = strSource; return; } if (iconv_const(m_iconvUtf8ToStringCharset, NULL, NULL, &dst, &outBytes) == (size_t)-1) { CLog::Log(LOGERROR, "%s failed cleanup", __FUNCTION__); strDest.ReleaseBuffer(); strDest = strSource; return; } strDest.ReleaseBuffer(); } }
void CCharsetConverter::utf32ToStringCharset(const unsigned long* strSource, CStdStringA& strDest) { if (m_iconvUtf32ToStringCharset == (iconv_t) - 1) { CStdString strCharset=g_langInfo.GetGuiCharSet(); m_iconvUtf32ToStringCharset = iconv_open(strCharset.c_str(), "UTF-32LE"); } if (m_iconvUtf32ToStringCharset != (iconv_t) - 1) { const unsigned long* ptr=strSource; while (*ptr) ptr++; const char* src = (const char*) strSource; size_t inBytes = (ptr-strSource+1)*4; char *dst = strDest.GetBuffer(inBytes); size_t outBytes = inBytes; if (iconv_const(m_iconvUtf32ToStringCharset, &src, &inBytes, &dst, &outBytes)) { strDest.ReleaseBuffer(); // For some reason it failed (maybe wrong charset?). Nothing to do but // return the original.. strDest = (const char *)strSource; } strDest.ReleaseBuffer(); } }
void CCharsetConverter::utf16BEtoUTF8(const CStdStringW& strSource, CStdStringA &strDest) { if (m_iconvUtf16BEtoUtf8 == (iconv_t) - 1) m_iconvUtf16BEtoUtf8 = iconv_open("UTF-8", "UTF-16BE"); if (m_iconvUtf16BEtoUtf8 != (iconv_t) - 1) { size_t inBytes = (strSource.length() + 1) * sizeof(wchar_t); size_t outBytes = (strSource.length() + 1) * 4; const char *src = (const char*) strSource.c_str(); char *dst = strDest.GetBuffer(outBytes); if (iconv_const(m_iconvUtf16BEtoUtf8, &src, &inBytes, &dst, &outBytes)) { CLog::Log(LOGERROR, "%s failed", __FUNCTION__); strDest.ReleaseBuffer(); strDest = strSource; return; } if (iconv(m_iconvUtf16BEtoUtf8, NULL, NULL, &dst, &outBytes)) { CLog::Log(LOGERROR, "%s failed cleanup", __FUNCTION__); strDest.ReleaseBuffer(); strDest = strSource; return; } strDest.ReleaseBuffer(); } }
void CCharsetConverter::stringCharsetToUtf8(const CStdStringA& strSourceCharset, const CStdStringA& strSource, CStdStringA& strDest) { iconv_t iconvString=iconv_open("UTF-8", strSourceCharset.c_str()); if (iconvString != (iconv_t) - 1) { size_t inBytes = (strSource.length() + 1); size_t outBytes = (strSource.length() + 1) * 4; const char *src = strSource.c_str(); char *dst = strDest.GetBuffer(outBytes); if (iconv_const(iconvString, &src, &inBytes, &dst, &outBytes) == (size_t) -1) { CLog::Log(LOGERROR, "%s failed", __FUNCTION__); strDest.ReleaseBuffer(); strDest = strSource; return; } if (iconv(iconvString, NULL, NULL, &dst, &outBytes) == (size_t)-1) { CLog::Log(LOGERROR, "%s failed cleanup", __FUNCTION__); strDest.ReleaseBuffer(); strDest = strSource; return; } strDest.ReleaseBuffer(); iconv_close(iconvString); } }
void CCharsetConverter::stringCharsetToUtf8(const CStdStringA& strSourceCharset, const CStdStringA& strSource, CStdStringA& strDest) { iconv_t iconvString=iconv_open("UTF-8", strSourceCharset.c_str()); if (iconvString != (iconv_t) - 1) { const char* src = strSource.c_str(); size_t inBytes = strSource.length() + 1; size_t outBytes = (inBytes * 4) + 1; char *dst = strDest.GetBuffer(outBytes); if (iconv_const(iconvString, &src, &inBytes, &dst, &outBytes) == (size_t) -1) { strDest.ReleaseBuffer(); // For some reason it failed (maybe wrong charset?). Nothing to do but // return the original.. strDest = strSource; return ; } strDest.ReleaseBuffer(); iconv_close(iconvString); } }
// Reads iCharCount characters starting at iOffset into a narrow string via
// ReadIntoBufferIgnoringBitsInDeletedList and returns the result.
//
// iOffset     start position handed to the reader.
// iCharCount  number of characters requested.
CStdString DocReader::ReadASCIIString(int iOffset, int iCharCount)
{
  // Characters requested from GetBuffer, and the size reported to the reader.
  const int iBufferChars = iCharCount + 1;
  const int iReadLimit   = iCharCount + 2;

  CStdStringA sAscii;
  ReadIntoBufferIgnoringBitsInDeletedList(iOffset, iCharCount, sAscii.GetBuffer(iBufferChars), iReadLimit);
  sAscii.ReleaseBuffer();

  return sAscii;
}
// Saves a known string through SnapshotSaver::SaveBufferToTempFile, reads the
// resulting file back and checks that the content matches what was written.
// The temporary file is deleted before the assertion so that a failing check
// does not leave it behind.
void TestSnapshotSaver::TestSaveBufferToTempFile()
{
  const CStdString sDestFileName( SnapshotSaver::SaveBufferToTempFile( _T("This is silly data") ) );

  CStdStringA sRtfBuffer;
  {
    std::ifstream file( sDestFileName );
    file.get( sRtfBuffer.GetBuffer(100), 100 );
    sRtfBuffer.ReleaseBuffer();
  } // the stream is closed here, before the file is deleted

  ::DeleteFile( sDestFileName.c_str() );

  // Compare, don't assign: the original used '=' here, which overwrote the
  // buffer with the expected text and made the check meaningless.
  assertTest( sRtfBuffer == CStdStringA( _T("This is silly data") ) );
}
void CCharsetConverter::ucs2CharsetToStringCharset(const CStdStringW& strSource, CStdStringA& strDest, bool swap) { if (m_iconvUcs2CharsetToStringCharset == (iconv_t) - 1) { CStdString strCharset=g_langInfo.GetGuiCharSet(); m_iconvUcs2CharsetToStringCharset = iconv_open(strCharset.c_str(), "UTF-16LE"); } if (m_iconvUcs2CharsetToStringCharset != (iconv_t) - 1) { CStdStringW strCopy = strSource; size_t inBytes = (strCopy.length() + 1) * sizeof(wchar_t); size_t outBytes = (strCopy.length() + 1) * 4; const char *src = (const char*)strCopy.c_str(); char *dst = strDest.GetBuffer(inBytes); if (swap) { char* s = (char*) src; while (*s || *(s + 1)) { char c = *s; *s = *(s + 1); *(s + 1) = c; s++; s++; } } if (iconv_const(m_iconvUcs2CharsetToStringCharset, &src, &inBytes, &dst, &outBytes) == (size_t)-1) { CLog::Log(LOGERROR, "%s failed", __FUNCTION__); strDest.ReleaseBuffer(); strDest = strSource; return; } if (iconv_const(m_iconvUcs2CharsetToStringCharset, NULL, NULL, &dst, &outBytes) == (size_t)-1) { CLog::Log(LOGERROR, "%s failed cleanup", __FUNCTION__); strDest.ReleaseBuffer(); strDest = strSource; return; } strDest.ReleaseBuffer(); } }
// Decodes a UTF-8 byte string into a CStdString, one code point at a time.
//
// Only 1-, 2- and 3-byte sequences are decoded. Any other lead byte
// (including the lead bytes of 4-byte and longer sequences, and stray
// continuation bytes) is treated as an error, as is a multi-byte sequence cut
// off by the end of the input. On error the input is returned unchanged.
//
// sData  bytes to decode.
// Returns the decoded text, or sData itself when it cannot be decoded.
CStdString CWordBinaryMetadataDiscoveryWorker::ConvertPropertyFromUTF8(CStdStringA sData)
{
  CStdString sOut;
  const int iLength = sData.GetLength();
  int iPos = 0;
  unsigned char* p = (unsigned char *)sData.GetBuffer(-1);

  while (iPos < iLength)
  {
    // Work out the sequence length from the lead byte.
    int iIncrement = 0;
    if (*p <= 127)                      // 1 byte
      iIncrement = 1;
    else if (*p >= 192 && *p <= 223)    // 2 bytes
      iIncrement = 2;
    else if (*p >= 224 && *p <= 239)    // 3 bytes
      iIncrement = 3;
    else
      return sData;                     // error - leave as was

    // A sequence that runs past the end of the data is malformed. Decoding it
    // would read continuation bytes beyond the string.
    if (iPos + iIncrement > iLength)
      return sData;

    //   *p     z y x
    if (iIncrement == 1)
    {
      sOut += *p;
    }
    else if (iIncrement == 2)
    {
      // (z-192)*64 + (y-128)
      sOut += (64 * (*p - 192)) + *(p+1) - 128;
    }
    else
    {
      // (z-224)*4096 + (y-128)*64 + (x-128)
      sOut += (4096 * (*p - 224)) + (64 * (*(p+1) - 128)) + *(p+2) - 128;
    }

    p += iIncrement;
    iPos += iIncrement;
  }

  return sOut;
}
void CCharsetConverter::utf16LEtoUTF8(const CStdStringW& strSource, CStdStringA &strDest) { if (m_iconvUtf16LEtoUtf8 == (iconv_t) - 1) m_iconvUtf16LEtoUtf8 = iconv_open("UTF-8", "UTF-16LE"); if (m_iconvUtf16LEtoUtf8 != (iconv_t) - 1) { const char* src = (const char*) strSource.c_str(); size_t inBytes = (strSource.length() + 1)*sizeof(wchar_t); size_t outBytes = (inBytes + 1)*sizeof(wchar_t); // UTF-8 is up to 4 bytes/character char *dst = strDest.GetBuffer(outBytes); if (iconv_const(m_iconvUtf16LEtoUtf8, &src, &inBytes, &dst, &outBytes)) { // failed :( strDest.ReleaseBuffer(); strDest = strSource; return; } strDest.ReleaseBuffer(); } }
void CCharsetConverter::ucs2CharsetToStringCharset(const CStdStringW& strSource, CStdStringA& strDest, bool swap) { if (m_iconvUcs2CharsetToStringCharset == (iconv_t) - 1) { CStdString strCharset=g_langInfo.GetGuiCharSet(); m_iconvUcs2CharsetToStringCharset = iconv_open(strCharset.c_str(), "UTF-16LE"); } if (m_iconvUcs2CharsetToStringCharset != (iconv_t) - 1) { const char* src = (const char*) strSource.c_str(); size_t inBytes = (strSource.length() + 1) * sizeof(wchar_t); if (swap) { char* s = (char*) src; while (*s || *(s + 1)) { char c = *s; *s = *(s + 1); *(s + 1) = c; s++; s++; } } char *dst = strDest.GetBuffer(inBytes); size_t outBytes = inBytes; if (iconv_const(m_iconvUcs2CharsetToStringCharset, &src, &inBytes, &dst, &outBytes)) { strDest.ReleaseBuffer(); // For some reason it failed (maybe wrong charset?). Nothing to do but // return the original.. strDest = strSource; } strDest.ReleaseBuffer(); } }
void CCharsetConverter::wToUTF8(const CStdStringW& strSource, CStdStringA &strDest) { if (m_iconvWtoUtf8 == (iconv_t) - 1) m_iconvWtoUtf8 = iconv_open("UTF-8", WCHAR_CHARSET); if (m_iconvWtoUtf8 != (iconv_t) - 1) { const char* src = (const char*) strSource.c_str(); size_t inBytes = (strSource.length() + 1) * sizeof(wchar_t); size_t outBytes = (inBytes + 1)*sizeof(wchar_t); // some free for UTF-8 (up to 4 bytes/char) char *dst = strDest.GetBuffer(outBytes); if (iconv_const(m_iconvWtoUtf8, &src, &inBytes, &dst, &outBytes)) { // failed :( CLog::Log(LOGERROR, "CCharsetConverter::wToUTF8 failed for subtitle."); strDest.ReleaseBuffer(); strDest = strSource; return; } strDest.ReleaseBuffer(); } }
void CCharsetConverter::ucs2ToUTF8(const CStdStringW& strSource, CStdStringA& strDest) { if (m_iconvUcs2CharsetToUtf8 == (iconv_t) - 1) m_iconvUcs2CharsetToUtf8 = iconv_open("UTF-8", "UCS-2LE"); if (m_iconvUcs2CharsetToUtf8 != (iconv_t) - 1) { const char* src = (const char*) strSource.c_str(); size_t inBytes = (strSource.length() + 1)*2; size_t outBytes = (inBytes + 1)*2; // some free for UTF-8 (up to 4 bytes/char) char *dst = strDest.GetBuffer(outBytes); if (iconv_const(m_iconvUcs2CharsetToUtf8, &src, &inBytes, &dst, &outBytes) == (size_t) -1) { // failed :( CLog::Log(LOGERROR, "CCharsetConverter::ucs2ToUTF8 failed for Python with errno=%d", errno); strDest.ReleaseBuffer(); strDest = strSource; return; } strDest.ReleaseBuffer(); } }
void CCharsetConverter::logicalToVisualBiDi(const CStdStringA& strSource, CStdStringA& strDest, FriBidiCharSet fribidiCharset, FriBidiCharType base) { vector<CStdString> lines; CUtil::Tokenize(strSource, lines, "\n"); CStdString resultString; for (unsigned int i = 0; i < lines.size(); i++) { int sourceLen = lines[i].length(); FriBidiChar* logical = (FriBidiChar*) malloc((sourceLen + 1) * sizeof(FriBidiChar)); FriBidiChar* visual = (FriBidiChar*) malloc((sourceLen + 1) * sizeof(FriBidiChar)); // Convert from the selected charset to Unicode int len = fribidi_charset_to_unicode(fribidiCharset, (char*) lines[i].c_str(), sourceLen, logical); if (fribidi_log2vis(logical, len, &base, visual, NULL, NULL, NULL)) { // Removes bidirectional marks //len = fribidi_remove_bidi_marks(visual, len, NULL, NULL, NULL); // Apperently a string can get longer during this transformation // so make sure we allocate the maximum possible character utf8 // can generate atleast, should cover all bases char *result = strDest.GetBuffer(len*4); // Convert back from Unicode to the charset int len2 = fribidi_unicode_to_charset(fribidiCharset, visual, len, result); ASSERT(len2 <= len*4); strDest.ReleaseBuffer(); resultString += strDest; } free(logical); free(visual); } strDest = resultString; }
void CCharsetConverter::utf32ToStringCharset(const unsigned long* strSource, CStdStringA& strDest) { if (m_iconvUtf32ToStringCharset == (iconv_t) - 1) { CStdString strCharset=g_langInfo.GetGuiCharSet(); m_iconvUtf32ToStringCharset = iconv_open(strCharset.c_str(), "UTF-32LE"); } if (m_iconvUtf32ToStringCharset != (iconv_t) - 1) { const unsigned long* ptr=strSource; while (*ptr) ptr++; const char* src = (const char*) strSource; size_t inBytes = (ptr-strSource+1)*4; char *dst = strDest.GetBuffer(inBytes); size_t outBytes = inBytes; if (iconv_const(m_iconvUtf32ToStringCharset, &src, &inBytes, &dst, &outBytes) == (size_t)-1) { CLog::Log(LOGERROR, "%s failed", __FUNCTION__); strDest.ReleaseBuffer(); strDest = (const char *)strSource; return; } if (iconv(m_iconvUtf32ToStringCharset, NULL, NULL, &dst, &outBytes) == (size_t)-1) { CLog::Log(LOGERROR, "%s failed cleanup", __FUNCTION__); strDest.ReleaseBuffer(); strDest = (const char *)strSource; return; } strDest.ReleaseBuffer(); } }