void StringTestCase::testConversion() { checkConversion(Sid::String(), ""); checkConversion(Sid::String(4), ""); checkConversion(""); checkConversion("foo"); }
int main(int, char **) { initTest(); // validator FuseZipData::validateFileName("normal.name"); FuseZipData::validateFileName("path/to/normal.name"); FuseZipData::validateFileName(".hidden"); FuseZipData::validateFileName("path/to/.hidden"); FuseZipData::validateFileName("path/to/.hidden/dir"); FuseZipData::validateFileName("..superhidden"); FuseZipData::validateFileName("path/to/..superhidden"); FuseZipData::validateFileName("path/to/..superhidden/dir"); checkValidationException("", "empty file name"); checkValidationException("moo//moo", "bad file name (two slashes): "); // converter checkConversion("normal.name", true, false, "normal.name"); checkConversion("normal.name", true, true, "CUR/normal.name"); checkConversion("path/to/normal.name", true, false, "path/to/normal.name"); checkConversion("path/to/normal.name", true, true, "CUR/path/to/normal.name"); checkConvertException(".", "bad file name: ", true, false); checkConvertException("./", "bad file name: ", true, false); checkConvertException("abc/./cde", "bad file name: ", true, false); checkConvertException("abc/.", "bad file name: ", true, false); checkConversion(".hidden", false, false, ".hidden"); checkConversion("path/to/.hidden", false, false, "path/to/.hidden"); checkConversion("path/to/.hidden/dir", false, false, "path/to/.hidden/dir"); checkConvertException(".", "bad file name: .", false, true); checkConvertException(".", "bad file name: .", true, true); checkConvertException("/.", "bad file name: /.", true, true); checkConvertException("./", "bad file name: ./", false, false); checkConvertException("./", "bad file name: ./", true, false); checkConvertException("..", "bad file name: ..", false, true); checkConvertException("../", "paths relative to parent directory are not supported", false, true); checkConversion("../", true, true, "UP/"); checkConversion("../../../", true, true, "UPUPUP/"); checkConvertException("/..", "bad file name: /..", true, true); checkConvertException("/../blah", "bad file name: /../blah", true, true); checkConversion("../abc", true, true, "UP/abc"); checkConversion("../../../abc", true, true, "UPUPUP/abc"); checkConvertException("abc/../cde", "bad file name: ", false, false); checkConvertException("abc/../cde", "bad file name: ", true, true); checkConvertException("abc/..", "bad file name: ", false, false); checkConvertException("abc/..", "bad file name: ", true, true); checkConvertException("../abc/..", "bad file name: ", true, true); checkConvertException("/", "absolute paths are not supported in read-write mode", false, false); checkConvertException("/rootname", "absolute paths are not supported in read-write mode", false, false); checkConversion("/", true, true, "ROOT/"); checkConversion("/rootname", true, true, "ROOT/rootname"); checkConversion("/path/name", true, true, "ROOT/path/name"); return EXIT_SUCCESS; }
bool CCharsetDetection::ConvertPlainTextToUtf8(const std::string& textContent, std::string& converted, const std::string& serverReportedCharset, std::string& usedCharset) { converted.clear(); usedCharset.clear(); if (textContent.empty()) { usedCharset = "UTF-8"; // any charset can be used for empty content, use UTF-8 as default return true; } // try to get charset from Byte Order Mark std::string bomCharset(GetBomEncoding(textContent)); if (checkConversion(bomCharset, textContent, converted)) { usedCharset = bomCharset; return true; } // try charset from HTTP header (or from other out-of-band source) if (checkConversion(serverReportedCharset, textContent, converted)) { usedCharset = serverReportedCharset; return true; } // try UTF-8 if not tried before if (bomCharset != "UTF-8" && serverReportedCharset != "UTF-8" && checkConversion("UTF-8", textContent, converted)) { usedCharset = "UTF-8"; return true; } // try user charset std::string userCharset(g_langInfo.GetGuiCharSet()); if (checkConversion(userCharset, textContent, converted)) { usedCharset = userCharset; return true; } // try system default charset if (g_charsetConverter.systemToUtf8(textContent, converted, true)) { usedCharset = "char"; // synonym to system charset return true; } // try WINDOWS-1252 if (checkConversion("WINDOWS-1252", textContent, converted)) { usedCharset = "WINDOWS-1252"; return true; } // can't find correct charset // use one of detected as fallback if (!serverReportedCharset.empty()) usedCharset = serverReportedCharset; else if (!bomCharset.empty()) usedCharset = bomCharset; else if (!userCharset.empty()) usedCharset = userCharset; else usedCharset = "WINDOWS-1252"; CLog::Log(LOGWARNING, "%s: Can't correctly convert to UTF-8 charset, converting as \"%s\"", __FUNCTION__, usedCharset.c_str()); g_charsetConverter.ToUtf8(usedCharset, textContent, converted, false); return false; }
void checkConversion(const char* expected) { checkConversion(Sid::String(expected), expected); }
bool CCharsetDetection::ConvertHtmlToUtf8(const std::string& htmlContent, std::string& converted, const std::string& serverReportedCharset, std::string& usedHtmlCharset) { converted.clear(); usedHtmlCharset.clear(); if (htmlContent.empty()) { usedHtmlCharset = "UTF-8"; // any charset can be used for empty content, use UTF-8 as default return false; } // this is relaxed implementation of http://www.w3.org/TR/2013/CR-html5-20130806/single-page.html#determining-the-character-encoding // try to get charset from Byte Order Mark std::string bomCharset(GetBomEncoding(htmlContent)); if (checkConversion(bomCharset, htmlContent, converted)) { usedHtmlCharset = bomCharset; return true; } // try charset from HTTP header (or from other out-of-band source) if (checkConversion(serverReportedCharset, htmlContent, converted)) { usedHtmlCharset = serverReportedCharset; return true; } // try to find charset in HTML std::string declaredCharset(GetHtmlEncodingFromHead(htmlContent)); if (!declaredCharset.empty()) { if (declaredCharset.compare(0, 3, "UTF", 3) == 0) declaredCharset = "UTF-8"; // charset string was found in singlebyte mode, charset can't be multibyte encoding if (checkConversion(declaredCharset, htmlContent, converted)) { usedHtmlCharset = declaredCharset; return true; } } // try UTF-8 if not tried before if (bomCharset != "UTF-8" && serverReportedCharset != "UTF-8" && declaredCharset != "UTF-8" && checkConversion("UTF-8", htmlContent, converted)) { usedHtmlCharset = "UTF-8"; return false; // only guessed value } // try user charset std::string userCharset(g_langInfo.GetGuiCharSet()); if (checkConversion(userCharset, htmlContent, converted)) { usedHtmlCharset = userCharset; return false; // only guessed value } // try WINDOWS-1252 if (checkConversion("WINDOWS-1252", htmlContent, converted)) { usedHtmlCharset = "WINDOWS-1252"; return false; // only guessed value } // can't find exact charset // use one of detected as fallback if (!bomCharset.empty()) usedHtmlCharset = bomCharset; else if (!serverReportedCharset.empty()) usedHtmlCharset = serverReportedCharset; else if (!declaredCharset.empty()) usedHtmlCharset = declaredCharset; else if (!userCharset.empty()) usedHtmlCharset = userCharset; else usedHtmlCharset = "WINDOWS-1252"; CLog::Log(LOGWARNING, "%s: Can't correctly convert to UTF-8 charset, converting as \"%s\"", __FUNCTION__, usedHtmlCharset.c_str()); g_charsetConverter.ToUtf8(usedHtmlCharset, htmlContent, converted, false); return false; }