예제 #1
0
// Runs the conversion check over two explicitly constructed Sid::String
// objects (each paired with the expected text "") and then over a couple of
// C-string literals via the single-argument checkConversion overload.
void StringTestCase::testConversion()
{
    // Explicitly constructed strings; both are checked against "".
    // NOTE(review): the argument 4 is presumably a size/capacity hint - confirm
    // against the Sid::String constructors.
    checkConversion(Sid::String(), "");
    checkConversion(Sid::String(4), "");

    // Literal samples, fed in the listed order to the one-argument overload.
    static const char* const literalSamples[] = { "", "foo" };
    const unsigned sampleCount = sizeof(literalSamples) / sizeof(literalSamples[0]);
    for (unsigned idx = 0; idx < sampleCount; ++idx)
    {
        checkConversion(literalSamples[idx]);
    }
}
예제 #2
0
// Test driver for file-name validation and conversion.
//
// After initTest(), a list of names is handed to FuseZipData::validateFileName,
// two names are checked with checkValidationException, and finally a table of
// converter cases is replayed, in order, through checkConversion /
// checkConvertException. Returns EXIT_SUCCESS once every call has returned.
int main(int, char **) {
    initTest();

    // validator: names handed straight to the validator (nothing is inspected
    // here beyond the call returning normally)
    static const char *const plainNames[] = {
        "normal.name",
        "path/to/normal.name",

        ".hidden",
        "path/to/.hidden",
        "path/to/.hidden/dir",

        "..superhidden",
        "path/to/..superhidden",
        "path/to/..superhidden/dir"
    };
    const char *const *const plainNamesEnd =
        plainNames + sizeof(plainNames) / sizeof(plainNames[0]);
    for (const char *const *name = plainNames; name != plainNamesEnd; ++name) {
        FuseZipData::validateFileName(*name);
    }

    // validator: names checked together with an expected message
    checkValidationException("", "empty file name");
    checkValidationException("moo//moo", "bad file name (two slashes): ");

    // converter
    enum CaseKind {
        CASE_OK,    // replayed through checkConversion
        CASE_ERROR  // replayed through checkConvertException
    };
    // NOTE(review): judging by the expected messages, flagA looks like a
    // "read-only mode" switch and flagB like a "prepend prefix" switch -
    // confirm against the helper signatures.
    struct ConverterCase {
        CaseKind kind;
        const char *input;
        bool flagA;
        bool flagB;
        const char *text;   // expected result (CASE_OK) or expected message (CASE_ERROR)
    };
    static const ConverterCase converterCases[] = {
        { CASE_OK,    "normal.name",         true,  false, "normal.name" },
        { CASE_OK,    "normal.name",         true,  true,  "CUR/normal.name" },
        { CASE_OK,    "path/to/normal.name", true,  false, "path/to/normal.name" },
        { CASE_OK,    "path/to/normal.name", true,  true,  "CUR/path/to/normal.name" },

        { CASE_ERROR, ".",                   true,  false, "bad file name: " },
        { CASE_ERROR, "./",                  true,  false, "bad file name: " },
        { CASE_ERROR, "abc/./cde",           true,  false, "bad file name: " },
        { CASE_ERROR, "abc/.",               true,  false, "bad file name: " },

        { CASE_OK,    ".hidden",             false, false, ".hidden" },
        { CASE_OK,    "path/to/.hidden",     false, false, "path/to/.hidden" },
        { CASE_OK,    "path/to/.hidden/dir", false, false, "path/to/.hidden/dir" },

        { CASE_ERROR, ".",                   false, true,  "bad file name: ." },
        { CASE_ERROR, ".",                   true,  true,  "bad file name: ." },
        { CASE_ERROR, "/.",                  true,  true,  "bad file name: /." },
        { CASE_ERROR, "./",                  false, false, "bad file name: ./" },
        { CASE_ERROR, "./",                  true,  false, "bad file name: ./" },

        { CASE_ERROR, "..",                  false, true,  "bad file name: .." },
        { CASE_ERROR, "../",                 false, true,  "paths relative to parent directory are not supported" },
        { CASE_OK,    "../",                 true,  true,  "UP/" },
        { CASE_OK,    "../../../",           true,  true,  "UPUPUP/" },

        { CASE_ERROR, "/..",                 true,  true,  "bad file name: /.." },
        { CASE_ERROR, "/../blah",            true,  true,  "bad file name: /../blah" },

        { CASE_OK,    "../abc",              true,  true,  "UP/abc" },
        { CASE_OK,    "../../../abc",        true,  true,  "UPUPUP/abc" },

        { CASE_ERROR, "abc/../cde",          false, false, "bad file name: " },
        { CASE_ERROR, "abc/../cde",          true,  true,  "bad file name: " },
        { CASE_ERROR, "abc/..",              false, false, "bad file name: " },
        { CASE_ERROR, "abc/..",              true,  true,  "bad file name: " },
        { CASE_ERROR, "../abc/..",           true,  true,  "bad file name: " },

        { CASE_ERROR, "/",                   false, false, "absolute paths are not supported in read-write mode" },
        { CASE_ERROR, "/rootname",           false, false, "absolute paths are not supported in read-write mode" },

        { CASE_OK,    "/",                   true,  true,  "ROOT/" },
        { CASE_OK,    "/rootname",           true,  true,  "ROOT/rootname" },
        { CASE_OK,    "/path/name",          true,  true,  "ROOT/path/name" }
    };
    const ConverterCase *const converterCasesEnd =
        converterCases + sizeof(converterCases) / sizeof(converterCases[0]);
    for (const ConverterCase *tc = converterCases; tc != converterCasesEnd; ++tc) {
        // The two helpers take the same data in a different argument order.
        if (tc->kind == CASE_OK) {
            checkConversion(tc->input, tc->flagA, tc->flagB, tc->text);
        } else {
            checkConvertException(tc->input, tc->text, tc->flagA, tc->flagB);
        }
    }

    return EXIT_SUCCESS;
}
예제 #3
0
// Converts plain text to UTF-8 by trying candidate source charsets in a fixed
// order and stopping at the first one that converts successfully.
//
// textContent           - text to convert
// converted             - receives the conversion result (cleared on entry)
// serverReportedCharset - charset supplied out-of-band (e.g. HTTP header), may be empty
// usedCharset           - receives the name of the charset that was finally used
//
// Returns true when one of the attempts succeeded (or the input was empty);
// returns false when every attempt failed and a fallback conversion was used.
bool CCharsetDetection::ConvertPlainTextToUtf8(const std::string& textContent, std::string& converted, const std::string& serverReportedCharset, std::string& usedCharset)
{
  converted.clear();
  usedCharset.clear();

  // empty content fits every charset, so simply report UTF-8
  if (textContent.empty())
  {
    usedCharset = "UTF-8";
    return true;
  }

  // attempt 1: charset derived from the Byte Order Mark
  const std::string charsetFromBom(GetBomEncoding(textContent));
  if (checkConversion(charsetFromBom, textContent, converted))
  {
    usedCharset = charsetFromBom;
    return true;
  }

  // attempt 2: charset reported by the server (HTTP header or another out-of-band source)
  if (checkConversion(serverReportedCharset, textContent, converted))
  {
    usedCharset = serverReportedCharset;
    return true;
  }

  // attempt 3: UTF-8, skipped when one of the previous attempts already was UTF-8
  const bool utf8NotYetTried = charsetFromBom != "UTF-8" && serverReportedCharset != "UTF-8";
  if (utf8NotYetTried && checkConversion("UTF-8", textContent, converted))
  {
    usedCharset = "UTF-8";
    return true;
  }

  // attempt 4: GUI charset from the language settings (queried only when needed)
  const std::string charsetFromGui(g_langInfo.GetGuiCharSet());
  if (checkConversion(charsetFromGui, textContent, converted))
  {
    usedCharset = charsetFromGui;
    return true;
  }

  // attempt 5: system default charset
  // NOTE(review): the trailing 'true' presumably makes the conversion fail on invalid input - confirm
  if (g_charsetConverter.systemToUtf8(textContent, converted, true))
  {
    usedCharset = "char"; // synonym to system charset
    return true;
  }

  // attempt 6: WINDOWS-1252
  if (checkConversion("WINDOWS-1252", textContent, converted))
  {
    usedCharset = "WINDOWS-1252";
    return true;
  }

  // Nothing converted cleanly: pick the first non-empty candidate
  // (server, then BOM, then GUI) and fall back to WINDOWS-1252 otherwise.
  if (!serverReportedCharset.empty())
  {
    usedCharset = serverReportedCharset;
  }
  else if (!charsetFromBom.empty())
  {
    usedCharset = charsetFromBom;
  }
  else if (!charsetFromGui.empty())
  {
    usedCharset = charsetFromGui;
  }
  else
  {
    usedCharset = "WINDOWS-1252";
  }

  CLog::Log(LOGWARNING, "%s: Can't correctly convert to UTF-8 charset, converting as \"%s\"", __FUNCTION__, usedCharset.c_str());
  // NOTE(review): the trailing 'false' presumably requests a lenient conversion - confirm
  g_charsetConverter.ToUtf8(usedCharset, textContent, converted, false);

  return false;
}
예제 #4
0
void checkConversion(const char* expected)
{
    checkConversion(Sid::String(expected), expected);
}
예제 #5
0
// Converts HTML content to UTF-8 by trying candidate source charsets in a fixed
// order and stopping at the first one that converts successfully.
//
// htmlContent           - HTML text to convert
// converted             - receives the conversion result (cleared on entry)
// serverReportedCharset - charset supplied out-of-band (e.g. HTTP header), may be empty
// usedHtmlCharset       - receives the name of the charset that was finally used
//
// Returns true only when the charset came from the BOM, the server or the HTML
// declaration and converted successfully; returns false for empty input, for
// merely guessed charsets and for the final fallback conversion.
bool CCharsetDetection::ConvertHtmlToUtf8(const std::string& htmlContent, std::string& converted, const std::string& serverReportedCharset, std::string& usedHtmlCharset)
{
  converted.clear();
  usedHtmlCharset.clear();

  // empty content fits every charset; UTF-8 is reported, but nothing was detected
  if (htmlContent.empty())
  {
    usedHtmlCharset = "UTF-8";
    return false;
  }

  // relaxed implementation of http://www.w3.org/TR/2013/CR-html5-20130806/single-page.html#determining-the-character-encoding

  // attempt 1: charset derived from the Byte Order Mark
  const std::string charsetFromBom(GetBomEncoding(htmlContent));
  if (checkConversion(charsetFromBom, htmlContent, converted))
  {
    usedHtmlCharset = charsetFromBom;
    return true;
  }

  // attempt 2: charset reported by the server (HTTP header or another out-of-band source)
  if (checkConversion(serverReportedCharset, htmlContent, converted))
  {
    usedHtmlCharset = serverReportedCharset;
    return true;
  }

  // attempt 3: charset declared inside the HTML itself
  std::string charsetFromHtml(GetHtmlEncodingFromHead(htmlContent));
  if (!charsetFromHtml.empty())
  {
    // the declaration was located while reading the content as single-byte text,
    // so a declared "UTF..." charset cannot be a multibyte one: treat it as UTF-8
    const bool declaresUtf = charsetFromHtml.compare(0, 3, "UTF", 3) == 0;
    if (declaresUtf)
      charsetFromHtml = "UTF-8";

    if (checkConversion(charsetFromHtml, htmlContent, converted))
    {
      usedHtmlCharset = charsetFromHtml;
      return true;
    }
  }

  // attempt 4: UTF-8, skipped when one of the previous attempts already was UTF-8
  const bool utf8NotYetTried = charsetFromBom != "UTF-8"
                            && serverReportedCharset != "UTF-8"
                            && charsetFromHtml != "UTF-8";
  if (utf8NotYetTried && checkConversion("UTF-8", htmlContent, converted))
  {
    usedHtmlCharset = "UTF-8";
    return false; // guessed, not detected
  }

  // attempt 5: GUI charset from the language settings (queried only when needed)
  const std::string charsetFromGui(g_langInfo.GetGuiCharSet());
  if (checkConversion(charsetFromGui, htmlContent, converted))
  {
    usedHtmlCharset = charsetFromGui;
    return false; // guessed, not detected
  }

  // attempt 6: WINDOWS-1252
  if (checkConversion("WINDOWS-1252", htmlContent, converted))
  {
    usedHtmlCharset = "WINDOWS-1252";
    return false; // guessed, not detected
  }

  // Nothing converted cleanly: pick the first non-empty candidate
  // (BOM, then server, then HTML declaration, then GUI) and fall back to
  // WINDOWS-1252 otherwise.
  if (!charsetFromBom.empty())
  {
    usedHtmlCharset = charsetFromBom;
  }
  else if (!serverReportedCharset.empty())
  {
    usedHtmlCharset = serverReportedCharset;
  }
  else if (!charsetFromHtml.empty())
  {
    usedHtmlCharset = charsetFromHtml;
  }
  else if (!charsetFromGui.empty())
  {
    usedHtmlCharset = charsetFromGui;
  }
  else
  {
    usedHtmlCharset = "WINDOWS-1252";
  }

  CLog::Log(LOGWARNING, "%s: Can't correctly convert to UTF-8 charset, converting as \"%s\"", __FUNCTION__, usedHtmlCharset.c_str());
  // NOTE(review): the trailing 'false' presumably requests a lenient conversion - confirm
  g_charsetConverter.ToUtf8(usedHtmlCharset, htmlContent, converted, false);

  return false;
}