コード例 #1
0
ファイル: test_ext_fb.cpp プロジェクト: Neomeng/hiphop-php
bool TestExtFb::test_fb_utf8ize() {
  {
    Variant s = "hon\xE7k";
    VERIFY(f_fb_utf8ize(ref(s)));
    VS(s, "honk");
  }
  {
    Variant s = "test\xE0\xB0\xB1\xE0";
    VERIFY(f_fb_utf8ize(ref(s)));
    VS(s, "test\xE0\xB0\xB1");
  }
  {
    Variant s = "test\xE0\xB0\xB1\xE0\xE0";
    VERIFY(f_fb_utf8ize(ref(s)));
    VS(s, "test\xE0\xB0\xB1");
  }
  return Count(true);
}
コード例 #2
0
ファイル: test_ext_fb.cpp プロジェクト: AviMoto/hiphop-php
// Checks fb_utf8_strlen() on empty, ASCII, multi-byte and invalid input, and
// checks how fb_utf8ize() changes the byte length and the code point count of
// a string with an embedded '\0' under both RuntimeOption::Utf8izeReplace
// settings.
bool TestExtFb::test_fb_utf8_strlen() {
  // The loop below overwrites the global RuntimeOption::Utf8izeReplace.
  // Remember the value it had on entry and put it back on every exit path,
  // so code that runs after this test sees the setting it started with.
  struct Utf8izeReplaceRestorer {
    Utf8izeReplaceRestorer() : m_saved(RuntimeOption::Utf8izeReplace) {}
    ~Utf8izeReplaceRestorer() { RuntimeOption::Utf8izeReplace = m_saved; }
    bool m_saved;
  } restoreUtf8izeReplace;

  VS(f_fb_utf8_strlen(""), 0);
  VS(f_fb_utf8_strlen("a"), 1);
  VS(f_fb_utf8_strlen("ab"), 2);
  // Valid UTF-8 sequence returns code point count.
  VS(f_fb_utf8_strlen("\ub098\ub294"), 2);
  VS(f_fb_utf8_strlen(INVALID_UTF_8_STRING), 2);
  for (int i = 0; i < 2; i++) {
    // Test utf8ize() handling of invalid UTF-8 sequences and how
    // fb_utf8_strlen() counts them.
    // RuntimeOption::Utf8izeReplace set to non-zero value replaces invalid
    // bytes, including '\0' with a special UTF-8 code point: "\uFFFD".
    // RuntimeOption::Utf8izeReplace set to zero deletes the invalid
    // byte then continues parsing.
    // First pass runs with replacement on, second pass with it off.
    RuntimeOption::Utf8izeReplace = (i == 0);
    {
      // Explicit length so the embedded '\0' is part of the string.
      Variant s = String("abc\0def", 7, AttachLiteral);
      VS(s.toString().size(), 7);
      VS(f_fb_utf8_strlen(s), 7);

      f_fb_utf8ize(ref(s)); // Modifies s
      int ret = s.toString().size();
      if (RuntimeOption::Utf8izeReplace) {
        VS(ret, 9); // '\0' converted to "\uFFFD"
      } else {
        VS(ret, 6); // '\0' deleted from s
      }
      ret = f_fb_utf8_strlen(s);
      if (RuntimeOption::Utf8izeReplace) {
        VS(ret, 7); // '\0' and "\uFFFD" are both one code point, so no change
      } else {
        VS(ret, 6); // '\0' deleted, so one fewer code point
      }
    }
  }
  return Count(true);
}
コード例 #3
0
ファイル: test_ext_fb.cpp プロジェクト: AviMoto/hiphop-php
// Runs fb_utf8ize() over a set of malformed inputs twice: once with
// RuntimeOption::Utf8izeReplace enabled (the expected strings contain
// "\uFFFD" where the bad bytes were) and once with it disabled (the expected
// strings simply lack the bad bytes).
bool TestExtFb::test_fb_utf8ize() {
  // The loop below overwrites the global RuntimeOption::Utf8izeReplace.
  // Remember the value it had on entry and put it back on every exit path,
  // so code that runs after this test sees the setting it started with.
  struct Utf8izeReplaceRestorer {
    Utf8izeReplaceRestorer() : m_saved(RuntimeOption::Utf8izeReplace) {}
    ~Utf8izeReplaceRestorer() { RuntimeOption::Utf8izeReplace = m_saved; }
    bool m_saved;
  } restoreUtf8izeReplace;

  for (int i = 0; i < 2; i++) {
    // First pass runs with replacement on, second pass with it off.
    RuntimeOption::Utf8izeReplace = (i == 0);
    {
      // A single stray 0xE7 byte inside ASCII text.
      Variant s = "hon\xE7k";
      VERIFY(f_fb_utf8ize(ref(s)));
      if (RuntimeOption::Utf8izeReplace) {
        VS(s, "hon\uFFFDk");
      } else {
        VS(s, "honk");
      }
    }
    {
      // A complete three-byte sequence followed by one dangling 0xE0.
      Variant s = "test\xE0\xB0\xB1\xE0";
      VERIFY(f_fb_utf8ize(ref(s)));
      if (RuntimeOption::Utf8izeReplace) {
        VS(s, "test\xE0\xB0\xB1\uFFFD");
      } else {
        VS(s, "test\xE0\xB0\xB1");
      }
    }
    {
      // Same, with two dangling 0xE0 bytes; each one is expected to be
      // handled separately.
      Variant s = "test\xE0\xB0\xB1\xE0\xE0";
      VERIFY(f_fb_utf8ize(ref(s)));
      if (RuntimeOption::Utf8izeReplace) {
        VS(s, "test\xE0\xB0\xB1\uFFFD\uFFFD");
      } else {
        VS(s, "test\xE0\xB0\xB1");
      }
    }
    {
      // Input consisting of nothing but one 0xFC byte.
      Variant s = "\xfc";
      VERIFY(f_fb_utf8ize(ref(s)));
      if (RuntimeOption::Utf8izeReplace) {
        VS(s, "\uFFFD");
      } else {
        VS(s, "");
      }
    }
    {
      // Input consisting of two 0xFC bytes.
      Variant s = "\xfc\xfc";
      VERIFY(f_fb_utf8ize(ref(s)));
      if (RuntimeOption::Utf8izeReplace) {
        VS(s, "\uFFFD\uFFFD");
      } else {
        VS(s, "");
      }
    }
    {
      // We intentionally consider null bytes invalid sequences.
      // Explicit length so the embedded '\0' is part of the string.
      Variant s = String("abc\0def", 7, AttachLiteral);
      VERIFY(f_fb_utf8ize(ref(s)));
      if (RuntimeOption::Utf8izeReplace) {
        VS(s, "abc\uFFFD""def");
      } else {
        VS(s, "abcdef");
      }
    }
    {
      // ICU treats this as two code points.
      // The old implementation treated this as three code points.
      Variant s = INVALID_UTF_8_STRING;
      VERIFY(f_fb_utf8ize(ref(s)));
      if (RuntimeOption::Utf8izeReplace) {
        VS(s, "\uFFFD""\x28");
      } else {
        VS(s, "\x28");
      }
    }
  }
  return Count(true);
}
コード例 #4
0
// Checks htmlspecialchars() and fb_htmlspecialchars() on plain markup, on
// strings with non-ASCII and NUL bytes, with extra characters to escape, and
// on a string full of ill-formed UTF-8 under both settings of
// RuntimeOption::Utf8izeReplace.
bool TestExtString::test_htmlspecialchars() {
  // The second half of this test overwrites the global
  // RuntimeOption::Utf8izeReplace. Remember the value it had on entry and put
  // it back on every exit path (including an early return or an exception),
  // so code that runs after this test sees the setting it started with.
  struct Utf8izeReplaceRestorer {
    Utf8izeReplaceRestorer() : m_saved(RuntimeOption::Utf8izeReplace) {}
    ~Utf8izeReplaceRestorer() { RuntimeOption::Utf8izeReplace = m_saved; }
    bool m_saved;
  } restoreUtf8izeReplace;

  // Angle brackets and single quotes are expected as entities with ENT_QUOTES.
  VS(f_htmlspecialchars("<a href='test'>Test</a>", k_ENT_QUOTES),
     "&lt;a href=&#039;test&#039;&gt;Test&lt;/a&gt;");

  // Non-ASCII bytes are expected to come back unchanged (compared as hex).
  VS(f_bin2hex(f_htmlspecialchars("\xA0", k_ENT_COMPAT)), "a0");
  VS(f_bin2hex(f_htmlspecialchars("\xc2\xA0", k_ENT_COMPAT, "")), "c2a0");
  VS(f_bin2hex(f_htmlspecialchars("\xc2\xA0", k_ENT_COMPAT, "UTF-8")), "c2a0");
  // Explicit length so the leading '\0' is part of the string. Without a
  // charset argument both functions are expected to return it unchanged.
  String zfoo = String("\0foo", 4, AttachLiteral);
  VS(f_htmlspecialchars(zfoo, k_ENT_COMPAT), zfoo);
  VS(f_fb_htmlspecialchars(zfoo, k_ENT_COMPAT), zfoo);

  // Extra character 'z': the expected output has the quotes, braces, '@' and
  // 'z' as decimal entities.
  VS(f_fb_htmlspecialchars("abcdef'\"{}@gz", k_ENT_QUOTES,
                           "", Array::Create("z")),
     "abcdef&#039;&quot;&#123;&#125;&#064;g&#122;");

  // ENT_FB_UTF8 with extra character 'd': 'd' and the quotes are expected as
  // decimal entities, the non-ASCII code points as hex entities.
  VS(f_fb_htmlspecialchars("abcdef'\"\u00a1\uabcd", k_ENT_FB_UTF8,
                           "", Array::Create("d")),
     "abc&#100;ef&#039;&quot;&#xa1;&#xabcd;");

  // ENT_FB_UTF8_ONLY: only the non-ASCII code points are expected as
  // entities; 'd' and the quotes stay as they are.
  VS(f_fb_htmlspecialchars("abcdef'\"\u00a1\uabcd", k_ENT_FB_UTF8_ONLY,
                           "", Array::Create("d")),
     "abcdef'\"&#xa1;&#xabcd;");

  // Valid code points of increasing encoded length, each followed by stray
  // or truncated bytes, then further ill-formed byte sequences.
  String input =
    "\u00a1\xc2\x41"
    "\u0561\xd5\xe0"
    "\u3862\xe3\x80\xf0"
    "\U000218a3\xf0\xa1\xa2\x41"
    "hello\x80world"
    "\xed\xa0\x80"
    "\xe0\x80\xbc"
    "\xc2";

  // --- Utf8izeReplace off ---
  RuntimeOption::Utf8izeReplace = false;
  Variant tmp = input;
  f_fb_utf8ize(ref(tmp));
  String sanitized = tmp.toString();

  // Escaping the raw input is expected to give the same bytes as running
  // fb_utf8ize() on it.
  VS(f_fb_htmlspecialchars(input, k_ENT_QUOTES, "", Array()), sanitized.data());

  // With ENT_FB_UTF8 the expected output holds only the valid code points
  // (as hex entities) and the ASCII text.
  VS(f_fb_htmlspecialchars(input,
                           k_ENT_FB_UTF8, "", Array()),
     "&#xa1;A"
     "&#x561;"
     "&#x3862;"
     "&#x218a3;A"
     "helloworld");

  // Already-sanitized text is expected to pass through unchanged.
  VS(f_fb_htmlspecialchars(sanitized, k_ENT_QUOTES, "", Array()),
     sanitized.data());

  // With an explicit UTF-8 charset the leading '\0' is expected to be gone.
  VS(f_fb_htmlspecialchars(zfoo, k_ENT_COMPAT, "UTF-8"), "foo");

  // --- Utf8izeReplace on ---
  RuntimeOption::Utf8izeReplace = true;
  tmp = input;
  f_fb_utf8ize(ref(tmp));
  sanitized = tmp.toString();

  // The charset argument is given in mixed case here and lower case below.
  VS(f_fb_htmlspecialchars(input, k_ENT_QUOTES, "UtF-8", Array()),
     sanitized.data());

  // Same input as before; the expected output now has "&#xfffd;" where the
  // ill-formed bytes were.
  VS(f_fb_htmlspecialchars(input, k_ENT_FB_UTF8, "utf-8", Array()),
     "&#xa1;&#xfffd;A"
     "&#x561;&#xfffd;&#xfffd;"
     "&#x3862;&#xfffd;&#xfffd;"
     "&#x218a3;&#xfffd;A"
     "hello&#xfffd;world"
     "&#xfffd;"
     "&#xfffd;"
     "&#xfffd;");

  VS(f_fb_htmlspecialchars(sanitized, k_ENT_QUOTES, "", Array()),
     sanitized.data());

  // With replacement on, the leading '\0' is expected to become "\ufffd".
  VS(f_fb_htmlspecialchars(zfoo, k_ENT_COMPAT, "UTF-8"), "\ufffdfoo");

  // RuntimeOption::Utf8izeReplace is restored by restoreUtf8izeReplace when
  // this function returns.
  return Count(true);
}