bool TestExtFb::test_fb_utf8ize() { { Variant s = "hon\xE7k"; VERIFY(f_fb_utf8ize(ref(s))); VS(s, "honk"); } { Variant s = "test\xE0\xB0\xB1\xE0"; VERIFY(f_fb_utf8ize(ref(s))); VS(s, "test\xE0\xB0\xB1"); } { Variant s = "test\xE0\xB0\xB1\xE0\xE0"; VERIFY(f_fb_utf8ize(ref(s))); VS(s, "test\xE0\xB0\xB1"); } return Count(true); }
bool TestExtFb::test_fb_utf8_strlen() { VS(f_fb_utf8_strlen(""), 0); VS(f_fb_utf8_strlen("a"), 1); VS(f_fb_utf8_strlen("ab"), 2); // Valid UTF-8 sequence returns code point count. VS(f_fb_utf8_strlen("\ub098\ub294"), 2); VS(f_fb_utf8_strlen(INVALID_UTF_8_STRING), 2); for (int i = 0; i < 2; i++) { // Test utf8ize() handling of invalid UTF-8 sequences and how // fb_utf8_strlen() counts them. // RuntimeOption::Utf8izeReplace set to non-zero value replaces invalid // bytes, including '\0' with a special UTF-8 code point: "\uFFFD". // RuntimeOption::Utf8izeReplace set to zero deletes the invalid // byte then continues parsing. RuntimeOption::Utf8izeReplace = (i == 0); { Variant s = String("abc\0def", 7, AttachLiteral); VS(s.toString().size(), 7); VS(f_fb_utf8_strlen(s), 7); f_fb_utf8ize(ref(s)); // Modifies s int ret = s.toString().size(); if (RuntimeOption::Utf8izeReplace) { VS(ret, 9); // '\0' converted to "\uFFFD" } else { VS(ret, 6); // '\0' deleted from s } ret = f_fb_utf8_strlen(s); if (RuntimeOption::Utf8izeReplace) { VS(ret, 7); // '\0' and "\uFFFD" are both one code point, so no change } else { VS(ret, 6); // '\0' deleted, so one fewer code point } } } return Count(true); }
bool TestExtFb::test_fb_utf8ize() { for (int i = 0; i < 2; i++) { RuntimeOption::Utf8izeReplace = (i == 0); { Variant s = "hon\xE7k"; VERIFY(f_fb_utf8ize(ref(s))); if (RuntimeOption::Utf8izeReplace) { VS(s, "hon\uFFFDk"); } else { VS(s, "honk"); } } { Variant s = "test\xE0\xB0\xB1\xE0"; VERIFY(f_fb_utf8ize(ref(s))); if (RuntimeOption::Utf8izeReplace) { VS(s, "test\xE0\xB0\xB1\uFFFD"); } else { VS(s, "test\xE0\xB0\xB1"); } } { Variant s = "test\xE0\xB0\xB1\xE0\xE0"; VERIFY(f_fb_utf8ize(ref(s))); if (RuntimeOption::Utf8izeReplace) { VS(s, "test\xE0\xB0\xB1\uFFFD\uFFFD"); } else { VS(s, "test\xE0\xB0\xB1"); } } { Variant s = "\xfc"; VERIFY(f_fb_utf8ize(ref(s))); if (RuntimeOption::Utf8izeReplace) { VS(s, "\uFFFD"); } else { VS(s, ""); } } { Variant s = "\xfc\xfc"; VERIFY(f_fb_utf8ize(ref(s))); if (RuntimeOption::Utf8izeReplace) { VS(s, "\uFFFD\uFFFD"); } else { VS(s, ""); } } { // We intentionally consider null bytes invalid sequences. Variant s = String("abc\0def", 7, AttachLiteral); VERIFY(f_fb_utf8ize(ref(s))); if (RuntimeOption::Utf8izeReplace) { VS(s, "abc\uFFFD""def"); } else { VS(s, "abcdef"); } } { // ICU treats this as as two code points. // The old implementation treated this as three code points. Variant s = INVALID_UTF_8_STRING; VERIFY(f_fb_utf8ize(ref(s))); if (RuntimeOption::Utf8izeReplace) { VS(s, "\uFFFD""\x28"); } else { VS(s, "\x28"); } } } return Count(true); }
// Exercises f_htmlspecialchars() and f_fb_htmlspecialchars() against fixed
// inputs: plain markup, lone high bytes, a string with a leading NUL, and a
// string that mixes valid code points with stray bytes. The last group is run
// under both settings of RuntimeOption::Utf8izeReplace, whose entry value is
// saved in s and written back before returning.
//
// NOTE(review): several expected-value literals in this function look as if
// HTML entity references were decoded somewhere before the text reached this
// file (expected text identical to the unescaped input, raw non-ASCII
// characters where entities would be expected, and two literals that are not
// well-formed). Restore them from the upstream copy of this test; the code
// below is left exactly as found.
bool TestExtString::test_htmlspecialchars() {
  // NOTE(review): expected value is identical to the input here; see the
  // note at the top of the function.
  VS(f_htmlspecialchars("<a href='test'>Test</a>", k_ENT_QUOTES), "<a href='test'>Test</a>");

  // Lone 0xA0 and the pair C2 A0 come back byte-for-byte, with no charset
  // argument, with an empty one, and with an explicit UTF-8 one.
  VS(f_bin2hex(f_htmlspecialchars("\xA0", k_ENT_COMPAT)), "a0");
  VS(f_bin2hex(f_htmlspecialchars("\xc2\xA0", k_ENT_COMPAT, "")), "c2a0");
  VS(f_bin2hex(f_htmlspecialchars("\xc2\xA0", k_ENT_COMPAT, "UTF-8")), "c2a0");

  // Four-byte string whose first byte is NUL; without a charset argument
  // both functions are expected to return it unchanged.
  String zfoo = String("\0foo", 4, AttachLiteral);
  VS(f_htmlspecialchars(zfoo, k_ENT_COMPAT), zfoo);
  VS(f_fb_htmlspecialchars(zfoo, k_ENT_COMPAT), zfoo);

  // The fourth argument passes an array holding one extra character (z, then
  // d). Presumably these are additional characters to escape -- TODO confirm.
  // NOTE(review): the expected literals of the next two statements contain an
  // unescaped double quote and are not well-formed string literals. The two
  // statements are deliberately kept together on one untouched line.
  VS(f_fb_htmlspecialchars("abcdef'\"{}@gz", k_ENT_QUOTES, "", Array::Create("z")), "abcdef'"{}@gz"); VS(f_fb_htmlspecialchars("abcdef'\"\u00a1\uabcd", k_ENT_FB_UTF8, "", Array::Create("d")), "abcdef'"¡ꯍ");
  VS(f_fb_htmlspecialchars("abcdef'\"\u00a1\uabcd", k_ENT_FB_UTF8_ONLY, "", Array::Create("d")), "abcdef'\"¡ꯍ");

  // Valid code points interleaved with bytes that, judging by the
  // expectations further down, are treated as invalid.
  String input = "\u00a1\xc2\x41" "\u0561\xd5\xe0" "\u3862\xe3\x80\xf0" "\U000218a3\xf0\xa1\xa2\x41" "hello\x80world" "\xed\xa0\x80" "\xe0\x80\xbc" "\xc2";

  // Remember the option's entry value; it is restored at the end.
  bool s = RuntimeOption::Utf8izeReplace;

  // Pass 1: option off. f_fb_utf8ize() output is used as the reference for
  // what f_fb_htmlspecialchars() should produce with k_ENT_QUOTES.
  RuntimeOption::Utf8izeReplace = false;
  Variant tmp = input;
  f_fb_utf8ize(ref(tmp));
  String sanitized = tmp.toString();
  VS(f_fb_htmlspecialchars(input, k_ENT_QUOTES, "", Array()), sanitized.data());
  // NOTE(review): raw non-ASCII characters in the expected value; see the
  // note at the top of the function.
  VS(f_fb_htmlspecialchars(input, k_ENT_FB_UTF8, "", Array()), "¡A" "ա" "㡢" "𡢣A" "helloworld");
  // Feeding the already sanitized text back in must leave it unchanged.
  VS(f_fb_htmlspecialchars(sanitized, k_ENT_QUOTES, "", Array()), sanitized.data());
  // With an explicit UTF-8 charset the leading NUL is expected to vanish.
  VS(f_fb_htmlspecialchars(zfoo, k_ENT_COMPAT, "UTF-8"), "foo");

  // Pass 2: option on. Same sequence of checks; the charset argument is
  // spelled in mixed and lower case here, presumably to cover
  // case-insensitive matching -- TODO confirm.
  RuntimeOption::Utf8izeReplace = true;
  tmp = input;
  f_fb_utf8ize(ref(tmp));
  sanitized = tmp.toString();
  VS(f_fb_htmlspecialchars(input, k_ENT_QUOTES, "UtF-8", Array()), sanitized.data());
  // NOTE(review): raw non-ASCII characters in the expected value; see the
  // note at the top of the function.
  VS(f_fb_htmlspecialchars(input, k_ENT_FB_UTF8, "utf-8", Array()), "¡�A" "ա��" "㡢��" "𡢣�A" "hello�world" "�" "�" "�");
  VS(f_fb_htmlspecialchars(sanitized, k_ENT_QUOTES, "", Array()), sanitized.data());
  // With the option on, the leading NUL is expected to become U+FFFD.
  VS(f_fb_htmlspecialchars(zfoo, k_ENT_COMPAT, "UTF-8"), "\ufffdfoo");

  // Put the option back to its entry value.
  RuntimeOption::Utf8izeReplace = s;
  return Count(true); }