// Returns a view of this resource's script text, decoding the raw resource
// bytes the first time they are needed.
//
// When the resource bytes are pure ASCII in a byte-based encoding, the bytes
// are handed out directly and no separate decoded copy is created.
StringView CachedScript::script()
{
    // No resource data: nothing to expose.
    if (!m_data)
        return { };

    if (m_decodingState == NeverDecoded) {
        const bool bytesAreAlreadyTheScript = TextEncoding(encoding()).isByteBasedEncoding()
            && m_data->size()
            && charactersAreAllASCII(reinterpret_cast<const LChar*>(m_data->data()), m_data->size());

        if (bytesAreAlreadyTheScript) {
            m_decodingState = DataAndDecodedStringHaveSameBytes;

            // If the encoded and decoded data are the same, there is no decoded data cost!
            setDecodedSize(0);
            m_decodedDataDeletionTimer.stop();

            m_scriptHash = StringHasher::computeHashAndMaskTop8Bits(reinterpret_cast<const LChar*>(m_data->data()), m_data->size());
        }
    }

    if (m_decodingState != DataAndDecodedStringHaveSameBytes) {
        // Decode lazily; an already decoded string is reused as-is.
        if (!m_script) {
            m_script = m_decoder->decodeAndFlush(m_data->data(), encodedSize());
            m_scriptHash = m_script.impl()->hash();
            m_decodingState = DataAndDecodedStringHaveDifferentBytes;
            setDecodedSize(m_script.sizeInBytes());
        }

        // Every access to the decoded copy restarts its deletion timer.
        m_decodedDataDeletionTimer.restart();
        return m_script;
    }

    // The raw bytes double as the script text.
    return { reinterpret_cast<const LChar*>(m_data->data()), m_data->size() };
}
// Builds a String from |length| bytes of UTF-8 starting at |stringStart|.
//
// Returns a null String when |stringStart| is null or the UTF-8 to UTF-16
// conversion does not report conversionOK, and emptyString() when |length|
// is zero. Pure-ASCII input is turned into a string directly from the bytes
// without going through the UTF-16 conversion.
String String::fromUTF8(const LChar* stringStart, size_t length)
{
    RELEASE_ASSERT(length <= std::numeric_limits<unsigned>::max());

    if (!stringStart)
        return String();

    if (!length)
        return emptyString();

    if (charactersAreAllASCII(stringStart, length))
        return StringImpl::create(stringStart, length);

    // One UTF-16 code unit per input byte is always enough room.
    Vector<UChar, 1024> utf16Buffer(length);
    UChar* const utf16Begin = utf16Buffer.data();
    UChar* utf16Cursor = utf16Begin;

    const char* utf8Cursor = reinterpret_cast<const char*>(stringStart);
    const char* const utf8End = reinterpret_cast<const char *>(stringStart + length);

    const bool converted = convertUTF8ToUTF16(&utf8Cursor, utf8End, &utf16Cursor, utf16Begin + utf16Buffer.size()) == conversionOK;
    if (!converted)
        return String();

    unsigned utf16Length = utf16Cursor - utf16Begin;
    // The input is known not to be all ASCII here, so at least one multi-byte
    // sequence was collapsed and the output must be strictly shorter.
    ASSERT(utf16Length < length);
    return StringImpl::create(utf16Begin, utf16Length);
}
// Writes a lower-cased copy of the |length| characters at |input| into
// |output|, one output character per input character.
ALWAYS_INLINE static void makeLower(const CharacterType* input, CharacterType* output, unsigned length)
{
    // FIXME: If we need Unicode lowercasing here, then we probably want the real kind
    // that can potentially change the length of the string rather than the character
    // by character kind. If we don't need Unicode lowercasing, it would be good to
    // simplify this function.

    if (!charactersAreAllASCII(input, length)) {
        // Slow case: character-by-character Unicode lowering.
        for (unsigned index = 0; index < length; ++index)
            output[index] = Unicode::toLower(input[index]);
        return;
    }

    // Fast case for all-ASCII.
    for (unsigned index = 0; index < length; ++index)
        output[index] = toASCIILower(input[index]);
}
static bool isValidYouTubeVideo(const String& path) { if (!charactersAreAllASCII(path.characters(), path.length())) return false; unsigned int len = path.length(); if (len <= sizeof(slash_v_slash)) // check for more than just /v/ return false; CString str = path.lower().utf8(); const char* data = str.data(); if (memcmp(data, slash_v_slash, sizeof(slash_v_slash)) != 0) return false; // Start after /v/ for (unsigned int i = sizeof(slash_v_slash); i < len; i++) { char c = data[i]; // Check for alpha-numeric characters only. if (WTF::isASCIIAlphanumeric(c) || c == '_' || c == '-') continue; // The url can have more parameters such as &hl=en after the video id. // Once we start seeing extra parameters we can return true. return c == '&' && i > sizeof(slash_v_slash); } return true; }
// Returns a view of this resource's script text, creating m_script from the
// resource data on first use.
//
// Pure-ASCII data in an encoding accepted by encodingMayBeAllASCII() is
// wrapped without copying; anything else is run through the decoder.
StringView CachedScript::script()
{
    if (!m_script && m_data) {
        bool canShareBytes = false;
        if (m_ASCIIOptimizationState == Unknown && encodingMayBeAllASCII(encoding()) && m_data->size())
            canShareBytes = charactersAreAllASCII(reinterpret_cast<const LChar*>(m_data->data()), m_data->size());

        if (canShareBytes) {
            m_script = StringImpl::createWithoutCopying(reinterpret_cast<const LChar*>(m_data->data()), m_data->size());
            m_ASCIIOptimizationState = DataAndDecodedStringHaveSameBytes;

            // If the encoded and decoded data are the same, there is no decoded data cost!
            setDecodedSize(0);
            m_decodedDataDeletionTimer.stop();
            return m_script;
        } else {
            m_script = m_decoder->decodeAndFlush(m_data->data(), encodedSize());
            m_ASCIIOptimizationState = DataAndDecodedStringHaveDifferentBytes;
            setDecodedSize(m_script.sizeInBytes());
        }
    }

    // Only a separately decoded copy is subject to timed deletion.
    if (m_ASCIIOptimizationState == DataAndDecodedStringHaveDifferentBytes)
        m_decodedDataDeletionTimer.restart();

    return m_script;
}
// When asked for a CJK font with a native name under a non-CJK locale or // asked for a CJK font with a Romanized name under a CJK locale, // |GetTextFace| (after |CreateFont*|) returns a 'bogus' value (e.g. Arial). // This is not consistent with what MSDN says !! // Therefore, before we call |CreateFont*|, we have to map a Romanized name to // the corresponding native name under a CJK locale and vice versa // under a non-CJK locale. // See the corresponding gecko bugs at // https://bugzilla.mozilla.org/show_bug.cgi?id=373952 // https://bugzilla.mozilla.org/show_bug.cgi?id=231426 static bool LookupAltName(const String& name, String& altName) { struct FontCodepage { WCHAR* name; int codePage; }; struct NamePair { WCHAR* name; FontCodepage altNameCodepage; }; const int japaneseCodepage = 932; const int simplifiedChineseCodepage = 936; const int koreanCodepage = 949; const int traditionalChineseCodepage = 950; // FIXME(jungshik) : This list probably covers 99% of cases. // To cover the remaining 1% and cut down the file size, // consider accessing 'NAME' table of a truetype font // using |GetFontData| and caching the mapping. // In the table below, the ASCII keys are all lower-cased for // case-insensitive matching. 
static const NamePair namePairs[] = { // MS Pゴシック, MS PGothic {L"\xFF2D\xFF33 \xFF30\x30B4\x30B7\x30C3\x30AF", {L"MS PGothic", japaneseCodepage}}, {L"ms pgothic", {L"\xFF2D\xFF33 \xFF30\x30B4\x30B7\x30C3\x30AF", japaneseCodepage}}, // MS P明朝, MS PMincho {L"\xFF2D\xFF33 \xFF30\x660E\x671D", {L"MS PMincho", japaneseCodepage}}, {L"ms pmincho", {L"\xFF2D\xFF33 \xFF30\x660E\x671D", japaneseCodepage}}, // MSゴシック, MS Gothic {L"\xFF2D\xFF33 \x30B4\x30B7\x30C3\x30AF", {L"MS Gothic", japaneseCodepage}}, {L"ms gothic", {L"\xFF2D\xFF33 \x30B4\x30B7\x30C3\x30AF", japaneseCodepage}}, // MS 明朝, MS Mincho {L"\xFF2D\xFF33 \x660E\x671D", {L"MS Mincho", japaneseCodepage}}, {L"ms mincho", {L"\xFF2D\xFF33 \x660E\x671D", japaneseCodepage}}, // メイリオ, Meiryo {L"\x30E1\x30A4\x30EA\x30AA", {L"Meiryo", japaneseCodepage}}, {L"meiryo", {L"\x30E1\x30A4\x30EA\x30AA", japaneseCodepage}}, // 바탕, Batang {L"\xBC14\xD0D5", {L"Batang", koreanCodepage}}, {L"batang", {L"\xBC14\xD0D5", koreanCodepage}}, // 바탕체, Batangche {L"\xBC14\xD0D5\xCCB4", {L"Batangche", koreanCodepage}}, {L"batangche", {L"\xBC14\xD0D5\xCCB4", koreanCodepage}}, // 굴림, Gulim {L"\xAD74\xB9BC", {L"Gulim", koreanCodepage}}, {L"gulim", {L"\xAD74\xB9BC", koreanCodepage}}, // 굴림체, Gulimche {L"\xAD74\xB9BC\xCCB4", {L"Gulimche", koreanCodepage}}, {L"gulimche", {L"\xAD74\xB9BC\xCCB4", koreanCodepage}}, // 돋움, Dotum {L"\xB3CB\xC6C0", {L"Dotum", koreanCodepage}}, {L"dotum", {L"\xB3CB\xC6C0", koreanCodepage}}, // 돋움체, Dotumche {L"\xB3CB\xC6C0\xCCB4", {L"Dotumche", koreanCodepage}}, {L"dotumche", {L"\xB3CB\xC6C0\xCCB4", koreanCodepage}}, // 궁서, Gungsuh {L"\xAD81\xC11C", {L"Gungsuh", koreanCodepage}}, {L"gungsuh", {L"\xAD81\xC11C", koreanCodepage}}, // 궁서체, Gungsuhche {L"\xAD81\xC11C\xCCB4", {L"Gungsuhche", koreanCodepage}}, {L"gungsuhche", {L"\xAD81\xC11C\xCCB4", koreanCodepage}}, // 맑은 고딕, Malgun Gothic {L"\xB9D1\xC740 \xACE0\xB515", {L"Malgun Gothic", koreanCodepage}}, {L"malgun gothic", {L"\xB9D1\xC740 \xACE0\xB515", koreanCodepage}}, // 宋体, 
SimSun {L"\x5B8B\x4F53", {L"SimSun", simplifiedChineseCodepage}}, {L"simsun", {L"\x5B8B\x4F53", simplifiedChineseCodepage}}, // 宋体-ExtB, SimSun-ExtB {L"\x5B8B\x4F53-ExtB", {L"SimSun-ExtB", simplifiedChineseCodepage}}, {L"simsun-extb", {L"\x5B8B\x4F53-extb", simplifiedChineseCodepage}}, // 黑体, SimHei {L"\x9ED1\x4F53", {L"SimHei", simplifiedChineseCodepage}}, {L"simhei", {L"\x9ED1\x4F53", simplifiedChineseCodepage}}, // 新宋体, NSimSun {L"\x65B0\x5B8B\x4F53", {L"NSimSun", simplifiedChineseCodepage}}, {L"nsimsun", {L"\x65B0\x5B8B\x4F53", simplifiedChineseCodepage}}, // 微软雅黑, Microsoft Yahei {L"\x5FAE\x8F6F\x96C5\x9ED1", {L"Microsoft Yahei", simplifiedChineseCodepage}}, {L"microsoft yahei", {L"\x5FAE\x8F6F\x96C5\x9ED1", simplifiedChineseCodepage}}, // 仿宋, FangSong {L"\x4EFF\x5B8B", {L"FangSong", simplifiedChineseCodepage}}, {L"fangsong", {L"\x4EFF\x5B8B", simplifiedChineseCodepage}}, // 楷体, KaiTi {L"\x6977\x4F53", {L"KaiTi", simplifiedChineseCodepage}}, {L"kaiti", {L"\x6977\x4F53", simplifiedChineseCodepage}}, // 仿宋_GB2312, FangSong_GB2312 {L"\x4EFF\x5B8B_GB2312", {L"FangSong_GB2312", simplifiedChineseCodepage}}, {L"fangsong_gb2312", {L"\x4EFF\x5B8B_gb2312", simplifiedChineseCodepage}}, // 楷体_GB2312, KaiTi_GB2312 {L"\x6977\x4F53", {L"KaiTi_GB2312", simplifiedChineseCodepage}}, {L"kaiti_gb2312", {L"\x6977\x4F53_gb2312", simplifiedChineseCodepage}}, // 新細明體, PMingLiu {L"\x65B0\x7D30\x660E\x9AD4", {L"PMingLiu", traditionalChineseCodepage}}, {L"pmingliu", {L"\x65B0\x7D30\x660E\x9AD4", traditionalChineseCodepage}}, // 新細明體-ExtB, PMingLiu-ExtB {L"\x65B0\x7D30\x660E\x9AD4-ExtB", {L"PMingLiu-ExtB", traditionalChineseCodepage}}, {L"pmingliu-extb", {L"\x65B0\x7D30\x660E\x9AD4-extb", traditionalChineseCodepage}}, // 細明體, MingLiu {L"\x7D30\x660E\x9AD4", {L"MingLiu", traditionalChineseCodepage}}, {L"mingliu", {L"\x7D30\x660E\x9AD4", traditionalChineseCodepage}}, // 細明體-ExtB, MingLiu-ExtB {L"\x7D30\x660E\x9AD4-ExtB", {L"MingLiu-ExtB", traditionalChineseCodepage}}, {L"mingliu-extb", 
{L"x65B0\x7D30\x660E\x9AD4-extb", traditionalChineseCodepage}}, // 微軟正黑體, Microsoft JhengHei {L"\x5FAE\x8EDF\x6B63\x9ED1\x9AD4", {L"Microsoft JhengHei", traditionalChineseCodepage}}, {L"microsoft jhengHei", {L"\x5FAE\x8EDF\x6B63\x9ED1\x9AD4", traditionalChineseCodepage}}, // 標楷體, DFKai-SB {L"\x6A19\x6977\x9AD4", {L"DFKai-SB", traditionalChineseCodepage}}, {L"dfkai-sb", {L"\x6A19\x6977\x9AD4", traditionalChineseCodepage}}, // WenQuanYi Zen Hei {L"\x6587\x6cc9\x9a5b\x6b63\x9ed1", {L"WenQuanYi Zen Hei", traditionalChineseCodepage}}, {L"wenquanyi zen hei", {L"\x6587\x6cc9\x9a5b\x6b63\x9ed1", traditionalChineseCodepage}}, // WenQuanYi Zen Hei {L"\x6587\x6cc9\x9a7f\x6b63\x9ed1", {L"WenQuanYi Zen Hei", simplifiedChineseCodepage}}, {L"wenquanyi zen hei", {L"\x6587\x6cc9\x9a7f\x6b63\x9ed1", simplifiedChineseCodepage}}, // AR PL ShanHeiSun Uni, {L"\x6587\x9f0e\x0050\x004c\x7d30\x4e0a\x6d77\x5b8b\x0055\x006e\x0069", {L"AR PL ShanHeiSun Uni", traditionalChineseCodepage}}, {L"ar pl shanheisun uni", {L"\x6587\x9f0e\x0050\x004c\x7d30\x4e0a\x6d77\x5b8b\x0055\x006e\x0069", traditionalChineseCodepage}}, // AR PL ShanHeiSun Uni, {L"\x6587\x9f0e\x0050\x004c\x7ec6\x4e0a\x6d77\x5b8b\x0055\x006e\x0069", {L"AR PL ShanHeiSun Uni", simplifiedChineseCodepage}}, {L"ar pl shanheisun uni", {L"\x6587\x9f0e\x0050\x004c\x7ec6\x4e0a\x6d77\x5b8b\x0055\x006e\x0069", simplifiedChineseCodepage}}, // AR PL ZenKai Uni // Traditional Chinese and Simplified Chinese names are // identical. 
{L"\x6587\x0050\x004C\x4E2D\x6977\x0055\x006E\x0069", {L"AR PL ZenKai Uni", traditionalChineseCodepage}}, {L"ar pl zenkai uni", {L"\x6587\x0050\x004C\x4E2D\x6977\x0055\x006E\x0069", traditionalChineseCodepage}}, {L"\x6587\x0050\x004C\x4E2D\x6977\x0055\x006E\x0069", {L"AR PL ZenKai Uni", simplifiedChineseCodepage}}, {L"ar pl zenkai uni", {L"\x6587\x0050\x004C\x4E2D\x6977\x0055\x006E\x0069", simplifiedChineseCodepage}}, }; typedef HashMap<String, const FontCodepage*> NameMap; static NameMap* fontNameMap = 0; if (!fontNameMap) { fontNameMap = new NameMap; for (size_t i = 0; i < WTF_ARRAY_LENGTH(namePairs); ++i) fontNameMap->set(String(namePairs[i].name), &(namePairs[i].altNameCodepage)); } bool isAscii = false; String n; // use |lower| only for ASCII names // For non-ASCII names, we don't want to invoke an expensive // and unnecessary |lower|. if (charactersAreAllASCII(name)) { isAscii = true; n = name.lower(); } else n = name; NameMap::iterator iter = fontNameMap->find(n); if (iter == fontNameMap->end()) return false; static int systemCp = ::GetACP(); int fontCp = iter->second->codePage; if ((isAscii && systemCp == fontCp) || (!isAscii && systemCp != fontCp)) { altName = String(iter->second->name); return true; } return false; }
// FIXME: consider adding to WebKit String class static bool charactersAreAllASCII(const String& s) { return charactersAreAllASCII(s.characters(), s.length()); }