Esempio n. 1
0
StringView CachedScript::script()
{
    if (!m_data)
        return { };

    if (m_decodingState == NeverDecoded
        && TextEncoding(encoding()).isByteBasedEncoding()
        && m_data->size()
        && charactersAreAllASCII(reinterpret_cast<const LChar*>(m_data->data()), m_data->size())) {

        m_decodingState = DataAndDecodedStringHaveSameBytes;

        // If the encoded and decoded data are the same, there is no decoded data cost!
        setDecodedSize(0);
        m_decodedDataDeletionTimer.stop();

        m_scriptHash = StringHasher::computeHashAndMaskTop8Bits(reinterpret_cast<const LChar*>(m_data->data()), m_data->size());
    }

    if (m_decodingState == DataAndDecodedStringHaveSameBytes)
        return { reinterpret_cast<const LChar*>(m_data->data()), m_data->size() };

    if (!m_script) {
        m_script = m_decoder->decodeAndFlush(m_data->data(), encodedSize());
        m_scriptHash = m_script.impl()->hash();
        m_decodingState = DataAndDecodedStringHaveDifferentBytes;
        setDecodedSize(m_script.sizeInBytes());
    }

    m_decodedDataDeletionTimer.restart();
    return m_script;
}
Esempio n. 2
0
String String::fromUTF8(const LChar* stringStart, size_t length)
{
    RELEASE_ASSERT(length <= std::numeric_limits<unsigned>::max());

    if (!stringStart)
        return String();

    if (!length)
        return emptyString();

    if (charactersAreAllASCII(stringStart, length))
        return StringImpl::create(stringStart, length);

    Vector<UChar, 1024> buffer(length);
    UChar* bufferStart = buffer.data();

    UChar* bufferCurrent = bufferStart;
    const char* stringCurrent = reinterpret_cast<const char*>(stringStart);
    if (convertUTF8ToUTF16(&stringCurrent, reinterpret_cast<const char *>(stringStart + length), &bufferCurrent, bufferCurrent + buffer.size()) != conversionOK)
        return String();

    unsigned utf16Length = bufferCurrent - bufferStart;
    ASSERT(utf16Length < length);
    return StringImpl::create(bufferStart, utf16Length);
}
ALWAYS_INLINE static void makeLower(const CharacterType* input, CharacterType* output, unsigned length)
{
    // FIXME: If we need Unicode lowercasing here, then we probably want the real kind
    // that can potentially change the length of the string rather than the character
    // by character kind. If we don't need Unicode lowercasing, it would be good to
    // simplify this function.

    if (charactersAreAllASCII(input, length)) {
        // Fast case for all-ASCII.
        for (unsigned i = 0; i < length; i++)
            output[i] = toASCIILower(input[i]);
    } else {
        for (unsigned i = 0; i < length; i++)
            output[i] = Unicode::toLower(input[i]);
    }
}
static bool isValidYouTubeVideo(const String& path)
{
    if (!charactersAreAllASCII(path.characters(), path.length()))
        return false;
    unsigned int len = path.length();
    if (len <= sizeof(slash_v_slash)) // check for more than just /v/
        return false;
    CString str = path.lower().utf8();
    const char* data = str.data();
    if (memcmp(data, slash_v_slash, sizeof(slash_v_slash)) != 0)
        return false;
    // Start after /v/
    for (unsigned int i = sizeof(slash_v_slash); i < len; i++) {
        char c = data[i];
        // Check for alpha-numeric characters only.
        if (WTF::isASCIIAlphanumeric(c) || c == '_' || c == '-')
            continue;
        // The url can have more parameters such as &hl=en after the video id.
        // Once we start seeing extra parameters we can return true.
        return c == '&' && i > sizeof(slash_v_slash);
    }
    return true;
}
Esempio n. 5
0
StringView CachedScript::script()
{
    if (!m_script && m_data) {
        if (m_ASCIIOptimizationState == Unknown
            && encodingMayBeAllASCII(encoding())
            && m_data->size()
            && charactersAreAllASCII(reinterpret_cast<const LChar*>(m_data->data()), m_data->size())) {

            m_script = StringImpl::createWithoutCopying(reinterpret_cast<const LChar*>(m_data->data()), m_data->size());
            m_ASCIIOptimizationState = DataAndDecodedStringHaveSameBytes;

            // If the encoded and decoded data are the same, there is no decoded data cost!
            setDecodedSize(0);
            m_decodedDataDeletionTimer.stop();
            return m_script;
        }
        m_script = m_decoder->decodeAndFlush(m_data->data(), encodedSize());
        m_ASCIIOptimizationState = DataAndDecodedStringHaveDifferentBytes;
        setDecodedSize(m_script.sizeInBytes());
    }
    if (m_ASCIIOptimizationState == DataAndDecodedStringHaveDifferentBytes)
        m_decodedDataDeletionTimer.restart();
    return m_script;
}
// When asked for a CJK font with a native name under a non-CJK locale or
// asked for a CJK font with a Romanized name under a CJK locale,
// |GetTextFace| (after |CreateFont*|) returns a 'bogus' value (e.g. Arial).
// This is not consistent with what MSDN says !!
// Therefore, before we call |CreateFont*|, we have to map a Romanized name to 
// the corresponding native name under a CJK locale and vice versa 
// under a non-CJK locale.
// See the corresponding gecko bugs at 
// https://bugzilla.mozilla.org/show_bug.cgi?id=373952
// https://bugzilla.mozilla.org/show_bug.cgi?id=231426
static bool LookupAltName(const String& name, String& altName)
{
    struct FontCodepage {
        WCHAR* name;
        int codePage;
    };

    struct NamePair {
        WCHAR* name;
        FontCodepage altNameCodepage;
    };

    const int japaneseCodepage = 932;
    const int simplifiedChineseCodepage = 936;
    const int koreanCodepage = 949;
    const int traditionalChineseCodepage = 950;

    // FIXME(jungshik) : This list probably covers 99% of cases.
    // To cover the remaining 1% and cut down the file size,
    // consider accessing 'NAME' table of a truetype font
    // using |GetFontData| and caching the mapping.
    // In the table below, the ASCII keys are all lower-cased for
    // case-insensitive matching.
    static const NamePair namePairs[] = {
        // MS Pゴシック, MS PGothic
        {L"\xFF2D\xFF33 \xFF30\x30B4\x30B7\x30C3\x30AF", {L"MS PGothic", japaneseCodepage}},
        {L"ms pgothic", {L"\xFF2D\xFF33 \xFF30\x30B4\x30B7\x30C3\x30AF", japaneseCodepage}},
        // MS P明朝, MS PMincho
        {L"\xFF2D\xFF33 \xFF30\x660E\x671D", {L"MS PMincho", japaneseCodepage}},
        {L"ms pmincho", {L"\xFF2D\xFF33 \xFF30\x660E\x671D", japaneseCodepage}},
        // MSゴシック, MS Gothic
        {L"\xFF2D\xFF33 \x30B4\x30B7\x30C3\x30AF", {L"MS Gothic", japaneseCodepage}},
        {L"ms gothic", {L"\xFF2D\xFF33 \x30B4\x30B7\x30C3\x30AF", japaneseCodepage}},
        // MS 明朝, MS Mincho
        {L"\xFF2D\xFF33 \x660E\x671D", {L"MS Mincho", japaneseCodepage}},
        {L"ms mincho", {L"\xFF2D\xFF33 \x660E\x671D", japaneseCodepage}},
        // メイリオ, Meiryo
        {L"\x30E1\x30A4\x30EA\x30AA", {L"Meiryo", japaneseCodepage}},
        {L"meiryo", {L"\x30E1\x30A4\x30EA\x30AA", japaneseCodepage}},
        // 바탕, Batang
        {L"\xBC14\xD0D5", {L"Batang", koreanCodepage}},
        {L"batang", {L"\xBC14\xD0D5", koreanCodepage}},
        // 바탕체, Batangche
        {L"\xBC14\xD0D5\xCCB4", {L"Batangche", koreanCodepage}},
        {L"batangche", {L"\xBC14\xD0D5\xCCB4", koreanCodepage}},
        // 굴림, Gulim
        {L"\xAD74\xB9BC", {L"Gulim", koreanCodepage}},
        {L"gulim", {L"\xAD74\xB9BC", koreanCodepage}},
        // 굴림체, Gulimche
        {L"\xAD74\xB9BC\xCCB4", {L"Gulimche", koreanCodepage}},
        {L"gulimche", {L"\xAD74\xB9BC\xCCB4", koreanCodepage}},
        // 돋움, Dotum
        {L"\xB3CB\xC6C0", {L"Dotum", koreanCodepage}},
        {L"dotum", {L"\xB3CB\xC6C0", koreanCodepage}},
        // 돋움체, Dotumche
        {L"\xB3CB\xC6C0\xCCB4", {L"Dotumche", koreanCodepage}},
        {L"dotumche", {L"\xB3CB\xC6C0\xCCB4", koreanCodepage}},
        // 궁서, Gungsuh
        {L"\xAD81\xC11C", {L"Gungsuh", koreanCodepage}},
        {L"gungsuh", {L"\xAD81\xC11C", koreanCodepage}},
        // 궁서체, Gungsuhche
        {L"\xAD81\xC11C\xCCB4", {L"Gungsuhche", koreanCodepage}},
        {L"gungsuhche", {L"\xAD81\xC11C\xCCB4", koreanCodepage}},
        // 맑은 고딕, Malgun Gothic
        {L"\xB9D1\xC740 \xACE0\xB515", {L"Malgun Gothic", koreanCodepage}},
        {L"malgun gothic", {L"\xB9D1\xC740 \xACE0\xB515", koreanCodepage}},
        // 宋体, SimSun
        {L"\x5B8B\x4F53", {L"SimSun", simplifiedChineseCodepage}},
        {L"simsun", {L"\x5B8B\x4F53", simplifiedChineseCodepage}},
        // 宋体-ExtB, SimSun-ExtB
        {L"\x5B8B\x4F53-ExtB", {L"SimSun-ExtB", simplifiedChineseCodepage}},
        {L"simsun-extb", {L"\x5B8B\x4F53-extb", simplifiedChineseCodepage}},
        // 黑体, SimHei
        {L"\x9ED1\x4F53", {L"SimHei", simplifiedChineseCodepage}},
        {L"simhei", {L"\x9ED1\x4F53", simplifiedChineseCodepage}},
        // 新宋体, NSimSun
        {L"\x65B0\x5B8B\x4F53", {L"NSimSun", simplifiedChineseCodepage}},
        {L"nsimsun", {L"\x65B0\x5B8B\x4F53", simplifiedChineseCodepage}},
        // 微软雅黑, Microsoft Yahei
        {L"\x5FAE\x8F6F\x96C5\x9ED1", {L"Microsoft Yahei", simplifiedChineseCodepage}},
        {L"microsoft yahei", {L"\x5FAE\x8F6F\x96C5\x9ED1", simplifiedChineseCodepage}},
        // 仿宋, FangSong
        {L"\x4EFF\x5B8B",  {L"FangSong", simplifiedChineseCodepage}},
        {L"fangsong", {L"\x4EFF\x5B8B", simplifiedChineseCodepage}},
        // 楷体, KaiTi
        {L"\x6977\x4F53", {L"KaiTi", simplifiedChineseCodepage}},
        {L"kaiti", {L"\x6977\x4F53", simplifiedChineseCodepage}},
        // 仿宋_GB2312, FangSong_GB2312
        {L"\x4EFF\x5B8B_GB2312",  {L"FangSong_GB2312", simplifiedChineseCodepage}},
        {L"fangsong_gb2312", {L"\x4EFF\x5B8B_gb2312", simplifiedChineseCodepage}},
        // 楷体_GB2312, KaiTi_GB2312
        {L"\x6977\x4F53", {L"KaiTi_GB2312", simplifiedChineseCodepage}},
        {L"kaiti_gb2312", {L"\x6977\x4F53_gb2312", simplifiedChineseCodepage}},
        // 新細明體, PMingLiu
        {L"\x65B0\x7D30\x660E\x9AD4", {L"PMingLiu", traditionalChineseCodepage}},
        {L"pmingliu", {L"\x65B0\x7D30\x660E\x9AD4", traditionalChineseCodepage}},
        // 新細明體-ExtB, PMingLiu-ExtB
        {L"\x65B0\x7D30\x660E\x9AD4-ExtB", {L"PMingLiu-ExtB", traditionalChineseCodepage}},
        {L"pmingliu-extb", {L"\x65B0\x7D30\x660E\x9AD4-extb", traditionalChineseCodepage}},
        // 細明體, MingLiu
        {L"\x7D30\x660E\x9AD4", {L"MingLiu", traditionalChineseCodepage}},
        {L"mingliu", {L"\x7D30\x660E\x9AD4", traditionalChineseCodepage}},
        // 細明體-ExtB, MingLiu-ExtB
        {L"\x7D30\x660E\x9AD4-ExtB", {L"MingLiu-ExtB", traditionalChineseCodepage}},
        {L"mingliu-extb", {L"x65B0\x7D30\x660E\x9AD4-extb", traditionalChineseCodepage}},
        // 微軟正黑體, Microsoft JhengHei
        {L"\x5FAE\x8EDF\x6B63\x9ED1\x9AD4", {L"Microsoft JhengHei", traditionalChineseCodepage}},
        {L"microsoft jhengHei", {L"\x5FAE\x8EDF\x6B63\x9ED1\x9AD4", traditionalChineseCodepage}},
        // 標楷體, DFKai-SB
        {L"\x6A19\x6977\x9AD4", {L"DFKai-SB", traditionalChineseCodepage}},
        {L"dfkai-sb", {L"\x6A19\x6977\x9AD4", traditionalChineseCodepage}},
        // WenQuanYi Zen Hei
        {L"\x6587\x6cc9\x9a5b\x6b63\x9ed1", {L"WenQuanYi Zen Hei", traditionalChineseCodepage}},
        {L"wenquanyi zen hei", {L"\x6587\x6cc9\x9a5b\x6b63\x9ed1", traditionalChineseCodepage}},
        // WenQuanYi Zen Hei
        {L"\x6587\x6cc9\x9a7f\x6b63\x9ed1", {L"WenQuanYi Zen Hei", simplifiedChineseCodepage}},
        {L"wenquanyi zen hei", {L"\x6587\x6cc9\x9a7f\x6b63\x9ed1", simplifiedChineseCodepage}},
        // AR PL ShanHeiSun Uni,
        {L"\x6587\x9f0e\x0050\x004c\x7d30\x4e0a\x6d77\x5b8b\x0055\x006e\x0069",
         {L"AR PL ShanHeiSun Uni", traditionalChineseCodepage}},
        {L"ar pl shanheisun uni",
         {L"\x6587\x9f0e\x0050\x004c\x7d30\x4e0a\x6d77\x5b8b\x0055\x006e\x0069", traditionalChineseCodepage}},
        // AR PL ShanHeiSun Uni,
        {L"\x6587\x9f0e\x0050\x004c\x7ec6\x4e0a\x6d77\x5b8b\x0055\x006e\x0069",
         {L"AR PL ShanHeiSun Uni", simplifiedChineseCodepage}},
        {L"ar pl shanheisun uni",
         {L"\x6587\x9f0e\x0050\x004c\x7ec6\x4e0a\x6d77\x5b8b\x0055\x006e\x0069", simplifiedChineseCodepage}},
        // AR PL ZenKai Uni
        // Traditional Chinese and Simplified Chinese names are
        // identical.
        {L"\x6587\x0050\x004C\x4E2D\x6977\x0055\x006E\x0069", {L"AR PL ZenKai Uni", traditionalChineseCodepage}},
        {L"ar pl zenkai uni", {L"\x6587\x0050\x004C\x4E2D\x6977\x0055\x006E\x0069", traditionalChineseCodepage}},
        {L"\x6587\x0050\x004C\x4E2D\x6977\x0055\x006E\x0069", {L"AR PL ZenKai Uni", simplifiedChineseCodepage}},
        {L"ar pl zenkai uni", {L"\x6587\x0050\x004C\x4E2D\x6977\x0055\x006E\x0069", simplifiedChineseCodepage}},
    };

    typedef HashMap<String, const FontCodepage*> NameMap;
    static NameMap* fontNameMap = 0;

    if (!fontNameMap) {
        fontNameMap = new NameMap;
        for (size_t i = 0; i < WTF_ARRAY_LENGTH(namePairs); ++i)
            fontNameMap->set(String(namePairs[i].name), &(namePairs[i].altNameCodepage));
    }

    bool isAscii = false; 
    String n;
    // use |lower| only for ASCII names 
    // For non-ASCII names, we don't want to invoke an expensive 
    // and unnecessary |lower|. 
    if (charactersAreAllASCII(name)) {
        isAscii = true;
        n = name.lower();
    } else
        n = name;

    NameMap::iterator iter = fontNameMap->find(n);
    if (iter == fontNameMap->end())
        return false;

    static int systemCp = ::GetACP();
    int fontCp = iter->second->codePage;

    if ((isAscii && systemCp == fontCp) || (!isAscii && systemCp != fontCp)) {
        altName = String(iter->second->name);
        return true;
    }

    return false;
}
Esempio n. 7
0
// FIXME: consider adding to WebKit String class
static bool charactersAreAllASCII(const String& s)
{
    return charactersAreAllASCII(s.characters(), s.length());
}