Пример #1
0
// Returns the family name of a symbol/emoji font chosen from the Unicode block
// that contains |ucs4|, or 0 when the block is not one of those handled below.
//
// The emoji font is picked once, on the first call, as the first entry of
// |emojiFonts| for which isFontPresent() succeeds with |fontManager|; later
// calls reuse that choice and do not consult |fontManager| again. If none of
// the candidates is present, 0 is returned for the Emoticons block.
const UChar* getFontBasedOnUnicodeBlock(int ucs4, SkFontMgr* fontManager)
{
    static const UChar* emojiFonts[] = {L"Segoe UI Emoji", L"Segoe UI Symbol"};
    static const UChar* symbolFont = L"Segoe UI Symbol";
    // Must be static: the probe below runs only once, so the result has to
    // survive across calls. As a plain local it was reset to 0 on every call
    // after the first, and emoticons then never got a font.
    static const UChar* emojiFont = 0;
    static bool initialized = false;
    if (!initialized) {
        for (size_t i = 0; i < WTF_ARRAY_LENGTH(emojiFonts); i++) {
            if (isFontPresent(emojiFonts[i], fontManager)) {
                emojiFont = emojiFonts[i];
                break;
            }
        }
        initialized = true;
    }

    UBlockCode block = ublock_getCode(ucs4);
    switch (block) {
    case UBLOCK_EMOTICONS:
        return emojiFont;
    case UBLOCK_PLAYING_CARDS:
    case UBLOCK_MISCELLANEOUS_SYMBOLS:
    case UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS:
    case UBLOCK_TRANSPORT_AND_MAP_SYMBOLS:
    case UBLOCK_ALCHEMICAL_SYMBOLS:
    case UBLOCK_RUNIC:
    case UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS:
    case UBLOCK_DINGBATS:
        return symbolFont;
    default:
        return 0;
    }
}
// Picks a script for |ucs4| from the Unicode block it belongs to.
// Many characters in USCRIPT_COMMON can be covered by fonts for scripts
// closely related to them. See
// http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:]
// Blocks that are not listed below yield USCRIPT_COMMON.
// FIXME: make this more efficient with a wider coverage
UScriptCode getScriptBasedOnUnicodeBlock(int ucs4)
{
    UScriptCode script = USCRIPT_COMMON;
    switch (ublock_getCode(ucs4)) {
    case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
        script = USCRIPT_HAN;
        break;
    case UBLOCK_HIRAGANA:
    case UBLOCK_KATAKANA:
        script = USCRIPT_KATAKANA_OR_HIRAGANA;
        break;
    case UBLOCK_ARABIC:
        script = USCRIPT_ARABIC;
        break;
    case UBLOCK_THAI:
        script = USCRIPT_THAI;
        break;
    case UBLOCK_GREEK:
        script = USCRIPT_GREEK;
        break;
    case UBLOCK_DEVANAGARI:
        // Danda and Double Danda (U+0964, U+0965) are shared with other
        // scripts, but without context a Devanagari font is the best
        // available choice for now.
        script = USCRIPT_DEVANAGARI;
        break;
    case UBLOCK_ARMENIAN:
        script = USCRIPT_ARMENIAN;
        break;
    case UBLOCK_GEORGIAN:
        script = USCRIPT_GEORGIAN;
        break;
    case UBLOCK_KANNADA:
        script = USCRIPT_KANNADA;
        break;
    case UBLOCK_GOTHIC:
        script = USCRIPT_GOTHIC;
        break;
    default:
        break;
    }
    return script;
}
Пример #3
0
// Returns the Unicode block code of |ch| as an int, reporting
// UBLOCK_NO_BLOCK in place of UBLOCK_INVALID_CODE.
int UnicodeBlockSet::FindBlockIndex(UChar32 ch)
{
	const int code = ublock_getCode(ch);
	return (code == UBLOCK_INVALID_CODE) ? static_cast<int>(UBLOCK_NO_BLOCK) : code;
}
Пример #4
0
// Creates a break engine for the script of |c|.
//
// Returns NULL when the script lookup fails, when no dictionary matcher can
// be loaded for the script and |breakType|, when the script has no engine
// in the switch below, or when engine construction reports a failure.
// The matcher is deleted here if no engine was created for it; if an engine
// was created but |status| indicates failure, the engine is deleted instead
// (presumably the engine owns the matcher at that point - confirm against
// the engine constructors).
const LanguageBreakEngine *
ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
    UErrorCode status = U_ZERO_ERROR;
    UScriptCode code = uscript_getScript(c, &status);
    if (!U_SUCCESS(status)) {
        return NULL;
    }

    DictionaryMatcher *matcher = loadDictionaryMatcherFor(code, breakType);
    if (matcher == NULL) {
        return NULL;
    }

    const LanguageBreakEngine *engine = NULL;
    switch(code) {
    case USCRIPT_THAI:
        engine = new ThaiBreakEngine(matcher, status);
        break;
    case USCRIPT_KHMER:
        engine = new KhmerBreakEngine(matcher, status);
        break;

#if !UCONFIG_NO_NORMALIZATION
        // CJK not available w/o normalization
    case USCRIPT_HANGUL:
        engine = new CjkBreakEngine(matcher, kKorean, status);
        break;

    // Chinese and Japanese share one BreakEngine and dictionary
    case USCRIPT_HIRAGANA:
    case USCRIPT_KATAKANA:
    case USCRIPT_HAN:
        engine = new CjkBreakEngine(matcher, kChineseJapanese, status);
        break;
#if 0
    // TODO: Have to get some characters with script=common handled
    // by CjkBreakEngine (e.g. U+309B). Simply subjecting
    // them to CjkBreakEngine does not work. The engine has to
    // special-case them.
    // NOTE(review): this disabled code passes the script code (not the
    // character) to ublock_getCode and refers to `dict`, which is not
    // declared in this function; it needs fixing before being enabled.
    case USCRIPT_COMMON:
    {
        UBlockCode block = ublock_getCode(code);
        if (block == UBLOCK_HIRAGANA || block == UBLOCK_KATAKANA)
           engine = new CjkBreakEngine(dict, kChineseJapanese, status);
        break;
    }
#endif
#endif

    default:
        break;
    }

    if (engine == NULL) {
        // Nothing took the matcher, so release it here.
        delete matcher;
        return NULL;
    }
    if (U_FAILURE(status)) {
        delete engine;
        return NULL;
    }
    return engine;
}
Пример #5
0
// Fills m_block_map with the first and last code point of every Unicode
// block by walking the code points in order and recording each position
// where the block code changes. Entries for UBLOCK_INVALID_CODE and
// UBLOCK_NO_BLOCK are removed afterwards, then InitBlockDescriptions() runs.
UnicodeBlockSet::UnicodeBlockSet()
{
	int previous_block = UBLOCK_INVALID_CODE;
	// The scan deliberately goes up to 0x110000 inclusive, one past U+10FFFF
	// (presumably so the final block gets its end recorded - confirm that
	// ublock_getCode reports a different code for that value).
	for (UChar32 cp = 0; cp <= 0x110000; cp++)
	{
		int current_block = ublock_getCode(cp);
		if (current_block == previous_block)
			continue;

		// Block boundary: the block being left ends just before cp,
		// and the block being entered starts at cp.
		m_block_map[previous_block].second = cp - 1;
		m_block_map[current_block].first = cp;
		previous_block = current_block;
	}

	m_block_map.erase(UBLOCK_INVALID_CODE);
	m_block_map.erase(UBLOCK_NO_BLOCK);

	InitBlockDescriptions();
}
Пример #6
0
// IntlChar::getBlockCode: returns the result of ublock_getCode() for the
// code point obtained from |arg|.
Variant HHVM_STATIC_METHOD(IntlChar, getBlockCode, const Variant& arg) {
  // NOTE(review): GETCP is a macro defined elsewhere; presumably it converts
  // |arg| into the code point |cp| and returns early on invalid input -
  // confirm against its definition.
  GETCP(arg, cp);
  return ublock_getCode(cp);
}
// Returns the Unicode block code of |codePoint| as an int. The JNIEnv and
// jclass arguments are accepted but not used.
static int Character_ofImpl(JNIEnv*, jclass, jint codePoint) {
    const int blockCode = ublock_getCode(codePoint);
    return blockCode;
}
Пример #8
0
//static int Character_ofImpl(JNIEnv*, jclass, jint codePoint) {
// Exported native entry point that returns the Unicode block code of
// |codePoint|. The JNIEnv and jclass arguments are accepted but not used.
JNIEXPORT jint JNICALL
Java_java_lang_Character_ofImpl(JNIEnv*, jclass, jint codePoint) {
    const jint blockCode = ublock_getCode(codePoint);
    return blockCode;
}
Пример #9
0
// Finds a fallback font for the first character of |characters|.
//
// Only characters[0] is examined; |length| is not consulted. When the MLang
// font-link interface is available it is asked for a font covering the
// character (with special handling for unified ideographs, which may map to
// several code pages). If that yields no font, FontPlatformData::mapKnownFont
// is tried, and finally FontPlatformData::defaultFontFamily().
//
// Returns the cached font data for the chosen family, or 0 when no family
// could be determined, no platform data is cached for it, or the result has
// the same hash as |originalFontData|'s platform data.
PassRefPtr<SimpleFontData> FontCache::systemFallbackForCharacters(const FontDescription& description, const SimpleFontData* originalFontData, bool, const UChar* characters, int length)
{
    String familyName;
    WCHAR name[LF_FACESIZE];

    UChar character = characters[0];
    const FontPlatformData& origFont = originalFontData->platformData();

    if (IMLangFontLinkType* langFontLink = getFontLinkInterface()) {
        // Remember the font currently selected into the shared DC so it can
        // be restored once probing is done.
        HGDIOBJ oldFont = GetCurrentObject(g_screenDC, OBJ_FONT);
        HFONT hfont = 0;
        DWORD codePages = 0;
        UINT codePage = 0;
        // Try MLang font linking first.
        langFontLink->GetCharCodePages(character, &codePages);
        if (codePages && u_getIntPropertyValue(character, UCHAR_UNIFIED_IDEOGRAPH)) {
            // The CJK character may belong to multiple code pages. We want to
            // do font linking against a single one of them, preferring the default
            // code page for the user's locale.
            const Vector<DWORD, 4>& CJKCodePageMasks = getCJKCodePageMasks();
            unsigned numCodePages = CJKCodePageMasks.size();
            for (unsigned i = 0; i < numCodePages; ++i) {
                hfont = createMLangFont(langFontLink, g_screenDC, origFont, CJKCodePageMasks[i]);
                if (!hfont)
                    continue;

                SelectObject(g_screenDC, hfont);
                GetTextFace(g_screenDC, LF_FACESIZE, name);

                if (hfont && !(codePages & CJKCodePageMasks[i])) {
                    // We asked about a code page that is not one of the code pages
                    // returned by MLang, so the font might not contain the character.
                    if (!currentFontContainsCharacter(langFontLink, g_screenDC, hfont, character, name))
                    {
                        // Reject this candidate: deselect and release it
                        // before trying the next code page.
                        SelectObject(g_screenDC, oldFont);
                        langFontLink->ReleaseFont(hfont);
                        hfont = 0;
                        continue;
                    }
                }
                break;
            }
        } else {
            hfont = createMLangFont(langFontLink, g_screenDC, origFont, codePages, character);
            SelectObject(g_screenDC, hfont);
            GetTextFace(g_screenDC, LF_FACESIZE, name);
        }
        SelectObject(g_screenDC, oldFont);

        if (hfont) {
            // |name| was filled in by GetTextFace while |hfont| was selected.
            familyName = name;
            langFontLink->ReleaseFont(hfont);
        } else
            FontPlatformData::mapKnownFont(codePages, familyName);
    }

    if (familyName.isEmpty())
        familyName = FontPlatformData::defaultFontFamily();

    if (!familyName.isEmpty()) {
        // FIXME: temporary workaround for Thai font problem
        // Thai text heavier than normal weight is requested at normal weight.
        // The block lookup uses |character|; the previous code referred to an
        // undeclared identifier `c` here.
        FontDescription fontDescription(description);
        if (ublock_getCode(character) == UBLOCK_THAI && fontDescription.weight() > FontWeightNormal)
            fontDescription.setWeight(FontWeightNormal);

        FontPlatformData* result = getCachedFontPlatformData(fontDescription, familyName);
        // Skip a result that hashes the same as the original font.
        if (result && result->hash() != origFont.hash()) {
            if (RefPtr<SimpleFontData> fontData = getCachedFontData(result, DoNotRetain))
                return fontData.release();
        }
    }

    return 0;
}
Пример #10
0
// Returns the Unicode block code of |codePoint| as an int.
int fastiva_vm_Character_C$__ofImpl(jint codePoint) {
    const int blockCode = ublock_getCode(codePoint);
    return blockCode;
}
Пример #11
0
/*
 * Returns the integer value of property `which` for code point `c`.
 * Binary properties are answered via u_hasBinaryProperty(); the integer
 * properties are dispatched in the switch below. Selectors that fall outside
 * the handled ranges, or that have no case in the switch, yield 0.
 */
U_CAPI int32_t U_EXPORT2
u_getIntPropertyValue(UChar32 c, UProperty which) {
    /* only assigned and used in the UCHAR_SCRIPT case */
    UErrorCode errorCode;

    if(which<UCHAR_BINARY_START) {
        return 0; /* undefined */
    } else if(which<UCHAR_BINARY_LIMIT) {
        /* binary property: result is converted to int32_t */
        return (int32_t)u_hasBinaryProperty(c, which);
    } else if(which<UCHAR_INT_START) {
        return 0; /* undefined */
    } else if(which<UCHAR_INT_LIMIT) {
        /* integer-valued properties */
        switch(which) {
        case UCHAR_BIDI_CLASS:
            return (int32_t)u_charDirection(c);
        case UCHAR_BLOCK:
            return (int32_t)ublock_getCode(c);
#if !UCONFIG_NO_NORMALIZATION
        case UCHAR_CANONICAL_COMBINING_CLASS:
            return u_getCombiningClass(c);
#endif
        case UCHAR_DECOMPOSITION_TYPE:
            return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_DT_MASK);
        case UCHAR_EAST_ASIAN_WIDTH:
            return (int32_t)(u_getUnicodeProperties(c, 0)&UPROPS_EA_MASK)>>UPROPS_EA_SHIFT;
        case UCHAR_GENERAL_CATEGORY:
            return (int32_t)u_charType(c);
        case UCHAR_JOINING_GROUP:
            return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c);
        case UCHAR_JOINING_TYPE:
            return ubidi_getJoiningType(GET_BIDI_PROPS(), c);
        case UCHAR_LINE_BREAK:
            return (int32_t)(u_getUnicodeProperties(c, UPROPS_LB_VWORD)&UPROPS_LB_MASK)>>UPROPS_LB_SHIFT;
        case UCHAR_NUMERIC_TYPE: {
            int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getUnicodeProperties(c, -1));
            return UPROPS_NTV_GET_TYPE(ntv);
        }
        case UCHAR_SCRIPT:
            /* the error code is reset before the call and not checked afterwards */
            errorCode=U_ZERO_ERROR;
            return (int32_t)uscript_getScript(c, &errorCode);
        case UCHAR_HANGUL_SYLLABLE_TYPE: {
            /* see comments on gcbToHst[] above */
            int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
            /* values beyond the end of the table map to "not applicable" */
            if(gcb<LENGTHOF(gcbToHst)) {
                return gcbToHst[gcb];
            } else {
                return U_HST_NOT_APPLICABLE;
            }
        }
#if !UCONFIG_NO_NORMALIZATION
        case UCHAR_NFD_QUICK_CHECK:
        case UCHAR_NFKD_QUICK_CHECK:
        case UCHAR_NFC_QUICK_CHECK:
        case UCHAR_NFKC_QUICK_CHECK:
            /* the property's offset from UCHAR_NFD_QUICK_CHECK selects the mode, counted from UNORM_NFD */
            return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD));
        case UCHAR_LEAD_CANONICAL_COMBINING_CLASS:
            /* upper byte of the getFCD16() value */
            return getFCD16(c)>>8;
        case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS:
            /* lower byte of the getFCD16() value */
            return getFCD16(c)&0xff;
#endif
        case UCHAR_GRAPHEME_CLUSTER_BREAK:
            return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
        case UCHAR_SENTENCE_BREAK:
            return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_SB_MASK)>>UPROPS_SB_SHIFT;
        case UCHAR_WORD_BREAK:
            return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_WB_MASK)>>UPROPS_WB_SHIFT;
        default:
            return 0; /* undefined */
        }
    } else if(which==UCHAR_GENERAL_CATEGORY_MASK) {
        /* NOTE(review): the body of this branch and the rest of the function are not present in this excerpt. */
// Chooses a fallback font family for a run of UTF-16 text.
//
// The script is taken from the first character whose script is neither
// USCRIPT_COMMON nor USCRIPT_INVALID_CODE; a few Common/Inherited characters
// are then mapped to a script by Unicode block. If no family is known for
// the resulting script, a hard-coded family is chosen by Unicode plane.
//
// |characters| must be non-null with a non-zero first unit, and |length|
// must be positive (asserted).
//
// FIXME:
//  - Handle 'Inherited', 'Common' and 'Unknown'
//    (see http://www.unicode.org/reports/tr24/#Usage_Model )
//    For 'Inherited' and 'Common', perhaps we need to
//    accept another parameter indicating the previous family
//    and just return it.
//  - All the characters (or characters up to the point a single
//    font can cover) need to be taken into account
const UChar* getFallbackFamily(const UChar* characters,
                               int length,
                               FontDescription::GenericFamilyType generic,
                               UChar32* charChecked,
                               UScriptCode* scriptChecked)
{
    ASSERT(characters && characters[0] && length > 0);
    UScriptCode script = USCRIPT_COMMON;

    // Sometimes characters common to script (e.g. space) is at
    // the beginning of a string so that we need to skip them
    // to get a font required to render the string.
    int i = 0;
    UChar32 ucs4 = 0;
    // The script alternatives are parenthesized so that the bounds check
    // guards both of them. Without the parentheses, && bound tighter than ||
    // and an invalid script kept the loop reading past |length|.
    while (i < length && (script == USCRIPT_COMMON || script == USCRIPT_INVALID_CODE)) {
        U16_NEXT(characters, i, length, ucs4);
        UErrorCode err = U_ZERO_ERROR;
        script = uscript_getScript(ucs4, &err);
        // silently ignore the error
    }

    // For the full-width ASCII characters (U+FF01 - U+FF5E), use the font for
    // Han (determined in a locale-dependent way above). Full-width ASCII
    // characters are rather widely used in Japanese and Chinese documents and
    // they're fully covered by Chinese, Japanese and Korean fonts.
    if (0xFF00 < ucs4 && ucs4 < 0xFF5F)
        script = USCRIPT_HAN;

    // There are a lot of characters in USCRIPT_COMMON that can be covered
    // by fonts for scripts closely related to them. See
    // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:]
    // FIXME: make this more efficient with a wider coverage
    if (script == USCRIPT_COMMON || script == USCRIPT_INHERITED) {
        UBlockCode block = ublock_getCode(ucs4);
        switch (block) {
        case UBLOCK_BASIC_LATIN:
            script = USCRIPT_LATIN;
            break;
        case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
            script = USCRIPT_HAN;
            break;
        case UBLOCK_HIRAGANA:
        case UBLOCK_KATAKANA:
            script = USCRIPT_HIRAGANA;
            break;
        case UBLOCK_ARABIC:
            script = USCRIPT_ARABIC;
            break;
        case UBLOCK_GREEK:
            script = USCRIPT_GREEK;
            break;
        case UBLOCK_DEVANAGARI:
            // For Danda and Double Danda (U+0964, U+0965), use a Devanagari
            // font for now although they're used by other scripts as well.
            // Without a context, we can't do any better.
            script = USCRIPT_DEVANAGARI;
            break;
        case UBLOCK_ARMENIAN:
            script = USCRIPT_ARMENIAN;
            break;
        case UBLOCK_GEORGIAN:
            script = USCRIPT_GEORGIAN;
            break;
        case UBLOCK_KANNADA:
            script = USCRIPT_KANNADA;
            break;
        }
    }

    // Another lame work-around to cover non-BMP characters.
    const UChar* family = getFontFamilyForScript(script, generic);
    if (!family) {
        // No family for the script: choose by Unicode plane instead.
        int plane = ucs4 >> 16;
        switch (plane) {
        case 1:
            family = L"code2001";
            break;
        case 2:
            family = L"simsun-extb";
            break;
        default:
            family = L"lucida sans unicode";
        }
    }
    // NOTE(review): the remainder of this function is not present in this excerpt.
Пример #13
0
// Wrapper that forwards |c| to ublock_getCode() and returns its result.
UBlockCode __hs_ublock_getCode(UChar32 c)
{
    const UBlockCode blockCode = ublock_getCode(c);
    return blockCode;
}